[R package] GPU support (#2732)
* [R] MSVC compatibility * [GPU] allow seed in BernoulliRng up to size_t and scale to uint32_t * R package build with cmake and CUDA * R package CUDA build fixes and cleanups * always export the R package native initialization routine on windows * update the install instructions doc * fix lint * use static_cast directly to set BernoulliRng seed * [R] demo for GPU accelerated algorithm * tidy up the R package cmake stuff * R pack cmake: installs main dependency packages if needed * [R] version bump in DESCRIPTION * update NEWS * added short missing/sparse values explanations to FAQ
This commit is contained in:
parent
5c9f01d0a9
commit
74db9757b3
@ -1,6 +1,7 @@
|
|||||||
cmake_minimum_required (VERSION 3.2)
|
cmake_minimum_required (VERSION 3.2)
|
||||||
project(xgboost)
|
project(xgboost)
|
||||||
include(cmake/Utils.cmake)
|
include(cmake/Utils.cmake)
|
||||||
|
list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake/modules")
|
||||||
find_package(OpenMP)
|
find_package(OpenMP)
|
||||||
|
|
||||||
set_default_configuration_release()
|
set_default_configuration_release()
|
||||||
@ -10,6 +11,7 @@ msvc_use_static_runtime()
|
|||||||
option(USE_CUDA "Build with GPU acceleration")
|
option(USE_CUDA "Build with GPU acceleration")
|
||||||
option(JVM_BINDINGS "Build JVM bindings" OFF)
|
option(JVM_BINDINGS "Build JVM bindings" OFF)
|
||||||
option(GOOGLE_TEST "Build google tests" OFF)
|
option(GOOGLE_TEST "Build google tests" OFF)
|
||||||
|
option(R_LIB "Build shared library for R package" OFF)
|
||||||
set(GPU_COMPUTE_VER 35;50;52;60;61 CACHE STRING
|
set(GPU_COMPUTE_VER 35;50;52;60;61 CACHE STRING
|
||||||
"Semicolon separated list of compute versions to be built against")
|
"Semicolon separated list of compute versions to be built against")
|
||||||
|
|
||||||
@ -34,6 +36,19 @@ else()
|
|||||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -funroll-loops")
|
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -funroll-loops")
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
# compiled code customizations for R package
|
||||||
|
if(R_LIB)
|
||||||
|
add_definitions(
|
||||||
|
-DXGBOOST_STRICT_R_MODE=1
|
||||||
|
-DXGBOOST_CUSTOMIZE_GLOBAL_PRNG=1
|
||||||
|
-DDMLC_LOG_BEFORE_THROW=0
|
||||||
|
-DDMLC_DISABLE_STDIN=1
|
||||||
|
-DDMLC_LOG_CUSTOMIZE=1
|
||||||
|
-DRABIT_CUSTOMIZE_MSG_
|
||||||
|
-DRABIT_STRICT_CXX98_
|
||||||
|
)
|
||||||
|
endif()
|
||||||
|
|
||||||
include_directories (
|
include_directories (
|
||||||
${PROJECT_SOURCE_DIR}/include
|
${PROJECT_SOURCE_DIR}/include
|
||||||
${PROJECT_SOURCE_DIR}/dmlc-core/include
|
${PROJECT_SOURCE_DIR}/dmlc-core/include
|
||||||
@ -66,7 +81,7 @@ set(RABIT_EMPTY_SOURCES
|
|||||||
rabit/src/engine_empty.cc
|
rabit/src/engine_empty.cc
|
||||||
rabit/src/c_api.cc
|
rabit/src/c_api.cc
|
||||||
)
|
)
|
||||||
if(MINGW)
|
if(MINGW OR R_LIB)
|
||||||
# build a dummy rabit library
|
# build a dummy rabit library
|
||||||
add_library(rabit STATIC ${RABIT_EMPTY_SOURCES})
|
add_library(rabit STATIC ${RABIT_EMPTY_SOURCES})
|
||||||
else()
|
else()
|
||||||
@ -78,6 +93,7 @@ endif()
|
|||||||
add_subdirectory(dmlc-core)
|
add_subdirectory(dmlc-core)
|
||||||
set(LINK_LIBRARIES dmlccore rabit)
|
set(LINK_LIBRARIES dmlccore rabit)
|
||||||
|
|
||||||
|
|
||||||
if(USE_CUDA)
|
if(USE_CUDA)
|
||||||
find_package(CUDA 7.5 REQUIRED)
|
find_package(CUDA 7.5 REQUIRED)
|
||||||
cmake_minimum_required(VERSION 3.5)
|
cmake_minimum_required(VERSION 3.5)
|
||||||
@ -102,27 +118,65 @@ if(USE_CUDA)
|
|||||||
list(APPEND LINK_LIBRARIES gpuxgboost)
|
list(APPEND LINK_LIBRARIES gpuxgboost)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
add_library(objxgboost OBJECT ${SOURCES})
|
|
||||||
|
|
||||||
# Executable
|
# flags and sources for R-package
|
||||||
add_executable(runxgboost $<TARGET_OBJECTS:objxgboost> src/cli_main.cc)
|
if(R_LIB)
|
||||||
set_target_properties(runxgboost PROPERTIES
|
file(GLOB_RECURSE R_SOURCES
|
||||||
OUTPUT_NAME xgboost
|
R-package/src/*.h
|
||||||
)
|
R-package/src/*.c
|
||||||
set_output_directory(runxgboost ${PROJECT_SOURCE_DIR})
|
R-package/src/*.cc
|
||||||
target_link_libraries(runxgboost ${LINK_LIBRARIES})
|
)
|
||||||
|
list(APPEND SOURCES ${R_SOURCES})
|
||||||
# Shared library
|
endif()
|
||||||
add_library(xgboost SHARED $<TARGET_OBJECTS:objxgboost>)
|
|
||||||
target_link_libraries(xgboost ${LINK_LIBRARIES})
|
add_library(objxgboost OBJECT ${SOURCES})
|
||||||
set_output_directory(xgboost ${PROJECT_SOURCE_DIR}/lib)
|
|
||||||
if(MINGW)
|
|
||||||
# remove the 'lib' prefix to conform to windows convention for shared library names
|
# building shared library for R package
|
||||||
set_target_properties(xgboost PROPERTIES PREFIX "")
|
if(R_LIB)
|
||||||
|
find_package(LibR REQUIRED)
|
||||||
|
|
||||||
|
list(APPEND LINK_LIBRARIES "${LIBR_CORE_LIBRARY}")
|
||||||
|
MESSAGE(STATUS "LIBR_CORE_LIBRARY " ${LIBR_CORE_LIBRARY})
|
||||||
|
|
||||||
|
include_directories(
|
||||||
|
"${LIBR_INCLUDE_DIRS}"
|
||||||
|
"${PROJECT_SOURCE_DIR}"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Shared library target for the R package
|
||||||
|
add_library(xgboost SHARED $<TARGET_OBJECTS:objxgboost>)
|
||||||
|
target_link_libraries(xgboost ${LINK_LIBRARIES})
|
||||||
|
# R uses no lib prefix in shared library names of its packages
|
||||||
|
set_target_properties(xgboost PROPERTIES PREFIX "")
|
||||||
|
|
||||||
|
setup_rpackage_install_target(xgboost ${CMAKE_CURRENT_BINARY_DIR})
|
||||||
|
# use a dummy location for any other remaining installs
|
||||||
|
set(CMAKE_INSTALL_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/dummy_inst")
|
||||||
|
|
||||||
|
# main targets: shared library & exe
|
||||||
|
else()
|
||||||
|
# Executable
|
||||||
|
add_executable(runxgboost $<TARGET_OBJECTS:objxgboost> src/cli_main.cc)
|
||||||
|
set_target_properties(runxgboost PROPERTIES
|
||||||
|
OUTPUT_NAME xgboost
|
||||||
|
)
|
||||||
|
set_output_directory(runxgboost ${PROJECT_SOURCE_DIR})
|
||||||
|
target_link_libraries(runxgboost ${LINK_LIBRARIES})
|
||||||
|
|
||||||
|
# Shared library
|
||||||
|
add_library(xgboost SHARED $<TARGET_OBJECTS:objxgboost>)
|
||||||
|
target_link_libraries(xgboost ${LINK_LIBRARIES})
|
||||||
|
set_output_directory(xgboost ${PROJECT_SOURCE_DIR}/lib)
|
||||||
|
if(MINGW)
|
||||||
|
# remove the 'lib' prefix to conform to windows convention for shared library names
|
||||||
|
set_target_properties(xgboost PROPERTIES PREFIX "")
|
||||||
|
endif()
|
||||||
|
|
||||||
|
#Ensure these two targets do not build simultaneously, as they produce outputs with conflicting names
|
||||||
|
add_dependencies(xgboost runxgboost)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
#Ensure these two targets do not build simultaneously, as they produce outputs with conflicting names
|
|
||||||
add_dependencies(xgboost runxgboost)
|
|
||||||
|
|
||||||
# JVM
|
# JVM
|
||||||
if(JVM_BINDINGS)
|
if(JVM_BINDINGS)
|
||||||
@ -139,6 +193,7 @@ if(JVM_BINDINGS)
|
|||||||
${JAVA_JVM_LIBRARY})
|
${JAVA_JVM_LIBRARY})
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
|
||||||
# Test
|
# Test
|
||||||
if(GOOGLE_TEST)
|
if(GOOGLE_TEST)
|
||||||
enable_testing()
|
enable_testing()
|
||||||
@ -162,5 +217,6 @@ if(GOOGLE_TEST)
|
|||||||
add_test(TestXGBoost testxgboost)
|
add_test(TestXGBoost testxgboost)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
|
||||||
# Group sources
|
# Group sources
|
||||||
auto_source_group("${SOURCES}")
|
auto_source_group("${SOURCES}")
|
||||||
|
|||||||
4
NEWS.md
4
NEWS.md
@ -15,15 +15,19 @@ This file records the changes in xgboost library in reverse chronological order.
|
|||||||
- Thread local variable is upgraded so it is automatically freed at thread exit.
|
- Thread local variable is upgraded so it is automatically freed at thread exit.
|
||||||
* Migrate to C++11
|
* Migrate to C++11
|
||||||
- The current master version now requires a C++11-enabled compiler (g++ 4.8 or higher)
|
- The current master version now requires a C++11-enabled compiler (g++ 4.8 or higher)
|
||||||
|
* Predictor interface was factored out (in a manner similar to the updater interface).
|
||||||
* New functionality
|
* New functionality
|
||||||
- Ability to adjust tree model's statistics to a new dataset without changing tree structures.
|
- Ability to adjust tree model's statistics to a new dataset without changing tree structures.
|
||||||
- Extracting feature contributions to individual predictions.
|
- Extracting feature contributions to individual predictions.
|
||||||
|
- Faster, histogram-based tree algorithm (`tree_method='hist'`) .
|
||||||
|
- GPU/CUDA accelerated tree algorithms (`tree_method='gpu_hist'` or `'gpu_exact'`), including the GPU-based predictor.
|
||||||
* R package:
|
* R package:
|
||||||
- New parameters:
|
- New parameters:
|
||||||
- `silent` in `xgb.DMatrix()`
|
- `silent` in `xgb.DMatrix()`
|
||||||
- `use_int_id` in `xgb.model.dt.tree()`
|
- `use_int_id` in `xgb.model.dt.tree()`
|
||||||
- `predcontrib` in `predict()`
|
- `predcontrib` in `predict()`
|
||||||
- Default value of the `save_period` parameter in `xgboost()` changed to NULL (consistent with `xgb.train()`).
|
- Default value of the `save_period` parameter in `xgboost()` changed to NULL (consistent with `xgb.train()`).
|
||||||
|
- It's possible to custom-build the R package with GPU acceleration support.
|
||||||
|
|
||||||
## v0.6 (2016.07.29)
|
## v0.6 (2016.07.29)
|
||||||
* Version 0.5 is skipped due to major improvements in the core
|
* Version 0.5 is skipped due to major improvements in the core
|
||||||
|
|||||||
@ -1,8 +1,8 @@
|
|||||||
Package: xgboost
|
Package: xgboost
|
||||||
Type: Package
|
Type: Package
|
||||||
Title: Extreme Gradient Boosting
|
Title: Extreme Gradient Boosting
|
||||||
Version: 0.6.4.6
|
Version: 0.6.4.7
|
||||||
Date: 2017-01-04
|
Date: 2017-09-25
|
||||||
Author: Tianqi Chen <tianqi.tchen@gmail.com>, Tong He <hetong007@gmail.com>,
|
Author: Tianqi Chen <tianqi.tchen@gmail.com>, Tong He <hetong007@gmail.com>,
|
||||||
Michael Benesty <michael@benesty.fr>, Vadim Khotilovich <khotilovich@gmail.com>,
|
Michael Benesty <michael@benesty.fr>, Vadim Khotilovich <khotilovich@gmail.com>,
|
||||||
Yuan Tang <terrytangyuan@gmail.com>
|
Yuan Tang <terrytangyuan@gmail.com>
|
||||||
|
|||||||
@ -10,3 +10,4 @@ predict_leaf_indices Predicting the corresponding leaves
|
|||||||
early_stopping Early Stop in training
|
early_stopping Early Stop in training
|
||||||
poisson_regression Poisson Regression on count data
|
poisson_regression Poisson Regression on count data
|
||||||
tweedie_regression Tweedie Regression
|
tweedie_regression Tweedie Regression
|
||||||
|
gpu_accelerated GPU-accelerated tree building algorithms
|
||||||
@ -8,6 +8,7 @@ XGBoost R Feature Walkthrough
|
|||||||
* [Generalized Linear Model](generalized_linear_model.R)
|
* [Generalized Linear Model](generalized_linear_model.R)
|
||||||
* [Cross validation](cross_validation.R)
|
* [Cross validation](cross_validation.R)
|
||||||
* [Create a sparse matrix from a dense one](create_sparse_matrix.R)
|
* [Create a sparse matrix from a dense one](create_sparse_matrix.R)
|
||||||
|
* [Use GPU-accelerated tree building algorithms](gpu_accelerated.R)
|
||||||
|
|
||||||
Benchmarks
|
Benchmarks
|
||||||
====
|
====
|
||||||
|
|||||||
45
R-package/demo/gpu_accelerated.R
Normal file
45
R-package/demo/gpu_accelerated.R
Normal file
@ -0,0 +1,45 @@
|
|||||||
|
# An example of using GPU-accelerated tree building algorithms
|
||||||
|
#
|
||||||
|
# NOTE: it can only run if you have a CUDA-enabled GPU and the package was
|
||||||
|
# specially compiled with GPU support.
|
||||||
|
#
|
||||||
|
# For the current functionality, see
|
||||||
|
# https://xgboost.readthedocs.io/en/latest/gpu/index.html
|
||||||
|
#
|
||||||
|
|
||||||
|
library('xgboost')
|
||||||
|
|
||||||
|
# Simulate N x p random matrix with some binomial response dependent on pp columns
|
||||||
|
set.seed(111)
|
||||||
|
N <- 1000000
|
||||||
|
p <- 50
|
||||||
|
pp <- 25
|
||||||
|
X <- matrix(runif(N * p), ncol = p)
|
||||||
|
betas <- 2 * runif(pp) - 1
|
||||||
|
sel <- sort(sample(p, pp))
|
||||||
|
m <- X[, sel] %*% betas - 1 + rnorm(N)
|
||||||
|
y <- rbinom(N, 1, plogis(m))
|
||||||
|
|
||||||
|
tr <- sample.int(N, N * 0.75)
|
||||||
|
dtrain <- xgb.DMatrix(X[tr,], label = y[tr])
|
||||||
|
dtest <- xgb.DMatrix(X[-tr,], label = y[-tr])
|
||||||
|
wl <- list(train = dtrain, test = dtest)
|
||||||
|
|
||||||
|
# An example of running 'gpu_hist' algorithm
|
||||||
|
# which is
|
||||||
|
# - similar to the 'hist'
|
||||||
|
# - the fastest option for moderately large datasets
|
||||||
|
# - current limitations: max_depth < 16, does not implement guided loss
|
||||||
|
# You can use tree_method = 'gpu_exact' for another GPU accelerated algorithm,
|
||||||
|
# which is slower, more memory-hungry, but does not use binning.
|
||||||
|
param <- list(objective = 'reg:logistic', eval_metric = 'auc', subsample = 0.5, nthread = 4,
|
||||||
|
max_bin = 64, tree_method = 'gpu_hist')
|
||||||
|
pt <- proc.time()
|
||||||
|
bst_gpu <- xgb.train(param, dtrain, watchlist = wl, nrounds = 50)
|
||||||
|
proc.time() - pt
|
||||||
|
|
||||||
|
# Compare to the 'hist' algorithm:
|
||||||
|
param$tree_method <- 'hist'
|
||||||
|
pt <- proc.time()
|
||||||
|
bst_hist <- xgb.train(param, dtrain, watchlist = wl, nrounds = 50)
|
||||||
|
proc.time() - pt
|
||||||
@ -11,3 +11,4 @@ demo(early_stopping)
|
|||||||
demo(poisson_regression)
|
demo(poisson_regression)
|
||||||
demo(caret_wrapper)
|
demo(caret_wrapper)
|
||||||
demo(tweedie_regression)
|
demo(tweedie_regression)
|
||||||
|
#demo(gpu_accelerated) # can only run when built with GPU support
|
||||||
@ -68,6 +68,9 @@ static const R_CallMethodDef CallEntries[] = {
|
|||||||
{NULL, NULL, 0}
|
{NULL, NULL, 0}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
#if defined(_WIN32)
|
||||||
|
__declspec(dllexport)
|
||||||
|
#endif
|
||||||
void R_init_xgboost(DllInfo *dll) {
|
void R_init_xgboost(DllInfo *dll) {
|
||||||
R_registerRoutines(dll, NULL, CallEntries, NULL, NULL);
|
R_registerRoutines(dll, NULL, CallEntries, NULL, NULL);
|
||||||
R_useDynamicSymbols(dll, FALSE);
|
R_useDynamicSymbols(dll, FALSE);
|
||||||
|
|||||||
@ -112,7 +112,7 @@ SEXP XGDMatrixCreateFromCSC_R(SEXP indptr,
|
|||||||
col_ptr_[i] = static_cast<size_t>(p_indptr[i]);
|
col_ptr_[i] = static_cast<size_t>(p_indptr[i]);
|
||||||
}
|
}
|
||||||
#pragma omp parallel for schedule(static)
|
#pragma omp parallel for schedule(static)
|
||||||
for (size_t i = 0; i < ndata; ++i) {
|
for (int64_t i = 0; i < static_cast<int64_t>(ndata); ++i) {
|
||||||
indices_[i] = static_cast<unsigned>(p_indices[i]);
|
indices_[i] = static_cast<unsigned>(p_indices[i]);
|
||||||
data_[i] = static_cast<float>(p_data[i]);
|
data_[i] = static_cast<float>(p_data[i]);
|
||||||
}
|
}
|
||||||
|
|||||||
@ -60,3 +60,27 @@ function(format_gencode_flags flags out)
|
|||||||
endforeach()
|
endforeach()
|
||||||
set(${out} "${${out}}" PARENT_SCOPE)
|
set(${out} "${${out}}" PARENT_SCOPE)
|
||||||
endfunction(format_gencode_flags flags)
|
endfunction(format_gencode_flags flags)
|
||||||
|
|
||||||
|
# Assembles the R-package files in build_dir;
|
||||||
|
# if necessary, installs the main R package dependencies;
|
||||||
|
# runs R CMD INSTALL.
|
||||||
|
function(setup_rpackage_install_target rlib_target build_dir)
|
||||||
|
install(CODE "file(REMOVE_RECURSE \"${build_dir}/R-package\")")
|
||||||
|
install(
|
||||||
|
DIRECTORY "${PROJECT_SOURCE_DIR}/R-package"
|
||||||
|
DESTINATION "${build_dir}"
|
||||||
|
REGEX "src/*" EXCLUDE
|
||||||
|
REGEX "R-package/configure" EXCLUDE
|
||||||
|
)
|
||||||
|
install(TARGETS ${rlib_target}
|
||||||
|
LIBRARY DESTINATION "${build_dir}/R-package/src/"
|
||||||
|
RUNTIME DESTINATION "${build_dir}/R-package/src/")
|
||||||
|
install(CODE "file(WRITE \"${build_dir}/R-package/src/Makevars\" \"all:\")")
|
||||||
|
install(CODE "file(WRITE \"${build_dir}/R-package/src/Makevars.win\" \"all:\")")
|
||||||
|
set(XGB_DEPS_SCRIPT
|
||||||
|
"deps = setdiff(c('statar','data.table', 'magrittr', 'stringi'), rownames(installed.packages()));\
|
||||||
|
if(length(deps)>0) install.packages(deps, repo = 'https://cloud.r-project.org/')")
|
||||||
|
install(CODE "execute_process(COMMAND \"${LIBR_EXECUTABLE}\" \"-q\" \"-e\" \"${XGB_DEPS_SCRIPT}\")")
|
||||||
|
install(CODE "execute_process(COMMAND \"${LIBR_EXECUTABLE}\" CMD INSTALL\
|
||||||
|
\"--no-multiarch\" \"${build_dir}/R-package\")")
|
||||||
|
endfunction(setup_rpackage_install_target)
|
||||||
177
cmake/modules/FindLibR.cmake
Normal file
177
cmake/modules/FindLibR.cmake
Normal file
@ -0,0 +1,177 @@
|
|||||||
|
# CMake module for R
|
||||||
|
# Borrows ideas from RStudio's FindLibR.cmake
|
||||||
|
#
|
||||||
|
# Defines the following:
|
||||||
|
# LIBR_FOUND
|
||||||
|
# LIBR_HOME
|
||||||
|
# LIBR_EXECUTABLE
|
||||||
|
# LIBR_INCLUDE_DIRS
|
||||||
|
# LIBR_LIB_DIR
|
||||||
|
# LIBR_CORE_LIBRARY
|
||||||
|
# and a cmake function to create R.lib for MSVC
|
||||||
|
#
|
||||||
|
# The following could be provided by user through cmake's -D options:
|
||||||
|
# LIBR_EXECUTABLE (for unix and win)
|
||||||
|
# R_VERSION (for win)
|
||||||
|
# R_ARCH (for win 64 when want 32 bit build)
|
||||||
|
#
|
||||||
|
# TODO:
|
||||||
|
# - someone to verify OSX detection,
|
||||||
|
# - possibly, add OSX detection based on current R in PATH or LIBR_EXECUTABLE
|
||||||
|
# - improve registry-based R_HOME detection in Windows (from a set of R_VERSION's)
|
||||||
|
|
||||||
|
|
||||||
|
# Windows users might want to change this to their R version:
|
||||||
|
if(NOT R_VERSION)
|
||||||
|
set(R_VERSION "3.4.1")
|
||||||
|
endif()
|
||||||
|
if(NOT R_ARCH)
|
||||||
|
if("${CMAKE_SIZEOF_VOID_P}" STREQUAL "4")
|
||||||
|
set(R_ARCH "i386")
|
||||||
|
else()
|
||||||
|
set(R_ARCH "x64")
|
||||||
|
endif()
|
||||||
|
endif()
|
||||||
|
|
||||||
|
|
||||||
|
# Creates R.lib and R.def in the build directory for linking with MSVC
|
||||||
|
function(create_rlib_for_msvc)
|
||||||
|
# various checks and warnings
|
||||||
|
if(NOT WIN32 OR NOT MSVC)
|
||||||
|
message(FATAL_ERROR "create_rlib_for_msvc() can only be used with MSVC")
|
||||||
|
endif()
|
||||||
|
if(NOT EXISTS "${LIBR_LIB_DIR}")
|
||||||
|
message(FATAL_ERROR "LIBR_LIB_DIR was not set!")
|
||||||
|
endif()
|
||||||
|
find_program(GENDEF_EXE gendef)
|
||||||
|
find_program(DLLTOOL_EXE dlltool)
|
||||||
|
if(NOT GENDEF_EXE OR NOT DLLTOOL_EXE)
|
||||||
|
message(FATAL_ERROR "\nEither gendef.exe or dlltool.exe not found!\
|
||||||
|
\nDo you have Rtools installed with its MinGW's bin/ in PATH?")
|
||||||
|
endif()
|
||||||
|
# extract symbols from R.dll into R.def and R.lib import library
|
||||||
|
# run the gendef that find_program() located (honours a user-supplied GENDEF_EXE)
execute_process(COMMAND "${GENDEF_EXE}"
|
||||||
|
"-" "${LIBR_LIB_DIR}/R.dll"
|
||||||
|
OUTPUT_FILE "${CMAKE_CURRENT_BINARY_DIR}/R.def")
|
||||||
|
# run the dlltool that find_program() located (honours a user-supplied DLLTOOL_EXE)
execute_process(COMMAND "${DLLTOOL_EXE}"
|
||||||
|
"--input-def" "${CMAKE_CURRENT_BINARY_DIR}/R.def"
|
||||||
|
"--output-lib" "${CMAKE_CURRENT_BINARY_DIR}/R.lib")
|
||||||
|
endfunction(create_rlib_for_msvc)
|
||||||
|
|
||||||
|
|
||||||
|
# detection for OSX
|
||||||
|
if(APPLE)
|
||||||
|
|
||||||
|
find_library(LIBR_LIBRARIES R)
|
||||||
|
|
||||||
|
if(LIBR_LIBRARIES MATCHES ".*\\.framework")
|
||||||
|
set(LIBR_HOME "${LIBR_LIBRARIES}/Resources" CACHE PATH "R home directory")
|
||||||
|
set(LIBR_INCLUDE_DIRS "${LIBR_HOME}/include" CACHE PATH "R include directory")
|
||||||
|
set(LIBR_EXECUTABLE "${LIBR_HOME}/R" CACHE PATH "R executable")
|
||||||
|
set(LIBR_LIB_DIR "${LIBR_HOME}/lib" CACHE PATH "R lib directory")
|
||||||
|
else()
|
||||||
|
get_filename_component(_LIBR_LIBRARIES "${LIBR_LIBRARIES}" REALPATH)
|
||||||
|
get_filename_component(_LIBR_LIBRARIES_DIR "${_LIBR_LIBRARIES}" DIRECTORY)
|
||||||
|
set(LIBR_EXECUTABLE "${_LIBR_LIBRARIES_DIR}/../bin/R")
|
||||||
|
execute_process(
|
||||||
|
COMMAND ${LIBR_EXECUTABLE} "--slave" "--vanilla" "-e" "cat(R.home())"
|
||||||
|
OUTPUT_VARIABLE LIBR_HOME)
|
||||||
|
set(LIBR_HOME ${LIBR_HOME} CACHE PATH "R home directory")
|
||||||
|
set(LIBR_INCLUDE_DIRS "${LIBR_HOME}/include" CACHE PATH "R include directory")
|
||||||
|
set(LIBR_LIB_DIR "${LIBR_HOME}/lib" CACHE PATH "R lib directory")
|
||||||
|
endif()
|
||||||
|
|
||||||
|
# detection for UNIX & Win32
|
||||||
|
else()
|
||||||
|
|
||||||
|
# attempt to find R executable
|
||||||
|
if(NOT LIBR_EXECUTABLE)
|
||||||
|
find_program(LIBR_EXECUTABLE R)
|
||||||
|
endif()
|
||||||
|
|
||||||
|
if(UNIX)
|
||||||
|
|
||||||
|
if(NOT LIBR_EXECUTABLE)
|
||||||
|
message(FATAL_ERROR "Unable to locate R executable.\
|
||||||
|
\nEither add its location to PATH or provide it through the LIBR_EXECUTABLE cmake variable")
|
||||||
|
endif()
|
||||||
|
|
||||||
|
# ask R for the home path
|
||||||
|
execute_process(
|
||||||
|
COMMAND ${LIBR_EXECUTABLE} "--slave" "--vanilla" "-e" "cat(R.home())"
|
||||||
|
OUTPUT_VARIABLE LIBR_HOME
|
||||||
|
)
|
||||||
|
# ask R for the include dir
|
||||||
|
execute_process(
|
||||||
|
COMMAND ${LIBR_EXECUTABLE} "--slave" "--no-save" "-e" "cat(R.home('include'))"
|
||||||
|
OUTPUT_VARIABLE LIBR_INCLUDE_DIRS
|
||||||
|
)
|
||||||
|
# ask R for the lib dir
|
||||||
|
execute_process(
|
||||||
|
COMMAND ${LIBR_EXECUTABLE} "--slave" "--no-save" "-e" "cat(R.home('lib'))"
|
||||||
|
OUTPUT_VARIABLE LIBR_LIB_DIR
|
||||||
|
)
|
||||||
|
|
||||||
|
# Windows
|
||||||
|
else()
|
||||||
|
# ask R for R_HOME
|
||||||
|
if(LIBR_EXECUTABLE)
|
||||||
|
execute_process(
|
||||||
|
# the R expression must be complete: close both normalizePath() and cat()
COMMAND ${LIBR_EXECUTABLE} "--slave" "--no-save" "-e" "cat(normalizePath(R.home(), winslash='/'))"
|
||||||
|
OUTPUT_VARIABLE LIBR_HOME)
|
||||||
|
endif()
|
||||||
|
# if R executable not available, query R_HOME path from registry
|
||||||
|
if(NOT LIBR_HOME)
|
||||||
|
get_filename_component(LIBR_HOME
|
||||||
|
"[HKEY_LOCAL_MACHINE\\SOFTWARE\\R-core\\R\\${R_VERSION};InstallPath]"
|
||||||
|
ABSOLUTE)
|
||||||
|
if(NOT LIBR_HOME)
|
||||||
|
message(FATAL_ERROR "\nUnable to locate R executable.\
|
||||||
|
\nEither add its location to PATH or provide it through the LIBR_EXECUTABLE cmake variable")
|
||||||
|
endif()
|
||||||
|
endif()
|
||||||
|
# set exe location based on R_ARCH
|
||||||
|
if(NOT LIBR_EXECUTABLE)
|
||||||
|
set(LIBR_EXECUTABLE "${LIBR_HOME}/bin/${R_ARCH}/R.exe")
|
||||||
|
endif()
|
||||||
|
# set other R paths based on home path
|
||||||
|
set(LIBR_INCLUDE_DIRS "${LIBR_HOME}/include")
|
||||||
|
set(LIBR_LIB_DIR "${LIBR_HOME}/bin/${R_ARCH}")
|
||||||
|
|
||||||
|
endif()
|
||||||
|
|
||||||
|
endif()
|
||||||
|
|
||||||
|
if(WIN32 AND MSVC)
|
||||||
|
# create a local R.lib import library for R.dll if it doesn't exist
|
||||||
|
if(NOT EXISTS "${CMAKE_CURRENT_BINARY_DIR}/R.lib")
|
||||||
|
create_rlib_for_msvc()
|
||||||
|
endif()
|
||||||
|
endif()
|
||||||
|
|
||||||
|
# look for the core R library
|
||||||
|
find_library(LIBR_CORE_LIBRARY NAMES R
|
||||||
|
HINTS "${CMAKE_CURRENT_BINARY_DIR}" "${LIBR_LIB_DIR}" "${LIBR_HOME}/bin" "${LIBR_LIBRARIES}")
|
||||||
|
# find_library() stores "<VAR>-NOTFOUND" in the variable on failure, so test the variable itself
if(NOT LIBR_CORE_LIBRARY)
|
||||||
|
message(STATUS "Could not find R core shared library.")
|
||||||
|
endif()
|
||||||
|
|
||||||
|
set(LIBR_HOME ${LIBR_HOME} CACHE PATH "R home directory")
|
||||||
|
set(LIBR_EXECUTABLE ${LIBR_EXECUTABLE} CACHE PATH "R executable")
|
||||||
|
set(LIBR_INCLUDE_DIRS ${LIBR_INCLUDE_DIRS} CACHE PATH "R include directory")
|
||||||
|
set(LIBR_LIB_DIR ${LIBR_LIB_DIR} CACHE PATH "R shared libraries directory")
|
||||||
|
set(LIBR_CORE_LIBRARY ${LIBR_CORE_LIBRARY} CACHE PATH "R core shared library")
|
||||||
|
|
||||||
|
# define find requirements
|
||||||
|
include(FindPackageHandleStandardArgs)
|
||||||
|
find_package_handle_standard_args(LibR DEFAULT_MSG
|
||||||
|
LIBR_HOME
|
||||||
|
LIBR_EXECUTABLE
|
||||||
|
LIBR_INCLUDE_DIRS
|
||||||
|
LIBR_LIB_DIR
|
||||||
|
LIBR_CORE_LIBRARY
|
||||||
|
)
|
||||||
|
|
||||||
|
if(LIBR_FOUND)
|
||||||
|
message(STATUS "Found R: ${LIBR_EXECUTABLE}")
|
||||||
|
endif()
|
||||||
50
doc/build.md
50
doc/build.md
@ -134,9 +134,9 @@ Other versions of Visual Studio may work but are untested.
|
|||||||
|
|
||||||
### Building with GPU support
|
### Building with GPU support
|
||||||
|
|
||||||
XGBoost can be built with GPU support for both Linux and Windows using cmake. GPU support works with the Python package as well as the CLI version. The R package is not yet supported.
|
XGBoost can be built with GPU support for both Linux and Windows using cmake. GPU support works with the Python package as well as the CLI version. See [Installing R package with GPU support](#installing-r-package-with-gpu-support) for special instructions for R.
|
||||||
|
|
||||||
An up-to-date version of the cuda toolkit is required.
|
An up-to-date version of the CUDA toolkit is required.
|
||||||
|
|
||||||
From the command line on Linux starting from the xgboost directory:
|
From the command line on Linux starting from the xgboost directory:
|
||||||
|
|
||||||
@ -146,7 +146,9 @@ $ cd build
|
|||||||
$ cmake .. -DUSE_CUDA=ON
|
$ cmake .. -DUSE_CUDA=ON
|
||||||
$ make -j
|
$ make -j
|
||||||
```
|
```
|
||||||
On Windows using cmake, see what options for Generators you have for cmake, and choose one with [arch] replaced by Win64:
|
**Windows requirements** for GPU build: only Visual C++ 2015 or 2013 with CUDA v8.0 were fully tested. Either install Visual C++ 2015 Build Tools separately, or as a part of Visual Studio 2015. If you already have Visual Studio 2017, the Visual C++ 2015 Toolchain component has to be installed using the VS 2017 Installer. Likely, you would need to use the VS2015 x64 Native Tools command prompt to run the cmake commands given below. In some situations, however, things run just fine from the MSYS2 bash command line.
|
||||||
|
|
||||||
|
On Windows, using cmake, see what options for Generators you have for cmake, and choose one with [arch] replaced by Win64:
|
||||||
```bash
|
```bash
|
||||||
cmake -help
|
cmake -help
|
||||||
```
|
```
|
||||||
@ -156,9 +158,15 @@ $ mkdir build
|
|||||||
$ cd build
|
$ cd build
|
||||||
$ cmake .. -G"Visual Studio 14 2015 Win64" -DUSE_CUDA=ON
|
$ cmake .. -G"Visual Studio 14 2015 Win64" -DUSE_CUDA=ON
|
||||||
```
|
```
|
||||||
Cmake will create an xgboost.sln solution file in the build directory. Build this solution in release mode as a x64 build.
|
To speed up compilation, compute version specific to your GPU could be passed to cmake as, e.g., `-DGPU_COMPUTE_VER=50`.
|
||||||
|
The above cmake configuration run will create an xgboost.sln solution file in the build directory. Build this solution in release mode as an x64 build, either from Visual Studio or from the command line:
|
||||||
|
```
|
||||||
|
cmake --build . --target xgboost --config Release
|
||||||
|
```
|
||||||
|
If the build seems to use only a single process, you might try to append an option like ` -- /m:6` to the above command.
|
||||||
|
|
||||||
### Windows Binaries
|
### Windows Binaries
|
||||||
|
|
||||||
Unofficial windows binaries and instructions on how to use them are hosted on [Guido Tapia's blog](http://www.picnet.com.au/blogs/guido/post/2016/09/22/xgboost-windows-x64-binaries-for-download/)
|
Unofficial windows binaries and instructions on how to use them are hosted on [Guido Tapia's blog](http://www.picnet.com.au/blogs/guido/post/2016/09/22/xgboost-windows-x64-binaries-for-download/)
|
||||||
|
|
||||||
### Customized Building
|
### Customized Building
|
||||||
@ -273,8 +281,42 @@ setwd('wherever/you/cloned/it/xgboost/R-package/')
|
|||||||
install.packages('.', repos = NULL, type="source")
|
install.packages('.', repos = NULL, type="source")
|
||||||
```
|
```
|
||||||
|
|
||||||
|
The package could also be built and installed with cmake (and Visual C++ 2015 on Windows) using instructions from the next section, but without GPU support (omit the `-DUSE_CUDA=ON` cmake parameter).
|
||||||
|
|
||||||
If all else fails, try [building the shared library](#build-the-shared-library) to see whether a problem is specific to the R package or not.
|
If all else fails, try [building the shared library](#build-the-shared-library) to see whether a problem is specific to the R package or not.
|
||||||
|
|
||||||
|
### Installing R package with GPU support
|
||||||
|
|
||||||
|
The procedure and requirements are similar to those in [Building with GPU support](#building-with-gpu-support), so make sure to read that section first.
|
||||||
|
|
||||||
|
On Linux, starting from the xgboost directory:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
mkdir build
|
||||||
|
cd build
|
||||||
|
cmake .. -DUSE_CUDA=ON -DR_LIB=ON
|
||||||
|
make install -j
|
||||||
|
```
|
||||||
|
When default target is used, an R package shared library would be built in the `build` area.
|
||||||
|
The `install` target, in addition, assembles the package files with this shared library under `build/R-package`, and runs `R CMD INSTALL`.
|
||||||
|
|
||||||
|
On Windows, cmake with Visual C++ Build Tools (or Visual Studio) has to be used to build an R package with GPU support. Rtools must also be installed (perhaps, some other MinGW distributions with `gendef.exe` and `dlltool.exe` would work, but that was not tested).
|
||||||
|
```bash
|
||||||
|
mkdir build
|
||||||
|
cd build
|
||||||
|
cmake .. -G"Visual Studio 14 2015 Win64" -DUSE_CUDA=ON -DR_LIB=ON
|
||||||
|
cmake --build . --target install --config Release
|
||||||
|
```
|
||||||
|
When `--target xgboost` is used, an R package dll would be built under `build/Release`.
|
||||||
|
The `--target install`, in addition, assembles the package files with this dll under `build/R-package`, and runs `R CMD INSTALL`.
|
||||||
|
|
||||||
|
If cmake can't find your R during the configuration step, you might provide the location of its executable to cmake like this: `-DLIBR_EXECUTABLE="C:/Program Files/R/R-3.4.1/bin/x64/R.exe"`.
|
||||||
|
|
||||||
|
If on Windows you get a "permission denied" error when trying to write to ...Program Files/R/... during the package installation, create a `.Rprofile` file in your personal home directory (if you don't already have one in there), and add a line to it which specifies the location of your R packages user library, like the following:
|
||||||
|
```r
|
||||||
|
.libPaths( unique(c("C:/Users/USERNAME/Documents/R/win-library/3.4", .libPaths())))
|
||||||
|
```
|
||||||
|
You might find the exact location by running `.libPaths()` in R GUI or RStudio.
|
||||||
|
|
||||||
## Trouble Shooting
|
## Trouble Shooting
|
||||||
|
|
||||||
|
|||||||
@ -57,9 +57,17 @@ Yes, xgboost implements LambdaMART. Checkout the objective section in [parameter
|
|||||||
How to deal with Missing Value
|
How to deal with Missing Value
|
||||||
------------------------------
|
------------------------------
|
||||||
xgboost supports missing values by default.
|
xgboost supports missing values by default.
|
||||||
|
In tree algorithms, branch directions for missing values are learned during training.
|
||||||
|
Note that the gblinear booster treats missing values as zeros.
|
||||||
|
|
||||||
|
|
||||||
Slightly different result between runs
|
Slightly different result between runs
|
||||||
--------------------------------------
|
--------------------------------------
|
||||||
This could happen, due to non-determinism in floating point summation order and multi-threading.
|
This could happen, due to non-determinism in floating point summation order and multi-threading.
|
||||||
Though the general accuracy will usually remain the same.
|
Though the general accuracy will usually remain the same.
|
||||||
|
|
||||||
|
|
||||||
|
Why do I see different results with sparse and dense data?
|
||||||
|
--------------------------------------------------------
|
||||||
|
"Sparse" elements are treated as if they were "missing" by the tree booster, and as zeros by the linear booster.
|
||||||
|
For tree models, it is important to use consistent data formats during training and scoring.
|
||||||
@ -199,9 +199,11 @@ inline void dense2sparse_tree(RegTree* p_tree,
|
|||||||
|
|
||||||
struct BernoulliRng {
|
struct BernoulliRng {
|
||||||
float p;
|
float p;
|
||||||
int seed;
|
uint32_t seed;
|
||||||
|
|
||||||
__host__ __device__ BernoulliRng(float p, int seed) : p(p), seed(seed) {}
|
__host__ __device__ BernoulliRng(float p, size_t seed_) : p(p) {
|
||||||
|
seed = static_cast<uint32_t>(seed_);
|
||||||
|
}
|
||||||
|
|
||||||
__host__ __device__ bool operator()(const int i) const {
|
__host__ __device__ bool operator()(const int i) const {
|
||||||
thrust::default_random_engine rng(seed);
|
thrust::default_random_engine rng(seed);
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user