diff --git a/CMakeLists.txt b/CMakeLists.txt index 2014d6574..b7b9d665d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,6 +1,7 @@ cmake_minimum_required (VERSION 3.2) project(xgboost) include(cmake/Utils.cmake) +list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake/modules") find_package(OpenMP) set_default_configuration_release() @@ -10,6 +11,7 @@ msvc_use_static_runtime() option(USE_CUDA "Build with GPU acceleration") option(JVM_BINDINGS "Build JVM bindings" OFF) option(GOOGLE_TEST "Build google tests" OFF) +option(R_LIB "Build shared library for R package" OFF) set(GPU_COMPUTE_VER 35;50;52;60;61 CACHE STRING "Space separated list of compute versions to be built against") @@ -34,6 +36,19 @@ else() set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -funroll-loops") endif() +# compiled code customizations for R package +if(R_LIB) + add_definitions( + -DXGBOOST_STRICT_R_MODE=1 + -DXGBOOST_CUSTOMIZE_GLOBAL_PRNG=1 + -DDMLC_LOG_BEFORE_THROW=0 + -DDMLC_DISABLE_STDIN=1 + -DDMLC_LOG_CUSTOMIZE=1 + -DRABIT_CUSTOMIZE_MSG_ + -DRABIT_STRICT_CXX98_ + ) +endif() + include_directories ( ${PROJECT_SOURCE_DIR}/include ${PROJECT_SOURCE_DIR}/dmlc-core/include @@ -66,7 +81,7 @@ set(RABIT_EMPTY_SOURCES rabit/src/engine_empty.cc rabit/src/c_api.cc ) -if(MINGW) +if(MINGW OR R_LIB) # build a dummy rabit library add_library(rabit STATIC ${RABIT_EMPTY_SOURCES}) else() @@ -78,6 +93,7 @@ endif() add_subdirectory(dmlc-core) set(LINK_LIBRARIES dmlccore rabit) + if(USE_CUDA) find_package(CUDA 7.5 REQUIRED) cmake_minimum_required(VERSION 3.5) @@ -102,27 +118,65 @@ if(USE_CUDA) list(APPEND LINK_LIBRARIES gpuxgboost) endif() -add_library(objxgboost OBJECT ${SOURCES}) -# Executable -add_executable(runxgboost $<TARGET_OBJECTS:objxgboost> src/cli_main.cc) -set_target_properties(runxgboost PROPERTIES - OUTPUT_NAME xgboost -) -set_output_directory(runxgboost ${PROJECT_SOURCE_DIR}) -target_link_libraries(runxgboost ${LINK_LIBRARIES}) - -# Shared library -add_library(xgboost SHARED $<TARGET_OBJECTS:objxgboost>) -target_link_libraries(xgboost 
${LINK_LIBRARIES}) -set_output_directory(xgboost ${PROJECT_SOURCE_DIR}/lib) -if(MINGW) - # remove the 'lib' prefix to conform to windows convention for shared library names - set_target_properties(xgboost PROPERTIES PREFIX "") +# flags and sources for R-package +if(R_LIB) + file(GLOB_RECURSE R_SOURCES + R-package/src/*.h + R-package/src/*.c + R-package/src/*.cc + ) + list(APPEND SOURCES ${R_SOURCES}) +endif() + +add_library(objxgboost OBJECT ${SOURCES}) + + +# building shared library for R package +if(R_LIB) + find_package(LibR REQUIRED) + + list(APPEND LINK_LIBRARIES "${LIBR_CORE_LIBRARY}") + message(STATUS "LIBR_CORE_LIBRARY ${LIBR_CORE_LIBRARY}") + + include_directories( + "${LIBR_INCLUDE_DIRS}" + "${PROJECT_SOURCE_DIR}" + ) + + # Shared library target for the R package (built from the shared object library) + add_library(xgboost SHARED $<TARGET_OBJECTS:objxgboost>) + target_link_libraries(xgboost ${LINK_LIBRARIES}) + # R uses no lib prefix in shared library names of its packages + set_target_properties(xgboost PROPERTIES PREFIX "") + + setup_rpackage_install_target(xgboost ${CMAKE_CURRENT_BINARY_DIR}) + # use a dummy location for any other remaining installs + set(CMAKE_INSTALL_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/dummy_inst") + +# main targets: shared library & exe +else() + # Executable + add_executable(runxgboost $<TARGET_OBJECTS:objxgboost> src/cli_main.cc) + set_target_properties(runxgboost PROPERTIES + OUTPUT_NAME xgboost + ) + set_output_directory(runxgboost ${PROJECT_SOURCE_DIR}) + target_link_libraries(runxgboost ${LINK_LIBRARIES}) + + # Shared library + add_library(xgboost SHARED $<TARGET_OBJECTS:objxgboost>) + target_link_libraries(xgboost ${LINK_LIBRARIES}) + set_output_directory(xgboost ${PROJECT_SOURCE_DIR}/lib) + if(MINGW) + # remove the 'lib' prefix to conform to windows convention for shared library names + set_target_properties(xgboost PROPERTIES PREFIX "") + endif() + + #Ensure these two targets do not build simultaneously, as they produce outputs with conflicting names + add_dependencies(xgboost runxgboost) endif() -#Ensure these two targets do not build 
simultaneously, as they produce outputs with conflicting names -add_dependencies(xgboost runxgboost) # JVM if(JVM_BINDINGS) @@ -139,6 +193,7 @@ if(JVM_BINDINGS) ${JAVA_JVM_LIBRARY}) endif() + # Test if(GOOGLE_TEST) enable_testing() @@ -162,5 +217,6 @@ if(GOOGLE_TEST) add_test(TestXGBoost testxgboost) endif() + # Group sources auto_source_group("${SOURCES}") diff --git a/NEWS.md b/NEWS.md index 750aa00b7..61336f45d 100644 --- a/NEWS.md +++ b/NEWS.md @@ -15,15 +15,19 @@ This file records the changes in xgboost library in reverse chronological order. - Thread local variable is upgraded so it is automatically freed at thread exit. * Migrate to C++11 - The current master version now requires C++11 enabled compiled(g++4.8 or higher) +* Predictor interface was factored out (in a manner similar to the updater interface). * New functionality - Ability to adjust tree model's statistics to a new dataset without changing tree structures. - Extracting feature contributions to individual predictions. + - Faster, histogram-based tree algorithm (`tree_method='hist'`) . + - GPU/CUDA accelerated tree algorithms (`tree_method='gpu_hist'` or `'gpu_exact'`), including the GPU-based predictor. * R package: - New parameters: - `silent` in `xgb.DMatrix()` - `use_int_id` in `xgb.model.dt.tree()` - `predcontrib` in `predict()` - Default value of the `save_period` parameter in `xgboost()` changed to NULL (consistent with `xgb.train()`). + - It's possible to custom-build the R package with GPU acceleration support. 
## v0.6 (2016.07.29) * Version 0.5 is skipped due to major improvements in the core diff --git a/R-package/DESCRIPTION b/R-package/DESCRIPTION index d838ce4e5..b39385bc4 100644 --- a/R-package/DESCRIPTION +++ b/R-package/DESCRIPTION @@ -1,8 +1,8 @@ Package: xgboost Type: Package Title: Extreme Gradient Boosting -Version: 0.6.4.6 -Date: 2017-01-04 +Version: 0.6.4.7 +Date: 2017-09-25 Author: Tianqi Chen , Tong He , Michael Benesty , Vadim Khotilovich , Yuan Tang diff --git a/R-package/demo/00Index b/R-package/demo/00Index index 025a4b472..3ecb7966c 100644 --- a/R-package/demo/00Index +++ b/R-package/demo/00Index @@ -10,3 +10,4 @@ predict_leaf_indices Predicting the corresponding leaves early_stopping Early Stop in training poisson_regression Poisson Regression on count data tweedie_regression Tweddie Regression +gpu_accelerated GPU-accelerated tree building algorithms \ No newline at end of file diff --git a/R-package/demo/README.md b/R-package/demo/README.md index 5ee1aa797..e53afeaea 100644 --- a/R-package/demo/README.md +++ b/R-package/demo/README.md @@ -8,6 +8,7 @@ XGBoost R Feature Walkthrough * [Generalized Linear Model](generalized_linear_model.R) * [Cross validation](cross_validation.R) * [Create a sparse matrix from a dense one](create_sparse_matrix.R) +* [Use GPU-accelerated tree building algorithms](gpu_accelerated.R) Benchmarks ==== diff --git a/R-package/demo/gpu_accelerated.R b/R-package/demo/gpu_accelerated.R new file mode 100644 index 000000000..770148a5d --- /dev/null +++ b/R-package/demo/gpu_accelerated.R @@ -0,0 +1,45 @@ +# An example of using GPU-accelerated tree building algorithms +# +# NOTE: it can only run if you have a CUDA-enabled GPU and the package was +# specially compiled with GPU support. 
+# +# For the current functionality, see +# https://xgboost.readthedocs.io/en/latest/gpu/index.html +# + +library('xgboost') + +# Simulate N x p random matrix with some binomial response dependent on pp columns +set.seed(111) +N <- 1000000 +p <- 50 +pp <- 25 +X <- matrix(runif(N * p), ncol = p) +betas <- 2 * runif(pp) - 1 +sel <- sort(sample(p, pp)) +m <- X[, sel] %*% betas - 1 + rnorm(N) +y <- rbinom(N, 1, plogis(m)) + +tr <- sample.int(N, N * 0.75) +dtrain <- xgb.DMatrix(X[tr,], label = y[tr]) +dtest <- xgb.DMatrix(X[-tr,], label = y[-tr]) +wl <- list(train = dtrain, test = dtest) + +# An example of running 'gpu_hist' algorithm +# which is +# - similar to the 'hist' +# - the fastest option for moderately large datasets +# - current limitations: max_depth < 16, does not implement guided loss +# You can use tree_method = 'gpu_exact' for another GPU accelerated algorithm, +# which is slower, more memory-hungry, but does not use binning. +param <- list(objective = 'reg:logistic', eval_metric = 'auc', subsample = 0.5, nthread = 4, + max_bin = 64, tree_method = 'gpu_hist') +pt <- proc.time() +bst_gpu <- xgb.train(param, dtrain, watchlist = wl, nrounds = 50) +proc.time() - pt + +# Compare to the 'hist' algorithm: +param$tree_method <- 'hist' +pt <- proc.time() +bst_hist <- xgb.train(param, dtrain, watchlist = wl, nrounds = 50) +proc.time() - pt diff --git a/R-package/demo/runall.R b/R-package/demo/runall.R index 614f61116..0c1392ac8 100644 --- a/R-package/demo/runall.R +++ b/R-package/demo/runall.R @@ -10,4 +10,5 @@ demo(predict_leaf_indices) demo(early_stopping) demo(poisson_regression) demo(caret_wrapper) -demo(tweedie_regression) \ No newline at end of file +demo(tweedie_regression) +#demo(gpu_accelerated) # can only run when built with GPU support \ No newline at end of file diff --git a/R-package/src/init.c b/R-package/src/init.c index c19e618b9..c07fa99eb 100644 --- a/R-package/src/init.c +++ b/R-package/src/init.c @@ -68,6 +68,9 @@ static const R_CallMethodDef 
CallEntries[] = { {NULL, NULL, 0} }; +#if defined(_WIN32) +__declspec(dllexport) +#endif void R_init_xgboost(DllInfo *dll) { R_registerRoutines(dll, NULL, CallEntries, NULL, NULL); R_useDynamicSymbols(dll, FALSE); diff --git a/R-package/src/xgboost_R.cc b/R-package/src/xgboost_R.cc index 7bf3bc0a8..5281ef6bd 100644 --- a/R-package/src/xgboost_R.cc +++ b/R-package/src/xgboost_R.cc @@ -112,7 +112,7 @@ SEXP XGDMatrixCreateFromCSC_R(SEXP indptr, col_ptr_[i] = static_cast(p_indptr[i]); } #pragma omp parallel for schedule(static) - for (size_t i = 0; i < ndata; ++i) { + for (int64_t i = 0; i < static_cast(ndata); ++i) { indices_[i] = static_cast(p_indices[i]); data_[i] = static_cast(p_data[i]); } diff --git a/cmake/Utils.cmake b/cmake/Utils.cmake index eca94c687..75cbebb1f 100644 --- a/cmake/Utils.cmake +++ b/cmake/Utils.cmake @@ -59,4 +59,28 @@ function(format_gencode_flags flags out) set(${out} "${${out}}-gencode arch=compute_${ver},code=sm_${ver};") endforeach() set(${out} "${${out}}" PARENT_SCOPE) -endfunction(format_gencode_flags flags) \ No newline at end of file +endfunction(format_gencode_flags flags) + +# Assembles the R-package files in build_dir; +# if necessary, installs the main R package dependencies; +# runs R CMD INSTALL. 
+function(setup_rpackage_install_target rlib_target build_dir) + install(CODE "file(REMOVE_RECURSE \"${build_dir}/R-package\")") + install( + DIRECTORY "${PROJECT_SOURCE_DIR}/R-package" + DESTINATION "${build_dir}" + REGEX "R-package/src/" EXCLUDE + REGEX "R-package/configure" EXCLUDE + ) + install(TARGETS ${rlib_target} + LIBRARY DESTINATION "${build_dir}/R-package/src/" + RUNTIME DESTINATION "${build_dir}/R-package/src/") + install(CODE "file(WRITE \"${build_dir}/R-package/src/Makevars\" \"all:\")") + install(CODE "file(WRITE \"${build_dir}/R-package/src/Makevars.win\" \"all:\")") + set(XGB_DEPS_SCRIPT + "deps = setdiff(c('statar','data.table', 'magrittr', 'stringi'), rownames(installed.packages()));\ + if(length(deps)>0) install.packages(deps, repos = 'https://cloud.r-project.org/')") + install(CODE "execute_process(COMMAND \"${LIBR_EXECUTABLE}\" \"-q\" \"-e\" \"${XGB_DEPS_SCRIPT}\")") + install(CODE "execute_process(COMMAND \"${LIBR_EXECUTABLE}\" CMD INSTALL\ + \"--no-multiarch\" \"${build_dir}/R-package\")") +endfunction(setup_rpackage_install_target) \ No newline at end of file diff --git a/cmake/modules/FindLibR.cmake b/cmake/modules/FindLibR.cmake new file mode 100644 index 000000000..dbb65006d --- /dev/null +++ b/cmake/modules/FindLibR.cmake @@ -0,0 +1,177 @@ +# CMake module for R +# Borrows ideas from RStudio's FindLibR.cmake +# +# Defines the following: +# LIBR_FOUND +# LIBR_HOME +# LIBR_EXECUTABLE +# LIBR_INCLUDE_DIRS +# LIBR_LIB_DIR +# LIBR_CORE_LIBRARY +# and a cmake function to create R.lib for MSVC +# +# The following could be provided by user through cmake's -D options: +# LIBR_EXECUTABLE (for unix and win) +# R_VERSION (for win) +# R_ARCH (for win 64 when want 32 bit build) +# +# TODO: +# - someone to verify OSX detection, +# - possibly, add OSX detection based on current R in PATH or LIBR_EXECUTABLE +# - improve registry-based R_HOME detection in Windows (from a set of R_VERSION's) + + +# Windows users might want to change this to their R version: +if(NOT 
R_VERSION) + set(R_VERSION "3.4.1") +endif() +if(NOT R_ARCH) + if("${CMAKE_SIZEOF_VOID_P}" STREQUAL "4") + set(R_ARCH "i386") + else() + set(R_ARCH "x64") + endif() +endif() + + +# Creates R.lib and R.def in the build directory for linking with MSVC +function(create_rlib_for_msvc) + # various checks and warnings + if(NOT WIN32 OR NOT MSVC) + message(FATAL_ERROR "create_rlib_for_msvc() can only be used with MSVC") + endif() + if(NOT EXISTS "${LIBR_LIB_DIR}") + message(FATAL_ERROR "LIBR_LIB_DIR was not set!") + endif() + find_program(GENDEF_EXE gendef) + find_program(DLLTOOL_EXE dlltool) + if(NOT GENDEF_EXE OR NOT DLLTOOL_EXE) + message(FATAL_ERROR "\nEither gendef.exe or dlltool.exe not found!\ + \nDo you have Rtools installed with its MinGW's bin/ in PATH?") + endif() + # extract symbols from R.dll into R.def and R.lib import library (using the tools located above) + execute_process(COMMAND "${GENDEF_EXE}" + "-" "${LIBR_LIB_DIR}/R.dll" + OUTPUT_FILE "${CMAKE_CURRENT_BINARY_DIR}/R.def") + execute_process(COMMAND "${DLLTOOL_EXE}" + "--input-def" "${CMAKE_CURRENT_BINARY_DIR}/R.def" + "--output-lib" "${CMAKE_CURRENT_BINARY_DIR}/R.lib") +endfunction(create_rlib_for_msvc) + + +# detection for OSX +if(APPLE) + + find_library(LIBR_LIBRARIES R) + + if(LIBR_LIBRARIES MATCHES ".*\\.framework") + set(LIBR_HOME "${LIBR_LIBRARIES}/Resources" CACHE PATH "R home directory") + set(LIBR_INCLUDE_DIRS "${LIBR_HOME}/include" CACHE PATH "R include directory") + set(LIBR_EXECUTABLE "${LIBR_HOME}/R" CACHE PATH "R executable") + set(LIBR_LIB_DIR "${LIBR_HOME}/lib" CACHE PATH "R lib directory") + else() + get_filename_component(_LIBR_LIBRARIES "${LIBR_LIBRARIES}" REALPATH) + get_filename_component(_LIBR_LIBRARIES_DIR "${_LIBR_LIBRARIES}" DIRECTORY) + set(LIBR_EXECUTABLE "${_LIBR_LIBRARIES_DIR}/../bin/R") + execute_process( + COMMAND ${LIBR_EXECUTABLE} "--slave" "--vanilla" "-e" "cat(R.home())" + OUTPUT_VARIABLE LIBR_HOME) + set(LIBR_HOME ${LIBR_HOME} CACHE PATH "R home directory") + set(LIBR_INCLUDE_DIRS "${LIBR_HOME}/include" CACHE PATH 
"R include directory") + set(LIBR_LIB_DIR "${LIBR_HOME}/lib" CACHE PATH "R lib directory") + endif() + +# detection for UNIX & Win32 +else() + + # attempt to find R executable + if(NOT LIBR_EXECUTABLE) + find_program(LIBR_EXECUTABLE R) + endif() + + if(UNIX) + + if(NOT LIBR_EXECUTABLE) + message(FATAL_ERROR "Unable to locate R executable.\ + \nEither add its location to PATH or provide it through the LIBR_EXECUTABLE cmake variable") + endif() + + # ask R for the home path + execute_process( + COMMAND ${LIBR_EXECUTABLE} "--slave" "--vanilla" "-e" "cat(R.home())" + OUTPUT_VARIABLE LIBR_HOME + ) + # ask R for the include dir + execute_process( + COMMAND ${LIBR_EXECUTABLE} "--slave" "--no-save" "-e" "cat(R.home('include'))" + OUTPUT_VARIABLE LIBR_INCLUDE_DIRS + ) + # ask R for the lib dir + execute_process( + COMMAND ${LIBR_EXECUTABLE} "--slave" "--no-save" "-e" "cat(R.home('lib'))" + OUTPUT_VARIABLE LIBR_LIB_DIR + ) + + # Windows + else() + # ask R for R_HOME + if(LIBR_EXECUTABLE) + execute_process( + COMMAND ${LIBR_EXECUTABLE} "--slave" "--no-save" "-e" "cat(normalizePath(R.home(), winslash='/'))" + OUTPUT_VARIABLE LIBR_HOME) + endif() + # if R executable not available, query R_HOME path from registry + if(NOT LIBR_HOME) + get_filename_component(LIBR_HOME + "[HKEY_LOCAL_MACHINE\\SOFTWARE\\R-core\\R\\${R_VERSION};InstallPath]" + ABSOLUTE) + if(NOT LIBR_HOME) + message(FATAL_ERROR "\nUnable to locate R executable.\ + \nEither add its location to PATH or provide it through the LIBR_EXECUTABLE cmake variable") + endif() + endif() + # set exe location based on R_ARCH + if(NOT LIBR_EXECUTABLE) + set(LIBR_EXECUTABLE "${LIBR_HOME}/bin/${R_ARCH}/R.exe") + endif() + # set other R paths based on home path + set(LIBR_INCLUDE_DIRS "${LIBR_HOME}/include") + set(LIBR_LIB_DIR "${LIBR_HOME}/bin/${R_ARCH}") + + endif() + +endif() + +if(WIN32 AND MSVC) + # create a local R.lib import library for R.dll if it doesn't exist + if(NOT EXISTS "${CMAKE_CURRENT_BINARY_DIR}/R.lib") + 
create_rlib_for_msvc() + endif() +endif() + +# look for the core R library +find_library(LIBR_CORE_LIBRARY NAMES R + HINTS "${CMAKE_CURRENT_BINARY_DIR}" "${LIBR_LIB_DIR}" "${LIBR_HOME}/bin" "${LIBR_LIBRARIES}") +if(NOT LIBR_CORE_LIBRARY) + message(STATUS "Could not find R core shared library.") +endif() + +set(LIBR_HOME ${LIBR_HOME} CACHE PATH "R home directory") +set(LIBR_EXECUTABLE ${LIBR_EXECUTABLE} CACHE PATH "R executable") +set(LIBR_INCLUDE_DIRS ${LIBR_INCLUDE_DIRS} CACHE PATH "R include directory") +set(LIBR_LIB_DIR ${LIBR_LIB_DIR} CACHE PATH "R shared libraries directory") +set(LIBR_CORE_LIBRARY ${LIBR_CORE_LIBRARY} CACHE PATH "R core shared library") + +# define find requirements +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(LibR DEFAULT_MSG + LIBR_HOME + LIBR_EXECUTABLE + LIBR_INCLUDE_DIRS + LIBR_LIB_DIR + LIBR_CORE_LIBRARY +) + +if(LIBR_FOUND) + message(STATUS "Found R: ${LIBR_EXECUTABLE}") +endif() diff --git a/doc/build.md b/doc/build.md index 58d2a5918..6b7e50675 100644 --- a/doc/build.md +++ b/doc/build.md @@ -134,9 +134,9 @@ Other versions of Visual Studio may work but are untested. ### Building with GPU support -XGBoost can be built with GPU support for both Linux and Windows using cmake. GPU support works with the Python package as well as the CLI version. The R package is not yet supported. +XGBoost can be built with GPU support for both Linux and Windows using cmake. GPU support works with the Python package as well as the CLI version. See [Installing R package with GPU support](#installing-r-package-with-gpu-support) for special instructions for R. -An up-to-date version of the cuda toolkit is required. +An up-to-date version of the CUDA toolkit is required. From the command line on Linux starting from the xgboost directory: @@ -146,7 +146,9 @@ $ cd build $ cmake .. 
-DUSE_CUDA=ON $ make -j ``` -On Windows using cmake, see what options for Generators you have for cmake, and choose one with [arch] replaced by Win64: +**Windows requirements** for GPU build: only Visual C++ 2015 or 2013 with CUDA v8.0 were fully tested. Either install Visual C++ 2015 Build Tools separately, or as a part of Visual Studio 2015. If you already have Visual Studio 2017, the Visual C++ 2015 Toolchain component has to be installed using the VS 2017 Installer. Likely, you would need to use the VS2015 x64 Native Tools command prompt to run the cmake commands given below. In some situations, however, things run just fine from MSYS2 bash command line. + +On Windows, using cmake, see what options for Generators you have for cmake, and choose one with [arch] replaced by Win64: ```bash cmake -help ``` @@ -156,9 +158,15 @@ $ mkdir build $ cd build $ cmake .. -G"Visual Studio 14 2015 Win64" -DUSE_CUDA=ON ``` -Cmake will create an xgboost.sln solution file in the build directory. Build this solution in release mode as a x64 build. +To speed up compilation, compute version specific to your GPU could be passed to cmake as, e.g., `-DGPU_COMPUTE_VER=50`. +The above cmake configuration run will create an xgboost.sln solution file in the build directory. Build this solution in release mode as a x64 build, either from Visual Studio or from command line: +``` +cmake --build . --target xgboost --config Release +``` +If the build seems to use only a single process, you might try to append an option like ` -- /m:6` to the above command. 
### Windows Binaries + Unofficial windows binaries and instructions on how to use them are hosted on [Guido Tapia's blog](http://www.picnet.com.au/blogs/guido/post/2016/09/22/xgboost-windows-x64-binaries-for-download/) ### Customized Building @@ -273,8 +281,42 @@ setwd('wherever/you/cloned/it/xgboost/R-package/') install.packages('.', repos = NULL, type="source") ``` +The package could also be built and installed with cmake (and Visual C++ 2015 on Windows) using instructions from the next section, but without GPU support (omit the `-DUSE_CUDA=ON` cmake parameter). + If all fails, try [building the shared library](#build-the-shared-library) to see whether a problem is specific to R package or not. +### Installing R package with GPU support + +The procedure and requirements are similar as in [Building with GPU support](#building-with-gpu-support), so make sure to read it first. + +On Linux, starting from the xgboost directory: + +```bash +mkdir build +cd build +cmake .. -DUSE_CUDA=ON -DR_LIB=ON +make install -j +``` +When default target is used, an R package shared library would be built in the `build` area. +The `install` target, in addition, assembles the package files with this shared library under `build/R-package`, and runs `R CMD INSTALL`. + +On Windows, cmake with Visual C++ Build Tools (or Visual Studio) has to be used to build an R package with GPU support. Rtools must also be installed (perhaps, some other MinGW distributions with `gendef.exe` and `dlltool.exe` would work, but that was not tested). +```bash +mkdir build +cd build +cmake .. -G"Visual Studio 14 2015 Win64" -DUSE_CUDA=ON -DR_LIB=ON +cmake --build . --target install --config Release +``` +When `--target xgboost` is used, an R package dll would be built under `build/Release`. +The `--target install`, in addition, assembles the package files with this dll under `build/R-package`, and runs `R CMD INSTALL`. 
+ +If cmake can't find your R during the configuration step, you might provide the location of its executable to cmake like this: `-DLIBR_EXECUTABLE="C:/Program Files/R/R-3.4.1/bin/x64/R.exe"`. + +If on Windows you get a "permission denied" error when trying to write to ...Program Files/R/... during the package installation, create a `.Rprofile` file in your personal home directory (if you don't already have one in there), and add a line to it which specifies the location of your R packages user library, like the following: +```r +.libPaths( unique(c("C:/Users/USERNAME/Documents/R/win-library/3.4", .libPaths()))) +``` +You might find the exact location by running `.libPaths()` in R GUI or RStudio. ## Trouble Shooting diff --git a/doc/faq.md b/doc/faq.md index 8abab9e36..253441bdd 100644 --- a/doc/faq.md +++ b/doc/faq.md @@ -57,9 +57,17 @@ Yes, xgboost implements LambdaMART. Checkout the objective section in [parameter How to deal with Missing Value ------------------------------ xgboost supports missing value by default. +In tree algorithms, branch directions for missing values are learned during training. +Note that the gblinear booster treats missing values as zeros. Slightly different result between runs -------------------------------------- This could happen, due to non-determinism in floating point summation order and multi-threading. Though the general accuracy will usually remain the same. + + +Why do I see different results with sparse and dense data? +-------------------------------------------------------- +"Sparse" elements are treated as if they were "missing" by the tree booster, and as zeros by the linear booster. +For tree models, it is important to use consistent data formats during training and scoring. 
\ No newline at end of file diff --git a/src/tree/updater_gpu_common.cuh b/src/tree/updater_gpu_common.cuh index 0b2035588..e31c692ae 100644 --- a/src/tree/updater_gpu_common.cuh +++ b/src/tree/updater_gpu_common.cuh @@ -199,9 +199,11 @@ inline void dense2sparse_tree(RegTree* p_tree, struct BernoulliRng { float p; - int seed; + uint32_t seed; - __host__ __device__ BernoulliRng(float p, int seed) : p(p), seed(seed) {} + __host__ __device__ BernoulliRng(float p, size_t seed_) : p(p) { + seed = static_cast<uint32_t>(seed_); + } __host__ __device__ bool operator()(const int i) const { thrust::default_random_engine rng(seed);