enable ROCm on latest XGBoost

commit 15421e40d9

.github/workflows/main.yml (vendored): 20 lines changed
@@ -141,18 +141,14 @@ jobs:
       architecture: 'x64'
     - name: Install Python packages
       run: |
-        python -m pip install wheel setuptools cpplint pylint
+        python -m pip install wheel setuptools cmakelint cpplint pylint
     - name: Run lint
       run: |
-        python3 dmlc-core/scripts/lint.py xgboost cpp R-package/src
+        python3 tests/ci_build/lint_cpp.py xgboost cpp R-package/src

-        python3 dmlc-core/scripts/lint.py --exclude_path \
-          python-package/xgboost/dmlc-core \
-          python-package/xgboost/include \
-          python-package/xgboost/lib \
-          python-package/xgboost/rabit \
-          python-package/xgboost/src \
-          --pylint-rc python-package/.pylintrc \
-          xgboost \
-          cpp \
-          include src python-package
+        python3 tests/ci_build/lint_cpp.py xgboost cpp include src python-package \
+          --exclude_path python-package/xgboost/dmlc-core python-package/xgboost/include \
+          python-package/xgboost/lib python-package/xgboost/rabit \
+          python-package/xgboost/src
+
+        sh ./tests/ci_build/lint_cmake.sh || true

.github/workflows/python_tests.yml (vendored): 2 lines changed
@@ -190,7 +190,7 @@ jobs:
       run: |
         mkdir build_msvc
         cd build_msvc
-        cmake .. -G"Visual Studio 17 2022" -DCMAKE_CONFIGURATION_TYPES="Release" -A x64 -DGOOGLE_TEST=ON -DUSE_DMLC_GTEST=ON -DBUILD_DEPRECATED_CLI=ON
+        cmake .. -G"Visual Studio 17 2022" -DCMAKE_CONFIGURATION_TYPES="Release" -A x64 -DBUILD_DEPRECATED_CLI=ON
         cmake --build . --config Release --parallel $(nproc)

     - name: Install Python package

CMakeLists.txt: 264 lines changed
@@ -8,9 +8,9 @@ cmake_policy(SET CMP0076 NEW)
 set(CMAKE_POLICY_DEFAULT_CMP0063 NEW)
 cmake_policy(SET CMP0063 NEW)

-if ((${CMAKE_VERSION} VERSION_GREATER 3.13) OR (${CMAKE_VERSION} VERSION_EQUAL 3.13))
+if((${CMAKE_VERSION} VERSION_GREATER 3.13) OR (${CMAKE_VERSION} VERSION_EQUAL 3.13))
   cmake_policy(SET CMP0077 NEW)
-endif ((${CMAKE_VERSION} VERSION_GREATER 3.13) OR (${CMAKE_VERSION} VERSION_EQUAL 3.13))
+endif()

 message(STATUS "CMake version ${CMAKE_VERSION}")

@@ -90,108 +90,99 @@ option(PLUGIN_UPDATER_ONEAPI "DPC++ updater" OFF)
 option(ADD_PKGCONFIG "Add xgboost.pc into system." ON)

 #-- Checks for building XGBoost
-if (USE_DEBUG_OUTPUT AND (NOT (CMAKE_BUILD_TYPE MATCHES Debug)))
+if(USE_DEBUG_OUTPUT AND (NOT (CMAKE_BUILD_TYPE MATCHES Debug)))
   message(SEND_ERROR "Do not enable `USE_DEBUG_OUTPUT' with release build.")
-endif (USE_DEBUG_OUTPUT AND (NOT (CMAKE_BUILD_TYPE MATCHES Debug)))
-
-if (USE_NCCL AND NOT (USE_CUDA))
+endif()
+if(USE_NCCL AND NOT (USE_CUDA))
   message(SEND_ERROR "`USE_NCCL` must be enabled with `USE_CUDA` flag.")
-endif (USE_NCCL AND NOT (USE_CUDA))
-if (USE_DEVICE_DEBUG AND NOT (USE_CUDA))
+endif()
+if(USE_DEVICE_DEBUG AND NOT (USE_CUDA))
   message(SEND_ERROR "`USE_DEVICE_DEBUG` must be enabled with `USE_CUDA` flag.")
-endif (USE_DEVICE_DEBUG AND NOT (USE_CUDA))
-if (BUILD_WITH_SHARED_NCCL AND (NOT USE_NCCL))
+endif()
+if(BUILD_WITH_SHARED_NCCL AND (NOT USE_NCCL))
   message(SEND_ERROR "Build XGBoost with -DUSE_NCCL=ON to enable BUILD_WITH_SHARED_NCCL.")
-endif (BUILD_WITH_SHARED_NCCL AND (NOT USE_NCCL))
-
-if (USE_RCCL AND NOT (USE_HIP))
+endif()
+if(USE_RCCL AND NOT (USE_HIP))
   message(SEND_ERROR "`USE_RCCL` must be enabled with `USE_HIP` flag.")
-endif (USE_RCCL AND NOT (USE_HIP))
-if (USE_DEVICE_DEBUG AND NOT (USE_HIP))
-  message(SEND_ERROR "`USE_DEVICE_DEBUG` must be enabled with `USE_HIP` flag.")
-endif (USE_DEVICE_DEBUG AND NOT (USE_HIP))
-if (BUILD_WITH_SHARED_RCCL AND (NOT USE_RCCL))
+endif()
+if(BUILD_WITH_SHARED_RCCL AND (NOT USE_RCCL))
   message(SEND_ERROR "Build XGBoost with -DUSE_RCCL=ON to enable BUILD_WITH_SHARED_RCCL.")
-endif (BUILD_WITH_SHARED_RCCL AND (NOT USE_RCCL))
-
-if (JVM_BINDINGS AND R_LIB)
+endif()
+if(JVM_BINDINGS AND R_LIB)
   message(SEND_ERROR "`R_LIB' is not compatible with `JVM_BINDINGS' as they both have customized configurations.")
-endif (JVM_BINDINGS AND R_LIB)
-if (R_LIB AND GOOGLE_TEST)
-  message(WARNING "Some C++ unittests will fail with `R_LIB` enabled,
- as R package redirects some functions to R runtime implementation.")
-endif (R_LIB AND GOOGLE_TEST)
-
-if (PLUGIN_RMM AND NOT (USE_CUDA))
+endif()
+if(R_LIB AND GOOGLE_TEST)
+  message(
+    WARNING
+    "Some C++ tests will fail with `R_LIB` enabled, as R package redirects some functions to R runtime implementation."
+  )
+endif()
+if(PLUGIN_RMM AND NOT (USE_CUDA))
   message(SEND_ERROR "`PLUGIN_RMM` must be enabled with `USE_CUDA` flag.")
-endif (PLUGIN_RMM AND NOT (USE_CUDA))
-
-if (PLUGIN_RMM AND NOT (USE_HIP))
-  message(SEND_ERROR "`PLUGIN_RMM` must be enabled with `USE_HIP` flag.")
-endif (PLUGIN_RMM AND NOT (USE_HIP))
-
-if (PLUGIN_RMM AND NOT ((CMAKE_CXX_COMPILER_ID STREQUAL "Clang") OR (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")))
+endif()
+if(PLUGIN_RMM AND NOT ((CMAKE_CXX_COMPILER_ID STREQUAL "Clang") OR (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")))
   message(SEND_ERROR "`PLUGIN_RMM` must be used with GCC or Clang compiler.")
-endif (PLUGIN_RMM AND NOT ((CMAKE_CXX_COMPILER_ID STREQUAL "Clang") OR (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")))
-if (PLUGIN_RMM AND NOT (CMAKE_SYSTEM_NAME STREQUAL "Linux"))
+endif()
+if(PLUGIN_RMM AND NOT (CMAKE_SYSTEM_NAME STREQUAL "Linux"))
   message(SEND_ERROR "`PLUGIN_RMM` must be used with Linux.")
-endif (PLUGIN_RMM AND NOT (CMAKE_SYSTEM_NAME STREQUAL "Linux"))
-if (ENABLE_ALL_WARNINGS)
-  if ((NOT CMAKE_CXX_COMPILER_ID MATCHES "Clang") AND (NOT CMAKE_CXX_COMPILER_ID STREQUAL "GNU"))
+endif()
+if(ENABLE_ALL_WARNINGS)
+  if((NOT CMAKE_CXX_COMPILER_ID MATCHES "Clang") AND (NOT CMAKE_CXX_COMPILER_ID STREQUAL "GNU"))
     message(SEND_ERROR "ENABLE_ALL_WARNINGS is only available for Clang and GCC.")
-  endif ((NOT CMAKE_CXX_COMPILER_ID MATCHES "Clang") AND (NOT CMAKE_CXX_COMPILER_ID STREQUAL "GNU"))
-endif (ENABLE_ALL_WARNINGS)
-if (BUILD_STATIC_LIB AND (R_LIB OR JVM_BINDINGS))
+  endif()
+endif()
+if(BUILD_STATIC_LIB AND (R_LIB OR JVM_BINDINGS))
   message(SEND_ERROR "Cannot build a static library libxgboost.a when R or JVM packages are enabled.")
-endif (BUILD_STATIC_LIB AND (R_LIB OR JVM_BINDINGS))
-if (PLUGIN_FEDERATED)
-  if (CMAKE_CROSSCOMPILING)
+endif()
+if(PLUGIN_FEDERATED)
+  if(CMAKE_CROSSCOMPILING)
     message(SEND_ERROR "Cannot cross compile with federated learning support")
-  endif ()
-  if (BUILD_STATIC_LIB)
+  endif()
+  if(BUILD_STATIC_LIB)
     message(SEND_ERROR "Cannot build static lib with federated learning support")
-  endif ()
-  if (R_LIB OR JVM_BINDINGS)
+  endif()
+  if(R_LIB OR JVM_BINDINGS)
     message(SEND_ERROR "Cannot enable federated learning support when R or JVM packages are enabled.")
-  endif ()
-  if (WIN32)
+  endif()
+  if(WIN32)
     message(SEND_ERROR "Federated learning not supported for Windows platform")
-  endif ()
-endif ()
+  endif()
+endif()

 #-- Removed options
-if (USE_AVX)
+if(USE_AVX)
   message(SEND_ERROR "The option `USE_AVX` is deprecated as experimental AVX features have been removed from XGBoost.")
-endif (USE_AVX)
-if (PLUGIN_LZ4)
+endif()
+if(PLUGIN_LZ4)
   message(SEND_ERROR "The option `PLUGIN_LZ4` is removed from XGBoost.")
-endif (PLUGIN_LZ4)
-if (RABIT_BUILD_MPI)
+endif()
+if(RABIT_BUILD_MPI)
   message(SEND_ERROR "The option `RABIT_BUILD_MPI` has been removed from XGBoost.")
-endif (RABIT_BUILD_MPI)
-if (USE_S3)
+endif()
+if(USE_S3)
   message(SEND_ERROR "The option `USE_S3` has been removed from XGBoost")
-endif (USE_S3)
-if (USE_AZURE)
+endif()
+if(USE_AZURE)
   message(SEND_ERROR "The option `USE_AZURE` has been removed from XGBoost")
-endif (USE_AZURE)
-if (USE_HDFS)
+endif()
+if(USE_HDFS)
   message(SEND_ERROR "The option `USE_HDFS` has been removed from XGBoost")
-endif (USE_HDFS)
+endif()

 #-- Sanitizer
-if (USE_SANITIZER)
+if(USE_SANITIZER)
   include(cmake/Sanitizer.cmake)
   enable_sanitizers("${ENABLED_SANITIZERS}")
-endif (USE_SANITIZER)
+endif()

-if (USE_CUDA)
+if(USE_CUDA)
   set(USE_OPENMP ON CACHE BOOL "CUDA requires OpenMP" FORCE)
   # `export CXX=' is ignored by CMake CUDA.
   set(CMAKE_CUDA_HOST_COMPILER ${CMAKE_CXX_COMPILER})
   message(STATUS "Configured CUDA host compiler: ${CMAKE_CUDA_HOST_COMPILER}")

   enable_language(CUDA)
-  if (${CMAKE_CUDA_COMPILER_VERSION} VERSION_LESS 11.0)
+  if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_LESS 11.0)
     message(FATAL_ERROR "CUDA version must be at least 11.0!")
   endif()
   set(GEN_CODE "")
@@ -199,7 +190,7 @@ if (USE_CUDA)
   add_subdirectory(${PROJECT_SOURCE_DIR}/gputreeshap)

   find_package(CUDAToolkit REQUIRED)
-endif (USE_CUDA)
+endif()

 if (USE_HIP)
   set(USE_OPENMP ON CACHE BOOL "HIP requires OpenMP" FORCE)
@@ -218,7 +209,7 @@ if (USE_HIP)
   add_subdirectory(${PROJECT_SOURCE_DIR}/rocgputreeshap)
 endif (USE_HIP)

-if (FORCE_COLORED_OUTPUT AND (CMAKE_GENERATOR STREQUAL "Ninja") AND
+if(FORCE_COLORED_OUTPUT AND (CMAKE_GENERATOR STREQUAL "Ninja") AND
     ((CMAKE_CXX_COMPILER_ID STREQUAL "GNU") OR
     (CMAKE_CXX_COMPILER_ID STREQUAL "Clang")))
   set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fdiagnostics-color=always")
@@ -226,10 +217,10 @@ endif()

 find_package(Threads REQUIRED)

-if (USE_OPENMP)
-  if (APPLE)
+if(USE_OPENMP)
+  if(APPLE)
     find_package(OpenMP)
-    if (NOT OpenMP_FOUND)
+    if(NOT OpenMP_FOUND)
       # Try again with extra path info; required for libomp 15+ from Homebrew
       execute_process(COMMAND brew --prefix libomp
                       OUTPUT_VARIABLE HOMEBREW_LIBOMP_PREFIX
@@ -242,20 +233,20 @@ if (USE_OPENMP)
       set(OpenMP_CXX_LIB_NAMES omp)
       set(OpenMP_omp_LIBRARY ${HOMEBREW_LIBOMP_PREFIX}/lib/libomp.dylib)
       find_package(OpenMP REQUIRED)
-    endif ()
-  else ()
+    endif()
+  else()
     find_package(OpenMP REQUIRED)
-  endif ()
-endif (USE_OPENMP)
+  endif()
+endif()
 #Add for IBM i
-if (${CMAKE_SYSTEM_NAME} MATCHES "OS400")
+if(${CMAKE_SYSTEM_NAME} MATCHES "OS400")
   set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread")
   set(CMAKE_CXX_ARCHIVE_CREATE "<CMAKE_AR> -X64 qc <TARGET> <OBJECTS>")
 endif()

-if (USE_NCCL)
+if(USE_NCCL)
   find_package(Nccl REQUIRED)
-endif (USE_NCCL)
+endif()

 if (USE_RCCL)
   find_package(rccl REQUIRED)
@@ -263,17 +254,19 @@ endif (USE_RCCL)

 # dmlc-core
 msvc_use_static_runtime()
-if (FORCE_SHARED_CRT)
+if(FORCE_SHARED_CRT)
   set(DMLC_FORCE_SHARED_CRT ON)
-endif ()
+endif()
 add_subdirectory(${xgboost_SOURCE_DIR}/dmlc-core)

-if (MSVC)
-  if (TARGET dmlc_unit_tests)
-    target_compile_options(dmlc_unit_tests PRIVATE
-      -D_CRT_SECURE_NO_WARNINGS -D_CRT_SECURE_NO_DEPRECATE)
-  endif (TARGET dmlc_unit_tests)
-endif (MSVC)
+if(MSVC)
+  if(TARGET dmlc_unit_tests)
+    target_compile_options(
+      dmlc_unit_tests PRIVATE
+      -D_CRT_SECURE_NO_WARNINGS -D_CRT_SECURE_NO_DEPRECATE
+    )
+  endif()
+endif()

 # rabit
 add_subdirectory(rabit)
@@ -282,20 +275,25 @@ add_subdirectory(rabit)
 add_subdirectory(${xgboost_SOURCE_DIR}/src)
 target_link_libraries(objxgboost PUBLIC dmlc)

+# Link -lstdc++fs for GCC 8.x
+if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS "9.0")
+  target_link_libraries(objxgboost PUBLIC stdc++fs)
+endif()
+
 # Exports some R specific definitions and objects
-if (R_LIB)
+if(R_LIB)
   add_subdirectory(${xgboost_SOURCE_DIR}/R-package)
-endif (R_LIB)
+endif()

 # This creates its own shared library `xgboost4j'.
-if (JVM_BINDINGS)
+if(JVM_BINDINGS)
   add_subdirectory(${xgboost_SOURCE_DIR}/jvm-packages)
-endif (JVM_BINDINGS)
+endif()

 # Plugin
 add_subdirectory(${xgboost_SOURCE_DIR}/plugin)

-if (PLUGIN_RMM)
+if(PLUGIN_RMM)
   find_package(rmm REQUIRED)

   # Patch the rmm targets so they reference the static cudart
@@ -306,14 +304,14 @@ if (PLUGIN_RMM)
   list(APPEND rmm_link_libs CUDA::cudart_static)
   set_target_properties(rmm::rmm PROPERTIES INTERFACE_LINK_LIBRARIES "${rmm_link_libs}")
   get_target_property(rmm_link_libs rmm::rmm INTERFACE_LINK_LIBRARIES)
-endif (PLUGIN_RMM)
+endif()

 #-- library
-if (BUILD_STATIC_LIB)
+if(BUILD_STATIC_LIB)
   add_library(xgboost STATIC)
-else (BUILD_STATIC_LIB)
+else()
   add_library(xgboost SHARED)
-endif (BUILD_STATIC_LIB)
+endif()
 target_link_libraries(xgboost PRIVATE objxgboost)
 target_include_directories(xgboost
   INTERFACE
@@ -322,7 +320,7 @@ target_include_directories(xgboost
 #-- End shared library

 #-- CLI for xgboost
-if (BUILD_DEPRECATED_CLI)
+if(BUILD_DEPRECATED_CLI)
   add_executable(runxgboost ${xgboost_SOURCE_DIR}/src/cli_main.cc)
   target_link_libraries(runxgboost PRIVATE objxgboost)
   target_include_directories(runxgboost
@@ -336,12 +334,12 @@ if (BUILD_DEPRECATED_CLI)
   xgboost_target_link_libraries(runxgboost)
   xgboost_target_defs(runxgboost)

-  if (KEEP_BUILD_ARTIFACTS_IN_BINARY_DIR)
+  if(KEEP_BUILD_ARTIFACTS_IN_BINARY_DIR)
     set_output_directory(runxgboost ${xgboost_BINARY_DIR})
-  else ()
+  else()
     set_output_directory(runxgboost ${xgboost_SOURCE_DIR})
-  endif (KEEP_BUILD_ARTIFACTS_IN_BINARY_DIR)
-endif (BUILD_DEPRECATED_CLI)
+  endif()
+endif()
 #-- End CLI for xgboost

 # Common setup for all targets
@@ -351,41 +349,41 @@ foreach(target xgboost objxgboost dmlc)
   xgboost_target_defs(${target})
 endforeach()

-if (JVM_BINDINGS)
+if(JVM_BINDINGS)
   xgboost_target_properties(xgboost4j)
   xgboost_target_link_libraries(xgboost4j)
   xgboost_target_defs(xgboost4j)
-endif (JVM_BINDINGS)
+endif()

-if (KEEP_BUILD_ARTIFACTS_IN_BINARY_DIR)
+if(KEEP_BUILD_ARTIFACTS_IN_BINARY_DIR)
   set_output_directory(xgboost ${xgboost_BINARY_DIR}/lib)
-else ()
+else()
   set_output_directory(xgboost ${xgboost_SOURCE_DIR}/lib)
-endif ()
+endif()

 # Ensure these two targets do not build simultaneously, as they produce outputs with conflicting names
-if (BUILD_DEPRECATED_CLI)
+if(BUILD_DEPRECATED_CLI)
   add_dependencies(xgboost runxgboost)
-endif (BUILD_DEPRECATED_CLI)
+endif()

 #-- Installing XGBoost
-if (R_LIB)
+if(R_LIB)
   include(cmake/RPackageInstallTargetSetup.cmake)
   set_target_properties(xgboost PROPERTIES PREFIX "")
-  if (APPLE)
+  if(APPLE)
     set_target_properties(xgboost PROPERTIES SUFFIX ".so")
-  endif (APPLE)
+  endif()
   setup_rpackage_install_target(xgboost "${CMAKE_CURRENT_BINARY_DIR}/R-package-install")
   set(CMAKE_INSTALL_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/dummy_inst")
-endif (R_LIB)
-if (MINGW)
+endif()
+if(MINGW)
   set_target_properties(xgboost PROPERTIES PREFIX "")
-endif (MINGW)
+endif()

-if (BUILD_C_DOC)
+if(BUILD_C_DOC)
   include(cmake/Doc.cmake)
   run_doxygen()
-endif (BUILD_C_DOC)
+endif()

 include(CPack)

@@ -401,19 +399,19 @@ install(DIRECTORY ${xgboost_SOURCE_DIR}/include/xgboost
 # > in any export set.
 #
 # https://github.com/dmlc/xgboost/issues/6085
-if (BUILD_STATIC_LIB)
-  if (BUILD_DEPRECATED_CLI)
+if(BUILD_STATIC_LIB)
+  if(BUILD_DEPRECATED_CLI)
     set(INSTALL_TARGETS xgboost runxgboost objxgboost dmlc)
   else()
     set(INSTALL_TARGETS xgboost objxgboost dmlc)
-  endif (BUILD_DEPRECATED_CLI)
-else (BUILD_STATIC_LIB)
-  if (BUILD_DEPRECATED_CLI)
+  endif()
+else()
+  if(BUILD_DEPRECATED_CLI)
     set(INSTALL_TARGETS xgboost runxgboost)
-  else(BUILD_DEPRECATED_CLI)
+  else()
     set(INSTALL_TARGETS xgboost)
-  endif (BUILD_DEPRECATED_CLI)
-endif (BUILD_STATIC_LIB)
+  endif()
+endif()

 install(TARGETS ${INSTALL_TARGETS}
   EXPORT XGBoostTargets
@@ -442,7 +440,7 @@ install(
   DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/xgboost)

 #-- Test
-if (GOOGLE_TEST)
+if(GOOGLE_TEST)
   enable_testing()
   # Unittests.
   add_executable(testxgboost)
@@ -462,7 +460,7 @@ if (GOOGLE_TEST)
     ${xgboost_SOURCE_DIR}/tests/cli/machine.conf.in
     ${xgboost_BINARY_DIR}/tests/cli/machine.conf
     @ONLY)
-  if (BUILD_DEPRECATED_CLI)
+  if(BUILD_DEPRECATED_CLI)
     add_test(
       NAME TestXGBoostCLI
       COMMAND runxgboost ${xgboost_BINARY_DIR}/tests/cli/machine.conf
@@ -470,8 +468,8 @@ if (GOOGLE_TEST)
     set_tests_properties(TestXGBoostCLI
       PROPERTIES
       PASS_REGULAR_EXPRESSION ".*test-rmse:0.087.*")
-  endif (BUILD_DEPRECATED_CLI)
-endif (GOOGLE_TEST)
+  endif()
+endif()

 # For MSVC: Call msvc_use_static_runtime() once again to completely
 # replace /MD with /MT. See https://github.com/dmlc/xgboost/issues/4462
@@ -479,10 +477,10 @@ endif (GOOGLE_TEST)
 msvc_use_static_runtime()

 # Add xgboost.pc
-if (ADD_PKGCONFIG)
+if(ADD_PKGCONFIG)
   configure_file(${xgboost_SOURCE_DIR}/cmake/xgboost.pc.in ${xgboost_BINARY_DIR}/xgboost.pc @ONLY)

   install(
     FILES ${xgboost_BINARY_DIR}/xgboost.pc
     DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig)
-endif (ADD_PKGCONFIG)
+endif()

NEWS.md: 201 lines changed
@@ -3,6 +3,207 @@ XGBoost Change Log

 This file records the changes in xgboost library in reverse chronological order.

+## 2.0.0 (2023 Aug 16)
+
+We are excited to announce the release of XGBoost 2.0. This note will begin by covering some overall changes and then highlight specific updates to the package.
+
+### Initial work on multi-target trees with vector-leaf outputs
+We have been working on vector-leaf tree models for multi-target regression, multi-label classification, and multi-class classification in version 2.0. Previously, XGBoost would build a separate model for each target. However, with this new feature that's still being developed, XGBoost can build one tree for all targets. The feature has multiple benefits and trade-offs compared to the existing approach. It can help prevent overfitting, produce smaller models, and build trees that consider the correlation between targets. In addition, users can combine vector-leaf and scalar-leaf trees during a training session using a callback. Please note that the feature is still a work in progress, and many parts are not yet available. See #9043 for the current status. Related PRs: (#8538, #8697, #8902, #8884, #8895, #8898, #8612, #8652, #8698, #8908, #8928, #8968, #8616, #8922, #8890, #8872, #8889, #9509) Please note that only the `hist` (default) tree method on CPU can be used for building vector-leaf trees at the moment.
+
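
A minimal sketch of the vector-leaf training described above, assuming an XGBoost 2.0 build; `multi_strategy="multi_output_tree"` is the 2.0 switch for this mode, and the synthetic data is purely illustrative:

```python
import numpy as np
import xgboost as xgb

rng = np.random.default_rng(0)
X = rng.normal(size=(512, 16))
Y = rng.normal(size=(512, 3))  # three regression targets

# One tree per boosting round for *all* targets; CPU `hist` only for now.
Xy = xgb.DMatrix(X, label=Y)
booster = xgb.train(
    {
        "tree_method": "hist",
        "multi_strategy": "multi_output_tree",  # vector-leaf trees
        "objective": "reg:squarederror",
    },
    Xy,
    num_boost_round=8,
)
print(booster.predict(Xy).shape)  # (512, 3): one column per target
```
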
+### New `device` parameter.
+
+A new `device` parameter replaces the existing `gpu_id`, `gpu_hist`, `gpu_predictor`, `cpu_predictor`, `gpu_coord_descent`, and the PySpark-specific parameter `use_gpu`. From now on, users need only the `device` parameter to select which device to run on, along with the ordinal of the device. For more information, please see our document page (https://xgboost.readthedocs.io/en/stable/parameter.html#general-parameters). For example, with `device="cuda", tree_method="hist"`, XGBoost will run the `hist` tree method on GPU. (#9363, #8528, #8604, #9354, #9274, #9243, #8896, #9129, #9362, #9402, #9385, #9398, #9390, #9386, #9412, #9507, #9536) The old behavior of `gpu_hist` is preserved but deprecated. In addition, the `predictor` parameter is removed.
+
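
A short sketch of device selection under the new scheme; the GPU `approx` method from the section below is reached the same way:

```python
import xgboost as xgb

# Old style (deprecated): tree_method="gpu_hist", gpu_id=0
# New style: pick the algorithm and the device independently.
clf = xgb.XGBClassifier(tree_method="hist", device="cuda:0")

# The GPU-based `approx` method uses the same mechanism:
reg = xgb.XGBRegressor(tree_method="approx", device="cuda")
```
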
+### `hist` is now the default tree method
+Starting from 2.0, the `hist` tree method is the default. In previous versions, XGBoost chose `approx` or `exact` depending on the input data and training environment. The new default can help XGBoost train models more efficiently and consistently. (#9320, #9353)
+
+### GPU-based approx tree method
+There is initial support for using the `approx` tree method on GPU. The performance of `approx` is not yet well optimized, but it is feature complete except for the JVM packages. It can be accessed through the parameter combination `device="cuda", tree_method="approx"` (see the sketch above). (#9414, #9399, #9478) Please note that the Scala-based Spark interface is not yet supported.
+
+### Optimize and bound the size of the histogram on CPU, to control memory footprint
+
+XGBoost has a new parameter `max_cached_hist_node` for users to limit the CPU cache size for histograms. It can help prevent XGBoost from caching histograms too aggressively. Without the cache, performance is likely to decrease. However, the size of the cache grows exponentially with the depth of the tree. The limit can be crucial when growing deep trees. In most cases, users need not configure this parameter as it does not affect the model's accuracy. (#9455, #9441, #9440, #9427, #9400)
+
+Along with the cache limit, XGBoost also reduces the memory usage of the `hist` and `approx` tree methods on distributed systems by cutting the size of the cache by half. (#9433)
+
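
A hedged sketch of capping the histogram cache when growing deep trees; the parameter name is as described above, and the chosen value is arbitrary:

```python
import numpy as np
import xgboost as xgb

rng = np.random.default_rng(0)
Xy = xgb.DMatrix(rng.normal(size=(1000, 10)), label=rng.normal(size=1000))
params = {
    "tree_method": "hist",
    "max_depth": 12,               # deep trees are where the cache grows fastest
    "max_cached_hist_node": 1024,  # bound the cached histogram nodes
}
booster = xgb.train(params, Xy, num_boost_round=10)
```
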
+### Improved external memory support
+There is some exciting development around external memory support in XGBoost. It's still an experimental feature, but the performance has been significantly improved with the default `hist` tree method. We replaced the old file IO logic with memory mapping. In addition to performance, we have reduced CPU memory usage and added extensive documentation. Beginning from 2.0.0, we encourage users to try it with the `hist` tree method when the memory saving by `QuantileDMatrix` is not sufficient. (#9361, #9317, #9282, #9315, #8457)
+
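
A minimal sketch of the external-memory path via `xgboost.DataIter`; the in-memory batches stand in for real on-disk shards, and `cache_prefix` controls where the memory-mapped cache lives:

```python
import numpy as np
import xgboost as xgb

class Batches(xgb.DataIter):
    """Yield pre-built batches; real use would read files instead."""
    def __init__(self, batches):
        self._batches = batches
        self._it = 0
        super().__init__(cache_prefix="./cache")  # on-disk cache location

    def next(self, input_data):
        if self._it == len(self._batches):
            return 0  # no more batches
        X, y = self._batches[self._it]
        input_data(data=X, label=y)
        self._it += 1
        return 1

    def reset(self):
        self._it = 0

rng = np.random.default_rng(0)
batches = [(rng.normal(size=(256, 8)), rng.normal(size=256)) for _ in range(4)]
Xy = xgb.DMatrix(Batches(batches))  # external-memory DMatrix
booster = xgb.train({"tree_method": "hist"}, Xy, num_boost_round=10)
```
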
+### Learning to rank
+We created a brand-new implementation for the learning-to-rank task. With the latest version, XGBoost gained a set of new features for the ranking task, including:
+
+- A new parameter `lambdarank_pair_method` for choosing the pair construction strategy.
+- A new parameter `lambdarank_num_pair_per_sample` for controlling the number of pairs for each sample.
+- An experimental implementation of unbiased learning-to-rank, which can be accessed using the `lambdarank_unbiased` parameter.
+- Support for custom gain function with `NDCG` using the `ndcg_exp_gain` parameter.
+- Deterministic GPU computation for all objectives and metrics.
+- `NDCG` is now the default objective function.
+- Improved performance of metrics using caches.
+- Support scikit-learn utilities for `XGBRanker`.
+- Extensive documentation on how learning-to-rank works with XGBoost.
+
+For more information, please see the [tutorial](https://xgboost.readthedocs.io/en/latest/tutorials/learning_to_rank.html). Related PRs: (#8771, #8692, #8783, #8789, #8790, #8859, #8887, #8893, #8906, #8931, #9075, #9015, #9381, #9336, #8822, #9222, #8984, #8785, #8786, #8768)
+
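
An illustrative sketch of the new ranking parameters with the scikit-learn style `XGBRanker`; the values are arbitrary and `qid` groups rows into queries (it must be non-decreasing):

```python
import numpy as np
import xgboost as xgb

rng = np.random.default_rng(0)
X = rng.normal(size=(300, 10))
y = rng.integers(0, 4, size=300)               # graded relevance labels
qid = np.sort(rng.integers(0, 20, size=300))   # sorted query ids

ranker = xgb.XGBRanker(
    objective="rank:ndcg",            # NDCG is now the default objective
    lambdarank_pair_method="topk",    # or "mean"
    lambdarank_num_pair_per_sample=8, # pairs constructed per sample
    tree_method="hist",
)
ranker.fit(X, y, qid=qid)
scores = ranker.predict(X)
```
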
+### Automatically estimated intercept
+
+In the previous version, `base_score` was a constant that could be set as a training parameter. In the new version, XGBoost can automatically estimate this parameter based on input labels for optimal accuracy. (#8539, #8498, #8272, #8793, #8607)
+
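
A small sketch of inspecting the estimated intercept after training; the key path follows the saved JSON config layout, so treat the exact path as an assumption to verify against your build:

```python
import json
import numpy as np
import xgboost as xgb

rng = np.random.default_rng(0)
Xy = xgb.DMatrix(rng.normal(size=(200, 5)), label=rng.normal(loc=3.0, size=200))
booster = xgb.train({"objective": "reg:squarederror"}, Xy, num_boost_round=1)

config = json.loads(booster.save_config())
# Estimated from the labels instead of the old fixed default of 0.5.
print(config["learner"]["learner_model_param"]["base_score"])
```
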
+### Quantile regression
+The XGBoost algorithm now supports quantile regression, which involves minimizing the quantile loss (also called "pinball loss"). Furthermore, XGBoost allows for training with multiple target quantiles simultaneously, with one tree per quantile. (#8775, #8761, #8760, #8758, #8750)
+
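
A sketch of multi-quantile training as described above; objective and parameter names follow the 2.0 release, and the data is synthetic:

```python
import numpy as np
import xgboost as xgb

rng = np.random.default_rng(0)
X = rng.normal(size=(500, 4))
y = X @ rng.normal(size=4) + rng.normal(size=500)

Xy = xgb.DMatrix(X, label=y)
booster = xgb.train(
    {
        "objective": "reg:quantileerror",
        "quantile_alpha": np.array([0.1, 0.5, 0.9]),  # one tree per quantile
        "tree_method": "hist",
    },
    Xy,
    num_boost_round=20,
)
print(booster.predict(Xy).shape)  # (500, 3): one column per quantile
```
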
+### L1 and Quantile regression now supports learning rate
+Both objectives use adaptive trees due to the lack of proper Hessian values. In the new version, XGBoost can scale the leaf value with the learning rate accordingly. (#8866)
+
+### Export cut value
+
+Using the Python or the C package, users can export the quantile values (not to be confused with quantile regression) used for the `hist` tree method. (#9356)
+
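
A sketch of exporting the cut values; I'm assuming the Python accessor is `get_quantile_cut` as added by #9356, so verify the name against the 2.0 API docs before relying on it:

```python
import numpy as np
import xgboost as xgb

rng = np.random.default_rng(0)
X = rng.normal(size=(1000, 3))
y = rng.normal(size=1000)

Xy = xgb.QuantileDMatrix(X, label=y, max_bin=64)
# Assumed accessor (#9356): returns a CSC-style (indptr, values) pair of
# the histogram bin boundaries used by `hist`.
indptr, values = Xy.get_quantile_cut()
print(indptr)       # one slice of `values` per feature
print(values[:10])
```
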
+### Column-based split and federated learning
+We made progress on column-based split for federated learning. In 2.0, `approx`, `hist`, and `hist` with vector leaf can all work with column-based data split, along with support for vertical federated learning. Work on GPU support is still ongoing, stay tuned. (#8576, #8468, #8442, #8847, #8811, #8985, #8623, #8568, #8828, #8932, #9081, #9102, #9103, #9124, #9120, #9367, #9370, #9343, #9171, #9346, #9270, #9244, #8494, #8434, #8742, #8804, #8710, #8676, #9020, #9002, #9058, #9037, #9018, #9295, #9006, #9300, #8765, #9365, #9060)
+
+### PySpark
+After the initial introduction of the PySpark interface, it has gained some new features and optimizations in 2.0.
+
+- GPU-based prediction. (#9292, #9542)
+- Optimization for data initialization by avoiding the stack operation. (#9088)
+- Support predicting feature contributions. (#8633)
+- Python typing support. (#9156, #9172, #9079, #8375)
+- `use_gpu` is deprecated. The `device` parameter is preferred.
+- Update `eval_metric` validation to support lists of strings. (#8826)
+- Improved logs for training. (#9449)
+- Maintenance, including refactoring and document updates. (#8324, #8465, #8605, #9202, #9460, #9302, #8385, #8630, #8525, #8496)
+- Fix for GPU setup. (#9495)
+
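
A hedged sketch of the PySpark estimator with the preferred `device` parameter; it assumes a running Spark session and a DataFrame with a `features` vector column and a `label` column:

```python
from xgboost.spark import SparkXGBClassifier

clf = SparkXGBClassifier(
    features_col="features",
    label_col="label",
    device="cuda",   # preferred over the deprecated use_gpu=True
    num_workers=2,
)
# With real DataFrames in scope:
# model = clf.fit(train_df)
# preds = model.transform(test_df)
```
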
+### Other General New Features
+Here's a list of new features that don't have their own section and yet are general to all language bindings.
+
+- Use array interface for CSC matrix. This helps XGBoost to use a consistent number of threads and align the interface of the CSC matrix with other interfaces. In addition, memory usage is likely to decrease with CSC input thanks to on-the-fly type conversion. (#8672)
+- CUDA compute 90 is now part of the default build. (#9397)
+
+### Other General Optimization
+These optimizations are general to all language bindings. For language-specific optimization, please visit the corresponding sections.
+
+- Performance for input with `array_interface` on CPU (like `numpy`) is significantly improved. (#9090)
+- Some optimization with CUDA for data initialization. (#9199, #9209, #9144)
+- Use the latest thrust policy to prevent synchronizing GPU devices. (#9212)
+- XGBoost now uses a per-thread CUDA stream, which prevents synchronization with other streams. (#9416, #9396, #9413)
+
+### Notable breaking change
+
+Other than the aforementioned change with the `device` parameter, here's a list of breaking changes affecting all packages.
+
+- Users must specify the format for text input (#9077). However, we suggest using third-party data structures such as `numpy.ndarray` instead of relying on text inputs. See https://github.com/dmlc/xgboost/issues/9472 for more info.
+
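
A sketch of the now-mandatory format specifier for text input; the `?format=csv` URI parameter is the existing XGBoost convention, and the file name is hypothetical:

```python
import xgboost as xgb

# Before 2.0 the format could sometimes be inferred; now it must be explicit.
Xy = xgb.DMatrix("train.csv?format=csv&label_column=0")
```
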
+### Notable bug fixes
+
+Some noteworthy bug fixes that are not related to specific language bindings are listed in this section.
+
+- Some language environments use a different thread to perform garbage collection, which breaks the thread-local cache used in XGBoost. XGBoost 2.0 implements a new thread-safe cache using a lightweight lock to replace the thread-local cache. (#8851)
+- Fix model IO by clearing the prediction cache. (#8904)
+- `inf` is checked during data construction. (#8911)
+- Preserve order of saved updaters configuration. Usually, this is not an issue unless the `updater` parameter is used instead of the `tree_method` parameter. (#9355)
+- Fix GPU memory allocation issue with categorical splits. (#9529)
+- Handle escape sequences like `\t\n` in feature names for JSON model dump. (#9474)
+- Normalize file paths for model IO and text input. This handles short paths on Windows and paths that contain `~` on Unix (#9463). In addition, all path inputs are required to be encoded in UTF-8. (#9448, #9443)
+- Fix integer overflow on H100. (#9380)
+- Fix weighted sketching on GPU with categorical features. (#9341)
+- Fix metric serialization. The bug might cause some of the metrics to be dropped during evaluation. (#9405)
+- Fix compilation errors on MSVC x86 targets. (#8823)
+- Pick up the dmlc-core fix for the CSV parser. (#8897)
+
+### Documentation
+Aside from documents for new features, we have many smaller updates to improve user experience, from troubleshooting guides to typo fixes.
+
+- Explain CPU/GPU interop. (#8450)
+- Guide to troubleshoot NCCL errors. (#8943, #9206)
+- Add a note for rabit port selection. (#8879)
+- How to build the docs using conda. (#9276)
+- Explain how to obtain reproducible results on distributed systems. (#8903)
+- Fixes and small updates to documents and demonstration scripts. (#8626, #8436, #8995, #8907, #8923, #8926, #9358, #9232, #9201, #9469, #9462, #9458, #8543, #8597, #8401, #8784, #9213, #9098, #9008, #9223, #9333, #9434, #9435, #9415, #8773, #8752, #9291, #9549)
+
+### Python package
+* New Features and Improvements
+  - Support primitive types of pyarrow-backed pandas DataFrames. (#8653)
+  - Warning messages emitted by XGBoost are now emitted using Python warnings. (#9387)
+  - Users can now format the value printed near the bars on the `plot_importance` plot. (#8540)
+  - XGBoost has improved half-type (float16) support with pandas, CuPy, and cuDF. With GPU input, the handling is through the CUDA `__half` type, and no data copy is made. (#8487, #9207, #8481)
+  - Support `Series` and Python primitive types in `inplace_predict` and `QuantileDMatrix`. (#8547, #8542)
+  - Support all pandas' nullable integer types. (#8480)
+  - Custom metrics with the scikit-learn interface now support `sample_weight`. (#8706)
+  - Enable installation of the Python package with system lib in a virtual environment. (#9349)
+  - Raise if expected workers are not alive in `xgboost.dask.train`. (#9421)
+
+* Optimization
+  - Cache transformed data in `QuantileDMatrix` for efficiency. (#8666, #9445)
+  - Take datatable as row-major input. (#8472)
+  - Remove unnecessary conversions between data structures. (#8546)
+
+* Adopt modern Python packaging conventions (PEP 517, PEP 518, PEP 621)
+  - XGBoost adopted the modern Python packaging conventions. The old setup script `setup.py` is now replaced with the new configuration file `pyproject.toml`. Along with this, XGBoost now supports Python 3.11. (#9021, #9112, #9114, #9115) Consult the latest documentation for the updated instructions to build and install XGBoost.
+
+* Fixes
+  - `DataIter` now accepts only keyword arguments. (#9431)
+  - Fix empty DMatrix with categorical features. (#8739)
+  - Convert `DaskXGBClassifier.classes_` to an array. (#8452)
+  - Define `best_iteration` only if early stopping is used, to be consistent with documented behavior. (#9403)
+  - Make feature validation immutable. (#9388)
+
+* Breaking changes
+  - As discussed in the new `device` parameter section, the `predictor` parameter is now removed. (#9129)
+  - Remove support for single-string feature info. Feature types and names should be a sequence of strings. (#9401)
+  - Remove parameters in the `save_model` call for the scikit-learn interface. (#8963)
+  - Remove `ntree_limit` in the Python package. This has been deprecated in previous versions. (#8345)
+
+* Maintenance, including formatting and refactoring along with type hints
+  - More consistent use of `black` and `isort` for code formatting. (#8420, #8748, #8867)
+  - Improved type support. Most of the type changes happened in the PySpark module; here we list the remaining changes. (#8444, #8617, #9197, #9005)
+  - Set `enable_categorical` to True in predict. (#8592)
+  - Some refactoring and updates for tests. (#8395, #8372, #8557, #8379, #8702, #9459, #9316, #8446, #8695, #8409, #8993, #9480)
+
+* Documentation
+  - Add introduction and notes for the sklearn interface. (#8948)
+  - Demo for using Dask for hyper-parameter optimization. (#8891)
+  - Document all supported Python input types. (#8643)
+  - Other documentation updates. (#8944, #9304)
+
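
An illustrative sketch of `QuantileDMatrix` with the `ref` argument, which reuses (and, per the optimization note above, now caches) the training quantile transform for validation data:

```python
import numpy as np
import xgboost as xgb

rng = np.random.default_rng(0)
X_train, y_train = rng.normal(size=(1000, 8)), rng.normal(size=1000)
X_valid, y_valid = rng.normal(size=(200, 8)), rng.normal(size=200)

Xy_train = xgb.QuantileDMatrix(X_train, label=y_train)
# `ref` shares the training cuts so validation data is binned consistently.
Xy_valid = xgb.QuantileDMatrix(X_valid, label=y_valid, ref=Xy_train)
booster = xgb.train(
    {"tree_method": "hist"}, Xy_train,
    num_boost_round=10, evals=[(Xy_valid, "valid")],
)
```
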
+### R package
+- Use the new data consumption interface for CSR and CSC. This provides better control over the number of threads and improves performance. (#8455, #8673)
+- Accept multiple evaluation metrics during training. (#8657)
+- Fix integer inputs with `NA`. (#9522)
+- Some refactoring for the R package (#8545, #8430, #8614, #8624, #8613, #9457, #8689, #8563, #9461, #8647, #8564, #8565, #8736, #8610, #8609, #8599, #8704, #9456, #9450, #9476, #9477, #9481). Special thanks to @jameslamb.
+- Document updates. (#8886, #9323, #9437, #8998)
+
+### JVM packages
+Following are changes specific to various JVM-based packages.
+
+- Stop using Rabit in prediction. (#9054)
+- Set feature_names and feature_types in jvm-packages. This is to prepare support for categorical features. (#9364)
+- Scala 2.13 support. (#9099)
+- Change training stage from `ResultStage` to `ShuffleMapStage`. (#9423)
+- Automatically set the max/min direction for the best score during early stopping. (#9404)
+- Revised support for `flink`. (#9046)
+
+* Breaking changes
+  - Scala-based tracker is removed. (#9078, #9045)
+  - Change `DeviceQuantileDmatrix` into `QuantileDMatrix`. (#8461)
+
+* Maintenance (#9253, #9166, #9395, #9389, #9224, #9233, #9351, #9479)
+
+* CI bot PRs
+  We employed the GitHub Dependabot to help us keep the dependencies up-to-date for JVM packages. With the help from the bot, we have cleared up all the dependencies that were lagging behind. (#8501, #8507)
+
+  Here's a list of dependency update PRs, including those made by the bot (#8456, #8560, #8571, #8561, #8562, #8600, #8594, #8524, #8509, #8548, #8549, #8533, #8521, #8534, #8532, #8516, #8503, #8531, #8530, #8518, #8512, #8515, #8517, #8506, #8504, #8502, #8629, #8815, #8813, #8814, #8877, #8876, #8875, #8874, #8873, #9049, #9070, #9073, #9039, #9083, #8917, #8952, #8980, #8973, #8962, #9252, #9208, #9131, #9136, #9219, #9160, #9158, #9163, #9184, #9192, #9265, #9268, #8882, #8837, #8662, #8661, #8390, #9056, #8508, #8925, #8920, #9149, #9230, #9097, #8648, #9203, #8593).
+
+### Maintenance
+Maintenance work includes refactoring and fixing small issues that don't affect end users. (#9256, #8627, #8756, #8735, #8966, #8864, #8747, #8892, #9057, #8921, #8949, #8941, #8942, #9108, #9125, #9155, #9153, #9176, #9447, #9444, #9436, #9438, #9430, #9200, #9210, #9055, #9014, #9004, #8999, #9154, #9148, #9283, #9246, #8888, #8900, #8871, #8861, #8858, #8791, #8807, #8751, #8703, #8696, #8693, #8677, #8686, #8665, #8660, #8386, #8371, #8410, #8578, #8574, #8483, #8443, #8454, #8733)
+
+### CI
+- Build pip wheel with RMM support. (#9383)
+- Other CI updates, including updating dependencies and work on the CI infrastructure. (#9464, #9428, #8767, #9394, #9278, #9214, #9234, #9205, #9034, #9104, #8878, #9294, #8625, #8806, #8741, #8707, #8381, #8382, #8388, #8402, #8397, #8445, #8602, #8628, #8583, #8460, #9544)
+
 ## 1.7.6 (2023 Jun 16)

 This is a patch release for bug fixes. The CRAN package for the R binding is kept at 1.7.5.

@@ -4,3 +4,5 @@
 ^.*\.Rproj$
 ^\.Rproj\.user$
 README.md
+^doc$
+^Meta$

@@ -70,7 +70,7 @@ cb.print.evaluation <- function(period = 1, showsd = TRUE) {
         i == env$begin_iteration ||
         i == env$end_iteration) {
       stdev <- if (showsd) env$bst_evaluation_err else NULL
-      msg <- format.eval.string(i, env$bst_evaluation, stdev)
+      msg <- .format_eval_string(i, env$bst_evaluation, stdev)
       cat(msg, '\n')
     }
   }
@@ -380,7 +380,9 @@ cb.early.stop <- function(stopping_rounds, maximize = FALSE,
     if ((maximize && score > best_score) ||
         (!maximize && score < best_score)) {

-      best_msg <<- format.eval.string(i, env$bst_evaluation, env$bst_evaluation_err)
+      best_msg <<- .format_eval_string(
+        i, env$bst_evaluation, env$bst_evaluation_err
+      )
       best_score <<- score
       best_iteration <<- i
       best_ntreelimit <<- best_iteration * env$num_parallel_tree
@@ -555,14 +557,18 @@ cb.cv.predict <- function(save_models = FALSE) {
 #'
 #' @examples
 #' #### Binary classification:
-#' #
+#'
+#' ## Keep the number of threads to 1 for examples
+#' nthread <- 1
+#' data.table::setDTthreads(nthread)
+#'
 #' # In the iris dataset, it is hard to linearly separate Versicolor class from the rest
 #' # without considering the 2nd order interactions:
 #' x <- model.matrix(Species ~ .^2, iris)[,-1]
 #' colnames(x)
-#' dtrain <- xgb.DMatrix(scale(x), label = 1*(iris$Species == "versicolor"), nthread = 2)
+#' dtrain <- xgb.DMatrix(scale(x), label = 1*(iris$Species == "versicolor"), nthread = nthread)
 #' param <- list(booster = "gblinear", objective = "reg:logistic", eval_metric = "auc",
-#'               lambda = 0.0003, alpha = 0.0003, nthread = 2)
+#'               lambda = 0.0003, alpha = 0.0003, nthread = nthread)
 #' # For 'shotgun', which is a default linear updater, using high eta values may result in
 #' # unstable behaviour in some datasets. With this simple dataset, however, the high learning
 #' # rate does not break the convergence, but allows us to illustrate the typical pattern of
@@ -592,9 +598,9 @@ cb.cv.predict <- function(save_models = FALSE) {
 #'
 #' #### Multiclass classification:
 #' #
-#' dtrain <- xgb.DMatrix(scale(x), label = as.numeric(iris$Species) - 1, nthread = 1)
+#' dtrain <- xgb.DMatrix(scale(x), label = as.numeric(iris$Species) - 1, nthread = nthread)
 #' param <- list(booster = "gblinear", objective = "multi:softprob", num_class = 3,
-#'               lambda = 0.0003, alpha = 0.0003, nthread = 1)
+#'               lambda = 0.0003, alpha = 0.0003, nthread = nthread)
 #' # For the default linear updater 'shotgun' it sometimes is helpful
 #' # to use smaller eta to reduce instability
 #' bst <- xgb.train(param, dtrain, list(tr=dtrain), nrounds = 50, eta = 0.5,
@@ -754,7 +760,7 @@ xgb.gblinear.history <- function(model, class_index = NULL) {
 #

 # Format the evaluation metric string
-format.eval.string <- function(iter, eval_res, eval_err = NULL) {
+.format_eval_string <- function(iter, eval_res, eval_err = NULL) {
   if (length(eval_res) == 0)
     stop('no evaluation results')
   enames <- names(eval_res)

@@ -21,13 +21,13 @@ xgb.Booster.handle <- function(params, cachelist, modelfile, handle) {
     ## A memory buffer
     bst <- xgb.unserialize(modelfile, handle)
     xgb.parameters(bst) <- params
-    return (bst)
+    return(bst)
   } else if (inherits(modelfile, "xgb.Booster")) {
     ## A booster object
     bst <- xgb.Booster.complete(modelfile, saveraw = TRUE)
     bst <- xgb.unserialize(bst$raw)
     xgb.parameters(bst) <- params
-    return (bst)
+    return(bst)
   } else {
     stop("modelfile must be either character filename, or raw booster dump, or xgb.Booster object")
   }
@@ -267,11 +267,16 @@ xgb.Booster.complete <- function(object, saveraw = TRUE) {
 #'
 #' data(agaricus.train, package='xgboost')
 #' data(agaricus.test, package='xgboost')
+#'
+#' ## Keep the number of threads to 2 for examples
+#' nthread <- 2
+#' data.table::setDTthreads(nthread)
+#'
 #' train <- agaricus.train
 #' test <- agaricus.test
 #'
 #' bst <- xgboost(data = train$data, label = train$label, max_depth = 2,
-#'                eta = 0.5, nthread = 2, nrounds = 5, objective = "binary:logistic")
+#'                eta = 0.5, nthread = nthread, nrounds = 5, objective = "binary:logistic")
 #' # use all trees by default
 #' pred <- predict(bst, test$data)
 #' # use only the 1st tree
@@ -337,8 +342,14 @@ predict.xgb.Booster <- function(object, newdata, missing = NA, outputmargin = FA
                                 reshape = FALSE, training = FALSE, iterationrange = NULL, strict_shape = FALSE, ...) {
   object <- xgb.Booster.complete(object, saveraw = FALSE)

-  if (!inherits(newdata, "xgb.DMatrix"))
-    newdata <- xgb.DMatrix(newdata, missing = missing, nthread = NVL(object$params[["nthread"]], -1))
+  if (!inherits(newdata, "xgb.DMatrix")) {
+    config <- jsonlite::fromJSON(xgb.config(object))
+    nthread <- strtoi(config$learner$generic_param$nthread)
+    newdata <- xgb.DMatrix(
+      newdata,
+      missing = missing, nthread = NVL(nthread, -1)
+    )
+  }
   if (!is.null(object[["feature_names"]]) &&
       !is.null(colnames(newdata)) &&
       !identical(object[["feature_names"]], colnames(newdata)))
@@ -371,7 +382,7 @@ predict.xgb.Booster <- function(object, newdata, missing = NA, outputmargin = FA
       cval[0] <- val
       return(cval)
     }
-    return (val)
+    return(val)
   }

   ## We set strict_shape to TRUE then drop the dimensions conditionally
@@ -628,10 +639,15 @@ xgb.attributes <- function(object) {
 #'
 #' @examples
 #' data(agaricus.train, package='xgboost')
+#' ## Keep the number of threads to 1 for examples
+#' nthread <- 1
+#' data.table::setDTthreads(nthread)
 #' train <- agaricus.train
 #'
-#' bst <- xgboost(data = train$data, label = train$label, max_depth = 2,
-#'                eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic")
+#' bst <- xgboost(
+#'   data = train$data, label = train$label, max_depth = 2,
+#'   eta = 1, nthread = nthread, nrounds = 2, objective = "binary:logistic"
+#' )
 #' config <- xgb.config(bst)
 #'
 #' @rdname xgb.config

@@ -18,7 +18,12 @@
 #'
 #' @examples
 #' data(agaricus.train, package='xgboost')
-#' dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))
+#' ## Keep the number of threads to 1 for examples
+#' nthread <- 1
+#' data.table::setDTthreads(nthread)
+#' dtrain <- with(
+#'   agaricus.train, xgb.DMatrix(data, label = label, nthread = nthread)
+#' )
 #' xgb.DMatrix.save(dtrain, 'xgb.DMatrix.data')
 #' dtrain <- xgb.DMatrix('xgb.DMatrix.data')
 #' if (file.exists('xgb.DMatrix.data')) file.remove('xgb.DMatrix.data')
@ -112,7 +117,7 @@ xgb.get.DMatrix <- function(data, label, missing, weight, nthread) {
       stop("xgboost: invalid input data")
     }
   }
-  return (dtrain)
+  return(dtrain)
 }
 
 
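Several hunks in this commit are the same mechanical lint fix. In R, return is an ordinary function call rather than a keyword, so return (x) and return(x) parse identically; the no-space form is simply what style checkers such as lintr expect. A two-line illustration (not part of the diff):

f <- function(x) return (x + 1)  # works, but flagged by R style checkers
g <- function(x) return(x + 1)   # preferred spelling, same behaviour
stopifnot(f(1) == g(1))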
@ -22,14 +22,23 @@
 #' @examples
 #' data(agaricus.train, package='xgboost')
 #' data(agaricus.test, package='xgboost')
+#'
+#' ## Keep the number of threads to 1 for examples
+#' nthread <- 1
+#' data.table::setDTthreads(nthread)
+#'
 #' train <- agaricus.train
 #' test <- agaricus.test
-#' bst <- xgboost(data = train$data, label = train$label, max_depth = 2,
-#'                eta = 1, nthread = 2, nrounds = 2,objective = "binary:logistic")
+#' bst <- xgboost(
+#'   data = train$data, label = train$label, max_depth = 2, eta = 1,
+#'   nthread = nthread,
+#'   nrounds = 2,
+#'   objective = "binary:logistic"
+#' )
+#'
 #' xgb.save(bst, 'xgb.model')
 #' bst <- xgb.load('xgb.model')
 #' if (file.exists('xgb.model')) file.remove('xgb.model')
-#' pred <- predict(bst, test$data)
 #' @export
 xgb.load <- function(modelfile) {
   if (is.null(modelfile))
@ -18,6 +18,6 @@ xgb.load.raw <- function(buffer, as_booster = FALSE) {
     booster <- xgb.Booster.complete(booster, saveraw = TRUE)
     return(booster)
   } else {
-    return (handle)
+    return(handle)
   }
 }
@ -46,9 +46,12 @@
 #' # Basic use:
 #'
 #' data(agaricus.train, package='xgboost')
+#' ## Keep the number of threads to 1 for examples
+#' nthread <- 1
+#' data.table::setDTthreads(nthread)
 #'
 #' bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label, max_depth = 2,
-#'                eta = 1, nthread = 2, nrounds = 2,objective = "binary:logistic")
+#'                eta = 1, nthread = nthread, nrounds = 2,objective = "binary:logistic")
 #'
 #' (dt <- xgb.model.dt.tree(colnames(agaricus.train$data), bst))
 #'
@ -45,10 +45,13 @@
 #' @examples
 #'
 #' data(agaricus.train, package='xgboost')
+#' ## Keep the number of threads to 2 for examples
+#' nthread <- 2
+#' data.table::setDTthreads(nthread)
 #'
-#' # Change max_depth to a higher number to get a more significant result
+#' ## Change max_depth to a higher number to get a more significant result
 #' bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label, max_depth = 6,
-#'                eta = 0.1, nthread = 2, nrounds = 50, objective = "binary:logistic",
+#'                eta = 0.1, nthread = nthread, nrounds = 50, objective = "binary:logistic",
 #'                subsample = 0.5, min_child_weight = 2)
 #'
 #' xgb.plot.deepness(bst)
@ -45,9 +45,14 @@
 #'
 #' @examples
 #' data(agaricus.train)
+#' ## Keep the number of threads to 2 for examples
+#' nthread <- 2
+#' data.table::setDTthreads(nthread)
 #'
-#' bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label, max_depth = 3,
-#'                eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic")
+#' bst <- xgboost(
+#'   data = agaricus.train$data, label = agaricus.train$label, max_depth = 3,
+#'   eta = 1, nthread = nthread, nrounds = 2, objective = "binary:logistic"
+#' )
 #'
 #' importance_matrix <- xgb.importance(colnames(agaricus.train$data), model = bst)
 #'
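For context on the example above (not part of the diff): for tree boosters, xgb.importance() returns a data.table with columns Feature, Gain, Cover and Frequency, so the result can be inspected directly, e.g.:

# assumes the importance_matrix built in the example above
head(importance_matrix[order(-Gain)])  # top features by gain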
@ -43,10 +43,15 @@
 #' @examples
 #'
 #' data(agaricus.train, package='xgboost')
+#' ## Keep the number of threads to 2 for examples
+#' nthread <- 2
+#' data.table::setDTthreads(nthread)
 #'
-#' bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label, max_depth = 15,
-#'                eta = 1, nthread = 2, nrounds = 30, objective = "binary:logistic",
-#'                min_child_weight = 50, verbose = 0)
+#' bst <- xgboost(
+#'   data = agaricus.train$data, label = agaricus.train$label, max_depth = 15,
+#'   eta = 1, nthread = nthread, nrounds = 30, objective = "binary:logistic",
+#'   min_child_weight = 50, verbose = 0
+#' )
 #'
 #' p <- xgb.plot.multi.trees(model = bst, features_keep = 3)
 #' print(p)
@ -74,9 +74,14 @@
 #' data(agaricus.train, package='xgboost')
 #' data(agaricus.test, package='xgboost')
 #'
-#' bst <- xgboost(agaricus.train$data, agaricus.train$label, nrounds = 50,
+#' ## Keep the number of threads to 1 for examples
+#' nthread <- 1
+#' data.table::setDTthreads(nthread)
+#' nrounds <- 20
+#'
+#' bst <- xgboost(agaricus.train$data, agaricus.train$label, nrounds = nrounds,
 #'                eta = 0.1, max_depth = 3, subsample = .5,
-#'                method = "hist", objective = "binary:logistic", nthread = 2, verbose = 0)
+#'                method = "hist", objective = "binary:logistic", nthread = nthread, verbose = 0)
 #'
 #' xgb.plot.shap(agaricus.test$data, model = bst, features = "odor=none")
 #' contr <- predict(bst, agaricus.test$data, predcontrib = TRUE)
@ -85,12 +90,11 @@
 #'
 #' # multiclass example - plots for each class separately:
 #' nclass <- 3
-#' nrounds <- 20
 #' x <- as.matrix(iris[, -5])
 #' set.seed(123)
 #' is.na(x[sample(nrow(x) * 4, 30)]) <- TRUE # introduce some missing values
 #' mbst <- xgboost(data = x, label = as.numeric(iris$Species) - 1, nrounds = nrounds,
-#'                 max_depth = 2, eta = 0.3, subsample = .5, nthread = 2,
+#'                 max_depth = 2, eta = 0.3, subsample = .5, nthread = nthread,
 #'                 objective = "multi:softprob", num_class = nclass, verbose = 0)
 #' trees0 <- seq(from=0, by=nclass, length.out=nrounds)
 #' col <- rgb(0, 0, 1, 0.5)
@ -25,14 +25,22 @@
 #' @examples
 #' data(agaricus.train, package='xgboost')
 #' data(agaricus.test, package='xgboost')
+#'
+#' ## Keep the number of threads to 1 for examples
+#' nthread <- 1
+#' data.table::setDTthreads(nthread)
+#'
 #' train <- agaricus.train
 #' test <- agaricus.test
-#' bst <- xgboost(data = train$data, label = train$label, max_depth = 2,
-#'                eta = 1, nthread = 2, nrounds = 2,objective = "binary:logistic")
+#' bst <- xgboost(
+#'   data = train$data, label = train$label, max_depth = 2, eta = 1,
+#'   nthread = nthread,
+#'   nrounds = 2,
+#'   objective = "binary:logistic"
+#' )
 #' xgb.save(bst, 'xgb.model')
 #' bst <- xgb.load('xgb.model')
 #' if (file.exists('xgb.model')) file.remove('xgb.model')
-#' pred <- predict(bst, test$data)
 #' @export
 xgb.save <- function(model, fname) {
   if (typeof(fname) != "character")
@ -16,13 +16,18 @@
 #' @examples
 #' data(agaricus.train, package='xgboost')
 #' data(agaricus.test, package='xgboost')
+#'
+#' ## Keep the number of threads to 2 for examples
+#' nthread <- 2
+#' data.table::setDTthreads(nthread)
+#'
 #' train <- agaricus.train
 #' test <- agaricus.test
 #' bst <- xgboost(data = train$data, label = train$label, max_depth = 2,
-#'                eta = 1, nthread = 2, nrounds = 2,objective = "binary:logistic")
+#'                eta = 1, nthread = nthread, nrounds = 2,objective = "binary:logistic")
+#'
 #' raw <- xgb.save.raw(bst)
 #' bst <- xgb.load.raw(raw)
-#' pred <- predict(bst, test$data)
 #'
 #' @export
 xgb.save.raw <- function(model, raw_format = "deprecated") {
@ -168,7 +168,8 @@
 #' than the \code{xgboost} interface.
 #'
 #' Parallelization is automatically enabled if \code{OpenMP} is present.
-#' Number of threads can also be manually specified via \code{nthread} parameter.
+#' Number of threads can also be manually specified via the \code{nthread}
+#' parameter.
 #'
 #' The evaluation metric is chosen automatically by XGBoost (according to the objective)
 #' when the \code{eval_metric} parameter is not provided.
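A compact illustration of the paragraph above (a sketch, not part of the diff): with OpenMP available, omitting nthread lets XGBoost pick the thread count itself, while passing it pins the count explicitly.

library(xgboost)
data(agaricus.train, package = "xgboost")
dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 1))
# default: nthread unset, OpenMP decides the thread count
param_default <- list(max_depth = 2, objective = "binary:logistic")
# pinned: one worker thread, useful for reproducible timings
param_pinned <- c(param_default, nthread = 1)
bst <- xgb.train(param_pinned, dtrain, nrounds = 2)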
@ -237,17 +238,25 @@
 #' data(agaricus.train, package='xgboost')
 #' data(agaricus.test, package='xgboost')
 #'
-#' dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))
-#' dtest <- with(agaricus.test, xgb.DMatrix(data, label = label, nthread = 2))
+#' ## Keep the number of threads to 1 for examples
+#' nthread <- 1
+#' data.table::setDTthreads(nthread)
+#'
+#' dtrain <- with(
+#'   agaricus.train, xgb.DMatrix(data, label = label, nthread = nthread)
+#' )
+#' dtest <- with(
+#'   agaricus.test, xgb.DMatrix(data, label = label, nthread = nthread)
+#' )
 #' watchlist <- list(train = dtrain, eval = dtest)
 #'
 #' ## A simple xgb.train example:
-#' param <- list(max_depth = 2, eta = 1, verbose = 0, nthread = 2,
+#' param <- list(max_depth = 2, eta = 1, verbose = 0, nthread = nthread,
 #'               objective = "binary:logistic", eval_metric = "auc")
 #' bst <- xgb.train(param, dtrain, nrounds = 2, watchlist)
 #'
-#'
-#' ## An xgb.train example where custom objective and evaluation metric are used:
+#' ## An xgb.train example where custom objective and evaluation metric are
+#' ## used:
 #' logregobj <- function(preds, dtrain) {
 #'   labels <- getinfo(dtrain, "label")
 #'   preds <- 1/(1 + exp(-preds))
@ -263,12 +272,12 @@
 #'
 #' # These functions could be used by passing them either:
 #' # as 'objective' and 'eval_metric' parameters in the params list:
-#' param <- list(max_depth = 2, eta = 1, verbose = 0, nthread = 2,
+#' param <- list(max_depth = 2, eta = 1, verbose = 0, nthread = nthread,
 #'               objective = logregobj, eval_metric = evalerror)
 #' bst <- xgb.train(param, dtrain, nrounds = 2, watchlist)
 #'
 #' # or through the ... arguments:
-#' param <- list(max_depth = 2, eta = 1, verbose = 0, nthread = 2)
+#' param <- list(max_depth = 2, eta = 1, verbose = 0, nthread = nthread)
 #' bst <- xgb.train(param, dtrain, nrounds = 2, watchlist,
 #'                  objective = logregobj, eval_metric = evalerror)
 #'
@ -278,7 +287,7 @@
 #'
 #'
 #' ## An xgb.train example of using variable learning rates at each iteration:
-#' param <- list(max_depth = 2, eta = 1, verbose = 0, nthread = 2,
+#' param <- list(max_depth = 2, eta = 1, verbose = 0, nthread = nthread,
 #'               objective = "binary:logistic", eval_metric = "auc")
 #' my_etas <- list(eta = c(0.5, 0.1))
 #' bst <- xgb.train(param, dtrain, nrounds = 2, watchlist,
@ -290,7 +299,7 @@
 #'
 #' ## An 'xgboost' interface example:
 #' bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label,
-#'                max_depth = 2, eta = 1, nthread = 2, nrounds = 2,
+#'                max_depth = 2, eta = 1, nthread = nthread, nrounds = 2,
 #'                objective = "binary:logistic")
 #' pred <- predict(bst, agaricus.test$data)
 #'
@ -37,5 +37,5 @@ xgb.unserialize <- function(buffer, handle = NULL) {
     }
   })
   class(handle) <- "xgb.Booster.handle"
-  return (handle)
+  return(handle)
 }
@ -24,7 +24,7 @@ xgboost <- function(data = NULL, label = NULL, missing = NA, weight = NULL,
                      early_stopping_rounds = early_stopping_rounds, maximize = maximize,
                      save_period = save_period, save_name = save_name,
                      xgb_model = xgb_model, callbacks = callbacks, ...)
-  return (bst)
+  return(bst)
 }
 
 #' Training part from Mushroom Data Set
@ -25,7 +25,7 @@ xgb.cv(param, dtrain, nrounds, nfold = 5,
 # you can also do cross validation with customized loss function
 # See custom_objective.R
 ##
-print ('running cross validation, with customized loss function')
+print('running cross validation, with customized loss function')
 
 logregobj <- function(preds, dtrain) {
   labels <- getinfo(dtrain, "label")
@ -35,7 +35,7 @@ evalerror <- function(preds, dtrain) {
 
 param <- list(max_depth = 2, eta = 1, nthread = 2, verbosity = 0,
               objective = logregobj, eval_metric = evalerror)
-print ('start training with user customized objective')
+print('start training with user customized objective')
 # training with customized objective, we can also do step by step training
 # simply look at xgboost.py's implementation of train
 bst <- xgb.train(param, dtrain, num_round, watchlist)
@ -59,7 +59,7 @@ logregobjattr <- function(preds, dtrain) {
 }
 param <- list(max_depth = 2, eta = 1, nthread = 2, verbosity = 0,
               objective = logregobjattr, eval_metric = evalerror)
-print ('start training with user customized objective, with additional attributes in DMatrix')
+print('start training with user customized objective, with additional attributes in DMatrix')
 # training with customized objective, we can also do step by step training
 # simply look at xgboost.py's implementation of train
 bst <- xgb.train(param, dtrain, num_round, watchlist)
@ -30,7 +30,7 @@ evalerror <- function(preds, dtrain) {
   err <- as.numeric(sum(labels != (preds > 0))) / length(labels)
   return(list(metric = "error", value = err))
 }
-print ('start training with early Stopping setting')
+print('start training with early Stopping setting')
 
 bst <- xgb.train(param, dtrain, num_round, watchlist,
                  objective = logregobj, eval_metric = evalerror, maximize = FALSE,
@ -35,14 +35,18 @@ Callback function expects the following values to be set in its calling frame:
 }
 \examples{
 #### Binary classification:
-#
+
+## Keep the number of threads to 1 for examples
+nthread <- 1
+data.table::setDTthreads(nthread)
 
 # In the iris dataset, it is hard to linearly separate Versicolor class from the rest
 # without considering the 2nd order interactions:
 x <- model.matrix(Species ~ .^2, iris)[,-1]
 colnames(x)
-dtrain <- xgb.DMatrix(scale(x), label = 1*(iris$Species == "versicolor"), nthread = 2)
+dtrain <- xgb.DMatrix(scale(x), label = 1*(iris$Species == "versicolor"), nthread = nthread)
 param <- list(booster = "gblinear", objective = "reg:logistic", eval_metric = "auc",
-              lambda = 0.0003, alpha = 0.0003, nthread = 2)
+              lambda = 0.0003, alpha = 0.0003, nthread = nthread)
 # For 'shotgun', which is a default linear updater, using high eta values may result in
 # unstable behaviour in some datasets. With this simple dataset, however, the high learning
 # rate does not break the convergence, but allows us to illustrate the typical pattern of
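The data.table::setDTthreads(nthread) call that this commit threads through every example pins the number of threads data.table itself uses, so the surrounding example stays within the declared thread budget. One way to apply it without disturbing surrounding code (a sketch; the save-and-restore part is not in the diff):

old <- data.table::getDTthreads()   # remember the current setting
data.table::setDTthreads(1)         # pin data.table to a single thread
# ... run the example ...
data.table::setDTthreads(old)       # restore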
@ -72,9 +76,9 @@ matplot(xgb.gblinear.history(bst)[[3]], type = 'l')
 
 #### Multiclass classification:
 #
-dtrain <- xgb.DMatrix(scale(x), label = as.numeric(iris$Species) - 1, nthread = 1)
+dtrain <- xgb.DMatrix(scale(x), label = as.numeric(iris$Species) - 1, nthread = nthread)
 param <- list(booster = "gblinear", objective = "multi:softprob", num_class = 3,
-              lambda = 0.0003, alpha = 0.0003, nthread = 1)
+              lambda = 0.0003, alpha = 0.0003, nthread = nthread)
 # For the default linear updater 'shotgun' it sometimes is helpful
 # to use smaller eta to reduce instability
 bst <- xgb.train(param, dtrain, list(tr=dtrain), nrounds = 50, eta = 0.5,
@ -132,11 +132,16 @@ Note also that converting a matrix to \code{\link{xgb.DMatrix}} uses multiple th
 
 data(agaricus.train, package='xgboost')
 data(agaricus.test, package='xgboost')
+
+## Keep the number of threads to 2 for examples
+nthread <- 2
+data.table::setDTthreads(nthread)
+
 train <- agaricus.train
 test <- agaricus.test
 
 bst <- xgboost(data = train$data, label = train$label, max_depth = 2,
-               eta = 0.5, nthread = 2, nrounds = 5, objective = "binary:logistic")
+               eta = 0.5, nthread = nthread, nrounds = 5, objective = "binary:logistic")
 # use all trees by default
 pred <- predict(bst, test$data)
 # use only the 1st tree
@ -38,7 +38,12 @@ Supported input file formats are either a LIBSVM text file or a binary file that
 }
 \examples{
 data(agaricus.train, package='xgboost')
-dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))
+## Keep the number of threads to 1 for examples
+nthread <- 1
+data.table::setDTthreads(nthread)
+dtrain <- with(
+  agaricus.train, xgb.DMatrix(data, label = label, nthread = nthread)
+)
 xgb.DMatrix.save(dtrain, 'xgb.DMatrix.data')
 dtrain <- xgb.DMatrix('xgb.DMatrix.data')
 if (file.exists('xgb.DMatrix.data')) file.remove('xgb.DMatrix.data')
@ -19,10 +19,15 @@ Accessors for model parameters as JSON string.
 }
 \examples{
 data(agaricus.train, package='xgboost')
+## Keep the number of threads to 1 for examples
+nthread <- 1
+data.table::setDTthreads(nthread)
 train <- agaricus.train
 
-bst <- xgboost(data = train$data, label = train$label, max_depth = 2,
-               eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic")
+bst <- xgboost(
+  data = train$data, label = train$label, max_depth = 2,
+  eta = 1, nthread = nthread, nrounds = 2, objective = "binary:logistic"
+)
 config <- xgb.config(bst)
 
 }
@ -27,14 +27,23 @@ not \code{xgb.load}.
 \examples{
 data(agaricus.train, package='xgboost')
 data(agaricus.test, package='xgboost')
 
+## Keep the number of threads to 1 for examples
+nthread <- 1
+data.table::setDTthreads(nthread)
+
 train <- agaricus.train
 test <- agaricus.test
-bst <- xgboost(data = train$data, label = train$label, max_depth = 2,
-               eta = 1, nthread = 2, nrounds = 2,objective = "binary:logistic")
+bst <- xgboost(
+  data = train$data, label = train$label, max_depth = 2, eta = 1,
+  nthread = nthread,
+  nrounds = 2,
+  objective = "binary:logistic"
+)
+
 xgb.save(bst, 'xgb.model')
 bst <- xgb.load('xgb.model')
 if (file.exists('xgb.model')) file.remove('xgb.model')
-pred <- predict(bst, test$data)
 }
 \seealso{
 \code{\link{xgb.save}}, \code{\link{xgb.Booster.complete}}.
@ -66,9 +66,12 @@ Parse a boosted tree model text dump into a \code{data.table} structure.
 # Basic use:
 
 data(agaricus.train, package='xgboost')
+## Keep the number of threads to 1 for examples
+nthread <- 1
+data.table::setDTthreads(nthread)
 
 bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label, max_depth = 2,
-               eta = 1, nthread = 2, nrounds = 2,objective = "binary:logistic")
+               eta = 1, nthread = nthread, nrounds = 2,objective = "binary:logistic")
 
 (dt <- xgb.model.dt.tree(colnames(agaricus.train$data), bst))
 
@ -61,10 +61,13 @@ This function was inspired by the blog post
 \examples{
 
 data(agaricus.train, package='xgboost')
+## Keep the number of threads to 2 for examples
+nthread <- 2
+data.table::setDTthreads(nthread)
 
-# Change max_depth to a higher number to get a more significant result
+## Change max_depth to a higher number to get a more significant result
 bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label, max_depth = 6,
-               eta = 0.1, nthread = 2, nrounds = 50, objective = "binary:logistic",
+               eta = 0.1, nthread = nthread, nrounds = 50, objective = "binary:logistic",
                subsample = 0.5, min_child_weight = 2)
 
 xgb.plot.deepness(bst)
@ -77,9 +77,14 @@ with bar colors corresponding to different clusters that have somewhat similar i
 }
 \examples{
 data(agaricus.train)
+## Keep the number of threads to 2 for examples
+nthread <- 2
+data.table::setDTthreads(nthread)
 
-bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label, max_depth = 3,
-               eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic")
+bst <- xgboost(
+  data = agaricus.train$data, label = agaricus.train$label, max_depth = 3,
+  eta = 1, nthread = nthread, nrounds = 2, objective = "binary:logistic"
+)
 
 importance_matrix <- xgb.importance(colnames(agaricus.train$data), model = bst)
 
@ -63,10 +63,15 @@ This function is inspired by this blog post:
 \examples{
 
 data(agaricus.train, package='xgboost')
+## Keep the number of threads to 2 for examples
+nthread <- 2
+data.table::setDTthreads(nthread)
 
-bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label, max_depth = 15,
-               eta = 1, nthread = 2, nrounds = 30, objective = "binary:logistic",
-               min_child_weight = 50, verbose = 0)
+bst <- xgboost(
+  data = agaricus.train$data, label = agaricus.train$label, max_depth = 15,
+  eta = 1, nthread = nthread, nrounds = 30, objective = "binary:logistic",
+  min_child_weight = 50, verbose = 0
+)
 
 p <- xgb.plot.multi.trees(model = bst, features_keep = 3)
 print(p)
@ -124,9 +124,14 @@ a meaningful thing to do.
 data(agaricus.train, package='xgboost')
 data(agaricus.test, package='xgboost')
 
-bst <- xgboost(agaricus.train$data, agaricus.train$label, nrounds = 50,
+## Keep the number of threads to 1 for examples
+nthread <- 1
+data.table::setDTthreads(nthread)
+nrounds <- 20
+
+bst <- xgboost(agaricus.train$data, agaricus.train$label, nrounds = nrounds,
                eta = 0.1, max_depth = 3, subsample = .5,
-               method = "hist", objective = "binary:logistic", nthread = 2, verbose = 0)
+               method = "hist", objective = "binary:logistic", nthread = nthread, verbose = 0)
 
 xgb.plot.shap(agaricus.test$data, model = bst, features = "odor=none")
 contr <- predict(bst, agaricus.test$data, predcontrib = TRUE)
@ -135,12 +140,11 @@ xgb.ggplot.shap.summary(agaricus.test$data, contr, model = bst, top_n = 12) # S
 
 # multiclass example - plots for each class separately:
 nclass <- 3
-nrounds <- 20
 x <- as.matrix(iris[, -5])
 set.seed(123)
 is.na(x[sample(nrow(x) * 4, 30)]) <- TRUE # introduce some missing values
 mbst <- xgboost(data = x, label = as.numeric(iris$Species) - 1, nrounds = nrounds,
-                max_depth = 2, eta = 0.3, subsample = .5, nthread = 2,
+                max_depth = 2, eta = 0.3, subsample = .5, nthread = nthread,
                 objective = "multi:softprob", num_class = nclass, verbose = 0)
 trees0 <- seq(from=0, by=nclass, length.out=nrounds)
 col <- rgb(0, 0, 1, 0.5)
@ -31,14 +31,22 @@ releases of XGBoost.
 \examples{
 data(agaricus.train, package='xgboost')
 data(agaricus.test, package='xgboost')
 
+## Keep the number of threads to 1 for examples
+nthread <- 1
+data.table::setDTthreads(nthread)
+
 train <- agaricus.train
 test <- agaricus.test
-bst <- xgboost(data = train$data, label = train$label, max_depth = 2,
-               eta = 1, nthread = 2, nrounds = 2,objective = "binary:logistic")
+bst <- xgboost(
+  data = train$data, label = train$label, max_depth = 2, eta = 1,
+  nthread = nthread,
+  nrounds = 2,
+  objective = "binary:logistic"
+)
 xgb.save(bst, 'xgb.model')
 bst <- xgb.load('xgb.model')
 if (file.exists('xgb.model')) file.remove('xgb.model')
-pred <- predict(bst, test$data)
 }
 \seealso{
 \code{\link{xgb.load}}, \code{\link{xgb.Booster.complete}}.
@ -25,12 +25,17 @@ Save xgboost model from xgboost or xgb.train
 \examples{
 data(agaricus.train, package='xgboost')
 data(agaricus.test, package='xgboost')
 
+## Keep the number of threads to 2 for examples
+nthread <- 2
+data.table::setDTthreads(nthread)
+
 train <- agaricus.train
 test <- agaricus.test
 bst <- xgboost(data = train$data, label = train$label, max_depth = 2,
-               eta = 1, nthread = 2, nrounds = 2,objective = "binary:logistic")
+               eta = 1, nthread = nthread, nrounds = 2,objective = "binary:logistic")
 
 raw <- xgb.save.raw(bst)
 bst <- xgb.load.raw(raw)
-pred <- predict(bst, test$data)
 
 }
@ -250,7 +250,8 @@ customized objective and evaluation metric functions, therefore it is more flexi
 than the \code{xgboost} interface.
 
 Parallelization is automatically enabled if \code{OpenMP} is present.
-Number of threads can also be manually specified via \code{nthread} parameter.
+Number of threads can also be manually specified via the \code{nthread}
+parameter.
 
 The evaluation metric is chosen automatically by XGBoost (according to the objective)
 when the \code{eval_metric} parameter is not provided.
@ -286,17 +287,25 @@ The following callbacks are automatically created when certain parameters are se
 data(agaricus.train, package='xgboost')
 data(agaricus.test, package='xgboost')
 
-dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))
-dtest <- with(agaricus.test, xgb.DMatrix(data, label = label, nthread = 2))
+## Keep the number of threads to 1 for examples
+nthread <- 1
+data.table::setDTthreads(nthread)
+
+dtrain <- with(
+  agaricus.train, xgb.DMatrix(data, label = label, nthread = nthread)
+)
+dtest <- with(
+  agaricus.test, xgb.DMatrix(data, label = label, nthread = nthread)
+)
 watchlist <- list(train = dtrain, eval = dtest)
 
 ## A simple xgb.train example:
-param <- list(max_depth = 2, eta = 1, verbose = 0, nthread = 2,
+param <- list(max_depth = 2, eta = 1, verbose = 0, nthread = nthread,
               objective = "binary:logistic", eval_metric = "auc")
 bst <- xgb.train(param, dtrain, nrounds = 2, watchlist)
 
-
-## An xgb.train example where custom objective and evaluation metric are used:
+## An xgb.train example where custom objective and evaluation metric are
+## used:
 logregobj <- function(preds, dtrain) {
   labels <- getinfo(dtrain, "label")
   preds <- 1/(1 + exp(-preds))
@ -312,12 +321,12 @@ evalerror <- function(preds, dtrain) {
 
 # These functions could be used by passing them either:
 # as 'objective' and 'eval_metric' parameters in the params list:
-param <- list(max_depth = 2, eta = 1, verbose = 0, nthread = 2,
+param <- list(max_depth = 2, eta = 1, verbose = 0, nthread = nthread,
               objective = logregobj, eval_metric = evalerror)
 bst <- xgb.train(param, dtrain, nrounds = 2, watchlist)
 
 # or through the ... arguments:
-param <- list(max_depth = 2, eta = 1, verbose = 0, nthread = 2)
+param <- list(max_depth = 2, eta = 1, verbose = 0, nthread = nthread)
 bst <- xgb.train(param, dtrain, nrounds = 2, watchlist,
                  objective = logregobj, eval_metric = evalerror)
 
@ -327,7 +336,7 @@ bst <- xgb.train(param, dtrain, nrounds = 2, watchlist,
 
 
 ## An xgb.train example of using variable learning rates at each iteration:
-param <- list(max_depth = 2, eta = 1, verbose = 0, nthread = 2,
+param <- list(max_depth = 2, eta = 1, verbose = 0, nthread = nthread,
               objective = "binary:logistic", eval_metric = "auc")
 my_etas <- list(eta = c(0.5, 0.1))
 bst <- xgb.train(param, dtrain, nrounds = 2, watchlist,
@ -339,7 +348,7 @@ bst <- xgb.train(param, dtrain, nrounds = 25, watchlist,
 
 ## An 'xgboost' interface example:
 bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label,
-               max_depth = 2, eta = 1, nthread = 2, nrounds = 2,
+               max_depth = 2, eta = 1, nthread = nthread, nrounds = 2,
                objective = "binary:logistic")
 pred <- predict(bst, agaricus.test$data)
 
@ -62,6 +62,7 @@ OBJECTS= \
     $(PKGROOT)/src/gbm/gbtree_model.o \
     $(PKGROOT)/src/gbm/gblinear.o \
     $(PKGROOT)/src/gbm/gblinear_model.o \
+    $(PKGROOT)/src/data/adapter.o \
     $(PKGROOT)/src/data/simple_dmatrix.o \
     $(PKGROOT)/src/data/data.o \
     $(PKGROOT)/src/data/sparse_page_raw_format.o \
@ -97,9 +98,15 @@ OBJECTS= \
    $(PKGROOT)/src/context.o \
    $(PKGROOT)/src/logging.o \
    $(PKGROOT)/src/global_config.o \
+    $(PKGROOT)/src/collective/allgather.o \
+    $(PKGROOT)/src/collective/allreduce.o \
+    $(PKGROOT)/src/collective/broadcast.o \
+    $(PKGROOT)/src/collective/comm.o \
+    $(PKGROOT)/src/collective/tracker.o \
    $(PKGROOT)/src/collective/communicator.o \
    $(PKGROOT)/src/collective/in_memory_communicator.o \
    $(PKGROOT)/src/collective/in_memory_handler.o \
+    $(PKGROOT)/src/collective/loop.o \
    $(PKGROOT)/src/collective/socket.o \
    $(PKGROOT)/src/common/charconv.o \
    $(PKGROOT)/src/common/column_matrix.o \
@ -62,6 +62,7 @@ OBJECTS= \
    $(PKGROOT)/src/gbm/gbtree_model.o \
    $(PKGROOT)/src/gbm/gblinear.o \
    $(PKGROOT)/src/gbm/gblinear_model.o \
+    $(PKGROOT)/src/data/adapter.o \
    $(PKGROOT)/src/data/simple_dmatrix.o \
    $(PKGROOT)/src/data/data.o \
    $(PKGROOT)/src/data/sparse_page_raw_format.o \
@ -97,9 +98,15 @@ OBJECTS= \
    $(PKGROOT)/src/context.o \
    $(PKGROOT)/src/logging.o \
    $(PKGROOT)/src/global_config.o \
+    $(PKGROOT)/src/collective/allgather.o \
+    $(PKGROOT)/src/collective/allreduce.o \
+    $(PKGROOT)/src/collective/broadcast.o \
+    $(PKGROOT)/src/collective/comm.o \
+    $(PKGROOT)/src/collective/tracker.o \
    $(PKGROOT)/src/collective/communicator.o \
    $(PKGROOT)/src/collective/in_memory_communicator.o \
    $(PKGROOT)/src/collective/in_memory_handler.o \
+    $(PKGROOT)/src/collective/loop.o \
    $(PKGROOT)/src/collective/socket.o \
    $(PKGROOT)/src/common/charconv.o \
    $(PKGROOT)/src/common/column_matrix.o \
@ -5,7 +5,6 @@
  * and edited to conform to xgboost C linter requirements. For details, see
  * https://cran.r-project.org/doc/manuals/r-release/R-exts.html#Registering-native-routines
  */
-#include <R.h>
 #include <Rinternals.h>
 #include <stdlib.h>
 #include <R_ext/Rdynload.h>
@ -20,7 +20,6 @@
 #include "../../src/common/threading_utils.h"
 
 #include "./xgboost_R.h"  // Must follow other includes.
-#include "Rinternals.h"
 
 /*!
  * \brief macro to annotate begin of api
@ -19,15 +19,15 @@ w <- runif(metadata$kRows)
 version <- packageVersion('xgboost')
 target_dir <- 'models'
 
-save_booster <- function (booster, model_name) {
-  booster_bin <- function (model_name) {
-    return (file.path(target_dir, paste('xgboost-', version, '.', model_name, '.bin', sep = '')))
+save_booster <- function(booster, model_name) {
+  booster_bin <- function(model_name) {
+    return(file.path(target_dir, paste('xgboost-', version, '.', model_name, '.bin', sep = '')))
   }
-  booster_json <- function (model_name) {
-    return (file.path(target_dir, paste('xgboost-', version, '.', model_name, '.json', sep = '')))
+  booster_json <- function(model_name) {
+    return(file.path(target_dir, paste('xgboost-', version, '.', model_name, '.json', sep = '')))
   }
-  booster_rds <- function (model_name) {
-    return (file.path(target_dir, paste('xgboost-', version, '.', model_name, '.rds', sep = '')))
+  booster_rds <- function(model_name) {
+    return(file.path(target_dir, paste('xgboost-', version, '.', model_name, '.rds', sep = '')))
   }
   xgb.save(booster, booster_bin(model_name))
   saveRDS(booster, booster_rds(model_name))
@ -36,7 +36,7 @@ save_booster <- function (booster, model_name) {
   }
 }
 
-generate_regression_model <- function () {
+generate_regression_model <- function() {
   print('Regression')
   y <- rnorm(metadata$kRows)
 
@ -47,7 +47,7 @@ generate_regression_model <- function () {
   save_booster(booster, 'reg')
 }
 
-generate_logistic_model <- function () {
+generate_logistic_model <- function() {
   print('Binary classification with logistic loss')
   y <- sample(0:1, size = metadata$kRows, replace = TRUE)
   stopifnot(max(y) == 1, min(y) == 0)
@ -64,7 +64,7 @@ generate_logistic_model <- function () {
   }
 }
 
-generate_classification_model <- function () {
+generate_classification_model <- function() {
   print('Multi-class classification')
   y <- sample(0:(metadata$kClasses - 1), size = metadata$kRows, replace = TRUE)
   stopifnot(max(y) == metadata$kClasses - 1, min(y) == 0)
@ -77,7 +77,7 @@ generate_classification_model <- function () {
   save_booster(booster, 'cls')
 }
 
-generate_ranking_model <- function () {
+generate_ranking_model <- function() {
   print('Learning to rank')
   y <- sample(0:4, size = metadata$kRows, replace = TRUE)
   stopifnot(max(y) == 4, min(y) == 0)
25
R-package/tests/helper_scripts/run-examples.R
Normal file
@ -0,0 +1,25 @@
+## Helper script for running individual examples.
+library(pkgload)
+library(xgboost)
+
+files <- list.files("./man")
+
+
+run_example_timeit <- function(f) {
+  path <- paste("./man/", f, sep = "")
+  print(paste("Test", f))
+  flush.console()
+  t0 <- proc.time()
+  run_example(path)
+  t1 <- proc.time()
+  list(file = f, time = t1 - t0)
+}
+
+timings <- lapply(files, run_example_timeit)
+
+for (t in timings) {
+  ratio <- t$time[1] / t$time[3]
+  if (!is.na(ratio) && !is.infinite(ratio) && ratio >= 2.5) {
+    print(paste("Offending example:", t$file, ratio))
+  }
+}
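A note on the new helper above: the script's run_example() (from pkgload) executes the \examples{} block of a single Rd file, and the final loop compares user time against elapsed time, flagging any example whose ratio reaches 2.5 — i.e. one that quietly used several threads. Presumably it is invoked from the R-package directory, where ./man holds the Rd files; an assumed invocation, not spelled out in the script itself:

## from the R-package/ directory (assumed working directory):
# Rscript tests/helper_scripts/run-examples.R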
@ -1,23 +1,28 @@
 context("basic functions")
 
-data(agaricus.train, package = 'xgboost')
-data(agaricus.test, package = 'xgboost')
+data(agaricus.train, package = "xgboost")
+data(agaricus.test, package = "xgboost")
 train <- agaricus.train
 test <- agaricus.test
 set.seed(1994)
 
 # disable some tests for Win32
 windows_flag <- .Platform$OS.type == "windows" &&
   .Machine$sizeof.pointer != 8
-solaris_flag <- (Sys.info()['sysname'] == "SunOS")
+solaris_flag <- (Sys.info()["sysname"] == "SunOS")
+n_threads <- 1
 
 
 test_that("train and predict binary classification", {
   nrounds <- 2
   expect_output(
-    bst <- xgboost(data = train$data, label = train$label, max_depth = 2,
-                   eta = 1, nthread = 2, nrounds = nrounds, objective = "binary:logistic",
-                   eval_metric = "error")
-    , "train-error")
+    bst <- xgboost(
+      data = train$data, label = train$label, max_depth = 2,
+      eta = 1, nthread = n_threads, nrounds = nrounds,
+      objective = "binary:logistic", eval_metric = "error"
+    ),
+    "train-error"
+  )
   expect_equal(class(bst), "xgb.Booster")
   expect_equal(bst$niter, nrounds)
   expect_false(is.null(bst$evaluation_log))
@ -46,26 +51,39 @@ test_that("parameter validation works", {
   d <- cbind(
     x1 = rnorm(10),
     x2 = rnorm(10),
-    x3 = rnorm(10))
+    x3 = rnorm(10)
+  )
   y <- d[, "x1"] + d[, "x2"]^2 +
     ifelse(d[, "x3"] > .5, d[, "x3"]^2, 2^d[, "x3"]) +
     rnorm(10)
-  dtrain <- xgb.DMatrix(data = d, info = list(label = y))
+  dtrain <- xgb.DMatrix(data = d, info = list(label = y), nthread = n_threads)
 
   correct <- function() {
-    params <- list(max_depth = 2, booster = "dart",
-                   rate_drop = 0.5, one_drop = TRUE,
-                   objective = "reg:squarederror")
+    params <- list(
+      max_depth = 2,
+      booster = "dart",
+      rate_drop = 0.5,
+      one_drop = TRUE,
+      nthread = n_threads,
+      objective = "reg:squarederror"
+    )
     xgb.train(params = params, data = dtrain, nrounds = nrounds)
   }
   expect_silent(correct())
   incorrect <- function() {
-    params <- list(max_depth = 2, booster = "dart",
-                   rate_drop = 0.5, one_drop = TRUE,
-                   objective = "reg:squarederror",
-                   foo = "bar", bar = "foo")
+    params <- list(
+      max_depth = 2,
+      booster = "dart",
+      rate_drop = 0.5,
+      one_drop = TRUE,
+      objective = "reg:squarederror",
+      nthread = n_threads,
+      foo = "bar",
+      bar = "foo"
+    )
     output <- capture.output(
-      xgb.train(params = params, data = dtrain, nrounds = nrounds))
+      xgb.train(params = params, data = dtrain, nrounds = nrounds)
+    )
     print(output)
   }
   expect_output(incorrect(), '\\\\"bar\\\\", \\\\"foo\\\\"')
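What the incorrect() helper above pins down, in isolation (a sketch reusing the test's dtrain; not part of the diff): unknown parameters such as foo/bar do not abort training — XGBoost trains anyway and prints a message listing them, which the test captures and matches.

params <- list(objective = "reg:squarederror", nthread = 1, foo = "bar")
out <- capture.output(xgb.train(params = params, data = dtrain, nrounds = 2))
# 'out' now contains the warning text naming the unused "foo" parameter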
@ -79,7 +97,8 @@ test_that("dart prediction works", {
   d <- cbind(
     x1 = rnorm(100),
     x2 = rnorm(100),
-    x3 = rnorm(100))
+    x3 = rnorm(100)
+  )
   y <- d[, "x1"] + d[, "x2"]^2 +
     ifelse(d[, "x3"] > .5, d[, "x3"]^2, 2^d[, "x3"]) +
     rnorm(100)
@ -93,7 +112,7 @@ test_that("dart prediction works", {
     rate_drop = 0.5,
     one_drop = TRUE,
     eta = 1,
-    nthread = 2,
+    nthread = n_threads,
     nrounds = nrounds,
     objective = "reg:squarederror"
   )
@ -105,7 +124,7 @@ test_that("dart prediction works", {
   expect_false(all(matrix(pred_by_xgboost_0, byrow = TRUE) == matrix(pred_by_xgboost_2, byrow = TRUE)))
 
   set.seed(1994)
-  dtrain <- xgb.DMatrix(data = d, info = list(label = y))
+  dtrain <- xgb.DMatrix(data = d, info = list(label = y), nthread = n_threads)
   booster_by_train <- xgb.train(
     params = list(
       booster = "dart",
@ -113,7 +132,7 @@ test_that("dart prediction works", {
       eta = 1,
       rate_drop = 0.5,
       one_drop = TRUE,
-      nthread = 1,
+      nthread = n_threads,
       objective = "reg:squarederror"
     ),
     data = dtrain,
@ -132,10 +151,13 @@ test_that("train and predict softprob", {
   lb <- as.numeric(iris$Species) - 1
   set.seed(11)
   expect_output(
-    bst <- xgboost(data = as.matrix(iris[, -5]), label = lb,
-                   max_depth = 3, eta = 0.5, nthread = 2, nrounds = 5,
-                   objective = "multi:softprob", num_class = 3, eval_metric = "merror")
-    , "train-merror")
+    bst <- xgboost(
+      data = as.matrix(iris[, -5]), label = lb,
+      max_depth = 3, eta = 0.5, nthread = n_threads, nrounds = 5,
+      objective = "multi:softprob", num_class = 3, eval_metric = "merror"
+    ),
+    "train-merror"
+  )
   expect_false(is.null(bst$evaluation_log))
   expect_lt(bst$evaluation_log[, min(train_merror)], 0.025)
   expect_equal(bst$niter * 3, xgb.ntree(bst))
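A minimal sketch of the prediction shape the softprob test relies on (reusing the bst trained just above; not part of the diff): with multi:softprob the booster emits one probability per class per row, and reshape = TRUE folds that vector into an n-by-num_class matrix.

pred <- predict(bst, as.matrix(iris[, -5]), reshape = TRUE)
stopifnot(is.matrix(pred), ncol(pred) == 3, nrow(pred) == nrow(iris))
stopifnot(abs(rowSums(pred) - 1) < 1e-5)  # class probabilities sum to 1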
@ -164,9 +186,10 @@ test_that("train and predict softprob", {
     x3 = rnorm(100)
   )
   y <- sample.int(10, 100, replace = TRUE) - 1
-  dtrain <- xgb.DMatrix(data = d, info = list(label = y))
+  dtrain <- xgb.DMatrix(data = d, info = list(label = y), nthread = n_threads)
   booster <- xgb.train(
-    params = list(tree_method = "hist"), data = dtrain, nrounds = 4, num_class = 10,
+    params = list(tree_method = "hist", nthread = n_threads),
+    data = dtrain, nrounds = 4, num_class = 10,
     objective = "multi:softprob"
   )
   predt <- predict(booster, as.matrix(d), reshape = TRUE, strict_shape = FALSE)
@ -178,10 +201,13 @@ test_that("train and predict softmax", {
|
|||||||
lb <- as.numeric(iris$Species) - 1
|
lb <- as.numeric(iris$Species) - 1
|
||||||
set.seed(11)
|
set.seed(11)
|
||||||
expect_output(
|
expect_output(
|
||||||
bst <- xgboost(data = as.matrix(iris[, -5]), label = lb,
|
bst <- xgboost(
|
||||||
max_depth = 3, eta = 0.5, nthread = 2, nrounds = 5,
|
data = as.matrix(iris[, -5]), label = lb,
|
||||||
objective = "multi:softmax", num_class = 3, eval_metric = "merror")
|
max_depth = 3, eta = 0.5, nthread = n_threads, nrounds = 5,
|
||||||
, "train-merror")
|
objective = "multi:softmax", num_class = 3, eval_metric = "merror"
|
||||||
|
),
|
||||||
|
"train-merror"
|
||||||
|
)
|
||||||
expect_false(is.null(bst$evaluation_log))
|
expect_false(is.null(bst$evaluation_log))
|
||||||
expect_lt(bst$evaluation_log[, min(train_merror)], 0.025)
|
expect_lt(bst$evaluation_log[, min(train_merror)], 0.025)
|
||||||
expect_equal(bst$niter * 3, xgb.ntree(bst))
|
expect_equal(bst$niter * 3, xgb.ntree(bst))
|
||||||
@ -196,16 +222,19 @@ test_that("train and predict RF", {
|
|||||||
set.seed(11)
|
set.seed(11)
|
||||||
lb <- train$label
|
lb <- train$label
|
||||||
# single iteration
|
# single iteration
|
||||||
bst <- xgboost(data = train$data, label = lb, max_depth = 5,
|
bst <- xgboost(
|
||||||
nthread = 2, nrounds = 1, objective = "binary:logistic", eval_metric = "error",
|
data = train$data, label = lb, max_depth = 5,
|
||||||
num_parallel_tree = 20, subsample = 0.6, colsample_bytree = 0.1)
|
nthread = n_threads,
|
||||||
|
nrounds = 1, objective = "binary:logistic", eval_metric = "error",
|
||||||
|
num_parallel_tree = 20, subsample = 0.6, colsample_bytree = 0.1
|
||||||
|
)
|
||||||
expect_equal(bst$niter, 1)
|
expect_equal(bst$niter, 1)
|
||||||
expect_equal(xgb.ntree(bst), 20)
|
expect_equal(xgb.ntree(bst), 20)
|
||||||
|
|
||||||
pred <- predict(bst, train$data)
|
pred <- predict(bst, train$data)
|
||||||
pred_err <- sum((pred > 0.5) != lb) / length(lb)
|
pred_err <- sum((pred > 0.5) != lb) / length(lb)
|
||||||
expect_lt(abs(bst$evaluation_log[1, train_error] - pred_err), 10e-6)
|
expect_lt(abs(bst$evaluation_log[1, train_error] - pred_err), 10e-6)
|
||||||
#expect_lt(pred_err, 0.03)
|
# expect_lt(pred_err, 0.03)
|
||||||
|
|
||||||
pred <- predict(bst, train$data, ntreelimit = 20)
|
pred <- predict(bst, train$data, ntreelimit = 20)
|
||||||
pred_err_20 <- sum((pred > 0.5) != lb) / length(lb)
|
pred_err_20 <- sum((pred > 0.5) != lb) / length(lb)
|
||||||
@ -219,11 +248,13 @@ test_that("train and predict RF with softprob", {
|
|||||||
lb <- as.numeric(iris$Species) - 1
|
lb <- as.numeric(iris$Species) - 1
|
||||||
nrounds <- 15
|
nrounds <- 15
|
||||||
set.seed(11)
|
set.seed(11)
|
||||||
bst <- xgboost(data = as.matrix(iris[, -5]), label = lb,
|
bst <- xgboost(
|
||||||
max_depth = 3, eta = 0.9, nthread = 2, nrounds = nrounds,
|
data = as.matrix(iris[, -5]), label = lb,
|
||||||
objective = "multi:softprob", eval_metric = "merror",
|
max_depth = 3, eta = 0.9, nthread = n_threads, nrounds = nrounds,
|
||||||
num_class = 3, verbose = 0,
|
objective = "multi:softprob", eval_metric = "merror",
|
||||||
num_parallel_tree = 4, subsample = 0.5, colsample_bytree = 0.5)
|
num_class = 3, verbose = 0,
|
||||||
|
num_parallel_tree = 4, subsample = 0.5, colsample_bytree = 0.5
|
||||||
|
)
|
||||||
expect_equal(bst$niter, 15)
|
expect_equal(bst$niter, 15)
|
||||||
expect_equal(xgb.ntree(bst), 15 * 3 * 4)
|
expect_equal(xgb.ntree(bst), 15 * 3 * 4)
|
||||||
# predict for all iterations:
|
# predict for all iterations:
|
||||||
@ -240,18 +271,24 @@ test_that("train and predict RF with softprob", {
|
|||||||
|
|
||||||
test_that("use of multiple eval metrics works", {
|
test_that("use of multiple eval metrics works", {
|
||||||
expect_output(
|
expect_output(
|
||||||
bst <- xgboost(data = train$data, label = train$label, max_depth = 2,
|
bst <- xgboost(
|
||||||
eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic",
|
data = train$data, label = train$label, max_depth = 2,
|
||||||
eval_metric = 'error', eval_metric = 'auc', eval_metric = "logloss")
|
eta = 1, nthread = n_threads, nrounds = 2, objective = "binary:logistic",
|
||||||
, "train-error.*train-auc.*train-logloss")
|
eval_metric = "error", eval_metric = "auc", eval_metric = "logloss"
|
||||||
|
),
|
||||||
|
"train-error.*train-auc.*train-logloss"
|
||||||
|
)
|
||||||
expect_false(is.null(bst$evaluation_log))
|
expect_false(is.null(bst$evaluation_log))
|
||||||
expect_equal(dim(bst$evaluation_log), c(2, 4))
|
expect_equal(dim(bst$evaluation_log), c(2, 4))
|
||||||
expect_equal(colnames(bst$evaluation_log), c("iter", "train_error", "train_auc", "train_logloss"))
|
expect_equal(colnames(bst$evaluation_log), c("iter", "train_error", "train_auc", "train_logloss"))
|
||||||
expect_output(
|
expect_output(
|
||||||
bst2 <- xgboost(data = train$data, label = train$label, max_depth = 2,
|
bst2 <- xgboost(
|
||||||
eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic",
|
data = train$data, label = train$label, max_depth = 2,
|
||||||
eval_metric = list("error", "auc", "logloss"))
|
eta = 1, nthread = n_threads, nrounds = 2, objective = "binary:logistic",
|
||||||
, "train-error.*train-auc.*train-logloss")
|
eval_metric = list("error", "auc", "logloss")
|
||||||
|
),
|
||||||
|
"train-error.*train-auc.*train-logloss"
|
||||||
|
)
|
||||||
expect_false(is.null(bst2$evaluation_log))
|
expect_false(is.null(bst2$evaluation_log))
|
||||||
expect_equal(dim(bst2$evaluation_log), c(2, 4))
|
expect_equal(dim(bst2$evaluation_log), c(2, 4))
|
||||||
expect_equal(colnames(bst2$evaluation_log), c("iter", "train_error", "train_auc", "train_logloss"))
|
expect_equal(colnames(bst2$evaluation_log), c("iter", "train_error", "train_auc", "train_logloss"))
|
||||||
@ -259,9 +296,11 @@ test_that("use of multiple eval metrics works", {
|
|||||||
|
|
||||||
|
|
||||||
test_that("training continuation works", {
|
test_that("training continuation works", {
|
||||||
dtrain <- xgb.DMatrix(train$data, label = train$label)
|
dtrain <- xgb.DMatrix(train$data, label = train$label, nthread = n_threads)
|
||||||
watchlist <- list(train = dtrain)
|
watchlist <- list(train = dtrain)
|
||||||
param <- list(objective = "binary:logistic", max_depth = 2, eta = 1, nthread = 2)
|
param <- list(
|
||||||
|
objective = "binary:logistic", max_depth = 2, eta = 1, nthread = n_threads
|
||||||
|
)
|
||||||
|
|
||||||
# for the reference, use 4 iterations at once:
|
# for the reference, use 4 iterations at once:
|
||||||
set.seed(11)
|
set.seed(11)
|
||||||
@ -271,30 +310,33 @@ test_that("training continuation works", {
|
|||||||
bst1 <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0)
|
bst1 <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0)
|
||||||
# continue for two more:
|
# continue for two more:
|
||||||
bst2 <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0, xgb_model = bst1)
|
bst2 <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0, xgb_model = bst1)
|
||||||
if (!windows_flag && !solaris_flag)
|
if (!windows_flag && !solaris_flag) {
|
||||||
expect_equal(bst$raw, bst2$raw)
|
expect_equal(bst$raw, bst2$raw)
|
||||||
|
}
|
||||||
expect_false(is.null(bst2$evaluation_log))
|
expect_false(is.null(bst2$evaluation_log))
|
||||||
expect_equal(dim(bst2$evaluation_log), c(4, 2))
|
expect_equal(dim(bst2$evaluation_log), c(4, 2))
|
||||||
expect_equal(bst2$evaluation_log, bst$evaluation_log)
|
expect_equal(bst2$evaluation_log, bst$evaluation_log)
|
||||||
# test continuing from raw model data
|
# test continuing from raw model data
|
||||||
bst2 <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0, xgb_model = bst1$raw)
|
bst2 <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0, xgb_model = bst1$raw)
|
||||||
if (!windows_flag && !solaris_flag)
|
if (!windows_flag && !solaris_flag) {
|
||||||
expect_equal(bst$raw, bst2$raw)
|
expect_equal(bst$raw, bst2$raw)
|
||||||
|
}
|
||||||
expect_equal(dim(bst2$evaluation_log), c(2, 2))
|
expect_equal(dim(bst2$evaluation_log), c(2, 2))
|
||||||
# test continuing from a model in file
|
# test continuing from a model in file
|
||||||
xgb.save(bst1, "xgboost.json")
|
xgb.save(bst1, "xgboost.json")
|
||||||
bst2 <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0, xgb_model = "xgboost.json")
|
bst2 <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0, xgb_model = "xgboost.json")
|
||||||
if (!windows_flag && !solaris_flag)
|
if (!windows_flag && !solaris_flag) {
|
||||||
expect_equal(bst$raw, bst2$raw)
|
expect_equal(bst$raw, bst2$raw)
|
||||||
|
}
|
||||||
expect_equal(dim(bst2$evaluation_log), c(2, 2))
|
expect_equal(dim(bst2$evaluation_log), c(2, 2))
|
||||||
file.remove("xgboost.json")
|
file.remove("xgboost.json")
|
||||||
})
|
})
|
||||||
|
|
||||||
test_that("model serialization works", {
|
test_that("model serialization works", {
|
||||||
out_path <- "model_serialization"
|
out_path <- "model_serialization"
|
||||||
dtrain <- xgb.DMatrix(train$data, label = train$label)
|
dtrain <- xgb.DMatrix(train$data, label = train$label, nthread = n_threads)
|
||||||
watchlist <- list(train = dtrain)
|
watchlist <- list(train = dtrain)
|
||||||
param <- list(objective = "binary:logistic")
|
param <- list(objective = "binary:logistic", nthread = n_threads)
|
||||||
booster <- xgb.train(param, dtrain, nrounds = 4, watchlist)
|
booster <- xgb.train(param, dtrain, nrounds = 4, watchlist)
|
||||||
raw <- xgb.serialize(booster)
|
raw <- xgb.serialize(booster)
|
||||||
saveRDS(raw, out_path)
|
saveRDS(raw, out_path)
|
||||||
@ -309,11 +351,14 @@ test_that("model serialization works", {
|
|||||||
test_that("xgb.cv works", {
|
test_that("xgb.cv works", {
|
||||||
set.seed(11)
|
set.seed(11)
|
||||||
expect_output(
|
expect_output(
|
||||||
cv <- xgb.cv(data = train$data, label = train$label, max_depth = 2, nfold = 5,
|
cv <- xgb.cv(
|
||||||
eta = 1., nthread = 2, nrounds = 2, objective = "binary:logistic",
|
data = train$data, label = train$label, max_depth = 2, nfold = 5,
|
||||||
eval_metric = "error", verbose = TRUE)
|
eta = 1., nthread = n_threads, nrounds = 2, objective = "binary:logistic",
|
||||||
, "train-error:")
|
eval_metric = "error", verbose = TRUE
|
||||||
expect_is(cv, 'xgb.cv.synchronous')
|
),
|
||||||
|
"train-error:"
|
||||||
|
)
|
||||||
|
expect_is(cv, "xgb.cv.synchronous")
|
||||||
expect_false(is.null(cv$evaluation_log))
|
expect_false(is.null(cv$evaluation_log))
|
||||||
expect_lt(cv$evaluation_log[, min(test_error_mean)], 0.03)
|
expect_lt(cv$evaluation_log[, min(test_error_mean)], 0.03)
|
||||||
expect_lt(cv$evaluation_log[, min(test_error_std)], 0.008)
|
expect_lt(cv$evaluation_log[, min(test_error_std)], 0.008)
|
||||||
@ -326,15 +371,19 @@ test_that("xgb.cv works", {
|
|||||||
})
|
})
|
||||||
|
|
||||||
test_that("xgb.cv works with stratified folds", {
|
test_that("xgb.cv works with stratified folds", {
|
||||||
dtrain <- xgb.DMatrix(train$data, label = train$label)
|
dtrain <- xgb.DMatrix(train$data, label = train$label, nthread = n_threads)
|
||||||
set.seed(314159)
|
set.seed(314159)
|
||||||
cv <- xgb.cv(data = dtrain, max_depth = 2, nfold = 5,
|
cv <- xgb.cv(
|
||||||
eta = 1., nthread = 2, nrounds = 2, objective = "binary:logistic",
|
data = dtrain, max_depth = 2, nfold = 5,
|
||||||
verbose = TRUE, stratified = FALSE)
|
eta = 1., nthread = n_threads, nrounds = 2, objective = "binary:logistic",
|
||||||
|
verbose = TRUE, stratified = FALSE
|
||||||
|
)
|
||||||
set.seed(314159)
|
set.seed(314159)
|
||||||
cv2 <- xgb.cv(data = dtrain, max_depth = 2, nfold = 5,
|
cv2 <- xgb.cv(
|
||||||
eta = 1., nthread = 2, nrounds = 2, objective = "binary:logistic",
|
data = dtrain, max_depth = 2, nfold = 5,
|
||||||
verbose = TRUE, stratified = TRUE)
|
eta = 1., nthread = n_threads, nrounds = 2, objective = "binary:logistic",
|
||||||
|
verbose = TRUE, stratified = TRUE
|
||||||
|
)
|
||||||
# Stratified folds should result in a different evaluation logs
|
# Stratified folds should result in a different evaluation logs
|
||||||
expect_true(all(cv$evaluation_log[, test_logloss_mean] != cv2$evaluation_log[, test_logloss_mean]))
|
expect_true(all(cv$evaluation_log[, test_logloss_mean] != cv2$evaluation_log[, test_logloss_mean]))
|
||||||
})
|
})
|
||||||
@ -342,40 +391,57 @@ test_that("xgb.cv works with stratified folds", {
|
|||||||
test_that("train and predict with non-strict classes", {
|
test_that("train and predict with non-strict classes", {
|
||||||
# standard dense matrix input
|
# standard dense matrix input
|
||||||
train_dense <- as.matrix(train$data)
|
train_dense <- as.matrix(train$data)
|
||||||
bst <- xgboost(data = train_dense, label = train$label, max_depth = 2,
|
bst <- xgboost(
|
||||||
eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic", verbose = 0)
|
data = train_dense, label = train$label, max_depth = 2,
|
||||||
|
eta = 1, nthread = n_threads, nrounds = 2, objective = "binary:logistic",
|
||||||
|
verbose = 0
|
||||||
|
)
|
||||||
pr0 <- predict(bst, train_dense)
|
pr0 <- predict(bst, train_dense)
|
||||||
|
|
||||||
# dense matrix-like input of non-matrix class
|
# dense matrix-like input of non-matrix class
|
||||||
class(train_dense) <- 'shmatrix'
|
class(train_dense) <- "shmatrix"
|
||||||
expect_true(is.matrix(train_dense))
|
expect_true(is.matrix(train_dense))
|
||||||
expect_error(
|
expect_error(
|
||||||
bst <- xgboost(data = train_dense, label = train$label, max_depth = 2,
|
bst <- xgboost(
|
||||||
eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic", verbose = 0)
|
data = train_dense, label = train$label, max_depth = 2,
|
||||||
, regexp = NA)
|
eta = 1, nthread = n_threads, nrounds = 2, objective = "binary:logistic",
|
||||||
|
verbose = 0
|
||||||
|
),
|
||||||
|
regexp = NA
|
||||||
|
)
|
||||||
expect_error(pr <- predict(bst, train_dense), regexp = NA)
|
expect_error(pr <- predict(bst, train_dense), regexp = NA)
|
||||||
expect_equal(pr0, pr)
|
expect_equal(pr0, pr)
|
||||||
|
|
||||||
# dense matrix-like input of non-matrix class with some inheritance
|
# dense matrix-like input of non-matrix class with some inheritance
|
||||||
class(train_dense) <- c('pphmatrix', 'shmatrix')
|
class(train_dense) <- c("pphmatrix", "shmatrix")
|
||||||
expect_true(is.matrix(train_dense))
|
expect_true(is.matrix(train_dense))
|
||||||
expect_error(
|
expect_error(
|
||||||
bst <- xgboost(data = train_dense, label = train$label, max_depth = 2,
|
bst <- xgboost(
|
||||||
eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic", verbose = 0)
|
data = train_dense, label = train$label, max_depth = 2,
|
||||||
, regexp = NA)
|
eta = 1, nthread = n_threads, nrounds = 2, objective = "binary:logistic",
|
||||||
|
verbose = 0
|
||||||
|
),
|
||||||
|
regexp = NA
|
||||||
|
)
|
||||||
expect_error(pr <- predict(bst, train_dense), regexp = NA)
|
expect_error(pr <- predict(bst, train_dense), regexp = NA)
|
||||||
expect_equal(pr0, pr)
|
expect_equal(pr0, pr)
|
||||||
|
|
||||||
# when someone inherits from xgb.Booster, it should still be possible to use it as xgb.Booster
|
# when someone inherits from xgb.Booster, it should still be possible to use it as xgb.Booster
|
||||||
class(bst) <- c('super.Booster', 'xgb.Booster')
|
class(bst) <- c("super.Booster", "xgb.Booster")
|
||||||
expect_error(pr <- predict(bst, train_dense), regexp = NA)
|
expect_error(pr <- predict(bst, train_dense), regexp = NA)
|
||||||
expect_equal(pr0, pr)
|
expect_equal(pr0, pr)
|
||||||
})
|
})
|
||||||
|
|
||||||
test_that("max_delta_step works", {
|
test_that("max_delta_step works", {
|
||||||
dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
|
dtrain <- xgb.DMatrix(
|
||||||
|
agaricus.train$data, label = agaricus.train$label, nthread = n_threads
|
||||||
|
)
|
||||||
watchlist <- list(train = dtrain)
|
watchlist <- list(train = dtrain)
|
||||||
param <- list(objective = "binary:logistic", eval_metric = "logloss", max_depth = 2, nthread = 2, eta = 0.5)
|
param <- list(
|
||||||
|
objective = "binary:logistic", eval_metric = "logloss", max_depth = 2,
|
||||||
|
nthread = n_threads,
|
||||||
|
eta = 0.5
|
||||||
|
)
|
||||||
nrounds <- 5
|
nrounds <- 5
|
||||||
# model with no restriction on max_delta_step
|
# model with no restriction on max_delta_step
|
||||||
bst1 <- xgb.train(param, dtrain, nrounds, watchlist, verbose = 1)
|
bst1 <- xgb.train(param, dtrain, nrounds, watchlist, verbose = 1)
|
||||||
@ -395,14 +461,16 @@ test_that("colsample_bytree works", {
|
|||||||
test_y <- as.numeric(rowSums(test_x) > 0)
|
test_y <- as.numeric(rowSums(test_x) > 0)
|
||||||
colnames(train_x) <- paste0("Feature_", sprintf("%03d", 1:100))
|
colnames(train_x) <- paste0("Feature_", sprintf("%03d", 1:100))
|
||||||
colnames(test_x) <- paste0("Feature_", sprintf("%03d", 1:100))
|
colnames(test_x) <- paste0("Feature_", sprintf("%03d", 1:100))
|
||||||
dtrain <- xgb.DMatrix(train_x, label = train_y)
|
dtrain <- xgb.DMatrix(train_x, label = train_y, nthread = n_threads)
|
||||||
dtest <- xgb.DMatrix(test_x, label = test_y)
|
dtest <- xgb.DMatrix(test_x, label = test_y, nthread = n_threads)
|
||||||
watchlist <- list(train = dtrain, eval = dtest)
|
watchlist <- list(train = dtrain, eval = dtest)
|
||||||
## Use colsample_bytree = 0.01, so that roughly one out of 100 features is chosen for
|
## Use colsample_bytree = 0.01, so that roughly one out of 100 features is chosen for
|
||||||
## each tree
|
## each tree
|
||||||
param <- list(max_depth = 2, eta = 0, nthread = 2,
|
param <- list(
|
||||||
colsample_bytree = 0.01, objective = "binary:logistic",
|
max_depth = 2, eta = 0, nthread = n_threads,
|
||||||
eval_metric = "auc")
|
colsample_bytree = 0.01, objective = "binary:logistic",
|
||||||
|
eval_metric = "auc"
|
||||||
|
)
|
||||||
set.seed(2)
|
set.seed(2)
|
||||||
bst <- xgb.train(param, dtrain, nrounds = 100, watchlist, verbose = 0)
|
bst <- xgb.train(param, dtrain, nrounds = 100, watchlist, verbose = 0)
|
||||||
xgb.importance(model = bst)
|
xgb.importance(model = bst)
|
||||||
@ -412,9 +480,11 @@ test_that("colsample_bytree works", {
|
|||||||
})
|
})
|
||||||
|
|
||||||
test_that("Configuration works", {
|
test_that("Configuration works", {
|
||||||
bst <- xgboost(data = train$data, label = train$label, max_depth = 2,
|
bst <- xgboost(
|
||||||
eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic",
|
data = train$data, label = train$label, max_depth = 2,
|
||||||
eval_metric = 'error', eval_metric = 'auc', eval_metric = "logloss")
|
eta = 1, nthread = n_threads, nrounds = 2, objective = "binary:logistic",
|
||||||
|
eval_metric = "error", eval_metric = "auc", eval_metric = "logloss"
|
||||||
|
)
|
||||||
config <- xgb.config(bst)
|
config <- xgb.config(bst)
|
||||||
xgb.config(bst) <- config
|
xgb.config(bst) <- config
|
||||||
reloaded_config <- xgb.config(bst)
|
reloaded_config <- xgb.config(bst)
|
||||||
@ -451,22 +521,26 @@ test_that("strict_shape works", {
|
|||||||
y <- as.numeric(iris$Species) - 1
|
y <- as.numeric(iris$Species) - 1
|
||||||
X <- as.matrix(iris[, -5])
|
X <- as.matrix(iris[, -5])
|
||||||
|
|
||||||
bst <- xgboost(data = X, label = y,
|
bst <- xgboost(
|
||||||
max_depth = 2, nrounds = n_rounds,
|
data = X, label = y,
|
||||||
objective = "multi:softprob", num_class = 3, eval_metric = "merror")
|
max_depth = 2, nrounds = n_rounds, nthread = n_threads,
|
||||||
|
objective = "multi:softprob", num_class = 3, eval_metric = "merror"
|
||||||
|
)
|
||||||
|
|
||||||
test_strict_shape(bst, X, 3)
|
test_strict_shape(bst, X, 3)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
test_agaricus <- function() {
|
test_agaricus <- function() {
|
||||||
data(agaricus.train, package = 'xgboost')
|
data(agaricus.train, package = "xgboost")
|
||||||
X <- agaricus.train$data
|
X <- agaricus.train$data
|
||||||
y <- agaricus.train$label
|
y <- agaricus.train$label
|
||||||
|
|
||||||
bst <- xgboost(data = X, label = y, max_depth = 2,
|
bst <- xgboost(
|
||||||
nrounds = n_rounds, objective = "binary:logistic",
|
data = X, label = y, max_depth = 2, nthread = n_threads,
|
||||||
eval_metric = 'error', eval_metric = 'auc', eval_metric = "logloss")
|
nrounds = n_rounds, objective = "binary:logistic",
|
||||||
|
eval_metric = "error", eval_metric = "auc", eval_metric = "logloss"
|
||||||
|
)
|
||||||
|
|
||||||
test_strict_shape(bst, X, 1)
|
test_strict_shape(bst, X, 1)
|
||||||
}
|
}
|
||||||
@ -481,8 +555,10 @@ test_that("'predict' accepts CSR data", {
|
|||||||
x_csc <- as(X[1L, , drop = FALSE], "CsparseMatrix")
|
x_csc <- as(X[1L, , drop = FALSE], "CsparseMatrix")
|
||||||
x_csr <- as(x_csc, "RsparseMatrix")
|
x_csr <- as(x_csc, "RsparseMatrix")
|
||||||
x_spv <- as(x_csc, "sparseVector")
|
x_spv <- as(x_csc, "sparseVector")
|
||||||
bst <- xgboost(data = X, label = y, objective = "binary:logistic",
|
bst <- xgboost(
|
||||||
nrounds = 5L, verbose = FALSE)
|
data = X, label = y, objective = "binary:logistic",
|
||||||
|
nrounds = 5L, verbose = FALSE, nthread = n_threads,
|
||||||
|
)
|
||||||
p_csc <- predict(bst, x_csc)
|
p_csc <- predict(bst, x_csc)
|
||||||
p_csr <- predict(bst, x_csr)
|
p_csr <- predict(bst, x_csr)
|
||||||
p_spv <- predict(bst, x_spv)
|
p_spv <- predict(bst, x_spv)
|
||||||
|
|||||||
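The hunks above all apply one pattern: each hard-coded `nthread = 2` becomes a single `n_threads` constant that is passed both to the `xgb.DMatrix()` constructor and to the training call. A minimal standalone sketch of that pattern, using the agaricus data shipped with the package (the constant name matches the tests; everything else is illustrative):

    library(xgboost)

    # Pin the thread count once so the DMatrix constructor and the learner agree.
    n_threads <- 2

    data(agaricus.train, package = "xgboost")
    dtrain <- xgb.DMatrix(
      agaricus.train$data, label = agaricus.train$label, nthread = n_threads
    )
    bst <- xgb.train(
      params = list(objective = "binary:logistic", nthread = n_threads),
      data = dtrain, nrounds = 2
    )

Keeping the two values in sync avoids oversubscribing CI runners that expose only a couple of cores.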
@@ -6,6 +6,8 @@ data(agaricus.test, package = 'xgboost')
 train <- agaricus.train
 test <- agaricus.test

+n_threads <- 2
+
 # add some label noise for early stopping tests
 add.noise <- function(label, frac) {
   inoise <- sample(length(label), length(label) * frac)
@@ -15,15 +17,15 @@ add.noise <- function(label, frac) {
 set.seed(11)
 ltrain <- add.noise(train$label, 0.2)
 ltest <- add.noise(test$label, 0.2)
-dtrain <- xgb.DMatrix(train$data, label = ltrain)
-dtest <- xgb.DMatrix(test$data, label = ltest)
+dtrain <- xgb.DMatrix(train$data, label = ltrain, nthread = n_threads)
+dtest <- xgb.DMatrix(test$data, label = ltest, nthread = n_threads)
 watchlist <- list(train = dtrain, test = dtest)


 err <- function(label, pr) sum((pr > 0.5) != label) / length(label)

 param <- list(objective = "binary:logistic", eval_metric = "error",
-              max_depth = 2, nthread = 2)
+              max_depth = 2, nthread = n_threads)


 test_that("cb.print.evaluation works as expected", {
@@ -103,7 +105,7 @@ test_that("cb.evaluation.log works as expected", {


 param <- list(objective = "binary:logistic", eval_metric = "error",
-              max_depth = 4, nthread = 2)
+              max_depth = 4, nthread = n_threads)

 test_that("can store evaluation_log without printing", {
   expect_silent(
@@ -179,8 +181,10 @@ test_that("cb.save.model works as expected", {
   expect_true(file.exists('xgboost_01.json'))
   expect_true(file.exists('xgboost_02.json'))
   b1 <- xgb.load('xgboost_01.json')
+  xgb.parameters(b1) <- list(nthread = 2)
   expect_equal(xgb.ntree(b1), 1)
   b2 <- xgb.load('xgboost_02.json')
+  xgb.parameters(b2) <- list(nthread = 2)
   expect_equal(xgb.ntree(b2), 2)

   xgb.config(b2) <- xgb.config(bst)
@@ -267,7 +271,8 @@ test_that("early stopping works with titanic", {
     objective = "binary:logistic",
     eval_metric = "auc",
     nrounds = 100,
-    early_stopping_rounds = 3
+    early_stopping_rounds = 3,
+    nthread = n_threads
   )

   expect_true(TRUE)  # should not crash
@@ -308,7 +313,7 @@ test_that("prediction in xgb.cv works", {

 test_that("prediction in xgb.cv works for gblinear too", {
   set.seed(11)
-  p <- list(booster = 'gblinear', objective = "reg:logistic", nthread = 2)
+  p <- list(booster = 'gblinear', objective = "reg:logistic", nthread = n_threads)
   cv <- xgb.cv(p, dtrain, nfold = 5, eta = 0.5, nrounds = 2, prediction = TRUE, verbose = 0)
   expect_false(is.null(cv$evaluation_log))
   expect_false(is.null(cv$pred))
@@ -341,7 +346,7 @@ test_that("prediction in xgb.cv for softprob works", {
   set.seed(11)
   expect_warning(
     cv <- xgb.cv(data = as.matrix(iris[, -5]), label = lb, nfold = 4,
-                 eta = 0.5, nrounds = 5, max_depth = 3, nthread = 2,
+                 eta = 0.5, nrounds = 5, max_depth = 3, nthread = n_threads,
                  subsample = 0.8, gamma = 2, verbose = 0,
                  prediction = TRUE, objective = "multi:softprob", num_class = 3)
     , NA)
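A booster reloaded with `xgb.load()` carries no runtime thread configuration, which is why the saved models above get `xgb.parameters() <- list(nthread = 2)` before use. A sketch of the same round trip in isolation (file name borrowed from the cb.save.model test above; assumes a trained `bst` and the `test` data from this file are in scope):

    xgb.save(bst, "xgboost_01.json")         # persist the booster as JSON
    b1 <- xgb.load("xgboost_01.json")        # reload it; runtime params are unset
    xgb.parameters(b1) <- list(nthread = 2)  # pin threads before predicting
    pred <- predict(b1, test$data)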
@@ -2,10 +2,16 @@ context('Test models with custom objective')

 set.seed(1994)

+n_threads <- 2
+
 data(agaricus.train, package = 'xgboost')
 data(agaricus.test, package = 'xgboost')
-dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
-dtest <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label)
+dtrain <- xgb.DMatrix(
+  agaricus.train$data, label = agaricus.train$label, nthread = n_threads
+)
+dtest <- xgb.DMatrix(
+  agaricus.test$data, label = agaricus.test$label, nthread = n_threads
+)
 watchlist <- list(eval = dtest, train = dtrain)

 logregobj <- function(preds, dtrain) {
@@ -22,7 +28,7 @@ evalerror <- function(preds, dtrain) {
   return(list(metric = "error", value = err))
 }

-param <- list(max_depth = 2, eta = 1, nthread = 2,
+param <- list(max_depth = 2, eta = 1, nthread = n_threads,
               objective = logregobj, eval_metric = evalerror)
 num_round <- 2

@@ -67,7 +73,7 @@ test_that("custom objective using DMatrix attr works", {
 test_that("custom objective with multi-class shape", {
   data <- as.matrix(iris[, -5])
   label <- as.numeric(iris$Species) - 1
-  dtrain <- xgb.DMatrix(data = data, label = label)
+  dtrain <- xgb.DMatrix(data = data, label = label, nthread = n_threads)
   n_classes <- 3

   fake_softprob <- function(preds, dtrain) {
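For reference, the `logregobj`/`evalerror` pair configured above follows the standard custom-objective contract: both callbacks receive the raw margin predictions and the training DMatrix, the objective returns the per-row gradient and hessian, and the metric returns a named value. A sketch along the lines of the package demos (the bodies here are illustrative, not copied from the test file):

    logregobj <- function(preds, dtrain) {
      labels <- getinfo(dtrain, "label")
      preds <- 1 / (1 + exp(-preds))  # margins -> probabilities
      list(grad = preds - labels, hess = preds * (1 - preds))
    }

    evalerror <- function(preds, dtrain) {
      labels <- getinfo(dtrain, "label")
      err <- sum((preds > 0) != labels) / length(labels)
      list(metric = "error", value = err)
    }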
@@ -5,19 +5,21 @@ data(agaricus.test, package = "xgboost")
 test_data <- agaricus.test$data[1:100, ]
 test_label <- agaricus.test$label[1:100]

+n_threads <- 2
+
 test_that("xgb.DMatrix: basic construction", {
   # from sparse matrix
-  dtest1 <- xgb.DMatrix(test_data, label = test_label)
+  dtest1 <- xgb.DMatrix(test_data, label = test_label, nthread = n_threads)

   # from dense matrix
-  dtest2 <- xgb.DMatrix(as.matrix(test_data), label = test_label)
+  dtest2 <- xgb.DMatrix(as.matrix(test_data), label = test_label, nthread = n_threads)
   expect_equal(getinfo(dtest1, "label"), getinfo(dtest2, "label"))
   expect_equal(dim(dtest1), dim(dtest2))

   # from dense integer matrix
   int_data <- as.matrix(test_data)
   storage.mode(int_data) <- "integer"
-  dtest3 <- xgb.DMatrix(int_data, label = test_label)
+  dtest3 <- xgb.DMatrix(int_data, label = test_label, nthread = n_threads)
   expect_equal(dim(dtest1), dim(dtest3))

   n_samples <- 100
@@ -29,15 +31,15 @@ test_that("xgb.DMatrix: basic construction", {
   X <- matrix(X, nrow = n_samples)
   y <- rbinom(n = n_samples, size = 1, prob = 1 / 2)

-  fd <- xgb.DMatrix(X, label = y, missing = 1)
+  fd <- xgb.DMatrix(X, label = y, missing = 1, nthread = n_threads)

   dgc <- as(X, "dgCMatrix")
-  fdgc <- xgb.DMatrix(dgc, label = y, missing = 1.0)
+  fdgc <- xgb.DMatrix(dgc, label = y, missing = 1.0, nthread = n_threads)

   dgr <- as(X, "dgRMatrix")
-  fdgr <- xgb.DMatrix(dgr, label = y, missing = 1)
+  fdgr <- xgb.DMatrix(dgr, label = y, missing = 1, nthread = n_threads)

-  params <- list(tree_method = "hist")
+  params <- list(tree_method = "hist", nthread = n_threads)
   bst_fd <- xgb.train(
     params, nrounds = 8, fd, watchlist = list(train = fd)
   )
@@ -64,12 +66,12 @@ test_that("xgb.DMatrix: NA", {
   )
   x[1, "x1"] <- NA

-  m <- xgb.DMatrix(x)
+  m <- xgb.DMatrix(x, nthread = n_threads)
   xgb.DMatrix.save(m, "int.dmatrix")

   x <- matrix(as.numeric(x), nrow = n_samples, ncol = 2)
   colnames(x) <- c("x1", "x2")
-  m <- xgb.DMatrix(x)
+  m <- xgb.DMatrix(x, nthread = n_threads)

   xgb.DMatrix.save(m, "float.dmatrix")

@@ -94,7 +96,7 @@ test_that("xgb.DMatrix: NA", {

 test_that("xgb.DMatrix: saving, loading", {
   # save to a local file
-  dtest1 <- xgb.DMatrix(test_data, label = test_label)
+  dtest1 <- xgb.DMatrix(test_data, label = test_label, nthread = n_threads)
   tmp_file <- tempfile('xgb.DMatrix_')
   on.exit(unlink(tmp_file))
   expect_true(xgb.DMatrix.save(dtest1, tmp_file))
@@ -109,13 +111,17 @@ test_that("xgb.DMatrix: saving, loading", {
   tmp_file <- tempfile(fileext = ".libsvm")
   writeLines(tmp, tmp_file)
   expect_true(file.exists(tmp_file))
-  dtest4 <- xgb.DMatrix(paste(tmp_file, "?format=libsvm", sep = ""), silent = TRUE)
+  dtest4 <- xgb.DMatrix(
+    paste(tmp_file, "?format=libsvm", sep = ""), silent = TRUE, nthread = n_threads
+  )
   expect_equal(dim(dtest4), c(3, 4))
   expect_equal(getinfo(dtest4, 'label'), c(0, 1, 0))

   # check that feature info is saved
   data(agaricus.train, package = 'xgboost')
-  dtrain <- xgb.DMatrix(data = agaricus.train$data, label = agaricus.train$label)
+  dtrain <- xgb.DMatrix(
+    data = agaricus.train$data, label = agaricus.train$label, nthread = n_threads
+  )
   cnames <- colnames(dtrain)
   expect_equal(length(cnames), 126)
   tmp_file <- tempfile('xgb.DMatrix_')
@@ -129,7 +135,7 @@ test_that("xgb.DMatrix: saving, loading", {
 })

 test_that("xgb.DMatrix: getinfo & setinfo", {
-  dtest <- xgb.DMatrix(test_data)
+  dtest <- xgb.DMatrix(test_data, nthread = n_threads)
   expect_true(setinfo(dtest, 'label', test_label))
   labels <- getinfo(dtest, 'label')
   expect_equal(test_label, getinfo(dtest, 'label'))
@@ -156,7 +162,7 @@ test_that("xgb.DMatrix: getinfo & setinfo", {
 })

 test_that("xgb.DMatrix: slice, dim", {
-  dtest <- xgb.DMatrix(test_data, label = test_label)
+  dtest <- xgb.DMatrix(test_data, label = test_label, nthread = n_threads)
   expect_equal(dim(dtest), dim(test_data))
   dsub1 <- slice(dtest, 1:42)
   expect_equal(nrow(dsub1), 42)
@@ -171,16 +177,20 @@ test_that("xgb.DMatrix: slice, trailing empty rows", {
   data(agaricus.train, package = 'xgboost')
   train_data <- agaricus.train$data
   train_label <- agaricus.train$label
-  dtrain <- xgb.DMatrix(data = train_data, label = train_label)
+  dtrain <- xgb.DMatrix(
+    data = train_data, label = train_label, nthread = n_threads
+  )
   slice(dtrain, 6513L)
   train_data[6513, ] <- 0
-  dtrain <- xgb.DMatrix(data = train_data, label = train_label)
+  dtrain <- xgb.DMatrix(
+    data = train_data, label = train_label, nthread = n_threads
+  )
   slice(dtrain, 6513L)
   expect_equal(nrow(dtrain), 6513)
 })

 test_that("xgb.DMatrix: colnames", {
-  dtest <- xgb.DMatrix(test_data, label = test_label)
+  dtest <- xgb.DMatrix(test_data, label = test_label, nthread = n_threads)
   expect_equal(colnames(dtest), colnames(test_data))
   expect_error(colnames(dtest) <- 'asdf')
   new_names <- make.names(seq_len(ncol(test_data)))
@@ -196,7 +206,7 @@ test_that("xgb.DMatrix: nrow is correct for a very sparse matrix", {
   x <- Matrix::rsparsematrix(nr, 100, density = 0.0005)
   # we want it very sparse, so that last rows are empty
   expect_lt(max(x@i), nr)
-  dtest <- xgb.DMatrix(x)
+  dtest <- xgb.DMatrix(x, nthread = n_threads)
   expect_equal(dim(dtest), dim(x))
 })

@@ -205,8 +215,8 @@ test_that("xgb.DMatrix: print", {

   # core DMatrix with just data and labels
   dtrain <- xgb.DMatrix(
-    data = agaricus.train$data
-    , label = agaricus.train$label
+    data = agaricus.train$data, label = agaricus.train$label,
+    nthread = n_threads
   )
   txt <- capture.output({
     print(dtrain)
@@ -222,10 +232,11 @@ test_that("xgb.DMatrix: print", {

   # DMatrix with weights and base_margin
   dtrain <- xgb.DMatrix(
-    data = agaricus.train$data
-    , label = agaricus.train$label
-    , weight = seq_along(agaricus.train$label)
-    , base_margin = agaricus.train$label
+    data = agaricus.train$data,
+    label = agaricus.train$label,
+    weight = seq_along(agaricus.train$label),
+    base_margin = agaricus.train$label,
+    nthread = n_threads
   )
   txt <- capture.output({
     print(dtrain)
@@ -234,7 +245,8 @@ test_that("xgb.DMatrix: print", {

   # DMatrix with just features
   dtrain <- xgb.DMatrix(
-    data = agaricus.train$data
+    data = agaricus.train$data,
+    nthread = n_threads
   )
   txt <- capture.output({
     print(dtrain)
@@ -245,7 +257,8 @@ test_that("xgb.DMatrix: print", {
   data_no_colnames <- agaricus.train$data
   colnames(data_no_colnames) <- NULL
   dtrain <- xgb.DMatrix(
-    data = data_no_colnames
+    data = data_no_colnames,
+    nthread = n_threads
   )
   txt <- capture.output({
     print(dtrain)
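One detail worth calling out from the hunks above: when a file path is passed to `xgb.DMatrix()`, the `?format=libsvm` suffix selects the text LIBSVM parser rather than the binary DMatrix reader. A self-contained sketch (the three sample rows are invented for illustration):

    library(xgboost)

    tmp_file <- tempfile(fileext = ".libsvm")
    writeLines(c("0 1:1 2:2", "1 2:3 4:4", "0 3:1"), tmp_file)

    # The URI suffix picks the parser; nthread bounds the parallel parse.
    dmat <- xgb.DMatrix(
      paste(tmp_file, "?format=libsvm", sep = ""), silent = TRUE, nthread = 2
    )
    dim(dmat)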
@@ -1,5 +1,7 @@
 context("feature weights")

+n_threads <- 2
+
 test_that("training with feature weights works", {
   nrows <- 1000
   ncols <- 9
@@ -10,8 +12,12 @@ test_that("training with feature weights works", {

   test <- function(tm) {
     names <- paste0("f", 1:ncols)
-    xy <- xgb.DMatrix(data = x, label = y, feature_weights = weights)
-    params <- list(colsample_bynode = 0.4, tree_method = tm, nthread = 1)
+    xy <- xgb.DMatrix(
+      data = x, label = y, feature_weights = weights, nthread = n_threads
+    )
+    params <- list(
+      colsample_bynode = 0.4, tree_method = tm, nthread = n_threads
+    )
     model <- xgb.train(params = params, data = xy, nrounds = 32)
     importance <- xgb.importance(model = model, feature_names = names)
     expect_equal(dim(importance), c(ncols, 4))
@@ -1,13 +1,19 @@
 context('Test generalized linear models')

+n_threads <- 2
+
 test_that("gblinear works", {
   data(agaricus.train, package = 'xgboost')
   data(agaricus.test, package = 'xgboost')
-  dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
-  dtest <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label)
+  dtrain <- xgb.DMatrix(
+    agaricus.train$data, label = agaricus.train$label, nthread = n_threads
+  )
+  dtest <- xgb.DMatrix(
+    agaricus.test$data, label = agaricus.test$label, nthread = n_threads
+  )

   param <- list(objective = "binary:logistic", eval_metric = "error", booster = "gblinear",
-                nthread = 2, eta = 0.8, alpha = 0.0001, lambda = 0.0001)
+                nthread = n_threads, eta = 0.8, alpha = 0.0001, lambda = 0.0001)
   watchlist <- list(eval = dtest, train = dtrain)

   n <- 5  # iterations
@@ -48,12 +54,16 @@ test_that("gblinear works", {
 test_that("gblinear early stopping works", {
   data(agaricus.train, package = 'xgboost')
   data(agaricus.test, package = 'xgboost')
-  dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
-  dtest <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label)
+  dtrain <- xgb.DMatrix(
+    agaricus.train$data, label = agaricus.train$label, nthread = n_threads
+  )
+  dtest <- xgb.DMatrix(
+    agaricus.test$data, label = agaricus.test$label, nthread = n_threads
+  )

   param <- list(
     objective = "binary:logistic", eval_metric = "error", booster = "gblinear",
-    nthread = 2, eta = 0.8, alpha = 0.0001, lambda = 0.0001,
+    nthread = n_threads, eta = 0.8, alpha = 0.0001, lambda = 0.0001,
     updater = "coord_descent"
   )

@@ -171,6 +171,7 @@ test_that("SHAPs sum to predictions, with or without DART", {
   fit <- xgboost(
     params = c(
       list(
+        nthread = 2,
         booster = booster,
         objective = "reg:squarederror",
         eval_metric = "rmse"),
@@ -257,7 +258,7 @@ test_that("xgb.Booster serializing as R object works", {
   .skip_if_vcd_not_available()
   saveRDS(bst.Tree, 'xgb.model.rds')
   bst <- readRDS('xgb.model.rds')
-  dtrain <- xgb.DMatrix(sparse_matrix, label = label)
+  dtrain <- xgb.DMatrix(sparse_matrix, label = label, nthread = 2)
   expect_equal(predict(bst.Tree, dtrain), predict(bst, dtrain), tolerance = float_tolerance)
   expect_equal(xgb.dump(bst.Tree), xgb.dump(bst))
   xgb.save(bst, 'xgb.model')
@@ -363,7 +364,8 @@ test_that("xgb.importance works with and without feature names", {
     data = as.matrix(data.frame(x = c(0, 1))),
     label = c(1, 2),
     nrounds = 1,
-    base_score = 0.5
+    base_score = 0.5,
+    nthread = 2
   )
   df <- xgb.model.dt.tree(model = m)
   expect_equal(df$Feature, "Leaf")
@@ -2,6 +2,8 @@ require(xgboost)

 context("interaction constraints")

+n_threads <- 2
+
 set.seed(1024)
 x1 <- rnorm(1000, 1)
 x2 <- rnorm(1000, 1)
@@ -45,11 +47,18 @@ test_that("interaction constraints scientific representation", {
   d <- matrix(rexp(rows, rate = .1), nrow = rows, ncol = cols)
   y <- rnorm(rows)

-  dtrain <- xgb.DMatrix(data = d, info = list(label = y))
+  dtrain <- xgb.DMatrix(data = d, info = list(label = y), nthread = n_threads)
   inc <- list(c(seq.int(from = 0, to = cols, by = 1)))

-  with_inc <- xgb.train(data = dtrain, tree_method = 'hist',
-                        interaction_constraints = inc, nrounds = 10)
-  without_inc <- xgb.train(data = dtrain, tree_method = 'hist', nrounds = 10)
+  with_inc <- xgb.train(
+    data = dtrain,
+    tree_method = 'hist',
+    interaction_constraints = inc,
+    nrounds = 10,
+    nthread = n_threads
+  )
+  without_inc <- xgb.train(
+    data = dtrain, tree_method = 'hist', nrounds = 10, nthread = n_threads
+  )
   expect_equal(xgb.save.raw(with_inc), xgb.save.raw(without_inc))
 })
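The test above relies on `interaction_constraints` taking a list of zero-based feature-index groups; a single group covering every column permits all interactions, so the constrained and unconstrained boosters must serialize identically. A sketch of a genuinely restrictive constraint (data and groups invented for illustration):

    library(xgboost)

    set.seed(1)
    d <- matrix(rnorm(100 * 4), nrow = 100, ncol = 4)
    dtrain <- xgb.DMatrix(data = d, label = rnorm(100), nthread = 2)

    # Features {0, 1} may interact with each other, and {2, 3} with each
    # other, but no split path may mix the two groups.
    bst <- xgb.train(
      data = dtrain, tree_method = "hist", nrounds = 10, nthread = 2,
      interaction_constraints = list(c(0, 1), c(2, 3))
    )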
@@ -1,6 +1,7 @@
 context('Test prediction of feature interactions')

 set.seed(123)
+n_threads <- 2

 test_that("predict feature interactions works", {
   # simulate some binary data and a linear outcome with an interaction term
@@ -19,8 +20,10 @@ test_that("predict feature interactions works", {

   y <- f_int(X)

-  dm <- xgb.DMatrix(X, label = y)
-  param <- list(eta = 0.1, max_depth = 4, base_score = mean(y), lambda = 0, nthread = 2)
+  dm <- xgb.DMatrix(X, label = y, nthread = n_threads)
+  param <- list(
+    eta = 0.1, max_depth = 4, base_score = mean(y), lambda = 0, nthread = n_threads
+  )
   b <- xgb.train(param, dm, 100)

   pred <- predict(b, dm, outputmargin = TRUE)
@@ -99,11 +102,13 @@ test_that("SHAP contribution values are not NAN", {
     verbose = 0,
     params = list(
       objective = "reg:squarederror",
-      eval_metric = "rmse"),
+      eval_metric = "rmse",
+      nthread = n_threads
+    ),
     data = as.matrix(subset(d, fold == 2)[, ivs]),
     label = subset(d, fold == 2)$y,
-    nthread = 1,
-    nrounds = 3)
+    nrounds = 3
+  )

   shaps <- as.data.frame(predict(fit,
     newdata = as.matrix(subset(d, fold == 1)[, ivs]),
@@ -116,8 +121,12 @@ test_that("SHAP contribution values are not NAN", {


 test_that("multiclass feature interactions work", {
-  dm <- xgb.DMatrix(as.matrix(iris[, -5]), label = as.numeric(iris$Species) - 1)
-  param <- list(eta = 0.1, max_depth = 4, objective = 'multi:softprob', num_class = 3)
+  dm <- xgb.DMatrix(
+    as.matrix(iris[, -5]), label = as.numeric(iris$Species) - 1, nthread = n_threads
+  )
+  param <- list(
+    eta = 0.1, max_depth = 4, objective = 'multi:softprob', num_class = 3, nthread = n_threads
+  )
   b <- xgb.train(param, dm, 40)
   pred <- t(
     array(
@@ -166,6 +175,7 @@ test_that("SHAP single sample works", {
     max_depth = 2,
     nrounds = 4,
     objective = "binary:logistic",
+    nthread = n_threads
   )

   predt <- predict(
@@ -9,7 +9,8 @@ test_that("load/save raw works", {
   nrounds <- 8
   booster <- xgboost(
     data = train$data, label = train$label,
-    nrounds = nrounds, objective = "binary:logistic"
+    nrounds = nrounds, objective = "binary:logistic",
+    nthread = 2
   )

   json_bytes <- xgb.save.raw(booster, raw_format = "json")
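`xgb.save.raw()` serializes the booster into an in-memory raw vector; besides `"json"` as used above, the R package of this era also accepts `"ubj"` and the legacy `"deprecated"` binary format (format names other than `"json"` are recalled from the package documentation, not from this diff). A sketch of a round trip, assuming the trained `booster` from the test:

    json_bytes <- xgb.save.raw(booster, raw_format = "json")

    # Rebuild a booster from the buffer; as_booster = TRUE returns an
    # xgb.Booster object rather than a bare handle.
    booster2 <- xgb.load.raw(json_bytes, as_booster = TRUE)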
@@ -9,20 +9,20 @@ metadata <- list(
   kClasses = 3
 )

-run_model_param_check <- function (config) {
+run_model_param_check <- function(config) {
   testthat::expect_equal(config$learner$learner_model_param$num_feature, '4')
   testthat::expect_equal(config$learner$learner_train_param$booster, 'gbtree')
 }

-get_num_tree <- function (booster) {
+get_num_tree <- function(booster) {
   dump <- xgb.dump(booster)
   m <- regexec('booster\\[[0-9]+\\]', dump, perl = TRUE)
   m <- regmatches(dump, m)
   num_tree <- Reduce('+', lapply(m, length))
-  return (num_tree)
+  return(num_tree)
 }

-run_booster_check <- function (booster, name) {
+run_booster_check <- function(booster, name) {
   # If given a handle, we need to call xgb.Booster.complete() prior to using xgb.config().
   if (inherits(booster, "xgb.Booster") && xgboost:::is.null.handle(booster$handle)) {
     booster <- xgb.Booster.complete(booster)
@@ -66,9 +66,9 @@ test_that("Models from previous versions of XGBoost can be loaded", {
   unzip(zipfile, exdir = extract_dir, overwrite = TRUE)
   model_dir <- file.path(extract_dir, 'models')

-  pred_data <- xgb.DMatrix(matrix(c(0, 0, 0, 0), nrow = 1, ncol = 4))
+  pred_data <- xgb.DMatrix(matrix(c(0, 0, 0, 0), nrow = 1, ncol = 4), nthread = 2)

-  lapply(list.files(model_dir), function (x) {
+  lapply(list.files(model_dir), function(x) {
     model_file <- file.path(model_dir, x)
     m <- regexec("xgboost-([0-9\\.]+)\\.([a-z]+)\\.[a-z]+", model_file, perl = TRUE)
     m <- regmatches(model_file, m)[[1]]
@@ -87,6 +87,7 @@ test_that("Models from previous versions of XGBoost can be loaded", {
       booster <- readRDS(model_file)
     } else {
       booster <- xgb.load(model_file)
+      xgb.parameters(booster) <- list(nthread = 2)
     }
     predict(booster, newdata = pred_data)
     run_booster_check(booster, name)
@@ -3,8 +3,12 @@ context('Test model params and call are exposed to R')
 data(agaricus.train, package = 'xgboost')
 data(agaricus.test, package = 'xgboost')

-dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
-dtest <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label)
+dtrain <- xgb.DMatrix(
+  agaricus.train$data, label = agaricus.train$label, nthread = 2
+)
+dtest <- xgb.DMatrix(
+  agaricus.test$data, label = agaricus.test$label, nthread = 2
+)

 bst <- xgboost(data = dtrain,
                max_depth = 2,
@@ -4,8 +4,10 @@ set.seed(1994)

 test_that("Poisson regression works", {
   data(mtcars)
-  bst <- xgboost(data = as.matrix(mtcars[, -11]), label = mtcars[, 11],
-                 objective = 'count:poisson', nrounds = 10, verbose = 0)
+  bst <- xgboost(
+    data = as.matrix(mtcars[, -11]), label = mtcars[, 11],
+    objective = 'count:poisson', nrounds = 10, verbose = 0, nthread = 2
+  )
   expect_equal(class(bst), "xgb.Booster")
   pred <- predict(bst, as.matrix(mtcars[, -11]))
   expect_equal(length(pred), 32)
@@ -1,5 +1,7 @@
 context('Learning to rank')

+n_threads <- 2
+
 test_that('Test ranking with unweighted data', {
   X <- Matrix::sparseMatrix(
     i = c(2, 3, 7, 9, 12, 15, 17, 18)
@@ -9,10 +11,10 @@ test_that('Test ranking with unweighted data', {
   )
   y <- c(0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0)
   group <- c(5, 5, 5, 5)
-  dtrain <- xgb.DMatrix(X, label = y, group = group)
+  dtrain <- xgb.DMatrix(X, label = y, group = group, nthread = n_threads)

   params <- list(eta = 1, tree_method = 'exact', objective = 'rank:pairwise', max_depth = 1,
-                 eval_metric = 'auc', eval_metric = 'aucpr')
+                 eval_metric = 'auc', eval_metric = 'aucpr', nthread = n_threads)
   bst <- xgb.train(params, dtrain, nrounds = 10, watchlist = list(train = dtrain))
   # Check if the metric is monotone increasing
   expect_true(all(diff(bst$evaluation_log$train_auc) >= 0))
@@ -29,10 +31,14 @@ test_that('Test ranking with weighted data', {
   y <- c(0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0)
   group <- c(5, 5, 5, 5)
   weight <- c(1.0, 2.0, 3.0, 4.0)
-  dtrain <- xgb.DMatrix(X, label = y, group = group, weight = weight)
+  dtrain <- xgb.DMatrix(
+    X, label = y, group = group, weight = weight, nthread = n_threads
+  )

-  params <- list(eta = 1, tree_method = 'exact', objective = 'rank:pairwise', max_depth = 1,
-                 eval_metric = 'auc', eval_metric = 'aucpr')
+  params <- list(
+    eta = 1, tree_method = "exact", objective = "rank:pairwise", max_depth = 1,
+    eval_metric = "auc", eval_metric = "aucpr", nthread = n_threads
+  )
   bst <- xgb.train(params, dtrain, nrounds = 10, watchlist = list(train = dtrain))
   # Check if the metric is monotone increasing
   expect_true(all(diff(bst$evaluation_log$train_auc) >= 0))
@@ -41,7 +47,7 @@ test_that('Test ranking with weighted data', {
     pred <- predict(bst, newdata = dtrain, ntreelimit = i)
     # is_sorted[i]: is i-th group correctly sorted by the ranking predictor?
     is_sorted <- lapply(seq(1, 20, by = 5),
-      function (k) {
+      function(k) {
         ind <- order(-pred[k:(k + 4)])
         z <- y[ind + (k - 1)]
         all(diff(z) <= 0)  # Check if z is monotone decreasing
@ -16,6 +16,7 @@ test_that("Can save and load models with Unicode paths", {
|
|||||||
path <- file.path(tmpdir, x)
|
path <- file.path(tmpdir, x)
|
||||||
xgb.save(bst, path)
|
xgb.save(bst, path)
|
||||||
bst2 <- xgb.load(path)
|
bst2 <- xgb.load(path)
|
||||||
|
xgb.parameters(bst2) <- list(nthread = 2)
|
||||||
expect_equal(predict(bst, test$data), predict(bst2, test$data))
|
expect_equal(predict(bst, test$data), predict(bst2, test$data))
|
||||||
})
|
})
|
||||||
})
|
})
|
||||||
|
|||||||
@ -2,8 +2,15 @@ context("update trees in an existing model")
|
|||||||
|
|
||||||
data(agaricus.train, package = 'xgboost')
|
data(agaricus.train, package = 'xgboost')
|
||||||
data(agaricus.test, package = 'xgboost')
|
data(agaricus.test, package = 'xgboost')
|
||||||
dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
|
|
||||||
dtest <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label)
|
n_threads <- 1
|
||||||
|
|
||||||
|
dtrain <- xgb.DMatrix(
|
||||||
|
agaricus.train$data, label = agaricus.train$label, nthread = n_threads
|
||||||
|
)
|
||||||
|
dtest <- xgb.DMatrix(
|
||||||
|
agaricus.test$data, label = agaricus.test$label, nthread = n_threads
|
||||||
|
)
|
||||||
|
|
||||||
# Disable flaky tests for 32-bit Windows.
|
# Disable flaky tests for 32-bit Windows.
|
||||||
# See https://github.com/dmlc/xgboost/issues/3720
|
# See https://github.com/dmlc/xgboost/issues/3720
|
||||||
@ -14,7 +21,7 @@ test_that("updating the model works", {
|
|||||||
|
|
||||||
# no-subsampling
|
# no-subsampling
|
||||||
p1 <- list(
|
p1 <- list(
|
||||||
objective = "binary:logistic", max_depth = 2, eta = 0.05, nthread = 2,
|
objective = "binary:logistic", max_depth = 2, eta = 0.05, nthread = n_threads,
|
||||||
updater = "grow_colmaker,prune"
|
updater = "grow_colmaker,prune"
|
||||||
)
|
)
|
||||||
set.seed(11)
|
set.seed(11)
|
||||||
@ -86,9 +93,11 @@ test_that("updating the model works", {
|
|||||||
})
|
})
|
||||||
|
|
||||||
test_that("updating works for multiclass & multitree", {
|
test_that("updating works for multiclass & multitree", {
|
||||||
dtr <- xgb.DMatrix(as.matrix(iris[, -5]), label = as.numeric(iris$Species) - 1)
|
dtr <- xgb.DMatrix(
|
||||||
|
as.matrix(iris[, -5]), label = as.numeric(iris$Species) - 1, nthread = n_threads
|
||||||
|
)
|
||||||
watchlist <- list(train = dtr)
|
watchlist <- list(train = dtr)
|
||||||
p0 <- list(max_depth = 2, eta = 0.5, nthread = 2, subsample = 0.6,
|
p0 <- list(max_depth = 2, eta = 0.5, nthread = n_threads, subsample = 0.6,
|
||||||
objective = "multi:softprob", num_class = 3, num_parallel_tree = 2,
|
objective = "multi:softprob", num_class = 3, num_parallel_tree = 2,
|
||||||
base_score = 0)
|
base_score = 0)
|
||||||
set.seed(121)
|
set.seed(121)
|
||||||
|
|||||||
@ -31,6 +31,8 @@ require(data.table)
|
|||||||
if (!require('vcd')) {
|
if (!require('vcd')) {
|
||||||
install.packages('vcd')
|
install.packages('vcd')
|
||||||
}
|
}
|
||||||
|
|
||||||
|
data.table::setDTthreads(2)
|
||||||
```
|
```
|
||||||
|
|
||||||
> **VCD** package is used for one of its embedded dataset only.
|
> **VCD** package is used for one of its embedded dataset only.
|
||||||
@ -297,23 +299,25 @@ test <- agaricus.test
|
|||||||
|
|
||||||
#Random Forest - 1000 trees
|
#Random Forest - 1000 trees
|
||||||
bst <- xgboost(
|
bst <- xgboost(
|
||||||
data = train$data
|
data = train$data,
|
||||||
, label = train$label
|
label = train$label,
|
||||||
, max_depth = 4
|
max_depth = 4,
|
||||||
, num_parallel_tree = 1000
|
num_parallel_tree = 1000,
|
||||||
, subsample = 0.5
|
subsample = 0.5,
|
||||||
, colsample_bytree = 0.5
|
colsample_bytree = 0.5,
|
||||||
, nrounds = 1
|
nrounds = 1,
|
||||||
, objective = "binary:logistic"
|
objective = "binary:logistic",
|
||||||
|
nthread = 2
|
||||||
)
|
)
|
||||||
|
|
||||||
#Boosting - 3 rounds
|
#Boosting - 3 rounds
|
||||||
bst <- xgboost(
|
bst <- xgboost(
|
||||||
data = train$data
|
data = train$data,
|
||||||
, label = train$label
|
label = train$label,
|
||||||
, max_depth = 4
|
max_depth = 4,
|
||||||
, nrounds = 3
|
nrounds = 3,
|
||||||
, objective = "binary:logistic"
|
objective = "binary:logistic",
|
||||||
|
nthread = 2
|
||||||
)
|
)
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|||||||
@ -86,9 +86,10 @@ data(agaricus.test, package='xgboost')
|
|||||||
train <- agaricus.train
|
train <- agaricus.train
|
||||||
test <- agaricus.test
|
test <- agaricus.test
|
||||||
bst <- xgboost(data = train$data, label = train$label, max_depth = 2, eta = 1,
|
bst <- xgboost(data = train$data, label = train$label, max_depth = 2, eta = 1,
|
||||||
nrounds = 2, objective = "binary:logistic")
|
nrounds = 2, objective = "binary:logistic", nthread = 2)
|
||||||
xgb.save(bst, 'model.save')
|
xgb.save(bst, 'model.save')
|
||||||
bst = xgb.load('model.save')
|
bst = xgb.load('model.save')
|
||||||
|
xgb.parameters(bst) <- list(nthread = 2)
|
||||||
pred <- predict(bst, test$data)
|
pred <- predict(bst, test$data)
|
||||||
@
|
@
|
||||||
|
|
||||||
@ -127,7 +128,7 @@ training from initial prediction value, weighted training instance.
|
|||||||
|
|
||||||
We can use \verb@xgb.DMatrix@ to construct an \verb@xgb.DMatrix@ object:
|
We can use \verb@xgb.DMatrix@ to construct an \verb@xgb.DMatrix@ object:
|
||||||
<<xgb.DMatrix>>=
|
<<xgb.DMatrix>>=
|
||||||
dtrain <- xgb.DMatrix(train$data, label = train$label)
|
dtrain <- xgb.DMatrix(train$data, label = train$label, nthread = 2)
|
||||||
class(dtrain)
|
class(dtrain)
|
||||||
head(getinfo(dtrain,'label'))
|
head(getinfo(dtrain,'label'))
|
||||||
@
|
@
|
||||||
@ -161,9 +162,9 @@ evalerror <- function(preds, dtrain) {
|
|||||||
return(list(metric = "MSE", value = err))
|
return(list(metric = "MSE", value = err))
|
||||||
}
|
}
|
||||||
|
|
||||||
dtest <- xgb.DMatrix(test$data, label = test$label)
|
dtest <- xgb.DMatrix(test$data, label = test$label, nthread = 2)
|
||||||
watchlist <- list(eval = dtest, train = dtrain)
|
watchlist <- list(eval = dtest, train = dtrain)
|
||||||
param <- list(max_depth = 2, eta = 1)
|
param <- list(max_depth = 2, eta = 1, nthread = 2)
|
||||||
|
|
||||||
bst <- xgb.train(param, dtrain, nrounds = 2, watchlist, logregobj, evalerror, maximize = FALSE)
|
bst <- xgb.train(param, dtrain, nrounds = 2, watchlist, logregobj, evalerror, maximize = FALSE)
|
||||||
@
|
@
|
||||||
|
|||||||
@ -173,13 +173,13 @@ Alternatively, you can put your dataset in a *dense* matrix, i.e. a basic **R**
|
|||||||
|
|
||||||
```{r trainingDense, message=F, warning=F}
|
```{r trainingDense, message=F, warning=F}
|
||||||
bstDense <- xgboost(
|
bstDense <- xgboost(
|
||||||
data = as.matrix(train$data)
|
data = as.matrix(train$data),
|
||||||
, label = train$label
|
label = train$label,
|
||||||
, max_depth = 2
|
max_depth = 2,
|
||||||
, eta = 1
|
eta = 1,
|
||||||
, nthread = 2
|
nthread = 2,
|
||||||
, nrounds = 2
|
nrounds = 2,
|
||||||
, objective = "binary:logistic"
|
objective = "binary:logistic"
|
||||||
)
|
)
|
||||||
```
|
```
|
||||||
|
|
||||||
@ -188,14 +188,14 @@ bstDense <- xgboost(
|
|||||||
**XGBoost** offers a way to group them in a `xgb.DMatrix`. You can even add other meta data in it. It will be useful for the most advanced features we will discover later.
|
**XGBoost** offers a way to group them in a `xgb.DMatrix`. You can even add other meta data in it. It will be useful for the most advanced features we will discover later.
|
||||||
|
|
||||||
```{r trainingDmatrix, message=F, warning=F}
|
```{r trainingDmatrix, message=F, warning=F}
|
||||||
dtrain <- xgb.DMatrix(data = train$data, label = train$label)
|
dtrain <- xgb.DMatrix(data = train$data, label = train$label, nthread = 2)
|
||||||
bstDMatrix <- xgboost(
|
bstDMatrix <- xgboost(
|
||||||
data = dtrain
|
data = dtrain,
|
||||||
, max_depth = 2
|
max_depth = 2,
|
||||||
, eta = 1
|
eta = 1,
|
||||||
, nthread = 2
|
nthread = 2,
|
||||||
, nrounds = 2
|
nrounds = 2,
|
||||||
, objective = "binary:logistic"
|
objective = "binary:logistic"
|
||||||
)
|
)
|
||||||
```
|
```
|
||||||
|
|
||||||
@ -314,8 +314,8 @@ Most of the features below have been implemented to help you to improve your mod
|
|||||||
For the following advanced features, we need to put data in `xgb.DMatrix` as explained above.
|
For the following advanced features, we need to put data in `xgb.DMatrix` as explained above.
|
||||||
|
|
||||||
```{r DMatrix, message=F, warning=F}
|
```{r DMatrix, message=F, warning=F}
|
||||||
dtrain <- xgb.DMatrix(data = train$data, label = train$label)
|
dtrain <- xgb.DMatrix(data = train$data, label = train$label, nthread = 2)
|
||||||
dtest <- xgb.DMatrix(data = test$data, label = test$label)
|
dtest <- xgb.DMatrix(data = test$data, label = test$label, nthread = 2)
|
||||||
```
|
```
|
||||||
|
|
||||||
### Measure learning progress with xgb.train
|
### Measure learning progress with xgb.train
|
||||||
@ -476,6 +476,7 @@ An interesting test to see how identical our saved model is to the original one
|
|||||||
```{r loadModel, message=F, warning=F}
|
```{r loadModel, message=F, warning=F}
|
||||||
# load binary model to R
|
# load binary model to R
|
||||||
bst2 <- xgb.load("xgboost.model")
|
bst2 <- xgb.load("xgboost.model")
|
||||||
|
xgb.parameters(bst2) <- list(nthread = 2)
|
||||||
pred2 <- predict(bst2, test$data)
|
pred2 <- predict(bst2, test$data)
|
||||||
|
|
||||||
# And now the test
|
# And now the test
|
||||||
@ -500,6 +501,7 @@ print(class(rawVec))
|
|||||||
|
|
||||||
# load binary model to R
|
# load binary model to R
|
||||||
bst3 <- xgb.load(rawVec)
|
bst3 <- xgb.load(rawVec)
|
||||||
|
xgb.parameters(bst3) <- list(nthread = 2)
|
||||||
pred3 <- predict(bst3, test$data)
|
pred3 <- predict(bst3, test$data)
|
||||||
|
|
||||||
# pred2 should be identical to pred
|
# pred2 should be identical to pred
|
||||||
|
|||||||
@ -175,7 +175,7 @@ bst_preds == bst_from_json_preds
|
|||||||
|
|
||||||
None are exactly equal again. What is going on here? Well, since we are using the value `1` in the calculations, we have introduced a double into the calculation. Because of this, all float values are promoted to 64-bit doubles and the 64-bit version of the exponentiation operator `exp` is also used. On the other hand, xgboost uses the 32-bit version of the exponentiation operator in its [sigmoid function](https://github.com/dmlc/xgboost/blob/54980b8959680a0da06a3fc0ec776e47c8cbb0a1/src/common/math.h#L25-L27).
|
None are exactly equal again. What is going on here? Well, since we are using the value `1` in the calculations, we have introduced a double into the calculation. Because of this, all float values are promoted to 64-bit doubles and the 64-bit version of the exponentiation operator `exp` is also used. On the other hand, xgboost uses the 32-bit version of the exponentiation operator in its [sigmoid function](https://github.com/dmlc/xgboost/blob/54980b8959680a0da06a3fc0ec776e47c8cbb0a1/src/common/math.h#L25-L27).
|
||||||
|
|
||||||
How do we fix this? We have to ensure we use the correct data types everywhere and the correct operators. If we use only floats, the float library that we have loaded will ensure the 32-bit float exponentiation operator is applied.
|
How do we fix this? We have to ensure we use the correct data types everywhere and the correct operators. If we use only floats, the float library that we have loaded will ensure the 32-bit float exponentiation operator is applied.
|
||||||
```{r}
|
```{r}
|
||||||
# calculate the predictions casting doubles to floats
|
# calculate the predictions casting doubles to floats
|
||||||
bst_from_json_preds <- ifelse(
|
bst_from_json_preds <- ifelse(
|
||||||
|
|||||||
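The float-versus-double promotion described in that tutorial hunk is easy to reproduce outside R. The following NumPy sketch is an editorial illustration, not part of the original vignette; it only assumes standard NumPy behavior:

```python
import numpy as np

# A margin value as xgboost would store it internally: 32-bit float.
margin = np.float32(1.2345678)

# Promoting to 64-bit before exp() mirrors the "R with doubles" computation...
p64 = 1.0 / (1.0 + np.exp(-np.float64(margin)))

# ...while keeping every operand 32-bit mirrors xgboost's float sigmoid.
one = np.float32(1.0)
p32 = one / (one + np.exp(-margin))

print(p64, p32, p64 == np.float64(p32))  # typically differs in the last bits
```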
@@ -1,16 +1,17 @@
-function (run_doxygen)
+function(run_doxygen)
  find_package(Doxygen REQUIRED)

-  if (NOT DOXYGEN_DOT_FOUND)
+  if(NOT DOXYGEN_DOT_FOUND)
    message(FATAL_ERROR "Command `dot` not found. Please install graphviz.")
-  endif (NOT DOXYGEN_DOT_FOUND)
+  endif()

  configure_file(
    ${xgboost_SOURCE_DIR}/doc/Doxyfile.in
    ${CMAKE_CURRENT_BINARY_DIR}/Doxyfile @ONLY)
-  add_custom_target( doc_doxygen ALL
+  add_custom_target(
+    doc_doxygen ALL
    COMMAND ${DOXYGEN_EXECUTABLE} ${CMAKE_CURRENT_BINARY_DIR}/Doxyfile
    WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
    COMMENT "Generate C APIs documentation."
    VERBATIM)
-endfunction (run_doxygen)
+endfunction()

@@ -1,4 +1,4 @@
-function (find_prefetch_intrinsics)
+function(find_prefetch_intrinsics)
  include(CheckCXXSourceCompiles)
  check_cxx_source_compiles("
  #include <xmmintrin.h>

@@ -19,4 +19,4 @@ function (find_prefetch_intrinsics)
  " XGBOOST_BUILTIN_PREFETCH_PRESENT)
  set(XGBOOST_MM_PREFETCH_PRESENT ${XGBOOST_MM_PREFETCH_PRESENT} PARENT_SCOPE)
  set(XGBOOST_BUILTIN_PREFETCH_PRESENT ${XGBOOST_BUILTIN_PREFETCH_PRESENT} PARENT_SCOPE)
-endfunction (find_prefetch_intrinsics)
+endfunction()

@@ -12,9 +12,9 @@ macro(enable_sanitizer sanitizer)
  elseif(${sanitizer} MATCHES "thread")
    find_package(TSan)
    set(SAN_COMPILE_FLAGS "${SAN_COMPILE_FLAGS} -fsanitize=thread")
-    if (TSan_FOUND)
+    if(TSan_FOUND)
      link_libraries(${TSan_LIBRARY})
-    endif (TSan_FOUND)
+    endif()

  elseif(${sanitizer} MATCHES "leak")
    find_package(LSan)

@@ -33,16 +33,16 @@ macro(enable_sanitizers SANITIZERS)
  # Check sanitizers compatibility.
  # Ideally, we should use if(san IN_LIST SANITIZERS) ... endif()
  # But I haven't figured out how to make it work.
-  foreach ( _san ${SANITIZERS} )
+  foreach( _san ${SANITIZERS} )
    string(TOLOWER ${_san} _san)
-    if (_san MATCHES "thread")
-      if (${_use_other_sanitizers})
+    if(_san MATCHES "thread")
+      if(${_use_other_sanitizers})
        message(FATAL_ERROR
          "thread sanitizer is not compatible with ${_san} sanitizer.")
      endif()
      set(_use_thread_sanitizer 1)
-    else ()
-      if (${_use_thread_sanitizer})
+    else()
+      if(${_use_thread_sanitizer})
        message(FATAL_ERROR
          "${_san} sanitizer is not compatible with thread sanitizer.")
      endif()

@@ -11,7 +11,7 @@ function(auto_source_group SOURCES)

    source_group("${GROUP}" FILES "${FILE}")
  endforeach()
-endfunction(auto_source_group)
+endfunction()

# Force static runtime for MSVC
function(msvc_use_static_runtime)

@@ -50,7 +50,7 @@ function(msvc_use_static_runtime)
      endif()
    endforeach()
  endif()
-endfunction(msvc_use_static_runtime)
+endfunction()

# Set output directory of target, ignoring debug or release
function(set_output_directory target dir)

@@ -70,17 +70,17 @@ function(set_output_directory target dir)
    ARCHIVE_OUTPUT_DIRECTORY_RELEASE ${dir}
    ARCHIVE_OUTPUT_DIRECTORY_RELWITHDEBINFO ${dir}
    ARCHIVE_OUTPUT_DIRECTORY_MINSIZEREL ${dir})
-endfunction(set_output_directory)
+endfunction()

# Set a default build type to release if none was specified
function(set_default_configuration_release)
  if(CMAKE_CONFIGURATION_TYPES STREQUAL "Debug;Release;MinSizeRel;RelWithDebInfo") # multiconfig generator?
    set(CMAKE_CONFIGURATION_TYPES Release CACHE STRING "" FORCE)
  elseif(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
    message(STATUS "Setting build type to 'Release' as none was specified.")
-    set(CMAKE_BUILD_TYPE Release CACHE STRING "Choose the type of build." FORCE )
+    set(CMAKE_BUILD_TYPE Release CACHE STRING "Choose the type of build." FORCE)
  endif()
-endfunction(set_default_configuration_release)
+endfunction()

# Generate nvcc compiler flags given a list of architectures
# Also generates PTX for the most recent architecture for forwards compatibility

@@ -90,9 +90,9 @@ function(format_gencode_flags flags out)
  endif()
  # Set up architecture flags
  if(NOT flags)
-    if (CUDA_VERSION VERSION_GREATER_EQUAL "11.8")
+    if(CUDA_VERSION VERSION_GREATER_EQUAL "11.8")
      set(flags "50;60;70;80;90")
-    elseif (CUDA_VERSION VERSION_GREATER_EQUAL "11.0")
+    elseif(CUDA_VERSION VERSION_GREATER_EQUAL "11.0")
      set(flags "50;60;70;80")
    elseif(CUDA_VERSION VERSION_GREATER_EQUAL "10.0")
      set(flags "35;50;60;70")

@@ -103,7 +103,7 @@ function(format_gencode_flags flags out)
    endif()
  endif()

-  if (CMAKE_VERSION VERSION_GREATER_EQUAL "3.18")
+  if(CMAKE_VERSION VERSION_GREATER_EQUAL "3.18")
    cmake_policy(SET CMP0104 NEW)
    list(GET flags -1 latest_arch)
    list(TRANSFORM flags APPEND "-real")

@@ -121,8 +121,8 @@ function(format_gencode_flags flags out)
    set(${out} "${${out}}--generate-code=arch=compute_${ver},code=compute_${ver};")
    set(${out} "${${out}}" PARENT_SCOPE)
    message(STATUS "CUDA GEN_CODE: ${GEN_CODE}")
-  endif (CMAKE_VERSION VERSION_GREATER_EQUAL "3.18")
-endfunction(format_gencode_flags flags)
+  endif()
+endfunction()

# Set CUDA related flags to target. Must be used after calling `format_gencode_flags`.
function(xgboost_set_cuda_flags target)

@@ -133,35 +133,35 @@ function(xgboost_set_cuda_flags target)
    $<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler=${OpenMP_CXX_FLAGS}>
    $<$<COMPILE_LANGUAGE:CUDA>:-Xfatbin=-compress-all>)

-  if (USE_PER_THREAD_DEFAULT_STREAM)
+  if(USE_PER_THREAD_DEFAULT_STREAM)
    target_compile_options(${target} PRIVATE
      $<$<COMPILE_LANGUAGE:CUDA>:--default-stream per-thread>)
-  endif (USE_PER_THREAD_DEFAULT_STREAM)
+  endif()

-  if (CMAKE_VERSION VERSION_GREATER_EQUAL "3.18")
+  if(CMAKE_VERSION VERSION_GREATER_EQUAL "3.18")
    set_property(TARGET ${target} PROPERTY CUDA_ARCHITECTURES ${CMAKE_CUDA_ARCHITECTURES})
-  endif (CMAKE_VERSION VERSION_GREATER_EQUAL "3.18")
+  endif()

-  if (FORCE_COLORED_OUTPUT)
-    if (FORCE_COLORED_OUTPUT AND (CMAKE_GENERATOR STREQUAL "Ninja") AND
+  if(FORCE_COLORED_OUTPUT)
+    if(FORCE_COLORED_OUTPUT AND (CMAKE_GENERATOR STREQUAL "Ninja") AND
        ((CMAKE_CXX_COMPILER_ID STREQUAL "GNU") OR
        (CMAKE_CXX_COMPILER_ID STREQUAL "Clang")))
      target_compile_options(${target} PRIVATE
        $<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler=-fdiagnostics-color=always>)
    endif()
-  endif (FORCE_COLORED_OUTPUT)
+  endif()

-  if (USE_DEVICE_DEBUG)
+  if(USE_DEVICE_DEBUG)
    target_compile_options(${target} PRIVATE
      $<$<AND:$<CONFIG:DEBUG>,$<COMPILE_LANGUAGE:CUDA>>:-G;-src-in-ptx>)
-  else (USE_DEVICE_DEBUG)
+  else()
    target_compile_options(${target} PRIVATE
      $<$<COMPILE_LANGUAGE:CUDA>:-lineinfo>)
-  endif (USE_DEVICE_DEBUG)
+  endif()

-  if (USE_NVTX)
+  if(USE_NVTX)
    target_compile_definitions(${target} PRIVATE -DXGBOOST_USE_NVTX=1)
-  endif (USE_NVTX)
+  endif()

  target_compile_definitions(${target} PRIVATE -DXGBOOST_USE_CUDA=1)
  target_include_directories(

@@ -169,17 +169,17 @@ function(xgboost_set_cuda_flags target)
    ${xgboost_SOURCE_DIR}/gputreeshap
    ${CUDAToolkit_INCLUDE_DIRS})

-  if (MSVC)
+  if(MSVC)
    target_compile_options(${target} PRIVATE
      $<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler=/utf-8>)
-  endif (MSVC)
+  endif()

  set_target_properties(${target} PROPERTIES
    CUDA_STANDARD 17
    CUDA_STANDARD_REQUIRED ON
    CUDA_SEPARABLE_COMPILATION OFF
    CUDA_RUNTIME_LIBRARY Static)
-endfunction(xgboost_set_cuda_flags)
+endfunction()

# Set HIP related flags to target.
function(xgboost_set_hip_flags target)

@@ -199,16 +199,16 @@ function(xgboost_set_hip_flags target)
endfunction(xgboost_set_hip_flags)

macro(xgboost_link_nccl target)
-  if (BUILD_STATIC_LIB)
+  if(BUILD_STATIC_LIB)
    target_include_directories(${target} PUBLIC ${NCCL_INCLUDE_DIR})
    target_compile_definitions(${target} PUBLIC -DXGBOOST_USE_NCCL=1)
    target_link_libraries(${target} PUBLIC ${NCCL_LIBRARY})
-  else ()
+  else()
    target_include_directories(${target} PRIVATE ${NCCL_INCLUDE_DIR})
    target_compile_definitions(${target} PRIVATE -DXGBOOST_USE_NCCL=1)
    target_link_libraries(${target} PRIVATE ${NCCL_LIBRARY})
-  endif (BUILD_STATIC_LIB)
-endmacro(xgboost_link_nccl)
+  endif()
+endmacro()

# compile options
macro(xgboost_target_properties target)

@@ -217,110 +217,106 @@ macro(xgboost_target_properties target)
    CXX_STANDARD_REQUIRED ON
    POSITION_INDEPENDENT_CODE ON)

-  if (HIDE_CXX_SYMBOLS)
+  if(HIDE_CXX_SYMBOLS)
    #-- Hide all C++ symbols
    set_target_properties(${target} PROPERTIES
      C_VISIBILITY_PRESET hidden
      CXX_VISIBILITY_PRESET hidden
      CUDA_VISIBILITY_PRESET hidden
    )
-  endif (HIDE_CXX_SYMBOLS)
+  endif()

-  if (ENABLE_ALL_WARNINGS)
+  if(ENABLE_ALL_WARNINGS)
    target_compile_options(${target} PUBLIC
      $<IF:$<COMPILE_LANGUAGE:CUDA>,
      -Xcompiler=-Wall -Xcompiler=-Wextra -Xcompiler=-Wno-expansion-to-defined,
      -Wall -Wextra -Wno-expansion-to-defined>
    )
-    target_compile_options(${target} PUBLIC
-      $<IF:$<COMPILE_LANGUAGE:HIP>,
-      -Wall -Wextra >
-    )
-  endif(ENABLE_ALL_WARNINGS)
+  endif()

  target_compile_options(${target}
    PRIVATE
    $<$<AND:$<CXX_COMPILER_ID:MSVC>,$<COMPILE_LANGUAGE:CXX>>:/MP>
    $<$<AND:$<NOT:$<CXX_COMPILER_ID:MSVC>>,$<COMPILE_LANGUAGE:CXX>>:-funroll-loops>)

-  if (MSVC)
+  if(MSVC)
    target_compile_options(${target} PRIVATE
      $<$<NOT:$<COMPILE_LANGUAGE:CUDA>>:/utf-8>
      -D_CRT_SECURE_NO_WARNINGS
      -D_CRT_SECURE_NO_DEPRECATE
    )
-  endif (MSVC)
+  endif()

-  if (WIN32 AND MINGW)
+  if(WIN32 AND MINGW)
    target_compile_options(${target} PUBLIC -static-libstdc++)
-  endif (WIN32 AND MINGW)
-endmacro(xgboost_target_properties)
+  endif()
+endmacro()

# Custom definitions used in xgboost.
macro(xgboost_target_defs target)
-  if (NOT ${target} STREQUAL "dmlc") # skip dmlc core for custom logging.
+  if(NOT ${target} STREQUAL "dmlc") # skip dmlc core for custom logging.
    target_compile_definitions(${target}
      PRIVATE
      -DDMLC_LOG_CUSTOMIZE=1
      $<$<NOT:$<CXX_COMPILER_ID:MSVC>>:_MWAITXINTRIN_H_INCLUDED>)
-  endif ()
+  endif()
-  if (USE_DEBUG_OUTPUT)
+  if(USE_DEBUG_OUTPUT)
    target_compile_definitions(${target} PRIVATE -DXGBOOST_USE_DEBUG_OUTPUT=1)
-  endif (USE_DEBUG_OUTPUT)
+  endif()
-  if (XGBOOST_MM_PREFETCH_PRESENT)
+  if(XGBOOST_MM_PREFETCH_PRESENT)
    target_compile_definitions(${target}
      PRIVATE
      -DXGBOOST_MM_PREFETCH_PRESENT=1)
-  endif(XGBOOST_MM_PREFETCH_PRESENT)
+  endif()
-  if (XGBOOST_BUILTIN_PREFETCH_PRESENT)
+  if(XGBOOST_BUILTIN_PREFETCH_PRESENT)
    target_compile_definitions(${target}
      PRIVATE
      -DXGBOOST_BUILTIN_PREFETCH_PRESENT=1)
-  endif (XGBOOST_BUILTIN_PREFETCH_PRESENT)
+  endif()

-  if (PLUGIN_RMM)
+  if(PLUGIN_RMM)
    target_compile_definitions(objxgboost PUBLIC -DXGBOOST_USE_RMM=1)
-  endif (PLUGIN_RMM)
-endmacro(xgboost_target_defs)
+  endif()
+endmacro()

# handles dependencies
macro(xgboost_target_link_libraries target)
-  if (BUILD_STATIC_LIB)
+  if(BUILD_STATIC_LIB)
    target_link_libraries(${target} PUBLIC Threads::Threads ${CMAKE_THREAD_LIBS_INIT})
  else()
    target_link_libraries(${target} PRIVATE Threads::Threads ${CMAKE_THREAD_LIBS_INIT})
-  endif (BUILD_STATIC_LIB)
+  endif()

-  if (USE_OPENMP)
-    if (BUILD_STATIC_LIB)
+  if(USE_OPENMP)
+    if(BUILD_STATIC_LIB)
      target_link_libraries(${target} PUBLIC OpenMP::OpenMP_CXX)
    else()
      target_link_libraries(${target} PRIVATE OpenMP::OpenMP_CXX)
-    endif (BUILD_STATIC_LIB)
-  endif (USE_OPENMP)
+    endif()
+  endif()

-  if (USE_CUDA)
+  if(USE_CUDA)
    xgboost_set_cuda_flags(${target})
    target_link_libraries(${target} PUBLIC CUDA::cudart_static)
-  endif (USE_CUDA)
+  endif()

  if (USE_HIP)
    xgboost_set_hip_flags(${target})
  endif (USE_HIP)

-  if (PLUGIN_RMM)
+  if(PLUGIN_RMM)
    target_link_libraries(${target} PRIVATE rmm::rmm)
-  endif (PLUGIN_RMM)
+  endif()

-  if (USE_NCCL)
+  if(USE_NCCL)
    xgboost_link_nccl(${target})
-  endif (USE_NCCL)
+  endif()

-  if (USE_NVTX)
+  if(USE_NVTX)
    target_link_libraries(${target} PRIVATE CUDA::nvToolsExt)
-  endif (USE_NVTX)
+  endif()

-  if (MINGW)
+  if(MINGW)
    target_link_libraries(${target} PRIVATE wsock32 ws2_32)
-  endif (MINGW)
-endmacro(xgboost_target_link_libraries)
+  endif()
+endmacro()

@@ -1,6 +1,6 @@
-function (write_version)
+function(write_version)
  message(STATUS "xgboost VERSION: ${xgboost_VERSION}")
  configure_file(
    ${xgboost_SOURCE_DIR}/cmake/version_config.h.in
    ${xgboost_SOURCE_DIR}/include/xgboost/version_config.h @ONLY)
-endfunction (write_version)
+endfunction()

@@ -66,7 +66,7 @@ function(create_rlib_for_msvc)
  execute_process(COMMAND ${DLLTOOL_EXE}
    "--input-def" "${CMAKE_CURRENT_BINARY_DIR}/R.def"
    "--output-lib" "${CMAKE_CURRENT_BINARY_DIR}/R.lib")
-endfunction(create_rlib_for_msvc)
+endfunction()


# detection for OSX
@@ -1,6 +1,6 @@
-if (NVML_LIBRARY)
+if(NVML_LIBRARY)
  unset(NVML_LIBRARY CACHE)
-endif(NVML_LIBRARY)
+endif()

set(NVML_LIB_NAME nvml)

@@ -35,20 +35,20 @@
#
# This module assumes that the user has already called find_package(CUDA)

-if (NCCL_LIBRARY)
+if(NCCL_LIBRARY)
  if(NOT USE_NCCL_LIB_PATH)
    # Don't cache NCCL_LIBRARY to enable switching between static and shared.
    unset(NCCL_LIBRARY CACHE)
-  endif(NOT USE_NCCL_LIB_PATH)
+  endif()
endif()

-if (BUILD_WITH_SHARED_NCCL)
+if(BUILD_WITH_SHARED_NCCL)
  # libnccl.so
  set(NCCL_LIB_NAME nccl)
-else ()
+else()
  # libnccl_static.a
  set(NCCL_LIB_NAME nccl_static)
-endif (BUILD_WITH_SHARED_NCCL)
+endif()

find_path(NCCL_INCLUDE_DIR
  NAMES nccl.h
@@ -3,11 +3,11 @@ find_package(xgboost REQUIRED)

# xgboost is built as a static library; all cxx dependencies need to be linked into the
# executable.
-if (XGBOOST_BUILD_STATIC_LIB)
+if(XGBOOST_BUILD_STATIC_LIB)
  enable_language(CXX)
  # find again for those cxx libraries.
  find_package(xgboost REQUIRED)
-endif(XGBOOST_BUILD_STATIC_LIB)
+endif()

add_executable(api-demo c-api-demo.c)
target_link_libraries(api-demo PRIVATE xgboost::xgboost)

@@ -4,11 +4,11 @@ find_package(xgboost REQUIRED)

# xgboost is built as a static library; all cxx dependencies need to be linked into the
# executable.
-if (XGBOOST_BUILD_STATIC_LIB)
+if(XGBOOST_BUILD_STATIC_LIB)
  enable_language(CXX)
  # find again for those cxx libraries.
  find_package(xgboost REQUIRED)
-endif(XGBOOST_BUILD_STATIC_LIB)
+endif()

add_executable(inference-demo inference.c)
target_link_libraries(inference-demo PRIVATE xgboost::xgboost)
@@ -104,7 +104,7 @@ def check_point_callback():
    # Use callback class from xgboost.callback
    # Feel free to subclass/customize it to suit your need.
    check_point = xgb.callback.TrainingCheckPoint(
-        directory=tmpdir, iterations=rounds, name="model"
+        directory=tmpdir, interval=rounds, name="model"
    )
    xgb.train(
        {"objective": "binary:logistic"},

@@ -118,7 +118,7 @@ def check_point_callback():
    # This version of checkpoint saves everything including parameters and
    # model. See: doc/tutorials/saving_model.rst
    check_point = xgb.callback.TrainingCheckPoint(
-        directory=tmpdir, iterations=rounds, as_pickle=True, name="model"
+        directory=tmpdir, interval=rounds, as_pickle=True, name="model"
    )
    xgb.train(
        {"objective": "binary:logistic"},
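These two hunks rename the checkpoint callback's `iterations` argument to `interval`. A minimal, self-contained sketch of the renamed API, assuming an XGBoost version that ships `TrainingCheckPoint` with the `interval` parameter (the data here is synthetic, for illustration only):

```python
import tempfile

import numpy as np
import xgboost as xgb

# Synthetic binary classification data.
rng = np.random.RandomState(0)
dtrain = xgb.DMatrix(rng.randn(256, 8), label=rng.randint(0, 2, 256))

with tempfile.TemporaryDirectory() as tmpdir:
    # Write a checkpoint under `tmpdir` every 2 boosting rounds.
    check_point = xgb.callback.TrainingCheckPoint(
        directory=tmpdir, interval=2, name="model"
    )
    xgb.train(
        {"objective": "binary:logistic"},
        dtrain,
        num_boost_round=6,
        callbacks=[check_point],
    )
```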
@@ -24,8 +24,8 @@ param <- list("objective" = "binary:logitraw",
              "nthread" = 16)
watchlist <- list("train" = xgmat)
nrounds <- 120
-print ("loading data end, start to boost trees")
+print("loading data end, start to boost trees")
bst <- xgb.train(param, xgmat, nrounds, watchlist)
# save out model
xgb.save(bst, "higgs.model")
-print ('finish training')
+print('finish training')

@@ -39,11 +39,11 @@ for (i in seq_along(threads)){
                "nthread" = thread)
  watchlist <- list("train" = xgmat)
  nrounds <- 120
-  print ("loading data end, start to boost trees")
+  print("loading data end, start to boost trees")
  bst <- xgb.train(param, xgmat, nrounds, watchlist)
  # save out model
  xgb.save(bst, "higgs.model")
-  print ('finish training')
+  print('finish training')
})
}
@@ -85,8 +85,8 @@ shutdown server
## Training with GPUs

To demo with Federated Learning using GPUs, make sure your machine has at least 2 GPUs.
-Build XGBoost with the federated learning plugin enabled along with CUDA, but with NCCL
-turned off (see the [README](../../plugin/federated/README.md)).
+Build XGBoost with the federated learning plugin enabled along with CUDA
+(see the [README](../../plugin/federated/README.md)).

-Modify `config/config_fed_client.json` and set `use_gpus` to `true`, then repeat the steps
+Modify `../config/config_fed_client.json` and set `use_gpus` to `true`, then repeat the steps
above.

@@ -67,7 +67,7 @@ class XGBoostTrainer(Executor):
        dtest = xgb.DMatrix('agaricus.txt.test?format=libsvm')

        # Specify parameters via map; definitions are the same as the C++ version
-        param = {'max_depth': 2, 'eta': 1, 'objective': 'binary:logistic'}
+        param = {'tree_method': 'hist', 'max_depth': 2, 'eta': 1, 'objective': 'binary:logistic'}
        if self._use_gpus:
            self.log_info(fl_ctx, f'Training with GPU {rank}')
            param['device'] = f"cuda:{rank}"

@@ -56,4 +56,9 @@ shutdown server

## Training with GPUs

-Currently GPUs are not yet supported by vertical federated XGBoost.
+To demo with Vertical Federated Learning using GPUs, make sure your machine has at least 2 GPUs.
+Build XGBoost with the federated learning plugin enabled along with CUDA
+(see the [README](../../plugin/federated/README.md)).
+
+Modify `../config/config_fed_client.json` and set `use_gpus` to `true`, then repeat the steps
+above.

@@ -77,13 +77,14 @@ class XGBoostTrainer(Executor):
            'gamma': 1.0,
            'max_depth': 8,
            'min_child_weight': 100,
-            'tree_method': 'approx',
+            'tree_method': 'hist',
            'grow_policy': 'depthwise',
            'objective': 'binary:logistic',
            'eval_metric': 'auc',
        }
        if self._use_gpus:
-            self.log_info(fl_ctx, 'GPUs are not currently supported by vertical federated XGBoost')
+            self.log_info(fl_ctx, f'Training with GPU {rank}')
+            param['device'] = f"cuda:{rank}"

        # specify validations set to watch performance
        watchlist = [(dtest, "eval"), (dtrain, "train")]
@@ -250,8 +250,8 @@ echo "<hash> <artifact>" | shasum -a 256 --check
```

**Experimental binary packages for R with CUDA enabled**
-* xgboost_r_gpu_linux_1.7.5.tar.gz: [Download]({r_gpu_linux_url})
-* xgboost_r_gpu_win64_1.7.5.tar.gz: [Download]({r_gpu_win64_url})
+* xgboost_r_gpu_linux_{release}.tar.gz: [Download]({r_gpu_linux_url})
+* xgboost_r_gpu_win64_{release}.tar.gz: [Download]({r_gpu_win64_url})

**Source tarball**
* xgboost.tar.gz: [Download]({src_tarball})"""

@@ -296,12 +296,13 @@ def main(args: argparse.Namespace) -> None:
    git.submodule("update")
    commit_hash = latest_hash()

-    if not os.path.exists(args.outdir):
-        os.mkdir(args.outdir)
+    outdir = os.path.abspath(args.outdir)
+    if not os.path.exists(outdir):
+        os.mkdir(outdir)

    # source tarball
    hashes: List[str] = []
-    tarname, h = make_src_package(release, args.outdir)
+    tarname, h = make_src_package(release, outdir)
    hashes.append(h)

    # CUDA R packages

@@ -310,18 +311,18 @@ def main(args: argparse.Namespace) -> None:
        branch,
        "" if rc is None else rc + str(rc_ver),
        commit_hash,
-        args.outdir,
+        outdir,
    )
    hashes.extend(hr)

    # Python source wheel
-    make_pysrc_wheel(release, rc, rc_ver, args.outdir)
+    make_pysrc_wheel(release, rc, rc_ver, outdir)

    # Python binary wheels
-    download_py_packages(branch, major, minor, commit_hash, args.outdir)
+    download_py_packages(branch, major, minor, commit_hash, outdir)

    # Write end note
-    release_note(release, hashes, urls, tarname, args.outdir)
+    release_note(release, hashes, urls, tarname, outdir)


if __name__ == "__main__":
@@ -80,6 +80,24 @@ R package versioning
====================
See :ref:`release`.

+Testing R package with different compilers
+==========================================
+
+You can change the default compiler of R by changing the configuration file in the home
+directory. For instance, if you want to test XGBoost built with clang++ instead of g++ on
+Linux, put the following in your ``~/.R/Makevars`` file:
+
+.. code-block:: sh
+
+  CC=clang-15
+  CXX17=clang++-15
+
+Be aware that the variable name should match the name used by ``R CMD``:
+
+.. code-block:: sh
+
+  R CMD config CXX17
+
Registering native routines in R
================================
According to `R extension manual <https://cran.r-project.org/doc/manuals/r-release/R-exts.html#Registering-native-routines>`_,

@@ -35,7 +35,7 @@ Building sdists

In the case of XGBoost, an sdist contains both the Python code as well as
the C++ code, so that the core part of XGBoost can be compiled into the
-shared libary ``libxgboost.so`` [#shared_lib_name]_.
+shared library ``libxgboost.so`` [#shared_lib_name]_.

You can obtain an sdist as follows:
@@ -16,7 +16,14 @@ Adding a new unit test

Python package: pytest
======================
-Add your test under the directory `tests/python/ <https://github.com/dmlc/xgboost/tree/master/tests/python>`_ or `tests/python-gpu/ <https://github.com/dmlc/xgboost/tree/master/tests/python-gpu>`_ (if you are testing GPU code). Refer to `the PyTest tutorial <https://docs.pytest.org/en/latest/getting-started.html>`_ to learn how to write tests for Python code.
+Add your test under the directories
+
+- `tests/python/ <https://github.com/dmlc/xgboost/tree/master/tests/python>`_
+- `tests/python-gpu/ <https://github.com/dmlc/xgboost/tree/master/tests/python-gpu>`_ (if you are testing GPU code)
+- `tests/test_distributed <https://github.com/dmlc/xgboost/tree/master/tests/test_distributed>`_ (if a distributed framework is used)
+
+Refer to `the PyTest tutorial <https://docs.pytest.org/en/latest/getting-started.html>`_
+to learn how to write tests for Python code.

You may try running your test by following instructions in :ref:`this section <running_pytest>`.
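For orientation, here is a minimal sketch of what such a pytest-style test could look like; the file name and the assertion are hypothetical, and it only assumes the public `xgboost` Python API:

```python
# tests/python/test_training_smoke.py -- hypothetical file name
import numpy as np
import xgboost as xgb


def test_training_reduces_train_rmse():
    rng = np.random.RandomState(1994)
    X, y = rng.randn(128, 8), rng.randn(128)
    dtrain = xgb.DMatrix(X, label=y)

    evals_result = {}
    xgb.train(
        {"objective": "reg:squarederror"},
        dtrain,
        num_boost_round=8,
        evals=[(dtrain, "train")],
        evals_result=evals_result,
    )
    rmse = evals_result["train"]["rmse"]
    # Training error should go down over boosting rounds.
    assert rmse[-1] < rmse[0]
```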
@@ -56,19 +63,26 @@ Run

.. code-block:: bash

-  make Rcheck
+  python ./tests/ci_build/test_r_package.py --task=check

-at the root of the project directory.
+at the root of the project directory. The command builds and checks the XGBoost
+R package. Alternatively, if you want to just run the tests, you can use the following
+commands after installing XGBoost:
+
+.. code-block:: bash
+
+  cd R-package/tests/
+  Rscript testthat.R

.. _running_jvm_tests:

JVM packages
============
-As part of the building process, tests are run:
+Maven is used:

.. code-block:: bash

-  mvn package
+  mvn test

.. _running_pytest:

@@ -99,6 +113,14 @@ In addition, to test CUDA code, run:

(For this step, you should have compiled XGBoost with CUDA enabled.)

+For testing with distributed frameworks like ``Dask`` and ``PySpark``:
+
+.. code:: bash
+
+  # Tell Python where to find the XGBoost module
+  export PYTHONPATH=./python-package
+  pytest -v -s --fulltrace tests/test_distributed
+
.. _running_gtest:

C++: Google Test

@@ -110,21 +132,13 @@ To build and run C++ unit tests enable tests while running CMake:

  mkdir build
  cd build
-  cmake -DGOOGLE_TEST=ON -DUSE_DMLC_GTEST=ON ..
-  make
-  make test
+  cmake -GNinja -DGOOGLE_TEST=ON -DUSE_DMLC_GTEST=ON -DUSE_CUDA=ON -DUSE_NCCL=ON ..
+  ninja
+  ./testxgboost

-To enable tests for CUDA code, add ``-DUSE_CUDA=ON`` and ``-DUSE_NCCL=ON`` (CUDA toolkit required):
-
-.. code-block:: bash
-
-  mkdir build
-  cd build
-  cmake -DGOOGLE_TEST=ON -DUSE_DMLC_GTEST=ON -DUSE_CUDA=ON -DUSE_NCCL=ON ..
-  make
-  make test
-
-One can also run all unit test using ctest tool which provides higher flexibility. For example:
+Flags like ``USE_CUDA`` and ``USE_DMLC_GTEST`` are optional. For more info about how to build
+XGBoost from source, see :doc:`</build>`. One can also run all unit tests using the ctest tool,
+which provides higher flexibility. For example:

.. code-block:: bash

@@ -157,14 +171,14 @@ sanitizer is not compatible with the other two sanitizers.

.. code-block:: bash

-  cmake -DUSE_SANITIZER=ON -DENABLED_SANITIZERS="address;leak" /path/to/xgboost
+  cmake -DUSE_SANITIZER=ON -DENABLED_SANITIZERS="address;undefined" /path/to/xgboost

By default, CMake will search regular system paths for sanitizers; you can also
supply a specific SANITIZER_PATH.

.. code-block:: bash

-  cmake -DUSE_SANITIZER=ON -DENABLED_SANITIZERS="address;leak" \
+  cmake -DUSE_SANITIZER=ON -DENABLED_SANITIZERS="address;undefined" \
    -DSANITIZER_PATH=/path/to/sanitizers /path/to/xgboost

How to use sanitizers with CUDA support

@@ -181,7 +195,7 @@ environment variable:

Other sanitizer runtime options
===============================

By default the undefined sanitizer doesn't print out the backtrace. You can enable it by
exporting an environment variable:

.. code-block::
@@ -146,3 +146,48 @@ instance we might accidentally call ``clf.set_params()`` inside a predict functi

    with ThreadPoolExecutor(max_workers=10) as e:
        e.submit(predict_fn, ...)
+
+*****************************
+Privacy-Preserving Prediction
+*****************************
+
+`Concrete ML`_ is a third-party open-source library developed by `Zama`_ that proposes gradient
+boosting classes similar to ours, but predicting directly over encrypted data, thanks to
+Fully Homomorphic Encryption. A simple example would be as follows:
+
+.. code-block:: python
+
+    from sklearn.datasets import make_classification
+    from sklearn.model_selection import train_test_split
+    from concrete.ml.sklearn import XGBClassifier
+
+    x, y = make_classification(n_samples=100, class_sep=2, n_features=30, random_state=42)
+    X_train, X_test, y_train, y_test = train_test_split(
+        x, y, test_size=10, random_state=42
+    )
+
+    # Train in the clear and quantize the weights
+    model = XGBClassifier()
+    model.fit(X_train, y_train)
+
+    # Simulate the predictions in the clear
+    y_pred_clear = model.predict(X_test)
+
+    # Compile in FHE
+    model.compile(X_train)
+
+    # Generate keys
+    model.fhe_circuit.keygen()
+
+    # Run the inference on encrypted inputs!
+    y_pred_fhe = model.predict(X_test, fhe="execute")
+
+    print("In clear  :", y_pred_clear)
+    print("In FHE    :", y_pred_fhe)
+    print(f"Similarity: {int((y_pred_fhe == y_pred_clear).mean()*100)}%")
+
+More information and examples are given in the `Concrete ML documentation`_.
+
+.. _Zama: https://www.zama.ai/
+.. _Concrete ML: https://github.com/zama-ai/concrete-ml
+.. _Concrete ML documentation: https://docs.zama.ai/concrete-ml

@@ -172,9 +172,8 @@ Support Matrix
+-------------------------+-----------+-------------------+-----------+-----------+--------------------+-------------+
| modin.Series            | NPA       | FF                | NPA       | NPA       | FF                 |             |
+-------------------------+-----------+-------------------+-----------+-----------+--------------------+-------------+
-| pyarrow.Table           | T         | F                 |           | NPA       | FF                 |             |
+| pyarrow.Table           | NPA       | NPA               | NPA       | NPA       | NPA                | NPA         |
+-------------------------+-----------+-------------------+-----------+-----------+--------------------+-------------+
-| pyarrow.dataset.Dataset | T         | F                 |           |           | F                  |             |
-+-------------------------+-----------+-------------------+-----------+-----------+--------------------+-------------+
| _\_array\_\_            | NPA       | F                 | NPA       | NPA       | H                  |             |
+-------------------------+-----------+-------------------+-----------+-----------+--------------------+-------------+
@@ -30,3 +30,4 @@ See `Awesome XGBoost <https://github.com/dmlc/xgboost/tree/master/demo>`_ for mo
   input_format
   param_tuning
   custom_metric_obj
+  privacy_preserving
@@ -58,6 +58,7 @@ Notice that the samples are sorted based on their query index in a non-decreasin
     sorted_idx = np.argsort(qid)
     X = X[sorted_idx, :]
     y = y[sorted_idx]
+    qid = qid[sorted_idx]

 The simplest way to train a ranking model is by using the scikit-learn estimator interface. Continuing the previous snippet, we can train a simple ranking model without tuning:
97
doc/tutorials/privacy_preserving.rst
Normal file
@@ -0,0 +1,97 @@
+#############################################
+Privacy Preserving Inference with Concrete ML
+#############################################
+
+`Concrete ML`_ is a specialized library developed by Zama that allows the execution of machine learning models on encrypted data through `Fully Homomorphic Encryption (FHE) <https://www.youtube.com/watch?v=FFox2S4uqEo>`_, thereby preserving data privacy.
+
+To use models such as XGBClassifier, use the following import:
+
+.. code:: python
+
+    from concrete.ml.sklearn import XGBClassifier
+
+***************************************
+Performing Privacy Preserving Inference
+***************************************
+
+Initialization of an XGBClassifier can be done as follows:
+
+.. code:: python
+
+    classifier = XGBClassifier(n_bits=6, [other_hyperparameters])
+
+where ``n_bits`` determines the precision of the input features. Note that a higher value of ``n_bits`` increases the precision of the input features and possibly the final model accuracy, but also results in longer FHE execution time.
+
+Other hyper-parameters that exist in the xgboost library can be used.
+
+******************************
+Model Training and Compilation
+******************************
+
+As is common for scikit-learn like models, it can be trained with the ``.fit()`` method.
+
+.. code:: python
+
+    classifier.fit(X_train, y_train)
+
+After training, the model can be compiled with a calibration dataset, potentially a subset of the training data:
+
+.. code:: python
+
+    classifier.compile(X_calibrate)
+
+This calibration dataset, ``X_calibrate``, is used by Concrete ML to compute the precision (bit-width) of each intermediate value in the model. This is a necessary step to optimize the equivalent FHE circuit.
+
+****************************
+FHE Simulation and Execution
+****************************
+
+To verify model accuracy in encrypted computations, you can run an FHE simulation:
+
+.. code:: python
+
+    predictions = classifier.predict(X_test, fhe="simulate")
+
+This simulation can be used to evaluate the model. The resulting accuracy of this simulation step is representative of the actual FHE execution, without having to pay the cost of an actual FHE execution.
+
+When the model is ready, actual Fully Homomorphic Encryption execution can be performed:
+
+.. code:: python
+
+    predictions = classifier.predict(X_test, fhe="execute")
+
+Note that using ``fhe="execute"`` is a convenient way to assess the model in FHE, but for real deployment, functions to encrypt (on the client), run in FHE (on the server), and finally decrypt (on the client) have to be used for end-to-end privacy-preserving inference.
+
+Concrete ML provides a deployment API to facilitate this process, ensuring end-to-end privacy.
+
+To go further with the deployment API, you can read:
+
+- the `deployment documentation <https://docs.zama.ai/concrete-ml/advanced-topics/client_server>`_
+- the `deployment notebook <https://github.com/zama-ai/concrete-ml/blob/17779ca571d20b001caff5792eb11e76fe2c19ba/docs/advanced_examples/ClientServer.ipynb>`_
+
+*******************************
+Parameter Tuning in Concrete ML
+*******************************
+
+Concrete ML is compatible with standard scikit-learn pipelines such as GridSearchCV or any other hyper-parameter tuning technique.
+
+******************
+Examples and Demos
+******************
+
+- `Sentiment analysis (based on transformers + xgboost) <https://huggingface.co/spaces/zama-fhe/encrypted_sentiment_analysis>`_
+- `XGBoost Classifier <https://github.com/zama-ai/concrete-ml/blob/6966c84b9698d5418209b346900f81d1270c64bd/docs/advanced_examples/XGBClassifier.ipynb>`_
+- `XGBoost Regressor <https://github.com/zama-ai/concrete-ml/blob/6966c84b9698d5418209b346900f81d1270c64bd/docs/advanced_examples/XGBRegressor.ipynb>`_
+
+**********
+Conclusion
+**********
+
+Concrete ML provides a framework for executing privacy-preserving inference by leveraging Fully Homomorphic Encryption, allowing secure and private computations on encrypted data.
+
+More information and examples are given in the `Concrete ML documentation`_.
+
+.. _Concrete ML: https://github.com/zama-ai/concrete-ml
+.. _`Concrete ML documentation`: https://docs.zama.ai/concrete-ml
@@ -144,9 +144,7 @@ XGB_DLL int XGDMatrixCreateFromFile(const char *fname, int silent, DMatrixHandle
  *        See :doc:`/tutorials/input_format` for more info.
  *   \endverbatim
  * - silent (optional): Whether to print message during loading. Default to true.
- * - data_split_mode (optional): Whether to split by row or column. In distributed mode, the
- *   file is split accordingly; otherwise this is only an indicator on how the file was split
- *   beforehand. Default to row.
+ * - data_split_mode (optional): Whether the file was split by row or column beforehand for distributed computing. Default to row.
  * \param out a loaded data matrix
  * \return 0 when success, -1 when failure happens
  */
@@ -174,6 +172,7 @@ XGB_DLL int XGDMatrixCreateFromCSREx(const size_t *indptr, const unsigned *indic
  * \param config JSON encoded configuration. Required values are:
  *   - missing: Which value to represent missing value.
  *   - nthread (optional): Number of threads used for initializing DMatrix.
+ *   - data_split_mode (optional): Whether the data was split by row or column beforehand. Default to row.
  * \param out created dmatrix
  * \return 0 when success, -1 when failure happens
  */
@@ -186,6 +185,7 @@ XGB_DLL int XGDMatrixCreateFromCSR(char const *indptr, char const *indices, char
  * \param config JSON encoded configuration. Required values are:
  *   - missing: Which value to represent missing value.
  *   - nthread (optional): Number of threads used for initializing DMatrix.
+ *   - data_split_mode (optional): Whether the data was split by row or column beforehand. Default to row.
  * \param out created dmatrix
  * \return 0 when success, -1 when failure happens
  */
@@ -200,6 +200,7 @@ XGB_DLL int XGDMatrixCreateFromDense(char const *data, char const *config, DMatr
  * \param config JSON encoded configuration. Supported values are:
  *   - missing: Which value to represent missing value.
  *   - nthread (optional): Number of threads used for initializing DMatrix.
+ *   - data_split_mode (optional): Whether the data was split by row or column beforehand. Default to row.
  * \param out created dmatrix
  * \return 0 when success, -1 when failure happens
  */
@@ -266,6 +267,7 @@ XGB_DLL int XGDMatrixCreateFromDT(void** data,
  * \param config JSON encoded configuration. Required values are:
  *   - missing: Which value to represent missing value.
  *   - nthread (optional): Number of threads used for initializing DMatrix.
+ *   - data_split_mode (optional): Whether the data was split by row or column beforehand. Default to row.
  * \param out created dmatrix
  * \return 0 when success, -1 when failure happens
  */
@@ -278,6 +280,7 @@ XGB_DLL int XGDMatrixCreateFromCudaColumnar(char const *data, char const *config
  * \param config JSON encoded configuration. Required values are:
  *   - missing: Which value to represent missing value.
  *   - nthread (optional): Number of threads used for initializing DMatrix.
+ *   - data_split_mode (optional): Whether the data was split by row or column beforehand. Default to row.
  * \param out created dmatrix
  * \return 0 when success, -1 when failure happens
  */
@@ -552,24 +555,6 @@ XGB_DLL int XGProxyDMatrixSetDataCSR(DMatrixHandle handle, char const *indptr,
 
 /** @} */  // End of Streaming
 
-XGB_DLL int XGImportArrowRecordBatch(DataIterHandle data_handle, void *ptr_array, void *ptr_schema);
-
-/*!
- * \brief Construct DMatrix from arrow using callbacks. Arrow related C API is not stable
- *        and subject to change in the future.
- *
- * \param next Callback function for fetching arrow records.
- * \param config JSON encoded configuration. Required values are:
- *   - missing: Which value to represent missing value.
- *   - nbatch: Number of batches in arrow table.
- *   - nthread (optional): Number of threads used for initializing DMatrix.
- * \param out The created DMatrix.
- *
- * \return 0 when success, -1 when failure happens
- */
-XGB_DLL int XGDMatrixCreateFromArrowCallback(XGDMatrixCallbackNext *next, char const *config,
-                                             DMatrixHandle *out);
-
 /*!
  * \brief create a new dmatrix from sliced content of existing matrix
  * \param handle instance of data matrix to be sliced
@@ -808,6 +793,16 @@ XGB_DLL int XGDMatrixNumCol(DMatrixHandle handle, bst_ulong *out);
  */
 XGB_DLL int XGDMatrixNumNonMissing(DMatrixHandle handle, bst_ulong *out);
 
+/*!
+ * \brief Get the data split mode from DMatrix.
+ *
+ * \param handle the handle to the DMatrix
+ * \param out The output of the data split mode
+ *
+ * \return 0 when success, -1 when failure happens
+ */
+XGB_DLL int XGDMatrixDataSplitMode(DMatrixHandle handle, bst_ulong *out);
+
 /**
  * \brief Get the predictors from DMatrix as CSR matrix for testing. If this is a
  *        quantized DMatrix, quantized values are returned instead.
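For illustration, a minimal sketch of calling the new getter from a host program. It assumes the value written to ``out`` mirrors the internal row/column split enum (0 for row, 1 for column; that mapping is an assumption here, not stated in the diff), and ``"train.libsvm"`` is a placeholder file name:

.. code-block:: cpp

    #include <cstdio>
    #include <xgboost/c_api.h>

    int main() {
      DMatrixHandle dmat;
      if (XGDMatrixCreateFromFile("train.libsvm", 1, &dmat) != 0) {
        std::fprintf(stderr, "%s\n", XGBGetLastError());
        return 1;
      }
      bst_ulong split_mode = 0;
      // 0: split by row, 1: split by column (assumed enum mapping).
      XGDMatrixDataSplitMode(dmat, &split_mode);
      std::printf("data_split_mode = %lu\n", static_cast<unsigned long>(split_mode));
      XGDMatrixFree(dmat);
      return 0;
    }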
@@ -1276,15 +1271,6 @@ XGB_DLL int XGBoosterLoadModelFromBuffer(BoosterHandle handle,
 XGB_DLL int XGBoosterSaveModelToBuffer(BoosterHandle handle, char const *config, bst_ulong *out_len,
                                        char const **out_dptr);
 
-/*!
- * \brief Save booster to a buffer with in binary format.
- *
- * \deprecated since 1.6.0
- * \see XGBoosterSaveModelToBuffer()
- */
-XGB_DLL int XGBoosterGetModelRaw(BoosterHandle handle, bst_ulong *out_len,
-                                 const char **out_dptr);
-
 /*!
  * \brief Memory snapshot based serialization method. Saves everything states
  *        into buffer.
@@ -1308,24 +1294,6 @@ XGB_DLL int XGBoosterSerializeToBuffer(BoosterHandle handle, bst_ulong *out_len,
 XGB_DLL int XGBoosterUnserializeFromBuffer(BoosterHandle handle,
                                            const void *buf, bst_ulong len);
 
-/*!
- * \brief Initialize the booster from rabit checkpoint.
- *        This is used in distributed training API.
- * \param handle handle
- * \param version The output version of the model.
- * \return 0 when success, -1 when failure happens
- */
-XGB_DLL int XGBoosterLoadRabitCheckpoint(BoosterHandle handle,
-                                         int* version);
-
-/*!
- * \brief Save the current checkpoint to rabit.
- * \param handle handle
- * \return 0 when success, -1 when failure happens
- */
-XGB_DLL int XGBoosterSaveRabitCheckpoint(BoosterHandle handle);
-
-
 /*!
  * \brief Save XGBoost's internal configuration into a JSON document. Currently the
  *        support is experimental, function signature may change in the future without
@@ -1554,29 +1522,19 @@ XGB_DLL int XGBoosterFeatureScore(BoosterHandle handle, const char *config,
  * \param config JSON encoded configuration. Accepted JSON keys are:
  *   - xgboost_communicator: The type of the communicator. Can be set as an environment variable.
  *     * rabit: Use Rabit. This is the default if the type is unspecified.
-*     * mpi: Use MPI.
  *     * federated: Use the gRPC interface for Federated Learning.
  *   Only applicable to the Rabit communicator (these are case-sensitive):
  *   - rabit_tracker_uri: Hostname of the tracker.
  *   - rabit_tracker_port: Port number of the tracker.
  *   - rabit_task_id: ID of the current task, can be used to obtain deterministic rank assignment.
  *   - rabit_world_size: Total number of workers.
-*   - rabit_hadoop_mode: Enable Hadoop support.
-*   - rabit_tree_reduce_minsize: Minimal size for tree reduce.
-*   - rabit_reduce_ring_mincount: Minimal count to perform ring reduce.
-*   - rabit_reduce_buffer: Size of the reduce buffer.
-*   - rabit_bootstrap_cache: Size of the bootstrap cache.
-*   - rabit_debug: Enable debugging.
  *   - rabit_timeout: Enable timeout.
  *   - rabit_timeout_sec: Timeout in seconds.
-*   - rabit_enable_tcp_no_delay: Enable TCP no delay on Unix platforms.
  *   Only applicable to the Rabit communicator (these are case-sensitive, and can be set as
  *   environment variables):
  *   - DMLC_TRACKER_URI: Hostname of the tracker.
  *   - DMLC_TRACKER_PORT: Port number of the tracker.
  *   - DMLC_TASK_ID: ID of the current task, can be used to obtain deterministic rank assignment.
-*   - DMLC_ROLE: Role of the current task, "worker" or "server".
-*   - DMLC_NUM_ATTEMPT: Number of attempts after task failure.
  *   - DMLC_WORKER_CONNECT_RETRY: Number of retries to connect to the tracker.
  *   Only applicable to the Federated communicator (use upper case for environment variables, use
  *   lower case for runtime configuration):
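A hedged sketch of a configuration matching the trimmed-down key set above. The entry point name ``XGCommunicatorInit`` is an assumption inferred from the surrounding collective C API, and the host/port values are placeholders:

.. code-block:: cpp

    #include <xgboost/c_api.h>

    void InitWorker() {
      // Only the keys kept by this change; the removed rabit_* knobs are gone.
      char const *config = R"({
        "xgboost_communicator": "rabit",
        "rabit_tracker_uri": "127.0.0.1",
        "rabit_tracker_port": 9091,
        "rabit_timeout": true,
        "rabit_timeout_sec": 300
      })";
      XGCommunicatorInit(config);  // assumed entry point; 0 indicates success
    }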
@@ -157,4 +157,13 @@ struct Result {
 [[nodiscard]] inline auto Fail(std::string msg, std::error_code errc, Result&& prev) {
   return Result{std::move(msg), std::move(errc), std::forward<Result>(prev)};
 }
+
+// We don't have monad, a simple helper would do.
+template <typename Fn>
+Result operator<<(Result&& r, Fn&& fn) {
+  if (!r.OK()) {
+    return std::forward<Result>(r);
+  }
+  return fn();
+}
 }  // namespace xgboost::collective
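The helper reads naturally at call sites: each stage runs only if the previous one succeeded, and a failure short-circuits the chain. A small usage sketch (the ``do_a``/``do_b`` lambdas are hypothetical):

.. code-block:: cpp

    namespace xgboost::collective {
    Result Pipeline() {
      auto do_a = [] { return Success(); };
      auto do_b = [] { return Fail("b failed"); };
      // do_b runs only because do_a returned OK; the final lambda never runs
      // since do_b fails and its Result is forwarded unchanged.
      return do_a() << do_b << [] { return Success(); };
    }
    }  // namespace xgboost::collective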
@@ -215,9 +215,9 @@ class SockAddrV4 {
   static SockAddrV4 Loopback();
   static SockAddrV4 InaddrAny();
 
-  in_port_t Port() const { return ntohs(addr_.sin_port); }
+  [[nodiscard]] in_port_t Port() const { return ntohs(addr_.sin_port); }
 
-  std::string Addr() const {
+  [[nodiscard]] std::string Addr() const {
     char buf[INET_ADDRSTRLEN];
     auto const *s = system::inet_ntop(static_cast<std::int32_t>(SockDomain::kV4), &addr_.sin_addr,
                                       buf, INET_ADDRSTRLEN);
@@ -226,7 +226,7 @@ class SockAddrV4 {
     }
     return {buf};
   }
-  sockaddr_in const &Handle() const { return addr_; }
+  [[nodiscard]] sockaddr_in const &Handle() const { return addr_; }
 };
 
 /**
@@ -243,13 +243,13 @@ class SockAddress {
   explicit SockAddress(SockAddrV6 const &addr) : v6_{addr}, domain_{SockDomain::kV6} {}
   explicit SockAddress(SockAddrV4 const &addr) : v4_{addr} {}
 
-  auto Domain() const { return domain_; }
+  [[nodiscard]] auto Domain() const { return domain_; }
 
-  bool IsV4() const { return Domain() == SockDomain::kV4; }
-  bool IsV6() const { return !IsV4(); }
+  [[nodiscard]] bool IsV4() const { return Domain() == SockDomain::kV4; }
+  [[nodiscard]] bool IsV6() const { return !IsV4(); }
 
-  auto const &V4() const { return v4_; }
-  auto const &V6() const { return v6_; }
+  [[nodiscard]] auto const &V4() const { return v4_; }
+  [[nodiscard]] auto const &V6() const { return v6_; }
 };
 
 /**
@@ -261,6 +261,7 @@ class TCPSocket {
 
  private:
   HandleT handle_{InvalidSocket()};
+  bool non_blocking_{false};
   // There's reliable no way to extract domain from a socket without first binding that
   // socket on macos.
 #if defined(__APPLE__)
@@ -276,7 +277,7 @@ class TCPSocket {
   /**
    * \brief Return the socket domain.
   */
-  auto Domain() const -> SockDomain {
+  [[nodiscard]] auto Domain() const -> SockDomain {
     auto ret_iafamily = [](std::int32_t domain) {
       switch (domain) {
         case AF_INET:
@@ -321,10 +322,10 @@ class TCPSocket {
 #endif  // platforms
   }
 
-  bool IsClosed() const { return handle_ == InvalidSocket(); }
+  [[nodiscard]] bool IsClosed() const { return handle_ == InvalidSocket(); }
 
-  /** \brief get last error code if any */
-  Result GetSockError() const {
+  /** @brief get last error code if any */
+  [[nodiscard]] Result GetSockError() const {
     std::int32_t optval = 0;
     socklen_t len = sizeof(optval);
     auto ret = getsockopt(handle_, SOL_SOCKET, SO_ERROR, reinterpret_cast<char *>(&optval), &len);
@@ -340,7 +341,7 @@ class TCPSocket {
   }
 
   /** \brief check if anything bad happens */
-  bool BadSocket() const {
+  [[nodiscard]] bool BadSocket() const {
     if (IsClosed()) {
       return true;
     }
@@ -352,24 +353,63 @@ class TCPSocket {
     return false;
   }
 
-  void SetNonBlock(bool non_block) {
+  [[nodiscard]] Result NonBlocking(bool non_block) {
 #if defined(_WIN32)
     u_long mode = non_block ? 1 : 0;
-    xgboost_CHECK_SYS_CALL(ioctlsocket(handle_, FIONBIO, &mode), NO_ERROR);
+    if (ioctlsocket(handle_, FIONBIO, &mode) != NO_ERROR) {
+      return system::FailWithCode("Failed to set socket to non-blocking.");
+    }
 #else
     std::int32_t flag = fcntl(handle_, F_GETFL, 0);
-    if (flag == -1) {
-      system::ThrowAtError("fcntl");
+    auto rc = flag;
+    if (rc == -1) {
+      return system::FailWithCode("Failed to get socket flag.");
     }
     if (non_block) {
       flag |= O_NONBLOCK;
     } else {
       flag &= ~O_NONBLOCK;
     }
-    if (fcntl(handle_, F_SETFL, flag) == -1) {
-      system::ThrowAtError("fcntl");
+    rc = fcntl(handle_, F_SETFL, flag);
+    if (rc == -1) {
+      return system::FailWithCode("Failed to set socket to non-blocking.");
     }
 #endif  // _WIN32
+    non_blocking_ = non_block;
+    return Success();
+  }
+  [[nodiscard]] bool NonBlocking() const { return non_blocking_; }
+  [[nodiscard]] Result RecvTimeout(std::chrono::seconds timeout) {
+    // https://stackoverflow.com/questions/2876024/linux-is-there-a-read-or-recv-from-socket-with-timeout
+#if defined(_WIN32)
+    DWORD tv = timeout.count() * 1000;
+    auto rc =
+        setsockopt(Handle(), SOL_SOCKET, SO_RCVTIMEO, reinterpret_cast<char *>(&tv), sizeof(tv));
+#else
+    struct timeval tv;
+    tv.tv_sec = timeout.count();
+    tv.tv_usec = 0;
+    auto rc = setsockopt(Handle(), SOL_SOCKET, SO_RCVTIMEO, reinterpret_cast<char const *>(&tv),
+                         sizeof(tv));
+#endif
+    if (rc != 0) {
+      return system::FailWithCode("Failed to set timeout on recv.");
+    }
+    return Success();
+  }
+
+  [[nodiscard]] Result SetBufSize(std::int32_t n_bytes) {
+    auto rc = setsockopt(this->Handle(), SOL_SOCKET, SO_SNDBUF, reinterpret_cast<char *>(&n_bytes),
+                         sizeof(n_bytes));
+    if (rc != 0) {
+      return system::FailWithCode("Failed to set send buffer size.");
+    }
+    rc = setsockopt(this->Handle(), SOL_SOCKET, SO_RCVBUF, reinterpret_cast<char *>(&n_bytes),
+                    sizeof(n_bytes));
+    if (rc != 0) {
+      return system::FailWithCode("Failed to set recv buffer size.");
+    }
+    return Success();
   }
 
   void SetKeepAlive() {
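Since these setters now return ``Result`` instead of throwing, call sites can check every step explicitly; ``[[nodiscard]]`` keeps the results from being silently dropped. A minimal sketch, assuming an already created ``TCPSocket``:

.. code-block:: cpp

    #include <chrono>

    namespace xgboost::collective {
    // Configure a socket with the new Result-returning setters.
    inline Result ConfigureSocket(TCPSocket *sock) {
      auto rc = sock->NonBlocking(false);  // blocking I/O
      if (!rc.OK()) {
        return rc;
      }
      rc = sock->RecvTimeout(std::chrono::seconds{30});  // give up on recv after 30s
      if (!rc.OK()) {
        return rc;
      }
      return sock->SetBufSize(1 << 20);  // 1 MiB send/recv buffers
    }
    }  // namespace xgboost::collective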
@@ -391,14 +431,31 @@ class TCPSocket {
   /**
    * \brief Accept new connection, returns a new TCP socket for the new connection.
   */
   TCPSocket Accept() {
-    HandleT newfd = accept(handle_, nullptr, nullptr);
-    if (newfd == InvalidSocket()) {
+    HandleT newfd = accept(Handle(), nullptr, nullptr);
+#if defined(_WIN32)
+    auto interrupt = WSAEINTR;
+#else
+    auto interrupt = EINTR;
+#endif
+    if (newfd == InvalidSocket() && system::LastError() != interrupt) {
       system::ThrowAtError("accept");
     }
     TCPSocket newsock{newfd};
     return newsock;
   }
 
+  [[nodiscard]] Result Accept(TCPSocket *out, SockAddrV4 *addr) {
+    struct sockaddr_in caddr;
+    socklen_t caddr_len = sizeof(caddr);
+    HandleT newfd = accept(Handle(), reinterpret_cast<sockaddr *>(&caddr), &caddr_len);
+    if (newfd == InvalidSocket()) {
+      return system::FailWithCode("Failed to accept.");
+    }
+    *addr = SockAddrV4{caddr};
+    *out = TCPSocket{newfd};
+    return Success();
+  }
+
   ~TCPSocket() {
     if (!IsClosed()) {
       Close();
@@ -413,9 +470,9 @@ class TCPSocket {
     return *this;
   }
   /**
-   * \brief Return the native socket file descriptor.
+   * @brief Return the native socket file descriptor.
   */
-  HandleT const &Handle() const { return handle_; }
+  [[nodiscard]] HandleT const &Handle() const { return handle_; }
   /**
    * \brief Listen to incoming requests. Should be called after bind.
   */
@@ -423,7 +480,7 @@ class TCPSocket {
   /**
    * \brief Bind socket to INADDR_ANY, return the port selected by the OS.
   */
-  in_port_t BindHost() {
+  [[nodiscard]] in_port_t BindHost() {
     if (Domain() == SockDomain::kV6) {
       auto addr = SockAddrV6::InaddrAny();
       auto handle = reinterpret_cast<sockaddr const *>(&addr.Handle());
@@ -448,10 +505,53 @@ class TCPSocket {
       return ntohs(res_addr.sin_port);
     }
   }
 
+  [[nodiscard]] auto Port() const {
+    if (this->Domain() == SockDomain::kV4) {
+      sockaddr_in res_addr;
+      socklen_t addrlen = sizeof(res_addr);
+      auto code = getsockname(handle_, reinterpret_cast<sockaddr *>(&res_addr), &addrlen);
+      if (code != 0) {
+        return std::make_pair(system::FailWithCode("getsockname"), std::int32_t{0});
+      }
+      return std::make_pair(Success(), std::int32_t{ntohs(res_addr.sin_port)});
+    } else {
+      sockaddr_in6 res_addr;
+      socklen_t addrlen = sizeof(res_addr);
+      auto code = getsockname(handle_, reinterpret_cast<sockaddr *>(&res_addr), &addrlen);
+      if (code != 0) {
+        return std::make_pair(system::FailWithCode("getsockname"), std::int32_t{0});
+      }
+      return std::make_pair(Success(), std::int32_t{ntohs(res_addr.sin6_port)});
+    }
+  }
+
+  [[nodiscard]] Result Bind(StringView ip, std::int32_t *port) {
+    // bind socket handle_ to ip
+    auto addr = MakeSockAddress(ip, 0);
+    std::int32_t errc{0};
+    if (addr.IsV4()) {
+      auto handle = reinterpret_cast<sockaddr const *>(&addr.V4().Handle());
+      errc = bind(handle_, handle, sizeof(std::remove_reference_t<decltype(addr.V4().Handle())>));
+    } else {
+      auto handle = reinterpret_cast<sockaddr const *>(&addr.V6().Handle());
+      errc = bind(handle_, handle, sizeof(std::remove_reference_t<decltype(addr.V6().Handle())>));
+    }
+    if (errc != 0) {
+      return system::FailWithCode("Failed to bind socket.");
+    }
+    auto [rc, new_port] = this->Port();
+    if (!rc.OK()) {
+      return std::move(rc);
+    }
+    *port = new_port;
+    return Success();
+  }
+
   /**
    * \brief Send data, without error then all data should be sent.
   */
-  auto SendAll(void const *buf, std::size_t len) {
+  [[nodiscard]] auto SendAll(void const *buf, std::size_t len) {
     char const *_buf = reinterpret_cast<const char *>(buf);
     std::size_t ndone = 0;
     while (ndone < len) {
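The new ``Bind``/``Port`` pair lets a caller bind with port 0 and learn which port the OS picked, all through ``Result``. A hedged sketch (loopback address is a placeholder):

.. code-block:: cpp

    namespace xgboost::collective {
    // Bind to loopback and report the OS-assigned port via *port.
    inline Result BindLoopback(TCPSocket *sock, std::int32_t *port) {
      return sock->Bind(StringView{"127.0.0.1"}, port);
    }
    }  // namespace xgboost::collective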
@@ -470,7 +570,7 @@ class TCPSocket {
   /**
    * \brief Receive data, without error then all data should be received.
   */
-  auto RecvAll(void *buf, std::size_t len) {
+  [[nodiscard]] auto RecvAll(void *buf, std::size_t len) {
     char *_buf = reinterpret_cast<char *>(buf);
     std::size_t ndone = 0;
     while (ndone < len) {
@@ -524,7 +624,15 @@ class TCPSocket {
   */
   void Close() {
     if (InvalidSocket() != handle_) {
+#if defined(_WIN32)
+      auto rc = system::CloseSocket(handle_);
+      // it's possible that we close TCP sockets after finalizing WSA due to detached thread.
+      if (rc != 0 && system::LastError() != WSANOTINITIALISED) {
+        system::ThrowAtError("close", rc);
+      }
+#else
       xgboost_CHECK_SYS_CALL(system::CloseSocket(handle_), 0);
+#endif
       handle_ = InvalidSocket();
     }
   }
@@ -546,6 +654,24 @@ class TCPSocket {
     socket.domain_ = domain;
 #endif  // defined(__APPLE__)
     return socket;
+#endif  // defined(xgboost_IS_MINGW)
+  }
+
+  static TCPSocket *CreatePtr(SockDomain domain) {
+#if defined(xgboost_IS_MINGW)
+    MingWError();
+    return nullptr;
+#else
+    auto fd = socket(static_cast<std::int32_t>(domain), SOCK_STREAM, 0);
+    if (fd == InvalidSocket()) {
+      system::ThrowAtError("socket");
+    }
+    auto socket = new TCPSocket{fd};
+
+#if defined(__APPLE__)
+    socket->domain_ = domain;
+#endif  // defined(__APPLE__)
+    return socket;
 #endif  // defined(xgboost_IS_MINGW)
   }
 };
@@ -567,12 +693,36 @@ class TCPSocket {
                     xgboost::collective::TCPSocket *out_conn);
 
 /**
- * \brief Get the local host name.
+ * @brief Get the local host name.
 */
-inline std::string GetHostName() {
-  char buf[HOST_NAME_MAX];
-  xgboost_CHECK_SYS_CALL(gethostname(&buf[0], HOST_NAME_MAX), 0);
-  return buf;
+[[nodiscard]] Result GetHostName(std::string *p_out);
+
+/**
+ * @brief inet_ntop
+ */
+template <typename H>
+Result INetNToP(H const &host, std::string *p_out) {
+  std::string &ip = *p_out;
+  switch (host->h_addrtype) {
+    case AF_INET: {
+      auto addr = reinterpret_cast<struct in_addr *>(host->h_addr_list[0]);
+      char str[INET_ADDRSTRLEN];
+      inet_ntop(AF_INET, addr, str, INET_ADDRSTRLEN);
+      ip = str;
+      break;
+    }
+    case AF_INET6: {
+      auto addr = reinterpret_cast<struct in6_addr *>(host->h_addr_list[0]);
+      char str[INET6_ADDRSTRLEN];
+      inet_ntop(AF_INET6, addr, str, INET6_ADDRSTRLEN);
+      ip = str;
+      break;
+    }
+    default: {
+      return Fail("Invalid address type.");
+    }
+  }
+  return Success();
 }
 }  // namespace collective
 }  // namespace xgboost
@@ -29,31 +29,37 @@ struct DeviceSym {
  * viewing types like `linalg::TensorView`.
 */
 struct DeviceOrd {
+  // Constant representing the device ID of CPU.
+  static bst_d_ordinal_t constexpr CPUOrdinal() { return -1; }
+  static bst_d_ordinal_t constexpr InvalidOrdinal() { return -2; }
+
   enum Type : std::int16_t { kCPU = 0, kCUDA = 1 } device{kCPU};
   // CUDA device ordinal.
-  bst_d_ordinal_t ordinal{-1};
+  bst_d_ordinal_t ordinal{CPUOrdinal()};
 
   [[nodiscard]] bool IsCUDA() const { return device == kCUDA; }
   [[nodiscard]] bool IsCPU() const { return device == kCPU; }
 
-  DeviceOrd() = default;
+  constexpr DeviceOrd() = default;
   constexpr DeviceOrd(Type type, bst_d_ordinal_t ord) : device{type}, ordinal{ord} {}
 
-  DeviceOrd(DeviceOrd const& that) = default;
-  DeviceOrd& operator=(DeviceOrd const& that) = default;
-  DeviceOrd(DeviceOrd&& that) = default;
-  DeviceOrd& operator=(DeviceOrd&& that) = default;
+  constexpr DeviceOrd(DeviceOrd const& that) = default;
+  constexpr DeviceOrd& operator=(DeviceOrd const& that) = default;
+  constexpr DeviceOrd(DeviceOrd&& that) = default;
+  constexpr DeviceOrd& operator=(DeviceOrd&& that) = default;
 
   /**
    * @brief Constructor for CPU.
   */
-  [[nodiscard]] constexpr static auto CPU() { return DeviceOrd{kCPU, -1}; }
+  [[nodiscard]] constexpr static auto CPU() { return DeviceOrd{kCPU, CPUOrdinal()}; }
   /**
    * @brief Constructor for CUDA device.
    *
    * @param ordinal CUDA device ordinal.
   */
-  [[nodiscard]] static auto CUDA(bst_d_ordinal_t ordinal) { return DeviceOrd{kCUDA, ordinal}; }
+  [[nodiscard]] static constexpr auto CUDA(bst_d_ordinal_t ordinal) {
+    return DeviceOrd{kCUDA, ordinal};
+  }
 
   [[nodiscard]] bool operator==(DeviceOrd const& that) const {
     return device == that.device && ordinal == that.ordinal;
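With the constructors now ``constexpr`` and the magic ``-1`` replaced by a named constant, device ordinals can be formed and checked at compile time. A small sketch:

.. code-block:: cpp

    #include <xgboost/context.h>  // assumed location of DeviceOrd

    namespace {
    constexpr auto kCpu = xgboost::DeviceOrd::CPU();
    constexpr auto kGpu0 = xgboost::DeviceOrd::CUDA(0);
    // The CPU ordinal is the named constant instead of a bare -1.
    static_assert(kCpu.ordinal == xgboost::DeviceOrd::CPUOrdinal());
    static_assert(kGpu0.device == xgboost::DeviceOrd::kCUDA);
    }  // namespace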
@@ -78,25 +84,26 @@ struct DeviceOrd {
 
 static_assert(sizeof(DeviceOrd) == sizeof(std::int32_t));
 
+std::ostream& operator<<(std::ostream& os, DeviceOrd ord);
+
 /**
  * @brief Runtime context for XGBoost. Contains information like threads and device.
 */
 struct Context : public XGBoostParameter<Context> {
  private:
+  // User interfacing parameter for device ordinal
   std::string device{DeviceSym::CPU()};  // NOLINT
-  // The device object for the current context. We are in the middle of replacing the
-  // `gpu_id` with this device field.
+  // The device ordinal set by user
   DeviceOrd device_{DeviceOrd::CPU()};
 
  public:
-  // Constant representing the device ID of CPU.
-  static bst_d_ordinal_t constexpr kCpuId = -1;
-  static bst_d_ordinal_t constexpr InvalidOrdinal() { return -2; }
   static std::int64_t constexpr kDefaultSeed = 0;
 
  public:
   Context();
 
+  void Init(Args const& kwargs);
+
   template <typename Container>
   Args UpdateAllowUnknown(Container const& kwargs) {
     auto args = XGBoostParameter<Context>::UpdateAllowUnknown(kwargs);
@@ -104,7 +111,6 @@ struct Context : public XGBoostParameter<Context> {
     return args;
   }
 
-  std::int32_t gpu_id{kCpuId};
   // The number of threads to use if OpenMP is enabled. If equals 0, use the system default.
   std::int32_t nthread{0};  // NOLINT
   // stored random seed
@@ -116,7 +122,8 @@ struct Context : public XGBoostParameter<Context> {
   bool validate_parameters{false};
 
   /**
-   * @brief Configure the parameter `gpu_id'.
+   * @brief Configure the parameter `device'. Deprecated, will remove once `gpu_id` is
+   *        removed.
    *
    * @param require_gpu Whether GPU is explicitly required by the user through other
    *                    configurations.
@@ -212,9 +219,7 @@ struct Context : public XGBoostParameter<Context> {
  private:
   void SetDeviceOrdinal(Args const& kwargs);
   Context& SetDevice(DeviceOrd d) {
-    this->device_ = d;
-    this->gpu_id = d.ordinal;  // this can be removed once we move away from `gpu_id`.
-    this->device = d.Name();
+    this->device = (this->device_ = d).Name();
     return *this;
   }
@@ -106,10 +106,10 @@ class MetaInfo {
   MetaInfo& operator=(MetaInfo&& that) = default;
   MetaInfo& operator=(MetaInfo const& that) = delete;
 
-  /*!
-   * \brief Validate all metainfo.
+  /**
+   * @brief Validate all metainfo.
   */
-  void Validate(int32_t device) const;
+  void Validate(DeviceOrd device) const;
 
   MetaInfo Slice(common::Span<int32_t const> ridxs) const;
 
@@ -559,8 +559,7 @@ class DMatrix {
   *
   * \param uri The URI of input.
   * \param silent Whether print information during loading.
-  * \param data_split_mode In distributed mode, split the input according this mode; otherwise,
-  *        it's just an indicator on how the input was split beforehand.
+  * \param data_split_mode Indicate how the data was split beforehand.
   * \return The created DMatrix.
   */
  static DMatrix* Load(const std::string& uri, bool silent = true,
@@ -88,9 +88,9 @@ class HostDeviceVector {
   static_assert(std::is_standard_layout<T>::value, "HostDeviceVector admits only POD types");
 
  public:
-  explicit HostDeviceVector(size_t size = 0, T v = T(), int device = -1);
-  HostDeviceVector(std::initializer_list<T> init, int device = -1);
-  explicit HostDeviceVector(const std::vector<T>& init, int device = -1);
+  explicit HostDeviceVector(size_t size = 0, T v = T(), DeviceOrd device = DeviceOrd::CPU());
+  HostDeviceVector(std::initializer_list<T> init, DeviceOrd device = DeviceOrd::CPU());
+  explicit HostDeviceVector(const std::vector<T>& init, DeviceOrd device = DeviceOrd::CPU());
   ~HostDeviceVector();
 
   HostDeviceVector(const HostDeviceVector<T>&) = delete;
@@ -99,17 +99,9 @@ class HostDeviceVector {
   HostDeviceVector<T>& operator=(const HostDeviceVector<T>&) = delete;
   HostDeviceVector<T>& operator=(HostDeviceVector<T>&&);
 
-  bool Empty() const { return Size() == 0; }
-  size_t Size() const;
-  int DeviceIdx() const;
-  DeviceOrd Device() const {
-    auto idx = this->DeviceIdx();
-    if (idx == DeviceOrd::CPU().ordinal) {
-      return DeviceOrd::CPU();
-    } else {
-      return DeviceOrd::CUDA(idx);
-    }
-  }
+  [[nodiscard]] bool Empty() const { return Size() == 0; }
+  [[nodiscard]] std::size_t Size() const;
+  [[nodiscard]] DeviceOrd Device() const;
   common::Span<T> DeviceSpan();
   common::Span<const T> ConstDeviceSpan() const;
   common::Span<const T> DeviceSpan() const { return ConstDeviceSpan(); }
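Call sites migrate from the integer ``DeviceIdx()`` to the typed ``Device()``. A sketch of the before/after dispatch pattern:

.. code-block:: cpp

    // Previously: if (vec.DeviceIdx() == -1) { ... use host ... }
    template <typename T>
    void Dispatch(xgboost::HostDeviceVector<T> const &vec) {
      if (vec.Device().IsCPU()) {
        auto const &h = vec.ConstHostVector();  // host path
        (void)h;
      } else {
        auto d = vec.ConstDeviceSpan();         // device path
        (void)d;
      }
    }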
@@ -135,13 +127,12 @@ class HostDeviceVector {
   const std::vector<T>& ConstHostVector() const;
   const std::vector<T>& HostVector() const { return ConstHostVector(); }
 
-  bool HostCanRead() const;
-  bool HostCanWrite() const;
-  bool DeviceCanRead() const;
-  bool DeviceCanWrite() const;
-  GPUAccess DeviceAccess() const;
+  [[nodiscard]] bool HostCanRead() const;
+  [[nodiscard]] bool HostCanWrite() const;
+  [[nodiscard]] bool DeviceCanRead() const;
+  [[nodiscard]] bool DeviceCanWrite() const;
+  [[nodiscard]] GPUAccess DeviceAccess() const;
 
-  void SetDevice(int device) const;
   void SetDevice(DeviceOrd device) const;
 
   void Resize(size_t new_size, T v = T());
@@ -372,6 +372,19 @@ class Json {
   /*! \brief Use your own JsonWriter. */
   static void Dump(Json json, JsonWriter* writer);
 
+  template <typename Container = std::string>
+  static Container Dump(Json json) {
+    if constexpr (std::is_same_v<Container, std::string>) {
+      std::string str;
+      Dump(json, &str);
+      return str;
+    } else {
+      std::vector<char> str;
+      Dump(json, &str);
+      return str;
+    }
+  }
+
   Json() = default;
 
   // number
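The new overload removes the need for a separate output variable at call sites. A sketch:

.. code-block:: cpp

    #include <xgboost/json.h>

    std::string ToString(xgboost::Json const &config) {
      // Defaults to std::string; Json::Dump<std::vector<char>>(config) would
      // select the raw character buffer instead.
      return xgboost::Json::Dump(config);
    }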
@@ -595,44 +608,6 @@ using Boolean = JsonBoolean;
 using String = JsonString;
 using Null = JsonNull;
 
-// Utils tailored for XGBoost.
-namespace detail {
-template <typename Head>
-bool TypeCheckImpl(Json const& value) {
-  return IsA<Head>(value);
-}
-
-template <typename Head, typename... JT>
-std::enable_if_t<sizeof...(JT) != 0, bool> TypeCheckImpl(Json const& value) {
-  return IsA<Head>(value) || TypeCheckImpl<JT...>(value);
-}
-
-template <typename Head>
-std::string TypeCheckError() {
-  return "`" + Head{}.TypeStr() + "`";
-}
-
-template <typename Head, typename... JT>
-std::enable_if_t<sizeof...(JT) != 0, std::string> TypeCheckError() {
-  return "`" + Head{}.TypeStr() + "`, " + TypeCheckError<JT...>();
-}
-}  // namespace detail
-
-/**
- * \brief Type check for JSON-based parameters
- *
- * \tparam JT    Expected JSON types.
- * \param value  Value to be checked.
- */
-template <typename... JT>
-void TypeCheck(Json const& value, StringView name) {
-  if (!detail::TypeCheckImpl<JT...>(value)) {
-    LOG(FATAL) << "Invalid type for: `" << name << "`, expecting one of the: {`"
-               << detail::TypeCheckError<JT...>() << "}, got: `" << value.GetValue().TypeStr()
-               << "`";
-  }
-}
-
 /**
  * \brief Convert XGBoost parameter to JSON object.
  *
@@ -603,13 +603,13 @@ auto MakeTensorView(Context const *ctx, Order order, common::Span<T> data, S &&.
 
 template <typename T, typename... S>
 auto MakeTensorView(Context const *ctx, HostDeviceVector<T> *data, S &&...shape) {
-  auto span = ctx->IsCPU() ? data->HostSpan() : data->DeviceSpan();
+  auto span = ctx->IsCUDA() ? data->DeviceSpan() : data->HostSpan();
   return MakeTensorView(ctx->Device(), span, std::forward<S>(shape)...);
 }
 
 template <typename T, typename... S>
 auto MakeTensorView(Context const *ctx, HostDeviceVector<T> const *data, S &&...shape) {
-  auto span = ctx->IsCPU() ? data->ConstHostSpan() : data->ConstDeviceSpan();
+  auto span = ctx->IsCUDA() ? data->ConstDeviceSpan() : data->ConstHostSpan();
   return MakeTensorView(ctx->Device(), span, std::forward<S>(shape)...);
 }
 
@@ -659,13 +659,13 @@ auto MakeVec(T *ptr, size_t s, DeviceOrd device = DeviceOrd::CPU()) {
 
 template <typename T>
 auto MakeVec(HostDeviceVector<T> *data) {
-  return MakeVec(data->DeviceIdx() == -1 ? data->HostPointer() : data->DevicePointer(),
-                 data->Size(), data->Device());
+  return MakeVec(data->Device().IsCPU() ? data->HostPointer() : data->DevicePointer(), data->Size(),
+                 data->Device());
 }
 
 template <typename T>
 auto MakeVec(HostDeviceVector<T> const *data) {
-  return MakeVec(data->DeviceIdx() == -1 ? data->ConstHostPointer() : data->ConstDevicePointer(),
+  return MakeVec(data->Device().IsCPU() ? data->ConstHostPointer() : data->ConstDevicePointer(),
                  data->Size(), data->Device());
 }
 
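The dispatch in these helpers now keys off the typed device rather than a raw ordinal. A hedged sketch of building a 2-d view over a ``HostDeviceVector`` (the shape values are arbitrary):

.. code-block:: cpp

    #include <xgboost/host_device_vector.h>
    #include <xgboost/linalg.h>  // assumed location of MakeTensorView

    void View(xgboost::Context const *ctx) {
      xgboost::HostDeviceVector<float> data(3 * 4, 1.0f, ctx->Device());
      // Picks the device span when ctx is CUDA, the host span otherwise.
      auto t = xgboost::linalg::MakeTensorView(ctx, &data, 3, 4);
      (void)t;
    }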
@ -757,13 +757,13 @@ class Tensor {
|
|||||||
Order order_{Order::kC};
|
Order order_{Order::kC};
|
||||||
|
|
||||||
template <typename I, std::int32_t D>
|
template <typename I, std::int32_t D>
|
||||||
void Initialize(I const (&shape)[D], std::int32_t device) {
|
void Initialize(I const (&shape)[D], DeviceOrd device) {
|
||||||
static_assert(D <= kDim, "Invalid shape.");
|
static_assert(D <= kDim, "Invalid shape.");
|
||||||
std::copy(shape, shape + D, shape_);
|
std::copy(shape, shape + D, shape_);
|
||||||
for (auto i = D; i < kDim; ++i) {
|
for (auto i = D; i < kDim; ++i) {
|
||||||
shape_[i] = 1;
|
shape_[i] = 1;
|
||||||
}
|
}
|
||||||
if (device >= 0) {
|
if (device.IsCUDA()) {
|
||||||
data_.SetDevice(device);
|
data_.SetDevice(device);
|
||||||
data_.ConstDevicePointer(); // Pull to device;
|
data_.ConstDevicePointer(); // Pull to device;
|
||||||
}
|
}
|
||||||
@ -780,14 +780,11 @@ class Tensor {
|
|||||||
* See \ref TensorView for parameters of this constructor.
|
* See \ref TensorView for parameters of this constructor.
|
||||||
*/
|
*/
|
||||||
template <typename I, int32_t D>
|
template <typename I, int32_t D>
|
||||||
explicit Tensor(I const (&shape)[D], std::int32_t device, Order order = kC)
|
|
||||||
: Tensor{common::Span<I const, D>{shape}, device, order} {}
|
|
||||||
template <typename I, int32_t D>
|
|
||||||
explicit Tensor(I const (&shape)[D], DeviceOrd device, Order order = kC)
|
explicit Tensor(I const (&shape)[D], DeviceOrd device, Order order = kC)
|
||||||
: Tensor{common::Span<I const, D>{shape}, device.ordinal, order} {}
|
: Tensor{common::Span<I const, D>{shape}, device, order} {}
|
||||||
|
|
||||||
template <typename I, size_t D>
|
template <typename I, size_t D>
|
||||||
explicit Tensor(common::Span<I const, D> shape, std::int32_t device, Order order = kC)
|
explicit Tensor(common::Span<I const, D> shape, DeviceOrd device, Order order = kC)
|
||||||
: order_{order} {
|
: order_{order} {
|
||||||
// No device unroll as this is a host only function.
|
// No device unroll as this is a host only function.
|
||||||
std::copy(shape.data(), shape.data() + D, shape_);
|
std::copy(shape.data(), shape.data() + D, shape_);
|
||||||
@ -795,11 +792,11 @@ class Tensor {
|
|||||||
shape_[i] = 1;
|
shape_[i] = 1;
|
||||||
}
|
}
|
||||||
auto size = detail::CalcSize(shape_);
|
auto size = detail::CalcSize(shape_);
|
||||||
if (device >= 0) {
|
if (device.IsCUDA()) {
|
||||||
data_.SetDevice(device);
|
data_.SetDevice(device);
|
||||||
}
|
}
|
||||||
data_.Resize(size);
|
data_.Resize(size);
|
||||||
if (device >= 0) {
|
if (device.IsCUDA()) {
|
||||||
data_.DevicePointer(); // Pull to device
|
data_.DevicePointer(); // Pull to device
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -807,7 +804,7 @@ class Tensor {
|
|||||||
* Initialize from 2 host iterators.
|
* Initialize from 2 host iterators.
|
||||||
*/
|
*/
|
||||||
template <typename It, typename I, int32_t D>
|
template <typename It, typename I, int32_t D>
|
||||||
explicit Tensor(It begin, It end, I const (&shape)[D], std::int32_t device, Order order = kC)
|
explicit Tensor(It begin, It end, I const (&shape)[D], DeviceOrd device, Order order = kC)
|
||||||
: order_{order} {
|
: order_{order} {
|
||||||
auto &h_vec = data_.HostVector();
|
auto &h_vec = data_.HostVector();
|
||||||
h_vec.insert(h_vec.begin(), begin, end);
|
h_vec.insert(h_vec.begin(), begin, end);
|
||||||
@@ -816,7 +813,7 @@ class Tensor {
  }

  template <typename I, int32_t D>
-  explicit Tensor(std::initializer_list<T> data, I const (&shape)[D], std::int32_t device,
+  explicit Tensor(std::initializer_list<T> data, I const (&shape)[D], DeviceOrd device,
                  Order order = kC)
      : order_{order} {
    auto &h_vec = data_.HostVector();
@@ -824,10 +821,6 @@ class Tensor {
    // shape
    this->Initialize(shape, device);
  }
-  template <typename I, int32_t D>
-  explicit Tensor(std::initializer_list<T> data, I const (&shape)[D], DeviceOrd device,
-                  Order order = kC)
-      : Tensor{data, shape, device.ordinal, order} {}
  /**
   * \brief Index operator. Not thread safe, should not be used in performance critical
   * region. For more efficient indexing, consider getting a view first.
@@ -944,9 +937,7 @@ class Tensor {
  /**
   * \brief Set device ordinal for this tensor.
   */
-  void SetDevice(int32_t device) const { data_.SetDevice(device); }
  void SetDevice(DeviceOrd device) const { data_.SetDevice(device); }
-  [[nodiscard]] int32_t DeviceIdx() const { return data_.DeviceIdx(); }
  [[nodiscard]] DeviceOrd Device() const { return data_.Device(); }
};

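With the `int32_t` overloads removed, device placement on a tensor is queried and set only through `DeviceOrd`. A hedged usage sketch (the function name is hypothetical):

#include <xgboost/context.h>
#include <xgboost/linalg.h>

void MoveToCuda(xgboost::linalg::Tensor<float, 2> *t) {
  // Before: t->SetDevice(0);  auto ord = t->DeviceIdx();
  t->SetDevice(xgboost::DeviceOrd::CUDA(0));
  xgboost::DeviceOrd d = t->Device();
  if (d.IsCUDA()) {
    // Data is pulled to CUDA device d.ordinal on first device access.
  }
}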
@@ -962,7 +953,7 @@ using Vector = Tensor<T, 1>;
template <typename T, typename... Index>
auto Empty(Context const *ctx, Index &&...index) {
  Tensor<T, sizeof...(Index)> t;
-  t.SetDevice(ctx->gpu_id);
+  t.SetDevice(ctx->Device());
  t.Reshape(index...);
  return t;
}
@@ -973,7 +964,7 @@ auto Empty(Context const *ctx, Index &&...index) {
template <typename T, typename... Index>
auto Constant(Context const *ctx, T v, Index &&...index) {
  Tensor<T, sizeof...(Index)> t;
-  t.SetDevice(ctx->gpu_id);
+  t.SetDevice(ctx->Device());
  t.Reshape(index...);
  t.Data()->Fill(std::move(v));
  return t;
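`Empty` and `Constant` now read the device from the `Context` as a `DeviceOrd` instead of the deprecated `gpu_id` ordinal. A sketch of the callers' view:

#include <xgboost/context.h>
#include <xgboost/linalg.h>

void MakeHelpers() {
  xgboost::Context ctx;  // defaults to CPU, so ctx.Device() is DeviceOrd::CPU()
  auto e = xgboost::linalg::Empty<float>(&ctx, 2, 3);    // uninitialized 2x3 tensor
  auto c = xgboost::linalg::Constant(&ctx, 1.0f, 2, 3);  // 2x3 tensor filled with 1.0f
}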
@@ -990,8 +981,8 @@ auto Zeros(Context const *ctx, Index &&...index) {
// Only first axis is supported for now.
template <typename T, int32_t D>
void Stack(Tensor<T, D> *l, Tensor<T, D> const &r) {
-  if (r.DeviceIdx() >= 0) {
-    l->SetDevice(r.DeviceIdx());
+  if (r.Device().IsCUDA()) {
+    l->SetDevice(r.Device());
  }
  l->ModifyInplace([&](HostDeviceVector<T> *data, common::Span<size_t, D> shape) {
    for (size_t i = 1; i < D; ++i) {
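`Stack` now propagates the right-hand tensor's device through `Device()` rather than the raw ordinal. A hypothetical use, assuming stacking along the first axis as the comment above states:

#include <xgboost/context.h>
#include <xgboost/linalg.h>

void StackDemo() {
  xgboost::Context ctx;  // CPU context for this sketch
  auto l = xgboost::linalg::Zeros<float>(&ctx, 2, 4);
  auto r = xgboost::linalg::Zeros<float>(&ctx, 3, 4);
  // Were r on a CUDA device, l would first be moved to r.Device().
  xgboost::linalg::Stack(&l, r);  // l's first axis grows: shape becomes (5, 4)
}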
@@ -52,9 +52,9 @@ class PredictionContainer : public DMatrixCache<PredictionCacheEntry> {

 public:
  PredictionContainer() : DMatrixCache<PredictionCacheEntry>{DefaultSize()} {}
-  PredictionCacheEntry& Cache(std::shared_ptr<DMatrix> m, std::int32_t device) {
+  PredictionCacheEntry& Cache(std::shared_ptr<DMatrix> m, DeviceOrd device) {
    auto p_cache = this->CacheItem(m);
-    if (device != Context::kCpuId) {
+    if (device.IsCUDA()) {
      p_cache->predictions.SetDevice(device);
    }
    return *p_cache;
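The prediction cache applies the same predicate swap: the old comparison against `Context::kCpuId` (-1) becomes a direct `IsCUDA()` query. Restated in isolation (the helper is hypothetical):

#include <xgboost/context.h>

void PinPredictions(xgboost::DeviceOrd device) {
  // Old: if (device != Context::kCpuId) { ... }
  if (device.IsCUDA()) {
    // SetDevice(device) on the cached prediction vector.
  }
}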
@@ -29,7 +29,7 @@ struct StringView {
 public:
  constexpr StringView() = default;
  constexpr StringView(CharT const* str, std::size_t size) : str_{str}, size_{size} {}
-  explicit StringView(std::string const& str) : str_{str.c_str()}, size_{str.size()} {}
+  StringView(std::string const& str) : str_{str.c_str()}, size_{str.size()} {}  // NOLINT
  constexpr StringView(CharT const* str)  // NOLINT
      : str_{str}, size_{str == nullptr ? 0ul : Traits::length(str)} {}

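Dropping `explicit` lets a `std::string` convert to `StringView` implicitly at call sites (hence the NOLINT). A small sketch, assuming the public `xgboost/string_view.h` header:

#include <string>

#include <xgboost/string_view.h>

void Log(xgboost::StringView msg);  // hypothetical consumer

void Demo(std::string const &s) {
  Log(s);  // previously required Log(xgboost::StringView{s})
}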
@@ -4,16 +4,16 @@ list(APPEND JVM_SOURCES
  ${PROJECT_SOURCE_DIR}/jvm-packages/xgboost4j/src/native/xgboost4j.cpp
  ${PROJECT_SOURCE_DIR}/jvm-packages/xgboost4j-gpu/src/native/xgboost4j-gpu.cpp)

-if (USE_CUDA)
+if(USE_CUDA)
  list(APPEND JVM_SOURCES
    ${PROJECT_SOURCE_DIR}/jvm-packages/xgboost4j-gpu/src/native/xgboost4j-gpu.cu)
-endif (USE_CUDA)
+endif()

add_library(xgboost4j SHARED ${JVM_SOURCES} ${XGBOOST_OBJ_SOURCES})

-if (ENABLE_ALL_WARNINGS)
+if(ENABLE_ALL_WARNINGS)
  target_compile_options(xgboost4j PUBLIC -Wall -Wextra)
-endif (ENABLE_ALL_WARNINGS)
+endif()

target_link_libraries(xgboost4j PRIVATE objxgboost)
target_include_directories(xgboost4j
@@ -1,5 +1,5 @@
 /*
- Copyright (c) 2014-2022 by Contributors
+ Copyright (c) 2014-2023 by Contributors

 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -32,57 +32,53 @@ class ExternalCheckpointManagerSuite extends AnyFunSuite with TmpFolderPerSuite
  }

  private def createNewModels():
      (String, XGBoostClassificationModel, XGBoostClassificationModel) = {
    val tmpPath = createTmpFolder("test").toAbsolutePath.toString
-    val (model4, model8) = {
+    val (model2, model4) = {
      val training = buildDataFrame(Classification.train)
      val paramMap = produceParamMap(tmpPath, 2)
      (new XGBoostClassifier(paramMap ++ Seq("num_round" -> 2)).fit(training),
        new XGBoostClassifier(paramMap ++ Seq("num_round" -> 4)).fit(training))
    }
-    (tmpPath, model4, model8)
+    (tmpPath, model2, model4)
  }

  test("test update/load models") {
-    val (tmpPath, model4, model8) = createNewModels()
+    val (tmpPath, model2, model4) = createNewModels()
    val manager = new ExternalCheckpointManager(tmpPath, FileSystem.get(sc.hadoopConfiguration))

-    manager.updateCheckpoint(model4._booster.booster)
+    manager.updateCheckpoint(model2._booster.booster)
    var files = FileSystem.get(sc.hadoopConfiguration).listStatus(new Path(tmpPath))
    assert(files.length == 1)
-    assert(files.head.getPath.getName == "4.model")
-    assert(manager.loadCheckpointAsScalaBooster().getVersion == 4)
+    assert(files.head.getPath.getName == "1.model")
+    assert(manager.loadCheckpointAsScalaBooster().getNumBoostedRound == 2)

-    manager.updateCheckpoint(model8._booster)
+    manager.updateCheckpoint(model4._booster)
    files = FileSystem.get(sc.hadoopConfiguration).listStatus(new Path(tmpPath))
    assert(files.length == 1)
-    assert(files.head.getPath.getName == "8.model")
-    assert(manager.loadCheckpointAsScalaBooster().getVersion == 8)
+    assert(files.head.getPath.getName == "3.model")
+    assert(manager.loadCheckpointAsScalaBooster().getNumBoostedRound == 4)
  }

  test("test cleanUpHigherVersions") {
-    val (tmpPath, model4, model8) = createNewModels()
+    val (tmpPath, model2, model4) = createNewModels()

    val manager = new ExternalCheckpointManager(tmpPath, FileSystem.get(sc.hadoopConfiguration))
-    manager.updateCheckpoint(model8._booster)
-    manager.cleanUpHigherVersions(8)
-    assert(new File(s"$tmpPath/8.model").exists())
+    manager.updateCheckpoint(model4._booster)
+    manager.cleanUpHigherVersions(3)
+    assert(new File(s"$tmpPath/3.model").exists())

-    manager.cleanUpHigherVersions(4)
-    assert(!new File(s"$tmpPath/8.model").exists())
+    manager.cleanUpHigherVersions(2)
+    assert(!new File(s"$tmpPath/3.model").exists())
  }

  test("test checkpoint rounds") {
    import scala.collection.JavaConverters._
-    val (tmpPath, model4, model8) = createNewModels()
+    val (tmpPath, model2, model4) = createNewModels()
    val manager = new ExternalCheckpointManager(tmpPath, FileSystem.get(sc.hadoopConfiguration))
-    assertResult(Seq(7))(
-      manager.getCheckpointRounds(0, 7).asScala)
-    assertResult(Seq(2, 4, 6, 7))(
-      manager.getCheckpointRounds(2, 7).asScala)
-    manager.updateCheckpoint(model4._booster)
-    assertResult(Seq(4, 6, 7))(
-      manager.getCheckpointRounds(2, 7).asScala)
+    assertResult(Seq(2))(manager.getCheckpointRounds(0, 0, 3).asScala)
+    assertResult(Seq(0, 2, 4, 6))(manager.getCheckpointRounds(0, 2, 7).asScala)
+    assertResult(Seq(0, 2, 4, 6, 7))(manager.getCheckpointRounds(0, 2, 8).asScala)
  }

@@ -109,8 +105,8 @@ class ExternalCheckpointManagerSuite extends AnyFunSuite with TmpFolderPerSuite
    // Check only one model is kept after training
    val files = FileSystem.get(sc.hadoopConfiguration).listStatus(new Path(tmpPath))
    assert(files.length == 1)
-    assert(files.head.getPath.getName == "8.model")
-    val tmpModel = SXGBoost.loadModel(s"$tmpPath/8.model")
+    assert(files.head.getPath.getName == "4.model")
+    val tmpModel = SXGBoost.loadModel(s"$tmpPath/4.model")
    // Train next model based on prev model
    val nextModel = new XGBoostClassifier(paramMap ++ Seq("num_round" -> 8)).fit(training)
    assert(error(tmpModel) >= error(prevModel._booster))
Some files were not shown because too many files have changed in this diff.