Compare commits
15 Commits
master-roc
...
release_1.
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
00774eeac3 | ||
|
|
bcb15a980f | ||
|
|
0cd0dad0b5 | ||
|
|
884098ec22 | ||
|
|
738786680b | ||
|
|
04232c01b2 | ||
|
|
0353a78ab7 | ||
|
|
0089a0e6bf | ||
|
|
03a68a1714 | ||
|
|
a0da8a7e0a | ||
|
|
eee4eff49b | ||
|
|
936a854baa | ||
|
|
7856da5827 | ||
|
|
50a0def6c3 | ||
|
|
9116a0ec10 |
2
.github/workflows/main.yml
vendored
2
.github/workflows/main.yml
vendored
@ -81,7 +81,7 @@ jobs:
|
|||||||
run: |
|
run: |
|
||||||
cd R-package
|
cd R-package
|
||||||
R.exe CMD INSTALL .
|
R.exe CMD INSTALL .
|
||||||
Rscript.exe tests/run_lint.R
|
Rscript.exe tests/helper_scripts/run_lint.R
|
||||||
|
|
||||||
|
|
||||||
test-with-R:
|
test-with-R:
|
||||||
|
|||||||
@ -1,9 +1,10 @@
|
|||||||
cmake_minimum_required(VERSION 3.13)
|
cmake_minimum_required(VERSION 3.13)
|
||||||
project(xgboost LANGUAGES CXX C VERSION 1.2.0)
|
project(xgboost LANGUAGES CXX C VERSION 1.2.1)
|
||||||
include(cmake/Utils.cmake)
|
include(cmake/Utils.cmake)
|
||||||
list(APPEND CMAKE_MODULE_PATH "${xgboost_SOURCE_DIR}/cmake/modules")
|
list(APPEND CMAKE_MODULE_PATH "${xgboost_SOURCE_DIR}/cmake/modules")
|
||||||
cmake_policy(SET CMP0022 NEW)
|
cmake_policy(SET CMP0022 NEW)
|
||||||
cmake_policy(SET CMP0079 NEW)
|
cmake_policy(SET CMP0079 NEW)
|
||||||
|
set(CMAKE_POLICY_DEFAULT_CMP0063 NEW)
|
||||||
cmake_policy(SET CMP0063 NEW)
|
cmake_policy(SET CMP0063 NEW)
|
||||||
|
|
||||||
if ((${CMAKE_VERSION} VERSION_GREATER 3.13) OR (${CMAKE_VERSION} VERSION_EQUAL 3.13))
|
if ((${CMAKE_VERSION} VERSION_GREATER 3.13) OR (${CMAKE_VERSION} VERSION_EQUAL 3.13))
|
||||||
@ -173,9 +174,6 @@ foreach(lib rabit rabit_base rabit_empty rabit_mock rabit_mock_static)
|
|||||||
# from dmlc is correctly applied to rabit.
|
# from dmlc is correctly applied to rabit.
|
||||||
if (TARGET ${lib})
|
if (TARGET ${lib})
|
||||||
target_link_libraries(${lib} dmlc ${CMAKE_THREAD_LIBS_INIT})
|
target_link_libraries(${lib} dmlc ${CMAKE_THREAD_LIBS_INIT})
|
||||||
if (HIDE_CXX_SYMBOLS) # Hide all C++ symbols from Rabit
|
|
||||||
set_target_properties(${lib} PROPERTIES CXX_VISIBILITY_PRESET hidden)
|
|
||||||
endif (HIDE_CXX_SYMBOLS)
|
|
||||||
if (ENABLE_ALL_WARNINGS)
|
if (ENABLE_ALL_WARNINGS)
|
||||||
target_compile_options(${lib} PRIVATE -Wall -Wextra)
|
target_compile_options(${lib} PRIVATE -Wall -Wextra)
|
||||||
endif (ENABLE_ALL_WARNINGS)
|
endif (ENABLE_ALL_WARNINGS)
|
||||||
@ -204,8 +202,9 @@ endif (USE_NVTX)
|
|||||||
|
|
||||||
#-- Hide all C++ symbols
|
#-- Hide all C++ symbols
|
||||||
if (HIDE_CXX_SYMBOLS)
|
if (HIDE_CXX_SYMBOLS)
|
||||||
set_target_properties(objxgboost PROPERTIES CXX_VISIBILITY_PRESET hidden)
|
foreach(target objxgboost xgboost dmlc rabit rabit_mock_static)
|
||||||
set_target_properties(xgboost PROPERTIES CXX_VISIBILITY_PRESET hidden)
|
set_target_properties(${target} PROPERTIES CXX_VISIBILITY_PRESET hidden)
|
||||||
|
endforeach()
|
||||||
endif (HIDE_CXX_SYMBOLS)
|
endif (HIDE_CXX_SYMBOLS)
|
||||||
|
|
||||||
target_include_directories(xgboost
|
target_include_directories(xgboost
|
||||||
|
|||||||
23
Jenkinsfile
vendored
23
Jenkinsfile
vendored
@ -92,7 +92,7 @@ pipeline {
|
|||||||
'test-python-gpu-cuda10.2': { TestPythonGPU(host_cuda_version: '10.2') },
|
'test-python-gpu-cuda10.2': { TestPythonGPU(host_cuda_version: '10.2') },
|
||||||
'test-python-gpu-cuda11.0-cross': { TestPythonGPU(artifact_cuda_version: '10.0', host_cuda_version: '11.0') },
|
'test-python-gpu-cuda11.0-cross': { TestPythonGPU(artifact_cuda_version: '10.0', host_cuda_version: '11.0') },
|
||||||
'test-python-gpu-cuda11.0': { TestPythonGPU(artifact_cuda_version: '11.0', host_cuda_version: '11.0') },
|
'test-python-gpu-cuda11.0': { TestPythonGPU(artifact_cuda_version: '11.0', host_cuda_version: '11.0') },
|
||||||
'test-python-mgpu-cuda10.2': { TestPythonGPU(artifact_cuda_version: '10.2', host_cuda_version: '10.2', multi_gpu: true) },
|
'test-python-mgpu-cuda10.2': { TestPythonGPU(artifact_cuda_version: '10.0', host_cuda_version: '10.2', multi_gpu: true) },
|
||||||
'test-cpp-gpu-cuda10.2': { TestCppGPU(artifact_cuda_version: '10.2', host_cuda_version: '10.2') },
|
'test-cpp-gpu-cuda10.2': { TestCppGPU(artifact_cuda_version: '10.2', host_cuda_version: '10.2') },
|
||||||
'test-cpp-gpu-cuda11.0': { TestCppGPU(artifact_cuda_version: '11.0', host_cuda_version: '11.0') },
|
'test-cpp-gpu-cuda11.0': { TestCppGPU(artifact_cuda_version: '11.0', host_cuda_version: '11.0') },
|
||||||
'test-jvm-jdk8-cuda10.0': { CrossTestJVMwithJDKGPU(artifact_cuda_version: '10.0', host_cuda_version: '10.0') },
|
'test-jvm-jdk8-cuda10.0': { CrossTestJVMwithJDKGPU(artifact_cuda_version: '10.0', host_cuda_version: '10.0') },
|
||||||
@ -144,7 +144,7 @@ def ClangTidy() {
|
|||||||
echo "Running clang-tidy job..."
|
echo "Running clang-tidy job..."
|
||||||
def container_type = "clang_tidy"
|
def container_type = "clang_tidy"
|
||||||
def docker_binary = "docker"
|
def docker_binary = "docker"
|
||||||
def dockerArgs = "--build-arg CUDA_VERSION=10.1"
|
def dockerArgs = "--build-arg CUDA_VERSION_ARG=10.1"
|
||||||
sh """
|
sh """
|
||||||
${dockerRun} ${container_type} ${docker_binary} ${dockerArgs} python3 tests/ci_build/tidy.py
|
${dockerRun} ${container_type} ${docker_binary} ${dockerArgs} python3 tests/ci_build/tidy.py
|
||||||
"""
|
"""
|
||||||
@ -261,7 +261,7 @@ def BuildCUDA(args) {
|
|||||||
echo "Build with CUDA ${args.cuda_version}"
|
echo "Build with CUDA ${args.cuda_version}"
|
||||||
def container_type = GetCUDABuildContainerType(args.cuda_version)
|
def container_type = GetCUDABuildContainerType(args.cuda_version)
|
||||||
def docker_binary = "docker"
|
def docker_binary = "docker"
|
||||||
def docker_args = "--build-arg CUDA_VERSION=${args.cuda_version}"
|
def docker_args = "--build-arg CUDA_VERSION_ARG=${args.cuda_version}"
|
||||||
def arch_flag = ""
|
def arch_flag = ""
|
||||||
if (env.BRANCH_NAME != 'master' && !(env.BRANCH_NAME.startsWith('release'))) {
|
if (env.BRANCH_NAME != 'master' && !(env.BRANCH_NAME.startsWith('release'))) {
|
||||||
arch_flag = "-DGPU_COMPUTE_VER=75"
|
arch_flag = "-DGPU_COMPUTE_VER=75"
|
||||||
@ -285,12 +285,12 @@ def BuildCUDA(args) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
def BuildJVMPackagesWithCUDA(args) {
|
def BuildJVMPackagesWithCUDA(args) {
|
||||||
node('linux && gpu') {
|
node('linux && mgpu') {
|
||||||
unstash name: 'srcs'
|
unstash name: 'srcs'
|
||||||
echo "Build XGBoost4J-Spark with Spark ${args.spark_version}, CUDA ${args.cuda_version}"
|
echo "Build XGBoost4J-Spark with Spark ${args.spark_version}, CUDA ${args.cuda_version}"
|
||||||
def container_type = "jvm_gpu_build"
|
def container_type = "jvm_gpu_build"
|
||||||
def docker_binary = "nvidia-docker"
|
def docker_binary = "nvidia-docker"
|
||||||
def docker_args = "--build-arg CUDA_VERSION=${args.cuda_version}"
|
def docker_args = "--build-arg CUDA_VERSION_ARG=${args.cuda_version}"
|
||||||
def arch_flag = ""
|
def arch_flag = ""
|
||||||
if (env.BRANCH_NAME != 'master' && !(env.BRANCH_NAME.startsWith('release'))) {
|
if (env.BRANCH_NAME != 'master' && !(env.BRANCH_NAME.startsWith('release'))) {
|
||||||
arch_flag = "-DGPU_COMPUTE_VER=75"
|
arch_flag = "-DGPU_COMPUTE_VER=75"
|
||||||
@ -365,7 +365,7 @@ def TestPythonGPU(args) {
|
|||||||
echo "Test Python GPU: CUDA ${args.host_cuda_version}"
|
echo "Test Python GPU: CUDA ${args.host_cuda_version}"
|
||||||
def container_type = "gpu"
|
def container_type = "gpu"
|
||||||
def docker_binary = "nvidia-docker"
|
def docker_binary = "nvidia-docker"
|
||||||
def docker_args = "--build-arg CUDA_VERSION=${args.host_cuda_version}"
|
def docker_args = "--build-arg CUDA_VERSION_ARG=${args.host_cuda_version}"
|
||||||
if (args.multi_gpu) {
|
if (args.multi_gpu) {
|
||||||
echo "Using multiple GPUs"
|
echo "Using multiple GPUs"
|
||||||
// Allocate extra space in /dev/shm to enable NCCL
|
// Allocate extra space in /dev/shm to enable NCCL
|
||||||
@ -406,7 +406,7 @@ def TestCppGPU(args) {
|
|||||||
echo "Test C++, CUDA ${args.host_cuda_version}"
|
echo "Test C++, CUDA ${args.host_cuda_version}"
|
||||||
def container_type = "gpu"
|
def container_type = "gpu"
|
||||||
def docker_binary = "nvidia-docker"
|
def docker_binary = "nvidia-docker"
|
||||||
def docker_args = "--build-arg CUDA_VERSION=${args.host_cuda_version}"
|
def docker_args = "--build-arg CUDA_VERSION_ARG=${args.host_cuda_version}"
|
||||||
sh "${dockerRun} ${container_type} ${docker_binary} ${docker_args} build/testxgboost"
|
sh "${dockerRun} ${container_type} ${docker_binary} ${docker_args} build/testxgboost"
|
||||||
deleteDir()
|
deleteDir()
|
||||||
}
|
}
|
||||||
@ -424,7 +424,7 @@ def CrossTestJVMwithJDKGPU(args) {
|
|||||||
}
|
}
|
||||||
def container_type = "gpu_jvm"
|
def container_type = "gpu_jvm"
|
||||||
def docker_binary = "nvidia-docker"
|
def docker_binary = "nvidia-docker"
|
||||||
def docker_args = "--build-arg CUDA_VERSION=${args.host_cuda_version}"
|
def docker_args = "--build-arg CUDA_VERSION_ARG=${args.host_cuda_version}"
|
||||||
sh "${dockerRun} ${container_type} ${docker_binary} ${docker_args} tests/ci_build/test_jvm_gpu_cross.sh"
|
sh "${dockerRun} ${container_type} ${docker_binary} ${docker_args} tests/ci_build/test_jvm_gpu_cross.sh"
|
||||||
deleteDir()
|
deleteDir()
|
||||||
}
|
}
|
||||||
@ -472,10 +472,11 @@ def DeployJVMPackages(args) {
|
|||||||
unstash name: 'srcs'
|
unstash name: 'srcs'
|
||||||
if (env.BRANCH_NAME == 'master' || env.BRANCH_NAME.startsWith('release')) {
|
if (env.BRANCH_NAME == 'master' || env.BRANCH_NAME.startsWith('release')) {
|
||||||
echo 'Deploying to xgboost-maven-repo S3 repo...'
|
echo 'Deploying to xgboost-maven-repo S3 repo...'
|
||||||
def container_type = "jvm"
|
|
||||||
def docker_binary = "docker"
|
|
||||||
sh """
|
sh """
|
||||||
${dockerRun} ${container_type} ${docker_binary} tests/ci_build/deploy_jvm_packages.sh ${args.spark_version}
|
${dockerRun} jvm docker tests/ci_build/deploy_jvm_packages.sh ${args.spark_version} 0
|
||||||
|
"""
|
||||||
|
sh """
|
||||||
|
${dockerRun} jvm_gpu_build docker --build-arg CUDA_VERSION_ARG=10.0 tests/ci_build/deploy_jvm_packages.sh ${args.spark_version} 1
|
||||||
"""
|
"""
|
||||||
}
|
}
|
||||||
deleteDir()
|
deleteDir()
|
||||||
|
|||||||
5
Makefile
5
Makefile
@ -133,15 +133,16 @@ Rpack: clean_all
|
|||||||
sed -i -e 's/@BACKTRACE_LIB@//g' xgboost/src/Makevars.win
|
sed -i -e 's/@BACKTRACE_LIB@//g' xgboost/src/Makevars.win
|
||||||
sed -i -e 's/@OPENMP_LIB@//g' xgboost/src/Makevars.win
|
sed -i -e 's/@OPENMP_LIB@//g' xgboost/src/Makevars.win
|
||||||
rm -f xgboost/src/Makevars.win-e # OSX sed create this extra file; remove it
|
rm -f xgboost/src/Makevars.win-e # OSX sed create this extra file; remove it
|
||||||
bash R-package/remove_warning_suppression_pragma.sh
|
bash xgboost/remove_warning_suppression_pragma.sh
|
||||||
rm xgboost/remove_warning_suppression_pragma.sh
|
rm xgboost/remove_warning_suppression_pragma.sh
|
||||||
|
rm -rfv xgboost/tests/helper_scripts/
|
||||||
|
|
||||||
Rbuild: Rpack
|
Rbuild: Rpack
|
||||||
R CMD build --no-build-vignettes xgboost
|
R CMD build --no-build-vignettes xgboost
|
||||||
rm -rf xgboost
|
rm -rf xgboost
|
||||||
|
|
||||||
Rcheck: Rbuild
|
Rcheck: Rbuild
|
||||||
R CMD check xgboost*.tar.gz
|
R CMD check --as-cran xgboost*.tar.gz
|
||||||
|
|
||||||
-include build/*.d
|
-include build/*.d
|
||||||
-include build/*/*.d
|
-include build/*/*.d
|
||||||
|
|||||||
@ -2,7 +2,7 @@ Package: xgboost
|
|||||||
Type: Package
|
Type: Package
|
||||||
Title: Extreme Gradient Boosting
|
Title: Extreme Gradient Boosting
|
||||||
Version: 1.2.0.1
|
Version: 1.2.0.1
|
||||||
Date: 2020-02-21
|
Date: 2020-08-28
|
||||||
Authors@R: c(
|
Authors@R: c(
|
||||||
person("Tianqi", "Chen", role = c("aut"),
|
person("Tianqi", "Chen", role = c("aut"),
|
||||||
email = "tianqi.tchen@gmail.com"),
|
email = "tianqi.tchen@gmail.com"),
|
||||||
|
|||||||
@ -349,6 +349,7 @@ NULL
|
|||||||
#' # Save as a stand-alone file (JSON); load it with xgb.load()
|
#' # Save as a stand-alone file (JSON); load it with xgb.load()
|
||||||
#' xgb.save(bst, 'xgb.model.json')
|
#' xgb.save(bst, 'xgb.model.json')
|
||||||
#' bst2 <- xgb.load('xgb.model.json')
|
#' bst2 <- xgb.load('xgb.model.json')
|
||||||
|
#' if (file.exists('xgb.model.json')) file.remove('xgb.model.json')
|
||||||
#'
|
#'
|
||||||
#' # Save as a raw byte vector; load it with xgb.load.raw()
|
#' # Save as a raw byte vector; load it with xgb.load.raw()
|
||||||
#' xgb_bytes <- xgb.save.raw(bst)
|
#' xgb_bytes <- xgb.save.raw(bst)
|
||||||
@ -364,6 +365,7 @@ NULL
|
|||||||
#' obj2 <- readRDS('my_object.rds')
|
#' obj2 <- readRDS('my_object.rds')
|
||||||
#' # Re-construct xgb.Booster object from the bytes
|
#' # Re-construct xgb.Booster object from the bytes
|
||||||
#' bst2 <- xgb.load.raw(obj2$xgb_model_bytes)
|
#' bst2 <- xgb.load.raw(obj2$xgb_model_bytes)
|
||||||
|
#' if (file.exists('my_object.rds')) file.remove('my_object.rds')
|
||||||
#'
|
#'
|
||||||
#' @name a-compatibility-note-for-saveRDS-save
|
#' @name a-compatibility-note-for-saveRDS-save
|
||||||
NULL
|
NULL
|
||||||
|
|||||||
@ -79,7 +79,7 @@
|
|||||||
#'
|
#'
|
||||||
#' All observations are used for both training and validation.
|
#' All observations are used for both training and validation.
|
||||||
#'
|
#'
|
||||||
#' Adapted from \url{http://en.wikipedia.org/wiki/Cross-validation_\%28statistics\%29#k-fold_cross-validation}
|
#' Adapted from \url{https://en.wikipedia.org/wiki/Cross-validation_\%28statistics\%29}
|
||||||
#'
|
#'
|
||||||
#' @return
|
#' @return
|
||||||
#' An object of class \code{xgb.cv.synchronous} with the following elements:
|
#' An object of class \code{xgb.cv.synchronous} with the following elements:
|
||||||
|
|||||||
@ -130,16 +130,16 @@
|
|||||||
#' Note that when using a customized metric, only this single metric can be used.
|
#' Note that when using a customized metric, only this single metric can be used.
|
||||||
#' The following is the list of built-in metrics for which Xgboost provides optimized implementation:
|
#' The following is the list of built-in metrics for which Xgboost provides optimized implementation:
|
||||||
#' \itemize{
|
#' \itemize{
|
||||||
#' \item \code{rmse} root mean square error. \url{http://en.wikipedia.org/wiki/Root_mean_square_error}
|
#' \item \code{rmse} root mean square error. \url{https://en.wikipedia.org/wiki/Root_mean_square_error}
|
||||||
#' \item \code{logloss} negative log-likelihood. \url{http://en.wikipedia.org/wiki/Log-likelihood}
|
#' \item \code{logloss} negative log-likelihood. \url{https://en.wikipedia.org/wiki/Log-likelihood}
|
||||||
#' \item \code{mlogloss} multiclass logloss. \url{http://wiki.fast.ai/index.php/Log_Loss}
|
#' \item \code{mlogloss} multiclass logloss. \url{https://scikit-learn.org/stable/modules/generated/sklearn.metrics.log_loss.html}
|
||||||
#' \item \code{error} Binary classification error rate. It is calculated as \code{(# wrong cases) / (# all cases)}.
|
#' \item \code{error} Binary classification error rate. It is calculated as \code{(# wrong cases) / (# all cases)}.
|
||||||
#' By default, it uses the 0.5 threshold for predicted values to define negative and positive instances.
|
#' By default, it uses the 0.5 threshold for predicted values to define negative and positive instances.
|
||||||
#' Different threshold (e.g., 0.) could be specified as "error@0."
|
#' Different threshold (e.g., 0.) could be specified as "error@0."
|
||||||
#' \item \code{merror} Multiclass classification error rate. It is calculated as \code{(# wrong cases) / (# all cases)}.
|
#' \item \code{merror} Multiclass classification error rate. It is calculated as \code{(# wrong cases) / (# all cases)}.
|
||||||
#' \item \code{auc} Area under the curve. \url{http://en.wikipedia.org/wiki/Receiver_operating_characteristic#'Area_under_curve} for ranking evaluation.
|
#' \item \code{auc} Area under the curve. \url{https://en.wikipedia.org/wiki/Receiver_operating_characteristic#'Area_under_curve} for ranking evaluation.
|
||||||
#' \item \code{aucpr} Area under the PR curve. \url{https://en.wikipedia.org/wiki/Precision_and_recall} for ranking evaluation.
|
#' \item \code{aucpr} Area under the PR curve. \url{https://en.wikipedia.org/wiki/Precision_and_recall} for ranking evaluation.
|
||||||
#' \item \code{ndcg} Normalized Discounted Cumulative Gain (for ranking task). \url{http://en.wikipedia.org/wiki/NDCG}
|
#' \item \code{ndcg} Normalized Discounted Cumulative Gain (for ranking task). \url{https://en.wikipedia.org/wiki/NDCG}
|
||||||
#' }
|
#' }
|
||||||
#'
|
#'
|
||||||
#' The following callbacks are automatically created when certain parameters are set:
|
#' The following callbacks are automatically created when certain parameters are set:
|
||||||
|
|||||||
@ -43,6 +43,7 @@ bst2 <- xgb.load('xgb.model')
|
|||||||
# Save as a stand-alone file (JSON); load it with xgb.load()
|
# Save as a stand-alone file (JSON); load it with xgb.load()
|
||||||
xgb.save(bst, 'xgb.model.json')
|
xgb.save(bst, 'xgb.model.json')
|
||||||
bst2 <- xgb.load('xgb.model.json')
|
bst2 <- xgb.load('xgb.model.json')
|
||||||
|
if (file.exists('xgb.model.json')) file.remove('xgb.model.json')
|
||||||
|
|
||||||
# Save as a raw byte vector; load it with xgb.load.raw()
|
# Save as a raw byte vector; load it with xgb.load.raw()
|
||||||
xgb_bytes <- xgb.save.raw(bst)
|
xgb_bytes <- xgb.save.raw(bst)
|
||||||
@ -58,5 +59,6 @@ saveRDS(obj, 'my_object.rds')
|
|||||||
obj2 <- readRDS('my_object.rds')
|
obj2 <- readRDS('my_object.rds')
|
||||||
# Re-construct xgb.Booster object from the bytes
|
# Re-construct xgb.Booster object from the bytes
|
||||||
bst2 <- xgb.load.raw(obj2$xgb_model_bytes)
|
bst2 <- xgb.load.raw(obj2$xgb_model_bytes)
|
||||||
|
if (file.exists('my_object.rds')) file.remove('my_object.rds')
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@ -154,7 +154,7 @@ The cross-validation process is then repeated \code{nrounds} times, with each of
|
|||||||
|
|
||||||
All observations are used for both training and validation.
|
All observations are used for both training and validation.
|
||||||
|
|
||||||
Adapted from \url{http://en.wikipedia.org/wiki/Cross-validation_\%28statistics\%29#k-fold_cross-validation}
|
Adapted from \url{https://en.wikipedia.org/wiki/Cross-validation_\%28statistics\%29}
|
||||||
}
|
}
|
||||||
\examples{
|
\examples{
|
||||||
data(agaricus.train, package='xgboost')
|
data(agaricus.train, package='xgboost')
|
||||||
|
|||||||
@ -215,16 +215,16 @@ User may set one or several \code{eval_metric} parameters.
|
|||||||
Note that when using a customized metric, only this single metric can be used.
|
Note that when using a customized metric, only this single metric can be used.
|
||||||
The following is the list of built-in metrics for which Xgboost provides optimized implementation:
|
The following is the list of built-in metrics for which Xgboost provides optimized implementation:
|
||||||
\itemize{
|
\itemize{
|
||||||
\item \code{rmse} root mean square error. \url{http://en.wikipedia.org/wiki/Root_mean_square_error}
|
\item \code{rmse} root mean square error. \url{https://en.wikipedia.org/wiki/Root_mean_square_error}
|
||||||
\item \code{logloss} negative log-likelihood. \url{http://en.wikipedia.org/wiki/Log-likelihood}
|
\item \code{logloss} negative log-likelihood. \url{https://en.wikipedia.org/wiki/Log-likelihood}
|
||||||
\item \code{mlogloss} multiclass logloss. \url{http://wiki.fast.ai/index.php/Log_Loss}
|
\item \code{mlogloss} multiclass logloss. \url{https://scikit-learn.org/stable/modules/generated/sklearn.metrics.log_loss.html}
|
||||||
\item \code{error} Binary classification error rate. It is calculated as \code{(# wrong cases) / (# all cases)}.
|
\item \code{error} Binary classification error rate. It is calculated as \code{(# wrong cases) / (# all cases)}.
|
||||||
By default, it uses the 0.5 threshold for predicted values to define negative and positive instances.
|
By default, it uses the 0.5 threshold for predicted values to define negative and positive instances.
|
||||||
Different threshold (e.g., 0.) could be specified as "error@0."
|
Different threshold (e.g., 0.) could be specified as "error@0."
|
||||||
\item \code{merror} Multiclass classification error rate. It is calculated as \code{(# wrong cases) / (# all cases)}.
|
\item \code{merror} Multiclass classification error rate. It is calculated as \code{(# wrong cases) / (# all cases)}.
|
||||||
\item \code{auc} Area under the curve. \url{http://en.wikipedia.org/wiki/Receiver_operating_characteristic#'Area_under_curve} for ranking evaluation.
|
\item \code{auc} Area under the curve. \url{https://en.wikipedia.org/wiki/Receiver_operating_characteristic#'Area_under_curve} for ranking evaluation.
|
||||||
\item \code{aucpr} Area under the PR curve. \url{https://en.wikipedia.org/wiki/Precision_and_recall} for ranking evaluation.
|
\item \code{aucpr} Area under the PR curve. \url{https://en.wikipedia.org/wiki/Precision_and_recall} for ranking evaluation.
|
||||||
\item \code{ndcg} Normalized Discounted Cumulative Gain (for ranking task). \url{http://en.wikipedia.org/wiki/NDCG}
|
\item \code{ndcg} Normalized Discounted Cumulative Gain (for ranking task). \url{https://en.wikipedia.org/wiki/NDCG}
|
||||||
}
|
}
|
||||||
|
|
||||||
The following callbacks are automatically created when certain parameters are set:
|
The following callbacks are automatically created when certain parameters are set:
|
||||||
|
|||||||
@ -1,10 +0,0 @@
|
|||||||
model_generator_metadata <- function() {
|
|
||||||
return (list(
|
|
||||||
kRounds = 2,
|
|
||||||
kRows = 1000,
|
|
||||||
kCols = 4,
|
|
||||||
kForests = 2,
|
|
||||||
kMaxDepth = 2,
|
|
||||||
kClasses = 3
|
|
||||||
))
|
|
||||||
}
|
|
||||||
@ -5,7 +5,14 @@ library(Matrix)
|
|||||||
source('./generate_models_params.R')
|
source('./generate_models_params.R')
|
||||||
|
|
||||||
set.seed(0)
|
set.seed(0)
|
||||||
metadata <- model_generator_metadata()
|
metadata <- list(
|
||||||
|
kRounds = 2,
|
||||||
|
kRows = 1000,
|
||||||
|
kCols = 4,
|
||||||
|
kForests = 2,
|
||||||
|
kMaxDepth = 2,
|
||||||
|
kClasses = 3
|
||||||
|
)
|
||||||
X <- Matrix(data = rnorm(metadata$kRows * metadata$kCols), nrow = metadata$kRows,
|
X <- Matrix(data = rnorm(metadata$kRows * metadata$kCols), nrow = metadata$kRows,
|
||||||
ncol = metadata$kCols, sparse = TRUE)
|
ncol = metadata$kCols, sparse = TRUE)
|
||||||
w <- runif(metadata$kRows)
|
w <- runif(metadata$kRows)
|
||||||
@ -1,10 +1,16 @@
|
|||||||
require(xgboost)
|
require(xgboost)
|
||||||
require(jsonlite)
|
require(jsonlite)
|
||||||
source('../generate_models_params.R')
|
|
||||||
|
|
||||||
context("Models from previous versions of XGBoost can be loaded")
|
context("Models from previous versions of XGBoost can be loaded")
|
||||||
|
|
||||||
metadata <- model_generator_metadata()
|
metadata <- list(
|
||||||
|
kRounds = 2,
|
||||||
|
kRows = 1000,
|
||||||
|
kCols = 4,
|
||||||
|
kForests = 2,
|
||||||
|
kMaxDepth = 2,
|
||||||
|
kClasses = 3
|
||||||
|
)
|
||||||
|
|
||||||
run_model_param_check <- function (config) {
|
run_model_param_check <- function (config) {
|
||||||
testthat::expect_equal(config$learner$learner_model_param$num_feature, '4')
|
testthat::expect_equal(config$learner$learner_model_param$num_feature, '4')
|
||||||
|
|||||||
@ -57,7 +57,7 @@ To answer the question above we will convert *categorical* variables to `numeric
|
|||||||
|
|
||||||
In this Vignette we will see how to transform a *dense* `data.frame` (*dense* = few zeroes in the matrix) with *categorical* variables to a very *sparse* matrix (*sparse* = lots of zero in the matrix) of `numeric` features.
|
In this Vignette we will see how to transform a *dense* `data.frame` (*dense* = few zeroes in the matrix) with *categorical* variables to a very *sparse* matrix (*sparse* = lots of zero in the matrix) of `numeric` features.
|
||||||
|
|
||||||
The method we are going to see is usually called [one-hot encoding](http://en.wikipedia.org/wiki/One-hot).
|
The method we are going to see is usually called [one-hot encoding](https://en.wikipedia.org/wiki/One-hot).
|
||||||
|
|
||||||
The first step is to load `Arthritis` dataset in memory and wrap it with `data.table` package.
|
The first step is to load `Arthritis` dataset in memory and wrap it with `data.table` package.
|
||||||
|
|
||||||
@ -66,7 +66,7 @@ data(Arthritis)
|
|||||||
df <- data.table(Arthritis, keep.rownames = FALSE)
|
df <- data.table(Arthritis, keep.rownames = FALSE)
|
||||||
```
|
```
|
||||||
|
|
||||||
> `data.table` is 100% compliant with **R** `data.frame` but its syntax is more consistent and its performance for large dataset is [best in class](http://stackoverflow.com/questions/21435339/data-table-vs-dplyr-can-one-do-something-well-the-other-cant-or-does-poorly) (`dplyr` from **R** and `Pandas` from **Python** [included](https://github.com/Rdatatable/data.table/wiki/Benchmarks-%3A-Grouping)). Some parts of **Xgboost** **R** package use `data.table`.
|
> `data.table` is 100% compliant with **R** `data.frame` but its syntax is more consistent and its performance for large dataset is [best in class](https://stackoverflow.com/questions/21435339/data-table-vs-dplyr-can-one-do-something-well-the-other-cant-or-does-poorly) (`dplyr` from **R** and `Pandas` from **Python** [included](https://github.com/Rdatatable/data.table/wiki/Benchmarks-%3A-Grouping)). Some parts of **Xgboost** **R** package use `data.table`.
|
||||||
|
|
||||||
The first thing we want to do is to have a look to the first few lines of the `data.table`:
|
The first thing we want to do is to have a look to the first few lines of the `data.table`:
|
||||||
|
|
||||||
@ -137,8 +137,8 @@ levels(df[,Treatment])
|
|||||||
#### Encoding categorical features
|
#### Encoding categorical features
|
||||||
|
|
||||||
Next step, we will transform the categorical data to dummy variables.
|
Next step, we will transform the categorical data to dummy variables.
|
||||||
Several encoding methods exist, e.g., [one-hot encoding](http://en.wikipedia.org/wiki/One-hot) is a common approach.
|
Several encoding methods exist, e.g., [one-hot encoding](https://en.wikipedia.org/wiki/One-hot) is a common approach.
|
||||||
We will use the [dummy contrast coding](http://www.ats.ucla.edu/stat/r/library/contrast_coding.htm#dummy) which is popular because it produces "full rank" encoding (also see [this blog post by Max Kuhn](http://appliedpredictivemodeling.com/blog/2013/10/23/the-basics-of-encoding-categorical-data-for-predictive-models)).
|
We will use the [dummy contrast coding](https://stats.idre.ucla.edu/r/library/r-library-contrast-coding-systems-for-categorical-variables/) which is popular because it produces "full rank" encoding (also see [this blog post by Max Kuhn](http://appliedpredictivemodeling.com/blog/2013/10/23/the-basics-of-encoding-categorical-data-for-predictive-models)).
|
||||||
|
|
||||||
The purpose is to transform each value of each *categorical* feature into a *binary* feature `{0, 1}`.
|
The purpose is to transform each value of each *categorical* feature into a *binary* feature `{0, 1}`.
|
||||||
|
|
||||||
@ -176,7 +176,7 @@ bst <- xgboost(data = sparse_matrix, label = output_vector, max_depth = 4,
|
|||||||
|
|
||||||
You can see some `train-error: 0.XXXXX` lines followed by a number. It decreases. Each line shows how well the model explains your data. Lower is better.
|
You can see some `train-error: 0.XXXXX` lines followed by a number. It decreases. Each line shows how well the model explains your data. Lower is better.
|
||||||
|
|
||||||
A model which fits too well may [overfit](http://en.wikipedia.org/wiki/Overfitting) (meaning it copy/paste too much the past, and won't be that good to predict the future).
|
A model which fits too well may [overfit](https://en.wikipedia.org/wiki/Overfitting) (meaning it copy/paste too much the past, and won't be that good to predict the future).
|
||||||
|
|
||||||
> Here you can see the numbers decrease until line 7 and then increase.
|
> Here you can see the numbers decrease until line 7 and then increase.
|
||||||
>
|
>
|
||||||
@ -304,7 +304,7 @@ Linear model may not be that smart in this scenario.
|
|||||||
Special Note: What about Random Forests™?
|
Special Note: What about Random Forests™?
|
||||||
-----------------------------------------
|
-----------------------------------------
|
||||||
|
|
||||||
As you may know, [Random Forests™](http://en.wikipedia.org/wiki/Random_forest) algorithm is cousin with boosting and both are part of the [ensemble learning](http://en.wikipedia.org/wiki/Ensemble_learning) family.
|
As you may know, [Random Forests™](https://en.wikipedia.org/wiki/Random_forest) algorithm is cousin with boosting and both are part of the [ensemble learning](https://en.wikipedia.org/wiki/Ensemble_learning) family.
|
||||||
|
|
||||||
Both trains several decision trees for one dataset. The *main* difference is that in Random Forests™, trees are independent and in boosting, the tree `N+1` focus its learning on the loss (<=> what has not been well modeled by the tree `N`).
|
Both trains several decision trees for one dataset. The *main* difference is that in Random Forests™, trees are independent and in boosting, the tree `N+1` focus its learning on the loss (<=> what has not been well modeled by the tree `N`).
|
||||||
|
|
||||||
|
|||||||
@ -24,7 +24,7 @@
|
|||||||
author = "K. Bache and M. Lichman",
|
author = "K. Bache and M. Lichman",
|
||||||
year = "2013",
|
year = "2013",
|
||||||
title = "{UCI} Machine Learning Repository",
|
title = "{UCI} Machine Learning Repository",
|
||||||
url = "http://archive.ics.uci.edu/ml",
|
url = "http://archive.ics.uci.edu/ml/",
|
||||||
institution = "University of California, Irvine, School of Information and Computer Sciences"
|
institution = "University of California, Irvine, School of Information and Computer Sciences"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -68,7 +68,7 @@ The version 0.4-2 is on CRAN, and you can install it by:
|
|||||||
install.packages("xgboost")
|
install.packages("xgboost")
|
||||||
```
|
```
|
||||||
|
|
||||||
Formerly available versions can be obtained from the CRAN [archive](https://cran.r-project.org/src/contrib/Archive/xgboost)
|
Formerly available versions can be obtained from the CRAN [archive](https://cran.r-project.org/src/contrib/Archive/xgboost/)
|
||||||
|
|
||||||
## Learning
|
## Learning
|
||||||
|
|
||||||
|
|||||||
@ -1 +1 @@
|
|||||||
@xgboost_VERSION_MAJOR@.@xgboost_VERSION_MINOR@.@xgboost_VERSION_PATCH@-SNAPSHOT
|
@xgboost_VERSION_MAJOR@.@xgboost_VERSION_MINOR@.@xgboost_VERSION_PATCH@
|
||||||
|
|||||||
@ -6,6 +6,6 @@
|
|||||||
|
|
||||||
#define XGBOOST_VER_MAJOR 1
|
#define XGBOOST_VER_MAJOR 1
|
||||||
#define XGBOOST_VER_MINOR 2
|
#define XGBOOST_VER_MINOR 2
|
||||||
#define XGBOOST_VER_PATCH 0
|
#define XGBOOST_VER_PATCH 1
|
||||||
|
|
||||||
#endif // XGBOOST_VERSION_CONFIG_H_
|
#endif // XGBOOST_VERSION_CONFIG_H_
|
||||||
|
|||||||
@ -6,7 +6,7 @@
|
|||||||
|
|
||||||
<groupId>ml.dmlc</groupId>
|
<groupId>ml.dmlc</groupId>
|
||||||
<artifactId>xgboost-jvm_2.12</artifactId>
|
<artifactId>xgboost-jvm_2.12</artifactId>
|
||||||
<version>1.2.0-SNAPSHOT</version>
|
<version>1.2.1</version>
|
||||||
<packaging>pom</packaging>
|
<packaging>pom</packaging>
|
||||||
<name>XGBoost JVM Package</name>
|
<name>XGBoost JVM Package</name>
|
||||||
<description>JVM Package for XGBoost</description>
|
<description>JVM Package for XGBoost</description>
|
||||||
|
|||||||
@ -6,10 +6,10 @@
|
|||||||
<parent>
|
<parent>
|
||||||
<groupId>ml.dmlc</groupId>
|
<groupId>ml.dmlc</groupId>
|
||||||
<artifactId>xgboost-jvm_2.12</artifactId>
|
<artifactId>xgboost-jvm_2.12</artifactId>
|
||||||
<version>1.2.0-SNAPSHOT</version>
|
<version>1.2.1</version>
|
||||||
</parent>
|
</parent>
|
||||||
<artifactId>xgboost4j-example_2.12</artifactId>
|
<artifactId>xgboost4j-example_2.12</artifactId>
|
||||||
<version>1.2.0-SNAPSHOT</version>
|
<version>1.2.1</version>
|
||||||
<packaging>jar</packaging>
|
<packaging>jar</packaging>
|
||||||
<build>
|
<build>
|
||||||
<plugins>
|
<plugins>
|
||||||
@ -26,7 +26,7 @@
|
|||||||
<dependency>
|
<dependency>
|
||||||
<groupId>ml.dmlc</groupId>
|
<groupId>ml.dmlc</groupId>
|
||||||
<artifactId>xgboost4j-spark_${scala.binary.version}</artifactId>
|
<artifactId>xgboost4j-spark_${scala.binary.version}</artifactId>
|
||||||
<version>1.2.0-SNAPSHOT</version>
|
<version>1.2.1</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.spark</groupId>
|
<groupId>org.apache.spark</groupId>
|
||||||
@ -37,7 +37,7 @@
|
|||||||
<dependency>
|
<dependency>
|
||||||
<groupId>ml.dmlc</groupId>
|
<groupId>ml.dmlc</groupId>
|
||||||
<artifactId>xgboost4j-flink_${scala.binary.version}</artifactId>
|
<artifactId>xgboost4j-flink_${scala.binary.version}</artifactId>
|
||||||
<version>1.2.0-SNAPSHOT</version>
|
<version>1.2.1</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.commons</groupId>
|
<groupId>org.apache.commons</groupId>
|
||||||
|
|||||||
@ -6,10 +6,10 @@
|
|||||||
<parent>
|
<parent>
|
||||||
<groupId>ml.dmlc</groupId>
|
<groupId>ml.dmlc</groupId>
|
||||||
<artifactId>xgboost-jvm_2.12</artifactId>
|
<artifactId>xgboost-jvm_2.12</artifactId>
|
||||||
<version>1.2.0-SNAPSHOT</version>
|
<version>1.2.1</version>
|
||||||
</parent>
|
</parent>
|
||||||
<artifactId>xgboost4j-flink_2.12</artifactId>
|
<artifactId>xgboost4j-flink_2.12</artifactId>
|
||||||
<version>1.2.0-SNAPSHOT</version>
|
<version>1.2.1</version>
|
||||||
<build>
|
<build>
|
||||||
<plugins>
|
<plugins>
|
||||||
<plugin>
|
<plugin>
|
||||||
@ -26,7 +26,7 @@
|
|||||||
<dependency>
|
<dependency>
|
||||||
<groupId>ml.dmlc</groupId>
|
<groupId>ml.dmlc</groupId>
|
||||||
<artifactId>xgboost4j_${scala.binary.version}</artifactId>
|
<artifactId>xgboost4j_${scala.binary.version}</artifactId>
|
||||||
<version>1.2.0-SNAPSHOT</version>
|
<version>1.2.1</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.commons</groupId>
|
<groupId>org.apache.commons</groupId>
|
||||||
|
|||||||
@ -6,7 +6,7 @@
|
|||||||
<parent>
|
<parent>
|
||||||
<groupId>ml.dmlc</groupId>
|
<groupId>ml.dmlc</groupId>
|
||||||
<artifactId>xgboost-jvm_2.12</artifactId>
|
<artifactId>xgboost-jvm_2.12</artifactId>
|
||||||
<version>1.2.0-SNAPSHOT</version>
|
<version>1.2.1</version>
|
||||||
</parent>
|
</parent>
|
||||||
<artifactId>xgboost4j-spark_2.12</artifactId>
|
<artifactId>xgboost4j-spark_2.12</artifactId>
|
||||||
<build>
|
<build>
|
||||||
@ -24,7 +24,7 @@
|
|||||||
<dependency>
|
<dependency>
|
||||||
<groupId>ml.dmlc</groupId>
|
<groupId>ml.dmlc</groupId>
|
||||||
<artifactId>xgboost4j_${scala.binary.version}</artifactId>
|
<artifactId>xgboost4j_${scala.binary.version}</artifactId>
|
||||||
<version>1.2.0-SNAPSHOT</version>
|
<version>1.2.1</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.spark</groupId>
|
<groupId>org.apache.spark</groupId>
|
||||||
|
|||||||
@ -6,10 +6,10 @@
|
|||||||
<parent>
|
<parent>
|
||||||
<groupId>ml.dmlc</groupId>
|
<groupId>ml.dmlc</groupId>
|
||||||
<artifactId>xgboost-jvm_2.12</artifactId>
|
<artifactId>xgboost-jvm_2.12</artifactId>
|
||||||
<version>1.2.0-SNAPSHOT</version>
|
<version>1.2.1</version>
|
||||||
</parent>
|
</parent>
|
||||||
<artifactId>xgboost4j_2.12</artifactId>
|
<artifactId>xgboost4j_2.12</artifactId>
|
||||||
<version>1.2.0-SNAPSHOT</version>
|
<version>1.2.1</version>
|
||||||
<packaging>jar</packaging>
|
<packaging>jar</packaging>
|
||||||
|
|
||||||
<dependencies>
|
<dependencies>
|
||||||
|
|||||||
@ -1 +1 @@
|
|||||||
1.2.0-SNAPSHOT
|
1.2.1
|
||||||
|
|||||||
@ -40,7 +40,7 @@ class EarlyStopException(Exception):
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, best_iteration):
|
def __init__(self, best_iteration):
|
||||||
super(EarlyStopException, self).__init__()
|
super().__init__()
|
||||||
self.best_iteration = best_iteration
|
self.best_iteration = best_iteration
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@ -738,7 +738,8 @@ async def _predict_async(client: Client, model, data, *args,
|
|||||||
predt = booster.predict(data=local_x,
|
predt = booster.predict(data=local_x,
|
||||||
validate_features=local_x.num_row() != 0,
|
validate_features=local_x.num_row() != 0,
|
||||||
*args)
|
*args)
|
||||||
ret = (delayed(predt), order)
|
columns = 1 if len(predt.shape) == 1 else predt.shape[1]
|
||||||
|
ret = ((delayed(predt), columns), order)
|
||||||
predictions.append(ret)
|
predictions.append(ret)
|
||||||
return predictions
|
return predictions
|
||||||
|
|
||||||
@ -775,8 +776,10 @@ async def _predict_async(client: Client, model, data, *args,
|
|||||||
# See https://docs.dask.org/en/latest/array-creation.html
|
# See https://docs.dask.org/en/latest/array-creation.html
|
||||||
arrays = []
|
arrays = []
|
||||||
for i, shape in enumerate(shapes):
|
for i, shape in enumerate(shapes):
|
||||||
arrays.append(da.from_delayed(results[i], shape=(shape[0], ),
|
arrays.append(da.from_delayed(
|
||||||
dtype=numpy.float32))
|
results[i][0], shape=(shape[0],)
|
||||||
|
if results[i][1] == 1 else (shape[0], results[i][1]),
|
||||||
|
dtype=numpy.float32))
|
||||||
predictions = await da.concatenate(arrays, axis=0)
|
predictions = await da.concatenate(arrays, axis=0)
|
||||||
return predictions
|
return predictions
|
||||||
|
|
||||||
@ -978,6 +981,7 @@ class DaskScikitLearnBase(XGBModel):
|
|||||||
def client(self, clt):
|
def client(self, clt):
|
||||||
self._client = clt
|
self._client = clt
|
||||||
|
|
||||||
|
|
||||||
@xgboost_model_doc("""Implementation of the Scikit-Learn API for XGBoost.""",
|
@xgboost_model_doc("""Implementation of the Scikit-Learn API for XGBoost.""",
|
||||||
['estimators', 'model'])
|
['estimators', 'model'])
|
||||||
class DaskXGBRegressor(DaskScikitLearnBase, XGBRegressorBase):
|
class DaskXGBRegressor(DaskScikitLearnBase, XGBRegressorBase):
|
||||||
@ -1032,9 +1036,6 @@ class DaskXGBRegressor(DaskScikitLearnBase, XGBRegressorBase):
|
|||||||
['estimators', 'model']
|
['estimators', 'model']
|
||||||
)
|
)
|
||||||
class DaskXGBClassifier(DaskScikitLearnBase, XGBClassifierBase):
|
class DaskXGBClassifier(DaskScikitLearnBase, XGBClassifierBase):
|
||||||
# pylint: disable=missing-docstring
|
|
||||||
_client = None
|
|
||||||
|
|
||||||
async def _fit_async(self, X, y,
|
async def _fit_async(self, X, y,
|
||||||
sample_weights=None,
|
sample_weights=None,
|
||||||
eval_set=None,
|
eval_set=None,
|
||||||
@ -1078,13 +1079,34 @@ class DaskXGBClassifier(DaskScikitLearnBase, XGBClassifierBase):
|
|||||||
return self.client.sync(self._fit_async, X, y, sample_weights,
|
return self.client.sync(self._fit_async, X, y, sample_weights,
|
||||||
eval_set, sample_weight_eval_set, verbose)
|
eval_set, sample_weight_eval_set, verbose)
|
||||||
|
|
||||||
async def _predict_async(self, data):
|
async def _predict_proba_async(self, data):
|
||||||
|
_assert_dask_support()
|
||||||
|
|
||||||
test_dmatrix = await DaskDMatrix(client=self.client, data=data,
|
test_dmatrix = await DaskDMatrix(client=self.client, data=data,
|
||||||
missing=self.missing)
|
missing=self.missing)
|
||||||
pred_probs = await predict(client=self.client,
|
pred_probs = await predict(client=self.client,
|
||||||
model=self.get_booster(), data=test_dmatrix)
|
model=self.get_booster(), data=test_dmatrix)
|
||||||
return pred_probs
|
return pred_probs
|
||||||
|
|
||||||
|
def predict_proba(self, data): # pylint: disable=arguments-differ,missing-docstring
|
||||||
|
_assert_dask_support()
|
||||||
|
return self.client.sync(self._predict_proba_async, data)
|
||||||
|
|
||||||
|
async def _predict_async(self, data):
|
||||||
|
_assert_dask_support()
|
||||||
|
|
||||||
|
test_dmatrix = await DaskDMatrix(client=self.client, data=data,
|
||||||
|
missing=self.missing)
|
||||||
|
pred_probs = await predict(client=self.client,
|
||||||
|
model=self.get_booster(), data=test_dmatrix)
|
||||||
|
|
||||||
|
if self.n_classes_ == 2:
|
||||||
|
preds = (pred_probs > 0.5).astype(int)
|
||||||
|
else:
|
||||||
|
preds = da.argmax(pred_probs, axis=1)
|
||||||
|
|
||||||
|
return preds
|
||||||
|
|
||||||
def predict(self, data): # pylint: disable=arguments-differ
|
def predict(self, data): # pylint: disable=arguments-differ
|
||||||
_assert_dask_support()
|
_assert_dask_support()
|
||||||
return self.client.sync(self._predict_async, data)
|
return self.client.sync(self._predict_async, data)
|
||||||
|
|||||||
@ -77,7 +77,7 @@ __model_doc = '''
|
|||||||
gamma : float
|
gamma : float
|
||||||
Minimum loss reduction required to make a further partition on a leaf
|
Minimum loss reduction required to make a further partition on a leaf
|
||||||
node of the tree.
|
node of the tree.
|
||||||
min_child_weight : int
|
min_child_weight : float
|
||||||
Minimum sum of instance weight(hessian) needed in a child.
|
Minimum sum of instance weight(hessian) needed in a child.
|
||||||
max_delta_step : int
|
max_delta_step : int
|
||||||
Maximum delta step we allow each tree's weight estimation to be.
|
Maximum delta step we allow each tree's weight estimation to be.
|
||||||
@ -750,7 +750,10 @@ class XGBModel(XGBModelBase):
|
|||||||
|
|
||||||
@xgboost_model_doc(
|
@xgboost_model_doc(
|
||||||
"Implementation of the scikit-learn API for XGBoost classification.",
|
"Implementation of the scikit-learn API for XGBoost classification.",
|
||||||
['model', 'objective'])
|
['model', 'objective'], extra_parameters='''
|
||||||
|
n_estimators : int
|
||||||
|
Number of boosting rounds.
|
||||||
|
''')
|
||||||
class XGBClassifier(XGBModel, XGBClassifierBase):
|
class XGBClassifier(XGBModel, XGBClassifierBase):
|
||||||
# pylint: disable=missing-docstring,invalid-name,too-many-instance-attributes
|
# pylint: disable=missing-docstring,invalid-name,too-many-instance-attributes
|
||||||
def __init__(self, objective="binary:logistic", **kwargs):
|
def __init__(self, objective="binary:logistic", **kwargs):
|
||||||
@ -1014,7 +1017,7 @@ class XGBRFClassifier(XGBClassifier):
|
|||||||
**kwargs)
|
**kwargs)
|
||||||
|
|
||||||
def get_xgb_params(self):
|
def get_xgb_params(self):
|
||||||
params = super(XGBRFClassifier, self).get_xgb_params()
|
params = super().get_xgb_params()
|
||||||
params['num_parallel_tree'] = self.n_estimators
|
params['num_parallel_tree'] = self.n_estimators
|
||||||
return params
|
return params
|
||||||
|
|
||||||
@ -1033,7 +1036,10 @@ class XGBRegressor(XGBModel, XGBRegressorBase):
|
|||||||
|
|
||||||
@xgboost_model_doc(
|
@xgboost_model_doc(
|
||||||
"scikit-learn API for XGBoost random forest regression.",
|
"scikit-learn API for XGBoost random forest regression.",
|
||||||
['model', 'objective'])
|
['model', 'objective'], extra_parameters='''
|
||||||
|
n_estimators : int
|
||||||
|
Number of trees in random forest to fit.
|
||||||
|
''')
|
||||||
class XGBRFRegressor(XGBRegressor):
|
class XGBRFRegressor(XGBRegressor):
|
||||||
# pylint: disable=missing-docstring
|
# pylint: disable=missing-docstring
|
||||||
def __init__(self, learning_rate=1, subsample=0.8, colsample_bynode=0.8,
|
def __init__(self, learning_rate=1, subsample=0.8, colsample_bynode=0.8,
|
||||||
@ -1043,7 +1049,7 @@ class XGBRFRegressor(XGBRegressor):
|
|||||||
reg_lambda=reg_lambda, **kwargs)
|
reg_lambda=reg_lambda, **kwargs)
|
||||||
|
|
||||||
def get_xgb_params(self):
|
def get_xgb_params(self):
|
||||||
params = super(XGBRFRegressor, self).get_xgb_params()
|
params = super().get_xgb_params()
|
||||||
params['num_parallel_tree'] = self.n_estimators
|
params['num_parallel_tree'] = self.n_estimators
|
||||||
return params
|
return params
|
||||||
|
|
||||||
|
|||||||
@ -1,6 +1,8 @@
|
|||||||
/*!
|
/*!
|
||||||
* Copyright 2019 by Contributors
|
* Copyright 2019-2020 by Contributors
|
||||||
*/
|
*/
|
||||||
|
#include <utility>
|
||||||
|
|
||||||
#include "xgboost/json.h"
|
#include "xgboost/json.h"
|
||||||
#include "xgboost/logging.h"
|
#include "xgboost/logging.h"
|
||||||
#include "gbtree_model.h"
|
#include "gbtree_model.h"
|
||||||
@ -41,15 +43,14 @@ void GBTreeModel::SaveModel(Json* p_out) const {
|
|||||||
auto& out = *p_out;
|
auto& out = *p_out;
|
||||||
CHECK_EQ(param.num_trees, static_cast<int>(trees.size()));
|
CHECK_EQ(param.num_trees, static_cast<int>(trees.size()));
|
||||||
out["gbtree_model_param"] = ToJson(param);
|
out["gbtree_model_param"] = ToJson(param);
|
||||||
std::vector<Json> trees_json;
|
std::vector<Json> trees_json(trees.size());
|
||||||
size_t t = 0;
|
|
||||||
for (auto const& tree : trees) {
|
for (size_t t = 0; t < trees.size(); ++t) {
|
||||||
|
auto const& tree = trees[t];
|
||||||
Json tree_json{Object()};
|
Json tree_json{Object()};
|
||||||
tree->SaveModel(&tree_json);
|
tree->SaveModel(&tree_json);
|
||||||
// The field is not used in XGBoost, but might be useful for external project.
|
tree_json["id"] = Integer(static_cast<Integer::Int>(t));
|
||||||
tree_json["id"] = Integer(t);
|
trees_json[t] = std::move(tree_json);
|
||||||
trees_json.emplace_back(tree_json);
|
|
||||||
t++;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<Json> tree_info_json(tree_info.size());
|
std::vector<Json> tree_info_json(tree_info.size());
|
||||||
@ -70,9 +71,10 @@ void GBTreeModel::LoadModel(Json const& in) {
|
|||||||
auto const& trees_json = get<Array const>(in["trees"]);
|
auto const& trees_json = get<Array const>(in["trees"]);
|
||||||
trees.resize(trees_json.size());
|
trees.resize(trees_json.size());
|
||||||
|
|
||||||
for (size_t t = 0; t < trees.size(); ++t) {
|
for (size_t t = 0; t < trees_json.size(); ++t) { // NOLINT
|
||||||
trees[t].reset( new RegTree() );
|
auto tree_id = get<Integer>(trees_json[t]["id"]);
|
||||||
trees[t]->LoadModel(trees_json[t]);
|
trees.at(tree_id).reset(new RegTree());
|
||||||
|
trees.at(tree_id)->LoadModel(trees_json[t]);
|
||||||
}
|
}
|
||||||
|
|
||||||
tree_info.resize(param.num_trees);
|
tree_info.resize(param.num_trees);
|
||||||
|
|||||||
@ -1,5 +1,6 @@
|
|||||||
ARG CUDA_VERSION
|
ARG CUDA_VERSION_ARG
|
||||||
FROM nvidia/cuda:$CUDA_VERSION-devel-ubuntu18.04
|
FROM nvidia/cuda:$CUDA_VERSION_ARG-devel-ubuntu18.04
|
||||||
|
ARG CUDA_VERSION_ARG
|
||||||
|
|
||||||
# Environment
|
# Environment
|
||||||
ENV DEBIAN_FRONTEND noninteractive
|
ENV DEBIAN_FRONTEND noninteractive
|
||||||
|
|||||||
@ -1,5 +1,6 @@
|
|||||||
ARG CUDA_VERSION
|
ARG CUDA_VERSION_ARG
|
||||||
FROM nvidia/cuda:$CUDA_VERSION-runtime-ubuntu16.04
|
FROM nvidia/cuda:$CUDA_VERSION_ARG-runtime-ubuntu16.04
|
||||||
|
ARG CUDA_VERSION_ARG
|
||||||
|
|
||||||
# Environment
|
# Environment
|
||||||
ENV DEBIAN_FRONTEND noninteractive
|
ENV DEBIAN_FRONTEND noninteractive
|
||||||
@ -17,8 +18,8 @@ ENV PATH=/opt/python/bin:$PATH
|
|||||||
|
|
||||||
# Create new Conda environment with cuDF, Dask, and cuPy
|
# Create new Conda environment with cuDF, Dask, and cuPy
|
||||||
RUN \
|
RUN \
|
||||||
conda create -n gpu_test -c rapidsai -c nvidia -c conda-forge -c defaults \
|
conda create -n gpu_test -c rapidsai-nightly -c rapidsai -c nvidia -c conda-forge -c defaults \
|
||||||
python=3.7 cudf=0.14 cudatoolkit=$CUDA_VERSION dask dask-cuda dask-cudf cupy \
|
python=3.7 cudf=0.15* cudatoolkit=$CUDA_VERSION_ARG dask dask-cuda dask-cudf cupy \
|
||||||
numpy pytest scipy scikit-learn pandas matplotlib wheel python-kubernetes urllib3 graphviz hypothesis
|
numpy pytest scipy scikit-learn pandas matplotlib wheel python-kubernetes urllib3 graphviz hypothesis
|
||||||
|
|
||||||
ENV GOSU_VERSION 1.10
|
ENV GOSU_VERSION 1.10
|
||||||
|
|||||||
@ -1,6 +1,6 @@
|
|||||||
ARG CUDA_VERSION
|
ARG CUDA_VERSION_ARG
|
||||||
FROM nvidia/cuda:$CUDA_VERSION-devel-ubuntu16.04
|
FROM nvidia/cuda:$CUDA_VERSION_ARG-devel-ubuntu16.04
|
||||||
ARG CUDA_VERSION
|
ARG CUDA_VERSION_ARG
|
||||||
|
|
||||||
# Environment
|
# Environment
|
||||||
ENV DEBIAN_FRONTEND noninteractive
|
ENV DEBIAN_FRONTEND noninteractive
|
||||||
@ -19,7 +19,7 @@ RUN \
|
|||||||
|
|
||||||
# NCCL2 (License: https://docs.nvidia.com/deeplearning/sdk/nccl-sla/index.html)
|
# NCCL2 (License: https://docs.nvidia.com/deeplearning/sdk/nccl-sla/index.html)
|
||||||
RUN \
|
RUN \
|
||||||
export CUDA_SHORT=`echo $CUDA_VERSION | egrep -o '[0-9]+\.[0-9]'` && \
|
export CUDA_SHORT=`echo $CUDA_VERSION_ARG | egrep -o '[0-9]+\.[0-9]'` && \
|
||||||
export NCCL_VERSION=2.7.5-1 && \
|
export NCCL_VERSION=2.7.5-1 && \
|
||||||
apt-get update && \
|
apt-get update && \
|
||||||
apt-get install -y --allow-downgrades --allow-change-held-packages libnccl2=${NCCL_VERSION}+cuda${CUDA_SHORT} libnccl-dev=${NCCL_VERSION}+cuda${CUDA_SHORT}
|
apt-get install -y --allow-downgrades --allow-change-held-packages libnccl2=${NCCL_VERSION}+cuda${CUDA_SHORT} libnccl-dev=${NCCL_VERSION}+cuda${CUDA_SHORT}
|
||||||
|
|||||||
@ -1,6 +1,6 @@
|
|||||||
ARG CUDA_VERSION
|
ARG CUDA_VERSION_ARG
|
||||||
FROM nvidia/cuda:$CUDA_VERSION-devel-centos6
|
FROM nvidia/cuda:$CUDA_VERSION_ARG-devel-centos6
|
||||||
ARG CUDA_VERSION
|
ARG CUDA_VERSION_ARG
|
||||||
|
|
||||||
# Environment
|
# Environment
|
||||||
ENV DEBIAN_FRONTEND noninteractive
|
ENV DEBIAN_FRONTEND noninteractive
|
||||||
@ -33,7 +33,7 @@ RUN \
|
|||||||
|
|
||||||
# NCCL2 (License: https://docs.nvidia.com/deeplearning/sdk/nccl-sla/index.html)
|
# NCCL2 (License: https://docs.nvidia.com/deeplearning/sdk/nccl-sla/index.html)
|
||||||
RUN \
|
RUN \
|
||||||
export CUDA_SHORT=`echo $CUDA_VERSION | egrep -o '[0-9]+\.[0-9]'` && \
|
export CUDA_SHORT=`echo $CUDA_VERSION_ARG | egrep -o '[0-9]+\.[0-9]'` && \
|
||||||
export NCCL_VERSION=2.4.8-1 && \
|
export NCCL_VERSION=2.4.8-1 && \
|
||||||
wget https://developer.download.nvidia.com/compute/machine-learning/repos/rhel7/x86_64/nvidia-machine-learning-repo-rhel7-1.0.0-1.x86_64.rpm && \
|
wget https://developer.download.nvidia.com/compute/machine-learning/repos/rhel7/x86_64/nvidia-machine-learning-repo-rhel7-1.0.0-1.x86_64.rpm && \
|
||||||
rpm -i nvidia-machine-learning-repo-rhel7-1.0.0-1.x86_64.rpm && \
|
rpm -i nvidia-machine-learning-repo-rhel7-1.0.0-1.x86_64.rpm && \
|
||||||
|
|||||||
@ -1,5 +1,6 @@
|
|||||||
ARG CUDA_VERSION
|
ARG CUDA_VERSION_ARG
|
||||||
FROM nvidia/cuda:$CUDA_VERSION-runtime-ubuntu16.04
|
FROM nvidia/cuda:$CUDA_VERSION_ARG-runtime-ubuntu16.04
|
||||||
|
ARG CUDA_VERSION_ARG
|
||||||
ARG JDK_VERSION=8
|
ARG JDK_VERSION=8
|
||||||
ARG SPARK_VERSION=3.0.0
|
ARG SPARK_VERSION=3.0.0
|
||||||
|
|
||||||
|
|||||||
@ -1,6 +1,6 @@
|
|||||||
ARG CUDA_VERSION
|
ARG CUDA_VERSION_ARG
|
||||||
FROM nvidia/cuda:$CUDA_VERSION-devel-centos6
|
FROM nvidia/cuda:$CUDA_VERSION_ARG-devel-centos6
|
||||||
ARG CUDA_VERSION
|
ARG CUDA_VERSION_ARG
|
||||||
|
|
||||||
# Environment
|
# Environment
|
||||||
ENV DEBIAN_FRONTEND noninteractive
|
ENV DEBIAN_FRONTEND noninteractive
|
||||||
@ -30,7 +30,7 @@ RUN \
|
|||||||
|
|
||||||
# NCCL2 (License: https://docs.nvidia.com/deeplearning/sdk/nccl-sla/index.html)
|
# NCCL2 (License: https://docs.nvidia.com/deeplearning/sdk/nccl-sla/index.html)
|
||||||
RUN \
|
RUN \
|
||||||
export CUDA_SHORT=`echo $CUDA_VERSION | egrep -o '[0-9]+\.[0-9]'` && \
|
export CUDA_SHORT=`echo $CUDA_VERSION_ARG | egrep -o '[0-9]+\.[0-9]'` && \
|
||||||
export NCCL_VERSION=2.4.8-1 && \
|
export NCCL_VERSION=2.4.8-1 && \
|
||||||
wget https://developer.download.nvidia.com/compute/machine-learning/repos/rhel7/x86_64/nvidia-machine-learning-repo-rhel7-1.0.0-1.x86_64.rpm && \
|
wget https://developer.download.nvidia.com/compute/machine-learning/repos/rhel7/x86_64/nvidia-machine-learning-repo-rhel7-1.0.0-1.x86_64.rpm && \
|
||||||
rpm -i nvidia-machine-learning-repo-rhel7-1.0.0-1.x86_64.rpm && \
|
rpm -i nvidia-machine-learning-repo-rhel7-1.0.0-1.x86_64.rpm && \
|
||||||
|
|||||||
@ -3,22 +3,32 @@
|
|||||||
set -e
|
set -e
|
||||||
set -x
|
set -x
|
||||||
|
|
||||||
if [ $# -ne 1 ]; then
|
if [ $# -ne 2 ]; then
|
||||||
echo "Usage: $0 [spark version]"
|
echo "Usage: $0 [spark version] [build_gpu? 0 or 1]"
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
spark_version=$1
|
spark_version=$1
|
||||||
|
build_gpu=$2
|
||||||
|
|
||||||
# Initialize local Maven repository
|
# Initialize local Maven repository
|
||||||
./tests/ci_build/initialize_maven.sh
|
./tests/ci_build/initialize_maven.sh
|
||||||
|
|
||||||
rm -rf build/
|
|
||||||
cd jvm-packages
|
cd jvm-packages
|
||||||
|
rm -rf $(find . -name target)
|
||||||
|
rm -rf ../build/
|
||||||
|
|
||||||
# Re-build package without Mock Rabit
|
# Re-build package without Mock Rabit
|
||||||
# Deploy to S3 bucket xgboost-maven-repo
|
# Deploy to S3 bucket xgboost-maven-repo
|
||||||
mvn --no-transfer-progress package deploy -P release-to-s3 -Dspark.version=${spark_version} -DskipTests
|
if [[ "$build_gpu" == "0" ]]
|
||||||
|
then
|
||||||
|
# Build CPU artifact
|
||||||
|
mvn --no-transfer-progress package deploy -P release-to-s3 -Dspark.version=${spark_version} -DskipTests
|
||||||
|
else
|
||||||
|
# Build GPU artifact
|
||||||
|
sed -i -e 's/<artifactId>xgboost\(.*\)_\(.*\)<\/artifactId>/<artifactId>xgboost\1-gpu_\2<\/artifactId>/' $(find . -name pom.xml)
|
||||||
|
mvn --no-transfer-progress package deploy -Duse.cuda=ON -P release-to-s3 -Dspark.version=${spark_version} -DskipTests
|
||||||
|
fi
|
||||||
|
|
||||||
set +x
|
set +x
|
||||||
set +e
|
set +e
|
||||||
|
|||||||
@ -148,7 +148,16 @@ TEST(Learner, JsonModelIO) {
|
|||||||
Json out { Object() };
|
Json out { Object() };
|
||||||
learner->SaveModel(&out);
|
learner->SaveModel(&out);
|
||||||
|
|
||||||
learner->LoadModel(out);
|
dmlc::TemporaryDirectory tmpdir;
|
||||||
|
|
||||||
|
std::ofstream fout (tmpdir.path + "/model.json");
|
||||||
|
fout << out;
|
||||||
|
fout.close();
|
||||||
|
|
||||||
|
auto loaded_str = common::LoadSequentialFile(tmpdir.path + "/model.json");
|
||||||
|
Json loaded = Json::Load(StringView{loaded_str.c_str(), loaded_str.size()});
|
||||||
|
|
||||||
|
learner->LoadModel(loaded);
|
||||||
learner->Configure();
|
learner->Configure();
|
||||||
|
|
||||||
Json new_in { Object() };
|
Json new_in { Object() };
|
||||||
|
|||||||
@ -121,6 +121,8 @@ eval[test] = {data_path}
|
|||||||
v = xgboost.__version__
|
v = xgboost.__version__
|
||||||
if v.find('SNAPSHOT') != -1:
|
if v.find('SNAPSHOT') != -1:
|
||||||
assert msg.split(':')[1].strip() == v.split('-')[0]
|
assert msg.split(':')[1].strip() == v.split('-')[0]
|
||||||
|
elif v.find('rc') != -1:
|
||||||
|
assert msg.split(':')[1].strip() == v.split('rc')[0]
|
||||||
else:
|
else:
|
||||||
assert msg.split(':')[1].strip() == v
|
assert msg.split(':')[1].strip() == v
|
||||||
|
|
||||||
|
|||||||
@ -5,6 +5,7 @@ import sys
|
|||||||
import numpy as np
|
import numpy as np
|
||||||
import json
|
import json
|
||||||
import asyncio
|
import asyncio
|
||||||
|
from sklearn.datasets import make_classification
|
||||||
|
|
||||||
if sys.platform.startswith("win"):
|
if sys.platform.startswith("win"):
|
||||||
pytest.skip("Skipping dask tests on Windows", allow_module_level=True)
|
pytest.skip("Skipping dask tests on Windows", allow_module_level=True)
|
||||||
@ -36,7 +37,7 @@ def generate_array():
|
|||||||
|
|
||||||
|
|
||||||
def test_from_dask_dataframe():
|
def test_from_dask_dataframe():
|
||||||
with LocalCluster(n_workers=5) as cluster:
|
with LocalCluster(n_workers=kWorkers) as cluster:
|
||||||
with Client(cluster) as client:
|
with Client(cluster) as client:
|
||||||
X, y = generate_array()
|
X, y = generate_array()
|
||||||
|
|
||||||
@ -74,7 +75,7 @@ def test_from_dask_dataframe():
|
|||||||
|
|
||||||
|
|
||||||
def test_from_dask_array():
|
def test_from_dask_array():
|
||||||
with LocalCluster(n_workers=5, threads_per_worker=5) as cluster:
|
with LocalCluster(n_workers=kWorkers, threads_per_worker=5) as cluster:
|
||||||
with Client(cluster) as client:
|
with Client(cluster) as client:
|
||||||
X, y = generate_array()
|
X, y = generate_array()
|
||||||
dtrain = DaskDMatrix(client, X, y)
|
dtrain = DaskDMatrix(client, X, y)
|
||||||
@ -104,8 +105,28 @@ def test_from_dask_array():
|
|||||||
assert np.all(single_node_predt == from_arr.compute())
|
assert np.all(single_node_predt == from_arr.compute())
|
||||||
|
|
||||||
|
|
||||||
|
def test_dask_predict_shape_infer():
|
||||||
|
with LocalCluster(n_workers=kWorkers) as cluster:
|
||||||
|
with Client(cluster) as client:
|
||||||
|
X, y = make_classification(n_samples=1000, n_informative=5,
|
||||||
|
n_classes=3)
|
||||||
|
X_ = dd.from_array(X, chunksize=100)
|
||||||
|
y_ = dd.from_array(y, chunksize=100)
|
||||||
|
dtrain = xgb.dask.DaskDMatrix(client, data=X_, label=y_)
|
||||||
|
|
||||||
|
model = xgb.dask.train(
|
||||||
|
client,
|
||||||
|
{"objective": "multi:softprob", "num_class": 3},
|
||||||
|
dtrain=dtrain
|
||||||
|
)
|
||||||
|
|
||||||
|
preds = xgb.dask.predict(client, model, dtrain)
|
||||||
|
assert preds.shape[0] == preds.compute().shape[0]
|
||||||
|
assert preds.shape[1] == preds.compute().shape[1]
|
||||||
|
|
||||||
|
|
||||||
def test_dask_missing_value_reg():
|
def test_dask_missing_value_reg():
|
||||||
with LocalCluster(n_workers=5) as cluster:
|
with LocalCluster(n_workers=kWorkers) as cluster:
|
||||||
with Client(cluster) as client:
|
with Client(cluster) as client:
|
||||||
X_0 = np.ones((20 // 2, kCols))
|
X_0 = np.ones((20 // 2, kCols))
|
||||||
X_1 = np.zeros((20 // 2, kCols))
|
X_1 = np.zeros((20 // 2, kCols))
|
||||||
@ -144,19 +165,19 @@ def test_dask_missing_value_cls():
|
|||||||
missing=0.0)
|
missing=0.0)
|
||||||
cls.client = client
|
cls.client = client
|
||||||
cls.fit(X, y, eval_set=[(X, y)])
|
cls.fit(X, y, eval_set=[(X, y)])
|
||||||
dd_predt = cls.predict(X).compute()
|
dd_pred_proba = cls.predict_proba(X).compute()
|
||||||
|
|
||||||
np_X = X.compute()
|
np_X = X.compute()
|
||||||
np_predt = cls.get_booster().predict(
|
np_pred_proba = cls.get_booster().predict(
|
||||||
xgb.DMatrix(np_X, missing=0.0))
|
xgb.DMatrix(np_X, missing=0.0))
|
||||||
np.testing.assert_allclose(np_predt, dd_predt)
|
np.testing.assert_allclose(np_pred_proba, dd_pred_proba)
|
||||||
|
|
||||||
cls = xgb.dask.DaskXGBClassifier()
|
cls = xgb.dask.DaskXGBClassifier()
|
||||||
assert hasattr(cls, 'missing')
|
assert hasattr(cls, 'missing')
|
||||||
|
|
||||||
|
|
||||||
def test_dask_regressor():
|
def test_dask_regressor():
|
||||||
with LocalCluster(n_workers=5) as cluster:
|
with LocalCluster(n_workers=kWorkers) as cluster:
|
||||||
with Client(cluster) as client:
|
with Client(cluster) as client:
|
||||||
X, y = generate_array()
|
X, y = generate_array()
|
||||||
regressor = xgb.dask.DaskXGBRegressor(verbosity=1, n_estimators=2)
|
regressor = xgb.dask.DaskXGBRegressor(verbosity=1, n_estimators=2)
|
||||||
@ -178,7 +199,7 @@ def test_dask_regressor():
|
|||||||
|
|
||||||
|
|
||||||
def test_dask_classifier():
|
def test_dask_classifier():
|
||||||
with LocalCluster(n_workers=5) as cluster:
|
with LocalCluster(n_workers=kWorkers) as cluster:
|
||||||
with Client(cluster) as client:
|
with Client(cluster) as client:
|
||||||
X, y = generate_array()
|
X, y = generate_array()
|
||||||
y = (y * 10).astype(np.int32)
|
y = (y * 10).astype(np.int32)
|
||||||
@ -201,7 +222,18 @@ def test_dask_classifier():
|
|||||||
assert len(list(history['validation_0'])) == 1
|
assert len(list(history['validation_0'])) == 1
|
||||||
assert len(history['validation_0']['merror']) == 2
|
assert len(history['validation_0']['merror']) == 2
|
||||||
|
|
||||||
|
# Test .predict_proba()
|
||||||
|
probas = classifier.predict_proba(X)
|
||||||
assert classifier.n_classes_ == 10
|
assert classifier.n_classes_ == 10
|
||||||
|
assert probas.ndim == 2
|
||||||
|
assert probas.shape[0] == kRows
|
||||||
|
assert probas.shape[1] == 10
|
||||||
|
|
||||||
|
cls_booster = classifier.get_booster()
|
||||||
|
single_node_proba = cls_booster.inplace_predict(X.compute())
|
||||||
|
|
||||||
|
np.testing.assert_allclose(single_node_proba,
|
||||||
|
probas.compute())
|
||||||
|
|
||||||
# Test with dataframe.
|
# Test with dataframe.
|
||||||
X_d = dd.from_dask_array(X)
|
X_d = dd.from_dask_array(X)
|
||||||
@ -218,7 +250,7 @@ def test_dask_classifier():
|
|||||||
@pytest.mark.skipif(**tm.no_sklearn())
|
@pytest.mark.skipif(**tm.no_sklearn())
|
||||||
def test_sklearn_grid_search():
|
def test_sklearn_grid_search():
|
||||||
from sklearn.model_selection import GridSearchCV
|
from sklearn.model_selection import GridSearchCV
|
||||||
with LocalCluster(n_workers=4) as cluster:
|
with LocalCluster(n_workers=kWorkers) as cluster:
|
||||||
with Client(cluster) as client:
|
with Client(cluster) as client:
|
||||||
X, y = generate_array()
|
X, y = generate_array()
|
||||||
reg = xgb.dask.DaskXGBRegressor(learning_rate=0.1,
|
reg = xgb.dask.DaskXGBRegressor(learning_rate=0.1,
|
||||||
@ -292,7 +324,9 @@ def run_empty_dmatrix_cls(client, parameters):
|
|||||||
evals=[(dtrain, 'validation')],
|
evals=[(dtrain, 'validation')],
|
||||||
num_boost_round=2)
|
num_boost_round=2)
|
||||||
predictions = xgb.dask.predict(client=client, model=out,
|
predictions = xgb.dask.predict(client=client, model=out,
|
||||||
data=dtrain).compute()
|
data=dtrain)
|
||||||
|
assert predictions.shape[1] == n_classes
|
||||||
|
predictions = predictions.compute()
|
||||||
_check_outputs(out, predictions)
|
_check_outputs(out, predictions)
|
||||||
|
|
||||||
# train has more rows than evals
|
# train has more rows than evals
|
||||||
@ -315,7 +349,7 @@ def run_empty_dmatrix_cls(client, parameters):
|
|||||||
# environment and Exact doesn't support it.
|
# environment and Exact doesn't support it.
|
||||||
|
|
||||||
def test_empty_dmatrix_hist():
|
def test_empty_dmatrix_hist():
|
||||||
with LocalCluster(n_workers=5) as cluster:
|
with LocalCluster(n_workers=kWorkers) as cluster:
|
||||||
with Client(cluster) as client:
|
with Client(cluster) as client:
|
||||||
parameters = {'tree_method': 'hist'}
|
parameters = {'tree_method': 'hist'}
|
||||||
run_empty_dmatrix_reg(client, parameters)
|
run_empty_dmatrix_reg(client, parameters)
|
||||||
@ -323,7 +357,7 @@ def test_empty_dmatrix_hist():
|
|||||||
|
|
||||||
|
|
||||||
def test_empty_dmatrix_approx():
|
def test_empty_dmatrix_approx():
|
||||||
with LocalCluster(n_workers=5) as cluster:
|
with LocalCluster(n_workers=kWorkers) as cluster:
|
||||||
with Client(cluster) as client:
|
with Client(cluster) as client:
|
||||||
parameters = {'tree_method': 'approx'}
|
parameters = {'tree_method': 'approx'}
|
||||||
run_empty_dmatrix_reg(client, parameters)
|
run_empty_dmatrix_reg(client, parameters)
|
||||||
@ -397,7 +431,13 @@ async def run_dask_classifier_asyncio(scheduler_address):
|
|||||||
assert len(list(history['validation_0'])) == 1
|
assert len(list(history['validation_0'])) == 1
|
||||||
assert len(history['validation_0']['merror']) == 2
|
assert len(history['validation_0']['merror']) == 2
|
||||||
|
|
||||||
|
# Test .predict_proba()
|
||||||
|
probas = await classifier.predict_proba(X)
|
||||||
assert classifier.n_classes_ == 10
|
assert classifier.n_classes_ == 10
|
||||||
|
assert probas.ndim == 2
|
||||||
|
assert probas.shape[0] == kRows
|
||||||
|
assert probas.shape[1] == 10
|
||||||
|
|
||||||
|
|
||||||
# Test with dataframe.
|
# Test with dataframe.
|
||||||
X_d = dd.from_dask_array(X)
|
X_d = dd.from_dask_array(X)
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user