Compare commits

...

15 Commits

Author SHA1 Message Date
Nan Zhu
00774eeac3
[jvm-packages] update version number for 1.2 branch (#6427)
* [jvm-packages]update version number of 1.2 branch

* update ver
2020-11-23 14:16:30 -08:00
Philip Hyunsu Cho
bcb15a980f
1.2.1 patch release (#6206)
* Hide C++ symbols from dmlc-core (#6188)

* Up version to 1.2.1

* Fix lint

* [CI] Fix Docker build for CUDA 11 (#6202)

* Update Dockerfile.gpu
2020-10-12 15:10:16 -07:00
Tong He
0cd0dad0b5
Fix CRAN submission (#6076) 2020-09-01 23:38:27 -07:00
Philip Hyunsu Cho
884098ec22
[CI] Fix CRAN check (#6067) 2020-08-28 21:24:49 +08:00
Hyunsu Cho
738786680b Release 1.2.0 2020-08-22 18:25:18 -07:00
Philip Hyunsu Cho
04232c01b2
[CI] Fix broken tests (#6048) 2020-08-22 11:43:38 -07:00
Jiaming Yuan
0353a78ab7 Fix scikit learn cls doc. (#6041) 2020-08-20 19:25:12 -07:00
Hyunsu Cho
0089a0e6bf Fix another typo 2020-08-12 19:29:08 +00:00
Philip Hyunsu Cho
03a68a1714
Fix typo 2020-08-12 01:34:33 -07:00
Hyunsu Cho
a0da8a7e0a Make RC2 2020-08-12 00:50:51 -07:00
Hyunsu Cho
eee4eff49b [CI] Build GPU-enabled JAR artifact and deploy to xgboost-maven-repo 2020-08-12 00:50:47 -07:00
Jiaming Yuan
936a854baa
Back port fixes to 1.2 (#6002)
* Fix sklearn doc. (#5980)

* Enforce tree order in JSON. (#5974)

* Make JSON model IO more future proof by using tree id in model loading.

* Fix dask predict shape infer. (#5989)

* [Breaking] Fix .predict() method and add .predict_proba() in xgboost.dask.DaskXGBClassifier (#5986)
2020-08-11 20:22:31 +08:00
Hyunsu Cho
7856da5827 [CI] Use mgpu machine to run gpu hist unit tests 2020-08-02 02:33:05 -07:00
Hyunsu Cho
50a0def6c3 Make RC1 2020-08-02 08:56:20 +00:00
Hyunsu Cho
9116a0ec10 Fix a unit test on CLI, to handle RC versions 2020-08-02 08:56:15 +00:00
40 changed files with 232 additions and 130 deletions

View File

@@ -81,7 +81,7 @@ jobs:
     run: |
       cd R-package
       R.exe CMD INSTALL .
-      Rscript.exe tests/run_lint.R
+      Rscript.exe tests/helper_scripts/run_lint.R
   test-with-R:

View File

@@ -1,9 +1,10 @@
 cmake_minimum_required(VERSION 3.13)
-project(xgboost LANGUAGES CXX C VERSION 1.2.0)
+project(xgboost LANGUAGES CXX C VERSION 1.2.1)
 include(cmake/Utils.cmake)
 list(APPEND CMAKE_MODULE_PATH "${xgboost_SOURCE_DIR}/cmake/modules")
 cmake_policy(SET CMP0022 NEW)
 cmake_policy(SET CMP0079 NEW)
+set(CMAKE_POLICY_DEFAULT_CMP0063 NEW)
 cmake_policy(SET CMP0063 NEW)
 if ((${CMAKE_VERSION} VERSION_GREATER 3.13) OR (${CMAKE_VERSION} VERSION_EQUAL 3.13))
@@ -173,9 +174,6 @@ foreach(lib rabit rabit_base rabit_empty rabit_mock rabit_mock_static)
   # from dmlc is correctly applied to rabit.
   if (TARGET ${lib})
     target_link_libraries(${lib} dmlc ${CMAKE_THREAD_LIBS_INIT})
-    if (HIDE_CXX_SYMBOLS) # Hide all C++ symbols from Rabit
-      set_target_properties(${lib} PROPERTIES CXX_VISIBILITY_PRESET hidden)
-    endif (HIDE_CXX_SYMBOLS)
     if (ENABLE_ALL_WARNINGS)
       target_compile_options(${lib} PRIVATE -Wall -Wextra)
     endif (ENABLE_ALL_WARNINGS)
@@ -204,8 +202,9 @@ endif (USE_NVTX)
 #-- Hide all C++ symbols
 if (HIDE_CXX_SYMBOLS)
-  set_target_properties(objxgboost PROPERTIES CXX_VISIBILITY_PRESET hidden)
-  set_target_properties(xgboost PROPERTIES CXX_VISIBILITY_PRESET hidden)
+  foreach(target objxgboost xgboost dmlc rabit rabit_mock_static)
+    set_target_properties(${target} PROPERTIES CXX_VISIBILITY_PRESET hidden)
+  endforeach()
 endif (HIDE_CXX_SYMBOLS)
 target_include_directories(xgboost

Jenkinsfile (vendored): 23 changed lines
View File

@@ -92,7 +92,7 @@ pipeline {
     'test-python-gpu-cuda10.2': { TestPythonGPU(host_cuda_version: '10.2') },
     'test-python-gpu-cuda11.0-cross': { TestPythonGPU(artifact_cuda_version: '10.0', host_cuda_version: '11.0') },
     'test-python-gpu-cuda11.0': { TestPythonGPU(artifact_cuda_version: '11.0', host_cuda_version: '11.0') },
-    'test-python-mgpu-cuda10.2': { TestPythonGPU(artifact_cuda_version: '10.2', host_cuda_version: '10.2', multi_gpu: true) },
+    'test-python-mgpu-cuda10.2': { TestPythonGPU(artifact_cuda_version: '10.0', host_cuda_version: '10.2', multi_gpu: true) },
     'test-cpp-gpu-cuda10.2': { TestCppGPU(artifact_cuda_version: '10.2', host_cuda_version: '10.2') },
     'test-cpp-gpu-cuda11.0': { TestCppGPU(artifact_cuda_version: '11.0', host_cuda_version: '11.0') },
     'test-jvm-jdk8-cuda10.0': { CrossTestJVMwithJDKGPU(artifact_cuda_version: '10.0', host_cuda_version: '10.0') },
@@ -144,7 +144,7 @@ def ClangTidy() {
   echo "Running clang-tidy job..."
   def container_type = "clang_tidy"
   def docker_binary = "docker"
-  def dockerArgs = "--build-arg CUDA_VERSION=10.1"
+  def dockerArgs = "--build-arg CUDA_VERSION_ARG=10.1"
   sh """
   ${dockerRun} ${container_type} ${docker_binary} ${dockerArgs} python3 tests/ci_build/tidy.py
   """
@@ -261,7 +261,7 @@ def BuildCUDA(args) {
   echo "Build with CUDA ${args.cuda_version}"
   def container_type = GetCUDABuildContainerType(args.cuda_version)
   def docker_binary = "docker"
-  def docker_args = "--build-arg CUDA_VERSION=${args.cuda_version}"
+  def docker_args = "--build-arg CUDA_VERSION_ARG=${args.cuda_version}"
   def arch_flag = ""
   if (env.BRANCH_NAME != 'master' && !(env.BRANCH_NAME.startsWith('release'))) {
     arch_flag = "-DGPU_COMPUTE_VER=75"
@@ -285,12 +285,12 @@ def BuildCUDA(args) {
 }
 def BuildJVMPackagesWithCUDA(args) {
-  node('linux && gpu') {
+  node('linux && mgpu') {
     unstash name: 'srcs'
     echo "Build XGBoost4J-Spark with Spark ${args.spark_version}, CUDA ${args.cuda_version}"
     def container_type = "jvm_gpu_build"
     def docker_binary = "nvidia-docker"
-    def docker_args = "--build-arg CUDA_VERSION=${args.cuda_version}"
+    def docker_args = "--build-arg CUDA_VERSION_ARG=${args.cuda_version}"
     def arch_flag = ""
     if (env.BRANCH_NAME != 'master' && !(env.BRANCH_NAME.startsWith('release'))) {
       arch_flag = "-DGPU_COMPUTE_VER=75"
@@ -365,7 +365,7 @@ def TestPythonGPU(args) {
   echo "Test Python GPU: CUDA ${args.host_cuda_version}"
   def container_type = "gpu"
   def docker_binary = "nvidia-docker"
-  def docker_args = "--build-arg CUDA_VERSION=${args.host_cuda_version}"
+  def docker_args = "--build-arg CUDA_VERSION_ARG=${args.host_cuda_version}"
   if (args.multi_gpu) {
     echo "Using multiple GPUs"
     // Allocate extra space in /dev/shm to enable NCCL
@@ -406,7 +406,7 @@ def TestCppGPU(args) {
   echo "Test C++, CUDA ${args.host_cuda_version}"
   def container_type = "gpu"
   def docker_binary = "nvidia-docker"
-  def docker_args = "--build-arg CUDA_VERSION=${args.host_cuda_version}"
+  def docker_args = "--build-arg CUDA_VERSION_ARG=${args.host_cuda_version}"
   sh "${dockerRun} ${container_type} ${docker_binary} ${docker_args} build/testxgboost"
   deleteDir()
 }
@@ -424,7 +424,7 @@ def CrossTestJVMwithJDKGPU(args) {
   }
   def container_type = "gpu_jvm"
   def docker_binary = "nvidia-docker"
-  def docker_args = "--build-arg CUDA_VERSION=${args.host_cuda_version}"
+  def docker_args = "--build-arg CUDA_VERSION_ARG=${args.host_cuda_version}"
   sh "${dockerRun} ${container_type} ${docker_binary} ${docker_args} tests/ci_build/test_jvm_gpu_cross.sh"
   deleteDir()
 }
@@ -472,10 +472,11 @@ def DeployJVMPackages(args) {
     unstash name: 'srcs'
     if (env.BRANCH_NAME == 'master' || env.BRANCH_NAME.startsWith('release')) {
       echo 'Deploying to xgboost-maven-repo S3 repo...'
-      def container_type = "jvm"
-      def docker_binary = "docker"
       sh """
-      ${dockerRun} ${container_type} ${docker_binary} tests/ci_build/deploy_jvm_packages.sh ${args.spark_version}
+      ${dockerRun} jvm docker tests/ci_build/deploy_jvm_packages.sh ${args.spark_version} 0
+      """
+      sh """
+      ${dockerRun} jvm_gpu_build docker --build-arg CUDA_VERSION_ARG=10.0 tests/ci_build/deploy_jvm_packages.sh ${args.spark_version} 1
       """
     }
     deleteDir()

View File

@@ -133,15 +133,16 @@ Rpack: clean_all
 	sed -i -e 's/@BACKTRACE_LIB@//g' xgboost/src/Makevars.win
 	sed -i -e 's/@OPENMP_LIB@//g' xgboost/src/Makevars.win
 	rm -f xgboost/src/Makevars.win-e # OSX sed create this extra file; remove it
-	bash R-package/remove_warning_suppression_pragma.sh
+	bash xgboost/remove_warning_suppression_pragma.sh
 	rm xgboost/remove_warning_suppression_pragma.sh
+	rm -rfv xgboost/tests/helper_scripts/
 Rbuild: Rpack
 	R CMD build --no-build-vignettes xgboost
 	rm -rf xgboost
 Rcheck: Rbuild
-	R CMD check xgboost*.tar.gz
+	R CMD check --as-cran xgboost*.tar.gz
 -include build/*.d
 -include build/*/*.d

View File

@@ -2,7 +2,7 @@ Package: xgboost
 Type: Package
 Title: Extreme Gradient Boosting
 Version: 1.2.0.1
-Date: 2020-02-21
+Date: 2020-08-28
 Authors@R: c(
   person("Tianqi", "Chen", role = c("aut"),
          email = "tianqi.tchen@gmail.com"),

View File

@@ -349,6 +349,7 @@ NULL
 #' # Save as a stand-alone file (JSON); load it with xgb.load()
 #' xgb.save(bst, 'xgb.model.json')
 #' bst2 <- xgb.load('xgb.model.json')
+#' if (file.exists('xgb.model.json')) file.remove('xgb.model.json')
 #'
 #' # Save as a raw byte vector; load it with xgb.load.raw()
 #' xgb_bytes <- xgb.save.raw(bst)
@@ -364,6 +365,7 @@ NULL
 #' obj2 <- readRDS('my_object.rds')
 #' # Re-construct xgb.Booster object from the bytes
 #' bst2 <- xgb.load.raw(obj2$xgb_model_bytes)
+#' if (file.exists('my_object.rds')) file.remove('my_object.rds')
 #'
 #' @name a-compatibility-note-for-saveRDS-save
 NULL

View File

@@ -79,7 +79,7 @@
 #'
 #' All observations are used for both training and validation.
 #'
-#' Adapted from \url{http://en.wikipedia.org/wiki/Cross-validation_\%28statistics\%29#k-fold_cross-validation}
+#' Adapted from \url{https://en.wikipedia.org/wiki/Cross-validation_\%28statistics\%29}
 #'
 #' @return
 #' An object of class \code{xgb.cv.synchronous} with the following elements:

View File

@@ -130,16 +130,16 @@
 #' Note that when using a customized metric, only this single metric can be used.
 #' The following is the list of built-in metrics for which Xgboost provides optimized implementation:
 #'   \itemize{
-#'     \item \code{rmse} root mean square error. \url{http://en.wikipedia.org/wiki/Root_mean_square_error}
-#'     \item \code{logloss} negative log-likelihood. \url{http://en.wikipedia.org/wiki/Log-likelihood}
-#'     \item \code{mlogloss} multiclass logloss. \url{http://wiki.fast.ai/index.php/Log_Loss}
+#'     \item \code{rmse} root mean square error. \url{https://en.wikipedia.org/wiki/Root_mean_square_error}
+#'     \item \code{logloss} negative log-likelihood. \url{https://en.wikipedia.org/wiki/Log-likelihood}
+#'     \item \code{mlogloss} multiclass logloss. \url{https://scikit-learn.org/stable/modules/generated/sklearn.metrics.log_loss.html}
 #'     \item \code{error} Binary classification error rate. It is calculated as \code{(# wrong cases) / (# all cases)}.
 #'           By default, it uses the 0.5 threshold for predicted values to define negative and positive instances.
 #'           Different threshold (e.g., 0.) could be specified as "error@0."
 #'     \item \code{merror} Multiclass classification error rate. It is calculated as \code{(# wrong cases) / (# all cases)}.
-#'     \item \code{auc} Area under the curve. \url{http://en.wikipedia.org/wiki/Receiver_operating_characteristic#'Area_under_curve} for ranking evaluation.
+#'     \item \code{auc} Area under the curve. \url{https://en.wikipedia.org/wiki/Receiver_operating_characteristic#'Area_under_curve} for ranking evaluation.
 #'     \item \code{aucpr} Area under the PR curve. \url{https://en.wikipedia.org/wiki/Precision_and_recall} for ranking evaluation.
-#'     \item \code{ndcg} Normalized Discounted Cumulative Gain (for ranking task). \url{http://en.wikipedia.org/wiki/NDCG}
+#'     \item \code{ndcg} Normalized Discounted Cumulative Gain (for ranking task). \url{https://en.wikipedia.org/wiki/NDCG}
 #'   }
 #'
 #' The following callbacks are automatically created when certain parameters are set:

View File

@@ -43,6 +43,7 @@ bst2 <- xgb.load('xgb.model')
 # Save as a stand-alone file (JSON); load it with xgb.load()
 xgb.save(bst, 'xgb.model.json')
 bst2 <- xgb.load('xgb.model.json')
+if (file.exists('xgb.model.json')) file.remove('xgb.model.json')
 # Save as a raw byte vector; load it with xgb.load.raw()
 xgb_bytes <- xgb.save.raw(bst)
@@ -58,5 +59,6 @@ saveRDS(obj, 'my_object.rds')
 obj2 <- readRDS('my_object.rds')
 # Re-construct xgb.Booster object from the bytes
 bst2 <- xgb.load.raw(obj2$xgb_model_bytes)
+if (file.exists('my_object.rds')) file.remove('my_object.rds')
 }

View File

@@ -154,7 +154,7 @@ The cross-validation process is then repeated \code{nrounds} times, with each of
 All observations are used for both training and validation.
-Adapted from \url{http://en.wikipedia.org/wiki/Cross-validation_\%28statistics\%29#k-fold_cross-validation}
+Adapted from \url{https://en.wikipedia.org/wiki/Cross-validation_\%28statistics\%29}
 }
 \examples{
 data(agaricus.train, package='xgboost')

View File

@@ -215,16 +215,16 @@ User may set one or several \code{eval_metric} parameters.
 Note that when using a customized metric, only this single metric can be used.
 The following is the list of built-in metrics for which Xgboost provides optimized implementation:
   \itemize{
-    \item \code{rmse} root mean square error. \url{http://en.wikipedia.org/wiki/Root_mean_square_error}
-    \item \code{logloss} negative log-likelihood. \url{http://en.wikipedia.org/wiki/Log-likelihood}
-    \item \code{mlogloss} multiclass logloss. \url{http://wiki.fast.ai/index.php/Log_Loss}
+    \item \code{rmse} root mean square error. \url{https://en.wikipedia.org/wiki/Root_mean_square_error}
+    \item \code{logloss} negative log-likelihood. \url{https://en.wikipedia.org/wiki/Log-likelihood}
+    \item \code{mlogloss} multiclass logloss. \url{https://scikit-learn.org/stable/modules/generated/sklearn.metrics.log_loss.html}
     \item \code{error} Binary classification error rate. It is calculated as \code{(# wrong cases) / (# all cases)}.
          By default, it uses the 0.5 threshold for predicted values to define negative and positive instances.
          Different threshold (e.g., 0.) could be specified as "error@0."
     \item \code{merror} Multiclass classification error rate. It is calculated as \code{(# wrong cases) / (# all cases)}.
-    \item \code{auc} Area under the curve. \url{http://en.wikipedia.org/wiki/Receiver_operating_characteristic#'Area_under_curve} for ranking evaluation.
+    \item \code{auc} Area under the curve. \url{https://en.wikipedia.org/wiki/Receiver_operating_characteristic#'Area_under_curve} for ranking evaluation.
     \item \code{aucpr} Area under the PR curve. \url{https://en.wikipedia.org/wiki/Precision_and_recall} for ranking evaluation.
-    \item \code{ndcg} Normalized Discounted Cumulative Gain (for ranking task). \url{http://en.wikipedia.org/wiki/NDCG}
+    \item \code{ndcg} Normalized Discounted Cumulative Gain (for ranking task). \url{https://en.wikipedia.org/wiki/NDCG}
 }
 The following callbacks are automatically created when certain parameters are set:

View File

@@ -1,10 +0,0 @@
-model_generator_metadata <- function() {
-  return (list(
-    kRounds = 2,
-    kRows = 1000,
-    kCols = 4,
-    kForests = 2,
-    kMaxDepth = 2,
-    kClasses = 3
-  ))
-}

View File

@@ -5,7 +5,14 @@ library(Matrix)
 source('./generate_models_params.R')
 set.seed(0)
-metadata <- model_generator_metadata()
+metadata <- list(
+  kRounds = 2,
+  kRows = 1000,
+  kCols = 4,
+  kForests = 2,
+  kMaxDepth = 2,
+  kClasses = 3
+)
 X <- Matrix(data = rnorm(metadata$kRows * metadata$kCols), nrow = metadata$kRows,
             ncol = metadata$kCols, sparse = TRUE)
 w <- runif(metadata$kRows)

View File

@@ -1,10 +1,16 @@
 require(xgboost)
 require(jsonlite)
-source('../generate_models_params.R')
 context("Models from previous versions of XGBoost can be loaded")
-metadata <- model_generator_metadata()
+metadata <- list(
+  kRounds = 2,
+  kRows = 1000,
+  kCols = 4,
+  kForests = 2,
+  kMaxDepth = 2,
+  kClasses = 3
+)
 run_model_param_check <- function (config) {
   testthat::expect_equal(config$learner$learner_model_param$num_feature, '4')

View File

@@ -57,7 +57,7 @@ To answer the question above we will convert *categorical* variables to `numeric`
 In this Vignette we will see how to transform a *dense* `data.frame` (*dense* = few zeroes in the matrix) with *categorical* variables to a very *sparse* matrix (*sparse* = lots of zero in the matrix) of `numeric` features.
-The method we are going to see is usually called [one-hot encoding](http://en.wikipedia.org/wiki/One-hot).
+The method we are going to see is usually called [one-hot encoding](https://en.wikipedia.org/wiki/One-hot).
 The first step is to load `Arthritis` dataset in memory and wrap it with `data.table` package.
@@ -66,7 +66,7 @@ data(Arthritis)
 df <- data.table(Arthritis, keep.rownames = FALSE)
 ```
-> `data.table` is 100% compliant with **R** `data.frame` but its syntax is more consistent and its performance for large dataset is [best in class](http://stackoverflow.com/questions/21435339/data-table-vs-dplyr-can-one-do-something-well-the-other-cant-or-does-poorly) (`dplyr` from **R** and `Pandas` from **Python** [included](https://github.com/Rdatatable/data.table/wiki/Benchmarks-%3A-Grouping)). Some parts of **Xgboost** **R** package use `data.table`.
+> `data.table` is 100% compliant with **R** `data.frame` but its syntax is more consistent and its performance for large dataset is [best in class](https://stackoverflow.com/questions/21435339/data-table-vs-dplyr-can-one-do-something-well-the-other-cant-or-does-poorly) (`dplyr` from **R** and `Pandas` from **Python** [included](https://github.com/Rdatatable/data.table/wiki/Benchmarks-%3A-Grouping)). Some parts of **Xgboost** **R** package use `data.table`.
 The first thing we want to do is to have a look to the first few lines of the `data.table`:
@@ -137,8 +137,8 @@ levels(df[,Treatment])
 #### Encoding categorical features
 Next step, we will transform the categorical data to dummy variables.
-Several encoding methods exist, e.g., [one-hot encoding](http://en.wikipedia.org/wiki/One-hot) is a common approach.
-We will use the [dummy contrast coding](http://www.ats.ucla.edu/stat/r/library/contrast_coding.htm#dummy) which is popular because it produces "full rank" encoding (also see [this blog post by Max Kuhn](http://appliedpredictivemodeling.com/blog/2013/10/23/the-basics-of-encoding-categorical-data-for-predictive-models)).
+Several encoding methods exist, e.g., [one-hot encoding](https://en.wikipedia.org/wiki/One-hot) is a common approach.
+We will use the [dummy contrast coding](https://stats.idre.ucla.edu/r/library/r-library-contrast-coding-systems-for-categorical-variables/) which is popular because it produces "full rank" encoding (also see [this blog post by Max Kuhn](http://appliedpredictivemodeling.com/blog/2013/10/23/the-basics-of-encoding-categorical-data-for-predictive-models)).
 The purpose is to transform each value of each *categorical* feature into a *binary* feature `{0, 1}`.
@@ -176,7 +176,7 @@ bst <- xgboost(data = sparse_matrix, label = output_vector, max_depth = 4,
 You can see some `train-error: 0.XXXXX` lines followed by a number. It decreases. Each line shows how well the model explains your data. Lower is better.
-A model which fits too well may [overfit](http://en.wikipedia.org/wiki/Overfitting) (meaning it copy/paste too much the past, and won't be that good to predict the future).
+A model which fits too well may [overfit](https://en.wikipedia.org/wiki/Overfitting) (meaning it copy/paste too much the past, and won't be that good to predict the future).
 > Here you can see the numbers decrease until line 7 and then increase.
 >
@@ -304,7 +304,7 @@ Linear model may not be that smart in this scenario.
 Special Note: What about Random Forests™?
 -----------------------------------------
-As you may know, [Random Forests™](http://en.wikipedia.org/wiki/Random_forest) algorithm is cousin with boosting and both are part of the [ensemble learning](http://en.wikipedia.org/wiki/Ensemble_learning) family.
+As you may know, [Random Forests™](https://en.wikipedia.org/wiki/Random_forest) algorithm is cousin with boosting and both are part of the [ensemble learning](https://en.wikipedia.org/wiki/Ensemble_learning) family.
 Both trains several decision trees for one dataset. The *main* difference is that in Random Forests™, trees are independent and in boosting, the tree `N+1` focus its learning on the loss (<=> what has not been well modeled by the tree `N`).

View File

@@ -24,7 +24,7 @@
   author = "K. Bache and M. Lichman",
   year = "2013",
   title = "{UCI} Machine Learning Repository",
-  url = "http://archive.ics.uci.edu/ml",
+  url = "http://archive.ics.uci.edu/ml/",
   institution = "University of California, Irvine, School of Information and Computer Sciences"
 }

View File

@@ -68,7 +68,7 @@ The version 0.4-2 is on CRAN, and you can install it by:
 install.packages("xgboost")
 ```
-Formerly available versions can be obtained from the CRAN [archive](https://cran.r-project.org/src/contrib/Archive/xgboost)
+Formerly available versions can be obtained from the CRAN [archive](https://cran.r-project.org/src/contrib/Archive/xgboost/)
 ## Learning

View File

@@ -1 +1 @@
-@xgboost_VERSION_MAJOR@.@xgboost_VERSION_MINOR@.@xgboost_VERSION_PATCH@-SNAPSHOT
+@xgboost_VERSION_MAJOR@.@xgboost_VERSION_MINOR@.@xgboost_VERSION_PATCH@

View File

@@ -6,6 +6,6 @@
 #define XGBOOST_VER_MAJOR 1
 #define XGBOOST_VER_MINOR 2
-#define XGBOOST_VER_PATCH 0
+#define XGBOOST_VER_PATCH 1
 #endif  // XGBOOST_VERSION_CONFIG_H_

View File

@@ -6,7 +6,7 @@
     <groupId>ml.dmlc</groupId>
     <artifactId>xgboost-jvm_2.12</artifactId>
-    <version>1.2.0-SNAPSHOT</version>
+    <version>1.2.1</version>
     <packaging>pom</packaging>
     <name>XGBoost JVM Package</name>
     <description>JVM Package for XGBoost</description>

View File

@@ -6,10 +6,10 @@
     <parent>
         <groupId>ml.dmlc</groupId>
         <artifactId>xgboost-jvm_2.12</artifactId>
-        <version>1.2.0-SNAPSHOT</version>
+        <version>1.2.1</version>
     </parent>
     <artifactId>xgboost4j-example_2.12</artifactId>
-    <version>1.2.0-SNAPSHOT</version>
+    <version>1.2.1</version>
     <packaging>jar</packaging>
     <build>
         <plugins>
@@ -26,7 +26,7 @@
         <dependency>
             <groupId>ml.dmlc</groupId>
             <artifactId>xgboost4j-spark_${scala.binary.version}</artifactId>
-            <version>1.2.0-SNAPSHOT</version>
+            <version>1.2.1</version>
         </dependency>
         <dependency>
             <groupId>org.apache.spark</groupId>
@@ -37,7 +37,7 @@
         <dependency>
             <groupId>ml.dmlc</groupId>
             <artifactId>xgboost4j-flink_${scala.binary.version}</artifactId>
-            <version>1.2.0-SNAPSHOT</version>
+            <version>1.2.1</version>
         </dependency>
         <dependency>
             <groupId>org.apache.commons</groupId>

View File

@@ -6,10 +6,10 @@
     <parent>
         <groupId>ml.dmlc</groupId>
         <artifactId>xgboost-jvm_2.12</artifactId>
-        <version>1.2.0-SNAPSHOT</version>
+        <version>1.2.1</version>
     </parent>
     <artifactId>xgboost4j-flink_2.12</artifactId>
-    <version>1.2.0-SNAPSHOT</version>
+    <version>1.2.1</version>
     <build>
         <plugins>
             <plugin>
@@ -26,7 +26,7 @@
         <dependency>
             <groupId>ml.dmlc</groupId>
             <artifactId>xgboost4j_${scala.binary.version}</artifactId>
-            <version>1.2.0-SNAPSHOT</version>
+            <version>1.2.1</version>
         </dependency>
         <dependency>
             <groupId>org.apache.commons</groupId>

View File

@@ -6,7 +6,7 @@
     <parent>
         <groupId>ml.dmlc</groupId>
         <artifactId>xgboost-jvm_2.12</artifactId>
-        <version>1.2.0-SNAPSHOT</version>
+        <version>1.2.1</version>
     </parent>
     <artifactId>xgboost4j-spark_2.12</artifactId>
     <build>
@@ -24,7 +24,7 @@
         <dependency>
             <groupId>ml.dmlc</groupId>
             <artifactId>xgboost4j_${scala.binary.version}</artifactId>
-            <version>1.2.0-SNAPSHOT</version>
+            <version>1.2.1</version>
         </dependency>
         <dependency>
             <groupId>org.apache.spark</groupId>

View File

@@ -6,10 +6,10 @@
     <parent>
         <groupId>ml.dmlc</groupId>
         <artifactId>xgboost-jvm_2.12</artifactId>
-        <version>1.2.0-SNAPSHOT</version>
+        <version>1.2.1</version>
     </parent>
     <artifactId>xgboost4j_2.12</artifactId>
-    <version>1.2.0-SNAPSHOT</version>
+    <version>1.2.1</version>
     <packaging>jar</packaging>
     <dependencies>

View File

@@ -1 +1 @@
-1.2.0-SNAPSHOT
+1.2.1

View File

@@ -40,7 +40,7 @@ class EarlyStopException(Exception):
     """
     def __init__(self, best_iteration):
-        super(EarlyStopException, self).__init__()
+        super().__init__()
         self.best_iteration = best_iteration

View File

@@ -738,7 +738,8 @@ async def _predict_async(client: Client, model, data, *args,
         predt = booster.predict(data=local_x,
                                 validate_features=local_x.num_row() != 0,
                                 *args)
-        ret = (delayed(predt), order)
+        columns = 1 if len(predt.shape) == 1 else predt.shape[1]
+        ret = ((delayed(predt), columns), order)
         predictions.append(ret)
         return predictions
@@ -775,7 +776,9 @@ async def _predict_async(client: Client, model, data, *args,
     # See https://docs.dask.org/en/latest/array-creation.html
     arrays = []
     for i, shape in enumerate(shapes):
-        arrays.append(da.from_delayed(results[i], shape=(shape[0], ),
+        arrays.append(da.from_delayed(
+            results[i][0], shape=(shape[0],)
+            if results[i][1] == 1 else (shape[0], results[i][1]),
             dtype=numpy.float32))
     predictions = await da.concatenate(arrays, axis=0)
     return predictions
@@ -978,6 +981,7 @@ class DaskScikitLearnBase(XGBModel):
     def client(self, clt):
         self._client = clt
 @xgboost_model_doc("""Implementation of the Scikit-Learn API for XGBoost.""",
                    ['estimators', 'model'])
 class DaskXGBRegressor(DaskScikitLearnBase, XGBRegressorBase):
@@ -1032,9 +1036,6 @@ class DaskXGBRegressor(DaskScikitLearnBase, XGBRegressorBase):
     ['estimators', 'model']
 )
 class DaskXGBClassifier(DaskScikitLearnBase, XGBClassifierBase):
-    # pylint: disable=missing-docstring
-    _client = None
     async def _fit_async(self, X, y,
                          sample_weights=None,
                          eval_set=None,
@@ -1078,13 +1079,34 @@ class DaskXGBClassifier(DaskScikitLearnBase, XGBClassifierBase):
         return self.client.sync(self._fit_async, X, y, sample_weights,
                                 eval_set, sample_weight_eval_set, verbose)
-    async def _predict_async(self, data):
+    async def _predict_proba_async(self, data):
+        _assert_dask_support()
         test_dmatrix = await DaskDMatrix(client=self.client, data=data,
                                          missing=self.missing)
         pred_probs = await predict(client=self.client,
                                    model=self.get_booster(), data=test_dmatrix)
         return pred_probs
+    def predict_proba(self, data):  # pylint: disable=arguments-differ,missing-docstring
+        _assert_dask_support()
+        return self.client.sync(self._predict_proba_async, data)
+    async def _predict_async(self, data):
+        _assert_dask_support()
+        test_dmatrix = await DaskDMatrix(client=self.client, data=data,
+                                         missing=self.missing)
+        pred_probs = await predict(client=self.client,
+                                   model=self.get_booster(), data=test_dmatrix)
+        if self.n_classes_ == 2:
+            preds = (pred_probs > 0.5).astype(int)
+        else:
+            preds = da.argmax(pred_probs, axis=1)
+        return preds
     def predict(self, data):  # pylint: disable=arguments-differ
         _assert_dask_support()
         return self.client.sync(self._predict_async, data)
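The hunk above backports predict_proba to DaskXGBClassifier and makes predict return class labels. Below is a minimal usage sketch of that API, not taken from the diff: the toy cluster size, synthetic data, and parameter values are illustrative assumptions.

# Illustrative sketch only (assumes xgboost >= 1.2 with the backported Dask classifier).
import xgboost as xgb
from dask import array as da
from dask.distributed import Client, LocalCluster

with LocalCluster(n_workers=2) as cluster, Client(cluster) as client:
    X = da.random.random((1000, 10), chunks=(100, 10))
    y = da.random.randint(0, 3, size=1000, chunks=100)   # three classes
    clf = xgb.dask.DaskXGBClassifier(n_estimators=5)
    clf.client = client                                   # attach an existing distributed client
    clf.fit(X, y)
    proba = clf.predict_proba(X)   # per-class probabilities, the method added in this backport
    labels = clf.predict(X)        # class labels rather than probabilities after the #5986 fix
    print(proba.compute().shape, labels.compute().shape)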

View File

@@ -77,7 +77,7 @@ __model_doc = '''
     gamma : float
         Minimum loss reduction required to make a further partition on a leaf
         node of the tree.
-    min_child_weight : int
+    min_child_weight : float
         Minimum sum of instance weight(hessian) needed in a child.
     max_delta_step : int
         Maximum delta step we allow each tree's weight estimation to be.
@@ -750,7 +750,10 @@ class XGBModel(XGBModelBase):
 @xgboost_model_doc(
     "Implementation of the scikit-learn API for XGBoost classification.",
-    ['model', 'objective'])
+    ['model', 'objective'], extra_parameters='''
+    n_estimators : int
+        Number of boosting rounds.
+''')
 class XGBClassifier(XGBModel, XGBClassifierBase):
     # pylint: disable=missing-docstring,invalid-name,too-many-instance-attributes
     def __init__(self, objective="binary:logistic", **kwargs):
@@ -1014,7 +1017,7 @@ class XGBRFClassifier(XGBClassifier):
                          **kwargs)
     def get_xgb_params(self):
-        params = super(XGBRFClassifier, self).get_xgb_params()
+        params = super().get_xgb_params()
         params['num_parallel_tree'] = self.n_estimators
         return params
@@ -1033,7 +1036,10 @@ class XGBRegressor(XGBModel, XGBRegressorBase):
 @xgboost_model_doc(
     "scikit-learn API for XGBoost random forest regression.",
-    ['model', 'objective'])
+    ['model', 'objective'], extra_parameters='''
+    n_estimators : int
+        Number of trees in random forest to fit.
+''')
 class XGBRFRegressor(XGBRegressor):
     # pylint: disable=missing-docstring
     def __init__(self, learning_rate=1, subsample=0.8, colsample_bynode=0.8,
@@ -1043,7 +1049,7 @@ class XGBRFRegressor(XGBRegressor):
                          reg_lambda=reg_lambda, **kwargs)
     def get_xgb_params(self):
-        params = super(XGBRFRegressor, self).get_xgb_params()
+        params = super().get_xgb_params()
        params['num_parallel_tree'] = self.n_estimators
         return params
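The docstring hunks above document n_estimators for the classifier and random-forest wrappers. A small, hedged illustration follows (synthetic data and arbitrary values, not part of the change itself):

# Illustration only: n_estimators is the number of boosting rounds for XGBClassifier
# and the number of trees in the forest for XGBRFRegressor.
import numpy as np
from xgboost import XGBClassifier, XGBRFRegressor

X = np.random.rand(200, 4)
XGBClassifier(n_estimators=10).fit(X, np.random.randint(0, 2, size=200))
XGBRFRegressor(n_estimators=10).fit(X, np.random.rand(200))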

View File

@@ -1,6 +1,8 @@
 /*!
- * Copyright 2019 by Contributors
+ * Copyright 2019-2020 by Contributors
  */
+#include <utility>
 #include "xgboost/json.h"
 #include "xgboost/logging.h"
 #include "gbtree_model.h"
@@ -41,15 +43,14 @@ void GBTreeModel::SaveModel(Json* p_out) const {
   auto& out = *p_out;
   CHECK_EQ(param.num_trees, static_cast<int>(trees.size()));
   out["gbtree_model_param"] = ToJson(param);
-  std::vector<Json> trees_json;
-  size_t t = 0;
-  for (auto const& tree : trees) {
+  std::vector<Json> trees_json(trees.size());
+  for (size_t t = 0; t < trees.size(); ++t) {
+    auto const& tree = trees[t];
     Json tree_json{Object()};
     tree->SaveModel(&tree_json);
-    // The field is not used in XGBoost, but might be useful for external project.
-    tree_json["id"] = Integer(t);
-    trees_json.emplace_back(tree_json);
-    t++;
+    tree_json["id"] = Integer(static_cast<Integer::Int>(t));
+    trees_json[t] = std::move(tree_json);
   }
   std::vector<Json> tree_info_json(tree_info.size());
@@ -70,9 +71,10 @@ void GBTreeModel::LoadModel(Json const& in) {
   auto const& trees_json = get<Array const>(in["trees"]);
   trees.resize(trees_json.size());
-  for (size_t t = 0; t < trees.size(); ++t) {
-    trees[t].reset( new RegTree() );
-    trees[t]->LoadModel(trees_json[t]);
+  for (size_t t = 0; t < trees_json.size(); ++t) {  // NOLINT
+    auto tree_id = get<Integer>(trees_json[t]["id"]);
+    trees.at(tree_id).reset(new RegTree());
+    trees.at(tree_id)->LoadModel(trees_json[t]);
   }
   tree_info.resize(param.num_trees);
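The C++ change above writes an explicit per-tree "id" when saving the JSON model and restores each tree by that id on load, so tree order no longer depends on array position. A hedged Python-side sketch of inspecting those ids; the key nesting is assumed from the 1.2-era JSON schema and may differ in other versions.

# Sketch only: train a toy booster, save it as JSON, and look at the per-tree "id" fields.
import json
import numpy as np
import xgboost as xgb

X, y = np.random.rand(50, 3), np.random.randint(0, 2, size=50)
bst = xgb.train({"objective": "binary:logistic"}, xgb.DMatrix(X, label=y), num_boost_round=3)
bst.save_model("model.json")
with open("model.json") as f:
    trees = json.load(f)["learner"]["gradient_booster"]["model"]["trees"]
print([t.get("id") for t in trees])   # ids used to keep tree order stable on load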

View File

@ -1,5 +1,6 @@
@@ -1,5 +1,6 @@
-ARG CUDA_VERSION
-FROM nvidia/cuda:$CUDA_VERSION-devel-ubuntu18.04
+ARG CUDA_VERSION_ARG
+FROM nvidia/cuda:$CUDA_VERSION_ARG-devel-ubuntu18.04
+ARG CUDA_VERSION_ARG
 # Environment
 ENV DEBIAN_FRONTEND noninteractive

View File

@@ -1,5 +1,6 @@
-ARG CUDA_VERSION
-FROM nvidia/cuda:$CUDA_VERSION-runtime-ubuntu16.04
+ARG CUDA_VERSION_ARG
+FROM nvidia/cuda:$CUDA_VERSION_ARG-runtime-ubuntu16.04
+ARG CUDA_VERSION_ARG
 # Environment
 ENV DEBIAN_FRONTEND noninteractive
@@ -17,8 +18,8 @@ ENV PATH=/opt/python/bin:$PATH
 # Create new Conda environment with cuDF, Dask, and cuPy
 RUN \
-    conda create -n gpu_test -c rapidsai -c nvidia -c conda-forge -c defaults \
-        python=3.7 cudf=0.14 cudatoolkit=$CUDA_VERSION dask dask-cuda dask-cudf cupy \
+    conda create -n gpu_test -c rapidsai-nightly -c rapidsai -c nvidia -c conda-forge -c defaults \
+        python=3.7 cudf=0.15* cudatoolkit=$CUDA_VERSION_ARG dask dask-cuda dask-cudf cupy \
         numpy pytest scipy scikit-learn pandas matplotlib wheel python-kubernetes urllib3 graphviz hypothesis
 ENV GOSU_VERSION 1.10

View File

@@ -1,6 +1,6 @@
-ARG CUDA_VERSION
-FROM nvidia/cuda:$CUDA_VERSION-devel-ubuntu16.04
-ARG CUDA_VERSION
+ARG CUDA_VERSION_ARG
+FROM nvidia/cuda:$CUDA_VERSION_ARG-devel-ubuntu16.04
+ARG CUDA_VERSION_ARG
 # Environment
 ENV DEBIAN_FRONTEND noninteractive
@@ -19,7 +19,7 @@ RUN \
 # NCCL2 (License: https://docs.nvidia.com/deeplearning/sdk/nccl-sla/index.html)
 RUN \
-    export CUDA_SHORT=`echo $CUDA_VERSION | egrep -o '[0-9]+\.[0-9]'` && \
+    export CUDA_SHORT=`echo $CUDA_VERSION_ARG | egrep -o '[0-9]+\.[0-9]'` && \
     export NCCL_VERSION=2.7.5-1 && \
     apt-get update && \
     apt-get install -y --allow-downgrades --allow-change-held-packages libnccl2=${NCCL_VERSION}+cuda${CUDA_SHORT} libnccl-dev=${NCCL_VERSION}+cuda${CUDA_SHORT}

View File

@@ -1,6 +1,6 @@
-ARG CUDA_VERSION
-FROM nvidia/cuda:$CUDA_VERSION-devel-centos6
-ARG CUDA_VERSION
+ARG CUDA_VERSION_ARG
+FROM nvidia/cuda:$CUDA_VERSION_ARG-devel-centos6
+ARG CUDA_VERSION_ARG
 # Environment
 ENV DEBIAN_FRONTEND noninteractive
@@ -33,7 +33,7 @@ RUN \
 # NCCL2 (License: https://docs.nvidia.com/deeplearning/sdk/nccl-sla/index.html)
 RUN \
-    export CUDA_SHORT=`echo $CUDA_VERSION | egrep -o '[0-9]+\.[0-9]'` && \
+    export CUDA_SHORT=`echo $CUDA_VERSION_ARG | egrep -o '[0-9]+\.[0-9]'` && \
    export NCCL_VERSION=2.4.8-1 && \
     wget https://developer.download.nvidia.com/compute/machine-learning/repos/rhel7/x86_64/nvidia-machine-learning-repo-rhel7-1.0.0-1.x86_64.rpm && \
     rpm -i nvidia-machine-learning-repo-rhel7-1.0.0-1.x86_64.rpm && \

View File

@@ -1,5 +1,6 @@
-ARG CUDA_VERSION
-FROM nvidia/cuda:$CUDA_VERSION-runtime-ubuntu16.04
+ARG CUDA_VERSION_ARG
+FROM nvidia/cuda:$CUDA_VERSION_ARG-runtime-ubuntu16.04
+ARG CUDA_VERSION_ARG
 ARG JDK_VERSION=8
 ARG SPARK_VERSION=3.0.0

View File

@@ -1,6 +1,6 @@
-ARG CUDA_VERSION
-FROM nvidia/cuda:$CUDA_VERSION-devel-centos6
-ARG CUDA_VERSION
+ARG CUDA_VERSION_ARG
+FROM nvidia/cuda:$CUDA_VERSION_ARG-devel-centos6
+ARG CUDA_VERSION_ARG
 # Environment
 ENV DEBIAN_FRONTEND noninteractive
@@ -30,7 +30,7 @@ RUN \
 # NCCL2 (License: https://docs.nvidia.com/deeplearning/sdk/nccl-sla/index.html)
 RUN \
-    export CUDA_SHORT=`echo $CUDA_VERSION | egrep -o '[0-9]+\.[0-9]'` && \
+    export CUDA_SHORT=`echo $CUDA_VERSION_ARG | egrep -o '[0-9]+\.[0-9]'` && \
    export NCCL_VERSION=2.4.8-1 && \
     wget https://developer.download.nvidia.com/compute/machine-learning/repos/rhel7/x86_64/nvidia-machine-learning-repo-rhel7-1.0.0-1.x86_64.rpm && \
     rpm -i nvidia-machine-learning-repo-rhel7-1.0.0-1.x86_64.rpm && \

View File

@@ -3,22 +3,32 @@
 set -e
 set -x
-if [ $# -ne 1 ]; then
-  echo "Usage: $0 [spark version]"
+if [ $# -ne 2 ]; then
+  echo "Usage: $0 [spark version] [build_gpu? 0 or 1]"
   exit 1
 fi
 spark_version=$1
+build_gpu=$2
 # Initialize local Maven repository
 ./tests/ci_build/initialize_maven.sh
-rm -rf build/
 cd jvm-packages
+rm -rf $(find . -name target)
+rm -rf ../build/
 # Re-build package without Mock Rabit
 # Deploy to S3 bucket xgboost-maven-repo
-mvn --no-transfer-progress package deploy -P release-to-s3 -Dspark.version=${spark_version} -DskipTests
+if [[ "$build_gpu" == "0" ]]
+then
+  # Build CPU artifact
+  mvn --no-transfer-progress package deploy -P release-to-s3 -Dspark.version=${spark_version} -DskipTests
+else
+  # Build GPU artifact
+  sed -i -e 's/<artifactId>xgboost\(.*\)_\(.*\)<\/artifactId>/<artifactId>xgboost\1-gpu_\2<\/artifactId>/' $(find . -name pom.xml)
+  mvn --no-transfer-progress package deploy -Duse.cuda=ON -P release-to-s3 -Dspark.version=${spark_version} -DskipTests
+fi
 set +x
 set +e

View File

@@ -148,7 +148,16 @@ TEST(Learner, JsonModelIO) {
   Json out { Object() };
   learner->SaveModel(&out);
-  learner->LoadModel(out);
+  dmlc::TemporaryDirectory tmpdir;
+  std::ofstream fout (tmpdir.path + "/model.json");
+  fout << out;
+  fout.close();
+  auto loaded_str = common::LoadSequentialFile(tmpdir.path + "/model.json");
+  Json loaded = Json::Load(StringView{loaded_str.c_str(), loaded_str.size()});
+  learner->LoadModel(loaded);
   learner->Configure();
   Json new_in { Object() };

View File

@@ -121,6 +121,8 @@ eval[test] = {data_path}
         v = xgboost.__version__
         if v.find('SNAPSHOT') != -1:
             assert msg.split(':')[1].strip() == v.split('-')[0]
+        elif v.find('rc') != -1:
+            assert msg.split(':')[1].strip() == v.split('rc')[0]
         else:
             assert msg.split(':')[1].strip() == v
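The new elif branch lets the CLI version check pass for release candidates. A tiny, purely illustrative sketch of the normalization the assertions rely on; the version strings here are hypothetical.

# Hypothetical inputs showing how each branch strips the suffix before comparison.
for v in ("1.2.0-SNAPSHOT", "1.2.1rc1", "1.2.1"):
    if v.find('SNAPSHOT') != -1:
        base = v.split('-')[0]
    elif v.find('rc') != -1:
        base = v.split('rc')[0]
    else:
        base = v
    print(v, '->', base)   # 1.2.0, 1.2.1, 1.2.1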

View File

@@ -5,6 +5,7 @@ import sys
 import numpy as np
 import json
 import asyncio
+from sklearn.datasets import make_classification
 if sys.platform.startswith("win"):
     pytest.skip("Skipping dask tests on Windows", allow_module_level=True)
@@ -36,7 +37,7 @@ def generate_array():
 def test_from_dask_dataframe():
-    with LocalCluster(n_workers=5) as cluster:
+    with LocalCluster(n_workers=kWorkers) as cluster:
         with Client(cluster) as client:
             X, y = generate_array()
@@ -74,7 +75,7 @@ def test_from_dask_dataframe():
 def test_from_dask_array():
-    with LocalCluster(n_workers=5, threads_per_worker=5) as cluster:
+    with LocalCluster(n_workers=kWorkers, threads_per_worker=5) as cluster:
         with Client(cluster) as client:
             X, y = generate_array()
             dtrain = DaskDMatrix(client, X, y)
@@ -104,8 +105,28 @@ def test_from_dask_array():
         assert np.all(single_node_predt == from_arr.compute())
+def test_dask_predict_shape_infer():
+    with LocalCluster(n_workers=kWorkers) as cluster:
+        with Client(cluster) as client:
+            X, y = make_classification(n_samples=1000, n_informative=5,
+                                       n_classes=3)
+            X_ = dd.from_array(X, chunksize=100)
+            y_ = dd.from_array(y, chunksize=100)
+            dtrain = xgb.dask.DaskDMatrix(client, data=X_, label=y_)
+            model = xgb.dask.train(
+                client,
+                {"objective": "multi:softprob", "num_class": 3},
+                dtrain=dtrain
+            )
+            preds = xgb.dask.predict(client, model, dtrain)
+            assert preds.shape[0] == preds.compute().shape[0]
+            assert preds.shape[1] == preds.compute().shape[1]
 def test_dask_missing_value_reg():
-    with LocalCluster(n_workers=5) as cluster:
+    with LocalCluster(n_workers=kWorkers) as cluster:
         with Client(cluster) as client:
             X_0 = np.ones((20 // 2, kCols))
             X_1 = np.zeros((20 // 2, kCols))
@@ -144,19 +165,19 @@ def test_dask_missing_value_cls():
                                               missing=0.0)
             cls.client = client
             cls.fit(X, y, eval_set=[(X, y)])
-            dd_predt = cls.predict(X).compute()
+            dd_pred_proba = cls.predict_proba(X).compute()
             np_X = X.compute()
-            np_predt = cls.get_booster().predict(
+            np_pred_proba = cls.get_booster().predict(
                 xgb.DMatrix(np_X, missing=0.0))
-            np.testing.assert_allclose(np_predt, dd_predt)
+            np.testing.assert_allclose(np_pred_proba, dd_pred_proba)
             cls = xgb.dask.DaskXGBClassifier()
             assert hasattr(cls, 'missing')
 def test_dask_regressor():
-    with LocalCluster(n_workers=5) as cluster:
+    with LocalCluster(n_workers=kWorkers) as cluster:
         with Client(cluster) as client:
             X, y = generate_array()
             regressor = xgb.dask.DaskXGBRegressor(verbosity=1, n_estimators=2)
@@ -178,7 +199,7 @@ def test_dask_regressor():
 def test_dask_classifier():
-    with LocalCluster(n_workers=5) as cluster:
+    with LocalCluster(n_workers=kWorkers) as cluster:
         with Client(cluster) as client:
             X, y = generate_array()
             y = (y * 10).astype(np.int32)
@@ -201,7 +222,18 @@ def test_dask_classifier():
             assert len(list(history['validation_0'])) == 1
             assert len(history['validation_0']['merror']) == 2
+            # Test .predict_proba()
+            probas = classifier.predict_proba(X)
             assert classifier.n_classes_ == 10
+            assert probas.ndim == 2
+            assert probas.shape[0] == kRows
+            assert probas.shape[1] == 10
+            cls_booster = classifier.get_booster()
+            single_node_proba = cls_booster.inplace_predict(X.compute())
+            np.testing.assert_allclose(single_node_proba,
+                                       probas.compute())
             # Test with dataframe.
             X_d = dd.from_dask_array(X)
@@ -218,7 +250,7 @@ def test_dask_classifier():
 @pytest.mark.skipif(**tm.no_sklearn())
 def test_sklearn_grid_search():
     from sklearn.model_selection import GridSearchCV
-    with LocalCluster(n_workers=4) as cluster:
+    with LocalCluster(n_workers=kWorkers) as cluster:
         with Client(cluster) as client:
             X, y = generate_array()
             reg = xgb.dask.DaskXGBRegressor(learning_rate=0.1,
@@ -292,7 +324,9 @@ def run_empty_dmatrix_cls(client, parameters):
                         evals=[(dtrain, 'validation')],
                         num_boost_round=2)
     predictions = xgb.dask.predict(client=client, model=out,
-                                   data=dtrain).compute()
+                                   data=dtrain)
+    assert predictions.shape[1] == n_classes
+    predictions = predictions.compute()
     _check_outputs(out, predictions)
     # train has more rows than evals
@@ -315,7 +349,7 @@ def run_empty_dmatrix_cls(client, parameters):
 # environment and Exact doesn't support it.
 def test_empty_dmatrix_hist():
-    with LocalCluster(n_workers=5) as cluster:
+    with LocalCluster(n_workers=kWorkers) as cluster:
         with Client(cluster) as client:
             parameters = {'tree_method': 'hist'}
             run_empty_dmatrix_reg(client, parameters)
@@ -323,7 +357,7 @@ def test_empty_dmatrix_hist():
 def test_empty_dmatrix_approx():
-    with LocalCluster(n_workers=5) as cluster:
+    with LocalCluster(n_workers=kWorkers) as cluster:
         with Client(cluster) as client:
             parameters = {'tree_method': 'approx'}
             run_empty_dmatrix_reg(client, parameters)
@@ -397,7 +431,13 @@ async def run_dask_classifier_asyncio(scheduler_address):
     assert len(list(history['validation_0'])) == 1
     assert len(history['validation_0']['merror']) == 2
+    # Test .predict_proba()
+    probas = await classifier.predict_proba(X)
     assert classifier.n_classes_ == 10
+    assert probas.ndim == 2
+    assert probas.shape[0] == kRows
+    assert probas.shape[1] == 10
     # Test with dataframe.
     X_d = dd.from_dask_array(X)