Compare commits

...

7 Commits

Author SHA1 Message Date
Nan Zhu
00774eeac3 [jvm-packages] update version number for 1.2 branch (#6427)
* [jvm-packages]update version number of 1.2 branch

* update ver
2020-11-23 14:16:30 -08:00
Philip Hyunsu Cho
bcb15a980f 1.2.1 patch release (#6206)
* Hide C++ symbols from dmlc-core (#6188)

* Up version to 1.2.1

* Fix lint

* [CI] Fix Docker build for CUDA 11 (#6202)

* Update Dockerfile.gpu
2020-10-12 15:10:16 -07:00
Tong He
0cd0dad0b5 Fix CRAN submission (#6076) 2020-09-01 23:38:27 -07:00
Philip Hyunsu Cho
884098ec22 [CI] Fix CRAN check (#6067) 2020-08-28 21:24:49 +08:00
Hyunsu Cho
738786680b Release 1.2.0 2020-08-22 18:25:18 -07:00
Philip Hyunsu Cho
04232c01b2 [CI] Fix broken tests (#6048) 2020-08-22 11:43:38 -07:00
Jiaming Yuan
0353a78ab7 Fix scikit learn cls doc. (#6041) 2020-08-20 19:25:12 -07:00
34 changed files with 107 additions and 91 deletions

View File

@@ -81,7 +81,7 @@ jobs:
run: | run: |
cd R-package cd R-package
R.exe CMD INSTALL . R.exe CMD INSTALL .
Rscript.exe tests/run_lint.R Rscript.exe tests/helper_scripts/run_lint.R
test-with-R: test-with-R:

View File

@@ -1,9 +1,10 @@
cmake_minimum_required(VERSION 3.13) cmake_minimum_required(VERSION 3.13)
project(xgboost LANGUAGES CXX C VERSION 1.2.0) project(xgboost LANGUAGES CXX C VERSION 1.2.1)
include(cmake/Utils.cmake) include(cmake/Utils.cmake)
list(APPEND CMAKE_MODULE_PATH "${xgboost_SOURCE_DIR}/cmake/modules") list(APPEND CMAKE_MODULE_PATH "${xgboost_SOURCE_DIR}/cmake/modules")
cmake_policy(SET CMP0022 NEW) cmake_policy(SET CMP0022 NEW)
cmake_policy(SET CMP0079 NEW) cmake_policy(SET CMP0079 NEW)
set(CMAKE_POLICY_DEFAULT_CMP0063 NEW)
cmake_policy(SET CMP0063 NEW) cmake_policy(SET CMP0063 NEW)
if ((${CMAKE_VERSION} VERSION_GREATER 3.13) OR (${CMAKE_VERSION} VERSION_EQUAL 3.13)) if ((${CMAKE_VERSION} VERSION_GREATER 3.13) OR (${CMAKE_VERSION} VERSION_EQUAL 3.13))
@@ -173,9 +174,6 @@ foreach(lib rabit rabit_base rabit_empty rabit_mock rabit_mock_static)
# from dmlc is correctly applied to rabit. # from dmlc is correctly applied to rabit.
if (TARGET ${lib}) if (TARGET ${lib})
target_link_libraries(${lib} dmlc ${CMAKE_THREAD_LIBS_INIT}) target_link_libraries(${lib} dmlc ${CMAKE_THREAD_LIBS_INIT})
if (HIDE_CXX_SYMBOLS) # Hide all C++ symbols from Rabit
set_target_properties(${lib} PROPERTIES CXX_VISIBILITY_PRESET hidden)
endif (HIDE_CXX_SYMBOLS)
if (ENABLE_ALL_WARNINGS) if (ENABLE_ALL_WARNINGS)
target_compile_options(${lib} PRIVATE -Wall -Wextra) target_compile_options(${lib} PRIVATE -Wall -Wextra)
endif (ENABLE_ALL_WARNINGS) endif (ENABLE_ALL_WARNINGS)
@@ -204,8 +202,9 @@ endif (USE_NVTX)
#-- Hide all C++ symbols #-- Hide all C++ symbols
if (HIDE_CXX_SYMBOLS) if (HIDE_CXX_SYMBOLS)
set_target_properties(objxgboost PROPERTIES CXX_VISIBILITY_PRESET hidden) foreach(target objxgboost xgboost dmlc rabit rabit_mock_static)
set_target_properties(xgboost PROPERTIES CXX_VISIBILITY_PRESET hidden) set_target_properties(${target} PROPERTIES CXX_VISIBILITY_PRESET hidden)
endforeach()
endif (HIDE_CXX_SYMBOLS) endif (HIDE_CXX_SYMBOLS)
target_include_directories(xgboost target_include_directories(xgboost

16
Jenkinsfile vendored
View File

@@ -92,7 +92,7 @@ pipeline {
'test-python-gpu-cuda10.2': { TestPythonGPU(host_cuda_version: '10.2') }, 'test-python-gpu-cuda10.2': { TestPythonGPU(host_cuda_version: '10.2') },
'test-python-gpu-cuda11.0-cross': { TestPythonGPU(artifact_cuda_version: '10.0', host_cuda_version: '11.0') }, 'test-python-gpu-cuda11.0-cross': { TestPythonGPU(artifact_cuda_version: '10.0', host_cuda_version: '11.0') },
'test-python-gpu-cuda11.0': { TestPythonGPU(artifact_cuda_version: '11.0', host_cuda_version: '11.0') }, 'test-python-gpu-cuda11.0': { TestPythonGPU(artifact_cuda_version: '11.0', host_cuda_version: '11.0') },
'test-python-mgpu-cuda10.2': { TestPythonGPU(artifact_cuda_version: '10.2', host_cuda_version: '10.2', multi_gpu: true) }, 'test-python-mgpu-cuda10.2': { TestPythonGPU(artifact_cuda_version: '10.0', host_cuda_version: '10.2', multi_gpu: true) },
'test-cpp-gpu-cuda10.2': { TestCppGPU(artifact_cuda_version: '10.2', host_cuda_version: '10.2') }, 'test-cpp-gpu-cuda10.2': { TestCppGPU(artifact_cuda_version: '10.2', host_cuda_version: '10.2') },
'test-cpp-gpu-cuda11.0': { TestCppGPU(artifact_cuda_version: '11.0', host_cuda_version: '11.0') }, 'test-cpp-gpu-cuda11.0': { TestCppGPU(artifact_cuda_version: '11.0', host_cuda_version: '11.0') },
'test-jvm-jdk8-cuda10.0': { CrossTestJVMwithJDKGPU(artifact_cuda_version: '10.0', host_cuda_version: '10.0') }, 'test-jvm-jdk8-cuda10.0': { CrossTestJVMwithJDKGPU(artifact_cuda_version: '10.0', host_cuda_version: '10.0') },
@@ -144,7 +144,7 @@ def ClangTidy() {
echo "Running clang-tidy job..." echo "Running clang-tidy job..."
def container_type = "clang_tidy" def container_type = "clang_tidy"
def docker_binary = "docker" def docker_binary = "docker"
def dockerArgs = "--build-arg CUDA_VERSION=10.1" def dockerArgs = "--build-arg CUDA_VERSION_ARG=10.1"
sh """ sh """
${dockerRun} ${container_type} ${docker_binary} ${dockerArgs} python3 tests/ci_build/tidy.py ${dockerRun} ${container_type} ${docker_binary} ${dockerArgs} python3 tests/ci_build/tidy.py
""" """
@@ -261,7 +261,7 @@ def BuildCUDA(args) {
echo "Build with CUDA ${args.cuda_version}" echo "Build with CUDA ${args.cuda_version}"
def container_type = GetCUDABuildContainerType(args.cuda_version) def container_type = GetCUDABuildContainerType(args.cuda_version)
def docker_binary = "docker" def docker_binary = "docker"
def docker_args = "--build-arg CUDA_VERSION=${args.cuda_version}" def docker_args = "--build-arg CUDA_VERSION_ARG=${args.cuda_version}"
def arch_flag = "" def arch_flag = ""
if (env.BRANCH_NAME != 'master' && !(env.BRANCH_NAME.startsWith('release'))) { if (env.BRANCH_NAME != 'master' && !(env.BRANCH_NAME.startsWith('release'))) {
arch_flag = "-DGPU_COMPUTE_VER=75" arch_flag = "-DGPU_COMPUTE_VER=75"
@@ -290,7 +290,7 @@ def BuildJVMPackagesWithCUDA(args) {
echo "Build XGBoost4J-Spark with Spark ${args.spark_version}, CUDA ${args.cuda_version}" echo "Build XGBoost4J-Spark with Spark ${args.spark_version}, CUDA ${args.cuda_version}"
def container_type = "jvm_gpu_build" def container_type = "jvm_gpu_build"
def docker_binary = "nvidia-docker" def docker_binary = "nvidia-docker"
def docker_args = "--build-arg CUDA_VERSION=${args.cuda_version}" def docker_args = "--build-arg CUDA_VERSION_ARG=${args.cuda_version}"
def arch_flag = "" def arch_flag = ""
if (env.BRANCH_NAME != 'master' && !(env.BRANCH_NAME.startsWith('release'))) { if (env.BRANCH_NAME != 'master' && !(env.BRANCH_NAME.startsWith('release'))) {
arch_flag = "-DGPU_COMPUTE_VER=75" arch_flag = "-DGPU_COMPUTE_VER=75"
@@ -365,7 +365,7 @@ def TestPythonGPU(args) {
echo "Test Python GPU: CUDA ${args.host_cuda_version}" echo "Test Python GPU: CUDA ${args.host_cuda_version}"
def container_type = "gpu" def container_type = "gpu"
def docker_binary = "nvidia-docker" def docker_binary = "nvidia-docker"
def docker_args = "--build-arg CUDA_VERSION=${args.host_cuda_version}" def docker_args = "--build-arg CUDA_VERSION_ARG=${args.host_cuda_version}"
if (args.multi_gpu) { if (args.multi_gpu) {
echo "Using multiple GPUs" echo "Using multiple GPUs"
// Allocate extra space in /dev/shm to enable NCCL // Allocate extra space in /dev/shm to enable NCCL
@@ -406,7 +406,7 @@ def TestCppGPU(args) {
echo "Test C++, CUDA ${args.host_cuda_version}" echo "Test C++, CUDA ${args.host_cuda_version}"
def container_type = "gpu" def container_type = "gpu"
def docker_binary = "nvidia-docker" def docker_binary = "nvidia-docker"
def docker_args = "--build-arg CUDA_VERSION=${args.host_cuda_version}" def docker_args = "--build-arg CUDA_VERSION_ARG=${args.host_cuda_version}"
sh "${dockerRun} ${container_type} ${docker_binary} ${docker_args} build/testxgboost" sh "${dockerRun} ${container_type} ${docker_binary} ${docker_args} build/testxgboost"
deleteDir() deleteDir()
} }
@@ -424,7 +424,7 @@ def CrossTestJVMwithJDKGPU(args) {
} }
def container_type = "gpu_jvm" def container_type = "gpu_jvm"
def docker_binary = "nvidia-docker" def docker_binary = "nvidia-docker"
def docker_args = "--build-arg CUDA_VERSION=${args.host_cuda_version}" def docker_args = "--build-arg CUDA_VERSION_ARG=${args.host_cuda_version}"
sh "${dockerRun} ${container_type} ${docker_binary} ${docker_args} tests/ci_build/test_jvm_gpu_cross.sh" sh "${dockerRun} ${container_type} ${docker_binary} ${docker_args} tests/ci_build/test_jvm_gpu_cross.sh"
deleteDir() deleteDir()
} }
@@ -476,7 +476,7 @@ def DeployJVMPackages(args) {
${dockerRun} jvm docker tests/ci_build/deploy_jvm_packages.sh ${args.spark_version} 0 ${dockerRun} jvm docker tests/ci_build/deploy_jvm_packages.sh ${args.spark_version} 0
""" """
sh """ sh """
${dockerRun} jvm_gpu_build docker --build-arg CUDA_VERSION=10.0 tests/ci_build/deploy_jvm_packages.sh ${args.spark_version} 1 ${dockerRun} jvm_gpu_build docker --build-arg CUDA_VERSION_ARG=10.0 tests/ci_build/deploy_jvm_packages.sh ${args.spark_version} 1
""" """
} }
deleteDir() deleteDir()

View File

@@ -133,15 +133,16 @@ Rpack: clean_all
sed -i -e 's/@BACKTRACE_LIB@//g' xgboost/src/Makevars.win sed -i -e 's/@BACKTRACE_LIB@//g' xgboost/src/Makevars.win
sed -i -e 's/@OPENMP_LIB@//g' xgboost/src/Makevars.win sed -i -e 's/@OPENMP_LIB@//g' xgboost/src/Makevars.win
rm -f xgboost/src/Makevars.win-e # OSX sed create this extra file; remove it rm -f xgboost/src/Makevars.win-e # OSX sed create this extra file; remove it
bash R-package/remove_warning_suppression_pragma.sh bash xgboost/remove_warning_suppression_pragma.sh
rm xgboost/remove_warning_suppression_pragma.sh rm xgboost/remove_warning_suppression_pragma.sh
rm -rfv xgboost/tests/helper_scripts/
Rbuild: Rpack Rbuild: Rpack
R CMD build --no-build-vignettes xgboost R CMD build --no-build-vignettes xgboost
rm -rf xgboost rm -rf xgboost
Rcheck: Rbuild Rcheck: Rbuild
R CMD check xgboost*.tar.gz R CMD check --as-cran xgboost*.tar.gz
-include build/*.d -include build/*.d
-include build/*/*.d -include build/*/*.d

View File

@@ -2,7 +2,7 @@ Package: xgboost
Type: Package Type: Package
Title: Extreme Gradient Boosting Title: Extreme Gradient Boosting
Version: 1.2.0.1 Version: 1.2.0.1
Date: 2020-02-21 Date: 2020-08-28
Authors@R: c( Authors@R: c(
person("Tianqi", "Chen", role = c("aut"), person("Tianqi", "Chen", role = c("aut"),
email = "tianqi.tchen@gmail.com"), email = "tianqi.tchen@gmail.com"),

View File

@@ -349,6 +349,7 @@ NULL
#' # Save as a stand-alone file (JSON); load it with xgb.load() #' # Save as a stand-alone file (JSON); load it with xgb.load()
#' xgb.save(bst, 'xgb.model.json') #' xgb.save(bst, 'xgb.model.json')
#' bst2 <- xgb.load('xgb.model.json') #' bst2 <- xgb.load('xgb.model.json')
#' if (file.exists('xgb.model.json')) file.remove('xgb.model.json')
#' #'
#' # Save as a raw byte vector; load it with xgb.load.raw() #' # Save as a raw byte vector; load it with xgb.load.raw()
#' xgb_bytes <- xgb.save.raw(bst) #' xgb_bytes <- xgb.save.raw(bst)
@@ -364,6 +365,7 @@ NULL
#' obj2 <- readRDS('my_object.rds') #' obj2 <- readRDS('my_object.rds')
#' # Re-construct xgb.Booster object from the bytes #' # Re-construct xgb.Booster object from the bytes
#' bst2 <- xgb.load.raw(obj2$xgb_model_bytes) #' bst2 <- xgb.load.raw(obj2$xgb_model_bytes)
#' if (file.exists('my_object.rds')) file.remove('my_object.rds')
#' #'
#' @name a-compatibility-note-for-saveRDS-save #' @name a-compatibility-note-for-saveRDS-save
NULL NULL

View File

@@ -79,7 +79,7 @@
#' #'
#' All observations are used for both training and validation. #' All observations are used for both training and validation.
#' #'
#' Adapted from \url{http://en.wikipedia.org/wiki/Cross-validation_\%28statistics\%29#k-fold_cross-validation} #' Adapted from \url{https://en.wikipedia.org/wiki/Cross-validation_\%28statistics\%29}
#' #'
#' @return #' @return
#' An object of class \code{xgb.cv.synchronous} with the following elements: #' An object of class \code{xgb.cv.synchronous} with the following elements:

View File

@@ -130,16 +130,16 @@
#' Note that when using a customized metric, only this single metric can be used. #' Note that when using a customized metric, only this single metric can be used.
#' The following is the list of built-in metrics for which Xgboost provides optimized implementation: #' The following is the list of built-in metrics for which Xgboost provides optimized implementation:
#' \itemize{ #' \itemize{
#' \item \code{rmse} root mean square error. \url{http://en.wikipedia.org/wiki/Root_mean_square_error} #' \item \code{rmse} root mean square error. \url{https://en.wikipedia.org/wiki/Root_mean_square_error}
#' \item \code{logloss} negative log-likelihood. \url{http://en.wikipedia.org/wiki/Log-likelihood} #' \item \code{logloss} negative log-likelihood. \url{https://en.wikipedia.org/wiki/Log-likelihood}
#' \item \code{mlogloss} multiclass logloss. \url{http://wiki.fast.ai/index.php/Log_Loss} #' \item \code{mlogloss} multiclass logloss. \url{https://scikit-learn.org/stable/modules/generated/sklearn.metrics.log_loss.html}
#' \item \code{error} Binary classification error rate. It is calculated as \code{(# wrong cases) / (# all cases)}. #' \item \code{error} Binary classification error rate. It is calculated as \code{(# wrong cases) / (# all cases)}.
#' By default, it uses the 0.5 threshold for predicted values to define negative and positive instances. #' By default, it uses the 0.5 threshold for predicted values to define negative and positive instances.
#' Different threshold (e.g., 0.) could be specified as "error@0." #' Different threshold (e.g., 0.) could be specified as "error@0."
#' \item \code{merror} Multiclass classification error rate. It is calculated as \code{(# wrong cases) / (# all cases)}. #' \item \code{merror} Multiclass classification error rate. It is calculated as \code{(# wrong cases) / (# all cases)}.
#' \item \code{auc} Area under the curve. \url{http://en.wikipedia.org/wiki/Receiver_operating_characteristic#'Area_under_curve} for ranking evaluation. #' \item \code{auc} Area under the curve. \url{https://en.wikipedia.org/wiki/Receiver_operating_characteristic#'Area_under_curve} for ranking evaluation.
#' \item \code{aucpr} Area under the PR curve. \url{https://en.wikipedia.org/wiki/Precision_and_recall} for ranking evaluation. #' \item \code{aucpr} Area under the PR curve. \url{https://en.wikipedia.org/wiki/Precision_and_recall} for ranking evaluation.
#' \item \code{ndcg} Normalized Discounted Cumulative Gain (for ranking task). \url{http://en.wikipedia.org/wiki/NDCG} #' \item \code{ndcg} Normalized Discounted Cumulative Gain (for ranking task). \url{https://en.wikipedia.org/wiki/NDCG}
#' } #' }
#' #'
#' The following callbacks are automatically created when certain parameters are set: #' The following callbacks are automatically created when certain parameters are set:

View File

@@ -43,6 +43,7 @@ bst2 <- xgb.load('xgb.model')
# Save as a stand-alone file (JSON); load it with xgb.load() # Save as a stand-alone file (JSON); load it with xgb.load()
xgb.save(bst, 'xgb.model.json') xgb.save(bst, 'xgb.model.json')
bst2 <- xgb.load('xgb.model.json') bst2 <- xgb.load('xgb.model.json')
if (file.exists('xgb.model.json')) file.remove('xgb.model.json')
# Save as a raw byte vector; load it with xgb.load.raw() # Save as a raw byte vector; load it with xgb.load.raw()
xgb_bytes <- xgb.save.raw(bst) xgb_bytes <- xgb.save.raw(bst)
@@ -58,5 +59,6 @@ saveRDS(obj, 'my_object.rds')
obj2 <- readRDS('my_object.rds') obj2 <- readRDS('my_object.rds')
# Re-construct xgb.Booster object from the bytes # Re-construct xgb.Booster object from the bytes
bst2 <- xgb.load.raw(obj2$xgb_model_bytes) bst2 <- xgb.load.raw(obj2$xgb_model_bytes)
if (file.exists('my_object.rds')) file.remove('my_object.rds')
} }

View File

@@ -154,7 +154,7 @@ The cross-validation process is then repeated \code{nrounds} times, with each of
All observations are used for both training and validation. All observations are used for both training and validation.
Adapted from \url{http://en.wikipedia.org/wiki/Cross-validation_\%28statistics\%29#k-fold_cross-validation} Adapted from \url{https://en.wikipedia.org/wiki/Cross-validation_\%28statistics\%29}
} }
\examples{ \examples{
data(agaricus.train, package='xgboost') data(agaricus.train, package='xgboost')

View File

@@ -215,16 +215,16 @@ User may set one or several \code{eval_metric} parameters.
Note that when using a customized metric, only this single metric can be used. Note that when using a customized metric, only this single metric can be used.
The following is the list of built-in metrics for which Xgboost provides optimized implementation: The following is the list of built-in metrics for which Xgboost provides optimized implementation:
\itemize{ \itemize{
\item \code{rmse} root mean square error. \url{http://en.wikipedia.org/wiki/Root_mean_square_error} \item \code{rmse} root mean square error. \url{https://en.wikipedia.org/wiki/Root_mean_square_error}
\item \code{logloss} negative log-likelihood. \url{http://en.wikipedia.org/wiki/Log-likelihood} \item \code{logloss} negative log-likelihood. \url{https://en.wikipedia.org/wiki/Log-likelihood}
\item \code{mlogloss} multiclass logloss. \url{http://wiki.fast.ai/index.php/Log_Loss} \item \code{mlogloss} multiclass logloss. \url{https://scikit-learn.org/stable/modules/generated/sklearn.metrics.log_loss.html}
\item \code{error} Binary classification error rate. It is calculated as \code{(# wrong cases) / (# all cases)}. \item \code{error} Binary classification error rate. It is calculated as \code{(# wrong cases) / (# all cases)}.
By default, it uses the 0.5 threshold for predicted values to define negative and positive instances. By default, it uses the 0.5 threshold for predicted values to define negative and positive instances.
Different threshold (e.g., 0.) could be specified as "error@0." Different threshold (e.g., 0.) could be specified as "error@0."
\item \code{merror} Multiclass classification error rate. It is calculated as \code{(# wrong cases) / (# all cases)}. \item \code{merror} Multiclass classification error rate. It is calculated as \code{(# wrong cases) / (# all cases)}.
\item \code{auc} Area under the curve. \url{http://en.wikipedia.org/wiki/Receiver_operating_characteristic#'Area_under_curve} for ranking evaluation. \item \code{auc} Area under the curve. \url{https://en.wikipedia.org/wiki/Receiver_operating_characteristic#'Area_under_curve} for ranking evaluation.
\item \code{aucpr} Area under the PR curve. \url{https://en.wikipedia.org/wiki/Precision_and_recall} for ranking evaluation. \item \code{aucpr} Area under the PR curve. \url{https://en.wikipedia.org/wiki/Precision_and_recall} for ranking evaluation.
\item \code{ndcg} Normalized Discounted Cumulative Gain (for ranking task). \url{http://en.wikipedia.org/wiki/NDCG} \item \code{ndcg} Normalized Discounted Cumulative Gain (for ranking task). \url{https://en.wikipedia.org/wiki/NDCG}
} }
The following callbacks are automatically created when certain parameters are set: The following callbacks are automatically created when certain parameters are set:

View File

@@ -1,10 +0,0 @@
model_generator_metadata <- function() {
return (list(
kRounds = 2,
kRows = 1000,
kCols = 4,
kForests = 2,
kMaxDepth = 2,
kClasses = 3
))
}

View File

@@ -5,7 +5,14 @@ library(Matrix)
source('./generate_models_params.R') source('./generate_models_params.R')
set.seed(0) set.seed(0)
metadata <- model_generator_metadata() metadata <- list(
kRounds = 2,
kRows = 1000,
kCols = 4,
kForests = 2,
kMaxDepth = 2,
kClasses = 3
)
X <- Matrix(data = rnorm(metadata$kRows * metadata$kCols), nrow = metadata$kRows, X <- Matrix(data = rnorm(metadata$kRows * metadata$kCols), nrow = metadata$kRows,
ncol = metadata$kCols, sparse = TRUE) ncol = metadata$kCols, sparse = TRUE)
w <- runif(metadata$kRows) w <- runif(metadata$kRows)

View File

@@ -1,10 +1,16 @@
require(xgboost) require(xgboost)
require(jsonlite) require(jsonlite)
source('../generate_models_params.R')
context("Models from previous versions of XGBoost can be loaded") context("Models from previous versions of XGBoost can be loaded")
metadata <- model_generator_metadata() metadata <- list(
kRounds = 2,
kRows = 1000,
kCols = 4,
kForests = 2,
kMaxDepth = 2,
kClasses = 3
)
run_model_param_check <- function (config) { run_model_param_check <- function (config) {
testthat::expect_equal(config$learner$learner_model_param$num_feature, '4') testthat::expect_equal(config$learner$learner_model_param$num_feature, '4')

View File

@@ -57,7 +57,7 @@ To answer the question above we will convert *categorical* variables to `numeric
In this Vignette we will see how to transform a *dense* `data.frame` (*dense* = few zeroes in the matrix) with *categorical* variables to a very *sparse* matrix (*sparse* = lots of zero in the matrix) of `numeric` features. In this Vignette we will see how to transform a *dense* `data.frame` (*dense* = few zeroes in the matrix) with *categorical* variables to a very *sparse* matrix (*sparse* = lots of zero in the matrix) of `numeric` features.
The method we are going to see is usually called [one-hot encoding](http://en.wikipedia.org/wiki/One-hot). The method we are going to see is usually called [one-hot encoding](https://en.wikipedia.org/wiki/One-hot).
The first step is to load `Arthritis` dataset in memory and wrap it with `data.table` package. The first step is to load `Arthritis` dataset in memory and wrap it with `data.table` package.
@@ -66,7 +66,7 @@ data(Arthritis)
df <- data.table(Arthritis, keep.rownames = FALSE) df <- data.table(Arthritis, keep.rownames = FALSE)
``` ```
> `data.table` is 100% compliant with **R** `data.frame` but its syntax is more consistent and its performance for large dataset is [best in class](http://stackoverflow.com/questions/21435339/data-table-vs-dplyr-can-one-do-something-well-the-other-cant-or-does-poorly) (`dplyr` from **R** and `Pandas` from **Python** [included](https://github.com/Rdatatable/data.table/wiki/Benchmarks-%3A-Grouping)). Some parts of **Xgboost** **R** package use `data.table`. > `data.table` is 100% compliant with **R** `data.frame` but its syntax is more consistent and its performance for large dataset is [best in class](https://stackoverflow.com/questions/21435339/data-table-vs-dplyr-can-one-do-something-well-the-other-cant-or-does-poorly) (`dplyr` from **R** and `Pandas` from **Python** [included](https://github.com/Rdatatable/data.table/wiki/Benchmarks-%3A-Grouping)). Some parts of **Xgboost** **R** package use `data.table`.
The first thing we want to do is to have a look to the first few lines of the `data.table`: The first thing we want to do is to have a look to the first few lines of the `data.table`:
@@ -137,8 +137,8 @@ levels(df[,Treatment])
#### Encoding categorical features #### Encoding categorical features
Next step, we will transform the categorical data to dummy variables. Next step, we will transform the categorical data to dummy variables.
Several encoding methods exist, e.g., [one-hot encoding](http://en.wikipedia.org/wiki/One-hot) is a common approach. Several encoding methods exist, e.g., [one-hot encoding](https://en.wikipedia.org/wiki/One-hot) is a common approach.
We will use the [dummy contrast coding](http://www.ats.ucla.edu/stat/r/library/contrast_coding.htm#dummy) which is popular because it produces "full rank" encoding (also see [this blog post by Max Kuhn](http://appliedpredictivemodeling.com/blog/2013/10/23/the-basics-of-encoding-categorical-data-for-predictive-models)). We will use the [dummy contrast coding](https://stats.idre.ucla.edu/r/library/r-library-contrast-coding-systems-for-categorical-variables/) which is popular because it produces "full rank" encoding (also see [this blog post by Max Kuhn](http://appliedpredictivemodeling.com/blog/2013/10/23/the-basics-of-encoding-categorical-data-for-predictive-models)).
The purpose is to transform each value of each *categorical* feature into a *binary* feature `{0, 1}`. The purpose is to transform each value of each *categorical* feature into a *binary* feature `{0, 1}`.
@@ -176,7 +176,7 @@ bst <- xgboost(data = sparse_matrix, label = output_vector, max_depth = 4,
You can see some `train-error: 0.XXXXX` lines followed by a number. It decreases. Each line shows how well the model explains your data. Lower is better. You can see some `train-error: 0.XXXXX` lines followed by a number. It decreases. Each line shows how well the model explains your data. Lower is better.
A model which fits too well may [overfit](http://en.wikipedia.org/wiki/Overfitting) (meaning it copy/paste too much the past, and won't be that good to predict the future). A model which fits too well may [overfit](https://en.wikipedia.org/wiki/Overfitting) (meaning it copy/paste too much the past, and won't be that good to predict the future).
> Here you can see the numbers decrease until line 7 and then increase. > Here you can see the numbers decrease until line 7 and then increase.
> >
@@ -304,7 +304,7 @@ Linear model may not be that smart in this scenario.
Special Note: What about Random Forests™? Special Note: What about Random Forests™?
----------------------------------------- -----------------------------------------
As you may know, [Random Forests™](http://en.wikipedia.org/wiki/Random_forest) algorithm is cousin with boosting and both are part of the [ensemble learning](http://en.wikipedia.org/wiki/Ensemble_learning) family. As you may know, [Random Forests™](https://en.wikipedia.org/wiki/Random_forest) algorithm is cousin with boosting and both are part of the [ensemble learning](https://en.wikipedia.org/wiki/Ensemble_learning) family.
Both trains several decision trees for one dataset. The *main* difference is that in Random Forests™, trees are independent and in boosting, the tree `N+1` focus its learning on the loss (<=> what has not been well modeled by the tree `N`). Both trains several decision trees for one dataset. The *main* difference is that in Random Forests™, trees are independent and in boosting, the tree `N+1` focus its learning on the loss (<=> what has not been well modeled by the tree `N`).

View File

@@ -24,7 +24,7 @@
author = "K. Bache and M. Lichman", author = "K. Bache and M. Lichman",
year = "2013", year = "2013",
title = "{UCI} Machine Learning Repository", title = "{UCI} Machine Learning Repository",
url = "http://archive.ics.uci.edu/ml", url = "http://archive.ics.uci.edu/ml/",
institution = "University of California, Irvine, School of Information and Computer Sciences" institution = "University of California, Irvine, School of Information and Computer Sciences"
} }

View File

@@ -68,7 +68,7 @@ The version 0.4-2 is on CRAN, and you can install it by:
install.packages("xgboost") install.packages("xgboost")
``` ```
Formerly available versions can be obtained from the CRAN [archive](https://cran.r-project.org/src/contrib/Archive/xgboost) Formerly available versions can be obtained from the CRAN [archive](https://cran.r-project.org/src/contrib/Archive/xgboost/)
## Learning ## Learning

View File

@@ -1 +1 @@
@xgboost_VERSION_MAJOR@.@xgboost_VERSION_MINOR@.@xgboost_VERSION_PATCH@rc2 @xgboost_VERSION_MAJOR@.@xgboost_VERSION_MINOR@.@xgboost_VERSION_PATCH@

View File

@@ -6,6 +6,6 @@
#define XGBOOST_VER_MAJOR 1 #define XGBOOST_VER_MAJOR 1
#define XGBOOST_VER_MINOR 2 #define XGBOOST_VER_MINOR 2
#define XGBOOST_VER_PATCH 0 #define XGBOOST_VER_PATCH 1
#endif // XGBOOST_VERSION_CONFIG_H_ #endif // XGBOOST_VERSION_CONFIG_H_

View File

@@ -6,7 +6,7 @@
<groupId>ml.dmlc</groupId> <groupId>ml.dmlc</groupId>
<artifactId>xgboost-jvm_2.12</artifactId> <artifactId>xgboost-jvm_2.12</artifactId>
<version>1.2.0-RC2</version> <version>1.2.1</version>
<packaging>pom</packaging> <packaging>pom</packaging>
<name>XGBoost JVM Package</name> <name>XGBoost JVM Package</name>
<description>JVM Package for XGBoost</description> <description>JVM Package for XGBoost</description>

View File

@@ -6,10 +6,10 @@
<parent> <parent>
<groupId>ml.dmlc</groupId> <groupId>ml.dmlc</groupId>
<artifactId>xgboost-jvm_2.12</artifactId> <artifactId>xgboost-jvm_2.12</artifactId>
<version>1.2.0-RC2</version> <version>1.2.1</version>
</parent> </parent>
<artifactId>xgboost4j-example_2.12</artifactId> <artifactId>xgboost4j-example_2.12</artifactId>
<version>1.2.0-RC2</version> <version>1.2.1</version>
<packaging>jar</packaging> <packaging>jar</packaging>
<build> <build>
<plugins> <plugins>
@@ -26,7 +26,7 @@
<dependency> <dependency>
<groupId>ml.dmlc</groupId> <groupId>ml.dmlc</groupId>
<artifactId>xgboost4j-spark_${scala.binary.version}</artifactId> <artifactId>xgboost4j-spark_${scala.binary.version}</artifactId>
<version>1.2.0-RC2</version> <version>1.2.1</version>
</dependency> </dependency>
<dependency> <dependency>
<groupId>org.apache.spark</groupId> <groupId>org.apache.spark</groupId>
@@ -37,7 +37,7 @@
<dependency> <dependency>
<groupId>ml.dmlc</groupId> <groupId>ml.dmlc</groupId>
<artifactId>xgboost4j-flink_${scala.binary.version}</artifactId> <artifactId>xgboost4j-flink_${scala.binary.version}</artifactId>
<version>1.2.0-RC2</version> <version>1.2.1</version>
</dependency> </dependency>
<dependency> <dependency>
<groupId>org.apache.commons</groupId> <groupId>org.apache.commons</groupId>

View File

@@ -6,10 +6,10 @@
<parent> <parent>
<groupId>ml.dmlc</groupId> <groupId>ml.dmlc</groupId>
<artifactId>xgboost-jvm_2.12</artifactId> <artifactId>xgboost-jvm_2.12</artifactId>
<version>1.2.0-RC2</version> <version>1.2.1</version>
</parent> </parent>
<artifactId>xgboost4j-flink_2.12</artifactId> <artifactId>xgboost4j-flink_2.12</artifactId>
<version>1.2.0-RC2</version> <version>1.2.1</version>
<build> <build>
<plugins> <plugins>
<plugin> <plugin>
@@ -26,7 +26,7 @@
<dependency> <dependency>
<groupId>ml.dmlc</groupId> <groupId>ml.dmlc</groupId>
<artifactId>xgboost4j_${scala.binary.version}</artifactId> <artifactId>xgboost4j_${scala.binary.version}</artifactId>
<version>1.2.0-RC2</version> <version>1.2.1</version>
</dependency> </dependency>
<dependency> <dependency>
<groupId>org.apache.commons</groupId> <groupId>org.apache.commons</groupId>

View File

@@ -6,7 +6,7 @@
<parent> <parent>
<groupId>ml.dmlc</groupId> <groupId>ml.dmlc</groupId>
<artifactId>xgboost-jvm_2.12</artifactId> <artifactId>xgboost-jvm_2.12</artifactId>
<version>1.2.0-RC2</version> <version>1.2.1</version>
</parent> </parent>
<artifactId>xgboost4j-spark_2.12</artifactId> <artifactId>xgboost4j-spark_2.12</artifactId>
<build> <build>
@@ -24,7 +24,7 @@
<dependency> <dependency>
<groupId>ml.dmlc</groupId> <groupId>ml.dmlc</groupId>
<artifactId>xgboost4j_${scala.binary.version}</artifactId> <artifactId>xgboost4j_${scala.binary.version}</artifactId>
<version>1.2.0-RC2</version> <version>1.2.1</version>
</dependency> </dependency>
<dependency> <dependency>
<groupId>org.apache.spark</groupId> <groupId>org.apache.spark</groupId>

View File

@@ -6,10 +6,10 @@
<parent> <parent>
<groupId>ml.dmlc</groupId> <groupId>ml.dmlc</groupId>
<artifactId>xgboost-jvm_2.12</artifactId> <artifactId>xgboost-jvm_2.12</artifactId>
<version>1.2.0-RC2</version> <version>1.2.1</version>
</parent> </parent>
<artifactId>xgboost4j_2.12</artifactId> <artifactId>xgboost4j_2.12</artifactId>
<version>1.2.0-RC2</version> <version>1.2.1</version>
<packaging>jar</packaging> <packaging>jar</packaging>
<dependencies> <dependencies>

View File

@@ -1 +1 @@
1.2.0rc2 1.2.1

View File

@@ -40,7 +40,7 @@ class EarlyStopException(Exception):
""" """
def __init__(self, best_iteration): def __init__(self, best_iteration):
super(EarlyStopException, self).__init__() super().__init__()
self.best_iteration = best_iteration self.best_iteration = best_iteration

View File

@@ -750,7 +750,10 @@ class XGBModel(XGBModelBase):
@xgboost_model_doc( @xgboost_model_doc(
"Implementation of the scikit-learn API for XGBoost classification.", "Implementation of the scikit-learn API for XGBoost classification.",
['model', 'objective']) ['model', 'objective'], extra_parameters='''
n_estimators : int
Number of boosting rounds.
''')
class XGBClassifier(XGBModel, XGBClassifierBase): class XGBClassifier(XGBModel, XGBClassifierBase):
# pylint: disable=missing-docstring,invalid-name,too-many-instance-attributes # pylint: disable=missing-docstring,invalid-name,too-many-instance-attributes
def __init__(self, objective="binary:logistic", **kwargs): def __init__(self, objective="binary:logistic", **kwargs):
@@ -1014,7 +1017,7 @@ class XGBRFClassifier(XGBClassifier):
**kwargs) **kwargs)
def get_xgb_params(self): def get_xgb_params(self):
params = super(XGBRFClassifier, self).get_xgb_params() params = super().get_xgb_params()
params['num_parallel_tree'] = self.n_estimators params['num_parallel_tree'] = self.n_estimators
return params return params
@@ -1033,7 +1036,10 @@ class XGBRegressor(XGBModel, XGBRegressorBase):
@xgboost_model_doc( @xgboost_model_doc(
"scikit-learn API for XGBoost random forest regression.", "scikit-learn API for XGBoost random forest regression.",
['model', 'objective']) ['model', 'objective'], extra_parameters='''
n_estimators : int
Number of trees in random forest to fit.
''')
class XGBRFRegressor(XGBRegressor): class XGBRFRegressor(XGBRegressor):
# pylint: disable=missing-docstring # pylint: disable=missing-docstring
def __init__(self, learning_rate=1, subsample=0.8, colsample_bynode=0.8, def __init__(self, learning_rate=1, subsample=0.8, colsample_bynode=0.8,
@@ -1043,7 +1049,7 @@ class XGBRFRegressor(XGBRegressor):
reg_lambda=reg_lambda, **kwargs) reg_lambda=reg_lambda, **kwargs)
def get_xgb_params(self): def get_xgb_params(self):
params = super(XGBRFRegressor, self).get_xgb_params() params = super().get_xgb_params()
params['num_parallel_tree'] = self.n_estimators params['num_parallel_tree'] = self.n_estimators
return params return params

View File

@@ -1,5 +1,6 @@
ARG CUDA_VERSION ARG CUDA_VERSION_ARG
FROM nvidia/cuda:$CUDA_VERSION-devel-ubuntu18.04 FROM nvidia/cuda:$CUDA_VERSION_ARG-devel-ubuntu18.04
ARG CUDA_VERSION_ARG
# Environment # Environment
ENV DEBIAN_FRONTEND noninteractive ENV DEBIAN_FRONTEND noninteractive

View File

@@ -1,5 +1,6 @@
ARG CUDA_VERSION ARG CUDA_VERSION_ARG
FROM nvidia/cuda:$CUDA_VERSION-runtime-ubuntu16.04 FROM nvidia/cuda:$CUDA_VERSION_ARG-runtime-ubuntu16.04
ARG CUDA_VERSION_ARG
# Environment # Environment
ENV DEBIAN_FRONTEND noninteractive ENV DEBIAN_FRONTEND noninteractive
@@ -17,8 +18,8 @@ ENV PATH=/opt/python/bin:$PATH
# Create new Conda environment with cuDF, Dask, and cuPy # Create new Conda environment with cuDF, Dask, and cuPy
RUN \ RUN \
conda create -n gpu_test -c rapidsai -c nvidia -c conda-forge -c defaults \ conda create -n gpu_test -c rapidsai-nightly -c rapidsai -c nvidia -c conda-forge -c defaults \
python=3.7 cudf=0.14 cudatoolkit=$CUDA_VERSION dask dask-cuda dask-cudf cupy \ python=3.7 cudf=0.15* cudatoolkit=$CUDA_VERSION_ARG dask dask-cuda dask-cudf cupy \
numpy pytest scipy scikit-learn pandas matplotlib wheel python-kubernetes urllib3 graphviz hypothesis numpy pytest scipy scikit-learn pandas matplotlib wheel python-kubernetes urllib3 graphviz hypothesis
ENV GOSU_VERSION 1.10 ENV GOSU_VERSION 1.10

View File

@@ -1,6 +1,6 @@
ARG CUDA_VERSION ARG CUDA_VERSION_ARG
FROM nvidia/cuda:$CUDA_VERSION-devel-ubuntu16.04 FROM nvidia/cuda:$CUDA_VERSION_ARG-devel-ubuntu16.04
ARG CUDA_VERSION ARG CUDA_VERSION_ARG
# Environment # Environment
ENV DEBIAN_FRONTEND noninteractive ENV DEBIAN_FRONTEND noninteractive
@@ -19,7 +19,7 @@ RUN \
# NCCL2 (License: https://docs.nvidia.com/deeplearning/sdk/nccl-sla/index.html) # NCCL2 (License: https://docs.nvidia.com/deeplearning/sdk/nccl-sla/index.html)
RUN \ RUN \
export CUDA_SHORT=`echo $CUDA_VERSION | egrep -o '[0-9]+\.[0-9]'` && \ export CUDA_SHORT=`echo $CUDA_VERSION_ARG | egrep -o '[0-9]+\.[0-9]'` && \
export NCCL_VERSION=2.7.5-1 && \ export NCCL_VERSION=2.7.5-1 && \
apt-get update && \ apt-get update && \
apt-get install -y --allow-downgrades --allow-change-held-packages libnccl2=${NCCL_VERSION}+cuda${CUDA_SHORT} libnccl-dev=${NCCL_VERSION}+cuda${CUDA_SHORT} apt-get install -y --allow-downgrades --allow-change-held-packages libnccl2=${NCCL_VERSION}+cuda${CUDA_SHORT} libnccl-dev=${NCCL_VERSION}+cuda${CUDA_SHORT}

View File

@@ -1,6 +1,6 @@
ARG CUDA_VERSION ARG CUDA_VERSION_ARG
FROM nvidia/cuda:$CUDA_VERSION-devel-centos6 FROM nvidia/cuda:$CUDA_VERSION_ARG-devel-centos6
ARG CUDA_VERSION ARG CUDA_VERSION_ARG
# Environment # Environment
ENV DEBIAN_FRONTEND noninteractive ENV DEBIAN_FRONTEND noninteractive
@@ -33,7 +33,7 @@ RUN \
# NCCL2 (License: https://docs.nvidia.com/deeplearning/sdk/nccl-sla/index.html) # NCCL2 (License: https://docs.nvidia.com/deeplearning/sdk/nccl-sla/index.html)
RUN \ RUN \
export CUDA_SHORT=`echo $CUDA_VERSION | egrep -o '[0-9]+\.[0-9]'` && \ export CUDA_SHORT=`echo $CUDA_VERSION_ARG | egrep -o '[0-9]+\.[0-9]'` && \
export NCCL_VERSION=2.4.8-1 && \ export NCCL_VERSION=2.4.8-1 && \
wget https://developer.download.nvidia.com/compute/machine-learning/repos/rhel7/x86_64/nvidia-machine-learning-repo-rhel7-1.0.0-1.x86_64.rpm && \ wget https://developer.download.nvidia.com/compute/machine-learning/repos/rhel7/x86_64/nvidia-machine-learning-repo-rhel7-1.0.0-1.x86_64.rpm && \
rpm -i nvidia-machine-learning-repo-rhel7-1.0.0-1.x86_64.rpm && \ rpm -i nvidia-machine-learning-repo-rhel7-1.0.0-1.x86_64.rpm && \

View File

@@ -1,5 +1,6 @@
ARG CUDA_VERSION ARG CUDA_VERSION_ARG
FROM nvidia/cuda:$CUDA_VERSION-runtime-ubuntu16.04 FROM nvidia/cuda:$CUDA_VERSION_ARG-runtime-ubuntu16.04
ARG CUDA_VERSION_ARG
ARG JDK_VERSION=8 ARG JDK_VERSION=8
ARG SPARK_VERSION=3.0.0 ARG SPARK_VERSION=3.0.0

View File

@@ -1,6 +1,6 @@
ARG CUDA_VERSION ARG CUDA_VERSION_ARG
FROM nvidia/cuda:$CUDA_VERSION-devel-centos6 FROM nvidia/cuda:$CUDA_VERSION_ARG-devel-centos6
ARG CUDA_VERSION ARG CUDA_VERSION_ARG
# Environment # Environment
ENV DEBIAN_FRONTEND noninteractive ENV DEBIAN_FRONTEND noninteractive
@@ -30,7 +30,7 @@ RUN \
# NCCL2 (License: https://docs.nvidia.com/deeplearning/sdk/nccl-sla/index.html) # NCCL2 (License: https://docs.nvidia.com/deeplearning/sdk/nccl-sla/index.html)
RUN \ RUN \
export CUDA_SHORT=`echo $CUDA_VERSION | egrep -o '[0-9]+\.[0-9]'` && \ export CUDA_SHORT=`echo $CUDA_VERSION_ARG | egrep -o '[0-9]+\.[0-9]'` && \
export NCCL_VERSION=2.4.8-1 && \ export NCCL_VERSION=2.4.8-1 && \
wget https://developer.download.nvidia.com/compute/machine-learning/repos/rhel7/x86_64/nvidia-machine-learning-repo-rhel7-1.0.0-1.x86_64.rpm && \ wget https://developer.download.nvidia.com/compute/machine-learning/repos/rhel7/x86_64/nvidia-machine-learning-repo-rhel7-1.0.0-1.x86_64.rpm && \
rpm -i nvidia-machine-learning-repo-rhel7-1.0.0-1.x86_64.rpm && \ rpm -i nvidia-machine-learning-repo-rhel7-1.0.0-1.x86_64.rpm && \