[jvm-packages] update version number for 1.2 branch (#6427)

* [jvm-packages] update version number of 1.2 branch * update ver
1.2.1 patch release (#6206)
2020-11-23 14:16:30 -08:00 · 2020-10-12 15:10:16 -07:00 · 2020-09-01 23:38:27 -07:00 · 2020-08-28 21:24:49 +08:00 · 2020-08-22 18:25:18 -07:00 · 2020-08-22 11:43:38 -07:00
40 changed files with 232 additions and 130 deletions
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -81,7 +81,7 @@ jobs:
      run: |
        cd R-package
        R.exe CMD INSTALL .
-        Rscript.exe tests/run_lint.R
+        Rscript.exe tests/helper_scripts/run_lint.R


  test-with-R:
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,9 +1,10 @@
 cmake_minimum_required(VERSION 3.13)
-project(xgboost LANGUAGES CXX C VERSION 1.2.0)
+project(xgboost LANGUAGES CXX C VERSION 1.2.1)
 include(cmake/Utils.cmake)
 list(APPEND CMAKE_MODULE_PATH "${xgboost_SOURCE_DIR}/cmake/modules")
 cmake_policy(SET CMP0022 NEW)
 cmake_policy(SET CMP0079 NEW)
+set(CMAKE_POLICY_DEFAULT_CMP0063 NEW)
 cmake_policy(SET CMP0063 NEW)

 if ((${CMAKE_VERSION} VERSION_GREATER 3.13) OR (${CMAKE_VERSION} VERSION_EQUAL 3.13))
@@ -173,9 +174,6 @@ foreach(lib rabit rabit_base rabit_empty rabit_mock rabit_mock_static)
  # from dmlc is correctly applied to rabit.
  if (TARGET ${lib})
    target_link_libraries(${lib} dmlc ${CMAKE_THREAD_LIBS_INIT})
-    if (HIDE_CXX_SYMBOLS)  # Hide all C++ symbols from Rabit
-      set_target_properties(${lib} PROPERTIES CXX_VISIBILITY_PRESET hidden)
-    endif (HIDE_CXX_SYMBOLS)
    if (ENABLE_ALL_WARNINGS)
      target_compile_options(${lib} PRIVATE -Wall -Wextra)
    endif (ENABLE_ALL_WARNINGS)
@@ -204,8 +202,9 @@ endif (USE_NVTX)

 #-- Hide all C++ symbols
 if (HIDE_CXX_SYMBOLS)
-  set_target_properties(objxgboost PROPERTIES CXX_VISIBILITY_PRESET hidden)
-  set_target_properties(xgboost PROPERTIES CXX_VISIBILITY_PRESET hidden)
+  foreach(target objxgboost xgboost dmlc rabit rabit_mock_static)
+    set_target_properties(${target} PROPERTIES CXX_VISIBILITY_PRESET hidden)
+  endforeach()
 endif (HIDE_CXX_SYMBOLS)

 target_include_directories(xgboost
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -92,7 +92,7 @@ pipeline {
            'test-python-gpu-cuda10.2': { TestPythonGPU(host_cuda_version: '10.2') },
            'test-python-gpu-cuda11.0-cross': { TestPythonGPU(artifact_cuda_version: '10.0', host_cuda_version: '11.0') },
            'test-python-gpu-cuda11.0': { TestPythonGPU(artifact_cuda_version: '11.0', host_cuda_version: '11.0') },
-            'test-python-mgpu-cuda10.2': { TestPythonGPU(artifact_cuda_version: '10.2', host_cuda_version: '10.2', multi_gpu: true) },
+            'test-python-mgpu-cuda10.2': { TestPythonGPU(artifact_cuda_version: '10.0', host_cuda_version: '10.2', multi_gpu: true) },
            'test-cpp-gpu-cuda10.2': { TestCppGPU(artifact_cuda_version: '10.2', host_cuda_version: '10.2') },
            'test-cpp-gpu-cuda11.0': { TestCppGPU(artifact_cuda_version: '11.0', host_cuda_version: '11.0') },
            'test-jvm-jdk8-cuda10.0': { CrossTestJVMwithJDKGPU(artifact_cuda_version: '10.0', host_cuda_version: '10.0') },
@@ -144,7 +144,7 @@ def ClangTidy() {
    echo "Running clang-tidy job..."
    def container_type = "clang_tidy"
    def docker_binary = "docker"
-    def dockerArgs = "--build-arg CUDA_VERSION=10.1"
+    def dockerArgs = "--build-arg CUDA_VERSION_ARG=10.1"
    sh """
    ${dockerRun} ${container_type} ${docker_binary} ${dockerArgs} python3 tests/ci_build/tidy.py
    """
@@ -261,7 +261,7 @@ def BuildCUDA(args) {
    echo "Build with CUDA ${args.cuda_version}"
    def container_type = GetCUDABuildContainerType(args.cuda_version)
    def docker_binary = "docker"
-    def docker_args = "--build-arg CUDA_VERSION=${args.cuda_version}"
+    def docker_args = "--build-arg CUDA_VERSION_ARG=${args.cuda_version}"
    def arch_flag = ""
    if (env.BRANCH_NAME != 'master' && !(env.BRANCH_NAME.startsWith('release'))) {
      arch_flag = "-DGPU_COMPUTE_VER=75"
@@ -285,12 +285,12 @@ def BuildCUDA(args) {
 }

 def BuildJVMPackagesWithCUDA(args) {
-  node('linux && gpu') {
+  node('linux && mgpu') {
    unstash name: 'srcs'
    echo "Build XGBoost4J-Spark with Spark ${args.spark_version}, CUDA ${args.cuda_version}"
    def container_type = "jvm_gpu_build"
    def docker_binary = "nvidia-docker"
-    def docker_args = "--build-arg CUDA_VERSION=${args.cuda_version}"
+    def docker_args = "--build-arg CUDA_VERSION_ARG=${args.cuda_version}"
    def arch_flag = ""
    if (env.BRANCH_NAME != 'master' && !(env.BRANCH_NAME.startsWith('release'))) {
      arch_flag = "-DGPU_COMPUTE_VER=75"
@@ -365,7 +365,7 @@ def TestPythonGPU(args) {
    echo "Test Python GPU: CUDA ${args.host_cuda_version}"
    def container_type = "gpu"
    def docker_binary = "nvidia-docker"
-    def docker_args = "--build-arg CUDA_VERSION=${args.host_cuda_version}"
+    def docker_args = "--build-arg CUDA_VERSION_ARG=${args.host_cuda_version}"
    if (args.multi_gpu) {
      echo "Using multiple GPUs"
      // Allocate extra space in /dev/shm to enable NCCL
@@ -406,7 +406,7 @@ def TestCppGPU(args) {
    echo "Test C++, CUDA ${args.host_cuda_version}"
    def container_type = "gpu"
    def docker_binary = "nvidia-docker"
-    def docker_args = "--build-arg CUDA_VERSION=${args.host_cuda_version}"
+    def docker_args = "--build-arg CUDA_VERSION_ARG=${args.host_cuda_version}"
    sh "${dockerRun} ${container_type} ${docker_binary} ${docker_args} build/testxgboost"
    deleteDir()
  }
@@ -424,7 +424,7 @@ def CrossTestJVMwithJDKGPU(args) {
    }
    def container_type = "gpu_jvm"
    def docker_binary = "nvidia-docker"
-    def docker_args = "--build-arg CUDA_VERSION=${args.host_cuda_version}"
+    def docker_args = "--build-arg CUDA_VERSION_ARG=${args.host_cuda_version}"
    sh "${dockerRun} ${container_type} ${docker_binary} ${docker_args} tests/ci_build/test_jvm_gpu_cross.sh"
    deleteDir()
  }
@@ -472,10 +472,11 @@ def DeployJVMPackages(args) {
    unstash name: 'srcs'
    if (env.BRANCH_NAME == 'master' || env.BRANCH_NAME.startsWith('release')) {
      echo 'Deploying to xgboost-maven-repo S3 repo...'
-      def container_type = "jvm"
-      def docker_binary = "docker"
      sh """
-      ${dockerRun} ${container_type} ${docker_binary} tests/ci_build/deploy_jvm_packages.sh ${args.spark_version}
+      ${dockerRun} jvm docker tests/ci_build/deploy_jvm_packages.sh ${args.spark_version} 0
+      """
+      sh """
+      ${dockerRun} jvm_gpu_build docker --build-arg CUDA_VERSION_ARG=10.0 tests/ci_build/deploy_jvm_packages.sh ${args.spark_version} 1
      """
    }
    deleteDir()
--- a/Makefile
+++ b/Makefile
@@ -133,15 +133,16 @@ Rpack: clean_all
 	sed -i -e 's/@BACKTRACE_LIB@//g' xgboost/src/Makevars.win
 	sed -i -e 's/@OPENMP_LIB@//g' xgboost/src/Makevars.win
 	rm -f xgboost/src/Makevars.win-e   # OSX sed create this extra file; remove it
-	bash R-package/remove_warning_suppression_pragma.sh
+	bash xgboost/remove_warning_suppression_pragma.sh
 	rm xgboost/remove_warning_suppression_pragma.sh
+	rm -rfv xgboost/tests/helper_scripts/

 Rbuild: Rpack
 	R CMD build --no-build-vignettes xgboost
 	rm -rf xgboost

 Rcheck: Rbuild
-	R CMD check xgboost*.tar.gz
+	R CMD check --as-cran xgboost*.tar.gz

 -include build/*.d
 -include build/*/*.d
--- a/R-package/DESCRIPTION
+++ b/R-package/DESCRIPTION
@@ -2,7 +2,7 @@ Package: xgboost
 Type: Package
 Title: Extreme Gradient Boosting
 Version: 1.2.0.1
-Date: 2020-02-21
+Date: 2020-08-28
 Authors@R: c(
  person("Tianqi", "Chen", role = c("aut"),
         email = "tianqi.tchen@gmail.com"),
--- a/R-package/R/utils.R
+++ b/R-package/R/utils.R
@@ -349,6 +349,7 @@ NULL
 #' # Save as a stand-alone file (JSON); load it with xgb.load()
 #' xgb.save(bst, 'xgb.model.json')
 #' bst2 <- xgb.load('xgb.model.json')
+#' if (file.exists('xgb.model.json')) file.remove('xgb.model.json')
 #'
 #' # Save as a raw byte vector; load it with xgb.load.raw()
 #' xgb_bytes <- xgb.save.raw(bst)
@@ -364,6 +365,7 @@ NULL
 #' obj2 <- readRDS('my_object.rds')
 #' # Re-construct xgb.Booster object from the bytes
 #' bst2 <- xgb.load.raw(obj2$xgb_model_bytes)
+#' if (file.exists('my_object.rds')) file.remove('my_object.rds')
 #'
 #' @name a-compatibility-note-for-saveRDS-save
 NULL
--- a/R-package/R/xgb.cv.R
+++ b/R-package/R/xgb.cv.R
@@ -79,7 +79,7 @@
 #'
 #' All observations are used for both training and validation.
 #'
-#' Adapted from \url{http://en.wikipedia.org/wiki/Cross-validation_\%28statistics\%29#k-fold_cross-validation}
+#' Adapted from \url{https://en.wikipedia.org/wiki/Cross-validation_\%28statistics\%29}
 #'
 #' @return
 #' An object of class \code{xgb.cv.synchronous} with the following elements:
--- a/R-package/R/xgb.train.R
+++ b/R-package/R/xgb.train.R
@@ -130,16 +130,16 @@
 #' Note that when using a customized metric, only this single metric can be used.
 #' The following is the list of built-in metrics for which Xgboost provides optimized implementation:
 #'   \itemize{
-#'      \item \code{rmse} root mean square error. \url{http://en.wikipedia.org/wiki/Root_mean_square_error}
-#'      \item \code{logloss} negative log-likelihood. \url{http://en.wikipedia.org/wiki/Log-likelihood}
-#'      \item \code{mlogloss} multiclass logloss. \url{http://wiki.fast.ai/index.php/Log_Loss}
+#'      \item \code{rmse} root mean square error. \url{https://en.wikipedia.org/wiki/Root_mean_square_error}
+#'      \item \code{logloss} negative log-likelihood. \url{https://en.wikipedia.org/wiki/Log-likelihood}
+#'      \item \code{mlogloss} multiclass logloss. \url{https://scikit-learn.org/stable/modules/generated/sklearn.metrics.log_loss.html}
 #'      \item \code{error} Binary classification error rate. It is calculated as \code{(# wrong cases) / (# all cases)}.
 #'            By default, it uses the 0.5 threshold for predicted values to define negative and positive instances.
 #'            Different threshold (e.g., 0.) could be specified as "error@0."
 #'      \item \code{merror} Multiclass classification error rate. It is calculated as \code{(# wrong cases) / (# all cases)}.
-#'      \item \code{auc} Area under the curve. \url{http://en.wikipedia.org/wiki/Receiver_operating_characteristic#'Area_under_curve} for ranking evaluation.
+#'      \item \code{auc} Area under the curve. \url{https://en.wikipedia.org/wiki/Receiver_operating_characteristic#'Area_under_curve} for ranking evaluation.
 #'      \item \code{aucpr} Area under the PR curve. \url{https://en.wikipedia.org/wiki/Precision_and_recall} for ranking evaluation.
-#'      \item \code{ndcg} Normalized Discounted Cumulative Gain (for ranking task). \url{http://en.wikipedia.org/wiki/NDCG}
+#'      \item \code{ndcg} Normalized Discounted Cumulative Gain (for ranking task). \url{https://en.wikipedia.org/wiki/NDCG}
 #'   }
 #'
 #' The following callbacks are automatically created when certain parameters are set:
--- a/R-package/man/a-compatibility-note-for-saveRDS-save.Rd
+++ b/R-package/man/a-compatibility-note-for-saveRDS-save.Rd
@@ -43,6 +43,7 @@ bst2 <- xgb.load('xgb.model')
 # Save as a stand-alone file (JSON); load it with xgb.load()
 xgb.save(bst, 'xgb.model.json')
 bst2 <- xgb.load('xgb.model.json')
+if (file.exists('xgb.model.json')) file.remove('xgb.model.json')

 # Save as a raw byte vector; load it with xgb.load.raw()
 xgb_bytes <- xgb.save.raw(bst)
@@ -58,5 +59,6 @@ saveRDS(obj, 'my_object.rds')
 obj2 <- readRDS('my_object.rds')
 # Re-construct xgb.Booster object from the bytes
 bst2 <- xgb.load.raw(obj2$xgb_model_bytes)
+if (file.exists('my_object.rds')) file.remove('my_object.rds')

 }
--- a/R-package/man/xgb.cv.Rd
+++ b/R-package/man/xgb.cv.Rd
@@ -154,7 +154,7 @@ The cross-validation process is then repeated \code{nrounds} times, with each of

 All observations are used for both training and validation.

-Adapted from \url{http://en.wikipedia.org/wiki/Cross-validation_\%28statistics\%29#k-fold_cross-validation}
+Adapted from \url{https://en.wikipedia.org/wiki/Cross-validation_\%28statistics\%29}
 }
 \examples{
 data(agaricus.train, package='xgboost')
--- a/R-package/man/xgb.train.Rd
+++ b/R-package/man/xgb.train.Rd
@@ -215,16 +215,16 @@ User may set one or several \code{eval_metric} parameters.
 Note that when using a customized metric, only this single metric can be used.
 The following is the list of built-in metrics for which Xgboost provides optimized implementation:
  \itemize{
-     \item \code{rmse} root mean square error. \url{http://en.wikipedia.org/wiki/Root_mean_square_error}
-     \item \code{logloss} negative log-likelihood. \url{http://en.wikipedia.org/wiki/Log-likelihood}
-     \item \code{mlogloss} multiclass logloss. \url{http://wiki.fast.ai/index.php/Log_Loss}
+     \item \code{rmse} root mean square error. \url{https://en.wikipedia.org/wiki/Root_mean_square_error}
+     \item \code{logloss} negative log-likelihood. \url{https://en.wikipedia.org/wiki/Log-likelihood}
+     \item \code{mlogloss} multiclass logloss. \url{https://scikit-learn.org/stable/modules/generated/sklearn.metrics.log_loss.html}
     \item \code{error} Binary classification error rate. It is calculated as \code{(# wrong cases) / (# all cases)}.
           By default, it uses the 0.5 threshold for predicted values to define negative and positive instances.
           Different threshold (e.g., 0.) could be specified as "error@0."
     \item \code{merror} Multiclass classification error rate. It is calculated as \code{(# wrong cases) / (# all cases)}.
-     \item \code{auc} Area under the curve. \url{http://en.wikipedia.org/wiki/Receiver_operating_characteristic#'Area_under_curve} for ranking evaluation.
+     \item \code{auc} Area under the curve. \url{https://en.wikipedia.org/wiki/Receiver_operating_characteristic#'Area_under_curve} for ranking evaluation.
     \item \code{aucpr} Area under the PR curve. \url{https://en.wikipedia.org/wiki/Precision_and_recall} for ranking evaluation.
-     \item \code{ndcg} Normalized Discounted Cumulative Gain (for ranking task). \url{http://en.wikipedia.org/wiki/NDCG}
+     \item \code{ndcg} Normalized Discounted Cumulative Gain (for ranking task). \url{https://en.wikipedia.org/wiki/NDCG}
  }

 The following callbacks are automatically created when certain parameters are set:
--- a/R-package/tests/generate_models_params.R
+++ b/R-package/tests/generate_models_params.R
@@ -1,10 +0,0 @@
-model_generator_metadata <- function() {
-  return (list(
-    kRounds = 2,
-    kRows = 1000,
-    kCols = 4,
-    kForests = 2,
-    kMaxDepth = 2,
-    kClasses = 3
-  ))
-}
--- a/R-package/tests/helper_scripts/generate_models.R
+++ b/R-package/tests/helper_scripts/generate_models.R
@@ -5,7 +5,14 @@ library(Matrix)
 source('./generate_models_params.R')

 set.seed(0)
-metadata <- model_generator_metadata()
+metadata <- list(
+  kRounds = 2,
+  kRows = 1000,
+  kCols = 4,
+  kForests = 2,
+  kMaxDepth = 2,
+  kClasses = 3
+)
 X <- Matrix(data = rnorm(metadata$kRows * metadata$kCols), nrow = metadata$kRows,
            ncol = metadata$kCols, sparse = TRUE)
 w <- runif(metadata$kRows)
--- a/R-package/tests/helper_scripts/run_lint.R
+++ b/R-package/tests/helper_scripts/run_lint.R
--- a/R-package/tests/testthat/test_model_compatibility.R
+++ b/R-package/tests/testthat/test_model_compatibility.R
@@ -1,10 +1,16 @@
 require(xgboost)
 require(jsonlite)
-source('../generate_models_params.R')

 context("Models from previous versions of XGBoost can be loaded")

-metadata <- model_generator_metadata()
+metadata <- list(
+  kRounds = 2,
+  kRows = 1000,
+  kCols = 4,
+  kForests = 2,
+  kMaxDepth = 2,
+  kClasses = 3
+)

 run_model_param_check <- function (config) {
  testthat::expect_equal(config$learner$learner_model_param$num_feature, '4')
--- a/R-package/vignettes/discoverYourData.Rmd
+++ b/R-package/vignettes/discoverYourData.Rmd
@@ -57,7 +57,7 @@ To answer the question above we will convert *categorical* variables to `numeric

 In this Vignette we will see how to transform a *dense* `data.frame` (*dense* = few zeroes in the matrix) with *categorical* variables to a very *sparse* matrix (*sparse* = lots of zero in the matrix) of `numeric` features.

-The method we are going to see is usually called [one-hot encoding](http://en.wikipedia.org/wiki/One-hot).
+The method we are going to see is usually called [one-hot encoding](https://en.wikipedia.org/wiki/One-hot).

 The first step is to load `Arthritis` dataset in memory and wrap it with `data.table` package.

@@ -66,7 +66,7 @@ data(Arthritis)
 df <- data.table(Arthritis, keep.rownames = FALSE)
 ```

-> `data.table` is 100% compliant with **R** `data.frame` but its syntax is more consistent and its performance for large dataset is [best in class](http://stackoverflow.com/questions/21435339/data-table-vs-dplyr-can-one-do-something-well-the-other-cant-or-does-poorly) (`dplyr` from **R** and `Pandas` from **Python** [included](https://github.com/Rdatatable/data.table/wiki/Benchmarks-%3A-Grouping)). Some parts of **Xgboost** **R** package use `data.table`.
+> `data.table` is 100% compliant with **R** `data.frame` but its syntax is more consistent and its performance for large dataset is [best in class](https://stackoverflow.com/questions/21435339/data-table-vs-dplyr-can-one-do-something-well-the-other-cant-or-does-poorly) (`dplyr` from **R** and `Pandas` from **Python** [included](https://github.com/Rdatatable/data.table/wiki/Benchmarks-%3A-Grouping)). Some parts of **Xgboost** **R** package use `data.table`.

 The first thing we want to do is to have a look to the first few lines of the `data.table`:

@@ -137,8 +137,8 @@ levels(df[,Treatment])
 #### Encoding categorical features

 Next step, we will transform the categorical data to dummy variables.
-Several encoding methods exist, e.g., [one-hot encoding](http://en.wikipedia.org/wiki/One-hot) is a common approach.
-We will use the [dummy contrast coding](http://www.ats.ucla.edu/stat/r/library/contrast_coding.htm#dummy) which is popular because it produces "full rank" encoding (also see [this blog post by Max Kuhn](http://appliedpredictivemodeling.com/blog/2013/10/23/the-basics-of-encoding-categorical-data-for-predictive-models)).
+Several encoding methods exist, e.g., [one-hot encoding](https://en.wikipedia.org/wiki/One-hot) is a common approach.
+We will use the [dummy contrast coding](https://stats.idre.ucla.edu/r/library/r-library-contrast-coding-systems-for-categorical-variables/) which is popular because it produces "full rank" encoding (also see [this blog post by Max Kuhn](http://appliedpredictivemodeling.com/blog/2013/10/23/the-basics-of-encoding-categorical-data-for-predictive-models)).

 The purpose is to transform each value of each *categorical* feature into a *binary* feature `{0, 1}`.

@@ -176,7 +176,7 @@ bst <- xgboost(data = sparse_matrix, label = output_vector, max_depth = 4,

 You can see some `train-error: 0.XXXXX` lines followed by a number. It decreases. Each line shows how well the model explains your data. Lower is better.

-A model which fits too well may [overfit](http://en.wikipedia.org/wiki/Overfitting) (meaning it copy/paste too much the past, and won't be that good to predict the future).
+A model which fits too well may [overfit](https://en.wikipedia.org/wiki/Overfitting) (meaning it copy/paste too much the past, and won't be that good to predict the future).

 > Here you can see the numbers decrease until line 7 and then increase.
 >
@@ -304,7 +304,7 @@ Linear model may not be that smart in this scenario.
 Special Note: What about Random Forests™?
 -----------------------------------------

-As you may know, [Random Forests™](http://en.wikipedia.org/wiki/Random_forest) algorithm is cousin with boosting and both are part of the [ensemble learning](http://en.wikipedia.org/wiki/Ensemble_learning) family.
+As you may know, [Random Forests™](https://en.wikipedia.org/wiki/Random_forest) algorithm is cousin with boosting and both are part of the [ensemble learning](https://en.wikipedia.org/wiki/Ensemble_learning) family.

 Both trains several decision trees for one dataset. The *main* difference is that in Random Forests™, trees are independent and in boosting, the tree `N+1` focus its learning on the loss (<=> what has not been well modeled by the tree `N`).

--- a/R-package/vignettes/xgboost.bib
+++ b/R-package/vignettes/xgboost.bib
@@ -24,7 +24,7 @@
    author = "K. Bache and M. Lichman",
    year = "2013",
    title = "{UCI} Machine Learning Repository",
-    url = "http://archive.ics.uci.edu/ml",
+    url = "http://archive.ics.uci.edu/ml/",
    institution = "University of California, Irvine, School of Information and Computer Sciences" 
 }

--- a/R-package/vignettes/xgboostPresentation.Rmd
+++ b/R-package/vignettes/xgboostPresentation.Rmd
@@ -68,7 +68,7 @@ The version 0.4-2 is on CRAN, and you can install it by:
 install.packages("xgboost")
 ```

-Formerly available versions can be obtained from the CRAN [archive](https://cran.r-project.org/src/contrib/Archive/xgboost)
+Formerly available versions can be obtained from the CRAN [archive](https://cran.r-project.org/src/contrib/Archive/xgboost/)

 ## Learning

--- a/cmake/Python_version.in
+++ b/cmake/Python_version.in
@@ -1 +1 @@
-@xgboost_VERSION_MAJOR@.@xgboost_VERSION_MINOR@.@xgboost_VERSION_PATCH@-SNAPSHOT
+@xgboost_VERSION_MAJOR@.@xgboost_VERSION_MINOR@.@xgboost_VERSION_PATCH@
--- a/include/xgboost/version_config.h
+++ b/include/xgboost/version_config.h
@@ -6,6 +6,6 @@

 #define XGBOOST_VER_MAJOR 1
 #define XGBOOST_VER_MINOR 2
-#define XGBOOST_VER_PATCH 0
+#define XGBOOST_VER_PATCH 1

 #endif  // XGBOOST_VERSION_CONFIG_H_
--- a/jvm-packages/pom.xml
+++ b/jvm-packages/pom.xml
@@ -6,7 +6,7 @@

    <groupId>ml.dmlc</groupId>
    <artifactId>xgboost-jvm_2.12</artifactId>
-    <version>1.2.0-SNAPSHOT</version>
+    <version>1.2.1</version>
    <packaging>pom</packaging>
    <name>XGBoost JVM Package</name>
    <description>JVM Package for XGBoost</description>
--- a/jvm-packages/xgboost4j-example/pom.xml
+++ b/jvm-packages/xgboost4j-example/pom.xml
@@ -6,10 +6,10 @@
    <parent>
        <groupId>ml.dmlc</groupId>
        <artifactId>xgboost-jvm_2.12</artifactId>
-        <version>1.2.0-SNAPSHOT</version>
+        <version>1.2.1</version>
    </parent>
    <artifactId>xgboost4j-example_2.12</artifactId>
-    <version>1.2.0-SNAPSHOT</version>
+    <version>1.2.1</version>
    <packaging>jar</packaging>
    <build>
        <plugins>
@@ -26,7 +26,7 @@
        <dependency>
            <groupId>ml.dmlc</groupId>
            <artifactId>xgboost4j-spark_${scala.binary.version}</artifactId>
-            <version>1.2.0-SNAPSHOT</version>
+            <version>1.2.1</version>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
@@ -37,7 +37,7 @@
        <dependency>
            <groupId>ml.dmlc</groupId>
            <artifactId>xgboost4j-flink_${scala.binary.version}</artifactId>
-            <version>1.2.0-SNAPSHOT</version>
+            <version>1.2.1</version>
        </dependency>
        <dependency>
            <groupId>org.apache.commons</groupId>
--- a/jvm-packages/xgboost4j-flink/pom.xml
+++ b/jvm-packages/xgboost4j-flink/pom.xml
@@ -6,10 +6,10 @@
    <parent>
        <groupId>ml.dmlc</groupId>
        <artifactId>xgboost-jvm_2.12</artifactId>
-        <version>1.2.0-SNAPSHOT</version>
+        <version>1.2.1</version>
    </parent>
    <artifactId>xgboost4j-flink_2.12</artifactId>
-    <version>1.2.0-SNAPSHOT</version>
+    <version>1.2.1</version>
    <build>
        <plugins>
            <plugin>
@@ -26,7 +26,7 @@
        <dependency>
            <groupId>ml.dmlc</groupId>
            <artifactId>xgboost4j_${scala.binary.version}</artifactId>
-            <version>1.2.0-SNAPSHOT</version>
+            <version>1.2.1</version>
        </dependency>
        <dependency>
            <groupId>org.apache.commons</groupId>
--- a/jvm-packages/xgboost4j-spark/pom.xml
+++ b/jvm-packages/xgboost4j-spark/pom.xml
@@ -6,7 +6,7 @@
    <parent>
        <groupId>ml.dmlc</groupId>
        <artifactId>xgboost-jvm_2.12</artifactId>
-        <version>1.2.0-SNAPSHOT</version>
+        <version>1.2.1</version>
    </parent>
    <artifactId>xgboost4j-spark_2.12</artifactId>
    <build>
@@ -24,7 +24,7 @@
        <dependency>
            <groupId>ml.dmlc</groupId>
            <artifactId>xgboost4j_${scala.binary.version}</artifactId>
-            <version>1.2.0-SNAPSHOT</version>
+            <version>1.2.1</version>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
--- a/jvm-packages/xgboost4j/pom.xml
+++ b/jvm-packages/xgboost4j/pom.xml
@@ -6,10 +6,10 @@
    <parent>
        <groupId>ml.dmlc</groupId>
        <artifactId>xgboost-jvm_2.12</artifactId>
-        <version>1.2.0-SNAPSHOT</version>
+        <version>1.2.1</version>
    </parent>
    <artifactId>xgboost4j_2.12</artifactId>
-    <version>1.2.0-SNAPSHOT</version>
+    <version>1.2.1</version>
    <packaging>jar</packaging>

    <dependencies>
--- a/python-package/xgboost/VERSION
+++ b/python-package/xgboost/VERSION
@@ -1 +1 @@
-1.2.0-SNAPSHOT
+1.2.1
--- a/python-package/xgboost/core.py
+++ b/python-package/xgboost/core.py
@@ -40,7 +40,7 @@ class EarlyStopException(Exception):
    """

    def __init__(self, best_iteration):
-        super(EarlyStopException, self).__init__()
+        super().__init__()
        self.best_iteration = best_iteration


--- a/python-package/xgboost/dask.py
+++ b/python-package/xgboost/dask.py
@@ -738,7 +738,8 @@ async def _predict_async(client: Client, model, data, *args,
            predt = booster.predict(data=local_x,
                                    validate_features=local_x.num_row() != 0,
                                    *args)
-            ret = (delayed(predt), order)
+            columns = 1 if len(predt.shape) == 1 else predt.shape[1]
+            ret = ((delayed(predt), columns), order)
            predictions.append(ret)
        return predictions

@@ -775,7 +776,9 @@ async def _predict_async(client: Client, model, data, *args,
    # See https://docs.dask.org/en/latest/array-creation.html
    arrays = []
    for i, shape in enumerate(shapes):
-        arrays.append(da.from_delayed(results[i], shape=(shape[0], ),
+        arrays.append(da.from_delayed(
+            results[i][0], shape=(shape[0],)
+            if results[i][1] == 1 else (shape[0], results[i][1]),
            dtype=numpy.float32))
    predictions = await da.concatenate(arrays, axis=0)
    return predictions
@@ -978,6 +981,7 @@ class DaskScikitLearnBase(XGBModel):
    def client(self, clt):
        self._client = clt

+
 @xgboost_model_doc("""Implementation of the Scikit-Learn API for XGBoost.""",
                   ['estimators', 'model'])
 class DaskXGBRegressor(DaskScikitLearnBase, XGBRegressorBase):
@@ -1032,9 +1036,6 @@ class DaskXGBRegressor(DaskScikitLearnBase, XGBRegressorBase):
    ['estimators', 'model']
 )
 class DaskXGBClassifier(DaskScikitLearnBase, XGBClassifierBase):
-    # pylint: disable=missing-docstring
-    _client = None
-
    async def _fit_async(self, X, y,
                         sample_weights=None,
                         eval_set=None,
@@ -1078,13 +1079,34 @@ class DaskXGBClassifier(DaskScikitLearnBase, XGBClassifierBase):
        return self.client.sync(self._fit_async, X, y, sample_weights,
                                eval_set, sample_weight_eval_set, verbose)

-    async def _predict_async(self, data):
+    async def _predict_proba_async(self, data):
+        _assert_dask_support()
+
        test_dmatrix = await DaskDMatrix(client=self.client, data=data,
                                         missing=self.missing)
        pred_probs = await predict(client=self.client,
                                   model=self.get_booster(), data=test_dmatrix)
        return pred_probs

+    def predict_proba(self, data):  # pylint: disable=arguments-differ,missing-docstring
+        _assert_dask_support()
+        return self.client.sync(self._predict_proba_async, data)
+
+    async def _predict_async(self, data):
+        _assert_dask_support()
+
+        test_dmatrix = await DaskDMatrix(client=self.client, data=data,
+                                         missing=self.missing)
+        pred_probs = await predict(client=self.client,
+                                   model=self.get_booster(), data=test_dmatrix)
+
+        if self.n_classes_ == 2:
+            preds = (pred_probs > 0.5).astype(int)
+        else:
+            preds = da.argmax(pred_probs, axis=1)
+
+        return preds
+
    def predict(self, data):  # pylint: disable=arguments-differ
        _assert_dask_support()
        return self.client.sync(self._predict_async, data)
--- a/python-package/xgboost/sklearn.py
+++ b/python-package/xgboost/sklearn.py
@@ -77,7 +77,7 @@ __model_doc = '''
    gamma : float
        Minimum loss reduction required to make a further partition on a leaf
        node of the tree.
-    min_child_weight : int
+    min_child_weight : float
        Minimum sum of instance weight(hessian) needed in a child.
    max_delta_step : int
        Maximum delta step we allow each tree's weight estimation to be.
@@ -750,7 +750,10 @@ class XGBModel(XGBModelBase):

 @xgboost_model_doc(
    "Implementation of the scikit-learn API for XGBoost classification.",
-    ['model', 'objective'])
+    ['model', 'objective'], extra_parameters='''
+    n_estimators : int
+        Number of boosting rounds.
+''')
 class XGBClassifier(XGBModel, XGBClassifierBase):
    # pylint: disable=missing-docstring,invalid-name,too-many-instance-attributes
    def __init__(self, objective="binary:logistic", **kwargs):
@@ -1014,7 +1017,7 @@ class XGBRFClassifier(XGBClassifier):
                         **kwargs)

    def get_xgb_params(self):
-        params = super(XGBRFClassifier, self).get_xgb_params()
+        params = super().get_xgb_params()
        params['num_parallel_tree'] = self.n_estimators
        return params

@@ -1033,7 +1036,10 @@ class XGBRegressor(XGBModel, XGBRegressorBase):

 @xgboost_model_doc(
    "scikit-learn API for XGBoost random forest regression.",
-    ['model', 'objective'])
+    ['model', 'objective'], extra_parameters='''
+    n_estimators : int
+        Number of trees in random forest to fit.
+''')
 class XGBRFRegressor(XGBRegressor):
    # pylint: disable=missing-docstring
    def __init__(self, learning_rate=1, subsample=0.8, colsample_bynode=0.8,
@@ -1043,7 +1049,7 @@ class XGBRFRegressor(XGBRegressor):
                         reg_lambda=reg_lambda, **kwargs)

    def get_xgb_params(self):
-        params = super(XGBRFRegressor, self).get_xgb_params()
+        params = super().get_xgb_params()
        params['num_parallel_tree'] = self.n_estimators
        return params

--- a/src/gbm/gbtree_model.cc
+++ b/src/gbm/gbtree_model.cc
@@ -1,6 +1,8 @@
 /*!
- * Copyright 2019 by Contributors
+ * Copyright 2019-2020 by Contributors
 */
+#include <utility>
+
 #include "xgboost/json.h"
 #include "xgboost/logging.h"
 #include "gbtree_model.h"
@@ -41,15 +43,14 @@ void GBTreeModel::SaveModel(Json* p_out) const {
  auto& out = *p_out;
  CHECK_EQ(param.num_trees, static_cast<int>(trees.size()));
  out["gbtree_model_param"] = ToJson(param);
-  std::vector<Json> trees_json;
-  size_t t = 0;
-  for (auto const& tree : trees) {
+  std::vector<Json> trees_json(trees.size());
+
+  for (size_t t = 0; t < trees.size(); ++t) {
+    auto const& tree = trees[t];
    Json tree_json{Object()};
    tree->SaveModel(&tree_json);
-    // The field is not used in XGBoost, but might be useful for external project.
-    tree_json["id"] = Integer(t);
-    trees_json.emplace_back(tree_json);
-    t++;
+    tree_json["id"] = Integer(static_cast<Integer::Int>(t));
+    trees_json[t] = std::move(tree_json);
  }

  std::vector<Json> tree_info_json(tree_info.size());
@ -70,9 +71,10 @@ void GBTreeModel::LoadModel(Json const& in) {
  auto const& trees_json = get<Array const>(in["trees"]);
  trees.resize(trees_json.size());

-  for (size_t t = 0; t < trees.size(); ++t) {
-    trees[t].reset( new RegTree() );
-    trees[t]->LoadModel(trees_json[t]);
+  for (size_t t = 0; t < trees_json.size(); ++t) {  // NOLINT
+    auto tree_id = get<Integer>(trees_json[t]["id"]);
+    trees.at(tree_id).reset(new RegTree());
+    trees.at(tree_id)->LoadModel(trees_json[t]);
  }

  tree_info.resize(param.num_trees);
--- a/tests/ci_build/Dockerfile.clang_tidy
+++ b/tests/ci_build/Dockerfile.clang_tidy
@ -1,5 +1,6 @@
-ARG CUDA_VERSION
-FROM nvidia/cuda:$CUDA_VERSION-devel-ubuntu18.04
+ARG CUDA_VERSION_ARG
+FROM nvidia/cuda:$CUDA_VERSION_ARG-devel-ubuntu18.04
+ARG CUDA_VERSION_ARG

 # Environment
 ENV DEBIAN_FRONTEND noninteractive
--- a/tests/ci_build/Dockerfile.gpu
+++ b/tests/ci_build/Dockerfile.gpu
@ -1,5 +1,6 @@
-ARG CUDA_VERSION
-FROM nvidia/cuda:$CUDA_VERSION-runtime-ubuntu16.04
+ARG CUDA_VERSION_ARG
+FROM nvidia/cuda:$CUDA_VERSION_ARG-runtime-ubuntu16.04
+ARG CUDA_VERSION_ARG

 # Environment
 ENV DEBIAN_FRONTEND noninteractive
@ -17,8 +18,8 @@ ENV PATH=/opt/python/bin:$PATH

 # Create new Conda environment with cuDF, Dask, and cuPy
 RUN \
-    conda create -n gpu_test -c rapidsai -c nvidia -c conda-forge -c defaults \
-        python=3.7 cudf=0.14 cudatoolkit=$CUDA_VERSION dask dask-cuda dask-cudf cupy \
+    conda create -n gpu_test -c rapidsai-nightly -c rapidsai -c nvidia -c conda-forge -c defaults \
+        python=3.7 cudf=0.15* cudatoolkit=$CUDA_VERSION_ARG dask dask-cuda dask-cudf cupy \
        numpy pytest scipy scikit-learn pandas matplotlib wheel python-kubernetes urllib3 graphviz hypothesis

 ENV GOSU_VERSION 1.10
--- a/tests/ci_build/Dockerfile.gpu_build
+++ b/tests/ci_build/Dockerfile.gpu_build
@ -1,6 +1,6 @@
-ARG CUDA_VERSION
-FROM nvidia/cuda:$CUDA_VERSION-devel-ubuntu16.04
-ARG CUDA_VERSION
+ARG CUDA_VERSION_ARG
+FROM nvidia/cuda:$CUDA_VERSION_ARG-devel-ubuntu16.04
+ARG CUDA_VERSION_ARG

 # Environment
 ENV DEBIAN_FRONTEND noninteractive
@ -19,7 +19,7 @@ RUN \

 # NCCL2 (License: https://docs.nvidia.com/deeplearning/sdk/nccl-sla/index.html)
 RUN \
-    export CUDA_SHORT=`echo $CUDA_VERSION | egrep -o '[0-9]+\.[0-9]'` && \
+    export CUDA_SHORT=`echo $CUDA_VERSION_ARG | egrep -o '[0-9]+\.[0-9]'` && \
    export NCCL_VERSION=2.7.5-1 && \
    apt-get update && \
    apt-get install -y --allow-downgrades --allow-change-held-packages libnccl2=${NCCL_VERSION}+cuda${CUDA_SHORT} libnccl-dev=${NCCL_VERSION}+cuda${CUDA_SHORT}
--- a/tests/ci_build/Dockerfile.gpu_build_centos6
+++ b/tests/ci_build/Dockerfile.gpu_build_centos6
@ -1,6 +1,6 @@
-ARG CUDA_VERSION
-FROM nvidia/cuda:$CUDA_VERSION-devel-centos6
-ARG CUDA_VERSION
+ARG CUDA_VERSION_ARG
+FROM nvidia/cuda:$CUDA_VERSION_ARG-devel-centos6
+ARG CUDA_VERSION_ARG

 # Environment
 ENV DEBIAN_FRONTEND noninteractive
@ -33,7 +33,7 @@ RUN \

 # NCCL2 (License: https://docs.nvidia.com/deeplearning/sdk/nccl-sla/index.html)
 RUN \
-    export CUDA_SHORT=`echo $CUDA_VERSION | egrep -o '[0-9]+\.[0-9]'` && \
+    export CUDA_SHORT=`echo $CUDA_VERSION_ARG | egrep -o '[0-9]+\.[0-9]'` && \
    export NCCL_VERSION=2.4.8-1 && \
    wget https://developer.download.nvidia.com/compute/machine-learning/repos/rhel7/x86_64/nvidia-machine-learning-repo-rhel7-1.0.0-1.x86_64.rpm && \
    rpm -i nvidia-machine-learning-repo-rhel7-1.0.0-1.x86_64.rpm && \
--- a/tests/ci_build/Dockerfile.gpu_jvm
+++ b/tests/ci_build/Dockerfile.gpu_jvm
@ -1,5 +1,6 @@
-ARG CUDA_VERSION
-FROM nvidia/cuda:$CUDA_VERSION-runtime-ubuntu16.04
+ARG CUDA_VERSION_ARG
+FROM nvidia/cuda:$CUDA_VERSION_ARG-runtime-ubuntu16.04
+ARG CUDA_VERSION_ARG
 ARG JDK_VERSION=8
 ARG SPARK_VERSION=3.0.0

--- a/tests/ci_build/Dockerfile.jvm_gpu_build
+++ b/tests/ci_build/Dockerfile.jvm_gpu_build
@ -1,6 +1,6 @@
-ARG CUDA_VERSION
-FROM nvidia/cuda:$CUDA_VERSION-devel-centos6
-ARG CUDA_VERSION
+ARG CUDA_VERSION_ARG
+FROM nvidia/cuda:$CUDA_VERSION_ARG-devel-centos6
+ARG CUDA_VERSION_ARG

 # Environment
 ENV DEBIAN_FRONTEND noninteractive
@ -30,7 +30,7 @@ RUN \

 # NCCL2 (License: https://docs.nvidia.com/deeplearning/sdk/nccl-sla/index.html)
 RUN \
-    export CUDA_SHORT=`echo $CUDA_VERSION | egrep -o '[0-9]+\.[0-9]'` && \
+    export CUDA_SHORT=`echo $CUDA_VERSION_ARG | egrep -o '[0-9]+\.[0-9]'` && \
    export NCCL_VERSION=2.4.8-1 && \
    wget https://developer.download.nvidia.com/compute/machine-learning/repos/rhel7/x86_64/nvidia-machine-learning-repo-rhel7-1.0.0-1.x86_64.rpm && \
    rpm -i nvidia-machine-learning-repo-rhel7-1.0.0-1.x86_64.rpm && \
--- a/tests/ci_build/deploy_jvm_packages.sh
+++ b/tests/ci_build/deploy_jvm_packages.sh
@ -3,22 +3,32 @@
 set -e
 set -x

-if [ $# -ne 1 ]; then
-  echo "Usage: $0 [spark version]"
+if [ $# -ne 2 ]; then
+  echo "Usage: $0 [spark version] [build_gpu? 0 or 1]"
  exit 1
 fi

 spark_version=$1
+build_gpu=$2

 # Initialize local Maven repository
 ./tests/ci_build/initialize_maven.sh

-rm -rf build/
 cd jvm-packages
+rm -rf $(find . -name target)
+rm -rf ../build/

 # Re-build package without Mock Rabit
 # Deploy to S3 bucket xgboost-maven-repo
+if [[ "$build_gpu" == "0" ]]
+then
+  # Build CPU artifact
  mvn --no-transfer-progress package deploy -P release-to-s3 -Dspark.version=${spark_version} -DskipTests
+else
+  # Build GPU artifact
+  sed -i -e 's/<artifactId>xgboost\(.*\)_\(.*\)<\/artifactId>/<artifactId>xgboost\1-gpu_\2<\/artifactId>/' $(find . -name pom.xml)
+  mvn --no-transfer-progress package deploy -Duse.cuda=ON -P release-to-s3 -Dspark.version=${spark_version} -DskipTests
+fi

 set +x
 set +e
--- a/tests/cpp/test_learner.cc
+++ b/tests/cpp/test_learner.cc
@ -148,7 +148,16 @@ TEST(Learner, JsonModelIO) {
    Json out { Object() };
    learner->SaveModel(&out);

-    learner->LoadModel(out);
+    dmlc::TemporaryDirectory tmpdir;
+
+    std::ofstream fout (tmpdir.path + "/model.json");
+    fout << out;
+    fout.close();
+
+    auto loaded_str = common::LoadSequentialFile(tmpdir.path + "/model.json");
+    Json loaded = Json::Load(StringView{loaded_str.c_str(), loaded_str.size()});
+
+    learner->LoadModel(loaded);
    learner->Configure();

    Json new_in { Object() };
--- a/tests/python/test_cli.py
+++ b/tests/python/test_cli.py
@ -121,6 +121,8 @@ eval[test] = {data_path}
        v = xgboost.__version__
        if v.find('SNAPSHOT') != -1:
            assert msg.split(':')[1].strip() == v.split('-')[0]
+        elif v.find('rc') != -1:
+            assert msg.split(':')[1].strip() == v.split('rc')[0]
        else:
            assert msg.split(':')[1].strip() == v

--- a/tests/python/test_with_dask.py
+++ b/tests/python/test_with_dask.py
@ -5,6 +5,7 @@ import sys
 import numpy as np
 import json
 import asyncio
+from sklearn.datasets import make_classification

 if sys.platform.startswith("win"):
    pytest.skip("Skipping dask tests on Windows", allow_module_level=True)
@ -36,7 +37,7 @@ def generate_array():


 def test_from_dask_dataframe():
-    with LocalCluster(n_workers=5) as cluster:
+    with LocalCluster(n_workers=kWorkers) as cluster:
        with Client(cluster) as client:
            X, y = generate_array()

@ -74,7 +75,7 @@ def test_from_dask_dataframe():


 def test_from_dask_array():
-    with LocalCluster(n_workers=5, threads_per_worker=5) as cluster:
+    with LocalCluster(n_workers=kWorkers, threads_per_worker=5) as cluster:
        with Client(cluster) as client:
            X, y = generate_array()
            dtrain = DaskDMatrix(client, X, y)
@ -104,8 +105,28 @@ def test_from_dask_array():
            assert np.all(single_node_predt == from_arr.compute())


+def test_dask_predict_shape_infer():
+    with LocalCluster(n_workers=kWorkers) as cluster:
+        with Client(cluster) as client:
+            X, y = make_classification(n_samples=1000, n_informative=5,
+                                       n_classes=3)
+            X_ = dd.from_array(X, chunksize=100)
+            y_ = dd.from_array(y, chunksize=100)
+            dtrain = xgb.dask.DaskDMatrix(client, data=X_, label=y_)
+
+            model = xgb.dask.train(
+                client,
+                {"objective": "multi:softprob", "num_class": 3},
+                dtrain=dtrain
+            )
+
+            preds = xgb.dask.predict(client, model, dtrain)
+            assert preds.shape[0] == preds.compute().shape[0]
+            assert preds.shape[1] == preds.compute().shape[1]
+
+
 def test_dask_missing_value_reg():
-    with LocalCluster(n_workers=5) as cluster:
+    with LocalCluster(n_workers=kWorkers) as cluster:
        with Client(cluster) as client:
            X_0 = np.ones((20 // 2, kCols))
            X_1 = np.zeros((20 // 2, kCols))
@ -144,19 +165,19 @@ def test_dask_missing_value_cls():
                                             missing=0.0)
            cls.client = client
            cls.fit(X, y, eval_set=[(X, y)])
-            dd_predt = cls.predict(X).compute()
+            dd_pred_proba = cls.predict_proba(X).compute()

            np_X = X.compute()
-            np_predt = cls.get_booster().predict(
+            np_pred_proba = cls.get_booster().predict(
                xgb.DMatrix(np_X, missing=0.0))
-            np.testing.assert_allclose(np_predt, dd_predt)
+            np.testing.assert_allclose(np_pred_proba, dd_pred_proba)

            cls = xgb.dask.DaskXGBClassifier()
            assert hasattr(cls, 'missing')


 def test_dask_regressor():
-    with LocalCluster(n_workers=5) as cluster:
+    with LocalCluster(n_workers=kWorkers) as cluster:
        with Client(cluster) as client:
            X, y = generate_array()
            regressor = xgb.dask.DaskXGBRegressor(verbosity=1, n_estimators=2)
@ -178,7 +199,7 @@ def test_dask_regressor():


 def test_dask_classifier():
-    with LocalCluster(n_workers=5) as cluster:
+    with LocalCluster(n_workers=kWorkers) as cluster:
        with Client(cluster) as client:
            X, y = generate_array()
            y = (y * 10).astype(np.int32)
@ -201,7 +222,18 @@ def test_dask_classifier():
            assert len(list(history['validation_0'])) == 1
            assert len(history['validation_0']['merror']) == 2

+            # Test .predict_proba()
+            probas = classifier.predict_proba(X)
            assert classifier.n_classes_ == 10
+            assert probas.ndim == 2
+            assert probas.shape[0] == kRows
+            assert probas.shape[1] == 10
+
+            cls_booster = classifier.get_booster()
+            single_node_proba = cls_booster.inplace_predict(X.compute())
+
+            np.testing.assert_allclose(single_node_proba,
+                                       probas.compute())

            # Test with dataframe.
            X_d = dd.from_dask_array(X)
@ -218,7 +250,7 @@ def test_dask_classifier():
@pytest.mark.skipif(**tm.no_sklearn())
 def test_sklearn_grid_search():
    from sklearn.model_selection import GridSearchCV
-    with LocalCluster(n_workers=4) as cluster:
+    with LocalCluster(n_workers=kWorkers) as cluster:
        with Client(cluster) as client:
            X, y = generate_array()
            reg = xgb.dask.DaskXGBRegressor(learning_rate=0.1,
@ -292,7 +324,9 @@ def run_empty_dmatrix_cls(client, parameters):
                         evals=[(dtrain, 'validation')],
                         num_boost_round=2)
    predictions = xgb.dask.predict(client=client, model=out,
-                                   data=dtrain).compute()
+                                   data=dtrain)
+    assert predictions.shape[1] == n_classes
+    predictions = predictions.compute()
    _check_outputs(out, predictions)

    # train has more rows than evals
@ -315,7 +349,7 @@ def run_empty_dmatrix_cls(client, parameters):
 # environment and Exact doesn't support it.

 def test_empty_dmatrix_hist():
-    with LocalCluster(n_workers=5) as cluster:
+    with LocalCluster(n_workers=kWorkers) as cluster:
        with Client(cluster) as client:
            parameters = {'tree_method': 'hist'}
            run_empty_dmatrix_reg(client, parameters)
@ -323,7 +357,7 @@ def test_empty_dmatrix_hist():


 def test_empty_dmatrix_approx():
-    with LocalCluster(n_workers=5) as cluster:
+    with LocalCluster(n_workers=kWorkers) as cluster:
        with Client(cluster) as client:
            parameters = {'tree_method': 'approx'}
            run_empty_dmatrix_reg(client, parameters)
@ -397,7 +431,13 @@ async def run_dask_classifier_asyncio(scheduler_address):
        assert len(list(history['validation_0'])) == 1
        assert len(history['validation_0']['merror']) == 2

+        # Test .predict_proba()
+        probas = await classifier.predict_proba(X)
        assert classifier.n_classes_ == 10
+        assert probas.ndim == 2
+        assert probas.shape[0] == kRows
+        assert probas.shape[1] == 10
+

        # Test with dataframe.
        X_d = dd.from_dask_array(X)
Author	SHA1	Message	Date
Nan Zhu	00774eeac3	[jvm-packages] update version number for 1.2 branch (#6427 ) * [jvm-packages]update version number of 1.2 branch * update ver	2020-11-23 14:16:30 -08:00
Philip Hyunsu Cho	bcb15a980f	1.2.1 patch release (#6206 ) * Hide C++ symbols from dmlc-core (#6188) * Up version to 1.2.1 * Fix lint * [CI] Fix Docker build for CUDA 11 (#6202) * Update Dockerfile.gpu	2020-10-12 15:10:16 -07:00
Tong He	0cd0dad0b5	Fix CRAN submission (#6076 )	2020-09-01 23:38:27 -07:00
Philip Hyunsu Cho	884098ec22	[CI] Fix CRAN check (#6067 )	2020-08-28 21:24:49 +08:00
Hyunsu Cho	738786680b	Release 1.2.0	2020-08-22 18:25:18 -07:00
Philip Hyunsu Cho	04232c01b2	[CI] Fix broken tests (#6048 )	2020-08-22 11:43:38 -07:00
Jiaming Yuan	0353a78ab7	Fix scikit learn cls doc. (#6041 )	2020-08-20 19:25:12 -07:00
Hyunsu Cho	0089a0e6bf	Fix another typo	2020-08-12 19:29:08 +00:00
Philip Hyunsu Cho	03a68a1714	Fix typo	2020-08-12 01:34:33 -07:00
Hyunsu Cho	a0da8a7e0a	Make RC2	2020-08-12 00:50:51 -07:00
Hyunsu Cho	eee4eff49b	[CI] Build GPU-enabled JAR artifact and deploy to xgboost-maven-repo	2020-08-12 00:50:47 -07:00
Jiaming Yuan	936a854baa	Back port fixes to 1.2 (#6002 ) * Fix sklearn doc. (#5980) * Enforce tree order in JSON. (#5974) * Make JSON model IO more future proof by using tree id in model loading. * Fix dask predict shape infer. (#5989) * [Breaking] Fix .predict() method and add .predict_proba() in xgboost.dask.DaskXGBClassifier (#5986)	2020-08-11 20:22:31 +08:00
Hyunsu Cho	7856da5827	[CI] Use mgpu machine to run gpu hist unit tests	2020-08-02 02:33:05 -07:00
Hyunsu Cho	50a0def6c3	Make RC1	2020-08-02 08:56:20 +00:00
Hyunsu Cho	9116a0ec10	Fix a unit test on CLI, to handle RC versions	2020-08-02 08:56:15 +00:00