Compare commits

...

31 Commits

Author SHA1 Message Date
Hyunsu Cho
963a17b771 [CI] Upload Doxygen to correct destination 2021-04-13 15:09:53 -07:00
Jiaming Yuan
000292ce6d
Bump release version to 1.3.3. (#6624) 2021-01-20 19:23:31 +08:00
Jiaming Yuan
d3ec116322
Revert ntree limit fix (#6616) (#6622)
The old (pre-fix) best_ntree_limit ignored the num_class parameter, which is incorrect. Previously we worked around it in the C++ layer to avoid possible breaking changes in other language bindings, but the Python interpretation stayed incorrect. The PR fixed Python to take num_class into account but didn't remove the old workaround, so the tree calculation in the predictor is incorrect; see PredictBatch in CPUPredictor.
2021-01-20 04:20:07 +08:00
Jiaming Yuan
a018028471
Remove type check for solaris. (#6606) 2021-01-15 18:20:39 +08:00
fis
3e343159ef Release patch release 1.3.2 2021-01-13 17:35:00 +08:00
Jiaming Yuan
99e802f2ff
Remove duplicated DMatrix. (#6592) (#6599) 2021-01-13 04:44:06 +08:00
Jiaming Yuan
6a29afb480
Fix evaluation result for XGBRanker. (#6594) (#6600)
* Remove duplicated code, which fixes typo `evals_result` -> `evals_result_`.
2021-01-13 04:42:43 +08:00
Jiaming Yuan
8e321adac8
Support Solaris. (#6578) (#6588)
* Add system header.

* Remove use of TR1 on Solaris

Co-authored-by: Hyunsu Cho <chohyu01@cs.washington.edu>
2021-01-11 02:31:29 +08:00
Jiaming Yuan
d0ec65520a
[backport] Fix best_ntree_limit for dart and gblinear. (#6579) (#6587)
* [backport] Fix `best_ntree_limit` for dart and gblinear. (#6579)

* Backport num group test fix.
2021-01-11 01:46:05 +08:00
Jiaming Yuan
7aec915dcd
[Backport] Rename data to X in predict_proba. (#6555) (#6586)
* [Breaking] Rename `data` to `X` in `predict_proba`. (#6555)

Newer scikit-learn versions pass arguments by keyword, and `X` is the predefined keyword.

* Use pip to install latest Python graphviz on Windows CI.

* Suppress health check.
2021-01-10 16:05:17 +08:00
Philip Hyunsu Cho
a78d0d4110
Release patch release 1.3.1 (#6543) 2020-12-21 23:22:32 -08:00
Jiaming Yuan
76c361431f
Remove cupy.array_equal, since it's not compatible with cuPy 7.8 (#6528) (#6535)
Co-authored-by: Philip Hyunsu Cho <chohyu01@cs.washington.edu>
2020-12-20 15:11:50 +08:00
Jiaming Yuan
d95d02132a
Fix handling of print period in EvaluationMonitor (#6499) (#6534)
Co-authored-by: Kirill Shvets <kirill.shvets@intel.com>

Co-authored-by: ShvetsKS <33296480+ShvetsKS@users.noreply.github.com>
Co-authored-by: Kirill Shvets <kirill.shvets@intel.com>
2020-12-20 15:07:42 +08:00
Jiaming Yuan
7109c6c1f2
[backport] Move metric configuration into booster. (#6504) (#6533) 2020-12-20 10:36:32 +08:00
Jiaming Yuan
bce7ca313c
[backport] Fix save_best. (#6523) 2020-12-18 20:00:29 +08:00
Jiaming Yuan
8be2cd8c91
Enable loading model from <1.0.0 trained with objective='binary:logitraw' (#6517) (#6524)
* Enable loading model from <1.0.0 trained with objective='binary:logitraw'

* Add binary:logitraw in model compatibility testing suite

* Feedback from @trivialfis: Override ProbToMargin() for LogisticRaw

Co-authored-by: Jiaming Yuan <jm.yuan@outlook.com>

Co-authored-by: Philip Hyunsu Cho <chohyu01@cs.washington.edu>
2020-12-18 04:10:09 +08:00
Philip Hyunsu Cho
c5f0cdbc72
Hot fix for libgomp vendoring (#6482)
* Hot fix for libgomp vendoring

* Set post0 in setup.py
2020-12-09 10:04:45 -08:00
Jiaming Yuan
1bf3899983 Fix dask ip resolution. (#6475)
This adopts the solution used in dask/dask-xgboost#40, which employs `get_host_ip` from the dmlc-core tracker.
2020-12-07 16:38:16 -08:00
Jiaming Yuan
c39f6b25f0 Fix filtering callable objects in skl xgb param. (#6466)
Co-authored-by: Hyunsu Cho <chohyu01@cs.washington.edu>
2020-12-07 16:38:16 -08:00
Philip Hyunsu Cho
2b3e301543 [CI] Fix CentOS 6 Docker images (#6467) 2020-12-07 16:38:16 -08:00
Hyunsu Cho
10d3419fa6 Release 1.3.0 2020-12-03 21:35:09 -08:00
Philip Hyunsu Cho
b273e5bd4c Vendor libgomp in the manylinux Python wheel (#6461)
* Vendor libgomp in the manylinux2014_aarch64 wheel

* Use vault repo, since CentOS 6 has reached End-of-Life on Nov 30

* Vendor libgomp in the manylinux2010_x86_64 wheel

* Run verification step inside the container
2020-12-03 21:29:40 -08:00
Philip Hyunsu Cho
3a83fcb0eb Enforce row-major order in cuPy array (#6459) 2020-12-03 21:29:24 -08:00
hzy001
3efc4ea0d1 Fix broken links. (#6455)
Co-authored-by: Hao Ziyu <haoziyu@qiyi.com>
Co-authored-by: fis <jm.yuan@outlook.com>
2020-12-03 21:29:03 -08:00
Jiaming Yuan
a2c778e2d1 Fix period in evaluation monitor. (#6441) 2020-12-03 21:28:45 -08:00
Jiaming Yuan
8a0db293c5 Fix CLI ranking demo. (#6439)
Save model at final round.
2020-12-03 21:28:28 -08:00
Honza Sterba
028ec5f028 Optionally fail when gpu_id is set to an invalid value (#6342) 2020-12-03 21:27:58 -08:00
ShvetsKS
38c80bcec4 Thread local memory allocation for BuildHist (#6358)
* thread mem locality

* fix apply

* cleanup

* fix lint

* fix tests

* simple try

* fix

* fix

* apply comments

* fix comments

* fix

* apply simple comment

Co-authored-by: ShvetsKS <kirill.shvets@intel.com>
2020-12-03 21:27:31 -08:00
Philip Hyunsu Cho
16ff63905d [CI] Upgrade cuDF and RMM to 0.17 nightlies (#6434) 2020-12-03 21:27:01 -08:00
Philip Hyunsu Cho
a9b09919f9 [R] Fix R package installation via CMake (#6423) 2020-12-03 21:26:29 -08:00
Hyunsu Cho
f3b060401a Release 1.3.0 RC1 2020-11-21 11:36:08 -08:00
65 changed files with 563 additions and 239 deletions

View File

@ -192,7 +192,7 @@ jobs:
run: |
cd build/
tar cvjf ${{ steps.extract_branch.outputs.branch }}.tar.bz2 doc_doxygen/
python -m awscli s3 cp ./${{ steps.extract_branch.outputs.branch }}.tar.bz2 s3://xgboost-docs/ --acl public-read
python -m awscli s3 cp ./${{ steps.extract_branch.outputs.branch }}.tar.bz2 s3://xgboost-docs/doxygen/ --acl public-read
if: github.ref == 'refs/heads/master' || contains(github.ref, 'refs/heads/release_')
env:
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID_IAM_S3_UPLOADER }}

View File

@ -52,6 +52,7 @@ addons:
apt:
packages:
- snapd
- unzip
before_install:
- source tests/travis/travis_setup_env.sh

View File

@ -1,5 +1,5 @@
cmake_minimum_required(VERSION 3.13)
project(xgboost LANGUAGES CXX C VERSION 1.3.0)
project(xgboost LANGUAGES CXX C VERSION 1.3.3)
include(cmake/Utils.cmake)
list(APPEND CMAKE_MODULE_PATH "${xgboost_SOURCE_DIR}/cmake/modules")
cmake_policy(SET CMP0022 NEW)

Jenkinsfile (vendored), 11 changed lines
View File

@ -190,11 +190,20 @@ def BuildCUDA(args) {
if (env.BRANCH_NAME != 'master' && !(env.BRANCH_NAME.startsWith('release'))) {
arch_flag = "-DGPU_COMPUTE_VER=75"
}
def wheel_tag = "manylinux2010_x86_64"
sh """
${dockerRun} ${container_type} ${docker_binary} ${docker_args} tests/ci_build/build_via_cmake.sh -DUSE_CUDA=ON -DUSE_NCCL=ON -DOPEN_MP:BOOL=ON -DHIDE_CXX_SYMBOLS=ON ${arch_flag}
${dockerRun} ${container_type} ${docker_binary} ${docker_args} bash -c "cd python-package && rm -rf dist/* && python setup.py bdist_wheel --universal"
${dockerRun} ${container_type} ${docker_binary} ${docker_args} python tests/ci_build/rename_whl.py python-package/dist/*.whl ${commit_id} manylinux2010_x86_64
${dockerRun} ${container_type} ${docker_binary} ${docker_args} python tests/ci_build/rename_whl.py python-package/dist/*.whl ${commit_id} ${wheel_tag}
"""
if (args.cuda_version == ref_cuda_ver) {
sh """
${dockerRun} auditwheel_x86_64 ${docker_binary} auditwheel repair --plat ${wheel_tag} python-package/dist/*.whl
mv -v wheelhouse/*.whl python-package/dist/
# Make sure that libgomp.so is vendored in the wheel
${dockerRun} auditwheel_x86_64 ${docker_binary} bash -c "unzip -l python-package/dist/*.whl | grep libgomp || exit -1"
"""
}
echo 'Stashing Python wheel...'
stash name: "xgboost_whl_cuda${args.cuda_version}", includes: 'python-package/dist/*.whl'
if (args.cuda_version == ref_cuda_ver && (env.BRANCH_NAME == 'master' || env.BRANCH_NAME.startsWith('release'))) {

View File

@ -1,7 +1,7 @@
Package: xgboost
Type: Package
Title: Extreme Gradient Boosting
Version: 1.3.0.1
Version: 1.3.3.1
Date: 2020-08-28
Authors@R: c(
person("Tianqi", "Chen", role = c("aut"),

View File

@ -2,7 +2,6 @@
# of saved model files from XGBoost version 0.90 and 1.0.x.
library(xgboost)
library(Matrix)
source('./generate_models_params.R')
set.seed(0)
metadata <- list(
@ -53,11 +52,16 @@ generate_logistic_model <- function () {
y <- sample(0:1, size = metadata$kRows, replace = TRUE)
stopifnot(max(y) == 1, min(y) == 0)
data <- xgb.DMatrix(X, label = y, weight = w)
params <- list(tree_method = 'hist', num_parallel_tree = metadata$kForests,
max_depth = metadata$kMaxDepth, objective = 'binary:logistic')
booster <- xgb.train(params, data, nrounds = metadata$kRounds)
save_booster(booster, 'logit')
objective <- c('binary:logistic', 'binary:logitraw')
name <- c('logit', 'logitraw')
for (i in seq_len(length(objective))) {
data <- xgb.DMatrix(X, label = y, weight = w)
params <- list(tree_method = 'hist', num_parallel_tree = metadata$kForests,
max_depth = metadata$kMaxDepth, objective = objective[i])
booster <- xgb.train(params, data, nrounds = metadata$kRounds)
save_booster(booster, name[i])
}
}
generate_classification_model <- function () {

View File

@ -39,6 +39,10 @@ run_booster_check <- function (booster, name) {
testthat::expect_equal(config$learner$learner_train_param$objective, 'multi:softmax')
testthat::expect_equal(as.numeric(config$learner$learner_model_param$num_class),
metadata$kClasses)
} else if (name == 'logitraw') {
testthat::expect_equal(get_num_tree(booster), metadata$kForests * metadata$kRounds)
testthat::expect_equal(as.numeric(config$learner$learner_model_param$num_class), 0)
testthat::expect_equal(config$learner$learner_train_param$objective, 'binary:logitraw')
} else if (name == 'logit') {
testthat::expect_equal(get_num_tree(booster), metadata$kForests * metadata$kRounds)
testthat::expect_equal(as.numeric(config$learner$learner_model_param$num_class), 0)

View File

@ -1 +1 @@
@xgboost_VERSION_MAJOR@.@xgboost_VERSION_MINOR@.@xgboost_VERSION_PATCH@-SNAPSHOT
@xgboost_VERSION_MAJOR@.@xgboost_VERSION_MINOR@.@xgboost_VERSION_PATCH@

View File

@ -6,11 +6,11 @@ function(setup_rpackage_install_target rlib_target build_dir)
install(
DIRECTORY "${xgboost_SOURCE_DIR}/R-package"
DESTINATION "${build_dir}"
REGEX "src/*" EXCLUDE
REGEX "R-package/configure" EXCLUDE
PATTERN "src/*" EXCLUDE
PATTERN "R-package/configure" EXCLUDE
)
install(TARGETS ${rlib_target}
LIBRARY DESTINATION "${build_dir}/R-package/src/"
RUNTIME DESTINATION "${build_dir}/R-package/src/")
install(SCRIPT ${PROJECT_BINARY_DIR}/RPackageInstall.cmake)
endfunction()
endfunction()

View File

@ -62,7 +62,7 @@ test:data = "agaricus.txt.test"
We use the tree booster and logistic regression objective in our setting. This indicates that we accomplish our task using classic gradient boosting regression tree(GBRT), which is a promising method for binary classification.
The parameters shown in the example gives the most common ones that are needed to use xgboost.
If you are interested in more parameter settings, the complete parameter settings and detailed descriptions are [here](../../doc/parameter.rst). Besides putting the parameters in the configuration file, we can set them by passing them as arguments as below:
If you are interested in more parameter settings, the complete parameter settings and detailed descriptions are [here](https://xgboost.readthedocs.io/en/stable/parameter.html). Besides putting the parameters in the configuration file, we can set them by passing them as arguments as below:
```
../../xgboost mushroom.conf max_depth=6
@ -161,4 +161,3 @@ Eg. ```nthread=10```
Set nthread to be the number of your real cpu (On Unix, this can be found using ```lscpu```)
Some systems will have ```Thread(s) per core = 2```, for example, a 4 core cpu with 8 threads, in such case set ```nthread=4``` and not 8.

View File

@ -1,6 +1,6 @@
Regression
====
Using XGBoost for regression is very similar to using it for binary classification. We suggest that you can refer to the [binary classification demo](../binary_classification) first. In XGBoost if we use negative log likelihood as the loss function for regression, the training procedure is same as training binary classifier of XGBoost.
Using XGBoost for regression is very similar to using it for binary classification. We suggest that you can refer to the [binary classification demo](../binary_classification) first. In XGBoost if we use negative log likelihood as the loss function for regression, the training procedure is same as training binary classifier of XGBoost.
### Tutorial
The dataset we used is the [computer hardware dataset from UCI repository](https://archive.ics.uci.edu/ml/datasets/Computer+Hardware). The demo for regression is almost the same as the [binary classification demo](../binary_classification), except a little difference in general parameter:
@ -14,4 +14,3 @@ objective = reg:squarederror
```
The input format is same as binary classification, except that the label is now the target regression values. We use linear regression here, if we want use objective = reg:logistic logistic regression, the label needed to be pre-scaled into [0,1].

View File

@ -60,9 +60,9 @@ This is a list of short codes introducing different functionalities of xgboost p
Most of examples in this section are based on CLI or python version.
However, the parameter settings can be applied to all versions
- [Binary classification](binary_classification)
- [Binary classification](CLI/binary_classification)
- [Multiclass classification](multiclass_classification)
- [Regression](regression)
- [Regression](CLI/regression)
- [Learning to Rank](rank)
### Benchmarks

View File

@ -5,9 +5,9 @@ objective="rank:pairwise"
# Tree Booster Parameters
# step size shrinkage
eta = 0.1
eta = 0.1
# minimum loss reduction required to make a further partition
gamma = 1.0
gamma = 1.0
# minimum sum of instance weight(hessian) needed in a child
min_child_weight = 0.1
# maximum depth of a tree
@ -17,12 +17,10 @@ max_depth = 6
# the number of round to do boosting
num_round = 4
# 0 means do not save any model except the final round model
save_period = 0
save_period = 0
# The path of training data
data = "mq2008.train"
data = "mq2008.train"
# The path of validation data, used to monitor training process, here [test] sets name of the validation set
eval[test] = "mq2008.vali"
# The path of test data
test:data = "mq2008.test"
eval[test] = "mq2008.vali"
# The path of test data
test:data = "mq2008.test"

View File

@ -2,7 +2,6 @@
Introduction to Boosted Trees
#############################
XGBoost stands for "Extreme Gradient Boosting", where the term "Gradient Boosting" originates from the paper *Greedy Function Approximation: A Gradient Boosting Machine*, by Friedman.
This is a tutorial on gradient boosted trees, and most of the content is based on `these slides <http://homes.cs.washington.edu/~tqchen/pdf/BoostedTree.pdf>`_ by Tianqi Chen, the original author of XGBoost.
The **gradient boosted trees** has been around for a while, and there are a lot of materials on the topic.
This tutorial will explain boosted trees in a self-contained and principled way using the elements of supervised learning.

View File

@ -55,7 +55,7 @@
#endif // defined(__GNUC__) && ((__GNUC__ == 4 && __GNUC_MINOR__ >= 8) || __GNUC__ > 4)
#if defined(__GNUC__) && ((__GNUC__ == 4 && __GNUC_MINOR__ >= 8) || __GNUC__ > 4) && \
!defined(__CUDACC__)
!defined(__CUDACC__) && !defined(__sun) && !defined(sun)
#include <parallel/algorithm>
#define XGBOOST_PARALLEL_SORT(X, Y, Z) __gnu_parallel::sort((X), (Y), (Z))
#define XGBOOST_PARALLEL_STABLE_SORT(X, Y, Z) \

View File

@ -11,6 +11,7 @@
#include <string>
namespace xgboost {
struct GenericParameter : public XGBoostParameter<GenericParameter> {
// Constant representing the device ID of CPU.
static int32_t constexpr kCpuId = -1;
@ -26,6 +27,8 @@ struct GenericParameter : public XGBoostParameter<GenericParameter> {
int nthread;
// primary device, -1 means no gpu.
int gpu_id;
// fail when gpu_id is invalid
bool fail_on_invalid_gpu_id {false};
// gpu page size in external memory mode, 0 means using the default.
size_t gpu_page_size;
bool enable_experimental_json_serialization {true};
@ -64,6 +67,9 @@ struct GenericParameter : public XGBoostParameter<GenericParameter> {
.set_default(-1)
.set_lower_bound(-1)
.describe("The primary GPU device ordinal.");
DMLC_DECLARE_FIELD(fail_on_invalid_gpu_id)
.set_default(false)
.describe("Fail with error when gpu_id is invalid.");
DMLC_DECLARE_FIELD(gpu_page_size)
.set_default(0)
.set_lower_bound(0)
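The new `fail_on_invalid_gpu_id` flag turns the silent fallback (a warning plus `gpu_id % n_gpus`, see the generic_parameters.cc hunk further down) into a hard error. A hedged usage sketch from the Python side, mirroring the test added at the end of this diff (random data; requires a GPU-enabled build):

```python
import numpy as np
import xgboost as xgb

# Hedged sketch: with the flag set, an out-of-range gpu_id raises instead of
# falling back with a warning (behavior taken from the hunks in this diff).
dtrain = xgb.DMatrix(np.random.rand(100, 4), label=np.random.randint(0, 2, 100))
params = {'tree_method': 'gpu_hist', 'gpu_id': 9999, 'fail_on_invalid_gpu_id': True}
try:
    xgb.train(params, dtrain, num_boost_round=1)
except xgb.core.XGBoostError as err:
    print(err)  # expected to mention that gpu_id 9999 is invalid
```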

View File

@ -6,6 +6,6 @@
#define XGBOOST_VER_MAJOR 1
#define XGBOOST_VER_MINOR 3
#define XGBOOST_VER_PATCH 0
#define XGBOOST_VER_PATCH 3
#endif // XGBOOST_VERSION_CONFIG_H_

View File

@ -34,9 +34,9 @@ TO_VERSION=$2
sed_i() {
perl -p -000 -e "$1" "$2" > "$2.tmp" && mv "$2.tmp" "$2"
}
export -f sed_i
BASEDIR=$(dirname $0)/..
find "$BASEDIR" -name 'pom.xml' -not -path '*target*' -print \
-exec bash -c \

View File

@ -6,7 +6,7 @@
<groupId>ml.dmlc</groupId>
<artifactId>xgboost-jvm_2.12</artifactId>
<version>1.3.0-SNAPSHOT</version>
<version>1.3.3</version>
<packaging>pom</packaging>
<name>XGBoost JVM Package</name>
<description>JVM Package for XGBoost</description>

View File

@ -6,10 +6,10 @@
<parent>
<groupId>ml.dmlc</groupId>
<artifactId>xgboost-jvm_2.12</artifactId>
<version>1.3.0-SNAPSHOT</version>
<version>1.3.3</version>
</parent>
<artifactId>xgboost4j-example_2.12</artifactId>
<version>1.3.0-SNAPSHOT</version>
<version>1.3.3</version>
<packaging>jar</packaging>
<build>
<plugins>
@ -26,7 +26,7 @@
<dependency>
<groupId>ml.dmlc</groupId>
<artifactId>xgboost4j-spark_${scala.binary.version}</artifactId>
<version>1.3.0-SNAPSHOT</version>
<version>1.3.3</version>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
@ -37,7 +37,7 @@
<dependency>
<groupId>ml.dmlc</groupId>
<artifactId>xgboost4j-flink_${scala.binary.version}</artifactId>
<version>1.3.0-SNAPSHOT</version>
<version>1.3.3</version>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>

View File

@ -6,10 +6,10 @@
<parent>
<groupId>ml.dmlc</groupId>
<artifactId>xgboost-jvm_2.12</artifactId>
<version>1.3.0-SNAPSHOT</version>
<version>1.3.3</version>
</parent>
<artifactId>xgboost4j-flink_2.12</artifactId>
<version>1.3.0-SNAPSHOT</version>
<version>1.3.3</version>
<build>
<plugins>
<plugin>
@ -26,7 +26,7 @@
<dependency>
<groupId>ml.dmlc</groupId>
<artifactId>xgboost4j_${scala.binary.version}</artifactId>
<version>1.3.0-SNAPSHOT</version>
<version>1.3.3</version>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>

View File

@ -6,10 +6,10 @@
<parent>
<groupId>ml.dmlc</groupId>
<artifactId>xgboost-jvm_2.12</artifactId>
<version>1.3.0-SNAPSHOT</version>
<version>1.3.3</version>
</parent>
<artifactId>xgboost4j-gpu_2.12</artifactId>
<version>1.3.0-SNAPSHOT</version>
<version>1.3.3</version>
<packaging>jar</packaging>
<dependencies>

View File

@ -6,7 +6,7 @@
<parent>
<groupId>ml.dmlc</groupId>
<artifactId>xgboost-jvm_2.12</artifactId>
<version>1.3.0-SNAPSHOT</version>
<version>1.3.3</version>
</parent>
<artifactId>xgboost4j-spark-gpu_2.12</artifactId>
<build>
@ -24,7 +24,7 @@
<dependency>
<groupId>ml.dmlc</groupId>
<artifactId>xgboost4j-gpu_${scala.binary.version}</artifactId>
<version>1.3.0-SNAPSHOT</version>
<version>1.3.3</version>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>

View File

@ -6,7 +6,7 @@
<parent>
<groupId>ml.dmlc</groupId>
<artifactId>xgboost-jvm_2.12</artifactId>
<version>1.3.0-SNAPSHOT</version>
<version>1.3.3</version>
</parent>
<artifactId>xgboost4j-spark_2.12</artifactId>
<build>
@ -24,7 +24,7 @@
<dependency>
<groupId>ml.dmlc</groupId>
<artifactId>xgboost4j_${scala.binary.version}</artifactId>
<version>1.3.0-SNAPSHOT</version>
<version>1.3.3</version>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>

View File

@ -6,10 +6,10 @@
<parent>
<groupId>ml.dmlc</groupId>
<artifactId>xgboost-jvm_2.12</artifactId>
<version>1.3.0-SNAPSHOT</version>
<version>1.3.3</version>
</parent>
<artifactId>xgboost4j_2.12</artifactId>
<version>1.3.0-SNAPSHOT</version>
<version>1.3.3</version>
<packaging>jar</packaging>
<dependencies>

View File

@ -1 +1 @@
1.3.0-SNAPSHOT
1.3.3

View File

@ -456,6 +456,7 @@ class LearningRateScheduler(TrainingCallback):
def after_iteration(self, model, epoch, evals_log):
model.set_param('learning_rate', self.learning_rates(epoch))
return False
# pylint: disable=too-many-instance-attributes
@ -565,7 +566,7 @@ class EarlyStopping(TrainingCallback):
def after_training(self, model: Booster):
try:
if self.save_best:
model = model[: int(model.attr('best_iteration'))]
model = model[: int(model.attr('best_iteration')) + 1]
except XGBoostError as e:
raise XGBoostError('`save_best` is not applicable to current booster') from e
return model
@ -621,7 +622,7 @@ class EvaluationMonitor(TrainingCallback):
msg += self._fmt_metric(data, metric_name, score, stdv)
msg += '\n'
if (epoch % self.period) != 0:
if (epoch % self.period) == 0 or self.period == 1:
rabit.tracker_print(msg)
self._latest = None
else:
@ -677,6 +678,7 @@ class TrainingCheckPoint(TrainingCallback):
else:
model.save_model(path)
self._epoch += 1
return False
class LegacyCallbacks:
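Two behavior fixes sit in this hunk: `EarlyStopping(save_best=True)` now slices the booster up to `best_iteration + 1` so the best round itself is kept, and `EvaluationMonitor` prints on rounds where `epoch % period == 0`. A minimal sketch of the off-by-one being fixed (plain Python, not the callback itself):

```python
# Python slicing excludes the end index, so keeping rounds 0..best_iteration
# inclusive requires `best_iteration + 1` as the slice end.
best_iteration = 7
rounds = list(range(10))
kept = rounds[: best_iteration + 1]
assert kept[-1] == best_iteration and len(kept) == best_iteration + 1
```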

View File

@ -1,11 +1,12 @@
# coding: utf-8
# pylint: disable=too-many-arguments, too-many-branches, invalid-name
# pylint: disable=too-many-lines, too-many-locals
# pylint: disable=too-many-lines, too-many-locals, no-self-use
"""Core XGBoost Library."""
import collections
# pylint: disable=no-name-in-module,import-error
from collections.abc import Mapping
# pylint: enable=no-name-in-module,import-error
from typing import Dict, Union, List
import ctypes
import os
import re
@ -1012,6 +1013,7 @@ class Booster(object):
_check_call(_LIB.XGBoosterCreate(dmats, c_bst_ulong(len(cache)),
ctypes.byref(self.handle)))
params = params or {}
params = self._configure_metrics(params.copy())
if isinstance(params, list):
params.append(('validate_parameters', True))
else:
@ -1041,6 +1043,17 @@ class Booster(object):
else:
raise TypeError('Unknown type:', model_file)
def _configure_metrics(self, params: Union[Dict, List]) -> Union[Dict, List]:
if isinstance(params, dict) and 'eval_metric' in params \
and isinstance(params['eval_metric'], list):
params = dict((k, v) for k, v in params.items())
eval_metrics = params['eval_metric']
params.pop("eval_metric", None)
params = list(params.items())
for eval_metric in eval_metrics:
params += [('eval_metric', eval_metric)]
return params
def __del__(self):
if hasattr(self, 'handle') and self.handle is not None:
_check_call(_LIB.XGBoosterFree(self.handle))
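`_configure_metrics` (moved into the Booster from `training.py`, see the training.py hunk further down) expands a list-valued `eval_metric` into repeated key/value pairs so the stringified list never reaches the native core, as the test added at the end of this diff notes. A standalone sketch of the same transformation (illustrative only):

```python
# Illustrative only: expand a list-valued eval_metric into repeated pairs,
# mirroring _configure_metrics above.
params = {'objective': 'binary:logistic', 'eval_metric': ['error', 'auc']}
if isinstance(params.get('eval_metric'), list):
    params = dict(params)
    metrics = params.pop('eval_metric')
    params = list(params.items()) + [('eval_metric', m) for m in metrics]
print(params)
# [('objective', 'binary:logistic'), ('eval_metric', 'error'), ('eval_metric', 'auc')]
```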

View File

@ -33,7 +33,7 @@ from .compat import lazy_isinstance
from .core import DMatrix, DeviceQuantileDMatrix, Booster, _expect, DataIter
from .core import _deprecate_positional_args
from .training import train as worker_train
from .tracker import RabitTracker
from .tracker import RabitTracker, get_host_ip
from .sklearn import XGBModel, XGBRegressorBase, XGBClassifierBase
from .sklearn import xgboost_model_doc
@ -70,8 +70,7 @@ LOGGER = logging.getLogger('[xgboost.dask]')
def _start_tracker(n_workers):
"""Start Rabit tracker """
env = {'DMLC_NUM_WORKER': n_workers}
import socket
host = socket.gethostbyname(socket.gethostname())
host = get_host_ip('auto')
rabit_context = RabitTracker(hostIP=host, nslave=n_workers)
env.update(rabit_context.slave_envs())
@ -1211,10 +1210,10 @@ class DaskXGBClassifier(DaskScikitLearnBase, XGBClassifierBase):
early_stopping_rounds=early_stopping_rounds,
verbose=verbose)
async def _predict_proba_async(self, data, output_margin=False,
async def _predict_proba_async(self, X, output_margin=False,
base_margin=None):
test_dmatrix = await DaskDMatrix(
client=self.client, data=data, base_margin=base_margin,
client=self.client, data=X, base_margin=base_margin,
missing=self.missing
)
pred_probs = await predict(client=self.client,
@ -1224,11 +1223,11 @@ class DaskXGBClassifier(DaskScikitLearnBase, XGBClassifierBase):
return pred_probs
# pylint: disable=arguments-differ,missing-docstring
def predict_proba(self, data, output_margin=False, base_margin=None):
def predict_proba(self, X, output_margin=False, base_margin=None):
_assert_dask_support()
return self.client.sync(
self._predict_proba_async,
data,
X=X,
output_margin=output_margin,
base_margin=base_margin
)

View File

@ -424,6 +424,7 @@ def _transform_cupy_array(data):
data, '__array__'):
import cupy # pylint: disable=import-error
data = cupy.array(data, copy=False)
data = data.astype(dtype=data.dtype, order='C', copy=False)
return data

View File

@ -4,6 +4,7 @@
import copy
import warnings
import json
from typing import Optional
import numpy as np
from .core import Booster, DMatrix, XGBoostError, _deprecate_positional_args
from .training import train
@ -398,7 +399,7 @@ class XGBModel(XGBModelBase):
'importance_type', 'kwargs', 'missing', 'n_estimators', 'use_label_encoder'}
filtered = dict()
for k, v in params.items():
if k not in wrapper_specific:
if k not in wrapper_specific and not callable(v):
filtered[k] = v
return filtered
@ -494,6 +495,13 @@ class XGBModel(XGBModelBase):
# Delete the attribute after load
self.get_booster().set_attr(scikit_learn=None)
def _set_evaluation_result(self, evals_result: Optional[dict]) -> None:
if evals_result:
for val in evals_result.items():
evals_result_key = list(val[1].keys())[0]
evals_result[val[0]][evals_result_key] = val[1][evals_result_key]
self.evals_result_ = evals_result
@_deprecate_positional_args
def fit(self, X, y, *, sample_weight=None, base_margin=None,
eval_set=None, eval_metric=None, early_stopping_rounds=None,
@ -565,13 +573,6 @@ class XGBModel(XGBModelBase):
"""
self.n_features_in_ = X.shape[1]
train_dmatrix = DMatrix(data=X, label=y, weight=sample_weight,
base_margin=base_margin,
missing=self.missing,
nthread=self.n_jobs)
train_dmatrix.set_info(feature_weights=feature_weights)
evals_result = {}
train_dmatrix, evals = self._wrap_evaluation_matrices(
@ -601,12 +602,7 @@ class XGBModel(XGBModelBase):
verbose_eval=verbose, xgb_model=xgb_model,
callbacks=callbacks)
if evals_result:
for val in evals_result.items():
evals_result_key = list(val[1].keys())[0]
evals_result[val[0]][evals_result_key] = val[1][
evals_result_key]
self.evals_result_ = evals_result
self._set_evaluation_result(evals_result)
if early_stopping_rounds is not None:
self.best_score = self._Booster.best_score
@ -841,14 +837,18 @@ class XGBClassifier(XGBModel, XGBClassifierBase):
self.classes_ = cp.unique(y.values)
self.n_classes_ = len(self.classes_)
can_use_label_encoder = False
if not cp.array_equal(self.classes_, cp.arange(self.n_classes_)):
expected_classes = cp.arange(self.n_classes_)
if (self.classes_.shape != expected_classes.shape or
not (self.classes_ == expected_classes).all()):
raise ValueError(label_encoding_check_error)
elif _is_cupy_array(y):
import cupy as cp # pylint: disable=E0401
self.classes_ = cp.unique(y)
self.n_classes_ = len(self.classes_)
can_use_label_encoder = False
if not cp.array_equal(self.classes_, cp.arange(self.n_classes_)):
expected_classes = cp.arange(self.n_classes_)
if (self.classes_.shape != expected_classes.shape or
not (self.classes_ == expected_classes).all()):
raise ValueError(label_encoding_check_error)
else:
self.classes_ = np.unique(y)
@ -915,12 +915,7 @@ class XGBClassifier(XGBModel, XGBClassifierBase):
callbacks=callbacks)
self.objective = xgb_options["objective"]
if evals_result:
for val in evals_result.items():
evals_result_key = list(val[1].keys())[0]
evals_result[val[0]][
evals_result_key] = val[1][evals_result_key]
self.evals_result_ = evals_result
self._set_evaluation_result(evals_result)
if early_stopping_rounds is not None:
self.best_score = self._Booster.best_score
@ -991,10 +986,9 @@ class XGBClassifier(XGBModel, XGBClassifierBase):
return self._le.inverse_transform(column_indexes)
return column_indexes
def predict_proba(self, data, ntree_limit=None, validate_features=False,
def predict_proba(self, X, ntree_limit=None, validate_features=False,
base_margin=None):
"""
Predict the probability of each `data` example being of a given class.
""" Predict the probability of each `X` example being of a given class.
.. note:: This function is not thread safe
@ -1004,21 +998,22 @@ class XGBClassifier(XGBModel, XGBClassifierBase):
Parameters
----------
data : array_like
X : array_like
Feature matrix.
ntree_limit : int
Limit number of trees in the prediction; defaults to best_ntree_limit if defined
(i.e. it has been trained with early stopping), otherwise 0 (use all trees).
Limit number of trees in the prediction; defaults to best_ntree_limit if
defined (i.e. it has been trained with early stopping), otherwise 0 (use all
trees).
validate_features : bool
When this is True, validate that the Booster's and data's feature_names are identical.
Otherwise, it is assumed that the feature_names are the same.
When this is True, validate that the Booster's and data's feature_names are
identical. Otherwise, it is assumed that the feature_names are the same.
Returns
-------
prediction : numpy array
a numpy array with the probability of each data example being of a given class.
"""
test_dmatrix = DMatrix(data, base_margin=base_margin,
test_dmatrix = DMatrix(X, base_margin=base_margin,
missing=self.missing, nthread=self.n_jobs)
if ntree_limit is None:
ntree_limit = getattr(self, "best_ntree_limit", 0)
@ -1324,12 +1319,7 @@ class XGBRanker(XGBModel):
self.objective = params["objective"]
if evals_result:
for val in evals_result.items():
evals_result_key = list(val[1].keys())[0]
evals_result[val[0]][evals_result_key] = val[1][evals_result_key]
self.evals_result = evals_result
self._set_evaluation_result(evals_result)
if early_stopping_rounds is not None:
self.best_score = self._Booster.best_score
self.best_iteration = self._Booster.best_iteration
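The `data` to `X` rename above (and its dask counterpart) lets the wrappers follow scikit-learn's keyword convention. A hedged usage sketch on random data (`use_label_encoder` assumed available in this release, as listed in the wrapper-specific parameters above):

```python
import numpy as np
import xgboost as xgb

# Hedged sketch: the classifier can now be called with the scikit-learn keyword `X`.
X = np.random.rand(100, 4)
y = np.random.randint(0, 2, size=100)
clf = xgb.XGBClassifier(n_estimators=5, use_label_encoder=False)
clf.fit(X, y)
proba = clf.predict_proba(X=X)
assert proba.shape == (100, 2)
```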

View File

@ -52,6 +52,28 @@ def get_some_ip(host):
return socket.getaddrinfo(host, None)[0][4][0]
def get_host_ip(hostIP=None):
if hostIP is None or hostIP == 'auto':
hostIP = 'ip'
if hostIP == 'dns':
hostIP = socket.getfqdn()
elif hostIP == 'ip':
from socket import gaierror
try:
hostIP = socket.gethostbyname(socket.getfqdn())
except gaierror:
logging.warning(
'gethostbyname(socket.getfqdn()) failed... trying on hostname()')
hostIP = socket.gethostbyname(socket.gethostname())
if hostIP.startswith("127."):
s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
# doesn't have to be reachable
s.connect(('10.255.255.255', 1))
hostIP = s.getsockname()[0]
return hostIP
def get_family(addr):
return socket.getaddrinfo(addr, None)[0][0]
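`get_host_ip` is what `_start_tracker` in the dask module now calls (see the dask.py hunk above); it prefers a resolvable non-loopback address over a bare `gethostbyname`. A hedged usage sketch (import path taken from the dask.py hunk):

```python
# Hedged sketch: resolve the address the Rabit tracker should bind to.
from xgboost.tracker import get_host_ip

print(get_host_ip('auto'))  # e.g. '192.168.1.23' rather than '127.0.0.1'
```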

View File

@ -4,6 +4,7 @@
"""Training Library containing training routines."""
import warnings
import copy
import json
import numpy as np
from .core import Booster, XGBoostError
@ -40,18 +41,6 @@ def _is_new_callback(callbacks):
for c in callbacks) or not callbacks
def _configure_metrics(params):
if isinstance(params, dict) and 'eval_metric' in params \
and isinstance(params['eval_metric'], list):
params = dict((k, v) for k, v in params.items())
eval_metrics = params['eval_metric']
params.pop("eval_metric", None)
params = list(params.items())
for eval_metric in eval_metrics:
params += [('eval_metric', eval_metric)]
return params
def _train_internal(params, dtrain,
num_boost_round=10, evals=(),
obj=None, feval=None,
@ -61,7 +50,6 @@ def _train_internal(params, dtrain,
"""internal training function"""
callbacks = [] if callbacks is None else copy.copy(callbacks)
evals = list(evals)
params = _configure_metrics(params.copy())
bst = Booster(params, [dtrain] + [d[0] for d in evals])
nboost = 0
@ -136,7 +124,26 @@ def _train_internal(params, dtrain,
bst.best_iteration = int(bst.attr('best_iteration'))
else:
bst.best_iteration = nboost - 1
config = json.loads(bst.save_config())
booster = config['learner']['gradient_booster']['name']
if booster == 'gblinear':
num_parallel_tree = 0
elif booster == 'dart':
num_parallel_tree = int(
config['learner']['gradient_booster']['gbtree']['gbtree_train_param'][
'num_parallel_tree'
]
)
elif booster == 'gbtree':
num_parallel_tree = int(
config['learner']['gradient_booster']['gbtree_train_param'][
'num_parallel_tree']
)
else:
raise ValueError(f'Unknown booster: {booster}')
bst.best_ntree_limit = (bst.best_iteration + 1) * num_parallel_tree
# Copy to serialise and unserialise booster to reset state and free
# training memory
return bst.copy()
@ -175,9 +182,10 @@ def train(params, dtrain, num_boost_round=10, evals=(), obj=None, feval=None,
If there's more than one metric in the **eval_metric** parameter given in
**params**, the last metric will be used for early stopping.
If early stopping occurs, the model will have three additional fields:
``bst.best_score``, ``bst.best_iteration`` and ``bst.best_ntree_limit``.
(Use ``bst.best_ntree_limit`` to get the correct value if
``num_parallel_tree`` and/or ``num_class`` appears in the parameters)
``bst.best_score``, ``bst.best_iteration`` and ``bst.best_ntree_limit``. Use
``bst.best_ntree_limit`` to get the correct value if ``num_parallel_tree`` and/or
``num_class`` appears in the parameters. ``best_ntree_limit`` is the result of
``num_parallel_tree * best_iteration``.
evals_result: dict
This dictionary stores the evaluation results of all the items in watchlist.
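With this change `best_ntree_limit` is derived from the saved booster configuration, so `num_parallel_tree` is respected for `gbtree` and `dart` and the limit is zero for `gblinear`. A hedged end-to-end sketch on random data (config keys taken from the hunk above):

```python
import json
import numpy as np
import xgboost as xgb

# Hedged sketch: best_ntree_limit should equal (best_iteration + 1) * num_parallel_tree.
rng = np.random.RandomState(0)
X, y = rng.rand(200, 5), rng.randint(0, 2, 200)
dtrain = xgb.DMatrix(X[:150], label=y[:150])
dvalid = xgb.DMatrix(X[150:], label=y[150:])
bst = xgb.train({'objective': 'binary:logistic', 'num_parallel_tree': 3},
                dtrain, num_boost_round=20,
                evals=[(dvalid, 'valid')], early_stopping_rounds=3)
config = json.loads(bst.save_config())
npt = int(config['learner']['gradient_booster']['gbtree_train_param']['num_parallel_tree'])
assert bst.best_ntree_limit == (bst.best_iteration + 1) * npt
```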

View File

@ -25,6 +25,10 @@
#include <sys/socket.h>
#include <sys/ioctl.h>
#if defined(__sun) || defined(sun)
#include <sys/sockio.h>
#endif // defined(__sun) || defined(sun)
#endif // defined(_WIN32)
#include <string>

View File

@ -268,7 +268,7 @@ class CLI {
// always save final round
if ((param_.save_period == 0 ||
param_.num_round % param_.save_period != 0) &&
param_.model_out != CLIParam::kNull && rabit::GetRank() == 0) {
rabit::GetRank() == 0) {
std::ostringstream os;
if (param_.model_out == CLIParam::kNull) {
os << param_.model_dir << '/' << std::setfill('0') << std::setw(4)

View File

@ -407,9 +407,14 @@ class HistCollection {
// access histogram for i-th node
GHistRowT operator[](bst_uint nid) const {
constexpr uint32_t kMax = std::numeric_limits<uint32_t>::max();
CHECK_NE(row_ptr_[nid], kMax);
GradientPairT* ptr =
const_cast<GradientPairT*>(dmlc::BeginPtr(data_) + row_ptr_[nid]);
const size_t id = row_ptr_[nid];
CHECK_NE(id, kMax);
GradientPairT* ptr = nullptr;
if (contiguous_allocation_) {
ptr = const_cast<GradientPairT*>(data_[0].data() + nbins_*id);
} else {
ptr = const_cast<GradientPairT*>(data_[id].data());
}
return {ptr, nbins_};
}
@ -438,21 +443,37 @@ class HistCollection {
}
CHECK_EQ(row_ptr_[nid], kMax);
if (data_.size() < nbins_ * (nid + 1)) {
data_.resize(nbins_ * (nid + 1));
if (data_.size() < (nid + 1)) {
data_.resize((nid + 1));
}
row_ptr_[nid] = nbins_ * n_nodes_added_;
row_ptr_[nid] = n_nodes_added_;
n_nodes_added_++;
}
// allocate thread local memory i-th node
void AllocateData(bst_uint nid) {
if (data_[row_ptr_[nid]].size() == 0) {
data_[row_ptr_[nid]].resize(nbins_, {0, 0});
}
}
// allocate common buffer contiguously for all nodes, need for single Allreduce call
void AllocateAllData() {
const size_t new_size = nbins_*data_.size();
contiguous_allocation_ = true;
if (data_[0].size() != new_size) {
data_[0].resize(new_size);
}
}
private:
/*! \brief number of all bins over all features */
uint32_t nbins_ = 0;
/*! \brief amount of active nodes in hist collection */
uint32_t n_nodes_added_ = 0;
/*! \brief flag to identify contiguous memory allocation */
bool contiguous_allocation_ = false;
std::vector<GradientPairT> data_;
std::vector<std::vector<GradientPairT>> data_;
/*! \brief row_ptr_[nid] locates bin for histogram of node nid */
std::vector<size_t> row_ptr_;
@ -481,7 +502,6 @@ class ParallelGHistBuilder {
const std::vector<GHistRowT>& targeted_hists) {
hist_buffer_.Init(nbins_);
tid_nid_to_hist_.clear();
hist_memory_.clear();
threads_to_nids_map_.clear();
targeted_hists_ = targeted_hists;
@ -504,8 +524,11 @@ class ParallelGHistBuilder {
CHECK_LT(nid, nodes_);
CHECK_LT(tid, nthreads_);
size_t idx = tid_nid_to_hist_.at({tid, nid});
GHistRowT hist = hist_memory_[idx];
int idx = tid_nid_to_hist_.at({tid, nid});
if (idx >= 0) {
hist_buffer_.AllocateData(idx);
}
GHistRowT hist = idx == -1 ? targeted_hists_[nid] : hist_buffer_[idx];
if (!hist_was_used_[tid * nodes_ + nid]) {
InitilizeHistByZeroes(hist, 0, hist.size());
@ -526,8 +549,9 @@ class ParallelGHistBuilder {
for (size_t tid = 0; tid < nthreads_; ++tid) {
if (hist_was_used_[tid * nodes_ + nid]) {
is_updated = true;
const size_t idx = tid_nid_to_hist_.at({tid, nid});
GHistRowT src = hist_memory_[idx];
int idx = tid_nid_to_hist_.at({tid, nid});
GHistRowT src = idx == -1 ? targeted_hists_[nid] : hist_buffer_[idx];
if (dst.data() != src.data()) {
IncrementHist(dst, src, begin, end);
@ -589,7 +613,6 @@ class ParallelGHistBuilder {
}
void MatchNodeNidPairToHist() {
size_t hist_total = 0;
size_t hist_allocated_additionally = 0;
for (size_t nid = 0; nid < nodes_; ++nid) {
@ -597,15 +620,11 @@ class ParallelGHistBuilder {
for (size_t tid = 0; tid < nthreads_; ++tid) {
if (threads_to_nids_map_[tid * nodes_ + nid]) {
if (first_hist) {
hist_memory_.push_back(targeted_hists_[nid]);
tid_nid_to_hist_[{tid, nid}] = -1;
first_hist = false;
} else {
hist_memory_.push_back(hist_buffer_[hist_allocated_additionally]);
hist_allocated_additionally++;
tid_nid_to_hist_[{tid, nid}] = hist_allocated_additionally++;
}
// map pair {tid, nid} to index of allocated histogram from hist_memory_
tid_nid_to_hist_[{tid, nid}] = hist_total++;
CHECK_EQ(hist_total, hist_memory_.size());
}
}
}
@ -630,10 +649,11 @@ class ParallelGHistBuilder {
std::vector<bool> threads_to_nids_map_;
/*! \brief Contains histograms for final results */
std::vector<GHistRowT> targeted_hists_;
/*! \brief Allocated memory for histograms used for construction */
std::vector<GHistRowT> hist_memory_;
/*! \brief map pair {tid, nid} to index of allocated histogram from hist_memory_ */
std::map<std::pair<size_t, size_t>, size_t> tid_nid_to_hist_;
/*!
* \brief map pair {tid, nid} to index of allocated histogram from hist_buffer_ and targeted_hists_,
* -1 is reserved for targeted_hists_
*/
std::map<std::pair<size_t, size_t>, int> tid_nid_to_hist_;
};
/*!

View File

@ -11,6 +11,7 @@
#include <algorithm>
#include <vector>
#include <utility>
#include <memory>
namespace xgboost {
namespace common {
@ -150,24 +151,33 @@ class PartitionBuilder {
}
}
// allocate thread local memory, should be called for each specific task
void AllocateForTask(size_t id) {
if (mem_blocks_[id].get() == nullptr) {
BlockInfo* local_block_ptr = new BlockInfo;
CHECK_NE(local_block_ptr, (BlockInfo*)nullptr);
mem_blocks_[id].reset(local_block_ptr);
}
}
common::Span<size_t> GetLeftBuffer(int nid, size_t begin, size_t end) {
const size_t task_idx = GetTaskIdx(nid, begin);
return { mem_blocks_.at(task_idx).Left(), end - begin };
return { mem_blocks_.at(task_idx)->Left(), end - begin };
}
common::Span<size_t> GetRightBuffer(int nid, size_t begin, size_t end) {
const size_t task_idx = GetTaskIdx(nid, begin);
return { mem_blocks_.at(task_idx).Right(), end - begin };
return { mem_blocks_.at(task_idx)->Right(), end - begin };
}
void SetNLeftElems(int nid, size_t begin, size_t end, size_t n_left) {
size_t task_idx = GetTaskIdx(nid, begin);
mem_blocks_.at(task_idx).n_left = n_left;
mem_blocks_.at(task_idx)->n_left = n_left;
}
void SetNRightElems(int nid, size_t begin, size_t end, size_t n_right) {
size_t task_idx = GetTaskIdx(nid, begin);
mem_blocks_.at(task_idx).n_right = n_right;
mem_blocks_.at(task_idx)->n_right = n_right;
}
@ -185,13 +195,13 @@ class PartitionBuilder {
for (size_t i = 0; i < blocks_offsets_.size()-1; ++i) {
size_t n_left = 0;
for (size_t j = blocks_offsets_[i]; j < blocks_offsets_[i+1]; ++j) {
mem_blocks_[j].n_offset_left = n_left;
n_left += mem_blocks_[j].n_left;
mem_blocks_[j]->n_offset_left = n_left;
n_left += mem_blocks_[j]->n_left;
}
size_t n_right = 0;
for (size_t j = blocks_offsets_[i]; j < blocks_offsets_[i+1]; ++j) {
mem_blocks_[j].n_offset_right = n_left + n_right;
n_right += mem_blocks_[j].n_right;
mem_blocks_[j]->n_offset_right = n_left + n_right;
n_right += mem_blocks_[j]->n_right;
}
left_right_nodes_sizes_[i] = {n_left, n_right};
}
@ -200,21 +210,21 @@ class PartitionBuilder {
void MergeToArray(int nid, size_t begin, size_t* rows_indexes) {
size_t task_idx = GetTaskIdx(nid, begin);
size_t* left_result = rows_indexes + mem_blocks_[task_idx].n_offset_left;
size_t* right_result = rows_indexes + mem_blocks_[task_idx].n_offset_right;
size_t* left_result = rows_indexes + mem_blocks_[task_idx]->n_offset_left;
size_t* right_result = rows_indexes + mem_blocks_[task_idx]->n_offset_right;
const size_t* left = mem_blocks_[task_idx].Left();
const size_t* right = mem_blocks_[task_idx].Right();
const size_t* left = mem_blocks_[task_idx]->Left();
const size_t* right = mem_blocks_[task_idx]->Right();
std::copy_n(left, mem_blocks_[task_idx].n_left, left_result);
std::copy_n(right, mem_blocks_[task_idx].n_right, right_result);
std::copy_n(left, mem_blocks_[task_idx]->n_left, left_result);
std::copy_n(right, mem_blocks_[task_idx]->n_right, right_result);
}
protected:
size_t GetTaskIdx(int nid, size_t begin) {
return blocks_offsets_[nid] + begin / BlockSize;
}
protected:
struct BlockInfo{
size_t n_left;
size_t n_right;
@ -230,12 +240,12 @@ class PartitionBuilder {
return &right_data_[0];
}
private:
alignas(128) size_t left_data_[BlockSize];
alignas(128) size_t right_data_[BlockSize];
size_t left_data_[BlockSize];
size_t right_data_[BlockSize];
};
std::vector<std::pair<size_t, size_t>> left_right_nodes_sizes_;
std::vector<size_t> blocks_offsets_;
std::vector<BlockInfo> mem_blocks_;
std::vector<std::shared_ptr<BlockInfo>> mem_blocks_;
size_t max_n_tasks_ = 0;
};

View File

@ -10,10 +10,6 @@ namespace xgboost {
namespace gbm {
void GBLinearModel::SaveModel(Json* p_out) const {
using WeightType = std::remove_reference<decltype(std::declval<decltype(weight)>().back())>::type;
using JsonFloat = Number::Float;
static_assert(std::is_same<WeightType, JsonFloat>::value,
"Weight type should be of the same type with JSON float");
auto& out = *p_out;
size_t const n_weights = weight.size();

View File

@ -222,6 +222,10 @@ void GenericParameter::ConfigureGpuId(bool require_gpu) {
LOG(WARNING) << "No visible GPU is found, setting `gpu_id` to -1";
}
this->UpdateAllowUnknown(Args{{"gpu_id", std::to_string(kCpuId)}});
} else if (fail_on_invalid_gpu_id) {
CHECK(gpu_id == kCpuId || gpu_id < n_gpus)
<< "Only " << n_gpus << " GPUs are visible, gpu_id "
<< gpu_id << " is invalid.";
} else if (gpu_id != kCpuId && gpu_id >= n_gpus) {
LOG(WARNING) << "Only " << n_gpus
<< " GPUs are visible, setting `gpu_id` to " << gpu_id % n_gpus;

View File

@ -162,6 +162,9 @@ struct LogisticRaw : public LogisticRegression {
predt = common::Sigmoid(predt);
return std::max(predt * (T(1.0f) - predt), eps);
}
static bst_float ProbToMargin(bst_float base_score) {
return base_score;
}
static const char* DefaultEvalMetric() { return "auc"; }
static const char* Name() { return "binary:logitraw"; }
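For `binary:logistic` the base score is a probability and is mapped into margin space with the logit before training; the `ProbToMargin` override above makes `binary:logitraw` keep `base_score` unchanged, since its outputs already live in margin space. A hedged Python illustration of the two mappings (not the C++ implementation itself):

```python
import math

# Hedged illustration: how base_score becomes the initial margin.
def prob_to_margin_logistic(base_score):
    # binary:logistic: base_score is a probability, mapped through the logit
    return math.log(base_score / (1.0 - base_score))

def prob_to_margin_logitraw(base_score):
    # binary:logitraw after this patch: returned unchanged (see hunk above)
    return base_score

print(prob_to_margin_logistic(0.5))  # 0.0
print(prob_to_margin_logitraw(0.5))  # 0.5
```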

View File

@ -580,7 +580,7 @@ class GPUPredictor : public xgboost::Predictor {
Predictor::Predictor{generic_param} {}
~GPUPredictor() override {
if (generic_param_->gpu_id >= 0) {
if (generic_param_->gpu_id >= 0 && generic_param_->gpu_id < common::AllVisibleGPUs()) {
dh::safe_cuda(cudaSetDevice(generic_param_->gpu_id));
}
}

View File

@ -182,8 +182,10 @@ void DistributedHistSynchronizer<GradientSumT>::SyncHistograms(BuilderT* builder
}
});
builder->builder_monitor_.Start("SyncHistogramsAllreduce");
builder->histred_.Allreduce(builder->hist_[starting_index].data(),
builder->hist_builder_.GetNumBins() * sync_count);
builder->builder_monitor_.Stop("SyncHistogramsAllreduce");
ParallelSubtractionHist(builder, space, builder->nodes_for_explicit_hist_build_, p_tree);
@ -232,7 +234,7 @@ void BatchHistRowsAdder<GradientSumT>::AddHistRows(BuilderT *builder,
for (auto const& node : builder->nodes_for_subtraction_trick_) {
builder->hist_.AddHistRow(node.nid);
}
builder->hist_.AllocateAllData();
builder->builder_monitor_.Stop("AddHistRows");
}
@ -268,6 +270,8 @@ void DistributedHistRowsAdder<GradientSumT>::AddHistRows(BuilderT *builder,
builder->hist_local_worker_.AddHistRow(nid);
}
}
builder->hist_.AllocateAllData();
builder->hist_local_worker_.AllocateAllData();
(*sync_count) = std::max(1, n_left);
builder->builder_monitor_.Stop("AddHistRows");
}
@ -1166,7 +1170,7 @@ template <typename GradientSumT>
void QuantileHistMaker::Builder<GradientSumT>::ApplySplit(const std::vector<ExpandEntry> nodes,
const GHistIndexMatrix& gmat,
const ColumnMatrix& column_matrix,
const HistCollection<GradientSumT>&,
const HistCollection<GradientSumT>& hist,
RegTree* p_tree) {
builder_monitor_.Start("ApplySplit");
// 1. Find split condition for each split
@ -1189,7 +1193,10 @@ void QuantileHistMaker::Builder<GradientSumT>::ApplySplit(const std::vector<Expa
// 2.3 Split elements of row_set_collection_ to left and right child-nodes for each node
// Store results in intermediate buffers from partition_builder_
common::ParallelFor2d(space, this->nthread_, [&](size_t node_in_set, common::Range1d r) {
size_t begin = r.begin();
const int32_t nid = nodes[node_in_set].nid;
const size_t task_id = partition_builder_.GetTaskIdx(node_in_set, begin);
partition_builder_.AllocateForTask(task_id);
switch (column_matrix.GetTypeSize()) {
case common::kUint8BinsTypeSize:
PartitionKernel<uint8_t>(node_in_set, nid, r,

View File

@ -0,0 +1,37 @@
[base]
name=CentOS-$releasever - Base
baseurl=http://vault.centos.org/centos/$releasever/os/$basearch/
gpgcheck=1
gpgkey=file:///etc/pki/rpm-gpg/RPM-GPG-KEY-CentOS-6
#released updates
[updates]
name=CentOS-$releasever - Updates
baseurl=http://vault.centos.org/centos/$releasever/updates/$basearch/
gpgcheck=1
gpgkey=file:///etc/pki/rpm-gpg/RPM-GPG-KEY-CentOS-6
#additional packages that may be useful
[extras]
name=CentOS-$releasever - Extras
baseurl=http://vault.centos.org/centos/$releasever/extras/$basearch/
gpgcheck=1
gpgkey=file:///etc/pki/rpm-gpg/RPM-GPG-KEY-CentOS-6
#additional packages that extend functionality of existing packages
[centosplus]
name=CentOS-$releasever - Plus
mirrorlist=http://mirrorlist.centos.org/?release=$releasever&arch=$basearch&repo=centosplus&infra=$infra
#baseurl=http://mirror.centos.org/centos/$releasever/centosplus/$basearch/
gpgcheck=1
enabled=0
gpgkey=file:///etc/pki/rpm-gpg/RPM-GPG-KEY-CentOS-6
#contrib - packages by Centos Users
[contrib]
name=CentOS-$releasever - Contrib
mirrorlist=http://mirrorlist.centos.org/?release=$releasever&arch=$basearch&repo=contrib&infra=$infra
#baseurl=http://mirror.centos.org/centos/$releasever/contrib/$basearch/
gpgcheck=1
enabled=0
gpgkey=file:///etc/pki/rpm-gpg/RPM-GPG-KEY-CentOS-6

View File

@ -0,0 +1,15 @@
FROM quay.io/pypa/manylinux2010_x86_64
# Install lightweight sudo (not bound to TTY)
ENV GOSU_VERSION 1.10
RUN set -ex; \
curl -o /usr/local/bin/gosu -L "https://github.com/tianon/gosu/releases/download/$GOSU_VERSION/gosu-amd64" && \
chmod +x /usr/local/bin/gosu && \
gosu nobody true
# Default entry-point to use if running locally
# It will preserve attributes of created files
COPY entrypoint.sh /scripts/
WORKDIR /workspace
ENTRYPOINT ["/scripts/entrypoint.sh"]

View File

@ -19,7 +19,7 @@ ENV PATH=/opt/python/bin:$PATH
# Create new Conda environment with cuDF, Dask, and cuPy
RUN \
conda create -n gpu_test -c rapidsai-nightly -c rapidsai -c nvidia -c conda-forge -c defaults \
python=3.7 cudf=0.16* rmm=0.16* cudatoolkit=$CUDA_VERSION_ARG dask dask-cuda dask-cudf cupy \
python=3.7 cudf=0.17* rmm=0.17* cudatoolkit=$CUDA_VERSION_ARG dask dask-cuda dask-cudf cupy \
numpy pytest scipy scikit-learn pandas matplotlib wheel python-kubernetes urllib3 graphviz hypothesis
ENV GOSU_VERSION 1.10

View File

@ -6,12 +6,13 @@ ARG CUDA_VERSION_ARG
ENV DEBIAN_FRONTEND noninteractive
ENV DEVTOOLSET_URL_ROOT http://vault.centos.org/6.9/sclo/x86_64/rh/devtoolset-4/
COPY CentOS-Base.repo /etc/yum.repos.d/
# Install all basic requirements
RUN \
yum install -y epel-release && \
yum -y update && \
yum install -y tar unzip wget xz git centos-release-scl yum-utils && \
yum-config-manager --enable centos-sclo-rh-testing && \
yum -y update && \
yum install -y tar unzip wget xz git patchelf && \
yum install -y $DEVTOOLSET_URL_ROOT/devtoolset-4-gcc-5.3.1-6.1.el6.x86_64.rpm \
$DEVTOOLSET_URL_ROOT/devtoolset-4-gcc-c++-5.3.1-6.1.el6.x86_64.rpm \
$DEVTOOLSET_URL_ROOT/devtoolset-4-binutils-2.25.1-8.el6.x86_64.rpm \
@ -20,6 +21,7 @@ RUN \
# Python
wget -O Miniconda3.sh https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh && \
bash Miniconda3.sh -b -p /opt/python && \
/opt/python/bin/python -m pip install auditwheel && \
# CMake
wget -nv -nc https://cmake.org/files/v3.13/cmake-3.13.0-Linux-x86_64.sh --no-check-certificate && \
bash cmake-3.13.0-Linux-x86_64.sh --skip-license --prefix=/usr && \
@ -29,7 +31,7 @@ RUN \
wget -nv -nc https://github.com/ninja-build/ninja/archive/v1.10.0.tar.gz --no-check-certificate && \
tar xf v1.10.0.tar.gz && mv ninja-1.10.0 ninja && rm -v v1.10.0.tar.gz && \
cd ninja && \
python ./configure.py --bootstrap
/opt/python/bin/python ./configure.py --bootstrap
# NCCL2 (License: https://docs.nvidia.com/deeplearning/sdk/nccl-sla/index.html)
RUN \

View File

@ -2,12 +2,13 @@ FROM centos:6
ENV DEVTOOLSET_URL_ROOT http://vault.centos.org/6.9/sclo/x86_64/rh/devtoolset-4/
COPY CentOS-Base.repo /etc/yum.repos.d/
# Install all basic requirements
RUN \
yum install -y epel-release && \
yum -y update && \
yum install -y tar unzip wget xz git centos-release-scl yum-utils java-1.8.0-openjdk-devel && \
yum-config-manager --enable centos-sclo-rh-testing && \
yum -y update && \
yum install -y tar unzip wget xz git java-1.8.0-openjdk-devel && \
yum install -y $DEVTOOLSET_URL_ROOT/devtoolset-4-gcc-5.3.1-6.1.el6.x86_64.rpm \
$DEVTOOLSET_URL_ROOT/devtoolset-4-gcc-c++-5.3.1-6.1.el6.x86_64.rpm \
$DEVTOOLSET_URL_ROOT/devtoolset-4-binutils-2.25.1-8.el6.x86_64.rpm \
@ -31,7 +32,7 @@ ENV CPP=/opt/rh/devtoolset-4/root/usr/bin/cpp
# Install Python packages
RUN \
pip install numpy pytest scipy scikit-learn wheel kubernetes urllib3==1.22 awscli
pip install numpy pytest scipy scikit-learn wheel kubernetes urllib3==1.25.10 awscli
ENV GOSU_VERSION 1.10

View File

@ -6,12 +6,13 @@ ARG CUDA_VERSION_ARG
ENV DEBIAN_FRONTEND noninteractive
ENV DEVTOOLSET_URL_ROOT http://vault.centos.org/6.9/sclo/x86_64/rh/devtoolset-4/
COPY CentOS-Base.repo /etc/yum.repos.d/
# Install all basic requirements
RUN \
yum install -y epel-release && \
yum -y update && \
yum install -y tar unzip wget xz git centos-release-scl yum-utils java-1.8.0-openjdk-devel && \
yum-config-manager --enable centos-sclo-rh-testing && \
yum -y update && \
yum install -y tar unzip wget xz git java-1.8.0-openjdk-devel && \
yum install -y $DEVTOOLSET_URL_ROOT/devtoolset-4-gcc-5.3.1-6.1.el6.x86_64.rpm \
$DEVTOOLSET_URL_ROOT/devtoolset-4-gcc-c++-5.3.1-6.1.el6.x86_64.rpm \
$DEVTOOLSET_URL_ROOT/devtoolset-4-binutils-2.25.1-8.el6.x86_64.rpm \
@ -45,7 +46,7 @@ ENV CPP=/opt/rh/devtoolset-4/root/usr/bin/cpp
# Install Python packages
RUN \
pip install numpy pytest scipy scikit-learn wheel kubernetes urllib3==1.22 awscli
pip install numpy pytest scipy scikit-learn wheel kubernetes urllib3==1.25.10 awscli
ENV GOSU_VERSION 1.10

View File

@ -29,7 +29,7 @@ ENV PATH=/opt/python/bin:$PATH
# Create new Conda environment with RMM
RUN \
conda create -n gpu_test -c nvidia -c rapidsai-nightly -c rapidsai -c conda-forge -c defaults \
python=3.7 rmm=0.16* cudatoolkit=$CUDA_VERSION_ARG
python=3.7 rmm=0.17* cudatoolkit=$CUDA_VERSION_ARG
ENV GOSU_VERSION 1.10

View File

@ -27,3 +27,4 @@ dependencies:
- pip:
- shap
- awscli
- auditwheel

View File

@ -9,7 +9,6 @@ dependencies:
- scikit-learn
- pandas
- pytest
- python-graphviz
- boto3
- hypothesis
- jsonschema
@ -17,3 +16,4 @@ dependencies:
- pip:
- cupy-cuda101
- modin[all]
- graphviz

View File

@ -35,7 +35,7 @@ void ParallelGHistBuilderReset() {
for(size_t inode = 0; inode < kNodesExtended; inode++) {
collection.AddHistRow(inode);
}
collection.AllocateAllData();
ParallelGHistBuilder<GradientSumT> hist_builder;
hist_builder.Init(kBins);
std::vector<GHistRow<GradientSumT>> target_hist(kNodes);
@ -91,7 +91,7 @@ void ParallelGHistBuilderReduceHist(){
for(size_t inode = 0; inode < kNodes; inode++) {
collection.AddHistRow(inode);
}
collection.AllocateAllData();
ParallelGHistBuilder<GradientSumT> hist_builder;
hist_builder.Init(kBins);
std::vector<GHistRow<GradientSumT>> target_hist(kNodes);

View File

@ -32,6 +32,8 @@ TEST(PartitionBuilder, BasicTest) {
for(size_t j = 0; j < tasks[nid]; ++j) {
size_t begin = kBlockSize*j;
size_t end = kBlockSize*(j+1);
const size_t id = builder.GetTaskIdx(nid, begin);
builder.AllocateForTask(id);
auto left = builder.GetLeftBuffer(nid, begin, end);
auto right = builder.GetRightBuffer(nid, begin, end);

View File

@ -274,6 +274,7 @@ class QuantileHistMock : public QuantileHistMaker {
RealImpl::InitData(gmat, gpair, fmat, tree);
GHistIndexBlockMatrix dummy;
this->hist_.AddHistRow(nid);
this->hist_.AllocateAllData();
this->BuildHist(gpair, this->row_set_collection_[nid],
gmat, dummy, this->hist_[nid]);
@ -315,7 +316,7 @@ class QuantileHistMock : public QuantileHistMaker {
RealImpl::InitData(gmat, row_gpairs, *dmat, tree);
this->hist_.AddHistRow(0);
this->hist_.AllocateAllData();
this->BuildHist(row_gpairs, this->row_set_collection_[0],
gmat, quantile_index_block, this->hist_[0]);
@ -411,7 +412,7 @@ class QuantileHistMock : public QuantileHistMaker {
cm.Init(gmat, 0.0);
RealImpl::InitData(gmat, row_gpairs, *dmat, tree);
this->hist_.AddHistRow(0);
this->hist_.AllocateAllData();
RealImpl::InitNewNode(0, gmat, row_gpairs, *dmat, tree);
const size_t num_row = dmat->Info().num_row_;
@ -449,6 +450,8 @@ class QuantileHistMock : public QuantileHistMaker {
RealImpl::partition_builder_.Init(1, 1, [&](size_t node_in_set) {
return 1;
});
const size_t task_id = RealImpl::partition_builder_.GetTaskIdx(0, 0);
RealImpl::partition_builder_.AllocateForTask(task_id);
this->template PartitionKernel<uint8_t>(0, 0, common::Range1d(0, kNRows),
split, cm, tree);
RealImpl::partition_builder_.CalculateRowOffsets();

View File

@ -52,3 +52,17 @@ class TestGPUBasicModels:
model_0, model_1 = self.run_cls(X, y, False)
assert model_0 != model_1
def test_invalid_gpu_id(self):
X = np.random.randn(10, 5) * 1e4
y = np.random.randint(0, 2, size=10) * 1e4
# should pass with invalid gpu id
cls1 = xgb.XGBClassifier(tree_method='gpu_hist', gpu_id=9999)
cls1.fit(X, y)
# should throw error with fail_on_invalid_gpu_id enabled
cls2 = xgb.XGBClassifier(tree_method='gpu_hist', gpu_id=9999, fail_on_invalid_gpu_id=True)
try:
cls2.fit(X, y)
assert False, "Should have failed with with fail_on_invalid_gpu_id enabled"
except xgb.core.XGBoostError as err:
assert "gpu_id 9999 is invalid" in str(err)

View File

@ -5,8 +5,10 @@ import numpy as np
import asyncio
import xgboost
import subprocess
import hypothesis
from hypothesis import given, strategies, settings, note
from hypothesis._settings import duration
from hypothesis import HealthCheck
from test_gpu_updaters import parameter_strategy
if sys.platform.startswith("win"):
@ -19,6 +21,11 @@ from test_with_dask import _get_client_workers # noqa
from test_with_dask import generate_array # noqa
import testing as tm # noqa
if hasattr(HealthCheck, 'function_scoped_fixture'):
suppress = [HealthCheck.function_scoped_fixture]
else:
suppress = hypothesis.utils.conventions.not_set
try:
import dask.dataframe as dd
@ -161,19 +168,24 @@ class TestDistributedGPU:
run_with_dask_dataframe(dxgb.DaskDMatrix, client)
run_with_dask_dataframe(dxgb.DaskDeviceQuantileDMatrix, client)
@given(params=parameter_strategy, num_rounds=strategies.integers(1, 20),
dataset=tm.dataset_strategy)
@settings(deadline=duration(seconds=120))
@given(
params=parameter_strategy,
num_rounds=strategies.integers(1, 20),
dataset=tm.dataset_strategy,
)
@settings(deadline=duration(seconds=120), suppress_health_check=suppress)
@pytest.mark.skipif(**tm.no_dask())
@pytest.mark.skipif(**tm.no_dask_cuda())
@pytest.mark.parametrize('local_cuda_cluster', [{'n_workers': 2}], indirect=['local_cuda_cluster'])
@pytest.mark.parametrize(
"local_cuda_cluster", [{"n_workers": 2}], indirect=["local_cuda_cluster"]
)
@pytest.mark.mgpu
def test_gpu_hist(self, params, num_rounds, dataset, local_cuda_cluster):
with Client(local_cuda_cluster) as client:
run_gpu_hist(params, num_rounds, dataset, dxgb.DaskDMatrix,
client)
run_gpu_hist(params, num_rounds, dataset,
dxgb.DaskDeviceQuantileDMatrix, client)
run_gpu_hist(params, num_rounds, dataset, dxgb.DaskDMatrix, client)
run_gpu_hist(
params, num_rounds, dataset, dxgb.DaskDeviceQuantileDMatrix, client
)
@pytest.mark.skipif(**tm.no_cupy())
@pytest.mark.skipif(**tm.no_dask())

View File

@ -64,22 +64,24 @@ def generate_logistic_model():
y = np.random.randint(0, 2, size=kRows)
assert y.max() == 1 and y.min() == 0
data = xgboost.DMatrix(X, label=y, weight=w)
booster = xgboost.train({'tree_method': 'hist',
'num_parallel_tree': kForests,
'max_depth': kMaxDepth,
'objective': 'binary:logistic'},
num_boost_round=kRounds, dtrain=data)
booster.save_model(booster_bin('logit'))
booster.save_model(booster_json('logit'))
for objective, name in [('binary:logistic', 'logit'), ('binary:logitraw', 'logitraw')]:
data = xgboost.DMatrix(X, label=y, weight=w)
booster = xgboost.train({'tree_method': 'hist',
'num_parallel_tree': kForests,
'max_depth': kMaxDepth,
'objective': objective},
num_boost_round=kRounds, dtrain=data)
booster.save_model(booster_bin(name))
booster.save_model(booster_json(name))
reg = xgboost.XGBClassifier(tree_method='hist',
num_parallel_tree=kForests,
max_depth=kMaxDepth,
n_estimators=kRounds)
reg.fit(X, y, w)
reg.save_model(skl_bin('logit'))
reg.save_model(skl_json('logit'))
reg = xgboost.XGBClassifier(tree_method='hist',
num_parallel_tree=kForests,
max_depth=kMaxDepth,
n_estimators=kRounds,
objective=objective)
reg.fit(X, y, w)
reg.save_model(skl_bin(name))
reg.save_model(skl_json(name))
def generate_classification_model():

View File

@ -57,6 +57,25 @@ class TestBasic:
# assert they are the same
assert np.sum(np.abs(preds2 - preds)) == 0
def test_metric_config(self):
# Make sure that metric configuration happens in the booster so that the
# list `['error', 'auc']` doesn't get passed down to core as a raw string.
dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train')
dtest = xgb.DMatrix(dpath + 'agaricus.txt.test')
param = {'max_depth': 2, 'eta': 1, 'verbosity': 0,
'objective': 'binary:logistic', 'eval_metric': ['error', 'auc']}
watchlist = [(dtest, 'eval'), (dtrain, 'train')]
num_round = 2
booster = xgb.train(param, dtrain, num_round, watchlist)
predt_0 = booster.predict(dtrain)
with tempfile.TemporaryDirectory() as tmpdir:
path = os.path.join(tmpdir, 'model.json')
booster.save_model(path)
booster = xgb.Booster(params=param, model_file=path)
predt_1 = booster.predict(dtrain)
np.testing.assert_allclose(predt_0, predt_1)
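(Aside, not part of the diff: a minimal sketch of the behaviour the test above relies on. When a list of metrics is configured, each name gets its own history in the evaluation results, i.e. the list is consumed by the booster wrapper rather than forwarded to core verbatim. The data below is made up purely for illustration.)
import numpy as np
import xgboost as xgb
X = np.random.randn(128, 4)
y = np.random.randint(0, 2, size=128)
dtrain = xgb.DMatrix(X, label=y)
history = {}
xgb.train({'objective': 'binary:logistic', 'eval_metric': ['error', 'auc']},
          dtrain, num_boost_round=2, evals=[(dtrain, 'train')],
          evals_result=history)
assert set(history['train']) == {'error', 'auc'}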
def test_record_results(self):
dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train')
dtest = xgb.DMatrix(dpath + 'agaricus.txt.test')
@ -124,8 +143,8 @@ class TestBasic:
dump2 = bst.get_dump(with_stats=True)
assert dump2[0].count('\n') == 3, 'Expected 1 root and 2 leaves - 3 lines in dump.'
assert (dump2[0].find('\n') > dump1[0].find('\n'),
'Expected more info when with_stats=True is given.')
msg = 'Expected more info when with_stats=True is given.'
assert dump2[0].find('\n') > dump1[0].find('\n'), msg
dump3 = bst.get_dump(dump_format="json")
dump3j = json.loads(dump3[0])
@ -248,13 +267,11 @@ class TestBasicPathLike:
assert binary_path.exists()
Path.unlink(binary_path)
def test_Booster_init_invalid_path(self):
"""An invalid model_file path should raise XGBoostError."""
with pytest.raises(xgb.core.XGBoostError):
xgb.Booster(model_file=Path("invalidpath"))
def test_Booster_save_and_load(self):
"""Saving and loading model files from paths."""
save_path = Path("saveload.model")

View File

@ -22,6 +22,30 @@ class TestCallbacks:
cls.X_valid = X[split:, ...]
cls.y_valid = y[split:, ...]
def run_evaluation_monitor(self, D_train, D_valid, rounds, verbose_eval):
evals_result = {}
with tm.captured_output() as (out, err):
xgb.train({'objective': 'binary:logistic',
'eval_metric': 'error'}, D_train,
evals=[(D_train, 'Train'), (D_valid, 'Valid')],
num_boost_round=rounds,
evals_result=evals_result,
verbose_eval=verbose_eval)
output: str = out.getvalue().strip()
if int(verbose_eval) == 1:
# Should print each iteration info
assert len(output.split('\n')) == rounds
elif int(verbose_eval) > rounds:
# Should print first and latest iteration info
assert len(output.split('\n')) == 2
else:
# Should print info for each period in addition to the first and the latest iteration
num_periods = rounds // int(verbose_eval)
# An extra line is required for the latest iteration
is_extra_info_required = num_periods * int(verbose_eval) < (rounds - 1)
assert len(output.split('\n')) == 1 + num_periods + int(is_extra_info_required)
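(Aside, a worked example of the formula above with illustrative values rather than the test's own: with rounds=10 and verbose_eval=4, num_periods = 10 // 4 = 2, and num_periods * 4 = 8 < 9 = rounds - 1, so one extra line is needed for the final iteration, giving 1 + 2 + 1 = 4 printed lines.)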
def test_evaluation_monitor(self):
D_train = xgb.DMatrix(self.X_train, self.y_train)
D_valid = xgb.DMatrix(self.X_valid, self.y_valid)
@ -36,23 +60,10 @@ class TestCallbacks:
assert len(evals_result['Train']['error']) == rounds
assert len(evals_result['Valid']['error']) == rounds
with tm.captured_output() as (out, err):
xgb.train({'objective': 'binary:logistic',
'eval_metric': 'error'}, D_train,
evals=[(D_train, 'Train'), (D_valid, 'Valid')],
num_boost_round=rounds,
evals_result=evals_result,
verbose_eval=2)
output: str = out.getvalue().strip()
pos = 0
msg = 'Train-error'
for i in range(rounds // 2):
pos = output.find('Train-error', pos)
assert pos != -1
pos += len(msg)
assert output.find('Train-error', pos) == -1
self.run_evaluation_monitor(D_train, D_valid, rounds, True)
self.run_evaluation_monitor(D_train, D_valid, rounds, 2)
self.run_evaluation_monitor(D_train, D_valid, rounds, 4)
self.run_evaluation_monitor(D_train, D_valid, rounds, rounds + 1)
def test_early_stopping(self):
D_train = xgb.DMatrix(self.X_train, self.y_train)
@ -142,7 +153,7 @@ class TestCallbacks:
eval_metric=tm.eval_error_metric, callbacks=[early_stop])
booster = cls.get_booster()
dump = booster.get_dump(dump_format='json')
assert len(dump) == booster.best_iteration
assert len(dump) == booster.best_iteration + 1
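# best_iteration is zero-based, so the model kept by save_best holds best_iteration + 1 rounds of trees.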
early_stop = xgb.callback.EarlyStopping(rounds=early_stopping_rounds,
save_best=True)

View File

@ -22,6 +22,7 @@ model_in = {model_in}
model_out = {model_out}
test_path = {test_path}
name_pred = {name_pred}
model_dir = {model_dir}
num_round = 10
data = {data_path}
@ -59,7 +60,8 @@ eval[test] = {data_path}
model_in='NULL',
model_out=model_out_cli,
test_path='NULL',
name_pred='NULL')
name_pred='NULL',
model_dir='NULL')
with open(config_path, 'w') as fd:
fd.write(train_conf)
@ -73,7 +75,8 @@ eval[test] = {data_path}
model_in=model_out_cli,
model_out='NULL',
test_path=data_path,
name_pred=predict_out)
name_pred=predict_out,
model_dir='NULL')
with open(config_path, 'w') as fd:
fd.write(predict_conf)
@ -145,7 +148,8 @@ eval[test] = {data_path}
model_in='NULL',
model_out=model_out_cli,
test_path='NULL',
name_pred='NULL')
name_pred='NULL',
model_dir='NULL')
with open(config_path, 'w') as fd:
fd.write(train_conf)
@ -154,3 +158,28 @@ eval[test] = {data_path}
model = json.load(fd)
assert model['learner']['gradient_booster']['name'] == 'gbtree'
def test_cli_save_model(self):
'''Test that the final-round model is saved into model_dir.'''
exe = self.get_exe()
data_path = "{root}/demo/data/agaricus.txt.train?format=libsvm".format(
root=self.PROJECT_ROOT)
seed = 1994
with tempfile.TemporaryDirectory() as tmpdir:
model_out_cli = os.path.join(tmpdir, '0010.model')
config_path = os.path.join(tmpdir, 'test_load_cli_model.conf')
train_conf = self.template.format(data_path=data_path,
seed=seed,
task='train',
model_in='NULL',
model_out='NULL',
test_path='NULL',
name_pred='NULL',
model_dir=tmpdir)
with open(config_path, 'w') as fd:
fd.write(train_conf)
subprocess.run([exe, config_path])
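# With model_dir set in the config, the model from the final round ends up in that directory as '0010.model' (zero-padded round number).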
assert os.path.exists(model_out_cli)

View File

@ -24,6 +24,10 @@ def run_booster_check(booster, name):
config['learner']['learner_model_param']['base_score']) == 0.5
assert config['learner']['learner_train_param'][
'objective'] == 'multi:softmax'
elif name.find('logitraw') != -1:
assert len(booster.get_dump()) == gm.kForests * gm.kRounds
assert config['learner']['learner_model_param']['num_class'] == str(0)
assert config['learner']['learner_train_param']['objective'] == 'binary:logitraw'
elif name.find('logit') != -1:
assert len(booster.get_dump()) == gm.kForests * gm.kRounds
assert config['learner']['learner_model_param']['num_class'] == str(0)
@ -77,6 +81,13 @@ def run_scikit_model_check(name, path):
assert config['learner']['learner_train_param'][
'objective'] == 'rank:ndcg'
run_model_param_check(config)
elif name.find('logitraw') != -1:
logit = xgboost.XGBClassifier()
logit.load_model(path)
assert (len(logit.get_booster().get_dump()) ==
gm.kRounds * gm.kForests)
config = json.loads(logit.get_booster().save_config())
assert config['learner']['learner_train_param']['objective'] == 'binary:logitraw'
elif name.find('logit') != -1:
logit = xgboost.XGBClassifier()
logit.load_model(path)
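(Aside, not part of the diff: binary:logitraw trains with the same logistic loss but leaves predictions as raw margins, which is why the branches above only differ in the recorded objective. A one-line sketch of the relationship, assuming the same trained ensemble:)
import numpy as np
def sigmoid(margin):
    # Passing a binary:logitraw score through the sigmoid recovers the
    # probability that binary:logistic would report for the same trees.
    return 1.0 / (1.0 + np.exp(-margin))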

View File

@ -33,9 +33,15 @@ def run_predict_leaf(predictor):
y = rng.randint(low=0, high=classes, size=rows)
m = xgb.DMatrix(X, y)
booster = xgb.train(
{'num_parallel_tree': num_parallel_tree, 'num_class': classes,
'predictor': predictor, 'tree_method': 'hist'}, m,
num_boost_round=num_boost_round)
{
"num_parallel_tree": num_parallel_tree,
"num_class": classes,
"predictor": predictor,
"tree_method": "hist",
},
m,
num_boost_round=num_boost_round,
)
empty = xgb.DMatrix(np.ones(shape=(0, cols)))
empty_leaf = booster.predict(empty, pred_leaf=True)
@ -52,12 +58,19 @@ def run_predict_leaf(predictor):
end = classes * num_parallel_tree * (j + 1)
layer = row[start: end]
for c in range(classes):
tree_group = layer[c * num_parallel_tree:
(c+1) * num_parallel_tree]
tree_group = layer[c * num_parallel_tree: (c + 1) * num_parallel_tree]
assert tree_group.shape[0] == num_parallel_tree
# No subsampling, so trees in the same forest should output the same leaf.
assert np.all(tree_group == tree_group[0])
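# With ntree_limit, leaf prediction should cover only the first ntree_limit
# rounds, i.e. classes * num_parallel_tree * ntree_limit leaf indices per row.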
ntree_limit = 2
sliced = booster.predict(
m, pred_leaf=True, ntree_limit=num_parallel_tree * ntree_limit
)
first = sliced[0, ...]
assert first.shape[0] == classes * num_parallel_tree * ntree_limit
return leaf

View File

@ -8,7 +8,8 @@ import asyncio
from sklearn.datasets import make_classification
import os
import subprocess
from hypothesis import given, settings, note
import hypothesis
from hypothesis import given, settings, note, HealthCheck
from test_updaters import hist_parameter_strategy, exact_parameter_strategy
if sys.platform.startswith("win"):
@ -17,6 +18,12 @@ if tm.no_dask()['condition']:
pytest.skip(msg=tm.no_dask()['reason'], allow_module_level=True)
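# HealthCheck.function_scoped_fixture only exists in newer hypothesis releases;
# suppress it when available (the dask client fixture is function scoped),
# otherwise fall back to hypothesis' 'not set' sentinel so @settings is left unchanged.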
if hasattr(HealthCheck, 'function_scoped_fixture'):
suppress = [HealthCheck.function_scoped_fixture]
else:
suppress = hypothesis.utils.conventions.not_set
try:
from distributed import LocalCluster, Client, get_client
from distributed.utils_test import client, loop, cluster_fixture
@ -668,14 +675,14 @@ class TestWithDask:
@given(params=hist_parameter_strategy,
dataset=tm.dataset_strategy)
@settings(deadline=None)
@settings(deadline=None, suppress_health_check=suppress)
def test_hist(self, params, dataset, client):
num_rounds = 30
self.run_updater_test(client, params, num_rounds, dataset, 'hist')
@given(params=exact_parameter_strategy,
dataset=tm.dataset_strategy)
@settings(deadline=None)
@settings(deadline=None, suppress_health_check=suppress)
def test_approx(self, client, params, dataset):
num_rounds = 30
self.run_updater_test(client, params, num_rounds, dataset, 'approx')
@ -795,7 +802,6 @@ class TestDaskCallbacks:
merged = xgb.dask._get_workers_from_data(train, evals=[(valid, 'Valid')])
assert len(merged) == 2
def test_data_initialization(self):
'''Assert each worker has the correct amount of data, and DMatrix initialization doesn't
generate unnecessary copies of data.

View File

@ -78,6 +78,34 @@ def test_multiclass_classification():
check_pred(preds4, labels, output_margin=False)
def test_best_ntree_limit():
from sklearn.datasets import load_iris
X, y = load_iris(return_X_y=True)
def train(booster, forest):
rounds = 4
cls = xgb.XGBClassifier(
n_estimators=rounds, num_parallel_tree=forest, booster=booster
).fit(
X, y, eval_set=[(X, y)], early_stopping_rounds=3
)
if forest:
assert cls.best_ntree_limit == rounds * forest
else:
assert cls.best_ntree_limit == 0
# best_ntree_limit is used by default; assert that under gblinear it is
# automatically ignored since it is 0.
cls.predict(X)
num_parallel_tree = 4
train('gbtree', num_parallel_tree)
train('dart', num_parallel_tree)
train('gblinear', None)
def test_ranking():
# generate random data
x_train = np.random.rand(1000, 10)
@ -94,6 +122,8 @@ def test_ranking():
model = xgb.sklearn.XGBRanker(**params)
model.fit(x_train, y_train, group=train_group,
eval_set=[(x_valid, y_valid)], eval_group=[valid_group])
assert model.evals_result()
pred = model.predict(x_test)
train_data = xgb.DMatrix(x_train, y_train)
@ -399,6 +429,21 @@ def test_classification_with_custom_objective():
X, y
)
cls = xgb.XGBClassifier(use_label_encoder=False, n_estimators=1)
cls.fit(X, y)
is_called = [False]
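# Wrap the custom objective so the test can observe whether fit() actually invokes it after set_params.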
def wrapped(y, p):
is_called[0] = True
return logregobj(y, p)
cls.set_params(objective=wrapped)
cls.predict(X) # no throw
cls.fit(X, y)
assert is_called[0]
def test_sklearn_api():
from sklearn.datasets import load_iris

View File

@ -34,6 +34,10 @@ if [ ${TASK} == "python_test" ]; then
tests/ci_build/ci_build.sh aarch64 docker bash -c "cd python-package && rm -rf dist/* && python setup.py bdist_wheel --universal"
TAG=manylinux2014_aarch64
tests/ci_build/ci_build.sh aarch64 docker python tests/ci_build/rename_whl.py python-package/dist/*.whl ${TRAVIS_COMMIT} ${TAG}
tests/ci_build/ci_build.sh aarch64 docker auditwheel repair --plat ${TAG} python-package/dist/*.whl
mv -v wheelhouse/*.whl python-package/dist/
# Make sure that libgomp.so is vendored in the wheel
unzip -l python-package/dist/*.whl | grep libgomp || exit -1
else
rm -rf build
mkdir build && cd build