Release patch release 1.3.1 (#6543)

Remove cupy.array_equal, since it's not compatible with CuPy 7.8 (#6528) (#6535)
Co-authored-by: Philip Hyunsu Cho <chohyu01@cs.washington.edu>
2020-12-21 23:22:32 -08:00 · 2020-12-20 15:11:50 +08:00 · 2020-12-20 15:07:42 +08:00 · 2020-12-20 10:36:32 +08:00 · 2020-12-18 20:00:29 +08:00 · 2020-12-18 04:10:09 +08:00
56 changed files with 428 additions and 172 deletions
--- a/.travis.yml
+++ b/.travis.yml
@@ -52,6 +52,7 @@ addons:
  apt:
    packages:
      - snapd
+      - unzip

 before_install:
  - source tests/travis/travis_setup_env.sh
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,5 +1,5 @@
 cmake_minimum_required(VERSION 3.13)
-project(xgboost LANGUAGES CXX C VERSION 1.3.0)
+project(xgboost LANGUAGES CXX C VERSION 1.3.1)
 include(cmake/Utils.cmake)
 list(APPEND CMAKE_MODULE_PATH "${xgboost_SOURCE_DIR}/cmake/modules")
 cmake_policy(SET CMP0022 NEW)
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -190,11 +190,20 @@ def BuildCUDA(args) {
    if (env.BRANCH_NAME != 'master' && !(env.BRANCH_NAME.startsWith('release'))) {
      arch_flag = "-DGPU_COMPUTE_VER=75"
    }
+    def wheel_tag = "manylinux2010_x86_64"
    sh """
    ${dockerRun} ${container_type} ${docker_binary} ${docker_args} tests/ci_build/build_via_cmake.sh -DUSE_CUDA=ON -DUSE_NCCL=ON -DOPEN_MP:BOOL=ON -DHIDE_CXX_SYMBOLS=ON ${arch_flag}
    ${dockerRun} ${container_type} ${docker_binary} ${docker_args} bash -c "cd python-package && rm -rf dist/* && python setup.py bdist_wheel --universal"
-    ${dockerRun} ${container_type} ${docker_binary} ${docker_args} python tests/ci_build/rename_whl.py python-package/dist/*.whl ${commit_id} manylinux2010_x86_64
+    ${dockerRun} ${container_type} ${docker_binary} ${docker_args} python tests/ci_build/rename_whl.py python-package/dist/*.whl ${commit_id} ${wheel_tag}
    """
+    if (args.cuda_version == ref_cuda_ver) {
+      sh """
+      ${dockerRun} auditwheel_x86_64 ${docker_binary} auditwheel repair --plat ${wheel_tag} python-package/dist/*.whl
+      mv -v wheelhouse/*.whl python-package/dist/
+      # Make sure that libgomp.so is vendored in the wheel
+      ${dockerRun} auditwheel_x86_64 ${docker_binary} bash -c "unzip -l python-package/dist/*.whl | grep libgomp  || exit -1"
+      """
+    }
    echo 'Stashing Python wheel...'
    stash name: "xgboost_whl_cuda${args.cuda_version}", includes: 'python-package/dist/*.whl'
    if (args.cuda_version == ref_cuda_ver && (env.BRANCH_NAME == 'master' || env.BRANCH_NAME.startsWith('release'))) {
--- a/R-package/DESCRIPTION
+++ b/R-package/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: xgboost
 Type: Package
 Title: Extreme Gradient Boosting
-Version: 1.3.0.1
+Version: 1.3.1.1
 Date: 2020-08-28
 Authors@R: c(
  person("Tianqi", "Chen", role = c("aut"),
--- a/R-package/tests/helper_scripts/generate_models.R
+++ b/R-package/tests/helper_scripts/generate_models.R
@@ -2,7 +2,6 @@
 # of saved model files from XGBoost version 0.90 and 1.0.x.
 library(xgboost)
 library(Matrix)
-source('./generate_models_params.R')

 set.seed(0)
 metadata <- list(
@@ -53,11 +52,16 @@ generate_logistic_model <- function () {
  y <- sample(0:1, size = metadata$kRows, replace = TRUE)
  stopifnot(max(y) == 1, min(y) == 0)

+  objective <- c('binary:logistic', 'binary:logitraw')
+  name <- c('logit', 'logitraw')
+
+  for (i in seq_len(length(objective))) {
    data <- xgb.DMatrix(X, label = y, weight = w)
    params <- list(tree_method = 'hist', num_parallel_tree = metadata$kForests,
-                 max_depth = metadata$kMaxDepth, objective = 'binary:logistic')
+                   max_depth = metadata$kMaxDepth, objective = objective[i])
    booster <- xgb.train(params, data, nrounds = metadata$kRounds)
-  save_booster(booster, 'logit')
+    save_booster(booster, name[i])
+  }
 }

 generate_classification_model <- function () {
--- a/R-package/tests/testthat/test_model_compatibility.R
+++ b/R-package/tests/testthat/test_model_compatibility.R
@@ -39,6 +39,10 @@ run_booster_check <- function (booster, name) {
    testthat::expect_equal(config$learner$learner_train_param$objective, 'multi:softmax')
    testthat::expect_equal(as.numeric(config$learner$learner_model_param$num_class),
                           metadata$kClasses)
+  } else if (name == 'logitraw') {
+    testthat::expect_equal(get_num_tree(booster), metadata$kForests * metadata$kRounds)
+    testthat::expect_equal(as.numeric(config$learner$learner_model_param$num_class), 0)
+    testthat::expect_equal(config$learner$learner_train_param$objective, 'binary:logitraw')
  } else if (name == 'logit') {
    testthat::expect_equal(get_num_tree(booster), metadata$kForests * metadata$kRounds)
    testthat::expect_equal(as.numeric(config$learner$learner_model_param$num_class), 0)
--- a/cmake/Python_version.in
+++ b/cmake/Python_version.in
@@ -1 +1 @@
-@xgboost_VERSION_MAJOR@.@xgboost_VERSION_MINOR@.@xgboost_VERSION_PATCH@-SNAPSHOT
+@xgboost_VERSION_MAJOR@.@xgboost_VERSION_MINOR@.@xgboost_VERSION_PATCH@
--- a/cmake/RPackageInstallTargetSetup.cmake
+++ b/cmake/RPackageInstallTargetSetup.cmake
@@ -6,8 +6,8 @@ function(setup_rpackage_install_target rlib_target build_dir)
  install(
    DIRECTORY "${xgboost_SOURCE_DIR}/R-package"
    DESTINATION "${build_dir}"
-    REGEX "src/*" EXCLUDE
-    REGEX "R-package/configure" EXCLUDE
+    PATTERN "src/*" EXCLUDE
+    PATTERN "R-package/configure" EXCLUDE
  )
  install(TARGETS ${rlib_target}
    LIBRARY DESTINATION "${build_dir}/R-package/src/"
--- a/demo/CLI/binary_classification/README.md
+++ b/demo/CLI/binary_classification/README.md
@@ -62,7 +62,7 @@ test:data = "agaricus.txt.test"
 We use the tree booster and logistic regression objective in our setting. This indicates that we accomplish our task using classic gradient boosting regression tree(GBRT), which is a promising method for binary classification.

 The parameters shown in the example gives the most common ones that are needed to use xgboost.
-If you are interested in more parameter settings, the complete parameter settings and detailed descriptions are [here](../../doc/parameter.rst). Besides putting the parameters in the configuration file, we can set them by passing them as arguments as below:
+If you are interested in more parameter settings, the complete parameter settings and detailed descriptions are [here](https://xgboost.readthedocs.io/en/stable/parameter.html). Besides putting the parameters in the configuration file, we can set them by passing them as arguments as below:

 ```
 ../../xgboost mushroom.conf max_depth=6
@@ -161,4 +161,3 @@ Eg. ```nthread=10```

 Set nthread to be the number of your real cpu (On Unix, this can be found using ```lscpu```)
 Some systems will have ```Thread(s) per core = 2```, for example, a 4 core cpu with 8 threads, in such case set ```nthread=4``` and not 8.
-
--- a/demo/CLI/regression/README.md
+++ b/demo/CLI/regression/README.md
@@ -14,4 +14,3 @@ objective = reg:squarederror
 ```

 The input format is same as binary classification, except that the label is now the target regression values. We use linear regression here, if we want use objective = reg:logistic logistic regression, the label needed to be pre-scaled into [0,1].
-
--- a/demo/README.md
+++ b/demo/README.md
@@ -60,9 +60,9 @@ This is a list of short codes introducing different functionalities of xgboost p
 Most of examples in this section are based on CLI or python version.
 However, the parameter settings can be applied to all versions

- [Binary classification](binary_classification)
+- [Binary classification](CLI/binary_classification)
 - [Multiclass classification](multiclass_classification)
- [Regression](regression)
+- [Regression](CLI/regression)
 - [Learning to Rank](rank)

 ### Benchmarks
--- a/demo/rank/mq2008.conf
+++ b/demo/rank/mq2008.conf
@@ -24,5 +24,3 @@ data = "mq2008.train"
 eval[test] = "mq2008.vali"
 # The path of test data
 test:data = "mq2008.test"
-
-
--- a/doc/tutorials/model.rst
+++ b/doc/tutorials/model.rst
@@ -2,7 +2,6 @@
 Introduction to Boosted Trees
 #############################
 XGBoost stands for "Extreme Gradient Boosting", where the term "Gradient Boosting" originates from the paper *Greedy Function Approximation: A Gradient Boosting Machine*, by Friedman.
-This is a tutorial on gradient boosted trees, and most of the content is based on `these slides <http://homes.cs.washington.edu/~tqchen/pdf/BoostedTree.pdf>`_ by Tianqi Chen, the original author of XGBoost.

 The **gradient boosted trees** has been around for a while, and there are a lot of materials on the topic.
 This tutorial will explain boosted trees in a self-contained and principled way using the elements of supervised learning.
--- a/include/xgboost/generic_parameters.h
+++ b/include/xgboost/generic_parameters.h
@@ -11,6 +11,7 @@
 #include <string>

 namespace xgboost {
+
 struct GenericParameter : public XGBoostParameter<GenericParameter> {
  // Constant representing the device ID of CPU.
  static int32_t constexpr kCpuId = -1;
@@ -26,6 +27,8 @@ struct GenericParameter : public XGBoostParameter<GenericParameter> {
  int nthread;
  // primary device, -1 means no gpu.
  int gpu_id;
+  // fail when gpu_id is invalid
+  bool fail_on_invalid_gpu_id {false};
  // gpu page size in external memory mode, 0 means using the default.
  size_t gpu_page_size;
  bool enable_experimental_json_serialization {true};
@@ -64,6 +67,9 @@ struct GenericParameter : public XGBoostParameter<GenericParameter> {
        .set_default(-1)
        .set_lower_bound(-1)
        .describe("The primary GPU device ordinal.");
+    DMLC_DECLARE_FIELD(fail_on_invalid_gpu_id)
+        .set_default(false)
+        .describe("Fail with error when gpu_id is invalid.");
    DMLC_DECLARE_FIELD(gpu_page_size)
        .set_default(0)
        .set_lower_bound(0)
--- a/include/xgboost/version_config.h
+++ b/include/xgboost/version_config.h
@@ -6,6 +6,6 @@

 #define XGBOOST_VER_MAJOR 1
 #define XGBOOST_VER_MINOR 3
-#define XGBOOST_VER_PATCH 0
+#define XGBOOST_VER_PATCH 1

 #endif  // XGBOOST_VERSION_CONFIG_H_
--- a/jvm-packages/pom.xml
+++ b/jvm-packages/pom.xml
@@ -6,7 +6,7 @@

    <groupId>ml.dmlc</groupId>
    <artifactId>xgboost-jvm_2.12</artifactId>
-    <version>1.3.0-SNAPSHOT</version>
+    <version>1.3.1</version>
    <packaging>pom</packaging>
    <name>XGBoost JVM Package</name>
    <description>JVM Package for XGBoost</description>
--- a/jvm-packages/xgboost4j-example/pom.xml
+++ b/jvm-packages/xgboost4j-example/pom.xml
@@ -6,10 +6,10 @@
    <parent>
        <groupId>ml.dmlc</groupId>
        <artifactId>xgboost-jvm_2.12</artifactId>
-        <version>1.3.0-SNAPSHOT</version>
+        <version>1.3.1</version>
    </parent>
    <artifactId>xgboost4j-example_2.12</artifactId>
-    <version>1.3.0-SNAPSHOT</version>
+    <version>1.3.1</version>
    <packaging>jar</packaging>
    <build>
        <plugins>
@@ -26,7 +26,7 @@
        <dependency>
            <groupId>ml.dmlc</groupId>
            <artifactId>xgboost4j-spark_${scala.binary.version}</artifactId>
-            <version>1.3.0-SNAPSHOT</version>
+            <version>1.3.1</version>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
@@ -37,7 +37,7 @@
        <dependency>
            <groupId>ml.dmlc</groupId>
            <artifactId>xgboost4j-flink_${scala.binary.version}</artifactId>
-            <version>1.3.0-SNAPSHOT</version>
+            <version>1.3.1</version>
        </dependency>
        <dependency>
            <groupId>org.apache.commons</groupId>
--- a/jvm-packages/xgboost4j-flink/pom.xml
+++ b/jvm-packages/xgboost4j-flink/pom.xml
@@ -6,10 +6,10 @@
    <parent>
        <groupId>ml.dmlc</groupId>
        <artifactId>xgboost-jvm_2.12</artifactId>
-        <version>1.3.0-SNAPSHOT</version>
+        <version>1.3.1</version>
    </parent>
    <artifactId>xgboost4j-flink_2.12</artifactId>
-    <version>1.3.0-SNAPSHOT</version>
+    <version>1.3.1</version>
    <build>
        <plugins>
            <plugin>
@@ -26,7 +26,7 @@
        <dependency>
            <groupId>ml.dmlc</groupId>
            <artifactId>xgboost4j_${scala.binary.version}</artifactId>
-            <version>1.3.0-SNAPSHOT</version>
+            <version>1.3.1</version>
        </dependency>
        <dependency>
            <groupId>org.apache.commons</groupId>
--- a/jvm-packages/xgboost4j-gpu/pom.xml
+++ b/jvm-packages/xgboost4j-gpu/pom.xml
@@ -6,10 +6,10 @@
    <parent>
        <groupId>ml.dmlc</groupId>
        <artifactId>xgboost-jvm_2.12</artifactId>
-        <version>1.3.0-SNAPSHOT</version>
+        <version>1.3.1</version>
    </parent>
    <artifactId>xgboost4j-gpu_2.12</artifactId>
-    <version>1.3.0-SNAPSHOT</version>
+    <version>1.3.1</version>
    <packaging>jar</packaging>

    <dependencies>
--- a/jvm-packages/xgboost4j-spark-gpu/pom.xml
+++ b/jvm-packages/xgboost4j-spark-gpu/pom.xml
@@ -6,7 +6,7 @@
    <parent>
        <groupId>ml.dmlc</groupId>
        <artifactId>xgboost-jvm_2.12</artifactId>
-        <version>1.3.0-SNAPSHOT</version>
+        <version>1.3.1</version>
    </parent>
    <artifactId>xgboost4j-spark-gpu_2.12</artifactId>
    <build>
@@ -24,7 +24,7 @@
        <dependency>
            <groupId>ml.dmlc</groupId>
            <artifactId>xgboost4j-gpu_${scala.binary.version}</artifactId>
-            <version>1.3.0-SNAPSHOT</version>
+            <version>1.3.1</version>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
--- a/jvm-packages/xgboost4j-spark/pom.xml
+++ b/jvm-packages/xgboost4j-spark/pom.xml
@@ -6,7 +6,7 @@
    <parent>
        <groupId>ml.dmlc</groupId>
        <artifactId>xgboost-jvm_2.12</artifactId>
-        <version>1.3.0-SNAPSHOT</version>
+        <version>1.3.1</version>
    </parent>
    <artifactId>xgboost4j-spark_2.12</artifactId>
    <build>
@@ -24,7 +24,7 @@
        <dependency>
            <groupId>ml.dmlc</groupId>
            <artifactId>xgboost4j_${scala.binary.version}</artifactId>
-            <version>1.3.0-SNAPSHOT</version>
+            <version>1.3.1</version>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
--- a/jvm-packages/xgboost4j/pom.xml
+++ b/jvm-packages/xgboost4j/pom.xml
@@ -6,10 +6,10 @@
    <parent>
        <groupId>ml.dmlc</groupId>
        <artifactId>xgboost-jvm_2.12</artifactId>
-        <version>1.3.0-SNAPSHOT</version>
+        <version>1.3.1</version>
    </parent>
    <artifactId>xgboost4j_2.12</artifactId>
-    <version>1.3.0-SNAPSHOT</version>
+    <version>1.3.1</version>
    <packaging>jar</packaging>

    <dependencies>
--- a/python-package/xgboost/VERSION
+++ b/python-package/xgboost/VERSION
@@ -1 +1 @@
-1.3.0-SNAPSHOT
+1.3.1
--- a/python-package/xgboost/callback.py
+++ b/python-package/xgboost/callback.py
@@ -456,6 +456,7 @@ class LearningRateScheduler(TrainingCallback):

    def after_iteration(self, model, epoch, evals_log):
        model.set_param('learning_rate', self.learning_rates(epoch))
+        return False


 # pylint: disable=too-many-instance-attributes
@@ -565,7 +566,7 @@ class EarlyStopping(TrainingCallback):
    def after_training(self, model: Booster):
        try:
            if self.save_best:
-                model = model[: int(model.attr('best_iteration'))]
+                model = model[: int(model.attr('best_iteration')) + 1]
        except XGBoostError as e:
            raise XGBoostError('`save_best` is not applicable to current booster') from e
        return model
@@ -621,7 +622,7 @@ class EvaluationMonitor(TrainingCallback):
                    msg += self._fmt_metric(data, metric_name, score, stdv)
            msg += '\n'

-            if (epoch % self.period) != 0:
+            if (epoch % self.period) == 0 or self.period == 1:
                rabit.tracker_print(msg)
                self._latest = None
            else:
@@ -677,6 +678,7 @@ class TrainingCheckPoint(TrainingCallback):
                else:
                    model.save_model(path)
        self._epoch += 1
+        return False


 class LegacyCallbacks:
--- a/python-package/xgboost/core.py
+++ b/python-package/xgboost/core.py
@@ -1,11 +1,12 @@
 # coding: utf-8
 # pylint: disable=too-many-arguments, too-many-branches, invalid-name
-# pylint: disable=too-many-lines, too-many-locals
+# pylint: disable=too-many-lines, too-many-locals, no-self-use
 """Core XGBoost Library."""
 import collections
 # pylint: disable=no-name-in-module,import-error
 from collections.abc import Mapping
 # pylint: enable=no-name-in-module,import-error
+from typing import Dict, Union, List
 import ctypes
 import os
 import re
@@ -1012,6 +1013,7 @@ class Booster(object):
        _check_call(_LIB.XGBoosterCreate(dmats, c_bst_ulong(len(cache)),
                                         ctypes.byref(self.handle)))
        params = params or {}
+        params = self._configure_metrics(params.copy())
        if isinstance(params, list):
            params.append(('validate_parameters', True))
        else:
@@ -1041,6 +1043,17 @@ class Booster(object):
        else:
            raise TypeError('Unknown type:', model_file)

+    def _configure_metrics(self, params: Union[Dict, List]) -> Union[Dict, List]:
+        if isinstance(params, dict) and 'eval_metric' in params \
+           and isinstance(params['eval_metric'], list):
+            params = dict((k, v) for k, v in params.items())
+            eval_metrics = params['eval_metric']
+            params.pop("eval_metric", None)
+            params = list(params.items())
+            for eval_metric in eval_metrics:
+                params += [('eval_metric', eval_metric)]
+        return params
+
    def __del__(self):
        if hasattr(self, 'handle') and self.handle is not None:
            _check_call(_LIB.XGBoosterFree(self.handle))
--- a/python-package/xgboost/dask.py
+++ b/python-package/xgboost/dask.py
@@ -33,7 +33,7 @@ from .compat import lazy_isinstance
 from .core import DMatrix, DeviceQuantileDMatrix, Booster, _expect, DataIter
 from .core import _deprecate_positional_args
 from .training import train as worker_train
-from .tracker import RabitTracker
+from .tracker import RabitTracker, get_host_ip
 from .sklearn import XGBModel, XGBRegressorBase, XGBClassifierBase
 from .sklearn import xgboost_model_doc

@@ -70,8 +70,7 @@ LOGGER = logging.getLogger('[xgboost.dask]')
 def _start_tracker(n_workers):
    """Start Rabit tracker """
    env = {'DMLC_NUM_WORKER': n_workers}
-    import socket
-    host = socket.gethostbyname(socket.gethostname())
+    host = get_host_ip('auto')
    rabit_context = RabitTracker(hostIP=host, nslave=n_workers)
    env.update(rabit_context.slave_envs())

--- a/python-package/xgboost/data.py
+++ b/python-package/xgboost/data.py
@@ -424,6 +424,7 @@ def _transform_cupy_array(data):
            data, '__array__'):
        import cupy             # pylint: disable=import-error
        data = cupy.array(data, copy=False)
+    data = data.astype(dtype=data.dtype, order='C', copy=False)
    return data


--- a/python-package/xgboost/sklearn.py
+++ b/python-package/xgboost/sklearn.py
@@ -398,7 +398,7 @@ class XGBModel(XGBModelBase):
            'importance_type', 'kwargs', 'missing', 'n_estimators', 'use_label_encoder'}
        filtered = dict()
        for k, v in params.items():
-            if k not in wrapper_specific:
+            if k not in wrapper_specific and not callable(v):
                filtered[k] = v
        return filtered

@@ -841,14 +841,18 @@ class XGBClassifier(XGBModel, XGBClassifierBase):
            self.classes_ = cp.unique(y.values)
            self.n_classes_ = len(self.classes_)
            can_use_label_encoder = False
-            if not cp.array_equal(self.classes_, cp.arange(self.n_classes_)):
+            expected_classes = cp.arange(self.n_classes_)
+            if (self.classes_.shape != expected_classes.shape or
+                    not (self.classes_ == expected_classes).all()):
                raise ValueError(label_encoding_check_error)
        elif _is_cupy_array(y):
            import cupy as cp  # pylint: disable=E0401
            self.classes_ = cp.unique(y)
            self.n_classes_ = len(self.classes_)
            can_use_label_encoder = False
-            if not cp.array_equal(self.classes_, cp.arange(self.n_classes_)):
+            expected_classes = cp.arange(self.n_classes_)
+            if (self.classes_.shape != expected_classes.shape or
+                    not (self.classes_ == expected_classes).all()):
                raise ValueError(label_encoding_check_error)
        else:
            self.classes_ = np.unique(y)
--- a/python-package/xgboost/tracker.py
+++ b/python-package/xgboost/tracker.py
@@ -52,6 +52,28 @@ def get_some_ip(host):
    return socket.getaddrinfo(host, None)[0][4][0]


+def get_host_ip(hostIP=None):
+    if hostIP is None or hostIP == 'auto':
+        hostIP = 'ip'
+
+    if hostIP == 'dns':
+        hostIP = socket.getfqdn()
+    elif hostIP == 'ip':
+        from socket import gaierror
+        try:
+            hostIP = socket.gethostbyname(socket.getfqdn())
+        except gaierror:
+            logging.warning(
+                'gethostbyname(socket.getfqdn()) failed... trying on hostname()')
+            hostIP = socket.gethostbyname(socket.gethostname())
+        if hostIP.startswith("127."):
+            s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
+            # doesn't have to be reachable
+            s.connect(('10.255.255.255', 1))
+            hostIP = s.getsockname()[0]
+    return hostIP
+
+
 def get_family(addr):
    return socket.getaddrinfo(addr, None)[0][0]

--- a/python-package/xgboost/training.py
+++ b/python-package/xgboost/training.py
@@ -40,18 +40,6 @@ def _is_new_callback(callbacks):
               for c in callbacks) or not callbacks


-def _configure_metrics(params):
-    if isinstance(params, dict) and 'eval_metric' in params \
-       and isinstance(params['eval_metric'], list):
-        params = dict((k, v) for k, v in params.items())
-        eval_metrics = params['eval_metric']
-        params.pop("eval_metric", None)
-        params = list(params.items())
-        for eval_metric in eval_metrics:
-            params += [('eval_metric', eval_metric)]
-    return params
-
-
 def _train_internal(params, dtrain,
                    num_boost_round=10, evals=(),
                    obj=None, feval=None,
@@ -61,7 +49,6 @@ def _train_internal(params, dtrain,
    """internal training function"""
    callbacks = [] if callbacks is None else copy.copy(callbacks)
    evals = list(evals)
-    params = _configure_metrics(params.copy())

    bst = Booster(params, [dtrain] + [d[0] for d in evals])
    nboost = 0
--- a/src/cli_main.cc
+++ b/src/cli_main.cc
@@ -268,7 +268,7 @@ class CLI {
    // always save final round
    if ((param_.save_period == 0 ||
         param_.num_round % param_.save_period != 0) &&
-        param_.model_out != CLIParam::kNull && rabit::GetRank() == 0) {
+        rabit::GetRank() == 0) {
      std::ostringstream os;
      if (param_.model_out == CLIParam::kNull) {
        os << param_.model_dir << '/' << std::setfill('0') << std::setw(4)
--- a/src/common/hist_util.h
+++ b/src/common/hist_util.h
@@ -407,9 +407,14 @@ class HistCollection {
  // access histogram for i-th node
  GHistRowT operator[](bst_uint nid) const {
    constexpr uint32_t kMax = std::numeric_limits<uint32_t>::max();
-    CHECK_NE(row_ptr_[nid], kMax);
-    GradientPairT* ptr =
-        const_cast<GradientPairT*>(dmlc::BeginPtr(data_) + row_ptr_[nid]);
+    const size_t id = row_ptr_[nid];
+    CHECK_NE(id, kMax);
+    GradientPairT* ptr = nullptr;
+    if (contiguous_allocation_) {
+      ptr = const_cast<GradientPairT*>(data_[0].data() + nbins_*id);
+    } else {
+      ptr = const_cast<GradientPairT*>(data_[id].data());
+    }
    return {ptr, nbins_};
  }

@@ -438,21 +443,37 @@ class HistCollection {
    }
    CHECK_EQ(row_ptr_[nid], kMax);

-    if (data_.size() < nbins_ * (nid + 1)) {
-      data_.resize(nbins_ * (nid + 1));
+    if (data_.size() < (nid + 1)) {
+      data_.resize((nid + 1));
    }

-    row_ptr_[nid] = nbins_ * n_nodes_added_;
+    row_ptr_[nid] = n_nodes_added_;
    n_nodes_added_++;
  }
+  // allocate thread local memory i-th node
+  void AllocateData(bst_uint nid) {
+    if (data_[row_ptr_[nid]].size() == 0) {
+      data_[row_ptr_[nid]].resize(nbins_, {0, 0});
+    }
+  }
+  // allocate common buffer contiguously for all nodes, need for single Allreduce call
+  void AllocateAllData() {
+    const size_t new_size = nbins_*data_.size();
+    contiguous_allocation_ = true;
+    if (data_[0].size() != new_size) {
+      data_[0].resize(new_size);
+    }
+  }

 private:
  /*! \brief number of all bins over all features */
  uint32_t nbins_ = 0;
  /*! \brief amount of active nodes in hist collection */
  uint32_t n_nodes_added_ = 0;
+  /*! \brief flag to identify contiguous memory allocation */
+  bool contiguous_allocation_ = false;

-  std::vector<GradientPairT> data_;
+  std::vector<std::vector<GradientPairT>> data_;

  /*! \brief row_ptr_[nid] locates bin for histogram of node nid */
  std::vector<size_t> row_ptr_;
@@ -481,7 +502,6 @@ class ParallelGHistBuilder {
             const std::vector<GHistRowT>& targeted_hists) {
    hist_buffer_.Init(nbins_);
    tid_nid_to_hist_.clear();
-    hist_memory_.clear();
    threads_to_nids_map_.clear();

    targeted_hists_ = targeted_hists;
@@ -504,8 +524,11 @@ class ParallelGHistBuilder {
    CHECK_LT(nid, nodes_);
    CHECK_LT(tid, nthreads_);

-    size_t idx = tid_nid_to_hist_.at({tid, nid});
-    GHistRowT hist = hist_memory_[idx];
+    int idx = tid_nid_to_hist_.at({tid, nid});
+    if (idx >= 0) {
+      hist_buffer_.AllocateData(idx);
+    }
+    GHistRowT hist = idx == -1 ? targeted_hists_[nid] : hist_buffer_[idx];

    if (!hist_was_used_[tid * nodes_ + nid]) {
      InitilizeHistByZeroes(hist, 0, hist.size());
@@ -526,8 +549,9 @@ class ParallelGHistBuilder {
    for (size_t tid = 0; tid < nthreads_; ++tid) {
      if (hist_was_used_[tid * nodes_ + nid]) {
        is_updated = true;
-        const size_t idx = tid_nid_to_hist_.at({tid, nid});
-        GHistRowT src = hist_memory_[idx];
+
+        int idx = tid_nid_to_hist_.at({tid, nid});
+        GHistRowT src = idx == -1 ? targeted_hists_[nid] : hist_buffer_[idx];

        if (dst.data() != src.data()) {
          IncrementHist(dst, src, begin, end);
@@ -589,7 +613,6 @@ class ParallelGHistBuilder {
  }

  void MatchNodeNidPairToHist() {
-    size_t hist_total = 0;
    size_t hist_allocated_additionally = 0;

    for (size_t nid = 0; nid < nodes_; ++nid) {
@@ -597,15 +620,11 @@ class ParallelGHistBuilder {
      for (size_t tid = 0; tid < nthreads_; ++tid) {
        if (threads_to_nids_map_[tid * nodes_ + nid]) {
          if (first_hist) {
-            hist_memory_.push_back(targeted_hists_[nid]);
+            tid_nid_to_hist_[{tid, nid}] = -1;
            first_hist = false;
          } else {
-            hist_memory_.push_back(hist_buffer_[hist_allocated_additionally]);
-            hist_allocated_additionally++;
+            tid_nid_to_hist_[{tid, nid}] = hist_allocated_additionally++;
          }
-          // map pair {tid, nid} to index of allocated histogram from hist_memory_
-          tid_nid_to_hist_[{tid, nid}] = hist_total++;
-          CHECK_EQ(hist_total, hist_memory_.size());
        }
      }
    }
@@ -630,10 +649,11 @@ class ParallelGHistBuilder {
  std::vector<bool> threads_to_nids_map_;
  /*! \brief Contains histograms for final results  */
  std::vector<GHistRowT> targeted_hists_;
-  /*! \brief Allocated memory for histograms used for construction  */
-  std::vector<GHistRowT> hist_memory_;
-  /*! \brief map pair {tid, nid} to index of allocated histogram from hist_memory_  */
-  std::map<std::pair<size_t, size_t>, size_t> tid_nid_to_hist_;
+  /*!
+   * \brief map pair {tid, nid} to index of allocated histogram from hist_buffer_ and targeted_hists_,
+   * -1 is reserved for targeted_hists_
+   */
+  std::map<std::pair<size_t, size_t>, int> tid_nid_to_hist_;
 };

 /*!
--- a/src/common/row_set.h
+++ b/src/common/row_set.h
@@ -11,6 +11,7 @@
 #include <algorithm>
 #include <vector>
 #include <utility>
+#include <memory>

 namespace xgboost {
 namespace common {
@@ -150,24 +151,33 @@ class PartitionBuilder {
    }
  }

+  // allocate thread local memory, should be called for each specific task
+  void AllocateForTask(size_t id) {
+    if (mem_blocks_[id].get() == nullptr) {
+      BlockInfo* local_block_ptr = new BlockInfo;
+      CHECK_NE(local_block_ptr, (BlockInfo*)nullptr);
+      mem_blocks_[id].reset(local_block_ptr);
+    }
+  }
+
  common::Span<size_t> GetLeftBuffer(int nid, size_t begin, size_t end) {
    const size_t task_idx = GetTaskIdx(nid, begin);
-    return { mem_blocks_.at(task_idx).Left(), end - begin };
+    return { mem_blocks_.at(task_idx)->Left(), end - begin };
  }

  common::Span<size_t> GetRightBuffer(int nid, size_t begin, size_t end) {
    const size_t task_idx = GetTaskIdx(nid, begin);
-    return { mem_blocks_.at(task_idx).Right(), end - begin };
+    return { mem_blocks_.at(task_idx)->Right(), end - begin };
  }

  void SetNLeftElems(int nid, size_t begin, size_t end, size_t n_left) {
    size_t task_idx = GetTaskIdx(nid, begin);
-    mem_blocks_.at(task_idx).n_left = n_left;
+    mem_blocks_.at(task_idx)->n_left = n_left;
  }

  void SetNRightElems(int nid, size_t begin, size_t end, size_t n_right) {
    size_t task_idx = GetTaskIdx(nid, begin);
-    mem_blocks_.at(task_idx).n_right = n_right;
+    mem_blocks_.at(task_idx)->n_right = n_right;
  }


@@ -185,13 +195,13 @@ class PartitionBuilder {
    for (size_t i = 0; i < blocks_offsets_.size()-1; ++i) {
      size_t n_left = 0;
      for (size_t j = blocks_offsets_[i]; j < blocks_offsets_[i+1]; ++j) {
-        mem_blocks_[j].n_offset_left = n_left;
-        n_left += mem_blocks_[j].n_left;
+        mem_blocks_[j]->n_offset_left = n_left;
+        n_left += mem_blocks_[j]->n_left;
      }
      size_t n_right = 0;
      for (size_t j = blocks_offsets_[i]; j < blocks_offsets_[i+1]; ++j) {
-        mem_blocks_[j].n_offset_right = n_left + n_right;
-        n_right += mem_blocks_[j].n_right;
+        mem_blocks_[j]->n_offset_right = n_left + n_right;
+        n_right += mem_blocks_[j]->n_right;
      }
      left_right_nodes_sizes_[i] = {n_left, n_right};
    }
@@ -200,21 +210,21 @@ class PartitionBuilder {
  void MergeToArray(int nid, size_t begin, size_t* rows_indexes) {
    size_t task_idx = GetTaskIdx(nid, begin);

-    size_t* left_result  = rows_indexes + mem_blocks_[task_idx].n_offset_left;
-    size_t* right_result = rows_indexes + mem_blocks_[task_idx].n_offset_right;
+    size_t* left_result  = rows_indexes + mem_blocks_[task_idx]->n_offset_left;
+    size_t* right_result = rows_indexes + mem_blocks_[task_idx]->n_offset_right;

-    const size_t* left = mem_blocks_[task_idx].Left();
-    const size_t* right = mem_blocks_[task_idx].Right();
+    const size_t* left = mem_blocks_[task_idx]->Left();
+    const size_t* right = mem_blocks_[task_idx]->Right();

-    std::copy_n(left, mem_blocks_[task_idx].n_left, left_result);
-    std::copy_n(right, mem_blocks_[task_idx].n_right, right_result);
+    std::copy_n(left, mem_blocks_[task_idx]->n_left, left_result);
+    std::copy_n(right, mem_blocks_[task_idx]->n_right, right_result);
  }

- protected:
  size_t GetTaskIdx(int nid, size_t begin) {
    return blocks_offsets_[nid] + begin / BlockSize;
  }

+ protected:
  struct BlockInfo{
    size_t n_left;
    size_t n_right;
@@ -230,12 +240,12 @@ class PartitionBuilder {
      return &right_data_[0];
    }
   private:
-    alignas(128) size_t left_data_[BlockSize];
-    alignas(128) size_t right_data_[BlockSize];
+    size_t left_data_[BlockSize];
+    size_t right_data_[BlockSize];
  };
  std::vector<std::pair<size_t, size_t>> left_right_nodes_sizes_;
  std::vector<size_t> blocks_offsets_;
-  std::vector<BlockInfo> mem_blocks_;
+  std::vector<std::shared_ptr<BlockInfo>> mem_blocks_;
  size_t max_n_tasks_ = 0;
 };

--- a/src/learner.cc
+++ b/src/learner.cc
@@ -222,6 +222,10 @@ void GenericParameter::ConfigureGpuId(bool require_gpu) {
      LOG(WARNING) << "No visible GPU is found, setting `gpu_id` to -1";
    }
    this->UpdateAllowUnknown(Args{{"gpu_id", std::to_string(kCpuId)}});
+  } else if (fail_on_invalid_gpu_id) {
+    CHECK(gpu_id == kCpuId || gpu_id < n_gpus)
+      << "Only " << n_gpus << " GPUs are visible, gpu_id "
+      << gpu_id << " is invalid.";
  } else if (gpu_id != kCpuId && gpu_id >= n_gpus) {
    LOG(WARNING) << "Only " << n_gpus
                 << " GPUs are visible, setting `gpu_id` to " << gpu_id % n_gpus;
--- a/src/objective/regression_loss.h
+++ b/src/objective/regression_loss.h
@@ -162,6 +162,9 @@ struct LogisticRaw : public LogisticRegression {
    predt = common::Sigmoid(predt);
    return std::max(predt * (T(1.0f) - predt), eps);
  }
+  static bst_float ProbToMargin(bst_float base_score) {
+    return base_score;
+  }
  static const char* DefaultEvalMetric() { return "auc"; }

  static const char* Name() { return "binary:logitraw"; }
--- a/src/predictor/gpu_predictor.cu
+++ b/src/predictor/gpu_predictor.cu
@@ -580,7 +580,7 @@ class GPUPredictor : public xgboost::Predictor {
      Predictor::Predictor{generic_param} {}

  ~GPUPredictor() override {
-    if (generic_param_->gpu_id >= 0) {
+    if (generic_param_->gpu_id >= 0 && generic_param_->gpu_id < common::AllVisibleGPUs()) {
      dh::safe_cuda(cudaSetDevice(generic_param_->gpu_id));
    }
  }
--- a/src/tree/updater_quantile_hist.cc
+++ b/src/tree/updater_quantile_hist.cc
@@ -182,8 +182,10 @@ void DistributedHistSynchronizer<GradientSumT>::SyncHistograms(BuilderT* builder
    }
  });
  builder->builder_monitor_.Start("SyncHistogramsAllreduce");
+
  builder->histred_.Allreduce(builder->hist_[starting_index].data(),
                                    builder->hist_builder_.GetNumBins() * sync_count);
+
  builder->builder_monitor_.Stop("SyncHistogramsAllreduce");

  ParallelSubtractionHist(builder, space, builder->nodes_for_explicit_hist_build_, p_tree);
@@ -232,7 +234,7 @@ void BatchHistRowsAdder<GradientSumT>::AddHistRows(BuilderT *builder,
  for (auto const& node : builder->nodes_for_subtraction_trick_) {
    builder->hist_.AddHistRow(node.nid);
  }
-
+  builder->hist_.AllocateAllData();
  builder->builder_monitor_.Stop("AddHistRows");
 }

@@ -268,6 +270,8 @@ void DistributedHistRowsAdder<GradientSumT>::AddHistRows(BuilderT *builder,
      builder->hist_local_worker_.AddHistRow(nid);
    }
  }
+  builder->hist_.AllocateAllData();
+  builder->hist_local_worker_.AllocateAllData();
  (*sync_count) = std::max(1, n_left);
  builder->builder_monitor_.Stop("AddHistRows");
 }
@@ -1166,7 +1170,7 @@ template <typename GradientSumT>
 void QuantileHistMaker::Builder<GradientSumT>::ApplySplit(const std::vector<ExpandEntry> nodes,
                                            const GHistIndexMatrix& gmat,
                                            const ColumnMatrix& column_matrix,
-                                            const HistCollection<GradientSumT>&,
+                                            const HistCollection<GradientSumT>& hist,
                                            RegTree* p_tree) {
  builder_monitor_.Start("ApplySplit");
  // 1. Find split condition for each split
@@ -1189,7 +1193,10 @@ void QuantileHistMaker::Builder<GradientSumT>::ApplySplit(const std::vector<Expa
  // 2.3 Split elements of row_set_collection_ to left and right child-nodes for each node
  // Store results in intermediate buffers from partition_builder_
  common::ParallelFor2d(space, this->nthread_, [&](size_t node_in_set, common::Range1d r) {
+    size_t begin = r.begin();
    const int32_t nid = nodes[node_in_set].nid;
+    const size_t task_id = partition_builder_.GetTaskIdx(node_in_set, begin);
+    partition_builder_.AllocateForTask(task_id);
      switch (column_matrix.GetTypeSize()) {
      case common::kUint8BinsTypeSize:
        PartitionKernel<uint8_t>(node_in_set, nid, r,
--- a/tests/ci_build/CentOS-Base.repo
+++ b/tests/ci_build/CentOS-Base.repo
@@ -0,0 +1,37 @@
+[base]
+name=CentOS-$releasever - Base
+baseurl=http://vault.centos.org/centos/$releasever/os/$basearch/
+gpgcheck=1
+gpgkey=file:///etc/pki/rpm-gpg/RPM-GPG-KEY-CentOS-6
+
+#released updates 
+[updates]
+name=CentOS-$releasever - Updates
+baseurl=http://vault.centos.org/centos/$releasever/updates/$basearch/
+gpgcheck=1
+gpgkey=file:///etc/pki/rpm-gpg/RPM-GPG-KEY-CentOS-6
+
+#additional packages that may be useful
+[extras]
+name=CentOS-$releasever - Extras
+baseurl=http://vault.centos.org/centos/$releasever/extras/$basearch/
+gpgcheck=1
+gpgkey=file:///etc/pki/rpm-gpg/RPM-GPG-KEY-CentOS-6
+
+#additional packages that extend functionality of existing packages
+[centosplus]
+name=CentOS-$releasever - Plus
+mirrorlist=http://mirrorlist.centos.org/?release=$releasever&arch=$basearch&repo=centosplus&infra=$infra
+#baseurl=http://mirror.centos.org/centos/$releasever/centosplus/$basearch/
+gpgcheck=1
+enabled=0
+gpgkey=file:///etc/pki/rpm-gpg/RPM-GPG-KEY-CentOS-6
+
+#contrib - packages by Centos Users
+[contrib]
+name=CentOS-$releasever - Contrib
+mirrorlist=http://mirrorlist.centos.org/?release=$releasever&arch=$basearch&repo=contrib&infra=$infra
+#baseurl=http://mirror.centos.org/centos/$releasever/contrib/$basearch/
+gpgcheck=1
+enabled=0
+gpgkey=file:///etc/pki/rpm-gpg/RPM-GPG-KEY-CentOS-6
--- a/tests/ci_build/Dockerfile.auditwheel_x86_64
+++ b/tests/ci_build/Dockerfile.auditwheel_x86_64
@@ -0,0 +1,15 @@
+FROM quay.io/pypa/manylinux2010_x86_64
+
+# Install lightweight sudo (not bound to TTY)
+ENV GOSU_VERSION 1.10
+RUN set -ex; \
+    curl -o /usr/local/bin/gosu -L "https://github.com/tianon/gosu/releases/download/$GOSU_VERSION/gosu-amd64" && \
+    chmod +x /usr/local/bin/gosu && \
+    gosu nobody true
+
+# Default entry-point to use if running locally
+# It will preserve attributes of created files
+COPY entrypoint.sh /scripts/
+
+WORKDIR /workspace
+ENTRYPOINT ["/scripts/entrypoint.sh"]
--- a/tests/ci_build/Dockerfile.gpu
+++ b/tests/ci_build/Dockerfile.gpu
@@ -19,7 +19,7 @@ ENV PATH=/opt/python/bin:$PATH
 # Create new Conda environment with cuDF, Dask, and cuPy
 RUN \
    conda create -n gpu_test -c rapidsai-nightly -c rapidsai -c nvidia -c conda-forge -c defaults \
-        python=3.7 cudf=0.16* rmm=0.16* cudatoolkit=$CUDA_VERSION_ARG dask dask-cuda dask-cudf cupy \
+        python=3.7 cudf=0.17* rmm=0.17* cudatoolkit=$CUDA_VERSION_ARG dask dask-cuda dask-cudf cupy \
        numpy pytest scipy scikit-learn pandas matplotlib wheel python-kubernetes urllib3 graphviz hypothesis

 ENV GOSU_VERSION 1.10
--- a/tests/ci_build/Dockerfile.gpu_build_centos6
+++ b/tests/ci_build/Dockerfile.gpu_build_centos6
@@ -6,12 +6,13 @@ ARG CUDA_VERSION_ARG
 ENV DEBIAN_FRONTEND noninteractive
 ENV DEVTOOLSET_URL_ROOT http://vault.centos.org/6.9/sclo/x86_64/rh/devtoolset-4/

+COPY CentOS-Base.repo /etc/yum.repos.d/
+
 # Install all basic requirements
 RUN \
+    yum install -y epel-release && \
    yum -y update && \
-    yum install -y tar unzip wget xz git centos-release-scl yum-utils && \
-    yum-config-manager --enable centos-sclo-rh-testing && \
-    yum -y update && \
+    yum install -y tar unzip wget xz git patchelf && \
    yum install -y $DEVTOOLSET_URL_ROOT/devtoolset-4-gcc-5.3.1-6.1.el6.x86_64.rpm \
                   $DEVTOOLSET_URL_ROOT/devtoolset-4-gcc-c++-5.3.1-6.1.el6.x86_64.rpm \
                   $DEVTOOLSET_URL_ROOT/devtoolset-4-binutils-2.25.1-8.el6.x86_64.rpm \
@@ -20,6 +21,7 @@ RUN \
    # Python
    wget -O Miniconda3.sh https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh && \
    bash Miniconda3.sh -b -p /opt/python && \
+    /opt/python/bin/python -m pip install auditwheel && \
    # CMake
    wget -nv -nc https://cmake.org/files/v3.13/cmake-3.13.0-Linux-x86_64.sh --no-check-certificate && \
    bash cmake-3.13.0-Linux-x86_64.sh --skip-license --prefix=/usr && \
@@ -29,7 +31,7 @@ RUN \
    wget -nv -nc https://github.com/ninja-build/ninja/archive/v1.10.0.tar.gz --no-check-certificate && \
    tar xf v1.10.0.tar.gz && mv ninja-1.10.0 ninja && rm -v v1.10.0.tar.gz && \
    cd ninja && \
-    python ./configure.py --bootstrap
+    /opt/python/bin/python ./configure.py --bootstrap

 # NCCL2 (License: https://docs.nvidia.com/deeplearning/sdk/nccl-sla/index.html)
 RUN \
--- a/tests/ci_build/Dockerfile.jvm
+++ b/tests/ci_build/Dockerfile.jvm
@@ -2,12 +2,13 @@ FROM centos:6

 ENV DEVTOOLSET_URL_ROOT http://vault.centos.org/6.9/sclo/x86_64/rh/devtoolset-4/

+COPY CentOS-Base.repo /etc/yum.repos.d/
+
 # Install all basic requirements
 RUN \
+    yum install -y epel-release && \
    yum -y update && \
-    yum install -y tar unzip wget xz git centos-release-scl yum-utils java-1.8.0-openjdk-devel && \
-    yum-config-manager --enable centos-sclo-rh-testing && \
-    yum -y update && \
+    yum install -y tar unzip wget xz git java-1.8.0-openjdk-devel && \
    yum install -y $DEVTOOLSET_URL_ROOT/devtoolset-4-gcc-5.3.1-6.1.el6.x86_64.rpm \
                   $DEVTOOLSET_URL_ROOT/devtoolset-4-gcc-c++-5.3.1-6.1.el6.x86_64.rpm \
                   $DEVTOOLSET_URL_ROOT/devtoolset-4-binutils-2.25.1-8.el6.x86_64.rpm \
@@ -31,7 +32,7 @@ ENV CPP=/opt/rh/devtoolset-4/root/usr/bin/cpp

 # Install Python packages
 RUN \
-    pip install numpy pytest scipy scikit-learn wheel kubernetes urllib3==1.22 awscli
+    pip install numpy pytest scipy scikit-learn wheel kubernetes urllib3==1.25.10 awscli

 ENV GOSU_VERSION 1.10

--- a/tests/ci_build/Dockerfile.jvm_gpu_build
+++ b/tests/ci_build/Dockerfile.jvm_gpu_build
@@ -6,12 +6,13 @@ ARG CUDA_VERSION_ARG
 ENV DEBIAN_FRONTEND noninteractive
 ENV DEVTOOLSET_URL_ROOT http://vault.centos.org/6.9/sclo/x86_64/rh/devtoolset-4/

+COPY CentOS-Base.repo /etc/yum.repos.d/
+
 # Install all basic requirements
 RUN \
+    yum install -y epel-release && \
    yum -y update && \
-    yum install -y tar unzip wget xz git centos-release-scl yum-utils java-1.8.0-openjdk-devel && \
-    yum-config-manager --enable centos-sclo-rh-testing && \
-    yum -y update && \
+    yum install -y tar unzip wget xz git java-1.8.0-openjdk-devel && \
    yum install -y $DEVTOOLSET_URL_ROOT/devtoolset-4-gcc-5.3.1-6.1.el6.x86_64.rpm \
                   $DEVTOOLSET_URL_ROOT/devtoolset-4-gcc-c++-5.3.1-6.1.el6.x86_64.rpm \
                   $DEVTOOLSET_URL_ROOT/devtoolset-4-binutils-2.25.1-8.el6.x86_64.rpm \
@@ -45,7 +46,7 @@ ENV CPP=/opt/rh/devtoolset-4/root/usr/bin/cpp

 # Install Python packages
 RUN \
-    pip install numpy pytest scipy scikit-learn wheel kubernetes urllib3==1.22 awscli
+    pip install numpy pytest scipy scikit-learn wheel kubernetes urllib3==1.25.10 awscli

 ENV GOSU_VERSION 1.10

--- a/tests/ci_build/Dockerfile.rmm
+++ b/tests/ci_build/Dockerfile.rmm
@@ -29,7 +29,7 @@ ENV PATH=/opt/python/bin:$PATH
 # Create new Conda environment with RMM
 RUN \
    conda create -n gpu_test -c nvidia -c rapidsai-nightly -c rapidsai -c conda-forge -c defaults \
-        python=3.7 rmm=0.16* cudatoolkit=$CUDA_VERSION_ARG
+        python=3.7 rmm=0.17* cudatoolkit=$CUDA_VERSION_ARG

 ENV GOSU_VERSION 1.10

--- a/tests/ci_build/conda_env/aarch64_test.yml
+++ b/tests/ci_build/conda_env/aarch64_test.yml
@@ -27,3 +27,4 @@ dependencies:
 - pip:
  - shap
  - awscli
+  - auditwheel
--- a/tests/cpp/common/test_hist_util.cc
+++ b/tests/cpp/common/test_hist_util.cc
@@ -35,7 +35,7 @@ void ParallelGHistBuilderReset() {
  for(size_t inode = 0; inode < kNodesExtended; inode++) {
    collection.AddHistRow(inode);
  }
-
+  collection.AllocateAllData();
  ParallelGHistBuilder<GradientSumT> hist_builder;
  hist_builder.Init(kBins);
  std::vector<GHistRow<GradientSumT>> target_hist(kNodes);
@@ -91,7 +91,7 @@ void ParallelGHistBuilderReduceHist(){
  for(size_t inode = 0; inode < kNodes; inode++) {
    collection.AddHistRow(inode);
  }
-
+  collection.AllocateAllData();
  ParallelGHistBuilder<GradientSumT> hist_builder;
  hist_builder.Init(kBins);
  std::vector<GHistRow<GradientSumT>> target_hist(kNodes);
--- a/tests/cpp/common/test_partition_builder.cc
+++ b/tests/cpp/common/test_partition_builder.cc
@@ -32,6 +32,8 @@ TEST(PartitionBuilder, BasicTest) {
    for(size_t j = 0; j < tasks[nid]; ++j) {
      size_t begin = kBlockSize*j;
      size_t end = kBlockSize*(j+1);
+      const size_t id = builder.GetTaskIdx(nid, begin);
+      builder.AllocateForTask(id);

      auto left  = builder.GetLeftBuffer(nid, begin, end);
      auto right = builder.GetRightBuffer(nid, begin, end);
--- a/tests/cpp/tree/test_quantile_hist.cc
+++ b/tests/cpp/tree/test_quantile_hist.cc
@@ -274,6 +274,7 @@ class QuantileHistMock : public QuantileHistMaker {
      RealImpl::InitData(gmat, gpair, fmat, tree);
      GHistIndexBlockMatrix dummy;
      this->hist_.AddHistRow(nid);
+      this->hist_.AllocateAllData();
      this->BuildHist(gpair, this->row_set_collection_[nid],
                gmat, dummy, this->hist_[nid]);

@@ -315,7 +316,7 @@ class QuantileHistMock : public QuantileHistMaker {

      RealImpl::InitData(gmat, row_gpairs, *dmat, tree);
      this->hist_.AddHistRow(0);
-
+      this->hist_.AllocateAllData();
      this->BuildHist(row_gpairs, this->row_set_collection_[0],
                      gmat, quantile_index_block, this->hist_[0]);

@@ -411,7 +412,7 @@ class QuantileHistMock : public QuantileHistMaker {
        cm.Init(gmat, 0.0);
        RealImpl::InitData(gmat, row_gpairs, *dmat, tree);
        this->hist_.AddHistRow(0);
-
+        this->hist_.AllocateAllData();
        RealImpl::InitNewNode(0, gmat, row_gpairs, *dmat, tree);

        const size_t num_row = dmat->Info().num_row_;
@@ -449,6 +450,8 @@ class QuantileHistMock : public QuantileHistMaker {
          RealImpl::partition_builder_.Init(1, 1, [&](size_t node_in_set) {
            return 1;
          });
+          const size_t task_id = RealImpl::partition_builder_.GetTaskIdx(0, 0);
+          RealImpl::partition_builder_.AllocateForTask(task_id);
          this->template PartitionKernel<uint8_t>(0, 0, common::Range1d(0, kNRows),
                                                  split, cm, tree);
          RealImpl::partition_builder_.CalculateRowOffsets();
--- a/tests/python-gpu/test_gpu_basic_models.py
+++ b/tests/python-gpu/test_gpu_basic_models.py
@@ -52,3 +52,17 @@ class TestGPUBasicModels:

        model_0, model_1 = self.run_cls(X, y, False)
        assert model_0 != model_1
+
+    def test_invalid_gpu_id(self):
+        X = np.random.randn(10, 5) * 1e4
+        y = np.random.randint(0, 2, size=10) * 1e4
+        # should pass with invalid gpu id
+        cls1 = xgb.XGBClassifier(tree_method='gpu_hist', gpu_id=9999)
+        cls1.fit(X, y)
+        # should throw error with fail_on_invalid_gpu_id enabled
+        cls2 = xgb.XGBClassifier(tree_method='gpu_hist', gpu_id=9999, fail_on_invalid_gpu_id=True)
+        try:
+            cls2.fit(X, y)
+            assert False, "Should have failed with with fail_on_invalid_gpu_id enabled"
+        except xgb.core.XGBoostError as err:
+            assert "gpu_id 9999 is invalid" in str(err)
--- a/tests/python/generate_models.py
+++ b/tests/python/generate_models.py
@@ -64,22 +64,24 @@ def generate_logistic_model():
    y = np.random.randint(0, 2, size=kRows)
    assert y.max() == 1 and y.min() == 0

+    for objective, name in [('binary:logistic', 'logit'), ('binary:logitraw', 'logitraw')]:
        data = xgboost.DMatrix(X, label=y, weight=w)
        booster = xgboost.train({'tree_method': 'hist',
                                 'num_parallel_tree': kForests,
                                 'max_depth': kMaxDepth,
-                             'objective': 'binary:logistic'},
+                                 'objective': objective},
                                num_boost_round=kRounds, dtrain=data)
-    booster.save_model(booster_bin('logit'))
-    booster.save_model(booster_json('logit'))
+        booster.save_model(booster_bin(name))
+        booster.save_model(booster_json(name))

        reg = xgboost.XGBClassifier(tree_method='hist',
                                    num_parallel_tree=kForests,
                                    max_depth=kMaxDepth,
-                                n_estimators=kRounds)
+                                    n_estimators=kRounds,
+                                    objective=objective)
        reg.fit(X, y, w)
-    reg.save_model(skl_bin('logit'))
-    reg.save_model(skl_json('logit'))
+        reg.save_model(skl_bin(name))
+        reg.save_model(skl_json(name))


 def generate_classification_model():
--- a/tests/python/test_basic.py
+++ b/tests/python/test_basic.py
@@ -57,6 +57,25 @@ class TestBasic:
            # assert they are the same
            assert np.sum(np.abs(preds2 - preds)) == 0

+    def test_metric_config(self):
+        # Make sure that the metric configuration happens in booster so the
+        # string `['error', 'auc']` doesn't get passed down to core.
+        dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train')
+        dtest = xgb.DMatrix(dpath + 'agaricus.txt.test')
+        param = {'max_depth': 2, 'eta': 1, 'verbosity': 0,
+                 'objective': 'binary:logistic', 'eval_metric': ['error', 'auc']}
+        watchlist = [(dtest, 'eval'), (dtrain, 'train')]
+        num_round = 2
+        booster = xgb.train(param, dtrain, num_round, watchlist)
+        predt_0 = booster.predict(dtrain)
+        with tempfile.TemporaryDirectory() as tmpdir:
+            path = os.path.join(tmpdir, 'model.json')
+            booster.save_model(path)
+
+            booster = xgb.Booster(params=param, model_file=path)
+            predt_1 = booster.predict(dtrain)
+            np.testing.assert_allclose(predt_0, predt_1)
+
    def test_record_results(self):
        dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train')
        dtest = xgb.DMatrix(dpath + 'agaricus.txt.test')
@@ -124,8 +143,8 @@ class TestBasic:

        dump2 = bst.get_dump(with_stats=True)
        assert dump2[0].count('\n') == 3, 'Expected 1 root and 2 leaves - 3 lines in dump.'
-        assert (dump2[0].find('\n') > dump1[0].find('\n'),
-                'Expected more info when with_stats=True is given.')
+        msg = 'Expected more info when with_stats=True is given.'
+        assert dump2[0].find('\n') > dump1[0].find('\n'), msg

        dump3 = bst.get_dump(dump_format="json")
        dump3j = json.loads(dump3[0])
@@ -248,13 +267,11 @@ class TestBasicPathLike:
        assert binary_path.exists()
        Path.unlink(binary_path)

-
    def test_Booster_init_invalid_path(self):
        """An invalid model_file path should raise XGBoostError."""
        with pytest.raises(xgb.core.XGBoostError):
            xgb.Booster(model_file=Path("invalidpath"))

-
    def test_Booster_save_and_load(self):
        """Saving and loading model files from paths."""
        save_path = Path("saveload.model")
--- a/tests/python/test_callback.py
+++ b/tests/python/test_callback.py
@@ -22,6 +22,30 @@ class TestCallbacks:
        cls.X_valid = X[split:, ...]
        cls.y_valid = y[split:, ...]

+    def run_evaluation_monitor(self, D_train, D_valid, rounds, verbose_eval):
+        evals_result = {}
+        with tm.captured_output() as (out, err):
+            xgb.train({'objective': 'binary:logistic',
+                       'eval_metric': 'error'}, D_train,
+                      evals=[(D_train, 'Train'), (D_valid, 'Valid')],
+                      num_boost_round=rounds,
+                      evals_result=evals_result,
+                      verbose_eval=verbose_eval)
+            output: str = out.getvalue().strip()
+
+        if int(verbose_eval) == 1:
+            # Should print each iteration info
+            assert len(output.split('\n')) == rounds
+        elif int(verbose_eval) > rounds:
+            # Should print first and latest iteration info
+            assert len(output.split('\n')) == 2
+        else:
+            # Should print info for each period in addition to first and latest iteration
+            num_periods = rounds // int(verbose_eval)
+            # Extra information is required for latest iteration
+            is_extra_info_required = num_periods * int(verbose_eval) < (rounds - 1) 
+            assert len(output.split('\n')) == 1 + num_periods + int(is_extra_info_required)
+
    def test_evaluation_monitor(self):
        D_train = xgb.DMatrix(self.X_train, self.y_train)
        D_valid = xgb.DMatrix(self.X_valid, self.y_valid)
@@ -36,23 +60,10 @@ class TestCallbacks:
        assert len(evals_result['Train']['error']) == rounds
        assert len(evals_result['Valid']['error']) == rounds

-        with tm.captured_output() as (out, err):
-            xgb.train({'objective': 'binary:logistic',
-                       'eval_metric': 'error'}, D_train,
-                      evals=[(D_train, 'Train'), (D_valid, 'Valid')],
-                      num_boost_round=rounds,
-                      evals_result=evals_result,
-                      verbose_eval=2)
-            output: str = out.getvalue().strip()
-
-        pos = 0
-        msg = 'Train-error'
-        for i in range(rounds // 2):
-            pos = output.find('Train-error', pos)
-            assert pos != -1
-            pos += len(msg)
-
-        assert output.find('Train-error', pos) == -1
+        self.run_evaluation_monitor(D_train, D_valid, rounds, True)
+        self.run_evaluation_monitor(D_train, D_valid, rounds, 2)
+        self.run_evaluation_monitor(D_train, D_valid, rounds, 4)
+        self.run_evaluation_monitor(D_train, D_valid, rounds, rounds + 1)        

    def test_early_stopping(self):
        D_train = xgb.DMatrix(self.X_train, self.y_train)
@@ -142,7 +153,7 @@ class TestCallbacks:
                eval_metric=tm.eval_error_metric, callbacks=[early_stop])
        booster = cls.get_booster()
        dump = booster.get_dump(dump_format='json')
-        assert len(dump) == booster.best_iteration
+        assert len(dump) == booster.best_iteration + 1

        early_stop = xgb.callback.EarlyStopping(rounds=early_stopping_rounds,
                                                save_best=True)
--- a/tests/python/test_cli.py
+++ b/tests/python/test_cli.py
@@ -22,6 +22,7 @@ model_in = {model_in}
 model_out = {model_out}
 test_path = {test_path}
 name_pred = {name_pred}
+model_dir = {model_dir}

 num_round = 10
 data = {data_path}
@@ -59,7 +60,8 @@ eval[test] = {data_path}
                                              model_in='NULL',
                                              model_out=model_out_cli,
                                              test_path='NULL',
-                                              name_pred='NULL')
+                                              name_pred='NULL',
+                                              model_dir='NULL')
            with open(config_path, 'w') as fd:
                fd.write(train_conf)

@@ -73,7 +75,8 @@ eval[test] = {data_path}
                                                model_in=model_out_cli,
                                                model_out='NULL',
                                                test_path=data_path,
-                                                name_pred=predict_out)
+                                                name_pred=predict_out,
+                                                model_dir='NULL')
            with open(config_path, 'w') as fd:
                fd.write(predict_conf)

@@ -145,7 +148,8 @@ eval[test] = {data_path}
                                              model_in='NULL',
                                              model_out=model_out_cli,
                                              test_path='NULL',
-                                              name_pred='NULL')
+                                              name_pred='NULL',
+                                              model_dir='NULL')
            with open(config_path, 'w') as fd:
                fd.write(train_conf)

@@ -154,3 +158,28 @@ eval[test] = {data_path}
                model = json.load(fd)

            assert model['learner']['gradient_booster']['name'] == 'gbtree'
+
+    def test_cli_save_model(self):
+        '''Test save on final round'''
+        exe = self.get_exe()
+        data_path = "{root}/demo/data/agaricus.txt.train?format=libsvm".format(
+            root=self.PROJECT_ROOT)
+        seed = 1994
+
+        with tempfile.TemporaryDirectory() as tmpdir:
+            model_out_cli = os.path.join(tmpdir, '0010.model')
+            config_path = os.path.join(tmpdir, 'test_load_cli_model.conf')
+
+            train_conf = self.template.format(data_path=data_path,
+                                              seed=seed,
+                                              task='train',
+                                              model_in='NULL',
+                                              model_out='NULL',
+                                              test_path='NULL',
+                                              name_pred='NULL',
+                                              model_dir=tmpdir)
+            with open(config_path, 'w') as fd:
+                fd.write(train_conf)
+
+            subprocess.run([exe, config_path])
+            assert os.path.exists(model_out_cli)
--- a/tests/python/test_model_compatibility.py
+++ b/tests/python/test_model_compatibility.py
@@ -24,6 +24,10 @@ def run_booster_check(booster, name):
            config['learner']['learner_model_param']['base_score']) == 0.5
        assert config['learner']['learner_train_param'][
            'objective'] == 'multi:softmax'
+    elif name.find('logitraw') != -1:
+        assert len(booster.get_dump()) == gm.kForests * gm.kRounds
+        assert config['learner']['learner_model_param']['num_class'] == str(0)
+        assert config['learner']['learner_train_param']['objective'] == 'binary:logitraw'
    elif name.find('logit') != -1:
        assert len(booster.get_dump()) == gm.kForests * gm.kRounds
        assert config['learner']['learner_model_param']['num_class'] == str(0)
@@ -77,6 +81,13 @@ def run_scikit_model_check(name, path):
        assert config['learner']['learner_train_param'][
            'objective'] == 'rank:ndcg'
        run_model_param_check(config)
+    elif name.find('logitraw') != -1:
+        logit = xgboost.XGBClassifier()
+        logit.load_model(path)
+        assert (len(logit.get_booster().get_dump()) ==
+                gm.kRounds * gm.kForests)
+        config = json.loads(logit.get_booster().save_config())
+        assert config['learner']['learner_train_param']['objective'] == 'binary:logitraw'
    elif name.find('logit') != -1:
        logit = xgboost.XGBClassifier()
        logit.load_model(path)
--- a/tests/python/test_with_sklearn.py
+++ b/tests/python/test_with_sklearn.py
@@ -399,6 +399,21 @@ def test_classification_with_custom_objective():
        X, y
    )

+    cls = xgb.XGBClassifier(use_label_encoder=False, n_estimators=1)
+    cls.fit(X, y)
+
+    is_called = [False]
+
+    def wrapped(y, p):
+        is_called[0] = True
+        return logregobj(y, p)
+
+    cls.set_params(objective=wrapped)
+    cls.predict(X)              # no throw
+    cls.fit(X, y)
+
+    assert is_called[0]
+

 def test_sklearn_api():
    from sklearn.datasets import load_iris
--- a/tests/travis/run_test.sh
+++ b/tests/travis/run_test.sh
@@ -34,6 +34,10 @@ if [ ${TASK} == "python_test" ]; then
      tests/ci_build/ci_build.sh aarch64 docker bash -c "cd python-package && rm -rf dist/* && python setup.py bdist_wheel --universal"
      TAG=manylinux2014_aarch64
      tests/ci_build/ci_build.sh aarch64 docker python tests/ci_build/rename_whl.py python-package/dist/*.whl ${TRAVIS_COMMIT} ${TAG}
+      tests/ci_build/ci_build.sh aarch64 docker auditwheel repair --plat ${TAG} python-package/dist/*.whl
+      mv -v wheelhouse/*.whl python-package/dist/
+      # Make sure that libgomp.so is vendored in the wheel
+      unzip -l python-package/dist/*.whl | grep libgomp  || exit -1
    else
      rm -rf build
      mkdir build && cd build
Author	SHA1	Message	Date
Philip Hyunsu Cho	a78d0d4110	Release patch release 1.3.1 (#6543 )	2020-12-21 23:22:32 -08:00
Jiaming Yuan	76c361431f	Remove cupy.array_equal, since it's not compatible with cuPy 7.8 (#6528 ) (#6535 ) Co-authored-by: Philip Hyunsu Cho <chohyu01@cs.washington.edu>	2020-12-20 15:11:50 +08:00
Jiaming Yuan	d95d02132a	Fix handling of print period in EvaluationMonitor (#6499 ) (#6534 ) Co-authored-by: Kirill Shvets <kirill.shvets@intel.com> Co-authored-by: ShvetsKS <33296480+ShvetsKS@users.noreply.github.com> Co-authored-by: Kirill Shvets <kirill.shvets@intel.com>	2020-12-20 15:07:42 +08:00
Jiaming Yuan	7109c6c1f2	[backport] Move metric configuration into booster. (#6504 ) (#6533 )	2020-12-20 10:36:32 +08:00
Jiaming Yuan	bce7ca313c	[backport] Fix `save_best`. (#6523 )	2020-12-18 20:00:29 +08:00
Jiaming Yuan	8be2cd8c91	Enable loading model from <1.0.0 trained with objective='binary:logitraw' (#6517 ) (#6524 ) * Enable loading model from <1.0.0 trained with objective='binary:logitraw' * Add binary:logitraw in model compatibility testing suite * Feedback from @trivialfis: Override ProbToMargin() for LogisticRaw Co-authored-by: Jiaming Yuan <jm.yuan@outlook.com> Co-authored-by: Philip Hyunsu Cho <chohyu01@cs.washington.edu>	2020-12-18 04:10:09 +08:00
Philip Hyunsu Cho	c5f0cdbc72	Hot fix for libgomp vendoring (#6482 ) * Hot fix for libgomp vendoring * Set post0 in setup.py	2020-12-09 10:04:45 -08:00
Jiaming Yuan	1bf3899983	Fix dask ip resolution. (#6475 ) This adopts the solution used in dask/dask-xgboost#40 which employs the get_host_ip from dmlc-core tracker.	2020-12-07 16:38:16 -08:00
Jiaming Yuan	c39f6b25f0	Fix filtering callable objects in skl xgb param. (#6466 ) Co-authored-by: Hyunsu Cho <chohyu01@cs.washington.edu>	2020-12-07 16:38:16 -08:00
Philip Hyunsu Cho	2b3e301543	[CI] Fix CentOS 6 Docker images (#6467 )	2020-12-07 16:38:16 -08:00
Hyunsu Cho	10d3419fa6	Release 1.3.0	2020-12-03 21:35:09 -08:00
Philip Hyunsu Cho	b273e5bd4c	Vendor libgomp in the manylinux Python wheel (#6461 ) * Vendor libgomp in the manylinux2014_aarch64 wheel * Use vault repo, since CentOS 6 has reached End-of-Life on Nov 30 * Vendor libgomp in the manylinux2010_x86_64 wheel * Run verification step inside the container	2020-12-03 21:29:40 -08:00
Philip Hyunsu Cho	3a83fcb0eb	Enforce row-major order in cuPy array (#6459 )	2020-12-03 21:29:24 -08:00
hzy001	3efc4ea0d1	Fix broken links. (#6455 ) Co-authored-by: Hao Ziyu <haoziyu@qiyi.com> Co-authored-by: fis <jm.yuan@outlook.com>	2020-12-03 21:29:03 -08:00
Jiaming Yuan	a2c778e2d1	Fix period in evaluation monitor. (#6441 )	2020-12-03 21:28:45 -08:00
Jiaming Yuan	8a0db293c5	Fix CLI ranking demo. (#6439 ) Save model at final round.	2020-12-03 21:28:28 -08:00
Honza Sterba	028ec5f028	Optionaly fail when gpu_id is set to invalid value (#6342 )	2020-12-03 21:27:58 -08:00
ShvetsKS	38c80bcec4	Thread local memory allocation for BuildHist (#6358 ) * thread mem locality * fix apply * cleanup * fix lint * fix tests * simple try * fix * fix * apply comments * fix comments * fix * apply simple comment Co-authored-by: ShvetsKS <kirill.shvets@intel.com>	2020-12-03 21:27:31 -08:00
Philip Hyunsu Cho	16ff63905d	[CI] Upgrade cuDF and RMM to 0.17 nightlies (#6434 )	2020-12-03 21:27:01 -08:00
Philip Hyunsu Cho	a9b09919f9	[R] Fix R package installation via CMake (#6423 )	2020-12-03 21:26:29 -08:00
Hyunsu Cho	f3b060401a	Release 1.3.0 RC1	2020-11-21 11:36:08 -08:00