[CI] Upload Doxygen to correct destination

Bump release version to 1.3.3. (#6624)
Revert ntree limit fix (#6616) (#6622)
2021-04-13 15:09:53 -07:00 · 2021-01-20 19:23:31 +08:00 · 2021-01-20 04:20:07 +08:00 · 2021-01-15 18:20:39 +08:00 · 2021-01-13 17:35:00 +08:00 · 2021-01-13 04:44:06 +08:00
35 changed files with 278 additions and 143 deletions
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -192,7 +192,7 @@ jobs:
      run: |
        cd build/
        tar cvjf ${{ steps.extract_branch.outputs.branch }}.tar.bz2 doc_doxygen/
-        python -m awscli s3 cp ./${{ steps.extract_branch.outputs.branch }}.tar.bz2 s3://xgboost-docs/ --acl public-read
+        python -m awscli s3 cp ./${{ steps.extract_branch.outputs.branch }}.tar.bz2 s3://xgboost-docs/doxygen/ --acl public-read
      if: github.ref == 'refs/heads/master' || contains(github.ref, 'refs/heads/release_')
      env:
        AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID_IAM_S3_UPLOADER }}
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,5 +1,5 @@
 cmake_minimum_required(VERSION 3.13)
-project(xgboost LANGUAGES CXX C VERSION 1.3.0)
+project(xgboost LANGUAGES CXX C VERSION 1.3.3)
 include(cmake/Utils.cmake)
 list(APPEND CMAKE_MODULE_PATH "${xgboost_SOURCE_DIR}/cmake/modules")
 cmake_policy(SET CMP0022 NEW)
--- a/4
+++ b/4
@@ -198,10 +198,10 @@ def BuildCUDA(args) {
    """
    if (args.cuda_version == ref_cuda_ver) {
      sh """
-      ${dockerRun} ${container_type} ${docker_binary} ${docker_args} auditwheel repair --plat ${wheel_tag} python-package/dist/*.whl
+      ${dockerRun} auditwheel_x86_64 ${docker_binary} auditwheel repair --plat ${wheel_tag} python-package/dist/*.whl
      mv -v wheelhouse/*.whl python-package/dist/
      # Make sure that libgomp.so is vendored in the wheel
-      ${dockerRun} ${container_type} ${docker_binary} ${docker_args} bash -c "unzip -l python-package/dist/*.whl | grep libgomp  || exit -1"
+      ${dockerRun} auditwheel_x86_64 ${docker_binary} bash -c "unzip -l python-package/dist/*.whl | grep libgomp  || exit -1"
      """
    }
    echo 'Stashing Python wheel...'
--- a/R-package/DESCRIPTION
+++ b/R-package/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: xgboost
 Type: Package
 Title: Extreme Gradient Boosting
-Version: 1.3.0.1
+Version: 1.3.3.1
 Date: 2020-08-28
 Authors@R: c(
  person("Tianqi", "Chen", role = c("aut"),
--- a/R-package/tests/helper_scripts/generate_models.R
+++ b/R-package/tests/helper_scripts/generate_models.R
@@ -2,7 +2,6 @@
 # of saved model files from XGBoost version 0.90 and 1.0.x.
 library(xgboost)
 library(Matrix)
 source('./generate_models_params.R')
 set.seed(0)
 metadata <- list(
@@ -53,11 +52,16 @@ generate_logistic_model <- function () {
  y <- sample(0:1, size = metadata$kRows, replace = TRUE)
  stopifnot(max(y) == 1, min(y) == 0)
-  data <- xgb.DMatrix(X, label = y, weight = w)
+  objective <- c('binary:logistic', 'binary:logitraw')
-  params <- list(tree_method = 'hist', num_parallel_tree = metadata$kForests,
+  name <- c('logit', 'logitraw')
-                 max_depth = metadata$kMaxDepth, objective = 'binary:logistic')
+
-  booster <- xgb.train(params, data, nrounds = metadata$kRounds)
+  for (i in seq_len(length(objective))) {
-  save_booster(booster, 'logit')
+    data <- xgb.DMatrix(X, label = y, weight = w)
    params <- list(tree_method = 'hist', num_parallel_tree = metadata$kForests,
                   max_depth = metadata$kMaxDepth, objective = objective[i])
    booster <- xgb.train(params, data, nrounds = metadata$kRounds)
    save_booster(booster, name[i])
  }
 }
 generate_classification_model <- function () {
--- a/R-package/tests/testthat/test_model_compatibility.R
+++ b/R-package/tests/testthat/test_model_compatibility.R
@@ -39,6 +39,10 @@ run_booster_check <- function (booster, name) {
    testthat::expect_equal(config$learner$learner_train_param$objective, 'multi:softmax')
    testthat::expect_equal(as.numeric(config$learner$learner_model_param$num_class),
                           metadata$kClasses)
  } else if (name == 'logitraw') {
    testthat::expect_equal(get_num_tree(booster), metadata$kForests * metadata$kRounds)
    testthat::expect_equal(as.numeric(config$learner$learner_model_param$num_class), 0)
    testthat::expect_equal(config$learner$learner_train_param$objective, 'binary:logitraw')
  } else if (name == 'logit') {
    testthat::expect_equal(get_num_tree(booster), metadata$kForests * metadata$kRounds)
    testthat::expect_equal(as.numeric(config$learner$learner_model_param$num_class), 0)
--- a/include/xgboost/base.h
+++ b/include/xgboost/base.h
@@ -55,7 +55,7 @@
 #endif  // defined(__GNUC__) && ((__GNUC__ == 4 && __GNUC_MINOR__ >= 8) || __GNUC__ > 4)
 #if defined(__GNUC__) && ((__GNUC__ == 4 && __GNUC_MINOR__ >= 8) || __GNUC__ > 4) && \
-    !defined(__CUDACC__)
+    !defined(__CUDACC__) && !defined(__sun) && !defined(sun)
 #include <parallel/algorithm>
 #define XGBOOST_PARALLEL_SORT(X, Y, Z) __gnu_parallel::sort((X), (Y), (Z))
 #define XGBOOST_PARALLEL_STABLE_SORT(X, Y, Z) \
--- a/include/xgboost/version_config.h
+++ b/include/xgboost/version_config.h
@@ -6,6 +6,6 @@
 #define XGBOOST_VER_MAJOR 1
 #define XGBOOST_VER_MINOR 3
-#define XGBOOST_VER_PATCH 0
+#define XGBOOST_VER_PATCH 3
 #endif  // XGBOOST_VERSION_CONFIG_H_
--- a/jvm-packages/pom.xml
+++ b/jvm-packages/pom.xml
@@ -6,7 +6,7 @@
    <groupId>ml.dmlc</groupId>
    <artifactId>xgboost-jvm_2.12</artifactId>
-    <version>1.3.0</version>
+    <version>1.3.3</version>
    <packaging>pom</packaging>
    <name>XGBoost JVM Package</name>
    <description>JVM Package for XGBoost</description>
--- a/jvm-packages/xgboost4j-example/pom.xml
+++ b/jvm-packages/xgboost4j-example/pom.xml
@@ -6,10 +6,10 @@
    <parent>
        <groupId>ml.dmlc</groupId>
        <artifactId>xgboost-jvm_2.12</artifactId>
-        <version>1.3.0</version>
+        <version>1.3.3</version>
    </parent>
    <artifactId>xgboost4j-example_2.12</artifactId>
-    <version>1.3.0</version>
+    <version>1.3.3</version>
    <packaging>jar</packaging>
    <build>
        <plugins>
@@ -26,7 +26,7 @@
        <dependency>
            <groupId>ml.dmlc</groupId>
            <artifactId>xgboost4j-spark_${scala.binary.version}</artifactId>
-            <version>1.3.0</version>
+            <version>1.3.3</version>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
@@ -37,7 +37,7 @@
        <dependency>
            <groupId>ml.dmlc</groupId>
            <artifactId>xgboost4j-flink_${scala.binary.version}</artifactId>
-            <version>1.3.0</version>
+            <version>1.3.3</version>
        </dependency>
        <dependency>
            <groupId>org.apache.commons</groupId>
--- a/jvm-packages/xgboost4j-flink/pom.xml
+++ b/jvm-packages/xgboost4j-flink/pom.xml
@@ -6,10 +6,10 @@
    <parent>
        <groupId>ml.dmlc</groupId>
        <artifactId>xgboost-jvm_2.12</artifactId>
-        <version>1.3.0</version>
+        <version>1.3.3</version>
    </parent>
    <artifactId>xgboost4j-flink_2.12</artifactId>
-    <version>1.3.0</version>
+    <version>1.3.3</version>
    <build>
        <plugins>
            <plugin>
@@ -26,7 +26,7 @@
        <dependency>
            <groupId>ml.dmlc</groupId>
            <artifactId>xgboost4j_${scala.binary.version}</artifactId>
-            <version>1.3.0</version>
+            <version>1.3.3</version>
        </dependency>
        <dependency>
            <groupId>org.apache.commons</groupId>
--- a/jvm-packages/xgboost4j-gpu/pom.xml
+++ b/jvm-packages/xgboost4j-gpu/pom.xml
@@ -6,10 +6,10 @@
    <parent>
        <groupId>ml.dmlc</groupId>
        <artifactId>xgboost-jvm_2.12</artifactId>
-        <version>1.3.0</version>
+        <version>1.3.3</version>
    </parent>
    <artifactId>xgboost4j-gpu_2.12</artifactId>
-    <version>1.3.0</version>
+    <version>1.3.3</version>
    <packaging>jar</packaging>
    <dependencies>
--- a/jvm-packages/xgboost4j-spark-gpu/pom.xml
+++ b/jvm-packages/xgboost4j-spark-gpu/pom.xml
@@ -6,7 +6,7 @@
    <parent>
        <groupId>ml.dmlc</groupId>
        <artifactId>xgboost-jvm_2.12</artifactId>
-        <version>1.3.0</version>
+        <version>1.3.3</version>
    </parent>
    <artifactId>xgboost4j-spark-gpu_2.12</artifactId>
    <build>
@@ -24,7 +24,7 @@
        <dependency>
            <groupId>ml.dmlc</groupId>
            <artifactId>xgboost4j-gpu_${scala.binary.version}</artifactId>
-            <version>1.3.0</version>
+            <version>1.3.3</version>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
--- a/jvm-packages/xgboost4j-spark/pom.xml
+++ b/jvm-packages/xgboost4j-spark/pom.xml
@@ -6,7 +6,7 @@
    <parent>
        <groupId>ml.dmlc</groupId>
        <artifactId>xgboost-jvm_2.12</artifactId>
-        <version>1.3.0</version>
+        <version>1.3.3</version>
    </parent>
    <artifactId>xgboost4j-spark_2.12</artifactId>
    <build>
@@ -24,7 +24,7 @@
        <dependency>
            <groupId>ml.dmlc</groupId>
            <artifactId>xgboost4j_${scala.binary.version}</artifactId>
-            <version>1.3.0</version>
+            <version>1.3.3</version>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
--- a/jvm-packages/xgboost4j/pom.xml
+++ b/jvm-packages/xgboost4j/pom.xml
@@ -6,10 +6,10 @@
    <parent>
        <groupId>ml.dmlc</groupId>
        <artifactId>xgboost-jvm_2.12</artifactId>
-        <version>1.3.0</version>
+        <version>1.3.3</version>
    </parent>
    <artifactId>xgboost4j_2.12</artifactId>
-    <version>1.3.0</version>
+    <version>1.3.3</version>
    <packaging>jar</packaging>
    <dependencies>
--- a/python-package/xgboost/VERSION
+++ b/python-package/xgboost/VERSION
@@ -1 +1 @@
-1.3.0
+1.3.3
--- a/python-package/xgboost/callback.py
+++ b/python-package/xgboost/callback.py
@@ -456,6 +456,7 @@ class LearningRateScheduler(TrainingCallback):
    def after_iteration(self, model, epoch, evals_log):
        """Update the model's ``learning_rate`` parameter for ``epoch``.

        The new value is obtained by calling ``self.learning_rates(epoch)`` and
        is applied through ``model.set_param``.  ``evals_log`` is not used
        here.  Always returns ``False``.
        """
        model.set_param('learning_rate', self.learning_rates(epoch))
        return False
 # pylint: disable=too-many-instance-attributes
@@ -565,7 +566,7 @@ class EarlyStopping(TrainingCallback):
    def after_training(self, model: Booster):
        try:
            if self.save_best:
-                model = model[: int(model.attr('best_iteration'))]
+                model = model[: int(model.attr('best_iteration')) + 1]
        except XGBoostError as e:
            raise XGBoostError('`save_best` is not applicable to current booster') from e
        return model
@@ -621,7 +622,7 @@ class EvaluationMonitor(TrainingCallback):
                    msg += self._fmt_metric(data, metric_name, score, stdv)
            msg += '\n'
-            if (epoch % self.period) != 0 or self.period == 1:
+            if (epoch % self.period) == 0 or self.period == 1:
                rabit.tracker_print(msg)
                self._latest = None
            else:
@@ -677,6 +678,7 @@ class TrainingCheckPoint(TrainingCallback):
                else:
                    model.save_model(path)
        self._epoch += 1
        return False
 class LegacyCallbacks:
--- a/python-package/xgboost/core.py
+++ b/python-package/xgboost/core.py
@@ -1,11 +1,12 @@
 # coding: utf-8
 # pylint: disable=too-many-arguments, too-many-branches, invalid-name
-# pylint: disable=too-many-lines, too-many-locals
+# pylint: disable=too-many-lines, too-many-locals, no-self-use
 """Core XGBoost Library."""
 import collections
 # pylint: disable=no-name-in-module,import-error
 from collections.abc import Mapping
 # pylint: enable=no-name-in-module,import-error
 from typing import Dict, Union, List
 import ctypes
 import os
 import re
@@ -1012,6 +1013,7 @@ class Booster(object):
        _check_call(_LIB.XGBoosterCreate(dmats, c_bst_ulong(len(cache)),
                                         ctypes.byref(self.handle)))
        params = params or {}
        params = self._configure_metrics(params.copy())
        if isinstance(params, list):
            params.append(('validate_parameters', True))
        else:
@@ -1041,6 +1043,17 @@ class Booster(object):
        else:
            raise TypeError('Unknown type:', model_file)
    def _configure_metrics(self, params: Union[Dict, List]) -> Union[Dict, List]:
        if isinstance(params, dict) and 'eval_metric' in params \
           and isinstance(params['eval_metric'], list):
            params = dict((k, v) for k, v in params.items())
            eval_metrics = params['eval_metric']
            params.pop("eval_metric", None)
            params = list(params.items())
            for eval_metric in eval_metrics:
                params += [('eval_metric', eval_metric)]
        return params
    def __del__(self):
        if hasattr(self, 'handle') and self.handle is not None:
            _check_call(_LIB.XGBoosterFree(self.handle))
--- a/python-package/xgboost/dask.py
+++ b/python-package/xgboost/dask.py
@@ -1210,10 +1210,10 @@ class DaskXGBClassifier(DaskScikitLearnBase, XGBClassifierBase):
                                early_stopping_rounds=early_stopping_rounds,
                                verbose=verbose)
-    async def _predict_proba_async(self, data, output_margin=False,
+    async def _predict_proba_async(self, X, output_margin=False,
                                   base_margin=None):
        test_dmatrix = await DaskDMatrix(
-            client=self.client, data=data, base_margin=base_margin,
+            client=self.client, data=X, base_margin=base_margin,
            missing=self.missing
        )
        pred_probs = await predict(client=self.client,
@@ -1223,11 +1223,11 @@ class DaskXGBClassifier(DaskScikitLearnBase, XGBClassifierBase):
        return pred_probs
    # pylint: disable=arguments-differ,missing-docstring
-    def predict_proba(self, data, output_margin=False, base_margin=None):
+    def predict_proba(self, X, output_margin=False, base_margin=None):
        _assert_dask_support()
        return self.client.sync(
            self._predict_proba_async,
-            data,
+            X=X,
            output_margin=output_margin,
            base_margin=base_margin
        )
--- a/python-package/xgboost/sklearn.py
+++ b/python-package/xgboost/sklearn.py
@@ -4,6 +4,7 @@
 import copy
 import warnings
 import json
 from typing import Optional
 import numpy as np
 from .core import Booster, DMatrix, XGBoostError, _deprecate_positional_args
 from .training import train
@@ -494,6 +495,13 @@ class XGBModel(XGBModelBase):
        # Delete the attribute after load
        self.get_booster().set_attr(scikit_learn=None)
    def _set_evaluation_result(self, evals_result: Optional[dict]) -> None:
        if evals_result:
            for val in evals_result.items():
                evals_result_key = list(val[1].keys())[0]
                evals_result[val[0]][evals_result_key] = val[1][evals_result_key]
            self.evals_result_ = evals_result
    @_deprecate_positional_args
    def fit(self, X, y, *, sample_weight=None, base_margin=None,
            eval_set=None, eval_metric=None, early_stopping_rounds=None,
@@ -565,13 +573,6 @@ class XGBModel(XGBModelBase):
        """
        self.n_features_in_ = X.shape[1]
        train_dmatrix = DMatrix(data=X, label=y, weight=sample_weight,
                                base_margin=base_margin,
                                missing=self.missing,
                                nthread=self.n_jobs)
        train_dmatrix.set_info(feature_weights=feature_weights)
        evals_result = {}
        train_dmatrix, evals = self._wrap_evaluation_matrices(
@@ -601,12 +602,7 @@ class XGBModel(XGBModelBase):
                              verbose_eval=verbose, xgb_model=xgb_model,
                              callbacks=callbacks)
-        if evals_result:
+        self._set_evaluation_result(evals_result)
            for val in evals_result.items():
                evals_result_key = list(val[1].keys())[0]
                evals_result[val[0]][evals_result_key] = val[1][
                    evals_result_key]
            self.evals_result_ = evals_result
        if early_stopping_rounds is not None:
            self.best_score = self._Booster.best_score
@@ -841,14 +837,18 @@ class XGBClassifier(XGBModel, XGBClassifierBase):
            self.classes_ = cp.unique(y.values)
            self.n_classes_ = len(self.classes_)
            can_use_label_encoder = False
-            if not cp.array_equal(self.classes_, cp.arange(self.n_classes_)):
+            expected_classes = cp.arange(self.n_classes_)
            if (self.classes_.shape != expected_classes.shape or
                    not (self.classes_ == expected_classes).all()):
                raise ValueError(label_encoding_check_error)
        elif _is_cupy_array(y):
            import cupy as cp  # pylint: disable=E0401
            self.classes_ = cp.unique(y)
            self.n_classes_ = len(self.classes_)
            can_use_label_encoder = False
-            if not cp.array_equal(self.classes_, cp.arange(self.n_classes_)):
+            expected_classes = cp.arange(self.n_classes_)
            if (self.classes_.shape != expected_classes.shape or
                    not (self.classes_ == expected_classes).all()):
                raise ValueError(label_encoding_check_error)
        else:
            self.classes_ = np.unique(y)
@@ -915,12 +915,7 @@ class XGBClassifier(XGBModel, XGBClassifierBase):
                              callbacks=callbacks)
        self.objective = xgb_options["objective"]
-        if evals_result:
+        self._set_evaluation_result(evals_result)
            for val in evals_result.items():
                evals_result_key = list(val[1].keys())[0]
                evals_result[val[0]][
                    evals_result_key] = val[1][evals_result_key]
            self.evals_result_ = evals_result
        if early_stopping_rounds is not None:
            self.best_score = self._Booster.best_score
@@ -991,10 +986,9 @@ class XGBClassifier(XGBModel, XGBClassifierBase):
            return self._le.inverse_transform(column_indexes)
        return column_indexes
-    def predict_proba(self, data, ntree_limit=None, validate_features=False,
+    def predict_proba(self, X, ntree_limit=None, validate_features=False,
                      base_margin=None):
-        """
+        """ Predict the probability of each `X` example being of a given class.
        Predict the probability of each `data` example being of a given class.
        .. note:: This function is not thread safe
@@ -1004,21 +998,22 @@ class XGBClassifier(XGBModel, XGBClassifierBase):
        Parameters
        ----------
-        data : array_like
+        X : array_like
            Feature matrix.
        ntree_limit : int
-            Limit number of trees in the prediction; defaults to best_ntree_limit if defined
+            Limit number of trees in the prediction; defaults to best_ntree_limit if
-            (i.e. it has been trained with early stopping), otherwise 0 (use all trees).
+            defined (i.e. it has been trained with early stopping), otherwise 0 (use all
            trees).
        validate_features : bool
-            When this is True, validate that the Booster's and data's feature_names are identical.
+            When this is True, validate that the Booster's and data's feature_names are
-            Otherwise, it is assumed that the feature_names are the same.
+            identical.  Otherwise, it is assumed that the feature_names are the same.
        Returns
        -------
        prediction : numpy array
            a numpy array with the probability of each data example being of a given class.
        """
-        test_dmatrix = DMatrix(data, base_margin=base_margin,
+        test_dmatrix = DMatrix(X, base_margin=base_margin,
                               missing=self.missing, nthread=self.n_jobs)
        if ntree_limit is None:
            ntree_limit = getattr(self, "best_ntree_limit", 0)
@@ -1324,12 +1319,7 @@ class XGBRanker(XGBModel):
        self.objective = params["objective"]
-        if evals_result:
+        self._set_evaluation_result(evals_result)
            for val in evals_result.items():
                evals_result_key = list(val[1].keys())[0]
                evals_result[val[0]][evals_result_key] = val[1][evals_result_key]
            self.evals_result = evals_result
        if early_stopping_rounds is not None:
            self.best_score = self._Booster.best_score
            self.best_iteration = self._Booster.best_iteration
--- a/python-package/xgboost/training.py
+++ b/python-package/xgboost/training.py
@@ -4,6 +4,7 @@
 """Training Library containing training routines."""
 import warnings
 import copy
 import json
 import numpy as np
 from .core import Booster, XGBoostError
@@ -40,18 +41,6 @@ def _is_new_callback(callbacks):
               for c in callbacks) or not callbacks
 def _configure_metrics(params):
    if isinstance(params, dict) and 'eval_metric' in params \
       and isinstance(params['eval_metric'], list):
        params = dict((k, v) for k, v in params.items())
        eval_metrics = params['eval_metric']
        params.pop("eval_metric", None)
        params = list(params.items())
        for eval_metric in eval_metrics:
            params += [('eval_metric', eval_metric)]
    return params
 def _train_internal(params, dtrain,
                    num_boost_round=10, evals=(),
                    obj=None, feval=None,
@@ -61,7 +50,6 @@ def _train_internal(params, dtrain,
    """internal training function"""
    callbacks = [] if callbacks is None else copy.copy(callbacks)
    evals = list(evals)
    params = _configure_metrics(params.copy())
    bst = Booster(params, [dtrain] + [d[0] for d in evals])
    nboost = 0
@@ -136,7 +124,26 @@ def _train_internal(params, dtrain,
        bst.best_iteration = int(bst.attr('best_iteration'))
    else:
        bst.best_iteration = nboost - 1
    config = json.loads(bst.save_config())
    booster = config['learner']['gradient_booster']['name']
    if booster == 'gblinear':
        num_parallel_tree = 0
    elif booster == 'dart':
        num_parallel_tree = int(
            config['learner']['gradient_booster']['gbtree']['gbtree_train_param'][
                'num_parallel_tree'
            ]
        )
    elif booster == 'gbtree':
        num_parallel_tree = int(
            config['learner']['gradient_booster']['gbtree_train_param'][
                'num_parallel_tree']
        )
    else:
        raise ValueError(f'Unknown booster: {booster}')
    bst.best_ntree_limit = (bst.best_iteration + 1) * num_parallel_tree
    # Copy to serialise and unserialise booster to reset state and free
    # training memory
    return bst.copy()
@@ -175,9 +182,10 @@ def train(params, dtrain, num_boost_round=10, evals=(), obj=None, feval=None,
        If there's more than one metric in the **eval_metric** parameter given in
        **params**, the last metric will be used for early stopping.
        If early stopping occurs, the model will have three additional fields:
-        ``bst.best_score``, ``bst.best_iteration`` and ``bst.best_ntree_limit``.
+        ``bst.best_score``, ``bst.best_iteration`` and ``bst.best_ntree_limit``.  Use
-        (Use ``bst.best_ntree_limit`` to get the correct value if
+        ``bst.best_ntree_limit`` to get the correct value if ``num_parallel_tree`` and/or
-        ``num_parallel_tree`` and/or ``num_class`` appears in the parameters)
+        ``num_class`` appears in the parameters.  ``best_ntree_limit`` is the result of
        ``num_parallel_tree * (best_iteration + 1)``.
    evals_result: dict
        This dictionary stores the evaluation results of all the items in watchlist.
--- a/rabit/include/rabit/internal/socket.h
+++ b/rabit/include/rabit/internal/socket.h
@@ -25,6 +25,10 @@
 #include <sys/socket.h>
 #include <sys/ioctl.h>
 #if defined(__sun) || defined(sun)
 #include <sys/sockio.h>
 #endif  // defined(__sun) || defined(sun)
 #endif  // defined(_WIN32)
 #include <string>
--- a/src/gbm/gblinear_model.cc
+++ b/src/gbm/gblinear_model.cc
@@ -10,10 +10,6 @@ namespace xgboost {
 namespace gbm {
 void GBLinearModel::SaveModel(Json* p_out) const {
  using WeightType = std::remove_reference<decltype(std::declval<decltype(weight)>().back())>::type;
  using JsonFloat = Number::Float;
  static_assert(std::is_same<WeightType, JsonFloat>::value,
                "Weight type should be of the same type with JSON float");
  auto& out = *p_out;
  size_t const n_weights = weight.size();
--- a/src/objective/regression_loss.h
+++ b/src/objective/regression_loss.h
@@ -162,6 +162,9 @@ struct LogisticRaw : public LogisticRegression {
    predt = common::Sigmoid(predt);
    return std::max(predt * (T(1.0f) - predt), eps);
  }
  // Identity mapping: the supplied base_score is returned unchanged.
  static bst_float ProbToMargin(bst_float base_score) {
    return base_score;
  }
  static const char* DefaultEvalMetric() { return "auc"; }
  static const char* Name() { return "binary:logitraw"; }
--- a/tests/ci_build/Dockerfile.auditwheel_x86_64
+++ b/tests/ci_build/Dockerfile.auditwheel_x86_64
@@ -0,0 +1,15 @@
 # Image for the auditwheel step; built on the PyPA manylinux2010 x86_64 base.
 FROM quay.io/pypa/manylinux2010_x86_64
 # Install lightweight sudo (not bound to TTY)
 ENV GOSU_VERSION 1.10
 # Download the pinned gosu release, mark it executable, then run it once
 # (`gosu nobody true`) as a smoke test; any failing step aborts the build.
 RUN set -ex; \
    curl -o /usr/local/bin/gosu -L "https://github.com/tianon/gosu/releases/download/$GOSU_VERSION/gosu-amd64" && \
    chmod +x /usr/local/bin/gosu && \
    gosu nobody true
 # Default entry-point to use if running locally
 # It will preserve attributes of created files
 COPY entrypoint.sh /scripts/
 WORKDIR /workspace
 ENTRYPOINT ["/scripts/entrypoint.sh"]
--- a/tests/ci_build/conda_env/win64_test.yml
+++ b/tests/ci_build/conda_env/win64_test.yml
@@ -9,7 +9,6 @@ dependencies:
 - scikit-learn
 - pandas
 - pytest
 - python-graphviz
 - boto3
 - hypothesis
 - jsonschema
@@ -17,3 +16,4 @@ dependencies:
 - pip:
  - cupy-cuda101
  - modin[all]
  - graphviz
--- a/tests/python-gpu/test_gpu_with_dask.py
+++ b/tests/python-gpu/test_gpu_with_dask.py
@@ -5,8 +5,10 @@ import numpy as np
 import asyncio
 import xgboost
 import subprocess
 import hypothesis
 from hypothesis import given, strategies, settings, note
 from hypothesis._settings import duration
 from hypothesis import HealthCheck
 from test_gpu_updaters import parameter_strategy
 if sys.platform.startswith("win"):
@@ -19,6 +21,11 @@ from test_with_dask import _get_client_workers  # noqa
 from test_with_dask import generate_array     # noqa
 import testing as tm                          # noqa
 if hasattr(HealthCheck, 'function_scoped_fixture'):
    suppress = [HealthCheck.function_scoped_fixture]
 else:
    suppress = hypothesis.utils.conventions.not_set
 try:
    import dask.dataframe as dd
@@ -161,19 +168,24 @@ class TestDistributedGPU:
            run_with_dask_dataframe(dxgb.DaskDMatrix, client)
            run_with_dask_dataframe(dxgb.DaskDeviceQuantileDMatrix, client)
-    @given(params=parameter_strategy, num_rounds=strategies.integers(1, 20),
+    @given(
-           dataset=tm.dataset_strategy)
+        params=parameter_strategy,
-    @settings(deadline=duration(seconds=120))
+        num_rounds=strategies.integers(1, 20),
        dataset=tm.dataset_strategy,
    )
    @settings(deadline=duration(seconds=120), suppress_health_check=suppress)
    @pytest.mark.skipif(**tm.no_dask())
    @pytest.mark.skipif(**tm.no_dask_cuda())
-    @pytest.mark.parametrize('local_cuda_cluster', [{'n_workers': 2}], indirect=['local_cuda_cluster'])
+    @pytest.mark.parametrize(
        "local_cuda_cluster", [{"n_workers": 2}], indirect=["local_cuda_cluster"]
    )
    @pytest.mark.mgpu
    def test_gpu_hist(self, params, num_rounds, dataset, local_cuda_cluster):
        with Client(local_cuda_cluster) as client:
-            run_gpu_hist(params, num_rounds, dataset, dxgb.DaskDMatrix,
+            run_gpu_hist(params, num_rounds, dataset, dxgb.DaskDMatrix, client)
-                         client)
+            run_gpu_hist(
-            run_gpu_hist(params, num_rounds, dataset,
+                params, num_rounds, dataset, dxgb.DaskDeviceQuantileDMatrix, client
-                         dxgb.DaskDeviceQuantileDMatrix, client)
+            )
    @pytest.mark.skipif(**tm.no_cupy())
    @pytest.mark.skipif(**tm.no_dask())
--- a/tests/python/generate_models.py
+++ b/tests/python/generate_models.py
@@ -64,22 +64,24 @@ def generate_logistic_model():
    y = np.random.randint(0, 2, size=kRows)
    assert y.max() == 1 and y.min() == 0
-    data = xgboost.DMatrix(X, label=y, weight=w)
+    for objective, name in [('binary:logistic', 'logit'), ('binary:logitraw', 'logitraw')]:
-    booster = xgboost.train({'tree_method': 'hist',
+        data = xgboost.DMatrix(X, label=y, weight=w)
-                             'num_parallel_tree': kForests,
+        booster = xgboost.train({'tree_method': 'hist',
-                             'max_depth': kMaxDepth,
+                                 'num_parallel_tree': kForests,
-                             'objective': 'binary:logistic'},
+                                 'max_depth': kMaxDepth,
-                            num_boost_round=kRounds, dtrain=data)
+                                 'objective': objective},
-    booster.save_model(booster_bin('logit'))
+                                num_boost_round=kRounds, dtrain=data)
-    booster.save_model(booster_json('logit'))
+        booster.save_model(booster_bin(name))
        booster.save_model(booster_json(name))
-    reg = xgboost.XGBClassifier(tree_method='hist',
+        reg = xgboost.XGBClassifier(tree_method='hist',
-                                num_parallel_tree=kForests,
+                                    num_parallel_tree=kForests,
-                                max_depth=kMaxDepth,
+                                    max_depth=kMaxDepth,
-                                n_estimators=kRounds)
+                                    n_estimators=kRounds,
-    reg.fit(X, y, w)
+                                    objective=objective)
-    reg.save_model(skl_bin('logit'))
+        reg.fit(X, y, w)
-    reg.save_model(skl_json('logit'))
+        reg.save_model(skl_bin(name))
        reg.save_model(skl_json(name))
 def generate_classification_model():
--- a/tests/python/test_basic.py
+++ b/tests/python/test_basic.py
@@ -57,6 +57,25 @@ class TestBasic:
            # assert they are the same
            assert np.sum(np.abs(preds2 - preds)) == 0
    def test_metric_config(self):
        """A list-valued ``eval_metric`` must be handled by the Booster itself.

        Trains with ``eval_metric=['error', 'auc']``, saves the model as JSON,
        reloads it through ``xgb.Booster`` with the same parameters (so the
        list is not passed down to core as a string) and checks that the
        predictions of both boosters match.
        """
        train_data = xgb.DMatrix(dpath + 'agaricus.txt.train')
        test_data = xgb.DMatrix(dpath + 'agaricus.txt.test')
        config = {
            'max_depth': 2,
            'eta': 1,
            'verbosity': 0,
            'objective': 'binary:logistic',
            'eval_metric': ['error', 'auc'],
        }
        evals = [(test_data, 'eval'), (train_data, 'train')]
        trained = xgb.train(config, train_data, 2, evals)
        expected = trained.predict(train_data)
        with tempfile.TemporaryDirectory() as workdir:
            model_path = os.path.join(workdir, 'model.json')
            trained.save_model(model_path)
            # Passing the same params to the constructor exercises the
            # metric configuration inside Booster.
            reloaded = xgb.Booster(params=config, model_file=model_path)
            actual = reloaded.predict(train_data)
            np.testing.assert_allclose(expected, actual)
    def test_record_results(self):
        dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train')
        dtest = xgb.DMatrix(dpath + 'agaricus.txt.test')
@@ -124,8 +143,8 @@ class TestBasic:
        dump2 = bst.get_dump(with_stats=True)
        assert dump2[0].count('\n') == 3, 'Expected 1 root and 2 leaves - 3 lines in dump.'
-        assert (dump2[0].find('\n') > dump1[0].find('\n'),
+        msg = 'Expected more info when with_stats=True is given.'
-                'Expected more info when with_stats=True is given.')
+        assert dump2[0].find('\n') > dump1[0].find('\n'), msg
        dump3 = bst.get_dump(dump_format="json")
        dump3j = json.loads(dump3[0])
@@ -248,13 +267,11 @@ class TestBasicPathLike:
        assert binary_path.exists()
        Path.unlink(binary_path)
    def test_Booster_init_invalid_path(self):
        """An invalid model_file path should raise XGBoostError."""
        with pytest.raises(xgb.core.XGBoostError):
            xgb.Booster(model_file=Path("invalidpath"))
    def test_Booster_save_and_load(self):
        """Saving and loading model files from paths."""
        save_path = Path("saveload.model")
--- a/tests/python/test_callback.py
+++ b/tests/python/test_callback.py
@@ -33,15 +33,18 @@ class TestCallbacks:
                      verbose_eval=verbose_eval)
            output: str = out.getvalue().strip()
-        pos = 0
+        if int(verbose_eval) == 1:
-        msg = 'Train-error'
+            # Should print each iteration info
-        for i in range(rounds // int(verbose_eval)):
+            assert len(output.split('\n')) == rounds
-            pos = output.find('Train-error', pos)
+        elif int(verbose_eval) > rounds:
-            assert pos != -1
+            # Should print first and latest iteration info
-            pos += len(msg)
+            assert len(output.split('\n')) == 2
-
+        else:
-        assert output.find('Train-error', pos) == -1
+            # Should print info by each period additionally to first and latest iteration
-
+            num_periods = rounds // int(verbose_eval)
            # Extra information is required for latest iteration
            is_extra_info_required = num_periods * int(verbose_eval) < (rounds - 1) 
            assert len(output.split('\n')) == 1 + num_periods + int(is_extra_info_required)
    def test_evaluation_monitor(self):
        D_train = xgb.DMatrix(self.X_train, self.y_train)
@@ -57,8 +60,10 @@ class TestCallbacks:
        assert len(evals_result['Train']['error']) == rounds
        assert len(evals_result['Valid']['error']) == rounds
        self.run_evaluation_monitor(D_train, D_valid, rounds, 2)
        self.run_evaluation_monitor(D_train, D_valid, rounds, True)
        self.run_evaluation_monitor(D_train, D_valid, rounds, 2)
        self.run_evaluation_monitor(D_train, D_valid, rounds, 4)
        self.run_evaluation_monitor(D_train, D_valid, rounds, rounds + 1)        
    def test_early_stopping(self):
        D_train = xgb.DMatrix(self.X_train, self.y_train)
@@ -148,7 +153,7 @@ class TestCallbacks:
                eval_metric=tm.eval_error_metric, callbacks=[early_stop])
        booster = cls.get_booster()
        dump = booster.get_dump(dump_format='json')
-        assert len(dump) == booster.best_iteration
+        assert len(dump) == booster.best_iteration + 1
        early_stop = xgb.callback.EarlyStopping(rounds=early_stopping_rounds,
                                                save_best=True)
--- a/tests/python/test_model_compatibility.py
+++ b/tests/python/test_model_compatibility.py
@@ -24,6 +24,10 @@ def run_booster_check(booster, name):
            config['learner']['learner_model_param']['base_score']) == 0.5
        assert config['learner']['learner_train_param'][
            'objective'] == 'multi:softmax'
    elif name.find('logitraw') != -1:
        assert len(booster.get_dump()) == gm.kForests * gm.kRounds
        assert config['learner']['learner_model_param']['num_class'] == str(0)
        assert config['learner']['learner_train_param']['objective'] == 'binary:logitraw'
    elif name.find('logit') != -1:
        assert len(booster.get_dump()) == gm.kForests * gm.kRounds
        assert config['learner']['learner_model_param']['num_class'] == str(0)
@@ -77,6 +81,13 @@ def run_scikit_model_check(name, path):
        assert config['learner']['learner_train_param'][
            'objective'] == 'rank:ndcg'
        run_model_param_check(config)
    elif name.find('logitraw') != -1:
        logit = xgboost.XGBClassifier()
        logit.load_model(path)
        assert (len(logit.get_booster().get_dump()) ==
                gm.kRounds * gm.kForests)
        config = json.loads(logit.get_booster().save_config())
        assert config['learner']['learner_train_param']['objective'] == 'binary:logitraw'
    elif name.find('logit') != -1:
        logit = xgboost.XGBClassifier()
        logit.load_model(path)
--- a/tests/python/test_predict.py
+++ b/tests/python/test_predict.py
@@ -33,9 +33,15 @@ def run_predict_leaf(predictor):
    y = rng.randint(low=0, high=classes, size=rows)
    m = xgb.DMatrix(X, y)
    booster = xgb.train(
-        {'num_parallel_tree': num_parallel_tree, 'num_class': classes,
+        {
-         'predictor': predictor, 'tree_method': 'hist'}, m,
+            "num_parallel_tree": num_parallel_tree,
-        num_boost_round=num_boost_round)
+            "num_class": classes,
            "predictor": predictor,
            "tree_method": "hist",
        },
        m,
        num_boost_round=num_boost_round,
    )
    empty = xgb.DMatrix(np.ones(shape=(0, cols)))
    empty_leaf = booster.predict(empty, pred_leaf=True)
@@ -52,12 +58,19 @@ def run_predict_leaf(predictor):
            end = classes * num_parallel_tree * (j + 1)
            layer = row[start: end]
            for c in range(classes):
-                tree_group = layer[c * num_parallel_tree:
+                tree_group = layer[c * num_parallel_tree: (c + 1) * num_parallel_tree]
                                   (c+1) * num_parallel_tree]
                assert tree_group.shape[0] == num_parallel_tree
                # no subsampling so tree in same forest should output same
                # leaf.
                assert np.all(tree_group == tree_group[0])
    ntree_limit = 2
    sliced = booster.predict(
        m, pred_leaf=True, ntree_limit=num_parallel_tree * ntree_limit
    )
    first = sliced[0, ...]
    assert first.shape[0] == classes * num_parallel_tree * ntree_limit
    return leaf
--- a/tests/python/test_with_dask.py
+++ b/tests/python/test_with_dask.py
@@ -8,7 +8,8 @@ import asyncio
 from sklearn.datasets import make_classification
 import os
 import subprocess
-from hypothesis import given, settings, note
+import hypothesis
 from hypothesis import given, settings, note, HealthCheck
 from test_updaters import hist_parameter_strategy, exact_parameter_strategy
 if sys.platform.startswith("win"):
@@ -17,6 +18,12 @@ if tm.no_dask()['condition']:
    pytest.skip(msg=tm.no_dask()['reason'], allow_module_level=True)
 if hasattr(HealthCheck, 'function_scoped_fixture'):
    suppress = [HealthCheck.function_scoped_fixture]
 else:
    suppress = hypothesis.utils.conventions.not_set
 try:
    from distributed import LocalCluster, Client, get_client
    from distributed.utils_test import client, loop, cluster_fixture
@@ -668,14 +675,14 @@ class TestWithDask:
    @given(params=hist_parameter_strategy,
           dataset=tm.dataset_strategy)
-    @settings(deadline=None)
+    @settings(deadline=None, suppress_health_check=suppress)
    def test_hist(self, params, dataset, client):
        num_rounds = 30
        self.run_updater_test(client, params, num_rounds, dataset, 'hist')
    @given(params=exact_parameter_strategy,
           dataset=tm.dataset_strategy)
-    @settings(deadline=None)
+    @settings(deadline=None, suppress_health_check=suppress)
    def test_approx(self, client, params, dataset):
        num_rounds = 30
        self.run_updater_test(client, params, num_rounds, dataset, 'approx')
@@ -795,7 +802,6 @@ class TestDaskCallbacks:
                merged = xgb.dask._get_workers_from_data(train, evals=[(valid, 'Valid')])
                assert len(merged) == 2
    def test_data_initialization(self):
        '''Assert each worker has the correct amount of data, and DMatrix initialization doesn't
        generate unnecessary copies of data.
--- a/tests/python/test_with_sklearn.py
+++ b/tests/python/test_with_sklearn.py
@@ -78,6 +78,34 @@ def test_multiclass_classification():
        check_pred(preds4, labels, output_margin=False)
 def test_best_ntree_limit():
    from sklearn.datasets import load_iris
    X, y = load_iris(return_X_y=True)
    def train(booster, forest):
        rounds = 4
        cls = xgb.XGBClassifier(
            n_estimators=rounds, num_parallel_tree=forest, booster=booster
        ).fit(
            X, y, eval_set=[(X, y)], early_stopping_rounds=3
        )
        if forest:
            assert cls.best_ntree_limit == rounds * forest
        else:
            assert cls.best_ntree_limit == 0
        # best_ntree_limit is used by default, assert that under gblinear it's
        # automatically ignored due to being 0.
        cls.predict(X)
    num_parallel_tree = 4
    train('gbtree', num_parallel_tree)
    train('dart', num_parallel_tree)
    train('gblinear', None)
 def test_ranking():
    # generate random data
    x_train = np.random.rand(1000, 10)
@@ -94,6 +122,8 @@ def test_ranking():
    model = xgb.sklearn.XGBRanker(**params)
    model.fit(x_train, y_train, group=train_group,
              eval_set=[(x_valid, y_valid)], eval_group=[valid_group])
    assert model.evals_result()
    pred = model.predict(x_test)
    train_data = xgb.DMatrix(x_train, y_train)
Author	SHA1	Message	Date
Hyunsu Cho	963a17b771	[CI] Upload Doxygen to correct destination	2021-04-13 15:09:53 -07:00
Jiaming Yuan	000292ce6d	Bump release version to 1.3.3. (#6624 )	2021-01-20 19:23:31 +08:00
Jiaming Yuan	d3ec116322	Revert ntree limit fix (#6616 ) (#6622 ) The old (before fix) best_ntree_limit ignores the num_class parameter, which is incorrect. Previously we worked around it in the C++ layer to avoid possible breaking changes in other language bindings, but the Python interpretation stayed incorrect. The PR fixed that in Python to consider num_class, but didn't remove the old workaround, so the tree calculation in the predictor is incorrect; see PredictBatch in CPUPredictor.	2021-01-20 04:20:07 +08:00
Jiaming Yuan	a018028471	Remove type check for solaris. (#6606 )	2021-01-15 18:20:39 +08:00
fis	3e343159ef	Release patch release 1.3.2	2021-01-13 17:35:00 +08:00
Jiaming Yuan	99e802f2ff	Remove duplicated DMatrix. (#6592 ) (#6599 )	2021-01-13 04:44:06 +08:00
Jiaming Yuan	6a29afb480	Fix evaluation result for XGBRanker. (#6594 ) (#6600 ) * Remove duplicated code, which fixes typo `evals_result` -> `evals_result_`.	2021-01-13 04:42:43 +08:00
Jiaming Yuan	8e321adac8	Support Solaris. (#6578 ) (#6588 ) * Add system header. * Remove use of TR1 on Solaris Co-authored-by: Hyunsu Cho <chohyu01@cs.washington.edu>	2021-01-11 02:31:29 +08:00
Jiaming Yuan	d0ec65520a	[backport] Fix `best_ntree_limit` for dart and gblinear. (#6579 ) (#6587 ) * [backport] Fix `best_ntree_limit` for dart and gblinear. (#6579) * Backport num group test fix.	2021-01-11 01:46:05 +08:00
Jiaming Yuan	7aec915dcd	[Backport] Rename `data` to `X` in `predict_proba`. (#6555 ) (#6586 ) * [Breaking] Rename `data` to `X` in `predict_proba`. (#6555) New Scikit-Learn version uses keyword argument, and `X` is the predefined keyword. * Use pip to install latest Python graphviz on Windows CI. * Suppress health check.	2021-01-10 16:05:17 +08:00
Philip Hyunsu Cho	a78d0d4110	Release patch release 1.3.1 (#6543 )	2020-12-21 23:22:32 -08:00
Jiaming Yuan	76c361431f	Remove cupy.array_equal, since it's not compatible with cuPy 7.8 (#6528 ) (#6535 ) Co-authored-by: Philip Hyunsu Cho <chohyu01@cs.washington.edu>	2020-12-20 15:11:50 +08:00
Jiaming Yuan	d95d02132a	Fix handling of print period in EvaluationMonitor (#6499 ) (#6534 ) Co-authored-by: Kirill Shvets <kirill.shvets@intel.com> Co-authored-by: ShvetsKS <33296480+ShvetsKS@users.noreply.github.com> Co-authored-by: Kirill Shvets <kirill.shvets@intel.com>	2020-12-20 15:07:42 +08:00
Jiaming Yuan	7109c6c1f2	[backport] Move metric configuration into booster. (#6504 ) (#6533 )	2020-12-20 10:36:32 +08:00
Jiaming Yuan	bce7ca313c	[backport] Fix `save_best`. (#6523 )	2020-12-18 20:00:29 +08:00
Jiaming Yuan	8be2cd8c91	Enable loading model from <1.0.0 trained with objective='binary:logitraw' (#6517 ) (#6524 ) * Enable loading model from <1.0.0 trained with objective='binary:logitraw' * Add binary:logitraw in model compatibility testing suite * Feedback from @trivialfis: Override ProbToMargin() for LogisticRaw Co-authored-by: Jiaming Yuan <jm.yuan@outlook.com> Co-authored-by: Philip Hyunsu Cho <chohyu01@cs.washington.edu>	2020-12-18 04:10:09 +08:00
Philip Hyunsu Cho	c5f0cdbc72	Hot fix for libgomp vendoring (#6482 ) * Hot fix for libgomp vendoring * Set post0 in setup.py	2020-12-09 10:04:45 -08:00