Bump R package version to 1.7.3. (#8649)

Bump version to 1.7.3. (#8646)
[backport] Do not return internal value for `get_params`. (#8634) (#8642)
2023-01-06 20:34:05 +08:00 · 2023-01-06 17:55:51 +08:00 · 2023-01-06 02:28:39 +08:00 · 2023-01-06 02:28:21 +08:00 · 2023-01-06 01:17:49 +08:00 · 2023-01-05 23:16:10 +08:00
27 changed files with 223 additions and 132 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,5 +1,5 @@
 cmake_minimum_required(VERSION 3.14 FATAL_ERROR)
-project(xgboost LANGUAGES CXX C VERSION 1.7.2)
+project(xgboost LANGUAGES CXX C VERSION 1.7.3)
 include(cmake/Utils.cmake)
 list(APPEND CMAKE_MODULE_PATH "${xgboost_SOURCE_DIR}/cmake/modules")
 cmake_policy(SET CMP0022 NEW)
--- a/R-package/DESCRIPTION
+++ b/R-package/DESCRIPTION
@@ -1,8 +1,8 @@
 Package: xgboost
 Type: Package
 Title: Extreme Gradient Boosting
-Version: 1.7.2.1
-Date: 2022-12-08
+Version: 1.7.3.1
+Date: 2023-01-06
 Authors@R: c(
  person("Tianqi", "Chen", role = c("aut"),
         email = "tianqi.tchen@gmail.com"),
@@ -66,5 +66,5 @@ Imports:
    methods,
    data.table (>= 1.9.6),
    jsonlite (>= 1.0),
-RoxygenNote: 7.2.1
+RoxygenNote: 7.2.2
 SystemRequirements: GNU make, C++14
--- a/R-package/configure
+++ b/R-package/configure
@@ -1,6 +1,6 @@
 #! /bin/sh
 # Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.69 for xgboost 1.7.2.
+# Generated by GNU Autoconf 2.69 for xgboost 1.7.3.
 #
 #
 # Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc.
@@ -576,8 +576,8 @@ MAKEFLAGS=
 # Identity of this package.
 PACKAGE_NAME='xgboost'
 PACKAGE_TARNAME='xgboost'
-PACKAGE_VERSION='1.7.2'
-PACKAGE_STRING='xgboost 1.7.2'
+PACKAGE_VERSION='1.7.3'
+PACKAGE_STRING='xgboost 1.7.3'
 PACKAGE_BUGREPORT=''
 PACKAGE_URL=''

@@ -1195,7 +1195,7 @@ if test "$ac_init_help" = "long"; then
  # Omit some internal or obsolete options to make the list less imposing.
  # This message is too long to be a string in the A/UX 3.1 sh.
  cat <<_ACEOF
-\`configure' configures xgboost 1.7.2 to adapt to many kinds of systems.
+\`configure' configures xgboost 1.7.3 to adapt to many kinds of systems.

 Usage: $0 [OPTION]... [VAR=VALUE]...

@@ -1257,7 +1257,7 @@ fi

 if test -n "$ac_init_help"; then
  case $ac_init_help in
-     short | recursive ) echo "Configuration of xgboost 1.7.2:";;
+     short | recursive ) echo "Configuration of xgboost 1.7.3:";;
   esac
  cat <<\_ACEOF

@@ -1336,7 +1336,7 @@ fi
 test -n "$ac_init_help" && exit $ac_status
 if $ac_init_version; then
  cat <<\_ACEOF
-xgboost configure 1.7.2
+xgboost configure 1.7.3
 generated by GNU Autoconf 2.69

 Copyright (C) 2012 Free Software Foundation, Inc.
@@ -1479,7 +1479,7 @@ cat >config.log <<_ACEOF
 This file contains any messages produced by compilers while
 running configure, to aid debugging if configure makes a mistake.

-It was created by xgboost $as_me 1.7.2, which was
+It was created by xgboost $as_me 1.7.3, which was
 generated by GNU Autoconf 2.69.  Invocation command line was

  $ $0 $@
@@ -3294,7 +3294,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
 # report actual input values of CONFIG_FILES etc. instead of their
 # values after options handling.
 ac_log="
-This file was extended by xgboost $as_me 1.7.2, which was
+This file was extended by xgboost $as_me 1.7.3, which was
 generated by GNU Autoconf 2.69.  Invocation command line was

  CONFIG_FILES    = $CONFIG_FILES
@@ -3347,7 +3347,7 @@ _ACEOF
 cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
 ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
 ac_cs_version="\\
-xgboost config.status 1.7.2
+xgboost config.status 1.7.3
 configured by $0, generated by GNU Autoconf 2.69,
  with options \\"\$ac_cs_config\\"

--- a/R-package/configure.ac
+++ b/R-package/configure.ac
@@ -2,7 +2,7 @@

 AC_PREREQ(2.69)

-AC_INIT([xgboost],[1.7.2],[],[xgboost],[])
+AC_INIT([xgboost],[1.7.3],[],[xgboost],[])

 # Use this line to set CC variable to a C compiler
 AC_PROG_CC
--- a/doc/tutorials/categorical.rst
+++ b/doc/tutorials/categorical.rst
@@ -138,11 +138,11 @@ Miscellaneous

 By default, XGBoost assumes input categories are integers starting from 0 till the number
 of categories :math:`[0, n\_categories)`. However, user might provide inputs with invalid
-values due to mistakes or missing values. It can be negative value, integer values that
-can not be accurately represented by 32-bit floating point, or values that are larger than
-actual number of unique categories.  During training this is validated but for prediction
-it's treated as the same as missing value for performance reasons.  Lastly, missing values
-are treated as the same as numerical features (using the learned split direction).
+values due to mistakes or missing values in training dataset. It can be negative value,
+integer values that can not be accurately represented by 32-bit floating point, or values
+that are larger than actual number of unique categories.  During training this is
+validated but for prediction it's treated as the same as not-chosen category for
+performance reasons.


 **********
--- a/include/xgboost/version_config.h
+++ b/include/xgboost/version_config.h
@@ -6,6 +6,6 @@

 #define XGBOOST_VER_MAJOR 1
 #define XGBOOST_VER_MINOR 7
-#define XGBOOST_VER_PATCH 2
+#define XGBOOST_VER_PATCH 3

 #endif  // XGBOOST_VERSION_CONFIG_H_
--- a/jvm-packages/pom.xml
+++ b/jvm-packages/pom.xml
@@ -6,7 +6,7 @@

    <groupId>ml.dmlc</groupId>
    <artifactId>xgboost-jvm_2.12</artifactId>
-    <version>1.7.2</version>
+    <version>1.7.3</version>
    <packaging>pom</packaging>
    <name>XGBoost JVM Package</name>
    <description>JVM Package for XGBoost</description>
--- a/jvm-packages/xgboost4j-example/pom.xml
+++ b/jvm-packages/xgboost4j-example/pom.xml
@@ -6,10 +6,10 @@
    <parent>
        <groupId>ml.dmlc</groupId>
        <artifactId>xgboost-jvm_2.12</artifactId>
-        <version>1.7.2</version>
+        <version>1.7.3</version>
    </parent>
    <artifactId>xgboost4j-example_2.12</artifactId>
-    <version>1.7.2</version>
+    <version>1.7.3</version>
    <packaging>jar</packaging>
    <build>
        <plugins>
@@ -26,7 +26,7 @@
        <dependency>
            <groupId>ml.dmlc</groupId>
            <artifactId>xgboost4j-spark_${scala.binary.version}</artifactId>
-            <version>1.7.2</version>
+            <version>1.7.3</version>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
@@ -37,7 +37,7 @@
        <dependency>
            <groupId>ml.dmlc</groupId>
            <artifactId>xgboost4j-flink_${scala.binary.version}</artifactId>
-            <version>1.7.2</version>
+            <version>1.7.3</version>
        </dependency>
        <dependency>
            <groupId>org.apache.commons</groupId>
--- a/jvm-packages/xgboost4j-flink/pom.xml
+++ b/jvm-packages/xgboost4j-flink/pom.xml
@@ -6,10 +6,10 @@
    <parent>
        <groupId>ml.dmlc</groupId>
        <artifactId>xgboost-jvm_2.12</artifactId>
-        <version>1.7.2</version>
+        <version>1.7.3</version>
    </parent>
    <artifactId>xgboost4j-flink_2.12</artifactId>
-    <version>1.7.2</version>
+    <version>1.7.3</version>
    <build>
        <plugins>
            <plugin>
@@ -26,7 +26,7 @@
        <dependency>
            <groupId>ml.dmlc</groupId>
            <artifactId>xgboost4j_${scala.binary.version}</artifactId>
-            <version>1.7.2</version>
+            <version>1.7.3</version>
        </dependency>
        <dependency>
            <groupId>org.apache.commons</groupId>
--- a/jvm-packages/xgboost4j-gpu/pom.xml
+++ b/jvm-packages/xgboost4j-gpu/pom.xml
@@ -6,10 +6,10 @@
    <parent>
        <groupId>ml.dmlc</groupId>
        <artifactId>xgboost-jvm_2.12</artifactId>
-        <version>1.7.2</version>
+        <version>1.7.3</version>
    </parent>
    <artifactId>xgboost4j-gpu_2.12</artifactId>
-    <version>1.7.2</version>
+    <version>1.7.3</version>
    <packaging>jar</packaging>

    <dependencies>
--- a/jvm-packages/xgboost4j-spark-gpu/pom.xml
+++ b/jvm-packages/xgboost4j-spark-gpu/pom.xml
@@ -6,7 +6,7 @@
    <parent>
        <groupId>ml.dmlc</groupId>
        <artifactId>xgboost-jvm_2.12</artifactId>
-        <version>1.7.2</version>
+        <version>1.7.3</version>
    </parent>
    <artifactId>xgboost4j-spark-gpu_2.12</artifactId>
    <build>
@@ -24,7 +24,7 @@
        <dependency>
            <groupId>ml.dmlc</groupId>
            <artifactId>xgboost4j-gpu_${scala.binary.version}</artifactId>
-            <version>1.7.2</version>
+            <version>1.7.3</version>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
--- a/jvm-packages/xgboost4j-spark/pom.xml
+++ b/jvm-packages/xgboost4j-spark/pom.xml
@@ -6,7 +6,7 @@
    <parent>
        <groupId>ml.dmlc</groupId>
        <artifactId>xgboost-jvm_2.12</artifactId>
-        <version>1.7.2</version>
+        <version>1.7.3</version>
    </parent>
    <artifactId>xgboost4j-spark_2.12</artifactId>
    <build>
@@ -24,7 +24,7 @@
        <dependency>
            <groupId>ml.dmlc</groupId>
            <artifactId>xgboost4j_${scala.binary.version}</artifactId>
-            <version>1.7.2</version>
+            <version>1.7.3</version>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
--- a/jvm-packages/xgboost4j-tester/get_iris.py
+++ b/jvm-packages/xgboost4j-tester/get_iris.py
@@ -1,9 +1,9 @@
-from sklearn.datasets import load_iris
 import numpy as np
 import pandas
+from sklearn.datasets import load_iris

 X, y = load_iris(return_X_y=True)
-y = y.astype(np.int)
+y = y.astype(np.int32)
 df = pandas.DataFrame(data=X, columns=['sepal length', 'sepal width', 'petal length', 'petal width'])
 class_id_to_name = {0:'Iris-setosa', 1:'Iris-versicolor', 2:'Iris-virginica'}
 df['class'] = np.vectorize(class_id_to_name.get)(y)
--- a/jvm-packages/xgboost4j/pom.xml
+++ b/jvm-packages/xgboost4j/pom.xml
@@ -6,10 +6,10 @@
    <parent>
        <groupId>ml.dmlc</groupId>
        <artifactId>xgboost-jvm_2.12</artifactId>
-        <version>1.7.2</version>
+        <version>1.7.3</version>
    </parent>
    <artifactId>xgboost4j_2.12</artifactId>
-    <version>1.7.2</version>
+    <version>1.7.3</version>
    <packaging>jar</packaging>

    <dependencies>
--- a/python-package/xgboost/VERSION
+++ b/python-package/xgboost/VERSION
@@ -1 +1 @@
-1.7.2
+1.7.3
--- a/python-package/xgboost/sklearn.py
+++ b/python-package/xgboost/sklearn.py
@@ -674,7 +674,7 @@ class XGBModel(XGBModelBase):
                    self.kwargs = {}
                self.kwargs[key] = value

-        if hasattr(self, "_Booster"):
+        if self.__sklearn_is_fitted__():
            parameters = self.get_xgb_params()
            self.get_booster().set_param(parameters)

@@ -701,39 +701,12 @@ class XGBModel(XGBModelBase):
                np.iinfo(np.int32).max
            )

-        def parse_parameter(value: Any) -> Optional[Union[int, float, str]]:
-            for t in (int, float, str):
-                try:
-                    ret = t(value)
-                    return ret
-                except ValueError:
-                    continue
-            return None
-
-        # Get internal parameter values
-        try:
-            config = json.loads(self.get_booster().save_config())
-            stack = [config]
-            internal = {}
-            while stack:
-                obj = stack.pop()
-                for k, v in obj.items():
-                    if k.endswith("_param"):
-                        for p_k, p_v in v.items():
-                            internal[p_k] = p_v
-                    elif isinstance(v, dict):
-                        stack.append(v)
-
-            for k, v in internal.items():
-                if k in params and params[k] is None:
-                    params[k] = parse_parameter(v)
-        except ValueError:
-            pass
        return params

    def get_xgb_params(self) -> Dict[str, Any]:
        """Get xgboost specific parameters."""
-        params = self.get_params()
+        params: Dict[str, Any] = self.get_params()
+
        # Parameters that should not go into native learner.
        wrapper_specific = {
            "importance_type",
@@ -750,6 +723,7 @@ class XGBModel(XGBModelBase):
        for k, v in params.items():
            if k not in wrapper_specific and not callable(v):
                filtered[k] = v
+
        return filtered

    def get_num_boosting_rounds(self) -> int:
@@ -1070,7 +1044,7 @@ class XGBModel(XGBModelBase):
        # error with incompatible data type.
        # Inplace predict doesn't handle as many data types as DMatrix, but it's
        # sufficient for dask interface where input is simpiler.
-        predictor = self.get_params().get("predictor", None)
+        predictor = self.get_xgb_params().get("predictor", None)
        if predictor in ("auto", None) and self.booster != "gblinear":
            return True
        return False
@@ -1336,7 +1310,7 @@ class XGBModel(XGBModelBase):
        -------
        coef_ : array of shape ``[n_features]`` or ``[n_classes, n_features]``
        """
-        if self.get_params()["booster"] != "gblinear":
+        if self.get_xgb_params()["booster"] != "gblinear":
            raise AttributeError(
                f"Coefficients are not defined for Booster type {self.booster}"
            )
@@ -1366,7 +1340,7 @@ class XGBModel(XGBModelBase):
        -------
        intercept_ : array of shape ``(1,)`` or ``[n_classes]``
        """
-        if self.get_params()["booster"] != "gblinear":
+        if self.get_xgb_params()["booster"] != "gblinear":
            raise AttributeError(
                f"Intercept (bias) is not defined for Booster type {self.booster}"
            )
--- a/src/common/categorical.h
+++ b/src/common/categorical.h
@@ -48,20 +48,21 @@ inline XGBOOST_DEVICE bool InvalidCat(float cat) {
  return cat < 0 || cat >= kMaxCat;
 }

-/* \brief Whether should it traverse to left branch of a tree.
+/**
+ * \brief Whether should it traverse to left branch of a tree.
 *
- *  For one hot split, go to left if it's NOT the matching category.
+ *   Go to left if it's NOT the matching category, which matches one-hot encoding.
 */
-template <bool validate = true>
-inline XGBOOST_DEVICE bool Decision(common::Span<uint32_t const> cats, float cat, bool dft_left) {
+inline XGBOOST_DEVICE bool Decision(common::Span<uint32_t const> cats, float cat) {
  KCatBitField const s_cats(cats);
-  // FIXME: Size() is not accurate since it represents the size of bit set instead of
-  // actual number of categories.
-  if (XGBOOST_EXPECT(validate && (InvalidCat(cat) || cat >= s_cats.Size()), false)) {
-    return dft_left;
+  if (XGBOOST_EXPECT(InvalidCat(cat), false)) {
+    return true;
  }

  auto pos = KCatBitField::ToBitPos(cat);
+  // If the input category is larger than the size of the bit field, it implies that the
+  // category is not chosen. Otherwise the bit field would have the category instead of
+  // being smaller than the category value.
  if (pos.int_pos >= cats.size()) {
    return true;
  }
--- a/src/common/linalg_op.h
+++ b/src/common/linalg_op.h
@@ -62,7 +62,7 @@ void ElementWiseKernel(GenericParameter const* ctx, linalg::TensorView<T, D> t,
 #endif  // !defined(XGBOOST_USE_CUDA)

 template <typename T, std::int32_t kDim>
-auto cbegin(TensorView<T, kDim> v) {  // NOLINT
+auto cbegin(TensorView<T, kDim> const& v) {  // NOLINT
  auto it = common::MakeIndexTransformIter([&](size_t i) -> std::remove_cv_t<T> const& {
    return linalg::detail::Apply(v, linalg::UnravelIndex(i, v.Shape()));
  });
@@ -70,19 +70,19 @@ auto cbegin(TensorView<T, kDim> v) {  // NOLINT
 }

 template <typename T, std::int32_t kDim>
-auto cend(TensorView<T, kDim> v) {  // NOLINT
+auto cend(TensorView<T, kDim> const& v) {  // NOLINT
  return cbegin(v) + v.Size();
 }

 template <typename T, std::int32_t kDim>
-auto begin(TensorView<T, kDim> v) {  // NOLINT
+auto begin(TensorView<T, kDim>& v) {  // NOLINT
  auto it = common::MakeIndexTransformIter(
      [&](size_t i) -> T& { return linalg::detail::Apply(v, linalg::UnravelIndex(i, v.Shape())); });
  return it;
 }

 template <typename T, std::int32_t kDim>
-auto end(TensorView<T, kDim> v) {  // NOLINT
+auto end(TensorView<T, kDim>& v) {  // NOLINT
  return begin(v) + v.Size();
 }
 }  // namespace linalg
--- a/src/common/partition_builder.h
+++ b/src/common/partition_builder.h
@@ -144,7 +144,7 @@ class PartitionBuilder {
        auto gidx = gidx_calc(ridx);
        bool go_left = default_left;
        if (gidx > -1) {
-          go_left = Decision(node_cats, cut_values[gidx], default_left);
+          go_left = Decision(node_cats, cut_values[gidx]);
        }
        return go_left;
      } else {
@@ -157,7 +157,7 @@ class PartitionBuilder {
      bool go_left = default_left;
      if (gidx > -1) {
        if (is_cat) {
-          go_left = Decision(node_cats, cut_values[gidx], default_left);
+          go_left = Decision(node_cats, cut_values[gidx]);
        } else {
          go_left = cut_values[gidx] <= nodes[node_in_set].split.split_value;
        }
--- a/src/gbm/gbtree.cc
+++ b/src/gbm/gbtree.cc
@@ -28,6 +28,7 @@
 #include "xgboost/logging.h"
 #include "xgboost/objective.h"
 #include "xgboost/predictor.h"
+#include "xgboost/string_view.h"
 #include "xgboost/tree_updater.h"

 namespace xgboost {
@@ -395,23 +396,36 @@ void GBTree::LoadConfig(Json const& in) {
  tparam_.process_type = TreeProcessType::kDefault;
  int32_t const n_gpus = xgboost::common::AllVisibleGPUs();
  if (n_gpus == 0 && tparam_.predictor == PredictorType::kGPUPredictor) {
-    LOG(WARNING)
-        << "Loading from a raw memory buffer on CPU only machine.  "
-           "Changing predictor to auto.";
+    LOG(WARNING) << "Loading from a raw memory buffer on CPU only machine.  "
+                    "Changing predictor to auto.";
    tparam_.UpdateAllowUnknown(Args{{"predictor", "auto"}});
  }
+
+  auto msg = StringView{
+      R"(
+  Loading from a raw memory buffer (like pickle in Python, RDS in R) on a CPU-only
+  machine. Consider using `save_model/load_model` instead. See:
+
+    https://xgboost.readthedocs.io/en/latest/tutorials/saving_model.html
+
+  for more details about differences between saving model and serializing.)"};
+
  if (n_gpus == 0 && tparam_.tree_method == TreeMethod::kGPUHist) {
    tparam_.UpdateAllowUnknown(Args{{"tree_method", "hist"}});
-    LOG(WARNING)
-        << "Loading from a raw memory buffer on CPU only machine.  "
-           "Changing tree_method to hist.";
+    LOG(WARNING) << msg << "  Changing `tree_method` to `hist`.";
  }

  auto const& j_updaters = get<Object const>(in["updater"]);
  updaters_.clear();
+
  for (auto const& kv : j_updaters) {
-    std::unique_ptr<TreeUpdater> up(
-        TreeUpdater::Create(kv.first, ctx_, model_.learner_model_param->task));
+    auto name = kv.first;
+    if (n_gpus == 0 && name == "grow_gpu_hist") {
+      name = "grow_quantile_histmaker";
+      LOG(WARNING) << "Changing updater from `grow_gpu_hist` to `grow_quantile_histmaker`.";
+    }
+    std::unique_ptr<TreeUpdater> up{
+        TreeUpdater::Create(name, ctx_, model_.learner_model_param->task)};
    up->LoadConfig(kv.second);
    updaters_.push_back(std::move(up));
  }
--- a/src/predictor/predict_fn.h
+++ b/src/predictor/predict_fn.h
@@ -18,9 +18,7 @@ inline XGBOOST_DEVICE bst_node_t GetNextNode(const RegTree::Node &node, const bs
    if (has_categorical && common::IsCat(cats.split_type, nid)) {
      auto node_categories =
          cats.categories.subspan(cats.node_ptr[nid].beg, cats.node_ptr[nid].size);
-      return common::Decision<true>(node_categories, fvalue, node.DefaultLeft())
-                 ? node.LeftChild()
-                 : node.RightChild();
+      return common::Decision(node_categories, fvalue) ? node.LeftChild() : node.RightChild();
    } else {
      return node.LeftChild() + !(fvalue < node.SplitCond());
    }
--- a/src/tree/updater_gpu_hist.cu
+++ b/src/tree/updater_gpu_hist.cu
@@ -403,8 +403,7 @@ struct GPUHistMakerDevice {
            go_left = data.split_node.DefaultLeft();
          } else {
            if (data.split_type == FeatureType::kCategorical) {
-              go_left = common::Decision<false>(data.node_cats.Bits(), cut_value,
-                                                data.split_node.DefaultLeft());
+              go_left = common::Decision(data.node_cats.Bits(), cut_value);
            } else {
              go_left = cut_value <= data.split_node.SplitCond();
            }
@@ -481,7 +480,7 @@ struct GPUHistMakerDevice {
          if (common::IsCat(d_feature_types, position)) {
            auto node_cats = categories.subspan(categories_segments[position].beg,
                                                categories_segments[position].size);
-            go_left = common::Decision<false>(node_cats, element, node.DefaultLeft());
+            go_left = common::Decision(node_cats, element);
          } else {
            go_left = element <= node.SplitCond();
          }
--- a/tests/buildkite/test-cpp-gpu.sh
+++ b/tests/buildkite/test-cpp-gpu.sh
@@ -4,7 +4,7 @@ set -euo pipefail

 source tests/buildkite/conftest.sh

-echo "--- Run Google Tests with CUDA, using 4 GPUs"
+echo "--- Run Google Tests with CUDA, using a GPU"
 buildkite-agent artifact download "build/testxgboost" . --step build-cuda
 chmod +x build/testxgboost
 tests/ci_build/ci_build.sh gpu nvidia-docker \
@@ -12,11 +12,12 @@ tests/ci_build/ci_build.sh gpu nvidia-docker \
  --build-arg RAPIDS_VERSION_ARG=$RAPIDS_VERSION \
  build/testxgboost

-echo "--- Run Google Tests with CUDA, using 4 GPUs, RMM enabled"
-rm -rfv build/
-buildkite-agent artifact download "build/testxgboost" . --step build-cuda-with-rmm
-chmod +x build/testxgboost
-tests/ci_build/ci_build.sh rmm nvidia-docker \
-  --build-arg CUDA_VERSION_ARG=$CUDA_VERSION \
-  --build-arg RAPIDS_VERSION_ARG=$RAPIDS_VERSION bash -c \
-  "source activate gpu_test && build/testxgboost --use-rmm-pool"
+# Disabled until https://github.com/dmlc/xgboost/issues/8619 is resolved
+# echo "--- Run Google Tests with CUDA, using a GPU, RMM enabled"
+# rm -rfv build/
+# buildkite-agent artifact download "build/testxgboost" . --step build-cuda-with-rmm
+# chmod +x build/testxgboost
+# tests/ci_build/ci_build.sh rmm nvidia-docker \
+#   --build-arg CUDA_VERSION_ARG=$CUDA_VERSION \
+#   --build-arg RAPIDS_VERSION_ARG=$RAPIDS_VERSION bash -c \
+#   "source activate gpu_test && build/testxgboost --use-rmm-pool"
--- a/tests/cpp/common/test_categorical.cc
+++ b/tests/cpp/common/test_categorical.cc
@@ -1,11 +1,14 @@
 /*!
- * Copyright 2021 by XGBoost Contributors
+ * Copyright 2021-2022 by XGBoost Contributors
 */
 #include <gtest/gtest.h>
+#include <xgboost/json.h>
+#include <xgboost/learner.h>

 #include <limits>

 #include "../../../src/common/categorical.h"
+#include "../helpers.h"

 namespace xgboost {
 namespace common {
@@ -15,29 +18,76 @@ TEST(Categorical, Decision) {

  ASSERT_TRUE(common::InvalidCat(a));
  std::vector<uint32_t> cats(256, 0);
-  ASSERT_TRUE(Decision(cats, a, true));
+  ASSERT_TRUE(Decision(cats, a));

  // larger than size
  a = 256;
-  ASSERT_TRUE(Decision(cats, a, true));
+  ASSERT_TRUE(Decision(cats, a));

  // negative
  a = -1;
-  ASSERT_TRUE(Decision(cats, a, true));
+  ASSERT_TRUE(Decision(cats, a));

  CatBitField bits{cats};
  bits.Set(0);
  a = -0.5;
-  ASSERT_TRUE(Decision(cats, a, true));
+  ASSERT_TRUE(Decision(cats, a));

  // round toward 0
  a = 0.5;
-  ASSERT_FALSE(Decision(cats, a, true));
+  ASSERT_FALSE(Decision(cats, a));

  // valid
  a = 13;
  bits.Set(a);
-  ASSERT_FALSE(Decision(bits.Bits(), a, true));
+  ASSERT_FALSE(Decision(bits.Bits(), a));
+}
+
+/**
+ * Test for running inference with input category greater than the one stored in tree.
+ */
+TEST(Categorical, MinimalSet) {
+  std::size_t constexpr kRows = 256, kCols = 1, kCat = 3;
+  std::vector<FeatureType> types{FeatureType::kCategorical};
+  auto Xy =
+      RandomDataGenerator{kRows, kCols, 0.0}.Type(types).MaxCategory(kCat).GenerateDMatrix(true);
+
+  std::unique_ptr<Learner> learner{Learner::Create({Xy})};
+  learner->SetParam("max_depth", "1");
+  learner->SetParam("tree_method", "hist");
+  learner->Configure();
+  learner->UpdateOneIter(0, Xy);
+
+  Json model{Object{}};
+  learner->SaveModel(&model);
+  auto tree = model["learner"]["gradient_booster"]["model"]["trees"][0];
+  ASSERT_GE(get<I32Array const>(tree["categories"]).size(), 1);
+  auto v = get<I32Array const>(tree["categories"])[0];
+
+  HostDeviceVector<float> predt;
+  {
+    std::vector<float> data{static_cast<float>(kCat),
+                            static_cast<float>(kCat + 1), 32.0f, 33.0f, 34.0f};
+    auto test = GetDMatrixFromData(data, data.size(), kCols);
+    learner->Predict(test, false, &predt, 0, 0, false, /*pred_leaf=*/true);
+    ASSERT_EQ(predt.Size(), data.size());
+    auto const& h_predt = predt.ConstHostSpan();
+    for (auto v : h_predt) {
+      ASSERT_EQ(v, 1);  // left child of root node
+    }
+  }
+
+  {
+    std::unique_ptr<Learner> learner{Learner::Create({Xy})};
+    learner->LoadModel(model);
+    std::vector<float> data = {static_cast<float>(v)};
+    auto test = GetDMatrixFromData(data, data.size(), kCols);
+    learner->Predict(test, false, &predt, 0, 0, false, /*pred_leaf=*/true);
+    auto const& h_predt = predt.ConstHostSpan();
+    for (auto v : h_predt) {
+      ASSERT_EQ(v, 2);  // right child of root node
+    }
+  }
 }
 }  // namespace common
 }  // namespace xgboost
--- a/tests/python/test_with_pandas.py
+++ b/tests/python/test_with_pandas.py
@@ -112,7 +112,6 @@ class TestPandas:

        # test Index as columns
        df = pd.DataFrame([[1, 1.1], [2, 2.2]], columns=pd.Index([1, 2]))
-        print(df.columns, isinstance(df.columns, pd.Index))
        Xy = xgb.DMatrix(df)
        np.testing.assert_equal(np.array(Xy.feature_names), np.array(["1", "2"]))

--- a/tests/python/test_with_shap.py
+++ b/tests/python/test_with_shap.py
@@ -4,7 +4,7 @@ import pytest

 try:
    import shap
-except ImportError:
+except Exception:
    shap = None
    pass

--- a/tests/python/test_with_sklearn.py
+++ b/tests/python/test_with_sklearn.py
@@ -2,6 +2,7 @@ import collections
 import importlib.util
 import json
 import os
+import pickle
 import random
 import tempfile
 from typing import Callable, Optional
@@ -636,26 +637,74 @@ def test_sklearn_n_jobs():

 def test_parameters_access():
    from sklearn import datasets
-    params = {'updater': 'grow_gpu_hist', 'subsample': .5, 'n_jobs': -1}
+
+    params = {"updater": "grow_gpu_hist", "subsample": 0.5, "n_jobs": -1}
    clf = xgb.XGBClassifier(n_estimators=1000, **params)
-    assert clf.get_params()['updater'] == 'grow_gpu_hist'
-    assert clf.get_params()['subsample'] == .5
-    assert clf.get_params()['n_estimators'] == 1000
+    assert clf.get_params()["updater"] == "grow_gpu_hist"
+    assert clf.get_params()["subsample"] == 0.5
+    assert clf.get_params()["n_estimators"] == 1000

    clf = xgb.XGBClassifier(n_estimators=1, nthread=4)
    X, y = datasets.load_iris(return_X_y=True)
    clf.fit(X, y)

    config = json.loads(clf.get_booster().save_config())
-    assert int(config['learner']['generic_param']['nthread']) == 4
+    assert int(config["learner"]["generic_param"]["nthread"]) == 4

    clf.set_params(nthread=16)
    config = json.loads(clf.get_booster().save_config())
-    assert int(config['learner']['generic_param']['nthread']) == 16
+    assert int(config["learner"]["generic_param"]["nthread"]) == 16

    clf.predict(X)
    config = json.loads(clf.get_booster().save_config())
-    assert int(config['learner']['generic_param']['nthread']) == 16
+    assert int(config["learner"]["generic_param"]["nthread"]) == 16
+
+    clf = xgb.XGBClassifier(n_estimators=2)
+    assert clf.tree_method is None
+    assert clf.get_params()["tree_method"] is None
+    clf.fit(X, y)
+    assert clf.get_params()["tree_method"] is None
+
+    def save_load(clf: xgb.XGBClassifier) -> xgb.XGBClassifier:
+        with tempfile.TemporaryDirectory() as tmpdir:
+            path = os.path.join(tmpdir, "model.json")
+            clf.save_model(path)
+            clf = xgb.XGBClassifier()
+            clf.load_model(path)
+        return clf
+
+    def get_tm(clf: xgb.XGBClassifier) -> str:
+        tm = json.loads(clf.get_booster().save_config())["learner"]["gradient_booster"][
+            "gbtree_train_param"
+        ]["tree_method"]
+        return tm
+
+    assert get_tm(clf) == "exact"
+
+    clf = pickle.loads(pickle.dumps(clf))
+
+    assert clf.tree_method is None
+    assert clf.n_estimators == 2
+    assert clf.get_params()["tree_method"] is None
+    assert clf.get_params()["n_estimators"] == 2
+    assert get_tm(clf) == "exact"  # preserved for pickle
+
+    clf = save_load(clf)
+
+    assert clf.tree_method is None
+    assert clf.n_estimators == 2
+    assert clf.get_params()["tree_method"] is None
+    assert clf.get_params()["n_estimators"] == 2
+    assert get_tm(clf) == "auto"  # discarded for save/load_model
+
+    clf.set_params(tree_method="hist")
+    assert clf.get_params()["tree_method"] == "hist"
+    clf = pickle.loads(pickle.dumps(clf))
+    assert clf.get_params()["tree_method"] == "hist"
+    clf = save_load(clf)
+    # FIXME(jiamingy): We should remove this behavior once we remove parameters
+    # serialization for skl save/load_model.
+    assert clf.get_params()["tree_method"] == "hist"


 def test_kwargs_error():
@@ -695,13 +744,19 @@ def test_sklearn_clone():

 def test_sklearn_get_default_params():
    from sklearn.datasets import load_digits
+
    digits_2class = load_digits(n_class=2)
-    X = digits_2class['data']
-    y = digits_2class['target']
+    X = digits_2class["data"]
+    y = digits_2class["target"]
    cls = xgb.XGBClassifier()
-    assert cls.get_params()['base_score'] is None
+    assert cls.get_params()["base_score"] is None
    cls.fit(X[:4, ...], y[:4, ...])
-    assert cls.get_params()['base_score'] is not None
+    base_score = float(
+        json.loads(cls.get_booster().save_config())["learner"]["learner_model_param"][
+            "base_score"
+        ]
+    )
+    np.testing.assert_equal(base_score, 0.5)


 def run_validation_weights(model):
@@ -1029,9 +1084,9 @@ def test_pandas_input():

    clf_isotonic = CalibratedClassifierCV(model, cv="prefit", method="isotonic")
    clf_isotonic.fit(train, target)
-    assert isinstance(
-        clf_isotonic.calibrated_classifiers_[0].base_estimator, xgb.XGBClassifier
-    )
+    clf = clf_isotonic.calibrated_classifiers_[0]
+    est = clf.estimator if hasattr(clf, "estimator") else clf.base_estimator
+    assert isinstance(est, xgb.XGBClassifier)
    np.testing.assert_allclose(np.array(clf_isotonic.classes_), np.array([0, 1]))
Author	SHA1	Message	Date
Jiaming Yuan	ccf43d4ba0	Bump R package version to 1.7.3. (#8649 )	2023-01-06 20:34:05 +08:00
Jiaming Yuan	dd58c2ac47	Bump version to 1.7.3. (#8646 )	2023-01-06 17:55:51 +08:00
Jiaming Yuan	899e4c8988	[backport] Do not return internal value for `get_params`. (#8634 ) (#8642 )	2023-01-06 02:28:39 +08:00
Jiaming Yuan	a2085bf223	[backport] Fix loading GPU pickle with a CPU-only xgboost distribution. (#8632 ) (#8641 ) We can handle loading the pickle on a CPU-only machine if the XGBoost is built with CUDA enabled (Linux and Windows PyPI package), but not if the distribution is CPU-only (macOS PyPI package).	2023-01-06 02:28:21 +08:00
Jiaming Yuan	067b704e58	[backport] Fix inference with categorical feature. (#8591 ) (#8602 ) (#8638 ) * Fix inference with categorical feature. (#8591) * Fix windows build on buildkite. (#8602) * workaround.	2023-01-06 01:17:49 +08:00
Jiaming Yuan	1a834b2b85	Fix linalg iterator. (#8603 ) (#8639 )	2023-01-05 23:16:10 +08:00
Jiaming Yuan	162b48a1a4	[backport] [CI] Disable gtest with RMM (#8620 ) (#8640 ) Co-authored-by: Philip Hyunsu Cho <chohyu01@cs.washington.edu>	2023-01-05 23:13:45 +08:00
Jiaming Yuan	83a078b7e5	[backport] Fix sklearn test that calls a removed field (#8579 ) (#8636 ) Co-authored-by: Rong Ou <rong.ou@gmail.com>	2023-01-05 21:17:05 +08:00
Jiaming Yuan	575fba651b	[backport] [CI] Fix CI with updated dependencies. (#8631 ) (#8635 )	2023-01-05 19:10:58 +08:00
@@ -1 +1 @@
-1.7.2
+1.7.3