Compare commits

...

9 Commits

Author SHA1 Message Date
Jiaming Yuan
ccf43d4ba0 Bump R package version to 1.7.3. (#8649) 2023-01-06 20:34:05 +08:00
Jiaming Yuan
dd58c2ac47 Bump version to 1.7.3. (#8646) 2023-01-06 17:55:51 +08:00
Jiaming Yuan
899e4c8988 [backport] Do not return internal value for get_params. (#8634) (#8642) 2023-01-06 02:28:39 +08:00
Jiaming Yuan
a2085bf223 [backport] Fix loading GPU pickle with a CPU-only xgboost distribution. (#8632) (#8641)
We can handle loading the pickle on a CPU-only machine if the XGBoost is built with CUDA
enabled (Linux and Windows PyPI package), but not if the distribution is CPU-only (macOS
PyPI package).
2023-01-06 02:28:21 +08:00
Jiaming Yuan
067b704e58 [backport] Fix inference with categorical feature. (#8591) (#8602) (#8638)
* Fix inference with categorical feature. (#8591)

* Fix windows build on buildkite. (#8602)

* workaround.
2023-01-06 01:17:49 +08:00
Jiaming Yuan
1a834b2b85 Fix linalg iterator. (#8603) (#8639) 2023-01-05 23:16:10 +08:00
Jiaming Yuan
162b48a1a4 [backport] [CI] Disable gtest with RMM (#8620) (#8640)
Co-authored-by: Philip Hyunsu Cho <chohyu01@cs.washington.edu>
2023-01-05 23:13:45 +08:00
Jiaming Yuan
83a078b7e5 [backport] Fix sklearn test that calls a removed field (#8579) (#8636)
Co-authored-by: Rong Ou <rong.ou@gmail.com>
2023-01-05 21:17:05 +08:00
Jiaming Yuan
575fba651b [backport] [CI] Fix CI with updated dependencies. (#8631) (#8635) 2023-01-05 19:10:58 +08:00
27 changed files with 223 additions and 132 deletions

View File

@@ -1,5 +1,5 @@
cmake_minimum_required(VERSION 3.14 FATAL_ERROR) cmake_minimum_required(VERSION 3.14 FATAL_ERROR)
project(xgboost LANGUAGES CXX C VERSION 1.7.2) project(xgboost LANGUAGES CXX C VERSION 1.7.3)
include(cmake/Utils.cmake) include(cmake/Utils.cmake)
list(APPEND CMAKE_MODULE_PATH "${xgboost_SOURCE_DIR}/cmake/modules") list(APPEND CMAKE_MODULE_PATH "${xgboost_SOURCE_DIR}/cmake/modules")
cmake_policy(SET CMP0022 NEW) cmake_policy(SET CMP0022 NEW)

View File

@@ -1,8 +1,8 @@
Package: xgboost Package: xgboost
Type: Package Type: Package
Title: Extreme Gradient Boosting Title: Extreme Gradient Boosting
Version: 1.7.2.1 Version: 1.7.3.1
Date: 2022-12-08 Date: 2023-01-06
Authors@R: c( Authors@R: c(
person("Tianqi", "Chen", role = c("aut"), person("Tianqi", "Chen", role = c("aut"),
email = "tianqi.tchen@gmail.com"), email = "tianqi.tchen@gmail.com"),
@@ -66,5 +66,5 @@ Imports:
methods, methods,
data.table (>= 1.9.6), data.table (>= 1.9.6),
jsonlite (>= 1.0), jsonlite (>= 1.0),
RoxygenNote: 7.2.1 RoxygenNote: 7.2.2
SystemRequirements: GNU make, C++14 SystemRequirements: GNU make, C++14

18
R-package/configure vendored
View File

@@ -1,6 +1,6 @@
#! /bin/sh #! /bin/sh
# Guess values for system-dependent variables and create Makefiles. # Guess values for system-dependent variables and create Makefiles.
# Generated by GNU Autoconf 2.69 for xgboost 1.7.2. # Generated by GNU Autoconf 2.69 for xgboost 1.7.3.
# #
# #
# Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc. # Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc.
@@ -576,8 +576,8 @@ MAKEFLAGS=
# Identity of this package. # Identity of this package.
PACKAGE_NAME='xgboost' PACKAGE_NAME='xgboost'
PACKAGE_TARNAME='xgboost' PACKAGE_TARNAME='xgboost'
PACKAGE_VERSION='1.7.2' PACKAGE_VERSION='1.7.3'
PACKAGE_STRING='xgboost 1.7.2' PACKAGE_STRING='xgboost 1.7.3'
PACKAGE_BUGREPORT='' PACKAGE_BUGREPORT=''
PACKAGE_URL='' PACKAGE_URL=''
@@ -1195,7 +1195,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing. # Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh. # This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF cat <<_ACEOF
\`configure' configures xgboost 1.7.2 to adapt to many kinds of systems. \`configure' configures xgboost 1.7.3 to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]... Usage: $0 [OPTION]... [VAR=VALUE]...
@@ -1257,7 +1257,7 @@ fi
if test -n "$ac_init_help"; then if test -n "$ac_init_help"; then
case $ac_init_help in case $ac_init_help in
short | recursive ) echo "Configuration of xgboost 1.7.2:";; short | recursive ) echo "Configuration of xgboost 1.7.3:";;
esac esac
cat <<\_ACEOF cat <<\_ACEOF
@@ -1336,7 +1336,7 @@ fi
test -n "$ac_init_help" && exit $ac_status test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then if $ac_init_version; then
cat <<\_ACEOF cat <<\_ACEOF
xgboost configure 1.7.2 xgboost configure 1.7.3
generated by GNU Autoconf 2.69 generated by GNU Autoconf 2.69
Copyright (C) 2012 Free Software Foundation, Inc. Copyright (C) 2012 Free Software Foundation, Inc.
@@ -1479,7 +1479,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake. running configure, to aid debugging if configure makes a mistake.
It was created by xgboost $as_me 1.7.2, which was It was created by xgboost $as_me 1.7.3, which was
generated by GNU Autoconf 2.69. Invocation command line was generated by GNU Autoconf 2.69. Invocation command line was
$ $0 $@ $ $0 $@
@@ -3294,7 +3294,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
# report actual input values of CONFIG_FILES etc. instead of their # report actual input values of CONFIG_FILES etc. instead of their
# values after options handling. # values after options handling.
ac_log=" ac_log="
This file was extended by xgboost $as_me 1.7.2, which was This file was extended by xgboost $as_me 1.7.3, which was
generated by GNU Autoconf 2.69. Invocation command line was generated by GNU Autoconf 2.69. Invocation command line was
CONFIG_FILES = $CONFIG_FILES CONFIG_FILES = $CONFIG_FILES
@@ -3347,7 +3347,7 @@ _ACEOF
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`" ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
ac_cs_version="\\ ac_cs_version="\\
xgboost config.status 1.7.2 xgboost config.status 1.7.3
configured by $0, generated by GNU Autoconf 2.69, configured by $0, generated by GNU Autoconf 2.69,
with options \\"\$ac_cs_config\\" with options \\"\$ac_cs_config\\"

View File

@@ -2,7 +2,7 @@
AC_PREREQ(2.69) AC_PREREQ(2.69)
AC_INIT([xgboost],[1.7.2],[],[xgboost],[]) AC_INIT([xgboost],[1.7.3],[],[xgboost],[])
# Use this line to set CC variable to a C compiler # Use this line to set CC variable to a C compiler
AC_PROG_CC AC_PROG_CC

View File

@@ -138,11 +138,11 @@ Miscellaneous
By default, XGBoost assumes input categories are integers starting from 0 till the number By default, XGBoost assumes input categories are integers starting from 0 till the number
of categories :math:`[0, n\_categories)`. However, user might provide inputs with invalid of categories :math:`[0, n\_categories)`. However, user might provide inputs with invalid
values due to mistakes or missing values. It can be negative value, integer values that values due to mistakes or missing values in training dataset. It can be negative value,
can not be accurately represented by 32-bit floating point, or values that are larger than integer values that can not be accurately represented by 32-bit floating point, or values
actual number of unique categories. During training this is validated but for prediction that are larger than actual number of unique categories. During training this is
it's treated as the same as missing value for performance reasons. Lastly, missing values validated but for prediction it's treated as the same as not-chosen category for
are treated as the same as numerical features (using the learned split direction). performance reasons.
********** **********

View File

@@ -6,6 +6,6 @@
#define XGBOOST_VER_MAJOR 1 #define XGBOOST_VER_MAJOR 1
#define XGBOOST_VER_MINOR 7 #define XGBOOST_VER_MINOR 7
#define XGBOOST_VER_PATCH 2 #define XGBOOST_VER_PATCH 3
#endif // XGBOOST_VERSION_CONFIG_H_ #endif // XGBOOST_VERSION_CONFIG_H_

View File

@@ -6,7 +6,7 @@
<groupId>ml.dmlc</groupId> <groupId>ml.dmlc</groupId>
<artifactId>xgboost-jvm_2.12</artifactId> <artifactId>xgboost-jvm_2.12</artifactId>
<version>1.7.2</version> <version>1.7.3</version>
<packaging>pom</packaging> <packaging>pom</packaging>
<name>XGBoost JVM Package</name> <name>XGBoost JVM Package</name>
<description>JVM Package for XGBoost</description> <description>JVM Package for XGBoost</description>

View File

@@ -6,10 +6,10 @@
<parent> <parent>
<groupId>ml.dmlc</groupId> <groupId>ml.dmlc</groupId>
<artifactId>xgboost-jvm_2.12</artifactId> <artifactId>xgboost-jvm_2.12</artifactId>
<version>1.7.2</version> <version>1.7.3</version>
</parent> </parent>
<artifactId>xgboost4j-example_2.12</artifactId> <artifactId>xgboost4j-example_2.12</artifactId>
<version>1.7.2</version> <version>1.7.3</version>
<packaging>jar</packaging> <packaging>jar</packaging>
<build> <build>
<plugins> <plugins>
@@ -26,7 +26,7 @@
<dependency> <dependency>
<groupId>ml.dmlc</groupId> <groupId>ml.dmlc</groupId>
<artifactId>xgboost4j-spark_${scala.binary.version}</artifactId> <artifactId>xgboost4j-spark_${scala.binary.version}</artifactId>
<version>1.7.2</version> <version>1.7.3</version>
</dependency> </dependency>
<dependency> <dependency>
<groupId>org.apache.spark</groupId> <groupId>org.apache.spark</groupId>
@@ -37,7 +37,7 @@
<dependency> <dependency>
<groupId>ml.dmlc</groupId> <groupId>ml.dmlc</groupId>
<artifactId>xgboost4j-flink_${scala.binary.version}</artifactId> <artifactId>xgboost4j-flink_${scala.binary.version}</artifactId>
<version>1.7.2</version> <version>1.7.3</version>
</dependency> </dependency>
<dependency> <dependency>
<groupId>org.apache.commons</groupId> <groupId>org.apache.commons</groupId>

View File

@@ -6,10 +6,10 @@
<parent> <parent>
<groupId>ml.dmlc</groupId> <groupId>ml.dmlc</groupId>
<artifactId>xgboost-jvm_2.12</artifactId> <artifactId>xgboost-jvm_2.12</artifactId>
<version>1.7.2</version> <version>1.7.3</version>
</parent> </parent>
<artifactId>xgboost4j-flink_2.12</artifactId> <artifactId>xgboost4j-flink_2.12</artifactId>
<version>1.7.2</version> <version>1.7.3</version>
<build> <build>
<plugins> <plugins>
<plugin> <plugin>
@@ -26,7 +26,7 @@
<dependency> <dependency>
<groupId>ml.dmlc</groupId> <groupId>ml.dmlc</groupId>
<artifactId>xgboost4j_${scala.binary.version}</artifactId> <artifactId>xgboost4j_${scala.binary.version}</artifactId>
<version>1.7.2</version> <version>1.7.3</version>
</dependency> </dependency>
<dependency> <dependency>
<groupId>org.apache.commons</groupId> <groupId>org.apache.commons</groupId>

View File

@@ -6,10 +6,10 @@
<parent> <parent>
<groupId>ml.dmlc</groupId> <groupId>ml.dmlc</groupId>
<artifactId>xgboost-jvm_2.12</artifactId> <artifactId>xgboost-jvm_2.12</artifactId>
<version>1.7.2</version> <version>1.7.3</version>
</parent> </parent>
<artifactId>xgboost4j-gpu_2.12</artifactId> <artifactId>xgboost4j-gpu_2.12</artifactId>
<version>1.7.2</version> <version>1.7.3</version>
<packaging>jar</packaging> <packaging>jar</packaging>
<dependencies> <dependencies>

View File

@@ -6,7 +6,7 @@
<parent> <parent>
<groupId>ml.dmlc</groupId> <groupId>ml.dmlc</groupId>
<artifactId>xgboost-jvm_2.12</artifactId> <artifactId>xgboost-jvm_2.12</artifactId>
<version>1.7.2</version> <version>1.7.3</version>
</parent> </parent>
<artifactId>xgboost4j-spark-gpu_2.12</artifactId> <artifactId>xgboost4j-spark-gpu_2.12</artifactId>
<build> <build>
@@ -24,7 +24,7 @@
<dependency> <dependency>
<groupId>ml.dmlc</groupId> <groupId>ml.dmlc</groupId>
<artifactId>xgboost4j-gpu_${scala.binary.version}</artifactId> <artifactId>xgboost4j-gpu_${scala.binary.version}</artifactId>
<version>1.7.2</version> <version>1.7.3</version>
</dependency> </dependency>
<dependency> <dependency>
<groupId>org.apache.spark</groupId> <groupId>org.apache.spark</groupId>

View File

@@ -6,7 +6,7 @@
<parent> <parent>
<groupId>ml.dmlc</groupId> <groupId>ml.dmlc</groupId>
<artifactId>xgboost-jvm_2.12</artifactId> <artifactId>xgboost-jvm_2.12</artifactId>
<version>1.7.2</version> <version>1.7.3</version>
</parent> </parent>
<artifactId>xgboost4j-spark_2.12</artifactId> <artifactId>xgboost4j-spark_2.12</artifactId>
<build> <build>
@@ -24,7 +24,7 @@
<dependency> <dependency>
<groupId>ml.dmlc</groupId> <groupId>ml.dmlc</groupId>
<artifactId>xgboost4j_${scala.binary.version}</artifactId> <artifactId>xgboost4j_${scala.binary.version}</artifactId>
<version>1.7.2</version> <version>1.7.3</version>
</dependency> </dependency>
<dependency> <dependency>
<groupId>org.apache.spark</groupId> <groupId>org.apache.spark</groupId>

View File

@@ -1,9 +1,9 @@
from sklearn.datasets import load_iris
import numpy as np import numpy as np
import pandas import pandas
from sklearn.datasets import load_iris
X, y = load_iris(return_X_y=True) X, y = load_iris(return_X_y=True)
y = y.astype(np.int) y = y.astype(np.int32)
df = pandas.DataFrame(data=X, columns=['sepal length', 'sepal width', 'petal length', 'petal width']) df = pandas.DataFrame(data=X, columns=['sepal length', 'sepal width', 'petal length', 'petal width'])
class_id_to_name = {0:'Iris-setosa', 1:'Iris-versicolor', 2:'Iris-virginica'} class_id_to_name = {0:'Iris-setosa', 1:'Iris-versicolor', 2:'Iris-virginica'}
df['class'] = np.vectorize(class_id_to_name.get)(y) df['class'] = np.vectorize(class_id_to_name.get)(y)

View File

@@ -6,10 +6,10 @@
<parent> <parent>
<groupId>ml.dmlc</groupId> <groupId>ml.dmlc</groupId>
<artifactId>xgboost-jvm_2.12</artifactId> <artifactId>xgboost-jvm_2.12</artifactId>
<version>1.7.2</version> <version>1.7.3</version>
</parent> </parent>
<artifactId>xgboost4j_2.12</artifactId> <artifactId>xgboost4j_2.12</artifactId>
<version>1.7.2</version> <version>1.7.3</version>
<packaging>jar</packaging> <packaging>jar</packaging>
<dependencies> <dependencies>

View File

@@ -1 +1 @@
1.7.2 1.7.3

View File

@@ -674,7 +674,7 @@ class XGBModel(XGBModelBase):
self.kwargs = {} self.kwargs = {}
self.kwargs[key] = value self.kwargs[key] = value
if hasattr(self, "_Booster"): if self.__sklearn_is_fitted__():
parameters = self.get_xgb_params() parameters = self.get_xgb_params()
self.get_booster().set_param(parameters) self.get_booster().set_param(parameters)
@@ -701,39 +701,12 @@ class XGBModel(XGBModelBase):
np.iinfo(np.int32).max np.iinfo(np.int32).max
) )
def parse_parameter(value: Any) -> Optional[Union[int, float, str]]:
for t in (int, float, str):
try:
ret = t(value)
return ret
except ValueError:
continue
return None
# Get internal parameter values
try:
config = json.loads(self.get_booster().save_config())
stack = [config]
internal = {}
while stack:
obj = stack.pop()
for k, v in obj.items():
if k.endswith("_param"):
for p_k, p_v in v.items():
internal[p_k] = p_v
elif isinstance(v, dict):
stack.append(v)
for k, v in internal.items():
if k in params and params[k] is None:
params[k] = parse_parameter(v)
except ValueError:
pass
return params return params
def get_xgb_params(self) -> Dict[str, Any]: def get_xgb_params(self) -> Dict[str, Any]:
"""Get xgboost specific parameters.""" """Get xgboost specific parameters."""
params = self.get_params() params: Dict[str, Any] = self.get_params()
# Parameters that should not go into native learner. # Parameters that should not go into native learner.
wrapper_specific = { wrapper_specific = {
"importance_type", "importance_type",
@@ -750,6 +723,7 @@ class XGBModel(XGBModelBase):
for k, v in params.items(): for k, v in params.items():
if k not in wrapper_specific and not callable(v): if k not in wrapper_specific and not callable(v):
filtered[k] = v filtered[k] = v
return filtered return filtered
def get_num_boosting_rounds(self) -> int: def get_num_boosting_rounds(self) -> int:
@@ -1070,7 +1044,7 @@ class XGBModel(XGBModelBase):
# error with incompatible data type. # error with incompatible data type.
# Inplace predict doesn't handle as many data types as DMatrix, but it's # Inplace predict doesn't handle as many data types as DMatrix, but it's
# sufficient for dask interface where input is simpiler. # sufficient for dask interface where input is simpiler.
predictor = self.get_params().get("predictor", None) predictor = self.get_xgb_params().get("predictor", None)
if predictor in ("auto", None) and self.booster != "gblinear": if predictor in ("auto", None) and self.booster != "gblinear":
return True return True
return False return False
@@ -1336,7 +1310,7 @@ class XGBModel(XGBModelBase):
------- -------
coef_ : array of shape ``[n_features]`` or ``[n_classes, n_features]`` coef_ : array of shape ``[n_features]`` or ``[n_classes, n_features]``
""" """
if self.get_params()["booster"] != "gblinear": if self.get_xgb_params()["booster"] != "gblinear":
raise AttributeError( raise AttributeError(
f"Coefficients are not defined for Booster type {self.booster}" f"Coefficients are not defined for Booster type {self.booster}"
) )
@@ -1366,7 +1340,7 @@ class XGBModel(XGBModelBase):
------- -------
intercept_ : array of shape ``(1,)`` or ``[n_classes]`` intercept_ : array of shape ``(1,)`` or ``[n_classes]``
""" """
if self.get_params()["booster"] != "gblinear": if self.get_xgb_params()["booster"] != "gblinear":
raise AttributeError( raise AttributeError(
f"Intercept (bias) is not defined for Booster type {self.booster}" f"Intercept (bias) is not defined for Booster type {self.booster}"
) )

View File

@@ -48,20 +48,21 @@ inline XGBOOST_DEVICE bool InvalidCat(float cat) {
return cat < 0 || cat >= kMaxCat; return cat < 0 || cat >= kMaxCat;
} }
/* \brief Whether should it traverse to left branch of a tree. /**
* \brief Whether should it traverse to left branch of a tree.
* *
* For one hot split, go to left if it's NOT the matching category. * Go to left if it's NOT the matching category, which matches one-hot encoding.
*/ */
template <bool validate = true> inline XGBOOST_DEVICE bool Decision(common::Span<uint32_t const> cats, float cat) {
inline XGBOOST_DEVICE bool Decision(common::Span<uint32_t const> cats, float cat, bool dft_left) {
KCatBitField const s_cats(cats); KCatBitField const s_cats(cats);
// FIXME: Size() is not accurate since it represents the size of bit set instead of if (XGBOOST_EXPECT(InvalidCat(cat), false)) {
// actual number of categories. return true;
if (XGBOOST_EXPECT(validate && (InvalidCat(cat) || cat >= s_cats.Size()), false)) {
return dft_left;
} }
auto pos = KCatBitField::ToBitPos(cat); auto pos = KCatBitField::ToBitPos(cat);
// If the input category is larger than the size of the bit field, it implies that the
// category is not chosen. Otherwise the bit field would have the category instead of
// being smaller than the category value.
if (pos.int_pos >= cats.size()) { if (pos.int_pos >= cats.size()) {
return true; return true;
} }

View File

@@ -62,7 +62,7 @@ void ElementWiseKernel(GenericParameter const* ctx, linalg::TensorView<T, D> t,
#endif // !defined(XGBOOST_USE_CUDA) #endif // !defined(XGBOOST_USE_CUDA)
template <typename T, std::int32_t kDim> template <typename T, std::int32_t kDim>
auto cbegin(TensorView<T, kDim> v) { // NOLINT auto cbegin(TensorView<T, kDim> const& v) { // NOLINT
auto it = common::MakeIndexTransformIter([&](size_t i) -> std::remove_cv_t<T> const& { auto it = common::MakeIndexTransformIter([&](size_t i) -> std::remove_cv_t<T> const& {
return linalg::detail::Apply(v, linalg::UnravelIndex(i, v.Shape())); return linalg::detail::Apply(v, linalg::UnravelIndex(i, v.Shape()));
}); });
@@ -70,19 +70,19 @@ auto cbegin(TensorView<T, kDim> v) { // NOLINT
} }
template <typename T, std::int32_t kDim> template <typename T, std::int32_t kDim>
auto cend(TensorView<T, kDim> v) { // NOLINT auto cend(TensorView<T, kDim> const& v) { // NOLINT
return cbegin(v) + v.Size(); return cbegin(v) + v.Size();
} }
template <typename T, std::int32_t kDim> template <typename T, std::int32_t kDim>
auto begin(TensorView<T, kDim> v) { // NOLINT auto begin(TensorView<T, kDim>& v) { // NOLINT
auto it = common::MakeIndexTransformIter( auto it = common::MakeIndexTransformIter(
[&](size_t i) -> T& { return linalg::detail::Apply(v, linalg::UnravelIndex(i, v.Shape())); }); [&](size_t i) -> T& { return linalg::detail::Apply(v, linalg::UnravelIndex(i, v.Shape())); });
return it; return it;
} }
template <typename T, std::int32_t kDim> template <typename T, std::int32_t kDim>
auto end(TensorView<T, kDim> v) { // NOLINT auto end(TensorView<T, kDim>& v) { // NOLINT
return begin(v) + v.Size(); return begin(v) + v.Size();
} }
} // namespace linalg } // namespace linalg

View File

@@ -144,7 +144,7 @@ class PartitionBuilder {
auto gidx = gidx_calc(ridx); auto gidx = gidx_calc(ridx);
bool go_left = default_left; bool go_left = default_left;
if (gidx > -1) { if (gidx > -1) {
go_left = Decision(node_cats, cut_values[gidx], default_left); go_left = Decision(node_cats, cut_values[gidx]);
} }
return go_left; return go_left;
} else { } else {
@@ -157,7 +157,7 @@ class PartitionBuilder {
bool go_left = default_left; bool go_left = default_left;
if (gidx > -1) { if (gidx > -1) {
if (is_cat) { if (is_cat) {
go_left = Decision(node_cats, cut_values[gidx], default_left); go_left = Decision(node_cats, cut_values[gidx]);
} else { } else {
go_left = cut_values[gidx] <= nodes[node_in_set].split.split_value; go_left = cut_values[gidx] <= nodes[node_in_set].split.split_value;
} }

View File

@@ -28,6 +28,7 @@
#include "xgboost/logging.h" #include "xgboost/logging.h"
#include "xgboost/objective.h" #include "xgboost/objective.h"
#include "xgboost/predictor.h" #include "xgboost/predictor.h"
#include "xgboost/string_view.h"
#include "xgboost/tree_updater.h" #include "xgboost/tree_updater.h"
namespace xgboost { namespace xgboost {
@@ -395,23 +396,36 @@ void GBTree::LoadConfig(Json const& in) {
tparam_.process_type = TreeProcessType::kDefault; tparam_.process_type = TreeProcessType::kDefault;
int32_t const n_gpus = xgboost::common::AllVisibleGPUs(); int32_t const n_gpus = xgboost::common::AllVisibleGPUs();
if (n_gpus == 0 && tparam_.predictor == PredictorType::kGPUPredictor) { if (n_gpus == 0 && tparam_.predictor == PredictorType::kGPUPredictor) {
LOG(WARNING) LOG(WARNING) << "Loading from a raw memory buffer on CPU only machine. "
<< "Loading from a raw memory buffer on CPU only machine. "
"Changing predictor to auto."; "Changing predictor to auto.";
tparam_.UpdateAllowUnknown(Args{{"predictor", "auto"}}); tparam_.UpdateAllowUnknown(Args{{"predictor", "auto"}});
} }
auto msg = StringView{
R"(
Loading from a raw memory buffer (like pickle in Python, RDS in R) on a CPU-only
machine. Consider using `save_model/load_model` instead. See:
https://xgboost.readthedocs.io/en/latest/tutorials/saving_model.html
for more details about differences between saving model and serializing.)"};
if (n_gpus == 0 && tparam_.tree_method == TreeMethod::kGPUHist) { if (n_gpus == 0 && tparam_.tree_method == TreeMethod::kGPUHist) {
tparam_.UpdateAllowUnknown(Args{{"tree_method", "hist"}}); tparam_.UpdateAllowUnknown(Args{{"tree_method", "hist"}});
LOG(WARNING) LOG(WARNING) << msg << " Changing `tree_method` to `hist`.";
<< "Loading from a raw memory buffer on CPU only machine. "
"Changing tree_method to hist.";
} }
auto const& j_updaters = get<Object const>(in["updater"]); auto const& j_updaters = get<Object const>(in["updater"]);
updaters_.clear(); updaters_.clear();
for (auto const& kv : j_updaters) { for (auto const& kv : j_updaters) {
std::unique_ptr<TreeUpdater> up( auto name = kv.first;
TreeUpdater::Create(kv.first, ctx_, model_.learner_model_param->task)); if (n_gpus == 0 && name == "grow_gpu_hist") {
name = "grow_quantile_histmaker";
LOG(WARNING) << "Changing updater from `grow_gpu_hist` to `grow_quantile_histmaker`.";
}
std::unique_ptr<TreeUpdater> up{
TreeUpdater::Create(name, ctx_, model_.learner_model_param->task)};
up->LoadConfig(kv.second); up->LoadConfig(kv.second);
updaters_.push_back(std::move(up)); updaters_.push_back(std::move(up));
} }

View File

@@ -18,9 +18,7 @@ inline XGBOOST_DEVICE bst_node_t GetNextNode(const RegTree::Node &node, const bs
if (has_categorical && common::IsCat(cats.split_type, nid)) { if (has_categorical && common::IsCat(cats.split_type, nid)) {
auto node_categories = auto node_categories =
cats.categories.subspan(cats.node_ptr[nid].beg, cats.node_ptr[nid].size); cats.categories.subspan(cats.node_ptr[nid].beg, cats.node_ptr[nid].size);
return common::Decision<true>(node_categories, fvalue, node.DefaultLeft()) return common::Decision(node_categories, fvalue) ? node.LeftChild() : node.RightChild();
? node.LeftChild()
: node.RightChild();
} else { } else {
return node.LeftChild() + !(fvalue < node.SplitCond()); return node.LeftChild() + !(fvalue < node.SplitCond());
} }

View File

@@ -403,8 +403,7 @@ struct GPUHistMakerDevice {
go_left = data.split_node.DefaultLeft(); go_left = data.split_node.DefaultLeft();
} else { } else {
if (data.split_type == FeatureType::kCategorical) { if (data.split_type == FeatureType::kCategorical) {
go_left = common::Decision<false>(data.node_cats.Bits(), cut_value, go_left = common::Decision(data.node_cats.Bits(), cut_value);
data.split_node.DefaultLeft());
} else { } else {
go_left = cut_value <= data.split_node.SplitCond(); go_left = cut_value <= data.split_node.SplitCond();
} }
@@ -481,7 +480,7 @@ struct GPUHistMakerDevice {
if (common::IsCat(d_feature_types, position)) { if (common::IsCat(d_feature_types, position)) {
auto node_cats = categories.subspan(categories_segments[position].beg, auto node_cats = categories.subspan(categories_segments[position].beg,
categories_segments[position].size); categories_segments[position].size);
go_left = common::Decision<false>(node_cats, element, node.DefaultLeft()); go_left = common::Decision(node_cats, element);
} else { } else {
go_left = element <= node.SplitCond(); go_left = element <= node.SplitCond();
} }

View File

@@ -4,7 +4,7 @@ set -euo pipefail
source tests/buildkite/conftest.sh source tests/buildkite/conftest.sh
echo "--- Run Google Tests with CUDA, using 4 GPUs" echo "--- Run Google Tests with CUDA, using a GPU"
buildkite-agent artifact download "build/testxgboost" . --step build-cuda buildkite-agent artifact download "build/testxgboost" . --step build-cuda
chmod +x build/testxgboost chmod +x build/testxgboost
tests/ci_build/ci_build.sh gpu nvidia-docker \ tests/ci_build/ci_build.sh gpu nvidia-docker \
@@ -12,11 +12,12 @@ tests/ci_build/ci_build.sh gpu nvidia-docker \
--build-arg RAPIDS_VERSION_ARG=$RAPIDS_VERSION \ --build-arg RAPIDS_VERSION_ARG=$RAPIDS_VERSION \
build/testxgboost build/testxgboost
echo "--- Run Google Tests with CUDA, using 4 GPUs, RMM enabled" # Disabled until https://github.com/dmlc/xgboost/issues/8619 is resolved
rm -rfv build/ # echo "--- Run Google Tests with CUDA, using a GPU, RMM enabled"
buildkite-agent artifact download "build/testxgboost" . --step build-cuda-with-rmm # rm -rfv build/
chmod +x build/testxgboost # buildkite-agent artifact download "build/testxgboost" . --step build-cuda-with-rmm
tests/ci_build/ci_build.sh rmm nvidia-docker \ # chmod +x build/testxgboost
--build-arg CUDA_VERSION_ARG=$CUDA_VERSION \ # tests/ci_build/ci_build.sh rmm nvidia-docker \
--build-arg RAPIDS_VERSION_ARG=$RAPIDS_VERSION bash -c \ # --build-arg CUDA_VERSION_ARG=$CUDA_VERSION \
"source activate gpu_test && build/testxgboost --use-rmm-pool" # --build-arg RAPIDS_VERSION_ARG=$RAPIDS_VERSION bash -c \
# "source activate gpu_test && build/testxgboost --use-rmm-pool"

View File

@@ -1,11 +1,14 @@
/*! /*!
* Copyright 2021 by XGBoost Contributors * Copyright 2021-2022 by XGBoost Contributors
*/ */
#include <gtest/gtest.h> #include <gtest/gtest.h>
#include <xgboost/json.h>
#include <xgboost/learner.h>
#include <limits> #include <limits>
#include "../../../src/common/categorical.h" #include "../../../src/common/categorical.h"
#include "../helpers.h"
namespace xgboost { namespace xgboost {
namespace common { namespace common {
@@ -15,29 +18,76 @@ TEST(Categorical, Decision) {
ASSERT_TRUE(common::InvalidCat(a)); ASSERT_TRUE(common::InvalidCat(a));
std::vector<uint32_t> cats(256, 0); std::vector<uint32_t> cats(256, 0);
ASSERT_TRUE(Decision(cats, a, true)); ASSERT_TRUE(Decision(cats, a));
// larger than size // larger than size
a = 256; a = 256;
ASSERT_TRUE(Decision(cats, a, true)); ASSERT_TRUE(Decision(cats, a));
// negative // negative
a = -1; a = -1;
ASSERT_TRUE(Decision(cats, a, true)); ASSERT_TRUE(Decision(cats, a));
CatBitField bits{cats}; CatBitField bits{cats};
bits.Set(0); bits.Set(0);
a = -0.5; a = -0.5;
ASSERT_TRUE(Decision(cats, a, true)); ASSERT_TRUE(Decision(cats, a));
// round toward 0 // round toward 0
a = 0.5; a = 0.5;
ASSERT_FALSE(Decision(cats, a, true)); ASSERT_FALSE(Decision(cats, a));
// valid // valid
a = 13; a = 13;
bits.Set(a); bits.Set(a);
ASSERT_FALSE(Decision(bits.Bits(), a, true)); ASSERT_FALSE(Decision(bits.Bits(), a));
}
/**
* Test for running inference with input category greater than the one stored in tree.
*/
TEST(Categorical, MinimalSet) {
std::size_t constexpr kRows = 256, kCols = 1, kCat = 3;
std::vector<FeatureType> types{FeatureType::kCategorical};
auto Xy =
RandomDataGenerator{kRows, kCols, 0.0}.Type(types).MaxCategory(kCat).GenerateDMatrix(true);
std::unique_ptr<Learner> learner{Learner::Create({Xy})};
learner->SetParam("max_depth", "1");
learner->SetParam("tree_method", "hist");
learner->Configure();
learner->UpdateOneIter(0, Xy);
Json model{Object{}};
learner->SaveModel(&model);
auto tree = model["learner"]["gradient_booster"]["model"]["trees"][0];
ASSERT_GE(get<I32Array const>(tree["categories"]).size(), 1);
auto v = get<I32Array const>(tree["categories"])[0];
HostDeviceVector<float> predt;
{
std::vector<float> data{static_cast<float>(kCat),
static_cast<float>(kCat + 1), 32.0f, 33.0f, 34.0f};
auto test = GetDMatrixFromData(data, data.size(), kCols);
learner->Predict(test, false, &predt, 0, 0, false, /*pred_leaf=*/true);
ASSERT_EQ(predt.Size(), data.size());
auto const& h_predt = predt.ConstHostSpan();
for (auto v : h_predt) {
ASSERT_EQ(v, 1); // left child of root node
}
}
{
std::unique_ptr<Learner> learner{Learner::Create({Xy})};
learner->LoadModel(model);
std::vector<float> data = {static_cast<float>(v)};
auto test = GetDMatrixFromData(data, data.size(), kCols);
learner->Predict(test, false, &predt, 0, 0, false, /*pred_leaf=*/true);
auto const& h_predt = predt.ConstHostSpan();
for (auto v : h_predt) {
ASSERT_EQ(v, 2); // right child of root node
}
}
} }
} // namespace common } // namespace common
} // namespace xgboost } // namespace xgboost

View File

@@ -112,7 +112,6 @@ class TestPandas:
# test Index as columns # test Index as columns
df = pd.DataFrame([[1, 1.1], [2, 2.2]], columns=pd.Index([1, 2])) df = pd.DataFrame([[1, 1.1], [2, 2.2]], columns=pd.Index([1, 2]))
print(df.columns, isinstance(df.columns, pd.Index))
Xy = xgb.DMatrix(df) Xy = xgb.DMatrix(df)
np.testing.assert_equal(np.array(Xy.feature_names), np.array(["1", "2"])) np.testing.assert_equal(np.array(Xy.feature_names), np.array(["1", "2"]))

View File

@@ -4,7 +4,7 @@ import pytest
try: try:
import shap import shap
except ImportError: except Exception:
shap = None shap = None
pass pass

View File

@@ -2,6 +2,7 @@ import collections
import importlib.util import importlib.util
import json import json
import os import os
import pickle
import random import random
import tempfile import tempfile
from typing import Callable, Optional from typing import Callable, Optional
@@ -636,26 +637,74 @@ def test_sklearn_n_jobs():
def test_parameters_access(): def test_parameters_access():
from sklearn import datasets from sklearn import datasets
params = {'updater': 'grow_gpu_hist', 'subsample': .5, 'n_jobs': -1}
params = {"updater": "grow_gpu_hist", "subsample": 0.5, "n_jobs": -1}
clf = xgb.XGBClassifier(n_estimators=1000, **params) clf = xgb.XGBClassifier(n_estimators=1000, **params)
assert clf.get_params()['updater'] == 'grow_gpu_hist' assert clf.get_params()["updater"] == "grow_gpu_hist"
assert clf.get_params()['subsample'] == .5 assert clf.get_params()["subsample"] == 0.5
assert clf.get_params()['n_estimators'] == 1000 assert clf.get_params()["n_estimators"] == 1000
clf = xgb.XGBClassifier(n_estimators=1, nthread=4) clf = xgb.XGBClassifier(n_estimators=1, nthread=4)
X, y = datasets.load_iris(return_X_y=True) X, y = datasets.load_iris(return_X_y=True)
clf.fit(X, y) clf.fit(X, y)
config = json.loads(clf.get_booster().save_config()) config = json.loads(clf.get_booster().save_config())
assert int(config['learner']['generic_param']['nthread']) == 4 assert int(config["learner"]["generic_param"]["nthread"]) == 4
clf.set_params(nthread=16) clf.set_params(nthread=16)
config = json.loads(clf.get_booster().save_config()) config = json.loads(clf.get_booster().save_config())
assert int(config['learner']['generic_param']['nthread']) == 16 assert int(config["learner"]["generic_param"]["nthread"]) == 16
clf.predict(X) clf.predict(X)
config = json.loads(clf.get_booster().save_config()) config = json.loads(clf.get_booster().save_config())
assert int(config['learner']['generic_param']['nthread']) == 16 assert int(config["learner"]["generic_param"]["nthread"]) == 16
clf = xgb.XGBClassifier(n_estimators=2)
assert clf.tree_method is None
assert clf.get_params()["tree_method"] is None
clf.fit(X, y)
assert clf.get_params()["tree_method"] is None
def save_load(clf: xgb.XGBClassifier) -> xgb.XGBClassifier:
with tempfile.TemporaryDirectory() as tmpdir:
path = os.path.join(tmpdir, "model.json")
clf.save_model(path)
clf = xgb.XGBClassifier()
clf.load_model(path)
return clf
def get_tm(clf: xgb.XGBClassifier) -> str:
tm = json.loads(clf.get_booster().save_config())["learner"]["gradient_booster"][
"gbtree_train_param"
]["tree_method"]
return tm
assert get_tm(clf) == "exact"
clf = pickle.loads(pickle.dumps(clf))
assert clf.tree_method is None
assert clf.n_estimators == 2
assert clf.get_params()["tree_method"] is None
assert clf.get_params()["n_estimators"] == 2
assert get_tm(clf) == "exact" # preserved for pickle
clf = save_load(clf)
assert clf.tree_method is None
assert clf.n_estimators == 2
assert clf.get_params()["tree_method"] is None
assert clf.get_params()["n_estimators"] == 2
assert get_tm(clf) == "auto" # discarded for save/load_model
clf.set_params(tree_method="hist")
assert clf.get_params()["tree_method"] == "hist"
clf = pickle.loads(pickle.dumps(clf))
assert clf.get_params()["tree_method"] == "hist"
clf = save_load(clf)
# FIXME(jiamingy): We should remove this behavior once we remove parameters
# serialization for skl save/load_model.
assert clf.get_params()["tree_method"] == "hist"
def test_kwargs_error(): def test_kwargs_error():
@@ -695,13 +744,19 @@ def test_sklearn_clone():
def test_sklearn_get_default_params(): def test_sklearn_get_default_params():
from sklearn.datasets import load_digits from sklearn.datasets import load_digits
digits_2class = load_digits(n_class=2) digits_2class = load_digits(n_class=2)
X = digits_2class['data'] X = digits_2class["data"]
y = digits_2class['target'] y = digits_2class["target"]
cls = xgb.XGBClassifier() cls = xgb.XGBClassifier()
assert cls.get_params()['base_score'] is None assert cls.get_params()["base_score"] is None
cls.fit(X[:4, ...], y[:4, ...]) cls.fit(X[:4, ...], y[:4, ...])
assert cls.get_params()['base_score'] is not None base_score = float(
json.loads(cls.get_booster().save_config())["learner"]["learner_model_param"][
"base_score"
]
)
np.testing.assert_equal(base_score, 0.5)
def run_validation_weights(model): def run_validation_weights(model):
@@ -1029,9 +1084,9 @@ def test_pandas_input():
clf_isotonic = CalibratedClassifierCV(model, cv="prefit", method="isotonic") clf_isotonic = CalibratedClassifierCV(model, cv="prefit", method="isotonic")
clf_isotonic.fit(train, target) clf_isotonic.fit(train, target)
assert isinstance( clf = clf_isotonic.calibrated_classifiers_[0]
clf_isotonic.calibrated_classifiers_[0].base_estimator, xgb.XGBClassifier est = clf.estimator if hasattr(clf, "estimator") else clf.base_estimator
) assert isinstance(est, xgb.XGBClassifier)
np.testing.assert_allclose(np.array(clf_isotonic.classes_), np.array([0, 1])) np.testing.assert_allclose(np.array(clf_isotonic.classes_), np.array([0, 1]))