Compare commits
9 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
ccf43d4ba0 | ||
|
|
dd58c2ac47 | ||
|
|
899e4c8988 | ||
|
|
a2085bf223 | ||
|
|
067b704e58 | ||
|
|
1a834b2b85 | ||
|
|
162b48a1a4 | ||
|
|
83a078b7e5 | ||
|
|
575fba651b |
@@ -1,5 +1,5 @@
|
||||
cmake_minimum_required(VERSION 3.14 FATAL_ERROR)
|
||||
project(xgboost LANGUAGES CXX C VERSION 1.7.2)
|
||||
project(xgboost LANGUAGES CXX C VERSION 1.7.3)
|
||||
include(cmake/Utils.cmake)
|
||||
list(APPEND CMAKE_MODULE_PATH "${xgboost_SOURCE_DIR}/cmake/modules")
|
||||
cmake_policy(SET CMP0022 NEW)
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
Package: xgboost
|
||||
Type: Package
|
||||
Title: Extreme Gradient Boosting
|
||||
Version: 1.7.2.1
|
||||
Date: 2022-12-08
|
||||
Version: 1.7.3.1
|
||||
Date: 2023-01-06
|
||||
Authors@R: c(
|
||||
person("Tianqi", "Chen", role = c("aut"),
|
||||
email = "tianqi.tchen@gmail.com"),
|
||||
@@ -66,5 +66,5 @@ Imports:
|
||||
methods,
|
||||
data.table (>= 1.9.6),
|
||||
jsonlite (>= 1.0),
|
||||
RoxygenNote: 7.2.1
|
||||
RoxygenNote: 7.2.2
|
||||
SystemRequirements: GNU make, C++14
|
||||
|
||||
18
R-package/configure
vendored
18
R-package/configure
vendored
@@ -1,6 +1,6 @@
|
||||
#! /bin/sh
|
||||
# Guess values for system-dependent variables and create Makefiles.
|
||||
# Generated by GNU Autoconf 2.69 for xgboost 1.7.2.
|
||||
# Generated by GNU Autoconf 2.69 for xgboost 1.7.3.
|
||||
#
|
||||
#
|
||||
# Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc.
|
||||
@@ -576,8 +576,8 @@ MAKEFLAGS=
|
||||
# Identity of this package.
|
||||
PACKAGE_NAME='xgboost'
|
||||
PACKAGE_TARNAME='xgboost'
|
||||
PACKAGE_VERSION='1.7.2'
|
||||
PACKAGE_STRING='xgboost 1.7.2'
|
||||
PACKAGE_VERSION='1.7.3'
|
||||
PACKAGE_STRING='xgboost 1.7.3'
|
||||
PACKAGE_BUGREPORT=''
|
||||
PACKAGE_URL=''
|
||||
|
||||
@@ -1195,7 +1195,7 @@ if test "$ac_init_help" = "long"; then
|
||||
# Omit some internal or obsolete options to make the list less imposing.
|
||||
# This message is too long to be a string in the A/UX 3.1 sh.
|
||||
cat <<_ACEOF
|
||||
\`configure' configures xgboost 1.7.2 to adapt to many kinds of systems.
|
||||
\`configure' configures xgboost 1.7.3 to adapt to many kinds of systems.
|
||||
|
||||
Usage: $0 [OPTION]... [VAR=VALUE]...
|
||||
|
||||
@@ -1257,7 +1257,7 @@ fi
|
||||
|
||||
if test -n "$ac_init_help"; then
|
||||
case $ac_init_help in
|
||||
short | recursive ) echo "Configuration of xgboost 1.7.2:";;
|
||||
short | recursive ) echo "Configuration of xgboost 1.7.3:";;
|
||||
esac
|
||||
cat <<\_ACEOF
|
||||
|
||||
@@ -1336,7 +1336,7 @@ fi
|
||||
test -n "$ac_init_help" && exit $ac_status
|
||||
if $ac_init_version; then
|
||||
cat <<\_ACEOF
|
||||
xgboost configure 1.7.2
|
||||
xgboost configure 1.7.3
|
||||
generated by GNU Autoconf 2.69
|
||||
|
||||
Copyright (C) 2012 Free Software Foundation, Inc.
|
||||
@@ -1479,7 +1479,7 @@ cat >config.log <<_ACEOF
|
||||
This file contains any messages produced by compilers while
|
||||
running configure, to aid debugging if configure makes a mistake.
|
||||
|
||||
It was created by xgboost $as_me 1.7.2, which was
|
||||
It was created by xgboost $as_me 1.7.3, which was
|
||||
generated by GNU Autoconf 2.69. Invocation command line was
|
||||
|
||||
$ $0 $@
|
||||
@@ -3294,7 +3294,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
|
||||
# report actual input values of CONFIG_FILES etc. instead of their
|
||||
# values after options handling.
|
||||
ac_log="
|
||||
This file was extended by xgboost $as_me 1.7.2, which was
|
||||
This file was extended by xgboost $as_me 1.7.3, which was
|
||||
generated by GNU Autoconf 2.69. Invocation command line was
|
||||
|
||||
CONFIG_FILES = $CONFIG_FILES
|
||||
@@ -3347,7 +3347,7 @@ _ACEOF
|
||||
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
|
||||
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
|
||||
ac_cs_version="\\
|
||||
xgboost config.status 1.7.2
|
||||
xgboost config.status 1.7.3
|
||||
configured by $0, generated by GNU Autoconf 2.69,
|
||||
with options \\"\$ac_cs_config\\"
|
||||
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
AC_PREREQ(2.69)
|
||||
|
||||
AC_INIT([xgboost],[1.7.2],[],[xgboost],[])
|
||||
AC_INIT([xgboost],[1.7.3],[],[xgboost],[])
|
||||
|
||||
# Use this line to set CC variable to a C compiler
|
||||
AC_PROG_CC
|
||||
|
||||
@@ -138,11 +138,11 @@ Miscellaneous
|
||||
|
||||
By default, XGBoost assumes input categories are integers starting from 0 till the number
|
||||
of categories :math:`[0, n\_categories)`. However, user might provide inputs with invalid
|
||||
values due to mistakes or missing values. It can be negative value, integer values that
|
||||
can not be accurately represented by 32-bit floating point, or values that are larger than
|
||||
actual number of unique categories. During training this is validated but for prediction
|
||||
it's treated as the same as missing value for performance reasons. Lastly, missing values
|
||||
are treated as the same as numerical features (using the learned split direction).
|
||||
values due to mistakes or missing values in training dataset. It can be negative value,
|
||||
integer values that can not be accurately represented by 32-bit floating point, or values
|
||||
that are larger than actual number of unique categories. During training this is
|
||||
validated but for prediction it's treated as the same as not-chosen category for
|
||||
performance reasons.
|
||||
|
||||
|
||||
**********
|
||||
|
||||
@@ -6,6 +6,6 @@
|
||||
|
||||
#define XGBOOST_VER_MAJOR 1
|
||||
#define XGBOOST_VER_MINOR 7
|
||||
#define XGBOOST_VER_PATCH 2
|
||||
#define XGBOOST_VER_PATCH 3
|
||||
|
||||
#endif // XGBOOST_VERSION_CONFIG_H_
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
|
||||
<groupId>ml.dmlc</groupId>
|
||||
<artifactId>xgboost-jvm_2.12</artifactId>
|
||||
<version>1.7.2</version>
|
||||
<version>1.7.3</version>
|
||||
<packaging>pom</packaging>
|
||||
<name>XGBoost JVM Package</name>
|
||||
<description>JVM Package for XGBoost</description>
|
||||
|
||||
@@ -6,10 +6,10 @@
|
||||
<parent>
|
||||
<groupId>ml.dmlc</groupId>
|
||||
<artifactId>xgboost-jvm_2.12</artifactId>
|
||||
<version>1.7.2</version>
|
||||
<version>1.7.3</version>
|
||||
</parent>
|
||||
<artifactId>xgboost4j-example_2.12</artifactId>
|
||||
<version>1.7.2</version>
|
||||
<version>1.7.3</version>
|
||||
<packaging>jar</packaging>
|
||||
<build>
|
||||
<plugins>
|
||||
@@ -26,7 +26,7 @@
|
||||
<dependency>
|
||||
<groupId>ml.dmlc</groupId>
|
||||
<artifactId>xgboost4j-spark_${scala.binary.version}</artifactId>
|
||||
<version>1.7.2</version>
|
||||
<version>1.7.3</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.spark</groupId>
|
||||
@@ -37,7 +37,7 @@
|
||||
<dependency>
|
||||
<groupId>ml.dmlc</groupId>
|
||||
<artifactId>xgboost4j-flink_${scala.binary.version}</artifactId>
|
||||
<version>1.7.2</version>
|
||||
<version>1.7.3</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.commons</groupId>
|
||||
|
||||
@@ -6,10 +6,10 @@
|
||||
<parent>
|
||||
<groupId>ml.dmlc</groupId>
|
||||
<artifactId>xgboost-jvm_2.12</artifactId>
|
||||
<version>1.7.2</version>
|
||||
<version>1.7.3</version>
|
||||
</parent>
|
||||
<artifactId>xgboost4j-flink_2.12</artifactId>
|
||||
<version>1.7.2</version>
|
||||
<version>1.7.3</version>
|
||||
<build>
|
||||
<plugins>
|
||||
<plugin>
|
||||
@@ -26,7 +26,7 @@
|
||||
<dependency>
|
||||
<groupId>ml.dmlc</groupId>
|
||||
<artifactId>xgboost4j_${scala.binary.version}</artifactId>
|
||||
<version>1.7.2</version>
|
||||
<version>1.7.3</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.commons</groupId>
|
||||
|
||||
@@ -6,10 +6,10 @@
|
||||
<parent>
|
||||
<groupId>ml.dmlc</groupId>
|
||||
<artifactId>xgboost-jvm_2.12</artifactId>
|
||||
<version>1.7.2</version>
|
||||
<version>1.7.3</version>
|
||||
</parent>
|
||||
<artifactId>xgboost4j-gpu_2.12</artifactId>
|
||||
<version>1.7.2</version>
|
||||
<version>1.7.3</version>
|
||||
<packaging>jar</packaging>
|
||||
|
||||
<dependencies>
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
<parent>
|
||||
<groupId>ml.dmlc</groupId>
|
||||
<artifactId>xgboost-jvm_2.12</artifactId>
|
||||
<version>1.7.2</version>
|
||||
<version>1.7.3</version>
|
||||
</parent>
|
||||
<artifactId>xgboost4j-spark-gpu_2.12</artifactId>
|
||||
<build>
|
||||
@@ -24,7 +24,7 @@
|
||||
<dependency>
|
||||
<groupId>ml.dmlc</groupId>
|
||||
<artifactId>xgboost4j-gpu_${scala.binary.version}</artifactId>
|
||||
<version>1.7.2</version>
|
||||
<version>1.7.3</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.spark</groupId>
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
<parent>
|
||||
<groupId>ml.dmlc</groupId>
|
||||
<artifactId>xgboost-jvm_2.12</artifactId>
|
||||
<version>1.7.2</version>
|
||||
<version>1.7.3</version>
|
||||
</parent>
|
||||
<artifactId>xgboost4j-spark_2.12</artifactId>
|
||||
<build>
|
||||
@@ -24,7 +24,7 @@
|
||||
<dependency>
|
||||
<groupId>ml.dmlc</groupId>
|
||||
<artifactId>xgboost4j_${scala.binary.version}</artifactId>
|
||||
<version>1.7.2</version>
|
||||
<version>1.7.3</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.spark</groupId>
|
||||
|
||||
@@ -1,9 +1,9 @@
|
||||
from sklearn.datasets import load_iris
|
||||
import numpy as np
|
||||
import pandas
|
||||
from sklearn.datasets import load_iris
|
||||
|
||||
X, y = load_iris(return_X_y=True)
|
||||
y = y.astype(np.int)
|
||||
y = y.astype(np.int32)
|
||||
df = pandas.DataFrame(data=X, columns=['sepal length', 'sepal width', 'petal length', 'petal width'])
|
||||
class_id_to_name = {0:'Iris-setosa', 1:'Iris-versicolor', 2:'Iris-virginica'}
|
||||
df['class'] = np.vectorize(class_id_to_name.get)(y)
|
||||
|
||||
@@ -6,10 +6,10 @@
|
||||
<parent>
|
||||
<groupId>ml.dmlc</groupId>
|
||||
<artifactId>xgboost-jvm_2.12</artifactId>
|
||||
<version>1.7.2</version>
|
||||
<version>1.7.3</version>
|
||||
</parent>
|
||||
<artifactId>xgboost4j_2.12</artifactId>
|
||||
<version>1.7.2</version>
|
||||
<version>1.7.3</version>
|
||||
<packaging>jar</packaging>
|
||||
|
||||
<dependencies>
|
||||
|
||||
@@ -1 +1 @@
|
||||
1.7.2
|
||||
1.7.3
|
||||
|
||||
@@ -674,7 +674,7 @@ class XGBModel(XGBModelBase):
|
||||
self.kwargs = {}
|
||||
self.kwargs[key] = value
|
||||
|
||||
if hasattr(self, "_Booster"):
|
||||
if self.__sklearn_is_fitted__():
|
||||
parameters = self.get_xgb_params()
|
||||
self.get_booster().set_param(parameters)
|
||||
|
||||
@@ -701,39 +701,12 @@ class XGBModel(XGBModelBase):
|
||||
np.iinfo(np.int32).max
|
||||
)
|
||||
|
||||
def parse_parameter(value: Any) -> Optional[Union[int, float, str]]:
|
||||
for t in (int, float, str):
|
||||
try:
|
||||
ret = t(value)
|
||||
return ret
|
||||
except ValueError:
|
||||
continue
|
||||
return None
|
||||
|
||||
# Get internal parameter values
|
||||
try:
|
||||
config = json.loads(self.get_booster().save_config())
|
||||
stack = [config]
|
||||
internal = {}
|
||||
while stack:
|
||||
obj = stack.pop()
|
||||
for k, v in obj.items():
|
||||
if k.endswith("_param"):
|
||||
for p_k, p_v in v.items():
|
||||
internal[p_k] = p_v
|
||||
elif isinstance(v, dict):
|
||||
stack.append(v)
|
||||
|
||||
for k, v in internal.items():
|
||||
if k in params and params[k] is None:
|
||||
params[k] = parse_parameter(v)
|
||||
except ValueError:
|
||||
pass
|
||||
return params
|
||||
|
||||
def get_xgb_params(self) -> Dict[str, Any]:
|
||||
"""Get xgboost specific parameters."""
|
||||
params = self.get_params()
|
||||
params: Dict[str, Any] = self.get_params()
|
||||
|
||||
# Parameters that should not go into native learner.
|
||||
wrapper_specific = {
|
||||
"importance_type",
|
||||
@@ -750,6 +723,7 @@ class XGBModel(XGBModelBase):
|
||||
for k, v in params.items():
|
||||
if k not in wrapper_specific and not callable(v):
|
||||
filtered[k] = v
|
||||
|
||||
return filtered
|
||||
|
||||
def get_num_boosting_rounds(self) -> int:
|
||||
@@ -1070,7 +1044,7 @@ class XGBModel(XGBModelBase):
|
||||
# error with incompatible data type.
|
||||
# Inplace predict doesn't handle as many data types as DMatrix, but it's
|
||||
# sufficient for dask interface where input is simpiler.
|
||||
predictor = self.get_params().get("predictor", None)
|
||||
predictor = self.get_xgb_params().get("predictor", None)
|
||||
if predictor in ("auto", None) and self.booster != "gblinear":
|
||||
return True
|
||||
return False
|
||||
@@ -1336,7 +1310,7 @@ class XGBModel(XGBModelBase):
|
||||
-------
|
||||
coef_ : array of shape ``[n_features]`` or ``[n_classes, n_features]``
|
||||
"""
|
||||
if self.get_params()["booster"] != "gblinear":
|
||||
if self.get_xgb_params()["booster"] != "gblinear":
|
||||
raise AttributeError(
|
||||
f"Coefficients are not defined for Booster type {self.booster}"
|
||||
)
|
||||
@@ -1366,7 +1340,7 @@ class XGBModel(XGBModelBase):
|
||||
-------
|
||||
intercept_ : array of shape ``(1,)`` or ``[n_classes]``
|
||||
"""
|
||||
if self.get_params()["booster"] != "gblinear":
|
||||
if self.get_xgb_params()["booster"] != "gblinear":
|
||||
raise AttributeError(
|
||||
f"Intercept (bias) is not defined for Booster type {self.booster}"
|
||||
)
|
||||
|
||||
@@ -48,20 +48,21 @@ inline XGBOOST_DEVICE bool InvalidCat(float cat) {
|
||||
return cat < 0 || cat >= kMaxCat;
|
||||
}
|
||||
|
||||
/* \brief Whether should it traverse to left branch of a tree.
|
||||
/**
|
||||
* \brief Whether should it traverse to left branch of a tree.
|
||||
*
|
||||
* For one hot split, go to left if it's NOT the matching category.
|
||||
* Go to left if it's NOT the matching category, which matches one-hot encoding.
|
||||
*/
|
||||
template <bool validate = true>
|
||||
inline XGBOOST_DEVICE bool Decision(common::Span<uint32_t const> cats, float cat, bool dft_left) {
|
||||
inline XGBOOST_DEVICE bool Decision(common::Span<uint32_t const> cats, float cat) {
|
||||
KCatBitField const s_cats(cats);
|
||||
// FIXME: Size() is not accurate since it represents the size of bit set instead of
|
||||
// actual number of categories.
|
||||
if (XGBOOST_EXPECT(validate && (InvalidCat(cat) || cat >= s_cats.Size()), false)) {
|
||||
return dft_left;
|
||||
if (XGBOOST_EXPECT(InvalidCat(cat), false)) {
|
||||
return true;
|
||||
}
|
||||
|
||||
auto pos = KCatBitField::ToBitPos(cat);
|
||||
// If the input category is larger than the size of the bit field, it implies that the
|
||||
// category is not chosen. Otherwise the bit field would have the category instead of
|
||||
// being smaller than the category value.
|
||||
if (pos.int_pos >= cats.size()) {
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -62,7 +62,7 @@ void ElementWiseKernel(GenericParameter const* ctx, linalg::TensorView<T, D> t,
|
||||
#endif // !defined(XGBOOST_USE_CUDA)
|
||||
|
||||
template <typename T, std::int32_t kDim>
|
||||
auto cbegin(TensorView<T, kDim> v) { // NOLINT
|
||||
auto cbegin(TensorView<T, kDim> const& v) { // NOLINT
|
||||
auto it = common::MakeIndexTransformIter([&](size_t i) -> std::remove_cv_t<T> const& {
|
||||
return linalg::detail::Apply(v, linalg::UnravelIndex(i, v.Shape()));
|
||||
});
|
||||
@@ -70,19 +70,19 @@ auto cbegin(TensorView<T, kDim> v) { // NOLINT
|
||||
}
|
||||
|
||||
template <typename T, std::int32_t kDim>
|
||||
auto cend(TensorView<T, kDim> v) { // NOLINT
|
||||
auto cend(TensorView<T, kDim> const& v) { // NOLINT
|
||||
return cbegin(v) + v.Size();
|
||||
}
|
||||
|
||||
template <typename T, std::int32_t kDim>
|
||||
auto begin(TensorView<T, kDim> v) { // NOLINT
|
||||
auto begin(TensorView<T, kDim>& v) { // NOLINT
|
||||
auto it = common::MakeIndexTransformIter(
|
||||
[&](size_t i) -> T& { return linalg::detail::Apply(v, linalg::UnravelIndex(i, v.Shape())); });
|
||||
return it;
|
||||
}
|
||||
|
||||
template <typename T, std::int32_t kDim>
|
||||
auto end(TensorView<T, kDim> v) { // NOLINT
|
||||
auto end(TensorView<T, kDim>& v) { // NOLINT
|
||||
return begin(v) + v.Size();
|
||||
}
|
||||
} // namespace linalg
|
||||
|
||||
@@ -144,7 +144,7 @@ class PartitionBuilder {
|
||||
auto gidx = gidx_calc(ridx);
|
||||
bool go_left = default_left;
|
||||
if (gidx > -1) {
|
||||
go_left = Decision(node_cats, cut_values[gidx], default_left);
|
||||
go_left = Decision(node_cats, cut_values[gidx]);
|
||||
}
|
||||
return go_left;
|
||||
} else {
|
||||
@@ -157,7 +157,7 @@ class PartitionBuilder {
|
||||
bool go_left = default_left;
|
||||
if (gidx > -1) {
|
||||
if (is_cat) {
|
||||
go_left = Decision(node_cats, cut_values[gidx], default_left);
|
||||
go_left = Decision(node_cats, cut_values[gidx]);
|
||||
} else {
|
||||
go_left = cut_values[gidx] <= nodes[node_in_set].split.split_value;
|
||||
}
|
||||
|
||||
@@ -28,6 +28,7 @@
|
||||
#include "xgboost/logging.h"
|
||||
#include "xgboost/objective.h"
|
||||
#include "xgboost/predictor.h"
|
||||
#include "xgboost/string_view.h"
|
||||
#include "xgboost/tree_updater.h"
|
||||
|
||||
namespace xgboost {
|
||||
@@ -395,23 +396,36 @@ void GBTree::LoadConfig(Json const& in) {
|
||||
tparam_.process_type = TreeProcessType::kDefault;
|
||||
int32_t const n_gpus = xgboost::common::AllVisibleGPUs();
|
||||
if (n_gpus == 0 && tparam_.predictor == PredictorType::kGPUPredictor) {
|
||||
LOG(WARNING)
|
||||
<< "Loading from a raw memory buffer on CPU only machine. "
|
||||
"Changing predictor to auto.";
|
||||
LOG(WARNING) << "Loading from a raw memory buffer on CPU only machine. "
|
||||
"Changing predictor to auto.";
|
||||
tparam_.UpdateAllowUnknown(Args{{"predictor", "auto"}});
|
||||
}
|
||||
|
||||
auto msg = StringView{
|
||||
R"(
|
||||
Loading from a raw memory buffer (like pickle in Python, RDS in R) on a CPU-only
|
||||
machine. Consider using `save_model/load_model` instead. See:
|
||||
|
||||
https://xgboost.readthedocs.io/en/latest/tutorials/saving_model.html
|
||||
|
||||
for more details about differences between saving model and serializing.)"};
|
||||
|
||||
if (n_gpus == 0 && tparam_.tree_method == TreeMethod::kGPUHist) {
|
||||
tparam_.UpdateAllowUnknown(Args{{"tree_method", "hist"}});
|
||||
LOG(WARNING)
|
||||
<< "Loading from a raw memory buffer on CPU only machine. "
|
||||
"Changing tree_method to hist.";
|
||||
LOG(WARNING) << msg << " Changing `tree_method` to `hist`.";
|
||||
}
|
||||
|
||||
auto const& j_updaters = get<Object const>(in["updater"]);
|
||||
updaters_.clear();
|
||||
|
||||
for (auto const& kv : j_updaters) {
|
||||
std::unique_ptr<TreeUpdater> up(
|
||||
TreeUpdater::Create(kv.first, ctx_, model_.learner_model_param->task));
|
||||
auto name = kv.first;
|
||||
if (n_gpus == 0 && name == "grow_gpu_hist") {
|
||||
name = "grow_quantile_histmaker";
|
||||
LOG(WARNING) << "Changing updater from `grow_gpu_hist` to `grow_quantile_histmaker`.";
|
||||
}
|
||||
std::unique_ptr<TreeUpdater> up{
|
||||
TreeUpdater::Create(name, ctx_, model_.learner_model_param->task)};
|
||||
up->LoadConfig(kv.second);
|
||||
updaters_.push_back(std::move(up));
|
||||
}
|
||||
|
||||
@@ -18,9 +18,7 @@ inline XGBOOST_DEVICE bst_node_t GetNextNode(const RegTree::Node &node, const bs
|
||||
if (has_categorical && common::IsCat(cats.split_type, nid)) {
|
||||
auto node_categories =
|
||||
cats.categories.subspan(cats.node_ptr[nid].beg, cats.node_ptr[nid].size);
|
||||
return common::Decision<true>(node_categories, fvalue, node.DefaultLeft())
|
||||
? node.LeftChild()
|
||||
: node.RightChild();
|
||||
return common::Decision(node_categories, fvalue) ? node.LeftChild() : node.RightChild();
|
||||
} else {
|
||||
return node.LeftChild() + !(fvalue < node.SplitCond());
|
||||
}
|
||||
|
||||
@@ -403,8 +403,7 @@ struct GPUHistMakerDevice {
|
||||
go_left = data.split_node.DefaultLeft();
|
||||
} else {
|
||||
if (data.split_type == FeatureType::kCategorical) {
|
||||
go_left = common::Decision<false>(data.node_cats.Bits(), cut_value,
|
||||
data.split_node.DefaultLeft());
|
||||
go_left = common::Decision(data.node_cats.Bits(), cut_value);
|
||||
} else {
|
||||
go_left = cut_value <= data.split_node.SplitCond();
|
||||
}
|
||||
@@ -481,7 +480,7 @@ struct GPUHistMakerDevice {
|
||||
if (common::IsCat(d_feature_types, position)) {
|
||||
auto node_cats = categories.subspan(categories_segments[position].beg,
|
||||
categories_segments[position].size);
|
||||
go_left = common::Decision<false>(node_cats, element, node.DefaultLeft());
|
||||
go_left = common::Decision(node_cats, element);
|
||||
} else {
|
||||
go_left = element <= node.SplitCond();
|
||||
}
|
||||
|
||||
@@ -4,7 +4,7 @@ set -euo pipefail
|
||||
|
||||
source tests/buildkite/conftest.sh
|
||||
|
||||
echo "--- Run Google Tests with CUDA, using 4 GPUs"
|
||||
echo "--- Run Google Tests with CUDA, using a GPU"
|
||||
buildkite-agent artifact download "build/testxgboost" . --step build-cuda
|
||||
chmod +x build/testxgboost
|
||||
tests/ci_build/ci_build.sh gpu nvidia-docker \
|
||||
@@ -12,11 +12,12 @@ tests/ci_build/ci_build.sh gpu nvidia-docker \
|
||||
--build-arg RAPIDS_VERSION_ARG=$RAPIDS_VERSION \
|
||||
build/testxgboost
|
||||
|
||||
echo "--- Run Google Tests with CUDA, using 4 GPUs, RMM enabled"
|
||||
rm -rfv build/
|
||||
buildkite-agent artifact download "build/testxgboost" . --step build-cuda-with-rmm
|
||||
chmod +x build/testxgboost
|
||||
tests/ci_build/ci_build.sh rmm nvidia-docker \
|
||||
--build-arg CUDA_VERSION_ARG=$CUDA_VERSION \
|
||||
--build-arg RAPIDS_VERSION_ARG=$RAPIDS_VERSION bash -c \
|
||||
"source activate gpu_test && build/testxgboost --use-rmm-pool"
|
||||
# Disabled until https://github.com/dmlc/xgboost/issues/8619 is resolved
|
||||
# echo "--- Run Google Tests with CUDA, using a GPU, RMM enabled"
|
||||
# rm -rfv build/
|
||||
# buildkite-agent artifact download "build/testxgboost" . --step build-cuda-with-rmm
|
||||
# chmod +x build/testxgboost
|
||||
# tests/ci_build/ci_build.sh rmm nvidia-docker \
|
||||
# --build-arg CUDA_VERSION_ARG=$CUDA_VERSION \
|
||||
# --build-arg RAPIDS_VERSION_ARG=$RAPIDS_VERSION bash -c \
|
||||
# "source activate gpu_test && build/testxgboost --use-rmm-pool"
|
||||
|
||||
@@ -1,11 +1,14 @@
|
||||
/*!
|
||||
* Copyright 2021 by XGBoost Contributors
|
||||
* Copyright 2021-2022 by XGBoost Contributors
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
#include <xgboost/json.h>
|
||||
#include <xgboost/learner.h>
|
||||
|
||||
#include <limits>
|
||||
|
||||
#include "../../../src/common/categorical.h"
|
||||
#include "../helpers.h"
|
||||
|
||||
namespace xgboost {
|
||||
namespace common {
|
||||
@@ -15,29 +18,76 @@ TEST(Categorical, Decision) {
|
||||
|
||||
ASSERT_TRUE(common::InvalidCat(a));
|
||||
std::vector<uint32_t> cats(256, 0);
|
||||
ASSERT_TRUE(Decision(cats, a, true));
|
||||
ASSERT_TRUE(Decision(cats, a));
|
||||
|
||||
// larger than size
|
||||
a = 256;
|
||||
ASSERT_TRUE(Decision(cats, a, true));
|
||||
ASSERT_TRUE(Decision(cats, a));
|
||||
|
||||
// negative
|
||||
a = -1;
|
||||
ASSERT_TRUE(Decision(cats, a, true));
|
||||
ASSERT_TRUE(Decision(cats, a));
|
||||
|
||||
CatBitField bits{cats};
|
||||
bits.Set(0);
|
||||
a = -0.5;
|
||||
ASSERT_TRUE(Decision(cats, a, true));
|
||||
ASSERT_TRUE(Decision(cats, a));
|
||||
|
||||
// round toward 0
|
||||
a = 0.5;
|
||||
ASSERT_FALSE(Decision(cats, a, true));
|
||||
ASSERT_FALSE(Decision(cats, a));
|
||||
|
||||
// valid
|
||||
a = 13;
|
||||
bits.Set(a);
|
||||
ASSERT_FALSE(Decision(bits.Bits(), a, true));
|
||||
ASSERT_FALSE(Decision(bits.Bits(), a));
|
||||
}
|
||||
|
||||
/**
|
||||
* Test for running inference with input category greater than the one stored in tree.
|
||||
*/
|
||||
TEST(Categorical, MinimalSet) {
|
||||
std::size_t constexpr kRows = 256, kCols = 1, kCat = 3;
|
||||
std::vector<FeatureType> types{FeatureType::kCategorical};
|
||||
auto Xy =
|
||||
RandomDataGenerator{kRows, kCols, 0.0}.Type(types).MaxCategory(kCat).GenerateDMatrix(true);
|
||||
|
||||
std::unique_ptr<Learner> learner{Learner::Create({Xy})};
|
||||
learner->SetParam("max_depth", "1");
|
||||
learner->SetParam("tree_method", "hist");
|
||||
learner->Configure();
|
||||
learner->UpdateOneIter(0, Xy);
|
||||
|
||||
Json model{Object{}};
|
||||
learner->SaveModel(&model);
|
||||
auto tree = model["learner"]["gradient_booster"]["model"]["trees"][0];
|
||||
ASSERT_GE(get<I32Array const>(tree["categories"]).size(), 1);
|
||||
auto v = get<I32Array const>(tree["categories"])[0];
|
||||
|
||||
HostDeviceVector<float> predt;
|
||||
{
|
||||
std::vector<float> data{static_cast<float>(kCat),
|
||||
static_cast<float>(kCat + 1), 32.0f, 33.0f, 34.0f};
|
||||
auto test = GetDMatrixFromData(data, data.size(), kCols);
|
||||
learner->Predict(test, false, &predt, 0, 0, false, /*pred_leaf=*/true);
|
||||
ASSERT_EQ(predt.Size(), data.size());
|
||||
auto const& h_predt = predt.ConstHostSpan();
|
||||
for (auto v : h_predt) {
|
||||
ASSERT_EQ(v, 1); // left child of root node
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
std::unique_ptr<Learner> learner{Learner::Create({Xy})};
|
||||
learner->LoadModel(model);
|
||||
std::vector<float> data = {static_cast<float>(v)};
|
||||
auto test = GetDMatrixFromData(data, data.size(), kCols);
|
||||
learner->Predict(test, false, &predt, 0, 0, false, /*pred_leaf=*/true);
|
||||
auto const& h_predt = predt.ConstHostSpan();
|
||||
for (auto v : h_predt) {
|
||||
ASSERT_EQ(v, 2); // right child of root node
|
||||
}
|
||||
}
|
||||
}
|
||||
} // namespace common
|
||||
} // namespace xgboost
|
||||
|
||||
@@ -112,7 +112,6 @@ class TestPandas:
|
||||
|
||||
# test Index as columns
|
||||
df = pd.DataFrame([[1, 1.1], [2, 2.2]], columns=pd.Index([1, 2]))
|
||||
print(df.columns, isinstance(df.columns, pd.Index))
|
||||
Xy = xgb.DMatrix(df)
|
||||
np.testing.assert_equal(np.array(Xy.feature_names), np.array(["1", "2"]))
|
||||
|
||||
|
||||
@@ -4,7 +4,7 @@ import pytest
|
||||
|
||||
try:
|
||||
import shap
|
||||
except ImportError:
|
||||
except Exception:
|
||||
shap = None
|
||||
pass
|
||||
|
||||
|
||||
@@ -2,6 +2,7 @@ import collections
|
||||
import importlib.util
|
||||
import json
|
||||
import os
|
||||
import pickle
|
||||
import random
|
||||
import tempfile
|
||||
from typing import Callable, Optional
|
||||
@@ -636,26 +637,74 @@ def test_sklearn_n_jobs():
|
||||
|
||||
def test_parameters_access():
|
||||
from sklearn import datasets
|
||||
params = {'updater': 'grow_gpu_hist', 'subsample': .5, 'n_jobs': -1}
|
||||
|
||||
params = {"updater": "grow_gpu_hist", "subsample": 0.5, "n_jobs": -1}
|
||||
clf = xgb.XGBClassifier(n_estimators=1000, **params)
|
||||
assert clf.get_params()['updater'] == 'grow_gpu_hist'
|
||||
assert clf.get_params()['subsample'] == .5
|
||||
assert clf.get_params()['n_estimators'] == 1000
|
||||
assert clf.get_params()["updater"] == "grow_gpu_hist"
|
||||
assert clf.get_params()["subsample"] == 0.5
|
||||
assert clf.get_params()["n_estimators"] == 1000
|
||||
|
||||
clf = xgb.XGBClassifier(n_estimators=1, nthread=4)
|
||||
X, y = datasets.load_iris(return_X_y=True)
|
||||
clf.fit(X, y)
|
||||
|
||||
config = json.loads(clf.get_booster().save_config())
|
||||
assert int(config['learner']['generic_param']['nthread']) == 4
|
||||
assert int(config["learner"]["generic_param"]["nthread"]) == 4
|
||||
|
||||
clf.set_params(nthread=16)
|
||||
config = json.loads(clf.get_booster().save_config())
|
||||
assert int(config['learner']['generic_param']['nthread']) == 16
|
||||
assert int(config["learner"]["generic_param"]["nthread"]) == 16
|
||||
|
||||
clf.predict(X)
|
||||
config = json.loads(clf.get_booster().save_config())
|
||||
assert int(config['learner']['generic_param']['nthread']) == 16
|
||||
assert int(config["learner"]["generic_param"]["nthread"]) == 16
|
||||
|
||||
clf = xgb.XGBClassifier(n_estimators=2)
|
||||
assert clf.tree_method is None
|
||||
assert clf.get_params()["tree_method"] is None
|
||||
clf.fit(X, y)
|
||||
assert clf.get_params()["tree_method"] is None
|
||||
|
||||
def save_load(clf: xgb.XGBClassifier) -> xgb.XGBClassifier:
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
path = os.path.join(tmpdir, "model.json")
|
||||
clf.save_model(path)
|
||||
clf = xgb.XGBClassifier()
|
||||
clf.load_model(path)
|
||||
return clf
|
||||
|
||||
def get_tm(clf: xgb.XGBClassifier) -> str:
|
||||
tm = json.loads(clf.get_booster().save_config())["learner"]["gradient_booster"][
|
||||
"gbtree_train_param"
|
||||
]["tree_method"]
|
||||
return tm
|
||||
|
||||
assert get_tm(clf) == "exact"
|
||||
|
||||
clf = pickle.loads(pickle.dumps(clf))
|
||||
|
||||
assert clf.tree_method is None
|
||||
assert clf.n_estimators == 2
|
||||
assert clf.get_params()["tree_method"] is None
|
||||
assert clf.get_params()["n_estimators"] == 2
|
||||
assert get_tm(clf) == "exact" # preserved for pickle
|
||||
|
||||
clf = save_load(clf)
|
||||
|
||||
assert clf.tree_method is None
|
||||
assert clf.n_estimators == 2
|
||||
assert clf.get_params()["tree_method"] is None
|
||||
assert clf.get_params()["n_estimators"] == 2
|
||||
assert get_tm(clf) == "auto" # discarded for save/load_model
|
||||
|
||||
clf.set_params(tree_method="hist")
|
||||
assert clf.get_params()["tree_method"] == "hist"
|
||||
clf = pickle.loads(pickle.dumps(clf))
|
||||
assert clf.get_params()["tree_method"] == "hist"
|
||||
clf = save_load(clf)
|
||||
# FIXME(jiamingy): We should remove this behavior once we remove parameters
|
||||
# serialization for skl save/load_model.
|
||||
assert clf.get_params()["tree_method"] == "hist"
|
||||
|
||||
|
||||
def test_kwargs_error():
|
||||
@@ -695,13 +744,19 @@ def test_sklearn_clone():
|
||||
|
||||
def test_sklearn_get_default_params():
|
||||
from sklearn.datasets import load_digits
|
||||
|
||||
digits_2class = load_digits(n_class=2)
|
||||
X = digits_2class['data']
|
||||
y = digits_2class['target']
|
||||
X = digits_2class["data"]
|
||||
y = digits_2class["target"]
|
||||
cls = xgb.XGBClassifier()
|
||||
assert cls.get_params()['base_score'] is None
|
||||
assert cls.get_params()["base_score"] is None
|
||||
cls.fit(X[:4, ...], y[:4, ...])
|
||||
assert cls.get_params()['base_score'] is not None
|
||||
base_score = float(
|
||||
json.loads(cls.get_booster().save_config())["learner"]["learner_model_param"][
|
||||
"base_score"
|
||||
]
|
||||
)
|
||||
np.testing.assert_equal(base_score, 0.5)
|
||||
|
||||
|
||||
def run_validation_weights(model):
|
||||
@@ -1029,9 +1084,9 @@ def test_pandas_input():
|
||||
|
||||
clf_isotonic = CalibratedClassifierCV(model, cv="prefit", method="isotonic")
|
||||
clf_isotonic.fit(train, target)
|
||||
assert isinstance(
|
||||
clf_isotonic.calibrated_classifiers_[0].base_estimator, xgb.XGBClassifier
|
||||
)
|
||||
clf = clf_isotonic.calibrated_classifiers_[0]
|
||||
est = clf.estimator if hasattr(clf, "estimator") else clf.base_estimator
|
||||
assert isinstance(est, xgb.XGBClassifier)
|
||||
np.testing.assert_allclose(np.array(clf_isotonic.classes_), np.array([0, 1]))
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user