Compare commits

...

9 Commits

Author SHA1 Message Date
Jiaming Yuan
ccf43d4ba0 Bump R package version to 1.7.3. (#8649) 2023-01-06 20:34:05 +08:00
Jiaming Yuan
dd58c2ac47 Bump version to 1.7.3. (#8646) 2023-01-06 17:55:51 +08:00
Jiaming Yuan
899e4c8988 [backport] Do not return internal value for get_params. (#8634) (#8642) 2023-01-06 02:28:39 +08:00
Jiaming Yuan
a2085bf223 [backport] Fix loading GPU pickle with a CPU-only xgboost distribution. (#8632) (#8641)
We can handle loading the pickle on a CPU-only machine if the XGBoost is built with CUDA
enabled (Linux and Windows PyPI package), but not if the distribution is CPU-only (macOS
PyPI package).
2023-01-06 02:28:21 +08:00
Jiaming Yuan
067b704e58 [backport] Fix inference with categorical feature. (#8591) (#8602) (#8638)
* Fix inference with categorical feature. (#8591)

* Fix windows build on buildkite. (#8602)

* workaround.
2023-01-06 01:17:49 +08:00
Jiaming Yuan
1a834b2b85 Fix linalg iterator. (#8603) (#8639) 2023-01-05 23:16:10 +08:00
Jiaming Yuan
162b48a1a4 [backport] [CI] Disable gtest with RMM (#8620) (#8640)
Co-authored-by: Philip Hyunsu Cho <chohyu01@cs.washington.edu>
2023-01-05 23:13:45 +08:00
Jiaming Yuan
83a078b7e5 [backport] Fix sklearn test that calls a removed field (#8579) (#8636)
Co-authored-by: Rong Ou <rong.ou@gmail.com>
2023-01-05 21:17:05 +08:00
Jiaming Yuan
575fba651b [backport] [CI] Fix CI with updated dependencies. (#8631) (#8635) 2023-01-05 19:10:58 +08:00
27 changed files with 223 additions and 132 deletions

View File

@@ -1,5 +1,5 @@
cmake_minimum_required(VERSION 3.14 FATAL_ERROR)
project(xgboost LANGUAGES CXX C VERSION 1.7.2)
project(xgboost LANGUAGES CXX C VERSION 1.7.3)
include(cmake/Utils.cmake)
list(APPEND CMAKE_MODULE_PATH "${xgboost_SOURCE_DIR}/cmake/modules")
cmake_policy(SET CMP0022 NEW)

View File

@@ -1,8 +1,8 @@
Package: xgboost
Type: Package
Title: Extreme Gradient Boosting
Version: 1.7.2.1
Date: 2022-12-08
Version: 1.7.3.1
Date: 2023-01-06
Authors@R: c(
person("Tianqi", "Chen", role = c("aut"),
email = "tianqi.tchen@gmail.com"),
@@ -66,5 +66,5 @@ Imports:
methods,
data.table (>= 1.9.6),
jsonlite (>= 1.0),
RoxygenNote: 7.2.1
RoxygenNote: 7.2.2
SystemRequirements: GNU make, C++14

18
R-package/configure vendored
View File

@@ -1,6 +1,6 @@
#! /bin/sh
# Guess values for system-dependent variables and create Makefiles.
# Generated by GNU Autoconf 2.69 for xgboost 1.7.2.
# Generated by GNU Autoconf 2.69 for xgboost 1.7.3.
#
#
# Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc.
@@ -576,8 +576,8 @@ MAKEFLAGS=
# Identity of this package.
PACKAGE_NAME='xgboost'
PACKAGE_TARNAME='xgboost'
PACKAGE_VERSION='1.7.2'
PACKAGE_STRING='xgboost 1.7.2'
PACKAGE_VERSION='1.7.3'
PACKAGE_STRING='xgboost 1.7.3'
PACKAGE_BUGREPORT=''
PACKAGE_URL=''
@@ -1195,7 +1195,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF
\`configure' configures xgboost 1.7.2 to adapt to many kinds of systems.
\`configure' configures xgboost 1.7.3 to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]...
@@ -1257,7 +1257,7 @@ fi
if test -n "$ac_init_help"; then
case $ac_init_help in
short | recursive ) echo "Configuration of xgboost 1.7.2:";;
short | recursive ) echo "Configuration of xgboost 1.7.3:";;
esac
cat <<\_ACEOF
@@ -1336,7 +1336,7 @@ fi
test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then
cat <<\_ACEOF
xgboost configure 1.7.2
xgboost configure 1.7.3
generated by GNU Autoconf 2.69
Copyright (C) 2012 Free Software Foundation, Inc.
@@ -1479,7 +1479,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.
It was created by xgboost $as_me 1.7.2, which was
It was created by xgboost $as_me 1.7.3, which was
generated by GNU Autoconf 2.69. Invocation command line was
$ $0 $@
@@ -3294,7 +3294,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
# report actual input values of CONFIG_FILES etc. instead of their
# values after options handling.
ac_log="
This file was extended by xgboost $as_me 1.7.2, which was
This file was extended by xgboost $as_me 1.7.3, which was
generated by GNU Autoconf 2.69. Invocation command line was
CONFIG_FILES = $CONFIG_FILES
@@ -3347,7 +3347,7 @@ _ACEOF
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
ac_cs_version="\\
xgboost config.status 1.7.2
xgboost config.status 1.7.3
configured by $0, generated by GNU Autoconf 2.69,
with options \\"\$ac_cs_config\\"

View File

@@ -2,7 +2,7 @@
AC_PREREQ(2.69)
AC_INIT([xgboost],[1.7.2],[],[xgboost],[])
AC_INIT([xgboost],[1.7.3],[],[xgboost],[])
# Use this line to set CC variable to a C compiler
AC_PROG_CC

View File

@@ -138,11 +138,11 @@ Miscellaneous
By default, XGBoost assumes input categories are integers starting from 0 till the number
of categories :math:`[0, n\_categories)`. However, user might provide inputs with invalid
values due to mistakes or missing values. It can be negative value, integer values that
can not be accurately represented by 32-bit floating point, or values that are larger than
actual number of unique categories. During training this is validated but for prediction
it's treated as the same as missing value for performance reasons. Lastly, missing values
are treated as the same as numerical features (using the learned split direction).
values due to mistakes or missing values in training dataset. It can be negative value,
integer values that can not be accurately represented by 32-bit floating point, or values
that are larger than actual number of unique categories. During training this is
validated but for prediction it's treated as the same as not-chosen category for
performance reasons.
**********

View File

@@ -6,6 +6,6 @@
#define XGBOOST_VER_MAJOR 1
#define XGBOOST_VER_MINOR 7
#define XGBOOST_VER_PATCH 2
#define XGBOOST_VER_PATCH 3
#endif // XGBOOST_VERSION_CONFIG_H_

View File

@@ -6,7 +6,7 @@
<groupId>ml.dmlc</groupId>
<artifactId>xgboost-jvm_2.12</artifactId>
<version>1.7.2</version>
<version>1.7.3</version>
<packaging>pom</packaging>
<name>XGBoost JVM Package</name>
<description>JVM Package for XGBoost</description>

View File

@@ -6,10 +6,10 @@
<parent>
<groupId>ml.dmlc</groupId>
<artifactId>xgboost-jvm_2.12</artifactId>
<version>1.7.2</version>
<version>1.7.3</version>
</parent>
<artifactId>xgboost4j-example_2.12</artifactId>
<version>1.7.2</version>
<version>1.7.3</version>
<packaging>jar</packaging>
<build>
<plugins>
@@ -26,7 +26,7 @@
<dependency>
<groupId>ml.dmlc</groupId>
<artifactId>xgboost4j-spark_${scala.binary.version}</artifactId>
<version>1.7.2</version>
<version>1.7.3</version>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
@@ -37,7 +37,7 @@
<dependency>
<groupId>ml.dmlc</groupId>
<artifactId>xgboost4j-flink_${scala.binary.version}</artifactId>
<version>1.7.2</version>
<version>1.7.3</version>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>

View File

@@ -6,10 +6,10 @@
<parent>
<groupId>ml.dmlc</groupId>
<artifactId>xgboost-jvm_2.12</artifactId>
<version>1.7.2</version>
<version>1.7.3</version>
</parent>
<artifactId>xgboost4j-flink_2.12</artifactId>
<version>1.7.2</version>
<version>1.7.3</version>
<build>
<plugins>
<plugin>
@@ -26,7 +26,7 @@
<dependency>
<groupId>ml.dmlc</groupId>
<artifactId>xgboost4j_${scala.binary.version}</artifactId>
<version>1.7.2</version>
<version>1.7.3</version>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>

View File

@@ -6,10 +6,10 @@
<parent>
<groupId>ml.dmlc</groupId>
<artifactId>xgboost-jvm_2.12</artifactId>
<version>1.7.2</version>
<version>1.7.3</version>
</parent>
<artifactId>xgboost4j-gpu_2.12</artifactId>
<version>1.7.2</version>
<version>1.7.3</version>
<packaging>jar</packaging>
<dependencies>

View File

@@ -6,7 +6,7 @@
<parent>
<groupId>ml.dmlc</groupId>
<artifactId>xgboost-jvm_2.12</artifactId>
<version>1.7.2</version>
<version>1.7.3</version>
</parent>
<artifactId>xgboost4j-spark-gpu_2.12</artifactId>
<build>
@@ -24,7 +24,7 @@
<dependency>
<groupId>ml.dmlc</groupId>
<artifactId>xgboost4j-gpu_${scala.binary.version}</artifactId>
<version>1.7.2</version>
<version>1.7.3</version>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>

View File

@@ -6,7 +6,7 @@
<parent>
<groupId>ml.dmlc</groupId>
<artifactId>xgboost-jvm_2.12</artifactId>
<version>1.7.2</version>
<version>1.7.3</version>
</parent>
<artifactId>xgboost4j-spark_2.12</artifactId>
<build>
@@ -24,7 +24,7 @@
<dependency>
<groupId>ml.dmlc</groupId>
<artifactId>xgboost4j_${scala.binary.version}</artifactId>
<version>1.7.2</version>
<version>1.7.3</version>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>

View File

@@ -1,9 +1,9 @@
from sklearn.datasets import load_iris
import numpy as np
import pandas
from sklearn.datasets import load_iris
X, y = load_iris(return_X_y=True)
y = y.astype(np.int)
y = y.astype(np.int32)
df = pandas.DataFrame(data=X, columns=['sepal length', 'sepal width', 'petal length', 'petal width'])
class_id_to_name = {0:'Iris-setosa', 1:'Iris-versicolor', 2:'Iris-virginica'}
df['class'] = np.vectorize(class_id_to_name.get)(y)

View File

@@ -6,10 +6,10 @@
<parent>
<groupId>ml.dmlc</groupId>
<artifactId>xgboost-jvm_2.12</artifactId>
<version>1.7.2</version>
<version>1.7.3</version>
</parent>
<artifactId>xgboost4j_2.12</artifactId>
<version>1.7.2</version>
<version>1.7.3</version>
<packaging>jar</packaging>
<dependencies>

View File

@@ -1 +1 @@
1.7.2
1.7.3

View File

@@ -674,7 +674,7 @@ class XGBModel(XGBModelBase):
self.kwargs = {}
self.kwargs[key] = value
if hasattr(self, "_Booster"):
if self.__sklearn_is_fitted__():
parameters = self.get_xgb_params()
self.get_booster().set_param(parameters)
@@ -701,39 +701,12 @@ class XGBModel(XGBModelBase):
np.iinfo(np.int32).max
)
def parse_parameter(value: Any) -> Optional[Union[int, float, str]]:
for t in (int, float, str):
try:
ret = t(value)
return ret
except ValueError:
continue
return None
# Get internal parameter values
try:
config = json.loads(self.get_booster().save_config())
stack = [config]
internal = {}
while stack:
obj = stack.pop()
for k, v in obj.items():
if k.endswith("_param"):
for p_k, p_v in v.items():
internal[p_k] = p_v
elif isinstance(v, dict):
stack.append(v)
for k, v in internal.items():
if k in params and params[k] is None:
params[k] = parse_parameter(v)
except ValueError:
pass
return params
def get_xgb_params(self) -> Dict[str, Any]:
"""Get xgboost specific parameters."""
params = self.get_params()
params: Dict[str, Any] = self.get_params()
# Parameters that should not go into native learner.
wrapper_specific = {
"importance_type",
@@ -750,6 +723,7 @@ class XGBModel(XGBModelBase):
for k, v in params.items():
if k not in wrapper_specific and not callable(v):
filtered[k] = v
return filtered
def get_num_boosting_rounds(self) -> int:
@@ -1070,7 +1044,7 @@ class XGBModel(XGBModelBase):
# error with incompatible data type.
# Inplace predict doesn't handle as many data types as DMatrix, but it's
# sufficient for dask interface where input is simpiler.
predictor = self.get_params().get("predictor", None)
predictor = self.get_xgb_params().get("predictor", None)
if predictor in ("auto", None) and self.booster != "gblinear":
return True
return False
@@ -1336,7 +1310,7 @@ class XGBModel(XGBModelBase):
-------
coef_ : array of shape ``[n_features]`` or ``[n_classes, n_features]``
"""
if self.get_params()["booster"] != "gblinear":
if self.get_xgb_params()["booster"] != "gblinear":
raise AttributeError(
f"Coefficients are not defined for Booster type {self.booster}"
)
@@ -1366,7 +1340,7 @@ class XGBModel(XGBModelBase):
-------
intercept_ : array of shape ``(1,)`` or ``[n_classes]``
"""
if self.get_params()["booster"] != "gblinear":
if self.get_xgb_params()["booster"] != "gblinear":
raise AttributeError(
f"Intercept (bias) is not defined for Booster type {self.booster}"
)

View File

@@ -48,20 +48,21 @@ inline XGBOOST_DEVICE bool InvalidCat(float cat) {
return cat < 0 || cat >= kMaxCat;
}
/* \brief Whether should it traverse to left branch of a tree.
/**
* \brief Whether should it traverse to left branch of a tree.
*
* For one hot split, go to left if it's NOT the matching category.
* Go to left if it's NOT the matching category, which matches one-hot encoding.
*/
template <bool validate = true>
inline XGBOOST_DEVICE bool Decision(common::Span<uint32_t const> cats, float cat, bool dft_left) {
inline XGBOOST_DEVICE bool Decision(common::Span<uint32_t const> cats, float cat) {
KCatBitField const s_cats(cats);
// FIXME: Size() is not accurate since it represents the size of bit set instead of
// actual number of categories.
if (XGBOOST_EXPECT(validate && (InvalidCat(cat) || cat >= s_cats.Size()), false)) {
return dft_left;
if (XGBOOST_EXPECT(InvalidCat(cat), false)) {
return true;
}
auto pos = KCatBitField::ToBitPos(cat);
// If the input category is larger than the size of the bit field, it implies that the
// category is not chosen. Otherwise the bit field would have the category instead of
// being smaller than the category value.
if (pos.int_pos >= cats.size()) {
return true;
}

View File

@@ -62,7 +62,7 @@ void ElementWiseKernel(GenericParameter const* ctx, linalg::TensorView<T, D> t,
#endif // !defined(XGBOOST_USE_CUDA)
template <typename T, std::int32_t kDim>
auto cbegin(TensorView<T, kDim> v) { // NOLINT
auto cbegin(TensorView<T, kDim> const& v) { // NOLINT
auto it = common::MakeIndexTransformIter([&](size_t i) -> std::remove_cv_t<T> const& {
return linalg::detail::Apply(v, linalg::UnravelIndex(i, v.Shape()));
});
@@ -70,19 +70,19 @@ auto cbegin(TensorView<T, kDim> v) { // NOLINT
}
template <typename T, std::int32_t kDim>
auto cend(TensorView<T, kDim> v) { // NOLINT
auto cend(TensorView<T, kDim> const& v) { // NOLINT
return cbegin(v) + v.Size();
}
template <typename T, std::int32_t kDim>
auto begin(TensorView<T, kDim> v) { // NOLINT
auto begin(TensorView<T, kDim>& v) { // NOLINT
auto it = common::MakeIndexTransformIter(
[&](size_t i) -> T& { return linalg::detail::Apply(v, linalg::UnravelIndex(i, v.Shape())); });
return it;
}
template <typename T, std::int32_t kDim>
auto end(TensorView<T, kDim> v) { // NOLINT
auto end(TensorView<T, kDim>& v) { // NOLINT
return begin(v) + v.Size();
}
} // namespace linalg

View File

@@ -144,7 +144,7 @@ class PartitionBuilder {
auto gidx = gidx_calc(ridx);
bool go_left = default_left;
if (gidx > -1) {
go_left = Decision(node_cats, cut_values[gidx], default_left);
go_left = Decision(node_cats, cut_values[gidx]);
}
return go_left;
} else {
@@ -157,7 +157,7 @@ class PartitionBuilder {
bool go_left = default_left;
if (gidx > -1) {
if (is_cat) {
go_left = Decision(node_cats, cut_values[gidx], default_left);
go_left = Decision(node_cats, cut_values[gidx]);
} else {
go_left = cut_values[gidx] <= nodes[node_in_set].split.split_value;
}

View File

@@ -28,6 +28,7 @@
#include "xgboost/logging.h"
#include "xgboost/objective.h"
#include "xgboost/predictor.h"
#include "xgboost/string_view.h"
#include "xgboost/tree_updater.h"
namespace xgboost {
@@ -395,23 +396,36 @@ void GBTree::LoadConfig(Json const& in) {
tparam_.process_type = TreeProcessType::kDefault;
int32_t const n_gpus = xgboost::common::AllVisibleGPUs();
if (n_gpus == 0 && tparam_.predictor == PredictorType::kGPUPredictor) {
LOG(WARNING)
<< "Loading from a raw memory buffer on CPU only machine. "
"Changing predictor to auto.";
LOG(WARNING) << "Loading from a raw memory buffer on CPU only machine. "
"Changing predictor to auto.";
tparam_.UpdateAllowUnknown(Args{{"predictor", "auto"}});
}
auto msg = StringView{
R"(
Loading from a raw memory buffer (like pickle in Python, RDS in R) on a CPU-only
machine. Consider using `save_model/load_model` instead. See:
https://xgboost.readthedocs.io/en/latest/tutorials/saving_model.html
for more details about differences between saving model and serializing.)"};
if (n_gpus == 0 && tparam_.tree_method == TreeMethod::kGPUHist) {
tparam_.UpdateAllowUnknown(Args{{"tree_method", "hist"}});
LOG(WARNING)
<< "Loading from a raw memory buffer on CPU only machine. "
"Changing tree_method to hist.";
LOG(WARNING) << msg << " Changing `tree_method` to `hist`.";
}
auto const& j_updaters = get<Object const>(in["updater"]);
updaters_.clear();
for (auto const& kv : j_updaters) {
std::unique_ptr<TreeUpdater> up(
TreeUpdater::Create(kv.first, ctx_, model_.learner_model_param->task));
auto name = kv.first;
if (n_gpus == 0 && name == "grow_gpu_hist") {
name = "grow_quantile_histmaker";
LOG(WARNING) << "Changing updater from `grow_gpu_hist` to `grow_quantile_histmaker`.";
}
std::unique_ptr<TreeUpdater> up{
TreeUpdater::Create(name, ctx_, model_.learner_model_param->task)};
up->LoadConfig(kv.second);
updaters_.push_back(std::move(up));
}

View File

@@ -18,9 +18,7 @@ inline XGBOOST_DEVICE bst_node_t GetNextNode(const RegTree::Node &node, const bs
if (has_categorical && common::IsCat(cats.split_type, nid)) {
auto node_categories =
cats.categories.subspan(cats.node_ptr[nid].beg, cats.node_ptr[nid].size);
return common::Decision<true>(node_categories, fvalue, node.DefaultLeft())
? node.LeftChild()
: node.RightChild();
return common::Decision(node_categories, fvalue) ? node.LeftChild() : node.RightChild();
} else {
return node.LeftChild() + !(fvalue < node.SplitCond());
}

View File

@@ -403,8 +403,7 @@ struct GPUHistMakerDevice {
go_left = data.split_node.DefaultLeft();
} else {
if (data.split_type == FeatureType::kCategorical) {
go_left = common::Decision<false>(data.node_cats.Bits(), cut_value,
data.split_node.DefaultLeft());
go_left = common::Decision(data.node_cats.Bits(), cut_value);
} else {
go_left = cut_value <= data.split_node.SplitCond();
}
@@ -481,7 +480,7 @@ struct GPUHistMakerDevice {
if (common::IsCat(d_feature_types, position)) {
auto node_cats = categories.subspan(categories_segments[position].beg,
categories_segments[position].size);
go_left = common::Decision<false>(node_cats, element, node.DefaultLeft());
go_left = common::Decision(node_cats, element);
} else {
go_left = element <= node.SplitCond();
}

View File

@@ -4,7 +4,7 @@ set -euo pipefail
source tests/buildkite/conftest.sh
echo "--- Run Google Tests with CUDA, using 4 GPUs"
echo "--- Run Google Tests with CUDA, using a GPU"
buildkite-agent artifact download "build/testxgboost" . --step build-cuda
chmod +x build/testxgboost
tests/ci_build/ci_build.sh gpu nvidia-docker \
@@ -12,11 +12,12 @@ tests/ci_build/ci_build.sh gpu nvidia-docker \
--build-arg RAPIDS_VERSION_ARG=$RAPIDS_VERSION \
build/testxgboost
echo "--- Run Google Tests with CUDA, using 4 GPUs, RMM enabled"
rm -rfv build/
buildkite-agent artifact download "build/testxgboost" . --step build-cuda-with-rmm
chmod +x build/testxgboost
tests/ci_build/ci_build.sh rmm nvidia-docker \
--build-arg CUDA_VERSION_ARG=$CUDA_VERSION \
--build-arg RAPIDS_VERSION_ARG=$RAPIDS_VERSION bash -c \
"source activate gpu_test && build/testxgboost --use-rmm-pool"
# Disabled until https://github.com/dmlc/xgboost/issues/8619 is resolved
# echo "--- Run Google Tests with CUDA, using a GPU, RMM enabled"
# rm -rfv build/
# buildkite-agent artifact download "build/testxgboost" . --step build-cuda-with-rmm
# chmod +x build/testxgboost
# tests/ci_build/ci_build.sh rmm nvidia-docker \
# --build-arg CUDA_VERSION_ARG=$CUDA_VERSION \
# --build-arg RAPIDS_VERSION_ARG=$RAPIDS_VERSION bash -c \
# "source activate gpu_test && build/testxgboost --use-rmm-pool"

View File

@@ -1,11 +1,14 @@
/*!
* Copyright 2021 by XGBoost Contributors
* Copyright 2021-2022 by XGBoost Contributors
*/
#include <gtest/gtest.h>
#include <xgboost/json.h>
#include <xgboost/learner.h>
#include <limits>
#include "../../../src/common/categorical.h"
#include "../helpers.h"
namespace xgboost {
namespace common {
@@ -15,29 +18,76 @@ TEST(Categorical, Decision) {
ASSERT_TRUE(common::InvalidCat(a));
std::vector<uint32_t> cats(256, 0);
ASSERT_TRUE(Decision(cats, a, true));
ASSERT_TRUE(Decision(cats, a));
// larger than size
a = 256;
ASSERT_TRUE(Decision(cats, a, true));
ASSERT_TRUE(Decision(cats, a));
// negative
a = -1;
ASSERT_TRUE(Decision(cats, a, true));
ASSERT_TRUE(Decision(cats, a));
CatBitField bits{cats};
bits.Set(0);
a = -0.5;
ASSERT_TRUE(Decision(cats, a, true));
ASSERT_TRUE(Decision(cats, a));
// round toward 0
a = 0.5;
ASSERT_FALSE(Decision(cats, a, true));
ASSERT_FALSE(Decision(cats, a));
// valid
a = 13;
bits.Set(a);
ASSERT_FALSE(Decision(bits.Bits(), a, true));
ASSERT_FALSE(Decision(bits.Bits(), a));
}
/**
* Test for running inference with input category greater than the one stored in tree.
*/
TEST(Categorical, MinimalSet) {
std::size_t constexpr kRows = 256, kCols = 1, kCat = 3;
std::vector<FeatureType> types{FeatureType::kCategorical};
auto Xy =
RandomDataGenerator{kRows, kCols, 0.0}.Type(types).MaxCategory(kCat).GenerateDMatrix(true);
std::unique_ptr<Learner> learner{Learner::Create({Xy})};
learner->SetParam("max_depth", "1");
learner->SetParam("tree_method", "hist");
learner->Configure();
learner->UpdateOneIter(0, Xy);
Json model{Object{}};
learner->SaveModel(&model);
auto tree = model["learner"]["gradient_booster"]["model"]["trees"][0];
ASSERT_GE(get<I32Array const>(tree["categories"]).size(), 1);
auto v = get<I32Array const>(tree["categories"])[0];
HostDeviceVector<float> predt;
{
std::vector<float> data{static_cast<float>(kCat),
static_cast<float>(kCat + 1), 32.0f, 33.0f, 34.0f};
auto test = GetDMatrixFromData(data, data.size(), kCols);
learner->Predict(test, false, &predt, 0, 0, false, /*pred_leaf=*/true);
ASSERT_EQ(predt.Size(), data.size());
auto const& h_predt = predt.ConstHostSpan();
for (auto v : h_predt) {
ASSERT_EQ(v, 1); // left child of root node
}
}
{
std::unique_ptr<Learner> learner{Learner::Create({Xy})};
learner->LoadModel(model);
std::vector<float> data = {static_cast<float>(v)};
auto test = GetDMatrixFromData(data, data.size(), kCols);
learner->Predict(test, false, &predt, 0, 0, false, /*pred_leaf=*/true);
auto const& h_predt = predt.ConstHostSpan();
for (auto v : h_predt) {
ASSERT_EQ(v, 2); // right child of root node
}
}
}
} // namespace common
} // namespace xgboost

View File

@@ -112,7 +112,6 @@ class TestPandas:
# test Index as columns
df = pd.DataFrame([[1, 1.1], [2, 2.2]], columns=pd.Index([1, 2]))
print(df.columns, isinstance(df.columns, pd.Index))
Xy = xgb.DMatrix(df)
np.testing.assert_equal(np.array(Xy.feature_names), np.array(["1", "2"]))

View File

@@ -4,7 +4,7 @@ import pytest
try:
import shap
except ImportError:
except Exception:
shap = None
pass

View File

@@ -2,6 +2,7 @@ import collections
import importlib.util
import json
import os
import pickle
import random
import tempfile
from typing import Callable, Optional
@@ -636,26 +637,74 @@ def test_sklearn_n_jobs():
def test_parameters_access():
from sklearn import datasets
params = {'updater': 'grow_gpu_hist', 'subsample': .5, 'n_jobs': -1}
params = {"updater": "grow_gpu_hist", "subsample": 0.5, "n_jobs": -1}
clf = xgb.XGBClassifier(n_estimators=1000, **params)
assert clf.get_params()['updater'] == 'grow_gpu_hist'
assert clf.get_params()['subsample'] == .5
assert clf.get_params()['n_estimators'] == 1000
assert clf.get_params()["updater"] == "grow_gpu_hist"
assert clf.get_params()["subsample"] == 0.5
assert clf.get_params()["n_estimators"] == 1000
clf = xgb.XGBClassifier(n_estimators=1, nthread=4)
X, y = datasets.load_iris(return_X_y=True)
clf.fit(X, y)
config = json.loads(clf.get_booster().save_config())
assert int(config['learner']['generic_param']['nthread']) == 4
assert int(config["learner"]["generic_param"]["nthread"]) == 4
clf.set_params(nthread=16)
config = json.loads(clf.get_booster().save_config())
assert int(config['learner']['generic_param']['nthread']) == 16
assert int(config["learner"]["generic_param"]["nthread"]) == 16
clf.predict(X)
config = json.loads(clf.get_booster().save_config())
assert int(config['learner']['generic_param']['nthread']) == 16
assert int(config["learner"]["generic_param"]["nthread"]) == 16
clf = xgb.XGBClassifier(n_estimators=2)
assert clf.tree_method is None
assert clf.get_params()["tree_method"] is None
clf.fit(X, y)
assert clf.get_params()["tree_method"] is None
def save_load(clf: xgb.XGBClassifier) -> xgb.XGBClassifier:
with tempfile.TemporaryDirectory() as tmpdir:
path = os.path.join(tmpdir, "model.json")
clf.save_model(path)
clf = xgb.XGBClassifier()
clf.load_model(path)
return clf
def get_tm(clf: xgb.XGBClassifier) -> str:
tm = json.loads(clf.get_booster().save_config())["learner"]["gradient_booster"][
"gbtree_train_param"
]["tree_method"]
return tm
assert get_tm(clf) == "exact"
clf = pickle.loads(pickle.dumps(clf))
assert clf.tree_method is None
assert clf.n_estimators == 2
assert clf.get_params()["tree_method"] is None
assert clf.get_params()["n_estimators"] == 2
assert get_tm(clf) == "exact" # preserved for pickle
clf = save_load(clf)
assert clf.tree_method is None
assert clf.n_estimators == 2
assert clf.get_params()["tree_method"] is None
assert clf.get_params()["n_estimators"] == 2
assert get_tm(clf) == "auto" # discarded for save/load_model
clf.set_params(tree_method="hist")
assert clf.get_params()["tree_method"] == "hist"
clf = pickle.loads(pickle.dumps(clf))
assert clf.get_params()["tree_method"] == "hist"
clf = save_load(clf)
# FIXME(jiamingy): We should remove this behavior once we remove parameters
# serialization for skl save/load_model.
assert clf.get_params()["tree_method"] == "hist"
def test_kwargs_error():
@@ -695,13 +744,19 @@ def test_sklearn_clone():
def test_sklearn_get_default_params():
from sklearn.datasets import load_digits
digits_2class = load_digits(n_class=2)
X = digits_2class['data']
y = digits_2class['target']
X = digits_2class["data"]
y = digits_2class["target"]
cls = xgb.XGBClassifier()
assert cls.get_params()['base_score'] is None
assert cls.get_params()["base_score"] is None
cls.fit(X[:4, ...], y[:4, ...])
assert cls.get_params()['base_score'] is not None
base_score = float(
json.loads(cls.get_booster().save_config())["learner"]["learner_model_param"][
"base_score"
]
)
np.testing.assert_equal(base_score, 0.5)
def run_validation_weights(model):
@@ -1029,9 +1084,9 @@ def test_pandas_input():
clf_isotonic = CalibratedClassifierCV(model, cv="prefit", method="isotonic")
clf_isotonic.fit(train, target)
assert isinstance(
clf_isotonic.calibrated_classifiers_[0].base_estimator, xgb.XGBClassifier
)
clf = clf_isotonic.calibrated_classifiers_[0]
est = clf.estimator if hasattr(clf, "estimator") else clf.base_estimator
assert isinstance(est, xgb.XGBClassifier)
np.testing.assert_allclose(np.array(clf_isotonic.classes_), np.array([0, 1]))