Compare commits
8 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
584b45a9cc | ||
|
|
30c1b5c54c | ||
|
|
36e247aca4 | ||
|
|
c4aff733bb | ||
|
|
cdbfd21d31 | ||
|
|
508a0b0dbd | ||
|
|
e04e773f9f | ||
|
|
1debabb321 |
@@ -2,7 +2,7 @@ Package: xgboost
|
|||||||
Type: Package
|
Type: Package
|
||||||
Title: Extreme Gradient Boosting
|
Title: Extreme Gradient Boosting
|
||||||
Version: 1.5.0.1
|
Version: 1.5.0.1
|
||||||
Date: 2020-08-28
|
Date: 2021-10-13
|
||||||
Authors@R: c(
|
Authors@R: c(
|
||||||
person("Tianqi", "Chen", role = c("aut"),
|
person("Tianqi", "Chen", role = c("aut"),
|
||||||
email = "tianqi.tchen@gmail.com"),
|
email = "tianqi.tchen@gmail.com"),
|
||||||
|
|||||||
@@ -11,6 +11,7 @@
|
|||||||
#' @param missing a float value that represents missing values in data (used only when input is a dense matrix).
|
#' @param missing a float value that represents missing values in data (used only when input is a dense matrix).
|
||||||
#' It is useful when a 0 or some other extreme value represents missing values in data.
|
#' It is useful when a 0 or some other extreme value represents missing values in data.
|
||||||
#' @param silent whether to suppress printing an informational message after loading from a file.
|
#' @param silent whether to suppress printing an informational message after loading from a file.
|
||||||
|
#' @param nthread Number of threads used for creating DMatrix.
|
||||||
#' @param ... the \code{info} data could be passed directly as parameters, without creating an \code{info} list.
|
#' @param ... the \code{info} data could be passed directly as parameters, without creating an \code{info} list.
|
||||||
#'
|
#'
|
||||||
#' @examples
|
#' @examples
|
||||||
|
|||||||
@@ -4,7 +4,14 @@
|
|||||||
\alias{xgb.DMatrix}
|
\alias{xgb.DMatrix}
|
||||||
\title{Construct xgb.DMatrix object}
|
\title{Construct xgb.DMatrix object}
|
||||||
\usage{
|
\usage{
|
||||||
xgb.DMatrix(data, info = list(), missing = NA, silent = FALSE, ...)
|
xgb.DMatrix(
|
||||||
|
data,
|
||||||
|
info = list(),
|
||||||
|
missing = NA,
|
||||||
|
silent = FALSE,
|
||||||
|
nthread = NULL,
|
||||||
|
...
|
||||||
|
)
|
||||||
}
|
}
|
||||||
\arguments{
|
\arguments{
|
||||||
\item{data}{a \code{matrix} object (either numeric or integer), a \code{dgCMatrix} object, or a character
|
\item{data}{a \code{matrix} object (either numeric or integer), a \code{dgCMatrix} object, or a character
|
||||||
@@ -18,6 +25,8 @@ It is useful when a 0 or some other extreme value represents missing values in d
|
|||||||
|
|
||||||
\item{silent}{whether to suppress printing an informational message after loading from a file.}
|
\item{silent}{whether to suppress printing an informational message after loading from a file.}
|
||||||
|
|
||||||
|
\item{nthread}{Number of threads used for creating DMatrix.}
|
||||||
|
|
||||||
\item{...}{the \code{info} data could be passed directly as parameters, without creating an \code{info} list.}
|
\item{...}{the \code{info} data could be passed directly as parameters, without creating an \code{info} list.}
|
||||||
}
|
}
|
||||||
\description{
|
\description{
|
||||||
|
|||||||
@@ -1 +1 @@
|
|||||||
@xgboost_VERSION_MAJOR@.@xgboost_VERSION_MINOR@.@xgboost_VERSION_PATCH@-dev
|
@xgboost_VERSION_MAJOR@.@xgboost_VERSION_MINOR@.@xgboost_VERSION_PATCH@
|
||||||
@@ -18,7 +18,7 @@ Making a Release
|
|||||||
|
|
||||||
1. Create an issue for the release, noting the estimated date and expected features or major fixes, pin that issue.
|
1. Create an issue for the release, noting the estimated date and expected features or major fixes, pin that issue.
|
||||||
2. Bump release version.
|
2. Bump release version.
|
||||||
1. Modify ``CMakeLists.txt`` source tree, run CMake.
|
1. Modify ``CMakeLists.txt`` in source tree and ``cmake/Python_version.in`` if needed, run CMake.
|
||||||
2. Modify ``DESCRIPTION`` in R-package.
|
2. Modify ``DESCRIPTION`` in R-package.
|
||||||
3. Run ``change_version.sh`` in ``jvm-packages/dev``
|
3. Run ``change_version.sh`` in ``jvm-packages/dev``
|
||||||
3. Commit the change, create a PR on GitHub on release branch. Port the bumped version to default branch, optionally with the postfix ``SNAPSHOT``.
|
3. Commit the change, create a PR on GitHub on release branch. Port the bumped version to default branch, optionally with the postfix ``SNAPSHOT``.
|
||||||
|
|||||||
@@ -6,7 +6,7 @@
|
|||||||
|
|
||||||
<groupId>ml.dmlc</groupId>
|
<groupId>ml.dmlc</groupId>
|
||||||
<artifactId>xgboost-jvm_2.12</artifactId>
|
<artifactId>xgboost-jvm_2.12</artifactId>
|
||||||
<version>1.5.0-SNAPSHOT</version>
|
<version>1.5.0</version>
|
||||||
<packaging>pom</packaging>
|
<packaging>pom</packaging>
|
||||||
<name>XGBoost JVM Package</name>
|
<name>XGBoost JVM Package</name>
|
||||||
<description>JVM Package for XGBoost</description>
|
<description>JVM Package for XGBoost</description>
|
||||||
|
|||||||
@@ -6,10 +6,10 @@
|
|||||||
<parent>
|
<parent>
|
||||||
<groupId>ml.dmlc</groupId>
|
<groupId>ml.dmlc</groupId>
|
||||||
<artifactId>xgboost-jvm_2.12</artifactId>
|
<artifactId>xgboost-jvm_2.12</artifactId>
|
||||||
<version>1.5.0-SNAPSHOT</version>
|
<version>1.5.0</version>
|
||||||
</parent>
|
</parent>
|
||||||
<artifactId>xgboost4j-example_2.12</artifactId>
|
<artifactId>xgboost4j-example_2.12</artifactId>
|
||||||
<version>1.5.0-SNAPSHOT</version>
|
<version>1.5.0</version>
|
||||||
<packaging>jar</packaging>
|
<packaging>jar</packaging>
|
||||||
<build>
|
<build>
|
||||||
<plugins>
|
<plugins>
|
||||||
@@ -26,7 +26,7 @@
|
|||||||
<dependency>
|
<dependency>
|
||||||
<groupId>ml.dmlc</groupId>
|
<groupId>ml.dmlc</groupId>
|
||||||
<artifactId>xgboost4j-spark_${scala.binary.version}</artifactId>
|
<artifactId>xgboost4j-spark_${scala.binary.version}</artifactId>
|
||||||
<version>1.5.0-SNAPSHOT</version>
|
<version>1.5.0</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.spark</groupId>
|
<groupId>org.apache.spark</groupId>
|
||||||
@@ -37,7 +37,7 @@
|
|||||||
<dependency>
|
<dependency>
|
||||||
<groupId>ml.dmlc</groupId>
|
<groupId>ml.dmlc</groupId>
|
||||||
<artifactId>xgboost4j-flink_${scala.binary.version}</artifactId>
|
<artifactId>xgboost4j-flink_${scala.binary.version}</artifactId>
|
||||||
<version>1.5.0-SNAPSHOT</version>
|
<version>1.5.0</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.commons</groupId>
|
<groupId>org.apache.commons</groupId>
|
||||||
|
|||||||
@@ -6,10 +6,10 @@
|
|||||||
<parent>
|
<parent>
|
||||||
<groupId>ml.dmlc</groupId>
|
<groupId>ml.dmlc</groupId>
|
||||||
<artifactId>xgboost-jvm_2.12</artifactId>
|
<artifactId>xgboost-jvm_2.12</artifactId>
|
||||||
<version>1.5.0-SNAPSHOT</version>
|
<version>1.5.0</version>
|
||||||
</parent>
|
</parent>
|
||||||
<artifactId>xgboost4j-flink_2.12</artifactId>
|
<artifactId>xgboost4j-flink_2.12</artifactId>
|
||||||
<version>1.5.0-SNAPSHOT</version>
|
<version>1.5.0</version>
|
||||||
<build>
|
<build>
|
||||||
<plugins>
|
<plugins>
|
||||||
<plugin>
|
<plugin>
|
||||||
@@ -26,7 +26,7 @@
|
|||||||
<dependency>
|
<dependency>
|
||||||
<groupId>ml.dmlc</groupId>
|
<groupId>ml.dmlc</groupId>
|
||||||
<artifactId>xgboost4j_${scala.binary.version}</artifactId>
|
<artifactId>xgboost4j_${scala.binary.version}</artifactId>
|
||||||
<version>1.5.0-SNAPSHOT</version>
|
<version>1.5.0</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.commons</groupId>
|
<groupId>org.apache.commons</groupId>
|
||||||
|
|||||||
@@ -6,10 +6,10 @@
|
|||||||
<parent>
|
<parent>
|
||||||
<groupId>ml.dmlc</groupId>
|
<groupId>ml.dmlc</groupId>
|
||||||
<artifactId>xgboost-jvm_2.12</artifactId>
|
<artifactId>xgboost-jvm_2.12</artifactId>
|
||||||
<version>1.5.0-SNAPSHOT</version>
|
<version>1.5.0</version>
|
||||||
</parent>
|
</parent>
|
||||||
<artifactId>xgboost4j-gpu_2.12</artifactId>
|
<artifactId>xgboost4j-gpu_2.12</artifactId>
|
||||||
<version>1.5.0-SNAPSHOT</version>
|
<version>1.5.0</version>
|
||||||
<packaging>jar</packaging>
|
<packaging>jar</packaging>
|
||||||
|
|
||||||
<properties>
|
<properties>
|
||||||
|
|||||||
@@ -6,7 +6,7 @@
|
|||||||
<parent>
|
<parent>
|
||||||
<groupId>ml.dmlc</groupId>
|
<groupId>ml.dmlc</groupId>
|
||||||
<artifactId>xgboost-jvm_2.12</artifactId>
|
<artifactId>xgboost-jvm_2.12</artifactId>
|
||||||
<version>1.5.0-SNAPSHOT</version>
|
<version>1.5.0</version>
|
||||||
</parent>
|
</parent>
|
||||||
<artifactId>xgboost4j-spark-gpu_2.12</artifactId>
|
<artifactId>xgboost4j-spark-gpu_2.12</artifactId>
|
||||||
<build>
|
<build>
|
||||||
@@ -24,7 +24,7 @@
|
|||||||
<dependency>
|
<dependency>
|
||||||
<groupId>ml.dmlc</groupId>
|
<groupId>ml.dmlc</groupId>
|
||||||
<artifactId>xgboost4j-gpu_${scala.binary.version}</artifactId>
|
<artifactId>xgboost4j-gpu_${scala.binary.version}</artifactId>
|
||||||
<version>1.5.0-SNAPSHOT</version>
|
<version>1.5.0</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.spark</groupId>
|
<groupId>org.apache.spark</groupId>
|
||||||
|
|||||||
@@ -6,7 +6,7 @@
|
|||||||
<parent>
|
<parent>
|
||||||
<groupId>ml.dmlc</groupId>
|
<groupId>ml.dmlc</groupId>
|
||||||
<artifactId>xgboost-jvm_2.12</artifactId>
|
<artifactId>xgboost-jvm_2.12</artifactId>
|
||||||
<version>1.5.0-SNAPSHOT</version>
|
<version>1.5.0</version>
|
||||||
</parent>
|
</parent>
|
||||||
<artifactId>xgboost4j-spark_2.12</artifactId>
|
<artifactId>xgboost4j-spark_2.12</artifactId>
|
||||||
<build>
|
<build>
|
||||||
@@ -24,7 +24,7 @@
|
|||||||
<dependency>
|
<dependency>
|
||||||
<groupId>ml.dmlc</groupId>
|
<groupId>ml.dmlc</groupId>
|
||||||
<artifactId>xgboost4j_${scala.binary.version}</artifactId>
|
<artifactId>xgboost4j_${scala.binary.version}</artifactId>
|
||||||
<version>1.5.0-SNAPSHOT</version>
|
<version>1.5.0</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.spark</groupId>
|
<groupId>org.apache.spark</groupId>
|
||||||
|
|||||||
@@ -6,10 +6,10 @@
|
|||||||
<parent>
|
<parent>
|
||||||
<groupId>ml.dmlc</groupId>
|
<groupId>ml.dmlc</groupId>
|
||||||
<artifactId>xgboost-jvm_2.12</artifactId>
|
<artifactId>xgboost-jvm_2.12</artifactId>
|
||||||
<version>1.5.0-SNAPSHOT</version>
|
<version>1.5.0</version>
|
||||||
</parent>
|
</parent>
|
||||||
<artifactId>xgboost4j_2.12</artifactId>
|
<artifactId>xgboost4j_2.12</artifactId>
|
||||||
<version>1.5.0-SNAPSHOT</version>
|
<version>1.5.0</version>
|
||||||
<packaging>jar</packaging>
|
<packaging>jar</packaging>
|
||||||
|
|
||||||
<dependencies>
|
<dependencies>
|
||||||
|
|||||||
@@ -1 +1 @@
|
|||||||
1.5.0-dev
|
1.5.0
|
||||||
|
|||||||
@@ -174,7 +174,9 @@ __model_doc = f'''
|
|||||||
Device ordinal.
|
Device ordinal.
|
||||||
validate_parameters : Optional[bool]
|
validate_parameters : Optional[bool]
|
||||||
Give warnings for unknown parameter.
|
Give warnings for unknown parameter.
|
||||||
|
predictor : Optional[str]
|
||||||
|
Force XGBoost to use specific predictor, available choices are [cpu_predictor,
|
||||||
|
gpu_predictor].
|
||||||
enable_categorical : bool
|
enable_categorical : bool
|
||||||
|
|
||||||
.. versionadded:: 1.5.0
|
.. versionadded:: 1.5.0
|
||||||
@@ -807,7 +809,11 @@ class XGBModel(XGBModelBase):
|
|||||||
# Inplace predict doesn't handle as many data types as DMatrix, but it's
|
# Inplace predict doesn't handle as many data types as DMatrix, but it's
|
||||||
# sufficient for dask interface where input is simpler.
|
# sufficient for dask interface where input is simpler.
|
||||||
predictor = self.get_params().get("predictor", None)
|
predictor = self.get_params().get("predictor", None)
|
||||||
if predictor in ("auto", None) and self.booster != "gblinear":
|
if (
|
||||||
|
not self.enable_categorical
|
||||||
|
and predictor in ("auto", None)
|
||||||
|
and self.booster != "gblinear"
|
||||||
|
):
|
||||||
return True
|
return True
|
||||||
return False
|
return False
|
||||||
|
|
||||||
@@ -834,7 +840,9 @@ class XGBModel(XGBModelBase):
|
|||||||
iteration_range: Optional[Tuple[int, int]] = None,
|
iteration_range: Optional[Tuple[int, int]] = None,
|
||||||
) -> np.ndarray:
|
) -> np.ndarray:
|
||||||
"""Predict with `X`. If the model is trained with early stopping, then `best_iteration`
|
"""Predict with `X`. If the model is trained with early stopping, then `best_iteration`
|
||||||
is used automatically.
|
is used automatically. For tree models, when data is on GPU, like cupy array or
|
||||||
|
cuDF dataframe and `predictor` is not specified, the prediction is run on GPU
|
||||||
|
automatically, otherwise it will run on CPU.
|
||||||
|
|
||||||
.. note:: This function is only thread safe for `gbtree` and `dart`.
|
.. note:: This function is only thread safe for `gbtree` and `dart`.
|
||||||
|
|
||||||
@@ -862,6 +870,7 @@ class XGBModel(XGBModelBase):
|
|||||||
Returns
|
Returns
|
||||||
-------
|
-------
|
||||||
prediction
|
prediction
|
||||||
|
|
||||||
"""
|
"""
|
||||||
iteration_range = _convert_ntree_limit(
|
iteration_range = _convert_ntree_limit(
|
||||||
self.get_booster(), ntree_limit, iteration_range
|
self.get_booster(), ntree_limit, iteration_range
|
||||||
@@ -886,7 +895,10 @@ class XGBModel(XGBModelBase):
|
|||||||
pass
|
pass
|
||||||
|
|
||||||
test = DMatrix(
|
test = DMatrix(
|
||||||
X, base_margin=base_margin, missing=self.missing, nthread=self.n_jobs
|
X, base_margin=base_margin,
|
||||||
|
missing=self.missing,
|
||||||
|
nthread=self.n_jobs,
|
||||||
|
enable_categorical=self.enable_categorical
|
||||||
)
|
)
|
||||||
return self.get_booster().predict(
|
return self.get_booster().predict(
|
||||||
data=test,
|
data=test,
|
||||||
|
|||||||
@@ -472,13 +472,15 @@ def cv(params, dtrain, num_boost_round=10, nfold=3, stratified=False, folds=None
|
|||||||
if is_new_callback:
|
if is_new_callback:
|
||||||
assert all(isinstance(c, callback.TrainingCallback)
|
assert all(isinstance(c, callback.TrainingCallback)
|
||||||
for c in callbacks), "You can't mix new and old callback styles."
|
for c in callbacks), "You can't mix new and old callback styles."
|
||||||
if isinstance(verbose_eval, bool) and verbose_eval:
|
if verbose_eval:
|
||||||
verbose_eval = 1 if verbose_eval is True else verbose_eval
|
verbose_eval = 1 if verbose_eval is True else verbose_eval
|
||||||
callbacks.append(callback.EvaluationMonitor(period=verbose_eval,
|
callbacks.append(
|
||||||
show_stdv=show_stdv))
|
callback.EvaluationMonitor(period=verbose_eval, show_stdv=show_stdv)
|
||||||
|
)
|
||||||
if early_stopping_rounds:
|
if early_stopping_rounds:
|
||||||
callbacks.append(callback.EarlyStopping(
|
callbacks.append(
|
||||||
rounds=early_stopping_rounds, maximize=maximize))
|
callback.EarlyStopping(rounds=early_stopping_rounds, maximize=maximize)
|
||||||
|
)
|
||||||
callbacks = callback.CallbackContainer(callbacks, metric=feval, is_cv=True)
|
callbacks = callback.CallbackContainer(callbacks, metric=feval, is_cv=True)
|
||||||
else:
|
else:
|
||||||
callbacks = _configure_deprecated_callbacks(
|
callbacks = _configure_deprecated_callbacks(
|
||||||
|
|||||||
@@ -291,7 +291,7 @@ float GPUMultiClassAUCOVR(common::Span<float const> predts, MetaInfo const &info
|
|||||||
// labels is a vector of size n_samples.
|
// labels is a vector of size n_samples.
|
||||||
float label = labels[idx % n_samples] == class_id;
|
float label = labels[idx % n_samples] == class_id;
|
||||||
|
|
||||||
float w = get_weight(i % n_samples);
|
float w = weights.empty() ? 1.0f : weights[d_sorted_idx[i] % n_samples];
|
||||||
float fp = (1.0 - label) * w;
|
float fp = (1.0 - label) * w;
|
||||||
float tp = label * w;
|
float tp = label * w;
|
||||||
return thrust::make_pair(fp, tp);
|
return thrust::make_pair(fp, tp);
|
||||||
|
|||||||
@@ -309,10 +309,9 @@ struct EvalGammaNLogLik {
|
|||||||
float constexpr kPsi = 1.0;
|
float constexpr kPsi = 1.0;
|
||||||
bst_float theta = -1. / py;
|
bst_float theta = -1. / py;
|
||||||
bst_float a = kPsi;
|
bst_float a = kPsi;
|
||||||
// b = -std::log(-theta);
|
float b = -std::log(-theta);
|
||||||
float b = 1.0f;
|
// c = 1. / kPsi^2 * std::log(y/kPsi) - std::log(y) - common::LogGamma(1. / kPsi);
|
||||||
// c = 1. / kPsi * std::log(y/kPsi) - std::log(y) - common::LogGamma(1. / kPsi);
|
// = 1.0f * std::log(y) - std::log(y) - 0 = 0
|
||||||
// = 1.0f * std::log(y) - std::log(y) - 0 = 0
|
|
||||||
float c = 0;
|
float c = 0;
|
||||||
// general form for exponential family.
|
// general form for exponential family.
|
||||||
return -((y * theta - b) / a + c);
|
return -((y * theta - b) / a + c);
|
||||||
|
|||||||
@@ -143,7 +143,7 @@ void CheckRankingObjFunction(std::unique_ptr<xgboost::ObjFunction> const& obj,
|
|||||||
}
|
}
|
||||||
|
|
||||||
xgboost::bst_float GetMetricEval(xgboost::Metric * metric,
|
xgboost::bst_float GetMetricEval(xgboost::Metric * metric,
|
||||||
xgboost::HostDeviceVector<xgboost::bst_float> preds,
|
xgboost::HostDeviceVector<xgboost::bst_float> const& preds,
|
||||||
std::vector<xgboost::bst_float> labels,
|
std::vector<xgboost::bst_float> labels,
|
||||||
std::vector<xgboost::bst_float> weights,
|
std::vector<xgboost::bst_float> weights,
|
||||||
std::vector<xgboost::bst_uint> groups) {
|
std::vector<xgboost::bst_uint> groups) {
|
||||||
|
|||||||
@@ -86,7 +86,7 @@ void CheckRankingObjFunction(std::unique_ptr<xgboost::ObjFunction> const& obj,
|
|||||||
|
|
||||||
xgboost::bst_float GetMetricEval(
|
xgboost::bst_float GetMetricEval(
|
||||||
xgboost::Metric * metric,
|
xgboost::Metric * metric,
|
||||||
xgboost::HostDeviceVector<xgboost::bst_float> preds,
|
xgboost::HostDeviceVector<xgboost::bst_float> const& preds,
|
||||||
std::vector<xgboost::bst_float> labels,
|
std::vector<xgboost::bst_float> labels,
|
||||||
std::vector<xgboost::bst_float> weights = std::vector<xgboost::bst_float>(),
|
std::vector<xgboost::bst_float> weights = std::vector<xgboost::bst_float>(),
|
||||||
std::vector<xgboost::bst_uint> groups = std::vector<xgboost::bst_uint>());
|
std::vector<xgboost::bst_uint> groups = std::vector<xgboost::bst_uint>());
|
||||||
|
|||||||
@@ -90,6 +90,16 @@ TEST(Metric, DeclareUnifiedTest(MultiAUC)) {
|
|||||||
},
|
},
|
||||||
{0, 1, 1}); // no class 2.
|
{0, 1, 1}); // no class 2.
|
||||||
EXPECT_TRUE(std::isnan(auc)) << auc;
|
EXPECT_TRUE(std::isnan(auc)) << auc;
|
||||||
|
|
||||||
|
HostDeviceVector<float> predts{
|
||||||
|
0.0f, 1.0f, 0.0f,
|
||||||
|
1.0f, 0.0f, 0.0f,
|
||||||
|
0.0f, 0.0f, 1.0f,
|
||||||
|
0.0f, 0.0f, 1.0f,
|
||||||
|
};
|
||||||
|
std::vector<float> labels {1.0f, 0.0f, 2.0f, 1.0f};
|
||||||
|
auc = GetMetricEval(metric, predts, labels, {1.0f, 2.0f, 3.0f, 4.0f});
|
||||||
|
ASSERT_GT(auc, 0.714);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(Metric, DeclareUnifiedTest(RankingAUC)) {
|
TEST(Metric, DeclareUnifiedTest(RankingAUC)) {
|
||||||
|
|||||||
@@ -13,9 +13,11 @@ class TestGPUEvalMetrics:
|
|||||||
def test_roc_auc_binary(self, n_samples):
|
def test_roc_auc_binary(self, n_samples):
|
||||||
self.cpu_test.run_roc_auc_binary("gpu_hist", n_samples)
|
self.cpu_test.run_roc_auc_binary("gpu_hist", n_samples)
|
||||||
|
|
||||||
@pytest.mark.parametrize("n_samples", [4, 100, 1000])
|
@pytest.mark.parametrize(
|
||||||
def test_roc_auc_multi(self, n_samples):
|
"n_samples,weighted", [(4, False), (100, False), (1000, False), (1000, True)]
|
||||||
self.cpu_test.run_roc_auc_multi("gpu_hist", n_samples)
|
)
|
||||||
|
def test_roc_auc_multi(self, n_samples, weighted):
|
||||||
|
self.cpu_test.run_roc_auc_multi("gpu_hist", n_samples, weighted)
|
||||||
|
|
||||||
@pytest.mark.parametrize("n_samples", [4, 100, 1000])
|
@pytest.mark.parametrize("n_samples", [4, 100, 1000])
|
||||||
def test_roc_auc_ltr(self, n_samples):
|
def test_roc_auc_ltr(self, n_samples):
|
||||||
|
|||||||
@@ -59,6 +59,7 @@ def test_categorical():
|
|||||||
)
|
)
|
||||||
X = pd.DataFrame(X.todense()).astype("category")
|
X = pd.DataFrame(X.todense()).astype("category")
|
||||||
clf.fit(X, y)
|
clf.fit(X, y)
|
||||||
|
assert not clf._can_use_inplace_predict()
|
||||||
|
|
||||||
with tempfile.TemporaryDirectory() as tempdir:
|
with tempfile.TemporaryDirectory() as tempdir:
|
||||||
model = os.path.join(tempdir, "categorial.json")
|
model = os.path.join(tempdir, "categorial.json")
|
||||||
|
|||||||
@@ -1,3 +1,4 @@
|
|||||||
|
from typing import Union
|
||||||
import xgboost as xgb
|
import xgboost as xgb
|
||||||
import pytest
|
import pytest
|
||||||
import os
|
import os
|
||||||
@@ -22,29 +23,47 @@ class TestCallbacks:
|
|||||||
cls.X_valid = X[split:, ...]
|
cls.X_valid = X[split:, ...]
|
||||||
cls.y_valid = y[split:, ...]
|
cls.y_valid = y[split:, ...]
|
||||||
|
|
||||||
def run_evaluation_monitor(self, D_train, D_valid, rounds, verbose_eval):
|
def run_evaluation_monitor(
|
||||||
evals_result = {}
|
self,
|
||||||
with tm.captured_output() as (out, err):
|
D_train: xgb.DMatrix,
|
||||||
xgb.train({'objective': 'binary:logistic',
|
D_valid: xgb.DMatrix,
|
||||||
'eval_metric': 'error'}, D_train,
|
rounds: int,
|
||||||
evals=[(D_train, 'Train'), (D_valid, 'Valid')],
|
verbose_eval: Union[bool, int]
|
||||||
num_boost_round=rounds,
|
):
|
||||||
evals_result=evals_result,
|
def check_output(output: str) -> None:
|
||||||
verbose_eval=verbose_eval)
|
if int(verbose_eval) == 1:
|
||||||
output: str = out.getvalue().strip()
|
# Should print each iteration info
|
||||||
|
assert len(output.split('\n')) == rounds
|
||||||
|
elif int(verbose_eval) > rounds:
|
||||||
|
# Should print first and latest iteration info
|
||||||
|
assert len(output.split('\n')) == 2
|
||||||
|
else:
|
||||||
|
# Should print info by each period additionally to first and latest
|
||||||
|
# iteration
|
||||||
|
num_periods = rounds // int(verbose_eval)
|
||||||
|
# Extra information is required for latest iteration
|
||||||
|
is_extra_info_required = num_periods * int(verbose_eval) < (rounds - 1)
|
||||||
|
assert len(output.split('\n')) == (
|
||||||
|
1 + num_periods + int(is_extra_info_required)
|
||||||
|
)
|
||||||
|
|
||||||
if int(verbose_eval) == 1:
|
evals_result: xgb.callback.TrainingCallback.EvalsLog = {}
|
||||||
# Should print each iteration info
|
params = {'objective': 'binary:logistic', 'eval_metric': 'error'}
|
||||||
assert len(output.split('\n')) == rounds
|
with tm.captured_output() as (out, err):
|
||||||
elif int(verbose_eval) > rounds:
|
xgb.train(
|
||||||
# Should print first and latest iteration info
|
params, D_train,
|
||||||
assert len(output.split('\n')) == 2
|
evals=[(D_train, 'Train'), (D_valid, 'Valid')],
|
||||||
else:
|
num_boost_round=rounds,
|
||||||
# Should print info by each period additionally to first and latest iteration
|
evals_result=evals_result,
|
||||||
num_periods = rounds // int(verbose_eval)
|
verbose_eval=verbose_eval,
|
||||||
# Extra information is required for latest iteration
|
)
|
||||||
is_extra_info_required = num_periods * int(verbose_eval) < (rounds - 1)
|
output: str = out.getvalue().strip()
|
||||||
assert len(output.split('\n')) == 1 + num_periods + int(is_extra_info_required)
|
check_output(output)
|
||||||
|
|
||||||
|
with tm.captured_output() as (out, err):
|
||||||
|
xgb.cv(params, D_train, num_boost_round=rounds, verbose_eval=verbose_eval)
|
||||||
|
output = out.getvalue().strip()
|
||||||
|
check_output(output)
|
||||||
|
|
||||||
def test_evaluation_monitor(self):
|
def test_evaluation_monitor(self):
|
||||||
D_train = xgb.DMatrix(self.X_train, self.y_train)
|
D_train = xgb.DMatrix(self.X_train, self.y_train)
|
||||||
|
|||||||
@@ -124,6 +124,35 @@ class TestEvalMetrics:
|
|||||||
skl_gamma_dev = mean_gamma_deviance(y, score)
|
skl_gamma_dev = mean_gamma_deviance(y, score)
|
||||||
np.testing.assert_allclose(gamma_dev, skl_gamma_dev, rtol=1e-6)
|
np.testing.assert_allclose(gamma_dev, skl_gamma_dev, rtol=1e-6)
|
||||||
|
|
||||||
|
@pytest.mark.skipif(**tm.no_sklearn())
|
||||||
|
def test_gamma_lik(self) -> None:
|
||||||
|
import scipy.stats as stats
|
||||||
|
rng = np.random.default_rng(1994)
|
||||||
|
n_samples = 32
|
||||||
|
n_features = 10
|
||||||
|
|
||||||
|
X = rng.normal(0, 1, size=n_samples * n_features).reshape((n_samples, n_features))
|
||||||
|
|
||||||
|
alpha, loc, beta = 5.0, 11.1, 22
|
||||||
|
y = stats.gamma.rvs(alpha, loc=loc, scale=beta, size=n_samples, random_state=rng)
|
||||||
|
reg = xgb.XGBRegressor(tree_method="hist", objective="reg:gamma", n_estimators=64)
|
||||||
|
reg.fit(X, y, eval_metric="gamma-nloglik", eval_set=[(X, y)])
|
||||||
|
|
||||||
|
score = reg.predict(X)
|
||||||
|
|
||||||
|
booster = reg.get_booster()
|
||||||
|
nloglik = float(booster.eval(xgb.DMatrix(X, y)).split(":")[1].split(":")[0])
|
||||||
|
|
||||||
|
# \beta_i = - (1 / \theta_i a)
|
||||||
|
# where \theta_i is the canonical parameter
|
||||||
|
# XGBoost uses the canonical link function of gamma in evaluation function.
|
||||||
|
# so \theta = - (1.0 / y)
|
||||||
|
# dispersion is hardcoded as 1.0, so shape (a in scipy parameter) is also 1.0
|
||||||
|
beta = - (1.0 / (- (1.0 / y))) # == y
|
||||||
|
nloglik_stats = -stats.gamma.logpdf(score, a=1.0, scale=beta)
|
||||||
|
|
||||||
|
np.testing.assert_allclose(nloglik, np.mean(nloglik_stats), rtol=1e-3)
|
||||||
|
|
||||||
def run_roc_auc_binary(self, tree_method, n_samples):
|
def run_roc_auc_binary(self, tree_method, n_samples):
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from sklearn.datasets import make_classification
|
from sklearn.datasets import make_classification
|
||||||
@@ -162,11 +191,11 @@ class TestEvalMetrics:
|
|||||||
np.testing.assert_allclose(skl_auc, auc, rtol=1e-6)
|
np.testing.assert_allclose(skl_auc, auc, rtol=1e-6)
|
||||||
|
|
||||||
@pytest.mark.skipif(**tm.no_sklearn())
|
@pytest.mark.skipif(**tm.no_sklearn())
|
||||||
@pytest.mark.parametrize("n_samples", [4, 100, 1000])
|
@pytest.mark.parametrize("n_samples", [100, 1000])
|
||||||
def test_roc_auc(self, n_samples):
|
def test_roc_auc(self, n_samples):
|
||||||
self.run_roc_auc_binary("hist", n_samples)
|
self.run_roc_auc_binary("hist", n_samples)
|
||||||
|
|
||||||
def run_roc_auc_multi(self, tree_method, n_samples):
|
def run_roc_auc_multi(self, tree_method, n_samples, weighted):
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from sklearn.datasets import make_classification
|
from sklearn.datasets import make_classification
|
||||||
from sklearn.metrics import roc_auc_score
|
from sklearn.metrics import roc_auc_score
|
||||||
@@ -184,8 +213,14 @@ class TestEvalMetrics:
|
|||||||
n_classes=n_classes,
|
n_classes=n_classes,
|
||||||
random_state=rng
|
random_state=rng
|
||||||
)
|
)
|
||||||
|
if weighted:
|
||||||
|
weights = rng.randn(n_samples)
|
||||||
|
weights -= weights.min()
|
||||||
|
weights /= weights.max()
|
||||||
|
else:
|
||||||
|
weights = None
|
||||||
|
|
||||||
Xy = xgb.DMatrix(X, y)
|
Xy = xgb.DMatrix(X, y, weight=weights)
|
||||||
booster = xgb.train(
|
booster = xgb.train(
|
||||||
{
|
{
|
||||||
"tree_method": tree_method,
|
"tree_method": tree_method,
|
||||||
@@ -197,16 +232,22 @@ class TestEvalMetrics:
|
|||||||
num_boost_round=8,
|
num_boost_round=8,
|
||||||
)
|
)
|
||||||
score = booster.predict(Xy)
|
score = booster.predict(Xy)
|
||||||
skl_auc = roc_auc_score(y, score, average="weighted", multi_class="ovr")
|
skl_auc = roc_auc_score(
|
||||||
|
y, score, average="weighted", sample_weight=weights, multi_class="ovr"
|
||||||
|
)
|
||||||
auc = float(booster.eval(Xy).split(":")[1])
|
auc = float(booster.eval(Xy).split(":")[1])
|
||||||
np.testing.assert_allclose(skl_auc, auc, rtol=1e-6)
|
np.testing.assert_allclose(skl_auc, auc, rtol=1e-6)
|
||||||
|
|
||||||
X = rng.randn(*X.shape)
|
X = rng.randn(*X.shape)
|
||||||
score = booster.predict(xgb.DMatrix(X))
|
score = booster.predict(xgb.DMatrix(X, weight=weights))
|
||||||
skl_auc = roc_auc_score(y, score, average="weighted", multi_class="ovr")
|
skl_auc = roc_auc_score(
|
||||||
auc = float(booster.eval(xgb.DMatrix(X, y)).split(":")[1])
|
y, score, average="weighted", sample_weight=weights, multi_class="ovr"
|
||||||
np.testing.assert_allclose(skl_auc, auc, rtol=1e-6)
|
)
|
||||||
|
auc = float(booster.eval(xgb.DMatrix(X, y, weight=weights)).split(":")[1])
|
||||||
|
np.testing.assert_allclose(skl_auc, auc, rtol=1e-5)
|
||||||
|
|
||||||
@pytest.mark.parametrize("n_samples", [4, 100, 1000])
|
@pytest.mark.parametrize(
|
||||||
def test_roc_auc_multi(self, n_samples):
|
"n_samples,weighted", [(4, False), (100, False), (1000, False), (1000, True)]
|
||||||
self.run_roc_auc_multi("hist", n_samples)
|
)
|
||||||
|
def test_roc_auc_multi(self, n_samples, weighted):
|
||||||
|
self.run_roc_auc_multi("hist", n_samples, weighted)
|
||||||
|
|||||||
Reference in New Issue
Block a user