Compare commits

...

11 Commits

Author SHA1 Message Date
Jiaming Yuan
096047c547
Make 2.0 release. (#9567) 2023-09-12 00:20:49 +08:00
Jiaming Yuan
e75dd75bb2
[backport] [pyspark] support gpu transform (#9542) (#9559)
---------

Co-authored-by: Bobby Wang <wbo4958@gmail.com>
2023-09-07 17:21:09 +08:00
Jiaming Yuan
4d387cbfbf
[backport] [pyspark] rework transform to reuse same code (#9292) (#9558)
Co-authored-by: Bobby Wang <wbo4958@gmail.com>
2023-09-07 15:26:24 +08:00
Jiaming Yuan
3fde9361d7
[backport] Fix inplace predict with fallback when base margin is used. (#9536) (#9548)
- Copy meta info from proxy DMatrix.
- Use `std::call_once` to emit less warnings.
2023-09-05 23:38:06 +08:00
Jiaming Yuan
b67c2ed96d
[backport] [CI] bump setup-r action version. (#9544) (#9551) 2023-09-05 22:10:30 +08:00
Jiaming Yuan
177fd79864
[backport] Fix read the doc configuration. [skip ci] (#9549) 2023-09-05 17:32:00 +08:00
Jiaming Yuan
06487d3896
[backport] Fix GPU categorical split memory allocation. (#9529) (#9535) 2023-08-29 21:14:43 +08:00
Jiaming Yuan
e50ccc4d3c
[R] Fix integer inputs with NA. (#9522) (#9534) 2023-08-29 19:52:13 +08:00
Jiaming Yuan
add57f8880
[backport] Delay the check for vector leaf. (#9509) (#9533) 2023-08-29 18:25:59 +08:00
Jiaming Yuan
a0d3573c74
[backport] Fix device dispatch for linear updater. (#9507) (#9532) 2023-08-29 15:10:43 +08:00
Jiaming Yuan
4301558a57
Make 2.0.0 RC1. (#9492) 2023-08-17 16:16:51 +08:00
36 changed files with 575 additions and 327 deletions

View File

@ -25,7 +25,7 @@ jobs:
with: with:
submodules: 'true' submodules: 'true'
- uses: r-lib/actions/setup-r@50d1eae9b8da0bb3f8582c59a5b82225fa2fe7f2 # v2.3.1 - uses: r-lib/actions/setup-r@11a22a908006c25fe054c4ef0ac0436b1de3edbe # v2.6.4
with: with:
r-version: ${{ matrix.config.r }} r-version: ${{ matrix.config.r }}
@ -64,7 +64,7 @@ jobs:
with: with:
submodules: 'true' submodules: 'true'
- uses: r-lib/actions/setup-r@50d1eae9b8da0bb3f8582c59a5b82225fa2fe7f2 # v2.3.1 - uses: r-lib/actions/setup-r@11a22a908006c25fe054c4ef0ac0436b1de3edbe # v2.6.4
with: with:
r-version: ${{ matrix.config.r }} r-version: ${{ matrix.config.r }}

View File

@ -32,4 +32,3 @@ formats:
python: python:
install: install:
- requirements: doc/requirements.txt - requirements: doc/requirements.txt
system_packages: true

View File

@ -2,7 +2,7 @@ Package: xgboost
Type: Package Type: Package
Title: Extreme Gradient Boosting Title: Extreme Gradient Boosting
Version: 2.0.0.1 Version: 2.0.0.1
Date: 2022-10-18 Date: 2023-09-11
Authors@R: c( Authors@R: c(
person("Tianqi", "Chen", role = c("aut"), person("Tianqi", "Chen", role = c("aut"),
email = "tianqi.tchen@gmail.com"), email = "tianqi.tchen@gmail.com"),

View File

@ -120,11 +120,25 @@ XGB_DLL SEXP XGDMatrixCreateFromMat_R(SEXP mat, SEXP missing, SEXP n_threads) {
ctx.nthread = asInteger(n_threads); ctx.nthread = asInteger(n_threads);
std::int32_t threads = ctx.Threads(); std::int32_t threads = ctx.Threads();
xgboost::common::ParallelFor(nrow, threads, [&](xgboost::omp_ulong i) { if (is_int) {
for (size_t j = 0; j < ncol; ++j) { xgboost::common::ParallelFor(nrow, threads, [&](xgboost::omp_ulong i) {
data[i * ncol + j] = is_int ? static_cast<float>(iin[i + nrow * j]) : din[i + nrow * j]; for (size_t j = 0; j < ncol; ++j) {
} auto v = iin[i + nrow * j];
}); if (v == NA_INTEGER) {
data[i * ncol + j] = std::numeric_limits<float>::quiet_NaN();
} else {
data[i * ncol + j] = static_cast<float>(v);
}
}
});
} else {
xgboost::common::ParallelFor(nrow, threads, [&](xgboost::omp_ulong i) {
for (size_t j = 0; j < ncol; ++j) {
data[i * ncol + j] = din[i + nrow * j];
}
});
}
DMatrixHandle handle; DMatrixHandle handle;
CHECK_CALL(XGDMatrixCreateFromMat_omp(BeginPtr(data), nrow, ncol, CHECK_CALL(XGDMatrixCreateFromMat_omp(BeginPtr(data), nrow, ncol,
asReal(missing), &handle, threads)); asReal(missing), &handle, threads));

View File

@ -56,6 +56,42 @@ test_that("xgb.DMatrix: basic construction", {
expect_equal(raw_fd, raw_dgc) expect_equal(raw_fd, raw_dgc)
}) })
test_that("xgb.DMatrix: NA", {
n_samples <- 3
x <- cbind(
x1 = sample(x = 4, size = n_samples, replace = TRUE),
x2 = sample(x = 4, size = n_samples, replace = TRUE)
)
x[1, "x1"] <- NA
m <- xgb.DMatrix(x)
xgb.DMatrix.save(m, "int.dmatrix")
x <- matrix(as.numeric(x), nrow = n_samples, ncol = 2)
colnames(x) <- c("x1", "x2")
m <- xgb.DMatrix(x)
xgb.DMatrix.save(m, "float.dmatrix")
iconn <- file("int.dmatrix", "rb")
fconn <- file("float.dmatrix", "rb")
expect_equal(file.size("int.dmatrix"), file.size("float.dmatrix"))
bytes <- file.size("int.dmatrix")
idmatrix <- readBin(iconn, "raw", n = bytes)
fdmatrix <- readBin(fconn, "raw", n = bytes)
expect_equal(length(idmatrix), length(fdmatrix))
expect_equal(idmatrix, fdmatrix)
close(iconn)
close(fconn)
file.remove("int.dmatrix")
file.remove("float.dmatrix")
})
test_that("xgb.DMatrix: saving, loading", { test_that("xgb.DMatrix: saving, loading", {
# save to a local file # save to a local file
dtest1 <- xgb.DMatrix(test_data, label = test_label) dtest1 <- xgb.DMatrix(test_data, label = test_label)

View File

@ -329,7 +329,7 @@ Parameters for Linear Booster (``booster=gblinear``)
- Choice of algorithm to fit linear model - Choice of algorithm to fit linear model
- ``shotgun``: Parallel coordinate descent algorithm based on shotgun algorithm. Uses 'hogwild' parallelism and therefore produces a nondeterministic solution on each run. - ``shotgun``: Parallel coordinate descent algorithm based on shotgun algorithm. Uses 'hogwild' parallelism and therefore produces a nondeterministic solution on each run.
- ``coord_descent``: Ordinary coordinate descent algorithm. Also multithreaded but still produces a deterministic solution. - ``coord_descent``: Ordinary coordinate descent algorithm. Also multithreaded but still produces a deterministic solution. When the ``device`` parameter is set to ``cuda`` or ``gpu``, a GPU variant would be used.
* ``feature_selector`` [default= ``cyclic``] * ``feature_selector`` [default= ``cyclic``]

View File

@ -1,5 +1,5 @@
/*! /**
* Copyright 2020 by Contributors * Copyright 2020-2023, XGBoost Contributors
* \file global_config.h * \file global_config.h
* \brief Global configuration for XGBoost * \brief Global configuration for XGBoost
* \author Hyunsu Cho * \author Hyunsu Cho
@ -7,24 +7,22 @@
#ifndef XGBOOST_GLOBAL_CONFIG_H_ #ifndef XGBOOST_GLOBAL_CONFIG_H_
#define XGBOOST_GLOBAL_CONFIG_H_ #define XGBOOST_GLOBAL_CONFIG_H_
#include <xgboost/parameter.h> #include <dmlc/thread_local.h> // for ThreadLocalStore
#include <vector> #include <xgboost/parameter.h> // for XGBoostParameter
#include <string>
#include <cstdint> // for int32_t
namespace xgboost { namespace xgboost {
class Json;
struct GlobalConfiguration : public XGBoostParameter<GlobalConfiguration> { struct GlobalConfiguration : public XGBoostParameter<GlobalConfiguration> {
int verbosity { 1 }; std::int32_t verbosity{1};
bool use_rmm { false }; bool use_rmm{false};
DMLC_DECLARE_PARAMETER(GlobalConfiguration) { DMLC_DECLARE_PARAMETER(GlobalConfiguration) {
DMLC_DECLARE_FIELD(verbosity) DMLC_DECLARE_FIELD(verbosity)
.set_range(0, 3) .set_range(0, 3)
.set_default(1) // shows only warning .set_default(1) // shows only warning
.describe("Flag to print out detailed breakdown of runtime."); .describe("Flag to print out detailed breakdown of runtime.");
DMLC_DECLARE_FIELD(use_rmm) DMLC_DECLARE_FIELD(use_rmm).set_default(false).describe(
.set_default(false) "Whether to use RAPIDS Memory Manager to allocate GPU memory in XGBoost");
.describe("Whether to use RAPIDS Memory Manager to allocate GPU memory in XGBoost");
} }
}; };

View File

@ -6,7 +6,7 @@
<groupId>ml.dmlc</groupId> <groupId>ml.dmlc</groupId>
<artifactId>xgboost-jvm</artifactId> <artifactId>xgboost-jvm</artifactId>
<version>2.0.0-SNAPSHOT</version> <version>2.0.0</version>
<packaging>pom</packaging> <packaging>pom</packaging>
<name>XGBoost JVM Package</name> <name>XGBoost JVM Package</name>
<description>JVM Package for XGBoost</description> <description>JVM Package for XGBoost</description>

View File

@ -6,11 +6,11 @@
<parent> <parent>
<groupId>ml.dmlc</groupId> <groupId>ml.dmlc</groupId>
<artifactId>xgboost-jvm</artifactId> <artifactId>xgboost-jvm</artifactId>
<version>2.0.0-SNAPSHOT</version> <version>2.0.0</version>
</parent> </parent>
<name>xgboost4j-example</name> <name>xgboost4j-example</name>
<artifactId>xgboost4j-example_${scala.binary.version}</artifactId> <artifactId>xgboost4j-example_${scala.binary.version}</artifactId>
<version>2.0.0-SNAPSHOT</version> <version>2.0.0</version>
<packaging>jar</packaging> <packaging>jar</packaging>
<build> <build>
<plugins> <plugins>

View File

@ -6,12 +6,12 @@
<parent> <parent>
<groupId>ml.dmlc</groupId> <groupId>ml.dmlc</groupId>
<artifactId>xgboost-jvm</artifactId> <artifactId>xgboost-jvm</artifactId>
<version>2.0.0-SNAPSHOT</version> <version>2.0.0</version>
</parent> </parent>
<name>xgboost4j-flink</name> <name>xgboost4j-flink</name>
<artifactId>xgboost4j-flink_${scala.binary.version}</artifactId> <artifactId>xgboost4j-flink_${scala.binary.version}</artifactId>
<version>2.0.0-SNAPSHOT</version> <version>2.0.0</version>
<properties> <properties>
<flink-ml.version>2.2.0</flink-ml.version> <flink-ml.version>2.2.0</flink-ml.version>
</properties> </properties>

View File

@ -6,11 +6,11 @@
<parent> <parent>
<groupId>ml.dmlc</groupId> <groupId>ml.dmlc</groupId>
<artifactId>xgboost-jvm</artifactId> <artifactId>xgboost-jvm</artifactId>
<version>2.0.0-SNAPSHOT</version> <version>2.0.0</version>
</parent> </parent>
<artifactId>xgboost4j-gpu_${scala.binary.version}</artifactId> <artifactId>xgboost4j-gpu_${scala.binary.version}</artifactId>
<name>xgboost4j-gpu</name> <name>xgboost4j-gpu</name>
<version>2.0.0-SNAPSHOT</version> <version>2.0.0</version>
<packaging>jar</packaging> <packaging>jar</packaging>
<dependencies> <dependencies>

View File

@ -6,7 +6,7 @@
<parent> <parent>
<groupId>ml.dmlc</groupId> <groupId>ml.dmlc</groupId>
<artifactId>xgboost-jvm</artifactId> <artifactId>xgboost-jvm</artifactId>
<version>2.0.0-SNAPSHOT</version> <version>2.0.0</version>
</parent> </parent>
<name>xgboost4j-spark-gpu</name> <name>xgboost4j-spark-gpu</name>
<artifactId>xgboost4j-spark-gpu_${scala.binary.version}</artifactId> <artifactId>xgboost4j-spark-gpu_${scala.binary.version}</artifactId>

View File

@ -6,7 +6,7 @@
<parent> <parent>
<groupId>ml.dmlc</groupId> <groupId>ml.dmlc</groupId>
<artifactId>xgboost-jvm</artifactId> <artifactId>xgboost-jvm</artifactId>
<version>2.0.0-SNAPSHOT</version> <version>2.0.0</version>
</parent> </parent>
<name>xgboost4j-spark</name> <name>xgboost4j-spark</name>
<artifactId>xgboost4j-spark_${scala.binary.version}</artifactId> <artifactId>xgboost4j-spark_${scala.binary.version}</artifactId>

View File

@ -6,11 +6,11 @@
<parent> <parent>
<groupId>ml.dmlc</groupId> <groupId>ml.dmlc</groupId>
<artifactId>xgboost-jvm</artifactId> <artifactId>xgboost-jvm</artifactId>
<version>2.0.0-SNAPSHOT</version> <version>2.0.0</version>
</parent> </parent>
<name>xgboost4j</name> <name>xgboost4j</name>
<artifactId>xgboost4j_${scala.binary.version}</artifactId> <artifactId>xgboost4j_${scala.binary.version}</artifactId>
<version>2.0.0-SNAPSHOT</version> <version>2.0.0</version>
<packaging>jar</packaging> <packaging>jar</packaging>
<dependencies> <dependencies>

View File

@ -7,7 +7,7 @@ build-backend = "packager.pep517"
[project] [project]
name = "xgboost" name = "xgboost"
version = "2.0.0-dev" version = "2.0.0"
authors = [ authors = [
{ name = "Hyunsu Cho", email = "chohyu01@cs.washington.edu" }, { name = "Hyunsu Cho", email = "chohyu01@cs.washington.edu" },
{ name = "Jiaming Yuan", email = "jm.yuan@outlook.com" } { name = "Jiaming Yuan", email = "jm.yuan@outlook.com" }

View File

@ -1 +1 @@
2.0.0-dev 2.0.0

View File

@ -88,6 +88,18 @@ def is_cudf_available() -> bool:
return False return False
def is_cupy_available() -> bool:
"""Check cupy package available or not"""
if importlib.util.find_spec("cupy") is None:
return False
try:
import cupy
return True
except ImportError:
return False
try: try:
import scipy.sparse as scipy_sparse import scipy.sparse as scipy_sparse
from scipy.sparse import csr_matrix as scipy_csr from scipy.sparse import csr_matrix as scipy_csr

View File

@ -59,11 +59,12 @@ from scipy.special import expit, softmax # pylint: disable=no-name-in-module
import xgboost import xgboost
from xgboost import XGBClassifier from xgboost import XGBClassifier
from xgboost.compat import is_cudf_available from xgboost.compat import is_cudf_available, is_cupy_available
from xgboost.core import Booster, _check_distributed_params from xgboost.core import Booster, _check_distributed_params
from xgboost.sklearn import DEFAULT_N_ESTIMATORS, XGBModel, _can_use_qdm from xgboost.sklearn import DEFAULT_N_ESTIMATORS, XGBModel, _can_use_qdm
from xgboost.training import train as worker_train from xgboost.training import train as worker_train
from .._typing import ArrayLike
from .data import ( from .data import (
_read_csr_matrix_from_unwrapped_spark_vec, _read_csr_matrix_from_unwrapped_spark_vec,
alias, alias,
@ -241,6 +242,13 @@ class _SparkXGBParams(
TypeConverters.toList, TypeConverters.toList,
) )
def set_device(self, value: str) -> "_SparkXGBParams":
"""Set device, optional value: cpu, cuda, gpu"""
_check_distributed_params({"device": value})
assert value in ("cpu", "cuda", "gpu")
self.set(self.device, value)
return self
@classmethod @classmethod
def _xgb_cls(cls) -> Type[XGBModel]: def _xgb_cls(cls) -> Type[XGBModel]:
""" """
@ -1117,12 +1125,111 @@ class _SparkXGBModel(Model, _SparkXGBParams, MLReadable, MLWritable):
) )
return features_col, feature_col_names return features_col, feature_col_names
def _get_pred_contrib_col_name(self) -> Optional[str]:
"""Return the pred_contrib_col col name"""
pred_contrib_col_name = None
if (
self.isDefined(self.pred_contrib_col)
and self.getOrDefault(self.pred_contrib_col) != ""
):
pred_contrib_col_name = self.getOrDefault(self.pred_contrib_col)
return pred_contrib_col_name
def _out_schema(self) -> Tuple[bool, str]:
"""Return the bool to indicate if it's a single prediction, true is single prediction,
and the returned type of the user-defined function. The value must
be a DDL-formatted type string."""
if self._get_pred_contrib_col_name() is not None:
return False, f"{pred.prediction} double, {pred.pred_contrib} array<double>"
return True, "double"
def _get_predict_func(self) -> Callable:
"""Return the true prediction function which will be running on the executor side"""
predict_params = self._gen_predict_params_dict()
pred_contrib_col_name = self._get_pred_contrib_col_name()
def _predict(
model: XGBModel, X: ArrayLike, base_margin: Optional[ArrayLike]
) -> Union[pd.DataFrame, pd.Series]:
data = {}
preds = model.predict(
X,
base_margin=base_margin,
validate_features=False,
**predict_params,
)
data[pred.prediction] = pd.Series(preds)
if pred_contrib_col_name is not None:
contribs = pred_contribs(model, X, base_margin)
data[pred.pred_contrib] = pd.Series(list(contribs))
return pd.DataFrame(data=data)
return data[pred.prediction]
return _predict
def _post_transform(self, dataset: DataFrame, pred_col: Column) -> DataFrame:
"""Post process of transform"""
prediction_col_name = self.getOrDefault(self.predictionCol)
single_pred, _ = self._out_schema()
if single_pred:
if prediction_col_name:
dataset = dataset.withColumn(prediction_col_name, pred_col)
else:
pred_struct_col = "_prediction_struct"
dataset = dataset.withColumn(pred_struct_col, pred_col)
if prediction_col_name:
dataset = dataset.withColumn(
prediction_col_name, getattr(col(pred_struct_col), pred.prediction)
)
pred_contrib_col_name = self._get_pred_contrib_col_name()
if pred_contrib_col_name is not None:
dataset = dataset.withColumn(
pred_contrib_col_name,
array_to_vector(getattr(col(pred_struct_col), pred.pred_contrib)),
)
dataset = dataset.drop(pred_struct_col)
return dataset
def _gpu_transform(self) -> bool:
"""If gpu is used to do the prediction, true to gpu prediction"""
if _is_local(_get_spark_session().sparkContext):
# if it's local model, we just use the internal "device"
return use_cuda(self.getOrDefault(self.device))
gpu_per_task = (
_get_spark_session()
.sparkContext.getConf()
.get("spark.task.resource.gpu.amount")
)
# User don't set gpu configurations, just use cpu
if gpu_per_task is None:
if use_cuda(self.getOrDefault(self.device)):
get_logger("XGBoost-PySpark").warning(
"Do the prediction on the CPUs since "
"no gpu configurations are set"
)
return False
# User already sets the gpu configurations, we just use the internal "device".
return use_cuda(self.getOrDefault(self.device))
def _transform(self, dataset: DataFrame) -> DataFrame: def _transform(self, dataset: DataFrame) -> DataFrame:
# pylint: disable=too-many-statements, too-many-locals # pylint: disable=too-many-statements, too-many-locals
# Save xgb_sklearn_model and predict_params to be local variable # Save xgb_sklearn_model and predict_params to be local variable
# to avoid the `self` object to be pickled to remote. # to avoid the `self` object to be pickled to remote.
xgb_sklearn_model = self._xgb_sklearn_model xgb_sklearn_model = self._xgb_sklearn_model
predict_params = self._gen_predict_params_dict()
has_base_margin = False has_base_margin = False
if ( if (
@ -1137,79 +1244,92 @@ class _SparkXGBModel(Model, _SparkXGBParams, MLReadable, MLWritable):
features_col, feature_col_names = self._get_feature_col(dataset) features_col, feature_col_names = self._get_feature_col(dataset)
enable_sparse_data_optim = self.getOrDefault(self.enable_sparse_data_optim) enable_sparse_data_optim = self.getOrDefault(self.enable_sparse_data_optim)
pred_contrib_col_name = None predict_func = self._get_predict_func()
if (
self.isDefined(self.pred_contrib_col)
and self.getOrDefault(self.pred_contrib_col) != ""
):
pred_contrib_col_name = self.getOrDefault(self.pred_contrib_col)
single_pred = True _, schema = self._out_schema()
schema = "double"
if pred_contrib_col_name: is_local = _is_local(_get_spark_session().sparkContext)
single_pred = False run_on_gpu = self._gpu_transform()
schema = f"{pred.prediction} double, {pred.pred_contrib} array<double>"
@pandas_udf(schema) # type: ignore @pandas_udf(schema) # type: ignore
def predict_udf(iterator: Iterator[pd.DataFrame]) -> Iterator[pd.Series]: def predict_udf(iterator: Iterator[pd.DataFrame]) -> Iterator[pd.Series]:
assert xgb_sklearn_model is not None assert xgb_sklearn_model is not None
model = xgb_sklearn_model model = xgb_sklearn_model
from pyspark import TaskContext
context = TaskContext.get()
assert context is not None
dev_ordinal = -1
if is_cudf_available():
if is_local:
if run_on_gpu and is_cupy_available():
import cupy as cp # pylint: disable=import-error
total_gpus = cp.cuda.runtime.getDeviceCount()
if total_gpus > 0:
partition_id = context.partitionId()
# For transform local mode, default the dev_ordinal to
# (partition id) % gpus.
dev_ordinal = partition_id % total_gpus
elif run_on_gpu:
dev_ordinal = _get_gpu_id(context)
if dev_ordinal >= 0:
device = "cuda:" + str(dev_ordinal)
get_logger("XGBoost-PySpark").info(
"Do the inference with device: %s", device
)
model.set_params(device=device)
else:
get_logger("XGBoost-PySpark").info("Do the inference on the CPUs")
else:
msg = (
"CUDF is unavailable, fallback the inference on the CPUs"
if run_on_gpu
else "Do the inference on the CPUs"
)
get_logger("XGBoost-PySpark").info(msg)
def to_gpu_if_possible(data: ArrayLike) -> ArrayLike:
"""Move the data to gpu if possible"""
if dev_ordinal >= 0:
import cudf # pylint: disable=import-error
import cupy as cp # pylint: disable=import-error
# We must set the device after import cudf, which will change the device id to 0
# See https://github.com/rapidsai/cudf/issues/11386
cp.cuda.runtime.setDevice(dev_ordinal) # pylint: disable=I1101
df = cudf.DataFrame(data)
del data
return df
return data
for data in iterator: for data in iterator:
if enable_sparse_data_optim: if enable_sparse_data_optim:
X = _read_csr_matrix_from_unwrapped_spark_vec(data) X = _read_csr_matrix_from_unwrapped_spark_vec(data)
else: else:
if feature_col_names is not None: if feature_col_names is not None:
X = data[feature_col_names] tmp = data[feature_col_names]
else: else:
X = stack_series(data[alias.data]) tmp = stack_series(data[alias.data])
X = to_gpu_if_possible(tmp)
if has_base_margin: if has_base_margin:
base_margin = data[alias.margin].to_numpy() base_margin = to_gpu_if_possible(data[alias.margin])
else: else:
base_margin = None base_margin = None
data = {} yield predict_func(model, X, base_margin)
preds = model.predict(
X,
base_margin=base_margin,
validate_features=False,
**predict_params,
)
data[pred.prediction] = pd.Series(preds)
if pred_contrib_col_name:
contribs = pred_contribs(model, X, base_margin)
data[pred.pred_contrib] = pd.Series(list(contribs))
yield pd.DataFrame(data=data)
else:
yield data[pred.prediction]
if has_base_margin: if has_base_margin:
pred_col = predict_udf(struct(*features_col, base_margin_col)) pred_col = predict_udf(struct(*features_col, base_margin_col))
else: else:
pred_col = predict_udf(struct(*features_col)) pred_col = predict_udf(struct(*features_col))
prediction_col_name = self.getOrDefault(self.predictionCol) return self._post_transform(dataset, pred_col)
if single_pred:
dataset = dataset.withColumn(prediction_col_name, pred_col)
else:
pred_struct_col = "_prediction_struct"
dataset = dataset.withColumn(pred_struct_col, pred_col)
dataset = dataset.withColumn(
prediction_col_name, getattr(col(pred_struct_col), pred.prediction)
)
if pred_contrib_col_name:
dataset = dataset.withColumn(
pred_contrib_col_name,
array_to_vector(getattr(col(pred_struct_col), pred.pred_contrib)),
)
dataset = dataset.drop(pred_struct_col)
return dataset
class _ClassificationModel( # pylint: disable=abstract-method class _ClassificationModel( # pylint: disable=abstract-method
@ -1221,22 +1341,21 @@ class _ClassificationModel( # pylint: disable=abstract-method
.. Note:: This API is experimental. .. Note:: This API is experimental.
""" """
def _transform(self, dataset: DataFrame) -> DataFrame: def _out_schema(self) -> Tuple[bool, str]:
# pylint: disable=too-many-statements, too-many-locals schema = (
# Save xgb_sklearn_model and predict_params to be local variable f"{pred.raw_prediction} array<double>, {pred.prediction} double,"
# to avoid the `self` object to be pickled to remote. f" {pred.probability} array<double>"
xgb_sklearn_model = self._xgb_sklearn_model )
predict_params = self._gen_predict_params_dict() if self._get_pred_contrib_col_name() is not None:
# We will force setting strict_shape to True when predicting contribs,
# So, it will also output 3-D shape result.
schema = f"{schema}, {pred.pred_contrib} array<array<double>>"
has_base_margin = False return False, schema
if (
self.isDefined(self.base_margin_col) def _get_predict_func(self) -> Callable:
and self.getOrDefault(self.base_margin_col) != "" predict_params = self._gen_predict_params_dict()
): pred_contrib_col_name = self._get_pred_contrib_col_name()
has_base_margin = True
base_margin_col = col(self.getOrDefault(self.base_margin_col)).alias(
alias.margin
)
def transform_margin(margins: np.ndarray) -> Tuple[np.ndarray, np.ndarray]: def transform_margin(margins: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
if margins.ndim == 1: if margins.ndim == 1:
@ -1251,76 +1370,38 @@ class _ClassificationModel( # pylint: disable=abstract-method
class_probs = softmax(raw_preds, axis=1) class_probs = softmax(raw_preds, axis=1)
return raw_preds, class_probs return raw_preds, class_probs
features_col, feature_col_names = self._get_feature_col(dataset) def _predict(
enable_sparse_data_optim = self.getOrDefault(self.enable_sparse_data_optim) model: XGBModel, X: ArrayLike, base_margin: Optional[np.ndarray]
) -> Union[pd.DataFrame, pd.Series]:
margins = model.predict(
X,
base_margin=base_margin,
output_margin=True,
validate_features=False,
**predict_params,
)
raw_preds, class_probs = transform_margin(margins)
pred_contrib_col_name = None # It seems that they use argmax of class probs,
if ( # not of margin to get the prediction (Note: scala implementation)
self.isDefined(self.pred_contrib_col) preds = np.argmax(class_probs, axis=1)
and self.getOrDefault(self.pred_contrib_col) != "" result: Dict[str, pd.Series] = {
): pred.raw_prediction: pd.Series(list(raw_preds)),
pred_contrib_col_name = self.getOrDefault(self.pred_contrib_col) pred.prediction: pd.Series(preds),
pred.probability: pd.Series(list(class_probs)),
}
schema = ( if pred_contrib_col_name is not None:
f"{pred.raw_prediction} array<double>, {pred.prediction} double," contribs = pred_contribs(model, X, base_margin, strict_shape=True)
f" {pred.probability} array<double>" result[pred.pred_contrib] = pd.Series(list(contribs.tolist()))
)
if pred_contrib_col_name:
# We will force setting strict_shape to True when predicting contribs,
# So, it will also output 3-D shape result.
schema = f"{schema}, {pred.pred_contrib} array<array<double>>"
@pandas_udf(schema) # type: ignore return pd.DataFrame(data=result)
def predict_udf(
iterator: Iterator[Tuple[pd.Series, ...]]
) -> Iterator[pd.DataFrame]:
assert xgb_sklearn_model is not None
model = xgb_sklearn_model
for data in iterator:
if enable_sparse_data_optim:
X = _read_csr_matrix_from_unwrapped_spark_vec(data)
else:
if feature_col_names is not None:
X = data[feature_col_names] # type: ignore
else:
X = stack_series(data[alias.data])
if has_base_margin: return _predict
base_margin = stack_series(data[alias.margin])
else:
base_margin = None
margins = model.predict(
X,
base_margin=base_margin,
output_margin=True,
validate_features=False,
**predict_params,
)
raw_preds, class_probs = transform_margin(margins)
# It seems that they use argmax of class probs,
# not of margin to get the prediction (Note: scala implementation)
preds = np.argmax(class_probs, axis=1)
result: Dict[str, pd.Series] = {
pred.raw_prediction: pd.Series(list(raw_preds)),
pred.prediction: pd.Series(preds),
pred.probability: pd.Series(list(class_probs)),
}
if pred_contrib_col_name:
contribs = pred_contribs(model, X, base_margin, strict_shape=True)
result[pred.pred_contrib] = pd.Series(list(contribs.tolist()))
yield pd.DataFrame(data=result)
if has_base_margin:
pred_struct = predict_udf(struct(*features_col, base_margin_col))
else:
pred_struct = predict_udf(struct(*features_col))
def _post_transform(self, dataset: DataFrame, pred_col: Column) -> DataFrame:
pred_struct_col = "_prediction_struct" pred_struct_col = "_prediction_struct"
dataset = dataset.withColumn(pred_struct_col, pred_struct) dataset = dataset.withColumn(pred_struct_col, pred_col)
raw_prediction_col_name = self.getOrDefault(self.rawPredictionCol) raw_prediction_col_name = self.getOrDefault(self.rawPredictionCol)
if raw_prediction_col_name: if raw_prediction_col_name:
@ -1342,7 +1423,8 @@ class _ClassificationModel( # pylint: disable=abstract-method
array_to_vector(getattr(col(pred_struct_col), pred.probability)), array_to_vector(getattr(col(pred_struct_col), pred.probability)),
) )
if pred_contrib_col_name: pred_contrib_col_name = self._get_pred_contrib_col_name()
if pred_contrib_col_name is not None:
dataset = dataset.withColumn( dataset = dataset.withColumn(
pred_contrib_col_name, pred_contrib_col_name,
getattr(col(pred_struct_col), pred.pred_contrib), getattr(col(pred_struct_col), pred.pred_contrib),

View File

@ -10,7 +10,7 @@ from threading import Thread
from typing import Any, Callable, Dict, Optional, Set, Type from typing import Any, Callable, Dict, Optional, Set, Type
import pyspark import pyspark
from pyspark import BarrierTaskContext, SparkContext, SparkFiles from pyspark import BarrierTaskContext, SparkContext, SparkFiles, TaskContext
from pyspark.sql.session import SparkSession from pyspark.sql.session import SparkSession
from xgboost import Booster, XGBModel, collective from xgboost import Booster, XGBModel, collective
@ -129,7 +129,7 @@ def _is_local(spark_context: SparkContext) -> bool:
return spark_context._jsc.sc().isLocal() return spark_context._jsc.sc().isLocal()
def _get_gpu_id(task_context: BarrierTaskContext) -> int: def _get_gpu_id(task_context: TaskContext) -> int:
"""Get the gpu id from the task resources""" """Get the gpu id from the task resources"""
if task_context is None: if task_context is None:
# This is a safety check. # This is a safety check.

View File

@ -52,7 +52,7 @@ inline XGBOOST_DEVICE bool InvalidCat(float cat) {
* *
* Go to left if it's NOT the matching category, which matches one-hot encoding. * Go to left if it's NOT the matching category, which matches one-hot encoding.
*/ */
inline XGBOOST_DEVICE bool Decision(common::Span<uint32_t const> cats, float cat) { inline XGBOOST_DEVICE bool Decision(common::Span<CatBitField::value_type const> cats, float cat) {
KCatBitField const s_cats(cats); KCatBitField const s_cats(cats);
if (XGBOOST_EXPECT(InvalidCat(cat), false)) { if (XGBOOST_EXPECT(InvalidCat(cat), false)) {
return true; return true;

View File

@ -3,9 +3,11 @@
*/ */
#include "error_msg.h" #include "error_msg.h"
#include <mutex> // for call_once, once_flag
#include <sstream> // for stringstream #include <sstream> // for stringstream
#include "../collective/communicator-inl.h" // for GetRank #include "../collective/communicator-inl.h" // for GetRank
#include "xgboost/context.h" // for Context
#include "xgboost/logging.h" #include "xgboost/logging.h"
namespace xgboost::error { namespace xgboost::error {
@ -26,34 +28,43 @@ void WarnDeprecatedGPUHist() {
} }
void WarnManualUpdater() { void WarnManualUpdater() {
bool static thread_local logged{false}; static std::once_flag flag;
if (logged) { std::call_once(flag, [] {
return; LOG(WARNING)
} << "You have manually specified the `updater` parameter. The `tree_method` parameter "
LOG(WARNING) "will be ignored. Incorrect sequence of updaters will produce undefined "
<< "You have manually specified the `updater` parameter. The `tree_method` parameter " "behavior. For common uses, we recommend using `tree_method` parameter instead.";
"will be ignored. Incorrect sequence of updaters will produce undefined " });
"behavior. For common uses, we recommend using `tree_method` parameter instead.";
logged = true;
} }
void WarnDeprecatedGPUId() { void WarnDeprecatedGPUId() {
static thread_local bool logged{false}; static std::once_flag flag;
if (logged) { std::call_once(flag, [] {
return; auto msg = DeprecatedFunc("gpu_id", "2.0.0", "device");
} msg += " E.g. device=cpu/cuda/cuda:0";
auto msg = DeprecatedFunc("gpu_id", "2.0.0", "device"); LOG(WARNING) << msg;
msg += " E.g. device=cpu/cuda/cuda:0"; });
LOG(WARNING) << msg;
logged = true;
} }
void WarnEmptyDataset() { void WarnEmptyDataset() {
static thread_local bool logged{false}; static std::once_flag flag;
if (logged) { std::call_once(flag,
return; [] { LOG(WARNING) << "Empty dataset at worker: " << collective::GetRank(); });
} }
LOG(WARNING) << "Empty dataset at worker: " << collective::GetRank();
logged = true; void MismatchedDevices(Context const* booster, Context const* data) {
static std::once_flag flag;
std::call_once(flag, [&] {
LOG(WARNING)
<< "Falling back to prediction using DMatrix due to mismatched devices. This might "
"lead to higher memory usage and slower performance. XGBoost is running on: "
<< booster->DeviceName() << ", while the input data is on: " << data->DeviceName() << ".\n"
<< R"(Potential solutions:
- Use a data structure that matches the device ordinal in the booster.
- Set the device for booster before call to inplace_predict.
This warning will only be shown once.
)";
});
} }
} // namespace xgboost::error } // namespace xgboost::error

View File

@ -10,7 +10,8 @@
#include <limits> // for numeric_limits #include <limits> // for numeric_limits
#include <string> // for string #include <string> // for string
#include "xgboost/base.h" // for bst_feature_t #include "xgboost/base.h" // for bst_feature_t
#include "xgboost/context.h" // for Context
#include "xgboost/logging.h" #include "xgboost/logging.h"
#include "xgboost/string_view.h" // for StringView #include "xgboost/string_view.h" // for StringView
@ -94,5 +95,7 @@ constexpr StringView InvalidCUDAOrdinal() {
return "Invalid device. `device` is required to be CUDA and there must be at least one GPU " return "Invalid device. `device` is required to be CUDA and there must be at least one GPU "
"available for using GPU."; "available for using GPU.";
} }
void MismatchedDevices(Context const* booster, Context const* data);
} // namespace xgboost::error } // namespace xgboost::error
#endif // XGBOOST_COMMON_ERROR_MSG_H_ #endif // XGBOOST_COMMON_ERROR_MSG_H_

View File

@ -55,6 +55,7 @@ std::shared_ptr<DMatrix> CreateDMatrixFromProxy(Context const *ctx,
} }
CHECK(p_fmat) << "Failed to fallback."; CHECK(p_fmat) << "Failed to fallback.";
p_fmat->Info() = proxy->Info().Copy();
return p_fmat; return p_fmat;
} }
} // namespace xgboost::data } // namespace xgboost::data

View File

@ -1,5 +1,5 @@
/*! /**
* Copyright 2014-2022 by XGBoost Contributors * Copyright 2014-2023, XGBoost Contributors
* \file gblinear.cc * \file gblinear.cc
* \brief Implementation of Linear booster, with L1/L2 regularization: Elastic Net * \brief Implementation of Linear booster, with L1/L2 regularization: Elastic Net
* the update rule is parallel coordinate descent (shotgun) * the update rule is parallel coordinate descent (shotgun)
@ -26,9 +26,9 @@
#include "../common/timer.h" #include "../common/timer.h"
#include "../common/common.h" #include "../common/common.h"
#include "../common/threading_utils.h" #include "../common/threading_utils.h"
#include "../common/error_msg.h"
namespace xgboost { namespace xgboost::gbm {
namespace gbm {
DMLC_REGISTRY_FILE_TAG(gblinear); DMLC_REGISTRY_FILE_TAG(gblinear);
@ -83,7 +83,16 @@ class GBLinear : public GradientBooster {
} }
param_.UpdateAllowUnknown(cfg); param_.UpdateAllowUnknown(cfg);
param_.CheckGPUSupport(); param_.CheckGPUSupport();
updater_.reset(LinearUpdater::Create(param_.updater, ctx_)); if (param_.updater == "gpu_coord_descent") {
LOG(WARNING) << error::DeprecatedFunc("gpu_coord_descent", "2.0.0",
R"(device="cuda", updater="coord_descent")");
}
if (param_.updater == "coord_descent" && ctx_->IsCUDA()) {
updater_.reset(LinearUpdater::Create("gpu_coord_descent", ctx_));
} else {
updater_.reset(LinearUpdater::Create(param_.updater, ctx_));
}
updater_->Configure(cfg); updater_->Configure(cfg);
monitor_.Init("GBLinear"); monitor_.Init("GBLinear");
} }
@ -354,5 +363,4 @@ XGBOOST_REGISTER_GBM(GBLinear, "gblinear")
.set_body([](LearnerModelParam const* booster_config, Context const* ctx) { .set_body([](LearnerModelParam const* booster_config, Context const* ctx) {
return new GBLinear(booster_config, ctx); return new GBLinear(booster_config, ctx);
}); });
} // namespace gbm } // namespace xgboost::gbm
} // namespace xgboost

View File

@ -85,25 +85,6 @@ bool UpdatersMatched(std::vector<std::string> updater_seq,
return name == up->Name(); return name == up->Name();
}); });
} }
void MismatchedDevices(Context const* booster, Context const* data) {
bool thread_local static logged{false};
if (logged) {
return;
}
LOG(WARNING) << "Falling back to prediction using DMatrix due to mismatched devices. This might "
"lead to higher memory usage and slower performance. XGBoost is running on: "
<< booster->DeviceName() << ", while the input data is on: " << data->DeviceName()
<< ".\n"
<< R"(Potential solutions:
- Use a data structure that matches the device ordinal in the booster.
- Set the device for booster before call to inplace_predict.
This warning will only be shown once for each thread. Subsequent warnings made by the
current thread will be suppressed.
)";
logged = true;
}
} // namespace } // namespace
void GBTree::Configure(Args const& cfg) { void GBTree::Configure(Args const& cfg) {
@ -146,14 +127,6 @@ void GBTree::Configure(Args const& cfg) {
if (specified_updater_) { if (specified_updater_) {
error::WarnManualUpdater(); error::WarnManualUpdater();
} }
if (model_.learner_model_param->IsVectorLeaf()) {
CHECK(tparam_.tree_method == TreeMethod::kHist || tparam_.tree_method == TreeMethod::kAuto)
<< "Only the hist tree method is supported for building multi-target trees with vector "
"leaf.";
CHECK(ctx_->IsCPU()) << "GPU is not yet supported for vector leaf.";
}
LOG(DEBUG) << "Using tree method: " << static_cast<int>(tparam_.tree_method); LOG(DEBUG) << "Using tree method: " << static_cast<int>(tparam_.tree_method);
if (!specified_updater_) { if (!specified_updater_) {
@ -225,6 +198,13 @@ void GBTree::UpdateTreeLeaf(DMatrix const* p_fmat, HostDeviceVector<float> const
void GBTree::DoBoost(DMatrix* p_fmat, HostDeviceVector<GradientPair>* in_gpair, void GBTree::DoBoost(DMatrix* p_fmat, HostDeviceVector<GradientPair>* in_gpair,
PredictionCacheEntry* predt, ObjFunction const* obj) { PredictionCacheEntry* predt, ObjFunction const* obj) {
if (model_.learner_model_param->IsVectorLeaf()) {
CHECK(tparam_.tree_method == TreeMethod::kHist || tparam_.tree_method == TreeMethod::kAuto)
<< "Only the hist tree method is supported for building multi-target trees with vector "
"leaf.";
CHECK(ctx_->IsCPU()) << "GPU is not yet supported for vector leaf.";
}
TreesOneIter new_trees; TreesOneIter new_trees;
bst_target_t const n_groups = model_.learner_model_param->OutputLength(); bst_target_t const n_groups = model_.learner_model_param->OutputLength();
monitor_.Start("BoostNewTrees"); monitor_.Start("BoostNewTrees");
@ -555,7 +535,7 @@ void GBTree::InplacePredict(std::shared_ptr<DMatrix> p_m, float missing,
auto [tree_begin, tree_end] = detail::LayerToTree(model_, layer_begin, layer_end); auto [tree_begin, tree_end] = detail::LayerToTree(model_, layer_begin, layer_end);
CHECK_LE(tree_end, model_.trees.size()) << "Invalid number of trees."; CHECK_LE(tree_end, model_.trees.size()) << "Invalid number of trees.";
if (p_m->Ctx()->Device() != this->ctx_->Device()) { if (p_m->Ctx()->Device() != this->ctx_->Device()) {
MismatchedDevices(this->ctx_, p_m->Ctx()); error::MismatchedDevices(this->ctx_, p_m->Ctx());
CHECK_EQ(out_preds->version, 0); CHECK_EQ(out_preds->version, 0);
auto proxy = std::dynamic_pointer_cast<data::DMatrixProxy>(p_m); auto proxy = std::dynamic_pointer_cast<data::DMatrixProxy>(p_m);
CHECK(proxy) << error::InplacePredictProxy(); CHECK(proxy) << error::InplacePredictProxy();
@ -808,7 +788,7 @@ class Dart : public GBTree {
auto n_groups = model_.learner_model_param->num_output_group; auto n_groups = model_.learner_model_param->num_output_group;
if (ctx_->Device() != p_fmat->Ctx()->Device()) { if (ctx_->Device() != p_fmat->Ctx()->Device()) {
MismatchedDevices(ctx_, p_fmat->Ctx()); error::MismatchedDevices(ctx_, p_fmat->Ctx());
auto proxy = std::dynamic_pointer_cast<data::DMatrixProxy>(p_fmat); auto proxy = std::dynamic_pointer_cast<data::DMatrixProxy>(p_fmat);
CHECK(proxy) << error::InplacePredictProxy(); CHECK(proxy) << error::InplacePredictProxy();
auto p_fmat = data::CreateDMatrixFromProxy(ctx_, proxy, missing); auto p_fmat = data::CreateDMatrixFromProxy(ctx_, proxy, missing);

View File

@ -9,8 +9,7 @@
#include "coordinate_common.h" #include "coordinate_common.h"
#include "xgboost/json.h" #include "xgboost/json.h"
namespace xgboost { namespace xgboost::linear {
namespace linear {
DMLC_REGISTER_PARAMETER(CoordinateParam); DMLC_REGISTER_PARAMETER(CoordinateParam);
DMLC_REGISTRY_FILE_TAG(updater_coordinate); DMLC_REGISTRY_FILE_TAG(updater_coordinate);
@ -39,8 +38,9 @@ class CoordinateUpdater : public LinearUpdater {
FromJson(config.at("linear_train_param"), &tparam_); FromJson(config.at("linear_train_param"), &tparam_);
FromJson(config.at("coordinate_param"), &cparam_); FromJson(config.at("coordinate_param"), &cparam_);
} }
void SaveConfig(Json* p_out) const override { void SaveConfig(Json *p_out) const override {
auto& out = *p_out; LOG(DEBUG) << "Save config for CPU updater.";
auto &out = *p_out;
out["linear_train_param"] = ToJson(tparam_); out["linear_train_param"] = ToJson(tparam_);
out["coordinate_param"] = ToJson(cparam_); out["coordinate_param"] = ToJson(cparam_);
} }
@ -99,5 +99,4 @@ class CoordinateUpdater : public LinearUpdater {
XGBOOST_REGISTER_LINEAR_UPDATER(CoordinateUpdater, "coord_descent") XGBOOST_REGISTER_LINEAR_UPDATER(CoordinateUpdater, "coord_descent")
.describe("Update linear model according to coordinate descent algorithm.") .describe("Update linear model according to coordinate descent algorithm.")
.set_body([]() { return new CoordinateUpdater(); }); .set_body([]() { return new CoordinateUpdater(); });
} // namespace linear } // namespace xgboost::linear
} // namespace xgboost

View File

@ -15,8 +15,7 @@
#include "../common/timer.h" #include "../common/timer.h"
#include "./param.h" #include "./param.h"
namespace xgboost { namespace xgboost::linear {
namespace linear {
DMLC_REGISTRY_FILE_TAG(updater_gpu_coordinate); DMLC_REGISTRY_FILE_TAG(updater_gpu_coordinate);
@ -29,7 +28,7 @@ DMLC_REGISTRY_FILE_TAG(updater_gpu_coordinate);
class GPUCoordinateUpdater : public LinearUpdater { // NOLINT class GPUCoordinateUpdater : public LinearUpdater { // NOLINT
public: public:
// set training parameter // set training parameter
void Configure(Args const& args) override { void Configure(Args const &args) override {
tparam_.UpdateAllowUnknown(args); tparam_.UpdateAllowUnknown(args);
coord_param_.UpdateAllowUnknown(args); coord_param_.UpdateAllowUnknown(args);
selector_.reset(FeatureSelector::Create(tparam_.feature_selector)); selector_.reset(FeatureSelector::Create(tparam_.feature_selector));
@ -41,8 +40,9 @@ class GPUCoordinateUpdater : public LinearUpdater { // NOLINT
FromJson(config.at("linear_train_param"), &tparam_); FromJson(config.at("linear_train_param"), &tparam_);
FromJson(config.at("coordinate_param"), &coord_param_); FromJson(config.at("coordinate_param"), &coord_param_);
} }
void SaveConfig(Json* p_out) const override { void SaveConfig(Json *p_out) const override {
auto& out = *p_out; LOG(DEBUG) << "Save config for GPU updater.";
auto &out = *p_out;
out["linear_train_param"] = ToJson(tparam_); out["linear_train_param"] = ToJson(tparam_);
out["coordinate_param"] = ToJson(coord_param_); out["coordinate_param"] = ToJson(coord_param_);
} }
@ -101,10 +101,9 @@ class GPUCoordinateUpdater : public LinearUpdater { // NOLINT
monitor_.Stop("LazyInitDevice"); monitor_.Stop("LazyInitDevice");
monitor_.Start("UpdateGpair"); monitor_.Start("UpdateGpair");
auto &in_gpair_host = in_gpair->ConstHostVector();
// Update gpair // Update gpair
if (ctx_->gpu_id >= 0) { if (ctx_->gpu_id >= 0) {
this->UpdateGpair(in_gpair_host); this->UpdateGpair(in_gpair->ConstHostVector());
} }
monitor_.Stop("UpdateGpair"); monitor_.Stop("UpdateGpair");
@ -249,5 +248,4 @@ XGBOOST_REGISTER_LINEAR_UPDATER(GPUCoordinateUpdater, "gpu_coord_descent")
"Update linear model according to coordinate descent algorithm. GPU " "Update linear model according to coordinate descent algorithm. GPU "
"accelerated.") "accelerated.")
.set_body([]() { return new GPUCoordinateUpdater(); }); .set_body([]() { return new GPUCoordinateUpdater(); });
} // namespace linear } // namespace xgboost::linear
} // namespace xgboost

View File

@ -1,5 +1,5 @@
/*! /**
* Copyright 2020-2022 by XGBoost Contributors * Copyright 2020-2023, XGBoost Contributors
*/ */
#include <algorithm> // std::max #include <algorithm> // std::max
#include <vector> #include <vector>
@ -11,9 +11,7 @@
#include "evaluate_splits.cuh" #include "evaluate_splits.cuh"
#include "expand_entry.cuh" #include "expand_entry.cuh"
namespace xgboost { namespace xgboost::tree {
namespace tree {
// With constraints // With constraints
XGBOOST_DEVICE float LossChangeMissing(const GradientPairInt64 &scan, XGBOOST_DEVICE float LossChangeMissing(const GradientPairInt64 &scan,
const GradientPairInt64 &missing, const GradientPairInt64 &missing,
@ -315,11 +313,11 @@ __device__ void SetCategoricalSplit(const EvaluateSplitSharedInputs &shared_inpu
common::Span<common::CatBitField::value_type> out, common::Span<common::CatBitField::value_type> out,
DeviceSplitCandidate *p_out_split) { DeviceSplitCandidate *p_out_split) {
auto &out_split = *p_out_split; auto &out_split = *p_out_split;
out_split.split_cats = common::CatBitField{out}; auto out_cats = common::CatBitField{out};
// Simple case for one hot split // Simple case for one hot split
if (common::UseOneHot(shared_inputs.FeatureBins(fidx), shared_inputs.param.max_cat_to_onehot)) { if (common::UseOneHot(shared_inputs.FeatureBins(fidx), shared_inputs.param.max_cat_to_onehot)) {
out_split.split_cats.Set(common::AsCat(out_split.thresh)); out_cats.Set(common::AsCat(out_split.thresh));
return; return;
} }
@ -339,7 +337,7 @@ __device__ void SetCategoricalSplit(const EvaluateSplitSharedInputs &shared_inpu
assert(partition > 0 && "Invalid partition."); assert(partition > 0 && "Invalid partition.");
thrust::for_each(thrust::seq, beg, beg + partition, [&](size_t c) { thrust::for_each(thrust::seq, beg, beg + partition, [&](size_t c) {
auto cat = shared_inputs.feature_values[c - node_offset]; auto cat = shared_inputs.feature_values[c - node_offset];
out_split.SetCat(cat); out_cats.Set(common::AsCat(cat));
}); });
} }
@ -427,8 +425,7 @@ void GPUHistEvaluator::EvaluateSplits(
if (split.is_cat) { if (split.is_cat) {
SetCategoricalSplit(shared_inputs, d_sorted_idx, fidx, i, SetCategoricalSplit(shared_inputs, d_sorted_idx, fidx, i,
device_cats_accessor.GetNodeCatStorage(input.nidx), device_cats_accessor.GetNodeCatStorage(input.nidx), &out_splits[i]);
&out_splits[i]);
} }
float base_weight = float base_weight =
@ -460,6 +457,4 @@ GPUExpandEntry GPUHistEvaluator::EvaluateSingleSplit(
cudaMemcpyDeviceToHost)); cudaMemcpyDeviceToHost));
return root_entry; return root_entry;
} }
} // namespace xgboost::tree
} // namespace tree
} // namespace xgboost

View File

@ -37,8 +37,8 @@ struct EvaluateSplitSharedInputs {
common::Span<const float> feature_values; common::Span<const float> feature_values;
common::Span<const float> min_fvalue; common::Span<const float> min_fvalue;
bool is_dense; bool is_dense;
XGBOOST_DEVICE auto Features() const { return feature_segments.size() - 1; } [[nodiscard]] XGBOOST_DEVICE auto Features() const { return feature_segments.size() - 1; }
__device__ auto FeatureBins(bst_feature_t fidx) const { [[nodiscard]] __device__ std::uint32_t FeatureBins(bst_feature_t fidx) const {
return feature_segments[fidx + 1] - feature_segments[fidx]; return feature_segments[fidx + 1] - feature_segments[fidx];
} }
}; };
@ -102,7 +102,7 @@ class GPUHistEvaluator {
} }
/** /**
* \brief Get device category storage of nidx for internal calculation. * @brief Get device category storage of nidx for internal calculation.
*/ */
auto DeviceCatStorage(const std::vector<bst_node_t> &nidx) { auto DeviceCatStorage(const std::vector<bst_node_t> &nidx) {
if (!has_categoricals_) return CatAccessor{}; if (!has_categoricals_) return CatAccessor{};
@ -117,8 +117,8 @@ class GPUHistEvaluator {
/** /**
* \brief Get sorted index storage based on the left node of inputs. * \brief Get sorted index storage based on the left node of inputs.
*/ */
auto SortedIdx(int num_nodes, bst_feature_t total_bins) { auto SortedIdx(int num_nodes, bst_bin_t total_bins) {
if(!need_sort_histogram_) return common::Span<bst_feature_t>(); if (!need_sort_histogram_) return common::Span<bst_feature_t>{};
cat_sorted_idx_.resize(num_nodes * total_bins); cat_sorted_idx_.resize(num_nodes * total_bins);
return dh::ToSpan(cat_sorted_idx_); return dh::ToSpan(cat_sorted_idx_);
} }
@ -142,12 +142,22 @@ class GPUHistEvaluator {
* \brief Get host category storage for nidx. Different from the internal version, this * \brief Get host category storage for nidx. Different from the internal version, this
* returns strictly 1 node. * returns strictly 1 node.
*/ */
common::Span<CatST const> GetHostNodeCats(bst_node_t nidx) const { [[nodiscard]] common::Span<CatST const> GetHostNodeCats(bst_node_t nidx) const {
copy_stream_.View().Sync(); copy_stream_.View().Sync();
auto cats_out = common::Span<CatST const>{h_split_cats_}.subspan( auto cats_out = common::Span<CatST const>{h_split_cats_}.subspan(
nidx * node_categorical_storage_size_, node_categorical_storage_size_); nidx * node_categorical_storage_size_, node_categorical_storage_size_);
return cats_out; return cats_out;
} }
[[nodiscard]] auto GetDeviceNodeCats(bst_node_t nidx) {
copy_stream_.View().Sync();
if (has_categoricals_) {
CatAccessor accessor = {dh::ToSpan(split_cats_), node_categorical_storage_size_};
return common::KCatBitField{accessor.GetNodeCatStorage(nidx)};
} else {
return common::KCatBitField{};
}
}
/** /**
* \brief Add a split to the internal tree evaluator. * \brief Add a split to the internal tree evaluator.
*/ */

View File

@ -64,7 +64,6 @@ struct DeviceSplitCandidate {
// split. // split.
bst_cat_t thresh{-1}; bst_cat_t thresh{-1};
common::CatBitField split_cats;
bool is_cat { false }; bool is_cat { false };
GradientPairInt64 left_sum; GradientPairInt64 left_sum;
@ -72,12 +71,6 @@ struct DeviceSplitCandidate {
XGBOOST_DEVICE DeviceSplitCandidate() {} // NOLINT XGBOOST_DEVICE DeviceSplitCandidate() {} // NOLINT
template <typename T>
XGBOOST_DEVICE void SetCat(T c) {
this->split_cats.Set(common::AsCat(c));
fvalue = std::max(this->fvalue, static_cast<float>(c));
}
XGBOOST_DEVICE void Update(float loss_chg_in, DefaultDirection dir_in, float fvalue_in, XGBOOST_DEVICE void Update(float loss_chg_in, DefaultDirection dir_in, float fvalue_in,
int findex_in, GradientPairInt64 left_sum_in, int findex_in, GradientPairInt64 left_sum_in,
GradientPairInt64 right_sum_in, bool cat, GradientPairInt64 right_sum_in, bool cat,
@ -100,22 +93,23 @@ struct DeviceSplitCandidate {
*/ */
XGBOOST_DEVICE void UpdateCat(float loss_chg_in, DefaultDirection dir_in, bst_cat_t thresh_in, XGBOOST_DEVICE void UpdateCat(float loss_chg_in, DefaultDirection dir_in, bst_cat_t thresh_in,
bst_feature_t findex_in, GradientPairInt64 left_sum_in, bst_feature_t findex_in, GradientPairInt64 left_sum_in,
GradientPairInt64 right_sum_in, GPUTrainingParam const& param, const GradientQuantiser& quantiser) { GradientPairInt64 right_sum_in, GPUTrainingParam const& param,
if (loss_chg_in > loss_chg && const GradientQuantiser& quantiser) {
quantiser.ToFloatingPoint(left_sum_in).GetHess() >= param.min_child_weight && if (loss_chg_in > loss_chg &&
quantiser.ToFloatingPoint(right_sum_in).GetHess() >= param.min_child_weight) { quantiser.ToFloatingPoint(left_sum_in).GetHess() >= param.min_child_weight &&
loss_chg = loss_chg_in; quantiser.ToFloatingPoint(right_sum_in).GetHess() >= param.min_child_weight) {
dir = dir_in; loss_chg = loss_chg_in;
fvalue = std::numeric_limits<float>::quiet_NaN(); dir = dir_in;
thresh = thresh_in; fvalue = std::numeric_limits<float>::quiet_NaN();
is_cat = true; thresh = thresh_in;
left_sum = left_sum_in; is_cat = true;
right_sum = right_sum_in; left_sum = left_sum_in;
findex = findex_in; right_sum = right_sum_in;
} findex = findex_in;
}
} }
XGBOOST_DEVICE bool IsValid() const { return loss_chg > 0.0f; } [[nodiscard]] XGBOOST_DEVICE bool IsValid() const { return loss_chg > 0.0f; }
friend std::ostream& operator<<(std::ostream& os, DeviceSplitCandidate const& c) { friend std::ostream& operator<<(std::ostream& os, DeviceSplitCandidate const& c) {
os << "loss_chg:" << c.loss_chg << ", " os << "loss_chg:" << c.loss_chg << ", "

View File

@ -7,9 +7,9 @@
#include <algorithm> #include <algorithm>
#include <cmath> #include <cmath>
#include <limits> #include <cstddef> // for size_t
#include <memory> #include <memory> // for unique_ptr, make_unique
#include <utility> #include <utility> // for move
#include <vector> #include <vector>
#include "../collective/communicator-inl.cuh" #include "../collective/communicator-inl.cuh"
@ -216,9 +216,9 @@ struct GPUHistMakerDevice {
void InitFeatureGroupsOnce() { void InitFeatureGroupsOnce() {
if (!feature_groups) { if (!feature_groups) {
CHECK(page); CHECK(page);
feature_groups.reset(new FeatureGroups(page->Cuts(), page->is_dense, feature_groups = std::make_unique<FeatureGroups>(page->Cuts(), page->is_dense,
dh::MaxSharedMemoryOptin(ctx_->gpu_id), dh::MaxSharedMemoryOptin(ctx_->gpu_id),
sizeof(GradientPairPrecise))); sizeof(GradientPairPrecise));
} }
} }
@ -244,10 +244,10 @@ struct GPUHistMakerDevice {
this->evaluator_.Reset(page->Cuts(), feature_types, dmat->Info().num_col_, param, ctx_->gpu_id); this->evaluator_.Reset(page->Cuts(), feature_types, dmat->Info().num_col_, param, ctx_->gpu_id);
quantiser.reset(new GradientQuantiser(this->gpair)); quantiser = std::make_unique<GradientQuantiser>(this->gpair);
row_partitioner.reset(); // Release the device memory first before reallocating row_partitioner.reset(); // Release the device memory first before reallocating
row_partitioner.reset(new RowPartitioner(ctx_->gpu_id, sample.sample_rows)); row_partitioner = std::make_unique<RowPartitioner>(ctx_->gpu_id, sample.sample_rows);
// Init histogram // Init histogram
hist.Init(ctx_->gpu_id, page->Cuts().TotalBins()); hist.Init(ctx_->gpu_id, page->Cuts().TotalBins());
@ -294,7 +294,7 @@ struct GPUHistMakerDevice {
dh::TemporaryArray<GPUExpandEntry> entries(2 * candidates.size()); dh::TemporaryArray<GPUExpandEntry> entries(2 * candidates.size());
// Store the feature set ptrs so they dont go out of scope before the kernel is called // Store the feature set ptrs so they dont go out of scope before the kernel is called
std::vector<std::shared_ptr<HostDeviceVector<bst_feature_t>>> feature_sets; std::vector<std::shared_ptr<HostDeviceVector<bst_feature_t>>> feature_sets;
for (size_t i = 0; i < candidates.size(); i++) { for (std::size_t i = 0; i < candidates.size(); i++) {
auto candidate = candidates.at(i); auto candidate = candidates.at(i);
int left_nidx = tree[candidate.nid].LeftChild(); int left_nidx = tree[candidate.nid].LeftChild();
int right_nidx = tree[candidate.nid].RightChild(); int right_nidx = tree[candidate.nid].RightChild();
@ -327,14 +327,13 @@ struct GPUHistMakerDevice {
d_node_inputs.data().get(), h_node_inputs.data(), d_node_inputs.data().get(), h_node_inputs.data(),
h_node_inputs.size() * sizeof(EvaluateSplitInputs), cudaMemcpyDefault)); h_node_inputs.size() * sizeof(EvaluateSplitInputs), cudaMemcpyDefault));
this->evaluator_.EvaluateSplits(nidx, max_active_features, this->evaluator_.EvaluateSplits(nidx, max_active_features, dh::ToSpan(d_node_inputs),
dh::ToSpan(d_node_inputs), shared_inputs, shared_inputs, dh::ToSpan(entries));
dh::ToSpan(entries));
dh::safe_cuda(cudaMemcpyAsync(pinned_candidates_out.data(), dh::safe_cuda(cudaMemcpyAsync(pinned_candidates_out.data(),
entries.data().get(), sizeof(GPUExpandEntry) * entries.size(), entries.data().get(), sizeof(GPUExpandEntry) * entries.size(),
cudaMemcpyDeviceToHost)); cudaMemcpyDeviceToHost));
dh::DefaultStream().Sync(); dh::DefaultStream().Sync();
} }
void BuildHist(int nidx) { void BuildHist(int nidx) {
auto d_node_hist = hist.GetNodeHistogram(nidx); auto d_node_hist = hist.GetNodeHistogram(nidx);
@ -366,23 +365,29 @@ struct GPUHistMakerDevice {
struct NodeSplitData { struct NodeSplitData {
RegTree::Node split_node; RegTree::Node split_node;
FeatureType split_type; FeatureType split_type;
common::CatBitField node_cats; common::KCatBitField node_cats;
}; };
void UpdatePosition(const std::vector<GPUExpandEntry>& candidates, RegTree* p_tree) { void UpdatePosition(std::vector<GPUExpandEntry> const& candidates, RegTree* p_tree) {
if (candidates.empty()) return; if (candidates.empty()) {
std::vector<int> nidx(candidates.size()); return;
std::vector<int> left_nidx(candidates.size()); }
std::vector<int> right_nidx(candidates.size());
std::vector<bst_node_t> nidx(candidates.size());
std::vector<bst_node_t> left_nidx(candidates.size());
std::vector<bst_node_t> right_nidx(candidates.size());
std::vector<NodeSplitData> split_data(candidates.size()); std::vector<NodeSplitData> split_data(candidates.size());
for (size_t i = 0; i < candidates.size(); i++) { for (size_t i = 0; i < candidates.size(); i++) {
auto& e = candidates[i]; auto const& e = candidates[i];
RegTree::Node split_node = (*p_tree)[e.nid]; RegTree::Node split_node = (*p_tree)[e.nid];
auto split_type = p_tree->NodeSplitType(e.nid); auto split_type = p_tree->NodeSplitType(e.nid);
nidx.at(i) = e.nid; nidx.at(i) = e.nid;
left_nidx.at(i) = split_node.LeftChild(); left_nidx.at(i) = split_node.LeftChild();
right_nidx.at(i) = split_node.RightChild(); right_nidx.at(i) = split_node.RightChild();
split_data.at(i) = NodeSplitData{split_node, split_type, e.split.split_cats}; split_data.at(i) = NodeSplitData{split_node, split_type, evaluator_.GetDeviceNodeCats(e.nid)};
CHECK_EQ(split_type == FeatureType::kCategorical, e.split.is_cat);
} }
auto d_matrix = page->GetDeviceAccessor(ctx_->gpu_id); auto d_matrix = page->GetDeviceAccessor(ctx_->gpu_id);
@ -390,7 +395,7 @@ struct GPUHistMakerDevice {
nidx, left_nidx, right_nidx, split_data, nidx, left_nidx, right_nidx, split_data,
[=] __device__(bst_uint ridx, const NodeSplitData& data) { [=] __device__(bst_uint ridx, const NodeSplitData& data) {
// given a row index, returns the node id it belongs to // given a row index, returns the node id it belongs to
bst_float cut_value = d_matrix.GetFvalue(ridx, data.split_node.SplitIndex()); float cut_value = d_matrix.GetFvalue(ridx, data.split_node.SplitIndex());
// Missing value // Missing value
bool go_left = true; bool go_left = true;
if (isnan(cut_value)) { if (isnan(cut_value)) {
@ -620,7 +625,6 @@ struct GPUHistMakerDevice {
CHECK(common::CheckNAN(candidate.split.fvalue)); CHECK(common::CheckNAN(candidate.split.fvalue));
std::vector<common::CatBitField::value_type> split_cats; std::vector<common::CatBitField::value_type> split_cats;
CHECK_GT(candidate.split.split_cats.Bits().size(), 0);
auto h_cats = this->evaluator_.GetHostNodeCats(candidate.nid); auto h_cats = this->evaluator_.GetHostNodeCats(candidate.nid);
auto n_bins_feature = page->Cuts().FeatureBins(candidate.split.findex); auto n_bins_feature = page->Cuts().FeatureBins(candidate.split.findex);
split_cats.resize(common::CatBitField::ComputeStorageSize(n_bins_feature), 0); split_cats.resize(common::CatBitField::ComputeStorageSize(n_bins_feature), 0);

View File

@ -0,0 +1,42 @@
/**
* Copyright 2023, XGBoost Contributors
*/
#include <gtest/gtest.h>
#include <xgboost/global_config.h> // for GlobalConfigThreadLocalStore
#include <xgboost/json.h> // for Json, Object
#include <xgboost/learner.h> // for Learner
#include <algorithm> // for transform
#include <string> // for string
#include <utility> // for swap
#include "../helpers.h" // for RandomDataGenerator
namespace xgboost {
TEST(GBlinear, DispatchUpdater) {
auto verbosity = 3;
std::swap(GlobalConfigThreadLocalStore::Get()->verbosity, verbosity);
auto test = [](std::string device) {
auto p_fmat = RandomDataGenerator{10, 10, 0.0f}.GenerateDMatrix(true);
std::unique_ptr<Learner> learner{Learner::Create({p_fmat})};
learner->SetParams(
Args{{"booster", "gblinear"}, {"updater", "coord_descent"}, {"device", device}});
learner->Configure();
for (std::int32_t iter = 0; iter < 3; ++iter) {
learner->UpdateOneIter(iter, p_fmat);
}
Json config{Object{}};
::testing::internal::CaptureStderr();
learner->SaveConfig(&config);
auto str = ::testing::internal::GetCapturedStderr();
std::transform(device.cbegin(), device.cend(), device.begin(),
[](char c) { return std::toupper(c); });
ASSERT_NE(str.find(device), std::string::npos);
};
test("cpu");
test("gpu");
std::swap(GlobalConfigThreadLocalStore::Get()->verbosity, verbosity);
}
} // namespace xgboost

View File

@ -58,21 +58,6 @@ void TestInplaceFallback(Context const* ctx) {
HostDeviceVector<float>* out_predt{nullptr}; HostDeviceVector<float>* out_predt{nullptr};
ConsoleLogger::Configure(Args{{"verbosity", "1"}}); ConsoleLogger::Configure(Args{{"verbosity", "1"}});
std::string output; std::string output;
// test whether the warning is raised
#if !defined(_WIN32)
// Windows has issue with CUDA and thread local storage. For some reason, on Windows a
// cudaInitializationError is raised during destruction of `HostDeviceVector`. This
// might be related to https://github.com/dmlc/xgboost/issues/5793
::testing::internal::CaptureStderr();
std::thread{[&] {
// Launch a new thread to ensure a warning is raised as we prevent over-verbose
// warning by using thread-local flags.
learner->InplacePredict(p_m, PredictionType::kValue, std::numeric_limits<float>::quiet_NaN(),
&out_predt, 0, 0);
}}.join();
output = testing::internal::GetCapturedStderr();
ASSERT_NE(output.find("Falling back"), std::string::npos);
#endif
learner->InplacePredict(p_m, PredictionType::kValue, std::numeric_limits<float>::quiet_NaN(), learner->InplacePredict(p_m, PredictionType::kValue, std::numeric_limits<float>::quiet_NaN(),
&out_predt, 0, 0); &out_predt, 0, 0);

View File

@ -191,14 +191,32 @@ class TestGPUPredict:
np.testing.assert_allclose(predt_0, predt_3) np.testing.assert_allclose(predt_0, predt_3)
np.testing.assert_allclose(predt_0, predt_4) np.testing.assert_allclose(predt_0, predt_4)
def run_inplace_base_margin(self, booster, dtrain, X, base_margin): def run_inplace_base_margin(
self, device: int, booster: xgb.Booster, dtrain: xgb.DMatrix, X, base_margin
) -> None:
import cupy as cp import cupy as cp
booster.set_param({"device": f"cuda:{device}"})
dtrain.set_info(base_margin=base_margin) dtrain.set_info(base_margin=base_margin)
from_inplace = booster.inplace_predict(data=X, base_margin=base_margin) from_inplace = booster.inplace_predict(data=X, base_margin=base_margin)
from_dmatrix = booster.predict(dtrain) from_dmatrix = booster.predict(dtrain)
cp.testing.assert_allclose(from_inplace, from_dmatrix) cp.testing.assert_allclose(from_inplace, from_dmatrix)
booster = booster.copy() # clear prediction cache.
booster.set_param({"device": "cpu"})
from_inplace = booster.inplace_predict(data=X, base_margin=base_margin)
from_dmatrix = booster.predict(dtrain)
cp.testing.assert_allclose(from_inplace, from_dmatrix)
booster = booster.copy() # clear prediction cache.
base_margin = cp.asnumpy(base_margin)
if hasattr(X, "values"):
X = cp.asnumpy(X.values)
booster.set_param({"device": f"cuda:{device}"})
from_inplace = booster.inplace_predict(data=X, base_margin=base_margin)
from_dmatrix = booster.predict(dtrain)
cp.testing.assert_allclose(from_inplace, from_dmatrix, rtol=1e-6)
def run_inplace_predict_cupy(self, device: int) -> None: def run_inplace_predict_cupy(self, device: int) -> None:
import cupy as cp import cupy as cp
@ -244,7 +262,7 @@ class TestGPUPredict:
run_threaded_predict(X, rows, predict_dense) run_threaded_predict(X, rows, predict_dense)
base_margin = cp_rng.randn(rows) base_margin = cp_rng.randn(rows)
self.run_inplace_base_margin(booster, dtrain, X, base_margin) self.run_inplace_base_margin(device, booster, dtrain, X, base_margin)
# Create a wide dataset # Create a wide dataset
X = cp_rng.randn(100, 10000) X = cp_rng.randn(100, 10000)
@ -318,7 +336,7 @@ class TestGPUPredict:
run_threaded_predict(X, rows, predict_df) run_threaded_predict(X, rows, predict_df)
base_margin = cudf.Series(rng.randn(rows)) base_margin = cudf.Series(rng.randn(rows))
self.run_inplace_base_margin(booster, dtrain, X, base_margin) self.run_inplace_base_margin(0, booster, dtrain, X, base_margin)
@given( @given(
strategies.integers(1, 10), tm.make_dataset_strategy(), shap_parameter_strategy strategies.integers(1, 10), tm.make_dataset_strategy(), shap_parameter_strategy

View File

@ -2,6 +2,7 @@ import json
import logging import logging
import subprocess import subprocess
import numpy as np
import pytest import pytest
import sklearn import sklearn
@ -13,7 +14,7 @@ from pyspark.ml.linalg import Vectors
from pyspark.ml.tuning import CrossValidator, ParamGridBuilder from pyspark.ml.tuning import CrossValidator, ParamGridBuilder
from pyspark.sql import SparkSession from pyspark.sql import SparkSession
from xgboost.spark import SparkXGBClassifier, SparkXGBRegressor from xgboost.spark import SparkXGBClassifier, SparkXGBRegressor, SparkXGBRegressorModel
gpu_discovery_script_path = "tests/test_distributed/test_gpu_with_spark/discover_gpu.sh" gpu_discovery_script_path = "tests/test_distributed/test_gpu_with_spark/discover_gpu.sh"
@ -242,3 +243,33 @@ def test_sparkxgb_regressor_feature_cols_with_gpu(spark_diabetes_dataset_feature
evaluator = RegressionEvaluator(metricName="rmse") evaluator = RegressionEvaluator(metricName="rmse")
rmse = evaluator.evaluate(pred_result_df) rmse = evaluator.evaluate(pred_result_df)
assert rmse <= 65.0 assert rmse <= 65.0
def test_gpu_transform(spark_diabetes_dataset) -> None:
regressor = SparkXGBRegressor(device="cuda", num_workers=num_workers)
train_df, test_df = spark_diabetes_dataset
model: SparkXGBRegressorModel = regressor.fit(train_df)
# The model trained with GPUs, and transform with GPU configurations.
assert model._gpu_transform()
model.set_device("cpu")
assert not model._gpu_transform()
# without error
cpu_rows = model.transform(test_df).select("prediction").collect()
regressor = SparkXGBRegressor(device="cpu", num_workers=num_workers)
model = regressor.fit(train_df)
# The model trained with CPUs. Even with GPU configurations,
# still prefer transforming with CPUs
assert not model._gpu_transform()
# Set gpu transform explicitly.
model.set_device("cuda")
assert model._gpu_transform()
# without error
gpu_rows = model.transform(test_df).select("prediction").collect()
for cpu, gpu in zip(cpu_rows, gpu_rows):
np.testing.assert_allclose(cpu.prediction, gpu.prediction, atol=1e-3)

View File

@ -888,6 +888,34 @@ class TestPySparkLocal:
clf = SparkXGBClassifier(device="cuda") clf = SparkXGBClassifier(device="cuda")
clf._validate_params() clf._validate_params()
def test_gpu_transform(self, clf_data: ClfData) -> None:
"""local mode"""
classifier = SparkXGBClassifier(device="cpu")
model: SparkXGBClassifierModel = classifier.fit(clf_data.cls_df_train)
with tempfile.TemporaryDirectory() as tmpdir:
path = "file:" + tmpdir
model.write().overwrite().save(path)
# The model trained with CPU, transform defaults to cpu
assert not model._gpu_transform()
# without error
model.transform(clf_data.cls_df_test).collect()
model.set_device("cuda")
assert model._gpu_transform()
model_loaded = SparkXGBClassifierModel.load(path)
# The model trained with CPU, transform defaults to cpu
assert not model_loaded._gpu_transform()
# without error
model_loaded.transform(clf_data.cls_df_test).collect()
model_loaded.set_device("cuda")
assert model_loaded._gpu_transform()
class XgboostLocalTest(SparkTestCase): class XgboostLocalTest(SparkTestCase):
def setUp(self): def setUp(self):