Compare commits

...

11 Commits

Author SHA1 Message Date
Jiaming Yuan
096047c547
Make 2.0 release. (#9567) 2023-09-12 00:20:49 +08:00
Jiaming Yuan
e75dd75bb2
[backport] [pyspark] support gpu transform (#9542) (#9559)
---------

Co-authored-by: Bobby Wang <wbo4958@gmail.com>
2023-09-07 17:21:09 +08:00
Jiaming Yuan
4d387cbfbf
[backport] [pyspark] rework transform to reuse same code (#9292) (#9558)
Co-authored-by: Bobby Wang <wbo4958@gmail.com>
2023-09-07 15:26:24 +08:00
Jiaming Yuan
3fde9361d7
[backport] Fix inplace predict with fallback when base margin is used. (#9536) (#9548)
- Copy meta info from proxy DMatrix.
- Use `std::call_once` to emit fewer warnings.
2023-09-05 23:38:06 +08:00
Jiaming Yuan
b67c2ed96d
[backport] [CI] bump setup-r action version. (#9544) (#9551) 2023-09-05 22:10:30 +08:00
Jiaming Yuan
177fd79864
[backport] Fix read the doc configuration. [skip ci] (#9549) 2023-09-05 17:32:00 +08:00
Jiaming Yuan
06487d3896
[backport] Fix GPU categorical split memory allocation. (#9529) (#9535) 2023-08-29 21:14:43 +08:00
Jiaming Yuan
e50ccc4d3c
[R] Fix integer inputs with NA. (#9522) (#9534) 2023-08-29 19:52:13 +08:00
Jiaming Yuan
add57f8880
[backport] Delay the check for vector leaf. (#9509) (#9533) 2023-08-29 18:25:59 +08:00
Jiaming Yuan
a0d3573c74
[backport] Fix device dispatch for linear updater. (#9507) (#9532) 2023-08-29 15:10:43 +08:00
Jiaming Yuan
4301558a57
Make 2.0.0 RC1. (#9492) 2023-08-17 16:16:51 +08:00
36 changed files with 575 additions and 327 deletions

View File

@@ -25,7 +25,7 @@ jobs:
with:
submodules: 'true'
- uses: r-lib/actions/setup-r@50d1eae9b8da0bb3f8582c59a5b82225fa2fe7f2 # v2.3.1
- uses: r-lib/actions/setup-r@11a22a908006c25fe054c4ef0ac0436b1de3edbe # v2.6.4
with:
r-version: ${{ matrix.config.r }}
@@ -64,7 +64,7 @@ jobs:
with:
submodules: 'true'
- uses: r-lib/actions/setup-r@50d1eae9b8da0bb3f8582c59a5b82225fa2fe7f2 # v2.3.1
- uses: r-lib/actions/setup-r@11a22a908006c25fe054c4ef0ac0436b1de3edbe # v2.6.4
with:
r-version: ${{ matrix.config.r }}

View File

@@ -32,4 +32,3 @@ formats:
python:
install:
- requirements: doc/requirements.txt
system_packages: true

View File

@@ -2,7 +2,7 @@ Package: xgboost
Type: Package
Title: Extreme Gradient Boosting
Version: 2.0.0.1
Date: 2022-10-18
Date: 2023-09-11
Authors@R: c(
person("Tianqi", "Chen", role = c("aut"),
email = "tianqi.tchen@gmail.com"),

View File

@@ -120,11 +120,25 @@ XGB_DLL SEXP XGDMatrixCreateFromMat_R(SEXP mat, SEXP missing, SEXP n_threads) {
ctx.nthread = asInteger(n_threads);
std::int32_t threads = ctx.Threads();
xgboost::common::ParallelFor(nrow, threads, [&](xgboost::omp_ulong i) {
for (size_t j = 0; j < ncol; ++j) {
data[i * ncol + j] = is_int ? static_cast<float>(iin[i + nrow * j]) : din[i + nrow * j];
}
});
if (is_int) {
xgboost::common::ParallelFor(nrow, threads, [&](xgboost::omp_ulong i) {
for (size_t j = 0; j < ncol; ++j) {
auto v = iin[i + nrow * j];
if (v == NA_INTEGER) {
data[i * ncol + j] = std::numeric_limits<float>::quiet_NaN();
} else {
data[i * ncol + j] = static_cast<float>(v);
}
}
});
} else {
xgboost::common::ParallelFor(nrow, threads, [&](xgboost::omp_ulong i) {
for (size_t j = 0; j < ncol; ++j) {
data[i * ncol + j] = din[i + nrow * j];
}
});
}
DMatrixHandle handle;
CHECK_CALL(XGDMatrixCreateFromMat_omp(BeginPtr(data), nrow, ncol,
asReal(missing), &handle, threads));
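
For comparison, a minimal Python sketch of the convention this R fix restores: missing entries must reach XGBoost as NaN, which the library treats as missing by default (np.nan stands in here for R's NA_INTEGER; this sketch is not part of the diff):

    import numpy as np
    import xgboost as xgb

    X = np.array([[1, 2], [4, 3], [2, 1]], dtype=np.float32)
    X[0, 0] = np.nan  # analogue of the NA entry in the R test below
    dmat = xgb.DMatrix(X)  # NaN is interpreted as a missing value by default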

View File

@@ -56,6 +56,42 @@ test_that("xgb.DMatrix: basic construction", {
expect_equal(raw_fd, raw_dgc)
})
test_that("xgb.DMatrix: NA", {
n_samples <- 3
x <- cbind(
x1 = sample(x = 4, size = n_samples, replace = TRUE),
x2 = sample(x = 4, size = n_samples, replace = TRUE)
)
x[1, "x1"] <- NA
m <- xgb.DMatrix(x)
xgb.DMatrix.save(m, "int.dmatrix")
x <- matrix(as.numeric(x), nrow = n_samples, ncol = 2)
colnames(x) <- c("x1", "x2")
m <- xgb.DMatrix(x)
xgb.DMatrix.save(m, "float.dmatrix")
iconn <- file("int.dmatrix", "rb")
fconn <- file("float.dmatrix", "rb")
expect_equal(file.size("int.dmatrix"), file.size("float.dmatrix"))
bytes <- file.size("int.dmatrix")
idmatrix <- readBin(iconn, "raw", n = bytes)
fdmatrix <- readBin(fconn, "raw", n = bytes)
expect_equal(length(idmatrix), length(fdmatrix))
expect_equal(idmatrix, fdmatrix)
close(iconn)
close(fconn)
file.remove("int.dmatrix")
file.remove("float.dmatrix")
})
test_that("xgb.DMatrix: saving, loading", {
# save to a local file
dtest1 <- xgb.DMatrix(test_data, label = test_label)

View File

@@ -329,7 +329,7 @@ Parameters for Linear Booster (``booster=gblinear``)
- Choice of algorithm to fit linear model
- ``shotgun``: Parallel coordinate descent algorithm based on shotgun algorithm. Uses 'hogwild' parallelism and therefore produces a nondeterministic solution on each run.
- ``coord_descent``: Ordinary coordinate descent algorithm. Also multithreaded but still produces a deterministic solution.
- ``coord_descent``: Ordinary coordinate descent algorithm. Also multithreaded but still produces a deterministic solution. When the ``device`` parameter is set to ``cuda`` or ``gpu``, a GPU variant will be used (a short sketch follows this list).
* ``feature_selector`` [default= ``cyclic``]
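
A hedged sketch of the dispatch described above:

    import xgboost as xgb

    # dtrain is assumed to be an existing xgboost.DMatrix.
    params = {
        "booster": "gblinear",
        "updater": "coord_descent",
        "device": "cuda",  # coord_descent dispatches to its GPU variant
    }
    booster = xgb.train(params, dtrain, num_boost_round=10)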

View File

@@ -1,5 +1,5 @@
/*!
* Copyright 2020 by Contributors
/**
* Copyright 2020-2023, XGBoost Contributors
* \file global_config.h
* \brief Global configuration for XGBoost
* \author Hyunsu Cho
@@ -7,24 +7,22 @@
#ifndef XGBOOST_GLOBAL_CONFIG_H_
#define XGBOOST_GLOBAL_CONFIG_H_
#include <xgboost/parameter.h>
#include <vector>
#include <string>
#include <dmlc/thread_local.h> // for ThreadLocalStore
#include <xgboost/parameter.h> // for XGBoostParameter
#include <cstdint> // for int32_t
namespace xgboost {
class Json;
struct GlobalConfiguration : public XGBoostParameter<GlobalConfiguration> {
int verbosity { 1 };
bool use_rmm { false };
std::int32_t verbosity{1};
bool use_rmm{false};
DMLC_DECLARE_PARAMETER(GlobalConfiguration) {
DMLC_DECLARE_FIELD(verbosity)
.set_range(0, 3)
.set_default(1) // shows only warning
.describe("Flag to print out detailed breakdown of runtime.");
DMLC_DECLARE_FIELD(use_rmm)
.set_default(false)
.describe("Whether to use RAPIDS Memory Manager to allocate GPU memory in XGBoost");
DMLC_DECLARE_FIELD(use_rmm).set_default(false).describe(
"Whether to use RAPIDS Memory Manager to allocate GPU memory in XGBoost");
}
};
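
On the Python side, these two fields surface through the global-config API; a minimal sketch:

    import xgboost as xgb

    xgb.set_config(verbosity=2, use_rmm=False)
    print(xgb.get_config())  # includes 'verbosity' and 'use_rmm'

    with xgb.config_context(verbosity=0):
        pass  # warnings suppressed inside this scope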

View File

@@ -6,7 +6,7 @@
<groupId>ml.dmlc</groupId>
<artifactId>xgboost-jvm</artifactId>
<version>2.0.0-SNAPSHOT</version>
<version>2.0.0</version>
<packaging>pom</packaging>
<name>XGBoost JVM Package</name>
<description>JVM Package for XGBoost</description>

View File

@@ -6,11 +6,11 @@
<parent>
<groupId>ml.dmlc</groupId>
<artifactId>xgboost-jvm</artifactId>
<version>2.0.0-SNAPSHOT</version>
<version>2.0.0</version>
</parent>
<name>xgboost4j-example</name>
<artifactId>xgboost4j-example_${scala.binary.version}</artifactId>
<version>2.0.0-SNAPSHOT</version>
<version>2.0.0</version>
<packaging>jar</packaging>
<build>
<plugins>

View File

@@ -6,12 +6,12 @@
<parent>
<groupId>ml.dmlc</groupId>
<artifactId>xgboost-jvm</artifactId>
<version>2.0.0-SNAPSHOT</version>
<version>2.0.0</version>
</parent>
<name>xgboost4j-flink</name>
<artifactId>xgboost4j-flink_${scala.binary.version}</artifactId>
<version>2.0.0-SNAPSHOT</version>
<version>2.0.0</version>
<properties>
<flink-ml.version>2.2.0</flink-ml.version>
</properties>

View File

@@ -6,11 +6,11 @@
<parent>
<groupId>ml.dmlc</groupId>
<artifactId>xgboost-jvm</artifactId>
<version>2.0.0-SNAPSHOT</version>
<version>2.0.0</version>
</parent>
<artifactId>xgboost4j-gpu_${scala.binary.version}</artifactId>
<name>xgboost4j-gpu</name>
<version>2.0.0-SNAPSHOT</version>
<version>2.0.0</version>
<packaging>jar</packaging>
<dependencies>

View File

@@ -6,7 +6,7 @@
<parent>
<groupId>ml.dmlc</groupId>
<artifactId>xgboost-jvm</artifactId>
<version>2.0.0-SNAPSHOT</version>
<version>2.0.0</version>
</parent>
<name>xgboost4j-spark-gpu</name>
<artifactId>xgboost4j-spark-gpu_${scala.binary.version}</artifactId>

View File

@@ -6,7 +6,7 @@
<parent>
<groupId>ml.dmlc</groupId>
<artifactId>xgboost-jvm</artifactId>
<version>2.0.0-SNAPSHOT</version>
<version>2.0.0</version>
</parent>
<name>xgboost4j-spark</name>
<artifactId>xgboost4j-spark_${scala.binary.version}</artifactId>

View File

@@ -6,11 +6,11 @@
<parent>
<groupId>ml.dmlc</groupId>
<artifactId>xgboost-jvm</artifactId>
<version>2.0.0-SNAPSHOT</version>
<version>2.0.0</version>
</parent>
<name>xgboost4j</name>
<artifactId>xgboost4j_${scala.binary.version}</artifactId>
<version>2.0.0-SNAPSHOT</version>
<version>2.0.0</version>
<packaging>jar</packaging>
<dependencies>

View File

@@ -7,7 +7,7 @@ build-backend = "packager.pep517"
[project]
name = "xgboost"
version = "2.0.0-dev"
version = "2.0.0"
authors = [
{ name = "Hyunsu Cho", email = "chohyu01@cs.washington.edu" },
{ name = "Jiaming Yuan", email = "jm.yuan@outlook.com" }

View File

@@ -1 +1 @@
2.0.0-dev
2.0.0

View File

@@ -88,6 +88,18 @@ def is_cudf_available() -> bool:
return False
def is_cupy_available() -> bool:
"""Check cupy package available or not"""
if importlib.util.find_spec("cupy") is None:
return False
try:
import cupy
return True
except ImportError:
return False
try:
import scipy.sparse as scipy_sparse
from scipy.sparse import csr_matrix as scipy_csr
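
A hedged usage sketch of the new helper, mirroring how the PySpark transform below guards its GPU path:

    from xgboost.compat import is_cupy_available

    if is_cupy_available():
        import cupy as cp
        n_gpus = cp.cuda.runtime.getDeviceCount()
    else:
        n_gpus = 0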

View File

@@ -59,11 +59,12 @@ from scipy.special import expit, softmax # pylint: disable=no-name-in-module
import xgboost
from xgboost import XGBClassifier
from xgboost.compat import is_cudf_available
from xgboost.compat import is_cudf_available, is_cupy_available
from xgboost.core import Booster, _check_distributed_params
from xgboost.sklearn import DEFAULT_N_ESTIMATORS, XGBModel, _can_use_qdm
from xgboost.training import train as worker_train
from .._typing import ArrayLike
from .data import (
_read_csr_matrix_from_unwrapped_spark_vec,
alias,
@@ -241,6 +242,13 @@ class _SparkXGBParams(
TypeConverters.toList,
)
def set_device(self, value: str) -> "_SparkXGBParams":
"""Set device, optional value: cpu, cuda, gpu"""
_check_distributed_params({"device": value})
assert value in ("cpu", "cuda", "gpu")
self.set(self.device, value)
return self
@classmethod
def _xgb_cls(cls) -> Type[XGBModel]:
"""
@@ -1117,12 +1125,111 @@ class _SparkXGBModel(Model, _SparkXGBParams, MLReadable, MLWritable):
)
return features_col, feature_col_names
def _get_pred_contrib_col_name(self) -> Optional[str]:
"""Return the pred_contrib_col col name"""
pred_contrib_col_name = None
if (
self.isDefined(self.pred_contrib_col)
and self.getOrDefault(self.pred_contrib_col) != ""
):
pred_contrib_col_name = self.getOrDefault(self.pred_contrib_col)
return pred_contrib_col_name
def _out_schema(self) -> Tuple[bool, str]:
"""Return the bool to indicate if it's a single prediction, true is single prediction,
and the returned type of the user-defined function. The value must
be a DDL-formatted type string."""
if self._get_pred_contrib_col_name() is not None:
return False, f"{pred.prediction} double, {pred.pred_contrib} array<double>"
return True, "double"
def _get_predict_func(self) -> Callable:
"""Return the true prediction function which will be running on the executor side"""
predict_params = self._gen_predict_params_dict()
pred_contrib_col_name = self._get_pred_contrib_col_name()
def _predict(
model: XGBModel, X: ArrayLike, base_margin: Optional[ArrayLike]
) -> Union[pd.DataFrame, pd.Series]:
data = {}
preds = model.predict(
X,
base_margin=base_margin,
validate_features=False,
**predict_params,
)
data[pred.prediction] = pd.Series(preds)
if pred_contrib_col_name is not None:
contribs = pred_contribs(model, X, base_margin)
data[pred.pred_contrib] = pd.Series(list(contribs))
return pd.DataFrame(data=data)
return data[pred.prediction]
return _predict
def _post_transform(self, dataset: DataFrame, pred_col: Column) -> DataFrame:
"""Post process of transform"""
prediction_col_name = self.getOrDefault(self.predictionCol)
single_pred, _ = self._out_schema()
if single_pred:
if prediction_col_name:
dataset = dataset.withColumn(prediction_col_name, pred_col)
else:
pred_struct_col = "_prediction_struct"
dataset = dataset.withColumn(pred_struct_col, pred_col)
if prediction_col_name:
dataset = dataset.withColumn(
prediction_col_name, getattr(col(pred_struct_col), pred.prediction)
)
pred_contrib_col_name = self._get_pred_contrib_col_name()
if pred_contrib_col_name is not None:
dataset = dataset.withColumn(
pred_contrib_col_name,
array_to_vector(getattr(col(pred_struct_col), pred.pred_contrib)),
)
dataset = dataset.drop(pred_struct_col)
return dataset
def _gpu_transform(self) -> bool:
"""If gpu is used to do the prediction, true to gpu prediction"""
if _is_local(_get_spark_session().sparkContext):
# In local mode, just use the internal "device" parameter.
return use_cuda(self.getOrDefault(self.device))
gpu_per_task = (
_get_spark_session()
.sparkContext.getConf()
.get("spark.task.resource.gpu.amount")
)
# The user didn't set any GPU configuration; just use the CPU.
if gpu_per_task is None:
if use_cuda(self.getOrDefault(self.device)):
get_logger("XGBoost-PySpark").warning(
"Do the prediction on the CPUs since "
"no gpu configurations are set"
)
return False
# The user has already set GPU configurations; just use the internal "device".
return use_cuda(self.getOrDefault(self.device))
def _transform(self, dataset: DataFrame) -> DataFrame:
# pylint: disable=too-many-statements, too-many-locals
# Save xgb_sklearn_model and predict_params as local variables
# to avoid pickling the `self` object to the executors.
xgb_sklearn_model = self._xgb_sklearn_model
predict_params = self._gen_predict_params_dict()
has_base_margin = False
if (
@@ -1137,79 +1244,92 @@ class _SparkXGBModel(Model, _SparkXGBParams, MLReadable, MLWritable):
features_col, feature_col_names = self._get_feature_col(dataset)
enable_sparse_data_optim = self.getOrDefault(self.enable_sparse_data_optim)
pred_contrib_col_name = None
if (
self.isDefined(self.pred_contrib_col)
and self.getOrDefault(self.pred_contrib_col) != ""
):
pred_contrib_col_name = self.getOrDefault(self.pred_contrib_col)
predict_func = self._get_predict_func()
single_pred = True
schema = "double"
if pred_contrib_col_name:
single_pred = False
schema = f"{pred.prediction} double, {pred.pred_contrib} array<double>"
_, schema = self._out_schema()
is_local = _is_local(_get_spark_session().sparkContext)
run_on_gpu = self._gpu_transform()
@pandas_udf(schema) # type: ignore
def predict_udf(iterator: Iterator[pd.DataFrame]) -> Iterator[pd.Series]:
assert xgb_sklearn_model is not None
model = xgb_sklearn_model
from pyspark import TaskContext
context = TaskContext.get()
assert context is not None
dev_ordinal = -1
if is_cudf_available():
if is_local:
if run_on_gpu and is_cupy_available():
import cupy as cp # pylint: disable=import-error
total_gpus = cp.cuda.runtime.getDeviceCount()
if total_gpus > 0:
partition_id = context.partitionId()
# In local-mode transform, default dev_ordinal to
# (partition id) % total_gpus.
dev_ordinal = partition_id % total_gpus
elif run_on_gpu:
dev_ordinal = _get_gpu_id(context)
if dev_ordinal >= 0:
device = "cuda:" + str(dev_ordinal)
get_logger("XGBoost-PySpark").info(
"Do the inference with device: %s", device
)
model.set_params(device=device)
else:
get_logger("XGBoost-PySpark").info("Do the inference on the CPUs")
else:
msg = (
"CUDF is unavailable, fallback the inference on the CPUs"
if run_on_gpu
else "Do the inference on the CPUs"
)
get_logger("XGBoost-PySpark").info(msg)
def to_gpu_if_possible(data: ArrayLike) -> ArrayLike:
"""Move the data to gpu if possible"""
if dev_ordinal >= 0:
import cudf # pylint: disable=import-error
import cupy as cp # pylint: disable=import-error
# We must set the device after importing cudf, as the import changes the device id to 0.
# See https://github.com/rapidsai/cudf/issues/11386
cp.cuda.runtime.setDevice(dev_ordinal) # pylint: disable=I1101
df = cudf.DataFrame(data)
del data
return df
return data
for data in iterator:
if enable_sparse_data_optim:
X = _read_csr_matrix_from_unwrapped_spark_vec(data)
else:
if feature_col_names is not None:
X = data[feature_col_names]
tmp = data[feature_col_names]
else:
X = stack_series(data[alias.data])
tmp = stack_series(data[alias.data])
X = to_gpu_if_possible(tmp)
if has_base_margin:
base_margin = data[alias.margin].to_numpy()
base_margin = to_gpu_if_possible(data[alias.margin])
else:
base_margin = None
data = {}
preds = model.predict(
X,
base_margin=base_margin,
validate_features=False,
**predict_params,
)
data[pred.prediction] = pd.Series(preds)
if pred_contrib_col_name:
contribs = pred_contribs(model, X, base_margin)
data[pred.pred_contrib] = pd.Series(list(contribs))
yield pd.DataFrame(data=data)
else:
yield data[pred.prediction]
yield predict_func(model, X, base_margin)
if has_base_margin:
pred_col = predict_udf(struct(*features_col, base_margin_col))
else:
pred_col = predict_udf(struct(*features_col))
prediction_col_name = self.getOrDefault(self.predictionCol)
if single_pred:
dataset = dataset.withColumn(prediction_col_name, pred_col)
else:
pred_struct_col = "_prediction_struct"
dataset = dataset.withColumn(pred_struct_col, pred_col)
dataset = dataset.withColumn(
prediction_col_name, getattr(col(pred_struct_col), pred.prediction)
)
if pred_contrib_col_name:
dataset = dataset.withColumn(
pred_contrib_col_name,
array_to_vector(getattr(col(pred_struct_col), pred.pred_contrib)),
)
dataset = dataset.drop(pred_struct_col)
return dataset
return self._post_transform(dataset, pred_col)
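# A hedged usage sketch (not part of the diff): with the rework above, the
# device decision is made once in _gpu_transform() and the CPU and GPU paths
# share the same predict_udf. Assuming a fitted model and a DataFrame df:
#
#   model.set_device("cuda")     # request GPU inference
#   preds = model.transform(df)  # predict_udf runs on the executors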
class _ClassificationModel( # pylint: disable=abstract-method
@@ -1221,22 +1341,21 @@ class _ClassificationModel( # pylint: disable=abstract-method
.. Note:: This API is experimental.
"""
def _transform(self, dataset: DataFrame) -> DataFrame:
# pylint: disable=too-many-statements, too-many-locals
# Save xgb_sklearn_model and predict_params to be local variable
# to avoid the `self` object to be pickled to remote.
xgb_sklearn_model = self._xgb_sklearn_model
predict_params = self._gen_predict_params_dict()
def _out_schema(self) -> Tuple[bool, str]:
schema = (
f"{pred.raw_prediction} array<double>, {pred.prediction} double,"
f" {pred.probability} array<double>"
)
if self._get_pred_contrib_col_name() is not None:
# We force strict_shape to True when predicting contribs,
# so the output also has a 3-D shape.
schema = f"{schema}, {pred.pred_contrib} array<array<double>>"
has_base_margin = False
if (
self.isDefined(self.base_margin_col)
and self.getOrDefault(self.base_margin_col) != ""
):
has_base_margin = True
base_margin_col = col(self.getOrDefault(self.base_margin_col)).alias(
alias.margin
)
return False, schema
def _get_predict_func(self) -> Callable:
predict_params = self._gen_predict_params_dict()
pred_contrib_col_name = self._get_pred_contrib_col_name()
def transform_margin(margins: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
if margins.ndim == 1:
@@ -1251,76 +1370,38 @@ class _ClassificationModel( # pylint: disable=abstract-method
class_probs = softmax(raw_preds, axis=1)
return raw_preds, class_probs
features_col, feature_col_names = self._get_feature_col(dataset)
enable_sparse_data_optim = self.getOrDefault(self.enable_sparse_data_optim)
def _predict(
model: XGBModel, X: ArrayLike, base_margin: Optional[np.ndarray]
) -> Union[pd.DataFrame, pd.Series]:
margins = model.predict(
X,
base_margin=base_margin,
output_margin=True,
validate_features=False,
**predict_params,
)
raw_preds, class_probs = transform_margin(margins)
pred_contrib_col_name = None
if (
self.isDefined(self.pred_contrib_col)
and self.getOrDefault(self.pred_contrib_col) != ""
):
pred_contrib_col_name = self.getOrDefault(self.pred_contrib_col)
# The prediction appears to take the argmax of the class probabilities,
# not of the margin (matching the Scala implementation).
preds = np.argmax(class_probs, axis=1)
result: Dict[str, pd.Series] = {
pred.raw_prediction: pd.Series(list(raw_preds)),
pred.prediction: pd.Series(preds),
pred.probability: pd.Series(list(class_probs)),
}
schema = (
f"{pred.raw_prediction} array<double>, {pred.prediction} double,"
f" {pred.probability} array<double>"
)
if pred_contrib_col_name:
# We will force setting strict_shape to True when predicting contribs,
# So, it will also output 3-D shape result.
schema = f"{schema}, {pred.pred_contrib} array<array<double>>"
if pred_contrib_col_name is not None:
contribs = pred_contribs(model, X, base_margin, strict_shape=True)
result[pred.pred_contrib] = pd.Series(list(contribs.tolist()))
@pandas_udf(schema) # type: ignore
def predict_udf(
iterator: Iterator[Tuple[pd.Series, ...]]
) -> Iterator[pd.DataFrame]:
assert xgb_sklearn_model is not None
model = xgb_sklearn_model
for data in iterator:
if enable_sparse_data_optim:
X = _read_csr_matrix_from_unwrapped_spark_vec(data)
else:
if feature_col_names is not None:
X = data[feature_col_names] # type: ignore
else:
X = stack_series(data[alias.data])
return pd.DataFrame(data=result)
if has_base_margin:
base_margin = stack_series(data[alias.margin])
else:
base_margin = None
margins = model.predict(
X,
base_margin=base_margin,
output_margin=True,
validate_features=False,
**predict_params,
)
raw_preds, class_probs = transform_margin(margins)
# It seems that they use argmax of class probs,
# not of margin to get the prediction (Note: scala implementation)
preds = np.argmax(class_probs, axis=1)
result: Dict[str, pd.Series] = {
pred.raw_prediction: pd.Series(list(raw_preds)),
pred.prediction: pd.Series(preds),
pred.probability: pd.Series(list(class_probs)),
}
if pred_contrib_col_name:
contribs = pred_contribs(model, X, base_margin, strict_shape=True)
result[pred.pred_contrib] = pd.Series(list(contribs.tolist()))
yield pd.DataFrame(data=result)
if has_base_margin:
pred_struct = predict_udf(struct(*features_col, base_margin_col))
else:
pred_struct = predict_udf(struct(*features_col))
return _predict
def _post_transform(self, dataset: DataFrame, pred_col: Column) -> DataFrame:
pred_struct_col = "_prediction_struct"
dataset = dataset.withColumn(pred_struct_col, pred_struct)
dataset = dataset.withColumn(pred_struct_col, pred_col)
raw_prediction_col_name = self.getOrDefault(self.rawPredictionCol)
if raw_prediction_col_name:
@@ -1342,7 +1423,8 @@ class _ClassificationModel( # pylint: disable=abstract-method
array_to_vector(getattr(col(pred_struct_col), pred.probability)),
)
if pred_contrib_col_name:
pred_contrib_col_name = self._get_pred_contrib_col_name()
if pred_contrib_col_name is not None:
dataset = dataset.withColumn(
pred_contrib_col_name,
getattr(col(pred_struct_col), pred.pred_contrib),

View File

@@ -10,7 +10,7 @@ from threading import Thread
from typing import Any, Callable, Dict, Optional, Set, Type
import pyspark
from pyspark import BarrierTaskContext, SparkContext, SparkFiles
from pyspark import BarrierTaskContext, SparkContext, SparkFiles, TaskContext
from pyspark.sql.session import SparkSession
from xgboost import Booster, XGBModel, collective
@@ -129,7 +129,7 @@ def _is_local(spark_context: SparkContext) -> bool:
return spark_context._jsc.sc().isLocal()
def _get_gpu_id(task_context: BarrierTaskContext) -> int:
def _get_gpu_id(task_context: TaskContext) -> int:
"""Get the gpu id from the task resources"""
if task_context is None:
# This is a safety check.
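
The signature widens because transform runs in ordinary (non-barrier) tasks, whose context is a plain TaskContext; a hedged sketch of the lookup:

    from pyspark import TaskContext

    ctx = TaskContext.get()  # works in both barrier and regular tasks
    if ctx is not None:
        gpu_resources = ctx.resources().get("gpu")  # None when no GPU is assigned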

View File

@@ -52,7 +52,7 @@ inline XGBOOST_DEVICE bool InvalidCat(float cat) {
*
* Go to left if it's NOT the matching category, which matches one-hot encoding.
*/
inline XGBOOST_DEVICE bool Decision(common::Span<uint32_t const> cats, float cat) {
inline XGBOOST_DEVICE bool Decision(common::Span<CatBitField::value_type const> cats, float cat) {
KCatBitField const s_cats(cats);
if (XGBOOST_EXPECT(InvalidCat(cat), false)) {
return true;

View File

@@ -3,9 +3,11 @@
*/
#include "error_msg.h"
#include <mutex> // for call_once, once_flag
#include <sstream> // for stringstream
#include "../collective/communicator-inl.h" // for GetRank
#include "xgboost/context.h" // for Context
#include "xgboost/logging.h"
namespace xgboost::error {
@@ -26,34 +28,43 @@ void WarnDeprecatedGPUHist() {
}
void WarnManualUpdater() {
bool static thread_local logged{false};
if (logged) {
return;
}
LOG(WARNING)
<< "You have manually specified the `updater` parameter. The `tree_method` parameter "
"will be ignored. Incorrect sequence of updaters will produce undefined "
"behavior. For common uses, we recommend using `tree_method` parameter instead.";
logged = true;
static std::once_flag flag;
std::call_once(flag, [] {
LOG(WARNING)
<< "You have manually specified the `updater` parameter. The `tree_method` parameter "
"will be ignored. Incorrect sequence of updaters will produce undefined "
"behavior. For common uses, we recommend using `tree_method` parameter instead.";
});
}
void WarnDeprecatedGPUId() {
static thread_local bool logged{false};
if (logged) {
return;
}
auto msg = DeprecatedFunc("gpu_id", "2.0.0", "device");
msg += " E.g. device=cpu/cuda/cuda:0";
LOG(WARNING) << msg;
logged = true;
static std::once_flag flag;
std::call_once(flag, [] {
auto msg = DeprecatedFunc("gpu_id", "2.0.0", "device");
msg += " E.g. device=cpu/cuda/cuda:0";
LOG(WARNING) << msg;
});
}
void WarnEmptyDataset() {
static thread_local bool logged{false};
if (logged) {
return;
}
LOG(WARNING) << "Empty dataset at worker: " << collective::GetRank();
logged = true;
static std::once_flag flag;
std::call_once(flag,
[] { LOG(WARNING) << "Empty dataset at worker: " << collective::GetRank(); });
}
void MismatchedDevices(Context const* booster, Context const* data) {
static std::once_flag flag;
std::call_once(flag, [&] {
LOG(WARNING)
<< "Falling back to prediction using DMatrix due to mismatched devices. This might "
"lead to higher memory usage and slower performance. XGBoost is running on: "
<< booster->DeviceName() << ", while the input data is on: " << data->DeviceName() << ".\n"
<< R"(Potential solutions:
- Use a data structure that matches the device ordinal in the booster.
- Set the device for booster before call to inplace_predict.
This warning will only be shown once.
)";
});
}
} // namespace xgboost::error
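
The std::call_once pattern above has a rough Python analogue; a sketch for illustration only, not part of the codebase:

    import functools
    import warnings

    @functools.lru_cache(maxsize=None)
    def warn_once(msg: str) -> None:
        # lru_cache guarantees the body runs at most once per distinct message
        warnings.warn(msg)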

View File

@@ -10,7 +10,8 @@
#include <limits> // for numeric_limits
#include <string> // for string
#include "xgboost/base.h" // for bst_feature_t
#include "xgboost/base.h" // for bst_feature_t
#include "xgboost/context.h" // for Context
#include "xgboost/logging.h"
#include "xgboost/string_view.h" // for StringView
@@ -94,5 +95,7 @@ constexpr StringView InvalidCUDAOrdinal() {
return "Invalid device. `device` is required to be CUDA and there must be at least one GPU "
"available for using GPU.";
}
void MismatchedDevices(Context const* booster, Context const* data);
} // namespace xgboost::error
#endif // XGBOOST_COMMON_ERROR_MSG_H_

View File

@@ -55,6 +55,7 @@ std::shared_ptr<DMatrix> CreateDMatrixFromProxy(Context const *ctx,
}
CHECK(p_fmat) << "Failed to fallback.";
p_fmat->Info() = proxy->Info().Copy();
return p_fmat;
}
} // namespace xgboost::data

View File

@@ -1,5 +1,5 @@
/*!
* Copyright 2014-2022 by XGBoost Contributors
/**
* Copyright 2014-2023, XGBoost Contributors
* \file gblinear.cc
* \brief Implementation of Linear booster, with L1/L2 regularization: Elastic Net
* the update rule is parallel coordinate descent (shotgun)
@@ -26,9 +26,9 @@
#include "../common/timer.h"
#include "../common/common.h"
#include "../common/threading_utils.h"
#include "../common/error_msg.h"
namespace xgboost {
namespace gbm {
namespace xgboost::gbm {
DMLC_REGISTRY_FILE_TAG(gblinear);
@@ -83,7 +83,16 @@ class GBLinear : public GradientBooster {
}
param_.UpdateAllowUnknown(cfg);
param_.CheckGPUSupport();
updater_.reset(LinearUpdater::Create(param_.updater, ctx_));
if (param_.updater == "gpu_coord_descent") {
LOG(WARNING) << error::DeprecatedFunc("gpu_coord_descent", "2.0.0",
R"(device="cuda", updater="coord_descent")");
}
if (param_.updater == "coord_descent" && ctx_->IsCUDA()) {
updater_.reset(LinearUpdater::Create("gpu_coord_descent", ctx_));
} else {
updater_.reset(LinearUpdater::Create(param_.updater, ctx_));
}
updater_->Configure(cfg);
monitor_.Init("GBLinear");
}
@@ -354,5 +363,4 @@ XGBOOST_REGISTER_GBM(GBLinear, "gblinear")
.set_body([](LearnerModelParam const* booster_config, Context const* ctx) {
return new GBLinear(booster_config, ctx);
});
} // namespace gbm
} // namespace xgboost
} // namespace xgboost::gbm

View File

@@ -85,25 +85,6 @@ bool UpdatersMatched(std::vector<std::string> updater_seq,
return name == up->Name();
});
}
void MismatchedDevices(Context const* booster, Context const* data) {
bool thread_local static logged{false};
if (logged) {
return;
}
LOG(WARNING) << "Falling back to prediction using DMatrix due to mismatched devices. This might "
"lead to higher memory usage and slower performance. XGBoost is running on: "
<< booster->DeviceName() << ", while the input data is on: " << data->DeviceName()
<< ".\n"
<< R"(Potential solutions:
- Use a data structure that matches the device ordinal in the booster.
- Set the device for booster before call to inplace_predict.
This warning will only be shown once for each thread. Subsequent warnings made by the
current thread will be suppressed.
)";
logged = true;
}
} // namespace
void GBTree::Configure(Args const& cfg) {
@@ -146,14 +127,6 @@ void GBTree::Configure(Args const& cfg) {
if (specified_updater_) {
error::WarnManualUpdater();
}
if (model_.learner_model_param->IsVectorLeaf()) {
CHECK(tparam_.tree_method == TreeMethod::kHist || tparam_.tree_method == TreeMethod::kAuto)
<< "Only the hist tree method is supported for building multi-target trees with vector "
"leaf.";
CHECK(ctx_->IsCPU()) << "GPU is not yet supported for vector leaf.";
}
LOG(DEBUG) << "Using tree method: " << static_cast<int>(tparam_.tree_method);
if (!specified_updater_) {
@@ -225,6 +198,13 @@ void GBTree::UpdateTreeLeaf(DMatrix const* p_fmat, HostDeviceVector<float> const
void GBTree::DoBoost(DMatrix* p_fmat, HostDeviceVector<GradientPair>* in_gpair,
PredictionCacheEntry* predt, ObjFunction const* obj) {
if (model_.learner_model_param->IsVectorLeaf()) {
CHECK(tparam_.tree_method == TreeMethod::kHist || tparam_.tree_method == TreeMethod::kAuto)
<< "Only the hist tree method is supported for building multi-target trees with vector "
"leaf.";
CHECK(ctx_->IsCPU()) << "GPU is not yet supported for vector leaf.";
}
TreesOneIter new_trees;
bst_target_t const n_groups = model_.learner_model_param->OutputLength();
monitor_.Start("BoostNewTrees");
@@ -555,7 +535,7 @@ void GBTree::InplacePredict(std::shared_ptr<DMatrix> p_m, float missing,
auto [tree_begin, tree_end] = detail::LayerToTree(model_, layer_begin, layer_end);
CHECK_LE(tree_end, model_.trees.size()) << "Invalid number of trees.";
if (p_m->Ctx()->Device() != this->ctx_->Device()) {
MismatchedDevices(this->ctx_, p_m->Ctx());
error::MismatchedDevices(this->ctx_, p_m->Ctx());
CHECK_EQ(out_preds->version, 0);
auto proxy = std::dynamic_pointer_cast<data::DMatrixProxy>(p_m);
CHECK(proxy) << error::InplacePredictProxy();
@@ -808,7 +788,7 @@ class Dart : public GBTree {
auto n_groups = model_.learner_model_param->num_output_group;
if (ctx_->Device() != p_fmat->Ctx()->Device()) {
MismatchedDevices(ctx_, p_fmat->Ctx());
error::MismatchedDevices(ctx_, p_fmat->Ctx());
auto proxy = std::dynamic_pointer_cast<data::DMatrixProxy>(p_fmat);
CHECK(proxy) << error::InplacePredictProxy();
auto p_fmat = data::CreateDMatrixFromProxy(ctx_, proxy, missing);
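
From the user's perspective, this fallback fires when the booster and the input data live on different devices during inplace prediction; a hedged sketch, assuming a trained booster:

    import numpy as np

    # booster is assumed to be an already-trained xgboost.Booster.
    booster.set_param({"device": "cuda"})  # booster on the GPU
    X_cpu = np.random.randn(16, booster.num_features())
    # CPU data with a GPU booster: warns once, then falls back to a
    # DMatrix-based prediction with meta info copied from the proxy.
    preds = booster.inplace_predict(X_cpu)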

View File

@@ -9,8 +9,7 @@
#include "coordinate_common.h"
#include "xgboost/json.h"
namespace xgboost {
namespace linear {
namespace xgboost::linear {
DMLC_REGISTER_PARAMETER(CoordinateParam);
DMLC_REGISTRY_FILE_TAG(updater_coordinate);
@@ -39,8 +38,9 @@ class CoordinateUpdater : public LinearUpdater {
FromJson(config.at("linear_train_param"), &tparam_);
FromJson(config.at("coordinate_param"), &cparam_);
}
void SaveConfig(Json* p_out) const override {
auto& out = *p_out;
void SaveConfig(Json *p_out) const override {
LOG(DEBUG) << "Save config for CPU updater.";
auto &out = *p_out;
out["linear_train_param"] = ToJson(tparam_);
out["coordinate_param"] = ToJson(cparam_);
}
@@ -99,5 +99,4 @@ class CoordinateUpdater : public LinearUpdater {
XGBOOST_REGISTER_LINEAR_UPDATER(CoordinateUpdater, "coord_descent")
.describe("Update linear model according to coordinate descent algorithm.")
.set_body([]() { return new CoordinateUpdater(); });
} // namespace linear
} // namespace xgboost
} // namespace xgboost::linear

View File

@@ -15,8 +15,7 @@
#include "../common/timer.h"
#include "./param.h"
namespace xgboost {
namespace linear {
namespace xgboost::linear {
DMLC_REGISTRY_FILE_TAG(updater_gpu_coordinate);
@@ -29,7 +28,7 @@ DMLC_REGISTRY_FILE_TAG(updater_gpu_coordinate);
class GPUCoordinateUpdater : public LinearUpdater { // NOLINT
public:
// set training parameter
void Configure(Args const& args) override {
void Configure(Args const &args) override {
tparam_.UpdateAllowUnknown(args);
coord_param_.UpdateAllowUnknown(args);
selector_.reset(FeatureSelector::Create(tparam_.feature_selector));
@@ -41,8 +40,9 @@ class GPUCoordinateUpdater : public LinearUpdater { // NOLINT
FromJson(config.at("linear_train_param"), &tparam_);
FromJson(config.at("coordinate_param"), &coord_param_);
}
void SaveConfig(Json* p_out) const override {
auto& out = *p_out;
void SaveConfig(Json *p_out) const override {
LOG(DEBUG) << "Save config for GPU updater.";
auto &out = *p_out;
out["linear_train_param"] = ToJson(tparam_);
out["coordinate_param"] = ToJson(coord_param_);
}
@@ -101,10 +101,9 @@ class GPUCoordinateUpdater : public LinearUpdater { // NOLINT
monitor_.Stop("LazyInitDevice");
monitor_.Start("UpdateGpair");
auto &in_gpair_host = in_gpair->ConstHostVector();
// Update gpair
if (ctx_->gpu_id >= 0) {
this->UpdateGpair(in_gpair_host);
this->UpdateGpair(in_gpair->ConstHostVector());
}
monitor_.Stop("UpdateGpair");
@@ -249,5 +248,4 @@ XGBOOST_REGISTER_LINEAR_UPDATER(GPUCoordinateUpdater, "gpu_coord_descent")
"Update linear model according to coordinate descent algorithm. GPU "
"accelerated.")
.set_body([]() { return new GPUCoordinateUpdater(); });
} // namespace linear
} // namespace xgboost
} // namespace xgboost::linear

View File

@@ -1,5 +1,5 @@
/*!
* Copyright 2020-2022 by XGBoost Contributors
/**
* Copyright 2020-2023, XGBoost Contributors
*/
#include <algorithm> // std::max
#include <vector>
@@ -11,9 +11,7 @@
#include "evaluate_splits.cuh"
#include "expand_entry.cuh"
namespace xgboost {
namespace tree {
namespace xgboost::tree {
// With constraints
XGBOOST_DEVICE float LossChangeMissing(const GradientPairInt64 &scan,
const GradientPairInt64 &missing,
@@ -315,11 +313,11 @@ __device__ void SetCategoricalSplit(const EvaluateSplitSharedInputs &shared_inpu
common::Span<common::CatBitField::value_type> out,
DeviceSplitCandidate *p_out_split) {
auto &out_split = *p_out_split;
out_split.split_cats = common::CatBitField{out};
auto out_cats = common::CatBitField{out};
// Simple case for one hot split
if (common::UseOneHot(shared_inputs.FeatureBins(fidx), shared_inputs.param.max_cat_to_onehot)) {
out_split.split_cats.Set(common::AsCat(out_split.thresh));
out_cats.Set(common::AsCat(out_split.thresh));
return;
}
@@ -339,7 +337,7 @@ __device__ void SetCategoricalSplit(const EvaluateSplitSharedInputs &shared_inpu
assert(partition > 0 && "Invalid partition.");
thrust::for_each(thrust::seq, beg, beg + partition, [&](size_t c) {
auto cat = shared_inputs.feature_values[c - node_offset];
out_split.SetCat(cat);
out_cats.Set(common::AsCat(cat));
});
}
@@ -427,8 +425,7 @@ void GPUHistEvaluator::EvaluateSplits(
if (split.is_cat) {
SetCategoricalSplit(shared_inputs, d_sorted_idx, fidx, i,
device_cats_accessor.GetNodeCatStorage(input.nidx),
&out_splits[i]);
device_cats_accessor.GetNodeCatStorage(input.nidx), &out_splits[i]);
}
float base_weight =
@@ -460,6 +457,4 @@ GPUExpandEntry GPUHistEvaluator::EvaluateSingleSplit(
cudaMemcpyDeviceToHost));
return root_entry;
}
} // namespace tree
} // namespace xgboost
} // namespace xgboost::tree

View File

@@ -37,8 +37,8 @@ struct EvaluateSplitSharedInputs {
common::Span<const float> feature_values;
common::Span<const float> min_fvalue;
bool is_dense;
XGBOOST_DEVICE auto Features() const { return feature_segments.size() - 1; }
__device__ auto FeatureBins(bst_feature_t fidx) const {
[[nodiscard]] XGBOOST_DEVICE auto Features() const { return feature_segments.size() - 1; }
[[nodiscard]] __device__ std::uint32_t FeatureBins(bst_feature_t fidx) const {
return feature_segments[fidx + 1] - feature_segments[fidx];
}
};
@@ -102,7 +102,7 @@ class GPUHistEvaluator {
}
/**
* \brief Get device category storage of nidx for internal calculation.
* @brief Get device category storage of nidx for internal calculation.
*/
auto DeviceCatStorage(const std::vector<bst_node_t> &nidx) {
if (!has_categoricals_) return CatAccessor{};
@@ -117,8 +117,8 @@ class GPUHistEvaluator {
/**
* \brief Get sorted index storage based on the left node of inputs.
*/
auto SortedIdx(int num_nodes, bst_feature_t total_bins) {
if(!need_sort_histogram_) return common::Span<bst_feature_t>();
auto SortedIdx(int num_nodes, bst_bin_t total_bins) {
if (!need_sort_histogram_) return common::Span<bst_feature_t>{};
cat_sorted_idx_.resize(num_nodes * total_bins);
return dh::ToSpan(cat_sorted_idx_);
}
@@ -142,12 +142,22 @@ class GPUHistEvaluator {
* \brief Get host category storage for nidx. Different from the internal version, this
* returns strictly 1 node.
*/
common::Span<CatST const> GetHostNodeCats(bst_node_t nidx) const {
[[nodiscard]] common::Span<CatST const> GetHostNodeCats(bst_node_t nidx) const {
copy_stream_.View().Sync();
auto cats_out = common::Span<CatST const>{h_split_cats_}.subspan(
nidx * node_categorical_storage_size_, node_categorical_storage_size_);
return cats_out;
}
[[nodiscard]] auto GetDeviceNodeCats(bst_node_t nidx) {
copy_stream_.View().Sync();
if (has_categoricals_) {
CatAccessor accessor = {dh::ToSpan(split_cats_), node_categorical_storage_size_};
return common::KCatBitField{accessor.GetNodeCatStorage(nidx)};
} else {
return common::KCatBitField{};
}
}
/**
* \brief Add a split to the internal tree evaluator.
*/

View File

@@ -64,7 +64,6 @@ struct DeviceSplitCandidate {
// split.
bst_cat_t thresh{-1};
common::CatBitField split_cats;
bool is_cat { false };
GradientPairInt64 left_sum;
@@ -72,12 +71,6 @@ struct DeviceSplitCandidate {
XGBOOST_DEVICE DeviceSplitCandidate() {} // NOLINT
template <typename T>
XGBOOST_DEVICE void SetCat(T c) {
this->split_cats.Set(common::AsCat(c));
fvalue = std::max(this->fvalue, static_cast<float>(c));
}
XGBOOST_DEVICE void Update(float loss_chg_in, DefaultDirection dir_in, float fvalue_in,
int findex_in, GradientPairInt64 left_sum_in,
GradientPairInt64 right_sum_in, bool cat,
@@ -100,22 +93,23 @@ struct DeviceSplitCandidate {
*/
XGBOOST_DEVICE void UpdateCat(float loss_chg_in, DefaultDirection dir_in, bst_cat_t thresh_in,
bst_feature_t findex_in, GradientPairInt64 left_sum_in,
GradientPairInt64 right_sum_in, GPUTrainingParam const& param, const GradientQuantiser& quantiser) {
if (loss_chg_in > loss_chg &&
quantiser.ToFloatingPoint(left_sum_in).GetHess() >= param.min_child_weight &&
quantiser.ToFloatingPoint(right_sum_in).GetHess() >= param.min_child_weight) {
loss_chg = loss_chg_in;
dir = dir_in;
fvalue = std::numeric_limits<float>::quiet_NaN();
thresh = thresh_in;
is_cat = true;
left_sum = left_sum_in;
right_sum = right_sum_in;
findex = findex_in;
}
GradientPairInt64 right_sum_in, GPUTrainingParam const& param,
const GradientQuantiser& quantiser) {
if (loss_chg_in > loss_chg &&
quantiser.ToFloatingPoint(left_sum_in).GetHess() >= param.min_child_weight &&
quantiser.ToFloatingPoint(right_sum_in).GetHess() >= param.min_child_weight) {
loss_chg = loss_chg_in;
dir = dir_in;
fvalue = std::numeric_limits<float>::quiet_NaN();
thresh = thresh_in;
is_cat = true;
left_sum = left_sum_in;
right_sum = right_sum_in;
findex = findex_in;
}
}
XGBOOST_DEVICE bool IsValid() const { return loss_chg > 0.0f; }
[[nodiscard]] XGBOOST_DEVICE bool IsValid() const { return loss_chg > 0.0f; }
friend std::ostream& operator<<(std::ostream& os, DeviceSplitCandidate const& c) {
os << "loss_chg:" << c.loss_chg << ", "

View File

@@ -7,9 +7,9 @@
#include <algorithm>
#include <cmath>
#include <limits>
#include <memory>
#include <utility>
#include <cstddef> // for size_t
#include <memory> // for unique_ptr, make_unique
#include <utility> // for move
#include <vector>
#include "../collective/communicator-inl.cuh"
@@ -216,9 +216,9 @@ struct GPUHistMakerDevice {
void InitFeatureGroupsOnce() {
if (!feature_groups) {
CHECK(page);
feature_groups.reset(new FeatureGroups(page->Cuts(), page->is_dense,
dh::MaxSharedMemoryOptin(ctx_->gpu_id),
sizeof(GradientPairPrecise)));
feature_groups = std::make_unique<FeatureGroups>(page->Cuts(), page->is_dense,
dh::MaxSharedMemoryOptin(ctx_->gpu_id),
sizeof(GradientPairPrecise));
}
}
@@ -244,10 +244,10 @@ struct GPUHistMakerDevice {
this->evaluator_.Reset(page->Cuts(), feature_types, dmat->Info().num_col_, param, ctx_->gpu_id);
quantiser.reset(new GradientQuantiser(this->gpair));
quantiser = std::make_unique<GradientQuantiser>(this->gpair);
row_partitioner.reset(); // Release the device memory first before reallocating
row_partitioner.reset(new RowPartitioner(ctx_->gpu_id, sample.sample_rows));
row_partitioner = std::make_unique<RowPartitioner>(ctx_->gpu_id, sample.sample_rows);
// Init histogram
hist.Init(ctx_->gpu_id, page->Cuts().TotalBins());
@@ -294,7 +294,7 @@ struct GPUHistMakerDevice {
dh::TemporaryArray<GPUExpandEntry> entries(2 * candidates.size());
// Store the feature set ptrs so they dont go out of scope before the kernel is called
std::vector<std::shared_ptr<HostDeviceVector<bst_feature_t>>> feature_sets;
for (size_t i = 0; i < candidates.size(); i++) {
for (std::size_t i = 0; i < candidates.size(); i++) {
auto candidate = candidates.at(i);
int left_nidx = tree[candidate.nid].LeftChild();
int right_nidx = tree[candidate.nid].RightChild();
@@ -327,14 +327,13 @@ struct GPUHistMakerDevice {
d_node_inputs.data().get(), h_node_inputs.data(),
h_node_inputs.size() * sizeof(EvaluateSplitInputs), cudaMemcpyDefault));
this->evaluator_.EvaluateSplits(nidx, max_active_features,
dh::ToSpan(d_node_inputs), shared_inputs,
dh::ToSpan(entries));
this->evaluator_.EvaluateSplits(nidx, max_active_features, dh::ToSpan(d_node_inputs),
shared_inputs, dh::ToSpan(entries));
dh::safe_cuda(cudaMemcpyAsync(pinned_candidates_out.data(),
entries.data().get(), sizeof(GPUExpandEntry) * entries.size(),
cudaMemcpyDeviceToHost));
dh::DefaultStream().Sync();
}
}
void BuildHist(int nidx) {
auto d_node_hist = hist.GetNodeHistogram(nidx);
@@ -366,23 +365,29 @@ struct GPUHistMakerDevice {
struct NodeSplitData {
RegTree::Node split_node;
FeatureType split_type;
common::CatBitField node_cats;
common::KCatBitField node_cats;
};
void UpdatePosition(const std::vector<GPUExpandEntry>& candidates, RegTree* p_tree) {
if (candidates.empty()) return;
std::vector<int> nidx(candidates.size());
std::vector<int> left_nidx(candidates.size());
std::vector<int> right_nidx(candidates.size());
void UpdatePosition(std::vector<GPUExpandEntry> const& candidates, RegTree* p_tree) {
if (candidates.empty()) {
return;
}
std::vector<bst_node_t> nidx(candidates.size());
std::vector<bst_node_t> left_nidx(candidates.size());
std::vector<bst_node_t> right_nidx(candidates.size());
std::vector<NodeSplitData> split_data(candidates.size());
for (size_t i = 0; i < candidates.size(); i++) {
auto& e = candidates[i];
auto const& e = candidates[i];
RegTree::Node split_node = (*p_tree)[e.nid];
auto split_type = p_tree->NodeSplitType(e.nid);
nidx.at(i) = e.nid;
left_nidx.at(i) = split_node.LeftChild();
right_nidx.at(i) = split_node.RightChild();
split_data.at(i) = NodeSplitData{split_node, split_type, e.split.split_cats};
split_data.at(i) = NodeSplitData{split_node, split_type, evaluator_.GetDeviceNodeCats(e.nid)};
CHECK_EQ(split_type == FeatureType::kCategorical, e.split.is_cat);
}
auto d_matrix = page->GetDeviceAccessor(ctx_->gpu_id);
@@ -390,7 +395,7 @@ struct GPUHistMakerDevice {
nidx, left_nidx, right_nidx, split_data,
[=] __device__(bst_uint ridx, const NodeSplitData& data) {
// given a row index, returns the node id it belongs to
bst_float cut_value = d_matrix.GetFvalue(ridx, data.split_node.SplitIndex());
float cut_value = d_matrix.GetFvalue(ridx, data.split_node.SplitIndex());
// Missing value
bool go_left = true;
if (isnan(cut_value)) {
@@ -620,7 +625,6 @@ struct GPUHistMakerDevice {
CHECK(common::CheckNAN(candidate.split.fvalue));
std::vector<common::CatBitField::value_type> split_cats;
CHECK_GT(candidate.split.split_cats.Bits().size(), 0);
auto h_cats = this->evaluator_.GetHostNodeCats(candidate.nid);
auto n_bins_feature = page->Cuts().FeatureBins(candidate.split.findex);
split_cats.resize(common::CatBitField::ComputeStorageSize(n_bins_feature), 0);

View File

@@ -0,0 +1,42 @@
/**
* Copyright 2023, XGBoost Contributors
*/
#include <gtest/gtest.h>
#include <xgboost/global_config.h> // for GlobalConfigThreadLocalStore
#include <xgboost/json.h> // for Json, Object
#include <xgboost/learner.h> // for Learner
#include <algorithm> // for transform
#include <string> // for string
#include <utility> // for swap
#include "../helpers.h" // for RandomDataGenerator
namespace xgboost {
TEST(GBlinear, DispatchUpdater) {
auto verbosity = 3;
std::swap(GlobalConfigThreadLocalStore::Get()->verbosity, verbosity);
auto test = [](std::string device) {
auto p_fmat = RandomDataGenerator{10, 10, 0.0f}.GenerateDMatrix(true);
std::unique_ptr<Learner> learner{Learner::Create({p_fmat})};
learner->SetParams(
Args{{"booster", "gblinear"}, {"updater", "coord_descent"}, {"device", device}});
learner->Configure();
for (std::int32_t iter = 0; iter < 3; ++iter) {
learner->UpdateOneIter(iter, p_fmat);
}
Json config{Object{}};
::testing::internal::CaptureStderr();
learner->SaveConfig(&config);
auto str = ::testing::internal::GetCapturedStderr();
std::transform(device.cbegin(), device.cend(), device.begin(),
[](char c) { return std::toupper(c); });
ASSERT_NE(str.find(device), std::string::npos);
};
test("cpu");
test("gpu");
std::swap(GlobalConfigThreadLocalStore::Get()->verbosity, verbosity);
}
} // namespace xgboost

View File

@@ -58,21 +58,6 @@ void TestInplaceFallback(Context const* ctx) {
HostDeviceVector<float>* out_predt{nullptr};
ConsoleLogger::Configure(Args{{"verbosity", "1"}});
std::string output;
// test whether the warning is raised
#if !defined(_WIN32)
// Windows has issue with CUDA and thread local storage. For some reason, on Windows a
// cudaInitializationError is raised during destruction of `HostDeviceVector`. This
// might be related to https://github.com/dmlc/xgboost/issues/5793
::testing::internal::CaptureStderr();
std::thread{[&] {
// Launch a new thread to ensure a warning is raised as we prevent over-verbose
// warning by using thread-local flags.
learner->InplacePredict(p_m, PredictionType::kValue, std::numeric_limits<float>::quiet_NaN(),
&out_predt, 0, 0);
}}.join();
output = testing::internal::GetCapturedStderr();
ASSERT_NE(output.find("Falling back"), std::string::npos);
#endif
learner->InplacePredict(p_m, PredictionType::kValue, std::numeric_limits<float>::quiet_NaN(),
&out_predt, 0, 0);

View File

@@ -191,14 +191,32 @@ class TestGPUPredict:
np.testing.assert_allclose(predt_0, predt_3)
np.testing.assert_allclose(predt_0, predt_4)
def run_inplace_base_margin(self, booster, dtrain, X, base_margin):
def run_inplace_base_margin(
self, device: int, booster: xgb.Booster, dtrain: xgb.DMatrix, X, base_margin
) -> None:
import cupy as cp
booster.set_param({"device": f"cuda:{device}"})
dtrain.set_info(base_margin=base_margin)
from_inplace = booster.inplace_predict(data=X, base_margin=base_margin)
from_dmatrix = booster.predict(dtrain)
cp.testing.assert_allclose(from_inplace, from_dmatrix)
booster = booster.copy() # clear prediction cache.
booster.set_param({"device": "cpu"})
from_inplace = booster.inplace_predict(data=X, base_margin=base_margin)
from_dmatrix = booster.predict(dtrain)
cp.testing.assert_allclose(from_inplace, from_dmatrix)
booster = booster.copy() # clear prediction cache.
base_margin = cp.asnumpy(base_margin)
if hasattr(X, "values"):
X = cp.asnumpy(X.values)
booster.set_param({"device": f"cuda:{device}"})
from_inplace = booster.inplace_predict(data=X, base_margin=base_margin)
from_dmatrix = booster.predict(dtrain)
cp.testing.assert_allclose(from_inplace, from_dmatrix, rtol=1e-6)
def run_inplace_predict_cupy(self, device: int) -> None:
import cupy as cp
@@ -244,7 +262,7 @@ class TestGPUPredict:
run_threaded_predict(X, rows, predict_dense)
base_margin = cp_rng.randn(rows)
self.run_inplace_base_margin(booster, dtrain, X, base_margin)
self.run_inplace_base_margin(device, booster, dtrain, X, base_margin)
# Create a wide dataset
X = cp_rng.randn(100, 10000)
@@ -318,7 +336,7 @@ class TestGPUPredict:
run_threaded_predict(X, rows, predict_df)
base_margin = cudf.Series(rng.randn(rows))
self.run_inplace_base_margin(booster, dtrain, X, base_margin)
self.run_inplace_base_margin(0, booster, dtrain, X, base_margin)
@given(
strategies.integers(1, 10), tm.make_dataset_strategy(), shap_parameter_strategy

View File

@@ -2,6 +2,7 @@ import json
import logging
import subprocess
import numpy as np
import pytest
import sklearn
@@ -13,7 +14,7 @@ from pyspark.ml.linalg import Vectors
from pyspark.ml.tuning import CrossValidator, ParamGridBuilder
from pyspark.sql import SparkSession
from xgboost.spark import SparkXGBClassifier, SparkXGBRegressor
from xgboost.spark import SparkXGBClassifier, SparkXGBRegressor, SparkXGBRegressorModel
gpu_discovery_script_path = "tests/test_distributed/test_gpu_with_spark/discover_gpu.sh"
@@ -242,3 +243,33 @@ def test_sparkxgb_regressor_feature_cols_with_gpu(spark_diabetes_dataset_feature
evaluator = RegressionEvaluator(metricName="rmse")
rmse = evaluator.evaluate(pred_result_df)
assert rmse <= 65.0
def test_gpu_transform(spark_diabetes_dataset) -> None:
regressor = SparkXGBRegressor(device="cuda", num_workers=num_workers)
train_df, test_df = spark_diabetes_dataset
model: SparkXGBRegressorModel = regressor.fit(train_df)
# The model was trained with GPUs; transform uses the GPU configuration.
assert model._gpu_transform()
model.set_device("cpu")
assert not model._gpu_transform()
# without error
cpu_rows = model.transform(test_df).select("prediction").collect()
regressor = SparkXGBRegressor(device="cpu", num_workers=num_workers)
model = regressor.fit(train_df)
# The model was trained with CPUs. Even with GPU configurations,
# transform still prefers the CPUs.
assert not model._gpu_transform()
# Set gpu transform explicitly.
model.set_device("cuda")
assert model._gpu_transform()
# without error
gpu_rows = model.transform(test_df).select("prediction").collect()
for cpu, gpu in zip(cpu_rows, gpu_rows):
np.testing.assert_allclose(cpu.prediction, gpu.prediction, atol=1e-3)

View File

@@ -888,6 +888,34 @@ class TestPySparkLocal:
clf = SparkXGBClassifier(device="cuda")
clf._validate_params()
def test_gpu_transform(self, clf_data: ClfData) -> None:
"""local mode"""
classifier = SparkXGBClassifier(device="cpu")
model: SparkXGBClassifierModel = classifier.fit(clf_data.cls_df_train)
with tempfile.TemporaryDirectory() as tmpdir:
path = "file:" + tmpdir
model.write().overwrite().save(path)
# The model was trained on the CPU, so transform defaults to the CPU.
assert not model._gpu_transform()
# without error
model.transform(clf_data.cls_df_test).collect()
model.set_device("cuda")
assert model._gpu_transform()
model_loaded = SparkXGBClassifierModel.load(path)
# The model was trained on the CPU, so transform defaults to the CPU.
assert not model_loaded._gpu_transform()
# without error
model_loaded.transform(clf_data.cls_df_test).collect()
model_loaded.set_device("cuda")
assert model_loaded._gpu_transform()
class XgboostLocalTest(SparkTestCase):
def setUp(self):