Define the new device parameter. (#9362)

Jiaming Yuan 2023-07-13 19:30:25 +08:00 committed by GitHub
parent 2d0cd2817e
commit 04aff3af8e
63 changed files with 827 additions and 477 deletions

View File

@@ -15,4 +15,3 @@
  address = {New York, NY, USA},
  keywords = {large-scale machine learning},
}

View File

@@ -22,7 +22,8 @@ Supported parameters

GPU accelerated prediction is enabled by default for the above mentioned ``tree_method`` parameters but can be switched to CPU prediction by setting ``predictor`` to ``cpu_predictor``. This could be useful if you want to conserve GPU memory. Likewise when using CPU algorithms, GPU accelerated prediction can be enabled by setting ``predictor`` to ``gpu_predictor``.

The device ordinal (which GPU to use if you have many of them) can be selected using the
-``gpu_id`` parameter, which defaults to 0 (the first device reported by CUDA runtime).
+``device`` parameter, which defaults to 0 when ``cuda`` is specified (the first device
+reported by the CUDA runtime).

The GPU algorithms currently work with CLI, Python, R, and JVM packages. See :doc:`/install` for details.
@@ -30,13 +31,13 @@ The GPU algorithms currently work with CLI, Python, R, and JVM packages. See :do

.. code-block:: python
   :caption: Python example

-   param['gpu_id'] = 0
+   param["device"] = "cuda:0"
   param['tree_method'] = 'gpu_hist'

.. code-block:: python
   :caption: With Scikit-Learn interface

-   XGBRegressor(tree_method='gpu_hist', gpu_id=0)
+   XGBRegressor(tree_method='gpu_hist', device="cuda")
GPU-Accelerated SHAP values

@@ -45,7 +46,7 @@ XGBoost makes use of `GPUTreeShap <https://github.com/rapidsai/gputreeshap>`_ as

.. code-block:: python

-   model.set_param({"gpu_id": "0", "tree_method": "gpu_hist"})
+   model.set_param({"device": "cuda:0", "tree_method": "gpu_hist"})
   shap_values = model.predict(dtrain, pred_contribs=True)
   shap_interaction_values = model.predict(dtrain, pred_interactions=True)

View File

@@ -3,10 +3,10 @@ Installation Guide
##################

XGBoost provides binary packages for some language bindings. The binary packages support
-the GPU algorithm (``gpu_hist``) on machines with NVIDIA GPUs. Please note that **training
-with multiple GPUs is only supported for Linux platform**. See :doc:`gpu/index`. Also we
-have both stable releases and nightly builds, see below for how to install them. For
-building from source, visit :doc:`this page </build>`.
+the GPU algorithm (``device=cuda:0``) on machines with NVIDIA GPUs. Please note that
+**training with multiple GPUs is only supported for the Linux platform**. See
+:doc:`gpu/index`. Also, we have both stable releases and nightly builds; see below for how
+to install them. For building from source, visit :doc:`this page </build>`.

.. contents:: Contents
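A quick way to check whether an installed binary actually supports GPU training is the ``build_info`` helper; a minimal sketch, assuming a recent Python package (the exact keys of the returned dict may vary across versions):

.. code-block:: python

   import xgboost

   info = xgboost.build_info()
   print(info.get("USE_CUDA"))  # True for a GPU-enabled binary package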

View File

@@ -59,6 +59,18 @@ General Parameters

  - Feature dimension used in boosting, set to maximum dimension of the feature

+* ``device`` [default= ``cpu``]
+
+  .. versionadded:: 2.0.0
+
+  - Device for XGBoost to run. User can set it to one of the following values (a usage
+    sketch follows this list):
+
+    + ``cpu``: Use CPU.
+    + ``cuda``: Use a GPU (CUDA device).
+    + ``cuda:<ordinal>``: ``<ordinal>`` is an integer that specifies the ordinal of the GPU (which GPU to use if you have more than one device).
+    + ``gpu``: Default GPU device selection from the list of available and supported devices. Only ``cuda`` devices are supported currently.
+    + ``gpu:<ordinal>``: Default GPU device selection with the specified ordinal. Only ``cuda`` devices are supported currently.
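A minimal training sketch using the values listed above (synthetic data; assumes a CUDA-enabled build):

.. code-block:: python

   import numpy as np
   import xgboost as xgb

   X, y = np.random.randn(128, 4), np.random.randn(128)
   dtrain = xgb.DMatrix(X, label=y)
   # Train on the first CUDA device; "cpu" or bare "cuda" work the same way.
   booster = xgb.train({"tree_method": "hist", "device": "cuda:0"}, dtrain)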
Parameters for Tree Booster
===========================

* ``eta`` [default=0.3, alias: ``learning_rate``]
@@ -99,7 +111,7 @@ Parameters for Tree Booster

  - ``gradient_based``: the selection probability for each training instance is proportional to the
    *regularized absolute value* of gradients (more specifically, :math:`\sqrt{g^2+\lambda h^2}`).
    ``subsample`` may be set to as low as 0.1 without loss of model accuracy. Note that this
-    sampling method is only supported when ``tree_method`` is set to ``gpu_hist``; other tree
+    sampling method is only supported when ``tree_method`` is set to ``hist`` and the device is ``cuda``; other tree
    methods only support ``uniform`` sampling.
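A hedged configuration sketch for the gradient-based sampling described above; the parameter names are taken from the surrounding documentation, the values are illustrative:

.. code-block:: python

   params = {
       "tree_method": "hist",
       "device": "cuda",                    # gradient_based requires a CUDA device
       "sampling_method": "gradient_based",
       "subsample": 0.1,                    # can be set this low with this method
   }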
* ``colsample_bytree``, ``colsample_bylevel``, ``colsample_bynode`` [default=1]
@@ -131,26 +143,15 @@ Parameters for Tree Booster

* ``tree_method`` string [default= ``auto``]

  - The tree construction algorithm used in XGBoost. See description in the `reference paper <http://arxiv.org/abs/1603.02754>`_ and :doc:`treemethod`.
-  - XGBoost supports ``approx``, ``hist`` and ``gpu_hist`` for distributed training. Experimental support for external memory is available for ``approx`` and ``gpu_hist``.
-  - Choices: ``auto``, ``exact``, ``approx``, ``hist``, ``gpu_hist``, this is a
-    combination of commonly used updaters. For other updaters like ``refresh``, set the
-    parameter ``updater`` directly.
+  - Choices: ``auto``, ``exact``, ``approx``, ``hist``, this is a combination of commonly
+    used updaters. For other updaters like ``refresh``, set the parameter ``updater``
+    directly.

-    - ``auto``: Use heuristic to choose the fastest method.
-
-      - For small dataset, exact greedy (``exact``) will be used.
-      - For larger dataset, approximate algorithm (``approx``) will be chosen. It's
-        recommended to try ``hist`` and ``gpu_hist`` for higher performance with large
-        dataset.
-        (``gpu_hist``)has support for ``external memory``.
-      - Because old behavior is always use exact greedy in single machine, user will get a
-        message when approximate algorithm is chosen to notify this choice.
+    - ``auto``: Same as the ``hist`` tree method.
    - ``exact``: Exact greedy algorithm. Enumerates all split candidates.
    - ``approx``: Approximate greedy algorithm using quantile sketch and gradient histogram.
    - ``hist``: Faster histogram optimized approximate greedy algorithm.
-    - ``gpu_hist``: GPU implementation of ``hist`` algorithm.

* ``scale_pos_weight`` [default=1]
@@ -163,7 +164,7 @@ Parameters for Tree Booster

  - ``grow_colmaker``: non-distributed column-based construction of trees.
  - ``grow_histmaker``: distributed tree construction with row-based data splitting based on global proposal of histogram counting.
  - ``grow_quantile_histmaker``: Grow tree using quantized histogram.
-  - ``grow_gpu_hist``: Grow tree with GPU.
+  - ``grow_gpu_hist``: Grow tree with GPU. Same as setting ``tree_method`` to ``hist`` and ``device`` to ``cuda``.
  - ``sync``: synchronizes trees in all distributed nodes.
  - ``refresh``: refreshes tree's statistics and/or leaf values based on the current data. Note that no random subsampling of data rows is performed.
  - ``prune``: prunes the splits where loss < min_split_loss (or gamma) and nodes that have depth greater than ``max_depth``.
@@ -183,7 +184,7 @@ Parameters for Tree Booster

* ``grow_policy`` [default= ``depthwise``]

  - Controls a way new nodes are added to the tree.
-  - Currently supported only if ``tree_method`` is set to ``hist``, ``approx`` or ``gpu_hist``.
+  - Currently supported only if ``tree_method`` is set to ``hist`` or ``approx``.
  - Choices: ``depthwise``, ``lossguide``
  - ``depthwise``: split at nodes closest to the root.
@@ -195,7 +196,7 @@ Parameters for Tree Booster

* ``max_bin``, [default=256]

-  - Only used if ``tree_method`` is set to ``hist``, ``approx`` or ``gpu_hist``.
+  - Only used if ``tree_method`` is set to ``hist`` or ``approx``.
  - Maximum number of discrete bins to bucket continuous features.
  - Increasing this number improves the optimality of splits at the cost of higher computation time.

View File

@@ -3,14 +3,14 @@ Tree Methods
############

For training boosted tree models, there are 2 parameters used for choosing algorithms,
-namely ``updater`` and ``tree_method``. XGBoost has 4 builtin tree methods, namely
-``exact``, ``approx``, ``hist`` and ``gpu_hist``. Along with these tree methods, there
-are also some free standing updaters including ``refresh``,
-``prune`` and ``sync``. The parameter ``updater`` is more primitive than ``tree_method``
-as the latter is just a pre-configuration of the former. The difference is mostly due to
-historical reasons that each updater requires some specific configurations and might has
-missing features. As we are moving forward, the gap between them is becoming more and
-more irrelevant. We will collectively document them under tree methods.
+namely ``updater`` and ``tree_method``. XGBoost has 3 builtin tree methods, namely
+``exact``, ``approx`` and ``hist``. Along with these tree methods, there are also some
+free-standing updaters including ``refresh``, ``prune`` and ``sync``. The parameter
+``updater`` is more primitive than ``tree_method`` as the latter is just a
+pre-configuration of the former. The difference is mostly due to historical reasons:
+each updater requires some specific configurations and might lack some features. As we
+are moving forward, the gap between them is becoming more and more irrelevant. We will
+collectively document them under tree methods.
**************
Exact Solution
@@ -19,23 +19,23 @@ Exact Solution

Exact means XGBoost considers all candidates from data for tree splitting, but underlying
the objective is still interpreted as a Taylor expansion.

-1. ``exact``: Vanilla gradient boosting tree algorithm described in `reference paper
-   <http://arxiv.org/abs/1603.02754>`_. During each split finding procedure, it iterates
-   over all entries of input data. It's more accurate (among other greedy methods) but
-   slow in computation performance. Also it doesn't support distributed training as
-   XGBoost employs row spliting data distribution while ``exact`` tree method works on a
-   sorted column format. This tree method can be used with parameter ``tree_method`` set
-   to ``exact``.
+1. ``exact``: The vanilla gradient boosting tree algorithm described in `reference paper
+   <http://arxiv.org/abs/1603.02754>`_. During split-finding, it iterates over all
+   entries of input data. It's more accurate (among other greedy methods) but
+   computationally slower compared to other tree methods. Furthermore, its feature
+   set is limited. Features like distributed training and external memory that require
+   approximated quantiles are not supported. This tree method can be used with the
+   parameter ``tree_method`` set to ``exact``.
**********************
Approximated Solutions
**********************
-As ``exact`` tree method is slow in performance and not scalable, we often employ
-approximated training algorithms. These algorithms build a gradient histogram for each
-node and iterate through the histogram instead of real dataset. Here we introduce the
-implementations in XGBoost below.
+As the ``exact`` tree method is slow in computation performance and difficult to scale,
+we often employ approximated training algorithms. These algorithms build a gradient
+histogram for each node and iterate through the histogram instead of the real dataset.
+Here we introduce the implementations in XGBoost.
1. ``approx`` tree method: An approximation tree method described in `reference paper
   <http://arxiv.org/abs/1603.02754>`_. It runs sketching before building each tree
@@ -48,22 +48,18 @@ implementations in XGBoost below.

   this global sketch. This is the fastest algorithm as it runs sketching only once. The
   algorithm can be accessed by setting ``tree_method`` to ``hist``.

-3. ``gpu_hist`` tree method: The ``gpu_hist`` tree method is a GPU implementation of
-   ``hist``, with additional support for gradient based sampling. The algorithm can be
-   accessed by setting ``tree_method`` to ``gpu_hist``.
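A short sketch contrasting the two approximated methods (synthetic data; all other parameters left at their defaults):

.. code-block:: python

   import numpy as np
   import xgboost as xgb

   X, y = np.random.randn(1024, 10), np.random.randn(1024)
   dtrain = xgb.DMatrix(X, label=y)
   xgb.train({"tree_method": "approx"}, dtrain)  # sketching before every tree
   xgb.train({"tree_method": "hist"}, dtrain)    # one global sketch, usually fastest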
************
Implications
************
-Some objectives like ``reg:squarederror`` have constant hessian. In this case, ``hist``
-or ``gpu_hist`` should be preferred as weighted sketching doesn't make sense with constant
-weights. When using non-constant hessian objectives, sometimes ``approx`` yields better
-accuracy, but with slower computation performance. Most of the time using ``(gpu)_hist``
-with higher ``max_bin`` can achieve similar or even superior accuracy while maintaining
-good performance. However, as xgboost is largely driven by community effort, the actual
-implementations have some differences than pure math description. Result might have
-slight differences than expectation, which we are currently trying to overcome.
+Some objectives like ``reg:squarederror`` have constant hessian. In this case,
+``hist`` should be preferred as weighted sketching doesn't make sense with constant
+weights. When using non-constant hessian objectives, sometimes ``approx`` yields better
+accuracy, but with slower computation performance. Most of the time using ``hist`` with
+higher ``max_bin`` can achieve similar or even superior accuracy while maintaining good
+performance. However, as xgboost is largely driven by community effort, the actual
+implementations have some differences from the pure mathematical description. Results
+might differ slightly from expectation, which we are currently trying to overcome.
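A hedged illustration of the ``max_bin`` trade-off mentioned above; the value 512 is arbitrary (the default is 256):

.. code-block:: python

   # Larger histograms can recover accuracy lost to approximation,
   # at the cost of more computation per split.
   params = {"tree_method": "hist", "max_bin": 512}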
**************
Other Updaters
@@ -106,8 +102,8 @@ solely for the interest of documentation.

   histogram creation step and uses sketching values directly during split evaluation. It
   was never tested and contained some unknown bugs, we decided to remove it and focus our
   resources on more promising algorithms instead. For accuracy, most of the time
-   ``approx``, ``hist`` and ``gpu_hist`` are enough with some parameters tuning, so
-   removing them don't have any real practical impact.
+   ``approx`` and ``hist`` are enough with some parameter tuning, so removing them
+   doesn't have any real practical impact.

3. ``grow_local_histmaker`` updater: An approximation tree method described in `reference
   paper <http://arxiv.org/abs/1603.02754>`_. This updater was rarely used in practice so

View File

@@ -149,7 +149,7 @@ Also for inplace prediction:

.. code-block:: python

   # where X is a dask DataFrame or dask Array backed by cupy or cuDF.
-   booster.set_param({"gpu_id": "0"})
+   booster.set_param({"device": "cuda:0"})
   prediction = xgb.dask.inplace_predict(client, booster, X)

When input is ``da.Array`` object, output is always ``da.Array``. However, if the input

View File

@@ -163,7 +163,7 @@ Will print out something similar to (not actual output as it's too long for demo

  {
    "Learner": {
      "generic_parameter": {
-        "gpu_id": "0",
+        "device": "cuda:0",
        "gpu_page_size": "0",
        "n_jobs": "0",
        "random_state": "0",

View File

@@ -119,7 +119,7 @@ using bst_group_t = std::uint32_t;  // NOLINT
 */
using bst_target_t = std::uint32_t;  // NOLINT
/**
- * brief Type for indexing boosted layers.
+ * @brief Type for indexing boosted layers.
 */
using bst_layer_t = std::int32_t;  // NOLINT
/**

View File

@@ -12,12 +12,18 @@

#include <cstdint>      // for int16_t, int32_t, int64_t
#include <memory>       // for shared_ptr
#include <string>       // for string, to_string
-#include <type_traits>  // for invoke_result_t, is_same_v
+#include <type_traits>  // for invoke_result_t, is_same_v, underlying_type_t

namespace xgboost {

struct CUDAContext;

+// symbolic names
+struct DeviceSym {
+  static auto constexpr CPU() { return "cpu"; }
+  static auto constexpr CUDA() { return "cuda"; }
+};
+
/**
 * @brief A type for device ordinal. The type is packed into 32-bit for efficient use in
 *   viewing types like `linalg::TensorView`.
@@ -59,9 +65,9 @@ struct DeviceOrd {
  [[nodiscard]] std::string Name() const {
    switch (device) {
      case DeviceOrd::kCPU:
-        return "CPU";
+        return DeviceSym::CPU();
      case DeviceOrd::kCUDA:
-        return "CUDA:" + std::to_string(ordinal);
+        return DeviceSym::CUDA() + (':' + std::to_string(ordinal));
      default: {
        LOG(FATAL) << "Unknown device.";
        return "";
@@ -76,26 +82,39 @@ static_assert(sizeof(DeviceOrd) == sizeof(std::int32_t));

/**
 * @brief Runtime context for XGBoost. Contains information like threads and device.
 */
struct Context : public XGBoostParameter<Context> {
+ private:
+  std::string device{DeviceSym::CPU()};  // NOLINT
+  // The device object for the current context. We are in the middle of replacing the
+  // `gpu_id` with this device field.
+  DeviceOrd device_{DeviceOrd::CPU()};
+
 public:
  // Constant representing the device ID of CPU.
-  static std::int32_t constexpr kCpuId = -1;
+  static bst_d_ordinal_t constexpr kCpuId = -1;
+  static bst_d_ordinal_t constexpr InvalidOrdinal() { return -2; }
  static std::int64_t constexpr kDefaultSeed = 0;

 public:
  Context();

+  template <typename Container>
+  Args UpdateAllowUnknown(Container const& kwargs) {
+    auto args = XGBoostParameter<Context>::UpdateAllowUnknown(kwargs);
+    this->SetDeviceOrdinal(kwargs);
+    return args;
+  }
+
+  std::int32_t gpu_id{kCpuId};
+  // The number of threads to use if OpenMP is enabled. If equals 0, use the system default.
+  std::int32_t nthread{0};  // NOLINT
  // stored random seed
  std::int64_t seed{kDefaultSeed};
  // whether seed the PRNG each iteration
  bool seed_per_iteration{false};
-  // number of threads to use if OpenMP is enabled
-  // if equals 0, use system default
-  std::int32_t nthread{0};
-  // primary device, -1 means no gpu.
-  std::int32_t gpu_id{kCpuId};
  // fail when gpu_id is invalid
  bool fail_on_invalid_gpu_id{false};
  bool validate_parameters{false};
  /**
   * @brief Configure the parameter `gpu_id'.
   *
@@ -111,21 +130,19 @@ struct Context : public XGBoostParameter<Context> {
  /**
   * @brief Is XGBoost running on CPU?
   */
-  [[nodiscard]] bool IsCPU() const { return gpu_id == kCpuId; }
+  [[nodiscard]] bool IsCPU() const { return Device().IsCPU(); }
  /**
   * @brief Is XGBoost running on a CUDA device?
   */
-  [[nodiscard]] bool IsCUDA() const { return !IsCPU(); }
+  [[nodiscard]] bool IsCUDA() const { return Device().IsCUDA(); }
  /**
   * @brief Get the current device and ordinal.
   */
-  [[nodiscard]] DeviceOrd Device() const {
-    return IsCPU() ? DeviceOrd::CPU() : DeviceOrd::CUDA(static_cast<bst_d_ordinal_t>(gpu_id));
-  }
+  [[nodiscard]] DeviceOrd Device() const { return device_; }
  /**
   * @brief Get the CUDA device ordinal. -1 if XGBoost is running on CPU.
   */
-  [[nodiscard]] bst_d_ordinal_t Ordinal() const { return this->gpu_id; }
+  [[nodiscard]] bst_d_ordinal_t Ordinal() const { return Device().ordinal; }
  /**
   * @brief Name of the current device.
   */
@@ -134,24 +151,22 @@ struct Context : public XGBoostParameter<Context> {
  /**
   * @brief Get a CUDA device context for allocator and stream.
   */
  [[nodiscard]] CUDAContext const* CUDACtx() const;

  /**
   * @brief Make a CUDA context based on the current context.
   *
   * @param ordinal The CUDA device ordinal.
   */
-  [[nodiscard]] Context MakeCUDA(std::int32_t ordinal = 0) const {
+  [[nodiscard]] Context MakeCUDA(bst_d_ordinal_t ordinal = 0) const {
    Context ctx = *this;
-    CHECK_GE(ordinal, 0);
-    ctx.gpu_id = ordinal;
-    return ctx;
+    return ctx.SetDevice(DeviceOrd::CUDA(ordinal));
  }
  /**
   * @brief Make a CPU context based on the current context.
   */
  [[nodiscard]] Context MakeCPU() const {
    Context ctx = *this;
-    ctx.gpu_id = kCpuId;
-    return ctx;
+    return ctx.SetDevice(DeviceOrd::CPU());
  }
  /**
   * @brief Call function based on the current device.
@@ -167,7 +182,8 @@ struct Context : public XGBoostParameter<Context> {
      default:
        // Do not use the device name as this is likely an internal error, the name
        // wouldn't be valid.
-        LOG(FATAL) << "Unknown device type:" << static_cast<std::int16_t>(this->Device().device);
+        LOG(FATAL) << "Unknown device type:"
+                   << static_cast<std::underlying_type_t<DeviceOrd::Type>>(this->Device().device);
        break;
    }
    return std::invoke_result_t<CPUFn>();
@@ -182,11 +198,9 @@ struct Context : public XGBoostParameter<Context> {
    DMLC_DECLARE_FIELD(seed_per_iteration)
        .set_default(false)
        .describe("Seed PRNG determnisticly via iterator number.");
+    DMLC_DECLARE_FIELD(device).set_default(DeviceSym::CPU()).describe("Device ordinal.");
    DMLC_DECLARE_FIELD(nthread).set_default(0).describe("Number of threads to use.");
    DMLC_DECLARE_ALIAS(nthread, n_jobs);
-    DMLC_DECLARE_FIELD(gpu_id).set_default(-1).set_lower_bound(-1).describe(
-        "The primary GPU device ordinal.");
    DMLC_DECLARE_FIELD(fail_on_invalid_gpu_id)
        .set_default(false)
        .describe("Fail with error when gpu_id is invalid.");
@@ -196,6 +210,14 @@ struct Context : public XGBoostParameter<Context> {
  }

 private:
+  void SetDeviceOrdinal(Args const& kwargs);
+  Context& SetDevice(DeviceOrd d) {
+    this->device_ = d;
+    this->gpu_id = d.ordinal;  // this can be removed once we move away from `gpu_id`.
+    this->device = d.Name();
+    return *this;
+  }
+
  // mutable for lazy cuda context initialization. This avoids initializing CUDA at load.
  // shared_ptr is used instead of unique_ptr as with unique_ptr it's difficult to define
  // p_impl while trying to hide CUDA code from the host compiler.

View File

@@ -664,11 +664,11 @@ Object ToJson(Parameter const& param) {
template <typename Parameter>
Args FromJson(Json const& obj, Parameter* param) {
  auto const& j_param = get<Object const>(obj);
-  std::map<std::string, std::string> m;
+  Args args;
  for (auto const& kv : j_param) {
-    m[kv.first] = get<String const>(kv.second);
+    args.emplace_back(kv.first, get<String const>(kv.second));
  }
-  return param->UpdateAllowUnknown(m);
+  return param->UpdateAllowUnknown(args);
}
}  // namespace xgboost
#endif  // XGBOOST_JSON_H_

View File

@@ -110,15 +110,10 @@ class Learner : public Model, public Configurable, public dmlc::Serializable {
   * \param approx_contribs whether to approximate the feature contributions for speed
   * \param pred_interactions whether to compute the feature pair contributions
   */
-  virtual void Predict(std::shared_ptr<DMatrix> data,
-                       bool output_margin,
-                       HostDeviceVector<bst_float> *out_preds,
-                       unsigned layer_begin,
-                       unsigned layer_end,
-                       bool training = false,
-                       bool pred_leaf = false,
-                       bool pred_contribs = false,
-                       bool approx_contribs = false,
+  virtual void Predict(std::shared_ptr<DMatrix> data, bool output_margin,
+                       HostDeviceVector<bst_float>* out_preds, bst_layer_t layer_begin,
+                       bst_layer_t layer_end, bool training = false, bool pred_leaf = false,
+                       bool pred_contribs = false, bool approx_contribs = false,
                       bool pred_interactions = false) = 0;

  /*!
@@ -132,8 +127,8 @@ class Learner : public Model, public Configurable, public dmlc::Serializable {
   * \param layer_end End of booster layer. 0 means do not limit trees.
   */
  virtual void InplacePredict(std::shared_ptr<DMatrix> p_m, PredictionType type, float missing,
-                              HostDeviceVector<bst_float>** out_preds, uint32_t layer_begin,
-                              uint32_t layer_end) = 0;
+                              HostDeviceVector<float>** out_preds, bst_layer_t layer_begin,
+                              bst_layer_t layer_end) = 0;

  /*!
   * \brief Calculate feature score. See doc in C API for outputs.

View File

@@ -39,9 +39,8 @@ struct PredictionCacheEntry {
   *
   * \param v Added versions.
   */
-  void Update(std::uint32_t v) {
-    version += v;
-  }
+  void Update(std::uint32_t v) { version += v; }
+  void Reset() { version = 0; }
};
/** /**

View File

@@ -280,7 +280,7 @@ object GpuPreXGBoost extends PreXGBoostProvider {
      // - gpu id
      // - predictor: Force to gpu predictor since native doesn't save predictor.
      val gpuId = if (!isLocal) XGBoost.getGPUAddrFromResources else 0
-      booster.setParam("gpu_id", gpuId.toString)
+      booster.setParam("device", s"cuda:$gpuId")
      logger.info("GPU transform on device: " + gpuId)
      boosterFlag.isGpuParamsSet = true;
    }

View File

@@ -326,7 +326,7 @@ object XGBoost extends Serializable {
        getGPUAddrFromResources
      }
      logger.info("Leveraging gpu device " + gpuId + " to train")
-      params = params + ("gpu_id" -> gpuId)
+      params = params + ("device" -> s"cuda:$gpuId")
    }
    val booster = if (makeCheckpoint) {
      SXGBoost.trainAndSaveCheckpoint(

View File

@@ -1393,13 +1393,13 @@ class _ProxyDMatrix(DMatrix):
class QuantileDMatrix(DMatrix):
-    """A DMatrix variant that generates quantilized data directly from input for
-    ``hist`` and ``gpu_hist`` tree methods. This DMatrix is primarily designed to save
-    memory in training by avoiding intermediate storage. Set ``max_bin`` to control the
-    number of bins during quantisation, which should be consistent with the training
-    parameter ``max_bin``. When ``QuantileDMatrix`` is used for validation/test dataset,
-    ``ref`` should be another ``QuantileDMatrix``(or ``DMatrix``, but not recommended as
-    it defeats the purpose of saving memory) constructed from training dataset. See
+    """A DMatrix variant that generates quantilized data directly from input for the
+    ``hist`` tree method. This DMatrix is primarily designed to save memory in training
+    by avoiding intermediate storage. Set ``max_bin`` to control the number of bins
+    during quantisation, which should be consistent with the training parameter
+    ``max_bin``. When ``QuantileDMatrix`` is used for validation/test dataset, ``ref``
+    should be another ``QuantileDMatrix`` (or ``DMatrix``, but not recommended as it
+    defeats the purpose of saving memory) constructed from the training dataset. See
    :py:obj:`xgboost.DMatrix` for documents on meta info.

    .. note::
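A hedged sketch of the ``ref`` pattern described in this docstring (synthetic data):

.. code-block:: python

   import numpy as np
   import xgboost as xgb

   X_tr, y_tr = np.random.randn(256, 8), np.random.randn(256)
   X_va, y_va = np.random.randn(64, 8), np.random.randn(64)
   dtrain = xgb.QuantileDMatrix(X_tr, label=y_tr, max_bin=256)
   # Reuse the training quantile cuts for the validation set.
   dvalid = xgb.QuantileDMatrix(X_va, label=y_va, ref=dtrain)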
@@ -2277,10 +2277,10 @@ class Booster:

        .. code-block:: python

-            booster.set_param({"gpu_id": "0", "tree_method": "gpu_hist"})
+            booster.set_param({"device": "cuda:0"})
            booster.inplace_predict(cupy_array)
-            booster.set_param({"gpu_id": "-1", "tree_method": "hist"})
+            booster.set_param({"device": "cpu"})
            booster.inplace_predict(numpy_array)

        .. versionadded:: 1.1.0
@@ -2311,8 +2311,8 @@ class Booster:
        Returns
        -------
        prediction : numpy.ndarray/cupy.ndarray
-            The prediction result. When input data is on GPU, prediction
-            result is stored in a cupy array.
+            The prediction result. When input data is on GPU, prediction result is
+            stored in a cupy array.
        """
        preds = ctypes.POINTER(ctypes.c_float)()

View File

@@ -273,7 +273,7 @@ __model_doc = f"""
        * For linear model, only "weight" is defined and it's the normalized coefficients
          without bias.

-    gpu_id : Optional[int]
+    device : Optional[str]
        Device ordinal.
    validate_parameters : Optional[bool]
        Give warnings for unknown parameter.
@@ -647,7 +647,7 @@ class XGBModel(XGBModelBase):
        monotone_constraints: Optional[Union[Dict[str, int], str]] = None,
        interaction_constraints: Optional[Union[str, Sequence[Sequence[str]]]] = None,
        importance_type: Optional[str] = None,
-        gpu_id: Optional[int] = None,
+        device: Optional[str] = None,
        validate_parameters: Optional[bool] = None,
        enable_categorical: bool = False,
        feature_types: Optional[FeatureTypes] = None,
@@ -693,7 +693,7 @@ class XGBModel(XGBModelBase):
        self.monotone_constraints = monotone_constraints
        self.interaction_constraints = interaction_constraints
        self.importance_type = importance_type
-        self.gpu_id = gpu_id
+        self.device = device
        self.validate_parameters = validate_parameters
        self.enable_categorical = enable_categorical
        self.feature_types = feature_types

View File

@@ -1,4 +1,4 @@
-"""Xgboost pyspark integration submodule for core code."""
+"""XGBoost pyspark integration submodule for core code."""
import base64

# pylint: disable=fixme, too-many-ancestors, protected-access, no-member, invalid-name

@@ -133,6 +133,7 @@ _inverse_pyspark_param_alias_map = {v: k for k, v in _pyspark_param_alias_map.it
_unsupported_xgb_params = [
    "gpu_id",  # we have "use_gpu" pyspark param instead.
+    "device",  # we have "use_gpu" pyspark param instead.
    "enable_categorical",  # Use feature_types param to specify categorical feature instead
    "use_label_encoder",
    "n_jobs",  # Do not allow user to set it, will use `spark.task.cpus` value instead.
@@ -899,12 +900,14 @@ class _SparkXGBEstimator(Estimator, _SparkXGBParams, MLReadable, MLWritable):
            context = BarrierTaskContext.get()

-            gpu_id = None
+            dev_ordinal = None
            use_hist = booster_params.get("tree_method", None) in ("hist", "gpu_hist")

            if use_gpu:
-                gpu_id = context.partitionId() if is_local else _get_gpu_id(context)
-                booster_params["gpu_id"] = gpu_id
+                dev_ordinal = (
+                    context.partitionId() if is_local else _get_gpu_id(context)
+                )
+                booster_params["device"] = "cuda:" + str(dev_ordinal)

                # If cuDF is not installed, then using DMatrix instead of QDM,
                # because without cuDF, DMatrix performs better than QDM.
                # Note: Checking `is_cudf_available` in spark worker side because
@@ -945,7 +948,7 @@ class _SparkXGBEstimator(Estimator, _SparkXGBParams, MLReadable, MLWritable):
            dtrain, dvalid = create_dmatrix_from_partitions(
                pandas_df_iter,
                feature_prop.features_cols_names,
-                gpu_id,
+                dev_ordinal,
                use_qdm,
                dmatrix_kwargs,
                enable_sparse_data_optim=feature_prop.enable_sparse_data_optim,

View File

@@ -157,7 +157,7 @@ def _read_csr_matrix_from_unwrapped_spark_vec(part: pd.DataFrame) -> csr_matrix:
def make_qdm(
    data: Dict[str, List[np.ndarray]],
-    gpu_id: Optional[int],
+    dev_ordinal: Optional[int],
    meta: Dict[str, Any],
    ref: Optional[DMatrix],
    params: Dict[str, Any],
@@ -165,7 +165,7 @@ def make_qdm(
    """Handle empty partition for QuantileDMatrix."""
    if not data:
        return QuantileDMatrix(np.empty((0, 0)), ref=ref)
-    it = PartIter(data, gpu_id, **meta)
+    it = PartIter(data, dev_ordinal, **meta)
    m = QuantileDMatrix(it, **params, ref=ref)
    return m
@@ -173,7 +173,7 @@ def make_qdm(
def create_dmatrix_from_partitions(  # pylint: disable=too-many-arguments
    iterator: Iterator[pd.DataFrame],
    feature_cols: Optional[Sequence[str]],
-    gpu_id: Optional[int],
+    dev_ordinal: Optional[int],
    use_qdm: bool,
    kwargs: Dict[str, Any],  # use dict to make sure this parameter is passed.
    enable_sparse_data_optim: bool,
@@ -187,7 +187,7 @@ def create_dmatrix_from_partitions(  # pylint: disable=too-many-arguments
        Pyspark partition iterator.
    feature_cols:
        A sequence of feature names, used only when rapids plugin is enabled.
-    gpu_id:
+    dev_ordinal:
        Device ordinal, used when GPU is enabled.
    use_qdm :
        Whether QuantileDMatrix should be used instead of DMatrix.
@@ -304,13 +304,13 @@ def create_dmatrix_from_partitions(  # pylint: disable=too-many-arguments
    if feature_cols is not None and use_qdm:
        cache_partitions(iterator, append_fn)
-        dtrain: DMatrix = make_qdm(train_data, gpu_id, meta, None, params)
+        dtrain: DMatrix = make_qdm(train_data, dev_ordinal, meta, None, params)
    elif feature_cols is not None and not use_qdm:
        cache_partitions(iterator, append_fn)
        dtrain = make(train_data, kwargs)
    elif feature_cols is None and use_qdm:
        cache_partitions(iterator, append_fn)
-        dtrain = make_qdm(train_data, gpu_id, meta, None, params)
+        dtrain = make_qdm(train_data, dev_ordinal, meta, None, params)
    else:
        cache_partitions(iterator, append_fn)
        dtrain = make(train_data, kwargs)
@@ -324,7 +324,7 @@ def create_dmatrix_from_partitions(  # pylint: disable=too-many-arguments
    if has_validation_col:
        if use_qdm:
            dvalid: Optional[DMatrix] = make_qdm(
-                valid_data, gpu_id, meta, dtrain, params
+                valid_data, dev_ordinal, meta, dtrain, params
            )
        else:
            dvalid = make(valid_data, kwargs) if has_validation_col else None

View File

@@ -78,8 +78,7 @@ def _set_pyspark_xgb_cls_param_attrs(
class SparkXGBRegressor(_SparkXGBEstimator):
-    """
-    SparkXGBRegressor is a PySpark ML estimator. It implements the XGBoost regression
+    """SparkXGBRegressor is a PySpark ML estimator. It implements the XGBoost regression
    algorithm based on XGBoost python library, and it can be used in PySpark Pipeline
    and PySpark ML meta algorithms like :py:class:`~pyspark.ml.tuning.CrossValidator`/
    :py:class:`~pyspark.ml.tuning.TrainValidationSplit`/

@@ -89,8 +88,8 @@ class SparkXGBRegressor(_SparkXGBEstimator):
    :py:class:`xgboost.XGBRegressor` constructor and most of the parameters used in
    :py:meth:`xgboost.XGBRegressor.fit` and :py:meth:`xgboost.XGBRegressor.predict` method.

-    SparkXGBRegressor doesn't support setting `gpu_id` but support another param `use_gpu`,
-    see doc below for more details.
+    SparkXGBRegressor doesn't support setting `device` but supports another param
+    `use_gpu`, see doc below for more details.

    SparkXGBRegressor doesn't support setting `base_margin` explicitly as well, but support
    another param called `base_margin_col`. see doc below for more details.
@@ -247,8 +246,8 @@ class SparkXGBClassifier(_SparkXGBEstimator, HasProbabilityCol, HasRawPrediction
    :py:class:`xgboost.XGBClassifier` constructor and most of the parameters used in
    :py:meth:`xgboost.XGBClassifier.fit` and :py:meth:`xgboost.XGBClassifier.predict` method.

-    SparkXGBClassifier doesn't support setting `gpu_id` but support another param `use_gpu`,
-    see doc below for more details.
+    SparkXGBClassifier doesn't support setting `device` but supports another param
+    `use_gpu`, see doc below for more details.

    SparkXGBClassifier doesn't support setting `base_margin` explicitly as well, but support
    another param called `base_margin_col`. see doc below for more details.
@@ -423,7 +422,7 @@ class SparkXGBRanker(_SparkXGBEstimator):
    :py:class:`xgboost.XGBRanker` constructor and most of the parameters used in
    :py:meth:`xgboost.XGBRanker.fit` and :py:meth:`xgboost.XGBRanker.predict` method.

-    SparkXGBRanker doesn't support setting `gpu_id` but support another param `use_gpu`,
-    see doc below for more details.
+    SparkXGBRanker doesn't support setting `device` but supports another param `use_gpu`,
+    see doc below for more details.

    SparkXGBRanker doesn't support setting `base_margin` explicitly as well, but support
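A hedged sketch of the ``use_gpu`` pattern shared by these estimators; the Spark session and the ``train_df`` DataFrame are assumed to exist:

.. code-block:: python

   from xgboost.spark import SparkXGBRegressor

   # `use_gpu` replaces `device`/`gpu_id`; one GPU is assigned per Spark task.
   regressor = SparkXGBRegressor(
       features_col="features",
       label_col="label",
       use_gpu=True,
   )
   model = regressor.fit(train_df)  # train_df: an assumed Spark DataFrame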

View File

@@ -723,24 +723,6 @@ def predictor_equal(lhs: xgb.DMatrix, rhs: xgb.DMatrix) -> bool:
M = TypeVar("M", xgb.Booster, xgb.XGBModel)

-def set_ordinal(ordinal: int, booster: M) -> M:
-    """Temporary solution for setting the device ordinal until we move away from
-    `gpu_id`.
-    """
-    if ordinal < 0:
-        params = {"gpu_id": -1, "tree_method": "hist"}
-    else:
-        params = {"gpu_id": ordinal, "tree_method": "gpu_hist"}
-
-    if isinstance(booster, xgb.Booster):
-        booster.set_param(params)
-    elif isinstance(booster, xgb.XGBModel):
-        booster.set_params(**params)
-    return booster
-

def eval_error_metric(predt: np.ndarray, dtrain: xgb.DMatrix) -> Tuple[str, np.float64]:
    """Evaluation metric for xgb.train"""
    label = dtrain.get_label()

View File

@@ -117,10 +117,7 @@ int InplacePreidctCUDA(BoosterHandle handle, char const *c_array_interface,
                               RequiredArg<Integer>(config, "iteration_begin", __func__),
                               RequiredArg<Integer>(config, "iteration_end", __func__));
  CHECK(p_predt);
-  if (learner->Ctx()->IsCPU()) {
-    // Prediction using DMatrix as fallback.
-    CHECK(p_predt->HostCanRead() && !p_predt->DeviceCanRead());
-  } else {
+  if (learner->Ctx()->IsCUDA()) {
    CHECK(p_predt->DeviceCanRead() && !p_predt->HostCanRead());
  }
  p_predt->SetDevice(proxy->DeviceIdx());

View File

@@ -3,23 +3,18 @@
 */
#include "error_msg.h"

+#include "../collective/communicator-inl.h"  // for GetRank
#include "xgboost/logging.h"

namespace xgboost::error {
void WarnDeprecatedGPUHist() {
-  bool static thread_local logged{false};
-  if (logged) {
-    return;
-  }
  auto msg =
      "The tree method `gpu_hist` is deprecated since 2.0.0. To use GPU training, set the `device` "
      R"(parameter to CUDA instead.

    E.g. tree_method = "hist", device = "CUDA"
)";
  LOG(WARNING) << msg;
-  logged = true;
}

void WarnManualUpdater() {
@@ -33,4 +28,23 @@ void WarnManualUpdater() {
      "behavior. For common uses, we recommend using `tree_method` parameter instead.";
  logged = true;
}
+
+void WarnDeprecatedGPUId() {
+  static thread_local bool logged{false};
+  if (logged) {
+    return;
+  }
+  LOG(WARNING) << "`gpu_id` is deprecated in favor of the new `device` parameter: "
+               << "device = cpu/cuda/cuda:0";
+  logged = true;
+}
+
+void WarnEmptyDataset() {
+  static thread_local bool logged{false};
+  if (logged) {
+    return;
+  }
+  LOG(WARNING) << "Empty dataset at worker: " << collective::GetRank();
+  logged = true;
+}
}  // namespace xgboost::error

View File

@@ -82,5 +82,9 @@ inline void WarnOldSerialization() {
void WarnDeprecatedGPUHist();

void WarnManualUpdater();
+
+void WarnDeprecatedGPUId();
+
+void WarnEmptyDataset();
}  // namespace xgboost::error
#endif  // XGBOOST_COMMON_ERROR_MSG_H_

View File

@@ -3,53 +3,201 @@
 *
 * \brief Context object used for controlling runtime parameters.
 */
-#include <xgboost/context.h>
+#include "xgboost/context.h"
+
+#include <algorithm>  // for find_if
+#include <charconv>   // for from_chars
+#include <iterator>   // for distance
+#include <optional>   // for optional
+#include <regex>      // for regex_replace, regex_match

#include "common/common.h"  // AssertGPUSupport
+#include "common/error_msg.h"  // WarnDeprecatedGPUId
#include "common/threading_utils.h"
+#include "xgboost/string_view.h"

namespace xgboost {

DMLC_REGISTER_PARAMETER(Context);

-std::int32_t constexpr Context::kCpuId;
+bst_d_ordinal_t constexpr Context::kCpuId;
std::int64_t constexpr Context::kDefaultSeed;

Context::Context() : cfs_cpu_count_{common::GetCfsCPUCount()} {}
-void Context::ConfigureGpuId(bool require_gpu) {
-#if defined(XGBOOST_USE_CUDA)
-  if (gpu_id == kCpuId) {  // 0. User didn't specify the `gpu_id'
-    if (require_gpu) {     // 1. `tree_method' or `predictor' or both are using
-                           //    GPU.
-      // 2. Use device 0 as default.
-      this->UpdateAllowUnknown(Args{{"gpu_id", "0"}});
-    }
-  }
-
-  // 3. When booster is loaded from a memory image (Python pickle or R
-  //    raw model), number of available GPUs could be different. Wrap around it.
-  int32_t n_gpus = common::AllVisibleGPUs();
-  if (n_gpus == 0) {
-    if (gpu_id != kCpuId) {
-      LOG(WARNING) << "No visible GPU is found, setting `gpu_id` to -1";
-    }
-    this->UpdateAllowUnknown(Args{{"gpu_id", std::to_string(kCpuId)}});
-  } else if (fail_on_invalid_gpu_id) {
-    CHECK(gpu_id == kCpuId || gpu_id < n_gpus)
-        << "Only " << n_gpus << " GPUs are visible, gpu_id " << gpu_id << " is invalid.";
-  } else if (gpu_id != kCpuId && gpu_id >= n_gpus) {
-    LOG(WARNING) << "Only " << n_gpus << " GPUs are visible, setting `gpu_id` to "
-                 << gpu_id % n_gpus;
-    this->UpdateAllowUnknown(Args{{"gpu_id", std::to_string(gpu_id % n_gpus)}});
-  }
-#else
-  // Just set it to CPU, don't think about it.
-  this->UpdateAllowUnknown(Args{{"gpu_id", std::to_string(kCpuId)}});
-  (void)(require_gpu);
-#endif  // defined(XGBOOST_USE_CUDA)
-
-  common::SetDevice(this->gpu_id);
-}
+namespace {
+inline constexpr char const* kDevice = "device";
+
+#if !defined(XGBOOST_USE_CUDA)
+DeviceOrd CUDAOrdinal(DeviceOrd device, bool) {
+  device = DeviceOrd::CPU();
+  return device;
+}
+#else
+// Check CUDA on the current device, wrap the ordinal if necessary.
+[[nodiscard]] DeviceOrd CUDAOrdinal(DeviceOrd device, bool fail_on_invalid) {
+  // When booster is loaded from a memory image (Python pickle or R raw model), number of
+  // available GPUs could be different. Wrap around it.
+  std::int32_t n_visible = common::AllVisibleGPUs();
+  if (n_visible == 0) {
+    if (device.IsCUDA()) {
+      LOG(WARNING) << "No visible GPU is found, setting device to CPU.";
+    }
+    device = DeviceOrd::CPU();
+  } else if (fail_on_invalid) {
+    CHECK(device.IsCPU() || device.ordinal < n_visible)
+        << "Only " << n_visible << " GPUs are visible, ordinal " << device.ordinal
+        << " is invalid.";
+  } else if (device.IsCUDA() && device.ordinal >= n_visible) {
+    device.ordinal = device.ordinal % n_visible;
+    LOG(WARNING) << "Only " << n_visible << " GPUs are visible, setting device ordinal to "
+                 << device.ordinal;
+  }
+
+  if (device.IsCUDA()) {
+    common::SetDevice(device.ordinal);
+  }
+  return device;
+}
+#endif  // !defined(XGBOOST_USE_CUDA)
+
+[[nodiscard]] std::optional<std::int32_t> ParseInt(StringView ordinal) {
+  // Some basic checks to ensure valid `gpu_id` and device ordinal instead of directly
+  // parsing and letting go of unknown characters.
+  if (ordinal.empty()) {
+    return std::nullopt;
+  }
+
+  std::size_t offset{0};
+  if (ordinal[0] == '-') {
+    offset = 1;
+  }
+  if (ordinal.size() <= offset) {
+    return std::nullopt;
+  }
+
+  bool valid = std::all_of(ordinal.cbegin() + offset, ordinal.cend(),
+                           [](auto c) { return std::isdigit(c); });
+  if (!valid) {
+    return std::nullopt;
+  }
+
+  std::int32_t parsed_id{Context::kCpuId};
+  auto res = std::from_chars(ordinal.c_str(), ordinal.c_str() + ordinal.size(), parsed_id);
+  if (res.ec != std::errc()) {
+    return std::nullopt;
+  }
+  return parsed_id;
+}
+
+[[nodiscard]] DeviceOrd MakeDeviceOrd(std::string const& input, bool fail_on_invalid_gpu_id) {
+  StringView msg{R"(Invalid argument for `device`. Expected to be one of the following:
+- cpu
+- cuda
+- cuda:<device ordinal>  # e.g. cuda:0
+- gpu
+- gpu:<device ordinal>   # e.g. gpu:0
+)"};
+  auto fatal = [&] { LOG(FATAL) << msg << "Got: `" << input << "`."; };
+
+#if defined(__MINGW32__)
+  // mingw hangs on regex using rtools 430. Basic checks only.
+  CHECK_GE(input.size(), 3) << msg;
+  auto substr = input.substr(0, 3);
+  bool valid = substr == "cpu" || substr == "cud" || substr == "gpu";
+  CHECK(valid) << msg;
+#else
+  std::regex pattern{"gpu(:[0-9]+)?|cuda(:[0-9]+)?|cpu"};
+  if (!std::regex_match(input, pattern)) {
+    fatal();
+  }
+#endif  // defined(__MINGW32__)
+
+  // handle alias
+  std::string s_device = std::regex_replace(input, std::regex{"gpu"}, DeviceSym::CUDA());
+  auto split_it = std::find(s_device.cbegin(), s_device.cend(), ':');
+
+  DeviceOrd device;
+  device.ordinal = Context::InvalidOrdinal();  // mark it invalid for check.
+  if (split_it == s_device.cend()) {
+    // no ordinal.
+    if (s_device == DeviceSym::CPU()) {
+      device = DeviceOrd::CPU();
+    } else if (s_device == DeviceSym::CUDA()) {
+      device = DeviceOrd::CUDA(0);  // use 0 as default;
+    } else {
+      fatal();
+    }
+  } else {
+    // must be CUDA when ordinal is specified.
+    // +1 for colon
+    std::size_t offset = std::distance(s_device.cbegin(), split_it) + 1;
+    // substr
+    StringView s_ordinal = {s_device.data() + offset, s_device.size() - offset};
+    if (s_ordinal.empty()) {
+      fatal();
+    }
+    auto opt_id = ParseInt(s_ordinal);
+    if (!opt_id.has_value()) {
+      fatal();
+    }
+    CHECK_LE(opt_id.value(), std::numeric_limits<bst_d_ordinal_t>::max())
+        << "Ordinal value too large.";
+    device = DeviceOrd::CUDA(opt_id.value());
+  }
+
+  if (device.ordinal < Context::kCpuId) {
+    fatal();
+  }
+  device = CUDAOrdinal(device, fail_on_invalid_gpu_id);
+
+  return device;
+}
+}  // namespace
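For illustration, a hedged Python transcription of the device-string grammar accepted by ``MakeDeviceOrd`` above; the regex is copied from the C++, while ``parse_device`` itself is a hypothetical helper:

.. code-block:: python

   import re

   _DEVICE_PAT = re.compile(r"gpu(:[0-9]+)?|cuda(:[0-9]+)?|cpu")

   def parse_device(spec):
       """Hypothetical helper mirroring the C++ parsing: returns (type, ordinal)."""
       if not _DEVICE_PAT.fullmatch(spec):
           raise ValueError(f"Invalid argument for `device`. Got: {spec}")
       spec = spec.replace("gpu", "cuda")  # `gpu` is an alias for `cuda`
       if spec == "cpu":
           return ("cpu", -1)
       if ":" in spec:
           return ("cuda", int(spec.split(":", 1)[1]))
       return ("cuda", 0)  # bare `cuda` defaults to ordinal 0

   assert parse_device("gpu:1") == ("cuda", 1)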
+
+void Context::ConfigureGpuId(bool require_gpu) {
+  if (this->IsCPU() && require_gpu) {
+    this->UpdateAllowUnknown(Args{{kDevice, DeviceSym::CUDA()}});
+  }
+}
+
+void Context::SetDeviceOrdinal(Args const& kwargs) {
+  auto gpu_id_it = std::find_if(kwargs.cbegin(), kwargs.cend(),
+                                [](auto const& p) { return p.first == "gpu_id"; });
+  auto has_gpu_id = gpu_id_it != kwargs.cend();
+  auto device_it = std::find_if(kwargs.cbegin(), kwargs.cend(),
+                                [](auto const& p) { return p.first == kDevice; });
+  auto has_device = device_it != kwargs.cend();
+  if (has_device && has_gpu_id) {
+    LOG(FATAL) << "Both `device` and `gpu_id` are specified. Use `device` instead.";
+  }
+
+  if (has_gpu_id) {
+    // Compatible with XGBoost < 2.0.0
+    error::WarnDeprecatedGPUId();
+    auto opt_id = ParseInt(StringView{gpu_id_it->second});
+    CHECK(opt_id.has_value()) << "Invalid value for `gpu_id`. Got:" << gpu_id_it->second;
+    if (opt_id.value() > Context::kCpuId) {
+      this->UpdateAllowUnknown(Args{{kDevice, DeviceOrd::CUDA(opt_id.value()).Name()}});
+    } else {
+      this->UpdateAllowUnknown(Args{{kDevice, DeviceOrd::CPU().Name()}});
+    }
+    return;
+  }
+
+  auto new_d = MakeDeviceOrd(this->device, this->fail_on_invalid_gpu_id);
+  if (!has_device) {
+    CHECK_EQ(new_d.ordinal, this->device_.ordinal);  // unchanged
+  }
+  this->SetDevice(new_d);
+
+  if (this->IsCPU()) {
+    CHECK_EQ(this->device_.ordinal, kCpuId);
+  } else {
+    CHECK_GT(this->device_.ordinal, kCpuId);
+  }
+}

std::int32_t Context::Threads() const {
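A hedged Python-level illustration of the compatibility rules implemented in ``SetDeviceOrdinal`` above (synthetic data; assumes a CUDA-enabled build, and the exact warning/error text may differ):

.. code-block:: python

   import numpy as np
   import xgboost as xgb

   dtrain = xgb.DMatrix(np.random.randn(16, 2), label=np.random.randn(16))
   # Deprecated spelling: warns and is mapped onto device="cuda:0".
   xgb.train({"gpu_id": 0}, dtrain, num_boost_round=1)
   try:
       # Specifying both is rejected outright.
       xgb.train({"device": "cuda", "gpu_id": 0}, dtrain, num_boost_round=1)
   except xgb.core.XGBoostError:
       pass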

View File

@ -33,10 +33,11 @@ IterativeDMatrix::IterativeDMatrix(DataIterHandle iter_handle, DMatrixHandle pro
  bool valid = iter.Next();
  CHECK(valid) << "Iterative DMatrix must have at least 1 batch.";

- auto d = MakeProxy(proxy_)->DeviceIdx();
+ auto pctx = MakeProxy(proxy_)->Ctx();
  Context ctx;
- ctx.UpdateAllowUnknown(Args{{"nthread", std::to_string(nthread)}, {"gpu_id", std::to_string(d)}});
+ ctx.UpdateAllowUnknown(
+     Args{{"nthread", std::to_string(nthread)}, {"device", pctx->DeviceName()}});
  // hardcoded parameter.
  BatchParam p{max_bin, tree::TrainParam::DftSparseThreshold()};
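The "at least 1 batch" requirement is user-visible through the iterator-based DMatrix classes. A minimal sketch using the Python ``DataIter`` interface (synthetic in-memory batches; ``Batches`` is a hypothetical helper name):

.. code-block:: python

    import numpy as np
    import xgboost as xgb

    class Batches(xgb.DataIter):
        """Feed pre-made (X, y) batches to XGBoost one at a time."""

        def __init__(self, batches):
            self._batches = batches
            self._it = 0
            super().__init__()

        def next(self, input_data):
            if self._it == len(self._batches):
                return 0  # no more batches
            X, y = self._batches[self._it]
            input_data(data=X, label=y)
            self._it += 1
            return 1

        def reset(self):
            self._it = 0

    batches = [(np.random.rand(8, 4), np.random.rand(8)) for _ in range(2)]
    # An empty iterator would trip the CHECK above: at least one batch is required.
    Xy = xgb.QuantileDMatrix(Batches(batches), max_bin=16)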

View File

@ -54,6 +54,7 @@ std::shared_ptr<DMatrix> CreateDMatrixFromProxy(Context const *ctx,
    p_fmat = cuda_impl::CreateDMatrixFromProxy(ctx, proxy, missing);
  }
+ CHECK(p_fmat) << "Failed to fallback.";
  return p_fmat;
}
}  // namespace xgboost::data

View File

@ -7,28 +7,31 @@
namespace xgboost::data {
void DMatrixProxy::FromCudaColumnar(StringView interface_str) {
- std::shared_ptr<data::CudfAdapter> adapter{new CudfAdapter{interface_str}};
- auto const& value = adapter->Value();
+ auto adapter{std::make_shared<CudfAdapter>(interface_str)};
  this->batch_ = adapter;
- ctx_.gpu_id = adapter->DeviceIdx();
  this->Info().num_col_ = adapter->NumColumns();
  this->Info().num_row_ = adapter->NumRows();
- if (ctx_.gpu_id < 0) {
+ if (adapter->DeviceIdx() < 0) {
+   // empty data
    CHECK_EQ(this->Info().num_row_, 0);
-   ctx_.gpu_id = dh::CurrentDevice();
+   ctx_ = ctx_.MakeCUDA(dh::CurrentDevice());
+   return;
  }
+ ctx_ = ctx_.MakeCUDA(adapter->DeviceIdx());
}

void DMatrixProxy::FromCudaArray(StringView interface_str) {
- std::shared_ptr<CupyAdapter> adapter(new CupyAdapter{StringView{interface_str}});
+ auto adapter(std::make_shared<CupyAdapter>(StringView{interface_str}));
  this->batch_ = adapter;
- ctx_.gpu_id = adapter->DeviceIdx();
  this->Info().num_col_ = adapter->NumColumns();
  this->Info().num_row_ = adapter->NumRows();
- if (ctx_.gpu_id < 0) {
+ if (adapter->DeviceIdx() < 0) {
+   // empty data
    CHECK_EQ(this->Info().num_row_, 0);
-   ctx_.gpu_id = dh::CurrentDevice();
+   ctx_ = ctx_.MakeCUDA(dh::CurrentDevice());
+   return;
  }
+ ctx_ = ctx_.MakeCUDA(adapter->DeviceIdx());
}

namespace cuda_impl {
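Since the proxy now adopts the ordinal of the incoming CUDA allocation, a booster configured for the same device consumes the data in place. A hedged sketch (assumes a CUDA build plus ``cupy``):

.. code-block:: python

    import cupy as cp
    import numpy as np
    import xgboost as xgb

    X, y = np.random.rand(64, 4), np.random.rand(64)
    booster = xgb.train({"device": "cuda:0"}, xgb.DMatrix(X, label=y), num_boost_round=2)

    # The proxy DMatrix picks up the device ordinal of the cupy array, so the
    # booster and the data agree and no fallback is needed.
    predt = booster.inplace_predict(cp.array(X))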

View File

@ -27,7 +27,7 @@ SimpleDMatrix::SimpleDMatrix(AdapterT* adapter, float missing, std::int32_t nthr
  dh::safe_cuda(cudaSetDevice(device));
  Context ctx;
- ctx.Init(Args{{"nthread", std::to_string(nthread)}, {"gpu_id", std::to_string(device)}});
+ ctx.Init(Args{{"nthread", std::to_string(nthread)}, {"device", DeviceOrd::CUDA(device).Name()}});
  CHECK(adapter->NumRows() != kAdapterUnknownSize);
  CHECK(adapter->NumColumns() != kAdapterUnknownSize);

View File

@ -84,6 +84,25 @@ bool UpdatersMatched(std::vector<std::string> updater_seq,
    return name == up->Name();
  });
}

+void MismatchedDevices(Context const* booster, Context const* data) {
+  bool thread_local static logged{false};
+  if (logged) {
+    return;
+  }
+  LOG(WARNING) << "Falling back to prediction using DMatrix due to mismatched devices. This might "
+                  "lead to higher memory usage and slower performance. XGBoost is running on: "
+               << booster->DeviceName() << ", while the input data is on: " << data->DeviceName()
+               << ".\n"
+               << R"(Potential solutions:
+- Use a data structure that matches the device ordinal in the booster.
+- Set the device for booster before call to inplace_predict.
+This warning will only be shown once, and subsequent warnings made by the current thread will be
+suppressed.
+)";
+  logged = true;
+}
}  // namespace

void GBTree::Configure(Args const& cfg) {
@ -208,6 +227,7 @@ void GBTree::DoBoost(DMatrix* p_fmat, HostDeviceVector<GradientPair>* in_gpair,
  bst_target_t const n_groups = model_.learner_model_param->OutputLength();
  monitor_.Start("BoostNewTrees");

+ predt->predictions.SetDevice(ctx_->Ordinal());
  auto out = linalg::MakeTensorView(ctx_, &predt->predictions, p_fmat->Info().num_row_,
                                    model_.learner_model_param->OutputLength());
  CHECK_NE(n_groups, 0);
@ -521,18 +541,6 @@ void GBTree::PredictBatchImpl(DMatrix* p_fmat, PredictionCacheEntry* out_preds,
  }
}

-namespace {
-inline void MismatchedDevices(Context const* booster, Context const* data) {
-  LOG(WARNING) << "Falling back to prediction using DMatrix due to mismatched devices. XGBoost "
-               << "is running on: " << booster->DeviceName()
-               << ", while the input data is on: " << data->DeviceName() << ".\n"
-               << R"(Potential solutions:
-- Use a data structure that matches the device ordinal in the booster.
-- Set the device for booster before call to inplace_predict.
-)";
-}
-};  // namespace

void GBTree::PredictBatch(DMatrix* p_fmat, PredictionCacheEntry* out_preds, bool is_training,
                          bst_layer_t layer_begin, bst_layer_t layer_end) {
  // dispatch to const function.
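The rewritten warning fires at most once per thread. A sketch of the mismatched case it guards (assumes a CUDA build plus ``cupy``; the booster stays on the CPU while the data lives on the GPU):

.. code-block:: python

    import cupy as cp
    import numpy as np
    import xgboost as xgb

    X, y = np.random.rand(64, 4), np.random.rand(64)
    booster = xgb.train({"device": "cpu"}, xgb.DMatrix(X, label=y), num_boost_round=2)

    # Booster on the CPU, data on the GPU: the warning above is logged once and
    # prediction falls back to building a regular DMatrix internally.
    booster.inplace_predict(cp.array(X))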

View File

@ -40,7 +40,7 @@
#include "common/api_entry.h" // for XGBAPIThreadLocalEntry #include "common/api_entry.h" // for XGBAPIThreadLocalEntry
#include "common/charconv.h" // for to_chars, to_chars_result, NumericLimits, from_... #include "common/charconv.h" // for to_chars, to_chars_result, NumericLimits, from_...
#include "common/common.h" // for ToString, Split #include "common/common.h" // for ToString, Split
#include "common/error_msg.h" // for MaxFeatureSize, WarnOldSerialization #include "common/error_msg.h" // for MaxFeatureSize, WarnOldSerialization, ...
#include "common/io.h" // for PeekableInStream, ReadAll, FixedSizeStream, Mem... #include "common/io.h" // for PeekableInStream, ReadAll, FixedSizeStream, Mem...
#include "common/observer.h" // for TrainingObserver #include "common/observer.h" // for TrainingObserver
#include "common/random.h" // for GlobalRandom #include "common/random.h" // for GlobalRandom
@ -711,6 +711,7 @@ class LearnerConfiguration : public Learner {
// FIXME(trivialfis): Make eval_metric a training parameter. // FIXME(trivialfis): Make eval_metric a training parameter.
keys.emplace_back(kEvalMetric); keys.emplace_back(kEvalMetric);
keys.emplace_back("num_output_group"); keys.emplace_back("num_output_group");
keys.emplace_back("gpu_id"); // deprecated param.
std::sort(keys.begin(), keys.end()); std::sort(keys.begin(), keys.end());
@ -1340,10 +1341,9 @@ class LearnerImpl : public LearnerIO {
} }
void Predict(std::shared_ptr<DMatrix> data, bool output_margin, void Predict(std::shared_ptr<DMatrix> data, bool output_margin,
HostDeviceVector<bst_float> *out_preds, unsigned layer_begin, HostDeviceVector<bst_float>* out_preds, bst_layer_t layer_begin,
unsigned layer_end, bool training, bst_layer_t layer_end, bool training, bool pred_leaf, bool pred_contribs,
bool pred_leaf, bool pred_contribs, bool approx_contribs, bool approx_contribs, bool pred_interactions) override {
bool pred_interactions) override {
int multiple_predictions = static_cast<int>(pred_leaf) + int multiple_predictions = static_cast<int>(pred_leaf) +
static_cast<int>(pred_interactions) + static_cast<int>(pred_interactions) +
static_cast<int>(pred_contribs); static_cast<int>(pred_contribs);
@ -1391,15 +1391,16 @@ class LearnerImpl : public LearnerIO {
} }
void InplacePredict(std::shared_ptr<DMatrix> p_m, PredictionType type, float missing, void InplacePredict(std::shared_ptr<DMatrix> p_m, PredictionType type, float missing,
HostDeviceVector<bst_float>** out_preds, uint32_t iteration_begin, HostDeviceVector<float>** out_preds, bst_layer_t iteration_begin,
uint32_t iteration_end) override { bst_layer_t iteration_end) override {
this->Configure(); this->Configure();
this->CheckModelInitialized(); this->CheckModelInitialized();
auto& out_predictions = this->GetThreadLocal().prediction_entry; auto& out_predictions = this->GetThreadLocal().prediction_entry;
out_predictions.version = 0; out_predictions.Reset();
this->gbm_->InplacePredict(p_m, missing, &out_predictions, iteration_begin, iteration_end); this->gbm_->InplacePredict(p_m, missing, &out_predictions, iteration_begin, iteration_end);
if (type == PredictionType::kValue) { if (type == PredictionType::kValue) {
obj_->PredTransform(&out_predictions.predictions); obj_->PredTransform(&out_predictions.predictions);
} else if (type == PredictionType::kMargin) { } else if (type == PredictionType::kMargin) {
@ -1454,7 +1455,7 @@ class LearnerImpl : public LearnerIO {
} }
if (p_fmat->Info().num_row_ == 0) { if (p_fmat->Info().num_row_ == 0) {
LOG(WARNING) << "Empty dataset at worker: " << collective::GetRank(); error::WarnEmptyDataset();
} }
} }
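Keeping ``gpu_id`` in the list of deprecated keys means old scripts continue to run. A small compatibility sketch (synthetic data; the exact warning text is not guaranteed):

.. code-block:: python

    import numpy as np
    import xgboost as xgb

    X, y = np.random.rand(16, 4), np.random.rand(16)
    dtrain = xgb.DMatrix(X, label=y)

    # Emits a deprecation warning and is translated to device="cuda:0" internally.
    xgb.train({"tree_method": "hist", "gpu_id": 0}, dtrain, num_boost_round=1)

    # Passing both parameters at once is a fatal error, per the check above:
    # xgb.train({"gpu_id": 0, "device": "cuda:0"}, dtrain, num_boost_round=1)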

View File

@ -28,6 +28,7 @@ class LintersPaths:
"tests/python-gpu/test_gpu_prediction.py", "tests/python-gpu/test_gpu_prediction.py",
"tests/python-gpu/load_pickle.py", "tests/python-gpu/load_pickle.py",
"tests/python-gpu/test_gpu_pickling.py", "tests/python-gpu/test_gpu_pickling.py",
"tests/python-gpu/test_gpu_eval_metrics.py",
"tests/test_distributed/test_with_spark/", "tests/test_distributed/test_with_spark/",
"tests/test_distributed/test_gpu_with_spark/", "tests/test_distributed/test_gpu_with_spark/",
# demo # demo

View File

@ -16,8 +16,7 @@
namespace xgboost {
namespace common {
void TestSegmentedArgSort() {
- Context ctx;
- ctx.gpu_id = 0;
+ auto ctx = MakeCUDACtx(0);
  size_t constexpr kElements = 100, kGroups = 3;
  dh::device_vector<size_t> sorted_idx(kElements, 0);
@ -55,8 +54,7 @@ void TestSegmentedArgSort() {
TEST(Algorithm, SegmentedArgSort) { TestSegmentedArgSort(); }

TEST(Algorithm, GpuArgSort) {
- Context ctx;
- ctx.gpu_id = 0;
+ auto ctx = MakeCUDACtx(0);

  dh::device_vector<float> values(20);
  dh::Iota(dh::ToSpan(values));  // ascending

View File

@ -227,7 +227,7 @@ TEST(HistUtil, RemoveDuplicatedCategories) {
  }

  // check categorical
  beg = n_samples;
- for (std::size_t i = 0; i < n_categories; ++i) {
+ for (bst_cat_t i = 0; i < n_categories; ++i) {
    // all from the second column
    ASSERT_EQ(static_cast<bst_feature_t>(weight[i + beg]) % n_features, 1);
  }

View File

@ -4,6 +4,7 @@
 #include <gtest/gtest.h>

 #include "../../../src/common/linalg_op.cuh"
+#include "../helpers.h"
 #include "xgboost/context.h"
 #include "xgboost/linalg.h"
@ -54,8 +55,7 @@ void TestElementWiseKernel() {
}

void TestSlice() {
- Context ctx;
- ctx.gpu_id = 1;
+ auto ctx = MakeCUDACtx(1);
  thrust::device_vector<double> data(2 * 3 * 4);
  auto t = MakeTensorView(&ctx, dh::ToSpan(data), 2, 3, 4);
  dh::LaunchN(1, [=] __device__(size_t) {

View File

@ -23,8 +23,7 @@
namespace xgboost::ltr {
void TestCalcQueriesInvIDCG() {
- Context ctx;
- ctx.UpdateAllowUnknown(Args{{"gpu_id", "0"}});
+ auto ctx = MakeCUDACtx(0);

  std::size_t n_groups = 5, n_samples_per_group = 32;
  dh::device_vector<float> scores(n_samples_per_group * n_groups);
@ -85,20 +84,17 @@ void TestRankingCache(Context const* ctx) {
}  // namespace

TEST(RankingCache, InitFromGPU) {
- Context ctx;
- ctx.UpdateAllowUnknown(Args{{"gpu_id", "0"}});
+ auto ctx = MakeCUDACtx(0);
  TestRankingCache(&ctx);
}

TEST(NDCGCache, InitFromGPU) {
- Context ctx;
- ctx.UpdateAllowUnknown(Args{{"gpu_id", "0"}});
+ auto ctx = MakeCUDACtx(0);
  TestNDCGCache(&ctx);
}

TEST(MAPCache, InitFromGPU) {
- Context ctx;
- ctx.UpdateAllowUnknown(Args{{"gpu_id", "0"}});
+ auto ctx = MakeCUDACtx(0);
  TestMAPCache(&ctx);
}
}  // namespace xgboost::ltr

View File

@ -7,6 +7,7 @@
#include "../../../src/common/stats.h" #include "../../../src/common/stats.h"
#include "../../../src/common/transform_iterator.h" // common::MakeIndexTransformIter #include "../../../src/common/transform_iterator.h" // common::MakeIndexTransformIter
#include "../helpers.h"
namespace xgboost { namespace xgboost {
namespace common { namespace common {
@ -71,7 +72,7 @@ TEST(Stats, Median) {
ASSERT_EQ(m, .5f); ASSERT_EQ(m, .5f);
#if defined(XGBOOST_USE_CUDA) #if defined(XGBOOST_USE_CUDA)
ctx.gpu_id = 0; ctx = ctx.MakeCUDA(0);
ASSERT_FALSE(ctx.IsCPU()); ASSERT_FALSE(ctx.IsCPU());
Median(&ctx, values, weights, &out); Median(&ctx, values, weights, &out);
m = out(0); m = out(0);
@ -80,7 +81,7 @@ TEST(Stats, Median) {
} }
{ {
ctx.gpu_id = Context::kCpuId; ctx = ctx.MakeCPU();
// 4x2 matrix // 4x2 matrix
linalg::Tensor<float, 2> values{{0.f, 0.f, 0.f, 0.f, 1.f, 1.f, 2.f, 2.f}, {4, 2}, ctx.gpu_id}; linalg::Tensor<float, 2> values{{0.f, 0.f, 0.f, 0.f, 1.f, 1.f, 2.f, 2.f}, {4, 2}, ctx.gpu_id};
HostDeviceVector<float> weights; HostDeviceVector<float> weights;
@ -90,7 +91,7 @@ TEST(Stats, Median) {
ASSERT_EQ(out(1), .5f); ASSERT_EQ(out(1), .5f);
#if defined(XGBOOST_USE_CUDA) #if defined(XGBOOST_USE_CUDA)
ctx.gpu_id = 0; ctx = ctx.MakeCUDA(0);
Median(&ctx, values, weights, &out); Median(&ctx, values, weights, &out);
ASSERT_EQ(out(0), .5f); ASSERT_EQ(out(0), .5f);
ASSERT_EQ(out(1), .5f); ASSERT_EQ(out(1), .5f);
@ -123,8 +124,7 @@ TEST(Stats, Mean) {
#if defined(XGBOOST_USE_CUDA) #if defined(XGBOOST_USE_CUDA)
TEST(Stats, GPUMean) { TEST(Stats, GPUMean) {
Context ctx; auto ctx = MakeCUDACtx(0);
ctx.UpdateAllowUnknown(Args{{"gpu_id", "0"}});
TestMean(&ctx); TestMean(&ctx);
} }
#endif // defined(XGBOOST_USE_CUDA) #endif // defined(XGBOOST_USE_CUDA)

View File

@ -9,6 +9,7 @@
#include "../../../src/common/linalg_op.cuh" // ElementWiseTransformDevice #include "../../../src/common/linalg_op.cuh" // ElementWiseTransformDevice
#include "../../../src/common/stats.cuh" #include "../../../src/common/stats.cuh"
#include "../helpers.h"
#include "xgboost/base.h" // XGBOOST_DEVICE #include "xgboost/base.h" // XGBOOST_DEVICE
#include "xgboost/context.h" // Context #include "xgboost/context.h" // Context
#include "xgboost/host_device_vector.h" // HostDeviceVector #include "xgboost/host_device_vector.h" // HostDeviceVector
@ -33,7 +34,7 @@ class StatsGPU : public ::testing::Test {
} }
public: public:
void SetUp() override { ctx_.gpu_id = 0; } void SetUp() override { ctx_ = MakeCUDACtx(0); }
void WeightedMulti() { void WeightedMulti() {
// data for one segment // data for one segment

View File

@ -171,8 +171,7 @@ class GHistIndexMatrixTest : public testing::TestWithParam<std::tuple<float, flo
    ASSERT_TRUE(Xy->SingleColBlock());
    bst_bin_t constexpr kBins{17};
    auto p = BatchParam{kBins, threshold};
-   Context gpu_ctx;
-   gpu_ctx.gpu_id = 0;
+   auto gpu_ctx = MakeCUDACtx(0);
    for (auto const &page : Xy->GetBatches<EllpackPage>(
             &gpu_ctx, BatchParam{kBins, tree::TrainParam::DftSparseThreshold()})) {
      from_ellpack = std::make_unique<GHistIndexMatrix>(&ctx, Xy->Info(), page, p);

View File

@ -180,7 +180,12 @@ TEST(GBTree, ChooseTreeMethod) {
learner->SetParam("tree_method", tree_method.value()); learner->SetParam("tree_method", tree_method.value());
} }
if (device.has_value()) { if (device.has_value()) {
learner->SetParam("gpu_id", device.value()); auto const& d = device.value();
if (std::isdigit(d.front()) || d.front() == '-') {
learner->SetParam("gpu_id", d);
} else {
learner->SetParam("device", d);
}
} }
learner->Configure(); learner->Configure();
for (std::int32_t i = 0; i < 3; ++i) { for (std::int32_t i = 0; i < 3; ++i) {
@ -199,7 +204,12 @@ TEST(GBTree, ChooseTreeMethod) {
learner->SetParam("tree_method", tree_method.value()); learner->SetParam("tree_method", tree_method.value());
} }
if (device.has_value()) { if (device.has_value()) {
learner->SetParam("gpu_id", device.value()); auto const& d = device.value();
if (std::isdigit(d.front()) || d.front() == '-') {
learner->SetParam("gpu_id", d);
} else {
learner->SetParam("device", d);
}
} }
learner->Configure(); learner->Configure();
for (std::int32_t i = 0; i < 3; ++i) { for (std::int32_t i = 0; i < 3; ++i) {
@ -215,11 +225,12 @@ TEST(GBTree, ChooseTreeMethod) {
// | | hist | gpu_hist | exact | NA | // | | hist | gpu_hist | exact | NA |
// |--------+---------+----------+-------+-----| // |--------+---------+----------+-------+-----|
// | CUDA:0 | GPU | GPU (w) | Err | GPU | # not yet tested // | CUDA:0 | GPU | GPU (w) | Err | GPU |
// | CPU | CPU | Err | CPU | CPU | # not yet tested // | CPU | CPU | GPU (w) | CPU | CPU |
// |--------+---------+----------+-------+-----| // |--------+---------+----------+-------+-----|
// | -1 | CPU | GPU (w) | CPU | CPU | // | -1 | CPU | GPU (w) | CPU | CPU |
// | 0 | GPU | GPU (w) | Err | GPU | // | 0 | GPU | GPU (w) | Err | GPU |
// |--------+---------+----------+-------+-----|
// | NA | CPU | GPU (w) | CPU | CPU | // | NA | CPU | GPU (w) | CPU | CPU |
// //
// - (w): warning // - (w): warning
@ -237,18 +248,30 @@ TEST(GBTree, ChooseTreeMethod) {
// hist // hist
{{"hist", "-1"}, "grow_quantile_histmaker"}, {{"hist", "-1"}, "grow_quantile_histmaker"},
{{"hist", "0"}, "grow_gpu_hist"}, {{"hist", "0"}, "grow_gpu_hist"},
{{"hist", "cpu"}, "grow_quantile_histmaker"},
{{"hist", "cuda"}, "grow_gpu_hist"},
{{"hist", "cuda:0"}, "grow_gpu_hist"},
{{"hist", std::nullopt}, "grow_quantile_histmaker"}, {{"hist", std::nullopt}, "grow_quantile_histmaker"},
// gpu_hist // gpu_hist
{{"gpu_hist", "-1"}, "grow_gpu_hist"}, {{"gpu_hist", "-1"}, "grow_gpu_hist"},
{{"gpu_hist", "0"}, "grow_gpu_hist"}, {{"gpu_hist", "0"}, "grow_gpu_hist"},
{{"gpu_hist", "cpu"}, "grow_gpu_hist"},
{{"gpu_hist", "cuda"}, "grow_gpu_hist"},
{{"gpu_hist", "cuda:0"}, "grow_gpu_hist"},
{{"gpu_hist", std::nullopt}, "grow_gpu_hist"}, {{"gpu_hist", std::nullopt}, "grow_gpu_hist"},
// exact // exact
{{"exact", "-1"}, "grow_colmaker,prune"}, {{"exact", "-1"}, "grow_colmaker,prune"},
{{"exact", "0"}, "err"}, {{"exact", "0"}, "err"},
{{"exact", "cpu"}, "grow_colmaker,prune"},
{{"exact", "cuda"}, "err"},
{{"exact", "cuda:0"}, "err"},
{{"exact", std::nullopt}, "grow_colmaker,prune"}, {{"exact", std::nullopt}, "grow_colmaker,prune"},
// NA // NA
{{std::nullopt, "-1"}, "grow_quantile_histmaker"}, {{std::nullopt, "-1"}, "grow_quantile_histmaker"},
{{std::nullopt, "0"}, "grow_gpu_hist"}, // default to hist {{std::nullopt, "0"}, "grow_gpu_hist"}, // default to hist
{{std::nullopt, "cpu"}, "grow_quantile_histmaker"},
{{std::nullopt, "cuda"}, "grow_gpu_hist"},
{{std::nullopt, "cuda:0"}, "grow_gpu_hist"},
{{std::nullopt, std::nullopt}, "grow_quantile_histmaker"}, {{std::nullopt, std::nullopt}, "grow_quantile_histmaker"},
}; };
@ -392,8 +415,7 @@ class Dart : public testing::TestWithParam<char const*> {
for (size_t i = 0; i < 16; ++i) { for (size_t i = 0; i < 16; ++i) {
learner->UpdateOneIter(i, p_mat); learner->UpdateOneIter(i, p_mat);
} }
learner->SetParam("device", ctx.DeviceName());
ConfigLearnerByCtx(&ctx, learner.get());
HostDeviceVector<float> predts_training; HostDeviceVector<float> predts_training;
learner->Predict(p_mat, false, &predts_training, 0, 0, true); learner->Predict(p_mat, false, &predts_training, 0, 0, true);
@ -654,8 +676,7 @@ TEST(GBTree, InplacePredictionError) {
RandomDataGenerator{n_samples, n_features, 0.5f}.Batches(2).GenerateSparsePageDMatrix( RandomDataGenerator{n_samples, n_features, 0.5f}.Batches(2).GenerateSparsePageDMatrix(
"cache", true); "cache", true);
std::unique_ptr<Learner> learner{Learner::Create({p_fmat})}; std::unique_ptr<Learner> learner{Learner::Create({p_fmat})};
learner->SetParam("booster", booster); learner->SetParams(Args{{"booster", booster}, {"device", ctx->DeviceName()}});
ConfigLearnerByCtx(ctx, learner.get());
learner->Configure(); learner->Configure();
for (std::int32_t i = 0; i < 3; ++i) { for (std::int32_t i = 0; i < 3; ++i) {
learner->UpdateOneIter(i, p_fmat); learner->UpdateOneIter(i, p_fmat);
@ -697,9 +718,9 @@ TEST(GBTree, InplacePredictionError) {
#endif // defined(XGBOOST_USE_CUDA) #endif // defined(XGBOOST_USE_CUDA)
}; };
std::unique_ptr<Learner> learner{Learner::Create({p_fmat})}; std::unique_ptr<Learner> learner{Learner::Create({p_fmat})};
learner->SetParam("booster", booster); learner->SetParams(Args{{"booster", booster},
learner->SetParam("max_bin", std::to_string(max_bins)); {"max_bin", std::to_string(max_bins)},
ConfigLearnerByCtx(ctx, learner.get()); {"device", ctx->DeviceName()}});
learner->Configure(); learner->Configure();
for (std::int32_t i = 0; i < 3; ++i) { for (std::int32_t i = 0; i < 3; ++i) {
learner->UpdateOneIter(i, p_fmat); learner->UpdateOneIter(i, p_fmat);

View File

@ -8,6 +8,7 @@
 #include <limits>  // for numeric_limits
 #include <memory>  // for shared_ptr
 #include <string>  // for string
+#include <thread>  // for thread

 #include "../../../src/data/adapter.h"           // for ArrayAdapter
 #include "../../../src/data/device_adapter.cuh"  // for CupyAdapter
@ -41,7 +42,7 @@ void TestInplaceFallback(Context const* ctx) {
  // learner is configured to the device specified by ctx
  std::unique_ptr<Learner> learner{Learner::Create({Xy})};
- ConfigLearnerByCtx(ctx, learner.get());
+ learner->SetParam("device", ctx->DeviceName());
  for (std::int32_t i = 0; i < 3; ++i) {
    learner->UpdateOneIter(i, Xy);
  }
@ -56,18 +57,31 @@ void TestInplaceFallback(Context const* ctx) {
  HostDeviceVector<float>* out_predt{nullptr};
  ConsoleLogger::Configure(Args{{"verbosity", "1"}});
+ std::string output;
  // test whether the warning is raised
+#if !defined(_WIN32)
+ // Windows has issue with CUDA and thread local storage. For some reason, on Windows a
+ // cudaInitializationError is raised during destruction of `HostDeviceVector`. This
+ // might be related to https://github.com/dmlc/xgboost/issues/5793
  ::testing::internal::CaptureStderr();
+ std::thread{[&] {
+   // Launch a new thread to ensure a warning is raised as we prevent over-verbose
+   // warning by using thread-local flags.
    learner->InplacePredict(p_m, PredictionType::kValue, std::numeric_limits<float>::quiet_NaN(),
                            &out_predt, 0, 0);
- auto output = testing::internal::GetCapturedStderr();
+ }}.join();
+ output = testing::internal::GetCapturedStderr();
  ASSERT_NE(output.find("Falling back"), std::string::npos);
+#endif
+ learner->InplacePredict(p_m, PredictionType::kValue, std::numeric_limits<float>::quiet_NaN(),
+                         &out_predt, 0, 0);

  // test when the contexts match
  Context new_ctx = *proxy->Ctx();
  ASSERT_NE(new_ctx.gpu_id, ctx->gpu_id);
- ConfigLearnerByCtx(&new_ctx, learner.get());
+ learner->SetParam("device", new_ctx.DeviceName());
  HostDeviceVector<float>* out_predt_1{nullptr};
  // no warning is raised
  ::testing::internal::CaptureStderr();

View File

@ -559,16 +559,4 @@ class DeclareUnifiedDistributedTest(MetricTest) : public ::testing::Test {
    }
  }
};

-// A temporary solution before we move away from gpu_id.
-inline void ConfigLearnerByCtx(Context const* ctx, Learner* learner) {
-  if (ctx->IsCPU()) {
-    learner->SetParam("tree_method", "hist");
-  } else {
-    learner->SetParam("tree_method", "gpu_hist");
-  }
-  learner->SetParam("gpu_id", std::to_string(ctx->gpu_id));
-  learner->Configure();
-  ASSERT_EQ(learner->Ctx()->gpu_id, ctx->gpu_id);
-}
}  // namespace xgboost

View File

@ -46,7 +46,6 @@ inline void CheckDeterministicMetricMultiClass(StringView name, int32_t device)
inline void TestMultiClassError(int device, DataSplitMode data_split_mode) {
  auto ctx = MakeCUDACtx(device);
- ctx.gpu_id = device;
  xgboost::Metric * metric = xgboost::Metric::Create("merror", &ctx);
  metric->Configure({});
  ASSERT_STREQ(metric->Name(), "merror");
@ -67,7 +66,6 @@ inline void VerifyMultiClassError(DataSplitMode data_split_mode = DataSplitMode:
inline void TestMultiClassLogLoss(int device, DataSplitMode data_split_mode) {
  auto ctx = MakeCUDACtx(device);
- ctx.gpu_id = device;
  xgboost::Metric * metric = xgboost::Metric::Create("mlogloss", &ctx);
  metric->Configure({});
  ASSERT_STREQ(metric->Name(), "mlogloss");

View File

@ -13,26 +13,22 @@
namespace xgboost::obj {
TEST(LambdaRank, GPUNDCGJsonIO) {
- Context ctx;
- ctx.gpu_id = 0;
+ auto ctx = MakeCUDACtx(0);
  TestNDCGJsonIO(&ctx);
}

TEST(LambdaRank, GPUMAPStat) {
- Context ctx;
- ctx.gpu_id = 0;
+ auto ctx = MakeCUDACtx(0);
  TestMAPStat(&ctx);
}

TEST(LambdaRank, GPUNDCGGPair) {
- Context ctx;
- ctx.gpu_id = 0;
+ auto ctx = MakeCUDACtx(0);
  TestNDCGGPair(&ctx);
}

void TestGPUMakePair() {
- Context ctx;
- ctx.gpu_id = 0;
+ auto ctx = MakeCUDACtx(0);

  MetaInfo info;
  HostDeviceVector<float> predt;
@ -126,8 +122,7 @@ void TestGPUMakePair() {
TEST(LambdaRank, GPUMakePair) { TestGPUMakePair(); }

TEST(LambdaRank, GPUUnbiasedNDCG) {
- Context ctx;
- ctx.gpu_id = 0;
+ auto ctx = MakeCUDACtx(0);
  TestUnbiasedNDCG(&ctx);
}
@ -161,8 +156,7 @@ TEST(LambdaRank, RankItemCountOnRight) {
}

TEST(LambdaRank, GPUMAPGPair) {
- Context ctx;
- ctx.gpu_id = 0;
+ auto ctx = MakeCUDACtx(0);
  TestMAPGPair(&ctx);
}
}  // namespace xgboost::obj

View File

@ -305,12 +305,12 @@ TEST(Objective, CPU_vs_CUDA) {
  {
    // CPU
-   ctx.gpu_id = -1;
+   ctx = ctx.MakeCPU();
    obj->GetGradient(preds, info, 0, &cpu_out_preds);
  }
  {
    // CUDA
-   ctx.gpu_id = 0;
+   ctx = ctx.MakeCUDA(0);
    obj->GetGradient(preds, info, 0, &cuda_out_preds);
  }

View File

@ -148,7 +148,7 @@ TEST(Plugin, CPUvsOneAPI) {
  {
    // CPU
-   ctx.gpu_id = -1;
+   ctx = ctx.MakeCPU();
    obj_cpu->GetGradient(preds, info, 0, &cpu_out_preds);
  }
  {

View File

@ -214,15 +214,16 @@ void TestUpdatePredictionCache(bool use_subsampling) {
  }
}  // namespace

-TEST(CPUPredictor, GHistIndex) {
+TEST(CPUPredictor, GHistIndexTraining) {
  size_t constexpr kRows{128}, kCols{16}, kBins{64};
+ Context ctx;
  auto p_hist = RandomDataGenerator{kRows, kCols, 0.0}.Bins(kBins).GenerateQuantileDMatrix(false);
  HostDeviceVector<float> storage(kRows * kCols);
  auto columnar = RandomDataGenerator{kRows, kCols, 0.0}.GenerateArrayInterface(&storage);
  auto adapter = data::ArrayAdapter(columnar.c_str());
  std::shared_ptr<DMatrix> p_full{
      DMatrix::Create(&adapter, std::numeric_limits<float>::quiet_NaN(), 1)};
- TestTrainingPrediction(kRows, kBins, "hist", p_full, p_hist);
+ TestTrainingPrediction(&ctx, kRows, kBins, p_full, p_hist);
}

TEST(CPUPredictor, CategoricalPrediction) {

View File

@ -33,9 +33,8 @@ TEST(GPUPredictor, Basic) {
    int n_row = i, n_col = i;
    auto dmat = RandomDataGenerator(n_row, n_col, 0).GenerateDMatrix();

-   Context ctx;
-   ctx.gpu_id = 0;
-   LearnerModelParam mparam{MakeMP(n_col, .5, 1, ctx.gpu_id)};
+   auto ctx = MakeCUDACtx(0);
+   LearnerModelParam mparam{MakeMP(n_col, .5, 1, ctx.Ordinal())};
    gbm::GBTreeModel model = CreateTestModel(&mparam, &ctx);

    // Test predict batch
@ -71,7 +70,7 @@ void VerifyBasicColumnSplit(std::array<std::vector<float>, 32> const& expected_r
    auto dmat = RandomDataGenerator(n_row, n_col, 0).GenerateDMatrix();
    std::unique_ptr<DMatrix> sliced{dmat->SliceCol(world_size, rank)};

-   LearnerModelParam mparam{MakeMP(n_col, .5, 1, ctx.gpu_id)};
+   LearnerModelParam mparam{MakeMP(n_col, .5, 1, ctx.Ordinal())};
    gbm::GBTreeModel model = CreateTestModel(&mparam, &ctx);

    // Test predict batch
@ -102,7 +101,7 @@ TEST(GPUPredictor, MGPUBasicColumnSplit) {
    size_t n_row = i, n_col = i;
    auto dmat = RandomDataGenerator(n_row, n_col, 0).GenerateDMatrix();

-   LearnerModelParam mparam{MakeMP(n_col, .5, 1, ctx.gpu_id)};
+   LearnerModelParam mparam{MakeMP(n_col, .5, 1, ctx.Ordinal())};
    gbm::GBTreeModel model = CreateTestModel(&mparam, &ctx);

    // Test predict batch
@ -132,18 +131,19 @@ TEST(GPUPredictor, EllpackBasic) {
}

TEST(GPUPredictor, EllpackTraining) {
- size_t constexpr kRows { 128 }, kCols { 16 }, kBins { 64 };
- auto p_ellpack =
-     RandomDataGenerator{kRows, kCols, 0.0}.Bins(kBins).Device(0).GenerateDeviceDMatrix(false);
+ auto ctx = MakeCUDACtx(0);
+ size_t constexpr kRows{128}, kCols{16}, kBins{64};
+ auto p_ellpack = RandomDataGenerator{kRows, kCols, 0.0}
+                      .Bins(kBins)
+                      .Device(ctx.Ordinal())
+                      .GenerateDeviceDMatrix(false);
  HostDeviceVector<float> storage(kRows * kCols);
- auto columnar = RandomDataGenerator{kRows, kCols, 0.0}
-                     .Device(0)
-                     .GenerateArrayInterface(&storage);
+ auto columnar =
+     RandomDataGenerator{kRows, kCols, 0.0}.Device(ctx.Ordinal()).GenerateArrayInterface(&storage);
  auto adapter = data::CupyAdapter(columnar);
- std::shared_ptr<DMatrix> p_full {
-     DMatrix::Create(&adapter, std::numeric_limits<float>::quiet_NaN(), 1)
- };
- TestTrainingPrediction(kRows, kBins, "gpu_hist", p_full, p_ellpack);
+ std::shared_ptr<DMatrix> p_full{
+     DMatrix::Create(&adapter, std::numeric_limits<float>::quiet_NaN(), 1)};
+ TestTrainingPrediction(&ctx, kRows, kBins, p_full, p_ellpack);
}

TEST(GPUPredictor, ExternalMemoryTest) {
@ -153,9 +153,8 @@ TEST(GPUPredictor, ExternalMemoryTest) {
  gpu_predictor->Configure({});

  const int n_classes = 3;
- Context ctx;
- ctx.gpu_id = 0;
- LearnerModelParam mparam{MakeMP(5, .5, n_classes, ctx.gpu_id)};
+ Context ctx = MakeCUDACtx(0);
+ LearnerModelParam mparam{MakeMP(5, .5, n_classes, ctx.Ordinal())};
  gbm::GBTreeModel model = CreateTestModel(&mparam, &ctx, n_classes);

  std::vector<std::unique_ptr<DMatrix>> dmats;
@ -185,7 +184,7 @@ TEST(GPUPredictor, InplacePredictCupy) {
  auto ctx = MakeCUDACtx(0);
  size_t constexpr kRows{128}, kCols{64};
  RandomDataGenerator gen(kRows, kCols, 0.5);
- gen.Device(ctx.gpu_id);
+ gen.Device(ctx.Ordinal());
  HostDeviceVector<float> data;
  std::string interface_str = gen.GenerateArrayInterface(&data);
  std::shared_ptr<DMatrix> p_fmat{new data::DMatrixProxy};
@ -197,7 +196,7 @@ TEST(GPUPredictor, InplacePredictCuDF) {
  auto ctx = MakeCUDACtx(0);
  size_t constexpr kRows{128}, kCols{64};
  RandomDataGenerator gen(kRows, kCols, 0.5);
- gen.Device(ctx.gpu_id);
+ gen.Device(ctx.Ordinal());
  std::vector<HostDeviceVector<float>> storage(kCols);
  auto interface_str = gen.GenerateColumnarArrayInterface(&storage);
  std::shared_ptr<DMatrix> p_fmat{new data::DMatrixProxy};
@ -214,9 +213,8 @@ TEST(GpuPredictor, LesserFeatures) {
TEST(GPUPredictor, ShapStump) {
  cudaSetDevice(0);

- Context ctx;
- ctx.gpu_id = 0;
- LearnerModelParam mparam{MakeMP(1, .5, 1, ctx.gpu_id)};
+ auto ctx = MakeCUDACtx(0);
+ LearnerModelParam mparam{MakeMP(1, .5, 1, ctx.Ordinal())};
  gbm::GBTreeModel model(&mparam, &ctx);

  std::vector<std::unique_ptr<RegTree>> trees;
@ -241,9 +239,8 @@ TEST(GPUPredictor, ShapStump) {
}

TEST(GPUPredictor, Shap) {
- Context ctx;
- ctx.gpu_id = 0;
- LearnerModelParam mparam{MakeMP(1, .5, 1, ctx.gpu_id)};
+ auto ctx = MakeCUDACtx(0);
+ LearnerModelParam mparam{MakeMP(1, .5, 1, ctx.Ordinal())};
  gbm::GBTreeModel model(&mparam, &ctx);

  std::vector<std::unique_ptr<RegTree>> trees;

View File

@ -44,16 +44,14 @@ TEST(Predictor, PredictionCache) {
  EXPECT_ANY_THROW(container.Entry(m));
}

-void TestTrainingPrediction(size_t rows, size_t bins,
-                            std::string tree_method,
-                            std::shared_ptr<DMatrix> p_full,
-                            std::shared_ptr<DMatrix> p_hist) {
+void TestTrainingPrediction(Context const *ctx, size_t rows, size_t bins,
+                            std::shared_ptr<DMatrix> p_full, std::shared_ptr<DMatrix> p_hist) {
  size_t constexpr kCols = 16;
  size_t constexpr kClasses = 3;
  size_t constexpr kIters = 3;

  std::unique_ptr<Learner> learner;
- auto train = [&](Context const& ctx) {
  p_hist->Info().labels.Reshape(rows, 1);
  auto &h_label = p_hist->Info().labels.Data()->HostVector();
@ -62,12 +60,11 @@ void TestTrainingPrediction(size_t rows, size_t bins,
  }

  learner.reset(Learner::Create({}));
- learner->SetParam("tree_method", tree_method);
- learner->SetParam("objective", "multi:softprob");
- learner->SetParam("num_feature", std::to_string(kCols));
- learner->SetParam("num_class", std::to_string(kClasses));
- learner->SetParam("max_bin", std::to_string(bins));
- ConfigLearnerByCtx(&ctx, learner.get());
+ learner->SetParams(Args{{"objective", "multi:softprob"},
+                         {"num_feature", std::to_string(kCols)},
+                         {"num_class", std::to_string(kClasses)},
+                         {"max_bin", std::to_string(bins)},
+                         {"device", ctx->DeviceName()}});
  learner->Configure();

  for (size_t i = 0; i < kIters; ++i) {
@ -79,7 +76,7 @@ void TestTrainingPrediction(size_t rows, size_t bins,
  learner.reset(Learner::Create({}));
  learner->LoadModel(model);
- ConfigLearnerByCtx(&ctx, learner.get());
+ learner->SetParam("device", ctx->DeviceName());
  learner->Configure();

  HostDeviceVector<float> from_full;
@ -89,15 +86,7 @@ void TestTrainingPrediction(size_t rows, size_t bins,
  learner->Predict(p_hist, false, &from_hist, 0, 0);

  for (size_t i = 0; i < rows; ++i) {
-   EXPECT_NEAR(from_hist.ConstHostVector()[i],
-               from_full.ConstHostVector()[i], kRtEps);
-  }
- };
-
- if (tree_method == "gpu_hist") {
-   train(MakeCUDACtx(0));
- } else {
-   train(Context{});
+   EXPECT_NEAR(from_hist.ConstHostVector()[i], from_full.ConstHostVector()[i], kRtEps);
  }
}
@ -120,7 +109,7 @@ void TestInplacePrediction(Context const *ctx, std::shared_ptr<DMatrix> x, bst_r
    learner->UpdateOneIter(it, m);
  }

- learner->SetParam("gpu_id", std::to_string(ctx->gpu_id));
+ learner->SetParam("device", ctx->DeviceName());
  learner->Configure();

  HostDeviceVector<float> *p_out_predictions_0{nullptr};
@ -153,7 +142,7 @@ void TestInplacePrediction(Context const *ctx, std::shared_ptr<DMatrix> x, bst_r
    ASSERT_NEAR(h_pred[i], h_pred_0[i] + h_pred_1[i] - 0.5f, kRtEps);
  }

- learner->SetParam("gpu_id", "-1");
+ learner->SetParam("device", "cpu");
  learner->Configure();
}
@ -161,12 +150,12 @@ namespace {
std::unique_ptr<Learner> LearnerForTest(Context const *ctx, std::shared_ptr<DMatrix> dmat,
                                        size_t iters, size_t forest = 1) {
  std::unique_ptr<Learner> learner{Learner::Create({dmat})};
- learner->SetParams(Args{{"num_parallel_tree", std::to_string(forest)}});
+ learner->SetParams(
+     Args{{"num_parallel_tree", std::to_string(forest)}, {"device", ctx->DeviceName()}});
  for (size_t i = 0; i < iters; ++i) {
    learner->UpdateOneIter(i, dmat);
  }
- ConfigLearnerByCtx(ctx, learner.get());
  return learner;
}
@ -215,7 +204,7 @@ void TestPredictionDeviceAccess() {
  {
    ASSERT_EQ(from_cpu.DeviceIdx(), Context::kCpuId);
    Context cpu_ctx;
-   ConfigLearnerByCtx(&cpu_ctx, learner.get());
+   learner->SetParam("device", cpu_ctx.DeviceName());
    learner->Predict(m_test, false, &from_cpu, 0, 0);
    ASSERT_TRUE(from_cpu.HostCanWrite());
    ASSERT_FALSE(from_cpu.DeviceCanRead());
@ -225,7 +214,7 @@ void TestPredictionDeviceAccess() {
  HostDeviceVector<float> from_cuda;
  {
    Context cuda_ctx = MakeCUDACtx(0);
-   ConfigLearnerByCtx(&cuda_ctx, learner.get());
+   learner->SetParam("device", cuda_ctx.DeviceName());
    learner->Predict(m_test, false, &from_cuda, 0, 0);
    ASSERT_EQ(from_cuda.DeviceIdx(), 0);
    ASSERT_TRUE(from_cuda.DeviceCanWrite());
@ -465,11 +454,7 @@ void TestIterationRangeColumnSplit(Context const* ctx) {
  auto dmat = RandomDataGenerator(kRows, kCols, 0).GenerateDMatrix(true, true, kClasses);
  auto learner = LearnerForTest(ctx, dmat, kIters, kForest);
- if (ctx->IsCPU()) {
-   learner->SetParams(Args{{"gpu_id", std::to_string(-1)}});
- } else {
-   learner->SetParams(Args{{"gpu_id", std::to_string(0)}});
- }
+ learner->SetParam("device", ctx->DeviceName());

  bool bound = false;
  std::unique_ptr<Learner> sliced{learner->Slice(0, 3, 1, &bound)};
@ -582,7 +567,7 @@ void TestSparsePredictionColumnSplit(Context const* ctx, float sparsity) {
  learner.reset(Learner::Create({Xy}));
  learner->LoadModel(model);
- ConfigLearnerByCtx(ctx, learner.get());
+ learner->SetParam("device", ctx->DeviceName());

  learner->Predict(Xy, false, &sparse_predt, 0, 0);
  auto constexpr kWorldSize = 2;

View File

@ -84,9 +84,8 @@ void TestPredictionFromGradientIndex(Context const* ctx, size_t rows, size_t col
}

// p_full and p_hist should come from the same data set.
-void TestTrainingPrediction(size_t rows, size_t bins, std::string tree_method,
-                            std::shared_ptr<DMatrix> p_full,
-                            std::shared_ptr<DMatrix> p_hist);
+void TestTrainingPrediction(Context const* ctx, size_t rows, size_t bins,
+                            std::shared_ptr<DMatrix> p_full, std::shared_ptr<DMatrix> p_hist);
void TestInplacePrediction(Context const* ctx, std::shared_ptr<DMatrix> x, bst_row_t rows,
                           bst_feature_t cols);

tests/cpp/test_context.cc Normal file
View File

@ -0,0 +1,31 @@
/**
* Copyright 2023, XGBoost Contributors
*/
#include <gtest/gtest.h>
#include <xgboost/base.h>
#include <xgboost/context.h>
namespace xgboost {
TEST(Context, CPU) {
Context ctx;
ASSERT_EQ(ctx.Device(), DeviceOrd::CPU());
ASSERT_EQ(ctx.Ordinal(), Context::kCpuId);
std::int32_t flag{0};
ctx.DispatchDevice([&] { flag = -1; }, [&] { flag = 1; });
ASSERT_EQ(flag, -1);
ASSERT_THROW(ctx.UpdateAllowUnknown(Args{{"device", "oops"}}), dmlc::Error);
ASSERT_THROW(ctx.UpdateAllowUnknown(Args{{"device", "-1"}}), dmlc::Error);
ASSERT_THROW(ctx.UpdateAllowUnknown(Args{{"device", "CPU"}}), dmlc::Error);
ASSERT_THROW(ctx.UpdateAllowUnknown(Args{{"device", "CUDA"}}), dmlc::Error);
ASSERT_THROW(ctx.UpdateAllowUnknown(Args{{"device", "CPU:0"}}), dmlc::Error);
ASSERT_THROW(ctx.UpdateAllowUnknown(Args{{"device", "gpu:+0"}}), dmlc::Error);
ASSERT_THROW(ctx.UpdateAllowUnknown(Args{{"device", "gpu:0-"}}), dmlc::Error);
ASSERT_THROW(ctx.UpdateAllowUnknown(Args{{"device", "gpu:"}}), dmlc::Error);
ASSERT_THROW(ctx.UpdateAllowUnknown(Args{{"device", ":"}}), dmlc::Error);
ASSERT_THROW(ctx.UpdateAllowUnknown(Args{{"device", ":gpu"}}), dmlc::Error);
ASSERT_THROW(ctx.UpdateAllowUnknown(Args{{"device", ":0"}}), dmlc::Error);
ASSERT_THROW(ctx.UpdateAllowUnknown(Args{{"device", ""}}), dmlc::Error);
}
} // namespace xgboost
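The rejected spellings are also observable from Python, where the failed check surfaces as an ``XGBoostError``. A hedged sketch mirroring the assertions above:

.. code-block:: python

    import numpy as np
    import pytest
    import xgboost as xgb

    dtrain = xgb.DMatrix(np.random.rand(8, 2), label=np.random.rand(8))

    # Strings rejected by the C++ parser raise on configuration.
    with pytest.raises(xgb.core.XGBoostError):
        xgb.train({"device": "oops"}, dtrain, num_boost_round=1)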

tests/cpp/test_context.cu Normal file
View File

@ -0,0 +1,99 @@
/**
* Copyright 2023, XGBoost Contributors
*/
#include <gtest/gtest.h>
#include <xgboost/base.h> // for Args
#include <xgboost/context.h>
#include <xgboost/json.h> // for FromJson, ToJson
#include <string> // for string, to_string
#include "../../src/common/common.h" // for AllVisibleGPUs
namespace xgboost {
namespace {
void TestCUDA(Context const& ctx, bst_d_ordinal_t ord) {
ASSERT_EQ(ctx.gpu_id, ord);
ASSERT_EQ(ctx.Device().ordinal, ord);
ASSERT_EQ(ctx.DeviceName(), "cuda:" + std::to_string(ord));
ASSERT_EQ(ctx.Ordinal(), ord);
ASSERT_TRUE(ctx.IsCUDA());
ASSERT_FALSE(ctx.IsCPU());
ASSERT_EQ(ctx.Device(), DeviceOrd::CUDA(ord));
Json jctx{ToJson(ctx)};
Context new_ctx;
FromJson(jctx, &new_ctx);
ASSERT_EQ(new_ctx.Device(), ctx.Device());
ASSERT_EQ(new_ctx.gpu_id, ctx.gpu_id);
}
} // namespace
TEST(Context, DeviceOrdinal) {
Context ctx;
auto n_vis = common::AllVisibleGPUs();
auto ord = n_vis - 1;
std::string device = "cuda:" + std::to_string(ord);
ctx.UpdateAllowUnknown(Args{{"device", device}});
TestCUDA(ctx, ord);
device = "cuda:" + std::to_string(1001);
ctx.UpdateAllowUnknown(Args{{"device", device}});
ord = 1001 % n_vis;
TestCUDA(ctx, ord);
std::int32_t flag{0};
ctx.DispatchDevice([&] { flag = -1; }, [&] { flag = 1; });
ASSERT_EQ(flag, 1);
Context new_ctx = ctx;
TestCUDA(new_ctx, ctx.Ordinal());
auto cpu_ctx = ctx.MakeCPU();
ASSERT_TRUE(cpu_ctx.IsCPU());
ASSERT_EQ(cpu_ctx.Ordinal(), Context::kCpuId);
ASSERT_EQ(cpu_ctx.Device(), DeviceOrd::CPU());
auto cuda_ctx = cpu_ctx.MakeCUDA(ctx.Ordinal());
TestCUDA(cuda_ctx, ctx.Ordinal());
cuda_ctx.UpdateAllowUnknown(Args{{"fail_on_invalid_gpu_id", "true"}});
ASSERT_THROW({ cuda_ctx.UpdateAllowUnknown(Args{{"device", "cuda:9999"}}); }, dmlc::Error);
cuda_ctx.UpdateAllowUnknown(Args{{"device", "cuda:00"}});
ASSERT_EQ(cuda_ctx.Ordinal(), 0);
ctx.UpdateAllowUnknown(Args{{"device", "cpu"}});
// Test alias
ctx.UpdateAllowUnknown(Args{{"device", "gpu:0"}});
TestCUDA(ctx, 0);
ctx.UpdateAllowUnknown(Args{{"device", "gpu"}});
TestCUDA(ctx, 0);
// Test the thread local memory in dmlc is not linking different instances together.
cpu_ctx.UpdateAllowUnknown(Args{{"device", "cpu"}});
TestCUDA(ctx, 0);
ctx.UpdateAllowUnknown(Args{});
TestCUDA(ctx, 0);
}
TEST(Context, GPUId) {
Context ctx;
ctx.UpdateAllowUnknown(Args{{"gpu_id", "0"}});
TestCUDA(ctx, 0);
auto n_vis = common::AllVisibleGPUs();
auto ord = n_vis - 1;
ctx.UpdateAllowUnknown(Args{{"gpu_id", std::to_string(ord)}});
TestCUDA(ctx, ord);
auto device = "cuda:" + std::to_string(1001);
ctx.UpdateAllowUnknown(Args{{"device", device}});
ord = 1001 % n_vis;
TestCUDA(ctx, ord);
ctx.UpdateAllowUnknown(Args{{"gpu_id", "-1"}});
ASSERT_EQ(ctx.Device(), DeviceOrd::CPU());
}
} // namespace xgboost
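As the ``cuda:1001`` case suggests, an out-of-range ordinal is remapped to a visible device unless ``fail_on_invalid_gpu_id`` is set. A sketch of the Python-visible behavior (assumes a CUDA build; the remapping emits a warning rather than raising):

.. code-block:: python

    import numpy as np
    import xgboost as xgb

    X, y = np.random.rand(32, 4), np.random.rand(32)

    # Without fail_on_invalid_gpu_id this trains on a visible GPU with a warning.
    xgb.train({"device": "cuda:9999"}, xgb.DMatrix(X, label=y), num_boost_round=1)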

View File

@ -27,7 +27,6 @@
#include "../../src/common/io.h" // for LoadSequentialFile #include "../../src/common/io.h" // for LoadSequentialFile
#include "../../src/common/linalg_op.h" // for ElementWiseTransformHost, begin, end #include "../../src/common/linalg_op.h" // for ElementWiseTransformHost, begin, end
#include "../../src/common/random.h" // for GlobalRandom #include "../../src/common/random.h" // for GlobalRandom
#include "../../src/common/transform_iterator.h" // for IndexTransformIter
#include "dmlc/io.h" // for Stream #include "dmlc/io.h" // for Stream
#include "dmlc/omp.h" // for omp_get_max_threads #include "dmlc/omp.h" // for omp_get_max_threads
#include "dmlc/registry.h" // for Registry #include "dmlc/registry.h" // for Registry
@ -35,14 +34,13 @@
#include "helpers.h" // for GetBaseScore, RandomDataGenerator #include "helpers.h" // for GetBaseScore, RandomDataGenerator
#include "objective_helpers.h" // for MakeObjNamesForTest, ObjTestNameGenerator #include "objective_helpers.h" // for MakeObjNamesForTest, ObjTestNameGenerator
#include "xgboost/base.h" // for bst_float, Args, bst_feature_t, bst_int #include "xgboost/base.h" // for bst_float, Args, bst_feature_t, bst_int
#include "xgboost/context.h" // for Context #include "xgboost/context.h" // for Context, DeviceOrd
#include "xgboost/data.h" // for DMatrix, MetaInfo, DataType #include "xgboost/data.h" // for DMatrix, MetaInfo, DataType
#include "xgboost/host_device_vector.h" // for HostDeviceVector #include "xgboost/host_device_vector.h" // for HostDeviceVector
#include "xgboost/json.h" // for Json, Object, get, String, IsA, opera... #include "xgboost/json.h" // for Json, Object, get, String, IsA, opera...
#include "xgboost/linalg.h" // for Tensor, TensorView #include "xgboost/linalg.h" // for Tensor, TensorView
#include "xgboost/logging.h" // for ConsoleLogger #include "xgboost/logging.h" // for ConsoleLogger
#include "xgboost/predictor.h" // for PredictionCacheEntry #include "xgboost/predictor.h" // for PredictionCacheEntry
#include "xgboost/span.h" // for Span, operator!=, SpanIterator
#include "xgboost/string_view.h" // for StringView #include "xgboost/string_view.h" // for StringView
namespace xgboost { namespace xgboost {
@ -58,9 +56,9 @@ TEST(Learner, Basic) {
auto minor = XGBOOST_VER_MINOR; auto minor = XGBOOST_VER_MINOR;
auto patch = XGBOOST_VER_PATCH; auto patch = XGBOOST_VER_PATCH;
static_assert(std::is_integral<decltype(major)>::value, "Wrong major version type"); static_assert(std::is_integral_v<decltype(major)>, "Wrong major version type");
static_assert(std::is_integral<decltype(minor)>::value, "Wrong minor version type"); static_assert(std::is_integral_v<decltype(minor)>, "Wrong minor version type");
static_assert(std::is_integral<decltype(patch)>::value, "Wrong patch version type"); static_assert(std::is_integral_v<decltype(patch)>, "Wrong patch version type");
} }
TEST(Learner, ParameterValidation) { TEST(Learner, ParameterValidation) {
@ -92,8 +90,7 @@ TEST(Learner, CheckGroup) {
size_t constexpr kNumRows = 17; size_t constexpr kNumRows = 17;
bst_feature_t constexpr kNumCols = 15; bst_feature_t constexpr kNumCols = 15;
std::shared_ptr<DMatrix> p_mat{ std::shared_ptr<DMatrix> p_mat{RandomDataGenerator{kNumRows, kNumCols, 0.0f}.GenerateDMatrix()};
RandomDataGenerator{kNumRows, kNumCols, 0.0f}.GenerateDMatrix()};
std::vector<bst_float> weight(kNumGroups, 1); std::vector<bst_float> weight(kNumGroups, 1);
std::vector<bst_int> group(kNumGroups); std::vector<bst_int> group(kNumGroups);
group[0] = 2; group[0] = 2;
@ -312,35 +309,36 @@ TEST(Learner, GPUConfiguration) {
learner->SetParams({Arg{"booster", "gblinear"}, learner->SetParams({Arg{"booster", "gblinear"},
Arg{"updater", "gpu_coord_descent"}}); Arg{"updater", "gpu_coord_descent"}});
learner->UpdateOneIter(0, p_dmat); learner->UpdateOneIter(0, p_dmat);
ASSERT_EQ(learner->Ctx()->gpu_id, 0); ASSERT_EQ(learner->Ctx()->Device(), DeviceOrd::CUDA(0));
} }
{ {
std::unique_ptr<Learner> learner {Learner::Create(mat)}; std::unique_ptr<Learner> learner{Learner::Create(mat)};
learner->SetParams({Arg{"tree_method", "gpu_hist"}}); learner->SetParams({Arg{"tree_method", "gpu_hist"}});
learner->Configure();
ASSERT_EQ(learner->Ctx()->Device(), DeviceOrd::CUDA(0));
learner->UpdateOneIter(0, p_dmat); learner->UpdateOneIter(0, p_dmat);
ASSERT_EQ(learner->Ctx()->gpu_id, 0); ASSERT_EQ(learner->Ctx()->Device(), DeviceOrd::CUDA(0));
} }
{ {
std::unique_ptr<Learner> learner {Learner::Create(mat)}; std::unique_ptr<Learner> learner {Learner::Create(mat)};
learner->SetParams({Arg{"tree_method", "gpu_hist"}, learner->SetParams({Arg{"tree_method", "gpu_hist"},
Arg{"gpu_id", "-1"}}); Arg{"gpu_id", "-1"}});
learner->UpdateOneIter(0, p_dmat); learner->UpdateOneIter(0, p_dmat);
ASSERT_EQ(learner->Ctx()->gpu_id, 0); ASSERT_EQ(learner->Ctx()->Device(), DeviceOrd::CUDA(0));
} }
{ {
// with CPU algorithm // with CPU algorithm
std::unique_ptr<Learner> learner {Learner::Create(mat)}; std::unique_ptr<Learner> learner {Learner::Create(mat)};
learner->SetParams({Arg{"tree_method", "hist"}}); learner->SetParams({Arg{"tree_method", "hist"}});
learner->UpdateOneIter(0, p_dmat); learner->UpdateOneIter(0, p_dmat);
ASSERT_EQ(learner->Ctx()->gpu_id, -1); ASSERT_EQ(learner->Ctx()->Device(), DeviceOrd::CPU());
} }
{ {
// with CPU algorithm, but `gpu_id` takes priority // with CPU algorithm, but `gpu_id` takes priority
std::unique_ptr<Learner> learner {Learner::Create(mat)}; std::unique_ptr<Learner> learner {Learner::Create(mat)};
learner->SetParams({Arg{"tree_method", "hist"}, learner->SetParams({Arg{"tree_method", "hist"}, Arg{"gpu_id", "0"}});
Arg{"gpu_id", "0"}});
learner->UpdateOneIter(0, p_dmat); learner->UpdateOneIter(0, p_dmat);
ASSERT_EQ(learner->Ctx()->gpu_id, 0); ASSERT_EQ(learner->Ctx()->Device(), DeviceOrd::CUDA(0));
} }
} }
#endif // defined(XGBOOST_USE_CUDA) #endif // defined(XGBOOST_USE_CUDA)

View File

@ -8,6 +8,8 @@
 #include <memory>  // for unique_ptr

+#include "../helpers.h"

namespace xgboost {
TEST(Updater, HasNodePosition) {
  Context ctx;
@ -19,7 +21,7 @@ TEST(Updater, HasNodePosition) {
  ASSERT_TRUE(up->HasNodePosition());

#if defined(XGBOOST_USE_CUDA)
- ctx.gpu_id = 0;
+ ctx = MakeCUDACtx(0);
  up.reset(TreeUpdater::Create("grow_gpu_hist", &ctx, &task));
  ASSERT_TRUE(up->HasNodePosition());
#endif  // defined(XGBOOST_USE_CUDA)

View File

@ -70,9 +70,9 @@ class TestPredictionCache : public ::testing::Test {
    Context ctx;
    ctx.InitAllowUnknown(Args{{"nthread", "8"}});
    if (updater_name == "grow_gpu_hist") {
-     ctx.gpu_id = 0;
+     ctx = ctx.MakeCUDA(0);
    } else {
-     ctx.gpu_id = Context::kCpuId;
+     ctx = ctx.MakeCPU();
    }
    ObjInfo task{ObjInfo::kRegression};

View File

@ -34,7 +34,7 @@ class TestLoadPickle:
        bst = load_pickle(model_path)
        config = bst.save_config()
        config = json.loads(config)
-       assert config["learner"]["generic_param"]["gpu_id"] == "-1"
+       assert config["learner"]["generic_param"]["device"] == "cpu"

    def test_context_is_preserved(self) -> None:
        """Test the device context is preserved after pickling."""
@ -42,14 +42,14 @@ class TestLoadPickle:
        bst = load_pickle(model_path)
        config = bst.save_config()
        config = json.loads(config)
-       assert config["learner"]["generic_param"]["gpu_id"] == "0"
+       assert config["learner"]["generic_param"]["device"] == "cuda:0"

    def test_wrap_gpu_id(self) -> None:
        assert os.environ["CUDA_VISIBLE_DEVICES"] == "0"
        bst = load_pickle(model_path)
        config = bst.save_config()
        config = json.loads(config)
-       assert config["learner"]["generic_param"]["gpu_id"] == "0"
+       assert config["learner"]["generic_param"]["device"] == "cuda:0"

        x, y = build_dataset()
        test_x = xgb.DMatrix(x)
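The ``device`` entry checked by these tests lives under ``generic_param`` in the saved config and can be inspected directly. A minimal sketch (assumes a CUDA build):

.. code-block:: python

    import json

    import numpy as np
    import xgboost as xgb

    X, y = np.random.rand(16, 4), np.random.rand(16)
    booster = xgb.train({"device": "cuda:0"}, xgb.DMatrix(X, label=y), num_boost_round=1)

    config = json.loads(booster.save_config())
    assert config["learner"]["generic_param"]["device"] == "cuda:0"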

View File

@ -203,7 +203,7 @@ class TestQuantileDMatrix:
        np.testing.assert_equal(h_ret.indices, d_ret.indices)

        booster = xgb.train(
-           {"tree_method": "gpu_hist", "gpu_id": "0"}, dtrain=d_m
+           {"tree_method": "hist", "device": "cuda:0"}, dtrain=d_m
        )
        np.testing.assert_allclose(

View File

@ -65,16 +65,20 @@ class TestGPUBasicModels:
    @pytest.mark.skipif(**tm.no_sklearn())
    def test_invalid_gpu_id(self):
        from sklearn.datasets import load_digits

        X, y = load_digits(return_X_y=True)
        # should pass with invalid gpu id
-       cls1 = xgb.XGBClassifier(tree_method='gpu_hist', gpu_id=9999)
+       cls1 = xgb.XGBClassifier(tree_method="gpu_hist", gpu_id=9999)
        cls1.fit(X, y)
        # should throw error with fail_on_invalid_gpu_id enabled
        cls2 = xgb.XGBClassifier(
-           tree_method='gpu_hist', gpu_id=9999, fail_on_invalid_gpu_id=True
+           tree_method="gpu_hist", gpu_id=9999, fail_on_invalid_gpu_id=True
        )
-       try:
+       with pytest.raises(ValueError, match="ordinal 9999 is invalid"):
+           cls2.fit(X, y)
+
+       cls2 = xgb.XGBClassifier(
+           tree_method="hist", device="cuda:9999", fail_on_invalid_gpu_id=True
+       )
+       with pytest.raises(ValueError, match="ordinal 9999 is invalid"):
            cls2.fit(X, y)
-           assert False, "Should have failed with with fail_on_invalid_gpu_id enabled"
-       except xgb.core.XGBoostError as err:
-           assert "gpu_id 9999 is invalid" in str(err)

View File

@@ -43,10 +43,16 @@ class TestGPUEvalMetrics:
             num_boost_round=10,
         )
         cpu_auc = float(booster.eval(Xy).split(":")[1])
-        booster.set_param({"gpu_id": "0"})
-        assert json.loads(booster.save_config())["learner"]["generic_param"]["gpu_id"] == "0"
+        booster.set_param({"device": "cuda:0"})
+        assert (
+            json.loads(booster.save_config())["learner"]["generic_param"]["device"]
+            == "cuda:0"
+        )
         gpu_auc = float(booster.eval(Xy).split(":")[1])
-        assert json.loads(booster.save_config())["learner"]["generic_param"]["gpu_id"] == "0"
+        assert (
+            json.loads(booster.save_config())["learner"]["generic_param"]["device"]
+            == "cuda:0"
+        )
         np.testing.assert_allclose(cpu_auc, gpu_auc)

View File

@@ -113,14 +113,6 @@ class TestPickling:
         param = {"tree_method": "gpu_hist", "verbosity": 1}
         bst = xgb.train(param, train_x)

-        with tm.captured_output() as (out, err):
-            bst.inplace_predict(x)
-            # The warning is redirected to Python callback, so it's printed in stdout
-            # instead of stderr.
-            stdout = out.getvalue()
-            assert stdout.find("mismatched devices") != -1
-
         save_pickle(bst, model_path)
         args = self.args_template.copy()
@@ -177,7 +169,7 @@ class TestPickling:

         # Switch to CPU predictor
         bst = model.get_booster()
-        tm.set_ordinal(-1, bst)
+        bst.set_param({"device": "cpu"})
         cpu_pred = model.predict(x, output_margin=True)
         np.testing.assert_allclose(cpu_pred, gpu_pred, rtol=1e-5)

View File

@@ -39,7 +39,8 @@ predict_parameter_strategy = strategies.fixed_dictionaries(
     }
 )

-pytestmark = tm.timeout(20)
+# cupy nvrtc compilation can take a long time for the first run
+pytestmark = tm.timeout(30)


 class TestGPUPredict:
@@ -71,8 +72,8 @@ class TestGPUPredict:
         param = {
             "objective": "binary:logistic",
             "eval_metric": "logloss",
-            "tree_method": "gpu_hist",
-            "gpu_id": 0,
+            "tree_method": "hist",
+            "device": "gpu:0",
             "max_depth": 1,
         }
         bst = xgb.train(
@@ -84,7 +85,7 @@ class TestGPUPredict:
         gpu_pred_test = bst.predict(dtest, output_margin=True)
         gpu_pred_val = bst.predict(dval, output_margin=True)

-        bst.set_param({"gpu_id": -1, "tree_method": "hist"})
+        bst.set_param({"device": "cpu", "tree_method": "hist"})
         bst_cpu = copy(bst)
         cpu_pred_train = bst_cpu.predict(dtrain, output_margin=True)
         cpu_pred_test = bst_cpu.predict(dtest, output_margin=True)
@@ -107,14 +108,15 @@ class TestGPUPredict:
         dtrain = xgb.DMatrix(X_train, label=y_train)

         params = {}
-        params["tree_method"] = "gpu_hist"
+        params["tree_method"] = "hist"
+        params["device"] = "cuda:0"

         bst = xgb.train(params, dtrain)
-        tm.set_ordinal(0, bst)
+        bst.set_param({"device": "cuda:0"})
         # Don't reuse the DMatrix for prediction, otherwise the result is cached.
         predict_gpu_0 = bst.predict(xgb.DMatrix(X_test))
         predict_gpu_1 = bst.predict(xgb.DMatrix(X_test))
-        tm.set_ordinal(-1, bst)
+        bst.set_param({"device": "cpu"})
         predict_cpu = bst.predict(xgb.DMatrix(X_test))

         assert np.allclose(predict_gpu_0, predict_gpu_1)
@@ -131,8 +133,8 @@ class TestGPUPredict:
         X_test, y_test = X[tr_size:, :], y[tr_size:]

         params = {
-            "tree_method": "gpu_hist",
-            "gpu_id": "0",
+            "tree_method": "hist",
+            "device": "cuda:0",
             "n_jobs": -1,
             "seed": 123,
         }
@@ -141,13 +143,54 @@ class TestGPUPredict:
         gpu_test_score = m.score(X_test, y_test)

         # Now with cpu
-        m = tm.set_ordinal(-1, m)
+        m.set_params(device="cpu")
         cpu_train_score = m.score(X_train, y_train)
         cpu_test_score = m.score(X_test, y_test)

         assert np.allclose(cpu_train_score, gpu_train_score)
         assert np.allclose(cpu_test_score, gpu_test_score)
+    @pytest.mark.parametrize("device", ["cpu", "cuda"])
+    @pytest.mark.skipif(**tm.no_cupy())
+    def test_inplace_predict_device_type(self, device: str) -> None:
+        """Test inplace predict with different device and data types.
+
+        The sklearn interface uses inplace predict by default and gbtree falls back
+        to DMatrix whenever the device doesn't match. This test checks that XGBoost
+        can handle different combinations of device and input data type.
+
+        """
+        import cudf
+        import cupy as cp
+        import pandas as pd
+        from scipy.sparse import csr_matrix
+
+        reg = xgb.XGBRegressor(tree_method="hist", device=device)
+        n_samples = 4096
+        n_features = 13
+        X, y, w = tm.make_regression(n_samples, n_features, use_cupy=True)
+        X[X == 0.0] = 1.0
+
+        reg.fit(X, y, sample_weight=w)
+
+        predt_0 = reg.predict(X)
+        X = cp.asnumpy(X)
+        predt_1 = reg.predict(X)
+        df = pd.DataFrame(X)
+        predt_2 = reg.predict(df)
+        df = cudf.DataFrame(X)
+        predt_3 = reg.predict(df)
+        X_csr = csr_matrix(X)
+        predt_4 = reg.predict(X_csr)
+        np.testing.assert_allclose(predt_0, predt_1)
+        np.testing.assert_allclose(predt_0, predt_2)
+        np.testing.assert_allclose(predt_0, predt_3)
+        np.testing.assert_allclose(predt_0, predt_4)
+
     def run_inplace_base_margin(self, booster, dtrain, X, base_margin):
         import cupy as cp
@@ -175,7 +218,9 @@ class TestGPUPredict:
         dtrain = xgb.DMatrix(X, y)
         booster = xgb.train(
-            {"tree_method": "gpu_hist", "gpu_id": device}, dtrain, num_boost_round=10
+            {"tree_method": "hist", "device": f"cuda:{device}"},
+            dtrain,
+            num_boost_round=10,
         )
         test = xgb.DMatrix(X[:10, ...], missing=missing)
@@ -208,13 +253,13 @@ class TestGPUPredict:
         missing_idx = [i for i in range(0, X.shape[1], 16)]
         X[:, missing_idx] = missing
         reg = xgb.XGBRegressor(
-            tree_method="gpu_hist", n_estimators=8, missing=missing, gpu_id=device
+            tree_method="hist", n_estimators=8, missing=missing, device=f"cuda:{device}"
         )
         reg.fit(X, y)

-        reg = tm.set_ordinal(device, reg)
+        reg.set_params(device=f"cuda:{device}")
         gpu_predt = reg.predict(X)
-        reg = tm.set_ordinal(-1, reg)
+        reg = reg.set_params(device="cpu")
         cpu_predt = reg.predict(cp.asnumpy(X))
         np.testing.assert_allclose(gpu_predt, cpu_predt, atol=1e-6)
         cp.cuda.runtime.setDevice(0)
@@ -250,7 +295,9 @@ class TestGPUPredict:
         dtrain = xgb.DMatrix(X, y)

-        booster = xgb.train({"tree_method": "gpu_hist"}, dtrain, num_boost_round=10)
+        booster = xgb.train(
+            {"tree_method": "hist", "device": "cuda:0"}, dtrain, num_boost_round=10
+        )
         test = xgb.DMatrix(X)
         predt_from_array = booster.inplace_predict(X)
         predt_from_dmatrix = booster.predict(test)
@@ -280,12 +327,12 @@ class TestGPUPredict:
     def test_shap(self, num_rounds, dataset, param):
         if dataset.name.endswith("-l1"):  # not supported by the exact tree method
             return
-        param.update({"tree_method": "gpu_hist", "gpu_id": 0})
+        param.update({"tree_method": "hist", "device": "gpu:0"})
         param = dataset.set_params(param)
         dmat = dataset.get_dmat()
         bst = xgb.train(param, dmat, num_rounds)
         test_dmat = xgb.DMatrix(dataset.X, dataset.y, dataset.w, dataset.margin)
-        bst = tm.set_ordinal(0, bst)
+        bst.set_param({"device": "gpu:0"})
         shap = bst.predict(test_dmat, pred_contribs=True)
         margin = bst.predict(test_dmat, output_margin=True)
         assume(len(dataset.y) > 0)
@@ -298,12 +345,12 @@ class TestGPUPredict:
     def test_shap_interactions(self, num_rounds, dataset, param):
         if dataset.name.endswith("-l1"):  # not supported by the exact tree method
             return
-        param.update({"tree_method": "hist", "gpu_id": 0})
+        param.update({"tree_method": "hist", "device": "cuda:0"})
         param = dataset.set_params(param)
         dmat = dataset.get_dmat()
         bst = xgb.train(param, dmat, num_rounds)
         test_dmat = xgb.DMatrix(dataset.X, dataset.y, dataset.w, dataset.margin)
-        bst = tm.set_ordinal(0, bst)
+        bst.set_param({"device": "cuda:0"})
         shap = bst.predict(test_dmat, pred_interactions=True)
         margin = bst.predict(test_dmat, output_margin=True)
         assume(len(dataset.y) > 0)
@@ -317,16 +364,18 @@ class TestGPUPredict:
     def test_shap_categorical(self):
         X, y = tm.make_categorical(100, 20, 7, False)
         Xy = xgb.DMatrix(X, y, enable_categorical=True)
-        booster = xgb.train({"tree_method": "gpu_hist"}, Xy, num_boost_round=10)
+        booster = xgb.train(
+            {"tree_method": "hist", "device": "gpu:0"}, Xy, num_boost_round=10
+        )

-        booster = tm.set_ordinal(0, booster)
+        booster.set_param({"device": "cuda:0"})
         shap = booster.predict(Xy, pred_contribs=True)
         margin = booster.predict(Xy, output_margin=True)
         np.testing.assert_allclose(
             np.sum(shap, axis=len(shap.shape) - 1), margin, rtol=1e-3
         )

-        booster = tm.set_ordinal(-1, booster)
+        booster.set_param({"device": "cpu"})
         shap = booster.predict(Xy, pred_contribs=True)
         margin = booster.predict(Xy, output_margin=True)
         np.testing.assert_allclose(
@@ -334,8 +383,8 @@ class TestGPUPredict:
         )

     def test_predict_leaf_basic(self):
-        gpu_leaf = run_predict_leaf(0)
-        cpu_leaf = run_predict_leaf(-1)
+        gpu_leaf = run_predict_leaf("gpu:0")
+        cpu_leaf = run_predict_leaf("cpu")
         np.testing.assert_equal(gpu_leaf, cpu_leaf)

     def run_predict_leaf_booster(self, param, num_rounds, dataset):
@@ -344,23 +393,22 @@ class TestGPUPredict:
         booster = xgb.train(
             param, dtrain=dataset.get_dmat(), num_boost_round=num_rounds
         )

-        booster = tm.set_ordinal(-1, booster)
+        booster.set_param({"device": "cpu"})
         cpu_leaf = booster.predict(m, pred_leaf=True)

-        booster = tm.set_ordinal(0, booster)
+        booster.set_param({"device": "cuda:0"})
         gpu_leaf = booster.predict(m, pred_leaf=True)

         np.testing.assert_equal(cpu_leaf, gpu_leaf)

     @given(predict_parameter_strategy, tm.make_dataset_strategy())
     @settings(deadline=None, max_examples=20, print_blob=True)
-    def test_predict_leaf_gbtree(self, param, dataset):
+    def test_predict_leaf_gbtree(self, param: dict, dataset: tm.TestDataset) -> None:
         # Unsupported for random forest
         if param.get("num_parallel_tree", 1) > 1 and dataset.name.endswith("-l1"):
             return

-        param["booster"] = "gbtree"
-        param["tree_method"] = "gpu_hist"
+        param.update({"booster": "gbtree", "tree_method": "hist", "device": "cuda:0"})
         self.run_predict_leaf_booster(param, 10, dataset)

     @given(predict_parameter_strategy, tm.make_dataset_strategy())
@@ -370,8 +418,7 @@ class TestGPUPredict:
         if param.get("num_parallel_tree", 1) > 1 and dataset.name.endswith("-l1"):
             return

-        param["booster"] = "dart"
-        param["tree_method"] = "gpu_hist"
+        param.update({"booster": "dart", "tree_method": "hist", "device": "cuda:0"})
         self.run_predict_leaf_booster(param, 10, dataset)

     @pytest.mark.skipif(**tm.no_sklearn())
@@ -395,12 +442,12 @@ class TestGPUPredict:
         dtrain = xgb.DMatrix(df, label=y, enable_categorical=True)

         params = {
-            "tree_method": "gpu_hist",
+            "tree_method": "hist",
             "max_depth": 3,
             "learning_rate": 1.0,
             "base_score": 0.0,
             "eval_metric": "rmse",
-            "gpu_id": "0",
+            "device": "cuda:0",
         }

         eval_history = {}
@@ -412,7 +459,7 @@ class TestGPUPredict:
             verbose_eval=False,
             evals_result=eval_history,
         )
-        bst = tm.set_ordinal(0, bst)
+        bst.set_param({"device": "cuda:0"})
         pred = bst.predict(dtrain)
         rmse = mean_squared_error(y_true=y, y_pred=pred, squared=False)
         np.testing.assert_almost_equal(
@@ -434,14 +481,16 @@ class TestGPUPredict:
         Xy = xgb.DMatrix(X, y)
         if n_classes == 2:
             params = {
-                "tree_method": "gpu_hist",
+                "tree_method": "hist",
+                "device": "cuda:0",
                 "booster": "dart",
                 "rate_drop": 0.5,
                 "objective": "binary:logistic",
             }
         else:
             params = {
-                "tree_method": "gpu_hist",
+                "tree_method": "hist",
+                "device": "cuda:0",
                 "booster": "dart",
                 "rate_drop": 0.5,
                 "objective": "multi:softprob",
@@ -455,7 +504,7 @@ class TestGPUPredict:
         copied = booster.predict(Xy)

         # CPU
-        booster = tm.set_ordinal(-1, booster)
+        booster.set_param({"device": "cpu"})
         cpu_inplace = booster.inplace_predict(X_)
         cpu_copied = booster.predict(Xy)
@@ -465,7 +514,7 @@ class TestGPUPredict:
         cp.testing.assert_allclose(inplace, copied, atol=1e-6)

         # GPU
-        booster = tm.set_ordinal(0, booster)
+        booster.set_param({"device": "cuda:0"})
         inplace = booster.inplace_predict(X)
         copied = booster.predict(Xy)
@@ -482,7 +531,7 @@ class TestGPUPredict:
         orig = rng.randint(low=0, high=127, size=rows * cols).reshape(rows, cols)
         y = rng.randint(low=0, high=127, size=rows)
         dtrain = xgb.DMatrix(orig, label=y)

-        booster = xgb.train({"tree_method": "gpu_hist"}, dtrain)
+        booster = xgb.train({"tree_method": "hist", "device": "cuda:0"}, dtrain)
         predt_orig = booster.inplace_predict(orig)
         # all primitive types in numpy

View File

@@ -28,7 +28,7 @@ def run_threaded_predict(X, rows, predict_func):
         assert f.result()


-def run_predict_leaf(gpu_id: int) -> np.ndarray:
+def run_predict_leaf(device: str) -> np.ndarray:
     rows = 100
     cols = 4
     classes = 5
@@ -48,7 +48,7 @@ def run_predict_leaf(gpu_id: int) -> np.ndarray:
         num_boost_round=num_boost_round,
     )

-    booster = tm.set_ordinal(gpu_id, booster)
+    booster.set_param({"device": device})
     empty = xgb.DMatrix(np.ones(shape=(0, cols)))
     empty_leaf = booster.predict(empty, pred_leaf=True)
     assert empty_leaf.shape[0] == 0
@@ -74,14 +74,14 @@ def run_predict_leaf(gpu_id: int) -> np.ndarray:
     # When there's only 1 tree, the output is a 1 dim vector
     booster = xgb.train({"tree_method": "hist"}, num_boost_round=1, dtrain=m)
-    booster = tm.set_ordinal(gpu_id, booster)
+    booster.set_param({"device": device})
     assert booster.predict(m, pred_leaf=True).shape == (rows,)

     return leaf


 def test_predict_leaf() -> None:
-    run_predict_leaf(-1)
+    run_predict_leaf("cpu")


 def test_predict_shape():

View File

@@ -69,7 +69,7 @@ def run_dmatrix_ctor(is_feature_cols: bool, is_qdm: bool, on_gpu: bool) -> None:
     train_Xy, valid_Xy = create_dmatrix_from_partitions(
         iter(dfs),
         feature_cols,
-        gpu_id=device_id,
+        dev_ordinal=device_id,
         use_qdm=is_qdm,
         kwargs=kwargs,
         enable_sparse_data_optim=False,

View File

@@ -1025,6 +1025,7 @@ class XgboostLocalTest(SparkTestCase):
         self.assertTrue(hasattr(py_reg, "n_estimators"))
         self.assertEqual(py_reg.n_estimators.parent, py_reg.uid)
         self.assertFalse(hasattr(py_reg, "gpu_id"))
+        self.assertFalse(hasattr(py_reg, "device"))
         self.assertEqual(py_reg.getOrDefault(py_reg.n_estimators), 100)
         self.assertEqual(py_reg.getOrDefault(py_reg.objective), "reg:squarederror")
         py_reg2 = SparkXGBRegressor(n_estimators=200)
@@ -1038,6 +1039,7 @@ class XgboostLocalTest(SparkTestCase):
         self.assertTrue(hasattr(py_cls, "n_estimators"))
         self.assertEqual(py_cls.n_estimators.parent, py_cls.uid)
         self.assertFalse(hasattr(py_cls, "gpu_id"))
+        self.assertFalse(hasattr(py_cls, "device"))
         self.assertEqual(py_cls.getOrDefault(py_cls.n_estimators), 100)
         self.assertEqual(py_cls.getOrDefault(py_cls.objective), None)
         py_cls2 = SparkXGBClassifier(n_estimators=200)
@@ -1051,6 +1053,7 @@ class XgboostLocalTest(SparkTestCase):
         self.assertTrue(hasattr(py_cls, "n_estimators"))
         self.assertEqual(py_cls.n_estimators.parent, py_cls.uid)
         self.assertFalse(hasattr(py_cls, "gpu_id"))
+        self.assertFalse(hasattr(py_cls, "device"))
         self.assertTrue(hasattr(py_cls, "arbitrary_params_dict"))
         expected_kwargs = {"sketch_eps": 0.03}
         self.assertEqual(