Define the new device parameter. (#9362)
This commit is contained in:
parent 2d0cd2817e
commit 04aff3af8e
CITATION
@@ -15,4 +15,3 @@
address = {New York, NY, USA},
keywords = {large-scale machine learning},
}

@@ -22,7 +22,8 @@ Supported parameters
GPU accelerated prediction is enabled by default for the above mentioned ``tree_method`` parameters but can be switched to CPU prediction by setting ``predictor`` to ``cpu_predictor``. This could be useful if you want to conserve GPU memory. Likewise when using CPU algorithms, GPU accelerated prediction can be enabled by setting ``predictor`` to ``gpu_predictor``.

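For instance, a minimal sketch of toggling the predictor on a trained booster (``bst`` and ``dtrain`` stand in for an existing ``Booster`` and ``DMatrix``; the parameter names are the ones documented above):

.. code-block:: python

   # Conserve GPU memory by running prediction on the CPU.
   bst.set_param({"predictor": "cpu_predictor"})
   preds = bst.predict(dtrain)

   # Or accelerate prediction for a CPU-trained model.
   bst.set_param({"predictor": "gpu_predictor"})
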
The device ordinal (which GPU to use if you have many of them) can be selected using the
``gpu_id`` parameter, which defaults to 0 (the first device reported by CUDA runtime).
``device`` parameter, which defaults to 0 when "CUDA" is specified (the first device reported by CUDA
runtime).

@@ -30,13 +31,13 @@ The GPU algorithms currently work with CLI, Python, R, and JVM packages. See :do
.. code-block:: python
  :caption: Python example

  param['gpu_id'] = 0
  param["device"] = "cuda:0"
  param['tree_method'] = 'gpu_hist'

.. code-block:: python
  :caption: With Scikit-Learn interface

  XGBRegressor(tree_method='gpu_hist', gpu_id=0)
  XGBRegressor(tree_method='gpu_hist', device="cuda")

GPU-Accelerated SHAP values
@@ -45,7 +46,7 @@ XGBoost makes use of `GPUTreeShap <https://github.com/rapidsai/gputreeshap>`_ as

.. code-block:: python

  model.set_param({"gpu_id": "0", "tree_method": "gpu_hist"})
  model.set_param({"device": "cuda:0", "tree_method": "gpu_hist"})
  shap_values = model.predict(dtrain, pred_contribs=True)
  shap_interaction_values = model.predict(dtrain, pred_interactions=True)

@@ -3,10 +3,10 @@ Installation Guide
##################

XGBoost provides binary packages for some language bindings. The binary packages support
the GPU algorithm (``gpu_hist``) on machines with NVIDIA GPUs. Please note that **training
with multiple GPUs is only supported for Linux platform**. See :doc:`gpu/index`. Also we
have both stable releases and nightly builds, see below for how to install them. For
building from source, visit :doc:`this page </build>`.
the GPU algorithm (``device=cuda:0``) on machines with NVIDIA GPUs. Please note that
**training with multiple GPUs is only supported for Linux platform**. See
:doc:`gpu/index`. Also we have both stable releases and nightly builds, see below for how
to install them. For building from source, visit :doc:`this page </build>`.

.. contents:: Contents

@@ -59,6 +59,18 @@ General Parameters

- Feature dimension used in boosting, set to maximum dimension of the feature

* ``device`` [default= ``cpu``]

  .. versionadded:: 2.0.0

  - Device for XGBoost to run. User can set it to one of the following values:

    + ``cpu``: Use CPU.
    + ``cuda``: Use a GPU (CUDA device).
    + ``cuda:<ordinal>``: ``<ordinal>`` is an integer that specifies the ordinal of the GPU (which GPU do you want to use if you have more than one device).
    + ``gpu``: Default GPU device selection from the list of available and supported devices. Only ``cuda`` devices are supported currently.
    + ``gpu:<ordinal>``: Default GPU device selection from the list of available and supported devices. Only ``cuda`` devices are supported currently.

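As a quick illustration of these values, a hedged sketch of selecting a device through the training parameters (``X`` and ``y`` are placeholder arrays):

.. code-block:: python

   import xgboost as xgb

   dtrain = xgb.DMatrix(X, label=y)
   # "cuda" alone picks ordinal 0; "cuda:1" selects the second GPU.
   booster = xgb.train({"device": "cuda:1", "tree_method": "hist"}, dtrain)
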
Parameters for Tree Booster
===========================
* ``eta`` [default=0.3, alias: ``learning_rate``]
@@ -99,7 +111,7 @@ Parameters for Tree Booster
  - ``gradient_based``: the selection probability for each training instance is proportional to the
    *regularized absolute value* of gradients (more specifically, :math:`\sqrt{g^2+\lambda h^2}`).
    ``subsample`` may be set to as low as 0.1 without loss of model accuracy. Note that this
    sampling method is only supported when ``tree_method`` is set to ``gpu_hist``; other tree
    sampling method is only supported when ``tree_method`` is set to ``hist`` and the device is ``cuda``; other tree
    methods only support ``uniform`` sampling.

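For example, a hedged sketch of enabling gradient-based sampling with the parameters described here (``dtrain`` is assumed to exist):

.. code-block:: python

   params = {
       "device": "cuda",
       "tree_method": "hist",
       "sampling_method": "gradient_based",
       "subsample": 0.1,  # far lower than is safe with uniform sampling
   }
   booster = xgb.train(params, dtrain)
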
* ``colsample_bytree``, ``colsample_bylevel``, ``colsample_bynode`` [default=1]
@@ -131,26 +143,15 @@ Parameters for Tree Booster
* ``tree_method`` string [default= ``auto``]

  - The tree construction algorithm used in XGBoost. See description in the `reference paper <http://arxiv.org/abs/1603.02754>`_ and :doc:`treemethod`.
  - XGBoost supports ``approx``, ``hist`` and ``gpu_hist`` for distributed training. Experimental support for external memory is available for ``approx`` and ``gpu_hist``.

  - Choices: ``auto``, ``exact``, ``approx``, ``hist``, ``gpu_hist``, this is a
    combination of commonly used updaters. For other updaters like ``refresh``, set the
    parameter ``updater`` directly.
  - Choices: ``auto``, ``exact``, ``approx``, ``hist``, this is a combination of commonly
    used updaters. For other updaters like ``refresh``, set the parameter ``updater``
    directly.

    - ``auto``: Use heuristic to choose the fastest method.

      - For small dataset, exact greedy (``exact``) will be used.
      - For larger dataset, approximate algorithm (``approx``) will be chosen. It's
        recommended to try ``hist`` and ``gpu_hist`` for higher performance with large
        dataset. (``gpu_hist``) has support for ``external memory``.

      - Because old behavior is always use exact greedy in single machine, user will get a
        message when approximate algorithm is chosen to notify this choice.
    - ``auto``: Same as the ``hist`` tree method.
    - ``exact``: Exact greedy algorithm. Enumerates all split candidates.
    - ``approx``: Approximate greedy algorithm using quantile sketch and gradient histogram.
    - ``hist``: Faster histogram optimized approximate greedy algorithm.
    - ``gpu_hist``: GPU implementation of ``hist`` algorithm.

* ``scale_pos_weight`` [default=1]

@@ -163,7 +164,7 @@ Parameters for Tree Booster
  - ``grow_colmaker``: non-distributed column-based construction of trees.
  - ``grow_histmaker``: distributed tree construction with row-based data splitting based on global proposal of histogram counting.
  - ``grow_quantile_histmaker``: Grow tree using quantized histogram.
  - ``grow_gpu_hist``: Grow tree with GPU.
  - ``grow_gpu_hist``: Grow tree with GPU. Same as setting ``tree_method`` to ``hist`` and using ``device=cuda``.
  - ``sync``: synchronizes trees in all distributed nodes.
  - ``refresh``: refreshes tree's statistics and/or leaf values based on the current data. Note that no random subsampling of data rows is performed.
  - ``prune``: prunes the splits where loss < min_split_loss (or gamma) and nodes that have depth greater than ``max_depth``.
@@ -183,7 +184,7 @@ Parameters for Tree Booster
* ``grow_policy`` [default= ``depthwise``]

  - Controls a way new nodes are added to the tree.
  - Currently supported only if ``tree_method`` is set to ``hist``, ``approx`` or ``gpu_hist``.
  - Currently supported only if ``tree_method`` is set to ``hist`` or ``approx``.
  - Choices: ``depthwise``, ``lossguide``

    - ``depthwise``: split at nodes closest to the root.
@@ -195,7 +196,7 @@ Parameters for Tree Booster

* ``max_bin``, [default=256]

  - Only used if ``tree_method`` is set to ``hist``, ``approx`` or ``gpu_hist``.
  - Only used if ``tree_method`` is set to ``hist`` or ``approx``.
  - Maximum number of discrete bins to bucket continuous features.
  - Increasing this number improves the optimality of splits at the cost of higher computation time.

@@ -3,14 +3,14 @@ Tree Methods
############

For training boosted tree models, there are 2 parameters used for choosing algorithms,
namely ``updater`` and ``tree_method``. XGBoost has 4 builtin tree methods, namely
``exact``, ``approx``, ``hist`` and ``gpu_hist``. Along with these tree methods, there
are also some free standing updaters including ``refresh``,
``prune`` and ``sync``. The parameter ``updater`` is more primitive than ``tree_method``
as the latter is just a pre-configuration of the former. The difference is mostly due to
historical reasons that each updater requires some specific configurations and might has
missing features. As we are moving forward, the gap between them is becoming more and
more irrelevant. We will collectively document them under tree methods.
namely ``updater`` and ``tree_method``. XGBoost has 3 builtin tree methods, namely
``exact``, ``approx`` and ``hist``. Along with these tree methods, there are also some
free standing updaters including ``refresh``, ``prune`` and ``sync``. The parameter
``updater`` is more primitive than ``tree_method`` as the latter is just a
pre-configuration of the former. The difference is mostly due to historical reasons that
each updater requires some specific configurations and might have missing features. As we
are moving forward, the gap between them is becoming more and more irrelevant. We will
collectively document them under tree methods.

**************
Exact Solution
@@ -19,23 +19,23 @@ Exact Solution
Exact means XGBoost considers all candidates from data for tree splitting, but underlying
the objective is still interpreted as a Taylor expansion.

1. ``exact``: Vanilla gradient boosting tree algorithm described in `reference paper
   <http://arxiv.org/abs/1603.02754>`_. During each split finding procedure, it iterates
   over all entries of input data. It's more accurate (among other greedy methods) but
   slow in computation performance. Also it doesn't support distributed training as
   XGBoost employs row spliting data distribution while ``exact`` tree method works on a
   sorted column format. This tree method can be used with parameter ``tree_method`` set
   to ``exact``.
1. ``exact``: The vanilla gradient boosting tree algorithm described in `reference paper
   <http://arxiv.org/abs/1603.02754>`_. During split-finding, it iterates over all
   entries of input data. It's more accurate (among other greedy methods) but
   computationally slower compared to other tree methods. Furthermore, its feature
   set is limited. Features like distributed training and external memory that require
   approximated quantiles are not supported. This tree method can be used with the
   parameter ``tree_method`` set to ``exact``.


**********************
Approximated Solutions
**********************

As ``exact`` tree method is slow in performance and not scalable, we often employ
approximated training algorithms. These algorithms build a gradient histogram for each
node and iterate through the histogram instead of real dataset. Here we introduce the
implementations in XGBoost below.
As ``exact`` tree method is slow in computation performance and difficult to scale, we
often employ approximated training algorithms. These algorithms build a gradient
histogram for each node and iterate through the histogram instead of real dataset. Here
we introduce the implementations in XGBoost.

1. ``approx`` tree method: An approximation tree method described in `reference paper
   <http://arxiv.org/abs/1603.02754>`_. It runs sketching before building each tree
@@ -48,22 +48,18 @@ implementations in XGBoost below.
   this global sketch. This is the fastest algorithm as it runs sketching only once. The
   algorithm can be accessed by setting ``tree_method`` to ``hist``.

3. ``gpu_hist`` tree method: The ``gpu_hist`` tree method is a GPU implementation of
   ``hist``, with additional support for gradient based sampling. The algorithm can be
   accessed by setting ``tree_method`` to ``gpu_hist``.

************
Implications
************

Some objectives like ``reg:squarederror`` have constant hessian. In this case, ``hist``
or ``gpu_hist`` should be preferred as weighted sketching doesn't make sense with constant
Some objectives like ``reg:squarederror`` have constant hessian. In this case,
``hist`` should be preferred as weighted sketching doesn't make sense with constant
weights. When using non-constant hessian objectives, sometimes ``approx`` yields better
accuracy, but with slower computation performance. Most of the time using ``(gpu)_hist``
with higher ``max_bin`` can achieve similar or even superior accuracy while maintaining
good performance. However, as xgboost is largely driven by community effort, the actual
implementations have some differences than pure math description. Result might have
slight differences than expectation, which we are currently trying to overcome.
accuracy, but with slower computation performance. Most of the time using ``hist`` with
higher ``max_bin`` can achieve similar or even superior accuracy while maintaining good
performance. However, as xgboost is largely driven by community effort, the actual
implementations differ somewhat from the pure math description. Results might be
slightly different from expectation, which we are currently trying to overcome.

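To make the trade-off above concrete, a hedged sketch (``dtrain`` and ``dvalid`` are assumed to be pre-built ``DMatrix`` objects) comparing ``approx`` against ``hist`` with a larger ``max_bin``:

.. code-block:: python

   import xgboost as xgb

   for params in (
       {"tree_method": "approx", "max_bin": 256},
       {"tree_method": "hist", "max_bin": 512},  # finer histogram
   ):
       xgb.train(params, dtrain, num_boost_round=100,
                 evals=[(dvalid, "valid")])
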
**************
Other Updaters
@@ -106,8 +102,8 @@ solely for the interest of documentation.
   histogram creation step and uses sketching values directly during split evaluation. It
   was never tested and contained some unknown bugs, we decided to remove it and focus our
   resources on more promising algorithms instead. For accuracy, most of the time
   ``approx``, ``hist`` and ``gpu_hist`` are enough with some parameters tuning, so
   removing them don't have any real practical impact.
   ``approx`` and ``hist`` are enough with some parameter tuning, so removing them
   doesn't have any real practical impact.

3. ``grow_local_histmaker`` updater: An approximation tree method described in `reference
   paper <http://arxiv.org/abs/1603.02754>`_. This updater was rarely used in practice so

@@ -149,7 +149,7 @@ Also for inplace prediction:
.. code-block:: python

  # where X is a dask DataFrame or dask Array backed by cupy or cuDF.
  booster.set_param({"gpu_id": "0"})
  booster.set_param({"device": "cuda:0"})
  prediction = xgb.dask.inplace_predict(client, booster, X)

When input is ``da.Array`` object, output is always ``da.Array``. However, if the input

@@ -163,7 +163,7 @@ Will print out something similar to (not actual output as it's too long for demo
  {
    "Learner": {
      "generic_parameter": {
        "gpu_id": "0",
        "device": "cuda:0",
        "gpu_page_size": "0",
        "n_jobs": "0",
        "random_state": "0",

@@ -119,7 +119,7 @@ using bst_group_t = std::uint32_t;  // NOLINT
 */
using bst_target_t = std::uint32_t;  // NOLINT
/**
 * brief Type for indexing boosted layers.
 * @brief Type for indexing boosted layers.
 */
using bst_layer_t = std::int32_t;  // NOLINT
/**

@@ -12,12 +12,18 @@
#include <cstdint>      // for int16_t, int32_t, int64_t
#include <memory>       // for shared_ptr
#include <string>       // for string, to_string
#include <type_traits>  // for invoke_result_t, is_same_v
#include <type_traits>  // for invoke_result_t, is_same_v, underlying_type_t

namespace xgboost {

struct CUDAContext;

// symbolic names
struct DeviceSym {
  static auto constexpr CPU() { return "cpu"; }
  static auto constexpr CUDA() { return "cuda"; }
};

/**
 * @brief A type for device ordinal. The type is packed into 32-bit for efficient use in
 *   viewing types like `linalg::TensorView`.
@@ -59,9 +65,9 @@ struct DeviceOrd {
  [[nodiscard]] std::string Name() const {
    switch (device) {
      case DeviceOrd::kCPU:
        return "CPU";
        return DeviceSym::CPU();
      case DeviceOrd::kCUDA:
        return "CUDA:" + std::to_string(ordinal);
        return DeviceSym::CUDA() + (':' + std::to_string(ordinal));
      default: {
        LOG(FATAL) << "Unknown device.";
        return "";
@@ -76,26 +82,39 @@ static_assert(sizeof(DeviceOrd) == sizeof(std::int32_t));
 * @brief Runtime context for XGBoost. Contains information like threads and device.
 */
struct Context : public XGBoostParameter<Context> {
 private:
  std::string device{DeviceSym::CPU()};  // NOLINT
  // The device object for the current context. We are in the middle of replacing the
  // `gpu_id` with this device field.
  DeviceOrd device_{DeviceOrd::CPU()};

 public:
  // Constant representing the device ID of CPU.
  static std::int32_t constexpr kCpuId = -1;
  static bst_d_ordinal_t constexpr kCpuId = -1;
  static bst_d_ordinal_t constexpr InvalidOrdinal() { return -2; }
  static std::int64_t constexpr kDefaultSeed = 0;

 public:
  Context();

  template <typename Container>
  Args UpdateAllowUnknown(Container const& kwargs) {
    auto args = XGBoostParameter<Context>::UpdateAllowUnknown(kwargs);
    this->SetDeviceOrdinal(kwargs);
    return args;
  }

  std::int32_t gpu_id{kCpuId};
  // The number of threads to use if OpenMP is enabled. If equals 0, use the system default.
  std::int32_t nthread{0};  // NOLINT
  // stored random seed
  std::int64_t seed{kDefaultSeed};
  // whether seed the PRNG each iteration
  bool seed_per_iteration{false};
  // number of threads to use if OpenMP is enabled
  // if equals 0, use system default
  std::int32_t nthread{0};
  // primary device, -1 means no gpu.
  std::int32_t gpu_id{kCpuId};
  // fail when gpu_id is invalid
  bool fail_on_invalid_gpu_id{false};
  bool validate_parameters{false};

  /**
   * @brief Configure the parameter `gpu_id'.
   *
@@ -111,21 +130,19 @@ struct Context : public XGBoostParameter<Context> {
  /**
   * @brief Is XGBoost running on CPU?
   */
  [[nodiscard]] bool IsCPU() const { return gpu_id == kCpuId; }
  [[nodiscard]] bool IsCPU() const { return Device().IsCPU(); }
  /**
   * @brief Is XGBoost running on a CUDA device?
   */
  [[nodiscard]] bool IsCUDA() const { return !IsCPU(); }
  [[nodiscard]] bool IsCUDA() const { return Device().IsCUDA(); }
  /**
   * @brief Get the current device and ordinal.
   */
  [[nodiscard]] DeviceOrd Device() const {
    return IsCPU() ? DeviceOrd::CPU() : DeviceOrd::CUDA(static_cast<bst_d_ordinal_t>(gpu_id));
  }
  [[nodiscard]] DeviceOrd Device() const { return device_; }
  /**
   * @brief Get the CUDA device ordinal. -1 if XGBoost is running on CPU.
   */
  [[nodiscard]] bst_d_ordinal_t Ordinal() const { return this->gpu_id; }
  [[nodiscard]] bst_d_ordinal_t Ordinal() const { return Device().ordinal; }
  /**
   * @brief Name of the current device.
   */
@@ -134,24 +151,22 @@ struct Context : public XGBoostParameter<Context> {
   * @brief Get a CUDA device context for allocator and stream.
   */
  [[nodiscard]] CUDAContext const* CUDACtx() const;

  /**
   * @brief Make a CUDA context based on the current context.
   *
   * @param ordinal The CUDA device ordinal.
   */
  [[nodiscard]] Context MakeCUDA(std::int32_t ordinal = 0) const {
  [[nodiscard]] Context MakeCUDA(bst_d_ordinal_t ordinal = 0) const {
    Context ctx = *this;
    CHECK_GE(ordinal, 0);
    ctx.gpu_id = ordinal;
    return ctx;
    return ctx.SetDevice(DeviceOrd::CUDA(ordinal));
  }
  /**
   * @brief Make a CPU context based on the current context.
   */
  [[nodiscard]] Context MakeCPU() const {
    Context ctx = *this;
    ctx.gpu_id = kCpuId;
    return ctx;
    return ctx.SetDevice(DeviceOrd::CPU());
  }
  /**
   * @brief Call function based on the current device.
@@ -167,7 +182,8 @@ struct Context : public XGBoostParameter<Context> {
    default:
      // Do not use the device name as this is likely an internal error, the name
      // wouldn't be valid.
      LOG(FATAL) << "Unknown device type:" << static_cast<std::int16_t>(this->Device().device);
      LOG(FATAL) << "Unknown device type:"
                 << static_cast<std::underlying_type_t<DeviceOrd::Type>>(this->Device().device);
      break;
  }
  return std::invoke_result_t<CPUFn>();
@@ -182,11 +198,9 @@ struct Context : public XGBoostParameter<Context> {
    DMLC_DECLARE_FIELD(seed_per_iteration)
        .set_default(false)
        .describe("Seed PRNG determnisticly via iterator number.");
    DMLC_DECLARE_FIELD(device).set_default(DeviceSym::CPU()).describe("Device ordinal.");
    DMLC_DECLARE_FIELD(nthread).set_default(0).describe("Number of threads to use.");
    DMLC_DECLARE_ALIAS(nthread, n_jobs);

    DMLC_DECLARE_FIELD(gpu_id).set_default(-1).set_lower_bound(-1).describe(
        "The primary GPU device ordinal.");
    DMLC_DECLARE_FIELD(fail_on_invalid_gpu_id)
        .set_default(false)
        .describe("Fail with error when gpu_id is invalid.");
@@ -196,6 +210,14 @@ struct Context : public XGBoostParameter<Context> {
  }

 private:
  void SetDeviceOrdinal(Args const& kwargs);
  Context& SetDevice(DeviceOrd d) {
    this->device_ = d;
    this->gpu_id = d.ordinal;  // this can be removed once we move away from `gpu_id`.
    this->device = d.Name();
    return *this;
  }

  // mutable for lazy cuda context initialization. This avoids initializing CUDA at load.
  // shared_ptr is used instead of unique_ptr as with unique_ptr it's difficult to define
  // p_impl while trying to hide CUDA code from the host compiler.

@@ -664,11 +664,11 @@ Object ToJson(Parameter const& param) {
template <typename Parameter>
Args FromJson(Json const& obj, Parameter* param) {
  auto const& j_param = get<Object const>(obj);
  std::map<std::string, std::string> m;
  Args args;
  for (auto const& kv : j_param) {
    m[kv.first] = get<String const>(kv.second);
    args.emplace_back(kv.first, get<String const>(kv.second));
  }
  return param->UpdateAllowUnknown(m);
  return param->UpdateAllowUnknown(args);
}
}  // namespace xgboost
#endif  // XGBOOST_JSON_H_

@@ -110,15 +110,10 @@ class Learner : public Model, public Configurable, public dmlc::Serializable {
   * \param approx_contribs whether to approximate the feature contributions for speed
   * \param pred_interactions whether to compute the feature pair contributions
   */
  virtual void Predict(std::shared_ptr<DMatrix> data,
                       bool output_margin,
                       HostDeviceVector<bst_float> *out_preds,
                       unsigned layer_begin,
                       unsigned layer_end,
                       bool training = false,
                       bool pred_leaf = false,
                       bool pred_contribs = false,
                       bool approx_contribs = false,
  virtual void Predict(std::shared_ptr<DMatrix> data, bool output_margin,
                       HostDeviceVector<bst_float>* out_preds, bst_layer_t layer_begin,
                       bst_layer_t layer_end, bool training = false, bool pred_leaf = false,
                       bool pred_contribs = false, bool approx_contribs = false,
                       bool pred_interactions = false) = 0;

  /*!
@@ -132,8 +127,8 @@ class Learner : public Model, public Configurable, public dmlc::Serializable {
   * \param layer_end End of booster layer. 0 means do not limit trees.
   */
  virtual void InplacePredict(std::shared_ptr<DMatrix> p_m, PredictionType type, float missing,
                              HostDeviceVector<bst_float>** out_preds, uint32_t layer_begin,
                              uint32_t layer_end) = 0;
                              HostDeviceVector<float>** out_preds, bst_layer_t layer_begin,
                              bst_layer_t layer_end) = 0;

  /*!
   * \brief Calculate feature score.  See doc in C API for outputs.

@@ -39,9 +39,8 @@ struct PredictionCacheEntry {
   *
   * \param v Added versions.
   */
  void Update(std::uint32_t v) {
    version += v;
  }
  void Update(std::uint32_t v) { version += v; }
  void Reset() { version = 0; }
};

/**

@@ -280,7 +280,7 @@ object GpuPreXGBoost extends PreXGBoostProvider {
      // - gpu id
      // - predictor: Force to gpu predictor since native doesn't save predictor.
      val gpuId = if (!isLocal) XGBoost.getGPUAddrFromResources else 0
      booster.setParam("gpu_id", gpuId.toString)
      booster.setParam("device", s"cuda:$gpuId")
      logger.info("GPU transform on device: " + gpuId)
      boosterFlag.isGpuParamsSet = true;
    }

@@ -326,7 +326,7 @@ object XGBoost extends Serializable {
        getGPUAddrFromResources
      }
      logger.info("Leveraging gpu device " + gpuId + " to train")
      params = params + ("gpu_id" -> gpuId)
      params = params + ("device" -> s"cuda:$gpuId")
    }
    val booster = if (makeCheckpoint) {
      SXGBoost.trainAndSaveCheckpoint(

@@ -1393,13 +1393,13 @@ class _ProxyDMatrix(DMatrix):


class QuantileDMatrix(DMatrix):
    """A DMatrix variant that generates quantilized data directly from input for
    ``hist`` and ``gpu_hist`` tree methods. This DMatrix is primarily designed to save
    memory in training by avoiding intermediate storage. Set ``max_bin`` to control the
    number of bins during quantisation, which should be consistent with the training
    parameter ``max_bin``. When ``QuantileDMatrix`` is used for validation/test dataset,
    ``ref`` should be another ``QuantileDMatrix`` (or ``DMatrix``, but not recommended as
    it defeats the purpose of saving memory) constructed from training dataset. See
    """A DMatrix variant that generates quantilized data directly from input for the
    ``hist`` tree method. This DMatrix is primarily designed to save memory in training
    by avoiding intermediate storage. Set ``max_bin`` to control the number of bins
    during quantisation, which should be consistent with the training parameter
    ``max_bin``. When ``QuantileDMatrix`` is used for validation/test dataset, ``ref``
    should be another ``QuantileDMatrix`` (or ``DMatrix``, but not recommended as it
    defeats the purpose of saving memory) constructed from training dataset. See
    :py:obj:`xgboost.DMatrix` for documents on meta info.

    .. note::

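To make the intended usage concrete, a short hedged sketch (random arrays as placeholder data) of building a training ``QuantileDMatrix`` and a validation one that reuses its quantile cuts via ``ref``:

import numpy as np
import xgboost as xgb

X_train, y_train = np.random.rand(1000, 8), np.random.rand(1000)
X_valid, y_valid = np.random.rand(200, 8), np.random.rand(200)

Xy_train = xgb.QuantileDMatrix(X_train, label=y_train, max_bin=256)
# Reuse the training cuts instead of re-sketching the validation data.
Xy_valid = xgb.QuantileDMatrix(X_valid, label=y_valid, ref=Xy_train)
booster = xgb.train({"tree_method": "hist", "max_bin": 256}, Xy_train,
                    evals=[(Xy_valid, "valid")])
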
@@ -2277,10 +2277,10 @@ class Booster:

        .. code-block:: python

            booster.set_param({"gpu_id": "0", "tree_method": "gpu_hist"})
            booster.set_param({"device": "cuda:0"})
            booster.inplace_predict(cupy_array)

            booster.set_param({"gpu_id": "-1", "tree_method": "hist"})
            booster.set_param({"device": "cpu"})
            booster.inplace_predict(numpy_array)

        .. versionadded:: 1.1.0
@@ -2311,8 +2311,8 @@ class Booster:
        Returns
        -------
        prediction : numpy.ndarray/cupy.ndarray
            The prediction result.  When input data is on GPU, prediction
            result is stored in a cupy array.
            The prediction result.  When input data is on GPU, prediction result is
            stored in a cupy array.

        """
        preds = ctypes.POINTER(ctypes.c_float)()

@@ -273,7 +273,7 @@ __model_doc = f"""
    * For linear model, only "weight" is defined and it's the normalized coefficients
      without bias.

    gpu_id : Optional[int]
    device : Optional[str]
        Device ordinal.
    validate_parameters : Optional[bool]
        Give warnings for unknown parameter.

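For illustration, a hedged sketch of the estimator with the new string-valued parameter (``X`` and ``y`` are placeholder arrays):

from xgboost import XGBRegressor

# `device` replaces the old integer-valued `gpu_id`.
model = XGBRegressor(tree_method="hist", device="cuda", n_estimators=50)
model.fit(X, y)
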
@@ -647,7 +647,7 @@ class XGBModel(XGBModelBase):
        monotone_constraints: Optional[Union[Dict[str, int], str]] = None,
        interaction_constraints: Optional[Union[str, Sequence[Sequence[str]]]] = None,
        importance_type: Optional[str] = None,
        gpu_id: Optional[int] = None,
        device: Optional[str] = None,
        validate_parameters: Optional[bool] = None,
        enable_categorical: bool = False,
        feature_types: Optional[FeatureTypes] = None,
@@ -693,7 +693,7 @@ class XGBModel(XGBModelBase):
        self.monotone_constraints = monotone_constraints
        self.interaction_constraints = interaction_constraints
        self.importance_type = importance_type
        self.gpu_id = gpu_id
        self.device = device
        self.validate_parameters = validate_parameters
        self.enable_categorical = enable_categorical
        self.feature_types = feature_types

@@ -1,4 +1,4 @@
"""Xgboost pyspark integration submodule for core code."""
"""XGBoost pyspark integration submodule for core code."""
import base64

# pylint: disable=fixme, too-many-ancestors, protected-access, no-member, invalid-name
@@ -133,6 +133,7 @@ _inverse_pyspark_param_alias_map = {v: k for k, v in _pyspark_param_alias_map.it

_unsupported_xgb_params = [
    "gpu_id",  # we have "use_gpu" pyspark param instead.
    "device",  # we have "use_gpu" pyspark param instead.
    "enable_categorical",  # Use feature_types param to specify categorical feature instead
    "use_label_encoder",
    "n_jobs",  # Do not allow user to set it, will use `spark.task.cpus` value instead.
@@ -899,12 +900,14 @@ class _SparkXGBEstimator(Estimator, _SparkXGBParams, MLReadable, MLWritable):

        context = BarrierTaskContext.get()

        gpu_id = None
        dev_ordinal = None
        use_hist = booster_params.get("tree_method", None) in ("hist", "gpu_hist")

        if use_gpu:
            gpu_id = context.partitionId() if is_local else _get_gpu_id(context)
            booster_params["gpu_id"] = gpu_id
            dev_ordinal = (
                context.partitionId() if is_local else _get_gpu_id(context)
            )
            booster_params["device"] = "cuda:" + str(dev_ordinal)
            # If cuDF is not installed, then using DMatrix instead of QDM,
            # because without cuDF, DMatrix performs better than QDM.
            # Note: Checking `is_cudf_available` in spark worker side because
@@ -945,7 +948,7 @@ class _SparkXGBEstimator(Estimator, _SparkXGBParams, MLReadable, MLWritable):
        dtrain, dvalid = create_dmatrix_from_partitions(
            pandas_df_iter,
            feature_prop.features_cols_names,
            gpu_id,
            dev_ordinal,
            use_qdm,
            dmatrix_kwargs,
            enable_sparse_data_optim=feature_prop.enable_sparse_data_optim,

@@ -157,7 +157,7 @@ def _read_csr_matrix_from_unwrapped_spark_vec(part: pd.DataFrame) -> csr_matrix:

def make_qdm(
    data: Dict[str, List[np.ndarray]],
    gpu_id: Optional[int],
    dev_ordinal: Optional[int],
    meta: Dict[str, Any],
    ref: Optional[DMatrix],
    params: Dict[str, Any],
@@ -165,7 +165,7 @@ def make_qdm(
    """Handle empty partition for QuantileDMatrix."""
    if not data:
        return QuantileDMatrix(np.empty((0, 0)), ref=ref)
    it = PartIter(data, gpu_id, **meta)
    it = PartIter(data, dev_ordinal, **meta)
    m = QuantileDMatrix(it, **params, ref=ref)
    return m

@@ -173,7 +173,7 @@ def make_qdm(
def create_dmatrix_from_partitions(  # pylint: disable=too-many-arguments
    iterator: Iterator[pd.DataFrame],
    feature_cols: Optional[Sequence[str]],
    gpu_id: Optional[int],
    dev_ordinal: Optional[int],
    use_qdm: bool,
    kwargs: Dict[str, Any],  # use dict to make sure this parameter is passed.
    enable_sparse_data_optim: bool,
@@ -187,7 +187,7 @@ def create_dmatrix_from_partitions(  # pylint: disable=too-many-arguments
        Pyspark partition iterator.
    feature_cols:
        A sequence of feature names, used only when rapids plugin is enabled.
    gpu_id:
    dev_ordinal:
        Device ordinal, used when GPU is enabled.
    use_qdm :
        Whether QuantileDMatrix should be used instead of DMatrix.
@@ -304,13 +304,13 @@ def create_dmatrix_from_partitions(  # pylint: disable=too-many-arguments

    if feature_cols is not None and use_qdm:
        cache_partitions(iterator, append_fn)
        dtrain: DMatrix = make_qdm(train_data, gpu_id, meta, None, params)
        dtrain: DMatrix = make_qdm(train_data, dev_ordinal, meta, None, params)
    elif feature_cols is not None and not use_qdm:
        cache_partitions(iterator, append_fn)
        dtrain = make(train_data, kwargs)
    elif feature_cols is None and use_qdm:
        cache_partitions(iterator, append_fn)
        dtrain = make_qdm(train_data, gpu_id, meta, None, params)
        dtrain = make_qdm(train_data, dev_ordinal, meta, None, params)
    else:
        cache_partitions(iterator, append_fn)
        dtrain = make(train_data, kwargs)
@@ -324,7 +324,7 @@ def create_dmatrix_from_partitions(  # pylint: disable=too-many-arguments
    if has_validation_col:
        if use_qdm:
            dvalid: Optional[DMatrix] = make_qdm(
                valid_data, gpu_id, meta, dtrain, params
                valid_data, dev_ordinal, meta, dtrain, params
            )
        else:
            dvalid = make(valid_data, kwargs) if has_validation_col else None

@@ -78,8 +78,7 @@ def _set_pyspark_xgb_cls_param_attrs(


class SparkXGBRegressor(_SparkXGBEstimator):
    """
    SparkXGBRegressor is a PySpark ML estimator. It implements the XGBoost regression
    """SparkXGBRegressor is a PySpark ML estimator. It implements the XGBoost regression
    algorithm based on XGBoost python library, and it can be used in PySpark Pipeline
    and PySpark ML meta algorithms like :py:class:`~pyspark.ml.tuning.CrossValidator`/
    :py:class:`~pyspark.ml.tuning.TrainValidationSplit`/
@@ -89,8 +88,8 @@ class SparkXGBRegressor(_SparkXGBEstimator):
    :py:class:`xgboost.XGBRegressor` constructor and most of the parameters used in
    :py:meth:`xgboost.XGBRegressor.fit` and :py:meth:`xgboost.XGBRegressor.predict` method.

    SparkXGBRegressor doesn't support setting `gpu_id` but support another param `use_gpu`,
    see doc below for more details.
    SparkXGBRegressor doesn't support setting `device` but supports another param
    `use_gpu`, see doc below for more details.

    SparkXGBRegressor doesn't support setting `base_margin` explicitly as well, but support
    another param called `base_margin_col`. see doc below for more details.
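Concretely, GPU selection on the PySpark side stays behind `use_gpu`; a hedged sketch (an active Spark session and a prepared DataFrame ``df_train`` with ``features``/``label`` columns are assumed):

from xgboost.spark import SparkXGBRegressor

regressor = SparkXGBRegressor(
    features_col="features",
    label_col="label",
    use_gpu=True,  # per-task device ordinals are assigned internally
)
model = regressor.fit(df_train)
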
@@ -247,8 +246,8 @@ class SparkXGBClassifier(_SparkXGBEstimator, HasProbabilityCol, HasRawPrediction
    :py:class:`xgboost.XGBClassifier` constructor and most of the parameters used in
    :py:meth:`xgboost.XGBClassifier.fit` and :py:meth:`xgboost.XGBClassifier.predict` method.

    SparkXGBClassifier doesn't support setting `gpu_id` but support another param `use_gpu`,
    see doc below for more details.
    SparkXGBClassifier doesn't support setting `device` but supports another param
    `use_gpu`, see doc below for more details.

    SparkXGBClassifier doesn't support setting `base_margin` explicitly as well, but support
    another param called `base_margin_col`. see doc below for more details.
@@ -423,7 +422,7 @@ class SparkXGBRanker(_SparkXGBEstimator):
    :py:class:`xgboost.XGBRanker` constructor and most of the parameters used in
    :py:meth:`xgboost.XGBRanker.fit` and :py:meth:`xgboost.XGBRanker.predict` method.

    SparkXGBRanker doesn't support setting `gpu_id` but support another param `use_gpu`,
    SparkXGBRanker doesn't support setting `device` but supports another param `use_gpu`,
    see doc below for more details.

    SparkXGBRanker doesn't support setting `base_margin` explicitly as well, but support

@@ -723,24 +723,6 @@ def predictor_equal(lhs: xgb.DMatrix, rhs: xgb.DMatrix) -> bool:
M = TypeVar("M", xgb.Booster, xgb.XGBModel)


def set_ordinal(ordinal: int, booster: M) -> M:
    """Temporary solution for setting the device ordinal until we move away from
    `gpu_id`.

    """
    if ordinal < 0:
        params = {"gpu_id": -1, "tree_method": "hist"}
    else:
        params = {"gpu_id": ordinal, "tree_method": "gpu_hist"}

    if isinstance(booster, xgb.Booster):
        booster.set_param(params)
    elif isinstance(booster, xgb.XGBModel):
        booster.set_params(**params)

    return booster


def eval_error_metric(predt: np.ndarray, dtrain: xgb.DMatrix) -> Tuple[str, np.float64]:
    """Evaluation metric for xgb.train"""
    label = dtrain.get_label()

@@ -117,10 +117,7 @@ int InplacePreidctCUDA(BoosterHandle handle, char const *c_array_interface,
                       RequiredArg<Integer>(config, "iteration_begin", __func__),
                       RequiredArg<Integer>(config, "iteration_end", __func__));
  CHECK(p_predt);
  if (learner->Ctx()->IsCPU()) {
    // Prediction using DMatrix as fallback.
    CHECK(p_predt->HostCanRead() && !p_predt->DeviceCanRead());
  } else {
  if (learner->Ctx()->IsCUDA()) {
    CHECK(p_predt->DeviceCanRead() && !p_predt->HostCanRead());
  }
  p_predt->SetDevice(proxy->DeviceIdx());

@@ -3,23 +3,18 @@
 */
#include "error_msg.h"

#include "../collective/communicator-inl.h"  // for GetRank
#include "xgboost/logging.h"

namespace xgboost::error {
void WarnDeprecatedGPUHist() {
  bool static thread_local logged{false};
  if (logged) {
    return;
  }
  auto msg =
      "The tree method `gpu_hist` is deprecated since 2.0.0. To use GPU training, set the `device` "
      R"(parameter to CUDA instead.

    E.g. tree_method = "hist", device = "CUDA"

)";
  LOG(WARNING) << msg;
  logged = true;
}

void WarnManualUpdater() {
@@ -33,4 +28,23 @@ void WarnManualUpdater() {
      "behavior. For common uses, we recommend using `tree_method` parameter instead.";
  logged = true;
}

void WarnDeprecatedGPUId() {
  static thread_local bool logged{false};
  if (logged) {
    return;
  }
  LOG(WARNING) << "`gpu_id` is deprecated in favor of the new `device` parameter: "
               << "device = cpu/cuda/cuda:0";
  logged = true;
}
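From the Python side, the warning above fires once per thread on first use of the legacy parameter; a hedged sketch of what triggers it (toy data):

import numpy as np
import xgboost as xgb

dtrain = xgb.DMatrix(np.random.rand(32, 4), label=np.random.rand(32))
# Emits the `gpu_id` deprecation warning; prefer {"device": "cuda:0"}.
xgb.train({"gpu_id": 0, "tree_method": "hist"}, dtrain, num_boost_round=2)
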

void WarnEmptyDataset() {
  static thread_local bool logged{false};
  if (logged) {
    return;
  }
  LOG(WARNING) << "Empty dataset at worker: " << collective::GetRank();
  logged = true;
}
}  // namespace xgboost::error

@@ -82,5 +82,9 @@ inline void WarnOldSerialization() {
void WarnDeprecatedGPUHist();

void WarnManualUpdater();

void WarnDeprecatedGPUId();

void WarnEmptyDataset();
}  // namespace xgboost::error
#endif  // XGBOOST_COMMON_ERROR_MSG_H_

src/context.cc
@@ -3,53 +3,201 @@
 *
 * \brief Context object used for controlling runtime parameters.
 */
#include <xgboost/context.h>
#include "xgboost/context.h"

#include <algorithm>  // for find_if
#include <charconv>   // for from_chars
#include <iterator>   // for distance
#include <optional>   // for optional
#include <regex>      // for regex_replace, regex_match

#include "common/common.h"     // AssertGPUSupport
#include "common/error_msg.h"  // WarnDeprecatedGPUId
#include "common/threading_utils.h"
#include "xgboost/string_view.h"

namespace xgboost {

DMLC_REGISTER_PARAMETER(Context);

std::int32_t constexpr Context::kCpuId;
bst_d_ordinal_t constexpr Context::kCpuId;
std::int64_t constexpr Context::kDefaultSeed;

Context::Context() : cfs_cpu_count_{common::GetCfsCPUCount()} {}

void Context::ConfigureGpuId(bool require_gpu) {
#if defined(XGBOOST_USE_CUDA)
  if (gpu_id == kCpuId) {  // 0. User didn't specify the `gpu_id'
    if (require_gpu) {     // 1. `tree_method' or `predictor' or both are using
                           //    GPU.
      // 2. Use device 0 as default.
      this->UpdateAllowUnknown(Args{{"gpu_id", "0"}});
    }
  }
namespace {
inline constexpr char const* kDevice = "device";

  // 3. When booster is loaded from a memory image (Python pickle or R
  //    raw model), number of available GPUs could be different.  Wrap around it.
  int32_t n_gpus = common::AllVisibleGPUs();
  if (n_gpus == 0) {
    if (gpu_id != kCpuId) {
      LOG(WARNING) << "No visible GPU is found, setting `gpu_id` to -1";
    }
    this->UpdateAllowUnknown(Args{{"gpu_id", std::to_string(kCpuId)}});
  } else if (fail_on_invalid_gpu_id) {
    CHECK(gpu_id == kCpuId || gpu_id < n_gpus)
        << "Only " << n_gpus << " GPUs are visible, gpu_id " << gpu_id << " is invalid.";
  } else if (gpu_id != kCpuId && gpu_id >= n_gpus) {
    LOG(WARNING) << "Only " << n_gpus << " GPUs are visible, setting `gpu_id` to "
                 << gpu_id % n_gpus;
    this->UpdateAllowUnknown(Args{{"gpu_id", std::to_string(gpu_id % n_gpus)}});
  }
#if !defined(XGBOOST_USE_CUDA)
DeviceOrd CUDAOrdinal(DeviceOrd device, bool) {
  device = DeviceOrd::CPU();
  return device;
}
#else
  // Just set it to CPU, don't think about it.
  this->UpdateAllowUnknown(Args{{"gpu_id", std::to_string(kCpuId)}});
  (void)(require_gpu);
#endif  // defined(XGBOOST_USE_CUDA)
// Check CUDA on the current device, wrap the ordinal if necessary.
[[nodiscard]] DeviceOrd CUDAOrdinal(DeviceOrd device, bool fail_on_invalid) {
  // When booster is loaded from a memory image (Python pickle or R raw model), number of
  // available GPUs could be different.  Wrap around it.
  std::int32_t n_visible = common::AllVisibleGPUs();
  if (n_visible == 0) {
    if (device.IsCUDA()) {
      LOG(WARNING) << "No visible GPU is found, setting device to CPU.";
    }
    device = DeviceOrd::CPU();
  } else if (fail_on_invalid) {
    CHECK(device.IsCPU() || device.ordinal < n_visible)
        << "Only " << n_visible << " GPUs are visible, ordinal " << device.ordinal
        << " is invalid.";
  } else if (device.IsCUDA() && device.ordinal >= n_visible) {
    device.ordinal = device.ordinal % n_visible;
    LOG(WARNING) << "Only " << n_visible << " GPUs are visible, setting device ordinal to "
                 << device.ordinal;
  }

  common::SetDevice(this->gpu_id);
  if (device.IsCUDA()) {
    common::SetDevice(device.ordinal);
  }
  return device;
}
#endif  // !defined(XGBOOST_USE_CUDA)

[[nodiscard]] std::optional<std::int32_t> ParseInt(StringView ordinal) {
  // Some basic checks to ensure valid `gpu_id` and device ordinal instead of directly parsing and
  // letting go of unknown characters.
  if (ordinal.empty()) {
    return std::nullopt;
  }

  std::size_t offset{0};
  if (ordinal[0] == '-') {
    offset = 1;
  }
  if (ordinal.size() <= offset) {
    return std::nullopt;
  }

  bool valid = std::all_of(ordinal.cbegin() + offset, ordinal.cend(),
                           [](auto c) { return std::isdigit(c); });
  if (!valid) {
    return std::nullopt;
  }

  std::int32_t parsed_id{Context::kCpuId};
  auto res = std::from_chars(ordinal.c_str(), ordinal.c_str() + ordinal.size(), parsed_id);
  if (res.ec != std::errc()) {
    return std::nullopt;
  }

  return parsed_id;
}

[[nodiscard]] DeviceOrd MakeDeviceOrd(std::string const& input, bool fail_on_invalid_gpu_id) {
  StringView msg{R"(Invalid argument for `device`. Expected to be one of the following:
- cpu
- cuda
- cuda:<device ordinal>  # e.g. cuda:0
- gpu
- gpu:<device ordinal>   # e.g. gpu:0
)"};
  auto fatal = [&] { LOG(FATAL) << msg << "Got: `" << input << "`."; };

#if defined(__MINGW32__)
  // mingw hangs on regex using rtools 430.  Basic checks only.
  CHECK_GE(input.size(), 3) << msg;
  auto substr = input.substr(0, 3);
  bool valid = substr == "cpu" || substr == "cud" || substr == "gpu";
  CHECK(valid) << msg;
#else
  std::regex pattern{"gpu(:[0-9]+)?|cuda(:[0-9]+)?|cpu"};
  if (!std::regex_match(input, pattern)) {
    fatal();
  }
#endif  // defined(__MINGW32__)

  // handle alias
  std::string s_device = std::regex_replace(input, std::regex{"gpu"}, DeviceSym::CUDA());

  auto split_it = std::find(s_device.cbegin(), s_device.cend(), ':');
  DeviceOrd device;
  device.ordinal = Context::InvalidOrdinal();  // mark it invalid for check.
  if (split_it == s_device.cend()) {
    // no ordinal.
    if (s_device == DeviceSym::CPU()) {
      device = DeviceOrd::CPU();
    } else if (s_device == DeviceSym::CUDA()) {
      device = DeviceOrd::CUDA(0);  // use 0 as default;
    } else {
      fatal();
    }
  } else {
    // must be CUDA when ordinal is specified.
    // +1 for colon
    std::size_t offset = std::distance(s_device.cbegin(), split_it) + 1;
    // substr
    StringView s_ordinal = {s_device.data() + offset, s_device.size() - offset};
    if (s_ordinal.empty()) {
      fatal();
    }
    auto opt_id = ParseInt(s_ordinal);
    if (!opt_id.has_value()) {
      fatal();
    }
    CHECK_LE(opt_id.value(), std::numeric_limits<bst_d_ordinal_t>::max())
        << "Ordinal value too large.";
    device = DeviceOrd::CUDA(opt_id.value());
  }

  if (device.ordinal < Context::kCpuId) {
    fatal();
  }
  device = CUDAOrdinal(device, fail_on_invalid_gpu_id);

  return device;
}
}  // namespace

void Context::ConfigureGpuId(bool require_gpu) {
  if (this->IsCPU() && require_gpu) {
    this->UpdateAllowUnknown(Args{{kDevice, DeviceSym::CUDA()}});
  }
}

void Context::SetDeviceOrdinal(Args const& kwargs) {
  auto gpu_id_it = std::find_if(kwargs.cbegin(), kwargs.cend(),
                                [](auto const& p) { return p.first == "gpu_id"; });
  auto has_gpu_id = gpu_id_it != kwargs.cend();
  auto device_it = std::find_if(kwargs.cbegin(), kwargs.cend(),
                                [](auto const& p) { return p.first == kDevice; });
  auto has_device = device_it != kwargs.cend();
  if (has_device && has_gpu_id) {
    LOG(FATAL) << "Both `device` and `gpu_id` are specified. Use `device` instead.";
  }

  if (has_gpu_id) {
    // Compatible with XGBoost < 2.0.0
    error::WarnDeprecatedGPUId();
    auto opt_id = ParseInt(StringView{gpu_id_it->second});
    CHECK(opt_id.has_value()) << "Invalid value for `gpu_id`. Got:" << gpu_id_it->second;
    if (opt_id.value() > Context::kCpuId) {
      this->UpdateAllowUnknown(Args{{kDevice, DeviceOrd::CUDA(opt_id.value()).Name()}});
    } else {
      this->UpdateAllowUnknown(Args{{kDevice, DeviceOrd::CPU().Name()}});
    }
    return;
  }

  auto new_d = MakeDeviceOrd(this->device, this->fail_on_invalid_gpu_id);

  if (!has_device) {
    CHECK_EQ(new_d.ordinal, this->device_.ordinal);  // unchanged
  }
  this->SetDevice(new_d);

  if (this->IsCPU()) {
    CHECK_EQ(this->device_.ordinal, kCpuId);
  } else {
    CHECK_GT(this->device_.ordinal, kCpuId);
  }
}
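The mutual-exclusion check above surfaces in Python as a hard error when both spellings are passed; a hedged sketch (toy data):

import numpy as np
import xgboost as xgb

dtrain = xgb.DMatrix(np.random.rand(16, 4), label=np.random.rand(16))
# Raises: "Both `device` and `gpu_id` are specified. Use `device` instead."
xgb.train({"device": "cuda:0", "gpu_id": 0}, dtrain, num_boost_round=1)
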

std::int32_t Context::Threads() const {

@@ -33,10 +33,11 @@ IterativeDMatrix::IterativeDMatrix(DataIterHandle iter_handle, DMatrixHandle pro
  bool valid = iter.Next();
  CHECK(valid) << "Iterative DMatrix must have at least 1 batch.";

  auto d = MakeProxy(proxy_)->DeviceIdx();
  auto pctx = MakeProxy(proxy_)->Ctx();

  Context ctx;
  ctx.UpdateAllowUnknown(Args{{"nthread", std::to_string(nthread)}, {"gpu_id", std::to_string(d)}});
  ctx.UpdateAllowUnknown(
      Args{{"nthread", std::to_string(nthread)}, {"device", pctx->DeviceName()}});
  // hardcoded parameter.
  BatchParam p{max_bin, tree::TrainParam::DftSparseThreshold()};

@@ -54,6 +54,7 @@ std::shared_ptr<DMatrix> CreateDMatrixFromProxy(Context const *ctx,
    p_fmat = cuda_impl::CreateDMatrixFromProxy(ctx, proxy, missing);
  }

  CHECK(p_fmat) << "Failed to fallback.";
  return p_fmat;
}
}  // namespace xgboost::data

@@ -7,28 +7,31 @@

namespace xgboost::data {
void DMatrixProxy::FromCudaColumnar(StringView interface_str) {
  std::shared_ptr<data::CudfAdapter> adapter{new CudfAdapter{interface_str}};
  auto const& value = adapter->Value();
  auto adapter{std::make_shared<CudfAdapter>(interface_str)};
  this->batch_ = adapter;
  ctx_.gpu_id = adapter->DeviceIdx();
  this->Info().num_col_ = adapter->NumColumns();
  this->Info().num_row_ = adapter->NumRows();
  if (ctx_.gpu_id < 0) {
  if (adapter->DeviceIdx() < 0) {
    // empty data
    CHECK_EQ(this->Info().num_row_, 0);
    ctx_.gpu_id = dh::CurrentDevice();
    ctx_ = ctx_.MakeCUDA(dh::CurrentDevice());
    return;
  }
  ctx_ = ctx_.MakeCUDA(adapter->DeviceIdx());
}

void DMatrixProxy::FromCudaArray(StringView interface_str) {
  std::shared_ptr<CupyAdapter> adapter(new CupyAdapter{StringView{interface_str}});
  auto adapter(std::make_shared<CupyAdapter>(StringView{interface_str}));
  this->batch_ = adapter;
  ctx_.gpu_id = adapter->DeviceIdx();
  this->Info().num_col_ = adapter->NumColumns();
  this->Info().num_row_ = adapter->NumRows();
  if (ctx_.gpu_id < 0) {
  if (adapter->DeviceIdx() < 0) {
    // empty data
    CHECK_EQ(this->Info().num_row_, 0);
    ctx_.gpu_id = dh::CurrentDevice();
    ctx_ = ctx_.MakeCUDA(dh::CurrentDevice());
    return;
  }
  ctx_ = ctx_.MakeCUDA(adapter->DeviceIdx());
}

namespace cuda_impl {

@@ -27,7 +27,7 @@ SimpleDMatrix::SimpleDMatrix(AdapterT* adapter, float missing, std::int32_t nthr
  dh::safe_cuda(cudaSetDevice(device));

  Context ctx;
  ctx.Init(Args{{"nthread", std::to_string(nthread)}, {"gpu_id", std::to_string(device)}});
  ctx.Init(Args{{"nthread", std::to_string(nthread)}, {"device", DeviceOrd::CUDA(device).Name()}});

  CHECK(adapter->NumRows() != kAdapterUnknownSize);
  CHECK(adapter->NumColumns() != kAdapterUnknownSize);

@@ -84,6 +84,25 @@ bool UpdatersMatched(std::vector<std::string> updater_seq,
    return name == up->Name();
  });
}

void MismatchedDevices(Context const* booster, Context const* data) {
  bool thread_local static logged{false};
  if (logged) {
    return;
  }
  LOG(WARNING) << "Falling back to prediction using DMatrix due to mismatched devices. This might "
                  "lead to higher memory usage and slower performance. XGBoost is running on: "
               << booster->DeviceName() << ", while the input data is on: " << data->DeviceName()
               << ".\n"
               << R"(Potential solutions:
- Use a data structure that matches the device ordinal in the booster.
- Set the device for booster before call to inplace_predict.

This warning will only be shown once, and subsequent warnings made by the current thread will be
suppressed.
)";
  logged = true;
}
}  // namespace

void GBTree::Configure(Args const& cfg) {

@@ -208,6 +227,7 @@ void GBTree::DoBoost(DMatrix* p_fmat, HostDeviceVector<GradientPair>* in_gpair,
  bst_target_t const n_groups = model_.learner_model_param->OutputLength();
  monitor_.Start("BoostNewTrees");

  predt->predictions.SetDevice(ctx_->Ordinal());
  auto out = linalg::MakeTensorView(ctx_, &predt->predictions, p_fmat->Info().num_row_,
                                    model_.learner_model_param->OutputLength());
  CHECK_NE(n_groups, 0);
@@ -521,18 +541,6 @@ void GBTree::PredictBatchImpl(DMatrix* p_fmat, PredictionCacheEntry* out_preds,
  }
}

namespace {
inline void MismatchedDevices(Context const* booster, Context const* data) {
  LOG(WARNING) << "Falling back to prediction using DMatrix due to mismatched devices. XGBoost "
               << "is running on: " << booster->DeviceName()
               << ", while the input data is on: " << data->DeviceName() << ".\n"
               << R"(Potential solutions:
- Use a data structure that matches the device ordinal in the booster.
- Set the device for booster before call to inplace_predict.
)";
}
};  // namespace

void GBTree::PredictBatch(DMatrix* p_fmat, PredictionCacheEntry* out_preds, bool is_training,
                          bst_layer_t layer_begin, bst_layer_t layer_end) {
  // dispatch to const function.

@@ -40,7 +40,7 @@
#include "common/api_entry.h"  // for XGBAPIThreadLocalEntry
#include "common/charconv.h"   // for to_chars, to_chars_result, NumericLimits, from_...
#include "common/common.h"     // for ToString, Split
#include "common/error_msg.h"  // for MaxFeatureSize, WarnOldSerialization
#include "common/error_msg.h"  // for MaxFeatureSize, WarnOldSerialization, ...
#include "common/io.h"         // for PeekableInStream, ReadAll, FixedSizeStream, Mem...
#include "common/observer.h"   // for TrainingObserver
#include "common/random.h"     // for GlobalRandom
@@ -711,6 +711,7 @@ class LearnerConfiguration : public Learner {
    // FIXME(trivialfis): Make eval_metric a training parameter.
    keys.emplace_back(kEvalMetric);
    keys.emplace_back("num_output_group");
    keys.emplace_back("gpu_id");  // deprecated param.

    std::sort(keys.begin(), keys.end());

@@ -1340,10 +1341,9 @@ class LearnerImpl : public LearnerIO {
  }

  void Predict(std::shared_ptr<DMatrix> data, bool output_margin,
               HostDeviceVector<bst_float> *out_preds, unsigned layer_begin,
               unsigned layer_end, bool training,
               bool pred_leaf, bool pred_contribs, bool approx_contribs,
               bool pred_interactions) override {
               HostDeviceVector<bst_float>* out_preds, bst_layer_t layer_begin,
               bst_layer_t layer_end, bool training, bool pred_leaf, bool pred_contribs,
               bool approx_contribs, bool pred_interactions) override {
    int multiple_predictions = static_cast<int>(pred_leaf) +
                               static_cast<int>(pred_interactions) +
                               static_cast<int>(pred_contribs);
@@ -1391,15 +1391,16 @@ class LearnerImpl : public LearnerIO {
  }

  void InplacePredict(std::shared_ptr<DMatrix> p_m, PredictionType type, float missing,
                      HostDeviceVector<bst_float>** out_preds, uint32_t iteration_begin,
                      uint32_t iteration_end) override {
                      HostDeviceVector<float>** out_preds, bst_layer_t iteration_begin,
                      bst_layer_t iteration_end) override {
    this->Configure();
    this->CheckModelInitialized();

    auto& out_predictions = this->GetThreadLocal().prediction_entry;
    out_predictions.version = 0;
    out_predictions.Reset();

    this->gbm_->InplacePredict(p_m, missing, &out_predictions, iteration_begin, iteration_end);

    if (type == PredictionType::kValue) {
      obj_->PredTransform(&out_predictions.predictions);
    } else if (type == PredictionType::kMargin) {
@@ -1454,7 +1455,7 @@ class LearnerImpl : public LearnerIO {
  }

  if (p_fmat->Info().num_row_ == 0) {
    LOG(WARNING) << "Empty dataset at worker: " << collective::GetRank();
    error::WarnEmptyDataset();
  }
}

@ -28,6 +28,7 @@ class LintersPaths:
|
||||
"tests/python-gpu/test_gpu_prediction.py",
|
||||
"tests/python-gpu/load_pickle.py",
|
||||
"tests/python-gpu/test_gpu_pickling.py",
|
||||
"tests/python-gpu/test_gpu_eval_metrics.py",
|
||||
"tests/test_distributed/test_with_spark/",
|
||||
"tests/test_distributed/test_gpu_with_spark/",
|
||||
# demo
|
||||
|
||||
@ -16,8 +16,7 @@
namespace xgboost {
namespace common {
void TestSegmentedArgSort() {
Context ctx;
ctx.gpu_id = 0;
auto ctx = MakeCUDACtx(0);

size_t constexpr kElements = 100, kGroups = 3;
dh::device_vector<size_t> sorted_idx(kElements, 0);
@ -55,8 +54,7 @@ void TestSegmentedArgSort() {
TEST(Algorithm, SegmentedArgSort) { TestSegmentedArgSort(); }

TEST(Algorithm, GpuArgSort) {
Context ctx;
ctx.gpu_id = 0;
auto ctx = MakeCUDACtx(0);

dh::device_vector<float> values(20);
dh::Iota(dh::ToSpan(values)); // ascending

@ -227,7 +227,7 @@ TEST(HistUtil, RemoveDuplicatedCategories) {
}
// check categorical
beg = n_samples;
for (std::size_t i = 0; i < n_categories; ++i) {
for (bst_cat_t i = 0; i < n_categories; ++i) {
// all from the second column
ASSERT_EQ(static_cast<bst_feature_t>(weight[i + beg]) % n_features, 1);
}

@ -4,6 +4,7 @@
#include <gtest/gtest.h>

#include "../../../src/common/linalg_op.cuh"
#include "../helpers.h"
#include "xgboost/context.h"
#include "xgboost/linalg.h"

@ -54,8 +55,7 @@ void TestElementWiseKernel() {
}

void TestSlice() {
Context ctx;
ctx.gpu_id = 1;
auto ctx = MakeCUDACtx(1);
thrust::device_vector<double> data(2 * 3 * 4);
auto t = MakeTensorView(&ctx, dh::ToSpan(data), 2, 3, 4);
dh::LaunchN(1, [=] __device__(size_t) {

@ -23,8 +23,7 @@

namespace xgboost::ltr {
void TestCalcQueriesInvIDCG() {
Context ctx;
ctx.UpdateAllowUnknown(Args{{"gpu_id", "0"}});
auto ctx = MakeCUDACtx(0);
std::size_t n_groups = 5, n_samples_per_group = 32;

dh::device_vector<float> scores(n_samples_per_group * n_groups);
@ -85,20 +84,17 @@ void TestRankingCache(Context const* ctx) {
}  // namespace

TEST(RankingCache, InitFromGPU) {
Context ctx;
ctx.UpdateAllowUnknown(Args{{"gpu_id", "0"}});
auto ctx = MakeCUDACtx(0);
TestRankingCache(&ctx);
}

TEST(NDCGCache, InitFromGPU) {
Context ctx;
ctx.UpdateAllowUnknown(Args{{"gpu_id", "0"}});
auto ctx = MakeCUDACtx(0);
TestNDCGCache(&ctx);
}

TEST(MAPCache, InitFromGPU) {
Context ctx;
ctx.UpdateAllowUnknown(Args{{"gpu_id", "0"}});
auto ctx = MakeCUDACtx(0);
TestMAPCache(&ctx);
}
}  // namespace xgboost::ltr

@ -7,6 +7,7 @@

#include "../../../src/common/stats.h"
#include "../../../src/common/transform_iterator.h" // common::MakeIndexTransformIter
#include "../helpers.h"

namespace xgboost {
namespace common {
@ -71,7 +72,7 @@ TEST(Stats, Median) {
ASSERT_EQ(m, .5f);

#if defined(XGBOOST_USE_CUDA)
ctx.gpu_id = 0;
ctx = ctx.MakeCUDA(0);
ASSERT_FALSE(ctx.IsCPU());
Median(&ctx, values, weights, &out);
m = out(0);
@ -80,7 +81,7 @@ TEST(Stats, Median) {
}

{
ctx.gpu_id = Context::kCpuId;
ctx = ctx.MakeCPU();
// 4x2 matrix
linalg::Tensor<float, 2> values{{0.f, 0.f, 0.f, 0.f, 1.f, 1.f, 2.f, 2.f}, {4, 2}, ctx.gpu_id};
HostDeviceVector<float> weights;
@ -90,7 +91,7 @@ TEST(Stats, Median) {
ASSERT_EQ(out(1), .5f);

#if defined(XGBOOST_USE_CUDA)
ctx.gpu_id = 0;
ctx = ctx.MakeCUDA(0);
Median(&ctx, values, weights, &out);
ASSERT_EQ(out(0), .5f);
ASSERT_EQ(out(1), .5f);
@ -123,8 +124,7 @@ TEST(Stats, Mean) {

#if defined(XGBOOST_USE_CUDA)
TEST(Stats, GPUMean) {
Context ctx;
ctx.UpdateAllowUnknown(Args{{"gpu_id", "0"}});
auto ctx = MakeCUDACtx(0);
TestMean(&ctx);
}
#endif  // defined(XGBOOST_USE_CUDA)

@ -9,6 +9,7 @@

#include "../../../src/common/linalg_op.cuh" // ElementWiseTransformDevice
#include "../../../src/common/stats.cuh"
#include "../helpers.h"
#include "xgboost/base.h" // XGBOOST_DEVICE
#include "xgboost/context.h" // Context
#include "xgboost/host_device_vector.h" // HostDeviceVector
@ -33,7 +34,7 @@ class StatsGPU : public ::testing::Test {
}

public:
void SetUp() override { ctx_.gpu_id = 0; }
void SetUp() override { ctx_ = MakeCUDACtx(0); }

void WeightedMulti() {
// data for one segment

@ -171,8 +171,7 @@ class GHistIndexMatrixTest : public testing::TestWithParam<std::tuple<float, flo
ASSERT_TRUE(Xy->SingleColBlock());
bst_bin_t constexpr kBins{17};
auto p = BatchParam{kBins, threshold};
Context gpu_ctx;
gpu_ctx.gpu_id = 0;
auto gpu_ctx = MakeCUDACtx(0);
for (auto const &page : Xy->GetBatches<EllpackPage>(
&gpu_ctx, BatchParam{kBins, tree::TrainParam::DftSparseThreshold()})) {
from_ellpack = std::make_unique<GHistIndexMatrix>(&ctx, Xy->Info(), page, p);

@ -180,7 +180,12 @@ TEST(GBTree, ChooseTreeMethod) {
learner->SetParam("tree_method", tree_method.value());
}
if (device.has_value()) {
learner->SetParam("gpu_id", device.value());
auto const& d = device.value();
if (std::isdigit(d.front()) || d.front() == '-') {
learner->SetParam("gpu_id", d);
} else {
learner->SetParam("device", d);
}
}
learner->Configure();
for (std::int32_t i = 0; i < 3; ++i) {
@ -199,7 +204,12 @@ TEST(GBTree, ChooseTreeMethod) {
learner->SetParam("tree_method", tree_method.value());
}
if (device.has_value()) {
learner->SetParam("gpu_id", device.value());
auto const& d = device.value();
if (std::isdigit(d.front()) || d.front() == '-') {
learner->SetParam("gpu_id", d);
} else {
learner->SetParam("device", d);
}
}
learner->Configure();
for (std::int32_t i = 0; i < 3; ++i) {
@ -215,11 +225,12 @@ TEST(GBTree, ChooseTreeMethod) {

// |        | hist    | gpu_hist | exact | NA  |
// |--------+---------+----------+-------+-----|
// | CUDA:0 | GPU     | GPU (w)  | Err   | GPU | # not yet tested
// | CPU    | CPU     | Err      | CPU   | CPU | # not yet tested
// | CUDA:0 | GPU     | GPU (w)  | Err   | GPU |
// | CPU    | CPU     | GPU (w)  | CPU   | CPU |
// |--------+---------+----------+-------+-----|
// | -1     | CPU     | GPU (w)  | CPU   | CPU |
// | 0      | GPU     | GPU (w)  | Err   | GPU |
// |--------+---------+----------+-------+-----|
// | NA     | CPU     | GPU (w)  | CPU   | CPU |
//
// - (w): warning
@ -237,18 +248,30 @@ TEST(GBTree, ChooseTreeMethod) {
// hist
{{"hist", "-1"}, "grow_quantile_histmaker"},
{{"hist", "0"}, "grow_gpu_hist"},
{{"hist", "cpu"}, "grow_quantile_histmaker"},
{{"hist", "cuda"}, "grow_gpu_hist"},
{{"hist", "cuda:0"}, "grow_gpu_hist"},
{{"hist", std::nullopt}, "grow_quantile_histmaker"},
// gpu_hist
{{"gpu_hist", "-1"}, "grow_gpu_hist"},
{{"gpu_hist", "0"}, "grow_gpu_hist"},
{{"gpu_hist", "cpu"}, "grow_gpu_hist"},
{{"gpu_hist", "cuda"}, "grow_gpu_hist"},
{{"gpu_hist", "cuda:0"}, "grow_gpu_hist"},
{{"gpu_hist", std::nullopt}, "grow_gpu_hist"},
// exact
{{"exact", "-1"}, "grow_colmaker,prune"},
{{"exact", "0"}, "err"},
{{"exact", "cpu"}, "grow_colmaker,prune"},
{{"exact", "cuda"}, "err"},
{{"exact", "cuda:0"}, "err"},
{{"exact", std::nullopt}, "grow_colmaker,prune"},
// NA
{{std::nullopt, "-1"}, "grow_quantile_histmaker"},
{{std::nullopt, "0"}, "grow_gpu_hist"}, // default to hist
{{std::nullopt, "cpu"}, "grow_quantile_histmaker"},
{{std::nullopt, "cuda"}, "grow_gpu_hist"},
{{std::nullopt, "cuda:0"}, "grow_gpu_hist"},
{{std::nullopt, std::nullopt}, "grow_quantile_histmaker"},
};
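A sketch of what the table and mapping above mean for user-facing parameters (illustrative values, assuming a CUDA build; only combinations exercised by the test are shown):

.. code-block:: python

    import xgboost as xgb

    # "hist" plus a CUDA device selects the GPU histogram updater.
    gpu_params = {"tree_method": "hist", "device": "cuda:0"}
    # The deprecated "gpu_hist" resolves to the same updater, with a warning.
    legacy_params = {"tree_method": "gpu_hist"}
    # "exact" has no CUDA implementation, so this combination is an error.
    bad_params = {"tree_method": "exact", "device": "cuda"}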

@ -392,8 +415,7 @@ class Dart : public testing::TestWithParam<char const*> {
for (size_t i = 0; i < 16; ++i) {
learner->UpdateOneIter(i, p_mat);
}

ConfigLearnerByCtx(&ctx, learner.get());
learner->SetParam("device", ctx.DeviceName());

HostDeviceVector<float> predts_training;
learner->Predict(p_mat, false, &predts_training, 0, 0, true);
@ -654,8 +676,7 @@ TEST(GBTree, InplacePredictionError) {
RandomDataGenerator{n_samples, n_features, 0.5f}.Batches(2).GenerateSparsePageDMatrix(
"cache", true);
std::unique_ptr<Learner> learner{Learner::Create({p_fmat})};
learner->SetParam("booster", booster);
ConfigLearnerByCtx(ctx, learner.get());
learner->SetParams(Args{{"booster", booster}, {"device", ctx->DeviceName()}});
learner->Configure();
for (std::int32_t i = 0; i < 3; ++i) {
learner->UpdateOneIter(i, p_fmat);
@ -697,9 +718,9 @@ TEST(GBTree, InplacePredictionError) {
#endif  // defined(XGBOOST_USE_CUDA)
};
std::unique_ptr<Learner> learner{Learner::Create({p_fmat})};
learner->SetParam("booster", booster);
learner->SetParam("max_bin", std::to_string(max_bins));
ConfigLearnerByCtx(ctx, learner.get());
learner->SetParams(Args{{"booster", booster},
{"max_bin", std::to_string(max_bins)},
{"device", ctx->DeviceName()}});
learner->Configure();
for (std::int32_t i = 0; i < 3; ++i) {
learner->UpdateOneIter(i, p_fmat);

@ -8,6 +8,7 @@
#include <limits> // for numeric_limits
#include <memory> // for shared_ptr
#include <string> // for string
#include <thread> // for thread

#include "../../../src/data/adapter.h" // for ArrayAdapter
#include "../../../src/data/device_adapter.cuh" // for CupyAdapter
@ -41,7 +42,7 @@ void TestInplaceFallback(Context const* ctx) {

// learner is configured to the device specified by ctx
std::unique_ptr<Learner> learner{Learner::Create({Xy})};
ConfigLearnerByCtx(ctx, learner.get());
learner->SetParam("device", ctx->DeviceName());
for (std::int32_t i = 0; i < 3; ++i) {
learner->UpdateOneIter(i, Xy);
}
@ -56,18 +57,31 @@ void TestInplaceFallback(Context const* ctx) {

HostDeviceVector<float>* out_predt{nullptr};
ConsoleLogger::Configure(Args{{"verbosity", "1"}});
std::string output;
// test whether the warning is raised
#if !defined(_WIN32)
// Windows has an issue with CUDA and thread local storage. For some reason, on Windows a
// cudaInitializationError is raised during destruction of `HostDeviceVector`. This
// might be related to https://github.com/dmlc/xgboost/issues/5793
::testing::internal::CaptureStderr();
std::thread{[&] {
// Launch a new thread to ensure a warning is raised, as we prevent over-verbose
// warnings by using thread-local flags.
learner->InplacePredict(p_m, PredictionType::kValue, std::numeric_limits<float>::quiet_NaN(),
&out_predt, 0, 0);
auto output = testing::internal::GetCapturedStderr();
}}.join();
output = testing::internal::GetCapturedStderr();
ASSERT_NE(output.find("Falling back"), std::string::npos);
#endif

learner->InplacePredict(p_m, PredictionType::kValue, std::numeric_limits<float>::quiet_NaN(),
&out_predt, 0, 0);

// test when the contexts match
Context new_ctx = *proxy->Ctx();
ASSERT_NE(new_ctx.gpu_id, ctx->gpu_id);

ConfigLearnerByCtx(&new_ctx, learner.get());
learner->SetParam("device", new_ctx.DeviceName());
HostDeviceVector<float>* out_predt_1{nullptr};
// no warning is raised
::testing::internal::CaptureStderr();

@ -559,16 +559,4 @@ class DeclareUnifiedDistributedTest(MetricTest) : public ::testing::Test {
}
}
};

// A temporary solution before we move away from gpu_id.
inline void ConfigLearnerByCtx(Context const* ctx, Learner* learner) {
if (ctx->IsCPU()) {
learner->SetParam("tree_method", "hist");
} else {
learner->SetParam("tree_method", "gpu_hist");
}
learner->SetParam("gpu_id", std::to_string(ctx->gpu_id));
learner->Configure();
ASSERT_EQ(learner->Ctx()->gpu_id, ctx->gpu_id);
}
}  // namespace xgboost

@ -46,7 +46,6 @@ inline void CheckDeterministicMetricMultiClass(StringView name, int32_t device)

inline void TestMultiClassError(int device, DataSplitMode data_split_mode) {
auto ctx = MakeCUDACtx(device);
ctx.gpu_id = device;
xgboost::Metric * metric = xgboost::Metric::Create("merror", &ctx);
metric->Configure({});
ASSERT_STREQ(metric->Name(), "merror");
@ -67,7 +66,6 @@ inline void VerifyMultiClassError(DataSplitMode data_split_mode = DataSplitMode:

inline void TestMultiClassLogLoss(int device, DataSplitMode data_split_mode) {
auto ctx = MakeCUDACtx(device);
ctx.gpu_id = device;
xgboost::Metric * metric = xgboost::Metric::Create("mlogloss", &ctx);
metric->Configure({});
ASSERT_STREQ(metric->Name(), "mlogloss");

@ -13,26 +13,22 @@

namespace xgboost::obj {
TEST(LambdaRank, GPUNDCGJsonIO) {
Context ctx;
ctx.gpu_id = 0;
auto ctx = MakeCUDACtx(0);
TestNDCGJsonIO(&ctx);
}

TEST(LambdaRank, GPUMAPStat) {
Context ctx;
ctx.gpu_id = 0;
auto ctx = MakeCUDACtx(0);
TestMAPStat(&ctx);
}

TEST(LambdaRank, GPUNDCGGPair) {
Context ctx;
ctx.gpu_id = 0;
auto ctx = MakeCUDACtx(0);
TestNDCGGPair(&ctx);
}

void TestGPUMakePair() {
Context ctx;
ctx.gpu_id = 0;
auto ctx = MakeCUDACtx(0);

MetaInfo info;
HostDeviceVector<float> predt;
@ -126,8 +122,7 @@ void TestGPUMakePair() {
TEST(LambdaRank, GPUMakePair) { TestGPUMakePair(); }

TEST(LambdaRank, GPUUnbiasedNDCG) {
Context ctx;
ctx.gpu_id = 0;
auto ctx = MakeCUDACtx(0);
TestUnbiasedNDCG(&ctx);
}

@ -161,8 +156,7 @@ TEST(LambdaRank, RankItemCountOnRight) {
}

TEST(LambdaRank, GPUMAPGPair) {
Context ctx;
ctx.gpu_id = 0;
auto ctx = MakeCUDACtx(0);
TestMAPGPair(&ctx);
}
}  // namespace xgboost::obj

@ -305,12 +305,12 @@ TEST(Objective, CPU_vs_CUDA) {

{
// CPU
ctx.gpu_id = -1;
ctx = ctx.MakeCPU();
obj->GetGradient(preds, info, 0, &cpu_out_preds);
}
{
// CUDA
ctx.gpu_id = 0;
ctx = ctx.MakeCUDA(0);
obj->GetGradient(preds, info, 0, &cuda_out_preds);
}

@ -148,7 +148,7 @@ TEST(Plugin, CPUvsOneAPI) {

{
// CPU
ctx.gpu_id = -1;
ctx = ctx.MakeCPU();
obj_cpu->GetGradient(preds, info, 0, &cpu_out_preds);
}
{

@ -214,15 +214,16 @@ void TestUpdatePredictionCache(bool use_subsampling) {
}
}  // namespace

TEST(CPUPredictor, GHistIndex) {
TEST(CPUPredictor, GHistIndexTraining) {
size_t constexpr kRows{128}, kCols{16}, kBins{64};
Context ctx;
auto p_hist = RandomDataGenerator{kRows, kCols, 0.0}.Bins(kBins).GenerateQuantileDMatrix(false);
HostDeviceVector<float> storage(kRows * kCols);
auto columnar = RandomDataGenerator{kRows, kCols, 0.0}.GenerateArrayInterface(&storage);
auto adapter = data::ArrayAdapter(columnar.c_str());
std::shared_ptr<DMatrix> p_full{
DMatrix::Create(&adapter, std::numeric_limits<float>::quiet_NaN(), 1)};
TestTrainingPrediction(kRows, kBins, "hist", p_full, p_hist);
TestTrainingPrediction(&ctx, kRows, kBins, p_full, p_hist);
}

TEST(CPUPredictor, CategoricalPrediction) {

@ -33,9 +33,8 @@ TEST(GPUPredictor, Basic) {
int n_row = i, n_col = i;
auto dmat = RandomDataGenerator(n_row, n_col, 0).GenerateDMatrix();

Context ctx;
ctx.gpu_id = 0;
LearnerModelParam mparam{MakeMP(n_col, .5, 1, ctx.gpu_id)};
auto ctx = MakeCUDACtx(0);
LearnerModelParam mparam{MakeMP(n_col, .5, 1, ctx.Ordinal())};
gbm::GBTreeModel model = CreateTestModel(&mparam, &ctx);

// Test predict batch
@ -71,7 +70,7 @@ void VerifyBasicColumnSplit(std::array<std::vector<float>, 32> const& expected_r
auto dmat = RandomDataGenerator(n_row, n_col, 0).GenerateDMatrix();
std::unique_ptr<DMatrix> sliced{dmat->SliceCol(world_size, rank)};

LearnerModelParam mparam{MakeMP(n_col, .5, 1, ctx.gpu_id)};
LearnerModelParam mparam{MakeMP(n_col, .5, 1, ctx.Ordinal())};
gbm::GBTreeModel model = CreateTestModel(&mparam, &ctx);

// Test predict batch
@ -102,7 +101,7 @@ TEST(GPUPredictor, MGPUBasicColumnSplit) {
size_t n_row = i, n_col = i;
auto dmat = RandomDataGenerator(n_row, n_col, 0).GenerateDMatrix();

LearnerModelParam mparam{MakeMP(n_col, .5, 1, ctx.gpu_id)};
LearnerModelParam mparam{MakeMP(n_col, .5, 1, ctx.Ordinal())};
gbm::GBTreeModel model = CreateTestModel(&mparam, &ctx);

// Test predict batch
@ -132,18 +131,19 @@ TEST(GPUPredictor, EllpackBasic) {
}

TEST(GPUPredictor, EllpackTraining) {
size_t constexpr kRows { 128 }, kCols { 16 }, kBins { 64 };
auto p_ellpack =
RandomDataGenerator{kRows, kCols, 0.0}.Bins(kBins).Device(0).GenerateDeviceDMatrix(false);
auto ctx = MakeCUDACtx(0);
size_t constexpr kRows{128}, kCols{16}, kBins{64};
auto p_ellpack = RandomDataGenerator{kRows, kCols, 0.0}
.Bins(kBins)
.Device(ctx.Ordinal())
.GenerateDeviceDMatrix(false);
HostDeviceVector<float> storage(kRows * kCols);
auto columnar = RandomDataGenerator{kRows, kCols, 0.0}
.Device(0)
.GenerateArrayInterface(&storage);
auto columnar =
RandomDataGenerator{kRows, kCols, 0.0}.Device(ctx.Ordinal()).GenerateArrayInterface(&storage);
auto adapter = data::CupyAdapter(columnar);
std::shared_ptr<DMatrix> p_full {
DMatrix::Create(&adapter, std::numeric_limits<float>::quiet_NaN(), 1)
};
TestTrainingPrediction(kRows, kBins, "gpu_hist", p_full, p_ellpack);
std::shared_ptr<DMatrix> p_full{
DMatrix::Create(&adapter, std::numeric_limits<float>::quiet_NaN(), 1)};
TestTrainingPrediction(&ctx, kRows, kBins, p_full, p_ellpack);
}

TEST(GPUPredictor, ExternalMemoryTest) {
@ -153,9 +153,8 @@ TEST(GPUPredictor, ExternalMemoryTest) {
gpu_predictor->Configure({});

const int n_classes = 3;
Context ctx;
ctx.gpu_id = 0;
LearnerModelParam mparam{MakeMP(5, .5, n_classes, ctx.gpu_id)};
Context ctx = MakeCUDACtx(0);
LearnerModelParam mparam{MakeMP(5, .5, n_classes, ctx.Ordinal())};

gbm::GBTreeModel model = CreateTestModel(&mparam, &ctx, n_classes);
std::vector<std::unique_ptr<DMatrix>> dmats;
@ -185,7 +184,7 @@ TEST(GPUPredictor, InplacePredictCupy) {
auto ctx = MakeCUDACtx(0);
size_t constexpr kRows{128}, kCols{64};
RandomDataGenerator gen(kRows, kCols, 0.5);
gen.Device(ctx.gpu_id);
gen.Device(ctx.Ordinal());
HostDeviceVector<float> data;
std::string interface_str = gen.GenerateArrayInterface(&data);
std::shared_ptr<DMatrix> p_fmat{new data::DMatrixProxy};
@ -197,7 +196,7 @@ TEST(GPUPredictor, InplacePredictCuDF) {
auto ctx = MakeCUDACtx(0);
size_t constexpr kRows{128}, kCols{64};
RandomDataGenerator gen(kRows, kCols, 0.5);
gen.Device(ctx.gpu_id);
gen.Device(ctx.Ordinal());
std::vector<HostDeviceVector<float>> storage(kCols);
auto interface_str = gen.GenerateColumnarArrayInterface(&storage);
std::shared_ptr<DMatrix> p_fmat{new data::DMatrixProxy};
@ -214,9 +213,8 @@ TEST(GpuPredictor, LesserFeatures) {
TEST(GPUPredictor, ShapStump) {
cudaSetDevice(0);

Context ctx;
ctx.gpu_id = 0;
LearnerModelParam mparam{MakeMP(1, .5, 1, ctx.gpu_id)};
auto ctx = MakeCUDACtx(0);
LearnerModelParam mparam{MakeMP(1, .5, 1, ctx.Ordinal())};
gbm::GBTreeModel model(&mparam, &ctx);

std::vector<std::unique_ptr<RegTree>> trees;
@ -241,9 +239,8 @@ TEST(GPUPredictor, ShapStump) {
}

TEST(GPUPredictor, Shap) {
Context ctx;
ctx.gpu_id = 0;
LearnerModelParam mparam{MakeMP(1, .5, 1, ctx.gpu_id)};
auto ctx = MakeCUDACtx(0);
LearnerModelParam mparam{MakeMP(1, .5, 1, ctx.Ordinal())};
gbm::GBTreeModel model(&mparam, &ctx);

std::vector<std::unique_ptr<RegTree>> trees;

@ -44,16 +44,14 @@ TEST(Predictor, PredictionCache) {
EXPECT_ANY_THROW(container.Entry(m));
}

void TestTrainingPrediction(size_t rows, size_t bins,
std::string tree_method,
std::shared_ptr<DMatrix> p_full,
std::shared_ptr<DMatrix> p_hist) {
void TestTrainingPrediction(Context const *ctx, size_t rows, size_t bins,
std::shared_ptr<DMatrix> p_full, std::shared_ptr<DMatrix> p_hist) {
size_t constexpr kCols = 16;
size_t constexpr kClasses = 3;
size_t constexpr kIters = 3;

std::unique_ptr<Learner> learner;
auto train = [&](Context const& ctx) {

p_hist->Info().labels.Reshape(rows, 1);
auto &h_label = p_hist->Info().labels.Data()->HostVector();

@ -62,12 +60,11 @@ void TestTrainingPrediction(size_t rows, size_t bins,
}

learner.reset(Learner::Create({}));
learner->SetParam("tree_method", tree_method);
learner->SetParam("objective", "multi:softprob");
learner->SetParam("num_feature", std::to_string(kCols));
learner->SetParam("num_class", std::to_string(kClasses));
learner->SetParam("max_bin", std::to_string(bins));
ConfigLearnerByCtx(&ctx, learner.get());
learner->SetParams(Args{{"objective", "multi:softprob"},
{"num_feature", std::to_string(kCols)},
{"num_class", std::to_string(kClasses)},
{"max_bin", std::to_string(bins)},
{"device", ctx->DeviceName()}});
learner->Configure();

for (size_t i = 0; i < kIters; ++i) {
@ -79,7 +76,7 @@ void TestTrainingPrediction(size_t rows, size_t bins,

learner.reset(Learner::Create({}));
learner->LoadModel(model);
ConfigLearnerByCtx(&ctx, learner.get());
learner->SetParam("device", ctx->DeviceName());
learner->Configure();

HostDeviceVector<float> from_full;
@ -89,15 +86,7 @@ void TestTrainingPrediction(size_t rows, size_t bins,
learner->Predict(p_hist, false, &from_hist, 0, 0);

for (size_t i = 0; i < rows; ++i) {
EXPECT_NEAR(from_hist.ConstHostVector()[i],
from_full.ConstHostVector()[i], kRtEps);
}
};

if (tree_method == "gpu_hist") {
train(MakeCUDACtx(0));
} else {
train(Context{});
EXPECT_NEAR(from_hist.ConstHostVector()[i], from_full.ConstHostVector()[i], kRtEps);
}
}

@ -120,7 +109,7 @@ void TestInplacePrediction(Context const *ctx, std::shared_ptr<DMatrix> x, bst_r
learner->UpdateOneIter(it, m);
}

learner->SetParam("gpu_id", std::to_string(ctx->gpu_id));
learner->SetParam("device", ctx->DeviceName());
learner->Configure();

HostDeviceVector<float> *p_out_predictions_0{nullptr};
@ -153,7 +142,7 @@ void TestInplacePrediction(Context const *ctx, std::shared_ptr<DMatrix> x, bst_r
ASSERT_NEAR(h_pred[i], h_pred_0[i] + h_pred_1[i] - 0.5f, kRtEps);
}

learner->SetParam("gpu_id", "-1");
learner->SetParam("device", "cpu");
learner->Configure();
}
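The same pattern from the Python side, as a minimal sketch (the booster, ``X``, and the ``numpy as np`` import are illustrative): a trained booster can be moved between devices with the ``device`` parameter before predicting, and the results should agree up to floating point rounding.

.. code-block:: python

    booster.set_param({"device": "cuda:0"})
    gpu_predt = booster.inplace_predict(X)

    booster.set_param({"device": "cpu"})
    cpu_predt = booster.inplace_predict(X)

    np.testing.assert_allclose(gpu_predt, cpu_predt, atol=1e-6)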

@ -161,12 +150,12 @@ namespace {
std::unique_ptr<Learner> LearnerForTest(Context const *ctx, std::shared_ptr<DMatrix> dmat,
size_t iters, size_t forest = 1) {
std::unique_ptr<Learner> learner{Learner::Create({dmat})};
learner->SetParams(Args{{"num_parallel_tree", std::to_string(forest)}});
learner->SetParams(
Args{{"num_parallel_tree", std::to_string(forest)}, {"device", ctx->DeviceName()}});
for (size_t i = 0; i < iters; ++i) {
learner->UpdateOneIter(i, dmat);
}

ConfigLearnerByCtx(ctx, learner.get());
return learner;
}

@ -215,7 +204,7 @@ void TestPredictionDeviceAccess() {
{
ASSERT_EQ(from_cpu.DeviceIdx(), Context::kCpuId);
Context cpu_ctx;
ConfigLearnerByCtx(&cpu_ctx, learner.get());
learner->SetParam("device", cpu_ctx.DeviceName());
learner->Predict(m_test, false, &from_cpu, 0, 0);
ASSERT_TRUE(from_cpu.HostCanWrite());
ASSERT_FALSE(from_cpu.DeviceCanRead());
@ -225,7 +214,7 @@ void TestPredictionDeviceAccess() {
HostDeviceVector<float> from_cuda;
{
Context cuda_ctx = MakeCUDACtx(0);
ConfigLearnerByCtx(&cuda_ctx, learner.get());
learner->SetParam("device", cuda_ctx.DeviceName());
learner->Predict(m_test, false, &from_cuda, 0, 0);
ASSERT_EQ(from_cuda.DeviceIdx(), 0);
ASSERT_TRUE(from_cuda.DeviceCanWrite());
@ -465,11 +454,7 @@ void TestIterationRangeColumnSplit(Context const* ctx) {
auto dmat = RandomDataGenerator(kRows, kCols, 0).GenerateDMatrix(true, true, kClasses);
auto learner = LearnerForTest(ctx, dmat, kIters, kForest);

if (ctx->IsCPU()) {
learner->SetParams(Args{{"gpu_id", std::to_string(-1)}});
} else {
learner->SetParams(Args{{"gpu_id", std::to_string(0)}});
}
learner->SetParam("device", ctx->DeviceName());

bool bound = false;
std::unique_ptr<Learner> sliced{learner->Slice(0, 3, 1, &bound)};
@ -582,7 +567,7 @@ void TestSparsePredictionColumnSplit(Context const* ctx, float sparsity) {
learner.reset(Learner::Create({Xy}));
learner->LoadModel(model);

ConfigLearnerByCtx(ctx, learner.get());
learner->SetParam("device", ctx->DeviceName());
learner->Predict(Xy, false, &sparse_predt, 0, 0);

auto constexpr kWorldSize = 2;

@ -84,9 +84,8 @@ void TestPredictionFromGradientIndex(Context const* ctx, size_t rows, size_t col
}

// p_full and p_hist should come from the same data set.
void TestTrainingPrediction(size_t rows, size_t bins, std::string tree_method,
std::shared_ptr<DMatrix> p_full,
std::shared_ptr<DMatrix> p_hist);
void TestTrainingPrediction(Context const* ctx, size_t rows, size_t bins,
std::shared_ptr<DMatrix> p_full, std::shared_ptr<DMatrix> p_hist);

void TestInplacePrediction(Context const* ctx, std::shared_ptr<DMatrix> x, bst_row_t rows,
bst_feature_t cols);

31
tests/cpp/test_context.cc
Normal file
@ -0,0 +1,31 @@
/**
 * Copyright 2023, XGBoost Contributors
 */
#include <gtest/gtest.h>
#include <xgboost/base.h>
#include <xgboost/context.h>

namespace xgboost {
TEST(Context, CPU) {
Context ctx;
ASSERT_EQ(ctx.Device(), DeviceOrd::CPU());
ASSERT_EQ(ctx.Ordinal(), Context::kCpuId);

std::int32_t flag{0};
ctx.DispatchDevice([&] { flag = -1; }, [&] { flag = 1; });
ASSERT_EQ(flag, -1);

ASSERT_THROW(ctx.UpdateAllowUnknown(Args{{"device", "oops"}}), dmlc::Error);
ASSERT_THROW(ctx.UpdateAllowUnknown(Args{{"device", "-1"}}), dmlc::Error);
ASSERT_THROW(ctx.UpdateAllowUnknown(Args{{"device", "CPU"}}), dmlc::Error);
ASSERT_THROW(ctx.UpdateAllowUnknown(Args{{"device", "CUDA"}}), dmlc::Error);
ASSERT_THROW(ctx.UpdateAllowUnknown(Args{{"device", "CPU:0"}}), dmlc::Error);
ASSERT_THROW(ctx.UpdateAllowUnknown(Args{{"device", "gpu:+0"}}), dmlc::Error);
ASSERT_THROW(ctx.UpdateAllowUnknown(Args{{"device", "gpu:0-"}}), dmlc::Error);
ASSERT_THROW(ctx.UpdateAllowUnknown(Args{{"device", "gpu:"}}), dmlc::Error);
ASSERT_THROW(ctx.UpdateAllowUnknown(Args{{"device", ":"}}), dmlc::Error);
ASSERT_THROW(ctx.UpdateAllowUnknown(Args{{"device", ":gpu"}}), dmlc::Error);
ASSERT_THROW(ctx.UpdateAllowUnknown(Args{{"device", ":0"}}), dmlc::Error);
ASSERT_THROW(ctx.UpdateAllowUnknown(Args{{"device", ""}}), dmlc::Error);
}
}  // namespace xgboost
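The new test pins down the accepted grammar: ``device`` must be lowercase ``cpu``, ``cuda``, ``gpu``, or one of the latter two followed by ``:<ordinal>``. A sketch of how this surfaces in Python (hypothetical booster; invalid strings raise an error once the booster is configured):

.. code-block:: python

    # Accepted values.
    booster.set_param({"device": "cpu"})
    booster.set_param({"device": "cuda"})
    booster.set_param({"device": "cuda:1"})
    booster.set_param({"device": "gpu:0"})  # alias for cuda:0

    # Rejected by validation: uppercase names, bare ordinals, malformed
    # ordinals -- e.g. "CPU", "CUDA", "-1", "gpu:+0", "gpu:", "".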

99
tests/cpp/test_context.cu
Normal file
@ -0,0 +1,99 @@
/**
 * Copyright 2023, XGBoost Contributors
 */
#include <gtest/gtest.h>
#include <xgboost/base.h>  // for Args
#include <xgboost/context.h>
#include <xgboost/json.h>  // for FromJson, ToJson

#include <string>  // for string, to_string

#include "../../src/common/common.h"  // for AllVisibleGPUs

namespace xgboost {
namespace {
void TestCUDA(Context const& ctx, bst_d_ordinal_t ord) {
ASSERT_EQ(ctx.gpu_id, ord);
ASSERT_EQ(ctx.Device().ordinal, ord);
ASSERT_EQ(ctx.DeviceName(), "cuda:" + std::to_string(ord));
ASSERT_EQ(ctx.Ordinal(), ord);
ASSERT_TRUE(ctx.IsCUDA());
ASSERT_FALSE(ctx.IsCPU());
ASSERT_EQ(ctx.Device(), DeviceOrd::CUDA(ord));

Json jctx{ToJson(ctx)};
Context new_ctx;
FromJson(jctx, &new_ctx);
ASSERT_EQ(new_ctx.Device(), ctx.Device());
ASSERT_EQ(new_ctx.gpu_id, ctx.gpu_id);
}
}  // namespace

TEST(Context, DeviceOrdinal) {
Context ctx;
auto n_vis = common::AllVisibleGPUs();
auto ord = n_vis - 1;

std::string device = "cuda:" + std::to_string(ord);
ctx.UpdateAllowUnknown(Args{{"device", device}});
TestCUDA(ctx, ord);

device = "cuda:" + std::to_string(1001);
ctx.UpdateAllowUnknown(Args{{"device", device}});
ord = 1001 % n_vis;

TestCUDA(ctx, ord);

std::int32_t flag{0};
ctx.DispatchDevice([&] { flag = -1; }, [&] { flag = 1; });
ASSERT_EQ(flag, 1);

Context new_ctx = ctx;
TestCUDA(new_ctx, ctx.Ordinal());

auto cpu_ctx = ctx.MakeCPU();
ASSERT_TRUE(cpu_ctx.IsCPU());
ASSERT_EQ(cpu_ctx.Ordinal(), Context::kCpuId);
ASSERT_EQ(cpu_ctx.Device(), DeviceOrd::CPU());

auto cuda_ctx = cpu_ctx.MakeCUDA(ctx.Ordinal());
TestCUDA(cuda_ctx, ctx.Ordinal());

cuda_ctx.UpdateAllowUnknown(Args{{"fail_on_invalid_gpu_id", "true"}});
ASSERT_THROW({ cuda_ctx.UpdateAllowUnknown(Args{{"device", "cuda:9999"}}); }, dmlc::Error);
cuda_ctx.UpdateAllowUnknown(Args{{"device", "cuda:00"}});
ASSERT_EQ(cuda_ctx.Ordinal(), 0);

ctx.UpdateAllowUnknown(Args{{"device", "cpu"}});
// Test alias
ctx.UpdateAllowUnknown(Args{{"device", "gpu:0"}});
TestCUDA(ctx, 0);
ctx.UpdateAllowUnknown(Args{{"device", "gpu"}});
TestCUDA(ctx, 0);

// Test that the thread local memory in dmlc does not link different instances together.
cpu_ctx.UpdateAllowUnknown(Args{{"device", "cpu"}});
TestCUDA(ctx, 0);
ctx.UpdateAllowUnknown(Args{});
TestCUDA(ctx, 0);
}

TEST(Context, GPUId) {
Context ctx;
ctx.UpdateAllowUnknown(Args{{"gpu_id", "0"}});
TestCUDA(ctx, 0);

auto n_vis = common::AllVisibleGPUs();
auto ord = n_vis - 1;
ctx.UpdateAllowUnknown(Args{{"gpu_id", std::to_string(ord)}});
TestCUDA(ctx, ord);

auto device = "cuda:" + std::to_string(1001);
ctx.UpdateAllowUnknown(Args{{"device", device}});
ord = 1001 % n_vis;
TestCUDA(ctx, ord);

ctx.UpdateAllowUnknown(Args{{"gpu_id", "-1"}});
ASSERT_EQ(ctx.Device(), DeviceOrd::CPU());
}
}  // namespace xgboost
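Note the backward-compatibility behaviour exercised by ``TEST(Context, GPUId)``: the deprecated ``gpu_id`` is still parsed and mapped onto the new device representation, and an oversized ordinal wraps onto a visible device. A sketch of the equivalences (hypothetical booster):

.. code-block:: python

    booster.set_param({"gpu_id": "0"})   # equivalent to {"device": "cuda:0"}
    booster.set_param({"gpu_id": "-1"})  # equivalent to {"device": "cpu"}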

@ -27,7 +27,6 @@
#include "../../src/common/io.h" // for LoadSequentialFile
#include "../../src/common/linalg_op.h" // for ElementWiseTransformHost, begin, end
#include "../../src/common/random.h" // for GlobalRandom
#include "../../src/common/transform_iterator.h" // for IndexTransformIter
#include "dmlc/io.h" // for Stream
#include "dmlc/omp.h" // for omp_get_max_threads
#include "dmlc/registry.h" // for Registry
@ -35,14 +34,13 @@
#include "helpers.h" // for GetBaseScore, RandomDataGenerator
#include "objective_helpers.h" // for MakeObjNamesForTest, ObjTestNameGenerator
#include "xgboost/base.h" // for bst_float, Args, bst_feature_t, bst_int
#include "xgboost/context.h" // for Context
#include "xgboost/context.h" // for Context, DeviceOrd
#include "xgboost/data.h" // for DMatrix, MetaInfo, DataType
#include "xgboost/host_device_vector.h" // for HostDeviceVector
#include "xgboost/json.h" // for Json, Object, get, String, IsA, opera...
#include "xgboost/linalg.h" // for Tensor, TensorView
#include "xgboost/logging.h" // for ConsoleLogger
#include "xgboost/predictor.h" // for PredictionCacheEntry
#include "xgboost/span.h" // for Span, operator!=, SpanIterator
#include "xgboost/string_view.h" // for StringView

namespace xgboost {
@ -58,9 +56,9 @@ TEST(Learner, Basic) {
auto minor = XGBOOST_VER_MINOR;
auto patch = XGBOOST_VER_PATCH;

static_assert(std::is_integral<decltype(major)>::value, "Wrong major version type");
static_assert(std::is_integral<decltype(minor)>::value, "Wrong minor version type");
static_assert(std::is_integral<decltype(patch)>::value, "Wrong patch version type");
static_assert(std::is_integral_v<decltype(major)>, "Wrong major version type");
static_assert(std::is_integral_v<decltype(minor)>, "Wrong minor version type");
static_assert(std::is_integral_v<decltype(patch)>, "Wrong patch version type");
}

TEST(Learner, ParameterValidation) {
@ -92,8 +90,7 @@ TEST(Learner, CheckGroup) {
size_t constexpr kNumRows = 17;
bst_feature_t constexpr kNumCols = 15;

std::shared_ptr<DMatrix> p_mat{
RandomDataGenerator{kNumRows, kNumCols, 0.0f}.GenerateDMatrix()};
std::shared_ptr<DMatrix> p_mat{RandomDataGenerator{kNumRows, kNumCols, 0.0f}.GenerateDMatrix()};
std::vector<bst_float> weight(kNumGroups, 1);
std::vector<bst_int> group(kNumGroups);
group[0] = 2;
@ -312,35 +309,36 @@ TEST(Learner, GPUConfiguration) {
learner->SetParams({Arg{"booster", "gblinear"},
Arg{"updater", "gpu_coord_descent"}});
learner->UpdateOneIter(0, p_dmat);
ASSERT_EQ(learner->Ctx()->gpu_id, 0);
ASSERT_EQ(learner->Ctx()->Device(), DeviceOrd::CUDA(0));
}
{
std::unique_ptr<Learner> learner {Learner::Create(mat)};
std::unique_ptr<Learner> learner{Learner::Create(mat)};
learner->SetParams({Arg{"tree_method", "gpu_hist"}});
learner->Configure();
ASSERT_EQ(learner->Ctx()->Device(), DeviceOrd::CUDA(0));
learner->UpdateOneIter(0, p_dmat);
ASSERT_EQ(learner->Ctx()->gpu_id, 0);
ASSERT_EQ(learner->Ctx()->Device(), DeviceOrd::CUDA(0));
}
{
std::unique_ptr<Learner> learner {Learner::Create(mat)};
learner->SetParams({Arg{"tree_method", "gpu_hist"},
Arg{"gpu_id", "-1"}});
learner->UpdateOneIter(0, p_dmat);
ASSERT_EQ(learner->Ctx()->gpu_id, 0);
ASSERT_EQ(learner->Ctx()->Device(), DeviceOrd::CUDA(0));
}
{
// with CPU algorithm
std::unique_ptr<Learner> learner {Learner::Create(mat)};
learner->SetParams({Arg{"tree_method", "hist"}});
learner->UpdateOneIter(0, p_dmat);
ASSERT_EQ(learner->Ctx()->gpu_id, -1);
ASSERT_EQ(learner->Ctx()->Device(), DeviceOrd::CPU());
}
{
// with CPU algorithm, but `gpu_id` takes priority
std::unique_ptr<Learner> learner {Learner::Create(mat)};
learner->SetParams({Arg{"tree_method", "hist"},
Arg{"gpu_id", "0"}});
learner->SetParams({Arg{"tree_method", "hist"}, Arg{"gpu_id", "0"}});
learner->UpdateOneIter(0, p_dmat);
ASSERT_EQ(learner->Ctx()->gpu_id, 0);
ASSERT_EQ(learner->Ctx()->Device(), DeviceOrd::CUDA(0));
}
}
#endif  // defined(XGBOOST_USE_CUDA)
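The assertions above encode the resolution rules during the transition: ``gpu_hist`` forces a CUDA device even when ``gpu_id=-1``, while an explicit ``gpu_id=0`` lifts a CPU ``hist`` learner onto the GPU. A sketch of the same combinations from Python (``dtrain`` is illustrative):

.. code-block:: python

    # gpu_hist wins over gpu_id=-1: the learner still ends up on cuda:0.
    bst = xgb.train({"tree_method": "gpu_hist", "gpu_id": -1}, dtrain)

    # hist plus gpu_id=0 also resolves to cuda:0.
    bst = xgb.train({"tree_method": "hist", "gpu_id": 0}, dtrain)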

@ -8,6 +8,8 @@

#include <memory>  // for unique_ptr

#include "../helpers.h"

namespace xgboost {
TEST(Updater, HasNodePosition) {
Context ctx;
@ -19,7 +21,7 @@ TEST(Updater, HasNodePosition) {
ASSERT_TRUE(up->HasNodePosition());

#if defined(XGBOOST_USE_CUDA)
ctx.gpu_id = 0;
ctx = MakeCUDACtx(0);
up.reset(TreeUpdater::Create("grow_gpu_hist", &ctx, &task));
ASSERT_TRUE(up->HasNodePosition());
#endif  // defined(XGBOOST_USE_CUDA)

@ -70,9 +70,9 @@ class TestPredictionCache : public ::testing::Test {
Context ctx;
ctx.InitAllowUnknown(Args{{"nthread", "8"}});
if (updater_name == "grow_gpu_hist") {
ctx.gpu_id = 0;
ctx = ctx.MakeCUDA(0);
} else {
ctx.gpu_id = Context::kCpuId;
ctx = ctx.MakeCPU();
}

ObjInfo task{ObjInfo::kRegression};

@ -34,7 +34,7 @@ class TestLoadPickle:
bst = load_pickle(model_path)
config = bst.save_config()
config = json.loads(config)
assert config["learner"]["generic_param"]["gpu_id"] == "-1"
assert config["learner"]["generic_param"]["device"] == "cpu"

def test_context_is_preserved(self) -> None:
"""Test the device context is preserved after pickling."""
@ -42,14 +42,14 @@ class TestLoadPickle:
bst = load_pickle(model_path)
config = bst.save_config()
config = json.loads(config)
assert config["learner"]["generic_param"]["gpu_id"] == "0"
assert config["learner"]["generic_param"]["device"] == "cuda:0"

def test_wrap_gpu_id(self) -> None:
assert os.environ["CUDA_VISIBLE_DEVICES"] == "0"
bst = load_pickle(model_path)
config = bst.save_config()
config = json.loads(config)
assert config["learner"]["generic_param"]["gpu_id"] == "0"
assert config["learner"]["generic_param"]["device"] == "cuda:0"

x, y = build_dataset()
test_x = xgb.DMatrix(x)
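The pickling tests rely on ``device`` being part of the saved learner configuration. A minimal sketch of the round trip (``dtrain`` is illustrative):

.. code-block:: python

    import json

    bst = xgb.train({"tree_method": "hist", "device": "cuda:0"}, dtrain)
    config = json.loads(bst.save_config())
    assert config["learner"]["generic_param"]["device"] == "cuda:0"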

@ -203,7 +203,7 @@ class TestQuantileDMatrix:
np.testing.assert_equal(h_ret.indices, d_ret.indices)

booster = xgb.train(
{"tree_method": "gpu_hist", "gpu_id": "0"}, dtrain=d_m
{"tree_method": "hist", "device": "cuda:0"}, dtrain=d_m
)

np.testing.assert_allclose(

@ -65,16 +65,20 @@ class TestGPUBasicModels:
@pytest.mark.skipif(**tm.no_sklearn())
def test_invalid_gpu_id(self):
from sklearn.datasets import load_digits

X, y = load_digits(return_X_y=True)
# should pass with invalid gpu id
cls1 = xgb.XGBClassifier(tree_method='gpu_hist', gpu_id=9999)
cls1 = xgb.XGBClassifier(tree_method="gpu_hist", gpu_id=9999)
cls1.fit(X, y)
# should throw error with fail_on_invalid_gpu_id enabled
cls2 = xgb.XGBClassifier(
tree_method='gpu_hist', gpu_id=9999, fail_on_invalid_gpu_id=True
tree_method="gpu_hist", gpu_id=9999, fail_on_invalid_gpu_id=True
)
try:
with pytest.raises(ValueError, match="ordinal 9999 is invalid"):
cls2.fit(X, y)

cls2 = xgb.XGBClassifier(
tree_method="hist", device="cuda:9999", fail_on_invalid_gpu_id=True
)
with pytest.raises(ValueError, match="ordinal 9999 is invalid"):
cls2.fit(X, y)
assert False, "Should have failed with fail_on_invalid_gpu_id enabled"
except xgb.core.XGBoostError as err:
assert "gpu_id 9999 is invalid" in str(err)

@ -43,10 +43,16 @@ class TestGPUEvalMetrics:
num_boost_round=10,
)
cpu_auc = float(booster.eval(Xy).split(":")[1])
booster.set_param({"gpu_id": "0"})
assert json.loads(booster.save_config())["learner"]["generic_param"]["gpu_id"] == "0"
booster.set_param({"device": "cuda:0"})
assert (
json.loads(booster.save_config())["learner"]["generic_param"]["device"]
== "cuda:0"
)
gpu_auc = float(booster.eval(Xy).split(":")[1])
assert json.loads(booster.save_config())["learner"]["generic_param"]["gpu_id"] == "0"
assert (
json.loads(booster.save_config())["learner"]["generic_param"]["device"]
== "cuda:0"
)

np.testing.assert_allclose(cpu_auc, gpu_auc)

@ -113,14 +113,6 @@ class TestPickling:
param = {"tree_method": "gpu_hist", "verbosity": 1}
bst = xgb.train(param, train_x)

with tm.captured_output() as (out, err):
bst.inplace_predict(x)

# The warning is redirected to Python callback, so it's printed in stdout
# instead of stderr.
stdout = out.getvalue()
assert stdout.find("mismatched devices") != -1

save_pickle(bst, model_path)

args = self.args_template.copy()
@ -177,7 +169,7 @@ class TestPickling:

# Switch to CPU predictor
bst = model.get_booster()
tm.set_ordinal(-1, bst)
bst.set_param({"device": "cpu"})
cpu_pred = model.predict(x, output_margin=True)
np.testing.assert_allclose(cpu_pred, gpu_pred, rtol=1e-5)

@ -39,7 +39,8 @@ predict_parameter_strategy = strategies.fixed_dictionaries(
}
)

pytestmark = tm.timeout(20)
# cupy nvrtc compilation can take a long time for the first run
pytestmark = tm.timeout(30)


class TestGPUPredict:
@ -71,8 +72,8 @@ class TestGPUPredict:
param = {
"objective": "binary:logistic",
"eval_metric": "logloss",
"tree_method": "gpu_hist",
"gpu_id": 0,
"tree_method": "hist",
"device": "gpu:0",
"max_depth": 1,
}
bst = xgb.train(
@ -84,7 +85,7 @@ class TestGPUPredict:
gpu_pred_test = bst.predict(dtest, output_margin=True)
gpu_pred_val = bst.predict(dval, output_margin=True)

bst.set_param({"gpu_id": -1, "tree_method": "hist"})
bst.set_param({"device": "cpu", "tree_method": "hist"})
bst_cpu = copy(bst)
cpu_pred_train = bst_cpu.predict(dtrain, output_margin=True)
cpu_pred_test = bst_cpu.predict(dtest, output_margin=True)
@ -107,14 +108,15 @@ class TestGPUPredict:
dtrain = xgb.DMatrix(X_train, label=y_train)

params = {}
params["tree_method"] = "gpu_hist"
params["tree_method"] = "hist"
params["device"] = "cuda:0"
bst = xgb.train(params, dtrain)

tm.set_ordinal(0, bst)
bst.set_param({"device": "cuda:0"})
# Don't reuse the DMatrix for prediction, otherwise the result is cached.
predict_gpu_0 = bst.predict(xgb.DMatrix(X_test))
predict_gpu_1 = bst.predict(xgb.DMatrix(X_test))
tm.set_ordinal(-1, bst)
bst.set_param({"device": "cpu"})
predict_cpu = bst.predict(xgb.DMatrix(X_test))

assert np.allclose(predict_gpu_0, predict_gpu_1)
@ -131,8 +133,8 @@ class TestGPUPredict:
X_test, y_test = X[tr_size:, :], y[tr_size:]

params = {
"tree_method": "gpu_hist",
"gpu_id": "0",
"tree_method": "hist",
"device": "cuda:0",
"n_jobs": -1,
"seed": 123,
}
@ -141,13 +143,54 @@ class TestGPUPredict:
gpu_test_score = m.score(X_test, y_test)

# Now with cpu
m = tm.set_ordinal(-1, m)
m.set_params(device="cpu")
cpu_train_score = m.score(X_train, y_train)
cpu_test_score = m.score(X_test, y_test)

assert np.allclose(cpu_train_score, gpu_train_score)
assert np.allclose(cpu_test_score, gpu_test_score)

@pytest.mark.parametrize("device", ["cpu", "cuda"])
@pytest.mark.skipif(**tm.no_cupy())
def test_inplace_predict_device_type(self, device: str) -> None:
"""Test inplace predict with different device and data types.

The sklearn interface uses inplace predict by default and gbtree falls back
to DMatrix whenever the device doesn't match. This test checks that XGBoost
can handle different combinations of device and input data type.

"""
import cudf
import cupy as cp
import pandas as pd
from scipy.sparse import csr_matrix

reg = xgb.XGBRegressor(tree_method="hist", device=device)
n_samples = 4096
n_features = 13
X, y, w = tm.make_regression(n_samples, n_features, use_cupy=True)
X[X == 0.0] = 1.0

reg.fit(X, y, sample_weight=w)
predt_0 = reg.predict(X)

X = cp.asnumpy(X)
predt_1 = reg.predict(X)

df = pd.DataFrame(X)
predt_2 = reg.predict(df)

df = cudf.DataFrame(X)
predt_3 = reg.predict(df)

X_csr = csr_matrix(X)
predt_4 = reg.predict(X_csr)

np.testing.assert_allclose(predt_0, predt_1)
np.testing.assert_allclose(predt_0, predt_2)
np.testing.assert_allclose(predt_0, predt_3)
np.testing.assert_allclose(predt_0, predt_4)

def run_inplace_base_margin(self, booster, dtrain, X, base_margin):
import cupy as cp

@ -175,7 +218,9 @@ class TestGPUPredict:
dtrain = xgb.DMatrix(X, y)

booster = xgb.train(
{"tree_method": "gpu_hist", "gpu_id": device}, dtrain, num_boost_round=10
{"tree_method": "hist", "device": f"cuda:{device}"},
dtrain,
num_boost_round=10,
)

test = xgb.DMatrix(X[:10, ...], missing=missing)
@ -208,13 +253,13 @@ class TestGPUPredict:
missing_idx = [i for i in range(0, X.shape[1], 16)]
X[:, missing_idx] = missing
reg = xgb.XGBRegressor(
tree_method="gpu_hist", n_estimators=8, missing=missing, gpu_id=device
tree_method="hist", n_estimators=8, missing=missing, device=f"cuda:{device}"
)
reg.fit(X, y)

reg = tm.set_ordinal(device, reg)
reg.set_params(device=f"cuda:{device}")
gpu_predt = reg.predict(X)
reg = tm.set_ordinal(-1, reg)
reg = reg.set_params(device="cpu")
cpu_predt = reg.predict(cp.asnumpy(X))
np.testing.assert_allclose(gpu_predt, cpu_predt, atol=1e-6)
cp.cuda.runtime.setDevice(0)
@ -250,7 +295,9 @@ class TestGPUPredict:

dtrain = xgb.DMatrix(X, y)

booster = xgb.train({"tree_method": "gpu_hist"}, dtrain, num_boost_round=10)
booster = xgb.train(
{"tree_method": "hist", "device": "cuda:0"}, dtrain, num_boost_round=10
)
test = xgb.DMatrix(X)
predt_from_array = booster.inplace_predict(X)
predt_from_dmatrix = booster.predict(test)
@ -280,12 +327,12 @@ class TestGPUPredict:
def test_shap(self, num_rounds, dataset, param):
if dataset.name.endswith("-l1"):  # not supported by the exact tree method
return
param.update({"tree_method": "gpu_hist", "gpu_id": 0})
param.update({"tree_method": "hist", "device": "gpu:0"})
param = dataset.set_params(param)
dmat = dataset.get_dmat()
bst = xgb.train(param, dmat, num_rounds)
test_dmat = xgb.DMatrix(dataset.X, dataset.y, dataset.w, dataset.margin)
bst = tm.set_ordinal(0, bst)
bst.set_param({"device": "gpu:0"})
shap = bst.predict(test_dmat, pred_contribs=True)
margin = bst.predict(test_dmat, output_margin=True)
assume(len(dataset.y) > 0)
@ -298,12 +345,12 @@ class TestGPUPredict:
def test_shap_interactions(self, num_rounds, dataset, param):
if dataset.name.endswith("-l1"):  # not supported by the exact tree method
return
param.update({"tree_method": "hist", "gpu_id": 0})
param.update({"tree_method": "hist", "device": "cuda:0"})
param = dataset.set_params(param)
dmat = dataset.get_dmat()
bst = xgb.train(param, dmat, num_rounds)
test_dmat = xgb.DMatrix(dataset.X, dataset.y, dataset.w, dataset.margin)
bst = tm.set_ordinal(0, bst)
bst.set_param({"device": "cuda:0"})
shap = bst.predict(test_dmat, pred_interactions=True)
margin = bst.predict(test_dmat, output_margin=True)
assume(len(dataset.y) > 0)
@ -317,16 +364,18 @@ class TestGPUPredict:
def test_shap_categorical(self):
X, y = tm.make_categorical(100, 20, 7, False)
Xy = xgb.DMatrix(X, y, enable_categorical=True)
booster = xgb.train({"tree_method": "gpu_hist"}, Xy, num_boost_round=10)
booster = xgb.train(
{"tree_method": "hist", "device": "gpu:0"}, Xy, num_boost_round=10
)

booster = tm.set_ordinal(0, booster)
booster.set_param({"device": "cuda:0"})
shap = booster.predict(Xy, pred_contribs=True)
margin = booster.predict(Xy, output_margin=True)
np.testing.assert_allclose(
np.sum(shap, axis=len(shap.shape) - 1), margin, rtol=1e-3
)

booster = tm.set_ordinal(-1, booster)
booster.set_param({"device": "cpu"})
shap = booster.predict(Xy, pred_contribs=True)
margin = booster.predict(Xy, output_margin=True)
np.testing.assert_allclose(
@ -334,8 +383,8 @@ class TestGPUPredict:
)

def test_predict_leaf_basic(self):
gpu_leaf = run_predict_leaf(0)
cpu_leaf = run_predict_leaf(-1)
gpu_leaf = run_predict_leaf("gpu:0")
cpu_leaf = run_predict_leaf("cpu")
np.testing.assert_equal(gpu_leaf, cpu_leaf)

def run_predict_leaf_booster(self, param, num_rounds, dataset):
@ -344,23 +393,22 @@ class TestGPUPredict:
booster = xgb.train(
param, dtrain=dataset.get_dmat(), num_boost_round=num_rounds
)
booster = tm.set_ordinal(-1, booster)
booster.set_param({"device": "cpu"})
cpu_leaf = booster.predict(m, pred_leaf=True)

booster = tm.set_ordinal(0, booster)
booster.set_param({"device": "cuda:0"})
gpu_leaf = booster.predict(m, pred_leaf=True)

np.testing.assert_equal(cpu_leaf, gpu_leaf)

@given(predict_parameter_strategy, tm.make_dataset_strategy())
@settings(deadline=None, max_examples=20, print_blob=True)
def test_predict_leaf_gbtree(self, param, dataset):
def test_predict_leaf_gbtree(self, param: dict, dataset: tm.TestDataset) -> None:
# Unsupported for random forest
if param.get("num_parallel_tree", 1) > 1 and dataset.name.endswith("-l1"):
return

param["booster"] = "gbtree"
param["tree_method"] = "gpu_hist"
param.update({"booster": "gbtree", "tree_method": "hist", "device": "cuda:0"})
self.run_predict_leaf_booster(param, 10, dataset)

@given(predict_parameter_strategy, tm.make_dataset_strategy())
@ -370,8 +418,7 @@ class TestGPUPredict:
if param.get("num_parallel_tree", 1) > 1 and dataset.name.endswith("-l1"):
return

param["booster"] = "dart"
param["tree_method"] = "gpu_hist"
param.update({"booster": "dart", "tree_method": "hist", "device": "cuda:0"})
self.run_predict_leaf_booster(param, 10, dataset)

@pytest.mark.skipif(**tm.no_sklearn())
@ -395,12 +442,12 @@ class TestGPUPredict:
dtrain = xgb.DMatrix(df, label=y, enable_categorical=True)

params = {
"tree_method": "gpu_hist",
"tree_method": "hist",
"max_depth": 3,
"learning_rate": 1.0,
"base_score": 0.0,
"eval_metric": "rmse",
"gpu_id": "0",
"device": "cuda:0",
}

eval_history = {}
@ -412,7 +459,7 @@ class TestGPUPredict:
verbose_eval=False,
evals_result=eval_history,
)
bst = tm.set_ordinal(0, bst)
bst.set_param({"device": "cuda:0"})
pred = bst.predict(dtrain)
rmse = mean_squared_error(y_true=y, y_pred=pred, squared=False)
np.testing.assert_almost_equal(
@ -434,14 +481,16 @@ class TestGPUPredict:
Xy = xgb.DMatrix(X, y)
if n_classes == 2:
params = {
"tree_method": "gpu_hist",
"tree_method": "hist",
"device": "cuda:0",
"booster": "dart",
"rate_drop": 0.5,
"objective": "binary:logistic",
}
else:
params = {
"tree_method": "gpu_hist",
"tree_method": "hist",
"device": "cuda:0",
"booster": "dart",
"rate_drop": 0.5,
"objective": "multi:softprob",
@ -455,7 +504,7 @@ class TestGPUPredict:
copied = booster.predict(Xy)

# CPU
booster = tm.set_ordinal(-1, booster)
booster.set_param({"device": "cpu"})
cpu_inplace = booster.inplace_predict(X_)
cpu_copied = booster.predict(Xy)

@ -465,7 +514,7 @@ class TestGPUPredict:
cp.testing.assert_allclose(inplace, copied, atol=1e-6)

# GPU
booster = tm.set_ordinal(0, booster)
booster.set_param({"device": "cuda:0"})
inplace = booster.inplace_predict(X)
copied = booster.predict(Xy)

@ -482,7 +531,7 @@ class TestGPUPredict:
orig = rng.randint(low=0, high=127, size=rows * cols).reshape(rows, cols)
y = rng.randint(low=0, high=127, size=rows)
dtrain = xgb.DMatrix(orig, label=y)
booster = xgb.train({"tree_method": "gpu_hist"}, dtrain)
booster = xgb.train({"tree_method": "hist", "device": "cuda:0"}, dtrain)

predt_orig = booster.inplace_predict(orig)
# all primitive types in numpy

@ -28,7 +28,7 @@ def run_threaded_predict(X, rows, predict_func):
assert f.result()


def run_predict_leaf(gpu_id: int) -> np.ndarray:
def run_predict_leaf(device: str) -> np.ndarray:
rows = 100
cols = 4
classes = 5
@ -48,7 +48,7 @@ def run_predict_leaf(gpu_id: int) -> np.ndarray:
num_boost_round=num_boost_round,
)

booster = tm.set_ordinal(gpu_id, booster)
booster.set_param({"device": device})
empty = xgb.DMatrix(np.ones(shape=(0, cols)))
empty_leaf = booster.predict(empty, pred_leaf=True)
assert empty_leaf.shape[0] == 0
@ -74,14 +74,14 @@ def run_predict_leaf(gpu_id: int) -> np.ndarray:

# When there's only 1 tree, the output is a 1 dim vector
booster = xgb.train({"tree_method": "hist"}, num_boost_round=1, dtrain=m)
booster = tm.set_ordinal(gpu_id, booster)
booster.set_param({"device": device})
assert booster.predict(m, pred_leaf=True).shape == (rows,)

return leaf


def test_predict_leaf() -> None:
run_predict_leaf(-1)
run_predict_leaf("cpu")


def test_predict_shape():

@ -69,7 +69,7 @@ def run_dmatrix_ctor(is_feature_cols: bool, is_qdm: bool, on_gpu: bool) -> None:
train_Xy, valid_Xy = create_dmatrix_from_partitions(
iter(dfs),
feature_cols,
gpu_id=device_id,
dev_ordinal=device_id,
use_qdm=is_qdm,
kwargs=kwargs,
enable_sparse_data_optim=False,

@ -1025,6 +1025,7 @@ class XgboostLocalTest(SparkTestCase):
self.assertTrue(hasattr(py_reg, "n_estimators"))
self.assertEqual(py_reg.n_estimators.parent, py_reg.uid)
self.assertFalse(hasattr(py_reg, "gpu_id"))
self.assertFalse(hasattr(py_reg, "device"))
self.assertEqual(py_reg.getOrDefault(py_reg.n_estimators), 100)
self.assertEqual(py_reg.getOrDefault(py_reg.objective), "reg:squarederror")
py_reg2 = SparkXGBRegressor(n_estimators=200)
@ -1038,6 +1039,7 @@ class XgboostLocalTest(SparkTestCase):
self.assertTrue(hasattr(py_cls, "n_estimators"))
self.assertEqual(py_cls.n_estimators.parent, py_cls.uid)
self.assertFalse(hasattr(py_cls, "gpu_id"))
self.assertFalse(hasattr(py_cls, "device"))
self.assertEqual(py_cls.getOrDefault(py_cls.n_estimators), 100)
self.assertEqual(py_cls.getOrDefault(py_cls.objective), None)
py_cls2 = SparkXGBClassifier(n_estimators=200)
@ -1051,6 +1053,7 @@ class XgboostLocalTest(SparkTestCase):
self.assertTrue(hasattr(py_cls, "n_estimators"))
self.assertEqual(py_cls.n_estimators.parent, py_cls.uid)
self.assertFalse(hasattr(py_cls, "gpu_id"))
self.assertFalse(hasattr(py_cls, "device"))
self.assertTrue(hasattr(py_cls, "arbitrary_params_dict"))
expected_kwargs = {"sketch_eps": 0.03}
self.assertEqual(