[breaking] Remove the predictor param, allow fallback to prediction using DMatrix. (#9129)
- A `DeviceOrd` struct is implemented to indicate the device. It will eventually replace the `gpu_id` parameter.
- The `predictor` parameter is removed.
- Fall back to `DMatrix` when `inplace_predict` is not available.
- The heuristic for choosing a predictor is only used during training.
This commit is contained in:
@@ -10,6 +10,7 @@
|
||||
#include <dmlc/omp.h>
|
||||
|
||||
#include <cmath>
|
||||
#include <cstdint>
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
@@ -112,7 +113,7 @@ using bst_row_t = std::size_t; // NOLINT
|
||||
/*! \brief Type for tree node index. */
|
||||
using bst_node_t = std::int32_t; // NOLINT
|
||||
/*! \brief Type for ranking group index. */
|
||||
using bst_group_t = std::uint32_t; // NOLINT
|
||||
using bst_group_t = std::uint32_t; // NOLINT
|
||||
/**
|
||||
* \brief Type for indexing into output targets.
|
||||
*/
|
||||
@@ -125,6 +126,10 @@ using bst_layer_t = std::int32_t; // NOLINT
|
||||
* \brief Type for indexing trees.
|
||||
*/
|
||||
using bst_tree_t = std::int32_t; // NOLINT
|
||||
/**
|
||||
* @brief Ordinal of a CUDA device.
|
||||
*/
|
||||
using bst_d_ordinal_t = std::int16_t; // NOLINT
|
||||
|
||||
namespace detail {
|
||||
/*! \brief Implementation of gradient statistics pair. Template specialisation
|
||||
|
||||
@@ -1067,6 +1067,9 @@ XGB_DLL int XGBoosterPredictFromDMatrix(BoosterHandle handle, DMatrixHandle dmat
|
||||
/**
|
||||
* \brief Inplace prediction from CPU dense matrix.
|
||||
*
|
||||
* \note If the booster is configured to run on a CUDA device, XGBoost falls back to run
|
||||
* prediction with DMatrix with a performance warning.
|
||||
*
|
||||
* \param handle Booster handle.
|
||||
* \param values JSON encoded __array_interface__ to values.
|
||||
* \param config See \ref XGBoosterPredictFromDMatrix for more info.
|
||||
@@ -1091,6 +1094,9 @@ XGB_DLL int XGBoosterPredictFromDense(BoosterHandle handle, char const *values,
|
||||
/**
|
||||
* \brief Inplace prediction from CPU CSR matrix.
|
||||
*
|
||||
* \note If the booster is configured to run on a CUDA device, XGBoost falls back to run
|
||||
* prediction with DMatrix with a performance warning.
|
||||
*
|
||||
* \param handle Booster handle.
|
||||
* \param indptr JSON encoded __array_interface__ to row pointer in CSR.
|
||||
* \param indices JSON encoded __array_interface__ to column indices in CSR.
|
||||
@@ -1116,6 +1122,9 @@ XGB_DLL int XGBoosterPredictFromCSR(BoosterHandle handle, char const *indptr, ch
|
||||
/**
|
||||
* \brief Inplace prediction from CUDA Dense matrix (cupy in Python).
|
||||
*
|
||||
* \note If the booster is configured to run on a CPU, XGBoost falls back to run
|
||||
* prediction with DMatrix with a performance warning.
|
||||
*
|
||||
* \param handle Booster handle
|
||||
* \param values JSON encoded __cuda_array_interface__ to values.
|
||||
* \param config See \ref XGBoosterPredictFromDMatrix for more info.
|
||||
@@ -1137,6 +1146,9 @@ XGB_DLL int XGBoosterPredictFromCudaArray(BoosterHandle handle, char const *valu
|
||||
/**
|
||||
* \brief Inplace prediction from CUDA dense dataframe (cuDF in Python).
|
||||
*
|
||||
* \note If the booster is configured to run on a CPU, XGBoost falls back to run
|
||||
* prediction with DMatrix with a performance warning.
|
||||
*
|
||||
* \param handle Booster handle
|
||||
* \param values List of __cuda_array_interface__ for all columns encoded in JSON list.
|
||||
* \param config See \ref XGBoosterPredictFromDMatrix for more info.
|
||||
|
||||
@@ -1,20 +1,79 @@
|
||||
/*!
|
||||
* Copyright 2014-2022 by Contributors
|
||||
/**
|
||||
* Copyright 2014-2023, XGBoost Contributors
|
||||
* \file context.h
|
||||
*/
|
||||
#ifndef XGBOOST_CONTEXT_H_
|
||||
#define XGBOOST_CONTEXT_H_
|
||||
|
||||
#include <xgboost/logging.h>
|
||||
#include <xgboost/parameter.h>
|
||||
#include <xgboost/base.h> // for bst_d_ordinal_t
|
||||
#include <xgboost/logging.h> // for CHECK_GE
|
||||
#include <xgboost/parameter.h> // for XGBoostParameter
|
||||
|
||||
#include <memory> // std::shared_ptr
|
||||
#include <string>
|
||||
#include <cstdint> // for int16_t, int32_t, int64_t
|
||||
#include <memory> // for shared_ptr
|
||||
#include <string> // for string, to_string
|
||||
|
||||
namespace xgboost {
|
||||
|
||||
struct CUDAContext;
|
||||
|
||||
/**
|
||||
* @brief A type for device ordinal. The type is packed into 32-bit for efficient use in
|
||||
* viewing types like `linalg::TensorView`.
|
||||
*/
|
||||
struct DeviceOrd {
|
||||
enum Type : std::int16_t { kCPU = 0, kCUDA = 1 } device{kCPU};
|
||||
// CUDA device ordinal.
|
||||
bst_d_ordinal_t ordinal{-1};
|
||||
|
||||
[[nodiscard]] bool IsCUDA() const { return device == kCUDA; }
|
||||
[[nodiscard]] bool IsCPU() const { return device == kCPU; }
|
||||
|
||||
DeviceOrd() = default;
|
||||
constexpr DeviceOrd(Type type, bst_d_ordinal_t ord) : device{type}, ordinal{ord} {}
|
||||
|
||||
DeviceOrd(DeviceOrd const& that) = default;
|
||||
DeviceOrd& operator=(DeviceOrd const& that) = default;
|
||||
DeviceOrd(DeviceOrd&& that) = default;
|
||||
DeviceOrd& operator=(DeviceOrd&& that) = default;
|
||||
|
||||
/**
|
||||
* @brief Constructor for CPU.
|
||||
*/
|
||||
[[nodiscard]] constexpr static auto CPU() { return DeviceOrd{kCPU, -1}; }
|
||||
/**
|
||||
* @brief Constructor for CUDA device.
|
||||
*
|
||||
* @param ordinal CUDA device ordinal.
|
||||
*/
|
||||
[[nodiscard]] static auto CUDA(bst_d_ordinal_t ordinal) { return DeviceOrd{kCUDA, ordinal}; }
|
||||
|
||||
[[nodiscard]] bool operator==(DeviceOrd const& that) const {
|
||||
return device == that.device && ordinal == that.ordinal;
|
||||
}
|
||||
[[nodiscard]] bool operator!=(DeviceOrd const& that) const { return !(*this == that); }
|
||||
/**
|
||||
* @brief Get a string representation of the device and the ordinal.
|
||||
*/
|
||||
[[nodiscard]] std::string Name() const {
|
||||
switch (device) {
|
||||
case DeviceOrd::kCPU:
|
||||
return "CPU";
|
||||
case DeviceOrd::kCUDA:
|
||||
return "CUDA:" + std::to_string(ordinal);
|
||||
default: {
|
||||
LOG(FATAL) << "Unknown device.";
|
||||
return "";
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
static_assert(sizeof(DeviceOrd) == sizeof(std::int32_t));
|
||||
|
||||
/**
|
||||
* @brief Runtime context for XGBoost. Contains information like threads and device.
|
||||
*/
|
||||
struct Context : public XGBoostParameter<Context> {
|
||||
public:
|
||||
// Constant representing the device ID of CPU.
|
||||
@@ -36,29 +95,59 @@ struct Context : public XGBoostParameter<Context> {
|
||||
// fail when gpu_id is invalid
|
||||
bool fail_on_invalid_gpu_id{false};
|
||||
bool validate_parameters{false};
|
||||
|
||||
/*!
|
||||
* \brief Configure the parameter `gpu_id'.
|
||||
/**
|
||||
* @brief Configure the parameter `gpu_id`.
|
||||
*
|
||||
* \param require_gpu Whether GPU is explicitly required from user.
|
||||
* @param require_gpu Whether GPU is explicitly required by the user through other
|
||||
* configurations.
|
||||
*/
|
||||
void ConfigureGpuId(bool require_gpu);
|
||||
/*!
|
||||
* Return automatically chosen threads.
|
||||
/**
|
||||
* @brief Returns the automatically chosen number of threads based on the `nthread`
|
||||
* parameter and the system setting.
|
||||
*/
|
||||
std::int32_t Threads() const;
|
||||
|
||||
bool IsCPU() const { return gpu_id == kCpuId; }
|
||||
bool IsCUDA() const { return !IsCPU(); }
|
||||
|
||||
CUDAContext const* CUDACtx() const;
|
||||
// Make a CUDA context based on the current context.
|
||||
Context MakeCUDA(std::int32_t device = 0) const {
|
||||
[[nodiscard]] std::int32_t Threads() const;
|
||||
/**
|
||||
* @brief Is XGBoost running on CPU?
|
||||
*/
|
||||
[[nodiscard]] bool IsCPU() const { return gpu_id == kCpuId; }
|
||||
/**
|
||||
* @brief Is XGBoost running on a CUDA device?
|
||||
*/
|
||||
[[nodiscard]] bool IsCUDA() const { return !IsCPU(); }
|
||||
/**
|
||||
* @brief Get the current device and ordinal.
|
||||
*/
|
||||
[[nodiscard]] DeviceOrd Device() const {
|
||||
return IsCPU() ? DeviceOrd::CPU() : DeviceOrd::CUDA(static_cast<bst_d_ordinal_t>(gpu_id));
|
||||
}
|
||||
/**
|
||||
* @brief Get the CUDA device ordinal. -1 if XGBoost is running on CPU.
|
||||
*/
|
||||
[[nodiscard]] bst_d_ordinal_t Ordinal() const { return this->gpu_id; }
|
||||
/**
|
||||
* @brief Name of the current device.
|
||||
*/
|
||||
[[nodiscard]] std::string DeviceName() const { return Device().Name(); }
|
||||
/**
|
||||
* @brief Get a CUDA device context for allocator and stream.
|
||||
*/
|
||||
[[nodiscard]] CUDAContext const* CUDACtx() const;
|
||||
/**
|
||||
* @brief Make a CUDA context based on the current context.
|
||||
*
|
||||
* @param ordinal The CUDA device ordinal.
|
||||
*/
|
||||
[[nodiscard]] Context MakeCUDA(std::int32_t ordinal = 0) const {
|
||||
Context ctx = *this;
|
||||
ctx.gpu_id = device;
|
||||
CHECK_GE(ordinal, 0);
|
||||
ctx.gpu_id = ordinal;
|
||||
return ctx;
|
||||
}
|
||||
Context MakeCPU() const {
|
||||
/**
|
||||
* @brief Make a CPU context based on the current context.
|
||||
*/
|
||||
[[nodiscard]] Context MakeCPU() const {
|
||||
Context ctx = *this;
|
||||
ctx.gpu_id = kCpuId;
|
||||
return ctx;
|
||||
@@ -87,9 +176,9 @@ struct Context : public XGBoostParameter<Context> {
|
||||
}
|
||||
|
||||
private:
|
||||
// mutable for lazy initialization for cuda context to avoid initializing CUDA at load.
|
||||
// shared_ptr is used instead of unique_ptr as with unique_ptr it's difficult to define p_impl
|
||||
// while trying to hide CUDA code from host compiler.
|
||||
// mutable for lazy cuda context initialization. This avoids initializing CUDA at load.
|
||||
// shared_ptr is used instead of unique_ptr as with unique_ptr it's difficult to define
|
||||
// p_impl while trying to hide CUDA code from the host compiler.
|
||||
mutable std::shared_ptr<CUDAContext> cuctx_;
|
||||
// cached value for CFS CPU limit. (used in containerized env)
|
||||
std::int32_t cfs_cpu_count_; // NOLINT
|
||||
|
||||
@@ -149,18 +149,14 @@ class GradientBooster : public Model, public Configurable {
|
||||
* \param layer_begin Beginning of boosted tree layer used for prediction.
|
||||
* \param layer_end End of booster layer. 0 means do not limit trees.
|
||||
* \param approximate use a faster (inconsistent) approximation of SHAP values
|
||||
* \param condition condition on the condition_feature (0=no, -1=cond off, 1=cond on).
|
||||
* \param condition_feature feature to condition on (i.e. fix) during calculations
|
||||
*/
|
||||
virtual void PredictContribution(DMatrix* dmat,
|
||||
HostDeviceVector<bst_float>* out_contribs,
|
||||
unsigned layer_begin, unsigned layer_end,
|
||||
bool approximate = false, int condition = 0,
|
||||
unsigned condition_feature = 0) = 0;
|
||||
virtual void PredictContribution(DMatrix* dmat, HostDeviceVector<float>* out_contribs,
|
||||
bst_layer_t layer_begin, bst_layer_t layer_end,
|
||||
bool approximate = false) = 0;
|
||||
|
||||
virtual void PredictInteractionContributions(
|
||||
DMatrix *dmat, HostDeviceVector<bst_float> *out_contribs,
|
||||
unsigned layer_begin, unsigned layer_end, bool approximate) = 0;
|
||||
virtual void PredictInteractionContributions(DMatrix* dmat, HostDeviceVector<float>* out_contribs,
|
||||
bst_layer_t layer_begin, bst_layer_t layer_end,
|
||||
bool approximate) = 0;
|
||||
|
||||
/*!
|
||||
* \brief dump the model in the requested format
|
||||
|
||||
Reference in New Issue
Block a user