Define the new device parameter. (#9362)
This commit is contained in:
@@ -119,7 +119,7 @@ using bst_group_t = std::uint32_t; // NOLINT
|
||||
*/
|
||||
using bst_target_t = std::uint32_t; // NOLINT
|
||||
/**
|
||||
* brief Type for indexing boosted layers.
|
||||
* @brief Type for indexing boosted layers.
|
||||
*/
|
||||
using bst_layer_t = std::int32_t; // NOLINT
|
||||
/**
|
||||
|
||||
@@ -12,12 +12,18 @@
|
||||
#include <cstdint> // for int16_t, int32_t, int64_t
|
||||
#include <memory> // for shared_ptr
|
||||
#include <string> // for string, to_string
|
||||
#include <type_traits> // for invoke_result_t, is_same_v
|
||||
#include <type_traits> // for invoke_result_t, is_same_v, underlying_type_t
|
||||
|
||||
namespace xgboost {
|
||||
|
||||
struct CUDAContext;
|
||||
|
||||
// Symbolic names for the device types, exposed as constexpr accessors so the
// string spellings are defined in a single place.
|
||||
struct DeviceSym {
|
||||
// Returns the string literal "cpu".
static auto constexpr CPU() { return "cpu"; }
|
||||
// Returns the string literal "cuda".
static auto constexpr CUDA() { return "cuda"; }
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief A type for device ordinal. The type is packed into 32-bit for efficient use in
|
||||
* viewing types like `linalg::TensorView`.
|
||||
@@ -59,9 +65,9 @@ struct DeviceOrd {
|
||||
[[nodiscard]] std::string Name() const {
|
||||
switch (device) {
|
||||
case DeviceOrd::kCPU:
|
||||
return "CPU";
|
||||
return DeviceSym::CPU();
|
||||
case DeviceOrd::kCUDA:
|
||||
return "CUDA:" + std::to_string(ordinal);
|
||||
return DeviceSym::CUDA() + (':' + std::to_string(ordinal));
|
||||
default: {
|
||||
LOG(FATAL) << "Unknown device.";
|
||||
return "";
|
||||
@@ -76,26 +82,39 @@ static_assert(sizeof(DeviceOrd) == sizeof(std::int32_t));
|
||||
* @brief Runtime context for XGBoost. Contains information like threads and device.
|
||||
*/
|
||||
struct Context : public XGBoostParameter<Context> {
|
||||
private:
|
||||
std::string device{DeviceSym::CPU()}; // NOLINT
|
||||
// The device object for the current context. We are in the middle of replacing the
|
||||
// `gpu_id` with this device field.
|
||||
DeviceOrd device_{DeviceOrd::CPU()};
|
||||
|
||||
public:
|
||||
// Constant representing the device ID of CPU.
|
||||
static std::int32_t constexpr kCpuId = -1;
|
||||
static bst_d_ordinal_t constexpr kCpuId = -1;
|
||||
static bst_d_ordinal_t constexpr InvalidOrdinal() { return -2; }
|
||||
static std::int64_t constexpr kDefaultSeed = 0;
|
||||
|
||||
public:
|
||||
Context();
|
||||
|
||||
/**
 * @brief Update this context from `kwargs`, then configure the device ordinal.
 *
 * Forwards `kwargs` to XGBoostParameter<Context>::UpdateAllowUnknown first and
 * afterwards calls SetDeviceOrdinal with the same arguments.
 *
 * @param kwargs Container of arguments handed to the base-class update; it must
 *               also be usable where SetDeviceOrdinal expects its argument.
 * @return The value returned by the base-class UpdateAllowUnknown (presumably the
 *         arguments it did not recognize -- TODO confirm against XGBoostParameter).
 */
template <typename Container>
|
||||
Args UpdateAllowUnknown(Container const& kwargs) {
|
||||
auto args = XGBoostParameter<Context>::UpdateAllowUnknown(kwargs);
|
||||
// Runs after the generic parameter update, using the same kwargs.
this->SetDeviceOrdinal(kwargs);
|
||||
return args;
|
||||
}
|
||||
|
||||
std::int32_t gpu_id{kCpuId};
|
||||
// The number of threads to use if OpenMP is enabled. If equals 0, use the system default.
|
||||
std::int32_t nthread{0}; // NOLINT
|
||||
// stored random seed
|
||||
std::int64_t seed{kDefaultSeed};
|
||||
// whether seed the PRNG each iteration
|
||||
bool seed_per_iteration{false};
|
||||
// number of threads to use if OpenMP is enabled
|
||||
// if equals 0, use system default
|
||||
std::int32_t nthread{0};
|
||||
// primary device, -1 means no gpu.
|
||||
std::int32_t gpu_id{kCpuId};
|
||||
// fail when gpu_id is invalid
|
||||
bool fail_on_invalid_gpu_id{false};
|
||||
bool validate_parameters{false};
|
||||
|
||||
/**
|
||||
* @brief Configure the parameter `gpu_id`.
|
||||
*
|
||||
@@ -111,21 +130,19 @@ struct Context : public XGBoostParameter<Context> {
|
||||
/**
|
||||
* @brief Is XGBoost running on CPU?
|
||||
*/
|
||||
[[nodiscard]] bool IsCPU() const { return gpu_id == kCpuId; }
|
||||
[[nodiscard]] bool IsCPU() const { return Device().IsCPU(); }
|
||||
/**
|
||||
* @brief Is XGBoost running on a CUDA device?
|
||||
*/
|
||||
[[nodiscard]] bool IsCUDA() const { return !IsCPU(); }
|
||||
[[nodiscard]] bool IsCUDA() const { return Device().IsCUDA(); }
|
||||
/**
|
||||
* @brief Get the current device and ordinal.
|
||||
*/
|
||||
[[nodiscard]] DeviceOrd Device() const {
|
||||
return IsCPU() ? DeviceOrd::CPU() : DeviceOrd::CUDA(static_cast<bst_d_ordinal_t>(gpu_id));
|
||||
}
|
||||
[[nodiscard]] DeviceOrd Device() const { return device_; }
|
||||
/**
|
||||
* @brief Get the CUDA device ordinal. -1 if XGBoost is running on CPU.
|
||||
*/
|
||||
[[nodiscard]] bst_d_ordinal_t Ordinal() const { return this->gpu_id; }
|
||||
[[nodiscard]] bst_d_ordinal_t Ordinal() const { return Device().ordinal; }
|
||||
/**
|
||||
* @brief Name of the current device.
|
||||
*/
|
||||
@@ -134,24 +151,22 @@ struct Context : public XGBoostParameter<Context> {
|
||||
* @brief Get a CUDA device context for allocator and stream.
|
||||
*/
|
||||
[[nodiscard]] CUDAContext const* CUDACtx() const;
|
||||
|
||||
/**
|
||||
* @brief Make a CUDA context based on the current context.
|
||||
*
|
||||
* @param ordinal The CUDA device ordinal.
|
||||
*/
|
||||
[[nodiscard]] Context MakeCUDA(std::int32_t ordinal = 0) const {
|
||||
[[nodiscard]] Context MakeCUDA(bst_d_ordinal_t ordinal = 0) const {
|
||||
Context ctx = *this;
|
||||
CHECK_GE(ordinal, 0);
|
||||
ctx.gpu_id = ordinal;
|
||||
return ctx;
|
||||
return ctx.SetDevice(DeviceOrd::CUDA(ordinal));
|
||||
}
|
||||
/**
|
||||
* @brief Make a CPU context based on the current context.
|
||||
*/
|
||||
[[nodiscard]] Context MakeCPU() const {
|
||||
Context ctx = *this;
|
||||
ctx.gpu_id = kCpuId;
|
||||
return ctx;
|
||||
return ctx.SetDevice(DeviceOrd::CPU());
|
||||
}
|
||||
/**
|
||||
* @brief Call function based on the current device.
|
||||
@@ -167,7 +182,8 @@ struct Context : public XGBoostParameter<Context> {
|
||||
default:
|
||||
// Do not use the device name as this is likely an internal error, the name
|
||||
// wouldn't be valid.
|
||||
LOG(FATAL) << "Unknown device type:" << static_cast<std::int16_t>(this->Device().device);
|
||||
LOG(FATAL) << "Unknown device type:"
|
||||
<< static_cast<std::underlying_type_t<DeviceOrd::Type>>(this->Device().device);
|
||||
break;
|
||||
}
|
||||
return std::invoke_result_t<CPUFn>();
|
||||
@@ -182,11 +198,9 @@ struct Context : public XGBoostParameter<Context> {
|
||||
DMLC_DECLARE_FIELD(seed_per_iteration)
|
||||
.set_default(false)
|
||||
.describe("Seed PRNG determnisticly via iterator number.");
|
||||
DMLC_DECLARE_FIELD(device).set_default(DeviceSym::CPU()).describe("Device ordinal.");
|
||||
DMLC_DECLARE_FIELD(nthread).set_default(0).describe("Number of threads to use.");
|
||||
DMLC_DECLARE_ALIAS(nthread, n_jobs);
|
||||
|
||||
DMLC_DECLARE_FIELD(gpu_id).set_default(-1).set_lower_bound(-1).describe(
|
||||
"The primary GPU device ordinal.");
|
||||
DMLC_DECLARE_FIELD(fail_on_invalid_gpu_id)
|
||||
.set_default(false)
|
||||
.describe("Fail with error when gpu_id is invalid.");
|
||||
@@ -196,6 +210,14 @@ struct Context : public XGBoostParameter<Context> {
|
||||
}
|
||||
|
||||
private:
|
||||
void SetDeviceOrdinal(Args const& kwargs);
|
||||
/**
 * @brief Set the device of this context.
 *
 * Writes all three representations held by the context so they stay in sync:
 * the `device_` object, the legacy `gpu_id` ordinal, and the `device` string
 * parameter (taken from `d.Name()`).
 *
 * @param d The device to switch to.
 * @return Reference to this context.
 */
Context& SetDevice(DeviceOrd d) {
|
||||
this->device_ = d;
|
||||
this->gpu_id = d.ordinal; // this can be removed once we move away from `gpu_id`.
|
||||
// Keep the string parameter consistent with the device object.
this->device = d.Name();
|
||||
return *this;
|
||||
}
|
||||
|
||||
// mutable for lazy cuda context initialization. This avoids initializing CUDA at load.
|
||||
// shared_ptr is used instead of unique_ptr as with unique_ptr it's difficult to define
|
||||
// p_impl while trying to hide CUDA code from the host compiler.
|
||||
|
||||
@@ -664,11 +664,11 @@ Object ToJson(Parameter const& param) {
|
||||
template <typename Parameter>
|
||||
Args FromJson(Json const& obj, Parameter* param) {
|
||||
auto const& j_param = get<Object const>(obj);
|
||||
std::map<std::string, std::string> m;
|
||||
Args args;
|
||||
for (auto const& kv : j_param) {
|
||||
m[kv.first] = get<String const>(kv.second);
|
||||
args.emplace_back(kv.first, get<String const>(kv.second));
|
||||
}
|
||||
return param->UpdateAllowUnknown(m);
|
||||
return param->UpdateAllowUnknown(args);
|
||||
}
|
||||
} // namespace xgboost
|
||||
#endif // XGBOOST_JSON_H_
|
||||
|
||||
@@ -110,15 +110,10 @@ class Learner : public Model, public Configurable, public dmlc::Serializable {
|
||||
* \param approx_contribs whether to approximate the feature contributions for speed
|
||||
* \param pred_interactions whether to compute the feature pair contributions
|
||||
*/
|
||||
virtual void Predict(std::shared_ptr<DMatrix> data,
|
||||
bool output_margin,
|
||||
HostDeviceVector<bst_float> *out_preds,
|
||||
unsigned layer_begin,
|
||||
unsigned layer_end,
|
||||
bool training = false,
|
||||
bool pred_leaf = false,
|
||||
bool pred_contribs = false,
|
||||
bool approx_contribs = false,
|
||||
virtual void Predict(std::shared_ptr<DMatrix> data, bool output_margin,
|
||||
HostDeviceVector<bst_float>* out_preds, bst_layer_t layer_begin,
|
||||
bst_layer_t layer_end, bool training = false, bool pred_leaf = false,
|
||||
bool pred_contribs = false, bool approx_contribs = false,
|
||||
bool pred_interactions = false) = 0;
|
||||
|
||||
/*!
|
||||
@@ -132,8 +127,8 @@ class Learner : public Model, public Configurable, public dmlc::Serializable {
|
||||
* \param layer_end End of booster layer. 0 means do not limit trees.
|
||||
*/
|
||||
virtual void InplacePredict(std::shared_ptr<DMatrix> p_m, PredictionType type, float missing,
|
||||
HostDeviceVector<bst_float>** out_preds, uint32_t layer_begin,
|
||||
uint32_t layer_end) = 0;
|
||||
HostDeviceVector<float>** out_preds, bst_layer_t layer_begin,
|
||||
bst_layer_t layer_end) = 0;
|
||||
|
||||
/*!
|
||||
* \brief Calculate feature score. See doc in C API for outputs.
|
||||
|
||||
@@ -39,9 +39,8 @@ struct PredictionCacheEntry {
|
||||
*
|
||||
* \param v Added versions.
|
||||
*/
|
||||
void Update(std::uint32_t v) {
|
||||
version += v;
|
||||
}
|
||||
void Update(std::uint32_t v) { version += v; }
|
||||
void Reset() { version = 0; }
|
||||
};
|
||||
|
||||
/**
|
||||
|
||||
Reference in New Issue
Block a user