Define the new device parameter. (#9362)

This commit is contained in:
Jiaming Yuan
2023-07-13 19:30:25 +08:00
committed by GitHub
parent 2d0cd2817e
commit 04aff3af8e
63 changed files with 827 additions and 477 deletions

View File

@@ -119,7 +119,7 @@ using bst_group_t = std::uint32_t; // NOLINT
*/
using bst_target_t = std::uint32_t; // NOLINT
/**
* brief Type for indexing boosted layers.
* @brief Type for indexing boosted layers.
*/
using bst_layer_t = std::int32_t; // NOLINT
/**

View File

@@ -12,12 +12,18 @@
#include <cstdint> // for int16_t, int32_t, int64_t
#include <memory> // for shared_ptr
#include <string> // for string, to_string
#include <type_traits> // for invoke_result_t, is_same_v
#include <type_traits> // for invoke_result_t, is_same_v, underlying_type_t
namespace xgboost {
struct CUDAContext;
/**
 * @brief Symbolic names of the device kinds.
 *
 * Each accessor yields the lower-case string literal naming one kind of device. The
 * results are plain C strings, so they can be concatenated with `std::string` values.
 */
struct DeviceSym {
  static constexpr auto CPU() -> char const* { return "cpu"; }
  static constexpr auto CUDA() -> char const* { return "cuda"; }
};
/**
* @brief A type for device ordinal. The type is packed into 32-bit for efficient use in
* viewing types like `linalg::TensorView`.
@@ -59,9 +65,9 @@ struct DeviceOrd {
[[nodiscard]] std::string Name() const {
switch (device) {
case DeviceOrd::kCPU:
return "CPU";
return DeviceSym::CPU();
case DeviceOrd::kCUDA:
return "CUDA:" + std::to_string(ordinal);
return DeviceSym::CUDA() + (':' + std::to_string(ordinal));
default: {
LOG(FATAL) << "Unknown device.";
return "";
@@ -76,26 +82,39 @@ static_assert(sizeof(DeviceOrd) == sizeof(std::int32_t));
* @brief Runtime context for XGBoost. Contains information like threads and device.
*/
struct Context : public XGBoostParameter<Context> {
private:
std::string device{DeviceSym::CPU()}; // NOLINT
// The device object for the current context. We are in the middle of replacing the
// `gpu_id` with this device field.
DeviceOrd device_{DeviceOrd::CPU()};
public:
// Constant representing the device ID of CPU.
static std::int32_t constexpr kCpuId = -1;
static bst_d_ordinal_t constexpr kCpuId = -1;
static bst_d_ordinal_t constexpr InvalidOrdinal() { return -2; }
static std::int64_t constexpr kDefaultSeed = 0;
public:
Context();
/**
 * @brief Update the context from key-value arguments, then configure the device.
 *
 * The arguments are first forwarded to the base `XGBoostParameter<Context>` update and
 * afterwards handed to `SetDeviceOrdinal`.
 *
 * @param kwargs The arguments to apply; must be usable as `Args` for `SetDeviceOrdinal`.
 *
 * @return The result of the base class update (presumably the arguments it did not
 *         recognise -- confirm against the base class).
 */
template <typename Container>
Args UpdateAllowUnknown(Container const& kwargs) {
  // Parameter fields are assigned by the base class before the device is resolved.
  auto base_result = XGBoostParameter<Context>::UpdateAllowUnknown(kwargs);
  SetDeviceOrdinal(kwargs);
  return base_result;
}
std::int32_t gpu_id{kCpuId};
// The number of threads to use if OpenMP is enabled. If equals 0, use the system default.
std::int32_t nthread{0}; // NOLINT
// stored random seed
std::int64_t seed{kDefaultSeed};
// whether seed the PRNG each iteration
bool seed_per_iteration{false};
// number of threads to use if OpenMP is enabled
// if equals 0, use system default
std::int32_t nthread{0};
// primary device, -1 means no gpu.
std::int32_t gpu_id{kCpuId};
// fail when gpu_id is invalid
bool fail_on_invalid_gpu_id{false};
bool validate_parameters{false};
/**
* @brief Configure the parameter `gpu_id`.
*
@@ -111,21 +130,19 @@ struct Context : public XGBoostParameter<Context> {
/**
* @brief Is XGBoost running on CPU?
*/
[[nodiscard]] bool IsCPU() const { return gpu_id == kCpuId; }
[[nodiscard]] bool IsCPU() const { return Device().IsCPU(); }
/**
* @brief Is XGBoost running on a CUDA device?
*/
[[nodiscard]] bool IsCUDA() const { return !IsCPU(); }
[[nodiscard]] bool IsCUDA() const { return Device().IsCUDA(); }
/**
* @brief Get the current device and ordinal.
*/
[[nodiscard]] DeviceOrd Device() const {
return IsCPU() ? DeviceOrd::CPU() : DeviceOrd::CUDA(static_cast<bst_d_ordinal_t>(gpu_id));
}
[[nodiscard]] DeviceOrd Device() const { return device_; }
/**
* @brief Get the CUDA device ordinal. -1 if XGBoost is running on CPU.
*/
[[nodiscard]] bst_d_ordinal_t Ordinal() const { return this->gpu_id; }
[[nodiscard]] bst_d_ordinal_t Ordinal() const { return Device().ordinal; }
/**
* @brief Name of the current device.
*/
@@ -134,24 +151,22 @@ struct Context : public XGBoostParameter<Context> {
* @brief Get a CUDA device context for allocator and stream.
*/
[[nodiscard]] CUDAContext const* CUDACtx() const;
/**
* @brief Make a CUDA context based on the current context.
*
* @param ordinal The CUDA device ordinal.
*/
[[nodiscard]] Context MakeCUDA(std::int32_t ordinal = 0) const {
[[nodiscard]] Context MakeCUDA(bst_d_ordinal_t ordinal = 0) const {
Context ctx = *this;
CHECK_GE(ordinal, 0);
ctx.gpu_id = ordinal;
return ctx;
return ctx.SetDevice(DeviceOrd::CUDA(ordinal));
}
/**
* @brief Make a CPU context based on the current context.
*/
[[nodiscard]] Context MakeCPU() const {
Context ctx = *this;
ctx.gpu_id = kCpuId;
return ctx;
return ctx.SetDevice(DeviceOrd::CPU());
}
/**
* @brief Call function based on the current device.
@@ -167,7 +182,8 @@ struct Context : public XGBoostParameter<Context> {
default:
// Do not use the device name as this is likely an internal error, the name
// wouldn't be valid.
LOG(FATAL) << "Unknown device type:" << static_cast<std::int16_t>(this->Device().device);
LOG(FATAL) << "Unknown device type:"
<< static_cast<std::underlying_type_t<DeviceOrd::Type>>(this->Device().device);
break;
}
return std::invoke_result_t<CPUFn>();
@@ -182,11 +198,9 @@ struct Context : public XGBoostParameter<Context> {
DMLC_DECLARE_FIELD(seed_per_iteration)
.set_default(false)
.describe("Seed PRNG determnisticly via iterator number.");
DMLC_DECLARE_FIELD(device).set_default(DeviceSym::CPU()).describe("Device ordinal.");
DMLC_DECLARE_FIELD(nthread).set_default(0).describe("Number of threads to use.");
DMLC_DECLARE_ALIAS(nthread, n_jobs);
DMLC_DECLARE_FIELD(gpu_id).set_default(-1).set_lower_bound(-1).describe(
"The primary GPU device ordinal.");
DMLC_DECLARE_FIELD(fail_on_invalid_gpu_id)
.set_default(false)
.describe("Fail with error when gpu_id is invalid.");
@@ -196,6 +210,14 @@ struct Context : public XGBoostParameter<Context> {
}
private:
void SetDeviceOrdinal(Args const& kwargs);
/**
 * @brief Switch this context to the given device.
 *
 * Keeps all three representations of the device consistent: the `device_` object, the
 * legacy `gpu_id` ordinal and the string parameter `device`.
 *
 * @param d The device to use.
 *
 * @return A reference to this context.
 */
Context& SetDevice(DeviceOrd d) {
  device_ = d;
  // Legacy mirror of the ordinal; it can be dropped once `gpu_id` is retired.
  gpu_id = d.ordinal;
  // The name is assigned last, matching the original statement order.
  device = d.Name();
  return *this;
}
// mutable for lazy cuda context initialization. This avoids initializing CUDA at load.
// shared_ptr is used instead of unique_ptr as with unique_ptr it's difficult to define
// p_impl while trying to hide CUDA code from the host compiler.

View File

@@ -664,11 +664,11 @@ Object ToJson(Parameter const& param) {
template <typename Parameter>
Args FromJson(Json const& obj, Parameter* param) {
auto const& j_param = get<Object const>(obj);
std::map<std::string, std::string> m;
Args args;
for (auto const& kv : j_param) {
m[kv.first] = get<String const>(kv.second);
args.emplace_back(kv.first, get<String const>(kv.second));
}
return param->UpdateAllowUnknown(m);
return param->UpdateAllowUnknown(args);
}
} // namespace xgboost
#endif // XGBOOST_JSON_H_

View File

@@ -110,15 +110,10 @@ class Learner : public Model, public Configurable, public dmlc::Serializable {
* \param approx_contribs whether to approximate the feature contributions for speed
* \param pred_interactions whether to compute the feature pair contributions
*/
virtual void Predict(std::shared_ptr<DMatrix> data,
bool output_margin,
HostDeviceVector<bst_float> *out_preds,
unsigned layer_begin,
unsigned layer_end,
bool training = false,
bool pred_leaf = false,
bool pred_contribs = false,
bool approx_contribs = false,
virtual void Predict(std::shared_ptr<DMatrix> data, bool output_margin,
HostDeviceVector<bst_float>* out_preds, bst_layer_t layer_begin,
bst_layer_t layer_end, bool training = false, bool pred_leaf = false,
bool pred_contribs = false, bool approx_contribs = false,
bool pred_interactions = false) = 0;
/*!
@@ -132,8 +127,8 @@ class Learner : public Model, public Configurable, public dmlc::Serializable {
* \param layer_end End of booster layer. 0 means do not limit trees.
*/
virtual void InplacePredict(std::shared_ptr<DMatrix> p_m, PredictionType type, float missing,
HostDeviceVector<bst_float>** out_preds, uint32_t layer_begin,
uint32_t layer_end) = 0;
HostDeviceVector<float>** out_preds, bst_layer_t layer_begin,
bst_layer_t layer_end) = 0;
/*!
* \brief Calculate feature score. See doc in C API for outputs.

View File

@@ -39,9 +39,8 @@ struct PredictionCacheEntry {
*
* \param v Added versions.
*/
void Update(std::uint32_t v) {
version += v;
}
void Update(std::uint32_t v) { version += v; }
void Reset() { version = 0; }
};
/**