Define the new device parameter. (#9362)

2023-07-13 19:30:25 +08:00
parent 2d0cd2817e
commit 04aff3af8e
63 changed files with 827 additions and 477 deletions
--- a/include/xgboost/base.h
+++ b/include/xgboost/base.h
@@ -119,7 +119,7 @@ using bst_group_t = std::uint32_t;  // NOLINT
 */
 using bst_target_t = std::uint32_t;  // NOLINT
 /**
- * brief Type for indexing boosted layers.
+ * @brief Type for indexing boosted layers.
 */
 using bst_layer_t = std::int32_t;  // NOLINT
 /**
--- a/include/xgboost/context.h
+++ b/include/xgboost/context.h
@@ -12,12 +12,18 @@
 #include <cstdint>      // for int16_t, int32_t, int64_t
 #include <memory>       // for shared_ptr
 #include <string>       // for string, to_string
-#include <type_traits>  // for invoke_result_t, is_same_v
+#include <type_traits>  // for invoke_result_t, is_same_v, underlying_type_t

 namespace xgboost {

 struct CUDAContext;

+// Symbolic (string) names for the device types.
+struct DeviceSym {
+  static auto constexpr CPU() { return "cpu"; }
+  static auto constexpr CUDA() { return "cuda"; }
+};
+
 /**
 * @brief A type for device ordinal. The type is packed into 32-bit for efficient use in
 *        viewing types like `linalg::TensorView`.
@@ -59,9 +65,9 @@ struct DeviceOrd {
  [[nodiscard]] std::string Name() const {
    switch (device) {
      case DeviceOrd::kCPU:
-        return "CPU";
+        return DeviceSym::CPU();
      case DeviceOrd::kCUDA:
-        return "CUDA:" + std::to_string(ordinal);
+        return DeviceSym::CUDA() + (':' + std::to_string(ordinal));
      default: {
        LOG(FATAL) << "Unknown device.";
        return "";
@@ -76,26 +82,39 @@ static_assert(sizeof(DeviceOrd) == sizeof(std::int32_t));
 * @brief Runtime context for XGBoost. Contains information like threads and device.
 */
 struct Context : public XGBoostParameter<Context> {
+ private:
+  std::string device{DeviceSym::CPU()};  // NOLINT
+  // The device object for the current context. We are in the middle of replacing
+  // `gpu_id` with this device field.
+  DeviceOrd device_{DeviceOrd::CPU()};
+
 public:
  // Constant representing the device ID of CPU.
-  static std::int32_t constexpr kCpuId = -1;
+  static bst_d_ordinal_t constexpr kCpuId = -1;
+  static bst_d_ordinal_t constexpr InvalidOrdinal() { return -2; }
  static std::int64_t constexpr kDefaultSeed = 0;

 public:
  Context();

+  template <typename Container>
+  Args UpdateAllowUnknown(Container const& kwargs) {
+    auto args = XGBoostParameter<Context>::UpdateAllowUnknown(kwargs);
+    this->SetDeviceOrdinal(kwargs);
+    return args;
+  }
+
+  std::int32_t gpu_id{kCpuId};
+  // The number of threads to use if OpenMP is enabled. If it equals 0, use the system default.
+  std::int32_t nthread{0};  // NOLINT
  // stored random seed
  std::int64_t seed{kDefaultSeed};
  // whether seed the PRNG each iteration
  bool seed_per_iteration{false};
-  // number of threads to use if OpenMP is enabled
-  // if equals 0, use system default
-  std::int32_t nthread{0};
-  // primary device, -1 means no gpu.
-  std::int32_t gpu_id{kCpuId};
  // fail when gpu_id is invalid
  bool fail_on_invalid_gpu_id{false};
  bool validate_parameters{false};
+
  /**
   * @brief Configure the parameter `gpu_id'.
   *
@@ -111,21 +130,19 @@ struct Context : public XGBoostParameter<Context> {
  /**
   * @brief Is XGBoost running on CPU?
   */
-  [[nodiscard]] bool IsCPU() const { return gpu_id == kCpuId; }
+  [[nodiscard]] bool IsCPU() const { return Device().IsCPU(); }
  /**
   * @brief Is XGBoost running on a CUDA device?
   */
-  [[nodiscard]] bool IsCUDA() const { return !IsCPU(); }
+  [[nodiscard]] bool IsCUDA() const { return Device().IsCUDA(); }
  /**
   * @brief Get the current device and ordinal.
   */
-  [[nodiscard]] DeviceOrd Device() const {
-    return IsCPU() ? DeviceOrd::CPU() : DeviceOrd::CUDA(static_cast<bst_d_ordinal_t>(gpu_id));
-  }
+  [[nodiscard]] DeviceOrd Device() const { return device_; }
  /**
   * @brief Get the CUDA device ordinal. -1 if XGBoost is running on CPU.
   */
-  [[nodiscard]] bst_d_ordinal_t Ordinal() const { return this->gpu_id; }
+  [[nodiscard]] bst_d_ordinal_t Ordinal() const { return Device().ordinal; }
  /**
   * @brief Name of the current device.
   */
@@ -134,24 +151,22 @@ struct Context : public XGBoostParameter<Context> {
   * @brief Get a CUDA device context for allocator and stream.
   */
  [[nodiscard]] CUDAContext const* CUDACtx() const;
+
  /**
   * @brief Make a CUDA context based on the current context.
   *
   * @param ordinal The CUDA device ordinal.
   */
-  [[nodiscard]] Context MakeCUDA(std::int32_t ordinal = 0) const {
+  [[nodiscard]] Context MakeCUDA(bst_d_ordinal_t ordinal = 0) const {
    Context ctx = *this;
-    CHECK_GE(ordinal, 0);
-    ctx.gpu_id = ordinal;
-    return ctx;
+    return ctx.SetDevice(DeviceOrd::CUDA(ordinal));
  }
  /**
   * @brief Make a CPU context based on the current context.
   */
  [[nodiscard]] Context MakeCPU() const {
    Context ctx = *this;
-    ctx.gpu_id = kCpuId;
-    return ctx;
+    return ctx.SetDevice(DeviceOrd::CPU());
  }
  /**
   * @brief Call function based on the current device.
@@ -167,7 +182,8 @@ struct Context : public XGBoostParameter<Context> {
      default:
        // Do not use the device name as this is likely an internal error, the name
        // wouldn't be valid.
-        LOG(FATAL) << "Unknown device type:" << static_cast<std::int16_t>(this->Device().device);
+        LOG(FATAL) << "Unknown device type:"
+                   << static_cast<std::underlying_type_t<DeviceOrd::Type>>(this->Device().device);
        break;
    }
    return std::invoke_result_t<CPUFn>();
@@ -182,11 +198,9 @@ struct Context : public XGBoostParameter<Context> {
    DMLC_DECLARE_FIELD(seed_per_iteration)
        .set_default(false)
        .describe("Seed PRNG determnisticly via iterator number.");
+    DMLC_DECLARE_FIELD(device).set_default(DeviceSym::CPU()).describe("Device ordinal.");
    DMLC_DECLARE_FIELD(nthread).set_default(0).describe("Number of threads to use.");
    DMLC_DECLARE_ALIAS(nthread, n_jobs);
-
-    DMLC_DECLARE_FIELD(gpu_id).set_default(-1).set_lower_bound(-1).describe(
-        "The primary GPU device ordinal.");
    DMLC_DECLARE_FIELD(fail_on_invalid_gpu_id)
        .set_default(false)
        .describe("Fail with error when gpu_id is invalid.");
@@ -196,6 +210,14 @@ struct Context : public XGBoostParameter<Context> {
  }

 private:
+  void SetDeviceOrdinal(Args const& kwargs);
+  Context& SetDevice(DeviceOrd d) {
+    this->device_ = d;
+    this->gpu_id = d.ordinal;  // this can be removed once we move away from `gpu_id`.
+    this->device = d.Name();
+    return *this;
+  }
+
  // mutable for lazy cuda context initialization. This avoids initializing CUDA at load.
  // shared_ptr is used instead of unique_ptr as with unique_ptr it's difficult to define
  // p_impl while trying to hide CUDA code from the host compiler.
--- a/include/xgboost/json.h
+++ b/include/xgboost/json.h
@@ -664,11 +664,11 @@ Object ToJson(Parameter const& param) {
 template <typename Parameter>
 Args FromJson(Json const& obj, Parameter* param) {
  auto const& j_param = get<Object const>(obj);
-  std::map<std::string, std::string> m;
+  Args args;
  for (auto const& kv : j_param) {
-    m[kv.first] = get<String const>(kv.second);
+    args.emplace_back(kv.first, get<String const>(kv.second));
  }
-  return param->UpdateAllowUnknown(m);
+  return param->UpdateAllowUnknown(args);
 }
 }  // namespace xgboost
 #endif  // XGBOOST_JSON_H_
--- a/include/xgboost/learner.h
+++ b/include/xgboost/learner.h
@@ -110,15 +110,10 @@ class Learner : public Model, public Configurable, public dmlc::Serializable {
   * \param approx_contribs whether to approximate the feature contributions for speed
   * \param pred_interactions whether to compute the feature pair contributions
   */
-  virtual void Predict(std::shared_ptr<DMatrix> data,
-                       bool output_margin,
-                       HostDeviceVector<bst_float> *out_preds,
-                       unsigned layer_begin,
-                       unsigned layer_end,
-                       bool training = false,
-                       bool pred_leaf = false,
-                       bool pred_contribs = false,
-                       bool approx_contribs = false,
+  virtual void Predict(std::shared_ptr<DMatrix> data, bool output_margin,
+                       HostDeviceVector<bst_float>* out_preds, bst_layer_t layer_begin,
+                       bst_layer_t layer_end, bool training = false, bool pred_leaf = false,
+                       bool pred_contribs = false, bool approx_contribs = false,
                       bool pred_interactions = false) = 0;

  /*!
@@ -132,8 +127,8 @@ class Learner : public Model, public Configurable, public dmlc::Serializable {
   * \param          layer_end   End of booster layer. 0 means do not limit trees.
   */
  virtual void InplacePredict(std::shared_ptr<DMatrix> p_m, PredictionType type, float missing,
-                              HostDeviceVector<bst_float>** out_preds, uint32_t layer_begin,
-                              uint32_t layer_end) = 0;
+                              HostDeviceVector<float>** out_preds, bst_layer_t layer_begin,
+                              bst_layer_t layer_end) = 0;

  /*!
   * \brief Calculate feature score.  See doc in C API for outputs.
--- a/include/xgboost/predictor.h
+++ b/include/xgboost/predictor.h
@@ -39,9 +39,8 @@ struct PredictionCacheEntry {
   *
   * \param v Added versions.
   */
-  void Update(std::uint32_t v) {
-    version += v;
-  }
+  void Update(std::uint32_t v) { version += v; }
+  void Reset() { version = 0; }
 };

 /**