Remove internal use of gpu_id. (#9568)

This commit is contained in:
Jiaming Yuan
2023-09-20 23:29:51 +08:00
committed by GitHub
parent 38ac52dd87
commit 8c676c889d
121 changed files with 1012 additions and 1044 deletions

View File

@@ -23,8 +23,7 @@
#include "xgboost/linalg.h"
#include "xgboost/metric.h"
namespace xgboost {
namespace metric {
namespace xgboost::metric {
// tag this file, used by force static link later.
DMLC_REGISTRY_FILE_TAG(auc);
/**
@@ -257,10 +256,10 @@ template <typename Curve>
class EvalAUC : public MetricNoCache {
double Eval(const HostDeviceVector<bst_float> &preds, const MetaInfo &info) override {
double auc {0};
if (ctx_->gpu_id != Context::kCpuId) {
preds.SetDevice(ctx_->gpu_id);
info.labels.SetDevice(ctx_->gpu_id);
info.weights_.SetDevice(ctx_->gpu_id);
if (ctx_->Device().IsCUDA()) {
preds.SetDevice(ctx_->Device());
info.labels.SetDevice(ctx_->Device());
info.weights_.SetDevice(ctx_->Device());
}
// We use the global size to handle empty dataset.
std::array<size_t, 2> meta{info.labels.Size(), preds.Size()};
@@ -329,7 +328,7 @@ class EvalROCAUC : public EvalAUC<EvalROCAUC> {
double auc{0};
uint32_t valid_groups = 0;
auto n_threads = ctx_->Threads();
if (ctx_->gpu_id == Context::kCpuId) {
if (ctx_->IsCPU()) {
std::tie(auc, valid_groups) =
RankingAUC<true>(ctx_, predts.ConstHostVector(), info, n_threads);
} else {
@@ -344,7 +343,7 @@ class EvalROCAUC : public EvalAUC<EvalROCAUC> {
double auc{0};
auto n_threads = ctx_->Threads();
CHECK_NE(n_classes, 0);
if (ctx_->gpu_id == Context::kCpuId) {
if (ctx_->IsCPU()) {
auc = MultiClassOVR(ctx_, predts.ConstHostVector(), info, n_classes, n_threads, BinaryROCAUC);
} else {
auc = GPUMultiClassROCAUC(ctx_, predts.ConstDeviceSpan(), info, &this->d_cache_, n_classes);
@@ -355,7 +354,7 @@ class EvalROCAUC : public EvalAUC<EvalROCAUC> {
std::tuple<double, double, double>
EvalBinary(HostDeviceVector<float> const &predts, MetaInfo const &info) {
double fp, tp, auc;
if (ctx_->gpu_id == Context::kCpuId) {
if (ctx_->IsCPU()) {
std::tie(fp, tp, auc) = BinaryROCAUC(ctx_, predts.ConstHostVector(),
info.labels.HostView().Slice(linalg::All(), 0),
common::OptionalWeights{info.weights_.ConstHostSpan()});
@@ -367,7 +366,7 @@ class EvalROCAUC : public EvalAUC<EvalROCAUC> {
}
public:
char const* Name() const override {
[[nodiscard]] char const* Name() const override {
return "auc";
}
};
@@ -405,7 +404,7 @@ class EvalPRAUC : public EvalAUC<EvalPRAUC> {
std::tuple<double, double, double>
EvalBinary(HostDeviceVector<float> const &predts, MetaInfo const &info) {
double pr, re, auc;
if (ctx_->gpu_id == Context::kCpuId) {
if (ctx_->IsCPU()) {
std::tie(pr, re, auc) =
BinaryPRAUC(ctx_, predts.ConstHostSpan(), info.labels.HostView().Slice(linalg::All(), 0),
common::OptionalWeights{info.weights_.ConstHostSpan()});
@@ -418,7 +417,7 @@ class EvalPRAUC : public EvalAUC<EvalPRAUC> {
double EvalMultiClass(HostDeviceVector<float> const &predts, MetaInfo const &info,
size_t n_classes) {
if (ctx_->gpu_id == Context::kCpuId) {
if (ctx_->IsCPU()) {
auto n_threads = this->ctx_->Threads();
return MultiClassOVR(ctx_, predts.ConstHostSpan(), info, n_classes, n_threads, BinaryPRAUC);
} else {
@@ -431,7 +430,7 @@ class EvalPRAUC : public EvalAUC<EvalPRAUC> {
double auc{0};
uint32_t valid_groups = 0;
auto n_threads = ctx_->Threads();
if (ctx_->gpu_id == Context::kCpuId) {
if (ctx_->IsCPU()) {
auto labels = info.labels.Data()->ConstHostSpan();
if (std::any_of(labels.cbegin(), labels.cend(), PRAUCLabelInvalid{})) {
InvalidLabels();
@@ -446,7 +445,7 @@ class EvalPRAUC : public EvalAUC<EvalPRAUC> {
}
public:
const char *Name() const override { return "aucpr"; }
[[nodiscard]] const char *Name() const override { return "aucpr"; }
};
XGBOOST_REGISTER_METRIC(AUCPR, "aucpr")
@@ -473,5 +472,4 @@ std::pair<double, std::uint32_t> GPURankingPRAUC(Context const *, common::Span<f
return {};
}
#endif
} // namespace metric
} // namespace xgboost
} // namespace xgboost::metric

View File

@@ -824,7 +824,7 @@ std::pair<double, std::uint32_t> GPURankingPRAUC(Context const *ctx,
common::Span<float const> predts,
MetaInfo const &info,
std::shared_ptr<DeviceAUCCache> *p_cache) {
dh::safe_cuda(cudaSetDevice(ctx->gpu_id));
dh::safe_cuda(cudaSetDevice(ctx->Ordinal()));
if (predts.empty()) {
return std::make_pair(0.0, static_cast<uint32_t>(0));
}

View File

@@ -127,24 +127,24 @@ class MultiClassMetricsReduction {
#endif // XGBOOST_USE_CUDA
PackedReduceResult Reduce(const Context& tparam, int device, size_t n_class,
PackedReduceResult Reduce(const Context& ctx, DeviceOrd device, size_t n_class,
const HostDeviceVector<bst_float>& weights,
const HostDeviceVector<bst_float>& labels,
const HostDeviceVector<bst_float>& preds) {
PackedReduceResult result;
if (device < 0) {
if (device.IsCPU()) {
result =
CpuReduceMetrics(weights, labels, preds, n_class, tparam.Threads());
CpuReduceMetrics(weights, labels, preds, n_class, ctx.Threads());
}
#if defined(XGBOOST_USE_CUDA)
else { // NOLINT
device_ = tparam.gpu_id;
device_ = ctx.Device();
preds.SetDevice(device_);
labels.SetDevice(device_);
weights.SetDevice(device_);
dh::safe_cuda(cudaSetDevice(device_));
dh::safe_cuda(cudaSetDevice(device_.ordinal));
result = DeviceReduceMetrics(weights, labels, preds, n_class);
}
#endif // defined(XGBOOST_USE_CUDA)
@@ -154,7 +154,7 @@ class MultiClassMetricsReduction {
private:
#if defined(XGBOOST_USE_CUDA)
dh::PinnedMemory label_error_;
int device_{-1};
DeviceOrd device_{DeviceOrd::CPU()};
#endif // defined(XGBOOST_USE_CUDA)
};
@@ -176,7 +176,7 @@ struct EvalMClassBase : public MetricNoCache {
CHECK_GE(nclass, 1U)
<< "mlogloss and merror are only used for multi-class classification,"
<< " use logloss for binary classification";
int device = ctx_->gpu_id;
auto device = ctx_->Device();
auto result =
reducer_.Reduce(*ctx_, device, nclass, info.weights_, *info.labels.Data(), preds);
dat[0] = result.Residue();

View File

@@ -35,7 +35,7 @@ PackedReduceResult PreScore(Context const *ctx, MetaInfo const &info,
auto d_gptr = p_cache->DataGroupPtr(ctx);
auto d_label = info.labels.View(ctx->Device()).Slice(linalg::All(), 0);
predt.SetDevice(ctx->gpu_id);
predt.SetDevice(ctx->Device());
auto d_rank_idx = p_cache->SortedIdx(ctx, predt.ConstDeviceSpan());
auto topk = p_cache->Param().TopK();
auto d_weight = common::MakeOptionalWeights(ctx, info.weights_);
@@ -90,7 +90,7 @@ PackedReduceResult NDCGScore(Context const *ctx, MetaInfo const &info,
CHECK_EQ(d_weight.weights.size(), p_cache->Groups());
}
auto d_label = info.labels.View(ctx->Device()).Slice(linalg::All(), 0);
predt.SetDevice(ctx->gpu_id);
predt.SetDevice(ctx->Device());
auto d_predt = linalg::MakeTensorView(ctx, predt.ConstDeviceSpan(), predt.Size());
auto d_group_ptr = p_cache->DataGroupPtr(ctx);

View File

@@ -130,18 +130,18 @@ class ElementWiseSurvivalMetricsReduction {
const HostDeviceVector<bst_float>& preds) {
PackedReduceResult result;
if (ctx.gpu_id < 0) {
if (ctx.IsCPU()) {
result = CpuReduceMetrics(weights, labels_lower_bound, labels_upper_bound,
preds, ctx.Threads());
}
#if defined(XGBOOST_USE_CUDA)
else { // NOLINT
preds.SetDevice(ctx.gpu_id);
labels_lower_bound.SetDevice(ctx.gpu_id);
labels_upper_bound.SetDevice(ctx.gpu_id);
weights.SetDevice(ctx.gpu_id);
preds.SetDevice(ctx.Device());
labels_lower_bound.SetDevice(ctx.Device());
labels_upper_bound.SetDevice(ctx.Device());
weights.SetDevice(ctx.Device());
dh::safe_cuda(cudaSetDevice(ctx.gpu_id));
dh::safe_cuda(cudaSetDevice(ctx.Ordinal()));
result = DeviceReduceMetrics(weights, labels_lower_bound, labels_upper_bound, preds);
}
#endif // defined(XGBOOST_USE_CUDA)