Use the new DeviceOrd in the linalg module. (#9527)

2023-08-29 13:37:29 +08:00
parent 942b957eef
commit ddf2e68821
43 changed files with 252 additions and 273 deletions
--- a/src/metric/auc.cu
+++ b/src/metric/auc.cu
@@ -85,11 +85,11 @@ void InitCacheOnce(common::Span<float const> predts, std::shared_ptr<DeviceAUCCa
 template <typename Fn>
 std::tuple<double, double, double>
 GPUBinaryAUC(common::Span<float const> predts, MetaInfo const &info,
-             int32_t device, common::Span<size_t const> d_sorted_idx,
+             DeviceOrd device, common::Span<size_t const> d_sorted_idx,
             Fn area_fn, std::shared_ptr<DeviceAUCCache> cache) {
  auto labels = info.labels.View(device);
  auto weights = info.weights_.ConstDeviceSpan();
-  dh::safe_cuda(cudaSetDevice(device));
+  dh::safe_cuda(cudaSetDevice(device.ordinal));

  CHECK_NE(labels.Size(), 0);
  CHECK_EQ(labels.Size(), predts.size());
@@ -168,7 +168,7 @@ GPUBinaryAUC(common::Span<float const> predts, MetaInfo const &info,
 }

 std::tuple<double, double, double> GPUBinaryROCAUC(common::Span<float const> predts,
-                                                   MetaInfo const &info, std::int32_t device,
+                                                   MetaInfo const &info, DeviceOrd device,
                                                   std::shared_ptr<DeviceAUCCache> *p_cache) {
  auto &cache = *p_cache;
  InitCacheOnce<false>(predts, p_cache);
@@ -309,9 +309,10 @@ void SegmentedReduceAUC(common::Span<size_t const> d_unique_idx,
 * up each class in all kernels.
 */
 template <bool scale, typename Fn>
-double GPUMultiClassAUCOVR(MetaInfo const &info, int32_t device, common::Span<uint32_t> d_class_ptr,
-                           size_t n_classes, std::shared_ptr<DeviceAUCCache> cache, Fn area_fn) {
-  dh::safe_cuda(cudaSetDevice(device));
+double GPUMultiClassAUCOVR(MetaInfo const &info, DeviceOrd device,
+                           common::Span<uint32_t> d_class_ptr, size_t n_classes,
+                           std::shared_ptr<DeviceAUCCache> cache, Fn area_fn) {
+  dh::safe_cuda(cudaSetDevice(device.ordinal));
  /**
   * Sorted idx
   */
@@ -467,11 +468,12 @@ double GPUMultiClassROCAUC(Context const *ctx, common::Span<float const> predts,
  dh::TemporaryArray<uint32_t> class_ptr(n_classes + 1, 0);
  MultiClassSortedIdx(ctx, predts, dh::ToSpan(class_ptr), cache);

-  auto fn = [] XGBOOST_DEVICE(double fp_prev, double fp, double tp_prev,
-                              double tp, size_t /*class_id*/) {
+  auto fn = [] XGBOOST_DEVICE(double fp_prev, double fp, double tp_prev, double tp,
+                              size_t /*class_id*/) {
    return TrapezoidArea(fp_prev, fp, tp_prev, tp);
  };
-  return GPUMultiClassAUCOVR<true>(info, ctx->gpu_id, dh::ToSpan(class_ptr), n_classes, cache, fn);
+  return GPUMultiClassAUCOVR<true>(info, ctx->Device(), dh::ToSpan(class_ptr), n_classes, cache,
+                                   fn);
 }

 namespace {
@@ -512,7 +514,7 @@ std::pair<double, std::uint32_t> GPURankingAUC(Context const *ctx, common::Span<
  /**
   * Sort the labels
   */
-  auto d_labels = info.labels.View(ctx->gpu_id);
+  auto d_labels = info.labels.View(ctx->Device());

  auto d_sorted_idx = dh::ToSpan(cache->sorted_idx);
  common::SegmentedArgSort<false, false>(ctx, d_labels.Values(), d_group_ptr, d_sorted_idx);
@@ -604,7 +606,7 @@ std::pair<double, std::uint32_t> GPURankingAUC(Context const *ctx, common::Span<
 }

 std::tuple<double, double, double> GPUBinaryPRAUC(common::Span<float const> predts,
-                                                  MetaInfo const &info, std::int32_t device,
+                                                  MetaInfo const &info, DeviceOrd device,
                                                  std::shared_ptr<DeviceAUCCache> *p_cache) {
  auto& cache = *p_cache;
  InitCacheOnce<false>(predts, p_cache);
@@ -662,7 +664,7 @@ double GPUMultiClassPRAUC(Context const *ctx, common::Span<float const> predts,
  /**
   * Get total positive/negative
   */
-  auto labels = info.labels.View(ctx->gpu_id);
+  auto labels = info.labels.View(ctx->Device());
  auto n_samples = info.num_row_;
  dh::caching_device_vector<Pair> totals(n_classes);
  auto key_it =
@@ -695,13 +697,13 @@ double GPUMultiClassPRAUC(Context const *ctx, common::Span<float const> predts,
    return detail::CalcDeltaPRAUC(fp_prev, fp, tp_prev, tp,
                                  d_totals[class_id].first);
  };
-  return GPUMultiClassAUCOVR<false>(info, ctx->gpu_id, d_class_ptr, n_classes, cache, fn);
+  return GPUMultiClassAUCOVR<false>(info, ctx->Device(), d_class_ptr, n_classes, cache, fn);
 }

 template <typename Fn>
 std::pair<double, uint32_t>
 GPURankingPRAUCImpl(common::Span<float const> predts, MetaInfo const &info,
-                    common::Span<uint32_t> d_group_ptr, int32_t device,
+                    common::Span<uint32_t> d_group_ptr, DeviceOrd device,
                    std::shared_ptr<DeviceAUCCache> cache, Fn area_fn) {
  /**
   * Sorted idx
@@ -843,7 +845,7 @@ std::pair<double, std::uint32_t> GPURankingPRAUC(Context const *ctx,
  common::SegmentedArgSort<false, false>(ctx, predts, d_group_ptr, d_sorted_idx);

  dh::XGBDeviceAllocator<char> alloc;
-  auto labels = info.labels.View(ctx->gpu_id);
+  auto labels = info.labels.View(ctx->Device());
  if (thrust::any_of(thrust::cuda::par(alloc), dh::tbegin(labels.Values()),
                     dh::tend(labels.Values()), PRAUCLabelInvalid{})) {
    InvalidLabels();
@@ -882,7 +884,7 @@ std::pair<double, std::uint32_t> GPURankingPRAUC(Context const *ctx,
    return detail::CalcDeltaPRAUC(fp_prev, fp, tp_prev, tp,
                                  d_totals[group_id].first);
  };
-  return GPURankingPRAUCImpl(predts, info, d_group_ptr, ctx->gpu_id, cache, fn);
+  return GPURankingPRAUCImpl(predts, info, d_group_ptr, ctx->Device(), cache, fn);
 }
 }  // namespace metric
 }  // namespace xgboost