Use the new DeviceOrd in the linalg module. (#9527)

Author: Jiaming Yuan
Date: 2023-08-29 13:37:29 +08:00
Committed by: GitHub
Parent: 942b957eef
Commit: ddf2e68821
43 changed files with 252 additions and 273 deletions
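The pattern throughout the diff is the same: call sites that used to pass the raw integer ordinal `ctx->gpu_id` now pass the `DeviceOrd` returned by `ctx->Device()`, and code that still needs a plain CUDA ordinal reads its `.ordinal` field (as in the `cudaSetDevice` hunk below). The following is a minimal, self-contained sketch of that idea; only `Device()` and `.ordinal` come from the diff itself, and the rest of the struct is assumed for illustration rather than being XGBoost's actual definition.

```cpp
// Simplified stand-in for the DeviceOrd idea; not XGBoost's actual definition.
#include <cstdint>
#include <iostream>

struct DeviceOrd {
  enum Type : std::int16_t { kCPU = 0, kCUDA = 1 } device{kCPU};
  std::int32_t ordinal{-1};  // raw CUDA ordinal, meaningful only when device == kCUDA

  static DeviceOrd CPU() { return {kCPU, -1}; }
  static DeviceOrd CUDA(std::int32_t ord) { return {kCUDA, ord}; }
  bool IsCUDA() const { return device == kCUDA; }
  bool IsCPU() const { return device == kCPU; }
};

struct Context {
  DeviceOrd device{DeviceOrd::CPU()};
  // New-style accessor used throughout this commit: callers receive the whole
  // DeviceOrd instead of a bare integer such as the old gpu_id member.
  DeviceOrd Device() const { return device; }
};

int main() {
  Context ctx;
  ctx.device = DeviceOrd::CUDA(0);
  // Old call sites:  SetDevice(ctx.gpu_id)
  // New call sites:  SetDevice(ctx.Device()), and .ordinal where an int is required.
  std::cout << "is_cuda=" << ctx.Device().IsCUDA()
            << " ordinal=" << ctx.Device().ordinal << "\n";
  return 0;
}
```

Bundling the device type and ordinal in one value is what lets the CPU and CUDA branches in the hunks below take the same argument instead of interpreting a sentinel integer.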

View File

@@ -13,7 +13,7 @@ namespace xgboost {
namespace linalg {
template <typename T, int32_t D, typename Fn>
void ElementWiseKernelDevice(linalg::TensorView<T, D> t, Fn&& fn, cudaStream_t s = nullptr) {
-dh::safe_cuda(cudaSetDevice(t.DeviceIdx()));
+dh::safe_cuda(cudaSetDevice(t.Device().ordinal));
static_assert(std::is_void<std::result_of_t<Fn(size_t, T&)>>::value,
"For function with return, use transform instead.");
if (t.Contiguous()) {

View File

@@ -133,7 +133,7 @@ struct WeightOp {
void RankingCache::InitOnCUDA(Context const* ctx, MetaInfo const& info) {
CUDAContext const* cuctx = ctx->CUDACtx();
-group_ptr_.SetDevice(ctx->gpu_id);
+group_ptr_.SetDevice(ctx->Device());
if (info.group_ptr_.empty()) {
group_ptr_.Resize(2, 0);
group_ptr_.HostVector()[1] = info.num_row_;
@@ -153,7 +153,7 @@ void RankingCache::InitOnCUDA(Context const* ctx, MetaInfo const& info) {
max_group_size_ =
thrust::reduce(cuctx->CTP(), it, it + n_groups, 0ul, thrust::maximum<std::size_t>{});
-threads_group_ptr_.SetDevice(ctx->gpu_id);
+threads_group_ptr_.SetDevice(ctx->Device());
threads_group_ptr_.Resize(n_groups + 1, 0);
auto d_threads_group_ptr = threads_group_ptr_.DeviceSpan();
if (param_.HasTruncation()) {
@@ -168,7 +168,7 @@ void RankingCache::InitOnCUDA(Context const* ctx, MetaInfo const& info) {
n_cuda_threads_ = info.num_row_ * param_.NumPair();
}
-sorted_idx_cache_.SetDevice(ctx->gpu_id);
+sorted_idx_cache_.SetDevice(ctx->Device());
sorted_idx_cache_.Resize(info.labels.Size(), 0);
auto weight = common::MakeOptionalWeights(ctx, info.weights_);
@@ -187,18 +187,18 @@ common::Span<std::size_t const> RankingCache::MakeRankOnCUDA(Context const* ctx,
void NDCGCache::InitOnCUDA(Context const* ctx, MetaInfo const& info) {
CUDAContext const* cuctx = ctx->CUDACtx();
-auto labels = info.labels.View(ctx->gpu_id).Slice(linalg::All(), 0);
+auto labels = info.labels.View(ctx->Device()).Slice(linalg::All(), 0);
CheckNDCGLabels(this->Param(), labels, CheckNDCGOp{cuctx});
auto d_group_ptr = this->DataGroupPtr(ctx);
std::size_t n_groups = d_group_ptr.size() - 1;
inv_idcg_ = linalg::Zeros<double>(ctx, n_groups);
-auto d_inv_idcg = inv_idcg_.View(ctx->gpu_id);
+auto d_inv_idcg = inv_idcg_.View(ctx->Device());
cuda_impl::CalcQueriesInvIDCG(ctx, labels, d_group_ptr, d_inv_idcg, this->Param());
CHECK_GE(this->Param().NumPair(), 1ul);
-discounts_.SetDevice(ctx->gpu_id);
+discounts_.SetDevice(ctx->Device());
discounts_.Resize(MaxGroupSize());
auto d_discount = discounts_.DeviceSpan();
dh::LaunchN(MaxGroupSize(), cuctx->Stream(),
@@ -206,12 +206,12 @@ void NDCGCache::InitOnCUDA(Context const* ctx, MetaInfo const& info) {
}
void PreCache::InitOnCUDA(Context const* ctx, MetaInfo const& info) {
-auto const d_label = info.labels.View(ctx->gpu_id).Slice(linalg::All(), 0);
+auto const d_label = info.labels.View(ctx->Device()).Slice(linalg::All(), 0);
CheckPreLabels("pre", d_label, CheckMAPOp{ctx->CUDACtx()});
}
void MAPCache::InitOnCUDA(Context const* ctx, MetaInfo const& info) {
-auto const d_label = info.labels.View(ctx->gpu_id).Slice(linalg::All(), 0);
+auto const d_label = info.labels.View(ctx->Device()).Slice(linalg::All(), 0);
CheckPreLabels("map", d_label, CheckMAPOp{ctx->CUDACtx()});
}
} // namespace xgboost::ltr
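Two call shapes recur throughout the file above: `HostDeviceVector`-style buffers get `SetDevice(ctx->Device())` before their device spans are used, and `linalg` tensors are viewed with `View(ctx->Device())`. Below is a toy sketch of those call shapes; the buffer type and its members are simplified stand-ins, not XGBoost's real `HostDeviceVector` or `linalg::Tensor` (which manage actual device allocations and lazy host/device copies).

```cpp
#include <cstddef>
#include <vector>

struct DeviceOrd {
  enum Type { kCPU, kCUDA } device{kCPU};
  int ordinal{-1};
  bool IsCUDA() const { return device == kCUDA; }
  static DeviceOrd CUDA(int ord) { return {kCUDA, ord}; }
};

template <typename T>
class HostDeviceBuffer {
  std::vector<T> data_;
  DeviceOrd device_{};

 public:
  // After this commit the target device is described by a DeviceOrd rather
  // than an integer gpu_id.
  void SetDevice(DeviceOrd d) { device_ = d; }
  void Resize(std::size_t n, T v = T{}) { data_.resize(n, v); }
  // A real implementation would hand back device-resident memory when
  // d.IsCUDA(); this sketch always exposes the host storage.
  T const* View(DeviceOrd d) const { (void)d; return data_.data(); }
};

int main() {
  DeviceOrd dev = DeviceOrd::CUDA(0);
  HostDeviceBuffer<unsigned> group_ptr;
  group_ptr.SetDevice(dev);              // was: group_ptr_.SetDevice(ctx->gpu_id)
  group_ptr.Resize(2, 0u);
  auto const* span = group_ptr.View(dev);  // was: View(ctx->gpu_id)
  return span == nullptr ? 1 : 0;
}
```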

View File

@@ -217,7 +217,7 @@ class RankingCache {
}
// Constructed as [1, n_samples] if group ptr is not supplied by the user
common::Span<bst_group_t const> DataGroupPtr(Context const* ctx) const {
-group_ptr_.SetDevice(ctx->gpu_id);
+group_ptr_.SetDevice(ctx->Device());
return ctx->IsCPU() ? group_ptr_.ConstHostSpan() : group_ptr_.ConstDeviceSpan();
}
@@ -228,7 +228,7 @@ class RankingCache {
// Create a rank list by model prediction
common::Span<std::size_t const> SortedIdx(Context const* ctx, common::Span<float const> predt) {
if (sorted_idx_cache_.Empty()) {
-sorted_idx_cache_.SetDevice(ctx->gpu_id);
+sorted_idx_cache_.SetDevice(ctx->Device());
sorted_idx_cache_.Resize(predt.size());
}
if (ctx->IsCPU()) {
@@ -242,7 +242,7 @@ class RankingCache {
common::Span<std::size_t> SortedIdxY(Context const* ctx, std::size_t n_samples) {
CHECK(ctx->IsCUDA()) << error::InvalidCUDAOrdinal();
if (y_sorted_idx_cache_.Empty()) {
-y_sorted_idx_cache_.SetDevice(ctx->gpu_id);
+y_sorted_idx_cache_.SetDevice(ctx->Device());
y_sorted_idx_cache_.Resize(n_samples);
}
return y_sorted_idx_cache_.DeviceSpan();
@@ -250,7 +250,7 @@ class RankingCache {
common::Span<float> RankedY(Context const* ctx, std::size_t n_samples) {
CHECK(ctx->IsCUDA()) << error::InvalidCUDAOrdinal();
if (y_ranked_by_model_.Empty()) {
-y_ranked_by_model_.SetDevice(ctx->gpu_id);
+y_ranked_by_model_.SetDevice(ctx->Device());
y_ranked_by_model_.Resize(n_samples);
}
return y_ranked_by_model_.DeviceSpan();
@@ -266,21 +266,21 @@ class RankingCache {
linalg::VectorView<GradientPair> CUDARounding(Context const* ctx) {
if (roundings_.Size() == 0) {
-roundings_.SetDevice(ctx->gpu_id);
+roundings_.SetDevice(ctx->Device());
roundings_.Reshape(Groups());
}
-return roundings_.View(ctx->gpu_id);
+return roundings_.View(ctx->Device());
}
common::Span<double> CUDACostRounding(Context const* ctx) {
if (cost_rounding_.Size() == 0) {
-cost_rounding_.SetDevice(ctx->gpu_id);
+cost_rounding_.SetDevice(ctx->Device());
cost_rounding_.Resize(1);
}
return cost_rounding_.DeviceSpan();
}
template <typename Type>
common::Span<Type> MaxLambdas(Context const* ctx, std::size_t n) {
-max_lambdas_.SetDevice(ctx->gpu_id);
+max_lambdas_.SetDevice(ctx->Device());
std::size_t bytes = n * sizeof(Type);
if (bytes != max_lambdas_.Size()) {
max_lambdas_.Resize(bytes);
@@ -315,17 +315,17 @@ class NDCGCache : public RankingCache {
}
linalg::VectorView<double const> InvIDCG(Context const* ctx) const {
-return inv_idcg_.View(ctx->gpu_id);
+return inv_idcg_.View(ctx->Device());
}
common::Span<double const> Discount(Context const* ctx) const {
return ctx->IsCPU() ? discounts_.ConstHostSpan() : discounts_.ConstDeviceSpan();
}
linalg::VectorView<double> Dcg(Context const* ctx) {
if (dcg_.Size() == 0) {
-dcg_.SetDevice(ctx->gpu_id);
+dcg_.SetDevice(ctx->Device());
dcg_.Reshape(this->Groups());
}
-return dcg_.View(ctx->gpu_id);
+return dcg_.View(ctx->Device());
}
};
@@ -396,7 +396,7 @@ class PreCache : public RankingCache {
common::Span<double> Pre(Context const* ctx) {
if (pre_.Empty()) {
-pre_.SetDevice(ctx->gpu_id);
+pre_.SetDevice(ctx->Device());
pre_.Resize(this->Groups());
}
return ctx->IsCPU() ? pre_.HostSpan() : pre_.DeviceSpan();
@@ -427,21 +427,21 @@ class MAPCache : public RankingCache {
common::Span<double> NumRelevant(Context const* ctx) {
if (n_rel_.Empty()) {
-n_rel_.SetDevice(ctx->gpu_id);
+n_rel_.SetDevice(ctx->Device());
n_rel_.Resize(n_samples_);
}
return ctx->IsCPU() ? n_rel_.HostSpan() : n_rel_.DeviceSpan();
}
common::Span<double> Acc(Context const* ctx) {
if (acc_.Empty()) {
-acc_.SetDevice(ctx->gpu_id);
+acc_.SetDevice(ctx->Device());
acc_.Resize(n_samples_);
}
return ctx->IsCPU() ? acc_.HostSpan() : acc_.DeviceSpan();
}
common::Span<double> Map(Context const* ctx) {
if (map_.Empty()) {
-map_.SetDevice(ctx->gpu_id);
+map_.SetDevice(ctx->Device());
map_.Resize(this->Groups());
}
return ctx->IsCPU() ? map_.HostSpan() : map_.DeviceSpan();
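Most of the header changes are one recurring lazy-allocation pattern: a cache buffer is placed on the context's device the first time it is requested, then returned as a host or device span depending on where the context runs. The sketch below condenses that pattern, following the shape of `NumRelevant()`/`Acc()`/`Map()` above; all types here are simplified stand-ins, not XGBoost's `Context`, `DeviceOrd`, or `HostDeviceVector`.

```cpp
#include <cstddef>
#include <vector>

struct DeviceOrd {
  enum Type { kCPU, kCUDA } device{kCPU};
  int ordinal{-1};
};

struct Context {
  DeviceOrd device{};
  DeviceOrd Device() const { return device; }
  bool IsCPU() const { return device.device == DeviceOrd::kCPU; }
};

class MAPCacheLike {
  std::vector<double> host_;    // host-side storage
  std::vector<double> device_;  // pretend device-resident storage
  std::size_t n_samples_;

 public:
  explicit MAPCacheLike(std::size_t n_samples) : n_samples_{n_samples} {}

  // Allocate on the context's device the first time, then return the span
  // that matches where the context runs.
  double* NumRelevant(Context const* ctx) {
    if (host_.empty()) {
      // In the real code: n_rel_.SetDevice(ctx->Device()) (was ctx->gpu_id),
      // followed by n_rel_.Resize(n_samples_).
      host_.resize(n_samples_, 0.0);
      device_.resize(n_samples_, 0.0);
    }
    return ctx->IsCPU() ? host_.data() : device_.data();
  }
};

int main() {
  Context ctx;  // defaults to CPU in this sketch
  MAPCacheLike cache{16};
  return cache.NumRelevant(&ctx) != nullptr ? 0 : 1;
}
```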

View File

@@ -20,9 +20,9 @@ namespace common {
void Median(Context const* ctx, linalg::Tensor<float, 2> const& t,
HostDeviceVector<float> const& weights, linalg::Tensor<float, 1>* out) {
if (!ctx->IsCPU()) {
-weights.SetDevice(ctx->gpu_id);
+weights.SetDevice(ctx->Device());
auto opt_weights = OptionalWeights(weights.ConstDeviceSpan());
-auto t_v = t.View(ctx->gpu_id);
+auto t_v = t.View(ctx->Device());
cuda_impl::Median(ctx, t_v, opt_weights, out);
}
@@ -59,7 +59,7 @@ void Mean(Context const* ctx, linalg::Vector<float> const& v, linalg::Vector<flo
auto ret = std::accumulate(tloc.cbegin(), tloc.cend(), .0f);
out->HostView()(0) = ret;
} else {
-cuda_impl::Mean(ctx, v.View(ctx->gpu_id), out->View(ctx->gpu_id));
+cuda_impl::Mean(ctx, v.View(ctx->Device()), out->View(ctx->Device()));
}
}
} // namespace common
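The `Mean()` hunk above shows the dispatch shape: accumulate on the host when the context is on CPU, otherwise hand `DeviceOrd`-selected views to the CUDA implementation. Below is a rough stand-alone sketch of that dispatch with simplified stand-in types; `cuda_impl::Mean` here is a host stub only marking the call shape, since the real one launches device work on the selected views.

```cpp
#include <numeric>
#include <vector>

struct DeviceOrd {
  enum Type { kCPU, kCUDA } device{kCPU};
  int ordinal{-1};
};

struct Context {
  DeviceOrd device{};
  DeviceOrd Device() const { return device; }
  bool IsCPU() const { return device.device == DeviceOrd::kCPU; }
};

namespace cuda_impl {
// Stand-in for the device implementation; the real one receives views created
// with ctx->Device() and reduces on the GPU.
inline void Mean(DeviceOrd /*device*/, std::vector<float> const& v, float* out) {
  *out = v.empty() ? 0.f : std::accumulate(v.cbegin(), v.cend(), 0.f) / v.size();
}
}  // namespace cuda_impl

inline void Mean(Context const* ctx, std::vector<float> const& v, float* out) {
  if (ctx->IsCPU()) {
    auto sum = std::accumulate(v.cbegin(), v.cend(), 0.f);
    *out = v.empty() ? 0.f : sum / v.size();
  } else {
    // was: cuda_impl::Mean(ctx, v.View(ctx->gpu_id), out->View(ctx->gpu_id))
    cuda_impl::Mean(ctx->Device(), v, out);
  }
}

int main() {
  Context ctx;  // CPU by default in this sketch
  std::vector<float> v{1.f, 2.f, 3.f};
  float out = 0.f;
  Mean(&ctx, v, &out);
  return out == 2.f ? 0 : 1;
}
```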