Use the new DeviceOrd in the linalg module. (#9527)

2023-08-29 13:37:29 +08:00
parent 942b957eef
commit ddf2e68821
43 changed files with 252 additions and 273 deletions
--- a/src/objective/adaptive.cu
+++ b/src/objective/adaptive.cu
@@ -19,7 +19,7 @@ void EncodeTreeLeafDevice(Context const* ctx, common::Span<bst_node_t const> pos
                          dh::device_vector<size_t>* p_ridx, HostDeviceVector<size_t>* p_nptr,
                          HostDeviceVector<bst_node_t>* p_nidx, RegTree const& tree) {
  // copy position to buffer
-  dh::safe_cuda(cudaSetDevice(ctx->gpu_id));
+  dh::safe_cuda(cudaSetDevice(ctx->Ordinal()));
  auto cuctx = ctx->CUDACtx();
  size_t n_samples = position.size();
  dh::device_vector<bst_node_t> sorted_position(position.size());
@@ -86,11 +86,11 @@ void EncodeTreeLeafDevice(Context const* ctx, common::Span<bst_node_t const> pos
   */
  auto& nidx = *p_nidx;
  auto& nptr = *p_nptr;
-  nidx.SetDevice(ctx->gpu_id);
+  nidx.SetDevice(ctx->Device());
  nidx.Resize(n_leaf);
  auto d_node_idx = nidx.DeviceSpan();

-  nptr.SetDevice(ctx->gpu_id);
+  nptr.SetDevice(ctx->Device());
  nptr.Resize(n_leaf + 1, 0);
  auto d_node_ptr = nptr.DeviceSpan();

@@ -142,7 +142,7 @@ void EncodeTreeLeafDevice(Context const* ctx, common::Span<bst_node_t const> pos
 void UpdateTreeLeafDevice(Context const* ctx, common::Span<bst_node_t const> position,
                          std::int32_t group_idx, MetaInfo const& info, float learning_rate,
                          HostDeviceVector<float> const& predt, float alpha, RegTree* p_tree) {
-  dh::safe_cuda(cudaSetDevice(ctx->gpu_id));
+  dh::safe_cuda(cudaSetDevice(ctx->Ordinal()));
  dh::device_vector<size_t> ridx;
  HostDeviceVector<size_t> nptr;
  HostDeviceVector<bst_node_t> nidx;
@@ -155,13 +155,13 @@ void UpdateTreeLeafDevice(Context const* ctx, common::Span<bst_node_t const> pos
  }

  HostDeviceVector<float> quantiles;
-  predt.SetDevice(ctx->gpu_id);
+  predt.SetDevice(ctx->Device());

  auto d_predt = linalg::MakeTensorView(ctx, predt.ConstDeviceSpan(), info.num_row_,
                                        predt.Size() / info.num_row_);
  CHECK_LT(group_idx, d_predt.Shape(1));
  auto t_predt = d_predt.Slice(linalg::All(), group_idx);
-  auto d_labels = info.labels.View(ctx->gpu_id).Slice(linalg::All(), IdxY(info, group_idx));
+  auto d_labels = info.labels.View(ctx->Device()).Slice(linalg::All(), IdxY(info, group_idx));

  auto d_row_index = dh::ToSpan(ridx);
  auto seg_beg = nptr.DevicePointer();
@@ -178,7 +178,7 @@ void UpdateTreeLeafDevice(Context const* ctx, common::Span<bst_node_t const> pos
  if (info.weights_.Empty()) {
    common::SegmentedQuantile(ctx, alpha, seg_beg, seg_end, val_beg, val_end, &quantiles);
  } else {
-    info.weights_.SetDevice(ctx->gpu_id);
+    info.weights_.SetDevice(ctx->Device());
    auto d_weights = info.weights_.ConstDeviceSpan();
    CHECK_EQ(d_weights.size(), d_row_index.size());
    auto w_it = thrust::make_permutation_iterator(dh::tcbegin(d_weights), dh::tcbegin(d_row_index));
--- a/src/objective/lambdarank_obj.cc
+++ b/src/objective/lambdarank_obj.cc
@@ -109,12 +109,12 @@ class LambdaRankObj : public FitIntercept {
    lj_.SetDevice(ctx_->gpu_id);

    if (ctx_->IsCPU()) {
-      cpu_impl::LambdaRankUpdatePositionBias(ctx_, li_full_.View(ctx_->gpu_id),
-                                             lj_full_.View(ctx_->gpu_id), &ti_plus_, &tj_minus_,
+      cpu_impl::LambdaRankUpdatePositionBias(ctx_, li_full_.View(ctx_->Device()),
+                                             lj_full_.View(ctx_->Device()), &ti_plus_, &tj_minus_,
                                             &li_, &lj_, p_cache_);
    } else {
-      cuda_impl::LambdaRankUpdatePositionBias(ctx_, li_full_.View(ctx_->gpu_id),
-                                              lj_full_.View(ctx_->gpu_id), &ti_plus_, &tj_minus_,
+      cuda_impl::LambdaRankUpdatePositionBias(ctx_, li_full_.View(ctx_->Device()),
+                                              lj_full_.View(ctx_->Device()), &ti_plus_, &tj_minus_,
                                              &li_, &lj_, p_cache_);
    }

@@ -354,9 +354,9 @@ class LambdaRankNDCG : public LambdaRankObj<LambdaRankNDCG, ltr::NDCGCache> {
                       const MetaInfo& info, linalg::Matrix<GradientPair>* out_gpair) {
    if (ctx_->IsCUDA()) {
      cuda_impl::LambdaRankGetGradientNDCG(
-          ctx_, iter, predt, info, GetCache(), ti_plus_.View(ctx_->gpu_id),
-          tj_minus_.View(ctx_->gpu_id), li_full_.View(ctx_->gpu_id), lj_full_.View(ctx_->gpu_id),
-          out_gpair);
+          ctx_, iter, predt, info, GetCache(), ti_plus_.View(ctx_->Device()),
+          tj_minus_.View(ctx_->Device()), li_full_.View(ctx_->Device()),
+          lj_full_.View(ctx_->Device()), out_gpair);
      return;
    }

@@ -477,9 +477,9 @@ class LambdaRankMAP : public LambdaRankObj<LambdaRankMAP, ltr::MAPCache> {
    CHECK(param_.ndcg_exp_gain) << "NDCG gain can not be set for the MAP objective.";
    if (ctx_->IsCUDA()) {
      return cuda_impl::LambdaRankGetGradientMAP(
-          ctx_, iter, predt, info, GetCache(), ti_plus_.View(ctx_->gpu_id),
-          tj_minus_.View(ctx_->gpu_id), li_full_.View(ctx_->gpu_id), lj_full_.View(ctx_->gpu_id),
-          out_gpair);
+          ctx_, iter, predt, info, GetCache(), ti_plus_.View(ctx_->Device()),
+          tj_minus_.View(ctx_->Device()), li_full_.View(ctx_->Device()),
+          lj_full_.View(ctx_->Device()), out_gpair);
    }

    auto gptr = p_cache_->DataGroupPtr(ctx_).data();
@@ -567,9 +567,9 @@ class LambdaRankPairwise : public LambdaRankObj<LambdaRankPairwise, ltr::Ranking
    CHECK(param_.ndcg_exp_gain) << "NDCG gain can not be set for the pairwise objective.";
    if (ctx_->IsCUDA()) {
      return cuda_impl::LambdaRankGetGradientPairwise(
-          ctx_, iter, predt, info, GetCache(), ti_plus_.View(ctx_->gpu_id),
-          tj_minus_.View(ctx_->gpu_id), li_full_.View(ctx_->gpu_id), lj_full_.View(ctx_->gpu_id),
-          out_gpair);
+          ctx_, iter, predt, info, GetCache(), ti_plus_.View(ctx_->Device()),
+          tj_minus_.View(ctx_->Device()), li_full_.View(ctx_->Device()),
+          lj_full_.View(ctx_->Device()), out_gpair);
    }

    auto gptr = p_cache_->DataGroupPtr(ctx_);
--- a/src/objective/lambdarank_obj.cu
+++ b/src/objective/lambdarank_obj.cu
@@ -306,7 +306,7 @@ void Launch(Context const* ctx, std::int32_t iter, HostDeviceVector<float> const

  CHECK_NE(d_rounding.Size(), 0);

-  auto label = info.labels.View(ctx->gpu_id);
+  auto label = info.labels.View(ctx->Device());
  auto predts = preds.ConstDeviceSpan();
  auto gpairs = out_gpair->View(ctx->Device());
  thrust::fill_n(ctx->CUDACtx()->CTP(), gpairs.Values().data(), gpairs.Size(),
@@ -348,7 +348,7 @@ common::Span<std::size_t const> SortY(Context const* ctx, MetaInfo const& info,
                                      common::Span<std::size_t const> d_rank,
                                      std::shared_ptr<ltr::RankingCache> p_cache) {
  auto const d_group_ptr = p_cache->DataGroupPtr(ctx);
-  auto label = info.labels.View(ctx->gpu_id);
+  auto label = info.labels.View(ctx->Device());
  // The buffer for ranked y is necessary as cub segmented sort accepts only pointer.
  auto d_y_ranked = p_cache->RankedY(ctx, info.num_row_);
  thrust::for_each_n(ctx->CUDACtx()->CTP(), thrust::make_counting_iterator(0ul), d_y_ranked.size(),
@@ -374,13 +374,13 @@ void LambdaRankGetGradientNDCG(Context const* ctx, std::int32_t iter,
                               linalg::VectorView<double> li, linalg::VectorView<double> lj,
                               linalg::Matrix<GradientPair>* out_gpair) {
  // boilerplate
-  std::int32_t device_id = ctx->gpu_id;
-  dh::safe_cuda(cudaSetDevice(device_id));
+  auto device = ctx->Device();
+  dh::safe_cuda(cudaSetDevice(device.ordinal));
  auto const d_inv_IDCG = p_cache->InvIDCG(ctx);
  auto const discount = p_cache->Discount(ctx);

-  info.labels.SetDevice(device_id);
-  preds.SetDevice(device_id);
+  info.labels.SetDevice(device);
+  preds.SetDevice(device);

  auto const exp_gain = p_cache->Param().ndcg_exp_gain;
  auto delta_ndcg = [=] XGBOOST_DEVICE(float y_high, float y_low, std::size_t rank_high,
@@ -403,7 +403,7 @@ void MAPStat(Context const* ctx, MetaInfo const& info, common::Span<std::size_t
  auto key_it = dh::MakeTransformIterator<std::size_t>(
      thrust::make_counting_iterator(0ul),
      [=] XGBOOST_DEVICE(std::size_t i) -> std::size_t { return dh::SegmentId(group_ptr, i); });
-  auto label = info.labels.View(ctx->gpu_id).Slice(linalg::All(), 0);
+  auto label = info.labels.View(ctx->Device()).Slice(linalg::All(), 0);
  auto const* cuctx = ctx->CUDACtx();

  {
@@ -442,11 +442,11 @@ void LambdaRankGetGradientMAP(Context const* ctx, std::int32_t iter,
                              linalg::VectorView<double const> tj_minus,  // input bias ratio
                              linalg::VectorView<double> li, linalg::VectorView<double> lj,
                              linalg::Matrix<GradientPair>* out_gpair) {
-  std::int32_t device_id = ctx->gpu_id;
-  dh::safe_cuda(cudaSetDevice(device_id));
+  auto device = ctx->Device();
+  dh::safe_cuda(cudaSetDevice(device.ordinal));

-  info.labels.SetDevice(device_id);
-  predt.SetDevice(device_id);
+  info.labels.SetDevice(device);
+  predt.SetDevice(device);

  CHECK(p_cache);

@@ -481,11 +481,11 @@ void LambdaRankGetGradientPairwise(Context const* ctx, std::int32_t iter,
                                   linalg::VectorView<double const> tj_minus,  // input bias ratio
                                   linalg::VectorView<double> li, linalg::VectorView<double> lj,
                                   linalg::Matrix<GradientPair>* out_gpair) {
-  std::int32_t device_id = ctx->gpu_id;
-  dh::safe_cuda(cudaSetDevice(device_id));
+  auto device = ctx->Device();
+  dh::safe_cuda(cudaSetDevice(device.ordinal));

-  info.labels.SetDevice(device_id);
-  predt.SetDevice(device_id);
+  info.labels.SetDevice(device);
+  predt.SetDevice(device);

  auto d_predt = predt.ConstDeviceSpan();
  auto const d_sorted_idx = p_cache->SortedIdx(ctx, d_predt);
@@ -517,11 +517,11 @@ void LambdaRankUpdatePositionBias(Context const* ctx, linalg::VectorView<double
  auto const d_group_ptr = p_cache->DataGroupPtr(ctx);
  auto n_groups = d_group_ptr.size() - 1;

-  auto ti_plus = p_ti_plus->View(ctx->gpu_id);
-  auto tj_minus = p_tj_minus->View(ctx->gpu_id);
+  auto ti_plus = p_ti_plus->View(ctx->Device());
+  auto tj_minus = p_tj_minus->View(ctx->Device());

-  auto li = p_li->View(ctx->gpu_id);
-  auto lj = p_lj->View(ctx->gpu_id);
+  auto li = p_li->View(ctx->Device());
+  auto lj = p_lj->View(ctx->Device());
  CHECK_EQ(li.Size(), ti_plus.Size());

  auto const& param = p_cache->Param();
--- a/src/objective/quantile_obj.cu
+++ b/src/objective/quantile_obj.cu
@@ -62,7 +62,7 @@ class QuantileRegression : public ObjFunction {
    CHECK_GE(n_targets, n_alphas);
    CHECK_EQ(preds.Size(), info.num_row_ * n_targets);

-    auto labels = info.labels.View(ctx_->gpu_id);
+    auto labels = info.labels.View(ctx_->Device());

    out_gpair->SetDevice(ctx_->Device());
    CHECK_EQ(info.labels.Shape(1), 1)
@@ -131,7 +131,7 @@ class QuantileRegression : public ObjFunction {
 #if defined(XGBOOST_USE_CUDA)
      alpha_.SetDevice(ctx_->gpu_id);
      auto d_alpha = alpha_.ConstDeviceSpan();
-      auto d_labels = info.labels.View(ctx_->gpu_id);
+      auto d_labels = info.labels.View(ctx_->Device());
      auto seg_it = dh::MakeTransformIterator<std::size_t>(
          thrust::make_counting_iterator(0ul),
          [=] XGBOOST_DEVICE(std::size_t i) { return i * d_labels.Shape(0); });
--- a/src/objective/regression_obj.cu
+++ b/src/objective/regression_obj.cu
@@ -69,7 +69,7 @@ class RegLossObj : public FitIntercept {

 public:
  void ValidateLabel(MetaInfo const& info) {
-    auto label = info.labels.View(ctx_->Ordinal());
+    auto label = info.labels.View(ctx_->Device());
    auto valid = ctx_->DispatchDevice(
        [&] {
          return std::all_of(linalg::cbegin(label), linalg::cend(label),
@@ -244,7 +244,7 @@ class PseudoHuberRegression : public FitIntercept {
    CheckRegInputs(info, preds);
    auto slope = param_.huber_slope;
    CHECK_NE(slope, 0.0) << "slope for pseudo huber cannot be 0.";
-    auto labels = info.labels.View(ctx_->gpu_id);
+    auto labels = info.labels.View(ctx_->Device());

    out_gpair->SetDevice(ctx_->gpu_id);
    out_gpair->Reshape(info.num_row_, this->Targets(info));
@@ -698,7 +698,7 @@ class MeanAbsoluteError : public ObjFunction {
  void GetGradient(HostDeviceVector<float> const& preds, const MetaInfo& info,
                   std::int32_t /*iter*/, linalg::Matrix<GradientPair>* out_gpair) override {
    CheckRegInputs(info, preds);
-    auto labels = info.labels.View(ctx_->gpu_id);
+    auto labels = info.labels.View(ctx_->Device());

    out_gpair->SetDevice(ctx_->Device());
    out_gpair->Reshape(info.num_row_, this->Targets(info));