Remove use of device_idx in dh::LaunchN. (#7063)

It's an unused parameter; removing it makes the CI log more readable.
2021-06-29 11:37:26 +08:00
parent dd4db347f3
commit 1c8fdf2218
25 changed files with 105 additions and 107 deletions
--- a/src/common/device_helpers.cuh
+++ b/src/common/device_helpers.cuh
@@ -279,7 +279,7 @@ class LaunchKernel {
 };

 template <int ITEMS_PER_THREAD = 8, int BLOCK_THREADS = 256, typename L>
-inline void LaunchN(int device_idx, size_t n, cudaStream_t stream, L lambda) {
+inline void LaunchN(size_t n, cudaStream_t stream, L lambda) {
  if (n == 0) {
    return;
  }
@@ -291,13 +291,13 @@ inline void LaunchN(int device_idx, size_t n, cudaStream_t stream, L lambda) {

 // Default stream version
 template <int ITEMS_PER_THREAD = 8, int BLOCK_THREADS = 256, typename L>
-inline void LaunchN(int device_idx, size_t n, L lambda) {
-  LaunchN<ITEMS_PER_THREAD, BLOCK_THREADS>(device_idx, n, nullptr, lambda);
+inline void LaunchN(size_t n, L lambda) {
+  LaunchN<ITEMS_PER_THREAD, BLOCK_THREADS>(n, nullptr, lambda);
 }

 template <typename Container>
-void Iota(Container array, int32_t device = CurrentDevice()) {
-  LaunchN(device, array.size(), [=] __device__(size_t i) { array[i] = i; });
+void Iota(Container array) {
+  LaunchN(array.size(), [=] __device__(size_t i) { array[i] = i; });
 }

 namespace detail {
@@ -539,7 +539,7 @@ class TemporaryArray {
    int device = 0;
    dh::safe_cuda(cudaGetDevice(&device));
    auto d_data = ptr_.get();
-    LaunchN(device, this->size(), [=] __device__(size_t idx) { d_data[idx] = val; });
+    LaunchN(this->size(), [=] __device__(size_t idx) { d_data[idx] = val; });
  }
  thrust::device_ptr<T> data() { return ptr_; }  // NOLINT
  size_t size() { return size_; }  // NOLINT
--- a/src/common/hist_util.cu
+++ b/src/common/hist_util.cu
@@ -159,7 +159,7 @@ void RemoveDuplicatedCategories(
  auto d_new_cuts_size = dh::ToSpan(new_cuts_size);
  auto d_new_columns_ptr = dh::ToSpan(new_column_scan);
  CHECK_EQ(new_column_scan.size(), new_cuts_size.size());
-  dh::LaunchN(device, new_column_scan.size(), [=] __device__(size_t idx) {
+  dh::LaunchN(new_column_scan.size(), [=] __device__(size_t idx) {
    d_old_column_sizes_scan[idx] = d_new_columns_ptr[idx];
    if (idx == d_new_columns_ptr.size() - 1) {
      return;
@@ -248,14 +248,14 @@ void ProcessWeightedBatch(int device, const SparsePage& page,
        << "Must have at least 1 group for ranking.";
    CHECK_EQ(weights.size(), d_group_ptr.size() - 1)
        << "Weight size should equal to number of groups.";
-    dh::LaunchN(device, temp_weights.size(), [=] __device__(size_t idx) {
+    dh::LaunchN(temp_weights.size(), [=] __device__(size_t idx) {
        size_t element_idx = idx + begin;
        size_t ridx = dh::SegmentId(row_ptrs, element_idx);
        bst_group_t group_idx = dh::SegmentId(d_group_ptr, ridx + base_rowid);
        d_temp_weights[idx] = weights[group_idx];
      });
  } else {
-    dh::LaunchN(device, temp_weights.size(), [=] __device__(size_t idx) {
+    dh::LaunchN(temp_weights.size(), [=] __device__(size_t idx) {
        size_t element_idx = idx + begin;
        size_t ridx = dh::SegmentId(row_ptrs, element_idx);
        d_temp_weights[idx] = weights[ridx + base_rowid];
--- a/src/common/hist_util.cuh
+++ b/src/common/hist_util.cuh
@@ -41,7 +41,7 @@ void GetColumnSizesScan(int device, size_t num_columns, size_t num_cuts_per_feat

  dh::XGBCachingDeviceAllocator<char> alloc;
  auto d_column_sizes_scan = column_sizes_scan->data().get();
-  dh::LaunchN(device, end - begin, [=] __device__(size_t idx) {
+  dh::LaunchN(end - begin, [=] __device__(size_t idx) {
    auto e = batch_iter[begin + idx];
    if (is_valid(e)) {
      atomicAdd(&d_column_sizes_scan[e.column_idx], static_cast<size_t>(1));
--- a/src/common/host_device_vector.cu
+++ b/src/common/host_device_vector.cu
@@ -93,9 +93,8 @@ class HostDeviceVectorImpl {
      gpu_access_ = GPUAccess::kWrite;
      SetDevice();
      auto s_data = dh::ToSpan(*data_d_);
-      dh::LaunchN(device_, data_d_->size(), [=]XGBOOST_DEVICE(size_t i) {
-          s_data[i] = v;
-      });
+      dh::LaunchN(data_d_->size(),
+                  [=] XGBOOST_DEVICE(size_t i) { s_data[i] = v; });
    }
  }

--- a/src/common/quantile.cu
+++ b/src/common/quantile.cu
@@ -61,7 +61,7 @@ void PruneImpl(int device,
               Span<FeatureType const> feature_types,
               Span<SketchEntry> out_cuts,
               ToSketchEntry to_sketch_entry) {
-  dh::LaunchN(device, out_cuts.size(), [=] __device__(size_t idx) {
+  dh::LaunchN(out_cuts.size(), [=] __device__(size_t idx) {
    size_t column_id = dh::SegmentId(cuts_ptr, idx);
    auto out_column = out_cuts.subspan(
        cuts_ptr[column_id], cuts_ptr[column_id + 1] - cuts_ptr[column_id]);
@@ -221,7 +221,7 @@ void MergeImpl(int32_t device, Span<SketchEntry const> const &d_x,
  auto d_merge_path = MergePath(d_x, x_ptr, d_y, y_ptr, out, out_ptr);
  auto d_out = out;

-  dh::LaunchN(device, d_out.size(), [=] __device__(size_t idx) {
+  dh::LaunchN(d_out.size(), [=] __device__(size_t idx) {
    auto column_id = dh::SegmentId(out_ptr, idx);
    idx -= out_ptr[column_id];

@@ -487,7 +487,7 @@ void SketchContainer::FixError() {
  dh::safe_cuda(cudaSetDevice(device_));
  auto d_columns_ptr = this->columns_ptr_.ConstDeviceSpan();
  auto in = dh::ToSpan(this->Current());
-  dh::LaunchN(device_, in.size(), [=] __device__(size_t idx) {
+  dh::LaunchN(in.size(), [=] __device__(size_t idx) {
    auto column_id = dh::SegmentId(d_columns_ptr, idx);
    auto in_column = in.subspan(d_columns_ptr[column_id],
                                d_columns_ptr[column_id + 1] -
@@ -627,7 +627,7 @@ void SketchContainer::MakeCuts(HistogramCuts* p_cuts) {
  auto out_cut_values = p_cuts->cut_values_.DeviceSpan();
  auto d_ft = feature_types_.ConstDeviceSpan();

-  dh::LaunchN(0, total_bins, [=] __device__(size_t idx) {
+  dh::LaunchN(total_bins, [=] __device__(size_t idx) {
    auto column_id = dh::SegmentId(d_out_columns_ptr, idx);
    auto in_column = in_cut_values.subspan(d_in_columns_ptr[column_id],
                                           d_in_columns_ptr[column_id + 1] -
--- a/src/common/ranking_utils.cuh
+++ b/src/common/ranking_utils.cuh
@@ -44,7 +44,7 @@ SegmentedTrapezoidThreads(xgboost::common::Span<U> group_ptr,
  CHECK_GE(group_ptr.size(), 1);
  CHECK_EQ(group_ptr.size(), out_group_threads_ptr.size());
  dh::LaunchN(
-      dh::CurrentDevice(), group_ptr.size(), [=] XGBOOST_DEVICE(size_t idx) {
+      group_ptr.size(), [=] XGBOOST_DEVICE(size_t idx) {
        if (idx == 0) {
          out_group_threads_ptr[0] = 0;
          return;