Add namespace aliases to reduce code duplication between the CUDA and HIP code paths

This commit is contained in:
Hui Liu
2023-10-27 09:11:55 -07:00
parent e00131c465
commit 4a4b528d54
19 changed files with 110 additions and 407 deletions

View File

@@ -21,6 +21,12 @@
#include "xgboost/logging.h" // CHECK_EQ
#include "xgboost/span.h" // span
#if defined(XGBOOST_USE_HIP)
// On HIP builds, alias thrust::cuda to thrust::hip so the code below can
// spell thrust::cuda::par(...) once instead of branching on
// XGBOOST_USE_CUDA / XGBOOST_USE_HIP at every call site.
namespace thrust {
namespace cuda = thrust::hip;
}
#endif
namespace xgboost::tree::cuda_impl {
void FitStump(Context const* ctx, MetaInfo const& info,
linalg::TensorView<GradientPair const, 2> gpair, linalg::VectorView<float> out) {
@@ -45,11 +51,7 @@ void FitStump(Context const* ctx, MetaInfo const& info,
dh::XGBCachingDeviceAllocator<char> alloc;
#if defined(XGBOOST_USE_CUDA)
auto policy = thrust::cuda::par(alloc);
#elif defined(XGBOOST_USE_HIP)
auto policy = thrust::hip::par(alloc);
#endif
thrust::reduce_by_key(policy, key_it, key_it + gpair.Size(), grad_it,
thrust::make_discard_iterator(), dh::tbegin(d_sum.Values()));

View File

@@ -12,6 +12,12 @@
#include "evaluate_splits.cuh"
#include "xgboost/data.h"
#if defined(XGBOOST_USE_HIP)
// On HIP builds, make thrust::cuda resolve to thrust::hip so the execution
// policy can be written uniformly as thrust::cuda::par(alloc), removing the
// need for duplicated #elif defined(XGBOOST_USE_HIP) branches.
namespace thrust {
namespace cuda = thrust::hip;
}
#endif
namespace xgboost::tree {
void GPUHistEvaluator::Reset(common::HistogramCuts const &cuts, common::Span<FeatureType const> ft,
bst_feature_t n_features, TrainParam const &param,
@@ -28,7 +34,6 @@ void GPUHistEvaluator::Reset(common::HistogramCuts const &cuts, common::Span<Fea
// This condition avoids sort-based split function calls if the users want
// onehot-encoding-based splits.
// For some reason, any_of adds 1.5 minutes to compilation time for CUDA 11.x.
#if defined(XGBOOST_USE_CUDA)
need_sort_histogram_ =
thrust::any_of(thrust::cuda::par(alloc), beg, end, [=] XGBOOST_DEVICE(size_t i) {
auto idx = i - 1;
@@ -39,18 +44,6 @@ void GPUHistEvaluator::Reset(common::HistogramCuts const &cuts, common::Span<Fea
}
return false;
});
#elif defined(XGBOOST_USE_HIP)
need_sort_histogram_ =
thrust::any_of(thrust::hip::par(alloc), beg, end, [=] XGBOOST_DEVICE(size_t i) {
auto idx = i - 1;
if (common::IsCat(ft, idx)) {
auto n_bins = ptrs[i] - ptrs[idx];
bool use_sort = !common::UseOneHot(n_bins, to_onehot);
return use_sort;
}
return false;
});
#endif
node_categorical_storage_size_ =
common::CatBitField::ComputeStorageSize(cuts.MaxCategory() + 1);
@@ -72,19 +65,11 @@ void GPUHistEvaluator::Reset(common::HistogramCuts const &cuts, common::Span<Fea
auto it = thrust::make_counting_iterator(0ul);
auto values = cuts.cut_values_.ConstDeviceSpan();
#if defined(XGBOOST_USE_CUDA)
thrust::transform(thrust::cuda::par(alloc), it, it + feature_idx_.size(), feature_idx_.begin(),
[=] XGBOOST_DEVICE(size_t i) {
auto fidx = dh::SegmentId(ptrs, i);
return fidx;
});
#elif defined(XGBOOST_USE_HIP)
thrust::transform(thrust::hip::par(alloc), it, it + feature_idx_.size(), feature_idx_.begin(),
[=] XGBOOST_DEVICE(size_t i) {
auto fidx = dh::SegmentId(ptrs, i);
return fidx;
});
#endif
}
is_column_split_ = is_column_split;
device_ = device;
@@ -101,7 +86,6 @@ common::Span<bst_feature_t const> GPUHistEvaluator::SortHistogram(
auto d_feature_idx = dh::ToSpan(feature_idx_);
auto total_bins = shared_inputs.feature_values.size();
#if defined(XGBOOST_USE_CUDA)
thrust::transform(thrust::cuda::par(alloc), it, it + data.size(), dh::tbegin(data),
[=] XGBOOST_DEVICE(uint32_t i) {
auto const &input = d_inputs[i / total_bins];
@@ -115,27 +99,11 @@ common::Span<bst_feature_t const> GPUHistEvaluator::SortHistogram(
}
return thrust::make_tuple(i, 0.0f);
});
#elif defined(XGBOOST_USE_HIP)
thrust::transform(thrust::hip::par(alloc), it, it + data.size(), dh::tbegin(data),
[=] XGBOOST_DEVICE(uint32_t i) {
auto const &input = d_inputs[i / total_bins];
auto j = i % total_bins;
auto fidx = d_feature_idx[j];
if (common::IsCat(shared_inputs.feature_types, fidx)) {
auto grad =
shared_inputs.rounding.ToFloatingPoint(input.gradient_histogram[j]);
auto lw = evaluator.CalcWeightCat(shared_inputs.param, grad);
return thrust::make_tuple(i, lw);
}
return thrust::make_tuple(i, 0.0f);
});
#endif
// Sort an array segmented according to
// - nodes
// - features within each node
// - gradients within each feature
#if defined(XGBOOST_USE_CUDA)
thrust::stable_sort_by_key(thrust::cuda::par(alloc), dh::tbegin(data), dh::tend(data),
dh::tbegin(sorted_idx),
[=] XGBOOST_DEVICE(SortPair const &l, SortPair const &r) {
@@ -166,38 +134,6 @@ common::Span<bst_feature_t const> GPUHistEvaluator::SortHistogram(
}
return li < ri;
});
#elif defined(XGBOOST_USE_HIP)
thrust::stable_sort_by_key(thrust::hip::par(alloc), dh::tbegin(data), dh::tend(data),
dh::tbegin(sorted_idx),
[=] XGBOOST_DEVICE(SortPair const &l, SortPair const &r) {
auto li = thrust::get<0>(l);
auto ri = thrust::get<0>(r);
auto l_node = li / total_bins;
auto r_node = ri / total_bins;
if (l_node != r_node) {
return l_node < r_node; // not the same node
}
li = li % total_bins;
ri = ri % total_bins;
auto lfidx = d_feature_idx[li];
auto rfidx = d_feature_idx[ri];
if (lfidx != rfidx) {
return lfidx < rfidx; // not the same feature
}
if (common::IsCat(shared_inputs.feature_types, lfidx)) {
auto lw = thrust::get<1>(l);
auto rw = thrust::get<1>(r);
return lw < rw;
}
return li < ri;
});
#endif
return dh::ToSpan(cat_sorted_idx_);
}
} // namespace xgboost::tree

View File

@@ -16,6 +16,12 @@
#include "row_partitioner.cuh"
#include "xgboost/base.h"
#if defined(XGBOOST_USE_HIP)
// Namespace alias for HIP builds: thrust::cuda maps to thrust::hip, letting
// shared code call thrust::cuda::par(alloc) on both backends without
// per-backend preprocessor branches.
namespace thrust {
namespace cuda = thrust::hip;
}
#endif
namespace xgboost {
namespace tree {
namespace {
@@ -60,13 +66,8 @@ GradientQuantiser::GradientQuantiser(common::Span<GradientPair const> gpair, Met
thrust::device_ptr<GradientPair const> gpair_beg{gpair.data()};
auto beg = thrust::make_transform_iterator(gpair_beg, Clip());
#if defined(XGBOOST_USE_CUDA)
Pair p =
dh::Reduce(thrust::cuda::par(alloc), beg, beg + gpair.size(), Pair{}, thrust::plus<Pair>{});
#elif defined(XGBOOST_USE_HIP)
Pair p =
dh::Reduce(thrust::hip::par(alloc), beg, beg + gpair.size(), Pair{}, thrust::plus<Pair>{});
#endif
// Treat pair as array of 4 primitive types to allreduce
using ReduceT = typename decltype(p.first)::ValueT;