add namespace aliases to reduce code
This commit is contained in:
@@ -21,6 +21,12 @@
|
||||
#include "xgboost/logging.h" // CHECK_EQ
|
||||
#include "xgboost/span.h" // span
|
||||
|
||||
#if defined(XGBOOST_USE_HIP)
|
||||
namespace thrust {
|
||||
namespace cuda = thrust::hip;
|
||||
}
|
||||
#endif
|
||||
|
||||
namespace xgboost::tree::cuda_impl {
|
||||
void FitStump(Context const* ctx, MetaInfo const& info,
|
||||
linalg::TensorView<GradientPair const, 2> gpair, linalg::VectorView<float> out) {
|
||||
@@ -45,11 +51,7 @@ void FitStump(Context const* ctx, MetaInfo const& info,
|
||||
|
||||
dh::XGBCachingDeviceAllocator<char> alloc;
|
||||
|
||||
#if defined(XGBOOST_USE_CUDA)
|
||||
auto policy = thrust::cuda::par(alloc);
|
||||
#elif defined(XGBOOST_USE_HIP)
|
||||
auto policy = thrust::hip::par(alloc);
|
||||
#endif
|
||||
|
||||
thrust::reduce_by_key(policy, key_it, key_it + gpair.Size(), grad_it,
|
||||
thrust::make_discard_iterator(), dh::tbegin(d_sum.Values()));
|
||||
|
||||
@@ -12,6 +12,12 @@
|
||||
#include "evaluate_splits.cuh"
|
||||
#include "xgboost/data.h"
|
||||
|
||||
#if defined(XGBOOST_USE_HIP)
|
||||
namespace thrust {
|
||||
namespace cuda = thrust::hip;
|
||||
}
|
||||
#endif
|
||||
|
||||
namespace xgboost::tree {
|
||||
void GPUHistEvaluator::Reset(common::HistogramCuts const &cuts, common::Span<FeatureType const> ft,
|
||||
bst_feature_t n_features, TrainParam const &param,
|
||||
@@ -28,7 +34,6 @@ void GPUHistEvaluator::Reset(common::HistogramCuts const &cuts, common::Span<Fea
|
||||
// This condition avoids sort-based split function calls if the users want
|
||||
// onehot-encoding-based splits.
|
||||
// For some reason, any_of adds 1.5 minutes to compilation time for CUDA 11.x.
|
||||
#if defined(XGBOOST_USE_CUDA)
|
||||
need_sort_histogram_ =
|
||||
thrust::any_of(thrust::cuda::par(alloc), beg, end, [=] XGBOOST_DEVICE(size_t i) {
|
||||
auto idx = i - 1;
|
||||
@@ -39,18 +44,6 @@ void GPUHistEvaluator::Reset(common::HistogramCuts const &cuts, common::Span<Fea
|
||||
}
|
||||
return false;
|
||||
});
|
||||
#elif defined(XGBOOST_USE_HIP)
|
||||
need_sort_histogram_ =
|
||||
thrust::any_of(thrust::hip::par(alloc), beg, end, [=] XGBOOST_DEVICE(size_t i) {
|
||||
auto idx = i - 1;
|
||||
if (common::IsCat(ft, idx)) {
|
||||
auto n_bins = ptrs[i] - ptrs[idx];
|
||||
bool use_sort = !common::UseOneHot(n_bins, to_onehot);
|
||||
return use_sort;
|
||||
}
|
||||
return false;
|
||||
});
|
||||
#endif
|
||||
|
||||
node_categorical_storage_size_ =
|
||||
common::CatBitField::ComputeStorageSize(cuts.MaxCategory() + 1);
|
||||
@@ -72,19 +65,11 @@ void GPUHistEvaluator::Reset(common::HistogramCuts const &cuts, common::Span<Fea
|
||||
auto it = thrust::make_counting_iterator(0ul);
|
||||
auto values = cuts.cut_values_.ConstDeviceSpan();
|
||||
|
||||
#if defined(XGBOOST_USE_CUDA)
|
||||
thrust::transform(thrust::cuda::par(alloc), it, it + feature_idx_.size(), feature_idx_.begin(),
|
||||
[=] XGBOOST_DEVICE(size_t i) {
|
||||
auto fidx = dh::SegmentId(ptrs, i);
|
||||
return fidx;
|
||||
});
|
||||
#elif defined(XGBOOST_USE_HIP)
|
||||
thrust::transform(thrust::hip::par(alloc), it, it + feature_idx_.size(), feature_idx_.begin(),
|
||||
[=] XGBOOST_DEVICE(size_t i) {
|
||||
auto fidx = dh::SegmentId(ptrs, i);
|
||||
return fidx;
|
||||
});
|
||||
#endif
|
||||
}
|
||||
is_column_split_ = is_column_split;
|
||||
device_ = device;
|
||||
@@ -101,7 +86,6 @@ common::Span<bst_feature_t const> GPUHistEvaluator::SortHistogram(
|
||||
auto d_feature_idx = dh::ToSpan(feature_idx_);
|
||||
auto total_bins = shared_inputs.feature_values.size();
|
||||
|
||||
#if defined(XGBOOST_USE_CUDA)
|
||||
thrust::transform(thrust::cuda::par(alloc), it, it + data.size(), dh::tbegin(data),
|
||||
[=] XGBOOST_DEVICE(uint32_t i) {
|
||||
auto const &input = d_inputs[i / total_bins];
|
||||
@@ -115,27 +99,11 @@ common::Span<bst_feature_t const> GPUHistEvaluator::SortHistogram(
|
||||
}
|
||||
return thrust::make_tuple(i, 0.0f);
|
||||
});
|
||||
#elif defined(XGBOOST_USE_HIP)
|
||||
thrust::transform(thrust::hip::par(alloc), it, it + data.size(), dh::tbegin(data),
|
||||
[=] XGBOOST_DEVICE(uint32_t i) {
|
||||
auto const &input = d_inputs[i / total_bins];
|
||||
auto j = i % total_bins;
|
||||
auto fidx = d_feature_idx[j];
|
||||
if (common::IsCat(shared_inputs.feature_types, fidx)) {
|
||||
auto grad =
|
||||
shared_inputs.rounding.ToFloatingPoint(input.gradient_histogram[j]);
|
||||
auto lw = evaluator.CalcWeightCat(shared_inputs.param, grad);
|
||||
return thrust::make_tuple(i, lw);
|
||||
}
|
||||
return thrust::make_tuple(i, 0.0f);
|
||||
});
|
||||
#endif
|
||||
|
||||
// Sort an array segmented according to
|
||||
// - nodes
|
||||
// - features within each node
|
||||
// - gradients within each feature
|
||||
#if defined(XGBOOST_USE_CUDA)
|
||||
thrust::stable_sort_by_key(thrust::cuda::par(alloc), dh::tbegin(data), dh::tend(data),
|
||||
dh::tbegin(sorted_idx),
|
||||
[=] XGBOOST_DEVICE(SortPair const &l, SortPair const &r) {
|
||||
@@ -166,38 +134,6 @@ common::Span<bst_feature_t const> GPUHistEvaluator::SortHistogram(
|
||||
}
|
||||
return li < ri;
|
||||
});
|
||||
#elif defined(XGBOOST_USE_HIP)
|
||||
thrust::stable_sort_by_key(thrust::hip::par(alloc), dh::tbegin(data), dh::tend(data),
|
||||
dh::tbegin(sorted_idx),
|
||||
[=] XGBOOST_DEVICE(SortPair const &l, SortPair const &r) {
|
||||
auto li = thrust::get<0>(l);
|
||||
auto ri = thrust::get<0>(r);
|
||||
|
||||
auto l_node = li / total_bins;
|
||||
auto r_node = ri / total_bins;
|
||||
|
||||
if (l_node != r_node) {
|
||||
return l_node < r_node; // not the same node
|
||||
}
|
||||
|
||||
li = li % total_bins;
|
||||
ri = ri % total_bins;
|
||||
|
||||
auto lfidx = d_feature_idx[li];
|
||||
auto rfidx = d_feature_idx[ri];
|
||||
|
||||
if (lfidx != rfidx) {
|
||||
return lfidx < rfidx; // not the same feature
|
||||
}
|
||||
|
||||
if (common::IsCat(shared_inputs.feature_types, lfidx)) {
|
||||
auto lw = thrust::get<1>(l);
|
||||
auto rw = thrust::get<1>(r);
|
||||
return lw < rw;
|
||||
}
|
||||
return li < ri;
|
||||
});
|
||||
#endif
|
||||
return dh::ToSpan(cat_sorted_idx_);
|
||||
}
|
||||
} // namespace xgboost::tree
|
||||
|
||||
@@ -16,6 +16,12 @@
|
||||
#include "row_partitioner.cuh"
|
||||
#include "xgboost/base.h"
|
||||
|
||||
#if defined(XGBOOST_USE_HIP)
|
||||
namespace thrust {
|
||||
namespace cuda = thrust::hip;
|
||||
}
|
||||
#endif
|
||||
|
||||
namespace xgboost {
|
||||
namespace tree {
|
||||
namespace {
|
||||
@@ -60,13 +66,8 @@ GradientQuantiser::GradientQuantiser(common::Span<GradientPair const> gpair, Met
|
||||
|
||||
thrust::device_ptr<GradientPair const> gpair_beg{gpair.data()};
|
||||
auto beg = thrust::make_transform_iterator(gpair_beg, Clip());
|
||||
#if defined(XGBOOST_USE_CUDA)
|
||||
Pair p =
|
||||
dh::Reduce(thrust::cuda::par(alloc), beg, beg + gpair.size(), Pair{}, thrust::plus<Pair>{});
|
||||
#elif defined(XGBOOST_USE_HIP)
|
||||
Pair p =
|
||||
dh::Reduce(thrust::hip::par(alloc), beg, beg + gpair.size(), Pair{}, thrust::plus<Pair>{});
|
||||
#endif
|
||||
|
||||
// Treat pair as array of 4 primitive types to allreduce
|
||||
using ReduceT = typename decltype(p.first)::ValueT;
|
||||
|
||||
Reference in New Issue
Block a user