Use the new DeviceOrd in the linalg module. (#9527)
This commit is contained in:
@@ -13,7 +13,7 @@ namespace xgboost {
|
||||
namespace linalg {
|
||||
template <typename T, int32_t D, typename Fn>
|
||||
void ElementWiseKernelDevice(linalg::TensorView<T, D> t, Fn&& fn, cudaStream_t s = nullptr) {
|
||||
dh::safe_cuda(cudaSetDevice(t.DeviceIdx()));
|
||||
dh::safe_cuda(cudaSetDevice(t.Device().ordinal));
|
||||
static_assert(std::is_void<std::result_of_t<Fn(size_t, T&)>>::value,
|
||||
"For function with return, use transform instead.");
|
||||
if (t.Contiguous()) {
|
||||
|
||||
@@ -133,7 +133,7 @@ struct WeightOp {
|
||||
void RankingCache::InitOnCUDA(Context const* ctx, MetaInfo const& info) {
|
||||
CUDAContext const* cuctx = ctx->CUDACtx();
|
||||
|
||||
group_ptr_.SetDevice(ctx->gpu_id);
|
||||
group_ptr_.SetDevice(ctx->Device());
|
||||
if (info.group_ptr_.empty()) {
|
||||
group_ptr_.Resize(2, 0);
|
||||
group_ptr_.HostVector()[1] = info.num_row_;
|
||||
@@ -153,7 +153,7 @@ void RankingCache::InitOnCUDA(Context const* ctx, MetaInfo const& info) {
|
||||
max_group_size_ =
|
||||
thrust::reduce(cuctx->CTP(), it, it + n_groups, 0ul, thrust::maximum<std::size_t>{});
|
||||
|
||||
threads_group_ptr_.SetDevice(ctx->gpu_id);
|
||||
threads_group_ptr_.SetDevice(ctx->Device());
|
||||
threads_group_ptr_.Resize(n_groups + 1, 0);
|
||||
auto d_threads_group_ptr = threads_group_ptr_.DeviceSpan();
|
||||
if (param_.HasTruncation()) {
|
||||
@@ -168,7 +168,7 @@ void RankingCache::InitOnCUDA(Context const* ctx, MetaInfo const& info) {
|
||||
n_cuda_threads_ = info.num_row_ * param_.NumPair();
|
||||
}
|
||||
|
||||
sorted_idx_cache_.SetDevice(ctx->gpu_id);
|
||||
sorted_idx_cache_.SetDevice(ctx->Device());
|
||||
sorted_idx_cache_.Resize(info.labels.Size(), 0);
|
||||
|
||||
auto weight = common::MakeOptionalWeights(ctx, info.weights_);
|
||||
@@ -187,18 +187,18 @@ common::Span<std::size_t const> RankingCache::MakeRankOnCUDA(Context const* ctx,
|
||||
|
||||
void NDCGCache::InitOnCUDA(Context const* ctx, MetaInfo const& info) {
|
||||
CUDAContext const* cuctx = ctx->CUDACtx();
|
||||
auto labels = info.labels.View(ctx->gpu_id).Slice(linalg::All(), 0);
|
||||
auto labels = info.labels.View(ctx->Device()).Slice(linalg::All(), 0);
|
||||
CheckNDCGLabels(this->Param(), labels, CheckNDCGOp{cuctx});
|
||||
|
||||
auto d_group_ptr = this->DataGroupPtr(ctx);
|
||||
|
||||
std::size_t n_groups = d_group_ptr.size() - 1;
|
||||
inv_idcg_ = linalg::Zeros<double>(ctx, n_groups);
|
||||
auto d_inv_idcg = inv_idcg_.View(ctx->gpu_id);
|
||||
auto d_inv_idcg = inv_idcg_.View(ctx->Device());
|
||||
cuda_impl::CalcQueriesInvIDCG(ctx, labels, d_group_ptr, d_inv_idcg, this->Param());
|
||||
CHECK_GE(this->Param().NumPair(), 1ul);
|
||||
|
||||
discounts_.SetDevice(ctx->gpu_id);
|
||||
discounts_.SetDevice(ctx->Device());
|
||||
discounts_.Resize(MaxGroupSize());
|
||||
auto d_discount = discounts_.DeviceSpan();
|
||||
dh::LaunchN(MaxGroupSize(), cuctx->Stream(),
|
||||
@@ -206,12 +206,12 @@ void NDCGCache::InitOnCUDA(Context const* ctx, MetaInfo const& info) {
|
||||
}
|
||||
|
||||
void PreCache::InitOnCUDA(Context const* ctx, MetaInfo const& info) {
|
||||
auto const d_label = info.labels.View(ctx->gpu_id).Slice(linalg::All(), 0);
|
||||
auto const d_label = info.labels.View(ctx->Device()).Slice(linalg::All(), 0);
|
||||
CheckPreLabels("pre", d_label, CheckMAPOp{ctx->CUDACtx()});
|
||||
}
|
||||
|
||||
void MAPCache::InitOnCUDA(Context const* ctx, MetaInfo const& info) {
|
||||
auto const d_label = info.labels.View(ctx->gpu_id).Slice(linalg::All(), 0);
|
||||
auto const d_label = info.labels.View(ctx->Device()).Slice(linalg::All(), 0);
|
||||
CheckPreLabels("map", d_label, CheckMAPOp{ctx->CUDACtx()});
|
||||
}
|
||||
} // namespace xgboost::ltr
|
||||
|
||||
@@ -217,7 +217,7 @@ class RankingCache {
|
||||
}
|
||||
// Constructed as [1, n_samples] if group ptr is not supplied by the user
|
||||
common::Span<bst_group_t const> DataGroupPtr(Context const* ctx) const {
|
||||
group_ptr_.SetDevice(ctx->gpu_id);
|
||||
group_ptr_.SetDevice(ctx->Device());
|
||||
return ctx->IsCPU() ? group_ptr_.ConstHostSpan() : group_ptr_.ConstDeviceSpan();
|
||||
}
|
||||
|
||||
@@ -228,7 +228,7 @@ class RankingCache {
|
||||
// Create a rank list by model prediction
|
||||
common::Span<std::size_t const> SortedIdx(Context const* ctx, common::Span<float const> predt) {
|
||||
if (sorted_idx_cache_.Empty()) {
|
||||
sorted_idx_cache_.SetDevice(ctx->gpu_id);
|
||||
sorted_idx_cache_.SetDevice(ctx->Device());
|
||||
sorted_idx_cache_.Resize(predt.size());
|
||||
}
|
||||
if (ctx->IsCPU()) {
|
||||
@@ -242,7 +242,7 @@ class RankingCache {
|
||||
common::Span<std::size_t> SortedIdxY(Context const* ctx, std::size_t n_samples) {
|
||||
CHECK(ctx->IsCUDA()) << error::InvalidCUDAOrdinal();
|
||||
if (y_sorted_idx_cache_.Empty()) {
|
||||
y_sorted_idx_cache_.SetDevice(ctx->gpu_id);
|
||||
y_sorted_idx_cache_.SetDevice(ctx->Device());
|
||||
y_sorted_idx_cache_.Resize(n_samples);
|
||||
}
|
||||
return y_sorted_idx_cache_.DeviceSpan();
|
||||
@@ -250,7 +250,7 @@ class RankingCache {
|
||||
common::Span<float> RankedY(Context const* ctx, std::size_t n_samples) {
|
||||
CHECK(ctx->IsCUDA()) << error::InvalidCUDAOrdinal();
|
||||
if (y_ranked_by_model_.Empty()) {
|
||||
y_ranked_by_model_.SetDevice(ctx->gpu_id);
|
||||
y_ranked_by_model_.SetDevice(ctx->Device());
|
||||
y_ranked_by_model_.Resize(n_samples);
|
||||
}
|
||||
return y_ranked_by_model_.DeviceSpan();
|
||||
@@ -266,21 +266,21 @@ class RankingCache {
|
||||
|
||||
linalg::VectorView<GradientPair> CUDARounding(Context const* ctx) {
|
||||
if (roundings_.Size() == 0) {
|
||||
roundings_.SetDevice(ctx->gpu_id);
|
||||
roundings_.SetDevice(ctx->Device());
|
||||
roundings_.Reshape(Groups());
|
||||
}
|
||||
return roundings_.View(ctx->gpu_id);
|
||||
return roundings_.View(ctx->Device());
|
||||
}
|
||||
common::Span<double> CUDACostRounding(Context const* ctx) {
|
||||
if (cost_rounding_.Size() == 0) {
|
||||
cost_rounding_.SetDevice(ctx->gpu_id);
|
||||
cost_rounding_.SetDevice(ctx->Device());
|
||||
cost_rounding_.Resize(1);
|
||||
}
|
||||
return cost_rounding_.DeviceSpan();
|
||||
}
|
||||
template <typename Type>
|
||||
common::Span<Type> MaxLambdas(Context const* ctx, std::size_t n) {
|
||||
max_lambdas_.SetDevice(ctx->gpu_id);
|
||||
max_lambdas_.SetDevice(ctx->Device());
|
||||
std::size_t bytes = n * sizeof(Type);
|
||||
if (bytes != max_lambdas_.Size()) {
|
||||
max_lambdas_.Resize(bytes);
|
||||
@@ -315,17 +315,17 @@ class NDCGCache : public RankingCache {
|
||||
}
|
||||
|
||||
linalg::VectorView<double const> InvIDCG(Context const* ctx) const {
|
||||
return inv_idcg_.View(ctx->gpu_id);
|
||||
return inv_idcg_.View(ctx->Device());
|
||||
}
|
||||
common::Span<double const> Discount(Context const* ctx) const {
|
||||
return ctx->IsCPU() ? discounts_.ConstHostSpan() : discounts_.ConstDeviceSpan();
|
||||
}
|
||||
linalg::VectorView<double> Dcg(Context const* ctx) {
|
||||
if (dcg_.Size() == 0) {
|
||||
dcg_.SetDevice(ctx->gpu_id);
|
||||
dcg_.SetDevice(ctx->Device());
|
||||
dcg_.Reshape(this->Groups());
|
||||
}
|
||||
return dcg_.View(ctx->gpu_id);
|
||||
return dcg_.View(ctx->Device());
|
||||
}
|
||||
};
|
||||
|
||||
@@ -396,7 +396,7 @@ class PreCache : public RankingCache {
|
||||
|
||||
common::Span<double> Pre(Context const* ctx) {
|
||||
if (pre_.Empty()) {
|
||||
pre_.SetDevice(ctx->gpu_id);
|
||||
pre_.SetDevice(ctx->Device());
|
||||
pre_.Resize(this->Groups());
|
||||
}
|
||||
return ctx->IsCPU() ? pre_.HostSpan() : pre_.DeviceSpan();
|
||||
@@ -427,21 +427,21 @@ class MAPCache : public RankingCache {
|
||||
|
||||
common::Span<double> NumRelevant(Context const* ctx) {
|
||||
if (n_rel_.Empty()) {
|
||||
n_rel_.SetDevice(ctx->gpu_id);
|
||||
n_rel_.SetDevice(ctx->Device());
|
||||
n_rel_.Resize(n_samples_);
|
||||
}
|
||||
return ctx->IsCPU() ? n_rel_.HostSpan() : n_rel_.DeviceSpan();
|
||||
}
|
||||
common::Span<double> Acc(Context const* ctx) {
|
||||
if (acc_.Empty()) {
|
||||
acc_.SetDevice(ctx->gpu_id);
|
||||
acc_.SetDevice(ctx->Device());
|
||||
acc_.Resize(n_samples_);
|
||||
}
|
||||
return ctx->IsCPU() ? acc_.HostSpan() : acc_.DeviceSpan();
|
||||
}
|
||||
common::Span<double> Map(Context const* ctx) {
|
||||
if (map_.Empty()) {
|
||||
map_.SetDevice(ctx->gpu_id);
|
||||
map_.SetDevice(ctx->Device());
|
||||
map_.Resize(this->Groups());
|
||||
}
|
||||
return ctx->IsCPU() ? map_.HostSpan() : map_.DeviceSpan();
|
||||
|
||||
@@ -20,9 +20,9 @@ namespace common {
|
||||
void Median(Context const* ctx, linalg::Tensor<float, 2> const& t,
|
||||
HostDeviceVector<float> const& weights, linalg::Tensor<float, 1>* out) {
|
||||
if (!ctx->IsCPU()) {
|
||||
weights.SetDevice(ctx->gpu_id);
|
||||
weights.SetDevice(ctx->Device());
|
||||
auto opt_weights = OptionalWeights(weights.ConstDeviceSpan());
|
||||
auto t_v = t.View(ctx->gpu_id);
|
||||
auto t_v = t.View(ctx->Device());
|
||||
cuda_impl::Median(ctx, t_v, opt_weights, out);
|
||||
}
|
||||
|
||||
@@ -59,7 +59,7 @@ void Mean(Context const* ctx, linalg::Vector<float> const& v, linalg::Vector<flo
|
||||
auto ret = std::accumulate(tloc.cbegin(), tloc.cend(), .0f);
|
||||
out->HostView()(0) = ret;
|
||||
} else {
|
||||
cuda_impl::Mean(ctx, v.View(ctx->gpu_id), out->View(ctx->gpu_id));
|
||||
cuda_impl::Mean(ctx, v.View(ctx->Device()), out->View(ctx->Device()));
|
||||
}
|
||||
}
|
||||
} // namespace common
|
||||
|
||||
Reference in New Issue
Block a user