Use the new DeviceOrd in the linalg module. (#9527)

This commit is contained in:
Jiaming Yuan
2023-08-29 13:37:29 +08:00
committed by GitHub
parent 942b957eef
commit ddf2e68821
43 changed files with 252 additions and 273 deletions

View File

@@ -19,7 +19,7 @@ void EncodeTreeLeafDevice(Context const* ctx, common::Span<bst_node_t const> pos
dh::device_vector<size_t>* p_ridx, HostDeviceVector<size_t>* p_nptr,
HostDeviceVector<bst_node_t>* p_nidx, RegTree const& tree) {
// copy position to buffer
dh::safe_cuda(cudaSetDevice(ctx->gpu_id));
dh::safe_cuda(cudaSetDevice(ctx->Ordinal()));
auto cuctx = ctx->CUDACtx();
size_t n_samples = position.size();
dh::device_vector<bst_node_t> sorted_position(position.size());
@@ -86,11 +86,11 @@ void EncodeTreeLeafDevice(Context const* ctx, common::Span<bst_node_t const> pos
*/
auto& nidx = *p_nidx;
auto& nptr = *p_nptr;
nidx.SetDevice(ctx->gpu_id);
nidx.SetDevice(ctx->Device());
nidx.Resize(n_leaf);
auto d_node_idx = nidx.DeviceSpan();
nptr.SetDevice(ctx->gpu_id);
nptr.SetDevice(ctx->Device());
nptr.Resize(n_leaf + 1, 0);
auto d_node_ptr = nptr.DeviceSpan();
@@ -142,7 +142,7 @@ void EncodeTreeLeafDevice(Context const* ctx, common::Span<bst_node_t const> pos
void UpdateTreeLeafDevice(Context const* ctx, common::Span<bst_node_t const> position,
std::int32_t group_idx, MetaInfo const& info, float learning_rate,
HostDeviceVector<float> const& predt, float alpha, RegTree* p_tree) {
dh::safe_cuda(cudaSetDevice(ctx->gpu_id));
dh::safe_cuda(cudaSetDevice(ctx->Ordinal()));
dh::device_vector<size_t> ridx;
HostDeviceVector<size_t> nptr;
HostDeviceVector<bst_node_t> nidx;
@@ -155,13 +155,13 @@ void UpdateTreeLeafDevice(Context const* ctx, common::Span<bst_node_t const> pos
}
HostDeviceVector<float> quantiles;
predt.SetDevice(ctx->gpu_id);
predt.SetDevice(ctx->Device());
auto d_predt = linalg::MakeTensorView(ctx, predt.ConstDeviceSpan(), info.num_row_,
predt.Size() / info.num_row_);
CHECK_LT(group_idx, d_predt.Shape(1));
auto t_predt = d_predt.Slice(linalg::All(), group_idx);
auto d_labels = info.labels.View(ctx->gpu_id).Slice(linalg::All(), IdxY(info, group_idx));
auto d_labels = info.labels.View(ctx->Device()).Slice(linalg::All(), IdxY(info, group_idx));
auto d_row_index = dh::ToSpan(ridx);
auto seg_beg = nptr.DevicePointer();
@@ -178,7 +178,7 @@ void UpdateTreeLeafDevice(Context const* ctx, common::Span<bst_node_t const> pos
if (info.weights_.Empty()) {
common::SegmentedQuantile(ctx, alpha, seg_beg, seg_end, val_beg, val_end, &quantiles);
} else {
info.weights_.SetDevice(ctx->gpu_id);
info.weights_.SetDevice(ctx->Device());
auto d_weights = info.weights_.ConstDeviceSpan();
CHECK_EQ(d_weights.size(), d_row_index.size());
auto w_it = thrust::make_permutation_iterator(dh::tcbegin(d_weights), dh::tcbegin(d_row_index));

View File

@@ -109,12 +109,12 @@ class LambdaRankObj : public FitIntercept {
lj_.SetDevice(ctx_->gpu_id);
if (ctx_->IsCPU()) {
cpu_impl::LambdaRankUpdatePositionBias(ctx_, li_full_.View(ctx_->gpu_id),
lj_full_.View(ctx_->gpu_id), &ti_plus_, &tj_minus_,
cpu_impl::LambdaRankUpdatePositionBias(ctx_, li_full_.View(ctx_->Device()),
lj_full_.View(ctx_->Device()), &ti_plus_, &tj_minus_,
&li_, &lj_, p_cache_);
} else {
cuda_impl::LambdaRankUpdatePositionBias(ctx_, li_full_.View(ctx_->gpu_id),
lj_full_.View(ctx_->gpu_id), &ti_plus_, &tj_minus_,
cuda_impl::LambdaRankUpdatePositionBias(ctx_, li_full_.View(ctx_->Device()),
lj_full_.View(ctx_->Device()), &ti_plus_, &tj_minus_,
&li_, &lj_, p_cache_);
}
@@ -354,9 +354,9 @@ class LambdaRankNDCG : public LambdaRankObj<LambdaRankNDCG, ltr::NDCGCache> {
const MetaInfo& info, linalg::Matrix<GradientPair>* out_gpair) {
if (ctx_->IsCUDA()) {
cuda_impl::LambdaRankGetGradientNDCG(
ctx_, iter, predt, info, GetCache(), ti_plus_.View(ctx_->gpu_id),
tj_minus_.View(ctx_->gpu_id), li_full_.View(ctx_->gpu_id), lj_full_.View(ctx_->gpu_id),
out_gpair);
ctx_, iter, predt, info, GetCache(), ti_plus_.View(ctx_->Device()),
tj_minus_.View(ctx_->Device()), li_full_.View(ctx_->Device()),
lj_full_.View(ctx_->Device()), out_gpair);
return;
}
@@ -477,9 +477,9 @@ class LambdaRankMAP : public LambdaRankObj<LambdaRankMAP, ltr::MAPCache> {
CHECK(param_.ndcg_exp_gain) << "NDCG gain can not be set for the MAP objective.";
if (ctx_->IsCUDA()) {
return cuda_impl::LambdaRankGetGradientMAP(
ctx_, iter, predt, info, GetCache(), ti_plus_.View(ctx_->gpu_id),
tj_minus_.View(ctx_->gpu_id), li_full_.View(ctx_->gpu_id), lj_full_.View(ctx_->gpu_id),
out_gpair);
ctx_, iter, predt, info, GetCache(), ti_plus_.View(ctx_->Device()),
tj_minus_.View(ctx_->Device()), li_full_.View(ctx_->Device()),
lj_full_.View(ctx_->Device()), out_gpair);
}
auto gptr = p_cache_->DataGroupPtr(ctx_).data();
@@ -567,9 +567,9 @@ class LambdaRankPairwise : public LambdaRankObj<LambdaRankPairwise, ltr::Ranking
CHECK(param_.ndcg_exp_gain) << "NDCG gain can not be set for the pairwise objective.";
if (ctx_->IsCUDA()) {
return cuda_impl::LambdaRankGetGradientPairwise(
ctx_, iter, predt, info, GetCache(), ti_plus_.View(ctx_->gpu_id),
tj_minus_.View(ctx_->gpu_id), li_full_.View(ctx_->gpu_id), lj_full_.View(ctx_->gpu_id),
out_gpair);
ctx_, iter, predt, info, GetCache(), ti_plus_.View(ctx_->Device()),
tj_minus_.View(ctx_->Device()), li_full_.View(ctx_->Device()),
lj_full_.View(ctx_->Device()), out_gpair);
}
auto gptr = p_cache_->DataGroupPtr(ctx_);

View File

@@ -306,7 +306,7 @@ void Launch(Context const* ctx, std::int32_t iter, HostDeviceVector<float> const
CHECK_NE(d_rounding.Size(), 0);
auto label = info.labels.View(ctx->gpu_id);
auto label = info.labels.View(ctx->Device());
auto predts = preds.ConstDeviceSpan();
auto gpairs = out_gpair->View(ctx->Device());
thrust::fill_n(ctx->CUDACtx()->CTP(), gpairs.Values().data(), gpairs.Size(),
@@ -348,7 +348,7 @@ common::Span<std::size_t const> SortY(Context const* ctx, MetaInfo const& info,
common::Span<std::size_t const> d_rank,
std::shared_ptr<ltr::RankingCache> p_cache) {
auto const d_group_ptr = p_cache->DataGroupPtr(ctx);
auto label = info.labels.View(ctx->gpu_id);
auto label = info.labels.View(ctx->Device());
// The buffer for ranked y is necessary as cub segmented sort accepts only pointer.
auto d_y_ranked = p_cache->RankedY(ctx, info.num_row_);
thrust::for_each_n(ctx->CUDACtx()->CTP(), thrust::make_counting_iterator(0ul), d_y_ranked.size(),
@@ -374,13 +374,13 @@ void LambdaRankGetGradientNDCG(Context const* ctx, std::int32_t iter,
linalg::VectorView<double> li, linalg::VectorView<double> lj,
linalg::Matrix<GradientPair>* out_gpair) {
// boilerplate
std::int32_t device_id = ctx->gpu_id;
dh::safe_cuda(cudaSetDevice(device_id));
auto device = ctx->Device();
dh::safe_cuda(cudaSetDevice(device.ordinal));
auto const d_inv_IDCG = p_cache->InvIDCG(ctx);
auto const discount = p_cache->Discount(ctx);
info.labels.SetDevice(device_id);
preds.SetDevice(device_id);
info.labels.SetDevice(device);
preds.SetDevice(device);
auto const exp_gain = p_cache->Param().ndcg_exp_gain;
auto delta_ndcg = [=] XGBOOST_DEVICE(float y_high, float y_low, std::size_t rank_high,
@@ -403,7 +403,7 @@ void MAPStat(Context const* ctx, MetaInfo const& info, common::Span<std::size_t
auto key_it = dh::MakeTransformIterator<std::size_t>(
thrust::make_counting_iterator(0ul),
[=] XGBOOST_DEVICE(std::size_t i) -> std::size_t { return dh::SegmentId(group_ptr, i); });
auto label = info.labels.View(ctx->gpu_id).Slice(linalg::All(), 0);
auto label = info.labels.View(ctx->Device()).Slice(linalg::All(), 0);
auto const* cuctx = ctx->CUDACtx();
{
@@ -442,11 +442,11 @@ void LambdaRankGetGradientMAP(Context const* ctx, std::int32_t iter,
linalg::VectorView<double const> tj_minus, // input bias ratio
linalg::VectorView<double> li, linalg::VectorView<double> lj,
linalg::Matrix<GradientPair>* out_gpair) {
std::int32_t device_id = ctx->gpu_id;
dh::safe_cuda(cudaSetDevice(device_id));
auto device = ctx->Device();
dh::safe_cuda(cudaSetDevice(device.ordinal));
info.labels.SetDevice(device_id);
predt.SetDevice(device_id);
info.labels.SetDevice(device);
predt.SetDevice(device);
CHECK(p_cache);
@@ -481,11 +481,11 @@ void LambdaRankGetGradientPairwise(Context const* ctx, std::int32_t iter,
linalg::VectorView<double const> tj_minus, // input bias ratio
linalg::VectorView<double> li, linalg::VectorView<double> lj,
linalg::Matrix<GradientPair>* out_gpair) {
std::int32_t device_id = ctx->gpu_id;
dh::safe_cuda(cudaSetDevice(device_id));
auto device = ctx->Device();
dh::safe_cuda(cudaSetDevice(device.ordinal));
info.labels.SetDevice(device_id);
predt.SetDevice(device_id);
info.labels.SetDevice(device);
predt.SetDevice(device);
auto d_predt = predt.ConstDeviceSpan();
auto const d_sorted_idx = p_cache->SortedIdx(ctx, d_predt);
@@ -517,11 +517,11 @@ void LambdaRankUpdatePositionBias(Context const* ctx, linalg::VectorView<double
auto const d_group_ptr = p_cache->DataGroupPtr(ctx);
auto n_groups = d_group_ptr.size() - 1;
auto ti_plus = p_ti_plus->View(ctx->gpu_id);
auto tj_minus = p_tj_minus->View(ctx->gpu_id);
auto ti_plus = p_ti_plus->View(ctx->Device());
auto tj_minus = p_tj_minus->View(ctx->Device());
auto li = p_li->View(ctx->gpu_id);
auto lj = p_lj->View(ctx->gpu_id);
auto li = p_li->View(ctx->Device());
auto lj = p_lj->View(ctx->Device());
CHECK_EQ(li.Size(), ti_plus.Size());
auto const& param = p_cache->Param();

View File

@@ -62,7 +62,7 @@ class QuantileRegression : public ObjFunction {
CHECK_GE(n_targets, n_alphas);
CHECK_EQ(preds.Size(), info.num_row_ * n_targets);
auto labels = info.labels.View(ctx_->gpu_id);
auto labels = info.labels.View(ctx_->Device());
out_gpair->SetDevice(ctx_->Device());
CHECK_EQ(info.labels.Shape(1), 1)
@@ -131,7 +131,7 @@ class QuantileRegression : public ObjFunction {
#if defined(XGBOOST_USE_CUDA)
alpha_.SetDevice(ctx_->gpu_id);
auto d_alpha = alpha_.ConstDeviceSpan();
auto d_labels = info.labels.View(ctx_->gpu_id);
auto d_labels = info.labels.View(ctx_->Device());
auto seg_it = dh::MakeTransformIterator<std::size_t>(
thrust::make_counting_iterator(0ul),
[=] XGBOOST_DEVICE(std::size_t i) { return i * d_labels.Shape(0); });

View File

@@ -69,7 +69,7 @@ class RegLossObj : public FitIntercept {
public:
void ValidateLabel(MetaInfo const& info) {
auto label = info.labels.View(ctx_->Ordinal());
auto label = info.labels.View(ctx_->Device());
auto valid = ctx_->DispatchDevice(
[&] {
return std::all_of(linalg::cbegin(label), linalg::cend(label),
@@ -244,7 +244,7 @@ class PseudoHuberRegression : public FitIntercept {
CheckRegInputs(info, preds);
auto slope = param_.huber_slope;
CHECK_NE(slope, 0.0) << "slope for pseudo huber cannot be 0.";
auto labels = info.labels.View(ctx_->gpu_id);
auto labels = info.labels.View(ctx_->Device());
out_gpair->SetDevice(ctx_->gpu_id);
out_gpair->Reshape(info.num_row_, this->Targets(info));
@@ -698,7 +698,7 @@ class MeanAbsoluteError : public ObjFunction {
void GetGradient(HostDeviceVector<float> const& preds, const MetaInfo& info,
std::int32_t /*iter*/, linalg::Matrix<GradientPair>* out_gpair) override {
CheckRegInputs(info, preds);
auto labels = info.labels.View(ctx_->gpu_id);
auto labels = info.labels.View(ctx_->Device());
out_gpair->SetDevice(ctx_->Device());
out_gpair->Reshape(info.num_row_, this->Targets(info));