Use the new DeviceOrd in the linalg module. (#9527)
This commit is contained in:
@@ -82,22 +82,19 @@ template <typename BinaryAUC>
|
||||
double MultiClassOVR(Context const *ctx, common::Span<float const> predts, MetaInfo const &info,
|
||||
size_t n_classes, int32_t n_threads, BinaryAUC &&binary_auc) {
|
||||
CHECK_NE(n_classes, 0);
|
||||
auto const labels = info.labels.View(Context::kCpuId);
|
||||
auto const labels = info.labels.HostView();
|
||||
if (labels.Shape(0) != 0) {
|
||||
CHECK_EQ(labels.Shape(1), 1) << "AUC doesn't support multi-target model.";
|
||||
}
|
||||
|
||||
std::vector<double> results_storage(n_classes * 3, 0);
|
||||
linalg::TensorView<double, 2> results(results_storage, {n_classes, static_cast<size_t>(3)},
|
||||
Context::kCpuId);
|
||||
auto results = linalg::MakeTensorView(ctx, results_storage, n_classes, 3);
|
||||
auto local_area = results.Slice(linalg::All(), 0);
|
||||
auto tp = results.Slice(linalg::All(), 1);
|
||||
auto auc = results.Slice(linalg::All(), 2);
|
||||
|
||||
auto weights = common::OptionalWeights{info.weights_.ConstHostSpan()};
|
||||
auto predts_t = linalg::TensorView<float const, 2>(
|
||||
predts, {static_cast<size_t>(info.num_row_), n_classes},
|
||||
Context::kCpuId);
|
||||
auto predts_t = linalg::MakeTensorView(ctx, predts, info.num_row_, n_classes);
|
||||
|
||||
if (info.labels.Size() != 0) {
|
||||
common::ParallelFor(n_classes, n_threads, [&](auto c) {
|
||||
@@ -108,8 +105,8 @@ double MultiClassOVR(Context const *ctx, common::Span<float const> predts, MetaI
|
||||
response[i] = labels(i) == c ? 1.0f : 0.0;
|
||||
}
|
||||
double fp;
|
||||
std::tie(fp, tp(c), auc(c)) =
|
||||
binary_auc(ctx, proba, linalg::MakeVec(response.data(), response.size(), -1), weights);
|
||||
std::tie(fp, tp(c), auc(c)) = binary_auc(
|
||||
ctx, proba, linalg::MakeVec(response.data(), response.size(), ctx->Device()), weights);
|
||||
local_area(c) = fp * tp(c);
|
||||
});
|
||||
}
|
||||
@@ -220,7 +217,7 @@ std::pair<double, uint32_t> RankingAUC(Context const *ctx, std::vector<float> co
|
||||
CHECK_GE(info.group_ptr_.size(), 2);
|
||||
uint32_t n_groups = info.group_ptr_.size() - 1;
|
||||
auto s_predts = common::Span<float const>{predts};
|
||||
auto labels = info.labels.View(Context::kCpuId);
|
||||
auto labels = info.labels.View(ctx->Device());
|
||||
auto s_weights = info.weights_.ConstHostSpan();
|
||||
|
||||
std::atomic<uint32_t> invalid_groups{0};
|
||||
@@ -363,8 +360,8 @@ class EvalROCAUC : public EvalAUC<EvalROCAUC> {
|
||||
info.labels.HostView().Slice(linalg::All(), 0),
|
||||
common::OptionalWeights{info.weights_.ConstHostSpan()});
|
||||
} else {
|
||||
std::tie(fp, tp, auc) = GPUBinaryROCAUC(predts.ConstDeviceSpan(), info,
|
||||
ctx_->gpu_id, &this->d_cache_);
|
||||
std::tie(fp, tp, auc) =
|
||||
GPUBinaryROCAUC(predts.ConstDeviceSpan(), info, ctx_->Device(), &this->d_cache_);
|
||||
}
|
||||
return std::make_tuple(fp, tp, auc);
|
||||
}
|
||||
@@ -381,8 +378,7 @@ XGBOOST_REGISTER_METRIC(EvalAUC, "auc")
|
||||
|
||||
#if !defined(XGBOOST_USE_CUDA)
|
||||
std::tuple<double, double, double> GPUBinaryROCAUC(common::Span<float const>, MetaInfo const &,
|
||||
std::int32_t,
|
||||
std::shared_ptr<DeviceAUCCache> *) {
|
||||
DeviceOrd, std::shared_ptr<DeviceAUCCache> *) {
|
||||
common::AssertGPUSupport();
|
||||
return {};
|
||||
}
|
||||
@@ -414,8 +410,8 @@ class EvalPRAUC : public EvalAUC<EvalPRAUC> {
|
||||
BinaryPRAUC(ctx_, predts.ConstHostSpan(), info.labels.HostView().Slice(linalg::All(), 0),
|
||||
common::OptionalWeights{info.weights_.ConstHostSpan()});
|
||||
} else {
|
||||
std::tie(pr, re, auc) = GPUBinaryPRAUC(predts.ConstDeviceSpan(), info,
|
||||
ctx_->gpu_id, &this->d_cache_);
|
||||
std::tie(pr, re, auc) =
|
||||
GPUBinaryPRAUC(predts.ConstDeviceSpan(), info, ctx_->Device(), &this->d_cache_);
|
||||
}
|
||||
return std::make_tuple(pr, re, auc);
|
||||
}
|
||||
@@ -459,7 +455,7 @@ XGBOOST_REGISTER_METRIC(AUCPR, "aucpr")
|
||||
|
||||
#if !defined(XGBOOST_USE_CUDA)
|
||||
std::tuple<double, double, double> GPUBinaryPRAUC(common::Span<float const>, MetaInfo const &,
|
||||
std::int32_t, std::shared_ptr<DeviceAUCCache> *) {
|
||||
DeviceOrd, std::shared_ptr<DeviceAUCCache> *) {
|
||||
common::AssertGPUSupport();
|
||||
return {};
|
||||
}
|
||||
|
||||
@@ -85,11 +85,11 @@ void InitCacheOnce(common::Span<float const> predts, std::shared_ptr<DeviceAUCCa
|
||||
template <typename Fn>
|
||||
std::tuple<double, double, double>
|
||||
GPUBinaryAUC(common::Span<float const> predts, MetaInfo const &info,
|
||||
int32_t device, common::Span<size_t const> d_sorted_idx,
|
||||
DeviceOrd device, common::Span<size_t const> d_sorted_idx,
|
||||
Fn area_fn, std::shared_ptr<DeviceAUCCache> cache) {
|
||||
auto labels = info.labels.View(device);
|
||||
auto weights = info.weights_.ConstDeviceSpan();
|
||||
dh::safe_cuda(cudaSetDevice(device));
|
||||
dh::safe_cuda(cudaSetDevice(device.ordinal));
|
||||
|
||||
CHECK_NE(labels.Size(), 0);
|
||||
CHECK_EQ(labels.Size(), predts.size());
|
||||
@@ -168,7 +168,7 @@ GPUBinaryAUC(common::Span<float const> predts, MetaInfo const &info,
|
||||
}
|
||||
|
||||
std::tuple<double, double, double> GPUBinaryROCAUC(common::Span<float const> predts,
|
||||
MetaInfo const &info, std::int32_t device,
|
||||
MetaInfo const &info, DeviceOrd device,
|
||||
std::shared_ptr<DeviceAUCCache> *p_cache) {
|
||||
auto &cache = *p_cache;
|
||||
InitCacheOnce<false>(predts, p_cache);
|
||||
@@ -309,9 +309,10 @@ void SegmentedReduceAUC(common::Span<size_t const> d_unique_idx,
|
||||
* up each class in all kernels.
|
||||
*/
|
||||
template <bool scale, typename Fn>
|
||||
double GPUMultiClassAUCOVR(MetaInfo const &info, int32_t device, common::Span<uint32_t> d_class_ptr,
|
||||
size_t n_classes, std::shared_ptr<DeviceAUCCache> cache, Fn area_fn) {
|
||||
dh::safe_cuda(cudaSetDevice(device));
|
||||
double GPUMultiClassAUCOVR(MetaInfo const &info, DeviceOrd device,
|
||||
common::Span<uint32_t> d_class_ptr, size_t n_classes,
|
||||
std::shared_ptr<DeviceAUCCache> cache, Fn area_fn) {
|
||||
dh::safe_cuda(cudaSetDevice(device.ordinal));
|
||||
/**
|
||||
* Sorted idx
|
||||
*/
|
||||
@@ -467,11 +468,12 @@ double GPUMultiClassROCAUC(Context const *ctx, common::Span<float const> predts,
|
||||
dh::TemporaryArray<uint32_t> class_ptr(n_classes + 1, 0);
|
||||
MultiClassSortedIdx(ctx, predts, dh::ToSpan(class_ptr), cache);
|
||||
|
||||
auto fn = [] XGBOOST_DEVICE(double fp_prev, double fp, double tp_prev,
|
||||
double tp, size_t /*class_id*/) {
|
||||
auto fn = [] XGBOOST_DEVICE(double fp_prev, double fp, double tp_prev, double tp,
|
||||
size_t /*class_id*/) {
|
||||
return TrapezoidArea(fp_prev, fp, tp_prev, tp);
|
||||
};
|
||||
return GPUMultiClassAUCOVR<true>(info, ctx->gpu_id, dh::ToSpan(class_ptr), n_classes, cache, fn);
|
||||
return GPUMultiClassAUCOVR<true>(info, ctx->Device(), dh::ToSpan(class_ptr), n_classes, cache,
|
||||
fn);
|
||||
}
|
||||
|
||||
namespace {
|
||||
@@ -512,7 +514,7 @@ std::pair<double, std::uint32_t> GPURankingAUC(Context const *ctx, common::Span<
|
||||
/**
|
||||
* Sort the labels
|
||||
*/
|
||||
auto d_labels = info.labels.View(ctx->gpu_id);
|
||||
auto d_labels = info.labels.View(ctx->Device());
|
||||
|
||||
auto d_sorted_idx = dh::ToSpan(cache->sorted_idx);
|
||||
common::SegmentedArgSort<false, false>(ctx, d_labels.Values(), d_group_ptr, d_sorted_idx);
|
||||
@@ -604,7 +606,7 @@ std::pair<double, std::uint32_t> GPURankingAUC(Context const *ctx, common::Span<
|
||||
}
|
||||
|
||||
std::tuple<double, double, double> GPUBinaryPRAUC(common::Span<float const> predts,
|
||||
MetaInfo const &info, std::int32_t device,
|
||||
MetaInfo const &info, DeviceOrd device,
|
||||
std::shared_ptr<DeviceAUCCache> *p_cache) {
|
||||
auto& cache = *p_cache;
|
||||
InitCacheOnce<false>(predts, p_cache);
|
||||
@@ -662,7 +664,7 @@ double GPUMultiClassPRAUC(Context const *ctx, common::Span<float const> predts,
|
||||
/**
|
||||
* Get total positive/negative
|
||||
*/
|
||||
auto labels = info.labels.View(ctx->gpu_id);
|
||||
auto labels = info.labels.View(ctx->Device());
|
||||
auto n_samples = info.num_row_;
|
||||
dh::caching_device_vector<Pair> totals(n_classes);
|
||||
auto key_it =
|
||||
@@ -695,13 +697,13 @@ double GPUMultiClassPRAUC(Context const *ctx, common::Span<float const> predts,
|
||||
return detail::CalcDeltaPRAUC(fp_prev, fp, tp_prev, tp,
|
||||
d_totals[class_id].first);
|
||||
};
|
||||
return GPUMultiClassAUCOVR<false>(info, ctx->gpu_id, d_class_ptr, n_classes, cache, fn);
|
||||
return GPUMultiClassAUCOVR<false>(info, ctx->Device(), d_class_ptr, n_classes, cache, fn);
|
||||
}
|
||||
|
||||
template <typename Fn>
|
||||
std::pair<double, uint32_t>
|
||||
GPURankingPRAUCImpl(common::Span<float const> predts, MetaInfo const &info,
|
||||
common::Span<uint32_t> d_group_ptr, int32_t device,
|
||||
common::Span<uint32_t> d_group_ptr, DeviceOrd device,
|
||||
std::shared_ptr<DeviceAUCCache> cache, Fn area_fn) {
|
||||
/**
|
||||
* Sorted idx
|
||||
@@ -843,7 +845,7 @@ std::pair<double, std::uint32_t> GPURankingPRAUC(Context const *ctx,
|
||||
common::SegmentedArgSort<false, false>(ctx, predts, d_group_ptr, d_sorted_idx);
|
||||
|
||||
dh::XGBDeviceAllocator<char> alloc;
|
||||
auto labels = info.labels.View(ctx->gpu_id);
|
||||
auto labels = info.labels.View(ctx->Device());
|
||||
if (thrust::any_of(thrust::cuda::par(alloc), dh::tbegin(labels.Values()),
|
||||
dh::tend(labels.Values()), PRAUCLabelInvalid{})) {
|
||||
InvalidLabels();
|
||||
@@ -882,7 +884,7 @@ std::pair<double, std::uint32_t> GPURankingPRAUC(Context const *ctx,
|
||||
return detail::CalcDeltaPRAUC(fp_prev, fp, tp_prev, tp,
|
||||
d_totals[group_id].first);
|
||||
};
|
||||
return GPURankingPRAUCImpl(predts, info, d_group_ptr, ctx->gpu_id, cache, fn);
|
||||
return GPURankingPRAUCImpl(predts, info, d_group_ptr, ctx->Device(), cache, fn);
|
||||
}
|
||||
} // namespace metric
|
||||
} // namespace xgboost
|
||||
|
||||
@@ -30,7 +30,7 @@ XGBOOST_DEVICE inline double TrapezoidArea(double x0, double x1, double y0, doub
|
||||
struct DeviceAUCCache;
|
||||
|
||||
std::tuple<double, double, double> GPUBinaryROCAUC(common::Span<float const> predts,
|
||||
MetaInfo const &info, std::int32_t device,
|
||||
MetaInfo const &info, DeviceOrd,
|
||||
std::shared_ptr<DeviceAUCCache> *p_cache);
|
||||
|
||||
double GPUMultiClassROCAUC(Context const *ctx, common::Span<float const> predts,
|
||||
@@ -45,7 +45,7 @@ std::pair<double, std::uint32_t> GPURankingAUC(Context const *ctx, common::Span<
|
||||
* PR AUC *
|
||||
**********/
|
||||
std::tuple<double, double, double> GPUBinaryPRAUC(common::Span<float const> predts,
|
||||
MetaInfo const &info, std::int32_t device,
|
||||
MetaInfo const &info, DeviceOrd,
|
||||
std::shared_ptr<DeviceAUCCache> *p_cache);
|
||||
|
||||
double GPUMultiClassPRAUC(Context const *ctx, common::Span<float const> predts,
|
||||
|
||||
@@ -45,7 +45,7 @@ namespace {
|
||||
template <typename Fn>
|
||||
PackedReduceResult Reduce(Context const* ctx, MetaInfo const& info, Fn&& loss) {
|
||||
PackedReduceResult result;
|
||||
auto labels = info.labels.View(ctx->gpu_id);
|
||||
auto labels = info.labels.View(ctx->Device());
|
||||
if (ctx->IsCPU()) {
|
||||
auto n_threads = ctx->Threads();
|
||||
std::vector<double> score_tloc(n_threads, 0.0);
|
||||
@@ -183,10 +183,10 @@ class PseudoErrorLoss : public MetricNoCache {
|
||||
|
||||
double Eval(const HostDeviceVector<bst_float>& preds, const MetaInfo& info) override {
|
||||
CHECK_EQ(info.labels.Shape(0), info.num_row_);
|
||||
auto labels = info.labels.View(ctx_->gpu_id);
|
||||
preds.SetDevice(ctx_->gpu_id);
|
||||
auto labels = info.labels.View(ctx_->Device());
|
||||
preds.SetDevice(ctx_->Device());
|
||||
auto predts = ctx_->IsCPU() ? preds.ConstHostSpan() : preds.ConstDeviceSpan();
|
||||
info.weights_.SetDevice(ctx_->gpu_id);
|
||||
info.weights_.SetDevice(ctx_->Device());
|
||||
common::OptionalWeights weights(ctx_->IsCPU() ? info.weights_.ConstHostSpan()
|
||||
: info.weights_.ConstDeviceSpan());
|
||||
float slope = this->param_.huber_slope;
|
||||
@@ -349,11 +349,11 @@ struct EvalEWiseBase : public MetricNoCache {
|
||||
if (info.labels.Size() != 0) {
|
||||
CHECK_NE(info.labels.Shape(1), 0);
|
||||
}
|
||||
auto labels = info.labels.View(ctx_->gpu_id);
|
||||
info.weights_.SetDevice(ctx_->gpu_id);
|
||||
auto labels = info.labels.View(ctx_->Device());
|
||||
info.weights_.SetDevice(ctx_->Device());
|
||||
common::OptionalWeights weights(ctx_->IsCPU() ? info.weights_.ConstHostSpan()
|
||||
: info.weights_.ConstDeviceSpan());
|
||||
preds.SetDevice(ctx_->gpu_id);
|
||||
preds.SetDevice(ctx_->Device());
|
||||
auto predts = ctx_->IsCPU() ? preds.ConstHostSpan() : preds.ConstDeviceSpan();
|
||||
|
||||
auto d_policy = policy_;
|
||||
@@ -444,16 +444,16 @@ class QuantileError : public MetricNoCache {
|
||||
}
|
||||
|
||||
auto const* ctx = ctx_;
|
||||
auto y_true = info.labels.View(ctx->gpu_id);
|
||||
preds.SetDevice(ctx->gpu_id);
|
||||
alpha_.SetDevice(ctx->gpu_id);
|
||||
auto y_true = info.labels.View(ctx->Device());
|
||||
preds.SetDevice(ctx->Device());
|
||||
alpha_.SetDevice(ctx->Device());
|
||||
auto alpha = ctx->IsCPU() ? alpha_.ConstHostSpan() : alpha_.ConstDeviceSpan();
|
||||
std::size_t n_targets = preds.Size() / info.num_row_ / alpha_.Size();
|
||||
CHECK_NE(n_targets, 0);
|
||||
auto y_predt = linalg::MakeTensorView(ctx, &preds, static_cast<std::size_t>(info.num_row_),
|
||||
alpha_.Size(), n_targets);
|
||||
|
||||
info.weights_.SetDevice(ctx->gpu_id);
|
||||
info.weights_.SetDevice(ctx->Device());
|
||||
common::OptionalWeights weight{ctx->IsCPU() ? info.weights_.ConstHostSpan()
|
||||
: info.weights_.ConstDeviceSpan()};
|
||||
|
||||
|
||||
@@ -75,7 +75,7 @@ struct EvalAMS : public MetricNoCache {
|
||||
const double br = 10.0;
|
||||
unsigned thresindex = 0;
|
||||
double s_tp = 0.0, b_fp = 0.0, tams = 0.0;
|
||||
const auto& labels = info.labels.View(Context::kCpuId);
|
||||
const auto& labels = info.labels.View(DeviceOrd::CPU());
|
||||
for (unsigned i = 0; i < static_cast<unsigned>(ndata-1) && i < ntop; ++i) {
|
||||
const unsigned ridx = rec[i].second;
|
||||
const bst_float wt = info.GetWeight(ridx);
|
||||
@@ -134,7 +134,7 @@ struct EvalRank : public MetricNoCache, public EvalRankConfig {
|
||||
std::vector<double> sum_tloc(ctx_->Threads(), 0.0);
|
||||
|
||||
{
|
||||
const auto& labels = info.labels.View(Context::kCpuId);
|
||||
const auto& labels = info.labels.HostView();
|
||||
const auto &h_preds = preds.ConstHostVector();
|
||||
|
||||
dmlc::OMPException exc;
|
||||
|
||||
@@ -33,7 +33,7 @@ PackedReduceResult PreScore(Context const *ctx, MetaInfo const &info,
|
||||
HostDeviceVector<float> const &predt,
|
||||
std::shared_ptr<ltr::PreCache> p_cache) {
|
||||
auto d_gptr = p_cache->DataGroupPtr(ctx);
|
||||
auto d_label = info.labels.View(ctx->gpu_id).Slice(linalg::All(), 0);
|
||||
auto d_label = info.labels.View(ctx->Device()).Slice(linalg::All(), 0);
|
||||
|
||||
predt.SetDevice(ctx->gpu_id);
|
||||
auto d_rank_idx = p_cache->SortedIdx(ctx, predt.ConstDeviceSpan());
|
||||
@@ -89,7 +89,7 @@ PackedReduceResult NDCGScore(Context const *ctx, MetaInfo const &info,
|
||||
if (!d_weight.Empty()) {
|
||||
CHECK_EQ(d_weight.weights.size(), p_cache->Groups());
|
||||
}
|
||||
auto d_label = info.labels.View(ctx->gpu_id).Slice(linalg::All(), 0);
|
||||
auto d_label = info.labels.View(ctx->Device()).Slice(linalg::All(), 0);
|
||||
predt.SetDevice(ctx->gpu_id);
|
||||
auto d_predt = linalg::MakeTensorView(ctx, predt.ConstDeviceSpan(), predt.Size());
|
||||
|
||||
@@ -119,9 +119,9 @@ PackedReduceResult MAPScore(Context const *ctx, MetaInfo const &info,
|
||||
HostDeviceVector<float> const &predt, bool minus,
|
||||
std::shared_ptr<ltr::MAPCache> p_cache) {
|
||||
auto d_group_ptr = p_cache->DataGroupPtr(ctx);
|
||||
auto d_label = info.labels.View(ctx->gpu_id).Slice(linalg::All(), 0);
|
||||
auto d_label = info.labels.View(ctx->Device()).Slice(linalg::All(), 0);
|
||||
|
||||
predt.SetDevice(ctx->gpu_id);
|
||||
predt.SetDevice(ctx->Device());
|
||||
auto d_rank_idx = p_cache->SortedIdx(ctx, predt.ConstDeviceSpan());
|
||||
auto key_it = dh::MakeTransformIterator<std::size_t>(
|
||||
thrust::make_counting_iterator(0ul),
|
||||
|
||||
Reference in New Issue
Block a user