/*!
 * Copyright 2020 XGBoost contributors
 */
// When device ordinal is present, we would want to build the metrics on the GPU. It is *not*
// possible for a valid device ordinal to be present for non GPU builds. However, it is possible
// for an invalid device ordinal to be specified in GPU builds - to train/predict and/or compute
// the metrics on CPU. To accommodate these scenarios, the following is done for the metrics
// accelerated on the GPU.
// - An internal GPU registry holds all the GPU metric types (defined in the .cu file)
// - An instance of the appropriate GPU metric type is created when a device ordinal is present
//   - If the creation is successful, the metric computation is done on the device
//   - else, it falls back on the CPU
// - The GPU metric types are *only* registered when xgboost is built for GPUs
//
// This is done for 2 reasons:
// - Clear separation of CPU and GPU logic
// - Sorting datasets containing a large number of rows is (much) faster when parallel sort
//   semantics are used on the CPU. The __gnu_parallel/concurrency primitives needed to perform
//   this cannot be used when the translation unit is compiled with the 'nvcc' compiler (as the
//   corresponding headers that bring in those function declarations can't be included with CUDA).
//   This precludes the CPU and GPU logic from coexisting inside a .cu file.

#include <rabit/rabit.h>
#include <dmlc/registry.h>

#include <cmath>
#include <utility>
#include <vector>

#include "xgboost/host_device_vector.h"
#include "../common/math.h"
#include "../common/threading_utils.h"
#include "metric_common.h"

namespace {

using PredIndPair = std::pair<xgboost::bst_float, unsigned>;
using PredIndPairContainer = std::vector<PredIndPair>;

/*
 * Adapter to access instance weights.
 *
 *  - For ranking task, weights are per-group
 *  - For binary classification task, weights are per-instance
 *
 * WeightPolicy::GetWeightOfInstance() :
 *   get weight associated with an individual instance, using index into
 *   `info.weights`
 * WeightPolicy::GetWeightOfSortedRecord() :
 *   get weight associated with an individual instance, using index into
 *   sorted records `rec` (in ascending order of predicted labels). `rec` is
 *   of type PredIndPairContainer
 */
class PerInstanceWeightPolicy {
 public:
  inline static xgboost::bst_float
  GetWeightOfInstance(const xgboost::MetaInfo& info,
                      unsigned instance_id, unsigned) {
    return info.GetWeight(instance_id);
  }

  inline static xgboost::bst_float
  GetWeightOfSortedRecord(const xgboost::MetaInfo& info,
                          const PredIndPairContainer& rec,
                          unsigned record_id, unsigned) {
    return info.GetWeight(rec[record_id].second);
  }
};

class PerGroupWeightPolicy {
 public:
  inline static xgboost::bst_float
  GetWeightOfInstance(const xgboost::MetaInfo& info,
                      unsigned, unsigned group_id) {
    return info.GetWeight(group_id);
  }

  inline static xgboost::bst_float
  GetWeightOfSortedRecord(const xgboost::MetaInfo& info,
                          const PredIndPairContainer&,
                          unsigned, unsigned group_id) {
    return info.GetWeight(group_id);
  }
};
}  // anonymous namespace

namespace xgboost {
namespace metric {
// tag this file, used to force static linking later.
DMLC_REGISTRY_FILE_TAG(rank_metric);
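
// Sketch of the dispatch used below (all details are visible in the code in this file):
// EvalRank and EvalAucPR lazily call GPUMetric::CreateGPUMetric(this->Name(), tparam_)
// the first time a device ordinal is configured (tparam_->gpu_id >= 0). If no GPU metric
// is registered - e.g. in a CPU-only build - that call returns nullptr and evaluation
// silently falls back to the CPU implementations below.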
/*! \brief AMS: also records best threshold */
struct EvalAMS : public Metric {
 public:
  explicit EvalAMS(const char* param) {
    CHECK(param != nullptr)  // NOLINT
        << "AMS must be in format ams@k";
    ratio_ = atof(param);
    std::ostringstream os;
    os << "ams@" << ratio_;
    name_ = os.str();
  }

  bst_float Eval(const HostDeviceVector<bst_float> &preds,
                 const MetaInfo &info,
                 bool distributed) override {
    CHECK(!distributed) << "metric AMS does not support distributed evaluation";
    using namespace std;  // NOLINT(*)

    const auto ndata = static_cast<bst_omp_uint>(info.labels_.Size());
    PredIndPairContainer rec(ndata);

    const auto &h_preds = preds.ConstHostVector();
    common::ParallelFor(ndata, [&](bst_omp_uint i) {
      rec[i] = std::make_pair(h_preds[i], i);
    });
    XGBOOST_PARALLEL_SORT(rec.begin(), rec.end(), common::CmpFirst);
    auto ntop = static_cast<unsigned>(ratio_ * ndata);
    if (ntop == 0) ntop = ndata;
    const double br = 10.0;
    unsigned thresindex = 0;
    double s_tp = 0.0, b_fp = 0.0, tams = 0.0;
    const auto& labels = info.labels_.ConstHostVector();
    for (unsigned i = 0; i < static_cast<unsigned>(ndata - 1) && i < ntop; ++i) {
      const unsigned ridx = rec[i].second;
      const bst_float wt = info.GetWeight(ridx);
      if (labels[ridx] > 0.5f) {
        s_tp += wt;
      } else {
        b_fp += wt;
      }
      if (rec[i].first != rec[i + 1].first) {
        // AMS = sqrt(2 * ((s + b + br) * ln(1 + s/(b + br)) - s)), with br = 10 as regularization
        double ams = sqrt(2 * ((s_tp + b_fp + br) * log(1.0 + s_tp / (b_fp + br)) - s_tp));
        if (tams < ams) {
          thresindex = i;
          tams = ams;
        }
      }
    }

    if (ntop == ndata) {
      LOG(INFO) << "best-ams-ratio=" << static_cast<bst_float>(thresindex) / ndata;
      return static_cast<bst_float>(tams);
    } else {
      return static_cast<bst_float>(
          sqrt(2 * ((s_tp + b_fp + br) * log(1.0 + s_tp / (b_fp + br)) - s_tp)));
    }
  }

  const char* Name() const override {
    return name_.c_str();
  }

 private:
  std::string name_;
  float ratio_;
};
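
// Parameter handling shared by the ranking metrics below (pre, ndcg, map), as parsed in
// EvalRank's constructor: an optional "@n" suffix restricts evaluation to the top n
// documents of each group (e.g. eval_metric="ndcg@10"), and a trailing '-' (e.g. "ndcg@10-")
// sets `minus`, which makes ndcg/map score a group without any positive label as 0 instead of 1.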
/*! \brief Evaluate rank list */
struct EvalRank : public Metric, public EvalRankConfig {
 private:
  // This is used to compute the ranking metrics on the GPU - for training jobs that run on the GPU.
  std::unique_ptr<xgboost::Metric> rank_gpu_;

 public:
  bst_float Eval(const HostDeviceVector<bst_float> &preds,
                 const MetaInfo &info,
                 bool distributed) override {
    CHECK_EQ(preds.Size(), info.labels_.Size())
        << "label size and prediction size do not match";

    // quick consistency check when group information is not available
    std::vector<unsigned> tgptr(2, 0);
    tgptr[1] = static_cast<unsigned>(preds.Size());
    const auto &gptr = info.group_ptr_.size() == 0 ? tgptr : info.group_ptr_;

    CHECK_NE(gptr.size(), 0U) << "must specify group when constructing rank file";
    CHECK_EQ(gptr.back(), preds.Size())
        << "EvalRank: group structure must match number of predictions";

    const auto ngroups = static_cast<bst_omp_uint>(gptr.size() - 1);
    // sum statistics
    double sum_metric = 0.0f;

    // Check and see if we have the GPU metric registered in the internal registry
    if (tparam_->gpu_id >= 0) {
      if (!rank_gpu_) {
        rank_gpu_.reset(GPUMetric::CreateGPUMetric(this->Name(), tparam_));
      }
      if (rank_gpu_) {
        sum_metric = rank_gpu_->Eval(preds, info, distributed);
      }
    }

    CHECK(tparam_);
    std::vector<double> sum_tloc(tparam_->Threads(), 0.0);

    if (!rank_gpu_ || tparam_->gpu_id < 0) {
      const auto &labels = info.labels_.ConstHostVector();
      const auto &h_preds = preds.ConstHostVector();

      dmlc::OMPException exc;
#pragma omp parallel num_threads(tparam_->Threads())
      {
        exc.Run([&]() {
          // each thread takes a local rec
          PredIndPairContainer rec;
#pragma omp for schedule(static)
          for (bst_omp_uint k = 0; k < ngroups; ++k) {
            exc.Run([&]() {
              rec.clear();
              for (unsigned j = gptr[k]; j < gptr[k + 1]; ++j) {
                rec.emplace_back(h_preds[j], static_cast<int>(labels[j]));
              }
              sum_tloc[omp_get_thread_num()] += this->EvalGroup(&rec);
            });
          }
        });
      }
      sum_metric = std::accumulate(sum_tloc.cbegin(), sum_tloc.cend(), 0.0);
      exc.Rethrow();
    }

    if (distributed) {
      bst_float dat[2];
      dat[0] = static_cast<bst_float>(sum_metric);
      dat[1] = static_cast<bst_float>(ngroups);
      // approximately estimate the metric using mean
      rabit::Allreduce<rabit::op::Sum>(dat, 2);
      return dat[0] / dat[1];
    } else {
      return static_cast<bst_float>(sum_metric) / ngroups;
    }
  }

  const char* Name() const override {
    return name.c_str();
  }

 protected:
  explicit EvalRank(const char* name, const char* param) {
    using namespace std;  // NOLINT(*)

    if (param != nullptr) {
      std::ostringstream os;
      if (sscanf(param, "%u[-]?", &topn) == 1) {
        os << name << '@' << param;
        this->name = os.str();
      } else {
        os << name << param;
        this->name = os.str();
      }
      if (param[strlen(param) - 1] == '-') {
        minus = true;
      }
    } else {
      this->name = name;
    }
  }

  virtual double EvalGroup(PredIndPairContainer *recptr) const = 0;
};

/*! \brief Precision at N, for both classification and rank */
struct EvalPrecision : public EvalRank {
 public:
  explicit EvalPrecision(const char* name, const char* param) : EvalRank(name, param) {}

  double EvalGroup(PredIndPairContainer *recptr) const override {
    PredIndPairContainer &rec(*recptr);
    // calculate Precision
    std::stable_sort(rec.begin(), rec.end(), common::CmpFirst);
    unsigned nhit = 0;
    for (size_t j = 0; j < rec.size() && j < this->topn; ++j) {
      nhit += (rec[j].second != 0);
    }
    return static_cast<double>(nhit) / this->topn;
  }
};

/*! \brief NDCG: Normalized Discounted Cumulative Gain at N */
struct EvalNDCG : public EvalRank {
 private:
  double CalcDCG(const PredIndPairContainer &rec) const {
    double sumdcg = 0.0;
    for (size_t i = 0; i < rec.size() && i < this->topn; ++i) {
      const unsigned rel = rec[i].second;
      if (rel != 0) {
        // gain (2^rel - 1) discounted by log2(position + 1), with 1-based positions
        sumdcg += ((1 << rel) - 1) / std::log2(i + 2.0);
      }
    }
    return sumdcg;
  }

 public:
  explicit EvalNDCG(const char* name, const char* param) : EvalRank(name, param) {}

  double EvalGroup(PredIndPairContainer *recptr) const override {
    PredIndPairContainer &rec(*recptr);
    std::stable_sort(rec.begin(), rec.end(), common::CmpFirst);
    double dcg = CalcDCG(rec);
    std::stable_sort(rec.begin(), rec.end(), common::CmpSecond);
    double idcg = CalcDCG(rec);
    if (idcg == 0.0f) {
      if (this->minus) {
        return 0.0f;
      } else {
        return 1.0f;
      }
    }
    return dcg / idcg;
  }
};
/*! \brief Mean Average Precision at N, for both classification and rank */
struct EvalMAP : public EvalRank {
 public:
  explicit EvalMAP(const char* name, const char* param) : EvalRank(name, param) {}

  double EvalGroup(PredIndPairContainer *recptr) const override {
    PredIndPairContainer &rec(*recptr);
    std::stable_sort(rec.begin(), rec.end(), common::CmpFirst);
    unsigned nhits = 0;
    double sumap = 0.0;
    for (size_t i = 0; i < rec.size(); ++i) {
      if (rec[i].second != 0) {
        nhits += 1;
        if (i < this->topn) {
          sumap += static_cast<double>(nhits) / (i + 1);
        }
      }
    }
    if (nhits != 0) {
      sumap /= nhits;
      return sumap;
    } else {
      if (this->minus) {
        return 0.0f;
      } else {
        return 1.0f;
      }
    }
  }
};

/*! \brief Cox: Partial likelihood of the Cox proportional hazards model */
struct EvalCox : public Metric {
 public:
  EvalCox() = default;

  bst_float Eval(const HostDeviceVector<bst_float> &preds,
                 const MetaInfo &info,
                 bool distributed) override {
    CHECK(!distributed) << "Cox metric does not support distributed evaluation";
    using namespace std;  // NOLINT(*)

    const auto ndata = static_cast<bst_omp_uint>(info.labels_.Size());
    const auto &label_order = info.LabelAbsSort();

    // pre-compute a sum for the denominator
    double exp_p_sum = 0;  // we use double because we might need the precision with large datasets

    const auto &h_preds = preds.ConstHostVector();
    for (omp_ulong i = 0; i < ndata; ++i) {
      exp_p_sum += h_preds[i];
    }

    double out = 0;
    double accumulated_sum = 0;
    bst_omp_uint num_events = 0;
    const auto& labels = info.labels_.ConstHostVector();
    for (bst_omp_uint i = 0; i < ndata; ++i) {
      const size_t ind = label_order[i];
      const auto label = labels[ind];
      if (label > 0) {
        out -= log(h_preds[ind]) - log(exp_p_sum);
        ++num_events;
      }

      // only update the denominator after we move forward in time (labels are sorted)
      accumulated_sum += h_preds[ind];
      if (i == ndata - 1 || std::abs(label) < std::abs(labels[label_order[i + 1]])) {
        exp_p_sum -= accumulated_sum;
        accumulated_sum = 0;
      }
    }

    return out / num_events;  // normalize by the number of events
  }

  const char* Name() const override {
    return "cox-nloglik";
  }
};

/*! \brief Area Under PR Curve, for both classification and rank computed on CPU */
struct EvalAucPR : public Metric {
  // implementation of AUC-PR for weighted data
  // translated from PRROC R Package
  // see https://doi.org/10.1371/journal.pone.0092209

 private:
  // This is used to compute the AUCPR metrics on the GPU - for ranking tasks and
  // for training jobs that run on the GPU.
  std::unique_ptr<xgboost::Metric> aucpr_gpu_;

  template <typename WeightPolicy>
  bst_float Eval(const HostDeviceVector<bst_float> &preds, const MetaInfo &info,
                 bool distributed, const std::vector<unsigned> &gptr) {
    const auto ngroups = static_cast<bst_omp_uint>(gptr.size() - 1);

    // sum of all AUC's across all query groups
    double sum_auc = 0.0;
    int auc_error = 0;

    const auto &h_labels = info.labels_.ConstHostVector();
    const auto &h_preds = preds.ConstHostVector();

    dmlc::OMPException exc;
#pragma omp parallel reduction(+:sum_auc, auc_error) if (ngroups > 1)
    {
      exc.Run([&]() {
        // Each thread works on a distinct group and sorts the predictions in that group
        PredIndPairContainer rec;
#pragma omp for schedule(static)
        for (bst_omp_uint group_id = 0; group_id < ngroups; ++group_id) {
          exc.Run([&]() {
            double total_pos = 0.0;
            double total_neg = 0.0;
            // Same thread can work on multiple groups one after another; hence, resize
            // the predictions array based on the current group
            rec.resize(gptr[group_id + 1] - gptr[group_id]);
            #pragma omp parallel for schedule(static) reduction(+:total_pos, total_neg) \
              if (!omp_in_parallel())  // NOLINT
            for (bst_omp_uint j = gptr[group_id]; j < gptr[group_id + 1]; ++j) {
              exc.Run([&]() {
                const bst_float wt = WeightPolicy::GetWeightOfInstance(info, j, group_id);
                total_pos += wt * h_labels[j];
                total_neg += wt * (1.0f - h_labels[j]);
                rec[j - gptr[group_id]] = {h_preds[j], j};
              });
            }

            // we need pos > 0 && neg > 0
            if (total_pos <= 0.0 || total_neg <= 0.0) {
              auc_error += 1;
              return;
            }

            XGBOOST_PARALLEL_SORT(rec.begin(), rec.end(), common::CmpFirst);

            // calculate AUC
            double tp = 0.0, prevtp = 0.0, fp = 0.0, prevfp = 0.0, h = 0.0, a = 0.0, b = 0.0;
            for (size_t j = 0; j < rec.size(); ++j) {
              const bst_float wt = WeightPolicy::GetWeightOfSortedRecord(info, rec, j, group_id);
              tp += wt * h_labels[rec[j].second];
              fp += wt * (1.0f - h_labels[rec[j].second]);
              if ((j < rec.size() - 1 && rec[j].first != rec[j + 1].first) || j == rec.size() - 1) {
                if (tp == prevtp) {
                  a = 1.0;
                  b = 0.0;
                } else {
                  h = (fp - prevfp) / (tp - prevtp);
                  a = 1.0 + h;
                  b = (prevfp - h * prevtp) / total_pos;
                }
                if (0.0 != b) {
                  sum_auc += (tp / total_pos - prevtp / total_pos -
                              b / a * (std::log(a * tp / total_pos + b) -
                                       std::log(a * prevtp / total_pos + b))) / a;
                } else {
                  sum_auc += (tp / total_pos - prevtp / total_pos) / a;
                }
                prevtp = tp;
                prevfp = fp;
              }
            }

            // sanity check
            if (tp < 0 || prevtp < 0 || fp < 0 || prevfp < 0) {
              CHECK(!auc_error) << "AUC-PR: error in calculation";
            }
          });
        }
      });
    }
    exc.Rethrow();

    // Report average AUC-PR across all groups
    // In distributed mode, workers which only contain pos or neg samples
    // are ignored when aggregating AUC-PR.
    bst_float dat[2] = {0.0f, 0.0f};
    if (auc_error < static_cast<int>(ngroups)) {
      dat[0] = static_cast<bst_float>(sum_auc);
      dat[1] = static_cast<bst_float>(static_cast<int>(ngroups) - auc_error);
    }
    if (distributed) {
      rabit::Allreduce<rabit::op::Sum>(dat, 2);
    }
    CHECK_GT(dat[1], 0.0f)
        << "AUC-PR: the dataset only contains pos or neg samples";
    CHECK_LE(dat[0], dat[1]) << "AUC-PR: AUC > 1.0";
    return dat[0] / dat[1];
  }

 public:
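  // The public Eval below selects a WeightPolicy for the CPU implementation above:
  // PerGroupWeightPolicy when the task is a ranking task (group_ptr_ present and weights
  // are per-group rather than per-instance), PerInstanceWeightPolicy otherwise. The GPU
  // path is only attempted for ranking tasks.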
  bst_float Eval(const HostDeviceVector<bst_float> &preds, const MetaInfo &info,
                 bool distributed) override {
    CHECK_NE(info.labels_.Size(), 0U) << "label set cannot be empty";
    CHECK_EQ(preds.Size(), info.labels_.Size())
        << "label size and prediction size do not match";
    std::vector<unsigned> tgptr(2, 0);
    tgptr[1] = static_cast<unsigned>(info.labels_.Size());
    const auto &gptr = info.group_ptr_.empty() ? tgptr : info.group_ptr_;

    CHECK_EQ(gptr.back(), info.labels_.Size())
        << "EvalAucPR: group structure must match number of predictions";

    // For ranking task, weights are per-group
    // For binary classification task, weights are per-instance
    const bool is_ranking_task =
      !info.group_ptr_.empty() && info.weights_.Size() != info.num_row_;

    // Check if we have a GPU assignment; else, revert back to CPU
    if (tparam_->gpu_id >= 0 && is_ranking_task) {
      if (!aucpr_gpu_) {
        // Check and see if we have the GPU metric registered in the internal registry
        aucpr_gpu_.reset(GPUMetric::CreateGPUMetric(this->Name(), tparam_));
      }

      if (aucpr_gpu_) {
        return aucpr_gpu_->Eval(preds, info, distributed);
      }
    }

    if (is_ranking_task) {
      return Eval<PerGroupWeightPolicy>(preds, info, distributed, gptr);
    } else {
      return Eval<PerInstanceWeightPolicy>(preds, info, distributed, gptr);
    }
  }

  const char *Name() const override {
    return "aucpr";
  }
};

XGBOOST_REGISTER_METRIC(AMS, "ams")
.describe("AMS metric for higgs.")
.set_body([](const char* param) { return new EvalAMS(param); });

XGBOOST_REGISTER_METRIC(AucPR, "aucpr")
.describe("Area under PR curve for both classification and rank.")
.set_body([](const char*) { return new EvalAucPR(); });

XGBOOST_REGISTER_METRIC(Precision, "pre")
.describe("precision@k for rank.")
.set_body([](const char* param) { return new EvalPrecision("pre", param); });

XGBOOST_REGISTER_METRIC(NDCG, "ndcg")
.describe("ndcg@k for rank.")
.set_body([](const char* param) { return new EvalNDCG("ndcg", param); });

XGBOOST_REGISTER_METRIC(MAP, "map")
.describe("map@k for rank.")
.set_body([](const char* param) { return new EvalMAP("map", param); });

XGBOOST_REGISTER_METRIC(Cox, "cox-nloglik")
.describe("Negative log partial likelihood of Cox proportional hazards model.")
.set_body([](const char*) { return new EvalCox(); });
}  // namespace metric
}  // namespace xgboost