- create a gpu metrics (internal) registry (#5387)
* - create a gpu metrics (internal) registry
- the objective is to separate the cpu and gpu implementations such that they evolve
indepedently. to that end, this approach will:
- preserve the same metrics configuration (from the end user perspective)
- internally delegate the responsibility to the gpu metrics builder when there is a
valid device present
- decouple the gpu metrics builder from the cpu ones to prevent misuse
- move away from including the cuda file from within the cc file and segregate the code
via ifdef's
This commit is contained in:
parent
8d06878bf9
commit
1ba6706167
@ -7,12 +7,11 @@
|
||||
#include <xgboost/metric.h>
|
||||
#include <xgboost/generic_parameters.h>
|
||||
|
||||
namespace dmlc {
|
||||
DMLC_REGISTRY_ENABLE(::xgboost::MetricReg);
|
||||
}
|
||||
#include "metric_common.h"
|
||||
|
||||
namespace xgboost {
|
||||
Metric* Metric::Create(const std::string& name, GenericParameter const* tparam) {
|
||||
template <typename MetricRegistry>
|
||||
Metric* CreateMetricImpl(const std::string& name, GenericParameter const* tparam) {
|
||||
std::string buf = name;
|
||||
std::string prefix = name;
|
||||
const char* param;
|
||||
@ -26,29 +25,58 @@ Metric* Metric::Create(const std::string& name, GenericParameter const* tparam)
|
||||
prefix = buf;
|
||||
param = nullptr;
|
||||
}
|
||||
auto *e = ::dmlc::Registry< ::xgboost::MetricReg>::Get()->Find(prefix.c_str());
|
||||
auto *e = ::dmlc::Registry<MetricRegistry>::Get()->Find(prefix.c_str());
|
||||
if (e == nullptr) {
|
||||
LOG(FATAL) << "Unknown metric function " << name;
|
||||
return nullptr;
|
||||
}
|
||||
auto p_metric = (e->body)(param);
|
||||
p_metric->tparam_ = tparam;
|
||||
return p_metric;
|
||||
} else {
|
||||
std::string prefix = buf.substr(0, pos);
|
||||
auto *e = ::dmlc::Registry< ::xgboost::MetricReg>::Get()->Find(prefix.c_str());
|
||||
auto *e = ::dmlc::Registry<MetricRegistry>::Get()->Find(prefix.c_str());
|
||||
if (e == nullptr) {
|
||||
LOG(FATAL) << "Unknown metric function " << name;
|
||||
return nullptr;
|
||||
}
|
||||
auto p_metric = (e->body)(buf.substr(pos + 1, buf.length()).c_str());
|
||||
p_metric->tparam_ = tparam;
|
||||
return p_metric;
|
||||
}
|
||||
}
|
||||
|
||||
Metric *
|
||||
Metric::Create(const std::string& name, GenericParameter const* tparam) {
|
||||
auto metric = CreateMetricImpl<MetricReg>(name, tparam);
|
||||
if (metric == nullptr) {
|
||||
LOG(FATAL) << "Unknown metric function " << name;
|
||||
}
|
||||
|
||||
metric->tparam_ = tparam;
|
||||
return metric;
|
||||
}
|
||||
|
||||
Metric *
|
||||
GPUMetric::CreateGPUMetric(const std::string& name, GenericParameter const* tparam) {
|
||||
auto metric = CreateMetricImpl<MetricGPUReg>(name, tparam);
|
||||
if (metric == nullptr) {
|
||||
LOG(WARNING) << "Cannot find a GPU metric builder for metric " << name
|
||||
<< ". Resorting to the CPU builder";
|
||||
return metric;
|
||||
}
|
||||
|
||||
// Narrowing reference only for the compiler to allow assignment to a base class member.
|
||||
// As such, using this narrowed reference to refer to derived members will be an illegal op.
|
||||
// This is moot, as this type is stateless.
|
||||
static_cast<GPUMetric *>(metric)->tparam_ = tparam;
|
||||
return metric;
|
||||
}
|
||||
} // namespace xgboost
|
||||
|
||||
namespace dmlc {
|
||||
DMLC_REGISTRY_ENABLE(::xgboost::MetricReg);
|
||||
DMLC_REGISTRY_ENABLE(::xgboost::MetricGPUReg);
|
||||
}
|
||||
|
||||
namespace xgboost {
|
||||
namespace metric {
|
||||
|
||||
// List of files that will be force linked in static links.
|
||||
DMLC_REGISTRY_LINK_TAG(elementwise_metric);
|
||||
DMLC_REGISTRY_LINK_TAG(multiclass_metric);
|
||||
|
||||
@ -1,18 +1,54 @@
|
||||
/*!
|
||||
* Copyright 2018-2019 by Contributors
|
||||
* \file metric_param.cc
|
||||
* Copyright 2018-2020 by Contributors
|
||||
* \file metric_common.h
|
||||
*/
|
||||
#ifndef XGBOOST_METRIC_METRIC_COMMON_H_
|
||||
#define XGBOOST_METRIC_METRIC_COMMON_H_
|
||||
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
#include <limits>
|
||||
#include <string>
|
||||
|
||||
#include "../common/common.h"
|
||||
|
||||
namespace xgboost {
|
||||
|
||||
// This creates a GPU metric instance dynamically and adds it to the GPU metric registry, if not
|
||||
// present already. This is created when there is a device ordinal present and if xgboost
|
||||
// is compiled with CUDA support
|
||||
struct GPUMetric : Metric {
|
||||
static Metric *CreateGPUMetric(const std::string& name, GenericParameter const* tparam);
|
||||
};
|
||||
|
||||
/*!
|
||||
* \brief Internal registry entries for GPU Metric factory functions.
|
||||
* The additional parameter const char* param gives the value after @, can be null.
|
||||
* For example, metric map@3, then: param == "3".
|
||||
*/
|
||||
struct MetricGPUReg
|
||||
: public dmlc::FunctionRegEntryBase<MetricGPUReg,
|
||||
std::function<Metric * (const char*)> > {
|
||||
};
|
||||
|
||||
/*!
|
||||
* \brief Macro to register metric computed on GPU.
|
||||
*
|
||||
* \code
|
||||
* // example of registering a objective ndcg@k
|
||||
* XGBOOST_REGISTER_GPU_METRIC(NDCG_GPU, "ndcg")
|
||||
* .describe("NDCG metric computer on GPU.")
|
||||
* .set_body([](const char* param) {
|
||||
* int at_k = atoi(param);
|
||||
* return new NDCG(at_k);
|
||||
* });
|
||||
* \endcode
|
||||
*/
|
||||
|
||||
// Note: Metric names registered in the GPU registry should follow this convention:
|
||||
// - GPU metric types should be registered with the same name as the non GPU metric types
|
||||
#define XGBOOST_REGISTER_GPU_METRIC(UniqueId, Name) \
|
||||
::xgboost::MetricGPUReg& __make_ ## MetricGPUReg ## _ ## UniqueId ## __ = \
|
||||
::dmlc::Registry< ::xgboost::MetricGPUReg>::Get()->__REGISTER__(Name)
|
||||
|
||||
namespace metric {
|
||||
|
||||
// Ranking config to be used on device and host
|
||||
|
||||
@ -1,9 +1,25 @@
|
||||
/*!
|
||||
* Copyright 2015 by Contributors
|
||||
* \file rank_metric.cc
|
||||
* \brief prediction rank based metrics.
|
||||
* \author Kailong Chen, Tianqi Chen
|
||||
* Copyright 2020 XGBoost contributors
|
||||
*/
|
||||
// When device ordinal is present, we would want to build the metrics on the GPU. It is *not*
|
||||
// possible for a valid device ordinal to be present for non GPU builds. However, it is possible
|
||||
// for an invalid device ordinal to be specified in GPU builds - to train/predict and/or compute
|
||||
// the metrics on CPU. To accommodate these scenarios, the following is done for the metrics
|
||||
// accelarated on the GPU.
|
||||
// - An internal GPU registry holds all the GPU metric types (defined in the .cu file)
|
||||
// - An instance of the appropriate gpu metric type is created when a device ordinal is present
|
||||
// - If the creation is successful, the metric computation is done on the device
|
||||
// - else, it falls back on the CPU
|
||||
// - The GPU metric types are *only* registered when xgboost is built for GPUs
|
||||
//
|
||||
// This is done for 2 reasons:
|
||||
// - Clear separation of CPU and GPU logic
|
||||
// - Sorting datasets containing large number of rows is (much) faster when parallel sort
|
||||
// semantics is used on the CPU. The __gnu_parallel/concurrency primitives needed to perform
|
||||
// this cannot be used when the translation unit is compiled using the 'nvcc' compiler (as the
|
||||
// corresponding headers that brings in those function declaration can't be included with CUDA).
|
||||
// This precludes the CPU and GPU logic to coexist inside a .cu file
|
||||
|
||||
#include <rabit/rabit.h>
|
||||
#include <xgboost/metric.h>
|
||||
#include <dmlc/registry.h>
|
||||
@ -143,6 +159,10 @@ struct EvalAMS : public Metric {
|
||||
/*! \brief Area Under Curve, for both classification and rank computed on CPU */
|
||||
struct EvalAuc : public Metric {
|
||||
private:
|
||||
// This is used to compute the AUC metrics on the GPU - for ranking tasks and
|
||||
// for training jobs that run on the GPU.
|
||||
std::unique_ptr<xgboost::Metric> auc_gpu_;
|
||||
|
||||
template <typename WeightPolicy>
|
||||
bst_float Eval(const HostDeviceVector<bst_float> &preds,
|
||||
const MetaInfo &info,
|
||||
@ -239,6 +259,18 @@ struct EvalAuc : public Metric {
|
||||
const bool is_ranking_task =
|
||||
!info.group_ptr_.empty() && info.weights_.Size() != info.num_row_;
|
||||
|
||||
// Check if we have a GPU assignment; else, revert back to CPU
|
||||
if (tparam_->gpu_id >= 0 && is_ranking_task) {
|
||||
if (!auc_gpu_) {
|
||||
// Check and see if we have the GPU metric registered in the internal registry
|
||||
auc_gpu_.reset(GPUMetric::CreateGPUMetric(this->Name(), tparam_));
|
||||
}
|
||||
|
||||
if (auc_gpu_) {
|
||||
return auc_gpu_->Eval(preds, info, distributed);
|
||||
}
|
||||
}
|
||||
|
||||
if (is_ranking_task) {
|
||||
return Eval<PerGroupWeightPolicy>(preds, info, distributed, gptr);
|
||||
} else {
|
||||
@ -251,6 +283,10 @@ struct EvalAuc : public Metric {
|
||||
|
||||
/*! \brief Evaluate rank list */
|
||||
struct EvalRank : public Metric, public EvalRankConfig {
|
||||
private:
|
||||
// This is used to compute the ranking metrics on the GPU - for training jobs that run on the GPU.
|
||||
std::unique_ptr<xgboost::Metric> rank_gpu_;
|
||||
|
||||
public:
|
||||
bst_float Eval(const HostDeviceVector<bst_float> &preds,
|
||||
const MetaInfo &info,
|
||||
@ -271,20 +307,32 @@ struct EvalRank : public Metric, public EvalRankConfig {
|
||||
// sum statistics
|
||||
double sum_metric = 0.0f;
|
||||
|
||||
const auto &labels = info.labels_.ConstHostVector();
|
||||
const auto &h_preds = preds.ConstHostVector();
|
||||
// Check and see if we have the GPU metric registered in the internal registry
|
||||
if (tparam_->gpu_id >= 0) {
|
||||
if (!rank_gpu_) {
|
||||
rank_gpu_.reset(GPUMetric::CreateGPUMetric(this->Name(), tparam_));
|
||||
}
|
||||
if (rank_gpu_) {
|
||||
sum_metric = rank_gpu_->Eval(preds, info, distributed);
|
||||
}
|
||||
}
|
||||
|
||||
#pragma omp parallel reduction(+:sum_metric)
|
||||
{
|
||||
// each thread takes a local rec
|
||||
PredIndPairContainer rec;
|
||||
#pragma omp for schedule(static)
|
||||
for (bst_omp_uint k = 0; k < ngroups; ++k) {
|
||||
rec.clear();
|
||||
for (unsigned j = gptr[k]; j < gptr[k + 1]; ++j) {
|
||||
rec.emplace_back(h_preds[j], static_cast<int>(labels[j]));
|
||||
if (!rank_gpu_ || tparam_->gpu_id < 0) {
|
||||
const auto &labels = info.labels_.ConstHostVector();
|
||||
const auto &h_preds = preds.ConstHostVector();
|
||||
|
||||
#pragma omp parallel reduction(+:sum_metric)
|
||||
{
|
||||
// each thread takes a local rec
|
||||
PredIndPairContainer rec;
|
||||
#pragma omp for schedule(static)
|
||||
for (bst_omp_uint k = 0; k < ngroups; ++k) {
|
||||
rec.clear();
|
||||
for (unsigned j = gptr[k]; j < gptr[k + 1]; ++j) {
|
||||
rec.emplace_back(h_preds[j], static_cast<int>(labels[j]));
|
||||
}
|
||||
sum_metric += this->EvalGroup(&rec);
|
||||
}
|
||||
sum_metric += this->EvalGroup(&rec);
|
||||
}
|
||||
}
|
||||
|
||||
@ -465,6 +513,10 @@ struct EvalAucPR : public Metric {
|
||||
// translated from PRROC R Package
|
||||
// see https://doi.org/10.1371/journal.pone.0092209
|
||||
private:
|
||||
// This is used to compute the AUCPR metrics on the GPU - for ranking tasks and
|
||||
// for training jobs that run on the GPU.
|
||||
std::unique_ptr<xgboost::Metric> aucpr_gpu_;
|
||||
|
||||
template <typename WeightPolicy>
|
||||
bst_float Eval(const HostDeviceVector<bst_float> &preds,
|
||||
const MetaInfo &info,
|
||||
@ -580,6 +632,18 @@ struct EvalAucPR : public Metric {
|
||||
const bool is_ranking_task =
|
||||
!info.group_ptr_.empty() && info.weights_.Size() != info.num_row_;
|
||||
|
||||
// Check if we have a GPU assignment; else, revert back to CPU
|
||||
if (tparam_->gpu_id >= 0 && is_ranking_task) {
|
||||
if (!aucpr_gpu_) {
|
||||
// Check and see if we have the GPU metric registered in the internal registry
|
||||
aucpr_gpu_.reset(GPUMetric::CreateGPUMetric(this->Name(), tparam_));
|
||||
}
|
||||
|
||||
if (aucpr_gpu_) {
|
||||
return aucpr_gpu_->Eval(preds, info, distributed);
|
||||
}
|
||||
}
|
||||
|
||||
if (is_ranking_task) {
|
||||
return Eval<PerGroupWeightPolicy>(preds, info, distributed, gptr);
|
||||
} else {
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user