- create a gpu metrics (internal) registry (#5387)

* - create a gpu metrics (internal) registry
  - the objective is to separate the cpu and gpu implementations such that they evolve
    independently. to that end, this approach will (sketched below):
    - preserve the same metrics configuration (from the end-user perspective)
    - internally delegate the responsibility to the gpu metrics builder when there is a
      valid device present
    - decouple the gpu metrics builder from the cpu ones to prevent misuse
    - move away from including the cuda file from within the cc file and segregating the
      code via ifdefs
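
For orientation, a minimal sketch of the pattern this change introduces. The metric name "foo" and the types EvalFoo/EvalFooGPU are hypothetical placeholders; GPUMetric::CreateGPUMetric, MetricGPUReg and XGBOOST_REGISTER_GPU_METRIC are the pieces added by this commit, and the real registrations and dispatch sites are in the diff itself.

// In a .cu translation unit (compiled only in GPU builds): register the device
// implementation under the *same* name as its CPU counterpart so that a lookup
// by Metric::Name() can find it.
//
//   XGBOOST_REGISTER_GPU_METRIC(FooGPU, "foo")
//   .describe("foo metric computed on the GPU.")
//   .set_body([](const char*) { return new EvalFooGPU(); });

// In the corresponding .cc translation unit: the CPU metric lazily asks the
// internal GPU registry for a device implementation and falls back to the
// host path when none is available.
#include <memory>

#include <xgboost/generic_parameters.h>
#include <xgboost/host_device_vector.h>
#include <xgboost/metric.h>

#include "metric_common.h"  // GPUMetric, MetricGPUReg

namespace xgboost {
namespace metric {

struct EvalFoo : public Metric {
  const char* Name() const override { return "foo"; }

  bst_float Eval(const HostDeviceVector<bst_float>& preds, const MetaInfo& info,
                 bool distributed) override {
    if (tparam_->gpu_id >= 0) {
      if (!foo_gpu_) {
        // Look the name up in the internal GPU registry; returns nullptr (with a
        // warning) when no GPU builder is registered for it.
        foo_gpu_.reset(GPUMetric::CreateGPUMetric(this->Name(), tparam_));
      }
      if (foo_gpu_) {
        return foo_gpu_->Eval(preds, info, distributed);  // device path
      }
    }
    return 0.0f;  // placeholder for the host-side computation
  }

 private:
  std::unique_ptr<Metric> foo_gpu_;
};

}  // namespace metric
}  // namespace xgboost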
sriramch 2020-03-06 18:31:35 -08:00 committed by GitHub
parent 8d06878bf9
commit 1ba6706167
3 changed files with 159 additions and 31 deletions

src/metric/metric.cc

@@ -7,12 +7,11 @@
#include <xgboost/metric.h>
#include <xgboost/generic_parameters.h>
namespace dmlc {
DMLC_REGISTRY_ENABLE(::xgboost::MetricReg);
}
#include "metric_common.h"
namespace xgboost {
Metric* Metric::Create(const std::string& name, GenericParameter const* tparam) {
template <typename MetricRegistry>
Metric* CreateMetricImpl(const std::string& name, GenericParameter const* tparam) {
std::string buf = name;
std::string prefix = name;
const char* param;
@@ -26,29 +25,58 @@ Metric* Metric::Create(const std::string& name, GenericParameter const* tparam)
prefix = buf;
param = nullptr;
}
auto *e = ::dmlc::Registry< ::xgboost::MetricReg>::Get()->Find(prefix.c_str());
auto *e = ::dmlc::Registry<MetricRegistry>::Get()->Find(prefix.c_str());
if (e == nullptr) {
LOG(FATAL) << "Unknown metric function " << name;
return nullptr;
}
auto p_metric = (e->body)(param);
p_metric->tparam_ = tparam;
return p_metric;
} else {
std::string prefix = buf.substr(0, pos);
auto *e = ::dmlc::Registry< ::xgboost::MetricReg>::Get()->Find(prefix.c_str());
auto *e = ::dmlc::Registry<MetricRegistry>::Get()->Find(prefix.c_str());
if (e == nullptr) {
LOG(FATAL) << "Unknown metric function " << name;
return nullptr;
}
auto p_metric = (e->body)(buf.substr(pos + 1, buf.length()).c_str());
p_metric->tparam_ = tparam;
return p_metric;
}
}
Metric *
Metric::Create(const std::string& name, GenericParameter const* tparam) {
auto metric = CreateMetricImpl<MetricReg>(name, tparam);
if (metric == nullptr) {
LOG(FATAL) << "Unknown metric function " << name;
}
metric->tparam_ = tparam;
return metric;
}
Metric *
GPUMetric::CreateGPUMetric(const std::string& name, GenericParameter const* tparam) {
auto metric = CreateMetricImpl<MetricGPUReg>(name, tparam);
if (metric == nullptr) {
LOG(WARNING) << "Cannot find a GPU metric builder for metric " << name
<< ". Resorting to the CPU builder";
return metric;
}
// Narrowing reference only for the compiler to allow assignment to a base class member.
// As such, using this narrowed reference to refer to derived members will be an illegal op.
// This is moot, as this type is stateless.
static_cast<GPUMetric *>(metric)->tparam_ = tparam;
return metric;
}
} // namespace xgboost
namespace dmlc {
DMLC_REGISTRY_ENABLE(::xgboost::MetricReg);
DMLC_REGISTRY_ENABLE(::xgboost::MetricGPUReg);
}
namespace xgboost {
namespace metric {
// List of files that will be force linked in static links.
DMLC_REGISTRY_LINK_TAG(elementwise_metric);
DMLC_REGISTRY_LINK_TAG(multiclass_metric);

src/metric/metric_common.h

@@ -1,18 +1,54 @@
/*!
* Copyright 2018-2019 by Contributors
* \file metric_param.cc
* Copyright 2018-2020 by Contributors
* \file metric_common.h
*/
#ifndef XGBOOST_METRIC_METRIC_COMMON_H_
#define XGBOOST_METRIC_METRIC_COMMON_H_
#include <utility>
#include <vector>
#include <limits>
#include <string>
#include "../common/common.h"
namespace xgboost {
// This creates a GPU metric instance dynamically and adds it to the GPU metric registry, if not
// present already. This is created when there is a device ordinal present and if xgboost
// is compiled with CUDA support
struct GPUMetric : Metric {
static Metric *CreateGPUMetric(const std::string& name, GenericParameter const* tparam);
};
/*!
* \brief Internal registry entries for GPU Metric factory functions.
* The additional parameter const char* param gives the value after @, can be null.
* For example, metric map@3, then: param == "3".
*/
struct MetricGPUReg
: public dmlc::FunctionRegEntryBase<MetricGPUReg,
std::function<Metric * (const char*)> > {
};
/*!
* \brief Macro to register metric computed on GPU.
*
* \code
* // example of registering a metric ndcg@k
* XGBOOST_REGISTER_GPU_METRIC(NDCG_GPU, "ndcg")
* .describe("NDCG metric computer on GPU.")
* .set_body([](const char* param) {
* int at_k = atoi(param);
* return new NDCG(at_k);
* });
* \endcode
*/
// Note: Metric names registered in the GPU registry should follow this convention:
// - GPU metric types should be registered with the same name as the non-GPU metric types
#define XGBOOST_REGISTER_GPU_METRIC(UniqueId, Name) \
::xgboost::MetricGPUReg& __make_ ## MetricGPUReg ## _ ## UniqueId ## __ = \
::dmlc::Registry< ::xgboost::MetricGPUReg>::Get()->__REGISTER__(Name)
namespace metric {
// Ranking config to be used on device and host

src/metric/rank_metric.cc

@@ -1,9 +1,25 @@
/*!
* Copyright 2015 by Contributors
* \file rank_metric.cc
* \brief prediction rank based metrics.
* \author Kailong Chen, Tianqi Chen
* Copyright 2020 XGBoost contributors
*/
// When a device ordinal is present, we want to build the metrics on the GPU. It is *not*
// possible for a valid device ordinal to be present in non-GPU builds. However, it is possible
// for an invalid device ordinal to be specified in GPU builds - to train/predict and/or compute
// the metrics on the CPU. To accommodate these scenarios, the following is done for the metrics
// accelerated on the GPU:
// - An internal GPU registry holds all the GPU metric types (defined in the .cu file)
// - An instance of the appropriate GPU metric type is created when a device ordinal is present
//   - If the creation is successful, the metric computation is done on the device
//   - Else, it falls back on the CPU
// - The GPU metric types are *only* registered when xgboost is built for GPUs
//
// This is done for 2 reasons:
// - Clear separation of CPU and GPU logic
// - Sorting datasets containing a large number of rows is (much) faster when parallel sort
//   semantics are used on the CPU. The __gnu_parallel/concurrency primitives needed to perform
//   this cannot be used when the translation unit is compiled by the 'nvcc' compiler (the
//   corresponding headers that bring in those function declarations can't be included with CUDA).
//   This precludes the CPU and GPU logic from coexisting inside a .cu file.
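//
// Illustrative sketch (not part of this diff): a hedged example of the parallel sort
// constraint described above. HostParallelSort is a hypothetical helper; the guarded
// include shows why such code must stay in a .cc translation unit compiled by the host
// compiler, while a .cu unit would rely on thrust primitives instead.
//
//   #include <algorithm>
//   #if defined(_OPENMP) && defined(__GNUC__) && !defined(__CUDACC__)
//   #include <parallel/algorithm>  // provides __gnu_parallel::stable_sort (host GCC only)
//   #endif
//
//   template <typename Iter, typename Comp>
//   void HostParallelSort(Iter begin, Iter end, Comp comp) {
//   #if defined(_OPENMP) && defined(__GNUC__) && !defined(__CUDACC__)
//     __gnu_parallel::stable_sort(begin, end, comp);  // parallel sort on the CPU
//   #else
//     std::stable_sort(begin, end, comp);             // serial fallback
//   #endif
//   }
//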
#include <rabit/rabit.h>
#include <xgboost/metric.h>
#include <dmlc/registry.h>
@@ -143,6 +159,10 @@ struct EvalAMS : public Metric {
/*! \brief Area Under Curve, for both classification and rank computed on CPU */
struct EvalAuc : public Metric {
private:
// This is used to compute the AUC metrics on the GPU - for ranking tasks and
// for training jobs that run on the GPU.
std::unique_ptr<xgboost::Metric> auc_gpu_;
template <typename WeightPolicy>
bst_float Eval(const HostDeviceVector<bst_float> &preds,
const MetaInfo &info,
@@ -239,6 +259,18 @@ struct EvalAuc : public Metric {
const bool is_ranking_task =
!info.group_ptr_.empty() && info.weights_.Size() != info.num_row_;
// Check if we have a GPU assignment; else, revert back to CPU
if (tparam_->gpu_id >= 0 && is_ranking_task) {
if (!auc_gpu_) {
// Check and see if we have the GPU metric registered in the internal registry
auc_gpu_.reset(GPUMetric::CreateGPUMetric(this->Name(), tparam_));
}
if (auc_gpu_) {
return auc_gpu_->Eval(preds, info, distributed);
}
}
if (is_ranking_task) {
return Eval<PerGroupWeightPolicy>(preds, info, distributed, gptr);
} else {
@@ -251,6 +283,10 @@ struct EvalAuc : public Metric {
/*! \brief Evaluate rank list */
struct EvalRank : public Metric, public EvalRankConfig {
private:
// This is used to compute the ranking metrics on the GPU - for training jobs that run on the GPU.
std::unique_ptr<xgboost::Metric> rank_gpu_;
public:
bst_float Eval(const HostDeviceVector<bst_float> &preds,
const MetaInfo &info,
@@ -271,20 +307,32 @@ struct EvalRank : public Metric, public EvalRankConfig {
// sum statistics
double sum_metric = 0.0f;
const auto &labels = info.labels_.ConstHostVector();
const auto &h_preds = preds.ConstHostVector();
// Check and see if we have the GPU metric registered in the internal registry
if (tparam_->gpu_id >= 0) {
if (!rank_gpu_) {
rank_gpu_.reset(GPUMetric::CreateGPUMetric(this->Name(), tparam_));
}
if (rank_gpu_) {
sum_metric = rank_gpu_->Eval(preds, info, distributed);
}
}
#pragma omp parallel reduction(+:sum_metric)
{
// each thread takes a local rec
PredIndPairContainer rec;
#pragma omp for schedule(static)
for (bst_omp_uint k = 0; k < ngroups; ++k) {
rec.clear();
for (unsigned j = gptr[k]; j < gptr[k + 1]; ++j) {
rec.emplace_back(h_preds[j], static_cast<int>(labels[j]));
if (!rank_gpu_ || tparam_->gpu_id < 0) {
const auto &labels = info.labels_.ConstHostVector();
const auto &h_preds = preds.ConstHostVector();
#pragma omp parallel reduction(+:sum_metric)
{
// each thread takes a local rec
PredIndPairContainer rec;
#pragma omp for schedule(static)
for (bst_omp_uint k = 0; k < ngroups; ++k) {
rec.clear();
for (unsigned j = gptr[k]; j < gptr[k + 1]; ++j) {
rec.emplace_back(h_preds[j], static_cast<int>(labels[j]));
}
sum_metric += this->EvalGroup(&rec);
}
sum_metric += this->EvalGroup(&rec);
}
}
@@ -465,6 +513,10 @@ struct EvalAucPR : public Metric {
// translated from PRROC R Package
// see https://doi.org/10.1371/journal.pone.0092209
private:
// This is used to compute the AUCPR metrics on the GPU - for ranking tasks and
// for training jobs that run on the GPU.
std::unique_ptr<xgboost::Metric> aucpr_gpu_;
template <typename WeightPolicy>
bst_float Eval(const HostDeviceVector<bst_float> &preds,
const MetaInfo &info,
@@ -580,6 +632,18 @@ struct EvalAucPR : public Metric {
const bool is_ranking_task =
!info.group_ptr_.empty() && info.weights_.Size() != info.num_row_;
// Check if we have a GPU assignment; else, revert back to CPU
if (tparam_->gpu_id >= 0 && is_ranking_task) {
if (!aucpr_gpu_) {
// Check and see if we have the GPU metric registered in the internal registry
aucpr_gpu_.reset(GPUMetric::CreateGPUMetric(this->Name(), tparam_));
}
if (aucpr_gpu_) {
return aucpr_gpu_->Eval(preds, info, distributed);
}
}
if (is_ranking_task) {
return Eval<PerGroupWeightPolicy>(preds, info, distributed, gptr);
} else {