xgboost/dev/rank/xgboost_rank_eval.h
2014-04-04 10:42:13 -07:00

170 lines
4.6 KiB
C++

#ifndef XGBOOST_RANK_EVAL_H
#define XGBOOST_RANK_EVAL_H
/*!
* \file xgboost_rank_eval.h
* \brief evaluation metrics for ranking
* \author Kailong Chen: chenkl198812@gmail.com, Tianqi Chen: tianqi.tchen@gmail.com
*/
#include <algorithm>
#include <cmath>
#include <cstdio>
#include <cstring>
#include <vector>
#include "../utils/xgboost_utils.h"
#include "../utils/xgboost_omp.h"
namespace xgboost {
namespace rank {
/*! \brief evaluator that evaluates the loss metrics */
struct IRankEvaluator {
    /*!
     * \brief virtual destructor, so that an evaluator can be safely
     *        destroyed through a pointer to this interface
     */
    virtual ~IRankEvaluator( void ) {}
    /*!
     * \brief evaluate a specific metric
     * \param preds prediction
     * \param labels label
     * \param group_index group boundaries; the evaluators in this file treat
     *        the instances in [ group_index[i], group_index[i+1] ) as group i
     * \return value of the metric
     */
    virtual float Eval( const std::vector<float> &preds,
                        const std::vector<float> &labels,
                        const std::vector<int> &group_index ) const = 0;
    /*! \return name of metric */
    virtual const char *Name( void ) const = 0;
};
/*! \brief a (key, value) pair of floats; the evaluators store (prediction, label) in it */
struct Pair{
    /*! \brief first component, used by PairKeyComparer */
    float key_;
    /*! \brief second component, used by PairValueComparer */
    float value_;
    /*!
     * \brief construct a pair from its two components
     * \param key value stored in key_
     * \param value value stored in value_
     */
    Pair( float key, float value )
        : key_( key ), value_( value ) {
        // the initializer list stores the constructor argument `value`;
        // previously the member was assigned to itself and stayed uninitialized
    }
};
/*!
 * \brief strict-weak ordering of pairs by ascending key_
 * \return true when the key of a is smaller than the key of b
 * \note declared inline because it is defined in a header; a non-inline
 *       definition is emitted in every translation unit that includes the
 *       header and breaks linking
 */
inline bool PairKeyComparer( const Pair &a, const Pair &b ){
    return a.key_ < b.key_;
}
/*!
 * \brief strict-weak ordering of pairs by ascending value_
 * \return true when the value of a is smaller than the value of b
 * \note declared inline because it is defined in a header; a non-inline
 *       definition is emitted in every translation unit that includes the
 *       header and breaks linking
 */
inline bool PairValueComparer( const Pair &a, const Pair &b ){
    return a.value_ < b.value_;
}
/*! \brief pairwise evaluator; currently a placeholder whose metric is always 0 */
struct EvalPair : public IRankEvaluator{
    /*!
     * \brief placeholder evaluation, the arguments are not inspected
     * \return always 0
     */
    virtual float Eval( const std::vector<float> &preds,
                        const std::vector<float> &labels,
                        const std::vector<int> &group_index ) const {
        return 0;
    }
    /*!
     * \return name of metric; without this override the struct stays abstract
     *         and cannot be instantiated. "PAIR" is the key under which
     *         RankEvalSet::AddEval registers this evaluator
     */
    virtual const char *Name( void ) const {
        return "PAIR";
    }
};
/*! \brief Mean Average Precision */
struct EvalMAP : public IRankEvaluator {
virtual float Eval( const std::vector<float> &preds,
const std::vector<float> &labels,
const std::vector<int> &group_index ) const {
float acc = 0;
std::vector<Pair> pairs_sort;
for(int i = 0; i < group_index.size() - 1; i++){
for(int j = group_index[i]; j < group_index[i+1];j++){
Pair pair(preds[j],labels[j]);
pairs_sort.push_back(pair);
}
acc += average_precision(pairs_sort);
}
return acc / (group_index.size() - 1);
}
float float average_precision(std::vector<Pair> pairs_sort){
std::sort<Pair>(pairs_sort.begin(),pairs_sort.end(),PairKeyComparer);
float hits = 0;
float average_precision = 0;
for(int j = 0; j < pairs_sort.size(); j++){
if(pairs_sort[j].value_ == 1){
hits++;
average_precision += hits/(j+1);
}
}
if(hits != 0) average_precision /= hits;
return average_precision;
}
virtual const char *Name( void ) const {
return "MAP";
}
};
/*! \brief Normalized DCG */
struct EvalNDCG : public IRankEvaluator {
virtual float Eval( const std::vector<float> &preds,
const std::vector<float> &labels,
const std::vector<int> &group_index ) const {
float acc = 0;
std::vector<Pair> pairs_sort;
for(int i = 0; i < group_index.size() - 1; i++){
for(int j = group_index[i]; j < group_index[i+1];j++){
Pair pair(preds[j],labels[j]);
pairs_sort.push_back(pair);
}
acc += NDCG(pairs_sort);
}
}
float NDCG(std::vector<Pair> pairs_sort){
std::sort<Pair>(pairs_sort.begin(),pairs_sort.end(),PairKeyComparer);
float DCG = DCG(pairs_sort);
std::sort<Pair>(pairs_sort.begin(),pairs_sort.end(),PairValueComparer);
float IDCG = DCG(pairs_sort);
if(IDCG == 0) return 0;
return DCG/IDCG;
}
float DCG(std::vector<Pair> pairs_sort){
float ans = 0.0;
ans += pairs_sort[0].value_;
for(int i = 1; i < pairs_sort.size(); i++){
ans += pairs_sort[i].value_/log(i + 1);
}
return ans;
}
virtual const char *Name( void ) const {
return "NDCG";
}
};
};
namespace rank {
/*!
 * \brief a set of evaluators
 * \note evals_ stores the addresses of this object's own evaluator members,
 *       so a compiler-generated copy would keep pointing into the object it
 *       was copied from
 */
struct RankEvalSet {
public:
    /*!
     * \brief register a metric by name
     * \param name name of the metric; "PAIR", "MAP" and "NDCG" are recognised,
     *        any other name is silently ignored
     */
    inline void AddEval( const char *name ) {
        if( !std::strcmp( name, "PAIR" ) ) evals_.push_back( &pair_ );
        if( !std::strcmp( name, "MAP" ) )  evals_.push_back( &map_ );
        if( !std::strcmp( name, "NDCG" ) ) evals_.push_back( &ndcg_ );
    }
    /*! \brief drop duplicate registrations, call after all AddEval calls */
    inline void Init( void ) {
        // sort by address first so that std::unique sees duplicates adjacent
        std::sort( evals_.begin(), evals_.end() );
        evals_.resize( std::unique( evals_.begin(), evals_.end() ) - evals_.begin() );
    }
    /*!
     * \brief run every registered metric and print the results
     * \param fo output stream, one "\t<evname>-<metric>:<value>" entry is
     *        written per metric
     * \param evname name of the evaluated data, used as prefix in the output
     * \param preds prediction
     * \param labels label
     * \param group_index group boundaries, forwarded to each evaluator
     */
    inline void Eval( std::FILE *fo, const char *evname,
                      const std::vector<float> &preds,
                      const std::vector<float> &labels,
                      const std::vector<int> &group_index ) const {
        for( size_t i = 0; i < evals_.size(); ++ i ) {
            float res = evals_[i]->Eval( preds, labels, group_index );
            std::fprintf( fo, "\t%s-%s:%f", evname, evals_[i]->Name(), res );
        }
    }
private:
    /*! \brief evaluator registered under "PAIR" */
    EvalPair pair_;
    /*! \brief evaluator registered under "MAP" */
    EvalMAP  map_;
    /*! \brief evaluator registered under "NDCG" */
    EvalNDCG ndcg_;
    /*! \brief the registered evaluators, pointing at the members above */
    std::vector<const IRankEvaluator*> evals_;
};
};
};
#endif