This commit is contained in:
kalenhaha
2014-04-06 22:51:52 +08:00
parent ddb8a6982c
commit 6bc71df494
12 changed files with 1106 additions and 936 deletions

View File

@@ -13,157 +13,170 @@
#include "../utils/xgboost_omp.h"
namespace xgboost {
namespace rank {
/*! \brief evaluator that evaluates the loss metrics */
struct IRankEvaluator {
  /*!
   * \brief evaluate a specific metric
   * \param preds prediction
   * \param labels label
   * \param group_index group boundaries; the evaluators in this file treat
   *        group i as the index range [group_index[i], group_index[i + 1])
   * \return value of the metric
   */
  virtual float Eval(const std::vector<float> &preds,
                     const std::vector<float> &labels,
                     const std::vector<int> &group_index) const = 0;
  /*! \return name of metric */
  virtual const char *Name(void) const = 0;
  /*! \brief virtual destructor, so destroying a metric through this interface is well defined */
  virtual ~IRankEvaluator(void) {}
};
namespace rank {
/*! \brief evaluator that evaluates the loss metrics */
class IRankEvaluator {
 public:
  /*! \brief virtual destructor, so destroying a metric through this interface is well defined */
  virtual ~IRankEvaluator(void) {}
  /*!
   * \brief evaluate a specific metric
   * \param preds prediction
   * \param labels label
   * \param group_index group boundaries; the evaluators in this file treat
   *        group i as the index range [group_index[i], group_index[i + 1])
   * \return value of the metric
   */
  virtual float Eval(const std::vector<float> &preds,
                     const std::vector<float> &labels,
                     const std::vector<int> &group_index) const = 0;
  /*! \return name of metric */
  virtual const char *Name(void) const = 0;
};
/*!
 * \brief a (key, value) record; the evaluators in this file store the
 *        prediction as key and the label as value
 */
struct Pair {
  /*! \brief first component of the record */
  float key_;
  /*! \brief second component of the record */
  float value_;
  /*!
   * \brief build a record from its two components
   * \param key stored in key_
   * \param value stored in value_
   */
  Pair(float key, float value)
      // initialise from the parameters; assigning value_ to itself would
      // leave the member uninitialised
      : key_(key), value_(value) {}
};
/*!
 * \brief a (key, value) record; the evaluators in this file store the
 *        prediction as key and the label as value
 */
class Pair {
 public:
  /*! \brief first component of the record */
  float key_;
  /*! \brief second component of the record */
  float value_;
  /*!
   * \brief strict "less than" on key_
   * \param a left operand
   * \param b right operand
   * \return true when a.key_ is smaller than b.key_
   *
   * Declared static because it reads no instance state; it can still be
   * called through an object.
   */
  static bool PairKeyComparer(const Pair &a, const Pair &b) {
    return a.key_ < b.key_;
  }
  /*!
   * \brief build a record from its two components
   * \param key stored in key_
   * \param value stored in value_
   */
  Pair(float key, float value)
      // initialise from the parameters; assigning value_ to itself would
      // leave the member uninitialised
      : key_(key), value_(value) {}
};
/*!
 * \brief strict "less than" on value_, usable as a std::sort predicate
 * \param a left operand
 * \param b right operand
 * \return true when a.value_ is smaller than b.value_
 *
 * Marked inline: the definition sits in a header, and a non-inline
 * definition would clash at link time once two translation units include it.
 */
inline bool PairValueComparer(const Pair &a, const Pair &b) {
  return a.value_ < b.value_;
}
/*!
 * \brief strict "less than" on key_, usable as a std::sort predicate
 * \param a left operand
 * \param b right operand
 * \return true when a.key_ is smaller than b.key_
 *
 * Marked inline: the definition sits in a header, and a non-inline
 * definition would clash at link time once two translation units include it.
 */
inline bool PairKeyComparer(const Pair &a, const Pair &b) {
  return a.key_ < b.key_;
}
/*! \brief placeholder evaluator for the "PAIR" metric; it always reports 0 */
struct EvalPair : public IRankEvaluator {
  /*!
   * \brief evaluate the metric
   * \param preds prediction (not read)
   * \param labels label (not read)
   * \param group_index group boundaries (not read)
   * \return always 0
   */
  virtual float Eval(const std::vector<float> &preds,
                     const std::vector<float> &labels,
                     const std::vector<int> &group_index) const {
    return 0;
  }
  /*!
   * \return name of metric
   *
   * Without this override the type stays abstract and cannot be held by
   * value, which is how RankEvalSet stores it.
   */
  virtual const char *Name(void) const {
    return "PAIR";
  }
};
/*! \brief Mean Average Precision */
struct EvalMAP : public IRankEvaluator {
virtual float Eval( const std::vector<float> &preds,
const std::vector<float> &labels,
const std::vector<int> &group_index ) const {
float acc = 0;
std::vector<Pair> pairs_sort;
for(int i = 0; i < group_index.size() - 1; i++){
for(int j = group_index[i]; j < group_index[i+1];j++){
Pair pair(preds[j],labels[j]);
pairs_sort.push_back(pair);
}
acc += average_precision(pairs_sort);
}
return acc / (group_index.size() - 1);
}
float float average_precision(std::vector<Pair> pairs_sort){
std::sort<Pair>(pairs_sort.begin(),pairs_sort.end(),PairKeyComparer);
float hits = 0;
float average_precision = 0;
for(int j = 0; j < pairs_sort.size(); j++){
if(pairs_sort[j].value_ == 1){
hits++;
average_precision += hits/(j+1);
}
}
if(hits != 0) average_precision /= hits;
return average_precision;
}
virtual const char *Name( void ) const {
return "MAP";
}
};
/*!
 * \brief strict "less than" on value_, usable as a std::sort predicate
 * \param a left operand
 * \param b right operand
 * \return true when a.value_ is smaller than b.value_
 *
 * Marked inline: the definition sits in a header, and a non-inline
 * definition would clash at link time once two translation units include it.
 */
inline bool PairValueComparer(const Pair &a, const Pair &b) {
  return a.value_ < b.value_;
}
/*! \brief Normalized DCG */
struct EvalNDCG : public IRankEvaluator {
virtual float Eval( const std::vector<float> &preds,
const std::vector<float> &labels,
const std::vector<int> &group_index ) const {
float acc = 0;
std::vector<Pair> pairs_sort;
for(int i = 0; i < group_index.size() - 1; i++){
for(int j = group_index[i]; j < group_index[i+1];j++){
Pair pair(preds[j],labels[j]);
pairs_sort.push_back(pair);
}
acc += NDCG(pairs_sort);
}
}
float NDCG(std::vector<Pair> pairs_sort){
std::sort<Pair>(pairs_sort.begin(),pairs_sort.end(),PairKeyComparer);
float DCG = DCG(pairs_sort);
std::sort<Pair>(pairs_sort.begin(),pairs_sort.end(),PairValueComparer);
float IDCG = DCG(pairs_sort);
if(IDCG == 0) return 0;
return DCG/IDCG;
}
float DCG(std::vector<Pair> pairs_sort){
float ans = 0.0;
ans += pairs_sort[0].value_;
for(int i = 1; i < pairs_sort.size(); i++){
ans += pairs_sort[i].value_/log(i + 1);
}
return ans;
}
virtual const char *Name( void ) const {
return "NDCG";
}
};
/*! \brief Mean Average Precision */
class EvalMAP : public IRankEvaluator {
public:
float Eval(const std::vector<float> &preds,
const std::vector<float> &labels,
const std::vector<int> &group_index) const {
float acc = 0;
std::vector<Pair> pairs_sort;
for (int i = 0; i < group_index.size() - 1; i++){
for (int j = group_index[i]; j < group_index[i + 1]; j++){
Pair pair(preds[j], labels[j]);
pairs_sort.push_back(pair);
}
acc += average_precision(pairs_sort);
}
return acc / (group_index.size() - 1);
}
};
namespace rank {
/*! \brief a set of evaluators */
struct RankEvalSet {
public:
inline void AddEval( const char *name ) {
if( !strcmp( name, "PAIR" )) evals_.push_back( &pair_);
if( !strcmp( name, "MAP") ) evals_.push_back( &map_ );
if( !strcmp( name, "NDCG") ) evals_.push_back( &ndcg_ );
}
inline void Init( void ) {
std::sort( evals_.begin(), evals_.end() );
evals_.resize( std::unique( evals_.begin(), evals_.end() ) - evals_.begin() );
}
inline void Eval( FILE *fo, const char *evname,
const std::vector<float> &preds,
const std::vector<float> &labels,
const std::vector<int> &group_index ) const {
for( size_t i = 0; i < evals_.size(); ++ i ) {
float res = evals_[i]->Eval( preds, labels,group_index );
fprintf( fo, "\t%s-%s:%f", evname, evals_[i]->Name(), res );
}
}
private:
EvalPair pair_;
EvalMAP map_;
EvalNDCG ndcg_;
std::vector<const IRankEvaluator*> evals_;
};
};
/*! \return the label this metric is reported under */
virtual const char *Name(void) const { return "MAP"; }
float average_precision(std::vector<Pair> pairs_sort) const{
std::sort(pairs_sort.begin(), pairs_sort.end(), PairKeyComparer);
float hits = 0;
float average_precision = 0;
for (int j = 0; j < pairs_sort.size(); j++){
if (pairs_sort[j].value_ == 1){
hits++;
average_precision += hits / (j + 1);
}
}
if (hits != 0) average_precision /= hits;
return average_precision;
}
};
/*! \brief stub evaluator reported as "PAIR"; its metric value is always 0 */
class EvalPair : public IRankEvaluator {
 public:
  /*!
   * \brief evaluate the metric
   * \param preds prediction (not read)
   * \param labels label (not read)
   * \param group_index group boundaries (not read)
   * \return always 0
   */
  float Eval(const std::vector<float> &preds,
             const std::vector<float> &labels,
             const std::vector<int> &group_index) const {
    return 0.0f;
  }
  /*! \return name of metric */
  const char *Name(void) const { return "PAIR"; }
};
/*! \brief Normalized DCG */
class EvalNDCG : public IRankEvaluator {
public:
float Eval(const std::vector<float> &preds,
const std::vector<float> &labels,
const std::vector<int> &group_index) const {
if (group_index.size() <= 1) return 0;
float acc = 0;
std::vector<Pair> pairs_sort;
for (int i = 0; i < group_index.size() - 1; i++){
for (int j = group_index[i]; j < group_index[i + 1]; j++){
Pair pair(preds[j], labels[j]);
pairs_sort.push_back(pair);
}
acc += NDCG(pairs_sort);
}
return acc / (group_index.size() - 1);
}
float NDCG(std::vector<Pair> pairs_sort) const{
std::sort(pairs_sort.begin(), pairs_sort.end(), PairKeyComparer);
float dcg = DCG(pairs_sort);
std::sort(pairs_sort.begin(), pairs_sort.end(), PairValueComparer);
float IDCG = DCG(pairs_sort);
if (IDCG == 0) return 0;
return dcg / IDCG;
}
float DCG(std::vector<Pair> pairs_sort) const{
float ans = 0.0;
ans += pairs_sort[0].value_;
for (int i = 1; i < pairs_sort.size(); i++){
ans += pairs_sort[i].value_ / log(i + 1);
}
return ans;
}
virtual const char *Name(void) const {
return "NDCG";
}
};
};
namespace rank {
/*! \brief a set of evaluators */
class RankEvalSet {
public:
inline void AddEval(const char *name) {
if (!strcmp(name, "PAIR")) evals_.push_back(&pair_);
if (!strcmp(name, "MAP")) evals_.push_back(&map_);
if (!strcmp(name, "NDCG")) evals_.push_back(&ndcg_);
}
inline void Init(void) {
std::sort(evals_.begin(), evals_.end());
evals_.resize(std::unique(evals_.begin(), evals_.end()) - evals_.begin());
}
inline void Eval(FILE *fo, const char *evname,
const std::vector<float> &preds,
const std::vector<float> &labels,
const std::vector<int> &group_index) const {
for (size_t i = 0; i < evals_.size(); ++i) {
float res = evals_[i]->Eval(preds, labels, group_index);
fprintf(fo, "\t%s-%s:%f", evname, evals_[i]->Name(), res);
}
}
private:
EvalPair pair_;
EvalMAP map_;
EvalNDCG ndcg_;
std::vector<const IRankEvaluator*> evals_;
};
};
};
#endif