new AUC evaluator, now compatible with weighted loss
This commit is contained in:
parent
31edfda03c
commit
87a9c22795
@ -94,34 +94,54 @@ namespace xgboost{
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
/*! \brief Area under curve */
|
/*! \brief Area under curve, for both classification and rank */
|
||||||
struct EvalAuc : public IEvaluator{
|
struct EvalAuc : public IEvaluator{
|
||||||
/*!
 * \brief comparator that orders (prediction, instance index) records by descending prediction
 * \param a left-hand record; first is the predicted score, second is the instance index
 * \param b right-hand record, same layout as a
 * \return true when the prediction of a is strictly greater than the prediction of b
 */
inline static bool CmpFirst( const std::pair<float,unsigned> &a, const std::pair<float,unsigned> &b ){
    // strict comparison: records with equal predictions are equivalent under this ordering
    return a.first > b.first;
}
|
||||||
virtual float Eval( const std::vector<float> &preds,
|
virtual float Eval( const std::vector<float> &preds,
|
||||||
const DMatrix::Info &info ) const {
|
const DMatrix::Info &info ) const {
|
||||||
const std::vector<float> &labels = info.labels;
|
std::vector<unsigned> tgptr(2,0); tgptr[1] = preds.size();
|
||||||
const unsigned ndata = static_cast<unsigned>( preds.size() );
|
const std::vector<unsigned> &gptr = info.group_ptr.size() == 0 ? tgptr : info.group_ptr;
|
||||||
std::vector< std::pair<float, float> > rec;
|
utils::Assert( gptr.back() == preds.size(), "EvalAuc: group structure must match number of prediction" );
|
||||||
for( unsigned i = 0; i < ndata; ++ i ){
|
const unsigned ngroup = static_cast<unsigned>( gptr.size() );
|
||||||
rec.push_back( std::make_pair( preds[i], labels[i]) );
|
|
||||||
}
|
double sum_auc = 0.0f;
|
||||||
random::Shuffle( rec );
|
#pragma omp parallel reduction(+:sum_auc)
|
||||||
std::sort( rec.begin(), rec.end(), CmpFirst );
|
{
|
||||||
|
// each thread takes a local rec
|
||||||
long npos = 0, nhit = 0;
|
std::vector< std::pair<float,unsigned> > rec;
|
||||||
for( unsigned i = 0; i < ndata; ++ i ){
|
#pragma omp for schedule(static)
|
||||||
if( rec[i].second > 0.5f ) {
|
for( unsigned k = 0; k < ngroup; ++ k ){
|
||||||
++ npos;
|
rec.clear();
|
||||||
}else{
|
for( unsigned j = gptr[k]; j < gptr[k+1]; ++ j ){
|
||||||
// this is the number of correct pairs
|
rec.push_back( std::make_pair( preds[j], j ) );
|
||||||
nhit += npos;
|
}
|
||||||
|
std::sort( rec.begin(), rec.end(), CmpFirst );
|
||||||
|
// calculate AUC
|
||||||
|
double sum_pospair = 0.0;
|
||||||
|
double sum_npos = 0.0, sum_nneg = 0.0, buf_pos = 0.0, buf_neg = 0.0;
|
||||||
|
for( size_t j = 0; j < rec.size(); ++ j ){
|
||||||
|
const float wt = info.GetWeight( rec[j].second );
|
||||||
|
const float ctr = info.labels[ rec[j].second ];
|
||||||
|
// keep bucketing predictions in same bucket
|
||||||
|
if( j != 0 && rec[j].first != rec[j-1].first ){
|
||||||
|
sum_pospair += buf_neg * (sum_npos + buf_pos *0.5);
|
||||||
|
sum_npos += buf_pos; sum_nneg += buf_neg;
|
||||||
|
buf_neg = buf_pos = 0.0f;
|
||||||
|
}
|
||||||
|
buf_pos += ctr * wt; buf_neg += (1.0f-ctr) * wt;
|
||||||
|
}
|
||||||
|
sum_pospair += buf_neg * (sum_npos + buf_pos *0.5);
|
||||||
|
sum_npos += buf_pos; sum_nneg += buf_neg;
|
||||||
|
//
|
||||||
|
utils::Assert( sum_npos > 0.0 && sum_nneg > 0.0, "the dataset only contains pos or neg samples" );
|
||||||
|
// this is the AUC
|
||||||
|
sum_auc += sum_pospair / (sum_npos*sum_nneg);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
long nneg = ndata - npos;
|
// return average AUC over list
|
||||||
utils::Assert( nneg > 0, "the dataset only contains pos samples" );
|
return static_cast<float>(sum_auc) / ngroup;
|
||||||
return static_cast<float>(nhit) / nneg / npos;
|
|
||||||
}
|
}
|
||||||
virtual const char *Name( void ) const{
|
virtual const char *Name( void ) const{
|
||||||
return "auc";
|
return "auc";
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user