Add AUC evaluation metric

This commit is contained in:
tqchen 2014-04-24 22:20:40 -07:00
parent 7f9637aae4
commit ea354683b4
2 changed files with 51 additions and 16 deletions

View File

@ -172,7 +172,7 @@ namespace xgboost{
preds.resize(data.Size()); preds.resize(data.Size());
const unsigned ndata = static_cast<unsigned>(data.Size()); const unsigned ndata = static_cast<unsigned>(data.Size());
#pragma omp parallel for schedule( static ) #pragma omp parallel for schedule( static )
for (unsigned j = 0; j < ndata; ++j){ for (unsigned j = 0; j < ndata; ++j){
preds[j] = mparam.PredTransform preds[j] = mparam.PredTransform
(mparam.base_score + base_gbm.Predict(data.data, j, -1)); (mparam.base_score + base_gbm.Predict(data.data, j, -1));
@ -233,7 +233,7 @@ namespace xgboost{
preds.resize(data.Size()); preds.resize(data.Size());
const unsigned ndata = static_cast<unsigned>(data.Size()); const unsigned ndata = static_cast<unsigned>(data.Size());
#pragma omp parallel for schedule( static ) #pragma omp parallel for schedule( static )
for (unsigned j = 0; j < ndata; ++j){ for (unsigned j = 0; j < ndata; ++j){
preds[j] = mparam.PredTransform preds[j] = mparam.PredTransform
(mparam.base_score + base_gbm.Predict(data.data, j, buffer_offset + j)); (mparam.base_score + base_gbm.Predict(data.data, j, buffer_offset + j));
@ -248,7 +248,7 @@ namespace xgboost{
grad.resize(preds.size()); hess.resize(preds.size()); grad.resize(preds.size()); hess.resize(preds.size());
const unsigned ndata = static_cast<unsigned>(preds.size()); const unsigned ndata = static_cast<unsigned>(preds.size());
#pragma omp parallel for schedule( static ) #pragma omp parallel for schedule( static )
for (unsigned j = 0; j < ndata; ++j){ for (unsigned j = 0; j < ndata; ++j){
grad[j] = mparam.FirstOrderGradient(preds[j], labels[j]); grad[j] = mparam.FirstOrderGradient(preds[j], labels[j]);
hess[j] = mparam.SecondOrderGradient(preds[j], labels[j]); hess[j] = mparam.SecondOrderGradient(preds[j], labels[j]);

View File

@ -11,6 +11,7 @@
#include <algorithm> #include <algorithm>
#include "../utils/xgboost_utils.h" #include "../utils/xgboost_utils.h"
#include "../utils/xgboost_omp.h" #include "../utils/xgboost_omp.h"
#include "../utils/xgboost_random.h"
namespace xgboost{ namespace xgboost{
namespace regression{ namespace regression{
@ -33,7 +34,7 @@ namespace xgboost{
const std::vector<float> &labels) const{ const std::vector<float> &labels) const{
const unsigned ndata = static_cast<unsigned>(preds.size()); const unsigned ndata = static_cast<unsigned>(preds.size());
float sum = 0.0; float sum = 0.0;
#pragma omp parallel for reduction(+:sum) schedule( static ) #pragma omp parallel for reduction(+:sum) schedule( static )
for (unsigned i = 0; i < ndata; ++i){ for (unsigned i = 0; i < ndata; ++i){
float diff = preds[i] - labels[i]; float diff = preds[i] - labels[i];
sum += diff * diff; sum += diff * diff;
@ -51,7 +52,7 @@ namespace xgboost{
const std::vector<float> &labels) const{ const std::vector<float> &labels) const{
const unsigned ndata = static_cast<unsigned>(preds.size()); const unsigned ndata = static_cast<unsigned>(preds.size());
unsigned nerr = 0; unsigned nerr = 0;
#pragma omp parallel for reduction(+:nerr) schedule( static ) #pragma omp parallel for reduction(+:nerr) schedule( static )
for (unsigned i = 0; i < ndata; ++i){ for (unsigned i = 0; i < ndata; ++i){
if (preds[i] > 0.5f){ if (preds[i] > 0.5f){
if (labels[i] < 0.5f) nerr += 1; if (labels[i] < 0.5f) nerr += 1;
@ -67,6 +68,38 @@ namespace xgboost{
} }
}; };
/*!
 * \brief Area under the ROC curve (AUC) for binary labels
 *
 *  An instance is treated as positive when its label is > 0.5 and as negative
 *  otherwise. The score is the fraction of (positive, negative) pairs in which
 *  the positive instance received the strictly higher prediction; a pair whose
 *  two predictions are equal contributes one half. Ties are therefore resolved
 *  analytically rather than by random shuffling, so repeated evaluations of the
 *  same input give the same value.
 */
struct EvalAuc : public IEvaluator{
    /*! \brief comparator that orders (prediction, label) records by descending prediction */
    inline static bool CmpFirst( const std::pair<float,float> &a, const std::pair<float,float> &b ){
        return a.first > b.first;
    }
    /*!
     * \brief compute the AUC of the predictions against the labels
     * \param preds predicted scores, one per instance
     * \param labels ground-truth labels, needs at least preds.size() entries
     * \return AUC value in [0,1]
     *
     *  Checks through utils::Assert that labels covers every prediction and that
     *  both a positive and a negative instance are present; without both
     *  classes the AUC is undefined.
     */
    virtual float Eval( const std::vector<float> &preds,
                        const std::vector<float> &labels ) const{
        const unsigned ndata = static_cast<unsigned>( preds.size() );
        utils::Assert( labels.size() >= preds.size(), "label vector is shorter than prediction vector" );

        std::vector< std::pair<float, float> > rec;
        rec.reserve( ndata );
        for( unsigned i = 0; i < ndata; ++ i ){
            rec.push_back( std::make_pair( preds[i], labels[i] ) );
        }
        std::sort( rec.begin(), rec.end(), CmpFirst );

        // counters are kept in double: the pair count can reach npos * nneg,
        // which does not fit a 32-bit integer for moderately large datasets
        double sum_pospair = 0.0;           // correctly ordered pairs, ties weighted 0.5
        double sum_npos = 0.0, sum_nneg = 0.0;
        unsigned i = 0;
        while( i < ndata ){
            // gather the run of records sharing the same prediction value;
            // do-while guarantees progress even if the value compares unequal to itself (NaN)
            const float score = rec[i].first;
            double buf_pos = 0.0, buf_neg = 0.0;
            do{
                if( rec[i].second > 0.5f ){
                    buf_pos += 1.0;
                }else{
                    buf_neg += 1.0;
                }
                ++ i;
            }while( i < ndata && rec[i].first == score );
            // each negative in this run is beaten by every positive ranked strictly
            // above it, and ties with the positives of its own run
            sum_pospair += buf_neg * ( sum_npos + 0.5 * buf_pos );
            sum_npos += buf_pos;
            sum_nneg += buf_neg;
        }
        utils::Assert( sum_nneg > 0.0, "the dataset only contains pos samples" );
        utils::Assert( sum_npos > 0.0, "the dataset only contains neg samples" );
        return static_cast<float>( sum_pospair / ( sum_npos * sum_nneg ) );
    }
    /*! \brief name used to select and report this metric */
    virtual const char *Name( void ) const{
        return "auc";
    }
};
/*! \brief Error */ /*! \brief Error */
struct EvalLogLoss : public IEvaluator{ struct EvalLogLoss : public IEvaluator{
@ -74,7 +107,7 @@ namespace xgboost{
const std::vector<float> &labels) const{ const std::vector<float> &labels) const{
const unsigned ndata = static_cast<unsigned>(preds.size()); const unsigned ndata = static_cast<unsigned>(preds.size());
unsigned nerr = 0; unsigned nerr = 0;
#pragma omp parallel for reduction(+:nerr) schedule( static ) #pragma omp parallel for reduction(+:nerr) schedule( static )
for (unsigned i = 0; i < ndata; ++i){ for (unsigned i = 0; i < ndata; ++i){
const float y = labels[i]; const float y = labels[i];
const float py = preds[i]; const float py = preds[i];
@ -96,6 +129,7 @@ namespace xgboost{
if (!strcmp(name, "rmse")) evals_.push_back(&rmse_); if (!strcmp(name, "rmse")) evals_.push_back(&rmse_);
if (!strcmp(name, "error")) evals_.push_back(&error_); if (!strcmp(name, "error")) evals_.push_back(&error_);
if (!strcmp(name, "logloss")) evals_.push_back(&logloss_); if (!strcmp(name, "logloss")) evals_.push_back(&logloss_);
if (!strcmp( name, "auc")) evals_.push_back( &auc_ );
} }
inline void Init(void){ inline void Init(void){
std::sort(evals_.begin(), evals_.end()); std::sort(evals_.begin(), evals_.end());
@ -112,6 +146,7 @@ namespace xgboost{
private: private:
EvalRMSE rmse_; EvalRMSE rmse_;
EvalError error_; EvalError error_;
EvalAuc auc_;
EvalLogLoss logloss_; EvalLogLoss logloss_;
std::vector<const IEvaluator*> evals_; std::vector<const IEvaluator*> evals_;
}; };