bug fix in pairwise rank
This commit is contained in:
parent
37e1473cea
commit
e2d13db24e
@ -2,7 +2,7 @@
|
||||
# choose the tree booster, 0: tree, 1: linear
|
||||
booster_type = 0
|
||||
|
||||
#objective="rank:pairwise"
|
||||
objective="rank:pairwise"
|
||||
#objective="rank:softmax"
|
||||
#objective="lambdarank:map"
|
||||
#objective="lambdarank:ndcg"
|
||||
@ -15,8 +15,9 @@ bst:gamma = 1.0
|
||||
# minimum sum of instance weight(hessian) needed in a child
|
||||
bst:min_child_weight = 1
|
||||
# maximum depth of a tree
|
||||
bst:max_depth = 3
|
||||
eval_metric='ndcg'
|
||||
bst:max_depth = 6
|
||||
eval_metric = "ndcg"
|
||||
eval_metric = "map"
|
||||
# Task parameters
|
||||
# the number of boosting rounds to run
|
||||
num_round = 4
|
||||
|
||||
@ -174,7 +174,6 @@ namespace xgboost{
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
/*! \brief Area under curve, for both classification and rank */
|
||||
struct EvalAuc : public IEvaluator{
|
||||
virtual float Eval(const std::vector<float> &preds,
|
||||
@ -292,17 +291,6 @@ namespace xgboost{
|
||||
struct EvalNDCG : public EvalRankList{
|
||||
public:
|
||||
EvalNDCG(const char *name):EvalRankList(name){}
|
||||
|
||||
/*!
 * \brief Compute the DCG of relevance labels taken in the given order
 * \param rec relevance labels, one per rank position; each is truncated to an unsigned integer
 * \return sum over 0-based positions i of (2^rel_i - 1) * log(2) / log(i + 2)
 */
static inline float CalcDCG(const std::vector< float > &rec) {
    double sumdcg = 0.0;
    for (size_t i = 0; i < rec.size(); i++){
        const unsigned rel = static_cast<unsigned>(rec[i]);
        // a zero label contributes nothing, so it is skipped
        if (rel != 0){
            // discount by log(i + 2): with i + 1 the top position (i == 0)
            // divides by log(1) == 0 and the whole sum becomes infinite
            sumdcg += logf(2.0f) * ((1 << rel) - 1) / logf(static_cast<float>(i + 2));
        }
    }
    return static_cast<float>(sumdcg);
}
|
||||
protected:
|
||||
inline float CalcDCG( const std::vector< std::pair<float,unsigned> > &rec ) const {
|
||||
double sumdcg = 0.0;
|
||||
@ -315,9 +303,9 @@ namespace xgboost{
|
||||
return static_cast<float>(sumdcg);
|
||||
}
|
||||
virtual float EvalMetric( std::vector< std::pair<float, unsigned> > &rec ) const {
|
||||
std::sort(rec.begin(), rec.end(), CmpFirst);
|
||||
float idcg = this->CalcDCG(rec);
|
||||
std::sort(rec.begin(), rec.end(), CmpSecond);
|
||||
float idcg = this->CalcDCG(rec);
|
||||
std::sort(rec.begin(), rec.end(), CmpFirst);
|
||||
float dcg = this->CalcDCG(rec);
|
||||
if( idcg == 0.0f ) return 0.0f;
|
||||
else return dcg/idcg;
|
||||
|
||||
@ -185,13 +185,15 @@ namespace xgboost{
|
||||
class PairwiseRankObj : public IObjFunction{
|
||||
public:
|
||||
/*! \brief Set the default loss type, list weighting and pair-sample count */
PairwiseRankObj(void){
|
||||
// NOTE(review): two assignments to loss_type appear back to back in this view;
// the later one (kLogisticRaw) is the value left in place after construction
loss.loss_type = LossType::kLinearSquare;
|
||||
loss.loss_type = LossType::kLogisticRaw;
|
||||
// 0 means the gradient rescaling in GetGradient does not apply a per-list weight
fix_list_weight = 0.0f;
|
||||
// one sampling pass per instance by default
num_pairsample = 1;
|
||||
}
|
||||
/*! \brief Virtual destructor with an empty body */
virtual ~PairwiseRankObj(){}
|
||||
/*!
 * \brief Set one parameter by name; names other than the three below are ignored
 * \param name parameter name: "loss_type", "fix_list_weight" or "num_pairsample"
 * \param val parameter value as text, converted with atoi/atof
 */
virtual void SetParam(const char *name, const char *val){
|
||||
if( !strcmp( "loss_type", name ) ) loss.loss_type = atoi( val );
|
||||
if( !strcmp( "fix_list_weight", name ) ) fix_list_weight = (float)atof( val );
|
||||
if( !strcmp( "num_pairsample", name ) ) num_pairsample = atoi( val );
|
||||
}
|
||||
virtual void GetGradient(const std::vector<float>& preds,
|
||||
const DMatrix::Info &info,
|
||||
@ -224,21 +226,33 @@ namespace xgboost{
|
||||
while( j < rec.size() && rec[j].first == rec[i].first ) ++ j;
|
||||
// bucket in [i,j), get a sample outside bucket
|
||||
unsigned nleft = i, nright = rec.size() - j;
|
||||
for( unsigned pid = i; pid < j; ++ pid ){
|
||||
unsigned ridx = static_cast<int>( rnd.RandDouble() * (nleft+nright) );
|
||||
if( ridx < nleft ){
|
||||
// get the samples in left side, ridx is pos sample
|
||||
this->AddGradient( rec[ridx].second, rec[pid].second, preds, grad, hess );
|
||||
}else{
|
||||
// get samples in right side, ridx is negsample
|
||||
this->AddGradient( rec[pid].second, rec[ridx+j-i].second, preds, grad, hess );
|
||||
if( nleft + nright != 0 ){
|
||||
int nsample = num_pairsample;
|
||||
while( nsample -- ){
|
||||
for( unsigned pid = i; pid < j; ++ pid ){
|
||||
unsigned ridx = static_cast<unsigned>( rnd.RandDouble() * (nleft+nright) );
|
||||
if( ridx < nleft ){
|
||||
// get the samples in left side, ridx is pos sample
|
||||
this->AddGradient( rec[ridx].second, rec[pid].second, preds, grad, hess );
|
||||
}else{
|
||||
// get samples in right side, ridx is negsample
|
||||
this->AddGradient( rec[pid].second, rec[ridx+j-i].second, preds, grad, hess );
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}else{
|
||||
for( unsigned pid = i; pid < j; ++ pid ){
|
||||
utils::Assert( rec[pid].first == 0.0f );
|
||||
}
|
||||
}
|
||||
i = j;
|
||||
}
|
||||
// rescale each gradient and hessian so that every list has a constant weight
|
||||
float scale = 1.0f / num_pairsample;
|
||||
if( fix_list_weight != 0.0f ){
|
||||
float scale = fix_list_weight / (gptr[k+1] - gptr[k]);
|
||||
scale *= fix_list_weight / (gptr[k+1] - gptr[k]);
|
||||
}
|
||||
if( scale != 1.0f ){
|
||||
for(unsigned j = gptr[k]; j < gptr[k+1]; ++j ){
|
||||
grad[j] *= scale; hess[j] *= scale;
|
||||
}
|
||||
@ -246,11 +260,9 @@ namespace xgboost{
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*! \brief Return the name of the default evaluation metric, "ndcg" */
virtual const char* DefaultEvalMetric(void) {
|
||||
return "ndcg";
|
||||
}
|
||||
|
||||
private:
|
||||
inline void AddGradient( unsigned pid, unsigned nid,
|
||||
const std::vector<float> &pred,
|
||||
@ -263,13 +275,10 @@ namespace xgboost{
|
||||
grad[pid] += g; grad[nid] -= g;
|
||||
// take conservative update, scale hessian by 2
|
||||
hess[pid] += 2.0f * h; hess[nid] += 2.0f * h;
|
||||
}
|
||||
|
||||
/*! \brief Comparator that orders pairs by descending first element */
inline static bool CmpFirst( const std::pair<float,unsigned> &a, const std::pair<float,unsigned> &b ){
|
||||
return a.first > b.first;
|
||||
}
|
||||
|
||||
private:
|
||||
// number of samples performed for each instance
|
||||
int num_pairsample;
|
||||
// fix weight of each list
|
||||
float fix_list_weight;
|
||||
LossType loss;
|
||||
@ -448,6 +457,17 @@ namespace xgboost{
|
||||
|
||||
class LambdaRankObj_NDCG : public LambdaRankObj{
|
||||
|
||||
/*!
 * \brief Compute the DCG of relevance labels taken in the given order
 * \param rec relevance labels, one per rank position; each is cast to unsigned
 * \return sum over 0-based positions i of (2^rel_i - 1) * log(2) / log(i + 2)
 */
static inline float CalcDCG(const std::vector< float > &rec) {
|
||||
double sumdcg = 0.0;
|
||||
for (size_t i = 0; i < rec.size(); i++){
|
||||
const unsigned rel = static_cast<unsigned>(rec[i]);
|
||||
// a zero label contributes nothing, so it is skipped
if (rel != 0){
|
||||
// i + 2 keeps the denominator non-zero at the top position (i == 0)
sumdcg += logf(2.0f) *((1 << rel) - 1) / logf(i + 2);
|
||||
}
|
||||
}
|
||||
return static_cast<float>(sumdcg);
|
||||
}
|
||||
|
||||
/*
|
||||
* \brief Obtain the delta NDCG if trying to switch the positions of instances in index1 or index2
|
||||
* in sorted triples. Here DCG is calculated as sigma_i 2^rel_i/log(i + 1)
|
||||
@ -475,7 +495,7 @@ namespace xgboost{
|
||||
}
|
||||
|
||||
std::sort(labels.begin(), labels.end(), std::greater<float>());
|
||||
return EvalNDCG::CalcDCG(labels);
|
||||
return CalcDCG(labels);
|
||||
}
|
||||
|
||||
inline void GetLambda(const std::vector<float> &preds,
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user