From 2baeeabac4233eeb9a294323377dc2c14ffe5642 Mon Sep 17 00:00:00 2001 From: tqchen Date: Fri, 16 May 2014 00:02:26 -0700 Subject: [PATCH] new lambda rank interface --- demo/rank/mq2008.conf | 4 +- regrank/xgboost_regrank_dev.hpp | 164 ++++++++++++ regrank/xgboost_regrank_obj.h | 4 +- regrank/xgboost_regrank_obj.hpp | 440 ++++++-------------------------- 4 files changed, 239 insertions(+), 373 deletions(-) create mode 100644 regrank/xgboost_regrank_dev.hpp diff --git a/demo/rank/mq2008.conf b/demo/rank/mq2008.conf index 107a5ae44..08b9d9679 100644 --- a/demo/rank/mq2008.conf +++ b/demo/rank/mq2008.conf @@ -2,9 +2,9 @@ # choose the tree booster, 0: tree, 1: linear booster_type = 0 -#objective="rank:pairwise" +objective="rank:pairwise" #objective="rank:softmax" -objective="lambdarank:map" +#objective="lambdarank:map" #objective="lambdarank:ndcg" # Tree Booster Parameters diff --git a/regrank/xgboost_regrank_dev.hpp b/regrank/xgboost_regrank_dev.hpp new file mode 100644 index 000000000..1925e4c8d --- /dev/null +++ b/regrank/xgboost_regrank_dev.hpp @@ -0,0 +1,164 @@ +// some backup code + + class LambdaRankObj_NDCG : public LambdaRankObj{ + + static inline float CalcDCG(const std::vector< float > &rec) { + double sumdcg = 0.0; + for (size_t i = 0; i < rec.size(); i++){ + const unsigned rel = static_cast(rec[i]); + if (rel != 0){ + sumdcg += logf(2.0f) *((1 << rel) - 1) / logf(i + 2); + } + } + return static_cast(sumdcg); + } + + /* + * \brief Obtain the delta NDCG if trying to switch the positions of instances in index1 or index2 + * in sorted triples. Here DCG is calculated as sigma_i 2^rel_i/log(i + 1) + * \param sorted_triple the fields are predition,label,original index + * \param index1,index2 the instances switched + * \param the IDCG of the list + */ + inline float GetLambdaNDCG(const std::vector< Triple > sorted_triple, + int index1, + int index2, float IDCG){ + double original = (1 << static_cast(sorted_triple[index1].label_)) / log(index1 + 2) + + (1 << static_cast(sorted_triple[index2].label_)) / log(index2 + 2); + double changed = (1 << static_cast(sorted_triple[index2].label_)) / log(index1 + 2) + + (1 << static_cast(sorted_triple[index1].label_)) / log(index2 + 2); + double ans = (original - changed) / IDCG; + if (ans < 0) ans = -ans; + return static_cast(ans); + } + + + inline float GetIDCG(const std::vector< Triple > sorted_triple){ + std::vector labels; + for (size_t i = 0; i < sorted_triple.size(); i++){ + labels.push_back(sorted_triple[i].label_); + } + + std::sort(labels.begin(), labels.end(), std::greater()); + return CalcDCG(labels); + } + + inline void GetLambda(const std::vector &preds, + const std::vector &labels, + const std::vector &group_index, + const std::vector< std::pair > &pairs, std::vector &lambda, int group){ + std::vector< Triple > sorted_triple; + std::vector index_remap; + float IDCG; + + GetSortedTuple(preds, labels, group_index, group, sorted_triple); + GetIndexMap(sorted_triple, group_index[group], index_remap); + IDCG = GetIDCG(sorted_triple); + + lambda.resize(pairs.size()); + for (size_t i = 0; i < pairs.size(); i++){ + lambda[i] = GetLambdaNDCG(sorted_triple, + index_remap[pairs[i].first],index_remap[pairs[i].second],IDCG); + } + } + }; + + class LambdaRankObj_MAP : public LambdaRankObj{ + class Quadruple{ + public: + /* \brief the accumulated precision */ + float ap_acc_; + /* \brief the accumulated precision assuming a positive instance is missing*/ + float ap_acc_miss_; + /* \brief the accumulated precision assuming that one more positive instance is inserted ahead*/ + float ap_acc_add_; + /* \brief the accumulated positive instance count */ + float hits_; + + Quadruple(){} + + Quadruple(const Quadruple& q){ + ap_acc_ = q.ap_acc_; + ap_acc_miss_ = q.ap_acc_miss_; + ap_acc_add_ = q.ap_acc_add_; + hits_ = q.hits_; + } + + Quadruple(float ap_acc, float ap_acc_miss, float ap_acc_add, float hits + ) :ap_acc_(ap_acc), ap_acc_miss_(ap_acc_miss), ap_acc_add_(ap_acc_add), hits_(hits){ + + } + + }; + + /* + * \brief Obtain the delta MAP if trying to switch the positions of instances in index1 or index2 + * in sorted triples + * \param sorted_triple the fields are predition,label,original index + * \param index1,index2 the instances switched + * \param map_acc a vector containing the accumulated precisions for each position in a list + */ + inline float GetLambdaMAP(const std::vector< Triple > sorted_triple, + int index1, int index2, + std::vector< Quadruple > &map_acc){ + if (index1 == index2 || sorted_triple[index1].label_ == sorted_triple[index2].label_) return 0.0; + if (index1 > index2) std::swap(index1, index2); + float original = map_acc[index2].ap_acc_; // The accumulated precision in the interval [index1,index2] + if (index1 != 0) original -= map_acc[index1 - 1].ap_acc_; + float changed = 0; + if (sorted_triple[index1].label_ < sorted_triple[index2].label_){ + changed += map_acc[index2 - 1].ap_acc_add_ - map_acc[index1].ap_acc_add_; + changed += (map_acc[index1].hits_ + 1.0f) / (index1 + 1); + } + else{ + changed += map_acc[index2 - 1].ap_acc_miss_ - map_acc[index1].ap_acc_miss_; + changed += map_acc[index2].hits_ / (index2 + 1); + } + float ans = (changed - original) / (map_acc[map_acc.size() - 1].hits_); + if (ans < 0) ans = -ans; + return ans; + } + + + /* + * \brief preprocessing results for calculating delta MAP + * \return The first field is the accumulated precision, the second field is the + * accumulated precision assuming a positive instance is missing, + * the third field is the accumulated precision assuming that one more positive + * instance is inserted, the fourth field is the accumulated positive instance count + */ + inline void GetMAPAcc(const std::vector< Triple > sorted_triple, + std::vector< Quadruple > &map_acc){ + map_acc.resize(sorted_triple.size()); + float hit = 0, acc1 = 0, acc2 = 0, acc3 = 0; + for (size_t i = 1; i <= sorted_triple.size(); i++){ + if ((int)sorted_triple[i - 1].label_ == 1) { + hit++; + acc1 += hit / i; + acc2 += (hit - 1) / i; + acc3 += (hit + 1) / i; + } + map_acc[i-1] = Quadruple(acc1, acc2, acc3, hit); + } + } + + inline void GetLambda(const std::vector &preds, + const std::vector &labels, + const std::vector &group_index, + const std::vector< std::pair > &pairs, std::vector &lambda, int group){ + std::vector< Triple > sorted_triple; + std::vector index_remap; + std::vector< Quadruple > map_acc; + + GetSortedTuple(preds, labels, group_index, group, sorted_triple); + GetIndexMap(sorted_triple, group_index[group], index_remap); + GetMAPAcc(sorted_triple, map_acc); + + lambda.resize(pairs.size()); + for (size_t i = 0; i < pairs.size(); i++){ + lambda[i] = GetLambdaMAP(sorted_triple, + index_remap[pairs[i].first], index_remap[pairs[i].second], map_acc); + } + } + }; + diff --git a/regrank/xgboost_regrank_obj.h b/regrank/xgboost_regrank_obj.h index 254d5a45d..f2fee0653 100644 --- a/regrank/xgboost_regrank_obj.h +++ b/regrank/xgboost_regrank_obj.h @@ -113,8 +113,8 @@ namespace xgboost{ if( !strcmp("rank:pairwise", name ) ) return new PairwiseRankObj(); if( !strcmp("rank:softmax", name ) ) return new SoftmaxRankObj(); if( !strcmp("softmax", name ) ) return new SoftmaxMultiClassObj(); - if (!strcmp("lambdarank:map", name)) return new LambdaRankObj_MAP(); - if (!strcmp("lambdarank:ndcg", name)) return new LambdaRankObj_NDCG(); + // if (!strcmp("lambdarank:map", name)) return new LambdaRankObj_MAP(); + // if (!strcmp("lambdarank:ndcg", name)) return new LambdaRankObj_NDCG(); utils::Error("unknown objective function type"); return NULL; } diff --git a/regrank/xgboost_regrank_obj.hpp b/regrank/xgboost_regrank_obj.hpp index 2995a03fe..e4d99e0c7 100644 --- a/regrank/xgboost_regrank_obj.hpp +++ b/regrank/xgboost_regrank_obj.hpp @@ -180,21 +180,23 @@ namespace xgboost{ }; }; + namespace regrank{ - // simple pairwise rank - class PairwiseRankObj : public IObjFunction{ + /*! \brief objective for lambda rank */ + class LambdaRankObj : public IObjFunction{ public: - PairwiseRankObj(void){ + LambdaRankObj(void){ loss.loss_type = LossType::kLogisticRaw; fix_list_weight = 0.0f; num_pairsample = 1; } - virtual ~PairwiseRankObj(){} + virtual ~LambdaRankObj(){} virtual void SetParam(const char *name, const char *val){ - if( !strcmp( "loss_type", name ) ) loss.loss_type = atoi( val ); + if( !strcmp( "loss_type", name ) ) loss.loss_type = atoi( val ); if( !strcmp( "fix_list_weight", name ) ) fix_list_weight = (float)atof( val ); if( !strcmp( "num_pairsample", name ) ) num_pairsample = atoi( val ); } + public: virtual void GetGradient(const std::vector& preds, const DMatrix::Info &info, int iter, @@ -211,16 +213,24 @@ namespace xgboost{ // parall construct, declare random number generator here, so that each // thread use its own random number generator, seed by thread id and current iteration random::Random rnd; rnd.Seed( iter * 1111 + omp_get_thread_num() ); + std::vector pairs; + std::vector lst; std::vector< std::pair > rec; + #pragma omp for schedule(static) for (unsigned k = 0; k < ngroup; ++k){ - rec.clear(); + lst.clear(); pairs.clear(); for(unsigned j = gptr[k]; j < gptr[k+1]; ++j ){ - rec.push_back( std::make_pair(info.labels[j], j) ); + lst.push_back( ListEntry(preds[j], info.labels[j], j ) ); grad[j] = hess[j] = 0.0f; } + std::sort( lst.begin(), lst.end(), ListEntry::CmpPred ); + rec.resize( lst.size() ); + for( unsigned i = 0; i < lst.size(); ++i ){ + rec[i] = std::make_pair( lst[i].label, i ); + } std::sort( rec.begin(), rec.end(), CmpFirst ); - // enumerate buckets with same label, for each item in the list, grab another sample randomly + // enumerate buckets with same label, for each item in the lst, grab another sample randomly for( unsigned i = 0; i < rec.size(); ){ unsigned j = i + 1; while( j < rec.size() && rec[j].first == rec[i].first ) ++ j; @@ -232,73 +242,49 @@ namespace xgboost{ for( unsigned pid = i; pid < j; ++ pid ){ unsigned ridx = static_cast( rnd.RandDouble() * (nleft+nright) ); if( ridx < nleft ){ - // get the samples in left side, ridx is pos sample - this->AddGradient( rec[ridx].second, rec[pid].second, preds, grad, hess ); + pairs.push_back( LambdaPair( rec[ridx].second, rec[pid].second ) ); }else{ - // get samples in right side, ridx is negsample - this->AddGradient( rec[pid].second, rec[ridx+j-i].second, preds, grad, hess ); + pairs.push_back( LambdaPair( rec[pid].second, rec[ridx+j-i].second ) ); } } } - }else{ - for( unsigned pid = i; pid < j; ++ pid ){ - utils::Assert( rec[pid].first == 0.0f ); - } } i = j; } - // rescale each gradient and hessian so that the list have constant weight + // get lambda weight for the pairs + this->GetLambdaWeight( lst, pairs ); + // rescale each gradient and hessian so that the lst have constant weighted float scale = 1.0f / num_pairsample; if( fix_list_weight != 0.0f ){ scale *= fix_list_weight / (gptr[k+1] - gptr[k]); } - if( scale != 1.0f ){ - for(unsigned j = gptr[k]; j < gptr[k+1]; ++j ){ - grad[j] *= scale; hess[j] *= scale; - } - } + for( size_t i = 0; i < pairs.size(); ++ i ){ + const ListEntry &pos = lst[ pairs[i].pos_index ]; + const ListEntry &neg = lst[ pairs[i].neg_index ]; + const float w = pairs[i].weight * scale; + float p = loss.PredTransform( pos.pred - neg.pred ); + float g = loss.FirstOrderGradient( p, 1.0f ); + float h = loss.SecondOrderGradient( p, 1.0f ); + // accumulate gradient and hessian in both pid, and nid, + grad[ pos.rindex ] += g * w; + grad[ neg.rindex ] -= g * w; + // take conservative update, scale hessian by 2 + hess[ pos.rindex ] += 2.0f * h * w; + hess[ neg.rindex ] += 2.0f * h * w; + } } } } virtual const char* DefaultEvalMetric(void) { return "map"; - } - private: - inline void AddGradient( unsigned pid, unsigned nid, - const std::vector &pred, - std::vector &grad, - std::vector &hess ){ - float p = loss.PredTransform( pred[pid]-pred[nid] ); - float g = loss.FirstOrderGradient( p, 1.0f ); - float h = loss.SecondOrderGradient( p, 1.0f ); - // accumulate gradient and hessian in both pid, and nid, - grad[pid] += g; grad[nid] -= g; - // take conservative update, scale hessian by 2 - hess[pid] += 2.0f * h; hess[nid] += 2.0f * h; } private: - // number of samples peformed for each instance - int num_pairsample; - // fix weight of each list - float fix_list_weight; + // loss function LossType loss; - }; - }; - - namespace regrank{ - class LambdaRankObj : public IObjFunction{ - public: - LambdaRankObj(void){ - loss_.loss_type = LossType::kLogisticRaw; - } - virtual ~LambdaRankObj(){} - virtual void SetParam(const char *name, const char *val){ - if( !strcmp( "loss_type", name ) ) loss_.loss_type = atoi( val ); - if( !strcmp( "fix_list_weight", name ) ) fix_list_weight_ = (float)atof( val ); - } - private: - LossType loss_; - float fix_list_weight_; + // number of samples peformed for each instance + int num_pairsample; + // fix weight of each elements in list + float fix_list_weight; protected: /*! \brief helper information in a list */ struct ListEntry{ @@ -311,323 +297,39 @@ namespace xgboost{ // constructor ListEntry(float pred, float label, unsigned rindex): pred(pred),label(label),rindex(rindex){} // comparator by prediction - inline bool operator<(const ListEntry &p) const{ - return pred > p.pred; + inline static bool CmpPred(const ListEntry &a, const ListEntry &b){ + return a.pred > b.pred; + } + // comparator by label + inline static bool CmpLabel(const ListEntry &a, const ListEntry &b){ + return a.label > b.label; } }; - - class Triple{ - public: - float pred_; - float label_; - int index_; - - Triple(){ - - } - - Triple(const Triple& t){ - pred_ = t.pred_; - label_ = t.label_; - index_ = t.index_; - } - - Triple(float pred, float label, int index) :pred_(pred), label_(label), index_(index){ - - } - }; - - static inline bool TripleComparer(const Triple &a, const Triple &b){ - return a.pred_ > b.pred_; - } - - /* \brief Sorted tuples of a group by the predictions, and - * the fields in the return tuples successively are predicions, - * labels, and the original index of the instance in the group - */ - inline void GetSortedTuple(const std::vector &preds, - const std::vector &labels, - const std::vector &group_index, - int group, std::vector< Triple > &sorted_triple){ - sorted_triple.resize(group_index[group + 1] - group_index[group]); - for (unsigned j = group_index[group]; j < group_index[group + 1]; j++){ - sorted_triple[j - group_index[group]] = Triple(preds[j], labels[j], j); - } - - std::sort(sorted_triple.begin(), sorted_triple.end(), TripleComparer); - } - - /* - * \brief Get the position of instances after sorted - * \param sorted_triple the fields successively are predicions, - * labels, and the original index of the instance in the group - * \param start the offset index of the group - * \param index_remap a vector indicating the new position of each instance after sorted, - * for example,[1,0] means that the second instance is put ahead after sorted - */ - inline void GetIndexMap(std::vector< Triple > sorted_triple, int start, std::vector &index_remap){ - index_remap.resize(sorted_triple.size()); - for (size_t i = 0; i < sorted_triple.size(); i++){ - index_remap[sorted_triple[i].index_ - start] = i; - } - } - - - virtual void GetLambda(const std::vector &preds, - const std::vector &labels, - const std::vector &group_index, - const std::vector< std::pair > &pairs, std::vector &lambda, int group) = 0; - - inline void GetGroupGradient(const std::vector &preds, - const std::vector &labels, - const std::vector &group_index, - std::vector &grad, - std::vector &hess, - const std::vector< std::pair > pairs, - int group){ - - std::vector lambda; - GetLambda(preds, labels, group_index, pairs, lambda, group); - - float pred_diff, delta; - float first_order_gradient, second_order_gradient; - - for (size_t i = 0; i < pairs.size(); i++){ - delta = lambda[i]; - pred_diff = loss_.PredTransform(preds[pairs[i].first] - preds[pairs[i].second]); - first_order_gradient = delta * loss_.FirstOrderGradient(pred_diff, 1.0f); - second_order_gradient = 2 * delta * loss_.SecondOrderGradient(pred_diff, 1.0f); - hess[pairs[i].first] += second_order_gradient; - grad[pairs[i].first] += first_order_gradient; - hess[pairs[i].second] += second_order_gradient; - grad[pairs[i].second] -= first_order_gradient; - - } - - if( fix_list_weight_ != 0.0f ){ - float scale = fix_list_weight_ / (group_index[group+1] - group_index[group]); - for(unsigned j = group_index[group]; j < group_index[group+1]; ++j ){ - grad[j] *= scale; - hess[j] *= scale; - } - } - } - - virtual void GenPairs(const std::vector& preds, - const std::vector& labels, - const int &start, const int &end, - std::vector< std::pair > &pairs){ - - random::Random rnd; rnd.Seed(0); - std::vector< std::pair > rec; - for(int j = start; j < end; ++j ){ - rec.push_back( std::make_pair(labels[j], j) ); - } - - std::sort( rec.begin(), rec.end(), CmpFirst ); - // enumerate buckets with same label, for each item in the list, grab another sample randomly - for( unsigned i = 0; i < rec.size(); ){ - unsigned j = i + 1; - while( j < rec.size() && rec[j].first == rec[i].first ) ++ j; - // bucket in [i,j), get a sample outside bucket - unsigned nleft = i, nright = rec.size() - j; - for( unsigned pid = i; pid < j; ++ pid ){ - unsigned ridx = static_cast( rnd.RandDouble() * (nleft+nright) ); - if( ridx < nleft ){ - // get the samples in left side, ridx is pos sample - pairs.push_back(std::make_pair(rec[ridx].second, rec[pid].second)); - }else{ - // get samples in right side, ridx is negsample - pairs.push_back(std::make_pair(rec[pid].second, rec[ridx+j-i].second)); - } - } - i = j; - } - } + /*! \brief a pair in the lambda rank */ + struct LambdaPair{ + /*! \brief positive index: this is a position in the list */ + unsigned pos_index; + /*! \brief negative index: this is a position in the list */ + unsigned neg_index; + /*! \brief weight to be filled in */ + float weight; + LambdaPair( unsigned pos_index, unsigned neg_index ):pos_index(pos_index),neg_index(neg_index),weight(1.0f){} + }; + /*! + * \brief get lambda weight for existing pairs + * \param list a list that is sorted by pred score + * \param pairs record of pairs, containing the pairs to fill in weights + */ + virtual void GetLambdaWeight( const std::vector &sorted_list, std::vector &pairs ) = 0; + }; + }; + + namespace regrank{ + class PairwiseRankObj: public LambdaRankObj{ public: - virtual void GetGradient(const std::vector& preds, - const DMatrix::Info &info, - int iter, - std::vector &grad, - std::vector &hess) { - grad.resize(preds.size()); hess.resize(preds.size()); - const std::vector &group_index = info.group_ptr; - utils::Assert(group_index.size() != 0 && group_index.back() == preds.size(), "rank loss must have group file"); - - for (size_t i = 0; i < group_index.size() - 1; i++){ - std::vector< std::pair > pairs; - GenPairs(preds, info.labels, group_index[i], group_index[i + 1],pairs); - GetGroupGradient(preds, info.labels, group_index, grad, hess, pairs, i); - } - } - virtual const char* DefaultEvalMetric(void) { - return "auc"; - } + virtual ~PairwiseRankObj(void){} + virtual void GetLambdaWeight( const std::vector &sorted_list, std::vector &pairs ){} }; - - class LambdaRankObj_NDCG : public LambdaRankObj{ - - static inline float CalcDCG(const std::vector< float > &rec) { - double sumdcg = 0.0; - for (size_t i = 0; i < rec.size(); i++){ - const unsigned rel = static_cast(rec[i]); - if (rel != 0){ - sumdcg += logf(2.0f) *((1 << rel) - 1) / logf(i + 2); - } - } - return static_cast(sumdcg); - } - - /* - * \brief Obtain the delta NDCG if trying to switch the positions of instances in index1 or index2 - * in sorted triples. Here DCG is calculated as sigma_i 2^rel_i/log(i + 1) - * \param sorted_triple the fields are predition,label,original index - * \param index1,index2 the instances switched - * \param the IDCG of the list - */ - inline float GetLambdaNDCG(const std::vector< Triple > sorted_triple, - int index1, - int index2, float IDCG){ - double original = (1 << static_cast(sorted_triple[index1].label_)) / log(index1 + 2) - + (1 << static_cast(sorted_triple[index2].label_)) / log(index2 + 2); - double changed = (1 << static_cast(sorted_triple[index2].label_)) / log(index1 + 2) - + (1 << static_cast(sorted_triple[index1].label_)) / log(index2 + 2); - double ans = (original - changed) / IDCG; - if (ans < 0) ans = -ans; - return static_cast(ans); - } - - - inline float GetIDCG(const std::vector< Triple > sorted_triple){ - std::vector labels; - for (size_t i = 0; i < sorted_triple.size(); i++){ - labels.push_back(sorted_triple[i].label_); - } - - std::sort(labels.begin(), labels.end(), std::greater()); - return CalcDCG(labels); - } - - inline void GetLambda(const std::vector &preds, - const std::vector &labels, - const std::vector &group_index, - const std::vector< std::pair > &pairs, std::vector &lambda, int group){ - std::vector< Triple > sorted_triple; - std::vector index_remap; - float IDCG; - - GetSortedTuple(preds, labels, group_index, group, sorted_triple); - GetIndexMap(sorted_triple, group_index[group], index_remap); - IDCG = GetIDCG(sorted_triple); - - lambda.resize(pairs.size()); - for (size_t i = 0; i < pairs.size(); i++){ - lambda[i] = GetLambdaNDCG(sorted_triple, - index_remap[pairs[i].first],index_remap[pairs[i].second],IDCG); - } - } - }; - - class LambdaRankObj_MAP : public LambdaRankObj{ - class Quadruple{ - public: - /* \brief the accumulated precision */ - float ap_acc_; - /* \brief the accumulated precision assuming a positive instance is missing*/ - float ap_acc_miss_; - /* \brief the accumulated precision assuming that one more positive instance is inserted ahead*/ - float ap_acc_add_; - /* \brief the accumulated positive instance count */ - float hits_; - - Quadruple(){} - - Quadruple(const Quadruple& q){ - ap_acc_ = q.ap_acc_; - ap_acc_miss_ = q.ap_acc_miss_; - ap_acc_add_ = q.ap_acc_add_; - hits_ = q.hits_; - } - - Quadruple(float ap_acc, float ap_acc_miss, float ap_acc_add, float hits - ) :ap_acc_(ap_acc), ap_acc_miss_(ap_acc_miss), ap_acc_add_(ap_acc_add), hits_(hits){ - - } - - }; - - /* - * \brief Obtain the delta MAP if trying to switch the positions of instances in index1 or index2 - * in sorted triples - * \param sorted_triple the fields are predition,label,original index - * \param index1,index2 the instances switched - * \param map_acc a vector containing the accumulated precisions for each position in a list - */ - inline float GetLambdaMAP(const std::vector< Triple > sorted_triple, - int index1, int index2, - std::vector< Quadruple > &map_acc){ - if (index1 == index2 || sorted_triple[index1].label_ == sorted_triple[index2].label_) return 0.0; - if (index1 > index2) std::swap(index1, index2); - float original = map_acc[index2].ap_acc_; // The accumulated precision in the interval [index1,index2] - if (index1 != 0) original -= map_acc[index1 - 1].ap_acc_; - float changed = 0; - if (sorted_triple[index1].label_ < sorted_triple[index2].label_){ - changed += map_acc[index2 - 1].ap_acc_add_ - map_acc[index1].ap_acc_add_; - changed += (map_acc[index1].hits_ + 1.0f) / (index1 + 1); - } - else{ - changed += map_acc[index2 - 1].ap_acc_miss_ - map_acc[index1].ap_acc_miss_; - changed += map_acc[index2].hits_ / (index2 + 1); - } - float ans = (changed - original) / (map_acc[map_acc.size() - 1].hits_); - if (ans < 0) ans = -ans; - return ans; - } - - - /* - * \brief preprocessing results for calculating delta MAP - * \return The first field is the accumulated precision, the second field is the - * accumulated precision assuming a positive instance is missing, - * the third field is the accumulated precision assuming that one more positive - * instance is inserted, the fourth field is the accumulated positive instance count - */ - inline void GetMAPAcc(const std::vector< Triple > sorted_triple, - std::vector< Quadruple > &map_acc){ - map_acc.resize(sorted_triple.size()); - float hit = 0, acc1 = 0, acc2 = 0, acc3 = 0; - for (size_t i = 1; i <= sorted_triple.size(); i++){ - if ((int)sorted_triple[i - 1].label_ == 1) { - hit++; - acc1 += hit / i; - acc2 += (hit - 1) / i; - acc3 += (hit + 1) / i; - } - map_acc[i-1] = Quadruple(acc1, acc2, acc3, hit); - } - } - - inline void GetLambda(const std::vector &preds, - const std::vector &labels, - const std::vector &group_index, - const std::vector< std::pair > &pairs, std::vector &lambda, int group){ - std::vector< Triple > sorted_triple; - std::vector index_remap; - std::vector< Quadruple > map_acc; - - GetSortedTuple(preds, labels, group_index, group, sorted_triple); - GetIndexMap(sorted_triple, group_index[group], index_remap); - GetMAPAcc(sorted_triple, map_acc); - - lambda.resize(pairs.size()); - for (size_t i = 0; i < pairs.size(); i++){ - lambda[i] = GetLambdaMAP(sorted_triple, - index_remap[pairs[i].first], index_remap[pairs[i].second], map_acc); - } - } - }; - - }; }; #endif