diff --git a/regrank/xgboost_regrank_obj.hpp b/regrank/xgboost_regrank_obj.hpp
index e4d99e0c7..e0ef67f7a 100644
--- a/regrank/xgboost_regrank_obj.hpp
+++ b/regrank/xgboost_regrank_obj.hpp
@@ -330,6 +330,190 @@ namespace xgboost{
             virtual ~PairwiseRankObj(void){}
             virtual void GetLambdaWeight( const std::vector &sorted_list, std::vector &pairs ){}
         };
+
+        /*
+         * \brief LambdaRank objective that weights each pair by the absolute change in NDCG
+         *  caused by swapping the two instances of the pair
+         * NOTE(review): the vector element types (ListEntry, LambdaPair) are assumed to be the
+         *  ones declared by LambdaRankObj, which is not visible in this patch - confirm
+         */
+        class LambdaRankObj_NDCG : public LambdaRankObj{
+
+        public:
+            virtual ~LambdaRankObj_NDCG(void){}
+
+            /*
+             * \brief DCG of a label sequence taken in the given order, summing the same
+             *  per-position term GetLambdaNDCG uses: sigma_i 2^label_i / log(i + 2), i zero-based
+             * \param labels the labels in ranked order
+             * \return the DCG, 0 when labels is empty
+             */
+            inline float DCG(const std::vector<float> &labels){
+                double sum = 0.0;
+                for (size_t i = 0; i < labels.size(); i++){
+                    sum += DCGTerm(labels[i], static_cast<double>(i));
+                }
+                return static_cast<float>(sum);
+            }
+
+            /*
+             * \brief the ideal DCG of a list: the DCG of its labels sorted in descending order
+             * \param sorted_list the list containing entry information
+             */
+            inline float GetIDCG(const std::vector<ListEntry> &sorted_list){
+                std::vector<float> labels;
+                labels.reserve(sorted_list.size());
+                for (size_t i = 0; i < sorted_list.size(); i++){
+                    labels.push_back(sorted_list[i].label);
+                }
+
+                std::sort(labels.begin(), labels.end(), std::greater<float>());
+                return DCG(labels);
+            }
+
+            /*
+             * \brief Obtain the absolute delta NDCG if trying to switch the positions of the instances
+             *  at index1 and index2 in the sorted list. Here DCG is calculated as
+             *  sigma_i 2^rel_i/log(i + 1), with i the one-based position
+             * \param sorted_list the list containing entry information
+             * \param index1,index2 the instances switched
+             * \param IDCG the IDCG of the list
+             * \return |delta DCG| / IDCG, or 0 when IDCG is not positive
+             */
+            inline float GetLambdaNDCG(const std::vector<ListEntry> &sorted_list,
+                int index1,
+                int index2, float IDCG){
+                // DCG of an empty list is 0; never divide by a non-positive IDCG
+                if (IDCG <= 0.0f) return 0.0f;
+                const float label1 = sorted_list[index1].label;
+                const float label2 = sorted_list[index2].label;
+                const double original = DCGTerm(label1, index1) + DCGTerm(label2, index2);
+                const double changed = DCGTerm(label2, index1) + DCGTerm(label1, index2);
+                double ans = (original - changed) / IDCG;
+                if (ans < 0) ans = -ans;
+                return static_cast<float>(ans);
+            }
+
+            /* \brief set the weight of every pair to the delta NDCG of swapping its two instances */
+            virtual void GetLambdaWeight(const std::vector<ListEntry> &sorted_list, std::vector<LambdaPair> &pairs){
+                const float IDCG = GetIDCG(sorted_list);
+                for (size_t i = 0; i < pairs.size(); i++){
+                    pairs[i].weight = GetLambdaNDCG(sorted_list,
+                        pairs[i].pos_index, pairs[i].neg_index, IDCG);
+                }
+            }
+
+        private:
+            /* \brief DCG contribution of one instance: 2^label / log(pos + 2), pos zero-based */
+            inline static double DCGTerm(float label, double pos){
+                // the label is truncated to an integer before being used as the shift count
+                return (1 << static_cast<int>(label)) / log(pos + 2.0);
+            }
+
+        };
+
+        /*
+         * \brief LambdaRank objective that weights each pair by the absolute change in MAP
+         *  caused by swapping the two instances of the pair
+         */
+        class LambdaRankObj_MAP : public LambdaRankObj{
+
+            /* \brief statistics of a ranked list accumulated up to and including one position */
+            class Quadruple{
+            public:
+                /* \brief the accumulated precision */
+                float ap_acc_;
+                /* \brief the accumulated precision assuming a positive instance is missing */
+                float ap_acc_miss_;
+                /* \brief the accumulated precision assuming that one more positive instance is inserted ahead */
+                float ap_acc_add_;
+                /* \brief the accumulated positive instance count */
+                float hits_;
+
+                // zero-initialise so that a default-constructed entry never holds indeterminate values
+                Quadruple() : ap_acc_(0.0f), ap_acc_miss_(0.0f), ap_acc_add_(0.0f), hits_(0.0f){}
+
+                Quadruple(float ap_acc, float ap_acc_miss, float ap_acc_add, float hits
+                    ) :ap_acc_(ap_acc), ap_acc_miss_(ap_acc_miss), ap_acc_add_(ap_acc_add), hits_(hits){
+                }
+
+            };
+
+        public:
+            virtual ~LambdaRankObj_MAP(void){}
+
+            /*
+             * \brief Obtain the absolute delta MAP if trying to switch the positions of the instances
+             *  at index1 and index2 in the sorted list
+             * \param sorted_list the list containing entry information
+             * \param index1,index2 the instances switched
+             * \param map_acc a vector containing the accumulated precisions for each position in a list
+             * \return the absolute change in accumulated precision divided by the total positive count;
+             *  0 when both labels are equal or when the list has no positive instance
+             * NOTE(review): labels are compared by value here while GetMAPAcc counts an instance as
+             *  positive when its label truncates to 1 - presumably labels are binary; confirm
+             */
+            inline float GetLambdaMAP(const std::vector<ListEntry> &sorted_list,
+                int index1, int index2,
+                std::vector< Quadruple > &map_acc){
+                if (index1 == index2 || sorted_list[index1].label == sorted_list[index2].label) return 0.0f;
+                // the result is normalised by the total positive count; bail out instead of dividing by zero
+                if (map_acc.empty() || map_acc.back().hits_ <= 0.0f) return 0.0f;
+                if (index1 > index2) std::swap(index1, index2);
+                float original = map_acc[index2].ap_acc_; // The accumulated precision in the interval [index1,index2]
+                if (index1 != 0) original -= map_acc[index1 - 1].ap_acc_;
+                float changed = 0;
+                if (sorted_list[index1].label < sorted_list[index2].label){
+                    // the higher label moves up to index1: entries strictly in between gain one hit ahead
+                    changed += map_acc[index2 - 1].ap_acc_add_ - map_acc[index1].ap_acc_add_;
+                    changed += (map_acc[index1].hits_ + 1.0f) / (index1 + 1);
+                }
+                else{
+                    // the higher label moves down to index2: entries strictly in between lose one hit ahead
+                    changed += map_acc[index2 - 1].ap_acc_miss_ - map_acc[index1].ap_acc_miss_;
+                    changed += map_acc[index2].hits_ / (index2 + 1);
+                }
+                float ans = (changed - original) / map_acc.back().hits_;
+                if (ans < 0) ans = -ans;
+                return ans;
+            }
+
+            /*
+             * \brief preprocessing results for calculating delta MAP
+             * \param sorted_list the list containing entry information
+             * \param map_acc output, resized to the list length; for every position the first field is
+             *  the accumulated precision, the second field is the accumulated precision assuming a
+             *  positive instance is missing, the third field is the accumulated precision assuming
+             *  that one more positive instance is inserted, the fourth field is the accumulated
+             *  positive instance count
+             */
+            inline void GetMAPAcc(const std::vector<ListEntry> &sorted_list,
+                std::vector< Quadruple > &map_acc){
+                map_acc.resize(sorted_list.size());
+                float hit = 0, acc1 = 0, acc2 = 0, acc3 = 0;
+                for (size_t i = 1; i <= sorted_list.size(); i++){
+                    // an instance counts as positive when its label truncates to 1
+                    if (static_cast<int>(sorted_list[i - 1].label) == 1) {
+                        hit++;
+                        acc1 += hit / i;
+                        acc2 += (hit - 1) / i;
+                        acc3 += (hit + 1) / i;
+                    }
+                    map_acc[i - 1] = Quadruple(acc1, acc2, acc3, hit);
+                }
+            }
+
+            /* \brief set the weight of every pair to the delta MAP of swapping its two instances */
+            virtual void GetLambdaWeight(const std::vector<ListEntry> &sorted_list, std::vector<LambdaPair> &pairs){
+                std::vector< Quadruple > map_acc;
+                GetMAPAcc(sorted_list, map_acc);
+                for (size_t i = 0; i < pairs.size(); i++){
+                    pairs[i].weight = GetLambdaMAP(sorted_list, pairs[i].pos_index, pairs[i].neg_index, map_acc);
+                }
+            }
+
+        };
+
     };
 };
 #endif