This commit is contained in:
tqchen 2014-08-17 20:47:20 -07:00
parent 5a472145de
commit 4ed4b08146
4 changed files with 45 additions and 36 deletions

View File

@ -15,7 +15,7 @@ DataMatrix* LoadDataMatrix(const char *fname, bool silent, bool savebuffer) {
} }
void SaveDataMatrix(const DataMatrix &dmat, const char *fname, bool silent) { void SaveDataMatrix(const DataMatrix &dmat, const char *fname, bool silent) {
if (dmat.magic == DMatrixSimple::kMagic){ if (dmat.magic == DMatrixSimple::kMagic) {
const DMatrixSimple *p_dmat = static_cast<const DMatrixSimple*>(&dmat); const DMatrixSimple *p_dmat = static_cast<const DMatrixSimple*>(&dmat);
p_dmat->SaveBinary(fname, silent); p_dmat->SaveBinary(fname, silent);
} else { } else {

View File

@ -8,6 +8,7 @@
#include <vector> #include <vector>
#include <cmath> #include <cmath>
#include <algorithm> #include <algorithm>
#include <utility>
#include <functional> #include <functional>
#include "../data.h" #include "../data.h"
#include "./objective.h" #include "./objective.h"
@ -254,7 +255,6 @@ class LambdaRankObj : public IObjFunction {
utils::Check(gptr.size() != 0 && gptr.back() == preds.size(), utils::Check(gptr.size() != 0 && gptr.back() == preds.size(),
"group structure not consistent with #rows"); "group structure not consistent with #rows");
const unsigned ngroup = static_cast<unsigned>(gptr.size() - 1); const unsigned ngroup = static_cast<unsigned>(gptr.size() - 1);
#pragma omp parallel #pragma omp parallel
{ {
// parallel construct, declare random number generator here, so that each // parallel construct, declare random number generator here, so that each
@ -262,8 +262,7 @@ class LambdaRankObj : public IObjFunction {
random::Random rnd; rnd.Seed(iter* 1111 + omp_get_thread_num()); random::Random rnd; rnd.Seed(iter* 1111 + omp_get_thread_num());
std::vector<LambdaPair> pairs; std::vector<LambdaPair> pairs;
std::vector<ListEntry> lst; std::vector<ListEntry> lst;
std::vector< std::pair<float,unsigned> > rec; std::vector< std::pair<float, unsigned> > rec;
#pragma omp for schedule(static) #pragma omp for schedule(static)
for (unsigned k = 0; k < ngroup; ++k) { for (unsigned k = 0; k < ngroup; ++k) {
lst.clear(); pairs.clear(); lst.clear(); pairs.clear();
@ -377,7 +376,7 @@ class LambdaRankObj : public IObjFunction {
class PairwiseRankObj: public LambdaRankObj{ class PairwiseRankObj: public LambdaRankObj{
public: public:
virtual ~PairwiseRankObj(void){} virtual ~PairwiseRankObj(void) {}
protected: protected:
virtual void GetLambdaWeight(const std::vector<ListEntry> &sorted_list, virtual void GetLambdaWeight(const std::vector<ListEntry> &sorted_list,
@ -402,7 +401,6 @@ class LambdaRankObjNDCG : public LambdaRankObj {
std::sort(labels.begin(), labels.end(), std::greater<float>()); std::sort(labels.begin(), labels.end(), std::greater<float>());
IDCG = CalcDCG(labels); IDCG = CalcDCG(labels);
} }
if (IDCG == 0.0) { if (IDCG == 0.0) {
for (size_t i = 0; i < pairs.size(); ++i) { for (size_t i = 0; i < pairs.size(); ++i) {
pairs[i].weight = 0.0f; pairs[i].weight = 0.0f;
@ -412,13 +410,15 @@ class LambdaRankObjNDCG : public LambdaRankObj {
for (size_t i = 0; i < pairs.size(); ++i) { for (size_t i = 0; i < pairs.size(); ++i) {
unsigned pos_idx = pairs[i].pos_index; unsigned pos_idx = pairs[i].pos_index;
unsigned neg_idx = pairs[i].neg_index; unsigned neg_idx = pairs[i].neg_index;
float pos_loginv = 1.0f / logf(pos_idx+2.0f); float pos_loginv = 1.0f / logf(pos_idx + 2.0f);
float neg_loginv = 1.0f / logf(neg_idx+2.0f); float neg_loginv = 1.0f / logf(neg_idx + 2.0f);
int pos_label = static_cast<int>(sorted_list[pos_idx].label); int pos_label = static_cast<int>(sorted_list[pos_idx].label);
int neg_label = static_cast<int>(sorted_list[neg_idx].label); int neg_label = static_cast<int>(sorted_list[neg_idx].label);
float original = ((1<<pos_label)-1) * pos_loginv + ((1<<neg_label)-1) * neg_loginv; float original =
float changed = ((1<<neg_label)-1) * pos_loginv + ((1<<pos_label)-1) * neg_loginv; ((1 << pos_label) - 1) * pos_loginv + ((1 << neg_label) - 1) * neg_loginv;
float delta = (original-changed) * IDCG; float changed =
((1 << neg_label) - 1) * pos_loginv + ((1 << pos_label) - 1) * neg_loginv;
float delta = (original - changed) * IDCG;
if (delta < 0.0f) delta = - delta; if (delta < 0.0f) delta = - delta;
pairs[i].weight = delta; pairs[i].weight = delta;
} }
@ -429,7 +429,7 @@ class LambdaRankObjNDCG : public LambdaRankObj {
for (size_t i = 0; i < labels.size(); ++i) { for (size_t i = 0; i < labels.size(); ++i) {
const unsigned rel = labels[i]; const unsigned rel = labels[i];
if (rel != 0) { if (rel != 0) {
sumdcg += ((1<<rel)-1) / logf(i + 2); sumdcg += ((1 << rel) - 1) / logf(i + 2);
} }
} }
return static_cast<float>(sumdcg); return static_cast<float>(sumdcg);
@ -442,11 +442,17 @@ class LambdaRankObjMAP : public LambdaRankObj {
protected: protected:
struct MAPStats { struct MAPStats {
/* \brief the accumulated precision */ /*! \brief the accumulated precision */
float ap_acc; float ap_acc;
/* \brief the accumulated precision assuming a positive instance is missing */ /*!
* \brief the accumulated precision,
* assuming a positive instance is missing
*/
float ap_acc_miss; float ap_acc_miss;
/* \brief the accumulated precision assuming that one more positive instance is inserted ahead*/ /*!
* \brief the accumulated precision,
* assuming that one more positive instance is inserted ahead
*/
float ap_acc_add; float ap_acc_add;
/* \brief the accumulated positive instance count */ /* \brief the accumulated positive instance count */
float hits; float hits;
@ -454,7 +460,7 @@ class LambdaRankObjMAP : public LambdaRankObj {
MAPStats(float ap_acc, float ap_acc_miss, float ap_acc_add, float hits) MAPStats(float ap_acc, float ap_acc_miss, float ap_acc_add, float hits)
: ap_acc(ap_acc), ap_acc_miss(ap_acc_miss), ap_acc_add(ap_acc_add), hits(hits) {} : ap_acc(ap_acc), ap_acc_miss(ap_acc_miss), ap_acc_add(ap_acc_add), hits(hits) {}
}; };
/* /*!
* \brief Obtain the delta MAP if trying to switch the positions of the instances at index1 and index2 * \brief Obtain the delta MAP if trying to switch the positions of the instances at index1 and index2
* in sorted triples * in sorted triples
* \param sorted_list the list containing entry information * \param sorted_list the list containing entry information
@ -463,7 +469,8 @@ class LambdaRankObjMAP : public LambdaRankObj {
*/ */
inline float GetLambdaMAP(const std::vector<ListEntry> &sorted_list, inline float GetLambdaMAP(const std::vector<ListEntry> &sorted_list,
int index1, int index2, int index1, int index2,
std::vector<MAPStats> &map_stats){ std::vector<MAPStats> *p_map_stats) {
std::vector<MAPStats> &map_stats = *p_map_stats;
if (index1 == index2 || map_stats[map_stats.size() - 1].hits == 0) { if (index1 == index2 || map_stats[map_stats.size() - 1].hits == 0) {
return 0.0f; return 0.0f;
} }
@ -482,7 +489,6 @@ class LambdaRankObjMAP : public LambdaRankObj {
changed += map_stats[index2 - 1].ap_acc_miss - map_stats[index1].ap_acc_miss; changed += map_stats[index2 - 1].ap_acc_miss - map_stats[index1].ap_acc_miss;
changed += map_stats[index2].hits / (index2 + 1); changed += map_stats[index2].hits / (index2 + 1);
} }
float ans = (changed - original) / (map_stats[map_stats.size() - 1].hits); float ans = (changed - original) / (map_stats[map_stats.size() - 1].hits);
if (ans < 0) ans = -ans; if (ans < 0) ans = -ans;
return ans; return ans;
@ -493,7 +499,8 @@ class LambdaRankObjMAP : public LambdaRankObj {
* \param map_stats a vector containing the accumulated precisions for each position in a list * \param map_stats a vector containing the accumulated precisions for each position in a list
*/ */
inline void GetMAPStats(const std::vector<ListEntry> &sorted_list, inline void GetMAPStats(const std::vector<ListEntry> &sorted_list,
std::vector<MAPStats> &map_acc){ std::vector<MAPStats> *p_map_acc) {
std::vector<MAPStats> &map_acc = *p_map_acc;
map_acc.resize(sorted_list.size()); map_acc.resize(sorted_list.size());
float hit = 0, acc1 = 0, acc2 = 0, acc3 = 0; float hit = 0, acc1 = 0, acc2 = 0, acc3 = 0;
for (size_t i = 1; i <= sorted_list.size(); ++i) { for (size_t i = 1; i <= sorted_list.size(); ++i) {
@ -503,16 +510,18 @@ class LambdaRankObjMAP : public LambdaRankObj {
acc2 += (hit - 1) / i; acc2 += (hit - 1) / i;
acc3 += (hit + 1) / i; acc3 += (hit + 1) / i;
} }
map_acc[i - 1] = MAPStats(acc1,acc2,acc3,hit); map_acc[i - 1] = MAPStats(acc1, acc2, acc3, hit);
} }
} }
virtual void GetLambdaWeight(const std::vector<ListEntry> &sorted_list, std::vector<LambdaPair> *io_pairs) { virtual void GetLambdaWeight(const std::vector<ListEntry> &sorted_list,
std::vector<LambdaPair> *io_pairs) {
std::vector<LambdaPair> &pairs = *io_pairs; std::vector<LambdaPair> &pairs = *io_pairs;
std::vector<MAPStats> map_stats; std::vector<MAPStats> map_stats;
GetMAPStats(sorted_list, map_stats); GetMAPStats(sorted_list, &map_stats);
for (size_t i = 0; i < pairs.size(); ++i) { for (size_t i = 0; i < pairs.size(); ++i) {
pairs[i].weight = pairs[i].weight =
GetLambdaMAP(sorted_list, pairs[i].pos_index, pairs[i].neg_index, map_stats); GetLambdaMAP(sorted_list, pairs[i].pos_index,
pairs[i].neg_index, &map_stats);
} }
} }
}; };