Merge branch 'dev' of ssh://github.com/tqchen/xgboost into dev
This commit is contained in:
commit
31c5d7843f
@ -2,7 +2,7 @@
|
|||||||
# choose the tree booster, 0: tree, 1: linear
|
# choose the tree booster, 0: tree, 1: linear
|
||||||
booster_type = 0
|
booster_type = 0
|
||||||
|
|
||||||
# so far, we have pairwise rank
|
# specify objective
|
||||||
objective="rank:pairwise"
|
objective="rank:pairwise"
|
||||||
|
|
||||||
# Tree Booster Parameters
|
# Tree Booster Parameters
|
||||||
|
|||||||
@ -1,14 +1,8 @@
|
|||||||
#Download the dataset from web site
|
python trans_data.py train.txt mq2008.train mq2008.train.group
|
||||||
wget http://research.microsoft.com/en-us/um/beijing/projects/letor/LETOR4.0/Data/MQ2008.rar
|
|
||||||
|
|
||||||
#please first install the unrar package
|
python trans_data.py test.txt mq2008.test mq2008.test.group
|
||||||
unrar x MQ2008
|
|
||||||
|
|
||||||
python trans_data.py MQ2008/Fold1/train.txt mq2008.train mq2008.train.group
|
python trans_data.py vali.txt mq2008.vali mq2008.vali.group
|
||||||
|
|
||||||
python trans_data.py MQ2008/Fold1/test.txt mq2008.test mq2008.test.group
|
|
||||||
|
|
||||||
python trans_data.py MQ2008/Fold1/vali.txt mq2008.vali mq2008.vali.group
|
|
||||||
|
|
||||||
../../xgboost mq2008.conf
|
../../xgboost mq2008.conf
|
||||||
|
|
||||||
|
|||||||
@ -116,7 +116,11 @@ namespace xgboost{
|
|||||||
if( !strcmp("multi:softmax", name ) ) return new SoftmaxMultiClassObj();
|
if( !strcmp("multi:softmax", name ) ) return new SoftmaxMultiClassObj();
|
||||||
if( !strcmp("rank:pairwise", name ) ) return new PairwiseRankObj();
|
if( !strcmp("rank:pairwise", name ) ) return new PairwiseRankObj();
|
||||||
if( !strcmp("rank:softmax", name ) ) return new SoftmaxRankObj();
|
if( !strcmp("rank:softmax", name ) ) return new SoftmaxRankObj();
|
||||||
utils::Error("unknown objective function type");
|
if( !strcmp("rank:pairwise", name ) ) return new PairwiseRankObj();
|
||||||
|
if( !strcmp("rank:softmax", name ) ) return new SoftmaxRankObj();
|
||||||
|
if( !strcmp("rank:map", name ) ) return new LambdaRankObj_MAP();
|
||||||
|
if( !strcmp("rank:ndcg", name ) ) return new LambdaRankObj_NDCG();
|
||||||
|
utils::Error("unknown objective function type");
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|||||||
@ -330,6 +330,163 @@ namespace xgboost{
|
|||||||
virtual ~PairwiseRankObj(void){}
|
virtual ~PairwiseRankObj(void){}
|
||||||
virtual void GetLambdaWeight( const std::vector<ListEntry> &sorted_list, std::vector<LambdaPair> &pairs ){}
|
virtual void GetLambdaWeight( const std::vector<ListEntry> &sorted_list, std::vector<LambdaPair> &pairs ){}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
class LambdaRankObj_NDCG : public LambdaRankObj{
|
||||||
|
|
||||||
|
public:
|
||||||
|
virtual ~LambdaRankObj_NDCG(void){}
|
||||||
|
|
||||||
|
inline float CalcDCG( const std::vector<float> &labels ){
|
||||||
|
double sumdcg = 0.0;
|
||||||
|
for( size_t i = 0; i < labels.size(); i ++ ){
|
||||||
|
const unsigned rel = labels[i];
|
||||||
|
if( rel != 0 ){
|
||||||
|
sumdcg += logf(2.0f) * ((1<<rel)-1) / logf( i + 2 );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return static_cast<float>(sumdcg);
|
||||||
|
}
|
||||||
|
|
||||||
|
inline float GetIDCG(const std::vector<ListEntry> &sorted_list){
|
||||||
|
std::vector<float> labels;
|
||||||
|
for (size_t i = 0; i < sorted_list.size(); i++){
|
||||||
|
labels.push_back(sorted_list[i].label);
|
||||||
|
}
|
||||||
|
|
||||||
|
std::sort(labels.begin(), labels.end(), std::greater<float>());
|
||||||
|
return CalcDCG(labels);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* \brief Obtain the delta NDCG if trying to switch the positions of instances in index1 or index2
|
||||||
|
* in sorted triples. Here DCG is calculated as sigma_i 2^rel_i/log(i + 1)
|
||||||
|
* \param sorted_list the list containing entry information
|
||||||
|
* \param index1,index2 the instances switched
|
||||||
|
* \param the IDCG of the list
|
||||||
|
*/
|
||||||
|
inline float GetLambdaNDCG(const std::vector<ListEntry> &sorted_list,
|
||||||
|
int index1,
|
||||||
|
int index2, float IDCG){
|
||||||
|
double original = (1 << static_cast<int>(sorted_list[index1].label)) / log(index1 + 2)
|
||||||
|
+ (1 << static_cast<int>(sorted_list[index2].label)) / log(index2 + 2);
|
||||||
|
double changed = (1 << static_cast<int>(sorted_list[index2].label)) / log(index1 + 2)
|
||||||
|
+ (1 << static_cast<int>(sorted_list[index1].label)) / log(index2 + 2);
|
||||||
|
double ans = (original - changed) / IDCG;
|
||||||
|
if (ans < 0) ans = -ans;
|
||||||
|
return static_cast<float>(ans);
|
||||||
|
}
|
||||||
|
|
||||||
|
virtual void GetLambdaWeight(const std::vector<ListEntry> &sorted_list, std::vector<LambdaPair> &pairs){
|
||||||
|
float IDCG = GetIDCG(sorted_list);
|
||||||
|
for (size_t i = 0; i < pairs.size(); i++){
|
||||||
|
pairs[i].weight = GetLambdaNDCG(sorted_list,
|
||||||
|
pairs[i].pos_index, pairs[i].neg_index, IDCG);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
class LambdaRankObj_MAP : public LambdaRankObj{
|
||||||
|
|
||||||
|
class Quadruple{
|
||||||
|
public:
|
||||||
|
/* \brief the accumulated precision */
|
||||||
|
float ap_acc_;
|
||||||
|
/* \brief the accumulated precision assuming a positive instance is missing*/
|
||||||
|
float ap_acc_miss_;
|
||||||
|
/* \brief the accumulated precision assuming that one more positive instance is inserted ahead*/
|
||||||
|
float ap_acc_add_;
|
||||||
|
/* \brief the accumulated positive instance count */
|
||||||
|
float hits_;
|
||||||
|
|
||||||
|
Quadruple(){}
|
||||||
|
|
||||||
|
Quadruple(const Quadruple& q){
|
||||||
|
ap_acc_ = q.ap_acc_;
|
||||||
|
ap_acc_miss_ = q.ap_acc_miss_;
|
||||||
|
ap_acc_add_ = q.ap_acc_add_;
|
||||||
|
hits_ = q.hits_;
|
||||||
|
}
|
||||||
|
|
||||||
|
Quadruple(float ap_acc, float ap_acc_miss, float ap_acc_add, float hits
|
||||||
|
) :ap_acc_(ap_acc), ap_acc_miss_(ap_acc_miss), ap_acc_add_(ap_acc_add), hits_(hits){
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
public:
|
||||||
|
virtual ~LambdaRankObj_MAP(void){}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* \brief Obtain the delta MAP if trying to switch the positions of instances in index1 or index2
|
||||||
|
* in sorted triples
|
||||||
|
* \param sorted_list the list containing entry information
|
||||||
|
* \param index1,index2 the instances switched
|
||||||
|
* \param map_acc a vector containing the accumulated precisions for each position in a list
|
||||||
|
*/
|
||||||
|
inline float GetLambdaMAP(const std::vector<ListEntry> &sorted_list,
|
||||||
|
int index1, int index2,
|
||||||
|
std::vector< Quadruple > &map_acc){
|
||||||
|
if (index1 == index2
|
||||||
|
|| sorted_list[index1].label == sorted_list[index2].label
|
||||||
|
|| map_acc[map_acc.size() - 1].hits_ == 0
|
||||||
|
) return 0.0;
|
||||||
|
if (index1 > index2) std::swap(index1, index2);
|
||||||
|
float original = map_acc[index2].ap_acc_; // The accumulated precision in the interval [index1,index2]
|
||||||
|
if (index1 != 0) original -= map_acc[index1 - 1].ap_acc_;
|
||||||
|
float changed = 0;
|
||||||
|
if (sorted_list[index1].label < sorted_list[index2].label){
|
||||||
|
changed += map_acc[index2 - 1].ap_acc_add_ - map_acc[index1].ap_acc_add_;
|
||||||
|
changed += (map_acc[index1].hits_ + 1.0f) / (index1 + 1);
|
||||||
|
}
|
||||||
|
else{
|
||||||
|
changed += map_acc[index2 - 1].ap_acc_miss_ - map_acc[index1].ap_acc_miss_;
|
||||||
|
changed += map_acc[index2].hits_ / (index2 + 1);
|
||||||
|
}
|
||||||
|
if(map_acc[map_acc.size() - 1].hits_ == 0) printf("haha\n");
|
||||||
|
|
||||||
|
float ans = (changed - original) / (map_acc[map_acc.size() - 1].hits_);
|
||||||
|
if (ans < 0) ans = -ans;
|
||||||
|
return ans;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* \brief preprocessing results for calculating delta MAP
|
||||||
|
* \return The first field is the accumulated precision, the second field is the
|
||||||
|
* accumulated precision assuming a positive instance is missing,
|
||||||
|
* the third field is the accumulated precision assuming that one more positive
|
||||||
|
* instance is inserted, the fourth field is the accumulated positive instance count
|
||||||
|
*/
|
||||||
|
inline void GetMAPAcc(const std::vector<ListEntry> &sorted_list,
|
||||||
|
std::vector< Quadruple > &map_acc){
|
||||||
|
map_acc.resize(sorted_list.size());
|
||||||
|
float hit = 0, acc1 = 0, acc2 = 0, acc3 = 0;
|
||||||
|
for (size_t i = 1; i <= sorted_list.size(); i++){
|
||||||
|
if ((int)sorted_list[i - 1].label > 0) {
|
||||||
|
hit++;
|
||||||
|
acc1 += hit / i;
|
||||||
|
acc2 += (hit - 1) / i;
|
||||||
|
acc3 += (hit + 1) / i;
|
||||||
|
}
|
||||||
|
map_acc[i - 1].ap_acc_ = acc1;
|
||||||
|
map_acc[i - 1].ap_acc_miss_ = acc2;
|
||||||
|
map_acc[i - 1].ap_acc_add_ = acc3;
|
||||||
|
map_acc[i - 1].hits_ = hit;
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
virtual void GetLambdaWeight(const std::vector<ListEntry> &sorted_list, std::vector<LambdaPair> &pairs){
|
||||||
|
std::vector< Quadruple > map_acc;
|
||||||
|
GetMAPAcc(sorted_list, map_acc);
|
||||||
|
for (size_t i = 0; i < pairs.size(); i++){
|
||||||
|
pairs[i].weight = GetLambdaMAP(sorted_list, pairs[i].pos_index, pairs[i].neg_index, map_acc);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user