diff --git a/Makefile b/Makefile index 75a38ae6c..780066aca 100644 --- a/Makefile +++ b/Makefile @@ -10,7 +10,7 @@ OBJ = all: $(BIN) $(OBJ) export LDFLAGS= -pthread -lm -xgboost: regrank/xgboost_regrank_main.cpp regrank/*.h booster/*.h booster/*/*.hpp booster/*.hpp +xgboost: regrank/xgboost_regrank_main.cpp regrank/*.h regrank/*.hpp booster/*.h booster/*/*.hpp booster/*.hpp $(BIN) : diff --git a/demo/rank/mq2008.conf b/demo/rank/mq2008.conf index 91ccea511..107a5ae44 100644 --- a/demo/rank/mq2008.conf +++ b/demo/rank/mq2008.conf @@ -2,9 +2,9 @@ # choose the tree booster, 0: tree, 1: linear booster_type = 0 -objective="rank:pairwise" +#objective="rank:pairwise" #objective="rank:softmax" -#objective="lambdarank:map" +objective="lambdarank:map" #objective="lambdarank:ndcg" # Tree Booster Parameters @@ -13,7 +13,7 @@ bst:eta = 0.1 # minimum loss reduction required to make a further partition bst:gamma = 1.0 # minimum sum of instance weight(hessian) needed in a child -bst:min_child_weight = 1 +bst:min_child_weight = 0.1 # maximum depth of a tree bst:max_depth = 6 eval_metric = "ndcg" diff --git a/regrank/xgboost_regrank_obj.hpp b/regrank/xgboost_regrank_obj.hpp index 7c49b063a..2995a03fe 100644 --- a/regrank/xgboost_regrank_obj.hpp +++ b/regrank/xgboost_regrank_obj.hpp @@ -261,7 +261,7 @@ namespace xgboost{ } } virtual const char* DefaultEvalMetric(void) { - return "ndcg"; + return "map"; } private: inline void AddGradient( unsigned pid, unsigned nid, @@ -284,25 +284,38 @@ namespace xgboost{ LossType loss; }; }; - - + namespace regrank{ class LambdaRankObj : public IObjFunction{ public: - LambdaRankObj(void){} - + LambdaRankObj(void){ + loss_.loss_type = LossType::kLogisticRaw; fix_list_weight_ = 0.0f; /* 0 keeps the per-list grad/hess rescale off until SetParam("fix_list_weight") overrides it; without this the member is read uninitialized */ + } virtual ~LambdaRankObj(){} - virtual void SetParam(const char *name, const char *val){ - if( !strcmp( "loss_type", name ) ) loss_.loss_type = atoi( val ); + if( !strcmp( "loss_type", name ) ) loss_.loss_type = atoi( val ); if( !strcmp( "fix_list_weight", name ) ) fix_list_weight_ = 
(float)atof( val ); - - } + } private: LossType loss_; float fix_list_weight_; protected: - + /*! \brief helper information in a list */ + struct ListEntry{ + /*! \brief the prediction score of the entry */ + float pred; + /*! \brief the actual label of the entry */ + float label; + /*! \brief row index in the data matrix */ + unsigned rindex; + // constructor + ListEntry(float pred, float label, unsigned rindex): pred(pred),label(label),rindex(rindex){} + // comparator by prediction: the entry with the larger pred orders first + inline bool operator<(const ListEntry &p) const{ + return pred > p.pred; + } + }; + class Triple{ public: float pred_; @@ -388,30 +401,30 @@ namespace xgboost{ grad[pairs[i].first] += first_order_gradient; hess[pairs[i].second] += second_order_gradient; grad[pairs[i].second] -= first_order_gradient; - - } - + + } + if( fix_list_weight_ != 0.0f ){ float scale = fix_list_weight_ / (group_index[group+1] - group_index[group]); for(unsigned j = group_index[group]; j < group_index[group+1]; ++j ){ grad[j] *= scale; - hess[j] *= scale; + hess[j] *= scale; } } } - - virtual void GenPairs(const std::vector& preds, - const std::vector& labels, - const int &start, const int &end, - std::vector< std::pair > &pairs){ + + virtual void GenPairs(const std::vector& preds, + const std::vector& labels, + const int &start, const int &end, + std::vector< std::pair > &pairs){ - random::Random rnd; rnd.Seed(0); - std::vector< std::pair > rec; + random::Random rnd; rnd.Seed(0); + std::vector< std::pair > rec; for(int j = start; j < end; ++j ){ rec.push_back( std::make_pair(labels[j], j) ); } - - std::sort( rec.begin(), rec.end(), CmpFirst ); + + std::sort( rec.begin(), rec.end(), CmpFirst ); // enumerate buckets with same label, for each item in the list, grab another sample randomly for( unsigned i = 0; i < rec.size(); ){ unsigned j = i + 1; @@ -422,17 +435,15 @@ namespace xgboost{ unsigned ridx = static_cast( rnd.RandDouble() * (nleft+nright) ); if( ridx < nleft ){ // get the samples in left side, ridx is 
pos sample - pairs.push_back(std::make_pair(rec[ridx].second, rec[pid].second)); + pairs.push_back(std::make_pair(rec[ridx].second, rec[pid].second)); }else{ // get samples in right side, ridx is negsample - pairs.push_back(std::make_pair(rec[pid].second, rec[ridx+j-i].second)); + pairs.push_back(std::make_pair(rec[pid].second, rec[ridx+j-i].second)); } } i = j; } - } - - + } public: virtual void GetGradient(const std::vector& preds, const DMatrix::Info &info, @@ -445,11 +456,10 @@ namespace xgboost{ for (size_t i = 0; i < group_index.size() - 1; i++){ std::vector< std::pair > pairs; - GenPairs(preds, info.labels, group_index[i], group_index[i + 1],pairs); + GenPairs(preds, info.labels, group_index[i], group_index[i + 1],pairs); GetGroupGradient(preds, info.labels, group_index, grad, hess, pairs, i); } - } - + } virtual const char* DefaultEvalMetric(void) { return "auc"; } @@ -497,7 +507,7 @@ namespace xgboost{ std::sort(labels.begin(), labels.end(), std::greater()); return CalcDCG(labels); } - + inline void GetLambda(const std::vector &preds, const std::vector &labels, const std::vector &group_index,