tqchen 2014-05-15 18:56:28 -07:00
parent e2d13db24e
commit c22df2b31a
3 changed files with 46 additions and 36 deletions

View File

@@ -10,7 +10,7 @@ OBJ =
all: $(BIN) $(OBJ)
export LDFLAGS= -pthread -lm
-xgboost: regrank/xgboost_regrank_main.cpp regrank/*.h booster/*.h booster/*/*.hpp booster/*.hpp
+xgboost: regrank/xgboost_regrank_main.cpp regrank/*.h regrank/*.hpp booster/*.h booster/*/*.hpp booster/*.hpp
$(BIN) :

View File

@@ -2,9 +2,9 @@
# choose the tree booster, 0: tree, 1: linear
booster_type = 0
-objective="rank:pairwise"
+#objective="rank:pairwise"
#objective="rank:softmax"
-#objective="lambdarank:map"
+objective="lambdarank:map"
#objective="lambdarank:ndcg"
# Tree Booster Parameters
@@ -13,7 +13,7 @@ bst:eta = 0.1
# minimum loss reduction required to make a further partition
bst:gamma = 1.0
# minimum sum of instance weight(hessian) needed in a child
-bst:min_child_weight = 1
+bst:min_child_weight = 0.1
# maximum depth of a tree
bst:max_depth = 6
eval_metric = "ndcg"
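A note on the last change above: bst:min_child_weight is the minimum sum of instance hessians that a candidate child must contain before a split is accepted, and ranking losses typically produce much smaller per-instance hessians than squared error, which is the likely reason the threshold drops from 1 to 0.1. The sketch below is a rough, generic illustration of how such a constraint is conventionally checked; it is not the split-finding code of this repository, and all names in it are made up.

#include <cstdio>
#include <vector>

// Generic illustration: a candidate child is acceptable only if the sum of
// the hessians of the instances routed into it reaches min_child_weight.
bool ChildAllowed(const std::vector<float> &hess_in_child, float min_child_weight) {
    float sum = 0.0f;
    for (float h : hess_in_child) sum += h;
    return sum >= min_child_weight;
}

int main() {
    std::vector<float> child_hess = {0.04f, 0.03f, 0.05f};  // hypothetical hessians
    // rejected under the old threshold of 1, accepted under the new 0.1
    std::printf("allowed at 0.1: %d, allowed at 1.0: %d\n",
                ChildAllowed(child_hess, 0.1f), ChildAllowed(child_hess, 1.0f));
    return 0;
}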

View File

@@ -261,7 +261,7 @@ namespace xgboost{
                }
            }
            virtual const char* DefaultEvalMetric(void) {
-               return "ndcg";
+               return "map";
            }
        private:
            inline void AddGradient( unsigned pid, unsigned nid,
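The hunk above changes the pairwise rank objective's default evaluation metric from "ndcg" to "map". As a point of reference, the per-query quantity behind "map" is average precision over binary relevance labels taken in order of decreasing predicted score; MAP is its mean over queries. The sketch below is a generic reference implementation under that assumption, not the evaluator used in this repository.

#include <cstdio>
#include <vector>

// Average precision for one query: rel holds 0/1 relevance labels already
// ordered by descending model score. MAP is the mean of this over queries.
double AveragePrecision(const std::vector<int> &rel) {
    double hits = 0.0, sum_prec = 0.0;
    for (size_t i = 0; i < rel.size(); ++i) {
        if (rel[i] > 0) {
            hits += 1.0;
            sum_prec += hits / static_cast<double>(i + 1);  // precision at rank i+1
        }
    }
    return hits > 0.0 ? sum_prec / hits : 0.0;
}

int main() {
    // relevant documents ranked 1st and 3rd: AP = (1/1 + 2/3) / 2 = 0.8333
    std::printf("%.4f\n", AveragePrecision({1, 0, 1, 0}));
    return 0;
}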
@@ -284,25 +284,38 @@ namespace xgboost{
            LossType loss;
        };
    };
    namespace regrank{
        class LambdaRankObj : public IObjFunction{
        public:
-           LambdaRankObj(void){}
+           LambdaRankObj(void){
+               loss_.loss_type = LossType::kLogisticRaw;
+           }
            virtual ~LambdaRankObj(){}
            virtual void SetParam(const char *name, const char *val){
                if( !strcmp( "loss_type", name ) ) loss_.loss_type = atoi( val );
                if( !strcmp( "fix_list_weight", name ) ) fix_list_weight_ = (float)atof( val );
            }
        private:
            LossType loss_;
            float fix_list_weight_;
        protected:
+           /*! \brief helper information in a list */
+           struct ListEntry{
+               /*! \brief the predict score we in the data */
+               float pred;
+               /*! \brief the actual label of the entry */
+               float label;
+               /*! \brief row index in the data matrix */
+               unsigned rindex;
+               // constructor
+               ListEntry(float pred, float label, unsigned rindex): pred(pred),label(label),rindex(rindex){}
+               // comparator by prediction
+               inline bool operator<(const ListEntry &p) const{
+                   return pred > p.pred;
+               }
+           };
            class Triple{
            public:
                float pred_;
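The ListEntry struct added in this hunk inverts its comparator on purpose: operator< treats "less than" as "has the higher prediction", so a plain ascending std::sort orders a list from best to worst score. A standalone illustration of that idiom, using a trimmed-down stand-in for the struct:

#include <algorithm>
#include <cstdio>
#include <vector>

// Trimmed-down stand-in for ListEntry, for illustration only.
struct Entry {
    float pred;       // predicted score
    unsigned rindex;  // row index in the data matrix
    // inverted comparison: "smaller" means "higher prediction"
    bool operator<(const Entry &p) const { return pred > p.pred; }
};

int main() {
    std::vector<Entry> lst = { {0.2f, 0}, {0.9f, 1}, {0.5f, 2} };
    std::sort(lst.begin(), lst.end());              // ascending by operator< ...
    for (const Entry &e : lst)                      // ... which is descending by score
        std::printf("%u:%.1f ", e.rindex, e.pred);  // prints 1:0.9 2:0.5 0:0.2
    std::printf("\n");
    return 0;
}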
@@ -388,30 +401,30 @@ namespace xgboost{
                    grad[pairs[i].first] += first_order_gradient;
                    hess[pairs[i].second] += second_order_gradient;
                    grad[pairs[i].second] -= first_order_gradient;
                }
                if( fix_list_weight_ != 0.0f ){
                    float scale = fix_list_weight_ / (group_index[group+1] - group_index[group]);
                    for(unsigned j = group_index[group]; j < group_index[group+1]; ++j ){
                        grad[j] *= scale;
                        hess[j] *= scale;
                    }
                }
            }
            virtual void GenPairs(const std::vector<float>& preds,
                                  const std::vector<float>& labels,
                                  const int &start, const int &end,
                                  std::vector< std::pair<int,int> > &pairs){
                random::Random rnd; rnd.Seed(0);
                std::vector< std::pair<float,unsigned> > rec;
                for(int j = start; j < end; ++j ){
                    rec.push_back( std::make_pair(labels[j], j) );
                }
                std::sort( rec.begin(), rec.end(), CmpFirst );
                // enumerate buckets with same label, for each item in the list, grab another sample randomly
                for( unsigned i = 0; i < rec.size(); ){
                    unsigned j = i + 1;
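Regarding the fix_list_weight_ branch above: when the parameter is non-zero, every gradient and hessian in a query group is multiplied by fix_list_weight_ divided by the group size, so each query list contributes the same total weight to the boosting step regardless of how many documents it holds, while a value of 0 leaves lists unnormalized. A self-contained sketch of that rescaling (illustrative names only, not code from this repository):

#include <vector>

// Rescale one query group [begin, end) so its total weight equals
// fix_list_weight, independent of how many documents the group contains.
void ScaleGroup(std::vector<float> &grad, std::vector<float> &hess,
                unsigned begin, unsigned end, float fix_list_weight) {
    if (fix_list_weight == 0.0f) return;  // 0 means "leave lists unnormalized"
    float scale = fix_list_weight / static_cast<float>(end - begin);
    for (unsigned j = begin; j < end; ++j) {
        grad[j] *= scale;
        hess[j] *= scale;
    }
}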
@@ -422,17 +435,15 @@ namespace xgboost{
                        unsigned ridx = static_cast<int>( rnd.RandDouble() * (nleft+nright) );
                        if( ridx < nleft ){
                            // get the samples in left side, ridx is pos sample
                            pairs.push_back(std::make_pair(rec[ridx].second, rec[pid].second));
                        }else{
                            // get samples in right side, ridx is negsample
                            pairs.push_back(std::make_pair(rec[pid].second, rec[ridx+j-i].second));
                        }
                    }
                    i = j;
                }
            }
        public:
            virtual void GetGradient(const std::vector<float>& preds,
                                     const DMatrix::Info &info,
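The ridx + j - i offset in the sampling code above appears to implement the following mapping, assuming nleft counts the items before the equal-label bucket [i, j) and nright the items after it: draws below nleft pick a higher-labelled item to the left of the bucket, and the remaining draws are shifted past the bucket onto the lower-labelled items to its right. A hypothetical standalone check of that mapping (names invented for the sketch):

#include <cassert>

// Map a uniform draw ridx from [0, nleft + nright) onto an index outside the
// equal-label bucket [i, j) of a sorted list, where nleft = i items precede
// the bucket and nright items follow it.
unsigned MapOpponent(unsigned ridx, unsigned i, unsigned j) {
    return ridx < i ? ridx          // opponent to the left (higher label)
                    : ridx + j - i; // shift past the bucket (lower label)
}

int main() {
    // sorted list of size 7 with bucket [2, 5): valid opponents are 0, 1, 5, 6
    assert(MapOpponent(0, 2, 5) == 0);
    assert(MapOpponent(1, 2, 5) == 1);
    assert(MapOpponent(2, 2, 5) == 5);
    assert(MapOpponent(3, 2, 5) == 6);
    return 0;
}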
@@ -445,11 +456,10 @@ namespace xgboost{
                for (size_t i = 0; i < group_index.size() - 1; i++){
                    std::vector< std::pair<int,int> > pairs;
                    GenPairs(preds, info.labels, group_index[i], group_index[i + 1],pairs);
                    GetGroupGradient(preds, info.labels, group_index, grad, hess, pairs, i);
                }
            }
            virtual const char* DefaultEvalMetric(void) {
                return "auc";
            }
@@ -497,7 +507,7 @@ namespace xgboost{
                std::sort(labels.begin(), labels.end(), std::greater<float>());
                return CalcDCG(labels);
            }
            inline void GetLambda(const std::vector<float> &preds,
                                  const std::vector<float> &labels,
                                  const std::vector<unsigned> &group_index,