ok
parent e2d13db24e
commit c22df2b31a

Makefile
@@ -10,7 +10,7 @@ OBJ =
all: $(BIN) $(OBJ)
export LDFLAGS= -pthread -lm

xgboost: regrank/xgboost_regrank_main.cpp regrank/*.h booster/*.h booster/*/*.hpp booster/*.hpp
xgboost: regrank/xgboost_regrank_main.cpp regrank/*.h regrank/*.hpp booster/*.h booster/*/*.hpp booster/*.hpp


$(BIN) :
@@ -2,9 +2,9 @@
# choose the tree booster, 0: tree, 1: linear
booster_type = 0

objective="rank:pairwise"
#objective="rank:pairwise"
#objective="rank:softmax"
#objective="lambdarank:map"
objective="lambdarank:map"
#objective="lambdarank:ndcg"

# Tree Booster Parameters
@@ -13,7 +13,7 @@ bst:eta = 0.1
# minimum loss reduction required to make a further partition
bst:gamma = 1.0
# minimum sum of instance weight(hessian) needed in a child
bst:min_child_weight = 1
bst:min_child_weight = 0.1
# maximum depth of a tree
bst:max_depth = 6
eval_metric = "ndcg"
@@ -261,7 +261,7 @@ namespace xgboost{
                }
            }
            virtual const char* DefaultEvalMetric(void) {
                return "ndcg";
                return "map";
            }
        private:
            inline void AddGradient( unsigned pid, unsigned nid,
@@ -284,25 +284,38 @@ namespace xgboost{
            LossType loss;
        };
    };



    namespace regrank{
        class LambdaRankObj : public IObjFunction{
        public:
            LambdaRankObj(void){}

            LambdaRankObj(void){
                loss_.loss_type = LossType::kLogisticRaw;
            }
            virtual ~LambdaRankObj(){}

            virtual void SetParam(const char *name, const char *val){
                if( !strcmp( "loss_type", name ) ) loss_.loss_type = atoi( val );
                if( !strcmp( "loss_type", name ) ) loss_.loss_type = atoi( val );
                if( !strcmp( "fix_list_weight", name ) ) fix_list_weight_ = (float)atof( val );

            }
            }
        private:
            LossType loss_;
            float fix_list_weight_;
        protected:

            /*! \brief helper information in a list */
            struct ListEntry{
                /*! \brief the predict score we in the data */
                float pred;
                /*! \brief the actual label of the entry */
                float label;
                /*! \brief row index in the data matrix */
                unsigned rindex;
                // constructor
                ListEntry(float pred, float label, unsigned rindex): pred(pred),label(label),rindex(rindex){}
                // comparator by prediction
                inline bool operator<(const ListEntry &p) const{
                    return pred > p.pred;
                }
            };

            class Triple{
            public:
                float pred_;
@@ -388,30 +401,30 @@ namespace xgboost{
                    grad[pairs[i].first] += first_order_gradient;
                    hess[pairs[i].second] += second_order_gradient;
                    grad[pairs[i].second] -= first_order_gradient;

                }


                }

                if( fix_list_weight_ != 0.0f ){
                    float scale = fix_list_weight_ / (group_index[group+1] - group_index[group]);
                    for(unsigned j = group_index[group]; j < group_index[group+1]; ++j ){
                        grad[j] *= scale;
                        hess[j] *= scale;
                        hess[j] *= scale;
                    }
                }
            }

            virtual void GenPairs(const std::vector<float>& preds,
                                  const std::vector<float>& labels,
                                  const int &start, const int &end,
                                  std::vector< std::pair<int,int> > &pairs){

            virtual void GenPairs(const std::vector<float>& preds,
                                  const std::vector<float>& labels,
                                  const int &start, const int &end,
                                  std::vector< std::pair<int,int> > &pairs){

                random::Random rnd; rnd.Seed(0);
                std::vector< std::pair<float,unsigned> > rec;
                random::Random rnd; rnd.Seed(0);
                std::vector< std::pair<float,unsigned> > rec;
                for(int j = start; j < end; ++j ){
                    rec.push_back( std::make_pair(labels[j], j) );
                }

                std::sort( rec.begin(), rec.end(), CmpFirst );

                std::sort( rec.begin(), rec.end(), CmpFirst );
                // enumerate buckets with same label, for each item in the list, grab another sample randomly
                for( unsigned i = 0; i < rec.size(); ){
                    unsigned j = i + 1;
@@ -422,17 +435,15 @@ namespace xgboost{
                        unsigned ridx = static_cast<int>( rnd.RandDouble() * (nleft+nright) );
                        if( ridx < nleft ){
                            // get the samples in left side, ridx is pos sample
                            pairs.push_back(std::make_pair(rec[ridx].second, rec[pid].second));
                            pairs.push_back(std::make_pair(rec[ridx].second, rec[pid].second));
                        }else{
                            // get samples in right side, ridx is negsample
                            pairs.push_back(std::make_pair(rec[pid].second, rec[ridx+j-i].second));
                            pairs.push_back(std::make_pair(rec[pid].second, rec[ridx+j-i].second));
                        }
                    }
                    i = j;
                }
            }


            }
        public:
            virtual void GetGradient(const std::vector<float>& preds,
                                     const DMatrix::Info &info,
@@ -445,11 +456,10 @@ namespace xgboost{

                for (size_t i = 0; i < group_index.size() - 1; i++){
                    std::vector< std::pair<int,int> > pairs;
                    GenPairs(preds, info.labels, group_index[i], group_index[i + 1],pairs);
                    GenPairs(preds, info.labels, group_index[i], group_index[i + 1],pairs);
                    GetGroupGradient(preds, info.labels, group_index, grad, hess, pairs, i);
                }
            }

            }
            virtual const char* DefaultEvalMetric(void) {
                return "auc";
            }
@@ -497,7 +507,7 @@ namespace xgboost{
                std::sort(labels.begin(), labels.end(), std::greater<float>());
                return CalcDCG(labels);
            }


            inline void GetLambda(const std::vector<float> &preds,
                                  const std::vector<float> &labels,
                                  const std::vector<unsigned> &group_index,