ok
This commit is contained in:
parent
e2d13db24e
commit
c22df2b31a
2
Makefile
2
Makefile
@ -10,7 +10,7 @@ OBJ =
|
|||||||
all: $(BIN) $(OBJ)
|
all: $(BIN) $(OBJ)
|
||||||
export LDFLAGS= -pthread -lm
|
export LDFLAGS= -pthread -lm
|
||||||
|
|
||||||
xgboost: regrank/xgboost_regrank_main.cpp regrank/*.h booster/*.h booster/*/*.hpp booster/*.hpp
|
xgboost: regrank/xgboost_regrank_main.cpp regrank/*.h regrank/*.hpp booster/*.h booster/*/*.hpp booster/*.hpp
|
||||||
|
|
||||||
|
|
||||||
$(BIN) :
|
$(BIN) :
|
||||||
|
|||||||
@ -2,9 +2,9 @@
|
|||||||
# choose the tree booster, 0: tree, 1: linear
|
# choose the tree booster, 0: tree, 1: linear
|
||||||
booster_type = 0
|
booster_type = 0
|
||||||
|
|
||||||
objective="rank:pairwise"
|
#objective="rank:pairwise"
|
||||||
#objective="rank:softmax"
|
#objective="rank:softmax"
|
||||||
#objective="lambdarank:map"
|
objective="lambdarank:map"
|
||||||
#objective="lambdarank:ndcg"
|
#objective="lambdarank:ndcg"
|
||||||
|
|
||||||
# Tree Booster Parameters
|
# Tree Booster Parameters
|
||||||
@ -13,7 +13,7 @@ bst:eta = 0.1
|
|||||||
# minimum loss reduction required to make a further partition
|
# minimum loss reduction required to make a further partition
|
||||||
bst:gamma = 1.0
|
bst:gamma = 1.0
|
||||||
# minimum sum of instance weight(hessian) needed in a child
|
# minimum sum of instance weight(hessian) needed in a child
|
||||||
bst:min_child_weight = 1
|
bst:min_child_weight = 0.1
|
||||||
# maximum depth of a tree
|
# maximum depth of a tree
|
||||||
bst:max_depth = 6
|
bst:max_depth = 6
|
||||||
eval_metric = "ndcg"
|
eval_metric = "ndcg"
|
||||||
|
|||||||
@ -261,7 +261,7 @@ namespace xgboost{
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
virtual const char* DefaultEvalMetric(void) {
|
virtual const char* DefaultEvalMetric(void) {
|
||||||
return "ndcg";
|
return "map";
|
||||||
}
|
}
|
||||||
private:
|
private:
|
||||||
inline void AddGradient( unsigned pid, unsigned nid,
|
inline void AddGradient( unsigned pid, unsigned nid,
|
||||||
@ -284,25 +284,38 @@ namespace xgboost{
|
|||||||
LossType loss;
|
LossType loss;
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
namespace regrank{
|
namespace regrank{
|
||||||
class LambdaRankObj : public IObjFunction{
|
class LambdaRankObj : public IObjFunction{
|
||||||
public:
|
public:
|
||||||
LambdaRankObj(void){}
|
LambdaRankObj(void){
|
||||||
|
loss_.loss_type = LossType::kLogisticRaw;
|
||||||
|
}
|
||||||
virtual ~LambdaRankObj(){}
|
virtual ~LambdaRankObj(){}
|
||||||
|
|
||||||
virtual void SetParam(const char *name, const char *val){
|
virtual void SetParam(const char *name, const char *val){
|
||||||
if( !strcmp( "loss_type", name ) ) loss_.loss_type = atoi( val );
|
if( !strcmp( "loss_type", name ) ) loss_.loss_type = atoi( val );
|
||||||
if( !strcmp( "fix_list_weight", name ) ) fix_list_weight_ = (float)atof( val );
|
if( !strcmp( "fix_list_weight", name ) ) fix_list_weight_ = (float)atof( val );
|
||||||
|
}
|
||||||
}
|
|
||||||
private:
|
private:
|
||||||
LossType loss_;
|
LossType loss_;
|
||||||
float fix_list_weight_;
|
float fix_list_weight_;
|
||||||
protected:
|
protected:
|
||||||
|
/*! \brief helper information in a list */
|
||||||
|
struct ListEntry{
|
||||||
|
/*! \brief the predicted score of the entry */
|
||||||
|
float pred;
|
||||||
|
/*! \brief the actual label of the entry */
|
||||||
|
float label;
|
||||||
|
/*! \brief row index in the data matrix */
|
||||||
|
unsigned rindex;
|
||||||
|
// constructor
|
||||||
|
ListEntry(float pred, float label, unsigned rindex): pred(pred),label(label),rindex(rindex){}
|
||||||
|
// comparator by prediction
|
||||||
|
inline bool operator<(const ListEntry &p) const{
|
||||||
|
return pred > p.pred;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
class Triple{
|
class Triple{
|
||||||
public:
|
public:
|
||||||
float pred_;
|
float pred_;
|
||||||
@ -388,30 +401,30 @@ namespace xgboost{
|
|||||||
grad[pairs[i].first] += first_order_gradient;
|
grad[pairs[i].first] += first_order_gradient;
|
||||||
hess[pairs[i].second] += second_order_gradient;
|
hess[pairs[i].second] += second_order_gradient;
|
||||||
grad[pairs[i].second] -= first_order_gradient;
|
grad[pairs[i].second] -= first_order_gradient;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if( fix_list_weight_ != 0.0f ){
|
if( fix_list_weight_ != 0.0f ){
|
||||||
float scale = fix_list_weight_ / (group_index[group+1] - group_index[group]);
|
float scale = fix_list_weight_ / (group_index[group+1] - group_index[group]);
|
||||||
for(unsigned j = group_index[group]; j < group_index[group+1]; ++j ){
|
for(unsigned j = group_index[group]; j < group_index[group+1]; ++j ){
|
||||||
grad[j] *= scale;
|
grad[j] *= scale;
|
||||||
hess[j] *= scale;
|
hess[j] *= scale;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
virtual void GenPairs(const std::vector<float>& preds,
|
virtual void GenPairs(const std::vector<float>& preds,
|
||||||
const std::vector<float>& labels,
|
const std::vector<float>& labels,
|
||||||
const int &start, const int &end,
|
const int &start, const int &end,
|
||||||
std::vector< std::pair<int,int> > &pairs){
|
std::vector< std::pair<int,int> > &pairs){
|
||||||
|
|
||||||
random::Random rnd; rnd.Seed(0);
|
random::Random rnd; rnd.Seed(0);
|
||||||
std::vector< std::pair<float,unsigned> > rec;
|
std::vector< std::pair<float,unsigned> > rec;
|
||||||
for(int j = start; j < end; ++j ){
|
for(int j = start; j < end; ++j ){
|
||||||
rec.push_back( std::make_pair(labels[j], j) );
|
rec.push_back( std::make_pair(labels[j], j) );
|
||||||
}
|
}
|
||||||
|
|
||||||
std::sort( rec.begin(), rec.end(), CmpFirst );
|
std::sort( rec.begin(), rec.end(), CmpFirst );
|
||||||
// enumerate buckets with same label, for each item in the list, grab another sample randomly
|
// enumerate buckets with same label, for each item in the list, grab another sample randomly
|
||||||
for( unsigned i = 0; i < rec.size(); ){
|
for( unsigned i = 0; i < rec.size(); ){
|
||||||
unsigned j = i + 1;
|
unsigned j = i + 1;
|
||||||
@ -422,17 +435,15 @@ namespace xgboost{
|
|||||||
unsigned ridx = static_cast<int>( rnd.RandDouble() * (nleft+nright) );
|
unsigned ridx = static_cast<int>( rnd.RandDouble() * (nleft+nright) );
|
||||||
if( ridx < nleft ){
|
if( ridx < nleft ){
|
||||||
// get the samples in left side, ridx is pos sample
|
// get the samples in left side, ridx is pos sample
|
||||||
pairs.push_back(std::make_pair(rec[ridx].second, rec[pid].second));
|
pairs.push_back(std::make_pair(rec[ridx].second, rec[pid].second));
|
||||||
}else{
|
}else{
|
||||||
// get samples in right side, ridx is negsample
|
// get samples in right side, ridx is negsample
|
||||||
pairs.push_back(std::make_pair(rec[pid].second, rec[ridx+j-i].second));
|
pairs.push_back(std::make_pair(rec[pid].second, rec[ridx+j-i].second));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
i = j;
|
i = j;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public:
|
public:
|
||||||
virtual void GetGradient(const std::vector<float>& preds,
|
virtual void GetGradient(const std::vector<float>& preds,
|
||||||
const DMatrix::Info &info,
|
const DMatrix::Info &info,
|
||||||
@ -445,11 +456,10 @@ namespace xgboost{
|
|||||||
|
|
||||||
for (size_t i = 0; i < group_index.size() - 1; i++){
|
for (size_t i = 0; i < group_index.size() - 1; i++){
|
||||||
std::vector< std::pair<int,int> > pairs;
|
std::vector< std::pair<int,int> > pairs;
|
||||||
GenPairs(preds, info.labels, group_index[i], group_index[i + 1],pairs);
|
GenPairs(preds, info.labels, group_index[i], group_index[i + 1],pairs);
|
||||||
GetGroupGradient(preds, info.labels, group_index, grad, hess, pairs, i);
|
GetGroupGradient(preds, info.labels, group_index, grad, hess, pairs, i);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
virtual const char* DefaultEvalMetric(void) {
|
virtual const char* DefaultEvalMetric(void) {
|
||||||
return "auc";
|
return "auc";
|
||||||
}
|
}
|
||||||
@ -497,7 +507,7 @@ namespace xgboost{
|
|||||||
std::sort(labels.begin(), labels.end(), std::greater<float>());
|
std::sort(labels.begin(), labels.end(), std::greater<float>());
|
||||||
return CalcDCG(labels);
|
return CalcDCG(labels);
|
||||||
}
|
}
|
||||||
|
|
||||||
inline void GetLambda(const std::vector<float> &preds,
|
inline void GetLambda(const std::vector<float> &preds,
|
||||||
const std::vector<float> &labels,
|
const std::vector<float> &labels,
|
||||||
const std::vector<unsigned> &group_index,
|
const std::vector<unsigned> &group_index,
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user