ok
parent e2d13db24e
commit c22df2b31a

Makefile
@@ -10,7 +10,7 @@ OBJ =
all: $(BIN) $(OBJ)
export LDFLAGS= -pthread -lm

xgboost: regrank/xgboost_regrank_main.cpp regrank/*.h booster/*.h booster/*/*.hpp booster/*.hpp
xgboost: regrank/xgboost_regrank_main.cpp regrank/*.h regrank/*.hpp booster/*.h booster/*/*.hpp booster/*.hpp


$(BIN) :
@@ -2,9 +2,9 @@
# choose the tree booster, 0: tree, 1: linear
booster_type = 0

objective="rank:pairwise"
#objective="rank:pairwise"
#objective="rank:softmax"
#objective="lambdarank:map"
objective="lambdarank:map"
#objective="lambdarank:ndcg"

# Tree Booster Parameters
@@ -13,7 +13,7 @@ bst:eta = 0.1
# minimum loss reduction required to make a further partition
bst:gamma = 1.0
# minimum sum of instance weight(hessian) needed in a child
bst:min_child_weight = 1
bst:min_child_weight = 0.1
# maximum depth of a tree
bst:max_depth = 6
eval_metric = "ndcg"
@@ -261,7 +261,7 @@ namespace xgboost{
                }
            }
            virtual const char* DefaultEvalMetric(void) {
                return "ndcg";
                return "map";
            }
        private:
            inline void AddGradient( unsigned pid, unsigned nid,
@@ -284,25 +284,38 @@ namespace xgboost{
            LossType loss;
        };
    };



    namespace regrank{
        class LambdaRankObj : public IObjFunction{
        public:
            LambdaRankObj(void){}

            LambdaRankObj(void){
                loss_.loss_type = LossType::kLogisticRaw;
            }
            virtual ~LambdaRankObj(){}

            virtual void SetParam(const char *name, const char *val){
                if( !strcmp( "loss_type", name ) ) loss_.loss_type = atoi( val );
                if( !strcmp( "loss_type", name ) ) loss_.loss_type = atoi( val );
                if( !strcmp( "fix_list_weight", name ) ) fix_list_weight_ = (float)atof( val );

            }
            }
        private:
            LossType loss_;
            float fix_list_weight_;
        protected:

            /*! \brief helper information in a list */
            struct ListEntry{
                /*! \brief the predict score we in the data */
                float pred;
                /*! \brief the actual label of the entry */
                float label;
                /*! \brief row index in the data matrix */
                unsigned rindex;
                // constructor
                ListEntry(float pred, float label, unsigned rindex): pred(pred),label(label),rindex(rindex){}
                // comparator by prediction
                inline bool operator<(const ListEntry &p) const{
                    return pred > p.pred;
                }
            };

            class Triple{
            public:
                float pred_;
@@ -388,30 +401,30 @@ namespace xgboost{
                    grad[pairs[i].first] += first_order_gradient;
                    hess[pairs[i].second] += second_order_gradient;
                    grad[pairs[i].second] -= first_order_gradient;

                }


                }

                if( fix_list_weight_ != 0.0f ){
                    float scale = fix_list_weight_ / (group_index[group+1] - group_index[group]);
                    for(unsigned j = group_index[group]; j < group_index[group+1]; ++j ){
                        grad[j] *= scale;
                        hess[j] *= scale;
                        hess[j] *= scale;
                    }
                }
            }

            virtual void GenPairs(const std::vector<float>& preds,
                                  const std::vector<float>& labels,
                                  const int &start, const int &end,
                                  std::vector< std::pair<int,int> > &pairs){

            virtual void GenPairs(const std::vector<float>& preds,
                                  const std::vector<float>& labels,
                                  const int &start, const int &end,
                                  std::vector< std::pair<int,int> > &pairs){

                random::Random rnd; rnd.Seed(0);
                std::vector< std::pair<float,unsigned> > rec;
                random::Random rnd; rnd.Seed(0);
                std::vector< std::pair<float,unsigned> > rec;
                for(int j = start; j < end; ++j ){
                    rec.push_back( std::make_pair(labels[j], j) );
                }

                std::sort( rec.begin(), rec.end(), CmpFirst );

                std::sort( rec.begin(), rec.end(), CmpFirst );
                // enumerate buckets with same label, for each item in the list, grab another sample randomly
                for( unsigned i = 0; i < rec.size(); ){
                    unsigned j = i + 1;
@@ -422,17 +435,15 @@ namespace xgboost{
                        unsigned ridx = static_cast<int>( rnd.RandDouble() * (nleft+nright) );
                        if( ridx < nleft ){
                            // get the samples in left side, ridx is pos sample
                            pairs.push_back(std::make_pair(rec[ridx].second, rec[pid].second));
                            pairs.push_back(std::make_pair(rec[ridx].second, rec[pid].second));
                        }else{
                            // get samples in right side, ridx is negsample
                            pairs.push_back(std::make_pair(rec[pid].second, rec[ridx+j-i].second));
                            pairs.push_back(std::make_pair(rec[pid].second, rec[ridx+j-i].second));
                        }
                    }
                    i = j;
                }
            }


            }
        public:
            virtual void GetGradient(const std::vector<float>& preds,
                                     const DMatrix::Info &info,
@@ -445,11 +456,10 @@ namespace xgboost{

                for (size_t i = 0; i < group_index.size() - 1; i++){
                    std::vector< std::pair<int,int> > pairs;
                    GenPairs(preds, info.labels, group_index[i], group_index[i + 1],pairs);
                    GenPairs(preds, info.labels, group_index[i], group_index[i + 1],pairs);
                    GetGroupGradient(preds, info.labels, group_index, grad, hess, pairs, i);
                }
            }

            }
            virtual const char* DefaultEvalMetric(void) {
                return "auc";
            }
@@ -497,7 +507,7 @@ namespace xgboost{
                std::sort(labels.begin(), labels.end(), std::greater<float>());
                return CalcDCG(labels);
            }


            inline void GetLambda(const std::vector<float> &preds,
                                  const std::vector<float> &labels,
                                  const std::vector<unsigned> &group_index,