Merge branch 'master' of ssh://github.com/tqchen/xgboost

Conflicts: regression/xgboost_reg_data.h
2014-04-18 17:46:44 -07:00 · 2014-04-18 17:46:44 -07:00 · c3592dc06c
commit c3592dc06c
parent 3d327503fd 91bb4777b0
32 changed files with 2456 additions and 2167 deletions
--- a/2
+++ b/2
@ -12,6 +12,8 @@ export LDFLAGS= -pthread -lm

 xgboost: regression/xgboost_reg_main.cpp regression/*.h booster/*.h booster/*/*.hpp booster/*.hpp

+#xgboost: rank/xgboost_rank_main.cpp base/*.h rank/*.h booster/*.h booster/*/*.hpp booster/*.hpp
+
 $(BIN) : 
 	$(CXX) $(CFLAGS) $(LDFLAGS) -o $@ $(filter %.cpp %.o %.c, $^)

--- a/booster/xgboost_gbmbase.h
+++ b/booster/xgboost_gbmbase.h
@ -288,7 +288,8 @@ namespace xgboost{
                    booster_info.push_back(0);
                    this->ConfigBooster(boosters.back());
                    boosters.back()->InitModel();
-                }else{
+                }
+                else{
                    this->ConfigBooster(boosters.back());
                }
                return boosters.back();
--- a/demo/rank/README
+++ b/demo/rank/README
@ -0,0 +1,13 @@
+Demonstrating how to use XGBoost to accomplish regression tasks on the Computer Hardware dataset: https://archive.ics.uci.edu/ml/datasets/Computer+Hardware
+
+Run: ./runexp.sh
+
+Format of input: LIBSVM format
+
+Format of `featmap.txt`: ```<featureid> <featurename> <q or i or int>\n```:
+  - Feature id must be from 0 to number of features, in sorted order.
+  - i means this feature is a binary indicator feature
+  - q means this feature is a quantitative value, such as age or time, and can be missing
+  - int means this feature is an integer value (when int is hinted, the decision boundary will be an integer)
+
+Explanations: https://github.com/tqchen/xgboost/wiki/Regression
--- a/demo/rank/runexp.sh
+++ b/demo/rank/runexp.sh
@ -0,0 +1,16 @@
+#!/bin/bash
+# map the data to features. For convenience we only use 7 original attributes and encode them as features in a trivial way 
+python mapfeat.py
+# split train and test
+python mknfold.py machine.txt 1
+# training and output the models
+../../xgboost machine.conf
+# output predictions of test data
+../../xgboost machine.conf task=pred model_in=0002.model
+# print the boosters of 0002.model in dump.raw.txt
+../../xgboost machine.conf task=dump model_in=0002.model name_dump=dump.raw.txt
+# print the boosters of 0002.model in dump.nice.txt with feature map
+../../xgboost machine.conf task=dump model_in=0002.model fmap=featmap.txt name_dump=dump.nice.txt 
+
+# cat the result
+cat dump.nice.txt
--- a/demo/rank/toy.eval
+++ b/demo/rank/toy.eval
@ -0,0 +1,5 @@
+1 0:2 1:3 2:2
+0 0:2 1:3 2:2
+0 0:2 1:3 2:2
+0 0:2 1:3 2:2
+1 0:2 1:3 2:2
--- a/demo/rank/toy.eval.group
+++ b/demo/rank/toy.eval.group
@ -0,0 +1,2 @@
+2
+3
--- a/demo/rank/toy.test
+++ b/demo/rank/toy.test
@ -0,0 +1,5 @@
+1 0:2 1:3 2:2
+0 0:2 1:3 2:2
+0 0:2 1:3 2:2
+0 0:2 1:3 2:2
+1 0:2 1:3 2:2
--- a/demo/rank/toy.test.group
+++ b/demo/rank/toy.test.group
@ -0,0 +1,2 @@
+2
+3
--- a/demo/rank/toy.train
+++ b/demo/rank/toy.train
@ -0,0 +1,11 @@
+1 0:1.2 1:3 2:5.6
+0 0:2.0 1:2.3 2:5.1
+0 0:3.9 1:3 2:3.1
+0 0:2 1:3.2 2:3.4
+1 0:2.1 1:4.5 2:4.2
+0 0:1.9 1:2.8 2:3.1
+1 0:3.0 1:2.0 2:1.1
+0 0:1.9 1:1.8 2:2.1
+0 0:1.1 1:2.2 2:1.4
+1 0:2.1 1:4.1 2:4.0
+0 0:1.9 1:2.2 2:1.1
--- a/demo/rank/toy.train.group
+++ b/demo/rank/toy.train.group
@ -0,0 +1,2 @@
+6
+5
--- a/demo/rank/train
+++ b/demo/rank/train
--- a/dev/base/xgboost_boost_task.h
+++ b/dev/base/xgboost_boost_task.h
@ -20,6 +20,7 @@ namespace xgboost{
        class BoostTask{
        public:
            inline int Run(int argc, char *argv[]){
+
                if (argc < 2){
                    printf("Usage: <config>\n");
                    return 0;
@ -34,6 +35,7 @@ namespace xgboost{
                        this->SetParam(name, val);
                    }
                }
+		
                this->InitData();
                this->InitLearner();
                if (task == "dump"){
@ -128,6 +130,7 @@ namespace xgboost{


            inline void InitData(void){
+	        
                if (name_fmap != "NULL") fmap.LoadText(name_fmap.c_str());
                if (task == "dump") return;
                if (learning_task == RANKING){
@ -140,6 +143,7 @@ namespace xgboost{
                        // training 
                        sscanf(train_path.c_str(), "%[^;];%s", instance_path, group_path);
 		        data.CacheLoad(instance_path, group_path, silent != 0, use_buffer != 0);
+                        
 			utils::Assert(eval_data_names.size() == eval_data_paths.size());
                        for (size_t i = 0; i < eval_data_names.size(); ++i){
                            deval.push_back(new DMatrix());
@ -147,8 +151,6 @@ namespace xgboost{
                            deval.back()->CacheLoad(instance_path, group_path, silent != 0, use_buffer != 0);
                        }
                    }
-
-
                }
                else{
                    if (task == "pred" || task == "dumppath"){
@ -166,7 +168,9 @@ namespace xgboost{
                }

                learner_->SetData(&data, deval, eval_data_names);
+		if(!silent) printf("BoostTask:Data Initiation Done!\n");
            }
+            
            inline void InitLearner(void){
                cfg.BeforeFirst();
                while (cfg.Next()){
@ -182,6 +186,7 @@ namespace xgboost{
                    learner_->InitModel();
                }
                learner_->InitTrainer();
+		if(!silent) printf("BoostTask:InitLearner Done!\n");
            }

            inline void TaskTrain(void){
--- a/dev/base/xgboost_data_instance.h
+++ b/dev/base/xgboost_data_instance.h
@ -70,17 +70,27 @@ namespace xgboost{
                        (unsigned)data.NumRow(), (unsigned)data.NumCol(), (unsigned long)data.NumEntry(), fname);
                }
                fclose(file);
+		LoadGroup(fgroup,silent);
+	    }
            
+            inline void LoadGroup(const char* fgroup, bool silent = false){
 	        //if exists group data load it in
 	        FILE *file_group = fopen64(fgroup, "r");
+	
 		if (file_group != NULL){
 		    group_index.push_back(0);
-					int tmp = 0, acc = 0;
-					while (fscanf(file_group, "%d", tmp) == 1){
+                    int tmp = 0, acc = 0,cnt = 0;
+                    while (fscanf(file_group, "%d", &tmp) == 1){
 		        acc += tmp;
                        group_index.push_back(acc);
+			cnt++;
                    }
+                    if(!silent) printf("%d groups are loaded from %s\n",cnt,fgroup);
+		    fclose(file_group);
+                }else{
+		    if(!silent) printf("There is no group file\n");
 		}
+                
 	    }
            /*!
            * \brief load from binary file
@ -100,26 +110,14 @@ namespace xgboost{
                data.InitData();

                if (!silent){
-					printf("%ux%u matrix with %lu entries is loaded from %s\n",
+		    printf("%ux%u matrix with %lu entries is loaded from %s as binary\n",
                        (unsigned)data.NumRow(), (unsigned)data.NumCol(), (unsigned long)data.NumEntry(), fname);
                }

-				//if group data exists load it in
-				FILE *file_group = fopen64(fgroup, "r");
-				if (file_group != NULL){
-					int group_index_size = 0;
-					utils::FileStream group_stream(file_group);
-					utils::Assert(group_stream.Read(&group_index_size, sizeof(int)) != 0, "Load group indice size");
-					group_index.resize(group_index_size);
-					utils::Assert(group_stream.Read(&group_index, sizeof(int)* group_index_size) != 0, "Load group indice");
-
-					if (!silent){
-						printf("the group index of %d groups is loaded from %s\n",
-							group_index_size - 1, fgroup);
-					}
-				}
+                LoadGroupBinary(fgroup,silent);
                return true;
            }
+            
            /*!
            * \brief save to binary file
            * \param fname name of binary data
@ -134,19 +132,45 @@ namespace xgboost{
                fs.Write(&labels[0], sizeof(float)* data.NumRow());
                fs.Close();
                if (!silent){
-					printf("%ux%u matrix with %lu entries is saved to %s\n",
+                    printf("%ux%u matrix with %lu entries is saved to %s as binary\n",
                        (unsigned)data.NumRow(), (unsigned)data.NumCol(), (unsigned long)data.NumEntry(), fname);
                }

+                SaveGroupBinary(fgroup,silent);
+            }
+            
+            inline void SaveGroupBinary(const char* fgroup, bool silent = false){
 	        //save group data
                if (group_index.size() > 0){
                    utils::FileStream file_group(utils::FopenCheck(fgroup, "wb"));
                    int group_index_size = group_index.size();
                    file_group.Write(&(group_index_size), sizeof(int));
                    file_group.Write(&group_index[0], sizeof(int) * group_index_size);
+		    file_group.Close();
+		    if(!silent){printf("Index info of %d groups is saved to %s as binary\n",group_index_size-1,fgroup);}
+                }
 	    }
            
+            inline void LoadGroupBinary(const char* fgroup, bool silent = false){
+	        //if group data exists load it in
+                FILE *file_group = fopen64(fgroup, "r");
+                if (file_group != NULL){
+                    int group_index_size = 0;
+                    utils::FileStream group_stream(file_group);
+                    utils::Assert(group_stream.Read(&group_index_size, sizeof(int)) != 0, "Load group indice size");
+                    group_index.resize(group_index_size);
+                    utils::Assert(group_stream.Read(&group_index[0], sizeof(int) * group_index_size) != 0, "Load group indice");
+
+                    if (!silent){
+                        printf("Index info of %d groups is loaded from %s as binary\n",
+                            group_index.size() - 1, fgroup);
                    }
+		    fclose(file_group);
+                }else{
+		    if(!silent){printf("The binary file of group info not exists");}
+		}
+            }
+            
            /*!
            * \brief cache load data given a file name, if filename ends with .buffer, direct load binary
            *        otherwise the function will first check if fname + '.buffer' exists,
@ -161,11 +185,13 @@ namespace xgboost{
                if (len > 8 && !strcmp(fname + len - 7, ".buffer")){
                    this->LoadBinary(fname, fgroup, silent); return;
                }
-				char bname[1024];
+                char bname[1024],bgroup[1024];
                sprintf(bname, "%s.buffer", fname);
-				if (!this->LoadBinary(bname, fgroup, silent)){
+                sprintf(bgroup, "%s.buffer", fgroup);
+                if (!this->LoadBinary(bname, bgroup, silent))
+		{
                    this->LoadText(fname, fgroup, silent);
-					if (savebuffer) this->SaveBinary(bname, fgroup, silent);
+                    if (savebuffer) this->SaveBinary(bname, bgroup, silent);
                }
            }
        private:
@ -182,9 +208,6 @@ namespace xgboost{
                }
            }
        };
-
-
-
    }
 };

--- a/dev/base/xgboost_learner.h
+++ b/dev/base/xgboost_learner.h
@ -96,6 +96,7 @@ namespace xgboost {
            */
            inline void InitModel(void) {
                base_gbm.InitModel();
+		if(!silent) printf("BoostLearner:InitModel Done!\n");
            }
            /*!
            * \brief load model from stream
@ -143,16 +144,23 @@ namespace xgboost {
        	this->GetGradient(preds_, train_->labels, train_->group_index, grad_, hess_);
                std::vector<unsigned> root_index;
                base_gbm.DoBoost(grad_, hess_, train_->data, root_index);
+	
+// 		printf("xgboost_learner.h:UpdateOneIter\n");
+// 		const unsigned ndata = static_cast<unsigned>(train_->Size());
+// 		#pragma omp parallel for schedule( static )
+//                 for (unsigned j = 0; j < ndata; ++j) {
+// 		    printf("haha:%d %f\n",j,base_gbm.Predict(train_->data, j, j));
+// 		}
 	    }

            /*! \brief get intransformed prediction, without buffering */
            inline void Predict(std::vector<float> &preds, const DMatrix &data) {
                preds.resize(data.Size());
-
                const unsigned ndata = static_cast<unsigned>(data.Size());
 		#pragma omp parallel for schedule( static )
                for (unsigned j = 0; j < ndata; ++j) {
 		    preds[j] = base_gbm.Predict(data.data, j, -1);
+                  
 		}
            }

@ -210,8 +218,8 @@ namespace xgboost {
            /*! \brief get intransformed predictions, given data */
            virtual inline void PredictBuffer(std::vector<float> &preds, const DMatrix &data, unsigned buffer_offset) {
                preds.resize(data.Size());
-
 		const unsigned ndata = static_cast<unsigned>(data.Size());
+		
 		#pragma omp parallel for schedule( static )
                for (unsigned j = 0; j < ndata; ++j) {
 		    preds[j] = base_gbm.Predict(data.data, j, buffer_offset + j);
--- a/dev/rank/xgboost_rank.h
+++ b/dev/rank/xgboost_rank.h
@ -7,7 +7,7 @@
 */
 #include <cmath>
 #include <cstdlib>
-#include <cstring>
+#include <vector>
 #include "xgboost_sample.h"
 #include "xgboost_rank_eval.h"
 #include "../base/xgboost_data_instance.h"
@ -71,31 +71,128 @@ namespace xgboost {
                fprintf(fo, "\n");
            }

-			inline void SetParam(const char *name, const char *val){
+            virtual inline void SetParam(const char *name, const char *val){
+                BoostLearner::SetParam(name,val);
 	        if (!strcmp(name, "eval_metric"))  evaluator_.AddEval(val);
                if (!strcmp(name, "rank:sampler"))  sampler.AssignSampler(atoi(val));
            }
-			/*! \brief get the first order and second order gradient, given the transformed predictions and labels */
-			inline void GetGradient(const std::vector<float> &preds,
+            
+	private:
+	    inline std::vector< Triple<float,float,int> > GetSortedTuple(const std::vector<float> &preds,
+                const std::vector<float> &labels,
+                const std::vector<int> &group_index,
+		int group){
+		std::vector< Triple<float,float,int> > sorted_triple;
+		for(int j = group_index[group]; j < group_index[group+1]; j++){
+		      sorted_triple.push_back(Triple<float,float,int>(preds[j],labels[j],j));
+		}
+                std::sort(sorted_triple.begin(),sorted_triple.end(),Triplef1Comparer);
+		return sorted_triple;
+	    }
+	    
+	    inline std::vector<int> GetIndexMap(std::vector< Triple<float,float,int> > sorted_triple,int start){
+		std::vector<int> index_remap;
+		index_remap.resize(sorted_triple.size());
+		for(int i = 0; i < sorted_triple.size(); i++){
+		  index_remap[sorted_triple[i].f3_-start] = i;
+		}
+		return index_remap;
+	    }
+	    
+	    inline float GetLambdaMAP(const std::vector< Triple<float,float,int> > sorted_triple,
+		int index1,int index2,
+		std::vector< Quadruple<float,float,float,float> > map_acc){
+	        if(index1 > index2) std::swap(index1,index2);
+		float original = map_acc[index2].f1_;
+		if(index1 != 0) original -= map_acc[index1 - 1].f1_;
+		float changed = 0;
+		if(sorted_triple[index1].f2_ < sorted_triple[index2].f2_){
+		  changed += map_acc[index2 - 1].f3_ - map_acc[index1].f3_;
+		  changed += (map_acc[index1].f4_ + 1.0f)/(index1 + 1);
+		}else{
+		  changed += map_acc[index2 - 1].f2_ - map_acc[index1].f2_;
+		  changed += map_acc[index2].f4_/(index2 + 1);
+		}
+		float ans = (changed - original)/(map_acc[map_acc.size() - 1].f4_);
+		if(ans < 0) ans = -ans;
+		return ans;
+	    }
+	    
+	    inline float GetLambdaNDCG(const std::vector< Triple<float,float,int> > sorted_triple,
+		int index1,
+		int index2,float IDCG){
+	        float original = pow(2,sorted_triple[index1].f2_)/log(index1+2)
+		                + pow(2,sorted_triple[index2].f2_)/log(index2+2);
+	        float changed = pow(2,sorted_triple[index2].f2_)/log(index1+2)
+		                + pow(2,sorted_triple[index1].f2_)/log(index2+2);
+		float ans = (original - changed)/IDCG;
+		if(ans < 0) ans = -ans;
+		return ans;
+	    }
+	    
+	    
+	    inline float GetIDCG(const std::vector< Triple<float,float,int> > sorted_triple){
+		std::vector<float> labels;
+		for(int i = 0; i < sorted_triple.size(); i++){
+		  labels.push_back(sorted_triple[i].f2_);
+		}
+		
+		std::sort(labels.begin(),labels.end(),std::greater<float>());
+		return EvalNDCG::DCG(labels);
+	    }
+	    
+	    inline std::vector< Quadruple<float,float,float,float> > GetMAPAcc(const std::vector< Triple<float,float,int> > sorted_triple){
+		std::vector< Quadruple<float,float,float,float> > map_acc;
+		float hit = 0,acc1 = 0,acc2 = 0,acc3 = 0;
+		for(int i = 0; i < sorted_triple.size(); i++){
+		    if(sorted_triple[i].f2_ == 1) {
+		      hit++;
+		      acc1 += hit /( i + 1 );
+		      acc2 += (hit - 1)/(i+1);
+		      acc3 += (hit + 1)/(i+1);
+		    }
+		    map_acc.push_back(Quadruple<float,float,float,float>(acc1,acc2,acc3,hit));
+		}
+		return map_acc;
+	      
+	    }
+	    
+	    inline void GetGroupGradient(const std::vector<float> &preds,
                const std::vector<float> &labels,
                const std::vector<int> &group_index,
                std::vector<float> &grad,
-				std::vector<float> &hess) {
-				grad.resize(preds.size());
-				hess.resize(preds.size());
+                std::vector<float> &hess,
+		const std::vector< Triple<float,float,int> > sorted_triple,
+		const std::vector<int> index_remap,
+		const sample::Pairs& pairs,
+	        int group){
 	        bool j_better;
-				float pred_diff, pred_diff_exp, first_order_gradient, second_order_gradient;
-				for (int i = 0; i < group_index.size() - 1; i++){
-					sample::Pairs pairs = sampler.GenPairs(preds, labels, group_index[i], group_index[i + 1]);
-					for (int j = group_index[i]; j < group_index[i + 1]; j++){
+                float IDCG, pred_diff, pred_diff_exp, delta;
+		float first_order_gradient, second_order_gradient;
+                std::vector< Quadruple<float,float,float,float> > map_acc;
+		
+		if(mparam.loss_type == NDCG){
+		  IDCG = GetIDCG(sorted_triple);
+		}else if(mparam.loss_type == MAP){
+		  map_acc = GetMAPAcc(sorted_triple);
+		}
+		
+		for (int j = group_index[group]; j < group_index[group + 1]; j++){
 		    std::vector<int> pair_instance = pairs.GetPairs(j);
                    for (int k = 0; k < pair_instance.size(); k++){
 			j_better = labels[j] > labels[pair_instance[k]];
                        if (j_better){
+			    switch(mparam.loss_type){
+			      case PAIRWISE: delta = 1.0;break;
+			      case MAP: delta = GetLambdaMAP(sorted_triple,index_remap[j - group_index[group]],index_remap[pair_instance[k]-group_index[group]],map_acc);break;
+			      case NDCG: delta = GetLambdaNDCG(sorted_triple,index_remap[j - group_index[group]],index_remap[pair_instance[k]-group_index[group]],IDCG);break;
+			      default: utils::Error("Cannot find the specified loss type");
+			    }
+		        
 			    pred_diff = preds[preds[j] - pair_instance[k]];
 			    pred_diff_exp = j_better ? expf(-pred_diff) : expf(pred_diff);
-								first_order_gradient = FirstOrderGradient(pred_diff_exp);
-								second_order_gradient = 2 * SecondOrderGradient(pred_diff_exp);
+			    first_order_gradient = delta * FirstOrderGradient(pred_diff_exp);
+			    second_order_gradient = 2 * delta * SecondOrderGradient(pred_diff_exp);
 			    hess[j] += second_order_gradient;
 			    grad[j] += first_order_gradient;
 			    hess[pair_instance[k]] += second_order_gradient;
@ -104,11 +201,54 @@ namespace xgboost {
 		    }
                }
 	    }
+	public:
+            /*! \brief get the first order and second order gradient, given the 
+	     *         intransformed predictions and labels */
+            inline void GetGradient(const std::vector<float> &preds,
+                const std::vector<float> &labels,
+                const std::vector<int> &group_index,
+                std::vector<float> &grad,
+                std::vector<float> &hess) {
+                grad.resize(preds.size());
+                hess.resize(preds.size());
+                for (int i = 0; i < group_index.size() - 1; i++){
+		    sample::Pairs pairs = sampler.GenPairs(preds, labels, group_index[i], group_index[i + 1]);
+                    //pairs.GetPairs()
+		    std::vector< Triple<float,float,int> > sorted_triple = GetSortedTuple(preds,labels,group_index,i);
+		    std::vector<int> index_remap = GetIndexMap(sorted_triple,group_index[i]);
+		    GetGroupGradient(preds,labels,group_index,
+			grad,hess,sorted_triple,index_remap,pairs,i);
+                }
            }

            inline void UpdateInteract(std::string action) {
+		this->InteractPredict(preds_, *train_, 0);

+                int buffer_offset = static_cast<int>(train_->Size());
+                for (size_t i = 0; i < evals_.size(); ++i){
+                    std::vector<float> &preds = this->eval_preds_[i];
+                    this->InteractPredict(preds, *evals_[i], buffer_offset);
+                    buffer_offset += static_cast<int>(evals_[i]->Size());
                }
+
+                if (action == "remove"){
+                    base_gbm.DelteBooster(); return;
+                }
+
+                this->GetGradient(preds_, train_->labels,train_->group_index, grad_, hess_);
+                std::vector<unsigned> root_index;
+                base_gbm.DoBoost(grad_, hess_, train_->data, root_index);
+
+                this->InteractRePredict(*train_, 0);
+                buffer_offset = static_cast<int>(train_->Size());
+                for (size_t i = 0; i < evals_.size(); ++i){
+                    this->InteractRePredict(*evals_[i], buffer_offset);
+                    buffer_offset += static_cast<int>(evals_[i]->Size());
+                }
+            }
+            
+            
+            
        private:
            enum LossType {
                PAIRWISE = 0,
--- a/dev/rank/xgboost_rank_eval.h
+++ b/dev/rank/xgboost_rank_eval.h
@ -34,9 +34,7 @@ namespace xgboost {
            float key_;
            float value_;

-			Pair(float key, float value){
-				key_ = key;
-				value_ = value_;
+            Pair(float key, float value):key_(key),value_(value){
            }
        };

@ -48,6 +46,32 @@ namespace xgboost {
            return a.value_ < b.value_;
        }

+        template<typename T1,typename T2,typename T3>
+        class Triple{
+	public:
+	  T1 f1_;
+	  T2 f2_;
+	  T3 f3_;
+	  Triple(T1 f1,T2 f2,T3 f3):f1_(f1),f2_(f2),f3_(f3){
+	    
+	  }
+	};
+	
+	template<typename T1,typename T2,typename T3,typename T4>
+        class Quadruple{
+	public:
+	  T1 f1_;
+	  T2 f2_;
+	  T3 f3_;
+	  T4 f4_;
+	  Quadruple(T1 f1,T2 f2,T3 f3,T4 f4):f1_(f1),f2_(f2),f3_(f3),f4_(f4){
+	    
+	  }
+	};
+	
+	bool Triplef1Comparer(const Triple<float,float,int> &a, const Triple<float,float,int> &b){  
+	  return a.f1_< b.f1_;
+        }
        
        /*! \brief Mean Average Precision */
        class EvalMAP : public IRankEvaluator {
@ -55,6 +79,7 @@ namespace xgboost {
            float Eval(const std::vector<float> &preds,
                const std::vector<float> &labels,
                const std::vector<int> &group_index) const {
+		if (group_index.size() <= 1) return 0;
                float acc = 0;
                std::vector<Pair> pairs_sort;
                for (int i = 0; i < group_index.size() - 1; i++){
@ -68,10 +93,11 @@ namespace xgboost {
            }
            
 	    
+
            virtual const char *Name(void) const {
                return "MAP";
            }
-
+	private:
            float average_precision(std::vector<Pair> pairs_sort) const{

                std::sort(pairs_sort.begin(), pairs_sort.end(), PairKeyComparer);
@ -94,12 +120,31 @@ namespace xgboost {
            float Eval(const std::vector<float> &preds,
                const std::vector<float> &labels,
                const std::vector<int> &group_index) const {
-				return 0;
+		if (group_index.size() <= 1) return 0;
+                float acc = 0;
+                for (int i = 0; i < group_index.size() - 1; i++){
+                    acc += Count_Inversion(preds,labels,
+			group_index[i],group_index[i+1]);
+                }
+                return acc / (group_index.size() - 1);	  
 	    }

            const char *Name(void) const {
                return "PAIR";
            }
+	private:
+	    float Count_Inversion(const std::vector<float> &preds,
+	      const std::vector<float> &labels,int begin,int end
+	    ) const{
+	      float ans = 0;
+	      for(int i = begin; i < end; i++){
+		for(int j = i + 1; j < end; j++){
+		  if(preds[i] > preds[j] && labels[i] < labels[j])
+		    ans++;
+		}
+	      }
+	      return ans;
+	    }
        };

        /*! \brief Normalized DCG */
@ -121,6 +166,19 @@ namespace xgboost {
                return acc / (group_index.size() - 1);
            }
            
+            static float DCG(const std::vector<float> &labels){
+		float ans = 0.0;
+                for (int i = 0; i < labels.size(); i++){
+                    ans += (pow(2,labels[i]) - 1 ) / log(i + 2);
+                }
+                return ans;
+	    }
+	    
+            virtual const char *Name(void) const {
+                return "NDCG";
+            }
+            
+	  private:
            float NDCG(std::vector<Pair> pairs_sort) const{
                std::sort(pairs_sort.begin(), pairs_sort.end(), PairKeyComparer);
                float dcg = DCG(pairs_sort);
@ -131,17 +189,14 @@ namespace xgboost {
            }

            float DCG(std::vector<Pair> pairs_sort) const{
-				float ans = 0.0;
-				ans += pairs_sort[0].value_;
+                std::vector<float> labels;
 	        for (int i = 1; i < pairs_sort.size(); i++){
-					ans += pairs_sort[i].value_ / log(i + 1);
+		  labels.push_back(pairs_sort[i].value_);
 		}
-				return ans;
+                return DCG(labels);
            }

-			virtual const char *Name(void) const {
-				return "NDCG";
-			}
+            
        };

    };
--- a/dev/rank/xgboost_rank_main.cpp
+++ b/dev/rank/xgboost_rank_main.cpp
@ -11,20 +11,12 @@
 #include "../base/xgboost_boost_task.h"
 #include "xgboost_rank.h"
 #include "../regression/xgboost_reg.h"
+#include "../regression/xgboost_reg_main.cpp"
+#include "../base/xgboost_data_instance.h"

 int main(int argc, char *argv[]) {    
-	
  xgboost::random::Seed(0);
-	xgboost::base::BoostTask tsk;
-	xgboost::utils::ConfigIterator itr(argv[1]);
-/*	int learner_index = 0;
-	while (itr.Next()){
-		if (!strcmp(itr.name(), "learning_task")){
-			learner_index = atoi(itr.val());
-		}
-	}*/
-	xgboost::rank::RankBoostLearner* rank_learner = new xgboost::rank::RankBoostLearner;
-	xgboost::base::BoostLearner *parent = static_cast<xgboost::base::BoostLearner*>(rank_learner);
-	tsk.SetLearner(parent);
-	return tsk.Run(argc, argv);
+  xgboost::base::BoostTask rank_tsk;
+  rank_tsk.SetLearner(new xgboost::rank::RankBoostLearner);
+  return rank_tsk.Run(argc, argv);
 }
--- a/dev/rank/xgboost_sample.h
+++ b/dev/rank/xgboost_sample.h
@ -19,7 +19,7 @@ namespace xgboost {
                 * \param start the begin index of the group
                 * \param end the end index of the group
                 */
-      Pairs(int start,int end):start_(start),end_(end_){
+                Pairs(int start, int end) :start_(start), end_(end){
                    for (int i = start; i < end; i++){
                        std::vector<int> v;
                        pairs_.push_back(v);
@ -30,7 +30,7 @@ namespace xgboost {
                 * \param index, the index of retrieved instance
                 * \return the index of instances paired
                 */
-      std::vector<int> GetPairs(int index) {
+                std::vector<int> GetPairs(int index) const{
                    utils::Assert(index >= start_ && index < end_, "The query index out of sampling bound");
                    return pairs_[index - start_];
                }
@ -115,6 +115,7 @@ namespace xgboost {
                Pairs GenPairs(const std::vector<float> &preds,
                    const std::vector<float> &labels,
                    int start, int end){
+		    utils::Assert(sampler_ != NULL,"Not config the sampler yet. Add rank:sampler in the config file\n");
                    return sampler_->GenPairs(preds, labels, start, end);
                }
            private:
--- a/regression/xgboost_reg.h
+++ b/regression/xgboost_reg.h
@ -94,7 +94,8 @@ namespace xgboost{
                base_gbm.InitTrainer();
                if (mparam.loss_type == kLogisticClassify){
                    evaluator_.AddEval("error");
-                }else{
+                }
+                else{
                    evaluator_.AddEval("rmse");
                }
                evaluator_.Init();
--- a/regression/xgboost_reg_data.h
+++ b/regression/xgboost_reg_data.h
@ -50,7 +50,8 @@ namespace xgboost{
                    unsigned index; float value;
                    if (sscanf(tmp, "%u:%f", &index, &value) == 2){
                        findex.push_back(index); fvalue.push_back(value);
-                    }else{
+                    }
+                    else{
                        if (!init){
                            labels.push_back(label);
                            data.AddRow(findex, fvalue);
--- a/regression/xgboost_reg_eval.h
+++ b/regression/xgboost_reg_eval.h
@ -55,7 +55,8 @@ namespace xgboost{
                for (unsigned i = 0; i < ndata; ++i){
                    if (preds[i] > 0.5f){
                        if (labels[i] < 0.5f) nerr += 1;
-                    }else{
+                    }
+                    else{
                        if (labels[i] > 0.5f) nerr += 1;
                    }
                }
--- a/regression/xgboost_reg_main.cpp
+++ b/regression/xgboost_reg_main.cpp
@ -50,7 +50,8 @@ namespace xgboost{
                }
                if (task == "pred"){
                    this->TaskPred();
-                }else{                  
+                }
+                else{
                    this->TaskTrain();
                }
                return 0;
@ -113,7 +114,8 @@ namespace xgboost{
                if (task == "dump") return;
                if (task == "pred" || task == "dumppath"){
                    data.CacheLoad(test_path.c_str(), silent != 0, use_buffer != 0);
-                }else{
+                }
+                else{
                    // training 
                    data.CacheLoad(train_path.c_str(), silent != 0, use_buffer != 0);
                    utils::Assert(eval_data_names.size() == eval_data_paths.size());
@ -133,7 +135,8 @@ namespace xgboost{
                    utils::FileStream fi(utils::FopenCheck(model_in.c_str(), "rb"));
                    learner.LoadModel(fi);
                    fi.Close();
-                }else{
+                }
+                else{
                    utils::Assert(task == "train", "model_in not specified");
                    learner.InitModel();
                }
@ -156,7 +159,8 @@ namespace xgboost{
                if (save_period == 0 || num_round % save_period != 0){
                    if (model_out == "NULL"){
                        this->SaveModel(num_round - 1);
-                    }else{
+                    }
+                    else{
                        this->SaveModel(model_out.c_str());
                    }
                }
@ -177,7 +181,8 @@ namespace xgboost{
                    if (!strcmp(cfg_batch.name(), "run")){
                        learner.UpdateInteract(interact_action);
                        batch_action += 1;
-                    } else{
+                    }
+                    else{
                        learner.SetParam(cfg_batch.name(), cfg_batch.val());
                    }
                }
--- a/utils/xgboost_config.h
+++ b/utils/xgboost_config.h
@ -94,7 +94,8 @@ namespace xgboost{
                    case '\"':
                        if (i == 0){
                            ParseStr(tok); ch_buf = fgetc(fi); return new_line;
-                        }else{
+                        }
+                        else{
                            Error("token followed directly by string");
                        }
                    case '=':
@ -102,7 +103,8 @@ namespace xgboost{
                            ch_buf = fgetc(fi);
                            tok[0] = '=';
                            tok[1] = '\0';
-                        }else{
+                        }
+                        else{
                            tok[i] = '\0';
                        }
                        return new_line;
@ -155,7 +157,8 @@ namespace xgboost{
                if (priority == 0){
                    names.push_back(std::string(name));
                    values.push_back(std::string(val));
-                }else{
+                }
+                else{
                    names_high.push_back(std::string(name));
                    values_high.push_back(std::string(val));
                }
@ -184,7 +187,8 @@ namespace xgboost{
                size_t i = idx - 1;
                if (i >= names.size()){
                    return names_high[i - names.size()].c_str();
-                }else{
+                }
+                else{
                    return names[i].c_str();
                }
            }
@ -197,7 +201,8 @@ namespace xgboost{
                size_t i = idx - 1;
                if (i >= values.size()){
                    return values_high[i - values.size()].c_str();
-                }else{
+                }
+                else{
                    return values[i].c_str();
                }
            }
--- a/utils/xgboost_matrix_csr.h
+++ b/utils/xgboost_matrix_csr.h
@ -50,7 +50,8 @@ namespace xgboost{
                if (!UseAcList){
                    rptr.clear();
                    rptr.resize(nrows + 1, 0);
-                }else{
+                }
+                else{
                    Assert(nrows + 1 == rptr.size(), "rptr must be initialized already");
                    this->Cleanup();
                }
@ -79,7 +80,8 @@ namespace xgboost{
                        rptr[i] = start;
                        start += rlen;
                    }
-                }else{
+                }
+                else{
                    // case with active list
                    std::sort(aclist.begin(), aclist.end());

--- a/utils/xgboost_omp.h
+++ b/utils/xgboost_omp.h
@ -10,7 +10,7 @@
 #if defined(_OPENMP)
 #include <omp.h>
 #else
-//#warning "OpenMP is not available, compile to single thread code"
+#warning "OpenMP is not available, compile to single thread code"
 inline int omp_get_thread_num() { return 0; }
 inline int omp_get_num_threads() { return 1; }
 inline void omp_set_num_threads(int nthread) {}
--- a/utils/xgboost_random.h
+++ b/utils/xgboost_random.h
@ -88,7 +88,8 @@ namespace xgboost{
                    u = NextDouble();
                } while (u == 0.0);
                return SampleGamma(alpha + 1.0, beta) * pow(u, 1.0 / alpha);
-            } else {
+            }
+            else {
                double d, c, x, v, u;
                d = alpha - 1.0 / 3.0;
                c = 1.0 / sqrt(9.0 * d);
--- a/utils/xgboost_utils.h
+++ b/utils/xgboost_utils.h
@ -62,16 +62,6 @@ namespace xgboost{
            }
            return fp;
        }
-        
-        /*! \brief replace fopen, */
-        inline FILE *FopenTry( const char *fname , const char *flag ){
-            FILE *fp = fopen64( fname , flag );
-            if( fp == NULL ){
-                fprintf( stderr, "can not open file \"%s\"\n",fname );
-                exit( -1 );
-            }
-            return fp;
-        }
    };
 };