lambda rank added

2014-04-10 22:09:19 +08:00
parent 1136c71e64
commit f83942d3e9
18 changed files with 1792 additions and 76 deletions
--- a/dev/base/xgboost_boost_task.h
+++ b/dev/base/xgboost_boost_task.h
@@ -174,7 +174,7 @@ namespace xgboost{
            inline void InitLearner(void){
                cfg.BeforeFirst();
                while (cfg.Next()){
-                    learner_->SetParam(cfg.name(), cfg.val());
+		     learner_->SetParam(cfg.name(), cfg.val());
                }
                if (model_in != "NULL"){
                    utils::FileStream fi(utils::FopenCheck(model_in.c_str(), "rb"));
--- a/dev/base/xgboost_data_instance.h
+++ b/dev/base/xgboost_data_instance.h
@@ -10,8 +10,8 @@

 namespace xgboost{
    namespace base{
-        /*! \brief data matrix for regression,classification,rank content */
-        struct  DMatrix{
+        /*! \brief data matrix for regression, classification, rank content */
+        struct DMatrix{
        public:
            /*! \brief maximum feature dimension */
            unsigned num_feature;
@@ -74,7 +74,7 @@ namespace xgboost{
 	    }
            
            inline void LoadGroup(const char* fgroup, bool silent = false){
-	      //if exists group data load it in
+	        //if group data exists, load it in
 	        FILE *file_group = fopen64(fgroup, "r");
 	
 		if (file_group != NULL){
@@ -117,6 +117,7 @@ namespace xgboost{
                LoadGroupBinary(fgroup,silent);
                return true;
            }
+            
            /*!
            * \brief save to binary file
            * \param fname name of binary data
@@ -139,7 +140,7 @@ namespace xgboost{
            }
            
            inline void SaveGroupBinary(const char* fgroup, bool silent = false){
-	      //save group data
+	        //save group data
                if (group_index.size() > 0){
                    utils::FileStream file_group(utils::FopenCheck(fgroup, "wb"));
                    int group_index_size = group_index.size();
@@ -151,7 +152,7 @@ namespace xgboost{
 	    }
            
            inline void LoadGroupBinary(const char* fgroup, bool silent = false){
-	      //if group data exists load it in
+	        //if group data exists load it in
                FILE *file_group = fopen64(fgroup, "r");
                if (file_group != NULL){
                    int group_index_size = 0;
@@ -168,8 +169,8 @@ namespace xgboost{
                }else{
 		    if(!silent){printf("The binary file of group info not exists");}
 		}
-                
-	    }
+            }
+            
            /*!
            * \brief cache load data given a file name, if filename ends with .buffer, direct load binary
            *        otherwise the function will first check if fname + '.buffer' exists,
@@ -207,9 +208,6 @@ namespace xgboost{
                }
            }
        };
-
-
-
    }
 };

--- a/dev/base/xgboost_learner.h
+++ b/dev/base/xgboost_learner.h
@@ -144,17 +144,24 @@ namespace xgboost {
        	this->GetGradient(preds_, train_->labels, train_->group_index, grad_, hess_);
                std::vector<unsigned> root_index;
                base_gbm.DoBoost(grad_, hess_, train_->data, root_index);
+	
+// 		printf("xgboost_learner.h:UpdateOneIter\n");
+// 		const unsigned ndata = static_cast<unsigned>(train_->Size());
+// 		#pragma omp parallel for schedule( static )
+//                 for (unsigned j = 0; j < ndata; ++j) {
+// 		    printf("haha:%d %f\n",j,base_gbm.Predict(train_->data, j, j));
+// 		}
 	    }

            /*! \brief get intransformed prediction, without buffering */
            inline void Predict(std::vector<float> &preds, const DMatrix &data) {
                preds.resize(data.Size());
-
                const unsigned ndata = static_cast<unsigned>(data.Size());
-#pragma omp parallel for schedule( static )
+		#pragma omp parallel for schedule( static )
                for (unsigned j = 0; j < ndata; ++j) {
-                    preds[j] = base_gbm.Predict(data.data, j, -1);
-                }
+		    preds[j] = base_gbm.Predict(data.data, j, -1);
+                  
+		}
            }

        public:
@@ -194,7 +201,7 @@ namespace xgboost {
            inline void InteractPredict(std::vector<float> &preds, const DMatrix &data, unsigned buffer_offset) {
                preds.resize(data.Size());
                const unsigned ndata = static_cast<unsigned>(data.Size());
-#pragma omp parallel for schedule( static )
+		#pragma omp parallel for schedule( static )
                for (unsigned j = 0; j < ndata; ++j) {
                    preds[j] = base_gbm.InteractPredict(data.data, j, buffer_offset + j);
                }
@@ -202,7 +209,7 @@ namespace xgboost {
            /*! \brief repredict trial */
            inline void InteractRePredict(const xgboost::base::DMatrix &data, unsigned buffer_offset) {
                const unsigned ndata = static_cast<unsigned>(data.Size());
-#pragma omp parallel for schedule( static )
+		#pragma omp parallel for schedule( static )
                for (unsigned j = 0; j < ndata; ++j) {
                    base_gbm.InteractRePredict(data.data, j, buffer_offset + j);
                }
@@ -212,10 +219,11 @@ namespace xgboost {
            virtual inline void PredictBuffer(std::vector<float> &preds, const DMatrix &data, unsigned buffer_offset) {
                preds.resize(data.Size());
 		const unsigned ndata = static_cast<unsigned>(data.Size());
-#pragma omp parallel for schedule( static )
+		
+		#pragma omp parallel for schedule( static )
                for (unsigned j = 0; j < ndata; ++j) {
 		    preds[j] = base_gbm.Predict(data.data, j, buffer_offset + j);
-                }
+		}
            }

            /*! \brief get the first order and second order gradient, given the transformed predictions and labels */
@@ -248,7 +256,7 @@ namespace xgboost {
                * \param val  value of the parameter
                */
                inline void SetParam(const char *name, const char *val) {
-                    if (!strcmp("loss_type", name))   loss_type = atoi(val);
+		    if (!strcmp("loss_type", name))   loss_type = atoi(val);
                    if (!strcmp("bst:num_feature", name)) num_feature = atoi(val);
                }