tab eliminated

kalenhaha 2014-02-19 13:25:01 +08:00
parent a20b1d1866
commit a0dddaf224
6 changed files with 468 additions and 470 deletions

View File

@@ -18,8 +18,6 @@ booster_type=1
do_reboost=0
bst:num_roots=0
bst:num_feature=3
learning_rate=0.01
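For context, the headers later in this commit read such files through ConfigIterator as plain name=value lines, one per row. A fuller, hypothetical configuration built only from parameter names that actually appear in this commit might look like:

booster_type=1
loss_type=1
base_score=0.5
learning_rate=0.01
bst:num_feature=3
boost_iterations=100
save_period=10
train_path=train.txt
model_dir_path=models
validation_paths=val.txt
validation_names=val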

View File

@@ -12,267 +12,267 @@
#include "../utils/xgboost_stream.h"
namespace xgboost{
    namespace regression{
        /*! \brief class for gradient boosted regression */
        class RegBoostLearner{
        public:
            RegBoostLearner(bool silent = false){
                this->silent = silent;
            }
            /*!
             * \brief a regression booster associated with training and evaluating data
             * \param train pointer to the training data
             * \param evals array of evaluating data
             * \param evname name of evaluation data, used to print statistics
             */
            RegBoostLearner( const DMatrix *train,
                             std::vector<const DMatrix *> evals,
                             std::vector<std::string> evname, bool silent = false ){
                this->silent = silent;
                SetData(train,evals,evname);
            }
            /*!
             * \brief associate regression booster with training and evaluating data
             * \param train pointer to the training data
             * \param evals array of evaluating data
             * \param evname name of evaluation data, used to print statistics
             */
            inline void SetData(const DMatrix *train,
                                std::vector<const DMatrix *> evals,
                                std::vector<std::string> evname){
                this->train_ = train;
                this->evals_ = evals;
                this->evname_ = evname;
                // assign buffer index: one prediction-buffer slot per instance
                int buffer_size = (*train).size();
                for(size_t i = 0; i < evals.size(); i++){
                    buffer_size += (*evals[i]).size();
                }
                char str[25];
                sprintf(str,"%d",buffer_size);
                base_model.SetParam("num_pbuffer",str);
            }
            /*!
             * \brief set parameters from outside
             * \param name name of the parameter
             * \param val value of the parameter
             */
            inline void SetParam( const char *name, const char *val ){
                mparam.SetParam( name, val );
                base_model.SetParam( name, val );
            }
            /*!
             * \brief initialize solver before training, called before training;
             *   this function is reserved for the solver to allocate necessary space and do other preparation
             */
            inline void InitTrainer( void ){
                base_model.InitTrainer();
                InitModel();
                mparam.AdjustBase();
            }
            /*!
             * \brief initialize the current data storage for the model; if the model is used for the first time, call this function
             */
            inline void InitModel( void ){
                base_model.InitModel();
            }
            /*!
             * \brief load model from stream
             * \param fi input stream
             */
            inline void LoadModel( utils::IStream &fi ){
                utils::Assert( fi.Read( &mparam, sizeof(ModelParam) ) != 0 );
                base_model.LoadModel( fi );
            }
            /*!
             * \brief save model to stream
             * \param fo output stream
             */
            inline void SaveModel( utils::IStream &fo ) const{
                fo.Write( &mparam, sizeof(ModelParam) );
                base_model.SaveModel( fo );
            }
            /*!
             * \brief update the model for one iteration
             * \param iteration the number of the updating iteration
             */
            inline void UpdateOneIter( int iteration ){
                std::vector<float> grad,hess,preds;
                std::vector<unsigned> root_index;
                booster::FMatrixS::Image train_image((*train_).data);
                Predict(preds,*train_,0);
                Gradient(preds,(*train_).labels,grad,hess);
                base_model.DoBoost(grad,hess,train_image,root_index);
                int buffer_index_offset = (*train_).size();
                float loss = 0.0f;
                for(size_t i = 0; i < evals_.size(); i++){
                    Predict(preds, *evals_[i], buffer_index_offset);
                    loss = mparam.Loss(preds,(*evals_[i]).labels);
                    if(!silent){
                        // print the values themselves, not their addresses
                        printf("The loss of the %s data set in iteration %d is %f\n",
                               evname_[i].c_str(), iteration, loss);
                    }
                    buffer_index_offset += (*evals_[i]).size();
                }
            }
            /*! \brief get the transformed predictions, given data */
            inline void Predict( std::vector<float> &preds, const DMatrix &data, int buffer_index_offset = 0 ){
                int data_size = data.size();
                preds.resize(data_size);
                for(int j = 0; j < data_size; j++){
                    preds[j] = mparam.PredTransform(mparam.base_score +
                        base_model.Predict(data.data[j],buffer_index_offset + j));
                }
            }
        private:
            /*! \brief get the first order and second order gradient, given the transformed predictions and labels */
            inline void Gradient(const std::vector<float> &preds, const std::vector<float> &labels,
                                 std::vector<float> &grad, std::vector<float> &hess){
                grad.clear();
                hess.clear();
                for(size_t j = 0; j < preds.size(); j++){
                    grad.push_back(mparam.FirstOrderGradient(preds[j],labels[j]));
                    hess.push_back(mparam.SecondOrderGradient(preds[j],labels[j]));
                }
            }
            enum LOSS_TYPE_LIST{
                LINEAR_SQUARE,
                LOGISTIC_NEGLOGLIKELIHOOD
            };
            /*! \brief training parameter for regression */
            struct ModelParam{
                /* \brief global bias */
                float base_score;
                /* \brief type of loss function */
                int loss_type;
                ModelParam( void ){
                    base_score = 0.5f;
                    loss_type = 0;
                }
                /*!
                 * \brief set parameters from outside
                 * \param name name of the parameter
                 * \param val value of the parameter
                 */
                inline void SetParam( const char *name, const char *val ){
                    if( !strcmp("base_score", name ) ) base_score = (float)atof( val );
                    if( !strcmp("loss_type", name ) )  loss_type  = atoi( val );
                }
                /*!
                 * \brief adjust base_score
                 */
                inline void AdjustBase( void ){
                    if( loss_type == LOGISTIC_NEGLOGLIKELIHOOD ){
                        utils::Assert( base_score > 0.0f && base_score < 1.0f, "sigmoid range constraint" );
                        base_score = - logf( 1.0f / base_score - 1.0f );
                    }
                }
                /*!
                 * \brief calculate first order gradient of loss, given transformed prediction
                 * \param predt transformed prediction
                 * \param label true label
                 * \return first order gradient
                 */
                inline float FirstOrderGradient( float predt, float label ) const{
                    switch( loss_type ){
                    case LINEAR_SQUARE: return predt - label;
                    case LOGISTIC_NEGLOGLIKELIHOOD: return predt - label;
                    default: utils::Error("unknown loss_type"); return 0.0f;
                    }
                }
                /*!
                 * \brief calculate second order gradient of loss, given transformed prediction
                 * \param predt transformed prediction
                 * \param label true label
                 * \return second order gradient
                 */
                inline float SecondOrderGradient( float predt, float label ) const{
                    switch( loss_type ){
                    case LINEAR_SQUARE: return 1.0f;
                    case LOGISTIC_NEGLOGLIKELIHOOD: return predt * ( 1 - predt );
                    default: utils::Error("unknown loss_type"); return 0.0f;
                    }
                }
                /*!
                 * \brief calculate the loss, given the predictions, labels and the loss type
                 * \param preds the given predictions
                 * \param labels the given labels
                 * \return the specified loss
                 */
                inline float Loss(const std::vector<float> &preds, const std::vector<float> &labels) const{
                    switch( loss_type ){
                    case LINEAR_SQUARE: return SquareLoss(preds,labels);
                    case LOGISTIC_NEGLOGLIKELIHOOD: return NegLoglikelihoodLoss(preds,labels);
                    default: utils::Error("unknown loss_type"); return 0.0f;
                    }
                }
                /*!
                 * \brief calculate the square loss, given the predictions and labels
                 * \param preds the given predictions
                 * \param labels the given labels
                 * \return the summation of square loss
                 */
                inline float SquareLoss(const std::vector<float> &preds, const std::vector<float> &labels) const{
                    float ans = 0.0f;
                    for(size_t i = 0; i < preds.size(); i++)
                        ans += pow(preds[i] - labels[i], 2);
                    return ans;
                }
                /*!
                 * \brief calculate the negative log-likelihood loss, given the predictions and labels
                 * \param preds the given predictions
                 * \param labels the given labels
                 * \return the summation of negative log-likelihood loss
                 */
                inline float NegLoglikelihoodLoss(const std::vector<float> &preds, const std::vector<float> &labels) const{
                    float ans = 0.0f;
                    for(size_t i = 0; i < preds.size(); i++)
                        ans -= labels[i] * log(preds[i]) + ( 1 - labels[i] ) * log(1 - preds[i]);
                    return ans;
                }
                /*!
                 * \brief transform the linear sum to prediction
                 * \param x linear sum of boosting ensemble
                 * \return transformed prediction
                 */
                inline float PredTransform( float x ){
                    switch( loss_type ){
                    case LINEAR_SQUARE: return x;
                    case LOGISTIC_NEGLOGLIKELIHOOD: return 1.0f/(1.0f + expf(-x));
                    default: utils::Error("unknown loss_type"); return 0.0f;
                    }
                }
            };
        private:
            booster::GBMBaseModel base_model;
            ModelParam mparam;
            const DMatrix *train_;
            std::vector<const DMatrix *> evals_;
            std::vector<std::string> evname_;
            bool silent;
        };
    }
};
#endif
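Taken together, the learner above is driven by setting parameters, attaching data, calling InitTrainer, and then calling UpdateOneIter once per boosting round. A minimal sketch of that flow (hypothetical paths and parameter values; the header name xgboost_reg.h and sensible booster defaults are assumptions, not confirmed by this commit):

#include <vector>
#include <string>
#include "xgboost_reg.h"   // assumed name of the header above

int main(){
    using namespace xgboost::regression;
    DMatrix train;
    train.LoadText("train.txt");              // hypothetical training file
    std::vector<const DMatrix*> evals;        // no evaluation sets in this sketch
    std::vector<std::string> evname;
    RegBoostLearner learner(&train, evals, evname);
    learner.SetParam("loss_type", "1");       // logistic negative log-likelihood
    learner.SetParam("base_score", "0.5");
    learner.InitTrainer();
    for(int i = 1; i <= 10; i++){
        learner.UpdateOneIter(i);             // one boosting round per call
    }
    return 0;
}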

View File

@@ -3,13 +3,13 @@
using namespace xgboost::regression;
int main(int argc, char *argv[]){
    //char* config_path = argv[1];
    //bool silent = ( atoi(argv[2]) == 1 );
    char* config_path = "c:\\cygwin64\\home\\chen\\github\\xgboost\\demo\\regression\\reg.conf";
    bool silent = false;
    RegBoostTrain train;
    train.train(config_path, silent);   // pass the silent flag instead of a hard-coded false
    RegBoostTest test;
    test.test(config_path, silent);
}
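The commented-out argv lines above suggest the intended command-line interface. A guarded sketch of that entry point (assuming the same includes as this file, which provide RegBoostTrain and RegBoostTest):

#include <cstdio>
#include <cstdlib>
using namespace xgboost::regression;

int main(int argc, char *argv[]){
    if( argc < 2 ){
        fprintf(stderr, "usage: %s <config_path> [silent:0|1]\n", argv[0]);
        return 1;
    }
    char* config_path = argv[1];
    bool silent = ( argc > 2 && atoi(argv[2]) == 1 );
    RegBoostTrain train;
    train.train(config_path, silent);
    RegBoostTest test;
    test.test(config_path, silent);
    return 0;
}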

View File

@@ -11,89 +11,89 @@
using namespace xgboost::utils;
namespace xgboost{
    namespace regression{
        /*!
         * \brief wraps the testing process of the gradient
         *   boosting regression model, given the configuration
         * \author Kailong Chen: chenkl198812@gmail.com
         */
        class RegBoostTest{
        public:
            /*!
             * \brief start the testing process of the gradient boosting regression
             *   model given the configuration, and finally save the prediction
             *   results to the specified paths
             * \param config_path the location of the configuration
             * \param silent whether to print feedback messages
             */
            void test(char* config_path, bool silent = false){
                reg_boost_learner = new xgboost::regression::RegBoostLearner(silent);
                ConfigIterator config_itr(config_path);
                // get the test data paths and configure the learner
                while (config_itr.Next()){
                    reg_boost_learner->SetParam(config_itr.name(),config_itr.val());
                    test_param.SetParam(config_itr.name(),config_itr.val());
                }
                Assert(test_param.test_paths.size() == test_param.test_names.size(),
                    "The number of test data set paths is not the same as the number of test data set names");
                // begin testing
                reg_boost_learner->InitModel();
                char model_path[256];
                std::vector<float> preds;
                for(size_t i = 0; i < test_param.test_paths.size(); i++){
                    xgboost::regression::DMatrix test_data;
                    test_data.LoadText(test_param.test_paths[i].c_str());
                    sprintf(model_path,"%s/final.model",test_param.model_dir_path);
                    // binary mode, since the model file is a binary dump
                    FileStream fin(fopen(model_path,"rb"));
                    reg_boost_learner->LoadModel(fin);
                    fin.Close();
                    reg_boost_learner->Predict(preds,test_data);
                }
            }
        private:
            struct TestParam{
                /* \brief upper bound of the number of boosters */
                int boost_iterations;
                /* \brief the period to save the model; -1 means only save the final round model */
                int save_period;
                /* \brief the path of the directory containing the saved models */
                char model_dir_path[256];
                /* \brief the path of the directory containing the output prediction results */
                char pred_dir_path[256];
                /* \brief the paths of the test data sets */
                std::vector<std::string> test_paths;
                /* \brief the names of the test data sets */
                std::vector<std::string> test_names;
                /*!
                 * \brief set parameters from outside
                 * \param name name of the parameter
                 * \param val value of the parameter
                 */
                inline void SetParam(const char *name, const char *val ){
                    if( !strcmp("model_dir_path", name ) ) strcpy(model_dir_path,val);
                    if( !strcmp("pred_dir_path", name ) ) strcpy(pred_dir_path,val);
                    if( !strcmp("test_paths", name) ) {
                        test_paths = StringProcessing::split(val,';');
                    }
                    if( !strcmp("test_names", name) ) {
                        test_names = StringProcessing::split(val,';');
                    }
                }
            };
            TestParam test_param;
            xgboost::regression::RegBoostLearner* reg_boost_learner;
        };
    }
}
#endif
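As written, test() computes preds but never writes them out, even though TestParam carries a pred_dir_path. A sketch of the missing step, to be placed at the end of the loop body (the <pred_dir_path>/<test_name>.pred naming and one-value-per-line format are assumptions):

// inside the for-loop of test(), after Predict(preds, test_data):
char pred_path[256];
sprintf(pred_path, "%s/%s.pred", test_param.pred_dir_path,
        test_param.test_names[i].c_str());   // hypothetical naming scheme
FILE* fo = fopen(pred_path, "w");
for(size_t j = 0; j < preds.size(); j++){
    fprintf(fo, "%f\n", preds[j]);           // one prediction per line
}
fclose(fo);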

View File

@@ -12,120 +12,120 @@
using namespace xgboost::utils;
namespace xgboost{
    namespace regression{
        /*!
         * \brief wraps the training process of the gradient
         *   boosting regression model, given the configuration
         * \author Kailong Chen: chenkl198812@gmail.com
         */
        class RegBoostTrain{
        public:
            /*!
             * \brief start the training process of the gradient boosting regression
             *   model given the configuration, and finally save the models
             *   to the specified model directory
             * \param config_path the location of the configuration
             * \param silent whether to print feedback messages
             */
            void train(char* config_path, bool silent = false){
                reg_boost_learner = new xgboost::regression::RegBoostLearner(silent);
                ConfigIterator config_itr(config_path);
                // get the training data and validation data paths, configure the learner
                while (config_itr.Next()){
                    printf("%s %s\n",config_itr.name(),config_itr.val());
                    reg_boost_learner->SetParam(config_itr.name(),config_itr.val());
                    train_param.SetParam(config_itr.name(),config_itr.val());
                }
                Assert(train_param.validation_data_paths.size() == train_param.validation_data_names.size(),
                    "The number of validation paths is not the same as the number of validation data set names");
                // load data
                xgboost::regression::DMatrix train;
                printf("%s\n",train_param.train_path);
                train.LoadText(train_param.train_path);
                // eval_data owns the validation matrices for the whole run, so the
                // pointers stored in evals stay valid (pushing the address of a
                // loop-local DMatrix would leave dangling pointers)
                std::vector<xgboost::regression::DMatrix> eval_data(train_param.validation_data_paths.size());
                std::vector<const xgboost::regression::DMatrix*> evals;
                for(size_t i = 0; i < train_param.validation_data_paths.size(); i++){
                    eval_data[i].LoadText(train_param.validation_data_paths[i].c_str());
                    evals.push_back(&eval_data[i]);
                }
                reg_boost_learner->SetData(&train,evals,train_param.validation_data_names);
                // begin training
                reg_boost_learner->InitTrainer();
                char suffix[256];
                for(int i = 1; i <= train_param.boost_iterations; i++){
                    reg_boost_learner->UpdateOneIter(i);
                    // a non-positive save_period means "only save the final model"
                    if(train_param.save_period > 0 && i % train_param.save_period == 0){
                        sprintf(suffix,"%d.model",i);
                        SaveModel(suffix);
                    }
                }
                // save the final round model
                SaveModel("final.model");
            }
        private:
            /*! \brief save the model in the model directory with the specified suffix */
            void SaveModel(const char* suffix){
                char model_path[256];
                sprintf(model_path,"%s/%s",train_param.model_dir_path,suffix);
                // binary mode, since the model file is a binary dump
                FILE* file = fopen(model_path,"wb");
                FileStream fout(file);
                reg_boost_learner->SaveModel(fout);
                fout.Close();
            }
            struct TrainParam{
                /* \brief upper bound of the number of boosters */
                int boost_iterations;
                /* \brief the period to save the model; -1 means only save the final round model */
                int save_period;
                /* \brief the path of the training data set */
                char train_path[256];
                /* \brief the path of the directory containing the saved models */
                char model_dir_path[256];
                /* \brief the paths of the validation data sets */
                std::vector<std::string> validation_data_paths;
                /* \brief the names of the validation data sets */
                std::vector<std::string> validation_data_names;
                /*!
                 * \brief set parameters from outside
                 * \param name name of the parameter
                 * \param val value of the parameter
                 */
                inline void SetParam(const char *name, const char *val ){
                    if( !strcmp("boost_iterations", name ) ) boost_iterations = atoi( val );
                    if( !strcmp("save_period", name ) ) save_period = atoi( val );
                    if( !strcmp("train_path", name ) ) strcpy(train_path,val);
                    if( !strcmp("model_dir_path", name ) ) strcpy(model_dir_path,val);
                    if( !strcmp("validation_paths", name) ) {
                        validation_data_paths = StringProcessing::split(val,';');
                    }
                    if( !strcmp("validation_names", name) ) {
                        validation_data_names = StringProcessing::split(val,';');
                    }
                }
            };
            /*! \brief the parameters of the training process */
            TrainParam train_param;
            /*! \brief the gradient boosting regression learner */
            xgboost::regression::RegBoostLearner* reg_boost_learner;
        };
    }
}
#endif
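As a worked example of the saving logic above: with boost_iterations=6 and save_period=2, the loop writes 2.model, 4.model and 6.model into model_dir_path, and final.model is written unconditionally after the loop; with save_period=-1 (or 0) only final.model is produced.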

View File

@@ -2,14 +2,14 @@
#define _XGBOOST_REGDATA_H_
/*!
 * \file xgboost_regdata.h
 * \brief input data structure for regression and binary classification tasks.
 *   Format: the data contains one instance per line;
 *   the format of a line is as below:
 *   label <nonzero feature dimension> [feature index:feature value]+
 * \author Kailong Chen: chenkl198812@gmail.com, Tianqi Chen: tianqi.tchen@gmail.com
 */
#include <cstdio>
#include <vector>
#include "../booster/xgboost_data.h"
@@ -31,17 +31,17 @@ namespace xgboost{
            /*! \brief default constructor */
            DMatrix( void ){}
            /*! \brief get the number of instances */
            inline int size() const{
                return labels.size();
            }
            /*!
             * \brief load from text file
             * \param fname name of text data
             * \param silent whether to print information or not
             */
            inline void LoadText( const char* fname, bool silent = false ){
                data.Clear();
                FILE* file = utils::FopenCheck( fname, "r" );
@@ -49,7 +49,7 @@ namespace xgboost{
                char tmp[ 1024 ];
                std::vector<booster::bst_uint> findex;
                std::vector<booster::bst_float> fvalue;
                while( fscanf( file, "%s", tmp ) == 1 ){
                    unsigned index; float value;
                    if( sscanf( tmp, "%u:%f", &index, &value ) == 2 ){
@@ -64,23 +64,23 @@ namespace xgboost{
                        init = false;
                    }
                }
                labels.push_back( label );
                data.AddRow( findex, fvalue );
                this->UpdateInfo();
                if( !silent ){
                    printf("%ux%u matrix with %lu entries is loaded from %s\n",
                        (unsigned)labels.size(), num_feature, (unsigned long)data.NumEntry(), fname );
                }
                fclose(file);
            }
            /*!
             * \brief load from binary file
             * \param fname name of binary data
             * \param silent whether to print information or not
             * \return whether loading succeeded
             */
            inline bool LoadBinary( const char* fname, bool silent = false ){
                FILE *fp = fopen64( fname, "rb" );
                if( fp == NULL ) return false;
@@ -92,15 +92,15 @@ namespace xgboost{
                this->UpdateInfo();
                if( !silent ){
                    printf("%ux%u matrix with %lu entries is loaded from %s\n",
                        (unsigned)labels.size(), num_feature, (unsigned long)data.NumEntry(), fname );
                }
                return true;
            }
            /*!
             * \brief save to binary file
             * \param fname name of binary data
             * \param silent whether to print information or not
             */
            inline void SaveBinary( const char* fname, bool silent = false ){
                utils::FileStream fs( utils::FopenCheck( fname, "wb" ) );
                data.SaveBinary( fs );
@@ -108,17 +108,17 @@ namespace xgboost{
                fs.Close();
                if( !silent ){
                    printf("%ux%u matrix with %lu entries is saved to %s\n",
                        (unsigned)labels.size(), num_feature, (unsigned long)data.NumEntry(), fname );
                }
            }
            /*!
             * \brief cache-load data given a file name; the function first checks whether fname + '.buffer' exists;
             *   if the binary buffer exists it reads from the binary buffer, otherwise it loads from the text file
             *   and tries to create a buffer file
             * \param fname name of binary data
             * \param silent whether to print information or not
             */
            inline void CacheLoad( const char *fname, bool silent = false ){
                char bname[ 1024 ];
                sprintf( bname, "%s.buffer", fname );
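Judging from the LoadText loop above, every token that parses as index:value is taken as a feature of the current instance, and any other token is taken as the label starting a new instance. An input file consistent with that parsing would look roughly like this (made-up numbers, one instance per line):

1 0:1.2 4:0.5 10:3.0
0 1:0.7 4:1.5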