tab eliminated

2014-02-19 13:25:01 +08:00 · 2014-02-19 13:25:01 +08:00 · a0dddaf224
commit a0dddaf224
parent a20b1d1866
6 changed files with 468 additions and 470 deletions
--- a/demo/regression/reg.conf
+++ b/demo/regression/reg.conf
@ -18,8 +18,6 @@ booster_type=1

 do_reboost=0

-bst:num_roots=0
-
 bst:num_feature=3

 learning_rate=0.01
--- a/regression/xgboost_reg.h
+++ b/regression/xgboost_reg.h
@ -12,267 +12,267 @@
 #include "../utils/xgboost_stream.h"

 namespace xgboost{
-	namespace regression{
-		/*! \brief class for gradient boosted regression */
-		class RegBoostLearner{            
-		public:
+    namespace regression{
+        /*! \brief class for gradient boosted regression */
+        class RegBoostLearner{            
+        public:

-			RegBoostLearner(bool silent = false){
-				this->silent = silent;
-			}
+            /*! 
+            * \brief construct a learner without any data attached, only the silent flag is stored
+            * \param silent stored in the silent member, which UpdateOneIter checks before printing
+            */
+            RegBoostLearner(bool silent = false){
+                this->silent = silent;
+            }

-			/*! 
-			* \brief a regression booter associated with training and evaluating data 
-			* \param train pointer to the training data
-			* \param evals array of evaluating data
-			* \param evname name of evaluation data, used print statistics
-			*/
-			RegBoostLearner( const DMatrix *train,
-				std::vector<const DMatrix *> evals,
-				std::vector<std::string> evname, bool silent = false ){
-					this->silent = silent;
-					SetData(train,evals,evname);
-			}
+            /*! 
+            * \brief a regression booster associated with training and evaluating data 
+            * \param train pointer to the training data
+            * \param evals array of evaluating data
+            * \param evname names of the evaluation data sets, used when printing statistics
+            * \param silent stored in the silent member before the data is attached through SetData
+            */
+            RegBoostLearner( const DMatrix *train,
+                std::vector<const DMatrix *> evals,
+                std::vector<std::string> evname, bool silent = false ){
+                    this->silent = silent;
+                    SetData(train,evals,evname);
+            }

-			/*! 
-			* \brief associate regression booster with training and evaluating data 
-			* \param train pointer to the training data
-			* \param evals array of evaluating data
-			* \param evname name of evaluation data, used print statistics
-			*/
-			inline void SetData(const DMatrix *train,
-				std::vector<const DMatrix *> evals,
-				std::vector<std::string> evname){
-					this->train_ = train;
-					this->evals_ = evals;
-					this->evname_ = evname; 
-					//assign buffer index
-					int buffer_size = (*train).size();
-					for(int i = 0; i < evals.size(); i++){
-						buffer_size += (*evals[i]).size();
-					}
-					char str[25];
-					_itoa(buffer_size,str,10);
-					base_model.SetParam("num_pbuffer",str);
-					base_model.SetParam("num_pbuffer",str);
-			}
+            /*! 
+            * \brief associate regression booster with training and evaluating data 
+            *        and pass the summed size() of all data sets to the base model as "num_pbuffer"
+            * \param train pointer to the training data, must not be NULL
+            * \param evals array of evaluating data
+            * \param evname names of the evaluation data sets, used when printing statistics
+            */
+            inline void SetData(const DMatrix *train,
+                std::vector<const DMatrix *> evals,
+                std::vector<std::string> evname){
+                    utils::Assert( train != NULL, "SetData: training data must not be NULL" );
+                    this->train_ = train;
+                    this->evals_ = evals;
+                    this->evname_ = evname; 
+                    //assign buffer index: training data first, then every evaluation set
+                    int buffer_size = (*train).size();
+                    for(size_t i = 0; i < evals.size(); i++){
+                        buffer_size += (*evals[i]).size();
+                    }
+                    // sprintf replaces the MSVC-only _itoa; 25 bytes is enough for any int
+                    char str[25];
+                    sprintf(str,"%d",buffer_size);
+                    // the parameter only needs to be set once
+                    base_model.SetParam("num_pbuffer",str);
+            }

-			/*! 
-			* \brief set parameters from outside 
-			* \param name name of the parameter
-			* \param val  value of the parameter
-			*/
-			inline void SetParam( const char *name, const char *val ){
-				mparam.SetParam( name, val );
-				base_model.SetParam( name, val );
-			}
-			/*!
-			* \brief initialize solver before training, called before training
-			* this function is reserved for solver to allocate necessary space and do other preparation 
-			*/
-			inline void InitTrainer( void ){
-				base_model.InitTrainer();
-				InitModel();
-				mparam.AdjustBase();
-			} 
+            /*! 
+            * \brief set parameters from outside, the pair is forwarded to both mparam and base_model
+            * \param name name of the parameter
+            * \param val  value of the parameter
+            */
+            inline void SetParam( const char *name, const char *val ){
+                mparam.SetParam( name, val );
+                base_model.SetParam( name, val );
+            }
+            /*!
+            * \brief initialize solver before training, called before training
+            * this function is reserved for solver to allocate necessary space and do other preparation 
+            * it initializes the trainer of base_model, calls InitModel and then lets mparam adjust base_score
+            */
+            inline void InitTrainer( void ){
+                base_model.InitTrainer();
+                InitModel();
+                mparam.AdjustBase(); // maps base_score to the logit scale when loss_type is 1
+            } 

-			 /*!
-             * \brief initialize the current data storage for model, if the model is used first time, call this function
-             */
-			inline void InitModel( void ){
-				base_model.InitModel();
-			} 
+            /*!
+            * \brief initialize the current data storage for model, if the model is used for the first time, call this function
+            * the call is simply forwarded to base_model.InitModel
+            */
+            inline void InitModel( void ){
+                base_model.InitModel();
+            } 

-			/*! 
-			* \brief load model from stream
-			* \param fi input stream
-			*/          
-			inline void LoadModel( utils::IStream &fi ){
-				utils::Assert( fi.Read( &mparam, sizeof(ModelParam) ) != 0 );
-				base_model.LoadModel( fi );
-			}
-			/*! 
-			* \brief save model to stream
-			* \param fo output stream
-			*/
-			inline void SaveModel( utils::IStream &fo ) const{
-				fo.Write( &mparam, sizeof(ModelParam) );
-				base_model.SaveModel( fo );	
-			} 
+            /*! 
+            * \brief load model from stream: first the raw bytes of ModelParam, then the base model
+            * \param fi input stream
+            */          
+            inline void LoadModel( utils::IStream &fi ){
+                utils::Assert( fi.Read( &mparam, sizeof(ModelParam) ) != 0 ); // the read happens inside the assertion argument
+                base_model.LoadModel( fi );
+            }
+            /*! 
+            * \brief save model to stream: first the raw bytes of ModelParam, then the base model,
+            *        which is the order LoadModel reads them back
+            * \param fo output stream
+            */
+            inline void SaveModel( utils::IStream &fo ) const{
+                fo.Write( &mparam, sizeof(ModelParam) );
+                base_model.SaveModel( fo );	
+            } 

-			/*! 
-			* \brief update the model for one iteration
-			* \param iteration the number of updating iteration 
-			*/           
-			inline void UpdateOneIter( int iteration ){
-				std::vector<float> grad,hess,preds;
-				std::vector<unsigned> root_index;
-				booster::FMatrixS::Image train_image((*train_).data);
-				Predict(preds,*train_,0);
-				Gradient(preds,(*train_).labels,grad,hess);
-				base_model.DoBoost(grad,hess,train_image,root_index);
-				int buffer_index_offset = (*train_).size();
-				float loss = 0.0;
-				for(int i = 0; i < evals_.size();i++){
-					Predict(preds, *evals_[i], buffer_index_offset);
-					loss = mparam.Loss(preds,(*evals_[i]).labels);
-					if(!silent){
-						printf("The loss of %s data set in %d the \
-							iteration is %f",evname_[i].c_str(),&iteration,&loss);
-					}
-					buffer_index_offset += (*evals_[i]).size();
-				}
+            /*! 
+            * \brief update the model for one iteration: predict on the training data, compute
+            *        the gradients, boost the base model, then report the loss on every evaluation set
+            * \param iteration the number of updating iteration, only used in the printed message
+            */           
+            inline void UpdateOneIter( int iteration ){
+                std::vector<float> grad,hess,preds;
+                std::vector<unsigned> root_index;
+                booster::FMatrixS::Image train_image((*train_).data);
+                Predict(preds,*train_,0);
+                Gradient(preds,(*train_).labels,grad,hess);
+                base_model.DoBoost(grad,hess,train_image,root_index);
+                // buffer indices of the evaluation sets start right after those of the training data
+                int buffer_index_offset = (*train_).size();
+                for(size_t i = 0; i < evals_.size();i++){
+                    Predict(preds, *evals_[i], buffer_index_offset);
+                    const float loss = mparam.Loss(preds,(*evals_[i]).labels);
+                    if(!silent){
+                        // pass the values, not their addresses: %d expects an int and %f a double
+                        printf("The loss of %s data set in iteration %d is %f\n",
+                               evname_[i].c_str(),iteration,loss);
+                    }
+                    buffer_index_offset += (*evals_[i]).size();
+                }

-			}
+            }

-			/*! \brief get the transformed predictions, given data */
-			inline void Predict( std::vector<float> &preds, const DMatrix &data,int buffer_index_offset = 0 ){
-				int data_size = data.size();
-				preds.resize(data_size);
-				for(int j = 0; j < data_size; j++){
-					preds[j] = mparam.PredTransform(mparam.base_score + 
-						base_model.Predict(data.data[j],buffer_index_offset + j));
-				}
-			}
+            /*! 
+            * \brief get the transformed predictions, given data 
+            * \param preds output, resized to data.size() and overwritten
+            * \param data the data set to predict on
+            * \param buffer_index_offset added to the row index to form the buffer index passed to base_model.Predict
+            */
+            inline void Predict( std::vector<float> &preds, const DMatrix &data,int buffer_index_offset = 0 ){
+                int data_size = data.size();
+                preds.resize(data_size);
+                for(int j = 0; j < data_size; j++){
+                    preds[j] = mparam.PredTransform(mparam.base_score + 
+                        base_model.Predict(data.data[j],buffer_index_offset + j));
+                }
+            }

-		private:
-			/*! \brief get the first order and second order gradient, given the transformed predictions and labels*/
-			inline void Gradient(const std::vector<float> &preds, const std::vector<float> &labels, std::vector<float> &grad,
-				std::vector<float> &hess){
-					grad.clear(); 
-					hess.clear();
-					for(int j = 0; j < preds.size(); j++){
-						grad.push_back(mparam.FirstOrderGradient(preds[j],labels[j]));
-						hess.push_back(mparam.SecondOrderGradient(preds[j],labels[j]));
-					}
-			}
+        private:
+            /*! 
+            * \brief get the first order and second order gradient, given the transformed predictions and labels
+            * \param preds transformed predictions
+            * \param labels true labels, indexed in parallel with preds
+            * \param grad output, cleared and then filled with mparam.FirstOrderGradient of every pair
+            * \param hess output, cleared and then filled with mparam.SecondOrderGradient of every pair
+            */
+            inline void Gradient(const std::vector<float> &preds, const std::vector<float> &labels, std::vector<float> &grad,
+                std::vector<float> &hess){
+                    grad.clear(); 
+                    hess.clear();
+                    for(int j = 0; j < preds.size(); j++){
+                        grad.push_back(mparam.FirstOrderGradient(preds[j],labels[j]));
+                        hess.push_back(mparam.SecondOrderGradient(preds[j],labels[j]));
+                    }
+            }

-			enum LOSS_TYPE_LIST{
-				LINEAR_SQUARE,
-				LOGISTIC_NEGLOGLIKELIHOOD,
-			};
+            /*! \brief loss types switched on through ModelParam::loss_type; the enumerators take the values 0 and 1 */
+            enum LOSS_TYPE_LIST{
+                LINEAR_SQUARE,
+                LOGISTIC_NEGLOGLIKELIHOOD,
+            };

-			/*! \brief training parameter for regression */
-			struct ModelParam{
-				/* \brief global bias */
-				float base_score;
-				/* \brief type of loss function */
-				int loss_type;
+            /*! \brief training parameter for regression */
+            struct ModelParam{
+                /* \brief global bias */
+                float base_score;
+                /* \brief type of loss function */
+                int loss_type;

-				ModelParam( void ){
-					base_score = 0.5f;
-					loss_type  = 0;
-				}
-				/*! 
-				* \brief set parameters from outside 
-				* \param name name of the parameter
-				* \param val  value of the parameter
-				*/
-				inline void SetParam( const char *name, const char *val ){
-					if( !strcmp("base_score", name ) )  base_score = (float)atof( val );
-					if( !strcmp("loss_type", name ) )   loss_type = atoi( val );
-				}
-				/*! 
-				* \brief adjust base_score
-				*/                
-				inline void AdjustBase( void ){
-					if( loss_type == 1 ){
-						utils::Assert( base_score > 0.0f && base_score < 1.0f, "sigmoid range constrain" );
-						base_score = - logf( 1.0f / base_score - 1.0f );
-					}
-				}
-				/*! 
-				* \brief calculate first order gradient of loss, given transformed prediction
-				* \param predt transformed prediction
-				* \param label true label
-				* \return first order gradient
-				*/
-				inline float FirstOrderGradient( float predt, float label ) const{
-					switch( loss_type ){                        
-					case LINEAR_SQUARE: return predt - label;
-					case 1: return predt - label;
-					default: utils::Error("unknown loss_type"); return 0.0f;
-					}
-				}
-				/*! 
-				* \brief calculate second order gradient of loss, given transformed prediction
-				* \param predt transformed prediction
-				* \param label true label
-				* \return second order gradient
-				*/
-				inline float SecondOrderGradient( float predt, float label ) const{
-					switch( loss_type ){                        
-					case LINEAR_SQUARE: return 1.0f;
-					case LOGISTIC_NEGLOGLIKELIHOOD: return predt * ( 1 - predt );
-					default: utils::Error("unknown loss_type"); return 0.0f;
-					}
-				}
+                /*! \brief default parameters: base_score 0.5 and loss_type 0, the value of LINEAR_SQUARE */
+                ModelParam( void ){
+                    base_score = 0.5f;
+                    loss_type  = 0;
+                }
+                /*! 
+                * \brief set parameters from outside, only "base_score" and "loss_type" are recognized,
+                *        any other name leaves the parameters unchanged
+                * \param name name of the parameter
+                * \param val  value of the parameter, parsed with atof / atoi
+                */
+                inline void SetParam( const char *name, const char *val ){
+                    if( !strcmp("base_score", name ) )  base_score = (float)atof( val );
+                    if( !strcmp("loss_type", name ) )   loss_type = atoi( val );
+                }
+                /*! 
+                * \brief adjust base_score: for the logistic loss the value is given as a probability
+                *        in (0,1) and is replaced by its logit, -log(1/p - 1); other loss types are untouched
+                */                
+                inline void AdjustBase( void ){
+                    // use the enumerator instead of the magic number 1, as the loss switches do
+                    if( loss_type == LOGISTIC_NEGLOGLIKELIHOOD ){
+                        utils::Assert( base_score > 0.0f && base_score < 1.0f, "sigmoid range constrain" );
+                        base_score = - logf( 1.0f / base_score - 1.0f );
+                    }
+                }
+                /*! 
+                * \brief calculate first order gradient of loss, given transformed prediction
+                *        both supported loss types yield predt - label
+                * \param predt transformed prediction
+                * \param label true label
+                * \return first order gradient
+                */
+                inline float FirstOrderGradient( float predt, float label ) const{
+                    switch( loss_type ){
+                    case LINEAR_SQUARE: return predt - label;
+                    // named enumerator instead of the literal 1, consistent with SecondOrderGradient
+                    case LOGISTIC_NEGLOGLIKELIHOOD: return predt - label;
+                    default: utils::Error("unknown loss_type"); return 0.0f;
+                    }
+                }
+                /*! 
+                * \brief calculate second order gradient of loss, given transformed prediction
+                *        1 for the square loss, predt * (1 - predt) for the logistic loss
+                * \param predt transformed prediction
+                * \param label true label, not used by either loss type
+                * \return second order gradient
+                */
+                inline float SecondOrderGradient( float predt, float label ) const{
+                    switch( loss_type ){                        
+                    case LINEAR_SQUARE: return 1.0f;
+                    case LOGISTIC_NEGLOGLIKELIHOOD: return predt * ( 1 - predt );
+                    default: utils::Error("unknown loss_type"); return 0.0f;
+                    }
+                }

-				/*!
-				* \brief calculating the loss, given the predictions, labels and the loss type
-				* \param preds the given predictions
-				* \param labels the given labels
-				* \return the specified loss
-				*/
-				inline float Loss(const std::vector<float> &preds, const std::vector<float> &labels) const{
-					switch( loss_type ){
-					case LINEAR_SQUARE: return SquareLoss(preds,labels);
-					case LOGISTIC_NEGLOGLIKELIHOOD: return NegLoglikelihoodLoss(preds,labels);
-					default: utils::Error("unknown loss_type"); return 0.0f;
-					}
-				}
+                /*!
+                * \brief calculating the loss, given the predictions, labels and the loss type
+                *        dispatches to SquareLoss or NegLoglikelihoodLoss according to loss_type
+                * \param preds the given predictions
+                * \param labels the given labels
+                * \return the specified loss, or 0 after utils::Error for an unknown loss_type
+                */
+                inline float Loss(const std::vector<float> &preds, const std::vector<float> &labels) const{
+                    switch( loss_type ){
+                    case LINEAR_SQUARE: return SquareLoss(preds,labels);
+                    case LOGISTIC_NEGLOGLIKELIHOOD: return NegLoglikelihoodLoss(preds,labels);
+                    default: utils::Error("unknown loss_type"); return 0.0f;
+                    }
+                }

-				/*!
-				* \brief calculating the square loss, given the predictions and labels
-				* \param preds the given predictions
-				* \param labels the given labels
-				* \return the summation of square loss
-				*/
-				inline float SquareLoss(const std::vector<float> &preds, const std::vector<float> &labels) const{
-					float ans = 0.0;
-					for(int i = 0; i < preds.size(); i++)
-						ans += pow(preds[i] - labels[i], 2);
-					return ans;
-				}
+                /*!
+                * \brief calculating the square loss, given the predictions and labels
+                * \param preds the given predictions
+                * \param labels the given labels, indexed in parallel with preds
+                * \return the summation of the squared differences, it is not averaged
+                */
+                inline float SquareLoss(const std::vector<float> &preds, const std::vector<float> &labels) const{
+                    float ans = 0.0;
+                    for(int i = 0; i < preds.size(); i++)
+                        ans += pow(preds[i] - labels[i], 2);
+                    return ans;
+                }

-				/*!
-				* \brief calculating the square loss, given the predictions and labels
-				* \param preds the given predictions
-				* \param labels the given labels
-				* \return the summation of square loss
-				*/
-				inline float NegLoglikelihoodLoss(const std::vector<float> &preds, const std::vector<float> &labels) const{
-					float ans = 0.0;
-					for(int i = 0; i < preds.size(); i++)
-						ans -= labels[i] * log(preds[i]) + ( 1 - labels[i] ) * log(1 - preds[i]);
-					return ans;
-				}
+                /*!
+                * \brief calculating the negative log-likelihood loss, given the predictions and labels
+                * \param preds the given predictions, passed to log() as preds[i] and 1 - preds[i]
+                * \param labels the given labels, indexed in parallel with preds
+                * \return the summation of -( label * log(pred) + (1 - label) * log(1 - pred) )
+                */
+                inline float NegLoglikelihoodLoss(const std::vector<float> &preds, const std::vector<float> &labels) const{
+                    float ans = 0.0;
+                    for(int i = 0; i < preds.size(); i++)
+                        ans -= labels[i] * log(preds[i]) + ( 1 - labels[i] ) * log(1 - preds[i]);
+                    return ans;
+                }


-				/*! 
-				* \brief transform the linear sum to prediction 
-				* \param x linear sum of boosting ensemble
-				* \return transformed prediction
-				*/
-				inline float PredTransform( float x ){
-					switch( loss_type ){                        
-					case LINEAR_SQUARE: return x;
-					case LOGISTIC_NEGLOGLIKELIHOOD: return 1.0f/(1.0f + expf(-x));
-					default: utils::Error("unknown loss_type"); return 0.0f;
-					}
-				}
+                /*! 
+                * \brief transform the linear sum to prediction: identity for the square loss,
+                *        sigmoid 1/(1+exp(-x)) for the logistic loss
+                * \param x linear sum of boosting ensemble
+                * \return transformed prediction, or 0 after utils::Error for an unknown loss_type
+                */
+                inline float PredTransform( float x ){
+                    switch( loss_type ){                        
+                    case LINEAR_SQUARE: return x;
+                    case LOGISTIC_NEGLOGLIKELIHOOD: return 1.0f/(1.0f + expf(-x));
+                    default: utils::Error("unknown loss_type"); return 0.0f;
+                    }
+                }


-			};            
-		private:            
-			booster::GBMBaseModel base_model;
-			ModelParam   mparam;
-			const DMatrix *train_;
-			std::vector<const DMatrix *> evals_;
-			std::vector<std::string> evname_;
-			bool silent;
-		};
-	}
+            };            
+        private:            
+            booster::GBMBaseModel base_model;
+            ModelParam   mparam;
+            const DMatrix *train_;
+            std::vector<const DMatrix *> evals_;
+            std::vector<std::string> evname_;
+            bool silent;
+        };
+    }
 };

 #endif
--- a/regression/xgboost_reg_main.cpp
+++ b/regression/xgboost_reg_main.cpp
@ -3,13 +3,13 @@
 using namespace xgboost::regression;

 int main(int argc, char *argv[]){
-//	char* config_path = argv[1];
-//	bool silent = ( atoi(argv[2]) == 1 );
-	char* config_path = "c:\\cygwin64\\home\\chen\\github\\xgboost\\demo\\regression\\reg.conf";
-	bool silent = false;
-	RegBoostTrain train;
-	train.train(config_path,false);
+    // usage: <program> [config_path] [silent], silent is enabled when the second argument starts with '1'
+    // without arguments the former hard-coded configuration is used and messages are printed
+    char default_config[] = "c:\\cygwin64\\home\\chen\\github\\xgboost\\demo\\regression\\reg.conf";
+    char* config_path = ( argc > 1 ) ? argv[1] : default_config;
+    bool silent = ( argc > 2 ) && ( argv[2][0] == '1' );
+    RegBoostTrain train;
+    train.train(config_path,silent);

-	RegBoostTest test;
-	test.test(config_path,false);
+    RegBoostTest test;
+    test.test(config_path,silent);
+    return 0;
 }
--- a/regression/xgboost_reg_test.h
+++ b/regression/xgboost_reg_test.h
@ -11,89 +11,89 @@

 using namespace xgboost::utils;
 namespace xgboost{
-	namespace regression{
-		/*!
-		* \brief wrapping the testing process of the gradient 
-		         boosting regression model,given the configuation
-		* \author Kailong Chen: chenkl198812@gmail.com
-		*/
-		class RegBoostTest{
-		public:
-			/*!
-			* \brief to start the testing process of gradient boosting regression
-			*        model given the configuation, and finally save the prediction
-			*        results to the specified paths.
-			* \param config_path the location of the configuration
-			* \param silent whether to print feedback messages
-			*/
-			void test(char* config_path,bool silent = false){
-				reg_boost_learner = new xgboost::regression::RegBoostLearner(silent);
-				ConfigIterator config_itr(config_path);
-				//Get the training data and validation data paths, config the Learner
-				while (config_itr.Next()){
-					reg_boost_learner->SetParam(config_itr.name(),config_itr.val());
-					test_param.SetParam(config_itr.name(),config_itr.val());
-				}
+    namespace regression{
+        /*!
+        * \brief wrapping the testing process of the gradient 
+        *        boosting regression model, given the configuration
+        * \author Kailong Chen: chenkl198812@gmail.com
+        */
+        class RegBoostTest{
+        public:
+            /*!
+            * \brief to start the testing process of gradient boosting regression
+            *        model given the configuration: final.model is loaded from the model
+            *        directory and predictions are computed for every configured test data set.
+            *        NOTE(review): the predictions are not written to pred_dir_path here
+            * \param config_path the location of the configuration
+            * \param silent whether to print feedback messages
+            */
+            void test(char* config_path,bool silent = false){
+                reg_boost_learner = new xgboost::regression::RegBoostLearner(silent);
+                ConfigIterator config_itr(config_path);
+                //Get the test data paths and the model directory, config the Learner
+                while (config_itr.Next()){
+                    reg_boost_learner->SetParam(config_itr.name(),config_itr.val());
+                    test_param.SetParam(config_itr.name(),config_itr.val());
+                }

-				Assert(test_param.test_paths.size() == test_param.test_names.size(),
-					"The number of test data set paths is not the same as the number of test data set data set names");
+                Assert(test_param.test_paths.size() == test_param.test_names.size(),
+                    "The number of test data set paths is not the same as the number of test data set data set names");

-				//begin testing
-				reg_boost_learner->InitModel();
-				char model_path[256];
-				std::vector<float> preds;
-				for(int i = 0; i < test_param.test_paths.size(); i++){
-					xgboost::regression::DMatrix test_data;
-					test_data.LoadText(test_param.test_paths[i].c_str());
-					sprintf(model_path,"%s/final.model",test_param.model_dir_path);
-					FileStream fin(fopen(model_path,"r"));
-					reg_boost_learner->LoadModel(fin);
-					fin.Close();
-					reg_boost_learner->Predict(preds,test_data);
-				}
-			}
+                //begin testing
+                reg_boost_learner->InitModel();
+                // the model path is the same for every test set: build it once and make sure it fits
+                char model_path[256];
+                Assert(strlen(test_param.model_dir_path) + strlen("/final.model") < sizeof(model_path),
+                    "model_dir_path is too long");
+                sprintf(model_path,"%s/final.model",test_param.model_dir_path);
+                std::vector<float> preds;
+                for(size_t i = 0; i < test_param.test_paths.size(); i++){
+                    xgboost::regression::DMatrix test_data;
+                    test_data.LoadText(test_param.test_paths[i].c_str());
+                    // NOTE(review): the model is raw binary; "rb" would be safer on Windows,
+                    // but the mode must match the one used by the writer - confirm before changing
+                    FILE *fp = fopen(model_path,"r");
+                    Assert(fp != NULL, "can not open the model file");
+                    FileStream fin(fp);
+                    reg_boost_learner->LoadModel(fin);
+                    fin.Close();
+                    reg_boost_learner->Predict(preds,test_data);
+                }
+                // the learner is only used inside this call, release it so that it does not leak
+                delete reg_boost_learner;
+                reg_boost_learner = NULL;
+            }

-		private:
-			struct TestParam{
-				/* \brief upperbound of the number of boosters */
-				int boost_iterations;
+        private:
+            struct TestParam{
+                /* \brief upperbound of the number of boosters */
+                int boost_iterations;

-				/* \brief the period to save the model, -1 means only save the final round model */
-				int save_period;
+                /* \brief the period to save the model, -1 means only save the final round model */
+                int save_period;

-				/* \brief the path of directory containing the saved models */
-				char model_dir_path[256];
+                /* \brief the path of directory containing the saved models */
+                char model_dir_path[256];

-				/* \brief the path of directory containing the output prediction results */
-				char pred_dir_path[256];
+                /* \brief the path of directory containing the output prediction results */
+                char pred_dir_path[256];

-				/* \brief the paths of test data sets */
-				std::vector<std::string> test_paths;
+                /* \brief the paths of test data sets */
+                std::vector<std::string> test_paths;

-				/* \brief the names of the test data sets */
-				std::vector<std::string> test_names;
+                /* \brief the names of the test data sets */
+                std::vector<std::string> test_names;

-				/*! 
-				* \brief set parameters from outside 
-				* \param name name of the parameter
-				* \param val  value of the parameter
-				*/
-				inline void SetParam(const char *name,const char *val ){
-					if( !strcmp("model_dir_path", name ) ) strcpy(model_dir_path,val);
-					if( !strcmp("pred_dir_path", name ) ) strcpy(pred_dir_path,val);
-					if( !strcmp("test_paths",  name) ) {
-						test_paths = StringProcessing::split(val,';');
-					}
-					if( !strcmp("test_names",  name) ) {
-						test_names = StringProcessing::split(val,';');
-					}
-				}
-			};
+                /*! 
+                * \brief set parameters from outside, recognized names are "model_dir_path",
+                *        "pred_dir_path", "test_paths" and "test_names"; the last two are ';' separated lists
+                * \param name name of the parameter
+                * \param val  value of the parameter, directory paths must fit into 255 characters
+                */
+                inline void SetParam(const char *name,const char *val ){
+                    if( !strcmp("model_dir_path", name ) ) {
+                        // refuse values that would overflow the fixed size buffer
+                        Assert(strlen(val) < sizeof(model_dir_path), "model_dir_path is too long");
+                        strcpy(model_dir_path,val);
+                    }
+                    if( !strcmp("pred_dir_path", name ) ) {
+                        Assert(strlen(val) < sizeof(pred_dir_path), "pred_dir_path is too long");
+                        strcpy(pred_dir_path,val);
+                    }
+                    if( !strcmp("test_paths",  name) ) {
+                        test_paths = StringProcessing::split(val,';');
+                    }
+                    if( !strcmp("test_names",  name) ) {
+                        test_names = StringProcessing::split(val,';');
+                    }
+                }
+            };

-			TestParam test_param;
-			xgboost::regression::RegBoostLearner* reg_boost_learner;
-		};
-	}
+            TestParam test_param;
+            xgboost::regression::RegBoostLearner* reg_boost_learner;
+        };
+    }
 }

 #endif
--- a/regression/xgboost_reg_train.h
+++ b/regression/xgboost_reg_train.h
@ -12,120 +12,120 @@
 using namespace xgboost::utils;

 namespace xgboost{
-	namespace regression{
-		/*!
-		* \brief wrapping the training process of the gradient 
-		         boosting regression model,given the configuation
-		* \author Kailong Chen: chenkl198812@gmail.com
-		*/
-		class RegBoostTrain{
-		public:
-			/*!
-			* \brief to start the training process of gradient boosting regression
-			*        model given the configuation, and finally saved the models
-			*        to the specified model directory
-			* \param config_path the location of the configuration
-			* \param silent whether to print feedback messages
-			*/
-			void train(char* config_path,bool silent = false){
-				reg_boost_learner = new xgboost::regression::RegBoostLearner(silent);
-				ConfigIterator config_itr(config_path);
-				//Get the training data and validation data paths, config the Learner
-				while (config_itr.Next()){
- 					printf("%s %s\n",config_itr.name(),config_itr.val());
-					reg_boost_learner->SetParam(config_itr.name(),config_itr.val());
-					train_param.SetParam(config_itr.name(),config_itr.val());
-				}
+    namespace regression{
+        /*!
+        * \brief wraps the training process of the gradient
+        *        boosting regression model, given the configuration
+        * \author Kailong Chen: chenkl198812@gmail.com
+        */
+        class RegBoostTrain{
+        public:
+            /*!
+            * \brief starts the training process of the gradient boosting regression
+            *        model given the configuration, and finally saves the models
+            *        to the specified model directory
+            * \param config_path the location of the configuration
+            * \param silent whether to print feedback messages
+            */
+            void train(char* config_path,bool silent = false){
+                reg_boost_learner = new xgboost::regression::RegBoostLearner(silent);
+                ConfigIterator config_itr(config_path);
+                //Get the training data and validation data paths, config the Learner
+                while (config_itr.Next()){
+                    printf("%s %s\n",config_itr.name(),config_itr.val());
+                    reg_boost_learner->SetParam(config_itr.name(),config_itr.val());
+                    train_param.SetParam(config_itr.name(),config_itr.val());
+                }

-				Assert(train_param.validation_data_paths.size() == train_param.validation_data_names.size(),
-					"The number of validation paths is not the same as the number of validation data set names");
+                Assert(train_param.validation_data_paths.size() == train_param.validation_data_names.size(),
+                    "The number of validation paths is not the same as the number of validation data set names");

-				//Load Data
-				xgboost::regression::DMatrix train;
-				printf("%s",train_param.train_path);
-				train.LoadText(train_param.train_path);
-				std::vector<const xgboost::regression::DMatrix*> evals;
-				for(int i = 0; i < train_param.validation_data_paths.size(); i++){
-					xgboost::regression::DMatrix eval;
-					eval.LoadText(train_param.validation_data_paths[i].c_str());
-					evals.push_back(&eval);
-				}
-				reg_boost_learner->SetData(&train,evals,train_param.validation_data_names);
+                //Load Data; eval_data is sized once and never resized, so the pointers stored in evals stay valid until train() returns
+                xgboost::regression::DMatrix train;
+                printf("%s",train_param.train_path);
+                train.LoadText(train_param.train_path);
+                std::vector<xgboost::regression::DMatrix> eval_data(train_param.validation_data_paths.size());
+                std::vector<const xgboost::regression::DMatrix*> evals;
+                for(size_t i = 0; i < eval_data.size(); i++){
+                    eval_data[i].LoadText(train_param.validation_data_paths[i].c_str());
+                    evals.push_back(&eval_data[i]); // previously the address of a loop-local DMatrix, destroyed at the end of each iteration
+                }
+                reg_boost_learner->SetData(&train,evals,train_param.validation_data_names);

-				//begin training
-				reg_boost_learner->InitTrainer();
-				char suffix[256];
-				for(int i = 1; i <= train_param.boost_iterations; i++){
-					reg_boost_learner->UpdateOneIter(i);
-					if(train_param.save_period != 0 && i % train_param.save_period == 0){
-						sscanf(suffix,"%d.model",i);
-						SaveModel(suffix);
-					}
-				}
+                //begin training; a model is saved every save_period rounds, and only when save_period is positive
+                reg_boost_learner->InitTrainer();
+                char suffix[256];
+                for(int i = 1; i <= train_param.boost_iterations; i++){
+                    reg_boost_learner->UpdateOneIter(i);
+                    if(train_param.save_period > 0 && i % train_param.save_period == 0){ // > 0: with -1, i % -1 == 0 held on every round
+                        snprintf(suffix,sizeof(suffix),"%d.model",i); // was sscanf, which read the uninitialized buffer instead of writing it
+                        SaveModel(suffix);
+                    }
+                }

-				//save the final round model
-				SaveModel("final.model");
-			}
+                //save the final round model
+                SaveModel("final.model");
+            }

-		private:
-			/*! \brief save model in the model directory with specified suffix*/
-			void SaveModel(const char* suffix){
-				char model_path[256];
-				//save the final round model
-				sprintf(model_path,"%s/%s",train_param.model_dir_path,suffix);
-				FILE* file = fopen(model_path,"w");
-				FileStream fin(file);
-				reg_boost_learner->SaveModel(fin);
-				fin.Close();
-			}
+        private:
+            /*! \brief save the current model to "<model_dir_path>/<suffix>" */
+            void SaveModel(const char* suffix){
+                // std::string instead of a fixed char[256]: a long directory or suffix can no longer overflow the path buffer
+                std::string model_path = std::string(train_param.model_dir_path) + "/" + suffix;
+                FILE* file = fopen(model_path.c_str(),"wb"); // binary mode, as DMatrix::SaveBinary uses for its FileStream
+                Assert(file != NULL,"cannot open model file for writing"); // report via Assert instead of handing a NULL FILE* to FileStream
+                FileStream fin(file);
+                reg_boost_learner->SaveModel(fin);
+                fin.Close();
+            }

-			struct TrainParam{
-				/* \brief upperbound of the number of boosters */
-				int boost_iterations;
+            struct TrainParam{
+                /* \brief upperbound of the number of boosters */
+                int boost_iterations;

-				/* \brief the period to save the model, -1 means only save the final round model */
-				int save_period;
+                /* \brief the period to save the model, -1 means only save the final round model */
+                int save_period;

-				/* \brief the path of training data set */
-				char train_path[256];
+                /* \brief the path of training data set */
+                char train_path[256];

-				/* \brief the path of directory containing the saved models */
-				char model_dir_path[256];
+                /* \brief the path of directory containing the saved models */
+                char model_dir_path[256];

-				/* \brief the paths of validation data sets */
-				std::vector<std::string> validation_data_paths;
+                /* \brief the paths of validation data sets */
+                std::vector<std::string> validation_data_paths;

-				/* \brief the names of the validation data sets */
-				std::vector<std::string> validation_data_names;
+                /* \brief the names of the validation data sets */
+                std::vector<std::string> validation_data_names;

-				/*! 
-				* \brief set parameters from outside 
-				* \param name name of the parameter
-				* \param val  value of the parameter
-				*/
-				inline void SetParam(const char *name,const char *val ){
-					if( !strcmp("boost_iterations", name ) )  boost_iterations = atoi( val );
-					if( !strcmp("save_period", name ) )   save_period = atoi( val );
-					if( !strcmp("train_path",  name ) ) strcpy(train_path,val);
-					if( !strcmp("model_dir_path", name ) ) {
-						strcpy(model_dir_path,val);
-					}
-					if( !strcmp("validation_paths",  name) ) {
-						validation_data_paths = StringProcessing::split(val,';');
-					}
-					if( !strcmp("validation_names",  name) ) {
-						validation_data_names = StringProcessing::split(val,';');
-					}
-				}
-			};
+                /*! 
+                * \brief set one parameter by name; names other than the six handled below are ignored
+                * \param name name of the parameter
+                * \param val  value of the parameter; "train_path" and "model_dir_path" values are truncated to fit their 256-byte buffers
+                */
+                inline void SetParam(const char *name,const char *val ){
+                    if( !strcmp("boost_iterations", name ) )  boost_iterations = atoi( val );
+                    if( !strcmp("save_period", name ) )   save_period = atoi( val );
+                    if( !strcmp("train_path",  name ) ) snprintf(train_path,sizeof(train_path),"%s",val); // bounded: strcpy overflowed the buffer on long values
+                    if( !strcmp("model_dir_path", name ) ) {
+                        snprintf(model_dir_path,sizeof(model_dir_path),"%s",val); // bounded and always NUL-terminated
+                    }
+                    if( !strcmp("validation_paths",  name) ) {
+                        validation_data_paths = StringProcessing::split(val,';');
+                    }
+                    if( !strcmp("validation_names",  name) ) {
+                        validation_data_names = StringProcessing::split(val,';');
+                    }
+                }
+            };

-			/*! \brief the parameters of the training process*/
-			TrainParam train_param;
+            /*! \brief the parameters of the training process*/
+            TrainParam train_param;

-			/*! \brief the gradient boosting regression tree model*/
-			xgboost::regression::RegBoostLearner* reg_boost_learner;
-		};
-	}
+            /*! \brief the gradient boosting regression tree model*/
+            xgboost::regression::RegBoostLearner* reg_boost_learner;
+        };
+    }
 }

 #endif
--- a/regression/xgboost_regdata.h
+++ b/regression/xgboost_regdata.h
@ -2,14 +2,14 @@
 #define _XGBOOST_REGDATA_H_

 /*!
- * \file xgboost_regdata.h
- * \brief input data structure for regression and binary classification task.
- *     Format:
- *        The data should contain each data instance in each line.
- *		  The format of line data is as below:
- *        label <nonzero feature dimension> [feature index:feature value]+
- * \author Kailong Chen: chenkl198812@gmail.com, Tianqi Chen: tianqi.tchen@gmail.com
- */
+* \file xgboost_regdata.h
+* \brief input data structure for regression and binary classification task.
+*     Format:
+*        The data should contain each data instance in each line.
+*        The format of line data is as below:
+*        label <nonzero feature dimension> [feature index:feature value]+
+* \author Kailong Chen: chenkl198812@gmail.com, Tianqi Chen: tianqi.tchen@gmail.com
+*/
 #include <cstdio>
 #include <vector>
 #include "../booster/xgboost_data.h"
@ -32,16 +32,16 @@ namespace xgboost{
            DMatrix( void ){}


-			/*! \brief get the number of instances */
-			inline int size() const{
-				return labels.size();
-			}
+            /*! \brief number of instances, taken as the number of stored labels */
+            inline int size() const{
+                return static_cast<int>( labels.size() );
+            }

            /*! 
-             * \brief load from text file 
-             * \param fname name of text data
-             * \param silent whether print information or not
-             */            
+            * \brief load from text file 
+            * \param fname name of text data
+            * \param silent whether print information or not
+            */            
            inline void LoadText( const char* fname, bool silent = false ){
                data.Clear();
                FILE* file = utils::FopenCheck( fname, "r" );
@ -65,22 +65,22 @@ namespace xgboost{
                    }
                }

-				labels.push_back( label );
+                labels.push_back( label );
                data.AddRow( findex, fvalue );

                this->UpdateInfo();
                if( !silent ){
                    printf("%ux%u matrix with %lu entries is loaded from %s\n", 
-                           (unsigned)labels.size(), num_feature, (unsigned long)data.NumEntry(), fname );
+                        (unsigned)labels.size(), num_feature, (unsigned long)data.NumEntry(), fname );
                }
                fclose(file);
            }
            /*! 
-             * \brief load from binary file 
-             * \param fname name of binary data
-             * \param silent whether print information or not
-             * \return whether loading is success
-             */
+            * \brief load from binary file 
+            * \param fname name of binary data
+            * \param silent whether print information or not
+            * \return whether loading is success
+            */
            inline bool LoadBinary( const char* fname, bool silent = false ){
                FILE *fp = fopen64( fname, "rb" );
                if( fp == NULL ) return false;                
@ -92,15 +92,15 @@ namespace xgboost{
                this->UpdateInfo();
                if( !silent ){
                    printf("%ux%u matrix with %lu entries is loaded from %s\n", 
-                           (unsigned)labels.size(), num_feature, (unsigned long)data.NumEntry(), fname );
+                        (unsigned)labels.size(), num_feature, (unsigned long)data.NumEntry(), fname );
                }
                return true;
            }
            /*! 
-             * \brief save to binary file
-             * \param fname name of binary data
-             * \param silent whether print information or not
-             */
+            * \brief save to binary file
+            * \param fname name of binary data
+            * \param silent whether print information or not
+            */
            inline void SaveBinary( const char* fname, bool silent = false ){
                utils::FileStream fs( utils::FopenCheck( fname, "wb" ) );
                data.SaveBinary( fs );
@ -108,17 +108,17 @@ namespace xgboost{
                fs.Close();
                if( !silent ){
                    printf("%ux%u matrix with %lu entries is saved to %s\n", 
-                           (unsigned)labels.size(), num_feature, (unsigned long)data.NumEntry(), fname );
+                        (unsigned)labels.size(), num_feature, (unsigned long)data.NumEntry(), fname );
                }
            }
            /*! 
-             * \brief cache load data given a file name, the function will first check if fname + '.xgbuffer' exists,
-             *        if binary buffer exists, it will reads from binary buffer, otherwise, it will load from text file,
-             *        and try to create a buffer file 
-             * \param fname name of binary data
-             * \param silent whether print information or not
-             * \return whether loading is success
-             */            
+            * \brief cache load data given a file name, the function will first check if fname + '.xgbuffer' exists,
+            *        if binary buffer exists, it will reads from binary buffer, otherwise, it will load from text file,
+            *        and try to create a buffer file 
+            * \param fname name of binary data
+            * \param silent whether print information or not
+            * \return whether loading is success
+            */            
            inline void CacheLoad( const char *fname, bool silent = false ){
                char bname[ 1024 ];
                sprintf( bname, "%s.buffer", fname );