diff --git a/regression/xgboost_reg_main.cpp b/regression/xgboost_reg_main.cpp new file mode 100644 index 000000000..83a15b025 --- /dev/null +++ b/regression/xgboost_reg_main.cpp @@ -0,0 +1,14 @@ +#include"xgbooost_reg_train.h" +#include"xgboost_reg_test.h" +using namespace xgboost::regression; + +int main(int argc, char *argv[]){ +// char* config_path = argv[1]; +// bool silent = ( atoi(argv[2]) == 1 ); + char* config_path = "c:\\cygwin64\\home\\chen\\github\\gboost\\demo\\regression\\reg.conf"; + bool silent = false; + RegBoostTrain train; + RegBoostTest test; + train.train(config_path,false); + test.test(config_path,false); +} \ No newline at end of file diff --git a/regression/xgboost_reg_test.h b/regression/xgboost_reg_test.h new file mode 100644 index 000000000..1e7b67b53 --- /dev/null +++ b/regression/xgboost_reg_test.h @@ -0,0 +1,87 @@ +#ifndef _XGBOOST_REG_TEST_H_ +#define _XGBOOST_REG_TEST_H_ + +#include +#include +#include +#include"../utils/xgboost_config.h" +#include"xgboost_reg.h" +#include"xgboost_regdata.h" +#include"../utils/xgboost_string.h" + +using namespace xgboost::utils; +namespace xgboost{ + namespace regression{ + class RegBoostTest{ + public: + void test(char* config_path,bool silent = false){ + reg_boost_learner = new xgboost::regression::RegBoostLearner(silent); + ConfigIterator config_itr(config_path); + //Get the training data and validation data paths, config the Learner + while (config_itr.Next()){ + reg_boost_learner->SetParam(config_itr.name(),config_itr.val()); + test_param.SetParam(config_itr.name(),config_itr.val()); + } + + Assert(test_param.test_paths.size() == test_param.test_names.size(), + "The number of test data set paths is not the same as the number of test data set data set names"); + + //begin testing + reg_boost_learner->InitModel(); + char model_path[256]; + std::vector preds; + for(int i = 0; i < test_param.test_paths.size(); i++){ + xgboost::regression::DMatrix test_data; + test_data.LoadText(test_param.test_paths[i].c_str()); + sscanf(model_path,"%s/final.model",test_param.model_dir_path); + FileStream fin(fopen(model_path,"r")); + reg_boost_learner->LoadModel(fin); + fin.Close(); + reg_boost_learner->Predict(preds,test_data); + } + } + + private: + struct TestParam{ + /* \brief upperbound of the number of boosters */ + int boost_iterations; + + /* \brief the period to save the model, -1 means only save the final round model */ + int save_period; + + /* \brief the path of directory containing the saved models */ + const char* model_dir_path; + + /* \brief the path of directory containing the output prediction results */ + const char* pred_dir_path; + + /* \brief the paths of test data sets */ + std::vector test_paths; + + /* \brief the names of the test data sets */ + std::vector test_names; + + /*! + * \brief set parameters from outside + * \param name name of the parameter + * \param val value of the parameter + */ + inline void SetParam(const char *name,const char *val ){ + if( !strcmp("model_dir_path", name ) ) model_dir_path = val; + if( !strcmp("pred_dir_path", name ) ) model_dir_path = val; + if( !strcmp("test_paths", name) ) { + test_paths = StringProcessing::split(val,';'); + } + if( !strcmp("test_names", name) ) { + test_names = StringProcessing::split(val,';'); + } + } + }; + + TestParam test_param; + xgboost::regression::RegBoostLearner* reg_boost_learner; + }; + } +} + +#endif diff --git a/regression/xgboost_reg_train.h b/regression/xgboost_reg_train.h new file mode 100644 index 000000000..40bf7565b --- /dev/null +++ b/regression/xgboost_reg_train.h @@ -0,0 +1,108 @@ +#ifndef _XGBOOST_REG_TRAIN_H_ +#define _XGBOOST_REG_TRAIN_H_ + +#include +#include +#include +#include"../utils/xgboost_config.h" +#include"xgboost_reg.h" +#include"xgboost_regdata.h" +#include"../utils/xgboost_string.h" + +using namespace xgboost::utils; +namespace xgboost{ + namespace regression{ + class RegBoostTrain{ + public: + void train(char* config_path,bool silent = false){ + reg_boost_learner = new xgboost::regression::RegBoostLearner(silent); + ConfigIterator config_itr(config_path); + //Get the training data and validation data paths, config the Learner + while (config_itr.Next()){ + reg_boost_learner->SetParam(config_itr.name(),config_itr.val()); + train_param.SetParam(config_itr.name(),config_itr.val()); + } + + Assert(train_param.validation_data_paths.size() == train_param.validation_data_names.size(), + "The number of validation paths is not the same as the number of validation data set names"); + + //Load Data + xgboost::regression::DMatrix train; + train.LoadText(train_param.train_path); + std::vector evals; + for(int i = 0; i < train_param.validation_data_paths.size(); i++){ + xgboost::regression::DMatrix eval; + eval.LoadText(train_param.validation_data_paths[i].c_str()); + evals.push_back(&eval); + } + reg_boost_learner->SetData(&train,evals,train_param.validation_data_names); + + //begin training + reg_boost_learner->InitTrainer(); + char model_path[256]; + for(int i = 1; i <= train_param.boost_iterations; i++){ + reg_boost_learner->UpdateOneIter(i); + //save the models during the iterations + if(train_param.save_period != 0 && i % train_param.save_period == 0){ + sscanf(model_path,"%s/%d.model",train_param.model_dir_path,i); + FILE* file = fopen(model_path,"w"); + FileStream fin(file); + reg_boost_learner->SaveModel(fin); + fin.Close(); + } + } + + //save the final model + sscanf(model_path,"%s/final.model",train_param.model_dir_path); + FILE* file = fopen(model_path,"w"); + FileStream fin(file); + reg_boost_learner->SaveModel(fin); + fin.Close(); + + } + private: + struct TrainParam{ + /* \brief upperbound of the number of boosters */ + int boost_iterations; + + /* \brief the period to save the model, -1 means only save the final round model */ + int save_period; + + /* \brief the path of training data set */ + const char* train_path; + + /* \brief the path of directory containing the saved models */ + const char* model_dir_path; + + /* \brief the paths of validation data sets */ + std::vector validation_data_paths; + + /* \brief the names of the validation data sets */ + std::vector validation_data_names; + + /*! + * \brief set parameters from outside + * \param name name of the parameter + * \param val value of the parameter + */ + inline void SetParam(const char *name,const char *val ){ + if( !strcmp("boost_iterations", name ) ) boost_iterations = (float)atof( val ); + if( !strcmp("save_period", name ) ) save_period = atoi( val ); + if( !strcmp("train_path", name ) ) train_path = val; + if( !strcmp("model_dir_path", name ) ) model_dir_path = val; + if( !strcmp("validation_paths", name) ) { + validation_data_paths = StringProcessing::split(val,';'); + } + if( !strcmp("validation_names", name) ) { + validation_data_names = StringProcessing::split(val,';'); + } + } + }; + + TrainParam train_param; + xgboost::regression::RegBoostLearner* reg_boost_learner; + }; + } +} + +#endif diff --git a/utils/xgboost_string.h b/utils/xgboost_string.h new file mode 100644 index 000000000..1ce056d33 --- /dev/null +++ b/utils/xgboost_string.h @@ -0,0 +1,31 @@ +#ifndef _XGBOOST_STRING_H_ +#define _XGBOOST_STRING_H_ +#include +#include + +namespace xgboost{ + namespace utils{ + class StringProcessing{ + + public: + static std::vector &split(const std::string &s, char delim, std::vector &elems) { + std::stringstream ss(s); + std::string item; + while (std::getline(ss, item, delim)) { + elems.push_back(item); + } + return elems; + } + + + static std::vector split(const std::string &s, char delim) { + std::vector elems; + split(s, delim, elems); + return elems; + } + + }; + } +} + +#endif \ No newline at end of file