From f3c98d0c4b04d347e344df670b29fa5307aea015 Mon Sep 17 00:00:00 2001 From: tqchen Date: Sat, 1 Mar 2014 21:15:54 -0800 Subject: [PATCH] add smart load --- regression/xgboost_reg_main.cpp | 10 +++++++--- regression/xgboost_regdata.h | 17 +++++++++++------ 2 files changed, 18 insertions(+), 9 deletions(-) diff --git a/regression/xgboost_reg_main.cpp b/regression/xgboost_reg_main.cpp index 61751b961..298f22530 100644 --- a/regression/xgboost_reg_main.cpp +++ b/regression/xgboost_reg_main.cpp @@ -51,6 +51,7 @@ namespace xgboost{ } inline void SetParam( const char *name, const char *val ){ if( !strcmp("silent", name ) ) silent = atoi( val ); + if( !strcmp("use_buffer", name ) ) use_buffer = atoi( val ); if( !strcmp("seed", name ) ) random::Seed( atoi(val) ); if( !strcmp("num_round", name ) ) num_round = atoi( val ); if( !strcmp("save_period", name ) ) save_period = atoi( val ); @@ -71,6 +72,7 @@ namespace xgboost{ RegBoostTask( void ){ // default parameters silent = 0; + use_buffer = 1; num_round = 10; save_period = 0; task = "train"; @@ -89,14 +91,14 @@ namespace xgboost{ inline void InitData( void ){ if( task == "dump") return; if( task == "test" || task == "dumppath" ){ - data.CacheLoad( test_path.c_str() ); + data.CacheLoad( test_path.c_str(), silent!=0, use_buffer!=0 ); }else{ // training - data.CacheLoad( train_path.c_str() ); + data.CacheLoad( train_path.c_str(), silent!=0, use_buffer!=0 ); utils::Assert( eval_data_names.size() == eval_data_paths.size() ); for( size_t i = 0; i < eval_data_names.size(); ++ i ){ deval.push_back( new DMatrix() ); - deval.back()->CacheLoad( eval_data_paths[i].c_str() ); + deval.back()->CacheLoad( eval_data_paths[i].c_str(), silent!=0, use_buffer!=0 ); } } learner.SetData( &data, deval, eval_data_names ); @@ -169,6 +171,8 @@ namespace xgboost{ private: /* \brief whether silent */ int silent; + /* \brief whether use auto binary buffer */ + int use_buffer; /* \brief number of boosting iterations */ int num_round; /* \brief the period to save the model, 0 means only save the final round model */ diff --git a/regression/xgboost_regdata.h b/regression/xgboost_regdata.h index 56c1bef47..2ef822f45 100644 --- a/regression/xgboost_regdata.h +++ b/regression/xgboost_regdata.h @@ -116,20 +116,25 @@ namespace xgboost{ } } /*! - * \brief cache load data given a file name, the function will first check if fname + '.xgbuffer' exists, + * \brief cache load data given a file name, if filename ends with .buffer, direct load binary + * otherwise the function will first check if fname + '.buffer' exists, * if binary buffer exists, it will reads from binary buffer, otherwise, it will load from text file, * and try to create a buffer file * \param fname name of binary data * \param silent whether print information or not - * \return whether loading is success - */ - inline void CacheLoad( const char *fname, bool silent = false ){ + * \param savebuffer whether do save binary buffer if it is text + */ + inline void CacheLoad( const char *fname, bool silent = false, bool savebuffer = true ){ + int len = strlen( fname ); + if( len > 8 && !strcmp( fname + len - 7, ".buffer") ){ + this->LoadBinary( fname, silent ); return; + } char bname[ 1024 ]; sprintf( bname, "%s.buffer", fname ); if( !this->LoadBinary( bname, silent ) ){ this->LoadText( fname, silent ); - this->SaveBinary( bname, silent ); - } + if( savebuffer ) this->SaveBinary( bname, silent ); + } } private: /*! \brief update num_feature info */