From 377a573097f1a1987bb44d781430f42261ccf8f4 Mon Sep 17 00:00:00 2001 From: tqchen Date: Tue, 11 Mar 2014 11:25:50 -0700 Subject: [PATCH] add remove tree --- booster/xgboost_gbmbase.h | 10 ++++++++++ demo/test/mushroom.conf | 18 ++++++++++++++++++ demo/test/runexp.sh | 14 ++++---------- regression/xgboost_reg.h | 9 +++++++-- regression/xgboost_reg_main.cpp | 8 ++++++-- 5 files changed, 45 insertions(+), 14 deletions(-) create mode 100644 demo/test/mushroom.conf diff --git a/booster/xgboost_gbmbase.h b/booster/xgboost_gbmbase.h index 7c8906edf..c6bf3f3a0 100644 --- a/booster/xgboost_gbmbase.h +++ b/booster/xgboost_gbmbase.h @@ -232,6 +232,16 @@ namespace xgboost{ } return psum; } + /*! \brief delete the specified booster */ + inline void DelteBooster( void ){ + const int bid = tparam.reupdate_booster; + utils::Assert( bid >= 0 && bid < mparam.num_boosters , "must specify booster index for deletion"); + delete boosters[ bid ]; + for( int i = bid + 1; i < mparam.num_boosters; ++ i ){ + boosters[ i - 1 ] = boosters[ i ]; + } + boosters.resize( mparam.num_boosters -= 1 ); + } /*! 
\brief update the prediction buffer, after booster have been updated */ inline void InteractRePredict( const FMatrixS &feats, bst_uint row_index, int buffer_index = -1, unsigned root_index = 0 ){ if( tparam.reupdate_booster != -1 ){ diff --git a/demo/test/mushroom.conf b/demo/test/mushroom.conf new file mode 100644 index 000000000..f11b90a1e --- /dev/null +++ b/demo/test/mushroom.conf @@ -0,0 +1,18 @@ +num_round=2 + +save_period=0 + +data = "agaricus.txt.train" +eval[test] = "agaricus.txt.test" +test:data = "agaricus.txt.test" + + +booster_type = 0 +loss_type = 2 + +bst:tree_maker=2 + +bst:eta=1.0 +bst:gamma=1.0 +bst:min_child_weight=1 +bst:max_depth=3 diff --git a/demo/test/runexp.sh b/demo/test/runexp.sh index 626df2fc8..312f072fe 100755 --- a/demo/test/runexp.sh +++ b/demo/test/runexp.sh @@ -8,22 +8,20 @@ python mknfold.py agaricus.txt 1 ../../xgboost mushroom.conf task=dump model_in=full.model fmap=featmap.txt name_dump=dump.full.txt # training -../../xgboost mushroom.conf num_round=1 model_out=m1.model bst:max_depth=1 +../../xgboost mushroom.conf num_round=2 model_out=m1.model bst:max_depth=1 # this is what dump will looklike with feature map ../../xgboost mushroom.conf task=dump model_in=m1.model fmap=featmap.txt name_dump=dump.m1.txt # interaction ../../xgboost mushroom.conf task=interact model_in=m1.model model_out=m2.model interact:booster_index=0 bst:interact:expand=1 -../../xgboost mushroom.conf task=interact model_in=m2.model model_out=m3.model interact:booster_index=0 bst:interact:expand=2 -../../xgboost mushroom.conf task=interact model_in=m3.model model_out=m3v.model interact:booster_index=0 bst:interact:remove=2 -../../xgboost mushroom.conf task=interact model_in=m3v.model model_out=m3p.model interact:booster_index=0 bst:interact:expand=2 +../../xgboost mushroom.conf task=interact model_in=m2.model model_out=m3.model interact:booster_index=0 interact:action=remove + # this is what dump will looklike with feature map +../../xgboost mushroom.conf 
task=dump model_in=m1.model fmap=featmap.txt name_dump=dump.m2.txt ../../xgboost mushroom.conf task=dump model_in=m2.model fmap=featmap.txt name_dump=dump.m2.txt ../../xgboost mushroom.conf task=dump model_in=m3.model fmap=featmap.txt name_dump=dump.m3.txt -../../xgboost mushroom.conf task=dump model_in=m3v.model fmap=featmap.txt name_dump=dump.m3v.txt -../../xgboost mushroom.conf task=dump model_in=m3p.model fmap=featmap.txt name_dump=dump.m3p.txt echo "========m1=======" cat dump.m1.txt @@ -34,11 +32,7 @@ cat dump.m2.txt echo "========m3========" cat dump.m3.txt -echo "========m3v========" -cat dump.m3v.txt -echo "========m3p========" -cat dump.m3p.txt echo "========full=======" cat dump.full.txt diff --git a/regression/xgboost_reg.h b/regression/xgboost_reg.h index 52d8cbd4e..683b37b0d 100644 --- a/regression/xgboost_reg.h +++ b/regression/xgboost_reg.h @@ -182,8 +182,13 @@ namespace xgboost{ * \brief update the model for one iteration * \param iteration iteration number */ - inline void UpdateInteract( void ){ - this->InteractPredict( preds_, *train_, 0 ); + inline void UpdateInteract( std::string action ){ + this->InteractPredict( preds_, *train_, 0 ); + + if( action == "remove" ){ + base_gbm.DelteBooster(); return; + } + int buffer_offset = static_cast( train_->Size() ); for( size_t i = 0; i < evals_.size(); ++i ){ std::vector &preds = this->eval_preds_[ i ]; diff --git a/regression/xgboost_reg_main.cpp b/regression/xgboost_reg_main.cpp index d25f62101..fe09f94a5 100644 --- a/regression/xgboost_reg_main.cpp +++ b/regression/xgboost_reg_main.cpp @@ -71,6 +71,7 @@ namespace xgboost{ if( !strcmp("name_dumppath", name ) ) name_dumppath = val; if( !strcmp("name_pred", name ) ) name_pred = val; if( !strcmp("dump_stats", name ) ) dump_model_stats = atoi( val ); + if( !strcmp("interact:action", name ) ) interact_action = val; if( !strncmp("eval[", name, 5 ) ) { char evname[ 256 ]; utils::Assert( sscanf( name, "eval[%[^]]", evname ) == 1, "must specify evaluation name 
for display"); @@ -95,6 +96,7 @@ namespace xgboost{ name_dump = "dump.txt"; name_dumppath = "dump.path.txt"; model_dir_path = "./"; + interact_action = "update"; } ~RegBoostTask( void ){ for( size_t i = 0; i < deval.size(); i ++ ){ @@ -162,7 +164,7 @@ namespace xgboost{ inline void TaskInteractive( void ){ const time_t start = time( NULL ); unsigned long elapsed = 0; - learner.UpdateInteract(); + learner.UpdateInteract( interact_action ); utils::Assert( model_out != "NULL", "interactive mode must specify model_out" ); this->SaveModel( model_out.c_str() ); elapsed = (unsigned long)(time(NULL) - start); @@ -211,7 +213,9 @@ namespace xgboost{ /* \brief number of boosting iterations */ int num_round; /* \brief the period to save the model, 0 means only save the final round model */ - int save_period; + int save_period; + /*! \brief action passed to UpdateInteract in the interact task, set by "interact:action"; defaults to "update" */ + std::string interact_action; /* \brief the path of training/test data set */ std::string train_path, test_path; /* \brief the path of test model file, or file to restart training */