Merge branch 'master' of https://github.com/tqchen/xgboost

2014-03-22 21:50:31 +08:00 · 2014-03-22 21:50:31 +08:00 · 55d1b1e109
commit 55d1b1e109
parent 193d1d165f bc071cac4f
3 changed files with 38 additions and 18 deletions
--- a/README.md
+++ b/README.md
@ -3,8 +3,8 @@ xgboost: eXtreme Gradient Boosting
 An efficient general purpose gradient boosting (tree) library.

 Creater: Tianqi Chen
- 
-Documentation: https://github.com/tqchen/xgboost/wiki
+
+Tutorial and Documentation: https://github.com/tqchen/xgboost/wiki
 

 Features
@ -15,7 +15,7 @@ Features
  - Efficient implementation that optimizes memory and computation.
 * Layout of gradient boosting algorithm to support generic tasks, see project wiki.

-Planned key components
+Supported key components
 =======
 * Gradient boosting models: 
    - regression tree (GBRT)
@ -23,10 +23,14 @@ Planned key components
 * Objectives to support tasks: 
    - regression
    - classification
+* OpenMP implementation
+
+Planned components
+=======
+* More objectives to support tasks: 
    - ranking
    - matrix factorization
    - structured prediction
-(3) OpenMP implementation

 File extension convention: 
 (1) .h are interface, utils and data structures, with detailed comment; 
--- a/demo/test/runexp.sh
+++ b/demo/test/runexp.sh
@ -4,23 +4,20 @@ python mapfeat.py
 # split train and test
 python mknfold.py agaricus.txt 1
 # training
-../../xgboost mushroom.conf num_round=1 model_out=full.model bst:max_depth=3
+../../xgboost mushroom.conf num_round=2 model_out=full.model bst:max_depth=3
 ../../xgboost mushroom.conf task=dump model_in=full.model fmap=featmap.txt name_dump=dump.full.txt

-# constrain
-../../xgboost mushroom.conf num_round=1 model_out=ban.model bst:max_depth=3 bst:fban=22-31 
+# key idea of batch running: add the batch: prefix to each setting; batch:run=1 then runs the action using the settings given so far

-# constrain
-../../xgboost mushroom.conf num_round=1 model_out=pass.model bst:max_depth=3 bst:fdefault=-1 bst:fpass=22-31
+../../xgboost mushroom.conf model_in=full.model model_out=m1.model task=interact\
+ batch:interact:booster_index=0 batch:bst:interact:remove=1 batch:run=1\
+ batch:interact:booster_index=1 batch:bst:interact:remove=1 batch:run=1\
+ batch:interact:booster_index=1 batch:bst:interact:expand=9 batch:run=1\

-../../xgboost mushroom.conf task=dump model_in=ban.model fmap=featmap.txt name_dump=dump.ban.txt
-../../xgboost mushroom.conf task=dump model_in=pass.model fmap=featmap.txt name_dump=dump.pass.txt
+../../xgboost mushroom.conf task=dump model_in=m1.model fmap=featmap.txt name_dump=dump.m1.txt

 echo "========full======="
 cat dump.full.txt

-echo "========ban======="
-cat dump.ban.txt
-
-echo "========pass======="
-cat dump.pass.txt
+echo "========m1======="
+cat dump.m1.txt
--- a/regression/xgboost_reg_main.cpp
+++ b/regression/xgboost_reg_main.cpp
@ -73,6 +73,9 @@ namespace xgboost{
                if( !strcmp("name_pred",  name ) )   name_pred = val;
                if( !strcmp("dump_stats", name ) )   dump_model_stats = atoi( val );
                if( !strcmp("interact:action",  name ) )  interact_action = val;
+                if( !strncmp("batch:",  name, 6 ) ){
+                    cfg_batch.PushBack( name + 6, val );
+                }
                if( !strncmp("eval[",  name, 5 ) ) {
                    char evname[ 256 ];
                    utils::Assert( sscanf( name, "eval[%[^]]", evname ) == 1, "must specify evaluation name for display");
@ -167,13 +170,27 @@ namespace xgboost{
            inline void TaskInteractive( void ){
                const time_t start    = time( NULL );
                unsigned long elapsed = 0;
-                learner.UpdateInteract( interact_action );
+                int batch_action = 0;
+                
+                cfg_batch.BeforeFirst();
+                while( cfg_batch.Next() ){
+                    if( !strcmp( cfg_batch.name(), "run" ) ){
+                        learner.UpdateInteract( interact_action );
+                        batch_action += 1;
+                    } else{
+                        learner.SetParam( cfg_batch.name(), cfg_batch.val() );
+                    }
+                }
+
+                if( batch_action == 0 ){
+                    learner.UpdateInteract( interact_action );
+                }
                utils::Assert( model_out != "NULL", "interactive mode must specify model_out" );
                this->SaveModel( model_out.c_str() );
                elapsed = (unsigned long)(time(NULL) - start); 

                if( !silent ){
-                    printf("\ninteractive update, %lu sec in all\n", elapsed );
+                    printf("\ninteractive update, %d batch actions, %lu sec in all\n", batch_action, elapsed );
                }
            }

@ -245,6 +262,8 @@ namespace xgboost{
            std::vector<std::string> eval_data_names;            
            /*! \brief saves configurations */
            utils::ConfigSaver cfg;
+            /*! \brief batch configurations */
+            utils::ConfigSaver cfg_batch;
        private:
            DMatrix data;
            std::vector<DMatrix*> deval;