From 7761d562b18c6d8b72fcc91f65135f9c56e0d0ce Mon Sep 17 00:00:00 2001
From: tqchen <workcrow@gmail.com>
Date: Sat, 1 Mar 2014 21:49:29 -0800
Subject: [PATCH] add smart decision of nfeatures

---
 booster/xgboost_gbmbase.h   |  5 +++--
 demo/mushroom/mushroom.conf |  1 -
 regression/xgboost_reg.h    | 18 ++++++++++++++----
 3 files changed, 17 insertions(+), 7 deletions(-)
diff --git a/booster/xgboost_gbmbase.h b/booster/xgboost_gbmbase.h
index 8e30f8069..88334985d 100644
--- a/booster/xgboost_gbmbase.h
+++ b/booster/xgboost_gbmbase.h
@@ -88,6 +88,9 @@ namespace xgboost{
                     if( !strcmp("bst:num_feature", name ) )  num_feature = atoi( val );
                 }
             };
+        public:
+            /*! \brief model parameters */ 
+            Param param;
         public:
             /*! \brief number of thread used */
             GBMBaseModel( void ){
@@ -323,8 +326,6 @@ namespace xgboost{
         protected:
             /*! \brief number of OpenMP threads */
             int nthread;
-            /*! \brief model parameters */ 
-            Param param;
             /*! \brief component boosters */ 
             std::vector<booster::IBooster*> boosters;
             /*! \brief some information indicator of the booster, reserved */ 
diff --git a/demo/mushroom/mushroom.conf b/demo/mushroom/mushroom.conf
index aa5e99a91..4a92767b5 100644
--- a/demo/mushroom/mushroom.conf
+++ b/demo/mushroom/mushroom.conf
@@ -10,7 +10,6 @@ test:data =  "agaricus.txt.test"
 booster_type = 0
 loss_type = 2
 
-bst:num_feature=126
 bst:eta=1.0
 bst:gamma=1.0
 bst:min_child_weight=1   
diff --git a/regression/xgboost_reg.h b/regression/xgboost_reg.h
index ed2e0a89e..7aeb856ca 100644
--- a/regression/xgboost_reg.h
+++ b/regression/xgboost_reg.h
@@ -49,19 +49,29 @@ namespace xgboost{
                 this->train_ = train;
                 this->evals_ = evals;
                 this->evname_ = evname; 
-                //assign buffer index
+                // estimate feature bound
+                int num_feature = (int)(train->data.NumCol());
+                // assign buffer index
                 unsigned buffer_size = static_cast<unsigned>( train->Size() );
                 
                 for( size_t i = 0; i < evals.size(); ++ i ){
                     buffer_size += static_cast<unsigned>( evals[i]->Size() );
+                    num_feature = std::max( num_feature, (int)(evals[i]->data.NumCol()) );
                 }
-                char snum_pbuffer[25];
-                sprintf( snum_pbuffer, "%u", buffer_size );
+
+                char str_temp[25];
+                if( num_feature > base_model.param.num_feature ){
+                    sprintf( str_temp, "%d", num_feature );
+                    base_model.SetParam( "bst:num_feature", str_temp );
+                }
+                
+                sprintf( str_temp, "%u", buffer_size );
+                base_model.SetParam( "num_pbuffer", str_temp );
                 if( !silent ){
                     printf( "buffer_size=%u\n", buffer_size );
                 }
-                base_model.SetParam( "num_pbuffer",snum_pbuffer );
                 
+                // set eval_preds tmp space
                 this->eval_preds_.resize( evals.size(), std::vector<float>() );
             }
             /*!