add pathdump

2014-02-26 17:08:23 -08:00
parent 88c982012a
commit 7b2fe1bf5d
7 changed files with 91 additions and 19 deletions
--- a/booster/tree/xgboost_tree.hpp
+++ b/booster/tree/xgboost_tree.hpp
@@ -57,7 +57,7 @@ namespace xgboost{
            
            virtual int GetLeafIndex( const std::vector<float> &feat,
                                      const std::vector<bool>  &funknown,
-                                  unsigned gid = 0 ){
+                                      unsigned gid = 0 ){
                // start from groups that belongs to current data
                int pid = (int)gid;
                // tranverse tree
@@ -67,18 +67,28 @@ namespace xgboost{
                }
                return pid;
            }
+
+            virtual void PredPath( std::vector<int> &path, const FMatrixS::Line &feat, unsigned gid = 0 ){
+                path.clear();
+                this->InitTmp();
+                this->PrepareTmp( feat );
+
+                int pid = (int)gid;
+                path.push_back( pid );
+                // traverse the tree, recording the id of every node visited
+                while( !tree[ pid ].is_leaf() ){                    
+                    unsigned split_index = tree[ pid ].split_index();
+                    pid = this->GetNext( pid, tmp_feat[ split_index ], tmp_funknown[ split_index ] );
+                    path.push_back( pid );
+                }                
+                this->DropTmp( feat );
+            }
+
            virtual float Predict( const FMatrixS::Line &feat, unsigned gid = 0 ){
                this->InitTmp();
-                for( unsigned i = 0; i < feat.len; i ++ ){
-                    utils::Assert( feat[i].findex < (unsigned)tmp_funknown.size() , "input feature execeed bound" );
-                    tmp_funknown[ feat[i].findex ] = false;
-                    tmp_feat[ feat[i].findex ] = feat[i].fvalue;
-                } 
+                this->PrepareTmp( feat );
                int pid = this->GetLeafIndex( tmp_feat, tmp_funknown, gid );
-                // set back
-                for( unsigned i = 0; i < feat.len; i ++ ){
-                    tmp_funknown[ feat[i].findex ] = true;
-                }            
+                this->DropTmp( feat );
                return tree[ pid ].leaf_value();
            }
            virtual float Predict( const std::vector<float> &feat, 
@@ -127,6 +137,18 @@ namespace xgboost{
                    std::fill( tmp_funknown.begin(), tmp_funknown.end(), true );
                }
            }
+            inline void PrepareTmp( const FMatrixS::Line &feat ){
+                for( unsigned i = 0; i < feat.len; i ++ ){
+                    utils::Assert( feat[i].findex < (unsigned)tmp_funknown.size() , "input feature execeed bound" );
+                    tmp_funknown[ feat[i].findex ] = false;
+                    tmp_feat[ feat[i].findex ] = feat[i].fvalue;
+                } 
+            }
+            inline void DropTmp( const FMatrixS::Line &feat ){
+                for( unsigned i = 0; i < feat.len; i ++ ){
+                    tmp_funknown[ feat[i].findex ] = true;
+                }
+            }

            inline int GetNext( int pid, float fvalue, bool is_unknown ){
                float split_value = tree[ pid ].split_cond();
--- a/booster/xgboost.h
+++ b/booster/xgboost.h
@@ -65,6 +65,14 @@ namespace xgboost{
                                  std::vector<float> &hess,
                                  const FMatrixS &feats,
                                  const std::vector<unsigned> &root_index ) = 0;
+            /*! 
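+             * \brief predict the ids of the nodes along the path through a tree for a given sparse feature vector; only meaningful when the booster is a tree (this default reports "not implemented")
+             * \param path output vector filled with the node ids visited when routing the sparse feature vector feat through the tree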
+             * \param rid root id of current instance, default = 0
+             */
+            virtual void PredPath( std::vector<int> &path, const FMatrixS::Line &feat, unsigned rid = 0 ){
+                utils::Error( "not implemented" );
+            }
            /*! 
             * \brief predict values for given sparse feature vector
             *   NOTE: in tree implementation, this is not threadsafe, used dense version to ensure threadsafety
--- a/booster/xgboost_gbmbase.h
+++ b/booster/xgboost_gbmbase.h
@@ -179,6 +179,25 @@ namespace xgboost{
                    boosters[i]->DumpModel( fo );
                }
            }
+            /*! 
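+             * \brief Dump the predicted path in every booster for each row of data: one line per row, boosters separated by tabs, node ids separated by commas
+             * \param fo text file the paths are written to
+             * \param data input feature matrix; one output line is written per row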
+             */
+            inline void DumpPath( FILE *fo, const FMatrixS &data ){
+                for( size_t i = 0; i < data.NumRow(); ++ i ){
+                    for( size_t j = 0; j < boosters.size(); ++ j ){
+                        if( j != 0 ) fprintf( fo, "\t" );
+                        std::vector<int> path;
+                        boosters[j]->PredPath( path, data[i] );
+                        fprintf( fo, "%d", path[0] );
+                        for( size_t k = 1; k < path.size(); ++ k ){
+                            fprintf( fo, ",%d", path[k] );
+                        }
+                    }
+                    fprintf( fo, "\n" );
+                }
+            }
        public:
            /*! 
             * \brief do gradient boost training for one step, using the information given
@@ -195,7 +214,7 @@ namespace xgboost{
                                 const std::vector<unsigned> &root_index ) {
                booster::IBooster *bst = this->GetUpdateBooster();
                bst->DoBoost( grad, hess, feats, root_index );
-            }
+            }            
            /*! 
             * \brief predict values for given sparse feature vector
             *   NOTE: in tree implementation, this is not threadsafe
@@ -204,7 +223,7 @@ namespace xgboost{
             * \param rid root id of current instance, default = 0
             * \return prediction 
             */        
-            virtual float Predict( const booster::FMatrixS::Line &feat, int buffer_index = -1, unsigned rid = 0 ){               
+            virtual float Predict( const booster::FMatrixS::Line &feat, int buffer_index = -1, unsigned rid = 0 ){
                size_t istart = 0;
                float  psum = 0.0f;