update this folder

2014-02-06 16:06:18 -08:00
parent ee6a0c7f4a
commit a607444038
5 changed files with 50 additions and 13 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -11,3 +11,4 @@
 *.lai
 *.la
 *.a
+*~
--- a/2
+++ b/2
@@ -10,7 +10,7 @@ OBJ = xgboost.o
 all: $(BIN) $(OBJ)
 export LDFLAGS= -pthread -lm 

-xgboost.o: booster/xgboost.cpp
+xgboost.o: booster/*.h booster/*.cpp

 $(BIN) : 
 	$(CXX) $(CFLAGS) $(LDFLAGS) -o $@ $(filter %.cpp %.o %.c, $^)
--- a/README.md
+++ b/README.md
@@ -1,4 +1,20 @@
 xgboost
 =======
+Creator: Tianqi Chen: tianqi.tchen@gmail.com

 General Purpose Gradient Boosting Library
+
+Intention: A stand-alone efficient library to do machine learning in functional space
+
+Planned key components (TODO):
+
+(1) Gradient boosting models: 
+    - regression tree
+    - linear model/lasso
+(2) Objectives to support tasks: 
+    - regression
+    - classification
+    - ranking
+    - matrix factorization
+    - structured prediction
+(3) OpenMP support for parallelization (optional)
--- a/booster/xgboost.h
+++ b/booster/xgboost.h
@@ -12,6 +12,7 @@

 /*! \brief namespace for xgboost package */
 namespace xgboost{
+    /*! \brief namespace for boosters */
    namespace booster{
        /*! \brief interface of a gradient boosting learner */
        class IBooster{
@@ -19,11 +20,12 @@ namespace xgboost{
            // interface for model setting and loading
            // calling procedure:
            //  (1) booster->SetParam to set necessary parameters
-            //  (2) if it is first time usage of the model: call booster->
-            //   if new model to be trained, trainer->init_trainer
-            //   elseif just to load from file, trainer->load_model
-            //   trainer->do_boost
-            //   trainer->save_model
+            //  (2) if this is the first use of the model: 
+            //          call booster->InitModel
+            //      else: 
+            //          call booster->LoadModel
+            //  (3) booster->DoBoost to update the model
+            //  (4) booster->Predict to get new prediction
            /*! 
             * \brief set parameters from outside 
             * \param name name of the parameter
@@ -59,7 +61,7 @@ namespace xgboost{
                                  const FMatrixS::Image &feats,
                                  const std::vector<unsigned> &root_index ) = 0;
            /*! 
-             * \brief predict values for given sparse feature
+             * \brief predict values for given sparse feature vector
             *   NOTE: in tree implementation, this is not threadsafe
             * \param feat vector in sparse format
             * \param rid root id of current instance, default = 0
@@ -70,7 +72,7 @@ namespace xgboost{
                return 0.0f;
            }
            /*! 
-             * \brief predict values for given dense feature
+             * \brief predict values for given dense feature vector
             * \param feat feature vector in dense format
             * \param funknown indicator that the feature is missing
             * \param rid root id of current instance, default = 0
@@ -88,6 +90,7 @@ namespace xgboost{
             */        
            virtual void PrintInfo( FILE *fo ){}
        public:
+            /*! \brief virtual destructor */
            virtual ~IBooster( void ){}
        };    
    };
--- a/booster/xgboost_data.h
+++ b/booster/xgboost_data.h
@@ -1,5 +1,6 @@
 #ifndef _XGBOOST_DATA_H_
 #define _XGBOOST_DATA_H_
+
 /*!
 * \file xgboost_data.h
 * \brief the input data structure for gradient boosting
@@ -24,7 +25,7 @@ namespace xgboost{
 namespace xgboost{
    namespace booster{
        /*! 
-         * \brief auxlilary feature matrix to store training instance, in sparse CSR format
+         * \brief feature matrix to store training instance, in sparse CSR format
         */
        class FMatrixS{
        public:
@@ -35,7 +36,7 @@ namespace xgboost{
                /*! \brief array of feature value */
                const bst_float *fvalue;
                /*! \brief size of the data */
-                bst_int len;
+                bst_uint len;
            };
            /*! 
             * \brief remapped image of sparse matrix, 
@@ -89,12 +90,12 @@ namespace xgboost{
             *  \param feat sparse feature
             *  \param fstart start bound of feature
             *  \param fend   end bound range of feature
-             *  \return the row id addted
+             *  \return the row id of added line
             */
            inline size_t AddRow( const Line &feat, unsigned fstart = 0, unsigned fend = UINT_MAX ){
                utils::Assert( feat.len >= 0, "sparse feature length can not be negative" );
                unsigned cnt = 0;
-                for( int i = 0; i < feat.len; i ++ ){
+                for( unsigned i = 0; i < feat.len; i ++ ){
                    if( feat.findex[i] < fstart || feat.findex[i] >= fend ) continue;
                    findex.push_back( feat.findex[i] );
                    fvalue.push_back( feat.fvalue[i] );
@@ -103,11 +104,27 @@ namespace xgboost{
                row_ptr.push_back( row_ptr.back() + cnt );
                return row_ptr.size() - 2;
            }
+
+            /*! 
+             * \brief add a row to the matrix, with data stored in STL containers
+             * \param findex feature indices of the row
+             * \param fvalue feature values of the row, must have the same size as findex
+             * \return the row id of the added line
+             */
+            inline size_t AddRow( const std::vector<bst_uint> &findex, 
+                                  const std::vector<bst_float> &fvalue ){
+                utils::Assert( findex.size() == fvalue.size(), "findex and fvalue must have the same size" );
+                FMatrixS::Line l;
+                l.len = static_cast<bst_uint>( findex.size() );
+                // taking &v[0] of an empty vector is undefined behavior, so an
+                // empty row is passed on as null pointers together with len == 0
+                l.findex = findex.empty() ? 0 : &findex[0];
+                l.fvalue = fvalue.empty() ? 0 : &fvalue[0];
+                return this->AddRow( l );
+            }
            /*! \brief get sparse part of current row */
            inline Line operator[]( size_t sidx ) const{
                Line sp;
                utils::Assert( !bst_debug || sidx < this->NumRow(), "row id exceed bound" );
-                sp.len = row_ptr[ sidx + 1 ] - row_ptr[ sidx ];
+                sp.len = static_cast<bst_uint>( row_ptr[ sidx + 1 ] - row_ptr[ sidx ] );
                sp.findex = &findex[ row_ptr[ sidx ] ];
                sp.fvalue = &fvalue[ row_ptr[ sidx ] ];
                return sp;