update this folder

This commit is contained in:
tqchen 2014-02-06 16:06:18 -08:00
parent 750871a158
commit 5a2b8678fc
5 changed files with 50 additions and 13 deletions

1
.gitignore vendored
View File

@ -11,3 +11,4 @@
*.lai *.lai
*.la *.la
*.a *.a
*~

View File

@ -10,7 +10,7 @@ OBJ = xgboost.o
all: $(BIN) $(OBJ) all: $(BIN) $(OBJ)
export LDFLAGS= -pthread -lm export LDFLAGS= -pthread -lm
xgboost.o: booster/xgboost.cpp xgboost.o: booster/*.h booster/*.cpp
$(BIN) : $(BIN) :
$(CXX) $(CFLAGS) $(LDFLAGS) -o $@ $(filter %.cpp %.o %.c, $^) $(CXX) $(CFLAGS) $(LDFLAGS) -o $@ $(filter %.cpp %.o %.c, $^)

View File

@ -1,4 +1,20 @@
xgboost xgboost
======= =======
Creator: Tianqi Chen: tianqi.tchen@gmail.com
General Purpose Gradient Boosting Library General Purpose Gradient Boosting Library
Intention: A stand-alone efficient library to do machine learning in functional space
Planned key components (TODO):
(1) Gradient boosting models:
- regression tree
- linear model/lasso
(2) Objectives to support tasks:
- regression
- classification
- ranking
- matrix factorization
- structured prediction
(3) OpenMP support for parallelization(optional)

View File

@ -12,6 +12,7 @@
/*! \brief namespace for xgboost package */ /*! \brief namespace for xgboost package */
namespace xgboost{ namespace xgboost{
/*! \brief namespace for boosters */
namespace booster{ namespace booster{
/*! \brief interface of a gradient boosting learner */ /*! \brief interface of a gradient boosting learner */
class IBooster{ class IBooster{
@ -19,11 +20,12 @@ namespace xgboost{
// interface for model setting and loading // interface for model setting and loading
// calling procedure: // calling procedure:
// (1) booster->SetParam to setting necessary parameters // (1) booster->SetParam to setting necessary parameters
// (2) if it is first time usage of the model: call booster-> // (2) if it is first time usage of the model:
// if new model to be trained, trainer->init_trainer // call booster->InitModel
// elseif just to load from file, trainer->load_model // else:
// trainer->do_boost // call booster->LoadModel
// trainer->save_model // (3) booster->DoBoost to update the model
// (4) booster->Predict to get new prediction
/*! /*!
* \brief set parameters from outside * \brief set parameters from outside
* \param name name of the parameter * \param name name of the parameter
@ -59,7 +61,7 @@ namespace xgboost{
const FMatrixS::Image &feats, const FMatrixS::Image &feats,
const std::vector<unsigned> &root_index ) = 0; const std::vector<unsigned> &root_index ) = 0;
/*! /*!
* \brief predict values for given sparse feature * \brief predict values for given sparse feature vector
* NOTE: in tree implementation, this is not threadsafe * NOTE: in tree implementation, this is not threadsafe
* \param feat vector in sparse format * \param feat vector in sparse format
* \param rid root id of current instance, default = 0 * \param rid root id of current instance, default = 0
@ -70,7 +72,7 @@ namespace xgboost{
return 0.0f; return 0.0f;
} }
/*! /*!
* \brief predict values for given dense feature * \brief predict values for given dense feature vector
* \param feat feature vector in dense format * \param feat feature vector in dense format
* \param funknown indicator that the feature is missing * \param funknown indicator that the feature is missing
* \param rid root id of current instance, default = 0 * \param rid root id of current instance, default = 0
@ -88,6 +90,7 @@ namespace xgboost{
*/ */
virtual void PrintInfo( FILE *fo ){} virtual void PrintInfo( FILE *fo ){}
public: public:
/*! \brief virtual destructor */
virtual ~IBooster( void ){} virtual ~IBooster( void ){}
}; };
}; };

View File

@ -1,5 +1,6 @@
#ifndef _XGBOOST_DATA_H_ #ifndef _XGBOOST_DATA_H_
#define _XGBOOST_DATA_H_ #define _XGBOOST_DATA_H_
/*! /*!
* \file xgboost_data.h * \file xgboost_data.h
* \brief the input data structure for gradient boosting * \brief the input data structure for gradient boosting
@ -24,7 +25,7 @@ namespace xgboost{
namespace xgboost{ namespace xgboost{
namespace booster{ namespace booster{
/*! /*!
* \brief auxlilary feature matrix to store training instance, in sparse CSR format * \brief feature matrix to store training instance, in sparse CSR format
*/ */
class FMatrixS{ class FMatrixS{
public: public:
@ -35,7 +36,7 @@ namespace xgboost{
/*! \brief array of feature value */ /*! \brief array of feature value */
const bst_float *fvalue; const bst_float *fvalue;
/*! \brief size of the data */ /*! \brief size of the data */
bst_int len; bst_uint len;
}; };
/*! /*!
* \brief remapped image of sparse matrix, * \brief remapped image of sparse matrix,
@ -89,12 +90,12 @@ namespace xgboost{
* \param feat sparse feature * \param feat sparse feature
* \param fstart start bound of feature * \param fstart start bound of feature
* \param fend end bound range of feature * \param fend end bound range of feature
* \return the row id addted * \return the row id of the added line
*/ */
inline size_t AddRow( const Line &feat, unsigned fstart = 0, unsigned fend = UINT_MAX ){ inline size_t AddRow( const Line &feat, unsigned fstart = 0, unsigned fend = UINT_MAX ){
utils::Assert( feat.len >= 0, "sparse feature length can not be negative" ); utils::Assert( feat.len >= 0, "sparse feature length can not be negative" );
unsigned cnt = 0; unsigned cnt = 0;
for( int i = 0; i < feat.len; i ++ ){ for( unsigned i = 0; i < feat.len; i ++ ){
if( feat.findex[i] < fstart || feat.findex[i] >= fend ) continue; if( feat.findex[i] < fstart || feat.findex[i] >= fend ) continue;
findex.push_back( feat.findex[i] ); findex.push_back( feat.findex[i] );
fvalue.push_back( feat.fvalue[i] ); fvalue.push_back( feat.fvalue[i] );
@ -103,11 +104,27 @@ namespace xgboost{
row_ptr.push_back( row_ptr.back() + cnt ); row_ptr.push_back( row_ptr.back() + cnt );
return row_ptr.size() - 2; return row_ptr.size() - 2;
} }
/*!
* \brief add a row to the matrix, with data stored in STL container
* \param findex feature index
* \param fvalue feature value
* \return the row id of the added line
*/
inline size_t AddRow( const std::vector<bst_uint> &findex,
const std::vector<bst_float> &fvalue ){
FMatrixS::Line l;
utils::Assert( findex.size() == fvalue.size() );
l.findex = &findex[0];
l.fvalue = &fvalue[0];
l.len = static_cast<bst_uint>( findex.size() );
return this->AddRow( l );
}
/*! \brief get sparse part of current row */ /*! \brief get sparse part of current row */
inline Line operator[]( size_t sidx ) const{ inline Line operator[]( size_t sidx ) const{
Line sp; Line sp;
utils::Assert( !bst_debug || sidx < this->NumRow(), "row id exceed bound" ); utils::Assert( !bst_debug || sidx < this->NumRow(), "row id exceed bound" );
sp.len = row_ptr[ sidx + 1 ] - row_ptr[ sidx ]; sp.len = static_cast<bst_uint>( row_ptr[ sidx + 1 ] - row_ptr[ sidx ] );
sp.findex = &findex[ row_ptr[ sidx ] ]; sp.findex = &findex[ row_ptr[ sidx ] ];
sp.fvalue = &fvalue[ row_ptr[ sidx ] ]; sp.fvalue = &fvalue[ row_ptr[ sidx ] ];
return sp; return sp;