update this folder
This commit is contained in:
parent
750871a158
commit
5a2b8678fc
1
.gitignore
vendored
1
.gitignore
vendored
@ -11,3 +11,4 @@
|
|||||||
*.lai
|
*.lai
|
||||||
*.la
|
*.la
|
||||||
*.a
|
*.a
|
||||||
|
*~
|
||||||
|
|||||||
2
Makefile
2
Makefile
@ -10,7 +10,7 @@ OBJ = xgboost.o
|
|||||||
all: $(BIN) $(OBJ)
|
all: $(BIN) $(OBJ)
|
||||||
export LDFLAGS= -pthread -lm
|
export LDFLAGS= -pthread -lm
|
||||||
|
|
||||||
xgboost.o: booster/xgboost.cpp
|
xgboost.o: booster/*.h booster/*.cpp
|
||||||
|
|
||||||
$(BIN) :
|
$(BIN) :
|
||||||
$(CXX) $(CFLAGS) $(LDFLAGS) -o $@ $(filter %.cpp %.o %.c, $^)
|
$(CXX) $(CFLAGS) $(LDFLAGS) -o $@ $(filter %.cpp %.o %.c, $^)
|
||||||
|
|||||||
16
README.md
16
README.md
@ -1,4 +1,20 @@
|
|||||||
xgboost
|
xgboost
|
||||||
=======
|
=======
|
||||||
|
Creator: Tianqi Chen: tianqi.tchen@gmail.com
|
||||||
|
|
||||||
General Purpose Gradient Boosting Library
|
General Purpose Gradient Boosting Library
|
||||||
|
|
||||||
|
Intention: A stand-alone efficient library to do machine learning in functional space
|
||||||
|
|
||||||
|
Planned key components (TODO):
|
||||||
|
|
||||||
|
(1) Gradient boosting models:
|
||||||
|
- regression tree
|
||||||
|
- linear model/lasso
|
||||||
|
(2) Objectives to support tasks:
|
||||||
|
- regression
|
||||||
|
- classification
|
||||||
|
- ranking
|
||||||
|
- matrix factorization
|
||||||
|
- structured prediction
|
||||||
|
(3) OpenMP support for parallelization(optional)
|
||||||
|
|||||||
@ -12,6 +12,7 @@
|
|||||||
|
|
||||||
/*! \brief namespace for xgboost package */
|
/*! \brief namespace for xgboost package */
|
||||||
namespace xgboost{
|
namespace xgboost{
|
||||||
|
/*! \brief namespace for boosters */
|
||||||
namespace booster{
|
namespace booster{
|
||||||
/*! \brief interface of a gradient boosting learner */
|
/*! \brief interface of a gradient boosting learner */
|
||||||
class IBooster{
|
class IBooster{
|
||||||
@ -19,11 +20,12 @@ namespace xgboost{
|
|||||||
// interface for model setting and loading
|
// interface for model setting and loading
|
||||||
// calling procedure:
|
// calling procedure:
|
||||||
// (1) booster->SetParam to set the necessary parameters
|
// (1) booster->SetParam to set the necessary parameters
|
||||||
// (2) if it is first time usage of the model: call booster->
|
// (2) if it is first time usage of the model:
|
||||||
// if new model to be trained, trainer->init_trainer
|
// call booster->InitModel
|
||||||
// elseif just to load from file, trainer->load_model
|
// else:
|
||||||
// trainer->do_boost
|
// call booster->LoadModel
|
||||||
// trainer->save_model
|
// (3) booster->DoBoost to update the model
|
||||||
|
// (4) booster->Predict to get new prediction
|
||||||
/*!
|
/*!
|
||||||
* \brief set parameters from outside
|
* \brief set parameters from outside
|
||||||
* \param name name of the parameter
|
* \param name name of the parameter
|
||||||
@ -59,7 +61,7 @@ namespace xgboost{
|
|||||||
const FMatrixS::Image &feats,
|
const FMatrixS::Image &feats,
|
||||||
const std::vector<unsigned> &root_index ) = 0;
|
const std::vector<unsigned> &root_index ) = 0;
|
||||||
/*!
|
/*!
|
||||||
* \brief predict values for given sparse feature
|
* \brief predict values for given sparse feature vector
|
||||||
* NOTE: in tree implementation, this is not threadsafe
|
* NOTE: in tree implementation, this is not threadsafe
|
||||||
* \param feat vector in sparse format
|
* \param feat vector in sparse format
|
||||||
* \param rid root id of current instance, default = 0
|
* \param rid root id of current instance, default = 0
|
||||||
@ -70,7 +72,7 @@ namespace xgboost{
|
|||||||
return 0.0f;
|
return 0.0f;
|
||||||
}
|
}
|
||||||
/*!
|
/*!
|
||||||
* \brief predict values for given dense feature
|
* \brief predict values for given dense feature vector
|
||||||
* \param feat feature vector in dense format
|
* \param feat feature vector in dense format
|
||||||
* \param funknown indicator that the feature is missing
|
* \param funknown indicator that the feature is missing
|
||||||
* \param rid root id of current instance, default = 0
|
* \param rid root id of current instance, default = 0
|
||||||
@ -88,6 +90,7 @@ namespace xgboost{
|
|||||||
*/
|
*/
|
||||||
virtual void PrintInfo( FILE *fo ){}
|
virtual void PrintInfo( FILE *fo ){}
|
||||||
public:
|
public:
|
||||||
|
/*! \brief virtual destructor */
|
||||||
virtual ~IBooster( void ){}
|
virtual ~IBooster( void ){}
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|||||||
@ -1,5 +1,6 @@
|
|||||||
#ifndef _XGBOOST_DATA_H_
|
#ifndef _XGBOOST_DATA_H_
|
||||||
#define _XGBOOST_DATA_H_
|
#define _XGBOOST_DATA_H_
|
||||||
|
|
||||||
/*!
|
/*!
|
||||||
* \file xgboost_data.h
|
* \file xgboost_data.h
|
||||||
* \brief the input data structure for gradient boosting
|
* \brief the input data structure for gradient boosting
|
||||||
@ -24,7 +25,7 @@ namespace xgboost{
|
|||||||
namespace xgboost{
|
namespace xgboost{
|
||||||
namespace booster{
|
namespace booster{
|
||||||
/*!
|
/*!
|
||||||
* \brief auxlilary feature matrix to store training instance, in sparse CSR format
|
* \brief feature matrix to store training instance, in sparse CSR format
|
||||||
*/
|
*/
|
||||||
class FMatrixS{
|
class FMatrixS{
|
||||||
public:
|
public:
|
||||||
@ -35,7 +36,7 @@ namespace xgboost{
|
|||||||
/*! \brief array of feature value */
|
/*! \brief array of feature value */
|
||||||
const bst_float *fvalue;
|
const bst_float *fvalue;
|
||||||
/*! \brief size of the data */
|
/*! \brief size of the data */
|
||||||
bst_int len;
|
bst_uint len;
|
||||||
};
|
};
|
||||||
/*!
|
/*!
|
||||||
* \brief remapped image of sparse matrix,
|
* \brief remapped image of sparse matrix,
|
||||||
@ -89,12 +90,12 @@ namespace xgboost{
|
|||||||
* \param feat sparse feature
|
* \param feat sparse feature
|
||||||
* \param fstart start bound of feature
|
* \param fstart start bound of feature
|
||||||
* \param fend end bound range of feature
|
* \param fend end bound range of feature
|
||||||
* \return the row id addted
|
* \return the row id of added line
|
||||||
*/
|
*/
|
||||||
inline size_t AddRow( const Line &feat, unsigned fstart = 0, unsigned fend = UINT_MAX ){
|
inline size_t AddRow( const Line &feat, unsigned fstart = 0, unsigned fend = UINT_MAX ){
|
||||||
utils::Assert( feat.len >= 0, "sparse feature length can not be negative" );
|
utils::Assert( feat.len >= 0, "sparse feature length can not be negative" );
|
||||||
unsigned cnt = 0;
|
unsigned cnt = 0;
|
||||||
for( int i = 0; i < feat.len; i ++ ){
|
for( unsigned i = 0; i < feat.len; i ++ ){
|
||||||
if( feat.findex[i] < fstart || feat.findex[i] >= fend ) continue;
|
if( feat.findex[i] < fstart || feat.findex[i] >= fend ) continue;
|
||||||
findex.push_back( feat.findex[i] );
|
findex.push_back( feat.findex[i] );
|
||||||
fvalue.push_back( feat.fvalue[i] );
|
fvalue.push_back( feat.fvalue[i] );
|
||||||
@ -103,11 +104,27 @@ namespace xgboost{
|
|||||||
row_ptr.push_back( row_ptr.back() + cnt );
|
row_ptr.push_back( row_ptr.back() + cnt );
|
||||||
return row_ptr.size() - 2;
|
return row_ptr.size() - 2;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*!
|
||||||
|
* \brief add a row to the matrix, with data stored in STL container
|
||||||
|
* \param findex feature index
|
||||||
|
* \param fvalue feature value
|
||||||
|
* \return the row id of added line
|
||||||
|
*/
|
||||||
|
inline size_t AddRow( const std::vector<bst_uint> &findex,
|
||||||
|
const std::vector<bst_float> &fvalue ){
|
||||||
|
FMatrixS::Line l;
|
||||||
|
utils::Assert( findex.size() == fvalue.size() );
|
||||||
|
l.findex = &findex[0];
|
||||||
|
l.fvalue = &fvalue[0];
|
||||||
|
l.len = static_cast<bst_uint>( findex.size() );
|
||||||
|
return this->AddRow( l );
|
||||||
|
}
|
||||||
/*! \brief get sparse part of current row */
|
/*! \brief get sparse part of current row */
|
||||||
inline Line operator[]( size_t sidx ) const{
|
inline Line operator[]( size_t sidx ) const{
|
||||||
Line sp;
|
Line sp;
|
||||||
utils::Assert( !bst_debug || sidx < this->NumRow(), "row id exceed bound" );
|
utils::Assert( !bst_debug || sidx < this->NumRow(), "row id exceed bound" );
|
||||||
sp.len = row_ptr[ sidx + 1 ] - row_ptr[ sidx ];
|
sp.len = static_cast<bst_uint>( row_ptr[ sidx + 1 ] - row_ptr[ sidx ] );
|
||||||
sp.findex = &findex[ row_ptr[ sidx ] ];
|
sp.findex = &findex[ row_ptr[ sidx ] ];
|
||||||
sp.fvalue = &fvalue[ row_ptr[ sidx ] ];
|
sp.fvalue = &fvalue[ row_ptr[ sidx ] ];
|
||||||
return sp;
|
return sp;
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user