rank pass toy
This commit is contained in:
@@ -2,7 +2,7 @@
|
||||
#define XGBOOST_INL_HPP
|
||||
/*!
|
||||
* \file xgboost-inl.hpp
|
||||
* \brief booster implementations
|
||||
* \brief booster implementations
|
||||
* \author Tianqi Chen: tianqi.tchen@gmail.com
|
||||
*/
|
||||
// implementations of boosters go here
|
||||
@@ -18,7 +18,7 @@
|
||||
#include "linear/xgboost_linear.hpp"
|
||||
|
||||
namespace xgboost{
|
||||
namespace booster{
|
||||
namespace booster{
|
||||
/*!
|
||||
* \brief create a gradient booster, given type of booster
|
||||
* \param booster_type type of gradient booster, can be used to specify the implementation
|
||||
@@ -26,14 +26,14 @@ namespace xgboost{
|
||||
* \return the pointer to the gradient booster created
|
||||
*/
|
||||
template<typename FMatrix>
|
||||
inline InterfaceBooster<FMatrix> *CreateBooster( int booster_type ){
|
||||
switch( booster_type ){
|
||||
inline InterfaceBooster<FMatrix> *CreateBooster(int booster_type){
|
||||
switch (booster_type){
|
||||
case 0: return new RegTreeTrainer<FMatrix>();
|
||||
case 1: return new LinearBooster<FMatrix>();
|
||||
default: utils::Error("unknown booster_type"); return NULL;
|
||||
}
|
||||
}
|
||||
}; // namespace booster
|
||||
}
|
||||
}; // namespace booster
|
||||
}; // namespace xgboost
|
||||
|
||||
#endif // XGBOOST_INL_HPP
|
||||
|
||||
@@ -19,8 +19,8 @@
|
||||
namespace xgboost{
|
||||
/*! \brief namespace for boosters */
|
||||
namespace booster{
|
||||
/*!
|
||||
* \brief interface of a gradient boosting learner
|
||||
/*!
|
||||
* \brief interface of a gradient boosting learner
|
||||
* \tparam FMatrix the feature matrix format that the booster takes
|
||||
*/
|
||||
template<typename FMatrix>
|
||||
@@ -35,101 +35,101 @@ namespace xgboost{
|
||||
// call booster->LoadModel
|
||||
// (3) booster->DoBoost to update the model
|
||||
// (4) booster->Predict to get new prediction
|
||||
/*!
|
||||
* \brief set parameters from outside
|
||||
/*!
|
||||
* \brief set parameters from outside
|
||||
* \param name name of the parameter
|
||||
* \param val value of the parameter
|
||||
*/
|
||||
virtual void SetParam( const char *name, const char *val ) = 0;
|
||||
/*!
|
||||
virtual void SetParam(const char *name, const char *val) = 0;
|
||||
/*!
|
||||
* \brief load model from stream
|
||||
* \param fi input stream
|
||||
*/
|
||||
virtual void LoadModel( utils::IStream &fi ) = 0;
|
||||
/*!
|
||||
virtual void LoadModel(utils::IStream &fi) = 0;
|
||||
/*!
|
||||
* \brief save model to stream
|
||||
* \param fo output stream
|
||||
*/
|
||||
virtual void SaveModel( utils::IStream &fo ) const = 0;
|
||||
virtual void SaveModel(utils::IStream &fo) const = 0;
|
||||
/*!
|
||||
* \brief initialize solver before training, called before training
|
||||
* this function is reserved for solver to allocate necessary space and do other preparation
|
||||
* this function is reserved for solver to allocate necessary space and do other preparation
|
||||
*/
|
||||
virtual void InitModel( void ) = 0;
|
||||
virtual void InitModel(void) = 0;
|
||||
public:
|
||||
/*!
|
||||
* \brief do gradient boost training for one step, using the information given,
|
||||
/*!
|
||||
* \brief do gradient boost training for one step, using the information given,
|
||||
* Note: content of grad and hess can change after DoBoost
|
||||
* \param grad first order gradient of each instance
|
||||
* \param hess second order gradient of each instance
|
||||
* \param feats features of each instance
|
||||
* \param root_index pre-partitioned root index of each instance,
|
||||
* \param root_index pre-partitioned root index of each instance,
|
||||
* root_index.size() can be 0 which indicates that no pre-partition involved
|
||||
*/
|
||||
virtual void DoBoost( std::vector<float> &grad,
|
||||
std::vector<float> &hess,
|
||||
const FMatrix &feats,
|
||||
const std::vector<unsigned> &root_index ) = 0;
|
||||
/*!
|
||||
virtual void DoBoost(std::vector<float> &grad,
|
||||
std::vector<float> &hess,
|
||||
const FMatrix &feats,
|
||||
const std::vector<unsigned> &root_index) = 0;
|
||||
/*!
|
||||
* \brief predict the path ids along the trees for a given sparse feature vector; applies when the booster is a tree
|
||||
* \param path the result of path
|
||||
* \param feats feature matrix
|
||||
* \param row_index row index in the feature matrix
|
||||
* \param root_index root id of current instance, default = 0
|
||||
*/
|
||||
virtual void PredPath( std::vector<int> &path, const FMatrix &feats,
|
||||
bst_uint row_index, unsigned root_index = 0 ){
|
||||
utils::Error( "not implemented" );
|
||||
virtual void PredPath(std::vector<int> &path, const FMatrix &feats,
|
||||
bst_uint row_index, unsigned root_index = 0){
|
||||
utils::Error("not implemented");
|
||||
}
|
||||
/*!
|
||||
/*!
|
||||
* \brief predict values for given sparse feature vector
|
||||
*
|
||||
*
|
||||
* NOTE: in tree implementation, Sparse Predict is OpenMP threadsafe, but not threadsafe in general,
|
||||
* use the dense version of Predict to ensure thread safety
|
||||
* \param feats feature matrix
|
||||
* \param row_index row index in the feature matrix
|
||||
* \param root_index root id of current instance, default = 0
|
||||
* \return prediction
|
||||
*/
|
||||
virtual float Predict( const FMatrix &feats, bst_uint row_index, unsigned root_index = 0 ){
|
||||
utils::Error( "not implemented" );
|
||||
* \return prediction
|
||||
*/
|
||||
virtual float Predict(const FMatrix &feats, bst_uint row_index, unsigned root_index = 0){
|
||||
utils::Error("not implemented");
|
||||
return 0.0f;
|
||||
}
|
||||
/*!
|
||||
/*!
|
||||
* \brief predict values for given dense feature vector
|
||||
* \param feat feature vector in dense format
|
||||
* \param funknown indicator that the feature is missing
|
||||
* \param rid root id of current instance, default = 0
|
||||
* \return prediction
|
||||
*/
|
||||
virtual float Predict( const std::vector<float> &feat,
|
||||
const std::vector<bool> &funknown,
|
||||
unsigned rid = 0 ){
|
||||
utils::Error( "not implemented" );
|
||||
virtual float Predict(const std::vector<float> &feat,
|
||||
const std::vector<bool> &funknown,
|
||||
unsigned rid = 0){
|
||||
utils::Error("not implemented");
|
||||
return 0.0f;
|
||||
}
|
||||
/*!
|
||||
/*!
|
||||
* \brief print information
|
||||
* \param fo output stream
|
||||
*/
|
||||
virtual void PrintInfo( FILE *fo ){}
|
||||
/*!
|
||||
* \param fo output stream
|
||||
*/
|
||||
virtual void PrintInfo(FILE *fo){}
|
||||
/*!
|
||||
* \brief dump model into text file
|
||||
* \param fo output stream
|
||||
* \param fo output stream
|
||||
* \param fmap feature map that may help give interpretations of feature
|
||||
* \param with_stats whether print statistics
|
||||
*/
|
||||
virtual void DumpModel( FILE *fo, const utils::FeatMap& fmap, bool with_stats = false ){
|
||||
utils::Error( "not implemented" );
|
||||
virtual void DumpModel(FILE *fo, const utils::FeatMap& fmap, bool with_stats = false){
|
||||
utils::Error("not implemented");
|
||||
}
|
||||
public:
|
||||
/*! \brief virtual destructor */
|
||||
virtual ~InterfaceBooster( void ){}
|
||||
virtual ~InterfaceBooster(void){}
|
||||
};
|
||||
};
|
||||
namespace booster{
|
||||
/*!
|
||||
* \brief this is the most commonly used booster interface
|
||||
/*!
|
||||
* \brief this is the most commonly used booster interface
|
||||
* we try to make the booster independent of data structures, but in most cases FMatrixS is what we want
|
||||
*/
|
||||
typedef InterfaceBooster<FMatrixS> IBooster;
|
||||
@@ -138,7 +138,7 @@ namespace xgboost{
|
||||
|
||||
namespace xgboost{
|
||||
namespace booster{
|
||||
/*!
|
||||
/*!
|
||||
* \brief create a gradient booster, given type of booster
|
||||
* normally we use FMatrixS, by calling CreateBooster<FMatrixS>
|
||||
* \param booster_type type of gradient booster, can be used to specify the implementation
|
||||
@@ -146,7 +146,7 @@ namespace xgboost{
|
||||
* \return the pointer to the gradient booster created
|
||||
*/
|
||||
template<typename FMatrix>
|
||||
inline InterfaceBooster<FMatrix> *CreateBooster( int booster_type );
|
||||
inline InterfaceBooster<FMatrix> *CreateBooster(int booster_type);
|
||||
};
|
||||
};
|
||||
|
||||
|
||||
@@ -21,76 +21,76 @@ namespace xgboost{
|
||||
typedef unsigned bst_uint;
|
||||
/*! \brief float type used in boost */
|
||||
typedef float bst_float;
|
||||
/*! \brief debug option for booster */
|
||||
const bool bst_debug = false;
|
||||
/*! \brief debug option for booster */
|
||||
const bool bst_debug = false;
|
||||
};
|
||||
};
|
||||
|
||||
namespace xgboost{
|
||||
namespace booster{
|
||||
/**
|
||||
* \brief This is an interface, defining the way to access features,
|
||||
* \brief This is an interface, defining the way to access features,
|
||||
* by column or by row. This interface is used so that the implementation
|
||||
* of a booster does not depend on how features are stored.
|
||||
*
|
||||
* Why template instead of virtual class: for efficiency
|
||||
* feature matrix is going to be used by most inner loop of the algorithm
|
||||
*
|
||||
* \tparam Derived type of actual implementation
|
||||
* \tparam Derived type of actual implementation
|
||||
* \sa FMatrixS: most of the time FMatrixS is sufficient; refer to it if you find this interface confusing
|
||||
*/
|
||||
template<typename Derived>
|
||||
struct FMatrix{
|
||||
public:
|
||||
/*! \brief example iterator over one row */
|
||||
/*! \brief example iterator over one row */
|
||||
struct RowIter{
|
||||
/*!
|
||||
* \brief move to next position
|
||||
/*!
|
||||
* \brief move to next position
|
||||
* \return whether there is element in next position
|
||||
*/
|
||||
|
||||
inline bool Next( void );
|
||||
inline bool Next(void);
|
||||
/*! \return feature index in current position */
|
||||
inline bst_uint findex( void ) const;
|
||||
inline bst_uint findex(void) const;
|
||||
/*! \return feature value in current position */
|
||||
inline bst_float fvalue( void ) const;
|
||||
inline bst_float fvalue(void) const;
|
||||
};
|
||||
/*! \brief example iterator over one column */
|
||||
struct ColIter{
|
||||
/*!
|
||||
* \brief move to next position
|
||||
/*!
|
||||
* \brief move to next position
|
||||
* \return whether there is element in next position
|
||||
*/
|
||||
inline bool Next( void );
|
||||
inline bool Next(void);
|
||||
/*! \return row index of current position */
|
||||
inline bst_uint rindex( void ) const;
|
||||
inline bst_uint rindex(void) const;
|
||||
/*! \return feature value in current position */
|
||||
inline bst_float fvalue( void ) const;
|
||||
inline bst_float fvalue(void) const;
|
||||
};
|
||||
/*! \brief backward iterator over column */
|
||||
struct ColBackIter : public ColIter {};
|
||||
public:
|
||||
/*!
|
||||
* \brief get number of rows
|
||||
/*!
|
||||
* \brief get number of rows
|
||||
* \return number of rows
|
||||
*/
|
||||
inline size_t NumRow( void ) const;
|
||||
/*!
|
||||
inline size_t NumRow(void) const;
|
||||
/*!
|
||||
* \brief get number of columns
|
||||
* \return number of columns
|
||||
*/
|
||||
inline size_t NumCol( void ) const;
|
||||
inline size_t NumCol(void) const;
|
||||
/*!
|
||||
* \brief get row iterator
|
||||
* \param ridx row index
|
||||
* \return row iterator
|
||||
*/
|
||||
inline RowIter GetRow( size_t ridx ) const;
|
||||
/*!
|
||||
inline RowIter GetRow(size_t ridx) const;
|
||||
/*!
|
||||
* \brief get number of column groups, this is used together with GetRow( ridx, gid )
|
||||
* \return number of column group
|
||||
*/
|
||||
inline unsigned NumColGroup( void ) const{
|
||||
inline unsigned NumColGroup(void) const{
|
||||
return 1;
|
||||
}
|
||||
/*!
|
||||
@@ -99,32 +99,32 @@ namespace xgboost{
|
||||
* \param gid column group id
|
||||
* \return row iterator, only iterates over features of specified column group
|
||||
*/
|
||||
inline RowIter GetRow( size_t ridx, unsigned gid ) const;
|
||||
inline RowIter GetRow(size_t ridx, unsigned gid) const;
|
||||
|
||||
/*! \return whether column access is enabled */
|
||||
inline bool HaveColAccess( void ) const;
|
||||
inline bool HaveColAccess(void) const;
|
||||
/*!
|
||||
* \brief get column iterator, the columns must be sorted by feature value
|
||||
* \param ridx column index
|
||||
* \return column iterator
|
||||
*/
|
||||
inline ColIter GetSortedCol( size_t ridx ) const;
|
||||
inline ColIter GetSortedCol(size_t ridx) const;
|
||||
/*!
|
||||
* \brief get column backward iterator, which starts from the biggest fvalue and iterates backward
|
||||
* \param ridx column index
|
||||
* \return reverse column iterator
|
||||
*/
|
||||
inline ColBackIter GetReverseSortedCol( size_t ridx ) const;
|
||||
inline ColBackIter GetReverseSortedCol(size_t ridx) const;
|
||||
};
|
||||
};
|
||||
};
|
||||
|
||||
namespace xgboost{
|
||||
namespace booster{
|
||||
/*!
|
||||
/*!
|
||||
* \brief feature matrix to store training instance, in sparse CSR format
|
||||
*/
|
||||
class FMatrixS: public FMatrix<FMatrixS>{
|
||||
*/
|
||||
class FMatrixS : public FMatrix<FMatrixS>{
|
||||
public:
|
||||
/*! \brief one entry in a row */
|
||||
struct REntry{
|
||||
@@ -133,10 +133,10 @@ namespace xgboost{
|
||||
/*! \brief feature value */
|
||||
bst_float fvalue;
|
||||
/*! \brief constructor */
|
||||
REntry( void ){}
|
||||
REntry(void){}
|
||||
/*! \brief constructor */
|
||||
REntry( bst_uint findex, bst_float fvalue ) : findex(findex), fvalue(fvalue){}
|
||||
inline static bool cmp_fvalue( const REntry &a, const REntry &b ){
|
||||
REntry(bst_uint findex, bst_float fvalue) : findex(findex), fvalue(fvalue){}
|
||||
inline static bool cmp_fvalue(const REntry &a, const REntry &b){
|
||||
return a.fvalue < b.fvalue;
|
||||
}
|
||||
};
|
||||
@@ -147,79 +147,79 @@ namespace xgboost{
|
||||
/*! \brief size of the data */
|
||||
bst_uint len;
|
||||
/*! \brief get k-th element */
|
||||
inline const REntry& operator[]( unsigned i ) const{
|
||||
inline const REntry& operator[](unsigned i) const{
|
||||
return data_[i];
|
||||
}
|
||||
}
|
||||
};
|
||||
/*! \brief row iterator */
|
||||
struct RowIter{
|
||||
const REntry *dptr_, *end_;
|
||||
RowIter( const REntry* dptr, const REntry* end )
|
||||
:dptr_(dptr),end_(end){}
|
||||
inline bool Next( void ){
|
||||
if( dptr_ == end_ ) return false;
|
||||
RowIter(const REntry* dptr, const REntry* end)
|
||||
:dptr_(dptr), end_(end){}
|
||||
inline bool Next(void){
|
||||
if (dptr_ == end_) return false;
|
||||
else{
|
||||
++ dptr_; return true;
|
||||
++dptr_; return true;
|
||||
}
|
||||
}
|
||||
inline bst_uint findex( void ) const{
|
||||
inline bst_uint findex(void) const{
|
||||
return dptr_->findex;
|
||||
}
|
||||
inline bst_float fvalue( void ) const{
|
||||
inline bst_float fvalue(void) const{
|
||||
return dptr_->fvalue;
|
||||
}
|
||||
};
|
||||
/*! \brief column iterator */
|
||||
struct ColIter: public RowIter{
|
||||
ColIter( const REntry* dptr, const REntry* end )
|
||||
:RowIter( dptr, end ){}
|
||||
inline bst_uint rindex( void ) const{
|
||||
struct ColIter : public RowIter{
|
||||
ColIter(const REntry* dptr, const REntry* end)
|
||||
:RowIter(dptr, end){}
|
||||
inline bst_uint rindex(void) const{
|
||||
return this->findex();
|
||||
}
|
||||
};
|
||||
/*! \brief reverse column iterator */
|
||||
struct ColBackIter: public ColIter{
|
||||
ColBackIter( const REntry* dptr, const REntry* end )
|
||||
:ColIter( dptr, end ){}
|
||||
struct ColBackIter : public ColIter{
|
||||
ColBackIter(const REntry* dptr, const REntry* end)
|
||||
:ColIter(dptr, end){}
|
||||
// shadows RowIter::Next
|
||||
inline bool Next( void ){
|
||||
if( dptr_ == end_ ) return false;
|
||||
inline bool Next(void){
|
||||
if (dptr_ == end_) return false;
|
||||
else{
|
||||
-- dptr_; return true;
|
||||
--dptr_; return true;
|
||||
}
|
||||
}
|
||||
};
|
||||
public:
|
||||
/*! \brief constructor */
|
||||
FMatrixS( void ){ this->Clear(); }
|
||||
FMatrixS(void){ this->Clear(); }
|
||||
/*! \brief get number of rows */
|
||||
inline size_t NumRow( void ) const{
|
||||
inline size_t NumRow(void) const{
|
||||
return row_ptr_.size() - 1;
|
||||
}
|
||||
/*!
|
||||
/*!
|
||||
* \brief get number of nonzero entries
|
||||
* \return number of nonzero entries
|
||||
*/
|
||||
inline size_t NumEntry( void ) const{
|
||||
inline size_t NumEntry(void) const{
|
||||
return row_data_.size();
|
||||
}
|
||||
/*! \brief clear the storage */
|
||||
inline void Clear( void ){
|
||||
inline void Clear(void){
|
||||
row_ptr_.clear();
|
||||
row_ptr_.push_back( 0 );
|
||||
row_ptr_.push_back(0);
|
||||
row_data_.clear();
|
||||
col_ptr_.clear();
|
||||
col_data_.clear();
|
||||
}
|
||||
/*! \brief get sparse part of current row */
|
||||
inline Line operator[]( size_t sidx ) const{
|
||||
inline Line operator[](size_t sidx) const{
|
||||
Line sp;
|
||||
utils::Assert( !bst_debug || sidx < this->NumRow(), "row id exceed bound" );
|
||||
sp.len = static_cast<bst_uint>( row_ptr_[ sidx + 1 ] - row_ptr_[ sidx ] );
|
||||
sp.data_ = &row_data_[ row_ptr_[ sidx ] ];
|
||||
utils::Assert(!bst_debug || sidx < this->NumRow(), "row id exceed bound");
|
||||
sp.len = static_cast<bst_uint>(row_ptr_[sidx + 1] - row_ptr_[sidx]);
|
||||
sp.data_ = &row_data_[row_ptr_[sidx]];
|
||||
return sp;
|
||||
}
|
||||
/*!
|
||||
/*!
|
||||
* \brief add a row to the matrix, with data stored in STL container
|
||||
* \param findex feature index
|
||||
* \param fvalue feature value
|
||||
@@ -227,155 +227,155 @@ namespace xgboost{
|
||||
* \param fend end bound range of feature
|
||||
* \return the row id of the added line
|
||||
*/
|
||||
inline size_t AddRow( const std::vector<bst_uint> &findex,
|
||||
const std::vector<bst_float> &fvalue,
|
||||
unsigned fstart = 0, unsigned fend = UINT_MAX ){
|
||||
utils::Assert( findex.size() == fvalue.size() );
|
||||
inline size_t AddRow(const std::vector<bst_uint> &findex,
|
||||
const std::vector<bst_float> &fvalue,
|
||||
unsigned fstart = 0, unsigned fend = UINT_MAX){
|
||||
utils::Assert(findex.size() == fvalue.size());
|
||||
unsigned cnt = 0;
|
||||
for( size_t i = 0; i < findex.size(); i ++ ){
|
||||
if( findex[i] < fstart || findex[i] >= fend ) continue;
|
||||
row_data_.push_back( REntry( findex[i], fvalue[i] ) );
|
||||
cnt ++;
|
||||
for (size_t i = 0; i < findex.size(); i++){
|
||||
if (findex[i] < fstart || findex[i] >= fend) continue;
|
||||
row_data_.push_back(REntry(findex[i], fvalue[i]));
|
||||
cnt++;
|
||||
}
|
||||
row_ptr_.push_back( row_ptr_.back() + cnt );
|
||||
row_ptr_.push_back(row_ptr_.back() + cnt);
|
||||
return row_ptr_.size() - 2;
|
||||
}
|
||||
/*! \brief get row iterator*/
|
||||
inline RowIter GetRow( size_t ridx ) const{
|
||||
utils::Assert( !bst_debug || ridx < this->NumRow(), "row id exceed bound" );
|
||||
return RowIter( &row_data_[ row_ptr_[ridx] ] - 1, &row_data_[ row_ptr_[ridx+1] ] - 1 );
|
||||
inline RowIter GetRow(size_t ridx) const{
|
||||
utils::Assert(!bst_debug || ridx < this->NumRow(), "row id exceed bound");
|
||||
return RowIter(&row_data_[row_ptr_[ridx]] - 1, &row_data_[row_ptr_[ridx + 1]] - 1);
|
||||
}
|
||||
/*! \brief get row iterator*/
|
||||
inline RowIter GetRow( size_t ridx, unsigned gid ) const{
|
||||
utils::Assert( gid == 0, "FMatrixS only have 1 column group" );
|
||||
return FMatrixS::GetRow( ridx );
|
||||
inline RowIter GetRow(size_t ridx, unsigned gid) const{
|
||||
utils::Assert(gid == 0, "FMatrixS only have 1 column group");
|
||||
return FMatrixS::GetRow(ridx);
|
||||
}
|
||||
public:
|
||||
/*! \return whether column access is enabled */
|
||||
inline bool HaveColAccess( void ) const{
|
||||
inline bool HaveColAccess(void) const{
|
||||
return col_ptr_.size() != 0 && col_data_.size() == row_data_.size();
|
||||
}
|
||||
/*! \brief get number of columns */
|
||||
inline size_t NumCol( void ) const{
|
||||
utils::Assert( this->HaveColAccess() );
|
||||
inline size_t NumCol(void) const{
|
||||
utils::Assert(this->HaveColAccess());
|
||||
return col_ptr_.size() - 1;
|
||||
}
|
||||
/*! \brief get col iterator*/
|
||||
inline ColIter GetSortedCol( size_t cidx ) const{
|
||||
utils::Assert( !bst_debug || cidx < this->NumCol(), "col id exceed bound" );
|
||||
return ColIter( &col_data_[ col_ptr_[cidx] ] - 1, &col_data_[ col_ptr_[cidx+1] ] - 1 );
|
||||
inline ColIter GetSortedCol(size_t cidx) const{
|
||||
utils::Assert(!bst_debug || cidx < this->NumCol(), "col id exceed bound");
|
||||
return ColIter(&col_data_[col_ptr_[cidx]] - 1, &col_data_[col_ptr_[cidx + 1]] - 1);
|
||||
}
|
||||
/*! \brief get reverse col iterator */
|
||||
inline ColBackIter GetReverseSortedCol( size_t cidx ) const{
|
||||
utils::Assert( !bst_debug || cidx < this->NumCol(), "col id exceed bound" );
|
||||
return ColBackIter( &col_data_[ col_ptr_[cidx+1] ], &col_data_[ col_ptr_[cidx] ] );
|
||||
inline ColBackIter GetReverseSortedCol(size_t cidx) const{
|
||||
utils::Assert(!bst_debug || cidx < this->NumCol(), "col id exceed bound");
|
||||
return ColBackIter(&col_data_[col_ptr_[cidx + 1]], &col_data_[col_ptr_[cidx]]);
|
||||
}
|
||||
/*!
|
||||
* \brief initialize the data so that we have both column and row major
|
||||
* access, call this whenever we need column access
|
||||
*/
|
||||
inline void InitData( void ){
|
||||
utils::SparseCSRMBuilder<REntry> builder( col_ptr_, col_data_ );
|
||||
builder.InitBudget( 0 );
|
||||
for( size_t i = 0; i < this->NumRow(); i ++ ){
|
||||
for( RowIter it = this->GetRow(i); it.Next(); ){
|
||||
builder.AddBudget( it.findex() );
|
||||
inline void InitData(void){
|
||||
utils::SparseCSRMBuilder<REntry> builder(col_ptr_, col_data_);
|
||||
builder.InitBudget(0);
|
||||
for (size_t i = 0; i < this->NumRow(); i++){
|
||||
for (RowIter it = this->GetRow(i); it.Next();){
|
||||
builder.AddBudget(it.findex());
|
||||
}
|
||||
}
|
||||
builder.InitStorage();
|
||||
for( size_t i = 0; i < this->NumRow(); i ++ ){
|
||||
for( RowIter it = this->GetRow(i); it.Next(); ){
|
||||
builder.PushElem( it.findex(), REntry( (bst_uint)i, it.fvalue() ) );
|
||||
for (size_t i = 0; i < this->NumRow(); i++){
|
||||
for (RowIter it = this->GetRow(i); it.Next();){
|
||||
builder.PushElem(it.findex(), REntry((bst_uint)i, it.fvalue()));
|
||||
}
|
||||
}
|
||||
// sort columns
|
||||
unsigned ncol = static_cast<unsigned>( this->NumCol() );
|
||||
for( unsigned i = 0; i < ncol; i ++ ){
|
||||
std::sort( &col_data_[ col_ptr_[ i ] ], &col_data_[ col_ptr_[ i+1 ] ], REntry::cmp_fvalue );
|
||||
unsigned ncol = static_cast<unsigned>(this->NumCol());
|
||||
for (unsigned i = 0; i < ncol; i++){
|
||||
std::sort(&col_data_[col_ptr_[i]], &col_data_[col_ptr_[i + 1]], REntry::cmp_fvalue);
|
||||
}
|
||||
}
|
||||
/*!
|
||||
* \brief save data to binary stream
|
||||
* note: since we have size_t in ptr,
|
||||
* \brief save data to binary stream
|
||||
* note: since we have size_t in ptr,
|
||||
* the saved format is not consistent between 64bit and 32bit machines
|
||||
* \param fo output stream
|
||||
*/
|
||||
inline void SaveBinary( utils::IStream &fo ) const{
|
||||
FMatrixS::SaveBinary( fo, row_ptr_, row_data_ );
|
||||
inline void SaveBinary(utils::IStream &fo) const{
|
||||
FMatrixS::SaveBinary(fo, row_ptr_, row_data_);
|
||||
int col_access = this->HaveColAccess() ? 1 : 0;
|
||||
fo.Write( &col_access, sizeof(int) );
|
||||
if( col_access != 0 ){
|
||||
FMatrixS::SaveBinary( fo, col_ptr_, col_data_ );
|
||||
fo.Write(&col_access, sizeof(int));
|
||||
if (col_access != 0){
|
||||
FMatrixS::SaveBinary(fo, col_ptr_, col_data_);
|
||||
}
|
||||
}
|
||||
/*!
|
||||
* \brief load data from binary stream
|
||||
* note: since we have size_t in ptr,
|
||||
* \brief load data from binary stream
|
||||
* note: since we have size_t in ptr,
|
||||
* the saved format is not consistent between 64bit and 32bit machines
|
||||
* \param fi input stream
|
||||
*/
|
||||
inline void LoadBinary( utils::IStream &fi ){
|
||||
FMatrixS::LoadBinary( fi, row_ptr_, row_data_ );
|
||||
int col_access;
|
||||
fi.Read( &col_access, sizeof(int) );
|
||||
if( col_access != 0 ){
|
||||
FMatrixS::LoadBinary( fi, col_ptr_, col_data_ );
|
||||
inline void LoadBinary(utils::IStream &fi){
|
||||
FMatrixS::LoadBinary(fi, row_ptr_, row_data_);
|
||||
int col_access;
|
||||
fi.Read(&col_access, sizeof(int));
|
||||
if (col_access != 0){
|
||||
FMatrixS::LoadBinary(fi, col_ptr_, col_data_);
|
||||
}
|
||||
}
|
||||
/*!
|
||||
* \brief load from text file
|
||||
* \brief load from text file
|
||||
* \param fi input file pointer
|
||||
*/
|
||||
inline void LoadText( FILE *fi ){
|
||||
*/
|
||||
inline void LoadText(FILE *fi){
|
||||
this->Clear();
|
||||
int ninst;
|
||||
while( fscanf( fi, "%d", &ninst ) == 1 ){
|
||||
while (fscanf(fi, "%d", &ninst) == 1){
|
||||
std::vector<booster::bst_uint> findex;
|
||||
std::vector<booster::bst_float> fvalue;
|
||||
while( ninst -- ){
|
||||
while (ninst--){
|
||||
unsigned index; float value;
|
||||
utils::Assert( fscanf( fi, "%u:%f", &index, &value ) == 2, "load Text" );
|
||||
findex.push_back( index ); fvalue.push_back( value );
|
||||
utils::Assert(fscanf(fi, "%u:%f", &index, &value) == 2, "load Text");
|
||||
findex.push_back(index); fvalue.push_back(value);
|
||||
}
|
||||
this->AddRow( findex, fvalue );
|
||||
this->AddRow(findex, fvalue);
|
||||
}
|
||||
// initialize column support as well
|
||||
this->InitData();
|
||||
}
|
||||
private:
|
||||
/*!
|
||||
* \brief save data to binary stream
|
||||
* \brief save data to binary stream
|
||||
* \param fo output stream
|
||||
* \param ptr pointer data
|
||||
* \param data data content
|
||||
*/
|
||||
inline static void SaveBinary( utils::IStream &fo,
|
||||
const std::vector<size_t> &ptr,
|
||||
const std::vector<REntry> &data ){
|
||||
inline static void SaveBinary(utils::IStream &fo,
|
||||
const std::vector<size_t> &ptr,
|
||||
const std::vector<REntry> &data){
|
||||
size_t nrow = ptr.size() - 1;
|
||||
fo.Write( &nrow, sizeof(size_t) );
|
||||
fo.Write( &ptr[0], ptr.size() * sizeof(size_t) );
|
||||
if( data.size() != 0 ){
|
||||
fo.Write( &data[0] , data.size() * sizeof(REntry) );
|
||||
fo.Write(&nrow, sizeof(size_t));
|
||||
fo.Write(&ptr[0], ptr.size() * sizeof(size_t));
|
||||
if (data.size() != 0){
|
||||
fo.Write(&data[0], data.size() * sizeof(REntry));
|
||||
}
|
||||
}
|
||||
/*!
|
||||
* \brief load data from binary stream
|
||||
* \brief load data from binary stream
|
||||
* \param fi input stream
|
||||
* \param ptr pointer data
|
||||
* \param data data content
|
||||
*/
|
||||
inline static void LoadBinary( utils::IStream &fi,
|
||||
std::vector<size_t> &ptr,
|
||||
std::vector<REntry> &data ){
|
||||
inline static void LoadBinary(utils::IStream &fi,
|
||||
std::vector<size_t> &ptr,
|
||||
std::vector<REntry> &data){
|
||||
size_t nrow;
|
||||
utils::Assert( fi.Read( &nrow, sizeof(size_t) ) != 0, "Load FMatrixS" );
|
||||
ptr.resize( nrow + 1 );
|
||||
utils::Assert( fi.Read( &ptr[0], ptr.size() * sizeof(size_t) ), "Load FMatrixS" );
|
||||
utils::Assert(fi.Read(&nrow, sizeof(size_t)) != 0, "Load FMatrixS");
|
||||
ptr.resize(nrow + 1);
|
||||
utils::Assert(fi.Read(&ptr[0], ptr.size() * sizeof(size_t)), "Load FMatrixS");
|
||||
|
||||
data.resize( ptr.back() );
|
||||
if( data.size() != 0 ){
|
||||
utils::Assert( fi.Read( &data[0] , data.size() * sizeof(REntry) ) , "Load FMatrixS" );
|
||||
data.resize(ptr.back());
|
||||
if (data.size() != 0){
|
||||
utils::Assert(fi.Read(&data[0], data.size() * sizeof(REntry)), "Load FMatrixS");
|
||||
}
|
||||
}
|
||||
protected:
|
||||
@@ -387,7 +387,7 @@ namespace xgboost{
|
||||
std::vector<size_t> col_ptr_;
|
||||
/*! \brief column data */
|
||||
std::vector<REntry> col_data_;
|
||||
};
|
||||
};
|
||||
};
|
||||
};
|
||||
#endif
|
||||
|
||||
@@ -8,25 +8,25 @@
|
||||
#include "../utils/xgboost_config.h"
|
||||
/*!
|
||||
* \file xgboost_gbmbase.h
|
||||
* \brief a base model class,
|
||||
* \brief a base model class,
|
||||
* that assembles the ensemble of boosters together and does the model update
|
||||
* this class can be used as base code to create booster variants
|
||||
* this class can be used as base code to create booster variants
|
||||
*
|
||||
* The detailed implementation of boosters should start by using the class
|
||||
* provided by this file
|
||||
*
|
||||
*
|
||||
* \author Tianqi Chen: tianqi.tchen@gmail.com
|
||||
*/
|
||||
namespace xgboost{
|
||||
namespace booster{
|
||||
/*!
|
||||
* \brief a base model class,
|
||||
* \brief a base model class,
|
||||
* that assembles the ensembles of booster together and provide single routines to do prediction buffer and update
|
||||
* this class can be used as base code to create booster variants
|
||||
* this class can be used as base code to create booster variants
|
||||
* *
|
||||
* relation to xgboost.h:
|
||||
* (1) xgboost.h provides an interface to a single booster (e.g. a single regression tree)
|
||||
* while GBMBaseModel builds upon IBooster to build a class that
|
||||
* while GBMBaseModel builds upon IBooster to build a class that
|
||||
* assembles the boosters into an ensemble;
|
||||
* (2) GBMBaseModel provides prediction buffering scheme to speedup training;
|
||||
* (3) Summary: GBMBaseModel is a standard wrapper for boosting ensembles;
|
||||
@@ -37,259 +37,260 @@ namespace xgboost{
|
||||
* (3) model.InitTrainer before calling model.Predict and model.DoBoost
|
||||
* (4) model.Predict to get predictions given a instance
|
||||
* (5) model.DoBoost to update the ensembles, add new booster to the model
|
||||
* (6) model.SaveModel to save learned results
|
||||
* (6) model.SaveModel to save learned results
|
||||
*
|
||||
* Buffering: each instance comes with a buffer_index in Predict.
|
||||
* when mparam.num_pbuffer != 0, a unique buffer index can be
|
||||
* Buffering: each instance comes with a buffer_index in Predict.
|
||||
* when mparam.num_pbuffer != 0, a unique buffer index can be
|
||||
* assigned to each instance to buffer previous results of boosters,
|
||||
* this helps to speed up training, so consider assigning a buffer_index
|
||||
* this helps to speed up training, so consider assigning a buffer_index
|
||||
* for each training instance; if buffer_index = -1, the code
|
||||
* recalculates things from scratch and will still work correctly
|
||||
*/
|
||||
class GBMBase{
|
||||
public:
|
||||
/*! \brief constructor */
|
||||
GBMBase( void ){}
|
||||
GBMBase(void){}
|
||||
/*! \brief destructor */
|
||||
virtual ~GBMBase( void ){
|
||||
virtual ~GBMBase(void){
|
||||
this->FreeSpace();
|
||||
}
|
||||
/*!
|
||||
* \brief set parameters from outside
|
||||
/*!
|
||||
* \brief set parameters from outside
|
||||
* \param name name of the parameter
|
||||
* \param val value of the parameter
|
||||
*/
|
||||
inline void SetParam( const char *name, const char *val ){
|
||||
if( !strncmp( name, "bst:", 4 ) ){
|
||||
cfg.PushBack( name + 4, val );
|
||||
inline void SetParam(const char *name, const char *val){
|
||||
if (!strncmp(name, "bst:", 4)){
|
||||
cfg.PushBack(name + 4, val);
|
||||
}
|
||||
if( !strcmp( name, "silent") ){
|
||||
cfg.PushBack( name, val );
|
||||
if (!strcmp(name, "silent")){
|
||||
cfg.PushBack(name, val);
|
||||
}
|
||||
tparam.SetParam( name, val );
|
||||
if( boosters.size() == 0 ) mparam.SetParam( name, val );
|
||||
tparam.SetParam(name, val);
|
||||
if (boosters.size() == 0) mparam.SetParam(name, val);
|
||||
}
|
||||
/*!
|
||||
/*!
|
||||
* \brief load model from stream
|
||||
* \param fi input stream
|
||||
*/
|
||||
inline void LoadModel( utils::IStream &fi ){
|
||||
if( boosters.size() != 0 ) this->FreeSpace();
|
||||
utils::Assert( fi.Read( &mparam, sizeof(ModelParam) ) != 0 );
|
||||
boosters.resize( mparam.num_boosters );
|
||||
for( size_t i = 0; i < boosters.size(); i ++ ){
|
||||
boosters[ i ] = booster::CreateBooster<FMatrixS>( mparam.booster_type );
|
||||
boosters[ i ]->LoadModel( fi );
|
||||
inline void LoadModel(utils::IStream &fi){
|
||||
if (boosters.size() != 0) this->FreeSpace();
|
||||
utils::Assert(fi.Read(&mparam, sizeof(ModelParam)) != 0);
|
||||
boosters.resize(mparam.num_boosters);
|
||||
for (size_t i = 0; i < boosters.size(); i++){
|
||||
boosters[i] = booster::CreateBooster<FMatrixS>(mparam.booster_type);
|
||||
boosters[i]->LoadModel(fi);
|
||||
}
|
||||
{// load info
|
||||
booster_info.resize( mparam.num_boosters );
|
||||
if( mparam.num_boosters != 0 ){
|
||||
utils::Assert( fi.Read( &booster_info[0], sizeof(int)*mparam.num_boosters ) != 0 );
|
||||
booster_info.resize(mparam.num_boosters);
|
||||
if (mparam.num_boosters != 0){
|
||||
utils::Assert(fi.Read(&booster_info[0], sizeof(int)*mparam.num_boosters) != 0);
|
||||
}
|
||||
}
|
||||
if( mparam.num_pbuffer != 0 ){
|
||||
pred_buffer.resize ( mparam.num_pbuffer );
|
||||
pred_counter.resize( mparam.num_pbuffer );
|
||||
utils::Assert( fi.Read( &pred_buffer[0] , pred_buffer.size()*sizeof(float) ) != 0 );
|
||||
utils::Assert( fi.Read( &pred_counter[0], pred_counter.size()*sizeof(unsigned) ) != 0 );
|
||||
if (mparam.num_pbuffer != 0){
|
||||
pred_buffer.resize(mparam.num_pbuffer);
|
||||
pred_counter.resize(mparam.num_pbuffer);
|
||||
utils::Assert(fi.Read(&pred_buffer[0], pred_buffer.size()*sizeof(float)) != 0);
|
||||
utils::Assert(fi.Read(&pred_counter[0], pred_counter.size()*sizeof(unsigned)) != 0);
|
||||
}
|
||||
}
|
||||
/*!
|
||||
/*!
|
||||
* \brief save model to stream
|
||||
* \param fo output stream
|
||||
*/
|
||||
inline void SaveModel( utils::IStream &fo ) const {
|
||||
utils::Assert( mparam.num_boosters == (int)boosters.size() );
|
||||
fo.Write( &mparam, sizeof(ModelParam) );
|
||||
for( size_t i = 0; i < boosters.size(); i ++ ){
|
||||
boosters[ i ]->SaveModel( fo );
|
||||
inline void SaveModel(utils::IStream &fo) const {
|
||||
utils::Assert(mparam.num_boosters == (int)boosters.size());
|
||||
fo.Write(&mparam, sizeof(ModelParam));
|
||||
for (size_t i = 0; i < boosters.size(); i++){
|
||||
boosters[i]->SaveModel(fo);
|
||||
}
|
||||
if( booster_info.size() != 0 ){
|
||||
fo.Write( &booster_info[0], sizeof(int) * booster_info.size() );
|
||||
if (booster_info.size() != 0){
|
||||
fo.Write(&booster_info[0], sizeof(int)* booster_info.size());
|
||||
}
|
||||
if( mparam.num_pbuffer != 0 ){
|
||||
fo.Write( &pred_buffer[0] , pred_buffer.size()*sizeof(float) );
|
||||
fo.Write( &pred_counter[0], pred_counter.size()*sizeof(unsigned) );
|
||||
if (mparam.num_pbuffer != 0){
|
||||
fo.Write(&pred_buffer[0], pred_buffer.size()*sizeof(float));
|
||||
fo.Write(&pred_counter[0], pred_counter.size()*sizeof(unsigned));
|
||||
}
|
||||
}
|
||||
/*!
|
||||
* \brief initialize the current data storage for model, if the model is used first time, call this function
|
||||
*/
|
||||
inline void InitModel( void ){
|
||||
inline void InitModel(void){
|
||||
pred_buffer.clear(); pred_counter.clear();
|
||||
pred_buffer.resize ( mparam.num_pbuffer, 0.0 );
|
||||
pred_counter.resize( mparam.num_pbuffer, 0 );
|
||||
utils::Assert( mparam.num_boosters == 0 );
|
||||
utils::Assert( boosters.size() == 0 );
|
||||
pred_buffer.resize(mparam.num_pbuffer, 0.0);
|
||||
pred_counter.resize(mparam.num_pbuffer, 0);
|
||||
utils::Assert(mparam.num_boosters == 0);
|
||||
utils::Assert(boosters.size() == 0);
|
||||
}
|
||||
/*!
|
||||
* \brief initialize solver before training, called before training
|
||||
* this function is reserved for solver to allocate necessary space and do other preparation
|
||||
*/
|
||||
inline void InitTrainer( void ){
|
||||
if( tparam.nthread != 0 ){
|
||||
omp_set_num_threads( tparam.nthread );
|
||||
* this function is reserved for solver to allocate necessary space and do other preparation
|
||||
*/
|
||||
inline void InitTrainer(void){
|
||||
if (tparam.nthread != 0){
|
||||
omp_set_num_threads(tparam.nthread);
|
||||
}
|
||||
// make sure all the boosters get the latest parameters
|
||||
for( size_t i = 0; i < this->boosters.size(); i ++ ){
|
||||
this->ConfigBooster( this->boosters[i] );
|
||||
for (size_t i = 0; i < this->boosters.size(); i++){
|
||||
this->ConfigBooster(this->boosters[i]);
|
||||
}
|
||||
}
|
||||
/*!
|
||||
/*!
|
||||
* \brief DumpModel
|
||||
* \param fo text file
|
||||
* \param fo text file
|
||||
* \param fmap feature map that may help give interpretations of feature
|
||||
* \param with_stats whether print statistics
|
||||
*/
|
||||
inline void DumpModel( FILE *fo, const utils::FeatMap& fmap, bool with_stats ){
|
||||
for( size_t i = 0; i < boosters.size(); i ++ ){
|
||||
fprintf( fo, "booster[%d]\n", (int)i );
|
||||
boosters[i]->DumpModel( fo, fmap, with_stats );
|
||||
*/
|
||||
inline void DumpModel(FILE *fo, const utils::FeatMap& fmap, bool with_stats){
|
||||
for (size_t i = 0; i < boosters.size(); i++){
|
||||
fprintf(fo, "booster[%d]\n", (int)i);
|
||||
boosters[i]->DumpModel(fo, fmap, with_stats);
|
||||
}
|
||||
}
|
||||
/*!
|
||||
/*!
|
||||
* \brief Dump path of all trees
|
||||
* \param fo text file
|
||||
* \param fo text file
|
||||
* \param data input data
|
||||
*/
|
||||
inline void DumpPath( FILE *fo, const FMatrixS &data ){
|
||||
for( size_t i = 0; i < data.NumRow(); ++ i ){
|
||||
for( size_t j = 0; j < boosters.size(); ++ j ){
|
||||
if( j != 0 ) fprintf( fo, "\t" );
|
||||
inline void DumpPath(FILE *fo, const FMatrixS &data){
|
||||
for (size_t i = 0; i < data.NumRow(); ++i){
|
||||
for (size_t j = 0; j < boosters.size(); ++j){
|
||||
if (j != 0) fprintf(fo, "\t");
|
||||
std::vector<int> path;
|
||||
boosters[j]->PredPath( path, data, i );
|
||||
fprintf( fo, "%d", path[0] );
|
||||
for( size_t k = 1; k < path.size(); ++ k ){
|
||||
fprintf( fo, ",%d", path[k] );
|
||||
boosters[j]->PredPath(path, data, i);
|
||||
fprintf(fo, "%d", path[0]);
|
||||
for (size_t k = 1; k < path.size(); ++k){
|
||||
fprintf(fo, ",%d", path[k]);
|
||||
}
|
||||
}
|
||||
fprintf( fo, "\n" );
|
||||
fprintf(fo, "\n");
|
||||
}
|
||||
}
|
||||
public:
|
||||
/*!
|
||||
/*!
|
||||
* \brief do gradient boost training for one step, using the information given
|
||||
* Note: content of grad and hess can change after DoBoost
|
||||
* \param grad first order gradient of each instance
|
||||
* \param hess second order gradient of each instance
|
||||
* \param feats features of each instance
|
||||
* \param root_index pre-partitioned root index of each instance,
|
||||
* \param root_index pre-partitioned root index of each instance,
|
||||
* root_index.size() can be 0 which indicates that no pre-partition involved
|
||||
*/
|
||||
inline void DoBoost( std::vector<float> &grad,
|
||||
std::vector<float> &hess,
|
||||
const booster::FMatrixS &feats,
|
||||
const std::vector<unsigned> &root_index ) {
|
||||
inline void DoBoost(std::vector<float> &grad,
|
||||
std::vector<float> &hess,
|
||||
const booster::FMatrixS &feats,
|
||||
const std::vector<unsigned> &root_index) {
|
||||
booster::IBooster *bst = this->GetUpdateBooster();
|
||||
bst->DoBoost( grad, hess, feats, root_index );
|
||||
bst->DoBoost(grad, hess, feats, root_index);
|
||||
}
|
||||
/*!
|
||||
/*!
|
||||
* \brief predict values for given sparse feature vector
|
||||
* NOTE: in tree implementation, this is only OpenMP threadsafe, but not threadsafe
|
||||
* \param feats feature matrix
|
||||
* \param row_index row index in the feature matrix
|
||||
* \param buffer_index the buffer index of the current feature line, default -1 means no buffer assigned
|
||||
* \param root_index root id of current instance, default = 0
|
||||
* \return prediction
|
||||
* \return prediction
|
||||
*/
|
||||
inline float Predict( const FMatrixS &feats, bst_uint row_index, int buffer_index = -1, unsigned root_index = 0 ){
|
||||
inline float Predict(const FMatrixS &feats, bst_uint row_index, int buffer_index = -1, unsigned root_index = 0){
|
||||
size_t istart = 0;
|
||||
float psum = 0.0f;
|
||||
|
||||
// load buffered results if any
|
||||
if( mparam.do_reboost == 0 && buffer_index >= 0 ){
|
||||
utils::Assert( buffer_index < mparam.num_pbuffer, "buffer index exceed num_pbuffer" );
|
||||
istart = this->pred_counter[ buffer_index ];
|
||||
psum = this->pred_buffer [ buffer_index ];
|
||||
if (mparam.do_reboost == 0 && buffer_index >= 0){
|
||||
utils::Assert(buffer_index < mparam.num_pbuffer, "buffer index exceed num_pbuffer");
|
||||
istart = this->pred_counter[buffer_index];
|
||||
psum = this->pred_buffer[buffer_index];
|
||||
}
|
||||
|
||||
for (size_t i = istart; i < this->boosters.size(); i++){
|
||||
psum += this->boosters[i]->Predict(feats, row_index, root_index);
|
||||
}
|
||||
|
||||
for( size_t i = istart; i < this->boosters.size(); i ++ ){
|
||||
psum += this->boosters[ i ]->Predict( feats, row_index, root_index );
|
||||
}
|
||||
// updated the buffered results
|
||||
if( mparam.do_reboost == 0 && buffer_index >= 0 ){
|
||||
this->pred_counter[ buffer_index ] = static_cast<unsigned>( boosters.size() );
|
||||
this->pred_buffer [ buffer_index ] = psum;
|
||||
if (mparam.do_reboost == 0 && buffer_index >= 0){
|
||||
this->pred_counter[buffer_index] = static_cast<unsigned>(boosters.size());
|
||||
this->pred_buffer[buffer_index] = psum;
|
||||
}
|
||||
return psum;
|
||||
}
|
||||
public:
|
||||
//--------trial code for interactive update an existing booster------
|
||||
//-------- usually not needed, ignore this region ---------
|
||||
/*!
|
||||
* \brief same as Predict, but removes the prediction of booster to be updated
|
||||
/*!
|
||||
* \brief same as Predict, but removes the prediction of booster to be updated
|
||||
* this function must be called once and only once for every data with pbuffer
|
||||
*/
|
||||
inline float InteractPredict( const FMatrixS &feats, bst_uint row_index, int buffer_index = -1, unsigned root_index = 0 ){
|
||||
float psum = this->Predict( feats, row_index, buffer_index, root_index );
|
||||
if( tparam.reupdate_booster != -1 ){
|
||||
inline float InteractPredict(const FMatrixS &feats, bst_uint row_index, int buffer_index = -1, unsigned root_index = 0){
|
||||
float psum = this->Predict(feats, row_index, buffer_index, root_index);
|
||||
if (tparam.reupdate_booster != -1){
|
||||
const int bid = tparam.reupdate_booster;
|
||||
utils::Assert( bid >= 0 && bid < (int)boosters.size(), "interact:booster_index exceed existing bound" );
|
||||
psum -= boosters[ bid ]->Predict( feats, row_index, root_index );
|
||||
if( mparam.do_reboost == 0 && buffer_index >= 0 ){
|
||||
this->pred_buffer[ buffer_index ] = psum;
|
||||
utils::Assert(bid >= 0 && bid < (int)boosters.size(), "interact:booster_index exceed existing bound");
|
||||
psum -= boosters[bid]->Predict(feats, row_index, root_index);
|
||||
if (mparam.do_reboost == 0 && buffer_index >= 0){
|
||||
this->pred_buffer[buffer_index] = psum;
|
||||
}
|
||||
}
|
||||
return psum;
|
||||
}
|
||||
/*! \brief delete the specified booster */
|
||||
inline void DelteBooster( void ){
|
||||
inline void DelteBooster(void){
|
||||
const int bid = tparam.reupdate_booster;
|
||||
utils::Assert( bid >= 0 && bid < mparam.num_boosters , "must specify booster index for deletion");
|
||||
delete boosters[ bid ];
|
||||
for( int i = bid + 1; i < mparam.num_boosters; ++ i ){
|
||||
boosters[i-1] = boosters[ i ];
|
||||
booster_info[i-1] = booster_info[ i ];
|
||||
}
|
||||
boosters.resize( mparam.num_boosters -= 1 );
|
||||
booster_info.resize( boosters.size() );
|
||||
utils::Assert(bid >= 0 && bid < mparam.num_boosters, "must specify booster index for deletion");
|
||||
delete boosters[bid];
|
||||
for (int i = bid + 1; i < mparam.num_boosters; ++i){
|
||||
boosters[i - 1] = boosters[i];
|
||||
booster_info[i - 1] = booster_info[i];
|
||||
}
|
||||
boosters.resize(mparam.num_boosters -= 1);
|
||||
booster_info.resize(boosters.size());
|
||||
}
|
||||
/*! \brief update the prediction buffer, after booster have been updated */
|
||||
inline void InteractRePredict( const FMatrixS &feats, bst_uint row_index, int buffer_index = -1, unsigned root_index = 0 ){
|
||||
if( tparam.reupdate_booster != -1 ){
|
||||
/*! \brief update the prediction buffer, after booster have been updated */
|
||||
inline void InteractRePredict(const FMatrixS &feats, bst_uint row_index, int buffer_index = -1, unsigned root_index = 0){
|
||||
if (tparam.reupdate_booster != -1){
|
||||
const int bid = tparam.reupdate_booster;
|
||||
utils::Assert( bid >= 0 && bid < (int)boosters.size(), "interact:booster_index exceed existing bound" );
|
||||
if( mparam.do_reboost == 0 && buffer_index >= 0 ){
|
||||
this->pred_buffer[ buffer_index ] += boosters[ bid ]->Predict( feats, row_index, root_index );
|
||||
utils::Assert(bid >= 0 && bid < (int)boosters.size(), "interact:booster_index exceed existing bound");
|
||||
if (mparam.do_reboost == 0 && buffer_index >= 0){
|
||||
this->pred_buffer[buffer_index] += boosters[bid]->Predict(feats, row_index, root_index);
|
||||
}
|
||||
}
|
||||
}
|
||||
//-----------non public fields afterwards-------------
|
||||
protected:
|
||||
/*! \brief free space of the model */
|
||||
inline void FreeSpace( void ){
|
||||
for( size_t i = 0; i < boosters.size(); i ++ ){
|
||||
inline void FreeSpace(void){
|
||||
for (size_t i = 0; i < boosters.size(); i++){
|
||||
delete boosters[i];
|
||||
}
|
||||
boosters.clear(); booster_info.clear(); mparam.num_boosters = 0;
|
||||
boosters.clear(); booster_info.clear(); mparam.num_boosters = 0;
|
||||
}
|
||||
/*! \brief configure a booster */
|
||||
inline void ConfigBooster( booster::IBooster *bst ){
|
||||
inline void ConfigBooster(booster::IBooster *bst){
|
||||
cfg.BeforeFirst();
|
||||
while( cfg.Next() ){
|
||||
bst->SetParam( cfg.name(), cfg.val() );
|
||||
while (cfg.Next()){
|
||||
bst->SetParam(cfg.name(), cfg.val());
|
||||
}
|
||||
}
|
||||
/*!
|
||||
* \brief get a booster to update
|
||||
/*!
|
||||
* \brief get a booster to update
|
||||
* \return the booster created
|
||||
*/
|
||||
inline booster::IBooster *GetUpdateBooster( void ){
|
||||
if( tparam.reupdate_booster != -1 ){
|
||||
inline booster::IBooster *GetUpdateBooster(void){
|
||||
if (tparam.reupdate_booster != -1){
|
||||
const int bid = tparam.reupdate_booster;
|
||||
utils::Assert( bid >= 0 && bid < (int)boosters.size(), "interact:booster_index exceed existing bound" );
|
||||
this->ConfigBooster( boosters[bid] );
|
||||
return boosters[ bid ];
|
||||
utils::Assert(bid >= 0 && bid < (int)boosters.size(), "interact:booster_index exceed existing bound");
|
||||
this->ConfigBooster(boosters[bid]);
|
||||
return boosters[bid];
|
||||
}
|
||||
|
||||
if( mparam.do_reboost == 0 || boosters.size() == 0 ){
|
||||
if (mparam.do_reboost == 0 || boosters.size() == 0){
|
||||
mparam.num_boosters += 1;
|
||||
boosters.push_back( booster::CreateBooster<FMatrixS>( mparam.booster_type ) );
|
||||
booster_info.push_back( 0 );
|
||||
this->ConfigBooster( boosters.back() );
|
||||
boosters.back()->InitModel();
|
||||
}else{
|
||||
this->ConfigBooster( boosters.back() );
|
||||
boosters.push_back(booster::CreateBooster<FMatrixS>(mparam.booster_type));
|
||||
booster_info.push_back(0);
|
||||
this->ConfigBooster(boosters.back());
|
||||
boosters.back()->InitModel();
|
||||
}
|
||||
else{
|
||||
this->ConfigBooster(boosters.back());
|
||||
}
|
||||
return boosters.back();
|
||||
}
|
||||
@@ -306,76 +307,76 @@ namespace xgboost{
|
||||
int num_feature;
|
||||
/*! \brief size of prediction buffer allocated for buffering boosting computation */
|
||||
int num_pbuffer;
|
||||
/*!
|
||||
/*!
|
||||
* \brief whether we repeatedly update a single booster each round: default 0
|
||||
* set to 1 for linear booster, so that regularization term can be considered
|
||||
*/
|
||||
int do_reboost;
|
||||
/*! \brief reserved parameters */
|
||||
int reserved[ 32 ];
|
||||
int reserved[32];
|
||||
/*! \brief constructor */
|
||||
ModelParam( void ){
|
||||
num_boosters = 0;
|
||||
ModelParam(void){
|
||||
num_boosters = 0;
|
||||
booster_type = 0;
|
||||
num_roots = num_feature = 0;
|
||||
num_roots = num_feature = 0;
|
||||
do_reboost = 0;
|
||||
num_pbuffer = 0;
|
||||
memset( reserved, 0, sizeof( reserved ) );
|
||||
memset(reserved, 0, sizeof(reserved));
|
||||
}
|
||||
/*!
|
||||
* \brief set parameters from outside
|
||||
/*!
|
||||
* \brief set parameters from outside
|
||||
* \param name name of the parameter
|
||||
* \param val value of the parameter
|
||||
*/
|
||||
inline void SetParam( const char *name, const char *val ){
|
||||
if( !strcmp("booster_type", name ) ){
|
||||
booster_type = atoi( val );
|
||||
inline void SetParam(const char *name, const char *val){
|
||||
if (!strcmp("booster_type", name)){
|
||||
booster_type = atoi(val);
|
||||
// linear boost automatically set do reboost
|
||||
if( booster_type == 1 ) do_reboost = 1;
|
||||
if (booster_type == 1) do_reboost = 1;
|
||||
}
|
||||
if( !strcmp("num_pbuffer", name ) ) num_pbuffer = atoi( val );
|
||||
if( !strcmp("do_reboost", name ) ) do_reboost = atoi( val );
|
||||
if( !strcmp("bst:num_roots", name ) ) num_roots = atoi( val );
|
||||
if( !strcmp("bst:num_feature", name ) ) num_feature = atoi( val );
|
||||
if (!strcmp("num_pbuffer", name)) num_pbuffer = atoi(val);
|
||||
if (!strcmp("do_reboost", name)) do_reboost = atoi(val);
|
||||
if (!strcmp("bst:num_roots", name)) num_roots = atoi(val);
|
||||
if (!strcmp("bst:num_feature", name)) num_feature = atoi(val);
|
||||
}
|
||||
};
|
||||
/*! \brief training parameters */
|
||||
struct TrainParam{
|
||||
/*! \brief number of OpenMP threads */
|
||||
int nthread;
|
||||
/*!
|
||||
* \brief index of specific booster to be re-updated, default = -1: update new booster
|
||||
/*!
|
||||
* \brief index of specific booster to be re-updated, default = -1: update new booster
|
||||
* this parameter is part of the trial interactive update mode
|
||||
*/
|
||||
int reupdate_booster;
|
||||
/*! \brief constructor */
|
||||
TrainParam( void ) {
|
||||
TrainParam(void) {
|
||||
nthread = 1;
|
||||
reupdate_booster = -1;
|
||||
}
|
||||
/*!
|
||||
* \brief set parameters from outside
|
||||
/*!
|
||||
* \brief set parameters from outside
|
||||
* \param name name of the parameter
|
||||
* \param val value of the parameter
|
||||
*/
|
||||
inline void SetParam( const char *name, const char *val ){
|
||||
if( !strcmp("nthread", name ) ) nthread = atoi( val );
|
||||
if( !strcmp("interact:booster_index", name ) ) reupdate_booster = atoi( val );
|
||||
*/
|
||||
inline void SetParam(const char *name, const char *val){
|
||||
if (!strcmp("nthread", name)) nthread = atoi(val);
|
||||
if (!strcmp("interact:booster_index", name)) reupdate_booster = atoi(val);
|
||||
}
|
||||
};
|
||||
protected:
|
||||
/*! \brief model parameters */
|
||||
/*! \brief model parameters */
|
||||
ModelParam mparam;
|
||||
/*! \brief training parameters */
|
||||
/*! \brief training parameters */
|
||||
TrainParam tparam;
|
||||
protected:
|
||||
/*! \brief component boosters */
|
||||
/*! \brief component boosters */
|
||||
std::vector<booster::IBooster*> boosters;
|
||||
/*! \brief some information indicator of the booster, reserved */
|
||||
/*! \brief some information indicator of the booster, reserved */
|
||||
std::vector<int> booster_info;
|
||||
/*! \brief prediction buffer */
|
||||
/*! \brief prediction buffer */
|
||||
std::vector<float> pred_buffer;
|
||||
/*! \brief prediction buffer counter, record the progress so far of the buffer */
|
||||
/*! \brief prediction buffer counter, record the progress so far of the buffer */
|
||||
std::vector<unsigned> pred_counter;
|
||||
/*! \brief configurations saved for each booster */
|
||||
utils::ConfigSaver cfg;
|
||||
|
||||
Reference in New Issue
Block a user