rank pass toy

This commit is contained in:
kalenhaha 2014-04-07 23:25:35 +08:00
parent 40c380e40a
commit a10f594644
32 changed files with 2237 additions and 2146 deletions

View File

@ -12,6 +12,8 @@ export LDFLAGS= -pthread -lm
xgboost: regression/xgboost_reg_main.cpp regression/*.h booster/*.h booster/*/*.hpp booster/*.hpp xgboost: regression/xgboost_reg_main.cpp regression/*.h booster/*.h booster/*/*.hpp booster/*.hpp
#xgboost: rank/xgboost_rank_main.cpp base/*.h rank/*.h booster/*.h booster/*/*.hpp booster/*.hpp
$(BIN) : $(BIN) :
$(CXX) $(CFLAGS) $(LDFLAGS) -o $@ $(filter %.cpp %.o %.c, $^) $(CXX) $(CFLAGS) $(LDFLAGS) -o $@ $(filter %.cpp %.o %.c, $^)

View File

@ -18,7 +18,7 @@
#include "linear/xgboost_linear.hpp" #include "linear/xgboost_linear.hpp"
namespace xgboost{ namespace xgboost{
namespace booster{ namespace booster{
/*! /*!
* \brief create a gradient booster, given type of booster * \brief create a gradient booster, given type of booster
* \param booster_type type of gradient booster, can be used to specify implements * \param booster_type type of gradient booster, can be used to specify implements
@ -26,14 +26,14 @@ namespace xgboost{
* \return the pointer to the gradient booster created * \return the pointer to the gradient booster created
*/ */
template<typename FMatrix> template<typename FMatrix>
inline InterfaceBooster<FMatrix> *CreateBooster( int booster_type ){ inline InterfaceBooster<FMatrix> *CreateBooster(int booster_type){
switch( booster_type ){ switch (booster_type){
case 0: return new RegTreeTrainer<FMatrix>(); case 0: return new RegTreeTrainer<FMatrix>();
case 1: return new LinearBooster<FMatrix>(); case 1: return new LinearBooster<FMatrix>();
default: utils::Error("unknown booster_type"); return NULL; default: utils::Error("unknown booster_type"); return NULL;
} }
} }
}; // namespace booster }; // namespace booster
}; // namespace xgboost }; // namespace xgboost
#endif // XGBOOST_INL_HPP #endif // XGBOOST_INL_HPP

View File

@ -40,22 +40,22 @@ namespace xgboost{
* \param name name of the parameter * \param name name of the parameter
* \param val value of the parameter * \param val value of the parameter
*/ */
virtual void SetParam( const char *name, const char *val ) = 0; virtual void SetParam(const char *name, const char *val) = 0;
/*! /*!
* \brief load model from stream * \brief load model from stream
* \param fi input stream * \param fi input stream
*/ */
virtual void LoadModel( utils::IStream &fi ) = 0; virtual void LoadModel(utils::IStream &fi) = 0;
/*! /*!
* \brief save model to stream * \brief save model to stream
* \param fo output stream * \param fo output stream
*/ */
virtual void SaveModel( utils::IStream &fo ) const = 0; virtual void SaveModel(utils::IStream &fo) const = 0;
/*! /*!
* \brief initialize solver before training, called before training * \brief initialize solver before training, called before training
* this function is reserved for solver to allocate necessary space and do other preparation * this function is reserved for solver to allocate necessary space and do other preparation
*/ */
virtual void InitModel( void ) = 0; virtual void InitModel(void) = 0;
public: public:
/*! /*!
* \brief do gradient boost training for one step, using the information given, * \brief do gradient boost training for one step, using the information given,
@ -66,10 +66,10 @@ namespace xgboost{
* \param root_index pre-partitioned root index of each instance, * \param root_index pre-partitioned root index of each instance,
* root_index.size() can be 0 which indicates that no pre-partition involved * root_index.size() can be 0 which indicates that no pre-partition involved
*/ */
virtual void DoBoost( std::vector<float> &grad, virtual void DoBoost(std::vector<float> &grad,
std::vector<float> &hess, std::vector<float> &hess,
const FMatrix &feats, const FMatrix &feats,
const std::vector<unsigned> &root_index ) = 0; const std::vector<unsigned> &root_index) = 0;
/*! /*!
* \brief predict the path ids along a trees, for given sparse feature vector. When booster is a tree * \brief predict the path ids along a trees, for given sparse feature vector. When booster is a tree
* \param path the result of path * \param path the result of path
@ -77,9 +77,9 @@ namespace xgboost{
* \param row_index row index in the feature matrix * \param row_index row index in the feature matrix
* \param root_index root id of current instance, default = 0 * \param root_index root id of current instance, default = 0
*/ */
virtual void PredPath( std::vector<int> &path, const FMatrix &feats, virtual void PredPath(std::vector<int> &path, const FMatrix &feats,
bst_uint row_index, unsigned root_index = 0 ){ bst_uint row_index, unsigned root_index = 0){
utils::Error( "not implemented" ); utils::Error("not implemented");
} }
/*! /*!
* \brief predict values for given sparse feature vector * \brief predict values for given sparse feature vector
@ -91,8 +91,8 @@ namespace xgboost{
* \param root_index root id of current instance, default = 0 * \param root_index root id of current instance, default = 0
* \return prediction * \return prediction
*/ */
virtual float Predict( const FMatrix &feats, bst_uint row_index, unsigned root_index = 0 ){ virtual float Predict(const FMatrix &feats, bst_uint row_index, unsigned root_index = 0){
utils::Error( "not implemented" ); utils::Error("not implemented");
return 0.0f; return 0.0f;
} }
/*! /*!
@ -102,29 +102,29 @@ namespace xgboost{
* \param rid root id of current instance, default = 0 * \param rid root id of current instance, default = 0
* \return prediction * \return prediction
*/ */
virtual float Predict( const std::vector<float> &feat, virtual float Predict(const std::vector<float> &feat,
const std::vector<bool> &funknown, const std::vector<bool> &funknown,
unsigned rid = 0 ){ unsigned rid = 0){
utils::Error( "not implemented" ); utils::Error("not implemented");
return 0.0f; return 0.0f;
} }
/*! /*!
* \brief print information * \brief print information
* \param fo output stream * \param fo output stream
*/ */
virtual void PrintInfo( FILE *fo ){} virtual void PrintInfo(FILE *fo){}
/*! /*!
* \brief dump model into text file * \brief dump model into text file
* \param fo output stream * \param fo output stream
* \param fmap feature map that may help give interpretations of feature * \param fmap feature map that may help give interpretations of feature
* \param with_stats whether print statistics * \param with_stats whether print statistics
*/ */
virtual void DumpModel( FILE *fo, const utils::FeatMap& fmap, bool with_stats = false ){ virtual void DumpModel(FILE *fo, const utils::FeatMap& fmap, bool with_stats = false){
utils::Error( "not implemented" ); utils::Error("not implemented");
} }
public: public:
/*! \brief virtual destructor */ /*! \brief virtual destructor */
virtual ~InterfaceBooster( void ){} virtual ~InterfaceBooster(void){}
}; };
}; };
namespace booster{ namespace booster{
@ -146,7 +146,7 @@ namespace xgboost{
* \return the pointer to the gradient booster created * \return the pointer to the gradient booster created
*/ */
template<typename FMatrix> template<typename FMatrix>
inline InterfaceBooster<FMatrix> *CreateBooster( int booster_type ); inline InterfaceBooster<FMatrix> *CreateBooster(int booster_type);
}; };
}; };

View File

@ -49,11 +49,11 @@ namespace xgboost{
* \return whether there is element in next position * \return whether there is element in next position
*/ */
inline bool Next( void ); inline bool Next(void);
/*! \return feature index in current position */ /*! \return feature index in current position */
inline bst_uint findex( void ) const; inline bst_uint findex(void) const;
/*! \return feature value in current position */ /*! \return feature value in current position */
inline bst_float fvalue( void ) const; inline bst_float fvalue(void) const;
}; };
/*! \brief example iterator over one column */ /*! \brief example iterator over one column */
struct ColIter{ struct ColIter{
@ -61,11 +61,11 @@ namespace xgboost{
* \brief move to next position * \brief move to next position
* \return whether there is element in next position * \return whether there is element in next position
*/ */
inline bool Next( void ); inline bool Next(void);
/*! \return row index of current position */ /*! \return row index of current position */
inline bst_uint rindex( void ) const; inline bst_uint rindex(void) const;
/*! \return feature value in current position */ /*! \return feature value in current position */
inline bst_float fvalue( void ) const; inline bst_float fvalue(void) const;
}; };
/*! \brief backward iterator over column */ /*! \brief backward iterator over column */
struct ColBackIter : public ColIter {}; struct ColBackIter : public ColIter {};
@ -74,23 +74,23 @@ namespace xgboost{
* \brief get number of rows * \brief get number of rows
* \return number of rows * \return number of rows
*/ */
inline size_t NumRow( void ) const; inline size_t NumRow(void) const;
/*! /*!
* \brief get number of columns * \brief get number of columns
* \return number of columns * \return number of columns
*/ */
inline size_t NumCol( void ) const; inline size_t NumCol(void) const;
/*! /*!
* \brief get row iterator * \brief get row iterator
* \param ridx row index * \param ridx row index
* \return row iterator * \return row iterator
*/ */
inline RowIter GetRow( size_t ridx ) const; inline RowIter GetRow(size_t ridx) const;
/*! /*!
* \brief get number of column groups, this ise used together with GetRow( ridx, gid ) * \brief get number of column groups, this ise used together with GetRow( ridx, gid )
* \return number of column group * \return number of column group
*/ */
inline unsigned NumColGroup( void ) const{ inline unsigned NumColGroup(void) const{
return 1; return 1;
} }
/*! /*!
@ -99,22 +99,22 @@ namespace xgboost{
* \param gid colmun group id * \param gid colmun group id
* \return row iterator, only iterates over features of specified column group * \return row iterator, only iterates over features of specified column group
*/ */
inline RowIter GetRow( size_t ridx, unsigned gid ) const; inline RowIter GetRow(size_t ridx, unsigned gid) const;
/*! \return whether column access is enabled */ /*! \return whether column access is enabled */
inline bool HaveColAccess( void ) const; inline bool HaveColAccess(void) const;
/*! /*!
* \brief get column iterator, the columns must be sorted by feature value * \brief get column iterator, the columns must be sorted by feature value
* \param ridx column index * \param ridx column index
* \return column iterator * \return column iterator
*/ */
inline ColIter GetSortedCol( size_t ridx ) const; inline ColIter GetSortedCol(size_t ridx) const;
/*! /*!
* \brief get column backward iterator, starts from biggest fvalue, and iterator back * \brief get column backward iterator, starts from biggest fvalue, and iterator back
* \param ridx column index * \param ridx column index
* \return reverse column iterator * \return reverse column iterator
*/ */
inline ColBackIter GetReverseSortedCol( size_t ridx ) const; inline ColBackIter GetReverseSortedCol(size_t ridx) const;
}; };
}; };
}; };
@ -124,7 +124,7 @@ namespace xgboost{
/*! /*!
* \brief feature matrix to store training instance, in sparse CSR format * \brief feature matrix to store training instance, in sparse CSR format
*/ */
class FMatrixS: public FMatrix<FMatrixS>{ class FMatrixS : public FMatrix<FMatrixS>{
public: public:
/*! \brief one entry in a row */ /*! \brief one entry in a row */
struct REntry{ struct REntry{
@ -133,10 +133,10 @@ namespace xgboost{
/*! \brief feature value */ /*! \brief feature value */
bst_float fvalue; bst_float fvalue;
/*! \brief constructor */ /*! \brief constructor */
REntry( void ){} REntry(void){}
/*! \brief constructor */ /*! \brief constructor */
REntry( bst_uint findex, bst_float fvalue ) : findex(findex), fvalue(fvalue){} REntry(bst_uint findex, bst_float fvalue) : findex(findex), fvalue(fvalue){}
inline static bool cmp_fvalue( const REntry &a, const REntry &b ){ inline static bool cmp_fvalue(const REntry &a, const REntry &b){
return a.fvalue < b.fvalue; return a.fvalue < b.fvalue;
} }
}; };
@ -147,76 +147,76 @@ namespace xgboost{
/*! \brief size of the data */ /*! \brief size of the data */
bst_uint len; bst_uint len;
/*! \brief get k-th element */ /*! \brief get k-th element */
inline const REntry& operator[]( unsigned i ) const{ inline const REntry& operator[](unsigned i) const{
return data_[i]; return data_[i];
} }
}; };
/*! \brief row iterator */ /*! \brief row iterator */
struct RowIter{ struct RowIter{
const REntry *dptr_, *end_; const REntry *dptr_, *end_;
RowIter( const REntry* dptr, const REntry* end ) RowIter(const REntry* dptr, const REntry* end)
:dptr_(dptr),end_(end){} :dptr_(dptr), end_(end){}
inline bool Next( void ){ inline bool Next(void){
if( dptr_ == end_ ) return false; if (dptr_ == end_) return false;
else{ else{
++ dptr_; return true; ++dptr_; return true;
} }
} }
inline bst_uint findex( void ) const{ inline bst_uint findex(void) const{
return dptr_->findex; return dptr_->findex;
} }
inline bst_float fvalue( void ) const{ inline bst_float fvalue(void) const{
return dptr_->fvalue; return dptr_->fvalue;
} }
}; };
/*! \brief column iterator */ /*! \brief column iterator */
struct ColIter: public RowIter{ struct ColIter : public RowIter{
ColIter( const REntry* dptr, const REntry* end ) ColIter(const REntry* dptr, const REntry* end)
:RowIter( dptr, end ){} :RowIter(dptr, end){}
inline bst_uint rindex( void ) const{ inline bst_uint rindex(void) const{
return this->findex(); return this->findex();
} }
}; };
/*! \brief reverse column iterator */ /*! \brief reverse column iterator */
struct ColBackIter: public ColIter{ struct ColBackIter : public ColIter{
ColBackIter( const REntry* dptr, const REntry* end ) ColBackIter(const REntry* dptr, const REntry* end)
:ColIter( dptr, end ){} :ColIter(dptr, end){}
// shadows RowIter::Next // shadows RowIter::Next
inline bool Next( void ){ inline bool Next(void){
if( dptr_ == end_ ) return false; if (dptr_ == end_) return false;
else{ else{
-- dptr_; return true; --dptr_; return true;
} }
} }
}; };
public: public:
/*! \brief constructor */ /*! \brief constructor */
FMatrixS( void ){ this->Clear(); } FMatrixS(void){ this->Clear(); }
/*! \brief get number of rows */ /*! \brief get number of rows */
inline size_t NumRow( void ) const{ inline size_t NumRow(void) const{
return row_ptr_.size() - 1; return row_ptr_.size() - 1;
} }
/*! /*!
* \brief get number of nonzero entries * \brief get number of nonzero entries
* \return number of nonzero entries * \return number of nonzero entries
*/ */
inline size_t NumEntry( void ) const{ inline size_t NumEntry(void) const{
return row_data_.size(); return row_data_.size();
} }
/*! \brief clear the storage */ /*! \brief clear the storage */
inline void Clear( void ){ inline void Clear(void){
row_ptr_.clear(); row_ptr_.clear();
row_ptr_.push_back( 0 ); row_ptr_.push_back(0);
row_data_.clear(); row_data_.clear();
col_ptr_.clear(); col_ptr_.clear();
col_data_.clear(); col_data_.clear();
} }
/*! \brief get sparse part of current row */ /*! \brief get sparse part of current row */
inline Line operator[]( size_t sidx ) const{ inline Line operator[](size_t sidx) const{
Line sp; Line sp;
utils::Assert( !bst_debug || sidx < this->NumRow(), "row id exceed bound" ); utils::Assert(!bst_debug || sidx < this->NumRow(), "row id exceed bound");
sp.len = static_cast<bst_uint>( row_ptr_[ sidx + 1 ] - row_ptr_[ sidx ] ); sp.len = static_cast<bst_uint>(row_ptr_[sidx + 1] - row_ptr_[sidx]);
sp.data_ = &row_data_[ row_ptr_[ sidx ] ]; sp.data_ = &row_data_[row_ptr_[sidx]];
return sp; return sp;
} }
/*! /*!
@ -227,71 +227,71 @@ namespace xgboost{
* \param fend end bound range of feature * \param fend end bound range of feature
* \return the row id added line * \return the row id added line
*/ */
inline size_t AddRow( const std::vector<bst_uint> &findex, inline size_t AddRow(const std::vector<bst_uint> &findex,
const std::vector<bst_float> &fvalue, const std::vector<bst_float> &fvalue,
unsigned fstart = 0, unsigned fend = UINT_MAX ){ unsigned fstart = 0, unsigned fend = UINT_MAX){
utils::Assert( findex.size() == fvalue.size() ); utils::Assert(findex.size() == fvalue.size());
unsigned cnt = 0; unsigned cnt = 0;
for( size_t i = 0; i < findex.size(); i ++ ){ for (size_t i = 0; i < findex.size(); i++){
if( findex[i] < fstart || findex[i] >= fend ) continue; if (findex[i] < fstart || findex[i] >= fend) continue;
row_data_.push_back( REntry( findex[i], fvalue[i] ) ); row_data_.push_back(REntry(findex[i], fvalue[i]));
cnt ++; cnt++;
} }
row_ptr_.push_back( row_ptr_.back() + cnt ); row_ptr_.push_back(row_ptr_.back() + cnt);
return row_ptr_.size() - 2; return row_ptr_.size() - 2;
} }
/*! \brief get row iterator*/ /*! \brief get row iterator*/
inline RowIter GetRow( size_t ridx ) const{ inline RowIter GetRow(size_t ridx) const{
utils::Assert( !bst_debug || ridx < this->NumRow(), "row id exceed bound" ); utils::Assert(!bst_debug || ridx < this->NumRow(), "row id exceed bound");
return RowIter( &row_data_[ row_ptr_[ridx] ] - 1, &row_data_[ row_ptr_[ridx+1] ] - 1 ); return RowIter(&row_data_[row_ptr_[ridx]] - 1, &row_data_[row_ptr_[ridx + 1]] - 1);
} }
/*! \brief get row iterator*/ /*! \brief get row iterator*/
inline RowIter GetRow( size_t ridx, unsigned gid ) const{ inline RowIter GetRow(size_t ridx, unsigned gid) const{
utils::Assert( gid == 0, "FMatrixS only have 1 column group" ); utils::Assert(gid == 0, "FMatrixS only have 1 column group");
return FMatrixS::GetRow( ridx ); return FMatrixS::GetRow(ridx);
} }
public: public:
/*! \return whether column access is enabled */ /*! \return whether column access is enabled */
inline bool HaveColAccess( void ) const{ inline bool HaveColAccess(void) const{
return col_ptr_.size() != 0 && col_data_.size() == row_data_.size(); return col_ptr_.size() != 0 && col_data_.size() == row_data_.size();
} }
/*! \brief get number of colmuns */ /*! \brief get number of colmuns */
inline size_t NumCol( void ) const{ inline size_t NumCol(void) const{
utils::Assert( this->HaveColAccess() ); utils::Assert(this->HaveColAccess());
return col_ptr_.size() - 1; return col_ptr_.size() - 1;
} }
/*! \brief get col iterator*/ /*! \brief get col iterator*/
inline ColIter GetSortedCol( size_t cidx ) const{ inline ColIter GetSortedCol(size_t cidx) const{
utils::Assert( !bst_debug || cidx < this->NumCol(), "col id exceed bound" ); utils::Assert(!bst_debug || cidx < this->NumCol(), "col id exceed bound");
return ColIter( &col_data_[ col_ptr_[cidx] ] - 1, &col_data_[ col_ptr_[cidx+1] ] - 1 ); return ColIter(&col_data_[col_ptr_[cidx]] - 1, &col_data_[col_ptr_[cidx + 1]] - 1);
} }
/*! \brief get col iterator */ /*! \brief get col iterator */
inline ColBackIter GetReverseSortedCol( size_t cidx ) const{ inline ColBackIter GetReverseSortedCol(size_t cidx) const{
utils::Assert( !bst_debug || cidx < this->NumCol(), "col id exceed bound" ); utils::Assert(!bst_debug || cidx < this->NumCol(), "col id exceed bound");
return ColBackIter( &col_data_[ col_ptr_[cidx+1] ], &col_data_[ col_ptr_[cidx] ] ); return ColBackIter(&col_data_[col_ptr_[cidx + 1]], &col_data_[col_ptr_[cidx]]);
} }
/*! /*!
* \brief intialize the data so that we have both column and row major * \brief intialize the data so that we have both column and row major
* access, call this whenever we need column access * access, call this whenever we need column access
*/ */
inline void InitData( void ){ inline void InitData(void){
utils::SparseCSRMBuilder<REntry> builder( col_ptr_, col_data_ ); utils::SparseCSRMBuilder<REntry> builder(col_ptr_, col_data_);
builder.InitBudget( 0 ); builder.InitBudget(0);
for( size_t i = 0; i < this->NumRow(); i ++ ){ for (size_t i = 0; i < this->NumRow(); i++){
for( RowIter it = this->GetRow(i); it.Next(); ){ for (RowIter it = this->GetRow(i); it.Next();){
builder.AddBudget( it.findex() ); builder.AddBudget(it.findex());
} }
} }
builder.InitStorage(); builder.InitStorage();
for( size_t i = 0; i < this->NumRow(); i ++ ){ for (size_t i = 0; i < this->NumRow(); i++){
for( RowIter it = this->GetRow(i); it.Next(); ){ for (RowIter it = this->GetRow(i); it.Next();){
builder.PushElem( it.findex(), REntry( (bst_uint)i, it.fvalue() ) ); builder.PushElem(it.findex(), REntry((bst_uint)i, it.fvalue()));
} }
} }
// sort columns // sort columns
unsigned ncol = static_cast<unsigned>( this->NumCol() ); unsigned ncol = static_cast<unsigned>(this->NumCol());
for( unsigned i = 0; i < ncol; i ++ ){ for (unsigned i = 0; i < ncol; i++){
std::sort( &col_data_[ col_ptr_[ i ] ], &col_data_[ col_ptr_[ i+1 ] ], REntry::cmp_fvalue ); std::sort(&col_data_[col_ptr_[i]], &col_data_[col_ptr_[i + 1]], REntry::cmp_fvalue);
} }
} }
/*! /*!
@ -300,12 +300,12 @@ namespace xgboost{
* the function is not consistent between 64bit and 32bit machine * the function is not consistent between 64bit and 32bit machine
* \param fo output stream * \param fo output stream
*/ */
inline void SaveBinary( utils::IStream &fo ) const{ inline void SaveBinary(utils::IStream &fo) const{
FMatrixS::SaveBinary( fo, row_ptr_, row_data_ ); FMatrixS::SaveBinary(fo, row_ptr_, row_data_);
int col_access = this->HaveColAccess() ? 1 : 0; int col_access = this->HaveColAccess() ? 1 : 0;
fo.Write( &col_access, sizeof(int) ); fo.Write(&col_access, sizeof(int));
if( col_access != 0 ){ if (col_access != 0){
FMatrixS::SaveBinary( fo, col_ptr_, col_data_ ); FMatrixS::SaveBinary(fo, col_ptr_, col_data_);
} }
} }
/*! /*!
@ -314,30 +314,30 @@ namespace xgboost{
* the function is not consistent between 64bit and 32bit machin * the function is not consistent between 64bit and 32bit machin
* \param fi input stream * \param fi input stream
*/ */
inline void LoadBinary( utils::IStream &fi ){ inline void LoadBinary(utils::IStream &fi){
FMatrixS::LoadBinary( fi, row_ptr_, row_data_ ); FMatrixS::LoadBinary(fi, row_ptr_, row_data_);
int col_access; int col_access;
fi.Read( &col_access, sizeof(int) ); fi.Read(&col_access, sizeof(int));
if( col_access != 0 ){ if (col_access != 0){
FMatrixS::LoadBinary( fi, col_ptr_, col_data_ ); FMatrixS::LoadBinary(fi, col_ptr_, col_data_);
} }
} }
/*! /*!
* \brief load from text file * \brief load from text file
* \param fi input file pointer * \param fi input file pointer
*/ */
inline void LoadText( FILE *fi ){ inline void LoadText(FILE *fi){
this->Clear(); this->Clear();
int ninst; int ninst;
while( fscanf( fi, "%d", &ninst ) == 1 ){ while (fscanf(fi, "%d", &ninst) == 1){
std::vector<booster::bst_uint> findex; std::vector<booster::bst_uint> findex;
std::vector<booster::bst_float> fvalue; std::vector<booster::bst_float> fvalue;
while( ninst -- ){ while (ninst--){
unsigned index; float value; unsigned index; float value;
utils::Assert( fscanf( fi, "%u:%f", &index, &value ) == 2, "load Text" ); utils::Assert(fscanf(fi, "%u:%f", &index, &value) == 2, "load Text");
findex.push_back( index ); fvalue.push_back( value ); findex.push_back(index); fvalue.push_back(value);
} }
this->AddRow( findex, fvalue ); this->AddRow(findex, fvalue);
} }
// initialize column support as well // initialize column support as well
this->InitData(); this->InitData();
@ -349,14 +349,14 @@ namespace xgboost{
* \param ptr pointer data * \param ptr pointer data
* \param data data content * \param data data content
*/ */
inline static void SaveBinary( utils::IStream &fo, inline static void SaveBinary(utils::IStream &fo,
const std::vector<size_t> &ptr, const std::vector<size_t> &ptr,
const std::vector<REntry> &data ){ const std::vector<REntry> &data){
size_t nrow = ptr.size() - 1; size_t nrow = ptr.size() - 1;
fo.Write( &nrow, sizeof(size_t) ); fo.Write(&nrow, sizeof(size_t));
fo.Write( &ptr[0], ptr.size() * sizeof(size_t) ); fo.Write(&ptr[0], ptr.size() * sizeof(size_t));
if( data.size() != 0 ){ if (data.size() != 0){
fo.Write( &data[0] , data.size() * sizeof(REntry) ); fo.Write(&data[0], data.size() * sizeof(REntry));
} }
} }
/*! /*!
@ -365,17 +365,17 @@ namespace xgboost{
* \param ptr pointer data * \param ptr pointer data
* \param data data content * \param data data content
*/ */
inline static void LoadBinary( utils::IStream &fi, inline static void LoadBinary(utils::IStream &fi,
std::vector<size_t> &ptr, std::vector<size_t> &ptr,
std::vector<REntry> &data ){ std::vector<REntry> &data){
size_t nrow; size_t nrow;
utils::Assert( fi.Read( &nrow, sizeof(size_t) ) != 0, "Load FMatrixS" ); utils::Assert(fi.Read(&nrow, sizeof(size_t)) != 0, "Load FMatrixS");
ptr.resize( nrow + 1 ); ptr.resize(nrow + 1);
utils::Assert( fi.Read( &ptr[0], ptr.size() * sizeof(size_t) ), "Load FMatrixS" ); utils::Assert(fi.Read(&ptr[0], ptr.size() * sizeof(size_t)), "Load FMatrixS");
data.resize( ptr.back() ); data.resize(ptr.back());
if( data.size() != 0 ){ if (data.size() != 0){
utils::Assert( fi.Read( &data[0] , data.size() * sizeof(REntry) ) , "Load FMatrixS" ); utils::Assert(fi.Read(&data[0], data.size() * sizeof(REntry)), "Load FMatrixS");
} }
} }
protected: protected:

View File

@ -49,9 +49,9 @@ namespace xgboost{
class GBMBase{ class GBMBase{
public: public:
/*! \brief number of thread used */ /*! \brief number of thread used */
GBMBase( void ){} GBMBase(void){}
/*! \brief destructor */ /*! \brief destructor */
virtual ~GBMBase( void ){ virtual ~GBMBase(void){
this->FreeSpace(); this->FreeSpace();
} }
/*! /*!
@ -59,80 +59,80 @@ namespace xgboost{
* \param name name of the parameter * \param name name of the parameter
* \param val value of the parameter * \param val value of the parameter
*/ */
inline void SetParam( const char *name, const char *val ){ inline void SetParam(const char *name, const char *val){
if( !strncmp( name, "bst:", 4 ) ){ if (!strncmp(name, "bst:", 4)){
cfg.PushBack( name + 4, val ); cfg.PushBack(name + 4, val);
} }
if( !strcmp( name, "silent") ){ if (!strcmp(name, "silent")){
cfg.PushBack( name, val ); cfg.PushBack(name, val);
} }
tparam.SetParam( name, val ); tparam.SetParam(name, val);
if( boosters.size() == 0 ) mparam.SetParam( name, val ); if (boosters.size() == 0) mparam.SetParam(name, val);
} }
/*! /*!
* \brief load model from stream * \brief load model from stream
* \param fi input stream * \param fi input stream
*/ */
inline void LoadModel( utils::IStream &fi ){ inline void LoadModel(utils::IStream &fi){
if( boosters.size() != 0 ) this->FreeSpace(); if (boosters.size() != 0) this->FreeSpace();
utils::Assert( fi.Read( &mparam, sizeof(ModelParam) ) != 0 ); utils::Assert(fi.Read(&mparam, sizeof(ModelParam)) != 0);
boosters.resize( mparam.num_boosters ); boosters.resize(mparam.num_boosters);
for( size_t i = 0; i < boosters.size(); i ++ ){ for (size_t i = 0; i < boosters.size(); i++){
boosters[ i ] = booster::CreateBooster<FMatrixS>( mparam.booster_type ); boosters[i] = booster::CreateBooster<FMatrixS>(mparam.booster_type);
boosters[ i ]->LoadModel( fi ); boosters[i]->LoadModel(fi);
} }
{// load info {// load info
booster_info.resize( mparam.num_boosters ); booster_info.resize(mparam.num_boosters);
if( mparam.num_boosters != 0 ){ if (mparam.num_boosters != 0){
utils::Assert( fi.Read( &booster_info[0], sizeof(int)*mparam.num_boosters ) != 0 ); utils::Assert(fi.Read(&booster_info[0], sizeof(int)*mparam.num_boosters) != 0);
} }
} }
if( mparam.num_pbuffer != 0 ){ if (mparam.num_pbuffer != 0){
pred_buffer.resize ( mparam.num_pbuffer ); pred_buffer.resize(mparam.num_pbuffer);
pred_counter.resize( mparam.num_pbuffer ); pred_counter.resize(mparam.num_pbuffer);
utils::Assert( fi.Read( &pred_buffer[0] , pred_buffer.size()*sizeof(float) ) != 0 ); utils::Assert(fi.Read(&pred_buffer[0], pred_buffer.size()*sizeof(float)) != 0);
utils::Assert( fi.Read( &pred_counter[0], pred_counter.size()*sizeof(unsigned) ) != 0 ); utils::Assert(fi.Read(&pred_counter[0], pred_counter.size()*sizeof(unsigned)) != 0);
} }
} }
/*! /*!
* \brief save model to stream * \brief save model to stream
* \param fo output stream * \param fo output stream
*/ */
inline void SaveModel( utils::IStream &fo ) const { inline void SaveModel(utils::IStream &fo) const {
utils::Assert( mparam.num_boosters == (int)boosters.size() ); utils::Assert(mparam.num_boosters == (int)boosters.size());
fo.Write( &mparam, sizeof(ModelParam) ); fo.Write(&mparam, sizeof(ModelParam));
for( size_t i = 0; i < boosters.size(); i ++ ){ for (size_t i = 0; i < boosters.size(); i++){
boosters[ i ]->SaveModel( fo ); boosters[i]->SaveModel(fo);
} }
if( booster_info.size() != 0 ){ if (booster_info.size() != 0){
fo.Write( &booster_info[0], sizeof(int) * booster_info.size() ); fo.Write(&booster_info[0], sizeof(int)* booster_info.size());
} }
if( mparam.num_pbuffer != 0 ){ if (mparam.num_pbuffer != 0){
fo.Write( &pred_buffer[0] , pred_buffer.size()*sizeof(float) ); fo.Write(&pred_buffer[0], pred_buffer.size()*sizeof(float));
fo.Write( &pred_counter[0], pred_counter.size()*sizeof(unsigned) ); fo.Write(&pred_counter[0], pred_counter.size()*sizeof(unsigned));
} }
} }
/*! /*!
* \brief initialize the current data storage for model, if the model is used first time, call this function * \brief initialize the current data storage for model, if the model is used first time, call this function
*/ */
inline void InitModel( void ){ inline void InitModel(void){
pred_buffer.clear(); pred_counter.clear(); pred_buffer.clear(); pred_counter.clear();
pred_buffer.resize ( mparam.num_pbuffer, 0.0 ); pred_buffer.resize(mparam.num_pbuffer, 0.0);
pred_counter.resize( mparam.num_pbuffer, 0 ); pred_counter.resize(mparam.num_pbuffer, 0);
utils::Assert( mparam.num_boosters == 0 ); utils::Assert(mparam.num_boosters == 0);
utils::Assert( boosters.size() == 0 ); utils::Assert(boosters.size() == 0);
} }
/*! /*!
* \brief initialize solver before training, called before training * \brief initialize solver before training, called before training
* this function is reserved for solver to allocate necessary space and do other preparation * this function is reserved for solver to allocate necessary space and do other preparation
*/ */
inline void InitTrainer( void ){ inline void InitTrainer(void){
if( tparam.nthread != 0 ){ if (tparam.nthread != 0){
omp_set_num_threads( tparam.nthread ); omp_set_num_threads(tparam.nthread);
} }
// make sure all the boosters get the latest parameters // make sure all the boosters get the latest parameters
for( size_t i = 0; i < this->boosters.size(); i ++ ){ for (size_t i = 0; i < this->boosters.size(); i++){
this->ConfigBooster( this->boosters[i] ); this->ConfigBooster(this->boosters[i]);
} }
} }
/*! /*!
@ -141,10 +141,10 @@ namespace xgboost{
* \param fmap feature map that may help give interpretations of feature * \param fmap feature map that may help give interpretations of feature
* \param with_stats whether print statistics * \param with_stats whether print statistics
*/ */
inline void DumpModel( FILE *fo, const utils::FeatMap& fmap, bool with_stats ){ inline void DumpModel(FILE *fo, const utils::FeatMap& fmap, bool with_stats){
for( size_t i = 0; i < boosters.size(); i ++ ){ for (size_t i = 0; i < boosters.size(); i++){
fprintf( fo, "booster[%d]\n", (int)i ); fprintf(fo, "booster[%d]\n", (int)i);
boosters[i]->DumpModel( fo, fmap, with_stats ); boosters[i]->DumpModel(fo, fmap, with_stats);
} }
} }
/*! /*!
@ -152,18 +152,18 @@ namespace xgboost{
* \param fo text file * \param fo text file
* \param data input data * \param data input data
*/ */
inline void DumpPath( FILE *fo, const FMatrixS &data ){ inline void DumpPath(FILE *fo, const FMatrixS &data){
for( size_t i = 0; i < data.NumRow(); ++ i ){ for (size_t i = 0; i < data.NumRow(); ++i){
for( size_t j = 0; j < boosters.size(); ++ j ){ for (size_t j = 0; j < boosters.size(); ++j){
if( j != 0 ) fprintf( fo, "\t" ); if (j != 0) fprintf(fo, "\t");
std::vector<int> path; std::vector<int> path;
boosters[j]->PredPath( path, data, i ); boosters[j]->PredPath(path, data, i);
fprintf( fo, "%d", path[0] ); fprintf(fo, "%d", path[0]);
for( size_t k = 1; k < path.size(); ++ k ){ for (size_t k = 1; k < path.size(); ++k){
fprintf( fo, ",%d", path[k] ); fprintf(fo, ",%d", path[k]);
} }
} }
fprintf( fo, "\n" ); fprintf(fo, "\n");
} }
} }
public: public:
@ -176,12 +176,12 @@ namespace xgboost{
* \param root_index pre-partitioned root index of each instance, * \param root_index pre-partitioned root index of each instance,
* root_index.size() can be 0 which indicates that no pre-partition involved * root_index.size() can be 0 which indicates that no pre-partition involved
*/ */
inline void DoBoost( std::vector<float> &grad, inline void DoBoost(std::vector<float> &grad,
std::vector<float> &hess, std::vector<float> &hess,
const booster::FMatrixS &feats, const booster::FMatrixS &feats,
const std::vector<unsigned> &root_index ) { const std::vector<unsigned> &root_index) {
booster::IBooster *bst = this->GetUpdateBooster(); booster::IBooster *bst = this->GetUpdateBooster();
bst->DoBoost( grad, hess, feats, root_index ); bst->DoBoost(grad, hess, feats, root_index);
} }
/*! /*!
* \brief predict values for given sparse feature vector * \brief predict values for given sparse feature vector
@ -192,24 +192,24 @@ namespace xgboost{
* \param root_index root id of current instance, default = 0 * \param root_index root id of current instance, default = 0
* \return prediction * \return prediction
*/ */
inline float Predict( const FMatrixS &feats, bst_uint row_index, int buffer_index = -1, unsigned root_index = 0 ){ inline float Predict(const FMatrixS &feats, bst_uint row_index, int buffer_index = -1, unsigned root_index = 0){
size_t istart = 0; size_t istart = 0;
float psum = 0.0f; float psum = 0.0f;
// load buffered results if any // load buffered results if any
if( mparam.do_reboost == 0 && buffer_index >= 0 ){ if (mparam.do_reboost == 0 && buffer_index >= 0){
utils::Assert( buffer_index < mparam.num_pbuffer, "buffer index exceed num_pbuffer" ); utils::Assert(buffer_index < mparam.num_pbuffer, "buffer index exceed num_pbuffer");
istart = this->pred_counter[ buffer_index ]; istart = this->pred_counter[buffer_index];
psum = this->pred_buffer [ buffer_index ]; psum = this->pred_buffer[buffer_index];
} }
for( size_t i = istart; i < this->boosters.size(); i ++ ){ for (size_t i = istart; i < this->boosters.size(); i++){
psum += this->boosters[ i ]->Predict( feats, row_index, root_index ); psum += this->boosters[i]->Predict(feats, row_index, root_index);
} }
// updated the buffered results // updated the buffered results
if( mparam.do_reboost == 0 && buffer_index >= 0 ){ if (mparam.do_reboost == 0 && buffer_index >= 0){
this->pred_counter[ buffer_index ] = static_cast<unsigned>( boosters.size() ); this->pred_counter[buffer_index] = static_cast<unsigned>(boosters.size());
this->pred_buffer [ buffer_index ] = psum; this->pred_buffer[buffer_index] = psum;
} }
return psum; return psum;
} }
@ -220,76 +220,77 @@ namespace xgboost{
* \brief same as Predict, but removes the prediction of booster to be updated * \brief same as Predict, but removes the prediction of booster to be updated
* this function must be called once and only once for every data with pbuffer * this function must be called once and only once for every data with pbuffer
*/ */
inline float InteractPredict( const FMatrixS &feats, bst_uint row_index, int buffer_index = -1, unsigned root_index = 0 ){ inline float InteractPredict(const FMatrixS &feats, bst_uint row_index, int buffer_index = -1, unsigned root_index = 0){
float psum = this->Predict( feats, row_index, buffer_index, root_index ); float psum = this->Predict(feats, row_index, buffer_index, root_index);
if( tparam.reupdate_booster != -1 ){ if (tparam.reupdate_booster != -1){
const int bid = tparam.reupdate_booster; const int bid = tparam.reupdate_booster;
utils::Assert( bid >= 0 && bid < (int)boosters.size(), "interact:booster_index exceed existing bound" ); utils::Assert(bid >= 0 && bid < (int)boosters.size(), "interact:booster_index exceed existing bound");
psum -= boosters[ bid ]->Predict( feats, row_index, root_index ); psum -= boosters[bid]->Predict(feats, row_index, root_index);
if( mparam.do_reboost == 0 && buffer_index >= 0 ){ if (mparam.do_reboost == 0 && buffer_index >= 0){
this->pred_buffer[ buffer_index ] = psum; this->pred_buffer[buffer_index] = psum;
} }
} }
return psum; return psum;
} }
/*! \brief delete the specified booster */ /*! \brief delete the specified booster */
inline void DelteBooster( void ){ inline void DelteBooster(void){
const int bid = tparam.reupdate_booster; const int bid = tparam.reupdate_booster;
utils::Assert( bid >= 0 && bid < mparam.num_boosters , "must specify booster index for deletion"); utils::Assert(bid >= 0 && bid < mparam.num_boosters, "must specify booster index for deletion");
delete boosters[ bid ]; delete boosters[bid];
for( int i = bid + 1; i < mparam.num_boosters; ++ i ){ for (int i = bid + 1; i < mparam.num_boosters; ++i){
boosters[i-1] = boosters[ i ]; boosters[i - 1] = boosters[i];
booster_info[i-1] = booster_info[ i ]; booster_info[i - 1] = booster_info[i];
} }
boosters.resize( mparam.num_boosters -= 1 ); boosters.resize(mparam.num_boosters -= 1);
booster_info.resize( boosters.size() ); booster_info.resize(boosters.size());
} }
/*! \brief update the prediction buffer, after booster have been updated */ /*! \brief update the prediction buffer, after booster have been updated */
inline void InteractRePredict( const FMatrixS &feats, bst_uint row_index, int buffer_index = -1, unsigned root_index = 0 ){ inline void InteractRePredict(const FMatrixS &feats, bst_uint row_index, int buffer_index = -1, unsigned root_index = 0){
if( tparam.reupdate_booster != -1 ){ if (tparam.reupdate_booster != -1){
const int bid = tparam.reupdate_booster; const int bid = tparam.reupdate_booster;
utils::Assert( bid >= 0 && bid < (int)boosters.size(), "interact:booster_index exceed existing bound" ); utils::Assert(bid >= 0 && bid < (int)boosters.size(), "interact:booster_index exceed existing bound");
if( mparam.do_reboost == 0 && buffer_index >= 0 ){ if (mparam.do_reboost == 0 && buffer_index >= 0){
this->pred_buffer[ buffer_index ] += boosters[ bid ]->Predict( feats, row_index, root_index ); this->pred_buffer[buffer_index] += boosters[bid]->Predict(feats, row_index, root_index);
} }
} }
} }
//-----------non public fields afterwards------------- //-----------non public fields afterwards-------------
protected: protected:
/*! \brief free space of the model */ /*! \brief free space of the model */
inline void FreeSpace( void ){ inline void FreeSpace(void){
for( size_t i = 0; i < boosters.size(); i ++ ){ for (size_t i = 0; i < boosters.size(); i++){
delete boosters[i]; delete boosters[i];
} }
boosters.clear(); booster_info.clear(); mparam.num_boosters = 0; boosters.clear(); booster_info.clear(); mparam.num_boosters = 0;
} }
/*! \brief configure a booster */ /*! \brief configure a booster */
inline void ConfigBooster( booster::IBooster *bst ){ inline void ConfigBooster(booster::IBooster *bst){
cfg.BeforeFirst(); cfg.BeforeFirst();
while( cfg.Next() ){ while (cfg.Next()){
bst->SetParam( cfg.name(), cfg.val() ); bst->SetParam(cfg.name(), cfg.val());
} }
} }
/*! /*!
* \brief get a booster to update * \brief get a booster to update
* \return the booster created * \return the booster created
*/ */
inline booster::IBooster *GetUpdateBooster( void ){ inline booster::IBooster *GetUpdateBooster(void){
if( tparam.reupdate_booster != -1 ){ if (tparam.reupdate_booster != -1){
const int bid = tparam.reupdate_booster; const int bid = tparam.reupdate_booster;
utils::Assert( bid >= 0 && bid < (int)boosters.size(), "interact:booster_index exceed existing bound" ); utils::Assert(bid >= 0 && bid < (int)boosters.size(), "interact:booster_index exceed existing bound");
this->ConfigBooster( boosters[bid] ); this->ConfigBooster(boosters[bid]);
return boosters[ bid ]; return boosters[bid];
} }
if( mparam.do_reboost == 0 || boosters.size() == 0 ){ if (mparam.do_reboost == 0 || boosters.size() == 0){
mparam.num_boosters += 1; mparam.num_boosters += 1;
boosters.push_back( booster::CreateBooster<FMatrixS>( mparam.booster_type ) ); boosters.push_back(booster::CreateBooster<FMatrixS>(mparam.booster_type));
booster_info.push_back( 0 ); booster_info.push_back(0);
this->ConfigBooster( boosters.back() ); this->ConfigBooster(boosters.back());
boosters.back()->InitModel(); boosters.back()->InitModel();
}else{ }
this->ConfigBooster( boosters.back() ); else{
this->ConfigBooster(boosters.back());
} }
return boosters.back(); return boosters.back();
} }
@ -312,31 +313,31 @@ namespace xgboost{
*/ */
int do_reboost; int do_reboost;
/*! \brief reserved parameters */ /*! \brief reserved parameters */
int reserved[ 32 ]; int reserved[32];
/*! \brief constructor */ /*! \brief constructor */
ModelParam( void ){ ModelParam(void){
num_boosters = 0; num_boosters = 0;
booster_type = 0; booster_type = 0;
num_roots = num_feature = 0; num_roots = num_feature = 0;
do_reboost = 0; do_reboost = 0;
num_pbuffer = 0; num_pbuffer = 0;
memset( reserved, 0, sizeof( reserved ) ); memset(reserved, 0, sizeof(reserved));
} }
/*! /*!
* \brief set parameters from outside * \brief set parameters from outside
* \param name name of the parameter * \param name name of the parameter
* \param val value of the parameter * \param val value of the parameter
*/ */
inline void SetParam( const char *name, const char *val ){ inline void SetParam(const char *name, const char *val){
if( !strcmp("booster_type", name ) ){ if (!strcmp("booster_type", name)){
booster_type = atoi( val ); booster_type = atoi(val);
// linear boost automatically set do reboost // linear boost automatically set do reboost
if( booster_type == 1 ) do_reboost = 1; if (booster_type == 1) do_reboost = 1;
} }
if( !strcmp("num_pbuffer", name ) ) num_pbuffer = atoi( val ); if (!strcmp("num_pbuffer", name)) num_pbuffer = atoi(val);
if( !strcmp("do_reboost", name ) ) do_reboost = atoi( val ); if (!strcmp("do_reboost", name)) do_reboost = atoi(val);
if( !strcmp("bst:num_roots", name ) ) num_roots = atoi( val ); if (!strcmp("bst:num_roots", name)) num_roots = atoi(val);
if( !strcmp("bst:num_feature", name ) ) num_feature = atoi( val ); if (!strcmp("bst:num_feature", name)) num_feature = atoi(val);
} }
}; };
/*! \brief training parameters */ /*! \brief training parameters */
@ -349,7 +350,7 @@ namespace xgboost{
*/ */
int reupdate_booster; int reupdate_booster;
/*! \brief constructor */ /*! \brief constructor */
TrainParam( void ) { TrainParam(void) {
nthread = 1; nthread = 1;
reupdate_booster = -1; reupdate_booster = -1;
} }
@ -358,9 +359,9 @@ namespace xgboost{
* \param name name of the parameter * \param name name of the parameter
* \param val value of the parameter * \param val value of the parameter
*/ */
inline void SetParam( const char *name, const char *val ){ inline void SetParam(const char *name, const char *val){
if( !strcmp("nthread", name ) ) nthread = atoi( val ); if (!strcmp("nthread", name)) nthread = atoi(val);
if( !strcmp("interact:booster_index", name ) ) reupdate_booster = atoi( val ); if (!strcmp("interact:booster_index", name)) reupdate_booster = atoi(val);
} }
}; };
protected: protected:

13
demo/rank/README Normal file
View File

@ -0,0 +1,13 @@
Demonstrating how to use XGBoost accomplish regression tasks on computer hardware dataset https://archive.ics.uci.edu/ml/datasets/Computer+Hardware
Run: ./runexp.sh
Format of input: LIBSVM format
Format of ```featmap.txt: <featureid> <featurename> <q or i or int>\n ```:
- Feature id must be from 0 to number of features, in sorted order.
- i means this feature is binary indicator feature
- q means this feature is a quantitative value, such as age, time, can be missing
- int means this feature is integer value (when int is hinted, the decision boundary will be integer)
Explainations: https://github.com/tqchen/xgboost/wiki/Regression

16
demo/rank/runexp.sh Normal file
View File

@ -0,0 +1,16 @@
#!/bin/bash
# map the data to features. For convenience we only use 7 original attributes and encode them as features in a trivial way
python mapfeat.py
# split train and test
python mknfold.py machine.txt 1
# training and output the models
../../xgboost machine.conf
# output predictions of test data
../../xgboost machine.conf task=pred model_in=0002.model
# print the boosters of 0002.model in dump.raw.txt
../../xgboost machine.conf task=dump model_in=0002.model name_dump=dump.raw.txt
# print the boosters of 0002.model in dump.nice.txt with feature map
../../xgboost machine.conf task=dump model_in=0002.model fmap=featmap.txt name_dump=dump.nice.txt
# cat the result
cat dump.nice.txt

5
demo/rank/toy.eval Normal file
View File

@ -0,0 +1,5 @@
1 0:2 1:3 2:2
0 0:2 1:3 2:2
0 0:2 1:3 2:2
0 0:2 1:3 2:2
1 0:2 1:3 2:2

2
demo/rank/toy.eval.group Normal file
View File

@ -0,0 +1,2 @@
2
3

5
demo/rank/toy.test Normal file
View File

@ -0,0 +1,5 @@
1 0:2 1:3 2:2
0 0:2 1:3 2:2
0 0:2 1:3 2:2
0 0:2 1:3 2:2
1 0:2 1:3 2:2

2
demo/rank/toy.test.group Normal file
View File

@ -0,0 +1,2 @@
2
3

5
demo/rank/toy.train Normal file
View File

@ -0,0 +1,5 @@
1 0:2 1:3 2:2
0 0:2 1:3 2:2
0 0:2 1:3 2:2
0 0:2 1:3 2:2
1 0:2 1:3 2:2

View File

@ -0,0 +1,2 @@
2
3

0
demo/rank/train Normal file
View File

View File

@ -11,314 +11,319 @@
#include "../utils/xgboost_config.h" #include "../utils/xgboost_config.h"
namespace xgboost{ namespace xgboost{
namespace base{ namespace base{
/*! /*!
* \brief wrapping the training process of the gradient boosting model, * \brief wrapping the training process of the gradient boosting model,
* given the configuation * given the configuation
* \author Kailong Chen: chenkl198812@gmail.com, Tianqi Chen: tianqi.chen@gmail.com * \author Kailong Chen: chenkl198812@gmail.com, Tianqi Chen: tianqi.chen@gmail.com
*/ */
class BoostTask{ class BoostTask{
public: public:
inline int Run(int argc, char *argv[]){ inline int Run(int argc, char *argv[]){
if (argc < 2){
printf("Usage: <config>\n");
return 0;
}
utils::ConfigIterator itr(argv[1]);
while (itr.Next()){
this->SetParam(itr.name(), itr.val());
}
for (int i = 2; i < argc; i++){
char name[256], val[256];
if (sscanf(argv[i], "%[^=]=%s", name, val) == 2){
this->SetParam(name, val);
}
}
this->InitData();
this->InitLearner();
if (task == "dump"){
this->TaskDump();
return 0;
}
if (task == "interact"){
this->TaskInteractive(); return 0;
}
if (task == "dumppath"){
this->TaskDumpPath(); return 0;
}
if (task == "eval"){
this->TaskEval(); return 0;
}
if (task == "pred"){
this->TaskPred();
}
else{
this->TaskTrain();
}
return 0;
}
enum learning_tasks{ if (argc < 2){
REGRESSION = 0, printf("Usage: <config>\n");
BINARY_CLASSIFICATION = 1, return 0;
RANKING = 2 }
}; utils::ConfigIterator itr(argv[1]);
while (itr.Next()){
this->SetParam(itr.name(), itr.val());
}
for (int i = 2; i < argc; i++){
char name[256], val[256];
if (sscanf(argv[i], "%[^=]=%s", name, val) == 2){
this->SetParam(name, val);
}
}
/* \brief set learner this->InitData();
* \param learner the passed in learner this->InitLearner();
*/ if (task == "dump"){
inline void SetLearner(BoostLearner* learner){ this->TaskDump();
learner_ = learner; return 0;
} }
if (task == "interact"){
this->TaskInteractive(); return 0;
}
if (task == "dumppath"){
this->TaskDumpPath(); return 0;
}
if (task == "eval"){
this->TaskEval(); return 0;
}
if (task == "pred"){
this->TaskPred();
}
else{
this->TaskTrain();
}
return 0;
}
inline void SetParam(const char *name, const char *val){ enum learning_tasks{
if (!strcmp("learning_task", name)) learning_task = atoi(val); REGRESSION = 0,
if (!strcmp("silent", name)) silent = atoi(val); BINARY_CLASSIFICATION = 1,
if (!strcmp("use_buffer", name)) use_buffer = atoi(val); RANKING = 2
if (!strcmp("seed", name)) random::Seed(atoi(val)); };
if (!strcmp("num_round", name)) num_round = atoi(val);
if (!strcmp("save_period", name)) save_period = atoi(val); /* \brief set learner
if (!strcmp("task", name)) task = val; * \param learner the passed in learner
if (!strcmp("data", name)) train_path = val; */
if (!strcmp("test:data", name)) test_path = val; inline void SetLearner(BoostLearner* learner){
if (!strcmp("model_in", name)) model_in = val; learner_ = learner;
if (!strcmp("model_out", name)) model_out = val; }
if (!strcmp("model_dir", name)) model_dir_path = val;
if (!strcmp("fmap", name)) name_fmap = val; inline void SetParam(const char *name, const char *val){
if (!strcmp("name_dump", name)) name_dump = val; if (!strcmp("learning_task", name)) learning_task = atoi(val);
if (!strcmp("name_dumppath", name)) name_dumppath = val; if (!strcmp("silent", name)) silent = atoi(val);
if (!strcmp("name_pred", name)) name_pred = val; if (!strcmp("use_buffer", name)) use_buffer = atoi(val);
if (!strcmp("dump_stats", name)) dump_model_stats = atoi(val); if (!strcmp("seed", name)) random::Seed(atoi(val));
if (!strcmp("interact:action", name)) interact_action = val; if (!strcmp("num_round", name)) num_round = atoi(val);
if (!strncmp("batch:", name, 6)){ if (!strcmp("save_period", name)) save_period = atoi(val);
cfg_batch.PushBack(name + 6, val); if (!strcmp("task", name)) task = val;
} if (!strcmp("data", name)) train_path = val;
if (!strncmp("eval[", name, 5)) { if (!strcmp("test:data", name)) test_path = val;
char evname[256]; if (!strcmp("model_in", name)) model_in = val;
utils::Assert(sscanf(name, "eval[%[^]]", evname) == 1, "must specify evaluation name for display"); if (!strcmp("model_out", name)) model_out = val;
eval_data_names.push_back(std::string(evname)); if (!strcmp("model_dir", name)) model_dir_path = val;
eval_data_paths.push_back(std::string(val)); if (!strcmp("fmap", name)) name_fmap = val;
} if (!strcmp("name_dump", name)) name_dump = val;
cfg.PushBack(name, val); if (!strcmp("name_dumppath", name)) name_dumppath = val;
} if (!strcmp("name_pred", name)) name_pred = val;
public: if (!strcmp("dump_stats", name)) dump_model_stats = atoi(val);
BoostTask(void){ if (!strcmp("interact:action", name)) interact_action = val;
// default parameters if (!strncmp("batch:", name, 6)){
silent = 0; cfg_batch.PushBack(name + 6, val);
use_buffer = 1; }
num_round = 10; if (!strncmp("eval[", name, 5)) {
save_period = 0; char evname[256];
dump_model_stats = 0; utils::Assert(sscanf(name, "eval[%[^]]", evname) == 1, "must specify evaluation name for display");
task = "train"; eval_data_names.push_back(std::string(evname));
model_in = "NULL"; eval_data_paths.push_back(std::string(val));
model_out = "NULL"; }
name_fmap = "NULL"; cfg.PushBack(name, val);
name_pred = "pred.txt"; }
name_dump = "dump.txt"; public:
name_dumppath = "dump.path.txt"; BoostTask(void){
model_dir_path = "./"; // default parameters
interact_action = "update"; silent = 0;
} use_buffer = 1;
~BoostTask(void){ num_round = 10;
for (size_t i = 0; i < deval.size(); i++){ save_period = 0;
delete deval[i]; dump_model_stats = 0;
} task = "train";
} model_in = "NULL";
private: model_out = "NULL";
name_fmap = "NULL";
name_pred = "pred.txt";
name_dump = "dump.txt";
name_dumppath = "dump.path.txt";
model_dir_path = "./";
interact_action = "update";
}
~BoostTask(void){
for (size_t i = 0; i < deval.size(); i++){
delete deval[i];
}
}
private:
inline void InitData(void){ inline void InitData(void){
if (name_fmap != "NULL") fmap.LoadText(name_fmap.c_str());
if (task == "dump") return;
if (learning_task == RANKING){
char instance_path[256], group_path[256];
if (task == "pred" || task == "dumppath"){
sscanf(test_path.c_str(), "%[^;];%s", instance_path, group_path);
data.CacheLoad(instance_path, group_path, silent != 0, use_buffer != 0);
}
else{
// training
sscanf(train_path.c_str(), "%[^;];%s", instance_path, group_path);
data.CacheLoad(instance_path, group_path, silent != 0, use_buffer != 0);
utils::Assert(eval_data_names.size() == eval_data_paths.size());
for (size_t i = 0; i < eval_data_names.size(); ++i){
deval.push_back(new DMatrix());
sscanf(eval_data_paths[i].c_str(), "%[^;];%s", instance_path, group_path);
deval.back()->CacheLoad(instance_path, group_path, silent != 0, use_buffer != 0);
}
}
if (name_fmap != "NULL") fmap.LoadText(name_fmap.c_str());
if (task == "dump") return;
if (learning_task == RANKING){
char instance_path[256], group_path[256];
if (task == "pred" || task == "dumppath"){
sscanf(test_path.c_str(), "%[^;];%s", instance_path, group_path);
data.CacheLoad(instance_path, group_path, silent != 0, use_buffer != 0);
}
else{
// training
sscanf(train_path.c_str(), "%[^;];%s", instance_path, group_path);
data.CacheLoad(instance_path, group_path, silent != 0, use_buffer != 0);
} utils::Assert(eval_data_names.size() == eval_data_paths.size());
else{ for (size_t i = 0; i < eval_data_names.size(); ++i){
if (task == "pred" || task == "dumppath"){ deval.push_back(new DMatrix());
data.CacheLoad(test_path.c_str(), "", silent != 0, use_buffer != 0); sscanf(eval_data_paths[i].c_str(), "%[^;];%s", instance_path, group_path);
} deval.back()->CacheLoad(instance_path, group_path, silent != 0, use_buffer != 0);
else{ }
// training }
data.CacheLoad(train_path.c_str(), "", silent != 0, use_buffer != 0); }
utils::Assert(eval_data_names.size() == eval_data_paths.size()); else{
for (size_t i = 0; i < eval_data_names.size(); ++i){ if (task == "pred" || task == "dumppath"){
deval.push_back(new DMatrix()); data.CacheLoad(test_path.c_str(), "", silent != 0, use_buffer != 0);
deval.back()->CacheLoad(eval_data_paths[i].c_str(), "", silent != 0, use_buffer != 0); }
} else{
} // training
} data.CacheLoad(train_path.c_str(), "", silent != 0, use_buffer != 0);
utils::Assert(eval_data_names.size() == eval_data_paths.size());
for (size_t i = 0; i < eval_data_names.size(); ++i){
deval.push_back(new DMatrix());
deval.back()->CacheLoad(eval_data_paths[i].c_str(), "", silent != 0, use_buffer != 0);
}
}
}
learner_->SetData(&data, deval, eval_data_names); learner_->SetData(&data, deval, eval_data_names);
} if(!silent) printf("BoostTask:Data Initiation Done!\n");
inline void InitLearner(void){ }
cfg.BeforeFirst();
while (cfg.Next()){
learner_->SetParam(cfg.name(), cfg.val());
}
if (model_in != "NULL"){
utils::FileStream fi(utils::FopenCheck(model_in.c_str(), "rb"));
learner_->LoadModel(fi);
fi.Close();
}
else{
utils::Assert(task == "train", "model_in not specified");
learner_->InitModel();
}
learner_->InitTrainer();
}
inline void TaskTrain(void){ inline void InitLearner(void){
const time_t start = time(NULL); cfg.BeforeFirst();
unsigned long elapsed = 0; while (cfg.Next()){
for (int i = 0; i < num_round; ++i){ learner_->SetParam(cfg.name(), cfg.val());
elapsed = (unsigned long)(time(NULL) - start); }
if (!silent) printf("boosting round %d, %lu sec elapsed\n", i, elapsed); if (model_in != "NULL"){
learner_->UpdateOneIter(i); utils::FileStream fi(utils::FopenCheck(model_in.c_str(), "rb"));
learner_->EvalOneIter(i); learner_->LoadModel(fi);
if (save_period != 0 && (i + 1) % save_period == 0){ fi.Close();
this->SaveModel(i); }
} else{
elapsed = (unsigned long)(time(NULL) - start); utils::Assert(task == "train", "model_in not specified");
} learner_->InitModel();
// always save final round }
if (save_period == 0 || num_round % save_period != 0){ learner_->InitTrainer();
if (model_out == "NULL"){ if(!silent) printf("BoostTask:InitLearner Done!\n");
this->SaveModel(num_round - 1); }
}
else{
this->SaveModel(model_out.c_str());
}
}
if (!silent){
printf("\nupdating end, %lu sec in all\n", elapsed);
}
}
inline void TaskEval(void){
learner_->EvalOneIter(0);
}
inline void TaskInteractive(void){
const time_t start = time(NULL);
unsigned long elapsed = 0;
int batch_action = 0;
cfg_batch.BeforeFirst(); inline void TaskTrain(void){
while (cfg_batch.Next()){ const time_t start = time(NULL);
if (!strcmp(cfg_batch.name(), "run")){ unsigned long elapsed = 0;
learner_->UpdateInteract(interact_action); for (int i = 0; i < num_round; ++i){
batch_action += 1; elapsed = (unsigned long)(time(NULL) - start);
} if (!silent) printf("boosting round %d, %lu sec elapsed\n", i, elapsed);
else{ learner_->UpdateOneIter(i);
learner_->SetParam(cfg_batch.name(), cfg_batch.val()); learner_->EvalOneIter(i);
} if (save_period != 0 && (i + 1) % save_period == 0){
} this->SaveModel(i);
}
elapsed = (unsigned long)(time(NULL) - start);
}
// always save final round
if (save_period == 0 || num_round % save_period != 0){
if (model_out == "NULL"){
this->SaveModel(num_round - 1);
}
else{
this->SaveModel(model_out.c_str());
}
}
if (!silent){
printf("\nupdating end, %lu sec in all\n", elapsed);
}
}
inline void TaskEval(void){
learner_->EvalOneIter(0);
}
inline void TaskInteractive(void){
const time_t start = time(NULL);
unsigned long elapsed = 0;
int batch_action = 0;
if (batch_action == 0){ cfg_batch.BeforeFirst();
learner_->UpdateInteract(interact_action); while (cfg_batch.Next()){
} if (!strcmp(cfg_batch.name(), "run")){
utils::Assert(model_out != "NULL", "interactive mode must specify model_out"); learner_->UpdateInteract(interact_action);
this->SaveModel(model_out.c_str()); batch_action += 1;
elapsed = (unsigned long)(time(NULL) - start); }
else{
learner_->SetParam(cfg_batch.name(), cfg_batch.val());
}
}
if (!silent){ if (batch_action == 0){
printf("\ninteractive update, %d batch actions, %lu sec in all\n", batch_action, elapsed); learner_->UpdateInteract(interact_action);
} }
} utils::Assert(model_out != "NULL", "interactive mode must specify model_out");
this->SaveModel(model_out.c_str());
elapsed = (unsigned long)(time(NULL) - start);
inline void TaskDump(void){ if (!silent){
FILE *fo = utils::FopenCheck(name_dump.c_str(), "w"); printf("\ninteractive update, %d batch actions, %lu sec in all\n", batch_action, elapsed);
learner_->DumpModel(fo, fmap, dump_model_stats != 0); }
fclose(fo); }
}
inline void TaskDumpPath(void){
FILE *fo = utils::FopenCheck(name_dumppath.c_str(), "w");
learner_->DumpPath(fo, data);
fclose(fo);
}
inline void SaveModel(const char *fname) const{
utils::FileStream fo(utils::FopenCheck(fname, "wb"));
learner_->SaveModel(fo);
fo.Close();
}
inline void SaveModel(int i) const{
char fname[256];
sprintf(fname, "%s/%04d.model", model_dir_path.c_str(), i + 1);
this->SaveModel(fname);
}
inline void TaskPred(void){
std::vector<float> preds;
if (!silent) printf("start prediction...\n");
learner_->Predict(preds, data);
if (!silent) printf("writing prediction to %s\n", name_pred.c_str());
FILE *fo = utils::FopenCheck(name_pred.c_str(), "w");
for (size_t i = 0; i < preds.size(); i++){
fprintf(fo, "%f\n", preds[i]);
}
fclose(fo);
}
private:
/* \brief specify the learning task*/
int learning_task;
/* \brief whether silent */
int silent;
/* \brief whether use auto binary buffer */
int use_buffer;
/* \brief number of boosting iterations */
int num_round;
/* \brief the period to save the model, 0 means only save the final round model */
int save_period;
/*! \brief interfact action */
std::string interact_action;
/* \brief the path of training/test data set */
std::string train_path, test_path;
/* \brief the path of test model file, or file to restart training */
std::string model_in;
/* \brief the path of final model file, to be saved */
std::string model_out;
/* \brief the path of directory containing the saved models */
std::string model_dir_path;
/* \brief task to perform, choosing training or testing */
std::string task;
/* \brief name of predict file */
std::string name_pred;
/* \brief whether dump statistics along with model */
int dump_model_stats;
/* \brief name of feature map */
std::string name_fmap;
/* \brief name of dump file */
std::string name_dump;
/* \brief name of dump path file */
std::string name_dumppath;
/* \brief the paths of validation data sets */
std::vector<std::string> eval_data_paths;
/* \brief the names of the evaluation data used in output log */
std::vector<std::string> eval_data_names;
/*! \brief saves configurations */
utils::ConfigSaver cfg;
/*! \brief batch configurations */
utils::ConfigSaver cfg_batch;
private:
DMatrix data;
std::vector<DMatrix*> deval;
utils::FeatMap fmap;
BoostLearner* learner_;
}; inline void TaskDump(void){
}; FILE *fo = utils::FopenCheck(name_dump.c_str(), "w");
learner_->DumpModel(fo, fmap, dump_model_stats != 0);
fclose(fo);
}
inline void TaskDumpPath(void){
FILE *fo = utils::FopenCheck(name_dumppath.c_str(), "w");
learner_->DumpPath(fo, data);
fclose(fo);
}
inline void SaveModel(const char *fname) const{
utils::FileStream fo(utils::FopenCheck(fname, "wb"));
learner_->SaveModel(fo);
fo.Close();
}
inline void SaveModel(int i) const{
char fname[256];
sprintf(fname, "%s/%04d.model", model_dir_path.c_str(), i + 1);
this->SaveModel(fname);
}
inline void TaskPred(void){
std::vector<float> preds;
if (!silent) printf("start prediction...\n");
learner_->Predict(preds, data);
if (!silent) printf("writing prediction to %s\n", name_pred.c_str());
FILE *fo = utils::FopenCheck(name_pred.c_str(), "w");
for (size_t i = 0; i < preds.size(); i++){
fprintf(fo, "%f\n", preds[i]);
}
fclose(fo);
}
private:
/* \brief specify the learning task*/
int learning_task;
/* \brief whether silent */
int silent;
/* \brief whether use auto binary buffer */
int use_buffer;
/* \brief number of boosting iterations */
int num_round;
/* \brief the period to save the model, 0 means only save the final round model */
int save_period;
/*! \brief interfact action */
std::string interact_action;
/* \brief the path of training/test data set */
std::string train_path, test_path;
/* \brief the path of test model file, or file to restart training */
std::string model_in;
/* \brief the path of final model file, to be saved */
std::string model_out;
/* \brief the path of directory containing the saved models */
std::string model_dir_path;
/* \brief task to perform, choosing training or testing */
std::string task;
/* \brief name of predict file */
std::string name_pred;
/* \brief whether dump statistics along with model */
int dump_model_stats;
/* \brief name of feature map */
std::string name_fmap;
/* \brief name of dump file */
std::string name_dump;
/* \brief name of dump path file */
std::string name_dumppath;
/* \brief the paths of validation data sets */
std::vector<std::string> eval_data_paths;
/* \brief the names of the evaluation data used in output log */
std::vector<std::string> eval_data_names;
/*! \brief saves configurations */
utils::ConfigSaver cfg;
/*! \brief batch configurations */
utils::ConfigSaver cfg_batch;
private:
DMatrix data;
std::vector<DMatrix*> deval;
utils::FeatMap fmap;
BoostLearner* learner_;
};
};
}; };

View File

@ -9,183 +9,208 @@
namespace xgboost{ namespace xgboost{
namespace base{ namespace base{
/*! \brief data matrix for regression,classification,rank content */ /*! \brief data matrix for regression,classification,rank content */
struct DMatrix{ struct DMatrix{
public: public:
/*! \brief maximum feature dimension */ /*! \brief maximum feature dimension */
unsigned num_feature; unsigned num_feature;
/*! \brief feature data content */ /*! \brief feature data content */
booster::FMatrixS data; booster::FMatrixS data;
/*! \brief label of each instance */ /*! \brief label of each instance */
std::vector<float> labels; std::vector<float> labels;
/*! \brief the index of begin and end of a group, /*! \brief the index of begin and end of a group,
* needed when the learning task is ranking*/ * needed when the learning task is ranking*/
std::vector<int> group_index; std::vector<int> group_index;
public: public:
/*! \brief default constructor */ /*! \brief default constructor */
DMatrix(void){} DMatrix(void){}
/*! \brief get the number of instances */ /*! \brief get the number of instances */
inline size_t Size() const{ inline size_t Size() const{
return labels.size(); return labels.size();
} }
/*! /*!
* \brief load from text file * \brief load from text file
* \param fname file of instances data * \param fname file of instances data
* \param fgroup file of the group data * \param fgroup file of the group data
* \param silent whether print information or not * \param silent whether print information or not
*/ */
inline void LoadText(const char* fname, const char* fgroup, bool silent = false){ inline void LoadText(const char* fname, const char* fgroup, bool silent = false){
data.Clear(); data.Clear();
FILE* file = utils::FopenCheck(fname, "r"); FILE* file = utils::FopenCheck(fname, "r");
float label; bool init = true; float label; bool init = true;
char tmp[1024]; char tmp[1024];
std::vector<booster::bst_uint> findex; std::vector<booster::bst_uint> findex;
std::vector<booster::bst_float> fvalue; std::vector<booster::bst_float> fvalue;
while (fscanf(file, "%s", tmp) == 1){ while (fscanf(file, "%s", tmp) == 1){
unsigned index; float value; unsigned index; float value;
if (sscanf(tmp, "%u:%f", &index, &value) == 2){ if (sscanf(tmp, "%u:%f", &index, &value) == 2){
findex.push_back(index); fvalue.push_back(value); findex.push_back(index); fvalue.push_back(value);
} }
else{ else{
if (!init){ if (!init){
labels.push_back(label); labels.push_back(label);
data.AddRow(findex, fvalue); data.AddRow(findex, fvalue);
} }
findex.clear(); fvalue.clear(); findex.clear(); fvalue.clear();
utils::Assert(sscanf(tmp, "%f", &label) == 1, "invalid format"); utils::Assert(sscanf(tmp, "%f", &label) == 1, "invalid format");
init = false; init = false;
} }
} }
labels.push_back(label); labels.push_back(label);
data.AddRow(findex, fvalue); data.AddRow(findex, fvalue);
// initialize column support as well // initialize column support as well
data.InitData(); data.InitData();
if (!silent){ if (!silent){
printf("%ux%u matrix with %lu entries is loaded from %s\n", printf("%ux%u matrix with %lu entries is loaded from %s\n",
(unsigned)data.NumRow(), (unsigned)data.NumCol(), (unsigned long)data.NumEntry(), fname); (unsigned)data.NumRow(), (unsigned)data.NumCol(), (unsigned long)data.NumEntry(), fname);
} }
fclose(file); fclose(file);
LoadGroup(fgroup,silent);
}
//if exists group data load it in inline void LoadGroup(const char* fgroup, bool silent = false){
FILE *file_group = fopen64(fgroup, "r"); //if exists group data load it in
if (file_group != NULL){ FILE *file_group = fopen64(fgroup, "r");
group_index.push_back(0);
int tmp = 0, acc = 0;
while (fscanf(file_group, "%d", tmp) == 1){
acc += tmp;
group_index.push_back(acc);
}
}
}
/*!
* \brief load from binary file
* \param fname name of binary data
* \param silent whether print information or not
* \return whether loading is success
*/
inline bool LoadBinary(const char* fname, const char* fgroup, bool silent = false){
FILE *fp = fopen64(fname, "rb");
if (fp == NULL) return false;
utils::FileStream fs(fp);
data.LoadBinary(fs);
labels.resize(data.NumRow());
utils::Assert(fs.Read(&labels[0], sizeof(float)* data.NumRow()) != 0, "DMatrix LoadBinary");
fs.Close();
// initialize column support as well
data.InitData();
if (!silent){ if (file_group != NULL){
printf("%ux%u matrix with %lu entries is loaded from %s\n", group_index.push_back(0);
(unsigned)data.NumRow(), (unsigned)data.NumCol(), (unsigned long)data.NumEntry(), fname); int tmp = 0, acc = 0,cnt = 0;
} while (fscanf(file_group, "%d", &tmp) == 1){
acc += tmp;
group_index.push_back(acc);
cnt++;
}
if(!silent) printf("%d groups are loaded from %s\n",cnt,fgroup);
fclose(file_group);
}else{
if(!silent) printf("There is no group file\n");
}
//if group data exists load it in }
FILE *file_group = fopen64(fgroup, "r"); /*!
if (file_group != NULL){ * \brief load from binary file
int group_index_size = 0; * \param fname name of binary data
utils::FileStream group_stream(file_group); * \param silent whether print information or not
utils::Assert(group_stream.Read(&group_index_size, sizeof(int)) != 0, "Load group indice size"); * \return whether loading is success
group_index.resize(group_index_size); */
utils::Assert(group_stream.Read(&group_index, sizeof(int)* group_index_size) != 0, "Load group indice"); inline bool LoadBinary(const char* fname, const char* fgroup, bool silent = false){
FILE *fp = fopen64(fname, "rb");
if (fp == NULL) return false;
utils::FileStream fs(fp);
data.LoadBinary(fs);
labels.resize(data.NumRow());
utils::Assert(fs.Read(&labels[0], sizeof(float) * data.NumRow()) != 0, "DMatrix LoadBinary");
fs.Close();
// initialize column support as well
data.InitData();
if (!silent){ if (!silent){
printf("the group index of %d groups is loaded from %s\n", printf("%ux%u matrix with %lu entries is loaded from %s as binary\n",
group_index_size - 1, fgroup); (unsigned)data.NumRow(), (unsigned)data.NumCol(), (unsigned long)data.NumEntry(), fname);
} }
}
return true;
}
/*!
* \brief save to binary file
* \param fname name of binary data
* \param silent whether print information or not
*/
inline void SaveBinary(const char* fname, const char* fgroup, bool silent = false){
// initialize column support as well
data.InitData();
utils::FileStream fs(utils::FopenCheck(fname, "wb")); LoadGroupBinary(fgroup,silent);
data.SaveBinary(fs); return true;
fs.Write(&labels[0], sizeof(float)* data.NumRow()); }
fs.Close(); /*!
if (!silent){ * \brief save to binary file
printf("%ux%u matrix with %lu entries is saved to %s\n", * \param fname name of binary data
(unsigned)data.NumRow(), (unsigned)data.NumCol(), (unsigned long)data.NumEntry(), fname); * \param silent whether print information or not
} */
inline void SaveBinary(const char* fname, const char* fgroup, bool silent = false){
// initialize column support as well
data.InitData();
//save group data utils::FileStream fs(utils::FopenCheck(fname, "wb"));
if (group_index.size() > 0){ data.SaveBinary(fs);
utils::FileStream file_group(utils::FopenCheck(fgroup, "wb")); fs.Write(&labels[0], sizeof(float)* data.NumRow());
int group_index_size = group_index.size(); fs.Close();
file_group.Write(&(group_index_size), sizeof(int)); if (!silent){
file_group.Write(&group_index[0], sizeof(int) * group_index_size); printf("%ux%u matrix with %lu entries is saved to %s as binary\n",
} (unsigned)data.NumRow(), (unsigned)data.NumCol(), (unsigned long)data.NumEntry(), fname);
}
} SaveGroupBinary(fgroup,silent);
/*! }
* \brief cache load data given a file name, if filename ends with .buffer, direct load binary
* otherwise the function will first check if fname + '.buffer' exists, inline void SaveGroupBinary(const char* fgroup, bool silent = false){
* if binary buffer exists, it will reads from binary buffer, otherwise, it will load from text file, //save group data
* and try to create a buffer file if (group_index.size() > 0){
* \param fname name of binary data utils::FileStream file_group(utils::FopenCheck(fgroup, "wb"));
* \param silent whether print information or not int group_index_size = group_index.size();
* \param savebuffer whether do save binary buffer if it is text file_group.Write(&(group_index_size), sizeof(int));
*/ file_group.Write(&group_index[0], sizeof(int) * group_index_size);
inline void CacheLoad(const char *fname, const char *fgroup, bool silent = false, bool savebuffer = true){ file_group.Close();
int len = strlen(fname); if(!silent){printf("Index info of %d groups is saved to %s as binary\n",group_index_size-1,fgroup);}
if (len > 8 && !strcmp(fname + len - 7, ".buffer")){ }
this->LoadBinary(fname, fgroup, silent); return; }
}
char bname[1024]; inline void LoadGroupBinary(const char* fgroup, bool silent = false){
sprintf(bname, "%s.buffer", fname); //if group data exists load it in
if (!this->LoadBinary(bname, fgroup, silent)){ FILE *file_group = fopen64(fgroup, "r");
this->LoadText(fname, fgroup, silent); if (file_group != NULL){
if (savebuffer) this->SaveBinary(bname, fgroup, silent); int group_index_size = 0;
} utils::FileStream group_stream(file_group);
} utils::Assert(group_stream.Read(&group_index_size, sizeof(int)) != 0, "Load group indice size");
private: group_index.resize(group_index_size);
/*! \brief update num_feature info */ utils::Assert(group_stream.Read(&group_index[0], sizeof(int) * group_index_size) != 0, "Load group indice");
inline void UpdateInfo(void){
this->num_feature = 0; if (!silent){
for (size_t i = 0; i < data.NumRow(); i++){ printf("Index info of %d groups is loaded from %s as binary\n",
booster::FMatrixS::Line sp = data[i]; group_index.size() - 1, fgroup);
for (unsigned j = 0; j < sp.len; j++){ }
if (num_feature <= sp[j].findex){ fclose(file_group);
num_feature = sp[j].findex + 1; }else{
} if(!silent){printf("The binary file of group info not exists");}
} }
}
} }
}; /*!
* \brief cache load data given a file name, if filename ends with .buffer, direct load binary
* otherwise the function will first check if fname + '.buffer' exists,
* if binary buffer exists, it will reads from binary buffer, otherwise, it will load from text file,
* and try to create a buffer file
* \param fname name of binary data
* \param silent whether print information or not
* \param savebuffer whether do save binary buffer if it is text
*/
inline void CacheLoad(const char *fname, const char *fgroup, bool silent = false, bool savebuffer = true){
int len = strlen(fname);
if (len > 8 && !strcmp(fname + len - 7, ".buffer")){
this->LoadBinary(fname, fgroup, silent); return;
}
char bname[1024],bgroup[1024];
sprintf(bname, "%s.buffer", fname);
sprintf(bgroup, "%s.buffer", fgroup);
if (!this->LoadBinary(bname, bgroup, silent))
{
this->LoadText(fname, fgroup, silent);
if (savebuffer) this->SaveBinary(bname, bgroup, silent);
}
}
private:
/*! \brief update num_feature info */
inline void UpdateInfo(void){
this->num_feature = 0;
for (size_t i = 0; i < data.NumRow(); i++){
booster::FMatrixS::Line sp = data[i];
for (unsigned j = 0; j < sp.len; j++){
if (num_feature <= sp[j].findex){
num_feature = sp[j].findex + 1;
}
}
}
}
};
} }
}; };
#endif #endif

View File

@ -15,256 +15,256 @@
#include "../utils/xgboost_stream.h" #include "../utils/xgboost_stream.h"
namespace xgboost { namespace xgboost {
namespace base { namespace base {
/*! \brief class for gradient boosting learner */ /*! \brief class for gradient boosting learner */
class BoostLearner { class BoostLearner {
public: public:
/*! \brief constructor */ /*! \brief constructor */
BoostLearner(void) { BoostLearner(void) {
silent = 0; silent = 0;
} }
/*! /*!
* \brief booster associated with training and evaluating data * \brief booster associated with training and evaluating data
* \param train pointer to the training data * \param train pointer to the training data
* \param evals array of evaluating data * \param evals array of evaluating data
* \param evname name of evaluation data, used print statistics * \param evname name of evaluation data, used print statistics
*/ */
BoostLearner(const DMatrix *train, BoostLearner(const DMatrix *train,
const std::vector<DMatrix *> &evals, const std::vector<DMatrix *> &evals,
const std::vector<std::string> &evname) { const std::vector<std::string> &evname) {
silent = 0; silent = 0;
this->SetData(train, evals, evname); this->SetData(train, evals, evname);
} }
/*! /*!
* \brief associate booster with training and evaluating data * \brief associate booster with training and evaluating data
* \param train pointer to the training data * \param train pointer to the training data
* \param evals array of evaluating data * \param evals array of evaluating data
* \param evname name of evaluation data, used print statistics * \param evname name of evaluation data, used print statistics
*/ */
inline void SetData(const DMatrix *train, inline void SetData(const DMatrix *train,
const std::vector<DMatrix *> &evals, const std::vector<DMatrix *> &evals,
const std::vector<std::string> &evname) { const std::vector<std::string> &evname) {
this->train_ = train; this->train_ = train;
this->evals_ = evals; this->evals_ = evals;
this->evname_ = evname; this->evname_ = evname;
// estimate feature bound // estimate feature bound
int num_feature = (int)(train->data.NumCol()); int num_feature = (int)(train->data.NumCol());
// assign buffer index // assign buffer index
unsigned buffer_size = static_cast<unsigned>(train->Size()); unsigned buffer_size = static_cast<unsigned>(train->Size());
for (size_t i = 0; i < evals.size(); ++i) { for (size_t i = 0; i < evals.size(); ++i) {
buffer_size += static_cast<unsigned>(evals[i]->Size()); buffer_size += static_cast<unsigned>(evals[i]->Size());
num_feature = std::max(num_feature, (int)(evals[i]->data.NumCol())); num_feature = std::max(num_feature, (int)(evals[i]->data.NumCol()));
} }
char str_temp[25]; char str_temp[25];
if (num_feature > mparam.num_feature) { if (num_feature > mparam.num_feature) {
mparam.num_feature = num_feature; mparam.num_feature = num_feature;
sprintf(str_temp, "%d", num_feature); sprintf(str_temp, "%d", num_feature);
base_gbm.SetParam("bst:num_feature", str_temp); base_gbm.SetParam("bst:num_feature", str_temp);
} }
sprintf(str_temp, "%u", buffer_size); sprintf(str_temp, "%u", buffer_size);
base_gbm.SetParam("num_pbuffer", str_temp); base_gbm.SetParam("num_pbuffer", str_temp);
if (!silent) { if (!silent) {
printf("buffer_size=%u\n", buffer_size); printf("buffer_size=%u\n", buffer_size);
} }
// set eval_preds tmp sapce // set eval_preds tmp sapce
this->eval_preds_.resize(evals.size(), std::vector<float>()); this->eval_preds_.resize(evals.size(), std::vector<float>());
} }
/*! /*!
* \brief set parameters from outside * \brief set parameters from outside
* \param name name of the parameter * \param name name of the parameter
* \param val value of the parameter * \param val value of the parameter
*/ */
virtual inline void SetParam(const char *name, const char *val) { virtual inline void SetParam(const char *name, const char *val) {
if (!strcmp(name, "silent")) silent = atoi(val); if (!strcmp(name, "silent")) silent = atoi(val);
mparam.SetParam(name, val); mparam.SetParam(name, val);
base_gbm.SetParam(name, val); base_gbm.SetParam(name, val);
} }
/*! /*!
* \brief initialize solver before training, called before training * \brief initialize solver before training, called before training
* this function is reserved for solver to allocate necessary space and do other preparation * this function is reserved for solver to allocate necessary space and do other preparation
*/ */
inline void InitTrainer(void) { inline void InitTrainer(void) {
base_gbm.InitTrainer(); base_gbm.InitTrainer();
} }
/*! /*!
* \brief initialize the current data storage for model, if the model is used first time, call this function * \brief initialize the current data storage for model, if the model is used first time, call this function
*/ */
inline void InitModel(void) { inline void InitModel(void) {
base_gbm.InitModel(); base_gbm.InitModel();
} if(!silent) printf("BoostLearner:InitModel Done!\n");
/*! }
* \brief load model from stream /*!
* \param fi input stream * \brief load model from stream
*/ * \param fi input stream
inline void LoadModel(utils::IStream &fi) { */
base_gbm.LoadModel(fi); inline void LoadModel(utils::IStream &fi) {
utils::Assert(fi.Read(&mparam, sizeof(ModelParam)) != 0); base_gbm.LoadModel(fi);
} utils::Assert(fi.Read(&mparam, sizeof(ModelParam)) != 0);
/*! }
* \brief DumpModel /*!
* \param fo text file * \brief DumpModel
* \param fmap feature map that may help give interpretations of feature * \param fo text file
* \param with_stats whether print statistics as well * \param fmap feature map that may help give interpretations of feature
*/ * \param with_stats whether print statistics as well
inline void DumpModel(FILE *fo, const utils::FeatMap& fmap, bool with_stats) { */
base_gbm.DumpModel(fo, fmap, with_stats); inline void DumpModel(FILE *fo, const utils::FeatMap& fmap, bool with_stats) {
} base_gbm.DumpModel(fo, fmap, with_stats);
/*! }
* \brief Dump path of all trees /*!
* \param fo text file * \brief Dump path of all trees
* \param data input data * \param fo text file
*/ * \param data input data
inline void DumpPath(FILE *fo, const DMatrix &data) { */
base_gbm.DumpPath(fo, data.data); inline void DumpPath(FILE *fo, const DMatrix &data) {
} base_gbm.DumpPath(fo, data.data);
}
/*! /*!
* \brief save model to stream * \brief save model to stream
* \param fo output stream * \param fo output stream
*/ */
inline void SaveModel(utils::IStream &fo) const { inline void SaveModel(utils::IStream &fo) const {
base_gbm.SaveModel(fo); base_gbm.SaveModel(fo);
fo.Write(&mparam, sizeof(ModelParam)); fo.Write(&mparam, sizeof(ModelParam));
} }
virtual void EvalOneIter(int iter, FILE *fo = stderr) {} virtual void EvalOneIter(int iter, FILE *fo = stderr) {}
/*! /*!
* \brief update the model for one iteration * \brief update the model for one iteration
* \param iteration iteration number * \param iteration iteration number
*/ */
inline void UpdateOneIter(int iter) { inline void UpdateOneIter(int iter) {
this->PredictBuffer(preds_, *train_, 0); this->PredictBuffer(preds_, *train_, 0);
this->GetGradient(preds_, train_->labels, train_->group_index, grad_, hess_); this->GetGradient(preds_, train_->labels, train_->group_index, grad_, hess_);
std::vector<unsigned> root_index; std::vector<unsigned> root_index;
base_gbm.DoBoost(grad_, hess_, train_->data, root_index); base_gbm.DoBoost(grad_, hess_, train_->data, root_index);
} }
/*! \brief get intransformed prediction, without buffering */ /*! \brief get intransformed prediction, without buffering */
inline void Predict(std::vector<float> &preds, const DMatrix &data) { inline void Predict(std::vector<float> &preds, const DMatrix &data) {
preds.resize(data.Size()); preds.resize(data.Size());
const unsigned ndata = static_cast<unsigned>(data.Size()); const unsigned ndata = static_cast<unsigned>(data.Size());
#pragma omp parallel for schedule( static ) #pragma omp parallel for schedule( static )
for (unsigned j = 0; j < ndata; ++j) { for (unsigned j = 0; j < ndata; ++j) {
preds[j] = base_gbm.Predict(data.data, j, -1); preds[j] = base_gbm.Predict(data.data, j, -1);
} }
} }
public: public:
/*! /*!
* \brief update the model for one iteration * \brief update the model for one iteration
* \param iteration iteration number * \param iteration iteration number
*/ */
virtual inline void UpdateInteract(std::string action){ virtual inline void UpdateInteract(std::string action){
this->InteractPredict(preds_, *train_, 0); this->InteractPredict(preds_, *train_, 0);
int buffer_offset = static_cast<int>(train_->Size()); int buffer_offset = static_cast<int>(train_->Size());
for (size_t i = 0; i < evals_.size(); ++i) { for (size_t i = 0; i < evals_.size(); ++i) {
std::vector<float> &preds = this->eval_preds_[i]; std::vector<float> &preds = this->eval_preds_[i];
this->InteractPredict(preds, *evals_[i], buffer_offset); this->InteractPredict(preds, *evals_[i], buffer_offset);
buffer_offset += static_cast<int>(evals_[i]->Size()); buffer_offset += static_cast<int>(evals_[i]->Size());
} }
if (action == "remove") { if (action == "remove") {
base_gbm.DelteBooster(); base_gbm.DelteBooster();
return; return;
} }
this->GetGradient(preds_, train_->labels, train_->group_index, grad_, hess_); this->GetGradient(preds_, train_->labels, train_->group_index, grad_, hess_);
std::vector<unsigned> root_index; std::vector<unsigned> root_index;
base_gbm.DoBoost(grad_, hess_, train_->data, root_index); base_gbm.DoBoost(grad_, hess_, train_->data, root_index);
this->InteractRePredict(*train_, 0); this->InteractRePredict(*train_, 0);
buffer_offset = static_cast<int>(train_->Size()); buffer_offset = static_cast<int>(train_->Size());
for (size_t i = 0; i < evals_.size(); ++i) { for (size_t i = 0; i < evals_.size(); ++i) {
this->InteractRePredict(*evals_[i], buffer_offset); this->InteractRePredict(*evals_[i], buffer_offset);
buffer_offset += static_cast<int>(evals_[i]->Size()); buffer_offset += static_cast<int>(evals_[i]->Size());
} }
}; };
protected: protected:
/*! \brief get the intransformed predictions, given data */ /*! \brief get the intransformed predictions, given data */
inline void InteractPredict(std::vector<float> &preds, const DMatrix &data, unsigned buffer_offset) { inline void InteractPredict(std::vector<float> &preds, const DMatrix &data, unsigned buffer_offset) {
preds.resize(data.Size()); preds.resize(data.Size());
const unsigned ndata = static_cast<unsigned>(data.Size()); const unsigned ndata = static_cast<unsigned>(data.Size());
#pragma omp parallel for schedule( static ) #pragma omp parallel for schedule( static )
for (unsigned j = 0; j < ndata; ++j) { for (unsigned j = 0; j < ndata; ++j) {
preds[j] = base_gbm.InteractPredict(data.data, j, buffer_offset + j); preds[j] = base_gbm.InteractPredict(data.data, j, buffer_offset + j);
} }
} }
/*! \brief repredict trial */ /*! \brief repredict trial */
inline void InteractRePredict(const xgboost::base::DMatrix &data, unsigned buffer_offset) { inline void InteractRePredict(const xgboost::base::DMatrix &data, unsigned buffer_offset) {
const unsigned ndata = static_cast<unsigned>(data.Size()); const unsigned ndata = static_cast<unsigned>(data.Size());
#pragma omp parallel for schedule( static ) #pragma omp parallel for schedule( static )
for (unsigned j = 0; j < ndata; ++j) { for (unsigned j = 0; j < ndata; ++j) {
base_gbm.InteractRePredict(data.data, j, buffer_offset + j); base_gbm.InteractRePredict(data.data, j, buffer_offset + j);
} }
} }
/*! \brief get intransformed predictions, given data */ /*! \brief get intransformed predictions, given data */
virtual inline void PredictBuffer(std::vector<float> &preds, const DMatrix &data, unsigned buffer_offset) { virtual inline void PredictBuffer(std::vector<float> &preds, const DMatrix &data, unsigned buffer_offset) {
preds.resize(data.Size()); preds.resize(data.Size());
const unsigned ndata = static_cast<unsigned>(data.Size());
const unsigned ndata = static_cast<unsigned>(data.Size());
#pragma omp parallel for schedule( static ) #pragma omp parallel for schedule( static )
for (unsigned j = 0; j < ndata; ++j) { for (unsigned j = 0; j < ndata; ++j) {
preds[j] = base_gbm.Predict(data.data, j, buffer_offset + j); preds[j] = base_gbm.Predict(data.data, j, buffer_offset + j);
} }
} }
/*! \brief get the first order and second order gradient, given the transformed predictions and labels */ /*! \brief get the first order and second order gradient, given the transformed predictions and labels */
virtual inline void GetGradient(const std::vector<float> &preds, virtual inline void GetGradient(const std::vector<float> &preds,
const std::vector<float> &labels, const std::vector<float> &labels,
const std::vector<int> &group_index, const std::vector<int> &group_index,
std::vector<float> &grad, std::vector<float> &grad,
std::vector<float> &hess) {}; std::vector<float> &hess) {};
protected: protected:
/*! \brief training parameter for regression */ /*! \brief training parameter for regression */
struct ModelParam { struct ModelParam {
/* \brief type of loss function */ /* \brief type of loss function */
int loss_type; int loss_type;
/* \brief number of features */ /* \brief number of features */
int num_feature; int num_feature;
/*! \brief reserved field */ /*! \brief reserved field */
int reserved[16]; int reserved[16];
/*! \brief constructor */ /*! \brief constructor */
ModelParam(void) { ModelParam(void) {
loss_type = 0; loss_type = 0;
num_feature = 0; num_feature = 0;
memset(reserved, 0, sizeof(reserved)); memset(reserved, 0, sizeof(reserved));
} }
/*! /*!
* \brief set parameters from outside * \brief set parameters from outside
* \param name name of the parameter * \param name name of the parameter
* \param val value of the parameter * \param val value of the parameter
*/ */
inline void SetParam(const char *name, const char *val) { inline void SetParam(const char *name, const char *val) {
if (!strcmp("loss_type", name)) loss_type = atoi(val); if (!strcmp("loss_type", name)) loss_type = atoi(val);
if (!strcmp("bst:num_feature", name)) num_feature = atoi(val); if (!strcmp("bst:num_feature", name)) num_feature = atoi(val);
} }
}; };
int silent; int silent;
booster::GBMBase base_gbm; booster::GBMBase base_gbm;
ModelParam mparam; ModelParam mparam;
const DMatrix *train_; const DMatrix *train_;
std::vector<DMatrix *> evals_; std::vector<DMatrix *> evals_;
std::vector<std::string> evname_; std::vector<std::string> evname_;
std::vector<unsigned> buffer_index_; std::vector<unsigned> buffer_index_;
std::vector<float> grad_, hess_, preds_; std::vector<float> grad_, hess_, preds_;
std::vector< std::vector<float> > eval_preds_; std::vector< std::vector<float> > eval_preds_;
}; };
} }
}; };
#endif #endif

View File

@ -18,133 +18,133 @@
#include "../base/xgboost_learner.h" #include "../base/xgboost_learner.h"
namespace xgboost { namespace xgboost {
namespace rank { namespace rank {
/*! \brief class for gradient boosted regression */ /*! \brief class for gradient boosted regression */
class RankBoostLearner :public base::BoostLearner{ class RankBoostLearner :public base::BoostLearner{
public: public:
/*! \brief constructor */ /*! \brief constructor */
RankBoostLearner(void) { RankBoostLearner(void) {
BoostLearner(); BoostLearner();
} }
/*! /*!
* \brief a rank booster associated with training and evaluating data * \brief a rank booster associated with training and evaluating data
* \param train pointer to the training data * \param train pointer to the training data
* \param evals array of evaluating data * \param evals array of evaluating data
* \param evname name of evaluation data, used print statistics * \param evname name of evaluation data, used print statistics
*/ */
RankBoostLearner(const base::DMatrix *train, RankBoostLearner(const base::DMatrix *train,
const std::vector<base::DMatrix *> &evals, const std::vector<base::DMatrix *> &evals,
const std::vector<std::string> &evname) { const std::vector<std::string> &evname) {
BoostLearner(train, evals, evname); BoostLearner(train, evals, evname);
} }
/*! /*!
* \brief initialize solver before training, called before training * \brief initialize solver before training, called before training
* this function is reserved for solver to allocate necessary space * this function is reserved for solver to allocate necessary space
* and do other preparation * and do other preparation
*/ */
inline void InitTrainer(void) { inline void InitTrainer(void) {
BoostLearner::InitTrainer(); BoostLearner::InitTrainer();
if (mparam.loss_type == PAIRWISE) { if (mparam.loss_type == PAIRWISE) {
evaluator_.AddEval("PAIR"); evaluator_.AddEval("PAIR");
} }
else if (mparam.loss_type == MAP) { else if (mparam.loss_type == MAP) {
evaluator_.AddEval("MAP"); evaluator_.AddEval("MAP");
} }
else { else {
evaluator_.AddEval("NDCG"); evaluator_.AddEval("NDCG");
} }
evaluator_.Init(); evaluator_.Init();
} }
void EvalOneIter(int iter, FILE *fo = stderr) { void EvalOneIter(int iter, FILE *fo = stderr) {
fprintf(fo, "[%d]", iter); fprintf(fo, "[%d]", iter);
int buffer_offset = static_cast<int>(train_->Size()); int buffer_offset = static_cast<int>(train_->Size());
for (size_t i = 0; i < evals_.size(); ++i) { for (size_t i = 0; i < evals_.size(); ++i) {
std::vector<float> &preds = this->eval_preds_[i]; std::vector<float> &preds = this->eval_preds_[i];
this->PredictBuffer(preds, *evals_[i], buffer_offset); this->PredictBuffer(preds, *evals_[i], buffer_offset);
evaluator_.Eval(fo, evname_[i].c_str(), preds, (*evals_[i]).labels, (*evals_[i]).group_index); evaluator_.Eval(fo, evname_[i].c_str(), preds, (*evals_[i]).labels, (*evals_[i]).group_index);
buffer_offset += static_cast<int>(evals_[i]->Size()); buffer_offset += static_cast<int>(evals_[i]->Size());
} }
fprintf(fo, "\n"); fprintf(fo, "\n");
} }
inline void SetParam(const char *name, const char *val){ inline void SetParam(const char *name, const char *val){
if (!strcmp(name, "eval_metric")) evaluator_.AddEval(val); if (!strcmp(name, "eval_metric")) evaluator_.AddEval(val);
if (!strcmp(name, "rank:sampler")) sampler.AssignSampler(atoi(val)); if (!strcmp(name, "rank:sampler")) sampler.AssignSampler(atoi(val));
} }
/*! \brief get the first order and second order gradient, given the transformed predictions and labels */ /*! \brief get the first order and second order gradient, given the transformed predictions and labels */
inline void GetGradient(const std::vector<float> &preds, inline void GetGradient(const std::vector<float> &preds,
const std::vector<float> &labels, const std::vector<float> &labels,
const std::vector<int> &group_index, const std::vector<int> &group_index,
std::vector<float> &grad, std::vector<float> &grad,
std::vector<float> &hess) { std::vector<float> &hess) {
grad.resize(preds.size()); grad.resize(preds.size());
hess.resize(preds.size()); hess.resize(preds.size());
bool j_better; bool j_better;
float pred_diff, pred_diff_exp, first_order_gradient, second_order_gradient; float pred_diff, pred_diff_exp, first_order_gradient, second_order_gradient;
for (int i = 0; i < group_index.size() - 1; i++){ for (int i = 0; i < group_index.size() - 1; i++){
sample::Pairs pairs = sampler.GenPairs(preds, labels, group_index[i], group_index[i + 1]); sample::Pairs pairs = sampler.GenPairs(preds, labels, group_index[i], group_index[i + 1]);
for (int j = group_index[i]; j < group_index[i + 1]; j++){ for (int j = group_index[i]; j < group_index[i + 1]; j++){
std::vector<int> pair_instance = pairs.GetPairs(j); std::vector<int> pair_instance = pairs.GetPairs(j);
for (int k = 0; k < pair_instance.size(); k++){ for (int k = 0; k < pair_instance.size(); k++){
j_better = labels[j] > labels[pair_instance[k]]; j_better = labels[j] > labels[pair_instance[k]];
if (j_better){ if (j_better){
pred_diff = preds[preds[j] - pair_instance[k]]; pred_diff = preds[preds[j] - pair_instance[k]];
pred_diff_exp = j_better ? expf(-pred_diff) : expf(pred_diff); pred_diff_exp = j_better ? expf(-pred_diff) : expf(pred_diff);
first_order_gradient = FirstOrderGradient(pred_diff_exp); first_order_gradient = FirstOrderGradient(pred_diff_exp);
second_order_gradient = 2 * SecondOrderGradient(pred_diff_exp); second_order_gradient = 2 * SecondOrderGradient(pred_diff_exp);
hess[j] += second_order_gradient; hess[j] += second_order_gradient;
grad[j] += first_order_gradient; grad[j] += first_order_gradient;
hess[pair_instance[k]] += second_order_gradient; hess[pair_instance[k]] += second_order_gradient;
grad[pair_instance[k]] += -first_order_gradient; grad[pair_instance[k]] += -first_order_gradient;
} }
} }
} }
} }
} }
inline void UpdateInteract(std::string action) { inline void UpdateInteract(std::string action) {
} }
private: private:
enum LossType { enum LossType {
PAIRWISE = 0, PAIRWISE = 0,
MAP = 1, MAP = 1,
NDCG = 2 NDCG = 2
}; };
/*! /*!
* \brief calculate first order gradient of pairwise loss function(f(x) = ln(1+exp(-x)), * \brief calculate first order gradient of pairwise loss function(f(x) = ln(1+exp(-x)),
* given the exponential of the difference of intransformed pair predictions * given the exponential of the difference of intransformed pair predictions
* \param the intransformed prediction of positive instance * \param the intransformed prediction of positive instance
* \param the intransformed prediction of negative instance * \param the intransformed prediction of negative instance
* \return first order gradient * \return first order gradient
*/ */
inline float FirstOrderGradient(float pred_diff_exp) const { inline float FirstOrderGradient(float pred_diff_exp) const {
return -pred_diff_exp / (1 + pred_diff_exp); return -pred_diff_exp / (1 + pred_diff_exp);
} }
/*! /*!
* \brief calculate second order gradient of pairwise loss function(f(x) = ln(1+exp(-x)), * \brief calculate second order gradient of pairwise loss function(f(x) = ln(1+exp(-x)),
* given the exponential of the difference of intransformed pair predictions * given the exponential of the difference of intransformed pair predictions
* \param the intransformed prediction of positive instance * \param the intransformed prediction of positive instance
* \param the intransformed prediction of negative instance * \param the intransformed prediction of negative instance
* \return second order gradient * \return second order gradient
*/ */
inline float SecondOrderGradient(float pred_diff_exp) const { inline float SecondOrderGradient(float pred_diff_exp) const {
return pred_diff_exp / pow(1 + pred_diff_exp, 2); return pred_diff_exp / pow(1 + pred_diff_exp, 2);
} }
private: private:
RankEvalSet evaluator_; RankEvalSet evaluator_;
sample::PairSamplerWrapper sampler; sample::PairSamplerWrapper sampler;
}; };
}; };
}; };
#endif #endif

View File

@ -13,170 +13,170 @@
#include "../utils/xgboost_omp.h" #include "../utils/xgboost_omp.h"
namespace xgboost { namespace xgboost {
namespace rank { namespace rank {
/*! \brief evaluator that evaluates the loss metrics */ /*! \brief evaluator that evaluates the loss metrics */
/*! \brief interface of an evaluator computing one ranking metric */
class IRankEvaluator {
public:
    // virtual destructor: instances are used through base pointers
    // (e.g. RankEvalSet stores const IRankEvaluator*), so destruction
    // through the base must be well defined
    virtual ~IRankEvaluator(void) {}
    /*!
     * \brief evaluate a specific metric
     * \param preds prediction of all instances
     * \param labels label of all instances
     * \param group_index query-group boundaries: group i spans
     *        [group_index[i], group_index[i+1])
     * \return the metric value
     */
    virtual float Eval(const std::vector<float> &preds,
                       const std::vector<float> &labels,
                       const std::vector<int> &group_index) const = 0;
    /*! \return name of metric */
    virtual const char *Name(void) const = 0;
};
/*! \brief (prediction, label) record used when ranking a query group */
class Pair{
public:
    float key_;    // sort key: the predicted score
    float value_;  // payload: the relevance label
    Pair(float key, float value){
        key_ = key;
        // BUG FIX: was `value_ = value_;` (self-assignment), which left
        // value_ uninitialized and corrupted every metric derived from it
        value_ = value;
    }
};
bool PairKeyComparer(const Pair &a, const Pair &b){ bool PairKeyComparer(const Pair &a, const Pair &b){
return a.key_ < b.key_; return a.key_ < b.key_;
} }
bool PairValueComparer(const Pair &a, const Pair &b){ bool PairValueComparer(const Pair &a, const Pair &b){
return a.value_ < b.value_; return a.value_ < b.value_;
} }
/*! \brief Mean Average Precision */ /*! \brief Mean Average Precision */
class EvalMAP : public IRankEvaluator { class EvalMAP : public IRankEvaluator {
public: public:
float Eval(const std::vector<float> &preds, float Eval(const std::vector<float> &preds,
const std::vector<float> &labels, const std::vector<float> &labels,
const std::vector<int> &group_index) const { const std::vector<int> &group_index) const {
float acc = 0; float acc = 0;
std::vector<Pair> pairs_sort; std::vector<Pair> pairs_sort;
for (int i = 0; i < group_index.size() - 1; i++){ for (int i = 0; i < group_index.size() - 1; i++){
for (int j = group_index[i]; j < group_index[i + 1]; j++){ for (int j = group_index[i]; j < group_index[i + 1]; j++){
Pair pair(preds[j], labels[j]); Pair pair(preds[j], labels[j]);
pairs_sort.push_back(pair); pairs_sort.push_back(pair);
} }
acc += average_precision(pairs_sort); acc += average_precision(pairs_sort);
} }
return acc / (group_index.size() - 1); return acc / (group_index.size() - 1);
} }
virtual const char *Name(void) const { virtual const char *Name(void) const {
return "MAP"; return "MAP";
} }
float average_precision(std::vector<Pair> pairs_sort) const{ float average_precision(std::vector<Pair> pairs_sort) const{
std::sort(pairs_sort.begin(), pairs_sort.end(), PairKeyComparer); std::sort(pairs_sort.begin(), pairs_sort.end(), PairKeyComparer);
float hits = 0; float hits = 0;
float average_precision = 0; float average_precision = 0;
for (int j = 0; j < pairs_sort.size(); j++){ for (int j = 0; j < pairs_sort.size(); j++){
if (pairs_sort[j].value_ == 1){ if (pairs_sort[j].value_ == 1){
hits++; hits++;
average_precision += hits / (j + 1); average_precision += hits / (j + 1);
} }
} }
if (hits != 0) average_precision /= hits; if (hits != 0) average_precision /= hits;
return average_precision; return average_precision;
} }
}; };
/*! \brief placeholder evaluator for the pairwise objective; always reports 0 */
class EvalPair : public IRankEvaluator{
public:
    virtual float Eval(const std::vector<float> &preds,
                       const std::vector<float> &labels,
                       const std::vector<int> &group_index) const {
        // pairwise accuracy is not implemented; a constant is reported
        return 0;
    }
    virtual const char *Name(void) const {
        return "PAIR";
    }
};
/*! \brief Normalized DCG */ /*! \brief Normalized DCG */
class EvalNDCG : public IRankEvaluator { class EvalNDCG : public IRankEvaluator {
public: public:
float Eval(const std::vector<float> &preds, float Eval(const std::vector<float> &preds,
const std::vector<float> &labels, const std::vector<float> &labels,
const std::vector<int> &group_index) const { const std::vector<int> &group_index) const {
if (group_index.size() <= 1) return 0; if (group_index.size() <= 1) return 0;
float acc = 0; float acc = 0;
std::vector<Pair> pairs_sort; std::vector<Pair> pairs_sort;
for (int i = 0; i < group_index.size() - 1; i++){ for (int i = 0; i < group_index.size() - 1; i++){
for (int j = group_index[i]; j < group_index[i + 1]; j++){ for (int j = group_index[i]; j < group_index[i + 1]; j++){
Pair pair(preds[j], labels[j]); Pair pair(preds[j], labels[j]);
pairs_sort.push_back(pair); pairs_sort.push_back(pair);
} }
acc += NDCG(pairs_sort); acc += NDCG(pairs_sort);
} }
return acc / (group_index.size() - 1); return acc / (group_index.size() - 1);
} }
float NDCG(std::vector<Pair> pairs_sort) const{ float NDCG(std::vector<Pair> pairs_sort) const{
std::sort(pairs_sort.begin(), pairs_sort.end(), PairKeyComparer); std::sort(pairs_sort.begin(), pairs_sort.end(), PairKeyComparer);
float dcg = DCG(pairs_sort); float dcg = DCG(pairs_sort);
std::sort(pairs_sort.begin(), pairs_sort.end(), PairValueComparer); std::sort(pairs_sort.begin(), pairs_sort.end(), PairValueComparer);
float IDCG = DCG(pairs_sort); float IDCG = DCG(pairs_sort);
if (IDCG == 0) return 0; if (IDCG == 0) return 0;
return dcg / IDCG; return dcg / IDCG;
} }
float DCG(std::vector<Pair> pairs_sort) const{ float DCG(std::vector<Pair> pairs_sort) const{
float ans = 0.0; float ans = 0.0;
ans += pairs_sort[0].value_; ans += pairs_sort[0].value_;
for (int i = 1; i < pairs_sort.size(); i++){ for (int i = 1; i < pairs_sort.size(); i++){
ans += pairs_sort[i].value_ / log(i + 1); ans += pairs_sort[i].value_ / log(i + 1);
} }
return ans; return ans;
} }
virtual const char *Name(void) const { virtual const char *Name(void) const {
return "NDCG"; return "NDCG";
} }
}; };
}; };
namespace rank { namespace rank {
/*! \brief a set of evaluators */ /*! \brief a set of evaluators */
class RankEvalSet { class RankEvalSet {
public: public:
inline void AddEval(const char *name) { inline void AddEval(const char *name) {
if (!strcmp(name, "PAIR")) evals_.push_back(&pair_); if (!strcmp(name, "PAIR")) evals_.push_back(&pair_);
if (!strcmp(name, "MAP")) evals_.push_back(&map_); if (!strcmp(name, "MAP")) evals_.push_back(&map_);
if (!strcmp(name, "NDCG")) evals_.push_back(&ndcg_); if (!strcmp(name, "NDCG")) evals_.push_back(&ndcg_);
} }
inline void Init(void) { inline void Init(void) {
std::sort(evals_.begin(), evals_.end()); std::sort(evals_.begin(), evals_.end());
evals_.resize(std::unique(evals_.begin(), evals_.end()) - evals_.begin()); evals_.resize(std::unique(evals_.begin(), evals_.end()) - evals_.begin());
} }
inline void Eval(FILE *fo, const char *evname, inline void Eval(FILE *fo, const char *evname,
const std::vector<float> &preds, const std::vector<float> &preds,
const std::vector<float> &labels, const std::vector<float> &labels,
const std::vector<int> &group_index) const { const std::vector<int> &group_index) const {
for (size_t i = 0; i < evals_.size(); ++i) { for (size_t i = 0; i < evals_.size(); ++i) {
float res = evals_[i]->Eval(preds, labels, group_index); float res = evals_[i]->Eval(preds, labels, group_index);
fprintf(fo, "\t%s-%s:%f", evname, evals_[i]->Name(), res); fprintf(fo, "\t%s-%s:%f", evname, evals_[i]->Name(), res);
} }
} }
private: private:
EvalPair pair_; EvalPair pair_;
EvalMAP map_; EvalMAP map_;
EvalNDCG ndcg_; EvalNDCG ndcg_;
std::vector<const IRankEvaluator*> evals_; std::vector<const IRankEvaluator*> evals_;
}; };
}; };
}; };
#endif #endif

View File

@ -11,20 +11,11 @@
#include "../base/xgboost_boost_task.h" #include "../base/xgboost_boost_task.h"
#include "xgboost_rank.h" #include "xgboost_rank.h"
#include "../regression/xgboost_reg.h" #include "../regression/xgboost_reg.h"
#include "../regression/xgboost_reg_main.cpp"
#include "../base/xgboost_data_instance.h"
int main(int argc, char *argv[]) { int main(int argc, char *argv[]) {
xgboost::random::Seed(0);
xgboost::random::Seed(0); xgboost::base::BoostTask rank_tsk;
xgboost::base::BoostTask tsk; rank_tsk.SetLearner(new xgboost::rank::RankBoostLearner);
xgboost::utils::ConfigIterator itr(argv[1]); return rank_tsk.Run(argc, argv);
/* int learner_index = 0;
while (itr.Next()){
if (!strcmp(itr.name(), "learning_task")){
learner_index = atoi(itr.val());
}
}*/
xgboost::rank::RankBoostLearner* rank_learner = new xgboost::rank::RankBoostLearner;
xgboost::base::BoostLearner *parent = static_cast<xgboost::base::BoostLearner*>(rank_learner);
tsk.SetLearner(parent);
return tsk.Run(argc, argv);
} }

View File

@ -5,123 +5,124 @@
#include"../utils/xgboost_utils.h" #include"../utils/xgboost_utils.h"
namespace xgboost { namespace xgboost {
namespace rank { namespace rank {
namespace sample { namespace sample {
/* /*
* \brief the data structure to maintain the sample pairs * \brief the data structure to maintain the sample pairs
*/ */
struct Pairs { struct Pairs {
/* /*
* \brief constructor given the start and end offset of the sampling group * \brief constructor given the start and end offset of the sampling group
* in overall instances * in overall instances
* \param start the begin index of the group * \param start the begin index of the group
* \param end the end index of the group * \param end the end index of the group
*/ */
Pairs(int start,int end):start_(start),end_(end_){ Pairs(int start, int end) :start_(start), end_(end_){
for(int i = start; i < end; i++){ for (int i = start; i < end; i++){
std::vector<int> v; std::vector<int> v;
pairs_.push_back(v); pairs_.push_back(v);
} }
} }
/* /*
* \brief retrieve the related pair information of an data instances * \brief retrieve the related pair information of an data instances
* \param index, the index of retrieved instance * \param index, the index of retrieved instance
* \return the index of instances paired * \return the index of instances paired
*/ */
std::vector<int> GetPairs(int index) { std::vector<int> GetPairs(int index) {
utils::Assert(index >= start_ && index < end_,"The query index out of sampling bound"); utils::Assert(index >= start_ && index < end_, "The query index out of sampling bound");
return pairs_[index-start_]; return pairs_[index - start_];
} }
/* /*
* \brief add in a sampled pair * \brief add in a sampled pair
* \param index the index of the instance to sample a friend * \param index the index of the instance to sample a friend
* \param paired_index the index of the instance sampled as a friend * \param paired_index the index of the instance sampled as a friend
*/ */
void push(int index,int paired_index){ void push(int index, int paired_index){
pairs_[index - start_].push_back(paired_index); pairs_[index - start_].push_back(paired_index);
} }
std::vector< std::vector<int> > pairs_; std::vector< std::vector<int> > pairs_;
int start_; int start_;
int end_; int end_;
}; };
/* /*
* \brief the interface of pair sampler * \brief the interface of pair sampler
*/ */
struct IPairSampler { struct IPairSampler {
/* /*
* \brief Generate sample pairs given the predcions, labels, the start and the end index * \brief Generate sample pairs given the predcions, labels, the start and the end index
* of a specified group * of a specified group
* \param preds, the predictions of all data instances * \param preds, the predictions of all data instances
* \param labels, the labels of all data instances * \param labels, the labels of all data instances
* \param start, the start index of a specified group * \param start, the start index of a specified group
* \param end, the end index of a specified group * \param end, the end index of a specified group
* \return the generated pairs * \return the generated pairs
*/ */
virtual Pairs GenPairs(const std::vector<float> &preds, virtual Pairs GenPairs(const std::vector<float> &preds,
const std::vector<float> &labels, const std::vector<float> &labels,
int start,int end) = 0; int start, int end) = 0;
}; };
/*! \brief ids of the available pair samplers */
enum{
    BINARY_LINEAR_SAMPLER = 0
};
/*! \brief A simple pair sampler when the rank relevence scale is binary /*! \brief A simple pair sampler when the rank relevence scale is binary
* for each positive instance, we will pick a negative * for each positive instance, we will pick a negative
* instance and add in a pair. When using binary linear sampler, * instance and add in a pair. When using binary linear sampler,
* we should guarantee the labels are 0 or 1 * we should guarantee the labels are 0 or 1
*/ */
struct BinaryLinearSampler:public IPairSampler{ struct BinaryLinearSampler :public IPairSampler{
virtual Pairs GenPairs(const std::vector<float> &preds, virtual Pairs GenPairs(const std::vector<float> &preds,
const std::vector<float> &labels, const std::vector<float> &labels,
int start,int end) { int start, int end) {
Pairs pairs(start,end); Pairs pairs(start, end);
int pointer = 0, last_pointer = 0,index = start, interval = end - start; int pointer = 0, last_pointer = 0, index = start, interval = end - start;
for(int i = start; i < end; i++){ for (int i = start; i < end; i++){
if(labels[i] == 1){ if (labels[i] == 1){
while(true){ while (true){
index = (++pointer) % interval + start; index = (++pointer) % interval + start;
if(labels[index] == 0) break; if (labels[index] == 0) break;
if(pointer - last_pointer > interval) return pairs; if (pointer - last_pointer > interval) return pairs;
} }
pairs.push(i,index); pairs.push(i, index);
pairs.push(index,i); pairs.push(index, i);
last_pointer = pointer; last_pointer = pointer;
} }
} }
return pairs; return pairs;
} }
}; };
/*! \brief Pair Sampler Wrapper*/ /*! \brief Pair Sampler Wrapper*/
struct PairSamplerWrapper{ struct PairSamplerWrapper{
public: public:
inline void AssignSampler( int sampler_index ){ inline void AssignSampler(int sampler_index){
switch(sampler_index){ switch (sampler_index){
case BINARY_LINEAR_SAMPLER:sampler_ = &binary_linear_sampler;break; case BINARY_LINEAR_SAMPLER:sampler_ = &binary_linear_sampler; break;
default:utils::Error("Cannot find the specified sampler"); default:utils::Error("Cannot find the specified sampler");
} }
} }
Pairs GenPairs(const std::vector<float> &preds, Pairs GenPairs(const std::vector<float> &preds,
const std::vector<float> &labels, const std::vector<float> &labels,
int start,int end){ int start, int end){
return sampler_->GenPairs(preds,labels,start,end); utils::Assert(sampler_ != NULL,"Not config the sampler yet. Add rank:sampler in the config file\n");
} return sampler_->GenPairs(preds, labels, start, end);
private: }
BinaryLinearSampler binary_linear_sampler; private:
IPairSampler *sampler_; BinaryLinearSampler binary_linear_sampler;
}; IPairSampler *sampler_;
};
}
} }
}
} }
#endif #endif

View File

@ -21,7 +21,7 @@ namespace xgboost{
class RegBoostLearner{ class RegBoostLearner{
public: public:
/*! \brief constructor */ /*! \brief constructor */
RegBoostLearner( void ){ RegBoostLearner(void){
silent = 0; silent = 0;
} }
/*! /*!
@ -30,11 +30,11 @@ namespace xgboost{
* \param evals array of evaluating data * \param evals array of evaluating data
* \param evname name of evaluation data, used print statistics * \param evname name of evaluation data, used print statistics
*/ */
/*!
 * \brief constructor that also binds the data
 * \param train the training data
 * \param evals array of evaluation data
 * \param evname names of the evaluation data, used when printing statistics
 */
RegBoostLearner(const DMatrix *train,
                const std::vector<DMatrix *> &evals,
                const std::vector<std::string> &evname){
    silent = 0;
    this->SetData(train, evals, evname);
}
/*! /*!
@ -43,66 +43,67 @@ namespace xgboost{
* \param evals array of evaluating data * \param evals array of evaluating data
* \param evname name of evaluation data, used print statistics * \param evname name of evaluation data, used print statistics
*/ */
/*!
 * \brief bind training and evaluation data to the learner
 * \param train the training data
 * \param evals array of evaluation data
 * \param evname names of the evaluation data, used when printing statistics
 */
inline void SetData(const DMatrix *train,
                    const std::vector<DMatrix *> &evals,
                    const std::vector<std::string> &evname){
    this->train_ = train;
    this->evals_ = evals;
    this->evname_ = evname;
    // the feature bound must cover every matrix we will predict on
    int num_feature = (int)(train->data.NumCol());
    // every instance of every dataset gets a slot in the prediction buffer
    unsigned buffer_size = static_cast<unsigned>(train->Size());
    for (size_t i = 0; i < evals.size(); ++i){
        buffer_size += static_cast<unsigned>(evals[i]->Size());
        num_feature = std::max(num_feature, (int)(evals[i]->data.NumCol()));
    }
    char buf[25];
    if (num_feature > mparam.num_feature){
        mparam.num_feature = num_feature;
        sprintf(buf, "%d", num_feature);
        base_gbm.SetParam("bst:num_feature", buf);
    }
    sprintf(buf, "%u", buffer_size);
    base_gbm.SetParam("num_pbuffer", buf);
    if (!silent){
        printf("buffer_size=%u\n", buffer_size);
    }
    // per-eval-set prediction scratch space
    this->eval_preds_.resize(evals.size(), std::vector<float>());
}
/*! /*!
* \brief set parameters from outside * \brief set parameters from outside
* \param name name of the parameter * \param name name of the parameter
* \param val value of the parameter * \param val value of the parameter
*/ */
inline void SetParam( const char *name, const char *val ){ inline void SetParam(const char *name, const char *val){
if( !strcmp( name, "silent") ) silent = atoi( val ); if (!strcmp(name, "silent")) silent = atoi(val);
if( !strcmp( name, "eval_metric") ) evaluator_.AddEval( val ); if (!strcmp(name, "eval_metric")) evaluator_.AddEval(val);
mparam.SetParam( name, val ); mparam.SetParam(name, val);
base_gbm.SetParam( name, val ); base_gbm.SetParam(name, val);
} }
/*! /*!
* \brief initialize solver before training, called before training * \brief initialize solver before training, called before training
* this function is reserved for solver to allocate necessary space and do other preparation * this function is reserved for solver to allocate necessary space and do other preparation
*/ */
inline void InitTrainer( void ){ inline void InitTrainer(void){
base_gbm.InitTrainer(); base_gbm.InitTrainer();
if( mparam.loss_type == kLogisticClassify ){ if (mparam.loss_type == kLogisticClassify){
evaluator_.AddEval( "error" ); evaluator_.AddEval("error");
}else{ }
evaluator_.AddEval( "rmse" ); else{
evaluator_.AddEval("rmse");
} }
evaluator_.Init(); evaluator_.Init();
} }
/*! /*!
* \brief initialize the current data storage for model, if the model is used first time, call this function * \brief initialize the current data storage for model, if the model is used first time, call this function
*/ */
inline void InitModel( void ){ inline void InitModel(void){
base_gbm.InitModel(); base_gbm.InitModel();
mparam.AdjustBase(); mparam.AdjustBase();
} }
@ -110,71 +111,71 @@ namespace xgboost{
* \brief load model from stream * \brief load model from stream
* \param fi input stream * \param fi input stream
*/ */
/*!
 * \brief load model from stream
 * \param fi input stream
 */
inline void LoadModel(utils::IStream &fi){
    base_gbm.LoadModel(fi);
    // the parameter blob must be fully present, otherwise the stream is truncated
    utils::Assert(fi.Read(&mparam, sizeof(ModelParam)) != 0);
}
/*! /*!
* \brief DumpModel * \brief DumpModel
* \param fo text file * \param fo text file
* \param fmap feature map that may help give interpretations of feature * \param fmap feature map that may help give interpretations of feature
* \param with_stats whether print statistics as well * \param with_stats whether print statistics as well
*/ */
/*!
 * \brief dump the model in readable form
 * \param fo text file to write to
 * \param fmap feature map that may help give interpretations of features
 * \param with_stats whether to print statistics as well
 */
inline void DumpModel(FILE *fo, const utils::FeatMap& fmap, bool with_stats){
    base_gbm.DumpModel(fo, fmap, with_stats);
}
/*! /*!
* \brief Dump path of all trees * \brief Dump path of all trees
* \param fo text file * \param fo text file
* \param data input data * \param data input data
*/ */
inline void DumpPath( FILE *fo, const DMatrix &data ){ inline void DumpPath(FILE *fo, const DMatrix &data){
base_gbm.DumpPath( fo, data.data ); base_gbm.DumpPath(fo, data.data);
} }
/*! /*!
* \brief save model to stream * \brief save model to stream
* \param fo output stream * \param fo output stream
*/ */
/*!
 * \brief save model to stream
 * \param fo output stream
 */
inline void SaveModel(utils::IStream &fo) const{
    base_gbm.SaveModel(fo);
    fo.Write(&mparam, sizeof(ModelParam));
}
/*! /*!
* \brief update the model for one iteration * \brief update the model for one iteration
* \param iteration iteration number * \param iteration iteration number
*/ */
inline void UpdateOneIter( int iter ){ inline void UpdateOneIter(int iter){
this->PredictBuffer( preds_, *train_, 0 ); this->PredictBuffer(preds_, *train_, 0);
this->GetGradient( preds_, train_->labels, grad_, hess_ ); this->GetGradient(preds_, train_->labels, grad_, hess_);
std::vector<unsigned> root_index; std::vector<unsigned> root_index;
base_gbm.DoBoost( grad_, hess_, train_->data, root_index ); base_gbm.DoBoost(grad_, hess_, train_->data, root_index);
} }
/*! /*!
* \brief evaluate the model for specific iteration * \brief evaluate the model for specific iteration
* \param iter iteration number * \param iter iteration number
* \param fo file to output log * \param fo file to output log
*/ */
inline void EvalOneIter( int iter, FILE *fo = stderr ){ inline void EvalOneIter(int iter, FILE *fo = stderr){
fprintf( fo, "[%d]", iter ); fprintf(fo, "[%d]", iter);
int buffer_offset = static_cast<int>( train_->Size() ); int buffer_offset = static_cast<int>(train_->Size());
for( size_t i = 0; i < evals_.size(); ++i ){ for (size_t i = 0; i < evals_.size(); ++i){
std::vector<float> &preds = this->eval_preds_[ i ]; std::vector<float> &preds = this->eval_preds_[i];
this->PredictBuffer( preds, *evals_[i], buffer_offset); this->PredictBuffer(preds, *evals_[i], buffer_offset);
evaluator_.Eval( fo, evname_[i].c_str(), preds, (*evals_[i]).labels ); evaluator_.Eval(fo, evname_[i].c_str(), preds, (*evals_[i]).labels);
buffer_offset += static_cast<int>( evals_[i]->Size() ); buffer_offset += static_cast<int>(evals_[i]->Size());
} }
fprintf( fo,"\n" ); fprintf(fo, "\n");
} }
/*! \brief get prediction, without buffering */ /*! \brief get prediction, without buffering */
inline void Predict( std::vector<float> &preds, const DMatrix &data ){ inline void Predict(std::vector<float> &preds, const DMatrix &data){
preds.resize( data.Size() ); preds.resize(data.Size());
const unsigned ndata = static_cast<unsigned>( data.Size() ); const unsigned ndata = static_cast<unsigned>(data.Size());
#pragma omp parallel for schedule( static ) #pragma omp parallel for schedule( static )
for( unsigned j = 0; j < ndata; ++ j ){ for (unsigned j = 0; j < ndata; ++j){
preds[j] = mparam.PredTransform preds[j] = mparam.PredTransform
( mparam.base_score + base_gbm.Predict( data.data, j, -1 ) ); (mparam.base_score + base_gbm.Predict(data.data, j, -1));
} }
} }
public: public:
@ -182,75 +183,75 @@ namespace xgboost{
* \brief update the model for one iteration * \brief update the model for one iteration
* \param iteration iteration number * \param iteration iteration number
*/ */
inline void UpdateInteract( std::string action ){ inline void UpdateInteract(std::string action){
this->InteractPredict( preds_, *train_, 0 ); this->InteractPredict(preds_, *train_, 0);
int buffer_offset = static_cast<int>( train_->Size() ); int buffer_offset = static_cast<int>(train_->Size());
for( size_t i = 0; i < evals_.size(); ++i ){ for (size_t i = 0; i < evals_.size(); ++i){
std::vector<float> &preds = this->eval_preds_[ i ]; std::vector<float> &preds = this->eval_preds_[i];
this->InteractPredict( preds, *evals_[i], buffer_offset ); this->InteractPredict(preds, *evals_[i], buffer_offset);
buffer_offset += static_cast<int>( evals_[i]->Size() ); buffer_offset += static_cast<int>(evals_[i]->Size());
} }
if( action == "remove" ){ if (action == "remove"){
base_gbm.DelteBooster(); return; base_gbm.DelteBooster(); return;
} }
this->GetGradient( preds_, train_->labels, grad_, hess_ ); this->GetGradient(preds_, train_->labels, grad_, hess_);
std::vector<unsigned> root_index; std::vector<unsigned> root_index;
base_gbm.DoBoost( grad_, hess_, train_->data, root_index ); base_gbm.DoBoost(grad_, hess_, train_->data, root_index);
this->InteractRePredict( *train_, 0 ); this->InteractRePredict(*train_, 0);
buffer_offset = static_cast<int>( train_->Size() ); buffer_offset = static_cast<int>(train_->Size());
for( size_t i = 0; i < evals_.size(); ++i ){ for (size_t i = 0; i < evals_.size(); ++i){
this->InteractRePredict( *evals_[i], buffer_offset ); this->InteractRePredict(*evals_[i], buffer_offset);
buffer_offset += static_cast<int>( evals_[i]->Size() ); buffer_offset += static_cast<int>(evals_[i]->Size());
} }
} }
private: private:
/*! \brief get the transformed predictions, given data */ /*! \brief get the transformed predictions, given data */
inline void InteractPredict( std::vector<float> &preds, const DMatrix &data, unsigned buffer_offset ){ inline void InteractPredict(std::vector<float> &preds, const DMatrix &data, unsigned buffer_offset){
preds.resize( data.Size() ); preds.resize(data.Size());
const unsigned ndata = static_cast<unsigned>( data.Size() ); const unsigned ndata = static_cast<unsigned>(data.Size());
#pragma omp parallel for schedule( static ) #pragma omp parallel for schedule( static )
for( unsigned j = 0; j < ndata; ++ j ){ for (unsigned j = 0; j < ndata; ++j){
preds[j] = mparam.PredTransform preds[j] = mparam.PredTransform
( mparam.base_score + base_gbm.InteractPredict( data.data, j, buffer_offset + j ) ); (mparam.base_score + base_gbm.InteractPredict(data.data, j, buffer_offset + j));
} }
} }
/*! \brief repredict trial */ /*! \brief repredict trial */
inline void InteractRePredict( const DMatrix &data, unsigned buffer_offset ){ inline void InteractRePredict(const DMatrix &data, unsigned buffer_offset){
const unsigned ndata = static_cast<unsigned>( data.Size() ); const unsigned ndata = static_cast<unsigned>(data.Size());
#pragma omp parallel for schedule( static ) #pragma omp parallel for schedule( static )
for( unsigned j = 0; j < ndata; ++ j ){ for (unsigned j = 0; j < ndata; ++j){
base_gbm.InteractRePredict( data.data, j, buffer_offset + j ); base_gbm.InteractRePredict(data.data, j, buffer_offset + j);
} }
} }
private: private:
/*! \brief get the transformed predictions, given data */ /*! \brief get the transformed predictions, given data */
inline void PredictBuffer( std::vector<float> &preds, const DMatrix &data, unsigned buffer_offset ){ inline void PredictBuffer(std::vector<float> &preds, const DMatrix &data, unsigned buffer_offset){
preds.resize( data.Size() ); preds.resize(data.Size());
const unsigned ndata = static_cast<unsigned>( data.Size() ); const unsigned ndata = static_cast<unsigned>(data.Size());
#pragma omp parallel for schedule( static ) #pragma omp parallel for schedule( static )
for( unsigned j = 0; j < ndata; ++ j ){ for (unsigned j = 0; j < ndata; ++j){
preds[j] = mparam.PredTransform preds[j] = mparam.PredTransform
( mparam.base_score + base_gbm.Predict( data.data, j, buffer_offset + j ) ); (mparam.base_score + base_gbm.Predict(data.data, j, buffer_offset + j));
} }
} }
/*! \brief get the first order and second order gradient, given the transformed predictions and labels */ /*! \brief get the first order and second order gradient, given the transformed predictions and labels */
inline void GetGradient( const std::vector<float> &preds, inline void GetGradient(const std::vector<float> &preds,
const std::vector<float> &labels, const std::vector<float> &labels,
std::vector<float> &grad, std::vector<float> &grad,
std::vector<float> &hess ){ std::vector<float> &hess){
grad.resize( preds.size() ); hess.resize( preds.size() ); grad.resize(preds.size()); hess.resize(preds.size());
const unsigned ndata = static_cast<unsigned>( preds.size() ); const unsigned ndata = static_cast<unsigned>(preds.size());
#pragma omp parallel for schedule( static ) #pragma omp parallel for schedule( static )
for( unsigned j = 0; j < ndata; ++ j ){ for (unsigned j = 0; j < ndata; ++j){
grad[j] = mparam.FirstOrderGradient( preds[j], labels[j] ); grad[j] = mparam.FirstOrderGradient(preds[j], labels[j]);
hess[j] = mparam.SecondOrderGradient( preds[j], labels[j] ); hess[j] = mparam.SecondOrderGradient(preds[j], labels[j]);
} }
} }
@ -270,31 +271,31 @@ namespace xgboost{
/* \brief number of features */ /* \brief number of features */
int num_feature; int num_feature;
/*! \brief reserved field */ /*! \brief reserved field */
int reserved[ 16 ]; int reserved[16];
/*! \brief constructor */ /*! \brief constructor */
ModelParam( void ){ ModelParam(void){
base_score = 0.5f; base_score = 0.5f;
loss_type = 0; loss_type = 0;
num_feature = 0; num_feature = 0;
memset( reserved, 0, sizeof( reserved ) ); memset(reserved, 0, sizeof(reserved));
} }
/*! /*!
* \brief set parameters from outside * \brief set parameters from outside
* \param name name of the parameter * \param name name of the parameter
* \param val value of the parameter * \param val value of the parameter
*/ */
inline void SetParam( const char *name, const char *val ){ inline void SetParam(const char *name, const char *val){
if( !strcmp("base_score", name ) ) base_score = (float)atof( val ); if (!strcmp("base_score", name)) base_score = (float)atof(val);
if( !strcmp("loss_type", name ) ) loss_type = atoi( val ); if (!strcmp("loss_type", name)) loss_type = atoi(val);
if( !strcmp("bst:num_feature", name ) ) num_feature = atoi( val ); if (!strcmp("bst:num_feature", name)) num_feature = atoi(val);
} }
/*! /*!
* \brief adjust base_score * \brief adjust base_score
*/ */
inline void AdjustBase( void ){ inline void AdjustBase(void){
if( loss_type == 1 || loss_type == 2 ){ if (loss_type == 1 || loss_type == 2){
utils::Assert( base_score > 0.0f && base_score < 1.0f, "sigmoid range constrain" ); utils::Assert(base_score > 0.0f && base_score < 1.0f, "sigmoid range constrain");
base_score = - logf( 1.0f / base_score - 1.0f ); base_score = -logf(1.0f / base_score - 1.0f);
} }
} }
@ -303,11 +304,11 @@ namespace xgboost{
* \param x linear sum of boosting ensemble * \param x linear sum of boosting ensemble
* \return transformed prediction * \return transformed prediction
*/ */
inline float PredTransform( float x ){ inline float PredTransform(float x){
switch( loss_type ){ switch (loss_type){
case kLinearSquare: return x; case kLinearSquare: return x;
case kLogisticClassify: case kLogisticClassify:
case kLogisticNeglik: return 1.0f/(1.0f + expf(-x)); case kLogisticNeglik: return 1.0f / (1.0f + expf(-x));
default: utils::Error("unknown loss_type"); return 0.0f; default: utils::Error("unknown loss_type"); return 0.0f;
} }
} }
@ -318,8 +319,8 @@ namespace xgboost{
* \param label true label * \param label true label
* \return first order gradient * \return first order gradient
*/ */
inline float FirstOrderGradient( float predt, float label ) const{ inline float FirstOrderGradient(float predt, float label) const{
switch( loss_type ){ switch (loss_type){
case kLinearSquare: return predt - label; case kLinearSquare: return predt - label;
case kLogisticClassify: case kLogisticClassify:
case kLogisticNeglik: return predt - label; case kLogisticNeglik: return predt - label;
@ -332,11 +333,11 @@ namespace xgboost{
* \param label true label * \param label true label
* \return second order gradient * \return second order gradient
*/ */
inline float SecondOrderGradient( float predt, float label ) const{ inline float SecondOrderGradient(float predt, float label) const{
switch( loss_type ){ switch (loss_type){
case kLinearSquare: return 1.0f; case kLinearSquare: return 1.0f;
case kLogisticClassify: case kLogisticClassify:
case kLogisticNeglik: return predt * ( 1 - predt ); case kLogisticNeglik: return predt * (1 - predt);
default: utils::Error("unknown loss_type"); return 0.0f; default: utils::Error("unknown loss_type"); return 0.0f;
} }
} }
@ -348,10 +349,10 @@ namespace xgboost{
* \return the specified loss * \return the specified loss
*/ */
inline float Loss(const std::vector<float> &preds, const std::vector<float> &labels) const{ inline float Loss(const std::vector<float> &preds, const std::vector<float> &labels) const{
switch( loss_type ){ switch (loss_type){
case kLinearSquare: return SquareLoss(preds,labels); case kLinearSquare: return SquareLoss(preds, labels);
case kLogisticNeglik: case kLogisticNeglik:
case kLogisticClassify: return NegLoglikelihoodLoss(preds,labels); case kLogisticClassify: return NegLoglikelihoodLoss(preds, labels);
default: utils::Error("unknown loss_type"); return 0.0f; default: utils::Error("unknown loss_type"); return 0.0f;
} }
} }
@ -364,7 +365,7 @@ namespace xgboost{
*/ */
inline float SquareLoss(const std::vector<float> &preds, const std::vector<float> &labels) const{ inline float SquareLoss(const std::vector<float> &preds, const std::vector<float> &labels) const{
float ans = 0.0; float ans = 0.0;
for(size_t i = 0; i < preds.size(); i++){ for (size_t i = 0; i < preds.size(); i++){
float dif = preds[i] - labels[i]; float dif = preds[i] - labels[i];
ans += dif * dif; ans += dif * dif;
} }
@ -379,8 +380,8 @@ namespace xgboost{
*/ */
inline float NegLoglikelihoodLoss(const std::vector<float> &preds, const std::vector<float> &labels) const{ inline float NegLoglikelihoodLoss(const std::vector<float> &preds, const std::vector<float> &labels) const{
float ans = 0.0; float ans = 0.0;
for(size_t i = 0; i < preds.size(); i++) for (size_t i = 0; i < preds.size(); i++)
ans -= labels[i] * logf(preds[i]) + ( 1 - labels[i] ) * logf(1 - preds[i]); ans -= labels[i] * logf(preds[i]) + (1 - labels[i]) * logf(1 - preds[i]);
return ans; return ans;
} }
}; };

View File

@ -29,7 +29,7 @@ namespace xgboost{
std::vector<float> labels; std::vector<float> labels;
public: public:
/*! \brief default constructor */ /*! \brief default constructor */
DMatrix( void ){} DMatrix(void){}
/*! \brief get the number of instances */ /*! \brief get the number of instances */
inline size_t Size() const{ inline size_t Size() const{
@ -40,37 +40,38 @@ namespace xgboost{
* \param fname name of text data * \param fname name of text data
* \param silent whether print information or not * \param silent whether print information or not
*/ */
inline void LoadText( const char* fname, bool silent = false ){ inline void LoadText(const char* fname, bool silent = false){
data.Clear(); data.Clear();
FILE* file = utils::FopenCheck( fname, "r" ); FILE* file = utils::FopenCheck(fname, "r");
float label; bool init = true; float label; bool init = true;
char tmp[ 1024 ]; char tmp[1024];
std::vector<booster::bst_uint> findex; std::vector<booster::bst_uint> findex;
std::vector<booster::bst_float> fvalue; std::vector<booster::bst_float> fvalue;
while( fscanf( file, "%s", tmp ) == 1 ){ while (fscanf(file, "%s", tmp) == 1){
unsigned index; float value; unsigned index; float value;
if( sscanf( tmp, "%u:%f", &index, &value ) == 2 ){ if (sscanf(tmp, "%u:%f", &index, &value) == 2){
findex.push_back( index ); fvalue.push_back( value ); findex.push_back(index); fvalue.push_back(value);
}else{ }
if( !init ){ else{
labels.push_back( label ); if (!init){
data.AddRow( findex, fvalue ); labels.push_back(label);
data.AddRow(findex, fvalue);
} }
findex.clear(); fvalue.clear(); findex.clear(); fvalue.clear();
utils::Assert( sscanf( tmp, "%f", &label ) == 1, "invalid format" ); utils::Assert(sscanf(tmp, "%f", &label) == 1, "invalid format");
init = false; init = false;
} }
} }
labels.push_back( label ); labels.push_back(label);
data.AddRow( findex, fvalue ); data.AddRow(findex, fvalue);
// initialize column support as well // initialize column support as well
data.InitData(); data.InitData();
if( !silent ){ if (!silent){
printf("%ux%u matrix with %lu entries is loaded from %s\n", printf("%ux%u matrix with %lu entries is loaded from %s\n",
(unsigned)data.NumRow(), (unsigned)data.NumCol(), (unsigned long)data.NumEntry(), fname ); (unsigned)data.NumRow(), (unsigned)data.NumCol(), (unsigned long)data.NumEntry(), fname);
} }
fclose(file); fclose(file);
} }
@ -80,20 +81,20 @@ namespace xgboost{
* \param silent whether print information or not * \param silent whether print information or not
* \return whether loading is success * \return whether loading is success
*/ */
inline bool LoadBinary( const char* fname, bool silent = false ){ inline bool LoadBinary(const char* fname, bool silent = false){
FILE *fp = fopen64( fname, "rb" ); FILE *fp = fopen64(fname, "rb");
if( fp == NULL ) return false; if (fp == NULL) return false;
utils::FileStream fs( fp ); utils::FileStream fs(fp);
data.LoadBinary( fs ); data.LoadBinary(fs);
labels.resize( data.NumRow() ); labels.resize(data.NumRow());
utils::Assert( fs.Read( &labels[0], sizeof(float) * data.NumRow() ) != 0, "DMatrix LoadBinary" ); utils::Assert(fs.Read(&labels[0], sizeof(float)* data.NumRow()) != 0, "DMatrix LoadBinary");
fs.Close(); fs.Close();
// initialize column support as well // initialize column support as well
data.InitData(); data.InitData();
if( !silent ){ if (!silent){
printf("%ux%u matrix with %lu entries is loaded from %s\n", printf("%ux%u matrix with %lu entries is loaded from %s\n",
(unsigned)data.NumRow(), (unsigned)data.NumCol(), (unsigned long)data.NumEntry(), fname ); (unsigned)data.NumRow(), (unsigned)data.NumCol(), (unsigned long)data.NumEntry(), fname);
} }
return true; return true;
} }
@ -102,17 +103,17 @@ namespace xgboost{
* \param fname name of binary data * \param fname name of binary data
* \param silent whether print information or not * \param silent whether print information or not
*/ */
inline void SaveBinary( const char* fname, bool silent = false ){ inline void SaveBinary(const char* fname, bool silent = false){
// initialize column support as well // initialize column support as well
data.InitData(); data.InitData();
utils::FileStream fs( utils::FopenCheck( fname, "wb" ) ); utils::FileStream fs(utils::FopenCheck(fname, "wb"));
data.SaveBinary( fs ); data.SaveBinary(fs);
fs.Write( &labels[0], sizeof(float) * data.NumRow() ); fs.Write(&labels[0], sizeof(float)* data.NumRow());
fs.Close(); fs.Close();
if( !silent ){ if (!silent){
printf("%ux%u matrix with %lu entries is saved to %s\n", printf("%ux%u matrix with %lu entries is saved to %s\n",
(unsigned)data.NumRow(), (unsigned)data.NumCol(), (unsigned long)data.NumEntry(), fname ); (unsigned)data.NumRow(), (unsigned)data.NumCol(), (unsigned long)data.NumEntry(), fname);
} }
} }
/*! /*!
@ -124,26 +125,26 @@ namespace xgboost{
* \param silent whether print information or not * \param silent whether print information or not
* \param savebuffer whether do save binary buffer if it is text * \param savebuffer whether do save binary buffer if it is text
*/ */
inline void CacheLoad( const char *fname, bool silent = false, bool savebuffer = true ){ inline void CacheLoad(const char *fname, bool silent = false, bool savebuffer = true){
int len = strlen( fname ); int len = strlen(fname);
if( len > 8 && !strcmp( fname + len - 7, ".buffer") ){ if (len > 8 && !strcmp(fname + len - 7, ".buffer")){
this->LoadBinary( fname, silent ); return; this->LoadBinary(fname, silent); return;
} }
char bname[ 1024 ]; char bname[1024];
sprintf( bname, "%s.buffer", fname ); sprintf(bname, "%s.buffer", fname);
if( !this->LoadBinary( bname, silent ) ){ if (!this->LoadBinary(bname, silent)){
this->LoadText( fname, silent ); this->LoadText(fname, silent);
if( savebuffer ) this->SaveBinary( bname, silent ); if (savebuffer) this->SaveBinary(bname, silent);
} }
} }
private: private:
/*! \brief update num_feature info */ /*! \brief update num_feature info */
inline void UpdateInfo( void ){ inline void UpdateInfo(void){
this->num_feature = 0; this->num_feature = 0;
for( size_t i = 0; i < data.NumRow(); i ++ ){ for (size_t i = 0; i < data.NumRow(); i++){
booster::FMatrixS::Line sp = data[i]; booster::FMatrixS::Line sp = data[i];
for( unsigned j = 0; j < sp.len; j ++ ){ for (unsigned j = 0; j < sp.len; j++){
if( num_feature <= sp[j].findex ){ if (num_feature <= sp[j].findex){
num_feature = sp[j].findex + 1; num_feature = sp[j].findex + 1;
} }
} }

View File

@ -21,47 +21,48 @@ namespace xgboost{
* \param preds prediction * \param preds prediction
* \param labels label * \param labels label
*/ */
virtual float Eval( const std::vector<float> &preds, virtual float Eval(const std::vector<float> &preds,
const std::vector<float> &labels ) const= 0; const std::vector<float> &labels) const = 0;
/*! \return name of metric */ /*! \return name of metric */
virtual const char *Name( void ) const= 0; virtual const char *Name(void) const = 0;
}; };
/*! \brief RMSE */ /*! \brief RMSE */
struct EvalRMSE : public IEvaluator{ struct EvalRMSE : public IEvaluator{
virtual float Eval( const std::vector<float> &preds, virtual float Eval(const std::vector<float> &preds,
const std::vector<float> &labels ) const{ const std::vector<float> &labels) const{
const unsigned ndata = static_cast<unsigned>( preds.size() ); const unsigned ndata = static_cast<unsigned>(preds.size());
float sum = 0.0; float sum = 0.0;
#pragma omp parallel for reduction(+:sum) schedule( static ) #pragma omp parallel for reduction(+:sum) schedule( static )
for( unsigned i = 0; i < ndata; ++ i ){ for (unsigned i = 0; i < ndata; ++i){
float diff = preds[i] - labels[i]; float diff = preds[i] - labels[i];
sum += diff * diff; sum += diff * diff;
} }
return sqrtf( sum / ndata ); return sqrtf(sum / ndata);
} }
virtual const char *Name( void ) const{ virtual const char *Name(void) const{
return "rmse"; return "rmse";
} }
}; };
/*! \brief Error */ /*! \brief Error */
struct EvalError : public IEvaluator{ struct EvalError : public IEvaluator{
virtual float Eval( const std::vector<float> &preds, virtual float Eval(const std::vector<float> &preds,
const std::vector<float> &labels ) const{ const std::vector<float> &labels) const{
const unsigned ndata = static_cast<unsigned>( preds.size() ); const unsigned ndata = static_cast<unsigned>(preds.size());
unsigned nerr = 0; unsigned nerr = 0;
#pragma omp parallel for reduction(+:nerr) schedule( static ) #pragma omp parallel for reduction(+:nerr) schedule( static )
for( unsigned i = 0; i < ndata; ++ i ){ for (unsigned i = 0; i < ndata; ++i){
if( preds[i] > 0.5f ){ if (preds[i] > 0.5f){
if( labels[i] < 0.5f ) nerr += 1; if (labels[i] < 0.5f) nerr += 1;
}else{ }
if( labels[i] > 0.5f ) nerr += 1; else{
if (labels[i] > 0.5f) nerr += 1;
} }
} }
return static_cast<float>(nerr) / ndata; return static_cast<float>(nerr) / ndata;
} }
virtual const char *Name( void ) const{ virtual const char *Name(void) const{
return "error"; return "error";
} }
}; };
@ -69,19 +70,19 @@ namespace xgboost{
/*! \brief Error */ /*! \brief Error */
struct EvalLogLoss : public IEvaluator{ struct EvalLogLoss : public IEvaluator{
virtual float Eval( const std::vector<float> &preds, virtual float Eval(const std::vector<float> &preds,
const std::vector<float> &labels ) const{ const std::vector<float> &labels) const{
const unsigned ndata = static_cast<unsigned>( preds.size() ); const unsigned ndata = static_cast<unsigned>(preds.size());
unsigned nerr = 0; unsigned nerr = 0;
#pragma omp parallel for reduction(+:nerr) schedule( static ) #pragma omp parallel for reduction(+:nerr) schedule( static )
for( unsigned i = 0; i < ndata; ++ i ){ for (unsigned i = 0; i < ndata; ++i){
const float y = labels[i]; const float y = labels[i];
const float py = preds[i]; const float py = preds[i];
nerr -= y * std::log(py) + (1.0f-y)*std::log(1-py); nerr -= y * std::log(py) + (1.0f - y)*std::log(1 - py);
} }
return static_cast<float>(nerr) / ndata; return static_cast<float>(nerr) / ndata;
} }
virtual const char *Name( void ) const{ virtual const char *Name(void) const{
return "negllik"; return "negllik";
} }
}; };
@ -91,21 +92,21 @@ namespace xgboost{
/*! \brief a set of evaluators */ /*! \brief a set of evaluators */
struct EvalSet{ struct EvalSet{
public: public:
inline void AddEval( const char *name ){ inline void AddEval(const char *name){
if( !strcmp( name, "rmse") ) evals_.push_back( &rmse_ ); if (!strcmp(name, "rmse")) evals_.push_back(&rmse_);
if( !strcmp( name, "error") ) evals_.push_back( &error_ ); if (!strcmp(name, "error")) evals_.push_back(&error_);
if( !strcmp( name, "logloss") ) evals_.push_back( &logloss_ ); if (!strcmp(name, "logloss")) evals_.push_back(&logloss_);
} }
inline void Init( void ){ inline void Init(void){
std::sort( evals_.begin(), evals_.end() ); std::sort(evals_.begin(), evals_.end());
evals_.resize( std::unique( evals_.begin(), evals_.end() ) - evals_.begin() ); evals_.resize(std::unique(evals_.begin(), evals_.end()) - evals_.begin());
} }
inline void Eval( FILE *fo, const char *evname, inline void Eval(FILE *fo, const char *evname,
const std::vector<float> &preds, const std::vector<float> &preds,
const std::vector<float> &labels ) const{ const std::vector<float> &labels) const{
for( size_t i = 0; i < evals_.size(); ++ i ){ for (size_t i = 0; i < evals_.size(); ++i){
float res = evals_[i]->Eval( preds, labels ); float res = evals_[i]->Eval(preds, labels);
fprintf( fo, "\t%s-%s:%f", evname, evals_[i]->Name(), res ); fprintf(fo, "\t%s-%s:%f", evname, evals_[i]->Name(), res);
} }
} }
private: private:

View File

@ -18,74 +18,75 @@ namespace xgboost{
*/ */
class RegBoostTask{ class RegBoostTask{
public: public:
inline int Run( int argc, char *argv[] ){ inline int Run(int argc, char *argv[]){
if( argc < 2 ){ if (argc < 2){
printf("Usage: <config>\n"); printf("Usage: <config>\n");
return 0; return 0;
} }
utils::ConfigIterator itr( argv[1] ); utils::ConfigIterator itr(argv[1]);
while( itr.Next() ){ while (itr.Next()){
this->SetParam( itr.name(), itr.val() ); this->SetParam(itr.name(), itr.val());
} }
for( int i = 2; i < argc; i ++ ){ for (int i = 2; i < argc; i++){
char name[256], val[256]; char name[256], val[256];
if( sscanf( argv[i], "%[^=]=%s", name, val ) == 2 ){ if (sscanf(argv[i], "%[^=]=%s", name, val) == 2){
this->SetParam( name, val ); this->SetParam(name, val);
} }
} }
this->InitData(); this->InitData();
this->InitLearner(); this->InitLearner();
if( task == "dump" ){ if (task == "dump"){
this->TaskDump(); this->TaskDump();
return 0; return 0;
} }
if( task == "interact" ){ if (task == "interact"){
this->TaskInteractive(); return 0; this->TaskInteractive(); return 0;
} }
if( task == "dumppath" ){ if (task == "dumppath"){
this->TaskDumpPath(); return 0; this->TaskDumpPath(); return 0;
} }
if( task == "eval" ){ if (task == "eval"){
this->TaskEval(); return 0; this->TaskEval(); return 0;
} }
if( task == "pred" ){ if (task == "pred"){
this->TaskPred(); this->TaskPred();
}else{ }
else{
this->TaskTrain(); this->TaskTrain();
} }
return 0; return 0;
} }
inline void SetParam( const char *name, const char *val ){ inline void SetParam(const char *name, const char *val){
if( !strcmp("silent", name ) ) silent = atoi( val ); if (!strcmp("silent", name)) silent = atoi(val);
if( !strcmp("use_buffer", name ) ) use_buffer = atoi( val ); if (!strcmp("use_buffer", name)) use_buffer = atoi(val);
if( !strcmp("seed", name ) ) random::Seed( atoi(val) ); if (!strcmp("seed", name)) random::Seed(atoi(val));
if( !strcmp("num_round", name ) ) num_round = atoi( val ); if (!strcmp("num_round", name)) num_round = atoi(val);
if( !strcmp("save_period", name ) ) save_period = atoi( val ); if (!strcmp("save_period", name)) save_period = atoi(val);
if( !strcmp("task", name ) ) task = val; if (!strcmp("task", name)) task = val;
if( !strcmp("data", name ) ) train_path = val; if (!strcmp("data", name)) train_path = val;
if( !strcmp("test:data", name ) ) test_path = val; if (!strcmp("test:data", name)) test_path = val;
if( !strcmp("model_in", name ) ) model_in = val; if (!strcmp("model_in", name)) model_in = val;
if( !strcmp("model_out", name ) ) model_out = val; if (!strcmp("model_out", name)) model_out = val;
if( !strcmp("model_dir", name ) ) model_dir_path = val; if (!strcmp("model_dir", name)) model_dir_path = val;
if( !strcmp("fmap", name ) ) name_fmap = val; if (!strcmp("fmap", name)) name_fmap = val;
if( !strcmp("name_dump", name ) ) name_dump = val; if (!strcmp("name_dump", name)) name_dump = val;
if( !strcmp("name_dumppath", name ) ) name_dumppath = val; if (!strcmp("name_dumppath", name)) name_dumppath = val;
if( !strcmp("name_pred", name ) ) name_pred = val; if (!strcmp("name_pred", name)) name_pred = val;
if( !strcmp("dump_stats", name ) ) dump_model_stats = atoi( val ); if (!strcmp("dump_stats", name)) dump_model_stats = atoi(val);
if( !strcmp("interact:action", name ) ) interact_action = val; if (!strcmp("interact:action", name)) interact_action = val;
if( !strncmp("batch:", name, 6 ) ){ if (!strncmp("batch:", name, 6)){
cfg_batch.PushBack( name + 6, val ); cfg_batch.PushBack(name + 6, val);
} }
if( !strncmp("eval[", name, 5 ) ) { if (!strncmp("eval[", name, 5)) {
char evname[ 256 ]; char evname[256];
utils::Assert( sscanf( name, "eval[%[^]]", evname ) == 1, "must specify evaluation name for display"); utils::Assert(sscanf(name, "eval[%[^]]", evname) == 1, "must specify evaluation name for display");
eval_data_names.push_back( std::string( evname ) ); eval_data_names.push_back(std::string(evname));
eval_data_paths.push_back( std::string( val ) ); eval_data_paths.push_back(std::string(val));
} }
cfg.PushBack( name, val ); cfg.PushBack(name, val);
} }
public: public:
RegBoostTask( void ){ RegBoostTask(void){
// default parameters // default parameters
silent = 0; silent = 0;
use_buffer = 1; use_buffer = 1;
@ -102,128 +103,132 @@ namespace xgboost{
model_dir_path = "./"; model_dir_path = "./";
interact_action = "update"; interact_action = "update";
} }
~RegBoostTask( void ){ ~RegBoostTask(void){
for( size_t i = 0; i < deval.size(); i ++ ){ for (size_t i = 0; i < deval.size(); i++){
delete deval[i]; delete deval[i];
} }
} }
private: private:
inline void InitData( void ){ inline void InitData(void){
if( name_fmap != "NULL" ) fmap.LoadText( name_fmap.c_str() ); if (name_fmap != "NULL") fmap.LoadText(name_fmap.c_str());
if( task == "dump" ) return; if (task == "dump") return;
if( task == "pred" || task == "dumppath" ){ if (task == "pred" || task == "dumppath"){
data.CacheLoad( test_path.c_str(), silent!=0, use_buffer!=0 ); data.CacheLoad(test_path.c_str(), silent != 0, use_buffer != 0);
}else{ }
else{
// training // training
data.CacheLoad( train_path.c_str(), silent!=0, use_buffer!=0 ); data.CacheLoad(train_path.c_str(), silent != 0, use_buffer != 0);
utils::Assert( eval_data_names.size() == eval_data_paths.size() ); utils::Assert(eval_data_names.size() == eval_data_paths.size());
for( size_t i = 0; i < eval_data_names.size(); ++ i ){ for (size_t i = 0; i < eval_data_names.size(); ++i){
deval.push_back( new DMatrix() ); deval.push_back(new DMatrix());
deval.back()->CacheLoad( eval_data_paths[i].c_str(), silent!=0, use_buffer!=0 ); deval.back()->CacheLoad(eval_data_paths[i].c_str(), silent != 0, use_buffer != 0);
} }
} }
learner.SetData( &data, deval, eval_data_names ); learner.SetData(&data, deval, eval_data_names);
} }
inline void InitLearner( void ){ inline void InitLearner(void){
cfg.BeforeFirst(); cfg.BeforeFirst();
while( cfg.Next() ){ while (cfg.Next()){
learner.SetParam( cfg.name(), cfg.val() ); learner.SetParam(cfg.name(), cfg.val());
} }
if( model_in != "NULL" ){ if (model_in != "NULL"){
utils::FileStream fi( utils::FopenCheck( model_in.c_str(), "rb") ); utils::FileStream fi(utils::FopenCheck(model_in.c_str(), "rb"));
learner.LoadModel( fi ); learner.LoadModel(fi);
fi.Close(); fi.Close();
}else{ }
utils::Assert( task == "train", "model_in not specified" ); else{
utils::Assert(task == "train", "model_in not specified");
learner.InitModel(); learner.InitModel();
} }
learner.InitTrainer(); learner.InitTrainer();
} }
inline void TaskTrain( void ){ inline void TaskTrain(void){
const time_t start = time( NULL ); const time_t start = time(NULL);
unsigned long elapsed = 0; unsigned long elapsed = 0;
for( int i = 0; i < num_round; ++ i ){ for (int i = 0; i < num_round; ++i){
elapsed = (unsigned long)(time(NULL) - start); elapsed = (unsigned long)(time(NULL) - start);
if( !silent ) printf("boosting round %d, %lu sec elapsed\n", i , elapsed ); if (!silent) printf("boosting round %d, %lu sec elapsed\n", i, elapsed);
learner.UpdateOneIter( i ); learner.UpdateOneIter(i);
learner.EvalOneIter( i ); learner.EvalOneIter(i);
if( save_period != 0 && (i+1) % save_period == 0 ){ if (save_period != 0 && (i + 1) % save_period == 0){
this->SaveModel( i ); this->SaveModel(i);
} }
elapsed = (unsigned long)(time(NULL) - start); elapsed = (unsigned long)(time(NULL) - start);
} }
// always save final round // always save final round
if( save_period == 0 || num_round % save_period != 0 ){ if (save_period == 0 || num_round % save_period != 0){
if( model_out == "NULL" ){ if (model_out == "NULL"){
this->SaveModel( num_round - 1 ); this->SaveModel(num_round - 1);
}else{ }
this->SaveModel( model_out.c_str() ); else{
this->SaveModel(model_out.c_str());
} }
} }
if( !silent ){ if (!silent){
printf("\nupdating end, %lu sec in all\n", elapsed ); printf("\nupdating end, %lu sec in all\n", elapsed);
} }
} }
inline void TaskEval( void ){ inline void TaskEval(void){
learner.EvalOneIter( 0 ); learner.EvalOneIter(0);
} }
inline void TaskInteractive( void ){ inline void TaskInteractive(void){
const time_t start = time( NULL ); const time_t start = time(NULL);
unsigned long elapsed = 0; unsigned long elapsed = 0;
int batch_action = 0; int batch_action = 0;
cfg_batch.BeforeFirst(); cfg_batch.BeforeFirst();
while( cfg_batch.Next() ){ while (cfg_batch.Next()){
if( !strcmp( cfg_batch.name(), "run" ) ){ if (!strcmp(cfg_batch.name(), "run")){
learner.UpdateInteract( interact_action ); learner.UpdateInteract(interact_action);
batch_action += 1; batch_action += 1;
} else{ }
learner.SetParam( cfg_batch.name(), cfg_batch.val() ); else{
learner.SetParam(cfg_batch.name(), cfg_batch.val());
} }
} }
if( batch_action == 0 ){ if (batch_action == 0){
learner.UpdateInteract( interact_action ); learner.UpdateInteract(interact_action);
} }
utils::Assert( model_out != "NULL", "interactive mode must specify model_out" ); utils::Assert(model_out != "NULL", "interactive mode must specify model_out");
this->SaveModel( model_out.c_str() ); this->SaveModel(model_out.c_str());
elapsed = (unsigned long)(time(NULL) - start); elapsed = (unsigned long)(time(NULL) - start);
if( !silent ){ if (!silent){
printf("\ninteractive update, %d batch actions, %lu sec in all\n", batch_action, elapsed ); printf("\ninteractive update, %d batch actions, %lu sec in all\n", batch_action, elapsed);
} }
} }
inline void TaskDump( void ){ inline void TaskDump(void){
FILE *fo = utils::FopenCheck( name_dump.c_str(), "w" ); FILE *fo = utils::FopenCheck(name_dump.c_str(), "w");
learner.DumpModel( fo, fmap, dump_model_stats != 0 ); learner.DumpModel(fo, fmap, dump_model_stats != 0);
fclose( fo ); fclose(fo);
} }
inline void TaskDumpPath( void ){ inline void TaskDumpPath(void){
FILE *fo = utils::FopenCheck( name_dumppath.c_str(), "w" ); FILE *fo = utils::FopenCheck(name_dumppath.c_str(), "w");
learner.DumpPath( fo, data ); learner.DumpPath(fo, data);
fclose( fo ); fclose(fo);
} }
inline void SaveModel( const char *fname ) const{ inline void SaveModel(const char *fname) const{
utils::FileStream fo( utils::FopenCheck( fname, "wb" ) ); utils::FileStream fo(utils::FopenCheck(fname, "wb"));
learner.SaveModel( fo ); learner.SaveModel(fo);
fo.Close(); fo.Close();
} }
inline void SaveModel( int i ) const{ inline void SaveModel(int i) const{
char fname[256]; char fname[256];
sprintf( fname ,"%s/%04d.model", model_dir_path.c_str(), i+1 ); sprintf(fname, "%s/%04d.model", model_dir_path.c_str(), i + 1);
this->SaveModel( fname ); this->SaveModel(fname);
} }
inline void TaskPred( void ){ inline void TaskPred(void){
std::vector<float> preds; std::vector<float> preds;
if( !silent ) printf("start prediction...\n"); if (!silent) printf("start prediction...\n");
learner.Predict( preds, data ); learner.Predict(preds, data);
if( !silent ) printf("writing prediction to %s\n", name_pred.c_str() ); if (!silent) printf("writing prediction to %s\n", name_pred.c_str());
FILE *fo = utils::FopenCheck( name_pred.c_str(), "w" ); FILE *fo = utils::FopenCheck(name_pred.c_str(), "w");
for( size_t i = 0; i < preds.size(); i ++ ){ for (size_t i = 0; i < preds.size(); i++){
fprintf( fo, "%f\n", preds[i] ); fprintf(fo, "%f\n", preds[i]);
} }
fclose( fo ); fclose(fo);
} }
private: private:
/* \brief whether silent */ /* \brief whether silent */
@ -273,8 +278,8 @@ namespace xgboost{
}; };
}; };
int main( int argc, char *argv[] ){ // int main( int argc, char *argv[] ){
xgboost::random::Seed( 0 ); // xgboost::random::Seed( 0 );
xgboost::regression::RegBoostTask tsk; // xgboost::regression::RegBoostTask tsk;
return tsk.Run( argc, argv ); // return tsk.Run( argc, argv );
} // }

View File

@ -23,38 +23,38 @@ namespace xgboost{
* \brief constructor * \brief constructor
* \param fname name of configure file * \param fname name of configure file
*/ */
ConfigIterator( const char *fname ){ ConfigIterator(const char *fname){
fi = FopenCheck( fname, "r"); fi = FopenCheck(fname, "r");
ch_buf = fgetc( fi ); ch_buf = fgetc(fi);
} }
/*! \brief destructor */ /*! \brief destructor */
~ConfigIterator(){ ~ConfigIterator(){
fclose( fi ); fclose(fi);
} }
/*! /*!
* \brief get current name, called after Next returns true * \brief get current name, called after Next returns true
* \return current parameter name * \return current parameter name
*/ */
inline const char *name( void )const{ inline const char *name(void)const{
return s_name; return s_name;
} }
/*! /*!
* \brief get current value, called after Next returns true * \brief get current value, called after Next returns true
* \return current parameter value * \return current parameter value
*/ */
inline const char *val( void ) const{ inline const char *val(void) const{
return s_val; return s_val;
} }
/*! /*!
* \brief move iterator to next position * \brief move iterator to next position
* \return true if there is value in next position * \return true if there is value in next position
*/ */
inline bool Next( void ){ inline bool Next(void){
while( !feof( fi ) ){ while (!feof(fi)){
GetNextToken( s_name ); GetNextToken(s_name);
if( s_name[0] == '=') return false; if (s_name[0] == '=') return false;
if( GetNextToken( s_buf ) || s_buf[0] != '=' ) return false; if (GetNextToken(s_buf) || s_buf[0] != '=') return false;
if( GetNextToken( s_val ) || s_val[0] == '=' ) return false; if (GetNextToken(s_val) || s_val[0] == '=') return false;
return true; return true;
} }
return false; return false;
@ -62,21 +62,21 @@ namespace xgboost{
private: private:
FILE *fi; FILE *fi;
char ch_buf; char ch_buf;
char s_name[256],s_val[256],s_buf[246]; char s_name[256], s_val[256], s_buf[246];
inline void SkipLine(){ inline void SkipLine(){
do{ do{
ch_buf = fgetc( fi ); ch_buf = fgetc(fi);
}while( ch_buf != EOF && ch_buf != '\n' && ch_buf != '\r' ); } while (ch_buf != EOF && ch_buf != '\n' && ch_buf != '\r');
} }
inline void ParseStr( char tok[] ){ inline void ParseStr(char tok[]){
int i = 0; int i = 0;
while( (ch_buf = fgetc(fi)) != EOF ){ while ((ch_buf = fgetc(fi)) != EOF){
switch( ch_buf ){ switch (ch_buf){
case '\\': tok[i++] = fgetc( fi ); break; case '\\': tok[i++] = fgetc(fi); break;
case '\"': tok[i++] = '\0'; case '\"': tok[i++] = '\0';
return; return;
case '\r': case '\r':
case '\n': Error("unterminated string"); break; case '\n': Error("unterminated string"); break;
default: tok[i++] = ch_buf; default: tok[i++] = ch_buf;
@ -85,41 +85,43 @@ namespace xgboost{
Error("unterminated string"); Error("unterminated string");
} }
// return newline // return newline
inline bool GetNextToken( char tok[] ){ inline bool GetNextToken(char tok[]){
int i = 0; int i = 0;
bool new_line = false; bool new_line = false;
while( ch_buf != EOF ){ while (ch_buf != EOF){
switch( ch_buf ){ switch (ch_buf){
case '#' : SkipLine(); new_line = true; break; case '#': SkipLine(); new_line = true; break;
case '\"': case '\"':
if( i == 0 ){ if (i == 0){
ParseStr( tok );ch_buf = fgetc(fi); return new_line; ParseStr(tok); ch_buf = fgetc(fi); return new_line;
}else{ }
else{
Error("token followed directly by string"); Error("token followed directly by string");
} }
case '=': case '=':
if( i == 0 ) { if (i == 0) {
ch_buf = fgetc( fi ); ch_buf = fgetc(fi);
tok[0] = '='; tok[0] = '=';
tok[1] = '\0'; tok[1] = '\0';
}else{ }
else{
tok[i] = '\0'; tok[i] = '\0';
} }
return new_line; return new_line;
case '\r': case '\r':
case '\n': case '\n':
if( i == 0 ) new_line = true; if (i == 0) new_line = true;
case '\t': case '\t':
case ' ' : case ' ':
ch_buf = fgetc( fi ); ch_buf = fgetc(fi);
if( i > 0 ){ if (i > 0){
tok[i] = '\0'; tok[i] = '\0';
return new_line; return new_line;
} }
break; break;
default: default:
tok[i++] = ch_buf; tok[i++] = ch_buf;
ch_buf = fgetc( fi ); ch_buf = fgetc(fi);
break; break;
} }
} }
@ -137,9 +139,9 @@ namespace xgboost{
class ConfigSaver{ class ConfigSaver{
public: public:
/*! \brief constructor */ /*! \brief constructor */
ConfigSaver( void ){ idx = 0; } ConfigSaver(void){ idx = 0; }
/*! \brief clear all saves */ /*! \brief clear all saves */
inline void Clear( void ){ inline void Clear(void){
idx = 0; idx = 0;
names.clear(); values.clear(); names.clear(); values.clear();
names_high.clear(); values_high.clear(); names_high.clear(); values_high.clear();
@ -151,54 +153,57 @@ namespace xgboost{
* \param priority whether the setting has higher priority: high priority occurs * \param priority whether the setting has higher priority: high priority occurs
* latter when read from ConfigSaver, and can overwrite existing settings * latter when read from ConfigSaver, and can overwrite existing settings
*/ */
inline void PushBack( const char *name, const char *val, int priority = 0 ){ inline void PushBack(const char *name, const char *val, int priority = 0){
if( priority == 0 ){ if (priority == 0){
names.push_back( std::string( name ) ); names.push_back(std::string(name));
values.push_back( std::string( val ) ); values.push_back(std::string(val));
}else{ }
names_high.push_back( std::string( name ) ); else{
values_high.push_back( std::string( val ) ); names_high.push_back(std::string(name));
values_high.push_back(std::string(val));
} }
} }
/*! \brief set pointer to beginning of the ConfigSaver */ /*! \brief set pointer to beginning of the ConfigSaver */
inline void BeforeFirst( void ){ inline void BeforeFirst(void){
idx = 0; idx = 0;
} }
/*! /*!
* \brief move iterator to next position * \brief move iterator to next position
* \return true if there is value in next position * \return true if there is value in next position
*/ */
inline bool Next( void ){ inline bool Next(void){
if( idx >= names.size() + names_high.size() ){ if (idx >= names.size() + names_high.size()){
return false; return false;
} }
idx ++; idx++;
return true; return true;
} }
/*! /*!
* \brief get current name, called after Next returns true * \brief get current name, called after Next returns true
* \return current parameter name * \return current parameter name
*/ */
inline const char *name( void ) const{ inline const char *name(void) const{
Assert( idx > 0, "can't call name before first"); Assert(idx > 0, "can't call name before first");
size_t i = idx - 1; size_t i = idx - 1;
if( i >= names.size() ){ if (i >= names.size()){
return names_high[ i - names.size() ].c_str(); return names_high[i - names.size()].c_str();
}else{ }
return names[ i ].c_str(); else{
return names[i].c_str();
} }
} }
/*! /*!
* \brief get current value, called after Next returns true * \brief get current value, called after Next returns true
* \return current parameter value * \return current parameter value
*/ */
inline const char *val( void ) const{ inline const char *val(void) const{
Assert( idx > 0, "can't call name before first"); Assert(idx > 0, "can't call name before first");
size_t i = idx - 1; size_t i = idx - 1;
if( i >= values.size() ){ if (i >= values.size()){
return values_high[ i - values.size() ].c_str(); return values_high[i - values.size()].c_str();
}else{ }
return values[ i ].c_str(); else{
return values[i].c_str();
} }
} }
private: private:

View File

@ -16,48 +16,48 @@ namespace xgboost{
class FeatMap{ class FeatMap{
public: public:
enum Type{ enum Type{
kIndicator = 0, kIndicator = 0,
kQuantitive = 1, kQuantitive = 1,
kInteger = 2, kInteger = 2,
kFloat = 3 kFloat = 3
}; };
public: public:
/*! \brief load feature map from text format */ /*! \brief load feature map from text format */
inline void LoadText( const char *fname ){ inline void LoadText(const char *fname){
FILE *fi = utils::FopenCheck( fname, "r" ); FILE *fi = utils::FopenCheck(fname, "r");
this->LoadText( fi ); this->LoadText(fi);
fclose( fi ); fclose(fi);
} }
/*! \brief load feature map from text format */ /*! \brief load feature map from text format */
inline void LoadText( FILE *fi ){ inline void LoadText(FILE *fi){
int fid; int fid;
char fname[256], ftype[256]; char fname[256], ftype[256];
while( fscanf( fi, "%d%s%s", &fid, fname, ftype ) == 3 ){ while (fscanf(fi, "%d%s%s", &fid, fname, ftype) == 3){
utils::Assert( fid == (int)names_.size(), "invalid fmap format" ); utils::Assert(fid == (int)names_.size(), "invalid fmap format");
names_.push_back( std::string(fname) ); names_.push_back(std::string(fname));
types_.push_back( GetType( ftype ) ); types_.push_back(GetType(ftype));
} }
} }
/*! \brief number of known features */ /*! \brief number of known features */
size_t size( void ) const{ size_t size(void) const{
return names_.size(); return names_.size();
} }
/*! \brief return name of specific feature */ /*! \brief return name of specific feature */
const char* name( size_t idx ) const{ const char* name(size_t idx) const{
utils::Assert( idx < names_.size(), "utils::FMap::name feature index exceed bound" ); utils::Assert(idx < names_.size(), "utils::FMap::name feature index exceed bound");
return names_[ idx ].c_str(); return names_[idx].c_str();
} }
/*! \brief return type of specific feature */ /*! \brief return type of specific feature */
const Type& type( size_t idx ) const{ const Type& type(size_t idx) const{
utils::Assert( idx < names_.size(), "utils::FMap::name feature index exceed bound" ); utils::Assert(idx < names_.size(), "utils::FMap::name feature index exceed bound");
return types_[ idx ]; return types_[idx];
} }
private: private:
inline static Type GetType( const char *tname ){ inline static Type GetType(const char *tname){
if( !strcmp( "i", tname ) ) return kIndicator; if (!strcmp("i", tname)) return kIndicator;
if( !strcmp( "q", tname ) ) return kQuantitive; if (!strcmp("q", tname)) return kQuantitive;
if( !strcmp( "int", tname ) ) return kInteger; if (!strcmp("int", tname)) return kInteger;
if( !strcmp( "float", tname ) ) return kFloat; if (!strcmp("float", tname)) return kFloat;
utils::Error("unknown feature type, use i for indicator and q for quantity"); utils::Error("unknown feature type, use i for indicator and q for quantity");
return kIndicator; return kIndicator;
} }
@ -73,44 +73,44 @@ namespace xgboost{
/*! \brief feature constraint, allow or disallow some feature during training */ /*! \brief feature constraint, allow or disallow some feature during training */
class FeatConstrain{ class FeatConstrain{
public: public:
FeatConstrain( void ){ FeatConstrain(void){
default_state_ = +1; default_state_ = +1;
} }
/*!\brief set parameters */ /*!\brief set parameters */
inline void SetParam( const char *name, const char *val ){ inline void SetParam(const char *name, const char *val){
int a, b; int a, b;
if( !strcmp( name, "fban") ){ if (!strcmp(name, "fban")){
this->ParseRange( val, a, b ); this->ParseRange(val, a, b);
this->SetRange( a, b, -1 ); this->SetRange(a, b, -1);
} }
if( !strcmp( name, "fpass") ){ if (!strcmp(name, "fpass")){
this->ParseRange( val, a, b ); this->ParseRange(val, a, b);
this->SetRange( a, b, +1 ); this->SetRange(a, b, +1);
} }
if( !strcmp( name, "fdefault") ){ if (!strcmp(name, "fdefault")){
default_state_ = atoi( val ); default_state_ = atoi(val);
} }
} }
/*! \brief whether constrain is specified */ /*! \brief whether constrain is specified */
inline bool HasConstrain( void ) const { inline bool HasConstrain(void) const {
return state_.size() != 0 && default_state_ == 1; return state_.size() != 0 && default_state_ == 1;
} }
/*! \brief whether a feature index is banned or not */ /*! \brief whether a feature index is banned or not */
inline bool NotBanned( unsigned index ) const{ inline bool NotBanned(unsigned index) const{
int rt = index < state_.size() ? state_[index] : default_state_; int rt = index < state_.size() ? state_[index] : default_state_;
if( rt == 0 ) rt = default_state_; if (rt == 0) rt = default_state_;
return rt == 1; return rt == 1;
} }
private: private:
inline void SetRange( int a, int b, int st ){ inline void SetRange(int a, int b, int st){
if( b > (int)state_.size() ) state_.resize( b, 0 ); if (b >(int)state_.size()) state_.resize(b, 0);
for( int i = a; i < b; ++ i ){ for (int i = a; i < b; ++i){
state_[i] = st; state_[i] = st;
} }
} }
inline void ParseRange( const char *val, int &a, int &b ){ inline void ParseRange(const char *val, int &a, int &b){
if( sscanf( val, "%d-%d", &a, &b ) == 2 ) return; if (sscanf(val, "%d-%d", &a, &b) == 2) return;
utils::Assert( sscanf( val, "%d", &a ) == 1 ); utils::Assert(sscanf(val, "%d", &a) == 1);
b = a + 1; b = a + 1;
} }
/*! \brief default state */ /*! \brief default state */

View File

@ -2,7 +2,7 @@
* \file xgboost_matrix_csr.h * \file xgboost_matrix_csr.h
* \brief this file defines some easy to use STL based class for in memory sparse CSR matrix * \brief this file defines some easy to use STL based class for in memory sparse CSR matrix
* \author Tianqi Chen: tianqi.tchen@gmail.com * \author Tianqi Chen: tianqi.tchen@gmail.com
*/ */
#ifndef XGBOOST_MATRIX_CSR_H #ifndef XGBOOST_MATRIX_CSR_H
#define XGBOOST_MATRIX_CSR_H #define XGBOOST_MATRIX_CSR_H
#include <vector> #include <vector>
@ -17,7 +17,7 @@ namespace xgboost{
* \tparam IndexType type of index used to store the index position, usually unsigned or size_t * \tparam IndexType type of index used to store the index position, usually unsigned or size_t
* \tparam whether enabling the usage of aclist, this option must be enabled manually * \tparam whether enabling the usage of aclist, this option must be enabled manually
*/ */
template<typename IndexType,bool UseAcList = false> template<typename IndexType, bool UseAcList = false>
struct SparseCSRMBuilder{ struct SparseCSRMBuilder{
private: private:
/*! \brief dummy variable used in the indicator matrix construction */ /*! \brief dummy variable used in the indicator matrix construction */
@ -29,29 +29,30 @@ namespace xgboost{
/*! \brief a list of active rows, used when many rows are empty */ /*! \brief a list of active rows, used when many rows are empty */
std::vector<size_t> &aclist; std::vector<size_t> &aclist;
public: public:
SparseCSRMBuilder( std::vector<size_t> &p_rptr, SparseCSRMBuilder(std::vector<size_t> &p_rptr,
std::vector<IndexType> &p_findex ) std::vector<IndexType> &p_findex)
:rptr(p_rptr), findex( p_findex ), aclist( dummy_aclist ){ :rptr(p_rptr), findex(p_findex), aclist(dummy_aclist){
Assert( !UseAcList, "enabling bug" ); Assert(!UseAcList, "enabling bug");
} }
/*! \brief use with caution! rptr must be cleaned before use */ /*! \brief use with caution! rptr must be cleaned before use */
SparseCSRMBuilder( std::vector<size_t> &p_rptr, SparseCSRMBuilder(std::vector<size_t> &p_rptr,
std::vector<IndexType> &p_findex, std::vector<IndexType> &p_findex,
std::vector<size_t> &p_aclist ) std::vector<size_t> &p_aclist)
:rptr(p_rptr), findex( p_findex ), aclist( p_aclist ){ :rptr(p_rptr), findex(p_findex), aclist(p_aclist){
Assert( UseAcList, "must manually enable the option use aclist" ); Assert(UseAcList, "must manually enable the option use aclist");
} }
public: public:
/*! /*!
* \brief step 1: initialize the number of rows in the data, not necessary exact * \brief step 1: initialize the number of rows in the data, not necessary exact
* \nrows number of rows in the matrix, can be smaller than expected * \nrows number of rows in the matrix, can be smaller than expected
*/ */
inline void InitBudget( size_t nrows = 0 ){ inline void InitBudget(size_t nrows = 0){
if( !UseAcList ){ if (!UseAcList){
rptr.clear(); rptr.clear();
rptr.resize( nrows + 1, 0 ); rptr.resize(nrows + 1, 0);
}else{ }
Assert( nrows + 1 == rptr.size(), "rptr must be initialized already" ); else{
Assert(nrows + 1 == rptr.size(), "rptr must be initialized already");
this->Cleanup(); this->Cleanup();
} }
} }
@ -60,58 +61,59 @@ namespace xgboost{
* \param row_id the id of the row * \param row_id the id of the row
* \param nelem number of element budget add to this row * \param nelem number of element budget add to this row
*/ */
inline void AddBudget( size_t row_id, size_t nelem = 1 ){ inline void AddBudget(size_t row_id, size_t nelem = 1){
if( rptr.size() < row_id + 2 ){ if (rptr.size() < row_id + 2){
rptr.resize( row_id + 2, 0 ); rptr.resize(row_id + 2, 0);
} }
if( UseAcList ){ if (UseAcList){
if( rptr[ row_id + 1 ] == 0 ) aclist.push_back( row_id ); if (rptr[row_id + 1] == 0) aclist.push_back(row_id);
} }
rptr[ row_id + 1 ] += nelem; rptr[row_id + 1] += nelem;
} }
/*! \brief step 3: initialize the necessary storage */ /*! \brief step 3: initialize the necessary storage */
inline void InitStorage( void ){ inline void InitStorage(void){
// initialize rptr to be beginning of each segment // initialize rptr to be beginning of each segment
size_t start = 0; size_t start = 0;
if( !UseAcList ){ if (!UseAcList){
for( size_t i = 1; i < rptr.size(); i ++ ){ for (size_t i = 1; i < rptr.size(); i++){
size_t rlen = rptr[ i ]; size_t rlen = rptr[i];
rptr[ i ] = start; rptr[i] = start;
start += rlen;
}
}else{
// case with active list
std::sort( aclist.begin(), aclist.end() );
for( size_t i = 0; i < aclist.size(); i ++ ){
size_t ridx = aclist[ i ];
size_t rlen = rptr[ ridx + 1 ];
rptr[ ridx + 1 ] = start;
// set previous rptr to right position if previous feature is not active
if( i == 0 || ridx != aclist[i-1] + 1 ) rptr[ ridx ] = start;
start += rlen; start += rlen;
} }
} }
findex.resize( start ); else{
// case with active list
std::sort(aclist.begin(), aclist.end());
for (size_t i = 0; i < aclist.size(); i++){
size_t ridx = aclist[i];
size_t rlen = rptr[ridx + 1];
rptr[ridx + 1] = start;
// set previous rptr to right position if previous feature is not active
if (i == 0 || ridx != aclist[i - 1] + 1) rptr[ridx] = start;
start += rlen;
}
}
findex.resize(start);
} }
/*! /*!
* \brief step 4: * \brief step 4:
* used in indicator matrix construction, add new * used in indicator matrix construction, add new
* element to each row, the number of calls shall be exactly same as add_budget * element to each row, the number of calls shall be exactly same as add_budget
*/ */
inline void PushElem( size_t row_id, IndexType col_id ){ inline void PushElem(size_t row_id, IndexType col_id){
size_t &rp = rptr[ row_id + 1 ]; size_t &rp = rptr[row_id + 1];
findex[ rp ++ ] = col_id; findex[rp++] = col_id;
} }
/*! /*!
* \brief step 5: only needed when aclist is used * \brief step 5: only needed when aclist is used
* clean up the rptr for next usage * clean up the rptr for next usage
*/ */
inline void Cleanup( void ){ inline void Cleanup(void){
Assert( UseAcList, "this function can only be called use AcList" ); Assert(UseAcList, "this function can only be called use AcList");
for( size_t i = 0; i < aclist.size(); i ++ ){ for (size_t i = 0; i < aclist.size(); i++){
const size_t ridx = aclist[i]; const size_t ridx = aclist[i];
rptr[ ridx ] = 0; rptr[ ridx + 1 ] = 0; rptr[ridx] = 0; rptr[ridx + 1] = 0;
} }
aclist.clear(); aclist.clear();
} }
@ -134,20 +136,20 @@ namespace xgboost{
/*! \brief matrix builder*/ /*! \brief matrix builder*/
SparseCSRMBuilder<IndexType> builder; SparseCSRMBuilder<IndexType> builder;
public: public:
SparseCSRMat( void ):builder( rptr, findex ){ SparseCSRMat(void) :builder(rptr, findex){
} }
public: public:
/*! \return number of rows in the matrx */ /*! \return number of rows in the matrx */
inline size_t NumRow( void ) const{ inline size_t NumRow(void) const{
return rptr.size() - 1; return rptr.size() - 1;
} }
/*! \return number of elements r-th row */ /*! \return number of elements r-th row */
inline size_t NumElem( size_t r ) const{ inline size_t NumElem(size_t r) const{
return rptr[ r + 1 ] - rptr[ r ]; return rptr[r + 1] - rptr[r];
} }
/*! \return r-th row */ /*! \return r-th row */
inline const IndexType *operator[]( size_t r ) const{ inline const IndexType *operator[](size_t r) const{
return &findex[ rptr[r] ]; return &findex[rptr[r]];
} }
}; };
}; };

View File

@ -10,9 +10,9 @@
#if defined(_OPENMP) #if defined(_OPENMP)
#include <omp.h> #include <omp.h>
#else #else
//#warning "OpenMP is not available, compile to single thread code" #warning "OpenMP is not available, compile to single thread code"
inline int omp_get_thread_num() { return 0; } inline int omp_get_thread_num() { return 0; }
inline int omp_get_num_threads() { return 1; } inline int omp_get_num_threads() { return 1; }
inline void omp_set_num_threads( int nthread ) {} inline void omp_set_num_threads(int nthread) {}
#endif #endif
#endif #endif

View File

@ -23,90 +23,91 @@ typedef unsigned int uint32_t;
namespace xgboost{ namespace xgboost{
namespace random{ namespace random{
/*! \brief seed the PRNG */ /*! \brief seed the PRNG */
inline void Seed( uint32_t seed ){ inline void Seed(uint32_t seed){
srand( seed ); srand(seed);
} }
/*! \brief return a real number uniform in [0,1) */ /*! \brief return a real number uniform in [0,1) */
inline double NextDouble(){ inline double NextDouble(){
return static_cast<double>( rand() ) / (static_cast<double>( RAND_MAX )+1.0); return static_cast<double>(rand()) / (static_cast<double>(RAND_MAX)+1.0);
} }
/*! \brief return a real numer uniform in (0,1) */ /*! \brief return a real numer uniform in (0,1) */
inline double NextDouble2(){ inline double NextDouble2(){
return (static_cast<double>( rand() ) + 1.0 ) / (static_cast<double>(RAND_MAX) + 2.0); return (static_cast<double>(rand()) + 1.0) / (static_cast<double>(RAND_MAX)+2.0);
} }
}; };
namespace random{ namespace random{
/*! \brief return a random number */ /*! \brief return a random number */
inline uint32_t NextUInt32( void ){ inline uint32_t NextUInt32(void){
return (uint32_t)rand(); return (uint32_t)rand();
} }
/*! \brief return a random number in n */ /*! \brief return a random number in n */
inline uint32_t NextUInt32( uint32_t n ){ inline uint32_t NextUInt32(uint32_t n){
return (uint32_t) floor( NextDouble() * n ) ; return (uint32_t)floor(NextDouble() * n);
} }
/*! \brief return x~N(0,1) */ /*! \brief return x~N(0,1) */
inline double SampleNormal(){ inline double SampleNormal(){
double x,y,s; double x, y, s;
do{ do{
x = 2 * NextDouble2() - 1.0; x = 2 * NextDouble2() - 1.0;
y = 2 * NextDouble2() - 1.0; y = 2 * NextDouble2() - 1.0;
s = x*x + y*y; s = x*x + y*y;
}while( s >= 1.0 || s == 0.0 ); } while (s >= 1.0 || s == 0.0);
return x * sqrt( -2.0 * log(s) / s ) ; return x * sqrt(-2.0 * log(s) / s);
} }
/*! \brief return iid x,y ~N(0,1) */ /*! \brief return iid x,y ~N(0,1) */
inline void SampleNormal2D( double &xx, double &yy ){ inline void SampleNormal2D(double &xx, double &yy){
double x,y,s; double x, y, s;
do{ do{
x = 2 * NextDouble2() - 1.0; x = 2 * NextDouble2() - 1.0;
y = 2 * NextDouble2() - 1.0; y = 2 * NextDouble2() - 1.0;
s = x*x + y*y; s = x*x + y*y;
}while( s >= 1.0 || s == 0.0 ); } while (s >= 1.0 || s == 0.0);
double t = sqrt( -2.0 * log(s) / s ) ; double t = sqrt(-2.0 * log(s) / s);
xx = x * t; xx = x * t;
yy = y * t; yy = y * t;
} }
/*! \brief return x~N(mu,sigma^2) */ /*! \brief return x~N(mu,sigma^2) */
inline double SampleNormal( double mu, double sigma ){ inline double SampleNormal(double mu, double sigma){
return SampleNormal() * sigma + mu; return SampleNormal() * sigma + mu;
} }
/*! \brief return 1 with probability p, coin flip */ /*! \brief return 1 with probability p, coin flip */
inline int SampleBinary( double p ){ inline int SampleBinary(double p){
return NextDouble() < p; return NextDouble() < p;
} }
/*! \brief return distribution from Gamma( alpha, beta ) */ /*! \brief return distribution from Gamma( alpha, beta ) */
inline double SampleGamma( double alpha, double beta ) { inline double SampleGamma(double alpha, double beta) {
if ( alpha < 1.0 ) { if (alpha < 1.0) {
double u; double u;
do { do {
u = NextDouble(); u = NextDouble();
} while (u == 0.0); } while (u == 0.0);
return SampleGamma(alpha + 1.0, beta) * pow(u, 1.0 / alpha); return SampleGamma(alpha + 1.0, beta) * pow(u, 1.0 / alpha);
} else { }
double d,c,x,v,u; else {
d = alpha - 1.0/3.0; double d, c, x, v, u;
c = 1.0 / sqrt( 9.0 * d ); d = alpha - 1.0 / 3.0;
c = 1.0 / sqrt(9.0 * d);
do { do {
do { do {
x = SampleNormal(); x = SampleNormal();
v = 1.0 + c*x; v = 1.0 + c*x;
} while ( v <= 0.0 ); } while (v <= 0.0);
v = v * v * v; v = v * v * v;
u = NextDouble(); u = NextDouble();
} while ( (u >= (1.0 - 0.0331 * (x*x) * (x*x))) } while ((u >= (1.0 - 0.0331 * (x*x) * (x*x)))
&& (log(u) >= (0.5 * x * x + d * (1.0 - v + log(v)))) ); && (log(u) >= (0.5 * x * x + d * (1.0 - v + log(v)))));
return d * v / beta; return d * v / beta;
} }
} }
template<typename T> template<typename T>
inline void Exchange( T &a, T &b ){ inline void Exchange(T &a, T &b){
T c; T c;
c = a; c = a;
a = b; a = b;
@ -114,16 +115,16 @@ namespace xgboost{
} }
template<typename T> template<typename T>
inline void Shuffle( T *data, size_t sz ){ inline void Shuffle(T *data, size_t sz){
if( sz == 0 ) return; if (sz == 0) return;
for( uint32_t i = (uint32_t)sz - 1; i > 0; i-- ){ for (uint32_t i = (uint32_t)sz - 1; i > 0; i--){
Exchange( data[i], data[ NextUInt32( i+1 ) ] ); Exchange(data[i], data[NextUInt32(i + 1)]);
} }
} }
// random shuffle the data inside, require PRNG // random shuffle the data inside, require PRNG
template<typename T> template<typename T>
inline void Shuffle( std::vector<T> &data ){ inline void Shuffle(std::vector<T> &data){
Shuffle( &data[0], data.size() ); Shuffle(&data[0], data.size());
} }
}; };
}; };

View File

@ -20,33 +20,33 @@ namespace xgboost{
* \param size size of block * \param size size of block
* \return usually is the size of data readed * \return usually is the size of data readed
*/ */
virtual size_t Read( void *ptr, size_t size ) = 0; virtual size_t Read(void *ptr, size_t size) = 0;
/*! /*!
* \brief write data to stream * \brief write data to stream
* \param ptr pointer to memory buffer * \param ptr pointer to memory buffer
* \param size size of block * \param size size of block
*/ */
virtual void Write( const void *ptr, size_t size ) = 0; virtual void Write(const void *ptr, size_t size) = 0;
/*! \brief virtual destructor */ /*! \brief virtual destructor */
virtual ~IStream( void ){} virtual ~IStream(void){}
}; };
/*! \brief implementation of file i/o stream */ /*! \brief implementation of file i/o stream */
class FileStream: public IStream{ class FileStream : public IStream{
private: private:
FILE *fp; FILE *fp;
public: public:
FileStream( FILE *fp ){ FileStream(FILE *fp){
this->fp = fp; this->fp = fp;
} }
virtual size_t Read( void *ptr, size_t size ){ virtual size_t Read(void *ptr, size_t size){
return fread( ptr, size, 1, fp ); return fread(ptr, size, 1, fp);
} }
virtual void Write( const void *ptr, size_t size ){ virtual void Write(const void *ptr, size_t size){
fwrite( ptr, size, 1, fp ); fwrite(ptr, size, 1, fp);
} }
inline void Close( void ){ inline void Close(void){
fclose( fp ); fclose(fp);
} }
}; };
}; };

View File

@ -36,39 +36,39 @@ extern "C"{
namespace xgboost{ namespace xgboost{
/*! \brief namespace for helper utils of the project */ /*! \brief namespace for helper utils of the project */
namespace utils{ namespace utils{
inline void Error( const char *msg ){ inline void Error(const char *msg){
fprintf( stderr, "Error:%s\n",msg ); fprintf(stderr, "Error:%s\n", msg);
exit( -1 ); exit(-1);
} }
inline void Assert( bool exp ){ inline void Assert(bool exp){
if( !exp ) Error( "AssertError" ); if (!exp) Error("AssertError");
} }
inline void Assert( bool exp, const char *msg ){ inline void Assert(bool exp, const char *msg){
if( !exp ) Error( msg ); if (!exp) Error(msg);
} }
inline void Warning( const char *msg ){ inline void Warning(const char *msg){
fprintf( stderr, "warning:%s\n",msg ); fprintf(stderr, "warning:%s\n", msg);
} }
/*! \brief replace fopen, report error when the file open fails */ /*! \brief replace fopen, report error when the file open fails */
inline FILE *FopenCheck( const char *fname , const char *flag ){ inline FILE *FopenCheck(const char *fname, const char *flag){
FILE *fp = fopen64( fname , flag ); FILE *fp = fopen64(fname, flag);
if( fp == NULL ){ if (fp == NULL){
fprintf( stderr, "can not open file \"%s\"\n",fname ); fprintf(stderr, "can not open file \"%s\"\n", fname);
exit( -1 ); exit(-1);
} }
return fp; return fp;
} }
/*! \brief replace fopen, */ /*! \brief replace fopen, */
inline FILE *FopenTry( const char *fname , const char *flag ){ inline FILE *FopenTry(const char *fname, const char *flag){
FILE *fp = fopen64( fname , flag ); FILE *fp = fopen64(fname, flag);
if( fp == NULL ){ if (fp == NULL){
fprintf( stderr, "can not open file \"%s\"\n",fname ); fprintf(stderr, "can not open file \"%s\"\n", fname);
exit( -1 ); exit(-1);
} }
return fp; return fp;
} }