change input data structure
This commit is contained in:
parent
6fa5c30777
commit
9b09cd3d49
5
.gitignore
vendored
5
.gitignore
vendored
@ -12,3 +12,8 @@
|
|||||||
*.la
|
*.la
|
||||||
*.a
|
*.a
|
||||||
*~
|
*~
|
||||||
|
*txt*
|
||||||
|
*conf
|
||||||
|
*buffer
|
||||||
|
*model
|
||||||
|
xgboost
|
||||||
@ -38,7 +38,7 @@ namespace xgboost{
|
|||||||
public:
|
public:
|
||||||
virtual void DoBoost( std::vector<float> &grad,
|
virtual void DoBoost( std::vector<float> &grad,
|
||||||
std::vector<float> &hess,
|
std::vector<float> &hess,
|
||||||
const FMatrixS::Image &smat,
|
const FMatrixS &smat,
|
||||||
const std::vector<unsigned> &root_index ){
|
const std::vector<unsigned> &root_index ){
|
||||||
utils::Assert( grad.size() < UINT_MAX, "number of instance exceed what we can handle" );
|
utils::Assert( grad.size() < UINT_MAX, "number of instance exceed what we can handle" );
|
||||||
this->Update( smat, grad, hess );
|
this->Update( smat, grad, hess );
|
||||||
@ -46,7 +46,7 @@ namespace xgboost{
|
|||||||
virtual float Predict( const FMatrixS::Line &sp, unsigned rid = 0 ){
|
virtual float Predict( const FMatrixS::Line &sp, unsigned rid = 0 ){
|
||||||
float sum = model.bias();
|
float sum = model.bias();
|
||||||
for( unsigned i = 0; i < sp.len; i ++ ){
|
for( unsigned i = 0; i < sp.len; i ++ ){
|
||||||
sum += model.weight[ sp.findex[i] ] * sp.fvalue[i];
|
sum += model.weight[ sp[i].findex ] * sp[i].fvalue;
|
||||||
}
|
}
|
||||||
return sum;
|
return sum;
|
||||||
}
|
}
|
||||||
@ -208,11 +208,10 @@ namespace xgboost{
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
inline void MakeCmajor( std::vector<size_t> &rptr,
|
inline void MakeCmajor( std::vector<size_t> &rptr,
|
||||||
std::vector<SCEntry> &entry,
|
std::vector<SCEntry> &entry,
|
||||||
const std::vector<float> &hess,
|
const std::vector<float> &hess,
|
||||||
const FMatrixS::Image &smat ){
|
const FMatrixS &smat ){
|
||||||
// transform to column order first
|
// transform to column order first
|
||||||
const int nfeat = model.param.num_feature;
|
const int nfeat = model.param.num_feature;
|
||||||
// build CSR column major format data
|
// build CSR column major format data
|
||||||
@ -224,8 +223,8 @@ namespace xgboost{
|
|||||||
// add sparse part budget
|
// add sparse part budget
|
||||||
FMatrixS::Line sp = smat[ i ];
|
FMatrixS::Line sp = smat[ i ];
|
||||||
for( unsigned j = 0; j < sp.len; j ++ ){
|
for( unsigned j = 0; j < sp.len; j ++ ){
|
||||||
if( j == 0 || sp.findex[j-1] != sp.findex[j] ){
|
if( j == 0 || sp[j-1].findex != sp[j].findex ){
|
||||||
builder.AddBudget( sp.findex[j] );
|
builder.AddBudget( sp[j].findex );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -237,14 +236,14 @@ namespace xgboost{
|
|||||||
FMatrixS::Line sp = smat[ i ];
|
FMatrixS::Line sp = smat[ i ];
|
||||||
for( unsigned j = 0; j < sp.len; j ++ ){
|
for( unsigned j = 0; j < sp.len; j ++ ){
|
||||||
// skip duplicated terms
|
// skip duplicated terms
|
||||||
if( j == 0 || sp.findex[j-1] != sp.findex[j] ){
|
if( j == 0 || sp[j-1].findex != sp[j].findex ){
|
||||||
builder.PushElem( sp.findex[j], SCEntry( sp.fvalue[j], i ) );
|
builder.PushElem( sp[j].findex, SCEntry( sp[j].fvalue, i ) );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
protected:
|
protected:
|
||||||
virtual void Update( const FMatrixS::Image &smat,
|
virtual void Update( const FMatrixS &smat,
|
||||||
std::vector<float> &grad,
|
std::vector<float> &grad,
|
||||||
const std::vector<float> &hess ){
|
const std::vector<float> &hess ){
|
||||||
std::vector<size_t> rptr;
|
std::vector<size_t> rptr;
|
||||||
|
|||||||
@ -131,7 +131,7 @@ namespace xgboost{
|
|||||||
RTree &tree;
|
RTree &tree;
|
||||||
std::vector<float> &grad;
|
std::vector<float> &grad;
|
||||||
std::vector<float> &hess;
|
std::vector<float> &hess;
|
||||||
const FMatrixS::Image &smat;
|
const FMatrixS &smat;
|
||||||
const std::vector<unsigned> &group_id;
|
const std::vector<unsigned> &group_id;
|
||||||
private:
|
private:
|
||||||
// maximum depth up to now
|
// maximum depth up to now
|
||||||
@ -322,7 +322,7 @@ namespace xgboost{
|
|||||||
|
|
||||||
FMatrixS::Line sp = smat[ ridx ];
|
FMatrixS::Line sp = smat[ ridx ];
|
||||||
for( unsigned j = 0; j < sp.len; j ++ ){
|
for( unsigned j = 0; j < sp.len; j ++ ){
|
||||||
builder.AddBudget( sp.findex[j] );
|
builder.AddBudget( sp[j].findex );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -336,7 +336,7 @@ namespace xgboost{
|
|||||||
const unsigned ridx = tsk.idset[i];
|
const unsigned ridx = tsk.idset[i];
|
||||||
FMatrixS::Line sp = smat[ ridx ];
|
FMatrixS::Line sp = smat[ ridx ];
|
||||||
for( unsigned j = 0; j < sp.len; j ++ ){
|
for( unsigned j = 0; j < sp.len; j ++ ){
|
||||||
builder.PushElem( sp.findex[j], SCEntry( sp.fvalue[j], ridx ) );
|
builder.PushElem( sp[j].findex, SCEntry( sp[j].fvalue, ridx ) );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// --- end of building column major matrix ---
|
// --- end of building column major matrix ---
|
||||||
@ -429,7 +429,7 @@ namespace xgboost{
|
|||||||
RTree &ptree,
|
RTree &ptree,
|
||||||
std::vector<float> &pgrad,
|
std::vector<float> &pgrad,
|
||||||
std::vector<float> &phess,
|
std::vector<float> &phess,
|
||||||
const FMatrixS::Image &psmat,
|
const FMatrixS &psmat,
|
||||||
const std::vector<unsigned> &pgroup_id ):
|
const std::vector<unsigned> &pgroup_id ):
|
||||||
param( pparam ), tree( ptree ), grad( pgrad ), hess( phess ),
|
param( pparam ), tree( ptree ), grad( pgrad ), hess( phess ),
|
||||||
smat( psmat ), group_id( pgroup_id ){
|
smat( psmat ), group_id( pgroup_id ){
|
||||||
@ -494,7 +494,7 @@ namespace xgboost{
|
|||||||
public:
|
public:
|
||||||
virtual void DoBoost( std::vector<float> &grad,
|
virtual void DoBoost( std::vector<float> &grad,
|
||||||
std::vector<float> &hess,
|
std::vector<float> &hess,
|
||||||
const FMatrixS::Image &smat,
|
const FMatrixS &smat,
|
||||||
const std::vector<unsigned> &group_id ){
|
const std::vector<unsigned> &group_id ){
|
||||||
utils::Assert( grad.size() < UINT_MAX, "number of instance exceed what we can handle" );
|
utils::Assert( grad.size() < UINT_MAX, "number of instance exceed what we can handle" );
|
||||||
if( !silent ){
|
if( !silent ){
|
||||||
@ -526,14 +526,14 @@ namespace xgboost{
|
|||||||
virtual float Predict( const FMatrixS::Line &feat, unsigned gid = 0 ){
|
virtual float Predict( const FMatrixS::Line &feat, unsigned gid = 0 ){
|
||||||
this->init_tmpfeat();
|
this->init_tmpfeat();
|
||||||
for( unsigned i = 0; i < feat.len; i ++ ){
|
for( unsigned i = 0; i < feat.len; i ++ ){
|
||||||
utils::Assert( feat.findex[i] < (unsigned)tmp_funknown.size() , "input feature execeed bound" );
|
utils::Assert( feat[i].findex < (unsigned)tmp_funknown.size() , "input feature execeed bound" );
|
||||||
tmp_funknown[ feat.findex[i] ] = false;
|
tmp_funknown[ feat[i].findex ] = false;
|
||||||
tmp_feat[ feat.findex[i] ] = feat.fvalue[i];
|
tmp_feat[ feat[i].findex ] = feat[i].fvalue;
|
||||||
}
|
}
|
||||||
int pid = this->GetLeafIndex( tmp_feat, tmp_funknown, gid );
|
int pid = this->GetLeafIndex( tmp_feat, tmp_funknown, gid );
|
||||||
// set back
|
// set back
|
||||||
for( unsigned i = 0; i < feat.len; i ++ ){
|
for( unsigned i = 0; i < feat.len; i ++ ){
|
||||||
tmp_funknown[ feat.findex[i] ] = true;
|
tmp_funknown[ feat[i].findex ] = true;
|
||||||
}
|
}
|
||||||
return tree[ pid ].leaf_value();
|
return tree[ pid ].leaf_value();
|
||||||
}
|
}
|
||||||
|
|||||||
@ -16,7 +16,7 @@
|
|||||||
|
|
||||||
namespace xgboost{
|
namespace xgboost{
|
||||||
namespace booster{
|
namespace booster{
|
||||||
/*!
|
/*
|
||||||
* \brief listing the types of boosters
|
* \brief listing the types of boosters
|
||||||
*/
|
*/
|
||||||
enum BOOSTER_TYPE_LIST{
|
enum BOOSTER_TYPE_LIST{
|
||||||
|
|||||||
@ -16,7 +16,9 @@
|
|||||||
namespace xgboost{
|
namespace xgboost{
|
||||||
/*! \brief namespace for boosters */
|
/*! \brief namespace for boosters */
|
||||||
namespace booster{
|
namespace booster{
|
||||||
/*! \brief interface of a gradient boosting learner */
|
/*!
|
||||||
|
* \brief interface of a gradient boosting learner
|
||||||
|
*/
|
||||||
class IBooster{
|
class IBooster{
|
||||||
public:
|
public:
|
||||||
// interface for model setting and loading
|
// interface for model setting and loading
|
||||||
@ -61,7 +63,7 @@ namespace xgboost{
|
|||||||
*/
|
*/
|
||||||
virtual void DoBoost( std::vector<float> &grad,
|
virtual void DoBoost( std::vector<float> &grad,
|
||||||
std::vector<float> &hess,
|
std::vector<float> &hess,
|
||||||
const FMatrixS::Image &feats,
|
const FMatrixS &feats,
|
||||||
const std::vector<unsigned> &root_index ) = 0;
|
const std::vector<unsigned> &root_index ) = 0;
|
||||||
/*!
|
/*!
|
||||||
* \brief predict values for given sparse feature vector
|
* \brief predict values for given sparse feature vector
|
||||||
|
|||||||
@ -24,120 +24,188 @@ namespace xgboost{
|
|||||||
const bool bst_debug = false;
|
const bool bst_debug = false;
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
|
namespace xgboost{
|
||||||
|
namespace booster{
|
||||||
|
/**
|
||||||
|
* \brief This is an interface, defining the way to access features,
|
||||||
|
* by column or by row. This interface is used to make implementation
|
||||||
|
* of boosters independent of how features are stored.
|
||||||
|
*
|
||||||
|
* Why template instead of virtual class: for efficiency
|
||||||
|
* feature matrix is going to be used by most inner loop of the algorithm
|
||||||
|
*
|
||||||
|
* \tparam Derived type of actual implementation
|
||||||
|
* \sa FMatrixS: most of time FMatrixS is sufficient, refer to it if you find it confusing
|
||||||
|
*/
|
||||||
|
template<typename Derived>
|
||||||
|
struct FMatrix{
|
||||||
|
public:
|
||||||
|
/*! \brief example iterator over one row */
|
||||||
|
struct RowIter{
|
||||||
|
/*!
|
||||||
|
* \brief move to next position
|
||||||
|
* \return whether there is element in next position
|
||||||
|
*/
|
||||||
|
inline bool Next( void );
|
||||||
|
/*! \return feature index in current position */
|
||||||
|
inline bst_uint findex( void ) const;
|
||||||
|
/*! \return feature value in current position */
|
||||||
|
inline bst_float fvalue( void ) const;
|
||||||
|
};
|
||||||
|
/*! \brief example iterator over one column */
|
||||||
|
struct ColIter{
|
||||||
|
/*!
|
||||||
|
* \brief move to next position
|
||||||
|
* \return whether there is element in next position
|
||||||
|
*/
|
||||||
|
inline bool Next( void );
|
||||||
|
/*! \return row index of current position */
|
||||||
|
inline bst_uint rindex( void ) const;
|
||||||
|
/*! \return feature value in current position */
|
||||||
|
inline bst_float fvalue( void ) const;
|
||||||
|
};
|
||||||
|
public:
|
||||||
|
/*!
|
||||||
|
* \brief prepare sorted columns so that GetSortedCol can be called
|
||||||
|
*/
|
||||||
|
inline void MakeSortedCol( void );
|
||||||
|
/*!
|
||||||
|
* \brief get number of rows
|
||||||
|
* \return number of rows
|
||||||
|
*/
|
||||||
|
inline size_t NumRow( void ) const;
|
||||||
|
/*!
|
||||||
|
* \brief get number of columns
|
||||||
|
* \return number of columns
|
||||||
|
*/
|
||||||
|
inline size_t NumCol( void ) const;
|
||||||
|
/*!
|
||||||
|
* \brief get row iterator
|
||||||
|
* \param ridx row index
|
||||||
|
* \return row iterator
|
||||||
|
*/
|
||||||
|
inline RowIter GetRow( size_t ridx ) const;
|
||||||
|
/*!
|
||||||
|
* \brief get column iterator, the columns must be sorted by feature value
|
||||||
|
* \param ridx column index
|
||||||
|
* \return column iterator
|
||||||
|
*/
|
||||||
|
inline ColIter GetSortedCol( size_t ridx ) const;
|
||||||
|
|
||||||
|
/*! \return the view of derived class */
|
||||||
|
inline const Derived& self( void ) const{
|
||||||
|
return *static_cast<const Derived*>(this);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
namespace xgboost{
|
namespace xgboost{
|
||||||
namespace booster{
|
namespace booster{
|
||||||
/*!
|
/*!
|
||||||
* \brief feature matrix to store training instance, in sparse CSR format
|
* \brief feature matrix to store training instance, in sparse CSR format
|
||||||
*/
|
*/
|
||||||
class FMatrixS{
|
class FMatrixS: public FMatrix<FMatrixS>{
|
||||||
public:
|
public:
|
||||||
|
/*! \brief one entry in a row */
|
||||||
|
struct REntry{
|
||||||
|
/*! \brief feature index */
|
||||||
|
bst_uint findex;
|
||||||
|
/*! \brief feature value */
|
||||||
|
bst_float fvalue;
|
||||||
|
};
|
||||||
|
/*! \brief one entry in a column */
|
||||||
|
struct CEntry{
|
||||||
|
/*! \brief row index */
|
||||||
|
bst_uint rindex;
|
||||||
|
/*! \brief feature value */
|
||||||
|
bst_float fvalue;
|
||||||
|
};
|
||||||
/*! \brief one row of sparse feature matrix */
|
/*! \brief one row of sparse feature matrix */
|
||||||
struct Line{
|
struct Line{
|
||||||
/*! \brief array of feature index */
|
/*! \brief array of entries (feature index and feature value) in this row */
|
||||||
const bst_uint *findex;
|
const REntry *data_;
|
||||||
/*! \brief array of feature value */
|
|
||||||
const bst_float *fvalue;
|
|
||||||
/*! \brief size of the data */
|
/*! \brief size of the data */
|
||||||
bst_uint len;
|
bst_uint len;
|
||||||
};
|
inline const REntry& operator[]( unsigned i ) const{
|
||||||
/*!
|
return data_[i];
|
||||||
* \brief remapped image of sparse matrix,
|
|
||||||
* allows use a subset of sparse matrix, by specifying a rowmap
|
|
||||||
*/
|
|
||||||
struct Image{
|
|
||||||
public:
|
|
||||||
Image( const FMatrixS &smat ):smat(smat), row_map( tmp_rowmap ){
|
|
||||||
}
|
}
|
||||||
Image( const FMatrixS &smat, const std::vector<unsigned> &row_map )
|
|
||||||
:smat(smat), row_map(row_map){
|
|
||||||
}
|
|
||||||
/*! \brief get sparse part of current row */
|
|
||||||
inline Line operator[]( size_t sidx ) const{
|
|
||||||
if( row_map.size() == 0 ) return smat[ sidx ];
|
|
||||||
else return smat[ row_map[ sidx ] ];
|
|
||||||
}
|
|
||||||
private:
|
|
||||||
// used to set the simple case
|
|
||||||
std::vector<unsigned> tmp_rowmap;
|
|
||||||
const FMatrixS &smat;
|
|
||||||
const std::vector<unsigned> &row_map;
|
|
||||||
};
|
};
|
||||||
public:
|
public:
|
||||||
// -----Note: unless needed for hacking, these fields should not be accessed directly -----
|
struct RowIter{
|
||||||
/*! \brief row pointer of CSR sparse storage */
|
const REntry *dptr, *end;
|
||||||
std::vector<size_t> row_ptr;
|
inline bool Next( void ){
|
||||||
/*! \brief index of CSR format */
|
if( dptr == end ) return false;
|
||||||
std::vector<bst_uint> findex;
|
else{
|
||||||
/*! \brief value of CSR format */
|
++ dptr; return true;
|
||||||
std::vector<bst_float> fvalue;
|
}
|
||||||
|
}
|
||||||
|
inline bst_uint findex( void ) const{
|
||||||
|
return dptr->findex;
|
||||||
|
}
|
||||||
|
inline bst_float fvalue( void ) const{
|
||||||
|
return dptr->fvalue;
|
||||||
|
}
|
||||||
|
};
|
||||||
public:
|
public:
|
||||||
/*! \brief constructor */
|
/*! \brief constructor */
|
||||||
FMatrixS( void ){ this->Clear(); }
|
FMatrixS( void ){ this->Clear(); }
|
||||||
/*!
|
/*! \brief get number of rows */
|
||||||
* \brief get number of rows
|
|
||||||
* \return number of rows
|
|
||||||
*/
|
|
||||||
inline size_t NumRow( void ) const{
|
inline size_t NumRow( void ) const{
|
||||||
return row_ptr.size() - 1;
|
return row_ptr_.size() - 1;
|
||||||
}
|
}
|
||||||
/*!
|
/*!
|
||||||
* \brief get number of nonzero entries
|
* \brief get number of nonzero entries
|
||||||
* \return number of nonzero entries
|
* \return number of nonzero entries
|
||||||
*/
|
*/
|
||||||
inline size_t NumEntry( void ) const{
|
inline size_t NumEntry( void ) const{
|
||||||
return findex.size();
|
return row_data_.size();
|
||||||
}
|
}
|
||||||
/*! \brief clear the storage */
|
/*! \brief clear the storage */
|
||||||
inline void Clear( void ){
|
inline void Clear( void ){
|
||||||
row_ptr.resize( 0 );
|
row_ptr_.clear();
|
||||||
findex.resize( 0 );
|
row_ptr_.push_back( 0 );
|
||||||
fvalue.resize( 0 );
|
row_data_.clear();
|
||||||
row_ptr.push_back( 0 );
|
|
||||||
}
|
|
||||||
/*!
|
|
||||||
* \brief add a row to the matrix, but only accept features from fstart to fend
|
|
||||||
* \param feat sparse feature
|
|
||||||
* \param fstart start bound of feature
|
|
||||||
* \param fend end bound range of feature
|
|
||||||
* \return the row id of added line
|
|
||||||
*/
|
|
||||||
inline size_t AddRow( const Line &feat, unsigned fstart = 0, unsigned fend = UINT_MAX ){
|
|
||||||
utils::Assert( feat.len >= 0, "sparse feature length can not be negative" );
|
|
||||||
unsigned cnt = 0;
|
|
||||||
for( unsigned i = 0; i < feat.len; i ++ ){
|
|
||||||
if( feat.findex[i] < fstart || feat.findex[i] >= fend ) continue;
|
|
||||||
findex.push_back( feat.findex[i] );
|
|
||||||
fvalue.push_back( feat.fvalue[i] );
|
|
||||||
cnt ++;
|
|
||||||
}
|
|
||||||
row_ptr.push_back( row_ptr.back() + cnt );
|
|
||||||
return row_ptr.size() - 2;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*!
|
|
||||||
* \brief add a row to the matrix, with data stored in STL container
|
|
||||||
* \param findex feature index
|
|
||||||
* \param fvalue feature value
|
|
||||||
* \return the row id added line
|
|
||||||
*/
|
|
||||||
inline size_t AddRow( const std::vector<bst_uint> &findex,
|
|
||||||
const std::vector<bst_float> &fvalue ){
|
|
||||||
FMatrixS::Line l;
|
|
||||||
utils::Assert( findex.size() == fvalue.size() );
|
|
||||||
l.findex = &findex[0];
|
|
||||||
l.fvalue = &fvalue[0];
|
|
||||||
l.len = static_cast<bst_uint>( findex.size() );
|
|
||||||
return this->AddRow( l );
|
|
||||||
}
|
}
|
||||||
/*! \brief get sparse part of current row */
|
/*! \brief get sparse part of current row */
|
||||||
inline Line operator[]( size_t sidx ) const{
|
inline Line operator[]( size_t sidx ) const{
|
||||||
Line sp;
|
Line sp;
|
||||||
utils::Assert( !bst_debug || sidx < this->NumRow(), "row id exceed bound" );
|
utils::Assert( !bst_debug || sidx < this->NumRow(), "row id exceed bound" );
|
||||||
sp.len = static_cast<bst_uint>( row_ptr[ sidx + 1 ] - row_ptr[ sidx ] );
|
sp.len = static_cast<bst_uint>( row_ptr_[ sidx + 1 ] - row_ptr_[ sidx ] );
|
||||||
sp.findex = &findex[ row_ptr[ sidx ] ];
|
sp.data_ = &row_data_[ row_ptr_[ sidx ] ];
|
||||||
sp.fvalue = &fvalue[ row_ptr[ sidx ] ];
|
|
||||||
return sp;
|
return sp;
|
||||||
}
|
}
|
||||||
|
/*! \brief get row iterator*/
|
||||||
|
inline RowIter GetRow( size_t ridx ) const{
|
||||||
|
utils::Assert( !bst_debug || ridx < this->NumRow(), "row id exceed bound" );
|
||||||
|
RowIter it;
|
||||||
|
it.dptr = &row_data_[ row_ptr_[ridx] ] - 1;
|
||||||
|
it.dptr = &row_data_[ row_ptr_[ridx+1] ] - 1;
|
||||||
|
return it;
|
||||||
|
}
|
||||||
|
/*!
|
||||||
|
* \brief add a row to the matrix, with data stored in STL container
|
||||||
|
* \param findex feature index
|
||||||
|
* \param fvalue feature value
|
||||||
|
* \param fstart start bound of feature
|
||||||
|
* \param fend end bound range of feature
|
||||||
|
* \return the row id added line
|
||||||
|
*/
|
||||||
|
inline size_t AddRow( const std::vector<bst_uint> &findex,
|
||||||
|
const std::vector<bst_float> &fvalue,
|
||||||
|
unsigned fstart = 0, unsigned fend = UINT_MAX ){
|
||||||
|
utils::Assert( findex.size() == fvalue.size() );
|
||||||
|
unsigned cnt = 0;
|
||||||
|
for( size_t i = 0; i < findex.size(); i ++ ){
|
||||||
|
if( findex[i] < fstart || findex[i] >= fend ) continue;
|
||||||
|
REntry e; e.findex = findex[i]; e.fvalue = fvalue[i];
|
||||||
|
row_data_.push_back( e );
|
||||||
|
cnt ++;
|
||||||
|
}
|
||||||
|
row_ptr_.push_back( row_ptr_.back() + cnt );
|
||||||
|
return row_ptr_.size() - 2;
|
||||||
|
}
|
||||||
public:
|
public:
|
||||||
/*!
|
/*!
|
||||||
* \brief save data to binary stream
|
* \brief save data to binary stream
|
||||||
@ -148,10 +216,9 @@ namespace xgboost{
|
|||||||
inline void SaveBinary(utils::IStream &fo ) const{
|
inline void SaveBinary(utils::IStream &fo ) const{
|
||||||
size_t nrow = this->NumRow();
|
size_t nrow = this->NumRow();
|
||||||
fo.Write( &nrow, sizeof(size_t) );
|
fo.Write( &nrow, sizeof(size_t) );
|
||||||
fo.Write( &row_ptr[0], row_ptr.size() * sizeof(size_t) );
|
fo.Write( &row_ptr_[0], row_ptr_.size() * sizeof(size_t) );
|
||||||
if( findex.size() != 0 ){
|
if( row_data_.size() != 0 ){
|
||||||
fo.Write( &findex[0] , findex.size() * sizeof(bst_uint) );
|
fo.Write( &row_data_[0] , row_data_.size() * sizeof(REntry) );
|
||||||
fo.Write( &fvalue[0] , fvalue.size() * sizeof(bst_float) );
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
/*!
|
/*!
|
||||||
@ -163,17 +230,20 @@ namespace xgboost{
|
|||||||
inline void LoadBinary( utils::IStream &fi ){
|
inline void LoadBinary( utils::IStream &fi ){
|
||||||
size_t nrow;
|
size_t nrow;
|
||||||
utils::Assert( fi.Read( &nrow, sizeof(size_t) ) != 0, "Load FMatrixS" );
|
utils::Assert( fi.Read( &nrow, sizeof(size_t) ) != 0, "Load FMatrixS" );
|
||||||
row_ptr.resize( nrow + 1 );
|
row_ptr_.resize( nrow + 1 );
|
||||||
utils::Assert( fi.Read( &row_ptr[0], row_ptr.size() * sizeof(size_t) ), "Load FMatrixS" );
|
utils::Assert( fi.Read( &row_ptr_[0], row_ptr_.size() * sizeof(size_t) ), "Load FMatrixS" );
|
||||||
|
|
||||||
findex.resize( row_ptr.back() ); fvalue.resize( row_ptr.back() );
|
row_data_.resize( row_ptr_.back() );
|
||||||
if( findex.size() != 0 ){
|
if( row_data_.size() != 0 ){
|
||||||
utils::Assert( fi.Read( &findex[0] , findex.size() * sizeof(bst_uint) ) , "Load FMatrixS" );
|
utils::Assert( fi.Read( &row_data_[0] , row_data_.size() * sizeof(REntry) ) , "Load FMatrixS" );
|
||||||
utils::Assert( fi.Read( &fvalue[0] , fvalue.size() * sizeof(bst_float) ), "Load FMatrixS" );
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
private:
|
||||||
|
/*! \brief row pointer of CSR sparse storage */
|
||||||
|
std::vector<size_t> row_ptr_;
|
||||||
|
/*! \brief data in the row */
|
||||||
|
std::vector<REntry> row_data_;
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
@ -191,7 +191,7 @@ namespace xgboost{
|
|||||||
*/
|
*/
|
||||||
inline void DoBoost( std::vector<float> &grad,
|
inline void DoBoost( std::vector<float> &grad,
|
||||||
std::vector<float> &hess,
|
std::vector<float> &hess,
|
||||||
const booster::FMatrixS::Image &feats,
|
const booster::FMatrixS &feats,
|
||||||
const std::vector<unsigned> &root_index ) {
|
const std::vector<unsigned> &root_index ) {
|
||||||
booster::IBooster *bst = this->GetUpdateBooster();
|
booster::IBooster *bst = this->GetUpdateBooster();
|
||||||
bst->DoBoost( grad, hess, feats, root_index );
|
bst->DoBoost( grad, hess, feats, root_index );
|
||||||
|
|||||||
@ -117,8 +117,7 @@ namespace xgboost{
|
|||||||
this->GetGradient( preds, train_->labels, grad, hess );
|
this->GetGradient( preds, train_->labels, grad, hess );
|
||||||
|
|
||||||
std::vector<unsigned> root_index;
|
std::vector<unsigned> root_index;
|
||||||
booster::FMatrixS::Image train_image( train_->data );
|
base_model.DoBoost(grad,hess,train_->data,root_index);
|
||||||
base_model.DoBoost(grad,hess,train_image,root_index);
|
|
||||||
}
|
}
|
||||||
/*!
|
/*!
|
||||||
* \brief evaluate the model for specific iteration
|
* \brief evaluate the model for specific iteration
|
||||||
|
|||||||
@ -132,8 +132,8 @@ namespace xgboost{
|
|||||||
for( size_t i = 0; i < data.NumRow(); i ++ ){
|
for( size_t i = 0; i < data.NumRow(); i ++ ){
|
||||||
booster::FMatrixS::Line sp = data[i];
|
booster::FMatrixS::Line sp = data[i];
|
||||||
for( unsigned j = 0; j < sp.len; j ++ ){
|
for( unsigned j = 0; j < sp.len; j ++ ){
|
||||||
if( num_feature <= sp.findex[j] ){
|
if( num_feature <= sp[j].findex ){
|
||||||
num_feature = sp.findex[j] + 1;
|
num_feature = sp[j].findex + 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user