init commit

This commit is contained in:
tqchen
2014-02-06 15:50:50 -08:00
parent 225aa9841b
commit 57fef8bc54
11 changed files with 703 additions and 0 deletions

13
booster/xgboost.cpp Normal file
View File

@@ -0,0 +1,13 @@
/*!
* \file xgboost.cpp
* \brief booster implementations
* \author Tianqi Chen: tianqi.tchen@gmail.com
*/
// implementation of boosters go to here
#define _CRT_SECURE_NO_WARNINGS
#define _CRT_SECURE_NO_DEPRECATE
#include <climits>
#include "xgboost.h"
#include "../utils/xgboost_utils.h"

11
booster/xgboost.cpp~ Normal file
View File

@@ -0,0 +1,11 @@
/*!
* \file apex_booster.cpp
* \brief booster implementation
* \author Tianqi Chen: tqchen@apex.sjtu.edu.cn
*/
// implementation of boosters go to here
#define _CRT_SECURE_NO_WARNINGS
#define _CRT_SECURE_NO_DEPRECATE
#include <climits>
#include "apex_booster.h"
#include "../../apex-utils/apex_utils.h"

96
booster/xgboost.h Normal file
View File

@@ -0,0 +1,96 @@
#ifndef _XGBOOST_H_
#define _XGBOOST_H_
/*!
* \file xgboost.h
* \brief the general gradient boosting interface
* \author Tianqi Chen: tianqi.tchen@gmail.com
*/
#include <vector>
#include "../utils/xgboost_utils.h"
#include "../utils/xgboost_stream.h"
#include "xgboost_data.h"
/*! \brief namespace for xgboost package */
namespace xgboost{
namespace booster{
/*! \brief interface of a gradient boosting learner */
class IBooster{
public:
// ---- model setup and serialization ----
// typical calling sequence:
//   (1) booster->SetParam to configure the booster
//   (2) on first use of the model:
//         booster->InitModel to start a fresh model, or
//         booster->LoadModel to restore a previously saved one
//   (3) booster->DoBoost (repeatedly) to add boosting steps
//   (4) booster->SaveModel to persist the trained model
/*!
 * \brief set parameters from outside
 * \param name name of the parameter
 * \param val value of the parameter
 */
virtual void SetParam( const char *name, const char *val ) = 0;
/*!
 * \brief load model from stream
 * \param fi input stream to read the model from
 */
virtual void LoadModel( utils::IStream &fi ) = 0;
/*!
 * \brief save model to stream
 * \param fo output stream to write the model to
 */
virtual void SaveModel( utils::IStream &fo ) const = 0;
/*!
 * \brief initialize solver before training, called before training starts;
 *        this function is reserved for the booster to allocate necessary space
 *        and do other preparations
 */
virtual void InitModel( void ) = 0;
public:
/*!
 * \brief do gradient boost training for one step, using the information given
 * \param grad first order gradient of each instance
 * \param hess second order gradient of each instance
 * \param feats features of each instance
 * \param root_index pre-partitioned root index of each instance;
 *        root_index.size() can be 0, which indicates no pre-partition is involved
 */
virtual void DoBoost( std::vector<float> &grad,
std::vector<float> &hess,
const FMatrixS::Image &feats,
const std::vector<unsigned> &root_index ) = 0;
/*!
 * \brief predict value for the given sparse feature vector
 * NOTE: in the tree implementation, this is not threadsafe
 * \param feat feature vector in sparse format
 * \param rid root id of current instance, default = 0
 * \return prediction value
 * default implementation aborts via utils::Error — subclasses may override
 */
virtual float Predict( const FMatrixS::Line &feat, unsigned rid = 0 ){
utils::Error( "not implemented" );
return 0.0f;
}
/*!
 * \brief predict value for the given dense feature vector
 * \param feat feature vector in dense format
 * \param funknown missing-value indicator: funknown[i] is true if feature i is missing
 * \param rid root id of current instance, default = 0
 * \return prediction value
 * default implementation aborts via utils::Error — subclasses may override
 */
virtual float Predict( const std::vector<float> &feat,
const std::vector<bool> &funknown,
unsigned rid = 0 ){
utils::Error( "not implemented" );
return 0.0f;
}
/*!
 * \brief print information about the booster
 * \param fo output file; default implementation prints nothing
 */
virtual void PrintInfo( FILE *fo ){}
public:
// virtual destructor: safe deletion through a base-class pointer
virtual ~IBooster( void ){}
};
};
};
#endif

93
booster/xgboost.h~ Normal file
View File

@@ -0,0 +1,93 @@
#ifndef _XGBOOST_H_
#define _XGBOOST_H_
/*!
* \file xgboost.h
* \brief the general gradient boosting interface
* \author Tianqi Chen: tianqi.tchen@gmail.com
*/
#include "../utils/xgboost_utils.h"
/*! \brief namespace for xgboost package */
namespace xgboost{
namespace booster{
/*! \brief interface of a gradient boosting learner */
class IBooster{
public:
// interface for model setting and loading
// calling procedure:
// (1) booster->SetParam to setting necessary parameters
// (2) if it is first time usage of the model: call booster->
// if new model to be trained, trainer->init_trainer
// elseif just to load from file, trainer->load_model
// trainer->do_boost
// trainer->save_model
/*!
* \brief set parameters from outside
* \param name name of the parameter
* \param val value of the parameter
*/
virtual void SetParam( const char *name, const char *val ) = 0;
/*!
* \brief load model from stream
* \param fi input stream
*/
virtual void LoadModel( utils::IStream &fi ) = 0;
/*!
* \brief save model to stream
* \param fo output stream
*/
virtual void SaveModel( utils::IStream &fo ) const = 0;
/*!
* \brief initialize solver before training, called before training
* this function is reserved for solver to allocate necessary space and do other preparations
*/
virtual void InitModel( void ) = 0;
public:
/*!
* \brief do gradient boost training for one step, using the information given
* \param grad first order gradient of each instance
* \param hess second order gradient of each instance
* \param feats features of each instance
* \param root_index pre-partitioned root index of each instance,
* root_index.size() can be 0 which indicates that no pre-partition involved
*/
virtual void DoBoost( std::vector<float> &grad,
std::vector<float> &hess,
const FMatrixS::Image &feats,
const std::vector<unsigned> &root_index ) = 0;
/*!
* \brief predict values for given sparse feature
* NOTE: in tree implementation, this is not threadsafe
* \param feat vector in sparse format
* \param rid root id of current instance, default = 0
* \return prediction
*/
virtual float Predict( const FMatrixS::Line &feat, unsigned rid = 0 ){
utils::error( "not implemented" );
return 0.0f;
}
/*!
* \brief predict values for given dense feature
* \param feat feature vector in dense format
* \param funknown indicator that the feature is missing
* \param rid root id of current instance, default = 0
* \return prediction
*/
virtual float Predict( const std::vector<float> &feat,
const std::vector<bool> &funknown,
unsigned rid = 0 ){
utils::error( "not implemented" );
return 0.0f;
}
/*!
* \brief print information
* \param fo output stream
*/
virtual void PrintInfo( FILE *fo ){}
public:
virtual ~IBooster( void ){}
};
};
};
#endif

118
booster/xgboost_data.h Normal file
View File

@@ -0,0 +1,118 @@
#ifndef _XGBOOST_DATA_H_
#define _XGBOOST_DATA_H_
/*!
* \file xgboost_data.h
* \brief the input data structure for gradient boosting
* \author Tianqi Chen: tianqi.tchen@gmail.com
*/
#include <vector>
#include "../utils/xgboost_utils.h"
namespace xgboost{
namespace booster{
/*! \brief integer type used in boost */
typedef int bst_int;
/*! \brief unsigned integer type used in boost */
typedef unsigned bst_uint;
/*! \brief float type used in boost */
typedef float bst_float;
/*! \brief debug option for booster: enables the bound check in FMatrixS::operator[] when true */
const bool bst_debug = false;
};
};
namespace xgboost{
namespace booster{
/*!
* \brief auxiliary feature matrix to store training instances, in sparse CSR format
*/
class FMatrixS{
public:
/*! \brief one row of sparse feature matrix */
struct Line{
/*! \brief array of feature index */
const bst_uint *findex;
/*! \brief array of feature value */
const bst_float *fvalue;
/*! \brief size of the data */
bst_int len;
};
/*!
* \brief remapped image of sparse matrix,
* allows use a subset of sparse matrix, by specifying a rowmap
*/
struct Image{
public:
Image( const FMatrixS &smat ):smat(smat), row_map( tmp_rowmap ){
}
Image( const FMatrixS &smat, const std::vector<unsigned> &row_map )
:smat(smat), row_map(row_map){
}
/*! \brief get sparse part of current row */
inline Line operator[]( size_t sidx ) const{
if( row_map.size() == 0 ) return smat[ sidx ];
else return smat[ row_map[ sidx ] ];
}
private:
// used to set the simple case
std::vector<unsigned> tmp_rowmap;
const FMatrixS &smat;
const std::vector<unsigned> &row_map;
};
public:
// -----Note: unless needed for hacking, these fields should not be accessed directly -----
/*! \brief row pointer of CSR sparse storage */
std::vector<size_t> row_ptr;
/*! \brief index of CSR format */
std::vector<bst_uint> findex;
/*! \brief value of CSR format */
std::vector<bst_float> fvalue;
public:
/*! \brief constructor */
FMatrixS( void ){ this->Clear(); }
/*!
* \brief get number of rows
* \return number of rows
*/
inline size_t NumRow( void ) const{
return row_ptr.size() - 1;
}
/*! \brief clear the storage */
inline void Clear( void ){
row_ptr.resize( 0 );
findex.resize( 0 );
fvalue.resize( 0 );
row_ptr.push_back( 0 );
}
/*!
* \brief add a row to the matrix, but only accept features from fstart to fend
* \param feat sparse feature
* \param fstart start bound of feature
* \param fend end bound range of feature
* \return the row id addted
*/
inline size_t AddRow( const Line &feat, unsigned fstart = 0, unsigned fend = UINT_MAX ){
utils::Assert( feat.len >= 0, "sparse feature length can not be negative" );
unsigned cnt = 0;
for( int i = 0; i < feat.len; i ++ ){
if( feat.findex[i] < fstart || feat.findex[i] >= fend ) continue;
findex.push_back( feat.findex[i] );
fvalue.push_back( feat.fvalue[i] );
cnt ++;
}
row_ptr.push_back( row_ptr.back() + cnt );
return row_ptr.size() - 2;
}
/*! \brief get sparse part of current row */
inline Line operator[]( size_t sidx ) const{
Line sp;
utils::Assert( !bst_debug || sidx < this->NumRow(), "row id exceed bound" );
sp.len = row_ptr[ sidx + 1 ] - row_ptr[ sidx ];
sp.findex = &findex[ row_ptr[ sidx ] ];
sp.fvalue = &fvalue[ row_ptr[ sidx ] ];
return sp;
}
};
};
};
#endif

118
booster/xgboost_data.h~ Normal file
View File

@@ -0,0 +1,118 @@
#ifndef _XGBOOST_DATA_H_
#define _XGBOOST_DATA_H_
/*!
* \file xgboost_data.h
* \brief the input data structure for gradient boosting
* \author Tianqi Chen: tianqi.tchen@gmail.com
*/
#include <vector>
#include "../utils/xgboost_utils.h"
namespace xgboost{
namespace booster{
/*! \brief integer type used in boost */
typedef int bst_int;
/*! \brief unsigned integer type used in boost */
typedef unsigned bst_uint;
/*! \brief float type used in boost */
typedef float bst_float;
/*! \brief debug option for booster: enables the bound check in FMatrixS::operator[] when true */
const bool bst_debug = false;
};
};
namespace xgboost{
namespace booster{
/*!
* \brief auxiliary feature matrix to store training instances, in sparse CSR format
*/
class FMatrixS{
public:
// NOTE(review): this appears to be a stale editor backup (`~`) of xgboost_data.h;
// consider removing it from version control.
/*! \brief one row of sparse feature matrix */
struct Line{
/*! \brief array of feature index */
const bst_uint *findex;
/*! \brief array of feature value */
const bst_float *fvalue;
/*! \brief number of entries in this row */
bst_int len;
};
/*!
 * \brief remapped image of sparse matrix,
 *        allows using a subset of the sparse matrix by specifying a row map
 */
struct Image{
public:
// identity view: rows are addressed directly (row_map bound to the empty tmp_rowmap)
Image( const FMatrixS &smat ):smat(smat), row_map( tmp_rowmap ){
}
// remapped view: row sidx of the image is row row_map[sidx] of smat
Image( const FMatrixS &smat, const std::vector<unsigned> &row_map )
:smat(smat), row_map(row_map){
}
/*! \brief get sparse part of current row */
inline Line operator[]( size_t sidx ) const{
if( row_map.size() == 0 ) return smat[ sidx ];
else return smat[ row_map[ sidx ] ];
}
private:
// tmp_rowmap must stay declared before row_map: the first constructor binds
// row_map to it, and members initialize in declaration order
std::vector<unsigned> tmp_rowmap;
const FMatrixS &smat;
const std::vector<unsigned> &row_map;
};
public:
// -----Note: unless needed for hacking, these fields should not be accessed directly -----
/*! \brief row pointer of CSR sparse storage */
std::vector<size_t> row_ptr;
/*! \brief index of CSR format */
std::vector<bst_uint> findex;
/*! \brief value of CSR format */
std::vector<bst_float> fvalue;
public:
/*! \brief constructor */
// fixed: this->clear() did not compile — the method is declared as Clear()
FMatrixS( void ){ this->Clear(); }
/*!
 * \brief get number of rows
 * \return number of rows
 */
inline size_t NumRow( void ) const{
return row_ptr.size() - 1;
}
/*! \brief clear the storage */
inline void Clear( void ){
row_ptr.resize( 0 );
findex.resize( 0 );
fvalue.resize( 0 );
// CSR invariant: row_ptr always holds one leading sentinel entry
row_ptr.push_back( 0 );
}
/*!
 * \brief add a row to the matrix, but only accept features from fstart to fend
 * \param feat sparse feature
 * \param fstart inclusive start bound of feature
 * \param fend exclusive end bound of feature
 * \return the row id added
 */
inline size_t AddRow( const Line &feat, unsigned fstart = 0, unsigned fend = UINT_MAX ){
// fixed: utils::Assert (capitalized) — utils::assert does not exist;
// the current xgboost_data.h in this same commit calls utils::Assert
utils::Assert( feat.len >= 0, "sparse feature length can not be negative" );
unsigned cnt = 0;
for( int i = 0; i < feat.len; i ++ ){
if( feat.findex[i] < fstart || feat.findex[i] >= fend ) continue;
findex.push_back( feat.findex[i] );
fvalue.push_back( feat.fvalue[i] );
cnt ++;
}
row_ptr.push_back( row_ptr.back() + cnt );
return row_ptr.size() - 2;
}
/*! \brief get sparse part of current row */
inline Line operator[]( size_t sidx ) const{
Line sp;
// fixed: utils::Assert and this->NumRow() — lowercase utils::assert and
// num_row() do not match any declared name; bound check runs only when bst_debug
utils::Assert( !bst_debug || sidx < this->NumRow(), "row id exceed bound" );
sp.len = row_ptr[ sidx + 1 ] - row_ptr[ sidx ];
sp.findex = &findex[ row_ptr[ sidx ] ];
sp.fvalue = &fvalue[ row_ptr[ sidx ] ];
return sp;
}
};
};
};
#endif