finish python lib
This commit is contained in:
parent
20de7f8f97
commit
adc9400736
@ -1,30 +1,35 @@
|
|||||||
# module for xgboost
|
# module for xgboost
|
||||||
import ctypes
|
import ctypes
|
||||||
# optinally have scipy sparse, though not necessary
|
# optinally have scipy sparse, though not necessary
|
||||||
|
import numpy as np
|
||||||
import scipy.sparse as scp
|
import scipy.sparse as scp
|
||||||
|
# set this line correctly
|
||||||
|
XGBOOST_PATH = './libxgboostpy.so'
|
||||||
|
|
||||||
# entry type of sparse matrix
|
# entry type of sparse matrix
|
||||||
class REntry(ctypes.Structure):
|
class REntry(ctypes.Structure):
|
||||||
_fields_ = [("findex", ctypes.c_uint), ("fvalue", ctypes.c_float) ]
|
_fields_ = [("findex", ctypes.c_uint), ("fvalue", ctypes.c_float) ]
|
||||||
|
|
||||||
# load in xgboost library
|
# load in xgboost library
|
||||||
xglib = ctypes.cdll.LoadLibrary('./libxgboostpy.so')
|
xglib = ctypes.cdll.LoadLibrary(XGBOOST_PATH)
|
||||||
|
|
||||||
xglib.XGDMatrixCreate.restype = ctypes.c_void_p
|
xglib.XGDMatrixCreate.restype = ctypes.c_void_p
|
||||||
xglib.XGDMatrixNumRow.restype = ctypes.c_ulong
|
xglib.XGDMatrixNumRow.restype = ctypes.c_ulong
|
||||||
xglib.XGDMatrixGetLabel.restype = ctypes.POINTER( ctypes.c_float )
|
xglib.XGDMatrixGetLabel.restype = ctypes.POINTER( ctypes.c_float )
|
||||||
xglib.XGDMatrixGetRow.restype = ctypes.POINTER( REntry )
|
xglib.XGDMatrixGetRow.restype = ctypes.POINTER( REntry )
|
||||||
|
xglib.XGBoosterPredict.restype = ctypes.POINTER( ctypes.c_float )
|
||||||
|
|
||||||
# data matrix used in xgboost
|
# data matrix used in xgboost
|
||||||
class DMatrix:
|
class DMatrix:
|
||||||
# constructor
|
# constructor
|
||||||
def __init__(self, data=None, label=None):
|
def __init__(self, data=None, label=None):
|
||||||
self.handle = xglib.XGDMatrixCreate();
|
self.handle = xglib.XGDMatrixCreate()
|
||||||
if data == None:
|
if data == None:
|
||||||
return
|
return
|
||||||
if type(data) is str:
|
if isinstance(data,str):
|
||||||
xglib.XGDMatrixLoad(self.handle, ctypes.c_char_p(data), 1)
|
xglib.XGDMatrixLoad(self.handle, ctypes.c_char_p(data), 1)
|
||||||
elif type(data) is scp.csr_matrix:
|
|
||||||
|
elif isinstance(data,scp.csr_matrix):
|
||||||
self.__init_from_csr(data)
|
self.__init_from_csr(data)
|
||||||
else:
|
else:
|
||||||
try:
|
try:
|
||||||
@ -54,7 +59,13 @@ class DMatrix:
|
|||||||
xglib.XGDMatrixSaveBinary(self.handle, ctypes.c_char_p(fname), int(silent))
|
xglib.XGDMatrixSaveBinary(self.handle, ctypes.c_char_p(fname), int(silent))
|
||||||
# set label of dmatrix
|
# set label of dmatrix
|
||||||
def set_label(self, label):
|
def set_label(self, label):
|
||||||
xglib.XGDMatrixSetLabel(self.handle, (ctypes.c_float*len(label))(*label), len(label) );
|
xglib.XGDMatrixSetLabel(self.handle, (ctypes.c_float*len(label))(*label), len(label) )
|
||||||
|
# set group size of dmatrix, used for rank
|
||||||
|
def set_group(self, group):
    """Set the group sizes of this DMatrix (used for ranking).

    `group` is a sequence of integers. It is copied into a C array of
    unsigned ints and passed, together with its length, to the native
    XGDMatrixSetGroup entry point.
    """
    n_groups = len(group)
    group_array_type = ctypes.c_uint * n_groups
    c_groups = group_array_type(*group)
    xglib.XGDMatrixSetGroup(self.handle, c_groups, n_groups)
|
||||||
|
# set weight of each instances
|
||||||
|
def set_weight(self, weight):
    """Set the weight of each instance in this DMatrix.

    `weight` is a sequence of numbers, one per instance. The values are
    copied into a C float array because the native XGDMatrixSetWeight
    entry point takes `const float *weight`; packing them as unsigned
    ints would reject fractional weights and hand integer bit patterns
    to code that reads them as floats.
    """
    n_weights = len(weight)
    c_weights = (ctypes.c_float * n_weights)(*weight)
    xglib.XGDMatrixSetWeight(self.handle, c_weights, n_weights)
|
||||||
# get label from dmatrix
|
# get label from dmatrix
|
||||||
def get_label(self):
|
def get_label(self):
|
||||||
length = ctypes.c_ulong()
|
length = ctypes.c_ulong()
|
||||||
@ -66,16 +77,57 @@ class DMatrix:
|
|||||||
def num_row(self):
|
def num_row(self):
|
||||||
return xglib.XGDMatrixNumRow(self.handle)
|
return xglib.XGDMatrixNumRow(self.handle)
|
||||||
# append a row to DMatrix
|
# append a row to DMatrix
|
||||||
def add_row(self, row, label):
|
def add_row(self, row):
|
||||||
xglib.XGDMatrixAddRow(self.handle, (REntry*len(row))(*row), len(row), label )
|
xglib.XGDMatrixAddRow(self.handle, (REntry*len(row))(*row), len(row) )
|
||||||
# get n-throw from DMatrix
|
# get n-throw from DMatrix
|
||||||
def __getitem__(self, ridx):
|
def __getitem__(self, ridx):
|
||||||
length = ctypes.c_ulong()
|
length = ctypes.c_ulong()
|
||||||
row = xglib.XGDMatrixGetRow(self.handle, ridx, ctypes.byref(length) );
|
row = xglib.XGDMatrixGetRow(self.handle, ridx, ctypes.byref(length) );
|
||||||
return [ (int(row[i].findex),row[i].fvalue) for i in xrange(length.value) ]
|
return [ (int(row[i].findex),row[i].fvalue) for i in xrange(length.value) ]
|
||||||
|
|
||||||
|
class Booster:
    """Learner class wrapping a native xgboost booster handle."""

    def __init__(self, params, cache=None):
        """Create a native booster and apply `params` to it.

        params: dict mapping parameter names to values; every value is
                passed to the native library as str(value).
        cache:  optional list of DMatrix objects handed to
                XGBoosterCreate as the matrices to cache; defaults to
                no matrices.
        """
        cache = [] if cache is None else cache
        for d in cache:
            assert isinstance(d, DMatrix)
        dmats = (ctypes.c_void_p * len(cache))(
            *[ctypes.c_void_p(d.handle) for d in cache])
        # ctypes assumes a C int return value unless restype is declared,
        # which would truncate the returned pointer on 64-bit platforms.
        xglib.XGBoosterCreate.restype = ctypes.c_void_p
        self.handle = xglib.XGBoosterCreate(dmats, len(cache))
        for k, v in params.items():
            xglib.XGBoosterSetParam(
                self.__handle(), ctypes.c_char_p(k), ctypes.c_char_p(str(v)))

    def __handle(self):
        """Return the booster handle wrapped as a C pointer argument.

        A bare Python integer would be passed to the native call as a C
        int, so it is wrapped in c_void_p to keep the full pointer width.
        """
        return ctypes.c_void_p(self.handle)

    def update(self, dtrain):
        """Run one boosting iteration on the training matrix `dtrain`."""
        assert isinstance(dtrain, DMatrix)
        xglib.XGBoosterUpdateOneIter(
            self.__handle(), ctypes.c_void_p(dtrain.handle))

    def eval_set(self, evals, it=0):
        """Evaluate a list of (DMatrix, name) pairs for iteration `it`.

        The native side writes the evaluation result to stdout.
        """
        for d in evals:
            assert isinstance(d[0], DMatrix)
            assert isinstance(d[1], str)
        dmats = (ctypes.c_void_p * len(evals))(
            *[ctypes.c_void_p(d[0].handle) for d in evals])
        evnames = (ctypes.c_char_p * len(evals))(
            *[ctypes.c_char_p(d[1]) for d in evals])
        xglib.XGBoosterEvalOneIter(
            self.__handle(), it, dmats, evnames, len(evals))

    def eval(self, mat, name='eval', it=0):
        """Evaluate a single matrix `mat` under the label `name`."""
        self.eval_set([(mat, name)], it)

    def predict(self, data):
        """Return the predictions for DMatrix `data` as a list of floats.

        The values are copied out of the buffer returned by the native
        XGBoosterPredict call.
        """
        length = ctypes.c_ulong()
        preds = xglib.XGBoosterPredict(
            self.__handle(), ctypes.c_void_p(data.handle),
            ctypes.byref(length))
        # slicing a ctypes pointer copies the elements into a Python list
        return preds[0:length.value]

    def save_model(self, fname):
        """Save the model to the file `fname`."""
        xglib.XGBoosterSaveModel(self.__handle(), ctypes.c_char_p(fname))

    def load_model(self, fname):
        """Load the model from the file `fname`."""
        xglib.XGBoosterLoadModel(self.__handle(), ctypes.c_char_p(fname))

    def dump_model(self, fname, fmap=''):
        """Dump the model into the text file `fname`.

        fmap: name of a feature map file; the empty string means none.
        """
        xglib.XGBoosterDumpModel(
            self.__handle(), ctypes.c_char_p(fname), ctypes.c_char_p(fmap))
|
||||||
|
|
||||||
|
def train(params, dtrain, num_boost_round=10, evals=None):
    """Train a booster with the given parameters.

    params:          dict of booster parameters, passed to Booster.
    dtrain:          DMatrix to train on; it is also given to the
                     booster as its cached matrix.
    num_boost_round: number of boosting iterations to run.
    evals:           optional list of (DMatrix, name) pairs evaluated
                     after every iteration; nothing is evaluated when it
                     is None or empty.

    Returns the trained Booster.
    """
    booster = Booster(params, [dtrain])
    has_evals = evals is not None and len(evals) != 0
    for round_idx in range(num_boost_round):
        booster.update(dtrain)
        if has_evals:
            booster.eval_set(evals, round_idx)
    return booster
|
||||||
|
|
||||||
mat = DMatrix('xx.buffer')
|
|
||||||
print mat.num_row()
|
|
||||||
mat.clear()
|
|
||||||
|
|||||||
@ -56,20 +56,63 @@ namespace xgboost{
|
|||||||
this->info.labels.resize( len );
|
this->info.labels.resize( len );
|
||||||
memcpy( &(this->info).labels[0], label, sizeof(float)*len );
|
memcpy( &(this->info).labels[0], label, sizeof(float)*len );
|
||||||
}
|
}
|
||||||
|
/*!
 * \brief set the group sizes; they are stored in info.group_ptr as running offsets,
 *        so group_ptr has len + 1 entries and starts at 0
 * \param group array of group sizes
 * \param len number of groups
 */
inline void SetGroup( const unsigned *group, size_t len ){
    this->info.group_ptr.resize( len + 1 );
    this->info.group_ptr[0] = 0;
    for( size_t k = 1; k <= len; ++k ){
        // offset k is the previous offset plus the size of group k-1
        this->info.group_ptr[k] = this->info.group_ptr[k-1] + group[k-1];
    }
}
|
||||||
|
/*!
 * \brief set the weight of each instance by copying len floats into info.weights
 * \param weight pointer to the weights
 * \param len number of weights
 */
inline void SetWeight( const float *weight, size_t len ){
    this->info.weights.resize( len );
    // taking the address of element 0 of an empty vector is undefined,
    // so only copy when there is at least one weight
    if( len != 0 ){
        memcpy( &(this->info).weights[0], weight, sizeof(float)*len );
    }
}
|
||||||
inline const float* GetLabel( size_t* len ) const{
|
inline const float* GetLabel( size_t* len ) const{
|
||||||
*len = this->info.labels.size();
|
*len = this->info.labels.size();
|
||||||
return &(this->info.labels[0]);
|
return &(this->info.labels[0]);
|
||||||
}
|
}
|
||||||
inline void InitTrain(void){
|
inline void CheckInit(void){
|
||||||
if(!this->data.HaveColAccess()) this->data.InitData();
|
if(!this->data.HaveColAccess()){
|
||||||
|
this->data.InitData();
|
||||||
|
}
|
||||||
utils::Assert( this->data.NumRow() == this->info.labels.size(), "DMatrix: number of labels must match number of rows in matrix");
|
utils::Assert( this->data.NumRow() == this->info.labels.size(), "DMatrix: number of labels must match number of rows in matrix");
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
class Booster: public xgboost::regrank::RegRankBoostLearner{
|
||||||
|
private:
|
||||||
|
bool init_trainer, init_model;
|
||||||
|
public:
|
||||||
|
Booster(const std::vector<const regrank::DMatrix *> mats){
|
||||||
|
silent = 1;
|
||||||
|
init_trainer = false;
|
||||||
|
init_model = false;
|
||||||
|
this->SetCacheData(mats);
|
||||||
|
}
|
||||||
|
inline void CheckInit(void){
|
||||||
|
if( !init_trainer ){
|
||||||
|
this->InitTrainer(); init_trainer = true;
|
||||||
|
}
|
||||||
|
if( !init_model ){
|
||||||
|
this->InitModel(); init_model = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
inline void LoadModel( const char *fname ){
|
||||||
|
xgboost::regrank::RegRankBoostLearner::LoadModel(fname);
|
||||||
|
this->init_model = true;
|
||||||
|
}
|
||||||
|
const float *Pred( const DMatrix &dmat, size_t *len ){
|
||||||
|
this->Predict( this->preds_, dmat );
|
||||||
|
*len = this->preds_.size();
|
||||||
|
return &this->preds_[0];
|
||||||
|
}
|
||||||
|
};
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
using namespace xgboost::python;
|
using namespace xgboost::python;
|
||||||
|
|
||||||
|
|
||||||
extern "C"{
|
extern "C"{
|
||||||
void* XGDMatrixCreate( void ){
|
void* XGDMatrixCreate( void ){
|
||||||
return new DMatrix();
|
return new DMatrix();
|
||||||
@ -94,6 +137,12 @@ extern "C"{
|
|||||||
void XGDMatrixSetLabel( void *handle, const float *label, size_t len ){
|
void XGDMatrixSetLabel( void *handle, const float *label, size_t len ){
|
||||||
static_cast<DMatrix*>(handle)->SetLabel(label,len);
|
static_cast<DMatrix*>(handle)->SetLabel(label,len);
|
||||||
}
|
}
|
||||||
|
void XGDMatrixSetWeight( void *handle, const float *weight, size_t len ){
|
||||||
|
static_cast<DMatrix*>(handle)->SetWeight(weight,len);
|
||||||
|
}
|
||||||
|
void XGDMatrixSetGroup( void *handle, const unsigned *group, size_t len ){
|
||||||
|
static_cast<DMatrix*>(handle)->SetGroup(group,len);
|
||||||
|
}
|
||||||
const float* XGDMatrixGetLabel( const void *handle, size_t* len ){
|
const float* XGDMatrixGetLabel( const void *handle, size_t* len ){
|
||||||
return static_cast<const DMatrix*>(handle)->GetLabel(len);
|
return static_cast<const DMatrix*>(handle)->GetLabel(len);
|
||||||
}
|
}
|
||||||
@ -109,5 +158,54 @@ extern "C"{
|
|||||||
const XGEntry* XGDMatrixGetRow(void *handle, unsigned ridx, size_t* len){
|
const XGEntry* XGDMatrixGetRow(void *handle, unsigned ridx, size_t* len){
|
||||||
return static_cast<DMatrix*>(handle)->GetRow(ridx, len);
|
return static_cast<DMatrix*>(handle)->GetRow(ridx, len);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// xgboost implementation
|
||||||
|
void *XGBoosterCreate( void *dmats[], size_t len ){
|
||||||
|
std::vector<const xgboost::regrank::DMatrix*> mats;
|
||||||
|
for( size_t i = 0; i < len; ++i ){
|
||||||
|
mats.push_back( static_cast<DMatrix*>(dmats[i]) );
|
||||||
|
}
|
||||||
|
return new Booster( mats );
|
||||||
|
}
|
||||||
|
void XGBoosterSetParam( void *handle, const char *name, const char *value ){
|
||||||
|
static_cast<Booster*>(handle)->SetParam( name, value );
|
||||||
|
}
|
||||||
|
void XGBoosterUpdateOneIter( void *handle, void *dtrain ){
|
||||||
|
Booster *bst = static_cast<Booster*>(handle);
|
||||||
|
DMatrix *dtr = static_cast<DMatrix*>(dtrain);
|
||||||
|
bst->CheckInit(); dtr->CheckInit();
|
||||||
|
bst->UpdateOneIter( *dtr );
|
||||||
|
}
|
||||||
|
void XGBoosterEvalOneIter( void *handle, int iter, void *dmats[], const char *evnames[], size_t len ){
|
||||||
|
Booster *bst = static_cast<Booster*>(handle);
|
||||||
|
bst->CheckInit();
|
||||||
|
|
||||||
|
std::vector<std::string> names;
|
||||||
|
std::vector<const xgboost::regrank::DMatrix*> mats;
|
||||||
|
for( size_t i = 0; i < len; ++i ){
|
||||||
|
mats.push_back( static_cast<DMatrix*>(dmats[i]) );
|
||||||
|
names.push_back( std::string( evnames[i]) );
|
||||||
|
}
|
||||||
|
bst->EvalOneIter( iter, mats, names, stdout );
|
||||||
|
}
|
||||||
|
const float *XGBoosterPredict( void *handle, void *dmat, size_t *len ){
|
||||||
|
return static_cast<Booster*>(handle)->Pred( *static_cast<DMatrix*>(dmat), len );
|
||||||
|
}
|
||||||
|
void XGBoosterLoadModel( void *handle, const char *fname ){
|
||||||
|
static_cast<Booster*>(handle)->LoadModel( fname );
|
||||||
|
}
|
||||||
|
void XGBoosterSaveModel( const void *handle, const char *fname ){
|
||||||
|
static_cast<const Booster*>(handle)->SaveModel( fname );
|
||||||
|
}
|
||||||
|
void XGBoosterDumpModel( void *handle, const char *fname, const char *fmap ){
|
||||||
|
using namespace xgboost::utils;
|
||||||
|
FILE *fo = FopenCheck( fname, "w" );
|
||||||
|
FeatMap featmap;
|
||||||
|
if( strlen(fmap) != 0 ){
|
||||||
|
featmap.LoadText( fmap );
|
||||||
|
}
|
||||||
|
static_cast<Booster*>(handle)->DumpModel( fo, featmap, false );
|
||||||
|
fclose( fo );
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|||||||
@ -52,10 +52,24 @@ extern "C"{
|
|||||||
/*!
|
/*!
|
||||||
* \brief set label of the training matrix
|
* \brief set label of the training matrix
|
||||||
* \param handle a instance of data matrix
|
* \param handle a instance of data matrix
|
||||||
* \param data array of row content
|
* \param label pointer to label
|
||||||
* \param len length of array
|
* \param len length of array
|
||||||
*/
|
*/
|
||||||
void XGDMatrixSetLabel( void *handle, const float *label, size_t len );
|
void XGDMatrixSetLabel( void *handle, const float *label, size_t len );
|
||||||
|
/*!
|
||||||
|
* \brief set group sizes of the data matrix, used for ranking
|
||||||
|
* \param handle a instance of data matrix
|
||||||
|
* \param group pointer to group size
|
||||||
|
* \param len length of array
|
||||||
|
*/
|
||||||
|
void XGDMatrixSetGroup( void *handle, const unsigned *group, size_t len );
|
||||||
|
/*!
|
||||||
|
* \brief set weight of each instance
|
||||||
|
* \param handle a instance of data matrix
|
||||||
|
* \param weight data pointer to weights
|
||||||
|
* \param len length of array
|
||||||
|
*/
|
||||||
|
void XGDMatrixSetWeight( void *handle, const float *weight, size_t len );
|
||||||
/*!
|
/*!
|
||||||
* \brief get label set from matrix
|
* \brief get label set from matrix
|
||||||
* \param handle a instance of data matrix
|
* \param handle a instance of data matrix
|
||||||
@ -94,7 +108,7 @@ extern "C"{
|
|||||||
* \param dmats matrices that are set to be cached
|
* \param dmats matrices that are set to be cached
|
||||||
* \param create a booster
|
* \param create a booster
|
||||||
*/
|
*/
|
||||||
void *CreateXGBooster( void**dmats, size_t len );
|
void *XGBoosterCreate( void* dmats[], size_t len );
|
||||||
/*!
|
/*!
|
||||||
* \brief set parameters
|
* \brief set parameters
|
||||||
* \param handle handle
|
* \param handle handle
|
||||||
@ -135,7 +149,14 @@ extern "C"{
|
|||||||
* \param handle handle
|
* \param handle handle
|
||||||
* \param fname file name
|
* \param fname file name
|
||||||
*/
|
*/
|
||||||
void XGBoosterSaveModel( void *handle, const char *fname );
|
void XGBoosterSaveModel( const void *handle, const char *fname );
|
||||||
|
/*!
|
||||||
|
* \brief dump model into text file
|
||||||
|
* \param handle handle
|
||||||
|
* \param fname file name
|
||||||
|
* \param fmap name to fmap can be empty string
|
||||||
|
*/
|
||||||
|
void XGBoosterDumpModel( void *handle, const char *fname, const char *fmap );
|
||||||
};
|
};
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|||||||
@ -31,7 +31,7 @@ namespace xgboost{
|
|||||||
* \brief a regression booter associated with training and evaluating data
|
* \brief a regression booter associated with training and evaluating data
|
||||||
* \param mats array of pointers to matrix whose prediction result need to be cached
|
* \param mats array of pointers to matrix whose prediction result need to be cached
|
||||||
*/
|
*/
|
||||||
RegRankBoostLearner(const std::vector<const DMatrix *> mats){
|
RegRankBoostLearner(const std::vector<const DMatrix *>& mats){
|
||||||
silent = 0;
|
silent = 0;
|
||||||
obj_ = NULL;
|
obj_ = NULL;
|
||||||
name_obj_ = "reg";
|
name_obj_ = "reg";
|
||||||
@ -45,7 +45,7 @@ namespace xgboost{
|
|||||||
* data matrices to continue training otherwise it will cause error
|
* data matrices to continue training otherwise it will cause error
|
||||||
* \param mats array of pointers to matrix whose prediction result need to be cached
|
* \param mats array of pointers to matrix whose prediction result need to be cached
|
||||||
*/
|
*/
|
||||||
inline void SetCacheData(const std::vector<const DMatrix *> mats){
|
inline void SetCacheData(const std::vector<const DMatrix *>& mats){
|
||||||
// estimate feature bound
|
// estimate feature bound
|
||||||
int num_feature = 0;
|
int num_feature = 0;
|
||||||
// assign buffer index
|
// assign buffer index
|
||||||
@ -53,6 +53,11 @@ namespace xgboost{
|
|||||||
|
|
||||||
utils::Assert( cache_.size() == 0, "can only call cache data once" );
|
utils::Assert( cache_.size() == 0, "can only call cache data once" );
|
||||||
for( size_t i = 0; i < mats.size(); ++i ){
|
for( size_t i = 0; i < mats.size(); ++i ){
|
||||||
|
bool dupilicate = false;
|
||||||
|
for( size_t j = 0; j < i; ++ j ){
|
||||||
|
if( mats[i] == mats[j] ) dupilicate = true;
|
||||||
|
}
|
||||||
|
if( dupilicate ) continue;
|
||||||
cache_.push_back( CacheEntry( mats[i], buffer_size ) );
|
cache_.push_back( CacheEntry( mats[i], buffer_size ) );
|
||||||
buffer_size += static_cast<unsigned>(mats[i]->Size());
|
buffer_size += static_cast<unsigned>(mats[i]->Size());
|
||||||
num_feature = std::max(num_feature, (int)(mats[i]->data.NumCol()));
|
num_feature = std::max(num_feature, (int)(mats[i]->data.NumCol()));
|
||||||
@ -105,9 +110,18 @@ namespace xgboost{
|
|||||||
mparam.AdjustBase();
|
mparam.AdjustBase();
|
||||||
}
|
}
|
||||||
/*!
|
/*!
|
||||||
* \brief load model from stream
|
* \brief load model from file
|
||||||
* \param fi input stream
|
* \param fname file name
|
||||||
*/
|
*/
|
||||||
|
inline void LoadModel(const char *fname){
|
||||||
|
utils::FileStream fi(utils::FopenCheck(fname, "rb"));
|
||||||
|
this->LoadModel(fi);
|
||||||
|
fi.Close();
|
||||||
|
}
|
||||||
|
/*!
|
||||||
|
* \brief load model from stream
|
||||||
|
* \param fi input stream
|
||||||
|
*/
|
||||||
inline void LoadModel(utils::IStream &fi){
|
inline void LoadModel(utils::IStream &fi){
|
||||||
base_gbm.LoadModel(fi);
|
base_gbm.LoadModel(fi);
|
||||||
utils::Assert(fi.Read(&mparam, sizeof(ModelParam)) != 0);
|
utils::Assert(fi.Read(&mparam, sizeof(ModelParam)) != 0);
|
||||||
@ -138,10 +152,18 @@ namespace xgboost{
|
|||||||
fo.Write(&mparam, sizeof(ModelParam));
|
fo.Write(&mparam, sizeof(ModelParam));
|
||||||
}
|
}
|
||||||
/*!
|
/*!
|
||||||
* \brief update the model for one iteration
|
* \brief save model into file
|
||||||
* \param iteration iteration number
|
* \param fname file name
|
||||||
*/
|
*/
|
||||||
inline void UpdateOneIter(int iter, const DMatrix &train){
|
inline void SaveModel(const char *fname) const{
|
||||||
|
utils::FileStream fo(utils::FopenCheck(fname, "wb"));
|
||||||
|
this->SaveModel(fo);
|
||||||
|
fo.Close();
|
||||||
|
}
|
||||||
|
/*!
|
||||||
|
* \brief update the model for one iteration
|
||||||
|
*/
|
||||||
|
inline void UpdateOneIter(const DMatrix &train){
|
||||||
this->PredictRaw(preds_, train);
|
this->PredictRaw(preds_, train);
|
||||||
obj_->GetGradient(preds_, train.info, base_gbm.NumBoosters(), grad_, hess_);
|
obj_->GetGradient(preds_, train.info, base_gbm.NumBoosters(), grad_, hess_);
|
||||||
// do boost
|
// do boost
|
||||||
@ -295,7 +317,7 @@ namespace xgboost{
|
|||||||
}
|
}
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
private:
|
protected:
|
||||||
int silent;
|
int silent;
|
||||||
EvalSet evaluator_;
|
EvalSet evaluator_;
|
||||||
booster::GBMBase base_gbm;
|
booster::GBMBase base_gbm;
|
||||||
@ -305,7 +327,7 @@ namespace xgboost{
|
|||||||
// name of objective function
|
// name of objective function
|
||||||
std::string name_obj_;
|
std::string name_obj_;
|
||||||
std::vector< std::pair<std::string, std::string> > cfg_;
|
std::vector< std::pair<std::string, std::string> > cfg_;
|
||||||
private:
|
protected:
|
||||||
std::vector<float> grad_, hess_, preds_;
|
std::vector<float> grad_, hess_, preds_;
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|||||||
@ -166,7 +166,11 @@ namespace xgboost{
|
|||||||
inline void CacheLoad(const char *fname, bool silent = false, bool savebuffer = true){
|
inline void CacheLoad(const char *fname, bool silent = false, bool savebuffer = true){
|
||||||
int len = strlen(fname);
|
int len = strlen(fname);
|
||||||
if (len > 8 && !strcmp(fname + len - 7, ".buffer")){
|
if (len > 8 && !strcmp(fname + len - 7, ".buffer")){
|
||||||
this->LoadBinary(fname, silent); return;
|
if( !this->LoadBinary(fname, silent) ){
|
||||||
|
fprintf(stderr,"can not open file \"%s\"", fname);
|
||||||
|
utils::Error("DMatrix::CacheLoad failed");
|
||||||
|
}
|
||||||
|
return;
|
||||||
}
|
}
|
||||||
char bname[1024];
|
char bname[1024];
|
||||||
sprintf(bname, "%s.buffer", fname);
|
sprintf(bname, "%s.buffer", fname);
|
||||||
|
|||||||
@ -163,7 +163,7 @@ namespace xgboost{
|
|||||||
for (int i = 0; i < num_round; ++i){
|
for (int i = 0; i < num_round; ++i){
|
||||||
elapsed = (unsigned long)(time(NULL) - start);
|
elapsed = (unsigned long)(time(NULL) - start);
|
||||||
if (!silent) printf("boosting round %d, %lu sec elapsed\n", i, elapsed);
|
if (!silent) printf("boosting round %d, %lu sec elapsed\n", i, elapsed);
|
||||||
learner.UpdateOneIter(i, data);
|
learner.UpdateOneIter(data);
|
||||||
learner.EvalOneIter(i, devalall, eval_data_names);
|
learner.EvalOneIter(i, devalall, eval_data_names);
|
||||||
if (save_period != 0 && (i + 1) % save_period == 0){
|
if (save_period != 0 && (i + 1) % save_period == 0){
|
||||||
this->SaveModel(i);
|
this->SaveModel(i);
|
||||||
|
|||||||
@ -31,7 +31,7 @@ namespace xgboost{
|
|||||||
/*! \brief load feature map from text format */
|
/*! \brief load feature map from text format */
|
||||||
inline void LoadText(FILE *fi){
|
inline void LoadText(FILE *fi){
|
||||||
int fid;
|
int fid;
|
||||||
char fname[256], ftype[256];
|
char fname[1256], ftype[1256];
|
||||||
while (fscanf(fi, "%d\t%[^\t]\t%s\n", &fid, fname, ftype) == 3){
|
while (fscanf(fi, "%d\t%[^\t]\t%s\n", &fid, fname, ftype) == 3){
|
||||||
utils::Assert(fid == (int)names_.size(), "invalid fmap format");
|
utils::Assert(fid == (int)names_.size(), "invalid fmap format");
|
||||||
names_.push_back(std::string(fname));
|
names_.push_back(std::string(fname));
|
||||||
|
|||||||
@ -38,6 +38,7 @@ namespace xgboost{
|
|||||||
namespace utils{
|
namespace utils{
|
||||||
inline void Error(const char *msg){
|
inline void Error(const char *msg){
|
||||||
fprintf(stderr, "Error:%s\n", msg);
|
fprintf(stderr, "Error:%s\n", msg);
|
||||||
|
fflush(stderr);
|
||||||
exit(-1);
|
exit(-1);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -57,7 +58,8 @@ namespace xgboost{
|
|||||||
inline FILE *FopenCheck(const char *fname, const char *flag){
|
inline FILE *FopenCheck(const char *fname, const char *flag){
|
||||||
FILE *fp = fopen64(fname, flag);
|
FILE *fp = fopen64(fname, flag);
|
||||||
if (fp == NULL){
|
if (fp == NULL){
|
||||||
fprintf(stderr, "can not open file \"%s\"\n", fname);
|
fprintf(stderr, "can not open file \"%s\" \n", fname);
|
||||||
|
fflush(stderr);
|
||||||
exit(-1);
|
exit(-1);
|
||||||
}
|
}
|
||||||
return fp;
|
return fp;
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user