reupdate data
This commit is contained in:
@@ -60,13 +60,14 @@ namespace xgboost{
|
||||
}
|
||||
|
||||
char str_temp[25];
|
||||
if( num_feature > base_model.param.num_feature ){
|
||||
if( num_feature > mparam.num_feature ){
|
||||
mparam.num_feature = num_feature;
|
||||
sprintf( str_temp, "%d", num_feature );
|
||||
base_model.SetParam( "bst:num_feature", str_temp );
|
||||
base_gbm.SetParam( "bst:num_feature", str_temp );
|
||||
}
|
||||
|
||||
sprintf( str_temp, "%u", buffer_size );
|
||||
base_model.SetParam( "num_pbuffer", str_temp );
|
||||
base_gbm.SetParam( "num_pbuffer", str_temp );
|
||||
if( !silent ){
|
||||
printf( "buffer_size=%u\n", buffer_size );
|
||||
}
|
||||
@@ -81,16 +82,16 @@ namespace xgboost{
|
||||
*/
|
||||
inline void SetParam( const char *name, const char *val ){
|
||||
if( !strcmp( name, "silent") ) silent = atoi( val );
|
||||
if( !strcmp( name, "eval_metric") ) evaluator_.AddEval( val );
|
||||
if( !strcmp( name, "eval_metric") ) evaluator_.AddEval( val );
|
||||
mparam.SetParam( name, val );
|
||||
base_model.SetParam( name, val );
|
||||
base_gbm.SetParam( name, val );
|
||||
}
|
||||
/*!
|
||||
* \brief initialize solver before training, called before training
|
||||
* this function is reserved for solver to allocate necessary space and do other preparation
|
||||
*/
|
||||
inline void InitTrainer( void ){
|
||||
base_model.InitTrainer();
|
||||
base_gbm.InitTrainer();
|
||||
if( mparam.loss_type == kLogisticClassify ){
|
||||
evaluator_.AddEval( "error" );
|
||||
}else{
|
||||
@@ -102,7 +103,7 @@ namespace xgboost{
|
||||
* \brief initialize the current data storage for model, if the model is used first time, call this function
|
||||
*/
|
||||
inline void InitModel( void ){
|
||||
base_model.InitModel();
|
||||
base_gbm.InitModel();
|
||||
mparam.AdjustBase();
|
||||
}
|
||||
/*!
|
||||
@@ -110,7 +111,7 @@ namespace xgboost{
|
||||
* \param fi input stream
|
||||
*/
|
||||
inline void LoadModel( utils::IStream &fi ){
|
||||
base_model.LoadModel( fi );
|
||||
base_gbm.LoadModel( fi );
|
||||
utils::Assert( fi.Read( &mparam, sizeof(ModelParam) ) != 0 );
|
||||
}
|
||||
/*!
|
||||
@@ -120,7 +121,7 @@ namespace xgboost{
|
||||
* \param with_stats whether print statistics as well
|
||||
*/
|
||||
inline void DumpModel( FILE *fo, const utils::FeatMap& fmap, bool with_stats ){
|
||||
base_model.DumpModel( fo, fmap, with_stats );
|
||||
base_gbm.DumpModel( fo, fmap, with_stats );
|
||||
}
|
||||
/*!
|
||||
* \brief Dump path of all trees
|
||||
@@ -128,14 +129,14 @@ namespace xgboost{
|
||||
* \param data input data
|
||||
*/
|
||||
inline void DumpPath( FILE *fo, const DMatrix &data ){
|
||||
base_model.DumpPath( fo, data.data );
|
||||
base_gbm.DumpPath( fo, data.data );
|
||||
}
|
||||
/*!
|
||||
* \brief save model to stream
|
||||
* \param fo output stream
|
||||
*/
|
||||
inline void SaveModel( utils::IStream &fo ) const{
|
||||
base_model.SaveModel( fo );
|
||||
base_gbm.SaveModel( fo );
|
||||
fo.Write( &mparam, sizeof(ModelParam) );
|
||||
}
|
||||
/*!
|
||||
@@ -146,7 +147,7 @@ namespace xgboost{
|
||||
this->PredictBuffer( preds_, *train_, 0 );
|
||||
this->GetGradient( preds_, train_->labels, grad_, hess_ );
|
||||
std::vector<unsigned> root_index;
|
||||
base_model.DoBoost( grad_, hess_, train_->data, root_index );
|
||||
base_gbm.DoBoost( grad_, hess_, train_->data, root_index );
|
||||
}
|
||||
/*!
|
||||
* \brief evaluate the model for specific iteration
|
||||
@@ -165,7 +166,6 @@ namespace xgboost{
|
||||
}
|
||||
fprintf( fo,"\n" );
|
||||
}
|
||||
|
||||
/*! \brief get prediction, without buffering */
|
||||
inline void Predict( std::vector<float> &preds, const DMatrix &data ){
|
||||
preds.resize( data.Size() );
|
||||
@@ -174,7 +174,51 @@ namespace xgboost{
|
||||
#pragma omp parallel for schedule( static )
|
||||
for( unsigned j = 0; j < ndata; ++ j ){
|
||||
preds[j] = mparam.PredTransform
|
||||
( mparam.base_score + base_model.Predict( data.data, j, -1 ) );
|
||||
( mparam.base_score + base_gbm.Predict( data.data, j, -1 ) );
|
||||
}
|
||||
}
|
||||
public:
|
||||
/*!
|
||||
* \brief update the model for one iteration
|
||||
* \param iteration iteration number
|
||||
*/
|
||||
inline void UpdateInteract( void ){
|
||||
this->InteractPredict( preds_, *train_, 0 );
|
||||
int buffer_offset = static_cast<int>( train_->Size() );
|
||||
for( size_t i = 0; i < evals_.size(); ++i ){
|
||||
std::vector<float> &preds = this->eval_preds_[ i ];
|
||||
this->InteractPredict( preds, *evals_[i], buffer_offset );
|
||||
buffer_offset += static_cast<int>( evals_[i]->Size() );
|
||||
}
|
||||
|
||||
this->GetGradient( preds_, train_->labels, grad_, hess_ );
|
||||
std::vector<unsigned> root_index;
|
||||
base_gbm.DoBoost( grad_, hess_, train_->data, root_index );
|
||||
|
||||
this->InteractRePredict( *train_, 0 );
|
||||
buffer_offset = static_cast<int>( train_->Size() );
|
||||
for( size_t i = 0; i < evals_.size(); ++i ){
|
||||
this->InteractRePredict( *evals_[i], buffer_offset );
|
||||
buffer_offset += static_cast<int>( evals_[i]->Size() );
|
||||
}
|
||||
}
|
||||
private:
|
||||
/*! \brief get the transformed predictions, given data */
|
||||
inline void InteractPredict( std::vector<float> &preds, const DMatrix &data, unsigned buffer_offset ){
|
||||
preds.resize( data.Size() );
|
||||
const unsigned ndata = static_cast<unsigned>( data.Size() );
|
||||
#pragma omp parallel for schedule( static )
|
||||
for( unsigned j = 0; j < ndata; ++ j ){
|
||||
preds[j] = mparam.PredTransform
|
||||
( mparam.base_score + base_gbm.InteractPredict( data.data, j, buffer_offset + j ) );
|
||||
}
|
||||
}
|
||||
/*! \brief repredict trial */
|
||||
inline void InteractRePredict( const DMatrix &data, unsigned buffer_offset ){
|
||||
const unsigned ndata = static_cast<unsigned>( data.Size() );
|
||||
#pragma omp parallel for schedule( static )
|
||||
for( unsigned j = 0; j < ndata; ++ j ){
|
||||
base_gbm.InteractRePredict( data.data, j, buffer_offset + j );
|
||||
}
|
||||
}
|
||||
private:
|
||||
@@ -186,7 +230,7 @@ namespace xgboost{
|
||||
#pragma omp parallel for schedule( static )
|
||||
for( unsigned j = 0; j < ndata; ++ j ){
|
||||
preds[j] = mparam.PredTransform
|
||||
( mparam.base_score + base_model.Predict( data.data, j, buffer_offset + j ) );
|
||||
( mparam.base_score + base_gbm.Predict( data.data, j, buffer_offset + j ) );
|
||||
}
|
||||
}
|
||||
|
||||
@@ -218,9 +262,16 @@ namespace xgboost{
|
||||
float base_score;
|
||||
/* \brief type of loss function */
|
||||
int loss_type;
|
||||
/* \brief number of features */
|
||||
int num_feature;
|
||||
/*! \brief reserved field */
|
||||
int reserved[ 16 ];
|
||||
/*! \brief constructor */
|
||||
ModelParam( void ){
|
||||
base_score = 0.5f;
|
||||
loss_type = 0;
|
||||
num_feature = 0;
|
||||
memset( reserved, 0, sizeof( reserved ) );
|
||||
}
|
||||
/*!
|
||||
* \brief set parameters from outside
|
||||
@@ -230,6 +281,7 @@ namespace xgboost{
|
||||
inline void SetParam( const char *name, const char *val ){
|
||||
if( !strcmp("base_score", name ) ) base_score = (float)atof( val );
|
||||
if( !strcmp("loss_type", name ) ) loss_type = atoi( val );
|
||||
if( !strcmp("bst:num_feature", name ) ) num_feature = atoi( val );
|
||||
}
|
||||
/*!
|
||||
* \brief adjust base_score
|
||||
@@ -330,7 +382,7 @@ namespace xgboost{
|
||||
private:
|
||||
int silent;
|
||||
EvalSet evaluator_;
|
||||
booster::GBMBaseModel base_model;
|
||||
booster::GBMBase base_gbm;
|
||||
ModelParam mparam;
|
||||
const DMatrix *train_;
|
||||
std::vector<DMatrix *> evals_;
|
||||
|
||||
@@ -39,6 +39,10 @@ namespace xgboost{
|
||||
this->TaskDump();
|
||||
return 0;
|
||||
}
|
||||
if( task == "interactive" ){
|
||||
this->TaskInteractive();
|
||||
return 0;
|
||||
}
|
||||
if( task == "dumppath" ){
|
||||
this->TaskDumpPath();
|
||||
return 0;
|
||||
@@ -60,6 +64,7 @@ namespace xgboost{
|
||||
if( !strcmp("data", name ) ) train_path = val;
|
||||
if( !strcmp("test:data", name ) ) test_path = val;
|
||||
if( !strcmp("model_in", name ) ) model_in = val;
|
||||
if( !strcmp("model_out", name ) ) model_out = val;
|
||||
if( !strcmp("model_dir", name ) ) model_dir_path = val;
|
||||
if( !strcmp("fmap", name ) ) name_fmap = val;
|
||||
if( !strcmp("name_dump", name ) ) name_dump = val;
|
||||
@@ -141,13 +146,30 @@ namespace xgboost{
|
||||
}
|
||||
// always save final round
|
||||
if( save_period == 0 || num_round % save_period != 0 ){
|
||||
this->SaveModel( num_round );
|
||||
if( model_out == "NULL" ){
|
||||
this->SaveModel( num_round );
|
||||
}else{
|
||||
this->SaveModel( model_out.c_str() );
|
||||
}
|
||||
}
|
||||
if( !silent ){
|
||||
printf("\nupdating end, %lu sec in all\n", elapsed );
|
||||
}
|
||||
}
|
||||
|
||||
inline void TaskInteractive( void ){
|
||||
const time_t start = time( NULL );
|
||||
unsigned long elapsed = 0;
|
||||
learner.UpdateInteract();
|
||||
utils::Assert( model_out != "NULL", "interactive mode must specify model_out" );
|
||||
this->SaveModel( model_out.c_str() );
|
||||
elapsed = (unsigned long)(time(NULL) - start);
|
||||
|
||||
if( !silent ){
|
||||
printf("\ninteractive update, %lu sec in all\n", elapsed );
|
||||
}
|
||||
}
|
||||
|
||||
inline void TaskDump( void ){
|
||||
FILE *fo = utils::FopenCheck( name_dump.c_str(), "w" );
|
||||
learner.DumpModel( fo, fmap, dump_model_stats != 0 );
|
||||
@@ -158,13 +180,16 @@ namespace xgboost{
|
||||
learner.DumpPath( fo, data );
|
||||
fclose( fo );
|
||||
}
|
||||
inline void SaveModel( int i ) const{
|
||||
char fname[256];
|
||||
sprintf( fname ,"%s/%04d.model", model_dir_path.c_str(), i+1 );
|
||||
inline void SaveModel( const char *fname ) const{
|
||||
utils::FileStream fo( utils::FopenCheck( fname, "wb" ) );
|
||||
learner.SaveModel( fo );
|
||||
fo.Close();
|
||||
}
|
||||
/*!
 * \brief save the model into model_dir_path under a numbered file name
 * \param i round index; the file is named "<model_dir_path>/NNNN.model"
 *        where NNNN is i+1 zero-padded to at least four digits
 */
inline void SaveModel( int i ) const{
    // only the number goes through sprintf; even a 64-bit int plus ".model"
    // fits in this buffer
    char round_name[32];
    sprintf( round_name, "%04d.model", i + 1 );
    // build the full path with std::string so an arbitrarily long
    // model_dir_path cannot overflow a fixed-size buffer
    const std::string fname = model_dir_path + "/" + round_name;
    this->SaveModel( fname.c_str() );
}
|
||||
inline void TaskPred( void ){
|
||||
std::vector<float> preds;
|
||||
if( !silent ) printf("start prediction...\n");
|
||||
@@ -189,6 +214,8 @@ namespace xgboost{
|
||||
std::string train_path, test_path;
|
||||
/* \brief the path of test model file, or file to restart training */
|
||||
std::string model_in;
|
||||
/* \brief the path of final model file, to be saved */
|
||||
std::string model_out;
|
||||
/* \brief the path of directory containing the saved models */
|
||||
std::string model_dir_path;
|
||||
/* \brief task to perform */
|
||||
|
||||
Reference in New Issue
Block a user