fix fmap
This commit is contained in:
parent
074a861e7b
commit
623e003923
@ -155,6 +155,7 @@ namespace xgboost{
|
|||||||
for( int nid = 0; nid < tree.param.num_nodes; ++ nid ){
|
for( int nid = 0; nid < tree.param.num_nodes; ++ nid ){
|
||||||
tree.stat( nid ).leaf_child_cnt = 0;
|
tree.stat( nid ).leaf_child_cnt = 0;
|
||||||
tree.stat( nid ).loss_chg = snode[ nid ].best.loss_chg;
|
tree.stat( nid ).loss_chg = snode[ nid ].best.loss_chg;
|
||||||
|
tree.stat( nid ).sum_hess = static_cast<float>( snode[ nid ].sum_hess );
|
||||||
}
|
}
|
||||||
for( int nid = 0; nid < tree.param.num_nodes; ++ nid ){
|
for( int nid = 0; nid < tree.param.num_nodes; ++ nid ){
|
||||||
if( tree[ nid ].is_leaf() ) this->TryPruneLeaf( nid, tree.GetDepth(nid) );
|
if( tree[ nid ].is_leaf() ) this->TryPruneLeaf( nid, tree.GetDepth(nid) );
|
||||||
|
|||||||
@ -156,16 +156,18 @@ namespace xgboost{
|
|||||||
sum_hess += hess[ ridx ];
|
sum_hess += hess[ ridx ];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
tree.stat( tsk.nid ).sum_hess = static_cast<float>( sum_hess );
|
||||||
tree[ tsk.nid ].set_leaf( param.learning_rate * param.CalcWeight( sum_grad, sum_hess, tsk.parent_base_weight ) );
|
tree[ tsk.nid ].set_leaf( param.learning_rate * param.CalcWeight( sum_grad, sum_hess, tsk.parent_base_weight ) );
|
||||||
this->try_prune_leaf( tsk.nid, tree.GetDepth( tsk.nid ) );
|
this->try_prune_leaf( tsk.nid, tree.GetDepth( tsk.nid ) );
|
||||||
}
|
}
|
||||||
private:
|
private:
|
||||||
// make split for current task, re-arrange positions in idset
|
// make split for current task, re-arrange positions in idset
|
||||||
inline void make_split( Task tsk, const SCEntry *entry, int num, float loss_chg, double base_weight ){
|
inline void make_split( Task tsk, const SCEntry *entry, int num, float loss_chg, double sum_hess, double base_weight ){
|
||||||
// before split, first prepare statistics
|
// before split, first prepare statistics
|
||||||
RegTree::NodeStat &s = tree.stat( tsk.nid );
|
RegTree::NodeStat &s = tree.stat( tsk.nid );
|
||||||
s.loss_chg = loss_chg;
|
s.loss_chg = loss_chg;
|
||||||
s.leaf_child_cnt = 0;
|
s.leaf_child_cnt = 0;
|
||||||
|
s.sum_hess = static_cast<float>( sum_hess );
|
||||||
s.base_weight = static_cast<float>( base_weight );
|
s.base_weight = static_cast<float>( base_weight );
|
||||||
|
|
||||||
// add childs to current node
|
// add childs to current node
|
||||||
@ -345,7 +347,7 @@ namespace xgboost{
|
|||||||
// add splits
|
// add splits
|
||||||
tree[ tsk.nid ].set_split( e.split_index(), e.split_value, e.default_left() );
|
tree[ tsk.nid ].set_split( e.split_index(), e.split_value, e.default_left() );
|
||||||
// re-arrange idset, push tasks
|
// re-arrange idset, push tasks
|
||||||
this->make_split( tsk, &entry[ e.start ], e.len, e.loss_chg, base_weight );
|
this->make_split( tsk, &entry[ e.start ], e.len, e.loss_chg, rsum_hess, base_weight );
|
||||||
}else{
|
}else{
|
||||||
// make leaf if we didn't meet requirement
|
// make leaf if we didn't meet requirement
|
||||||
this->make_leaf( tsk, rsum_grad, rsum_hess, false );
|
this->make_leaf( tsk, rsum_grad, rsum_hess, false );
|
||||||
|
|||||||
@ -105,8 +105,8 @@ namespace xgboost{
|
|||||||
int pid = this->GetLeafIndex( feat, funknown, gid );
|
int pid = this->GetLeafIndex( feat, funknown, gid );
|
||||||
return tree[ pid ].leaf_value();
|
return tree[ pid ].leaf_value();
|
||||||
}
|
}
|
||||||
virtual void DumpModel( FILE *fo ){
|
virtual void DumpModel( FILE *fo, const utils::FeatMap &fmap, bool with_stats ){
|
||||||
tree.DumpModel( fo );
|
tree.DumpModel( fo, fmap, with_stats );
|
||||||
}
|
}
|
||||||
private:
|
private:
|
||||||
template<typename FMatrix>
|
template<typename FMatrix>
|
||||||
@ -172,8 +172,7 @@ namespace xgboost{
|
|||||||
inline int GetNext( int pid, float fvalue, bool is_unknown ){
|
inline int GetNext( int pid, float fvalue, bool is_unknown ){
|
||||||
float split_value = tree[ pid ].split_cond();
|
float split_value = tree[ pid ].split_cond();
|
||||||
if( is_unknown ){
|
if( is_unknown ){
|
||||||
if( tree[ pid ].default_left() ) return tree[ pid ].cleft();
|
return tree[ pid ].cdefault();
|
||||||
else return tree[ pid ].cright();
|
|
||||||
}else{
|
}else{
|
||||||
if( fvalue < split_value ) return tree[ pid ].cleft();
|
if( fvalue < split_value ) return tree[ pid ].cleft();
|
||||||
else return tree[ pid ].cright();
|
else return tree[ pid ].cright();
|
||||||
|
|||||||
@ -89,6 +89,10 @@ namespace xgboost{
|
|||||||
inline int cright( void ) const{
|
inline int cright( void ) const{
|
||||||
return this->cright_;
|
return this->cright_;
|
||||||
}
|
}
|
||||||
|
/*! \brief index of default child when feature is missing */
|
||||||
|
inline int cdefault( void ) const{
|
||||||
|
return this->default_left() ? this->cleft() : this->cright();
|
||||||
|
}
|
||||||
/*! \brief feature index of split condition */
|
/*! \brief feature index of split condition */
|
||||||
inline unsigned split_index( void ) const{
|
inline unsigned split_index( void ) const{
|
||||||
return sindex_ & ( (1U<<31) - 1U );
|
return sindex_ & ( (1U<<31) - 1U );
|
||||||
@ -228,8 +232,9 @@ namespace xgboost{
|
|||||||
*/
|
*/
|
||||||
inline void LoadModel( utils::IStream &fi ){
|
inline void LoadModel( utils::IStream &fi ){
|
||||||
utils::Assert( fi.Read( ¶m, sizeof(Param) ) > 0, "TreeModel" );
|
utils::Assert( fi.Read( ¶m, sizeof(Param) ) > 0, "TreeModel" );
|
||||||
nodes.resize( param.num_nodes );
|
nodes.resize( param.num_nodes ); stats.resize( param.num_nodes );
|
||||||
utils::Assert( fi.Read( &nodes[0], sizeof(Node) * nodes.size() ) > 0, "TreeModel::Node" );
|
utils::Assert( fi.Read( &nodes[0], sizeof(Node) * nodes.size() ) > 0, "TreeModel::Node" );
|
||||||
|
utils::Assert( fi.Read( &stats[0], sizeof(NodeStat) * stats.size() ) > 0, "TreeModel::Node" );
|
||||||
|
|
||||||
deleted_nodes.resize( 0 );
|
deleted_nodes.resize( 0 );
|
||||||
for( int i = param.num_roots; i < param.num_nodes; i ++ ){
|
for( int i = param.num_roots; i < param.num_nodes; i ++ ){
|
||||||
@ -243,8 +248,10 @@ namespace xgboost{
|
|||||||
*/
|
*/
|
||||||
inline void SaveModel( utils::IStream &fo ) const{
|
inline void SaveModel( utils::IStream &fo ) const{
|
||||||
utils::Assert( param.num_nodes == (int)nodes.size() );
|
utils::Assert( param.num_nodes == (int)nodes.size() );
|
||||||
|
utils::Assert( param.num_nodes == (int)stats.size() );
|
||||||
fo.Write( ¶m, sizeof(Param) );
|
fo.Write( ¶m, sizeof(Param) );
|
||||||
fo.Write( &nodes[0], sizeof(Node) * nodes.size() );
|
fo.Write( &nodes[0], sizeof(Node) * nodes.size() );
|
||||||
|
fo.Write( &stats[0], sizeof(NodeStat) * nodes.size() );
|
||||||
}
|
}
|
||||||
/*!
|
/*!
|
||||||
* \brief add child nodes to node
|
* \brief add child nodes to node
|
||||||
@ -285,23 +292,50 @@ namespace xgboost{
|
|||||||
return param.num_nodes - param.num_roots - param.num_deleted;
|
return param.num_nodes - param.num_roots - param.num_deleted;
|
||||||
}
|
}
|
||||||
/*! \brief dump model to text file */
|
/*! \brief dump model to text file */
|
||||||
inline void DumpModel( FILE *fo ){
|
inline void DumpModel( FILE *fo, const utils::FeatMap& fmap, bool with_stats ){
|
||||||
this->Dump( 0, fo, 0 );
|
this->Dump( 0, fo, fmap, 0, with_stats );
|
||||||
}
|
}
|
||||||
private:
|
private:
|
||||||
void Dump( int nid, FILE *fo, int depth ){
|
void Dump( int nid, FILE *fo, const utils::FeatMap& fmap, int depth, bool with_stats ){
|
||||||
for( int i = 0; i < depth; ++ i ){
|
for( int i = 0; i < depth; ++ i ){
|
||||||
fprintf( fo, "\t" );
|
fprintf( fo, "\t" );
|
||||||
}
|
}
|
||||||
if( nodes[ nid ].is_leaf() ){
|
if( nodes[ nid ].is_leaf() ){
|
||||||
fprintf( fo, "%d:leaf=%f\n", nid, nodes[ nid ].leaf_value() );
|
fprintf( fo, "%d:leaf=%f ", nid, nodes[ nid ].leaf_value() );
|
||||||
|
if( with_stats ){
|
||||||
|
stat( nid ).Print( fo, true );
|
||||||
|
}
|
||||||
|
fprintf( fo, "\n" );
|
||||||
}else{
|
}else{
|
||||||
// right then left,
|
// right then left,
|
||||||
TSplitCond cond = nodes[ nid ].split_cond();
|
TSplitCond cond = nodes[ nid ].split_cond();
|
||||||
fprintf( fo, "%d:[f%u<%f] yes=%d,no=%d\n", nid,
|
const unsigned split_index = nodes[ nid ].split_index();
|
||||||
nodes[ nid ].split_index(), float(cond), nodes[ nid ].cleft(), nodes[ nid ].cright() );
|
|
||||||
this->Dump( nodes[ nid ].cleft(), fo, depth+1 );
|
if( split_index < fmap.size() ){
|
||||||
this->Dump( nodes[ nid ].cright(), fo, depth+1 );
|
if( fmap.type(split_index) == utils::FeatMap::kIndicator ){
|
||||||
|
int nyes = nodes[ nid ].default_left()?nodes[nid].cright():nodes[nid].cleft();
|
||||||
|
fprintf( fo, "%d:[%s] yes=%d,no=%d",
|
||||||
|
nid, fmap.name( split_index ),
|
||||||
|
nyes, nodes[nid].cdefault() );
|
||||||
|
}else{
|
||||||
|
fprintf( fo, "%d:[%s<%f] yes=%d,no=%d,missing=%d",
|
||||||
|
nid, fmap.name(split_index), float(cond),
|
||||||
|
nodes[ nid ].cleft(), nodes[ nid ].cright(),
|
||||||
|
nodes[ nid ].cdefault() );
|
||||||
|
}
|
||||||
|
}else{
|
||||||
|
fprintf( fo, "%d:[f%u<%f] yes=%d,no=%d,missing=%d",
|
||||||
|
nid, split_index, float(cond),
|
||||||
|
nodes[ nid ].cleft(), nodes[ nid ].cright(),
|
||||||
|
nodes[ nid ].cdefault() );
|
||||||
|
}
|
||||||
|
if( with_stats ){
|
||||||
|
fprintf( fo, " ");
|
||||||
|
stat( nid ).Print( fo, false );
|
||||||
|
}
|
||||||
|
fprintf( fo, "\n" );
|
||||||
|
this->Dump( nodes[ nid ].cleft(), fo, fmap, depth+1, with_stats );
|
||||||
|
this->Dump( nodes[ nid ].cright(), fo, fmap, depth+1, with_stats );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
@ -447,12 +481,22 @@ namespace xgboost{
|
|||||||
namespace booster{
|
namespace booster{
|
||||||
/*! \brief node statistics used in regression tree */
|
/*! \brief node statistics used in regression tree */
|
||||||
struct RTreeNodeStat{
|
struct RTreeNodeStat{
|
||||||
// loss chg caused by current split
|
/*! \brief loss chg caused by current split */
|
||||||
float loss_chg;
|
float loss_chg;
|
||||||
// weight of current node
|
/*! \brief sum of hessian values, used to measure coverage of data */
|
||||||
|
float sum_hess;
|
||||||
|
/*! \brief weight of current node */
|
||||||
float base_weight;
|
float base_weight;
|
||||||
// number of child that is leaf node known up to now
|
/*! \brief number of child that is leaf node known up to now */
|
||||||
int leaf_child_cnt;
|
int leaf_child_cnt;
|
||||||
|
/*! \brief print information of current stats to fo */
|
||||||
|
inline void Print( FILE *fo, bool is_leaf ) const{
|
||||||
|
if( !is_leaf ){
|
||||||
|
fprintf( fo, "gain=%f,cover=%f", loss_chg, sum_hess );
|
||||||
|
}else{
|
||||||
|
fprintf( fo, "cover=%f", sum_hess );
|
||||||
|
}
|
||||||
|
}
|
||||||
};
|
};
|
||||||
/*! \brief most comment structure of regression tree */
|
/*! \brief most comment structure of regression tree */
|
||||||
class RegTree: public TreeModel<bst_float,RTreeNodeStat>{
|
class RegTree: public TreeModel<bst_float,RTreeNodeStat>{
|
||||||
|
|||||||
@ -8,6 +8,7 @@
|
|||||||
*/
|
*/
|
||||||
#include <vector>
|
#include <vector>
|
||||||
#include "../utils/xgboost_utils.h"
|
#include "../utils/xgboost_utils.h"
|
||||||
|
#include "../utils/xgboost_fmap.h"
|
||||||
#include "../utils/xgboost_stream.h"
|
#include "../utils/xgboost_stream.h"
|
||||||
#include "../utils/xgboost_config.h"
|
#include "../utils/xgboost_config.h"
|
||||||
#include "xgboost_data.h"
|
#include "xgboost_data.h"
|
||||||
@ -107,8 +108,10 @@ namespace xgboost{
|
|||||||
/*!
|
/*!
|
||||||
* \brief dump model into text file
|
* \brief dump model into text file
|
||||||
* \param fo output stream
|
* \param fo output stream
|
||||||
|
* \param fmap feature map that may help give interpretations of feature
|
||||||
|
* \param with_stats whether print statistics
|
||||||
*/
|
*/
|
||||||
virtual void DumpModel( FILE *fo ){
|
virtual void DumpModel( FILE *fo, const utils::FeatMap& fmap, bool with_stats = false ){
|
||||||
utils::Error( "not implemented" );
|
utils::Error( "not implemented" );
|
||||||
}
|
}
|
||||||
public:
|
public:
|
||||||
|
|||||||
@ -188,11 +188,13 @@ namespace xgboost{
|
|||||||
/*!
|
/*!
|
||||||
* \brief DumpModel
|
* \brief DumpModel
|
||||||
* \param fo text file
|
* \param fo text file
|
||||||
|
* \param fmap feature map that may help give interpretations of feature
|
||||||
|
* \param with_stats whether print statistics
|
||||||
*/
|
*/
|
||||||
inline void DumpModel( FILE *fo ){
|
inline void DumpModel( FILE *fo, const utils::FeatMap& fmap, bool with_stats ){
|
||||||
for( size_t i = 0; i < boosters.size(); i ++ ){
|
for( size_t i = 0; i < boosters.size(); i ++ ){
|
||||||
fprintf( fo, "booster[%d]\n", (int)i );
|
fprintf( fo, "booster[%d]\n", (int)i );
|
||||||
boosters[i]->DumpModel( fo );
|
boosters[i]->DumpModel( fo, fmap, with_stats );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
/*!
|
/*!
|
||||||
|
|||||||
@ -25,11 +25,11 @@ def loadfmap( fname ):
|
|||||||
|
|
||||||
def write_nmap( fo, nmap ):
|
def write_nmap( fo, nmap ):
|
||||||
for i in xrange( len(nmap) ):
|
for i in xrange( len(nmap) ):
|
||||||
fo.write('%d\t%s\n' % (i, nmap[i]) )
|
fo.write('%d\t%s\ti\n' % (i, nmap[i]) )
|
||||||
|
|
||||||
# start here
|
# start here
|
||||||
fmap, nmap = loadfmap( 'agaricus-lepiota.fmap' )
|
fmap, nmap = loadfmap( 'agaricus-lepiota.fmap' )
|
||||||
fo = open( 'featname.txt', 'w' )
|
fo = open( 'featmap.txt', 'w' )
|
||||||
write_nmap( fo, nmap )
|
write_nmap( fo, nmap )
|
||||||
fo.close()
|
fo.close()
|
||||||
|
|
||||||
|
|||||||
@ -2,5 +2,9 @@
|
|||||||
python mapfeat.py
|
python mapfeat.py
|
||||||
python mknfold.py agaricus.txt 1
|
python mknfold.py agaricus.txt 1
|
||||||
../../xgboost mushroom.conf
|
../../xgboost mushroom.conf
|
||||||
../../xgboost mushroom.conf task=dump model_in=0003.model
|
# this is what the dump will look like without a feature map
|
||||||
python maptree.py
|
../../xgboost mushroom.conf task=dump model_in=0003.model name_dump=dump.raw.txt
|
||||||
|
# this is what the dump will look like with a feature map
|
||||||
|
../../xgboost mushroom.conf task=dump model_in=0003.model fmap=featmap.txt name_dump=dump.nice.txt
|
||||||
|
cat dump.nice.txt
|
||||||
|
|
||||||
|
|||||||
@ -116,9 +116,11 @@ namespace xgboost{
|
|||||||
/*!
|
/*!
|
||||||
* \brief DumpModel
|
* \brief DumpModel
|
||||||
* \param fo text file
|
* \param fo text file
|
||||||
|
* \param fmap feature map that may help give interpretations of feature
|
||||||
|
* \param with_stats whether print statistics as well
|
||||||
*/
|
*/
|
||||||
inline void DumpModel( FILE *fo ){
|
inline void DumpModel( FILE *fo, const utils::FeatMap& fmap, bool with_stats ){
|
||||||
base_model.DumpModel( fo );
|
base_model.DumpModel( fo, fmap, with_stats );
|
||||||
}
|
}
|
||||||
/*!
|
/*!
|
||||||
* \brief Dump path of all trees
|
* \brief Dump path of all trees
|
||||||
|
|||||||
@ -5,6 +5,7 @@
|
|||||||
#include <string>
|
#include <string>
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
#include "xgboost_reg.h"
|
#include "xgboost_reg.h"
|
||||||
|
#include "../utils/xgboost_fmap.h"
|
||||||
#include "../utils/xgboost_random.h"
|
#include "../utils/xgboost_random.h"
|
||||||
#include "../utils/xgboost_config.h"
|
#include "../utils/xgboost_config.h"
|
||||||
|
|
||||||
@ -60,6 +61,10 @@ namespace xgboost{
|
|||||||
if( !strcmp("test:data", name ) ) test_path = val;
|
if( !strcmp("test:data", name ) ) test_path = val;
|
||||||
if( !strcmp("model_in", name ) ) model_in = val;
|
if( !strcmp("model_in", name ) ) model_in = val;
|
||||||
if( !strcmp("model_dir", name ) ) model_dir_path = val;
|
if( !strcmp("model_dir", name ) ) model_dir_path = val;
|
||||||
|
if( !strcmp("fmap", name ) ) name_fmap = val;
|
||||||
|
if( !strcmp("name_dump", name ) ) name_dump = val;
|
||||||
|
if( !strcmp("name_pred", name ) ) name_pred = val;
|
||||||
|
if( !strcmp("dump_stats", name ) ) dump_model_stats = atoi( val );
|
||||||
if( !strncmp("eval[", name, 5 ) ) {
|
if( !strncmp("eval[", name, 5 ) ) {
|
||||||
char evname[ 256 ];
|
char evname[ 256 ];
|
||||||
utils::Assert( sscanf( name, "eval[%[^]]", evname ) == 1, "must specify evaluation name for display");
|
utils::Assert( sscanf( name, "eval[%[^]]", evname ) == 1, "must specify evaluation name for display");
|
||||||
@ -75,8 +80,10 @@ namespace xgboost{
|
|||||||
use_buffer = 1;
|
use_buffer = 1;
|
||||||
num_round = 10;
|
num_round = 10;
|
||||||
save_period = 0;
|
save_period = 0;
|
||||||
|
dump_model_stats = 0;
|
||||||
task = "train";
|
task = "train";
|
||||||
model_in = "NULL";
|
model_in = "NULL";
|
||||||
|
name_fmap = "NULL";
|
||||||
name_pred = "pred.txt";
|
name_pred = "pred.txt";
|
||||||
name_dump = "dump.txt";
|
name_dump = "dump.txt";
|
||||||
name_dumppath = "dump.path.txt";
|
name_dumppath = "dump.path.txt";
|
||||||
@ -89,7 +96,8 @@ namespace xgboost{
|
|||||||
}
|
}
|
||||||
private:
|
private:
|
||||||
inline void InitData( void ){
|
inline void InitData( void ){
|
||||||
if( task == "dump") return;
|
if( name_fmap != "NULL" ) fmap.LoadText( name_fmap.c_str() );
|
||||||
|
if( task == "dump" ) return;
|
||||||
if( task == "test" || task == "dumppath" ){
|
if( task == "test" || task == "dumppath" ){
|
||||||
data.CacheLoad( test_path.c_str(), silent!=0, use_buffer!=0 );
|
data.CacheLoad( test_path.c_str(), silent!=0, use_buffer!=0 );
|
||||||
}else{
|
}else{
|
||||||
@ -142,7 +150,7 @@ namespace xgboost{
|
|||||||
|
|
||||||
inline void TaskDump( void ){
|
inline void TaskDump( void ){
|
||||||
FILE *fo = utils::FopenCheck( name_dump.c_str(), "w" );
|
FILE *fo = utils::FopenCheck( name_dump.c_str(), "w" );
|
||||||
learner.DumpModel( fo );
|
learner.DumpModel( fo, fmap, dump_model_stats != 0 );
|
||||||
fclose( fo );
|
fclose( fo );
|
||||||
}
|
}
|
||||||
inline void TaskDumpPath( void ){
|
inline void TaskDumpPath( void ){
|
||||||
@ -187,6 +195,10 @@ namespace xgboost{
|
|||||||
std::string task;
|
std::string task;
|
||||||
/* \brief name of predict file */
|
/* \brief name of predict file */
|
||||||
std::string name_pred;
|
std::string name_pred;
|
||||||
|
/* \brief whether dump statistics along with model */
|
||||||
|
int dump_model_stats;
|
||||||
|
/* \brief name of feature map */
|
||||||
|
std::string name_fmap;
|
||||||
/* \brief name of dump file */
|
/* \brief name of dump file */
|
||||||
std::string name_dump;
|
std::string name_dump;
|
||||||
/* \brief name of dump path file */
|
/* \brief name of dump path file */
|
||||||
@ -200,6 +212,7 @@ namespace xgboost{
|
|||||||
private:
|
private:
|
||||||
DMatrix data;
|
DMatrix data;
|
||||||
std::vector<DMatrix*> deval;
|
std::vector<DMatrix*> deval;
|
||||||
|
utils::FeatMap fmap;
|
||||||
RegBoostLearner learner;
|
RegBoostLearner learner;
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|||||||
68
utils/xgboost_fmap.h
Normal file
68
utils/xgboost_fmap.h
Normal file
@ -0,0 +1,68 @@
|
|||||||
|
#ifndef XGBOOST_FMAP_H
|
||||||
|
#define XGBOOST_FMAP_H
|
||||||
|
/*!
|
||||||
|
* \file xgboost_fmap.h
|
||||||
|
* \brief helper class that holds the feature names and interpretations
|
||||||
|
* \author Tianqi Chen: tianqi.tchen@gmail.com
|
||||||
|
*/
|
||||||
|
#include <vector>
|
||||||
|
#include <string>
|
||||||
|
#include <cstring>
|
||||||
|
#include "xgboost_utils.h"
|
||||||
|
|
||||||
|
namespace xgboost{
|
||||||
|
namespace utils{
|
||||||
|
/*! \brief helper class that holds the feature names and interpretations */
|
||||||
|
class FeatMap{
|
||||||
|
public:
|
||||||
|
enum Type{
|
||||||
|
kIndicator = 0,
|
||||||
|
kQuantitive = 1
|
||||||
|
};
|
||||||
|
public:
|
||||||
|
/*! \brief load feature map from text format */
|
||||||
|
inline void LoadText( const char *fname ){
|
||||||
|
FILE *fi = utils::FopenCheck( fname, "r" );
|
||||||
|
this->LoadText( fi );
|
||||||
|
fclose( fi );
|
||||||
|
}
|
||||||
|
/*! \brief load feature map from text format */
|
||||||
|
inline void LoadText( FILE *fi ){
|
||||||
|
int fid;
|
||||||
|
char fname[256], ftype[256];
|
||||||
|
while( fscanf( fi, "%d%s%s", &fid, fname, ftype ) == 3 ){
|
||||||
|
utils::Assert( fid == (int)names_.size(), "invalid fmap format" );
|
||||||
|
names_.push_back( std::string(fname) );
|
||||||
|
types_.push_back( GetType( ftype ) );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
/*! \brief number of known features */
|
||||||
|
size_t size( void ) const{
|
||||||
|
return names_.size();
|
||||||
|
}
|
||||||
|
/*! \brief return name of specific feature */
|
||||||
|
const char* name( size_t idx ) const{
|
||||||
|
utils::Assert( idx < names_.size(), "utils::FMap::name feature index exceed bound" );
|
||||||
|
return names_[ idx ].c_str();
|
||||||
|
}
|
||||||
|
/*! \brief return type of specific feature */
|
||||||
|
const Type& type( size_t idx ) const{
|
||||||
|
utils::Assert( idx < names_.size(), "utils::FMap::name feature index exceed bound" );
|
||||||
|
return types_[ idx ];
|
||||||
|
}
|
||||||
|
private:
|
||||||
|
inline static Type GetType( const char *tname ){
|
||||||
|
if( !strcmp( "i", tname ) ) return kIndicator;
|
||||||
|
if( !strcmp( "q", tname ) ) return kQuantitive;
|
||||||
|
utils::Error("unknown feature type, use i for indicator and q for quantity");
|
||||||
|
return kIndicator;
|
||||||
|
}
|
||||||
|
private:
|
||||||
|
/*! \brief name of the feature */
|
||||||
|
std::vector<std::string> names_;
|
||||||
|
/*! \brief type of the feature */
|
||||||
|
std::vector<Type> types_;
|
||||||
|
};
|
||||||
|
}; // namespace utils
|
||||||
|
}; // namespace xgboost
|
||||||
|
#endif // XGBOOST_FMAP_H
|
||||||
Loading…
x
Reference in New Issue
Block a user