xgboost/utils/xgboost_fmap.h
2014-03-19 10:47:56 -07:00

124 lines
4.8 KiB
C++

#ifndef XGBOOST_FMAP_H
#define XGBOOST_FMAP_H
/*!
* \file xgboost_fmap.h
* \brief helper class that holds the feature names and interpretations
* \author Tianqi Chen: tianqi.tchen@gmail.com
*/
#include <vector>
#include <string>
#include <cstring>
#include "xgboost_utils.h"
namespace xgboost{
namespace utils{
/*! \brief helper class that holds the feature names and interpretations */
class FeatMap{
public:
enum Type{
kIndicator = 0,
kQuantitive = 1,
kInteger = 2,
kFloat = 3
};
public:
/*! \brief load feature map from text format */
inline void LoadText( const char *fname ){
FILE *fi = utils::FopenCheck( fname, "r" );
this->LoadText( fi );
fclose( fi );
}
/*! \brief load feature map from text format */
inline void LoadText( FILE *fi ){
int fid;
char fname[256], ftype[256];
while( fscanf( fi, "%d%s%s", &fid, fname, ftype ) == 3 ){
utils::Assert( fid == (int)names_.size(), "invalid fmap format" );
names_.push_back( std::string(fname) );
types_.push_back( GetType( ftype ) );
}
}
/*! \brief number of known features */
size_t size( void ) const{
return names_.size();
}
/*! \brief return name of specific feature */
const char* name( size_t idx ) const{
utils::Assert( idx < names_.size(), "utils::FMap::name feature index exceed bound" );
return names_[ idx ].c_str();
}
/*! \brief return type of specific feature */
const Type& type( size_t idx ) const{
utils::Assert( idx < names_.size(), "utils::FMap::name feature index exceed bound" );
return types_[ idx ];
}
private:
inline static Type GetType( const char *tname ){
if( !strcmp( "i", tname ) ) return kIndicator;
if( !strcmp( "q", tname ) ) return kQuantitive;
if( !strcmp( "int", tname ) ) return kInteger;
if( !strcmp( "float", tname ) ) return kFloat;
utils::Error("unknown feature type, use i for indicator and q for quantity");
return kIndicator;
}
private:
/*! \brief name of the feature */
std::vector<std::string> names_;
/*! \brief type of the feature */
std::vector<Type> types_;
};
}; // namespace utils
namespace utils{
/*! \brief feature constraint, allow or disallow some feature during training */
class FeatConstrain{
public:
FeatConstrain( void ){
default_state_ = +1;
}
/*!\brief set parameters */
inline void SetParam( const char *name, const char *val ){
int a, b;
if( !strcmp( name, "fban") ){
this->ParseRange( val, a, b );
this->SetRange( a, b, -1 );
}
if( !strcmp( name, "fpass") ){
this->ParseRange( val, a, b );
this->SetRange( a, b, +1 );
}
if( !strcmp( name, "fdefault") ){
default_state_ = atoi( val );
}
}
/*! \brief whether constrain is specified */
inline bool HasConstrain( void ) const {
return state_.size() != 0 && default_state_ == 1;
}
/*! \brief whether a feature index is banned or not */
inline bool NotBanned( unsigned index ) const{
int rt = index < state_.size() ? state_[index] : default_state_;
if( rt == 0 ) rt = default_state_;
return rt == 1;
}
private:
inline void SetRange( int a, int b, int st ){
if( b > (int)state_.size() ) state_.resize( b, 0 );
for( int i = a; i < b; ++ i ){
state_[i] = st;
}
}
inline void ParseRange( const char *val, int &a, int &b ){
if( sscanf( val, "%d-%d", &a, &b ) == 2 ) return;
utils::Assert( sscanf( val, "%d", &a ) == 1 );
b = a + 1;
}
/*! \brief default state */
int default_state_;
/*! \brief whether the state here is, +1:pass, -1: ban, 0:default */
std::vector<int> state_;
};
}; // namespace utils
}; // namespace xgboost
#endif // XGBOOST_FMAP_H