124 lines
4.7 KiB
C++
124 lines
4.7 KiB
C++
#ifndef XGBOOST_FMAP_H
|
|
#define XGBOOST_FMAP_H
|
|
/*!
|
|
* \file xgboost_fmap.h
|
|
* \brief helper class that holds the feature names and interpretations
|
|
* \author Tianqi Chen: tianqi.tchen@gmail.com
|
|
*/
|
|
#include <vector>
|
|
#include <string>
|
|
#include <cstring>
|
|
#include "xgboost_utils.h"
|
|
|
|
namespace xgboost{
|
|
namespace utils{
|
|
/*! \brief helper class that holds the feature names and interpretations */
|
|
class FeatMap{
|
|
public:
|
|
enum Type{
|
|
kIndicator = 0,
|
|
kQuantitive = 1,
|
|
kInteger = 2,
|
|
kFloat = 3
|
|
};
|
|
public:
|
|
/*! \brief load feature map from text format */
|
|
inline void LoadText(const char *fname){
|
|
FILE *fi = utils::FopenCheck(fname, "r");
|
|
this->LoadText(fi);
|
|
fclose(fi);
|
|
}
|
|
/*! \brief load feature map from text format */
|
|
inline void LoadText(FILE *fi){
|
|
int fid;
|
|
char fname[256], ftype[256];
|
|
while (fscanf(fi, "%d%s%s", &fid, fname, ftype) == 3){
|
|
utils::Assert(fid == (int)names_.size(), "invalid fmap format");
|
|
names_.push_back(std::string(fname));
|
|
types_.push_back(GetType(ftype));
|
|
}
|
|
}
|
|
/*! \brief number of known features */
|
|
size_t size(void) const{
|
|
return names_.size();
|
|
}
|
|
/*! \brief return name of specific feature */
|
|
const char* name(size_t idx) const{
|
|
utils::Assert(idx < names_.size(), "utils::FMap::name feature index exceed bound");
|
|
return names_[idx].c_str();
|
|
}
|
|
/*! \brief return type of specific feature */
|
|
const Type& type(size_t idx) const{
|
|
utils::Assert(idx < names_.size(), "utils::FMap::name feature index exceed bound");
|
|
return types_[idx];
|
|
}
|
|
private:
|
|
inline static Type GetType(const char *tname){
|
|
if (!strcmp("i", tname)) return kIndicator;
|
|
if (!strcmp("q", tname)) return kQuantitive;
|
|
if (!strcmp("int", tname)) return kInteger;
|
|
if (!strcmp("float", tname)) return kFloat;
|
|
utils::Error("unknown feature type, use i for indicator and q for quantity");
|
|
return kIndicator;
|
|
}
|
|
private:
|
|
/*! \brief name of the feature */
|
|
std::vector<std::string> names_;
|
|
/*! \brief type of the feature */
|
|
std::vector<Type> types_;
|
|
};
|
|
}; // namespace utils
|
|
|
|
namespace utils{
|
|
/*! \brief feature constraint, allow or disallow some feature during training */
|
|
class FeatConstrain{
|
|
public:
|
|
FeatConstrain(void){
|
|
default_state_ = +1;
|
|
}
|
|
/*!\brief set parameters */
|
|
inline void SetParam(const char *name, const char *val){
|
|
int a, b;
|
|
if (!strcmp(name, "fban")){
|
|
this->ParseRange(val, a, b);
|
|
this->SetRange(a, b, -1);
|
|
}
|
|
if (!strcmp(name, "fpass")){
|
|
this->ParseRange(val, a, b);
|
|
this->SetRange(a, b, +1);
|
|
}
|
|
if (!strcmp(name, "fdefault")){
|
|
default_state_ = atoi(val);
|
|
}
|
|
}
|
|
/*! \brief whether constrain is specified */
|
|
inline bool HasConstrain(void) const {
|
|
return state_.size() != 0 && default_state_ == 1;
|
|
}
|
|
/*! \brief whether a feature index is banned or not */
|
|
inline bool NotBanned(unsigned index) const{
|
|
int rt = index < state_.size() ? state_[index] : default_state_;
|
|
if (rt == 0) rt = default_state_;
|
|
return rt == 1;
|
|
}
|
|
private:
|
|
inline void SetRange(int a, int b, int st){
|
|
if (b >(int)state_.size()) state_.resize(b, 0);
|
|
for (int i = a; i < b; ++i){
|
|
state_[i] = st;
|
|
}
|
|
}
|
|
inline void ParseRange(const char *val, int &a, int &b){
|
|
if (sscanf(val, "%d-%d", &a, &b) == 2) return;
|
|
utils::Assert(sscanf(val, "%d", &a) == 1);
|
|
b = a + 1;
|
|
}
|
|
/*! \brief default state */
|
|
int default_state_;
|
|
/*! \brief whether the state here is, +1:pass, -1: ban, 0:default */
|
|
std::vector<int> state_;
|
|
};
|
|
}; // namespace utils
|
|
}; // namespace xgboost
|
|
#endif // XGBOOST_FMAP_H
|