xgboost/booster/gbrt.h
2014-02-11 11:07:00 +08:00

102 lines
2.6 KiB
C++

#ifndef _GBRT_H_
#define _GBRT_H_
#include <math.h>
#include <cstdlib>
#include <cstring>
#include <string>
#include <vector>
#include "../utils/xgboost_config.h"
#include "../utils/xgboost_stream.h"
#include "xgboost_regression_data_reader.h"
#include "xgboost_gbmbase.h"
using namespace xgboost::utils;
using namespace xgboost::booster;
/*!
 * \brief trainer that repeatedly boosts a GBMBaseModel using first and second
 *        order statistics computed from a logistic transform of the current
 *        predictions
 */
class gbrt{
public:
    /*!
     * \brief construct the trainer from a configuration file;
     *        every (name, value) pair produced by the ConfigIterator is handed
     *        both to this class' own parameters and to the underlying base model
     * \param config_path path of the configuration file
     */
    gbrt(const char* config_path){
        ConfigIterator config_itr(config_path);
        while(config_itr.Next()){
            SetParam(config_itr.name, config_itr.val);
            base_model.SetParam(config_itr.name, config_itr.val);
        }
    }
    /*!
     * \brief set a trainer-level parameter (see GBRTParam::SetParam)
     * \param name name of the parameter
     * \param val value of the parameter
     */
    void SetParam( const char *name, const char *val ){
        param.SetParam(name, val);
    }
    /*!
     * \brief load the training data named by param.train_file_path, initialise
     *        the base model and its trainer, then run param.num_round boosting
     *        rounds; each round recomputes the per-instance statistics from the
     *        current predictions and passes them to GBMBaseModel::DoBoost
     */
    void train(){
        // an unset path is forwarded as an empty string; how that is reported
        // is left to the data reader
        xgboost_regression_data_reader data_reader(param.train_file_path.c_str());
        base_model.InitModel();
        base_model.InitTrainer();

        const int instance_num = data_reader.InsNum();
        // sized once up front; every slot is overwritten in every round
        std::vector<float> grad, hess;
        grad.resize(instance_num);
        hess.resize(instance_num);
        // never filled here, handed to DoBoost as an empty vector
        std::vector<unsigned> root_index;

        for(int round = 0; round < param.num_round; round++){
            for(int j = 0; j < instance_num; j++){
                const float label = data_reader.GetLabel(j);
                const float pred_transform = Logistic(Predict(data_reader.GetLine(j)));
                grad[j] = FirstOrderGradient(pred_transform, label);
                hess[j] = SecondOrderGradient(pred_transform);
            }
            base_model.DoBoost(grad, hess, data_reader.GetImage(), root_index);
        }
    }
    /*!
     * \brief save the base model to a stream
     * \param fo output stream, forwarded to GBMBaseModel::SaveModel
     */
    inline void SaveModel(IStream &fo ){
        base_model.SaveModel(fo);
    }
    /*!
     * \brief load the base model from a stream
     * \param fi input stream, forwarded to GBMBaseModel::LoadModel
     */
    inline void LoadModel(IStream &fi ){
        base_model.LoadModel(fi);
    }
    /*!
     * \brief predict for one sparse feature line; all arguments are forwarded
     *        unchanged to GBMBaseModel::Predict
     * \param feat feature line of the instance
     * \param buffer_index forwarded to the base model, -1 by default
     * \param rid forwarded to the base model, 0 by default
     * \return the value returned by the base model (no logistic transform is applied here)
     */
    float Predict( const FMatrixS::Line &feat, int buffer_index = -1, unsigned rid = 0 ){
        return base_model.Predict(feat, buffer_index, rid);
    }
    /*!
     * \brief predict for one dense feature vector; all arguments are forwarded
     *        unchanged to GBMBaseModel::Predict
     * \param feat feature values
     * \param funknown per-feature flags, forwarded to the base model
     * \param buffer_index forwarded to the base model, -1 by default
     * \param rid forwarded to the base model, 0 by default
     * \return the value returned by the base model (no logistic transform is applied here)
     */
    float Predict( const std::vector<float> &feat,
                   const std::vector<bool>  &funknown,
                   int buffer_index = -1,
                   unsigned rid = 0 ){
        return base_model.Predict(feat, funknown, buffer_index, rid);
    }
    /*! \brief trainer-level parameters */
    struct GBRTParam{
        /*!
         * \brief path of input training data;
         *        stored as an owned copy because the text handed to SetParam is
         *        not guaranteed to outlive the call (the gbrt constructor passes
         *        pointers obtained from a local ConfigIterator)
         */
        std::string train_file_path;
        /*! \brief number of boosting rounds performed by train(), 100 by default */
        int num_round;
        /*! \brief constructor, empty training path and 100 boosting rounds */
        GBRTParam( void ) : num_round(100) {
        }
        /*!
         * \brief set parameters from outside; unrecognised names are ignored
         * \param name name of the parameter
         * \param val value of the parameter
         */
        inline void SetParam( const char *name, const char *val ){
            if( !std::strcmp("train_file_path", name ) ) train_file_path = val;
            // atoi yields 0 for non-numeric text, in which case train() runs no rounds
            if( !std::strcmp("num_round", name ) ) num_round = std::atoi(val);
        }
    };
private:
    /*!
     * \brief first order statistic passed to DoBoost
     * \param pred_transform logistic-transformed prediction
     * \param label label of the instance
     * \return label - pred_transform
     *
     * NOTE(review): this is the negative of the logistic-loss derivative with
     * respect to the raw prediction (which is pred_transform - label); confirm
     * that this sign matches what GBMBaseModel::DoBoost expects.
     */
    inline float FirstOrderGradient(float pred_transform, float label){
        return label - pred_transform;
    }
    /*!
     * \brief second order statistic passed to DoBoost
     * \param pred_transform logistic-transformed prediction
     * \return pred_transform * ( 1 - pred_transform )
     */
    inline float SecondOrderGradient(float pred_transform){
        return pred_transform * ( 1 - pred_transform );
    }
    /*!
     * \brief logistic (sigmoid) function
     * \param x raw prediction
     * \return 1 / ( 1 + exp(-x) ), evaluated in double precision and narrowed to float
     */
    inline float Logistic(float x){
        return 1.0/(1.0 + exp(-x));
    }
    /*! \brief underlying boosting model that does the actual training and prediction */
    GBMBaseModel base_model;
    /*! \brief trainer-level parameters */
    GBRTParam param;
};
#endif