gbrt implemented

This commit is contained in:
kalenhaha 2014-02-10 23:40:38 +08:00
parent 6c38e35ffb
commit 3afd186ea9
2 changed files with 158 additions and 0 deletions

82
booster/gbrt.h Normal file
View File

@ -0,0 +1,82 @@
#ifndef _GBRT_H_
#define _GBRT_H_
#include "../utils/xgboost_config.h"
#include "xgboost_regression_data_reader.h"
#include "xgboost_gbmbase.h"
#include <math.h>
#include <stdio.h>
#include <string.h>
#include <string>
#include <vector>
using namespace xgboost::utils;
using namespace xgboost::booster;
/*!
 * \brief Trainer that repeatedly boosts a GBMBaseModel using first and second
 *  order statistics computed from logistic-transformed predictions.
 */
class gbrt{
public:
    /*!
     * \brief construct the trainer from a configuration file
     *  Every name/value pair produced by the ConfigIterator is forwarded both
     *  to this object's own parameters and to the base model.
     * \param config_path path of the configuration file
     */
    gbrt(const char* config_path){
        ConfigIterator config_itr(config_path);
        while(config_itr.Next()){
            SetParam(config_itr.name,config_itr.val);
            base_model.SetParam(config_itr.name,config_itr.val);
        }
    }
    /*!
     * \brief set a trainer-level parameter
     * \param name name of the parameter
     * \param val value of the parameter
     */
    void SetParam( const char *name, const char *val ){
        param.SetParam(name, val);
    }
    /*!
     * \brief load the training data and run the boosting rounds
     * \param num_round number of boosting rounds; defaults to 100, the value
     *  that used to be hard-coded
     */
    void train(int num_round = 100){
        if(param.train_file_path == NULL){
            // nothing was configured, there is no file to read
            printf("train_file_path is not set\n");
            return;
        }
        xgboost_regression_data_reader data_reader(param.train_file_path);
        base_model.InitModel();
        base_model.InitTrainer();
        const int instance_num = data_reader.InsNum();
        if(instance_num <= 0){
            // the reader reports its own error; boosting on no data is pointless
            printf("no training instance is available, skip boosting\n");
            return;
        }
        // sized once and overwritten in place every round
        std::vector<float> grad(instance_num), hess(instance_num);
        // left empty, exactly as before
        std::vector<unsigned> root_index;
        for(int round = 0; round < num_round; round++){
            for(int j = 0; j < instance_num; j++){
                const float label = data_reader.GetLabel(j);
                const float pred_transform = Logistic(base_model.Predict(data_reader.GetLine(j)));
                grad[j] = FirstOrderGradient(pred_transform,label);
                hess[j] = SecondOrderGradient(pred_transform);
            }
            base_model.DoBoost(grad,hess,data_reader.GetImage(),root_index );
        }
    }
    /*! \brief parameters owned by the trainer itself */
    struct GBRTParam{
        /*!
         * \brief path of input training data, NULL until it is set
         *  After SetParam it points at a copy owned by this struct, so it
         *  stays valid after the caller's buffer is gone.
         */
        const char* train_file_path;
        GBRTParam( void ) : train_file_path(NULL){
        }
        /*! \brief copy, rebinding the path pointer to the new object's own copy */
        GBRTParam( const GBRTParam &other ) : train_file_path(NULL){
            CopyFrom(other);
        }
        /*! \brief assign, rebinding the path pointer to this object's own copy */
        GBRTParam &operator=( const GBRTParam &other ){
            if( this != &other ) CopyFrom(other);
            return *this;
        }
        /*!
         * \brief set parameters from outside
         * \param name name of the parameter
         * \param val value of the parameter
         */
        inline void SetParam( const char *name, const char *val ){
            if( name == NULL || val == NULL ) return;
            if( !strcmp("train_file_path", name ) ){
                // keep our own copy: val may live in a buffer that is reused
                // or destroyed right after this call
                path_storage = val;
                train_file_path = path_storage.c_str();
            }
        }
    private:
        /*! \brief owned copy of the training file path */
        std::string path_storage;
        /*! \brief copy the path and keep the pointer aimed at the right storage */
        inline void CopyFrom( const GBRTParam &other ){
            // decided before path_storage is overwritten
            const bool owned = ( other.train_file_path == other.path_storage.c_str() );
            path_storage = other.path_storage;
            train_file_path = owned ? path_storage.c_str() : other.train_file_path;
        }
    };
private:
    /*!
     * \brief first order statistic passed to DoBoost
     *  NOTE(review): label - pred is the negative of d(logistic loss)/d(margin);
     *  confirm that this is the sign DoBoost expects.
     */
    inline float FirstOrderGradient(float pred_transform,float label){
        return label - pred_transform;
    }
    /*! \brief second order statistic p * (1 - p) of the transformed prediction */
    inline float SecondOrderGradient(float pred_transform){
        return pred_transform * ( 1 - pred_transform );
    }
    /*! \brief logistic function 1 / (1 + exp(-x)) */
    inline float Logistic(float x){
        return 1.0/(1.0 + exp(-x));
    }
    /*! \brief the model being boosted */
    GBMBaseModel base_model;
    /*! \brief trainer-level parameters */
    GBRTParam param;
};
#endif

View File

@ -0,0 +1,76 @@
#include"xgboost_data.h"
#include<stdio.h>
#include<vector>
using namespace xgboost::booster;
/*!
* \file xgboost_regression_data_reader.h
* \brief A reader that loads the data for a regression task from a specified file.
* The file should contain one data instance per line.
* The format of each line is:
* label nonzero_feature_count[ feature_index:feature_value]+
* \author Kailong Chen: chenkl198812@gmail.com
*/
class xgboost_regression_data_reader{
public:
xgboost_regression_data_reader(const char* file_path){
Load(file_path);
}
void Load(const char* file_path){
data_matrix.Clear();
FILE* file = fopen(file_path,"r");
if(file == NULL){
printf("The file is missing at %s",file_path);
return;
}
float label;
int nonzero_dimension,index,value,num_row = 0;
std::vector<bst_uint> findex;
std::vector<bst_float> fvalue;
while(fscanf(file,"%f %i",label,nonzero_dimension)){
findex.clear();
fvalue.clear();
findex.resize(nonzero_dimension);
fvalue.resize(nonzero_dimension);
for(int i = 0; i < nonzero_dimension; i++){
if(!fscanf(file," %i:%f",index,value)){
printf("The feature dimension is not coincident \
with the indicated one");
return;
}
findex.push_back(index);
fvalue.push_back(value);
}
data_matrix.AddRow(findex, fvalue);
labels.push_back(label);
num_row++;
}
printf("%i rows of data is loaded from %s",num_row,file_path);
fclose(file);
}
float GetLabel(int index){
return labels[index];
}
FMatrixS::Line GetLine(int index){
return data_matrix[index];
}
int InsNum(){
return labels.size();
}
FMatrixS::Image GetImage(){
return FMatrixS::Image(data_matrix);
}
private:
FMatrixS data_matrix;
std::vector<float> labels;
};