gbrt implemented
This commit is contained in:
parent
6c38e35ffb
commit
3afd186ea9
82
booster/gbrt.h
Normal file
82
booster/gbrt.h
Normal file
@ -0,0 +1,82 @@
|
||||
#ifndef _GBRT_H_
|
||||
#define _GBRT_H_
|
||||
|
||||
#include "../utils/xgboost_config.h"
#include "xgboost_regression_data_reader.h"
#include "xgboost_gbmbase.h"
#include <math.h>
#include <string.h>
#include <string>
#include <vector>
|
||||
using namespace xgboost::utils;
|
||||
using namespace xgboost::booster;
|
||||
|
||||
class gbrt{
|
||||
|
||||
public:
|
||||
gbrt(const char* config_path){
|
||||
ConfigIterator config_itr(config_path);
|
||||
while(config_itr.Next()){
|
||||
SetParam(config_itr.name,config_itr.val);
|
||||
base_model.SetParam(config_itr.name,config_itr.val);
|
||||
}
|
||||
}
|
||||
|
||||
void SetParam( const char *name, const char *val ){
|
||||
param.SetParam(name, val);
|
||||
}
|
||||
|
||||
void train(){
|
||||
xgboost_regression_data_reader data_reader(param.train_file_path);
|
||||
base_model.InitModel();
|
||||
base_model.InitTrainer();
|
||||
std::vector<float> grad,hess;
|
||||
std::vector<unsigned> root_index;
|
||||
int instance_num = data_reader.InsNum();
|
||||
float label = 0,pred_transform = 0;
|
||||
grad.resize(instance_num); hess.resize(instance_num);
|
||||
for(int i = 0; i < 100; i++){
|
||||
grad.clear();hess.clear();
|
||||
for(int j = 0; j < instance_num; j++){
|
||||
label = data_reader.GetLabel(j);
|
||||
pred_transform = Logistic(base_model.Predict(data_reader.GetLine(j)));
|
||||
grad.push_back(FirstOrderGradient(pred_transform,label));
|
||||
hess.push_back(SecondOrderGradient(pred_transform));
|
||||
}
|
||||
base_model.DoBoost(grad,hess,data_reader.GetImage(),root_index );
|
||||
}
|
||||
}
|
||||
|
||||
struct GBRTParam{
|
||||
|
||||
/*! \brief path of input training data */
|
||||
const char* train_file_path;
|
||||
|
||||
GBRTParam( void ){
|
||||
}
|
||||
/*!
|
||||
* \brief set parameters from outside
|
||||
* \param name name of the parameter
|
||||
* \param val value of the parameter
|
||||
*/
|
||||
inline void SetParam( const char *name, const char *val ){
|
||||
if( !strcmp("train_file_path", name ) ) train_file_path = val;
|
||||
}
|
||||
};
|
||||
|
||||
private:
|
||||
inline float FirstOrderGradient(float pred_transform,float label){
|
||||
return label - pred_transform;
|
||||
}
|
||||
|
||||
inline float SecondOrderGradient(float pred_transform){
|
||||
return pred_transform * ( 1 - pred_transform );
|
||||
}
|
||||
|
||||
inline float Logistic(float x){
|
||||
return 1.0/(1.0 + exp(-x));
|
||||
}
|
||||
|
||||
GBMBaseModel base_model;
|
||||
GBRTParam param;
|
||||
|
||||
};
|
||||
|
||||
#endif
|
||||
76
booster/xgboost_regression_data_reader.h
Normal file
76
booster/xgboost_regression_data_reader.h
Normal file
@ -0,0 +1,76 @@
|
||||
#include"xgboost_data.h"
|
||||
#include<stdio.h>
|
||||
#include<vector>
|
||||
|
||||
using namespace xgboost::booster;
|
||||
/*!
|
||||
* \file xgboost_regression_data_reader.h
|
||||
* \brief A reader to read the data for regression task from a specified file
|
||||
* The data should contain each data instance in each line.
|
||||
* The format of line data is as below:
|
||||
* label nonzero feature dimension[ feature index:feature value]+
|
||||
* \author Kailong Chen: chenkl198812@gmail.com
|
||||
*/
|
||||
|
||||
class xgboost_regression_data_reader{
|
||||
|
||||
public:
|
||||
xgboost_regression_data_reader(const char* file_path){
|
||||
Load(file_path);
|
||||
}
|
||||
|
||||
void Load(const char* file_path){
|
||||
data_matrix.Clear();
|
||||
FILE* file = fopen(file_path,"r");
|
||||
if(file == NULL){
|
||||
printf("The file is missing at %s",file_path);
|
||||
return;
|
||||
}
|
||||
float label;
|
||||
int nonzero_dimension,index,value,num_row = 0;
|
||||
std::vector<bst_uint> findex;
|
||||
std::vector<bst_float> fvalue;
|
||||
|
||||
while(fscanf(file,"%f %i",label,nonzero_dimension)){
|
||||
findex.clear();
|
||||
fvalue.clear();
|
||||
findex.resize(nonzero_dimension);
|
||||
fvalue.resize(nonzero_dimension);
|
||||
for(int i = 0; i < nonzero_dimension; i++){
|
||||
if(!fscanf(file," %i:%f",index,value)){
|
||||
printf("The feature dimension is not coincident \
|
||||
with the indicated one");
|
||||
return;
|
||||
}
|
||||
findex.push_back(index);
|
||||
fvalue.push_back(value);
|
||||
}
|
||||
data_matrix.AddRow(findex, fvalue);
|
||||
labels.push_back(label);
|
||||
num_row++;
|
||||
}
|
||||
printf("%i rows of data is loaded from %s",num_row,file_path);
|
||||
fclose(file);
|
||||
}
|
||||
|
||||
|
||||
float GetLabel(int index){
|
||||
return labels[index];
|
||||
}
|
||||
|
||||
FMatrixS::Line GetLine(int index){
|
||||
return data_matrix[index];
|
||||
}
|
||||
|
||||
int InsNum(){
|
||||
return labels.size();
|
||||
}
|
||||
|
||||
FMatrixS::Image GetImage(){
|
||||
return FMatrixS::Image(data_matrix);
|
||||
}
|
||||
|
||||
private:
|
||||
FMatrixS data_matrix;
|
||||
std::vector<float> labels;
|
||||
};
|
||||
Loading…
x
Reference in New Issue
Block a user