cleanup reg

tqchen 2014-02-10 21:09:09 -08:00
parent 56e4a2ced1
commit 45a452b27e
3 changed files with 155 additions and 102 deletions

@@ -1,102 +0,0 @@
#ifndef _GBRT_H_
#define _GBRT_H_
#include "../utils/xgboost_config.h"
#include "../utils/xgboost_stream.h"
#include "xgboost_regression_data_reader.h"
#include "xgboost_gbmbase.h"
#include <math.h>

using namespace xgboost::utils;
using namespace xgboost::booster;

class gbrt{
public:
    gbrt( const char* config_path ){
        ConfigIterator config_itr( config_path );
        while( config_itr.Next() ){
            SetParam( config_itr.name, config_itr.val );
            base_model.SetParam( config_itr.name, config_itr.val );
        }
    }
    void SetParam( const char *name, const char *val ){
        param.SetParam( name, val );
    }
    void train( void ){
        xgboost_regression_data_reader data_reader( param.train_file_path );
        base_model.InitModel();
        base_model.InitTrainer();
        std::vector<float> grad, hess;
        std::vector<unsigned> root_index;
        int instance_num = data_reader.InsNum();
        float label = 0, pred_transform = 0;
        grad.resize( instance_num ); hess.resize( instance_num );
        for( int i = 0; i < 100; i++ ){
            grad.clear(); hess.clear();
            for( int j = 0; j < instance_num; j++ ){
                label = data_reader.GetLabel( j );
                pred_transform = Logistic( Predict( data_reader.GetLine(j) ) );
                grad.push_back( FirstOrderGradient( pred_transform, label ) );
                hess.push_back( SecondOrderGradient( pred_transform ) );
            }
            base_model.DoBoost( grad, hess, data_reader.GetImage(), root_index );
        }
    }
    inline void SaveModel( IStream &fo ){
        base_model.SaveModel( fo );
    }
    inline void LoadModel( IStream &fi ){
        base_model.LoadModel( fi );
    }
    float Predict( const FMatrixS::Line &feat, int buffer_index = -1, unsigned rid = 0 ){
        return base_model.Predict( feat, buffer_index, rid );
    }
    float Predict( const std::vector<float> &feat,
                   const std::vector<bool>  &funknown,
                   int buffer_index = -1,
                   unsigned rid = 0 ){
        return base_model.Predict( feat, funknown, buffer_index, rid );
    }
    struct GBRTParam{
        /*! \brief path of input training data */
        const char* train_file_path;
        GBRTParam( void ){
        }
        /*!
         * \brief set parameters from outside
         * \param name name of the parameter
         * \param val value of the parameter
         */
        inline void SetParam( const char *name, const char *val ){
            if( !strcmp("train_file_path", name ) ) train_file_path = val;
        }
    };
private:
    inline float FirstOrderGradient( float pred_transform, float label ){
        return label - pred_transform;
    }
    inline float SecondOrderGradient( float pred_transform ){
        return pred_transform * ( 1 - pred_transform );
    }
    inline float Logistic( float x ){
        return 1.0 / ( 1.0 + exp( -x ) );
    }
    GBMBaseModel base_model;
    GBRTParam param;
};
#endif
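
For reference, the trainer removed above hard-codes the logistic objective: every boosting round it feeds DoBoost the first and second order gradients of the log loss at the current transformed prediction. Below is a minimal standalone sketch of that per-instance computation, in plain C++ independent of the xgboost headers; the margin and label values are made up for illustration.

#include <cmath>
#include <cstdio>

// Per-instance statistics that the removed gbrt::train loop computes:
// p = sigmoid(margin), then the first/second order gradients of the log loss.
int main( void ){
    float margin = 0.3f;                                   // raw boosting score (made-up value)
    float label  = 1.0f;                                   // true label in {0,1}
    float p      = 1.0f / ( 1.0f + std::exp( -margin ) );  // what Logistic() computes
    float grad   = p - label;                              // d(logloss)/d(margin)
    float hess   = p * ( 1.0f - p );                       // d^2(logloss)/d(margin)^2
    std::printf( "p=%f grad=%f hess=%f\n", p, grad, hess );
    return 0;
}

Note that the removed FirstOrderGradient returns label - pred_transform, i.e. the negative of this gradient, while the ModelParam introduced below uses predt - label.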

@@ -13,6 +13,7 @@
// implementations of boosters
#include "tree/xgboost_svdf_tree.hpp"
#include "linear/xgboost_linear.hpp"
#include "../regression/xgboost_reg.h"
namespace xgboost{
namespace booster{

regression/xgboost_reg.h (new file, +154)

@@ -0,0 +1,154 @@
#ifndef _XGBOOST_REG_H_
#define _XGBOOST_REG_H_
/*!
 * \file xgboost_reg.h
 * \brief class for gradient boosted regression
 * \author Kailong Chen: chenkl198812@gmail.com, Tianqi Chen: tianqi.tchen@gmail.com
 */
#include <cmath>
#include "xgboost_regdata.h"
#include "../booster/xgboost_gbmbase.h"
#include "../utils/xgboost_utils.h"
#include "../utils/xgboost_stream.h"

namespace xgboost{
namespace regression{
/*! \brief class for gradient boosted regression */
class RegBoostLearner{
public:
    /*!
     * \brief a regression booster associated with training and evaluation data
     * \param train pointer to the training data
     * \param evals array of evaluation data
     * \param evname names of the evaluation data, used to print statistics
     */
    RegBoostLearner( const DMatrix *train,
                     std::vector<const DMatrix *> evals,
                     std::vector<std::string> evname ){
        this->train_ = train;
        this->evals_ = evals;
        this->evname_ = evname;
        //TODO: assign buffer index
    }
    /*!
     * \brief set parameters from outside
     * \param name name of the parameter
     * \param val value of the parameter
     */
    inline void SetParam( const char *name, const char *val ){
        mparam.SetParam( name, val );
        base_model.SetParam( name, val );
    }
    /*!
     * \brief initialize solver before training, called before training
     * this function is reserved for solver to allocate necessary space and do other preparation
     */
    inline void InitTrainer( void ){
        base_model.InitTrainer();
        mparam.AdjustBase();
    }
    /*!
     * \brief load model from stream
     * \param fi input stream
     */
    inline void LoadModel( utils::IStream &fi ){
        utils::Assert( fi.Read( &mparam, sizeof(ModelParam) ) != 0 );
        base_model.LoadModel( fi );
    }
    /*!
     * \brief save model to stream
     * \param fo output stream
     */
    inline void SaveModel( utils::IStream &fo ) const{
        fo.Write( &mparam, sizeof(ModelParam) );
        base_model.SaveModel( fo );
    }
    /*!
     * \brief update the model for one iteration
     */
    inline void UpdateOneIter( void ){
        //TODO
    }
    /*! \brief predict the results, given data */
    inline void Predict( std::vector<float> &preds, const DMatrix &data ){
        //TODO
    }
private:
    /*! \brief training parameter for regression */
    struct ModelParam{
        /*! \brief global bias */
        float base_score;
        /*! \brief type of loss function */
        int loss_type;
        ModelParam( void ){
            base_score = 0.5f;
            loss_type  = 0;
        }
        /*!
         * \brief set parameters from outside
         * \param name name of the parameter
         * \param val value of the parameter
         */
        inline void SetParam( const char *name, const char *val ){
            if( !strcmp("base_score", name ) ) base_score = (float)atof( val );
            if( !strcmp("loss_type", name ) )  loss_type  = atoi( val );
        }
        /*!
         * \brief adjust base_score
         */
        inline void AdjustBase( void ){
            if( loss_type == 1 ){
                utils::Assert( base_score > 0.0f && base_score < 1.0f, "sigmoid range constraint" );
                base_score = - logf( 1.0f / base_score - 1.0f );
            }
        }
        /*!
         * \brief calculate first order gradient of loss, given transformed prediction
         * \param predt transformed prediction
         * \param label true label
         * \return first order gradient
         */
        inline float FirstOrderGradient( float predt, float label ) const{
            switch( loss_type ){
            case 0: return predt - label;
            case 1: return predt - label;
            default: utils::Error("unknown loss_type"); return 0.0f;
            }
        }
        /*!
         * \brief calculate second order gradient of loss, given transformed prediction
         * \param predt transformed prediction
         * \param label true label
         * \return second order gradient
         */
        inline float SecondOrderGradient( float predt, float label ) const{
            switch( loss_type ){
            case 0: return 1.0f;
            case 1: return predt * ( 1 - predt );
            default: utils::Error("unknown loss_type"); return 0.0f;
            }
        }
        /*!
         * \brief transform the linear sum to prediction
         * \param x linear sum of boosting ensemble
         * \return transformed prediction
         */
        inline float PredTransform( float x ){
            switch( loss_type ){
            case 0: return x;
            case 1: return 1.0f / ( 1.0f + expf( -x ) );
            default: utils::Error("unknown loss_type"); return 0.0f;
            }
        }
    };
private:
    booster::GBMBaseModel base_model;
    ModelParam mparam;
    const DMatrix *train_;
    std::vector<const DMatrix *> evals_;
    std::vector<std::string> evname_;
};
};
};
#endif
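
The new learner is meant to be driven from outside: parameters are pushed in through SetParam, InitTrainer prepares the base model and converts base_score via AdjustBase, and the caller runs the boosting loop; UpdateOneIter and Predict are still TODO stubs in this commit. A hedged usage sketch, assuming DMatrix comes from the included xgboost_regdata.h and that data loading (not part of this commit) already produced a training matrix; the driver function, the include path, and the round count are hypothetical:

#include <vector>
#include <string>
#include "regression/xgboost_reg.h"

// Hypothetical driver; only the RegBoostLearner calls mirror the header above.
void TrainRegModel( const xgboost::regression::DMatrix &train, xgboost::utils::IStream &fo ){
    std::vector<const xgboost::regression::DMatrix *> evals;  // no evaluation sets in this sketch
    std::vector<std::string> evname;
    xgboost::regression::RegBoostLearner learner( &train, evals, evname );
    learner.SetParam( "loss_type", "1" );      // logistic regression
    learner.SetParam( "base_score", "0.5" );   // turned into a margin by AdjustBase()
    learner.InitTrainer();
    for( int i = 0; i < 100; ++i ){            // round count is arbitrary here
        learner.UpdateOneIter();               // still a TODO stub in this commit
    }
    learner.SaveModel( fo );                   // writes ModelParam, then the base model
}

This mirrors what the deleted gbrt::train did internally, with the objective now selected by loss_type instead of being hard-coded.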