diff --git a/booster/gbrt.h b/booster/gbrt.h
deleted file mode 100644
index 0ce830b9e..000000000
--- a/booster/gbrt.h
+++ /dev/null
@@ -1,102 +0,0 @@
-#ifndef _GBRT_H_
-#define _GBRT_H_
-
-#include "../utils/xgboost_config.h"
-#include "../utils/xgboost_stream.h"
-#include "xgboost_regression_data_reader.h"
-#include "xgboost_gbmbase.h"
-#include <cmath>
-using namespace xgboost::utils;
-using namespace xgboost::booster;
-
-class gbrt{
-
-public:
-    gbrt(const char* config_path){
-        ConfigIterator config_itr(config_path);
-        while(config_itr.Next()){
-            SetParam(config_itr.name,config_itr.val);
-            base_model.SetParam(config_itr.name,config_itr.val);
-        }
-    }
-
-    void SetParam( const char *name, const char *val ){
-        param.SetParam(name, val);
-    }
-
-    void train(){
-        xgboost_regression_data_reader data_reader(param.train_file_path);
-        base_model.InitModel();
-        base_model.InitTrainer();
-        std::vector<float> grad,hess;
-        std::vector<unsigned> root_index;
-        int instance_num = data_reader.InsNum();
-        float label = 0,pred_transform = 0;
-        grad.resize(instance_num); hess.resize(instance_num);
-        for(int i = 0; i < 100; i++){
-            grad.clear();hess.clear();
-            for(int j = 0; j < instance_num; j++){
-                label = data_reader.GetLabel(j);
-                pred_transform = Logistic(Predict(data_reader.GetLine(j)));
-                grad.push_back(FirstOrderGradient(pred_transform,label));
-                hess.push_back(SecondOrderGradient(pred_transform));
-            }
-            base_model.DoBoost(grad,hess,data_reader.GetImage(),root_index );
-        }
-    }
-
-    inline void SaveModel(IStream &fo ){
-        base_model.SaveModel(fo);
-    }
-
-    inline void LoadModel(IStream &fi ){
-        base_model.LoadModel(fi);
-    }
-
-    float Predict( const FMatrixS::Line &feat, int buffer_index = -1, unsigned rid = 0 ){
-        return base_model.Predict(feat,buffer_index,rid);
-    }
-
-    float Predict( const std::vector<float> &feat,
-                   const std::vector<bool> &funknown,
-                   int buffer_index = -1,
-                   unsigned rid = 0 ){
-        return base_model.Predict(feat,funknown,buffer_index,rid);
-    }
-
-    struct GBRTParam{
-
-        /*! \brief path of input training data */
-        const char* train_file_path;
-
-        GBRTParam( void ){
-        }
-        /*!
-         * \brief set parameters from outside
-         * \param name name of the parameter
-         * \param val value of the parameter
-         */
-        inline void SetParam( const char *name, const char *val ){
-            if( !strcmp("train_file_path", name ) ) train_file_path = val;
-        }
-    };
-
-private:
-    inline float FirstOrderGradient(float pred_transform,float label){
-        return label - pred_transform;
-    }
-
-    inline float SecondOrderGradient(float pred_transform){
-        return pred_transform * ( 1 - pred_transform );
-    }
-
-    inline float Logistic(float x){
-        return 1.0/(1.0 + exp(-x));
-    }
-
-    GBMBaseModel base_model;
-    GBRTParam param;
-
-};
-
-#endif
\ No newline at end of file
diff --git a/booster/xgboost.cpp b/booster/xgboost.cpp
index f60d2b2f8..5fdc3ace2 100644
--- a/booster/xgboost.cpp
+++ b/booster/xgboost.cpp
@@ -13,6 +13,7 @@
 // implementations of boosters
 #include "tree/xgboost_svdf_tree.hpp"
 #include "linear/xgboost_linear.hpp"
+#include "../regression/xgboost_reg.h"
 
 namespace xgboost{
     namespace booster{
diff --git a/regression/xgboost_reg.h b/regression/xgboost_reg.h
new file mode 100644
index 000000000..ebc574a85
--- /dev/null
+++ b/regression/xgboost_reg.h
@@ -0,0 +1,154 @@
+#ifndef _XGBOOST_REG_H_
+#define _XGBOOST_REG_H_
+/*!
+ * \file xgboost_reg.h
+ * \brief class for gradient boosted regression
+ * \author Kailong Chen: chenkl198812@gmail.com, Tianqi Chen: tianqi.tchen@gmail.com
+ */
+#include <cmath>
+#include "xgboost_regdata.h"
+#include "../booster/xgboost_gbmbase.h"
+#include "../utils/xgboost_utils.h"
+#include "../utils/xgboost_stream.h"
+
+namespace xgboost{
+    namespace regression{
+        /*! \brief class for gradient boosted regression */
+        class RegBoostLearner{
+        public:
+            /*!
+             * \brief a regression booster associated with training and evaluating data
+             * \param train pointer to the training data
+             * \param evals array of evaluating data
+             * \param evname name of evaluation data, used to print statistics
+             */
+            RegBoostLearner( const DMatrix *train,
+                             std::vector<const DMatrix *> evals,
+                             std::vector<std::string> evname ){
+                this->train_ = train;
+                this->evals_ = evals;
+                this->evname_ = evname;
+                //TODO: assign buffer index
+            }
+            /*!
+             * \brief set parameters from outside
+             * \param name name of the parameter
+             * \param val value of the parameter
+             */
+            inline void SetParam( const char *name, const char *val ){
+                mparam.SetParam( name, val );
+                base_model.SetParam( name, val );
+            }
+            /*!
+             * \brief initialize the solver before training;
+             * this function is reserved for the solver to allocate necessary space and do other preparation
+             */
+            inline void InitTrainer( void ){
+                base_model.InitTrainer();
+                mparam.AdjustBase();
+            }
+            /*!
+             * \brief load model from stream
+             * \param fi input stream
+             */
+            inline void LoadModel( utils::IStream &fi ){
+                utils::Assert( fi.Read( &mparam, sizeof(ModelParam) ) != 0 );
+                base_model.LoadModel( fi );
+            }
+            /*!
+             * \brief save model to stream
+             * \param fo output stream
+             */
+            inline void SaveModel( utils::IStream &fo ) const{
+                fo.Write( &mparam, sizeof(ModelParam) );
+                base_model.SaveModel( fo );
+            }
+            /*!
+             * \brief update the model for one iteration
+             */
+            inline void UpdateOneIter( void ){
+                //TODO
+            }
+            /*! \brief predict the results, given data */
+            inline void Predict( std::vector<float> &preds, const DMatrix &data ){
+                //TODO
+            }
+        private:
+            /*! \brief training parameter for regression */
+            struct ModelParam{
+                /*! \brief global bias */
+                float base_score;
+                /*! \brief type of loss function */
+                int loss_type;
+                ModelParam( void ){
+                    base_score = 0.5f;
+                    loss_type = 0;
+                }
+                /*!
+                 * \brief set parameters from outside
+                 * \param name name of the parameter
+                 * \param val value of the parameter
+                 */
+                inline void SetParam( const char *name, const char *val ){
+                    if( !strcmp("base_score", name ) ) base_score = (float)atof( val );
+                    if( !strcmp("loss_type", name ) ) loss_type = atoi( val );
+                }
+                /*!
+                 * \brief adjust base_score
+                 */
+                inline void AdjustBase( void ){
+                    if( loss_type == 1 ){
+                        utils::Assert( base_score > 0.0f && base_score < 1.0f, "sigmoid range constraint" );
+                        base_score = - logf( 1.0f / base_score - 1.0f );
+                    }
+                }
+                /*!
+                 * \brief calculate first order gradient of loss, given transformed prediction
+                 * \param predt transformed prediction
+                 * \param label true label
+                 * \return first order gradient
+                 */
+                inline float FirstOrderGradient( float predt, float label ) const{
+                    switch( loss_type ){
+                    case 0: return predt - label;
+                    case 1: return predt - label;
+                    default: utils::Error("unknown loss_type"); return 0.0f;
+                    }
+                }
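+                // Both loss types share this formula: for squared error (loss_type = 0)
+                //   d/dx 0.5*(x - y)^2 = x - y,
+                // and for logistic loss (loss_type = 1) with p = sigmoid(x)
+                //   d/dx [ -y*log(p) - (1-y)*log(1-p) ] = p - y,
+                // so the first order gradient is always predt - label.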
+                /*!
+                 * \brief calculate second order gradient of loss, given transformed prediction
+                 * \param predt transformed prediction
+                 * \param label true label
+                 * \return second order gradient
+                 */
+                inline float SecondOrderGradient( float predt, float label ) const{
+                    switch( loss_type ){
+                    case 0: return 1.0f;
+                    case 1: return predt * ( 1 - predt );
+                    default: utils::Error("unknown loss_type"); return 0.0f;
+                    }
+                }
+                /*!
+                 * \brief transform the linear sum to prediction
+                 * \param x linear sum of boosting ensemble
+                 * \return transformed prediction
+                 */
+                inline float PredTransform( float x ){
+                    switch( loss_type ){
+                    case 0: return x;
+                    case 1: return 1.0f/(1.0f + expf(-x));
+                    default: utils::Error("unknown loss_type"); return 0.0f;
+                    }
+                }
+            };
+        private:
+            booster::GBMBaseModel base_model;
+            ModelParam mparam;
+            const DMatrix *train_;
+            std::vector<const DMatrix *> evals_;
+            std::vector<std::string> evname_;
+        };
+    };
+};
+
+#endif
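
For orientation, a minimal driver sketch for the new interface follows. It is not part of the patch: UpdateOneIter and Predict are still TODO above, and DMatrix::Load is a hypothetical loader named only for illustration (the real loading code belongs to xgboost_regdata.h, which this diff does not show). The fixed 100-round loop mirrors the hard-coded loop in the deleted gbrt::train.

    // Sketch only: assumes UpdateOneIter/Predict are implemented and that
    // xgboost_regdata.h provides some way to fill a DMatrix (Load is hypothetical).
    #include <cstdio>
    #include <string>
    #include <vector>
    #include "regression/xgboost_reg.h"

    int main( void ){
        using namespace xgboost::regression;
        DMatrix train, test;
        train.Load( "train.txt" );  // hypothetical loader
        test.Load( "test.txt" );    // hypothetical loader

        std::vector<const DMatrix *> evals( 1, &test );
        std::vector<std::string> evname( 1, "test" );
        RegBoostLearner learner( &train, evals, evname );

        learner.SetParam( "loss_type", "1" );    // 1 = logistic loss
        learner.SetParam( "base_score", "0.5" ); // remapped to a margin by AdjustBase()
        learner.InitTrainer();

        for( int i = 0; i < 100; ++i ){ // boosting rounds
            learner.UpdateOneIter();    // TODO in this patch
        }

        std::vector<float> preds;
        learner.Predict( preds, test ); // TODO in this patch
        if( !preds.empty() ) std::printf( "first prediction: %f\n", preds[0] );
        return 0;
    }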