#ifndef _XGBOOST_REG_H_
#define _XGBOOST_REG_H_
/*!
* \file xgboost_reg.h
* \brief class for gradient boosted regression
* \author Kailong Chen: chenkl198812@gmail.com, Tianqi Chen: tianqi.tchen@gmail.com
*/
#include <cmath>
#include <cstdio>
#include <cstring>
#include <cstdlib>
#include "xgboost_regdata.h"
#include "../booster/xgboost_gbmbase.h"
#include "../utils/xgboost_utils.h"
#include "../utils/xgboost_stream.h"
namespace xgboost{
namespace regression{
/*! \brief class for gradient boosted regression */
class RegBoostLearner{
public:
RegBoostLearner(bool silent = false){
this->silent = silent;
this->train_ = NULL;
}
/*!
* \brief construct a regression booster associated with training and evaluating data
* \param train pointer to the training data
* \param evals array of evaluating data
* \param evname name of evaluation data, used to print statistics
*/
RegBoostLearner( const DMatrix *train,
std::vector<const DMatrix *> evals,
std::vector<std::string> evname, bool silent = false ){
this->silent = silent;
SetData(train,evals,evname);
}
/*!
* \brief associate regression booster with training and evaluating data
* \param train pointer to the training data
* \param evals array of evaluating data
* \param evname name of evaluation data, used to print statistics
*/
inline void SetData(const DMatrix *train,
std::vector<const DMatrix *> evals,
std::vector<std::string> evname){
this->train_ = train;
this->evals_ = evals;
this->evname_ = evname;
//assign buffer index: the base model caches one prediction entry per
//instance, covering the training set followed by each evaluation set
size_t buffer_size = (*train).size();
for(size_t i = 0; i < evals.size(); i++){
buffer_size += (*evals[i]).size();
}
char str[25];
sprintf(str, "%lu", (unsigned long)buffer_size);
base_model.SetParam("num_pbuffer",str);
}
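/*!
* a minimal usage sketch for the constructors and SetData above; the DMatrix
* objects and their loading are assumed to be handled by the caller (see
* xgboost_regdata.h), and "val" is just an example name:
* \code
* DMatrix train, val;
* // ... fill train and val ...
* std::vector<const DMatrix*> evals; evals.push_back(&val);
* std::vector<std::string> evname; evname.push_back("val");
* RegBoostLearner learner(&train, evals, evname);
* \endcode
*/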
/*!
* \brief set parameters from outside
* \param name name of the parameter
* \param val value of the parameter
*/
inline void SetParam( const char *name, const char *val ){
mparam.SetParam( name, val );
base_model.SetParam( name, val );
}
/*!
* \brief initialize the solver; must be called before training
* this function is reserved for the solver to allocate necessary space and do other preparation
*/
inline void InitTrainer( void ){
base_model.InitTrainer();
InitModel();
mparam.AdjustBase();
}
/*!
* \brief initialize the current data storage for the model; call this function when the model is used for the first time
*/
inline void InitModel( void ){
base_model.InitModel();
}
/*!
* \brief load model from stream
* \param fi input stream
*/
inline void LoadModel( utils::IStream &fi ){
utils::Assert( fi.Read( &mparam, sizeof(ModelParam) ) != 0 );
base_model.LoadModel( fi );
}
/*!
* \brief save model to stream
* \param fo output stream
*/
inline void SaveModel( utils::IStream &fo ) const{
fo.Write( &mparam, sizeof(ModelParam) );
base_model.SaveModel( fo );
}
/*!
* \brief update the model for one iteration
* \param iteration the current iteration number, used when printing statistics
*/
inline void UpdateOneIter( int iteration ){
std::vector<float> grad,hess,preds;
std::vector<unsigned> root_index;
booster::FMatrixS::Image train_image((*train_).data);
Predict(preds,*train_,0);
Gradient(preds,(*train_).labels,grad,hess);
base_model.DoBoost(grad,hess,train_image,root_index);
int buffer_index_offset = (*train_).size();
float loss = 0.0f;
for(size_t i = 0; i < evals_.size(); i++){
Predict(preds, *evals_[i], buffer_index_offset);
loss = mparam.Loss(preds,(*evals_[i]).labels);
if(!silent){
printf("the loss of %s data set in iteration %d is %f\n",
evname_[i].c_str(), iteration, loss);
}
buffer_index_offset += (*evals_[i]).size();
}
}
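/*!
* a minimal training-loop sketch for the methods above; num_round and the
* parameter values are caller-chosen examples, and fo is a hypothetical
* utils::IStream opened for writing:
* \code
* learner.SetParam("loss_type", "1");
* learner.InitTrainer();
* for(int r = 0; r < num_round; r++){
*     learner.UpdateOneIter(r);
* }
* learner.SaveModel(fo);
* \endcode
*/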
/*! \brief get the transformed predictions, given data */
inline void Predict( std::vector<float> &preds, const DMatrix &data,int buffer_index_offset = 0 ){
int data_size = data.size();
preds.resize(data_size);
for(int j = 0; j < data_size; j++){
preds[j] = mparam.PredTransform(mparam.base_score +
base_model.Predict(data.data[j],buffer_index_offset + j));
}
}
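// note on buffer_index_offset: SetData sizes the prediction buffer so that
// training rows occupy indices [0, train.size()) and each evaluation set
// follows in order; callers should pass the offset of the data set within
// this layout, as UpdateOneIter does above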
private:
/*! \brief get the first order and second order gradient, given the transformed predictions and labels*/
inline void Gradient(const std::vector<float> &preds, const std::vector<float> &labels, std::vector<float> &grad,
std::vector<float> &hess){
grad.clear();
hess.clear();
for(size_t j = 0; j < preds.size(); j++){
grad.push_back(mparam.FirstOrderGradient(preds[j],labels[j]));
hess.push_back(mparam.SecondOrderGradient(preds[j],labels[j]));
}
}
/*! \brief list of loss types, matching the values accepted by the loss_type parameter */
enum LOSS_TYPE_LIST{
/*! \brief loss_type = 0: squared error loss */
LINEAR_SQUARE,
/*! \brief loss_type = 1: negative log-likelihood of logistic regression */
LOGISTIC_NEGLOGLIKELIHOOD
};
/*! \brief training parameter for regression */
struct ModelParam{
/*! \brief global bias */
float base_score;
/*! \brief type of loss function */
int loss_type;
ModelParam( void ){
base_score = 0.5f;
loss_type = LINEAR_SQUARE;
}
/*!
* \brief set parameters from outside
* \param name name of the parameter
* \param val value of the parameter
*/
inline void SetParam( const char *name, const char *val ){
if( !strcmp("base_score", name ) ) base_score = (float)atof( val );
if( !strcmp("loss_type", name ) ) loss_type = atoi( val );
}
/*!
* \brief adjust base_score before training: for the logistic loss,
* convert base_score from probability scale to margin scale
*/
inline void AdjustBase( void ){
if( loss_type == LOGISTIC_NEGLOGLIKELIHOOD ){
utils::Assert( base_score > 0.0f && base_score < 1.0f, "sigmoid range constraint" );
base_score = - logf( 1.0f / base_score - 1.0f );
}
}
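// note: the adjustment above is the logit, the inverse of the sigmoid used
// in PredTransform below: for a desired base probability b, x = -log(1/b - 1)
// satisfies 1/(1 + exp(-x)) = b, so an untrained ensemble predicts exactly
// base_score after the prediction transform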
/*!
* \brief calculate first order gradient of loss, given transformed prediction
* \param predt transformed prediction
* \param label true label
* \return first order gradient
*/
inline float FirstOrderGradient( float predt, float label ) const{
switch( loss_type ){
case LINEAR_SQUARE: return predt - label;
case LOGISTIC_NEGLOGLIKELIHOOD: return predt - label;
default: utils::Error("unknown loss_type"); return 0.0f;
}
}
/*!
* \brief calculate second order gradient of loss, given transformed prediction
* \param predt transformed prediction
* \param label true label
* \return second order gradient
*/
inline float SecondOrderGradient( float predt, float label ) const{
switch( loss_type ){
case LINEAR_SQUARE: return 1.0f;
case LOGISTIC_NEGLOGLIKELIHOOD: return predt * ( 1 - predt );
default: utils::Error("unknown loss_type"); return 0.0f;
}
}
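// derivation sketch for the two functions above: with p = sigmoid(x), the
// logistic negative log-likelihood L(y,p) = -y*ln(p) - (1-y)*ln(1-p) has
// dL/dx = p - y and d2L/dx2 = p*(1-p) w.r.t. the raw score x (both functions
// take the transformed prediction p); for the squared error L = (p-y)^2/2,
// the first and second order gradients are p - y and 1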
/*!
* \brief calculate the loss, given the predictions, labels and the loss type
* \param preds the given predictions
* \param labels the given labels
* \return the specified loss
*/
inline float Loss(const std::vector<float> &preds, const std::vector<float> &labels) const{
switch( loss_type ){
case LINEAR_SQUARE: return SquareLoss(preds,labels);
case LOGISTIC_NEGLOGLIKELIHOOD: return NegLoglikelihoodLoss(preds,labels);
default: utils::Error("unknown loss_type"); return 0.0f;
}
}
/*!
* \brief calculate the square loss, given the predictions and labels
* \param preds the given predictions
* \param labels the given labels
* \return the summation of square loss
*/
inline float SquareLoss(const std::vector<float> &preds, const std::vector<float> &labels) const{
float ans = 0.0f;
for(size_t i = 0; i < preds.size(); i++)
ans += (preds[i] - labels[i]) * (preds[i] - labels[i]);
return ans;
}
/*!
* \brief calculate the negative log-likelihood loss, given the predictions and labels
* \param preds the given predictions
* \param labels the given labels
* \return the summation of negative log-likelihood loss
*/
inline float NegLoglikelihoodLoss(const std::vector<float> &preds, const std::vector<float> &labels) const{
float ans = 0.0f;
for(size_t i = 0; i < preds.size(); i++)
ans -= labels[i] * log(preds[i]) + ( 1 - labels[i] ) * log(1 - preds[i]);
return ans;
}
/*!
* \brief transform the linear sum to prediction
* \param x linear sum of boosting ensemble
* \return transformed prediction
*/
inline float PredTransform( float x ){
switch( loss_type ){
case LINEAR_SQUARE: return x;
case LOGISTIC_NEGLOGLIKELIHOOD: return 1.0f/(1.0f + expf(-x));
default: utils::Error("unknown loss_type"); return 0.0f;
}
}
};
private:
booster::GBMBaseModel base_model;
ModelParam mparam;
const DMatrix *train_;
std::vector<const DMatrix *> evals_;
std::vector<std::string> evname_;
bool silent;
};
} // namespace regression
} // namespace xgboost
#endif