#ifndef XGBOOST_RANK_H #define XGBOOST_RANK_H /*! * \file xgboost_rank.h * \brief class for gradient boosting ranking * \author Kailong Chen: chenkl198812@gmail.com, Tianqi Chen: tianqi.tchen@gmail.com */ #include #include #include #include "xgboost_sample.h" #include "xgboost_rank_data.h" #include "xgboost_rank_eval.h" #include "../utils/xgboost_omp.h" #include "../booster/xgboost_gbmbase.h" #include "../utils/xgboost_utils.h" #include "../utils/xgboost_stream.h" namespace xgboost { namespace rank { /*! \brief class for gradient boosted regression */ class RankBoostLearner { public: /*! \brief constructor */ RegBoostLearner( void ) { silent = 0; } /*! * \brief a rank booster associated with training and evaluating data * \param train pointer to the training data * \param evals array of evaluating data * \param evname name of evaluation data, used print statistics */ RankBoostLearner( const RMatrix *train, const std::vector &evals, const std::vector &evname ) { silent = 0; this->SetData(train,evals,evname); } /*! * \brief associate rank booster with training and evaluating data * \param train pointer to the training data * \param evals array of evaluating data * \param evname name of evaluation data, used print statistics */ inline void SetData( const RMatrix *train, const std::vector &evals, const std::vector &evname ) { this->train_ = train; this->evals_ = evals; this->evname_ = evname; // estimate feature bound int num_feature = (int)(train->data.NumCol()); // assign buffer index unsigned buffer_size = static_cast( train->Size() ); for( size_t i = 0; i < evals.size(); ++ i ) { buffer_size += static_cast( evals[i]->Size() ); num_feature = std::max( num_feature, (int)(evals[i]->data.NumCol()) ); } char str_temp[25]; if( num_feature > mparam.num_feature ) { mparam.num_feature = num_feature; sprintf( str_temp, "%d", num_feature ); base_gbm.SetParam( "bst:num_feature", str_temp ); } sprintf( str_temp, "%u", buffer_size ); base_gbm.SetParam( "num_pbuffer", str_temp ); if( !silent ) { printf( "buffer_size=%u\n", buffer_size ); } // set eval_preds tmp sapce this->eval_preds_.resize( evals.size(), std::vector() ); } /*! * \brief set parameters from outside * \param name name of the parameter * \param val value of the parameter */ inline void SetParam( const char *name, const char *val ) { if( !strcmp( name, "silent") ) silent = atoi( val ); if( !strcmp( name, "eval_metric") ) evaluator_.AddEval( val ); mparam.SetParam( name, val ); base_gbm.SetParam( name, val ); } /*! * \brief initialize solver before training, called before training * this function is reserved for solver to allocate necessary space and do other preparation */ inline void InitTrainer( void ) { base_gbm.InitTrainer(); if( mparam.loss_type == PAIRWISE) { evaluator_.AddEval( "pairwise" ); } else if( mparam.loss_type == MAP) { evaluator_.AddEval( "MAP" ); } else { evaluator_.AddEval( "NDCG" ); } evaluator_.Init(); } /*! * \brief initialize the current data storage for model, if the model is used first time, call this function */ inline void InitModel( void ) { base_gbm.InitModel(); mparam.AdjustBase(); } /*! * \brief load model from stream * \param fi input stream */ inline void LoadModel( utils::IStream &fi ) { base_gbm.LoadModel( fi ); utils::Assert( fi.Read( &mparam, sizeof(ModelParam) ) != 0 ); } /*! * \brief DumpModel * \param fo text file * \param fmap feature map that may help give interpretations of feature * \param with_stats whether print statistics as well */ inline void DumpModel( FILE *fo, const utils::FeatMap& fmap, bool with_stats ) { base_gbm.DumpModel( fo, fmap, with_stats ); } /*! * \brief Dump path of all trees * \param fo text file * \param data input data */ inline void DumpPath( FILE *fo, const RMatrix &data ) { base_gbm.DumpPath( fo, data.data ); } /*! * \brief save model to stream * \param fo output stream */ inline void SaveModel( utils::IStream &fo ) const { base_gbm.SaveModel( fo ); fo.Write( &mparam, sizeof(ModelParam) ); } /*! * \brief update the model for one iteration * \param iteration iteration number */ inline void UpdateOneIter( int iter ) { this->PredictBuffer( preds_, *train_, 0 ); this->GetGradient( preds_, train_->labels,train_->group_index, grad_, hess_ ); std::vector root_index; base_gbm.DoBoost( grad_, hess_, train_->data, root_index ); } /*! * \brief evaluate the model for specific iteration * \param iter iteration number * \param fo file to output log */ inline void EvalOneIter( int iter, FILE *fo = stderr ) { fprintf( fo, "[%d]", iter ); int buffer_offset = static_cast( train_->Size() ); for( size_t i = 0; i < evals_.size(); ++i ) { std::vector &preds = this->eval_preds_[ i ]; this->PredictBuffer( preds, *evals_[i], buffer_offset); evaluator_.Eval( fo, evname_[i].c_str(), preds, (*evals_[i]).labels ); buffer_offset += static_cast( evals_[i]->Size() ); } fprintf( fo,"\n" ); } /*! \brief get intransformed prediction, without buffering */ inline void Predict( std::vector &preds, const DMatrix &data ) { preds.resize( data.Size() ); const unsigned ndata = static_cast( data.Size() ); #pragma omp parallel for schedule( static ) for( unsigned j = 0; j < ndata; ++ j ) { preds[j] = mparam.base_score + base_gbm.Predict( data.data, j, -1 ); } } public: /*! * \brief update the model for one iteration * \param iteration iteration number */ inline void UpdateInteract( std::string action ) { this->InteractPredict( preds_, *train_, 0 ); int buffer_offset = static_cast( train_->Size() ); for( size_t i = 0; i < evals_.size(); ++i ) { std::vector &preds = this->eval_preds_[ i ]; this->InteractPredict( preds, *evals_[i], buffer_offset ); buffer_offset += static_cast( evals_[i]->Size() ); } if( action == "remove" ) { base_gbm.DelteBooster(); return; } this->GetGradient( preds_, train_->labels, grad_, hess_ ); std::vector root_index; base_gbm.DoBoost( grad_, hess_, train_->data, root_index ); this->InteractRePredict( *train_, 0 ); buffer_offset = static_cast( train_->Size() ); for( size_t i = 0; i < evals_.size(); ++i ) { this->InteractRePredict( *evals_[i], buffer_offset ); buffer_offset += static_cast( evals_[i]->Size() ); } } private: /*! \brief get the transformed predictions, given data */ inline void InteractPredict( std::vector &preds, const DMatrix &data, unsigned buffer_offset ) { preds.resize( data.Size() ); const unsigned ndata = static_cast( data.Size() ); #pragma omp parallel for schedule( static ) for( unsigned j = 0; j < ndata; ++ j ) { preds[j] = mparam.base_score + base_gbm.InteractPredict( data.data, j, buffer_offset + j ); } } /*! \brief repredict trial */ inline void InteractRePredict( const DMatrix &data, unsigned buffer_offset ) { const unsigned ndata = static_cast( data.Size() ); #pragma omp parallel for schedule( static ) for( unsigned j = 0; j < ndata; ++ j ) { base_gbm.InteractRePredict( data.data, j, buffer_offset + j ); } } private: /*! \brief get intransformed predictions, given data */ inline void PredictBuffer( std::vector &preds, const RMatrix &data, unsigned buffer_offset ) { preds.resize( data.Size() ); const unsigned ndata = static_cast( data.Size() ); #pragma omp parallel for schedule( static ) for( unsigned j = 0; j < ndata; ++ j ) { preds[j] = mparam.base_score + base_gbm.Predict( data.data, j, buffer_offset + j ); } } /*! \brief get the first order and second order gradient, given the transformed predictions and labels */ inline void GetGradient( const std::vector &preds, const std::vector &labels, const std::vector &group_index, std::vector &grad, std::vector &hess ) { grad.resize( preds.size() ); hess.resize( preds.size() ); bool j_better; float pred_diff,pred_diff_exp,first_order_gradient,second_order_gradient; for(int i = 0; i < group_index.size() - 1; i++){ xgboost::rank::sample::PairSamplerSet sampler; xgboost::rank::sample::Pairs pairs = sampler.GenPairs(preds,labels,group_index[i],group_index[i+1]); for(int j = group_index[i]; j < group_index[i+1]; j++){ std::vector pair_instance = pairs.GetPairs(j); j_better = labels[j] > labels[pair_instance[k]]; if(j_better){ for(int k = 0; k < pair_instance.size(); k++){ pred_diff = preds[preds[j] - pair_instance[k]]; pred_diff_exp = j_better? expf(-pred_diff):expf(pred_diff); first_order_gradient = mparam.FirstOrderGradient(pred_diff_exp); second_order_gradient = 2 * mparam.SecondOrderGradient(pred_diff_exp); hess[j] += second_order_gradient; grad[j] += first_order_gradient; hess[pair_instance[k]] += second_order_gradient; grad[pair_instance[k]] += -first_order_gradient; } } } } } private: enum LossType { PAIRWISE = 0, MAP = 1, NDCG = 2 }; /*! \brief training parameter for regression */ struct ModelParam { /* \brief global bias */ float base_score; /* \brief type of loss function */ int loss_type; /* \brief number of features */ int num_feature; /*! \brief reserved field */ int reserved[ 16 ]; /*! \brief constructor */ ModelParam( void ) { base_score = 0.5f; loss_type = 0; num_feature = 0; memset( reserved, 0, sizeof( reserved ) ); } /*! * \brief set parameters from outside * \param name name of the parameter * \param val value of the parameter */ inline void SetParam( const char *name, const char *val ) { if( !strcmp("base_score", name ) ) base_score = (float)atof( val ); if( !strcmp("loss_type", name ) ) loss_type = atoi( val ); if( !strcmp("bst:num_feature", name ) ) num_feature = atoi( val ); } /*! * \brief adjust base_score */ inline void AdjustBase( void ) { if( loss_type == 1 || loss_type == 2 ) { utils::Assert( base_score > 0.0f && base_score < 1.0f, "sigmoid range constrain" ); base_score = - logf( 1.0f / base_score - 1.0f ); } } /*! * \brief transform the linear sum to prediction * \param x linear sum of boosting ensemble * \return transformed prediction */ inline float PredTransform( float x ) { switch( loss_type ) { case PAIRWISE: case MAP: case NDCG: return 1.0f/(1.0f + expf(-x)); default: utils::Error("unknown loss_type"); return 0.0f; } } /*! * \brief calculate first order gradient of pairwise loss function(f(x) = ln(1+exp(-x)), * given the exponential of the difference of intransformed pair predictions * \param the intransformed prediction of positive instance * \param the intransformed prediction of negative instance * \return first order gradient */ inline float FirstOrderGradient( float pred_diff_exp) const { return -pred_diff_exp/(1 + pred_diff_exp); } /*! * \brief calculate second order gradient of pairwise loss function(f(x) = ln(1+exp(-x)), * given the exponential of the difference of intransformed pair predictions * \param the intransformed prediction of positive instance * \param the intransformed prediction of negative instance * \return second order gradient */ inline float SecondOrderGradient( float pred_diff_exp ) const { return pred_diff_exp/pow(1 + pred_diff_exp,2); } /*! * \brief calculating the loss, given the predictions, labels and the loss type * \param preds the given predictions * \param labels the given labels * \return the specified loss */ inline float Loss(const std::vector &preds, const std::vector &labels) const { switch( loss_type ) { case kLinearSquare: return SquareLoss(preds,labels); case kLogisticNeglik: case kLogisticClassify: return NegLoglikelihoodLoss(preds,labels); default: utils::Error("unknown loss_type"); return 0.0f; } } /*! * \brief calculating the square loss, given the predictions and labels * \param preds the given predictions * \param labels the given labels * \return the summation of square loss */ inline float SquareLoss(const std::vector &preds, const std::vector &labels) const { float ans = 0.0; for(size_t i = 0; i < preds.size(); i++) { float dif = preds[i] - labels[i]; ans += dif * dif; } return ans; } /*! * \brief calculating the square loss, given the predictions and labels * \param preds the given predictions * \param labels the given labels * \return the summation of square loss */ inline float NegLoglikelihoodLoss(const std::vector &preds, const std::vector &labels) const { float ans = 0.0; for(size_t i = 0; i < preds.size(); i++) ans -= labels[i] * logf(preds[i]) + ( 1 - labels[i] ) * logf(1 - preds[i]); return ans; } }; private: int silent; RankEvalSet evaluator_; booster::GBMBase base_gbm; ModelParam mparam; const RMatrix *train_; std::vector evals_; std::vector evname_; std::vector buffer_index_; private: std::vector grad_, hess_, preds_; std::vector< std::vector > eval_preds_; }; } }; #endif