#ifndef XGBOOST_RANK_H
#define XGBOOST_RANK_H
/*!
 * \file xgboost_rank.h
 * \brief class for gradient boosting ranking
 * \author Kailong Chen: chenkl198812@gmail.com, Tianqi Chen: tianqi.tchen@gmail.com
 */
#include <cmath>
#include <cstdio>
#include <cstring>
#include <cstdlib>
#include <string>
#include <vector>
#include <algorithm>
#include "xgboost_sample.h"
#include "xgboost_rank_data.h"
#include "xgboost_rank_eval.h"
#include "../utils/xgboost_omp.h"
#include "../booster/xgboost_gbmbase.h"
#include "../utils/xgboost_utils.h"
#include "../utils/xgboost_stream.h"

namespace xgboost {
namespace rank {
/*! \brief class for gradient boosted ranking */
class RankBoostLearner {
public:
    /*! \brief constructor */
    RankBoostLearner( void ) {
        silent = 0;
    }
    /*!
     * \brief a rank booster associated with training and evaluating data
     * \param train pointer to the training data
     * \param evals array of evaluating data
     * \param evname name of evaluation data, used to print statistics
     */
    RankBoostLearner( const RMatrix *train,
                      const std::vector<const RMatrix *> &evals,
                      const std::vector<std::string> &evname ) {
        silent = 0;
        this->SetData( train, evals, evname );
    }
    /*!
     * \brief associate rank booster with training and evaluating data
     * \param train pointer to the training data
     * \param evals array of evaluating data
     * \param evname name of evaluation data, used to print statistics
     */
    inline void SetData( const RMatrix *train,
                         const std::vector<const RMatrix *> &evals,
                         const std::vector<std::string> &evname ) {
        this->train_  = train;
        this->evals_  = evals;
        this->evname_ = evname;
        // estimate feature bound
        int num_feature = (int)(train->data.NumCol());
        // assign buffer index
        unsigned buffer_size = static_cast<unsigned>( train->Size() );
        for( size_t i = 0; i < evals.size(); ++ i ) {
            buffer_size += static_cast<unsigned>( evals[i]->Size() );
            num_feature = std::max( num_feature, (int)(evals[i]->data.NumCol()) );
        }
        char str_temp[25];
        if( num_feature > mparam.num_feature ) {
            mparam.num_feature = num_feature;
            sprintf( str_temp, "%d", num_feature );
            base_gbm.SetParam( "bst:num_feature", str_temp );
        }
        sprintf( str_temp, "%u", buffer_size );
        base_gbm.SetParam( "num_pbuffer", str_temp );
        if( !silent ) {
            printf( "buffer_size=%u\n", buffer_size );
        }
        // set eval_preds temp space
        this->eval_preds_.resize( evals.size(), std::vector<float>() );
    }
    /*!
     * \brief set parameters from outside
     * \param name name of the parameter
     * \param val value of the parameter
     */
    inline void SetParam( const char *name, const char *val ) {
        if( !strcmp( name, "silent") )      silent = atoi( val );
        if( !strcmp( name, "eval_metric") ) evaluator_.AddEval( val );
        mparam.SetParam( name, val );
        base_gbm.SetParam( name, val );
    }
    /*!
     * \brief initialize solver before training;
     *        this function is reserved for the solver to allocate necessary space and do other preparation
     */
    inline void InitTrainer( void ) {
        base_gbm.InitTrainer();
        if( mparam.loss_type == PAIRWISE ) {
            evaluator_.AddEval( "PAIR" );
        } else if( mparam.loss_type == MAP ) {
            evaluator_.AddEval( "MAP" );
        } else {
            evaluator_.AddEval( "NDCG" );
        }
        evaluator_.Init();
        sampler.AssignSampler( mparam.sampler_type );
    }
    /*!
     * \brief initialize the current data storage for model; if the model is used for the first time, call this function
     */
    inline void InitModel( void ) {
        base_gbm.InitModel();
    }
    /*!
     * \brief load model from stream
     * \param fi input stream
     */
    inline void LoadModel( utils::IStream &fi ) {
        base_gbm.LoadModel( fi );
        utils::Assert( fi.Read( &mparam, sizeof(ModelParam) ) != 0 );
    }
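    /*
     * A minimal usage sketch of the training interface above. Data loading and
     * the concrete stream objects are omitted; `train`, `evals`, `evname`, `fo`,
     * and `num_round` are illustrative placeholders, not names defined in this
     * header.
     *
     *   RankBoostLearner learner;
     *   learner.SetParam( "loss_type", "0" );        // 0 = PAIRWISE
     *   learner.SetParam( "eval_metric", "PAIR" );
     *   learner.SetData( &train, evals, evname );    // evals/evname may be empty
     *   learner.InitTrainer();
     *   learner.InitModel();                         // or LoadModel( fi ) to continue training
     *   for( int r = 0; r < num_round; ++ r ) {
     *       learner.UpdateOneIter( r );
     *       learner.EvalOneIter( r );                // logs to stderr by default
     *   }
     *   learner.SaveModel( fo );                     // fo is a utils::IStream
     */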
    /*!
     * \brief dump model into text file
     * \param fo text file
     * \param fmap feature map that may help give interpretations of features
     * \param with_stats whether to print statistics as well
     */
    inline void DumpModel( FILE *fo, const utils::FeatMap &fmap, bool with_stats ) {
        base_gbm.DumpModel( fo, fmap, with_stats );
    }
    /*!
     * \brief dump path of all trees
     * \param fo text file
     * \param data input data
     */
    inline void DumpPath( FILE *fo, const RMatrix &data ) {
        base_gbm.DumpPath( fo, data.data );
    }
    /*!
     * \brief save model to stream
     * \param fo output stream
     */
    inline void SaveModel( utils::IStream &fo ) const {
        base_gbm.SaveModel( fo );
        fo.Write( &mparam, sizeof(ModelParam) );
    }
    /*!
     * \brief update the model for one iteration
     * \param iter iteration number
     */
    inline void UpdateOneIter( int iter ) {
        this->PredictBuffer( preds_, *train_, 0 );
        this->GetGradient( preds_, train_->labels, train_->group_index, grad_, hess_ );
        std::vector<unsigned> root_index;
        base_gbm.DoBoost( grad_, hess_, train_->data, root_index );
    }
    /*!
     * \brief evaluate the model for a specific iteration
     * \param iter iteration number
     * \param fo file to output log
     */
    inline void EvalOneIter( int iter, FILE *fo = stderr ) {
        fprintf( fo, "[%d]", iter );
        unsigned buffer_offset = static_cast<unsigned>( train_->Size() );
        for( size_t i = 0; i < evals_.size(); ++i ) {
            std::vector<float> &preds = this->eval_preds_[ i ];
            this->PredictBuffer( preds, *evals_[i], buffer_offset );
            evaluator_.Eval( fo, evname_[i].c_str(), preds, (*evals_[i]).labels );
            buffer_offset += static_cast<unsigned>( evals_[i]->Size() );
        }
        fprintf( fo, "\n" );
    }
    /*! \brief get untransformed prediction, without buffering */
    inline void Predict( std::vector<float> &preds, const RMatrix &data ) {
        preds.resize( data.Size() );
        const unsigned ndata = static_cast<unsigned>( data.Size() );
        #pragma omp parallel for schedule( static )
        for( unsigned j = 0; j < ndata; ++ j ) {
            preds[j] = base_gbm.Predict( data.data, j, -1 );
        }
    }
public:
    /*!
     * \brief interactively update the model for one round: predict, optionally
     *        remove the last booster, boost, then refresh the prediction buffers
     * \param action interaction action, e.g. "update" or "remove"
     */
    inline void UpdateInteract( std::string action ) {
        this->InteractPredict( preds_, *train_, 0 );
        unsigned buffer_offset = static_cast<unsigned>( train_->Size() );
        for( size_t i = 0; i < evals_.size(); ++i ) {
            std::vector<float> &preds = this->eval_preds_[ i ];
            this->InteractPredict( preds, *evals_[i], buffer_offset );
            buffer_offset += static_cast<unsigned>( evals_[i]->Size() );
        }
        if( action == "remove" ) {
            base_gbm.DelteBooster();
            return;
        }
        this->GetGradient( preds_, train_->labels, train_->group_index, grad_, hess_ );
        std::vector<unsigned> root_index;
        base_gbm.DoBoost( grad_, hess_, train_->data, root_index );
        this->InteractRePredict( *train_, 0 );
        buffer_offset = static_cast<unsigned>( train_->Size() );
        for( size_t i = 0; i < evals_.size(); ++i ) {
            this->InteractRePredict( *evals_[i], buffer_offset );
            buffer_offset += static_cast<unsigned>( evals_[i]->Size() );
        }
    }
private:
    /*! \brief get the transformed predictions, given data */
    inline void InteractPredict( std::vector<float> &preds, const RMatrix &data, unsigned buffer_offset ) {
        preds.resize( data.Size() );
        const unsigned ndata = static_cast<unsigned>( data.Size() );
        #pragma omp parallel for schedule( static )
        for( unsigned j = 0; j < ndata; ++ j ) {
            preds[j] = base_gbm.InteractPredict( data.data, j, buffer_offset + j );
        }
    }
    /*! \brief repredict trial */
    inline void InteractRePredict( const RMatrix &data, unsigned buffer_offset ) {
        const unsigned ndata = static_cast<unsigned>( data.Size() );
        #pragma omp parallel for schedule( static )
        for( unsigned j = 0; j < ndata; ++ j ) {
            base_gbm.InteractRePredict( data.data, j, buffer_offset + j );
        }
    }
private:
    /*! \brief get untransformed predictions, given data */
    inline void PredictBuffer( std::vector<float> &preds, const RMatrix &data, unsigned buffer_offset ) {
        preds.resize( data.Size() );
        const unsigned ndata = static_cast<unsigned>( data.Size() );
        #pragma omp parallel for schedule( static )
        for( unsigned j = 0; j < ndata; ++ j ) {
            preds[j] = base_gbm.Predict( data.data, j, buffer_offset + j );
        }
    }
    /*!
     * \brief get the first order and second order gradient of the pairwise loss,
     *        given the untransformed predictions, labels and query group boundaries
     */
    inline void GetGradient( const std::vector<float> &preds,
                             const std::vector<float> &labels,
                             const std::vector<unsigned> &group_index,
                             std::vector<float> &grad,
                             std::vector<float> &hess ) {
        grad.resize( preds.size() );
        hess.resize( preds.size() );
        // gradients are accumulated over sampled pairs, so clear the buffers first
        std::fill( grad.begin(), grad.end(), 0.0f );
        std::fill( hess.begin(), hess.end(), 0.0f );
        float pred_diff, pred_diff_exp, first_order_gradient, second_order_gradient;
        for( size_t i = 0; i + 1 < group_index.size(); ++ i ) {
            // sample candidate pairs inside the query group [group_index[i], group_index[i+1])
            sample::Pairs pairs = sampler.GenPairs( preds, labels, group_index[i], group_index[i+1] );
            for( unsigned j = group_index[i]; j < group_index[i + 1]; ++ j ) {
                std::vector<int> pair_instance = pairs.GetPairs( j );
                for( size_t k = 0; k < pair_instance.size(); ++ k ) {
                    // only accumulate gradient when j is the more relevant instance of the pair
                    if( labels[j] > labels[pair_instance[k]] ) {
                        pred_diff     = preds[j] - preds[pair_instance[k]];
                        pred_diff_exp = expf( -pred_diff );
                        first_order_gradient  = mparam.FirstOrderGradient( pred_diff_exp );
                        second_order_gradient = 2 * mparam.SecondOrderGradient( pred_diff_exp );
                        grad[j] += first_order_gradient;
                        hess[j] += second_order_gradient;
                        grad[pair_instance[k]] += -first_order_gradient;
                        hess[pair_instance[k]] += second_order_gradient;
                    }
                }
            }
        }
    }
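    /*
     * Worked form of the pairwise gradients used above (a sketch of the math
     * behind GetGradient, not additional code). For a pair (j, k) where
     * instance j carries the higher label, the loss on the prediction
     * difference x = preds[j] - preds[k] is
     *
     *     f(x)   = ln(1 + exp(-x))
     *     f'(x)  = -exp(-x) / (1 + exp(-x))
     *     f''(x) =  exp(-x) / (1 + exp(-x))^2
     *
     * With pred_diff_exp = exp(-x), these match FirstOrderGradient and
     * SecondOrderGradient in ModelParam below: the positive instance j
     * receives f'(x) and the negative instance k receives -f'(x), while both
     * receive the (doubled, always positive) second order term as their
     * hessian contribution.
     */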
private:
    enum LossType {
        PAIRWISE = 0,
        MAP = 1,
        NDCG = 2
    };
    /*! \brief training parameter for ranking */
    struct ModelParam {
        /*! \brief type of loss function */
        int loss_type;
        /*! \brief number of features */
        int num_feature;
        /*! \brief reserved field */
        int reserved[ 16 ];
        /*! \brief sampler type */
        int sampler_type;
        /*! \brief constructor */
        ModelParam( void ) {
            loss_type = 0;
            num_feature = 0;
            sampler_type = 0;
            memset( reserved, 0, sizeof( reserved ) );
        }
        /*!
         * \brief set parameters from outside
         * \param name name of the parameter
         * \param val value of the parameter
         */
        inline void SetParam( const char *name, const char *val ) {
            if( !strcmp("loss_type", name ) )       loss_type = atoi( val );
            if( !strcmp("bst:num_feature", name ) ) num_feature = atoi( val );
            if( !strcmp("rank:sampler", name ) )    sampler_type = atoi( val );
        }
        /*!
         * \brief calculate first order gradient of the pairwise loss function f(x) = ln(1 + exp(-x)),
         *        given the exponential of the difference of the untransformed pair predictions
         * \param pred_diff_exp exponential of the difference of the untransformed pair predictions
         * \return first order gradient
         */
        inline float FirstOrderGradient( float pred_diff_exp ) const {
            return -pred_diff_exp / ( 1 + pred_diff_exp );
        }
        /*!
         * \brief calculate second order gradient of the pairwise loss function f(x) = ln(1 + exp(-x)),
         *        given the exponential of the difference of the untransformed pair predictions
         * \param pred_diff_exp exponential of the difference of the untransformed pair predictions
         * \return second order gradient
         */
        inline float SecondOrderGradient( float pred_diff_exp ) const {
            return pred_diff_exp / ( ( 1 + pred_diff_exp ) * ( 1 + pred_diff_exp ) );
        }
    };
private:
    int silent;
    RankEvalSet evaluator_;
    sample::PairSamplerWrapper sampler;
    booster::GBMBase base_gbm;
    ModelParam mparam;
    const RMatrix *train_;
    std::vector<const RMatrix *> evals_;
    std::vector<std::string> evname_;
    std::vector<unsigned> buffer_index_;
private:
    std::vector<float> grad_, hess_, preds_;
    std::vector< std::vector<float> > eval_preds_;
};
} // namespace rank
} // namespace xgboost
#endif