From 8c0c10463eccef36aea8ba06c956e9665de35bf5 Mon Sep 17 00:00:00 2001
From: tqchen
Date: Sun, 4 May 2014 12:10:03 -0700
Subject: [PATCH] add booster group support to xgboost. now has beta
 multi-class classification

---
 booster/xgboost_gbmbase.h       | 99 +++++++++++++++++++++++----------
 regrank/xgboost_regrank.h       | 64 ++++++++++++++++-----
 regrank/xgboost_regrank_data.h  |  8 ++-
 regrank/xgboost_regrank_eval.h  | 14 ++---
 regrank/xgboost_regrank_obj.h   |  5 +-
 regrank/xgboost_regrank_obj.hpp | 95 ++++++++++++++++++++++++-------
 6 files changed, 210 insertions(+), 75 deletions(-)

diff --git a/booster/xgboost_gbmbase.h b/booster/xgboost_gbmbase.h
index ced13c565..c96e22af3 100644
--- a/booster/xgboost_gbmbase.h
+++ b/booster/xgboost_gbmbase.h
@@ -88,8 +88,8 @@ namespace xgboost{
                 }
             }
             if (mparam.num_pbuffer != 0){
-                pred_buffer.resize(mparam.num_pbuffer);
-                pred_counter.resize(mparam.num_pbuffer);
+                pred_buffer.resize(mparam.PredBufferSize());
+                pred_counter.resize(mparam.PredBufferSize());
                 utils::Assert(fi.Read(&pred_buffer[0], pred_buffer.size()*sizeof(float)) != 0);
                 utils::Assert(fi.Read(&pred_counter[0], pred_counter.size()*sizeof(unsigned)) != 0);
             }
@@ -117,8 +117,8 @@ namespace xgboost{
          */
         inline void InitModel(void){
             pred_buffer.clear(); pred_counter.clear();
-            pred_buffer.resize(mparam.num_pbuffer, 0.0);
-            pred_counter.resize(mparam.num_pbuffer, 0);
+            pred_buffer.resize(mparam.PredBufferSize(), 0.0);
+            pred_counter.resize(mparam.PredBufferSize(), 0);
             utils::Assert(mparam.num_boosters == 0);
             utils::Assert(boosters.size() == 0);
         }
@@ -130,6 +130,7 @@ namespace xgboost{
             if (tparam.nthread != 0){
                 omp_set_num_threads(tparam.nthread);
             }
+            if (mparam.num_booster_group == 0) mparam.num_booster_group = 1;
             // make sure all the boosters get the latest parameters
             for (size_t i = 0; i < this->boosters.size(); i++){
                 this->ConfigBooster(this->boosters[i]);
@@ -175,12 +176,14 @@ namespace xgboost{
          * \param feats features of each instance
          * \param root_index pre-partitioned root index of each instance,
          *        root_index.size() can be 0, which indicates that no pre-partition is involved
+         * \param bst_group the booster group this update belongs to; by default there is only one booster group, so this parameter can be left as default
          */
         inline void DoBoost(std::vector<float> &grad,
                             std::vector<float> &hess,
                             const booster::FMatrixS &feats,
-                            const std::vector<unsigned> &root_index) {
-            booster::IBooster *bst = this->GetUpdateBooster();
+                            const std::vector<unsigned> &root_index,
+                            int bst_group = 0 ) {
+            booster::IBooster *bst = this->GetUpdateBooster( bst_group );
             bst->DoBoost(grad, hess, feats, root_index);
         }
         /*!
@@ -190,26 +193,30 @@ namespace xgboost{
          * \param row_index row index in the feature matrix
          * \param buffer_index the buffer index of the current feature line, default -1 means no buffer assigned
          * \param root_index root id of current instance, default = 0
+         * \param bst_group booster group index
          * \return prediction
          */
-        inline float Predict(const FMatrixS &feats, bst_uint row_index, int buffer_index = -1, unsigned root_index = 0){
-            size_t istart = 0;
+        inline float Predict(const FMatrixS &feats, bst_uint row_index,
+                             int buffer_index = -1, unsigned root_index = 0, int bst_group = 0 ){
+            size_t itop = 0;
             float psum = 0.0f;
+            const int bid = mparam.BufferOffset(buffer_index, bst_group);
             // load buffered results if any
-            if (mparam.do_reboost == 0 && buffer_index >= 0){
-                utils::Assert(buffer_index < mparam.num_pbuffer, "buffer index exceed num_pbuffer");
-                istart = this->pred_counter[buffer_index];
-                psum = this->pred_buffer[buffer_index];
+            if (mparam.do_reboost == 0 && bid >= 0){
+                itop = this->pred_counter[bid];
+                psum = this->pred_buffer[bid];
             }
-            for (size_t i = istart; i < this->boosters.size(); i++){
-                psum += this->boosters[i]->Predict(feats, row_index, root_index);
+            for (size_t i = itop; i < this->boosters.size(); ++i ){
+                if( booster_info[i] == bst_group ){
+                    psum += this->boosters[i]->Predict(feats, row_index, root_index);
+                }
             }
             // update the buffered results
-            if (mparam.do_reboost == 0 && buffer_index >= 0){
-                this->pred_counter[buffer_index] = static_cast<unsigned>(boosters.size());
-                this->pred_buffer[buffer_index] = psum;
+            if (mparam.do_reboost == 0 && bid >= 0){
+                this->pred_counter[bid] = static_cast<unsigned>(boosters.size());
+                this->pred_buffer[bid] = psum;
             }
             return psum;
         }
@@ -217,6 +224,11 @@
         inline int NumBoosters(void) const{
             return mparam.num_boosters;
         }
+        /*! \return number of booster groups */
+        inline int NumBoosterGroup(void) const{
+            if( mparam.num_booster_group == 0 ) return 1;
+            return mparam.num_booster_group;
+        }
     public:
         //--------trial code for interactively updating an existing booster------
         //-------- usually not needed, ignore this region ---------
         /*!
          * \brief same as Predict, but removes the prediction of the booster to be updated;
          *        this function must be called once and only once for every data with pbuffer
          */
-        inline float InteractPredict(const FMatrixS &feats, bst_uint row_index, int buffer_index = -1, unsigned root_index = 0){
+        inline float InteractPredict(const FMatrixS &feats, bst_uint row_index,
+                                     int buffer_index = -1, unsigned root_index = 0, int bst_group = 0){
             float psum = this->Predict(feats, row_index, buffer_index, root_index);
             if (tparam.reupdate_booster != -1){
                 const int bid = tparam.reupdate_booster;
                 utils::Assert(bid >= 0 && bid < (int)boosters.size(), "interact:booster_index exceed existing bound");
-                psum -= boosters[bid]->Predict(feats, row_index, root_index);
+                if( bst_group == booster_info[bid] ){
+                    psum -= boosters[bid]->Predict(feats, row_index, root_index);
+                }
                 if (mparam.do_reboost == 0 && buffer_index >= 0){
-                    this->pred_buffer[buffer_index] = psum;
+                    this->pred_buffer[mparam.BufferOffset(buffer_index,bst_group)] = psum;
                 }
             }
             return psum;
         }
@@ -246,15 +261,21 @@
                 booster_info[i - 1] = booster_info[i];
             }
             boosters.resize(mparam.num_boosters -= 1);
-            booster_info.resize(boosters.size());
+            booster_info.resize(boosters.size());
+            // update pred counter
+            for( size_t i = 0; i < pred_counter.size(); ++ i ){
+                if( pred_counter[i] > (unsigned)bid ) pred_counter[i] -= 1;
+            }
         }
         /*!
          * \brief update the prediction buffer after the booster has been updated
          */
-        inline void InteractRePredict(const FMatrixS &feats, bst_uint row_index, int buffer_index = -1, unsigned root_index = 0){
+        inline void InteractRePredict(const FMatrixS &feats, bst_uint row_index,
+                                      int buffer_index = -1, unsigned root_index = 0, int bst_group = 0 ){
             if (tparam.reupdate_booster != -1){
                 const int bid = tparam.reupdate_booster;
+                if( booster_info[bid] != bst_group ) return;
                 utils::Assert(bid >= 0 && bid < (int)boosters.size(), "interact:booster_index exceed existing bound");
                 if (mparam.do_reboost == 0 && buffer_index >= 0){
-                    this->pred_buffer[buffer_index] += boosters[bid]->Predict(feats, row_index, root_index);
+                    this->pred_buffer[mparam.BufferOffset(buffer_index,bst_group)] += boosters[bid]->Predict(feats, row_index, root_index);
                 }
             }
         }
@@ -278,18 +299,19 @@ namespace xgboost{
          * \brief get a booster to update
          * \return the booster created
          */
-        inline booster::IBooster *GetUpdateBooster(void){
+        inline booster::IBooster *GetUpdateBooster(int bst_group){
             if (tparam.reupdate_booster != -1){
                 const int bid = tparam.reupdate_booster;
                 utils::Assert(bid >= 0 && bid < (int)boosters.size(), "interact:booster_index exceed existing bound");
                 this->ConfigBooster(boosters[bid]);
+                utils::Assert( bst_group == booster_info[bid], "booster group must match existing reupdate booster");
                 return boosters[bid];
             }
             if (mparam.do_reboost == 0 || boosters.size() == 0){
                 mparam.num_boosters += 1;
                 boosters.push_back(booster::CreateBooster(mparam.booster_type));
-                booster_info.push_back(0);
+                booster_info.push_back(bst_group);
                 this->ConfigBooster(boosters.back());
                 boosters.back()->InitModel();
             }
@@ -316,8 +338,13 @@ namespace xgboost{
              *        set to 1 for linear booster, so that the regularization term can be considered
              */
             int do_reboost;
+            /*!
+             * \brief number of booster groups, i.e. how many predictions a single
+             *        input instance corresponds to
+             */
+            int num_booster_group;
             /*! \brief reserved parameters */
-            int reserved[32];
+            int reserved[31];
             /*! \brief constructor */
             ModelParam(void){
                 num_boosters = 0;
@@ -325,6 +352,7 @@ namespace xgboost{
                 num_roots = num_feature = 0;
                 do_reboost = 0;
                 num_pbuffer = 0;
+                num_booster_group = 1;
                 memset(reserved, 0, sizeof(reserved));
             }
             /*!
@@ -338,10 +366,21 @@ namespace xgboost{
                     // linear booster automatically sets do_reboost
                     if (booster_type == 1) do_reboost = 1;
                 }
-                if (!strcmp("num_pbuffer", name)) num_pbuffer = atoi(val);
-                if (!strcmp("do_reboost", name)) do_reboost = atoi(val);
-                if (!strcmp("bst:num_roots", name)) num_roots = atoi(val);
-                if (!strcmp("bst:num_feature", name)) num_feature = atoi(val);
+                if (!strcmp("num_pbuffer", name)) num_pbuffer = atoi(val);
+                if (!strcmp("do_reboost", name)) do_reboost = atoi(val);
+                if (!strcmp("num_booster_group", name)) num_booster_group = atoi(val);
+                if (!strcmp("bst:num_roots", name)) num_roots = atoi(val);
+                if (!strcmp("bst:num_feature", name)) num_feature = atoi(val);
+            }
+            inline int PredBufferSize(void) const{
+                if (num_booster_group == 0) return num_pbuffer;
+                else return num_booster_group * num_pbuffer;
+            }
+            inline int BufferOffset( int buffer_index, int bst_group ) const{
+                if( buffer_index < 0 ) return -1;
+                utils::Assert( buffer_index < num_pbuffer, "buffer_index exceed num_pbuffer" );
+                return buffer_index + num_pbuffer * bst_group;
             }
         };
         /*!
          * \brief training parameters
          */
diff --git a/regrank/xgboost_regrank.h b/regrank/xgboost_regrank.h
index 2363b5eae..c91632b20 100644
--- a/regrank/xgboost_regrank.h
+++ b/regrank/xgboost_regrank.h
@@ -86,6 +86,7 @@ namespace xgboost{
                 if (!strcmp(name, "silent")) silent = atoi(val);
                 if (!strcmp(name, "eval_metric")) evaluator_.AddEval(val);
                 if (!strcmp(name, "objective") ) name_obj_ = val;
+                if (!strcmp(name, "num_class") ) base_gbm.SetParam("num_booster_group", val );
                 mparam.SetParam(name, val);
                 base_gbm.SetParam(name, val);
                 cfg_.push_back( std::make_pair( std::string(name), std::string(val) ) );
@@ -95,7 +96,13 @@
              * this function is reserved for solver to allocate necessary space and do other preparation
              */
             inline void InitTrainer(void){
-                base_gbm.InitTrainer();
+                if( mparam.num_class != 0 ){
+                    if( name_obj_ != "softmax" ){
+                        name_obj_ = "softmax";
+                        printf("auto select objective=softmax to support multi-class classification\n" );
+                    }
+                }
+                base_gbm.InitTrainer();
                 obj_ = CreateObjFunction( name_obj_.c_str() );
                 for( size_t i = 0; i < cfg_.size(); ++ i ){
                     obj_->SetParam( cfg_[i].first.c_str(), cfg_[i].second.c_str() );
@@ -166,9 +173,18 @@
             inline void UpdateOneIter(const DMatrix &train){
                 this->PredictRaw(preds_, train);
                 obj_->GetGradient(preds_, train.info, base_gbm.NumBoosters(), grad_, hess_);
-                // do boost
-                std::vector<unsigned> root_index;
-                base_gbm.DoBoost(grad_, hess_, train.data, root_index);
+                if( grad_.size() == train.Size() ){
+                    base_gbm.DoBoost(grad_, hess_, train.data, train.info.root_index);
+                }else{
+                    int ngroup = base_gbm.NumBoosterGroup();
+                    utils::Assert( grad_.size() == train.Size() * (size_t)ngroup, "BUG: UpdateOneIter: mclass" );
+                    std::vector<float> tgrad( train.Size() ), thess( train.Size() );
+                    for( int g = 0; g < ngroup; ++ g ){
+                        memcpy( &tgrad[0], &grad_[g*tgrad.size()], sizeof(float)*tgrad.size() );
+                        memcpy( &thess[0], &hess_[g*tgrad.size()], sizeof(float)*tgrad.size() );
+                        base_gbm.DoBoost(tgrad, thess, train.data, train.info.root_index, g );
+                    }
+                }
             }
             /*!
              * \brief evaluate the model for specific iteration
@@ -190,9 +206,14 @@
                 fprintf(fo, "\n");
                 fflush(fo);
             }
-            /*! \brief get prediction, without buffering */
-            inline void Predict(std::vector<float> &preds, const DMatrix &data){
-                this->PredictRaw(preds,data);
+            /*!
+             * \brief get prediction
+             * \param preds storage to store the prediction
+             * \param data input data
+             * \param bst_group the booster group we are in
+             */
+            inline void Predict(std::vector<float> &preds, const DMatrix &data, int bst_group = -1){
+                this->PredictRaw( preds, data, bst_group );
                 obj_->PredTransform( preds );
             }
         public:
@@ -243,22 +264,31 @@
             }
         private:
             /*! \brief get un-transformed prediction */
-            inline void PredictRaw(std::vector<float> &preds, const DMatrix &data){
-                this->PredictBuffer(preds, data, this->FindBufferOffset(data) );
+            inline void PredictRaw(std::vector<float> &preds, const DMatrix &data, int bst_group = -1 ){
+                int buffer_offset = this->FindBufferOffset(data);
+                if( bst_group < 0 ){
+                    int ngroup = base_gbm.NumBoosterGroup();
+                    preds.resize( data.Size() * ngroup );
+                    for( int g = 0; g < ngroup; ++ g ){
+                        this->PredictBuffer(&preds[ data.Size() * g ], data, buffer_offset, g );
+                    }
+                }else{
+                    preds.resize( data.Size() );
+                    this->PredictBuffer(&preds[0], data, buffer_offset, bst_group );
+                }
             }
             /*!
              * \brief get the un-transformed predictions, given data
              */
-            inline void PredictBuffer(std::vector<float> &preds, const DMatrix &data, int buffer_offset){
-                preds.resize(data.Size());
+            inline void PredictBuffer(float *preds, const DMatrix &data, int buffer_offset, int bst_group ){
                 const unsigned ndata = static_cast<unsigned>(data.Size());
                 if( buffer_offset >= 0 ){
                     #pragma omp parallel for schedule( static )
                     for (unsigned j = 0; j < ndata; ++j){
-                        preds[j] = mparam.base_score + base_gbm.Predict(data.data, j, buffer_offset + j);
+                        preds[j] = mparam.base_score + base_gbm.Predict(data.data, j, buffer_offset + j, data.info.GetRoot(j), bst_group );
                     }
                 }else{
                     #pragma omp parallel for schedule( static )
                     for (unsigned j = 0; j < ndata; ++j){
-                        preds[j] = mparam.base_score + base_gbm.Predict(data.data, j, -1);
+                        preds[j] = mparam.base_score + base_gbm.Predict(data.data, j, -1, data.info.GetRoot(j), bst_group );
                     }
                 }
             }
@@ -270,14 +300,17 @@
             /* \brief type of loss function */
             int loss_type;
             /* \brief number of features */
-            int num_feature;
+            int num_feature;
+            /* \brief number of classes, if it is multi-class classification */
+            int num_class;
             /*! \brief reserved field */
-            int reserved[16];
+            int reserved[15];
             /*! \brief constructor */
             ModelParam(void){
                 base_score = 0.5f;
                 loss_type = 0;
                 num_feature = 0;
+                num_class = 0;
                 memset(reserved, 0, sizeof(reserved));
             }
             /*!
@@ -288,6 +321,7 @@
             inline void SetParam(const char *name, const char *val){
                 if (!strcmp("base_score", name)) base_score = (float)atof(val);
                 if (!strcmp("loss_type", name)) loss_type = atoi(val);
+                if (!strcmp("num_class", name)) num_class = atoi(val);
                 if (!strcmp("bst:num_feature", name)) num_feature = atoi(val);
             }
             /*!
diff --git a/regrank/xgboost_regrank_data.h b/regrank/xgboost_regrank_data.h
index 227d0381f..a5191044d 100644
--- a/regrank/xgboost_regrank_data.h
+++ b/regrank/xgboost_regrank_data.h
@@ -35,11 +35,17 @@ namespace xgboost{
             std::vector<unsigned> group_ptr;
             /*! \brief weights of each instance, optional */
             std::vector<float> weights;
+            /*! \brief specified root index of each instance, can be used for multi-task setting */
+            std::vector<unsigned> root_index;
             /*! \brief get weight of each instance */
             inline float GetWeight( size_t i ) const{
-                if( weights.size() != 0 ) return weights[i];
+                if( weights.size() != 0 ) return weights[i];
                 else return 1.0f;
             }
+            inline unsigned GetRoot( size_t i ) const{
+                if( root_index.size() != 0 ) return root_index[i];
+                else return 0;
+            }
         };
     public:
         /*! \brief feature data content */
diff --git a/regrank/xgboost_regrank_eval.h b/regrank/xgboost_regrank_eval.h
index 0c03a1769..df7e1e2ef 100644
--- a/regrank/xgboost_regrank_eval.h
+++ b/regrank/xgboost_regrank_eval.h
@@ -13,6 +13,7 @@
 #include "../utils/xgboost_omp.h"
 #include "../utils/xgboost_random.h"
 #include "xgboost_regrank_data.h"
+#include "xgboost_regrank_utils.h"
 
 namespace xgboost{
     namespace regrank{
@@ -31,17 +32,11 @@
             virtual ~IEvaluator(void){}
         };
 
-        inline static bool CmpFirst(const std::pair<float, float> &a, const std::pair<float, float> &b){
-            return a.first > b.first;
-        }
-        inline static bool CmpSecond(const std::pair<float, float> &a, const std::pair<float, float> &b){
-            return a.second > b.second;
-        }
-
         /*!
          * \brief RMSE
          */
         struct EvalRMSE : public IEvaluator{
             virtual float Eval(const std::vector<float> &preds,
                                const DMatrix::Info &info) const {
+                utils::Assert( preds.size() == info.labels.size(), "label and prediction size not match" );
                 const unsigned ndata = static_cast<unsigned>(preds.size());
                 float sum = 0.0, wsum = 0.0;
                 #pragma omp parallel for reduction(+:sum,wsum) schedule( static )
@@ -62,6 +57,7 @@
         struct EvalLogLoss : public IEvaluator{
             virtual float Eval(const std::vector<float> &preds,
                                const DMatrix::Info &info) const {
+                utils::Assert( preds.size() == info.labels.size(), "label and prediction size not match" );
                 const unsigned ndata = static_cast<unsigned>(preds.size());
                 float sum = 0.0f, wsum = 0.0f;
                 #pragma omp parallel for reduction(+:sum,wsum) schedule( static )
@@ -106,7 +102,8 @@
         /*! \brief Area under curve, for both classification and rank */
         struct EvalAuc : public IEvaluator{
             virtual float Eval(const std::vector<float> &preds,
-                               const DMatrix::Info &info) const {
+                               const DMatrix::Info &info) const {
+                utils::Assert( preds.size() == info.labels.size(), "label and prediction size not match" );
                 std::vector<unsigned> tgptr(2, 0); tgptr[1] = preds.size();
                 const std::vector<unsigned> &gptr = info.group_ptr.size() == 0 ? tgptr : info.group_ptr;
                 utils::Assert(gptr.back() == preds.size(), "EvalAuc: group structure must match number of prediction");
@@ -159,6 +156,7 @@
         public:
             virtual float Eval(const std::vector<float> &preds,
                                const DMatrix::Info &info) const {
+                utils::Assert( preds.size() == info.labels.size(), "label and prediction size not match" );
                 const std::vector<unsigned> &gptr = info.group_ptr;
                 utils::Assert(gptr.size() != 0 && gptr.back() == preds.size(), "EvalAuc: group structure must match number of prediction");
                 const unsigned ngroup = static_cast<unsigned>(gptr.size() - 1);
diff --git a/regrank/xgboost_regrank_obj.h b/regrank/xgboost_regrank_obj.h
index a494aa763..b6f081fe9 100644
--- a/regrank/xgboost_regrank_obj.h
+++ b/regrank/xgboost_regrank_obj.h
@@ -106,8 +106,9 @@ namespace xgboost{
     namespace regrank{
         IObjFunction* CreateObjFunction( const char *name ){
             if( !strcmp("reg", name ) ) return new RegressionObj();
-            if( !strcmp("rank", name ) ) return new PairwiseRankObj();
-            if( !strcmp("softmax", name ) ) return new SoftmaxObj();
+            if( !strcmp("rank:pairwise", name ) ) return new PairwiseRankObj();
+            if( !strcmp("rank:softmax", name ) ) return new SoftmaxRankObj();
+            if( !strcmp("softmax", name ) ) return new SoftmaxMultiClassObj();
             utils::Error("unknown objective function type");
             return NULL;
         }
diff --git a/regrank/xgboost_regrank_obj.hpp b/regrank/xgboost_regrank_obj.hpp
index ffd30db0c..6a1ed7741 100644
--- a/regrank/xgboost_regrank_obj.hpp
+++ b/regrank/xgboost_regrank_obj.hpp
@@ -1,12 +1,13 @@
 #ifndef XGBOOST_REGRANK_OBJ_HPP
 #define XGBOOST_REGRANK_OBJ_HPP
 /*!
- * \file xgboost_regrank_obj.h
+ * \file xgboost_regrank_obj.hpp
  * \brief implementation of objective functions
  * \author Tianqi Chen, Kailong Chen
  */
 //#include "xgboost_regrank_sample.h"
 #include <vector>
+#include "xgboost_regrank_utils.h"
 
 namespace xgboost{
     namespace regrank{
@@ -24,6 +25,7 @@ namespace xgboost{
                                      int iter,
                                      std::vector<float> &grad,
                                      std::vector<float> &hess ) {
+                utils::Assert( preds.size() == info.labels.size(), "label and prediction size not match" );
                 grad.resize(preds.size()); hess.resize(preds.size());
                 const unsigned ndata = static_cast<unsigned>(preds.size());
@@ -52,11 +54,11 @@ namespace xgboost{
     namespace regrank{
         // simple softmax rank
-        class SoftmaxObj : public IObjFunction{
+        class SoftmaxRankObj : public IObjFunction{
         public:
-            SoftmaxObj(void){
+            SoftmaxRankObj(void){
             }
-            virtual ~SoftmaxObj(){}
+            virtual ~SoftmaxRankObj(){}
             virtual void SetParam(const char *name, const char *val){
             }
             virtual void GetGradient(const std::vector<float>& preds,
@@ -64,6 +66,7 @@ namespace xgboost{
                                      int iter,
                                      std::vector<float> &grad,
                                      std::vector<float> &hess ) {
+                utils::Assert( preds.size() == info.labels.size(), "label and prediction size not match" );
                 grad.resize(preds.size()); hess.resize(preds.size());
                 const std::vector<unsigned> &gptr = info.group_ptr;
                 utils::Assert( gptr.size() != 0 && gptr.back() == preds.size(), "rank loss must have group file" );
@@ -96,23 +99,76 @@
             }
             virtual const char* DefaultEvalMetric(void) {
                 return "pre@1";
-            }
-        private:
-            inline static void Softmax( std::vector<float>& rec ){
-                float wmax = rec[0];
-                for( size_t i = 1; i < rec.size(); ++ i ){
-                    wmax = std::max( rec[i], wmax );
-                }
-                double wsum = 0.0f;
-                for( size_t i = 0; i < rec.size(); ++ i ){
-                    rec[i] = expf(rec[i]-wmax);
-                    wsum += rec[i];
-                }
-                for( size_t i = 0; i < rec.size(); ++ i ){
-                    rec[i] /= wsum;
-                }
             }
         };
+
+        // simple softmax multi-class classification
+        class SoftmaxMultiClassObj : public IObjFunction{
+        public:
+            SoftmaxMultiClassObj(void){
+                nclass = 0;
+            }
+            virtual ~SoftmaxMultiClassObj(){}
+            virtual void SetParam(const char *name, const char *val){
+                if( !strcmp( "num_class", name ) ) nclass = atoi(val);
+            }
+            virtual void GetGradient(const std::vector<float>& preds,
+                                     const DMatrix::Info &info,
+                                     int iter,
+                                     std::vector<float> &grad,
+                                     std::vector<float> &hess ) {
+                utils::Assert( nclass != 0, "must set num_class to use softmax" );
+                utils::Assert( preds.size() == (size_t)nclass * info.labels.size(), "SoftmaxMultiClassObj: label and pred size do not match" );
+                grad.resize(preds.size()); hess.resize(preds.size());
+
+                const unsigned ndata = static_cast<unsigned>(info.labels.size());
+                #pragma omp parallel
+                {
+                    std::vector<float> rec(nclass);
+                    #pragma omp for schedule(static)
+                    for (unsigned j = 0; j < ndata; ++j){
+                        for( int k = 0; k < nclass; ++ k ){
+                            rec[k] = preds[j + k * ndata];
+                        }
+                        Softmax( rec );
+                        int label = static_cast<int>(info.labels[j]);
+                        utils::Assert( label < nclass, "SoftmaxMultiClassObj: label exceed num_class" );
+                        for( int k = 0; k < nclass; ++ k ){
+                            float p = rec[ k ];
+                            if( label == k ){
+                                grad[j+k*ndata] = p - 1.0f;
+                            }else{
+                                grad[j+k*ndata] = p;
+                            }
+                            hess[j+k*ndata] = 2.0f * p * ( 1.0f - p );
+                        }
+                    }
+                }
+            }
+            virtual void PredTransform(std::vector<float> &preds){
+                utils::Assert( nclass != 0, "must set num_class to use softmax" );
+                utils::Assert( preds.size() % nclass == 0, "SoftmaxMultiClassObj: label and pred size do not match" );
+                const unsigned ndata = static_cast<unsigned>(preds.size()/nclass);
+                #pragma omp parallel
+                {
+                    std::vector<float> rec(nclass);
+                    #pragma omp for schedule(static)
+                    for (unsigned j = 0; j < ndata; ++j){
+                        for( int k = 0; k < nclass; ++ k ){
+                            rec[k] = preds[j + k * ndata];
+                        }
+                        Softmax( rec );
+                        preds[j] = FindMaxIndex( rec );
+                    }
+                }
+                preds.resize( ndata );
+            }
+            virtual const char* DefaultEvalMetric(void) {
+                return "error";
+            }
+        private:
+            int nclass;
+        };
     };
     namespace regrank{
@@ -133,6 +189,7 @@
                                      int iter,
                                      std::vector<float> &grad,
                                      std::vector<float> &hess ) {
+                utils::Assert( preds.size() == info.labels.size(), "label and prediction size not match" );
                 grad.resize(preds.size()); hess.resize(preds.size());
                 const std::vector<unsigned> &gptr = info.group_ptr;
                 utils::Assert( gptr.size() != 0 && gptr.back() == preds.size(), "rank loss must have group file" );
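
Note on the multi-class layout introduced by this patch: a k-class model keeps one booster group per class, and raw predictions are stored class-major, so the score of instance `j` for class `k` lives at `preds[j + k * ndata]`. Below is a minimal standalone sketch (not part of the patch) of the per-instance softmax gradient and hessian that `SoftmaxMultiClassObj` computes; the local `Softmax` helper mirrors the recipe this patch moves into `xgboost_regrank_utils.h`, and everything else here is illustrative only:

```cpp
// standalone illustration of the softmax gradient/hessian used in this patch
#include <cstdio>
#include <cmath>
#include <algorithm>
#include <vector>

// max-subtraction for numerical stability, then exponentiate and normalize,
// same recipe as the Softmax helper moved to xgboost_regrank_utils.h
inline void Softmax(std::vector<float> &rec){
    float wmax = *std::max_element(rec.begin(), rec.end());
    double wsum = 0.0;
    for (size_t i = 0; i < rec.size(); ++i){
        rec[i] = std::exp(rec[i] - wmax);
        wsum += rec[i];
    }
    for (size_t i = 0; i < rec.size(); ++i){
        rec[i] /= static_cast<float>(wsum);
    }
}

int main(void){
    const int nclass = 3;                         // hypothetical num_class
    std::vector<float> rec = {0.5f, 1.5f, -0.2f}; // raw scores of one instance, one per booster group
    const int label = 1;                          // true class of this instance
    Softmax(rec);
    for (int k = 0; k < nclass; ++k){
        float p = rec[k];
        // gradient of the softmax negative log-likelihood w.r.t. the class-k score
        float grad = (k == label) ? p - 1.0f : p;
        // the patch uses 2*p*(1-p) as the scaled second-order weight
        float hess = 2.0f * p * (1.0f - p);
        std::printf("class %d: p=%.4f grad=%.4f hess=%.4f\n", k, p, grad, hess);
    }
    return 0;
}
```

For training, the flow in this patch is: setting `num_class` in the config is forwarded to the gbm layer as `num_booster_group`, `InitTrainer` auto-selects `objective = softmax`, and each `UpdateOneIter` round slices the class-major gradient into per-group `tgrad`/`thess` vectors and grows one booster per class.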