From 07e98254f5d2c8e2441af8306647f457623049fa Mon Sep 17 00:00:00 2001 From: kalenhaha Date: Fri, 16 May 2014 20:42:46 +0800 Subject: [PATCH 01/12] Impement new Lambda rank interface --- regrank/xgboost_regrank_obj.hpp | 140 ++++++++++++++++++++++++++++++++ 1 file changed, 140 insertions(+) diff --git a/regrank/xgboost_regrank_obj.hpp b/regrank/xgboost_regrank_obj.hpp index e4d99e0c7..e0ef67f7a 100644 --- a/regrank/xgboost_regrank_obj.hpp +++ b/regrank/xgboost_regrank_obj.hpp @@ -330,6 +330,146 @@ namespace xgboost{ virtual ~PairwiseRankObj(void){} virtual void GetLambdaWeight( const std::vector &sorted_list, std::vector &pairs ){} }; + + class LambdaRankObj_NDCG : public LambdaRankObj{ + + public: + virtual ~LambdaRankObj_NDCG(void){} + + inline float DCG(const std::vector &labels){ + return 1.0; + } + + inline float GetIDCG(const std::vector &sorted_list){ + std::vector labels; + for (size_t i = 0; i < sorted_list.size(); i++){ + labels.push_back(sorted_list[i].label); + } + + std::sort(labels.begin(), labels.end(), std::greater()); + return DCG(labels); + } + + /* + * \brief Obtain the delta NDCG if trying to switch the positions of instances in index1 or index2 + * in sorted triples. Here DCG is calculated as sigma_i 2^rel_i/log(i + 1) + * \param sorted_list the list containing entry information + * \param index1,index2 the instances switched + * \param the IDCG of the list + */ + inline float GetLambdaNDCG(const std::vector &sorted_list, + int index1, + int index2, float IDCG){ + double original = (1 << static_cast(sorted_list[index1].label)) / log(index1 + 2) + + (1 << static_cast(sorted_list[index2].label)) / log(index2 + 2); + double changed = (1 << static_cast(sorted_list[index2].label)) / log(index1 + 2) + + (1 << static_cast(sorted_list[index1].label)) / log(index2 + 2); + double ans = (original - changed) / IDCG; + if (ans < 0) ans = -ans; + return static_cast(ans); + } + + virtual void GetLambdaWeight(const std::vector &sorted_list, std::vector &pairs){ + float IDCG = GetIDCG(sorted_list); + for (size_t i = 0; i < pairs.size(); i++){ + pairs[i].weight = GetLambdaNDCG(sorted_list, + pairs[i].pos_index, pairs[i].neg_index, IDCG); + } + } + + }; + + class LambdaRankObj_MAP : public LambdaRankObj{ + + class Quadruple{ + public: + /* \brief the accumulated precision */ + float ap_acc_; + /* \brief the accumulated precision assuming a positive instance is missing*/ + float ap_acc_miss_; + /* \brief the accumulated precision assuming that one more positive instance is inserted ahead*/ + float ap_acc_add_; + /* \brief the accumulated positive instance count */ + float hits_; + + Quadruple(){} + + Quadruple(const Quadruple& q){ + ap_acc_ = q.ap_acc_; + ap_acc_miss_ = q.ap_acc_miss_; + ap_acc_add_ = q.ap_acc_add_; + hits_ = q.hits_; + } + + Quadruple(float ap_acc, float ap_acc_miss, float ap_acc_add, float hits + ) :ap_acc_(ap_acc), ap_acc_miss_(ap_acc_miss), ap_acc_add_(ap_acc_add), hits_(hits){ + + } + + }; + + public: + virtual ~LambdaRankObj_MAP(void){} + + /* + * \brief Obtain the delta MAP if trying to switch the positions of instances in index1 or index2 + * in sorted triples + * \param sorted_list the list containing entry information + * \param index1,index2 the instances switched + * \param map_acc a vector containing the accumulated precisions for each position in a list + */ + inline float GetLambdaMAP(const std::vector &sorted_list, + int index1, int index2, + std::vector< Quadruple > &map_acc){ + if (index1 == index2 || sorted_list[index1].label == sorted_list[index2].label) return 0.0; + if (index1 > index2) std::swap(index1, index2); + float original = map_acc[index2].ap_acc_; // The accumulated precision in the interval [index1,index2] + if (index1 != 0) original -= map_acc[index1 - 1].ap_acc_; + float changed = 0; + if (sorted_list[index1].label < sorted_list[index2].label){ + changed += map_acc[index2 - 1].ap_acc_add_ - map_acc[index1].ap_acc_add_; + changed += (map_acc[index1].hits_ + 1.0f) / (index1 + 1); + } + else{ + changed += map_acc[index2 - 1].ap_acc_miss_ - map_acc[index1].ap_acc_miss_; + changed += map_acc[index2].hits_ / (index2 + 1); + } + float ans = (changed - original) / (map_acc[map_acc.size() - 1].hits_); + if (ans < 0) ans = -ans; + return ans; + } + + /* + * \brief preprocessing results for calculating delta MAP + * \return The first field is the accumulated precision, the second field is the + * accumulated precision assuming a positive instance is missing, + * the third field is the accumulated precision assuming that one more positive + * instance is inserted, the fourth field is the accumulated positive instance count + */ + inline void GetMAPAcc(const std::vector &sorted_list, + std::vector< Quadruple > &map_acc){ + map_acc.resize(sorted_list.size()); + float hit = 0, acc1 = 0, acc2 = 0, acc3 = 0; + for (size_t i = 1; i <= sorted_list.size(); i++){ + if ((int)sorted_list[i - 1].label == 1) { + hit++; + acc1 += hit / i; + acc2 += (hit - 1) / i; + acc3 += (hit + 1) / i; + } + map_acc[i - 1] = Quadruple(acc1, acc2, acc3, hit); + } + } + virtual void GetLambdaWeight(const std::vector &sorted_list, std::vector &pairs){ + std::vector< Quadruple > map_acc; + GetMAPAcc(sorted_list, map_acc); + for (size_t i = 0; i < pairs.size(); i++){ + pairs[i].weight = GetLambdaMAP(sorted_list, pairs[i].pos_index, pairs[i].neg_index, map_acc); + } + } + + }; + }; }; #endif From e3a0c0efe565fb5c1d2e4d7603269a9cfe532e9e Mon Sep 17 00:00:00 2001 From: yepyao Date: Fri, 16 May 2014 21:18:32 +0800 Subject: [PATCH 02/12] Download data set from web site --- demo/rank/runexp.sh | 14 ++++++++++---- demo/rank/train | 0 2 files changed, 10 insertions(+), 4 deletions(-) delete mode 100644 demo/rank/train diff --git a/demo/rank/runexp.sh b/demo/rank/runexp.sh index cb15f1dd0..fc751a69d 100644 --- a/demo/rank/runexp.sh +++ b/demo/rank/runexp.sh @@ -1,11 +1,17 @@ -python trans_data.py train.txt mq2008.train mq2008.train.group +#Download the dataset from web site +wget http://research.microsoft.com/en-us/um/beijing/projects/letor/LETOR4.0/Data/MQ2008.rar -python trans_data.py test.txt mq2008.test mq2008.test.group +#please first install the unrar package +unrar x MQ2008 -python trans_data.py vali.txt mq2008.vali mq2008.vali.group +python MQ2008/Fold1/trans_data.py train.txt mq2008.train mq2008.train.group + +python MQ2008/Fold1/trans_data.py test.txt mq2008.test mq2008.test.group + +python MQ2008/Fold1/trans_data.py vali.txt mq2008.vali mq2008.vali.group ../../xgboost mq2008.conf ../../xgboost mq2008.conf task=pred model_in=0002.model -../../xgboost mq2008.conf task=dump model_in=0002.model name_dump=dump.raw.txt \ No newline at end of file +../../xgboost mq2008.conf task=dump model_in=0002.model name_dump=dump.raw.txt diff --git a/demo/rank/train b/demo/rank/train deleted file mode 100644 index e69de29bb..000000000 From 5db373e73c2e4f29494ead762b21b927fccfc2ed Mon Sep 17 00:00:00 2001 From: yepyao Date: Fri, 16 May 2014 21:20:41 +0800 Subject: [PATCH 03/12] small change --- demo/rank/mq2008.conf | 4 ++-- regrank/xgboost_regrank_obj.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/demo/rank/mq2008.conf b/demo/rank/mq2008.conf index 08b9d9679..0853fcb4e 100644 --- a/demo/rank/mq2008.conf +++ b/demo/rank/mq2008.conf @@ -4,8 +4,8 @@ booster_type = 0 objective="rank:pairwise" #objective="rank:softmax" -#objective="lambdarank:map" -#objective="lambdarank:ndcg" +#objective="rank:map" +#objective="rank:ndcg" # Tree Booster Parameters # step size shrinkage diff --git a/regrank/xgboost_regrank_obj.h b/regrank/xgboost_regrank_obj.h index f2fee0653..5a7bad9dd 100644 --- a/regrank/xgboost_regrank_obj.h +++ b/regrank/xgboost_regrank_obj.h @@ -113,8 +113,8 @@ namespace xgboost{ if( !strcmp("rank:pairwise", name ) ) return new PairwiseRankObj(); if( !strcmp("rank:softmax", name ) ) return new SoftmaxRankObj(); if( !strcmp("softmax", name ) ) return new SoftmaxMultiClassObj(); - // if (!strcmp("lambdarank:map", name)) return new LambdaRankObj_MAP(); - // if (!strcmp("lambdarank:ndcg", name)) return new LambdaRankObj_NDCG(); + if (!strcmp("rank:map", name)) return new LambdaRankObj_MAP(); + if (!strcmp("rank:ndcg", name)) return new LambdaRankObj_NDCG(); utils::Error("unknown objective function type"); return NULL; } From 4d03729683a084162a7c8ca0de96ced85dae804b Mon Sep 17 00:00:00 2001 From: yepyao Date: Fri, 16 May 2014 23:06:24 +0800 Subject: [PATCH 04/12] use ndcg@all in lambdarank for ndcg --- regrank/xgboost_regrank_obj.hpp | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/regrank/xgboost_regrank_obj.hpp b/regrank/xgboost_regrank_obj.hpp index e0ef67f7a..5272b42fa 100644 --- a/regrank/xgboost_regrank_obj.hpp +++ b/regrank/xgboost_regrank_obj.hpp @@ -336,8 +336,15 @@ namespace xgboost{ public: virtual ~LambdaRankObj_NDCG(void){} - inline float DCG(const std::vector &labels){ - return 1.0; + inline float CalcDCG( const std::vector &labels ){ + double sumdcg = 0.0; + for( size_t i = 0; i < labels.size(); i ++ ){ + const unsigned rel = labels[i]; + if( rel != 0 ){ + sumdcg += logf(2.0f) * ((1<(sumdcg); } inline float GetIDCG(const std::vector &sorted_list){ @@ -347,7 +354,7 @@ namespace xgboost{ } std::sort(labels.begin(), labels.end(), std::greater()); - return DCG(labels); + return CalcDCG(labels); } /* From 53633ae9c27c198b27657c2fc8f74d50dad4a6ea Mon Sep 17 00:00:00 2001 From: tqchen Date: Fri, 16 May 2014 19:24:53 -0700 Subject: [PATCH 05/12] chgs --- python/example/demo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/example/demo.py b/python/example/demo.py index f5e0aa2a7..c1cf82b0f 100755 --- a/python/example/demo.py +++ b/python/example/demo.py @@ -12,7 +12,7 @@ dtrain = xgb.DMatrix('agaricus.txt.train') dtest = xgb.DMatrix('agaricus.txt.test') # specify parameters via map, definition are same as c++ version -param = {'bst:max_depth':2, 'bst:eta':1, 'silent':1, 'loss_type':2 } +param = {'bst:max_depth':2, 'bst:eta':1, 'silent':1, 'objective':'binary:logistic' } # specify validations set to watch performance evallist = [(dtest,'eval'), (dtrain,'train')] From 1150fb59a8f8a73dc393241dd30867aa5fa023fa Mon Sep 17 00:00:00 2001 From: Tianqi Chen Date: Fri, 16 May 2014 19:30:32 -0700 Subject: [PATCH 06/12] Update demo.py --- python/example/demo.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/python/example/demo.py b/python/example/demo.py index c1cf82b0f..73935efab 100755 --- a/python/example/demo.py +++ b/python/example/demo.py @@ -29,11 +29,6 @@ bst.dump_model('dump.raw.txt') # dump model with feature map bst.dump_model('dump.raw.txt','featmap.txt') -# beta: interact mode -bst.set_param('bst:interact:expand',4) -bst.update_interact( dtrain, 'update', 0) -bst.dump_model('dump.raw2.txt') - ### # build dmatrix in python iteratively # From a70454e3ce672c21c2014145efa5d9dd09f52f5b Mon Sep 17 00:00:00 2001 From: tqchen Date: Fri, 16 May 2014 19:33:59 -0700 Subject: [PATCH 07/12] add bing to author list --- README.md | 3 ++- python/xgboost.py | 1 + python/xgboost_python.cpp | 1 + python/xgboost_python.h | 3 ++- 4 files changed, 6 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 64cbf5159..c775c9776 100644 --- a/README.md +++ b/README.md @@ -2,9 +2,10 @@ xgboost: eXtreme Gradient Boosting ======= An optimized general purpose gradient boosting (tree) library. -Authors: +Contributors: * Tianqi Chen, project creater * Kailong Chen, contributes regression module +* Bing Xu, contributes python interface, higgs example Turorial and Documentation: https://github.com/tqchen/xgboost/wiki diff --git a/python/xgboost.py b/python/xgboost.py index 5c3555770..d7cf9f63e 100644 --- a/python/xgboost.py +++ b/python/xgboost.py @@ -1,3 +1,4 @@ +# Author: Tianqi Chen, Bing Xu # module for xgboost import ctypes import os diff --git a/python/xgboost_python.cpp b/python/xgboost_python.cpp index 8dd210c52..7c63fc6ac 100644 --- a/python/xgboost_python.cpp +++ b/python/xgboost_python.cpp @@ -1,3 +1,4 @@ +// implementations in ctypes #include "xgboost_python.h" #include "../regrank/xgboost_regrank.h" #include "../regrank/xgboost_regrank_data.h" diff --git a/python/xgboost_python.h b/python/xgboost_python.h index ac3ca94ac..6c113a108 100644 --- a/python/xgboost_python.h +++ b/python/xgboost_python.h @@ -1,7 +1,8 @@ #ifndef XGBOOST_PYTHON_H #define XGBOOST_PYTHON_H /*! - * \file xgboost_regrank_data.h + * \file xgboost_python.h + * \author Tianqi Chen * \brief python wrapper for xgboost, using ctypes, * hides everything behind functions * use c style interface From e565916c1cead838e8e183b4b1c94dffacf7dec6 Mon Sep 17 00:00:00 2001 From: yepyao Date: Sat, 17 May 2014 10:35:10 +0800 Subject: [PATCH 08/12] fix small bug --- demo/rank/runexp.sh | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/demo/rank/runexp.sh b/demo/rank/runexp.sh index fc751a69d..305aef6b4 100644 --- a/demo/rank/runexp.sh +++ b/demo/rank/runexp.sh @@ -1,17 +1,17 @@ #Download the dataset from web site -wget http://research.microsoft.com/en-us/um/beijing/projects/letor/LETOR4.0/Data/MQ2008.rar +#wget http://research.microsoft.com/en-us/um/beijing/projects/letor/LETOR4.0/Data/MQ2008.rar #please first install the unrar package unrar x MQ2008 -python MQ2008/Fold1/trans_data.py train.txt mq2008.train mq2008.train.group +python trans_data.py MQ2008/Fold1/train.txt mq2008.train mq2008.train.group -python MQ2008/Fold1/trans_data.py test.txt mq2008.test mq2008.test.group +python trans_data.py MQ2008/Fold1/test.txt mq2008.test mq2008.test.group -python MQ2008/Fold1/trans_data.py vali.txt mq2008.vali mq2008.vali.group +python trans_data.py MQ2008/Fold1/vali.txt mq2008.vali mq2008.vali.group ../../xgboost mq2008.conf -../../xgboost mq2008.conf task=pred model_in=0002.model +../../xgboost mq2008.conf task=pred model_in=0004.model -../../xgboost mq2008.conf task=dump model_in=0002.model name_dump=dump.raw.txt +../../xgboost mq2008.conf task=dump model_in=0004.model name_dump=dump.raw.txt From c4a783f40827702bf4c8f10ffdabc288b8c50391 Mon Sep 17 00:00:00 2001 From: yepyao Date: Sat, 17 May 2014 10:50:15 +0800 Subject: [PATCH 09/12] small change --- demo/rank/runexp.sh | 2 +- regrank/xgboost_regrank_obj.h | 12 ------------ 2 files changed, 1 insertion(+), 13 deletions(-) diff --git a/demo/rank/runexp.sh b/demo/rank/runexp.sh index 305aef6b4..932fbb2a8 100755 --- a/demo/rank/runexp.sh +++ b/demo/rank/runexp.sh @@ -1,5 +1,5 @@ #Download the dataset from web site -#wget http://research.microsoft.com/en-us/um/beijing/projects/letor/LETOR4.0/Data/MQ2008.rar +wget http://research.microsoft.com/en-us/um/beijing/projects/letor/LETOR4.0/Data/MQ2008.rar #please first install the unrar package unrar x MQ2008 diff --git a/regrank/xgboost_regrank_obj.h b/regrank/xgboost_regrank_obj.h index 195646729..24396101b 100644 --- a/regrank/xgboost_regrank_obj.h +++ b/regrank/xgboost_regrank_obj.h @@ -109,17 +109,6 @@ namespace xgboost{ namespace xgboost{ namespace regrank{ inline IObjFunction* CreateObjFunction( const char *name ){ -<<<<<<< HEAD - if( !strcmp("reg", name ) ) return new RegressionObj(); - if( !strcmp("rank:pairwise", name ) ) return new PairwiseRankObj(); - if( !strcmp("rank:softmax", name ) ) return new SoftmaxRankObj(); - if( !strcmp("softmax", name ) ) return new SoftmaxMultiClassObj(); - if (!strcmp("rank:map", name)) return new LambdaRankObj_MAP(); - if (!strcmp("rank:ndcg", name)) return new LambdaRankObj_NDCG(); - utils::Error("unknown objective function type"); - return NULL; - } -======= if( !strcmp("reg:linear", name ) ) return new RegressionObj( LossType::kLinearSquare ); if( !strcmp("reg:logistic", name ) ) return new RegressionObj( LossType::kLogisticNeglik ); if( !strcmp("binary:logistic", name ) ) return new RegressionObj( LossType::kLogisticClassify ); @@ -130,7 +119,6 @@ namespace xgboost{ utils::Error("unknown objective function type"); return NULL; } ->>>>>>> 9eabb5c7f912a326005aca53a76c2e53a1661842 }; }; #endif From 6a9438ac86c6af184754bd5de4a5ee08006d26cc Mon Sep 17 00:00:00 2001 From: tqchen Date: Fri, 16 May 2014 19:51:33 -0700 Subject: [PATCH 10/12] before commit --- regrank/xgboost_regrank_obj.h | 12 --- regrank/xgboost_regrank_obj.hpp | 147 -------------------------------- 2 files changed, 159 deletions(-) diff --git a/regrank/xgboost_regrank_obj.h b/regrank/xgboost_regrank_obj.h index 195646729..24396101b 100644 --- a/regrank/xgboost_regrank_obj.h +++ b/regrank/xgboost_regrank_obj.h @@ -109,17 +109,6 @@ namespace xgboost{ namespace xgboost{ namespace regrank{ inline IObjFunction* CreateObjFunction( const char *name ){ -<<<<<<< HEAD - if( !strcmp("reg", name ) ) return new RegressionObj(); - if( !strcmp("rank:pairwise", name ) ) return new PairwiseRankObj(); - if( !strcmp("rank:softmax", name ) ) return new SoftmaxRankObj(); - if( !strcmp("softmax", name ) ) return new SoftmaxMultiClassObj(); - if (!strcmp("rank:map", name)) return new LambdaRankObj_MAP(); - if (!strcmp("rank:ndcg", name)) return new LambdaRankObj_NDCG(); - utils::Error("unknown objective function type"); - return NULL; - } -======= if( !strcmp("reg:linear", name ) ) return new RegressionObj( LossType::kLinearSquare ); if( !strcmp("reg:logistic", name ) ) return new RegressionObj( LossType::kLogisticNeglik ); if( !strcmp("binary:logistic", name ) ) return new RegressionObj( LossType::kLogisticClassify ); @@ -130,7 +119,6 @@ namespace xgboost{ utils::Error("unknown objective function type"); return NULL; } ->>>>>>> 9eabb5c7f912a326005aca53a76c2e53a1661842 }; }; #endif diff --git a/regrank/xgboost_regrank_obj.hpp b/regrank/xgboost_regrank_obj.hpp index 7350541cb..71ebec0ab 100644 --- a/regrank/xgboost_regrank_obj.hpp +++ b/regrank/xgboost_regrank_obj.hpp @@ -330,153 +330,6 @@ namespace xgboost{ virtual ~PairwiseRankObj(void){} virtual void GetLambdaWeight( const std::vector &sorted_list, std::vector &pairs ){} }; - - class LambdaRankObj_NDCG : public LambdaRankObj{ - - public: - virtual ~LambdaRankObj_NDCG(void){} - - inline float CalcDCG( const std::vector &labels ){ - double sumdcg = 0.0; - for( size_t i = 0; i < labels.size(); i ++ ){ - const unsigned rel = labels[i]; - if( rel != 0 ){ - sumdcg += logf(2.0f) * ((1<(sumdcg); - } - - inline float GetIDCG(const std::vector &sorted_list){ - std::vector labels; - for (size_t i = 0; i < sorted_list.size(); i++){ - labels.push_back(sorted_list[i].label); - } - - std::sort(labels.begin(), labels.end(), std::greater()); - return CalcDCG(labels); - } - - /* - * \brief Obtain the delta NDCG if trying to switch the positions of instances in index1 or index2 - * in sorted triples. Here DCG is calculated as sigma_i 2^rel_i/log(i + 1) - * \param sorted_list the list containing entry information - * \param index1,index2 the instances switched - * \param the IDCG of the list - */ - inline float GetLambdaNDCG(const std::vector &sorted_list, - int index1, - int index2, float IDCG){ - double original = (1 << static_cast(sorted_list[index1].label)) / log(index1 + 2) - + (1 << static_cast(sorted_list[index2].label)) / log(index2 + 2); - double changed = (1 << static_cast(sorted_list[index2].label)) / log(index1 + 2) - + (1 << static_cast(sorted_list[index1].label)) / log(index2 + 2); - double ans = (original - changed) / IDCG; - if (ans < 0) ans = -ans; - return static_cast(ans); - } - - virtual void GetLambdaWeight(const std::vector &sorted_list, std::vector &pairs){ - float IDCG = GetIDCG(sorted_list); - for (size_t i = 0; i < pairs.size(); i++){ - pairs[i].weight = GetLambdaNDCG(sorted_list, - pairs[i].pos_index, pairs[i].neg_index, IDCG); - } - } - - }; - - class LambdaRankObj_MAP : public LambdaRankObj{ - - class Quadruple{ - public: - /* \brief the accumulated precision */ - float ap_acc_; - /* \brief the accumulated precision assuming a positive instance is missing*/ - float ap_acc_miss_; - /* \brief the accumulated precision assuming that one more positive instance is inserted ahead*/ - float ap_acc_add_; - /* \brief the accumulated positive instance count */ - float hits_; - - Quadruple(){} - - Quadruple(const Quadruple& q){ - ap_acc_ = q.ap_acc_; - ap_acc_miss_ = q.ap_acc_miss_; - ap_acc_add_ = q.ap_acc_add_; - hits_ = q.hits_; - } - - Quadruple(float ap_acc, float ap_acc_miss, float ap_acc_add, float hits - ) :ap_acc_(ap_acc), ap_acc_miss_(ap_acc_miss), ap_acc_add_(ap_acc_add), hits_(hits){ - - } - - }; - - public: - virtual ~LambdaRankObj_MAP(void){} - - /* - * \brief Obtain the delta MAP if trying to switch the positions of instances in index1 or index2 - * in sorted triples - * \param sorted_list the list containing entry information - * \param index1,index2 the instances switched - * \param map_acc a vector containing the accumulated precisions for each position in a list - */ - inline float GetLambdaMAP(const std::vector &sorted_list, - int index1, int index2, - std::vector< Quadruple > &map_acc){ - if (index1 == index2 || sorted_list[index1].label == sorted_list[index2].label) return 0.0; - if (index1 > index2) std::swap(index1, index2); - float original = map_acc[index2].ap_acc_; // The accumulated precision in the interval [index1,index2] - if (index1 != 0) original -= map_acc[index1 - 1].ap_acc_; - float changed = 0; - if (sorted_list[index1].label < sorted_list[index2].label){ - changed += map_acc[index2 - 1].ap_acc_add_ - map_acc[index1].ap_acc_add_; - changed += (map_acc[index1].hits_ + 1.0f) / (index1 + 1); - } - else{ - changed += map_acc[index2 - 1].ap_acc_miss_ - map_acc[index1].ap_acc_miss_; - changed += map_acc[index2].hits_ / (index2 + 1); - } - float ans = (changed - original) / (map_acc[map_acc.size() - 1].hits_); - if (ans < 0) ans = -ans; - return ans; - } - - /* - * \brief preprocessing results for calculating delta MAP - * \return The first field is the accumulated precision, the second field is the - * accumulated precision assuming a positive instance is missing, - * the third field is the accumulated precision assuming that one more positive - * instance is inserted, the fourth field is the accumulated positive instance count - */ - inline void GetMAPAcc(const std::vector &sorted_list, - std::vector< Quadruple > &map_acc){ - map_acc.resize(sorted_list.size()); - float hit = 0, acc1 = 0, acc2 = 0, acc3 = 0; - for (size_t i = 1; i <= sorted_list.size(); i++){ - if ((int)sorted_list[i - 1].label == 1) { - hit++; - acc1 += hit / i; - acc2 += (hit - 1) / i; - acc3 += (hit + 1) / i; - } - map_acc[i - 1] = Quadruple(acc1, acc2, acc3, hit); - } - } - virtual void GetLambdaWeight(const std::vector &sorted_list, std::vector &pairs){ - std::vector< Quadruple > map_acc; - GetMAPAcc(sorted_list, map_acc); - for (size_t i = 0; i < pairs.size(); i++){ - pairs[i].weight = GetLambdaMAP(sorted_list, pairs[i].pos_index, pairs[i].neg_index, map_acc); - } - } - - }; - }; }; #endif From 2cae28087a367fc6c261bb839665d00e5efda4b0 Mon Sep 17 00:00:00 2001 From: tqchen Date: Fri, 16 May 2014 19:52:39 -0700 Subject: [PATCH 11/12] do not need to dump in rank --- demo/rank/runexp.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/demo/rank/runexp.sh b/demo/rank/runexp.sh index 932fbb2a8..c17ebee05 100755 --- a/demo/rank/runexp.sh +++ b/demo/rank/runexp.sh @@ -14,4 +14,4 @@ python trans_data.py MQ2008/Fold1/vali.txt mq2008.vali mq2008.vali.group ../../xgboost mq2008.conf task=pred model_in=0004.model -../../xgboost mq2008.conf task=dump model_in=0004.model name_dump=dump.raw.txt + From 7537d691d9a2dd541a8a588e2eb988494fa0b1d0 Mon Sep 17 00:00:00 2001 From: Tianqi Chen Date: Fri, 16 May 2014 20:00:20 -0700 Subject: [PATCH 12/12] Update README.md --- demo/kaggle-higgs/README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/demo/kaggle-higgs/README.md b/demo/kaggle-higgs/README.md index a3c208002..b3db23266 100644 --- a/demo/kaggle-higgs/README.md +++ b/demo/kaggle-higgs/README.md @@ -16,5 +16,6 @@ make - +Speed +===== speedtest.py compares xgboost's speed on this dataset with sklearn.GBM