From d05cb137515fe0c539f146a6057a00db901b7eaa Mon Sep 17 00:00:00 2001
From: antinucleon <antinucleon@gmail.com>
Date: Fri, 16 May 2014 20:57:42 -0600
Subject: [PATCH 01/18] demo

---
 demo/multi_classification/train.py    | 42 +++++++++++++++++++++++++++
 demo/multi_classification/wgetdata.sh |  2 ++
 2 files changed, 44 insertions(+)
 create mode 100755 demo/multi_classification/train.py
 create mode 100755 demo/multi_classification/wgetdata.sh

diff --git a/demo/multi_classification/train.py b/demo/multi_classification/train.py
new file mode 100755
index 000000000..2dc98f4d6
--- /dev/null
+++ b/demo/multi_classification/train.py
@@ -0,0 +1,42 @@
+
+import sys
+import numpy as np
+sys.path.append('../../python/')
+import xgboost as xgb
+
+
+
+data = np.loadtxt('./dermatology.data', delimiter=',',converters={33: lambda x:int(x == '?'), 34: lambda x:int(x) } )
+sz = data.shape
+
+train = data[:int(sz[0] * 0.7), :]
+test = data[int(sz[0] * 0.7):, :]
+
+train_X = train[:,0:33]
+train_Y = train[:, 34]
+
+
+test_X = test[:,0:33]
+test_Y = test[:, 34]
+
+xg_train = xgb.DMatrix( train_X, label=train_Y)
+xg_test = xgb.DMatrix(test_X, label=test_Y)
+# setup parameters for xgboost
+param = {}
+# use logistic regression loss, use raw prediction before logistic transformation
+# since we only need the rank
+param['objective'] = 'multi:softmax'
+# scale weight of positive examples
+param['bst:eta'] = 0.1
+param['bst:max_depth'] = 6
+param['eval_metric'] = 'auc'
+param['silent'] = 1
+param['nthread'] = 4
+param['num_class'] = 5
+
+watchlist = [ (xg_train,'train'), (xg_test, 'test') ]
+num_round = 5
+bst = xgb.train(param, xg_train, num_round, watchlist );
+
+
+
diff --git a/demo/multi_classification/wgetdata.sh b/demo/multi_classification/wgetdata.sh
new file mode 100755
index 000000000..10dbcd8fb
--- /dev/null
+++ b/demo/multi_classification/wgetdata.sh
@@ -0,0 +1,2 @@
+#! /bin/bash
+wget https://archive.ics.uci.edu/ml/machine-learning-databases/dermatology/dermatology.data

From 7537d691d9a2dd541a8a588e2eb988494fa0b1d0 Mon Sep 17 00:00:00 2001
From: Tianqi Chen <tqchen@users.noreply.github.com>
Date: Fri, 16 May 2014 20:00:20 -0700
Subject: [PATCH 02/18] Update README.md

---
 demo/kaggle-higgs/README.md | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/demo/kaggle-higgs/README.md b/demo/kaggle-higgs/README.md
index a3c208002..b3db23266 100644
--- a/demo/kaggle-higgs/README.md
+++ b/demo/kaggle-higgs/README.md
@@ -16,5 +16,6 @@ make
 
 
 
-
+Speed
+=====
 speedtest.py compares xgboost's speed on this dataset with sklearn.GBM

From f52f7b78995abd92d830bf8758e825750a76ac53 Mon Sep 17 00:00:00 2001
From: antinucleon <antinucleon@gmail.com>
Date: Fri, 16 May 2014 21:05:11 -0600
Subject: [PATCH 03/18] demo

---
 demo/multi_classification/train.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/demo/multi_classification/train.py b/demo/multi_classification/train.py
index 2dc98f4d6..89a065450 100755
--- a/demo/multi_classification/train.py
+++ b/demo/multi_classification/train.py
@@ -1,4 +1,4 @@
-
+#! /usr/bin/python
 import sys
 import numpy as np
 sys.path.append('../../python/')
@@ -29,7 +29,6 @@ param['objective'] = 'multi:softmax'
 # scale weight of positive examples
 param['bst:eta'] = 0.1
 param['bst:max_depth'] = 6
-param['eval_metric'] = 'auc'
 param['silent'] = 1
 param['nthread'] = 4
 param['num_class'] = 5

From 8e5e3340a29c9a0580326de5afd2407401cae72c Mon Sep 17 00:00:00 2001
From: tqchen <tianqi.tchen@gmail.com>
Date: Fri, 16 May 2014 20:12:04 -0700
Subject: [PATCH 04/18] multi class

---
 demo/multi_classification/runexp.sh   | 9 +++++++++
 demo/multi_classification/train.py    | 9 +++------
 demo/multi_classification/wgetdata.sh | 2 --
 3 files changed, 12 insertions(+), 8 deletions(-)
 create mode 100755 demo/multi_classification/runexp.sh
 delete mode 100755 demo/multi_classification/wgetdata.sh

diff --git a/demo/multi_classification/runexp.sh b/demo/multi_classification/runexp.sh
new file mode 100755
index 000000000..0af814725
--- /dev/null
+++ b/demo/multi_classification/runexp.sh
@@ -0,0 +1,9 @@
+#!/bin/bash
+if [ -f dermatology.data ]
+then
+    echo "use existing data to run multi class classification"
+else
+    echo "getting data from uci, make sure you are connected to internet"
+    wget https://archive.ics.uci.edu/ml/machine-learning-databases/dermatology/dermatology.data
+fi
+python train.py
diff --git a/demo/multi_classification/train.py b/demo/multi_classification/train.py
index 89a065450..d51824a16 100755
--- a/demo/multi_classification/train.py
+++ b/demo/multi_classification/train.py
@@ -4,9 +4,8 @@ import numpy as np
 sys.path.append('../../python/')
 import xgboost as xgb
 
-
-
-data = np.loadtxt('./dermatology.data', delimiter=',',converters={33: lambda x:int(x == '?'), 34: lambda x:int(x) } )
+# labels need to be in the range 0 to num_class - 1
+data = np.loadtxt('./dermatology.data', delimiter=',',converters={33: lambda x:int(x == '?'), 34: lambda x:int(x)-1 } )
 sz = data.shape
 
 train = data[:int(sz[0] * 0.7), :]
@@ -31,11 +30,9 @@ param['bst:eta'] = 0.1
 param['bst:max_depth'] = 6
 param['silent'] = 1
 param['nthread'] = 4
-param['num_class'] = 5
+param['num_class'] = 6
 
 watchlist = [ (xg_train,'train'), (xg_test, 'test') ]
 num_round = 5
 bst = xgb.train(param, xg_train, num_round, watchlist );
 
-
-
diff --git a/demo/multi_classification/wgetdata.sh b/demo/multi_classification/wgetdata.sh
deleted file mode 100755
index 10dbcd8fb..000000000
--- a/demo/multi_classification/wgetdata.sh
+++ /dev/null
@@ -1,2 +0,0 @@
-#! /bin/bash
-wget https://archive.ics.uci.edu/ml/machine-learning-databases/dermatology/dermatology.data

From cfd6c9e3b7c3fa7d97d86c9e40164be6b11f7605 Mon Sep 17 00:00:00 2001
From: Tianqi Chen <tqchen@users.noreply.github.com>
Date: Fri, 16 May 2014 20:16:10 -0700
Subject: [PATCH 05/18] Update train.py

---
 demo/multi_classification/train.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/demo/multi_classification/train.py b/demo/multi_classification/train.py
index d51824a16..d4cf2a0d4 100755
--- a/demo/multi_classification/train.py
+++ b/demo/multi_classification/train.py
@@ -22,8 +22,7 @@ xg_train = xgb.DMatrix( train_X, label=train_Y)
 xg_test = xgb.DMatrix(test_X, label=test_Y)
 # setup parameters for xgboost
 param = {}
-# use logistic regression loss, use raw prediction before logistic transformation
-# since we only need the rank
+# use softmax multi-class classification
 param['objective'] = 'multi:softmax'
 # scale weight of positive examples
 param['bst:eta'] = 0.1
@@ -35,4 +34,9 @@ param['num_class'] = 6
 watchlist = [ (xg_train,'train'), (xg_test, 'test') ]
 num_round = 5
 bst = xgb.train(param, xg_train, num_round, watchlist );
+# get prediction
+pred = bst.predict( xg_test );
+
+print 'error=%f' % sum(int(pred[i]) != test_Y[i] for i in len(test_Y)) / float(len(test_Y)) 
+
 

From 6c72d0220524ef9d42a8357493f32cdb4fb25388 Mon Sep 17 00:00:00 2001
From: tqchen <tianqi.tchen@gmail.com>
Date: Fri, 16 May 2014 20:18:34 -0700
Subject: [PATCH 06/18] chg

---
 demo/multi_classification/train.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/demo/multi_classification/train.py b/demo/multi_classification/train.py
index d4cf2a0d4..38d818890 100755
--- a/demo/multi_classification/train.py
+++ b/demo/multi_classification/train.py
@@ -37,6 +37,6 @@ bst = xgb.train(param, xg_train, num_round, watchlist );
 # get prediction
 pred = bst.predict( xg_test );
 
-print 'error=%f' % sum(int(pred[i]) != test_Y[i] for i in len(test_Y)) / float(len(test_Y)) 
+print 'predicting, classification error=%f' % (sum( int(pred[i]) != test_Y[i] for i in xrange(len(test_Y))) / float(len(test_Y)) )
 
 

From 3e4dd2fce070fd6d03e59821b3a47bac25d81e9a Mon Sep 17 00:00:00 2001
From: antinucleon <antinucleon@gmail.com>
Date: Fri, 16 May 2014 21:27:37 -0600
Subject: [PATCH 07/18] chg

---
 demo/multiclass_classification/README.md                  | 8 ++++++++
 .../runexp.sh                                             | 0
 .../train.py                                              | 0
 3 files changed, 8 insertions(+)
 create mode 100644 demo/multiclass_classification/README.md
 rename demo/{multi_classification => multiclass_classification}/runexp.sh (100%)
 rename demo/{multi_classification => multiclass_classification}/train.py (100%)

diff --git a/demo/multiclass_classification/README.md b/demo/multiclass_classification/README.md
new file mode 100644
index 000000000..4d66ee06a
--- /dev/null
+++ b/demo/multiclass_classification/README.md
@@ -0,0 +1,8 @@
+Demonstrating how to use XGBoost to accomplish a multi-class classification task on the [UCI Dermatology dataset](https://archive.ics.uci.edu/ml/datasets/Dermatology)
+
+1. Run runexp.sh
+```bash
+./runexp.sh
+```
+
+Explanations can be found in the [wiki](https://github.com/tqchen/xgboost/wiki)
diff --git a/demo/multi_classification/runexp.sh b/demo/multiclass_classification/runexp.sh
similarity index 100%
rename from demo/multi_classification/runexp.sh
rename to demo/multiclass_classification/runexp.sh
diff --git a/demo/multi_classification/train.py b/demo/multiclass_classification/train.py
similarity index 100%
rename from demo/multi_classification/train.py
rename to demo/multiclass_classification/train.py

From b07ff1ac8d4625c7b59ac2ce59d503d1a1c9c4c8 Mon Sep 17 00:00:00 2001
From: tqchen <tianqi.tchen@gmail.com>
Date: Fri, 16 May 2014 20:28:07 -0700
Subject: [PATCH 08/18] fix softmax

---
 regrank/xgboost_regrank.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/regrank/xgboost_regrank.h b/regrank/xgboost_regrank.h
index b2649735c..e3868002a 100644
--- a/regrank/xgboost_regrank.h
+++ b/regrank/xgboost_regrank.h
@@ -97,8 +97,8 @@ namespace xgboost{
             */
             inline void InitTrainer(void){
                 if( mparam.num_class != 0 ){
-                    if( name_obj_ != "softmax" ){
-                        name_obj_ = "softmax";
+                    if( name_obj_ != "multi:softmax" ){
+                        name_obj_ = "multi:softmax";
                         printf("auto select objective=softmax to support multi-class classification\n" );
                     }
                 }

From 255bad90cb8c7d4f95b61e5ba6c624cee06f551f Mon Sep 17 00:00:00 2001
From: yepyao <yepyao@gmail.com>
Date: Sat, 17 May 2014 11:34:24 +0800
Subject: [PATCH 09/18] small change

---
 demo/rank/mq2008.conf           |   4 +-
 demo/rank/runexp.sh             |  12 +--
 regrank/xgboost_regrank_obj.h   |   6 +-
 regrank/xgboost_regrank_obj.hpp | 157 ++++++++++++++++++++++++++++++++
 4 files changed, 167 insertions(+), 12 deletions(-)

diff --git a/demo/rank/mq2008.conf b/demo/rank/mq2008.conf
index 0d26580c2..f355b0578 100644
--- a/demo/rank/mq2008.conf
+++ b/demo/rank/mq2008.conf
@@ -2,8 +2,8 @@
 # choose the tree booster, 0: tree, 1: linear
 booster_type = 0 
 
-# so far, we have pairwise rank
-objective="rank:pairwise"
+# specify objective
+objective="rank:map"
 
 # Tree Booster Parameters
 # step size shrinkage
diff --git a/demo/rank/runexp.sh b/demo/rank/runexp.sh
index c17ebee05..3867047f3 100755
--- a/demo/rank/runexp.sh
+++ b/demo/rank/runexp.sh
@@ -1,14 +1,8 @@
-#Download the dataset from web site
-wget http://research.microsoft.com/en-us/um/beijing/projects/letor/LETOR4.0/Data/MQ2008.rar
+python trans_data.py train.txt mq2008.train mq2008.train.group
 
-#please first install the unrar package
-unrar x MQ2008
+python trans_data.py test.txt mq2008.test mq2008.test.group
 
-python trans_data.py MQ2008/Fold1/train.txt mq2008.train mq2008.train.group
-
-python trans_data.py MQ2008/Fold1/test.txt mq2008.test mq2008.test.group
-
-python trans_data.py MQ2008/Fold1/vali.txt mq2008.vali mq2008.vali.group
+python trans_data.py vali.txt mq2008.vali mq2008.vali.group
 
 ../../xgboost mq2008.conf
 
diff --git a/regrank/xgboost_regrank_obj.h b/regrank/xgboost_regrank_obj.h
index 24396101b..a33b828d9 100644
--- a/regrank/xgboost_regrank_obj.h
+++ b/regrank/xgboost_regrank_obj.h
@@ -116,7 +116,11 @@ namespace xgboost{
            if( !strcmp("multi:softmax", name ) )      return new SoftmaxMultiClassObj();
            if( !strcmp("rank:pairwise", name ) ) return new PairwiseRankObj();
            if( !strcmp("rank:softmax", name ) ) return new SoftmaxRankObj();
-           utils::Error("unknown objective function type");
+           if( !strcmp("rank:pairwise", name ) ) return new PairwiseRankObj();
+           if( !strcmp("rank:softmax", name ) ) return new SoftmaxRankObj();
+           if( !strcmp("rank:map", name ) ) return new LambdaRankObj_MAP();
+           if( !strcmp("rank:ndcg", name ) ) return new LambdaRankObj_NDCG();
+	   utils::Error("unknown objective function type");
            return NULL;
        }
     };
diff --git a/regrank/xgboost_regrank_obj.hpp b/regrank/xgboost_regrank_obj.hpp
index 71ebec0ab..70d4347f7 100644
--- a/regrank/xgboost_regrank_obj.hpp
+++ b/regrank/xgboost_regrank_obj.hpp
@@ -330,6 +330,163 @@ namespace xgboost{
             virtual ~PairwiseRankObj(void){}
             virtual void GetLambdaWeight( const std::vector<ListEntry> &sorted_list, std::vector<LambdaPair> &pairs ){}            
         };
+
+
+        class LambdaRankObj_NDCG : public LambdaRankObj{
+            
+        public:
+            virtual ~LambdaRankObj_NDCG(void){}
+
+            inline float CalcDCG( const std::vector<float> &labels ){
+                double sumdcg = 0.0;
+                for( size_t i = 0; i < labels.size(); i ++ ){
+                    const unsigned rel = labels[i];
+                    if( rel != 0 ){ 
+                        sumdcg += logf(2.0f) * ((1<<rel)-1) / logf( i + 2 );
+                    }
+                }
+                return static_cast<float>(sumdcg);
+            }
+
+            inline float GetIDCG(const std::vector<ListEntry> &sorted_list){
+                std::vector<float> labels;
+                for (size_t i = 0; i < sorted_list.size(); i++){
+                    labels.push_back(sorted_list[i].label);
+                }
+
+                std::sort(labels.begin(), labels.end(), std::greater<float>());
+                return CalcDCG(labels);
+            }
+
+            /*
+            * \brief Obtain the delta NDCG if trying to switch the positions of instances in index1 or index2
+            *        in sorted triples. Here DCG is calculated as sigma_i 2^rel_i/log(i + 1)
+            * \param sorted_list the list containing entry information
+            * \param index1,index2 the instances switched
+            * \param the IDCG of the list
+            */
+            inline float GetLambdaNDCG(const std::vector<ListEntry> &sorted_list,
+                int index1,
+                int index2, float IDCG){
+                double original = (1 << static_cast<int>(sorted_list[index1].label)) / log(index1 + 2)
+                    + (1 << static_cast<int>(sorted_list[index2].label)) / log(index2 + 2);
+                double changed = (1 << static_cast<int>(sorted_list[index2].label)) / log(index1 + 2)
+                    + (1 << static_cast<int>(sorted_list[index1].label)) / log(index2 + 2);
+                double ans = (original - changed) / IDCG;
+                if (ans < 0) ans = -ans;
+                return static_cast<float>(ans);
+            }
+
+            virtual void GetLambdaWeight(const std::vector<ListEntry> &sorted_list, std::vector<LambdaPair> &pairs){
+                float IDCG = GetIDCG(sorted_list);
+                for (size_t i = 0; i < pairs.size(); i++){
+                    pairs[i].weight = GetLambdaNDCG(sorted_list,
+                        pairs[i].pos_index, pairs[i].neg_index, IDCG);
+                }
+            }
+            
+        };
+
+        class LambdaRankObj_MAP : public LambdaRankObj{
+
+            class Quadruple{
+            public:
+                /* \brief the accumulated precision */
+                float ap_acc_;
+                /* \brief the accumulated precision assuming a positive instance is missing*/
+                float ap_acc_miss_;
+                /* \brief the accumulated precision assuming that one more positive instance is inserted ahead*/
+                float ap_acc_add_;
+                /* \brief the accumulated positive instance count */
+                float hits_;
+
+                Quadruple(){}
+
+                Quadruple(const Quadruple& q){
+                    ap_acc_ = q.ap_acc_;
+                    ap_acc_miss_ = q.ap_acc_miss_;
+                    ap_acc_add_ = q.ap_acc_add_;
+                    hits_ = q.hits_;
+                }
+
+                Quadruple(float ap_acc, float ap_acc_miss, float ap_acc_add, float hits
+                    ) :ap_acc_(ap_acc), ap_acc_miss_(ap_acc_miss), ap_acc_add_(ap_acc_add), hits_(hits){
+
+                }
+
+            };
+
+        public:
+            virtual ~LambdaRankObj_MAP(void){}
+
+            /*
+            * \brief Obtain the delta MAP if trying to switch the positions of instances in index1 or index2
+            *        in sorted triples
+            * \param sorted_list the list containing entry information
+            * \param index1,index2 the instances switched
+            * \param map_acc a vector containing the accumulated precisions for each position in a list
+            */
+            inline float GetLambdaMAP(const std::vector<ListEntry> &sorted_list,
+                int index1, int index2,
+                std::vector< Quadruple > &map_acc){
+                if (index1 == index2 
+			|| sorted_list[index1].label == sorted_list[index2].label
+			|| map_acc[map_acc.size() - 1].hits_ == 0
+			) return 0.0;
+                if (index1 > index2) std::swap(index1, index2);
+                float original = map_acc[index2].ap_acc_; // The accumulated precision in the interval [index1,index2]
+                if (index1 != 0) original -= map_acc[index1 - 1].ap_acc_;
+                float changed = 0;
+                if (sorted_list[index1].label < sorted_list[index2].label){
+                    changed += map_acc[index2 - 1].ap_acc_add_ - map_acc[index1].ap_acc_add_;
+                    changed += (map_acc[index1].hits_ + 1.0f) / (index1 + 1);
+                }
+                else{
+                    changed += map_acc[index2 - 1].ap_acc_miss_ - map_acc[index1].ap_acc_miss_;
+                    changed += map_acc[index2].hits_ / (index2 + 1);
+                }
+                if(map_acc[map_acc.size() - 1].hits_ == 0) printf("haha\n");
+
+                float ans = (changed - original) / (map_acc[map_acc.size() - 1].hits_);
+                if (ans < 0) ans = -ans;
+                return ans;
+            }
+
+            /*
+            * \brief preprocessing results for calculating delta MAP
+            * \return The first field is the accumulated precision, the second field is the
+            *         accumulated precision assuming a positive instance is missing,
+            *         the third field is the accumulated precision assuming that one more positive
+            *         instance is inserted, the fourth field is the accumulated positive instance count
+            */
+            inline void GetMAPAcc(const std::vector<ListEntry> &sorted_list,
+                std::vector< Quadruple > &map_acc){
+                map_acc.resize(sorted_list.size());
+                float hit = 0, acc1 = 0, acc2 = 0, acc3 = 0;
+                for (size_t i = 1; i <= sorted_list.size(); i++){
+                    if ((int)sorted_list[i - 1].label > 0) {
+                        hit++;
+                        acc1 += hit / i;
+                        acc2 += (hit - 1) / i;
+                        acc3 += (hit + 1) / i;
+                    }
+                    map_acc[i - 1].ap_acc_ = acc1;
+                    map_acc[i - 1].ap_acc_miss_ = acc2;
+                    map_acc[i - 1].ap_acc_add_ = acc3;
+                    map_acc[i - 1].hits_ = hit;
+               
+                }
+            }
+            virtual void GetLambdaWeight(const std::vector<ListEntry> &sorted_list, std::vector<LambdaPair> &pairs){
+                std::vector< Quadruple > map_acc;
+                GetMAPAcc(sorted_list, map_acc);
+                for (size_t i = 0; i < pairs.size(); i++){
+                    pairs[i].weight = GetLambdaMAP(sorted_list, pairs[i].pos_index, pairs[i].neg_index, map_acc);
+                }
+            }
+           
+        };
+
     };
 };
 #endif

From 391be108061ccadb166813abdba3be59f3bb92b8 Mon Sep 17 00:00:00 2001
From: yepyao <yepyao@gmail.com>
Date: Sat, 17 May 2014 11:35:43 +0800
Subject: [PATCH 10/18] small change

---
 demo/rank/mq2008.conf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/demo/rank/mq2008.conf b/demo/rank/mq2008.conf
index f355b0578..65ad19b8e 100644
--- a/demo/rank/mq2008.conf
+++ b/demo/rank/mq2008.conf
@@ -3,7 +3,7 @@
 booster_type = 0 
 
 # specify objective
-objective="rank:map"
+objective="rank:pairwise"
 
 # Tree Booster Parameters
 # step size shrinkage

From d429289ad35d1440c431176e2965af6fb7bd8200 Mon Sep 17 00:00:00 2001
From: tqchen <tianqi.tchen@gmail.com>
Date: Fri, 16 May 2014 20:37:45 -0700
Subject: [PATCH 11/18] ok

---
 README.md                   | 2 ++
 demo/kaggle-higgs/README.md | 2 --
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index c775c9776..c7e22d706 100644
--- a/README.md
+++ b/README.md
@@ -15,6 +15,8 @@ Features
   - Sparse feature format allows easy handling of missing values, and improve computation efficiency.
 * Push the limit on single machine:
   - Efficient implementation that optimizes memory and computation.
+* Speed: XGBoost is very fast
+  - IN [demo/higgs/speedtest.py](../blob/master/demo/kaggle-higgs/speedtest.py), kaggle higgs data it is faster(on our machine 20 times faster using 4 threads) than sklearn.ensemble.GradientBoostingClassifier
 * Layout of gradient boosting algorithm to support user defined objective
 * Python interface, works with numpy and scipy.sparse matrix
 
diff --git a/demo/kaggle-higgs/README.md b/demo/kaggle-higgs/README.md
index b3db23266..28472a848 100644
--- a/demo/kaggle-higgs/README.md
+++ b/demo/kaggle-higgs/README.md
@@ -14,8 +14,6 @@ make
 
 3. Run ./run.sh
 
-
-
 Speed
 =====
 speedtest.py compares xgboost's speed on this dataset with sklearn.GBM

From 58cbfa06920c6c044ce43cc9e0755cb8646d1aab Mon Sep 17 00:00:00 2001
From: Tianqi Chen <tqchen@users.noreply.github.com>
Date: Fri, 16 May 2014 20:41:05 -0700
Subject: [PATCH 12/18] Update README.md

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index c7e22d706..c8d57105c 100644
--- a/README.md
+++ b/README.md
@@ -16,7 +16,7 @@ Features
 * Push the limit on single machine:
   - Efficient implementation that optimizes memory and computation.
 * Speed: XGBoost is very fast
-  - IN [demo/higgs/speedtest.py](../blob/master/demo/kaggle-higgs/speedtest.py), kaggle higgs data it is faster(on our machine 20 times faster using 4 threads) than sklearn.ensemble.GradientBoostingClassifier
+  - IN [demo/higgs/speedtest.py](../demo/kaggle-higgs/speedtest.py), kaggle higgs data it is faster(on our machine 20 times faster using 4 threads) than sklearn.ensemble.GradientBoostingClassifier
 * Layout of gradient boosting algorithm to support user defined objective
 * Python interface, works with numpy and scipy.sparse matrix
 

From 32a33710737b0f239465f4f74c02e0770b357f6f Mon Sep 17 00:00:00 2001
From: Tianqi Chen <tqchen@users.noreply.github.com>
Date: Fri, 16 May 2014 20:41:21 -0700
Subject: [PATCH 13/18] Update README.md

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index c8d57105c..b32ddbc18 100644
--- a/README.md
+++ b/README.md
@@ -16,7 +16,7 @@ Features
 * Push the limit on single machine:
   - Efficient implementation that optimizes memory and computation.
 * Speed: XGBoost is very fast
-  - IN [demo/higgs/speedtest.py](../demo/kaggle-higgs/speedtest.py), kaggle higgs data it is faster(on our machine 20 times faster using 4 threads) than sklearn.ensemble.GradientBoostingClassifier
+  - IN [demo/higgs/speedtest.py](../demo/master/kaggle-higgs/speedtest.py), kaggle higgs data it is faster(on our machine 20 times faster using 4 threads) than sklearn.ensemble.GradientBoostingClassifier
 * Layout of gradient boosting algorithm to support user defined objective
 * Python interface, works with numpy and scipy.sparse matrix
 

From 4218c1ef53d2a8902ad650525eaa155cdb0c3f8a Mon Sep 17 00:00:00 2001
From: Tianqi Chen <tqchen@users.noreply.github.com>
Date: Fri, 16 May 2014 20:41:43 -0700
Subject: [PATCH 14/18] Update README.md

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index b32ddbc18..c8d57105c 100644
--- a/README.md
+++ b/README.md
@@ -16,7 +16,7 @@ Features
 * Push the limit on single machine:
   - Efficient implementation that optimizes memory and computation.
 * Speed: XGBoost is very fast
-  - IN [demo/higgs/speedtest.py](../demo/master/kaggle-higgs/speedtest.py), kaggle higgs data it is faster(on our machine 20 times faster using 4 threads) than sklearn.ensemble.GradientBoostingClassifier
+  - IN [demo/higgs/speedtest.py](../demo/kaggle-higgs/speedtest.py), kaggle higgs data it is faster(on our machine 20 times faster using 4 threads) than sklearn.ensemble.GradientBoostingClassifier
 * Layout of gradient boosting algorithm to support user defined objective
 * Python interface, works with numpy and scipy.sparse matrix
 

From 4dadc766527f3be034e9cc97a447a5295fc08f87 Mon Sep 17 00:00:00 2001
From: Tianqi Chen <tqchen@users.noreply.github.com>
Date: Fri, 16 May 2014 20:41:59 -0700
Subject: [PATCH 15/18] Update README.md

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index c8d57105c..659f8d5fa 100644
--- a/README.md
+++ b/README.md
@@ -16,7 +16,7 @@ Features
 * Push the limit on single machine:
   - Efficient implementation that optimizes memory and computation.
 * Speed: XGBoost is very fast
-  - IN [demo/higgs/speedtest.py](../demo/kaggle-higgs/speedtest.py), kaggle higgs data it is faster(on our machine 20 times faster using 4 threads) than sklearn.ensemble.GradientBoostingClassifier
+  - IN [demo/higgs/speedtest.py](demo/kaggle-higgs/speedtest.py), kaggle higgs data it is faster(on our machine 20 times faster using 4 threads) than sklearn.ensemble.GradientBoostingClassifier
 * Layout of gradient boosting algorithm to support user defined objective
 * Python interface, works with numpy and scipy.sparse matrix
 

From d7bb10eb79715ab90189413cb03cfd410ccd0367 Mon Sep 17 00:00:00 2001
From: tqchen <tianqi.tchen@gmail.com>
Date: Fri, 16 May 2014 20:44:02 -0700
Subject: [PATCH 16/18] final check

---
 regrank/xgboost_regrank_obj.h   |   9 +-
 regrank/xgboost_regrank_obj.hpp | 157 --------------------------------
 2 files changed, 3 insertions(+), 163 deletions(-)

diff --git a/regrank/xgboost_regrank_obj.h b/regrank/xgboost_regrank_obj.h
index a33b828d9..2778686a3 100644
--- a/regrank/xgboost_regrank_obj.h
+++ b/regrank/xgboost_regrank_obj.h
@@ -113,14 +113,11 @@ namespace xgboost{
            if( !strcmp("reg:logistic", name ) )    return new RegressionObj( LossType::kLogisticNeglik );
            if( !strcmp("binary:logistic", name ) ) return new RegressionObj( LossType::kLogisticClassify );
            if( !strcmp("binary:logitraw", name ) ) return new RegressionObj( LossType::kLogisticRaw );
-           if( !strcmp("multi:softmax", name ) )      return new SoftmaxMultiClassObj();
+           if( !strcmp("multi:softmax", name ) )   return new SoftmaxMultiClassObj();
            if( !strcmp("rank:pairwise", name ) ) return new PairwiseRankObj();
-           if( !strcmp("rank:softmax", name ) ) return new SoftmaxRankObj();
            if( !strcmp("rank:pairwise", name ) ) return new PairwiseRankObj();
-           if( !strcmp("rank:softmax", name ) ) return new SoftmaxRankObj();
-           if( !strcmp("rank:map", name ) ) return new LambdaRankObj_MAP();
-           if( !strcmp("rank:ndcg", name ) ) return new LambdaRankObj_NDCG();
-	   utils::Error("unknown objective function type");
+           if( !strcmp("rank:softmax", name ) )  return new SoftmaxRankObj();
+           utils::Error("unknown objective function type");
            return NULL;
        }
     };
diff --git a/regrank/xgboost_regrank_obj.hpp b/regrank/xgboost_regrank_obj.hpp
index 70d4347f7..71ebec0ab 100644
--- a/regrank/xgboost_regrank_obj.hpp
+++ b/regrank/xgboost_regrank_obj.hpp
@@ -330,163 +330,6 @@ namespace xgboost{
             virtual ~PairwiseRankObj(void){}
             virtual void GetLambdaWeight( const std::vector<ListEntry> &sorted_list, std::vector<LambdaPair> &pairs ){}            
         };
-
-
-        class LambdaRankObj_NDCG : public LambdaRankObj{
-            
-        public:
-            virtual ~LambdaRankObj_NDCG(void){}
-
-            inline float CalcDCG( const std::vector<float> &labels ){
-                double sumdcg = 0.0;
-                for( size_t i = 0; i < labels.size(); i ++ ){
-                    const unsigned rel = labels[i];
-                    if( rel != 0 ){ 
-                        sumdcg += logf(2.0f) * ((1<<rel)-1) / logf( i + 2 );
-                    }
-                }
-                return static_cast<float>(sumdcg);
-            }
-
-            inline float GetIDCG(const std::vector<ListEntry> &sorted_list){
-                std::vector<float> labels;
-                for (size_t i = 0; i < sorted_list.size(); i++){
-                    labels.push_back(sorted_list[i].label);
-                }
-
-                std::sort(labels.begin(), labels.end(), std::greater<float>());
-                return CalcDCG(labels);
-            }
-
-            /*
-            * \brief Obtain the delta NDCG if trying to switch the positions of instances in index1 or index2
-            *        in sorted triples. Here DCG is calculated as sigma_i 2^rel_i/log(i + 1)
-            * \param sorted_list the list containing entry information
-            * \param index1,index2 the instances switched
-            * \param the IDCG of the list
-            */
-            inline float GetLambdaNDCG(const std::vector<ListEntry> &sorted_list,
-                int index1,
-                int index2, float IDCG){
-                double original = (1 << static_cast<int>(sorted_list[index1].label)) / log(index1 + 2)
-                    + (1 << static_cast<int>(sorted_list[index2].label)) / log(index2 + 2);
-                double changed = (1 << static_cast<int>(sorted_list[index2].label)) / log(index1 + 2)
-                    + (1 << static_cast<int>(sorted_list[index1].label)) / log(index2 + 2);
-                double ans = (original - changed) / IDCG;
-                if (ans < 0) ans = -ans;
-                return static_cast<float>(ans);
-            }
-
-            virtual void GetLambdaWeight(const std::vector<ListEntry> &sorted_list, std::vector<LambdaPair> &pairs){
-                float IDCG = GetIDCG(sorted_list);
-                for (size_t i = 0; i < pairs.size(); i++){
-                    pairs[i].weight = GetLambdaNDCG(sorted_list,
-                        pairs[i].pos_index, pairs[i].neg_index, IDCG);
-                }
-            }
-            
-        };
-
-        class LambdaRankObj_MAP : public LambdaRankObj{
-
-            class Quadruple{
-            public:
-                /* \brief the accumulated precision */
-                float ap_acc_;
-                /* \brief the accumulated precision assuming a positive instance is missing*/
-                float ap_acc_miss_;
-                /* \brief the accumulated precision assuming that one more positive instance is inserted ahead*/
-                float ap_acc_add_;
-                /* \brief the accumulated positive instance count */
-                float hits_;
-
-                Quadruple(){}
-
-                Quadruple(const Quadruple& q){
-                    ap_acc_ = q.ap_acc_;
-                    ap_acc_miss_ = q.ap_acc_miss_;
-                    ap_acc_add_ = q.ap_acc_add_;
-                    hits_ = q.hits_;
-                }
-
-                Quadruple(float ap_acc, float ap_acc_miss, float ap_acc_add, float hits
-                    ) :ap_acc_(ap_acc), ap_acc_miss_(ap_acc_miss), ap_acc_add_(ap_acc_add), hits_(hits){
-
-                }
-
-            };
-
-        public:
-            virtual ~LambdaRankObj_MAP(void){}
-
-            /*
-            * \brief Obtain the delta MAP if trying to switch the positions of instances in index1 or index2
-            *        in sorted triples
-            * \param sorted_list the list containing entry information
-            * \param index1,index2 the instances switched
-            * \param map_acc a vector containing the accumulated precisions for each position in a list
-            */
-            inline float GetLambdaMAP(const std::vector<ListEntry> &sorted_list,
-                int index1, int index2,
-                std::vector< Quadruple > &map_acc){
-                if (index1 == index2 
-			|| sorted_list[index1].label == sorted_list[index2].label
-			|| map_acc[map_acc.size() - 1].hits_ == 0
-			) return 0.0;
-                if (index1 > index2) std::swap(index1, index2);
-                float original = map_acc[index2].ap_acc_; // The accumulated precision in the interval [index1,index2]
-                if (index1 != 0) original -= map_acc[index1 - 1].ap_acc_;
-                float changed = 0;
-                if (sorted_list[index1].label < sorted_list[index2].label){
-                    changed += map_acc[index2 - 1].ap_acc_add_ - map_acc[index1].ap_acc_add_;
-                    changed += (map_acc[index1].hits_ + 1.0f) / (index1 + 1);
-                }
-                else{
-                    changed += map_acc[index2 - 1].ap_acc_miss_ - map_acc[index1].ap_acc_miss_;
-                    changed += map_acc[index2].hits_ / (index2 + 1);
-                }
-                if(map_acc[map_acc.size() - 1].hits_ == 0) printf("haha\n");
-
-                float ans = (changed - original) / (map_acc[map_acc.size() - 1].hits_);
-                if (ans < 0) ans = -ans;
-                return ans;
-            }
-
-            /*
-            * \brief preprocessing results for calculating delta MAP
-            * \return The first field is the accumulated precision, the second field is the
-            *         accumulated precision assuming a positive instance is missing,
-            *         the third field is the accumulated precision assuming that one more positive
-            *         instance is inserted, the fourth field is the accumulated positive instance count
-            */
-            inline void GetMAPAcc(const std::vector<ListEntry> &sorted_list,
-                std::vector< Quadruple > &map_acc){
-                map_acc.resize(sorted_list.size());
-                float hit = 0, acc1 = 0, acc2 = 0, acc3 = 0;
-                for (size_t i = 1; i <= sorted_list.size(); i++){
-                    if ((int)sorted_list[i - 1].label > 0) {
-                        hit++;
-                        acc1 += hit / i;
-                        acc2 += (hit - 1) / i;
-                        acc3 += (hit + 1) / i;
-                    }
-                    map_acc[i - 1].ap_acc_ = acc1;
-                    map_acc[i - 1].ap_acc_miss_ = acc2;
-                    map_acc[i - 1].ap_acc_add_ = acc3;
-                    map_acc[i - 1].hits_ = hit;
-               
-                }
-            }
-            virtual void GetLambdaWeight(const std::vector<ListEntry> &sorted_list, std::vector<LambdaPair> &pairs){
-                std::vector< Quadruple > map_acc;
-                GetMAPAcc(sorted_list, map_acc);
-                for (size_t i = 0; i < pairs.size(); i++){
-                    pairs[i].weight = GetLambdaMAP(sorted_list, pairs[i].pos_index, pairs[i].neg_index, map_acc);
-                }
-            }
-           
-        };
-
     };
 };
 #endif

From 348d35a6689c773f97bc773ca3533515745de98a Mon Sep 17 00:00:00 2001
From: tqchen <tianqi.tchen@gmail.com>
Date: Fri, 16 May 2014 20:46:08 -0700
Subject: [PATCH 17/18] add ignore

---
 .gitignore | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.gitignore b/.gitignore
index ae2c00e76..5227dbc87 100644
--- a/.gitignore
+++ b/.gitignore
@@ -23,3 +23,4 @@ xgboost
 *group
 *rar
 *vali
+*data

From 8e941b2a7964868f6fa20196ed1ab4171082c529 Mon Sep 17 00:00:00 2001
From: Tianqi Chen <tqchen@users.noreply.github.com>
Date: Fri, 16 May 2014 20:49:05 -0700
Subject: [PATCH 18/18] Update README.md

---
 demo/multiclass_classification/README.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/demo/multiclass_classification/README.md b/demo/multiclass_classification/README.md
index 4d66ee06a..72607de09 100644
--- a/demo/multiclass_classification/README.md
+++ b/demo/multiclass_classification/README.md
@@ -1,5 +1,7 @@
 Demonstrating how to use XGBoost accomplish Multi-Class classification task on [UCI Dermatology dataset](https://archive.ics.uci.edu/ml/datasets/Dermatology)
 
+Make sure you build the xgboost python module in ../../python first
+
 1. Run runexp.sh
 ```bash
 ./runexp.sh