cleanup code

2014-05-15 15:01:41 -07:00
parent 3960ac9cb4
commit 37e1473cea
3 changed files with 15 additions and 28 deletions
--- a/demo/rank/mq2008.conf
+++ b/demo/rank/mq2008.conf
@@ -1,29 +1,25 @@
 # General Parameters, see comment for each definition
 # choose the booster type, 0: tree, 1: linear
 booster_type = 0 
-# this is the only difference with classification, use 0: linear regression
-# when labels are in [0,1] we can also use 1: logistic regression
-loss_type = 0

 #objective="rank:pairwise"
 #objective="rank:softmax"
 #objective="lambdarank:map"
 #objective="lambdarank:ndcg"

-num_feature=50
 # Tree Booster Parameters
 # step size shrinkage
-bst:eta = 1.0 
+bst:eta = 0.1 
 # minimum loss reduction required to make a further partition
 bst:gamma = 1.0 
 # minimum sum of instance weight(hessian) needed in a child
 bst:min_child_weight = 1 
 # maximum depth of a tree
 bst:max_depth = 3 
-
+eval_metric='ndcg'
 # Task parameters
 # the number of boosting rounds to run
-num_round = 2
+num_round = 4
 # 0 means do not save any model except the final round model
 save_period = 0 
 # The path of training data
--- a/demo/rank/trans_data.py
+++ b/demo/rank/trans_data.py
@@ -6,20 +6,22 @@ def save_data(group_data,output_feature,output_group):

    output_group.write(str(len(group_data))+"\n")
    for data in group_data:
-	output_feature.write(data[0] + " " + " ".join(data[2:]) + "\n")
+        # only include nonzero features
+        feats = [ p for p in data[2:] if float(p.split(':')[1]) != 0.0 ]        
+	output_feature.write(data[0] + " " + " ".join(feats) + "\n")

 if __name__ == "__main__":
    if len(sys.argv) != 4:
        print "Usage: python trans_data.py [Ranksvm Format Input] [Output Feature File] [Output Group File]"
 	sys.exit(0)

-    input = open(sys.argv[1])
+    fi = open(sys.argv[1])
    output_feature = open(sys.argv[2],"w")
    output_group = open(sys.argv[3],"w")
    
    group_data = []
    group = ""
-    for line in input:
+    for line in fi:
 	if not line:
 	    break
 	if "#" in line:
@@ -33,8 +35,7 @@ if __name__ == "__main__":

    save_data(group_data,output_feature,output_group)

-    input.close()
+    fi.close()
    output_feature.close()
    output_group.close()

-