cleanup code

This commit is contained in:
tqchen
2014-05-15 15:01:41 -07:00
parent 3960ac9cb4
commit 37e1473cea
3 changed files with 15 additions and 28 deletions

View File

@@ -1,29 +1,25 @@
# General Parameters, see comment for each definition
# choose the booster type, 0: tree, 1: linear
booster_type = 0
# the loss type is the only difference from classification; use 0: linear regression
# when labels are in [0,1], we can also use 1: logistic regression
loss_type = 0
#objective="rank:pairwise"
#objective="rank:softmax"
#objective="lambdarank:map"
#objective="lambdarank:ndcg"
num_feature=50
# Tree Booster Parameters
# step size shrinkage
bst:eta = 1.0
bst:eta = 0.1
# minimum loss reduction required to make a further partition
bst:gamma = 1.0
# minimum sum of instance weight(hessian) needed in a child
bst:min_child_weight = 1
# maximum depth of a tree
bst:max_depth = 3
eval_metric='ndcg'
# Task parameters
# the number of boosting rounds
num_round = 2
num_round = 4
# 0 means do not save any model except the final round model
save_period = 0
# The path of training data

View File

@@ -6,20 +6,22 @@ def save_data(group_data,output_feature,output_group):
output_group.write(str(len(group_data))+"\n")
for data in group_data:
output_feature.write(data[0] + " " + " ".join(data[2:]) + "\n")
# only include nonzero features
feats = [ p for p in data[2:] if float(p.split(':')[1]) != 0.0 ]
output_feature.write(data[0] + " " + " ".join(feats) + "\n")
if __name__ == "__main__":
if len(sys.argv) != 4:
print "Usage: python trans_data.py [Ranksvm Format Input] [Output Feature File] [Output Group File]"
sys.exit(0)
input = open(sys.argv[1])
fi = open(sys.argv[1])
output_feature = open(sys.argv[2],"w")
output_group = open(sys.argv[3],"w")
group_data = []
group = ""
for line in input:
for line in fi:
if not line:
break
if "#" in line:
@@ -33,8 +35,7 @@ if __name__ == "__main__":
save_data(group_data,output_feature,output_group)
input.close()
fi.close()
output_feature.close()
output_group.close()