update regression

2014-03-26 16:25:44 -07:00
parent 0a971cb466
commit 7d97d6b1d4
8 changed files with 60 additions and 182 deletions
--- a/demo/binary_classification/dump2json.py
+++ b/demo/binary_classification/dump2json.py
@@ -1,80 +0,0 @@
-#!/usr/bin/python
-import sys
-import json
-
-def loadnmap( fname ):
-    nmap = {}
-    for l in open(fname):
-        arr = l.split()
-        nmap[int(arr[0])] = arr[1].strip()
-    return nmap
-
-def recstats( rec, l, label ):
-    for it in l.split(','):
-        k = int( it )
-        if k not in rec:
-            rec[ k ] = (0,0)
-        else:
-            if label == 0:
-                rec[k] = (rec[k][0]+1,rec[k][1])
-            else:
-                rec[k] = (rec[k][0],rec[k][1]+1)
-
-def loadstats( fname, fpath ):
-    res = {}
-    fp = open( fname )
-    for l in open( fpath ):
-        label = int( fp.readline().split()[0] )
-        arr = l.split()
-        for i in xrange( len(arr) ):
-            if i not in res:
-                res[ i ] = {}
-            recstats( res[ i ], arr[i], label )            
-    return res
-
-def mapid( idmap, fid, bid ):
-    if (bid, fid) not in idmap:
-        idmap[ (bid,fid) ] = len(idmap)
-    return idmap[ (bid,fid) ]
-
-def dumpjson( fo, trees ):
-    fo.write('{\n')
-    fo.write('  \"roots\":'+json.dumps( trees['roots'], separators=(' , ',' : ') ) +',\n' )
-    fo.write('  \"weights\":'+json.dumps( trees['weights'], separators=(' , ',' : ') ) +',\n' )
-    fo.write('  \"nodes\":[\n' )
-    fo.write('%s\n   ]' % ',\n'.join(('    %s' % json.dumps( n, separators=(' , ',' : ') ) )   for n in trees['nodes']) )
-    fo.write('\n}\n')
-        
-fo = sys.stdout
-nmap = loadnmap( 'featmap.txt' )
-stat = loadstats( 'agaricus.txt.test', 'dump.path.txt' )
-
-trees = {'roots':[], 'weights':[], 'nodes':[] }
-idmap = {}
-
-for l in open( 'dump.raw.txt'):
-    if l.startswith('booster['):
-        bid = int( l.split('[')[1].split(']')[0] )
-        trees['roots'].append( mapid(idmap,bid,0) )
-        trees['weights'].append( 1.0 )
-        continue
-
-    node = {}
-    rid = int( l.split(':')[0] )
-    node['id'] = mapid( idmap, bid, rid )
-    node['neg_cnt' ] = stat[ bid ][ rid ][ 0 ]
-    node['pos_cnt' ] = stat[ bid ][ rid ][ 1 ] 
-
-    idx = l.find('[f')
-    if idx != -1:
-        fid = int( l[idx+2:len(l)].split('<')[0])
-        node['label'] = nmap[ fid ]
-        node['children'] = [ mapid( idmap, bid, int(it.split('=')[1]) ) for it in l.split()[1].split(',') ]
-        node['edge_tags'] = ['yes','no']
-    else:
-        node['label'] = l.split(':')[1].strip()
-        node['value'] = float(l.split(':')[1].split('=')[1])
-
-    trees['nodes'].append( node )
-trees['nodes'].sort( key = lambda x:x['id'] )
-dumpjson( sys.stderr, trees)
--- a/demo/binary_classification/mushroom.conf
+++ b/demo/binary_classification/mushroom.conf
@@ -1,17 +1,27 @@
-# General Parameters
-booster_type = 0
-loss_type = 2
+# General Parameters; the comment above each setting explains what it does
+# choose the booster type, 0: tree, 1: linear
+booster_type = 0 
+# choose logistic regression loss function for binary classification
+loss_type = 2 

 # Tree Booster Parameters
-bst:tree_maker=2
-bst:eta=1.0
-bst:gamma=1.0
-bst:min_child_weight=1   
-bst:max_depth=3           
+# step size shrinkage
+bst:eta = 1.0 
+# minimum loss reduction required to make a further partition
+bst:gamma = 1.0 
+# minimum sum of instance weight(hessian) needed in a child
+bst:min_child_weight = 1 
+# maximum depth of a tree
+bst:max_depth = 3 

-# Binary Classification Parameters
-num_round=2
-save_period=0
-data = "agaricus.txt.train"
-eval[test] = "agaricus.txt.test"
-test:data =  "agaricus.txt.test"
+# Task Parameters
+# the number of boosting rounds to run
+num_round = 2
+# 0 means do not save any model except the final round model
+save_period = 0 
+# The path of training data
+data = "agaricus.txt.train" 
+# The path of validation data, used to monitor the training process; here [test] sets the name of the validation set
+eval[test] = "agaricus.txt.test" 
+# The path of test data 
+test:data = "agaricus.txt.test"