change the regression demo data set

This commit is contained in:
kalenhaha
2014-03-24 23:23:11 +08:00
parent 57713be940
commit feb914c35b
8 changed files with 301 additions and 8360 deletions

View File

@@ -1,48 +1,21 @@
#!/usr/bin/python
import sys
def loadfmap( fname ):
fmap = {}
nmap = {}
for l in open( fname ):
arr = l.split()
if arr[0].find('.') != -1:
idx = int( arr[0].strip('.') )
assert idx not in fmap
fmap[ idx ] = {}
ftype = arr[1].strip(':')
content = arr[2]
else:
content = arr[0]
for it in content.split(','):
if it.strip() == '':
continue
k , v = it.split('=')
fmap[ idx ][ v ] = len(nmap)
nmap[ len(nmap) ] = ftype+'='+k
return fmap, nmap
def write_nmap( fo, nmap ):
for i in xrange( len(nmap) ):
fo.write('%d\t%s\ti\n' % (i, nmap[i]) )
# start here
fmap, nmap = loadfmap( 'agaricus-lepiota.fmap' )
fo = open( 'featmap.txt', 'w' )
write_nmap( fo, nmap )
fo.close()
fo = open( 'agaricus.txt', 'w' )
for l in open( 'agaricus-lepiota.data' ):
fo = open( 'machine.txt', 'w' )
cnt = 6
fmap = {}
for l in open( 'machine.data' ):
arr = l.split(',')
if arr[0] == 'p':
fo.write('1')
else:
assert arr[0] == 'e'
fo.write('0')
for i in xrange( 1,len(arr) ):
fo.write( ' %d:1' % fmap[i][arr[i].strip()] )
fo.write(arr[8])
for i in xrange( 0,6 ):
fo.write( ' %d:%s' %(i,arr[i+2]) )
if arr[0] not in fmap.keys():
fmap[arr[0]] = cnt
cnt += 1
fo.write( ' %d:1' % fmap[arr[0]] )
fo.write('\n')
fo.close()