This commit is contained in:
tqchen 2014-03-03 12:26:40 -08:00
parent 2adf905dcf
commit e3b7abfb47
3 changed files with 17 additions and 5 deletions

View File

@ -1,2 +1,12 @@
example of UCI dataset example of training a binary classifier on UCI dataset
http://archive.ics.uci.edu/ml/datasets/Mushroom http://archive.ics.uci.edu/ml/datasets/Mushroom
Run: ./runexp.sh
Format of input: LIBSVM format
Format of featmap.txt:
<featureid> <featurename> <q or i>\n
q means continuous quantities, i means indicator features.
Feature ids must run from 0 to num_features and be listed in sorted order.

View File

@ -46,13 +46,13 @@ def dumpjson( fo, trees ):
fo.write('\n}\n') fo.write('\n}\n')
fo = sys.stdout fo = sys.stdout
nmap = loadnmap( 'featname.txt' ) nmap = loadnmap( 'featmap.txt' )
stat = loadstats( 'agaricus.txt.test', 'dump.path.txt' ) stat = loadstats( 'agaricus.txt.test', 'dump.path.txt' )
trees = {'roots':[], 'weights':[], 'nodes':[] } trees = {'roots':[], 'weights':[], 'nodes':[] }
idmap = {} idmap = {}
for l in open( 'dump.txt'): for l in open( 'dump.raw.txt'):
if l.startswith('booster['): if l.startswith('booster['):
bid = int( l.split('[')[1].split(']')[0] ) bid = int( l.split('[')[1].split(']')[0] )
trees['roots'].append( mapid(idmap,bid,0) ) trees['roots'].append( mapid(idmap,bid,0) )

View File

@ -1,10 +1,12 @@
#!/bin/bash #!/bin/bash
# map feature using indicator encoding, also produce featmap.txt
python mapfeat.py python mapfeat.py
# split train and test
python mknfold.py agaricus.txt 1 python mknfold.py agaricus.txt 1
# training
../../xgboost mushroom.conf ../../xgboost mushroom.conf
# this is what the dump will look like without a feature map # this is what the dump will look like without a feature map
../../xgboost mushroom.conf task=dump model_in=0003.model name_dump=dump.raw.txt ../../xgboost mushroom.conf task=dump model_in=0003.model name_dump=dump.raw.txt
# this is what the dump will look like with a feature map # this is what the dump will look like with a feature map
../../xgboost mushroom.conf task=dump model_in=0003.model fmap=featmap.txt name_dump=dump.nice.txt ../../xgboost mushroom.conf task=dump model_in=0003.model fmap=featmap.txt name_dump=dump.nice.txt
cat dump.nice.txt cat dump.nice.txt