ok
This commit is contained in:
parent
2adf905dcf
commit
e3b7abfb47
@ -1,2 +1,12 @@
|
||||
example of UCI dataset
|
||||
example of training a binary classifier on UCI dataset
|
||||
http://archive.ics.uci.edu/ml/datasets/Mushroom
|
||||
|
||||
Run: ./runexp.sh
|
||||
|
||||
Format of input: LIBSVM format
|
||||
|
||||
Format of featmap.txt:
|
||||
<featureid> <featurename> <q or i>\n
|
||||
|
||||
q means continuous quantities, i means indicator features.
|
||||
Feature id must be from 0 to num_features, in sorted order.
|
||||
|
||||
@ -46,13 +46,13 @@ def dumpjson( fo, trees ):
|
||||
fo.write('\n}\n')
|
||||
|
||||
fo = sys.stdout
|
||||
nmap = loadnmap( 'featname.txt' )
|
||||
nmap = loadnmap( 'featmap.txt' )
|
||||
stat = loadstats( 'agaricus.txt.test', 'dump.path.txt' )
|
||||
|
||||
trees = {'roots':[], 'weights':[], 'nodes':[] }
|
||||
idmap = {}
|
||||
|
||||
for l in open( 'dump.txt'):
|
||||
for l in open( 'dump.raw.txt'):
|
||||
if l.startswith('booster['):
|
||||
bid = int( l.split('[')[1].split(']')[0] )
|
||||
trees['roots'].append( mapid(idmap,bid,0) )
|
||||
|
||||
@ -1,10 +1,12 @@
|
||||
#!/bin/bash
|
||||
# map feature using indicator encoding, also produce featmap.txt
|
||||
python mapfeat.py
|
||||
# split train and test
|
||||
python mknfold.py agaricus.txt 1
|
||||
# training
|
||||
../../xgboost mushroom.conf
|
||||
# this is what the dump will look like without a feature map
|
||||
../../xgboost mushroom.conf task=dump model_in=0003.model name_dump=dump.raw.txt
|
||||
# this is what the dump will look like with a feature map
|
||||
../../xgboost mushroom.conf task=dump model_in=0003.model fmap=featmap.txt name_dump=dump.nice.txt
|
||||
cat dump.nice.txt
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user