ok
This commit is contained in:
parent
2adf905dcf
commit
e3b7abfb47
@ -1,2 +1,12 @@
|
|||||||
example of UCI dataset
|
Example of training a binary classifier on the UCI Mushroom dataset
|
||||||
http://archive.ics.uci.edu/ml/datasets/Mushroom
|
http://archive.ics.uci.edu/ml/datasets/Mushroom
|
||||||
|
|
||||||
|
Run: ./runexp.sh
|
||||||
|
|
||||||
|
Format of input: LIBSVM format
|
||||||
|
|
||||||
|
Format of featmap.txt:
|
||||||
|
<featureid> <featurename> <q or i>\n
|
||||||
|
|
||||||
|
q means a continuous quantity; i means an indicator feature.
|
||||||
|
Feature id must be from 0 to num_features, in sorted order.
|
||||||
|
|||||||
@ -46,13 +46,13 @@ def dumpjson( fo, trees ):
|
|||||||
fo.write('\n}\n')
|
fo.write('\n}\n')
|
||||||
|
|
||||||
fo = sys.stdout
|
fo = sys.stdout
|
||||||
nmap = loadnmap( 'featname.txt' )
|
nmap = loadnmap( 'featmap.txt' )
|
||||||
stat = loadstats( 'agaricus.txt.test', 'dump.path.txt' )
|
stat = loadstats( 'agaricus.txt.test', 'dump.path.txt' )
|
||||||
|
|
||||||
trees = {'roots':[], 'weights':[], 'nodes':[] }
|
trees = {'roots':[], 'weights':[], 'nodes':[] }
|
||||||
idmap = {}
|
idmap = {}
|
||||||
|
|
||||||
for l in open( 'dump.txt'):
|
for l in open( 'dump.raw.txt'):
|
||||||
if l.startswith('booster['):
|
if l.startswith('booster['):
|
||||||
bid = int( l.split('[')[1].split(']')[0] )
|
bid = int( l.split('[')[1].split(']')[0] )
|
||||||
trees['roots'].append( mapid(idmap,bid,0) )
|
trees['roots'].append( mapid(idmap,bid,0) )
|
||||||
|
|||||||
@ -1,10 +1,12 @@
|
|||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
|
# map feature using indicator encoding, also produce featmap.txt
|
||||||
python mapfeat.py
|
python mapfeat.py
|
||||||
|
# split train and test
|
||||||
python mknfold.py agaricus.txt 1
|
python mknfold.py agaricus.txt 1
|
||||||
|
# training
|
||||||
../../xgboost mushroom.conf
|
../../xgboost mushroom.conf
|
||||||
# this is what the dump will look like without a feature map
|
# this is what the dump will look like without a feature map
|
||||||
../../xgboost mushroom.conf task=dump model_in=0003.model name_dump=dump.raw.txt
|
../../xgboost mushroom.conf task=dump model_in=0003.model name_dump=dump.raw.txt
|
||||||
# this is what the dump will look like with a feature map
|
# this is what the dump will look like with a feature map
|
||||||
../../xgboost mushroom.conf task=dump model_in=0003.model fmap=featmap.txt name_dump=dump.nice.txt
|
../../xgboost mushroom.conf task=dump model_in=0003.model fmap=featmap.txt name_dump=dump.nice.txt
|
||||||
cat dump.nice.txt
|
cat dump.nice.txt
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user