remove test directory
This commit is contained in:
parent
77901f2428
commit
57713be940
@ -1,5 +1,4 @@
|
|||||||
example of training a binary classifier on UCI dataset
|
Demonstrating how to use XGBoost to accomplish binary classification tasks on the UCI mushroom dataset http://archive.ics.uci.edu/ml/datasets/Mushroom
|
||||||
http://archive.ics.uci.edu/ml/datasets/Mushroom
|
|
||||||
|
|
||||||
Run: ./runexp.sh
|
Run: ./runexp.sh
|
||||||
|
|
||||||
|
|||||||
@ -1,18 +1,17 @@
|
|||||||
num_round=2
|
# General Parameters
|
||||||
|
|
||||||
save_period=0
|
|
||||||
|
|
||||||
data = "agaricus.txt.train"
|
|
||||||
eval[test] = "agaricus.txt.test"
|
|
||||||
test:data = "agaricus.txt.test"
|
|
||||||
|
|
||||||
|
|
||||||
booster_type = 0
|
booster_type = 0
|
||||||
loss_type = 2
|
loss_type = 2
|
||||||
|
|
||||||
|
# Tree Booster Parameters
|
||||||
bst:tree_maker=2
|
bst:tree_maker=2
|
||||||
|
|
||||||
bst:eta=1.0
|
bst:eta=1.0
|
||||||
bst:gamma=1.0
|
bst:gamma=1.0
|
||||||
bst:min_child_weight=1
|
bst:min_child_weight=1
|
||||||
bst:max_depth=3
|
bst:max_depth=3
|
||||||
|
|
||||||
|
# Binary Classification Parameters
|
||||||
|
num_round=2
|
||||||
|
save_period=0
|
||||||
|
data = "agaricus.txt.train"
|
||||||
|
eval[test] = "agaricus.txt.test"
|
||||||
|
test:data = "agaricus.txt.test"
|
||||||
@ -3,10 +3,12 @@
|
|||||||
python mapfeat.py
|
python mapfeat.py
|
||||||
# split train and test
|
# split train and test
|
||||||
python mknfold.py agaricus.txt 1
|
python mknfold.py agaricus.txt 1
|
||||||
# training
|
# training and output the models
|
||||||
../../xgboost mushroom.conf
|
../../xgboost mushroom.conf
|
||||||
# this is what the dump will look like without a feature map
|
# output prediction task=pred
|
||||||
|
../../xgboost mushroom.conf task=pred model_in=0003.model
|
||||||
|
# print the boosters of 0003.model in dump.raw.txt
|
||||||
../../xgboost mushroom.conf task=dump model_in=0003.model name_dump=dump.raw.txt
|
../../xgboost mushroom.conf task=dump model_in=0003.model name_dump=dump.raw.txt
|
||||||
# this is what the dump will look like with a feature map
|
# use the feature map in printing for better visualization
|
||||||
../../xgboost mushroom.conf task=dump model_in=0003.model fmap=featmap.txt name_dump=dump.nice.txt
|
../../xgboost mushroom.conf task=dump model_in=0003.model fmap=featmap.txt name_dump=dump.nice.txt
|
||||||
cat dump.nice.txt
|
cat dump.nice.txt
|
||||||
|
|||||||
@ -1,5 +1,4 @@
|
|||||||
example of training a binary classifier on UCI dataset
|
Demonstrating how to use XGBoost to accomplish regression tasks on the UCI mushroom dataset http://archive.ics.uci.edu/ml/datasets/Mushroom
|
||||||
http://archive.ics.uci.edu/ml/datasets/Mushroom
|
|
||||||
|
|
||||||
Run: ./runexp.sh
|
Run: ./runexp.sh
|
||||||
|
|
||||||
|
|||||||
@ -3,10 +3,12 @@
|
|||||||
python mapfeat.py
|
python mapfeat.py
|
||||||
# split train and test
|
# split train and test
|
||||||
python mknfold.py agaricus.txt 1
|
python mknfold.py agaricus.txt 1
|
||||||
# training
|
# training and output the models
|
||||||
../../xgboost mushroom.conf
|
../../xgboost mushroom.conf
|
||||||
# this is what the dump will look like without a feature map
|
# output predictions of test data
|
||||||
|
../../xgboost mushroom.conf task=pred model_in=0003.model
|
||||||
|
# print the boosters of 0003.model in dump.raw.txt
|
||||||
../../xgboost mushroom.conf task=dump model_in=0003.model name_dump=dump.raw.txt
|
../../xgboost mushroom.conf task=dump model_in=0003.model name_dump=dump.raw.txt
|
||||||
# this is what the dump will look like with a feature map
|
# use the feature map in printing for better visualization
|
||||||
../../xgboost mushroom.conf task=dump model_in=0003.model fmap=featmap.txt name_dump=dump.nice.txt
|
../../xgboost mushroom.conf task=dump model_in=0003.model fmap=featmap.txt name_dump=dump.nice.txt
|
||||||
cat dump.nice.txt
|
cat dump.nice.txt
|
||||||
|
|||||||
@ -1 +0,0 @@
|
|||||||
test folder to test new functions
|
|
||||||
File diff suppressed because it is too large
Load Diff
@ -1,32 +0,0 @@
|
|||||||
1. cap-shape: bell=b,conical=c,convex=x,flat=f,knobbed=k,sunken=s
|
|
||||||
2. cap-surface: fibrous=f,grooves=g,scaly=y,smooth=s
|
|
||||||
3. cap-color: brown=n,buff=b,cinnamon=c,gray=g,green=r,pink=p,purple=u,red=e,white=w,yellow=y
|
|
||||||
4. bruises?: bruises=t,no=f
|
|
||||||
5. odor: almond=a,anise=l,creosote=c,fishy=y,foul=f,
|
|
||||||
musty=m,none=n,pungent=p,spicy=s
|
|
||||||
6. gill-attachment: attached=a,descending=d,free=f,notched=n
|
|
||||||
7. gill-spacing: close=c,crowded=w,distant=d
|
|
||||||
8. gill-size: broad=b,narrow=n
|
|
||||||
9. gill-color: black=k,brown=n,buff=b,chocolate=h,gray=g,
|
|
||||||
green=r,orange=o,pink=p,purple=u,red=e,
|
|
||||||
white=w,yellow=y
|
|
||||||
10. stalk-shape: enlarging=e,tapering=t
|
|
||||||
11. stalk-root: bulbous=b,club=c,cup=u,equal=e,
|
|
||||||
rhizomorphs=z,rooted=r,missing=?
|
|
||||||
12. stalk-surface-above-ring: fibrous=f,scaly=y,silky=k,smooth=s
|
|
||||||
13. stalk-surface-below-ring: fibrous=f,scaly=y,silky=k,smooth=s
|
|
||||||
14. stalk-color-above-ring: brown=n,buff=b,cinnamon=c,gray=g,orange=o,
|
|
||||||
pink=p,red=e,white=w,yellow=y
|
|
||||||
15. stalk-color-below-ring: brown=n,buff=b,cinnamon=c,gray=g,orange=o,
|
|
||||||
pink=p,red=e,white=w,yellow=y
|
|
||||||
16. veil-type: partial=p,universal=u
|
|
||||||
17. veil-color: brown=n,orange=o,white=w,yellow=y
|
|
||||||
18. ring-number: none=n,one=o,two=t
|
|
||||||
19. ring-type: cobwebby=c,evanescent=e,flaring=f,large=l,
|
|
||||||
none=n,pendant=p,sheathing=s,zone=z
|
|
||||||
20. spore-print-color: black=k,brown=n,buff=b,chocolate=h,green=r,
|
|
||||||
orange=o,purple=u,white=w,yellow=y
|
|
||||||
21. population: abundant=a,clustered=c,numerous=n,
|
|
||||||
scattered=s,several=v,solitary=y
|
|
||||||
22. habitat: grasses=g,leaves=l,meadows=m,paths=p,
|
|
||||||
urban=u,waste=w,woods=d
|
|
||||||
@ -1,148 +0,0 @@
|
|||||||
1. Title: Mushroom Database
|
|
||||||
|
|
||||||
2. Sources:
|
|
||||||
(a) Mushroom records drawn from The Audubon Society Field Guide to North
|
|
||||||
American Mushrooms (1981). G. H. Lincoff (Pres.), New York: Alfred
|
|
||||||
A. Knopf
|
|
||||||
(b) Donor: Jeff Schlimmer (Jeffrey.Schlimmer@a.gp.cs.cmu.edu)
|
|
||||||
(c) Date: 27 April 1987
|
|
||||||
|
|
||||||
3. Past Usage:
|
|
||||||
1. Schlimmer,J.S. (1987). Concept Acquisition Through Representational
|
|
||||||
Adjustment (Technical Report 87-19). Doctoral dissertation, Department
|
|
||||||
of Information and Computer Science, University of California, Irvine.
|
|
||||||
--- STAGGER: asymptoted to 95% classification accuracy after reviewing
|
|
||||||
1000 instances.
|
|
||||||
2. Iba,W., Wogulis,J., & Langley,P. (1988). Trading off Simplicity
|
|
||||||
and Coverage in Incremental Concept Learning. In Proceedings of
|
|
||||||
the 5th International Conference on Machine Learning, 73-79.
|
|
||||||
Ann Arbor, Michigan: Morgan Kaufmann.
|
|
||||||
-- approximately the same results with their HILLARY algorithm
|
|
||||||
3. In the following references a set of rules (given below) were
|
|
||||||
learned for this data set which may serve as a point of
|
|
||||||
comparison for other researchers.
|
|
||||||
|
|
||||||
Duch W, Adamczak R, Grabczewski K (1996) Extraction of logical rules
|
|
||||||
from training data using backpropagation networks, in: Proc. of the
|
|
||||||
1st Online Workshop on Soft Computing, 19-30.Aug.1996, pp. 25-30,
|
|
||||||
available on-line at: http://www.bioele.nuee.nagoya-u.ac.jp/wsc1/
|
|
||||||
|
|
||||||
Duch W, Adamczak R, Grabczewski K, Ishikawa M, Ueda H, Extraction of
|
|
||||||
crisp logical rules using constrained backpropagation networks -
|
|
||||||
comparison of two new approaches, in: Proc. of the European Symposium
|
|
||||||
on Artificial Neural Networks (ESANN'97), Bruge, Belgium 16-18.4.1997,
|
|
||||||
pp. xx-xx
|
|
||||||
|
|
||||||
Wlodzislaw Duch, Department of Computer Methods, Nicholas Copernicus
|
|
||||||
University, 87-100 Torun, Grudziadzka 5, Poland
|
|
||||||
e-mail: duch@phys.uni.torun.pl
|
|
||||||
WWW http://www.phys.uni.torun.pl/kmk/
|
|
||||||
|
|
||||||
Date: Mon, 17 Feb 1997 13:47:40 +0100
|
|
||||||
From: Wlodzislaw Duch <duch@phys.uni.torun.pl>
|
|
||||||
Organization: Dept. of Computer Methods, UMK
|
|
||||||
|
|
||||||
I have attached a file containing logical rules for mushrooms.
|
|
||||||
It should be helpful for other people since only in the last year I
|
|
||||||
have seen about 10 papers analyzing this dataset and obtaining quite
|
|
||||||
complex rules. We will try to contribute other results later.
|
|
||||||
|
|
||||||
With best regards, Wlodek Duch
|
|
||||||
________________________________________________________________
|
|
||||||
|
|
||||||
Logical rules for the mushroom data sets.
|
|
||||||
|
|
||||||
Logical rules given below seem to be the simplest possible for the
|
|
||||||
mushroom dataset and therefore should be treated as benchmark results.
|
|
||||||
|
|
||||||
Disjunctive rules for poisonous mushrooms, from most general
|
|
||||||
to most specific:
|
|
||||||
|
|
||||||
P_1) odor=NOT(almond.OR.anise.OR.none)
|
|
||||||
120 poisonous cases missed, 98.52% accuracy
|
|
||||||
|
|
||||||
P_2) spore-print-color=green
|
|
||||||
48 cases missed, 99.41% accuracy
|
|
||||||
|
|
||||||
P_3) odor=none.AND.stalk-surface-below-ring=scaly.AND.
|
|
||||||
(stalk-color-above-ring=NOT.brown)
|
|
||||||
8 cases missed, 99.90% accuracy
|
|
||||||
|
|
||||||
P_4) habitat=leaves.AND.cap-color=white
|
|
||||||
100% accuracy
|
|
||||||
|
|
||||||
Rule P_4) may also be
|
|
||||||
|
|
||||||
P_4') population=clustered.AND.cap_color=white
|
|
||||||
|
|
||||||
These rules involve 6 attributes (out of 22). Rules for edible
|
|
||||||
mushrooms are obtained as negation of the rules given above, for
|
|
||||||
example the rule:
|
|
||||||
|
|
||||||
odor=(almond.OR.anise.OR.none).AND.spore-print-color=NOT.green
|
|
||||||
|
|
||||||
gives 48 errors, or 99.41% accuracy on the whole dataset.
|
|
||||||
|
|
||||||
Several slightly more complex variations on these rules exist,
|
|
||||||
involving other attributes, such as gill_size, gill_spacing,
|
|
||||||
stalk_surface_above_ring, but the rules given above are the simplest
|
|
||||||
we have found.
|
|
||||||
|
|
||||||
|
|
||||||
4. Relevant Information:
|
|
||||||
This data set includes descriptions of hypothetical samples
|
|
||||||
corresponding to 23 species of gilled mushrooms in the Agaricus and
|
|
||||||
Lepiota Family (pp. 500-525). Each species is identified as
|
|
||||||
definitely edible, definitely poisonous, or of unknown edibility and
|
|
||||||
not recommended. This latter class was combined with the poisonous
|
|
||||||
one. The Guide clearly states that there is no simple rule for
|
|
||||||
determining the edibility of a mushroom; no rule like ``leaflets
|
|
||||||
three, let it be'' for Poisonous Oak and Ivy.
|
|
||||||
|
|
||||||
5. Number of Instances: 8124
|
|
||||||
|
|
||||||
6. Number of Attributes: 22 (all nominally valued)
|
|
||||||
|
|
||||||
7. Attribute Information: (classes: edible=e, poisonous=p)
|
|
||||||
1. cap-shape: bell=b,conical=c,convex=x,flat=f,
|
|
||||||
knobbed=k,sunken=s
|
|
||||||
2. cap-surface: fibrous=f,grooves=g,scaly=y,smooth=s
|
|
||||||
3. cap-color: brown=n,buff=b,cinnamon=c,gray=g,green=r,
|
|
||||||
pink=p,purple=u,red=e,white=w,yellow=y
|
|
||||||
4. bruises?: bruises=t,no=f
|
|
||||||
5. odor: almond=a,anise=l,creosote=c,fishy=y,foul=f,
|
|
||||||
musty=m,none=n,pungent=p,spicy=s
|
|
||||||
6. gill-attachment: attached=a,descending=d,free=f,notched=n
|
|
||||||
7. gill-spacing: close=c,crowded=w,distant=d
|
|
||||||
8. gill-size: broad=b,narrow=n
|
|
||||||
9. gill-color: black=k,brown=n,buff=b,chocolate=h,gray=g,
|
|
||||||
green=r,orange=o,pink=p,purple=u,red=e,
|
|
||||||
white=w,yellow=y
|
|
||||||
10. stalk-shape: enlarging=e,tapering=t
|
|
||||||
11. stalk-root: bulbous=b,club=c,cup=u,equal=e,
|
|
||||||
rhizomorphs=z,rooted=r,missing=?
|
|
||||||
12. stalk-surface-above-ring: fibrous=f,scaly=y,silky=k,smooth=s
|
|
||||||
13. stalk-surface-below-ring: fibrous=f,scaly=y,silky=k,smooth=s
|
|
||||||
14. stalk-color-above-ring: brown=n,buff=b,cinnamon=c,gray=g,orange=o,
|
|
||||||
pink=p,red=e,white=w,yellow=y
|
|
||||||
15. stalk-color-below-ring: brown=n,buff=b,cinnamon=c,gray=g,orange=o,
|
|
||||||
pink=p,red=e,white=w,yellow=y
|
|
||||||
16. veil-type: partial=p,universal=u
|
|
||||||
17. veil-color: brown=n,orange=o,white=w,yellow=y
|
|
||||||
18. ring-number: none=n,one=o,two=t
|
|
||||||
19. ring-type: cobwebby=c,evanescent=e,flaring=f,large=l,
|
|
||||||
none=n,pendant=p,sheathing=s,zone=z
|
|
||||||
20. spore-print-color: black=k,brown=n,buff=b,chocolate=h,green=r,
|
|
||||||
orange=o,purple=u,white=w,yellow=y
|
|
||||||
21. population: abundant=a,clustered=c,numerous=n,
|
|
||||||
scattered=s,several=v,solitary=y
|
|
||||||
22. habitat: grasses=g,leaves=l,meadows=m,paths=p,
|
|
||||||
urban=u,waste=w,woods=d
|
|
||||||
|
|
||||||
8. Missing Attribute Values: 2480 of them (denoted by "?"), all for
|
|
||||||
attribute #11.
|
|
||||||
|
|
||||||
9. Class Distribution:
|
|
||||||
-- edible: 4208 (51.8%)
|
|
||||||
-- poisonous: 3916 (48.2%)
|
|
||||||
-- total: 8124 instances
|
|
||||||
@ -1,80 +0,0 @@
|
|||||||
#!/usr/bin/python
|
|
||||||
import sys
|
|
||||||
import json
|
|
||||||
|
|
||||||
def loadnmap( fname ):
|
|
||||||
nmap = {}
|
|
||||||
for l in open(fname):
|
|
||||||
arr = l.split()
|
|
||||||
nmap[int(arr[0])] = arr[1].strip()
|
|
||||||
return nmap
|
|
||||||
|
|
||||||
def recstats( rec, l, label ):
|
|
||||||
for it in l.split(','):
|
|
||||||
k = int( it )
|
|
||||||
if k not in rec:
|
|
||||||
rec[ k ] = (0,0)
|
|
||||||
else:
|
|
||||||
if label == 0:
|
|
||||||
rec[k] = (rec[k][0]+1,rec[k][1])
|
|
||||||
else:
|
|
||||||
rec[k] = (rec[k][0],rec[k][1]+1)
|
|
||||||
|
|
||||||
def loadstats( fname, fpath ):
|
|
||||||
res = {}
|
|
||||||
fp = open( fname )
|
|
||||||
for l in open( fpath ):
|
|
||||||
label = int( fp.readline().split()[0] )
|
|
||||||
arr = l.split()
|
|
||||||
for i in xrange( len(arr) ):
|
|
||||||
if i not in res:
|
|
||||||
res[ i ] = {}
|
|
||||||
recstats( res[ i ], arr[i], label )
|
|
||||||
return res
|
|
||||||
|
|
||||||
def mapid( idmap, fid, bid ):
|
|
||||||
if (bid, fid) not in idmap:
|
|
||||||
idmap[ (bid,fid) ] = len(idmap)
|
|
||||||
return idmap[ (bid,fid) ]
|
|
||||||
|
|
||||||
def dumpjson( fo, trees ):
|
|
||||||
fo.write('{\n')
|
|
||||||
fo.write(' \"roots\":'+json.dumps( trees['roots'], separators=(' , ',' : ') ) +',\n' )
|
|
||||||
fo.write(' \"weights\":'+json.dumps( trees['weights'], separators=(' , ',' : ') ) +',\n' )
|
|
||||||
fo.write(' \"nodes\":[\n' )
|
|
||||||
fo.write('%s\n ]' % ',\n'.join((' %s' % json.dumps( n, separators=(' , ',' : ') ) ) for n in trees['nodes']) )
|
|
||||||
fo.write('\n}\n')
|
|
||||||
|
|
||||||
fo = sys.stdout
|
|
||||||
nmap = loadnmap( 'featmap.txt' )
|
|
||||||
stat = loadstats( 'agaricus.txt.test', 'dump.path.txt' )
|
|
||||||
|
|
||||||
trees = {'roots':[], 'weights':[], 'nodes':[] }
|
|
||||||
idmap = {}
|
|
||||||
|
|
||||||
for l in open( 'dump.raw.txt'):
|
|
||||||
if l.startswith('booster['):
|
|
||||||
bid = int( l.split('[')[1].split(']')[0] )
|
|
||||||
trees['roots'].append( mapid(idmap,bid,0) )
|
|
||||||
trees['weights'].append( 1.0 )
|
|
||||||
continue
|
|
||||||
|
|
||||||
node = {}
|
|
||||||
rid = int( l.split(':')[0] )
|
|
||||||
node['id'] = mapid( idmap, bid, rid )
|
|
||||||
node['neg_cnt' ] = stat[ bid ][ rid ][ 0 ]
|
|
||||||
node['pos_cnt' ] = stat[ bid ][ rid ][ 1 ]
|
|
||||||
|
|
||||||
idx = l.find('[f')
|
|
||||||
if idx != -1:
|
|
||||||
fid = int( l[idx+2:len(l)].split('<')[0])
|
|
||||||
node['label'] = nmap[ fid ]
|
|
||||||
node['children'] = [ mapid( idmap, bid, int(it.split('=')[1]) ) for it in l.split()[1].split(',') ]
|
|
||||||
node['edge_tags'] = ['yes','no']
|
|
||||||
else:
|
|
||||||
node['label'] = l.split(':')[1].strip()
|
|
||||||
node['value'] = float(l.split(':')[1].split('=')[1])
|
|
||||||
|
|
||||||
trees['nodes'].append( node )
|
|
||||||
trees['nodes'].sort( key = lambda x:x['id'] )
|
|
||||||
dumpjson( sys.stderr, trees)
|
|
||||||
@ -1,50 +0,0 @@
|
|||||||
#!/usr/bin/python
|
|
||||||
import sys
|
|
||||||
|
|
||||||
def loadfmap( fname ):
|
|
||||||
fmap = {}
|
|
||||||
nmap = {}
|
|
||||||
|
|
||||||
for l in open( fname ):
|
|
||||||
arr = l.split()
|
|
||||||
if arr[0].find('.') != -1:
|
|
||||||
idx = int( arr[0].strip('.') )
|
|
||||||
assert idx not in fmap
|
|
||||||
fmap[ idx ] = {}
|
|
||||||
ftype = arr[1].strip(':')
|
|
||||||
content = arr[2]
|
|
||||||
else:
|
|
||||||
content = arr[0]
|
|
||||||
for it in content.split(','):
|
|
||||||
if it.strip() == '':
|
|
||||||
continue
|
|
||||||
k , v = it.split('=')
|
|
||||||
fmap[ idx ][ v ] = len(nmap)
|
|
||||||
nmap[ len(nmap) ] = ftype+'='+k
|
|
||||||
return fmap, nmap
|
|
||||||
|
|
||||||
def write_nmap( fo, nmap ):
|
|
||||||
for i in xrange( len(nmap) ):
|
|
||||||
fo.write('%d\t%s\ti\n' % (i, nmap[i]) )
|
|
||||||
|
|
||||||
# start here
|
|
||||||
fmap, nmap = loadfmap( 'agaricus-lepiota.fmap' )
|
|
||||||
fo = open( 'featmap.txt', 'w' )
|
|
||||||
write_nmap( fo, nmap )
|
|
||||||
fo.close()
|
|
||||||
|
|
||||||
fo = open( 'agaricus.txt', 'w' )
|
|
||||||
for l in open( 'agaricus-lepiota.data' ):
|
|
||||||
arr = l.split(',')
|
|
||||||
if arr[0] == 'p':
|
|
||||||
fo.write('1')
|
|
||||||
else:
|
|
||||||
assert arr[0] == 'e'
|
|
||||||
fo.write('0')
|
|
||||||
for i in xrange( 1,len(arr) ):
|
|
||||||
fo.write( ' %d:1' % fmap[i][arr[i].strip()] )
|
|
||||||
fo.write('\n')
|
|
||||||
|
|
||||||
fo.close()
|
|
||||||
|
|
||||||
|
|
||||||
@ -1,29 +0,0 @@
|
|||||||
#!/usr/bin/python
|
|
||||||
import sys
|
|
||||||
import random
|
|
||||||
|
|
||||||
if len(sys.argv) < 2:
|
|
||||||
print 'Usage:<filename> <k> [nfold = 5]'
|
|
||||||
exit(0)
|
|
||||||
|
|
||||||
random.seed( 10 )
|
|
||||||
|
|
||||||
k = int( sys.argv[2] )
|
|
||||||
if len(sys.argv) > 3:
|
|
||||||
nfold = int( sys.argv[3] )
|
|
||||||
else:
|
|
||||||
nfold = 5
|
|
||||||
|
|
||||||
fi = open( sys.argv[1], 'r' )
|
|
||||||
ftr = open( sys.argv[1]+'.train', 'w' )
|
|
||||||
fte = open( sys.argv[1]+'.test', 'w' )
|
|
||||||
for l in fi:
|
|
||||||
if random.randint( 1 , nfold ) == k:
|
|
||||||
fte.write( l )
|
|
||||||
else:
|
|
||||||
ftr.write( l )
|
|
||||||
|
|
||||||
fi.close()
|
|
||||||
ftr.close()
|
|
||||||
fte.close()
|
|
||||||
|
|
||||||
@ -1,19 +0,0 @@
|
|||||||
num_round=2
|
|
||||||
|
|
||||||
save_period=0
|
|
||||||
|
|
||||||
data = "agaricus.txt.train"
|
|
||||||
test:data = "agaricus.txt.test"
|
|
||||||
eval[test] = "agaricus.txt.test"
|
|
||||||
eval[train] = "agaricus.txt.train"
|
|
||||||
|
|
||||||
|
|
||||||
booster_type = 0
|
|
||||||
loss_type = 2
|
|
||||||
|
|
||||||
bst:tree_maker=2
|
|
||||||
|
|
||||||
bst:eta=1.0
|
|
||||||
bst:gamma=1.0
|
|
||||||
bst:min_child_weight=1
|
|
||||||
bst:max_depth=3
|
|
||||||
@ -1,23 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
# map feature using indicator encoding, also produce featmap.txt
|
|
||||||
python mapfeat.py
|
|
||||||
# split train and test
|
|
||||||
python mknfold.py agaricus.txt 1
|
|
||||||
# training
|
|
||||||
../../xgboost mushroom.conf num_round=2 model_out=full.model bst:max_depth=3
|
|
||||||
../../xgboost mushroom.conf task=dump model_in=full.model fmap=featmap.txt name_dump=dump.full.txt
|
|
||||||
|
|
||||||
# major element of batch running: add batch prefix to each setting, batch:run=1 will run that action
|
|
||||||
|
|
||||||
../../xgboost mushroom.conf model_in=full.model model_out=m1.model task=interact\
|
|
||||||
batch:interact:booster_index=0 batch:bst:interact:remove=1 batch:run=1\
|
|
||||||
batch:interact:booster_index=1 batch:bst:interact:remove=1 batch:run=1\
|
|
||||||
batch:interact:booster_index=1 batch:bst:interact:expand=9 batch:run=1\
|
|
||||||
|
|
||||||
../../xgboost mushroom.conf task=dump model_in=m1.model fmap=featmap.txt name_dump=dump.m1.txt
|
|
||||||
|
|
||||||
echo "========full======="
|
|
||||||
cat dump.full.txt
|
|
||||||
|
|
||||||
echo "========m1======="
|
|
||||||
cat dump.m1.txt
|
|
||||||
Loading…
x
Reference in New Issue
Block a user