Group CLI demo into subdirectory. (#6258)
The CLI is not the most developed interface. Grouping its demos into a dedicated directory helps new users avoid it, since most use cases go through a language binding.
This commit is contained in:
@@ -18,7 +18,7 @@ max_depth = 3
|
||||
# the number of rounds to do boosting
|
||||
num_round = 2
|
||||
# 0 means do not save any model except the final round model
|
||||
save_period = 0
|
||||
save_period = 2
|
||||
# The path of training data
|
||||
data = "agaricus.txt.train"
|
||||
# The path of validation data, used to monitor training process, here [test] sets name of the validation set
|
||||
@@ -3,13 +3,15 @@
|
||||
python mapfeat.py
|
||||
# split train and test
|
||||
python mknfold.py agaricus.txt 1
|
||||
# training and output the models
|
||||
../../xgboost mushroom.conf
|
||||
# output prediction task=pred
|
||||
../../xgboost mushroom.conf task=pred model_in=0002.model
|
||||
# print the boosters of 0002.model in dump.raw.txt
|
||||
../../xgboost mushroom.conf task=dump model_in=0002.model name_dump=dump.raw.txt
|
||||
# use the feature map in printing for better visualization
|
||||
../../xgboost mushroom.conf task=dump model_in=0002.model fmap=featmap.txt name_dump=dump.nice.txt
|
||||
cat dump.nice.txt
|
||||
|
||||
XGBOOST=../../../xgboost
|
||||
|
||||
# training and output the models
|
||||
$XGBOOST mushroom.conf
|
||||
# output prediction task=pred
|
||||
$XGBOOST mushroom.conf task=pred model_in=0002.model
|
||||
# print the boosters of 0002.model in dump.raw.txt
|
||||
$XGBOOST mushroom.conf task=dump model_in=0002.model name_dump=dump.raw.txt
|
||||
# use the feature map in printing for better visualization
|
||||
$XGBOOST mushroom.conf task=dump model_in=0002.model fmap=featmap.txt name_dump=dump.nice.txt
|
||||
cat dump.nice.txt
|
||||
11
demo/CLI/distributed-training/run_aws.sh
Normal file
11
demo/CLI/distributed-training/run_aws.sh
Normal file
@@ -0,0 +1,11 @@
|
||||
# This is the example script to run distributed xgboost on AWS.
# Change the following line for configuration: BUCKET is the S3 bucket name
# that is substituted into the data, eval and model_dir URLs below.

export BUCKET=mybucket

# submit the job to YARN
# Each key=value argument is quoted so the shell passes it through verbatim:
# without quotes `eval[test]=...` is a glob pattern subject to pathname
# expansion, and an edited BUCKET value would be subject to word splitting.
# Every continuation line keeps whitespace before the backslash so adjacent
# arguments are never joined into one word.
../../../dmlc-core/tracker/dmlc-submit --cluster=yarn --num-workers=2 --worker-cores=2 \
    ../../../xgboost mushroom.aws.conf nthread=2 \
    "data=s3://${BUCKET}/xgb-demo/train" \
    "eval[test]=s3://${BUCKET}/xgb-demo/test" \
    "model_dir=s3://${BUCKET}/xgb-demo/model"
|
||||
33
demo/CLI/regression/mapfeat.py
Executable file
33
demo/CLI/regression/mapfeat.py
Executable file
@@ -0,0 +1,33 @@
|
||||
#!/usr/bin/python
"""Convert ``machine.data`` (CSV) into libsvm text plus a feature map.

Reads ``machine.data`` from the current directory and writes:

* ``machine.txt``  -- one line per input row: the value of column 8 as the
  label, columns 2..7 as features 0..5, and a one-hot indicator feature for
  the value of column 0 (indices start at 6, assigned in order of first
  appearance).
* ``featmap.txt``  -- tab-separated ``<index> <name> <type>`` lines that
  describe every feature index written to ``machine.txt``.
"""

# list from machine.names; NAMES[i + 1] labels the value read from CSV
# column i + 2 (the CSV row has one more leading column than this list).
NAMES = [
    'vendor', 'MYCT', 'MMIN', 'MMAX', 'CACH', 'CHMIN', 'CHMAX', 'PRP', 'ERP'
]

# Number of numeric feature columns copied verbatim; the one-hot vendor
# indicator indices start right after them.
NUM_NUMERIC = 6


def main():
    """Write ``machine.txt`` and ``featmap.txt`` from ``machine.data``."""
    fmap = {}  # column-0 value -> one-hot feature index

    # Context managers guarantee that all handles (including the input,
    # which was previously never closed) are released even on error.
    with open('machine.data') as fi, open('machine.txt', 'w') as fo:
        for line in fi:
            if not line.strip():
                # A blank (e.g. trailing) line has no columns to index.
                continue
            arr = line.split(',')
            feats = [' %d:%s' % (i, arr[i + 2]) for i in range(NUM_NUMERIC)]
            # First sighting of a vendor gets the next free index; later
            # sightings reuse the index already stored.
            idx = fmap.setdefault(arr[0], NUM_NUMERIC + len(fmap))
            fo.write(arr[8] + ''.join(feats) + ' %d:1\n' % idx)

    # create feature map for machine data
    with open('featmap.txt', 'w') as fo:
        for i in range(NUM_NUMERIC):
            fo.write('%d\t%s\tint\n' % (i, NAMES[i + 1]))
        # Emit indicator features in index order.
        for vendor, idx in sorted(fmap.items(), key=lambda kv: kv[1]):
            fo.write('%d\tvendor=%s\ti\n' % (idx, vendor))


if __name__ == '__main__':
    main()
|
||||
28
demo/CLI/regression/mknfold.py
Executable file
28
demo/CLI/regression/mknfold.py
Executable file
@@ -0,0 +1,28 @@
|
||||
#!/usr/bin/python
"""Randomly split a text file into ``<filename>.train`` and ``<filename>.test``.

Usage: ``mknfold.py <filename> <k> [nfold = 5]``

For every input line an integer is drawn uniformly from ``1..nfold``; lines
whose draw equals ``k`` go to the test file, all others to the train file.
The random generator is seeded with a fixed value, so repeated runs on the
same input produce the same split.
"""
import sys
import random


def main(argv=None):
    """Run the split.

    Parameters
    ----------
    argv : list of str, optional
        Argument vector in ``sys.argv`` layout (program name first).
        Defaults to ``sys.argv``.

    Raises
    ------
    SystemExit
        After printing the usage line when ``<filename>`` or ``<k>`` is
        missing.
    """
    argv = sys.argv if argv is None else argv

    # Both <filename> and <k> are mandatory.  The previous `< 2` check let a
    # call without <k> through and then crashed on the argv[2] lookup.
    if len(argv) < 3:
        print('Usage:<filename> <k> [nfold = 5]')
        # Exit status is kept at 0, as before, for backward compatibility.
        sys.exit(0)

    random.seed(10)

    path = argv[1]
    k = int(argv[2])
    nfold = int(argv[3]) if len(argv) > 3 else 5

    # Context managers close all three files even if a write fails.
    with open(path, 'r') as fi, \
            open(path + '.train', 'w') as ftr, \
            open(path + '.test', 'w') as fte:
        for line in fi:
            if random.randint(1, nfold) == k:
                fte.write(line)
            else:
                ftr.write(line)


if __name__ == '__main__':
    main()
|
||||
@@ -1,14 +1,9 @@
|
||||
#!/usr/bin/python
|
||||
import sys
|
||||
|
||||
if len(sys.argv) < 3:
|
||||
print 'Usage: <csv> <libsvm>'
|
||||
print 'convert a all numerical csv to libsvm'
|
||||
|
||||
fo = open(sys.argv[2], 'w')
|
||||
|
||||
for l in open(sys.argv[1]):
|
||||
arr = l.split(',')
|
||||
fo.write('%s' % arr[0])
|
||||
for i in xrange(len(arr) - 1):
|
||||
for i in range(len(arr) - 1):
|
||||
fo.write(' %d:%s' % (i, arr[i+1]))
|
||||
fo.close()
|
||||
@@ -14,4 +14,4 @@ python csv2libsvm.py YearPredictionMSD.txt yearpredMSD.libsvm
|
||||
head -n 463715 yearpredMSD.libsvm > yearpredMSD.libsvm.train
|
||||
tail -n 51630 yearpredMSD.libsvm > yearpredMSD.libsvm.test
|
||||
echo "finish making the data"
|
||||
../../xgboost yearpredMSD.conf
|
||||
../../../xgboost yearpredMSD.conf
|
||||
@@ -1,11 +0,0 @@
|
||||
# This is the example script to run distributed xgboost on AWS.
|
||||
# Change the following two lines for configuration
|
||||
|
||||
export BUCKET=mybucket
|
||||
|
||||
# submit the job to YARN
|
||||
../../dmlc-core/tracker/dmlc-submit --cluster=yarn --num-workers=2 --worker-cores=2\
|
||||
../../xgboost mushroom.aws.conf nthread=2\
|
||||
data=s3://${BUCKET}/xgb-demo/train\
|
||||
eval[test]=s3://${BUCKET}/xgb-demo/test\
|
||||
model_dir=s3://${BUCKET}/xgb-demo/model
|
||||
@@ -1,5 +1,4 @@
|
||||
#!/bin/bash
|
||||
|
||||
../../xgboost mq2008.conf
|
||||
|
||||
../../xgboost mq2008.conf task=pred model_in=0004.model
|
||||
|
||||
@@ -7,7 +7,7 @@ def save_data(group_data,output_feature,output_group):
|
||||
output_group.write(str(len(group_data))+"\n")
|
||||
for data in group_data:
|
||||
# only include nonzero features
|
||||
feats = [ p for p in data[2:] if float(p.split(':')[1]) != 0.0 ]
|
||||
feats = [ p for p in data[2:] if float(p.split(':')[1]) != 0.0 ]
|
||||
output_feature.write(data[0] + " " + " ".join(feats) + "\n")
|
||||
|
||||
if __name__ == "__main__":
|
||||
@@ -18,7 +18,7 @@ if __name__ == "__main__":
|
||||
fi = open(sys.argv[1])
|
||||
output_feature = open(sys.argv[2],"w")
|
||||
output_group = open(sys.argv[3],"w")
|
||||
|
||||
|
||||
group_data = []
|
||||
group = ""
|
||||
for line in fi:
|
||||
@@ -38,4 +38,3 @@ if __name__ == "__main__":
|
||||
fi.close()
|
||||
output_feature.close()
|
||||
output_group.close()
|
||||
|
||||
|
||||
@@ -1,7 +1,13 @@
|
||||
#!/bin/bash
|
||||
wget https://s3-us-west-2.amazonaws.com/xgboost-examples/MQ2008.rar
|
||||
unrar x MQ2008.rar
|
||||
mv -f MQ2008/Fold1/*.txt .
|
||||
if [ -f MQ2008.rar ]
|
||||
then
|
||||
echo "Use downloaded data to run experiment."
|
||||
else
|
||||
echo "Downloading data."
|
||||
wget https://s3-us-west-2.amazonaws.com/xgboost-examples/MQ2008.rar
|
||||
unrar x MQ2008.rar
|
||||
mv -f MQ2008/Fold1/*.txt .
|
||||
fi
|
||||
|
||||
python trans_data.py train.txt mq2008.train mq2008.train.group
|
||||
|
||||
|
||||
@@ -1,31 +0,0 @@
|
||||
#!/usr/bin/python
|
||||
|
||||
fo = open( 'machine.txt', 'w' )
|
||||
cnt = 6
|
||||
fmap = {}
|
||||
for l in open( 'machine.data' ):
|
||||
arr = l.split(',')
|
||||
fo.write(arr[8])
|
||||
for i in range( 0,6 ):
|
||||
fo.write( ' %d:%s' %(i,arr[i+2]) )
|
||||
|
||||
if arr[0] not in fmap:
|
||||
fmap[arr[0]] = cnt
|
||||
cnt += 1
|
||||
|
||||
fo.write( ' %d:1' % fmap[arr[0]] )
|
||||
fo.write('\n')
|
||||
|
||||
fo.close()
|
||||
|
||||
# create feature map for machine data
|
||||
fo = open('featmap.txt', 'w')
|
||||
# list from machine.names
|
||||
names = ['vendor','MYCT', 'MMIN', 'MMAX', 'CACH', 'CHMIN', 'CHMAX', 'PRP', 'ERP' ];
|
||||
|
||||
for i in range(0,6):
|
||||
fo.write( '%d\t%s\tint\n' % (i, names[i+1]))
|
||||
|
||||
for v, k in sorted( fmap.items(), key = lambda x:x[1] ):
|
||||
fo.write( '%d\tvendor=%s\ti\n' % (k, v))
|
||||
fo.close()
|
||||
@@ -1,29 +0,0 @@
|
||||
#!/usr/bin/python
|
||||
import sys
|
||||
import random
|
||||
|
||||
if len(sys.argv) < 2:
|
||||
print ('Usage:<filename> <k> [nfold = 5]')
|
||||
exit(0)
|
||||
|
||||
random.seed( 10 )
|
||||
|
||||
k = int( sys.argv[2] )
|
||||
if len(sys.argv) > 3:
|
||||
nfold = int( sys.argv[3] )
|
||||
else:
|
||||
nfold = 5
|
||||
|
||||
fi = open( sys.argv[1], 'r' )
|
||||
ftr = open( sys.argv[1]+'.train', 'w' )
|
||||
fte = open( sys.argv[1]+'.test', 'w' )
|
||||
for l in fi:
|
||||
if random.randint( 1 , nfold ) == k:
|
||||
fte.write( l )
|
||||
else:
|
||||
ftr.write( l )
|
||||
|
||||
fi.close()
|
||||
ftr.close()
|
||||
fte.close()
|
||||
|
||||
Reference in New Issue
Block a user