This commit is contained in:
tqchen 2015-01-26 10:27:44 -08:00
parent 72f6fbd46f
commit c34367b207
4 changed files with 37 additions and 0 deletions

2
.gitignore vendored
View File

@ -45,3 +45,5 @@ Debug
*save
*csv
.Rproj.user
xgboost
xgboost.mock

1
demo/.gitignore vendored Normal file
View File

@ -0,0 +1 @@
*.libsvm

14
demo/yearpredMSD/csv2libsvm.py Executable file
View File

@ -0,0 +1,14 @@
#!/usr/bin/python
"""Convert an all-numerical CSV file to libsvm format.

Usage: csv2libsvm.py <csv> <libsvm>

The first CSV column is written as the label. Every remaining column is
written as ``index:value``, with indices starting at 0.
"""
import sys


def convert(csv_path, libsvm_path):
    """Write the libsvm version of ``csv_path`` to ``libsvm_path``.

    Each input line becomes one output line of the form
    ``<col0> 0:<col1> 1:<col2> ...``. Values are copied as text; no
    numeric parsing or validation is performed.
    """
    # Context managers make sure both files are closed, even on error.
    with open(csv_path) as fin, open(libsvm_path, 'w') as fout:
        for line in fin:
            # Drop the line terminator up front so that the last value never
            # carries a stray '\r' or '\n', and so that a final line without
            # a trailing newline still yields a properly terminated record.
            arr = line.rstrip('\r\n').split(',')
            feats = ''.join(
                ' %d:%s' % (idx, val) for idx, val in enumerate(arr[1:]))
            fout.write('%s%s\n' % (arr[0], feats))


def main(argv):
    """Command-line entry point; returns the process exit status."""
    if len(argv) < 3:
        print('Usage: <csv> <libsvm>')
        print('convert a all numerical csv to libsvm')
        # Stop here: without both paths there is nothing to convert.
        return 1
    convert(argv[1], argv[2])
    return 0


if __name__ == '__main__':
    sys.exit(main(sys.argv))

20
demo/yearpredMSD/runexp.sh Executable file
View File

@ -0,0 +1,20 @@
#!/bin/bash
# Run the YearPredictionMSD demo end to end:
#   1. download and unzip the dataset unless YearPredictionMSD.txt exists,
#   2. convert the CSV to libsvm format with csv2libsvm.py,
#   3. split the result into train/test files,
#   4. run xgboost with yearpredMSD.conf.
#
# Requires: wget, unzip, python, and a built ../../xgboost binary.

# Abort on the first failing step so that a broken download or conversion
# never feeds partial data into training.
set -euo pipefail

# Every path below is relative to this script's directory.
cd "$(dirname "${BASH_SOURCE[0]}")"

readonly DATA_URL="https://archive.ics.uci.edu/ml/machine-learning-databases/00203/YearPredictionMSD.txt.zip"
readonly DATA_ZIP="YearPredictionMSD.txt.zip"
readonly DATA_TXT="YearPredictionMSD.txt"
readonly LIBSVM_FILE="yearpredMSD.libsvm"
# Number of leading rows used for training and trailing rows used for testing.
readonly TRAIN_ROWS=463715
readonly TEST_ROWS=51630

if [[ -f "${DATA_TXT}" ]]; then
  echo "use existing data to run experiment"
else
  echo "getting data from uci, make sure you are connected to internet"
  # -O overwrites a stale archive left by an interrupted earlier run instead
  # of saving the new download under a ".1" suffix.
  wget -O "${DATA_ZIP}" "${DATA_URL}"
  unzip "${DATA_ZIP}"
fi

echo "start making data.."
# Convert the all-numerical CSV into libsvm format (first column is the label).
python csv2libsvm.py "${DATA_TXT}" "${LIBSVM_FILE}"
head -n "${TRAIN_ROWS}" "${LIBSVM_FILE}" > "${LIBSVM_FILE}.train"
tail -n "${TEST_ROWS}" "${LIBSVM_FILE}" > "${LIBSVM_FILE}.test"
echo "finish making the data"

../../xgboost yearpredMSD.conf