diff --git a/.gitignore b/.gitignore
index d454c6d1d..ee5928043 100644
--- a/.gitignore
+++ b/.gitignore
@@ -45,3 +45,5 @@ Debug
 *save
 *csv
 .Rproj.user
+xgboost
+xgboost.mock
diff --git a/demo/.gitignore b/demo/.gitignore
new file mode 100644
index 000000000..e52797d15
--- /dev/null
+++ b/demo/.gitignore
@@ -0,0 +1 @@
+*.libsvm
\ No newline at end of file
diff --git a/demo/yearpredMSD/csv2libsvm.py b/demo/yearpredMSD/csv2libsvm.py
new file mode 100755
index 000000000..d7c1d15c1
--- /dev/null
+++ b/demo/yearpredMSD/csv2libsvm.py
@@ -0,0 +1,23 @@
+#!/usr/bin/python
+"""Convert an all-numerical CSV file to libsvm format.
+
+The first column of each row is written as the label; every following
+column is written as ``index:value``, with indices starting at 0.
+"""
+import sys
+
+if len(sys.argv) < 3:
+    print 'Usage: %s <input.csv> <output.libsvm>' % sys.argv[0]
+    print 'convert a all numerical csv to libsvm'
+    # stop here: without both paths the opens below would raise IndexError
+    sys.exit(1)
+
+# context managers close both files even if a row fails part-way through
+with open(sys.argv[1]) as fi:
+    with open(sys.argv[2], 'w') as fo:
+        for l in fi:
+            arr = l.split(',')
+            fo.write('%s' % arr[0])
+            # the last field keeps the line's '\n', which ends the output row
+            for i in xrange(len(arr) - 1):
+                fo.write(' %d:%s' % (i, arr[i+1]))
diff --git a/demo/yearpredMSD/runexp.sh b/demo/yearpredMSD/runexp.sh
new file mode 100755
index 000000000..fa75b837e
--- /dev/null
+++ b/demo/yearpredMSD/runexp.sh
@@ -0,0 +1,18 @@
+#!/bin/bash
+
+if [ -f YearPredictionMSD.txt ]
+then
+    echo "use existing data to run experiment"
+else
+    echo "getting data from uci, make sure you are connected to internet"
+    wget https://archive.ics.uci.edu/ml/machine-learning-databases/00203/YearPredictionMSD.txt.zip
+    unzip YearPredictionMSD.txt.zip
+fi
+echo "start making data.."
+# convert the csv to libsvm format (label first, then index:value pairs)
+python csv2libsvm.py YearPredictionMSD.txt yearpredMSD.libsvm
+# first 463715 rows become the training set, last 51630 rows the test set
+head -n 463715 yearpredMSD.libsvm > yearpredMSD.libsvm.train
+tail -n 51630 yearpredMSD.libsvm > yearpredMSD.libsvm.test
+echo "finish making the data"
+../../xgboost yearpredMSD.conf