From c34367b2077bdee159efecdbe115e6eb5fd28677 Mon Sep 17 00:00:00 2001
From: tqchen
Date: Mon, 26 Jan 2015 10:27:44 -0800
Subject: [PATCH] add msd

---
 .gitignore                     |  2 ++
 demo/.gitignore                |  1 +
 demo/yearpredMSD/csv2libsvm.py | 25 +++++++++++++++++++++++++
 demo/yearpredMSD/runexp.sh     | 23 +++++++++++++++++++++++
 4 files changed, 51 insertions(+)
 create mode 100644 demo/.gitignore
 create mode 100755 demo/yearpredMSD/csv2libsvm.py
 create mode 100755 demo/yearpredMSD/runexp.sh

diff --git a/.gitignore b/.gitignore
index d454c6d1d..ee5928043 100644
--- a/.gitignore
+++ b/.gitignore
@@ -45,3 +45,5 @@ Debug
 *save
 *csv
 .Rproj.user
+xgboost
+xgboost.mock
diff --git a/demo/.gitignore b/demo/.gitignore
new file mode 100644
index 000000000..e52797d15
--- /dev/null
+++ b/demo/.gitignore
@@ -0,0 +1 @@
+*.libsvm
\ No newline at end of file
diff --git a/demo/yearpredMSD/csv2libsvm.py b/demo/yearpredMSD/csv2libsvm.py
new file mode 100755
--- /dev/null
+++ b/demo/yearpredMSD/csv2libsvm.py
@@ -0,0 +1,25 @@
+#!/usr/bin/python
+"""Convert an all-numerical csv file to libsvm format.
+
+Usage: csv2libsvm.py INPUT.csv OUTPUT.libsvm
+
+The first column of every row becomes the label; each remaining column
+is written as a zero-based "index:value" feature pair.
+"""
+import sys
+
+if len(sys.argv) < 3:
+    # stop here: continuing would fail with an IndexError on sys.argv below
+    sys.stderr.write('Usage: %s INPUT.csv OUTPUT.libsvm\n' % sys.argv[0])
+    sys.stderr.write('convert a all numerical csv to libsvm\n')
+    sys.exit(1)
+
+# the with-statement closes both files even when a row fails to convert
+with open(sys.argv[1]) as fi, open(sys.argv[2], 'w') as fo:
+    for line in fi:
+        # strip the line ending so the last value carries no stray \r or \n
+        arr = line.rstrip('\r\n').split(',')
+        if arr == ['']:
+            continue  # blank line, holds no record
+        feats = ['%d:%s' % (i, v) for i, v in enumerate(arr[1:])]
+        fo.write(' '.join([arr[0]] + feats) + '\n')
diff --git a/demo/yearpredMSD/runexp.sh b/demo/yearpredMSD/runexp.sh
new file mode 100755
--- /dev/null
+++ b/demo/yearpredMSD/runexp.sh
@@ -0,0 +1,23 @@
+#!/bin/bash
+# Download the YearPredictionMSD data (if not present), convert it to
+# libsvm format, split it into train/test files and run xgboost on it.
+
+# abort on the first failing step instead of training on partial data
+set -e
+
+if [ -f YearPredictionMSD.txt ]
+then
+    echo "use existing data to run experiment"
+else
+    echo "getting data from uci, make sure you are connected to internet"
+    wget https://archive.ics.uci.edu/ml/machine-learning-databases/00203/YearPredictionMSD.txt.zip
+    unzip YearPredictionMSD.txt.zip
+fi
+echo "start making data.."
+# convert the all-numerical csv into libsvm format
+python csv2libsvm.py YearPredictionMSD.txt yearpredMSD.libsvm
+# first 463715 rows go to the train file, last 51630 rows to the test file
+head -n 463715 yearpredMSD.libsvm > yearpredMSD.libsvm.train
+tail -n 51630 yearpredMSD.libsvm > yearpredMSD.libsvm.test
+echo "finish making the data"
+../../xgboost yearpredMSD.conf