#!/usr/bin/python
"""Randomly split the rows of a line-oriented (e.g. libsvm) file into k shards.

Usage: splitrows.py <fin> <fo> k

Every line of <fin> is written unchanged to exactly one of the files
<fo>.row0 ... <fo>.row(k-1).  The shard is chosen by a pseudo-random
generator seeded with a fixed value, so repeated runs on the same input
produce the same split.
"""
# Provenance: added as multi-node/row-split/splitrows.py in commit 3b48a9f3
# ("checkin split row", tqchen, 2014-11-19).
import random
import sys


def split_rows(fin, fout_prefix, k, seed=10):
    """Distribute the lines of *fin* over *k* output files.

    Args:
        fin: Path of the input text file; it is read line by line.
        fout_prefix: Prefix of the output paths; shard ``i`` is written to
            ``fout_prefix + '.row%d' % i``.
        k: Number of shards to create; must be at least 1.
        seed: Seed of the pseudo-random generator that picks the shard of
            each line.  The default reproduces the historical split.

    Raises:
        ValueError: If *k* is smaller than 1.
        IOError / OSError: If a file cannot be opened, read or written.
    """
    if k < 1:
        raise ValueError('k must be a positive integer, got %r' % (k,))

    # A private generator yields the same sequence as random.seed(seed)
    # followed by random.randint(...), without touching global state.
    rng = random.Random(seed)

    # Open the input first so that a missing input does not leave empty
    # shard files behind.
    with open(fin, 'r') as src:
        shards = []
        try:
            for i in range(k):
                shards.append(open('%s.row%d' % (fout_prefix, i), 'w'))
            for line in src:
                shards[rng.randint(0, k - 1)].write(line)
        finally:
            # Close (and thereby flush) every shard opened so far, even if
            # opening a later shard or copying a line failed.
            for shard in shards:
                shard.close()


def main(argv):
    """Command-line entry point; returns the process exit status."""
    if len(argv) < 4:
        sys.stderr.write('Usage: <fin> <fo> k\n')
        return 1
    split_rows(argv[1], argv[2], int(argv[3]))
    return 0


if __name__ == '__main__':
    sys.exit(main(sys.argv))