From 5fe3c58b4a1497f1d05459ba823e1db12d8c6e93 Mon Sep 17 00:00:00 2001
From: tqchen
Date: Mon, 22 Dec 2014 00:31:01 -0800
Subject: [PATCH] add kmeans hadoop

---
Review note: in hadoop_streaming the format string and its `%` operands
span two lines. The expression is wrapped in parentheses so the second
line is parsed as a continuation; without them the line starting with
`%` is a syntax error.

 toolkit/kmeans_hadoop.sh | 9 +++++++++
 tracker/rabit_hadoop.py  | 3 ++-
 2 files changed, 11 insertions(+), 1 deletion(-)
 create mode 100755 toolkit/kmeans_hadoop.sh

diff --git a/toolkit/kmeans_hadoop.sh b/toolkit/kmeans_hadoop.sh
new file mode 100755
index 000000000..ed576f8b9
--- /dev/null
+++ b/toolkit/kmeans_hadoop.sh
@@ -0,0 +1,9 @@
+#!/bin/bash
+if [ "$#" -lt 5 ];
+then
+    echo "Usage: "
+    exit -1
+fi
+python ../rabit_hadoop.py -s $1 -i $2 -m kmeans --args "stdin "$3" "$4" stdout" -o $5
+
+    
\ No newline at end of file
diff --git a/tracker/rabit_hadoop.py b/tracker/rabit_hadoop.py
index 79e0cb133..c81e9d344 100755
--- a/tracker/rabit_hadoop.py
+++ b/tracker/rabit_hadoop.py
@@ -35,7 +35,8 @@ if hadoop_streaming_jar != None:
     args.hadoop_streaming_jar = hadoop_streaming_jar
 
 def hadoop_streaming(nslaves, slave_args):
-    cmd = '%s jar %s -input %s -output %s -mapper \"%s stdin %d %d stdout %s\" -reducer \"/bin/cat\" -file %s -D mapred.map.tasks=%d' % (args.hadoop_binary, args.hadoop_streaming_jar, args.input, args.output, args.mapper, args.nclusters, args.iterations, ' '.join(slave_args), args.mapper, nslaves)
+    cmd = ('%s jar %s -input %s -output %s -mapper \"%s %s %s\" -reducer \"/bin/cat\" -file %s -D mapred.map.tasks=%d'
+           % (args.hadoop_binary, args.hadoop_streaming_jar, args.input, args.output, args.mapper, args.args, ' '.join(slave_args), args.mapper, nslaves))
     print cmd
     subprocess.check_call(cmd, shell = True)
 