diff --git a/submit_job_hadoop.py b/submit_job_hadoop.py
index 0bd95644b..bf1bac752 100755
--- a/submit_job_hadoop.py
+++ b/submit_job_hadoop.py
@@ -5,6 +5,7 @@ This is an example job submit script for hadoop streaming
 import argparse
 import sys
 import os
+import time
 import subprocess
 sys.path.append('./src/')
 import rabit_master as master
@@ -25,5 +26,7 @@ def hadoop_streaming(nslaves, slave_args):
     cmd = '%s jar %s -input %s -output %s -mapper \"%s stdin %d %d stdout %s\" -reducer \"/bin/cat\" -file %s' % (args.hadoop_binary, args.hadoop_streaming_jar, args.input, args.output, args.mapper, args.nclusters, args.iterations, ' '.join(slave_args), args.mapper)
     print cmd
     subprocess.check_call(cmd, shell = True)
-    
+
+start = time.time()
 master.submit(args.nslaves, [], fun_submit= hadoop_streaming)
+print 'All run took %s' % (time.time() - start)
diff --git a/toolkit/kmeans.cpp b/toolkit/kmeans.cpp
index 109b49826..674223cc6 100644
--- a/toolkit/kmeans.cpp
+++ b/toolkit/kmeans.cpp
@@ -3,6 +3,7 @@
 #include
 #include
 #include "./toolkit_util.h"
+#include <time.h>
 
 using namespace rabit;
 
@@ -85,6 +86,8 @@ int main(int argc, char *argv[]) {
     printf("Usage: num_cluster max_iter \n");
     return 0;
   }
+  clock_t tStart = clock();
+
   srand(0);
   // load the data
   SparseMat data;
@@ -140,6 +143,7 @@ int main(int argc, char *argv[]) {
   if (rabit::GetRank() == 0) {
     model.centroids.Print(argv[4]);
   }
+  utils::LogPrintf("[%d] Time taken: %f seconds\n", rabit::GetRank(), static_cast<double>(clock() - tStart) / CLOCKS_PER_SEC);
   rabit::Finalize();
   return 0;
 }