From 3f4bf96c5d4c877790136fc174712217d151b879 Mon Sep 17 00:00:00 2001 From: Boliang Chen Date: Sun, 11 Jan 2015 13:46:18 +0800 Subject: [PATCH 1/4] temp --- tracker/rabit_hadoop.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tracker/rabit_hadoop.py b/tracker/rabit_hadoop.py index 470455cb6..809dfc1e4 100755 --- a/tracker/rabit_hadoop.py +++ b/tracker/rabit_hadoop.py @@ -12,7 +12,8 @@ import rabit_tracker as tracker #!!! Set path to hadoop and hadoop streaming jar here hadoop_binary = 'hadoop' -hadoop_streaming_jar = None +#hadoop_streaming_jar = None +hadoop_streaming_jar = '/home/likewise-open/APEXLAB/blchen/streaming.jar' # code hadoop_home = os.getenv('HADOOP_HOME') From 7fa23f2d2f25aeadb35fe907a22a7a2b672440bf Mon Sep 17 00:00:00 2001 From: Boliang Chen Date: Sun, 11 Jan 2015 14:52:48 +0800 Subject: [PATCH 2/4] modify default jobname --- tracker/rabit_hadoop.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tracker/rabit_hadoop.py b/tracker/rabit_hadoop.py index a283e0ca7..20bb47e7f 100755 --- a/tracker/rabit_hadoop.py +++ b/tracker/rabit_hadoop.py @@ -50,7 +50,7 @@ parser.add_argument('-f', '--files', nargs = '*', ' You can also use it to manually cache files when auto_file_cache is off') parser.add_argument('--jobname', default='auto', help = 'customize jobname in tracker') parser.add_argument('--timeout', default=600000000, type=int, - help = 'timeout of each mapper job, automatically set to a very long time,'\ + help = 'timeout ((in milli seconds)) of each mapper job, automatically set to a very long time,'\ 'normally you do not need to set this ') parser.add_argument('-m', '--memory_mb', default=-1, type=int, help = 'maximum memory used by the process, Guide: set it large (near mapred.cluster.max.map.memory.mb)'\ @@ -74,7 +74,7 @@ parser.add_argument('command', nargs='+', args = parser.parse_args() if args.jobname == 'auto': - args.jobname = ('Rabit(nworker=%d):' % args.nworker) + 
args.command[0].split('/')[-1]; + args.jobname = ('Rabit[nworker=%d]:' % args.nworker) + args.command[0].split('/')[-1]; def hadoop_streaming(nworker, worker_args): fset = set() From 80b0d06b7e3ba9802da762d07b80e60230015071 Mon Sep 17 00:00:00 2001 From: Boliang Chen Date: Sun, 11 Jan 2015 14:56:20 +0800 Subject: [PATCH 3/4] merger from tqchen --- tracker/rabit_hadoop.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tracker/rabit_hadoop.py b/tracker/rabit_hadoop.py index 135f13df5..7808f9143 100755 --- a/tracker/rabit_hadoop.py +++ b/tracker/rabit_hadoop.py @@ -12,8 +12,7 @@ import rabit_tracker as tracker #!!! Set path to hadoop and hadoop streaming jar here hadoop_binary = 'hadoop' -#hadoop_streaming_jar = None -hadoop_streaming_jar = '/home/likewise-open/APEXLAB/blchen/streaming.jar' +hadoop_streaming_jar = None # code hadoop_home = os.getenv('HADOOP_HOME') @@ -50,7 +49,7 @@ parser.add_argument('-f', '--files', default = [], action='append', ' You can also use it to manually cache files when auto_file_cache is off') parser.add_argument('--jobname', default='auto', help = 'customize jobname in tracker') parser.add_argument('--timeout', default=600000000, type=int, - help = 'timeout ((in milli seconds)) of each mapper job, automatically set to a very long time,'\ + help = 'timeout (in milli seconds) of each mapper job, automatically set to a very long time,'\ 'normally you do not need to set this ') parser.add_argument('-m', '--memory_mb', default=-1, type=int, help = 'maximum memory used by the process, Guide: set it large (near mapred.cluster.max.map.memory.mb)'\ From c6d0be57d499f44872425b67fb217f63e8221709 Mon Sep 17 00:00:00 2001 From: Boliang Chen Date: Sun, 11 Jan 2015 15:39:50 +0800 Subject: [PATCH 4/4] explain timeout --- tracker/rabit_hadoop.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tracker/rabit_hadoop.py b/tracker/rabit_hadoop.py index 7808f9143..3d5977e14 100755 --- a/tracker/rabit_hadoop.py +++ 
b/tracker/rabit_hadoop.py @@ -49,7 +49,7 @@ parser.add_argument('-f', '--files', default = [], action='append', ' You can also use it to manually cache files when auto_file_cache is off') parser.add_argument('--jobname', default='auto', help = 'customize jobname in tracker') parser.add_argument('--timeout', default=600000000, type=int, - help = 'timeout (in milli seconds) of each mapper job, automatically set to a very long time,'\ + help = 'timeout (in milliseconds) of each mapper job, automatically set to a very long time,'\ 'normally you do not need to set this ') parser.add_argument('-m', '--memory_mb', default=-1, type=int, help = 'maximum memory used by the process, Guide: set it large (near mapred.cluster.max.map.memory.mb)'\