add tracker
This commit is contained in:
parent
295d8a12f1
commit
20daddbeda
@ -1,4 +1,4 @@
|
|||||||
#!/usr/bin/python
|
#!/usr/bin/env python
|
||||||
"""
|
"""
|
||||||
This is the demo submission script of rabit for submitting jobs in local machine
|
This is the demo submission script of rabit for submitting jobs in local machine
|
||||||
"""
|
"""
|
||||||
|
|||||||
@ -1,4 +1,4 @@
|
|||||||
#!/usr/bin/python
|
#!/usr/bin/env python
|
||||||
"""
|
"""
|
||||||
Deprecated
|
Deprecated
|
||||||
|
|
||||||
|
|||||||
@ -1,4 +1,4 @@
|
|||||||
#!/usr/bin/python
|
#!/usr/bin/env python
|
||||||
"""
|
"""
|
||||||
Submission script to submit rabit jobs using MPI
|
Submission script to submit rabit jobs using MPI
|
||||||
"""
|
"""
|
||||||
|
|||||||
@ -1,4 +1,4 @@
|
|||||||
#!/usr/bin/python
|
#!/usr/bin/env python
|
||||||
"""
|
"""
|
||||||
Submit rabit jobs to Sun Grid Engine
|
Submit rabit jobs to Sun Grid Engine
|
||||||
"""
|
"""
|
||||||
@ -38,7 +38,7 @@ else:
|
|||||||
|
|
||||||
runscript = '%s/runrabit.sh' % args.logdir
|
runscript = '%s/runrabit.sh' % args.logdir
|
||||||
fo = open(runscript, 'w')
|
fo = open(runscript, 'w')
|
||||||
fo.write('\"$@\"')
|
fo.write('\"$@\"\n')
|
||||||
fo.close()
|
fo.close()
|
||||||
#
|
#
|
||||||
# submission script using MPI
|
# submission script using MPI
|
||||||
@ -63,7 +63,7 @@ def sge_submit(nslave, worker_args, worker_envs):
|
|||||||
cmd += ' %s %s' % (runscript, ' '.join(args.command + worker_args))
|
cmd += ' %s %s' % (runscript, ' '.join(args.command + worker_args))
|
||||||
print cmd
|
print cmd
|
||||||
subprocess.check_call(cmd, shell = True)
|
subprocess.check_call(cmd, shell = True)
|
||||||
print 'Waiting for the jobs to get up...' % args.jobname
|
print 'Waiting for the jobs to get up...'
|
||||||
|
|
||||||
# call submit, with nslave, the commands to run each job and submit function
|
# call submit, with nslave, the commands to run each job and submit function
|
||||||
tracker.submit(args.nworker, [], fun_submit = sge_submit, verbose = args.verbose)
|
tracker.submit(args.nworker, [], fun_submit = sge_submit, verbose = args.verbose)
|
||||||
|
|||||||
@ -13,6 +13,7 @@ import socket
|
|||||||
import struct
|
import struct
|
||||||
import subprocess
|
import subprocess
|
||||||
import random
|
import random
|
||||||
|
import time
|
||||||
from threading import Thread
|
from threading import Thread
|
||||||
|
|
||||||
"""
|
"""
|
||||||
@ -258,6 +259,7 @@ class Tracker:
|
|||||||
job_map[s.jobid] = rank
|
job_map[s.jobid] = rank
|
||||||
if len(todo_nodes) == 0:
|
if len(todo_nodes) == 0:
|
||||||
self.log_print('@tracker All of %d nodes getting started' % nslave, 2)
|
self.log_print('@tracker All of %d nodes getting started' % nslave, 2)
|
||||||
|
self.start_time = time.time()
|
||||||
s.assign_rank(rank, wait_conn, tree_map, parent_map, ring_map)
|
s.assign_rank(rank, wait_conn, tree_map, parent_map, ring_map)
|
||||||
if s.cmd != 'start':
|
if s.cmd != 'start':
|
||||||
self.log_print('Recieve %s signal from %d' % (s.cmd, s.rank), 1)
|
self.log_print('Recieve %s signal from %d' % (s.cmd, s.rank), 1)
|
||||||
@ -266,6 +268,8 @@ class Tracker:
|
|||||||
if s.wait_accept > 0:
|
if s.wait_accept > 0:
|
||||||
wait_conn[rank] = s
|
wait_conn[rank] = s
|
||||||
self.log_print('@tracker All nodes finishes job', 2)
|
self.log_print('@tracker All nodes finishes job', 2)
|
||||||
|
self.end_time = time.time()
|
||||||
|
self.log_print('@tracker %s secs between node start and job finish' % str(self.end_time - self.start_time), 2)
|
||||||
|
|
||||||
def submit(nslave, args, fun_submit, verbose, hostIP = 'auto'):
|
def submit(nslave, args, fun_submit, verbose, hostIP = 'auto'):
|
||||||
master = Tracker(verbose = verbose, hostIP = hostIP)
|
master = Tracker(verbose = verbose, hostIP = hostIP)
|
||||||
|
|||||||
@ -1,4 +1,4 @@
|
|||||||
#!/usr/bin/python
|
#!/usr/bin/env python
|
||||||
"""
|
"""
|
||||||
This is a script to submit rabit job via Yarn
|
This is a script to submit rabit job via Yarn
|
||||||
rabit will run as a Yarn application
|
rabit will run as a Yarn application
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user