add tracker

This commit is contained in:
tqchen 2015-03-11 13:27:23 -07:00
parent 295d8a12f1
commit 20daddbeda
6 changed files with 11 additions and 7 deletions

View File

@ -1,4 +1,4 @@
#!/usr/bin/python #!/usr/bin/env python
""" """
This is the demo submission script of rabit for submitting jobs in local machine This is the demo submission script of rabit for submitting jobs in local machine
""" """

View File

@ -1,4 +1,4 @@
#!/usr/bin/python #!/usr/bin/env python
""" """
Deprecated Deprecated

View File

@ -1,4 +1,4 @@
#!/usr/bin/python #!/usr/bin/env python
""" """
Submission script to submit rabit jobs using MPI Submission script to submit rabit jobs using MPI
""" """

View File

@ -1,4 +1,4 @@
#!/usr/bin/python #!/usr/bin/env python
""" """
Submit rabit jobs to Sun Grid Engine Submit rabit jobs to Sun Grid Engine
""" """
@ -38,7 +38,7 @@ else:
runscript = '%s/runrabit.sh' % args.logdir runscript = '%s/runrabit.sh' % args.logdir
fo = open(runscript, 'w') fo = open(runscript, 'w')
fo.write('\"$@\"') fo.write('\"$@\"\n')
fo.close() fo.close()
# #
# submission script using MPI # submission script using MPI
@ -63,7 +63,7 @@ def sge_submit(nslave, worker_args, worker_envs):
cmd += ' %s %s' % (runscript, ' '.join(args.command + worker_args)) cmd += ' %s %s' % (runscript, ' '.join(args.command + worker_args))
print cmd print cmd
subprocess.check_call(cmd, shell = True) subprocess.check_call(cmd, shell = True)
print 'Waiting for the jobs to get up...' % args.jobname print 'Waiting for the jobs to get up...'
# call submit, with nslave, the commands to run each job and submit function # call submit, with nslave, the commands to run each job and submit function
tracker.submit(args.nworker, [], fun_submit = sge_submit, verbose = args.verbose) tracker.submit(args.nworker, [], fun_submit = sge_submit, verbose = args.verbose)

View File

@ -13,6 +13,7 @@ import socket
import struct import struct
import subprocess import subprocess
import random import random
import time
from threading import Thread from threading import Thread
""" """
@ -258,6 +259,7 @@ class Tracker:
job_map[s.jobid] = rank job_map[s.jobid] = rank
if len(todo_nodes) == 0: if len(todo_nodes) == 0:
self.log_print('@tracker All of %d nodes getting started' % nslave, 2) self.log_print('@tracker All of %d nodes getting started' % nslave, 2)
self.start_time = time.time()
s.assign_rank(rank, wait_conn, tree_map, parent_map, ring_map) s.assign_rank(rank, wait_conn, tree_map, parent_map, ring_map)
if s.cmd != 'start': if s.cmd != 'start':
self.log_print('Recieve %s signal from %d' % (s.cmd, s.rank), 1) self.log_print('Recieve %s signal from %d' % (s.cmd, s.rank), 1)
@ -266,6 +268,8 @@ class Tracker:
if s.wait_accept > 0: if s.wait_accept > 0:
wait_conn[rank] = s wait_conn[rank] = s
self.log_print('@tracker All nodes finishes job', 2) self.log_print('@tracker All nodes finishes job', 2)
self.end_time = time.time()
self.log_print('@tracker %s secs between node start and job finish' % str(self.end_time - self.start_time), 2)
def submit(nslave, args, fun_submit, verbose, hostIP = 'auto'): def submit(nslave, args, fun_submit, verbose, hostIP = 'auto'):
master = Tracker(verbose = verbose, hostIP = hostIP) master = Tracker(verbose = verbose, hostIP = hostIP)

View File

@ -1,4 +1,4 @@
#!/usr/bin/python #!/usr/bin/env python
""" """
This is a script to submit rabit job via Yarn This is a script to submit rabit job via Yarn
rabit will run as a Yarn application rabit will run as a Yarn application