Squashed 'subtree/rabit/' changes from 091634b..59e63bc

59e63bc minor
6233050 ok
14477f9 add namenode
75a6d34 add libhdfs opts
e3c76bf minmum fix
8b3c435 chg
2035799 test code
7751b2b add debug
7690313 ok
bd346b4 ok
faba1dc add testload
6f7783e add testload
e5f0340 ok
3ed9ec8 chg
e552ac4 ask for more ram in am
b2505e3 only stop nm when sucess
bc696c9 add queue info
f3e867e add option queue
5dc843c refactor fileio
cd9c81b quick fix
1e23af2 add virtual destructor to iseekstream
f165ffb fix hdfs
8cc6508 allow demo to pass in env
fad4d69 ok
0fd6197 fix more
7423837 fix more
d25de54 add temporal solution, run_yarn_prog.py
e5a9e31 final attempt
ed3bee8 add command back
0774000 add hdfs to resource
9b66e7e fix hadoop
6812f14 ok
08e1c16 change hadoop prefix back to hadoop home
d6b6828 Update build.sh
146e069 bugfix: logical boundary for ring buffer
19cb685 ok
4cf3c13 Merge branch 'master' of ssh://github.com/tqchen/rabit
20daddb add tracker
c57dad8 add ringbased passing and batch schedule
295d8a1 update
994cb02 add sge
014c866 OK

git-subtree-dir: subtree/rabit
git-subtree-split: 59e63bc135
Author: tqchen
Date:   2015-03-21 00:44:31 -07:00
Parent: 13a319ca01
Commit: 75bf97b575
34 changed files with 869 additions and 207 deletions

View File

@@ -9,4 +9,4 @@ the example guidelines are in the scripts themselves
* Yarn (Hadoop): [rabit_yarn.py](rabit_yarn.py)
  - It is also possible to submit via hadoop streaming with rabit_hadoop_streaming.py
  - However, it is highly recommended to use rabit_yarn.py because it allocates resources more precisely and fits machine learning scenarios
* Sun Grid engine: [rabit_sge.py](rabit_sge.py)

View File

@@ -1,7 +1,6 @@
-#!/usr/bin/python
+#!/usr/bin/env python
 """
-This is the demo submission script of rabit, it is created to
-submit rabit jobs using hadoop streaming
+This is the demo submission script of rabit for submitting jobs on the local machine
 """
 import argparse
 import sys
@@ -43,7 +42,7 @@ def exec_cmd(cmd, taskid, worker_env):
     if cmd[0].find('/') == -1 and os.path.exists(cmd[0]) and os.name != 'nt':
         cmd[0] = './' + cmd[0]
     cmd = ' '.join(cmd)
-    env = {}
+    env = os.environ.copy()
     for k, v in worker_env.items():
         env[k] = str(v)
     env['rabit_task_id'] = str(taskid)
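The one-line change above fixes a real environment bug: when subprocess is given env=, the child receives only that mapping, so building on an empty dict strips PATH, HOME, and any Hadoop variables from the worker. A minimal sketch of the corrected pattern (the function name is illustrative):

import os
import subprocess

def launch_worker(cmd, taskid, worker_env):
    # start from the parent environment so PATH etc. survive,
    # then layer the rabit-specific variables on top
    env = os.environ.copy()
    for k, v in worker_env.items():
        env[k] = str(v)
    env['rabit_task_id'] = str(taskid)
    return subprocess.Popen(cmd, shell = True, env = env)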

View File

@@ -1,4 +1,4 @@
-#!/usr/bin/python
+#!/usr/bin/env python
 """
 Deprecated

View File

@@ -1,7 +1,6 @@
-#!/usr/bin/python
+#!/usr/bin/env python
 """
-This is the demo submission script of rabit, it is created to
-submit rabit jobs using hadoop streaming
+Submission script to submit rabit jobs using MPI
 """
 import argparse
 import sys

tracker/rabit_sge.py (new executable file, +69 lines)
View File

@@ -0,0 +1,69 @@
#!/usr/bin/env python
"""
Submit rabit jobs to Sun Grid Engine
"""
import argparse
import sys
import os
import subprocess
import rabit_tracker as tracker

parser = argparse.ArgumentParser(description='Rabit script to submit rabit jobs to Sun Grid Engine')
parser.add_argument('-n', '--nworker', required=True, type=int,
                    help = 'number of worker processes to be launched')
parser.add_argument('-q', '--queue', default='default', type=str,
                    help = 'the queue we want to submit the job to')
parser.add_argument('-hip', '--host_ip', default='auto', type=str,
                    help = 'host IP address if cannot be automatically guessed, specify the IP of submission machine')
parser.add_argument('--vcores', default = 1, type=int,
                    help = 'number of vcores to request in each mapper, set it if each rabit job is multi-threaded')
parser.add_argument('--jobname', default='auto', help = 'customize jobname in tracker')
parser.add_argument('--logdir', default='auto', help = 'customize the directory to place the logs')
parser.add_argument('-v', '--verbose', default=0, choices=[0, 1], type=int,
                    help = 'print more messages into the console')
parser.add_argument('command', nargs='+',
                    help = 'command for rabit program')
args = parser.parse_args()

if args.jobname == 'auto':
    args.jobname = ('rabit%d.' % args.nworker) + args.command[0].split('/')[-1]
if args.logdir == 'auto':
    args.logdir = args.jobname + '.log'
if os.path.exists(args.logdir):
    if not os.path.isdir(args.logdir):
        raise RuntimeError('specified logdir %s is a file instead of directory' % args.logdir)
else:
    os.mkdir(args.logdir)

# wrapper script that simply executes whatever command qsub passes to it
runscript = '%s/runrabit.sh' % args.logdir
fo = open(runscript, 'w')
fo.write('\"$@\"\n')
fo.close()

#
# submission function for SGE
#
def sge_submit(nslave, worker_args, worker_envs):
    """
    Customized submit function that submits nslave jobs, each taking worker_args as parameters.
    Note this can be a lambda function capturing additional parameters.
    Parameters
        nslave: number of slave processes to start up
        worker_args: arguments used to launch each job,
            usually including the tracker URI and the parameters passed into submit
    """
    env_arg = ','.join(['%s=\"%s\"' % (k, str(v)) for k, v in worker_envs.items()])
    cmd = 'qsub -cwd -t 1-%d -S /bin/bash' % nslave
    if args.queue != 'default':
        cmd += ' -q %s' % args.queue
    cmd += ' -N %s ' % args.jobname
    cmd += ' -e %s -o %s' % (args.logdir, args.logdir)
    cmd += ' -pe orte %d' % (args.vcores)
    cmd += ' -v %s,PATH=${PATH}:.' % env_arg
    cmd += ' %s %s' % (runscript, ' '.join(args.command + worker_args))
    print cmd
    subprocess.check_call(cmd, shell = True)
    print 'Waiting for the jobs to get up...'

# call submit, with nslave, the commands to run each job and submit function
tracker.submit(args.nworker, [], fun_submit = sge_submit, verbose = args.verbose)
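For concreteness, here is roughly the qsub command sge_submit prints for a two-worker job; the queue name, tracker environment variables, and program name are illustrative assumptions, not taken from the diff:

qsub -cwd -t 1-2 -S /bin/bash -q short -N rabit2.demo \
    -e rabit2.demo.log -o rabit2.demo.log -pe orte 1 \
    -v rabit_tracker_uri="10.0.0.1",rabit_tracker_port="9091",PATH=${PATH}:. \
    rabit2.demo.log/runrabit.sh ./demo.rabit

Each SGE array task then runs runrabit.sh, which simply executes the command it receives, with the tracker environment injected via -v.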

View File

@@ -13,6 +13,7 @@ import socket
 import struct
 import subprocess
 import random
+import time
 from threading import Thread
 """
@@ -188,6 +189,7 @@ class Tracker:
         vlst.reverse()
         rlst += vlst
         return rlst
+
     def get_ring(self, tree_map, parent_map):
         """
         get a ring connection used to recover local data
@@ -202,14 +204,44 @@ class Tracker:
             rnext = (r + 1) % nslave
             ring_map[rlst[r]] = (rlst[rprev], rlst[rnext])
         return ring_map
+
+    def get_link_map(self, nslave):
+        """
+        get the link map; this is a bit hacky, call for a better algorithm
+        to place similar nodes together
+        """
+        tree_map, parent_map = self.get_tree(nslave)
+        ring_map = self.get_ring(tree_map, parent_map)
+        rmap = {0 : 0}
+        k = 0
+        for i in range(nslave - 1):
+            k = ring_map[k][1]
+            rmap[k] = i + 1
+        ring_map_ = {}
+        tree_map_ = {}
+        parent_map_ = {}
+        for k, v in ring_map.items():
+            ring_map_[rmap[k]] = (rmap[v[0]], rmap[v[1]])
+        for k, v in tree_map.items():
+            tree_map_[rmap[k]] = [rmap[x] for x in v]
+        for k, v in parent_map.items():
+            if k != 0:
+                parent_map_[rmap[k]] = rmap[v]
+            else:
+                parent_map_[rmap[k]] = -1
+        return tree_map_, parent_map_, ring_map_
+
     def handle_print(self, slave, msg):
         sys.stdout.write(msg)
+
+    def log_print(self, msg, level):
+        if level == 1:
+            if self.verbose:
+                sys.stderr.write(msg + '\n')
+        else:
+            sys.stderr.write(msg + '\n')
+
     def accept_slaves(self, nslave):
         # set of nodes that have finished the job
         shutdown = {}
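For intuition, the relabeling done by get_link_map can be exercised on its own: it follows each node's 'next' pointer starting from rank 0 and hands out consecutive new ranks, so the relabeled recovery ring is always 0 -> 1 -> ... -> n-1 -> 0. A minimal standalone sketch with a toy three-node ring:

# Standalone sketch of the rank relabeling idea in get_link_map:
# walk the ring from rank 0 assigning consecutive new ranks.
def relabel_ring(ring_map):
    rmap, k = {0: 0}, 0
    for i in range(len(ring_map) - 1):
        k = ring_map[k][1]
        rmap[k] = i + 1
    return dict((rmap[k], (rmap[p], rmap[n])) for k, (p, n) in ring_map.items())

# toy ring 0 -> 2 -> 1 -> 0 (entries are (prev, next) pairs)
print(relabel_ring({0: (1, 2), 2: (0, 1), 1: (2, 0)}))
# -> {0: (2, 1), 1: (0, 2), 2: (1, 0)}, i.e. 0 -> 1 -> 2 -> 0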
@@ -241,31 +273,40 @@ class Tracker:
                 assert s.cmd == 'start'
                 if s.world_size > 0:
                     nslave = s.world_size
-                tree_map, parent_map = self.get_tree(nslave)
-                ring_map = self.get_ring(tree_map, parent_map)
+                tree_map, parent_map, ring_map = self.get_link_map(nslave)
                 # set of nodes that is pending for getting up
                 todo_nodes = range(nslave)
-                random.shuffle(todo_nodes)
             else:
                 assert s.world_size == -1 or s.world_size == nslave
             if s.cmd == 'recover':
                 assert s.rank >= 0
             rank = s.decide_rank(job_map)
+            # batch assignment of ranks
             if rank == -1:
                 assert len(todo_nodes) != 0
-                rank = todo_nodes.pop(0)
-                if s.jobid != 'NULL':
-                    job_map[s.jobid] = rank
+                pending.append(s)
+                if len(pending) == len(todo_nodes):
+                    pending.sort(key = lambda x : x.host)
+                    for s in pending:
+                        rank = todo_nodes.pop(0)
+                        if s.jobid != 'NULL':
+                            job_map[s.jobid] = rank
+                        s.assign_rank(rank, wait_conn, tree_map, parent_map, ring_map)
+                        if s.wait_accept > 0:
+                            wait_conn[rank] = s
+                        self.log_print('Receive %s signal from %s; assign rank %d' % (s.cmd, s.host, s.rank), 1)
                 if len(todo_nodes) == 0:
                     self.log_print('@tracker All of %d nodes getting started' % nslave, 2)
-            s.assign_rank(rank, wait_conn, tree_map, parent_map, ring_map)
-            if s.cmd != 'start':
-                self.log_print('Receive %s signal from %d' % (s.cmd, s.rank), 1)
+                    self.start_time = time.time()
             else:
-                self.log_print('Receive %s signal from %s; assign rank %d' % (s.cmd, s.host, s.rank), 1)
-            if s.wait_accept > 0:
-                wait_conn[rank] = s
+                s.assign_rank(rank, wait_conn, tree_map, parent_map, ring_map)
+                self.log_print('Receive %s signal from %d' % (s.cmd, s.rank), 1)
+                if s.wait_accept > 0:
+                    wait_conn[rank] = s
         self.log_print('@tracker All nodes finished the job', 2)
+        self.end_time = time.time()
+        self.log_print('@tracker %s secs between node start and job finish' % str(self.end_time - self.start_time), 2)

 def submit(nslave, args, fun_submit, verbose, hostIP = 'auto'):
     master = Tracker(verbose = verbose, hostIP = hostIP)
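The rewritten block above is why random.shuffle is gone: instead of handing out a random rank as each worker connects, the tracker now holds fresh workers in pending and assigns ranks only once all of them have arrived, sorted by host so co-located workers get adjacent ranks (and therefore nearby positions in the recovery ring). A minimal sketch of that policy (Worker stands in for the real connection object):

from collections import namedtuple
Worker = namedtuple('Worker', 'host')

def batch_assign(pending, todo_nodes):
    # assumes len(pending) == len(todo_nodes), as in accept_slaves above
    pending.sort(key = lambda s : s.host)
    return [(s.host, todo_nodes.pop(0)) for s in pending]

# two workers per machine end up on consecutive ranks
workers = [Worker('node2'), Worker('node1'), Worker('node2'), Worker('node1')]
print(batch_assign(workers, [0, 1, 2, 3]))
# -> [('node1', 0), ('node1', 1), ('node2', 2), ('node2', 3)]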

View File

@@ -1,4 +1,4 @@
-#!/usr/bin/python
+#!/usr/bin/env python
 """
 This is a script to submit rabit job via Yarn
 rabit will run as a Yarn application
@@ -13,6 +13,7 @@ import rabit_tracker as tracker
 WRAPPER_PATH = os.path.dirname(__file__) + '/../wrapper'
 YARN_JAR_PATH = os.path.dirname(__file__) + '/../yarn/rabit-yarn.jar'
+YARN_BOOT_PY = os.path.dirname(__file__) + '/../yarn/run_hdfs_prog.py'
 if not os.path.exists(YARN_JAR_PATH):
     warnings.warn("cannot find \"%s\", I will try to run build" % YARN_JAR_PATH)
@@ -21,7 +22,7 @@ if not os.path.exists(YARN_JAR_PATH):
     subprocess.check_call(cmd, shell = True, env = os.environ)
     assert os.path.exists(YARN_JAR_PATH), "failed to build rabit-yarn.jar, try it manually"
-hadoop_binary = 'hadoop'
+hadoop_binary = None
 # code
 hadoop_home = os.getenv('HADOOP_HOME')
@@ -38,6 +39,8 @@ parser.add_argument('-hip', '--host_ip', default='auto', type=str,
                     help = 'host IP address if cannot be automatically guessed, specify the IP of submission machine')
 parser.add_argument('-v', '--verbose', default=0, choices=[0, 1], type=int,
                     help = 'print more messages into the console')
+parser.add_argument('-q', '--queue', default='default', type=str,
+                    help = 'the queue we want to submit the job to')
 parser.add_argument('-ac', '--auto_file_cache', default=1, choices=[0, 1], type=int,
                     help = 'whether to automatically cache the files in the command to hadoop localfile, this is on by default')
 parser.add_argument('-f', '--files', default = [], action='append',
@@ -56,6 +59,11 @@ parser.add_argument('-mem', '--memory_mb', default=1024, type=int,
                     help = 'maximum memory used by the process. Guide: set it large (near mapred.cluster.max.map.memory.mb)'\
                            'if you are running multi-threading rabit,'\
                            'so that each node can occupy all the mapper slots in a machine for maximum performance')
+parser.add_argument('--libhdfs-opts', default='-Xmx128m', type=str,
+                    help = 'setting to be passed to libhdfs')
+parser.add_argument('--name-node', default='default', type=str,
+                    help = 'the namenode address of hdfs that libhdfs should connect to, normally leave it as default')
 parser.add_argument('command', nargs='+',
                     help = 'command for rabit program')
 args = parser.parse_args()
@@ -87,7 +95,7 @@ if hadoop_version < 2:
     print 'Current Hadoop Version is %s, rabit_yarn will need Yarn(Hadoop 2.0)' % out[1]

 def submit_yarn(nworker, worker_args, worker_env):
-    fset = set([YARN_JAR_PATH])
+    fset = set([YARN_JAR_PATH, YARN_BOOT_PY])
     if args.auto_file_cache != 0:
         for i in range(len(args.command)):
             f = args.command[i]
@@ -96,7 +104,7 @@ def submit_yarn(nworker, worker_args, worker_env):
                 if i == 0:
                     args.command[i] = './' + args.command[i].split('/')[-1]
                 else:
-                    args.command[i] = args.command[i].split('/')[-1]
+                    args.command[i] = './' + args.command[i].split('/')[-1]
     if args.command[0].endswith('.py'):
         flst = [WRAPPER_PATH + '/rabit.py',
                 WRAPPER_PATH + '/librabit_wrapper.so',
@@ -112,6 +120,8 @@ def submit_yarn(nworker, worker_args, worker_env):
     env['rabit_cpu_vcores'] = str(args.vcores)
     env['rabit_memory_mb'] = str(args.memory_mb)
     env['rabit_world_size'] = str(args.nworker)
+    env['rabit_hdfs_opts'] = str(args.libhdfs_opts)
+    env['rabit_hdfs_namenode'] = str(args.name_node)
     if args.files != None:
         for flst in args.files:
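The two new environment variables are consumed on the worker side by run_hdfs_prog.py, which this diff references but does not show. As a loose sketch of the idea only (the real script's behavior is an assumption here), a boot wrapper could forward rabit_hdfs_opts to libhdfs through the standard LIBHDFS_OPTS variable:

import os
import subprocess
import sys

# Hypothetical sketch: forward the rabit_hdfs_opts JVM setting to libhdfs,
# which reads its options from the LIBHDFS_OPTS environment variable.
def boot(argv):
    env = os.environ.copy()
    env['LIBHDFS_OPTS'] = env.get('rabit_hdfs_opts', '-Xmx128m')
    return subprocess.call(' '.join(argv), shell = True, env = env)

if __name__ == '__main__':
    sys.exit(boot(sys.argv[1:]))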
@@ -121,7 +131,8 @@ def submit_yarn(nworker, worker_args, worker_env):
             cmd += ' -file %s' % f
     cmd += ' -jobname %s ' % args.jobname
     cmd += ' -tempdir %s ' % args.tempdir
-    cmd += (' '.join(args.command + worker_args))
+    cmd += ' -queue %s ' % args.queue
+    cmd += (' '.join(['./run_hdfs_prog.py'] + args.command + worker_args))
     if args.verbose != 0:
         print cmd
     subprocess.check_call(cmd, shell = True, env = env)
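Putting the new options together, a submission using the queue, memory, and libhdfs settings added above might look like this (cluster and program names are hypothetical):

./rabit_yarn.py -n 8 -q production -mem 4096 --vcores 4 \
    --libhdfs-opts=-Xmx256m --name-node hdfs://namenode:8020 \
    ./my_trainer.rabit train.conf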