28ca7be add linear readme ca4b20f add linear readme 1133628 add linear readme 6a11676 update docs a607047 Update build.sh 2c1cfd8 complete yarn 4f28e32 change formater 2fbda81 fix stdin input 3258bcf checkin yarn master 67ebf81 allow setup from env variables 9b6bf57 fix hdfs 395d5c2 add make system 88ce767 refactor io, initial hdfs file access need test 19be870 chgs a1bd3c6 Merge branch 'master' of ssh://github.com/tqchen/rabit 1a573f9 introduce input split 29476f1 fix timer issue git-subtree-dir: subtree/rabit git-subtree-split: 28ca7becbdf6503e6b1398588a969efb164c9701
98 lines
3.2 KiB
Python
Executable File
98 lines
3.2 KiB
Python
Executable File
#!/usr/bin/python
|
|
"""
|
|
This is the demo submission script of rabit, it is created to
|
|
submit rabit jobs locally using python subprocesses
|
|
"""
|
|
import argparse
|
|
import sys
|
|
import os
|
|
import subprocess
|
|
from threading import Thread
|
|
import rabit_tracker as tracker
|
|
# Directory of the rabit python wrapper, one level up from this script's
# directory.  It is exported to every worker as PYTHONPATH.
# os.path.join picks the platform separator ('\\' on Windows, '/' elsewhere)
# and, unlike plain string concatenation, keeps the path relative when
# __file__ has no directory component (concatenation would produce the
# absolute path '/../wrapper' in that case).
WRAPPER_PATH = os.path.join(os.path.dirname(__file__), '..', 'wrapper')
|
|
|
|
# Command-line interface: the number of workers, a verbosity switch and the
# worker command line (program followed by its arguments).
parser = argparse.ArgumentParser(
    description='Rabit script to submit rabit job locally using python subprocess')
parser.add_argument(
    '-n', '--nworker',
    type=int,
    required=True,
    help='number of worker proccess to be launched')
parser.add_argument(
    '-v', '--verbose',
    type=int,
    choices=[0, 1],
    default=0,
    help='print more messages into the console')
parser.add_argument(
    'command',
    nargs='+',
    help='command for rabit program')
# Parsed once at import time; the functions below read this module-level value.
args = parser.parse_args()
|
|
|
|
# Bash templates used on non-Windows platforms to keep a worker alive.
# The retry loop runs inside bash, so Python only has to wait for a single
# process and never needs to communicate with the worker between attempts.

# Optional trace statement: %s is filled with the worker command, while
# $nrep is left for bash to expand to the current attempt number.
echo="echo %s rabit_num_trial=$nrep;"

# Retry loop.  The first %s takes the trace statement above (or an empty
# string), the second %s takes the worker command.  The command is rerun for
# as long as it exits with 254, with the attempt number exported as
# rabit_num_trial.  The trailing `exit $rc` is required: the exit status of a
# bash while loop is that of the last command run in its body (the nrep
# increment, which always succeeds), so without it the script would report 0
# even when the worker failed.
keepalive = """
nrep=0
rc=254
while [ $rc -eq 254 ];
do
export rabit_num_trial=$nrep
%s
%s
rc=$?;
nrep=$((nrep+1));
done
exit $rc
"""
|
|
|
|
def exec_cmd(cmd, taskid, worker_env):
    """Run one rabit worker command, relaunching it while it exits with 254.

    The command is joined into a single shell string and executed with an
    environment built only from ``worker_env`` plus ``rabit_task_id`` and
    ``PYTHONPATH`` (the parent environment is not passed on).  On Windows the
    relaunch loop runs here and the attempt number is exported as
    ``rabit_num_trial``; on other platforms the loop is delegated to the
    ``keepalive`` bash template.

    Parameters
    ----------
    cmd : list of str
        Program followed by its arguments.  The list is not modified.
    taskid : int
        Index of this worker, exported as ``rabit_task_id``.
    worker_env : dict
        Extra environment variables; every value is converted with ``str``.

    Raises
    ------
    Exception
        On non-Windows platforms when the final return code is nonzero.
        On Windows the whole process is terminated instead.
    """
    # Work on a copy so the caller's list is never mutated.
    cmd = list(cmd)
    # A bare program name that exists in the current directory is run as
    # './name' so the shell does not look it up on PATH.
    if cmd[0].find('/') == -1 and os.path.exists(cmd[0]) and os.name != 'nt':
        cmd[0] = './' + cmd[0]
    cmd = ' '.join(cmd)

    env = {k: str(v) for k, v in worker_env.items()}
    env['rabit_task_id'] = str(taskid)
    env['PYTHONPATH'] = WRAPPER_PATH

    ntrial = 0
    while True:
        if os.name == 'nt':
            # No bash here: count the attempts and relaunch from Python.
            env['rabit_num_trial'] = str(ntrial)
            ret = subprocess.call(cmd, shell=True, env=env)
            if ret == 254:
                ntrial += 1
                continue
        else:
            if args.verbose != 0:
                bash = keepalive % (echo % cmd, cmd)
            else:
                bash = keepalive % ('', cmd)
            ret = subprocess.call(bash, shell=True, executable='bash', env=env)

        if ret == 0:
            if args.verbose != 0:
                # Parenthesised form prints the same text on Python 2 and 3.
                print('Thread %d exit with 0' % taskid)
            return

        if os.name == 'nt':
            # os.exit does not exist; os._exit ends the whole process even
            # when called from a worker thread.
            os._exit(-1)
        raise Exception('Get nonzero return code=%d' % ret)
|
|
#
|
|
# Note: this submit script is only used for demo purpose
|
|
# submission script using python multi-threading
|
|
#
|
|
def mthread_submit(nslave, worker_args, worker_envs):
    """
    Start nslave workers, one thread each, and wait for all of them to end.

    Every thread runs exec_cmd with the command given on the command line
    followed by worker_args, its own index as the task id, and worker_envs.

    Parameters
    ----------
    nslave : int
        number of worker threads to start
    worker_args : list
        extra arguments appended to the user command for every worker
    worker_envs : dict
        environment variables handed to every worker through exec_cmd
    """
    workers = []
    for rank in range(nslave):
        # Each worker gets its own freshly concatenated command list.
        worker = Thread(target=exec_cmd,
                        args=(args.command + worker_args, rank, worker_envs))
        worker.daemon = True
        worker.start()
        workers.append(worker)
    # Block until every worker thread has returned.
    for worker in workers:
        worker.join()
|
|
|
|
# Hand over to the tracker: pass the requested worker count, an empty list as
# the second argument, mthread_submit as the launcher callback, and the
# verbosity chosen on the command line.
tracker.submit(
    args.nworker,
    [],
    fun_submit=mthread_submit,
    verbose=args.verbose)
|