add auto caching of python in hadoop script, mock test module to python, with checkpoint

This commit is contained in:
tqchen
2015-01-13 14:29:10 -08:00
parent 877fc42e40
commit 3419cf9aa7
10 changed files with 104 additions and 144 deletions

View File

@@ -9,6 +9,7 @@ import os
import subprocess
from threading import Thread
import rabit_tracker as tracker
WRAPPER_PATH = os.path.dirname(__file__) + '/../wrapper'
parser = argparse.ArgumentParser(description='Rabit script to submit rabit job locally using python subprocess')
parser.add_argument('-n', '--nworker', required=True, type=int,
@@ -25,8 +26,9 @@ def exec_cmd(cmd, taskid):
cmd = ' '.join(cmd)
ntrial = 0
while True:
prep = 'PYTHONPATH=\"%s\" ' % WRAPPER_PATH
arg = ' rabit_task_id=%d rabit_num_trial=%d' % (taskid, ntrial)
ret = subprocess.call(cmd + arg, shell = True)
ret = subprocess.call(prep + cmd + arg, shell = True)
if ret == 254 or ret == -2:
ntrial += 1
continue

View File

@@ -11,6 +11,7 @@ import subprocess
import warnings
import rabit_tracker as tracker
WRAPPER_PATH = os.path.dirname(__file__) + '/../wrapper'
#!!! Set path to hadoop and hadoop streaming jar here
hadoop_binary = 'hadoop'
@@ -102,6 +103,13 @@ def hadoop_streaming(nworker, worker_args, use_yarn):
args.command[i] = './' + args.command[i].split('/')[-1]
else:
args.command[i] = args.command[i].split('/')[-1]
if args.command[0].endswith('.py'):
flst = [WRAPPER_PATH + '/rabit.py',
WRAPPER_PATH + '/librabit_wrapper.so',
WRAPPER_PATH + '/librabit_wrapper_mock.so']
for f in flst:
if os.path.exists(f):
fset.add(f)
kmap = {}
# setup keymaps
if use_yarn: