This commit is contained in:
tqchen 2015-03-10 15:26:40 -07:00
parent 014c86603d
commit 994cb02a66
5 changed files with 79 additions and 5 deletions

View File

@ -1,4 +1,9 @@
# specify tensor path
ifneq ("$(wildcard ../config.mk)","")
config = ../config.mk
else
config = ../make/config.mk
endif
BIN = linear.rabit
MOCKBIN= linear.mock
MPIBIN =
@ -6,7 +11,6 @@ MPIBIN =
OBJ = linear.o
# common build script for programs
include ../make/config.mk
include ../make/common.mk
CFLAGS+=-fopenmp
linear.o: linear.cc ../../src/*.h linear.h ../solver/*.h

View File

@ -6,7 +6,7 @@
#
# - copy this file to the root of rabit-learn folder
# - modify the configuration you want
# - type make or make -j n for parallel build
# - type make or make -j n on each of the folder
#----------------------------------------------------
# choice of compiler

View File

@ -9,4 +9,4 @@ the example guidelines are in the scripts themselves
* Yarn (Hadoop): [rabit_yarn.py](rabit_yarn.py)
- It is also possible to submit via hadoop streaming with rabit_hadoop_streaming.py
- However, it is highly recommended to use rabit_yarn.py because this will allocate resources more precisely and fits machine learning scenarios
* Sun Grid engine: [rabit_sge.py](rabit_sge.py)

View File

@ -1,7 +1,7 @@
#!/usr/bin/python
"""
This is the demo submission script of rabit, it is created to
submit rabit jobs using hadoop streaming
submit rabit jobs using MPI
"""
import argparse
import sys

70
tracker/rabit_sge.py Executable file
View File

@ -0,0 +1,70 @@
#!/usr/bin/python
"""
This is the demo submission script of rabit, it is created to
submit rabit jobs to Sun Grid Engine
"""
import argparse
import sys
import os
import subprocess
import rabit_tracker as tracker
# Command-line interface: mirrors the flags of the other rabit_* submission
# scripts so users can switch launchers without relearning options.
# NOTE(review): description previously said "using MPI" -- copy-paste from
# rabit_mpi.py; this script targets Sun Grid Engine.
parser = argparse.ArgumentParser(description='Rabit script to submit rabit jobs to Sun Grid Engine')
parser.add_argument('-n', '--nworker', required=True, type=int,
                    help = 'number of worker processes to be launched')
parser.add_argument('-q', '--queue', default='default', type=str,
                    help = 'the queue we want to submit the job to')
parser.add_argument('-hip', '--host_ip', default='auto', type=str,
                    help = 'host IP address if cannot be automatically guessed, specify the IP of submission machine')
parser.add_argument('--vcores', default = 1, type=int,
                    help = 'number of vcores to request in each mapper, set it if each rabit job is multi-threaded')
parser.add_argument('--jobname', default='auto', help = 'customize jobname in tracker')
parser.add_argument('--logdir', default='auto', help = 'customize the directory to place the logs')
parser.add_argument('-v', '--verbose', default=0, choices=[0, 1], type=int,
                    help = 'print more messages into the console')
parser.add_argument('command', nargs='+',
                    help = 'command for rabit program')
args = parser.parse_args()

# Derive a default job name from the worker count and the program's basename.
if args.jobname == 'auto':
    args.jobname = ('rabit%d.' % args.nworker) + args.command[0].split('/')[-1]
if args.logdir == 'auto':
    args.logdir = args.jobname + '.log'

# Ensure the log directory exists; refuse to clobber a regular file of the
# same name rather than silently misplacing SGE stdout/stderr logs.
if os.path.exists(args.logdir):
    if not os.path.isdir(args.logdir):
        raise RuntimeError('specified logdir %s is a file instead of directory' % args.logdir)
else:
    os.mkdir(args.logdir)

# Write a tiny wrapper script that qsub will execute; it simply runs the
# command line handed to it ("$@" preserves argument quoting under bash).
runscript = '%s/runrabit.sh' % args.logdir
with open(runscript, 'w') as fo:
    fo.write('\"$@\"')
#
# submission function for Sun Grid Engine (qsub)
#
def sge_submit(nslave, worker_args, worker_envs):
    """Submit nslave rabit worker jobs to Sun Grid Engine via qsub.

    Passed to tracker.submit as the fun_submit callback; note this can be
    a lambda function containing additional parameters in input.

    Parameters
    ----------
    nslave : int
        number of slave processes to start up
    worker_args : list
        arguments to launch each job,
        this usually includes the parameters of master_uri and parameters passed into submit
    worker_envs : dict
        environment variables the tracker needs every worker to see
    """
    # Hand tracker coordinates to workers through the job environment (-v).
    env_arg = ','.join(['%s=\"%s\"' % (k, str(v)) for k, v in worker_envs.items()])
    # Array job: one task per worker (-t 1-nslave), interpreted by bash.
    cmd = 'qsub -cwd -t 1-%d -S /bin/bash' % nslave
    if args.queue != 'default':
        # BUGFIX: the option needs a leading space, otherwise the command
        # line becomes ".../bash-q <queue>" and qsub rejects it.
        cmd += ' -q %s' % args.queue
    cmd += ' -N %s ' % args.jobname
    cmd += ' -e %s -o %s' % (args.logdir, args.logdir)
    cmd += ' -pe orte %d' % (args.vcores)
    cmd += ' -v %s,PATH=${PATH}:.' % env_arg
    cmd += ' %s %s' % (runscript, ' '.join(args.command + worker_args))
    print(cmd)
    subprocess.check_call(cmd, shell = True)
    # BUGFIX: the original applied "% args.jobname" to a string with no
    # conversion specifier, which raises TypeError at runtime.
    print('Waiting for the jobs to get up...')
# call submit, with nslave, the commands to run each job and submit function
# tracker.submit starts the rabit tracker process, then invokes sge_submit to
# launch the workers; no extra worker args are passed beyond what the tracker
# itself appends (presumably the master_uri parameters -- see sge_submit).
tracker.submit(args.nworker, [], fun_submit = sge_submit, verbose = args.verbose)