add sge
This commit is contained in:
parent
014c86603d
commit
994cb02a66
@ -1,4 +1,9 @@
|
|||||||
# specify tensor path
|
ifneq ("$(wildcard ../config.mk)","")
|
||||||
|
config = ../config.mk
|
||||||
|
else
|
||||||
|
config = ../make/config.mk
|
||||||
|
endif
|
||||||
|
|
||||||
BIN = linear.rabit
|
BIN = linear.rabit
|
||||||
MOCKBIN= linear.mock
|
MOCKBIN= linear.mock
|
||||||
MPIBIN =
|
MPIBIN =
|
||||||
@ -6,7 +11,6 @@ MPIBIN =
|
|||||||
OBJ = linear.o
|
OBJ = linear.o
|
||||||
|
|
||||||
# common build script for programs
|
# common build script for programs
|
||||||
include ../make/config.mk
|
|
||||||
include ../make/common.mk
|
include ../make/common.mk
|
||||||
CFLAGS+=-fopenmp
|
CFLAGS+=-fopenmp
|
||||||
linear.o: linear.cc ../../src/*.h linear.h ../solver/*.h
|
linear.o: linear.cc ../../src/*.h linear.h ../solver/*.h
|
||||||
|
|||||||
@ -6,7 +6,7 @@
|
|||||||
#
|
#
|
||||||
# - copy this file to the root of rabit-learn folder
|
# - copy this file to the root of rabit-learn folder
|
||||||
# - modify the configuration you want
|
# - modify the configuration you want
|
||||||
# - type make or make -j n for parallel build
|
# - type make or make -j n in each of the folders
|
||||||
#----------------------------------------------------
|
#----------------------------------------------------
|
||||||
|
|
||||||
# choice of compiler
|
# choice of compiler
|
||||||
|
|||||||
@ -9,4 +9,4 @@ the example guidelines are in the script themselfs
|
|||||||
* Yarn (Hadoop): [rabit_yarn.py](rabit_yarn.py)
|
* Yarn (Hadoop): [rabit_yarn.py](rabit_yarn.py)
|
||||||
- It is also possible to submit via hadoop streaming with rabit_hadoop_streaming.py
|
- It is also possible to submit via hadoop streaming with rabit_hadoop_streaming.py
|
||||||
- However, it is highly recommended to use rabit_yarn.py because this will allocate resources more precisely and fits machine learning scenarios
|
- However, it is highly recommended to use rabit_yarn.py because this will allocate resources more precisely and fits machine learning scenarios
|
||||||
|
* Sun Grid engine: [rabit_sge.py](rabit_sge.py)
|
||||||
|
|||||||
@ -1,7 +1,7 @@
|
|||||||
#!/usr/bin/python
|
#!/usr/bin/python
|
||||||
"""
|
"""
|
||||||
This is the demo submission script of rabit, it is created to
|
This is the demo submission script of rabit, it is created to
|
||||||
submit rabit jobs using hadoop streaming
|
submit rabit jobs using MPI
|
||||||
"""
|
"""
|
||||||
import argparse
|
import argparse
|
||||||
import sys
|
import sys
|
||||||
|
|||||||
70
tracker/rabit_sge.py
Executable file
70
tracker/rabit_sge.py
Executable file
@ -0,0 +1,70 @@
|
|||||||
|
#!/usr/bin/python
|
||||||
|
"""
|
||||||
|
This is the demo submission script of rabit, it is created to
|
||||||
|
submit rabit jobs to Sun Grid Engine
|
||||||
|
"""
|
||||||
|
import argparse
|
||||||
|
import sys
|
||||||
|
import os
|
||||||
|
import subprocess
|
||||||
|
import rabit_tracker as tracker
|
||||||
|
|
||||||
|
# Command-line interface of the Sun Grid Engine launcher.
# The description previously said "using MPI" (copied from the MPI launcher).
parser = argparse.ArgumentParser(
    description='Rabit script to submit rabit job using Sun Grid Engine')
parser.add_argument('-n', '--nworker', required=True, type=int,
                    help='number of worker processes to be launched')
parser.add_argument('-q', '--queue', default='default', type=str,
                    help='the queue we want to submit the job to')
# NOTE(review): host_ip is parsed here but not referenced in the visible part
# of this script -- confirm whether it should be forwarded to the tracker.
parser.add_argument('-hip', '--host_ip', default='auto', type=str,
                    help='host IP address if cannot be automatically guessed, specify the IP of submission machine')
parser.add_argument('--vcores', default=1, type=int,
                    help='number of vcores to request in each mapper, set it if each rabit job is multi-threaded')
# 'auto' for jobname/logdir means "derive a value from the other arguments".
parser.add_argument('--jobname', default='auto',
                    help='customize jobname in tracker')
parser.add_argument('--logdir', default='auto',
                    help='customize the directory to place the logs')
parser.add_argument('-v', '--verbose', default=0, choices=[0, 1], type=int,
                    help='print more messages into the console')
# Everything after the options is the rabit program and its own arguments.
parser.add_argument('command', nargs='+',
                    help='command for rabit program')
args = parser.parse_args()
|
||||||
|
|
||||||
|
# Default job name: "rabit<nworker>." followed by the basename of the program.
if args.jobname == 'auto':
    args.jobname = ('rabit%d.' % args.nworker) + args.command[0].split('/')[-1]
# Default log directory is named after the job.
if args.logdir == 'auto':
    args.logdir = args.jobname + '.log'

# The log directory is created on demand; an existing regular file with the
# same name is an error because logs and the wrapper script are placed inside.
if os.path.exists(args.logdir):
    if not os.path.isdir(args.logdir):
        raise RuntimeError('specified logdir %s is a file instead of directory' % args.logdir)
else:
    os.mkdir(args.logdir)

# Wrapper script handed to qsub: it simply executes its own arguments ("$@").
# The with-statement guarantees the file is closed even if the write fails.
runscript = '%s/runrabit.sh' % args.logdir
with open(runscript, 'w') as fo:
    fo.write('\"$@\"')
|
||||||
|
#
|
||||||
|
# submission script using Sun Grid Engine
|
||||||
|
#
|
||||||
|
def sge_submit(nslave, worker_args, worker_envs):
    """
    Submit the rabit workers to Sun Grid Engine with a single qsub call.

    The qsub command line is assembled from the module-level ``args``
    (queue, jobname, logdir, vcores, command) and ``runscript``, echoed to
    the console, and then executed through the shell.

    Parameters
    ----------
    nslave : int
        number of worker tasks to start; used as the upper bound of ``-t 1-N``
    worker_args : list of str
        extra arguments appended after the user command for every worker
    worker_envs : dict
        environment variables passed to the workers through ``qsub -v``

    Raises
    ------
    subprocess.CalledProcessError
        if the qsub command exits with a non-zero status
    """
    # qsub -v takes a comma separated list of NAME="value" pairs
    env_arg = ','.join(['%s=\"%s\"' % (k, str(v)) for k, v in worker_envs.items()])
    cmd = 'qsub -cwd -t 1-%d -S /bin/bash' % nslave
    if args.queue != 'default':
        # leading space is required, otherwise the flag is glued to /bin/bash
        cmd += ' -q %s' % args.queue
    cmd += ' -N %s ' % args.jobname
    # stderr and stdout of the tasks both go to the log directory
    cmd += ' -e %s -o %s' % (args.logdir, args.logdir)
    cmd += ' -pe orte %d' % (args.vcores)
    # ${PATH} is intentionally left for the shell to expand at submission time
    cmd += ' -v %s,PATH=${PATH}:.' % env_arg
    cmd += ' %s %s' % (runscript, ' '.join(args.command + worker_args))
    # single-argument print(...) behaves the same on Python 2 and Python 3
    print(cmd)
    subprocess.check_call(cmd, shell=True)
    # the message has no conversion specifier, so it must not be %-formatted
    print('Waiting for the jobs to get up...')
|
||||||
|
|
||||||
|
# Start the job through the tracker: pass the number of workers, no extra
# worker arguments, and sge_submit as the submission callback.
tracker.submit(args.nworker, [], fun_submit=sge_submit, verbose=args.verbose)
|
||||||
Loading…
x
Reference in New Issue
Block a user