This commit is contained in:
tqchen 2015-03-13 21:36:09 -07:00
parent 7423837303
commit 0fd6197b8b
3 changed files with 4 additions and 28 deletions

View File

@ -12,7 +12,7 @@ hadoop fs -mkdir $2/data
hadoop fs -put ../data/agaricus.txt.train $2/data
# submit to hadoop
../../tracker/rabit_yarn.py -n $1 --vcores 1 ../../yarn/run_yarn_prog.py ./linear.rabit hdfs://$2/data/agaricus.txt.train model_out=hdfs://$2/mushroom.linear.model "${*:3}"
../../tracker/rabit_yarn.py -n $1 --vcores 1 ./linear.rabit hdfs://$2/data/agaricus.txt.train model_out=hdfs://$2/mushroom.linear.model "${*:3}"
# get the final model file
hadoop fs -get $2/mushroom.linear.model ./linear.model

View File

@ -13,6 +13,7 @@ import rabit_tracker as tracker
WRAPPER_PATH = os.path.dirname(__file__) + '/../wrapper'
YARN_JAR_PATH = os.path.dirname(__file__) + '/../yarn/rabit-yarn.jar'
YARN_BOOT_PY = os.path.dirname(__file__) + '/../yarn/run_hdfs_prog.py'
if not os.path.exists(YARN_JAR_PATH):
warnings.warn("cannot find \"%s\", I will try to run build" % YARN_JAR_PATH)
@ -87,7 +88,7 @@ if hadoop_version < 2:
print 'Current Hadoop Version is %s, rabit_yarn will need Yarn(Hadoop 2.0)' % out[1]
def submit_yarn(nworker, worker_args, worker_env):
fset = set([YARN_JAR_PATH])
fset = set([YARN_JAR_PATH, YARN_BOOT_PY])
if args.auto_file_cache != 0:
for i in range(len(args.command)):
f = args.command[i]
@ -121,7 +122,7 @@ def submit_yarn(nworker, worker_args, worker_env):
cmd += ' -file %s' % f
cmd += ' -jobname %s ' % args.jobname
cmd += ' -tempdir %s ' % args.tempdir
cmd += (' '.join(args.command + worker_args))
cmd += (' '.join(['./rabit_hdfs_prog.py'] + args.command + worker_args))
if args.verbose != 0:
print cmd
subprocess.check_call(cmd, shell = True, env = env)

View File

@ -1,25 +0,0 @@
#!/usr/bin/env python
"""
this script helps setup classpath env for HDFS
"""
import glob
import sys
import os
import subprocess
if len(sys.argv) < 2:
print 'Usage: the program you want to run'
hadoop_home = os.getenv('HADOOP_HOME')
if hadoop_home is None:
hadoop_home = os.getenv('HADOOP_PREFIX')
assert hadoop_home is not None, 'need to set HADOOP_HOME'
(classpath, err) = subprocess.Popen('%s/bin/hadoop classpath' % hadoop_home, shell = True, stdout=subprocess.PIPE, env = os.environ).communicate()
cpath = []
for f in classpath.split(':'):
cpath += glob.glob(f)
env = os.environ.copy()
env['CLASSPATH'] = '${CLASSPATH}:' + (':'.join(cpath))
subprocess.check_call(' '.join(sys.argv[1:]), shell = True, env = env)