fix more

2015-03-13 21:36:09 -07:00 · 2015-03-13 21:36:09 -07:00 · 0fd6197b8b
commit 0fd6197b8b
parent 7423837303
3 changed files with 4 additions and 28 deletions
--- a/rabit-learn/linear/run-yarn.sh
+++ b/rabit-learn/linear/run-yarn.sh
@ -12,7 +12,7 @@ hadoop fs -mkdir $2/data
 hadoop fs -put ../data/agaricus.txt.train $2/data
 # submit to hadoop
-../../tracker/rabit_yarn.py  -n $1 --vcores 1 ../../yarn/run_yarn_prog.py ./linear.rabit hdfs://$2/data/agaricus.txt.train model_out=hdfs://$2/mushroom.linear.model "${*:3}" 
+../../tracker/rabit_yarn.py  -n $1 --vcores 1  ./linear.rabit hdfs://$2/data/agaricus.txt.train model_out=hdfs://$2/mushroom.linear.model "${*:3}" 
 # get the final model file
 hadoop fs -get $2/mushroom.linear.model ./linear.model
--- a/tracker/rabit_yarn.py
+++ b/tracker/rabit_yarn.py
@ -13,6 +13,7 @@ import rabit_tracker as tracker
 WRAPPER_PATH = os.path.dirname(__file__) + '/../wrapper'
 YARN_JAR_PATH = os.path.dirname(__file__) + '/../yarn/rabit-yarn.jar'
 YARN_BOOT_PY = os.path.dirname(__file__) + '/../yarn/run_hdfs_prog.py'
 if not os.path.exists(YARN_JAR_PATH):
    warnings.warn("cannot find \"%s\", I will try to run build" % YARN_JAR_PATH)
@ -87,7 +88,7 @@ if hadoop_version < 2:
    print 'Current Hadoop Version is %s, rabit_yarn will need Yarn(Hadoop 2.0)' % out[1]
 def submit_yarn(nworker, worker_args, worker_env):
-    fset = set([YARN_JAR_PATH]) 
+    fset = set([YARN_JAR_PATH, YARN_BOOT_PY]) 
    if args.auto_file_cache != 0:
        for i in range(len(args.command)):
            f = args.command[i]
@ -121,7 +122,7 @@ def submit_yarn(nworker, worker_args, worker_env):
        cmd += ' -file %s' % f
    cmd += ' -jobname %s ' % args.jobname
    cmd += ' -tempdir %s ' % args.tempdir
-    cmd += (' '.join(args.command + worker_args))    
+    cmd += (' '.join(['./rabit_hdfs_prog.py'] + args.command + worker_args))
    if args.verbose != 0:
        print cmd
    subprocess.check_call(cmd, shell = True, env = env)
--- a/yarn/run_yarn_prog.py
+++ b/yarn/run_yarn_prog.py
@ -1,25 +0,0 @@
 #!/usr/bin/env python
 """
 this script helps setup classpath env for HDFS
 """
 import glob
 import sys
 import os
 import subprocess
 if len(sys.argv) < 2:
    print 'Usage: the program you want to run'
 hadoop_home = os.getenv('HADOOP_HOME')
 if hadoop_home is None:
    hadoop_home = os.getenv('HADOOP_PREFIX')
 assert hadoop_home is not None, 'need to set HADOOP_HOME'
 (classpath, err) = subprocess.Popen('%s/bin/hadoop classpath' % hadoop_home, shell = True, stdout=subprocess.PIPE, env = os.environ).communicate()
 cpath = []
 for f in classpath.split(':'):
    cpath += glob.glob(f)
 env = os.environ.copy()
 env['CLASSPATH'] = '${CLASSPATH}:' + (':'.join(cpath))
 subprocess.check_call(' '.join(sys.argv[1:]), shell = True, env = env)