fix more

2015-03-13 21:36:09 -07:00
parent 7423837303
commit 0fd6197b8b
3 changed files with 4 additions and 28 deletions
--- a/rabit-learn/linear/run-yarn.sh
+++ b/rabit-learn/linear/run-yarn.sh
@@ -12,7 +12,7 @@ hadoop fs -mkdir $2/data
 hadoop fs -put ../data/agaricus.txt.train $2/data

 # submit to hadoop
-../../tracker/rabit_yarn.py  -n $1 --vcores 1 ../../yarn/run_yarn_prog.py ./linear.rabit hdfs://$2/data/agaricus.txt.train model_out=hdfs://$2/mushroom.linear.model "${*:3}" 
+../../tracker/rabit_yarn.py  -n $1 --vcores 1  ./linear.rabit hdfs://$2/data/agaricus.txt.train model_out=hdfs://$2/mushroom.linear.model "${*:3}" 

 # get the final model file
 hadoop fs -get $2/mushroom.linear.model ./linear.model
--- a/tracker/rabit_yarn.py
+++ b/tracker/rabit_yarn.py
@@ -13,6 +13,7 @@ import rabit_tracker as tracker

 WRAPPER_PATH = os.path.dirname(__file__) + '/../wrapper'
 YARN_JAR_PATH = os.path.dirname(__file__) + '/../yarn/rabit-yarn.jar'
+YARN_BOOT_PY = os.path.dirname(__file__) + '/../yarn/run_hdfs_prog.py'

 if not os.path.exists(YARN_JAR_PATH):
    warnings.warn("cannot find \"%s\", I will try to run build" % YARN_JAR_PATH)
@@ -87,7 +88,7 @@ if hadoop_version < 2:
    print 'Current Hadoop Version is %s, rabit_yarn will need Yarn(Hadoop 2.0)' % out[1]

 def submit_yarn(nworker, worker_args, worker_env):
-    fset = set([YARN_JAR_PATH]) 
+    fset = set([YARN_JAR_PATH, YARN_BOOT_PY]) 
    if args.auto_file_cache != 0:
        for i in range(len(args.command)):
            f = args.command[i]
@@ -121,7 +122,7 @@ def submit_yarn(nworker, worker_args, worker_env):
        cmd += ' -file %s' % f
    cmd += ' -jobname %s ' % args.jobname
    cmd += ' -tempdir %s ' % args.tempdir
-    cmd += (' '.join(args.command + worker_args))    
+    cmd += (' '.join(['./rabit_hdfs_prog.py'] + args.command + worker_args))
    if args.verbose != 0:
        print cmd
    subprocess.check_call(cmd, shell = True, env = env)
--- a/yarn/run_yarn_prog.py
+++ b/yarn/run_yarn_prog.py
@@ -1,25 +0,0 @@
-#!/usr/bin/env python
-"""
-this script helps setup classpath env for HDFS
-"""
-import glob
-import sys
-import os
-import subprocess
-
-if len(sys.argv) < 2:
-    print 'Usage: the program you want to run'
-
-hadoop_home = os.getenv('HADOOP_HOME')
-if hadoop_home is None:
-    hadoop_home = os.getenv('HADOOP_PREFIX')
-assert hadoop_home is not None, 'need to set HADOOP_HOME'
-
-(classpath, err) = subprocess.Popen('%s/bin/hadoop classpath' % hadoop_home, shell = True, stdout=subprocess.PIPE, env = os.environ).communicate()
-cpath = []
-for f in classpath.split(':'):
-    cpath += glob.glob(f)
-
-env = os.environ.copy()
-env['CLASSPATH'] = '${CLASSPATH}:' + (':'.join(cpath))
-subprocess.check_call(' '.join(sys.argv[1:]), shell = True, env = env)