From 0fd6197b8bad0eb587988d6459d466e301ae6979 Mon Sep 17 00:00:00 2001 From: tqchen Date: Fri, 13 Mar 2015 21:36:09 -0700 Subject: [PATCH] fix more --- rabit-learn/linear/run-yarn.sh | 2 +- tracker/rabit_yarn.py | 5 +++-- yarn/run_yarn_prog.py | 25 ------------------------- 3 files changed, 4 insertions(+), 28 deletions(-) delete mode 100755 yarn/run_yarn_prog.py diff --git a/rabit-learn/linear/run-yarn.sh b/rabit-learn/linear/run-yarn.sh index d7d5399a0..a9d65bcb2 100755 --- a/rabit-learn/linear/run-yarn.sh +++ b/rabit-learn/linear/run-yarn.sh @@ -12,7 +12,7 @@ hadoop fs -mkdir $2/data hadoop fs -put ../data/agaricus.txt.train $2/data # submit to hadoop -../../tracker/rabit_yarn.py -n $1 --vcores 1 ../../yarn/run_yarn_prog.py ./linear.rabit hdfs://$2/data/agaricus.txt.train model_out=hdfs://$2/mushroom.linear.model "${*:3}" +../../tracker/rabit_yarn.py -n $1 --vcores 1 ./linear.rabit hdfs://$2/data/agaricus.txt.train model_out=hdfs://$2/mushroom.linear.model "${*:3}" # get the final model file hadoop fs -get $2/mushroom.linear.model ./linear.model diff --git a/tracker/rabit_yarn.py b/tracker/rabit_yarn.py index 0c321f144..0a9a6c8e2 100755 --- a/tracker/rabit_yarn.py +++ b/tracker/rabit_yarn.py @@ -13,6 +13,7 @@ import rabit_tracker as tracker WRAPPER_PATH = os.path.dirname(__file__) + '/../wrapper' YARN_JAR_PATH = os.path.dirname(__file__) + '/../yarn/rabit-yarn.jar' +YARN_BOOT_PY = os.path.dirname(__file__) + '/../yarn/run_hdfs_prog.py' if not os.path.exists(YARN_JAR_PATH): warnings.warn("cannot find \"%s\", I will try to run build" % YARN_JAR_PATH) @@ -87,7 +88,7 @@ if hadoop_version < 2: print 'Current Hadoop Version is %s, rabit_yarn will need Yarn(Hadoop 2.0)' % out[1] def submit_yarn(nworker, worker_args, worker_env): - fset = set([YARN_JAR_PATH]) + fset = set([YARN_JAR_PATH, YARN_BOOT_PY]) if args.auto_file_cache != 0: for i in range(len(args.command)): f = args.command[i] @@ -121,7 +122,7 @@ def submit_yarn(nworker, worker_args, worker_env): cmd += ' -file 
%s' % f cmd += ' -jobname %s ' % args.jobname cmd += ' -tempdir %s ' % args.tempdir - cmd += (' '.join(args.command + worker_args)) + cmd += (' '.join(['./run_hdfs_prog.py'] + args.command + worker_args)) if args.verbose != 0: print cmd subprocess.check_call(cmd, shell = True, env = env) diff --git a/yarn/run_yarn_prog.py b/yarn/run_yarn_prog.py deleted file mode 100755 index 5456c04cd..000000000 --- a/yarn/run_yarn_prog.py +++ /dev/null @@ -1,25 +0,0 @@ -#!/usr/bin/env python -""" -this script helps setup classpath env for HDFS -""" -import glob -import sys -import os -import subprocess - -if len(sys.argv) < 2: - print 'Usage: the program you want to run' - -hadoop_home = os.getenv('HADOOP_HOME') -if hadoop_home is None: - hadoop_home = os.getenv('HADOOP_PREFIX') -assert hadoop_home is not None, 'need to set HADOOP_HOME' - -(classpath, err) = subprocess.Popen('%s/bin/hadoop classpath' % hadoop_home, shell = True, stdout=subprocess.PIPE, env = os.environ).communicate() -cpath = [] -for f in classpath.split(':'): - cpath += glob.glob(f) - -env = os.environ.copy() -env['CLASSPATH'] = '${CLASSPATH}:' + (':'.join(cpath)) -subprocess.check_call(' '.join(sys.argv[1:]), shell = True, env = env)