fix more
This commit is contained in:
parent
7423837303
commit
0fd6197b8b
@ -12,7 +12,7 @@ hadoop fs -mkdir $2/data
|
|||||||
hadoop fs -put ../data/agaricus.txt.train $2/data
|
hadoop fs -put ../data/agaricus.txt.train $2/data
|
||||||
|
|
||||||
# submit to hadoop
|
# submit to hadoop
|
||||||
../../tracker/rabit_yarn.py -n $1 --vcores 1 ../../yarn/run_yarn_prog.py ./linear.rabit hdfs://$2/data/agaricus.txt.train model_out=hdfs://$2/mushroom.linear.model "${*:3}"
|
../../tracker/rabit_yarn.py -n $1 --vcores 1 ./linear.rabit hdfs://$2/data/agaricus.txt.train model_out=hdfs://$2/mushroom.linear.model "${*:3}"
|
||||||
|
|
||||||
# get the final model file
|
# get the final model file
|
||||||
hadoop fs -get $2/mushroom.linear.model ./linear.model
|
hadoop fs -get $2/mushroom.linear.model ./linear.model
|
||||||
|
|||||||
@ -13,6 +13,7 @@ import rabit_tracker as tracker
|
|||||||
|
|
||||||
WRAPPER_PATH = os.path.dirname(__file__) + '/../wrapper'
|
WRAPPER_PATH = os.path.dirname(__file__) + '/../wrapper'
|
||||||
YARN_JAR_PATH = os.path.dirname(__file__) + '/../yarn/rabit-yarn.jar'
|
YARN_JAR_PATH = os.path.dirname(__file__) + '/../yarn/rabit-yarn.jar'
|
||||||
|
YARN_BOOT_PY = os.path.dirname(__file__) + '/../yarn/run_hdfs_prog.py'
|
||||||
|
|
||||||
if not os.path.exists(YARN_JAR_PATH):
|
if not os.path.exists(YARN_JAR_PATH):
|
||||||
warnings.warn("cannot find \"%s\", I will try to run build" % YARN_JAR_PATH)
|
warnings.warn("cannot find \"%s\", I will try to run build" % YARN_JAR_PATH)
|
||||||
@ -87,7 +88,7 @@ if hadoop_version < 2:
|
|||||||
print 'Current Hadoop Version is %s, rabit_yarn will need Yarn(Hadoop 2.0)' % out[1]
|
print 'Current Hadoop Version is %s, rabit_yarn will need Yarn(Hadoop 2.0)' % out[1]
|
||||||
|
|
||||||
def submit_yarn(nworker, worker_args, worker_env):
|
def submit_yarn(nworker, worker_args, worker_env):
|
||||||
fset = set([YARN_JAR_PATH])
|
fset = set([YARN_JAR_PATH, YARN_BOOT_PY])
|
||||||
if args.auto_file_cache != 0:
|
if args.auto_file_cache != 0:
|
||||||
for i in range(len(args.command)):
|
for i in range(len(args.command)):
|
||||||
f = args.command[i]
|
f = args.command[i]
|
||||||
@ -121,7 +122,7 @@ def submit_yarn(nworker, worker_args, worker_env):
|
|||||||
cmd += ' -file %s' % f
|
cmd += ' -file %s' % f
|
||||||
cmd += ' -jobname %s ' % args.jobname
|
cmd += ' -jobname %s ' % args.jobname
|
||||||
cmd += ' -tempdir %s ' % args.tempdir
|
cmd += ' -tempdir %s ' % args.tempdir
|
||||||
cmd += (' '.join(args.command + worker_args))
|
cmd += (' '.join(['./rabit_hdfs_prog.py'] + args.command + worker_args))
|
||||||
if args.verbose != 0:
|
if args.verbose != 0:
|
||||||
print cmd
|
print cmd
|
||||||
subprocess.check_call(cmd, shell = True, env = env)
|
subprocess.check_call(cmd, shell = True, env = env)
|
||||||
|
|||||||
@ -1,25 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
"""
|
|
||||||
this script helps setup classpath env for HDFS
|
|
||||||
"""
|
|
||||||
import glob
|
|
||||||
import sys
|
|
||||||
import os
|
|
||||||
import subprocess
|
|
||||||
|
|
||||||
if len(sys.argv) < 2:
|
|
||||||
print 'Usage: the program you want to run'
|
|
||||||
|
|
||||||
hadoop_home = os.getenv('HADOOP_HOME')
|
|
||||||
if hadoop_home is None:
|
|
||||||
hadoop_home = os.getenv('HADOOP_PREFIX')
|
|
||||||
assert hadoop_home is not None, 'need to set HADOOP_HOME'
|
|
||||||
|
|
||||||
(classpath, err) = subprocess.Popen('%s/bin/hadoop classpath' % hadoop_home, shell = True, stdout=subprocess.PIPE, env = os.environ).communicate()
|
|
||||||
cpath = []
|
|
||||||
for f in classpath.split(':'):
|
|
||||||
cpath += glob.glob(f)
|
|
||||||
|
|
||||||
env = os.environ.copy()
|
|
||||||
env['CLASSPATH'] = '${CLASSPATH}:' + (':'.join(cpath))
|
|
||||||
subprocess.check_call(' '.join(sys.argv[1:]), shell = True, env = env)
|
|
||||||
Loading…
x
Reference in New Issue
Block a user