move stream to rabit part, support rabit on yarn

This commit is contained in:
tqchen
2015-03-09 14:43:46 -07:00
parent 9f7c6fe271
commit a8d5af39fd
14 changed files with 134 additions and 500 deletions

View File

@@ -25,10 +25,10 @@ save_period = 0
# eval[test] = "agaricus.txt.test"
# Please do not modify the following parameters
# The path of training data
data = stdin
# The path of training data, with prefix hdfs
#data = hdfs:/data/
# The path of model file
model_out = stdout
#model_out =
# split pattern of xgboost
dsplit = row
# also evaluate on the training data each round

View File

@@ -8,11 +8,16 @@ fi
# put the local training file to HDFS
hadoop fs -mkdir $3/data
hadoop fs -put ../../demo/data/agaricus.txt.train $3/data
hadoop fs -put ../../demo/data/agaricus.txt.test $3/data
../../subtree/rabit/tracker/rabit_hadoop.py -n $1 -nt $2 -i $3/data/agaricus.txt.train -o $3/mushroom.final.model ../../xgboost mushroom.hadoop.conf nthread=$2
# run rabit, passing the data and model locations as HDFS addresses
../../subtree/rabit/tracker/rabit_yarn.py -n $1 --vcores $2 ../../xgboost mushroom.hadoop.conf nthread=$2\
data=hdfs://$3/data/agaricus.txt.train\
eval[test]=hdfs://$3/data/agaricus.txt.test\
model_out=hdfs://$3/mushroom.final.model
# get the final model file
hadoop fs -get $3/mushroom.final.model/part-00000 ./final.model
hadoop fs -get $3/mushroom.final.model final.model
# output prediction task=pred
../../xgboost mushroom.hadoop.conf task=pred model_in=final.model test:data=../../demo/data/agaricus.txt.test