Merge pull request #137 from cblsjtu/unity
Unity hadoop version scripts
This commit is contained in:
commit
c38f7109bd
@ -20,12 +20,11 @@ num_round = 2
|
|||||||
# 0 means do not save any model except the final round model
|
# 0 means do not save any model except the final round model
|
||||||
save_period = 0
|
save_period = 0
|
||||||
# The path of training data
|
# The path of training data
|
||||||
data = "agaricus.txt.train"
|
data = stdin
|
||||||
|
# The path of model file
|
||||||
|
model_out = stdout
|
||||||
|
|
||||||
# The following parameters are not supported by xgboost running in hadoop yet!
|
|
||||||
# The path of validation data, used to monitor training process, here [test] sets name of the validation set
|
# The path of validation data, used to monitor training process, here [test] sets name of the validation set
|
||||||
#eval[test] = "agaricus.txt.test"
|
eval[test] = "agaricus.txt.test"
|
||||||
# evaluate on training data as well each round
|
# evaluate on training data as well each round
|
||||||
#eval_train = 1
|
eval_train = 1
|
||||||
# The path of test data
|
|
||||||
#test:data = "agaricus.txt.test"
|
|
||||||
@ -1,43 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
|
|
||||||
if [ "$#" -lt 2 ];
|
|
||||||
then
|
|
||||||
echo "Usage: <nworkers> <path_in_HDFS>"
|
|
||||||
exit -1
|
|
||||||
fi
|
|
||||||
|
|
||||||
curDir=`pwd`
|
|
||||||
dataDir=../../demo/binary_classification
|
|
||||||
trainFile=$dataDir/agaricus.txt.train
|
|
||||||
input=$2
|
|
||||||
output=$2/model
|
|
||||||
|
|
||||||
# generate the training file if it does not exist
|
|
||||||
if [ ! -f "$trainFile" ];
|
|
||||||
then
|
|
||||||
echo "Generating training file:"
|
|
||||||
cd $dataDir
|
|
||||||
# map feature using indicator encoding, also produce featmap.txt
|
|
||||||
python mapfeat.py
|
|
||||||
# split train and test
|
|
||||||
python mknfold.py agaricus.txt 1
|
|
||||||
cd $curDir
|
|
||||||
fi
|
|
||||||
|
|
||||||
hadoop fs -mkdir $input
|
|
||||||
hadoop fs -put $trainFile $input
|
|
||||||
#hadoop fs -rm -skipTrash -r $output
|
|
||||||
|
|
||||||
# training and output the final model file
|
|
||||||
python ../../rabit/tracker/rabit_hadoop.py -n $1 -i $input/agaricus.txt.train -o $output -f $dataDir/mushroom.hadoop.conf \
|
|
||||||
--jobname xgboost_hadoop ../../xgboost mushroom.hadoop.conf data=stdin model_out=stdout
|
|
||||||
|
|
||||||
# get the final model file
|
|
||||||
hadoop fs -get $output/part-00000 ./final.model
|
|
||||||
# output prediction task=pred
|
|
||||||
../../xgboost $dataDir/mushroom.hadoop.conf task=pred model_in=final.model
|
|
||||||
# print the boosters of final.model in dump.raw.txt
|
|
||||||
../../xgboost $dataDir/mushroom.hadoop.conf task=dump model_in=final.model name_dump=dump.raw.txt
|
|
||||||
# use the feature map in printing for better visualization
|
|
||||||
../../xgboost $dataDir/mushroom.hadoop.conf task=dump model_in=final.model fmap=$dataDir/featmap.txt name_dump=dump.nice.txt
|
|
||||||
cat dump.nice.txt
|
|
||||||
29
multi-node/hadoop/run_hadoop_mushroom.sh
Executable file
29
multi-node/hadoop/run_hadoop_mushroom.sh
Executable file
@ -0,0 +1,29 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
|
||||||
|
if [ "$#" -lt 2 ];
|
||||||
|
then
|
||||||
|
echo "Usage: <num_of_slave_nodes> <path_in_HDFS>"
|
||||||
|
exit -1
|
||||||
|
fi
|
||||||
|
|
||||||
|
# put the local training file to HDFS
|
||||||
|
hadoop fs -mkdir $2/data
|
||||||
|
hadoop fs -put ../../demo/data/agaricus.txt.train $2/data
|
||||||
|
|
||||||
|
# training and output the final model file
|
||||||
|
../../rabit/tracker/rabit_hadoop.py -n $1 -i $2/data/agaricus.txt.train \
|
||||||
|
-o $2/model -f ../../demo/data/agaricus.txt.test \
|
||||||
|
../../xgboost mushroom.hadoop.conf dsplit=row
|
||||||
|
|
||||||
|
# get the final model file
|
||||||
|
hadoop fs -get $2/model/part-00000 ./final.model
|
||||||
|
|
||||||
|
# output prediction task=pred
|
||||||
|
../../xgboost mushroom.hadoop.conf task=pred model_in=final.model \
|
||||||
|
test:data=../../demo/data/agaricus.txt.test
|
||||||
|
# print the boosters of final.model in dump.raw.txt
|
||||||
|
../../xgboost mushroom.hadoop.conf task=dump model_in=final.model name_dump=dump.raw.txt
|
||||||
|
# use the feature map in printing for better visualization
|
||||||
|
../../xgboost mushroom.hadoop.conf task=dump model_in=final.model \
|
||||||
|
fmap=../../demo/data/featmap.txt name_dump=dump.nice.txt
|
||||||
|
cat dump.nice.txt
|
||||||
Loading…
x
Reference in New Issue
Block a user