From 24f99220cbd7955f51e047e2feff030d5e7423a3 Mon Sep 17 00:00:00 2001
From: Boliang Chen
Date: Sat, 10 Jan 2015 23:59:25 +0800
Subject: [PATCH] fix bugs

---
 .../hadoop/run_binary_classification.sh       | 66 +++++++++++++++++++
 1 file changed, 66 insertions(+)
 create mode 100755 multi-node/hadoop/run_binary_classification.sh

diff --git a/multi-node/hadoop/run_binary_classification.sh b/multi-node/hadoop/run_binary_classification.sh
new file mode 100755
index 000000000..740a468cf
--- /dev/null
+++ b/multi-node/hadoop/run_binary_classification.sh
@@ -0,0 +1,66 @@
+#!/bin/bash
+#
+# Train the mushroom binary-classification demo through rabit_hadoop.py,
+# then fetch the model and run prediction and model dumps locally.
+#
+# Usage: run_binary_classification.sh <nworkers> <path_in_HDFS>
+#   <nworkers>      passed to rabit_hadoop.py as -n
+#   <path_in_HDFS>  HDFS directory that receives the training file; the
+#                   job output goes to <path_in_HDFS>/model
+#
+# Every path below is relative: run this from the script's own directory.
+
+die() {
+  echo "$*" >&2
+  exit 1
+}
+
+if [[ "$#" -lt 2 ]]; then
+  die "Usage: $0 <nworkers> <path_in_HDFS>"
+fi
+
+nworkers=$1
+dataDir=../../demo/binary_classification
+trainFile=$dataDir/agaricus.txt.train
+conf=$dataDir/mushroom.hadoop.conf
+input=$2
+output=$input/model
+
+# generate the training file if it does not exist
+if [[ ! -f "$trainFile" ]]; then
+  echo "Generating training file:"
+  # A subshell keeps the caller's working directory untouched, so there is
+  # no need to cd back afterwards.
+  (
+    cd "$dataDir" || exit 1
+    # map feature using indicator encoding, also produce featmap.txt
+    python mapfeat.py || exit 1
+    # split train and test
+    python mknfold.py agaricus.txt 1
+  ) || die "failed to generate $trainFile"
+fi
+
+# Exit status is left unchecked for these two steps.
+# NOTE(review): confirm whether a re-run with data already on HDFS should abort.
+hadoop fs -mkdir "$input"
+hadoop fs -put "$trainFile" "$input"
+#hadoop fs -rm -skipTrash -r $output
+
+# training and output the final model file
+python ../../rabit/tracker/rabit_hadoop.py -n "$nworkers" \
+  -i "$input/agaricus.txt.train" -o "$output" -f "$conf" \
+  --jobname xgboost_hadoop \
+  ../../xgboost mushroom.hadoop.conf data=stdin model_out=stdout ||
+  die "training job failed"
+
+# get the final model file; stop rather than predict with a missing or stale one
+hadoop fs -get "$output/part-00000" ./final.model ||
+  die "could not fetch $output/part-00000"
+# output prediction task=pred
+../../xgboost "$conf" task=pred model_in=final.model
+# print the boosters of final.model in dump.raw.txt
+../../xgboost "$conf" task=dump model_in=final.model name_dump=dump.raw.txt
+# use the feature map in printing for better visualization
+../../xgboost "$conf" task=dump model_in=final.model \
+  fmap="$dataDir/featmap.txt" name_dump=dump.nice.txt
+cat dump.nice.txt