diff --git a/multi-node/row-split/machine-row-map.sh b/multi-node/row-split/machine-row-map.sh
new file mode 100755
index 000000000..a1c5bfe0c
--- /dev/null
+++ b/multi-node/row-split/machine-row-map.sh
@@ -0,0 +1,28 @@
+#!/bin/bash
+# Row-split demo driver: builds the machine regression data, splits the
+# training file with splitrows.py, and hands the per-process command to
+# ../submit_job_tcp.py.
+#
+# Usage: machine-row-map.sh nprocess
+#   nprocess - passed to splitrows.py as k and to submit_job_tcp.py
+if [[ $# -ne 1 ]]
+then
+  echo "Usage: nprocess" >&2
+  exit 1
+fi
+
+# remove outputs matching these patterns; -- keeps odd file names from being read as options
+rm -rf -- train-machine.row* *.model
+k=$1
+# make machine data
+# stop if the directory change fails, so the python steps never run elsewhere
+cd ../../demo/regression/ || exit 1
+python mapfeat.py
+python mknfold.py machine.txt 1
+cd - || exit 1
+
+# split the lib svm file into k subfiles
+python splitrows.py ../../demo/regression/machine.txt.train train-machine "$k"
+
+# run xgboost mpi, take data from stdin
+../submit_job_tcp.py "$k" "bash map.sh train-machine.row ../../xgboost machine-row.conf dsplit=row num_round=3 data=stdin"
diff --git a/multi-node/row-split/map.sh b/multi-node/row-split/map.sh
new file mode 100644
index 000000000..624192121
--- /dev/null
+++ b/multi-node/row-split/map.sh
@@ -0,0 +1,6 @@
+# a simple script to simulate mapreduce mapper
+# $1 is a file-name prefix; $OMPI_COMM_WORLD_RANK is appended to it to name the input file.
+# The remaining arguments are the command that reads that file on stdin.
+# print the pipeline before running it
+echo "cat $1$OMPI_COMM_WORLD_RANK | ${*:2}"
+cat "$1$OMPI_COMM_WORLD_RANK" | "${@:2}"