From b55fe803503e494a05178705edab0e098330d2cf Mon Sep 17 00:00:00 2001
From: tqchen
Date: Sun, 23 Nov 2014 18:15:42 -0800
Subject: [PATCH] add row map example

---
 multi-node/row-split/machine-row-map.sh | 25 +++++++++++++++++++++++++
 multi-node/row-split/map.sh             |  5 +++++
 2 files changed, 30 insertions(+)
 create mode 100755 multi-node/row-split/machine-row-map.sh
 create mode 100644 multi-node/row-split/map.sh

diff --git a/multi-node/row-split/machine-row-map.sh b/multi-node/row-split/machine-row-map.sh
new file mode 100755
index 000000000..a1c5bfe0c
--- /dev/null
+++ b/multi-node/row-split/machine-row-map.sh
@@ -0,0 +1,25 @@
+#!/bin/bash
+# Distributed row-split example on the machine regression data.
+# Usage: machine-row-map.sh nprocess
+# nprocess is the number of row subfiles to create, and is also the first
+# argument handed to submit_job_tcp.py.
+if [[ $# -ne 1 ]]
+then
+    echo "Usage: nprocess" >&2
+    exit 1
+fi
+
+# remove row subfiles and models left over from a previous run
+rm -rf -- train-machine.row* *.model
+k=$1
+# make machine data
+cd ../../demo/regression/ || exit 1
+python mapfeat.py
+python mknfold.py machine.txt 1
+cd - || exit 1
+
+# split the lib svm file into k subfiles
+python splitrows.py ../../demo/regression/machine.txt.train train-machine "$k"
+
+# run xgboost mpi, take data from stdin
+../submit_job_tcp.py "$k" "bash map.sh train-machine.row ../../xgboost machine-row.conf dsplit=row num_round=3 data=stdin"
diff --git a/multi-node/row-split/map.sh b/multi-node/row-split/map.sh
new file mode 100644
index 000000000..624192121
--- /dev/null
+++ b/multi-node/row-split/map.sh
@@ -0,0 +1,5 @@
+# a simple script to simulate mapreduce mapper
+# usage: bash map.sh prefix command [args...]
+# pipes the file named prefix followed by $OMPI_COMM_WORLD_RANK into command
+echo "cat $1$OMPI_COMM_WORLD_RANK | ${*:2}"
+cat "$1$OMPI_COMM_WORLD_RANK" | "${@:2}"