add note for col

2014-11-19 11:37:54 -08:00 · 2014-11-19 11:37:54 -08:00 · da54f5e5d8
commit da54f5e5d8
parent 03e24cf590
3 changed files with 35 additions and 2 deletions
--- a/multi-node/col-split/README.md
+++ b/multi-node/col-split/README.md
@ -1,2 +1,16 @@
 Column Split Version of XGBoost
 ====
 * run ```bash run-mushroom.sh```
 Steps to use column split version
 ====
 * First split the data by column.
 * In the config, specify the data file with a wildcard %d, where %d is the rank of the node; each node will load its own part of the data.
 * Enable column split mode by ```dsplit=col```
 Note on the Column Split Version
 ====
 * The code is multi-threaded, so you want to run one xgboost-mpi per node
 * The code will work correctly as long as the union of the column subsets covers all the columns we are interested in.
  - The column subsets can overlap with each other.
 * It uses exactly the same algorithm as the single-node version, examining all potential split points.
--- a/multi-node/col-split/run-mushroom.sh
+++ b/multi-node/col-split/run-mushroom.sh
@ -0,0 +1,19 @@
 #!/bin/bash
 # Demo: train XGBoost on the mushroom (agaricus) data in column-split mode.
 # Usage: run-mushroom.sh nprocess
 #   nprocess - number of subfiles to split the training data into, and the
 #              number of MPI processes to launch
 # All paths are relative, so run this from the directory holding the script.
 #
 # Abort on the first failing step so a later stage never runs on stale output
 # (e.g. dumping an old 0002.model after a failed training run).
 set -euo pipefail
 if [[ $# -ne 1 ]]
 then
    # diagnostics belong on stderr; exit status must be within 0-255
    echo "Usage: $0 nprocess" >&2
    exit 1
 fi
 k=$1
 # mpirun -n needs a positive process count; reject anything else up front
 if [[ ! $k =~ ^[1-9][0-9]*$ ]]
 then
    echo "nprocess must be a positive integer, got '$k'" >&2
    exit 1
 fi
 # remove any existing train.col* files (presumably output of a previous split)
 rm -rf -- train.col*
 # split the lib svm file into k subfiles
 python splitsvm.py ../../demo/data/agaricus.txt.train train "$k"
 # run xgboost mpi
 mpirun -n "$k" ../../xgboost-mpi  mushroom-col.conf updater=distcol silent=0
 # the model can be directly loaded by single machine xgboost solver, as usual
 ../../xgboost mushroom-col.conf task=dump model_in=0002.model fmap=../../demo/data/featmap.txt name_dump="dump.nice.$k.txt"
 cat "dump.nice.$k.txt"
--- a/multi-node/col-split/runexp-mpi.sh
+++ b/multi-node/col-split/runexp-mpi.sh
@ -12,8 +12,8 @@ k=$1
 python splitsvm.py ../../demo/data/agaricus.txt.train train $k
 # run xgboost mpi
-mpirun -n $k ../../xgboost-mpi  mushroom-col.conf dsplit=col
+mpirun -n $k ../../xgboost-mpi mushroom-col.conf dsplit=col
 # the model can be directly loaded by single machine xgboost solver, as usual
-../../xgboost mpi.conf task=dump model_in=0002.model fmap=../../demo/data/featmap.txt name_dump=dump.nice.$k.txt
+../../xgboost mushroom-col.conf task=dump model_in=0002.model fmap=../../demo/data/featmap.txt name_dump=dump.nice.$k.txt
 cat dump.nice.$k.txt