ok
This commit is contained in:
parent
5f08313cb2
commit
373620503a
@ -1,6 +1,7 @@
|
|||||||
Distributed XGBoost: Column Split Version
|
Distributed XGBoost: Column Split Version
|
||||||
====
|
====
|
||||||
* run ```bash mushroom-col.sh <n-mpi-process>```
|
* run ```bash mushroom-col.sh <n-mpi-process>```
|
||||||
|
- mushroom-col.sh starts xgboost-mpi job
|
||||||
* run ```bash mushroom-col-tcp.sh <n-process>```
|
* run ```bash mushroom-col-tcp.sh <n-process>```
|
||||||
- mushroom-col-tcp.sh starts xgboost job using xgboost's built-in allreduce
|
- mushroom-col-tcp.sh starts xgboost job using xgboost's built-in allreduce
|
||||||
* run ```bash mushroom-col-python.sh <n-process>```
|
* run ```bash mushroom-col-python.sh <n-process>```
|
||||||
|
|||||||
28
multi-node/col-split/mushroom-col-tcp.sh
Executable file
28
multi-node/col-split/mushroom-col-tcp.sh
Executable file
@ -0,0 +1,28 @@
|
|||||||
|
#!/bin/bash
#
# Distributed XGBoost column-split demo.
#
# Same as mushroom-col.sh except that it uses plain xgboost instead of
# xgboost-mpi: xgboost's built-in TCP-based allreduce module is used, so the
# job can run in more environments, as long as ../submit_job_tcp.py knows how
# to start the processes.
#
# Usage: bash mushroom-col-tcp.sh <n-process>

set -eu

if [[ $# -ne 1 ]]; then
  echo "Usage: $0 <n-process>" >&2
  exit 1
fi
k=$1

# Clean up artifacts from previous runs.
rm -rf train.col* *.model

# Split the libsvm file into k column subfiles, one per worker.
python splitsvm.py ../../demo/data/agaricus.txt.train train "$k"

# Run distributed xgboost with column split.
../submit_job_tcp.py "$k" ../../xgboost mushroom-col.conf dsplit=col

# The model can be loaded directly by the single-machine xgboost solver, as usual.
../../xgboost mushroom-col.conf task=dump model_in=0002.model fmap=../../demo/data/featmap.txt name_dump="dump.nice.$k.txt"

# Run for one round, then continue training from the saved model.
../submit_job_tcp.py "$k" ../../xgboost mushroom-col.conf dsplit=col num_round=1
../submit_job_tcp.py "$k" ../../xgboost mushroom-col.conf dsplit=col model_in=0001.model

cat "dump.nice.$k.txt"
|
||||||
@ -3,6 +3,8 @@ Distributed XGBoost: Row Split Version
|
|||||||
* Mushroom: run ```bash mushroom-row.sh <n-mpi-process>```
|
* Mushroom: run ```bash mushroom-row.sh <n-mpi-process>```
|
||||||
* Machine: run ```bash machine-row.sh <n-mpi-process>```
|
* Machine: run ```bash machine-row.sh <n-mpi-process>```
|
||||||
- Machine case also include example to continue training from existing model
|
- Machine case also include example to continue training from existing model
|
||||||
|
* Machine TCP: run ```bash machine-row-tcp.sh <n-mpi-process>```
|
||||||
|
- machine-row-tcp.sh starts xgboost job using xgboost's built-in allreduce
|
||||||
|
|
||||||
How to Use
|
How to Use
|
||||||
====
|
====
|
||||||
|
|||||||
24
multi-node/row-split/machine-row-tcp.sh
Executable file
24
multi-node/row-split/machine-row-tcp.sh
Executable file
@ -0,0 +1,24 @@
|
|||||||
|
#!/bin/bash
#
# Distributed XGBoost row-split demo on the machine (regression) dataset,
# using xgboost's built-in TCP-based allreduce via ../submit_job_tcp.py.
# Also demonstrates continuing training from an existing saved model.
#
# Usage: bash machine-row-tcp.sh <n-process>

set -eu

if [[ $# -ne 1 ]]; then
  echo "Usage: $0 <n-process>" >&2
  exit 1
fi

# Clean up artifacts from previous runs.
rm -rf train-machine.row* *.model
k=$1

# Prepare the machine dataset.
cd ../../demo/regression/
python mapfeat.py
python mknfold.py machine.txt 1
cd -

# Split the libsvm file into k row subfiles, one per worker.
python splitrows.py ../../demo/regression/machine.txt.train train-machine "$k"

# Run distributed xgboost with row split.
../submit_job_tcp.py "$k" ../../xgboost machine-row.conf dsplit=row num_round=3

# Save a model after one round, then continue training from the existing model.
../submit_job_tcp.py "$k" ../../xgboost machine-row.conf dsplit=row num_round=1
../submit_job_tcp.py "$k" ../../xgboost machine-row.conf dsplit=row num_round=2 model_in=0001.model
|
||||||
Loading…
x
Reference in New Issue
Block a user