cleanup multi-node
This commit is contained in:
@@ -4,8 +4,6 @@ Distributed XGBoost: Column Split Version
|
||||
- mushroom-col-rabit.sh starts xgboost job using rabit's allreduce
|
||||
* run ```bash mushroom-col-rabit-mock.sh <n-process>```
|
||||
- mushroom-col-rabit-mock.sh starts an xgboost job using rabit's allreduce, inserts a suicide signal at a certain point, and tests recovery
|
||||
* run ```bash mushroom-col-mpi.sh <n-mpi-process>```
|
||||
- mushroom-col-mpi.sh starts the xgboost-mpi job
|
||||
|
||||
How to Use
|
||||
====
|
||||
|
||||
@@ -1,24 +0,0 @@
|
||||
#!/bin/bash
# Distributed XGBoost demo: column-split training via MPI.
#
# Usage: bash mushroom-col.sh <n-mpi-process>
#   <n-mpi-process>  number of MPI worker processes to launch

if [[ $# -ne 1 ]]; then
  echo "Usage: $0 <nprocess>" >&2
  exit 1   # was 'exit -1': POSIX exit status must be 0-255
fi

k=$1

# clean up artifacts from previous runs
rm -rf train.col* ./*.model

# split the libsvm file into k column-wise subfiles (train.col0 .. train.col{k-1})
python splitsvm.py ../../demo/data/agaricus.txt.train train "$k"

# run xgboost-mpi with column-wise data split
mpirun -n "$k" ../../xgboost-mpi mushroom-col.conf dsplit=col

# the model can be directly loaded by the single-machine xgboost solver, as usual
../../xgboost mushroom-col.conf task=dump model_in=0002.model fmap=../../demo/data/featmap.txt name_dump=dump.nice.$k.txt

# run for one round, then continue training from the saved model
mpirun -n "$k" ../../xgboost-mpi mushroom-col.conf dsplit=col num_round=1
mpirun -n "$k" ../../xgboost-mpi mushroom-col.conf dsplit=col model_in=0001.model

cat "dump.nice.$k.txt"
|
||||
@@ -1,22 +0,0 @@
|
||||
#!/bin/bash
# Distributed XGBoost demo: column-split training through the xgboost python
# module, launched by rabit's tracker.
#
# This script is the same as the MPI variant except that it uses the xgboost
# python module. xgboost's built-in tcp-based allreduce module can run in more
# environments, as long as we know how to start the job by modifying
# ../submit_job_tcp.py
#
# Usage: bash mushroom-col-rabit.sh <n-process>
#   <n-process>  number of worker processes to launch

if [[ $# -ne 1 ]]; then
  echo "Usage: $0 <nprocess>" >&2
  exit 1   # was 'exit -1': POSIX exit status must be 0-255
fi

k=$1

# clean up artifacts from previous runs
rm -rf train.col* ./*.model

# split the libsvm file into k column-wise subfiles (train.col0 .. train.col{k-1})
python splitsvm.py ../../demo/data/agaricus.txt.train train "$k"

# launch the python training script on k local workers via the rabit tracker
../../rabit/tracker/rabit_mpi.py "$k" local python mushroom-col.py

cat "dump.nice.$k.txt"
|
||||
@@ -1,33 +0,0 @@
|
||||
import os
import sys

# Make the xgboost python wrapper importable relative to this script's location.
script_dir = os.path.dirname(__file__) or '.'
sys.path.append(os.path.join(script_dir, '..', '..', 'wrapper'))

import xgboost as xgb

# Example script of running distributed xgboost using python.

# Call this additional function to initialize the xgboost sync module
# in distributed mode, before any other distributed call.
xgb.sync_init(sys.argv)
rank = xgb.sync_get_rank()

# Each worker reads its own column slice of the training data,
# produced by splitsvm.py.
dtrain = xgb.DMatrix('train.col%d' % rank)

param = {
    'max_depth': 3,
    'eta': 1,
    'silent': 1,
    'objective': 'binary:logistic',
    'dsplit': 'col',
}
nround = 3

if rank == 0:
    # Only the master node loads the test set and runs evaluation.
    dtest = xgb.DMatrix('../../demo/data/agaricus.txt.test')
    model = xgb.train(param, dtrain, nround, [(dtrain, 'train'), (dtest, 'test')])
else:
    # Slave nodes train without evaluation.
    model = xgb.train(param, dtrain, nround)

if rank == 0:
    model.save_model('%04d.model' % nround)
    # Dump the model with the feature map for human-readable output.
    model.dump_model('dump.nice.%d.txt' % xgb.sync_get_world_size(), '../../demo/data/featmap.txt')

# Shut down the synchronization module.
xgb.sync_finalize()
|
||||
Reference in New Issue
Block a user