check multinode
This commit is contained in:
parent
54e2ed90d7
commit
03e24cf590
2
multi-node/col-split/README.md
Normal file
2
multi-node/col-split/README.md
Normal file
@ -0,0 +1,2 @@
|
||||
Column Split Version of XGBoost
|
||||
====
|
||||
19
multi-node/col-split/runexp-mpi.sh
Executable file
19
multi-node/col-split/runexp-mpi.sh
Executable file
@ -0,0 +1,19 @@
|
||||
#!/bin/bash
# Column-split XGBoost demo.
#
# Splits the agaricus training set into k column shards with splitsvm.py,
# trains with xgboost-mpi on k MPI processes, then dumps the resulting model
# with the single-machine xgboost binary and prints the dump.
#
# Usage: runexp-mpi.sh <nprocess>

if [[ $# -ne 1 ]]
then
    echo "Usage: $0 <nprocess>" >&2
    # exit statuses are limited to 0-255; "-1" only worked by wrapping to 255
    exit 1
fi

k=$1

# reject anything that is not a positive integer before touching any files
if ! [[ $k =~ ^[1-9][0-9]*$ ]]
then
    echo "nprocess must be a positive integer, got '$k'" >&2
    exit 1
fi

# stop at the first failing step: every stage consumes the previous one's output
set -e

# remove shards left over from a previous run (possibly made with a different k)
rm -rf train.col*

# split the libsvm file into k subfiles (train.col0 .. train.col<k-1>)
python splitsvm.py ../../demo/data/agaricus.txt.train train "$k"

# run xgboost mpi with k processes
mpirun -n "$k" ../../xgboost-mpi mushroom-col.conf dsplit=col

# the model can be directly loaded by the single machine xgboost solver, as usual
# NOTE(review): training reads mushroom-col.conf but the dump reads mpi.conf -
# confirm that both files exist and that the difference is intended.
../../xgboost mpi.conf task=dump model_in=0002.model fmap=../../demo/data/featmap.txt name_dump="dump.nice.$k.txt"
cat "dump.nice.$k.txt"
|
||||
32
multi-node/col-split/splitsvm.py
Normal file
32
multi-node/col-split/splitsvm.py
Normal file
@ -0,0 +1,32 @@
|
||||
#!/usr/bin/python
"""Split a libsvm file column-wise into k shard files.

Every shard receives every row (the label is always written), but each
feature id is assigned to exactly one shard, chosen pseudo-randomly the first
time that feature id is seen.  Shards are written to ``<fo>.col0`` ..
``<fo>.col<k-1>``.

Usage: splitsvm.py <fin> <fo> k
"""
import sys
import random


def split_columns(fin, prefix, k, seed=10):
    """Split the libsvm file *fin* into *k* column shards.

    Args:
        fin: path of the input libsvm file (``label fid:value ...`` per line).
        prefix: output path prefix; shard i is written to ``prefix.col<i>``.
        k: number of shards, must be >= 1.
        seed: seed of the feature-to-shard assignment; the default keeps the
            assignment reproducible between runs.

    Returns:
        dict mapping each feature id seen in the input to its shard index.

    Raises:
        ValueError: if *k* is smaller than 1, or a feature id is not an int.
    """
    if k < 1:
        raise ValueError('k must be a positive integer, got %d' % k)

    # a private generator gives the same sequence as seeding the module-level
    # one, without disturbing global random state
    rng = random.Random(seed)
    fmap = {}
    fos = []
    try:
        for i in range(k):
            fos.append(open('%s.col%d' % (prefix, i), 'w'))
        with open(fin, 'r') as fi:
            for line in fi:
                arr = line.split()
                if not arr:
                    # blank line: no label to copy, so it is not a data row
                    continue
                # every shard's row starts with the label
                parts = [[arr[0]] for _ in fos]
                for it in arr[1:]:
                    fid = int(it.split(':')[0])
                    if fid not in fmap:
                        fmap[fid] = rng.randint(0, k - 1)
                    parts[fmap[fid]].append(it)
                for fo, items in zip(fos, parts):
                    fo.write(' '.join(items) + '\n')
    finally:
        # close whatever was opened, even if parsing failed half-way
        for fo in fos:
            fo.close()
    return fmap


def main(argv):
    """Command-line entry point; returns the process exit status."""
    if len(argv) < 4:
        sys.stderr.write('Usage:<fin> <fo> k\n')
        # misuse must be visible to calling scripts as a failure
        return 1
    split_columns(argv[1], argv[2], int(argv[3]))
    return 0


if __name__ == '__main__':
    sys.exit(main(sys.argv))
|
||||
Loading…
x
Reference in New Issue
Block a user