check multinode

This commit is contained in:
tqchen 2014-11-19 11:22:17 -08:00
parent 54e2ed90d7
commit 03e24cf590
3 changed files with 53 additions and 0 deletions

View File

@ -0,0 +1,2 @@
Column Split Version of XGBoost
====

View File

@ -0,0 +1,19 @@
#!/bin/bash
# Column-split demo: splits the mushroom training data into one column
# subset per process, trains with the MPI build of xgboost, then dumps the
# resulting model with the single-machine binary.
#
# Argument:
#   $1  number of processes (also the number of column subsets to create)
if [[ $# -ne 1 ]]
then
    echo "Usage: nprocess"
    # exit statuses must lie in 0-255; a negative value is not portable
    exit 1
fi
# remove column files left over from a previous run (possibly with another k)
rm -rf train.col*
k="$1"
# split the libsvm file into k subfiles (train.col0 .. train.col<k-1>)
python splitsvm.py ../../demo/data/agaricus.txt.train train "$k"
# run xgboost mpi
mpirun -n "$k" ../../xgboost-mpi mushroom-col.conf dsplit=col
# the model can be directly loaded by the single machine xgboost solver, as usual
# NOTE(review): training reads mushroom-col.conf but the dump reads mpi.conf,
# and 0002.model presumably depends on the round count in that conf -- confirm.
../../xgboost mpi.conf task=dump model_in=0002.model fmap=../../demo/data/featmap.txt name_dump="dump.nice.$k.txt"
cat "dump.nice.$k.txt"

View File

@ -0,0 +1,32 @@
#!/usr/bin/python
import sys
import random
# Split a libsvm file into k column subsets.
#
# Every output file keeps all rows and their labels; each feature id is
# assigned to exactly one output file, chosen pseudo-randomly with a fixed
# seed so that repeated runs produce the same split.


def split_columns(fin, fo_prefix, k, seed=10):
    """Write the columns of libsvm file `fin` into k files.

    Output files are named `<fo_prefix>.col0` .. `<fo_prefix>.col<k-1>`.
    Each non-blank input row yields one row in every output file: the
    label, followed by the `fid:value` entries whose feature id was
    assigned to that file, in their original order.

    Args:
        fin: path of the input libsvm file.
        fo_prefix: prefix of the output file names.
        k: number of column subsets, must be >= 1.
        seed: seed of the pseudo-random feature assignment.

    Returns:
        dict mapping each feature id seen to the index of its output file.

    Raises:
        ValueError: if k is smaller than 1, or a feature id is not an
            integer.
    """
    if k < 1:
        raise ValueError('k must be >= 1, got %r' % (k,))
    # A private generator gives the same sequence as random.seed(seed)
    # followed by random.randint, without touching the global state.
    rng = random.Random(seed)
    fmap = {}
    # Open the input first so a missing input creates no output files.
    with open(fin, 'r') as fi:
        fos = []
        try:
            for i in range(k):
                fos.append(open('%s.col%d' % (fo_prefix, i), 'w'))
            for line in fi:
                arr = line.split()
                if not arr:
                    # blank line: skipped in every output, rows stay aligned
                    continue
                # every column subset carries the label of the row
                for fo in fos:
                    fo.write(arr[0])
                for it in arr[1:]:
                    fid = int(it.split(':')[0])
                    if fid not in fmap:
                        # first sighting decides where this feature lives
                        fmap[fid] = rng.randint(0, k - 1)
                    fos[fmap[fid]].write(' ' + it)
                for fo in fos:
                    fo.write('\n')
        finally:
            # close whatever was opened, even if parsing failed midway
            for fo in fos:
                fo.close()
    return fmap


def main(argv):
    """Command-line entry point; returns the process exit status."""
    if len(argv) < 4:
        print('Usage:<fin> <fo> k')
        # a usage error must not report success
        return 1
    split_columns(argv[1], argv[2], int(argv[3]))
    return 0


if __name__ == '__main__':
    sys.exit(main(sys.argv))