cleanup multi-node

This commit is contained in:
tqchen
2015-01-15 21:55:56 -08:00
parent b762231b02
commit b1f89f29b8
13 changed files with 31 additions and 252 deletions

View File

@@ -1,10 +1,8 @@
Distributed XGBoost: Row Split Version
====
* You might be interested to check out the [Hadoop example](../hadoop)
* Machine Rabit: run ```bash machine-row-rabit.sh <n-mpi-process>```
- machine-col-rabit.sh starts an xgboost job using rabit
* Mushroom: run ```bash mushroom-row-mpi.sh <n-mpi-process>```
* Machine: run ```bash machine-row-mpi.sh <n-mpi-process>```
- The machine case also includes an example of continuing training from an existing model
How to Use
====
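For example, to run the machine demo with four workers (a minimal usage sketch; the single argument is the number of worker processes):

```bash
# rabit-based row-split training with 4 workers
bash machine-row-rabit.sh 4
# or the MPI-based variant
bash machine-row-mpi.sh 4
```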

View File

@@ -1,20 +0,0 @@
#!/bin/bash
if [[ $# -ne 1 ]]
then
    echo "Usage: $0 <nprocess>"
    exit 1
fi
rm -rf train-machine.row* *.model
k=$1
# make machine data
cd ../../demo/regression/
python mapfeat.py
python mknfold.py machine.txt 1
cd -
# split the libsvm file into k row shards
python splitrows.py ../../demo/regression/machine.txt.train train-machine $k
# submit the xgboost job via rabit; each worker reads its data shard from stdin
../submit_job_tcp.py $k "bash map.sh train-machine.row ../../xgboost machine-row.conf dsplit=row num_round=3 data=stdin"
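splitrows.py itself is not shown in this diff; assuming it simply partitions the libsvm file into k row shards named <prefix>.row0 through <prefix>.row(k-1), a rough GNU coreutils equivalent (single-digit suffixes, so k <= 10) would be:

```bash
# hypothetical stand-in for splitrows.py: cut the libsvm file into k
# line-aligned shards train-machine.row0 .. train-machine.row<k-1>
k=4
split --numeric-suffixes=0 --suffix-length=1 -n l/$k \
    ../../demo/regression/machine.txt.train train-machine.row
```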

View File

@@ -1,24 +0,0 @@
#!/bin/bash
if [[ $# -ne 1 ]]
then
    echo "Usage: $0 <nprocess>"
    exit 1
fi
rm -rf train-machine.row* *.model
k=$1
# make machine data
cd ../../demo/regression/
python mapfeat.py
python mknfold.py machine.txt 1
cd -
# split the libsvm file into k row shards
python splitrows.py ../../demo/regression/machine.txt.train train-machine $k
# run distributed training with xgboost-mpi
mpirun -n $k ../../xgboost-mpi machine-row.conf dsplit=row num_round=3
# run xgboost-mpi for one round to save 0001.model, then continue training from the existing model
mpirun -n $k ../../xgboost-mpi machine-row.conf dsplit=row num_round=1
mpirun -n $k ../../xgboost-mpi machine-row.conf dsplit=row num_round=2 model_in=0001.model
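The continued model can then be inspected with the single-machine binary, mirroring the mushroom example below. The model filename here is an assumption: adjust it to whatever name the continued run actually writes.

```bash
# hedged example: dump the model produced by the continued run
# (0003.model is assumed to be the output name after 1 + 2 rounds)
../../xgboost machine-row.conf task=dump model_in=0003.model name_dump=dump.machine.txt
cat dump.machine.txt
```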

View File

@@ -1,3 +0,0 @@
# a simple script to simulate a MapReduce mapper: pipe the data shard for
# this process's MPI rank into the given command
echo "cat $1$OMPI_COMM_WORLD_RANK | ${@:2}"
cat "$1$OMPI_COMM_WORLD_RANK" | "${@:2}"
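To make the indirection concrete: the launcher exports each worker's rank (OMPI_COMM_WORLD_RANK, as Open MPI does), so for the rabit invocation earlier, the worker with rank 1 effectively runs:

```bash
# expansion of `bash map.sh train-machine.row ../../xgboost machine-row.conf
# dsplit=row num_round=3 data=stdin` on the worker with rank 1
cat train-machine.row1 | ../../xgboost machine-row.conf dsplit=row num_round=3 data=stdin
```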

View File

@@ -1,19 +0,0 @@
#!/bin/bash
if [[ $# -ne 1 ]]
then
    echo "Usage: $0 <nprocess>"
    exit 1
fi
rm -rf train.row* *.model
k=$1
# split the libsvm file into k row shards
python splitrows.py ../../demo/data/agaricus.txt.train train $k
# run distributed training with xgboost-mpi
mpirun -n $k ../../xgboost-mpi mushroom-row.conf dsplit=row nthread=1
# the saved model (0002.model, after the 2 rounds configured in the conf file) can be loaded directly by the single-machine xgboost binary, as usual
../../xgboost mushroom-row.conf task=dump model_in=0002.model fmap=../../demo/data/featmap.txt name_dump=dump.nice.$k.txt
cat dump.nice.$k.txt
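Since the dump file is tagged with the worker count, a natural sanity check (assuming the row-split algorithm is deterministic with respect to how rows are partitioned) is to compare dumps from runs with different worker counts:

```bash
bash mushroom-row-mpi.sh 2
bash mushroom-row-mpi.sh 3
# identical files suggest the model is independent of the number of workers
diff dump.nice.2.txt dump.nice.3.txt
```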

View File

@@ -1,35 +0,0 @@
# General Parameters, see comment for each definition
# choose the booster, can be gbtree or gblinear
booster = gbtree
# choose logistic regression loss function for binary classification
objective = binary:logistic
# Tree Booster Parameters
# step size shrinkage
eta = 1.0
# minimum loss reduction required to make a further partition
gamma = 1.0
# minimum sum of instance weight (hessian) needed in a child
min_child_weight = 1
# maximum depth of a tree
max_depth = 3
# Task Parameters
# the number of boosting rounds
num_round = 2
# 0 means do not save any model except the final round model
save_period = 0
# do not cache the input data in a binary buffer file
use_buffer = 0
# The path of the training data; %d is a wildcard replaced by the rank of
# the process, so each process reads its own subset of rows
data = "train.row%d"
# The path of the validation data used to monitor training; the name in brackets, [test], labels the validation set
eval[test] = "../../demo/data/agaricus.txt.test"
# evaluate on training data as well each round
eval_train = 1
# The path of the test data; the full test set is required here, so either avoid this option or keep a subsampled version
test:data = "../../demo/data/agaricus.txt.test"
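Any parameter in this file can also be overridden on the command line as key=value pairs after the config path, which is exactly how the scripts above inject dsplit and num_round. For example:

```bash
# launch-time overrides take precedence over the values in the .conf file
mpirun -n 2 ../../xgboost-mpi mushroom-row.conf dsplit=row num_round=5 max_depth=4
```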