cleanup multi-node
This commit is contained in:
@@ -1,10 +1,8 @@
|
||||
Distributed XGBoost: Row Split Version
|
||||
====
|
||||
* You might be interested to check out the [Hadoop example](../hadoop)
|
||||
* Machine Rabit: run ```bash machine-row-rabit.sh <n-mpi-process>```
|
||||
  - machine-row-rabit.sh starts the xgboost job using rabit
|
||||
* Mushroom: run ```bash mushroom-row-mpi.sh <n-mpi-process>```
|
||||
* Machine: run ```bash machine-row-mpi.sh <n-mpi-process>```
|
||||
  - The machine case also includes an example of continuing training from an existing model
|
||||
|
||||
How to Use
|
||||
====
|
||||
|
||||
@@ -1,20 +0,0 @@
|
||||
#!/bin/bash
# Run distributed xgboost (row split) on the machine dataset through the
# TCP job submitter; each rank reads its data shard from stdin via map.sh.
# Usage: bash <this-script> <nprocess>
if [[ $# -ne 1 ]]
then
    # clearer usage line; exit 1 instead of -1 (negative codes wrap to 255)
    echo "Usage: $0 <nprocess>"
    exit 1
fi

# clean up outputs from previous runs
rm -rf train-machine.row* *.model
k=$1
# make machine data
cd ../../demo/regression/
python mapfeat.py
python mknfold.py machine.txt 1
cd -

# split the libsvm file into k subfiles
python splitrows.py ../../demo/regression/machine.txt.train train-machine $k

# run xgboost mpi, take data from stdin
../submit_job_tcp.py $k "bash map.sh train-machine.row ../../xgboost machine-row.conf dsplit=row num_round=3 data=stdin"
|
||||
@@ -1,24 +0,0 @@
|
||||
#!/bin/bash
# Run distributed xgboost (row split) on the machine dataset with MPI,
# including an example of continuing training from a saved model.
# Usage: bash <this-script> <nprocess>
if [[ $# -ne 1 ]]
then
    # clearer usage line; exit 1 instead of -1 (negative codes wrap to 255)
    echo "Usage: $0 <nprocess>"
    exit 1
fi

# clean up outputs from previous runs
rm -rf train-machine.row* *.model
k=$1
# make machine data
cd ../../demo/regression/
python mapfeat.py
python mknfold.py machine.txt 1
cd -

# split the libsvm file into k subfiles
python splitrows.py ../../demo/regression/machine.txt.train train-machine $k

# run xgboost mpi
mpirun -n $k ../../xgboost-mpi machine-row.conf dsplit=row num_round=3

# run xgboost-mpi to save model 0001, then continue training from the existing model
mpirun -n $k ../../xgboost-mpi machine-row.conf dsplit=row num_round=1
mpirun -n $k ../../xgboost-mpi machine-row.conf dsplit=row num_round=2 model_in=0001.model
|
||||
@@ -1,3 +0,0 @@
|
||||
# A simple script to simulate a MapReduce mapper: rank i of the MPI job
# (from OMPI_COMM_WORLD_RANK) reads shard "<prefix><i>" and pipes it into
# the given command.
#   $1    data file prefix; this rank's shard is "$1$OMPI_COMM_WORLD_RANK"
#   $2..  command (and its arguments) to run on the shard's contents
input="$1$OMPI_COMM_WORLD_RANK"
shift
echo "cat $input | $*"
# quote the expansions so the path and arguments survive word splitting/globbing
cat "$input" | "$@"
|
||||
@@ -1,19 +0,0 @@
|
||||
#!/bin/bash
# Run distributed xgboost (row split) on the mushroom dataset with MPI,
# then dump the resulting model with the single-machine xgboost binary.
# Usage: bash <this-script> <nprocess>
if [[ $# -ne 1 ]]
then
    # clearer usage line; exit 1 instead of -1 (negative codes wrap to 255)
    echo "Usage: $0 <nprocess>"
    exit 1
fi

# clean up outputs from previous runs
rm -rf train.row* *.model
k=$1

# split the libsvm file into k subfiles
python splitrows.py ../../demo/data/agaricus.txt.train train $k

# run xgboost mpi
mpirun -n $k ../../xgboost-mpi mushroom-row.conf dsplit=row nthread=1

# the model can be directly loaded by the single-machine xgboost solver, as usual
../../xgboost mushroom-row.conf task=dump model_in=0002.model fmap=../../demo/data/featmap.txt name_dump=dump.nice.$k.txt
cat dump.nice.$k.txt
|
||||
@@ -1,35 +0,0 @@
|
||||
# xgboost configuration for the row-split mushroom demo (flat key = value
# format with '#' comments, as read by the xgboost CLI — no [sections]).

# General Parameters, see comment for each definition
# choose the booster, can be gbtree or gblinear
booster = gbtree
# choose logistic regression loss function for binary classification
objective = binary:logistic

# Tree Booster Parameters
# step size shrinkage
eta = 1.0
# minimum loss reduction required to make a further partition
gamma = 1.0
# minimum sum of instance weight(hessian) needed in a child
min_child_weight = 1
# maximum depth of a tree
max_depth = 3

# Task Parameters
# the number of rounds to do boosting
num_round = 2
# 0 means do not save any model except the final round model
save_period = 0
# presumably disables the binary data-matrix buffer cache — confirm against xgboost docs
use_buffer = 0

# The path of training data; %d is the wildcard for the rank of the process
# NOTE(review): this is the row-split config — each process takes the subset
# of rows stored in its own "train.row<rank>" shard (the old "subset of
# columns" wording was stale, copied from the column-split config)
data = "train.row%d"

# The path of validation data, used to monitor training process, here [test] sets name of the validation set
eval[test] = "../../demo/data/agaricus.txt.test"
# evaluate on training data as well each round
eval_train = 1

# The path of test data; use the full test set here, or keep a subsampled version
test:data = "../../demo/data/agaricus.txt.test"
|
||||
Reference in New Issue
Block a user