make wrapper ok

commit 5f08313cb2, parent 69b2f31098

Makefile (4 changed lines):
@@ -32,8 +32,8 @@ sync_tcp.o: src/sync/sync_tcp.cpp
 sync_empty.o: src/sync/sync_empty.cpp
 main.o: src/xgboost_main.cpp src/utils/*.h src/*.h src/learner/*.hpp src/learner/*.h
 xgboost-mpi: updater.o gbm.o io.o main.o sync_mpi.o
-xgboost: updater.o gbm.o io.o main.o sync_empty.o
+xgboost: updater.o gbm.o io.o main.o sync_tcp.o
-wrapper/libxgboostwrapper.so: wrapper/xgboost_wrapper.cpp src/utils/*.h src/*.h src/learner/*.hpp src/learner/*.h updater.o gbm.o io.o sync_empty.o
+wrapper/libxgboostwrapper.so: wrapper/xgboost_wrapper.cpp src/utils/*.h src/*.h src/learner/*.hpp src/learner/*.h updater.o gbm.o io.o sync_tcp.o

 $(BIN) :
 	$(CXX) $(CFLAGS) $(LDFLAGS) -o $@ $(filter %.cpp %.o %.c, $^)
@@ -4,12 +4,12 @@ python mapfeat.py
 # split train and test
 python mknfold.py agaricus.txt 1
 # training and output the models
-mpirun ../../xgboost mushroom.conf
+../../xgboost mushroom.conf
 # output prediction task=pred
-mpirun ../../xgboost mushroom.conf task=pred model_in=0002.model
+../../xgboost mushroom.conf task=pred model_in=0002.model
 # print the boosters of 00002.model in dump.raw.txt
-mpirun ../../xgboost mushroom.conf task=dump model_in=0002.model name_dump=dump.raw.txt
+../../xgboost mushroom.conf task=dump model_in=0002.model name_dump=dump.raw.txt
 # use the feature map in printing for better visualization
-mpirun ../../xgboost mushroom.conf task=dump model_in=0002.model fmap=featmap.txt name_dump=dump.nice.txt
+../../xgboost mushroom.conf task=dump model_in=0002.model fmap=featmap.txt name_dump=dump.nice.txt
 cat dump.nice.txt
@@ -4,5 +4,5 @@ python custom_objective.py
 python boost_from_prediction.py
 python generalized_linear_model.py
 python cross_validation.py
-python predict_leaf_index.py
+python predict_leaf_indices.py
 rm -rf *~ *.model *.buffer
multi-node/README.md:

@@ -4,17 +4,21 @@ This folder contains information about experimental version of distributed xgboost

 Build
 =====
-* You will need to have MPI
 * In the root folder, run ```make mpi```, this will give you xgboost-mpi
+  - You will need MPI to build xgboost-mpi
+* Alternatively, you can run ```make```, which gives you xgboost, using a beta built-in allreduce
+  - You do not need MPI to build this; you can modify [submit_job_tcp.py](submit_job_tcp.py) to use any job scheduler you like to submit the job

 Design Choice
 =====
-* Does distributed xgboost reply on MPI?
+* Does distributed xgboost have to rely on an MPI library?
-  - Yes, but the dependency is isolated in [sync module](../src/sync/sync.h)
+  - No, xgboost relies on an MPI-style protocol that provides Broadcast and AllReduce
+  - The dependency is isolated in the [sync module](../src/sync/sync.h)
   - All other parts of code uses interface defined in sync.h
-  - sync_mpi.cpp is a implementation of sync interface using standard MPI library
+  - [sync_mpi.cpp](../src/sync/sync_mpi.cpp) is an implementation of the sync interface using a standard MPI library; to use xgboost-mpi, you need an MPI library
-  - Specificially, xgboost reply on MPI protocol that provide Broadcast and AllReduce,
-    if there are platform/framework that implements these protocol, xgboost should naturally extends to these platform
+  - If a platform/framework implements these protocols, xgboost naturally extends to that platform
+  - As an example, [sync_tcp.cpp](../src/sync/sync_tcp.cpp) is an implementation of the interface using TCP, and is linked into xgboost by default

 * How is the data distributed?
   - There are two solvers in distributed xgboost
   - Column-based solver split data by column, each node work on subset of columns,
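The README above leans on exactly two collective primitives, Broadcast and AllReduce. As a minimal sketch of their semantics (plain Python simulating the per-node buffers; none of these names come from the codebase):

```python
from functools import reduce

def allreduce(buffers, op):
    """every node contributes a value; every node receives the reduction"""
    result = reduce(op, buffers)
    return [result] * len(buffers)

def broadcast(buffers, root):
    """every node receives the root node's value"""
    return [buffers[root]] * len(buffers)

# three "nodes", each holding a partial gradient statistic
partial = [1.0, 2.5, 0.5]
print(allreduce(partial, lambda a, b: a + b))  # [4.0, 4.0, 4.0]
print(broadcast(partial, root=0))              # [1.0, 1.0, 1.0]
```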
@@ -26,10 +30,11 @@ Design Choice

 Usage
 ====
-* The current code run in MPI enviroment, you will need to have a network filesystem,
-  or copy data to local file system before running the code
+* You will need a network filesystem, or copy the data to the local file system before running the code
+* xgboost-mpi runs in an MPI environment
+* xgboost can be used together with [submit_job_tcp.py](submit_job_tcp.py) on other types of job schedulers
 * ***Note*** The distributed version is still multi-threading optimized.
-  You should run one xgboost-mpi per node that takes most available CPU,
+  You should run one process per node that takes most of the available CPUs,
   this will reduce the communication overhead and improve the performance.
   - One way to do that is limit mpi slot in each machine to be 1, or reserve nthread processors for each process.
 * Examples:
multi-node/col-split/README.md:

@@ -1,6 +1,11 @@
 Distributed XGBoost: Column Split Version
 ====
-* run ```bash mushroom-row.sh <n-mpi-process>```
+* run ```bash mushroom-col.sh <n-mpi-process>```
+* run ```bash mushroom-col-tcp.sh <n-process>```
+  - mushroom-col-tcp.sh starts the xgboost job using xgboost's built-in allreduce
+* run ```bash mushroom-col-python.sh <n-process>```
+  - mushroom-col-python.sh starts an xgboost python job using xgboost's built-in allreduce
+  - see mushroom-col.py

 How to Use
 ====
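For intuition, the column split these scripts rely on assigns each rank a disjoint subset of feature columns. A hypothetical minimal version of the split (the modulo assignment is an illustration, not necessarily splitsvm.py's actual rule):

```python
def split_columns(row, k, rank):
    """row: list of (feature_id, value) pairs; keep this rank's columns"""
    return [(fid, val) for fid, val in row if fid % k == rank]

row = [(0, 1.0), (3, 2.0), (5, 0.5), (7, 1.5)]
print(split_columns(row, k=2, rank=0))  # [(0, 1.0)]
print(split_columns(row, k=2, rank=1))  # [(3, 2.0), (5, 0.5), (7, 1.5)]
```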
multi-node/col-split/mushroom-col-python.sh (new executable file, 22 lines):
@@ -0,0 +1,22 @@
+#!/bin/bash
+if [[ $# -ne 1 ]]
+then
+    echo "Usage: <nprocess>"
+    exit -1
+fi
+
+#
+# This script is the same as mushroom-col except that it uses the xgboost python module
+#
+# xgboost uses the built-in tcp-based allreduce module, and can run in more environments, so long as we know how to start the job by modifying ../submit_job_tcp.py
+#
+rm -rf train.col* *.model
+k=$1
+
+# split the lib svm file into k subfiles
+python splitsvm.py ../../demo/data/agaricus.txt.train train $k
+
+# submit the distributed job
+../submit_job_tcp.py $k python mushroom-col.py
+
+cat dump.nice.$k.txt
multi-node/col-split/mushroom-col.py (new file, 29 lines):
@@ -0,0 +1,29 @@
+import os
+import sys
+sys.path.append(os.path.dirname(__file__) + '/../../wrapper')
+import xgboost as xgb
+# this is an example script of running distributed xgboost using python
+
+# call this additional function to initialize the xgboost sync module
+# in distributed mode
+xgb.sync_init(sys.argv)
+rank = xgb.sync_get_rank()
+# read in this rank's slice of the dataset
+dtrain = xgb.DMatrix('train.col%d' % rank)
+param = {'max_depth': 3, 'eta': 1, 'silent': 1, 'objective': 'binary:logistic'}
+param['dsplit'] = 'col'
+nround = 3
+
+if rank == 0:
+    dtest = xgb.DMatrix('../../demo/data/agaricus.txt.test')
+    model = xgb.train(param, dtrain, nround, [(dtrain, 'train'), (dtest, 'test')])
+else:
+    # if it is a slave node, do not run evaluation
+    model = xgb.train(param, dtrain, nround)
+
+if rank == 0:
+    model.save_model('%04d.model' % nround)
+    # dump model with feature map
+    model.dump_model('dump.nice.%d.txt' % xgb.sync_get_world_size(), '../../demo/data/featmap.txt')
+# shutdown the synchronization module
+xgb.sync_finalize()
multi-node/submit_job_tcp.py:

@@ -11,6 +11,10 @@ import subprocess
 sys.path.append(os.path.dirname(__file__)+'/../src/sync/')
 import tcp_master as master

+#
+# Note: this submit script is only used for example purposes
+# It does not have to be mpirun; it can be any job submission command that starts the job: qsub, hadoop streaming, etc.
+#
 def mpi_submit(nslave, args):
     """
     customized submit script, that submit nslave jobs, each must contain args as parameter
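The note added above says the launcher does not have to be mpirun. A hedged sketch of an alternative submit function in the same shape as mpi_submit; ssh_submit and the node names are hypothetical, not part of the commit:

```python
import subprocess

def ssh_submit(nslave, args):
    """start nslave workers over ssh; each worker receives the same args
    (which carry the master address for the tcp sync module)"""
    procs = []
    for i in range(nslave):
        host = 'node%d' % i  # hypothetical host naming scheme
        procs.append(subprocess.Popen(['ssh', host] + args))
    for p in procs:
        p.wait()
```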
src/io/io.cpp:

@@ -13,6 +13,11 @@
 namespace xgboost {
 namespace io {
 DataMatrix* LoadDataMatrix(const char *fname, bool silent, bool savebuffer) {
+  if (!strcmp(fname, "stdin")) {
+    DMatrixSimple *dmat = new DMatrixSimple();
+    dmat->LoadText(fname, silent);
+    return dmat;
+  }
   std::string tmp_fname;
   const char *fname_ext = NULL;
   if (strchr(fname, ';') != NULL) {
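With this change, passing the literal name "stdin" makes LoadDataMatrix read text data from standard input. A sketch of what that enables from a demo directory; treating `data=stdin` as a command-line config override is an assumption based on the surrounding demo scripts:

```python
import subprocess

# pipe the training file into xgboost instead of naming it in the config;
# 'data=stdin' is assumed to override the data path like other conf keys
with open('agaricus.txt.train', 'rb') as f:
    subprocess.check_call(['../../xgboost', 'mushroom.conf', 'data=stdin'],
                          stdin=f)
```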
src/io/simple_dmatrix-inl.hpp:

@@ -84,7 +84,12 @@ class DMatrixSimple : public DataMatrix {
   inline void LoadText(const char* fname, bool silent = false) {
     using namespace std;
     this->Clear();
-    FILE* file = utils::FopenCheck(fname, "r");
+    FILE* file;
+    if (!strcmp(fname, "stdin")) {
+      file = stdin;
+    } else {
+      file = utils::FopenCheck(fname, "r");
+    }
     float label; bool init = true;
     char tmp[1024];
     std::vector<RowBatch::Entry> feats;
@@ -112,7 +117,9 @@ class DMatrixSimple : public DataMatrix {
                    static_cast<unsigned long>(info.num_col()),
                    static_cast<unsigned long>(row_data_.size()), fname);
     }
-    fclose(file);
+    if (file != stdin) {
+      fclose(file);
+    }
     // try to load in additional file
     std::string name = fname;
     std::string gname = name + ".group";
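The two hunks above implement one pattern: treat "stdin" as a handle we did not open, so we must not close it. The same idea sketched in Python:

```python
import sys

def load_text(fname):
    """parse rows from fname, or from standard input when fname == 'stdin'"""
    file = sys.stdin if fname == 'stdin' else open(fname, 'r')
    try:
        for line in file:
            pass  # parse one libsvm row here
    finally:
        if file is not sys.stdin:
            file.close()  # only close handles we opened ourselves
```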
src/sync/sync_tcp.cpp:

@@ -352,7 +352,7 @@ class SyncManager {
     buffer_.resize(std::min(reduce_buffer_size, n));
     // make sure align to type_nbytes
     buffer_size = buffer_.size() * sizeof(uint64_t) / type_nbytes * type_nbytes;
-    utils::Assert(type_nbytes < buffer_size, "too large type_nbytes=%lu, buffer_size", type_nbytes, buffer_size);
+    utils::Assert(type_nbytes <= buffer_size, "too large type_nbytes=%lu, buffer_size=%lu", type_nbytes, buffer_size);
     // set buffer head
     buffer_head = reinterpret_cast<char*>(BeginPtr(buffer_));
   }
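The relaxed assertion matters because buffer_size is rounded down to a multiple of type_nbytes; when the rounded size equals type_nbytes exactly, the old strict `<` rejected a valid buffer. A worked example of the computation:

```python
buffer_elems = 3    # buffer_.size(), counted in uint64_t slots
type_nbytes = 16    # bytes per element being reduced
# round the byte capacity down to a multiple of type_nbytes
buffer_size = buffer_elems * 8 // type_nbytes * type_nbytes  # 24 -> 16
assert type_nbytes <= buffer_size  # 16 <= 16 holds; the old '<' would fail
```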
@@ -487,6 +487,8 @@ void AllReduce<uint32_t>(uint32_t *sendrecvbuf, int count, ReduceOp op) {
   typedef uint32_t DType;
   switch(op) {
     case kBitwiseOR: manager.AllReduce(sendrecvbuf, sizeof(DType), count, ReduceBitOR<DType>); return;
+    case kSum: manager.AllReduce(sendrecvbuf, sizeof(DType), count, ReduceSum<DType>); return;
+    case kMax: manager.AllReduce(sendrecvbuf, sizeof(DType), count, ReduceMax<DType>); return;
     default: utils::Error("reduce op not supported");
   }
 }
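The new kSum and kMax cases perform an elementwise reduction across each node's buffer, just as kBitwiseOR already did. Sketched in Python:

```python
# one uint32 buffer per node
buffers = [[1, 7, 3],
           [4, 2, 3]]
print([sum(col) for col in zip(*buffers)])  # kSum -> [5, 9, 6]
print([max(col) for col in zip(*buffers)])  # kMax -> [4, 7, 3]
```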
@@ -1,5 +1,5 @@
 export CC = gcc
-export CXX = clang++
+export CXX = g++
 export MPICXX = mpicxx
 export LDFLAGS= -pthread -lm
 export CFLAGS = -Wall -O3 -msse2 -Wno-unknown-pragmas -fPIC -I../src
wrapper/xgboost.py:

@@ -33,7 +33,10 @@ xglib.XGBoosterCreate.restype = ctypes.c_void_p
 xglib.XGBoosterPredict.restype = ctypes.POINTER(ctypes.c_float)
 xglib.XGBoosterEvalOneIter.restype = ctypes.c_char_p
 xglib.XGBoosterDumpModel.restype = ctypes.POINTER(ctypes.c_char_p)
+# sync functions
+xglib.XGSyncGetRank.restype = ctypes.c_int
+xglib.XGSyncGetWorldSize.restype = ctypes.c_int
+# initialize communication module

 def ctypes2numpy(cptr, length, dtype):
     """convert a ctypes pointer array to numpy array """
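Declaring restype tells ctypes how to convert the C return value; c_int happens to be ctypes' default, so these declarations mainly make the contract explicit. A minimal sketch, assuming libxgboostwrapper.so has been built by the Makefile change above:

```python
import ctypes

xglib = ctypes.cdll.LoadLibrary('./wrapper/libxgboostwrapper.so')
xglib.XGSyncGetRank.restype = ctypes.c_int
xglib.XGSyncGetWorldSize.restype = ctypes.c_int
```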
@@ -553,3 +556,18 @@ def cv(params, dtrain, num_boost_round = 10, nfold=3, metrics=[], \
         sys.stderr.write(res+'\n')
         results.append(res)
     return results
+
+# synchronization module
+def sync_init(args=sys.argv):
+    arr = (ctypes.c_char_p * len(args))()
+    arr[:] = args
+    xglib.XGSyncInit(len(args), arr)
+
+def sync_finalize():
+    xglib.XGSyncFinalize()
+
+def sync_get_rank():
+    return int(xglib.XGSyncGetRank())
+
+def sync_get_world_size():
+    return int(xglib.XGSyncGetWorldSize())
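Putting the new wrapper functions together, a usage sketch mirroring mushroom-col.py (it assumes the wrapper directory is on the import path):

```python
import sys
import xgboost as xgb

xgb.sync_init(sys.argv)           # argv carries the master address when launched by submit_job_tcp.py
rank = xgb.sync_get_rank()        # this process's id within the job
size = xgb.sync_get_world_size()  # number of processes in the job
print('process %d of %d' % (rank, size))
xgb.sync_finalize()               # always pair init with finalize
```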
wrapper/xgboost_wrapper.cpp:

@@ -80,6 +80,23 @@ class Booster: public learner::BoostLearner {
 using namespace xgboost::wrapper;

 extern "C"{
+void XGSyncInit(int argc, char *argv[]) {
+  sync::Init(argc, argv);
+  if (sync::IsDistributed()) {
+    std::string pname = xgboost::sync::GetProcessorName();
+    utils::Printf("distributed job start %s:%d\n", pname.c_str(), xgboost::sync::GetRank());
+  }
+}
+void XGSyncFinalize(void) {
+  sync::Finalize();
+}
+int XGSyncGetRank(void) {
+  int rank = xgboost::sync::GetRank();
+  return rank;
+}
+int XGSyncGetWorldSize(void) {
+  return sync::GetWorldSize();
+}
 void* XGDMatrixCreateFromFile(const char *fname, int silent) {
   return LoadDataMatrix(fname, silent != 0, false);
 }
wrapper/xgboost_wrapper.h:

@@ -17,6 +17,28 @@ typedef unsigned long bst_ulong;
 #ifdef __cplusplus
 extern "C" {
 #endif
+/*!
+ * \brief initialize the sync module; this is needed when running in distributed mode
+ * normally argv needs to contain master_uri and master_port;
+ * if the job is started by the submit_job_tcp script, passing its args here will do
+ * \param argc number of arguments
+ * \param argv the arguments to be passed to the sync module
+ */
+XGB_DLL void XGSyncInit(int argc, char *argv[]);
+/*!
+ * \brief finalize the sync module, call this when everything is done
+ */
+XGB_DLL void XGSyncFinalize(void);
+/*!
+ * \brief get the rank of the current process
+ * \return the rank of the current process in the group
+ */
+XGB_DLL int XGSyncGetRank(void);
+/*!
+ * \brief get the world size from the sync module
+ * \return the number of distributed jobs running in the group
+ */
+XGB_DLL int XGSyncGetWorldSize(void);
 /*!
  * \brief load a data matrix
  * \return a loaded data matrix