get multinode in

tqchen 2014-11-19 19:19:53 -08:00
parent 7c3a392136
commit c42ba8d281
14 changed files with 157 additions and 23 deletions

View File

@@ -28,6 +28,7 @@ Design Choice
this will reduce the communication overhead and improve the performance.
- One way to do that is to limit the MPI slots on each machine to 1, or to reserve nthread processors for each process.
-Examples
+Usage
====
* [Column-based version](col-split)
+* [Row-based version](row-split)

View File

@@ -1,6 +1,6 @@
Distributed XGBoost: Column Split Version
====
-* run ```bash run-mushroom.sh```
+* run ```bash mushroom-row.sh <n-mpi-process>```
How to Use
====

View File

@@ -0,0 +1,17 @@
Distributed XGBoost: Row Split Version
====
* Mushroom: run ```bash mushroom-row.sh <n-mpi-process>```
* Machine: run ```bash machine-row.sh <n-mpi-process>```
How to Use
====
* First, split the data by rows
* In the config, specify the data file with a wildcard %d, where %d is the rank of the node; each node will load its own part of the data (see the sketch after this list)
* Enable row split mode by ```dsplit=row```
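
As a rough illustration of the wildcard mechanics (a hedged sketch, not the actual xgboost loader; `ResolveDataPath` is a hypothetical helper), each process can substitute its own MPI rank into the configured path:

```cpp
// Hypothetical sketch: resolve the %d wildcard in the configured data path
// against this node's MPI rank, so "train.row%d" becomes "train.row0" on
// rank 0, "train.row1" on rank 1, and so on. Assumes MPI_Init was called.
#include <mpi.h>
#include <cstdio>
#include <string>

std::string ResolveDataPath(const std::string &path_template) {
  int rank;
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  char buf[256];
  std::snprintf(buf, sizeof(buf), path_template.c_str(), rank);
  return std::string(buf);
}
```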
Notes
====
* The code is multi-threaded, so you will want to run one xgboost-mpi process per node
* The row-based solver splits data by row: each node works on a subset of rows. It uses an approximate histogram counting algorithm, and will only examine a subset of potential split points, as opposed to all split points (sketched below)
* ```colsample_bytree``` is not yet supported in row split mode
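
To make the histogram note concrete, here is a toy sketch of the general idea (not xgboost's actual CQHistMaker, which uses a weighted quantile sketch): propose a fixed number of approximate quantiles of each feature as the only candidate split points, instead of every distinct value.

```cpp
// Toy sketch: pick max_bins - 1 empirical quantiles of a feature as the
// only candidate thresholds, rather than scanning all distinct values.
#include <algorithm>
#include <vector>

std::vector<float> ProposeSplitCandidates(std::vector<float> values,
                                          int max_bins) {
  std::vector<float> candidates;
  if (values.empty()) return candidates;
  std::sort(values.begin(), values.end());
  for (int b = 1; b < max_bins; ++b) {
    size_t idx = values.size() * b / max_bins;  // index of the b-th quantile
    candidates.push_back(values[idx]);
  }
  // drop duplicate thresholds produced by repeated feature values
  candidates.erase(std::unique(candidates.begin(), candidates.end()),
                   candidates.end());
  return candidates;
}
```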

View File

@@ -0,0 +1,31 @@
# General Parameters, see comment for each definition
# choose the tree booster, can also change to gblinear
booster = gbtree
# this is the only difference with classification, use reg:linear to do linear regression
# when labels are in [0,1] we can also use reg:logistic
objective = reg:linear
# Tree Booster Parameters
# step size shrinkage
eta = 1.0
# minimum loss reduction required to make a further partition
gamma = 1.0
# minimum sum of instance weight(hessian) needed in a child
min_child_weight = 1
# maximum depth of a tree
max_depth = 3
# Task parameters
# the number of boosting rounds
num_round = 2
# 0 means do not save any model except the final round model
save_period = 0
use_buffer = 0
# The path of training data
data = "train-machine.row%d"
# The path of validation data, used to monitor training process, here [test] sets name of the validation set
eval[test] = "../../demo/regression/machine.txt.test"
# The path of test data
test:data = "../../demo/regression/machine.txt.test"

View File

@@ -0,0 +1,21 @@
#!/bin/bash
if [[ $# -ne 1 ]]
then
echo "Usage: nprocess"
exit -1
fi
rm -rf train-machine.row* *.model
k=$1
# make machine data
cd ../../demo/regression/
python mapfeat.py
python mknfold.py machine.txt 1
cd -
# split the lib svm file into k subfiles
python splitrows.py ../../demo/regression/machine.txt.train train-machine $k
# run xgboost mpi
mpirun -n $k ../../xgboost-mpi machine-row.conf dsplit=row

View File

@@ -0,0 +1,35 @@
# General Parameters, see comment for each definition
# choose the booster, can be gbtree or gblinear
booster = gbtree
# choose logistic regression loss function for binary classification
objective = binary:logistic
# Tree Booster Parameters
# step size shrinkage
eta = 1.0
# minimum loss reduction required to make a further partition
gamma = 1.0
# minimum sum of instance weight(hessian) needed in a child
min_child_weight = 1
# maximum depth of a tree
max_depth = 3
# Task Parameters
# the number of boosting rounds
num_round = 2
# 0 means do not save any model except the final round model
save_period = 0
use_buffer = 0
# The path of training data; %d is the wildcard for the rank of the process
# The idea is that each process takes a feature matrix with a subset of rows
#
data = "train.row%d"
# The path of validation data, used to monitor training process, here [test] sets name of the validation set
eval[test] = "../../demo/data/agaricus.txt.test"
# evaluate on training data as well each round
eval_train = 1
# The path of test data; the full test data is needed here, so try not to use it, or keep a subsampled version
test:data = "../../demo/data/agaricus.txt.test"

View File

@@ -0,0 +1,19 @@
#!/bin/bash
if [[ $# -ne 1 ]]
then
echo "Usage: nprocess"
exit -1
fi
rm -rf train.row* *.model
k=$1
# split the lib svm file into k subfiles
python splitrows.py ../../demo/data/agaricus.txt.train train $k
# run xgboost mpi
mpirun -n $k ../../xgboost-mpi mushroom-row.conf dsplit=row nthread=1
# the model can be directly loaded by the single-machine xgboost solver, as usual
../../xgboost mushroom-row.conf task=dump model_in=0002.model fmap=../../demo/data/featmap.txt name_dump=dump.nice.$k.txt
cat dump.nice.$k.txt

View File

@@ -160,13 +160,13 @@ class SerializeReducer {
inline void AllReduce(DType *sendrecvobj, size_t max_n4byte, size_t count) {
buffer.resize(max_n4byte * count);
for (size_t i = 0; i < count; ++i) {
-utils::MemoryFixSizeBuffer fs(BeginPtr(buffer) + i * max_n4byte * 4, max_n4byte * 4);
-sendrecvobj[i]->Save(fs);
+utils::MemoryFixSizeBuffer fs(BeginPtr(buffer) + i * max_n4byte, max_n4byte * 4);
+sendrecvobj[i].Save(fs);
}
handle.AllReduce(BeginPtr(buffer), max_n4byte, count);
for (size_t i = 0; i < count; ++i) {
-utils::MemoryFixSizeBuffer fs(BeginPtr(buffer) + i * max_n4byte * 4, max_n4byte * 4);
-sendrecvobj[i]->Load(fs);
+utils::MemoryFixSizeBuffer fs(BeginPtr(buffer) + i * max_n4byte, max_n4byte * 4);
+sendrecvobj[i].Load(fs);
}
}
@@ -178,12 +178,12 @@ class SerializeReducer {
// temp space
DType tsrc, tdst;
for (int i = 0; i < len_; ++i) {
-utils::MemoryFixSizeBuffer fsrc((void*)(src_) + i * nbytes, nbytes);
-utils::MemoryFixSizeBuffer fdst(dst_ + i * nbytes, nbytes);
+utils::MemoryFixSizeBuffer fsrc((char*)(src_) + i * nbytes, nbytes);
+utils::MemoryFixSizeBuffer fdst((char*)(dst_) + i * nbytes, nbytes);
tsrc.Load(fsrc);
tdst.Load(fdst);
// govern const check
-tdst.Reduce(static_cast<const DType &>(tsrc));
+tdst.Reduce(static_cast<const DType &>(tsrc), nbytes);
fdst.Seek(0);
tdst.Save(fdst);
}
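
The offset change in AllReduce above is a pointer-arithmetic fix: assuming the reduce buffer holds 4-byte words (as the n4byte naming suggests), advancing the pointer by i * max_n4byte elements already skips i * max_n4byte * 4 bytes, so the old extra factor of 4 overran the buffer. A minimal standalone sketch of the corrected arithmetic:

```cpp
// Minimal sketch (assumed buffer layout): with 4-byte elements, indexing
// by i * max_n4byte lands each object's slot inside the buffer, whereas
// the old i * max_n4byte * 4 stepped 4x too far.
#include <cassert>
#include <stdint.h>
#include <vector>

int main() {
  const size_t max_n4byte = 8;  // per-object slot size, in 4-byte words
  const size_t count = 4;       // number of serialized objects
  std::vector<uint32_t> buffer(max_n4byte * count);
  for (size_t i = 0; i < count; ++i) {
    uint32_t *slot = buffer.data() + i * max_n4byte;  // fixed offset
    assert(slot + max_n4byte <= buffer.data() + buffer.size());
  }
  return 0;
}
```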

View File

@@ -38,6 +38,9 @@ void Bcast(std::string *sendrecv_data, int root) {
ReduceHandle::ReduceHandle(void) : handle(NULL) {}
ReduceHandle::~ReduceHandle(void) {}
+int ReduceHandle::TypeSize(const MPI::Datatype &dtype) {
+  return 0;
+}
void ReduceHandle::Init(ReduceFunction redfunc, size_t type_n4bytes, bool commute) {}
void ReduceHandle::AllReduce(void *sendrecvbuf, size_t type_n4bytes, size_t n4byte) {}
} // namespace sync

View File

@@ -97,9 +97,12 @@ void ReduceHandle::AllReduce(void *sendrecvbuf, size_t type_n4bytes, size_t count) {
utils::Assert(handle != NULL, "must initialize handle to call AllReduce");
MPI::Op *op = reinterpret_cast<MPI::Op*>(handle);
MPI::Datatype *dtype = reinterpret_cast<MPI::Datatype*>(htype);
-if (created_type_n4bytes != type_n4bytes || htype == NULL) {
-  dtype->Free();
+if (created_type_n4bytes != type_n4bytes || dtype == NULL) {
+  if (dtype == NULL) {
+    dtype = new MPI::Datatype();
+  } else {
+    dtype->Free();
+  }
*dtype = MPI::INT.Create_contiguous(type_n4bytes);
dtype->Commit();
created_type_n4bytes = type_n4bytes;
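
The fix above avoids calling Free() on an MPI datatype that was never created: the type is now allocated lazily on first use and freed only before re-creation. A condensed sketch of that pattern, using the same (since-deprecated) MPI C++ bindings as the surrounding file (`GetContiguousType` is a hypothetical name):

```cpp
// Sketch of the lazy create-or-recreate pattern: allocate the datatype on
// first use, otherwise free the previously committed one, then build a
// contiguous type of type_n4bytes 32-bit integers and commit it.
#include <mpi.h>

MPI::Datatype *GetContiguousType(MPI::Datatype *dtype, size_t type_n4bytes) {
  if (dtype == NULL) {
    dtype = new MPI::Datatype();  // first call: nothing to free yet
  } else {
    dtype->Free();                // release the old committed type
  }
  *dtype = MPI::INT.Create_contiguous(static_cast<int>(type_n4bytes));
  dtype->Commit();
  return dtype;
}
```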

View File

@@ -18,7 +18,7 @@ IUpdater* CreateUpdater(const char *name) {
if (!strcmp(name, "sync")) return new TreeSyncher();
if (!strcmp(name, "refresh")) return new TreeRefresher<GradStats>();
if (!strcmp(name, "grow_colmaker")) return new ColMaker<GradStats>();
-//if (!strcmp(name, "grow_histmaker")) return new CQHistMaker<GradStats>();
+if (!strcmp(name, "grow_histmaker")) return new CQHistMaker<GradStats>();
//if (!strcmp(name, "grow_skmaker")) return new SketchMaker();
if (!strcmp(name, "distcol")) return new DistColMaker<GradStats>();

View File

@@ -507,7 +507,7 @@ class CQHistMaker: public HistMaker<TStats> {
// node statistics
std::vector<TStats> node_stats;
// summary array
-std::vector< WXQSketch::SummaryContainer> summary_array;
+std::vector<WXQSketch::SummaryContainer> summary_array;
// reducer for summary
sync::SerializeReducer<WXQSketch::SummaryContainer> sreducer;
// per node, per feature sketch
@@ -517,6 +517,7 @@ class CQHistMaker: public HistMaker<TStats> {
template<typename TStats>
class QuantileHistMaker: public HistMaker<TStats> {
protected:
+typedef utils::WXQuantileSketch<bst_float, bst_float> WXQSketch;
virtual void ResetPosAndPropose(const std::vector<bst_gpair> &gpair,
IFMatrix *p_fmat,
const BoosterInfo &info,
@@ -624,9 +625,8 @@ class QuantileHistMaker: public HistMaker<TStats> {
}
private:
-typedef utils::WXQuantileSketch<bst_float, bst_float> WXQSketch;
// summary array
-std::vector< WXQSketch::SummaryContainer> summary_array;
+std::vector<WXQSketch::SummaryContainer> summary_array;
// reducer for summary
sync::SerializeReducer<WXQSketch::SummaryContainer> sreducer;
// local temp column data structure

View File

@@ -106,7 +106,7 @@ struct MemoryFixSizeBuffer : public ISeekStream {
}
virtual ~MemoryFixSizeBuffer(void) {}
virtual size_t Read(void *ptr, size_t size) {
-utils::Assert(curr_ptr_ <= buffer_size_,
+utils::Assert(curr_ptr_ + size <= buffer_size_,
              "read can not have position exceed buffer length");
size_t nread = std::min(buffer_size_ - curr_ptr_, size);
if (nread != 0) memcpy(ptr, p_buffer_ + curr_ptr_, nread);
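
The tightened assert changes the contract of Read: previously a request that started in bounds but extended past the end was silently clamped by the min() below it, and now it fails loudly. A toy comparison of the two bounds checks (the numbers are made up for illustration):

```cpp
// Toy illustration: the old check passes for a read that would run past
// the end of a 16-byte buffer, the new check rejects it.
#include <cstddef>
#include <iostream>

int main() {
  const std::size_t buffer_size = 16, curr_ptr = 12, size = 8;
  std::cout << "old check: " << (curr_ptr <= buffer_size) << "\n";         // 1
  std::cout << "new check: " << (curr_ptr + size <= buffer_size) << "\n";  // 0
  return 0;
}
```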

View File

@@ -519,12 +519,12 @@ class QuantileSketchTemplate {
/*! \brief same as summary, but use STL to backup the space */
struct SummaryContainer : public Summary {
std::vector<Entry> space;
-explicit SummaryContainer(void) : Summary(NULL, 0) {
-}
-explicit SummaryContainer(const SummaryContainer &src) : Summary(NULL, src.size) {
+SummaryContainer(const SummaryContainer &src) : Summary(NULL, src.size) {
this->space = src.space;
this->data = BeginPtr(this->space);
}
+SummaryContainer(void) : Summary(NULL, 0) {
+}
/*! \brief reserve space for summary */
inline void Reserve(size_t size) {
if (size > space.size()) {
@@ -576,13 +576,17 @@ class QuantileSketchTemplate {
/*! \brief save the data structure into stream */
inline void Save(IStream &fo) const {
fo.Write(&(this->size), sizeof(this->size));
-fo.Write(data, this->size * sizeof(Entry));
+if (this->size != 0) {
+  fo.Write(this->data, this->size * sizeof(Entry));
+}
}
/*! \brief load data structure from input stream */
inline void Load(IStream &fi) {
utils::Check(fi.Read(&this->size, sizeof(this->size)) != 0, "invalid SummaryArray 1");
-this->Reserve(this->size);
-utils::Check(fi.Read(data, this->size * sizeof(Entry)) != 0, "invalid SummaryArray 2");
+this->Reserve(this->size);
+if (this->size != 0) {
+  utils::Check(fi.Read(this->data, this->size * sizeof(Entry)) != 0, "invalid SummaryArray 2");
+}
}
};
/*!
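
The Save/Load changes above guard the empty-summary case: when size is 0, the payload write and read are skipped entirely, presumably because a zero-byte Read returns 0 and would falsely trip the "invalid SummaryArray 2" check. A self-contained sketch of the same length-prefixed pattern, simplified to FILE* in place of xgboost's IStream:

```cpp
// Sketch of length-prefixed save/load with an empty-payload guard,
// mirroring the fixed SummaryContainer logic under simplified types.
#include <cstdio>
#include <cstdlib>
#include <vector>

struct Blob {
  std::vector<int> data;
  void Save(std::FILE *fo) const {
    std::size_t size = data.size();
    std::fwrite(&size, sizeof(size), 1, fo);
    if (size != 0) {  // skip zero-byte payload writes
      std::fwrite(data.data(), sizeof(int), size, fo);
    }
  }
  void Load(std::FILE *fi) {
    std::size_t size = 0;
    if (std::fread(&size, sizeof(size), 1, fi) != 1) std::abort();
    data.resize(size);  // reserve space before reading the payload
    if (size != 0) {    // skip zero-byte payload reads
      if (std::fread(data.data(), sizeof(int), size, fi) != size) std::abort();
    }
  }
};
```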