get multinode in
parent 7c3a392136
commit c42ba8d281
@@ -28,6 +28,7 @@ Design Choice
 this will reduce the communication overhead and improve the performance.
 - One way to do this is to limit the MPI slots on each machine to 1, or to reserve nthread processors for each process.
 
-Examples
+Usage
 ====
 * [Column-based version](col-split)
+* [Row-based version](row-split)
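The one-slot-per-machine suggestion above can be made concrete with an MPI hostfile. A minimal sketch, assuming Open MPI (hostfile syntax and flags differ between MPI implementations, and the host names and thread count here are made up):

```
# hosts.txt: one MPI slot per machine, so exactly one multi-threaded
# xgboost-mpi process lands on each node
cat > hosts.txt <<'EOF'
node1 slots=1
node2 slots=1
node3 slots=1
EOF

# one process per machine, each using 8 local threads
mpirun --hostfile hosts.txt -n 3 ../../xgboost-mpi mushroom-row.conf dsplit=row nthread=8
```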
@@ -1,6 +1,6 @@
 Distributed XGBoost: Column Split Version
 ====
-* run ```bash run-mushroom.sh```
+* run ```bash mushroom-row.sh <n-mpi-process>```
 
 How to Use
 ====
multi-node/row-split/README.md (new file, 17 lines)
@@ -0,0 +1,17 @@
Distributed XGBoost: Row Split Version
====
* Mushroom: run ```bash mushroom-row.sh <n-mpi-process>```
* Machine: run ```bash machine-row.sh <n-mpi-process>```

How to Use
====
* First split the data by rows (a shell sketch of this step follows this file).
* In the config, specify the data file with a wildcard %d, where %d stands for the rank of the node; each node will then load its own part of the data.
* Enable row split mode by ```dsplit=row```.

Notes
====
* The code is multi-threaded, so you will want to run one xgboost-mpi process per node.
* The row-based solver splits the data by row, and each node works on its subset of rows. It uses an approximate histogram count algorithm, and will only examine a subset of the potential split points, as opposed to all of them.
* ```colsample_bytree``` is not enabled in row split mode so far.
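The row-splitting step is performed by the helper splitrows.py that the scripts below invoke; its exact behavior is not shown in this commit. A minimal shell equivalent, assuming a plain-text LIBSVM input, a round-robin split, and three processes:

```
# distribute the lines of the LIBSVM file over train.row0 .. train.row2;
# the names match the data="train.row%d" wildcard, so rank 0 reads
# train.row0, rank 1 reads train.row1, and so on
awk '{ print > ("train.row" ((NR - 1) % 3)) }' ../../demo/data/agaricus.txt.train

wc -l train.row*   # sanity check: three shards of roughly equal size
```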
multi-node/row-split/machine-row.conf (new file, 31 lines)
@@ -0,0 +1,31 @@
# General Parameters, see comment for each definition
# choose the tree booster, can also change to gblinear
booster = gbtree
# this is the only difference from classification: use reg:linear to do linear regression
# when labels are in [0,1] we can also use reg:logistic
objective = reg:linear

# Tree Booster Parameters
# step size shrinkage
eta = 1.0
# minimum loss reduction required to make a further partition
gamma = 1.0
# minimum sum of instance weight (hessian) needed in a child
min_child_weight = 1
# maximum depth of a tree
max_depth = 3

# Task Parameters
# the number of rounds of boosting
num_round = 2
# 0 means do not save any model except the final-round model
save_period = 0
use_buffer = 0

# The path of training data; %d is the wildcard for the rank of the node
data = "train-machine.row%d"
# The path of validation data, used to monitor the training process; here [test] sets the name of the validation set
eval[test] = "../../demo/regression/machine.txt.test"
# The path of test data
test:data = "../../demo/regression/machine.txt.test"
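Any key=value setting in this conf can also be appended to the xgboost-mpi command line, where it overrides the file; the scripts in this commit use that mechanism for dsplit and nthread. A sketch with illustrative values:

```
# trailing key=value arguments take precedence over the conf file
mpirun -n 2 ../../xgboost-mpi machine-row.conf dsplit=row num_round=4 max_depth=4
```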
multi-node/row-split/machine-row.sh (new executable file, 21 lines)
@@ -0,0 +1,21 @@
#!/bin/bash
if [[ $# -ne 1 ]]
then
    echo "Usage: $0 <nprocess>"
    exit 1
fi

rm -rf train-machine.row* *.model
k=$1
# make the machine data
cd ../../demo/regression/
python mapfeat.py
python mknfold.py machine.txt 1
cd -

# split the LIBSVM file into k subfiles
python splitrows.py ../../demo/regression/machine.txt.train train-machine $k

# run xgboost via MPI
mpirun -n $k ../../xgboost-mpi machine-row.conf dsplit=row
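For example, a four-process run (the process count is arbitrary):

```
# builds the machine data, splits it into train-machine.row0 .. train-machine.row3,
# then trains with one xgboost-mpi process per shard
bash machine-row.sh 4
```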
multi-node/row-split/mushroom-row.conf (new file, 35 lines)
@@ -0,0 +1,35 @@
# General Parameters, see comment for each definition
# choose the booster, can be gbtree or gblinear
booster = gbtree
# choose logistic regression loss function for binary classification
objective = binary:logistic

# Tree Booster Parameters
# step size shrinkage
eta = 1.0
# minimum loss reduction required to make a further partition
gamma = 1.0
# minimum sum of instance weight (hessian) needed in a child
min_child_weight = 1
# maximum depth of a tree
max_depth = 3

# Task Parameters
# the number of rounds of boosting
num_round = 2
# 0 means do not save any model except the final-round model
save_period = 0
use_buffer = 0

# The path of training data; %d is the wildcard for the rank of the node
# The idea is that each process takes a feature matrix with a subset of rows
#
data = "train.row%d"

# The path of validation data, used to monitor the training process; here [test] sets the name of the validation set
eval[test] = "../../demo/data/agaricus.txt.test"
# evaluate on the training data as well each round
eval_train = 1

# The path of test data; this needs the full test data, so try not to use it, or keep a subsampled version
test:data = "../../demo/data/agaricus.txt.test"
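A quick way to produce the subsampled test set that the comment above recommends (the 10% rate and output name are illustrative):

```
# keep every 10th line of the test set so each process evaluates a smaller
# file; point test:data at the result
awk 'NR % 10 == 0' ../../demo/data/agaricus.txt.test > agaricus.txt.test.sub
```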
multi-node/row-split/mushroom-row.sh (new executable file, 19 lines)
@@ -0,0 +1,19 @@
#!/bin/bash
if [[ $# -ne 1 ]]
then
    echo "Usage: $0 <nprocess>"
    exit 1
fi

rm -rf train.row* *.model
k=$1

# split the LIBSVM file into k subfiles
python splitrows.py ../../demo/data/agaricus.txt.train train $k

# run xgboost via MPI
mpirun -n $k ../../xgboost-mpi mushroom-row.conf dsplit=row nthread=1

# the model can be loaded directly by the single-machine xgboost solver, as usual
../../xgboost mushroom-row.conf task=dump model_in=0002.model fmap=../../demo/data/featmap.txt name_dump=dump.nice.$k.txt
cat dump.nice.$k.txt
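For instance, with three processes:

```
# trains on train.row0 .. train.row2, then dumps the final model
# (0002.model, since num_round = 2 with save_period = 0) as readable trees
bash mushroom-row.sh 3
cat dump.nice.3.txt
```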
@@ -160,13 +160,13 @@ class SerializeReducer {
   inline void AllReduce(DType *sendrecvobj, size_t max_n4byte, size_t count) {
     buffer.resize(max_n4byte * count);
     for (size_t i = 0; i < count; ++i) {
-      utils::MemoryFixSizeBuffer fs(BeginPtr(buffer) + i * max_n4byte * 4, max_n4byte * 4);
-      sendrecvobj[i]->Save(fs);
+      utils::MemoryFixSizeBuffer fs(BeginPtr(buffer) + i * max_n4byte, max_n4byte * 4);
+      sendrecvobj[i].Save(fs);
     }
     handle.AllReduce(BeginPtr(buffer), max_n4byte, count);
     for (size_t i = 0; i < count; ++i) {
-      utils::MemoryFixSizeBuffer fs(BeginPtr(buffer) + i * max_n4byte * 4, max_n4byte * 4);
-      sendrecvobj[i]->Load(fs);
+      utils::MemoryFixSizeBuffer fs(BeginPtr(buffer) + i * max_n4byte, max_n4byte * 4);
+      sendrecvobj[i].Load(fs);
     }
   }
@@ -178,12 +178,12 @@ class SerializeReducer {
     // temp space
     DType tsrc, tdst;
     for (int i = 0; i < len_; ++i) {
-      utils::MemoryFixSizeBuffer fsrc((void*)(src_) + i * nbytes, nbytes);
-      utils::MemoryFixSizeBuffer fdst(dst_ + i * nbytes, nbytes);
+      utils::MemoryFixSizeBuffer fsrc((char*)(src_) + i * nbytes, nbytes);
+      utils::MemoryFixSizeBuffer fdst((char*)(dst_) + i * nbytes, nbytes);
       tsrc.Load(fsrc);
       tdst.Load(fdst);
       // govern const check
-      tdst.Reduce(static_cast<const DType &>(tsrc));
+      tdst.Reduce(static_cast<const DType &>(tsrc), nbytes);
       fdst.Seek(0);
       tdst.Save(fdst);
     }
@@ -38,6 +38,9 @@ void Bcast(std::string *sendrecv_data, int root) {
 
 ReduceHandle::ReduceHandle(void) : handle(NULL) {}
 ReduceHandle::~ReduceHandle(void) {}
+int ReduceHandle::TypeSize(const MPI::Datatype &dtype) {
+  return 0;
+}
 void ReduceHandle::Init(ReduceFunction redfunc, size_t type_n4bytes, bool commute) {}
 void ReduceHandle::AllReduce(void *sendrecvbuf, size_t type_n4bytes, size_t n4byte) {}
 }  // namespace sync
@@ -97,9 +97,12 @@ void ReduceHandle::AllReduce(void *sendrecvbuf, size_t type_n4bytes, size_t coun
   utils::Assert(handle != NULL, "must intialize handle to call AllReduce");
   MPI::Op *op = reinterpret_cast<MPI::Op*>(handle);
   MPI::Datatype *dtype = reinterpret_cast<MPI::Datatype*>(htype);
 
-  if (created_type_n4bytes != type_n4bytes || htype == NULL) {
-    dtype->Free();
+  if (created_type_n4bytes != type_n4bytes || dtype == NULL) {
+    if (dtype == NULL) {
+      dtype = new MPI::Datatype();
+    } else {
+      dtype->Free();
+    }
     *dtype = MPI::INT.Create_contiguous(type_n4bytes);
     dtype->Commit();
     created_type_n4bytes = type_n4bytes;
@@ -18,7 +18,7 @@ IUpdater* CreateUpdater(const char *name) {
   if (!strcmp(name, "sync")) return new TreeSyncher();
   if (!strcmp(name, "refresh")) return new TreeRefresher<GradStats>();
   if (!strcmp(name, "grow_colmaker")) return new ColMaker<GradStats>();
-  //if (!strcmp(name, "grow_histmaker")) return new CQHistMaker<GradStats>();
+  if (!strcmp(name, "grow_histmaker")) return new CQHistMaker<GradStats>();
   //if (!strcmp(name, "grow_skmaker")) return new SketchMaker();
   if (!strcmp(name, "distcol")) return new DistColMaker<GradStats>();
@@ -507,7 +507,7 @@ class CQHistMaker: public HistMaker<TStats> {
   // node statistics
   std::vector<TStats> node_stats;
   // summary array
-  std::vector< WXQSketch::SummaryContainer> summary_array;
+  std::vector<WXQSketch::SummaryContainer> summary_array;
   // reducer for summary
   sync::SerializeReducer<WXQSketch::SummaryContainer> sreducer;
   // per node, per feature sketch
@@ -517,6 +517,7 @@ class CQHistMaker: public HistMaker<TStats> {
 template<typename TStats>
 class QuantileHistMaker: public HistMaker<TStats> {
  protected:
+  typedef utils::WXQuantileSketch<bst_float, bst_float> WXQSketch;
   virtual void ResetPosAndPropose(const std::vector<bst_gpair> &gpair,
                                   IFMatrix *p_fmat,
                                   const BoosterInfo &info,
@@ -624,9 +625,8 @@ class QuantileHistMaker: public HistMaker<TStats> {
   }
 
  private:
-  typedef utils::WXQuantileSketch<bst_float, bst_float> WXQSketch;
   // summary array
-  std::vector< WXQSketch::SummaryContainer> summary_array;
+  std::vector<WXQSketch::SummaryContainer> summary_array;
   // reducer for summary
   sync::SerializeReducer<WXQSketch::SummaryContainer> sreducer;
   // local temp column data structure
@@ -106,7 +106,7 @@ struct MemoryFixSizeBuffer : public ISeekStream {
   }
   virtual ~MemoryFixSizeBuffer(void) {}
   virtual size_t Read(void *ptr, size_t size) {
-    utils::Assert(curr_ptr_ <= buffer_size_,
+    utils::Assert(curr_ptr_ + size <= buffer_size_,
                   "read can not have position excceed buffer length");
     size_t nread = std::min(buffer_size_ - curr_ptr_, size);
     if (nread != 0) memcpy(ptr, p_buffer_ + curr_ptr_, nread);
@@ -519,12 +519,12 @@ class QuantileSketchTemplate {
   /*! \brief same as summary, but use STL to backup the space */
   struct SummaryContainer : public Summary {
     std::vector<Entry> space;
-    explicit SummaryContainer(void) : Summary(NULL, 0) {
-    }
-    explicit SummaryContainer(const SummaryContainer &src) : Summary(NULL, src.size) {
+    SummaryContainer(const SummaryContainer &src) : Summary(NULL, src.size) {
       this->space = src.space;
       this->data = BeginPtr(this->space);
     }
+    SummaryContainer(void) : Summary(NULL, 0) {
+    }
     /*! \brief reserve space for summary */
     inline void Reserve(size_t size) {
       if (size > space.size()) {
@@ -576,13 +576,17 @@ class QuantileSketchTemplate {
     /*! \brief save the data structure into stream */
     inline void Save(IStream &fo) const {
       fo.Write(&(this->size), sizeof(this->size));
-      fo.Write(data, this->size * sizeof(Entry));
+      if (this->size != 0) {
+        fo.Write(this->data, this->size * sizeof(Entry));
+      }
     }
     /*! \brief load data structure from input stream */
     inline void Load(IStream &fi) {
       utils::Check(fi.Read(&this->size, sizeof(this->size)) != 0, "invalid SummaryArray 1");
       this->Reserve(this->size);
-      utils::Check(fi.Read(data, this->size * sizeof(Entry)) != 0, "invalid SummaryArray 2");
+      if (this->size != 0) {
+        utils::Check(fi.Read(this->data, this->size * sizeof(Entry)) != 0, "invalid SummaryArray 2");
+      }
     }
   };
   /*!