Fix spelling in documents (#6948)
* Update roxygen2 doc. Co-authored-by: fis <jm.yuan@outlook.com>
This commit is contained in:
@@ -11,7 +11,7 @@ and do not need to be manually configured.
|
||||
- The port of rabit tracker
|
||||
* rabit_task_id [automatically detected]
|
||||
- The unique identifier of computing process
|
||||
- When running on hadoop, this is automatically extracted from enviroment variable
|
||||
- When running on Hadoop, this is automatically extracted from environment variable
|
||||
* rabit_reduce_buffer [default = 256MB]
|
||||
- The memory buffer used to store intermediate result of reduction
|
||||
- Format "digits + unit", can be 128M, 1G
|
||||
|
||||
@@ -25,7 +25,7 @@
|
||||
typedef unsigned long rbt_ulong; // NOLINT(*)
|
||||
|
||||
/*!
|
||||
* \brief intialize the rabit module,
|
||||
* \brief initialize the rabit module,
|
||||
* call this once before using anything
|
||||
* The additional arguments are not necessary.
|
||||
* Usually rabit will detect settings
|
||||
@@ -87,7 +87,7 @@ RABIT_DLL void RabitGetProcessorName(char *out_name,
|
||||
* \brief broadcast a memory region to all others from root
|
||||
*
|
||||
* Example: int a = 1; Broadcast(&a, sizeof(a), root);
|
||||
* \param sendrecv_data the pointer to send or recive buffer,
|
||||
* \param sendrecv_data the pointer to send or receive buffer,
|
||||
* \param size the size of the data
|
||||
* \param root the root of process
|
||||
*/
|
||||
@@ -122,12 +122,12 @@ RABIT_DLL int RabitAllgather(void *sendrecvbuf, size_t total_size,
|
||||
* ...
|
||||
* Allreduce<op::Sum>(&data[0], data.size());
|
||||
* ...
|
||||
* \param sendrecvbuf buffer for both sending and recving data
|
||||
* \param sendrecvbuf buffer for both sending and receiving data
|
||||
* \param count number of elements to be reduced
|
||||
* \param enum_dtype the enumeration of data type, see rabit::engine::mpi::DataType in engine.h of rabit include
|
||||
* \param enum_op the enumeration of operation type, see rabit::engine::mpi::OpType in engine.h of rabit
|
||||
* \param prepare_fun Lazy preprocessing function, if it is not NULL, prepare_fun(prepare_arg)
|
||||
* will be called by the function before performing Allreduce, to intialize the data in sendrecvbuf_.
|
||||
* will be called by the function before performing Allreduce, to initialize the data in sendrecvbuf_.
|
||||
* If the result of Allreduce can be recovered directly, then prepare_func will NOT be called
|
||||
* \param prepare_arg argument to be passed into the lazy preprocessing function
|
||||
*/
|
||||
|
||||
@@ -102,7 +102,7 @@ inline void Reducer(const void *src_, void *dst_, int len, const MPI::Datatype &
|
||||
}
|
||||
} // namespace op
|
||||
|
||||
// intialize the rabit engine
|
||||
// initialize the rabit engine
|
||||
inline bool Init(int argc, char *argv[]) {
|
||||
return engine::Init(argc, argv);
|
||||
}
|
||||
|
||||
@@ -615,7 +615,7 @@ struct PollHelper {
|
||||
}
|
||||
|
||||
/*!
|
||||
* \brief peform poll on the set defined, read, write, exception
|
||||
* \brief perform poll on the set defined, read, write, exception
|
||||
* \param timeout timeout in milliseconds (ms); if negative, poll will block
|
||||
* \return
|
||||
*/
|
||||
|
||||
@@ -40,7 +40,7 @@ AllreduceBase::AllreduceBase() {
|
||||
err_link = nullptr;
|
||||
dmlc_role = "worker";
|
||||
this->SetParam("rabit_reduce_buffer", "256MB");
|
||||
// setup possible enviroment variable of interest
|
||||
// setup possible environment variable of interest
|
||||
// include dmlc support direct variables
|
||||
env_vars.emplace_back("DMLC_TASK_ID");
|
||||
env_vars.emplace_back("DMLC_ROLE");
|
||||
@@ -52,7 +52,7 @@ AllreduceBase::AllreduceBase() {
|
||||
|
||||
// initialization function
|
||||
bool AllreduceBase::Init(int argc, char* argv[]) {
|
||||
// setup from enviroment variables
|
||||
// setup from environment variables
|
||||
// handler to get variables from env
|
||||
for (auto & env_var : env_vars) {
|
||||
const char *value = getenv(env_var.c_str());
|
||||
@@ -294,7 +294,7 @@ bool AllreduceBase::ReConnectLinks(const char *cmd) {
|
||||
rank = newrank;
|
||||
|
||||
if (rank == -1) {
|
||||
LOG(FATAL) << "tracker got overwhelemed and not able to assign correct rank";
|
||||
LOG(FATAL) << "tracker got overwhelmed and not able to assign correct rank";
|
||||
}
|
||||
|
||||
LOG(CONSOLE) << "task " << task_id << " got new rank " << rank;
|
||||
@@ -455,7 +455,7 @@ bool AllreduceBase::ReConnectLinks(const char *cmd) {
|
||||
* It only means the current node get the correct result of Allreduce.
|
||||
* However, it means every node finishes LAST call(instead of this one) of Allreduce/Bcast
|
||||
*
|
||||
* \param sendrecvbuf_ buffer for both sending and recving data
|
||||
* \param sendrecvbuf_ buffer for both sending and receiving data
|
||||
* \param type_nbytes the unit number of bytes the type have
|
||||
* \param count number of elements to be reduced
|
||||
* \param reducer reduce function
|
||||
@@ -477,7 +477,7 @@ AllreduceBase::TryAllreduce(void *sendrecvbuf_,
|
||||
* \brief perform in-place allreduce, on sendrecvbuf,
|
||||
* this function implements tree-shape reduction
|
||||
*
|
||||
* \param sendrecvbuf_ buffer for both sending and recving data
|
||||
* \param sendrecvbuf_ buffer for both sending and receiving data
|
||||
* \param type_nbytes the unit number of bytes the type have
|
||||
* \param count number of elements to be reduced
|
||||
* \param reducer reduce function
|
||||
@@ -513,7 +513,7 @@ AllreduceBase::TryAllreduceTree(void *sendrecvbuf_,
|
||||
}
|
||||
links[i].ResetSize();
|
||||
}
|
||||
// if no childs, no need to reduce
|
||||
// if no children, no need to reduce
|
||||
if (nlink == static_cast<int>(parent_index != -1)) {
|
||||
size_up_reduce = total_size;
|
||||
}
|
||||
@@ -548,7 +548,7 @@ AllreduceBase::TryAllreduceTree(void *sendrecvbuf_,
|
||||
}
|
||||
}
|
||||
}
|
||||
// finish runing allreduce
|
||||
// finish running allreduce
|
||||
if (finished) break;
|
||||
// select must return
|
||||
watcher.Poll(timeout_sec);
|
||||
@@ -566,7 +566,7 @@ AllreduceBase::TryAllreduceTree(void *sendrecvbuf_,
|
||||
}
|
||||
}
|
||||
}
|
||||
// this node have childs, peform reduce
|
||||
// this node have children, perform reduce
|
||||
if (nlink > static_cast<int>(parent_index != -1)) {
|
||||
size_t buffer_size = 0;
|
||||
// do upstream reduce
|
||||
@@ -584,16 +584,16 @@ AllreduceBase::TryAllreduceTree(void *sendrecvbuf_,
|
||||
max_reduce = (max_reduce / type_nbytes * type_nbytes);
|
||||
|
||||
// if max reduce is less than total size, we reduce multiple times of
|
||||
// eachreduce size
|
||||
// each reduce size
|
||||
if (max_reduce < total_size) {
|
||||
max_reduce = max_reduce - max_reduce % eachreduce;
|
||||
}
|
||||
|
||||
// peform reduce, can be at most two rounds
|
||||
// perform reduce, can be at most two rounds
|
||||
while (size_up_reduce < max_reduce) {
|
||||
// start position
|
||||
size_t start = size_up_reduce % buffer_size;
|
||||
// peform read till end of buffer
|
||||
// perform read till end of buffer
|
||||
size_t nread = std::min(buffer_size - start,
|
||||
max_reduce - size_up_reduce);
|
||||
utils::Assert(nread % type_nbytes == 0, "Allreduce: size check");
|
||||
@@ -659,7 +659,7 @@ AllreduceBase::TryAllreduceTree(void *sendrecvbuf_,
|
||||
// this is root, can use reduce as most recent point
|
||||
size_down_in = size_up_out = size_up_reduce;
|
||||
}
|
||||
// can pass message down to childs
|
||||
// can pass message down to children
|
||||
for (int i = 0; i < nlink; ++i) {
|
||||
if (i != parent_index && links[i].size_write < size_down_in) {
|
||||
ReturnType ret = links[i].WriteFromArray(sendrecvbuf, size_down_in);
|
||||
@@ -673,7 +673,7 @@ AllreduceBase::TryAllreduceTree(void *sendrecvbuf_,
|
||||
}
|
||||
/*!
|
||||
* \brief broadcast data from root to all nodes, this function can fail, and will return the cause of failure
|
||||
* \param sendrecvbuf_ buffer for both sending and recving data
|
||||
* \param sendrecvbuf_ buffer for both sending and receiving data
|
||||
* \param total_size the size of the data to be broadcasted
|
||||
* \param root the root worker id to broadcast the data
|
||||
* \return this function can return kSuccess, kSockError, kGetExcept, see ReturnType for details
|
||||
@@ -851,7 +851,7 @@ AllreduceBase::TryAllgatherRing(void *sendrecvbuf_, size_t total_size,
|
||||
*
|
||||
* Ring-based algorithm
|
||||
*
|
||||
* \param sendrecvbuf_ buffer for both sending and recving data
|
||||
* \param sendrecvbuf_ buffer for both sending and receiving data
|
||||
* \param type_nbytes the unit number of bytes the type have
|
||||
* \param count number of elements to be reduced
|
||||
* \param reducer reduce function
|
||||
@@ -952,7 +952,7 @@ AllreduceBase::TryReduceScatterRing(void *sendrecvbuf_,
|
||||
* \brief perform in-place allreduce, on sendrecvbuf
|
||||
* use a ring based algorithm
|
||||
*
|
||||
* \param sendrecvbuf_ buffer for both sending and recving data
|
||||
* \param sendrecvbuf_ buffer for both sending and receiving data
|
||||
* \param type_nbytes the unit number of bytes the type have
|
||||
* \param count number of elements to be reduced
|
||||
* \param reducer reduce function
|
||||
|
||||
@@ -111,12 +111,12 @@ class AllreduceBase : public IEngine {
|
||||
/*!
|
||||
* \brief perform in-place allreduce, on sendrecvbuf
|
||||
* this function is NOT thread-safe
|
||||
* \param sendrecvbuf_ buffer for both sending and recving data
|
||||
* \param sendrecvbuf_ buffer for both sending and receiving data
|
||||
* \param type_nbytes the unit number of bytes the type have
|
||||
* \param count number of elements to be reduced
|
||||
* \param reducer reduce function
|
||||
* \param prepare_func Lazy preprocessing function, lazy prepare_fun(prepare_arg)
|
||||
* will be called by the function before performing Allreduce, to intialize the data in sendrecvbuf_.
|
||||
* will be called by the function before performing Allreduce, to initialize the data in sendrecvbuf_.
|
||||
* If the result of Allreduce can be recovered directly, then prepare_func will NOT be called
|
||||
* \param prepare_arg argument to be passed into the lazy preprocessing function
|
||||
*/
|
||||
@@ -131,7 +131,7 @@ class AllreduceBase : public IEngine {
|
||||
}
|
||||
/*!
|
||||
* \brief broadcast data from root to all nodes
|
||||
* \param sendrecvbuf_ buffer for both sending and recving data
|
||||
* \param sendrecvbuf_ buffer for both sending and receiving data
|
||||
* \param size the size of the data to be broadcasted
|
||||
* \param root the root worker id to broadcast the data
|
||||
* \param _file caller file name used to generate unique cache key
|
||||
@@ -146,7 +146,7 @@ class AllreduceBase : public IEngine {
|
||||
/*!
|
||||
* \brief load latest check point
|
||||
* \param global_model pointer to the globally shared model/state
|
||||
* when calling this function, the caller need to gauranttees that global_model
|
||||
* when calling this function, the caller needs to guarantee that global_model
|
||||
* is the same in all nodes
|
||||
* \param local_model pointer to local model, that is specific to current node/rank
|
||||
* this can be NULL when no local model is needed
|
||||
@@ -174,7 +174,7 @@ class AllreduceBase : public IEngine {
|
||||
* every time we call check point, there is a version number which will increase by one
|
||||
*
|
||||
* \param global_model pointer to the globally shared model/state
|
||||
* when calling this function, the caller need to gauranttees that global_model
|
||||
* when calling this function, the caller needs to guarantee that global_model
|
||||
* is the same in all nodes
|
||||
* \param local_model pointer to local model, that is specific to current node/rank
|
||||
* this can be NULL when no local state is needed
|
||||
@@ -191,11 +191,11 @@ class AllreduceBase : public IEngine {
|
||||
}
|
||||
/*!
|
||||
* \brief This function can be used to replace CheckPoint for global_model only,
|
||||
* when certain condition is met(see detailed expplaination).
|
||||
* when certain condition is met(see detailed explanation).
|
||||
*
|
||||
* This is a "lazy" checkpoint such that only the pointer to global_model is
|
||||
* remembered and no memory copy is taken. To use this function, the user MUST ensure that:
|
||||
* The global_model must remain unchanged util last call of Allreduce/Broadcast in current version finishs.
|
||||
* The global_model must remain unchanged until the last call of Allreduce/Broadcast in current version finishes.
|
||||
* In other words, global_model can be changed only between the last call of
|
||||
* Allreduce/Broadcast and LazyCheckPoint in current version
|
||||
*
|
||||
@@ -205,7 +205,7 @@ class AllreduceBase : public IEngine {
|
||||
* If the user only changes global_model in code3, then LazyCheckPoint can be used to
|
||||
* improve efficiency of the program.
|
||||
* \param global_model pointer to the globally shared model/state
|
||||
* when calling this function, the caller need to gauranttees that global_model
|
||||
* when calling this function, the caller needs to guarantee that global_model
|
||||
* is the same in all nodes
|
||||
* \sa LoadCheckPoint, CheckPoint, VersionNumber
|
||||
*/
|
||||
@@ -405,7 +405,7 @@ class AllreduceBase : public IEngine {
|
||||
* It only means the current node get the correct result of Allreduce.
|
||||
* However, it means every node finishes LAST call(instead of this one) of Allreduce/Bcast
|
||||
*
|
||||
* \param sendrecvbuf_ buffer for both sending and recving data
|
||||
* \param sendrecvbuf_ buffer for both sending and receiving data
|
||||
* \param type_nbytes the unit number of bytes the type have
|
||||
* \param count number of elements to be reduced
|
||||
* \param reducer reduce function
|
||||
@@ -429,7 +429,7 @@ class AllreduceBase : public IEngine {
|
||||
* \brief perform in-place allreduce, on sendrecvbuf,
|
||||
* this function implements tree-shape reduction
|
||||
*
|
||||
* \param sendrecvbuf_ buffer for both sending and recving data
|
||||
* \param sendrecvbuf_ buffer for both sending and receiving data
|
||||
* \param type_nbytes the unit number of bytes the type have
|
||||
* \param count number of elements to be reduced
|
||||
* \param reducer reduce function
|
||||
@@ -465,7 +465,7 @@ class AllreduceBase : public IEngine {
|
||||
* the k-th segment is defined by [k * step, min((k + 1) * step,count) )
|
||||
* where step = ceil(count / world_size)
|
||||
*
|
||||
* \param sendrecvbuf_ buffer for both sending and recving data
|
||||
* \param sendrecvbuf_ buffer for both sending and receiving data
|
||||
* \param type_nbytes the unit number of bytes the type have
|
||||
* \param count number of elements to be reduced
|
||||
* \param reducer reduce function
|
||||
@@ -480,7 +480,7 @@ class AllreduceBase : public IEngine {
|
||||
* \brief perform in-place allreduce, on sendrecvbuf
|
||||
* use a ring based algorithm, reduce-scatter + allgather
|
||||
*
|
||||
* \param sendrecvbuf_ buffer for both sending and recving data
|
||||
* \param sendrecvbuf_ buffer for both sending and receiving data
|
||||
* \param type_nbytes the unit number of bytes the type have
|
||||
* \param count number of elements to be reduced
|
||||
* \param reducer reduce function
|
||||
@@ -505,7 +505,7 @@ class AllreduceBase : public IEngine {
|
||||
int seq_counter{0}; // NOLINT
|
||||
// version number of model
|
||||
int version_number {0}; // NOLINT
|
||||
// whether the job is running in hadoop
|
||||
// whether the job is running in Hadoop
|
||||
bool hadoop_mode; // NOLINT
|
||||
//---- local data related to link ----
|
||||
// index of parent link, can be -1, meaning this is root of the tree
|
||||
@@ -540,9 +540,9 @@ class AllreduceBase : public IEngine {
|
||||
size_t reduce_buffer_size; // NOLINT
|
||||
// reduction method
|
||||
int reduce_method; // NOLINT
|
||||
// mininum count of cells to use ring based method
|
||||
// minimum count of cells to use ring based method
|
||||
size_t reduce_ring_mincount; // NOLINT
|
||||
// minimul block size per tree reduce
|
||||
// minimum block size per tree reduce
|
||||
size_t tree_reduce_minsize; // NOLINT
|
||||
// current rank
|
||||
int rank; // NOLINT
|
||||
|
||||
@@ -121,7 +121,7 @@ void ReduceHandle::Allreduce(void *sendrecvbuf,
|
||||
size_t type_nbytes, size_t count,
|
||||
IEngine::PreprocFunction prepare_fun,
|
||||
void *prepare_arg) {
|
||||
utils::Assert(redfunc_ != nullptr, "must intialize handle to call AllReduce");
|
||||
utils::Assert(redfunc_ != nullptr, "must initialize handle to call AllReduce");
|
||||
GetEngine()->Allreduce(sendrecvbuf, type_nbytes, count,
|
||||
redfunc_, prepare_fun, prepare_arg);
|
||||
}
|
||||
|
||||
@@ -223,7 +223,7 @@ void ReduceHandle::Allreduce(void *sendrecvbuf,
|
||||
size_t type_nbytes, size_t count,
|
||||
IEngine::PreprocFunction prepare_fun,
|
||||
void *prepare_arg) {
|
||||
utils::Assert(handle_ != NULL, "must intialize handle to call AllReduce");
|
||||
utils::Assert(handle_ != NULL, "must initialize handle to call AllReduce");
|
||||
MPI::Op *op = reinterpret_cast<MPI::Op*>(handle_);
|
||||
MPI::Datatype *dtype = reinterpret_cast<MPI::Datatype*>(htype_);
|
||||
if (created_type_nbytes_ != type_nbytes || dtype == NULL) {
|
||||
|
||||
Reference in New Issue
Block a user