Fix compiler warnings. (#8022)
- Remove/fix unused parameters - Remove deprecated code in rabit. - Update dmlc-core.
This commit is contained in:
@@ -135,38 +135,6 @@ RABIT_DLL int RabitAllreduce(void *sendrecvbuf, size_t count, int enum_dtype,
|
||||
int enum_op, void (*prepare_fun)(void *arg),
|
||||
void *prepare_arg);
|
||||
|
||||
/*!
|
||||
* \brief load latest check point
|
||||
* \param out_global_model hold output of serialized global_model
|
||||
* \param out_global_len the output length of serialized global model
|
||||
* \param out_local_model hold output of serialized local_model, can be NULL
|
||||
* \param out_local_len the output length of serialized local model, can be NULL
|
||||
*
|
||||
* \return the version number of check point loaded
|
||||
* if returned version == 0, this means no model has been CheckPointed
|
||||
* nothing will be touched
|
||||
*/
|
||||
RABIT_DLL int RabitLoadCheckPoint(char **out_global_model,
|
||||
rbt_ulong *out_global_len,
|
||||
char **out_local_model,
|
||||
rbt_ulong *out_local_len);
|
||||
/*!
|
||||
* \brief checkpoint the model, meaning we finished a stage of execution
|
||||
* every time we call check point, there is a version number which will increase by one
|
||||
*
|
||||
* \param global_model hold content of serialized global_model
|
||||
* \param global_len the content length of serialized global model
|
||||
* \param local_model hold content of serialized local_model, can be NULL
|
||||
* \param local_len the content length of serialized local model, can be NULL
|
||||
*
|
||||
* NOTE: local_model requires explicit replication of the model for fault-tolerance, which will
|
||||
* bring replication cost in CheckPoint function. global_model do not need explicit replication.
|
||||
* So only CheckPoint with global_model if possible
|
||||
*/
|
||||
RABIT_DLL void RabitCheckPoint(const char *global_model,
|
||||
rbt_ulong global_len,
|
||||
const char *local_model,
|
||||
rbt_ulong local_len);
|
||||
/*!
|
||||
* \return version number of current stored model,
|
||||
* which means how many calls to CheckPoint we made so far
|
||||
|
||||
@@ -87,68 +87,13 @@ class IEngine {
|
||||
*/
|
||||
virtual void Broadcast(void *sendrecvbuf_, size_t size, int root) = 0;
|
||||
/*!
|
||||
* \brief loads the latest check point
|
||||
* \param global_model pointer to the globally shared model/state
|
||||
* when calling this function, the caller needs to guarantee that the global_model
|
||||
* is the same in all nodes
|
||||
* \param local_model pointer to the local model that is specific to current node/rank
|
||||
* this can be NULL when no local model is needed
|
||||
*
|
||||
* \return the version number of the model loaded
|
||||
* if returned version == 0, this means no model has been CheckPointed
|
||||
* the p_model is not touched, users should do necessary initialization by themselves
|
||||
*
|
||||
* Common usage example:
|
||||
* int iter = rabit::LoadCheckPoint(&model);
|
||||
* if (iter == 0) model.InitParameters();
|
||||
* for (i = iter; i < max_iter; ++i) {
|
||||
* do many things, include allreduce
|
||||
* rabit::CheckPoint(model);
|
||||
* }
|
||||
*
|
||||
* \sa CheckPoint, VersionNumber
|
||||
* deprecated
|
||||
*/
|
||||
virtual int LoadCheckPoint(Serializable *global_model,
|
||||
Serializable *local_model = nullptr) = 0;
|
||||
virtual int LoadCheckPoint() = 0;
|
||||
/*!
|
||||
* \brief checkpoints the model, meaning a stage of execution was finished
|
||||
* every time we call check point, a version number increases by one
|
||||
*
|
||||
* \param global_model pointer to the globally shared model/state
|
||||
* when calling this function, the caller needs to guarantee that the global_model
|
||||
* is the same in every node
|
||||
* \param local_model pointer to the local model that is specific to current node/rank
|
||||
* this can be NULL when no local state is needed
|
||||
*
|
||||
* NOTE: local_model requires explicit replication of the model for fault-tolerance, which will
|
||||
* bring replication cost in CheckPoint function. global_model does not need explicit replication.
|
||||
* So, only CheckPoint with global_model if possible
|
||||
*
|
||||
* \sa LoadCheckPoint, VersionNumber
|
||||
* \brief Increase internal version number. Deprecated.
|
||||
*/
|
||||
virtual void CheckPoint(const Serializable *global_model,
|
||||
const Serializable *local_model = nullptr) = 0;
|
||||
/*!
|
||||
* \brief This function can be used to replace CheckPoint for global_model only,
|
||||
* when certain condition is met (see detailed explanation).
|
||||
*
|
||||
* This is a "lazy" checkpoint such that only the pointer to global_model is
|
||||
* remembered and no memory copy is taken. To use this function, the user MUST ensure that:
|
||||
* The global_model must remain unchanged until the last call of Allreduce/Broadcast in the current version finishes.
|
||||
* In other words, global_model can be changed only between the last call of
|
||||
* Allreduce/Broadcast and LazyCheckPoint in the current version
|
||||
*
|
||||
* For example, suppose the calling sequence is:
|
||||
* LazyCheckPoint, code1, Allreduce, code2, Broadcast, code3, LazyCheckPoint
|
||||
*
|
||||
* If the user can only change global_model in code3, then LazyCheckPoint can be used to
|
||||
* improve the efficiency of the program.
|
||||
* \param global_model pointer to the globally shared model/state
|
||||
* when calling this function, the caller needs to guarantee that global_model
|
||||
* is the same in every node
|
||||
* \sa LoadCheckPoint, CheckPoint, VersionNumber
|
||||
*/
|
||||
virtual void LazyCheckPoint(const Serializable *global_model) = 0;
|
||||
virtual void CheckPoint() = 0;
|
||||
/*!
|
||||
* \return version number of the current stored model,
|
||||
* which means how many calls to CheckPoint we made so far
|
||||
|
||||
@@ -92,10 +92,10 @@ struct BitOR {
|
||||
dst |= src;
|
||||
}
|
||||
};
|
||||
template<typename OP, typename DType>
|
||||
inline void Reducer(const void *src_, void *dst_, int len, const MPI::Datatype &dtype) {
|
||||
const DType* src = static_cast<const DType*>(src_);
|
||||
DType* dst = (DType*)dst_; // NOLINT(*)
|
||||
template <typename OP, typename DType>
|
||||
inline void Reducer(const void *src_, void *dst_, int len, const MPI::Datatype &) {
|
||||
const DType *src = static_cast<const DType *>(src_);
|
||||
DType *dst = (DType *)dst_; // NOLINT(*)
|
||||
for (int i = 0; i < len; i++) {
|
||||
OP::Reduce(dst[i], src[i]);
|
||||
}
|
||||
@@ -207,20 +207,11 @@ inline void TrackerPrintf(const char *fmt, ...) {
|
||||
}
|
||||
|
||||
#endif // RABIT_STRICT_CXX98_
|
||||
// load latest check point
|
||||
inline int LoadCheckPoint(Serializable *global_model,
|
||||
Serializable *local_model) {
|
||||
return engine::GetEngine()->LoadCheckPoint(global_model, local_model);
|
||||
}
|
||||
// checkpoint the model, meaning we finished a stage of execution
|
||||
inline void CheckPoint(const Serializable *global_model,
|
||||
const Serializable *local_model) {
|
||||
engine::GetEngine()->CheckPoint(global_model, local_model);
|
||||
}
|
||||
// lazy checkpoint the model, only remember the pointer to global_model
|
||||
inline void LazyCheckPoint(const Serializable *global_model) {
|
||||
engine::GetEngine()->LazyCheckPoint(global_model);
|
||||
}
|
||||
|
||||
// deprecated, planned for removal after checkpointing from JVM package is removed.
|
||||
inline int LoadCheckPoint() { return engine::GetEngine()->LoadCheckPoint(); }
|
||||
// deprecated, increase internal version number
|
||||
inline void CheckPoint() { engine::GetEngine()->CheckPoint(); }
|
||||
// return the version number of currently stored model
|
||||
inline int VersionNumber() {
|
||||
return engine::GetEngine()->VersionNumber();
|
||||
|
||||
@@ -397,7 +397,7 @@ class TCPSocket : public Socket{
|
||||
*/
|
||||
inline void Create(int af = PF_INET) {
|
||||
#if !IS_MINGW()
|
||||
sockfd = socket(PF_INET, SOCK_STREAM, 0);
|
||||
sockfd = socket(af, SOCK_STREAM, 0);
|
||||
if (sockfd == kInvalidSocket) {
|
||||
Socket::Error("Create");
|
||||
}
|
||||
|
||||
@@ -205,69 +205,16 @@ template<typename OP, typename DType>
|
||||
inline void Allreduce(DType *sendrecvbuf, size_t count,
|
||||
std::function<void()> prepare_fun);
|
||||
#endif // C++11
|
||||
|
||||
/*!
|
||||
* \brief loads the latest check point
|
||||
* \param global_model pointer to the globally shared model/state
|
||||
* when calling this function, the caller needs to guarantee that the global_model
|
||||
* is the same in every node
|
||||
* \param local_model pointer to the local model that is specific to the current node/rank
|
||||
* this can be NULL when no local model is needed
|
||||
*
|
||||
* \return the version number of the check point loaded
|
||||
* if returned version == 0, this means no model has been CheckPointed
|
||||
* the p_model is not touched, users should do the necessary initialization by themselves
|
||||
*
|
||||
* \code{.cpp}
|
||||
* // Example usage code of LoadCheckPoint
|
||||
* int iter = rabit::LoadCheckPoint(&model);
|
||||
* if (iter == 0) model.InitParameters();
|
||||
* for (i = iter; i < max_iter; ++i) {
|
||||
* // do many things, include allreduce
|
||||
* rabit::CheckPoint(model);
|
||||
* }
|
||||
* \endcode
|
||||
* \sa CheckPoint, VersionNumber
|
||||
* \brief deprecated, planned for removal after checkpointing from JVM package is removed.
|
||||
*/
|
||||
inline int LoadCheckPoint(Serializable *global_model,
|
||||
Serializable *local_model = nullptr);
|
||||
inline int LoadCheckPoint();
|
||||
/*!
|
||||
* \brief checkpoints the model, meaning a stage of execution has finished.
|
||||
* every time we call check point, a version number will be increased by one
|
||||
*
|
||||
* \param global_model pointer to the globally shared model/state
|
||||
* when calling this function, the caller needs to guarantee that the global_model
|
||||
* is the same in every node
|
||||
* \param local_model pointer to the local model that is specific to the current node/rank
|
||||
* this can be NULL when no local state is needed
|
||||
* NOTE: local_model requires explicit replication of the model for fault-tolerance, which will
|
||||
* bring replication cost in the CheckPoint function. global_model does not need explicit replication.
|
||||
* So, only CheckPoint with the global_model if possible
|
||||
* \sa LoadCheckPoint, VersionNumber
|
||||
*/
|
||||
inline void CheckPoint(const Serializable *global_model,
|
||||
const Serializable *local_model = nullptr);
|
||||
/*!
|
||||
* \brief This function can be used to replace CheckPoint for global_model only,
|
||||
* when certain condition is met (see detailed explanation).
|
||||
*
|
||||
* This is a "lazy" checkpoint such that only the pointer to the global_model is
|
||||
* remembered and no memory copy is taken. To use this function, the user MUST ensure that:
|
||||
* The global_model must remain unchanged until the last call of Allreduce/Broadcast in the current version finishes.
|
||||
* In other words, the global_model model can be changed only between the last call of
|
||||
* Allreduce/Broadcast and LazyCheckPoint, both in the same version
|
||||
*
|
||||
* For example, suppose the calling sequence is:
|
||||
* LazyCheckPoint, code1, Allreduce, code2, Broadcast, code3, LazyCheckPoint/(or can be CheckPoint)
|
||||
*
|
||||
* Then the user MUST only change the global_model in code3.
|
||||
*
|
||||
* The use of LazyCheckPoint instead of CheckPoint will improve the efficiency of the program.
|
||||
* \param global_model pointer to the globally shared model/state
|
||||
* when calling this function, the caller needs to guarantee that the global_model
|
||||
* is the same in every node
|
||||
* \sa LoadCheckPoint, CheckPoint, VersionNumber
|
||||
* \brief deprecated, planned for removal after checkpointing from JVM package is removed.
|
||||
*/
|
||||
inline void LazyCheckPoint(const Serializable *global_model);
|
||||
inline void CheckPoint();
|
||||
|
||||
/*!
|
||||
* \return version number of the current stored model,
|
||||
* which means how many calls to CheckPoint we made so far
|
||||
|
||||
@@ -144,74 +144,13 @@ class AllreduceBase : public IEngine {
|
||||
"Broadcast failed");
|
||||
}
|
||||
/*!
|
||||
* \brief load latest check point
|
||||
* \param global_model pointer to the globally shared model/state
|
||||
* when calling this function, the caller needs to guarantee that global_model
|
||||
* is the same in all nodes
|
||||
* \param local_model pointer to local model, that is specific to current node/rank
|
||||
* this can be NULL when no local model is needed
|
||||
*
|
||||
* \return the version number of check point loaded
|
||||
* if returned version == 0, this means no model has been CheckPointed
|
||||
* the p_model is not touched, user should do necessary initialization by themselves
|
||||
*
|
||||
* Common usage example:
|
||||
* int iter = rabit::LoadCheckPoint(&model);
|
||||
* if (iter == 0) model.InitParameters();
|
||||
* for (i = iter; i < max_iter; ++i) {
|
||||
* do many things, include allreduce
|
||||
* rabit::CheckPoint(model);
|
||||
* }
|
||||
*
|
||||
* \brief deprecated
|
||||
* \sa CheckPoint, VersionNumber
|
||||
*/
|
||||
int LoadCheckPoint(Serializable *global_model,
|
||||
Serializable *local_model = nullptr) override {
|
||||
return 0;
|
||||
}
|
||||
/*!
|
||||
* \brief checkpoint the model, meaning we finished a stage of execution
|
||||
* every time we call check point, there is a version number which will increase by one
|
||||
*
|
||||
* \param global_model pointer to the globally shared model/state
|
||||
* when calling this function, the caller needs to guarantee that global_model
|
||||
* is the same in all nodes
|
||||
* \param local_model pointer to local model, that is specific to current node/rank
|
||||
* this can be NULL when no local state is needed
|
||||
*
|
||||
* NOTE: local_model requires explicit replication of the model for fault-tolerance, which will
|
||||
* bring replication cost in CheckPoint function. global_model do not need explicit replication.
|
||||
* So only CheckPoint with global_model if possible
|
||||
*
|
||||
* \sa LoadCheckPoint, VersionNumber
|
||||
*/
|
||||
void CheckPoint(const Serializable *global_model,
|
||||
const Serializable *local_model = nullptr) override {
|
||||
version_number += 1;
|
||||
}
|
||||
/*!
|
||||
* \brief This function can be used to replace CheckPoint for global_model only,
|
||||
* when certain condition is met(see detailed explanation).
|
||||
*
|
||||
* This is a "lazy" checkpoint such that only the pointer to global_model is
|
||||
* remembered and no memory copy is taken. To use this function, the user MUST ensure that:
|
||||
* The global_model must remain unchanged until the last call of Allreduce/Broadcast in current version finishes.
|
||||
* In other words, global_model can be changed only between the last call of
|
||||
* Allreduce/Broadcast and LazyCheckPoint in current version
|
||||
*
|
||||
* For example, suppose the calling sequence is:
|
||||
* LazyCheckPoint, code1, Allreduce, code2, Broadcast, code3, LazyCheckPoint
|
||||
*
|
||||
* If the user can only change global_model in code3, then LazyCheckPoint can be used to
|
||||
* improve efficiency of the program.
|
||||
* \param global_model pointer to the globally shared model/state
|
||||
* when calling this function, the caller needs to guarantee that global_model
|
||||
* is the same in all nodes
|
||||
* \sa LoadCheckPoint, CheckPoint, VersionNumber
|
||||
*/
|
||||
void LazyCheckPoint(const Serializable *global_model) override {
|
||||
version_number += 1;
|
||||
}
|
||||
int LoadCheckPoint() override { return 0; }
|
||||
|
||||
// deprecated, increase internal version number
|
||||
void CheckPoint() override { version_number += 1; }
|
||||
/*!
|
||||
* \return version number of current stored model,
|
||||
* which means how many calls to CheckPoint we made so far
|
||||
|
||||
@@ -65,31 +65,21 @@ class AllreduceMock : public AllreduceBase {
|
||||
this->Verify(MockKey(rank, version_number, seq_counter, num_trial_), "Broadcast");
|
||||
AllreduceBase::Broadcast(sendrecvbuf_, total_size, root);
|
||||
}
|
||||
int LoadCheckPoint(Serializable *global_model,
|
||||
Serializable *local_model) override {
|
||||
int LoadCheckPoint() override {
|
||||
tsum_allreduce_ = 0.0;
|
||||
tsum_allgather_ = 0.0;
|
||||
time_checkpoint_ = dmlc::GetTime();
|
||||
if (force_local_ == 0) {
|
||||
return AllreduceBase::LoadCheckPoint(global_model, local_model);
|
||||
return AllreduceBase::LoadCheckPoint();
|
||||
} else {
|
||||
DummySerializer dum;
|
||||
ComboSerializer com(global_model, local_model);
|
||||
return AllreduceBase::LoadCheckPoint(&dum, &com);
|
||||
return AllreduceBase::LoadCheckPoint();
|
||||
}
|
||||
}
|
||||
void CheckPoint(const Serializable *global_model,
|
||||
const Serializable *local_model) override {
|
||||
void CheckPoint() override {
|
||||
this->Verify(MockKey(rank, version_number, seq_counter, num_trial_), "CheckPoint");
|
||||
double tstart = dmlc::GetTime();
|
||||
double tbet_chkpt = tstart - time_checkpoint_;
|
||||
if (force_local_ == 0) {
|
||||
AllreduceBase::CheckPoint(global_model, local_model);
|
||||
} else {
|
||||
DummySerializer dum;
|
||||
ComboSerializer com(global_model, local_model);
|
||||
AllreduceBase::CheckPoint(&dum, &com);
|
||||
}
|
||||
AllreduceBase::CheckPoint();
|
||||
time_checkpoint_ = dmlc::GetTime();
|
||||
double tcost = dmlc::GetTime() - tstart;
|
||||
if (report_stats_ != 0 && rank == 0) {
|
||||
@@ -105,11 +95,6 @@ class AllreduceMock : public AllreduceBase {
|
||||
tsum_allgather_ = 0.0;
|
||||
}
|
||||
|
||||
void LazyCheckPoint(const Serializable *global_model) override {
|
||||
this->Verify(MockKey(rank, version_number, seq_counter, num_trial_), "LazyCheckPoint");
|
||||
AllreduceBase::LazyCheckPoint(global_model);
|
||||
}
|
||||
|
||||
protected:
|
||||
// force checkpoint to local
|
||||
int force_local_;
|
||||
@@ -122,30 +107,6 @@ class AllreduceMock : public AllreduceBase {
|
||||
double time_checkpoint_;
|
||||
|
||||
private:
|
||||
struct DummySerializer : public Serializable {
|
||||
void Load(Stream *fi) override {}
|
||||
void Save(Stream *fo) const override {}
|
||||
};
|
||||
struct ComboSerializer : public Serializable {
|
||||
Serializable *lhs;
|
||||
Serializable *rhs;
|
||||
const Serializable *c_lhs;
|
||||
const Serializable *c_rhs;
|
||||
ComboSerializer(Serializable *lhs, Serializable *rhs)
|
||||
: lhs(lhs), rhs(rhs), c_lhs(lhs), c_rhs(rhs) {
|
||||
}
|
||||
ComboSerializer(const Serializable *lhs, const Serializable *rhs)
|
||||
: lhs(nullptr), rhs(nullptr), c_lhs(lhs), c_rhs(rhs) {
|
||||
}
|
||||
void Load(Stream *fi) override {
|
||||
if (lhs != nullptr) lhs->Load(fi);
|
||||
if (rhs != nullptr) rhs->Load(fi);
|
||||
}
|
||||
void Save(Stream *fo) const override {
|
||||
if (c_lhs != nullptr) c_lhs->Save(fo);
|
||||
if (c_rhs != nullptr) c_rhs->Save(fo);
|
||||
}
|
||||
};
|
||||
// key to identify the mock stage
|
||||
struct MockKey {
|
||||
int rank;
|
||||
|
||||
@@ -100,8 +100,7 @@ void Allreduce_(void *sendrecvbuf, // NOLINT
|
||||
mpi::OpType ,
|
||||
IEngine::PreprocFunction prepare_fun,
|
||||
void *prepare_arg) {
|
||||
GetEngine()->Allreduce(sendrecvbuf, type_nbytes, count, red, prepare_fun,
|
||||
prepare_arg);
|
||||
GetEngine()->Allreduce(sendrecvbuf, type_nbytes, count, red, prepare_fun, prepare_arg);
|
||||
}
|
||||
} // namespace engine
|
||||
} // namespace rabit
|
||||
|
||||
@@ -120,6 +120,7 @@ void Allreduce(void *sendrecvbuf,
|
||||
default: utils::Error("unknown enum_op");
|
||||
}
|
||||
}
|
||||
|
||||
void Allgather(void *sendrecvbuf_,
|
||||
size_t total_size,
|
||||
size_t beginIndex,
|
||||
@@ -298,46 +299,6 @@ RABIT_DLL int RabitAllreduce(void *sendrecvbuf, size_t count, int enum_dtype,
|
||||
API_END()
|
||||
}
|
||||
|
||||
RABIT_DLL int RabitLoadCheckPoint(char **out_global_model,
|
||||
rbt_ulong *out_global_len,
|
||||
char **out_local_model,
|
||||
rbt_ulong *out_local_len) {
|
||||
// no-op as of XGBoost 1.3
|
||||
using rabit::BeginPtr;
|
||||
using namespace rabit::c_api; // NOLINT(*)
|
||||
static std::string global_buffer;
|
||||
static std::string local_buffer;
|
||||
|
||||
ReadWrapper sg(&global_buffer);
|
||||
ReadWrapper sl(&local_buffer);
|
||||
int version;
|
||||
|
||||
if (out_local_model == nullptr) {
|
||||
version = rabit::LoadCheckPoint(&sg, nullptr);
|
||||
*out_global_model = BeginPtr(global_buffer);
|
||||
*out_global_len = static_cast<rbt_ulong>(global_buffer.length());
|
||||
} else {
|
||||
version = rabit::LoadCheckPoint(&sg, &sl);
|
||||
*out_global_model = BeginPtr(global_buffer);
|
||||
*out_global_len = static_cast<rbt_ulong>(global_buffer.length());
|
||||
*out_local_model = BeginPtr(local_buffer);
|
||||
*out_local_len = static_cast<rbt_ulong>(local_buffer.length());
|
||||
}
|
||||
return version;
|
||||
}
|
||||
|
||||
RABIT_DLL void RabitCheckPoint(const char *global_model, rbt_ulong global_len,
|
||||
const char *local_model, rbt_ulong local_len) {
|
||||
using namespace rabit::c_api; // NOLINT(*)
|
||||
WriteWrapper sg(global_model, global_len);
|
||||
WriteWrapper sl(local_model, local_len);
|
||||
if (local_model == nullptr) {
|
||||
rabit::CheckPoint(&sg, nullptr);
|
||||
} else {
|
||||
rabit::CheckPoint(&sg, &sl);
|
||||
}
|
||||
}
|
||||
|
||||
RABIT_DLL int RabitVersionNumber() {
|
||||
return rabit::VersionNumber();
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user