add status report

This commit is contained in:
tqchen 2014-12-05 09:49:26 -08:00
parent ab278513ab
commit 7765e2dc55
3 changed files with 15 additions and 3 deletions

View File

@ -144,6 +144,16 @@ class AllreduceBase : public IEngine {
/*!
 * \brief virtual hook; this implementation provides no behavior and only
 *  calls utils::Error with the message "InitAfterException: not implemented"
 *  NOTE(review): presumably meant to be overridden by engines that can
 *  re-initialize after an exception (inferred from the name) - confirm
 */
virtual void InitAfterException(void) {
utils::Error("InitAfterException: not implemented");
}
/*!
 * \brief print a one-line status message to stderr when hadoop_mode is nonzero
 *  the line begins with "reporter:status:" and carries version_number
 *  (as the phase) and seq_counter (as the operation), each zero-padded
 *  to at least three digits; when hadoop_mode is zero nothing is printed
 */
inline void ReportStatus(void) const {
  // status lines are only emitted in hadoop mode
  if (hadoop_mode == 0) return;
  fprintf(stderr,
          "reporter:status:Rabit Phase[%03d] Operation %03d\n",
          version_number, seq_counter);
}
protected:
/*! \brief enumeration of possible returning results from Try functions */
@ -284,6 +294,10 @@ class AllreduceBase : public IEngine {
*/
ReturnType TryBroadcast(void *sendrecvbuf_, size_t size, int root);
//---- data structure related to model ----
// call sequence counter: records how many calls have been made
// since the last call to CheckPoint or LoadCheckPoint
int seq_counter;
// version number of the model
int version_number;
// whether the job is running in hadoop; used as a flag, nonzero means hadoop
int hadoop_mode;

View File

@ -645,6 +645,7 @@ bool AllreduceRobust::RecoverExec(void *buf, size_t size, int flag, int seqno) {
// request
ActionSummary req(flag, seqno);
while (true) {
this->ReportStatus();
// action
ActionSummary act = req;
// get the reduced action

View File

@ -402,9 +402,6 @@ class AllreduceRobust : public AllreduceBase {
size_t out_index)
);
//---- recovery data structure ----
// call sequence counter: records how many calls have been made
// since the last call to CheckPoint or LoadCheckPoint
int seq_counter;
// the round of the result buffer, used to mode the result
// NOTE(review): "mode" is presumably "mod" (modulo) - confirm against usage
int result_buffer_round;
// result buffer of all reduce