check in license
This commit is contained in:
parent
cc5efb8d81
commit
3f22596e3c
13
LICENSE
Normal file
13
LICENSE
Normal file
@@ -0,0 +1,13 @@
|
||||
Copyright (c) 2014 by Contributors
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
@@ -20,6 +20,7 @@ AllreduceRobust::AllreduceRobust(void) {
|
||||
result_buffer_round = 1;
|
||||
num_local_replica = 0;
|
||||
seq_counter = 0;
|
||||
local_chkpt_version = 0;
|
||||
}
|
||||
/*! \brief shutdown the engine */
|
||||
void AllreduceRobust::Shutdown(void) {
|
||||
@@ -619,16 +620,16 @@ AllreduceRobust::ReturnType AllreduceRobust::TryLoadCheckPoint(bool requester) {
|
||||
// check in local data
|
||||
RecoverType role = requester ? kRequestData : kHaveData;
|
||||
ReturnType succ;
|
||||
if (num_local_replica != 0) {
|
||||
if (false) {
|
||||
if (requester) {
|
||||
// clear existing history, if any, before load
|
||||
local_rptr[local_chkpt_version].clear();
|
||||
local_chkpt[local_chkpt_version].clear();
|
||||
}
|
||||
// recover local checkpoint
|
||||
succ = TryRecoverLocalState(&local_rptr[local_chkpt_version],
|
||||
&local_chkpt[local_chkpt_version]);
|
||||
if (succ != kSuccess) return succ;
|
||||
//succ = TryRecoverLocalState(&local_rptr[local_chkpt_version],
|
||||
//m&local_chkpt[local_chkpt_version]);
|
||||
//if (succ != kSuccess) return succ;
|
||||
int nlocal = std::max(static_cast<int>(local_rptr[local_chkpt_version].size()) - 1, 0);
|
||||
// check if everyone is OK
|
||||
unsigned state = 0;
|
||||
@@ -817,7 +818,8 @@ AllreduceRobust::TryRecoverLocalState(std::vector<size_t> *p_local_rptr,
|
||||
utils::Assert(chkpt.length() == 0, "local chkpt space inconsistent");
|
||||
}
|
||||
const int n = num_local_replica;
|
||||
{// backward passing, passing state in backward direction of the ring
|
||||
utils::LogPrintf("[%d] backward!!\n", rabit::GetRank());
|
||||
if(false){// backward passing, passing state in backward direction of the ring
|
||||
const int nlocal = static_cast<int>(rptr.size() - 1);
|
||||
utils::Assert(nlocal <= n + 1, "invalid local replica");
|
||||
std::vector<int> msg_back(n + 1);
|
||||
@@ -870,6 +872,8 @@ AllreduceRobust::TryRecoverLocalState(std::vector<size_t> *p_local_rptr,
|
||||
rptr.resize(nlocal + 1); chkpt.resize(rptr.back()); return succ;
|
||||
}
|
||||
}
|
||||
|
||||
utils::LogPrintf("[%d] FORward!!\n", rabit::GetRank());
|
||||
{// forward passing, passing state in forward direction of the ring
|
||||
const int nlocal = static_cast<int>(rptr.size() - 1);
|
||||
utils::Assert(nlocal <= n + 1, "invalid local replica");
|
||||
@@ -933,6 +937,7 @@ AllreduceRobust::TryRecoverLocalState(std::vector<size_t> *p_local_rptr,
|
||||
rptr.resize(nlocal + 1); chkpt.resize(rptr.back()); return succ;
|
||||
}
|
||||
}
|
||||
utils::LogPrintf("[%d] Finished!!\n", rabit::GetRank());
|
||||
return kSuccess;
|
||||
}
|
||||
/*!
|
||||
|
||||
@@ -41,7 +41,7 @@ inline void TestMax(test::Mock &mock, Model *model, Model *local, int ntrial, in
|
||||
}
|
||||
mock.Allreduce<op::Max>(&ndata[0], ndata.size());
|
||||
if (ntrial == iter && rank == 3) {
|
||||
//exit(-1);
|
||||
throw MockException();
|
||||
}
|
||||
for (size_t i = 0; i < ndata.size(); ++i) {
|
||||
float rmax = (i * 1) % z + model->data[i];
|
||||
|
||||
@@ -41,7 +41,7 @@ inline void TestMax(test::Mock &mock, Model *model, int ntrial, int iter) {
|
||||
}
|
||||
mock.Allreduce<op::Max>(&ndata[0], ndata.size());
|
||||
if (ntrial == iter && rank == 3) {
|
||||
exit(-1);
|
||||
// exit(-1);
|
||||
}
|
||||
for (size_t i = 0; i < ndata.size(); ++i) {
|
||||
float rmax = (i * 1) % z + model->data[i];
|
||||
@@ -65,7 +65,7 @@ inline void TestSum(test::Mock &mock, Model *model, int ntrial, int iter) {
|
||||
mock.Allreduce<op::Sum>(&ndata[0], ndata.size());
|
||||
|
||||
if (ntrial == iter && rank == 0) {
|
||||
exit(-1);
|
||||
throw MockException();
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < ndata.size(); ++i) {
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user