check in license

This commit is contained in:
tqchen 2014-12-09 20:57:54 -08:00
parent cc5efb8d81
commit 3f22596e3c
4 changed files with 26 additions and 8 deletions

13
LICENSE Normal file
View File

@@ -0,0 +1,13 @@
Copyright (c) 2014 by Contributors
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

View File

@@ -20,6 +20,7 @@ AllreduceRobust::AllreduceRobust(void) {
result_buffer_round = 1;
num_local_replica = 0;
seq_counter = 0;
local_chkpt_version = 0;
}
/*! \brief shutdown the engine */
void AllreduceRobust::Shutdown(void) {
@@ -619,16 +620,16 @@ AllreduceRobust::ReturnType AllreduceRobust::TryLoadCheckPoint(bool requester) {
// check in local data
RecoverType role = requester ? kRequestData : kHaveData;
ReturnType succ;
if (num_local_replica != 0) {
if (false) {
if (requester) {
// clear existing history, if any, before load
local_rptr[local_chkpt_version].clear();
local_chkpt[local_chkpt_version].clear();
}
// recover local checkpoint
succ = TryRecoverLocalState(&local_rptr[local_chkpt_version],
&local_chkpt[local_chkpt_version]);
if (succ != kSuccess) return succ;
//succ = TryRecoverLocalState(&local_rptr[local_chkpt_version],
//&local_chkpt[local_chkpt_version]);
//if (succ != kSuccess) return succ;
int nlocal = std::max(static_cast<int>(local_rptr[local_chkpt_version].size()) - 1, 0);
// check if everyone is OK
unsigned state = 0;
@@ -817,7 +818,8 @@ AllreduceRobust::TryRecoverLocalState(std::vector<size_t> *p_local_rptr,
utils::Assert(chkpt.length() == 0, "local chkpt space inconsistent");
}
const int n = num_local_replica;
{// backward passing, passing state in backward direction of the ring
utils::LogPrintf("[%d] backward!!\n", rabit::GetRank());
if(false){// backward passing, passing state in backward direction of the ring
const int nlocal = static_cast<int>(rptr.size() - 1);
utils::Assert(nlocal <= n + 1, "invalid local replica");
std::vector<int> msg_back(n + 1);
@@ -870,6 +872,8 @@ AllreduceRobust::TryRecoverLocalState(std::vector<size_t> *p_local_rptr,
rptr.resize(nlocal + 1); chkpt.resize(rptr.back()); return succ;
}
}
utils::LogPrintf("[%d] FORward!!\n", rabit::GetRank());
{// forward passing, passing state in forward direction of the ring
const int nlocal = static_cast<int>(rptr.size() - 1);
utils::Assert(nlocal <= n + 1, "invalid local replica");
@@ -933,6 +937,7 @@ AllreduceRobust::TryRecoverLocalState(std::vector<size_t> *p_local_rptr,
rptr.resize(nlocal + 1); chkpt.resize(rptr.back()); return succ;
}
}
utils::LogPrintf("[%d] Finished!!\n", rabit::GetRank());
return kSuccess;
}
/*!

View File

@@ -41,7 +41,7 @@ inline void TestMax(test::Mock &mock, Model *model, Model *local, int ntrial, int iter) {
}
mock.Allreduce<op::Max>(&ndata[0], ndata.size());
if (ntrial == iter && rank == 3) {
//exit(-1);
throw MockException();
}
for (size_t i = 0; i < ndata.size(); ++i) {
float rmax = (i * 1) % z + model->data[i];

View File

@@ -41,7 +41,7 @@ inline void TestMax(test::Mock &mock, Model *model, int ntrial, int iter) {
}
mock.Allreduce<op::Max>(&ndata[0], ndata.size());
if (ntrial == iter && rank == 3) {
exit(-1);
// exit(-1);
}
for (size_t i = 0; i < ndata.size(); ++i) {
float rmax = (i * 1) % z + model->data[i];
@@ -65,7 +65,7 @@ inline void TestSum(test::Mock &mock, Model *model, int ntrial, int iter) {
mock.Allreduce<op::Sum>(&ndata[0], ndata.size());
if (ntrial == iter && rank == 0) {
exit(-1);
throw MockException();
}
for (size_t i = 0; i < ndata.size(); ++i) {