Merge pull request #1218 from tqchen/master

[DATA] fix async data writing
This commit is contained in:
Tianqi Chen 2016-05-21 19:40:41 -07:00
commit 587999755f
3 changed files with 114 additions and 104 deletions

View File

@ -256,6 +256,8 @@ void SparsePageDMatrix::InitColAccess(const std::vector<bool>& enabled,
name_shards.push_back(prefix + ".col.page"); name_shards.push_back(prefix + ".col.page");
format_shards.push_back(SparsePage::Format::DecideFormat(prefix).second); format_shards.push_back(SparsePage::Format::DecideFormat(prefix).second);
} }
{
SparsePage::Writer writer(name_shards, format_shards, 6); SparsePage::Writer writer(name_shards, format_shards, 6);
std::unique_ptr<SparsePage> page; std::unique_ptr<SparsePage> page;
writer.Alloc(&page); page->Clear(); writer.Alloc(&page); page->Clear();
@ -291,6 +293,7 @@ void SparsePageDMatrix::InitColAccess(const std::vector<bool>& enabled,
fo->Write(buffered_rowset_); fo->Write(buffered_rowset_);
fo->Write(col_size_); fo->Write(col_size_);
fo.reset(nullptr); fo.reset(nullptr);
}
// initialize column data // initialize column data
CHECK(TryInitColData()); CHECK(TryInitColData());
} }

View File

@ -110,6 +110,7 @@ void SparsePageSource::Create(dmlc::Parser<uint32_t>* src,
name_shards.push_back(prefix + ".row.page"); name_shards.push_back(prefix + ".row.page");
format_shards.push_back(SparsePage::Format::DecideFormat(prefix).first); format_shards.push_back(SparsePage::Format::DecideFormat(prefix).first);
} }
{
SparsePage::Writer writer(name_shards, format_shards, 6); SparsePage::Writer writer(name_shards, format_shards, 6);
std::unique_ptr<SparsePage> page; std::unique_ptr<SparsePage> page;
writer.Alloc(&page); page->Clear(); writer.Alloc(&page); page->Clear();
@ -162,6 +163,7 @@ void SparsePageSource::Create(dmlc::Parser<uint32_t>* src,
int tmagic = kMagic; int tmagic = kMagic;
fo->Write(&tmagic, sizeof(tmagic)); fo->Write(&tmagic, sizeof(tmagic));
info.SaveBinary(fo.get()); info.SaveBinary(fo.get());
}
LOG(CONSOLE) << "SparsePageSource: Finished writing to " << name_info; LOG(CONSOLE) << "SparsePageSource: Finished writing to " << name_info;
} }
@ -176,6 +178,7 @@ void SparsePageSource::Create(DMatrix* src,
name_shards.push_back(prefix + ".row.page"); name_shards.push_back(prefix + ".row.page");
format_shards.push_back(SparsePage::Format::DecideFormat(prefix).first); format_shards.push_back(SparsePage::Format::DecideFormat(prefix).first);
} }
{
SparsePage::Writer writer(name_shards, format_shards, 6); SparsePage::Writer writer(name_shards, format_shards, 6);
std::unique_ptr<SparsePage> page; std::unique_ptr<SparsePage> page;
writer.Alloc(&page); page->Clear(); writer.Alloc(&page); page->Clear();
@ -208,6 +211,7 @@ void SparsePageSource::Create(DMatrix* src,
int tmagic = kMagic; int tmagic = kMagic;
fo->Write(&tmagic, sizeof(tmagic)); fo->Write(&tmagic, sizeof(tmagic));
info.SaveBinary(fo.get()); info.SaveBinary(fo.get());
}
LOG(CONSOLE) << "SparsePageSource: Finished writing to " << name_info; LOG(CONSOLE) << "SparsePageSource: Finished writing to " << name_info;
} }

View File

@ -34,6 +34,7 @@ SparsePage::Writer::Writer(
fo->Write(format_shard); fo->Write(format_shard);
std::unique_ptr<SparsePage> page; std::unique_ptr<SparsePage> page;
while (wqueue->Pop(&page)) { while (wqueue->Pop(&page)) {
if (page.get() == nullptr) break;
fmt->Write(*page, fo.get()); fmt->Write(*page, fo.get());
qrecycle_.Push(std::move(page)); qrecycle_.Push(std::move(page));
} }
@ -45,7 +46,9 @@ SparsePage::Writer::Writer(
SparsePage::Writer::~Writer() { SparsePage::Writer::~Writer() {
for (auto& queue : qworkers_) { for (auto& queue : qworkers_) {
queue.SignalForKill(); // use nullptr to signal termination.
std::unique_ptr<SparsePage> sig(nullptr);
queue.Push(std::move(sig));
} }
for (auto& thread : workers_) { for (auto& thread : workers_) {
thread->join(); thread->join();