From 0a7d233c5de92e45b3dfb557f04fece0b60a35ac Mon Sep 17 00:00:00 2001 From: tqchen Date: Fri, 17 Apr 2015 22:09:26 -0700 Subject: [PATCH] add --- src/io/page_dmatrix-inl.hpp | 17 +++++++++++++++-- src/sync/sync.h | 1 + 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/src/io/page_dmatrix-inl.hpp b/src/io/page_dmatrix-inl.hpp index 03f0d5ca8..ed47a265c 100644 --- a/src/io/page_dmatrix-inl.hpp +++ b/src/io/page_dmatrix-inl.hpp @@ -133,7 +133,9 @@ class DMatrixPageBase : public DataMatrix { const char* cache_file, bool silent, bool loadsplit) { - + if (!silent) { + utils::Printf("start generate text file from %s\n", uri); + } int rank = 0, npart = 1; if (loadsplit) { rank = rabit::GetRank(); @@ -146,6 +148,8 @@ class DMatrixPageBase : public DataMatrix { dmlc::InputSplit *in = dmlc::InputSplit::Create(uri, rank, npart); std::string line; + size_t bytes_write = 0; + double tstart = rabit::utils::GetTime(); info.Clear(); while (in->ReadRecord(&line)) { float label; @@ -162,8 +166,17 @@ class DMatrixPageBase : public DataMatrix { RowBatch::Inst row(BeginPtr(feats), feats.size()); page.Push(row); if (page.MemCostBytes() >= kPageSize) { - page.Save(&fo); page.Clear(); + bytes_write += page.MemCostBytes(); + page.Save(&fo); + page.Clear(); + double tdiff = rabit::utils::GetTime() - tstart; + if (!silent) { + utils::Printf("Writting to %s in %g MB/s, %g MB written\n", + cache_file, (bytes_write >> 20UL) / tdiff, + static_cast<double>(bytes_write >> 20UL)); + } } + for (size_t i = 0; i < feats.size(); ++i) { info.info.num_col = std::max(info.info.num_col, static_cast(feats[i].index+1)); diff --git a/src/sync/sync.h b/src/sync/sync.h index aec5e2abd..3a371b03c 100644 --- a/src/sync/sync.h +++ b/src/sync/sync.h @@ -7,6 +7,7 @@ * \author Tianqi Chen */ #include "../../subtree/rabit/include/rabit.h" +#include "../../subtree/rabit/include/rabit/timer.h" #endif // XGBOOST_SYNC_H_