This commit is contained in:
tqchen 2015-04-17 22:09:26 -07:00
parent ddb7e538df
commit 0a7d233c5d
2 changed files with 16 additions and 2 deletions

View File

@ -133,7 +133,9 @@ class DMatrixPageBase : public DataMatrix {
const char* cache_file,
bool silent,
bool loadsplit) {
if (!silent) {
utils::Printf("start generate text file from %s\n", uri);
}
int rank = 0, npart = 1;
if (loadsplit) {
rank = rabit::GetRank();
@ -146,6 +148,8 @@ class DMatrixPageBase : public DataMatrix {
dmlc::InputSplit *in =
dmlc::InputSplit::Create(uri, rank, npart);
std::string line;
size_t bytes_write = 0;
double tstart = rabit::utils::GetTime();
info.Clear();
while (in->ReadRecord(&line)) {
float label;
@ -162,8 +166,17 @@ class DMatrixPageBase : public DataMatrix {
RowBatch::Inst row(BeginPtr(feats), feats.size());
page.Push(row);
if (page.MemCostBytes() >= kPageSize) {
  // Count this page's bytes before it is saved and cleared.
  bytes_write += page.MemCostBytes();
  page.Save(&fo);
  page.Clear();
  if (!silent) {
    // Elapsed time since tstart; only needed for the progress message.
    const double tdiff = rabit::utils::GetTime() - tstart;
    // %g consumes a double from the varargs list. Passing the raw size_t
    // (as the second %g argument previously did) is undefined behaviour,
    // so convert the whole-megabyte count to double once and reuse it.
    const double mb_written = static_cast<double>(bytes_write >> 20UL);
    utils::Printf("Writting to %s in %g MB/s, %g MB written\n",
                  cache_file, mb_written / tdiff, mb_written);
  }
}
for (size_t i = 0; i < feats.size(); ++i) {
info.info.num_col = std::max(info.info.num_col,
static_cast<size_t>(feats[i].index+1));

View File

@ -7,6 +7,7 @@
* \author Tianqi Chen
*/
#include "../../subtree/rabit/include/rabit.h"
#include "../../subtree/rabit/include/rabit/timer.h"
#endif // XGBOOST_SYNC_H_