From 72347e2d4584633bf2bd8b83fc9021e0cf03ba2e Mon Sep 17 00:00:00 2001 From: tqchen Date: Sun, 10 Jan 2016 22:27:26 -0800 Subject: [PATCH] [DATA] Make it fully compatible with rank --- amalgamation/xgboost-all0.cc | 2 ++ src/data/data.cc | 41 ++++++++++++++++++++++++++++++++++++ 2 files changed, 43 insertions(+) diff --git a/amalgamation/xgboost-all0.cc b/amalgamation/xgboost-all0.cc index 9e3dba90c..c0f004e40 100644 --- a/amalgamation/xgboost-all0.cc +++ b/amalgamation/xgboost-all0.cc @@ -30,6 +30,8 @@ #include "../src/data/data.cc" #include "../src/data/simple_csr_source.cc" #include "../src/data/simple_dmatrix.cc" +#include "../src/data/sparse_page_source.cc" +#include "../src/data/sparse_page_dmatrix.cc" // tress #include "../src/tree/tree_model.cc" diff --git a/src/data/data.cc b/src/data/data.cc index 593ccf284..024b68c0d 100644 --- a/src/data/data.cc +++ b/src/data/data.cc @@ -51,6 +51,35 @@ void MetaInfo::LoadBinary(dmlc::Stream *fi) { CHECK(fi->Read(&base_margin)) << "MetaInfo: invalid format"; } +// try to load group information from file, if exists +inline bool MetaTryLoadGroup(const std::string& fname, + std::vector* group) { + std::unique_ptr fi(dmlc::Stream::Create(fname.c_str(), "r", true)); + if (fi.get() == nullptr) return false; + dmlc::istream is(fi.get()); + group->clear(); + group->push_back(0); + unsigned nline; + while (is >> nline) { + group->push_back(group->back() + nline); + } + return true; +} + +// try to load weight information from file, if exists +inline bool MetaTryLoadFloatInfo(const std::string& fname, + std::vector* data) { + std::unique_ptr fi(dmlc::Stream::Create(fname.c_str(), "r", true)); + if (fi.get() == nullptr) return false; + dmlc::istream is(fi.get()); + data->clear(); + float value; + while (is >> value) { + data->push_back(value); + } + return true; +} + // macro to dispatch according to specified pointer types #define DISPATCH_CONST_PTR(dtype, old_ptr, cast_ptr, proc) \ switch (dtype) { \ @@ -143,6 +172,18 @@ 
DMatrix* DMatrix::Load(const std::string& uri, LOG(CONSOLE) << dmat->info().num_row << 'x' << dmat->info().num_col << " matrix with " << dmat->info().num_nonzero << " entries loaded from " << uri; } + // backward compatibility code. + if (!load_row_split) { + MetaInfo& info = dmat->info(); + if (MetaTryLoadGroup(fname + ".group", &info.group_ptr) && !silent) { + LOG(CONSOLE) << info.group_ptr.size() - 1 + << " groups are loaded from " << fname << ".group"; + } + if (MetaTryLoadFloatInfo(fname + ".base_margin", &info.base_margin) && !silent) { + LOG(CONSOLE) << info.base_margin.size() + << " base_margin are loaded from " << fname << ".base_margin"; + } + } return dmat; }