untested version of cpage
This commit is contained in:
parent
4b9aeea89c
commit
a89e3063e6
@ -7,6 +7,7 @@ using namespace std;
|
||||
#include "../utils/utils.h"
|
||||
#include "simple_dmatrix-inl.hpp"
|
||||
#include "page_dmatrix-inl.hpp"
|
||||
#include "page_fmatrix-inl.hpp"
|
||||
|
||||
// implements data loading using DMatrixSimple for now
|
||||
|
||||
@ -30,6 +31,12 @@ DataMatrix* LoadDataMatrix(const char *fname, bool silent, bool savebuffer) {
|
||||
// the file pointer is held by the page matrix
|
||||
return dmat;
|
||||
}
|
||||
if (magic == DMatrixColPage::kMagic) {
|
||||
DMatrixColPage *dmat = new DMatrixColPage(fname);
|
||||
dmat->Load(fs, silent, fname);
|
||||
// the file pointer is held by the page matrix
|
||||
return dmat;
|
||||
}
|
||||
fs.Close();
|
||||
|
||||
DMatrixSimple *dmat = new DMatrixSimple();
|
||||
@ -42,6 +49,10 @@ void SaveDataMatrix(const DataMatrix &dmat, const char *fname, bool silent) {
|
||||
DMatrixPage::Save(fname, dmat, silent);
|
||||
return;
|
||||
}
|
||||
if (!strcmp(fname + strlen(fname) - 6, ".cpage")) {
|
||||
DMatrixColPage::Save(fname, dmat, silent);
|
||||
return;
|
||||
}
|
||||
if (dmat.magic == DMatrixSimple::kMagic) {
|
||||
const DMatrixSimple *p_dmat = static_cast<const DMatrixSimple*>(&dmat);
|
||||
p_dmat->SaveBinary(fname, silent);
|
||||
|
||||
@ -9,7 +9,6 @@
|
||||
#include "../utils/iterator.h"
|
||||
#include "../utils/thread_buffer.h"
|
||||
#include "./simple_fmatrix-inl.hpp"
|
||||
#include "./page_fmatrix-inl.hpp"
|
||||
|
||||
namespace xgboost {
|
||||
namespace io {
|
||||
@ -200,26 +199,24 @@ class ThreadRowPageIterator: public utils::IIterator<RowBatch> {
|
||||
};
|
||||
|
||||
/*! \brief data matrix using page */
|
||||
class DMatrixPage : public DataMatrix {
|
||||
template<int TKMagic>
|
||||
class DMatrixPageBase : public DataMatrix {
|
||||
public:
|
||||
DMatrixPage(void) : DataMatrix(kMagic) {
|
||||
DMatrixPageBase(void) : DataMatrix(kMagic) {
|
||||
iter_ = new ThreadRowPageIterator();
|
||||
fmat_ = new FMatrixS(iter_);
|
||||
}
|
||||
// virtual destructor
|
||||
virtual ~DMatrixPage(void) {
|
||||
delete fmat_;
|
||||
}
|
||||
virtual IFMatrix *fmat(void) const {
|
||||
return fmat_;
|
||||
virtual ~DMatrixPageBase(void) {
|
||||
// do not delete row iterator, since it is owned by fmat
|
||||
// TODO: clean this up so that the ownership is clearer
|
||||
}
|
||||
/*! \brief load and initialize the iterator with fi */
|
||||
inline void Load(utils::FileStream &fi,
|
||||
bool silent = false,
|
||||
const char *fname = NULL){
|
||||
int magic;
|
||||
utils::Check(fi.Read(&magic, sizeof(magic)) != 0, "invalid input file format");
|
||||
utils::Check(magic == kMagic, "invalid format,magic number mismatch");
|
||||
int tmagic;
|
||||
utils::Check(fi.Read(&tmagic, sizeof(tmagic)) != 0, "invalid input file format");
|
||||
utils::Check(tmagic == magic, "invalid format,magic number mismatch");
|
||||
this->info.LoadBinary(fi);
|
||||
iter_->Load(fi);
|
||||
if (!silent) {
|
||||
@ -250,12 +247,27 @@ class DMatrixPage : public DataMatrix {
|
||||
static_cast<unsigned long>(mat.info.num_col()), fname);
|
||||
}
|
||||
}
|
||||
/*! \brief the real fmatrix */
|
||||
FMatrixS *fmat_;
|
||||
/*! \brief magic number used to identify DMatrix */
|
||||
static const int kMagic = TKMagic;
|
||||
protected:
|
||||
|
||||
/*! \brief row iterator */
|
||||
ThreadRowPageIterator *iter_;
|
||||
/*! \brief magic number used to identify DMatrix */
|
||||
static const int kMagic = 0xffffab02;
|
||||
};
|
||||
|
||||
/*!
 * \brief page data matrix tagged with magic number 0xffffab02,
 *  whose feature matrix is an FMatrixS built on the inherited row iterator
 */
class DMatrixPage : public DMatrixPageBase<0xffffab02> {
 public:
  /*! \brief allocate the FMatrixS on top of the inherited iter_ */
  DMatrixPage(void) : fmat_(new FMatrixS(iter_)) {}
  /*! \brief free the feature matrix allocated by the constructor */
  virtual ~DMatrixPage(void) {
    delete this->fmat_;
  }
  /*! \return the feature matrix created in the constructor */
  virtual IFMatrix *fmat(void) const {
    return this->fmat_;
  }
  /*! \brief the real fmatrix */
  IFMatrix *fmat_;
};
|
||||
} // namespace io
|
||||
} // namespace xgboost
|
||||
|
||||
@ -199,7 +199,8 @@ class ThreadColPageIterator : public utils::IIterator<ColBatch> {
|
||||
class FMatrixPage : public IFMatrix {
|
||||
public:
|
||||
/*! \brief constructor */
|
||||
FMatrixPage(utils::IIterator<RowBatch> *iter, std::string fname_buffer) {
|
||||
FMatrixPage(utils::IIterator<RowBatch> *iter, std::string fname_buffer)
|
||||
: fname_cbuffer_(fname_buffer) {
|
||||
this->row_iter_ = iter;
|
||||
this->col_iter_ = NULL;
|
||||
this->fi_ = NULL;
|
||||
@ -238,7 +239,8 @@ class FMatrixPage : public IFMatrix {
|
||||
}
|
||||
virtual void InitColAccess(float pkeep = 1.0f) {
|
||||
if (this->HaveColAccess()) return;
|
||||
this->InitColData(pkeep);
|
||||
this->InitColData(pkeep, fname_cbuffer_.c_str(),
|
||||
64 << 20, 5);
|
||||
}
|
||||
/*!
|
||||
* \brief get the row iterator associated with FMatrix
|
||||
@ -281,11 +283,12 @@ class FMatrixPage : public IFMatrix {
|
||||
* \brief initialize column data
|
||||
* \param pkeep probability to keep a row
|
||||
*/
|
||||
inline void InitColData(float pkeep) {
|
||||
inline void InitColData(float pkeep, const char *fname,
|
||||
size_t buffer_size, size_t col_step) {
|
||||
buffered_rowset_.clear();
|
||||
utils::FileStream fo(utils::FopenCheck(fname_cbuffer_.c_str(), "wb+"));
|
||||
utils::FileStream fo(utils::FopenCheck(fname, "wb+"));
|
||||
// use 64M buffer
|
||||
utils::SparseCSRFileBuilder<ColBatch::Entry> builder(&fo, 64<<20);
|
||||
utils::SparseCSRFileBuilder<ColBatch::Entry> builder(&fo, buffer_size);
|
||||
|
||||
// start working
|
||||
row_iter_->BeforeFirst();
|
||||
@ -322,7 +325,7 @@ class FMatrixPage : public IFMatrix {
|
||||
}
|
||||
}
|
||||
builder.Finalize();
|
||||
builder.SortRows(ColBatch::Entry::CmpValue, 5);
|
||||
builder.SortRows(ColBatch::Entry::CmpValue, col_step);
|
||||
fo.Close();
|
||||
}
|
||||
|
||||
@ -339,6 +342,23 @@ class FMatrixPage : public IFMatrix {
|
||||
std::vector<bst_uint> buffered_rowset_;
|
||||
};
|
||||
|
||||
/*!
 * \brief page data matrix tagged with magic number 0xffffab03,
 *  whose feature matrix is an FMatrixPage
 */
class DMatrixColPage : public DMatrixPageBase<0xffffab03> {
 public:
  /*!
   * \brief create the FMatrixPage on top of the inherited iter_
   * \param fname base file name; fname + ".col" is the buffer name
   *  handed to FMatrixPage
   */
  DMatrixColPage(const char *fname) {
    std::string col_fname(fname);
    col_fname.append(".col");
    this->fmat_ = new FMatrixPage(this->iter_, col_fname.c_str());
  }
  /*! \brief free the feature matrix allocated by the constructor */
  virtual ~DMatrixColPage(void) {
    delete this->fmat_;
  }
  /*! \return the feature matrix created in the constructor */
  virtual IFMatrix *fmat(void) const {
    return this->fmat_;
  }
  /*! \brief the real fmatrix */
  IFMatrix *fmat_;
};
|
||||
|
||||
} // namespace io
|
||||
} // namespace xgboost
|
||||
#endif // XGBOOST_IO_PAGE_FMATRIX_INL_HPP_
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user