untested version of cpage
This commit is contained in:
parent
4b9aeea89c
commit
a89e3063e6
@ -7,6 +7,7 @@ using namespace std;
|
|||||||
#include "../utils/utils.h"
|
#include "../utils/utils.h"
|
||||||
#include "simple_dmatrix-inl.hpp"
|
#include "simple_dmatrix-inl.hpp"
|
||||||
#include "page_dmatrix-inl.hpp"
|
#include "page_dmatrix-inl.hpp"
|
||||||
|
#include "page_fmatrix-inl.hpp"
|
||||||
|
|
||||||
// implements data loads using dmatrix simple for now
|
// implements data loads using dmatrix simple for now
|
||||||
|
|
||||||
@ -30,6 +31,12 @@ DataMatrix* LoadDataMatrix(const char *fname, bool silent, bool savebuffer) {
|
|||||||
// the file pointer is held by the page matrix
|
// the file pointer is held by the page matrix
|
||||||
return dmat;
|
return dmat;
|
||||||
}
|
}
|
||||||
|
if (magic == DMatrixColPage::kMagic) {
|
||||||
|
DMatrixColPage *dmat = new DMatrixColPage(fname);
|
||||||
|
dmat->Load(fs, silent, fname);
|
||||||
|
// the file pointer is held by the page matrix
|
||||||
|
return dmat;
|
||||||
|
}
|
||||||
fs.Close();
|
fs.Close();
|
||||||
|
|
||||||
DMatrixSimple *dmat = new DMatrixSimple();
|
DMatrixSimple *dmat = new DMatrixSimple();
|
||||||
@ -42,6 +49,10 @@ void SaveDataMatrix(const DataMatrix &dmat, const char *fname, bool silent) {
|
|||||||
DMatrixPage::Save(fname, dmat, silent);
|
DMatrixPage::Save(fname, dmat, silent);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
if (!strcmp(fname + strlen(fname) - 6, ".cpage")) {
|
||||||
|
DMatrixColPage::Save(fname, dmat, silent);
|
||||||
|
return;
|
||||||
|
}
|
||||||
if (dmat.magic == DMatrixSimple::kMagic) {
|
if (dmat.magic == DMatrixSimple::kMagic) {
|
||||||
const DMatrixSimple *p_dmat = static_cast<const DMatrixSimple*>(&dmat);
|
const DMatrixSimple *p_dmat = static_cast<const DMatrixSimple*>(&dmat);
|
||||||
p_dmat->SaveBinary(fname, silent);
|
p_dmat->SaveBinary(fname, silent);
|
||||||
|
|||||||
@ -9,7 +9,6 @@
|
|||||||
#include "../utils/iterator.h"
|
#include "../utils/iterator.h"
|
||||||
#include "../utils/thread_buffer.h"
|
#include "../utils/thread_buffer.h"
|
||||||
#include "./simple_fmatrix-inl.hpp"
|
#include "./simple_fmatrix-inl.hpp"
|
||||||
#include "./page_fmatrix-inl.hpp"
|
|
||||||
|
|
||||||
namespace xgboost {
|
namespace xgboost {
|
||||||
namespace io {
|
namespace io {
|
||||||
@ -200,26 +199,24 @@ class ThreadRowPageIterator: public utils::IIterator<RowBatch> {
|
|||||||
};
|
};
|
||||||
|
|
||||||
/*! \brief data matrix using page */
|
/*! \brief data matrix using page */
|
||||||
class DMatrixPage : public DataMatrix {
|
template<int TKMagic>
|
||||||
|
class DMatrixPageBase : public DataMatrix {
|
||||||
public:
|
public:
|
||||||
DMatrixPage(void) : DataMatrix(kMagic) {
|
DMatrixPageBase(void) : DataMatrix(kMagic) {
|
||||||
iter_ = new ThreadRowPageIterator();
|
iter_ = new ThreadRowPageIterator();
|
||||||
fmat_ = new FMatrixS(iter_);
|
|
||||||
}
|
}
|
||||||
// virtual destructor
|
// virtual destructor
|
||||||
virtual ~DMatrixPage(void) {
|
virtual ~DMatrixPageBase(void) {
|
||||||
delete fmat_;
|
// do not delete row iterator, since it is owned by fmat
|
||||||
}
|
// to be cleaned up in a more clear way
|
||||||
virtual IFMatrix *fmat(void) const {
|
|
||||||
return fmat_;
|
|
||||||
}
|
}
|
||||||
/*! \brief load and initialize the iterator with fi */
|
/*! \brief load and initialize the iterator with fi */
|
||||||
inline void Load(utils::FileStream &fi,
|
inline void Load(utils::FileStream &fi,
|
||||||
bool silent = false,
|
bool silent = false,
|
||||||
const char *fname = NULL){
|
const char *fname = NULL){
|
||||||
int magic;
|
int tmagic;
|
||||||
utils::Check(fi.Read(&magic, sizeof(magic)) != 0, "invalid input file format");
|
utils::Check(fi.Read(&tmagic, sizeof(tmagic)) != 0, "invalid input file format");
|
||||||
utils::Check(magic == kMagic, "invalid format,magic number mismatch");
|
utils::Check(tmagic == magic, "invalid format,magic number mismatch");
|
||||||
this->info.LoadBinary(fi);
|
this->info.LoadBinary(fi);
|
||||||
iter_->Load(fi);
|
iter_->Load(fi);
|
||||||
if (!silent) {
|
if (!silent) {
|
||||||
@ -250,12 +247,27 @@ class DMatrixPage : public DataMatrix {
|
|||||||
static_cast<unsigned long>(mat.info.num_col()), fname);
|
static_cast<unsigned long>(mat.info.num_col()), fname);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
/*! \brief the real fmatrix */
|
/*! \brief magic number used to identify DMatrix */
|
||||||
FMatrixS *fmat_;
|
static const int kMagic = TKMagic;
|
||||||
|
protected:
|
||||||
|
|
||||||
/*! \brief row iterator */
|
/*! \brief row iterator */
|
||||||
ThreadRowPageIterator *iter_;
|
ThreadRowPageIterator *iter_;
|
||||||
/*! \brief magic number used to identify DMatrix */
|
};
|
||||||
static const int kMagic = 0xffffab02;
|
|
||||||
|
class DMatrixPage : public DMatrixPageBase<0xffffab02> {
|
||||||
|
public:
|
||||||
|
DMatrixPage(void) {
|
||||||
|
fmat_ = new FMatrixS(iter_);
|
||||||
|
}
|
||||||
|
virtual ~DMatrixPage(void) {
|
||||||
|
delete fmat_;
|
||||||
|
}
|
||||||
|
virtual IFMatrix *fmat(void) const {
|
||||||
|
return fmat_;
|
||||||
|
}
|
||||||
|
/*! \brief the real fmatrix */
|
||||||
|
IFMatrix *fmat_;
|
||||||
};
|
};
|
||||||
} // namespace io
|
} // namespace io
|
||||||
} // namespace xgboost
|
} // namespace xgboost
|
||||||
|
|||||||
@ -199,7 +199,8 @@ class ThreadColPageIterator : public utils::IIterator<ColBatch> {
|
|||||||
class FMatrixPage : public IFMatrix {
|
class FMatrixPage : public IFMatrix {
|
||||||
public:
|
public:
|
||||||
/*! \brief constructor */
|
/*! \brief constructor */
|
||||||
FMatrixPage(utils::IIterator<RowBatch> *iter, std::string fname_buffer) {
|
FMatrixPage(utils::IIterator<RowBatch> *iter, std::string fname_buffer)
|
||||||
|
: fname_cbuffer_(fname_buffer) {
|
||||||
this->row_iter_ = iter;
|
this->row_iter_ = iter;
|
||||||
this->col_iter_ = NULL;
|
this->col_iter_ = NULL;
|
||||||
this->fi_ = NULL;
|
this->fi_ = NULL;
|
||||||
@ -238,7 +239,8 @@ class FMatrixPage : public IFMatrix {
|
|||||||
}
|
}
|
||||||
virtual void InitColAccess(float pkeep = 1.0f) {
|
virtual void InitColAccess(float pkeep = 1.0f) {
|
||||||
if (this->HaveColAccess()) return;
|
if (this->HaveColAccess()) return;
|
||||||
this->InitColData(pkeep);
|
this->InitColData(pkeep, fname_cbuffer_.c_str(),
|
||||||
|
64 << 20, 5);
|
||||||
}
|
}
|
||||||
/*!
|
/*!
|
||||||
* \brief get the row iterator associated with FMatrix
|
* \brief get the row iterator associated with FMatrix
|
||||||
@ -281,11 +283,12 @@ class FMatrixPage : public IFMatrix {
|
|||||||
* \brief initialize column data
|
* \brief initialize column data
|
||||||
* \param pkeep probability to keep a row
|
* \param pkeep probability to keep a row
|
||||||
*/
|
*/
|
||||||
inline void InitColData(float pkeep) {
|
inline void InitColData(float pkeep, const char *fname,
|
||||||
|
size_t buffer_size, size_t col_step) {
|
||||||
buffered_rowset_.clear();
|
buffered_rowset_.clear();
|
||||||
utils::FileStream fo(utils::FopenCheck(fname_cbuffer_.c_str(), "wb+"));
|
utils::FileStream fo(utils::FopenCheck(fname, "wb+"));
|
||||||
// use 64M buffer
|
// use 64M buffer
|
||||||
utils::SparseCSRFileBuilder<ColBatch::Entry> builder(&fo, 64<<20);
|
utils::SparseCSRFileBuilder<ColBatch::Entry> builder(&fo, buffer_size);
|
||||||
|
|
||||||
// start working
|
// start working
|
||||||
row_iter_->BeforeFirst();
|
row_iter_->BeforeFirst();
|
||||||
@ -322,7 +325,7 @@ class FMatrixPage : public IFMatrix {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
builder.Finalize();
|
builder.Finalize();
|
||||||
builder.SortRows(ColBatch::Entry::CmpValue, 5);
|
builder.SortRows(ColBatch::Entry::CmpValue, col_step);
|
||||||
fo.Close();
|
fo.Close();
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -339,6 +342,23 @@ class FMatrixPage : public IFMatrix {
|
|||||||
std::vector<bst_uint> buffered_rowset_;
|
std::vector<bst_uint> buffered_rowset_;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
class DMatrixColPage : public DMatrixPageBase<0xffffab03> {
|
||||||
|
public:
|
||||||
|
DMatrixColPage(const char *fname) {
|
||||||
|
std::string fext = fname;
|
||||||
|
fext += ".col";
|
||||||
|
fmat_ = new FMatrixPage(iter_, fext.c_str());
|
||||||
|
}
|
||||||
|
virtual ~DMatrixColPage(void) {
|
||||||
|
delete fmat_;
|
||||||
|
}
|
||||||
|
virtual IFMatrix *fmat(void) const {
|
||||||
|
return fmat_;
|
||||||
|
}
|
||||||
|
/*! \brief the real fmatrix */
|
||||||
|
IFMatrix *fmat_;
|
||||||
|
};
|
||||||
|
|
||||||
} // namespace io
|
} // namespace io
|
||||||
} // namespace xgboost
|
} // namespace xgboost
|
||||||
#endif // XGBOOST_IO_PAGE_FMATRIX_INL_HPP_
|
#endif // XGBOOST_IO_PAGE_FMATRIX_INL_HPP_
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user