first check of page
This commit is contained in:
parent
d0e27482ef
commit
ce772c2f3e
@ -22,7 +22,13 @@ DataMatrix* LoadDataMatrix(const char *fname, bool silent, bool savebuffer) {
|
|||||||
dmat->LoadBinary(fs, silent, fname);
|
dmat->LoadBinary(fs, silent, fname);
|
||||||
fs.Close();
|
fs.Close();
|
||||||
return dmat;
|
return dmat;
|
||||||
}
|
}
|
||||||
|
if (magic == DMatrixPage::kMagic) {
|
||||||
|
DMatrixPage *dmat = new DMatrixPage();
|
||||||
|
dmat->Load(fs, silent, fname);
|
||||||
|
// the file pointer is held by the page matrix
|
||||||
|
return dmat;
|
||||||
|
}
|
||||||
fs.Close();
|
fs.Close();
|
||||||
|
|
||||||
DMatrixSimple *dmat = new DMatrixSimple();
|
DMatrixSimple *dmat = new DMatrixSimple();
|
||||||
@ -31,6 +37,11 @@ DataMatrix* LoadDataMatrix(const char *fname, bool silent, bool savebuffer) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void SaveDataMatrix(const DataMatrix &dmat, const char *fname, bool silent) {
|
void SaveDataMatrix(const DataMatrix &dmat, const char *fname, bool silent) {
|
||||||
|
if (!strcmp(fname + strlen(fname) - 5, ".page")) {
|
||||||
|
|
||||||
|
DMatrixPage::Save(fname, dmat, silent);
|
||||||
|
return;
|
||||||
|
}
|
||||||
if (dmat.magic == DMatrixSimple::kMagic) {
|
if (dmat.magic == DMatrixSimple::kMagic) {
|
||||||
const DMatrixSimple *p_dmat = static_cast<const DMatrixSimple*>(&dmat);
|
const DMatrixSimple *p_dmat = static_cast<const DMatrixSimple*>(&dmat);
|
||||||
p_dmat->SaveBinary(fname, silent);
|
p_dmat->SaveBinary(fname, silent);
|
||||||
|
|||||||
@ -8,6 +8,8 @@
|
|||||||
#include "../data.h"
|
#include "../data.h"
|
||||||
#include "../utils/iterator.h"
|
#include "../utils/iterator.h"
|
||||||
#include "../utils/thread_buffer.h"
|
#include "../utils/thread_buffer.h"
|
||||||
|
#include "./simple_fmatrix-inl.hpp"
|
||||||
|
|
||||||
namespace xgboost {
|
namespace xgboost {
|
||||||
namespace io {
|
namespace io {
|
||||||
/*! \brief page structure that can be used to store a rowbatch */
|
/*! \brief page structure that can be used to store a rowbatch */
|
||||||
@ -102,7 +104,7 @@ class ThreadRowPageIterator: public utils::IIterator<RowBatch> {
|
|||||||
base_rowid_ = 0;
|
base_rowid_ = 0;
|
||||||
isend_ = false;
|
isend_ = false;
|
||||||
}
|
}
|
||||||
virtual ~ThreadRowPageIterator(void) {
|
virtual ~ThreadRowPageIterator(void) {
|
||||||
}
|
}
|
||||||
virtual void Init(void) {
|
virtual void Init(void) {
|
||||||
}
|
}
|
||||||
@ -188,7 +190,9 @@ class ThreadRowPageIterator: public utils::IIterator<RowBatch> {
|
|||||||
inline void FreeSpace(PagePtr &a) {
|
inline void FreeSpace(PagePtr &a) {
|
||||||
delete a;
|
delete a;
|
||||||
}
|
}
|
||||||
inline void Destroy(void) {}
|
inline void Destroy(void) {
|
||||||
|
fi.Close();
|
||||||
|
}
|
||||||
inline void BeforeFirst(void) {
|
inline void BeforeFirst(void) {
|
||||||
fi.Seek(file_begin_);
|
fi.Seek(file_begin_);
|
||||||
}
|
}
|
||||||
@ -199,6 +203,63 @@ class ThreadRowPageIterator: public utils::IIterator<RowBatch> {
|
|||||||
int ptop_;
|
int ptop_;
|
||||||
utils::ThreadBuffer<PagePtr,Factory> itr;
|
utils::ThreadBuffer<PagePtr,Factory> itr;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/*! \brief data matrix using page */
|
||||||
|
class DMatrixPage : public DataMatrix {
|
||||||
|
public:
|
||||||
|
DMatrixPage(void) : DataMatrix(kMagic) {
|
||||||
|
iter_ = new ThreadRowPageIterator();
|
||||||
|
fmat_ = new FMatrixS(iter_);
|
||||||
|
}
|
||||||
|
// virtual destructor
|
||||||
|
virtual ~DMatrixPage(void) {
|
||||||
|
delete fmat_;
|
||||||
|
}
|
||||||
|
virtual IFMatrix *fmat(void) const {
|
||||||
|
return fmat_;
|
||||||
|
}
|
||||||
|
/*! \brief load and initialize the iterator with fi */
|
||||||
|
inline void Load(utils::FileStream &fi,
|
||||||
|
bool silent = false,
|
||||||
|
const char *fname = NULL){
|
||||||
|
int magic;
|
||||||
|
utils::Check(fi.Read(&magic, sizeof(magic)) != 0, "invalid input file format");
|
||||||
|
utils::Check(magic == kMagic, "invalid format,magic number mismatch");
|
||||||
|
this->info.LoadBinary(fi);
|
||||||
|
iter_->Load(fi);
|
||||||
|
if (!silent) {
|
||||||
|
printf("DMatrixPage: %lux%lu matrix is loaded",
|
||||||
|
info.num_row(), info.num_col());
|
||||||
|
if (fname != NULL) {
|
||||||
|
printf(" from %s\n", fname);
|
||||||
|
} else {
|
||||||
|
printf("\n");
|
||||||
|
}
|
||||||
|
if (info.group_ptr.size() != 0) {
|
||||||
|
printf("data contains %u groups\n", (unsigned)info.group_ptr.size()-1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
/*! \brief save a DataMatrix as DMatrixPage*/
|
||||||
|
inline static void Save(const char* fname, const DataMatrix &mat, bool silent) {
|
||||||
|
utils::FileStream fs(utils::FopenCheck(fname, "wb"));
|
||||||
|
int magic = kMagic;
|
||||||
|
fs.Write(&magic, sizeof(magic));
|
||||||
|
mat.info.SaveBinary(fs);
|
||||||
|
ThreadRowPageIterator::Save(mat.fmat()->RowIterator(), fs);
|
||||||
|
fs.Close();
|
||||||
|
if (!silent) {
|
||||||
|
printf("DMatrixPage: %lux%lu is saved to %s\n",
|
||||||
|
mat.info.num_row(), mat.info.num_col(), fname);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
/*! \brief the real fmatrix */
|
||||||
|
FMatrixS *fmat_;
|
||||||
|
/*! \brief row iterator */
|
||||||
|
ThreadRowPageIterator *iter_;
|
||||||
|
/*! \brief magic number used to identify DMatrix */
|
||||||
|
static const int kMagic = 0xffffab02;
|
||||||
|
};
|
||||||
} // namespace io
|
} // namespace io
|
||||||
} // namespace xgboost
|
} // namespace xgboost
|
||||||
#endif // XGBOOST_IO_PAGE_ROW_ITER_INL_HPP_
|
#endif // XGBOOST_IO_PAGE_ROW_ITER_INL_HPP_
|
||||||
|
|||||||
@ -100,7 +100,9 @@ class ISeekStream: public IStream {
|
|||||||
/*! \brief implementation of file i/o stream */
|
/*! \brief implementation of file i/o stream */
|
||||||
class FileStream : public ISeekStream {
|
class FileStream : public ISeekStream {
|
||||||
public:
|
public:
|
||||||
explicit FileStream(void) {}
|
explicit FileStream(void) {
|
||||||
|
this->fp = NULL;
|
||||||
|
}
|
||||||
explicit FileStream(FILE *fp) {
|
explicit FileStream(FILE *fp) {
|
||||||
this->fp = fp;
|
this->fp = fp;
|
||||||
}
|
}
|
||||||
@ -117,7 +119,9 @@ class FileStream : public ISeekStream {
|
|||||||
return static_cast<size_t>(ftell(fp));
|
return static_cast<size_t>(ftell(fp));
|
||||||
}
|
}
|
||||||
inline void Close(void) {
|
inline void Close(void) {
|
||||||
fclose(fp);
|
if (fp != NULL){
|
||||||
|
fclose(fp); fp = NULL;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user