From 0a7cfb32c6f453482bdc8a8f5fa4f2e2db1308fc Mon Sep 17 00:00:00 2001
From: tqchen
Date: Sun, 31 Aug 2014 21:58:01 -0700
Subject: [PATCH] add fmatrix, fight tmr

---
 src/io/page_dmatrix-inl.hpp |   1 +
 src/io/page_fmatrix-inl.hpp | 106 +++++++++++++++++++++++++++++++++++
 2 files changed, 107 insertions(+)
 create mode 100644 src/io/page_fmatrix-inl.hpp

diff --git a/src/io/page_dmatrix-inl.hpp b/src/io/page_dmatrix-inl.hpp
index 8db944c85..23013b98b 100644
--- a/src/io/page_dmatrix-inl.hpp
+++ b/src/io/page_dmatrix-inl.hpp
@@ -9,6 +9,7 @@
 #include "../utils/iterator.h"
 #include "../utils/thread_buffer.h"
 #include "./simple_fmatrix-inl.hpp"
+#include "./page_fmatrix-inl.hpp"
 
 namespace xgboost {
 namespace io {
diff --git a/src/io/page_fmatrix-inl.hpp b/src/io/page_fmatrix-inl.hpp
new file mode 100644
index 000000000..156cddb63
--- /dev/null
+++ b/src/io/page_fmatrix-inl.hpp
@@ -0,0 +1,106 @@
+#ifndef XGBOOST_IO_PAGE_FMATRIX_INL_HPP_
+#define XGBOOST_IO_PAGE_FMATRIX_INL_HPP_
+/*!
+ * \file page_fmatrix-inl.hpp
+ *   sparse page manager for fmatrix
+ * \author Tianqi Chen
+ */
+#include <vector>
+#include "../data.h"
+#include "../utils/iterator.h"
+#include "../utils/thread_buffer.h"
+namespace xgboost {
+namespace io {
+
+/*!
+ * \brief manager that loads columns of a CSC matrix from a seekable
+ *  input stream into in-memory pages
+ */
+class CSCMatrixManager {
+ public:
+  /*! \brief in memory page */
+  struct Page {
+   public:
+    /*!
+     * \brief initialize the page
+     * \param size capacity of the page, in number of entries
+     */
+    inline void Init(size_t size) {
+      buffer.resize(size);
+      num_entry = 0;
+      col_index.clear();
+      col_data.clear();
+    }
+    /*! \brief number of used entries */
+    size_t num_entry;
+    /*! \brief index of each column stored in this page */
+    // NOTE(review): element type assumed to be bst_uint so that &col_index[0]
+    // matches ColBatch::col_index, confirm against ../data.h
+    std::vector<bst_uint> col_index;
+    /*! \brief one ColBatch::Inst per stored column, pointing into buffer */
+    std::vector<ColBatch::Inst> col_data;
+    /*! \brief number of free entries */
+    inline size_t NumFreeEntry(void) const {
+      return buffer.size() - num_entry;
+    }
+    /*!
+     * \brief reserve len consecutive entries from the buffer
+     *  no capacity check is done, caller should check NumFreeEntry first
+     * \param len number of entries to reserve
+     * \return pointer to the first reserved entry
+     */
+    inline ColBatch::Entry* AllocEntry(size_t len) {
+      ColBatch::Entry *p_data = &buffer[0] + num_entry;
+      num_entry += len;
+      return p_data;
+    }
+    /*!
+     * \brief get underlying batch
+     *  NOTE(review): only col_index and col_data are assigned here,
+     *  confirm whether ColBatch has a column count that must also be set
+     */
+    inline ColBatch GetBatch(void) const {
+      ColBatch batch;
+      batch.col_index = &col_index[0];
+      batch.col_data = &col_data[0];
+      return batch;
+    }
+   private:
+    /*! \brief buffer space, not to be changed since ready */
+    std::vector<ColBatch::Entry> buffer;
+  };
+
+ private:
+  /*!
+   * \brief try to load one column from the input stream into a page
+   * \param cidx index of the column to load
+   * \param p_page page that receives the column
+   * \return false when the page has too few free entries and was left
+   *  untouched, true when the column was appended to the page
+   */
+  inline bool Fill(size_t cidx, Page *p_page) {
+    size_t len = col_ptr_[cidx+1] - col_ptr_[cidx];
+    if (p_page->NumFreeEntry() < len) return false;
+    ColBatch::Entry *p_data = p_page->AllocEntry(len);
+    // NOTE(review): col_ptr_ is used as an entry count above but passed
+    // unscaled to Seek, confirm the offset unit expected by the stream
+    fi->Seek(col_ptr_[cidx]);
+    // NOTE(review): a column with len == 0 requests a zero byte read,
+    // confirm that Read does not return 0 in that case
+    utils::Check(fi->Read(p_data, sizeof(ColBatch::Entry) * len) != 0,
+                 "invalid column buffer format");
+    p_page->col_data.push_back(ColBatch::Inst(p_data, len));
+    p_page->col_index.push_back(cidx);
+    return true;
+  }
+  /*! \brief size of data content */
+  size_t data_size_;
+  /*! \brief input stream */
+  utils::ISeekStream *fi;
+  /*! \brief column pointer of CSC format */
+  std::vector<size_t> col_ptr_;
+};
+
+}  // namespace io
+}  // namespace xgboost
+#endif  // XGBOOST_IO_PAGE_FMATRIX_INL_HPP_