some lint

This commit is contained in:
tqchen 2014-09-02 17:49:39 -07:00
parent e6e467ad60
commit 401d648372
2 changed files with 39 additions and 34 deletions

View File

@ -5,6 +5,7 @@
* row iterator based on sparse page * row iterator based on sparse page
* \author Tianqi Chen * \author Tianqi Chen
*/ */
#include <vector>
#include "../data.h" #include "../data.h"
#include "../utils/iterator.h" #include "../utils/iterator.h"
#include "../utils/thread_buffer.h" #include "../utils/thread_buffer.h"
@ -15,7 +16,7 @@ namespace io {
/*! \brief page structure that can be used to store a rowbatch */ /*! \brief page structure that can be used to store a rowbatch */
struct RowBatchPage { struct RowBatchPage {
public: public:
RowBatchPage(size_t page_size) : kPageSize(page_size) { explicit RowBatchPage(size_t page_size) : kPageSize(page_size) {
data_ = new int[kPageSize]; data_ = new int[kPageSize];
utils::Assert(data_ != NULL, "fail to allocate row batch page"); utils::Assert(data_ != NULL, "fail to allocate row batch page");
this->Clear(); this->Clear();
@ -31,10 +32,10 @@ struct RowBatchPage {
inline bool PushRow(const RowBatch::Inst &row) { inline bool PushRow(const RowBatch::Inst &row) {
const size_t dsize = row.length * sizeof(RowBatch::Entry); const size_t dsize = row.length * sizeof(RowBatch::Entry);
if (FreeBytes() < dsize+ sizeof(int)) return false; if (FreeBytes() < dsize+ sizeof(int)) return false;
row_ptr(Size() + 1) = row_ptr(Size()) + row.length; row_ptr(Size() + 1) = row_ptr(Size()) + row.length;
memcpy(data_ptr(row_ptr(Size())) , row.data, dsize); memcpy(data_ptr(row_ptr(Size())) , row.data, dsize);
++ data_[0]; ++data_[0];
return true; return true;
} }
/*! /*!
* \brief get a row batch representation from the page * \brief get a row batch representation from the page
@ -43,7 +44,7 @@ struct RowBatchPage {
* \return a new RowBatch object * \return a new RowBatch object
*/ */
inline RowBatch GetRowBatch(std::vector<size_t> *p_rptr, size_t base_rowid) { inline RowBatch GetRowBatch(std::vector<size_t> *p_rptr, size_t base_rowid) {
RowBatch batch; RowBatch batch;
batch.base_rowid = base_rowid; batch.base_rowid = base_rowid;
batch.data_ptr = this->data_ptr(0); batch.data_ptr = this->data_ptr(0);
batch.size = static_cast<size_t>(this->Size()); batch.size = static_cast<size_t>(this->Size());
@ -57,7 +58,7 @@ struct RowBatchPage {
} }
/*! \brief get i-th row from the batch */ /*! \brief get i-th row from the batch */
inline RowBatch::Inst operator[](int i) { inline RowBatch::Inst operator[](int i) {
return RowBatch::Inst(data_ptr(0) + row_ptr(i), return RowBatch::Inst(data_ptr(0) + row_ptr(i),
static_cast<bst_uint>(row_ptr(i+1) - row_ptr(i))); static_cast<bst_uint>(row_ptr(i+1) - row_ptr(i)));
} }
/*! /*!
@ -85,8 +86,8 @@ struct RowBatchPage {
private: private:
/*! \return number of elements */ /*! \return number of elements */
inline size_t FreeBytes(void) { inline size_t FreeBytes(void) {
return (kPageSize - (Size() + 2)) * sizeof(int) return (kPageSize - (Size() + 2)) * sizeof(int) -
- row_ptr(Size()) * sizeof(RowBatch::Entry) ; row_ptr(Size()) * sizeof(RowBatch::Entry);
} }
/*! \brief equivalent row pointer at i */ /*! \brief equivalent row pointer at i */
inline int& row_ptr(int i) { inline int& row_ptr(int i) {
@ -98,7 +99,7 @@ struct RowBatchPage {
// page size // page size
const size_t kPageSize; const size_t kPageSize;
// content of data // content of data
int *data_; int *data_;
}; };
/*! \brief thread buffer iterator */ /*! \brief thread buffer iterator */
class ThreadRowPageIterator: public utils::IIterator<RowBatch> { class ThreadRowPageIterator: public utils::IIterator<RowBatch> {
@ -108,8 +109,7 @@ class ThreadRowPageIterator: public utils::IIterator<RowBatch> {
page_ = NULL; page_ = NULL;
base_rowid_ = 0; base_rowid_ = 0;
} }
virtual ~ThreadRowPageIterator(void) { virtual ~ThreadRowPageIterator(void) {}
}
virtual void Init(void) { virtual void Init(void) {
} }
virtual void BeforeFirst(void) { virtual void BeforeFirst(void) {
@ -117,12 +117,12 @@ class ThreadRowPageIterator: public utils::IIterator<RowBatch> {
base_rowid_ = 0; base_rowid_ = 0;
} }
virtual bool Next(void) { virtual bool Next(void) {
if(!itr.Next(page_)) return false; if (!itr.Next(page_)) return false;
out_ = page_->GetRowBatch(&tmp_ptr_, base_rowid_); out_ = page_->GetRowBatch(&tmp_ptr_, base_rowid_);
base_rowid_ += out_.size; base_rowid_ += out_.size;
return true; return true;
} }
virtual const RowBatch &Value(void) const{ virtual const RowBatch &Value(void) const {
return out_; return out_;
} }
/*! \brief load and initialize the iterator with fi */ /*! \brief load and initialize the iterator with fi */
@ -152,6 +152,7 @@ class ThreadRowPageIterator: public utils::IIterator<RowBatch> {
} }
/*! \brief page size 64 MB */ /*! \brief page size 64 MB */
static const size_t kPageSize = 64 << 18; static const size_t kPageSize = 64 << 18;
private: private:
// base row id // base row id
size_t base_rowid_; size_t base_rowid_;
@ -195,7 +196,7 @@ class ThreadRowPageIterator: public utils::IIterator<RowBatch> {
protected: protected:
PagePtr page_; PagePtr page_;
utils::ThreadBuffer<PagePtr,Factory> itr; utils::ThreadBuffer<PagePtr, Factory> itr;
}; };
/*! \brief data matrix using page */ /*! \brief data matrix using page */
@ -213,10 +214,10 @@ class DMatrixPageBase : public DataMatrix {
/*! \brief load and initialize the iterator with fi */ /*! \brief load and initialize the iterator with fi */
inline void Load(utils::FileStream &fi, inline void Load(utils::FileStream &fi,
bool silent = false, bool silent = false,
const char *fname = NULL){ const char *fname = NULL) {
int tmagic; int tmagic;
utils::Check(fi.Read(&tmagic, sizeof(tmagic)) != 0, "invalid input file format"); utils::Check(fi.Read(&tmagic, sizeof(tmagic)) != 0, "invalid input file format");
utils::Check(tmagic == magic, "invalid format,magic number mismatch"); utils::Check(tmagic == magic, "invalid format,magic number mismatch");
this->info.LoadBinary(fi); this->info.LoadBinary(fi);
iter_->Load(fi); iter_->Load(fi);
if (!silent) { if (!silent) {
@ -229,7 +230,7 @@ class DMatrixPageBase : public DataMatrix {
utils::Printf("\n"); utils::Printf("\n");
} }
if (info.group_ptr.size() != 0) { if (info.group_ptr.size() != 0) {
utils::Printf("data contains %u groups\n", (unsigned)info.group_ptr.size()-1); utils::Printf("data contains %u groups\n", (unsigned)info.group_ptr.size() - 1);
} }
} }
} }
@ -249,8 +250,8 @@ class DMatrixPageBase : public DataMatrix {
} }
/*! \brief magic number used to identify DMatrix */ /*! \brief magic number used to identify DMatrix */
static const int kMagic = TKMagic; static const int kMagic = TKMagic;
protected:
protected:
/*! \brief row iterator */ /*! \brief row iterator */
ThreadRowPageIterator *iter_; ThreadRowPageIterator *iter_;
}; };

View File

@ -5,6 +5,9 @@
* sparse page manager for fmatrix * sparse page manager for fmatrix
* \author Tianqi Chen * \author Tianqi Chen
*/ */
#include <vector>
#include <string>
#include <algorithm>
#include "../data.h" #include "../data.h"
#include "../utils/iterator.h" #include "../utils/iterator.h"
#include "../utils/io.h" #include "../utils/io.h"
@ -34,7 +37,7 @@ class CSCMatrixManager {
/*! \brief column index */ /*! \brief column index */
std::vector<bst_uint> col_index; std::vector<bst_uint> col_index;
/*! \brief column data */ /*! \brief column data */
std::vector<ColBatch::Inst> col_data; std::vector<ColBatch::Inst> col_data;
/*! \brief number of free entries */ /*! \brief number of free entries */
inline size_t NumFreeEntry(void) const { inline size_t NumFreeEntry(void) const {
return buffer.size() - num_entry; return buffer.size() - num_entry;
@ -52,6 +55,7 @@ class CSCMatrixManager {
batch.col_data = BeginPtr(col_data); batch.col_data = BeginPtr(col_data);
return batch; return batch;
} }
private: private:
/*! \brief buffer space, not to be changed since ready */ /*! \brief buffer space, not to be changed since ready */
std::vector<ColBatch::Entry> buffer; std::vector<ColBatch::Entry> buffer;
@ -80,7 +84,7 @@ class CSCMatrixManager {
col_index_ = col_todo_; col_index_ = col_todo_;
read_top_ = 0; read_top_ = 0;
} }
inline bool LoadNext(PagePtr &val) { inline bool LoadNext(PagePtr &val) {
val->Clear(); val->Clear();
if (read_top_ >= col_index_.size()) return false; if (read_top_ >= col_index_.size()) return false;
while (read_top_ < col_index_.size()) { while (read_top_ < col_index_.size()) {
@ -106,7 +110,7 @@ class CSCMatrixManager {
psmax = std::max(psmax, col_ptr_[i+1] - col_ptr_[i]); psmax = std::max(psmax, col_ptr_[i+1] - col_ptr_[i]);
} }
utils::Check(page_ratio >= 1.0f, "col_page_ratio must be at least 1"); utils::Check(page_ratio >= 1.0f, "col_page_ratio must be at least 1");
page_size_ = std::max(static_cast<size_t>(psmax * page_ratio), psmax); page_size_ = std::max(static_cast<size_t>(psmax * page_ratio), psmax);
} }
inline void SetColSet(const std::vector<bst_uint> &cset, bool setall) { inline void SetColSet(const std::vector<bst_uint> &cset, bool setall) {
if (!setall) { if (!setall) {
@ -124,6 +128,7 @@ class CSCMatrixManager {
} }
} }
} }
private: private:
/*! \brief fill a page with */ /*! \brief fill a page with */
inline bool TryFill(size_t cidx, Page *p_page) { inline bool TryFill(size_t cidx, Page *p_page) {
@ -173,21 +178,22 @@ class ThreadColPageIterator : public utils::IIterator<ColBatch> {
} }
virtual void BeforeFirst(void) { virtual void BeforeFirst(void) {
itr_.BeforeFirst(); itr_.BeforeFirst();
} }
virtual bool Next(void) { virtual bool Next(void) {
// page to be loaded // page to be loaded
CSCMatrixManager::PagePtr page; CSCMatrixManager::PagePtr page;
if(!itr_.Next(page)) return false; if (!itr_.Next(page)) return false;
out_ = page->GetBatch(); out_ = page->GetBatch();
return true; return true;
} }
virtual const ColBatch &Value(void) const{ virtual const ColBatch &Value(void) const {
return out_; return out_;
} }
inline const std::vector<size_t> &col_ptr(void) const { inline const std::vector<size_t> &col_ptr(void) const {
return itr_.get_factory().col_ptr(); return itr_.get_factory().col_ptr();
} }
inline void SetColSet(const std::vector<bst_uint> &cset, bool setall = false) { inline void SetColSet(const std::vector<bst_uint> &cset,
bool setall = false) {
itr_.get_factory().SetColSet(cset, setall); itr_.get_factory().SetColSet(cset, setall);
} }
@ -195,9 +201,8 @@ class ThreadColPageIterator : public utils::IIterator<ColBatch> {
// output data // output data
ColBatch out_; ColBatch out_;
// internal iterator // internal iterator
utils::ThreadBuffer<CSCMatrixManager::PagePtr,CSCMatrixManager> itr_; utils::ThreadBuffer<CSCMatrixManager::PagePtr, CSCMatrixManager> itr_;
}; };
/*! /*!
* \brief sparse matrix that support column access * \brief sparse matrix that support column access
*/ */
@ -216,7 +221,7 @@ class FMatrixPage : public IFMatrix {
if (col_iter_ != NULL) delete col_iter_; if (col_iter_ != NULL) delete col_iter_;
if (fi_ != NULL) { if (fi_ != NULL) {
fi_->Close(); delete fi_; fi_->Close(); delete fi_;
} }
} }
/*! \return whether column access is enabled */ /*! \return whether column access is enabled */
virtual bool HaveColAccess(void) const { virtual bool HaveColAccess(void) const {
@ -272,7 +277,7 @@ class FMatrixPage : public IFMatrix {
col_iter_->BeforeFirst(); col_iter_->BeforeFirst();
return col_iter_; return col_iter_;
} }
protected: protected:
/*! /*!
* \brief try load column data from file * \brief try load column data from file
@ -282,25 +287,24 @@ class FMatrixPage : public IFMatrix {
if (fp == NULL) return false; if (fp == NULL) return false;
fi_ = new utils::FileStream(fp); fi_ = new utils::FileStream(fp);
static_cast<utils::IStream*>(fi_)->Read(&buffered_rowset_); static_cast<utils::IStream*>(fi_)->Read(&buffered_rowset_);
col_iter_ = new ThreadColPageIterator(fi_, 2.0f, false); col_iter_ = new ThreadColPageIterator(fi_, 2.0f, false);
return true; return true;
} }
/*! /*!
* \brief intialize column data * \brief intialize column data
* \param pkeep probability to keep a row * \param pkeep probability to keep a row
*/ */
inline void InitColData(float pkeep, const char *fname, inline void InitColData(float pkeep, const char *fname,
size_t buffer_size, size_t col_step) { size_t buffer_size, size_t col_step) {
buffered_rowset_.clear(); buffered_rowset_.clear();
utils::FileStream fo(utils::FopenCheck(fname, "wb+")); utils::FileStream fo(utils::FopenCheck(fname, "wb+"));
// use 64M buffer // use 64M buffer
utils::SparseCSRFileBuilder<ColBatch::Entry> builder(&fo, buffer_size); utils::SparseCSRFileBuilder<ColBatch::Entry> builder(&fo, buffer_size);
// start working // start working
row_iter_->BeforeFirst(); row_iter_->BeforeFirst();
while (row_iter_->Next()) { while (row_iter_->Next()) {
const RowBatch &batch = row_iter_->Value(); const RowBatch &batch = row_iter_->Value();
for (size_t i = 0; i < batch.size; ++i) { for (size_t i = 0; i < batch.size; ++i) {
if (pkeep == 1.0f || random::SampleBinary(pkeep)) { if (pkeep == 1.0f || random::SampleBinary(pkeep)) {
buffered_rowset_.push_back(static_cast<bst_uint>(batch.base_rowid+i)); buffered_rowset_.push_back(static_cast<bst_uint>(batch.base_rowid+i));
RowBatch::Inst inst = batch[i]; RowBatch::Inst inst = batch[i];
@ -350,7 +354,7 @@ class FMatrixPage : public IFMatrix {
class DMatrixColPage : public DMatrixPageBase<0xffffab03> { class DMatrixColPage : public DMatrixPageBase<0xffffab03> {
public: public:
DMatrixColPage(const char *fname) { explicit DMatrixColPage(const char *fname) {
std::string fext = fname; std::string fext = fname;
fext += ".col"; fext += ".col";
fmat_ = new FMatrixPage(iter_, fext.c_str()); fmat_ = new FMatrixPage(iter_, fext.c_str());