xgboost/src/data/simple_csr_source.h
2016-01-16 10:24:01 -08:00

82 lines
2.4 KiB
C++

/*!
* Copyright 2015 by Contributors
* \file simple_csr_source.h
* \brief The simplest form of data source, can be used to create DMatrix.
* This is an in-memory data structure that holds the data in row oriented format.
* \author Tianqi Chen
*/
#ifndef XGBOOST_DATA_SIMPLE_CSR_SOURCE_H_
#define XGBOOST_DATA_SIMPLE_CSR_SOURCE_H_
#include <xgboost/base.h>
#include <xgboost/data.h>
#include <vector>
#include <algorithm>
namespace xgboost {
namespace data {
/*!
* \brief The simplest form of data holder, can be used to create DMatrix.
* This is an in-memory data structure that holds the data in row oriented
* (CSR) format: row_ptr_ stores the row pointers and row_data_ the entries.
* Both vectors are public so that callers can fill them directly.
* \code
* std::unique_ptr<DataSource> source(new SimpleCSRSource());
* // add data to source
* DMatrix* dmat = DMatrix::Create(std::move(source));
* \endcode
*/
class SimpleCSRSource : public DataSource {
public:
// public data members
// MetaInfo info; // inherited from DataSource
/*! \brief row pointer of CSR sparse storage */
std::vector<size_t> row_ptr_;
/*! \brief data in the CSR sparse storage */
std::vector<RowBatch::Entry> row_data_;
// functions
/*!
* \brief default constructor.
* Starts row_ptr_ with a single 0 entry and sets at_first_ to true;
* row_data_ is left empty.
*/
SimpleCSRSource() : row_ptr_(1, 0), at_first_(true) {}
/*! \brief destructor */
virtual ~SimpleCSRSource() {}
/*! \brief clear the data structure */
void Clear();
/*!
* \brief copy content of data from src
* \param src the DMatrix to copy the data from.
*/
void CopyFrom(DMatrix* src);
/*!
* \brief copy content of data from parser, also set the additional information.
* \param src the parser to read the data from.
* \note This overload takes no separate info argument.
* NOTE(review): presumably the additional information ends up in the
* inherited info member - confirm against the implementation.
*/
void CopyFrom(dmlc::Parser<uint32_t>* src);
/*!
* \brief Load data from binary stream.
* \param fi the pointer to load data from.
*/
void LoadBinary(dmlc::Stream* fi);
/*!
* \brief Save data into binary stream
* \param fo The output stream.
*/
void SaveBinary(dmlc::Stream* fo) const;
// The three members below override the iterator interface of the base class.
// implement Next
bool Next() override;
// implement BeforeFirst
void BeforeFirst() override;
// implement Value
const RowBatch &Value() const override;
/*!
* \brief magic number used to identify SimpleCSRSource
* NOTE(review): the literal 0xffffab01 does not fit in a 32-bit signed int,
* so the stored value relies on the unsigned-to-int conversion. Confirm the
* effect on the binary format before changing the type or the literal.
*/
static const int kMagic = 0xffffab01;
private:
/*! \brief internal variable, used to support iterator interface */
bool at_first_;
/*!
* \brief internal RowBatch object.
* NOTE(review): presumably the batch returned by Value() - confirm
* against the implementation.
*/
RowBatch batch_;
};
} // namespace data
} // namespace xgboost
#endif // XGBOOST_DATA_SIMPLE_CSR_SOURCE_H_