Merge commit 'a16289b2047a7c2ec36667f6031dbb648e4d2caa'
This commit is contained in:
@@ -14,6 +14,7 @@
|
||||
|
||||
// include uint64_t only to make io standalone
|
||||
#ifdef _MSC_VER
|
||||
/*! \brief uint64 */
|
||||
typedef unsigned __int64 uint64_t;
|
||||
#else
|
||||
#include <inttypes.h>
|
||||
@@ -24,7 +25,7 @@ namespace dmlc {
|
||||
/*!
|
||||
* \brief interface of stream I/O for serialization
|
||||
*/
|
||||
class Stream {
|
||||
class Stream { // NOLINT(*)
|
||||
public:
|
||||
/*!
|
||||
* \brief reads data from a stream
|
||||
@@ -71,7 +72,7 @@ class Stream {
|
||||
/*!
|
||||
* \brief writes a string
|
||||
* \param str the string to be written/serialized
|
||||
*/
|
||||
*/
|
||||
inline void Write(const std::string &str);
|
||||
/*!
|
||||
* \brief loads a string
|
||||
@@ -94,7 +95,7 @@ class SeekStream: public Stream {
|
||||
* \brief generic factory function
|
||||
* create a SeekStream for read only,
|
||||
* the stream will close the underlying files upon deletion
|
||||
* error will be reported and the system will exit when create failed
|
||||
* error will be reported and the system will exit when create failed
|
||||
* \param uri the uri of the input; currently we support
|
||||
* hdfs://, s3://, and file://; by default file:// will be used
|
||||
* \param allow_null whether NULL can be returned, or directly report error
|
||||
@@ -107,12 +108,12 @@ class SeekStream: public Stream {
|
||||
/*! \brief interface for serializable objects */
|
||||
class Serializable {
|
||||
public:
|
||||
/*!
|
||||
/*!
|
||||
* \brief load the model from a stream
|
||||
* \param fi stream where to load the model from
|
||||
*/
|
||||
virtual void Load(Stream *fi) = 0;
|
||||
/*!
|
||||
/*!
|
||||
* \brief saves the model to a stream
|
||||
* \param fo stream where to save the model to
|
||||
*/
|
||||
@@ -123,7 +124,7 @@ class Serializable {
|
||||
* \brief input split that allows reading
|
||||
* of records from split of data,
|
||||
* independent part that covers all the dataset
|
||||
*
|
||||
*
|
||||
* see InputSplit::Create for definition of record
|
||||
*/
|
||||
class InputSplit {
|
||||
@@ -141,7 +142,7 @@ class InputSplit {
|
||||
* this is a hint so may not be enforced,
|
||||
* but InputSplit will try to adjust its internal buffer
|
||||
* size to the hinted value
|
||||
* \param chunk_size the chunk size
|
||||
* \param chunk_size the chunk size
|
||||
*/
|
||||
virtual void HintChunkSize(size_t chunk_size) {}
|
||||
/*! \brief reset the position of InputSplit to beginning */
|
||||
@@ -150,7 +151,7 @@ class InputSplit {
|
||||
* \brief get the next record, the returning value
|
||||
* is valid until next call to NextRecord or NextChunk
|
||||
* caller can modify the memory content of out_rec
|
||||
*
|
||||
*
|
||||
* For text, out_rec contains a single line
|
||||
* For recordio, out_rec contains one record content (with header stripped)
|
||||
*
|
||||
@@ -161,11 +162,11 @@ class InputSplit {
|
||||
*/
|
||||
virtual bool NextRecord(Blob *out_rec) = 0;
|
||||
/*!
|
||||
* \brief get a chunk of memory that can contain multiple records,
|
||||
* \brief get a chunk of memory that can contain multiple records,
|
||||
* the caller needs to parse the content of the resulting chunk,
|
||||
* for text file, out_chunk can contain data of multiple lines
|
||||
* for recordio, out_chunk can contain multiple records(including headers)
|
||||
*
|
||||
*
|
||||
* This function ensures there won't be partial record in the chunk
|
||||
* caller can modify the memory content of out_chunk,
|
||||
* the memory is valid until next call to NextRecord or NextChunk
|
||||
@@ -192,9 +193,10 @@ class InputSplit {
|
||||
* List of possible types: "text", "recordio"
|
||||
* - "text":
|
||||
* text file, each line is treated as a record
|
||||
* input split will split on \n or \r
|
||||
* input split will split on '\\n' or '\\r'
|
||||
* - "recordio":
|
||||
* binary recordio file, see recordio.h
|
||||
* \return a new input split
|
||||
* \sa InputSplit::Type
|
||||
*/
|
||||
static InputSplit* Create(const char *uri,
|
||||
@@ -224,7 +226,7 @@ class ostream : public std::basic_ostream<char> {
|
||||
* \param buffer_size internal streambuf size
|
||||
*/
|
||||
explicit ostream(Stream *stream,
|
||||
size_t buffer_size = 1 << 10)
|
||||
size_t buffer_size = (1 << 10))
|
||||
: std::basic_ostream<char>(NULL), buf_(buffer_size) {
|
||||
this->set_stream(stream);
|
||||
}
|
||||
@@ -240,7 +242,7 @@ class ostream : public std::basic_ostream<char> {
|
||||
buf_.set_stream(stream);
|
||||
this->rdbuf(&buf_);
|
||||
}
|
||||
|
||||
|
||||
private:
|
||||
// internal streambuf
|
||||
class OutBuf : public std::streambuf {
|
||||
@@ -251,7 +253,7 @@ class ostream : public std::basic_ostream<char> {
|
||||
}
|
||||
// set stream to the buffer
|
||||
inline void set_stream(Stream *stream);
|
||||
|
||||
|
||||
private:
|
||||
/*! \brief internal stream by StreamBuf */
|
||||
Stream *stream_;
|
||||
@@ -287,7 +289,7 @@ class istream : public std::basic_istream<char> {
|
||||
* \param buffer_size internal buffer size
|
||||
*/
|
||||
explicit istream(Stream *stream,
|
||||
size_t buffer_size = 1 << 10)
|
||||
size_t buffer_size = (1 << 10))
|
||||
: std::basic_istream<char>(NULL), buf_(buffer_size) {
|
||||
this->set_stream(stream);
|
||||
}
|
||||
@@ -325,7 +327,7 @@ class istream : public std::basic_istream<char> {
|
||||
Stream *stream_;
|
||||
/*! \brief how many bytes we read so far */
|
||||
size_t bytes_read_;
|
||||
/*! \brief internal buffer */
|
||||
/*! \brief internal buffer */
|
||||
std::vector<char> buffer_;
|
||||
// override underflow
|
||||
inline int_type underflow();
|
||||
@@ -402,7 +404,7 @@ inline int ostream::OutBuf::overflow(int c) {
|
||||
// implementations for istream
|
||||
inline void istream::InBuf::set_stream(Stream *stream) {
|
||||
stream_ = stream;
|
||||
this->setg(&buffer_[0], &buffer_[0], &buffer_[0]);
|
||||
this->setg(&buffer_[0], &buffer_[0], &buffer_[0]);
|
||||
}
|
||||
inline int istream::InBuf::underflow() {
|
||||
char *bhead = &buffer_[0];
|
||||
|
||||
Reference in New Issue
Block a user