/*! * Copyright 2014 by Contributors * \file base64.h * \brief data stream support to input and output from/to base64 stream * base64 is easier to store and pass as text format in mapreduce * \author Tianqi Chen */ #ifndef XGBOOST_UTILS_BASE64_INL_H_ #define XGBOOST_UTILS_BASE64_INL_H_ #include #include #include #include "./io.h" namespace xgboost { namespace utils { /*! \brief buffer reader of the stream that allows you to get */ class StreamBufferReader { public: explicit StreamBufferReader(size_t buffer_size) :stream_(NULL), read_len_(1), read_ptr_(1) { buffer_.resize(buffer_size); } /*! * \brief set input stream */ inline void set_stream(IStream *stream) { stream_ = stream; read_len_ = read_ptr_ = 1; } /*! * \brief allows quick read using get char */ inline char GetChar(void) { while (true) { if (read_ptr_ < read_len_) { return buffer_[read_ptr_++]; } else { read_len_ = stream_->Read(&buffer_[0], buffer_.length()); if (read_len_ == 0) return EOF; read_ptr_ = 0; } } } /*! \brief whether we are reaching the end of file */ inline bool AtEnd(void) const { return read_len_ == 0; } private: /*! \brief the underlying stream */ IStream *stream_; /*! \brief buffer to hold data */ std::string buffer_; /*! \brief length of valid data in buffer */ size_t read_len_; /*! \brief pointer in the buffer */ size_t read_ptr_; }; /*! \brief namespace of base64 decoding and encoding table */ namespace base64 { const char DecodeTable[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 62, // '+' 0, 0, 0, 63, // '/' 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, // '0'-'9' 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, // 'A'-'Z' 0, 0, 0, 0, 0, 0, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, // 'a'-'z' }; static const char EncodeTable[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; } // namespace base64 /*! \brief the stream that reads from base64, note we take from file pointers */ class Base64InStream: public IStream { public: explicit Base64InStream(IStream *fs) : reader_(256) { reader_.set_stream(fs); num_prev = 0; tmp_ch = 0; } /*! * \brief initialize the stream position to beginning of next base64 stream * call this function before actually start read */ inline void InitPosition(void) { // get a character do { tmp_ch = reader_.GetChar(); } while (isspace(tmp_ch)); } /*! \brief whether current position is end of a base64 stream */ inline bool IsEOF(void) const { return num_prev == 0 && (tmp_ch == EOF || isspace(tmp_ch)); } virtual size_t Read(void *ptr, size_t size) { using base64::DecodeTable; if (size == 0) return 0; // use tlen to record left size size_t tlen = size; unsigned char *cptr = static_cast(ptr); // if anything left, load from previous buffered result if (num_prev != 0) { if (num_prev == 2) { if (tlen >= 2) { *cptr++ = buf_prev[0]; *cptr++ = buf_prev[1]; tlen -= 2; num_prev = 0; } else { // assert tlen == 1 *cptr++ = buf_prev[0]; --tlen; buf_prev[0] = buf_prev[1]; num_prev = 1; } } else { // assert num_prev == 1 *cptr++ = buf_prev[0]; --tlen; num_prev = 0; } } if (tlen == 0) return size; int nvalue; // note: everything goes with 4 bytes in Base64 // so we process 4 bytes a unit while (tlen && tmp_ch != EOF && !isspace(tmp_ch)) { // first byte nvalue = DecodeTable[tmp_ch] << 18; { // second byte utils::Check((tmp_ch = reader_.GetChar(), tmp_ch != EOF && !isspace(tmp_ch)), "invalid base64 format"); nvalue |= DecodeTable[tmp_ch] << 12; *cptr++ = (nvalue >> 16) & 0xFF; --tlen; } { // third byte utils::Check((tmp_ch = reader_.GetChar(), tmp_ch != EOF && !isspace(tmp_ch)), "invalid base64 format"); // handle termination if (tmp_ch == '=') { utils::Check((tmp_ch = reader_.GetChar(), tmp_ch == '='), "invalid base64 format"); utils::Check((tmp_ch = reader_.GetChar(), tmp_ch == EOF || isspace(tmp_ch)), "invalid base64 format"); break; } nvalue |= DecodeTable[tmp_ch] << 6; if (tlen) { *cptr++ = (nvalue >> 8) & 0xFF; --tlen; } else { buf_prev[num_prev++] = (nvalue >> 8) & 0xFF; } } { // fourth byte utils::Check((tmp_ch = reader_.GetChar(), tmp_ch != EOF && !isspace(tmp_ch)), "invalid base64 format"); if (tmp_ch == '=') { utils::Check((tmp_ch = reader_.GetChar(), tmp_ch == EOF || isspace(tmp_ch)), "invalid base64 format"); break; } nvalue |= DecodeTable[tmp_ch]; if (tlen) { *cptr++ = nvalue & 0xFF; --tlen; } else { buf_prev[num_prev ++] = nvalue & 0xFF; } } // get next char tmp_ch = reader_.GetChar(); } if (kStrictCheck) { utils::Check(tlen == 0, "Base64InStream: read incomplete"); } return size - tlen; } virtual void Write(const void *ptr, size_t size) { utils::Error("Base64InStream do not support write"); } private: StreamBufferReader reader_; int tmp_ch; int num_prev; unsigned char buf_prev[2]; // whether we need to do strict check static const bool kStrictCheck = false; }; /*! \brief the stream that write to base64, note we take from file pointers */ class Base64OutStream: public IStream { public: explicit Base64OutStream(IStream *fp) : fp(fp) { buf_top = 0; } virtual void Write(const void *ptr, size_t size) { using base64::EncodeTable; size_t tlen = size; const unsigned char *cptr = static_cast(ptr); while (tlen) { while (buf_top < 3 && tlen != 0) { buf[++buf_top] = *cptr++; --tlen; } if (buf_top == 3) { // flush 4 bytes out PutChar(EncodeTable[buf[1] >> 2]); PutChar(EncodeTable[((buf[1] << 4) | (buf[2] >> 4)) & 0x3F]); PutChar(EncodeTable[((buf[2] << 2) | (buf[3] >> 6)) & 0x3F]); PutChar(EncodeTable[buf[3] & 0x3F]); buf_top = 0; } } } virtual size_t Read(void *ptr, size_t size) { utils::Error("Base64OutStream do not support read"); return 0; } /*! * \brief finish writing of all current base64 stream, do some post processing * \param endch character to put to end of stream, if it is EOF, then nothing will be done */ inline void Finish(char endch = EOF) { using base64::EncodeTable; if (buf_top == 1) { PutChar(EncodeTable[buf[1] >> 2]); PutChar(EncodeTable[(buf[1] << 4) & 0x3F]); PutChar('='); PutChar('='); } if (buf_top == 2) { PutChar(EncodeTable[buf[1] >> 2]); PutChar(EncodeTable[((buf[1] << 4) | (buf[2] >> 4)) & 0x3F]); PutChar(EncodeTable[(buf[2] << 2) & 0x3F]); PutChar('='); } buf_top = 0; if (endch != EOF) PutChar(endch); this->Flush(); } private: IStream *fp; int buf_top; unsigned char buf[4]; std::string out_buf; static const size_t kBufferSize = 256; inline void PutChar(char ch) { out_buf += ch; if (out_buf.length() >= kBufferSize) Flush(); } inline void Flush(void) { if (out_buf.length() != 0) { fp->Write(&out_buf[0], out_buf.length()); out_buf.clear(); } } }; } // namespace utils } // namespace xgboost #endif // XGBOOST_UTILS_BASE64_INL_H_