diff options
Diffstat (limited to 'include/rapidjson/encodedstream.h')
-rw-r--r-- | include/rapidjson/encodedstream.h | 299 |
1 files changed, 0 insertions, 299 deletions
diff --git a/include/rapidjson/encodedstream.h b/include/rapidjson/encodedstream.h deleted file mode 100644 index 14506838..00000000 --- a/include/rapidjson/encodedstream.h +++ /dev/null @@ -1,299 +0,0 @@ -// Tencent is pleased to support the open source community by making RapidJSON available. -// -// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved. -// -// Licensed under the MIT License (the "License"); you may not use this file except -// in compliance with the License. You may obtain a copy of the License at -// -// http://opensource.org/licenses/MIT -// -// Unless required by applicable law or agreed to in writing, software distributed -// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR -// CONDITIONS OF ANY KIND, either express or implied. See the License for the -// specific language governing permissions and limitations under the License. - -#ifndef RAPIDJSON_ENCODEDSTREAM_H_ -#define RAPIDJSON_ENCODEDSTREAM_H_ - -#include "stream.h" -#include "memorystream.h" - -#ifdef __GNUC__ -RAPIDJSON_DIAG_PUSH -RAPIDJSON_DIAG_OFF(effc++) -#endif - -#ifdef __clang__ -RAPIDJSON_DIAG_PUSH -RAPIDJSON_DIAG_OFF(padded) -#endif - -RAPIDJSON_NAMESPACE_BEGIN - -//! Input byte stream wrapper with a statically bound encoding. -/*! - \tparam Encoding The interpretation of encoding of the stream. Either UTF8, UTF16LE, UTF16BE, UTF32LE, UTF32BE. - \tparam InputByteStream Type of input byte stream. For example, FileReadStream. -*/ -template <typename Encoding, typename InputByteStream> -class EncodedInputStream { - RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1); -public: - typedef typename Encoding::Ch Ch; - - EncodedInputStream(InputByteStream& is) : is_(is) { - current_ = Encoding::TakeBOM(is_); - } - - Ch Peek() const { return current_; } - Ch Take() { Ch c = current_; current_ = Encoding::Take(is_); return c; } - size_t Tell() const { return is_.Tell(); } - - // Not implemented - void Put(Ch) { RAPIDJSON_ASSERT(false); } - void Flush() { RAPIDJSON_ASSERT(false); } - Ch* PutBegin() { RAPIDJSON_ASSERT(false); return 0; } - size_t PutEnd(Ch*) { RAPIDJSON_ASSERT(false); return 0; } - -private: - EncodedInputStream(const EncodedInputStream&); - EncodedInputStream& operator=(const EncodedInputStream&); - - InputByteStream& is_; - Ch current_; -}; - -//! Specialized for UTF8 MemoryStream. -template <> -class EncodedInputStream<UTF8<>, MemoryStream> { -public: - typedef UTF8<>::Ch Ch; - - EncodedInputStream(MemoryStream& is) : is_(is) { - if (static_cast<unsigned char>(is_.Peek()) == 0xEFu) is_.Take(); - if (static_cast<unsigned char>(is_.Peek()) == 0xBBu) is_.Take(); - if (static_cast<unsigned char>(is_.Peek()) == 0xBFu) is_.Take(); - } - Ch Peek() const { return is_.Peek(); } - Ch Take() { return is_.Take(); } - size_t Tell() const { return is_.Tell(); } - - // Not implemented - void Put(Ch) {} - void Flush() {} - Ch* PutBegin() { return 0; } - size_t PutEnd(Ch*) { return 0; } - - MemoryStream& is_; - -private: - EncodedInputStream(const EncodedInputStream&); - EncodedInputStream& operator=(const EncodedInputStream&); -}; - -//! Output byte stream wrapper with statically bound encoding. -/*! - \tparam Encoding The interpretation of encoding of the stream. Either UTF8, UTF16LE, UTF16BE, UTF32LE, UTF32BE. - \tparam OutputByteStream Type of input byte stream. For example, FileWriteStream. -*/ -template <typename Encoding, typename OutputByteStream> -class EncodedOutputStream { - RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1); -public: - typedef typename Encoding::Ch Ch; - - EncodedOutputStream(OutputByteStream& os, bool putBOM = true) : os_(os) { - if (putBOM) - Encoding::PutBOM(os_); - } - - void Put(Ch c) { Encoding::Put(os_, c); } - void Flush() { os_.Flush(); } - - // Not implemented - Ch Peek() const { RAPIDJSON_ASSERT(false); return 0;} - Ch Take() { RAPIDJSON_ASSERT(false); return 0;} - size_t Tell() const { RAPIDJSON_ASSERT(false); return 0; } - Ch* PutBegin() { RAPIDJSON_ASSERT(false); return 0; } - size_t PutEnd(Ch*) { RAPIDJSON_ASSERT(false); return 0; } - -private: - EncodedOutputStream(const EncodedOutputStream&); - EncodedOutputStream& operator=(const EncodedOutputStream&); - - OutputByteStream& os_; -}; - -#define RAPIDJSON_ENCODINGS_FUNC(x) UTF8<Ch>::x, UTF16LE<Ch>::x, UTF16BE<Ch>::x, UTF32LE<Ch>::x, UTF32BE<Ch>::x - -//! Input stream wrapper with dynamically bound encoding and automatic encoding detection. -/*! - \tparam CharType Type of character for reading. - \tparam InputByteStream type of input byte stream to be wrapped. -*/ -template <typename CharType, typename InputByteStream> -class AutoUTFInputStream { - RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1); -public: - typedef CharType Ch; - - //! Constructor. - /*! - \param is input stream to be wrapped. - \param type UTF encoding type if it is not detected from the stream. - */ - AutoUTFInputStream(InputByteStream& is, UTFType type = kUTF8) : is_(&is), type_(type), hasBOM_(false) { - RAPIDJSON_ASSERT(type >= kUTF8 && type <= kUTF32BE); - DetectType(); - static const TakeFunc f[] = { RAPIDJSON_ENCODINGS_FUNC(Take) }; - takeFunc_ = f[type_]; - current_ = takeFunc_(*is_); - } - - UTFType GetType() const { return type_; } - bool HasBOM() const { return hasBOM_; } - - Ch Peek() const { return current_; } - Ch Take() { Ch c = current_; current_ = takeFunc_(*is_); return c; } - size_t Tell() const { return is_->Tell(); } - - // Not implemented - void Put(Ch) { RAPIDJSON_ASSERT(false); } - void Flush() { RAPIDJSON_ASSERT(false); } - Ch* PutBegin() { RAPIDJSON_ASSERT(false); return 0; } - size_t PutEnd(Ch*) { RAPIDJSON_ASSERT(false); return 0; } - -private: - AutoUTFInputStream(const AutoUTFInputStream&); - AutoUTFInputStream& operator=(const AutoUTFInputStream&); - - // Detect encoding type with BOM or RFC 4627 - void DetectType() { - // BOM (Byte Order Mark): - // 00 00 FE FF UTF-32BE - // FF FE 00 00 UTF-32LE - // FE FF UTF-16BE - // FF FE UTF-16LE - // EF BB BF UTF-8 - - const unsigned char* c = reinterpret_cast<const unsigned char *>(is_->Peek4()); - if (!c) - return; - - unsigned bom = static_cast<unsigned>(c[0] | (c[1] << 8) | (c[2] << 16) | (c[3] << 24)); - hasBOM_ = false; - if (bom == 0xFFFE0000) { type_ = kUTF32BE; hasBOM_ = true; is_->Take(); is_->Take(); is_->Take(); is_->Take(); } - else if (bom == 0x0000FEFF) { type_ = kUTF32LE; hasBOM_ = true; is_->Take(); is_->Take(); is_->Take(); is_->Take(); } - else if ((bom & 0xFFFF) == 0xFFFE) { type_ = kUTF16BE; hasBOM_ = true; is_->Take(); is_->Take(); } - else if ((bom & 0xFFFF) == 0xFEFF) { type_ = kUTF16LE; hasBOM_ = true; is_->Take(); is_->Take(); } - else if ((bom & 0xFFFFFF) == 0xBFBBEF) { type_ = kUTF8; hasBOM_ = true; is_->Take(); is_->Take(); is_->Take(); } - - // RFC 4627: Section 3 - // "Since the first two characters of a JSON text will always be ASCII - // characters [RFC0020], it is possible to determine whether an octet - // stream is UTF-8, UTF-16 (BE or LE), or UTF-32 (BE or LE) by looking - // at the pattern of nulls in the first four octets." - // 00 00 00 xx UTF-32BE - // 00 xx 00 xx UTF-16BE - // xx 00 00 00 UTF-32LE - // xx 00 xx 00 UTF-16LE - // xx xx xx xx UTF-8 - - if (!hasBOM_) { - unsigned pattern = (c[0] ? 1 : 0) | (c[1] ? 2 : 0) | (c[2] ? 4 : 0) | (c[3] ? 8 : 0); - switch (pattern) { - case 0x08: type_ = kUTF32BE; break; - case 0x0A: type_ = kUTF16BE; break; - case 0x01: type_ = kUTF32LE; break; - case 0x05: type_ = kUTF16LE; break; - case 0x0F: type_ = kUTF8; break; - default: break; // Use type defined by user. - } - } - - // Runtime check whether the size of character type is sufficient. It only perform checks with assertion. - if (type_ == kUTF16LE || type_ == kUTF16BE) RAPIDJSON_ASSERT(sizeof(Ch) >= 2); - if (type_ == kUTF32LE || type_ == kUTF32BE) RAPIDJSON_ASSERT(sizeof(Ch) >= 4); - } - - typedef Ch (*TakeFunc)(InputByteStream& is); - InputByteStream* is_; - UTFType type_; - Ch current_; - TakeFunc takeFunc_; - bool hasBOM_; -}; - -//! Output stream wrapper with dynamically bound encoding and automatic encoding detection. -/*! - \tparam CharType Type of character for writing. - \tparam OutputByteStream type of output byte stream to be wrapped. -*/ -template <typename CharType, typename OutputByteStream> -class AutoUTFOutputStream { - RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1); -public: - typedef CharType Ch; - - //! Constructor. - /*! - \param os output stream to be wrapped. - \param type UTF encoding type. - \param putBOM Whether to write BOM at the beginning of the stream. - */ - AutoUTFOutputStream(OutputByteStream& os, UTFType type, bool putBOM) : os_(&os), type_(type) { - RAPIDJSON_ASSERT(type >= kUTF8 && type <= kUTF32BE); - - // Runtime check whether the size of character type is sufficient. It only perform checks with assertion. - if (type_ == kUTF16LE || type_ == kUTF16BE) RAPIDJSON_ASSERT(sizeof(Ch) >= 2); - if (type_ == kUTF32LE || type_ == kUTF32BE) RAPIDJSON_ASSERT(sizeof(Ch) >= 4); - - static const PutFunc f[] = { RAPIDJSON_ENCODINGS_FUNC(Put) }; - putFunc_ = f[type_]; - - if (putBOM) - PutBOM(); - } - - UTFType GetType() const { return type_; } - - void Put(Ch c) { putFunc_(*os_, c); } - void Flush() { os_->Flush(); } - - // Not implemented - Ch Peek() const { RAPIDJSON_ASSERT(false); return 0;} - Ch Take() { RAPIDJSON_ASSERT(false); return 0;} - size_t Tell() const { RAPIDJSON_ASSERT(false); return 0; } - Ch* PutBegin() { RAPIDJSON_ASSERT(false); return 0; } - size_t PutEnd(Ch*) { RAPIDJSON_ASSERT(false); return 0; } - -private: - AutoUTFOutputStream(const AutoUTFOutputStream&); - AutoUTFOutputStream& operator=(const AutoUTFOutputStream&); - - void PutBOM() { - typedef void (*PutBOMFunc)(OutputByteStream&); - static const PutBOMFunc f[] = { RAPIDJSON_ENCODINGS_FUNC(PutBOM) }; - f[type_](*os_); - } - - typedef void (*PutFunc)(OutputByteStream&, Ch); - - OutputByteStream* os_; - UTFType type_; - PutFunc putFunc_; -}; - -#undef RAPIDJSON_ENCODINGS_FUNC - -RAPIDJSON_NAMESPACE_END - -#ifdef __clang__ -RAPIDJSON_DIAG_POP -#endif - -#ifdef __GNUC__ -RAPIDJSON_DIAG_POP -#endif - -#endif // RAPIDJSON_FILESTREAM_H_ |