diff options
Diffstat (limited to 'NorthstarDLL/include/rapidjson/encodedstream.h')
-rw-r--r-- | NorthstarDLL/include/rapidjson/encodedstream.h | 299 |
1 files changed, 299 insertions, 0 deletions
diff --git a/NorthstarDLL/include/rapidjson/encodedstream.h b/NorthstarDLL/include/rapidjson/encodedstream.h new file mode 100644 index 00000000..14506838 --- /dev/null +++ b/NorthstarDLL/include/rapidjson/encodedstream.h @@ -0,0 +1,299 @@ +// Tencent is pleased to support the open source community by making RapidJSON available. +// +// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved. +// +// Licensed under the MIT License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// http://opensource.org/licenses/MIT +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +#ifndef RAPIDJSON_ENCODEDSTREAM_H_ +#define RAPIDJSON_ENCODEDSTREAM_H_ + +#include "stream.h" +#include "memorystream.h" + +#ifdef __GNUC__ +RAPIDJSON_DIAG_PUSH +RAPIDJSON_DIAG_OFF(effc++) +#endif + +#ifdef __clang__ +RAPIDJSON_DIAG_PUSH +RAPIDJSON_DIAG_OFF(padded) +#endif + +RAPIDJSON_NAMESPACE_BEGIN + +//! Input byte stream wrapper with a statically bound encoding. +/*! + \tparam Encoding The interpretation of encoding of the stream. Either UTF8, UTF16LE, UTF16BE, UTF32LE, UTF32BE. + \tparam InputByteStream Type of input byte stream. For example, FileReadStream. +*/ +template <typename Encoding, typename InputByteStream> +class EncodedInputStream { + RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1); +public: + typedef typename Encoding::Ch Ch; + + EncodedInputStream(InputByteStream& is) : is_(is) { + current_ = Encoding::TakeBOM(is_); + } + + Ch Peek() const { return current_; } + Ch Take() { Ch c = current_; current_ = Encoding::Take(is_); return c; } + size_t Tell() const { return is_.Tell(); } + + // Not implemented + void Put(Ch) { RAPIDJSON_ASSERT(false); } + void Flush() { RAPIDJSON_ASSERT(false); } + Ch* PutBegin() { RAPIDJSON_ASSERT(false); return 0; } + size_t PutEnd(Ch*) { RAPIDJSON_ASSERT(false); return 0; } + +private: + EncodedInputStream(const EncodedInputStream&); + EncodedInputStream& operator=(const EncodedInputStream&); + + InputByteStream& is_; + Ch current_; +}; + +//! Specialized for UTF8 MemoryStream. +template <> +class EncodedInputStream<UTF8<>, MemoryStream> { +public: + typedef UTF8<>::Ch Ch; + + EncodedInputStream(MemoryStream& is) : is_(is) { + if (static_cast<unsigned char>(is_.Peek()) == 0xEFu) is_.Take(); + if (static_cast<unsigned char>(is_.Peek()) == 0xBBu) is_.Take(); + if (static_cast<unsigned char>(is_.Peek()) == 0xBFu) is_.Take(); + } + Ch Peek() const { return is_.Peek(); } + Ch Take() { return is_.Take(); } + size_t Tell() const { return is_.Tell(); } + + // Not implemented + void Put(Ch) {} + void Flush() {} + Ch* PutBegin() { return 0; } + size_t PutEnd(Ch*) { return 0; } + + MemoryStream& is_; + +private: + EncodedInputStream(const EncodedInputStream&); + EncodedInputStream& operator=(const EncodedInputStream&); +}; + +//! Output byte stream wrapper with statically bound encoding. +/*! + \tparam Encoding The interpretation of encoding of the stream. Either UTF8, UTF16LE, UTF16BE, UTF32LE, UTF32BE. + \tparam OutputByteStream Type of input byte stream. For example, FileWriteStream. +*/ +template <typename Encoding, typename OutputByteStream> +class EncodedOutputStream { + RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1); +public: + typedef typename Encoding::Ch Ch; + + EncodedOutputStream(OutputByteStream& os, bool putBOM = true) : os_(os) { + if (putBOM) + Encoding::PutBOM(os_); + } + + void Put(Ch c) { Encoding::Put(os_, c); } + void Flush() { os_.Flush(); } + + // Not implemented + Ch Peek() const { RAPIDJSON_ASSERT(false); return 0;} + Ch Take() { RAPIDJSON_ASSERT(false); return 0;} + size_t Tell() const { RAPIDJSON_ASSERT(false); return 0; } + Ch* PutBegin() { RAPIDJSON_ASSERT(false); return 0; } + size_t PutEnd(Ch*) { RAPIDJSON_ASSERT(false); return 0; } + +private: + EncodedOutputStream(const EncodedOutputStream&); + EncodedOutputStream& operator=(const EncodedOutputStream&); + + OutputByteStream& os_; +}; + +#define RAPIDJSON_ENCODINGS_FUNC(x) UTF8<Ch>::x, UTF16LE<Ch>::x, UTF16BE<Ch>::x, UTF32LE<Ch>::x, UTF32BE<Ch>::x + +//! Input stream wrapper with dynamically bound encoding and automatic encoding detection. +/*! + \tparam CharType Type of character for reading. + \tparam InputByteStream type of input byte stream to be wrapped. +*/ +template <typename CharType, typename InputByteStream> +class AutoUTFInputStream { + RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1); +public: + typedef CharType Ch; + + //! Constructor. + /*! + \param is input stream to be wrapped. + \param type UTF encoding type if it is not detected from the stream. + */ + AutoUTFInputStream(InputByteStream& is, UTFType type = kUTF8) : is_(&is), type_(type), hasBOM_(false) { + RAPIDJSON_ASSERT(type >= kUTF8 && type <= kUTF32BE); + DetectType(); + static const TakeFunc f[] = { RAPIDJSON_ENCODINGS_FUNC(Take) }; + takeFunc_ = f[type_]; + current_ = takeFunc_(*is_); + } + + UTFType GetType() const { return type_; } + bool HasBOM() const { return hasBOM_; } + + Ch Peek() const { return current_; } + Ch Take() { Ch c = current_; current_ = takeFunc_(*is_); return c; } + size_t Tell() const { return is_->Tell(); } + + // Not implemented + void Put(Ch) { RAPIDJSON_ASSERT(false); } + void Flush() { RAPIDJSON_ASSERT(false); } + Ch* PutBegin() { RAPIDJSON_ASSERT(false); return 0; } + size_t PutEnd(Ch*) { RAPIDJSON_ASSERT(false); return 0; } + +private: + AutoUTFInputStream(const AutoUTFInputStream&); + AutoUTFInputStream& operator=(const AutoUTFInputStream&); + + // Detect encoding type with BOM or RFC 4627 + void DetectType() { + // BOM (Byte Order Mark): + // 00 00 FE FF UTF-32BE + // FF FE 00 00 UTF-32LE + // FE FF UTF-16BE + // FF FE UTF-16LE + // EF BB BF UTF-8 + + const unsigned char* c = reinterpret_cast<const unsigned char *>(is_->Peek4()); + if (!c) + return; + + unsigned bom = static_cast<unsigned>(c[0] | (c[1] << 8) | (c[2] << 16) | (c[3] << 24)); + hasBOM_ = false; + if (bom == 0xFFFE0000) { type_ = kUTF32BE; hasBOM_ = true; is_->Take(); is_->Take(); is_->Take(); is_->Take(); } + else if (bom == 0x0000FEFF) { type_ = kUTF32LE; hasBOM_ = true; is_->Take(); is_->Take(); is_->Take(); is_->Take(); } + else if ((bom & 0xFFFF) == 0xFFFE) { type_ = kUTF16BE; hasBOM_ = true; is_->Take(); is_->Take(); } + else if ((bom & 0xFFFF) == 0xFEFF) { type_ = kUTF16LE; hasBOM_ = true; is_->Take(); is_->Take(); } + else if ((bom & 0xFFFFFF) == 0xBFBBEF) { type_ = kUTF8; hasBOM_ = true; is_->Take(); is_->Take(); is_->Take(); } + + // RFC 4627: Section 3 + // "Since the first two characters of a JSON text will always be ASCII + // characters [RFC0020], it is possible to determine whether an octet + // stream is UTF-8, UTF-16 (BE or LE), or UTF-32 (BE or LE) by looking + // at the pattern of nulls in the first four octets." + // 00 00 00 xx UTF-32BE + // 00 xx 00 xx UTF-16BE + // xx 00 00 00 UTF-32LE + // xx 00 xx 00 UTF-16LE + // xx xx xx xx UTF-8 + + if (!hasBOM_) { + unsigned pattern = (c[0] ? 1 : 0) | (c[1] ? 2 : 0) | (c[2] ? 4 : 0) | (c[3] ? 8 : 0); + switch (pattern) { + case 0x08: type_ = kUTF32BE; break; + case 0x0A: type_ = kUTF16BE; break; + case 0x01: type_ = kUTF32LE; break; + case 0x05: type_ = kUTF16LE; break; + case 0x0F: type_ = kUTF8; break; + default: break; // Use type defined by user. + } + } + + // Runtime check whether the size of character type is sufficient. It only perform checks with assertion. + if (type_ == kUTF16LE || type_ == kUTF16BE) RAPIDJSON_ASSERT(sizeof(Ch) >= 2); + if (type_ == kUTF32LE || type_ == kUTF32BE) RAPIDJSON_ASSERT(sizeof(Ch) >= 4); + } + + typedef Ch (*TakeFunc)(InputByteStream& is); + InputByteStream* is_; + UTFType type_; + Ch current_; + TakeFunc takeFunc_; + bool hasBOM_; +}; + +//! Output stream wrapper with dynamically bound encoding and automatic encoding detection. +/*! + \tparam CharType Type of character for writing. + \tparam OutputByteStream type of output byte stream to be wrapped. +*/ +template <typename CharType, typename OutputByteStream> +class AutoUTFOutputStream { + RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1); +public: + typedef CharType Ch; + + //! Constructor. + /*! + \param os output stream to be wrapped. + \param type UTF encoding type. + \param putBOM Whether to write BOM at the beginning of the stream. + */ + AutoUTFOutputStream(OutputByteStream& os, UTFType type, bool putBOM) : os_(&os), type_(type) { + RAPIDJSON_ASSERT(type >= kUTF8 && type <= kUTF32BE); + + // Runtime check whether the size of character type is sufficient. It only perform checks with assertion. + if (type_ == kUTF16LE || type_ == kUTF16BE) RAPIDJSON_ASSERT(sizeof(Ch) >= 2); + if (type_ == kUTF32LE || type_ == kUTF32BE) RAPIDJSON_ASSERT(sizeof(Ch) >= 4); + + static const PutFunc f[] = { RAPIDJSON_ENCODINGS_FUNC(Put) }; + putFunc_ = f[type_]; + + if (putBOM) + PutBOM(); + } + + UTFType GetType() const { return type_; } + + void Put(Ch c) { putFunc_(*os_, c); } + void Flush() { os_->Flush(); } + + // Not implemented + Ch Peek() const { RAPIDJSON_ASSERT(false); return 0;} + Ch Take() { RAPIDJSON_ASSERT(false); return 0;} + size_t Tell() const { RAPIDJSON_ASSERT(false); return 0; } + Ch* PutBegin() { RAPIDJSON_ASSERT(false); return 0; } + size_t PutEnd(Ch*) { RAPIDJSON_ASSERT(false); return 0; } + +private: + AutoUTFOutputStream(const AutoUTFOutputStream&); + AutoUTFOutputStream& operator=(const AutoUTFOutputStream&); + + void PutBOM() { + typedef void (*PutBOMFunc)(OutputByteStream&); + static const PutBOMFunc f[] = { RAPIDJSON_ENCODINGS_FUNC(PutBOM) }; + f[type_](*os_); + } + + typedef void (*PutFunc)(OutputByteStream&, Ch); + + OutputByteStream* os_; + UTFType type_; + PutFunc putFunc_; +}; + +#undef RAPIDJSON_ENCODINGS_FUNC + +RAPIDJSON_NAMESPACE_END + +#ifdef __clang__ +RAPIDJSON_DIAG_POP +#endif + +#ifdef __GNUC__ +RAPIDJSON_DIAG_POP +#endif + +#endif // RAPIDJSON_FILESTREAM_H_ |