diff options
Diffstat (limited to 'NorthstarDedicatedTest/include/protobuf/io')
25 files changed, 13296 insertions, 0 deletions
diff --git a/NorthstarDedicatedTest/include/protobuf/io/coded_stream.cc b/NorthstarDedicatedTest/include/protobuf/io/coded_stream.cc new file mode 100644 index 00000000..9c50c56e --- /dev/null +++ b/NorthstarDedicatedTest/include/protobuf/io/coded_stream.cc @@ -0,0 +1,978 @@ +// Protocol Buffers - Google's data interchange format +// Copyright 2008 Google Inc. All rights reserved. +// https://developers.google.com/protocol-buffers/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// Author: kenton@google.com (Kenton Varda) +// Based on original Protocol Buffers design by +// Sanjay Ghemawat, Jeff Dean, and others. +// +// This implementation is heavily optimized to make reads and writes +// of small values (especially varints) as fast as possible. In +// particular, we optimize for the common case that a read or a write +// will not cross the end of the buffer, since we can avoid a lot +// of branching in this case. + +#include <io/coded_stream.h> + +#include <limits.h> + +#include <algorithm> +#include <cstring> +#include <utility> + +#include <stubs/logging.h> +#include <stubs/common.h> +#include <arena.h> +#include <io/zero_copy_stream.h> +#include <io/zero_copy_stream_impl_lite.h> +#include <stubs/stl_util.h> + + +#include <port_def.inc> + +namespace google { +namespace protobuf { +namespace io { + +namespace { + +static const int kMaxVarintBytes = 10; +static const int kMaxVarint32Bytes = 5; + + +inline bool NextNonEmpty(ZeroCopyInputStream* input, const void** data, + int* size) { + bool success; + do { + success = input->Next(data, size); + } while (success && *size == 0); + return success; +} + +} // namespace + +// CodedInputStream ================================================== + +CodedInputStream::~CodedInputStream() { + if (input_ != NULL) { + BackUpInputToCurrentPosition(); + } +} + +// Static. +int CodedInputStream::default_recursion_limit_ = 100; + + +void CodedInputStream::BackUpInputToCurrentPosition() { + int backup_bytes = BufferSize() + buffer_size_after_limit_ + overflow_bytes_; + if (backup_bytes > 0) { + input_->BackUp(backup_bytes); + + // total_bytes_read_ doesn't include overflow_bytes_. + total_bytes_read_ -= BufferSize() + buffer_size_after_limit_; + buffer_end_ = buffer_; + buffer_size_after_limit_ = 0; + overflow_bytes_ = 0; + } +} + +inline void CodedInputStream::RecomputeBufferLimits() { + buffer_end_ += buffer_size_after_limit_; + int closest_limit = std::min(current_limit_, total_bytes_limit_); + if (closest_limit < total_bytes_read_) { + // The limit position is in the current buffer. We must adjust + // the buffer size accordingly. + buffer_size_after_limit_ = total_bytes_read_ - closest_limit; + buffer_end_ -= buffer_size_after_limit_; + } else { + buffer_size_after_limit_ = 0; + } +} + +CodedInputStream::Limit CodedInputStream::PushLimit(int byte_limit) { + // Current position relative to the beginning of the stream. + int current_position = CurrentPosition(); + + Limit old_limit = current_limit_; + + // security: byte_limit is possibly evil, so check for negative values + // and overflow. Also check that the new requested limit is before the + // previous limit; otherwise we continue to enforce the previous limit. + if (PROTOBUF_PREDICT_TRUE(byte_limit >= 0 && + byte_limit <= INT_MAX - current_position && + byte_limit < current_limit_ - current_position)) { + current_limit_ = current_position + byte_limit; + RecomputeBufferLimits(); + } + + return old_limit; +} + +void CodedInputStream::PopLimit(Limit limit) { + // The limit passed in is actually the *old* limit, which we returned from + // PushLimit(). + current_limit_ = limit; + RecomputeBufferLimits(); + + // We may no longer be at a legitimate message end. ReadTag() needs to be + // called again to find out. + legitimate_message_end_ = false; +} + +std::pair<CodedInputStream::Limit, int> +CodedInputStream::IncrementRecursionDepthAndPushLimit(int byte_limit) { + return std::make_pair(PushLimit(byte_limit), --recursion_budget_); +} + +CodedInputStream::Limit CodedInputStream::ReadLengthAndPushLimit() { + uint32_t length; + return PushLimit(ReadVarint32(&length) ? length : 0); +} + +bool CodedInputStream::DecrementRecursionDepthAndPopLimit(Limit limit) { + bool result = ConsumedEntireMessage(); + PopLimit(limit); + GOOGLE_DCHECK_LT(recursion_budget_, recursion_limit_); + ++recursion_budget_; + return result; +} + +bool CodedInputStream::CheckEntireMessageConsumedAndPopLimit(Limit limit) { + bool result = ConsumedEntireMessage(); + PopLimit(limit); + return result; +} + +int CodedInputStream::BytesUntilLimit() const { + if (current_limit_ == INT_MAX) return -1; + int current_position = CurrentPosition(); + + return current_limit_ - current_position; +} + +void CodedInputStream::SetTotalBytesLimit(int total_bytes_limit) { + // Make sure the limit isn't already past, since this could confuse other + // code. + int current_position = CurrentPosition(); + total_bytes_limit_ = std::max(current_position, total_bytes_limit); + RecomputeBufferLimits(); +} + +int CodedInputStream::BytesUntilTotalBytesLimit() const { + if (total_bytes_limit_ == INT_MAX) return -1; + return total_bytes_limit_ - CurrentPosition(); +} + +void CodedInputStream::PrintTotalBytesLimitError() { + GOOGLE_LOG(ERROR) + << "A protocol message was rejected because it was too " + "big (more than " + << total_bytes_limit_ + << " bytes). To increase the limit (or to disable these " + "warnings), see CodedInputStream::SetTotalBytesLimit() " + "in third_party/protobuf/src/google/protobuf/io/coded_stream.h."; +} + +bool CodedInputStream::SkipFallback(int count, int original_buffer_size) { + if (buffer_size_after_limit_ > 0) { + // We hit a limit inside this buffer. Advance to the limit and fail. + Advance(original_buffer_size); + return false; + } + + count -= original_buffer_size; + buffer_ = NULL; + buffer_end_ = buffer_; + + // Make sure this skip doesn't try to skip past the current limit. + int closest_limit = std::min(current_limit_, total_bytes_limit_); + int bytes_until_limit = closest_limit - total_bytes_read_; + if (bytes_until_limit < count) { + // We hit the limit. Skip up to it then fail. + if (bytes_until_limit > 0) { + total_bytes_read_ = closest_limit; + input_->Skip(bytes_until_limit); + } + return false; + } + + if (!input_->Skip(count)) { + total_bytes_read_ = input_->ByteCount(); + return false; + } + total_bytes_read_ += count; + return true; +} + +bool CodedInputStream::GetDirectBufferPointer(const void** data, int* size) { + if (BufferSize() == 0 && !Refresh()) return false; + + *data = buffer_; + *size = BufferSize(); + return true; +} + +bool CodedInputStream::ReadRaw(void* buffer, int size) { + int current_buffer_size; + while ((current_buffer_size = BufferSize()) < size) { + // Reading past end of buffer. Copy what we have, then refresh. + memcpy(buffer, buffer_, current_buffer_size); + buffer = reinterpret_cast<uint8_t*>(buffer) + current_buffer_size; + size -= current_buffer_size; + Advance(current_buffer_size); + if (!Refresh()) return false; + } + + memcpy(buffer, buffer_, size); + Advance(size); + + return true; +} + +bool CodedInputStream::ReadString(std::string* buffer, int size) { + if (size < 0) return false; // security: size is often user-supplied + + if (BufferSize() >= size) { + STLStringResizeUninitialized(buffer, size); + std::pair<char*, bool> z = as_string_data(buffer); + if (z.second) { + // Oddly enough, memcpy() requires its first two args to be non-NULL even + // if we copy 0 bytes. So, we have ensured that z.first is non-NULL here. + GOOGLE_DCHECK(z.first != NULL); + memcpy(z.first, buffer_, size); + Advance(size); + } + return true; + } + + return ReadStringFallback(buffer, size); +} + +bool CodedInputStream::ReadStringFallback(std::string* buffer, int size) { + if (!buffer->empty()) { + buffer->clear(); + } + + int closest_limit = std::min(current_limit_, total_bytes_limit_); + if (closest_limit != INT_MAX) { + int bytes_to_limit = closest_limit - CurrentPosition(); + if (bytes_to_limit > 0 && size > 0 && size <= bytes_to_limit) { + buffer->reserve(size); + } + } + + int current_buffer_size; + while ((current_buffer_size = BufferSize()) < size) { + // Some STL implementations "helpfully" crash on buffer->append(NULL, 0). + if (current_buffer_size != 0) { + // Note: string1.append(string2) is O(string2.size()) (as opposed to + // O(string1.size() + string2.size()), which would be bad). + buffer->append(reinterpret_cast<const char*>(buffer_), + current_buffer_size); + } + size -= current_buffer_size; + Advance(current_buffer_size); + if (!Refresh()) return false; + } + + buffer->append(reinterpret_cast<const char*>(buffer_), size); + Advance(size); + + return true; +} + + +bool CodedInputStream::ReadLittleEndian32Fallback(uint32_t* value) { + uint8_t bytes[sizeof(*value)]; + + const uint8_t* ptr; + if (BufferSize() >= static_cast<int64_t>(sizeof(*value))) { + // Fast path: Enough bytes in the buffer to read directly. + ptr = buffer_; + Advance(sizeof(*value)); + } else { + // Slow path: Had to read past the end of the buffer. + if (!ReadRaw(bytes, sizeof(*value))) return false; + ptr = bytes; + } + ReadLittleEndian32FromArray(ptr, value); + return true; +} + +bool CodedInputStream::ReadLittleEndian64Fallback(uint64_t* value) { + uint8_t bytes[sizeof(*value)]; + + const uint8_t* ptr; + if (BufferSize() >= static_cast<int64_t>(sizeof(*value))) { + // Fast path: Enough bytes in the buffer to read directly. + ptr = buffer_; + Advance(sizeof(*value)); + } else { + // Slow path: Had to read past the end of the buffer. + if (!ReadRaw(bytes, sizeof(*value))) return false; + ptr = bytes; + } + ReadLittleEndian64FromArray(ptr, value); + return true; +} + +namespace { + +// Decodes varint64 with known size, N, and returns next pointer. Knowing N at +// compile time, compiler can generate optimal code. For example, instead of +// subtracting 0x80 at each iteration, it subtracts properly shifted mask once. +template <size_t N> +const uint8_t* DecodeVarint64KnownSize(const uint8_t* buffer, uint64_t* value) { + GOOGLE_DCHECK_GT(N, 0); + uint64_t result = static_cast<uint64_t>(buffer[N - 1]) << (7 * (N - 1)); + for (size_t i = 0, offset = 0; i < N - 1; i++, offset += 7) { + result += static_cast<uint64_t>(buffer[i] - 0x80) << offset; + } + *value = result; + return buffer + N; +} + +// Read a varint from the given buffer, write it to *value, and return a pair. +// The first part of the pair is true iff the read was successful. The second +// part is buffer + (number of bytes read). This function is always inlined, +// so returning a pair is costless. +PROTOBUF_ALWAYS_INLINE +::std::pair<bool, const uint8_t*> ReadVarint32FromArray(uint32_t first_byte, + const uint8_t* buffer, + uint32_t* value); +inline ::std::pair<bool, const uint8_t*> ReadVarint32FromArray( + uint32_t first_byte, const uint8_t* buffer, uint32_t* value) { + // Fast path: We have enough bytes left in the buffer to guarantee that + // this read won't cross the end, so we can skip the checks. + GOOGLE_DCHECK_EQ(*buffer, first_byte); + GOOGLE_DCHECK_EQ(first_byte & 0x80, 0x80) << first_byte; + const uint8_t* ptr = buffer; + uint32_t b; + uint32_t result = first_byte - 0x80; + ++ptr; // We just processed the first byte. Move on to the second. + b = *(ptr++); + result += b << 7; + if (!(b & 0x80)) goto done; + result -= 0x80 << 7; + b = *(ptr++); + result += b << 14; + if (!(b & 0x80)) goto done; + result -= 0x80 << 14; + b = *(ptr++); + result += b << 21; + if (!(b & 0x80)) goto done; + result -= 0x80 << 21; + b = *(ptr++); + result += b << 28; + if (!(b & 0x80)) goto done; + // "result -= 0x80 << 28" is irrevelant. + + // If the input is larger than 32 bits, we still need to read it all + // and discard the high-order bits. + for (int i = 0; i < kMaxVarintBytes - kMaxVarint32Bytes; i++) { + b = *(ptr++); + if (!(b & 0x80)) goto done; + } + + // We have overrun the maximum size of a varint (10 bytes). Assume + // the data is corrupt. + return std::make_pair(false, ptr); + +done: + *value = result; + return std::make_pair(true, ptr); +} + +PROTOBUF_ALWAYS_INLINE::std::pair<bool, const uint8_t*> ReadVarint64FromArray( + const uint8_t* buffer, uint64_t* value); +inline ::std::pair<bool, const uint8_t*> ReadVarint64FromArray( + const uint8_t* buffer, uint64_t* value) { + // Assumes varint64 is at least 2 bytes. + GOOGLE_DCHECK_GE(buffer[0], 128); + + const uint8_t* next; + if (buffer[1] < 128) { + next = DecodeVarint64KnownSize<2>(buffer, value); + } else if (buffer[2] < 128) { + next = DecodeVarint64KnownSize<3>(buffer, value); + } else if (buffer[3] < 128) { + next = DecodeVarint64KnownSize<4>(buffer, value); + } else if (buffer[4] < 128) { + next = DecodeVarint64KnownSize<5>(buffer, value); + } else if (buffer[5] < 128) { + next = DecodeVarint64KnownSize<6>(buffer, value); + } else if (buffer[6] < 128) { + next = DecodeVarint64KnownSize<7>(buffer, value); + } else if (buffer[7] < 128) { + next = DecodeVarint64KnownSize<8>(buffer, value); + } else if (buffer[8] < 128) { + next = DecodeVarint64KnownSize<9>(buffer, value); + } else if (buffer[9] < 128) { + next = DecodeVarint64KnownSize<10>(buffer, value); + } else { + // We have overrun the maximum size of a varint (10 bytes). Assume + // the data is corrupt. + return std::make_pair(false, buffer + 11); + } + + return std::make_pair(true, next); +} + +} // namespace + +bool CodedInputStream::ReadVarint32Slow(uint32_t* value) { + // Directly invoke ReadVarint64Fallback, since we already tried to optimize + // for one-byte varints. + std::pair<uint64_t, bool> p = ReadVarint64Fallback(); + *value = static_cast<uint32_t>(p.first); + return p.second; +} + +int64_t CodedInputStream::ReadVarint32Fallback(uint32_t first_byte_or_zero) { + if (BufferSize() >= kMaxVarintBytes || + // Optimization: We're also safe if the buffer is non-empty and it ends + // with a byte that would terminate a varint. + (buffer_end_ > buffer_ && !(buffer_end_[-1] & 0x80))) { + GOOGLE_DCHECK_NE(first_byte_or_zero, 0) + << "Caller should provide us with *buffer_ when buffer is non-empty"; + uint32_t temp; + ::std::pair<bool, const uint8_t*> p = + ReadVarint32FromArray(first_byte_or_zero, buffer_, &temp); + if (!p.first) return -1; + buffer_ = p.second; + return temp; + } else { + // Really slow case: we will incur the cost of an extra function call here, + // but moving this out of line reduces the size of this function, which + // improves the common case. In micro benchmarks, this is worth about 10-15% + uint32_t temp; + return ReadVarint32Slow(&temp) ? static_cast<int64_t>(temp) : -1; + } +} + +int CodedInputStream::ReadVarintSizeAsIntSlow() { + // Directly invoke ReadVarint64Fallback, since we already tried to optimize + // for one-byte varints. + std::pair<uint64_t, bool> p = ReadVarint64Fallback(); + if (!p.second || p.first > static_cast<uint64_t>(INT_MAX)) return -1; + return p.first; +} + +int CodedInputStream::ReadVarintSizeAsIntFallback() { + if (BufferSize() >= kMaxVarintBytes || + // Optimization: We're also safe if the buffer is non-empty and it ends + // with a byte that would terminate a varint. + (buffer_end_ > buffer_ && !(buffer_end_[-1] & 0x80))) { + uint64_t temp; + ::std::pair<bool, const uint8_t*> p = ReadVarint64FromArray(buffer_, &temp); + if (!p.first || temp > static_cast<uint64_t>(INT_MAX)) return -1; + buffer_ = p.second; + return temp; + } else { + // Really slow case: we will incur the cost of an extra function call here, + // but moving this out of line reduces the size of this function, which + // improves the common case. In micro benchmarks, this is worth about 10-15% + return ReadVarintSizeAsIntSlow(); + } +} + +uint32_t CodedInputStream::ReadTagSlow() { + if (buffer_ == buffer_end_) { + // Call refresh. + if (!Refresh()) { + // Refresh failed. Make sure that it failed due to EOF, not because + // we hit total_bytes_limit_, which, unlike normal limits, is not a + // valid place to end a message. + int current_position = total_bytes_read_ - buffer_size_after_limit_; + if (current_position >= total_bytes_limit_) { + // Hit total_bytes_limit_. But if we also hit the normal limit, + // we're still OK. + legitimate_message_end_ = current_limit_ == total_bytes_limit_; + } else { + legitimate_message_end_ = true; + } + return 0; + } + } + + // For the slow path, just do a 64-bit read. Try to optimize for one-byte tags + // again, since we have now refreshed the buffer. + uint64_t result = 0; + if (!ReadVarint64(&result)) return 0; + return static_cast<uint32_t>(result); +} + +uint32_t CodedInputStream::ReadTagFallback(uint32_t first_byte_or_zero) { + const int buf_size = BufferSize(); + if (buf_size >= kMaxVarintBytes || + // Optimization: We're also safe if the buffer is non-empty and it ends + // with a byte that would terminate a varint. + (buf_size > 0 && !(buffer_end_[-1] & 0x80))) { + GOOGLE_DCHECK_EQ(first_byte_or_zero, buffer_[0]); + if (first_byte_or_zero == 0) { + ++buffer_; + return 0; + } + uint32_t tag; + ::std::pair<bool, const uint8_t*> p = + ReadVarint32FromArray(first_byte_or_zero, buffer_, &tag); + if (!p.first) { + return 0; + } + buffer_ = p.second; + return tag; + } else { + // We are commonly at a limit when attempting to read tags. Try to quickly + // detect this case without making another function call. + if ((buf_size == 0) && + ((buffer_size_after_limit_ > 0) || + (total_bytes_read_ == current_limit_)) && + // Make sure that the limit we hit is not total_bytes_limit_, since + // in that case we still need to call Refresh() so that it prints an + // error. + total_bytes_read_ - buffer_size_after_limit_ < total_bytes_limit_) { + // We hit a byte limit. + legitimate_message_end_ = true; + return 0; + } + return ReadTagSlow(); + } +} + +bool CodedInputStream::ReadVarint64Slow(uint64_t* value) { + // Slow path: This read might cross the end of the buffer, so we + // need to check and refresh the buffer if and when it does. + + uint64_t result = 0; + int count = 0; + uint32_t b; + + do { + if (count == kMaxVarintBytes) { + *value = 0; + return false; + } + while (buffer_ == buffer_end_) { + if (!Refresh()) { + *value = 0; + return false; + } + } + b = *buffer_; + result |= static_cast<uint64_t>(b & 0x7F) << (7 * count); + Advance(1); + ++count; + } while (b & 0x80); + + *value = result; + return true; +} + +std::pair<uint64_t, bool> CodedInputStream::ReadVarint64Fallback() { + if (BufferSize() >= kMaxVarintBytes || + // Optimization: We're also safe if the buffer is non-empty and it ends + // with a byte that would terminate a varint. + (buffer_end_ > buffer_ && !(buffer_end_[-1] & 0x80))) { + uint64_t temp; + ::std::pair<bool, const uint8_t*> p = ReadVarint64FromArray(buffer_, &temp); + if (!p.first) { + return std::make_pair(0, false); + } + buffer_ = p.second; + return std::make_pair(temp, true); + } else { + uint64_t temp; + bool success = ReadVarint64Slow(&temp); + return std::make_pair(temp, success); + } +} + +bool CodedInputStream::Refresh() { + GOOGLE_DCHECK_EQ(0, BufferSize()); + + if (buffer_size_after_limit_ > 0 || overflow_bytes_ > 0 || + total_bytes_read_ == current_limit_) { + // We've hit a limit. Stop. + int current_position = total_bytes_read_ - buffer_size_after_limit_; + + if (current_position >= total_bytes_limit_ && + total_bytes_limit_ != current_limit_) { + // Hit total_bytes_limit_. + PrintTotalBytesLimitError(); + } + + return false; + } + + const void* void_buffer; + int buffer_size; + if (NextNonEmpty(input_, &void_buffer, &buffer_size)) { + buffer_ = reinterpret_cast<const uint8_t*>(void_buffer); + buffer_end_ = buffer_ + buffer_size; + GOOGLE_CHECK_GE(buffer_size, 0); + + if (total_bytes_read_ <= INT_MAX - buffer_size) { + total_bytes_read_ += buffer_size; + } else { + // Overflow. Reset buffer_end_ to not include the bytes beyond INT_MAX. + // We can't get that far anyway, because total_bytes_limit_ is guaranteed + // to be less than it. We need to keep track of the number of bytes + // we discarded, though, so that we can call input_->BackUp() to back + // up over them on destruction. + + // The following line is equivalent to: + // overflow_bytes_ = total_bytes_read_ + buffer_size - INT_MAX; + // except that it avoids overflows. Signed integer overflow has + // undefined results according to the C standard. + overflow_bytes_ = total_bytes_read_ - (INT_MAX - buffer_size); + buffer_end_ -= overflow_bytes_; + total_bytes_read_ = INT_MAX; + } + + RecomputeBufferLimits(); + return true; + } else { + buffer_ = NULL; + buffer_end_ = NULL; + return false; + } +} + +// CodedOutputStream ================================================= + +void EpsCopyOutputStream::EnableAliasing(bool enabled) { + aliasing_enabled_ = enabled && stream_->AllowsAliasing(); +} + +int64_t EpsCopyOutputStream::ByteCount(uint8_t* ptr) const { + // Calculate the current offset relative to the end of the stream buffer. + int delta = (end_ - ptr) + (buffer_end_ ? 0 : kSlopBytes); + return stream_->ByteCount() - delta; +} + +// Flushes what's written out to the underlying ZeroCopyOutputStream buffers. +// Returns the size remaining in the buffer and sets buffer_end_ to the start +// of the remaining buffer, ie. [buffer_end_, buffer_end_ + return value) +int EpsCopyOutputStream::Flush(uint8_t* ptr) { + while (buffer_end_ && ptr > end_) { + int overrun = ptr - end_; + GOOGLE_DCHECK(!had_error_); + GOOGLE_DCHECK(overrun <= kSlopBytes); // NOLINT + ptr = Next() + overrun; + if (had_error_) return 0; + } + int s; + if (buffer_end_) { + std::memcpy(buffer_end_, buffer_, ptr - buffer_); + buffer_end_ += ptr - buffer_; + s = end_ - ptr; + } else { + // The stream is writing directly in the ZeroCopyOutputStream buffer. + s = end_ + kSlopBytes - ptr; + buffer_end_ = ptr; + } + GOOGLE_DCHECK(s >= 0); // NOLINT + return s; +} + +uint8_t* EpsCopyOutputStream::Trim(uint8_t* ptr) { + if (had_error_) return ptr; + int s = Flush(ptr); + if (s) stream_->BackUp(s); + // Reset to initial state (expecting new buffer) + buffer_end_ = end_ = buffer_; + return buffer_; +} + + +uint8_t* EpsCopyOutputStream::FlushAndResetBuffer(uint8_t* ptr) { + if (had_error_) return buffer_; + int s = Flush(ptr); + if (had_error_) return buffer_; + return SetInitialBuffer(buffer_end_, s); +} + +bool EpsCopyOutputStream::Skip(int count, uint8_t** pp) { + if (count < 0) return false; + if (had_error_) { + *pp = buffer_; + return false; + } + int size = Flush(*pp); + if (had_error_) { + *pp = buffer_; + return false; + } + void* data = buffer_end_; + while (count > size) { + count -= size; + if (!stream_->Next(&data, &size)) { + *pp = Error(); + return false; + } + } + *pp = SetInitialBuffer(static_cast<uint8_t*>(data) + count, size - count); + return true; +} + +bool EpsCopyOutputStream::GetDirectBufferPointer(void** data, int* size, + uint8_t** pp) { + if (had_error_) { + *pp = buffer_; + return false; + } + *size = Flush(*pp); + if (had_error_) { + *pp = buffer_; + return false; + } + *data = buffer_end_; + while (*size == 0) { + if (!stream_->Next(data, size)) { + *pp = Error(); + return false; + } + } + *pp = SetInitialBuffer(*data, *size); + return true; +} + +uint8_t* EpsCopyOutputStream::GetDirectBufferForNBytesAndAdvance(int size, + uint8_t** pp) { + if (had_error_) { + *pp = buffer_; + return nullptr; + } + int s = Flush(*pp); + if (had_error_) { + *pp = buffer_; + return nullptr; + } + if (s >= size) { + auto res = buffer_end_; + *pp = SetInitialBuffer(buffer_end_ + size, s - size); + return res; + } else { + *pp = SetInitialBuffer(buffer_end_, s); + return nullptr; + } +} + +uint8_t* EpsCopyOutputStream::Next() { + GOOGLE_DCHECK(!had_error_); // NOLINT + if (PROTOBUF_PREDICT_FALSE(stream_ == nullptr)) return Error(); + if (buffer_end_) { + // We're in the patch buffer and need to fill up the previous buffer. + std::memcpy(buffer_end_, buffer_, end_ - buffer_); + uint8_t* ptr; + int size; + do { + void* data; + if (PROTOBUF_PREDICT_FALSE(!stream_->Next(&data, &size))) { + // Stream has an error, we use the patch buffer to continue to be + // able to write. + return Error(); + } + ptr = static_cast<uint8_t*>(data); + } while (size == 0); + if (PROTOBUF_PREDICT_TRUE(size > kSlopBytes)) { + std::memcpy(ptr, end_, kSlopBytes); + end_ = ptr + size - kSlopBytes; + buffer_end_ = nullptr; + return ptr; + } else { + GOOGLE_DCHECK(size > 0); // NOLINT + // Buffer to small + std::memmove(buffer_, end_, kSlopBytes); + buffer_end_ = ptr; + end_ = buffer_ + size; + return buffer_; + } + } else { + std::memcpy(buffer_, end_, kSlopBytes); + buffer_end_ = end_; + end_ = buffer_ + kSlopBytes; + return buffer_; + } +} + +uint8_t* EpsCopyOutputStream::EnsureSpaceFallback(uint8_t* ptr) { + do { + if (PROTOBUF_PREDICT_FALSE(had_error_)) return buffer_; + int overrun = ptr - end_; + GOOGLE_DCHECK(overrun >= 0); // NOLINT + GOOGLE_DCHECK(overrun <= kSlopBytes); // NOLINT + ptr = Next() + overrun; + } while (ptr >= end_); + GOOGLE_DCHECK(ptr < end_); // NOLINT + return ptr; +} + +uint8_t* EpsCopyOutputStream::WriteRawFallback(const void* data, int size, + uint8_t* ptr) { + int s = GetSize(ptr); + while (s < size) { + std::memcpy(ptr, data, s); + size -= s; + data = static_cast<const uint8_t*>(data) + s; + ptr = EnsureSpaceFallback(ptr + s); + s = GetSize(ptr); + } + std::memcpy(ptr, data, size); + return ptr + size; +} + +uint8_t* EpsCopyOutputStream::WriteAliasedRaw(const void* data, int size, + uint8_t* ptr) { + if (size < GetSize(ptr) + ) { + return WriteRaw(data, size, ptr); + } else { + ptr = Trim(ptr); + if (stream_->WriteAliasedRaw(data, size)) return ptr; + return Error(); + } +} + +#ifndef PROTOBUF_LITTLE_ENDIAN +uint8_t* EpsCopyOutputStream::WriteRawLittleEndian32(const void* data, int size, + uint8_t* ptr) { + auto p = static_cast<const uint8_t*>(data); + auto end = p + size; + while (end - p >= kSlopBytes) { + ptr = EnsureSpace(ptr); + uint32_t buffer[4]; + static_assert(sizeof(buffer) == kSlopBytes, "Buffer must be kSlopBytes"); + std::memcpy(buffer, p, kSlopBytes); + p += kSlopBytes; + for (auto x : buffer) + ptr = CodedOutputStream::WriteLittleEndian32ToArray(x, ptr); + } + while (p < end) { + ptr = EnsureSpace(ptr); + uint32_t buffer; + std::memcpy(&buffer, p, 4); + p += 4; + ptr = CodedOutputStream::WriteLittleEndian32ToArray(buffer, ptr); + } + return ptr; +} + +uint8_t* EpsCopyOutputStream::WriteRawLittleEndian64(const void* data, int size, + uint8_t* ptr) { + auto p = static_cast<const uint8_t*>(data); + auto end = p + size; + while (end - p >= kSlopBytes) { + ptr = EnsureSpace(ptr); + uint64_t buffer[2]; + static_assert(sizeof(buffer) == kSlopBytes, "Buffer must be kSlopBytes"); + std::memcpy(buffer, p, kSlopBytes); + p += kSlopBytes; + for (auto x : buffer) + ptr = CodedOutputStream::WriteLittleEndian64ToArray(x, ptr); + } + while (p < end) { + ptr = EnsureSpace(ptr); + uint64_t buffer; + std::memcpy(&buffer, p, 8); + p += 8; + ptr = CodedOutputStream::WriteLittleEndian64ToArray(buffer, ptr); + } + return ptr; +} +#endif + + +uint8_t* EpsCopyOutputStream::WriteStringMaybeAliasedOutline(uint32_t num, + const std::string& s, + uint8_t* ptr) { + ptr = EnsureSpace(ptr); + uint32_t size = s.size(); + ptr = WriteLengthDelim(num, size, ptr); + return WriteRawMaybeAliased(s.data(), size, ptr); +} + +uint8_t* EpsCopyOutputStream::WriteStringOutline(uint32_t num, const std::string& s, + uint8_t* ptr) { + ptr = EnsureSpace(ptr); + uint32_t size = s.size(); + ptr = WriteLengthDelim(num, size, ptr); + return WriteRaw(s.data(), size, ptr); +} + +std::atomic<bool> CodedOutputStream::default_serialization_deterministic_{ + false}; + +CodedOutputStream::CodedOutputStream(ZeroCopyOutputStream* stream, + bool do_eager_refresh) + : impl_(stream, IsDefaultSerializationDeterministic(), &cur_), + start_count_(stream->ByteCount()) { + if (do_eager_refresh) { + void* data; + int size; + if (!stream->Next(&data, &size) || size == 0) return; + cur_ = impl_.SetInitialBuffer(data, size); + } +} + +CodedOutputStream::~CodedOutputStream() { Trim(); } + + +uint8_t* CodedOutputStream::WriteStringWithSizeToArray(const std::string& str, + uint8_t* target) { + GOOGLE_DCHECK_LE(str.size(), std::numeric_limits<uint32_t>::max()); + target = WriteVarint32ToArray(str.size(), target); + return WriteStringToArray(str, target); +} + +uint8_t* CodedOutputStream::WriteVarint32ToArrayOutOfLineHelper(uint32_t value, + uint8_t* target) { + GOOGLE_DCHECK_GE(value, 0x80); + target[0] |= static_cast<uint8_t>(0x80); + value >>= 7; + target[1] = static_cast<uint8_t>(value); + if (value < 0x80) { + return target + 2; + } + target += 2; + do { + // Turn on continuation bit in the byte we just wrote. + target[-1] |= static_cast<uint8_t>(0x80); + value >>= 7; + *target = static_cast<uint8_t>(value); + ++target; + } while (value >= 0x80); + return target; +} + +} // namespace io +} // namespace protobuf +} // namespace google + +#include <port_undef.inc> diff --git a/NorthstarDedicatedTest/include/protobuf/io/coded_stream.h b/NorthstarDedicatedTest/include/protobuf/io/coded_stream.h new file mode 100644 index 00000000..99844c69 --- /dev/null +++ b/NorthstarDedicatedTest/include/protobuf/io/coded_stream.h @@ -0,0 +1,1775 @@ +// Protocol Buffers - Google's data interchange format +// Copyright 2008 Google Inc. All rights reserved. +// https://developers.google.com/protocol-buffers/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// Author: kenton@google.com (Kenton Varda) +// Based on original Protocol Buffers design by +// Sanjay Ghemawat, Jeff Dean, and others. +// +// This file contains the CodedInputStream and CodedOutputStream classes, +// which wrap a ZeroCopyInputStream or ZeroCopyOutputStream, respectively, +// and allow you to read or write individual pieces of data in various +// formats. In particular, these implement the varint encoding for +// integers, a simple variable-length encoding in which smaller numbers +// take fewer bytes. +// +// Typically these classes will only be used internally by the protocol +// buffer library in order to encode and decode protocol buffers. Clients +// of the library only need to know about this class if they wish to write +// custom message parsing or serialization procedures. +// +// CodedOutputStream example: +// // Write some data to "myfile". First we write a 4-byte "magic number" +// // to identify the file type, then write a length-delimited string. The +// // string is composed of a varint giving the length followed by the raw +// // bytes. +// int fd = open("myfile", O_CREAT | O_WRONLY); +// ZeroCopyOutputStream* raw_output = new FileOutputStream(fd); +// CodedOutputStream* coded_output = new CodedOutputStream(raw_output); +// +// int magic_number = 1234; +// char text[] = "Hello world!"; +// coded_output->WriteLittleEndian32(magic_number); +// coded_output->WriteVarint32(strlen(text)); +// coded_output->WriteRaw(text, strlen(text)); +// +// delete coded_output; +// delete raw_output; +// close(fd); +// +// CodedInputStream example: +// // Read a file created by the above code. +// int fd = open("myfile", O_RDONLY); +// ZeroCopyInputStream* raw_input = new FileInputStream(fd); +// CodedInputStream* coded_input = new CodedInputStream(raw_input); +// +// coded_input->ReadLittleEndian32(&magic_number); +// if (magic_number != 1234) { +// cerr << "File not in expected format." << endl; +// return; +// } +// +// uint32_t size; +// coded_input->ReadVarint32(&size); +// +// char* text = new char[size + 1]; +// coded_input->ReadRaw(buffer, size); +// text[size] = '\0'; +// +// delete coded_input; +// delete raw_input; +// close(fd); +// +// cout << "Text is: " << text << endl; +// delete [] text; +// +// For those who are interested, varint encoding is defined as follows: +// +// The encoding operates on unsigned integers of up to 64 bits in length. +// Each byte of the encoded value has the format: +// * bits 0-6: Seven bits of the number being encoded. +// * bit 7: Zero if this is the last byte in the encoding (in which +// case all remaining bits of the number are zero) or 1 if +// more bytes follow. +// The first byte contains the least-significant 7 bits of the number, the +// second byte (if present) contains the next-least-significant 7 bits, +// and so on. So, the binary number 1011000101011 would be encoded in two +// bytes as "10101011 00101100". +// +// In theory, varint could be used to encode integers of any length. +// However, for practicality we set a limit at 64 bits. The maximum encoded +// length of a number is thus 10 bytes. + +#ifndef GOOGLE_PROTOBUF_IO_CODED_STREAM_H__ +#define GOOGLE_PROTOBUF_IO_CODED_STREAM_H__ + + +#include <assert.h> + +#include <atomic> +#include <climits> +#include <cstddef> +#include <cstring> +#include <limits> +#include <string> +#include <type_traits> +#include <utility> + +#ifdef _WIN32 +// Assuming windows is always little-endian. +#if !defined(PROTOBUF_DISABLE_LITTLE_ENDIAN_OPT_FOR_TEST) +#define PROTOBUF_LITTLE_ENDIAN 1 +#endif +#if defined(_MSC_VER) && _MSC_VER >= 1300 && !defined(__INTEL_COMPILER) +// If MSVC has "/RTCc" set, it will complain about truncating casts at +// runtime. This file contains some intentional truncating casts. +#pragma runtime_checks("c", off) +#endif +#else +#ifdef __APPLE__ +#include <machine/endian.h> // __BYTE_ORDER +#elif defined(__FreeBSD__) +#include <sys/endian.h> // __BYTE_ORDER +#elif (defined(sun) || defined(__sun)) && (defined(__SVR4) || defined(__svr4__)) +#include <sys/isa_defs.h> // __BYTE_ORDER +#elif defined(_AIX) || defined(__TOS_AIX__) +#include <sys/machine.h> // BYTE_ORDER +#else +#if !defined(__QNX__) +#include <endian.h> // __BYTE_ORDER +#endif +#endif +#if ((defined(__LITTLE_ENDIAN__) && !defined(__BIG_ENDIAN__)) || \ + (defined(__BYTE_ORDER) && __BYTE_ORDER == __LITTLE_ENDIAN)) && \ + !defined(PROTOBUF_DISABLE_LITTLE_ENDIAN_OPT_FOR_TEST) +#define PROTOBUF_LITTLE_ENDIAN 1 +#endif +#endif +#include <stubs/common.h> +#include <stubs/logging.h> +#include <stubs/strutil.h> +#include <port.h> +#include <stubs/port.h> + + +#include <port_def.inc> + +namespace google { +namespace protobuf { + +class DescriptorPool; +class MessageFactory; +class ZeroCopyCodedInputStream; + +namespace internal { +void MapTestForceDeterministic(); +class EpsCopyByteStream; +} // namespace internal + +namespace io { + +// Defined in this file. +class CodedInputStream; +class CodedOutputStream; + +// Defined in other files. +class ZeroCopyInputStream; // zero_copy_stream.h +class ZeroCopyOutputStream; // zero_copy_stream.h + +// Class which reads and decodes binary data which is composed of varint- +// encoded integers and fixed-width pieces. Wraps a ZeroCopyInputStream. +// Most users will not need to deal with CodedInputStream. +// +// Most methods of CodedInputStream that return a bool return false if an +// underlying I/O error occurs or if the data is malformed. Once such a +// failure occurs, the CodedInputStream is broken and is no longer useful. +// After a failure, callers also should assume writes to "out" args may have +// occurred, though nothing useful can be determined from those writes. +class PROTOBUF_EXPORT CodedInputStream { + public: + // Create a CodedInputStream that reads from the given ZeroCopyInputStream. + explicit CodedInputStream(ZeroCopyInputStream* input); + + // Create a CodedInputStream that reads from the given flat array. This is + // faster than using an ArrayInputStream. PushLimit(size) is implied by + // this constructor. + explicit CodedInputStream(const uint8_t* buffer, int size); + + // Destroy the CodedInputStream and position the underlying + // ZeroCopyInputStream at the first unread byte. If an error occurred while + // reading (causing a method to return false), then the exact position of + // the input stream may be anywhere between the last value that was read + // successfully and the stream's byte limit. + ~CodedInputStream(); + + // Return true if this CodedInputStream reads from a flat array instead of + // a ZeroCopyInputStream. + inline bool IsFlat() const; + + // Skips a number of bytes. Returns false if an underlying read error + // occurs. + inline bool Skip(int count); + + // Sets *data to point directly at the unread part of the CodedInputStream's + // underlying buffer, and *size to the size of that buffer, but does not + // advance the stream's current position. This will always either produce + // a non-empty buffer or return false. If the caller consumes any of + // this data, it should then call Skip() to skip over the consumed bytes. + // This may be useful for implementing external fast parsing routines for + // types of data not covered by the CodedInputStream interface. + bool GetDirectBufferPointer(const void** data, int* size); + + // Like GetDirectBufferPointer, but this method is inlined, and does not + // attempt to Refresh() if the buffer is currently empty. + PROTOBUF_ALWAYS_INLINE + void GetDirectBufferPointerInline(const void** data, int* size); + + // Read raw bytes, copying them into the given buffer. + bool ReadRaw(void* buffer, int size); + + // Like ReadRaw, but reads into a string. + bool ReadString(std::string* buffer, int size); + + + // Read a 32-bit little-endian integer. + bool ReadLittleEndian32(uint32_t* value); + // Read a 64-bit little-endian integer. + bool ReadLittleEndian64(uint64_t* value); + + // These methods read from an externally provided buffer. The caller is + // responsible for ensuring that the buffer has sufficient space. + // Read a 32-bit little-endian integer. + static const uint8_t* ReadLittleEndian32FromArray(const uint8_t* buffer, + uint32_t* value); + // Read a 64-bit little-endian integer. + static const uint8_t* ReadLittleEndian64FromArray(const uint8_t* buffer, + uint64_t* value); + + // Read an unsigned integer with Varint encoding, truncating to 32 bits. + // Reading a 32-bit value is equivalent to reading a 64-bit one and casting + // it to uint32_t, but may be more efficient. + bool ReadVarint32(uint32_t* value); + // Read an unsigned integer with Varint encoding. + bool ReadVarint64(uint64_t* value); + + // Reads a varint off the wire into an "int". This should be used for reading + // sizes off the wire (sizes of strings, submessages, bytes fields, etc). + // + // The value from the wire is interpreted as unsigned. If its value exceeds + // the representable value of an integer on this platform, instead of + // truncating we return false. Truncating (as performed by ReadVarint32() + // above) is an acceptable approach for fields representing an integer, but + // when we are parsing a size from the wire, truncating the value would result + // in us misparsing the payload. + bool ReadVarintSizeAsInt(int* value); + + // Read a tag. This calls ReadVarint32() and returns the result, or returns + // zero (which is not a valid tag) if ReadVarint32() fails. Also, ReadTag + // (but not ReadTagNoLastTag) updates the last tag value, which can be checked + // with LastTagWas(). + // + // Always inline because this is only called in one place per parse loop + // but it is called for every iteration of said loop, so it should be fast. + // GCC doesn't want to inline this by default. + PROTOBUF_ALWAYS_INLINE uint32_t ReadTag() { + return last_tag_ = ReadTagNoLastTag(); + } + + PROTOBUF_ALWAYS_INLINE uint32_t ReadTagNoLastTag(); + + // This usually a faster alternative to ReadTag() when cutoff is a manifest + // constant. It does particularly well for cutoff >= 127. The first part + // of the return value is the tag that was read, though it can also be 0 in + // the cases where ReadTag() would return 0. If the second part is true + // then the tag is known to be in [0, cutoff]. If not, the tag either is + // above cutoff or is 0. (There's intentional wiggle room when tag is 0, + // because that can arise in several ways, and for best performance we want + // to avoid an extra "is tag == 0?" check here.) + PROTOBUF_ALWAYS_INLINE + std::pair<uint32_t, bool> ReadTagWithCutoff(uint32_t cutoff) { + std::pair<uint32_t, bool> result = ReadTagWithCutoffNoLastTag(cutoff); + last_tag_ = result.first; + return result; + } + + PROTOBUF_ALWAYS_INLINE + std::pair<uint32_t, bool> ReadTagWithCutoffNoLastTag(uint32_t cutoff); + + // Usually returns true if calling ReadVarint32() now would produce the given + // value. Will always return false if ReadVarint32() would not return the + // given value. If ExpectTag() returns true, it also advances past + // the varint. For best performance, use a compile-time constant as the + // parameter. + // Always inline because this collapses to a small number of instructions + // when given a constant parameter, but GCC doesn't want to inline by default. + PROTOBUF_ALWAYS_INLINE bool ExpectTag(uint32_t expected); + + // Like above, except this reads from the specified buffer. The caller is + // responsible for ensuring that the buffer is large enough to read a varint + // of the expected size. For best performance, use a compile-time constant as + // the expected tag parameter. + // + // Returns a pointer beyond the expected tag if it was found, or NULL if it + // was not. + PROTOBUF_ALWAYS_INLINE + static const uint8_t* ExpectTagFromArray(const uint8_t* buffer, + uint32_t expected); + + // Usually returns true if no more bytes can be read. Always returns false + // if more bytes can be read. If ExpectAtEnd() returns true, a subsequent + // call to LastTagWas() will act as if ReadTag() had been called and returned + // zero, and ConsumedEntireMessage() will return true. + bool ExpectAtEnd(); + + // If the last call to ReadTag() or ReadTagWithCutoff() returned the given + // value, returns true. Otherwise, returns false. + // ReadTagNoLastTag/ReadTagWithCutoffNoLastTag do not preserve the last + // returned value. + // + // This is needed because parsers for some types of embedded messages + // (with field type TYPE_GROUP) don't actually know that they've reached the + // end of a message until they see an ENDGROUP tag, which was actually part + // of the enclosing message. The enclosing message would like to check that + // tag to make sure it had the right number, so it calls LastTagWas() on + // return from the embedded parser to check. + bool LastTagWas(uint32_t expected); + void SetLastTag(uint32_t tag) { last_tag_ = tag; } + + // When parsing message (but NOT a group), this method must be called + // immediately after MergeFromCodedStream() returns (if it returns true) + // to further verify that the message ended in a legitimate way. For + // example, this verifies that parsing did not end on an end-group tag. + // It also checks for some cases where, due to optimizations, + // MergeFromCodedStream() can incorrectly return true. + bool ConsumedEntireMessage(); + void SetConsumed() { legitimate_message_end_ = true; } + + // Limits ---------------------------------------------------------- + // Limits are used when parsing length-delimited embedded messages. + // After the message's length is read, PushLimit() is used to prevent + // the CodedInputStream from reading beyond that length. Once the + // embedded message has been parsed, PopLimit() is called to undo the + // limit. + + // Opaque type used with PushLimit() and PopLimit(). Do not modify + // values of this type yourself. The only reason that this isn't a + // struct with private internals is for efficiency. + typedef int Limit; + + // Places a limit on the number of bytes that the stream may read, + // starting from the current position. Once the stream hits this limit, + // it will act like the end of the input has been reached until PopLimit() + // is called. + // + // As the names imply, the stream conceptually has a stack of limits. The + // shortest limit on the stack is always enforced, even if it is not the + // top limit. + // + // The value returned by PushLimit() is opaque to the caller, and must + // be passed unchanged to the corresponding call to PopLimit(). + Limit PushLimit(int byte_limit); + + // Pops the last limit pushed by PushLimit(). The input must be the value + // returned by that call to PushLimit(). + void PopLimit(Limit limit); + + // Returns the number of bytes left until the nearest limit on the + // stack is hit, or -1 if no limits are in place. + int BytesUntilLimit() const; + + // Returns current position relative to the beginning of the input stream. + int CurrentPosition() const; + + // Total Bytes Limit ----------------------------------------------- + // To prevent malicious users from sending excessively large messages + // and causing memory exhaustion, CodedInputStream imposes a hard limit on + // the total number of bytes it will read. + + // Sets the maximum number of bytes that this CodedInputStream will read + // before refusing to continue. To prevent servers from allocating enormous + // amounts of memory to hold parsed messages, the maximum message length + // should be limited to the shortest length that will not harm usability. + // The default limit is INT_MAX (~2GB) and apps should set shorter limits + // if possible. An error will always be printed to stderr if the limit is + // reached. + // + // Note: setting a limit less than the current read position is interpreted + // as a limit on the current position. + // + // This is unrelated to PushLimit()/PopLimit(). + void SetTotalBytesLimit(int total_bytes_limit); + + // The Total Bytes Limit minus the Current Position, or -1 if the total bytes + // limit is INT_MAX. + int BytesUntilTotalBytesLimit() const; + + // Recursion Limit ------------------------------------------------- + // To prevent corrupt or malicious messages from causing stack overflows, + // we must keep track of the depth of recursion when parsing embedded + // messages and groups. CodedInputStream keeps track of this because it + // is the only object that is passed down the stack during parsing. + + // Sets the maximum recursion depth. The default is 100. + void SetRecursionLimit(int limit); + int RecursionBudget() { return recursion_budget_; } + + static int GetDefaultRecursionLimit() { return default_recursion_limit_; } + + // Increments the current recursion depth. Returns true if the depth is + // under the limit, false if it has gone over. + bool IncrementRecursionDepth(); + + // Decrements the recursion depth if possible. + void DecrementRecursionDepth(); + + // Decrements the recursion depth blindly. This is faster than + // DecrementRecursionDepth(). It should be used only if all previous + // increments to recursion depth were successful. + void UnsafeDecrementRecursionDepth(); + + // Shorthand for make_pair(PushLimit(byte_limit), --recursion_budget_). + // Using this can reduce code size and complexity in some cases. The caller + // is expected to check that the second part of the result is non-negative (to + // bail out if the depth of recursion is too high) and, if all is well, to + // later pass the first part of the result to PopLimit() or similar. + std::pair<CodedInputStream::Limit, int> IncrementRecursionDepthAndPushLimit( + int byte_limit); + + // Shorthand for PushLimit(ReadVarint32(&length) ? length : 0). + Limit ReadLengthAndPushLimit(); + + // Helper that is equivalent to: { + // bool result = ConsumedEntireMessage(); + // PopLimit(limit); + // UnsafeDecrementRecursionDepth(); + // return result; } + // Using this can reduce code size and complexity in some cases. + // Do not use unless the current recursion depth is greater than zero. + bool DecrementRecursionDepthAndPopLimit(Limit limit); + + // Helper that is equivalent to: { + // bool result = ConsumedEntireMessage(); + // PopLimit(limit); + // return result; } + // Using this can reduce code size and complexity in some cases. + bool CheckEntireMessageConsumedAndPopLimit(Limit limit); + + // Extension Registry ---------------------------------------------- + // ADVANCED USAGE: 99.9% of people can ignore this section. + // + // By default, when parsing extensions, the parser looks for extension + // definitions in the pool which owns the outer message's Descriptor. + // However, you may call SetExtensionRegistry() to provide an alternative + // pool instead. This makes it possible, for example, to parse a message + // using a generated class, but represent some extensions using + // DynamicMessage. + + // Set the pool used to look up extensions. Most users do not need to call + // this as the correct pool will be chosen automatically. + // + // WARNING: It is very easy to misuse this. Carefully read the requirements + // below. Do not use this unless you are sure you need it. Almost no one + // does. + // + // Let's say you are parsing a message into message object m, and you want + // to take advantage of SetExtensionRegistry(). You must follow these + // requirements: + // + // The given DescriptorPool must contain m->GetDescriptor(). It is not + // sufficient for it to simply contain a descriptor that has the same name + // and content -- it must be the *exact object*. In other words: + // assert(pool->FindMessageTypeByName(m->GetDescriptor()->full_name()) == + // m->GetDescriptor()); + // There are two ways to satisfy this requirement: + // 1) Use m->GetDescriptor()->pool() as the pool. This is generally useless + // because this is the pool that would be used anyway if you didn't call + // SetExtensionRegistry() at all. + // 2) Use a DescriptorPool which has m->GetDescriptor()->pool() as an + // "underlay". Read the documentation for DescriptorPool for more + // information about underlays. + // + // You must also provide a MessageFactory. This factory will be used to + // construct Message objects representing extensions. The factory's + // GetPrototype() MUST return non-NULL for any Descriptor which can be found + // through the provided pool. + // + // If the provided factory might return instances of protocol-compiler- + // generated (i.e. compiled-in) types, or if the outer message object m is + // a generated type, then the given factory MUST have this property: If + // GetPrototype() is given a Descriptor which resides in + // DescriptorPool::generated_pool(), the factory MUST return the same + // prototype which MessageFactory::generated_factory() would return. That + // is, given a descriptor for a generated type, the factory must return an + // instance of the generated class (NOT DynamicMessage). However, when + // given a descriptor for a type that is NOT in generated_pool, the factory + // is free to return any implementation. + // + // The reason for this requirement is that generated sub-objects may be + // accessed via the standard (non-reflection) extension accessor methods, + // and these methods will down-cast the object to the generated class type. + // If the object is not actually of that type, the results would be undefined. + // On the other hand, if an extension is not compiled in, then there is no + // way the code could end up accessing it via the standard accessors -- the + // only way to access the extension is via reflection. When using reflection, + // DynamicMessage and generated messages are indistinguishable, so it's fine + // if these objects are represented using DynamicMessage. + // + // Using DynamicMessageFactory on which you have called + // SetDelegateToGeneratedFactory(true) should be sufficient to satisfy the + // above requirement. + // + // If either pool or factory is NULL, both must be NULL. + // + // Note that this feature is ignored when parsing "lite" messages as they do + // not have descriptors. + void SetExtensionRegistry(const DescriptorPool* pool, + MessageFactory* factory); + + // Get the DescriptorPool set via SetExtensionRegistry(), or NULL if no pool + // has been provided. + const DescriptorPool* GetExtensionPool(); + + // Get the MessageFactory set via SetExtensionRegistry(), or NULL if no + // factory has been provided. + MessageFactory* GetExtensionFactory(); + + private: + GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(CodedInputStream); + + const uint8_t* buffer_; + const uint8_t* buffer_end_; // pointer to the end of the buffer. + ZeroCopyInputStream* input_; + int total_bytes_read_; // total bytes read from input_, including + // the current buffer + + // If total_bytes_read_ surpasses INT_MAX, we record the extra bytes here + // so that we can BackUp() on destruction. + int overflow_bytes_; + + // LastTagWas() stuff. + uint32_t last_tag_; // result of last ReadTag() or ReadTagWithCutoff(). + + // This is set true by ReadTag{Fallback/Slow}() if it is called when exactly + // at EOF, or by ExpectAtEnd() when it returns true. This happens when we + // reach the end of a message and attempt to read another tag. + bool legitimate_message_end_; + + // See EnableAliasing(). + bool aliasing_enabled_; + + // Limits + Limit current_limit_; // if position = -1, no limit is applied + + // For simplicity, if the current buffer crosses a limit (either a normal + // limit created by PushLimit() or the total bytes limit), buffer_size_ + // only tracks the number of bytes before that limit. This field + // contains the number of bytes after it. Note that this implies that if + // buffer_size_ == 0 and buffer_size_after_limit_ > 0, we know we've + // hit a limit. However, if both are zero, it doesn't necessarily mean + // we aren't at a limit -- the buffer may have ended exactly at the limit. + int buffer_size_after_limit_; + + // Maximum number of bytes to read, period. This is unrelated to + // current_limit_. Set using SetTotalBytesLimit(). + int total_bytes_limit_; + + // Current recursion budget, controlled by IncrementRecursionDepth() and + // similar. Starts at recursion_limit_ and goes down: if this reaches + // -1 we are over budget. + int recursion_budget_; + // Recursion depth limit, set by SetRecursionLimit(). + int recursion_limit_; + + // See SetExtensionRegistry(). + const DescriptorPool* extension_pool_; + MessageFactory* extension_factory_; + + // Private member functions. + + // Fallback when Skip() goes past the end of the current buffer. + bool SkipFallback(int count, int original_buffer_size); + + // Advance the buffer by a given number of bytes. + void Advance(int amount); + + // Back up input_ to the current buffer position. + void BackUpInputToCurrentPosition(); + + // Recomputes the value of buffer_size_after_limit_. Must be called after + // current_limit_ or total_bytes_limit_ changes. + void RecomputeBufferLimits(); + + // Writes an error message saying that we hit total_bytes_limit_. + void PrintTotalBytesLimitError(); + + // Called when the buffer runs out to request more data. Implies an + // Advance(BufferSize()). + bool Refresh(); + + // When parsing varints, we optimize for the common case of small values, and + // then optimize for the case when the varint fits within the current buffer + // piece. The Fallback method is used when we can't use the one-byte + // optimization. The Slow method is yet another fallback when the buffer is + // not large enough. Making the slow path out-of-line speeds up the common + // case by 10-15%. The slow path is fairly uncommon: it only triggers when a + // message crosses multiple buffers. Note: ReadVarint32Fallback() and + // ReadVarint64Fallback() are called frequently and generally not inlined, so + // they have been optimized to avoid "out" parameters. The former returns -1 + // if it fails and the uint32_t it read otherwise. The latter has a bool + // indicating success or failure as part of its return type. + int64_t ReadVarint32Fallback(uint32_t first_byte_or_zero); + int ReadVarintSizeAsIntFallback(); + std::pair<uint64_t, bool> ReadVarint64Fallback(); + bool ReadVarint32Slow(uint32_t* value); + bool ReadVarint64Slow(uint64_t* value); + int ReadVarintSizeAsIntSlow(); + bool ReadLittleEndian32Fallback(uint32_t* value); + bool ReadLittleEndian64Fallback(uint64_t* value); + + // Fallback/slow methods for reading tags. These do not update last_tag_, + // but will set legitimate_message_end_ if we are at the end of the input + // stream. + uint32_t ReadTagFallback(uint32_t first_byte_or_zero); + uint32_t ReadTagSlow(); + bool ReadStringFallback(std::string* buffer, int size); + + // Return the size of the buffer. + int BufferSize() const; + + static const int kDefaultTotalBytesLimit = INT_MAX; + + static int default_recursion_limit_; // 100 by default. + + friend class google::protobuf::ZeroCopyCodedInputStream; + friend class google::protobuf::internal::EpsCopyByteStream; +}; + +// EpsCopyOutputStream wraps a ZeroCopyOutputStream and exposes a new stream, +// which has the property you can write kSlopBytes (16 bytes) from the current +// position without bounds checks. The cursor into the stream is managed by +// the user of the class and is an explicit parameter in the methods. Careful +// use of this class, ie. keep ptr a local variable, eliminates the need to +// for the compiler to sync the ptr value between register and memory. +class PROTOBUF_EXPORT EpsCopyOutputStream { + public: + enum { kSlopBytes = 16 }; + + // Initialize from a stream. + EpsCopyOutputStream(ZeroCopyOutputStream* stream, bool deterministic, + uint8_t** pp) + : end_(buffer_), + stream_(stream), + is_serialization_deterministic_(deterministic) { + *pp = buffer_; + } + + // Only for array serialization. No overflow protection, end_ will be the + // pointed to the end of the array. When using this the total size is already + // known, so no need to maintain the slop region. + EpsCopyOutputStream(void* data, int size, bool deterministic) + : end_(static_cast<uint8_t*>(data) + size), + buffer_end_(nullptr), + stream_(nullptr), + is_serialization_deterministic_(deterministic) {} + + // Initialize from stream but with the first buffer already given (eager). + EpsCopyOutputStream(void* data, int size, ZeroCopyOutputStream* stream, + bool deterministic, uint8_t** pp) + : stream_(stream), is_serialization_deterministic_(deterministic) { + *pp = SetInitialBuffer(data, size); + } + + // Flush everything that's written into the underlying ZeroCopyOutputStream + // and trims the underlying stream to the location of ptr. + uint8_t* Trim(uint8_t* ptr); + + // After this it's guaranteed you can safely write kSlopBytes to ptr. This + // will never fail! The underlying stream can produce an error. Use HadError + // to check for errors. + PROTOBUF_NODISCARD uint8_t* EnsureSpace(uint8_t* ptr) { + if (PROTOBUF_PREDICT_FALSE(ptr >= end_)) { + return EnsureSpaceFallback(ptr); + } + return ptr; + } + + uint8_t* WriteRaw(const void* data, int size, uint8_t* ptr) { + if (PROTOBUF_PREDICT_FALSE(end_ - ptr < size)) { + return WriteRawFallback(data, size, ptr); + } + std::memcpy(ptr, data, size); + return ptr + size; + } + // Writes the buffer specified by data, size to the stream. Possibly by + // aliasing the buffer (ie. not copying the data). The caller is responsible + // to make sure the buffer is alive for the duration of the + // ZeroCopyOutputStream. +#ifndef NDEBUG + PROTOBUF_NOINLINE +#endif + uint8_t* WriteRawMaybeAliased(const void* data, int size, uint8_t* ptr) { + if (aliasing_enabled_) { + return WriteAliasedRaw(data, size, ptr); + } else { + return WriteRaw(data, size, ptr); + } + } + + +#ifndef NDEBUG + PROTOBUF_NOINLINE +#endif + uint8_t* WriteStringMaybeAliased(uint32_t num, const std::string& s, + uint8_t* ptr) { + std::ptrdiff_t size = s.size(); + if (PROTOBUF_PREDICT_FALSE( + size >= 128 || end_ - ptr + 16 - TagSize(num << 3) - 1 < size)) { + return WriteStringMaybeAliasedOutline(num, s, ptr); + } + ptr = UnsafeVarint((num << 3) | 2, ptr); + *ptr++ = static_cast<uint8_t>(size); + std::memcpy(ptr, s.data(), size); + return ptr + size; + } + uint8_t* WriteBytesMaybeAliased(uint32_t num, const std::string& s, + uint8_t* ptr) { + return WriteStringMaybeAliased(num, s, ptr); + } + + template <typename T> + PROTOBUF_ALWAYS_INLINE uint8_t* WriteString(uint32_t num, const T& s, + uint8_t* ptr) { + std::ptrdiff_t size = s.size(); + if (PROTOBUF_PREDICT_FALSE( + size >= 128 || end_ - ptr + 16 - TagSize(num << 3) - 1 < size)) { + return WriteStringOutline(num, s, ptr); + } + ptr = UnsafeVarint((num << 3) | 2, ptr); + *ptr++ = static_cast<uint8_t>(size); + std::memcpy(ptr, s.data(), size); + return ptr + size; + } + template <typename T> +#ifndef NDEBUG + PROTOBUF_NOINLINE +#endif + uint8_t* WriteBytes(uint32_t num, const T& s, uint8_t* ptr) { + return WriteString(num, s, ptr); + } + + template <typename T> + PROTOBUF_ALWAYS_INLINE uint8_t* WriteInt32Packed(int num, const T& r, + int size, uint8_t* ptr) { + return WriteVarintPacked(num, r, size, ptr, Encode64); + } + template <typename T> + PROTOBUF_ALWAYS_INLINE uint8_t* WriteUInt32Packed(int num, const T& r, + int size, uint8_t* ptr) { + return WriteVarintPacked(num, r, size, ptr, Encode32); + } + template <typename T> + PROTOBUF_ALWAYS_INLINE uint8_t* WriteSInt32Packed(int num, const T& r, + int size, uint8_t* ptr) { + return WriteVarintPacked(num, r, size, ptr, ZigZagEncode32); + } + template <typename T> + PROTOBUF_ALWAYS_INLINE uint8_t* WriteInt64Packed(int num, const T& r, + int size, uint8_t* ptr) { + return WriteVarintPacked(num, r, size, ptr, Encode64); + } + template <typename T> + PROTOBUF_ALWAYS_INLINE uint8_t* WriteUInt64Packed(int num, const T& r, + int size, uint8_t* ptr) { + return WriteVarintPacked(num, r, size, ptr, Encode64); + } + template <typename T> + PROTOBUF_ALWAYS_INLINE uint8_t* WriteSInt64Packed(int num, const T& r, + int size, uint8_t* ptr) { + return WriteVarintPacked(num, r, size, ptr, ZigZagEncode64); + } + template <typename T> + PROTOBUF_ALWAYS_INLINE uint8_t* WriteEnumPacked(int num, const T& r, int size, + uint8_t* ptr) { + return WriteVarintPacked(num, r, size, ptr, Encode64); + } + + template <typename T> + PROTOBUF_ALWAYS_INLINE uint8_t* WriteFixedPacked(int num, const T& r, + uint8_t* ptr) { + ptr = EnsureSpace(ptr); + constexpr auto element_size = sizeof(typename T::value_type); + auto size = r.size() * element_size; + ptr = WriteLengthDelim(num, size, ptr); + return WriteRawLittleEndian<element_size>(r.data(), static_cast<int>(size), + ptr); + } + + // Returns true if there was an underlying I/O error since this object was + // created. + bool HadError() const { return had_error_; } + + // Instructs the EpsCopyOutputStream to allow the underlying + // ZeroCopyOutputStream to hold pointers to the original structure instead of + // copying, if it supports it (i.e. output->AllowsAliasing() is true). If the + // underlying stream does not support aliasing, then enabling it has no + // affect. For now, this only affects the behavior of + // WriteRawMaybeAliased(). + // + // NOTE: It is caller's responsibility to ensure that the chunk of memory + // remains live until all of the data has been consumed from the stream. + void EnableAliasing(bool enabled); + + // See documentation on CodedOutputStream::SetSerializationDeterministic. + void SetSerializationDeterministic(bool value) { + is_serialization_deterministic_ = value; + } + + // See documentation on CodedOutputStream::IsSerializationDeterministic. + bool IsSerializationDeterministic() const { + return is_serialization_deterministic_; + } + + // The number of bytes written to the stream at position ptr, relative to the + // stream's overall position. + int64_t ByteCount(uint8_t* ptr) const; + + + private: + uint8_t* end_; + uint8_t* buffer_end_ = buffer_; + uint8_t buffer_[2 * kSlopBytes]; + ZeroCopyOutputStream* stream_; + bool had_error_ = false; + bool aliasing_enabled_ = false; // See EnableAliasing(). + bool is_serialization_deterministic_; + + uint8_t* EnsureSpaceFallback(uint8_t* ptr); + inline uint8_t* Next(); + int Flush(uint8_t* ptr); + std::ptrdiff_t GetSize(uint8_t* ptr) const { + GOOGLE_DCHECK(ptr <= end_ + kSlopBytes); // NOLINT + return end_ + kSlopBytes - ptr; + } + + uint8_t* Error() { + had_error_ = true; + // We use the patch buffer to always guarantee space to write to. + end_ = buffer_ + kSlopBytes; + return buffer_; + } + + static constexpr int TagSize(uint32_t tag) { + return (tag < (1 << 7)) ? 1 + : (tag < (1 << 14)) ? 2 + : (tag < (1 << 21)) ? 3 + : (tag < (1 << 28)) ? 4 + : 5; + } + + PROTOBUF_ALWAYS_INLINE uint8_t* WriteTag(uint32_t num, uint32_t wt, + uint8_t* ptr) { + GOOGLE_DCHECK(ptr < end_); // NOLINT + return UnsafeVarint((num << 3) | wt, ptr); + } + + PROTOBUF_ALWAYS_INLINE uint8_t* WriteLengthDelim(int num, uint32_t size, + uint8_t* ptr) { + ptr = WriteTag(num, 2, ptr); + return UnsafeWriteSize(size, ptr); + } + + uint8_t* WriteRawFallback(const void* data, int size, uint8_t* ptr); + + uint8_t* WriteAliasedRaw(const void* data, int size, uint8_t* ptr); + + uint8_t* WriteStringMaybeAliasedOutline(uint32_t num, const std::string& s, + uint8_t* ptr); + uint8_t* WriteStringOutline(uint32_t num, const std::string& s, uint8_t* ptr); + + template <typename T, typename E> + PROTOBUF_ALWAYS_INLINE uint8_t* WriteVarintPacked(int num, const T& r, + int size, uint8_t* ptr, + const E& encode) { + ptr = EnsureSpace(ptr); + ptr = WriteLengthDelim(num, size, ptr); + auto it = r.data(); + auto end = it + r.size(); + do { + ptr = EnsureSpace(ptr); + ptr = UnsafeVarint(encode(*it++), ptr); + } while (it < end); + return ptr; + } + + static uint32_t Encode32(uint32_t v) { return v; } + static uint64_t Encode64(uint64_t v) { return v; } + static uint32_t ZigZagEncode32(int32_t v) { + return (static_cast<uint32_t>(v) << 1) ^ static_cast<uint32_t>(v >> 31); + } + static uint64_t ZigZagEncode64(int64_t v) { + return (static_cast<uint64_t>(v) << 1) ^ static_cast<uint64_t>(v >> 63); + } + + template <typename T> + PROTOBUF_ALWAYS_INLINE static uint8_t* UnsafeVarint(T value, uint8_t* ptr) { + static_assert(std::is_unsigned<T>::value, + "Varint serialization must be unsigned"); + ptr[0] = static_cast<uint8_t>(value); + if (value < 0x80) { + return ptr + 1; + } + // Turn on continuation bit in the byte we just wrote. + ptr[0] |= static_cast<uint8_t>(0x80); + value >>= 7; + ptr[1] = static_cast<uint8_t>(value); + if (value < 0x80) { + return ptr + 2; + } + ptr += 2; + do { + // Turn on continuation bit in the byte we just wrote. + ptr[-1] |= static_cast<uint8_t>(0x80); + value >>= 7; + *ptr = static_cast<uint8_t>(value); + ++ptr; + } while (value >= 0x80); + return ptr; + } + + PROTOBUF_ALWAYS_INLINE static uint8_t* UnsafeWriteSize(uint32_t value, + uint8_t* ptr) { + while (PROTOBUF_PREDICT_FALSE(value >= 0x80)) { + *ptr = static_cast<uint8_t>(value | 0x80); + value >>= 7; + ++ptr; + } + *ptr++ = static_cast<uint8_t>(value); + return ptr; + } + + template <int S> + uint8_t* WriteRawLittleEndian(const void* data, int size, uint8_t* ptr); +#ifndef PROTOBUF_LITTLE_ENDIAN + uint8_t* WriteRawLittleEndian32(const void* data, int size, uint8_t* ptr); + uint8_t* WriteRawLittleEndian64(const void* data, int size, uint8_t* ptr); +#endif + + // These methods are for CodedOutputStream. Ideally they should be private + // but to match current behavior of CodedOutputStream as close as possible + // we allow it some functionality. + public: + uint8_t* SetInitialBuffer(void* data, int size) { + auto ptr = static_cast<uint8_t*>(data); + if (size > kSlopBytes) { + end_ = ptr + size - kSlopBytes; + buffer_end_ = nullptr; + return ptr; + } else { + end_ = buffer_ + size; + buffer_end_ = ptr; + return buffer_; + } + } + + private: + // Needed by CodedOutputStream HadError. HadError needs to flush the patch + // buffers to ensure there is no error as of yet. + uint8_t* FlushAndResetBuffer(uint8_t*); + + // The following functions mimic the old CodedOutputStream behavior as close + // as possible. They flush the current state to the stream, behave as + // the old CodedOutputStream and then return to normal operation. + bool Skip(int count, uint8_t** pp); + bool GetDirectBufferPointer(void** data, int* size, uint8_t** pp); + uint8_t* GetDirectBufferForNBytesAndAdvance(int size, uint8_t** pp); + + friend class CodedOutputStream; +}; + +template <> +inline uint8_t* EpsCopyOutputStream::WriteRawLittleEndian<1>(const void* data, + int size, + uint8_t* ptr) { + return WriteRaw(data, size, ptr); +} +template <> +inline uint8_t* EpsCopyOutputStream::WriteRawLittleEndian<4>(const void* data, + int size, + uint8_t* ptr) { +#ifdef PROTOBUF_LITTLE_ENDIAN + return WriteRaw(data, size, ptr); +#else + return WriteRawLittleEndian32(data, size, ptr); +#endif +} +template <> +inline uint8_t* EpsCopyOutputStream::WriteRawLittleEndian<8>(const void* data, + int size, + uint8_t* ptr) { +#ifdef PROTOBUF_LITTLE_ENDIAN + return WriteRaw(data, size, ptr); +#else + return WriteRawLittleEndian64(data, size, ptr); +#endif +} + +// Class which encodes and writes binary data which is composed of varint- +// encoded integers and fixed-width pieces. Wraps a ZeroCopyOutputStream. +// Most users will not need to deal with CodedOutputStream. +// +// Most methods of CodedOutputStream which return a bool return false if an +// underlying I/O error occurs. Once such a failure occurs, the +// CodedOutputStream is broken and is no longer useful. The Write* methods do +// not return the stream status, but will invalidate the stream if an error +// occurs. The client can probe HadError() to determine the status. +// +// Note that every method of CodedOutputStream which writes some data has +// a corresponding static "ToArray" version. These versions write directly +// to the provided buffer, returning a pointer past the last written byte. +// They require that the buffer has sufficient capacity for the encoded data. +// This allows an optimization where we check if an output stream has enough +// space for an entire message before we start writing and, if there is, we +// call only the ToArray methods to avoid doing bound checks for each +// individual value. +// i.e., in the example above: +// +// CodedOutputStream* coded_output = new CodedOutputStream(raw_output); +// int magic_number = 1234; +// char text[] = "Hello world!"; +// +// int coded_size = sizeof(magic_number) + +// CodedOutputStream::VarintSize32(strlen(text)) + +// strlen(text); +// +// uint8_t* buffer = +// coded_output->GetDirectBufferForNBytesAndAdvance(coded_size); +// if (buffer != nullptr) { +// // The output stream has enough space in the buffer: write directly to +// // the array. +// buffer = CodedOutputStream::WriteLittleEndian32ToArray(magic_number, +// buffer); +// buffer = CodedOutputStream::WriteVarint32ToArray(strlen(text), buffer); +// buffer = CodedOutputStream::WriteRawToArray(text, strlen(text), buffer); +// } else { +// // Make bound-checked writes, which will ask the underlying stream for +// // more space as needed. +// coded_output->WriteLittleEndian32(magic_number); +// coded_output->WriteVarint32(strlen(text)); +// coded_output->WriteRaw(text, strlen(text)); +// } +// +// delete coded_output; +class PROTOBUF_EXPORT CodedOutputStream { + public: + // Create an CodedOutputStream that writes to the given ZeroCopyOutputStream. + explicit CodedOutputStream(ZeroCopyOutputStream* stream) + : CodedOutputStream(stream, true) {} + CodedOutputStream(ZeroCopyOutputStream* stream, bool do_eager_refresh); + + // Destroy the CodedOutputStream and position the underlying + // ZeroCopyOutputStream immediately after the last byte written. + ~CodedOutputStream(); + + // Returns true if there was an underlying I/O error since this object was + // created. On should call Trim before this function in order to catch all + // errors. + bool HadError() { + cur_ = impl_.FlushAndResetBuffer(cur_); + GOOGLE_DCHECK(cur_); + return impl_.HadError(); + } + + // Trims any unused space in the underlying buffer so that its size matches + // the number of bytes written by this stream. The underlying buffer will + // automatically be trimmed when this stream is destroyed; this call is only + // necessary if the underlying buffer is accessed *before* the stream is + // destroyed. + void Trim() { cur_ = impl_.Trim(cur_); } + + // Skips a number of bytes, leaving the bytes unmodified in the underlying + // buffer. Returns false if an underlying write error occurs. This is + // mainly useful with GetDirectBufferPointer(). + // Note of caution, the skipped bytes may contain uninitialized data. The + // caller must make sure that the skipped bytes are properly initialized, + // otherwise you might leak bytes from your heap. + bool Skip(int count) { return impl_.Skip(count, &cur_); } + + // Sets *data to point directly at the unwritten part of the + // CodedOutputStream's underlying buffer, and *size to the size of that + // buffer, but does not advance the stream's current position. This will + // always either produce a non-empty buffer or return false. If the caller + // writes any data to this buffer, it should then call Skip() to skip over + // the consumed bytes. This may be useful for implementing external fast + // serialization routines for types of data not covered by the + // CodedOutputStream interface. + bool GetDirectBufferPointer(void** data, int* size) { + return impl_.GetDirectBufferPointer(data, size, &cur_); + } + + // If there are at least "size" bytes available in the current buffer, + // returns a pointer directly into the buffer and advances over these bytes. + // The caller may then write directly into this buffer (e.g. using the + // *ToArray static methods) rather than go through CodedOutputStream. If + // there are not enough bytes available, returns NULL. The return pointer is + // invalidated as soon as any other non-const method of CodedOutputStream + // is called. + inline uint8_t* GetDirectBufferForNBytesAndAdvance(int size) { + return impl_.GetDirectBufferForNBytesAndAdvance(size, &cur_); + } + + // Write raw bytes, copying them from the given buffer. + void WriteRaw(const void* buffer, int size) { + cur_ = impl_.WriteRaw(buffer, size, cur_); + } + // Like WriteRaw() but will try to write aliased data if aliasing is + // turned on. + void WriteRawMaybeAliased(const void* data, int size); + // Like WriteRaw() but writing directly to the target array. + // This is _not_ inlined, as the compiler often optimizes memcpy into inline + // copy loops. Since this gets called by every field with string or bytes + // type, inlining may lead to a significant amount of code bloat, with only a + // minor performance gain. + static uint8_t* WriteRawToArray(const void* buffer, int size, + uint8_t* target); + + // Equivalent to WriteRaw(str.data(), str.size()). + void WriteString(const std::string& str); + // Like WriteString() but writing directly to the target array. + static uint8_t* WriteStringToArray(const std::string& str, uint8_t* target); + // Write the varint-encoded size of str followed by str. + static uint8_t* WriteStringWithSizeToArray(const std::string& str, + uint8_t* target); + + + // Write a 32-bit little-endian integer. + void WriteLittleEndian32(uint32_t value) { + cur_ = impl_.EnsureSpace(cur_); + SetCur(WriteLittleEndian32ToArray(value, Cur())); + } + // Like WriteLittleEndian32() but writing directly to the target array. + static uint8_t* WriteLittleEndian32ToArray(uint32_t value, uint8_t* target); + // Write a 64-bit little-endian integer. + void WriteLittleEndian64(uint64_t value) { + cur_ = impl_.EnsureSpace(cur_); + SetCur(WriteLittleEndian64ToArray(value, Cur())); + } + // Like WriteLittleEndian64() but writing directly to the target array. + static uint8_t* WriteLittleEndian64ToArray(uint64_t value, uint8_t* target); + + // Write an unsigned integer with Varint encoding. Writing a 32-bit value + // is equivalent to casting it to uint64_t and writing it as a 64-bit value, + // but may be more efficient. + void WriteVarint32(uint32_t value); + // Like WriteVarint32() but writing directly to the target array. + static uint8_t* WriteVarint32ToArray(uint32_t value, uint8_t* target); + // Like WriteVarint32() but writing directly to the target array, and with + // the less common-case paths being out of line rather than inlined. + static uint8_t* WriteVarint32ToArrayOutOfLine(uint32_t value, + uint8_t* target); + // Write an unsigned integer with Varint encoding. + void WriteVarint64(uint64_t value); + // Like WriteVarint64() but writing directly to the target array. + static uint8_t* WriteVarint64ToArray(uint64_t value, uint8_t* target); + + // Equivalent to WriteVarint32() except when the value is negative, + // in which case it must be sign-extended to a full 10 bytes. + void WriteVarint32SignExtended(int32_t value); + // Like WriteVarint32SignExtended() but writing directly to the target array. + static uint8_t* WriteVarint32SignExtendedToArray(int32_t value, + uint8_t* target); + + // This is identical to WriteVarint32(), but optimized for writing tags. + // In particular, if the input is a compile-time constant, this method + // compiles down to a couple instructions. + // Always inline because otherwise the aforementioned optimization can't work, + // but GCC by default doesn't want to inline this. + void WriteTag(uint32_t value); + // Like WriteTag() but writing directly to the target array. + PROTOBUF_ALWAYS_INLINE + static uint8_t* WriteTagToArray(uint32_t value, uint8_t* target); + + // Returns the number of bytes needed to encode the given value as a varint. + static size_t VarintSize32(uint32_t value); + // Returns the number of bytes needed to encode the given value as a varint. + static size_t VarintSize64(uint64_t value); + + // If negative, 10 bytes. Otherwise, same as VarintSize32(). + static size_t VarintSize32SignExtended(int32_t value); + + // Same as above, plus one. The additional one comes at no compute cost. + static size_t VarintSize32PlusOne(uint32_t value); + static size_t VarintSize64PlusOne(uint64_t value); + static size_t VarintSize32SignExtendedPlusOne(int32_t value); + + // Compile-time equivalent of VarintSize32(). + template <uint32_t Value> + struct StaticVarintSize32 { + static const size_t value = (Value < (1 << 7)) ? 1 + : (Value < (1 << 14)) ? 2 + : (Value < (1 << 21)) ? 3 + : (Value < (1 << 28)) ? 4 + : 5; + }; + + // Returns the total number of bytes written since this object was created. + int ByteCount() const { + return static_cast<int>(impl_.ByteCount(cur_) - start_count_); + } + + // Instructs the CodedOutputStream to allow the underlying + // ZeroCopyOutputStream to hold pointers to the original structure instead of + // copying, if it supports it (i.e. output->AllowsAliasing() is true). If the + // underlying stream does not support aliasing, then enabling it has no + // affect. For now, this only affects the behavior of + // WriteRawMaybeAliased(). + // + // NOTE: It is caller's responsibility to ensure that the chunk of memory + // remains live until all of the data has been consumed from the stream. + void EnableAliasing(bool enabled) { impl_.EnableAliasing(enabled); } + + // Indicate to the serializer whether the user wants derministic + // serialization. The default when this is not called comes from the global + // default, controlled by SetDefaultSerializationDeterministic. + // + // What deterministic serialization means is entirely up to the driver of the + // serialization process (i.e. the caller of methods like WriteVarint32). In + // the case of serializing a proto buffer message using one of the methods of + // MessageLite, this means that for a given binary equal messages will always + // be serialized to the same bytes. This implies: + // + // * Repeated serialization of a message will return the same bytes. + // + // * Different processes running the same binary (including on different + // machines) will serialize equal messages to the same bytes. + // + // Note that this is *not* canonical across languages. It is also unstable + // across different builds with intervening message definition changes, due to + // unknown fields. Users who need canonical serialization (e.g. persistent + // storage in a canonical form, fingerprinting) should define their own + // canonicalization specification and implement the serializer using + // reflection APIs rather than relying on this API. + void SetSerializationDeterministic(bool value) { + impl_.SetSerializationDeterministic(value); + } + + // Return whether the user wants deterministic serialization. See above. + bool IsSerializationDeterministic() const { + return impl_.IsSerializationDeterministic(); + } + + static bool IsDefaultSerializationDeterministic() { + return default_serialization_deterministic_.load( + std::memory_order_relaxed) != 0; + } + + template <typename Func> + void Serialize(const Func& func); + + uint8_t* Cur() const { return cur_; } + void SetCur(uint8_t* ptr) { cur_ = ptr; } + EpsCopyOutputStream* EpsCopy() { return &impl_; } + + private: + EpsCopyOutputStream impl_; + uint8_t* cur_; + int64_t start_count_; + static std::atomic<bool> default_serialization_deterministic_; + + // See above. Other projects may use "friend" to allow them to call this. + // After SetDefaultSerializationDeterministic() completes, all protocol + // buffer serializations will be deterministic by default. Thread safe. + // However, the meaning of "after" is subtle here: to be safe, each thread + // that wants deterministic serialization by default needs to call + // SetDefaultSerializationDeterministic() or ensure on its own that another + // thread has done so. + friend void internal::MapTestForceDeterministic(); + static void SetDefaultSerializationDeterministic() { + default_serialization_deterministic_.store(true, std::memory_order_relaxed); + } + // REQUIRES: value >= 0x80, and that (value & 7f) has been written to *target. + static uint8_t* WriteVarint32ToArrayOutOfLineHelper(uint32_t value, + uint8_t* target); + GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(CodedOutputStream); +}; + +// inline methods ==================================================== +// The vast majority of varints are only one byte. These inline +// methods optimize for that case. + +inline bool CodedInputStream::ReadVarint32(uint32_t* value) { + uint32_t v = 0; + if (PROTOBUF_PREDICT_TRUE(buffer_ < buffer_end_)) { + v = *buffer_; + if (v < 0x80) { + *value = v; + Advance(1); + return true; + } + } + int64_t result = ReadVarint32Fallback(v); + *value = static_cast<uint32_t>(result); + return result >= 0; +} + +inline bool CodedInputStream::ReadVarint64(uint64_t* value) { + if (PROTOBUF_PREDICT_TRUE(buffer_ < buffer_end_) && *buffer_ < 0x80) { + *value = *buffer_; + Advance(1); + return true; + } + std::pair<uint64_t, bool> p = ReadVarint64Fallback(); + *value = p.first; + return p.second; +} + +inline bool CodedInputStream::ReadVarintSizeAsInt(int* value) { + if (PROTOBUF_PREDICT_TRUE(buffer_ < buffer_end_)) { + int v = *buffer_; + if (v < 0x80) { + *value = v; + Advance(1); + return true; + } + } + *value = ReadVarintSizeAsIntFallback(); + return *value >= 0; +} + +// static +inline const uint8_t* CodedInputStream::ReadLittleEndian32FromArray( + const uint8_t* buffer, uint32_t* value) { +#if defined(PROTOBUF_LITTLE_ENDIAN) + memcpy(value, buffer, sizeof(*value)); + return buffer + sizeof(*value); +#else + *value = (static_cast<uint32_t>(buffer[0])) | + (static_cast<uint32_t>(buffer[1]) << 8) | + (static_cast<uint32_t>(buffer[2]) << 16) | + (static_cast<uint32_t>(buffer[3]) << 24); + return buffer + sizeof(*value); +#endif +} +// static +inline const uint8_t* CodedInputStream::ReadLittleEndian64FromArray( + const uint8_t* buffer, uint64_t* value) { +#if defined(PROTOBUF_LITTLE_ENDIAN) + memcpy(value, buffer, sizeof(*value)); + return buffer + sizeof(*value); +#else + uint32_t part0 = (static_cast<uint32_t>(buffer[0])) | + (static_cast<uint32_t>(buffer[1]) << 8) | + (static_cast<uint32_t>(buffer[2]) << 16) | + (static_cast<uint32_t>(buffer[3]) << 24); + uint32_t part1 = (static_cast<uint32_t>(buffer[4])) | + (static_cast<uint32_t>(buffer[5]) << 8) | + (static_cast<uint32_t>(buffer[6]) << 16) | + (static_cast<uint32_t>(buffer[7]) << 24); + *value = static_cast<uint64_t>(part0) | (static_cast<uint64_t>(part1) << 32); + return buffer + sizeof(*value); +#endif +} + +inline bool CodedInputStream::ReadLittleEndian32(uint32_t* value) { +#if defined(PROTOBUF_LITTLE_ENDIAN) + if (PROTOBUF_PREDICT_TRUE(BufferSize() >= static_cast<int>(sizeof(*value)))) { + buffer_ = ReadLittleEndian32FromArray(buffer_, value); + return true; + } else { + return ReadLittleEndian32Fallback(value); + } +#else + return ReadLittleEndian32Fallback(value); +#endif +} + +inline bool CodedInputStream::ReadLittleEndian64(uint64_t* value) { +#if defined(PROTOBUF_LITTLE_ENDIAN) + if (PROTOBUF_PREDICT_TRUE(BufferSize() >= static_cast<int>(sizeof(*value)))) { + buffer_ = ReadLittleEndian64FromArray(buffer_, value); + return true; + } else { + return ReadLittleEndian64Fallback(value); + } +#else + return ReadLittleEndian64Fallback(value); +#endif +} + +inline uint32_t CodedInputStream::ReadTagNoLastTag() { + uint32_t v = 0; + if (PROTOBUF_PREDICT_TRUE(buffer_ < buffer_end_)) { + v = *buffer_; + if (v < 0x80) { + Advance(1); + return v; + } + } + v = ReadTagFallback(v); + return v; +} + +inline std::pair<uint32_t, bool> CodedInputStream::ReadTagWithCutoffNoLastTag( + uint32_t cutoff) { + // In performance-sensitive code we can expect cutoff to be a compile-time + // constant, and things like "cutoff >= kMax1ByteVarint" to be evaluated at + // compile time. + uint32_t first_byte_or_zero = 0; + if (PROTOBUF_PREDICT_TRUE(buffer_ < buffer_end_)) { + // Hot case: buffer_ non_empty, buffer_[0] in [1, 128). + // TODO(gpike): Is it worth rearranging this? E.g., if the number of fields + // is large enough then is it better to check for the two-byte case first? + first_byte_or_zero = buffer_[0]; + if (static_cast<int8_t>(buffer_[0]) > 0) { + const uint32_t kMax1ByteVarint = 0x7f; + uint32_t tag = buffer_[0]; + Advance(1); + return std::make_pair(tag, cutoff >= kMax1ByteVarint || tag <= cutoff); + } + // Other hot case: cutoff >= 0x80, buffer_ has at least two bytes available, + // and tag is two bytes. The latter is tested by bitwise-and-not of the + // first byte and the second byte. + if (cutoff >= 0x80 && PROTOBUF_PREDICT_TRUE(buffer_ + 1 < buffer_end_) && + PROTOBUF_PREDICT_TRUE((buffer_[0] & ~buffer_[1]) >= 0x80)) { + const uint32_t kMax2ByteVarint = (0x7f << 7) + 0x7f; + uint32_t tag = (1u << 7) * buffer_[1] + (buffer_[0] - 0x80); + Advance(2); + // It might make sense to test for tag == 0 now, but it is so rare that + // that we don't bother. A varint-encoded 0 should be one byte unless + // the encoder lost its mind. The second part of the return value of + // this function is allowed to be either true or false if the tag is 0, + // so we don't have to check for tag == 0. We may need to check whether + // it exceeds cutoff. + bool at_or_below_cutoff = cutoff >= kMax2ByteVarint || tag <= cutoff; + return std::make_pair(tag, at_or_below_cutoff); + } + } + // Slow path + const uint32_t tag = ReadTagFallback(first_byte_or_zero); + return std::make_pair(tag, static_cast<uint32_t>(tag - 1) < cutoff); +} + +inline bool CodedInputStream::LastTagWas(uint32_t expected) { + return last_tag_ == expected; +} + +inline bool CodedInputStream::ConsumedEntireMessage() { + return legitimate_message_end_; +} + +inline bool CodedInputStream::ExpectTag(uint32_t expected) { + if (expected < (1 << 7)) { + if (PROTOBUF_PREDICT_TRUE(buffer_ < buffer_end_) && + buffer_[0] == expected) { + Advance(1); + return true; + } else { + return false; + } + } else if (expected < (1 << 14)) { + if (PROTOBUF_PREDICT_TRUE(BufferSize() >= 2) && + buffer_[0] == static_cast<uint8_t>(expected | 0x80) && + buffer_[1] == static_cast<uint8_t>(expected >> 7)) { + Advance(2); + return true; + } else { + return false; + } + } else { + // Don't bother optimizing for larger values. + return false; + } +} + +inline const uint8_t* CodedInputStream::ExpectTagFromArray( + const uint8_t* buffer, uint32_t expected) { + if (expected < (1 << 7)) { + if (buffer[0] == expected) { + return buffer + 1; + } + } else if (expected < (1 << 14)) { + if (buffer[0] == static_cast<uint8_t>(expected | 0x80) && + buffer[1] == static_cast<uint8_t>(expected >> 7)) { + return buffer + 2; + } + } + return nullptr; +} + +inline void CodedInputStream::GetDirectBufferPointerInline(const void** data, + int* size) { + *data = buffer_; + *size = static_cast<int>(buffer_end_ - buffer_); +} + +inline bool CodedInputStream::ExpectAtEnd() { + // If we are at a limit we know no more bytes can be read. Otherwise, it's + // hard to say without calling Refresh(), and we'd rather not do that. + + if (buffer_ == buffer_end_ && ((buffer_size_after_limit_ != 0) || + (total_bytes_read_ == current_limit_))) { + last_tag_ = 0; // Pretend we called ReadTag()... + legitimate_message_end_ = true; // ... and it hit EOF. + return true; + } else { + return false; + } +} + +inline int CodedInputStream::CurrentPosition() const { + return total_bytes_read_ - (BufferSize() + buffer_size_after_limit_); +} + +inline void CodedInputStream::Advance(int amount) { buffer_ += amount; } + +inline void CodedInputStream::SetRecursionLimit(int limit) { + recursion_budget_ += limit - recursion_limit_; + recursion_limit_ = limit; +} + +inline bool CodedInputStream::IncrementRecursionDepth() { + --recursion_budget_; + return recursion_budget_ >= 0; +} + +inline void CodedInputStream::DecrementRecursionDepth() { + if (recursion_budget_ < recursion_limit_) ++recursion_budget_; +} + +inline void CodedInputStream::UnsafeDecrementRecursionDepth() { + assert(recursion_budget_ < recursion_limit_); + ++recursion_budget_; +} + +inline void CodedInputStream::SetExtensionRegistry(const DescriptorPool* pool, + MessageFactory* factory) { + extension_pool_ = pool; + extension_factory_ = factory; +} + +inline const DescriptorPool* CodedInputStream::GetExtensionPool() { + return extension_pool_; +} + +inline MessageFactory* CodedInputStream::GetExtensionFactory() { + return extension_factory_; +} + +inline int CodedInputStream::BufferSize() const { + return static_cast<int>(buffer_end_ - buffer_); +} + +inline CodedInputStream::CodedInputStream(ZeroCopyInputStream* input) + : buffer_(nullptr), + buffer_end_(nullptr), + input_(input), + total_bytes_read_(0), + overflow_bytes_(0), + last_tag_(0), + legitimate_message_end_(false), + aliasing_enabled_(false), + current_limit_(std::numeric_limits<int32_t>::max()), + buffer_size_after_limit_(0), + total_bytes_limit_(kDefaultTotalBytesLimit), + recursion_budget_(default_recursion_limit_), + recursion_limit_(default_recursion_limit_), + extension_pool_(nullptr), + extension_factory_(nullptr) { + // Eagerly Refresh() so buffer space is immediately available. + Refresh(); +} + +inline CodedInputStream::CodedInputStream(const uint8_t* buffer, int size) + : buffer_(buffer), + buffer_end_(buffer + size), + input_(nullptr), + total_bytes_read_(size), + overflow_bytes_(0), + last_tag_(0), + legitimate_message_end_(false), + aliasing_enabled_(false), + current_limit_(size), + buffer_size_after_limit_(0), + total_bytes_limit_(kDefaultTotalBytesLimit), + recursion_budget_(default_recursion_limit_), + recursion_limit_(default_recursion_limit_), + extension_pool_(nullptr), + extension_factory_(nullptr) { + // Note that setting current_limit_ == size is important to prevent some + // code paths from trying to access input_ and segfaulting. +} + +inline bool CodedInputStream::IsFlat() const { return input_ == nullptr; } + +inline bool CodedInputStream::Skip(int count) { + if (count < 0) return false; // security: count is often user-supplied + + const int original_buffer_size = BufferSize(); + + if (count <= original_buffer_size) { + // Just skipping within the current buffer. Easy. + Advance(count); + return true; + } + + return SkipFallback(count, original_buffer_size); +} + +inline uint8_t* CodedOutputStream::WriteVarint32ToArray(uint32_t value, + uint8_t* target) { + return EpsCopyOutputStream::UnsafeVarint(value, target); +} + +inline uint8_t* CodedOutputStream::WriteVarint32ToArrayOutOfLine( + uint32_t value, uint8_t* target) { + target[0] = static_cast<uint8_t>(value); + if (value < 0x80) { + return target + 1; + } else { + return WriteVarint32ToArrayOutOfLineHelper(value, target); + } +} + +inline uint8_t* CodedOutputStream::WriteVarint64ToArray(uint64_t value, + uint8_t* target) { + return EpsCopyOutputStream::UnsafeVarint(value, target); +} + +inline void CodedOutputStream::WriteVarint32SignExtended(int32_t value) { + WriteVarint64(static_cast<uint64_t>(value)); +} + +inline uint8_t* CodedOutputStream::WriteVarint32SignExtendedToArray( + int32_t value, uint8_t* target) { + return WriteVarint64ToArray(static_cast<uint64_t>(value), target); +} + +inline uint8_t* CodedOutputStream::WriteLittleEndian32ToArray(uint32_t value, + uint8_t* target) { +#if defined(PROTOBUF_LITTLE_ENDIAN) + memcpy(target, &value, sizeof(value)); +#else + target[0] = static_cast<uint8_t>(value); + target[1] = static_cast<uint8_t>(value >> 8); + target[2] = static_cast<uint8_t>(value >> 16); + target[3] = static_cast<uint8_t>(value >> 24); +#endif + return target + sizeof(value); +} + +inline uint8_t* CodedOutputStream::WriteLittleEndian64ToArray(uint64_t value, + uint8_t* target) { +#if defined(PROTOBUF_LITTLE_ENDIAN) + memcpy(target, &value, sizeof(value)); +#else + uint32_t part0 = static_cast<uint32_t>(value); + uint32_t part1 = static_cast<uint32_t>(value >> 32); + + target[0] = static_cast<uint8_t>(part0); + target[1] = static_cast<uint8_t>(part0 >> 8); + target[2] = static_cast<uint8_t>(part0 >> 16); + target[3] = static_cast<uint8_t>(part0 >> 24); + target[4] = static_cast<uint8_t>(part1); + target[5] = static_cast<uint8_t>(part1 >> 8); + target[6] = static_cast<uint8_t>(part1 >> 16); + target[7] = static_cast<uint8_t>(part1 >> 24); +#endif + return target + sizeof(value); +} + +inline void CodedOutputStream::WriteVarint32(uint32_t value) { + cur_ = impl_.EnsureSpace(cur_); + SetCur(WriteVarint32ToArray(value, Cur())); +} + +inline void CodedOutputStream::WriteVarint64(uint64_t value) { + cur_ = impl_.EnsureSpace(cur_); + SetCur(WriteVarint64ToArray(value, Cur())); +} + +inline void CodedOutputStream::WriteTag(uint32_t value) { + WriteVarint32(value); +} + +inline uint8_t* CodedOutputStream::WriteTagToArray(uint32_t value, + uint8_t* target) { + return WriteVarint32ToArray(value, target); +} + +inline size_t CodedOutputStream::VarintSize32(uint32_t value) { + // This computes value == 0 ? 1 : floor(log2(value)) / 7 + 1 + // Use an explicit multiplication to implement the divide of + // a number in the 1..31 range. + // Explicit OR 0x1 to avoid calling Bits::Log2FloorNonZero(0), which is + // undefined. + uint32_t log2value = Bits::Log2FloorNonZero(value | 0x1); + return static_cast<size_t>((log2value * 9 + 73) / 64); +} + +inline size_t CodedOutputStream::VarintSize32PlusOne(uint32_t value) { + // Same as above, but one more. + uint32_t log2value = Bits::Log2FloorNonZero(value | 0x1); + return static_cast<size_t>((log2value * 9 + 73 + 64) / 64); +} + +inline size_t CodedOutputStream::VarintSize64(uint64_t value) { + // This computes value == 0 ? 1 : floor(log2(value)) / 7 + 1 + // Use an explicit multiplication to implement the divide of + // a number in the 1..63 range. + // Explicit OR 0x1 to avoid calling Bits::Log2FloorNonZero(0), which is + // undefined. + uint32_t log2value = Bits::Log2FloorNonZero64(value | 0x1); + return static_cast<size_t>((log2value * 9 + 73) / 64); +} + +inline size_t CodedOutputStream::VarintSize64PlusOne(uint64_t value) { + // Same as above, but one more. + uint32_t log2value = Bits::Log2FloorNonZero64(value | 0x1); + return static_cast<size_t>((log2value * 9 + 73 + 64) / 64); +} + +inline size_t CodedOutputStream::VarintSize32SignExtended(int32_t value) { + return VarintSize64(static_cast<uint64_t>(int64_t{value})); +} + +inline size_t CodedOutputStream::VarintSize32SignExtendedPlusOne( + int32_t value) { + return VarintSize64PlusOne(static_cast<uint64_t>(int64_t{value})); +} + +inline void CodedOutputStream::WriteString(const std::string& str) { + WriteRaw(str.data(), static_cast<int>(str.size())); +} + +inline void CodedOutputStream::WriteRawMaybeAliased(const void* data, + int size) { + cur_ = impl_.WriteRawMaybeAliased(data, size, cur_); +} + +inline uint8_t* CodedOutputStream::WriteRawToArray(const void* data, int size, + uint8_t* target) { + memcpy(target, data, size); + return target + size; +} + +inline uint8_t* CodedOutputStream::WriteStringToArray(const std::string& str, + uint8_t* target) { + return WriteRawToArray(str.data(), static_cast<int>(str.size()), target); +} + +} // namespace io +} // namespace protobuf +} // namespace google + +#if defined(_MSC_VER) && _MSC_VER >= 1300 && !defined(__INTEL_COMPILER) +#pragma runtime_checks("c", restore) +#endif // _MSC_VER && !defined(__INTEL_COMPILER) + +#include <port_undef.inc> + +#endif // GOOGLE_PROTOBUF_IO_CODED_STREAM_H__ diff --git a/NorthstarDedicatedTest/include/protobuf/io/coded_stream_unittest.cc b/NorthstarDedicatedTest/include/protobuf/io/coded_stream_unittest.cc new file mode 100644 index 00000000..fffe879a --- /dev/null +++ b/NorthstarDedicatedTest/include/protobuf/io/coded_stream_unittest.cc @@ -0,0 +1,1346 @@ +// Protocol Buffers - Google's data interchange format +// Copyright 2008 Google Inc. All rights reserved. +// https://developers.google.com/protocol-buffers/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// Author: kenton@google.com (Kenton Varda) +// Based on original Protocol Buffers design by +// Sanjay Ghemawat, Jeff Dean, and others. +// +// This file contains tests and benchmarks. + +#include <io/coded_stream.h> + +#include <limits.h> + +#include <memory> +#include <vector> + +#include <stubs/common.h> +#include <stubs/logging.h> +#include <stubs/logging.h> +#include <io/zero_copy_stream_impl.h> +#include <testing/googletest.h> +#include <gtest/gtest.h> +#include <stubs/casts.h> + +#include <port_def.inc> + + +namespace google { +namespace protobuf { +namespace io { +namespace { + + +// =================================================================== +// Data-Driven Test Infrastructure + +// TEST_1D and TEST_2D are macros I'd eventually like to see added to +// gTest. These macros can be used to declare tests which should be +// run multiple times, once for each item in some input array. TEST_1D +// tests all cases in a single input array. TEST_2D tests all +// combinations of cases from two arrays. The arrays must be statically +// defined such that the GOOGLE_ARRAYSIZE() macro works on them. Example: +// +// int kCases[] = {1, 2, 3, 4} +// TEST_1D(MyFixture, MyTest, kCases) { +// EXPECT_GT(kCases_case, 0); +// } +// +// This test iterates through the numbers 1, 2, 3, and 4 and tests that +// they are all grater than zero. In case of failure, the exact case +// which failed will be printed. The case type must be printable using +// ostream::operator<<. + +// TODO(kenton): gTest now supports "parameterized tests" which would be +// a better way to accomplish this. Rewrite when time permits. + +#define TEST_1D(FIXTURE, NAME, CASES) \ + class FIXTURE##_##NAME##_DD : public FIXTURE { \ + protected: \ + template <typename CaseType> \ + void DoSingleCase(const CaseType& CASES##_case); \ + }; \ + \ + TEST_F(FIXTURE##_##NAME##_DD, NAME) { \ + for (int i = 0; i < GOOGLE_ARRAYSIZE(CASES); i++) { \ + SCOPED_TRACE(testing::Message() \ + << #CASES " case #" << i << ": " << CASES[i]); \ + DoSingleCase(CASES[i]); \ + } \ + } \ + \ + template <typename CaseType> \ + void FIXTURE##_##NAME##_DD::DoSingleCase(const CaseType& CASES##_case) + +#define TEST_2D(FIXTURE, NAME, CASES1, CASES2) \ + class FIXTURE##_##NAME##_DD : public FIXTURE { \ + protected: \ + template <typename CaseType1, typename CaseType2> \ + void DoSingleCase(const CaseType1& CASES1##_case, \ + const CaseType2& CASES2##_case); \ + }; \ + \ + TEST_F(FIXTURE##_##NAME##_DD, NAME) { \ + for (int i = 0; i < GOOGLE_ARRAYSIZE(CASES1); i++) { \ + for (int j = 0; j < GOOGLE_ARRAYSIZE(CASES2); j++) { \ + SCOPED_TRACE(testing::Message() \ + << #CASES1 " case #" << i << ": " << CASES1[i] << ", " \ + << #CASES2 " case #" << j << ": " << CASES2[j]); \ + DoSingleCase(CASES1[i], CASES2[j]); \ + } \ + } \ + } \ + \ + template <typename CaseType1, typename CaseType2> \ + void FIXTURE##_##NAME##_DD::DoSingleCase(const CaseType1& CASES1##_case, \ + const CaseType2& CASES2##_case) + +// =================================================================== + +class CodedStreamTest : public testing::Test { + protected: + // Buffer used during most of the tests. This assumes tests run sequentially. + static constexpr int kBufferSize = 1024 * 64; + static uint8 buffer_[kBufferSize]; +}; + +uint8 CodedStreamTest::buffer_[CodedStreamTest::kBufferSize]; + +// We test each operation over a variety of block sizes to insure that +// we test cases where reads or writes cross buffer boundaries, cases +// where they don't, and cases where there is so much buffer left that +// we can use special optimized paths that don't worry about bounds +// checks. +const int kBlockSizes[] = {1, 2, 3, 5, 7, 13, 32, 1024}; + + +// ------------------------------------------------------------------- +// Varint tests. + +struct VarintCase { + uint8 bytes[10]; // Encoded bytes. + int size; // Encoded size, in bytes. + uint64 value; // Parsed value. +}; + +inline std::ostream& operator<<(std::ostream& os, const VarintCase& c) { + return os << c.value; +} + +VarintCase kVarintCases[] = { + // 32-bit values + {{0x00}, 1, 0}, + {{0x01}, 1, 1}, + {{0x7f}, 1, 127}, + {{0xa2, 0x74}, 2, (0x22 << 0) | (0x74 << 7)}, // 14882 + {{0xbe, 0xf7, 0x92, 0x84, 0x0b}, + 5, // 2961488830 + (0x3e << 0) | (0x77 << 7) | (0x12 << 14) | (0x04 << 21) | + (uint64_t{0x0bu} << 28)}, + + // 64-bit + {{0xbe, 0xf7, 0x92, 0x84, 0x1b}, + 5, // 7256456126 + (0x3e << 0) | (0x77 << 7) | (0x12 << 14) | (0x04 << 21) | + (uint64_t{0x1bu} << 28)}, + {{0x80, 0xe6, 0xeb, 0x9c, 0xc3, 0xc9, 0xa4, 0x49}, + 8, // 41256202580718336 + (0x00 << 0) | (0x66 << 7) | (0x6b << 14) | (0x1c << 21) | + (uint64_t{0x43u} << 28) | (uint64_t{0x49u} << 35) | + (uint64_t{0x24u} << 42) | (uint64_t{0x49u} << 49)}, + // 11964378330978735131 + {{0x9b, 0xa8, 0xf9, 0xc2, 0xbb, 0xd6, 0x80, 0x85, 0xa6, 0x01}, + 10, + (0x1b << 0) | (0x28 << 7) | (0x79 << 14) | (0x42 << 21) | + (uint64_t{0x3bu} << 28) | (uint64_t{0x56u} << 35) | + (uint64_t{0x00u} << 42) | (uint64_t{0x05u} << 49) | + (uint64_t{0x26u} << 56) | (uint64_t{0x01u} << 63)}, +}; + +TEST_2D(CodedStreamTest, ReadVarint32, kVarintCases, kBlockSizes) { + memcpy(buffer_, kVarintCases_case.bytes, kVarintCases_case.size); + ArrayInputStream input(buffer_, sizeof(buffer_), kBlockSizes_case); + + { + CodedInputStream coded_input(&input); + + uint32 value; + EXPECT_TRUE(coded_input.ReadVarint32(&value)); + EXPECT_EQ(static_cast<uint32>(kVarintCases_case.value), value); + } + + EXPECT_EQ(kVarintCases_case.size, input.ByteCount()); +} + +TEST_2D(CodedStreamTest, ReadTag, kVarintCases, kBlockSizes) { + memcpy(buffer_, kVarintCases_case.bytes, kVarintCases_case.size); + ArrayInputStream input(buffer_, sizeof(buffer_), kBlockSizes_case); + + { + CodedInputStream coded_input(&input); + + uint32 expected_value = static_cast<uint32>(kVarintCases_case.value); + EXPECT_EQ(expected_value, coded_input.ReadTag()); + + EXPECT_TRUE(coded_input.LastTagWas(expected_value)); + EXPECT_FALSE(coded_input.LastTagWas(expected_value + 1)); + } + + EXPECT_EQ(kVarintCases_case.size, input.ByteCount()); +} + +// This is the regression test that verifies that there is no issues +// with the empty input buffers handling. +TEST_F(CodedStreamTest, EmptyInputBeforeEos) { + class In : public ZeroCopyInputStream { + public: + In() : count_(0) {} + + private: + bool Next(const void** data, int* size) override { + *data = NULL; + *size = 0; + return count_++ < 2; + } + void BackUp(int count) override { GOOGLE_LOG(FATAL) << "Tests never call this."; } + bool Skip(int count) override { + GOOGLE_LOG(FATAL) << "Tests never call this."; + return false; + } + int64_t ByteCount() const override { return 0; } + int count_; + } in; + CodedInputStream input(&in); + input.ReadTagNoLastTag(); + EXPECT_TRUE(input.ConsumedEntireMessage()); +} + +TEST_1D(CodedStreamTest, ExpectTag, kVarintCases) { + // Leave one byte at the beginning of the buffer so we can read it + // to force the first buffer to be loaded. + buffer_[0] = '\0'; + memcpy(buffer_ + 1, kVarintCases_case.bytes, kVarintCases_case.size); + ArrayInputStream input(buffer_, sizeof(buffer_)); + + { + CodedInputStream coded_input(&input); + + // Read one byte to force coded_input.Refill() to be called. Otherwise, + // ExpectTag() will return a false negative. + uint8 dummy; + coded_input.ReadRaw(&dummy, 1); + EXPECT_EQ((uint)'\0', (uint)dummy); + + uint32 expected_value = static_cast<uint32>(kVarintCases_case.value); + + // ExpectTag() produces false negatives for large values. + if (kVarintCases_case.size <= 2) { + EXPECT_FALSE(coded_input.ExpectTag(expected_value + 1)); + EXPECT_TRUE(coded_input.ExpectTag(expected_value)); + } else { + EXPECT_FALSE(coded_input.ExpectTag(expected_value)); + } + } + + if (kVarintCases_case.size <= 2) { + EXPECT_EQ(kVarintCases_case.size + 1, input.ByteCount()); + } else { + EXPECT_EQ(1, input.ByteCount()); + } +} + +TEST_1D(CodedStreamTest, ExpectTagFromArray, kVarintCases) { + memcpy(buffer_, kVarintCases_case.bytes, kVarintCases_case.size); + + const uint32 expected_value = static_cast<uint32>(kVarintCases_case.value); + + // If the expectation succeeds, it should return a pointer past the tag. + if (kVarintCases_case.size <= 2) { + EXPECT_TRUE(NULL == CodedInputStream::ExpectTagFromArray( + buffer_, expected_value + 1)); + EXPECT_TRUE(buffer_ + kVarintCases_case.size == + CodedInputStream::ExpectTagFromArray(buffer_, expected_value)); + } else { + EXPECT_TRUE(NULL == + CodedInputStream::ExpectTagFromArray(buffer_, expected_value)); + } +} + +TEST_2D(CodedStreamTest, ReadVarint64, kVarintCases, kBlockSizes) { + memcpy(buffer_, kVarintCases_case.bytes, kVarintCases_case.size); + ArrayInputStream input(buffer_, sizeof(buffer_), kBlockSizes_case); + + { + CodedInputStream coded_input(&input); + + uint64 value; + EXPECT_TRUE(coded_input.ReadVarint64(&value)); + EXPECT_EQ(kVarintCases_case.value, value); + } + + EXPECT_EQ(kVarintCases_case.size, input.ByteCount()); +} + +TEST_2D(CodedStreamTest, WriteVarint32, kVarintCases, kBlockSizes) { + if (kVarintCases_case.value > uint64_t{0x00000000FFFFFFFFu}) { + // Skip this test for the 64-bit values. + return; + } + + ArrayOutputStream output(buffer_, sizeof(buffer_), kBlockSizes_case); + + { + CodedOutputStream coded_output(&output); + + coded_output.WriteVarint32(static_cast<uint32>(kVarintCases_case.value)); + EXPECT_FALSE(coded_output.HadError()); + + EXPECT_EQ(kVarintCases_case.size, coded_output.ByteCount()); + } + + EXPECT_EQ(kVarintCases_case.size, output.ByteCount()); + EXPECT_EQ(0, + memcmp(buffer_, kVarintCases_case.bytes, kVarintCases_case.size)); +} + +TEST_2D(CodedStreamTest, WriteVarint64, kVarintCases, kBlockSizes) { + ArrayOutputStream output(buffer_, sizeof(buffer_), kBlockSizes_case); + + { + CodedOutputStream coded_output(&output); + + coded_output.WriteVarint64(kVarintCases_case.value); + EXPECT_FALSE(coded_output.HadError()); + + EXPECT_EQ(kVarintCases_case.size, coded_output.ByteCount()); + } + + EXPECT_EQ(kVarintCases_case.size, output.ByteCount()); + EXPECT_EQ(0, + memcmp(buffer_, kVarintCases_case.bytes, kVarintCases_case.size)); +} + +// This test causes gcc 3.3.5 (and earlier?) to give the cryptic error: +// "sorry, unimplemented: `method_call_expr' not supported by dump_expr" +#if !defined(__GNUC__) || __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ > 3) + +int32 kSignExtendedVarintCases[] = {0, 1, -1, 1237894, -37895138}; + +TEST_2D(CodedStreamTest, WriteVarint32SignExtended, kSignExtendedVarintCases, + kBlockSizes) { + ArrayOutputStream output(buffer_, sizeof(buffer_), kBlockSizes_case); + + { + CodedOutputStream coded_output(&output); + + coded_output.WriteVarint32SignExtended(kSignExtendedVarintCases_case); + EXPECT_FALSE(coded_output.HadError()); + + if (kSignExtendedVarintCases_case < 0) { + EXPECT_EQ(10, coded_output.ByteCount()); + } else { + EXPECT_LE(coded_output.ByteCount(), 5); + } + } + + if (kSignExtendedVarintCases_case < 0) { + EXPECT_EQ(10, output.ByteCount()); + } else { + EXPECT_LE(output.ByteCount(), 5); + } + + // Read value back in as a varint64 and insure it matches. + ArrayInputStream input(buffer_, sizeof(buffer_)); + + { + CodedInputStream coded_input(&input); + + uint64 value; + EXPECT_TRUE(coded_input.ReadVarint64(&value)); + + EXPECT_EQ(kSignExtendedVarintCases_case, static_cast<int64>(value)); + } + + EXPECT_EQ(output.ByteCount(), input.ByteCount()); +} + +#endif + + +// ------------------------------------------------------------------- +// Varint failure test. + +struct VarintErrorCase { + uint8 bytes[12]; + int size; + bool can_parse; +}; + +inline std::ostream& operator<<(std::ostream& os, const VarintErrorCase& c) { + return os << "size " << c.size; +} + +const VarintErrorCase kVarintErrorCases[] = { + // Control case. (Insures that there isn't something else wrong that + // makes parsing always fail.) + {{0x00}, 1, true}, + + // No input data. + {{}, 0, false}, + + // Input ends unexpectedly. + {{0xf0, 0xab}, 2, false}, + + // Input ends unexpectedly after 32 bits. + {{0xf0, 0xab, 0xc9, 0x9a, 0xf8, 0xb2}, 6, false}, + + // Longer than 10 bytes. + {{0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x01}, + 11, + false}, +}; + +TEST_2D(CodedStreamTest, ReadVarint32Error, kVarintErrorCases, kBlockSizes) { + memcpy(buffer_, kVarintErrorCases_case.bytes, kVarintErrorCases_case.size); + ArrayInputStream input(buffer_, kVarintErrorCases_case.size, + kBlockSizes_case); + CodedInputStream coded_input(&input); + + uint32 value; + EXPECT_EQ(kVarintErrorCases_case.can_parse, coded_input.ReadVarint32(&value)); +} + +TEST_2D(CodedStreamTest, ReadVarint32Error_LeavesValueInInitializedState, + kVarintErrorCases, kBlockSizes) { + memcpy(buffer_, kVarintErrorCases_case.bytes, kVarintErrorCases_case.size); + ArrayInputStream input(buffer_, kVarintErrorCases_case.size, + kBlockSizes_case); + CodedInputStream coded_input(&input); + + uint32 value = 0; + EXPECT_EQ(kVarintErrorCases_case.can_parse, coded_input.ReadVarint32(&value)); + // While the specific value following a failure is not critical, we do want to + // ensure that it doesn't get set to an uninitialized value. (This check fails + // in MSAN mode if value has been set to an uninitialized value.) + EXPECT_EQ(value, value); +} + +TEST_2D(CodedStreamTest, ReadVarint64Error, kVarintErrorCases, kBlockSizes) { + memcpy(buffer_, kVarintErrorCases_case.bytes, kVarintErrorCases_case.size); + ArrayInputStream input(buffer_, kVarintErrorCases_case.size, + kBlockSizes_case); + CodedInputStream coded_input(&input); + + uint64 value; + EXPECT_EQ(kVarintErrorCases_case.can_parse, coded_input.ReadVarint64(&value)); +} + +TEST_2D(CodedStreamTest, ReadVarint64Error_LeavesValueInInitializedState, + kVarintErrorCases, kBlockSizes) { + memcpy(buffer_, kVarintErrorCases_case.bytes, kVarintErrorCases_case.size); + ArrayInputStream input(buffer_, kVarintErrorCases_case.size, + kBlockSizes_case); + CodedInputStream coded_input(&input); + + uint64 value = 0; + EXPECT_EQ(kVarintErrorCases_case.can_parse, coded_input.ReadVarint64(&value)); + // While the specific value following a failure is not critical, we do want to + // ensure that it doesn't get set to an uninitialized value. (This check fails + // in MSAN mode if value has been set to an uninitialized value.) + EXPECT_EQ(value, value); +} + +// ------------------------------------------------------------------- +// VarintSize + +struct VarintSizeCase { + uint64 value; + int size; +}; + +inline std::ostream& operator<<(std::ostream& os, const VarintSizeCase& c) { + return os << c.value; +} + +VarintSizeCase kVarintSizeCases[] = { + {0u, 1}, + {1u, 1}, + {127u, 1}, + {128u, 2}, + {758923u, 3}, + {4000000000u, 5}, + {uint64_t{41256202580718336u}, 8}, + {uint64_t{11964378330978735131u}, 10}, +}; + +TEST_1D(CodedStreamTest, VarintSize32, kVarintSizeCases) { + if (kVarintSizeCases_case.value > 0xffffffffu) { + // Skip 64-bit values. + return; + } + + EXPECT_EQ(kVarintSizeCases_case.size, + CodedOutputStream::VarintSize32( + static_cast<uint32>(kVarintSizeCases_case.value))); +} + +TEST_1D(CodedStreamTest, VarintSize64, kVarintSizeCases) { + EXPECT_EQ(kVarintSizeCases_case.size, + CodedOutputStream::VarintSize64(kVarintSizeCases_case.value)); +} + +TEST_F(CodedStreamTest, VarintSize32PowersOfTwo) { + int expected = 1; + for (int i = 1; i < 32; i++) { + if (i % 7 == 0) { + expected += 1; + } + EXPECT_EQ(expected, + CodedOutputStream::VarintSize32(static_cast<uint32>(0x1u << i))); + } +} + +TEST_F(CodedStreamTest, VarintSize64PowersOfTwo) { + int expected = 1; + for (int i = 1; i < 64; i++) { + if (i % 7 == 0) { + expected += 1; + } + EXPECT_EQ(expected, CodedOutputStream::VarintSize64( + static_cast<uint64>(0x1ull << i))); + } +} + +// ------------------------------------------------------------------- +// Fixed-size int tests + +struct Fixed32Case { + uint8 bytes[sizeof(uint32)]; // Encoded bytes. + uint32 value; // Parsed value. +}; + +struct Fixed64Case { + uint8 bytes[sizeof(uint64)]; // Encoded bytes. + uint64 value; // Parsed value. +}; + +inline std::ostream& operator<<(std::ostream& os, const Fixed32Case& c) { + return os << "0x" << std::hex << c.value << std::dec; +} + +inline std::ostream& operator<<(std::ostream& os, const Fixed64Case& c) { + return os << "0x" << std::hex << c.value << std::dec; +} + +Fixed32Case kFixed32Cases[] = { + {{0xef, 0xcd, 0xab, 0x90}, 0x90abcdefu}, + {{0x12, 0x34, 0x56, 0x78}, 0x78563412u}, +}; + +Fixed64Case kFixed64Cases[] = { + {{0xef, 0xcd, 0xab, 0x90, 0x12, 0x34, 0x56, 0x78}, + uint64_t{0x7856341290abcdefu}}, + {{0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88}, + uint64_t{0x8877665544332211u}}, +}; + +TEST_2D(CodedStreamTest, ReadLittleEndian32, kFixed32Cases, kBlockSizes) { + memcpy(buffer_, kFixed32Cases_case.bytes, sizeof(kFixed32Cases_case.bytes)); + ArrayInputStream input(buffer_, sizeof(buffer_), kBlockSizes_case); + + { + CodedInputStream coded_input(&input); + + uint32 value; + EXPECT_TRUE(coded_input.ReadLittleEndian32(&value)); + EXPECT_EQ(kFixed32Cases_case.value, value); + } + + EXPECT_EQ(sizeof(uint32), input.ByteCount()); +} + +TEST_2D(CodedStreamTest, ReadLittleEndian64, kFixed64Cases, kBlockSizes) { + memcpy(buffer_, kFixed64Cases_case.bytes, sizeof(kFixed64Cases_case.bytes)); + ArrayInputStream input(buffer_, sizeof(buffer_), kBlockSizes_case); + + { + CodedInputStream coded_input(&input); + + uint64 value; + EXPECT_TRUE(coded_input.ReadLittleEndian64(&value)); + EXPECT_EQ(kFixed64Cases_case.value, value); + } + + EXPECT_EQ(sizeof(uint64), input.ByteCount()); +} + +TEST_2D(CodedStreamTest, WriteLittleEndian32, kFixed32Cases, kBlockSizes) { + ArrayOutputStream output(buffer_, sizeof(buffer_), kBlockSizes_case); + + { + CodedOutputStream coded_output(&output); + + coded_output.WriteLittleEndian32(kFixed32Cases_case.value); + EXPECT_FALSE(coded_output.HadError()); + + EXPECT_EQ(sizeof(uint32), coded_output.ByteCount()); + } + + EXPECT_EQ(sizeof(uint32), output.ByteCount()); + EXPECT_EQ(0, memcmp(buffer_, kFixed32Cases_case.bytes, sizeof(uint32))); +} + +TEST_2D(CodedStreamTest, WriteLittleEndian64, kFixed64Cases, kBlockSizes) { + ArrayOutputStream output(buffer_, sizeof(buffer_), kBlockSizes_case); + + { + CodedOutputStream coded_output(&output); + + coded_output.WriteLittleEndian64(kFixed64Cases_case.value); + EXPECT_FALSE(coded_output.HadError()); + + EXPECT_EQ(sizeof(uint64), coded_output.ByteCount()); + } + + EXPECT_EQ(sizeof(uint64), output.ByteCount()); + EXPECT_EQ(0, memcmp(buffer_, kFixed64Cases_case.bytes, sizeof(uint64))); +} + +// Tests using the static methods to read fixed-size values from raw arrays. + +TEST_1D(CodedStreamTest, ReadLittleEndian32FromArray, kFixed32Cases) { + memcpy(buffer_, kFixed32Cases_case.bytes, sizeof(kFixed32Cases_case.bytes)); + + uint32 value; + const uint8* end = + CodedInputStream::ReadLittleEndian32FromArray(buffer_, &value); + EXPECT_EQ(kFixed32Cases_case.value, value); + EXPECT_TRUE(end == buffer_ + sizeof(value)); +} + +TEST_1D(CodedStreamTest, ReadLittleEndian64FromArray, kFixed64Cases) { + memcpy(buffer_, kFixed64Cases_case.bytes, sizeof(kFixed64Cases_case.bytes)); + + uint64 value; + const uint8* end = + CodedInputStream::ReadLittleEndian64FromArray(buffer_, &value); + EXPECT_EQ(kFixed64Cases_case.value, value); + EXPECT_TRUE(end == buffer_ + sizeof(value)); +} + +// ------------------------------------------------------------------- +// Raw reads and writes + +const char kRawBytes[] = "Some bytes which will be written and read raw."; + +TEST_1D(CodedStreamTest, ReadRaw, kBlockSizes) { + memcpy(buffer_, kRawBytes, sizeof(kRawBytes)); + ArrayInputStream input(buffer_, sizeof(buffer_), kBlockSizes_case); + char read_buffer[sizeof(kRawBytes)]; + + { + CodedInputStream coded_input(&input); + + EXPECT_TRUE(coded_input.ReadRaw(read_buffer, sizeof(kRawBytes))); + EXPECT_EQ(0, memcmp(kRawBytes, read_buffer, sizeof(kRawBytes))); + } + + EXPECT_EQ(sizeof(kRawBytes), input.ByteCount()); +} + +TEST_1D(CodedStreamTest, WriteRaw, kBlockSizes) { + ArrayOutputStream output(buffer_, sizeof(buffer_), kBlockSizes_case); + + { + CodedOutputStream coded_output(&output); + + coded_output.WriteRaw(kRawBytes, sizeof(kRawBytes)); + EXPECT_FALSE(coded_output.HadError()); + + EXPECT_EQ(sizeof(kRawBytes), coded_output.ByteCount()); + } + + EXPECT_EQ(sizeof(kRawBytes), output.ByteCount()); + EXPECT_EQ(0, memcmp(buffer_, kRawBytes, sizeof(kRawBytes))); +} + +TEST_1D(CodedStreamTest, ReadString, kBlockSizes) { + memcpy(buffer_, kRawBytes, sizeof(kRawBytes)); + ArrayInputStream input(buffer_, sizeof(buffer_), kBlockSizes_case); + + { + CodedInputStream coded_input(&input); + + std::string str; + EXPECT_TRUE(coded_input.ReadString(&str, strlen(kRawBytes))); + EXPECT_EQ(kRawBytes, str); + } + + EXPECT_EQ(strlen(kRawBytes), input.ByteCount()); +} + +// Check to make sure ReadString doesn't crash on impossibly large strings. +TEST_1D(CodedStreamTest, ReadStringImpossiblyLarge, kBlockSizes) { + ArrayInputStream input(buffer_, sizeof(buffer_), kBlockSizes_case); + + { + CodedInputStream coded_input(&input); + + std::string str; + // Try to read a gigabyte. + EXPECT_FALSE(coded_input.ReadString(&str, 1 << 30)); + } +} + +TEST_F(CodedStreamTest, ReadStringImpossiblyLargeFromStringOnStack) { + // Same test as above, except directly use a buffer. This used to cause + // crashes while the above did not. + uint8 buffer[8]; + CodedInputStream coded_input(buffer, 8); + std::string str; + EXPECT_FALSE(coded_input.ReadString(&str, 1 << 30)); +} + +TEST_F(CodedStreamTest, ReadStringImpossiblyLargeFromStringOnHeap) { + std::unique_ptr<uint8[]> buffer(new uint8[8]); + CodedInputStream coded_input(buffer.get(), 8); + std::string str; + EXPECT_FALSE(coded_input.ReadString(&str, 1 << 30)); +} + +TEST_1D(CodedStreamTest, ReadStringReservesMemoryOnTotalLimit, kBlockSizes) { + memcpy(buffer_, kRawBytes, sizeof(kRawBytes)); + ArrayInputStream input(buffer_, sizeof(buffer_), kBlockSizes_case); + + { + CodedInputStream coded_input(&input); + coded_input.SetTotalBytesLimit(sizeof(kRawBytes)); + EXPECT_EQ(sizeof(kRawBytes), coded_input.BytesUntilTotalBytesLimit()); + + std::string str; + EXPECT_TRUE(coded_input.ReadString(&str, strlen(kRawBytes))); + EXPECT_EQ(sizeof(kRawBytes) - strlen(kRawBytes), + coded_input.BytesUntilTotalBytesLimit()); + EXPECT_EQ(kRawBytes, str); + // TODO(liujisi): Replace with a more meaningful test (see cl/60966023). + EXPECT_GE(str.capacity(), strlen(kRawBytes)); + } + + EXPECT_EQ(strlen(kRawBytes), input.ByteCount()); +} + +TEST_1D(CodedStreamTest, ReadStringReservesMemoryOnPushedLimit, kBlockSizes) { + memcpy(buffer_, kRawBytes, sizeof(kRawBytes)); + ArrayInputStream input(buffer_, sizeof(buffer_), kBlockSizes_case); + + { + CodedInputStream coded_input(&input); + coded_input.PushLimit(sizeof(buffer_)); + + std::string str; + EXPECT_TRUE(coded_input.ReadString(&str, strlen(kRawBytes))); + EXPECT_EQ(kRawBytes, str); + // TODO(liujisi): Replace with a more meaningful test (see cl/60966023). + EXPECT_GE(str.capacity(), strlen(kRawBytes)); + } + + EXPECT_EQ(strlen(kRawBytes), input.ByteCount()); +} + +TEST_F(CodedStreamTest, ReadStringNoReservationIfLimitsNotSet) { + memcpy(buffer_, kRawBytes, sizeof(kRawBytes)); + // Buffer size in the input must be smaller than sizeof(kRawBytes), + // otherwise check against capacity will fail as ReadStringInline() + // will handle the reading and will reserve the memory as needed. + ArrayInputStream input(buffer_, sizeof(buffer_), 32); + + { + CodedInputStream coded_input(&input); + + std::string str; + EXPECT_TRUE(coded_input.ReadString(&str, strlen(kRawBytes))); + EXPECT_EQ(kRawBytes, str); + // Note: this check depends on string class implementation. It + // expects that string will allocate more than strlen(kRawBytes) + // if the content of kRawBytes is appended to string in small + // chunks. + // TODO(liujisi): Replace with a more meaningful test (see cl/60966023). + EXPECT_GE(str.capacity(), strlen(kRawBytes)); + } + + EXPECT_EQ(strlen(kRawBytes), input.ByteCount()); +} + +TEST_F(CodedStreamTest, ReadStringNoReservationSizeIsNegative) { + memcpy(buffer_, kRawBytes, sizeof(kRawBytes)); + // Buffer size in the input must be smaller than sizeof(kRawBytes), + // otherwise check against capacity will fail as ReadStringInline() + // will handle the reading and will reserve the memory as needed. + ArrayInputStream input(buffer_, sizeof(buffer_), 32); + + { + CodedInputStream coded_input(&input); + coded_input.PushLimit(sizeof(buffer_)); + + std::string str; + EXPECT_FALSE(coded_input.ReadString(&str, -1)); + // Note: this check depends on string class implementation. It + // expects that string will always allocate the same amount of + // memory for an empty string. + EXPECT_EQ(std::string().capacity(), str.capacity()); + } +} + +TEST_F(CodedStreamTest, ReadStringNoReservationSizeIsLarge) { + memcpy(buffer_, kRawBytes, sizeof(kRawBytes)); + // Buffer size in the input must be smaller than sizeof(kRawBytes), + // otherwise check against capacity will fail as ReadStringInline() + // will handle the reading and will reserve the memory as needed. + ArrayInputStream input(buffer_, sizeof(buffer_), 32); + + { + CodedInputStream coded_input(&input); + coded_input.PushLimit(sizeof(buffer_)); + + std::string str; + EXPECT_FALSE(coded_input.ReadString(&str, 1 << 30)); + EXPECT_GT(1 << 30, str.capacity()); + } +} + +TEST_F(CodedStreamTest, ReadStringNoReservationSizeIsOverTheLimit) { + memcpy(buffer_, kRawBytes, sizeof(kRawBytes)); + // Buffer size in the input must be smaller than sizeof(kRawBytes), + // otherwise check against capacity will fail as ReadStringInline() + // will handle the reading and will reserve the memory as needed. + ArrayInputStream input(buffer_, sizeof(buffer_), 32); + + { + CodedInputStream coded_input(&input); + coded_input.PushLimit(16); + + std::string str; + EXPECT_FALSE(coded_input.ReadString(&str, strlen(kRawBytes))); + // Note: this check depends on string class implementation. It + // expects that string will allocate less than strlen(kRawBytes) + // for an empty string. + EXPECT_GT(strlen(kRawBytes), str.capacity()); + } +} + +TEST_F(CodedStreamTest, ReadStringNoReservationSizeIsOverTheTotalBytesLimit) { + memcpy(buffer_, kRawBytes, sizeof(kRawBytes)); + // Buffer size in the input must be smaller than sizeof(kRawBytes), + // otherwise check against capacity will fail as ReadStringInline() + // will handle the reading and will reserve the memory as needed. + ArrayInputStream input(buffer_, sizeof(buffer_), 32); + + { + CodedInputStream coded_input(&input); + coded_input.SetTotalBytesLimit(16); + + std::string str; + EXPECT_FALSE(coded_input.ReadString(&str, strlen(kRawBytes))); + // Note: this check depends on string class implementation. It + // expects that string will allocate less than strlen(kRawBytes) + // for an empty string. + EXPECT_GT(strlen(kRawBytes), str.capacity()); + } +} + +TEST_F(CodedStreamTest, + ReadStringNoReservationSizeIsOverTheClosestLimit_GlobalLimitIsCloser) { + memcpy(buffer_, kRawBytes, sizeof(kRawBytes)); + // Buffer size in the input must be smaller than sizeof(kRawBytes), + // otherwise check against capacity will fail as ReadStringInline() + // will handle the reading and will reserve the memory as needed. + ArrayInputStream input(buffer_, sizeof(buffer_), 32); + + { + CodedInputStream coded_input(&input); + coded_input.PushLimit(sizeof(buffer_)); + coded_input.SetTotalBytesLimit(16); + + std::string str; + EXPECT_FALSE(coded_input.ReadString(&str, strlen(kRawBytes))); + // Note: this check depends on string class implementation. It + // expects that string will allocate less than strlen(kRawBytes) + // for an empty string. + EXPECT_GT(strlen(kRawBytes), str.capacity()); + } +} + +TEST_F(CodedStreamTest, + ReadStringNoReservationSizeIsOverTheClosestLimit_LocalLimitIsCloser) { + memcpy(buffer_, kRawBytes, sizeof(kRawBytes)); + // Buffer size in the input must be smaller than sizeof(kRawBytes), + // otherwise check against capacity will fail as ReadStringInline() + // will handle the reading and will reserve the memory as needed. + ArrayInputStream input(buffer_, sizeof(buffer_), 32); + + { + CodedInputStream coded_input(&input); + coded_input.PushLimit(16); + coded_input.SetTotalBytesLimit(sizeof(buffer_)); + EXPECT_EQ(sizeof(buffer_), coded_input.BytesUntilTotalBytesLimit()); + + std::string str; + EXPECT_FALSE(coded_input.ReadString(&str, strlen(kRawBytes))); + // Note: this check depends on string class implementation. It + // expects that string will allocate less than strlen(kRawBytes) + // for an empty string. + EXPECT_GT(strlen(kRawBytes), str.capacity()); + } +} + + +// ------------------------------------------------------------------- +// Skip + +const char kSkipTestBytes[] = + "<Before skipping><To be skipped><After skipping>"; + +TEST_1D(CodedStreamTest, SkipInput, kBlockSizes) { + memcpy(buffer_, kSkipTestBytes, sizeof(kSkipTestBytes)); + ArrayInputStream input(buffer_, sizeof(buffer_), kBlockSizes_case); + + { + CodedInputStream coded_input(&input); + + std::string str; + EXPECT_TRUE(coded_input.ReadString(&str, strlen("<Before skipping>"))); + EXPECT_EQ("<Before skipping>", str); + EXPECT_TRUE(coded_input.Skip(strlen("<To be skipped>"))); + EXPECT_TRUE(coded_input.ReadString(&str, strlen("<After skipping>"))); + EXPECT_EQ("<After skipping>", str); + } + + EXPECT_EQ(strlen(kSkipTestBytes), input.ByteCount()); +} + +// ------------------------------------------------------------------- +// GetDirectBufferPointer + +TEST_F(CodedStreamTest, GetDirectBufferPointerInput) { + ArrayInputStream input(buffer_, sizeof(buffer_), 8); + CodedInputStream coded_input(&input); + + const void* ptr; + int size; + + EXPECT_TRUE(coded_input.GetDirectBufferPointer(&ptr, &size)); + EXPECT_EQ(buffer_, ptr); + EXPECT_EQ(8, size); + + // Peeking again should return the same pointer. + EXPECT_TRUE(coded_input.GetDirectBufferPointer(&ptr, &size)); + EXPECT_EQ(buffer_, ptr); + EXPECT_EQ(8, size); + + // Skip forward in the same buffer then peek again. + EXPECT_TRUE(coded_input.Skip(3)); + EXPECT_TRUE(coded_input.GetDirectBufferPointer(&ptr, &size)); + EXPECT_EQ(buffer_ + 3, ptr); + EXPECT_EQ(5, size); + + // Skip to end of buffer and peek -- should get next buffer. + EXPECT_TRUE(coded_input.Skip(5)); + EXPECT_TRUE(coded_input.GetDirectBufferPointer(&ptr, &size)); + EXPECT_EQ(buffer_ + 8, ptr); + EXPECT_EQ(8, size); +} + +TEST_F(CodedStreamTest, GetDirectBufferPointerInlineInput) { + ArrayInputStream input(buffer_, sizeof(buffer_), 8); + CodedInputStream coded_input(&input); + + const void* ptr; + int size; + + coded_input.GetDirectBufferPointerInline(&ptr, &size); + EXPECT_EQ(buffer_, ptr); + EXPECT_EQ(8, size); + + // Peeking again should return the same pointer. + coded_input.GetDirectBufferPointerInline(&ptr, &size); + EXPECT_EQ(buffer_, ptr); + EXPECT_EQ(8, size); + + // Skip forward in the same buffer then peek again. + EXPECT_TRUE(coded_input.Skip(3)); + coded_input.GetDirectBufferPointerInline(&ptr, &size); + EXPECT_EQ(buffer_ + 3, ptr); + EXPECT_EQ(5, size); + + // Skip to end of buffer and peek -- should return false and provide an empty + // buffer. It does not try to Refresh(). + EXPECT_TRUE(coded_input.Skip(5)); + coded_input.GetDirectBufferPointerInline(&ptr, &size); + EXPECT_EQ(buffer_ + 8, ptr); + EXPECT_EQ(0, size); +} + +// ------------------------------------------------------------------- +// Limits + +TEST_1D(CodedStreamTest, BasicLimit, kBlockSizes) { + ArrayInputStream input(buffer_, sizeof(buffer_), kBlockSizes_case); + + { + CodedInputStream coded_input(&input); + + EXPECT_EQ(-1, coded_input.BytesUntilLimit()); + CodedInputStream::Limit limit = coded_input.PushLimit(8); + + // Read until we hit the limit. + uint32 value; + EXPECT_EQ(8, coded_input.BytesUntilLimit()); + EXPECT_TRUE(coded_input.ReadLittleEndian32(&value)); + EXPECT_EQ(4, coded_input.BytesUntilLimit()); + EXPECT_TRUE(coded_input.ReadLittleEndian32(&value)); + EXPECT_EQ(0, coded_input.BytesUntilLimit()); + EXPECT_FALSE(coded_input.ReadLittleEndian32(&value)); + EXPECT_EQ(0, coded_input.BytesUntilLimit()); + + coded_input.PopLimit(limit); + + EXPECT_EQ(-1, coded_input.BytesUntilLimit()); + EXPECT_TRUE(coded_input.ReadLittleEndian32(&value)); + } + + EXPECT_EQ(12, input.ByteCount()); +} + +// Test what happens when we push two limits where the second (top) one is +// shorter. +TEST_1D(CodedStreamTest, SmallLimitOnTopOfBigLimit, kBlockSizes) { + ArrayInputStream input(buffer_, sizeof(buffer_), kBlockSizes_case); + + { + CodedInputStream coded_input(&input); + + EXPECT_EQ(-1, coded_input.BytesUntilLimit()); + CodedInputStream::Limit limit1 = coded_input.PushLimit(8); + EXPECT_EQ(8, coded_input.BytesUntilLimit()); + CodedInputStream::Limit limit2 = coded_input.PushLimit(4); + + uint32 value; + + // Read until we hit limit2, the top and shortest limit. + EXPECT_EQ(4, coded_input.BytesUntilLimit()); + EXPECT_TRUE(coded_input.ReadLittleEndian32(&value)); + EXPECT_EQ(0, coded_input.BytesUntilLimit()); + EXPECT_FALSE(coded_input.ReadLittleEndian32(&value)); + EXPECT_EQ(0, coded_input.BytesUntilLimit()); + + coded_input.PopLimit(limit2); + + // Read until we hit limit1. + EXPECT_EQ(4, coded_input.BytesUntilLimit()); + EXPECT_TRUE(coded_input.ReadLittleEndian32(&value)); + EXPECT_EQ(0, coded_input.BytesUntilLimit()); + EXPECT_FALSE(coded_input.ReadLittleEndian32(&value)); + EXPECT_EQ(0, coded_input.BytesUntilLimit()); + + coded_input.PopLimit(limit1); + + // No more limits. + EXPECT_EQ(-1, coded_input.BytesUntilLimit()); + EXPECT_TRUE(coded_input.ReadLittleEndian32(&value)); + } + + EXPECT_EQ(12, input.ByteCount()); +} + +// Test what happens when we push two limits where the second (top) one is +// longer. In this case, the top limit is shortened to match the previous +// limit. +TEST_1D(CodedStreamTest, BigLimitOnTopOfSmallLimit, kBlockSizes) { + ArrayInputStream input(buffer_, sizeof(buffer_), kBlockSizes_case); + + { + CodedInputStream coded_input(&input); + + EXPECT_EQ(-1, coded_input.BytesUntilLimit()); + CodedInputStream::Limit limit1 = coded_input.PushLimit(4); + EXPECT_EQ(4, coded_input.BytesUntilLimit()); + CodedInputStream::Limit limit2 = coded_input.PushLimit(8); + + uint32 value; + + // Read until we hit limit2. Except, wait! limit1 is shorter, so + // we end up hitting that first, despite having 4 bytes to go on + // limit2. + EXPECT_EQ(4, coded_input.BytesUntilLimit()); + EXPECT_TRUE(coded_input.ReadLittleEndian32(&value)); + EXPECT_EQ(0, coded_input.BytesUntilLimit()); + EXPECT_FALSE(coded_input.ReadLittleEndian32(&value)); + EXPECT_EQ(0, coded_input.BytesUntilLimit()); + + coded_input.PopLimit(limit2); + + // OK, popped limit2, now limit1 is on top, which we've already hit. + EXPECT_EQ(0, coded_input.BytesUntilLimit()); + EXPECT_FALSE(coded_input.ReadLittleEndian32(&value)); + EXPECT_EQ(0, coded_input.BytesUntilLimit()); + + coded_input.PopLimit(limit1); + + // No more limits. + EXPECT_EQ(-1, coded_input.BytesUntilLimit()); + EXPECT_TRUE(coded_input.ReadLittleEndian32(&value)); + } + + EXPECT_EQ(8, input.ByteCount()); +} + +TEST_F(CodedStreamTest, ExpectAtEnd) { + // Test ExpectAtEnd(), which is based on limits. + ArrayInputStream input(buffer_, sizeof(buffer_)); + CodedInputStream coded_input(&input); + + EXPECT_FALSE(coded_input.ExpectAtEnd()); + + CodedInputStream::Limit limit = coded_input.PushLimit(4); + + uint32 value; + EXPECT_TRUE(coded_input.ReadLittleEndian32(&value)); + EXPECT_TRUE(coded_input.ExpectAtEnd()); + + coded_input.PopLimit(limit); + EXPECT_FALSE(coded_input.ExpectAtEnd()); +} + +TEST_F(CodedStreamTest, NegativeLimit) { + // Check what happens when we push a negative limit. + ArrayInputStream input(buffer_, sizeof(buffer_)); + CodedInputStream coded_input(&input); + + CodedInputStream::Limit limit = coded_input.PushLimit(-1234); + // BytesUntilLimit() returns -1 to mean "no limit", which actually means + // "the limit is INT_MAX relative to the beginning of the stream". + EXPECT_EQ(-1, coded_input.BytesUntilLimit()); + coded_input.PopLimit(limit); +} + +TEST_F(CodedStreamTest, NegativeLimitAfterReading) { + // Check what happens when we push a negative limit. + ArrayInputStream input(buffer_, sizeof(buffer_)); + CodedInputStream coded_input(&input); + ASSERT_TRUE(coded_input.Skip(128)); + + CodedInputStream::Limit limit = coded_input.PushLimit(-64); + // BytesUntilLimit() returns -1 to mean "no limit", which actually means + // "the limit is INT_MAX relative to the beginning of the stream". + EXPECT_EQ(-1, coded_input.BytesUntilLimit()); + coded_input.PopLimit(limit); +} + +TEST_F(CodedStreamTest, OverflowLimit) { + // Check what happens when we push a limit large enough that its absolute + // position is more than 2GB into the stream. + ArrayInputStream input(buffer_, sizeof(buffer_)); + CodedInputStream coded_input(&input); + ASSERT_TRUE(coded_input.Skip(128)); + + CodedInputStream::Limit limit = coded_input.PushLimit(INT_MAX); + // BytesUntilLimit() returns -1 to mean "no limit", which actually means + // "the limit is INT_MAX relative to the beginning of the stream". + EXPECT_EQ(-1, coded_input.BytesUntilLimit()); + coded_input.PopLimit(limit); +} + +TEST_F(CodedStreamTest, TotalBytesLimit) { + ArrayInputStream input(buffer_, sizeof(buffer_)); + CodedInputStream coded_input(&input); + coded_input.SetTotalBytesLimit(16); + EXPECT_EQ(16, coded_input.BytesUntilTotalBytesLimit()); + + std::string str; + EXPECT_TRUE(coded_input.ReadString(&str, 16)); + EXPECT_EQ(0, coded_input.BytesUntilTotalBytesLimit()); + + std::vector<std::string> errors; + + { + ScopedMemoryLog error_log; + EXPECT_FALSE(coded_input.ReadString(&str, 1)); + errors = error_log.GetMessages(ERROR); + } + + ASSERT_EQ(1, errors.size()); + EXPECT_PRED_FORMAT2(testing::IsSubstring, + "A protocol message was rejected because it was too big", + errors[0]); + + coded_input.SetTotalBytesLimit(32); + EXPECT_EQ(16, coded_input.BytesUntilTotalBytesLimit()); + EXPECT_TRUE(coded_input.ReadString(&str, 16)); + EXPECT_EQ(0, coded_input.BytesUntilTotalBytesLimit()); +} + +TEST_F(CodedStreamTest, TotalBytesLimitNotValidMessageEnd) { + // total_bytes_limit_ is not a valid place for a message to end. + + ArrayInputStream input(buffer_, sizeof(buffer_)); + CodedInputStream coded_input(&input); + + // Set both total_bytes_limit and a regular limit at 16 bytes. + coded_input.SetTotalBytesLimit(16); + CodedInputStream::Limit limit = coded_input.PushLimit(16); + + // Read 16 bytes. + std::string str; + EXPECT_TRUE(coded_input.ReadString(&str, 16)); + + // Read a tag. Should fail, but report being a valid endpoint since it's + // a regular limit. + EXPECT_EQ(0, coded_input.ReadTagNoLastTag()); + EXPECT_TRUE(coded_input.ConsumedEntireMessage()); + + // Pop the limit. + coded_input.PopLimit(limit); + + // Read a tag. Should fail, and report *not* being a valid endpoint, since + // this time we're hitting the total bytes limit. + EXPECT_EQ(0, coded_input.ReadTagNoLastTag()); + EXPECT_FALSE(coded_input.ConsumedEntireMessage()); +} + +TEST_F(CodedStreamTest, RecursionLimit) { + ArrayInputStream input(buffer_, sizeof(buffer_)); + CodedInputStream coded_input(&input); + coded_input.SetRecursionLimit(4); + + // This is way too much testing for a counter. + EXPECT_TRUE(coded_input.IncrementRecursionDepth()); // 1 + EXPECT_TRUE(coded_input.IncrementRecursionDepth()); // 2 + EXPECT_TRUE(coded_input.IncrementRecursionDepth()); // 3 + EXPECT_TRUE(coded_input.IncrementRecursionDepth()); // 4 + EXPECT_FALSE(coded_input.IncrementRecursionDepth()); // 5 + EXPECT_FALSE(coded_input.IncrementRecursionDepth()); // 6 + coded_input.DecrementRecursionDepth(); // 5 + EXPECT_FALSE(coded_input.IncrementRecursionDepth()); // 6 + coded_input.DecrementRecursionDepth(); // 5 + coded_input.DecrementRecursionDepth(); // 4 + coded_input.DecrementRecursionDepth(); // 3 + EXPECT_TRUE(coded_input.IncrementRecursionDepth()); // 4 + EXPECT_FALSE(coded_input.IncrementRecursionDepth()); // 5 + coded_input.DecrementRecursionDepth(); // 4 + coded_input.DecrementRecursionDepth(); // 3 + coded_input.DecrementRecursionDepth(); // 2 + coded_input.DecrementRecursionDepth(); // 1 + coded_input.DecrementRecursionDepth(); // 0 + coded_input.DecrementRecursionDepth(); // 0 + coded_input.DecrementRecursionDepth(); // 0 + EXPECT_TRUE(coded_input.IncrementRecursionDepth()); // 1 + EXPECT_TRUE(coded_input.IncrementRecursionDepth()); // 2 + EXPECT_TRUE(coded_input.IncrementRecursionDepth()); // 3 + EXPECT_TRUE(coded_input.IncrementRecursionDepth()); // 4 + EXPECT_FALSE(coded_input.IncrementRecursionDepth()); // 5 + + coded_input.SetRecursionLimit(6); + EXPECT_TRUE(coded_input.IncrementRecursionDepth()); // 6 + EXPECT_FALSE(coded_input.IncrementRecursionDepth()); // 7 +} + + +class ReallyBigInputStream : public ZeroCopyInputStream { + public: + ReallyBigInputStream() : backup_amount_(0), buffer_count_(0) {} + ~ReallyBigInputStream() {} + + // implements ZeroCopyInputStream ---------------------------------- + bool Next(const void** data, int* size) override { + // We only expect BackUp() to be called at the end. + EXPECT_EQ(0, backup_amount_); + + switch (buffer_count_++) { + case 0: + *data = buffer_; + *size = sizeof(buffer_); + return true; + case 1: + // Return an enormously large buffer that, when combined with the 1k + // returned already, should overflow the total_bytes_read_ counter in + // CodedInputStream. Note that we'll only read the first 1024 bytes + // of this buffer so it's OK that we have it point at buffer_. + *data = buffer_; + *size = INT_MAX; + return true; + default: + return false; + } + } + + void BackUp(int count) override { backup_amount_ = count; } + + bool Skip(int count) override { + GOOGLE_LOG(FATAL) << "Not implemented."; + return false; + } + int64_t ByteCount() const override { + GOOGLE_LOG(FATAL) << "Not implemented."; + return 0; + } + + int backup_amount_; + + private: + char buffer_[1024]; + int64 buffer_count_; +}; + +TEST_F(CodedStreamTest, InputOver2G) { + // CodedInputStream should gracefully handle input over 2G and call + // input.BackUp() with the correct number of bytes on destruction. + ReallyBigInputStream input; + + std::vector<std::string> errors; + + { + ScopedMemoryLog error_log; + CodedInputStream coded_input(&input); + std::string str; + EXPECT_TRUE(coded_input.ReadString(&str, 512)); + EXPECT_TRUE(coded_input.ReadString(&str, 1024)); + errors = error_log.GetMessages(ERROR); + } + + EXPECT_EQ(INT_MAX - 512, input.backup_amount_); + EXPECT_EQ(0, errors.size()); +} + +} // namespace +} // namespace io +} // namespace protobuf +} // namespace google + +#include <port_undef.inc> diff --git a/NorthstarDedicatedTest/include/protobuf/io/gzip_stream.cc b/NorthstarDedicatedTest/include/protobuf/io/gzip_stream.cc new file mode 100644 index 00000000..a8dd889a --- /dev/null +++ b/NorthstarDedicatedTest/include/protobuf/io/gzip_stream.cc @@ -0,0 +1,333 @@ +// Protocol Buffers - Google's data interchange format +// Copyright 2008 Google Inc. All rights reserved. +// https://developers.google.com/protocol-buffers/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// Author: brianolson@google.com (Brian Olson) +// +// This file contains the implementation of classes GzipInputStream and +// GzipOutputStream. + + +#if HAVE_ZLIB +#include <io/gzip_stream.h> + +#include <stubs/common.h> +#include <stubs/logging.h> + +namespace google { +namespace protobuf { +namespace io { + +static const int kDefaultBufferSize = 65536; + +GzipInputStream::GzipInputStream(ZeroCopyInputStream* sub_stream, Format format, + int buffer_size) + : format_(format), sub_stream_(sub_stream), zerror_(Z_OK), byte_count_(0) { + zcontext_.state = Z_NULL; + zcontext_.zalloc = Z_NULL; + zcontext_.zfree = Z_NULL; + zcontext_.opaque = Z_NULL; + zcontext_.total_out = 0; + zcontext_.next_in = NULL; + zcontext_.avail_in = 0; + zcontext_.total_in = 0; + zcontext_.msg = NULL; + if (buffer_size == -1) { + output_buffer_length_ = kDefaultBufferSize; + } else { + output_buffer_length_ = buffer_size; + } + output_buffer_ = operator new(output_buffer_length_); + GOOGLE_CHECK(output_buffer_ != NULL); + zcontext_.next_out = static_cast<Bytef*>(output_buffer_); + zcontext_.avail_out = output_buffer_length_; + output_position_ = output_buffer_; +} +GzipInputStream::~GzipInputStream() { + operator delete(output_buffer_); + zerror_ = inflateEnd(&zcontext_); +} + +static inline int internalInflateInit2(z_stream* zcontext, + GzipInputStream::Format format) { + int windowBitsFormat = 0; + switch (format) { + case GzipInputStream::GZIP: + windowBitsFormat = 16; + break; + case GzipInputStream::AUTO: + windowBitsFormat = 32; + break; + case GzipInputStream::ZLIB: + windowBitsFormat = 0; + break; + } + return inflateInit2(zcontext, /* windowBits */ 15 | windowBitsFormat); +} + +int GzipInputStream::Inflate(int flush) { + if ((zerror_ == Z_OK) && (zcontext_.avail_out == 0)) { + // previous inflate filled output buffer. don't change input params yet. + } else if (zcontext_.avail_in == 0) { + const void* in; + int in_size; + bool first = zcontext_.next_in == NULL; + bool ok = sub_stream_->Next(&in, &in_size); + if (!ok) { + zcontext_.next_out = NULL; + zcontext_.avail_out = 0; + return Z_STREAM_END; + } + zcontext_.next_in = static_cast<Bytef*>(const_cast<void*>(in)); + zcontext_.avail_in = in_size; + if (first) { + int error = internalInflateInit2(&zcontext_, format_); + if (error != Z_OK) { + return error; + } + } + } + zcontext_.next_out = static_cast<Bytef*>(output_buffer_); + zcontext_.avail_out = output_buffer_length_; + output_position_ = output_buffer_; + int error = inflate(&zcontext_, flush); + return error; +} + +void GzipInputStream::DoNextOutput(const void** data, int* size) { + *data = output_position_; + *size = ((uintptr_t)zcontext_.next_out) - ((uintptr_t)output_position_); + output_position_ = zcontext_.next_out; +} + +// implements ZeroCopyInputStream ---------------------------------- +bool GzipInputStream::Next(const void** data, int* size) { + bool ok = (zerror_ == Z_OK) || (zerror_ == Z_STREAM_END) || + (zerror_ == Z_BUF_ERROR); + if ((!ok) || (zcontext_.next_out == NULL)) { + return false; + } + if (zcontext_.next_out != output_position_) { + DoNextOutput(data, size); + return true; + } + if (zerror_ == Z_STREAM_END) { + if (zcontext_.next_out != NULL) { + // sub_stream_ may have concatenated streams to follow + zerror_ = inflateEnd(&zcontext_); + byte_count_ += zcontext_.total_out; + if (zerror_ != Z_OK) { + return false; + } + zerror_ = internalInflateInit2(&zcontext_, format_); + if (zerror_ != Z_OK) { + return false; + } + } else { + *data = NULL; + *size = 0; + return false; + } + } + zerror_ = Inflate(Z_NO_FLUSH); + if ((zerror_ == Z_STREAM_END) && (zcontext_.next_out == NULL)) { + // The underlying stream's Next returned false inside Inflate. + return false; + } + ok = (zerror_ == Z_OK) || (zerror_ == Z_STREAM_END) || + (zerror_ == Z_BUF_ERROR); + if (!ok) { + return false; + } + DoNextOutput(data, size); + return true; +} +void GzipInputStream::BackUp(int count) { + output_position_ = reinterpret_cast<void*>( + reinterpret_cast<uintptr_t>(output_position_) - count); +} +bool GzipInputStream::Skip(int count) { + const void* data; + int size = 0; + bool ok = Next(&data, &size); + while (ok && (size < count)) { + count -= size; + ok = Next(&data, &size); + } + if (size > count) { + BackUp(size - count); + } + return ok; +} +int64_t GzipInputStream::ByteCount() const { + int64_t ret = byte_count_ + zcontext_.total_out; + if (zcontext_.next_out != NULL && output_position_ != NULL) { + ret += reinterpret_cast<uintptr_t>(zcontext_.next_out) - + reinterpret_cast<uintptr_t>(output_position_); + } + return ret; +} + +// ========================================================================= + +GzipOutputStream::Options::Options() + : format(GZIP), + buffer_size(kDefaultBufferSize), + compression_level(Z_DEFAULT_COMPRESSION), + compression_strategy(Z_DEFAULT_STRATEGY) {} + +GzipOutputStream::GzipOutputStream(ZeroCopyOutputStream* sub_stream) { + Init(sub_stream, Options()); +} + +GzipOutputStream::GzipOutputStream(ZeroCopyOutputStream* sub_stream, + const Options& options) { + Init(sub_stream, options); +} + +void GzipOutputStream::Init(ZeroCopyOutputStream* sub_stream, + const Options& options) { + sub_stream_ = sub_stream; + sub_data_ = NULL; + sub_data_size_ = 0; + + input_buffer_length_ = options.buffer_size; + input_buffer_ = operator new(input_buffer_length_); + GOOGLE_CHECK(input_buffer_ != NULL); + + zcontext_.zalloc = Z_NULL; + zcontext_.zfree = Z_NULL; + zcontext_.opaque = Z_NULL; + zcontext_.next_out = NULL; + zcontext_.avail_out = 0; + zcontext_.total_out = 0; + zcontext_.next_in = NULL; + zcontext_.avail_in = 0; + zcontext_.total_in = 0; + zcontext_.msg = NULL; + // default to GZIP format + int windowBitsFormat = 16; + if (options.format == ZLIB) { + windowBitsFormat = 0; + } + zerror_ = + deflateInit2(&zcontext_, options.compression_level, Z_DEFLATED, + /* windowBits */ 15 | windowBitsFormat, + /* memLevel (default) */ 8, options.compression_strategy); +} + +GzipOutputStream::~GzipOutputStream() { + Close(); + operator delete(input_buffer_); +} + +// private +int GzipOutputStream::Deflate(int flush) { + int error = Z_OK; + do { + if ((sub_data_ == NULL) || (zcontext_.avail_out == 0)) { + bool ok = sub_stream_->Next(&sub_data_, &sub_data_size_); + if (!ok) { + sub_data_ = NULL; + sub_data_size_ = 0; + return Z_BUF_ERROR; + } + GOOGLE_CHECK_GT(sub_data_size_, 0); + zcontext_.next_out = static_cast<Bytef*>(sub_data_); + zcontext_.avail_out = sub_data_size_; + } + error = deflate(&zcontext_, flush); + } while (error == Z_OK && zcontext_.avail_out == 0); + if ((flush == Z_FULL_FLUSH) || (flush == Z_FINISH)) { + // Notify lower layer of data. + sub_stream_->BackUp(zcontext_.avail_out); + // We don't own the buffer anymore. + sub_data_ = NULL; + sub_data_size_ = 0; + } + return error; +} + +// implements ZeroCopyOutputStream --------------------------------- +bool GzipOutputStream::Next(void** data, int* size) { + if ((zerror_ != Z_OK) && (zerror_ != Z_BUF_ERROR)) { + return false; + } + if (zcontext_.avail_in != 0) { + zerror_ = Deflate(Z_NO_FLUSH); + if (zerror_ != Z_OK) { + return false; + } + } + if (zcontext_.avail_in == 0) { + // all input was consumed. reset the buffer. + zcontext_.next_in = static_cast<Bytef*>(input_buffer_); + zcontext_.avail_in = input_buffer_length_; + *data = input_buffer_; + *size = input_buffer_length_; + } else { + // The loop in Deflate should consume all avail_in + GOOGLE_LOG(DFATAL) << "Deflate left bytes unconsumed"; + } + return true; +} +void GzipOutputStream::BackUp(int count) { + GOOGLE_CHECK_GE(zcontext_.avail_in, static_cast<uInt>(count)); + zcontext_.avail_in -= count; +} +int64_t GzipOutputStream::ByteCount() const { + return zcontext_.total_in + zcontext_.avail_in; +} + +bool GzipOutputStream::Flush() { + zerror_ = Deflate(Z_FULL_FLUSH); + // Return true if the flush succeeded or if it was a no-op. + return (zerror_ == Z_OK) || + (zerror_ == Z_BUF_ERROR && zcontext_.avail_in == 0 && + zcontext_.avail_out != 0); +} + +bool GzipOutputStream::Close() { + if ((zerror_ != Z_OK) && (zerror_ != Z_BUF_ERROR)) { + return false; + } + do { + zerror_ = Deflate(Z_FINISH); + } while (zerror_ == Z_OK); + zerror_ = deflateEnd(&zcontext_); + bool ok = zerror_ == Z_OK; + zerror_ = Z_STREAM_END; + return ok; +} + +} // namespace io +} // namespace protobuf +} // namespace google + +#endif // HAVE_ZLIB diff --git a/NorthstarDedicatedTest/include/protobuf/io/gzip_stream.h b/NorthstarDedicatedTest/include/protobuf/io/gzip_stream.h new file mode 100644 index 00000000..1cea18fc --- /dev/null +++ b/NorthstarDedicatedTest/include/protobuf/io/gzip_stream.h @@ -0,0 +1,202 @@ +// Protocol Buffers - Google's data interchange format +// Copyright 2008 Google Inc. All rights reserved. +// https://developers.google.com/protocol-buffers/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// Author: brianolson@google.com (Brian Olson) +// +// This file contains the definition for classes GzipInputStream and +// GzipOutputStream. +// +// GzipInputStream decompresses data from an underlying +// ZeroCopyInputStream and provides the decompressed data as a +// ZeroCopyInputStream. +// +// GzipOutputStream is an ZeroCopyOutputStream that compresses data to +// an underlying ZeroCopyOutputStream. + +#ifndef GOOGLE_PROTOBUF_IO_GZIP_STREAM_H__ +#define GOOGLE_PROTOBUF_IO_GZIP_STREAM_H__ + + +#include <stubs/common.h> +#include <io/zero_copy_stream.h> +#include <port.h> +#include <zlib.h> + +#include <port_def.inc> + +namespace google { +namespace protobuf { +namespace io { + +// A ZeroCopyInputStream that reads compressed data through zlib +class PROTOBUF_EXPORT GzipInputStream : public ZeroCopyInputStream { + public: + // Format key for constructor + enum Format { + // zlib will autodetect gzip header or deflate stream + AUTO = 0, + + // GZIP streams have some extra header data for file attributes. + GZIP = 1, + + // Simpler zlib stream format. + ZLIB = 2, + }; + + // buffer_size and format may be -1 for default of 64kB and GZIP format + explicit GzipInputStream(ZeroCopyInputStream* sub_stream, + Format format = AUTO, int buffer_size = -1); + virtual ~GzipInputStream(); + + // Return last error message or NULL if no error. + inline const char* ZlibErrorMessage() const { return zcontext_.msg; } + inline int ZlibErrorCode() const { return zerror_; } + + // implements ZeroCopyInputStream ---------------------------------- + bool Next(const void** data, int* size) override; + void BackUp(int count) override; + bool Skip(int count) override; + int64_t ByteCount() const override; + + private: + Format format_; + + ZeroCopyInputStream* sub_stream_; + + z_stream zcontext_; + int zerror_; + + void* output_buffer_; + void* output_position_; + size_t output_buffer_length_; + int64_t byte_count_; + + int Inflate(int flush); + void DoNextOutput(const void** data, int* size); + + GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(GzipInputStream); +}; + +class PROTOBUF_EXPORT GzipOutputStream : public ZeroCopyOutputStream { + public: + // Format key for constructor + enum Format { + // GZIP streams have some extra header data for file attributes. + GZIP = 1, + + // Simpler zlib stream format. + ZLIB = 2, + }; + + struct PROTOBUF_EXPORT Options { + // Defaults to GZIP. + Format format; + + // What size buffer to use internally. Defaults to 64kB. + int buffer_size; + + // A number between 0 and 9, where 0 is no compression and 9 is best + // compression. Defaults to Z_DEFAULT_COMPRESSION (see zlib.h). + int compression_level; + + // Defaults to Z_DEFAULT_STRATEGY. Can also be set to Z_FILTERED, + // Z_HUFFMAN_ONLY, or Z_RLE. See the documentation for deflateInit2 in + // zlib.h for definitions of these constants. + int compression_strategy; + + Options(); // Initializes with default values. + }; + + // Create a GzipOutputStream with default options. + explicit GzipOutputStream(ZeroCopyOutputStream* sub_stream); + + // Create a GzipOutputStream with the given options. + GzipOutputStream(ZeroCopyOutputStream* sub_stream, const Options& options); + + virtual ~GzipOutputStream(); + + // Return last error message or NULL if no error. + inline const char* ZlibErrorMessage() const { return zcontext_.msg; } + inline int ZlibErrorCode() const { return zerror_; } + + // Flushes data written so far to zipped data in the underlying stream. + // It is the caller's responsibility to flush the underlying stream if + // necessary. + // Compression may be less efficient stopping and starting around flushes. + // Returns true if no error. + // + // Please ensure that block size is > 6. Here is an excerpt from the zlib + // doc that explains why: + // + // In the case of a Z_FULL_FLUSH or Z_SYNC_FLUSH, make sure that avail_out + // is greater than six to avoid repeated flush markers due to + // avail_out == 0 on return. + bool Flush(); + + // Writes out all data and closes the gzip stream. + // It is the caller's responsibility to close the underlying stream if + // necessary. + // Returns true if no error. + bool Close(); + + // implements ZeroCopyOutputStream --------------------------------- + bool Next(void** data, int* size) override; + void BackUp(int count) override; + int64_t ByteCount() const override; + + private: + ZeroCopyOutputStream* sub_stream_; + // Result from calling Next() on sub_stream_ + void* sub_data_; + int sub_data_size_; + + z_stream zcontext_; + int zerror_; + void* input_buffer_; + size_t input_buffer_length_; + + // Shared constructor code. + void Init(ZeroCopyOutputStream* sub_stream, const Options& options); + + // Do some compression. + // Takes zlib flush mode. + // Returns zlib error code. + int Deflate(int flush); + + GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(GzipOutputStream); +}; + +} // namespace io +} // namespace protobuf +} // namespace google + +#include <port_undef.inc> + +#endif // GOOGLE_PROTOBUF_IO_GZIP_STREAM_H__ diff --git a/NorthstarDedicatedTest/include/protobuf/io/gzip_stream_unittest.sh b/NorthstarDedicatedTest/include/protobuf/io/gzip_stream_unittest.sh new file mode 100644 index 00000000..16251a94 --- /dev/null +++ b/NorthstarDedicatedTest/include/protobuf/io/gzip_stream_unittest.sh @@ -0,0 +1,44 @@ +#!/bin/sh -x +# +# Protocol Buffers - Google's data interchange format +# Copyright 2009 Google Inc. All rights reserved. +# https://developers.google.com/protocol-buffers/ +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# Author: brianolson@google.com (Brian Olson) +# +# Test compatibility between command line gzip/gunzip binaries and +# ZeroCopyStream versions. + +TESTFILE=Makefile + +(./zcgzip < ${TESTFILE} | gunzip | cmp - ${TESTFILE}) && \ +(gzip < ${TESTFILE} | ./zcgunzip | cmp - ${TESTFILE}) + +# Result of "(cmd) && (cmd)" implicitly becomes result of this script +# and thus the test. diff --git a/NorthstarDedicatedTest/include/protobuf/io/io_win32.cc b/NorthstarDedicatedTest/include/protobuf/io/io_win32.cc new file mode 100644 index 00000000..9edceed3 --- /dev/null +++ b/NorthstarDedicatedTest/include/protobuf/io/io_win32.cc @@ -0,0 +1,470 @@ +// Protocol Buffers - Google's data interchange format +// Copyright 2008 Google Inc. All rights reserved. +// https://developers.google.com/protocol-buffers/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// Author: laszlocsomor@google.com (Laszlo Csomor) +// Based on original Protocol Buffers design by +// Sanjay Ghemawat, Jeff Dean, and others. + +// Implementation for long-path-aware open/mkdir/access/etc. on Windows, as well +// as for the supporting utility functions. +// +// These functions convert the input path to an absolute Windows path +// with "\\?\" prefix, then pass that to _wopen/_wmkdir/_waccess/etc. +// (declared in <io.h>) respectively. This allows working with files/directories +// whose paths are longer than MAX_PATH (260 chars). +// +// This file is only used on Windows, it's empty on other platforms. + +#if defined(_WIN32) && !defined(_XBOX_ONE) + +// Comment this out to fall back to using the ANSI versions (open, mkdir, ...) +// instead of the Unicode ones (_wopen, _wmkdir, ...). Doing so can be useful to +// debug failing tests if that's caused by the long path support. +#define SUPPORT_LONGPATHS + +#include <io/io_win32.h> + +#include <ctype.h> +#include <direct.h> +#include <errno.h> +#include <fcntl.h> +#include <io.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <wctype.h> + +#ifndef WIN32_LEAN_AND_MEAN +#define WIN32_LEAN_AND_MEAN 1 +#endif + +#include <windows.h> + +#include <memory> +#include <sstream> +#include <string> +#include <vector> + +namespace google { +namespace protobuf { +namespace io { +namespace win32 { +namespace { + +using std::string; +using std::wstring; + +template <typename char_type> +struct CharTraits { + static bool is_alpha(char_type ch); +}; + +template <> +struct CharTraits<char> { + static bool is_alpha(char ch) { return isalpha(ch); } +}; + +template <> +struct CharTraits<wchar_t> { + static bool is_alpha(wchar_t ch) { return iswalpha(ch); } +}; + +template <typename char_type> +bool null_or_empty(const char_type* s) { + return s == nullptr || *s == 0; +} + +// Returns true if the path starts with a drive letter, e.g. "c:". +// Note that this won't check for the "\" after the drive letter, so this also +// returns true for "c:foo" (which is "c:\${PWD}\foo"). +// This check requires that a path not have a longpath prefix ("\\?\"). +template <typename char_type> +bool has_drive_letter(const char_type* ch) { + return CharTraits<char_type>::is_alpha(ch[0]) && ch[1] == ':'; +} + +// Returns true if the path starts with a longpath prefix ("\\?\"). +template <typename char_type> +bool has_longpath_prefix(const char_type* path) { + return path[0] == '\\' && path[1] == '\\' && path[2] == '?' && + path[3] == '\\'; +} + +template <typename char_type> +bool is_separator(char_type c) { + return c == '/' || c == '\\'; +} + +// Returns true if the path starts with a drive specifier (e.g. "c:\"). +template <typename char_type> +bool is_path_absolute(const char_type* path) { + return has_drive_letter(path) && is_separator(path[2]); +} + +template <typename char_type> +bool is_drive_relative(const char_type* path) { + return has_drive_letter(path) && (path[2] == 0 || !is_separator(path[2])); +} + +wstring join_paths(const wstring& path1, const wstring& path2) { + if (path1.empty() || is_path_absolute(path2.c_str()) || + has_longpath_prefix(path2.c_str())) { + return path2; + } + if (path2.empty()) { + return path1; + } + + if (is_separator(path1[path1.size() - 1])) { + return is_separator(path2[0]) ? (path1 + path2.substr(1)) + : (path1 + path2); + } else { + return is_separator(path2[0]) ? (path1 + path2) + : (path1 + L'\\' + path2); + } +} + +wstring normalize(wstring path) { + if (has_longpath_prefix(path.c_str())) { + path = path.substr(4); + } + + static const wstring dot(L"."); + static const wstring dotdot(L".."); + const WCHAR* p = path.c_str(); + + std::vector<wstring> segments; + int segment_start = -1; + // Find the path segments in `path` (separated by "/"). + for (int i = 0;; ++i) { + if (!is_separator(p[i]) && p[i] != L'\0') { + // The current character does not end a segment, so start one unless it's + // already started. + if (segment_start < 0) { + segment_start = i; + } + } else if (segment_start >= 0 && i > segment_start) { + // The current character is "/" or "\0", so this ends a segment. + // Add that to `segments` if there's anything to add; handle "." and "..". + wstring segment(p, segment_start, i - segment_start); + segment_start = -1; + if (segment == dotdot) { + if (!segments.empty() && + (!has_drive_letter(segments[0].c_str()) || segments.size() > 1)) { + segments.pop_back(); + } + } else if (segment != dot && !segment.empty()) { + segments.push_back(segment); + } + } + if (p[i] == L'\0') { + break; + } + } + + // Handle the case when `path` is just a drive specifier (or some degenerate + // form of it, e.g. "c:\.."). + if (segments.size() == 1 && segments[0].size() == 2 && + has_drive_letter(segments[0].c_str())) { + return segments[0] + L'\\'; + } + + // Join all segments. + bool first = true; + std::wstringstream result; + for (int i = 0; i < segments.size(); ++i) { + if (!first) { + result << L'\\'; + } + first = false; + result << segments[i]; + } + // Preserve trailing separator if the input contained it. + if (!path.empty() && is_separator(p[path.size() - 1])) { + result << L'\\'; + } + return result.str(); +} + +bool as_windows_path(const char* path, wstring* result) { + if (null_or_empty(path)) { + result->clear(); + return true; + } + wstring wpath; + if (!strings::utf8_to_wcs(path, &wpath)) { + return false; + } + if (has_longpath_prefix(wpath.c_str())) { + *result = wpath; + return true; + } + if (is_separator(path[0]) || is_drive_relative(path)) { + return false; + } + + + if (!is_path_absolute(wpath.c_str())) { + int size = ::GetCurrentDirectoryW(0, nullptr); + if (size == 0 && GetLastError() != ERROR_INSUFFICIENT_BUFFER) { + return false; + } + std::unique_ptr<WCHAR[]> wcwd(new WCHAR[size]); + ::GetCurrentDirectoryW(size, wcwd.get()); + wpath = join_paths(wcwd.get(), wpath); + } + wpath = normalize(wpath); + if (!has_longpath_prefix(wpath.c_str())) { + // Add the "\\?\" prefix unconditionally. This way we prevent the Win32 API + // from processing the path and "helpfully" removing trailing dots from the + // path, for example. + // See https://github.com/bazelbuild/bazel/issues/2935 + wpath = wstring(L"\\\\?\\") + wpath; + } + *result = wpath; + return true; +} + +} // namespace + +int open(const char* path, int flags, int mode) { +#ifdef SUPPORT_LONGPATHS + wstring wpath; + if (!as_windows_path(path, &wpath)) { + errno = ENOENT; + return -1; + } + return ::_wopen(wpath.c_str(), flags, mode); +#else + return ::_open(path, flags, mode); +#endif +} + +int mkdir(const char* path, int /*_mode*/) { +#ifdef SUPPORT_LONGPATHS + wstring wpath; + if (!as_windows_path(path, &wpath)) { + errno = ENOENT; + return -1; + } + return ::_wmkdir(wpath.c_str()); +#else // not SUPPORT_LONGPATHS + return ::_mkdir(path); +#endif // not SUPPORT_LONGPATHS +} + +int access(const char* path, int mode) { +#ifdef SUPPORT_LONGPATHS + wstring wpath; + if (!as_windows_path(path, &wpath)) { + errno = ENOENT; + return -1; + } + return ::_waccess(wpath.c_str(), mode); +#else + return ::_access(path, mode); +#endif +} + +int chdir(const char* path) { +#ifdef SUPPORT_LONGPATHS + wstring wpath; + if (!as_windows_path(path, &wpath)) { + errno = ENOENT; + return -1; + } + return ::_wchdir(wpath.c_str()); +#else + return ::_chdir(path); +#endif +} + +int stat(const char* path, struct _stat* buffer) { +#ifdef SUPPORT_LONGPATHS + wstring wpath; + if (!as_windows_path(path, &wpath)) { + errno = ENOENT; + return -1; + } + return ::_wstat(wpath.c_str(), buffer); +#else // not SUPPORT_LONGPATHS + return ::_stat(path, buffer); +#endif // not SUPPORT_LONGPATHS +} + +FILE* fopen(const char* path, const char* mode) { +#ifdef SUPPORT_LONGPATHS + if (null_or_empty(path)) { + errno = EINVAL; + return nullptr; + } + wstring wpath; + if (!as_windows_path(path, &wpath)) { + errno = ENOENT; + return nullptr; + } + wstring wmode; + if (!strings::utf8_to_wcs(mode, &wmode)) { + errno = EINVAL; + return nullptr; + } + return ::_wfopen(wpath.c_str(), wmode.c_str()); +#else + return ::fopen(path, mode); +#endif +} + +int close(int fd) { return ::_close(fd); } + +int dup(int fd) { return ::_dup(fd); } + +int dup2(int fd1, int fd2) { return ::_dup2(fd1, fd2); } + +int read(int fd, void* buffer, size_t size) { + return ::_read(fd, buffer, size); +} + +int setmode(int fd, int mode) { return ::_setmode(fd, mode); } + +int write(int fd, const void* buffer, size_t size) { + return ::_write(fd, buffer, size); +} + +wstring testonly_utf8_to_winpath(const char* path) { + wstring wpath; + return as_windows_path(path, &wpath) ? wpath : wstring(); +} + +ExpandWildcardsResult ExpandWildcards( + const string& path, std::function<void(const string&)> consume) { + if (path.find_first_of("*?") == string::npos) { + // There are no wildcards in the path, we don't need to expand it. + consume(path); + return ExpandWildcardsResult::kSuccess; + } + + wstring wpath; + if (!as_windows_path(path.c_str(), &wpath)) { + return ExpandWildcardsResult::kErrorInputPathConversion; + } + + static const wstring kDot = L"."; + static const wstring kDotDot = L".."; + WIN32_FIND_DATAW metadata; + HANDLE handle = ::FindFirstFileW(wpath.c_str(), &metadata); + if (handle == INVALID_HANDLE_VALUE) { + // The pattern does not match any files (or directories). + return ExpandWildcardsResult::kErrorNoMatchingFile; + } + + string::size_type pos = path.find_last_of("\\/"); + string dirname; + if (pos != string::npos) { + dirname = path.substr(0, pos + 1); + } + + ExpandWildcardsResult matched = ExpandWildcardsResult::kErrorNoMatchingFile; + do { + // Ignore ".", "..", and directories. + if ((metadata.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) == 0 && + kDot != metadata.cFileName && kDotDot != metadata.cFileName) { + matched = ExpandWildcardsResult::kSuccess; + string filename; + if (!strings::wcs_to_utf8(metadata.cFileName, &filename)) { + return ExpandWildcardsResult::kErrorOutputPathConversion; + } + + if (dirname.empty()) { + consume(filename); + } else { + consume(dirname + filename); + } + } + } while (::FindNextFileW(handle, &metadata)); + FindClose(handle); + return matched; +} + +namespace strings { + +bool wcs_to_mbs(const WCHAR* s, string* out, bool outUtf8) { + if (null_or_empty(s)) { + out->clear(); + return true; + } + BOOL usedDefaultChar = FALSE; + SetLastError(0); + int size = WideCharToMultiByte( + outUtf8 ? CP_UTF8 : CP_ACP, 0, s, -1, nullptr, 0, nullptr, + outUtf8 ? nullptr : &usedDefaultChar); + if ((size == 0 && GetLastError() != ERROR_INSUFFICIENT_BUFFER) + || usedDefaultChar) { + return false; + } + std::unique_ptr<CHAR[]> astr(new CHAR[size]); + WideCharToMultiByte( + outUtf8 ? CP_UTF8 : CP_ACP, 0, s, -1, astr.get(), size, nullptr, nullptr); + out->assign(astr.get()); + return true; +} + +bool mbs_to_wcs(const char* s, wstring* out, bool inUtf8) { + if (null_or_empty(s)) { + out->clear(); + return true; + } + + SetLastError(0); + int size = + MultiByteToWideChar(inUtf8 ? CP_UTF8 : CP_ACP, 0, s, -1, nullptr, 0); + if (size == 0 && GetLastError() != ERROR_INSUFFICIENT_BUFFER) { + return false; + } + std::unique_ptr<WCHAR[]> wstr(new WCHAR[size]); + MultiByteToWideChar( + inUtf8 ? CP_UTF8 : CP_ACP, 0, s, -1, wstr.get(), size + 1); + out->assign(wstr.get()); + return true; +} + +bool utf8_to_wcs(const char* input, wstring* out) { + return mbs_to_wcs(input, out, true); +} + +bool wcs_to_utf8(const wchar_t* input, string* out) { + return wcs_to_mbs(input, out, true); +} + +} // namespace strings +} // namespace win32 +} // namespace io +} // namespace protobuf +} // namespace google + +#endif // defined(_WIN32) diff --git a/NorthstarDedicatedTest/include/protobuf/io/io_win32.h b/NorthstarDedicatedTest/include/protobuf/io/io_win32.h new file mode 100644 index 00000000..0423736a --- /dev/null +++ b/NorthstarDedicatedTest/include/protobuf/io/io_win32.h @@ -0,0 +1,139 @@ +// Protocol Buffers - Google's data interchange format +// Copyright 2008 Google Inc. All rights reserved. +// https://developers.google.com/protocol-buffers/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// Author: laszlocsomor@google.com (Laszlo Csomor) +// Based on original Protocol Buffers design by +// Sanjay Ghemawat, Jeff Dean, and others. + +// This file contains the declarations for Windows implementations of +// commonly used POSIX functions such as open(2) and access(2), as well +// as macro definitions for flags of these functions. +// +// By including this file you'll redefine open/access/etc. to +// ::google::protobuf::io::win32::{open/access/etc.}. +// Make sure you don't include a header that attempts to redeclare or +// redefine these functions, that'll lead to confusing compilation +// errors. It's best to #include this file as the last one to ensure that. +// +// This file is only used on Windows, it's empty on other platforms. + +#ifndef GOOGLE_PROTOBUF_IO_IO_WIN32_H__ +#define GOOGLE_PROTOBUF_IO_IO_WIN32_H__ + +#if defined(_WIN32) + +#include <functional> +#include <string> + +#include <port.h> +#include <port_def.inc> + +// Compilers on Windows other than MSVC (e.g. Cygwin, MinGW32) define the +// following functions already, except for mkdir. +namespace google { +namespace protobuf { +namespace io { +namespace win32 { + +PROTOBUF_EXPORT FILE* fopen(const char* path, const char* mode); +PROTOBUF_EXPORT int access(const char* path, int mode); +PROTOBUF_EXPORT int chdir(const char* path); +PROTOBUF_EXPORT int close(int fd); +PROTOBUF_EXPORT int dup(int fd); +PROTOBUF_EXPORT int dup2(int fd1, int fd2); +PROTOBUF_EXPORT int mkdir(const char* path, int _mode); +PROTOBUF_EXPORT int open(const char* path, int flags, int mode = 0); +PROTOBUF_EXPORT int read(int fd, void* buffer, size_t size); +PROTOBUF_EXPORT int setmode(int fd, int mode); +PROTOBUF_EXPORT int stat(const char* path, struct _stat* buffer); +PROTOBUF_EXPORT int write(int fd, const void* buffer, size_t size); +PROTOBUF_EXPORT std::wstring testonly_utf8_to_winpath(const char* path); + +enum class ExpandWildcardsResult { + kSuccess = 0, + kErrorNoMatchingFile = 1, + kErrorInputPathConversion = 2, + kErrorOutputPathConversion = 3, +}; + +// Expand wildcards in a path pattern, feed the result to a consumer function. +// +// `path` must be a valid, Windows-style path. It may be absolute, or relative +// to the current working directory, and it may contain wildcards ("*" and "?") +// in the last path segment. This function passes all matching file names to +// `consume`. The resulting paths may not be absolute nor normalized. +// +// The function returns a value from `ExpandWildcardsResult`. +PROTOBUF_EXPORT ExpandWildcardsResult ExpandWildcards( + const std::string& path, std::function<void(const std::string&)> consume); + +namespace strings { + +// Convert from UTF-16 to Active-Code-Page-encoded or to UTF-8-encoded text. +PROTOBUF_EXPORT bool wcs_to_mbs(const wchar_t* s, std::string* out, + bool outUtf8); + +// Convert from Active-Code-Page-encoded or UTF-8-encoded text to UTF-16. +PROTOBUF_EXPORT bool mbs_to_wcs(const char* s, std::wstring* out, bool inUtf8); + +// Convert from UTF-8-encoded text to UTF-16. +PROTOBUF_EXPORT bool utf8_to_wcs(const char* input, std::wstring* out); + +// Convert from UTF-16-encoded text to UTF-8. +PROTOBUF_EXPORT bool wcs_to_utf8(const wchar_t* input, std::string* out); + +} // namespace strings + +} // namespace win32 +} // namespace io +} // namespace protobuf +} // namespace google + +#ifndef W_OK +#define W_OK 02 // not defined by MSVC for whatever reason +#endif + +#ifndef F_OK +#define F_OK 00 // not defined by MSVC for whatever reason +#endif + +#ifndef STDIN_FILENO +#define STDIN_FILENO 0 +#endif + +#ifndef STDOUT_FILENO +#define STDOUT_FILENO 1 +#endif + +#include <port_undef.inc> + +#endif // defined(_WIN32) + +#endif // GOOGLE_PROTOBUF_IO_IO_WIN32_H__ diff --git a/NorthstarDedicatedTest/include/protobuf/io/io_win32_unittest.cc b/NorthstarDedicatedTest/include/protobuf/io/io_win32_unittest.cc new file mode 100644 index 00000000..8fd4667d --- /dev/null +++ b/NorthstarDedicatedTest/include/protobuf/io/io_win32_unittest.cc @@ -0,0 +1,631 @@ +// Protocol Buffers - Google's data interchange format +// Copyright 2008 Google Inc. All rights reserved. +// https://developers.google.com/protocol-buffers/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// Author: laszlocsomor@google.com (Laszlo Csomor) +// Based on original Protocol Buffers design by +// Sanjay Ghemawat, Jeff Dean, and others. + +// Unit tests for long-path-aware open/mkdir/access/etc. on Windows, as well as +// for the supporting utility functions. +// +// This file is only used on Windows, it's empty on other platforms. + +#if defined(_WIN32) + +#define WIN32_LEAN_AND_MEAN +#include <io/io_win32.h> + +#include <errno.h> +#include <fcntl.h> +#include <stdlib.h> +#include <string.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <wchar.h> +#include <windows.h> + +#include <memory> +#include <sstream> +#include <string> +#include <vector> + +#include <gtest/gtest.h> + +namespace google { +namespace protobuf { +namespace io { +namespace win32 { +namespace { + +const char kUtf8Text[] = { + 'h', 'i', ' ', + // utf-8: 11010000 10011111, utf-16: 100 0001 1111 = 0x041F + static_cast<char>(0xd0), static_cast<char>(0x9f), + // utf-8: 11010001 10000000, utf-16: 100 0100 0000 = 0x0440 + static_cast<char>(0xd1), static_cast<char>(0x80), + // utf-8: 11010000 10111000, utf-16: 100 0011 1000 = 0x0438 + static_cast<char>(0xd0), static_cast<char>(0xb8), + // utf-8: 11010000 10110010, utf-16: 100 0011 0010 = 0x0432 + static_cast<char>(0xd0), static_cast<char>(0xb2), + // utf-8: 11010000 10110101, utf-16: 100 0011 0101 = 0x0435 + static_cast<char>(0xd0), static_cast<char>(0xb5), + // utf-8: 11010001 10000010, utf-16: 100 0100 0010 = 0x0442 + static_cast<char>(0xd1), static_cast<char>(0x82), 0 +}; + +const wchar_t kUtf16Text[] = { + L'h', L'i', L' ', + L'\x41f', L'\x440', L'\x438', L'\x432', L'\x435', L'\x442', 0 +}; + +using std::string; +using std::vector; +using std::wstring; + +class IoWin32Test : public ::testing::Test { + public: + void SetUp(); + void TearDown(); + + protected: + bool CreateAllUnder(wstring path); + bool DeleteAllUnder(wstring path); + + WCHAR working_directory[MAX_PATH]; + string test_tmpdir; + wstring wtest_tmpdir; +}; + +#define ASSERT_INITIALIZED \ + { \ + EXPECT_FALSE(test_tmpdir.empty()); \ + EXPECT_FALSE(wtest_tmpdir.empty()); \ + } + +namespace { +void StripTrailingSlashes(string* str) { + int i = str->size() - 1; + for (; i >= 0 && ((*str)[i] == '/' || (*str)[i] == '\\'); --i) {} + str->resize(i+1); +} + +bool GetEnvVarAsUtf8(const WCHAR* name, string* result) { + DWORD size = ::GetEnvironmentVariableW(name, nullptr, 0); + if (size > 0 && GetLastError() != ERROR_ENVVAR_NOT_FOUND) { + std::unique_ptr<WCHAR[]> wcs(new WCHAR[size]); + ::GetEnvironmentVariableW(name, wcs.get(), size); + // GetEnvironmentVariableA retrieves an Active-Code-Page-encoded text which + // we'd first need to convert to UTF-16 then to UTF-8, because there seems + // to be no API function to do that conversion directly. + // GetEnvironmentVariableW retrieves an UTF-16-encoded text, which we need + // to convert to UTF-8. + return strings::wcs_to_utf8(wcs.get(), result); + } else { + return false; + } +} + +bool GetCwdAsUtf8(string* result) { + DWORD size = ::GetCurrentDirectoryW(0, nullptr); + if (size > 0) { + std::unique_ptr<WCHAR[]> wcs(new WCHAR[size]); + ::GetCurrentDirectoryW(size, wcs.get()); + // GetCurrentDirectoryA retrieves an Active-Code-Page-encoded text which + // we'd first need to convert to UTF-16 then to UTF-8, because there seems + // to be no API function to do that conversion directly. + // GetCurrentDirectoryW retrieves an UTF-16-encoded text, which we need + // to convert to UTF-8. + return strings::wcs_to_utf8(wcs.get(), result); + } else { + return false; + } +} + +bool CreateEmptyFile(const wstring& path) { + HANDLE h = CreateFileW(path.c_str(), GENERIC_WRITE, 0, NULL, CREATE_ALWAYS, + FILE_ATTRIBUTE_NORMAL, NULL); + if (h == INVALID_HANDLE_VALUE) { + return false; + } + CloseHandle(h); + return true; +} + +} // namespace + +void IoWin32Test::SetUp() { + test_tmpdir.clear(); + wtest_tmpdir.clear(); + DWORD size = ::GetCurrentDirectoryW(MAX_PATH, working_directory); + EXPECT_GT(size, 0); + EXPECT_LT(size, MAX_PATH); + + string tmp; + bool ok = false; + if (!ok) { + // Bazel sets this environment variable when it runs tests. + ok = GetEnvVarAsUtf8(L"TEST_TMPDIR", &tmp); + } + if (!ok) { + // Bazel 0.8.0 sets this environment for every build and test action. + ok = GetEnvVarAsUtf8(L"TEMP", &tmp); + } + if (!ok) { + // Bazel 0.8.0 sets this environment for every build and test action. + ok = GetEnvVarAsUtf8(L"TMP", &tmp); + } + if (!ok) { + // Fall back to using the current directory. + ok = GetCwdAsUtf8(&tmp); + } + if (!ok || tmp.empty()) { + FAIL() << "Cannot find a temp directory."; + } + + StripTrailingSlashes(&tmp); + std::stringstream result; + // Deleting files and directories is asynchronous on Windows, and if TearDown + // just deleted the previous temp directory, sometimes we cannot recreate the + // same directory. + // Use a counter so every test method gets its own temp directory. + static unsigned int counter = 0; + result << tmp << "\\w32tst" << counter++ << ".tmp"; + test_tmpdir = result.str(); + wtest_tmpdir = testonly_utf8_to_winpath(test_tmpdir.c_str()); + ASSERT_FALSE(wtest_tmpdir.empty()); + ASSERT_TRUE(DeleteAllUnder(wtest_tmpdir)); + ASSERT_TRUE(CreateAllUnder(wtest_tmpdir)); +} + +void IoWin32Test::TearDown() { + if (!wtest_tmpdir.empty()) { + DeleteAllUnder(wtest_tmpdir); + } + ::SetCurrentDirectoryW(working_directory); +} + +bool IoWin32Test::CreateAllUnder(wstring path) { + // Prepend UNC prefix if the path doesn't have it already. Don't bother + // checking if the path is shorter than MAX_PATH, let's just do it + // unconditionally. + if (path.find(L"\\\\?\\") != 0) { + path = wstring(L"\\\\?\\") + path; + } + if (::CreateDirectoryW(path.c_str(), nullptr) || + GetLastError() == ERROR_ALREADY_EXISTS || + GetLastError() == ERROR_ACCESS_DENIED) { + return true; + } + if (GetLastError() == ERROR_PATH_NOT_FOUND) { + size_t pos = path.find_last_of(L'\\'); + if (pos != wstring::npos) { + wstring parent(path, 0, pos); + if (CreateAllUnder(parent) && CreateDirectoryW(path.c_str(), nullptr)) { + return true; + } + } + } + return false; +} + +bool IoWin32Test::DeleteAllUnder(wstring path) { + static const wstring kDot(L"."); + static const wstring kDotDot(L".."); + + // Prepend UNC prefix if the path doesn't have it already. Don't bother + // checking if the path is shorter than MAX_PATH, let's just do it + // unconditionally. + if (path.find(L"\\\\?\\") != 0) { + path = wstring(L"\\\\?\\") + path; + } + // Append "\" if necessary. + if (path[path.size() - 1] != L'\\') { + path.push_back(L'\\'); + } + + WIN32_FIND_DATAW metadata; + HANDLE handle = ::FindFirstFileW((path + L"*").c_str(), &metadata); + if (handle == INVALID_HANDLE_VALUE) { + return true; // directory doesn't exist + } + + bool result = true; + do { + wstring childname = metadata.cFileName; + if (kDot != childname && kDotDot != childname) { + wstring childpath = path + childname; + if ((metadata.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) != 0) { + // If this is not a junction, delete its contents recursively. + // Finally delete this directory/junction too. + if (((metadata.dwFileAttributes & FILE_ATTRIBUTE_REPARSE_POINT) == 0 && + !DeleteAllUnder(childpath)) || + !::RemoveDirectoryW(childpath.c_str())) { + result = false; + break; + } + } else { + if (!::DeleteFileW(childpath.c_str())) { + result = false; + break; + } + } + } + } while (::FindNextFileW(handle, &metadata)); + ::FindClose(handle); + return result; +} + +TEST_F(IoWin32Test, AccessTest) { + ASSERT_INITIALIZED; + + string path = test_tmpdir; + while (path.size() < MAX_PATH - 30) { + path += "\\accesstest"; + EXPECT_EQ(mkdir(path.c_str(), 0644), 0); + } + string file = path + "\\file.txt"; + int fd = open(file.c_str(), O_CREAT | O_WRONLY, 0644); + if (fd > 0) { + EXPECT_EQ(close(fd), 0); + } else { + EXPECT_TRUE(false); + } + + EXPECT_EQ(access(test_tmpdir.c_str(), F_OK), 0); + EXPECT_EQ(access(path.c_str(), F_OK), 0); + EXPECT_EQ(access(path.c_str(), W_OK), 0); + EXPECT_EQ(access(file.c_str(), F_OK | W_OK), 0); + EXPECT_NE(access((file + ".blah").c_str(), F_OK), 0); + EXPECT_NE(access((file + ".blah").c_str(), W_OK), 0); + + EXPECT_EQ(access(".", F_OK), 0); + EXPECT_EQ(access(".", W_OK), 0); + EXPECT_EQ(access((test_tmpdir + "/accesstest").c_str(), F_OK | W_OK), 0); + ASSERT_EQ(access((test_tmpdir + "/./normalize_me/.././accesstest").c_str(), + F_OK | W_OK), + 0); + EXPECT_NE(access("io_win32_unittest.AccessTest.nonexistent", F_OK), 0); + EXPECT_NE(access("io_win32_unittest.AccessTest.nonexistent", W_OK), 0); + + ASSERT_EQ(access("c:bad", F_OK), -1); + ASSERT_EQ(errno, ENOENT); + ASSERT_EQ(access("/tmp/bad", F_OK), -1); + ASSERT_EQ(errno, ENOENT); + ASSERT_EQ(access("\\bad", F_OK), -1); + ASSERT_EQ(errno, ENOENT); +} + +TEST_F(IoWin32Test, OpenTest) { + ASSERT_INITIALIZED; + + string path = test_tmpdir; + while (path.size() < MAX_PATH) { + path += "\\opentest"; + EXPECT_EQ(mkdir(path.c_str(), 0644), 0); + } + string file = path + "\\file.txt"; + int fd = open(file.c_str(), O_CREAT | O_WRONLY, 0644); + if (fd > 0) { + EXPECT_EQ(write(fd, "hello", 5), 5); + EXPECT_EQ(close(fd), 0); + } else { + EXPECT_TRUE(false); + } + + ASSERT_EQ(open("c:bad.txt", O_CREAT | O_WRONLY, 0644), -1); + ASSERT_EQ(errno, ENOENT); + ASSERT_EQ(open("/tmp/bad.txt", O_CREAT | O_WRONLY, 0644), -1); + ASSERT_EQ(errno, ENOENT); + ASSERT_EQ(open("\\bad.txt", O_CREAT | O_WRONLY, 0644), -1); + ASSERT_EQ(errno, ENOENT); +} + +TEST_F(IoWin32Test, MkdirTest) { + ASSERT_INITIALIZED; + + string path = test_tmpdir; + do { + path += "\\mkdirtest"; + ASSERT_EQ(mkdir(path.c_str(), 0644), 0); + } while (path.size() <= MAX_PATH); + + ASSERT_EQ(mkdir("c:bad", 0644), -1); + ASSERT_EQ(errno, ENOENT); + ASSERT_EQ(mkdir("/tmp/bad", 0644), -1); + ASSERT_EQ(errno, ENOENT); + ASSERT_EQ(mkdir("\\bad", 0644), -1); + ASSERT_EQ(errno, ENOENT); +} + +TEST_F(IoWin32Test, MkdirTestNonAscii) { + ASSERT_INITIALIZED; + + // Create a non-ASCII path. + // Ensure that we can create the directory using CreateDirectoryW. + EXPECT_TRUE(CreateDirectoryW((wtest_tmpdir + L"\\1").c_str(), nullptr)); + EXPECT_TRUE(CreateDirectoryW((wtest_tmpdir + L"\\1\\" + kUtf16Text).c_str(), nullptr)); + // Ensure that we can create a very similarly named directory using mkdir. + // We don't attempt to delete and recreate the same directory, because on + // Windows, deleting files and directories seems to be asynchronous. + EXPECT_EQ(mkdir((test_tmpdir + "\\2").c_str(), 0644), 0); + EXPECT_EQ(mkdir((test_tmpdir + "\\2\\" + kUtf8Text).c_str(), 0644), 0); +} + +TEST_F(IoWin32Test, ChdirTest) { + string path("C:\\"); + EXPECT_EQ(access(path.c_str(), F_OK), 0); + ASSERT_EQ(chdir(path.c_str()), 0); + + // Do not try to chdir into the test_tmpdir, it may already contain directory + // names with trailing dots. + // Instead test here with an obviously dot-trailed path. If the win32_chdir + // function would not convert the path to absolute and prefix with "\\?\" then + // the Win32 API would ignore the trailing dot, but because of the prefixing + // there'll be no path processing done, so we'll actually attempt to chdir + // into "C:\some\path\foo." + path = test_tmpdir + "/foo."; + EXPECT_EQ(mkdir(path.c_str(), 644), 0); + EXPECT_EQ(access(path.c_str(), F_OK), 0); + ASSERT_NE(chdir(path.c_str()), 0); +} + +TEST_F(IoWin32Test, ChdirTestNonAscii) { + ASSERT_INITIALIZED; + + // Create a directory with a non-ASCII path and ensure we can cd into it. + wstring wNonAscii(wtest_tmpdir + L"\\" + kUtf16Text); + string nonAscii; + EXPECT_TRUE(strings::wcs_to_utf8(wNonAscii.c_str(), &nonAscii)); + EXPECT_TRUE(CreateDirectoryW(wNonAscii.c_str(), nullptr)); + WCHAR cwd[MAX_PATH]; + EXPECT_TRUE(GetCurrentDirectoryW(MAX_PATH, cwd)); + // Ensure that we can cd into the path using SetCurrentDirectoryW. + EXPECT_TRUE(SetCurrentDirectoryW(wNonAscii.c_str())); + EXPECT_TRUE(SetCurrentDirectoryW(cwd)); + // Ensure that we can cd into the path using chdir. + ASSERT_EQ(chdir(nonAscii.c_str()), 0); + // Ensure that the GetCurrentDirectoryW returns the desired path. + EXPECT_TRUE(GetCurrentDirectoryW(MAX_PATH, cwd)); + ASSERT_EQ(wNonAscii, cwd); +} + +TEST_F(IoWin32Test, ExpandWildcardsInRelativePathTest) { + wstring wNonAscii(wtest_tmpdir + L"\\" + kUtf16Text); + EXPECT_TRUE(CreateDirectoryW(wNonAscii.c_str(), nullptr)); + // Create mock files we will test pattern matching on. + EXPECT_TRUE(CreateEmptyFile(wNonAscii + L"\\foo_a.proto")); + EXPECT_TRUE(CreateEmptyFile(wNonAscii + L"\\foo_b.proto")); + EXPECT_TRUE(CreateEmptyFile(wNonAscii + L"\\bar.proto")); + // `cd` into `wtest_tmpdir`. + EXPECT_TRUE(SetCurrentDirectoryW(wtest_tmpdir.c_str())); + + int found_a = 0; + int found_b = 0; + vector<string> found_bad; + // Assert matching a relative path pattern. Results should also be relative. + ExpandWildcardsResult result = + ExpandWildcards(string(kUtf8Text) + "\\foo*.proto", + [&found_a, &found_b, &found_bad](const string& p) { + if (p == string(kUtf8Text) + "\\foo_a.proto") { + found_a++; + } else if (p == string(kUtf8Text) + "\\foo_b.proto") { + found_b++; + } else { + found_bad.push_back(p); + } + }); + EXPECT_EQ(result, ExpandWildcardsResult::kSuccess); + EXPECT_EQ(found_a, 1); + EXPECT_EQ(found_b, 1); + if (!found_bad.empty()) { + FAIL() << found_bad[0]; + } + + // Assert matching the exact filename. + found_a = 0; + found_bad.clear(); + result = ExpandWildcards(string(kUtf8Text) + "\\foo_a.proto", + [&found_a, &found_bad](const string& p) { + if (p == string(kUtf8Text) + "\\foo_a.proto") { + found_a++; + } else { + found_bad.push_back(p); + } + }); + EXPECT_EQ(result, ExpandWildcardsResult::kSuccess); + EXPECT_EQ(found_a, 1); + if (!found_bad.empty()) { + FAIL() << found_bad[0]; + } +} + +TEST_F(IoWin32Test, ExpandWildcardsInAbsolutePathTest) { + wstring wNonAscii(wtest_tmpdir + L"\\" + kUtf16Text); + EXPECT_TRUE(CreateDirectoryW(wNonAscii.c_str(), nullptr)); + // Create mock files we will test pattern matching on. + EXPECT_TRUE(CreateEmptyFile(wNonAscii + L"\\foo_a.proto")); + EXPECT_TRUE(CreateEmptyFile(wNonAscii + L"\\foo_b.proto")); + EXPECT_TRUE(CreateEmptyFile(wNonAscii + L"\\bar.proto")); + + int found_a = 0; + int found_b = 0; + vector<string> found_bad; + // Assert matching an absolute path. The results should also use absolute + // path. + ExpandWildcardsResult result = + ExpandWildcards(string(test_tmpdir) + "\\" + kUtf8Text + "\\foo*.proto", + [this, &found_a, &found_b, &found_bad](const string& p) { + if (p == string(this->test_tmpdir) + "\\" + kUtf8Text + + "\\foo_a.proto") { + found_a++; + } else if (p == string(this->test_tmpdir) + "\\" + + kUtf8Text + "\\foo_b.proto") { + found_b++; + } else { + found_bad.push_back(p); + } + }); + EXPECT_EQ(result, ExpandWildcardsResult::kSuccess); + EXPECT_EQ(found_a, 1); + EXPECT_EQ(found_b, 1); + if (!found_bad.empty()) { + FAIL() << found_bad[0]; + } + + // Assert matching the exact filename. + found_a = 0; + found_bad.clear(); + result = + ExpandWildcards(string(test_tmpdir) + "\\" + kUtf8Text + "\\foo_a.proto", + [this, &found_a, &found_bad](const string& p) { + if (p == string(this->test_tmpdir) + "\\" + kUtf8Text + + "\\foo_a.proto") { + found_a++; + } else { + found_bad.push_back(p); + } + }); + EXPECT_EQ(result, ExpandWildcardsResult::kSuccess); + EXPECT_EQ(found_a, 1); + if (!found_bad.empty()) { + FAIL() << found_bad[0]; + } +} + +TEST_F(IoWin32Test, ExpandWildcardsIgnoresDirectoriesTest) { + wstring wNonAscii(wtest_tmpdir + L"\\" + kUtf16Text); + EXPECT_TRUE(CreateDirectoryW(wNonAscii.c_str(), nullptr)); + // Create mock files we will test pattern matching on. + EXPECT_TRUE(CreateEmptyFile(wNonAscii + L"\\foo_a.proto")); + EXPECT_TRUE( + CreateDirectoryW((wNonAscii + L"\\foo_b.proto").c_str(), nullptr)); + EXPECT_TRUE(CreateEmptyFile(wNonAscii + L"\\foo_c.proto")); + // `cd` into `wtest_tmpdir`. + EXPECT_TRUE(SetCurrentDirectoryW(wtest_tmpdir.c_str())); + + int found_a = 0; + int found_c = 0; + vector<string> found_bad; + // Assert that the pattern matches exactly the expected files, and using the + // absolute path as did the input pattern. + ExpandWildcardsResult result = + ExpandWildcards(string(kUtf8Text) + "\\foo*.proto", + [&found_a, &found_c, &found_bad](const string& p) { + if (p == string(kUtf8Text) + "\\foo_a.proto") { + found_a++; + } else if (p == string(kUtf8Text) + "\\foo_c.proto") { + found_c++; + } else { + found_bad.push_back(p); + } + }); + EXPECT_EQ(result, ExpandWildcardsResult::kSuccess); + EXPECT_EQ(found_a, 1); + EXPECT_EQ(found_c, 1); + if (!found_bad.empty()) { + FAIL() << found_bad[0]; + } +} + +TEST_F(IoWin32Test, ExpandWildcardsFailsIfNoFileMatchesTest) { + wstring wNonAscii(wtest_tmpdir + L"\\" + kUtf16Text); + EXPECT_TRUE(CreateDirectoryW(wNonAscii.c_str(), nullptr)); + // Create mock files we will test pattern matching on. + EXPECT_TRUE(CreateEmptyFile(wNonAscii + L"\\foo_a.proto")); + // `cd` into `wtest_tmpdir`. + EXPECT_TRUE(SetCurrentDirectoryW(wtest_tmpdir.c_str())); + + // Control test: should match foo*.proto + ExpandWildcardsResult result = + ExpandWildcards(string(kUtf8Text) + "\\foo*.proto", [](const string&) {}); + EXPECT_EQ(result, ExpandWildcardsResult::kSuccess); + + // Control test: should match foo_a.proto + result = ExpandWildcards(string(kUtf8Text) + "\\foo_a.proto", + [](const string&) {}); + EXPECT_EQ(result, ExpandWildcardsResult::kSuccess); + + // Actual test: should not match anything. + result = + ExpandWildcards(string(kUtf8Text) + "\\bar*.proto", [](const string&) {}); + ASSERT_EQ(result, ExpandWildcardsResult::kErrorNoMatchingFile); +} + +TEST_F(IoWin32Test, AsWindowsPathTest) { + DWORD size = GetCurrentDirectoryW(0, nullptr); + std::unique_ptr<wchar_t[]> cwd_str(new wchar_t[size]); + EXPECT_GT(GetCurrentDirectoryW(size, cwd_str.get()), 0); + wstring cwd = wstring(L"\\\\?\\") + cwd_str.get(); + + ASSERT_EQ(testonly_utf8_to_winpath("relative_mkdirtest"), + cwd + L"\\relative_mkdirtest"); + ASSERT_EQ(testonly_utf8_to_winpath("preserve//\\trailing///"), + cwd + L"\\preserve\\trailing\\"); + ASSERT_EQ(testonly_utf8_to_winpath("./normalize_me\\/../blah"), + cwd + L"\\blah"); + std::ostringstream relpath; + for (wchar_t* p = cwd_str.get(); *p; ++p) { + if (*p == '/' || *p == '\\') { + relpath << "../"; + } + } + relpath << ".\\/../\\./beyond-toplevel"; + ASSERT_EQ(testonly_utf8_to_winpath(relpath.str().c_str()), + wstring(L"\\\\?\\") + cwd_str.get()[0] + L":\\beyond-toplevel"); + + // Absolute unix paths lack drive letters, driveless absolute windows paths + // do too. Neither can be converted to a drive-specifying absolute Windows + // path. + ASSERT_EQ(testonly_utf8_to_winpath("/absolute/unix/path"), L""); + // Though valid on Windows, we also don't support UNC paths (\\UNC\\blah). + ASSERT_EQ(testonly_utf8_to_winpath("\\driveless\\absolute"), L""); + // Though valid in cmd.exe, drive-relative paths are not supported. + ASSERT_EQ(testonly_utf8_to_winpath("c:foo"), L""); + ASSERT_EQ(testonly_utf8_to_winpath("c:/foo"), L"\\\\?\\c:\\foo"); + ASSERT_EQ(testonly_utf8_to_winpath("\\\\?\\C:\\foo"), L"\\\\?\\C:\\foo"); +} + +TEST_F(IoWin32Test, Utf8Utf16ConversionTest) { + string mbs; + wstring wcs; + ASSERT_TRUE(strings::utf8_to_wcs(kUtf8Text, &wcs)); + ASSERT_TRUE(strings::wcs_to_utf8(kUtf16Text, &mbs)); + ASSERT_EQ(wcs, kUtf16Text); + ASSERT_EQ(mbs, kUtf8Text); +} + +} // namespace +} // namespace win32 +} // namespace io +} // namespace protobuf +} // namespace google + +#endif // defined(_WIN32) + diff --git a/NorthstarDedicatedTest/include/protobuf/io/package_info.h b/NorthstarDedicatedTest/include/protobuf/io/package_info.h new file mode 100644 index 00000000..46c3dac9 --- /dev/null +++ b/NorthstarDedicatedTest/include/protobuf/io/package_info.h @@ -0,0 +1,53 @@ +// Protocol Buffers - Google's data interchange format +// Copyright 2008 Google Inc. All rights reserved. +// https://developers.google.com/protocol-buffers/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// Author: kenton@google.com (Kenton Varda) +// Based on original Protocol Buffers design by +// Sanjay Ghemawat, Jeff Dean, and others. +// +// This file exists solely to document the google::protobuf::io namespace. +// It is not compiled into anything, but it may be read by an automated +// documentation generator. + +namespace google { +namespace protobuf { + +// Auxiliary classes used for I/O. +// +// The Protocol Buffer library uses the classes in this package to deal with +// I/O and encoding/decoding raw bytes. Most users will not need to +// deal with this package. However, users who want to adapt the system to +// work with their own I/O abstractions -- e.g., to allow Protocol Buffers +// to be read from a different kind of input stream without the need for a +// temporary buffer -- should take a closer look. +namespace io {} + +} // namespace protobuf +} // namespace google diff --git a/NorthstarDedicatedTest/include/protobuf/io/printer.cc b/NorthstarDedicatedTest/include/protobuf/io/printer.cc new file mode 100644 index 00000000..1764ed9b --- /dev/null +++ b/NorthstarDedicatedTest/include/protobuf/io/printer.cc @@ -0,0 +1,400 @@ +// Protocol Buffers - Google's data interchange format +// Copyright 2008 Google Inc. All rights reserved. +// https://developers.google.com/protocol-buffers/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// Author: kenton@google.com (Kenton Varda) +// Based on original Protocol Buffers design by +// Sanjay Ghemawat, Jeff Dean, and others. + +#include <io/printer.h> + +#include <cctype> + +#include <stubs/logging.h> +#include <stubs/common.h> +#include <io/zero_copy_stream.h> + +namespace google { +namespace protobuf { +namespace io { + +Printer::Printer(ZeroCopyOutputStream* output, char variable_delimiter) + : variable_delimiter_(variable_delimiter), + output_(output), + buffer_(NULL), + buffer_size_(0), + offset_(0), + at_start_of_line_(true), + failed_(false), + annotation_collector_(NULL) {} + +Printer::Printer(ZeroCopyOutputStream* output, char variable_delimiter, + AnnotationCollector* annotation_collector) + : variable_delimiter_(variable_delimiter), + output_(output), + buffer_(NULL), + buffer_size_(0), + offset_(0), + at_start_of_line_(true), + failed_(false), + annotation_collector_(annotation_collector) {} + +Printer::~Printer() { + // Only BackUp() if we have called Next() at least once and never failed. + if (buffer_size_ > 0 && !failed_) { + output_->BackUp(buffer_size_); + } +} + +bool Printer::GetSubstitutionRange(const char* varname, + std::pair<size_t, size_t>* range) { + std::map<std::string, std::pair<size_t, size_t> >::const_iterator iter = + substitutions_.find(varname); + if (iter == substitutions_.end()) { + GOOGLE_LOG(DFATAL) << " Undefined variable in annotation: " << varname; + return false; + } + if (iter->second.first > iter->second.second) { + GOOGLE_LOG(DFATAL) << " Variable used for annotation used multiple times: " + << varname; + return false; + } + *range = iter->second; + return true; +} + +void Printer::Annotate(const char* begin_varname, const char* end_varname, + const std::string& file_path, + const std::vector<int>& path) { + if (annotation_collector_ == NULL) { + // Can't generate signatures with this Printer. + return; + } + std::pair<size_t, size_t> begin, end; + if (!GetSubstitutionRange(begin_varname, &begin) || + !GetSubstitutionRange(end_varname, &end)) { + return; + } + if (begin.first > end.second) { + GOOGLE_LOG(DFATAL) << " Annotation has negative length from " << begin_varname + << " to " << end_varname; + } else { + annotation_collector_->AddAnnotation(begin.first, end.second, file_path, + path); + } +} + +void Printer::Print(const std::map<std::string, std::string>& variables, + const char* text) { + int size = strlen(text); + int pos = 0; // The number of bytes we've written so far. + substitutions_.clear(); + line_start_variables_.clear(); + + for (int i = 0; i < size; i++) { + if (text[i] == '\n') { + // Saw newline. If there is more text, we may need to insert an indent + // here. So, write what we have so far, including the '\n'. + WriteRaw(text + pos, i - pos + 1); + pos = i + 1; + + // Setting this true will cause the next WriteRaw() to insert an indent + // first. + at_start_of_line_ = true; + line_start_variables_.clear(); + + } else if (text[i] == variable_delimiter_) { + // Saw the start of a variable name. + + // Write what we have so far. + WriteRaw(text + pos, i - pos); + pos = i + 1; + + // Find closing delimiter. + const char* end = strchr(text + pos, variable_delimiter_); + if (end == NULL) { + GOOGLE_LOG(DFATAL) << " Unclosed variable name."; + end = text + pos; + } + int endpos = end - text; + + std::string varname(text + pos, endpos - pos); + if (varname.empty()) { + // Two delimiters in a row reduce to a literal delimiter character. + WriteRaw(&variable_delimiter_, 1); + } else { + // Replace with the variable's value. + std::map<std::string, std::string>::const_iterator iter = + variables.find(varname); + if (iter == variables.end()) { + GOOGLE_LOG(DFATAL) << " Undefined variable: " << varname; + } else { + if (at_start_of_line_ && iter->second.empty()) { + line_start_variables_.push_back(varname); + } + WriteRaw(iter->second.data(), iter->second.size()); + std::pair<std::map<std::string, std::pair<size_t, size_t> >::iterator, + bool> + inserted = substitutions_.insert(std::make_pair( + varname, + std::make_pair(offset_ - iter->second.size(), offset_))); + if (!inserted.second) { + // This variable was used multiple times. Make its span have + // negative length so we can detect it if it gets used in an + // annotation. + inserted.first->second = std::make_pair(1, 0); + } + } + } + + // Advance past this variable. + i = endpos; + pos = endpos + 1; + } + } + + // Write the rest. + WriteRaw(text + pos, size - pos); +} + +void Printer::Indent() { indent_ += " "; } + +void Printer::Outdent() { + if (indent_.empty()) { + GOOGLE_LOG(DFATAL) << " Outdent() without matching Indent()."; + return; + } + + indent_.resize(indent_.size() - 2); +} + +void Printer::PrintRaw(const std::string& data) { + WriteRaw(data.data(), data.size()); +} + +void Printer::PrintRaw(const char* data) { + if (failed_) return; + WriteRaw(data, strlen(data)); +} + +void Printer::WriteRaw(const char* data, int size) { + if (failed_) return; + if (size == 0) return; + + if (at_start_of_line_ && (size > 0) && (data[0] != '\n')) { + // Insert an indent. + at_start_of_line_ = false; + CopyToBuffer(indent_.data(), indent_.size()); + if (failed_) return; + // Fix up empty variables (e.g., "{") that should be annotated as + // coming after the indent. + for (std::vector<std::string>::iterator i = line_start_variables_.begin(); + i != line_start_variables_.end(); ++i) { + substitutions_[*i].first += indent_.size(); + substitutions_[*i].second += indent_.size(); + } + } + + // If we're going to write any data, clear line_start_variables_, since + // we've either updated them in the block above or they no longer refer to + // the current line. + line_start_variables_.clear(); + + CopyToBuffer(data, size); +} + +bool Printer::Next() { + do { + void* void_buffer; + if (!output_->Next(&void_buffer, &buffer_size_)) { + failed_ = true; + return false; + } + buffer_ = reinterpret_cast<char*>(void_buffer); + } while (buffer_size_ == 0); + return true; +} + +void Printer::CopyToBuffer(const char* data, int size) { + if (failed_) return; + if (size == 0) return; + + while (size > buffer_size_) { + // Data exceeds space in the buffer. Copy what we can and request a + // new buffer. + if (buffer_size_ > 0) { + memcpy(buffer_, data, buffer_size_); + offset_ += buffer_size_; + data += buffer_size_; + size -= buffer_size_; + } + void* void_buffer; + failed_ = !output_->Next(&void_buffer, &buffer_size_); + if (failed_) return; + buffer_ = reinterpret_cast<char*>(void_buffer); + } + + // Buffer is big enough to receive the data; copy it. + memcpy(buffer_, data, size); + buffer_ += size; + buffer_size_ -= size; + offset_ += size; +} + +void Printer::IndentIfAtStart() { + if (at_start_of_line_) { + CopyToBuffer(indent_.data(), indent_.size()); + at_start_of_line_ = false; + } +} + +void Printer::FormatInternal(const std::vector<std::string>& args, + const std::map<std::string, std::string>& vars, + const char* format) { + auto save = format; + int arg_index = 0; + std::vector<AnnotationCollector::Annotation> annotations; + while (*format) { + char c = *format++; + switch (c) { + case '$': + format = WriteVariable(args, vars, format, &arg_index, &annotations); + continue; + case '\n': + at_start_of_line_ = true; + line_start_variables_.clear(); + break; + default: + IndentIfAtStart(); + break; + } + push_back(c); + } + if (arg_index != static_cast<int>(args.size())) { + GOOGLE_LOG(FATAL) << " Unused arguments. " << save; + } + if (!annotations.empty()) { + GOOGLE_LOG(FATAL) << " Annotation range is not-closed, expect $}$. " << save; + } +} + +const char* Printer::WriteVariable( + const std::vector<std::string>& args, + const std::map<std::string, std::string>& vars, const char* format, + int* arg_index, std::vector<AnnotationCollector::Annotation>* annotations) { + auto start = format; + auto end = strchr(format, '$'); + if (!end) { + GOOGLE_LOG(FATAL) << " Unclosed variable name."; + } + format = end + 1; + if (end == start) { + // "$$" is an escape for just '$' + IndentIfAtStart(); + push_back('$'); + return format; + } + if (*start == '{') { + GOOGLE_CHECK(std::isdigit(start[1])); + GOOGLE_CHECK_EQ(end - start, 2); + int idx = start[1] - '1'; + if (idx < 0 || static_cast<size_t>(idx) >= args.size()) { + GOOGLE_LOG(FATAL) << "Annotation ${" << idx + 1 << "$ is out of bounds."; + } + if (idx > *arg_index) { + GOOGLE_LOG(FATAL) << "Annotation arg must be in correct order as given. Expected" + << " ${" << (*arg_index) + 1 << "$ got ${" << idx + 1 << "$."; + } else if (idx == *arg_index) { + (*arg_index)++; + } + IndentIfAtStart(); + annotations->push_back({{offset_, 0}, args[idx]}); + return format; + } else if (*start == '}') { + GOOGLE_CHECK(annotations); + if (annotations->empty()) { + GOOGLE_LOG(FATAL) << "Unexpected end of annotation found."; + } + auto& a = annotations->back(); + a.first.second = offset_; + if (annotation_collector_) annotation_collector_->AddAnnotationNew(a); + annotations->pop_back(); + return format; + } + auto start_var = start; + while (start_var < end && *start_var == ' ') start_var++; + if (start_var == end) { + GOOGLE_LOG(FATAL) << " Empty variable."; + } + auto end_var = end; + while (start_var < end_var && *(end_var - 1) == ' ') end_var--; + std::string var_name{ + start_var, static_cast<std::string::size_type>(end_var - start_var)}; + std::string sub; + if (std::isdigit(var_name[0])) { + GOOGLE_CHECK_EQ(var_name.size(), 1U); // No need for multi-digits + int idx = var_name[0] - '1'; // Start counting at 1 + GOOGLE_CHECK_GE(idx, 0); + if (static_cast<size_t>(idx) >= args.size()) { + GOOGLE_LOG(FATAL) << "Argument $" << idx + 1 << "$ is out of bounds."; + } + if (idx > *arg_index) { + GOOGLE_LOG(FATAL) << "Arguments must be used in same order as given. Expected $" + << (*arg_index) + 1 << "$ got $" << idx + 1 << "$."; + } else if (idx == *arg_index) { + (*arg_index)++; + } + sub = args[idx]; + } else { + auto it = vars.find(var_name); + if (it == vars.end()) { + GOOGLE_LOG(FATAL) << " Unknown variable: " << var_name << "."; + } + sub = it->second; + } + + // By returning here in case of empty we also skip possible spaces inside + // the $...$, i.e. "void$ dllexpor$ f();" -> "void f();" in the empty case. + if (sub.empty()) return format; + + // We're going to write something non-empty so we need a possible indent. + IndentIfAtStart(); + + // Write the possible spaces in front. + CopyToBuffer(start, start_var - start); + // Write a non-empty substituted variable. + CopyToBuffer(sub.c_str(), sub.size()); + // Finish off with writing possible trailing spaces. + CopyToBuffer(end_var, end - end_var); + return format; +} + +} // namespace io +} // namespace protobuf +} // namespace google diff --git a/NorthstarDedicatedTest/include/protobuf/io/printer.h b/NorthstarDedicatedTest/include/protobuf/io/printer.h new file mode 100644 index 00000000..15e19855 --- /dev/null +++ b/NorthstarDedicatedTest/include/protobuf/io/printer.h @@ -0,0 +1,385 @@ +// Protocol Buffers - Google's data interchange format +// Copyright 2008 Google Inc. All rights reserved. +// https://developers.google.com/protocol-buffers/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// Author: kenton@google.com (Kenton Varda) +// Based on original Protocol Buffers design by +// Sanjay Ghemawat, Jeff Dean, and others. +// +// Utility class for writing text to a ZeroCopyOutputStream. + +#ifndef GOOGLE_PROTOBUF_IO_PRINTER_H__ +#define GOOGLE_PROTOBUF_IO_PRINTER_H__ + + +#include <map> +#include <string> +#include <vector> + +#include <stubs/common.h> +#include <port_def.inc> + +namespace google { +namespace protobuf { +namespace io { + +class ZeroCopyOutputStream; // zero_copy_stream.h + +// Records annotations about a Printer's output. +class PROTOBUF_EXPORT AnnotationCollector { + public: + // Annotation is a offset range and a payload pair. + typedef std::pair<std::pair<size_t, size_t>, std::string> Annotation; + + // Records that the bytes in file_path beginning with begin_offset and ending + // before end_offset are associated with the SourceCodeInfo-style path. + virtual void AddAnnotation(size_t begin_offset, size_t end_offset, + const std::string& file_path, + const std::vector<int>& path) = 0; + + // TODO(gerbens) I don't see why we need virtuals here. Just a vector of + // range, payload pairs stored in a context should suffice. + virtual void AddAnnotationNew(Annotation& /* a */) {} + + virtual ~AnnotationCollector() {} +}; + +// Records annotations about a Printer's output to the given protocol buffer, +// assuming that the buffer has an ::Annotation message exposing path, +// source_file, begin and end fields. +template <typename AnnotationProto> +class AnnotationProtoCollector : public AnnotationCollector { + public: + // annotation_proto is the protocol buffer to which new Annotations should be + // added. It is not owned by the AnnotationProtoCollector. + explicit AnnotationProtoCollector(AnnotationProto* annotation_proto) + : annotation_proto_(annotation_proto) {} + + // Override for AnnotationCollector::AddAnnotation. + void AddAnnotation(size_t begin_offset, size_t end_offset, + const std::string& file_path, + const std::vector<int>& path) override { + typename AnnotationProto::Annotation* annotation = + annotation_proto_->add_annotation(); + for (int i = 0; i < path.size(); ++i) { + annotation->add_path(path[i]); + } + annotation->set_source_file(file_path); + annotation->set_begin(begin_offset); + annotation->set_end(end_offset); + } + // Override for AnnotationCollector::AddAnnotation. + void AddAnnotationNew(Annotation& a) override { + auto* annotation = annotation_proto_->add_annotation(); + annotation->ParseFromString(a.second); + annotation->set_begin(a.first.first); + annotation->set_end(a.first.second); + } + + private: + // The protocol buffer to which new annotations should be added. + AnnotationProto* const annotation_proto_; +}; + +// This simple utility class assists in code generation. It basically +// allows the caller to define a set of variables and then output some +// text with variable substitutions. Example usage: +// +// Printer printer(output, '$'); +// map<string, string> vars; +// vars["name"] = "Bob"; +// printer.Print(vars, "My name is $name$."); +// +// The above writes "My name is Bob." to the output stream. +// +// Printer aggressively enforces correct usage, crashing (with assert failures) +// in the case of undefined variables in debug builds. This helps greatly in +// debugging code which uses it. +// +// If a Printer is constructed with an AnnotationCollector, it will provide it +// with annotations that connect the Printer's output to paths that can identify +// various descriptors. In the above example, if person_ is a descriptor that +// identifies Bob, we can associate the output string "My name is Bob." with +// a source path pointing to that descriptor with: +// +// printer.Annotate("name", person_); +// +// The AnnotationCollector will be sent an annotation linking the output range +// covering "Bob" to the logical path provided by person_. Tools may use +// this association to (for example) link "Bob" in the output back to the +// source file that defined the person_ descriptor identifying Bob. +// +// Annotate can only examine variables substituted during the last call to +// Print. It is invalid to refer to a variable that was used multiple times +// in a single Print call. +// +// In full generality, one may specify a range of output text using a beginning +// substitution variable and an ending variable. The resulting annotation will +// span from the first character of the substituted value for the beginning +// variable to the last character of the substituted value for the ending +// variable. For example, the Annotate call above is equivalent to this one: +// +// printer.Annotate("name", "name", person_); +// +// This is useful if multiple variables combine to form a single span of output +// that should be annotated with the same source path. For example: +// +// Printer printer(output, '$'); +// map<string, string> vars; +// vars["first"] = "Alice"; +// vars["last"] = "Smith"; +// printer.Print(vars, "My name is $first$ $last$."); +// printer.Annotate("first", "last", person_); +// +// This code would associate the span covering "Alice Smith" in the output with +// the person_ descriptor. +// +// Note that the beginning variable must come before (or overlap with, in the +// case of zero-sized substitution values) the ending variable. +// +// It is also sometimes useful to use variables with zero-sized values as +// markers. This avoids issues with multiple references to the same variable +// and also allows annotation ranges to span literal text from the Print +// templates: +// +// Printer printer(output, '$'); +// map<string, string> vars; +// vars["foo"] = "bar"; +// vars["function"] = "call"; +// vars["mark"] = ""; +// printer.Print(vars, "$function$($foo$,$foo$)$mark$"); +// printer.Annotate("function", "mark", call_); +// +// This code associates the span covering "call(bar,bar)" in the output with the +// call_ descriptor. + +class PROTOBUF_EXPORT Printer { + public: + // Create a printer that writes text to the given output stream. Use the + // given character as the delimiter for variables. + Printer(ZeroCopyOutputStream* output, char variable_delimiter); + + // Create a printer that writes text to the given output stream. Use the + // given character as the delimiter for variables. If annotation_collector + // is not null, Printer will provide it with annotations about code written + // to the stream. annotation_collector is not owned by Printer. + Printer(ZeroCopyOutputStream* output, char variable_delimiter, + AnnotationCollector* annotation_collector); + + ~Printer(); + + // Link a substitution variable emitted by the last call to Print to the + // object described by descriptor. + template <typename SomeDescriptor> + void Annotate(const char* varname, const SomeDescriptor* descriptor) { + Annotate(varname, varname, descriptor); + } + + // Link the output range defined by the substitution variables as emitted by + // the last call to Print to the object described by descriptor. The range + // begins at begin_varname's value and ends after the last character of the + // value substituted for end_varname. + template <typename SomeDescriptor> + void Annotate(const char* begin_varname, const char* end_varname, + const SomeDescriptor* descriptor) { + if (annotation_collector_ == NULL) { + // Annotations aren't turned on for this Printer, so don't pay the cost + // of building the location path. + return; + } + std::vector<int> path; + descriptor->GetLocationPath(&path); + Annotate(begin_varname, end_varname, descriptor->file()->name(), path); + } + + // Link a substitution variable emitted by the last call to Print to the file + // with path file_name. + void Annotate(const char* varname, const std::string& file_name) { + Annotate(varname, varname, file_name); + } + + // Link the output range defined by the substitution variables as emitted by + // the last call to Print to the file with path file_name. The range begins + // at begin_varname's value and ends after the last character of the value + // substituted for end_varname. + void Annotate(const char* begin_varname, const char* end_varname, + const std::string& file_name) { + if (annotation_collector_ == NULL) { + // Annotations aren't turned on for this Printer. + return; + } + std::vector<int> empty_path; + Annotate(begin_varname, end_varname, file_name, empty_path); + } + + // Print some text after applying variable substitutions. If a particular + // variable in the text is not defined, this will crash. Variables to be + // substituted are identified by their names surrounded by delimiter + // characters (as given to the constructor). The variable bindings are + // defined by the given map. + void Print(const std::map<std::string, std::string>& variables, + const char* text); + + // Like the first Print(), except the substitutions are given as parameters. + template <typename... Args> + void Print(const char* text, const Args&... args) { + std::map<std::string, std::string> vars; + PrintInternal(&vars, text, args...); + } + + // Indent text by two spaces. After calling Indent(), two spaces will be + // inserted at the beginning of each line of text. Indent() may be called + // multiple times to produce deeper indents. + void Indent(); + + // Reduces the current indent level by two spaces, or crashes if the indent + // level is zero. + void Outdent(); + + // Write a string to the output buffer. + // This method does not look for newlines to add indentation. + void PrintRaw(const std::string& data); + + // Write a zero-delimited string to output buffer. + // This method does not look for newlines to add indentation. + void PrintRaw(const char* data); + + // Write some bytes to the output buffer. + // This method does not look for newlines to add indentation. + void WriteRaw(const char* data, int size); + + // FormatInternal is a helper function not meant to use directly, use + // compiler::cpp::Formatter instead. This function is meant to support + // formatting text using named variables (eq. "$foo$) from a lookup map (vars) + // and variables directly supplied by arguments (eq "$1$" meaning first + // argument which is the zero index element of args). + void FormatInternal(const std::vector<std::string>& args, + const std::map<std::string, std::string>& vars, + const char* format); + + // True if any write to the underlying stream failed. (We don't just + // crash in this case because this is an I/O failure, not a programming + // error.) + bool failed() const { return failed_; } + + private: + // Link the output range defined by the substitution variables as emitted by + // the last call to Print to the object found at the SourceCodeInfo-style path + // in a file with path file_path. The range begins at the start of + // begin_varname's value and ends after the last character of the value + // substituted for end_varname. Note that begin_varname and end_varname + // may refer to the same variable. + void Annotate(const char* begin_varname, const char* end_varname, + const std::string& file_path, const std::vector<int>& path); + + // Base case + void PrintInternal(std::map<std::string, std::string>* vars, + const char* text) { + Print(*vars, text); + } + + template <typename... Args> + void PrintInternal(std::map<std::string, std::string>* vars, const char* text, + const char* key, const std::string& value, + const Args&... args) { + (*vars)[key] = value; + PrintInternal(vars, text, args...); + } + + // Copy size worth of bytes from data to buffer_. + void CopyToBuffer(const char* data, int size); + + void push_back(char c) { + if (failed_) return; + if (buffer_size_ == 0) { + if (!Next()) return; + } + *buffer_++ = c; + buffer_size_--; + offset_++; + } + + bool Next(); + + inline void IndentIfAtStart(); + const char* WriteVariable( + const std::vector<std::string>& args, + const std::map<std::string, std::string>& vars, const char* format, + int* arg_index, + std::vector<AnnotationCollector::Annotation>* annotations); + + const char variable_delimiter_; + + ZeroCopyOutputStream* const output_; + char* buffer_; + int buffer_size_; + // The current position, in bytes, in the output stream. This is equivalent + // to the total number of bytes that have been written so far. This value is + // used to calculate annotation ranges in the substitutions_ map below. + size_t offset_; + + std::string indent_; + bool at_start_of_line_; + bool failed_; + + // A map from variable name to [start, end) offsets in the output buffer. + // These refer to the offsets used for a variable after the last call to + // Print. If a variable was used more than once, the entry used in + // this map is set to a negative-length span. For singly-used variables, the + // start offset is the beginning of the substitution; the end offset is the + // last byte of the substitution plus one (such that (end - start) is the + // length of the substituted string). + std::map<std::string, std::pair<size_t, size_t> > substitutions_; + + // Keeps track of the keys in substitutions_ that need to be updated when + // indents are inserted. These are keys that refer to the beginning of the + // current line. + std::vector<std::string> line_start_variables_; + + // Returns true and sets range to the substitution range in the output for + // varname if varname was used once in the last call to Print. If varname + // was not used, or if it was used multiple times, returns false (and + // fails a debug assertion). + bool GetSubstitutionRange(const char* varname, + std::pair<size_t, size_t>* range); + + // If non-null, annotation_collector_ is used to store annotations about + // generated code. + AnnotationCollector* const annotation_collector_; + + GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(Printer); +}; + +} // namespace io +} // namespace protobuf +} // namespace google + +#include <port_undef.inc> + +#endif // GOOGLE_PROTOBUF_IO_PRINTER_H__ diff --git a/NorthstarDedicatedTest/include/protobuf/io/printer_unittest.cc b/NorthstarDedicatedTest/include/protobuf/io/printer_unittest.cc new file mode 100644 index 00000000..1652703b --- /dev/null +++ b/NorthstarDedicatedTest/include/protobuf/io/printer_unittest.cc @@ -0,0 +1,735 @@ +// Protocol Buffers - Google's data interchange format +// Copyright 2008 Google Inc. All rights reserved. +// https://developers.google.com/protocol-buffers/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// Author: kenton@google.com (Kenton Varda) +// Based on original Protocol Buffers design by +// Sanjay Ghemawat, Jeff Dean, and others. + +#include <io/printer.h> + +#include <vector> + +#include <stubs/logging.h> +#include <stubs/common.h> +#include <descriptor.pb.h> +#include <testing/googletest.h> +#include <gtest/gtest.h> +#include <io/zero_copy_stream_impl.h> + +namespace google { +namespace protobuf { +namespace io { + +// Each test repeats over several block sizes in order to test both cases +// where particular writes cross a buffer boundary and cases where they do +// not. + +TEST(Printer, EmptyPrinter) { + char buffer[8192]; + const int block_size = 100; + ArrayOutputStream output(buffer, GOOGLE_ARRAYSIZE(buffer), block_size); + Printer printer(&output, '\0'); + EXPECT_TRUE(!printer.failed()); +} + +TEST(Printer, BasicPrinting) { + char buffer[8192]; + + for (int block_size = 1; block_size < 512; block_size *= 2) { + ArrayOutputStream output(buffer, sizeof(buffer), block_size); + + { + Printer printer(&output, '\0'); + + printer.Print("Hello World!"); + printer.Print(" This is the same line.\n"); + printer.Print("But this is a new one.\nAnd this is another one."); + + EXPECT_FALSE(printer.failed()); + } + + buffer[output.ByteCount()] = '\0'; + + EXPECT_STREQ( + "Hello World! This is the same line.\n" + "But this is a new one.\n" + "And this is another one.", + buffer); + } +} + +TEST(Printer, WriteRaw) { + char buffer[8192]; + + for (int block_size = 1; block_size < 512; block_size *= 2) { + ArrayOutputStream output(buffer, sizeof(buffer), block_size); + + { + std::string string_obj = "From an object\n"; + Printer printer(&output, '$'); + printer.WriteRaw("Hello World!", 12); + printer.PrintRaw(" This is the same line.\n"); + printer.PrintRaw("But this is a new one.\nAnd this is another one."); + printer.WriteRaw("\n", 1); + printer.PrintRaw(string_obj); + EXPECT_FALSE(printer.failed()); + } + + buffer[output.ByteCount()] = '\0'; + + EXPECT_STREQ( + "Hello World! This is the same line.\n" + "But this is a new one.\n" + "And this is another one." + "\n" + "From an object\n", + buffer); + } +} + +TEST(Printer, VariableSubstitution) { + char buffer[8192]; + + for (int block_size = 1; block_size < 512; block_size *= 2) { + ArrayOutputStream output(buffer, sizeof(buffer), block_size); + + { + Printer printer(&output, '$'); + std::map<std::string, std::string> vars; + + vars["foo"] = "World"; + vars["bar"] = "$foo$"; + vars["abcdefg"] = "1234"; + + printer.Print(vars, "Hello $foo$!\nbar = $bar$\n"); + printer.PrintRaw("RawBit\n"); + printer.Print(vars, "$abcdefg$\nA literal dollar sign: $$"); + + vars["foo"] = "blah"; + printer.Print(vars, "\nNow foo = $foo$."); + + EXPECT_FALSE(printer.failed()); + } + + buffer[output.ByteCount()] = '\0'; + + EXPECT_STREQ( + "Hello World!\n" + "bar = $foo$\n" + "RawBit\n" + "1234\n" + "A literal dollar sign: $\n" + "Now foo = blah.", + buffer); + } +} + +TEST(Printer, InlineVariableSubstitution) { + char buffer[8192]; + + ArrayOutputStream output(buffer, sizeof(buffer)); + + { + Printer printer(&output, '$'); + printer.Print("Hello $foo$!\n", "foo", "World"); + printer.PrintRaw("RawBit\n"); + printer.Print("$foo$ $bar$\n", "foo", "one", "bar", "two"); + EXPECT_FALSE(printer.failed()); + } + + buffer[output.ByteCount()] = '\0'; + + EXPECT_STREQ( + "Hello World!\n" + "RawBit\n" + "one two\n", + buffer); +} + +// MockDescriptorFile defines only those members that Printer uses to write out +// annotations. +class MockDescriptorFile { + public: + explicit MockDescriptorFile(const std::string& file) : file_(file) {} + + // The mock filename for this file. + const std::string& name() const { return file_; } + + private: + std::string file_; +}; + +// MockDescriptor defines only those members that Printer uses to write out +// annotations. +class MockDescriptor { + public: + MockDescriptor(const std::string& file, const std::vector<int>& path) + : file_(file), path_(path) {} + + // The mock file in which this descriptor was defined. + const MockDescriptorFile* file() const { return &file_; } + + private: + // Allows access to GetLocationPath. + friend class Printer; + + // Copies the pre-stored path to output. + void GetLocationPath(std::vector<int>* output) const { *output = path_; } + + MockDescriptorFile file_; + std::vector<int> path_; +}; + +TEST(Printer, AnnotateMap) { + char buffer[8192]; + ArrayOutputStream output(buffer, sizeof(buffer)); + GeneratedCodeInfo info; + AnnotationProtoCollector<GeneratedCodeInfo> info_collector(&info); + { + Printer printer(&output, '$', &info_collector); + std::map<std::string, std::string> vars; + vars["foo"] = "3"; + vars["bar"] = "5"; + printer.Print(vars, "012$foo$4$bar$\n"); + std::vector<int> path_1; + path_1.push_back(33); + std::vector<int> path_2; + path_2.push_back(11); + path_2.push_back(22); + MockDescriptor descriptor_1("path_1", path_1); + MockDescriptor descriptor_2("path_2", path_2); + printer.Annotate("foo", "foo", &descriptor_1); + printer.Annotate("bar", "bar", &descriptor_2); + } + buffer[output.ByteCount()] = '\0'; + EXPECT_STREQ("012345\n", buffer); + ASSERT_EQ(2, info.annotation_size()); + const GeneratedCodeInfo::Annotation* foo = info.annotation(0).path_size() == 1 + ? &info.annotation(0) + : &info.annotation(1); + const GeneratedCodeInfo::Annotation* bar = info.annotation(0).path_size() == 1 + ? &info.annotation(1) + : &info.annotation(0); + ASSERT_EQ(1, foo->path_size()); + ASSERT_EQ(2, bar->path_size()); + EXPECT_EQ(33, foo->path(0)); + EXPECT_EQ(11, bar->path(0)); + EXPECT_EQ(22, bar->path(1)); + EXPECT_EQ("path_1", foo->source_file()); + EXPECT_EQ("path_2", bar->source_file()); + EXPECT_EQ(3, foo->begin()); + EXPECT_EQ(4, foo->end()); + EXPECT_EQ(5, bar->begin()); + EXPECT_EQ(6, bar->end()); +} + +TEST(Printer, AnnotateInline) { + char buffer[8192]; + ArrayOutputStream output(buffer, sizeof(buffer)); + GeneratedCodeInfo info; + AnnotationProtoCollector<GeneratedCodeInfo> info_collector(&info); + { + Printer printer(&output, '$', &info_collector); + printer.Print("012$foo$4$bar$\n", "foo", "3", "bar", "5"); + std::vector<int> path_1; + path_1.push_back(33); + std::vector<int> path_2; + path_2.push_back(11); + path_2.push_back(22); + MockDescriptor descriptor_1("path_1", path_1); + MockDescriptor descriptor_2("path_2", path_2); + printer.Annotate("foo", "foo", &descriptor_1); + printer.Annotate("bar", "bar", &descriptor_2); + } + buffer[output.ByteCount()] = '\0'; + EXPECT_STREQ("012345\n", buffer); + ASSERT_EQ(2, info.annotation_size()); + const GeneratedCodeInfo::Annotation* foo = info.annotation(0).path_size() == 1 + ? &info.annotation(0) + : &info.annotation(1); + const GeneratedCodeInfo::Annotation* bar = info.annotation(0).path_size() == 1 + ? &info.annotation(1) + : &info.annotation(0); + ASSERT_EQ(1, foo->path_size()); + ASSERT_EQ(2, bar->path_size()); + EXPECT_EQ(33, foo->path(0)); + EXPECT_EQ(11, bar->path(0)); + EXPECT_EQ(22, bar->path(1)); + EXPECT_EQ("path_1", foo->source_file()); + EXPECT_EQ("path_2", bar->source_file()); + EXPECT_EQ(3, foo->begin()); + EXPECT_EQ(4, foo->end()); + EXPECT_EQ(5, bar->begin()); + EXPECT_EQ(6, bar->end()); +} + +TEST(Printer, AnnotateRange) { + char buffer[8192]; + ArrayOutputStream output(buffer, sizeof(buffer)); + GeneratedCodeInfo info; + AnnotationProtoCollector<GeneratedCodeInfo> info_collector(&info); + { + Printer printer(&output, '$', &info_collector); + printer.Print("012$foo$4$bar$\n", "foo", "3", "bar", "5"); + std::vector<int> path; + path.push_back(33); + MockDescriptor descriptor("path", path); + printer.Annotate("foo", "bar", &descriptor); + } + buffer[output.ByteCount()] = '\0'; + EXPECT_STREQ("012345\n", buffer); + ASSERT_EQ(1, info.annotation_size()); + const GeneratedCodeInfo::Annotation* foobar = &info.annotation(0); + ASSERT_EQ(1, foobar->path_size()); + EXPECT_EQ(33, foobar->path(0)); + EXPECT_EQ("path", foobar->source_file()); + EXPECT_EQ(3, foobar->begin()); + EXPECT_EQ(6, foobar->end()); +} + +TEST(Printer, AnnotateEmptyRange) { + char buffer[8192]; + ArrayOutputStream output(buffer, sizeof(buffer)); + GeneratedCodeInfo info; + AnnotationProtoCollector<GeneratedCodeInfo> info_collector(&info); + { + Printer printer(&output, '$', &info_collector); + printer.Print("012$foo$4$baz$$bam$$bar$\n", "foo", "3", "bar", "5", "baz", + "", "bam", ""); + std::vector<int> path; + path.push_back(33); + MockDescriptor descriptor("path", path); + printer.Annotate("baz", "bam", &descriptor); + } + buffer[output.ByteCount()] = '\0'; + EXPECT_STREQ("012345\n", buffer); + ASSERT_EQ(1, info.annotation_size()); + const GeneratedCodeInfo::Annotation* bazbam = &info.annotation(0); + ASSERT_EQ(1, bazbam->path_size()); + EXPECT_EQ(33, bazbam->path(0)); + EXPECT_EQ("path", bazbam->source_file()); + EXPECT_EQ(5, bazbam->begin()); + EXPECT_EQ(5, bazbam->end()); +} + +TEST(Printer, AnnotateDespiteUnrelatedMultipleUses) { + char buffer[8192]; + ArrayOutputStream output(buffer, sizeof(buffer)); + GeneratedCodeInfo info; + AnnotationProtoCollector<GeneratedCodeInfo> info_collector(&info); + { + Printer printer(&output, '$', &info_collector); + printer.Print("012$foo$4$foo$$bar$\n", "foo", "3", "bar", "5"); + std::vector<int> path; + path.push_back(33); + MockDescriptor descriptor("path", path); + printer.Annotate("bar", "bar", &descriptor); + } + buffer[output.ByteCount()] = '\0'; + EXPECT_STREQ("0123435\n", buffer); + ASSERT_EQ(1, info.annotation_size()); + const GeneratedCodeInfo::Annotation* bar = &info.annotation(0); + ASSERT_EQ(1, bar->path_size()); + EXPECT_EQ(33, bar->path(0)); + EXPECT_EQ("path", bar->source_file()); + EXPECT_EQ(6, bar->begin()); + EXPECT_EQ(7, bar->end()); +} + +TEST(Printer, AnnotateIndent) { + char buffer[8192]; + ArrayOutputStream output(buffer, sizeof(buffer)); + GeneratedCodeInfo info; + AnnotationProtoCollector<GeneratedCodeInfo> info_collector(&info); + { + Printer printer(&output, '$', &info_collector); + printer.Print("0\n"); + printer.Indent(); + printer.Print("$foo$", "foo", "4"); + std::vector<int> path; + path.push_back(44); + MockDescriptor descriptor("path", path); + printer.Annotate("foo", &descriptor); + printer.Print(",\n"); + printer.Print("$bar$", "bar", "9"); + path[0] = 99; + MockDescriptor descriptor_two("path", path); + printer.Annotate("bar", &descriptor_two); + printer.Print("\n${$$D$$}$\n", "{", "", "}", "", "D", "d"); + path[0] = 1313; + MockDescriptor descriptor_three("path", path); + printer.Annotate("{", "}", &descriptor_three); + printer.Outdent(); + printer.Print("\n"); + } + buffer[output.ByteCount()] = '\0'; + EXPECT_STREQ("0\n 4,\n 9\n d\n\n", buffer); + ASSERT_EQ(3, info.annotation_size()); + const GeneratedCodeInfo::Annotation* foo = &info.annotation(0); + ASSERT_EQ(1, foo->path_size()); + EXPECT_EQ(44, foo->path(0)); + EXPECT_EQ("path", foo->source_file()); + EXPECT_EQ(4, foo->begin()); + EXPECT_EQ(5, foo->end()); + const GeneratedCodeInfo::Annotation* bar = &info.annotation(1); + ASSERT_EQ(1, bar->path_size()); + EXPECT_EQ(99, bar->path(0)); + EXPECT_EQ("path", bar->source_file()); + EXPECT_EQ(9, bar->begin()); + EXPECT_EQ(10, bar->end()); + const GeneratedCodeInfo::Annotation* braces = &info.annotation(2); + ASSERT_EQ(1, braces->path_size()); + EXPECT_EQ(1313, braces->path(0)); + EXPECT_EQ("path", braces->source_file()); + EXPECT_EQ(13, braces->begin()); + EXPECT_EQ(14, braces->end()); +} + +TEST(Printer, AnnotateIndentNewline) { + char buffer[8192]; + ArrayOutputStream output(buffer, sizeof(buffer)); + GeneratedCodeInfo info; + AnnotationProtoCollector<GeneratedCodeInfo> info_collector(&info); + { + Printer printer(&output, '$', &info_collector); + printer.Indent(); + printer.Print("$A$$N$$B$C\n", "A", "", "N", "\nz", "B", ""); + std::vector<int> path; + path.push_back(0); + MockDescriptor descriptor("path", path); + printer.Annotate("A", "B", &descriptor); + printer.Outdent(); + printer.Print("\n"); + } + buffer[output.ByteCount()] = '\0'; + EXPECT_STREQ("\nz C\n\n", buffer); + ASSERT_EQ(1, info.annotation_size()); + const GeneratedCodeInfo::Annotation* ab = &info.annotation(0); + ASSERT_EQ(1, ab->path_size()); + EXPECT_EQ(0, ab->path(0)); + EXPECT_EQ("path", ab->source_file()); + EXPECT_EQ(0, ab->begin()); + EXPECT_EQ(4, ab->end()); +} + +TEST(Printer, Indenting) { + char buffer[8192]; + + for (int block_size = 1; block_size < 512; block_size *= 2) { + ArrayOutputStream output(buffer, sizeof(buffer), block_size); + + { + Printer printer(&output, '$'); + std::map<std::string, std::string> vars; + + vars["newline"] = "\n"; + + printer.Print("This is not indented.\n"); + printer.Indent(); + printer.Print("This is indented\nAnd so is this\n"); + printer.Outdent(); + printer.Print("But this is not."); + printer.Indent(); + printer.Print( + " And this is still the same line.\n" + "But this is indented.\n"); + printer.PrintRaw("RawBit has indent at start\n"); + printer.PrintRaw("but not after a raw newline\n"); + printer.Print(vars, + "Note that a newline in a variable will break " + "indenting, as we see$newline$here.\n"); + printer.Indent(); + printer.Print("And this"); + printer.Outdent(); + printer.Outdent(); + printer.Print(" is double-indented\nBack to normal."); + + EXPECT_FALSE(printer.failed()); + } + + buffer[output.ByteCount()] = '\0'; + + EXPECT_STREQ( + "This is not indented.\n" + " This is indented\n" + " And so is this\n" + "But this is not. And this is still the same line.\n" + " But this is indented.\n" + " RawBit has indent at start\n" + "but not after a raw newline\n" + "Note that a newline in a variable will break indenting, as we see\n" + "here.\n" + " And this is double-indented\n" + "Back to normal.", + buffer); + } +} + +// Death tests do not work on Windows as of yet. +#ifdef PROTOBUF_HAS_DEATH_TEST +TEST(Printer, Death) { + char buffer[8192]; + + ArrayOutputStream output(buffer, sizeof(buffer)); + Printer printer(&output, '$'); + + EXPECT_DEBUG_DEATH(printer.Print("$nosuchvar$"), "Undefined variable"); + EXPECT_DEBUG_DEATH(printer.Print("$unclosed"), "Unclosed variable name"); + EXPECT_DEBUG_DEATH(printer.Outdent(), "without matching Indent"); +} + +TEST(Printer, AnnotateMultipleUsesDeath) { + char buffer[8192]; + ArrayOutputStream output(buffer, sizeof(buffer)); + GeneratedCodeInfo info; + AnnotationProtoCollector<GeneratedCodeInfo> info_collector(&info); + { + Printer printer(&output, '$', &info_collector); + printer.Print("012$foo$4$foo$\n", "foo", "3"); + std::vector<int> path; + path.push_back(33); + MockDescriptor descriptor("path", path); + EXPECT_DEBUG_DEATH(printer.Annotate("foo", "foo", &descriptor), "multiple"); + } +} + +TEST(Printer, AnnotateNegativeLengthDeath) { + char buffer[8192]; + ArrayOutputStream output(buffer, sizeof(buffer)); + GeneratedCodeInfo info; + AnnotationProtoCollector<GeneratedCodeInfo> info_collector(&info); + { + Printer printer(&output, '$', &info_collector); + printer.Print("012$foo$4$bar$\n", "foo", "3", "bar", "5"); + std::vector<int> path; + path.push_back(33); + MockDescriptor descriptor("path", path); + EXPECT_DEBUG_DEATH(printer.Annotate("bar", "foo", &descriptor), "negative"); + } +} + +TEST(Printer, AnnotateUndefinedDeath) { + char buffer[8192]; + ArrayOutputStream output(buffer, sizeof(buffer)); + GeneratedCodeInfo info; + AnnotationProtoCollector<GeneratedCodeInfo> info_collector(&info); + { + Printer printer(&output, '$', &info_collector); + printer.Print("012$foo$4$foo$\n", "foo", "3"); + std::vector<int> path; + path.push_back(33); + MockDescriptor descriptor("path", path); + EXPECT_DEBUG_DEATH(printer.Annotate("bar", "bar", &descriptor), + "Undefined"); + } +} +#endif // PROTOBUF_HAS_DEATH_TEST + +TEST(Printer, WriteFailurePartial) { + char buffer[17]; + + ArrayOutputStream output(buffer, sizeof(buffer)); + Printer printer(&output, '$'); + + // Print 16 bytes to almost fill the buffer (should not fail). + printer.Print("0123456789abcdef"); + EXPECT_FALSE(printer.failed()); + + // Try to print 2 chars. Only one fits. + printer.Print("<>"); + EXPECT_TRUE(printer.failed()); + + // Anything else should fail too. + printer.Print(" "); + EXPECT_TRUE(printer.failed()); + printer.Print("blah"); + EXPECT_TRUE(printer.failed()); + + // Buffer should contain the first 17 bytes written. + EXPECT_EQ("0123456789abcdef<", std::string(buffer, sizeof(buffer))); +} + +TEST(Printer, WriteFailureExact) { + char buffer[16]; + + ArrayOutputStream output(buffer, sizeof(buffer)); + Printer printer(&output, '$'); + + // Print 16 bytes to fill the buffer exactly (should not fail). + printer.Print("0123456789abcdef"); + EXPECT_FALSE(printer.failed()); + + // Try to print one more byte (should fail). + printer.Print(" "); + EXPECT_TRUE(printer.failed()); + + // Should not crash + printer.Print("blah"); + EXPECT_TRUE(printer.failed()); + + // Buffer should contain the first 16 bytes written. + EXPECT_EQ("0123456789abcdef", std::string(buffer, sizeof(buffer))); +} + +TEST(Printer, FormatInternal) { + std::vector<std::string> args{"arg1", "arg2"}; + std::map<std::string, std::string> vars{ + {"foo", "bar"}, {"baz", "bla"}, {"empty", ""}}; + // Substitution tests + { + // Direct arg substitution + std::string s; + { + StringOutputStream output(&s); + Printer printer(&output, '$'); + printer.FormatInternal(args, vars, "$1$ $2$"); + } + EXPECT_EQ("arg1 arg2", s); + } + { + // Variable substitution including spaces left + std::string s; + { + StringOutputStream output(&s); + Printer printer(&output, '$'); + printer.FormatInternal({}, vars, "$foo$$ baz$$ empty$"); + } + EXPECT_EQ("bar bla", s); + } + { + // Variable substitution including spaces right + std::string s; + { + StringOutputStream output(&s); + Printer printer(&output, '$'); + printer.FormatInternal({}, vars, "$empty $$foo $$baz$"); + } + EXPECT_EQ("bar bla", s); + } + { + // Mixed variable substitution + std::string s; + { + StringOutputStream output(&s); + Printer printer(&output, '$'); + printer.FormatInternal(args, vars, "$empty $$1$ $foo $$2$ $baz$"); + } + EXPECT_EQ("arg1 bar arg2 bla", s); + } + + // Indentation tests + { + // Empty lines shouldn't indent. + std::string s; + { + StringOutputStream output(&s); + Printer printer(&output, '$'); + printer.Indent(); + printer.FormatInternal(args, vars, "$empty $\n\n$1$ $foo $$2$\n$baz$"); + printer.Outdent(); + } + EXPECT_EQ("\n\n arg1 bar arg2\n bla", s); + } + { + // Annotations should respect indentation. + std::string s; + GeneratedCodeInfo info; + { + StringOutputStream output(&s); + AnnotationProtoCollector<GeneratedCodeInfo> info_collector(&info); + Printer printer(&output, '$', &info_collector); + printer.Indent(); + GeneratedCodeInfo::Annotation annotation; + annotation.set_source_file("file.proto"); + annotation.add_path(33); + std::vector<std::string> args{annotation.SerializeAsString(), "arg1", + "arg2"}; + printer.FormatInternal(args, vars, "$empty $\n\n${1$$2$$}$ $3$\n$baz$"); + printer.Outdent(); + } + EXPECT_EQ("\n\n arg1 arg2\n bla", s); + ASSERT_EQ(1, info.annotation_size()); + const GeneratedCodeInfo::Annotation* arg1 = &info.annotation(0); + ASSERT_EQ(1, arg1->path_size()); + EXPECT_EQ(33, arg1->path(0)); + EXPECT_EQ("file.proto", arg1->source_file()); + EXPECT_EQ(4, arg1->begin()); + EXPECT_EQ(8, arg1->end()); + } +#ifdef PROTOBUF_HAS_DEATH_TEST + // Death tests in case of illegal format strings. + { + // Unused arguments + std::string s; + StringOutputStream output(&s); + Printer printer(&output, '$'); + EXPECT_DEATH(printer.FormatInternal(args, vars, "$empty $$1$"), "Unused"); + } + { + // Wrong order arguments + std::string s; + StringOutputStream output(&s); + Printer printer(&output, '$'); + EXPECT_DEATH(printer.FormatInternal(args, vars, "$2$ $1$"), "order"); + } + { + // Zero is illegal argument + std::string s; + StringOutputStream output(&s); + Printer printer(&output, '$'); + EXPECT_DEATH(printer.FormatInternal(args, vars, "$0$"), "failed"); + } + { + // Argument out of bounds + std::string s; + StringOutputStream output(&s); + Printer printer(&output, '$'); + EXPECT_DEATH(printer.FormatInternal(args, vars, "$1$ $2$ $3$"), "bounds"); + } + { + // Unknown variable + std::string s; + StringOutputStream output(&s); + Printer printer(&output, '$'); + EXPECT_DEATH(printer.FormatInternal(args, vars, "$huh$ $1$$2$"), "Unknown"); + } + { + // Illegal variable + std::string s; + StringOutputStream output(&s); + Printer printer(&output, '$'); + EXPECT_DEATH(printer.FormatInternal({}, vars, "$ $"), "Empty"); + } +#endif // PROTOBUF_HAS_DEATH_TEST +} + +} // namespace io +} // namespace protobuf +} // namespace google diff --git a/NorthstarDedicatedTest/include/protobuf/io/strtod.cc b/NorthstarDedicatedTest/include/protobuf/io/strtod.cc new file mode 100644 index 00000000..427a0761 --- /dev/null +++ b/NorthstarDedicatedTest/include/protobuf/io/strtod.cc @@ -0,0 +1,82 @@ +// Protocol Buffers - Google's data interchange format +// Copyright 2008 Google Inc. All rights reserved. +// https://developers.google.com/protocol-buffers/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include <io/strtod.h> + +#include <cstdio> +#include <cstring> +#include <limits> +#include <string> + +#include <stubs/logging.h> +#include <stubs/common.h> + +#include <stubs/strutil.h> + +namespace google { +namespace protobuf { +namespace io { + +// This approximately 0x1.ffffffp127, but we don't use 0x1.ffffffp127 because +// it won't compile in MSVC. +const double MAX_FLOAT_AS_DOUBLE_ROUNDED = 3.4028235677973366e+38; + +float SafeDoubleToFloat(double value) { + // static_cast<float> on a number larger than float can result in illegal + // instruction error, so we need to manually convert it to infinity or max. + if (value > std::numeric_limits<float>::max()) { + // Max float value is about 3.4028234664E38 when represented as a double. + // However, when printing float as text, it will be rounded as + // 3.4028235e+38. If we parse the value of 3.4028235e+38 from text and + // compare it to 3.4028234664E38, we may think that it is larger, but + // actually, any number between these two numbers could only be represented + // as the same max float number in float, so we should treat them the same + // as max float. + if (value <= MAX_FLOAT_AS_DOUBLE_ROUNDED) { + return std::numeric_limits<float>::max(); + } + return std::numeric_limits<float>::infinity(); + } else if (value < -std::numeric_limits<float>::max()) { + if (value >= -MAX_FLOAT_AS_DOUBLE_ROUNDED) { + return -std::numeric_limits<float>::max(); + } + return -std::numeric_limits<float>::infinity(); + } else { + return static_cast<float>(value); + } +} + +double NoLocaleStrtod(const char* str, char** endptr) { + return google::protobuf::internal::NoLocaleStrtod(str, endptr); +} + +} // namespace io +} // namespace protobuf +} // namespace google diff --git a/NorthstarDedicatedTest/include/protobuf/io/strtod.h b/NorthstarDedicatedTest/include/protobuf/io/strtod.h new file mode 100644 index 00000000..38f544af --- /dev/null +++ b/NorthstarDedicatedTest/include/protobuf/io/strtod.h @@ -0,0 +1,55 @@ +// Protocol Buffers - Google's data interchange format +// Copyright 2008 Google Inc. All rights reserved. +// https://developers.google.com/protocol-buffers/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// A locale-independent version of strtod(), used to parse floating +// point default values in .proto files, where the decimal separator +// is always a dot. + +#ifndef GOOGLE_PROTOBUF_IO_STRTOD_H__ +#define GOOGLE_PROTOBUF_IO_STRTOD_H__ + +namespace google { +namespace protobuf { +namespace io { + +// A locale-independent version of the standard strtod(), which always +// uses a dot as the decimal separator. +double NoLocaleStrtod(const char* str, char** endptr); + +// Casts a double value to a float value. If the value is outside of the +// representable range of float, it will be converted to positive or negative +// infinity. +float SafeDoubleToFloat(double value); + +} // namespace io +} // namespace protobuf +} // namespace google + +#endif // GOOGLE_PROTOBUF_IO_STRTOD_H__ diff --git a/NorthstarDedicatedTest/include/protobuf/io/tokenizer.cc b/NorthstarDedicatedTest/include/protobuf/io/tokenizer.cc new file mode 100644 index 00000000..712368f5 --- /dev/null +++ b/NorthstarDedicatedTest/include/protobuf/io/tokenizer.cc @@ -0,0 +1,1185 @@ +// Protocol Buffers - Google's data interchange format +// Copyright 2008 Google Inc. All rights reserved. +// https://developers.google.com/protocol-buffers/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// Author: kenton@google.com (Kenton Varda) +// Based on original Protocol Buffers design by +// Sanjay Ghemawat, Jeff Dean, and others. +// +// Here we have a hand-written lexer. At first you might ask yourself, +// "Hand-written text processing? Is Kenton crazy?!" Well, first of all, +// yes I am crazy, but that's beside the point. There are actually reasons +// why I ended up writing this this way. +// +// The traditional approach to lexing is to use lex to generate a lexer for +// you. Unfortunately, lex's output is ridiculously ugly and difficult to +// integrate cleanly with C++ code, especially abstract code or code meant +// as a library. Better parser-generators exist but would add dependencies +// which most users won't already have, which we'd like to avoid. (GNU flex +// has a C++ output option, but it's still ridiculously ugly, non-abstract, +// and not library-friendly.) +// +// The next approach that any good software engineer should look at is to +// use regular expressions. And, indeed, I did. I have code which +// implements this same class using regular expressions. It's about 200 +// lines shorter. However: +// - Rather than error messages telling you "This string has an invalid +// escape sequence at line 5, column 45", you get error messages like +// "Parse error on line 5". Giving more precise errors requires adding +// a lot of code that ends up basically as complex as the hand-coded +// version anyway. +// - The regular expression to match a string literal looks like this: +// kString = new RE("(\"([^\"\\\\]|" // non-escaped +// "\\\\[abfnrtv?\"'\\\\0-7]|" // normal escape +// "\\\\x[0-9a-fA-F])*\"|" // hex escape +// "\'([^\'\\\\]|" // Also support single-quotes. +// "\\\\[abfnrtv?\"'\\\\0-7]|" +// "\\\\x[0-9a-fA-F])*\')"); +// Verifying the correctness of this line noise is actually harder than +// verifying the correctness of ConsumeString(), defined below. I'm not +// even confident that the above is correct, after staring at it for some +// time. +// - PCRE is fast, but there's still more overhead involved than the code +// below. +// - Sadly, regular expressions are not part of the C standard library, so +// using them would require depending on some other library. For the +// open source release, this could be really annoying. Nobody likes +// downloading one piece of software just to find that they need to +// download something else to make it work, and in all likelihood +// people downloading Protocol Buffers will already be doing so just +// to make something else work. We could include a copy of PCRE with +// our code, but that obligates us to keep it up-to-date and just seems +// like a big waste just to save 200 lines of code. +// +// On a similar but unrelated note, I'm even scared to use ctype.h. +// Apparently functions like isalpha() are locale-dependent. So, if we used +// that, then if this code is being called from some program that doesn't +// have its locale set to "C", it would behave strangely. We can't just set +// the locale to "C" ourselves since we might break the calling program that +// way, particularly if it is multi-threaded. WTF? Someone please let me +// (Kenton) know if I'm missing something here... +// +// I'd love to hear about other alternatives, though, as this code isn't +// exactly pretty. + +#include <io/tokenizer.h> + +#include <stubs/common.h> +#include <stubs/logging.h> +#include <stubs/stringprintf.h> +#include <stubs/strutil.h> +#include <io/strtod.h> +#include <io/zero_copy_stream.h> +#include <stubs/stl_util.h> + +namespace google { +namespace protobuf { +namespace io { +namespace { + +// As mentioned above, I don't trust ctype.h due to the presence of "locales". +// So, I have written replacement functions here. Someone please smack me if +// this is a bad idea or if there is some way around this. +// +// These "character classes" are designed to be used in template methods. +// For instance, Tokenizer::ConsumeZeroOrMore<Whitespace>() will eat +// whitespace. + +// Note: No class is allowed to contain '\0', since this is used to mark end- +// of-input and is handled specially. + +#define CHARACTER_CLASS(NAME, EXPRESSION) \ + class NAME { \ + public: \ + static inline bool InClass(char c) { return EXPRESSION; } \ + } + +CHARACTER_CLASS(Whitespace, c == ' ' || c == '\n' || c == '\t' || c == '\r' || + c == '\v' || c == '\f'); +CHARACTER_CLASS(WhitespaceNoNewline, + c == ' ' || c == '\t' || c == '\r' || c == '\v' || c == '\f'); + +CHARACTER_CLASS(Unprintable, c<' ' && c> '\0'); + +CHARACTER_CLASS(Digit, '0' <= c && c <= '9'); +CHARACTER_CLASS(OctalDigit, '0' <= c && c <= '7'); +CHARACTER_CLASS(HexDigit, ('0' <= c && c <= '9') || ('a' <= c && c <= 'f') || + ('A' <= c && c <= 'F')); + +CHARACTER_CLASS(Letter, + ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z') || (c == '_')); + +CHARACTER_CLASS(Alphanumeric, ('a' <= c && c <= 'z') || + ('A' <= c && c <= 'Z') || + ('0' <= c && c <= '9') || (c == '_')); + +CHARACTER_CLASS(Escape, c == 'a' || c == 'b' || c == 'f' || c == 'n' || + c == 'r' || c == 't' || c == 'v' || c == '\\' || + c == '?' || c == '\'' || c == '\"'); + +#undef CHARACTER_CLASS + +// Given a char, interpret it as a numeric digit and return its value. +// This supports any number base up to 36. +inline int DigitValue(char digit) { + if ('0' <= digit && digit <= '9') return digit - '0'; + if ('a' <= digit && digit <= 'z') return digit - 'a' + 10; + if ('A' <= digit && digit <= 'Z') return digit - 'A' + 10; + return -1; +} + +// Inline because it's only used in one place. +inline char TranslateEscape(char c) { + switch (c) { + case 'a': + return '\a'; + case 'b': + return '\b'; + case 'f': + return '\f'; + case 'n': + return '\n'; + case 'r': + return '\r'; + case 't': + return '\t'; + case 'v': + return '\v'; + case '\\': + return '\\'; + case '?': + return '\?'; // Trigraphs = :( + case '\'': + return '\''; + case '"': + return '\"'; + + // We expect escape sequences to have been validated separately. + default: + return '?'; + } +} + +} // anonymous namespace + +ErrorCollector::~ErrorCollector() {} + +// =================================================================== + +Tokenizer::Tokenizer(ZeroCopyInputStream* input, + ErrorCollector* error_collector) + : input_(input), + error_collector_(error_collector), + buffer_(NULL), + buffer_size_(0), + buffer_pos_(0), + read_error_(false), + line_(0), + column_(0), + record_target_(NULL), + record_start_(-1), + allow_f_after_float_(false), + comment_style_(CPP_COMMENT_STYLE), + require_space_after_number_(true), + allow_multiline_strings_(false) { + current_.line = 0; + current_.column = 0; + current_.end_column = 0; + current_.type = TYPE_START; + + Refresh(); +} + +Tokenizer::~Tokenizer() { + // If we had any buffer left unread, return it to the underlying stream + // so that someone else can read it. + if (buffer_size_ > buffer_pos_) { + input_->BackUp(buffer_size_ - buffer_pos_); + } +} + +bool Tokenizer::report_whitespace() const { return report_whitespace_; } +// Note: `set_report_whitespace(false)` implies `set_report_newlines(false)`. +void Tokenizer::set_report_whitespace(bool report) { + report_whitespace_ = report; + report_newlines_ &= report; +} + +// If true, newline tokens are reported by Next(). +bool Tokenizer::report_newlines() const { return report_newlines_; } +// Note: `set_report_newlines(true)` implies `set_report_whitespace(true)`. +void Tokenizer::set_report_newlines(bool report) { + report_newlines_ = report; + report_whitespace_ |= report; // enable report_whitespace if necessary +} + +// ------------------------------------------------------------------- +// Internal helpers. + +void Tokenizer::NextChar() { + // Update our line and column counters based on the character being + // consumed. + if (current_char_ == '\n') { + ++line_; + column_ = 0; + } else if (current_char_ == '\t') { + column_ += kTabWidth - column_ % kTabWidth; + } else { + ++column_; + } + + // Advance to the next character. + ++buffer_pos_; + if (buffer_pos_ < buffer_size_) { + current_char_ = buffer_[buffer_pos_]; + } else { + Refresh(); + } +} + +void Tokenizer::Refresh() { + if (read_error_) { + current_char_ = '\0'; + return; + } + + // If we're in a token, append the rest of the buffer to it. + if (record_target_ != NULL && record_start_ < buffer_size_) { + record_target_->append(buffer_ + record_start_, + buffer_size_ - record_start_); + record_start_ = 0; + } + + const void* data = NULL; + buffer_ = NULL; + buffer_pos_ = 0; + do { + if (!input_->Next(&data, &buffer_size_)) { + // end of stream (or read error) + buffer_size_ = 0; + read_error_ = true; + current_char_ = '\0'; + return; + } + } while (buffer_size_ == 0); + + buffer_ = static_cast<const char*>(data); + + current_char_ = buffer_[0]; +} + +inline void Tokenizer::RecordTo(std::string* target) { + record_target_ = target; + record_start_ = buffer_pos_; +} + +inline void Tokenizer::StopRecording() { + // Note: The if() is necessary because some STL implementations crash when + // you call string::append(NULL, 0), presumably because they are trying to + // be helpful by detecting the NULL pointer, even though there's nothing + // wrong with reading zero bytes from NULL. + if (buffer_pos_ != record_start_) { + record_target_->append(buffer_ + record_start_, + buffer_pos_ - record_start_); + } + record_target_ = NULL; + record_start_ = -1; +} + +inline void Tokenizer::StartToken() { + current_.type = TYPE_START; // Just for the sake of initializing it. + current_.text.clear(); + current_.line = line_; + current_.column = column_; + RecordTo(¤t_.text); +} + +inline void Tokenizer::EndToken() { + StopRecording(); + current_.end_column = column_; +} + +// ------------------------------------------------------------------- +// Helper methods that consume characters. + +template <typename CharacterClass> +inline bool Tokenizer::LookingAt() { + return CharacterClass::InClass(current_char_); +} + +template <typename CharacterClass> +inline bool Tokenizer::TryConsumeOne() { + if (CharacterClass::InClass(current_char_)) { + NextChar(); + return true; + } else { + return false; + } +} + +inline bool Tokenizer::TryConsume(char c) { + if (current_char_ == c) { + NextChar(); + return true; + } else { + return false; + } +} + +template <typename CharacterClass> +inline void Tokenizer::ConsumeZeroOrMore() { + while (CharacterClass::InClass(current_char_)) { + NextChar(); + } +} + +template <typename CharacterClass> +inline void Tokenizer::ConsumeOneOrMore(const char* error) { + if (!CharacterClass::InClass(current_char_)) { + AddError(error); + } else { + do { + NextChar(); + } while (CharacterClass::InClass(current_char_)); + } +} + +// ------------------------------------------------------------------- +// Methods that read whole patterns matching certain kinds of tokens +// or comments. + +void Tokenizer::ConsumeString(char delimiter) { + while (true) { + switch (current_char_) { + case '\0': + AddError("Unexpected end of string."); + return; + + case '\n': { + if (!allow_multiline_strings_) { + AddError("String literals cannot cross line boundaries."); + return; + } + NextChar(); + break; + } + + case '\\': { + // An escape sequence. + NextChar(); + if (TryConsumeOne<Escape>()) { + // Valid escape sequence. + } else if (TryConsumeOne<OctalDigit>()) { + // Possibly followed by two more octal digits, but these will + // just be consumed by the main loop anyway so we don't need + // to do so explicitly here. + } else if (TryConsume('x')) { + if (!TryConsumeOne<HexDigit>()) { + AddError("Expected hex digits for escape sequence."); + } + // Possibly followed by another hex digit, but again we don't care. + } else if (TryConsume('u')) { + if (!TryConsumeOne<HexDigit>() || !TryConsumeOne<HexDigit>() || + !TryConsumeOne<HexDigit>() || !TryConsumeOne<HexDigit>()) { + AddError("Expected four hex digits for \\u escape sequence."); + } + } else if (TryConsume('U')) { + // We expect 8 hex digits; but only the range up to 0x10ffff is + // legal. + if (!TryConsume('0') || !TryConsume('0') || + !(TryConsume('0') || TryConsume('1')) || + !TryConsumeOne<HexDigit>() || !TryConsumeOne<HexDigit>() || + !TryConsumeOne<HexDigit>() || !TryConsumeOne<HexDigit>() || + !TryConsumeOne<HexDigit>()) { + AddError( + "Expected eight hex digits up to 10ffff for \\U escape " + "sequence"); + } + } else { + AddError("Invalid escape sequence in string literal."); + } + break; + } + + default: { + if (current_char_ == delimiter) { + NextChar(); + return; + } + NextChar(); + break; + } + } + } +} + +Tokenizer::TokenType Tokenizer::ConsumeNumber(bool started_with_zero, + bool started_with_dot) { + bool is_float = false; + + if (started_with_zero && (TryConsume('x') || TryConsume('X'))) { + // A hex number (started with "0x"). + ConsumeOneOrMore<HexDigit>("\"0x\" must be followed by hex digits."); + + } else if (started_with_zero && LookingAt<Digit>()) { + // An octal number (had a leading zero). + ConsumeZeroOrMore<OctalDigit>(); + if (LookingAt<Digit>()) { + AddError("Numbers starting with leading zero must be in octal."); + ConsumeZeroOrMore<Digit>(); + } + + } else { + // A decimal number. + if (started_with_dot) { + is_float = true; + ConsumeZeroOrMore<Digit>(); + } else { + ConsumeZeroOrMore<Digit>(); + + if (TryConsume('.')) { + is_float = true; + ConsumeZeroOrMore<Digit>(); + } + } + + if (TryConsume('e') || TryConsume('E')) { + is_float = true; + TryConsume('-') || TryConsume('+'); + ConsumeOneOrMore<Digit>("\"e\" must be followed by exponent."); + } + + if (allow_f_after_float_ && (TryConsume('f') || TryConsume('F'))) { + is_float = true; + } + } + + if (LookingAt<Letter>() && require_space_after_number_) { + AddError("Need space between number and identifier."); + } else if (current_char_ == '.') { + if (is_float) { + AddError( + "Already saw decimal point or exponent; can't have another one."); + } else { + AddError("Hex and octal numbers must be integers."); + } + } + + return is_float ? TYPE_FLOAT : TYPE_INTEGER; +} + +void Tokenizer::ConsumeLineComment(std::string* content) { + if (content != NULL) RecordTo(content); + + while (current_char_ != '\0' && current_char_ != '\n') { + NextChar(); + } + TryConsume('\n'); + + if (content != NULL) StopRecording(); +} + +void Tokenizer::ConsumeBlockComment(std::string* content) { + int start_line = line_; + int start_column = column_ - 2; + + if (content != NULL) RecordTo(content); + + while (true) { + while (current_char_ != '\0' && current_char_ != '*' && + current_char_ != '/' && current_char_ != '\n') { + NextChar(); + } + + if (TryConsume('\n')) { + if (content != NULL) StopRecording(); + + // Consume leading whitespace and asterisk; + ConsumeZeroOrMore<WhitespaceNoNewline>(); + if (TryConsume('*')) { + if (TryConsume('/')) { + // End of comment. + break; + } + } + + if (content != NULL) RecordTo(content); + } else if (TryConsume('*') && TryConsume('/')) { + // End of comment. + if (content != NULL) { + StopRecording(); + // Strip trailing "*/". + content->erase(content->size() - 2); + } + break; + } else if (TryConsume('/') && current_char_ == '*') { + // Note: We didn't consume the '*' because if there is a '/' after it + // we want to interpret that as the end of the comment. + AddError( + "\"/*\" inside block comment. Block comments cannot be nested."); + } else if (current_char_ == '\0') { + AddError("End-of-file inside block comment."); + error_collector_->AddError(start_line, start_column, + " Comment started here."); + if (content != NULL) StopRecording(); + break; + } + } +} + +Tokenizer::NextCommentStatus Tokenizer::TryConsumeCommentStart() { + if (comment_style_ == CPP_COMMENT_STYLE && TryConsume('/')) { + if (TryConsume('/')) { + return LINE_COMMENT; + } else if (TryConsume('*')) { + return BLOCK_COMMENT; + } else { + // Oops, it was just a slash. Return it. + current_.type = TYPE_SYMBOL; + current_.text = "/"; + current_.line = line_; + current_.column = column_ - 1; + current_.end_column = column_; + return SLASH_NOT_COMMENT; + } + } else if (comment_style_ == SH_COMMENT_STYLE && TryConsume('#')) { + return LINE_COMMENT; + } else { + return NO_COMMENT; + } +} + +bool Tokenizer::TryConsumeWhitespace() { + if (report_newlines_) { + if (TryConsumeOne<WhitespaceNoNewline>()) { + ConsumeZeroOrMore<WhitespaceNoNewline>(); + current_.type = TYPE_WHITESPACE; + return true; + } + return false; + } + if (TryConsumeOne<Whitespace>()) { + ConsumeZeroOrMore<Whitespace>(); + current_.type = TYPE_WHITESPACE; + return report_whitespace_; + } + return false; +} + +bool Tokenizer::TryConsumeNewline() { + if (!report_whitespace_ || !report_newlines_) { + return false; + } + if (TryConsume('\n')) { + current_.type = TYPE_NEWLINE; + return true; + } + return false; +} + +// ------------------------------------------------------------------- + +bool Tokenizer::Next() { + previous_ = current_; + + while (!read_error_) { + StartToken(); + bool report_token = TryConsumeWhitespace() || TryConsumeNewline(); + EndToken(); + if (report_token) { + return true; + } + + switch (TryConsumeCommentStart()) { + case LINE_COMMENT: + ConsumeLineComment(NULL); + continue; + case BLOCK_COMMENT: + ConsumeBlockComment(NULL); + continue; + case SLASH_NOT_COMMENT: + return true; + case NO_COMMENT: + break; + } + + // Check for EOF before continuing. + if (read_error_) break; + + if (LookingAt<Unprintable>() || current_char_ == '\0') { + AddError("Invalid control characters encountered in text."); + NextChar(); + // Skip more unprintable characters, too. But, remember that '\0' is + // also what current_char_ is set to after EOF / read error. We have + // to be careful not to go into an infinite loop of trying to consume + // it, so make sure to check read_error_ explicitly before consuming + // '\0'. + while (TryConsumeOne<Unprintable>() || + (!read_error_ && TryConsume('\0'))) { + // Ignore. + } + + } else { + // Reading some sort of token. + StartToken(); + + if (TryConsumeOne<Letter>()) { + ConsumeZeroOrMore<Alphanumeric>(); + current_.type = TYPE_IDENTIFIER; + } else if (TryConsume('0')) { + current_.type = ConsumeNumber(true, false); + } else if (TryConsume('.')) { + // This could be the beginning of a floating-point number, or it could + // just be a '.' symbol. + + if (TryConsumeOne<Digit>()) { + // It's a floating-point number. + if (previous_.type == TYPE_IDENTIFIER && + current_.line == previous_.line && + current_.column == previous_.end_column) { + // We don't accept syntax like "blah.123". + error_collector_->AddError( + line_, column_ - 2, + "Need space between identifier and decimal point."); + } + current_.type = ConsumeNumber(false, true); + } else { + current_.type = TYPE_SYMBOL; + } + } else if (TryConsumeOne<Digit>()) { + current_.type = ConsumeNumber(false, false); + } else if (TryConsume('\"')) { + ConsumeString('\"'); + current_.type = TYPE_STRING; + } else if (TryConsume('\'')) { + ConsumeString('\''); + current_.type = TYPE_STRING; + } else { + // Check if the high order bit is set. + if (current_char_ & 0x80) { + error_collector_->AddError( + line_, column_, + StringPrintf("Interpreting non ascii codepoint %d.", + static_cast<unsigned char>(current_char_))); + } + NextChar(); + current_.type = TYPE_SYMBOL; + } + + EndToken(); + return true; + } + } + + // EOF + current_.type = TYPE_END; + current_.text.clear(); + current_.line = line_; + current_.column = column_; + current_.end_column = column_; + return false; +} + +namespace { + +// Helper class for collecting comments and putting them in the right places. +// +// This basically just buffers the most recent comment until it can be decided +// exactly where that comment should be placed. When Flush() is called, the +// current comment goes into either prev_trailing_comments or detached_comments. +// When the CommentCollector is destroyed, the last buffered comment goes into +// next_leading_comments. +class CommentCollector { + public: + CommentCollector(std::string* prev_trailing_comments, + std::vector<std::string>* detached_comments, + std::string* next_leading_comments) + : prev_trailing_comments_(prev_trailing_comments), + detached_comments_(detached_comments), + next_leading_comments_(next_leading_comments), + has_comment_(false), + is_line_comment_(false), + can_attach_to_prev_(true) { + if (prev_trailing_comments != NULL) prev_trailing_comments->clear(); + if (detached_comments != NULL) detached_comments->clear(); + if (next_leading_comments != NULL) next_leading_comments->clear(); + } + + ~CommentCollector() { + // Whatever is in the buffer is a leading comment. + if (next_leading_comments_ != NULL && has_comment_) { + comment_buffer_.swap(*next_leading_comments_); + } + } + + // About to read a line comment. Get the comment buffer pointer in order to + // read into it. + std::string* GetBufferForLineComment() { + // We want to combine with previous line comments, but not block comments. + if (has_comment_ && !is_line_comment_) { + Flush(); + } + has_comment_ = true; + is_line_comment_ = true; + return &comment_buffer_; + } + + // About to read a block comment. Get the comment buffer pointer in order to + // read into it. + std::string* GetBufferForBlockComment() { + if (has_comment_) { + Flush(); + } + has_comment_ = true; + is_line_comment_ = false; + return &comment_buffer_; + } + + void ClearBuffer() { + comment_buffer_.clear(); + has_comment_ = false; + } + + // Called once we know that the comment buffer is complete and is *not* + // connected to the next token. + void Flush() { + if (has_comment_) { + if (can_attach_to_prev_) { + if (prev_trailing_comments_ != NULL) { + prev_trailing_comments_->append(comment_buffer_); + } + can_attach_to_prev_ = false; + } else { + if (detached_comments_ != NULL) { + detached_comments_->push_back(comment_buffer_); + } + } + ClearBuffer(); + } + } + + void DetachFromPrev() { can_attach_to_prev_ = false; } + + private: + std::string* prev_trailing_comments_; + std::vector<std::string>* detached_comments_; + std::string* next_leading_comments_; + + std::string comment_buffer_; + + // True if any comments were read into comment_buffer_. This can be true even + // if comment_buffer_ is empty, namely if the comment was "/**/". + bool has_comment_; + + // Is the comment in the comment buffer a line comment? + bool is_line_comment_; + + // Is it still possible that we could be reading a comment attached to the + // previous token? + bool can_attach_to_prev_; +}; + +} // namespace + +bool Tokenizer::NextWithComments(std::string* prev_trailing_comments, + std::vector<std::string>* detached_comments, + std::string* next_leading_comments) { + CommentCollector collector(prev_trailing_comments, detached_comments, + next_leading_comments); + + if (current_.type == TYPE_START) { + // Ignore unicode byte order mark(BOM) if it appears at the file + // beginning. Only UTF-8 BOM (0xEF 0xBB 0xBF) is accepted. + if (TryConsume(static_cast<char>(0xEF))) { + if (!TryConsume(static_cast<char>(0xBB)) || + !TryConsume(static_cast<char>(0xBF))) { + AddError( + "Proto file starts with 0xEF but not UTF-8 BOM. " + "Only UTF-8 is accepted for proto file."); + return false; + } + } + collector.DetachFromPrev(); + } else { + // A comment appearing on the same line must be attached to the previous + // declaration. + ConsumeZeroOrMore<WhitespaceNoNewline>(); + switch (TryConsumeCommentStart()) { + case LINE_COMMENT: + ConsumeLineComment(collector.GetBufferForLineComment()); + + // Don't allow comments on subsequent lines to be attached to a trailing + // comment. + collector.Flush(); + break; + case BLOCK_COMMENT: + ConsumeBlockComment(collector.GetBufferForBlockComment()); + + ConsumeZeroOrMore<WhitespaceNoNewline>(); + if (!TryConsume('\n')) { + // Oops, the next token is on the same line. If we recorded a comment + // we really have no idea which token it should be attached to. + collector.ClearBuffer(); + return Next(); + } + + // Don't allow comments on subsequent lines to be attached to a trailing + // comment. + collector.Flush(); + break; + case SLASH_NOT_COMMENT: + return true; + case NO_COMMENT: + if (!TryConsume('\n')) { + // The next token is on the same line. There are no comments. + return Next(); + } + break; + } + } + + // OK, we are now on the line *after* the previous token. + while (true) { + ConsumeZeroOrMore<WhitespaceNoNewline>(); + + switch (TryConsumeCommentStart()) { + case LINE_COMMENT: + ConsumeLineComment(collector.GetBufferForLineComment()); + break; + case BLOCK_COMMENT: + ConsumeBlockComment(collector.GetBufferForBlockComment()); + + // Consume the rest of the line so that we don't interpret it as a + // blank line the next time around the loop. + ConsumeZeroOrMore<WhitespaceNoNewline>(); + TryConsume('\n'); + break; + case SLASH_NOT_COMMENT: + return true; + case NO_COMMENT: + if (TryConsume('\n')) { + // Completely blank line. + collector.Flush(); + collector.DetachFromPrev(); + } else { + bool result = Next(); + if (!result || current_.text == "}" || current_.text == "]" || + current_.text == ")") { + // It looks like we're at the end of a scope. In this case it + // makes no sense to attach a comment to the following token. + collector.Flush(); + } + return result; + } + break; + } + } +} + +// ------------------------------------------------------------------- +// Token-parsing helpers. Remember that these don't need to report +// errors since any errors should already have been reported while +// tokenizing. Also, these can assume that whatever text they +// are given is text that the tokenizer actually parsed as a token +// of the given type. + +bool Tokenizer::ParseInteger(const std::string& text, uint64_t max_value, + uint64_t* output) { + // Sadly, we can't just use strtoul() since it is only 32-bit and strtoull() + // is non-standard. I hate the C standard library. :( + + // return strtoull(text.c_str(), NULL, 0); + + const char* ptr = text.c_str(); + int base = 10; + if (ptr[0] == '0') { + if (ptr[1] == 'x' || ptr[1] == 'X') { + // This is hex. + base = 16; + ptr += 2; + } else { + // This is octal. + base = 8; + } + } + + uint64_t result = 0; + for (; *ptr != '\0'; ptr++) { + int digit = DigitValue(*ptr); + if (digit < 0 || digit >= base) { + // The token provided by Tokenizer is invalid. i.e., 099 is an invalid + // token, but Tokenizer still think it's integer. + return false; + } + if (static_cast<uint64_t>(digit) > max_value || + result > (max_value - digit) / base) { + // Overflow. + return false; + } + result = result * base + digit; + } + + *output = result; + return true; +} + +double Tokenizer::ParseFloat(const std::string& text) { + const char* start = text.c_str(); + char* end; + double result = NoLocaleStrtod(start, &end); + + // "1e" is not a valid float, but if the tokenizer reads it, it will + // report an error but still return it as a valid token. We need to + // accept anything the tokenizer could possibly return, error or not. + if (*end == 'e' || *end == 'E') { + ++end; + if (*end == '-' || *end == '+') ++end; + } + + // If the Tokenizer had allow_f_after_float_ enabled, the float may be + // suffixed with the letter 'f'. + if (*end == 'f' || *end == 'F') { + ++end; + } + + GOOGLE_LOG_IF(DFATAL, + static_cast<size_t>(end - start) != text.size() || *start == '-') + << " Tokenizer::ParseFloat() passed text that could not have been" + " tokenized as a float: " + << CEscape(text); + return result; +} + +// Helper to append a Unicode code point to a string as UTF8, without bringing +// in any external dependencies. +static void AppendUTF8(uint32_t code_point, std::string* output) { + uint32_t tmp = 0; + int len = 0; + if (code_point <= 0x7f) { + tmp = code_point; + len = 1; + } else if (code_point <= 0x07ff) { + tmp = 0x0000c080 | ((code_point & 0x07c0) << 2) | (code_point & 0x003f); + len = 2; + } else if (code_point <= 0xffff) { + tmp = 0x00e08080 | ((code_point & 0xf000) << 4) | + ((code_point & 0x0fc0) << 2) | (code_point & 0x003f); + len = 3; + } else if (code_point <= 0x10ffff) { + tmp = 0xf0808080 | ((code_point & 0x1c0000) << 6) | + ((code_point & 0x03f000) << 4) | ((code_point & 0x000fc0) << 2) | + (code_point & 0x003f); + len = 4; + } else { + // Unicode code points end at 0x10FFFF, so this is out-of-range. + // ConsumeString permits hex values up to 0x1FFFFF, and FetchUnicodePoint + // doesn't perform a range check. + StringAppendF(output, "\\U%08x", code_point); + return; + } + tmp = ghtonl(tmp); + output->append(reinterpret_cast<const char*>(&tmp) + sizeof(tmp) - len, len); +} + +// Try to read <len> hex digits from ptr, and stuff the numeric result into +// *result. Returns true if that many digits were successfully consumed. +static bool ReadHexDigits(const char* ptr, int len, uint32_t* result) { + *result = 0; + if (len == 0) return false; + for (const char* end = ptr + len; ptr < end; ++ptr) { + if (*ptr == '\0') return false; + *result = (*result << 4) + DigitValue(*ptr); + } + return true; +} + +// Handling UTF-16 surrogate pairs. UTF-16 encodes code points in the range +// 0x10000...0x10ffff as a pair of numbers, a head surrogate followed by a trail +// surrogate. These numbers are in a reserved range of Unicode code points, so +// if we encounter such a pair we know how to parse it and convert it into a +// single code point. +static const uint32_t kMinHeadSurrogate = 0xd800; +static const uint32_t kMaxHeadSurrogate = 0xdc00; +static const uint32_t kMinTrailSurrogate = 0xdc00; +static const uint32_t kMaxTrailSurrogate = 0xe000; + +static inline bool IsHeadSurrogate(uint32_t code_point) { + return (code_point >= kMinHeadSurrogate) && (code_point < kMaxHeadSurrogate); +} + +static inline bool IsTrailSurrogate(uint32_t code_point) { + return (code_point >= kMinTrailSurrogate) && + (code_point < kMaxTrailSurrogate); +} + +// Combine a head and trail surrogate into a single Unicode code point. +static uint32_t AssembleUTF16(uint32_t head_surrogate, + uint32_t trail_surrogate) { + GOOGLE_DCHECK(IsHeadSurrogate(head_surrogate)); + GOOGLE_DCHECK(IsTrailSurrogate(trail_surrogate)); + return 0x10000 + (((head_surrogate - kMinHeadSurrogate) << 10) | + (trail_surrogate - kMinTrailSurrogate)); +} + +// Convert the escape sequence parameter to a number of expected hex digits. +static inline int UnicodeLength(char key) { + if (key == 'u') return 4; + if (key == 'U') return 8; + return 0; +} + +// Given a pointer to the 'u' or 'U' starting a Unicode escape sequence, attempt +// to parse that sequence. On success, returns a pointer to the first char +// beyond that sequence, and fills in *code_point. On failure, returns ptr +// itself. +static const char* FetchUnicodePoint(const char* ptr, uint32_t* code_point) { + const char* p = ptr; + // Fetch the code point. + const int len = UnicodeLength(*p++); + if (!ReadHexDigits(p, len, code_point)) return ptr; + p += len; + + // Check if the code point we read is a "head surrogate." If so, then we + // expect it to be immediately followed by another code point which is a valid + // "trail surrogate," and together they form a UTF-16 pair which decodes into + // a single Unicode point. Trail surrogates may only use \u, not \U. + if (IsHeadSurrogate(*code_point) && *p == '\\' && *(p + 1) == 'u') { + uint32_t trail_surrogate; + if (ReadHexDigits(p + 2, 4, &trail_surrogate) && + IsTrailSurrogate(trail_surrogate)) { + *code_point = AssembleUTF16(*code_point, trail_surrogate); + p += 6; + } + // If this failed, then we just emit the head surrogate as a code point. + // It's bogus, but so is the string. + } + + return p; +} + +// The text string must begin and end with single or double quote +// characters. +void Tokenizer::ParseStringAppend(const std::string& text, + std::string* output) { + // Reminder: text[0] is always a quote character. (If text is + // empty, it's invalid, so we'll just return). + const size_t text_size = text.size(); + if (text_size == 0) { + GOOGLE_LOG(DFATAL) << " Tokenizer::ParseStringAppend() passed text that could not" + " have been tokenized as a string: " + << CEscape(text); + return; + } + + // Reserve room for new string. The branch is necessary because if + // there is already space available the reserve() call might + // downsize the output. + const size_t new_len = text_size + output->size(); + if (new_len > output->capacity()) { + output->reserve(new_len); + } + + // Loop through the string copying characters to "output" and + // interpreting escape sequences. Note that any invalid escape + // sequences or other errors were already reported while tokenizing. + // In this case we do not need to produce valid results. + for (const char* ptr = text.c_str() + 1; *ptr != '\0'; ptr++) { + if (*ptr == '\\' && ptr[1] != '\0') { + // An escape sequence. + ++ptr; + + if (OctalDigit::InClass(*ptr)) { + // An octal escape. May one, two, or three digits. + int code = DigitValue(*ptr); + if (OctalDigit::InClass(ptr[1])) { + ++ptr; + code = code * 8 + DigitValue(*ptr); + } + if (OctalDigit::InClass(ptr[1])) { + ++ptr; + code = code * 8 + DigitValue(*ptr); + } + output->push_back(static_cast<char>(code)); + + } else if (*ptr == 'x') { + // A hex escape. May zero, one, or two digits. (The zero case + // will have been caught as an error earlier.) + int code = 0; + if (HexDigit::InClass(ptr[1])) { + ++ptr; + code = DigitValue(*ptr); + } + if (HexDigit::InClass(ptr[1])) { + ++ptr; + code = code * 16 + DigitValue(*ptr); + } + output->push_back(static_cast<char>(code)); + + } else if (*ptr == 'u' || *ptr == 'U') { + uint32_t unicode; + const char* end = FetchUnicodePoint(ptr, &unicode); + if (end == ptr) { + // Failure: Just dump out what we saw, don't try to parse it. + output->push_back(*ptr); + } else { + AppendUTF8(unicode, output); + ptr = end - 1; // Because we're about to ++ptr. + } + } else { + // Some other escape code. + output->push_back(TranslateEscape(*ptr)); + } + + } else if (*ptr == text[0] && ptr[1] == '\0') { + // Ignore final quote matching the starting quote. + } else { + output->push_back(*ptr); + } + } +} + +template <typename CharacterClass> +static bool AllInClass(const std::string& s) { + for (const char character : s) { + if (!CharacterClass::InClass(character)) return false; + } + return true; +} + +bool Tokenizer::IsIdentifier(const std::string& text) { + // Mirrors IDENTIFIER definition in Tokenizer::Next() above. + if (text.size() == 0) return false; + if (!Letter::InClass(text.at(0))) return false; + if (!AllInClass<Alphanumeric>(text.substr(1))) return false; + return true; +} + +} // namespace io +} // namespace protobuf +} // namespace google diff --git a/NorthstarDedicatedTest/include/protobuf/io/tokenizer.h b/NorthstarDedicatedTest/include/protobuf/io/tokenizer.h new file mode 100644 index 00000000..a4059081 --- /dev/null +++ b/NorthstarDedicatedTest/include/protobuf/io/tokenizer.h @@ -0,0 +1,440 @@ +// Protocol Buffers - Google's data interchange format +// Copyright 2008 Google Inc. All rights reserved. +// https://developers.google.com/protocol-buffers/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// Author: kenton@google.com (Kenton Varda) +// Based on original Protocol Buffers design by +// Sanjay Ghemawat, Jeff Dean, and others. +// +// Class for parsing tokenized text from a ZeroCopyInputStream. + +#ifndef GOOGLE_PROTOBUF_IO_TOKENIZER_H__ +#define GOOGLE_PROTOBUF_IO_TOKENIZER_H__ + + +#include <string> +#include <vector> + +#include <stubs/common.h> +#include <stubs/logging.h> +#include <port_def.inc> + +namespace google { +namespace protobuf { +namespace io { + +class ZeroCopyInputStream; // zero_copy_stream.h + +// Defined in this file. +class ErrorCollector; +class Tokenizer; + +// By "column number", the proto compiler refers to a count of the number +// of bytes before a given byte, except that a tab character advances to +// the next multiple of 8 bytes. Note in particular that column numbers +// are zero-based, while many user interfaces use one-based column numbers. +typedef int ColumnNumber; + +// Abstract interface for an object which collects the errors that occur +// during parsing. A typical implementation might simply print the errors +// to stdout. +class PROTOBUF_EXPORT ErrorCollector { + public: + inline ErrorCollector() {} + virtual ~ErrorCollector(); + + // Indicates that there was an error in the input at the given line and + // column numbers. The numbers are zero-based, so you may want to add + // 1 to each before printing them. + virtual void AddError(int line, ColumnNumber column, + const std::string& message) = 0; + + // Indicates that there was a warning in the input at the given line and + // column numbers. The numbers are zero-based, so you may want to add + // 1 to each before printing them. + virtual void AddWarning(int /* line */, ColumnNumber /* column */, + const std::string& /* message */) {} + + private: + GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(ErrorCollector); +}; + +// This class converts a stream of raw text into a stream of tokens for +// the protocol definition parser to parse. The tokens recognized are +// similar to those that make up the C language; see the TokenType enum for +// precise descriptions. Whitespace and comments are skipped. By default, +// C- and C++-style comments are recognized, but other styles can be used by +// calling set_comment_style(). +class PROTOBUF_EXPORT Tokenizer { + public: + // Construct a Tokenizer that reads and tokenizes text from the given + // input stream and writes errors to the given error_collector. + // The caller keeps ownership of input and error_collector. + Tokenizer(ZeroCopyInputStream* input, ErrorCollector* error_collector); + ~Tokenizer(); + + enum TokenType { + TYPE_START, // Next() has not yet been called. + TYPE_END, // End of input reached. "text" is empty. + + TYPE_IDENTIFIER, // A sequence of letters, digits, and underscores, not + // starting with a digit. It is an error for a number + // to be followed by an identifier with no space in + // between. + TYPE_INTEGER, // A sequence of digits representing an integer. Normally + // the digits are decimal, but a prefix of "0x" indicates + // a hex number and a leading zero indicates octal, just + // like with C numeric literals. A leading negative sign + // is NOT included in the token; it's up to the parser to + // interpret the unary minus operator on its own. + TYPE_FLOAT, // A floating point literal, with a fractional part and/or + // an exponent. Always in decimal. Again, never + // negative. + TYPE_STRING, // A quoted sequence of escaped characters. Either single + // or double quotes can be used, but they must match. + // A string literal cannot cross a line break. + TYPE_SYMBOL, // Any other printable character, like '!' or '+'. + // Symbols are always a single character, so "!+$%" is + // four tokens. + TYPE_WHITESPACE, // A sequence of whitespace. This token type is only + // produced if report_whitespace() is true. It is not + // reported for whitespace within comments or strings. + TYPE_NEWLINE, // A newline (\n). This token type is only + // produced if report_whitespace() is true and + // report_newlines() is true. It is not reported for + // newlines in comments or strings. + }; + + // Structure representing a token read from the token stream. + struct Token { + TokenType type; + std::string text; // The exact text of the token as it appeared in + // the input. e.g. tokens of TYPE_STRING will still + // be escaped and in quotes. + + // "line" and "column" specify the position of the first character of + // the token within the input stream. They are zero-based. + int line; + ColumnNumber column; + ColumnNumber end_column; + }; + + // Get the current token. This is updated when Next() is called. Before + // the first call to Next(), current() has type TYPE_START and no contents. + const Token& current(); + + // Return the previous token -- i.e. what current() returned before the + // previous call to Next(). + const Token& previous(); + + // Advance to the next token. Returns false if the end of the input is + // reached. + bool Next(); + + // Like Next(), but also collects comments which appear between the previous + // and next tokens. + // + // Comments which appear to be attached to the previous token are stored + // in *prev_tailing_comments. Comments which appear to be attached to the + // next token are stored in *next_leading_comments. Comments appearing in + // between which do not appear to be attached to either will be added to + // detached_comments. Any of these parameters can be NULL to simply discard + // the comments. + // + // A series of line comments appearing on consecutive lines, with no other + // tokens appearing on those lines, will be treated as a single comment. + // + // Only the comment content is returned; comment markers (e.g. //) are + // stripped out. For block comments, leading whitespace and an asterisk will + // be stripped from the beginning of each line other than the first. Newlines + // are included in the output. + // + // Examples: + // + // optional int32 foo = 1; // Comment attached to foo. + // // Comment attached to bar. + // optional int32 bar = 2; + // + // optional string baz = 3; + // // Comment attached to baz. + // // Another line attached to baz. + // + // // Comment attached to qux. + // // + // // Another line attached to qux. + // optional double qux = 4; + // + // // Detached comment. This is not attached to qux or corge + // // because there are blank lines separating it from both. + // + // optional string corge = 5; + // /* Block comment attached + // * to corge. Leading asterisks + // * will be removed. */ + // /* Block comment attached to + // * grault. */ + // optional int32 grault = 6; + bool NextWithComments(std::string* prev_trailing_comments, + std::vector<std::string>* detached_comments, + std::string* next_leading_comments); + + // Parse helpers --------------------------------------------------- + + // Parses a TYPE_FLOAT token. This never fails, so long as the text actually + // comes from a TYPE_FLOAT token parsed by Tokenizer. If it doesn't, the + // result is undefined (possibly an assert failure). + static double ParseFloat(const std::string& text); + + // Parses a TYPE_STRING token. This never fails, so long as the text actually + // comes from a TYPE_STRING token parsed by Tokenizer. If it doesn't, the + // result is undefined (possibly an assert failure). + static void ParseString(const std::string& text, std::string* output); + + // Identical to ParseString, but appends to output. + static void ParseStringAppend(const std::string& text, std::string* output); + + // Parses a TYPE_INTEGER token. Returns false if the result would be + // greater than max_value. Otherwise, returns true and sets *output to the + // result. If the text is not from a Token of type TYPE_INTEGER originally + // parsed by a Tokenizer, the result is undefined (possibly an assert + // failure). + static bool ParseInteger(const std::string& text, uint64_t max_value, + uint64_t* output); + + // Options --------------------------------------------------------- + + // Set true to allow floats to be suffixed with the letter 'f'. Tokens + // which would otherwise be integers but which have the 'f' suffix will be + // forced to be interpreted as floats. For all other purposes, the 'f' is + // ignored. + void set_allow_f_after_float(bool value) { allow_f_after_float_ = value; } + + // Valid values for set_comment_style(). + enum CommentStyle { + // Line comments begin with "//", block comments are delimited by "/*" and + // "*/". + CPP_COMMENT_STYLE, + // Line comments begin with "#". No way to write block comments. + SH_COMMENT_STYLE + }; + + // Sets the comment style. + void set_comment_style(CommentStyle style) { comment_style_ = style; } + + // Whether to require whitespace between a number and a field name. + // Default is true. Do not use this; for Google-internal cleanup only. + void set_require_space_after_number(bool require) { + require_space_after_number_ = require; + } + + // Whether to allow string literals to span multiple lines. Default is false. + // Do not use this; for Google-internal cleanup only. + void set_allow_multiline_strings(bool allow) { + allow_multiline_strings_ = allow; + } + + // If true, whitespace tokens are reported by Next(). + // Note: `set_report_whitespace(false)` implies `set_report_newlines(false)`. + bool report_whitespace() const; + void set_report_whitespace(bool report); + + // If true, newline tokens are reported by Next(). + // Note: `set_report_newlines(true)` implies `set_report_whitespace(true)`. + bool report_newlines() const; + void set_report_newlines(bool report); + + // External helper: validate an identifier. + static bool IsIdentifier(const std::string& text); + + // ----------------------------------------------------------------- + private: + GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(Tokenizer); + + Token current_; // Returned by current(). + Token previous_; // Returned by previous(). + + ZeroCopyInputStream* input_; + ErrorCollector* error_collector_; + + char current_char_; // == buffer_[buffer_pos_], updated by NextChar(). + const char* buffer_; // Current buffer returned from input_. + int buffer_size_; // Size of buffer_. + int buffer_pos_; // Current position within the buffer. + bool read_error_; // Did we previously encounter a read error? + + // Line and column number of current_char_ within the whole input stream. + int line_; + ColumnNumber column_; + + // String to which text should be appended as we advance through it. + // Call RecordTo(&str) to start recording and StopRecording() to stop. + // E.g. StartToken() calls RecordTo(¤t_.text). record_start_ is the + // position within the current buffer where recording started. + std::string* record_target_; + int record_start_; + + // Options. + bool allow_f_after_float_; + CommentStyle comment_style_; + bool require_space_after_number_; + bool allow_multiline_strings_; + bool report_whitespace_ = false; + bool report_newlines_ = false; + + // Since we count columns we need to interpret tabs somehow. We'll take + // the standard 8-character definition for lack of any way to do better. + // This must match the documentation of ColumnNumber. + static const int kTabWidth = 8; + + // ----------------------------------------------------------------- + // Helper methods. + + // Consume this character and advance to the next one. + void NextChar(); + + // Read a new buffer from the input. + void Refresh(); + + inline void RecordTo(std::string* target); + inline void StopRecording(); + + // Called when the current character is the first character of a new + // token (not including whitespace or comments). + inline void StartToken(); + // Called when the current character is the first character after the + // end of the last token. After this returns, current_.text will + // contain all text consumed since StartToken() was called. + inline void EndToken(); + + // Convenience method to add an error at the current line and column. + void AddError(const std::string& message) { + error_collector_->AddError(line_, column_, message); + } + + // ----------------------------------------------------------------- + // The following four methods are used to consume tokens of specific + // types. They are actually used to consume all characters *after* + // the first, since the calling function consumes the first character + // in order to decide what kind of token is being read. + + // Read and consume a string, ending when the given delimiter is + // consumed. + void ConsumeString(char delimiter); + + // Read and consume a number, returning TYPE_FLOAT or TYPE_INTEGER + // depending on what was read. This needs to know if the first + // character was a zero in order to correctly recognize hex and octal + // numbers. + // It also needs to know if the first character was a . to parse floating + // point correctly. + TokenType ConsumeNumber(bool started_with_zero, bool started_with_dot); + + // Consume the rest of a line. + void ConsumeLineComment(std::string* content); + // Consume until "*/". + void ConsumeBlockComment(std::string* content); + + enum NextCommentStatus { + // Started a line comment. + LINE_COMMENT, + + // Started a block comment. + BLOCK_COMMENT, + + // Consumed a slash, then realized it wasn't a comment. current_ has + // been filled in with a slash token. The caller should return it. + SLASH_NOT_COMMENT, + + // We do not appear to be starting a comment here. + NO_COMMENT + }; + + // If we're at the start of a new comment, consume it and return what kind + // of comment it is. + NextCommentStatus TryConsumeCommentStart(); + + // If we're looking at a TYPE_WHITESPACE token and `report_whitespace_` is + // true, consume it and return true. + bool TryConsumeWhitespace(); + + // If we're looking at a TYPE_NEWLINE token and `report_newlines_` is true, + // consume it and return true. + bool TryConsumeNewline(); + + // ----------------------------------------------------------------- + // These helper methods make the parsing code more readable. The + // "character classes" referred to are defined at the top of the .cc file. + // Basically it is a C++ class with one method: + // static bool InClass(char c); + // The method returns true if c is a member of this "class", like "Letter" + // or "Digit". + + // Returns true if the current character is of the given character + // class, but does not consume anything. + template <typename CharacterClass> + inline bool LookingAt(); + + // If the current character is in the given class, consume it and return + // true. Otherwise return false. + // e.g. TryConsumeOne<Letter>() + template <typename CharacterClass> + inline bool TryConsumeOne(); + + // Like above, but try to consume the specific character indicated. + inline bool TryConsume(char c); + + // Consume zero or more of the given character class. + template <typename CharacterClass> + inline void ConsumeZeroOrMore(); + + // Consume one or more of the given character class or log the given + // error message. + // e.g. ConsumeOneOrMore<Digit>("Expected digits."); + template <typename CharacterClass> + inline void ConsumeOneOrMore(const char* error); +}; + +// inline methods ==================================================== +inline const Tokenizer::Token& Tokenizer::current() { return current_; } + +inline const Tokenizer::Token& Tokenizer::previous() { return previous_; } + +inline void Tokenizer::ParseString(const std::string& text, + std::string* output) { + output->clear(); + ParseStringAppend(text, output); +} + +} // namespace io +} // namespace protobuf +} // namespace google + +#include <port_undef.inc> + +#endif // GOOGLE_PROTOBUF_IO_TOKENIZER_H__ diff --git a/NorthstarDedicatedTest/include/protobuf/io/tokenizer_unittest.cc b/NorthstarDedicatedTest/include/protobuf/io/tokenizer_unittest.cc new file mode 100644 index 00000000..87e23677 --- /dev/null +++ b/NorthstarDedicatedTest/include/protobuf/io/tokenizer_unittest.cc @@ -0,0 +1,1073 @@ +// Protocol Buffers - Google's data interchange format +// Copyright 2008 Google Inc. All rights reserved. +// https://developers.google.com/protocol-buffers/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// Author: kenton@google.com (Kenton Varda) +// Based on original Protocol Buffers design by +// Sanjay Ghemawat, Jeff Dean, and others. + +#include <io/tokenizer.h> + +#include <limits.h> +#include <math.h> + +#include <vector> + +#include <stubs/common.h> +#include <stubs/logging.h> +#include <stubs/strutil.h> +#include <stubs/substitute.h> +#include <io/zero_copy_stream_impl.h> +#include <testing/googletest.h> +#include <gtest/gtest.h> + +namespace google { +namespace protobuf { +namespace io { +namespace { + +// =================================================================== +// Data-Driven Test Infrastructure + +// TODO(kenton): This is copied from coded_stream_unittest. This is +// temporary until these features are integrated into gTest itself. + +// TEST_1D and TEST_2D are macros I'd eventually like to see added to +// gTest. These macros can be used to declare tests which should be +// run multiple times, once for each item in some input array. TEST_1D +// tests all cases in a single input array. TEST_2D tests all +// combinations of cases from two arrays. The arrays must be statically +// defined such that the GOOGLE_ARRAYSIZE() macro works on them. Example: +// +// int kCases[] = {1, 2, 3, 4} +// TEST_1D(MyFixture, MyTest, kCases) { +// EXPECT_GT(kCases_case, 0); +// } +// +// This test iterates through the numbers 1, 2, 3, and 4 and tests that +// they are all grater than zero. In case of failure, the exact case +// which failed will be printed. The case type must be printable using +// ostream::operator<<. + +#define TEST_1D(FIXTURE, NAME, CASES) \ + class FIXTURE##_##NAME##_DD : public FIXTURE { \ + protected: \ + template <typename CaseType> \ + void DoSingleCase(const CaseType& CASES##_case); \ + }; \ + \ + TEST_F(FIXTURE##_##NAME##_DD, NAME) { \ + for (int i = 0; i < GOOGLE_ARRAYSIZE(CASES); i++) { \ + SCOPED_TRACE(testing::Message() \ + << #CASES " case #" << i << ": " << CASES[i]); \ + DoSingleCase(CASES[i]); \ + } \ + } \ + \ + template <typename CaseType> \ + void FIXTURE##_##NAME##_DD::DoSingleCase(const CaseType& CASES##_case) + +#define TEST_2D(FIXTURE, NAME, CASES1, CASES2) \ + class FIXTURE##_##NAME##_DD : public FIXTURE { \ + protected: \ + template <typename CaseType1, typename CaseType2> \ + void DoSingleCase(const CaseType1& CASES1##_case, \ + const CaseType2& CASES2##_case); \ + }; \ + \ + TEST_F(FIXTURE##_##NAME##_DD, NAME) { \ + for (int i = 0; i < GOOGLE_ARRAYSIZE(CASES1); i++) { \ + for (int j = 0; j < GOOGLE_ARRAYSIZE(CASES2); j++) { \ + SCOPED_TRACE(testing::Message() \ + << #CASES1 " case #" << i << ": " << CASES1[i] << ", " \ + << #CASES2 " case #" << j << ": " << CASES2[j]); \ + DoSingleCase(CASES1[i], CASES2[j]); \ + } \ + } \ + } \ + \ + template <typename CaseType1, typename CaseType2> \ + void FIXTURE##_##NAME##_DD::DoSingleCase(const CaseType1& CASES1##_case, \ + const CaseType2& CASES2##_case) + +// ------------------------------------------------------------------- + +// An input stream that is basically like an ArrayInputStream but sometimes +// returns empty buffers, just to throw us off. +class TestInputStream : public ZeroCopyInputStream { + public: + TestInputStream(const void* data, int size, int block_size) + : array_stream_(data, size, block_size), counter_(0) {} + ~TestInputStream() {} + + // implements ZeroCopyInputStream ---------------------------------- + bool Next(const void** data, int* size) override { + // We'll return empty buffers starting with the first buffer, and every + // 3 and 5 buffers after that. + if (counter_ % 3 == 0 || counter_ % 5 == 0) { + *data = NULL; + *size = 0; + ++counter_; + return true; + } else { + ++counter_; + return array_stream_.Next(data, size); + } + } + + void BackUp(int count) override { return array_stream_.BackUp(count); } + bool Skip(int count) override { return array_stream_.Skip(count); } + int64_t ByteCount() const override { return array_stream_.ByteCount(); } + + private: + ArrayInputStream array_stream_; + int counter_; +}; + +// ------------------------------------------------------------------- + +// An error collector which simply concatenates all its errors into a big +// block of text which can be checked. +class TestErrorCollector : public ErrorCollector { + public: + TestErrorCollector() {} + ~TestErrorCollector() {} + + std::string text_; + + // implements ErrorCollector --------------------------------------- + void AddError(int line, int column, const std::string& message) { + strings::SubstituteAndAppend(&text_, "$0:$1: $2\n", line, column, message); + } +}; + +// ------------------------------------------------------------------- + +// We test each operation over a variety of block sizes to insure that +// we test cases where reads cross buffer boundaries as well as cases +// where they don't. This is sort of a brute-force approach to this, +// but it's easy to write and easy to understand. +const int kBlockSizes[] = {1, 2, 3, 5, 7, 13, 32, 1024}; + +class TokenizerTest : public testing::Test { + protected: + // For easy testing. + uint64 ParseInteger(const std::string& text) { + uint64 result; + EXPECT_TRUE(Tokenizer::ParseInteger(text, kuint64max, &result)); + return result; + } +}; + +// =================================================================== + +// These tests causes gcc 3.3.5 (and earlier?) to give the cryptic error: +// "sorry, unimplemented: `method_call_expr' not supported by dump_expr" +#if !defined(__GNUC__) || __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ > 3) + +// In each test case, the entire input text should parse as a single token +// of the given type. +struct SimpleTokenCase { + std::string input; + Tokenizer::TokenType type; +}; + +inline std::ostream& operator<<(std::ostream& out, + const SimpleTokenCase& test_case) { + return out << CEscape(test_case.input); +} + +SimpleTokenCase kSimpleTokenCases[] = { + // Test identifiers. + {"hello", Tokenizer::TYPE_IDENTIFIER}, + + // Test integers. + {"123", Tokenizer::TYPE_INTEGER}, + {"0xab6", Tokenizer::TYPE_INTEGER}, + {"0XAB6", Tokenizer::TYPE_INTEGER}, + {"0X1234567", Tokenizer::TYPE_INTEGER}, + {"0x89abcdef", Tokenizer::TYPE_INTEGER}, + {"0x89ABCDEF", Tokenizer::TYPE_INTEGER}, + {"01234567", Tokenizer::TYPE_INTEGER}, + + // Test floats. + {"123.45", Tokenizer::TYPE_FLOAT}, + {"1.", Tokenizer::TYPE_FLOAT}, + {"1e3", Tokenizer::TYPE_FLOAT}, + {"1E3", Tokenizer::TYPE_FLOAT}, + {"1e-3", Tokenizer::TYPE_FLOAT}, + {"1e+3", Tokenizer::TYPE_FLOAT}, + {"1.e3", Tokenizer::TYPE_FLOAT}, + {"1.2e3", Tokenizer::TYPE_FLOAT}, + {".1", Tokenizer::TYPE_FLOAT}, + {".1e3", Tokenizer::TYPE_FLOAT}, + {".1e-3", Tokenizer::TYPE_FLOAT}, + {".1e+3", Tokenizer::TYPE_FLOAT}, + + // Test strings. + {"'hello'", Tokenizer::TYPE_STRING}, + {"\"foo\"", Tokenizer::TYPE_STRING}, + {"'a\"b'", Tokenizer::TYPE_STRING}, + {"\"a'b\"", Tokenizer::TYPE_STRING}, + {"'a\\'b'", Tokenizer::TYPE_STRING}, + {"\"a\\\"b\"", Tokenizer::TYPE_STRING}, + {"'\\xf'", Tokenizer::TYPE_STRING}, + {"'\\0'", Tokenizer::TYPE_STRING}, + + // Test symbols. + {"+", Tokenizer::TYPE_SYMBOL}, + {".", Tokenizer::TYPE_SYMBOL}, +}; + +TEST_2D(TokenizerTest, SimpleTokens, kSimpleTokenCases, kBlockSizes) { + // Set up the tokenizer. + TestInputStream input(kSimpleTokenCases_case.input.data(), + kSimpleTokenCases_case.input.size(), kBlockSizes_case); + TestErrorCollector error_collector; + Tokenizer tokenizer(&input, &error_collector); + + // Before Next() is called, the initial token should always be TYPE_START. + EXPECT_EQ(Tokenizer::TYPE_START, tokenizer.current().type); + EXPECT_EQ("", tokenizer.current().text); + EXPECT_EQ(0, tokenizer.current().line); + EXPECT_EQ(0, tokenizer.current().column); + EXPECT_EQ(0, tokenizer.current().end_column); + + // Parse the token. + ASSERT_TRUE(tokenizer.Next()); + + // Check that it has the right type. + EXPECT_EQ(kSimpleTokenCases_case.type, tokenizer.current().type); + // Check that it contains the complete input text. + EXPECT_EQ(kSimpleTokenCases_case.input, tokenizer.current().text); + // Check that it is located at the beginning of the input + EXPECT_EQ(0, tokenizer.current().line); + EXPECT_EQ(0, tokenizer.current().column); + EXPECT_EQ(kSimpleTokenCases_case.input.size(), + tokenizer.current().end_column); + + // There should be no more input. + EXPECT_FALSE(tokenizer.Next()); + + // After Next() returns false, the token should have type TYPE_END. + EXPECT_EQ(Tokenizer::TYPE_END, tokenizer.current().type); + EXPECT_EQ("", tokenizer.current().text); + EXPECT_EQ(0, tokenizer.current().line); + EXPECT_EQ(kSimpleTokenCases_case.input.size(), tokenizer.current().column); + EXPECT_EQ(kSimpleTokenCases_case.input.size(), + tokenizer.current().end_column); + + // There should be no errors. + EXPECT_TRUE(error_collector.text_.empty()); +} + +TEST_1D(TokenizerTest, FloatSuffix, kBlockSizes) { + // Test the "allow_f_after_float" option. + + // Set up the tokenizer. + const char* text = "1f 2.5f 6e3f 7F"; + TestInputStream input(text, strlen(text), kBlockSizes_case); + TestErrorCollector error_collector; + Tokenizer tokenizer(&input, &error_collector); + tokenizer.set_allow_f_after_float(true); + + // Advance through tokens and check that they are parsed as expected. + ASSERT_TRUE(tokenizer.Next()); + EXPECT_EQ(tokenizer.current().text, "1f"); + EXPECT_EQ(tokenizer.current().type, Tokenizer::TYPE_FLOAT); + ASSERT_TRUE(tokenizer.Next()); + EXPECT_EQ(tokenizer.current().text, "2.5f"); + EXPECT_EQ(tokenizer.current().type, Tokenizer::TYPE_FLOAT); + ASSERT_TRUE(tokenizer.Next()); + EXPECT_EQ(tokenizer.current().text, "6e3f"); + EXPECT_EQ(tokenizer.current().type, Tokenizer::TYPE_FLOAT); + ASSERT_TRUE(tokenizer.Next()); + EXPECT_EQ(tokenizer.current().text, "7F"); + EXPECT_EQ(tokenizer.current().type, Tokenizer::TYPE_FLOAT); + + // There should be no more input. + EXPECT_FALSE(tokenizer.Next()); + // There should be no errors. + EXPECT_TRUE(error_collector.text_.empty()); +} + +SimpleTokenCase kWhitespaceTokenCases[] = { + {" ", Tokenizer::TYPE_WHITESPACE}, + {" ", Tokenizer::TYPE_WHITESPACE}, + {"\t", Tokenizer::TYPE_WHITESPACE}, + {"\v", Tokenizer::TYPE_WHITESPACE}, + {"\t ", Tokenizer::TYPE_WHITESPACE}, + {"\v\t", Tokenizer::TYPE_WHITESPACE}, + {" \t\r", Tokenizer::TYPE_WHITESPACE}, + // Newlines: + {"\n", Tokenizer::TYPE_NEWLINE}, +}; + +TEST_2D(TokenizerTest, Whitespace, kWhitespaceTokenCases, kBlockSizes) { + { + TestInputStream input(kWhitespaceTokenCases_case.input.data(), + kWhitespaceTokenCases_case.input.size(), + kBlockSizes_case); + TestErrorCollector error_collector; + Tokenizer tokenizer(&input, &error_collector); + + EXPECT_FALSE(tokenizer.Next()); + } + { + TestInputStream input(kWhitespaceTokenCases_case.input.data(), + kWhitespaceTokenCases_case.input.size(), + kBlockSizes_case); + TestErrorCollector error_collector; + Tokenizer tokenizer(&input, &error_collector); + tokenizer.set_report_whitespace(true); + tokenizer.set_report_newlines(true); + + ASSERT_TRUE(tokenizer.Next()); + EXPECT_EQ(tokenizer.current().text, kWhitespaceTokenCases_case.input); + EXPECT_EQ(tokenizer.current().type, kWhitespaceTokenCases_case.type); + + EXPECT_FALSE(tokenizer.Next()); + } +} + +#endif + +// ------------------------------------------------------------------- + +// In each case, the input is parsed to produce a list of tokens. The +// last token in "output" must have type TYPE_END. +struct MultiTokenCase { + std::string input; + std::vector<Tokenizer::Token> output; +}; + +inline std::ostream& operator<<(std::ostream& out, + const MultiTokenCase& test_case) { + return out << CEscape(test_case.input); +} + +MultiTokenCase kMultiTokenCases[] = { + // Test empty input. + {"", + { + {Tokenizer::TYPE_END, "", 0, 0, 0}, + }}, + + // Test all token types at the same time. + {"foo 1 1.2 + 'bar'", + { + {Tokenizer::TYPE_IDENTIFIER, "foo", 0, 0, 3}, + {Tokenizer::TYPE_INTEGER, "1", 0, 4, 5}, + {Tokenizer::TYPE_FLOAT, "1.2", 0, 6, 9}, + {Tokenizer::TYPE_SYMBOL, "+", 0, 10, 11}, + {Tokenizer::TYPE_STRING, "'bar'", 0, 12, 17}, + {Tokenizer::TYPE_END, "", 0, 17, 17}, + }}, + + // Test that consecutive symbols are parsed as separate tokens. + {"!@+%", + { + {Tokenizer::TYPE_SYMBOL, "!", 0, 0, 1}, + {Tokenizer::TYPE_SYMBOL, "@", 0, 1, 2}, + {Tokenizer::TYPE_SYMBOL, "+", 0, 2, 3}, + {Tokenizer::TYPE_SYMBOL, "%", 0, 3, 4}, + {Tokenizer::TYPE_END, "", 0, 4, 4}, + }}, + + // Test that newlines affect line numbers correctly. + {"foo bar\nrab oof", + { + {Tokenizer::TYPE_IDENTIFIER, "foo", 0, 0, 3}, + {Tokenizer::TYPE_IDENTIFIER, "bar", 0, 4, 7}, + {Tokenizer::TYPE_IDENTIFIER, "rab", 1, 0, 3}, + {Tokenizer::TYPE_IDENTIFIER, "oof", 1, 4, 7}, + {Tokenizer::TYPE_END, "", 1, 7, 7}, + }}, + + // Test that tabs affect column numbers correctly. + {"foo\tbar \tbaz", + { + {Tokenizer::TYPE_IDENTIFIER, "foo", 0, 0, 3}, + {Tokenizer::TYPE_IDENTIFIER, "bar", 0, 8, 11}, + {Tokenizer::TYPE_IDENTIFIER, "baz", 0, 16, 19}, + {Tokenizer::TYPE_END, "", 0, 19, 19}, + }}, + + // Test that tabs in string literals affect column numbers correctly. + {"\"foo\tbar\" baz", + { + {Tokenizer::TYPE_STRING, "\"foo\tbar\"", 0, 0, 12}, + {Tokenizer::TYPE_IDENTIFIER, "baz", 0, 13, 16}, + {Tokenizer::TYPE_END, "", 0, 16, 16}, + }}, + + // Test that line comments are ignored. + {"foo // This is a comment\n" + "bar // This is another comment", + { + {Tokenizer::TYPE_IDENTIFIER, "foo", 0, 0, 3}, + {Tokenizer::TYPE_IDENTIFIER, "bar", 1, 0, 3}, + {Tokenizer::TYPE_END, "", 1, 30, 30}, + }}, + + // Test that block comments are ignored. + {"foo /* This is a block comment */ bar", + { + {Tokenizer::TYPE_IDENTIFIER, "foo", 0, 0, 3}, + {Tokenizer::TYPE_IDENTIFIER, "bar", 0, 34, 37}, + {Tokenizer::TYPE_END, "", 0, 37, 37}, + }}, + + // Test that sh-style comments are not ignored by default. + {"foo # bar\n" + "baz", + { + {Tokenizer::TYPE_IDENTIFIER, "foo", 0, 0, 3}, + {Tokenizer::TYPE_SYMBOL, "#", 0, 4, 5}, + {Tokenizer::TYPE_IDENTIFIER, "bar", 0, 6, 9}, + {Tokenizer::TYPE_IDENTIFIER, "baz", 1, 0, 3}, + {Tokenizer::TYPE_END, "", 1, 3, 3}, + }}, + + // Test all whitespace chars + {"foo\n\t\r\v\fbar", + { + {Tokenizer::TYPE_IDENTIFIER, "foo", 0, 0, 3}, + {Tokenizer::TYPE_IDENTIFIER, "bar", 1, 11, 14}, + {Tokenizer::TYPE_END, "", 1, 14, 14}, + }}, +}; + +TEST_2D(TokenizerTest, MultipleTokens, kMultiTokenCases, kBlockSizes) { + // Set up the tokenizer. + TestInputStream input(kMultiTokenCases_case.input.data(), + kMultiTokenCases_case.input.size(), kBlockSizes_case); + TestErrorCollector error_collector; + Tokenizer tokenizer(&input, &error_collector); + + // Before Next() is called, the initial token should always be TYPE_START. + EXPECT_EQ(Tokenizer::TYPE_START, tokenizer.current().type); + EXPECT_EQ("", tokenizer.current().text); + EXPECT_EQ(0, tokenizer.current().line); + EXPECT_EQ(0, tokenizer.current().column); + EXPECT_EQ(0, tokenizer.current().end_column); + + // Loop through all expected tokens. + int i = 0; + Tokenizer::Token token; + do { + token = kMultiTokenCases_case.output[i++]; + + SCOPED_TRACE(testing::Message() << "Token #" << i << ": " << token.text); + + Tokenizer::Token previous = tokenizer.current(); + + // Next() should only return false when it hits the end token. + if (token.type != Tokenizer::TYPE_END) { + ASSERT_TRUE(tokenizer.Next()); + } else { + ASSERT_FALSE(tokenizer.Next()); + } + + // Check that the previous token is set correctly. + EXPECT_EQ(previous.type, tokenizer.previous().type); + EXPECT_EQ(previous.text, tokenizer.previous().text); + EXPECT_EQ(previous.line, tokenizer.previous().line); + EXPECT_EQ(previous.column, tokenizer.previous().column); + EXPECT_EQ(previous.end_column, tokenizer.previous().end_column); + + // Check that the token matches the expected one. + EXPECT_EQ(token.type, tokenizer.current().type); + EXPECT_EQ(token.text, tokenizer.current().text); + EXPECT_EQ(token.line, tokenizer.current().line); + EXPECT_EQ(token.column, tokenizer.current().column); + EXPECT_EQ(token.end_column, tokenizer.current().end_column); + + } while (token.type != Tokenizer::TYPE_END); + + // There should be no errors. + EXPECT_TRUE(error_collector.text_.empty()); +} + +MultiTokenCase kMultiWhitespaceTokenCases[] = { + // Test all token types at the same time. + {"foo 1 \t1.2 \n +\v'bar'", + { + {Tokenizer::TYPE_IDENTIFIER, "foo", 0, 0, 3}, + {Tokenizer::TYPE_WHITESPACE, " ", 0, 3, 4}, + {Tokenizer::TYPE_INTEGER, "1", 0, 4, 5}, + {Tokenizer::TYPE_WHITESPACE, " \t", 0, 5, 8}, + {Tokenizer::TYPE_FLOAT, "1.2", 0, 8, 11}, + {Tokenizer::TYPE_WHITESPACE, " ", 0, 11, 13}, + {Tokenizer::TYPE_NEWLINE, "\n", 0, 13, 0}, + {Tokenizer::TYPE_WHITESPACE, " ", 1, 0, 3}, + {Tokenizer::TYPE_SYMBOL, "+", 1, 3, 4}, + {Tokenizer::TYPE_WHITESPACE, "\v", 1, 4, 5}, + {Tokenizer::TYPE_STRING, "'bar'", 1, 5, 10}, + {Tokenizer::TYPE_END, "", 1, 10, 10}, + }}, + +}; + +TEST_2D(TokenizerTest, MultipleWhitespaceTokens, kMultiWhitespaceTokenCases, + kBlockSizes) { + // Set up the tokenizer. + TestInputStream input(kMultiWhitespaceTokenCases_case.input.data(), + kMultiWhitespaceTokenCases_case.input.size(), + kBlockSizes_case); + TestErrorCollector error_collector; + Tokenizer tokenizer(&input, &error_collector); + tokenizer.set_report_whitespace(true); + tokenizer.set_report_newlines(true); + + // Before Next() is called, the initial token should always be TYPE_START. + EXPECT_EQ(Tokenizer::TYPE_START, tokenizer.current().type); + EXPECT_EQ("", tokenizer.current().text); + EXPECT_EQ(0, tokenizer.current().line); + EXPECT_EQ(0, tokenizer.current().column); + EXPECT_EQ(0, tokenizer.current().end_column); + + // Loop through all expected tokens. + int i = 0; + Tokenizer::Token token; + do { + token = kMultiWhitespaceTokenCases_case.output[i++]; + + SCOPED_TRACE(testing::Message() << "Token #" << i << ": " << token.text); + + Tokenizer::Token previous = tokenizer.current(); + + // Next() should only return false when it hits the end token. + if (token.type != Tokenizer::TYPE_END) { + ASSERT_TRUE(tokenizer.Next()); + } else { + ASSERT_FALSE(tokenizer.Next()); + } + + // Check that the previous token is set correctly. + EXPECT_EQ(previous.type, tokenizer.previous().type); + EXPECT_EQ(previous.text, tokenizer.previous().text); + EXPECT_EQ(previous.line, tokenizer.previous().line); + EXPECT_EQ(previous.column, tokenizer.previous().column); + EXPECT_EQ(previous.end_column, tokenizer.previous().end_column); + + // Check that the token matches the expected one. + EXPECT_EQ(token.type, tokenizer.current().type); + EXPECT_EQ(token.text, tokenizer.current().text); + EXPECT_EQ(token.line, tokenizer.current().line); + EXPECT_EQ(token.column, tokenizer.current().column); + EXPECT_EQ(token.end_column, tokenizer.current().end_column); + + } while (token.type != Tokenizer::TYPE_END); + + // There should be no errors. + EXPECT_TRUE(error_collector.text_.empty()); +} + +// This test causes gcc 3.3.5 (and earlier?) to give the cryptic error: +// "sorry, unimplemented: `method_call_expr' not supported by dump_expr" +#if !defined(__GNUC__) || __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ > 3) + +TEST_1D(TokenizerTest, ShCommentStyle, kBlockSizes) { + // Test the "comment_style" option. + + const char* text = + "foo # bar\n" + "baz // qux\n" + "corge /* grault */\n" + "garply"; + const char* const kTokens[] = {"foo", // "# bar" is ignored + "baz", "/", "/", "qux", "corge", "/", + "*", "grault", "*", "/", "garply"}; + + // Set up the tokenizer. + TestInputStream input(text, strlen(text), kBlockSizes_case); + TestErrorCollector error_collector; + Tokenizer tokenizer(&input, &error_collector); + tokenizer.set_comment_style(Tokenizer::SH_COMMENT_STYLE); + + // Advance through tokens and check that they are parsed as expected. + for (int i = 0; i < GOOGLE_ARRAYSIZE(kTokens); i++) { + EXPECT_TRUE(tokenizer.Next()); + EXPECT_EQ(tokenizer.current().text, kTokens[i]); + } + + // There should be no more input. + EXPECT_FALSE(tokenizer.Next()); + // There should be no errors. + EXPECT_TRUE(error_collector.text_.empty()); +} + +#endif + +// ------------------------------------------------------------------- + +// In each case, the input is expected to have two tokens named "prev" and +// "next" with comments in between. +struct DocCommentCase { + std::string input; + + const char* prev_trailing_comments; + const char* detached_comments[10]; + const char* next_leading_comments; +}; + +inline std::ostream& operator<<(std::ostream& out, + const DocCommentCase& test_case) { + return out << CEscape(test_case.input); +} + +DocCommentCase kDocCommentCases[] = { + {"prev next", + + "", + {}, + ""}, + + {"prev /* ignored */ next", + + "", + {}, + ""}, + + {"prev // trailing comment\n" + "next", + + " trailing comment\n", + {}, + ""}, + + {"prev\n" + "// leading comment\n" + "// line 2\n" + "next", + + "", + {}, + " leading comment\n" + " line 2\n"}, + + {"prev\n" + "// trailing comment\n" + "// line 2\n" + "\n" + "next", + + " trailing comment\n" + " line 2\n", + {}, + ""}, + + {"prev // trailing comment\n" + "// leading comment\n" + "// line 2\n" + "next", + + " trailing comment\n", + {}, + " leading comment\n" + " line 2\n"}, + + {"prev /* trailing block comment */\n" + "/* leading block comment\n" + " * line 2\n" + " * line 3 */" + "next", + + " trailing block comment ", + {}, + " leading block comment\n" + " line 2\n" + " line 3 "}, + + {"prev\n" + "/* trailing block comment\n" + " * line 2\n" + " * line 3\n" + " */\n" + "/* leading block comment\n" + " * line 2\n" + " * line 3 */" + "next", + + " trailing block comment\n" + " line 2\n" + " line 3\n", + {}, + " leading block comment\n" + " line 2\n" + " line 3 "}, + + {"prev\n" + "// trailing comment\n" + "\n" + "// detached comment\n" + "// line 2\n" + "\n" + "// second detached comment\n" + "/* third detached comment\n" + " * line 2 */\n" + "// leading comment\n" + "next", + + " trailing comment\n", + {" detached comment\n" + " line 2\n", + " second detached comment\n", + " third detached comment\n" + " line 2 "}, + " leading comment\n"}, + + {"prev /**/\n" + "\n" + "// detached comment\n" + "\n" + "// leading comment\n" + "next", + + "", + {" detached comment\n"}, + " leading comment\n"}, + + {"prev /**/\n" + "// leading comment\n" + "next", + + "", + {}, + " leading comment\n"}, +}; + +TEST_2D(TokenizerTest, DocComments, kDocCommentCases, kBlockSizes) { + // Set up the tokenizer. + TestInputStream input(kDocCommentCases_case.input.data(), + kDocCommentCases_case.input.size(), kBlockSizes_case); + TestErrorCollector error_collector; + Tokenizer tokenizer(&input, &error_collector); + + // Set up a second tokenizer where we'll pass all NULLs to NextWithComments(). + TestInputStream input2(kDocCommentCases_case.input.data(), + kDocCommentCases_case.input.size(), kBlockSizes_case); + Tokenizer tokenizer2(&input2, &error_collector); + + tokenizer.Next(); + tokenizer2.Next(); + + EXPECT_EQ("prev", tokenizer.current().text); + EXPECT_EQ("prev", tokenizer2.current().text); + + std::string prev_trailing_comments; + std::vector<std::string> detached_comments; + std::string next_leading_comments; + tokenizer.NextWithComments(&prev_trailing_comments, &detached_comments, + &next_leading_comments); + tokenizer2.NextWithComments(NULL, NULL, NULL); + EXPECT_EQ("next", tokenizer.current().text); + EXPECT_EQ("next", tokenizer2.current().text); + + EXPECT_EQ(kDocCommentCases_case.prev_trailing_comments, + prev_trailing_comments); + + for (int i = 0; i < detached_comments.size(); i++) { + ASSERT_LT(i, GOOGLE_ARRAYSIZE(kDocCommentCases)); + ASSERT_TRUE(kDocCommentCases_case.detached_comments[i] != NULL); + EXPECT_EQ(kDocCommentCases_case.detached_comments[i], detached_comments[i]); + } + + // Verify that we matched all the detached comments. + EXPECT_EQ(NULL, + kDocCommentCases_case.detached_comments[detached_comments.size()]); + + EXPECT_EQ(kDocCommentCases_case.next_leading_comments, next_leading_comments); +} + +// ------------------------------------------------------------------- + +// Test parse helpers. It's not really worth setting up a full data-driven +// test here. +TEST_F(TokenizerTest, ParseInteger) { + EXPECT_EQ(0, ParseInteger("0")); + EXPECT_EQ(123, ParseInteger("123")); + EXPECT_EQ(0xabcdef12u, ParseInteger("0xabcdef12")); + EXPECT_EQ(0xabcdef12u, ParseInteger("0xABCDEF12")); + EXPECT_EQ(kuint64max, ParseInteger("0xFFFFFFFFFFFFFFFF")); + EXPECT_EQ(01234567, ParseInteger("01234567")); + EXPECT_EQ(0X123, ParseInteger("0X123")); + + // Test invalid integers that may still be tokenized as integers. + EXPECT_EQ(0, ParseInteger("0x")); + + uint64 i; + + // Test invalid integers that will never be tokenized as integers. + EXPECT_FALSE(Tokenizer::ParseInteger("zxy", kuint64max, &i)); + EXPECT_FALSE(Tokenizer::ParseInteger("1.2", kuint64max, &i)); + EXPECT_FALSE(Tokenizer::ParseInteger("08", kuint64max, &i)); + EXPECT_FALSE(Tokenizer::ParseInteger("0xg", kuint64max, &i)); + EXPECT_FALSE(Tokenizer::ParseInteger("-1", kuint64max, &i)); + + // Test overflows. + EXPECT_TRUE(Tokenizer::ParseInteger("0", 0, &i)); + EXPECT_FALSE(Tokenizer::ParseInteger("1", 0, &i)); + EXPECT_TRUE(Tokenizer::ParseInteger("1", 1, &i)); + EXPECT_TRUE(Tokenizer::ParseInteger("12345", 12345, &i)); + EXPECT_FALSE(Tokenizer::ParseInteger("12346", 12345, &i)); + EXPECT_TRUE(Tokenizer::ParseInteger("0xFFFFFFFFFFFFFFFF", kuint64max, &i)); + EXPECT_FALSE(Tokenizer::ParseInteger("0x10000000000000000", kuint64max, &i)); +} + +TEST_F(TokenizerTest, ParseFloat) { + EXPECT_DOUBLE_EQ(1, Tokenizer::ParseFloat("1.")); + EXPECT_DOUBLE_EQ(1e3, Tokenizer::ParseFloat("1e3")); + EXPECT_DOUBLE_EQ(1e3, Tokenizer::ParseFloat("1E3")); + EXPECT_DOUBLE_EQ(1.5e3, Tokenizer::ParseFloat("1.5e3")); + EXPECT_DOUBLE_EQ(.1, Tokenizer::ParseFloat(".1")); + EXPECT_DOUBLE_EQ(.25, Tokenizer::ParseFloat(".25")); + EXPECT_DOUBLE_EQ(.1e3, Tokenizer::ParseFloat(".1e3")); + EXPECT_DOUBLE_EQ(.25e3, Tokenizer::ParseFloat(".25e3")); + EXPECT_DOUBLE_EQ(.1e+3, Tokenizer::ParseFloat(".1e+3")); + EXPECT_DOUBLE_EQ(.1e-3, Tokenizer::ParseFloat(".1e-3")); + EXPECT_DOUBLE_EQ(5, Tokenizer::ParseFloat("5")); + EXPECT_DOUBLE_EQ(6e-12, Tokenizer::ParseFloat("6e-12")); + EXPECT_DOUBLE_EQ(1.2, Tokenizer::ParseFloat("1.2")); + EXPECT_DOUBLE_EQ(1.e2, Tokenizer::ParseFloat("1.e2")); + + // Test invalid integers that may still be tokenized as integers. + EXPECT_DOUBLE_EQ(1, Tokenizer::ParseFloat("1e")); + EXPECT_DOUBLE_EQ(1, Tokenizer::ParseFloat("1e-")); + EXPECT_DOUBLE_EQ(1, Tokenizer::ParseFloat("1.e")); + + // Test 'f' suffix. + EXPECT_DOUBLE_EQ(1, Tokenizer::ParseFloat("1f")); + EXPECT_DOUBLE_EQ(1, Tokenizer::ParseFloat("1.0f")); + EXPECT_DOUBLE_EQ(1, Tokenizer::ParseFloat("1F")); + + // These should parse successfully even though they are out of range. + // Overflows become infinity and underflows become zero. + EXPECT_EQ(0.0, Tokenizer::ParseFloat("1e-9999999999999999999999999999")); + EXPECT_EQ(HUGE_VAL, Tokenizer::ParseFloat("1e+9999999999999999999999999999")); + +#ifdef PROTOBUF_HAS_DEATH_TEST // death tests do not work on Windows yet + // Test invalid integers that will never be tokenized as integers. + EXPECT_DEBUG_DEATH( + Tokenizer::ParseFloat("zxy"), + "passed text that could not have been tokenized as a float"); + EXPECT_DEBUG_DEATH( + Tokenizer::ParseFloat("1-e0"), + "passed text that could not have been tokenized as a float"); + EXPECT_DEBUG_DEATH( + Tokenizer::ParseFloat("-1.0"), + "passed text that could not have been tokenized as a float"); +#endif // PROTOBUF_HAS_DEATH_TEST +} + +TEST_F(TokenizerTest, ParseString) { + std::string output; + Tokenizer::ParseString("'hello'", &output); + EXPECT_EQ("hello", output); + Tokenizer::ParseString("\"blah\\nblah2\"", &output); + EXPECT_EQ("blah\nblah2", output); + Tokenizer::ParseString("'\\1x\\1\\123\\739\\52\\334n\\3'", &output); + EXPECT_EQ("\1x\1\123\739\52\334n\3", output); + Tokenizer::ParseString("'\\x20\\x4'", &output); + EXPECT_EQ("\x20\x4", output); + + // Test invalid strings that may still be tokenized as strings. + Tokenizer::ParseString("\"\\a\\l\\v\\t", &output); // \l is invalid + EXPECT_EQ("\a?\v\t", output); + Tokenizer::ParseString("'", &output); + EXPECT_EQ("", output); + Tokenizer::ParseString("'\\", &output); + EXPECT_EQ("\\", output); + + // Experiment with Unicode escapes. Here are one-, two- and three-byte Unicode + // characters. + Tokenizer::ParseString("'\\u0024\\u00a2\\u20ac\\U00024b62XX'", &output); + EXPECT_EQ("$¢€ð¤¢XX", output); + // Same thing encoded using UTF16. + Tokenizer::ParseString("'\\u0024\\u00a2\\u20ac\\ud852\\udf62XX'", &output); + EXPECT_EQ("$¢€ð¤¢XX", output); + // Here's some broken UTF16; there's a head surrogate with no tail surrogate. + // We just output this as if it were UTF8; it's not a defined code point, but + // it has a defined encoding. + Tokenizer::ParseString("'\\ud852XX'", &output); + EXPECT_EQ("\xed\xa1\x92XX", output); + // Malformed escape: Demons may fly out of the nose. + Tokenizer::ParseString("'\\u0'", &output); + EXPECT_EQ("u0", output); + // Beyond the range of valid UTF-32 code units. + Tokenizer::ParseString("'\\U00110000\\U00200000\\UFFFFFFFF'", &output); + EXPECT_EQ("\\U00110000\\U00200000\\Uffffffff", output); + + // Test invalid strings that will never be tokenized as strings. +#ifdef PROTOBUF_HAS_DEATH_TEST // death tests do not work on Windows yet + EXPECT_DEBUG_DEATH( + Tokenizer::ParseString("", &output), + "passed text that could not have been tokenized as a string"); +#endif // PROTOBUF_HAS_DEATH_TEST +} + +TEST_F(TokenizerTest, ParseStringAppend) { + // Check that ParseString and ParseStringAppend differ. + std::string output("stuff+"); + Tokenizer::ParseStringAppend("'hello'", &output); + EXPECT_EQ("stuff+hello", output); + Tokenizer::ParseString("'hello'", &output); + EXPECT_EQ("hello", output); +} + +// ------------------------------------------------------------------- + +// Each case parses some input text, ignoring the tokens produced, and +// checks that the error output matches what is expected. +struct ErrorCase { + std::string input; + bool recoverable; // True if the tokenizer should be able to recover and + // parse more tokens after seeing this error. Cases + // for which this is true must end with "foo" as + // the last token, which the test will check for. + const char* errors; +}; + +inline std::ostream& operator<<(std::ostream& out, const ErrorCase& test_case) { + return out << CEscape(test_case.input); +} + +ErrorCase kErrorCases[] = { + // String errors. + {"'\\l' foo", true, "0:2: Invalid escape sequence in string literal.\n"}, + {"'\\X' foo", true, "0:2: Invalid escape sequence in string literal.\n"}, + {"'\\x' foo", true, "0:3: Expected hex digits for escape sequence.\n"}, + {"'foo", false, "0:4: Unexpected end of string.\n"}, + {"'bar\nfoo", true, "0:4: String literals cannot cross line boundaries.\n"}, + {"'\\u01' foo", true, + "0:5: Expected four hex digits for \\u escape sequence.\n"}, + {"'\\u01' foo", true, + "0:5: Expected four hex digits for \\u escape sequence.\n"}, + {"'\\uXYZ' foo", true, + "0:3: Expected four hex digits for \\u escape sequence.\n"}, + + // Integer errors. + {"123foo", true, "0:3: Need space between number and identifier.\n"}, + + // Hex/octal errors. + {"0x foo", true, "0:2: \"0x\" must be followed by hex digits.\n"}, + {"0541823 foo", true, + "0:4: Numbers starting with leading zero must be in octal.\n"}, + {"0x123z foo", true, "0:5: Need space between number and identifier.\n"}, + {"0x123.4 foo", true, "0:5: Hex and octal numbers must be integers.\n"}, + {"0123.4 foo", true, "0:4: Hex and octal numbers must be integers.\n"}, + + // Float errors. + {"1e foo", true, "0:2: \"e\" must be followed by exponent.\n"}, + {"1e- foo", true, "0:3: \"e\" must be followed by exponent.\n"}, + {"1.2.3 foo", true, + "0:3: Already saw decimal point or exponent; can't have another one.\n"}, + {"1e2.3 foo", true, + "0:3: Already saw decimal point or exponent; can't have another one.\n"}, + {"a.1 foo", true, + "0:1: Need space between identifier and decimal point.\n"}, + // allow_f_after_float not enabled, so this should be an error. + {"1.0f foo", true, "0:3: Need space between number and identifier.\n"}, + + // Block comment errors. + {"/*", false, + "0:2: End-of-file inside block comment.\n" + "0:0: Comment started here.\n"}, + {"/*/*/ foo", true, + "0:3: \"/*\" inside block comment. Block comments cannot be nested.\n"}, + + // Control characters. Multiple consecutive control characters should only + // produce one error. + {"\b foo", true, "0:0: Invalid control characters encountered in text.\n"}, + {"\b\b foo", true, + "0:0: Invalid control characters encountered in text.\n"}, + + // Check that control characters at end of input don't result in an + // infinite loop. + {"\b", false, "0:0: Invalid control characters encountered in text.\n"}, + + // Check recovery from '\0'. We have to explicitly specify the length of + // these strings because otherwise the string constructor will just call + // strlen() which will see the first '\0' and think that is the end of the + // string. + {std::string("\0foo", 4), true, + "0:0: Invalid control characters encountered in text.\n"}, + {std::string("\0\0foo", 5), true, + "0:0: Invalid control characters encountered in text.\n"}, + + // Check error from high order bits set + {"\300foo", true, "0:0: Interpreting non ascii codepoint 192.\n"}, +}; + +TEST_2D(TokenizerTest, Errors, kErrorCases, kBlockSizes) { + // Set up the tokenizer. + TestInputStream input(kErrorCases_case.input.data(), + kErrorCases_case.input.size(), kBlockSizes_case); + TestErrorCollector error_collector; + Tokenizer tokenizer(&input, &error_collector); + + // Ignore all input, except remember if the last token was "foo". + bool last_was_foo = false; + while (tokenizer.Next()) { + last_was_foo = tokenizer.current().text == "foo"; + } + + // Check that the errors match what was expected. + EXPECT_EQ(kErrorCases_case.errors, error_collector.text_); + + // If the error was recoverable, make sure we saw "foo" after it. + if (kErrorCases_case.recoverable) { + EXPECT_TRUE(last_was_foo); + } +} + +// ------------------------------------------------------------------- + +TEST_1D(TokenizerTest, BackUpOnDestruction, kBlockSizes) { + std::string text = "foo bar"; + TestInputStream input(text.data(), text.size(), kBlockSizes_case); + + // Create a tokenizer, read one token, then destroy it. + { + TestErrorCollector error_collector; + Tokenizer tokenizer(&input, &error_collector); + + tokenizer.Next(); + } + + // Only "foo" should have been read. + EXPECT_EQ(strlen("foo"), input.ByteCount()); +} + + +} // namespace +} // namespace io +} // namespace protobuf +} // namespace google diff --git a/NorthstarDedicatedTest/include/protobuf/io/zero_copy_stream.cc b/NorthstarDedicatedTest/include/protobuf/io/zero_copy_stream.cc new file mode 100644 index 00000000..a44c87b6 --- /dev/null +++ b/NorthstarDedicatedTest/include/protobuf/io/zero_copy_stream.cc @@ -0,0 +1,55 @@ +// Protocol Buffers - Google's data interchange format +// Copyright 2008 Google Inc. All rights reserved. +// https://developers.google.com/protocol-buffers/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// Author: kenton@google.com (Kenton Varda) +// Based on original Protocol Buffers design by +// Sanjay Ghemawat, Jeff Dean, and others. + +#include <io/zero_copy_stream.h> + +#include <stubs/logging.h> +#include <stubs/common.h> + +namespace google { +namespace protobuf { +namespace io { + + +bool ZeroCopyOutputStream::WriteAliasedRaw(const void* /* data */, + int /* size */) { + GOOGLE_LOG(FATAL) << "This ZeroCopyOutputStream doesn't support aliasing. " + "Reaching here usually means a ZeroCopyOutputStream " + "implementation bug."; + return false; +} + +} // namespace io +} // namespace protobuf +} // namespace google diff --git a/NorthstarDedicatedTest/include/protobuf/io/zero_copy_stream.h b/NorthstarDedicatedTest/include/protobuf/io/zero_copy_stream.h new file mode 100644 index 00000000..a29f75c0 --- /dev/null +++ b/NorthstarDedicatedTest/include/protobuf/io/zero_copy_stream.h @@ -0,0 +1,253 @@ +// Protocol Buffers - Google's data interchange format +// Copyright 2008 Google Inc. All rights reserved. +// https://developers.google.com/protocol-buffers/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// Author: kenton@google.com (Kenton Varda) +// Based on original Protocol Buffers design by +// Sanjay Ghemawat, Jeff Dean, and others. +// +// This file contains the ZeroCopyInputStream and ZeroCopyOutputStream +// interfaces, which represent abstract I/O streams to and from which +// protocol buffers can be read and written. For a few simple +// implementations of these interfaces, see zero_copy_stream_impl.h. +// +// These interfaces are different from classic I/O streams in that they +// try to minimize the amount of data copying that needs to be done. +// To accomplish this, responsibility for allocating buffers is moved to +// the stream object, rather than being the responsibility of the caller. +// So, the stream can return a buffer which actually points directly into +// the final data structure where the bytes are to be stored, and the caller +// can interact directly with that buffer, eliminating an intermediate copy +// operation. +// +// As an example, consider the common case in which you are reading bytes +// from an array that is already in memory (or perhaps an mmap()ed file). +// With classic I/O streams, you would do something like: +// char buffer[BUFFER_SIZE]; +// input->Read(buffer, BUFFER_SIZE); +// DoSomething(buffer, BUFFER_SIZE); +// Then, the stream basically just calls memcpy() to copy the data from +// the array into your buffer. With a ZeroCopyInputStream, you would do +// this instead: +// const void* buffer; +// int size; +// input->Next(&buffer, &size); +// DoSomething(buffer, size); +// Here, no copy is performed. The input stream returns a pointer directly +// into the backing array, and the caller ends up reading directly from it. +// +// If you want to be able to read the old-fashion way, you can create +// a CodedInputStream or CodedOutputStream wrapping these objects and use +// their ReadRaw()/WriteRaw() methods. These will, of course, add a copy +// step, but Coded*Stream will handle buffering so at least it will be +// reasonably efficient. +// +// ZeroCopyInputStream example: +// // Read in a file and print its contents to stdout. +// int fd = open("myfile", O_RDONLY); +// ZeroCopyInputStream* input = new FileInputStream(fd); +// +// const void* buffer; +// int size; +// while (input->Next(&buffer, &size)) { +// cout.write(buffer, size); +// } +// +// delete input; +// close(fd); +// +// ZeroCopyOutputStream example: +// // Copy the contents of "infile" to "outfile", using plain read() for +// // "infile" but a ZeroCopyOutputStream for "outfile". +// int infd = open("infile", O_RDONLY); +// int outfd = open("outfile", O_WRONLY); +// ZeroCopyOutputStream* output = new FileOutputStream(outfd); +// +// void* buffer; +// int size; +// while (output->Next(&buffer, &size)) { +// int bytes = read(infd, buffer, size); +// if (bytes < size) { +// // Reached EOF. +// output->BackUp(size - bytes); +// break; +// } +// } +// +// delete output; +// close(infd); +// close(outfd); + +#ifndef GOOGLE_PROTOBUF_IO_ZERO_COPY_STREAM_H__ +#define GOOGLE_PROTOBUF_IO_ZERO_COPY_STREAM_H__ + + +#include <string> + +#include <stubs/common.h> +#include <port_def.inc> + + +namespace google { +namespace protobuf { +namespace io { + +// Defined in this file. +class ZeroCopyInputStream; +class ZeroCopyOutputStream; + +// Abstract interface similar to an input stream but designed to minimize +// copying. +class PROTOBUF_EXPORT ZeroCopyInputStream { + public: + ZeroCopyInputStream() {} + virtual ~ZeroCopyInputStream() {} + + // Obtains a chunk of data from the stream. + // + // Preconditions: + // * "size" and "data" are not NULL. + // + // Postconditions: + // * If the returned value is false, there is no more data to return or + // an error occurred. All errors are permanent. + // * Otherwise, "size" points to the actual number of bytes read and "data" + // points to a pointer to a buffer containing these bytes. + // * Ownership of this buffer remains with the stream, and the buffer + // remains valid only until some other method of the stream is called + // or the stream is destroyed. + // * It is legal for the returned buffer to have zero size, as long + // as repeatedly calling Next() eventually yields a buffer with non-zero + // size. + virtual bool Next(const void** data, int* size) = 0; + + // Backs up a number of bytes, so that the next call to Next() returns + // data again that was already returned by the last call to Next(). This + // is useful when writing procedures that are only supposed to read up + // to a certain point in the input, then return. If Next() returns a + // buffer that goes beyond what you wanted to read, you can use BackUp() + // to return to the point where you intended to finish. + // + // Preconditions: + // * The last method called must have been Next(). + // * count must be less than or equal to the size of the last buffer + // returned by Next(). + // + // Postconditions: + // * The last "count" bytes of the last buffer returned by Next() will be + // pushed back into the stream. Subsequent calls to Next() will return + // the same data again before producing new data. + virtual void BackUp(int count) = 0; + + // Skips a number of bytes. Returns false if the end of the stream is + // reached or some input error occurred. In the end-of-stream case, the + // stream is advanced to the end of the stream (so ByteCount() will return + // the total size of the stream). + virtual bool Skip(int count) = 0; + + // Returns the total number of bytes read since this object was created. + virtual int64_t ByteCount() const = 0; + + + private: + GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(ZeroCopyInputStream); +}; + +// Abstract interface similar to an output stream but designed to minimize +// copying. +class PROTOBUF_EXPORT ZeroCopyOutputStream { + public: + ZeroCopyOutputStream() {} + virtual ~ZeroCopyOutputStream() {} + + // Obtains a buffer into which data can be written. Any data written + // into this buffer will eventually (maybe instantly, maybe later on) + // be written to the output. + // + // Preconditions: + // * "size" and "data" are not NULL. + // + // Postconditions: + // * If the returned value is false, an error occurred. All errors are + // permanent. + // * Otherwise, "size" points to the actual number of bytes in the buffer + // and "data" points to the buffer. + // * Ownership of this buffer remains with the stream, and the buffer + // remains valid only until some other method of the stream is called + // or the stream is destroyed. + // * Any data which the caller stores in this buffer will eventually be + // written to the output (unless BackUp() is called). + // * It is legal for the returned buffer to have zero size, as long + // as repeatedly calling Next() eventually yields a buffer with non-zero + // size. + virtual bool Next(void** data, int* size) = 0; + + // Backs up a number of bytes, so that the end of the last buffer returned + // by Next() is not actually written. This is needed when you finish + // writing all the data you want to write, but the last buffer was bigger + // than you needed. You don't want to write a bunch of garbage after the + // end of your data, so you use BackUp() to back up. + // + // Preconditions: + // * The last method called must have been Next(). + // * count must be less than or equal to the size of the last buffer + // returned by Next(). + // * The caller must not have written anything to the last "count" bytes + // of that buffer. + // + // Postconditions: + // * The last "count" bytes of the last buffer returned by Next() will be + // ignored. + virtual void BackUp(int count) = 0; + + // Returns the total number of bytes written since this object was created. + virtual int64_t ByteCount() const = 0; + + // Write a given chunk of data to the output. Some output streams may + // implement this in a way that avoids copying. Check AllowsAliasing() before + // calling WriteAliasedRaw(). It will GOOGLE_CHECK fail if WriteAliasedRaw() is + // called on a stream that does not allow aliasing. + // + // NOTE: It is caller's responsibility to ensure that the chunk of memory + // remains live until all of the data has been consumed from the stream. + virtual bool WriteAliasedRaw(const void* data, int size); + virtual bool AllowsAliasing() const { return false; } + + + private: + GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(ZeroCopyOutputStream); +}; + +} // namespace io +} // namespace protobuf +} // namespace google + +#include <port_undef.inc> + +#endif // GOOGLE_PROTOBUF_IO_ZERO_COPY_STREAM_H__ diff --git a/NorthstarDedicatedTest/include/protobuf/io/zero_copy_stream_impl.cc b/NorthstarDedicatedTest/include/protobuf/io/zero_copy_stream_impl.cc new file mode 100644 index 00000000..fffa3b5d --- /dev/null +++ b/NorthstarDedicatedTest/include/protobuf/io/zero_copy_stream_impl.cc @@ -0,0 +1,372 @@ +// Protocol Buffers - Google's data interchange format +// Copyright 2008 Google Inc. All rights reserved. +// https://developers.google.com/protocol-buffers/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// Author: kenton@google.com (Kenton Varda) +// Based on original Protocol Buffers design by +// Sanjay Ghemawat, Jeff Dean, and others. + +#ifndef _MSC_VER +#include <fcntl.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <unistd.h> +#endif +#include <errno.h> + +#include <algorithm> +#include <iostream> + +#include <stubs/common.h> +#include <stubs/logging.h> +#include <io/io_win32.h> +#include <io/zero_copy_stream_impl.h> +#include <stubs/stl_util.h> + + +namespace google { +namespace protobuf { +namespace io { + +#ifdef _WIN32 +// Win32 lseek is broken: If invoked on a non-seekable file descriptor, its +// return value is undefined. We re-define it to always produce an error. +#define lseek(fd, offset, origin) ((off_t)-1) +// DO NOT include <io.h>, instead create functions in io_win32.{h,cc} and import +// them like we do below. +using google::protobuf::io::win32::access; +using google::protobuf::io::win32::close; +using google::protobuf::io::win32::open; +using google::protobuf::io::win32::read; +using google::protobuf::io::win32::write; +#endif + +namespace { + +// EINTR sucks. +int close_no_eintr(int fd) { + int result; + do { + result = close(fd); + } while (result < 0 && errno == EINTR); + return result; +} + +} // namespace + +// =================================================================== + +FileInputStream::FileInputStream(int file_descriptor, int block_size) + : copying_input_(file_descriptor), impl_(©ing_input_, block_size) {} + +bool FileInputStream::Close() { return copying_input_.Close(); } + +bool FileInputStream::Next(const void** data, int* size) { + return impl_.Next(data, size); +} + +void FileInputStream::BackUp(int count) { impl_.BackUp(count); } + +bool FileInputStream::Skip(int count) { return impl_.Skip(count); } + +int64_t FileInputStream::ByteCount() const { return impl_.ByteCount(); } + +FileInputStream::CopyingFileInputStream::CopyingFileInputStream( + int file_descriptor) + : file_(file_descriptor), + close_on_delete_(false), + is_closed_(false), + errno_(0), + previous_seek_failed_(false) { +#ifndef _WIN32 + int flags = fcntl(file_, F_GETFL); + flags &= ~O_NONBLOCK; + fcntl(file_, F_SETFL, flags); +#endif +} + +FileInputStream::CopyingFileInputStream::~CopyingFileInputStream() { + if (close_on_delete_) { + if (!Close()) { + GOOGLE_LOG(ERROR) << "close() failed: " << strerror(errno_); + } + } +} + +bool FileInputStream::CopyingFileInputStream::Close() { + GOOGLE_CHECK(!is_closed_); + + is_closed_ = true; + if (close_no_eintr(file_) != 0) { + // The docs on close() do not specify whether a file descriptor is still + // open after close() fails with EIO. However, the glibc source code + // seems to indicate that it is not. + errno_ = errno; + return false; + } + + return true; +} + +int FileInputStream::CopyingFileInputStream::Read(void* buffer, int size) { + GOOGLE_CHECK(!is_closed_); + + int result; + do { + result = read(file_, buffer, size); + } while (result < 0 && errno == EINTR); + + if (result < 0) { + // Read error (not EOF). + errno_ = errno; + } + + return result; +} + +int FileInputStream::CopyingFileInputStream::Skip(int count) { + GOOGLE_CHECK(!is_closed_); + + if (!previous_seek_failed_ && lseek(file_, count, SEEK_CUR) != (off_t)-1) { + // Seek succeeded. + return count; + } else { + // Failed to seek. + + // Note to self: Don't seek again. This file descriptor doesn't + // support it. + previous_seek_failed_ = true; + + // Use the default implementation. + return CopyingInputStream::Skip(count); + } +} + +// =================================================================== + +FileOutputStream::FileOutputStream(int file_descriptor, int /*block_size*/) + : CopyingOutputStreamAdaptor(©ing_output_), + copying_output_(file_descriptor) {} + +bool FileOutputStream::Close() { + bool flush_succeeded = Flush(); + return copying_output_.Close() && flush_succeeded; +} + +FileOutputStream::CopyingFileOutputStream::CopyingFileOutputStream( + int file_descriptor) + : file_(file_descriptor), + close_on_delete_(false), + is_closed_(false), + errno_(0) {} + +FileOutputStream::~FileOutputStream() { Flush(); } + +FileOutputStream::CopyingFileOutputStream::~CopyingFileOutputStream() { + if (close_on_delete_) { + if (!Close()) { + GOOGLE_LOG(ERROR) << "close() failed: " << strerror(errno_); + } + } +} + +bool FileOutputStream::CopyingFileOutputStream::Close() { + GOOGLE_CHECK(!is_closed_); + + is_closed_ = true; + if (close_no_eintr(file_) != 0) { + // The docs on close() do not specify whether a file descriptor is still + // open after close() fails with EIO. However, the glibc source code + // seems to indicate that it is not. + errno_ = errno; + return false; + } + + return true; +} + +bool FileOutputStream::CopyingFileOutputStream::Write(const void* buffer, + int size) { + GOOGLE_CHECK(!is_closed_); + int total_written = 0; + + const uint8_t* buffer_base = reinterpret_cast<const uint8_t*>(buffer); + + while (total_written < size) { + int bytes; + do { + bytes = write(file_, buffer_base + total_written, size - total_written); + } while (bytes < 0 && errno == EINTR); + + if (bytes <= 0) { + // Write error. + + // FIXME(kenton): According to the man page, if write() returns zero, + // there was no error; write() simply did not write anything. It's + // unclear under what circumstances this might happen, but presumably + // errno won't be set in this case. I am confused as to how such an + // event should be handled. For now I'm treating it as an error, since + // retrying seems like it could lead to an infinite loop. I suspect + // this never actually happens anyway. + + if (bytes < 0) { + errno_ = errno; + } + return false; + } + total_written += bytes; + } + + return true; +} + +// =================================================================== + +IstreamInputStream::IstreamInputStream(std::istream* input, int block_size) + : copying_input_(input), impl_(©ing_input_, block_size) {} + +bool IstreamInputStream::Next(const void** data, int* size) { + return impl_.Next(data, size); +} + +void IstreamInputStream::BackUp(int count) { impl_.BackUp(count); } + +bool IstreamInputStream::Skip(int count) { return impl_.Skip(count); } + +int64_t IstreamInputStream::ByteCount() const { return impl_.ByteCount(); } + +IstreamInputStream::CopyingIstreamInputStream::CopyingIstreamInputStream( + std::istream* input) + : input_(input) {} + +IstreamInputStream::CopyingIstreamInputStream::~CopyingIstreamInputStream() {} + +int IstreamInputStream::CopyingIstreamInputStream::Read(void* buffer, + int size) { + input_->read(reinterpret_cast<char*>(buffer), size); + int result = input_->gcount(); + if (result == 0 && input_->fail() && !input_->eof()) { + return -1; + } + return result; +} + +// =================================================================== + +OstreamOutputStream::OstreamOutputStream(std::ostream* output, int block_size) + : copying_output_(output), impl_(©ing_output_, block_size) {} + +OstreamOutputStream::~OstreamOutputStream() { impl_.Flush(); } + +bool OstreamOutputStream::Next(void** data, int* size) { + return impl_.Next(data, size); +} + +void OstreamOutputStream::BackUp(int count) { impl_.BackUp(count); } + +int64_t OstreamOutputStream::ByteCount() const { return impl_.ByteCount(); } + +OstreamOutputStream::CopyingOstreamOutputStream::CopyingOstreamOutputStream( + std::ostream* output) + : output_(output) {} + +OstreamOutputStream::CopyingOstreamOutputStream::~CopyingOstreamOutputStream() { +} + +bool OstreamOutputStream::CopyingOstreamOutputStream::Write(const void* buffer, + int size) { + output_->write(reinterpret_cast<const char*>(buffer), size); + return output_->good(); +} + +// =================================================================== + +ConcatenatingInputStream::ConcatenatingInputStream( + ZeroCopyInputStream* const streams[], int count) + : streams_(streams), stream_count_(count), bytes_retired_(0) { +} + +bool ConcatenatingInputStream::Next(const void** data, int* size) { + while (stream_count_ > 0) { + if (streams_[0]->Next(data, size)) return true; + + // That stream is done. Advance to the next one. + bytes_retired_ += streams_[0]->ByteCount(); + ++streams_; + --stream_count_; + } + + // No more streams. + return false; +} + +void ConcatenatingInputStream::BackUp(int count) { + if (stream_count_ > 0) { + streams_[0]->BackUp(count); + } else { + GOOGLE_LOG(DFATAL) << "Can't BackUp() after failed Next()."; + } +} + +bool ConcatenatingInputStream::Skip(int count) { + while (stream_count_ > 0) { + // Assume that ByteCount() can be used to find out how much we actually + // skipped when Skip() fails. + int64_t target_byte_count = streams_[0]->ByteCount() + count; + if (streams_[0]->Skip(count)) return true; + + // Hit the end of the stream. Figure out how many more bytes we still have + // to skip. + int64_t final_byte_count = streams_[0]->ByteCount(); + GOOGLE_DCHECK_LT(final_byte_count, target_byte_count); + count = target_byte_count - final_byte_count; + + // That stream is done. Advance to the next one. + bytes_retired_ += final_byte_count; + ++streams_; + --stream_count_; + } + + return false; +} + +int64_t ConcatenatingInputStream::ByteCount() const { + if (stream_count_ == 0) { + return bytes_retired_; + } else { + return bytes_retired_ + streams_[0]->ByteCount(); + } +} + + +// =================================================================== + +} // namespace io +} // namespace protobuf +} // namespace google diff --git a/NorthstarDedicatedTest/include/protobuf/io/zero_copy_stream_impl.h b/NorthstarDedicatedTest/include/protobuf/io/zero_copy_stream_impl.h new file mode 100644 index 00000000..c7d34c61 --- /dev/null +++ b/NorthstarDedicatedTest/include/protobuf/io/zero_copy_stream_impl.h @@ -0,0 +1,327 @@ +// Protocol Buffers - Google's data interchange format +// Copyright 2008 Google Inc. All rights reserved. +// https://developers.google.com/protocol-buffers/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// Author: kenton@google.com (Kenton Varda) +// Based on original Protocol Buffers design by +// Sanjay Ghemawat, Jeff Dean, and others. +// +// This file contains common implementations of the interfaces defined in +// zero_copy_stream.h which are only included in the full (non-lite) +// protobuf library. These implementations include Unix file descriptors +// and C++ iostreams. See also: zero_copy_stream_impl_lite.h + +#ifndef GOOGLE_PROTOBUF_IO_ZERO_COPY_STREAM_IMPL_H__ +#define GOOGLE_PROTOBUF_IO_ZERO_COPY_STREAM_IMPL_H__ + + +#include <iosfwd> +#include <string> + +#include <stubs/common.h> +#include <io/zero_copy_stream.h> +#include <io/zero_copy_stream_impl_lite.h> + +#include <port_def.inc> + +namespace google { +namespace protobuf { +namespace io { + +// =================================================================== + +// A ZeroCopyInputStream which reads from a file descriptor. +// +// FileInputStream is preferred over using an ifstream with IstreamInputStream. +// The latter will introduce an extra layer of buffering, harming performance. +// Also, it's conceivable that FileInputStream could someday be enhanced +// to use zero-copy file descriptors on OSs which support them. +class PROTOBUF_EXPORT FileInputStream : public ZeroCopyInputStream { + public: + // Creates a stream that reads from the given Unix file descriptor. + // If a block_size is given, it specifies the number of bytes that + // should be read and returned with each call to Next(). Otherwise, + // a reasonable default is used. + explicit FileInputStream(int file_descriptor, int block_size = -1); + + // Flushes any buffers and closes the underlying file. Returns false if + // an error occurs during the process; use GetErrno() to examine the error. + // Even if an error occurs, the file descriptor is closed when this returns. + bool Close(); + + // By default, the file descriptor is not closed when the stream is + // destroyed. Call SetCloseOnDelete(true) to change that. WARNING: + // This leaves no way for the caller to detect if close() fails. If + // detecting close() errors is important to you, you should arrange + // to close the descriptor yourself. + void SetCloseOnDelete(bool value) { copying_input_.SetCloseOnDelete(value); } + + // If an I/O error has occurred on this file descriptor, this is the + // errno from that error. Otherwise, this is zero. Once an error + // occurs, the stream is broken and all subsequent operations will + // fail. + int GetErrno() const { return copying_input_.GetErrno(); } + + // implements ZeroCopyInputStream ---------------------------------- + bool Next(const void** data, int* size) override; + void BackUp(int count) override; + bool Skip(int count) override; + int64_t ByteCount() const override; + + private: + class PROTOBUF_EXPORT CopyingFileInputStream : public CopyingInputStream { + public: + CopyingFileInputStream(int file_descriptor); + ~CopyingFileInputStream() override; + + bool Close(); + void SetCloseOnDelete(bool value) { close_on_delete_ = value; } + int GetErrno() const { return errno_; } + + // implements CopyingInputStream --------------------------------- + int Read(void* buffer, int size) override; + int Skip(int count) override; + + private: + // The file descriptor. + const int file_; + bool close_on_delete_; + bool is_closed_; + + // The errno of the I/O error, if one has occurred. Otherwise, zero. + int errno_; + + // Did we try to seek once and fail? If so, we assume this file descriptor + // doesn't support seeking and won't try again. + bool previous_seek_failed_; + + GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(CopyingFileInputStream); + }; + + CopyingFileInputStream copying_input_; + CopyingInputStreamAdaptor impl_; + + GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(FileInputStream); +}; + +// =================================================================== + +// A ZeroCopyOutputStream which writes to a file descriptor. +// +// FileOutputStream is preferred over using an ofstream with +// OstreamOutputStream. The latter will introduce an extra layer of buffering, +// harming performance. Also, it's conceivable that FileOutputStream could +// someday be enhanced to use zero-copy file descriptors on OSs which +// support them. +class PROTOBUF_EXPORT FileOutputStream : public CopyingOutputStreamAdaptor { + public: + // Creates a stream that writes to the given Unix file descriptor. + // If a block_size is given, it specifies the size of the buffers + // that should be returned by Next(). Otherwise, a reasonable default + // is used. + explicit FileOutputStream(int file_descriptor, int block_size = -1); + + ~FileOutputStream() override; + + // Flushes any buffers and closes the underlying file. Returns false if + // an error occurs during the process; use GetErrno() to examine the error. + // Even if an error occurs, the file descriptor is closed when this returns. + bool Close(); + + // By default, the file descriptor is not closed when the stream is + // destroyed. Call SetCloseOnDelete(true) to change that. WARNING: + // This leaves no way for the caller to detect if close() fails. If + // detecting close() errors is important to you, you should arrange + // to close the descriptor yourself. + void SetCloseOnDelete(bool value) { copying_output_.SetCloseOnDelete(value); } + + // If an I/O error has occurred on this file descriptor, this is the + // errno from that error. Otherwise, this is zero. Once an error + // occurs, the stream is broken and all subsequent operations will + // fail. + int GetErrno() const { return copying_output_.GetErrno(); } + + private: + class PROTOBUF_EXPORT CopyingFileOutputStream : public CopyingOutputStream { + public: + CopyingFileOutputStream(int file_descriptor); + ~CopyingFileOutputStream() override; + + bool Close(); + void SetCloseOnDelete(bool value) { close_on_delete_ = value; } + int GetErrno() const { return errno_; } + + // implements CopyingOutputStream -------------------------------- + bool Write(const void* buffer, int size) override; + + private: + // The file descriptor. + const int file_; + bool close_on_delete_; + bool is_closed_; + + // The errno of the I/O error, if one has occurred. Otherwise, zero. + int errno_; + + GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(CopyingFileOutputStream); + }; + + CopyingFileOutputStream copying_output_; + + GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(FileOutputStream); +}; + +// =================================================================== + +// A ZeroCopyInputStream which reads from a C++ istream. +// +// Note that for reading files (or anything represented by a file descriptor), +// FileInputStream is more efficient. +class PROTOBUF_EXPORT IstreamInputStream : public ZeroCopyInputStream { + public: + // Creates a stream that reads from the given C++ istream. + // If a block_size is given, it specifies the number of bytes that + // should be read and returned with each call to Next(). Otherwise, + // a reasonable default is used. + explicit IstreamInputStream(std::istream* stream, int block_size = -1); + + // implements ZeroCopyInputStream ---------------------------------- + bool Next(const void** data, int* size) override; + void BackUp(int count) override; + bool Skip(int count) override; + int64_t ByteCount() const override; + + private: + class PROTOBUF_EXPORT CopyingIstreamInputStream : public CopyingInputStream { + public: + CopyingIstreamInputStream(std::istream* input); + ~CopyingIstreamInputStream() override; + + // implements CopyingInputStream --------------------------------- + int Read(void* buffer, int size) override; + // (We use the default implementation of Skip().) + + private: + // The stream. + std::istream* input_; + + GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(CopyingIstreamInputStream); + }; + + CopyingIstreamInputStream copying_input_; + CopyingInputStreamAdaptor impl_; + + GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(IstreamInputStream); +}; + +// =================================================================== + +// A ZeroCopyOutputStream which writes to a C++ ostream. +// +// Note that for writing files (or anything represented by a file descriptor), +// FileOutputStream is more efficient. +class PROTOBUF_EXPORT OstreamOutputStream : public ZeroCopyOutputStream { + public: + // Creates a stream that writes to the given C++ ostream. + // If a block_size is given, it specifies the size of the buffers + // that should be returned by Next(). Otherwise, a reasonable default + // is used. + explicit OstreamOutputStream(std::ostream* stream, int block_size = -1); + ~OstreamOutputStream() override; + + // implements ZeroCopyOutputStream --------------------------------- + bool Next(void** data, int* size) override; + void BackUp(int count) override; + int64_t ByteCount() const override; + + private: + class PROTOBUF_EXPORT CopyingOstreamOutputStream + : public CopyingOutputStream { + public: + CopyingOstreamOutputStream(std::ostream* output); + ~CopyingOstreamOutputStream() override; + + // implements CopyingOutputStream -------------------------------- + bool Write(const void* buffer, int size) override; + + private: + // The stream. + std::ostream* output_; + + GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(CopyingOstreamOutputStream); + }; + + CopyingOstreamOutputStream copying_output_; + CopyingOutputStreamAdaptor impl_; + + GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(OstreamOutputStream); +}; + +// =================================================================== + +// A ZeroCopyInputStream which reads from several other streams in sequence. +// ConcatenatingInputStream is unable to distinguish between end-of-stream +// and read errors in the underlying streams, so it assumes any errors mean +// end-of-stream. So, if the underlying streams fail for any other reason, +// ConcatenatingInputStream may do odd things. It is suggested that you do +// not use ConcatenatingInputStream on streams that might produce read errors +// other than end-of-stream. +class PROTOBUF_EXPORT ConcatenatingInputStream : public ZeroCopyInputStream { + public: + // All streams passed in as well as the array itself must remain valid + // until the ConcatenatingInputStream is destroyed. + ConcatenatingInputStream(ZeroCopyInputStream* const streams[], int count); + ~ConcatenatingInputStream() override = default; + + // implements ZeroCopyInputStream ---------------------------------- + bool Next(const void** data, int* size) override; + void BackUp(int count) override; + bool Skip(int count) override; + int64_t ByteCount() const override; + + + private: + // As streams are retired, streams_ is incremented and count_ is + // decremented. + ZeroCopyInputStream* const* streams_; + int stream_count_; + int64_t bytes_retired_; // Bytes read from previous streams. + + GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(ConcatenatingInputStream); +}; + +// =================================================================== + +} // namespace io +} // namespace protobuf +} // namespace google + +#include <port_undef.inc> + +#endif // GOOGLE_PROTOBUF_IO_ZERO_COPY_STREAM_IMPL_H__ diff --git a/NorthstarDedicatedTest/include/protobuf/io/zero_copy_stream_impl_lite.cc b/NorthstarDedicatedTest/include/protobuf/io/zero_copy_stream_impl_lite.cc new file mode 100644 index 00000000..8f5e5373 --- /dev/null +++ b/NorthstarDedicatedTest/include/protobuf/io/zero_copy_stream_impl_lite.cc @@ -0,0 +1,467 @@ +// Protocol Buffers - Google's data interchange format +// Copyright 2008 Google Inc. All rights reserved. +// https://developers.google.com/protocol-buffers/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// Author: kenton@google.com (Kenton Varda) +// Based on original Protocol Buffers design by +// Sanjay Ghemawat, Jeff Dean, and others. + +#include <io/zero_copy_stream_impl_lite.h> + +#include <algorithm> +#include <limits> + +#include <stubs/common.h> +#include <stubs/logging.h> +#include <stubs/casts.h> +#include <stubs/stl_util.h> + +namespace google { +namespace protobuf { +namespace io { + +namespace { + +// Default block size for Copying{In,Out}putStreamAdaptor. +static const int kDefaultBlockSize = 8192; + +} // namespace + +// =================================================================== + +ArrayInputStream::ArrayInputStream(const void* data, int size, int block_size) + : data_(reinterpret_cast<const uint8_t*>(data)), + size_(size), + block_size_(block_size > 0 ? block_size : size), + position_(0), + last_returned_size_(0) {} + +bool ArrayInputStream::Next(const void** data, int* size) { + if (position_ < size_) { + last_returned_size_ = std::min(block_size_, size_ - position_); + *data = data_ + position_; + *size = last_returned_size_; + position_ += last_returned_size_; + return true; + } else { + // We're at the end of the array. + last_returned_size_ = 0; // Don't let caller back up. + return false; + } +} + +void ArrayInputStream::BackUp(int count) { + GOOGLE_CHECK_GT(last_returned_size_, 0) + << "BackUp() can only be called after a successful Next()."; + GOOGLE_CHECK_LE(count, last_returned_size_); + GOOGLE_CHECK_GE(count, 0); + position_ -= count; + last_returned_size_ = 0; // Don't let caller back up further. +} + +bool ArrayInputStream::Skip(int count) { + GOOGLE_CHECK_GE(count, 0); + last_returned_size_ = 0; // Don't let caller back up. + if (count > size_ - position_) { + position_ = size_; + return false; + } else { + position_ += count; + return true; + } +} + +int64_t ArrayInputStream::ByteCount() const { return position_; } + + +// =================================================================== + +ArrayOutputStream::ArrayOutputStream(void* data, int size, int block_size) + : data_(reinterpret_cast<uint8_t*>(data)), + size_(size), + block_size_(block_size > 0 ? block_size : size), + position_(0), + last_returned_size_(0) {} + +bool ArrayOutputStream::Next(void** data, int* size) { + if (position_ < size_) { + last_returned_size_ = std::min(block_size_, size_ - position_); + *data = data_ + position_; + *size = last_returned_size_; + position_ += last_returned_size_; + return true; + } else { + // We're at the end of the array. + last_returned_size_ = 0; // Don't let caller back up. + return false; + } +} + +void ArrayOutputStream::BackUp(int count) { + GOOGLE_CHECK_GT(last_returned_size_, 0) + << "BackUp() can only be called after a successful Next()."; + GOOGLE_CHECK_LE(count, last_returned_size_); + GOOGLE_CHECK_GE(count, 0); + position_ -= count; + last_returned_size_ = 0; // Don't let caller back up further. +} + +int64_t ArrayOutputStream::ByteCount() const { return position_; } + +// =================================================================== + +StringOutputStream::StringOutputStream(std::string* target) : target_(target) {} + +bool StringOutputStream::Next(void** data, int* size) { + GOOGLE_CHECK(target_ != NULL); + size_t old_size = target_->size(); + + // Grow the string. + size_t new_size; + if (old_size < target_->capacity()) { + // Resize the string to match its capacity, since we can get away + // without a memory allocation this way. + new_size = target_->capacity(); + } else { + // Size has reached capacity, try to double it. + new_size = old_size * 2; + } + // Avoid integer overflow in returned '*size'. + new_size = std::min(new_size, old_size + std::numeric_limits<int>::max()); + // Increase the size, also make sure that it is at least kMinimumSize. + STLStringResizeUninitialized( + target_, + std::max(new_size, + kMinimumSize + 0)); // "+ 0" works around GCC4 weirdness. + + *data = mutable_string_data(target_) + old_size; + *size = target_->size() - old_size; + return true; +} + +void StringOutputStream::BackUp(int count) { + GOOGLE_CHECK_GE(count, 0); + GOOGLE_CHECK(target_ != NULL); + GOOGLE_CHECK_LE(static_cast<size_t>(count), target_->size()); + target_->resize(target_->size() - count); +} + +int64_t StringOutputStream::ByteCount() const { + GOOGLE_CHECK(target_ != NULL); + return target_->size(); +} + +// =================================================================== + +int CopyingInputStream::Skip(int count) { + char junk[4096]; + int skipped = 0; + while (skipped < count) { + int bytes = Read(junk, std::min(count - skipped, + implicit_cast<int>(sizeof(junk)))); + if (bytes <= 0) { + // EOF or read error. + return skipped; + } + skipped += bytes; + } + return skipped; +} + +CopyingInputStreamAdaptor::CopyingInputStreamAdaptor( + CopyingInputStream* copying_stream, int block_size) + : copying_stream_(copying_stream), + owns_copying_stream_(false), + failed_(false), + position_(0), + buffer_size_(block_size > 0 ? block_size : kDefaultBlockSize), + buffer_used_(0), + backup_bytes_(0) {} + +CopyingInputStreamAdaptor::~CopyingInputStreamAdaptor() { + if (owns_copying_stream_) { + delete copying_stream_; + } +} + +bool CopyingInputStreamAdaptor::Next(const void** data, int* size) { + if (failed_) { + // Already failed on a previous read. + return false; + } + + AllocateBufferIfNeeded(); + + if (backup_bytes_ > 0) { + // We have data left over from a previous BackUp(), so just return that. + *data = buffer_.get() + buffer_used_ - backup_bytes_; + *size = backup_bytes_; + backup_bytes_ = 0; + return true; + } + + // Read new data into the buffer. + buffer_used_ = copying_stream_->Read(buffer_.get(), buffer_size_); + if (buffer_used_ <= 0) { + // EOF or read error. We don't need the buffer anymore. + if (buffer_used_ < 0) { + // Read error (not EOF). + failed_ = true; + } + FreeBuffer(); + return false; + } + position_ += buffer_used_; + + *size = buffer_used_; + *data = buffer_.get(); + return true; +} + +void CopyingInputStreamAdaptor::BackUp(int count) { + GOOGLE_CHECK(backup_bytes_ == 0 && buffer_.get() != NULL) + << " BackUp() can only be called after Next()."; + GOOGLE_CHECK_LE(count, buffer_used_) + << " Can't back up over more bytes than were returned by the last call" + " to Next()."; + GOOGLE_CHECK_GE(count, 0) << " Parameter to BackUp() can't be negative."; + + backup_bytes_ = count; +} + +bool CopyingInputStreamAdaptor::Skip(int count) { + GOOGLE_CHECK_GE(count, 0); + + if (failed_) { + // Already failed on a previous read. + return false; + } + + // First skip any bytes left over from a previous BackUp(). + if (backup_bytes_ >= count) { + // We have more data left over than we're trying to skip. Just chop it. + backup_bytes_ -= count; + return true; + } + + count -= backup_bytes_; + backup_bytes_ = 0; + + int skipped = copying_stream_->Skip(count); + position_ += skipped; + return skipped == count; +} + +int64_t CopyingInputStreamAdaptor::ByteCount() const { + return position_ - backup_bytes_; +} + +void CopyingInputStreamAdaptor::AllocateBufferIfNeeded() { + if (buffer_.get() == NULL) { + buffer_.reset(new uint8_t[buffer_size_]); + } +} + +void CopyingInputStreamAdaptor::FreeBuffer() { + GOOGLE_CHECK_EQ(backup_bytes_, 0); + buffer_used_ = 0; + buffer_.reset(); +} + +// =================================================================== + +CopyingOutputStreamAdaptor::CopyingOutputStreamAdaptor( + CopyingOutputStream* copying_stream, int block_size) + : copying_stream_(copying_stream), + owns_copying_stream_(false), + failed_(false), + position_(0), + buffer_size_(block_size > 0 ? block_size : kDefaultBlockSize), + buffer_used_(0) {} + +CopyingOutputStreamAdaptor::~CopyingOutputStreamAdaptor() { + WriteBuffer(); + if (owns_copying_stream_) { + delete copying_stream_; + } +} + +bool CopyingOutputStreamAdaptor::Flush() { return WriteBuffer(); } + +bool CopyingOutputStreamAdaptor::Next(void** data, int* size) { + if (buffer_used_ == buffer_size_) { + if (!WriteBuffer()) return false; + } + + AllocateBufferIfNeeded(); + + *data = buffer_.get() + buffer_used_; + *size = buffer_size_ - buffer_used_; + buffer_used_ = buffer_size_; + return true; +} + +void CopyingOutputStreamAdaptor::BackUp(int count) { + GOOGLE_CHECK_GE(count, 0); + GOOGLE_CHECK_EQ(buffer_used_, buffer_size_) + << " BackUp() can only be called after Next()."; + GOOGLE_CHECK_LE(count, buffer_used_) + << " Can't back up over more bytes than were returned by the last call" + " to Next()."; + + buffer_used_ -= count; +} + +int64_t CopyingOutputStreamAdaptor::ByteCount() const { + return position_ + buffer_used_; +} + +bool CopyingOutputStreamAdaptor::WriteAliasedRaw(const void* data, int size) { + if (size >= buffer_size_) { + if (!Flush() || !copying_stream_->Write(data, size)) { + return false; + } + GOOGLE_DCHECK_EQ(buffer_used_, 0); + position_ += size; + return true; + } + + void* out; + int out_size; + while (true) { + if (!Next(&out, &out_size)) { + return false; + } + + if (size <= out_size) { + std::memcpy(out, data, size); + BackUp(out_size - size); + return true; + } + + std::memcpy(out, data, out_size); + data = static_cast<const char*>(data) + out_size; + size -= out_size; + } + return true; +} + + +bool CopyingOutputStreamAdaptor::WriteBuffer() { + if (failed_) { + // Already failed on a previous write. + return false; + } + + if (buffer_used_ == 0) return true; + + if (copying_stream_->Write(buffer_.get(), buffer_used_)) { + position_ += buffer_used_; + buffer_used_ = 0; + return true; + } else { + failed_ = true; + FreeBuffer(); + return false; + } +} + +void CopyingOutputStreamAdaptor::AllocateBufferIfNeeded() { + if (buffer_ == NULL) { + buffer_.reset(new uint8_t[buffer_size_]); + } +} + +void CopyingOutputStreamAdaptor::FreeBuffer() { + buffer_used_ = 0; + buffer_.reset(); +} + +// =================================================================== + +LimitingInputStream::LimitingInputStream(ZeroCopyInputStream* input, + int64_t limit) + : input_(input), limit_(limit) { + prior_bytes_read_ = input_->ByteCount(); +} + +LimitingInputStream::~LimitingInputStream() { + // If we overshot the limit, back up. + if (limit_ < 0) input_->BackUp(-limit_); +} + +bool LimitingInputStream::Next(const void** data, int* size) { + if (limit_ <= 0) return false; + if (!input_->Next(data, size)) return false; + + limit_ -= *size; + if (limit_ < 0) { + // We overshot the limit. Reduce *size to hide the rest of the buffer. + *size += limit_; + } + return true; +} + +void LimitingInputStream::BackUp(int count) { + if (limit_ < 0) { + input_->BackUp(count - limit_); + limit_ = count; + } else { + input_->BackUp(count); + limit_ += count; + } +} + +bool LimitingInputStream::Skip(int count) { + if (count > limit_) { + if (limit_ < 0) return false; + input_->Skip(limit_); + limit_ = 0; + return false; + } else { + if (!input_->Skip(count)) return false; + limit_ -= count; + return true; + } +} + +int64_t LimitingInputStream::ByteCount() const { + if (limit_ < 0) { + return input_->ByteCount() + limit_ - prior_bytes_read_; + } else { + return input_->ByteCount() - prior_bytes_read_; + } +} + + +// =================================================================== + +} // namespace io +} // namespace protobuf +} // namespace google diff --git a/NorthstarDedicatedTest/include/protobuf/io/zero_copy_stream_impl_lite.h b/NorthstarDedicatedTest/include/protobuf/io/zero_copy_stream_impl_lite.h new file mode 100644 index 00000000..25d9750a --- /dev/null +++ b/NorthstarDedicatedTest/include/protobuf/io/zero_copy_stream_impl_lite.h @@ -0,0 +1,408 @@ +// Protocol Buffers - Google's data interchange format +// Copyright 2008 Google Inc. All rights reserved. +// https://developers.google.com/protocol-buffers/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// Author: kenton@google.com (Kenton Varda) +// Based on original Protocol Buffers design by +// Sanjay Ghemawat, Jeff Dean, and others. +// +// This file contains common implementations of the interfaces defined in +// zero_copy_stream.h which are included in the "lite" protobuf library. +// These implementations cover I/O on raw arrays and strings, as well as +// adaptors which make it easy to implement streams based on traditional +// streams. Of course, many users will probably want to write their own +// implementations of these interfaces specific to the particular I/O +// abstractions they prefer to use, but these should cover the most common +// cases. + +#ifndef GOOGLE_PROTOBUF_IO_ZERO_COPY_STREAM_IMPL_LITE_H__ +#define GOOGLE_PROTOBUF_IO_ZERO_COPY_STREAM_IMPL_LITE_H__ + + +#include <iosfwd> +#include <memory> +#include <string> + +#include <stubs/callback.h> +#include <stubs/common.h> +#include <io/zero_copy_stream.h> +#include <stubs/stl_util.h> + + +#include <port_def.inc> + +namespace google { +namespace protobuf { +namespace io { + +// =================================================================== + +// A ZeroCopyInputStream backed by an in-memory array of bytes. +class PROTOBUF_EXPORT ArrayInputStream : public ZeroCopyInputStream { + public: + // Create an InputStream that returns the bytes pointed to by "data". + // "data" remains the property of the caller but must remain valid until + // the stream is destroyed. If a block_size is given, calls to Next() + // will return data blocks no larger than the given size. Otherwise, the + // first call to Next() returns the entire array. block_size is mainly + // useful for testing; in production you would probably never want to set + // it. + ArrayInputStream(const void* data, int size, int block_size = -1); + ~ArrayInputStream() override = default; + + // implements ZeroCopyInputStream ---------------------------------- + bool Next(const void** data, int* size) override; + void BackUp(int count) override; + bool Skip(int count) override; + int64_t ByteCount() const override; + + + private: + const uint8_t* const data_; // The byte array. + const int size_; // Total size of the array. + const int block_size_; // How many bytes to return at a time. + + int position_; + int last_returned_size_; // How many bytes we returned last time Next() + // was called (used for error checking only). + + GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(ArrayInputStream); +}; + +// =================================================================== + +// A ZeroCopyOutputStream backed by an in-memory array of bytes. +class PROTOBUF_EXPORT ArrayOutputStream : public ZeroCopyOutputStream { + public: + // Create an OutputStream that writes to the bytes pointed to by "data". + // "data" remains the property of the caller but must remain valid until + // the stream is destroyed. If a block_size is given, calls to Next() + // will return data blocks no larger than the given size. Otherwise, the + // first call to Next() returns the entire array. block_size is mainly + // useful for testing; in production you would probably never want to set + // it. + ArrayOutputStream(void* data, int size, int block_size = -1); + ~ArrayOutputStream() override = default; + + // implements ZeroCopyOutputStream --------------------------------- + bool Next(void** data, int* size) override; + void BackUp(int count) override; + int64_t ByteCount() const override; + + private: + uint8_t* const data_; // The byte array. + const int size_; // Total size of the array. + const int block_size_; // How many bytes to return at a time. + + int position_; + int last_returned_size_; // How many bytes we returned last time Next() + // was called (used for error checking only). + + GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(ArrayOutputStream); +}; + +// =================================================================== + +// A ZeroCopyOutputStream which appends bytes to a string. +class PROTOBUF_EXPORT StringOutputStream : public ZeroCopyOutputStream { + public: + // Create a StringOutputStream which appends bytes to the given string. + // The string remains property of the caller, but it is mutated in arbitrary + // ways and MUST NOT be accessed in any way until you're done with the + // stream. Either be sure there's no further usage, or (safest) destroy the + // stream before using the contents. + // + // Hint: If you call target->reserve(n) before creating the stream, + // the first call to Next() will return at least n bytes of buffer + // space. + explicit StringOutputStream(std::string* target); + ~StringOutputStream() override = default; + + // implements ZeroCopyOutputStream --------------------------------- + bool Next(void** data, int* size) override; + void BackUp(int count) override; + int64_t ByteCount() const override; + + private: + static constexpr size_t kMinimumSize = 16; + + std::string* target_; + + GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(StringOutputStream); +}; + +// Note: There is no StringInputStream. Instead, just create an +// ArrayInputStream as follows: +// ArrayInputStream input(str.data(), str.size()); + +// =================================================================== + +// A generic traditional input stream interface. +// +// Lots of traditional input streams (e.g. file descriptors, C stdio +// streams, and C++ iostreams) expose an interface where every read +// involves copying bytes into a buffer. If you want to take such an +// interface and make a ZeroCopyInputStream based on it, simply implement +// CopyingInputStream and then use CopyingInputStreamAdaptor. +// +// CopyingInputStream implementations should avoid buffering if possible. +// CopyingInputStreamAdaptor does its own buffering and will read data +// in large blocks. +class PROTOBUF_EXPORT CopyingInputStream { + public: + virtual ~CopyingInputStream() {} + + // Reads up to "size" bytes into the given buffer. Returns the number of + // bytes read. Read() waits until at least one byte is available, or + // returns zero if no bytes will ever become available (EOF), or -1 if a + // permanent read error occurred. + virtual int Read(void* buffer, int size) = 0; + + // Skips the next "count" bytes of input. Returns the number of bytes + // actually skipped. This will always be exactly equal to "count" unless + // EOF was reached or a permanent read error occurred. + // + // The default implementation just repeatedly calls Read() into a scratch + // buffer. + virtual int Skip(int count); +}; + +// A ZeroCopyInputStream which reads from a CopyingInputStream. This is +// useful for implementing ZeroCopyInputStreams that read from traditional +// streams. Note that this class is not really zero-copy. +// +// If you want to read from file descriptors or C++ istreams, this is +// already implemented for you: use FileInputStream or IstreamInputStream +// respectively. +class PROTOBUF_EXPORT CopyingInputStreamAdaptor : public ZeroCopyInputStream { + public: + // Creates a stream that reads from the given CopyingInputStream. + // If a block_size is given, it specifies the number of bytes that + // should be read and returned with each call to Next(). Otherwise, + // a reasonable default is used. The caller retains ownership of + // copying_stream unless SetOwnsCopyingStream(true) is called. + explicit CopyingInputStreamAdaptor(CopyingInputStream* copying_stream, + int block_size = -1); + ~CopyingInputStreamAdaptor() override; + + // Call SetOwnsCopyingStream(true) to tell the CopyingInputStreamAdaptor to + // delete the underlying CopyingInputStream when it is destroyed. + void SetOwnsCopyingStream(bool value) { owns_copying_stream_ = value; } + + // implements ZeroCopyInputStream ---------------------------------- + bool Next(const void** data, int* size) override; + void BackUp(int count) override; + bool Skip(int count) override; + int64_t ByteCount() const override; + + private: + // Insures that buffer_ is not NULL. + void AllocateBufferIfNeeded(); + // Frees the buffer and resets buffer_used_. + void FreeBuffer(); + + // The underlying copying stream. + CopyingInputStream* copying_stream_; + bool owns_copying_stream_; + + // True if we have seen a permanent error from the underlying stream. + bool failed_; + + // The current position of copying_stream_, relative to the point where + // we started reading. + int64_t position_; + + // Data is read into this buffer. It may be NULL if no buffer is currently + // in use. Otherwise, it points to an array of size buffer_size_. + std::unique_ptr<uint8_t[]> buffer_; + const int buffer_size_; + + // Number of valid bytes currently in the buffer (i.e. the size last + // returned by Next()). 0 <= buffer_used_ <= buffer_size_. + int buffer_used_; + + // Number of bytes in the buffer which were backed up over by a call to + // BackUp(). These need to be returned again. + // 0 <= backup_bytes_ <= buffer_used_ + int backup_bytes_; + + GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(CopyingInputStreamAdaptor); +}; + +// =================================================================== + +// A generic traditional output stream interface. +// +// Lots of traditional output streams (e.g. file descriptors, C stdio +// streams, and C++ iostreams) expose an interface where every write +// involves copying bytes from a buffer. If you want to take such an +// interface and make a ZeroCopyOutputStream based on it, simply implement +// CopyingOutputStream and then use CopyingOutputStreamAdaptor. +// +// CopyingOutputStream implementations should avoid buffering if possible. +// CopyingOutputStreamAdaptor does its own buffering and will write data +// in large blocks. +class PROTOBUF_EXPORT CopyingOutputStream { + public: + virtual ~CopyingOutputStream() {} + + // Writes "size" bytes from the given buffer to the output. Returns true + // if successful, false on a write error. + virtual bool Write(const void* buffer, int size) = 0; +}; + +// A ZeroCopyOutputStream which writes to a CopyingOutputStream. This is +// useful for implementing ZeroCopyOutputStreams that write to traditional +// streams. Note that this class is not really zero-copy. +// +// If you want to write to file descriptors or C++ ostreams, this is +// already implemented for you: use FileOutputStream or OstreamOutputStream +// respectively. +class PROTOBUF_EXPORT CopyingOutputStreamAdaptor : public ZeroCopyOutputStream { + public: + // Creates a stream that writes to the given Unix file descriptor. + // If a block_size is given, it specifies the size of the buffers + // that should be returned by Next(). Otherwise, a reasonable default + // is used. + explicit CopyingOutputStreamAdaptor(CopyingOutputStream* copying_stream, + int block_size = -1); + ~CopyingOutputStreamAdaptor() override; + + // Writes all pending data to the underlying stream. Returns false if a + // write error occurred on the underlying stream. (The underlying + // stream itself is not necessarily flushed.) + bool Flush(); + + // Call SetOwnsCopyingStream(true) to tell the CopyingOutputStreamAdaptor to + // delete the underlying CopyingOutputStream when it is destroyed. + void SetOwnsCopyingStream(bool value) { owns_copying_stream_ = value; } + + // implements ZeroCopyOutputStream --------------------------------- + bool Next(void** data, int* size) override; + void BackUp(int count) override; + int64_t ByteCount() const override; + bool WriteAliasedRaw(const void* data, int size) override; + bool AllowsAliasing() const override { return true; } + + private: + // Write the current buffer, if it is present. + bool WriteBuffer(); + // Insures that buffer_ is not NULL. + void AllocateBufferIfNeeded(); + // Frees the buffer. + void FreeBuffer(); + + // The underlying copying stream. + CopyingOutputStream* copying_stream_; + bool owns_copying_stream_; + + // True if we have seen a permanent error from the underlying stream. + bool failed_; + + // The current position of copying_stream_, relative to the point where + // we started writing. + int64_t position_; + + // Data is written from this buffer. It may be NULL if no buffer is + // currently in use. Otherwise, it points to an array of size buffer_size_. + std::unique_ptr<uint8_t[]> buffer_; + const int buffer_size_; + + // Number of valid bytes currently in the buffer (i.e. the size last + // returned by Next()). When BackUp() is called, we just reduce this. + // 0 <= buffer_used_ <= buffer_size_. + int buffer_used_; + + GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(CopyingOutputStreamAdaptor); +}; + +// =================================================================== + +// A ZeroCopyInputStream which wraps some other stream and limits it to +// a particular byte count. +class PROTOBUF_EXPORT LimitingInputStream : public ZeroCopyInputStream { + public: + LimitingInputStream(ZeroCopyInputStream* input, int64_t limit); + ~LimitingInputStream() override; + + // implements ZeroCopyInputStream ---------------------------------- + bool Next(const void** data, int* size) override; + void BackUp(int count) override; + bool Skip(int count) override; + int64_t ByteCount() const override; + + + private: + ZeroCopyInputStream* input_; + int64_t limit_; // Decreases as we go, becomes negative if we overshoot. + int64_t prior_bytes_read_; // Bytes read on underlying stream at construction + + GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(LimitingInputStream); +}; + + +// =================================================================== + +// mutable_string_data() and as_string_data() are workarounds to improve +// the performance of writing new data to an existing string. Unfortunately +// the methods provided by the string class are suboptimal, and using memcpy() +// is mildly annoying because it requires its pointer args to be non-NULL even +// if we ask it to copy 0 bytes. Furthermore, string_as_array() has the +// property that it always returns NULL if its arg is the empty string, exactly +// what we want to avoid if we're using it in conjunction with memcpy()! +// With C++11, the desired memcpy() boils down to memcpy(..., &(*s)[0], size), +// where s is a string*. Without C++11, &(*s)[0] is not guaranteed to be safe, +// so we use string_as_array(), and live with the extra logic that tests whether +// *s is empty. + +// Return a pointer to mutable characters underlying the given string. The +// return value is valid until the next time the string is resized. We +// trust the caller to treat the return value as an array of length s->size(). +inline char* mutable_string_data(std::string* s) { + // This should be simpler & faster than string_as_array() because the latter + // is guaranteed to return NULL when *s is empty, so it has to check for that. + return &(*s)[0]; +} + +// as_string_data(s) is equivalent to +// ({ char* p = mutable_string_data(s); make_pair(p, p != NULL); }) +// Sometimes it's faster: in some scenarios p cannot be NULL, and then the +// code can avoid that check. +inline std::pair<char*, bool> as_string_data(std::string* s) { + char* p = mutable_string_data(s); + return std::make_pair(p, true); +} + +} // namespace io +} // namespace protobuf +} // namespace google + +#include <port_undef.inc> + +#endif // GOOGLE_PROTOBUF_IO_ZERO_COPY_STREAM_IMPL_LITE_H__ diff --git a/NorthstarDedicatedTest/include/protobuf/io/zero_copy_stream_unittest.cc b/NorthstarDedicatedTest/include/protobuf/io/zero_copy_stream_unittest.cc new file mode 100644 index 00000000..149fa881 --- /dev/null +++ b/NorthstarDedicatedTest/include/protobuf/io/zero_copy_stream_unittest.cc @@ -0,0 +1,1088 @@ +// Protocol Buffers - Google's data interchange format +// Copyright 2008 Google Inc. All rights reserved. +// https://developers.google.com/protocol-buffers/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// Author: kenton@google.com (Kenton Varda) +// Based on original Protocol Buffers design by +// Sanjay Ghemawat, Jeff Dean, and others. +// +// Testing strategy: For each type of I/O (array, string, file, etc.) we +// create an output stream and write some data to it, then create a +// corresponding input stream to read the same data back and expect it to +// match. When the data is written, it is written in several small chunks +// of varying sizes, with a BackUp() after each chunk. It is read back +// similarly, but with chunks separated at different points. The whole +// process is run with a variety of block sizes for both the input and +// the output. +// +// TODO(kenton): Rewrite this test to bring it up to the standards of all +// the other proto2 tests. May want to wait for gTest to implement +// "parametized tests" so that one set of tests can be used on all the +// implementations. + +#include <chrono> +#include <thread> + +#ifndef _MSC_VER +#include <sys/socket.h> +#include <unistd.h> +#endif +#include <errno.h> +#include <fcntl.h> +#include <stdlib.h> +#include <sys/stat.h> +#include <sys/types.h> + +#include <memory> +#include <sstream> + +#include <testing/file.h> +#include <test_util2.h> +#include <io/coded_stream.h> +#include <io/io_win32.h> +#include <io/zero_copy_stream_impl.h> + +#if HAVE_ZLIB +#include <io/gzip_stream.h> +#endif + +#include <stubs/common.h> +#include <stubs/logging.h> +#include <testing/file.h> +#include <testing/googletest.h> +#include <gtest/gtest.h> + +namespace google { +namespace protobuf { +namespace io { +namespace { + +#ifdef _WIN32 +#define pipe(fds) _pipe(fds, 4096, O_BINARY) +// DO NOT include <io.h>, instead create functions in io_win32.{h,cc} and import +// them like we do below. +using google::protobuf::io::win32::access; +using google::protobuf::io::win32::close; +using google::protobuf::io::win32::mkdir; +using google::protobuf::io::win32::open; +#endif + +#ifndef O_BINARY +#ifdef _O_BINARY +#define O_BINARY _O_BINARY +#else +#define O_BINARY 0 // If this isn't defined, the platform doesn't need it. +#endif +#endif + +class IoTest : public testing::Test { + protected: + // Test helpers. + + // Helper to write an array of data to an output stream. + bool WriteToOutput(ZeroCopyOutputStream* output, const void* data, int size); + // Helper to read a fixed-length array of data from an input stream. + int ReadFromInput(ZeroCopyInputStream* input, void* data, int size); + // Write a string to the output stream. + void WriteString(ZeroCopyOutputStream* output, const std::string& str); + // Read a number of bytes equal to the size of the given string and checks + // that it matches the string. + void ReadString(ZeroCopyInputStream* input, const std::string& str); + // Writes some text to the output stream in a particular order. Returns + // the number of bytes written, in case the caller needs that to set up an + // input stream. + int WriteStuff(ZeroCopyOutputStream* output); + // Reads text from an input stream and expects it to match what + // WriteStuff() writes. + void ReadStuff(ZeroCopyInputStream* input, bool read_eof = true); + + // Similar to WriteStuff, but performs more sophisticated testing. + int WriteStuffLarge(ZeroCopyOutputStream* output); + // Reads and tests a stream that should have been written to + // via WriteStuffLarge(). + void ReadStuffLarge(ZeroCopyInputStream* input); + +#if HAVE_ZLIB + std::string Compress(const std::string& data, + const GzipOutputStream::Options& options); + std::string Uncompress(const std::string& data); +#endif + + static const int kBlockSizes[]; + static const int kBlockSizeCount; +}; + +const int IoTest::kBlockSizes[] = {-1, 1, 2, 5, 7, 10, 23, 64}; +const int IoTest::kBlockSizeCount = GOOGLE_ARRAYSIZE(IoTest::kBlockSizes); + +bool IoTest::WriteToOutput(ZeroCopyOutputStream* output, const void* data, + int size) { + const uint8* in = reinterpret_cast<const uint8*>(data); + int in_size = size; + + void* out; + int out_size; + + while (true) { + if (!output->Next(&out, &out_size)) { + return false; + } + EXPECT_GT(out_size, 0); + + if (in_size <= out_size) { + memcpy(out, in, in_size); + output->BackUp(out_size - in_size); + return true; + } + + memcpy(out, in, out_size); + in += out_size; + in_size -= out_size; + } +} + +#define MAX_REPEATED_ZEROS 100 + +int IoTest::ReadFromInput(ZeroCopyInputStream* input, void* data, int size) { + uint8* out = reinterpret_cast<uint8*>(data); + int out_size = size; + + const void* in; + int in_size = 0; + + int repeated_zeros = 0; + + while (true) { + if (!input->Next(&in, &in_size)) { + return size - out_size; + } + EXPECT_GT(in_size, -1); + if (in_size == 0) { + repeated_zeros++; + } else { + repeated_zeros = 0; + } + EXPECT_LT(repeated_zeros, MAX_REPEATED_ZEROS); + + if (out_size <= in_size) { + memcpy(out, in, out_size); + if (in_size > out_size) { + input->BackUp(in_size - out_size); + } + return size; // Copied all of it. + } + + memcpy(out, in, in_size); + out += in_size; + out_size -= in_size; + } +} + +void IoTest::WriteString(ZeroCopyOutputStream* output, const std::string& str) { + EXPECT_TRUE(WriteToOutput(output, str.c_str(), str.size())); +} + +void IoTest::ReadString(ZeroCopyInputStream* input, const std::string& str) { + std::unique_ptr<char[]> buffer(new char[str.size() + 1]); + buffer[str.size()] = '\0'; + EXPECT_EQ(ReadFromInput(input, buffer.get(), str.size()), str.size()); + EXPECT_STREQ(str.c_str(), buffer.get()); +} + +int IoTest::WriteStuff(ZeroCopyOutputStream* output) { + WriteString(output, "Hello world!\n"); + WriteString(output, "Some te"); + WriteString(output, "xt. Blah blah."); + WriteString(output, "abcdefg"); + WriteString(output, "01234567890123456789"); + WriteString(output, "foobar"); + + EXPECT_EQ(output->ByteCount(), 68); + + int result = output->ByteCount(); + return result; +} + +// Reads text from an input stream and expects it to match what WriteStuff() +// writes. +void IoTest::ReadStuff(ZeroCopyInputStream* input, bool read_eof) { + ReadString(input, "Hello world!\n"); + ReadString(input, "Some text. "); + ReadString(input, "Blah "); + ReadString(input, "blah."); + ReadString(input, "abcdefg"); + EXPECT_TRUE(input->Skip(20)); + ReadString(input, "foo"); + ReadString(input, "bar"); + + EXPECT_EQ(input->ByteCount(), 68); + + if (read_eof) { + uint8 byte; + EXPECT_EQ(ReadFromInput(input, &byte, 1), 0); + } +} + +int IoTest::WriteStuffLarge(ZeroCopyOutputStream* output) { + WriteString(output, "Hello world!\n"); + WriteString(output, "Some te"); + WriteString(output, "xt. Blah blah."); + WriteString(output, std::string(100000, 'x')); // A very long string + WriteString(output, std::string(100000, 'y')); // A very long string + WriteString(output, "01234567890123456789"); + + EXPECT_EQ(output->ByteCount(), 200055); + + int result = output->ByteCount(); + return result; +} + +// Reads text from an input stream and expects it to match what WriteStuff() +// writes. +void IoTest::ReadStuffLarge(ZeroCopyInputStream* input) { + ReadString(input, "Hello world!\nSome text. "); + EXPECT_TRUE(input->Skip(5)); + ReadString(input, "blah."); + EXPECT_TRUE(input->Skip(100000 - 10)); + ReadString(input, std::string(10, 'x') + std::string(100000 - 20000, 'y')); + EXPECT_TRUE(input->Skip(20000 - 10)); + ReadString(input, "yyyyyyyyyy01234567890123456789"); + + EXPECT_EQ(input->ByteCount(), 200055); + + uint8 byte; + EXPECT_EQ(ReadFromInput(input, &byte, 1), 0); +} + +// =================================================================== + +TEST_F(IoTest, ArrayIo) { + const int kBufferSize = 256; + uint8 buffer[kBufferSize]; + + for (int i = 0; i < kBlockSizeCount; i++) { + for (int j = 0; j < kBlockSizeCount; j++) { + int size; + { + ArrayOutputStream output(buffer, kBufferSize, kBlockSizes[i]); + size = WriteStuff(&output); + } + { + ArrayInputStream input(buffer, size, kBlockSizes[j]); + ReadStuff(&input); + } + } + } +} + +TEST_F(IoTest, TwoSessionWrite) { + // Test that two concatenated write sessions read correctly + + static const char* strA = "0123456789"; + static const char* strB = "WhirledPeas"; + const int kBufferSize = 2 * 1024; + uint8* buffer = new uint8[kBufferSize]; + char* temp_buffer = new char[40]; + + for (int i = 0; i < kBlockSizeCount; i++) { + for (int j = 0; j < kBlockSizeCount; j++) { + ArrayOutputStream* output = + new ArrayOutputStream(buffer, kBufferSize, kBlockSizes[i]); + CodedOutputStream* coded_output = new CodedOutputStream(output); + coded_output->WriteVarint32(strlen(strA)); + coded_output->WriteRaw(strA, strlen(strA)); + delete coded_output; // flush + int64 pos = output->ByteCount(); + delete output; + output = new ArrayOutputStream(buffer + pos, kBufferSize - pos, + kBlockSizes[i]); + coded_output = new CodedOutputStream(output); + coded_output->WriteVarint32(strlen(strB)); + coded_output->WriteRaw(strB, strlen(strB)); + delete coded_output; // flush + int64 size = pos + output->ByteCount(); + delete output; + + ArrayInputStream* input = + new ArrayInputStream(buffer, size, kBlockSizes[j]); + CodedInputStream* coded_input = new CodedInputStream(input); + uint32 insize; + EXPECT_TRUE(coded_input->ReadVarint32(&insize)); + EXPECT_EQ(strlen(strA), insize); + EXPECT_TRUE(coded_input->ReadRaw(temp_buffer, insize)); + EXPECT_EQ(0, memcmp(temp_buffer, strA, insize)); + + EXPECT_TRUE(coded_input->ReadVarint32(&insize)); + EXPECT_EQ(strlen(strB), insize); + EXPECT_TRUE(coded_input->ReadRaw(temp_buffer, insize)); + EXPECT_EQ(0, memcmp(temp_buffer, strB, insize)); + + delete coded_input; + delete input; + } + } + + delete[] temp_buffer; + delete[] buffer; +} + +#if HAVE_ZLIB +TEST_F(IoTest, GzipIo) { + const int kBufferSize = 2 * 1024; + uint8* buffer = new uint8[kBufferSize]; + for (int i = 0; i < kBlockSizeCount; i++) { + for (int j = 0; j < kBlockSizeCount; j++) { + for (int z = 0; z < kBlockSizeCount; z++) { + int gzip_buffer_size = kBlockSizes[z]; + int size; + { + ArrayOutputStream output(buffer, kBufferSize, kBlockSizes[i]); + GzipOutputStream::Options options; + options.format = GzipOutputStream::GZIP; + if (gzip_buffer_size != -1) { + options.buffer_size = gzip_buffer_size; + } + GzipOutputStream gzout(&output, options); + WriteStuff(&gzout); + gzout.Close(); + size = output.ByteCount(); + } + { + ArrayInputStream input(buffer, size, kBlockSizes[j]); + GzipInputStream gzin(&input, GzipInputStream::GZIP, gzip_buffer_size); + ReadStuff(&gzin); + } + } + } + } + delete[] buffer; +} + +TEST_F(IoTest, GzipIoWithFlush) { + const int kBufferSize = 2 * 1024; + uint8* buffer = new uint8[kBufferSize]; + // We start with i = 4 as we want a block size > 6. With block size <= 6 + // Flush() fills up the entire 2K buffer with flush markers and the test + // fails. See documentation for Flush() for more detail. + for (int i = 4; i < kBlockSizeCount; i++) { + for (int j = 0; j < kBlockSizeCount; j++) { + for (int z = 0; z < kBlockSizeCount; z++) { + int gzip_buffer_size = kBlockSizes[z]; + int size; + { + ArrayOutputStream output(buffer, kBufferSize, kBlockSizes[i]); + GzipOutputStream::Options options; + options.format = GzipOutputStream::GZIP; + if (gzip_buffer_size != -1) { + options.buffer_size = gzip_buffer_size; + } + GzipOutputStream gzout(&output, options); + WriteStuff(&gzout); + EXPECT_TRUE(gzout.Flush()); + gzout.Close(); + size = output.ByteCount(); + } + { + ArrayInputStream input(buffer, size, kBlockSizes[j]); + GzipInputStream gzin(&input, GzipInputStream::GZIP, gzip_buffer_size); + ReadStuff(&gzin); + } + } + } + } + delete[] buffer; +} + +TEST_F(IoTest, GzipIoContiguousFlushes) { + const int kBufferSize = 2 * 1024; + uint8* buffer = new uint8[kBufferSize]; + + int block_size = kBlockSizes[4]; + int gzip_buffer_size = block_size; + int size; + + ArrayOutputStream output(buffer, kBufferSize, block_size); + GzipOutputStream::Options options; + options.format = GzipOutputStream::GZIP; + if (gzip_buffer_size != -1) { + options.buffer_size = gzip_buffer_size; + } + GzipOutputStream gzout(&output, options); + WriteStuff(&gzout); + EXPECT_TRUE(gzout.Flush()); + EXPECT_TRUE(gzout.Flush()); + gzout.Close(); + size = output.ByteCount(); + + ArrayInputStream input(buffer, size, block_size); + GzipInputStream gzin(&input, GzipInputStream::GZIP, gzip_buffer_size); + ReadStuff(&gzin); + + delete[] buffer; +} + +TEST_F(IoTest, GzipIoReadAfterFlush) { + const int kBufferSize = 2 * 1024; + uint8* buffer = new uint8[kBufferSize]; + + int block_size = kBlockSizes[4]; + int gzip_buffer_size = block_size; + int size; + ArrayOutputStream output(buffer, kBufferSize, block_size); + GzipOutputStream::Options options; + options.format = GzipOutputStream::GZIP; + if (gzip_buffer_size != -1) { + options.buffer_size = gzip_buffer_size; + } + + GzipOutputStream gzout(&output, options); + WriteStuff(&gzout); + EXPECT_TRUE(gzout.Flush()); + size = output.ByteCount(); + + ArrayInputStream input(buffer, size, block_size); + GzipInputStream gzin(&input, GzipInputStream::GZIP, gzip_buffer_size); + ReadStuff(&gzin); + + gzout.Close(); + + delete[] buffer; +} + +TEST_F(IoTest, ZlibIo) { + const int kBufferSize = 2 * 1024; + uint8* buffer = new uint8[kBufferSize]; + for (int i = 0; i < kBlockSizeCount; i++) { + for (int j = 0; j < kBlockSizeCount; j++) { + for (int z = 0; z < kBlockSizeCount; z++) { + int gzip_buffer_size = kBlockSizes[z]; + int size; + { + ArrayOutputStream output(buffer, kBufferSize, kBlockSizes[i]); + GzipOutputStream::Options options; + options.format = GzipOutputStream::ZLIB; + if (gzip_buffer_size != -1) { + options.buffer_size = gzip_buffer_size; + } + GzipOutputStream gzout(&output, options); + WriteStuff(&gzout); + gzout.Close(); + size = output.ByteCount(); + } + { + ArrayInputStream input(buffer, size, kBlockSizes[j]); + GzipInputStream gzin(&input, GzipInputStream::ZLIB, gzip_buffer_size); + ReadStuff(&gzin); + } + } + } + } + delete[] buffer; +} + +TEST_F(IoTest, ZlibIoInputAutodetect) { + const int kBufferSize = 2 * 1024; + uint8* buffer = new uint8[kBufferSize]; + int size; + { + ArrayOutputStream output(buffer, kBufferSize); + GzipOutputStream::Options options; + options.format = GzipOutputStream::ZLIB; + GzipOutputStream gzout(&output, options); + WriteStuff(&gzout); + gzout.Close(); + size = output.ByteCount(); + } + { + ArrayInputStream input(buffer, size); + GzipInputStream gzin(&input, GzipInputStream::AUTO); + ReadStuff(&gzin); + } + { + ArrayOutputStream output(buffer, kBufferSize); + GzipOutputStream::Options options; + options.format = GzipOutputStream::GZIP; + GzipOutputStream gzout(&output, options); + WriteStuff(&gzout); + gzout.Close(); + size = output.ByteCount(); + } + { + ArrayInputStream input(buffer, size); + GzipInputStream gzin(&input, GzipInputStream::AUTO); + ReadStuff(&gzin); + } + delete[] buffer; +} + +std::string IoTest::Compress(const std::string& data, + const GzipOutputStream::Options& options) { + std::string result; + { + StringOutputStream output(&result); + GzipOutputStream gzout(&output, options); + WriteToOutput(&gzout, data.data(), data.size()); + } + return result; +} + +std::string IoTest::Uncompress(const std::string& data) { + std::string result; + { + ArrayInputStream input(data.data(), data.size()); + GzipInputStream gzin(&input); + const void* buffer; + int size; + while (gzin.Next(&buffer, &size)) { + result.append(reinterpret_cast<const char*>(buffer), size); + } + } + return result; +} + +TEST_F(IoTest, CompressionOptions) { + // Some ad-hoc testing of compression options. + + std::string golden_filename = + TestUtil::GetTestDataPath("net/proto2/internal/testdata/golden_message"); + std::string golden; + GOOGLE_CHECK_OK(File::GetContents(golden_filename, &golden, true)); + + GzipOutputStream::Options options; + std::string gzip_compressed = Compress(golden, options); + + options.compression_level = 0; + std::string not_compressed = Compress(golden, options); + + // Try zlib compression for fun. + options = GzipOutputStream::Options(); + options.format = GzipOutputStream::ZLIB; + std::string zlib_compressed = Compress(golden, options); + + // Uncompressed should be bigger than the original since it should have some + // sort of header. + EXPECT_GT(not_compressed.size(), golden.size()); + + // Higher compression levels should result in smaller sizes. + EXPECT_LT(zlib_compressed.size(), not_compressed.size()); + + // ZLIB format should differ from GZIP format. + EXPECT_TRUE(zlib_compressed != gzip_compressed); + + // Everything should decompress correctly. + EXPECT_TRUE(Uncompress(not_compressed) == golden); + EXPECT_TRUE(Uncompress(gzip_compressed) == golden); + EXPECT_TRUE(Uncompress(zlib_compressed) == golden); +} + +TEST_F(IoTest, TwoSessionWriteGzip) { + // Test that two concatenated gzip streams can be read correctly + + static const char* strA = "0123456789"; + static const char* strB = "QuickBrownFox"; + const int kBufferSize = 2 * 1024; + uint8* buffer = new uint8[kBufferSize]; + char* temp_buffer = new char[40]; + + for (int i = 0; i < kBlockSizeCount; i++) { + for (int j = 0; j < kBlockSizeCount; j++) { + ArrayOutputStream* output = + new ArrayOutputStream(buffer, kBufferSize, kBlockSizes[i]); + GzipOutputStream* gzout = new GzipOutputStream(output); + CodedOutputStream* coded_output = new CodedOutputStream(gzout); + int32 outlen = strlen(strA) + 1; + coded_output->WriteVarint32(outlen); + coded_output->WriteRaw(strA, outlen); + delete coded_output; // flush + delete gzout; // flush + int64 pos = output->ByteCount(); + delete output; + output = new ArrayOutputStream(buffer + pos, kBufferSize - pos, + kBlockSizes[i]); + gzout = new GzipOutputStream(output); + coded_output = new CodedOutputStream(gzout); + outlen = strlen(strB) + 1; + coded_output->WriteVarint32(outlen); + coded_output->WriteRaw(strB, outlen); + delete coded_output; // flush + delete gzout; // flush + int64 size = pos + output->ByteCount(); + delete output; + + ArrayInputStream* input = + new ArrayInputStream(buffer, size, kBlockSizes[j]); + GzipInputStream* gzin = new GzipInputStream(input); + CodedInputStream* coded_input = new CodedInputStream(gzin); + uint32 insize; + EXPECT_TRUE(coded_input->ReadVarint32(&insize)); + EXPECT_EQ(strlen(strA) + 1, insize); + EXPECT_TRUE(coded_input->ReadRaw(temp_buffer, insize)); + EXPECT_EQ(0, memcmp(temp_buffer, strA, insize)) + << "strA=" << strA << " in=" << temp_buffer; + + EXPECT_TRUE(coded_input->ReadVarint32(&insize)); + EXPECT_EQ(strlen(strB) + 1, insize); + EXPECT_TRUE(coded_input->ReadRaw(temp_buffer, insize)); + EXPECT_EQ(0, memcmp(temp_buffer, strB, insize)) + << " out_block_size=" << kBlockSizes[i] + << " in_block_size=" << kBlockSizes[j] << " pos=" << pos + << " size=" << size << " strB=" << strB << " in=" << temp_buffer; + + delete coded_input; + delete gzin; + delete input; + } + } + + delete[] temp_buffer; + delete[] buffer; +} + +TEST_F(IoTest, GzipInputByteCountAfterClosed) { + std::string golden = "abcdefghijklmnopqrstuvwxyz"; + std::string compressed = Compress(golden, GzipOutputStream::Options()); + + for (int i = 0; i < kBlockSizeCount; i++) { + ArrayInputStream arr_input(compressed.data(), compressed.size(), + kBlockSizes[i]); + GzipInputStream gz_input(&arr_input); + const void* buffer; + int size; + while (gz_input.Next(&buffer, &size)) { + EXPECT_LE(gz_input.ByteCount(), golden.size()); + } + EXPECT_EQ(golden.size(), gz_input.ByteCount()); + } +} + +TEST_F(IoTest, GzipInputByteCountAfterClosedConcatenatedStreams) { + std::string golden1 = "abcdefghijklmnopqrstuvwxyz"; + std::string golden2 = "the quick brown fox jumps over the lazy dog"; + const size_t total_size = golden1.size() + golden2.size(); + std::string compressed = Compress(golden1, GzipOutputStream::Options()) + + Compress(golden2, GzipOutputStream::Options()); + + for (int i = 0; i < kBlockSizeCount; i++) { + ArrayInputStream arr_input(compressed.data(), compressed.size(), + kBlockSizes[i]); + GzipInputStream gz_input(&arr_input); + const void* buffer; + int size; + while (gz_input.Next(&buffer, &size)) { + EXPECT_LE(gz_input.ByteCount(), total_size); + } + EXPECT_EQ(total_size, gz_input.ByteCount()); + } +} +#endif + +// There is no string input, only string output. Also, it doesn't support +// explicit block sizes. So, we'll only run one test and we'll use +// ArrayInput to read back the results. +TEST_F(IoTest, StringIo) { + std::string str; + { + StringOutputStream output(&str); + WriteStuff(&output); + } + { + ArrayInputStream input(str.data(), str.size()); + ReadStuff(&input); + } +} + +// Verifies that outputs up to kint32max can be created. +TEST_F(IoTest, LargeOutput) { + std::string str; + StringOutputStream output(&str); + void* unused_data; + int size; + // Repeatedly calling Next should eventually grow the buffer to kint32max. + do { + EXPECT_TRUE(output.Next(&unused_data, &size)); + } while (str.size() < std::numeric_limits<int>::max()); + // Further increases should be possible. + output.Next(&unused_data, &size); + EXPECT_GT(size, 0); +} + + +// To test files, we create a temporary file, write, read, truncate, repeat. +TEST_F(IoTest, FileIo) { + std::string filename = TestTempDir() + "/zero_copy_stream_test_file"; + + for (int i = 0; i < kBlockSizeCount; i++) { + for (int j = 0; j < kBlockSizeCount; j++) { + // Make a temporary file. + int file = + open(filename.c_str(), O_RDWR | O_CREAT | O_TRUNC | O_BINARY, 0777); + ASSERT_GE(file, 0); + + { + FileOutputStream output(file, kBlockSizes[i]); + WriteStuff(&output); + EXPECT_EQ(0, output.GetErrno()); + } + + // Rewind. + ASSERT_NE(lseek(file, 0, SEEK_SET), (off_t)-1); + + { + FileInputStream input(file, kBlockSizes[j]); + ReadStuff(&input); + EXPECT_EQ(0, input.GetErrno()); + } + + close(file); + } + } +} + +#ifndef _MSC_VER +// This tests the FileInputStream with a non blocking file. It opens a pipe in +// non blocking mode, then starts reading it. The writing thread starts writing +// 100ms after that. +TEST_F(IoTest, NonBlockingFileIo) { + for (int i = 0; i < kBlockSizeCount; i++) { + for (int j = 0; j < kBlockSizeCount; j++) { + int fd[2]; + // On Linux we could use pipe2 to make the pipe non-blocking in one step, + // but we instead use pipe and fcntl because pipe2 is not available on + // Mac OS. + ASSERT_EQ(pipe(fd), 0); + ASSERT_EQ(fcntl(fd[0], F_SETFL, O_NONBLOCK), 0); + ASSERT_EQ(fcntl(fd[1], F_SETFL, O_NONBLOCK), 0); + + std::mutex go_write; + go_write.lock(); + + bool done_reading = false; + + std::thread write_thread([this, fd, &go_write, i]() { + go_write.lock(); + go_write.unlock(); + FileOutputStream output(fd[1], kBlockSizes[i]); + WriteStuff(&output); + EXPECT_EQ(0, output.GetErrno()); + }); + + std::thread read_thread([this, fd, &done_reading, j]() { + FileInputStream input(fd[0], kBlockSizes[j]); + ReadStuff(&input, false /* read_eof */); + done_reading = true; + close(fd[0]); + close(fd[1]); + EXPECT_EQ(0, input.GetErrno()); + }); + + // Sleeping is not necessary but makes the next expectation relevant: the + // reading thread waits for the data to be available before returning. + std::this_thread::sleep_for(std::chrono::milliseconds(100)); + EXPECT_FALSE(done_reading); + go_write.unlock(); + write_thread.join(); + read_thread.join(); + EXPECT_TRUE(done_reading); + } + } +} + +TEST_F(IoTest, BlockingFileIoWithTimeout) { + int fd[2]; + + for (int i = 0; i < kBlockSizeCount; i++) { + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, fd), 0); + struct timeval tv { + .tv_sec = 0, .tv_usec = 5000 + }; + ASSERT_EQ(setsockopt(fd[0], SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)), 0); + FileInputStream input(fd[0], kBlockSizes[i]); + uint8 byte; + EXPECT_EQ(ReadFromInput(&input, &byte, 1), 0); + EXPECT_EQ(EAGAIN, input.GetErrno()); + } +} +#endif + +#if HAVE_ZLIB +TEST_F(IoTest, GzipFileIo) { + std::string filename = TestTempDir() + "/zero_copy_stream_test_file"; + + for (int i = 0; i < kBlockSizeCount; i++) { + for (int j = 0; j < kBlockSizeCount; j++) { + // Make a temporary file. + int file = + open(filename.c_str(), O_RDWR | O_CREAT | O_TRUNC | O_BINARY, 0777); + ASSERT_GE(file, 0); + { + FileOutputStream output(file, kBlockSizes[i]); + GzipOutputStream gzout(&output); + WriteStuffLarge(&gzout); + gzout.Close(); + output.Flush(); + EXPECT_EQ(0, output.GetErrno()); + } + + // Rewind. + ASSERT_NE(lseek(file, 0, SEEK_SET), (off_t)-1); + + { + FileInputStream input(file, kBlockSizes[j]); + GzipInputStream gzin(&input); + ReadStuffLarge(&gzin); + EXPECT_EQ(0, input.GetErrno()); + } + + close(file); + } + } +} +#endif + +// MSVC raises various debugging exceptions if we try to use a file +// descriptor of -1, defeating our tests below. This class will disable +// these debug assertions while in scope. +class MsvcDebugDisabler { + public: +#if defined(_MSC_VER) && _MSC_VER >= 1400 + MsvcDebugDisabler() { + old_handler_ = _set_invalid_parameter_handler(MyHandler); + old_mode_ = _CrtSetReportMode(_CRT_ASSERT, 0); + } + ~MsvcDebugDisabler() { + old_handler_ = _set_invalid_parameter_handler(old_handler_); + old_mode_ = _CrtSetReportMode(_CRT_ASSERT, old_mode_); + } + + static void MyHandler(const wchar_t* expr, const wchar_t* func, + const wchar_t* file, unsigned int line, + uintptr_t pReserved) { + // do nothing + } + + _invalid_parameter_handler old_handler_; + int old_mode_; +#else + // Dummy constructor and destructor to ensure that GCC doesn't complain + // that debug_disabler is an unused variable. + MsvcDebugDisabler() {} + ~MsvcDebugDisabler() {} +#endif +}; + +// Test that FileInputStreams report errors correctly. +TEST_F(IoTest, FileReadError) { + MsvcDebugDisabler debug_disabler; + + // -1 = invalid file descriptor. + FileInputStream input(-1); + + const void* buffer; + int size; + EXPECT_FALSE(input.Next(&buffer, &size)); + EXPECT_EQ(EBADF, input.GetErrno()); +} + +// Test that FileOutputStreams report errors correctly. +TEST_F(IoTest, FileWriteError) { + MsvcDebugDisabler debug_disabler; + + // -1 = invalid file descriptor. + FileOutputStream input(-1); + + void* buffer; + int size; + + // The first call to Next() succeeds because it doesn't have anything to + // write yet. + EXPECT_TRUE(input.Next(&buffer, &size)); + + // Second call fails. + EXPECT_FALSE(input.Next(&buffer, &size)); + + EXPECT_EQ(EBADF, input.GetErrno()); +} + +// Pipes are not seekable, so File{Input,Output}Stream ends up doing some +// different things to handle them. We'll test by writing to a pipe and +// reading back from it. +TEST_F(IoTest, PipeIo) { + int files[2]; + + for (int i = 0; i < kBlockSizeCount; i++) { + for (int j = 0; j < kBlockSizeCount; j++) { + // Need to create a new pipe each time because ReadStuff() expects + // to see EOF at the end. + ASSERT_EQ(pipe(files), 0); + + { + FileOutputStream output(files[1], kBlockSizes[i]); + WriteStuff(&output); + EXPECT_EQ(0, output.GetErrno()); + } + close(files[1]); // Send EOF. + + { + FileInputStream input(files[0], kBlockSizes[j]); + ReadStuff(&input); + EXPECT_EQ(0, input.GetErrno()); + } + close(files[0]); + } + } +} + +// Test using C++ iostreams. +TEST_F(IoTest, IostreamIo) { + for (int i = 0; i < kBlockSizeCount; i++) { + for (int j = 0; j < kBlockSizeCount; j++) { + { + std::stringstream stream; + + { + OstreamOutputStream output(&stream, kBlockSizes[i]); + WriteStuff(&output); + EXPECT_FALSE(stream.fail()); + } + + { + IstreamInputStream input(&stream, kBlockSizes[j]); + ReadStuff(&input); + EXPECT_TRUE(stream.eof()); + } + } + + { + std::stringstream stream; + + { + OstreamOutputStream output(&stream, kBlockSizes[i]); + WriteStuffLarge(&output); + EXPECT_FALSE(stream.fail()); + } + + { + IstreamInputStream input(&stream, kBlockSizes[j]); + ReadStuffLarge(&input); + EXPECT_TRUE(stream.eof()); + } + } + } + } +} + +// To test ConcatenatingInputStream, we create several ArrayInputStreams +// covering a buffer and then concatenate them. +TEST_F(IoTest, ConcatenatingInputStream) { + const int kBufferSize = 256; + uint8 buffer[kBufferSize]; + + // Fill the buffer. + ArrayOutputStream output(buffer, kBufferSize); + WriteStuff(&output); + + // Now split it up into multiple streams of varying sizes. + ASSERT_EQ(68, output.ByteCount()); // Test depends on this. + ArrayInputStream input1(buffer, 12); + ArrayInputStream input2(buffer + 12, 7); + ArrayInputStream input3(buffer + 19, 6); + ArrayInputStream input4(buffer + 25, 15); + ArrayInputStream input5(buffer + 40, 0); + // Note: We want to make sure we have a stream boundary somewhere between + // bytes 42 and 62, which is the range that it Skip()ed by ReadStuff(). This + // tests that a bug that existed in the original code for Skip() is fixed. + ArrayInputStream input6(buffer + 40, 10); + ArrayInputStream input7(buffer + 50, 18); // Total = 68 bytes. + + ZeroCopyInputStream* streams[] = {&input1, &input2, &input3, &input4, + &input5, &input6, &input7}; + + // Create the concatenating stream and read. + ConcatenatingInputStream input(streams, GOOGLE_ARRAYSIZE(streams)); + ReadStuff(&input); +} + +// To test LimitingInputStream, we write our golden text to a buffer, then +// create an ArrayInputStream that contains the whole buffer (not just the +// bytes written), then use a LimitingInputStream to limit it just to the +// bytes written. +TEST_F(IoTest, LimitingInputStream) { + const int kBufferSize = 256; + uint8 buffer[kBufferSize]; + + // Fill the buffer. + ArrayOutputStream output(buffer, kBufferSize); + WriteStuff(&output); + + // Set up input. + ArrayInputStream array_input(buffer, kBufferSize); + LimitingInputStream input(&array_input, output.ByteCount()); + + ReadStuff(&input); +} + +// Checks that ByteCount works correctly for LimitingInputStreams where the +// underlying stream has already been read. +TEST_F(IoTest, LimitingInputStreamByteCount) { + const int kHalfBufferSize = 128; + const int kBufferSize = kHalfBufferSize * 2; + uint8 buffer[kBufferSize]; + + // Set up input. Only allow half to be read at once. + ArrayInputStream array_input(buffer, kBufferSize, kHalfBufferSize); + const void* data; + int size; + EXPECT_TRUE(array_input.Next(&data, &size)); + EXPECT_EQ(kHalfBufferSize, array_input.ByteCount()); + // kHalfBufferSize - 1 to test limiting logic as well. + LimitingInputStream input(&array_input, kHalfBufferSize - 1); + EXPECT_EQ(0, input.ByteCount()); + EXPECT_TRUE(input.Next(&data, &size)); + EXPECT_EQ(kHalfBufferSize - 1, input.ByteCount()); +} + +// Check that a zero-size array doesn't confuse the code. +TEST(ZeroSizeArray, Input) { + ArrayInputStream input(NULL, 0); + const void* data; + int size; + EXPECT_FALSE(input.Next(&data, &size)); +} + +TEST(ZeroSizeArray, Output) { + ArrayOutputStream output(NULL, 0); + void* data; + int size; + EXPECT_FALSE(output.Next(&data, &size)); +} + +} // namespace +} // namespace io +} // namespace protobuf +} // namespace google |