diff options
Diffstat (limited to 'NorthstarDedicatedTest/include/protobuf/stubs/structurally_valid.cc')
-rw-r--r-- | NorthstarDedicatedTest/include/protobuf/stubs/structurally_valid.cc | 615 |
1 files changed, 615 insertions, 0 deletions
diff --git a/NorthstarDedicatedTest/include/protobuf/stubs/structurally_valid.cc b/NorthstarDedicatedTest/include/protobuf/stubs/structurally_valid.cc new file mode 100644 index 00000000..09d956d1 --- /dev/null +++ b/NorthstarDedicatedTest/include/protobuf/stubs/structurally_valid.cc @@ -0,0 +1,615 @@ +// Protocol Buffers - Google's data interchange format +// Copyright 2008 Google Inc. All rights reserved. +// https://developers.google.com/protocol-buffers/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// Author: jrm@google.com (Jim Meehan) + +#include <stubs/common.h> + +#include <stubs/stringpiece.h> + +namespace google { +namespace protobuf { +namespace internal { + +// These four-byte entries compactly encode how many bytes 0..255 to delete +// in making a string replacement, how many bytes to add 0..255, and the offset +// 0..64k-1 of the replacement string in remap_string. +struct RemapEntry { + uint8 delete_bytes; + uint8 add_bytes; + uint16 bytes_offset; +}; + +// Exit type codes for state tables. All but the first get stuffed into +// signed one-byte entries. The first is only generated by executable code. +// To distinguish from next-state entries, these must be contiguous and +// all <= kExitNone +typedef enum { + kExitDstSpaceFull = 239, + kExitIllegalStructure, // 240 + kExitOK, // 241 + kExitReject, // ... + kExitReplace1, + kExitReplace2, + kExitReplace3, + kExitReplace21, + kExitReplace31, + kExitReplace32, + kExitReplaceOffset1, + kExitReplaceOffset2, + kExitReplace1S0, + kExitSpecial, + kExitDoAgain, + kExitRejectAlt, + kExitNone // 255 +} ExitReason; + + +// This struct represents one entire state table. The three initialized byte +// areas are state_table, remap_base, and remap_string. state0 and state0_size +// give the byte offset and length within state_table of the initial state -- +// table lookups are expected to start and end in this state, but for +// truncated UTF-8 strings, may end in a different state. These allow a quick +// test for that condition. entry_shift is 8 for tables subscripted by a full +// byte value and 6 for space-optimized tables subscripted by only six +// significant bits in UTF-8 continuation bytes. +typedef struct { + const uint32 state0; + const uint32 state0_size; + const uint32 total_size; + const int max_expand; + const int entry_shift; + const int bytes_per_entry; + const uint32 losub; + const uint32 hiadd; + const uint8* state_table; + const RemapEntry* remap_base; + const uint8* remap_string; + const uint8* fast_state; +} UTF8StateMachineObj; + +typedef UTF8StateMachineObj UTF8ScanObj; + +#define X__ (kExitIllegalStructure) +#define RJ_ (kExitReject) +#define S1_ (kExitReplace1) +#define S2_ (kExitReplace2) +#define S3_ (kExitReplace3) +#define S21 (kExitReplace21) +#define S31 (kExitReplace31) +#define S32 (kExitReplace32) +#define T1_ (kExitReplaceOffset1) +#define T2_ (kExitReplaceOffset2) +#define S11 (kExitReplace1S0) +#define SP_ (kExitSpecial) +#define D__ (kExitDoAgain) +#define RJA (kExitRejectAlt) + +// Entire table has 9 state blocks of 256 entries each +static const unsigned int utf8acceptnonsurrogates_STATE0 = 0; // state[0] +static const unsigned int utf8acceptnonsurrogates_STATE0_SIZE = 256; // =[1] +static const unsigned int utf8acceptnonsurrogates_TOTAL_SIZE = 2304; +static const unsigned int utf8acceptnonsurrogates_MAX_EXPAND_X4 = 0; +static const unsigned int utf8acceptnonsurrogates_SHIFT = 8; +static const unsigned int utf8acceptnonsurrogates_BYTES = 1; +static const unsigned int utf8acceptnonsurrogates_LOSUB = 0x20202020; +static const unsigned int utf8acceptnonsurrogates_HIADD = 0x00000000; + +static const uint8 utf8acceptnonsurrogates[] = { +// state[0] 0x000000 Byte 1 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, + +X__, X__, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 3, 3, + 4, 5, 5, 5, 6, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, + +// state[1] 0x000080 Byte 2 of 2 +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, + +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, + +// state[2] 0x000000 Byte 2 of 3 +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, + +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, + +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, + +// state[3] 0x001000 Byte 2 of 3 +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, + +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, + + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, + +// state[4] 0x000000 Byte 2 of 4 +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, + +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, + +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, + +// state[5] 0x040000 Byte 2 of 4 +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, + +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, + + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, + +// state[6] 0x100000 Byte 2 of 4 +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, + +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, + + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, + +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, + +// state[7] 0x00d000 Byte 2 of 3 +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, + +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, + + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, + +// state[8] 0x00d800 Byte 3 of 3 +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, + +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, + +RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, +RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, +RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, +RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, + +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, +}; + +// Remap base[0] = (del, add, string_offset) +static const RemapEntry utf8acceptnonsurrogates_remap_base[] = { +{0, 0, 0} }; + +// Remap string[0] +static const unsigned char utf8acceptnonsurrogates_remap_string[] = { +0 }; + +static const unsigned char utf8acceptnonsurrogates_fast[256] = { +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +}; + +static const UTF8ScanObj utf8acceptnonsurrogates_obj = { + utf8acceptnonsurrogates_STATE0, + utf8acceptnonsurrogates_STATE0_SIZE, + utf8acceptnonsurrogates_TOTAL_SIZE, + utf8acceptnonsurrogates_MAX_EXPAND_X4, + utf8acceptnonsurrogates_SHIFT, + utf8acceptnonsurrogates_BYTES, + utf8acceptnonsurrogates_LOSUB, + utf8acceptnonsurrogates_HIADD, + utf8acceptnonsurrogates, + utf8acceptnonsurrogates_remap_base, + utf8acceptnonsurrogates_remap_string, + utf8acceptnonsurrogates_fast +}; + + +#undef X__ +#undef RJ_ +#undef S1_ +#undef S2_ +#undef S3_ +#undef S21 +#undef S31 +#undef S32 +#undef T1_ +#undef T2_ +#undef S11 +#undef SP_ +#undef D__ +#undef RJA + +// Return true if current Tbl pointer is within state0 range +// Note that unsigned compare checks both ends of range simultaneously +static inline bool InStateZero(const UTF8ScanObj* st, const uint8* Tbl) { + const uint8* Tbl0 = &st->state_table[st->state0]; + return (static_cast<uint32>(Tbl - Tbl0) < st->state0_size); +} + +// Scan a UTF-8 string based on state table. +// Always scan complete UTF-8 characters +// Set number of bytes scanned. Return reason for exiting +int UTF8GenericScan(const UTF8ScanObj* st, + const char * str, + int str_length, + int* bytes_consumed) { + *bytes_consumed = 0; + if (str_length == 0) return kExitOK; + + int eshift = st->entry_shift; + const uint8* isrc = reinterpret_cast<const uint8*>(str); + const uint8* src = isrc; + const uint8* srclimit = isrc + str_length; + const uint8* srclimit8 = str_length < 7 ? isrc : srclimit - 7; + const uint8* Tbl_0 = &st->state_table[st->state0]; + + DoAgain: + // Do state-table scan + int e = 0; + uint8 c; + const uint8* Tbl2 = &st->fast_state[0]; + const uint32 losub = st->losub; + const uint32 hiadd = st->hiadd; + // Check initial few bytes one at a time until 8-byte aligned + //---------------------------- + while ((((uintptr_t)src & 0x07) != 0) && + (src < srclimit) && + Tbl2[src[0]] == 0) { + src++; + } + if (((uintptr_t)src & 0x07) == 0) { + // Do fast for groups of 8 identity bytes. + // This covers a lot of 7-bit ASCII ~8x faster then the 1-byte loop, + // including slowing slightly on cr/lf/ht + //---------------------------- + while (src < srclimit8) { + uint32 s0123 = (reinterpret_cast<const uint32 *>(src))[0]; + uint32 s4567 = (reinterpret_cast<const uint32 *>(src))[1]; + src += 8; + // This is a fast range check for all bytes in [lowsub..0x80-hiadd) + uint32 temp = (s0123 - losub) | (s0123 + hiadd) | + (s4567 - losub) | (s4567 + hiadd); + if ((temp & 0x80808080) != 0) { + // We typically end up here on cr/lf/ht; src was incremented + int e0123 = (Tbl2[src[-8]] | Tbl2[src[-7]]) | + (Tbl2[src[-6]] | Tbl2[src[-5]]); + if (e0123 != 0) { + src -= 8; + break; + } // Exit on Non-interchange + e0123 = (Tbl2[src[-4]] | Tbl2[src[-3]]) | + (Tbl2[src[-2]] | Tbl2[src[-1]]); + if (e0123 != 0) { + src -= 4; + break; + } // Exit on Non-interchange + // Else OK, go around again + } + } + } + //---------------------------- + + // Byte-at-a-time scan + //---------------------------- + const uint8* Tbl = Tbl_0; + while (src < srclimit) { + c = *src; + e = Tbl[c]; + src++; + if (e >= kExitIllegalStructure) {break;} + Tbl = &Tbl_0[e << eshift]; + } + //---------------------------- + + // Exit possibilities: + // Some exit code, !state0, back up over last char + // Some exit code, state0, back up one byte exactly + // source consumed, !state0, back up over partial char + // source consumed, state0, exit OK + // For illegal byte in state0, avoid backup up over PREVIOUS char + // For truncated last char, back up to beginning of it + + if (e >= kExitIllegalStructure) { + // Back up over exactly one byte of rejected/illegal UTF-8 character + src--; + // Back up more if needed + if (!InStateZero(st, Tbl)) { + do { + src--; + } while ((src > isrc) && ((src[0] & 0xc0) == 0x80)); + } + } else if (!InStateZero(st, Tbl)) { + // Back up over truncated UTF-8 character + e = kExitIllegalStructure; + do { + src--; + } while ((src > isrc) && ((src[0] & 0xc0) == 0x80)); + } else { + // Normal termination, source fully consumed + e = kExitOK; + } + + if (e == kExitDoAgain) { + // Loop back up to the fast scan + goto DoAgain; + } + + *bytes_consumed = src - isrc; + return e; +} + +int UTF8GenericScanFastAscii(const UTF8ScanObj* st, + const char * str, + int str_length, + int* bytes_consumed) { + *bytes_consumed = 0; + if (str_length == 0) return kExitOK; + + const uint8* isrc = reinterpret_cast<const uint8*>(str); + const uint8* src = isrc; + const uint8* srclimit = isrc + str_length; + const uint8* srclimit8 = str_length < 7 ? isrc : srclimit - 7; + int n; + int rest_consumed; + int exit_reason; + do { + // Check initial few bytes one at a time until 8-byte aligned + while ((((uintptr_t)src & 0x07) != 0) && + (src < srclimit) && (src[0] < 0x80)) { + src++; + } + if (((uintptr_t)src & 0x07) == 0) { + while ((src < srclimit8) && + (((reinterpret_cast<const uint32*>(src)[0] | + reinterpret_cast<const uint32*>(src)[1]) & 0x80808080) == 0)) { + src += 8; + } + } + while ((src < srclimit) && (src[0] < 0x80)) { + src++; + } + // Run state table on the rest + n = src - isrc; + exit_reason = UTF8GenericScan(st, str + n, str_length - n, &rest_consumed); + src += rest_consumed; + } while ( exit_reason == kExitDoAgain ); + + *bytes_consumed = src - isrc; + return exit_reason; +} + +// Hack: On some compilers the static tables are initialized at startup. +// We can't use them until they are initialized. However, some Protocol +// Buffer parsing happens at static init time and may try to validate +// UTF-8 strings. Since UTF-8 validation is only used for debugging +// anyway, we simply always return success if initialization hasn't +// occurred yet. +namespace { + +bool module_initialized_ = false; + +struct InitDetector { + InitDetector() { + module_initialized_ = true; + } +}; +InitDetector init_detector; + +} // namespace + +bool IsStructurallyValidUTF8(const char* buf, int len) { + if (!module_initialized_) return true; + + int bytes_consumed = 0; + UTF8GenericScanFastAscii(&utf8acceptnonsurrogates_obj, + buf, len, &bytes_consumed); + return (bytes_consumed == len); +} + +int UTF8SpnStructurallyValid(StringPiece str) { + if (!module_initialized_) return str.size(); + + int bytes_consumed = 0; + UTF8GenericScanFastAscii(&utf8acceptnonsurrogates_obj, + str.data(), str.size(), &bytes_consumed); + return bytes_consumed; +} + +// Coerce UTF-8 byte string in src_str to be +// a structurally-valid equal-length string by selectively +// overwriting illegal bytes with replace_char (typically blank). +// replace_char must be legal printable 7-bit Ascii 0x20..0x7e. +// src_str is read-only. If any overwriting is needed, a modified byte string +// is created in idst, length isrclen. +// +// Returns pointer to output buffer, isrc if no changes were made, +// or idst if some bytes were changed. +// +// Fast case: all is structurally valid and no byte copying is done. +// +char* UTF8CoerceToStructurallyValid(StringPiece src_str, char* idst, + const char replace_char) { + const char* isrc = src_str.data(); + const int len = src_str.length(); + int n = UTF8SpnStructurallyValid(src_str); + if (n == len) { // Normal case -- all is cool, return + return const_cast<char*>(isrc); + } else { // Unusual case -- copy w/o bad bytes + const char* src = isrc; + const char* srclimit = isrc + len; + char* dst = idst; + memmove(dst, src, n); // Copy initial good chunk + src += n; + dst += n; + while (src < srclimit) { // src points to bogus byte or is off the end + dst[0] = replace_char; // replace one bad byte + src++; + dst++; + StringPiece str2(src, srclimit - src); + n = UTF8SpnStructurallyValid(str2); // scan the remainder + memmove(dst, src, n); // copy next good chunk + src += n; + dst += n; + } + } + return idst; +} + +} // namespace internal +} // namespace protobuf +} // namespace google |