1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
|
// Protocol Buffers - Google's data interchange format
// Copyright 2008 Google Inc. All rights reserved.
// https://developers.google.com/protocol-buffers/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#ifndef GOOGLE_PROTOBUF_UTIL_CONVERTER_JSON_STREAM_PARSER_H__
#define GOOGLE_PROTOBUF_UTIL_CONVERTER_JSON_STREAM_PARSER_H__
#include <cstdint>
#include <stack>
#include <string>
#include <stubs/common.h>
#include <stubs/status.h>
#include <stubs/strutil.h>
#include <stubs/status.h>
#include <port_def.inc>
namespace google {
namespace protobuf {
namespace util {
namespace converter {
class ObjectWriter;
// A JSON parser that can parse a stream of JSON chunks rather than needing the
// entire JSON string up front. It is a modified version of the parser in
// //net/proto/json/json-parser.h that has been changed in the following ways:
// - Changed from recursion to an explicit stack to allow resumption
// - Added support for int64 and uint64 numbers
// - Removed support for octal and decimal escapes
// - Removed support for numeric keys
// - Removed support for functions (javascript)
// - Removed some lax-comma support (but kept trailing comma support)
// - Writes directly to an ObjectWriter rather than using subclassing
//
// Here is an example usage:
//
//   JsonStreamParser parser(ow_.get());
//   util::Status result = parser.Parse(chunk1);
//   result.Update(parser.Parse(chunk2));
//   result.Update(parser.FinishParse());
//   GOOGLE_DCHECK(result.ok()) << "Failed to parse JSON";
//
// This parser is thread-compatible as long as only one thread is calling a
// Parse() method at a time.
class PROTOBUF_EXPORT JsonStreamParser {
public:
// Creates a JsonStreamParser that will write to the given ObjectWriter.
explicit JsonStreamParser(ObjectWriter* ow);
virtual ~JsonStreamParser();
// Parses a UTF-8 encoded JSON string from a StringPiece. If the returned
// status is non-ok, the status might contain a payload ParseErrorType with
// type_url kParseErrorTypeUrl and a payload containing string snippet of the
// error with type_url kParseErrorSnippetUrl.
util::Status Parse(StringPiece json);
// Finish parsing the JSON string. If the returned status is non-ok, the
// status might contain a payload ParseErrorType with type_url
// kParseErrorTypeUrl and a payload containing string snippet of the error
// with type_url kParseErrorSnippetUrl.
util::Status FinishParse();
// Sets the max recursion depth of JSON message to be deserialized. JSON
// messages over this depth will fail to be deserialized.
// Default value is 100.
void set_max_recursion_depth(int max_depth) {
max_recursion_depth_ = max_depth;
}
// Denotes the cause of error.
enum ParseErrorType {
UNKNOWN_PARSE_ERROR,
OCTAL_OR_HEX_ARE_NOT_VALID_JSON_VALUES,
EXPECTED_COLON,
EXPECTED_COMMA_OR_BRACKET,
EXPECTED_VALUE,
EXPECTED_COMMA_OR_BRACES,
EXPECTED_OBJECT_KEY_OR_BRACES,
EXPECTED_VALUE_OR_BRACKET,
INVALID_KEY_OR_VARIABLE_NAME,
NON_UTF_8,
PARSING_TERMINATED_BEFORE_END_OF_INPUT,
UNEXPECTED_TOKEN,
EXPECTED_CLOSING_QUOTE,
ILLEGAL_HEX_STRING,
INVALID_ESCAPE_SEQUENCE,
MISSING_LOW_SURROGATE,
INVALID_LOW_SURROGATE,
INVALID_UNICODE,
UNABLE_TO_PARSE_NUMBER,
NUMBER_EXCEEDS_RANGE_DOUBLE
};
private:
friend class JsonStreamParserTest;
// Return the current recursion depth.
int recursion_depth() { return recursion_depth_; }
enum TokenType {
BEGIN_STRING, // " or '
BEGIN_NUMBER, // - or digit
BEGIN_TRUE, // true
BEGIN_FALSE, // false
BEGIN_NULL, // null
BEGIN_OBJECT, // {
END_OBJECT, // }
BEGIN_ARRAY, // [
END_ARRAY, // ]
ENTRY_SEPARATOR, // :
VALUE_SEPARATOR, // ,
BEGIN_KEY, // letter, _, $ or digit. Must begin with non-digit
UNKNOWN // Unknown token or we ran out of the stream.
};
enum ParseType {
VALUE, // Expects a {, [, true, false, null, string or number
OBJ_MID, // Expects a ',' or }
ENTRY, // Expects a key or }
ENTRY_MID, // Expects a :
ARRAY_VALUE, // Expects a value or ]
ARRAY_MID // Expects a ',' or ]
};
// Holds the result of parsing a number
struct NumberResult {
enum Type { DOUBLE, INT, UINT };
Type type;
union {
double double_val;
int64_t int_val;
uint64_t uint_val;
};
};
// Parses a single chunk of JSON, returning an error if the JSON was invalid.
util::Status ParseChunk(StringPiece chunk);
// Runs the parser based on stack_ and p_, until the stack is empty or p_ runs
// out of data. If we unexpectedly run out of p_ we push the latest back onto
// the stack and return.
util::Status RunParser();
// Parses a value from p_ and writes it to ow_.
// A value may be an object, array, true, false, null, string or number.
util::Status ParseValue(TokenType type);
// Parses a string and writes it out to the ow_.
util::Status ParseString();
// Parses a string, storing the result in parsed_.
util::Status ParseStringHelper();
// This function parses unicode escape sequences in strings. It returns an
// error when there's a parsing error, either the size is not the expected
// size or a character is not a hex digit. When it returns str will contain
// what has been successfully parsed so far.
util::Status ParseUnicodeEscape();
// Expects p_ to point to a JSON number, writes the number to the writer using
// the appropriate Render method based on the type of number.
util::Status ParseNumber();
// Parse a number into a NumberResult, reporting an error if no number could
// be parsed. This method will try to parse into a uint64, int64, or double
// based on whether the number was positive or negative or had a decimal
// component.
util::Status ParseNumberHelper(NumberResult* result);
// Parse a number as double into a NumberResult.
util::Status ParseDoubleHelper(const std::string& number,
NumberResult* result);
// Handles a { during parsing of a value.
util::Status HandleBeginObject();
// Parses from the ENTRY state.
util::Status ParseEntry(TokenType type);
// Parses from the ENTRY_MID state.
util::Status ParseEntryMid(TokenType type);
// Parses from the OBJ_MID state.
util::Status ParseObjectMid(TokenType type);
// Handles a [ during parsing of a value.
util::Status HandleBeginArray();
// Parses from the ARRAY_VALUE state.
util::Status ParseArrayValue(TokenType type);
// Parses from the ARRAY_MID state.
util::Status ParseArrayMid(TokenType type);
// Expects p_ to point to an unquoted literal
util::Status ParseTrue();
util::Status ParseFalse();
util::Status ParseNull();
util::Status ParseEmptyNull();
// Whether an empty-null is allowed in the current state.
bool IsEmptyNullAllowed(TokenType type);
// Whether the whole input is all whitespaces.
bool IsInputAllWhiteSpaces(TokenType type);
// Report a failure as a util::Status.
util::Status ReportFailure(StringPiece message,
ParseErrorType parse_code);
// Report a failure due to an UNKNOWN token type. We check if we hit the
// end of the stream and if we're finishing or not to detect what type of
// status to return in this case.
util::Status ReportUnknown(StringPiece message,
ParseErrorType parse_code);
// Helper function to check recursion depth and increment it. It will return
// OkStatus() if the current depth is allowed. Otherwise an error is returned.
// key is used for error reporting.
util::Status IncrementRecursionDepth(StringPiece key) const;
// Advance p_ past all whitespace or until the end of the string.
void SkipWhitespace();
// Advance p_ one UTF-8 character
void Advance();
// Expects p_ to point to the beginning of a key.
util::Status ParseKey();
// Return the type of the next token at p_.
TokenType GetNextTokenType();
// The object writer to write parse events to.
ObjectWriter* ow_;
// The stack of parsing we still need to do. When the stack runs empty we will
// have parsed a single value from the root (e.g. an object or list).
std::stack<ParseType> stack_;
// Contains any leftover text from a previous chunk that we weren't able to
// fully parse, for example the start of a key or number.
std::string leftover_;
// The current chunk of JSON being parsed. Primarily used for providing
// context during error reporting.
StringPiece json_;
// A pointer within the current JSON being parsed, used to track location.
StringPiece p_;
// Stores the last key read, as we separate parsing of keys and values.
StringPiece key_;
// Storage for key_ if we need to keep ownership, for example between chunks
// or if the key was unescaped from a JSON string.
std::string key_storage_;
// True during the FinishParse() call, so we know that any errors are fatal.
// For example an unterminated string will normally result in cancelling and
// trying during the next chunk, but during FinishParse() it is an error.
bool finishing_;
// Whether non whitespace tokens have been seen during parsing.
// It is used to handle the case of a pure whitespace stream input.
bool seen_non_whitespace_;
// The JsonStreamParser requires a root element by default and it will raise
// error if the root element is missing. If `allow_no_root_element_` is true,
// the JsonStreamParser can also handle this case.
bool allow_no_root_element_;
// String we parsed during a call to ParseStringHelper().
StringPiece parsed_;
// Storage for the string we parsed. This may be empty if the string was able
// to be parsed directly from the input.
std::string parsed_storage_;
// The character that opened the string, either ' or ".
// A value of 0 indicates that string parsing is not in process.
char string_open_;
// Storage for the chunk that are being parsed in ParseChunk().
std::string chunk_storage_;
// Whether to allow non UTF-8 encoded input and replace invalid code points.
bool coerce_to_utf8_;
// Replacement character for invalid UTF-8 code points.
std::string utf8_replacement_character_;
// Whether allows empty string represented null array value or object entry
// value.
bool allow_empty_null_;
// Whether unquoted object keys can contain embedded non-alphanumeric
// characters when this is unambiguous for parsing.
bool allow_permissive_key_naming_;
// Whether allows out-of-range floating point numbers or reject them.
bool loose_float_number_conversion_;
// Tracks current recursion depth.
mutable int recursion_depth_;
// Maximum allowed recursion depth.
int max_recursion_depth_;
GOOGLE_DISALLOW_IMPLICIT_CONSTRUCTORS(JsonStreamParser);
};
} // namespace converter
} // namespace util
} // namespace protobuf
} // namespace google
#include <port_undef.inc>
#endif // GOOGLE_PROTOBUF_UTIL_CONVERTER_JSON_STREAM_PARSER_H__
|