diff options
| author | LemonBoy <thatlemon@gmail.com> | 2020-09-21 21:34:51 +0200 |
|---|---|---|
| committer | LemonBoy <thatlemon@gmail.com> | 2020-11-20 08:36:10 +0100 |
| commit | 6b39167fdc2aa4c4c57d0459f3de792e001562aa (patch) | |
| tree | bbee4c49f6dc61d2f72cd02417a396e2cb0d9552 /lib/std | |
| parent | 73be59433f36f5a8f5c7fa7bbffcb8eb17b1735d (diff) | |
| download | zig-6b39167fdc2aa4c4c57d0459f3de792e001562aa.tar.gz zig-6b39167fdc2aa4c4c57d0459f3de792e001562aa.zip | |
std: Rewrite the fmt parser
Turn the FSM parser into a linear one so that's easier to implement new
features and/or more error checking without adding more and more states.
Functionally-speaking the two parsers are at feature parity.
Diffstat (limited to 'lib/std')
| -rw-r--r-- | lib/std/fmt.zig | 368 |
1 files changed, 178 insertions, 190 deletions
diff --git a/lib/std/fmt.zig b/lib/std/fmt.zig index 074db49cc5..0693fa0664 100644 --- a/lib/std/fmt.zig +++ b/lib/std/fmt.zig @@ -27,18 +27,6 @@ pub const FormatOptions = struct { fill: u8 = ' ', }; -fn peekIsAlign(comptime fmt: []const u8) bool { - // Should only be called during a state transition to the format segment. - comptime assert(fmt[0] == ':'); - - inline for (([_]u8{ 1, 2 })[0..]) |i| { - if (fmt.len > i and (fmt[i] == '<' or fmt[i] == '^' or fmt[i] == '>')) { - return true; - } - } - return false; -} - /// Renders fmt string with args, calling output with slices of bytes. /// If `output` returns an error, the error is returned from `format` and /// `output` is not called again. @@ -103,22 +91,6 @@ pub fn format( @compileError("32 arguments max are supported per format call"); } - const State = enum { - Start, - Positional, - CloseBrace, - Specifier, - FormatFillAndAlign, - FormatWidth, - FormatPrecision, - }; - - comptime var start_index = 0; - comptime var state = State.Start; - comptime var maybe_pos_arg: ?comptime_int = null; - comptime var specifier_start = 0; - comptime var specifier_end = 0; - comptime var options = FormatOptions{}; comptime var arg_state: struct { next_arg: usize = 0, used_args: ArgSetType = 0, @@ -128,7 +100,7 @@ pub fn format( return (@popCount(ArgSetType, self.used_args) != self.args_len); } - fn nextArg(comptime self: *@This(), comptime pos_arg: ?comptime_int) comptime_int { + fn nextArg(comptime self: *@This(), comptime pos_arg: ?usize) comptime_int { const next_idx = pos_arg orelse blk: { const arg = self.next_arg; self.next_arg += 1; @@ -146,182 +118,193 @@ pub fn format( } } = .{}; - inline for (fmt) |c, i| { - switch (state) { - .Start => switch (c) { - '{' => { - if (start_index < i) { - try writer.writeAll(fmt[start_index..i]); - } + comptime var parser: struct { + buf: []const u8 = undefined, + pos: usize = 0, + + // Returns a decimal number or null if the current character is not a + // digit + fn number(comptime self: *@This()) ?usize { + var r: ?usize = null; + + while (self.pos < self.buf.len) : (self.pos += 1) { + switch (self.buf[self.pos]) { + '0'...'9' => { + if (r == null) r = 0; + r.? *= 10; + r.? += self.buf[self.pos] - '0'; + }, + else => break, + } + } - start_index = i; - specifier_start = i + 1; - specifier_end = i + 1; - maybe_pos_arg = null; - state = .Positional; - options = FormatOptions{}; - }, - '}' => { - if (start_index < i) { - try writer.writeAll(fmt[start_index..i]); - } - state = .CloseBrace; - }, - else => {}, - }, - .Positional => switch (c) { - '{' => { - state = .Start; - start_index = i; - }, - ':' => { - state = if (comptime peekIsAlign(fmt[i..])) State.FormatFillAndAlign else State.FormatWidth; - specifier_end = i; - }, - '0'...'9' => { - if (maybe_pos_arg == null) { - maybe_pos_arg = 0; - } + return r; + } - maybe_pos_arg.? *= 10; - maybe_pos_arg.? += c - '0'; - specifier_start = i + 1; + // Returns a substring of the input starting from the current position + // and ending where `ch` is found or until the end if not found + fn until(comptime self: *@This(), comptime ch: u8) []const u8 { + const start = self.pos; - if (maybe_pos_arg.? >= args.len) { - @compileError("Positional value refers to non-existent argument"); - } - }, - '}' => { - const arg_to_print = comptime arg_state.nextArg(maybe_pos_arg); - - try formatType( - args[arg_to_print], - fmt[0..0], - options, - writer, - default_max_depth, - ); - - state = .Start; - start_index = i + 1; - }, - else => { - state = .Specifier; - specifier_start = i; - }, - }, - .CloseBrace => switch (c) { - '}' => { - state = .Start; - start_index = i; - }, - else => @compileError("Single '}' encountered in format string"), - }, - .Specifier => switch (c) { - ':' => { - specifier_end = i; - state = if (comptime peekIsAlign(fmt[i..])) State.FormatFillAndAlign else State.FormatWidth; - }, - '}' => { - const arg_to_print = comptime arg_state.nextArg(maybe_pos_arg); - - try formatType( - args[arg_to_print], - fmt[specifier_start..i], - options, - writer, - default_max_depth, - ); - state = .Start; - start_index = i + 1; - }, + if (start >= self.buf.len) + return &[_]u8{}; + + while (self.pos < self.buf.len) : (self.pos += 1) { + if (self.buf[self.pos] == ch) break; + } + return self.buf[start..self.pos]; + } + + // Returns one character, if available + fn char(comptime self: *@This()) ?u8 { + if (self.pos < self.buf.len) { + const ch = self.buf[self.pos]; + self.pos += 1; + return ch; + } + return null; + } + + // Returns the n-th next character or null if that's past the end + fn peek(comptime self: *@This(), comptime n: usize) ?u8 { + return if (self.pos + n < self.buf.len) self.buf[self.pos + n] else null; + } + } = .{}; + + comptime var options: FormatOptions = .{}; + + @setEvalBranchQuota(2000000); + + comptime var i = 0; + inline while (i < fmt.len) { + comptime const start_index = i; + + inline while (i < fmt.len) : (i += 1) { + switch (fmt[i]) { + '{', '}' => break, else => {}, - }, - // Only entered if the format string contains a fill/align segment. - .FormatFillAndAlign => switch (c) { + } + } + + comptime var end_index = i; + comptime var unescape_brace = false; + + // Handle {{ and }}, those are un-escaped as single braces + if (i + 1 < fmt.len and fmt[i + 1] == fmt[i]) { + unescape_brace = true; + // Make the first brace part of the literal... + end_index += 1; + // ...and skip both + i += 2; + } + + // Write out the literal + if (start_index != end_index) { + try writer.writeAll(fmt[start_index..end_index]); + } + + // We've already skipped the other brace, restart the loop + if (unescape_brace) continue; + + if (i >= fmt.len) break; + + if (fmt[i] == '}') { + @compileError("missing opening {"); + } + + // Get past the { + comptime assert(fmt[i] == '{'); + i += 1; + + comptime const fmt_begin = i; + // Find the closing brace + inline while (i < fmt.len and fmt[i] != '}') : (i += 1) {} + comptime const fmt_end = i; + + if (i >= fmt.len) { + @compileError("missing closing }"); + } + + // Get past the } + comptime assert(fmt[i] == '}'); + i += 1; + + options = .{}; + + // Parse the format fragment between braces + parser.buf = fmt[fmt_begin..fmt_end]; + parser.pos = 0; + + // Parse the positional argument number + comptime var opt_pos_arg = comptime parser.number(); + + // Parse the format specifier + comptime var specifier_arg = comptime parser.until(':'); + + // Skip the colon, if present + if (comptime parser.char()) |ch| { + if (ch != ':') + @compileError("expected : or }, found '" ++ [1]u8{ch} ++ "'"); + } + + // Parse the fill character + // The fill parameter requires the alignment parameter to be specified + // too + if (comptime parser.peek(1)) |ch| { + if (comptime mem.indexOfScalar(u8, "<^>", ch) != null) { + options.fill = comptime parser.char().?; + } + } + + // Parse the alignment parameter + if (comptime parser.peek(0)) |ch| { + switch (ch) { '<' => { - options.alignment = Alignment.Left; - state = .FormatWidth; + options.alignment = .Left; + _ = comptime parser.char(); }, '^' => { - options.alignment = Alignment.Center; - state = .FormatWidth; + options.alignment = .Center; + _ = comptime parser.char(); }, '>' => { - options.alignment = Alignment.Right; - state = .FormatWidth; + options.alignment = .Right; + _ = comptime parser.char(); }, - else => { - options.fill = c; - }, - }, - .FormatWidth => switch (c) { - '0'...'9' => { - if (options.width == null) { - options.width = 0; - } + else => {}, + } + } - options.width.? *= 10; - options.width.? += c - '0'; - }, - '.' => { - state = .FormatPrecision; - }, - '}' => { - const arg_to_print = comptime arg_state.nextArg(maybe_pos_arg); - - try formatType( - args[arg_to_print], - fmt[specifier_start..specifier_end], - options, - writer, - default_max_depth, - ); - state = .Start; - start_index = i + 1; - }, - else => { - @compileError("Unexpected character in width value: " ++ [_]u8{c}); - }, - }, - .FormatPrecision => switch (c) { - '0'...'9' => { - if (options.precision == null) { - options.precision = 0; - } + // Parse the width parameter + comptime var opt_width_arg = comptime parser.number(); + options.width = opt_width_arg; - options.precision.? *= 10; - options.precision.? += c - '0'; - }, - '}' => { - const arg_to_print = comptime arg_state.nextArg(maybe_pos_arg); - - try formatType( - args[arg_to_print], - fmt[specifier_start..specifier_end], - options, - writer, - default_max_depth, - ); - state = .Start; - start_index = i + 1; - }, - else => { - @compileError("Unexpected character in precision value: " ++ [_]u8{c}); - }, - }, - } - } - comptime { - if (comptime arg_state.hasUnusedArgs()) { - @compileError("Unused arguments"); + // Skip the dot, if present + if (comptime parser.char()) |ch| { + if (ch != '.') + @compileError("expected . or }, found '" ++ [1]u8{ch} ++ "'"); } - if (state != State.Start) { - @compileError("Incomplete format string: " ++ fmt); + + // Parse the precision parameter + comptime var opt_precision_arg = comptime parser.number(); + options.precision = opt_precision_arg; + + if (comptime parser.char()) |ch| { + @compileError("extraneous trailing character '" ++ [1]u8{ch} ++ "'"); } + + const arg_to_print = comptime arg_state.nextArg(opt_pos_arg); + try formatType( + args[arg_to_print], + specifier_arg, + options, + writer, + default_max_depth, + ); } - if (start_index < fmt.len) { - try writer.writeAll(fmt[start_index..]); + + if (comptime arg_state.hasUnusedArgs()) { + @compileError("Unused arguments"); } } @@ -1371,6 +1354,11 @@ test "parse unsigned comptime" { } } +test "escaped braces" { + try testFmt("escaped: {{foo}}\n", "escaped: {{{{foo}}}}\n", .{}); + try testFmt("escaped: {foo}\n", "escaped: {{foo}}\n", .{}); +} + test "optional" { { const value: ?i32 = 1234; |
