aboutsummaryrefslogtreecommitdiff
path: root/lib/std
diff options
context:
space:
mode:
authorLemonBoy <thatlemon@gmail.com>2020-09-21 21:34:51 +0200
committerLemonBoy <thatlemon@gmail.com>2020-11-20 08:36:10 +0100
commit6b39167fdc2aa4c4c57d0459f3de792e001562aa (patch)
treebbee4c49f6dc61d2f72cd02417a396e2cb0d9552 /lib/std
parent73be59433f36f5a8f5c7fa7bbffcb8eb17b1735d (diff)
downloadzig-6b39167fdc2aa4c4c57d0459f3de792e001562aa.tar.gz
zig-6b39167fdc2aa4c4c57d0459f3de792e001562aa.zip
std: Rewrite the fmt parser
Turn the FSM parser into a linear one so that's easier to implement new features and/or more error checking without adding more and more states. Functionally-speaking the two parsers are at feature parity.
Diffstat (limited to 'lib/std')
-rw-r--r--lib/std/fmt.zig368
1 files changed, 178 insertions, 190 deletions
diff --git a/lib/std/fmt.zig b/lib/std/fmt.zig
index 074db49cc5..0693fa0664 100644
--- a/lib/std/fmt.zig
+++ b/lib/std/fmt.zig
@@ -27,18 +27,6 @@ pub const FormatOptions = struct {
fill: u8 = ' ',
};
-fn peekIsAlign(comptime fmt: []const u8) bool {
- // Should only be called during a state transition to the format segment.
- comptime assert(fmt[0] == ':');
-
- inline for (([_]u8{ 1, 2 })[0..]) |i| {
- if (fmt.len > i and (fmt[i] == '<' or fmt[i] == '^' or fmt[i] == '>')) {
- return true;
- }
- }
- return false;
-}
-
/// Renders fmt string with args, calling output with slices of bytes.
/// If `output` returns an error, the error is returned from `format` and
/// `output` is not called again.
@@ -103,22 +91,6 @@ pub fn format(
@compileError("32 arguments max are supported per format call");
}
- const State = enum {
- Start,
- Positional,
- CloseBrace,
- Specifier,
- FormatFillAndAlign,
- FormatWidth,
- FormatPrecision,
- };
-
- comptime var start_index = 0;
- comptime var state = State.Start;
- comptime var maybe_pos_arg: ?comptime_int = null;
- comptime var specifier_start = 0;
- comptime var specifier_end = 0;
- comptime var options = FormatOptions{};
comptime var arg_state: struct {
next_arg: usize = 0,
used_args: ArgSetType = 0,
@@ -128,7 +100,7 @@ pub fn format(
return (@popCount(ArgSetType, self.used_args) != self.args_len);
}
- fn nextArg(comptime self: *@This(), comptime pos_arg: ?comptime_int) comptime_int {
+ fn nextArg(comptime self: *@This(), comptime pos_arg: ?usize) comptime_int {
const next_idx = pos_arg orelse blk: {
const arg = self.next_arg;
self.next_arg += 1;
@@ -146,182 +118,193 @@ pub fn format(
}
} = .{};
- inline for (fmt) |c, i| {
- switch (state) {
- .Start => switch (c) {
- '{' => {
- if (start_index < i) {
- try writer.writeAll(fmt[start_index..i]);
- }
+ comptime var parser: struct {
+ buf: []const u8 = undefined,
+ pos: usize = 0,
+
+ // Returns a decimal number or null if the current character is not a
+ // digit
+ fn number(comptime self: *@This()) ?usize {
+ var r: ?usize = null;
+
+ while (self.pos < self.buf.len) : (self.pos += 1) {
+ switch (self.buf[self.pos]) {
+ '0'...'9' => {
+ if (r == null) r = 0;
+ r.? *= 10;
+ r.? += self.buf[self.pos] - '0';
+ },
+ else => break,
+ }
+ }
- start_index = i;
- specifier_start = i + 1;
- specifier_end = i + 1;
- maybe_pos_arg = null;
- state = .Positional;
- options = FormatOptions{};
- },
- '}' => {
- if (start_index < i) {
- try writer.writeAll(fmt[start_index..i]);
- }
- state = .CloseBrace;
- },
- else => {},
- },
- .Positional => switch (c) {
- '{' => {
- state = .Start;
- start_index = i;
- },
- ':' => {
- state = if (comptime peekIsAlign(fmt[i..])) State.FormatFillAndAlign else State.FormatWidth;
- specifier_end = i;
- },
- '0'...'9' => {
- if (maybe_pos_arg == null) {
- maybe_pos_arg = 0;
- }
+ return r;
+ }
- maybe_pos_arg.? *= 10;
- maybe_pos_arg.? += c - '0';
- specifier_start = i + 1;
+ // Returns a substring of the input starting from the current position
+ // and ending where `ch` is found or until the end if not found
+ fn until(comptime self: *@This(), comptime ch: u8) []const u8 {
+ const start = self.pos;
- if (maybe_pos_arg.? >= args.len) {
- @compileError("Positional value refers to non-existent argument");
- }
- },
- '}' => {
- const arg_to_print = comptime arg_state.nextArg(maybe_pos_arg);
-
- try formatType(
- args[arg_to_print],
- fmt[0..0],
- options,
- writer,
- default_max_depth,
- );
-
- state = .Start;
- start_index = i + 1;
- },
- else => {
- state = .Specifier;
- specifier_start = i;
- },
- },
- .CloseBrace => switch (c) {
- '}' => {
- state = .Start;
- start_index = i;
- },
- else => @compileError("Single '}' encountered in format string"),
- },
- .Specifier => switch (c) {
- ':' => {
- specifier_end = i;
- state = if (comptime peekIsAlign(fmt[i..])) State.FormatFillAndAlign else State.FormatWidth;
- },
- '}' => {
- const arg_to_print = comptime arg_state.nextArg(maybe_pos_arg);
-
- try formatType(
- args[arg_to_print],
- fmt[specifier_start..i],
- options,
- writer,
- default_max_depth,
- );
- state = .Start;
- start_index = i + 1;
- },
+ if (start >= self.buf.len)
+ return &[_]u8{};
+
+ while (self.pos < self.buf.len) : (self.pos += 1) {
+ if (self.buf[self.pos] == ch) break;
+ }
+ return self.buf[start..self.pos];
+ }
+
+ // Returns one character, if available
+ fn char(comptime self: *@This()) ?u8 {
+ if (self.pos < self.buf.len) {
+ const ch = self.buf[self.pos];
+ self.pos += 1;
+ return ch;
+ }
+ return null;
+ }
+
+ // Returns the n-th next character or null if that's past the end
+ fn peek(comptime self: *@This(), comptime n: usize) ?u8 {
+ return if (self.pos + n < self.buf.len) self.buf[self.pos + n] else null;
+ }
+ } = .{};
+
+ comptime var options: FormatOptions = .{};
+
+ @setEvalBranchQuota(2000000);
+
+ comptime var i = 0;
+ inline while (i < fmt.len) {
+ comptime const start_index = i;
+
+ inline while (i < fmt.len) : (i += 1) {
+ switch (fmt[i]) {
+ '{', '}' => break,
else => {},
- },
- // Only entered if the format string contains a fill/align segment.
- .FormatFillAndAlign => switch (c) {
+ }
+ }
+
+ comptime var end_index = i;
+ comptime var unescape_brace = false;
+
+ // Handle {{ and }}, those are un-escaped as single braces
+ if (i + 1 < fmt.len and fmt[i + 1] == fmt[i]) {
+ unescape_brace = true;
+ // Make the first brace part of the literal...
+ end_index += 1;
+ // ...and skip both
+ i += 2;
+ }
+
+ // Write out the literal
+ if (start_index != end_index) {
+ try writer.writeAll(fmt[start_index..end_index]);
+ }
+
+ // We've already skipped the other brace, restart the loop
+ if (unescape_brace) continue;
+
+ if (i >= fmt.len) break;
+
+ if (fmt[i] == '}') {
+ @compileError("missing opening {");
+ }
+
+ // Get past the {
+ comptime assert(fmt[i] == '{');
+ i += 1;
+
+ comptime const fmt_begin = i;
+ // Find the closing brace
+ inline while (i < fmt.len and fmt[i] != '}') : (i += 1) {}
+ comptime const fmt_end = i;
+
+ if (i >= fmt.len) {
+ @compileError("missing closing }");
+ }
+
+ // Get past the }
+ comptime assert(fmt[i] == '}');
+ i += 1;
+
+ options = .{};
+
+ // Parse the format fragment between braces
+ parser.buf = fmt[fmt_begin..fmt_end];
+ parser.pos = 0;
+
+ // Parse the positional argument number
+ comptime var opt_pos_arg = comptime parser.number();
+
+ // Parse the format specifier
+ comptime var specifier_arg = comptime parser.until(':');
+
+ // Skip the colon, if present
+ if (comptime parser.char()) |ch| {
+ if (ch != ':')
+ @compileError("expected : or }, found '" ++ [1]u8{ch} ++ "'");
+ }
+
+ // Parse the fill character
+ // The fill parameter requires the alignment parameter to be specified
+ // too
+ if (comptime parser.peek(1)) |ch| {
+ if (comptime mem.indexOfScalar(u8, "<^>", ch) != null) {
+ options.fill = comptime parser.char().?;
+ }
+ }
+
+ // Parse the alignment parameter
+ if (comptime parser.peek(0)) |ch| {
+ switch (ch) {
'<' => {
- options.alignment = Alignment.Left;
- state = .FormatWidth;
+ options.alignment = .Left;
+ _ = comptime parser.char();
},
'^' => {
- options.alignment = Alignment.Center;
- state = .FormatWidth;
+ options.alignment = .Center;
+ _ = comptime parser.char();
},
'>' => {
- options.alignment = Alignment.Right;
- state = .FormatWidth;
+ options.alignment = .Right;
+ _ = comptime parser.char();
},
- else => {
- options.fill = c;
- },
- },
- .FormatWidth => switch (c) {
- '0'...'9' => {
- if (options.width == null) {
- options.width = 0;
- }
+ else => {},
+ }
+ }
- options.width.? *= 10;
- options.width.? += c - '0';
- },
- '.' => {
- state = .FormatPrecision;
- },
- '}' => {
- const arg_to_print = comptime arg_state.nextArg(maybe_pos_arg);
-
- try formatType(
- args[arg_to_print],
- fmt[specifier_start..specifier_end],
- options,
- writer,
- default_max_depth,
- );
- state = .Start;
- start_index = i + 1;
- },
- else => {
- @compileError("Unexpected character in width value: " ++ [_]u8{c});
- },
- },
- .FormatPrecision => switch (c) {
- '0'...'9' => {
- if (options.precision == null) {
- options.precision = 0;
- }
+ // Parse the width parameter
+ comptime var opt_width_arg = comptime parser.number();
+ options.width = opt_width_arg;
- options.precision.? *= 10;
- options.precision.? += c - '0';
- },
- '}' => {
- const arg_to_print = comptime arg_state.nextArg(maybe_pos_arg);
-
- try formatType(
- args[arg_to_print],
- fmt[specifier_start..specifier_end],
- options,
- writer,
- default_max_depth,
- );
- state = .Start;
- start_index = i + 1;
- },
- else => {
- @compileError("Unexpected character in precision value: " ++ [_]u8{c});
- },
- },
- }
- }
- comptime {
- if (comptime arg_state.hasUnusedArgs()) {
- @compileError("Unused arguments");
+ // Skip the dot, if present
+ if (comptime parser.char()) |ch| {
+ if (ch != '.')
+ @compileError("expected . or }, found '" ++ [1]u8{ch} ++ "'");
}
- if (state != State.Start) {
- @compileError("Incomplete format string: " ++ fmt);
+
+ // Parse the precision parameter
+ comptime var opt_precision_arg = comptime parser.number();
+ options.precision = opt_precision_arg;
+
+ if (comptime parser.char()) |ch| {
+ @compileError("extraneous trailing character '" ++ [1]u8{ch} ++ "'");
}
+
+ const arg_to_print = comptime arg_state.nextArg(opt_pos_arg);
+ try formatType(
+ args[arg_to_print],
+ specifier_arg,
+ options,
+ writer,
+ default_max_depth,
+ );
}
- if (start_index < fmt.len) {
- try writer.writeAll(fmt[start_index..]);
+
+ if (comptime arg_state.hasUnusedArgs()) {
+ @compileError("Unused arguments");
}
}
@@ -1371,6 +1354,11 @@ test "parse unsigned comptime" {
}
}
+test "escaped braces" {
+ try testFmt("escaped: {{foo}}\n", "escaped: {{{{foo}}}}\n", .{});
+ try testFmt("escaped: {foo}\n", "escaped: {{foo}}\n", .{});
+}
+
test "optional" {
{
const value: ?i32 = 1234;