| field | value | date |
|---|---|---|
| author | Veikka Tuominen <git@vexu.eu> | 2023-10-02 07:08:53 +0300 |
| committer | GitHub <noreply@github.com> | 2023-10-02 07:08:53 +0300 |
| commit | fc4d53e2ea6b41440e37caf32d2fd236d0f58c93 | |
| tree | be400bc7033d3f198978ad04c05c14f15b8c5324 /deps/aro/Preprocessor.zig | |
| parent | 0f1652dc603ad43be733cfdd721cedf38d9e45d9 | |
| parent | 5792570197f44b2c7599fb756f5c1e9d59bd0a9a | |
Merge pull request #17221 from Vexu/aro-translate-c
Aro translate-c
Diffstat (limited to 'deps/aro/Preprocessor.zig')
| mode | path | changes |
|---|---|---|
| -rw-r--r-- | deps/aro/Preprocessor.zig | 2691 |

1 file changed, 2691 insertions(+), 0 deletions(-)
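The public surface of the new file, as visible in this diff, is small: `init`, `addBuiltinMacros`, `preprocess`, `deinit`, plus token helpers such as `tokSlice` and `expandedSlice`. Below is a minimal driver sketch using only those declarations; how the `*Compilation` and `Source` values are obtained is not part of this diff, so that wiring is assumed.

```zig
const std = @import("std");
const Compilation = @import("Compilation.zig");
const Source = @import("Source.zig");
const Preprocessor = @import("Preprocessor.zig");

/// Hypothetical driver: run the preprocessor over one already-loaded source file.
/// `comp` and `source` are assumed to come from surrounding compiler setup that
/// this diff does not show.
fn preprocessOne(comp: *Compilation, source: Source) !void {
    var pp = Preprocessor.init(comp);
    defer pp.deinit();

    // Registers __FILE__, __LINE__, __COUNTER__, __has_attribute, _Pragma, etc.
    try pp.addBuiltinMacros();

    // Appends the expanded tokens to pp.tokens and returns the eof token.
    const eof = try pp.preprocess(source);
    _ = eof;
}
```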
diff --git a/deps/aro/Preprocessor.zig b/deps/aro/Preprocessor.zig
new file mode 100644
index 0000000000..95758ae374
--- /dev/null
+++ b/deps/aro/Preprocessor.zig
@@ -0,0 +1,2691 @@
+const std = @import("std");
+const mem = std.mem;
+const Allocator = mem.Allocator;
+const assert = std.debug.assert;
+const Compilation = @import("Compilation.zig");
+const Error = Compilation.Error;
+const Source = @import("Source.zig");
+const Tokenizer = @import("Tokenizer.zig");
+const RawToken = Tokenizer.Token;
+const Parser = @import("Parser.zig");
+const Diagnostics = @import("Diagnostics.zig");
+const Token = @import("Tree.zig").Token;
+const Attribute = @import("Attribute.zig");
+const features = @import("features.zig");
+
+const Preprocessor = @This();
+const DefineMap = std.StringHashMap(Macro);
+const RawTokenList = std.ArrayList(RawToken);
+const max_include_depth = 200;
+
+/// Errors that can be returned when expanding a macro.
+/// error.UnknownPragma can occur within Preprocessor.pragma() but
+/// it is handled there and doesn't escape that function
+const MacroError = Error || error{StopPreprocessing};
+
+const Macro = struct {
+    /// Parameters of the function type macro
+    params: []const []const u8,
+
+    /// Token constituting the macro body
+    tokens: []const RawToken,
+
+    /// If the function type macro has variable number of arguments
+    var_args: bool,
+
+    /// Is a function type macro
+    is_func: bool,
+
+    /// Is a predefined macro
+    is_builtin: bool = false,
+
+    /// Location of macro in the source
+    /// `byte_offset` and `line` are used to define the range of tokens included
+    /// in the macro.
+    loc: Source.Location,
+
+    fn eql(a: Macro, b: Macro, pp: *Preprocessor) bool {
+        if (a.tokens.len != b.tokens.len) return false;
+        if (a.is_builtin != b.is_builtin) return false;
+        for (a.tokens, b.tokens) |a_tok, b_tok| if (!tokEql(pp, a_tok, b_tok)) return false;
+
+        if (a.is_func and b.is_func) {
+            if (a.var_args != b.var_args) return false;
+            if (a.params.len != b.params.len) return false;
+            for (a.params, b.params) |a_param, b_param| if (!mem.eql(u8, a_param, b_param)) return false;
+        }
+
+        return true;
+    }
+
+    fn tokEql(pp: *Preprocessor, a: RawToken, b: RawToken) bool {
+        return mem.eql(u8, pp.tokSlice(a), pp.tokSlice(b));
+    }
+};
+
+comp: *Compilation,
+gpa: mem.Allocator,
+arena: std.heap.ArenaAllocator,
+defines: DefineMap,
+tokens: Token.List = .{},
+token_buf: RawTokenList,
+char_buf: std.ArrayList(u8),
+/// Counter that is incremented each time preprocess() is called
+/// Can be used to distinguish multiple preprocessings of the same file
+preprocess_count: u32 = 0,
+generated_line: u32 = 1,
+add_expansion_nl: u32 = 0,
+include_depth: u8 = 0,
+counter: u32 = 0,
+expansion_source_loc: Source.Location = undefined,
+poisoned_identifiers: std.StringHashMap(void),
+/// Map from Source.Id to macro name in the `#ifndef` condition which guards the source, if any
+include_guards: std.AutoHashMapUnmanaged(Source.Id, []const u8) = .{},
+
+/// Memory is retained to avoid allocation on every single token.
+top_expansion_buf: ExpandBuf,
+
+/// Dump current state to stderr.
+verbose: bool = false, +preserve_whitespace: bool = false, + +pub fn init(comp: *Compilation) Preprocessor { + const pp = Preprocessor{ + .comp = comp, + .gpa = comp.gpa, + .arena = std.heap.ArenaAllocator.init(comp.gpa), + .defines = DefineMap.init(comp.gpa), + .token_buf = RawTokenList.init(comp.gpa), + .char_buf = std.ArrayList(u8).init(comp.gpa), + .poisoned_identifiers = std.StringHashMap(void).init(comp.gpa), + .top_expansion_buf = ExpandBuf.init(comp.gpa), + }; + comp.pragmaEvent(.before_preprocess); + return pp; +} + +const builtin_macros = struct { + const args = [1][]const u8{"X"}; + + const has_attribute = [1]RawToken{.{ + .id = .macro_param_has_attribute, + .source = .generated, + }}; + const has_warning = [1]RawToken{.{ + .id = .macro_param_has_warning, + .source = .generated, + }}; + const has_feature = [1]RawToken{.{ + .id = .macro_param_has_feature, + .source = .generated, + }}; + const has_extension = [1]RawToken{.{ + .id = .macro_param_has_extension, + .source = .generated, + }}; + const has_builtin = [1]RawToken{.{ + .id = .macro_param_has_builtin, + .source = .generated, + }}; + const has_include = [1]RawToken{.{ + .id = .macro_param_has_include, + .source = .generated, + }}; + const has_include_next = [1]RawToken{.{ + .id = .macro_param_has_include_next, + .source = .generated, + }}; + + const is_identifier = [1]RawToken{.{ + .id = .macro_param_is_identifier, + .source = .generated, + }}; + + const pragma_operator = [1]RawToken{.{ + .id = .macro_param_pragma_operator, + .source = .generated, + }}; + + const file = [1]RawToken{.{ + .id = .macro_file, + .source = .generated, + }}; + const line = [1]RawToken{.{ + .id = .macro_line, + .source = .generated, + }}; + const counter = [1]RawToken{.{ + .id = .macro_counter, + .source = .generated, + }}; +}; + +fn addBuiltinMacro(pp: *Preprocessor, name: []const u8, is_func: bool, tokens: []const RawToken) !void { + try pp.defines.putNoClobber(name, .{ + .params = &builtin_macros.args, + .tokens = tokens, + .var_args = false, + .is_func = is_func, + .loc = .{ .id = .generated }, + .is_builtin = true, + }); +} + +pub fn addBuiltinMacros(pp: *Preprocessor) !void { + try pp.addBuiltinMacro("__has_attribute", true, &builtin_macros.has_attribute); + try pp.addBuiltinMacro("__has_warning", true, &builtin_macros.has_warning); + try pp.addBuiltinMacro("__has_feature", true, &builtin_macros.has_feature); + try pp.addBuiltinMacro("__has_extension", true, &builtin_macros.has_extension); + try pp.addBuiltinMacro("__has_builtin", true, &builtin_macros.has_builtin); + try pp.addBuiltinMacro("__has_include", true, &builtin_macros.has_include); + try pp.addBuiltinMacro("__has_include_next", true, &builtin_macros.has_include_next); + try pp.addBuiltinMacro("__is_identifier", true, &builtin_macros.is_identifier); + try pp.addBuiltinMacro("_Pragma", true, &builtin_macros.pragma_operator); + + try pp.addBuiltinMacro("__FILE__", false, &builtin_macros.file); + try pp.addBuiltinMacro("__LINE__", false, &builtin_macros.line); + try pp.addBuiltinMacro("__COUNTER__", false, &builtin_macros.counter); +} + +pub fn deinit(pp: *Preprocessor) void { + pp.defines.deinit(); + for (pp.tokens.items(.expansion_locs)) |loc| Token.free(loc, pp.gpa); + pp.tokens.deinit(pp.gpa); + pp.arena.deinit(); + pp.token_buf.deinit(); + pp.char_buf.deinit(); + pp.poisoned_identifiers.deinit(); + pp.include_guards.deinit(pp.gpa); + pp.top_expansion_buf.deinit(); +} + +/// Preprocess a source file, returns eof token. 
+pub fn preprocess(pp: *Preprocessor, source: Source) Error!Token { + return pp.preprocessExtra(source) catch |er| switch (er) { + // This cannot occur in the main file and is handled in `include`. + error.StopPreprocessing => unreachable, + else => |e| return e, + }; +} + +/// Return the name of the #ifndef guard macro that starts a source, if any. +fn findIncludeGuard(pp: *Preprocessor, source: Source) ?[]const u8 { + var tokenizer = Tokenizer{ + .buf = source.buf, + .comp = pp.comp, + .source = source.id, + }; + var hash = tokenizer.nextNoWS(); + while (hash.id == .nl) hash = tokenizer.nextNoWS(); + if (hash.id != .hash) return null; + const ifndef = tokenizer.nextNoWS(); + if (ifndef.id != .keyword_ifndef) return null; + const guard = tokenizer.nextNoWS(); + if (guard.id != .identifier) return null; + return pp.tokSlice(guard); +} + +fn preprocessExtra(pp: *Preprocessor, source: Source) MacroError!Token { + if (pp.comp.invalid_utf8_locs.get(source.id)) |offset| { + try pp.comp.diag.add(.{ + .tag = .invalid_utf8, + // Todo: compute line number + .loc = .{ .id = source.id, .byte_offset = offset }, + }, &.{}); + return error.FatalError; + } + var guard_name = pp.findIncludeGuard(source); + + pp.preprocess_count += 1; + var tokenizer = Tokenizer{ + .buf = source.buf, + .comp = pp.comp, + .source = source.id, + }; + + // Estimate how many new tokens this source will contain. + const estimated_token_count = source.buf.len / 8; + try pp.tokens.ensureTotalCapacity(pp.gpa, pp.tokens.len + estimated_token_count); + + var if_level: u8 = 0; + var if_kind = std.PackedIntArray(u2, 256).init([1]u2{0} ** 256); + const until_else = 0; + const until_endif = 1; + const until_endif_seen_else = 2; + + var start_of_line = true; + while (true) { + var tok = tokenizer.next(); + switch (tok.id) { + .hash => if (!start_of_line) try pp.tokens.append(pp.gpa, tokFromRaw(tok)) else { + const directive = tokenizer.nextNoWS(); + switch (directive.id) { + .keyword_error, .keyword_warning => { + // #error tokens.. + pp.top_expansion_buf.items.len = 0; + const char_top = pp.char_buf.items.len; + defer pp.char_buf.items.len = char_top; + + while (true) { + tok = tokenizer.next(); + if (tok.id == .nl or tok.id == .eof) break; + if (tok.id == .whitespace) tok.id = .macro_ws; + try pp.top_expansion_buf.append(tokFromRaw(tok)); + } + try pp.stringify(pp.top_expansion_buf.items); + const slice = pp.char_buf.items[char_top + 1 .. 
pp.char_buf.items.len - 2]; + const duped = try pp.comp.diag.arena.allocator().dupe(u8, slice); + + try pp.comp.diag.add(.{ + .tag = if (directive.id == .keyword_error) .error_directive else .warning_directive, + .loc = .{ .id = tok.source, .byte_offset = directive.start, .line = directive.line }, + .extra = .{ .str = duped }, + }, &.{}); + }, + .keyword_if => { + const sum, const overflowed = @addWithOverflow(if_level, 1); + if (overflowed != 0) + return pp.fatal(directive, "too many #if nestings", .{}); + if_level = sum; + + if (try pp.expr(&tokenizer)) { + if_kind.set(if_level, until_endif); + if (pp.verbose) { + pp.verboseLog(directive, "entering then branch of #if", .{}); + } + } else { + if_kind.set(if_level, until_else); + try pp.skip(&tokenizer, .until_else); + if (pp.verbose) { + pp.verboseLog(directive, "entering else branch of #if", .{}); + } + } + }, + .keyword_ifdef => { + const sum, const overflowed = @addWithOverflow(if_level, 1); + if (overflowed != 0) + return pp.fatal(directive, "too many #if nestings", .{}); + if_level = sum; + + const macro_name = (try pp.expectMacroName(&tokenizer)) orelse continue; + try pp.expectNl(&tokenizer); + if (pp.defines.get(macro_name) != null) { + if_kind.set(if_level, until_endif); + if (pp.verbose) { + pp.verboseLog(directive, "entering then branch of #ifdef", .{}); + } + } else { + if_kind.set(if_level, until_else); + try pp.skip(&tokenizer, .until_else); + if (pp.verbose) { + pp.verboseLog(directive, "entering else branch of #ifdef", .{}); + } + } + }, + .keyword_ifndef => { + const sum, const overflowed = @addWithOverflow(if_level, 1); + if (overflowed != 0) + return pp.fatal(directive, "too many #if nestings", .{}); + if_level = sum; + + const macro_name = (try pp.expectMacroName(&tokenizer)) orelse continue; + try pp.expectNl(&tokenizer); + if (pp.defines.get(macro_name) == null) { + if_kind.set(if_level, until_endif); + } else { + if_kind.set(if_level, until_else); + try pp.skip(&tokenizer, .until_else); + } + }, + .keyword_elif => { + if (if_level == 0) { + try pp.err(directive, .elif_without_if); + if_level += 1; + if_kind.set(if_level, until_else); + } else if (if_level == 1) { + guard_name = null; + } + switch (if_kind.get(if_level)) { + until_else => if (try pp.expr(&tokenizer)) { + if_kind.set(if_level, until_endif); + if (pp.verbose) { + pp.verboseLog(directive, "entering then branch of #elif", .{}); + } + } else { + try pp.skip(&tokenizer, .until_else); + if (pp.verbose) { + pp.verboseLog(directive, "entering else branch of #elif", .{}); + } + }, + until_endif => try pp.skip(&tokenizer, .until_endif), + until_endif_seen_else => { + try pp.err(directive, .elif_after_else); + skipToNl(&tokenizer); + }, + else => unreachable, + } + }, + .keyword_elifdef => { + if (if_level == 0) { + try pp.err(directive, .elifdef_without_if); + if_level += 1; + if_kind.set(if_level, until_else); + } else if (if_level == 1) { + guard_name = null; + } + switch (if_kind.get(if_level)) { + until_else => { + const macro_name = try pp.expectMacroName(&tokenizer); + if (macro_name == null) { + if_kind.set(if_level, until_else); + try pp.skip(&tokenizer, .until_else); + if (pp.verbose) { + pp.verboseLog(directive, "entering else branch of #elifdef", .{}); + } + } else { + try pp.expectNl(&tokenizer); + if (pp.defines.get(macro_name.?) 
!= null) { + if_kind.set(if_level, until_endif); + if (pp.verbose) { + pp.verboseLog(directive, "entering then branch of #elifdef", .{}); + } + } else { + if_kind.set(if_level, until_else); + try pp.skip(&tokenizer, .until_else); + if (pp.verbose) { + pp.verboseLog(directive, "entering else branch of #elifdef", .{}); + } + } + } + }, + until_endif => try pp.skip(&tokenizer, .until_endif), + until_endif_seen_else => { + try pp.err(directive, .elifdef_after_else); + skipToNl(&tokenizer); + }, + else => unreachable, + } + }, + .keyword_elifndef => { + if (if_level == 0) { + try pp.err(directive, .elifdef_without_if); + if_level += 1; + if_kind.set(if_level, until_else); + } else if (if_level == 1) { + guard_name = null; + } + switch (if_kind.get(if_level)) { + until_else => { + const macro_name = try pp.expectMacroName(&tokenizer); + if (macro_name == null) { + if_kind.set(if_level, until_else); + try pp.skip(&tokenizer, .until_else); + if (pp.verbose) { + pp.verboseLog(directive, "entering else branch of #elifndef", .{}); + } + } else { + try pp.expectNl(&tokenizer); + if (pp.defines.get(macro_name.?) == null) { + if_kind.set(if_level, until_endif); + if (pp.verbose) { + pp.verboseLog(directive, "entering then branch of #elifndef", .{}); + } + } else { + if_kind.set(if_level, until_else); + try pp.skip(&tokenizer, .until_else); + if (pp.verbose) { + pp.verboseLog(directive, "entering else branch of #elifndef", .{}); + } + } + } + }, + until_endif => try pp.skip(&tokenizer, .until_endif), + until_endif_seen_else => { + try pp.err(directive, .elifdef_after_else); + skipToNl(&tokenizer); + }, + else => unreachable, + } + }, + .keyword_else => { + try pp.expectNl(&tokenizer); + if (if_level == 0) { + try pp.err(directive, .else_without_if); + continue; + } else if (if_level == 1) { + guard_name = null; + } + switch (if_kind.get(if_level)) { + until_else => { + if_kind.set(if_level, until_endif_seen_else); + if (pp.verbose) { + pp.verboseLog(directive, "#else branch here", .{}); + } + }, + until_endif => try pp.skip(&tokenizer, .until_endif_seen_else), + until_endif_seen_else => { + try pp.err(directive, .else_after_else); + skipToNl(&tokenizer); + }, + else => unreachable, + } + }, + .keyword_endif => { + try pp.expectNl(&tokenizer); + if (if_level == 0) { + guard_name = null; + try pp.err(directive, .endif_without_if); + continue; + } else if (if_level == 1) { + const saved_tokenizer = tokenizer; + defer tokenizer = saved_tokenizer; + + var next = tokenizer.nextNoWS(); + while (next.id == .nl) : (next = tokenizer.nextNoWS()) {} + if (next.id != .eof) guard_name = null; + } + if_level -= 1; + }, + .keyword_define => try pp.define(&tokenizer), + .keyword_undef => { + const macro_name = (try pp.expectMacroName(&tokenizer)) orelse continue; + + _ = pp.defines.remove(macro_name); + try pp.expectNl(&tokenizer); + }, + .keyword_include => try pp.include(&tokenizer, .first), + .keyword_include_next => { + try pp.comp.diag.add(.{ + .tag = .include_next, + .loc = .{ .id = tok.source, .byte_offset = directive.start, .line = directive.line }, + }, &.{}); + if (pp.include_depth == 0) { + try pp.comp.diag.add(.{ + .tag = .include_next_outside_header, + .loc = .{ .id = tok.source, .byte_offset = directive.start, .line = directive.line }, + }, &.{}); + try pp.include(&tokenizer, .first); + } else { + try pp.include(&tokenizer, .next); + } + }, + .keyword_embed => try pp.embed(&tokenizer), + .keyword_pragma => try pp.pragma(&tokenizer, directive, null, &.{}), + .keyword_line => { + // #line number "file" + 
const digits = tokenizer.nextNoWS(); + if (digits.id != .pp_num) try pp.err(digits, .line_simple_digit); + // TODO: validate that the pp_num token is solely digits + + if (digits.id == .eof or digits.id == .nl) continue; + const name = tokenizer.nextNoWS(); + if (name.id == .eof or name.id == .nl) continue; + if (name.id != .string_literal) try pp.err(name, .line_invalid_filename); + try pp.expectNl(&tokenizer); + }, + .pp_num => { + // # number "file" flags + // TODO: validate that the pp_num token is solely digits + // if not, emit `GNU line marker directive requires a simple digit sequence` + const name = tokenizer.nextNoWS(); + if (name.id == .eof or name.id == .nl) continue; + if (name.id != .string_literal) try pp.err(name, .line_invalid_filename); + + const flag_1 = tokenizer.nextNoWS(); + if (flag_1.id == .eof or flag_1.id == .nl) continue; + const flag_2 = tokenizer.nextNoWS(); + if (flag_2.id == .eof or flag_2.id == .nl) continue; + const flag_3 = tokenizer.nextNoWS(); + if (flag_3.id == .eof or flag_3.id == .nl) continue; + const flag_4 = tokenizer.nextNoWS(); + if (flag_4.id == .eof or flag_4.id == .nl) continue; + try pp.expectNl(&tokenizer); + }, + .nl => {}, + .eof => { + if (if_level != 0) try pp.err(tok, .unterminated_conditional_directive); + return tokFromRaw(directive); + }, + else => { + try pp.err(tok, .invalid_preprocessing_directive); + skipToNl(&tokenizer); + }, + } + }, + .whitespace => if (pp.preserve_whitespace) try pp.tokens.append(pp.gpa, tokFromRaw(tok)), + .nl => { + start_of_line = true; + if (pp.preserve_whitespace) try pp.tokens.append(pp.gpa, tokFromRaw(tok)); + }, + .eof => { + if (if_level != 0) try pp.err(tok, .unterminated_conditional_directive); + // The following check needs to occur here and not at the top of the function + // because a pragma may change the level during preprocessing + if (source.buf.len > 0 and source.buf[source.buf.len - 1] != '\n') { + try pp.err(tok, .newline_eof); + } + if (guard_name) |name| { + if (try pp.include_guards.fetchPut(pp.gpa, source.id, name)) |prev| { + assert(mem.eql(u8, name, prev.value)); + } + } + return tokFromRaw(tok); + }, + else => { + if (tok.id.isMacroIdentifier() and pp.poisoned_identifiers.get(pp.tokSlice(tok)) != null) { + try pp.err(tok, .poisoned_identifier); + } + // Add the token to the buffer doing any necessary expansions. + start_of_line = false; + try pp.expandMacro(&tokenizer, tok); + }, + } + } +} + +/// Get raw token source string. +/// Returned slice is invalidated when comp.generated_buf is updated. +pub fn tokSlice(pp: *Preprocessor, token: RawToken) []const u8 { + if (token.id.lexeme()) |some| return some; + const source = pp.comp.getSource(token.source); + return source.buf[token.start..token.end]; +} + +/// Convert a token from the Tokenizer into a token used by the parser. 
+fn tokFromRaw(raw: RawToken) Token { + return .{ + .id = raw.id, + .loc = .{ + .id = raw.source, + .byte_offset = raw.start, + .line = raw.line, + }, + }; +} + +fn err(pp: *Preprocessor, raw: RawToken, tag: Diagnostics.Tag) !void { + try pp.comp.diag.add(.{ + .tag = tag, + .loc = .{ + .id = raw.source, + .byte_offset = raw.start, + .line = raw.line, + }, + }, &.{}); +} + +fn fatal(pp: *Preprocessor, raw: RawToken, comptime fmt: []const u8, args: anytype) Compilation.Error { + const source = pp.comp.getSource(raw.source); + const line_col = source.lineCol(.{ .id = raw.source, .line = raw.line, .byte_offset = raw.start }); + return pp.comp.diag.fatal(source.path, line_col.line, raw.line, line_col.col, fmt, args); +} + +fn verboseLog(pp: *Preprocessor, raw: RawToken, comptime fmt: []const u8, args: anytype) void { + const source = pp.comp.getSource(raw.source); + const line_col = source.lineCol(.{ .id = raw.source, .line = raw.line, .byte_offset = raw.start }); + + const stderr = std.io.getStdErr().writer(); + var buf_writer = std.io.bufferedWriter(stderr); + const writer = buf_writer.writer(); + defer buf_writer.flush() catch {}; + writer.print("{s}:{d}:{d}: ", .{ source.path, line_col.line_no, line_col.col }) catch return; + writer.print(fmt, args) catch return; + writer.writeByte('\n') catch return; + writer.writeAll(line_col.line) catch return; + writer.writeByte('\n') catch return; +} + +/// Consume next token, error if it is not an identifier. +fn expectMacroName(pp: *Preprocessor, tokenizer: *Tokenizer) Error!?[]const u8 { + const macro_name = tokenizer.nextNoWS(); + if (!macro_name.id.isMacroIdentifier()) { + try pp.err(macro_name, .macro_name_missing); + skipToNl(tokenizer); + return null; + } + return pp.tokSlice(macro_name); +} + +/// Skip until after a newline, error if extra tokens before it. +fn expectNl(pp: *Preprocessor, tokenizer: *Tokenizer) Error!void { + var sent_err = false; + while (true) { + const tok = tokenizer.next(); + if (tok.id == .nl or tok.id == .eof) return; + if (tok.id == .whitespace) continue; + if (!sent_err) { + sent_err = true; + try pp.err(tok, .extra_tokens_directive_end); + } + } +} + +/// Consume all tokens until a newline and parse the result into a boolean. 
+fn expr(pp: *Preprocessor, tokenizer: *Tokenizer) MacroError!bool { + const start = pp.tokens.len; + defer { + for (pp.top_expansion_buf.items) |tok| Token.free(tok.expansion_locs, pp.gpa); + pp.tokens.len = start; + } + + pp.top_expansion_buf.items.len = 0; + const eof = while (true) { + var tok = tokenizer.next(); + switch (tok.id) { + .nl, .eof => break tok, + .whitespace => if (pp.top_expansion_buf.items.len == 0) continue, + else => {}, + } + try pp.top_expansion_buf.append(tokFromRaw(tok)); + } else unreachable; + if (pp.top_expansion_buf.items.len != 0) { + pp.expansion_source_loc = pp.top_expansion_buf.items[0].loc; + try pp.expandMacroExhaustive(tokenizer, &pp.top_expansion_buf, 0, pp.top_expansion_buf.items.len, false, .expr); + } + for (pp.top_expansion_buf.items) |tok| { + if (tok.id == .macro_ws) continue; + if (!tok.id.validPreprocessorExprStart()) { + try pp.comp.diag.add(.{ + .tag = .invalid_preproc_expr_start, + .loc = tok.loc, + }, tok.expansionSlice()); + return false; + } + break; + } else { + try pp.err(eof, .expected_value_in_expr); + return false; + } + + // validate the tokens in the expression + try pp.tokens.ensureUnusedCapacity(pp.gpa, pp.top_expansion_buf.items.len); + var i: usize = 0; + const items = pp.top_expansion_buf.items; + while (i < items.len) : (i += 1) { + var tok = items[i]; + switch (tok.id) { + .string_literal, + .string_literal_utf_16, + .string_literal_utf_8, + .string_literal_utf_32, + .string_literal_wide, + => { + try pp.comp.diag.add(.{ + .tag = .string_literal_in_pp_expr, + .loc = tok.loc, + }, tok.expansionSlice()); + return false; + }, + .plus_plus, + .minus_minus, + .plus_equal, + .minus_equal, + .asterisk_equal, + .slash_equal, + .percent_equal, + .angle_bracket_angle_bracket_left_equal, + .angle_bracket_angle_bracket_right_equal, + .ampersand_equal, + .caret_equal, + .pipe_equal, + .l_bracket, + .r_bracket, + .l_brace, + .r_brace, + .ellipsis, + .semicolon, + .hash, + .hash_hash, + .equal, + .arrow, + .period, + => { + try pp.comp.diag.add(.{ + .tag = .invalid_preproc_operator, + .loc = tok.loc, + }, tok.expansionSlice()); + return false; + }, + .macro_ws, .whitespace => continue, + .keyword_false => tok.id = .zero, + .keyword_true => tok.id = .one, + else => if (tok.id.isMacroIdentifier()) { + if (tok.id == .keyword_defined) { + const tokens_consumed = try pp.handleKeywordDefined(&tok, items[i + 1 ..], eof); + i += tokens_consumed; + } else { + try pp.comp.diag.add(.{ + .tag = .undefined_macro, + .loc = tok.loc, + .extra = .{ .str = pp.expandedSlice(tok) }, + }, tok.expansionSlice()); + + if (i + 1 < pp.top_expansion_buf.items.len and + pp.top_expansion_buf.items[i + 1].id == .l_paren) + { + try pp.comp.diag.add(.{ + .tag = .fn_macro_undefined, + .loc = tok.loc, + .extra = .{ .str = pp.expandedSlice(tok) }, + }, tok.expansionSlice()); + return false; + } + + tok.id = .zero; // undefined macro + } + }, + } + pp.tokens.appendAssumeCapacity(tok); + } + try pp.tokens.append(pp.gpa, .{ + .id = .eof, + .loc = tokFromRaw(eof).loc, + }); + + // Actually parse it. 
+ var parser = Parser{ + .pp = pp, + .comp = pp.comp, + .gpa = pp.gpa, + .tok_ids = pp.tokens.items(.id), + .tok_i = @intCast(start), + .arena = pp.arena.allocator(), + .in_macro = true, + .data = undefined, + .strings = undefined, + .retained_strings = undefined, + .value_map = undefined, + .labels = undefined, + .decl_buf = undefined, + .list_buf = undefined, + .param_buf = undefined, + .enum_buf = undefined, + .record_buf = undefined, + .attr_buf = undefined, + .field_attr_buf = undefined, + .string_ids = undefined, + }; + return parser.macroExpr(); +} + +/// Turns macro_tok from .keyword_defined into .zero or .one depending on whether the argument is defined +/// Returns the number of tokens consumed +fn handleKeywordDefined(pp: *Preprocessor, macro_tok: *Token, tokens: []const Token, eof: RawToken) !usize { + std.debug.assert(macro_tok.id == .keyword_defined); + var it = TokenIterator.init(tokens); + const first = it.nextNoWS() orelse { + try pp.err(eof, .macro_name_missing); + return it.i; + }; + switch (first.id) { + .l_paren => {}, + else => { + if (!first.id.isMacroIdentifier()) { + try pp.comp.diag.add(.{ + .tag = .macro_name_must_be_identifier, + .loc = first.loc, + .extra = .{ .str = pp.expandedSlice(first) }, + }, first.expansionSlice()); + } + macro_tok.id = if (pp.defines.contains(pp.expandedSlice(first))) .one else .zero; + return it.i; + }, + } + const second = it.nextNoWS() orelse { + try pp.err(eof, .macro_name_missing); + return it.i; + }; + if (!second.id.isMacroIdentifier()) { + try pp.comp.diag.add(.{ + .tag = .macro_name_must_be_identifier, + .loc = second.loc, + }, second.expansionSlice()); + return it.i; + } + macro_tok.id = if (pp.defines.contains(pp.expandedSlice(second))) .one else .zero; + + const last = it.nextNoWS(); + if (last == null or last.?.id != .r_paren) { + const tok = last orelse tokFromRaw(eof); + try pp.comp.diag.add(.{ + .tag = .closing_paren, + .loc = tok.loc, + }, tok.expansionSlice()); + try pp.comp.diag.add(.{ + .tag = .to_match_paren, + .loc = first.loc, + }, first.expansionSlice()); + } + + return it.i; +} + +/// Skip until #else #elif #endif, return last directive token id. +/// Also skips nested #if ... #endifs. 
+fn skip( + pp: *Preprocessor, + tokenizer: *Tokenizer, + cont: enum { until_else, until_endif, until_endif_seen_else }, +) Error!void { + var ifs_seen: u32 = 0; + var line_start = true; + while (tokenizer.index < tokenizer.buf.len) { + if (line_start) { + const saved_tokenizer = tokenizer.*; + const hash = tokenizer.nextNoWS(); + if (hash.id == .nl) continue; + line_start = false; + if (hash.id != .hash) continue; + const directive = tokenizer.nextNoWS(); + switch (directive.id) { + .keyword_else => { + if (ifs_seen != 0) continue; + if (cont == .until_endif_seen_else) { + try pp.err(directive, .else_after_else); + continue; + } + tokenizer.* = saved_tokenizer; + return; + }, + .keyword_elif => { + if (ifs_seen != 0 or cont == .until_endif) continue; + if (cont == .until_endif_seen_else) { + try pp.err(directive, .elif_after_else); + continue; + } + tokenizer.* = saved_tokenizer; + return; + }, + .keyword_elifdef => { + if (ifs_seen != 0 or cont == .until_endif) continue; + if (cont == .until_endif_seen_else) { + try pp.err(directive, .elifdef_after_else); + continue; + } + tokenizer.* = saved_tokenizer; + return; + }, + .keyword_elifndef => { + if (ifs_seen != 0 or cont == .until_endif) continue; + if (cont == .until_endif_seen_else) { + try pp.err(directive, .elifndef_after_else); + continue; + } + tokenizer.* = saved_tokenizer; + return; + }, + .keyword_endif => { + if (ifs_seen == 0) { + tokenizer.* = saved_tokenizer; + return; + } + ifs_seen -= 1; + }, + .keyword_if, .keyword_ifdef, .keyword_ifndef => ifs_seen += 1, + else => {}, + } + } else if (tokenizer.buf[tokenizer.index] == '\n') { + line_start = true; + tokenizer.index += 1; + tokenizer.line += 1; + } else { + line_start = false; + tokenizer.index += 1; + } + } else { + const eof = tokenizer.next(); + return pp.err(eof, .unterminated_conditional_directive); + } +} + +// Skip until newline, ignore other tokens. +fn skipToNl(tokenizer: *Tokenizer) void { + while (true) { + const tok = tokenizer.next(); + if (tok.id == .nl or tok.id == .eof) return; + } +} + +const ExpandBuf = std.ArrayList(Token); +fn removePlacemarkers(buf: *ExpandBuf) void { + var i: usize = buf.items.len -% 1; + while (i < buf.items.len) : (i -%= 1) { + if (buf.items[i].id == .placemarker) { + const placemarker = buf.orderedRemove(i); + Token.free(placemarker.expansion_locs, buf.allocator); + } + } +} + +const MacroArguments = std.ArrayList([]const Token); +fn deinitMacroArguments(allocator: Allocator, args: *const MacroArguments) void { + for (args.items) |item| { + for (item) |tok| Token.free(tok.expansion_locs, allocator); + allocator.free(item); + } + args.deinit(); +} + +fn expandObjMacro(pp: *Preprocessor, simple_macro: *const Macro) Error!ExpandBuf { + var buf = ExpandBuf.init(pp.gpa); + errdefer buf.deinit(); + try buf.ensureTotalCapacity(simple_macro.tokens.len); + + // Add all of the simple_macros tokens to the new buffer handling any concats. 
+ var i: usize = 0; + while (i < simple_macro.tokens.len) : (i += 1) { + const raw = simple_macro.tokens[i]; + const tok = tokFromRaw(raw); + switch (raw.id) { + .hash_hash => { + var rhs = tokFromRaw(simple_macro.tokens[i + 1]); + i += 1; + while (rhs.id == .whitespace) { + rhs = tokFromRaw(simple_macro.tokens[i + 1]); + i += 1; + } + try pp.pasteTokens(&buf, &.{rhs}); + }, + .whitespace => if (pp.preserve_whitespace) buf.appendAssumeCapacity(tok), + .macro_file => { + const start = pp.comp.generated_buf.items.len; + const source = pp.comp.getSource(pp.expansion_source_loc.id); + try pp.comp.generated_buf.writer().print("\"{s}\"\n", .{source.path}); + + buf.appendAssumeCapacity(try pp.makeGeneratedToken(start, .string_literal, tok)); + }, + .macro_line => { + const start = pp.comp.generated_buf.items.len; + const source = pp.comp.getSource(pp.expansion_source_loc.id); + try pp.comp.generated_buf.writer().print("{d}\n", .{source.physicalLine(pp.expansion_source_loc)}); + + buf.appendAssumeCapacity(try pp.makeGeneratedToken(start, .pp_num, tok)); + }, + .macro_counter => { + defer pp.counter += 1; + const start = pp.comp.generated_buf.items.len; + try pp.comp.generated_buf.writer().print("{d}\n", .{pp.counter}); + + buf.appendAssumeCapacity(try pp.makeGeneratedToken(start, .pp_num, tok)); + }, + else => buf.appendAssumeCapacity(tok), + } + } + + return buf; +} + +/// Join a possibly-parenthesized series of string literal tokens into a single string without +/// leading or trailing quotes. The returned slice is invalidated if pp.char_buf changes. +/// Returns error.ExpectedStringLiteral if parentheses are not balanced, a non-string-literal +/// is encountered, or if no string literals are encountered +/// TODO: destringize (replace all '\\' with a single `\` and all '\"' with a '"') +fn pasteStringsUnsafe(pp: *Preprocessor, toks: []const Token) ![]const u8 { + const char_top = pp.char_buf.items.len; + defer pp.char_buf.items.len = char_top; + var unwrapped = toks; + if (toks.len >= 2 and toks[0].id == .l_paren and toks[toks.len - 1].id == .r_paren) { + unwrapped = toks[1 .. toks.len - 1]; + } + if (unwrapped.len == 0) return error.ExpectedStringLiteral; + + for (unwrapped) |tok| { + if (tok.id == .macro_ws) continue; + if (tok.id != .string_literal) return error.ExpectedStringLiteral; + const str = pp.expandedSlice(tok); + try pp.char_buf.appendSlice(str[1 .. str.len - 1]); + } + return pp.char_buf.items[char_top..]; +} + +/// Handle the _Pragma operator (implemented as a builtin macro) +fn pragmaOperator(pp: *Preprocessor, arg_tok: Token, operator_loc: Source.Location) !void { + const arg_slice = pp.expandedSlice(arg_tok); + const content = arg_slice[1 .. 
arg_slice.len - 1]; + const directive = "#pragma "; + + pp.char_buf.clearRetainingCapacity(); + const total_len = directive.len + content.len + 1; // destringify can never grow the string, + 1 for newline + try pp.char_buf.ensureUnusedCapacity(total_len); + pp.char_buf.appendSliceAssumeCapacity(directive); + pp.destringify(content); + pp.char_buf.appendAssumeCapacity('\n'); + + const start = pp.comp.generated_buf.items.len; + try pp.comp.generated_buf.appendSlice(pp.char_buf.items); + var tmp_tokenizer = Tokenizer{ + .buf = pp.comp.generated_buf.items, + .comp = pp.comp, + .index = @intCast(start), + .source = .generated, + .line = pp.generated_line, + }; + pp.generated_line += 1; + const hash_tok = tmp_tokenizer.next(); + assert(hash_tok.id == .hash); + const pragma_tok = tmp_tokenizer.next(); + assert(pragma_tok.id == .keyword_pragma); + try pp.pragma(&tmp_tokenizer, pragma_tok, operator_loc, arg_tok.expansionSlice()); +} + +/// Inverts the output of the preprocessor stringify (#) operation +/// (except all whitespace is condensed to a single space) +/// writes output to pp.char_buf; assumes capacity is sufficient +/// backslash backslash -> backslash +/// backslash doublequote -> doublequote +/// All other characters remain the same +fn destringify(pp: *Preprocessor, str: []const u8) void { + var state: enum { start, backslash_seen } = .start; + for (str) |c| { + switch (c) { + '\\' => { + if (state == .backslash_seen) pp.char_buf.appendAssumeCapacity(c); + state = if (state == .start) .backslash_seen else .start; + }, + else => { + if (state == .backslash_seen and c != '"') pp.char_buf.appendAssumeCapacity('\\'); + pp.char_buf.appendAssumeCapacity(c); + state = .start; + }, + } + } +} + +/// Stringify `tokens` into pp.char_buf. +/// See https://gcc.gnu.org/onlinedocs/gcc-11.2.0/cpp/Stringizing.html#Stringizing +fn stringify(pp: *Preprocessor, tokens: []const Token) !void { + try pp.char_buf.append('"'); + var ws_state: enum { start, need, not_needed } = .start; + for (tokens) |tok| { + if (tok.id == .macro_ws) { + if (ws_state == .start) continue; + ws_state = .need; + continue; + } + if (ws_state == .need) try pp.char_buf.append(' '); + ws_state = .not_needed; + + // backslashes not inside strings are not escaped + const is_str = switch (tok.id) { + .string_literal, + .string_literal_utf_16, + .string_literal_utf_8, + .string_literal_utf_32, + .string_literal_wide, + .char_literal, + .char_literal_utf_16, + .char_literal_utf_32, + .char_literal_wide, + => true, + else => false, + }; + + for (pp.expandedSlice(tok)) |c| { + if (c == '"') + try pp.char_buf.appendSlice("\\\"") + else if (c == '\\' and is_str) + try pp.char_buf.appendSlice("\\\\") + else + try pp.char_buf.append(c); + } + } + if (pp.char_buf.items[pp.char_buf.items.len - 1] == '\\') { + const tok = tokens[tokens.len - 1]; + try pp.comp.diag.add(.{ + .tag = .invalid_pp_stringify_escape, + .loc = tok.loc, + }, tok.expansionSlice()); + pp.char_buf.items.len -= 1; + } + try pp.char_buf.appendSlice("\"\n"); +} + +fn reconstructIncludeString(pp: *Preprocessor, param_toks: []const Token) !?[]const u8 { + const char_top = pp.char_buf.items.len; + defer pp.char_buf.items.len = char_top; + + // Trim leading/trailing whitespace + var begin: usize = 0; + var end: usize = param_toks.len; + while (begin < end and param_toks[begin].id == .macro_ws) : (begin += 1) {} + while (end > begin and param_toks[end - 1].id == .macro_ws) : (end -= 1) {} + const params = param_toks[begin..end]; + + if (params.len == 0) { + try pp.comp.diag.add(.{ + 
.tag = .expected_filename, + .loc = param_toks[0].loc, + }, param_toks[0].expansionSlice()); + return null; + } + // no string pasting + if (params[0].id == .string_literal and params.len > 1) { + try pp.comp.diag.add(.{ + .tag = .closing_paren, + .loc = params[1].loc, + }, params[1].expansionSlice()); + return null; + } + + for (params) |tok| { + const str = pp.expandedSliceExtra(tok, .preserve_macro_ws); + try pp.char_buf.appendSlice(str); + } + + const include_str = pp.char_buf.items[char_top..]; + if (include_str.len < 3) { + try pp.comp.diag.add(.{ + .tag = .empty_filename, + .loc = params[0].loc, + }, params[0].expansionSlice()); + return null; + } + + switch (include_str[0]) { + '<' => { + if (include_str[include_str.len - 1] != '>') { + // Ugly hack to find out where the '>' should go, since we don't have the closing ')' location + const start = params[0].loc; + try pp.comp.diag.add(.{ + .tag = .header_str_closing, + .loc = .{ .id = start.id, .byte_offset = start.byte_offset + @as(u32, @intCast(include_str.len)) + 1, .line = start.line }, + }, params[0].expansionSlice()); + try pp.comp.diag.add(.{ + .tag = .header_str_match, + .loc = params[0].loc, + }, params[0].expansionSlice()); + return null; + } + return include_str; + }, + '"' => return include_str, + else => { + try pp.comp.diag.add(.{ + .tag = .expected_filename, + .loc = params[0].loc, + }, params[0].expansionSlice()); + return null; + }, + } +} + +fn handleBuiltinMacro(pp: *Preprocessor, builtin: RawToken.Id, param_toks: []const Token, src_loc: Source.Location) Error!bool { + switch (builtin) { + .macro_param_has_attribute, + .macro_param_has_feature, + .macro_param_has_extension, + .macro_param_has_builtin, + => { + var invalid: ?Token = null; + var identifier: ?Token = null; + for (param_toks) |tok| { + if (tok.id == .macro_ws) continue; + if (!tok.id.isMacroIdentifier()) { + invalid = tok; + break; + } + if (identifier) |_| invalid = tok else identifier = tok; + } + if (identifier == null and invalid == null) invalid = .{ .id = .eof, .loc = src_loc }; + if (invalid) |some| { + try pp.comp.diag.add( + .{ .tag = .feature_check_requires_identifier, .loc = some.loc }, + some.expansionSlice(), + ); + return false; + } + + const ident_str = pp.expandedSlice(identifier.?); + return switch (builtin) { + .macro_param_has_attribute => Attribute.fromString(.gnu, null, ident_str) != null, + .macro_param_has_feature => features.hasFeature(pp.comp, ident_str), + .macro_param_has_extension => features.hasExtension(pp.comp, ident_str), + .macro_param_has_builtin => pp.comp.hasBuiltin(ident_str), + else => unreachable, + }; + }, + .macro_param_has_warning => { + const actual_param = pp.pasteStringsUnsafe(param_toks) catch |er| switch (er) { + error.ExpectedStringLiteral => { + try pp.comp.diag.add(.{ + .tag = .expected_str_literal_in, + .loc = param_toks[0].loc, + .extra = .{ .str = "__has_warning" }, + }, param_toks[0].expansionSlice()); + return false; + }, + else => |e| return e, + }; + if (!mem.startsWith(u8, actual_param, "-W")) { + try pp.comp.diag.add(.{ + .tag = .malformed_warning_check, + .loc = param_toks[0].loc, + .extra = .{ .str = "__has_warning" }, + }, param_toks[0].expansionSlice()); + return false; + } + const warning_name = actual_param[2..]; + return Diagnostics.warningExists(warning_name); + }, + .macro_param_is_identifier => { + var invalid: ?Token = null; + var identifier: ?Token = null; + for (param_toks) |tok| switch (tok.id) { + .macro_ws => continue, + else => { + if (identifier) |_| invalid = tok else 
identifier = tok; + }, + }; + if (identifier == null and invalid == null) invalid = .{ .id = .eof, .loc = src_loc }; + if (invalid) |some| { + try pp.comp.diag.add(.{ + .tag = .missing_tok_builtin, + .loc = some.loc, + .extra = .{ .tok_id_expected = .r_paren }, + }, some.expansionSlice()); + return false; + } + + const id = identifier.?.id; + return id == .identifier or id == .extended_identifier; + }, + .macro_param_has_include, .macro_param_has_include_next => { + const include_str = (try pp.reconstructIncludeString(param_toks)) orelse return false; + const include_type: Compilation.IncludeType = switch (include_str[0]) { + '"' => .quotes, + '<' => .angle_brackets, + else => unreachable, + }; + const filename = include_str[1 .. include_str.len - 1]; + if (builtin == .macro_param_has_include or pp.include_depth == 0) { + if (builtin == .macro_param_has_include_next) { + try pp.comp.diag.add(.{ + .tag = .include_next_outside_header, + .loc = src_loc, + }, &.{}); + } + return pp.comp.hasInclude(filename, src_loc.id, include_type, .first); + } + return pp.comp.hasInclude(filename, src_loc.id, include_type, .next); + }, + else => unreachable, + } +} + +fn expandFuncMacro( + pp: *Preprocessor, + loc: Source.Location, + func_macro: *const Macro, + args: *const MacroArguments, + expanded_args: *const MacroArguments, +) MacroError!ExpandBuf { + var buf = ExpandBuf.init(pp.gpa); + try buf.ensureTotalCapacity(func_macro.tokens.len); + errdefer buf.deinit(); + + var expanded_variable_arguments = ExpandBuf.init(pp.gpa); + defer expanded_variable_arguments.deinit(); + var variable_arguments = ExpandBuf.init(pp.gpa); + defer variable_arguments.deinit(); + + if (func_macro.var_args) { + var i: usize = func_macro.params.len; + while (i < expanded_args.items.len) : (i += 1) { + try variable_arguments.appendSlice(args.items[i]); + try expanded_variable_arguments.appendSlice(expanded_args.items[i]); + if (i != expanded_args.items.len - 1) { + const comma = Token{ .id = .comma, .loc = .{ .id = .generated } }; + try variable_arguments.append(comma); + try expanded_variable_arguments.append(comma); + } + } + } + + // token concatenation and expansion phase + var tok_i: usize = 0; + while (tok_i < func_macro.tokens.len) : (tok_i += 1) { + const raw = func_macro.tokens[tok_i]; + switch (raw.id) { + .hash_hash => while (tok_i + 1 < func_macro.tokens.len) { + const raw_next = func_macro.tokens[tok_i + 1]; + tok_i += 1; + + const next = switch (raw_next.id) { + .macro_ws => continue, + .hash_hash => continue, + .macro_param, .macro_param_no_expand => if (args.items[raw_next.end].len > 0) + args.items[raw_next.end] + else + &[1]Token{tokFromRaw(.{ .id = .placemarker, .source = .generated })}, + .keyword_va_args => variable_arguments.items, + else => &[1]Token{tokFromRaw(raw_next)}, + }; + + try pp.pasteTokens(&buf, next); + if (next.len != 0) break; + }, + .macro_param_no_expand => { + const slice = if (args.items[raw.end].len > 0) + args.items[raw.end] + else + &[1]Token{tokFromRaw(.{ .id = .placemarker, .source = .generated })}; + const raw_loc = Source.Location{ .id = raw.source, .byte_offset = raw.start, .line = raw.line }; + try bufCopyTokens(&buf, slice, &.{raw_loc}); + }, + .macro_param => { + const arg = expanded_args.items[raw.end]; + const raw_loc = Source.Location{ .id = raw.source, .byte_offset = raw.start, .line = raw.line }; + try bufCopyTokens(&buf, arg, &.{raw_loc}); + }, + .keyword_va_args => { + const raw_loc = Source.Location{ .id = raw.source, .byte_offset = raw.start, .line = raw.line }; + try 
bufCopyTokens(&buf, expanded_variable_arguments.items, &.{raw_loc}); + }, + .stringify_param, .stringify_va_args => { + const arg = if (raw.id == .stringify_va_args) + variable_arguments.items + else + args.items[raw.end]; + + pp.char_buf.clearRetainingCapacity(); + try pp.stringify(arg); + + const start = pp.comp.generated_buf.items.len; + try pp.comp.generated_buf.appendSlice(pp.char_buf.items); + + try buf.append(try pp.makeGeneratedToken(start, .string_literal, tokFromRaw(raw))); + }, + .macro_param_has_attribute, + .macro_param_has_warning, + .macro_param_has_feature, + .macro_param_has_extension, + .macro_param_has_builtin, + .macro_param_has_include, + .macro_param_has_include_next, + .macro_param_is_identifier, + => { + const arg = expanded_args.items[0]; + const result = if (arg.len == 0) blk: { + const extra = Diagnostics.Message.Extra{ .arguments = .{ .expected = 1, .actual = 0 } }; + try pp.comp.diag.add(.{ .tag = .expected_arguments, .loc = loc, .extra = extra }, &.{}); + break :blk false; + } else try pp.handleBuiltinMacro(raw.id, arg, loc); + const start = pp.comp.generated_buf.items.len; + try pp.comp.generated_buf.writer().print("{}\n", .{@intFromBool(result)}); + try buf.append(try pp.makeGeneratedToken(start, .pp_num, tokFromRaw(raw))); + }, + .macro_param_pragma_operator => { + const param_toks = expanded_args.items[0]; + // Clang and GCC require exactly one token (so, no parentheses or string pasting) + // even though their error messages indicate otherwise. Ours is slightly more + // descriptive. + var invalid: ?Token = null; + var string: ?Token = null; + for (param_toks) |tok| switch (tok.id) { + .string_literal => { + if (string) |_| invalid = tok else string = tok; + }, + .macro_ws => continue, + else => { + invalid = tok; + break; + }, + }; + if (string == null and invalid == null) invalid = .{ .loc = loc, .id = .eof }; + if (invalid) |some| try pp.comp.diag.add( + .{ .tag = .pragma_operator_string_literal, .loc = some.loc }, + some.expansionSlice(), + ) else try pp.pragmaOperator(string.?, loc); + }, + .comma => { + if (tok_i + 2 < func_macro.tokens.len and func_macro.tokens[tok_i + 1].id == .hash_hash) { + const hash_hash = func_macro.tokens[tok_i + 1]; + var maybe_va_args = func_macro.tokens[tok_i + 2]; + var consumed: usize = 2; + if (maybe_va_args.id == .macro_ws and tok_i + 3 < func_macro.tokens.len) { + consumed = 3; + maybe_va_args = func_macro.tokens[tok_i + 3]; + } + if (maybe_va_args.id == .keyword_va_args) { + // GNU extension: `, ##__VA_ARGS__` deletes the comma if __VA_ARGS__ is empty + tok_i += consumed; + if (func_macro.params.len == expanded_args.items.len) { + // Empty __VA_ARGS__, drop the comma + try pp.err(hash_hash, .comma_deletion_va_args); + } else if (func_macro.params.len == 0 and expanded_args.items.len == 1 and expanded_args.items[0].len == 0) { + // Ambiguous whether this is "empty __VA_ARGS__" or "__VA_ARGS__ omitted" + if (pp.comp.langopts.standard.isGNU()) { + // GNU standard, drop the comma + try pp.err(hash_hash, .comma_deletion_va_args); + } else { + // C standard, retain the comma + try buf.append(tokFromRaw(raw)); + } + } else { + try buf.append(tokFromRaw(raw)); + if (expanded_variable_arguments.items.len > 0 or variable_arguments.items.len == func_macro.params.len) { + try pp.err(hash_hash, .comma_deletion_va_args); + } + const raw_loc = Source.Location{ + .id = maybe_va_args.source, + .byte_offset = maybe_va_args.start, + .line = maybe_va_args.line, + }; + try bufCopyTokens(&buf, expanded_variable_arguments.items, 
&.{raw_loc}); + } + continue; + } + } + // Regular comma, no token pasting with __VA_ARGS__ + try buf.append(tokFromRaw(raw)); + }, + else => try buf.append(tokFromRaw(raw)), + } + } + removePlacemarkers(&buf); + + return buf; +} + +fn shouldExpand(tok: Token, macro: *Macro) bool { + // macro.loc.line contains the macros end index + if (tok.loc.id == macro.loc.id and + tok.loc.byte_offset >= macro.loc.byte_offset and + tok.loc.byte_offset <= macro.loc.line) + return false; + for (tok.expansionSlice()) |loc| { + if (loc.id == macro.loc.id and + loc.byte_offset >= macro.loc.byte_offset and + loc.byte_offset <= macro.loc.line) + return false; + } + if (tok.flags.expansion_disabled) return false; + + return true; +} + +fn bufCopyTokens(buf: *ExpandBuf, tokens: []const Token, src: []const Source.Location) !void { + try buf.ensureUnusedCapacity(tokens.len); + for (tokens) |tok| { + var copy = try tok.dupe(buf.allocator); + errdefer Token.free(copy.expansion_locs, buf.allocator); + try copy.addExpansionLocation(buf.allocator, src); + buf.appendAssumeCapacity(copy); + } +} + +fn nextBufToken( + pp: *Preprocessor, + tokenizer: *Tokenizer, + buf: *ExpandBuf, + start_idx: *usize, + end_idx: *usize, + extend_buf: bool, +) Error!Token { + start_idx.* += 1; + if (start_idx.* == buf.items.len and start_idx.* >= end_idx.*) { + if (extend_buf) { + const raw_tok = tokenizer.next(); + if (raw_tok.id.isMacroIdentifier() and + pp.poisoned_identifiers.get(pp.tokSlice(raw_tok)) != null) + try pp.err(raw_tok, .poisoned_identifier); + + if (raw_tok.id == .nl) pp.add_expansion_nl += 1; + + const new_tok = tokFromRaw(raw_tok); + end_idx.* += 1; + try buf.append(new_tok); + return new_tok; + } else { + return Token{ .id = .eof, .loc = .{ .id = .generated } }; + } + } else { + return buf.items[start_idx.*]; + } +} + +fn collectMacroFuncArguments( + pp: *Preprocessor, + tokenizer: *Tokenizer, + buf: *ExpandBuf, + start_idx: *usize, + end_idx: *usize, + extend_buf: bool, + is_builtin: bool, +) !MacroArguments { + const name_tok = buf.items[start_idx.*]; + const saved_tokenizer = tokenizer.*; + const old_end = end_idx.*; + + while (true) { + const tok = try nextBufToken(pp, tokenizer, buf, start_idx, end_idx, extend_buf); + switch (tok.id) { + .nl, .whitespace, .macro_ws => {}, + .l_paren => break, + else => { + if (is_builtin) { + try pp.comp.diag.add(.{ + .tag = .missing_lparen_after_builtin, + .loc = name_tok.loc, + .extra = .{ .str = pp.expandedSlice(name_tok) }, + }, tok.expansionSlice()); + } + // Not a macro function call, go over normal identifier, rewind + tokenizer.* = saved_tokenizer; + end_idx.* = old_end; + return error.MissingLParen; + }, + } + } + + // collect the arguments. 
+ var parens: u32 = 0; + var args = MacroArguments.init(pp.gpa); + errdefer deinitMacroArguments(pp.gpa, &args); + var curArgument = std.ArrayList(Token).init(pp.gpa); + defer curArgument.deinit(); + while (true) { + var tok = try nextBufToken(pp, tokenizer, buf, start_idx, end_idx, extend_buf); + tok.flags.is_macro_arg = true; + switch (tok.id) { + .comma => { + if (parens == 0) { + const owned = try curArgument.toOwnedSlice(); + errdefer pp.gpa.free(owned); + try args.append(owned); + } else { + const duped = try tok.dupe(pp.gpa); + errdefer Token.free(duped.expansion_locs, pp.gpa); + try curArgument.append(duped); + } + }, + .l_paren => { + const duped = try tok.dupe(pp.gpa); + errdefer Token.free(duped.expansion_locs, pp.gpa); + try curArgument.append(duped); + parens += 1; + }, + .r_paren => { + if (parens == 0) { + const owned = try curArgument.toOwnedSlice(); + errdefer pp.gpa.free(owned); + try args.append(owned); + break; + } else { + const duped = try tok.dupe(pp.gpa); + errdefer Token.free(duped.expansion_locs, pp.gpa); + try curArgument.append(duped); + parens -= 1; + } + }, + .eof => { + { + const owned = try curArgument.toOwnedSlice(); + errdefer pp.gpa.free(owned); + try args.append(owned); + } + tokenizer.* = saved_tokenizer; + try pp.comp.diag.add( + .{ .tag = .unterminated_macro_arg_list, .loc = name_tok.loc }, + name_tok.expansionSlice(), + ); + return error.Unterminated; + }, + .nl, .whitespace => { + try curArgument.append(.{ .id = .macro_ws, .loc = tok.loc }); + }, + else => { + const duped = try tok.dupe(pp.gpa); + errdefer Token.free(duped.expansion_locs, pp.gpa); + try curArgument.append(duped); + }, + } + } + + return args; +} + +fn removeExpandedTokens(pp: *Preprocessor, buf: *ExpandBuf, start: usize, len: usize, moving_end_idx: *usize) !void { + for (buf.items[start .. start + len]) |tok| Token.free(tok.expansion_locs, pp.gpa); + try buf.replaceRange(start, len, &.{}); + moving_end_idx.* -|= len; +} + +/// The behavior of `defined` depends on whether we are in a preprocessor +/// expression context (#if or #elif) or not. +/// In a non-expression context it's just an identifier. Within a preprocessor +/// expression it is a unary operator or one-argument function. 
+const EvalContext = enum { + expr, + non_expr, +}; + +/// Helper for safely iterating over a slice of tokens while skipping whitespace +const TokenIterator = struct { + toks: []const Token, + i: usize, + + fn init(toks: []const Token) TokenIterator { + return .{ .toks = toks, .i = 0 }; + } + + fn nextNoWS(self: *TokenIterator) ?Token { + while (self.i < self.toks.len) : (self.i += 1) { + const tok = self.toks[self.i]; + if (tok.id == .whitespace or tok.id == .macro_ws) continue; + + self.i += 1; + return tok; + } + return null; + } +}; + +fn expandMacroExhaustive( + pp: *Preprocessor, + tokenizer: *Tokenizer, + buf: *ExpandBuf, + start_idx: usize, + end_idx: usize, + extend_buf: bool, + eval_ctx: EvalContext, +) MacroError!void { + var moving_end_idx = end_idx; + var advance_index: usize = 0; + // rescan loop + var do_rescan = true; + while (do_rescan) { + do_rescan = false; + // expansion loop + var idx: usize = start_idx + advance_index; + while (idx < moving_end_idx) { + const macro_tok = buf.items[idx]; + if (macro_tok.id == .keyword_defined and eval_ctx == .expr) { + idx += 1; + var it = TokenIterator.init(buf.items[idx..moving_end_idx]); + if (it.nextNoWS()) |tok| { + switch (tok.id) { + .l_paren => { + _ = it.nextNoWS(); // eat (what should be) identifier + _ = it.nextNoWS(); // eat (what should be) r paren + }, + .identifier, .extended_identifier => {}, + else => {}, + } + } + idx += it.i; + continue; + } + const macro_entry = pp.defines.getPtr(pp.expandedSlice(macro_tok)); + if (macro_entry == null or !shouldExpand(buf.items[idx], macro_entry.?)) { + idx += 1; + continue; + } + if (macro_entry) |macro| macro_handler: { + if (macro.is_func) { + var macro_scan_idx = idx; + // to be saved in case this doesn't turn out to be a call + const args = pp.collectMacroFuncArguments( + tokenizer, + buf, + ¯o_scan_idx, + &moving_end_idx, + extend_buf, + macro.is_builtin, + ) catch |er| switch (er) { + error.MissingLParen => { + if (!buf.items[idx].flags.is_macro_arg) buf.items[idx].flags.expansion_disabled = true; + idx += 1; + break :macro_handler; + }, + error.Unterminated => { + if (pp.comp.langopts.emulate == .gcc) idx += 1; + try pp.removeExpandedTokens(buf, idx, macro_scan_idx - idx, &moving_end_idx); + break :macro_handler; + }, + else => |e| return e, + }; + defer { + for (args.items) |item| { + pp.gpa.free(item); + } + args.deinit(); + } + + var args_count: u32 = @intCast(args.items.len); + // if the macro has zero arguments g() args_count is still 1 + // an empty token list g() and a whitespace-only token list g( ) + // counts as zero arguments for the purposes of argument-count validation + if (args_count == 1 and macro.params.len == 0) { + for (args.items[0]) |tok| { + if (tok.id != .macro_ws) break; + } else { + args_count = 0; + } + } + + // Validate argument count. 
+ const extra = Diagnostics.Message.Extra{ + .arguments = .{ .expected = @intCast(macro.params.len), .actual = args_count }, + }; + if (macro.var_args and args_count < macro.params.len) { + try pp.comp.diag.add( + .{ .tag = .expected_at_least_arguments, .loc = buf.items[idx].loc, .extra = extra }, + buf.items[idx].expansionSlice(), + ); + idx += 1; + try pp.removeExpandedTokens(buf, idx, macro_scan_idx - idx + 1, &moving_end_idx); + continue; + } + if (!macro.var_args and args_count != macro.params.len) { + try pp.comp.diag.add( + .{ .tag = .expected_arguments, .loc = buf.items[idx].loc, .extra = extra }, + buf.items[idx].expansionSlice(), + ); + idx += 1; + try pp.removeExpandedTokens(buf, idx, macro_scan_idx - idx + 1, &moving_end_idx); + continue; + } + var expanded_args = MacroArguments.init(pp.gpa); + defer deinitMacroArguments(pp.gpa, &expanded_args); + try expanded_args.ensureTotalCapacity(args.items.len); + for (args.items) |arg| { + var expand_buf = ExpandBuf.init(pp.gpa); + errdefer expand_buf.deinit(); + try expand_buf.appendSlice(arg); + + try pp.expandMacroExhaustive(tokenizer, &expand_buf, 0, expand_buf.items.len, false, eval_ctx); + + expanded_args.appendAssumeCapacity(try expand_buf.toOwnedSlice()); + } + + var res = try pp.expandFuncMacro(macro_tok.loc, macro, &args, &expanded_args); + defer res.deinit(); + const tokens_added = res.items.len; + + const macro_expansion_locs = macro_tok.expansionSlice(); + for (res.items) |*tok| { + try tok.addExpansionLocation(pp.gpa, &.{macro_tok.loc}); + try tok.addExpansionLocation(pp.gpa, macro_expansion_locs); + } + + const tokens_removed = macro_scan_idx - idx + 1; + for (buf.items[idx .. idx + tokens_removed]) |tok| Token.free(tok.expansion_locs, pp.gpa); + try buf.replaceRange(idx, tokens_removed, res.items); + + moving_end_idx += tokens_added; + // Overflow here means that we encountered an unterminated argument list + // while expanding the body of this macro. + moving_end_idx -|= tokens_removed; + idx += tokens_added; + do_rescan = true; + } else { + const res = try pp.expandObjMacro(macro); + defer res.deinit(); + + const macro_expansion_locs = macro_tok.expansionSlice(); + var increment_idx_by = res.items.len; + for (res.items, 0..) 
|*tok, i| { + tok.flags.is_macro_arg = macro_tok.flags.is_macro_arg; + try tok.addExpansionLocation(pp.gpa, &.{macro_tok.loc}); + try tok.addExpansionLocation(pp.gpa, macro_expansion_locs); + if (tok.id == .keyword_defined and eval_ctx == .expr) { + try pp.comp.diag.add(.{ + .tag = .expansion_to_defined, + .loc = tok.loc, + }, tok.expansionSlice()); + } + + if (i < increment_idx_by and (tok.id == .keyword_defined or pp.defines.contains(pp.expandedSlice(tok.*)))) { + increment_idx_by = i; + } + } + + Token.free(buf.items[idx].expansion_locs, pp.gpa); + try buf.replaceRange(idx, 1, res.items); + idx += increment_idx_by; + moving_end_idx = moving_end_idx + res.items.len - 1; + do_rescan = true; + } + } + if (idx - start_idx == advance_index + 1 and !do_rescan) { + advance_index += 1; + } + } // end of replacement phase + } + // end of scanning phase + + // trim excess buffer + for (buf.items[moving_end_idx..]) |item| { + Token.free(item.expansion_locs, pp.gpa); + } + buf.items.len = moving_end_idx; +} + +/// Try to expand a macro after a possible candidate has been read from the `tokenizer` +/// into the `raw` token passed as argument +fn expandMacro(pp: *Preprocessor, tokenizer: *Tokenizer, raw: RawToken) MacroError!void { + var source_tok = tokFromRaw(raw); + if (!raw.id.isMacroIdentifier()) { + source_tok.id.simplifyMacroKeyword(); + return pp.tokens.append(pp.gpa, source_tok); + } + pp.top_expansion_buf.items.len = 0; + try pp.top_expansion_buf.append(source_tok); + pp.expansion_source_loc = source_tok.loc; + + try pp.expandMacroExhaustive(tokenizer, &pp.top_expansion_buf, 0, 1, true, .non_expr); + try pp.tokens.ensureUnusedCapacity(pp.gpa, pp.top_expansion_buf.items.len); + for (pp.top_expansion_buf.items) |*tok| { + if (tok.id == .macro_ws and !pp.preserve_whitespace) { + Token.free(tok.expansion_locs, pp.gpa); + continue; + } + tok.id.simplifyMacroKeywordExtra(true); + pp.tokens.appendAssumeCapacity(tok.*); + } + if (pp.preserve_whitespace) { + try pp.tokens.ensureUnusedCapacity(pp.gpa, pp.add_expansion_nl); + while (pp.add_expansion_nl > 0) : (pp.add_expansion_nl -= 1) { + pp.tokens.appendAssumeCapacity(.{ .id = .nl, .loc = .{ .id = .generated } }); + } + } +} + +fn expandedSliceExtra(pp: *const Preprocessor, tok: Token, macro_ws_handling: enum { single_macro_ws, preserve_macro_ws }) []const u8 { + if (tok.id.lexeme()) |some| { + if (!tok.id.allowsDigraphs(pp.comp) and !(tok.id == .macro_ws and macro_ws_handling == .preserve_macro_ws)) return some; + } + var tmp_tokenizer = Tokenizer{ + .buf = pp.comp.getSource(tok.loc.id).buf, + .comp = pp.comp, + .index = tok.loc.byte_offset, + .source = .generated, + }; + if (tok.id == .macro_string) { + while (true) : (tmp_tokenizer.index += 1) { + if (tmp_tokenizer.buf[tmp_tokenizer.index] == '>') break; + } + return tmp_tokenizer.buf[tok.loc.byte_offset .. tmp_tokenizer.index + 1]; + } + const res = tmp_tokenizer.next(); + return tmp_tokenizer.buf[res.start..res.end]; +} + +/// Get expanded token source string. 
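+/// Tokens with a fixed lexeme return their canonical spelling; all other tokens are
+/// re-tokenized from the buffer of the source recorded in their location.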
+pub fn expandedSlice(pp: *Preprocessor, tok: Token) []const u8 { + return pp.expandedSliceExtra(tok, .single_macro_ws); +} + +/// Concat two tokens and add the result to pp.generated +fn pasteTokens(pp: *Preprocessor, lhs_toks: *ExpandBuf, rhs_toks: []const Token) Error!void { + const lhs = while (lhs_toks.popOrNull()) |lhs| { + if (lhs.id == .macro_ws) + Token.free(lhs.expansion_locs, pp.gpa) + else + break lhs; + } else { + return bufCopyTokens(lhs_toks, rhs_toks, &.{}); + }; + + var rhs_rest: u32 = 1; + const rhs = for (rhs_toks) |rhs| { + if (rhs.id != .macro_ws) break rhs; + rhs_rest += 1; + } else { + return lhs_toks.appendAssumeCapacity(lhs); + }; + defer Token.free(lhs.expansion_locs, pp.gpa); + + const start = pp.comp.generated_buf.items.len; + const end = start + pp.expandedSlice(lhs).len + pp.expandedSlice(rhs).len; + try pp.comp.generated_buf.ensureTotalCapacity(end + 1); // +1 for a newline + // We cannot use the same slices here since they might be invalidated by `ensureCapacity` + pp.comp.generated_buf.appendSliceAssumeCapacity(pp.expandedSlice(lhs)); + pp.comp.generated_buf.appendSliceAssumeCapacity(pp.expandedSlice(rhs)); + pp.comp.generated_buf.appendAssumeCapacity('\n'); + + // Try to tokenize the result. + var tmp_tokenizer = Tokenizer{ + .buf = pp.comp.generated_buf.items, + .comp = pp.comp, + .index = @intCast(start), + .source = .generated, + }; + const pasted_token = tmp_tokenizer.nextNoWS(); + const next = tmp_tokenizer.nextNoWS().id; + if (next != .nl and next != .eof) { + try pp.comp.diag.add(.{ + .tag = .pasting_formed_invalid, + .loc = lhs.loc, + .extra = .{ .str = try pp.comp.diag.arena.allocator().dupe( + u8, + pp.comp.generated_buf.items[start..end], + ) }, + }, lhs.expansionSlice()); + } + + const pasted_id = if (lhs.id == .placemarker and rhs.id == .placemarker) + .placemarker + else + pasted_token.id; + try lhs_toks.append(try pp.makeGeneratedToken(start, pasted_id, lhs)); + try bufCopyTokens(lhs_toks, rhs_toks[rhs_rest..], &.{}); +} + +fn makeGeneratedToken(pp: *Preprocessor, start: usize, id: Token.Id, source: Token) !Token { + var pasted_token = Token{ .id = id, .loc = .{ + .id = .generated, + .byte_offset = @intCast(start), + .line = pp.generated_line, + } }; + pp.generated_line += 1; + try pasted_token.addExpansionLocation(pp.gpa, &.{source.loc}); + try pasted_token.addExpansionLocation(pp.gpa, source.expansionSlice()); + return pasted_token; +} + +/// Defines a new macro and warns if it is a duplicate +fn defineMacro(pp: *Preprocessor, name_tok: RawToken, macro: Macro) Error!void { + const name_str = pp.tokSlice(name_tok); + const gop = try pp.defines.getOrPut(name_str); + if (gop.found_existing and !gop.value_ptr.eql(macro, pp)) { + try pp.comp.diag.add(.{ + .tag = if (gop.value_ptr.is_builtin) .builtin_macro_redefined else .macro_redefined, + .loc = .{ .id = name_tok.source, .byte_offset = name_tok.start, .line = name_tok.line }, + .extra = .{ .str = name_str }, + }, &.{}); + // TODO add a previous definition note + } + if (pp.verbose) { + pp.verboseLog(name_tok, "macro {s} defined", .{name_str}); + } + gop.value_ptr.* = macro; +} + +/// Handle a #define directive. +fn define(pp: *Preprocessor, tokenizer: *Tokenizer) Error!void { + // Get macro name and validate it. 
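+ // e.g. in `#define FOO 1` the name token is FOO; `defined` and non-identifier names are rejected below.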
+ const macro_name = tokenizer.nextNoWS(); + if (macro_name.id == .keyword_defined) { + try pp.err(macro_name, .defined_as_macro_name); + return skipToNl(tokenizer); + } + if (!macro_name.id.isMacroIdentifier()) { + try pp.err(macro_name, .macro_name_must_be_identifier); + return skipToNl(tokenizer); + } + var macro_name_token_id = macro_name.id; + macro_name_token_id.simplifyMacroKeyword(); + switch (macro_name_token_id) { + .identifier, .extended_identifier => {}, + else => if (macro_name_token_id.isMacroIdentifier()) { + try pp.err(macro_name, .keyword_macro); + }, + } + + // Check for function macros and empty defines. + var first = tokenizer.next(); + switch (first.id) { + .nl, .eof => return pp.defineMacro(macro_name, .{ + .params = undefined, + .tokens = undefined, + .var_args = false, + .loc = undefined, + .is_func = false, + }), + .whitespace => first = tokenizer.next(), + .l_paren => return pp.defineFn(tokenizer, macro_name, first), + else => try pp.err(first, .whitespace_after_macro_name), + } + if (first.id == .hash_hash) { + try pp.err(first, .hash_hash_at_start); + return skipToNl(tokenizer); + } + first.id.simplifyMacroKeyword(); + + pp.token_buf.items.len = 0; // Safe to use since we can only be in one directive at a time. + + var need_ws = false; + // Collect the token body and validate any ## found. + var tok = first; + const end_index = while (true) { + tok.id.simplifyMacroKeyword(); + switch (tok.id) { + .hash_hash => { + const next = tokenizer.nextNoWS(); + switch (next.id) { + .nl, .eof => { + try pp.err(tok, .hash_hash_at_end); + return; + }, + .hash_hash => { + try pp.err(next, .hash_hash_at_end); + return; + }, + else => {}, + } + try pp.token_buf.append(tok); + try pp.token_buf.append(next); + }, + .nl, .eof => break tok.start, + .whitespace => need_ws = true, + else => { + if (tok.id != .whitespace and need_ws) { + need_ws = false; + try pp.token_buf.append(.{ .id = .macro_ws, .source = .generated }); + } + try pp.token_buf.append(tok); + }, + } + tok = tokenizer.next(); + } else unreachable; + + const list = try pp.arena.allocator().dupe(RawToken, pp.token_buf.items); + try pp.defineMacro(macro_name, .{ + .loc = .{ + .id = macro_name.source, + .byte_offset = first.start, + .line = end_index, + }, + .tokens = list, + .params = undefined, + .is_func = false, + .var_args = false, + }); +} + +/// Handle a function like #define directive. +fn defineFn(pp: *Preprocessor, tokenizer: *Tokenizer, macro_name: RawToken, l_paren: RawToken) Error!void { + assert(macro_name.id.isMacroIdentifier()); + var params = std.ArrayList([]const u8).init(pp.gpa); + defer params.deinit(); + + // Parse the parameter list. 
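+ // e.g. `#define MAX(a, b) ...` collects the parameters "a" and "b"; a trailing `...` marks the
+ // macro as variadic, and the GNU named form `#define P(args...)` is tracked via `gnu_var_args`.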
+ var gnu_var_args: []const u8 = ""; + var var_args = false; + const start_index = while (true) { + var tok = tokenizer.nextNoWS(); + if (tok.id == .r_paren) break tok.end; + if (tok.id == .eof) return pp.err(tok, .unterminated_macro_param_list); + if (tok.id == .ellipsis) { + var_args = true; + const r_paren = tokenizer.nextNoWS(); + if (r_paren.id != .r_paren) { + try pp.err(r_paren, .missing_paren_param_list); + try pp.err(l_paren, .to_match_paren); + return skipToNl(tokenizer); + } + break r_paren.end; + } + if (!tok.id.isMacroIdentifier()) { + try pp.err(tok, .invalid_token_param_list); + return skipToNl(tokenizer); + } + + try params.append(pp.tokSlice(tok)); + + tok = tokenizer.nextNoWS(); + if (tok.id == .ellipsis) { + try pp.err(tok, .gnu_va_macro); + gnu_var_args = params.pop(); + const r_paren = tokenizer.nextNoWS(); + if (r_paren.id != .r_paren) { + try pp.err(r_paren, .missing_paren_param_list); + try pp.err(l_paren, .to_match_paren); + return skipToNl(tokenizer); + } + break r_paren.end; + } else if (tok.id == .r_paren) { + break tok.end; + } else if (tok.id != .comma) { + try pp.err(tok, .expected_comma_param_list); + return skipToNl(tokenizer); + } + } else unreachable; + + var need_ws = false; + // Collect the body tokens and validate # and ##'s found. + pp.token_buf.items.len = 0; // Safe to use since we can only be in one directive at a time. + const end_index = tok_loop: while (true) { + var tok = tokenizer.next(); + switch (tok.id) { + .nl, .eof => break tok.start, + .whitespace => need_ws = pp.token_buf.items.len != 0, + .hash => { + if (tok.id != .whitespace and need_ws) { + need_ws = false; + try pp.token_buf.append(.{ .id = .macro_ws, .source = .generated }); + } + const param = tokenizer.nextNoWS(); + blk: { + if (var_args and param.id == .keyword_va_args) { + tok.id = .stringify_va_args; + try pp.token_buf.append(tok); + continue :tok_loop; + } + if (!param.id.isMacroIdentifier()) break :blk; + const s = pp.tokSlice(param); + if (mem.eql(u8, s, gnu_var_args)) { + tok.id = .stringify_va_args; + try pp.token_buf.append(tok); + continue :tok_loop; + } + for (params.items, 0..) |p, i| { + if (mem.eql(u8, p, s)) { + tok.id = .stringify_param; + tok.end = @intCast(i); + try pp.token_buf.append(tok); + continue :tok_loop; + } + } + } + try pp.err(param, .hash_not_followed_param); + return skipToNl(tokenizer); + }, + .hash_hash => { + need_ws = false; + // if ## appears at the beginning, the token buf is still empty + // in this case, error out + if (pp.token_buf.items.len == 0) { + try pp.err(tok, .hash_hash_at_start); + return skipToNl(tokenizer); + } + const saved_tokenizer = tokenizer.*; + const next = tokenizer.nextNoWS(); + if (next.id == .nl or next.id == .eof) { + try pp.err(tok, .hash_hash_at_end); + return; + } + tokenizer.* = saved_tokenizer; + // convert the previous token to .macro_param_no_expand if it was .macro_param + if (pp.token_buf.items[pp.token_buf.items.len - 1].id == .macro_param) { + pp.token_buf.items[pp.token_buf.items.len - 1].id = .macro_param_no_expand; + } + try pp.token_buf.append(tok); + }, + else => { + if (tok.id != .whitespace and need_ws) { + need_ws = false; + try pp.token_buf.append(.{ .id = .macro_ws, .source = .generated }); + } + if (var_args and tok.id == .keyword_va_args) { + // do nothing + } else if (tok.id.isMacroIdentifier()) { + tok.id.simplifyMacroKeyword(); + const s = pp.tokSlice(tok); + if (mem.eql(u8, gnu_var_args, s)) { + tok.id = .keyword_va_args; + } else for (params.items, 0..) 
|param, i| { + if (mem.eql(u8, param, s)) { + // NOTE: it doesn't matter to assign .macro_param_no_expand + // here in case a ## was the previous token, because + // ## processing will eat this token with the same semantics + tok.id = .macro_param; + tok.end = @intCast(i); + break; + } + } + } + try pp.token_buf.append(tok); + }, + } + } else unreachable; + + const param_list = try pp.arena.allocator().dupe([]const u8, params.items); + const token_list = try pp.arena.allocator().dupe(RawToken, pp.token_buf.items); + try pp.defineMacro(macro_name, .{ + .is_func = true, + .params = param_list, + .var_args = var_args or gnu_var_args.len != 0, + .tokens = token_list, + .loc = .{ + .id = macro_name.source, + .byte_offset = start_index, + .line = end_index, + }, + }); +} + +/// Handle an #embed directive +fn embed(pp: *Preprocessor, tokenizer: *Tokenizer) MacroError!void { + const first = tokenizer.nextNoWS(); + const filename_tok = pp.findIncludeFilenameToken(first, tokenizer, .expect_nl_eof) catch |er| switch (er) { + error.InvalidInclude => return, + else => |e| return e, + }; + + // Check for empty filename. + const tok_slice = pp.expandedSlice(filename_tok); + if (tok_slice.len < 3) { + try pp.err(first, .empty_filename); + return; + } + const filename = tok_slice[1 .. tok_slice.len - 1]; + const include_type: Compilation.IncludeType = switch (filename_tok.id) { + .string_literal => .quotes, + .macro_string => .angle_brackets, + else => unreachable, + }; + + const embed_bytes = (try pp.comp.findEmbed(filename, first.source, include_type)) orelse return pp.fatal(first, "'{s}' not found", .{filename}); + defer pp.comp.gpa.free(embed_bytes); + + if (embed_bytes.len == 0) return; + + try pp.tokens.ensureUnusedCapacity(pp.comp.gpa, 2 * embed_bytes.len - 1); // N bytes and N-1 commas + + // TODO: We currently only support systems with CHAR_BIT == 8 + // If the target's CHAR_BIT is not 8, we need to write out correctly-sized embed_bytes + // and correctly account for the target's endianness + const writer = pp.comp.generated_buf.writer(); + + { + const byte = embed_bytes[0]; + const start = pp.comp.generated_buf.items.len; + try writer.print("{d}", .{byte}); + pp.tokens.appendAssumeCapacity(try pp.makeGeneratedToken(start, .embed_byte, filename_tok)); + } + + for (embed_bytes[1..]) |byte| { + const start = pp.comp.generated_buf.items.len; + try writer.print(",{d}", .{byte}); + pp.tokens.appendAssumeCapacity(.{ .id = .comma, .loc = .{ .id = .generated, .byte_offset = @intCast(start) } }); + pp.tokens.appendAssumeCapacity(try pp.makeGeneratedToken(start + 1, .embed_byte, filename_tok)); + } + try pp.comp.generated_buf.append('\n'); +} + +// Handle a #include directive. 
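+// Both the `"file.h"` and `<file.h>` forms are accepted; locating the file is delegated to
+// `Compilation.findInclude` via `findIncludeSource` below.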
+fn include(pp: *Preprocessor, tokenizer: *Tokenizer, which: Compilation.WhichInclude) MacroError!void { + const first = tokenizer.nextNoWS(); + const new_source = findIncludeSource(pp, tokenizer, first, which) catch |er| switch (er) { + error.InvalidInclude => return, + else => |e| return e, + }; + + // Prevent stack overflow + pp.include_depth += 1; + defer pp.include_depth -= 1; + if (pp.include_depth > max_include_depth) { + try pp.comp.diag.add(.{ + .tag = .too_many_includes, + .loc = .{ .id = first.source, .byte_offset = first.start, .line = first.line }, + }, &.{}); + return error.StopPreprocessing; + } + + if (pp.include_guards.get(new_source.id)) |guard| { + if (pp.defines.contains(guard)) return; + } + + if (pp.verbose) { + pp.verboseLog(first, "include file {s}", .{new_source.path}); + } + + _ = pp.preprocessExtra(new_source) catch |er| switch (er) { + error.StopPreprocessing => {}, + else => |e| return e, + }; +} + +/// tokens that are part of a pragma directive can happen in 3 ways: +/// 1. directly in the text via `#pragma ...` +/// 2. Via a string literal argument to `_Pragma` +/// 3. Via a stringified macro argument which is used as an argument to `_Pragma` +/// operator_loc: Location of `_Pragma`; null if this is from #pragma +/// arg_locs: expansion locations of the argument to _Pragma. empty if #pragma or a raw string literal was used +fn makePragmaToken(pp: *Preprocessor, raw: RawToken, operator_loc: ?Source.Location, arg_locs: []const Source.Location) !Token { + var tok = tokFromRaw(raw); + if (operator_loc) |loc| { + try tok.addExpansionLocation(pp.gpa, &.{loc}); + } + try tok.addExpansionLocation(pp.gpa, arg_locs); + return tok; +} + +/// Handle a pragma directive +fn pragma(pp: *Preprocessor, tokenizer: *Tokenizer, pragma_tok: RawToken, operator_loc: ?Source.Location, arg_locs: []const Source.Location) !void { + const name_tok = tokenizer.nextNoWS(); + if (name_tok.id == .nl or name_tok.id == .eof) return; + + const name = pp.tokSlice(name_tok); + try pp.tokens.append(pp.gpa, try pp.makePragmaToken(pragma_tok, operator_loc, arg_locs)); + const pragma_start: u32 = @intCast(pp.tokens.len); + + const pragma_name_tok = try pp.makePragmaToken(name_tok, operator_loc, arg_locs); + try pp.tokens.append(pp.gpa, pragma_name_tok); + while (true) { + const next_tok = tokenizer.next(); + if (next_tok.id == .whitespace) continue; + if (next_tok.id == .eof) { + try pp.tokens.append(pp.gpa, .{ + .id = .nl, + .loc = .{ .id = .generated }, + }); + break; + } + try pp.tokens.append(pp.gpa, try pp.makePragmaToken(next_tok, operator_loc, arg_locs)); + if (next_tok.id == .nl) break; + } + if (pp.comp.getPragma(name)) |prag| unknown: { + return prag.preprocessorCB(pp, pragma_start) catch |er| switch (er) { + error.UnknownPragma => break :unknown, + else => |e| return e, + }; + } + return pp.comp.diag.add(.{ + .tag = .unknown_pragma, + .loc = pragma_name_tok.loc, + }, pragma_name_tok.expansionSlice()); +} + +fn findIncludeFilenameToken( + pp: *Preprocessor, + first_token: RawToken, + tokenizer: *Tokenizer, + trailing_token_behavior: enum { ignore_trailing_tokens, expect_nl_eof }, +) !Token { + const start = pp.tokens.len; + defer pp.tokens.len = start; + var first = first_token; + + if (first.id == .angle_bracket_left) to_end: { + // The tokenizer does not handle <foo> include strings so do it here. 
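+ // Scan forward to the closing '>' so that e.g. `<stdio.h>` becomes a single .macro_string token.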
+ while (tokenizer.index < tokenizer.buf.len) : (tokenizer.index += 1) { + switch (tokenizer.buf[tokenizer.index]) { + '>' => { + tokenizer.index += 1; + first.end = tokenizer.index; + first.id = .macro_string; + break :to_end; + }, + '\n' => break, + else => {}, + } + } + try pp.comp.diag.add(.{ + .tag = .header_str_closing, + .loc = .{ .id = first.source, .byte_offset = tokenizer.index, .line = first.line }, + }, &.{}); + try pp.err(first, .header_str_match); + } + // Try to expand if the argument is a macro. + try pp.expandMacro(tokenizer, first); + + // Check that we actually got a string. + const filename_tok = pp.tokens.get(start); + switch (filename_tok.id) { + .string_literal, .macro_string => {}, + else => { + try pp.err(first, .expected_filename); + try pp.expectNl(tokenizer); + return error.InvalidInclude; + }, + } + switch (trailing_token_behavior) { + .expect_nl_eof => { + // Error on extra tokens. + const nl = tokenizer.nextNoWS(); + if ((nl.id != .nl and nl.id != .eof) or pp.tokens.len > start + 1) { + skipToNl(tokenizer); + try pp.err(first, .extra_tokens_directive_end); + } + }, + .ignore_trailing_tokens => {}, + } + return filename_tok; +} + +fn findIncludeSource(pp: *Preprocessor, tokenizer: *Tokenizer, first: RawToken, which: Compilation.WhichInclude) !Source { + const filename_tok = try pp.findIncludeFilenameToken(first, tokenizer, .expect_nl_eof); + + // Check for empty filename. + const tok_slice = pp.expandedSlice(filename_tok); + if (tok_slice.len < 3) { + try pp.err(first, .empty_filename); + return error.InvalidInclude; + } + + // Find the file. + const filename = tok_slice[1 .. tok_slice.len - 1]; + const include_type: Compilation.IncludeType = switch (filename_tok.id) { + .string_literal => .quotes, + .macro_string => .angle_brackets, + else => unreachable, + }; + + return (try pp.comp.findInclude(filename, first.source, include_type, which)) orelse + pp.fatal(first, "'{s}' not found", .{filename}); +} + +/// Pretty print tokens and try to preserve whitespace. 
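+/// Writes the token stream to `w`; #pragma directives are re-emitted unless a registered
+/// handler reports (via `shouldPreserveTokens`) that they should be dropped.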
+pub fn prettyPrintTokens(pp: *Preprocessor, w: anytype) !void { + var i: u32 = 0; + while (true) : (i += 1) { + var cur: Token = pp.tokens.get(i); + switch (cur.id) { + .eof => { + if (pp.tokens.len > 1 and pp.tokens.items(.id)[i - 1] != .nl) try w.writeByte('\n'); + break; + }, + .nl => try w.writeAll("\n"), + .keyword_pragma => { + const pragma_name = pp.expandedSlice(pp.tokens.get(i + 1)); + const end_idx = mem.indexOfScalarPos(Token.Id, pp.tokens.items(.id), i, .nl) orelse i + 1; + const pragma_len = @as(u32, @intCast(end_idx)) - i; + + if (pp.comp.getPragma(pragma_name)) |prag| { + if (!prag.shouldPreserveTokens(pp, i + 1)) { + i += pragma_len; + cur = pp.tokens.get(i); + continue; + } + } + try w.writeAll("#pragma"); + i += 1; + while (true) : (i += 1) { + cur = pp.tokens.get(i); + if (cur.id == .nl) { + try w.writeByte('\n'); + break; + } + try w.writeByte(' '); + const slice = pp.expandedSlice(cur); + try w.writeAll(slice); + } + }, + .whitespace => { + var slice = pp.expandedSlice(cur); + while (mem.indexOfScalar(u8, slice, '\n')) |some| { + try w.writeByte('\n'); + slice = slice[some + 1 ..]; + } + for (slice) |_| try w.writeByte(' '); + }, + else => { + const slice = pp.expandedSlice(cur); + try w.writeAll(slice); + }, + } + } +} + +test "Preserve pragma tokens sometimes" { + const allocator = std.testing.allocator; + const Test = struct { + fn runPreprocessor(source_text: []const u8) ![]const u8 { + var buf = std.ArrayList(u8).init(allocator); + defer buf.deinit(); + + var comp = Compilation.init(allocator); + defer comp.deinit(); + + try comp.addDefaultPragmaHandlers(); + + var pp = Preprocessor.init(&comp); + defer pp.deinit(); + + pp.preserve_whitespace = true; + + const test_runner_macros = try comp.addSourceFromBuffer("<test_runner>", source_text); + const eof = try pp.preprocess(test_runner_macros); + try pp.tokens.append(pp.gpa, eof); + try pp.prettyPrintTokens(buf.writer()); + return allocator.dupe(u8, buf.items); + } + + fn check(source_text: []const u8, expected: []const u8) !void { + const output = try runPreprocessor(source_text); + defer allocator.free(output); + + try std.testing.expectEqualStrings(expected, output); + } + }; + const preserve_gcc_diagnostic = + \\#pragma GCC diagnostic error "-Wnewline-eof" + \\#pragma GCC warning error "-Wnewline-eof" + \\int x; + \\#pragma GCC ignored error "-Wnewline-eof" + \\ + ; + try Test.check(preserve_gcc_diagnostic, preserve_gcc_diagnostic); + + const omit_once = + \\#pragma once + \\int x; + \\#pragma once + \\ + ; + try Test.check(omit_once, "int x;\n"); + + const omit_poison = + \\#pragma GCC poison foobar + \\ + ; + try Test.check(omit_poison, ""); +} + +test "destringify" { + const allocator = std.testing.allocator; + const Test = struct { + fn testDestringify(pp: *Preprocessor, stringified: []const u8, destringified: []const u8) !void { + pp.char_buf.clearRetainingCapacity(); + try pp.char_buf.ensureUnusedCapacity(stringified.len); + pp.destringify(stringified); + try std.testing.expectEqualStrings(destringified, pp.char_buf.items); + } + }; + var comp = Compilation.init(allocator); + defer comp.deinit(); + var pp = Preprocessor.init(&comp); + defer pp.deinit(); + + try Test.testDestringify(&pp, "hello\tworld\n", "hello\tworld\n"); + try Test.testDestringify(&pp, + \\ \"FOO BAR BAZ\" + , + \\ "FOO BAR BAZ" + ); + try Test.testDestringify(&pp, + \\ \\t\\n + \\ + , + \\ \t\n + \\ + ); +} + +test "Include guards" { + const Test = struct { + /// This is here so that when #elifdef / #elifndef are added we don't forget + 
/// to test that they don't accidentally break include guard detection + fn pairsWithIfndef(tok_id: RawToken.Id) bool { + return switch (tok_id) { + .keyword_elif, + .keyword_elifdef, + .keyword_elifndef, + .keyword_else, + => true, + + .keyword_include, + .keyword_include_next, + .keyword_embed, + .keyword_define, + .keyword_defined, + .keyword_undef, + .keyword_ifdef, + .keyword_ifndef, + .keyword_error, + .keyword_warning, + .keyword_pragma, + .keyword_line, + .keyword_endif, + => false, + else => unreachable, + }; + } + + fn skippable(tok_id: RawToken.Id) bool { + return switch (tok_id) { + .keyword_defined, .keyword_va_args, .keyword_endif => true, + else => false, + }; + } + + fn testIncludeGuard(allocator: std.mem.Allocator, comptime template: []const u8, tok_id: RawToken.Id, expected_guards: u32) !void { + var comp = Compilation.init(allocator); + defer comp.deinit(); + var pp = Preprocessor.init(&comp); + defer pp.deinit(); + + const path = try std.fs.path.join(allocator, &.{ ".", "bar.h" }); + defer allocator.free(path); + + _ = try comp.addSourceFromBuffer(path, "int bar = 5;\n"); + + var buf = std.ArrayList(u8).init(allocator); + defer buf.deinit(); + + var writer = buf.writer(); + switch (tok_id) { + .keyword_include, .keyword_include_next => try writer.print(template, .{ tok_id.lexeme().?, " \"bar.h\"" }), + .keyword_define, .keyword_undef => try writer.print(template, .{ tok_id.lexeme().?, " BAR" }), + .keyword_ifndef, + .keyword_ifdef, + .keyword_elifdef, + .keyword_elifndef, + => try writer.print(template, .{ tok_id.lexeme().?, " BAR\n#endif" }), + else => try writer.print(template, .{ tok_id.lexeme().?, "" }), + } + const source = try comp.addSourceFromBuffer("test.h", buf.items); + _ = try pp.preprocess(source); + + try std.testing.expectEqual(expected_guards, pp.include_guards.count()); + } + }; + const tags = std.meta.tags(RawToken.Id); + for (tags) |tag| { + if (Test.skippable(tag)) continue; + var copy = tag; + copy.simplifyMacroKeyword(); + if (copy != tag or tag == .keyword_else) { + const inside_ifndef_template = + \\//Leading comment (should be ignored) + \\ + \\#ifndef FOO + \\#{s}{s} + \\#endif + ; + const expected_guards: u32 = if (Test.pairsWithIfndef(tag)) 0 else 1; + try Test.testIncludeGuard(std.testing.allocator, inside_ifndef_template, tag, expected_guards); + + const outside_ifndef_template = + \\#ifndef FOO + \\#endif + \\#{s}{s} + ; + try Test.testIncludeGuard(std.testing.allocator, outside_ifndef_template, tag, 0); + } + } +} |
