Merge pull request #21700 from ziglang/cli-lib-dirs

move linker input file parsing to the frontend
author: Andrew Kelley <andrew@ziglang.org> 2024-10-23 22:56:04 -0700
committer: GitHub <noreply@github.com> 2024-10-23 22:56:04 -0700
commit: c563ba6b15b65ecdc1cb538c9437e11dfb330453 (patch)
tree: 99dd968efc3daea52a1d3628b7d8cedba53e84b7 /src/link/LdScript.zig
parent: 33d07f4b6efe461ee3fbfa32cb18f60aac8c2827 (diff)
parent: 4bdc2d38717b5655acd862a5762e069419b158c7 (diff)
download: zig-c563ba6b15b65ecdc1cb538c9437e11dfb330453.tar.gz
zig-c563ba6b15b65ecdc1cb538c9437e11dfb330453.zip
1 files changed, 449 insertions, 0 deletions
diff --git a/src/link/LdScript.zig b/src/link/LdScript.zig
new file mode 100644
index 0000000000..ed5dbc4681
--- /dev/null
+++ b/src/link/LdScript.zig
@@ -0,0 +1,449 @@
+/// Location of the script; `parse` stores the `path` argument it was given here.
+path: Path,
+/// Architecture named by an `OUTPUT_FORMAT` command, or null when the script
+/// contains no such command.
+cpu_arch: ?std.Target.Cpu.Arch,
+/// Inputs collected from `INPUT`/`GROUP` commands, in the order they appear.
+/// The slice itself is released by `deinit`; every `Arg.path` is a sub-slice
+/// of the `data` buffer handed to `parse` and is not freed here.
+args: []const Arg,
+
+/// One input named inside an `INPUT` or `GROUP` command.
+pub const Arg = struct {
+    /// The parser sets this to true for entries listed directly in
+    /// `INPUT`/`GROUP`, and to false for entries inside `AS_NEEDED(...)`.
+    needed: bool = false,
+    /// Text of the entry exactly as it is written in the script.
+    path: []const u8,
+};
+
+/// Frees the `args` slice with `gpa` and leaves `ls` undefined.
+/// `gpa` must be the allocator that was passed to `parse`.
+pub fn deinit(ls: *LdScript, gpa: Allocator) void {
+    // Poison the struct on the way out so accidental reuse is caught.
+    defer ls.* = undefined;
+    gpa.free(ls.args);
+}
+
+/// Errors that `parse` can return.
+pub const Error = error{
+    /// Result of `diags.failParse`, which `parse` calls for invalid or
+    /// unexpected tokens (presumably after recording a diagnostic — confirm
+    /// against `Diags`).
+    LinkFailure,
+    /// `OUTPUT_FORMAT` named a format other than the ones the parser maps to
+    /// a CPU architecture.
+    UnknownCpuArch,
+    /// An allocation made through the `gpa` argument failed.
+    OutOfMemory,
+};
+
+/// Tokenizes and parses the LD script text in `data`.
+///
+/// On success the caller owns the result and must release it with `deinit`
+/// using the same `gpa`. The `path` strings in the returned `args` point into
+/// `data`, so `data` has to stay alive for as long as the result is used.
+/// Invalid and unexpected tokens are reported through `diags.failParse`
+/// together with their line and column, both counted from zero.
+pub fn parse(
+    gpa: Allocator,
+    diags: *Diags,
+    /// For error reporting.
+    path: Path,
+    data: []const u8,
+) Error!LdScript {
+    var token_list: std.ArrayListUnmanaged(Token) = .empty;
+    defer token_list.deinit(gpa);
+    // Parallel to `token_list`: the position of each token, for diagnostics.
+    var positions: std.ArrayListUnmanaged(LineColumn) = .empty;
+    defer positions.deinit(gpa);
+
+    // Phase 1: tokenize the whole input, stopping at the first invalid token.
+    {
+        var tokenizer: Tokenizer = .{ .source = data };
+        var current_line: usize = 0;
+        // Byte offset just past the most recent line terminator.
+        var line_begin: usize = 0;
+
+        var reached_eof = false;
+        while (!reached_eof) {
+            const token = tokenizer.next();
+            try token_list.append(gpa, token);
+            const column = token.start - line_begin;
+            try positions.append(gpa, .{ .line = current_line, .column = column });
+
+            switch (token.id) {
+                .eof => reached_eof = true,
+                .new_line => {
+                    current_line += 1;
+                    line_begin = token.end;
+                },
+                .invalid => return diags.failParse(
+                    path,
+                    "invalid token in LD script: '{s}' ({d}:{d})",
+                    .{ std.fmt.fmtSliceEscapeLower(token.get(data)), current_line, column },
+                ),
+                else => {},
+            }
+        }
+    }
+
+    // Phase 2: run the parser over the collected tokens.
+    var it: TokenIterator = .{ .tokens = token_list.items };
+    var parser: Parser = .{
+        .gpa = gpa,
+        .source = data,
+        .it = &it,
+        .cpu_arch = null,
+        .args = .empty,
+    };
+    defer parser.args.deinit(gpa);
+
+    parser.start() catch |err| switch (err) {
+        error.UnexpectedToken => {
+            // Report the token just before the iterator's current position.
+            const culprit_index = parser.it.pos - 1;
+            const culprit = parser.it.get(culprit_index);
+            const where = positions.items[culprit_index];
+            return diags.failParse(path, "unexpected token in LD script: {s}: '{s}' ({d}:{d})", .{
+                @tagName(culprit.id),
+                culprit.get(data),
+                where.line,
+                where.column,
+            });
+        },
+        else => |e| return e,
+    };
+
+    const owned_args = try parser.args.toOwnedSlice(gpa);
+    return .{
+        .path = path,
+        .cpu_arch = parser.cpu_arch,
+        .args = owned_args,
+    };
+}
+
+/// Position of a token in the script, used only for diagnostics.
+const LineColumn = struct {
+    /// Number of `new_line` tokens seen before this token (first line is 0).
+    line: usize,
+    /// Byte offset of the token start, measured from the end of the previous
+    /// `new_line` token (or from the start of the input on the first line).
+    column: usize,
+};
+
+/// Keywords the parser gives special meaning to. A keyword is spelled in the
+/// script as the upper-cased tag name, e.g. `OUTPUT_FORMAT` or `AS_NEEDED`.
+const Command = enum {
+    output_format,
+    input,
+    group,
+    as_needed,
+
+    /// Returns the command whose upper-cased tag name equals `s` exactly,
+    /// or null when `s` is not a keyword. The comparison is case-sensitive.
+    fn fromString(s: []const u8) ?Command {
+        inline for (@typeInfo(Command).@"enum".fields) |field| {
+            // Build the upper-case spelling once, at compile time.
+            const keyword = comptime keyword: {
+                var upper: [field.name.len]u8 = undefined;
+                for (&upper, field.name) |*dst, c| dst.* = std.ascii.toUpper(c);
+                break :keyword upper;
+            };
+            if (std.mem.eql(u8, s, &keyword)) return @field(Command, field.name);
+        }
+        return null;
+    }
+};
+
+/// Recursive-descent parser over the token stream produced by `Tokenizer`.
+/// Results accumulate in `cpu_arch` and `args`; the strings stored in `args`
+/// are slices of `source`.
+const Parser = struct {
+    gpa: Allocator,
+    source: []const u8,
+    it: *TokenIterator,
+
+    cpu_arch: ?std.Target.Cpu.Arch,
+    args: std.ArrayListUnmanaged(Arg),
+
+    /// Parses a sequence of top-level commands followed by end of input.
+    /// Returns `error.UnexpectedToken` for anything else.
+    fn start(parser: *Parser) !void {
+        while (true) {
+            parser.skipAny(&.{ .comment, .new_line });
+
+            if (parser.maybe(.command)) |cmd_id| {
+                const cmd = parser.getCommand(cmd_id);
+                switch (cmd) {
+                    .output_format => parser.cpu_arch = try parser.outputFormat(),
+                    // TODO we should verify that group only contains libraries
+                    .input, .group => try parser.group(),
+                    else => return error.UnexpectedToken,
+                }
+            } else break;
+        }
+
+        if (parser.it.next()) |tok| switch (tok.id) {
+            .eof => {},
+            else => return error.UnexpectedToken,
+        };
+    }
+
+    /// Parses the argument of `OUTPUT_FORMAT`, written either as `( name )`
+    /// or as a newline followed by `{ name }`, and maps the name to a CPU
+    /// architecture. Unrecognized names yield `error.UnknownCpuArch`.
+    fn outputFormat(p: *Parser) !std.Target.Cpu.Arch {
+        const value = value: {
+            if (p.skip(&.{.lparen})) {
+                const value_id = try p.require(.literal);
+                const value = p.it.get(value_id);
+                _ = try p.require(.rparen);
+                break :value value.get(p.source);
+            } else if (p.skip(&.{ .new_line, .lbrace })) {
+                const value_id = try p.require(.literal);
+                const value = p.it.get(value_id);
+                _ = p.skip(&.{.new_line});
+                _ = try p.require(.rbrace);
+                break :value value.get(p.source);
+            } else return error.UnexpectedToken;
+        };
+        if (std.mem.eql(u8, value, "elf64-x86-64")) return .x86_64;
+        if (std.mem.eql(u8, value, "elf64-littleaarch64")) return .aarch64;
+        return error.UnknownCpuArch;
+    }
+
+    /// Parses the parenthesized list after `INPUT` or `GROUP`. Plain entries
+    /// are appended with `needed = true`; a nested `AS_NEEDED(...)` is handed
+    /// to `asNeeded`. Entries may be spread over several lines and may be
+    /// interleaved with comments.
+    fn group(p: *Parser) !void {
+        const gpa = p.gpa;
+        if (!p.skip(&.{.lparen})) return error.UnexpectedToken;
+
+        while (true) {
+            // Line breaks and comments between entries are insignificant;
+            // without this a multi-line list fails on the first newline.
+            p.skipAny(&.{ .comment, .new_line });
+
+            if (p.maybe(.literal)) |tok_id| {
+                const tok = p.it.get(tok_id);
+                const path = tok.get(p.source);
+                try p.args.append(gpa, .{ .path = path, .needed = true });
+            } else if (p.maybe(.command)) |cmd_id| {
+                const cmd = p.getCommand(cmd_id);
+                switch (cmd) {
+                    .as_needed => try p.asNeeded(),
+                    else => return error.UnexpectedToken,
+                }
+            } else break;
+        }
+
+        _ = try p.require(.rparen);
+    }
+
+    /// Parses the parenthesized list after `AS_NEEDED`, appending each entry
+    /// with `needed = false`. Accepts line breaks and comments between
+    /// entries, like `group`.
+    fn asNeeded(p: *Parser) !void {
+        const gpa = p.gpa;
+        if (!p.skip(&.{.lparen})) return error.UnexpectedToken;
+
+        while (true) {
+            p.skipAny(&.{ .comment, .new_line });
+
+            const tok_id = p.maybe(.literal) orelse break;
+            const tok = p.it.get(tok_id);
+            const path = tok.get(p.source);
+            try p.args.append(gpa, .{ .path = path, .needed = false });
+        }
+
+        _ = try p.require(.rparen);
+    }
+
+    /// Consumes the exact token sequence `ids` and returns true, or consumes
+    /// nothing and returns false when the upcoming tokens do not match.
+    fn skip(p: *Parser, comptime ids: []const Token.Id) bool {
+        const pos = p.it.pos;
+        inline for (ids) |id| {
+            const tok = p.it.next() orelse {
+                // Ran out of tokens part-way through: undo partial progress.
+                p.it.seekTo(pos);
+                return false;
+            };
+            if (tok.id != id) {
+                p.it.seekTo(pos);
+                return false;
+            }
+        }
+        return true;
+    }
+
+    /// Consumes tokens for as long as their id is one of `ids`.
+    fn skipAny(p: *Parser, comptime ids: []const Token.Id) void {
+        outer: while (p.it.next()) |tok| {
+            inline for (ids) |id| {
+                if (id == tok.id) continue :outer;
+            }
+            // First token that is not in `ids`: put it back and stop.
+            break p.it.seekBy(-1);
+        }
+    }
+
+    /// Consumes the next token and returns its index if it has id `id`;
+    /// otherwise leaves the iterator where it was and returns null.
+    fn maybe(p: *Parser, comptime id: Token.Id) ?Token.Index {
+        const pos = p.it.pos;
+        const tok = p.it.next() orelse return null;
+        if (tok.id == id) return pos;
+        p.it.seekBy(-1);
+        return null;
+    }
+
+    /// Like `maybe`, but a mismatch is `error.UnexpectedToken`.
+    fn require(p: *Parser, comptime id: Token.Id) !Token.Index {
+        return p.maybe(id) orelse return error.UnexpectedToken;
+    }
+
+    /// Returns the `Command` spelled by the token at `index`, which must be
+    /// a `.command` token.
+    fn getCommand(p: *Parser, index: Token.Index) Command {
+        const tok = p.it.get(index);
+        assert(tok.id == .command);
+        return Command.fromString(tok.get(p.source)).?;
+    }
+};
+
+/// A lexical element of the script, stored as a half-open byte range
+/// `[start, end)` into the source text plus its classification.
+const Token = struct {
+    id: Id,
+    start: usize,
+    end: usize,
+
+    const Id = enum {
+        eof,
+        invalid,
+
+        new_line,
+        lparen, // (
+        rparen, // )
+        lbrace, // {
+        rbrace, // }
+
+        comment, // /* */
+
+        command, // literal with special meaning, see Command
+        literal,
+    };
+
+    /// Position of a token within the token list.
+    const Index = usize;
+
+    /// Returns the bytes of `source` that this token covers.
+    fn get(tok: Token, source: []const u8) []const u8 {
+        const text = source[tok.start..tok.end];
+        return text;
+    }
+};
+
+/// Splits LD script text into `Token`s. Each call to `next` returns one
+/// token; once the input is exhausted it returns `.eof` tokens.
+const Tokenizer = struct {
+    source: []const u8,
+    index: usize = 0,
+
+    /// Returns true when `slice` begins with `pattern`.
+    fn matchesPattern(comptime pattern: []const u8, slice: []const u8) bool {
+        return std.mem.startsWith(u8, slice, pattern);
+    }
+
+    /// Returns true when the unread input begins with `pattern`.
+    fn matches(tok: Tokenizer, comptime pattern: []const u8) bool {
+        return matchesPattern(pattern, tok.source[tok.index..]);
+    }
+
+    /// Returns true when `source[start..end]` spells a `Command` keyword.
+    fn isCommand(tok: Tokenizer, start: usize, end: usize) bool {
+        return Command.fromString(tok.source[start..end]) != null;
+    }
+
+    /// Classifies the word `source[start..end]` as a command or a literal.
+    fn wordId(tok: Tokenizer, start: usize, end: usize) Token.Id {
+        return if (tok.isCommand(start, end)) .command else .literal;
+    }
+
+    /// Scans and returns the next token.
+    ///
+    /// Leading spaces and tabs are skipped. A word ends at a space, tab,
+    /// newline, `(` or `)`, or at the end of the input. A lone `\r` and a
+    /// comment that is never closed both produce an `.invalid` token.
+    fn next(tok: *Tokenizer) Token {
+        var result: Token = .{
+            .id = .eof,
+            .start = tok.index,
+            .end = undefined,
+        };
+
+        var state: enum {
+            start,
+            comment,
+            literal,
+        } = .start;
+
+        while (tok.index < tok.source.len) : (tok.index += 1) {
+            const c = tok.source[tok.index];
+            switch (state) {
+                .start => switch (c) {
+                    ' ', '\t' => result.start += 1,
+
+                    '\n' => {
+                        result.id = .new_line;
+                        tok.index += 1;
+                        break;
+                    },
+
+                    '\r' => {
+                        if (tok.matches("\r\n")) {
+                            result.id = .new_line;
+                            tok.index += "\r\n".len;
+                        } else {
+                            result.id = .invalid;
+                            tok.index += 1;
+                        }
+                        break;
+                    },
+
+                    '/' => if (tok.matches("/*")) {
+                        state = .comment;
+                        // The loop's continue expression supplies the final
+                        // `+ 1`. Advancing by the full opener length here
+                        // would jump over the byte right after "/*" and
+                        // miss the terminator of an empty comment "/**/".
+                        tok.index += "/*".len - 1;
+                    } else {
+                        state = .literal;
+                    },
+
+                    '(' => {
+                        result.id = .lparen;
+                        tok.index += 1;
+                        break;
+                    },
+
+                    ')' => {
+                        result.id = .rparen;
+                        tok.index += 1;
+                        break;
+                    },
+
+                    '{' => {
+                        result.id = .lbrace;
+                        tok.index += 1;
+                        break;
+                    },
+
+                    '}' => {
+                        result.id = .rbrace;
+                        tok.index += 1;
+                        break;
+                    },
+
+                    else => state = .literal,
+                },
+
+                .comment => switch (c) {
+                    '*' => if (tok.matches("*/")) {
+                        result.id = .comment;
+                        tok.index += "*/".len;
+                        break;
+                    },
+                    else => {},
+                },
+
+                .literal => switch (c) {
+                    // A tab separates words just like a space does; it is
+                    // already treated as skippable whitespace in `.start`.
+                    ' ', '\t', '(', '\n' => {
+                        result.id = tok.wordId(result.start, tok.index);
+                        break;
+                    },
+
+                    ')' => {
+                        result.id = .literal;
+                        break;
+                    },
+
+                    '\r' => {
+                        if (tok.matches("\r\n")) {
+                            // Leave the "\r\n" for the next call, which
+                            // turns it into a `new_line` token.
+                            result.id = tok.wordId(result.start, tok.index);
+                        } else {
+                            result.id = .invalid;
+                            tok.index += 1;
+                        }
+                        break;
+                    },
+
+                    else => {},
+                },
+            }
+        } else {
+            // The input ended without a `break`. Do not let a pending word
+            // or an unclosed comment masquerade as a clean end of file.
+            switch (state) {
+                .start => {},
+                .comment => result.id = .invalid,
+                .literal => result.id = tok.wordId(result.start, tok.index),
+            }
+        }
+
+        result.end = tok.index;
+        return result;
+    }
+};
+
+/// Cursor over a token slice, with support for moving backwards.
+const TokenIterator = struct {
+    tokens: []const Token,
+    pos: Token.Index = 0,
+
+    /// Returns the token under the cursor and advances past it, or returns
+    /// null without moving when the cursor is at the end.
+    fn next(it: *TokenIterator) ?Token {
+        if (it.peek()) |token| {
+            it.pos += 1;
+            return token;
+        }
+        return null;
+    }
+
+    /// Returns the token under the cursor without advancing, or null when
+    /// the cursor is at the end.
+    fn peek(it: TokenIterator) ?Token {
+        return if (it.pos < it.tokens.len) it.tokens[it.pos] else null;
+    }
+
+    /// Moves the cursor back to the first token.
+    fn reset(it: *TokenIterator) void {
+        it.pos = 0;
+    }
+
+    /// Moves the cursor to the absolute position `pos`.
+    fn seekTo(it: *TokenIterator, pos: Token.Index) void {
+        it.pos = pos;
+    }
+
+    /// Moves the cursor by `offset` positions; moving before the first
+    /// token leaves the cursor at 0.
+    fn seekBy(it: *TokenIterator, offset: isize) void {
+        const current: isize = @bitCast(it.pos);
+        const target = current + offset;
+        it.pos = if (target < 0) 0 else @as(usize, @intCast(target));
+    }
+
+    /// Returns the token at `pos`, which must be in bounds.
+    fn get(it: *TokenIterator, pos: Token.Index) Token {
+        assert(pos < it.tokens.len);
+        return it.tokens[pos];
+    }
+};
+
+const LdScript = @This();
+const Diags = @import("../link.zig").Diags;
+
+const std = @import("std");
+const assert = std.debug.assert;
+const Path = std.Build.Cache.Path;
+const Allocator = std.mem.Allocator;
author	Andrew Kelley <andrew@ziglang.org>	2024-10-23 22:56:04 -0700
committer	GitHub <noreply@github.com>	2024-10-23 22:56:04 -0700
commit	c563ba6b15b65ecdc1cb538c9437e11dfb330453 (patch)
tree	99dd968efc3daea52a1d3628b7d8cedba53e84b7 /src/link/LdScript.zig
parent	33d07f4b6efe461ee3fbfa32cb18f60aac8c2827 (diff)
parent	4bdc2d38717b5655acd862a5762e069419b158c7 (diff)
download	zig-c563ba6b15b65ecdc1cb538c9437e11dfb330453.tar.gz zig-c563ba6b15b65ecdc1cb538c9437e11dfb330453.zip