diff options
| author | Andrew Kelley <andrew@ziglang.org> | 2024-10-23 22:56:04 -0700 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2024-10-23 22:56:04 -0700 |
| commit | c563ba6b15b65ecdc1cb538c9437e11dfb330453 (patch) | |
| tree | 99dd968efc3daea52a1d3628b7d8cedba53e84b7 /src/link/LdScript.zig | |
| parent | 33d07f4b6efe461ee3fbfa32cb18f60aac8c2827 (diff) | |
| parent | 4bdc2d38717b5655acd862a5762e069419b158c7 (diff) | |
| download | zig-c563ba6b15b65ecdc1cb538c9437e11dfb330453.tar.gz zig-c563ba6b15b65ecdc1cb538c9437e11dfb330453.zip | |
Merge pull request #21700 from ziglang/cli-lib-dirs
move linker input file parsing to the frontend
Diffstat (limited to 'src/link/LdScript.zig')
| -rw-r--r-- | src/link/LdScript.zig | 449 |
1 files changed, 449 insertions, 0 deletions
//! Parser for the subset of LD script syntax handled in this file: the
//! `OUTPUT_FORMAT`, `INPUT`, `GROUP` and `AS_NEEDED` commands, `/* */`
//! comments, parentheses and braces.

/// Path handed to `parse`; stored verbatim and used for error reporting.
path: Path,
/// Architecture taken from `OUTPUT_FORMAT`, or null when the script has none.
cpu_arch: ?std.Target.Cpu.Arch,
/// Inputs listed by `INPUT`/`GROUP` (including nested `AS_NEEDED`), in script order.
/// The slice is allocated by `parse` and released by `deinit`.
args: []const Arg,

pub const Arg = struct {
    /// `true` for entries listed directly inside `INPUT`/`GROUP`,
    /// `false` for entries listed inside `AS_NEEDED`.
    needed: bool = false,
    /// Slice of the `data` buffer given to `parse`; not owned by the `Arg`,
    /// so that buffer must outlive it.
    path: []const u8,
};

/// Frees `args` and invalidates `ls`. `gpa` must be the allocator given to `parse`.
pub fn deinit(ls: *LdScript, gpa: Allocator) void {
    gpa.free(ls.args);
    ls.* = undefined;
}

pub const Error = error{
    /// Returned through `Diags.failParse` after a diagnostic has been recorded.
    LinkFailure,
    /// `OUTPUT_FORMAT` named a format other than the ones recognized here.
    UnknownCpuArch,
    OutOfMemory,
};

/// Tokenizes and parses the LD script in `data`.
///
/// Invalid and unexpected tokens are reported through `diags` together with a
/// zero-based `line:column` position. On success the caller owns the result
/// and must call `deinit` with the same `gpa`; every `Arg.path` borrows from
/// `data`.
pub fn parse(
    gpa: Allocator,
    diags: *Diags,
    /// For error reporting.
    path: Path,
    data: []const u8,
) Error!LdScript {
    var tokenizer = Tokenizer{ .source = data };
    var tokens: std.ArrayListUnmanaged(Token) = .empty;
    defer tokens.deinit(gpa);
    // Parallel to `tokens`: source position of each token, for diagnostics.
    var line_col: std.ArrayListUnmanaged(LineColumn) = .empty;
    defer line_col.deinit(gpa);

    var line: usize = 0;
    // Byte offset of the first character of the current line.
    var prev_line_last_col: usize = 0;

    while (true) {
        const tok = tokenizer.next();
        try tokens.append(gpa, tok);
        const column = tok.start - prev_line_last_col;
        try line_col.append(gpa, .{ .line = line, .column = column });
        switch (tok.id) {
            .invalid => {
                return diags.failParse(path, "invalid token in LD script: '{s}' ({d}:{d})", .{
                    std.fmt.fmtSliceEscapeLower(tok.get(data)), line, column,
                });
            },
            .new_line => {
                line += 1;
                prev_line_last_col = tok.end;
            },
            .comment => {
                // A comment may span several lines; account for the newlines
                // it contains so that positions of later tokens stay correct.
                const text = tok.get(data);
                if (std.mem.lastIndexOfScalar(u8, text, '\n')) |last_nl| {
                    line += std.mem.count(u8, text, "\n");
                    prev_line_last_col = tok.start + last_nl + 1;
                }
            },
            .eof => break,
            else => {},
        }
    }

    var it: TokenIterator = .{ .tokens = tokens.items };
    var parser: Parser = .{
        .gpa = gpa,
        .source = data,
        .it = &it,
        .args = .empty,
        .cpu_arch = null,
    };
    defer parser.args.deinit(gpa);

    parser.start() catch |err| switch (err) {
        error.UnexpectedToken => {
            // The token list always ends with `.eof` and the parser consumes
            // at least one token before failing, so `pos` is never 0 here.
            const last_token_id = parser.it.pos - 1;
            const last_token = parser.it.get(last_token_id);
            const lcol = line_col.items[last_token_id];
            return diags.failParse(path, "unexpected token in LD script: {s}: '{s}' ({d}:{d})", .{
                @tagName(last_token.id),
                last_token.get(data),
                lcol.line,
                lcol.column,
            });
        },
        else => |e| return e,
    };
    return .{
        .path = path,
        .cpu_arch = parser.cpu_arch,
        .args = try parser.args.toOwnedSlice(gpa),
    };
}

/// Zero-based source position of a token.
const LineColumn = struct {
    line: usize,
    column: usize,
};

/// Script keywords with special meaning.
const Command = enum {
    output_format,
    input,
    group,
    as_needed,

    /// Maps the upper-case spelling of a tag (e.g. "AS_NEEDED") to its
    /// command; any other string yields null. The comparison is case-sensitive.
    fn fromString(s: []const u8) ?Command {
        inline for (@typeInfo(Command).@"enum".fields) |field| {
            const upper_name = n: {
                comptime var buf: [field.name.len]u8 = undefined;
                inline for (field.name, 0..) |c, i| {
                    buf[i] = comptime std.ascii.toUpper(c);
                }
                break :n buf;
            };
            if (std.mem.eql(u8, &upper_name, s)) return @field(Command, field.name);
        }
        return null;
    }
};

/// Recursive-descent parser over a pre-tokenized script.
const Parser = struct {
    gpa: Allocator,
    source: []const u8,
    it: *TokenIterator,

    cpu_arch: ?std.Target.Cpu.Arch,
    args: std.ArrayListUnmanaged(Arg),

    /// Parses top-level commands until a non-command token is reached, which
    /// must then be `.eof`.
    fn start(parser: *Parser) !void {
        while (true) {
            parser.skipAny(&.{ .comment, .new_line });

            if (parser.maybe(.command)) |cmd_id| {
                const cmd = parser.getCommand(cmd_id);
                switch (cmd) {
                    .output_format => parser.cpu_arch = try parser.outputFormat(),
                    // TODO we should verify that group only contains libraries
                    .input, .group => try parser.group(),
                    else => return error.UnexpectedToken,
                }
            } else break;
        }

        if (parser.it.next()) |tok| switch (tok.id) {
            .eof => {},
            else => return error.UnexpectedToken,
        };
    }

    /// Parses the argument of `OUTPUT_FORMAT`, written either as `(literal)`
    /// or as a newline followed by `{ literal }`, and maps it to a CPU
    /// architecture.
    fn outputFormat(p: *Parser) !std.Target.Cpu.Arch {
        const value = value: {
            if (p.skip(&.{.lparen})) {
                const value_id = try p.require(.literal);
                const value = p.it.get(value_id);
                _ = try p.require(.rparen);
                break :value value.get(p.source);
            } else if (p.skip(&.{ .new_line, .lbrace })) {
                const value_id = try p.require(.literal);
                const value = p.it.get(value_id);
                _ = p.skip(&.{.new_line});
                _ = try p.require(.rbrace);
                break :value value.get(p.source);
            } else return error.UnexpectedToken;
        };
        if (std.mem.eql(u8, value, "elf64-x86-64")) return .x86_64;
        if (std.mem.eql(u8, value, "elf64-littleaarch64")) return .aarch64;
        return error.UnknownCpuArch;
    }

    /// Parses the parenthesized list of `INPUT`/`GROUP`. Plain literals are
    /// appended as needed inputs; a nested `AS_NEEDED(...)` is delegated to
    /// `asNeeded`.
    fn group(p: *Parser) !void {
        const gpa = p.gpa;
        if (!p.skip(&.{.lparen})) return error.UnexpectedToken;

        while (true) {
            if (p.maybe(.literal)) |tok_id| {
                const tok = p.it.get(tok_id);
                const path = tok.get(p.source);
                try p.args.append(gpa, .{ .path = path, .needed = true });
            } else if (p.maybe(.command)) |cmd_id| {
                const cmd = p.getCommand(cmd_id);
                switch (cmd) {
                    .as_needed => try p.asNeeded(),
                    else => return error.UnexpectedToken,
                }
            } else break;
        }

        _ = try p.require(.rparen);
    }

    /// Parses the parenthesized list of `AS_NEEDED`, appending each literal
    /// with `needed = false`.
    fn asNeeded(p: *Parser) !void {
        const gpa = p.gpa;
        if (!p.skip(&.{.lparen})) return error.UnexpectedToken;

        while (p.maybe(.literal)) |tok_id| {
            const tok = p.it.get(tok_id);
            const path = tok.get(p.source);
            try p.args.append(gpa, .{ .path = path, .needed = false });
        }

        _ = try p.require(.rparen);
    }

    /// Consumes exactly the token sequence `ids`. Returns false when the
    /// sequence does not match; on a mismatch the position is restored.
    fn skip(p: *Parser, comptime ids: []const Token.Id) bool {
        const pos = p.it.pos;
        inline for (ids) |id| {
            const tok = p.it.next() orelse return false;
            if (tok.id != id) {
                p.it.seekTo(pos);
                return false;
            }
        }
        return true;
    }

    /// Consumes tokens for as long as their id is one of `ids`.
    fn skipAny(p: *Parser, comptime ids: []const Token.Id) void {
        outer: while (p.it.next()) |tok| {
            inline for (ids) |id| {
                if (id == tok.id) continue :outer;
            }
            // Not one of `ids`: un-consume it and stop.
            break p.it.seekBy(-1);
        }
    }

    /// Consumes the next token if its id is `id` and returns its index;
    /// otherwise leaves the position unchanged and returns null.
    fn maybe(p: *Parser, comptime id: Token.Id) ?Token.Index {
        const pos = p.it.pos;
        const tok = p.it.next() orelse return null;
        if (tok.id == id) return pos;
        p.it.seekBy(-1);
        return null;
    }

    /// Like `maybe`, but a mismatch is `error.UnexpectedToken`.
    fn require(p: *Parser, comptime id: Token.Id) !Token.Index {
        return p.maybe(id) orelse return error.UnexpectedToken;
    }

    /// Returns the command spelled by the `.command` token at `index`.
    fn getCommand(p: *Parser, index: Token.Index) Command {
        const tok = p.it.get(index);
        assert(tok.id == .command);
        return Command.fromString(tok.get(p.source)).?;
    }
};

/// A lexical token: a tag plus the half-open byte range `[start, end)` of the
/// source it covers.
const Token = struct {
    id: Id,
    start: usize,
    end: usize,

    const Id = enum {
        eof,
        invalid,

        new_line,
        lparen, // (
        rparen, // )
        lbrace, // {
        rbrace, // }

        comment, // /* */

        command, // literal with special meaning, see Command
        literal,
    };

    const Index = usize;

    /// Returns the text of the token within `source`.
    fn get(tok: Token, source: []const u8) []const u8 {
        return source[tok.start..tok.end];
    }
};

/// Splits a script into `Token`s, one per call to `next`.
const Tokenizer = struct {
    source: []const u8,
    index: usize = 0,

    /// Reports whether `slice` begins with `pattern`.
    fn matchesPattern(comptime pattern: []const u8, slice: []const u8) bool {
        comptime var count: usize = 0;
        inline while (count < pattern.len) : (count += 1) {
            if (count >= slice.len) return false;
            const c = slice[count];
            if (pattern[count] != c) return false;
        }
        return true;
    }

    /// Reports whether the unread input begins with `pattern`.
    fn matches(tok: Tokenizer, comptime pattern: []const u8) bool {
        return matchesPattern(pattern, tok.source[tok.index..]);
    }

    /// Reports whether `source[start..end]` spells a `Command`.
    fn isCommand(tok: Tokenizer, start: usize, end: usize) bool {
        return Command.fromString(tok.source[start..end]) != null;
    }

    /// Returns the next token. Leading spaces and tabs are skipped. Once the
    /// input is exhausted every call returns an `.eof` token. An unterminated
    /// comment and a lone carriage return are reported as `.invalid`.
    fn next(tok: *Tokenizer) Token {
        var result = Token{
            .id = .eof,
            .start = tok.index,
            .end = undefined,
        };

        var state: enum {
            start,
            comment,
            literal,
        } = .start;

        while (tok.index < tok.source.len) : (tok.index += 1) {
            const c = tok.source[tok.index];
            switch (state) {
                .start => switch (c) {
                    ' ', '\t' => result.start += 1,

                    '\n' => {
                        result.id = .new_line;
                        tok.index += 1;
                        break;
                    },

                    '\r' => {
                        if (tok.matches("\r\n")) {
                            result.id = .new_line;
                            tok.index += "\r\n".len;
                        } else {
                            result.id = .invalid;
                            tok.index += 1;
                        }
                        break;
                    },

                    '/' => if (tok.matches("/*")) {
                        state = .comment;
                        // The loop's continue expression supplies the final
                        // `+ 1`, so scanning resumes on the first byte after
                        // "/*". This keeps "/**/" a complete comment.
                        tok.index += "/*".len - 1;
                    } else {
                        state = .literal;
                    },

                    '(' => {
                        result.id = .lparen;
                        tok.index += 1;
                        break;
                    },

                    ')' => {
                        result.id = .rparen;
                        tok.index += 1;
                        break;
                    },

                    '{' => {
                        result.id = .lbrace;
                        tok.index += 1;
                        break;
                    },

                    '}' => {
                        result.id = .rbrace;
                        tok.index += 1;
                        break;
                    },

                    else => state = .literal,
                },

                .comment => switch (c) {
                    '*' => if (tok.matches("*/")) {
                        result.id = .comment;
                        tok.index += "*/".len;
                        break;
                    },
                    else => {},
                },

                .literal => switch (c) {
                    ' ', '(', '\n' => {
                        if (tok.isCommand(result.start, tok.index)) {
                            result.id = .command;
                        } else {
                            result.id = .literal;
                        }
                        break;
                    },

                    ')' => {
                        result.id = .literal;
                        break;
                    },

                    '\r' => {
                        if (tok.matches("\r\n")) {
                            if (tok.isCommand(result.start, tok.index)) {
                                result.id = .command;
                            } else {
                                result.id = .literal;
                            }
                        } else {
                            result.id = .invalid;
                            tok.index += 1;
                        }
                        break;
                    },

                    else => {},
                },
            }
        } else {
            // The input ran out before a terminator was seen (this branch is
            // not taken when the loop exits through `break`).
            switch (state) {
                // Nothing but optional blanks remained: genuine end of input.
                .start => {},
                // "/*" without a closing "*/".
                .comment => result.id = .invalid,
                // A word that extends to the end of the input is still a token;
                // the following call then yields `.eof`.
                .literal => result.id = if (tok.isCommand(result.start, tok.index)) .command else .literal,
            }
        }

        result.end = tok.index;
        return result;
    }
};

/// Cursor over a token slice with arbitrary seeking.
const TokenIterator = struct {
    tokens: []const Token,
    pos: Token.Index = 0,

    /// Returns the token at the cursor and advances, or null at the end.
    fn next(it: *TokenIterator) ?Token {
        const token = it.peek() orelse return null;
        it.pos += 1;
        return token;
    }

    /// Returns the token at the cursor without advancing, or null at the end.
    fn peek(it: TokenIterator) ?Token {
        if (it.pos >= it.tokens.len) return null;
        return it.tokens[it.pos];
    }

    /// Moves the cursor back to the first token.
    fn reset(it: *TokenIterator) void {
        it.pos = 0;
    }

    /// Moves the cursor to the absolute index `pos`.
    fn seekTo(it: *TokenIterator, pos: Token.Index) void {
        it.pos = pos;
    }

    /// Moves the cursor by `offset` tokens, clamping at the first token.
    fn seekBy(it: *TokenIterator, offset: isize) void {
        const new_pos = @as(isize, @bitCast(it.pos)) + offset;
        if (new_pos < 0) {
            it.pos = 0;
        } else {
            it.pos = @as(usize, @intCast(new_pos));
        }
    }

    /// Returns the token at `pos`, which must be in bounds.
    fn get(it: *TokenIterator, pos: Token.Index) Token {
        assert(pos < it.tokens.len);
        return it.tokens[pos];
    }
};

const LdScript = @This();
const Diags = @import("../link.zig").Diags;

const std = @import("std");
const assert = std.debug.assert;
const Path = std.Build.Cache.Path;
const Allocator = std.mem.Allocator;
