From 1e785409bb77a4ae1c8a496acae64d94c34431b9 Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Mon, 14 Oct 2024 15:30:30 -0700 Subject: move link.Elf.LdScript to link.LdScript --- src/link/LdScript.zig | 450 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 450 insertions(+) create mode 100644 src/link/LdScript.zig (limited to 'src/link/LdScript.zig') diff --git a/src/link/LdScript.zig b/src/link/LdScript.zig new file mode 100644 index 0000000000..9c10656f3c --- /dev/null +++ b/src/link/LdScript.zig @@ -0,0 +1,450 @@ +path: Path, +cpu_arch: ?std.Target.Cpu.Arch, +args: []const Arg, + +pub const Arg = struct { + needed: bool = false, + path: []const u8, +}; + +pub fn deinit(ls: *LdScript, gpa: Allocator) void { + gpa.free(ls.args); + ls.* = undefined; +} + +pub const Error = error{ + LinkFailure, + UnexpectedToken, + UnknownCpuArch, + OutOfMemory, +}; + +pub fn parse( + gpa: Allocator, + diags: *Diags, + /// For error reporting. + path: Path, + data: []const u8, +) Error!LdScript { + var tokenizer = Tokenizer{ .source = data }; + var tokens: std.ArrayListUnmanaged(Token) = .empty; + defer tokens.deinit(gpa); + var line_col: std.ArrayListUnmanaged(LineColumn) = .empty; + defer line_col.deinit(gpa); + + var line: usize = 0; + var prev_line_last_col: usize = 0; + + while (true) { + const tok = tokenizer.next(); + try tokens.append(gpa, tok); + const column = tok.start - prev_line_last_col; + try line_col.append(gpa, .{ .line = line, .column = column }); + switch (tok.id) { + .invalid => { + return diags.failParse(path, "invalid token in LD script: '{s}' ({d}:{d})", .{ + std.fmt.fmtSliceEscapeLower(tok.get(data)), line, column, + }); + }, + .new_line => { + line += 1; + prev_line_last_col = tok.end; + }, + .eof => break, + else => {}, + } + } + + var it: TokenIterator = .{ .tokens = tokens.items }; + var parser: Parser = .{ + .gpa = gpa, + .source = data, + .it = &it, + .args = .empty, + .cpu_arch = null, + }; + defer parser.args.deinit(gpa); + + parser.start() catch |err| switch (err) { + error.UnexpectedToken => { + const last_token_id = parser.it.pos - 1; + const last_token = parser.it.get(last_token_id); + const lcol = line_col.items[last_token_id]; + return diags.failParse(path, "unexpected token in LD script: {s}: '{s}' ({d}:{d})", .{ + @tagName(last_token.id), + last_token.get(data), + lcol.line, + lcol.column, + }); + }, + else => |e| return e, + }; + return .{ + .path = path, + .cpu_arch = parser.cpu_arch, + .args = try parser.args.toOwnedSlice(gpa), + }; +} + +const LineColumn = struct { + line: usize, + column: usize, +}; + +const Command = enum { + output_format, + input, + group, + as_needed, + + fn fromString(s: []const u8) ?Command { + inline for (@typeInfo(Command).@"enum".fields) |field| { + const upper_name = n: { + comptime var buf: [field.name.len]u8 = undefined; + inline for (field.name, 0..) |c, i| { + buf[i] = comptime std.ascii.toUpper(c); + } + break :n buf; + }; + if (std.mem.eql(u8, &upper_name, s)) return @field(Command, field.name); + } + return null; + } +}; + +const Parser = struct { + gpa: Allocator, + source: []const u8, + it: *TokenIterator, + + cpu_arch: ?std.Target.Cpu.Arch, + args: std.ArrayListUnmanaged(Arg), + + fn start(parser: *Parser) !void { + while (true) { + parser.skipAny(&.{ .comment, .new_line }); + + if (parser.maybe(.command)) |cmd_id| { + const cmd = parser.getCommand(cmd_id); + switch (cmd) { + .output_format => parser.cpu_arch = try parser.outputFormat(), + // TODO we should verify that group only contains libraries + .input, .group => try parser.group(), + else => return error.UnexpectedToken, + } + } else break; + } + + if (parser.it.next()) |tok| switch (tok.id) { + .eof => {}, + else => return error.UnexpectedToken, + }; + } + + fn outputFormat(p: *Parser) !std.Target.Cpu.Arch { + const value = value: { + if (p.skip(&.{.lparen})) { + const value_id = try p.require(.literal); + const value = p.it.get(value_id); + _ = try p.require(.rparen); + break :value value.get(p.source); + } else if (p.skip(&.{ .new_line, .lbrace })) { + const value_id = try p.require(.literal); + const value = p.it.get(value_id); + _ = p.skip(&.{.new_line}); + _ = try p.require(.rbrace); + break :value value.get(p.source); + } else return error.UnexpectedToken; + }; + if (std.mem.eql(u8, value, "elf64-x86-64")) return .x86_64; + if (std.mem.eql(u8, value, "elf64-littleaarch64")) return .aarch64; + return error.UnknownCpuArch; + } + + fn group(p: *Parser) !void { + const gpa = p.gpa; + if (!p.skip(&.{.lparen})) return error.UnexpectedToken; + + while (true) { + if (p.maybe(.literal)) |tok_id| { + const tok = p.it.get(tok_id); + const path = tok.get(p.source); + try p.args.append(gpa, .{ .path = path, .needed = true }); + } else if (p.maybe(.command)) |cmd_id| { + const cmd = p.getCommand(cmd_id); + switch (cmd) { + .as_needed => try p.asNeeded(), + else => return error.UnexpectedToken, + } + } else break; + } + + _ = try p.require(.rparen); + } + + fn asNeeded(p: *Parser) !void { + const gpa = p.gpa; + if (!p.skip(&.{.lparen})) return error.UnexpectedToken; + + while (p.maybe(.literal)) |tok_id| { + const tok = p.it.get(tok_id); + const path = tok.get(p.source); + try p.args.append(gpa, .{ .path = path, .needed = false }); + } + + _ = try p.require(.rparen); + } + + fn skip(p: *Parser, comptime ids: []const Token.Id) bool { + const pos = p.it.pos; + inline for (ids) |id| { + const tok = p.it.next() orelse return false; + if (tok.id != id) { + p.it.seekTo(pos); + return false; + } + } + return true; + } + + fn skipAny(p: *Parser, comptime ids: []const Token.Id) void { + outer: while (p.it.next()) |tok| { + inline for (ids) |id| { + if (id == tok.id) continue :outer; + } + break p.it.seekBy(-1); + } + } + + fn maybe(p: *Parser, comptime id: Token.Id) ?Token.Index { + const pos = p.it.pos; + const tok = p.it.next() orelse return null; + if (tok.id == id) return pos; + p.it.seekBy(-1); + return null; + } + + fn require(p: *Parser, comptime id: Token.Id) !Token.Index { + return p.maybe(id) orelse return error.UnexpectedToken; + } + + fn getCommand(p: *Parser, index: Token.Index) Command { + const tok = p.it.get(index); + assert(tok.id == .command); + return Command.fromString(tok.get(p.source)).?; + } +}; + +const Token = struct { + id: Id, + start: usize, + end: usize, + + const Id = enum { + eof, + invalid, + + new_line, + lparen, // ( + rparen, // ) + lbrace, // { + rbrace, // } + + comment, // /* */ + + command, // literal with special meaning, see Command + literal, + }; + + const Index = usize; + + fn get(tok: Token, source: []const u8) []const u8 { + return source[tok.start..tok.end]; + } +}; + +const Tokenizer = struct { + source: []const u8, + index: usize = 0, + + fn matchesPattern(comptime pattern: []const u8, slice: []const u8) bool { + comptime var count: usize = 0; + inline while (count < pattern.len) : (count += 1) { + if (count >= slice.len) return false; + const c = slice[count]; + if (pattern[count] != c) return false; + } + return true; + } + + fn matches(tok: Tokenizer, comptime pattern: []const u8) bool { + return matchesPattern(pattern, tok.source[tok.index..]); + } + + fn isCommand(tok: Tokenizer, start: usize, end: usize) bool { + return if (Command.fromString(tok.source[start..end]) == null) false else true; + } + + fn next(tok: *Tokenizer) Token { + var result = Token{ + .id = .eof, + .start = tok.index, + .end = undefined, + }; + + var state: enum { + start, + comment, + literal, + } = .start; + + while (tok.index < tok.source.len) : (tok.index += 1) { + const c = tok.source[tok.index]; + switch (state) { + .start => switch (c) { + ' ', '\t' => result.start += 1, + + '\n' => { + result.id = .new_line; + tok.index += 1; + break; + }, + + '\r' => { + if (tok.matches("\r\n")) { + result.id = .new_line; + tok.index += "\r\n".len; + } else { + result.id = .invalid; + tok.index += 1; + } + break; + }, + + '/' => if (tok.matches("/*")) { + state = .comment; + tok.index += "/*".len; + } else { + state = .literal; + }, + + '(' => { + result.id = .lparen; + tok.index += 1; + break; + }, + + ')' => { + result.id = .rparen; + tok.index += 1; + break; + }, + + '{' => { + result.id = .lbrace; + tok.index += 1; + break; + }, + + '}' => { + result.id = .rbrace; + tok.index += 1; + break; + }, + + else => state = .literal, + }, + + .comment => switch (c) { + '*' => if (tok.matches("*/")) { + result.id = .comment; + tok.index += "*/".len; + break; + }, + else => {}, + }, + + .literal => switch (c) { + ' ', '(', '\n' => { + if (tok.isCommand(result.start, tok.index)) { + result.id = .command; + } else { + result.id = .literal; + } + break; + }, + + ')' => { + result.id = .literal; + break; + }, + + '\r' => { + if (tok.matches("\r\n")) { + if (tok.isCommand(result.start, tok.index)) { + result.id = .command; + } else { + result.id = .literal; + } + } else { + result.id = .invalid; + tok.index += 1; + } + break; + }, + + else => {}, + }, + } + } + + result.end = tok.index; + return result; + } +}; + +const TokenIterator = struct { + tokens: []const Token, + pos: Token.Index = 0, + + fn next(it: *TokenIterator) ?Token { + const token = it.peek() orelse return null; + it.pos += 1; + return token; + } + + fn peek(it: TokenIterator) ?Token { + if (it.pos >= it.tokens.len) return null; + return it.tokens[it.pos]; + } + + fn reset(it: *TokenIterator) void { + it.pos = 0; + } + + fn seekTo(it: *TokenIterator, pos: Token.Index) void { + it.pos = pos; + } + + fn seekBy(it: *TokenIterator, offset: isize) void { + const new_pos = @as(isize, @bitCast(it.pos)) + offset; + if (new_pos < 0) { + it.pos = 0; + } else { + it.pos = @as(usize, @intCast(new_pos)); + } + } + + fn get(it: *TokenIterator, pos: Token.Index) Token { + assert(pos < it.tokens.len); + return it.tokens[pos]; + } +}; + +const LdScript = @This(); +const Diags = @import("../link.zig").Diags; + +const std = @import("std"); +const assert = std.debug.assert; +const Path = std.Build.Cache.Path; +const Allocator = std.mem.Allocator; -- cgit v1.2.3 From 5b016e290a5ba335b295afeae104af6b3396a425 Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Mon, 14 Oct 2024 22:24:46 -0700 Subject: move ld script processing to the frontend along with the relevant logic, making the libraries within subject to the same search criteria as all the other libraries. this unfortunately means doing file system access on all .so files when targeting ELF to determine if they are linker scripts, however, I have a plan to address this. --- src/Compilation.zig | 26 ++-- src/link.zig | 17 +++ src/link/Elf.zig | 165 ++------------------- src/link/LdScript.zig | 1 - src/main.zig | 394 +++++++++++++++++++++++++++++++++++--------------- test/link/elf.zig | 22 +-- 6 files changed, 328 insertions(+), 297 deletions(-) (limited to 'src/link/LdScript.zig') diff --git a/src/Compilation.zig b/src/Compilation.zig index 2776d2960a..d4b187915c 100644 --- a/src/Compilation.zig +++ b/src/Compilation.zig @@ -1003,10 +1003,11 @@ pub const LinkObject = struct { path: Path, must_link: bool = false, needed: bool = false, - // When the library is passed via a positional argument, it will be - // added as a full path. If it's `-l`, then just the basename. - // - // Consistent with `withLOption` variable name in lld ELF driver. + weak: bool = false, + /// When the library is passed via a positional argument, it will be + /// added as a full path. If it's `-l`, then just the basename. + /// + /// Consistent with `withLOption` variable name in lld ELF driver. loption: bool = false, pub fn isObject(lo: LinkObject) bool { @@ -1061,6 +1062,9 @@ pub const CreateOptions = struct { /// this flag would be set to disable this machinery to avoid false positives. disable_lld_caching: bool = false, cache_mode: CacheMode = .incremental, + /// This field is intended to be removed. + /// The ELF implementation no longer uses this data, however the MachO and COFF + /// implementations still do. lib_dirs: []const []const u8 = &[0][]const u8{}, rpath_list: []const []const u8 = &[0][]const u8{}, symbol_wrap_set: std.StringArrayHashMapUnmanaged(void) = .empty, @@ -2563,6 +2567,7 @@ fn addNonIncrementalStuffToCacheManifest( _ = try man.addFilePath(obj.path, null); man.hash.add(obj.must_link); man.hash.add(obj.needed); + man.hash.add(obj.weak); man.hash.add(obj.loption); } @@ -3219,18 +3224,7 @@ pub fn getAllErrorsAlloc(comp: *Compilation) !ErrorBundle { })); } - for (comp.link_diags.msgs.items) |link_err| { - try bundle.addRootErrorMessage(.{ - .msg = try bundle.addString(link_err.msg), - .notes_len = @intCast(link_err.notes.len), - }); - const notes_start = try bundle.reserveNotes(@intCast(link_err.notes.len)); - for (link_err.notes, 0..) |note, i| { - bundle.extra.items[notes_start + i] = @intFromEnum(try bundle.addErrorMessage(.{ - .msg = try bundle.addString(note.msg), - })); - } - } + try comp.link_diags.addMessagesToBundle(&bundle); if (comp.zcu) |zcu| { if (bundle.root_list.items.len == 0 and zcu.compile_log_sources.count() != 0) { diff --git a/src/link.zig b/src/link.zig index 7280ce3df7..463115540f 100644 --- a/src/link.zig +++ b/src/link.zig @@ -24,6 +24,8 @@ const lldMain = @import("main.zig").lldMain; const Package = @import("Package.zig"); const dev = @import("dev.zig"); +pub const LdScript = @import("link/LdScript.zig"); + /// When adding a new field, remember to update `hashAddSystemLibs`. /// These are *always* dynamically linked. Static libraries will be /// provided as positional arguments. @@ -336,6 +338,21 @@ pub const Diags = struct { log.debug("memory allocation failure", .{}); diags.flags.alloc_failure_occurred = true; } + + pub fn addMessagesToBundle(diags: *const Diags, bundle: *std.zig.ErrorBundle.Wip) Allocator.Error!void { + for (diags.msgs.items) |link_err| { + try bundle.addRootErrorMessage(.{ + .msg = try bundle.addString(link_err.msg), + .notes_len = @intCast(link_err.notes.len), + }); + const notes_start = try bundle.reserveNotes(@intCast(link_err.notes.len)); + for (link_err.notes, 0..) |note, i| { + bundle.extra.items[notes_start + i] = @intFromEnum(try bundle.addErrorMessage(.{ + .msg = try bundle.addString(note.msg), + })); + } + } + } }; pub fn hashAddSystemLibs( diff --git a/src/link/Elf.zig b/src/link/Elf.zig index 494cb94932..4f96e10d87 100644 --- a/src/link/Elf.zig +++ b/src/link/Elf.zig @@ -1,5 +1,4 @@ pub const Atom = @import("Elf/Atom.zig"); -pub const LdScript = @import("LdScript.zig"); base: link.File, rpath_table: std.StringArrayHashMapUnmanaged(void), @@ -16,7 +15,6 @@ z_relro: bool, z_common_page_size: ?u64, /// TODO make this non optional and resolve the default in open() z_max_page_size: ?u64, -lib_dirs: []const []const u8, hash_style: HashStyle, compress_debug_sections: CompressDebugSections, symbol_wrap_set: std.StringArrayHashMapUnmanaged(void), @@ -329,7 +327,6 @@ pub fn createEmpty( .z_relro = options.z_relro, .z_common_page_size = options.z_common_page_size, .z_max_page_size = options.z_max_page_size, - .lib_dirs = options.lib_dirs, .hash_style = options.hash_style, .compress_debug_sections = options.compress_debug_sections, .symbol_wrap_set = options.symbol_wrap_set, @@ -845,30 +842,17 @@ pub fn flushModule(self: *Elf, arena: Allocator, tid: Zcu.PerThread.Id, prog_nod if (comp.libc_installation) |lc| { const flags = target_util.libcFullLinkFlags(target); - var test_path = std.ArrayList(u8).init(arena); - var checked_paths = std.ArrayList([]const u8).init(arena); - for (flags) |flag| { - checked_paths.clearRetainingCapacity(); + assert(mem.startsWith(u8, flag, "-l")); const lib_name = flag["-l".len..]; - - success: { - if (!self.base.isStatic()) { - if (try self.accessLibPath(arena, &test_path, &checked_paths, lc.crt_dir.?, lib_name, .dynamic)) - break :success; - } - if (try self.accessLibPath(arena, &test_path, &checked_paths, lc.crt_dir.?, lib_name, .static)) - break :success; - - diags.addMissingLibraryError( - checked_paths.items, - "missing system library: '{s}' was not found", - .{lib_name}, - ); - continue; - } - - const resolved_path = Path.initCwd(try arena.dupe(u8, test_path.items)); + const suffix = switch (comp.config.link_mode) { + .static => target.staticLibSuffix(), + .dynamic => target.dynamicLibSuffix(), + }; + const lib_path = try std.fmt.allocPrint(arena, "{s}/lib{s}{s}", .{ + lc.crt_dir.?, lib_name, suffix, + }); + const resolved_path = Path.initCwd(lib_path); parseInputReportingFailure(self, resolved_path, false, false); } } else if (target.isGnuLibC()) { @@ -1194,11 +1178,6 @@ fn dumpArgv(self: *Elf, comp: *Compilation) !void { if (csu.crti) |path| try argv.append(try path.toString(arena)); if (csu.crtbegin) |path| try argv.append(try path.toString(arena)); - for (self.lib_dirs) |lib_dir| { - try argv.append("-L"); - try argv.append(lib_dir); - } - if (comp.config.link_libc) { if (self.base.comp.libc_installation) |libc_installation| { try argv.append("-L"); @@ -1340,7 +1319,7 @@ pub const ParseError = error{ NotSupported, InvalidCharacter, UnknownFileType, -} || LdScript.Error || fs.Dir.AccessError || fs.File.SeekError || fs.File.OpenError || fs.File.ReadError; +} || fs.Dir.AccessError || fs.File.SeekError || fs.File.OpenError || fs.File.ReadError; fn parseCrtFileReportingFailure(self: *Elf, crt_file: Compilation.CrtFile) void { parseInputReportingFailure(self, crt_file.full_object_path, false, false); @@ -1358,23 +1337,12 @@ pub fn parseInputReportingFailure(self: *Elf, path: Path, needed: bool, must_lin .needed = needed, }, &self.shared_objects, &self.files, target) catch |err| switch (err) { error.LinkFailure => return, // already reported - error.BadMagic, error.UnexpectedEndOfFile => { - // It could be a linker script. - self.parseLdScript(.{ .path = path, .needed = needed }) catch |err2| switch (err2) { - error.LinkFailure => return, // already reported - else => |e| diags.addParseError(path, "failed to parse linker script: {s}", .{@errorName(e)}), - }; - }, else => |e| diags.addParseError(path, "failed to parse shared object: {s}", .{@errorName(e)}), }, .static_library => parseArchive(self, path, must_link) catch |err| switch (err) { error.LinkFailure => return, // already reported else => |e| diags.addParseError(path, "failed to parse archive: {s}", .{@errorName(e)}), }, - .unknown => self.parseLdScript(.{ .path = path, .needed = needed }) catch |err| switch (err) { - error.LinkFailure => return, // already reported - else => |e| diags.addParseError(path, "failed to parse linker script: {s}", .{@errorName(e)}), - }, else => diags.addParseError(path, "unrecognized file type", .{}), } } @@ -1512,72 +1480,6 @@ fn parseSharedObject( } } -fn parseLdScript(self: *Elf, lib: SystemLib) ParseError!void { - const tracy = trace(@src()); - defer tracy.end(); - - const comp = self.base.comp; - const gpa = comp.gpa; - const diags = &comp.link_diags; - - const in_file = try lib.path.root_dir.handle.openFile(lib.path.sub_path, .{}); - defer in_file.close(); - const data = try in_file.readToEndAlloc(gpa, std.math.maxInt(u32)); - defer gpa.free(data); - - var script = try LdScript.parse(gpa, diags, lib.path, data); - defer script.deinit(gpa); - - var arena_allocator = std.heap.ArenaAllocator.init(gpa); - defer arena_allocator.deinit(); - const arena = arena_allocator.allocator(); - - var test_path = std.ArrayList(u8).init(arena); - var checked_paths = std.ArrayList([]const u8).init(arena); - - for (script.args) |script_arg| { - checked_paths.clearRetainingCapacity(); - - success: { - if (mem.startsWith(u8, script_arg.path, "-l")) { - const lib_name = script_arg.path["-l".len..]; - - for (self.lib_dirs) |lib_dir| { - if (!self.base.isStatic()) { - if (try self.accessLibPath(arena, &test_path, &checked_paths, lib_dir, lib_name, .dynamic)) - break :success; - } - if (try self.accessLibPath(arena, &test_path, &checked_paths, lib_dir, lib_name, .static)) - break :success; - } - } else { - var buffer: [fs.max_path_bytes]u8 = undefined; - if (fs.realpath(script_arg.path, &buffer)) |path| { - test_path.clearRetainingCapacity(); - try test_path.writer().writeAll(path); - break :success; - } else |_| {} - - try checked_paths.append(try arena.dupe(u8, script_arg.path)); - for (self.lib_dirs) |lib_dir| { - if (try self.accessLibPath(arena, &test_path, &checked_paths, lib_dir, script_arg.path, null)) - break :success; - } - } - - diags.addMissingLibraryError( - checked_paths.items, - "missing library dependency: GNU ld script '{}' requires '{s}', but file not found", - .{ @as(Path, lib.path), script_arg.path }, - ); - continue; - } - - const full_path = Path.initCwd(test_path.items); - parseInputReportingFailure(self, full_path, script_arg.needed, false); - } -} - pub fn validateEFlags(self: *Elf, file_index: File.Index, e_flags: elf.Word) !void { if (self.first_eflags == null) { self.first_eflags = e_flags; @@ -1618,39 +1520,6 @@ pub fn validateEFlags(self: *Elf, file_index: File.Index, e_flags: elf.Word) !vo } } -fn accessLibPath( - self: *Elf, - arena: Allocator, - test_path: *std.ArrayList(u8), - checked_paths: ?*std.ArrayList([]const u8), - lib_dir_path: []const u8, - lib_name: []const u8, - link_mode: ?std.builtin.LinkMode, -) !bool { - const sep = fs.path.sep_str; - const target = self.getTarget(); - test_path.clearRetainingCapacity(); - const prefix = if (link_mode != null) "lib" else ""; - const suffix = if (link_mode) |mode| switch (mode) { - .static => target.staticLibSuffix(), - .dynamic => target.dynamicLibSuffix(), - } else ""; - try test_path.writer().print("{s}" ++ sep ++ "{s}{s}{s}", .{ - lib_dir_path, - prefix, - lib_name, - suffix, - }); - if (checked_paths) |cpaths| { - try cpaths.append(try arena.dupe(u8, test_path.items)); - } - fs.cwd().access(test_path.items, .{}) catch |err| switch (err) { - error.FileNotFound => return false, - else => |e| return e, - }; - return true; -} - /// When resolving symbols, we approach the problem similarly to `mold`. /// 1. Resolve symbols across all objects (including those preemptively extracted archives). /// 2. Resolve symbols across all shared objects. @@ -1840,7 +1709,7 @@ pub fn initOutputSection(self: *Elf, args: struct { ".dtors", ".gnu.warning", }; inline for (name_prefixes) |prefix| { - if (std.mem.eql(u8, args.name, prefix) or std.mem.startsWith(u8, args.name, prefix ++ ".")) { + if (mem.eql(u8, args.name, prefix) or mem.startsWith(u8, args.name, prefix ++ ".")) { break :blk prefix; } } @@ -1852,9 +1721,9 @@ pub fn initOutputSection(self: *Elf, args: struct { switch (args.type) { elf.SHT_NULL => unreachable, elf.SHT_PROGBITS => { - if (std.mem.eql(u8, args.name, ".init_array") or std.mem.startsWith(u8, args.name, ".init_array.")) + if (mem.eql(u8, args.name, ".init_array") or mem.startsWith(u8, args.name, ".init_array.")) break :tt elf.SHT_INIT_ARRAY; - if (std.mem.eql(u8, args.name, ".fini_array") or std.mem.startsWith(u8, args.name, ".fini_array.")) + if (mem.eql(u8, args.name, ".fini_array") or mem.startsWith(u8, args.name, ".fini_array.")) break :tt elf.SHT_FINI_ARRAY; break :tt args.type; }, @@ -1971,7 +1840,6 @@ fn linkWithLLD(self: *Elf, arena: Allocator, tid: Zcu.PerThread.Id, prog_node: s man.hash.add(comp.link_eh_frame_hdr); man.hash.add(self.emit_relocs); man.hash.add(comp.config.rdynamic); - man.hash.addListOfBytes(self.lib_dirs); man.hash.addListOfBytes(self.rpath_table.keys()); if (output_mode == .Exe) { man.hash.add(self.base.stack_size); @@ -2265,11 +2133,6 @@ fn linkWithLLD(self: *Elf, arena: Allocator, tid: Zcu.PerThread.Id, prog_node: s try argv.appendSlice(&.{ "-wrap", symbol_name }); } - for (self.lib_dirs) |lib_dir| { - try argv.append("-L"); - try argv.append(lib_dir); - } - if (comp.config.link_libc) { if (comp.libc_installation) |libc_installation| { try argv.append("-L"); @@ -4868,7 +4731,7 @@ fn shString( off: u32, ) [:0]const u8 { const slice = shstrtab[off..]; - return slice[0..std.mem.indexOfScalar(u8, slice, 0).? :0]; + return slice[0..mem.indexOfScalar(u8, slice, 0).? :0]; } pub fn insertShString(self: *Elf, name: [:0]const u8) error{OutOfMemory}!u32 { diff --git a/src/link/LdScript.zig b/src/link/LdScript.zig index 9c10656f3c..ed5dbc4681 100644 --- a/src/link/LdScript.zig +++ b/src/link/LdScript.zig @@ -14,7 +14,6 @@ pub fn deinit(ls: *LdScript, gpa: Allocator) void { pub const Error = error{ LinkFailure, - UnexpectedToken, UnknownCpuArch, OutOfMemory, }; diff --git a/src/main.zig b/src/main.zig index 058de72442..1ec9548883 100644 --- a/src/main.zig +++ b/src/main.zig @@ -3618,6 +3618,28 @@ fn buildOutputType( return cleanExit(); } +const LinkerInput = union(enum) { + /// An argument like: -l[name] + named: Named, + /// When a file path is provided. + path: struct { + path: Path, + /// We still need all this info because the path may point to a .so + /// file which may actually be a "linker script" that references + /// library names which need to be resolved. + info: SystemLib, + }, + /// Put exactly this string in the dynamic section, no rpath. + exact: struct { + name: []const u8, + }, + + const Named = struct { + name: []const u8, + info: SystemLib, + }; +}; + const CreateModule = struct { global_cache_directory: Cache.Directory, modules: std.StringArrayHashMapUnmanaged(CliModule), @@ -3760,10 +3782,7 @@ fn createModule( // First, remove libc, libc++, and compiler_rt libraries from the system libraries list. // We need to know whether the set of system libraries contains anything besides these // to decide whether to trigger native path detection logic. - var external_system_libs: std.MultiArrayList(struct { - name: []const u8, - info: SystemLib, - }) = .{}; + var external_linker_inputs: std.ArrayListUnmanaged(LinkerInput) = .empty; for (create_module.system_libs.keys(), create_module.system_libs.values()) |lib_name, info| { if (std.zig.target.isLibCLibName(target, lib_name)) { create_module.opts.link_libc = true; @@ -3815,13 +3834,13 @@ fn createModule( } } - try external_system_libs.append(arena, .{ + try external_linker_inputs.append(arena, .{ .named = .{ .name = lib_name, .info = info, - }); + } }); } - // After this point, external_system_libs is used instead of system_libs. - if (external_system_libs.len != 0) + // After this point, external_linker_inputs is used instead of system_libs. + if (external_linker_inputs.items.len != 0) create_module.want_native_include_dirs = true; // Resolve the library path arguments with respect to sysroot. @@ -3878,7 +3897,7 @@ fn createModule( } if (builtin.target.os.tag == .windows and (target.abi == .msvc or target.abi == .itanium) and - external_system_libs.len != 0) + external_linker_inputs.items.len != 0) { if (create_module.libc_installation == null) { create_module.libc_installation = LibCInstallation.findNative(.{ @@ -3899,20 +3918,67 @@ fn createModule( // If any libs in this list are statically provided, we omit them from the // resolved list and populate the link_objects array instead. { - var test_path = std.ArrayList(u8).init(gpa); - defer test_path.deinit(); + var test_path: std.ArrayListUnmanaged(u8) = .empty; + defer test_path.deinit(gpa); + + var checked_paths: std.ArrayListUnmanaged(u8) = .empty; + defer checked_paths.deinit(gpa); - var checked_paths = std.ArrayList(u8).init(gpa); - defer checked_paths.deinit(); + var ld_script_bytes: std.ArrayListUnmanaged(u8) = .empty; + defer ld_script_bytes.deinit(gpa); - var failed_libs = std.ArrayList(struct { + var failed_libs: std.ArrayListUnmanaged(struct { name: []const u8, strategy: SystemLib.SearchStrategy, checked_paths: []const u8, preferred_mode: std.builtin.LinkMode, - }).init(arena); + }) = .empty; + + // Convert external system libs into a stack so that items can be + // pushed to it. + // + // This is necessary because shared objects might turn out to be + // "linker scripts" that in fact resolve to one or more other + // external system libs, including parameters such as "needed". + // + // Unfortunately, such files need to be detected immediately, so + // that this library search logic can be applied to them. + mem.reverse(LinkerInput, external_linker_inputs.items); + + syslib: while (external_linker_inputs.popOrNull()) |external_linker_input| { + const external_system_lib: LinkerInput.Named = switch (external_linker_input) { + .named => |named| named, + .path => |p| p: { + if (fs.path.isAbsolute(p.path.sub_path)) { + try create_module.link_objects.append(arena, .{ + .path = p.path, + .needed = p.info.needed, + .weak = p.info.weak, + }); + continue; + } + const lib_name, const link_mode = stripLibPrefixAndSuffix(p.path.sub_path, target); + break :p .{ + .name = lib_name, + .info = .{ + .needed = p.info.needed, + .weak = p.info.weak, + .preferred_mode = link_mode, + .search_strategy = .no_fallback, + }, + }; + }, + .exact => |exact| { + try create_module.link_objects.append(arena, .{ + .path = Path.initCwd(exact.name), + .loption = true, + }); + continue; + }, + }; + const lib_name = external_system_lib.name; + const info = external_system_lib.info; - syslib: for (external_system_libs.items(.name), external_system_libs.items(.info)) |lib_name, info| { // Checked in the first pass above while looking for libc libraries. assert(!fs.path.isAbsolute(lib_name)); @@ -3921,33 +3987,26 @@ fn createModule( switch (info.search_strategy) { .mode_first, .no_fallback => { // check for preferred mode - for (create_module.lib_dirs.items) |lib_dir_path| { - if (try accessLibPath( - &test_path, - &checked_paths, - lib_dir_path, - lib_name, - target, - info.preferred_mode, - )) { - const path = Path.initCwd(try arena.dupe(u8, test_path.items)); - switch (info.preferred_mode) { - .static => try create_module.link_objects.append(arena, .{ .path = path }), - .dynamic => try create_module.resolved_system_libs.append(arena, .{ - .name = lib_name, - .lib = .{ - .needed = info.needed, - .weak = info.weak, - .path = path, - }, - }), - } - continue :syslib; - } - } + for (create_module.lib_dirs.items) |lib_dir_path| switch (try accessLibPath( + gpa, + arena, + &test_path, + &checked_paths, + &external_linker_inputs, + create_module, + &ld_script_bytes, + lib_dir_path, + lib_name, + target, + info.preferred_mode, + info, + )) { + .ok => continue :syslib, + .no_match => {}, + }; // check for fallback mode if (info.search_strategy == .no_fallback) { - try failed_libs.append(.{ + try failed_libs.append(arena, .{ .name = lib_name, .strategy = info.search_strategy, .checked_paths = try arena.dupe(u8, checked_paths.items), @@ -3955,31 +4014,24 @@ fn createModule( }); continue :syslib; } - for (create_module.lib_dirs.items) |lib_dir_path| { - if (try accessLibPath( - &test_path, - &checked_paths, - lib_dir_path, - lib_name, - target, - info.fallbackMode(), - )) { - const path = Path.initCwd(try arena.dupe(u8, test_path.items)); - switch (info.fallbackMode()) { - .static => try create_module.link_objects.append(arena, .{ .path = path }), - .dynamic => try create_module.resolved_system_libs.append(arena, .{ - .name = lib_name, - .lib = .{ - .needed = info.needed, - .weak = info.weak, - .path = path, - }, - }), - } - continue :syslib; - } - } - try failed_libs.append(.{ + for (create_module.lib_dirs.items) |lib_dir_path| switch (try accessLibPath( + gpa, + arena, + &test_path, + &checked_paths, + &external_linker_inputs, + create_module, + &ld_script_bytes, + lib_dir_path, + lib_name, + target, + info.fallbackMode(), + info, + )) { + .ok => continue :syslib, + .no_match => {}, + }; + try failed_libs.append(arena, .{ .name = lib_name, .strategy = info.search_strategy, .checked_paths = try arena.dupe(u8, checked_paths.items), @@ -3990,54 +4042,44 @@ fn createModule( .paths_first => { for (create_module.lib_dirs.items) |lib_dir_path| { // check for preferred mode - if (try accessLibPath( + switch (try accessLibPath( + gpa, + arena, &test_path, &checked_paths, + &external_linker_inputs, + create_module, + &ld_script_bytes, lib_dir_path, lib_name, target, info.preferred_mode, + info, )) { - const path = Path.initCwd(try arena.dupe(u8, test_path.items)); - switch (info.preferred_mode) { - .static => try create_module.link_objects.append(arena, .{ .path = path }), - .dynamic => try create_module.resolved_system_libs.append(arena, .{ - .name = lib_name, - .lib = .{ - .needed = info.needed, - .weak = info.weak, - .path = path, - }, - }), - } - continue :syslib; + .ok => continue :syslib, + .no_match => {}, } // check for fallback mode - if (try accessLibPath( + switch (try accessLibPath( + gpa, + arena, &test_path, &checked_paths, + &external_linker_inputs, + create_module, + &ld_script_bytes, lib_dir_path, lib_name, target, info.fallbackMode(), + info, )) { - const path = Path.initCwd(try arena.dupe(u8, test_path.items)); - switch (info.fallbackMode()) { - .static => try create_module.link_objects.append(arena, .{ .path = path }), - .dynamic => try create_module.resolved_system_libs.append(arena, .{ - .name = lib_name, - .lib = .{ - .needed = info.needed, - .weak = info.weak, - .path = path, - }, - }), - } - continue :syslib; + .ok => continue :syslib, + .no_match => {}, } } - try failed_libs.append(.{ + try failed_libs.append(arena, .{ .name = lib_name, .strategy = info.search_strategy, .checked_paths = try arena.dupe(u8, checked_paths.items), @@ -4059,8 +4101,8 @@ fn createModule( process.exit(1); } } - // After this point, create_module.resolved_system_libs is used instead of - // create_module.external_system_libs. + // After this point, create_module.resolved_system_libs is used instead + // of external_linker_inputs. if (create_module.resolved_system_libs.len != 0) create_module.opts.any_dyn_libs = true; @@ -6857,33 +6899,45 @@ const ClangSearchSanitizer = struct { }; }; +const AccessLibPathResult = enum { ok, no_match }; + fn accessLibPath( - test_path: *std.ArrayList(u8), - checked_paths: *std.ArrayList(u8), + gpa: Allocator, + arena: Allocator, + /// Allocated via `gpa`. + test_path: *std.ArrayListUnmanaged(u8), + /// Allocated via `gpa`. + checked_paths: *std.ArrayListUnmanaged(u8), + /// Allocated via `arena`. + external_linker_inputs: *std.ArrayListUnmanaged(LinkerInput), + create_module: *CreateModule, + /// Allocated via `gpa`. + ld_script_bytes: *std.ArrayListUnmanaged(u8), lib_dir_path: []const u8, lib_name: []const u8, target: std.Target, link_mode: std.builtin.LinkMode, -) !bool { + parent: SystemLib, +) Allocator.Error!AccessLibPathResult { const sep = fs.path.sep_str; if (target.isDarwin() and link_mode == .dynamic) tbd: { // Prefer .tbd over .dylib. test_path.clearRetainingCapacity(); - try test_path.writer().print("{s}" ++ sep ++ "lib{s}.tbd", .{ lib_dir_path, lib_name }); - try checked_paths.writer().print("\n {s}", .{test_path.items}); + try test_path.writer(gpa).print("{s}" ++ sep ++ "lib{s}.tbd", .{ lib_dir_path, lib_name }); + try checked_paths.writer(gpa).print("\n {s}", .{test_path.items}); fs.cwd().access(test_path.items, .{}) catch |err| switch (err) { error.FileNotFound => break :tbd, else => |e| fatal("unable to search for tbd library '{s}': {s}", .{ test_path.items, @errorName(e), }), }; - return true; + return finishAccessLibPath(arena, create_module, test_path, link_mode, parent, lib_name); } main_check: { test_path.clearRetainingCapacity(); - try test_path.writer().print("{s}" ++ sep ++ "{s}{s}{s}", .{ + try test_path.writer(gpa).print("{s}" ++ sep ++ "{s}{s}{s}", .{ lib_dir_path, target.libPrefix(), lib_name, @@ -6892,49 +6946,148 @@ fn accessLibPath( .dynamic => target.dynamicLibSuffix(), }, }); - try checked_paths.writer().print("\n {s}", .{test_path.items}); + try checked_paths.writer(gpa).print("\n {s}", .{test_path.items}); + + // In the case of .so files, they might actually be "linker scripts" + // that contain references to other libraries. + if (target.ofmt == .elf and mem.endsWith(u8, test_path.items, ".so")) { + var file = fs.cwd().openFile(test_path.items, .{}) catch |err| switch (err) { + error.FileNotFound => break :main_check, + else => |e| fatal("unable to search for {s} library '{s}': {s}", .{ + @tagName(link_mode), test_path.items, @errorName(e), + }), + }; + defer file.close(); + try ld_script_bytes.resize(gpa, @sizeOf(std.elf.Elf64_Ehdr)); + const n = file.readAll(ld_script_bytes.items) catch |err| fatal("failed to read {s}: {s}", .{ + test_path.items, @errorName(err), + }); + elf_file: { + if (n != ld_script_bytes.items.len) break :elf_file; + if (!mem.eql(u8, ld_script_bytes.items[0..4], "\x7fELF")) break :elf_file; + // Appears to be an ELF file. + return finishAccessLibPath(arena, create_module, test_path, link_mode, parent, lib_name); + } + const stat = file.stat() catch |err| + fatal("failed to stat {s}: {s}", .{ test_path.items, @errorName(err) }); + const size = std.math.cast(u32, stat.size) orelse + fatal("{s}: linker script too big", .{test_path.items}); + try ld_script_bytes.resize(gpa, size); + const buf = ld_script_bytes.items[n..]; + const n2 = file.readAll(buf) catch |err| + fatal("failed to read {s}: {s}", .{ test_path.items, @errorName(err) }); + if (n2 != buf.len) fatal("failed to read {s}: unexpected end of file", .{test_path.items}); + var diags = link.Diags.init(gpa); + defer diags.deinit(); + const ld_script_result = link.LdScript.parse(gpa, &diags, Path.initCwd(test_path.items), ld_script_bytes.items); + if (diags.hasErrors()) { + var wip_errors: std.zig.ErrorBundle.Wip = undefined; + try wip_errors.init(gpa); + defer wip_errors.deinit(); + + try diags.addMessagesToBundle(&wip_errors); + + var error_bundle = try wip_errors.toOwnedBundle(""); + defer error_bundle.deinit(gpa); + + const color: Color = .auto; + error_bundle.renderToStdErr(color.renderOptions()); + + process.exit(1); + } + + var ld_script = ld_script_result catch |err| + fatal("{s}: failed to parse linker script: {s}", .{ test_path.items, @errorName(err) }); + defer ld_script.deinit(gpa); + + try external_linker_inputs.ensureUnusedCapacity(arena, ld_script.args.len); + for (ld_script.args) |arg| { + const syslib: SystemLib = .{ + .needed = arg.needed or parent.needed, + .weak = parent.weak, + .preferred_mode = parent.preferred_mode, + .search_strategy = parent.search_strategy, + }; + if (mem.startsWith(u8, arg.path, "-l")) { + external_linker_inputs.appendAssumeCapacity(.{ .named = .{ + .name = try arena.dupe(u8, arg.path["-l".len..]), + .info = syslib, + } }); + } else { + external_linker_inputs.appendAssumeCapacity(.{ .path = .{ + .path = Path.initCwd(try arena.dupe(u8, arg.path)), + .info = syslib, + } }); + } + } + return .ok; + } + fs.cwd().access(test_path.items, .{}) catch |err| switch (err) { error.FileNotFound => break :main_check, else => |e| fatal("unable to search for {s} library '{s}': {s}", .{ @tagName(link_mode), test_path.items, @errorName(e), }), }; - return true; + return finishAccessLibPath(arena, create_module, test_path, link_mode, parent, lib_name); } // In the case of Darwin, the main check will be .dylib, so here we // additionally check for .so files. if (target.isDarwin() and link_mode == .dynamic) so: { test_path.clearRetainingCapacity(); - try test_path.writer().print("{s}" ++ sep ++ "lib{s}.so", .{ lib_dir_path, lib_name }); - try checked_paths.writer().print("\n {s}", .{test_path.items}); + try test_path.writer(gpa).print("{s}" ++ sep ++ "lib{s}.so", .{ lib_dir_path, lib_name }); + try checked_paths.writer(gpa).print("\n {s}", .{test_path.items}); fs.cwd().access(test_path.items, .{}) catch |err| switch (err) { error.FileNotFound => break :so, else => |e| fatal("unable to search for so library '{s}': {s}", .{ test_path.items, @errorName(e), }), }; - return true; + return finishAccessLibPath(arena, create_module, test_path, link_mode, parent, lib_name); } // In the case of MinGW, the main check will be .lib but we also need to // look for `libfoo.a`. if (target.isMinGW() and link_mode == .static) mingw: { test_path.clearRetainingCapacity(); - try test_path.writer().print("{s}" ++ sep ++ "lib{s}.a", .{ + try test_path.writer(gpa).print("{s}" ++ sep ++ "lib{s}.a", .{ lib_dir_path, lib_name, }); - try checked_paths.writer().print("\n {s}", .{test_path.items}); + try checked_paths.writer(gpa).print("\n {s}", .{test_path.items}); fs.cwd().access(test_path.items, .{}) catch |err| switch (err) { error.FileNotFound => break :mingw, else => |e| fatal("unable to search for static library '{s}': {s}", .{ test_path.items, @errorName(e), }), }; - return true; + return finishAccessLibPath(arena, create_module, test_path, link_mode, parent, lib_name); } - return false; + return .no_match; +} + +fn finishAccessLibPath( + arena: Allocator, + create_module: *CreateModule, + test_path: *std.ArrayListUnmanaged(u8), + link_mode: std.builtin.LinkMode, + parent: SystemLib, + lib_name: []const u8, +) Allocator.Error!AccessLibPathResult { + const path = Path.initCwd(try arena.dupe(u8, test_path.items)); + switch (link_mode) { + .static => try create_module.link_objects.append(arena, .{ .path = path }), + .dynamic => try create_module.resolved_system_libs.append(arena, .{ + .name = lib_name, + .lib = .{ + .needed = parent.needed, + .weak = parent.weak, + .path = path, + }, + }), + } + return .ok; } fn accessFrameworkPath( @@ -7634,3 +7787,18 @@ fn handleModArg( c_source_files_owner_index.* = create_module.c_source_files.items.len; rc_source_files_owner_index.* = create_module.rc_source_files.items.len; } + +fn stripLibPrefixAndSuffix(path: []const u8, target: std.Target) struct { []const u8, std.builtin.LinkMode } { + const prefix = target.libPrefix(); + const static_suffix = target.staticLibSuffix(); + const dynamic_suffix = target.dynamicLibSuffix(); + const basename = fs.path.basename(path); + const unlibbed = if (mem.startsWith(u8, basename, prefix)) basename[prefix.len..] else basename; + if (mem.endsWith(u8, unlibbed, static_suffix)) return .{ + unlibbed[0 .. unlibbed.len - static_suffix.len], .static, + }; + if (mem.endsWith(u8, unlibbed, dynamic_suffix)) return .{ + unlibbed[0 .. unlibbed.len - dynamic_suffix.len], .dynamic, + }; + fatal("unrecognized library path: {s}", .{path}); +} diff --git a/test/link/elf.zig b/test/link/elf.zig index f6891cf17e..0b8aa45141 100644 --- a/test/link/elf.zig +++ b/test/link/elf.zig @@ -2165,13 +2165,11 @@ fn testLdScriptPathError(b: *Build, opts: Options) *Step { exe.addLibraryPath(scripts.getDirectory()); exe.linkLibC(); - expectLinkErrors( - exe, - test_step, - .{ - .contains = "error: missing library dependency: GNU ld script '/?/liba.so' requires 'libfoo.so', but file not found", - }, - ); + // TODO: A future enhancement could make this error message also mention + // the file that references the missing library. + expectLinkErrors(exe, test_step, .{ + .stderr_contains = "error: unable to find dynamic system library 'foo' using strategy 'no_fallback'. searched paths:", + }); return test_step; } @@ -3907,16 +3905,8 @@ fn testUnknownFileTypeError(b: *Build, opts: Options) *Step { exe.linkLibrary(dylib); exe.linkLibC(); - // TODO: improve the test harness to be able to selectively match lines in error output - // while avoiding jankiness - // expectLinkErrors(exe, test_step, .{ .exact = &.{ - // "error: invalid token in LD script: '\\x00\\x00\\x00\\x0c\\x00\\x00\\x00/usr/lib/dyld\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x0d' (0:989)", - // "note: while parsing /?/liba.dylib", - // "error: unexpected error: parsing input file failed with error InvalidLdScript", - // "note: while parsing /?/liba.dylib", - // } }); expectLinkErrors(exe, test_step, .{ - .starts_with = "error: invalid token in LD script: '\\x00\\x00\\x00\\x0c\\x00\\x00\\x00/usr/lib/dyld\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x0d' (", + .contains = "error: failed to parse shared object: BadMagic", }); return test_step; -- cgit v1.2.3