diff options
Diffstat (limited to 'lib/std/Build/Step/CheckObject.zig')
| -rw-r--r-- | lib/std/Build/Step/CheckObject.zig | 871 |
1 files changed, 625 insertions, 246 deletions
diff --git a/lib/std/Build/Step/CheckObject.zig b/lib/std/Build/Step/CheckObject.zig index 0c74a4e40e..5a816b4103 100644 --- a/lib/std/Build/Step/CheckObject.zig +++ b/lib/std/Build/Step/CheckObject.zig @@ -18,7 +18,6 @@ step: Step, source: std.Build.FileSource, max_bytes: usize = 20 * 1024 * 1024, checks: std.ArrayList(Check), -dump_symtab: bool = false, obj_format: std.Target.ObjectFormat, pub fn create( @@ -53,84 +52,104 @@ const SearchPhrase = struct { } }; -/// There two types of actions currently supported: -/// * `.match` - is the main building block of standard matchers with optional eat-all token `{*}` -/// and extractors by name such as `{n_value}`. Please note this action is very simplistic in nature -/// i.e., it won't really handle edge cases/nontrivial examples. But given that we do want to use -/// it mainly to test the output of our object format parser-dumpers when testing the linkers, etc. -/// it should be plenty useful in its current form. -/// * `.compute_cmp` - can be used to perform an operation on the extracted global variables +/// There are five types of actions currently supported: +/// .exact - will do an exact match against the haystack +/// .contains - will check for existence within the haystack +/// .not_present - will check for non-existence within the haystack +/// .extract - will do an exact match and extract into a variable enclosed within `{name}` braces +/// .compute_cmp - will perform an operation on the extracted global variables /// using the MatchAction. It currently only supports an addition. The operation is required /// to be specified in Reverse Polish Notation to ease in operator-precedence parsing (well, /// to avoid any parsing really). /// For example, if the two extracted values were saved as `vmaddr` and `entryoff` respectively /// they could then be added with this simple program `vmaddr entryoff +`. 
const Action = struct { - tag: enum { match, not_present, compute_cmp }, + tag: enum { exact, contains, not_present, extract, compute_cmp }, phrase: SearchPhrase, expected: ?ComputeCompareExpected = null, - /// Will return true if the `phrase` was found in the `haystack`. - /// Some examples include: - /// - /// LC 0 => will match in its entirety - /// vmaddr {vmaddr} => will match `vmaddr` and then extract the following value as u64 - /// and save under `vmaddr` global name (see `global_vars` param) - /// name {*}libobjc{*}.dylib => will match `name` followed by a token which contains `libobjc` and `.dylib` - /// in that order with other letters in between - fn match( + /// Returns true if the `phrase` is an exact match with the haystack and variable was successfully extracted. + fn extract( act: Action, b: *std.Build, step: *Step, haystack: []const u8, global_vars: anytype, ) !bool { - assert(act.tag == .match or act.tag == .not_present); - const phrase = act.phrase.resolve(b, step); - var candidate_var: ?struct { name: []const u8, value: u64 } = null; - var hay_it = mem.tokenizeScalar(u8, mem.trim(u8, haystack, " "), ' '); - var needle_it = mem.tokenizeScalar(u8, mem.trim(u8, phrase, " "), ' '); + assert(act.tag == .extract); + const hay = mem.trim(u8, haystack, " "); + const phrase = mem.trim(u8, act.phrase.resolve(b, step), " "); + + var candidate_vars = std.ArrayList(struct { name: []const u8, value: u64 }).init(b.allocator); + var hay_it = mem.tokenizeScalar(u8, hay, ' '); + var needle_it = mem.tokenizeScalar(u8, phrase, ' '); while (needle_it.next()) |needle_tok| { - const hay_tok = hay_it.next() orelse return false; - - if (mem.indexOf(u8, needle_tok, "{*}")) |index| { - // We have fuzzy matchers within the search pattern, so we match substrings. 
- var start = index; - var n_tok = needle_tok; - var h_tok = hay_tok; - while (true) { - n_tok = n_tok[start + 3 ..]; - const inner = if (mem.indexOf(u8, n_tok, "{*}")) |sub_end| - n_tok[0..sub_end] - else - n_tok; - if (mem.indexOf(u8, h_tok, inner) == null) return false; - start = mem.indexOf(u8, n_tok, "{*}") orelse break; - } - } else if (mem.startsWith(u8, needle_tok, "{")) { + const hay_tok = hay_it.next() orelse break; + if (mem.startsWith(u8, needle_tok, "{")) { const closing_brace = mem.indexOf(u8, needle_tok, "}") orelse return error.MissingClosingBrace; if (closing_brace != needle_tok.len - 1) return error.ClosingBraceNotLast; const name = needle_tok[1..closing_brace]; if (name.len == 0) return error.MissingBraceValue; - const value = try std.fmt.parseInt(u64, hay_tok, 16); - candidate_var = .{ + const value = std.fmt.parseInt(u64, hay_tok, 16) catch return false; + try candidate_vars.append(.{ .name = name, .value = value, - }; + }); } else { if (!mem.eql(u8, hay_tok, needle_tok)) return false; } } - if (candidate_var) |v| { - try global_vars.putNoClobber(v.name, v.value); - } + if (candidate_vars.items.len == 0) return false; + + for (candidate_vars.items) |cv| try global_vars.putNoClobber(cv.name, cv.value); return true; } + /// Returns true if the `phrase` is an exact match with the haystack. + fn exact( + act: Action, + b: *std.Build, + step: *Step, + haystack: []const u8, + ) bool { + assert(act.tag == .exact); + const hay = mem.trim(u8, haystack, " "); + const phrase = mem.trim(u8, act.phrase.resolve(b, step), " "); + return mem.eql(u8, hay, phrase); + } + + /// Returns true if the `phrase` exists within the haystack. 
+ fn contains( + act: Action, + b: *std.Build, + step: *Step, + haystack: []const u8, + ) bool { + assert(act.tag == .contains); + const hay = mem.trim(u8, haystack, " "); + const phrase = mem.trim(u8, act.phrase.resolve(b, step), " "); + return mem.indexOf(u8, hay, phrase) != null; + } + + /// Returns true if the `phrase` does not exist within the haystack. + fn notPresent( + act: Action, + b: *std.Build, + step: *Step, + haystack: []const u8, + ) bool { + assert(act.tag == .not_present); + return !contains(.{ + .tag = .contains, + .phrase = act.phrase, + .expected = act.expected, + }, b, step, haystack); + } + /// Will return true if the `phrase` is correctly parsed into an RPN program and /// its reduced, computed value compares using `op` with the expected value, either /// a literal or another extracted variable. @@ -235,9 +254,23 @@ const Check = struct { }; } - fn match(self: *Check, phrase: SearchPhrase) void { + fn extract(self: *Check, phrase: SearchPhrase) void { + self.actions.append(.{ + .tag = .extract, + .phrase = phrase, + }) catch @panic("OOM"); + } + + fn exact(self: *Check, phrase: SearchPhrase) void { self.actions.append(.{ - .tag = .match, + .tag = .exact, + .phrase = phrase, + }) catch @panic("OOM"); + } + + fn contains(self: *Check, phrase: SearchPhrase) void { + self.actions.append(.{ + .tag = .contains, .phrase = phrase, }) catch @panic("OOM"); } @@ -258,52 +291,118 @@ const Check = struct { } }; -/// Creates a new sequence of actions with `phrase` as the first anchor searched phrase. -pub fn checkStart(self: *CheckObject, phrase: []const u8) void { +/// Creates a new empty sequence of actions. +pub fn checkStart(self: *CheckObject) void { var new_check = Check.create(self.step.owner.allocator); - new_check.match(.{ .string = self.step.owner.dupe(phrase) }); self.checks.append(new_check) catch @panic("OOM"); } -/// Adds another searched phrase to the latest created Check with `CheckObject.checkStart(...)`. 
-/// Asserts at least one check already exists. -pub fn checkNext(self: *CheckObject, phrase: []const u8) void { +/// Adds an exact match phrase to the latest created Check with `CheckObject.checkStart()`. +pub fn checkExact(self: *CheckObject, phrase: []const u8) void { + self.checkExactInner(phrase, null); +} + +/// Like `checkExact()` but takes an additional argument `FileSource` which will be +/// resolved to a full search query in `make()`. +pub fn checkExactFileSource(self: *CheckObject, phrase: []const u8, file_source: std.Build.FileSource) void { + self.checkExactInner(phrase, file_source); +} + +fn checkExactInner(self: *CheckObject, phrase: []const u8, file_source: ?std.Build.FileSource) void { assert(self.checks.items.len > 0); const last = &self.checks.items[self.checks.items.len - 1]; - last.match(.{ .string = self.step.owner.dupe(phrase) }); + last.exact(.{ .string = self.step.owner.dupe(phrase), .file_source = file_source }); +} + +/// Adds a fuzzy match phrase to the latest created Check with `CheckObject.checkStart()`. +pub fn checkContains(self: *CheckObject, phrase: []const u8) void { + self.checkContainsInner(phrase, null); } -/// Like `checkNext()` but takes an additional argument `FileSource` which will be +/// Like `checkContains()` but takes an additional argument `FileSource` which will be /// resolved to a full search query in `make()`. 
-pub fn checkNextFileSource( - self: *CheckObject, - phrase: []const u8, - file_source: std.Build.FileSource, -) void { +pub fn checkContainsFileSource(self: *CheckObject, phrase: []const u8, file_source: std.Build.FileSource) void { + self.checkContainsInner(phrase, file_source); +} + +fn checkContainsInner(self: *CheckObject, phrase: []const u8, file_source: ?std.Build.FileSource) void { + assert(self.checks.items.len > 0); + const last = &self.checks.items[self.checks.items.len - 1]; + last.contains(.{ .string = self.step.owner.dupe(phrase), .file_source = file_source }); +} + +/// Adds an exact match phrase with variable extractor to the latest created Check +/// with `CheckObject.checkStart()`. +pub fn checkExtract(self: *CheckObject, phrase: []const u8) void { + self.checkExtractInner(phrase, null); +} + +/// Like `checkExtract()` but takes an additional argument `FileSource` which will be +/// resolved to a full search query in `make()`. +pub fn checkExtractFileSource(self: *CheckObject, phrase: []const u8, file_source: std.Build.FileSource) void { + self.checkExtractInner(phrase, file_source); +} + +fn checkExtractInner(self: *CheckObject, phrase: []const u8, file_source: ?std.Build.FileSource) void { assert(self.checks.items.len > 0); const last = &self.checks.items[self.checks.items.len - 1]; - last.match(.{ .string = self.step.owner.dupe(phrase), .file_source = file_source }); + last.extract(.{ .string = self.step.owner.dupe(phrase), .file_source = file_source }); } /// Adds another searched phrase to the latest created Check with `CheckObject.checkStart(...)` /// however ensures there is no matching phrase in the output. -/// Asserts at least one check already exists. pub fn checkNotPresent(self: *CheckObject, phrase: []const u8) void { + self.checkNotPresentInner(phrase, null); +} + +/// Like `checkNotPresent()` but takes an additional argument `FileSource` which will be +/// resolved to a full search query in `make()`. 
+pub fn checkNotPresentFileSource(self: *CheckObject, phrase: []const u8, file_source: std.Build.FileSource) void { + self.checkNotPresentInner(phrase, file_source); +} + +fn checkNotPresentInner(self: *CheckObject, phrase: []const u8, file_source: ?std.Build.FileSource) void { assert(self.checks.items.len > 0); const last = &self.checks.items[self.checks.items.len - 1]; - last.notPresent(.{ .string = self.step.owner.dupe(phrase) }); + last.notPresent(.{ .string = self.step.owner.dupe(phrase), .file_source = file_source }); } /// Creates a new check checking specifically symbol table parsed and dumped from the object /// file. -/// Issuing this check will force parsing and dumping of the symbol table. pub fn checkInSymtab(self: *CheckObject) void { - self.dump_symtab = true; - const symtab_label = switch (self.obj_format) { + const label = switch (self.obj_format) { .macho => MachODumper.symtab_label, - else => @panic("TODO other parsers"), + .elf => ElfDumper.symtab_label, + .wasm => WasmDumper.symtab_label, + .coff => @panic("TODO symtab for coff"), + else => @panic("TODO other file formats"), }; - self.checkStart(symtab_label); + self.checkStart(); + self.checkExact(label); +} + +/// Creates a new check checking specifically dynamic symbol table parsed and dumped from the object +/// file. +/// This check is target-dependent and applicable to ELF only. +pub fn checkInDynamicSymtab(self: *CheckObject) void { + const label = switch (self.obj_format) { + .elf => ElfDumper.dynamic_symtab_label, + else => @panic("Unsupported target platform"), + }; + self.checkStart(); + self.checkExact(label); +} + +/// Creates a new check checking specifically dynamic section parsed and dumped from the object +/// file. +/// This check is target-dependent and applicable to ELF only. 
+pub fn checkInDynamicSection(self: *CheckObject) void { + const label = switch (self.obj_format) { + .elf => ElfDumper.dynamic_section_label, + else => @panic("Unsupported target platform"), + }; + self.checkStart(); + self.checkExact(label); } /// Creates a new standalone, singular check which allows running simple binary operations @@ -336,16 +435,10 @@ fn make(step: *Step, prog_node: *std.Progress.Node) !void { ) catch |err| return step.fail("unable to read '{s}': {s}", .{ src_path, @errorName(err) }); const output = switch (self.obj_format) { - .macho => try MachODumper.parseAndDump(step, contents, .{ - .dump_symtab = self.dump_symtab, - }), - .elf => try ElfDumper.parseAndDump(step, contents, .{ - .dump_symtab = self.dump_symtab, - }), + .macho => try MachODumper.parseAndDump(step, contents), + .elf => try ElfDumper.parseAndDump(step, contents), .coff => @panic("TODO coff parser"), - .wasm => try WasmDumper.parseAndDump(step, contents, .{ - .dump_symtab = self.dump_symtab, - }), + .wasm => try WasmDumper.parseAndDump(step, contents), else => unreachable, }; @@ -355,9 +448,9 @@ fn make(step: *Step, prog_node: *std.Progress.Node) !void { var it = mem.tokenizeAny(u8, output, "\r\n"); for (chk.actions.items) |act| { switch (act.tag) { - .match => { + .exact => { while (it.next()) |line| { - if (try act.match(b, step, line, &vars)) break; + if (act.exact(b, step, line)) break; } else { return step.fail( \\ @@ -369,18 +462,46 @@ fn make(step: *Step, prog_node: *std.Progress.Node) !void { , .{ act.phrase.resolve(b, step), output }); } }, + .contains => { + while (it.next()) |line| { + if (act.contains(b, step, line)) break; + } else { + return step.fail( + \\ + \\========= expected to find: ========================== + \\*{s}* + \\========= but parsed file does not contain it: ======= + \\{s} + \\====================================================== + , .{ act.phrase.resolve(b, step), output }); + } + }, .not_present => { while (it.next()) |line| { - if (try 
act.match(b, step, line, &vars)) { - return step.fail( - \\ - \\========= expected not to find: =================== - \\{s} - \\========= but parsed file does contain it: ======== - \\{s} - \\=================================================== - , .{ act.phrase.resolve(b, step), output }); - } + if (act.notPresent(b, step, line)) break; + } else { + return step.fail( + \\ + \\========= expected not to find: =================== + \\{s} + \\========= but parsed file does contain it: ======== + \\{s} + \\=================================================== + , .{ act.phrase.resolve(b, step), output }); + } + }, + .extract => { + while (it.next()) |line| { + if (try act.extract(b, step, line, &vars)) break; + } else { + return step.fail( + \\ + \\========= expected to find and extract: ============== + \\{s} + \\========= but parsed file does not contain it: ======= + \\{s} + \\====================================================== + , .{ act.phrase.resolve(b, step), output }); } }, .compute_cmp => { @@ -410,15 +531,16 @@ fn make(step: *Step, prog_node: *std.Progress.Node) !void { } } -const Opts = struct { - dump_symtab: bool = false, -}; - const MachODumper = struct { const LoadCommandIterator = macho.LoadCommandIterator; - const symtab_label = "symtab"; + const symtab_label = "symbol table"; + + const Symtab = struct { + symbols: []align(1) const macho.nlist_64, + strings: []const u8, + }; - fn parseAndDump(step: *Step, bytes: []align(@alignOf(u64)) const u8, opts: Opts) ![]const u8 { + fn parseAndDump(step: *Step, bytes: []align(@alignOf(u64)) const u8) ![]const u8 { const gpa = step.owner.allocator; var stream = std.io.fixedBufferStream(bytes); const reader = stream.reader(); @@ -431,8 +553,7 @@ const MachODumper = struct { var output = std.ArrayList(u8).init(gpa); const writer = output.writer(); - var symtab: []const macho.nlist_64 = undefined; - var strtab: []const u8 = undefined; + var symtab: ?Symtab = null; var sections = 
std.ArrayList(macho.section_64).init(gpa); var imports = std.ArrayList([]const u8).init(gpa); @@ -450,13 +571,11 @@ const MachODumper = struct { sections.appendAssumeCapacity(sect); } }, - .SYMTAB => if (opts.dump_symtab) { + .SYMTAB => { const lc = cmd.cast(macho.symtab_command).?; - symtab = @as( - [*]const macho.nlist_64, - @ptrCast(@alignCast(&bytes[lc.symoff])), - )[0..lc.nsyms]; - strtab = bytes[lc.stroff..][0..lc.strsize]; + const symbols = @as([*]align(1) const macho.nlist_64, @ptrCast(bytes.ptr + lc.symoff))[0..lc.nsyms]; + const strings = bytes[lc.stroff..][0..lc.strsize]; + symtab = .{ .symbols = symbols, .strings = strings }; }, .LOAD_DYLIB, .LOAD_WEAK_DYLIB, @@ -473,53 +592,8 @@ const MachODumper = struct { i += 1; } - if (opts.dump_symtab) { - try writer.print("{s}\n", .{symtab_label}); - for (symtab) |sym| { - if (sym.stab()) continue; - const sym_name = mem.sliceTo(@as([*:0]const u8, @ptrCast(strtab.ptr + sym.n_strx)), 0); - if (sym.sect()) { - const sect = sections.items[sym.n_sect - 1]; - try writer.print("{x} ({s},{s})", .{ - sym.n_value, - sect.segName(), - sect.sectName(), - }); - if (sym.ext()) { - try writer.writeAll(" external"); - } - try writer.print(" {s}\n", .{sym_name}); - } else if (sym.undf()) { - const ordinal = @divTrunc(@as(i16, @bitCast(sym.n_desc)), macho.N_SYMBOL_RESOLVER); - const import_name = blk: { - if (ordinal <= 0) { - if (ordinal == macho.BIND_SPECIAL_DYLIB_SELF) - break :blk "self import"; - if (ordinal == macho.BIND_SPECIAL_DYLIB_MAIN_EXECUTABLE) - break :blk "main executable"; - if (ordinal == macho.BIND_SPECIAL_DYLIB_FLAT_LOOKUP) - break :blk "flat lookup"; - unreachable; - } - const full_path = imports.items[@as(u16, @bitCast(ordinal)) - 1]; - const basename = fs.path.basename(full_path); - assert(basename.len > 0); - const ext = mem.lastIndexOfScalar(u8, basename, '.') orelse basename.len; - break :blk basename[0..ext]; - }; - try writer.writeAll("(undefined)"); - if (sym.weakRef()) { - try writer.writeAll(" 
weak"); - } - if (sym.ext()) { - try writer.writeAll(" external"); - } - try writer.print(" {s} (from {s})\n", .{ - sym_name, - import_name, - }); - } else unreachable; - } + if (symtab) |stab| { + try dumpSymtab(sections.items, imports.items, stab, writer); } return output.toOwnedSlice(); @@ -696,10 +770,67 @@ const MachODumper = struct { else => {}, } } + + fn dumpSymtab( + sections: []const macho.section_64, + imports: []const []const u8, + symtab: Symtab, + writer: anytype, + ) !void { + try writer.writeAll(symtab_label ++ "\n"); + + for (symtab.symbols) |sym| { + if (sym.stab()) continue; + const sym_name = mem.sliceTo(@as([*:0]const u8, @ptrCast(symtab.strings.ptr + sym.n_strx)), 0); + if (sym.sect()) { + const sect = sections[sym.n_sect - 1]; + try writer.print("{x} ({s},{s})", .{ + sym.n_value, + sect.segName(), + sect.sectName(), + }); + if (sym.ext()) { + try writer.writeAll(" external"); + } + try writer.print(" {s}\n", .{sym_name}); + } else if (sym.undf()) { + const ordinal = @divTrunc(@as(i16, @bitCast(sym.n_desc)), macho.N_SYMBOL_RESOLVER); + const import_name = blk: { + if (ordinal <= 0) { + if (ordinal == macho.BIND_SPECIAL_DYLIB_SELF) + break :blk "self import"; + if (ordinal == macho.BIND_SPECIAL_DYLIB_MAIN_EXECUTABLE) + break :blk "main executable"; + if (ordinal == macho.BIND_SPECIAL_DYLIB_FLAT_LOOKUP) + break :blk "flat lookup"; + unreachable; + } + const full_path = imports[@as(u16, @bitCast(ordinal)) - 1]; + const basename = fs.path.basename(full_path); + assert(basename.len > 0); + const ext = mem.lastIndexOfScalar(u8, basename, '.') orelse basename.len; + break :blk basename[0..ext]; + }; + try writer.writeAll("(undefined)"); + if (sym.weakRef()) { + try writer.writeAll(" weak"); + } + if (sym.ext()) { + try writer.writeAll(" external"); + } + try writer.print(" {s} (from {s})\n", .{ + sym_name, + import_name, + }); + } else unreachable; + } + } }; const ElfDumper = struct { - const symtab_label = "symtab"; + const symtab_label = "symbol 
table"; + const dynamic_symtab_label = "dynamic symbol table"; + const dynamic_section_label = "dynamic section"; const Symtab = struct { symbols: []align(1) const elf.Elf64_Sym, @@ -712,8 +843,7 @@ const ElfDumper = struct { fn getName(st: Symtab, index: usize) ?[]const u8 { const sym = st.get(index) orelse return null; - assert(sym.st_name < st.strings.len); - return mem.sliceTo(@ptrCast(st.strings.ptr + sym.st_name), 0); + return getString(st.strings, sym.st_name); } }; @@ -728,7 +858,7 @@ const ElfDumper = struct { dysymtab: ?Symtab = null, }; - fn parseAndDump(step: *Step, bytes: []const u8, opts: Opts) ![]const u8 { + fn parseAndDump(step: *Step, bytes: []const u8) ![]const u8 { const gpa = step.owner.allocator; var stream = std.io.fixedBufferStream(bytes); const reader = stream.reader(); @@ -751,34 +881,32 @@ const ElfDumper = struct { }; ctx.shstrtab = getSectionContents(ctx, ctx.hdr.e_shstrndx); - if (opts.dump_symtab) { - for (ctx.shdrs, 0..) |shdr, i| switch (shdr.sh_type) { - elf.SHT_SYMTAB, elf.SHT_DYNSYM => { - const raw = getSectionContents(ctx, i); - const nsyms = @divExact(raw.len, @sizeOf(elf.Elf64_Sym)); - const symbols = @as([*]align(1) const elf.Elf64_Sym, @ptrCast(raw.ptr))[0..nsyms]; - const strings = getSectionContents(ctx, shdr.sh_link); - - switch (shdr.sh_type) { - elf.SHT_SYMTAB => { - ctx.symtab = .{ - .symbols = symbols, - .strings = strings, - }; - }, - elf.SHT_DYNSYM => { - ctx.dysymtab = .{ - .symbols = symbols, - .strings = strings, - }; - }, - else => unreachable, - } - }, + for (ctx.shdrs, 0..) 
|shdr, i| switch (shdr.sh_type) { + elf.SHT_SYMTAB, elf.SHT_DYNSYM => { + const raw = getSectionContents(ctx, i); + const nsyms = @divExact(raw.len, @sizeOf(elf.Elf64_Sym)); + const symbols = @as([*]align(1) const elf.Elf64_Sym, @ptrCast(raw.ptr))[0..nsyms]; + const strings = getSectionContents(ctx, shdr.sh_link); + + switch (shdr.sh_type) { + elf.SHT_SYMTAB => { + ctx.symtab = .{ + .symbols = symbols, + .strings = strings, + }; + }, + elf.SHT_DYNSYM => { + ctx.dysymtab = .{ + .symbols = symbols, + .strings = strings, + }; + }, + else => unreachable, + } + }, - else => {}, - }; - } + else => {}, + }; var output = std.ArrayList(u8).init(gpa); const writer = output.writer(); @@ -786,14 +914,16 @@ const ElfDumper = struct { try dumpHeader(ctx, writer); try dumpShdrs(ctx, writer); try dumpPhdrs(ctx, writer); + try dumpDynamicSection(ctx, writer); + try dumpSymtab(ctx, .symtab, writer); + try dumpSymtab(ctx, .dysymtab, writer); return output.toOwnedSlice(); } - fn getSectionName(ctx: Context, shndx: usize) []const u8 { + inline fn getSectionName(ctx: Context, shndx: usize) []const u8 { const shdr = ctx.shdrs[shndx]; - assert(shdr.sh_name < ctx.shstrtab.len); - return mem.sliceTo(@as([*:0]const u8, @ptrCast(ctx.shstrtab.ptr + shdr.sh_name)), 0); + return getString(ctx.shstrtab, shdr.sh_name); } fn getSectionContents(ctx: Context, shndx: usize) []const u8 { @@ -803,6 +933,17 @@ const ElfDumper = struct { return ctx.data[shdr.sh_offset..][0..shdr.sh_size]; } + fn getSectionByName(ctx: Context, name: []const u8) ?usize { + for (0..ctx.shdrs.len) |shndx| { + if (mem.eql(u8, getSectionName(ctx, shndx), name)) return shndx; + } else return null; + } + + fn getString(strtab: []const u8, off: u32) []const u8 { + assert(off < strtab.len); + return mem.sliceTo(@as([*:0]const u8, @ptrCast(strtab.ptr + off)), 0); + } + fn dumpHeader(ctx: Context, writer: anytype) !void { try writer.writeAll("header\n"); try writer.print("type {s}\n", .{@tagName(ctx.hdr.e_type)}); @@ -812,6 +953,8 @@ 
const ElfDumper = struct { fn dumpShdrs(ctx: Context, writer: anytype) !void { if (ctx.shdrs.len == 0) return; + try writer.writeAll("section headers\n"); + for (ctx.shdrs, 0..) |shdr, shndx| { try writer.print("shdr {d}\n", .{shndx}); try writer.print("name {s}\n", .{getSectionName(ctx, shndx)}); @@ -824,6 +967,145 @@ const ElfDumper = struct { } } + fn dumpDynamicSection(ctx: Context, writer: anytype) !void { + const shndx = getSectionByName(ctx, ".dynamic") orelse return; + const shdr = ctx.shdrs[shndx]; + const strtab = getSectionContents(ctx, shdr.sh_link); + const data = getSectionContents(ctx, shndx); + const nentries = @divExact(data.len, @sizeOf(elf.Elf64_Dyn)); + const entries = @as([*]align(1) const elf.Elf64_Dyn, @ptrCast(data.ptr))[0..nentries]; + + try writer.writeAll(ElfDumper.dynamic_section_label ++ "\n"); + + for (entries) |entry| { + const key = @as(u64, @bitCast(entry.d_tag)); + const value = entry.d_val; + + const key_str = switch (key) { + elf.DT_NEEDED => "NEEDED", + elf.DT_SONAME => "SONAME", + elf.DT_INIT_ARRAY => "INIT_ARRAY", + elf.DT_INIT_ARRAYSZ => "INIT_ARRAYSZ", + elf.DT_FINI_ARRAY => "FINI_ARRAY", + elf.DT_FINI_ARRAYSZ => "FINI_ARRAYSZ", + elf.DT_HASH => "HASH", + elf.DT_GNU_HASH => "GNU_HASH", + elf.DT_STRTAB => "STRTAB", + elf.DT_SYMTAB => "SYMTAB", + elf.DT_STRSZ => "STRSZ", + elf.DT_SYMENT => "SYMENT", + elf.DT_PLTGOT => "PLTGOT", + elf.DT_PLTRELSZ => "PLTRELSZ", + elf.DT_PLTREL => "PLTREL", + elf.DT_JMPREL => "JMPREL", + elf.DT_RELA => "RELA", + elf.DT_RELASZ => "RELASZ", + elf.DT_RELAENT => "RELAENT", + elf.DT_VERDEF => "VERDEF", + elf.DT_VERDEFNUM => "VERDEFNUM", + elf.DT_FLAGS => "FLAGS", + elf.DT_FLAGS_1 => "FLAGS_1", + elf.DT_VERNEED => "VERNEED", + elf.DT_VERNEEDNUM => "VERNEEDNUM", + elf.DT_VERSYM => "VERSYM", + elf.DT_RELACOUNT => "RELACOUNT", + elf.DT_RPATH => "RPATH", + elf.DT_RUNPATH => "RUNPATH", + elf.DT_INIT => "INIT", + elf.DT_FINI => "FINI", + elf.DT_NULL => "NULL", + else => "UNKNOWN", + }; + try 
writer.print("{s}", .{key_str}); + + switch (key) { + elf.DT_NEEDED, + elf.DT_SONAME, + elf.DT_RPATH, + elf.DT_RUNPATH, + => { + const name = getString(strtab, @intCast(value)); + try writer.print(" {s}", .{name}); + }, + + elf.DT_INIT_ARRAY, + elf.DT_FINI_ARRAY, + elf.DT_HASH, + elf.DT_GNU_HASH, + elf.DT_STRTAB, + elf.DT_SYMTAB, + elf.DT_PLTGOT, + elf.DT_JMPREL, + elf.DT_RELA, + elf.DT_VERDEF, + elf.DT_VERNEED, + elf.DT_VERSYM, + elf.DT_INIT, + elf.DT_FINI, + elf.DT_NULL, + => try writer.print(" {x}", .{value}), + + elf.DT_INIT_ARRAYSZ, + elf.DT_FINI_ARRAYSZ, + elf.DT_STRSZ, + elf.DT_SYMENT, + elf.DT_PLTRELSZ, + elf.DT_RELASZ, + elf.DT_RELAENT, + elf.DT_RELACOUNT, + => try writer.print(" {d}", .{value}), + + elf.DT_PLTREL => try writer.writeAll(switch (value) { + elf.DT_REL => " REL", + elf.DT_RELA => " RELA", + else => " UNKNOWN", + }), + + elf.DT_FLAGS => if (value > 0) { + if (value & elf.DF_ORIGIN != 0) try writer.writeAll(" ORIGIN"); + if (value & elf.DF_SYMBOLIC != 0) try writer.writeAll(" SYMBOLIC"); + if (value & elf.DF_TEXTREL != 0) try writer.writeAll(" TEXTREL"); + if (value & elf.DF_BIND_NOW != 0) try writer.writeAll(" BIND_NOW"); + if (value & elf.DF_STATIC_TLS != 0) try writer.writeAll(" STATIC_TLS"); + }, + + elf.DT_FLAGS_1 => if (value > 0) { + if (value & elf.DF_1_NOW != 0) try writer.writeAll(" NOW"); + if (value & elf.DF_1_GLOBAL != 0) try writer.writeAll(" GLOBAL"); + if (value & elf.DF_1_GROUP != 0) try writer.writeAll(" GROUP"); + if (value & elf.DF_1_NODELETE != 0) try writer.writeAll(" NODELETE"); + if (value & elf.DF_1_LOADFLTR != 0) try writer.writeAll(" LOADFLTR"); + if (value & elf.DF_1_INITFIRST != 0) try writer.writeAll(" INITFIRST"); + if (value & elf.DF_1_NOOPEN != 0) try writer.writeAll(" NOOPEN"); + if (value & elf.DF_1_ORIGIN != 0) try writer.writeAll(" ORIGIN"); + if (value & elf.DF_1_DIRECT != 0) try writer.writeAll(" DIRECT"); + if (value & elf.DF_1_TRANS != 0) try writer.writeAll(" TRANS"); + if (value & elf.DF_1_INTERPOSE != 
0) try writer.writeAll(" INTERPOSE"); + if (value & elf.DF_1_NODEFLIB != 0) try writer.writeAll(" NODEFLIB"); + if (value & elf.DF_1_NODUMP != 0) try writer.writeAll(" NODUMP"); + if (value & elf.DF_1_CONFALT != 0) try writer.writeAll(" CONFALT"); + if (value & elf.DF_1_ENDFILTEE != 0) try writer.writeAll(" ENDFILTEE"); + if (value & elf.DF_1_DISPRELDNE != 0) try writer.writeAll(" DISPRELDNE"); + if (value & elf.DF_1_DISPRELPND != 0) try writer.writeAll(" DISPRELPND"); + if (value & elf.DF_1_NODIRECT != 0) try writer.writeAll(" NODIRECT"); + if (value & elf.DF_1_IGNMULDEF != 0) try writer.writeAll(" IGNMULDEF"); + if (value & elf.DF_1_NOKSYMS != 0) try writer.writeAll(" NOKSYMS"); + if (value & elf.DF_1_NOHDR != 0) try writer.writeAll(" NOHDR"); + if (value & elf.DF_1_EDITED != 0) try writer.writeAll(" EDITED"); + if (value & elf.DF_1_NORELOC != 0) try writer.writeAll(" NORELOC"); + if (value & elf.DF_1_SYMINTPOSE != 0) try writer.writeAll(" SYMINTPOSE"); + if (value & elf.DF_1_GLOBAUDIT != 0) try writer.writeAll(" GLOBAUDIT"); + if (value & elf.DF_1_SINGLETON != 0) try writer.writeAll(" SINGLETON"); + if (value & elf.DF_1_STUB != 0) try writer.writeAll(" STUB"); + if (value & elf.DF_1_PIE != 0) try writer.writeAll(" PIE"); + }, + + else => try writer.print(" {x}", .{value}), + } + try writer.writeByte('\n'); + } + } + fn fmtShType(sh_type: u32) std.fmt.Formatter(formatShType) { return .{ .data = sh_type }; } @@ -836,46 +1118,46 @@ const ElfDumper = struct { ) !void { _ = unused_fmt_string; _ = options; - if (elf.SHT_LOOS <= sh_type and sh_type < elf.SHT_HIOS) { - try writer.print("LOOS+0x{x}", .{sh_type - elf.SHT_LOOS}); - } else if (elf.SHT_LOPROC <= sh_type and sh_type < elf.SHT_HIPROC) { - try writer.print("LOPROC+0x{x}", .{sh_type - elf.SHT_LOPROC}); - } else if (elf.SHT_LOUSER <= sh_type and sh_type < elf.SHT_HIUSER) { - try writer.print("LOUSER+0x{x}", .{sh_type - elf.SHT_LOUSER}); - } else { - const name = switch (sh_type) { - elf.SHT_NULL => "NULL", - 
elf.SHT_PROGBITS => "PROGBITS", - elf.SHT_SYMTAB => "SYMTAB", - elf.SHT_STRTAB => "STRTAB", - elf.SHT_RELA => "RELA", - elf.SHT_HASH => "HASH", - elf.SHT_DYNAMIC => "DYNAMIC", - elf.SHT_NOTE => "NOTE", - elf.SHT_NOBITS => "NOBITS", - elf.SHT_REL => "REL", - elf.SHT_SHLIB => "SHLIB", - elf.SHT_DYNSYM => "DYNSYM", - elf.SHT_INIT_ARRAY => "INIT_ARRAY", - elf.SHT_FINI_ARRAY => "FINI_ARRAY", - elf.SHT_PREINIT_ARRAY => "PREINIT_ARRAY", - elf.SHT_GROUP => "GROUP", - elf.SHT_SYMTAB_SHNDX => "SYMTAB_SHNDX", - elf.SHT_X86_64_UNWIND => "X86_64_UNWIND", - elf.SHT_LLVM_ADDRSIG => "LLVM_ADDRSIG", - elf.SHT_GNU_HASH => "GNU_HASH", - elf.SHT_GNU_VERDEF => "VERDEF", - elf.SHT_GNU_VERNEED => "VERNEED", - elf.SHT_GNU_VERSYM => "VERSYM", - else => "UNKNOWN", - }; - try writer.writeAll(name); - } + const name = switch (sh_type) { + elf.SHT_NULL => "NULL", + elf.SHT_PROGBITS => "PROGBITS", + elf.SHT_SYMTAB => "SYMTAB", + elf.SHT_STRTAB => "STRTAB", + elf.SHT_RELA => "RELA", + elf.SHT_HASH => "HASH", + elf.SHT_DYNAMIC => "DYNAMIC", + elf.SHT_NOTE => "NOTE", + elf.SHT_NOBITS => "NOBITS", + elf.SHT_REL => "REL", + elf.SHT_SHLIB => "SHLIB", + elf.SHT_DYNSYM => "DYNSYM", + elf.SHT_INIT_ARRAY => "INIT_ARRAY", + elf.SHT_FINI_ARRAY => "FINI_ARRAY", + elf.SHT_PREINIT_ARRAY => "PREINIT_ARRAY", + elf.SHT_GROUP => "GROUP", + elf.SHT_SYMTAB_SHNDX => "SYMTAB_SHNDX", + elf.SHT_X86_64_UNWIND => "X86_64_UNWIND", + elf.SHT_LLVM_ADDRSIG => "LLVM_ADDRSIG", + elf.SHT_GNU_HASH => "GNU_HASH", + elf.SHT_GNU_VERDEF => "VERDEF", + elf.SHT_GNU_VERNEED => "VERNEED", + elf.SHT_GNU_VERSYM => "VERSYM", + else => if (elf.SHT_LOOS <= sh_type and sh_type < elf.SHT_HIOS) { + return try writer.print("LOOS+0x{x}", .{sh_type - elf.SHT_LOOS}); + } else if (elf.SHT_LOPROC <= sh_type and sh_type < elf.SHT_HIPROC) { + return try writer.print("LOPROC+0x{x}", .{sh_type - elf.SHT_LOPROC}); + } else if (elf.SHT_LOUSER <= sh_type and sh_type < elf.SHT_HIUSER) { + return try writer.print("LOUSER+0x{x}", .{sh_type - elf.SHT_LOUSER}); 
+ } else "UNKNOWN", + }; + try writer.writeAll(name); } fn dumpPhdrs(ctx: Context, writer: anytype) !void { if (ctx.phdrs.len == 0) return; + try writer.writeAll("program headers\n"); + for (ctx.phdrs, 0..) |phdr, phndx| { try writer.print("phdr {d}\n", .{phndx}); try writer.print("type {s}\n", .{fmtPhType(phdr.p_type)}); @@ -885,7 +1167,28 @@ const ElfDumper = struct { try writer.print("memsz {x}\n", .{phdr.p_memsz}); try writer.print("filesz {x}\n", .{phdr.p_filesz}); try writer.print("align {x}\n", .{phdr.p_align}); - // TODO dump formatted p_flags + + { + const flags = phdr.p_flags; + try writer.writeAll("flags"); + if (flags > 0) try writer.writeByte(' '); + if (flags & elf.PF_R != 0) { + try writer.writeByte('R'); + } + if (flags & elf.PF_W != 0) { + try writer.writeByte('W'); + } + if (flags & elf.PF_X != 0) { + try writer.writeByte('E'); + } + if (flags & elf.PF_MASKOS != 0) { + try writer.writeAll("OS"); + } + if (flags & elf.PF_MASKPROC != 0) { + try writer.writeAll("PROC"); + } + try writer.writeByte('\n'); + } } } @@ -901,27 +1204,107 @@ const ElfDumper = struct { ) !void { _ = unused_fmt_string; _ = options; - if (elf.PT_LOOS <= ph_type and ph_type < elf.PT_HIOS) { - try writer.print("LOOS+0x{x}", .{ph_type - elf.PT_LOOS}); - } else if (elf.PT_LOPROC <= ph_type and ph_type < elf.PT_HIPROC) { - try writer.print("LOPROC+0x{x}", .{ph_type - elf.PT_LOPROC}); - } else { - const p_type = switch (ph_type) { - elf.PT_NULL => "NULL", - elf.PT_LOAD => "LOAD", - elf.PT_DYNAMIC => "DYNAMIC", - elf.PT_INTERP => "INTERP", - elf.PT_NOTE => "NOTE", - elf.PT_SHLIB => "SHLIB", - elf.PT_PHDR => "PHDR", - elf.PT_TLS => "TLS", - elf.PT_NUM => "NUM", - elf.PT_GNU_EH_FRAME => "GNU_EH_FRAME", - elf.PT_GNU_STACK => "GNU_STACK", - elf.PT_GNU_RELRO => "GNU_RELRO", - else => "UNKNOWN", + const p_type = switch (ph_type) { + elf.PT_NULL => "NULL", + elf.PT_LOAD => "LOAD", + elf.PT_DYNAMIC => "DYNAMIC", + elf.PT_INTERP => "INTERP", + elf.PT_NOTE => "NOTE", + elf.PT_SHLIB => "SHLIB", 
+ elf.PT_PHDR => "PHDR", + elf.PT_TLS => "TLS", + elf.PT_NUM => "NUM", + elf.PT_GNU_EH_FRAME => "GNU_EH_FRAME", + elf.PT_GNU_STACK => "GNU_STACK", + elf.PT_GNU_RELRO => "GNU_RELRO", + else => if (elf.PT_LOOS <= ph_type and ph_type < elf.PT_HIOS) { + return try writer.print("LOOS+0x{x}", .{ph_type - elf.PT_LOOS}); + } else if (elf.PT_LOPROC <= ph_type and ph_type < elf.PT_HIPROC) { + return try writer.print("LOPROC+0x{x}", .{ph_type - elf.PT_LOPROC}); + } else "UNKNOWN", + }; + try writer.writeAll(p_type); + } + + fn dumpSymtab(ctx: Context, comptime @"type": enum { symtab, dysymtab }, writer: anytype) !void { + const symtab = switch (@"type") { + .symtab => ctx.symtab, + .dysymtab => ctx.dysymtab, + } orelse return; + + try writer.writeAll(switch (@"type") { + .symtab => symtab_label, + .dysymtab => dynamic_symtab_label, + } ++ "\n"); + + for (symtab.symbols, 0..) |sym, index| { + try writer.print("{x} {x}", .{ sym.st_value, sym.st_size }); + + { + if (elf.SHN_LORESERVE <= sym.st_shndx and sym.st_shndx < elf.SHN_HIRESERVE) { + if (elf.SHN_LOPROC <= sym.st_shndx and sym.st_shndx < elf.SHN_HIPROC) { + try writer.print(" LO+{d}", .{sym.st_shndx - elf.SHN_LOPROC}); + } else { + const sym_ndx = &switch (sym.st_shndx) { + elf.SHN_ABS => "ABS", + elf.SHN_COMMON => "COM", + elf.SHN_LIVEPATCH => "LIV", + else => "UNK", + }; + try writer.print(" {s}", .{sym_ndx}); + } + } else if (sym.st_shndx == elf.SHN_UNDEF) { + try writer.writeAll(" UND"); + } else { + try writer.print(" {x}", .{sym.st_shndx}); + } + } + + blk: { + const tt = sym.st_type(); + const sym_type = switch (tt) { + elf.STT_NOTYPE => "NOTYPE", + elf.STT_OBJECT => "OBJECT", + elf.STT_FUNC => "FUNC", + elf.STT_SECTION => "SECTION", + elf.STT_FILE => "FILE", + elf.STT_COMMON => "COMMON", + elf.STT_TLS => "TLS", + elf.STT_NUM => "NUM", + elf.STT_GNU_IFUNC => "IFUNC", + else => if (elf.STT_LOPROC <= tt and tt < elf.STT_HIPROC) { + break :blk try writer.print(" LOPROC+{d}", .{tt - elf.STT_LOPROC}); + } else if 
(elf.STT_LOOS <= tt and tt < elf.STT_HIOS) { + break :blk try writer.print(" LOOS+{d}", .{tt - elf.STT_LOOS}); + } else "UNK", + }; + try writer.print(" {s}", .{sym_type}); + } + + blk: { + const bind = sym.st_bind(); + const sym_bind = switch (bind) { + elf.STB_LOCAL => "LOCAL", + elf.STB_GLOBAL => "GLOBAL", + elf.STB_WEAK => "WEAK", + elf.STB_NUM => "NUM", + else => if (elf.STB_LOPROC <= bind and bind < elf.STB_HIPROC) { + break :blk try writer.print(" LOPROC+{d}", .{bind - elf.STB_LOPROC}); + } else if (elf.STB_LOOS <= bind and bind < elf.STB_HIOS) { + break :blk try writer.print(" LOOS+{d}", .{bind - elf.STB_LOOS}); + } else "UNKNOWN", + }; + try writer.print(" {s}", .{sym_bind}); + } + + const sym_vis = @as(elf.STV, @enumFromInt(sym.st_other)); + try writer.print(" {s}", .{@tagName(sym_vis)}); + + const sym_name = switch (sym.st_type()) { + elf.STT_SECTION => getSectionName(ctx, sym.st_shndx), + else => symtab.getName(index).?, }; - try writer.writeAll(p_type); + try writer.print(" {s}\n", .{sym_name}); } } }; @@ -929,12 +1312,8 @@ const ElfDumper = struct { const WasmDumper = struct { const symtab_label = "symbols"; - fn parseAndDump(step: *Step, bytes: []const u8, opts: Opts) ![]const u8 { + fn parseAndDump(step: *Step, bytes: []const u8) ![]const u8 { const gpa = step.owner.allocator; - if (opts.dump_symtab) { - @panic("TODO: Implement symbol table parsing and dumping"); - } - var fbs = std.io.fixedBufferStream(bytes); const reader = fbs.reader(); |
