diff options
| author | Jakub Konka <kubkon@jakubkonka.com> | 2022-06-24 00:02:12 +0200 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2022-06-24 00:02:12 +0200 |
| commit | 291c08f7b0ea4e333c37a0ac378176891f255fa0 (patch) | |
| tree | ee9571bf196c1fce5ec9d298dffc103c66b4d3ab /lib/std/build/CheckObjectStep.zig | |
| parent | 87d8cb19e4eed905b93d39554ea9a2a1012f6668 (diff) | |
| parent | 03ddb42b8bb96815c1bb4b857ffdfb94191ab861 (diff) | |
| download | zig-291c08f7b0ea4e333c37a0ac378176891f255fa0.tar.gz zig-291c08f7b0ea4e333c37a0ac378176891f255fa0.zip | |
Merge pull request #11910 from ziglang/linker-tests
Diffstat (limited to 'lib/std/build/CheckObjectStep.zig')
| -rw-r--r-- | lib/std/build/CheckObjectStep.zig | 392 |
1 files changed, 392 insertions, 0 deletions
diff --git a/lib/std/build/CheckObjectStep.zig b/lib/std/build/CheckObjectStep.zig new file mode 100644 index 0000000000..65a57f8832 --- /dev/null +++ b/lib/std/build/CheckObjectStep.zig @@ -0,0 +1,392 @@ +const std = @import("../std.zig"); +const assert = std.debug.assert; +const build = std.build; +const fs = std.fs; +const macho = std.macho; +const mem = std.mem; +const testing = std.testing; + +const CheckObjectStep = @This(); + +const Allocator = mem.Allocator; +const Builder = build.Builder; +const Step = build.Step; + +pub const base_id = .check_obj; + +step: Step, +builder: *Builder, +source: build.FileSource, +max_bytes: usize = 20 * 1024 * 1024, +checks: std.ArrayList(Check), +dump_symtab: bool = false, +obj_format: std.Target.ObjectFormat, + +pub fn create(builder: *Builder, source: build.FileSource, obj_format: std.Target.ObjectFormat) *CheckObjectStep { + const gpa = builder.allocator; + const self = gpa.create(CheckObjectStep) catch unreachable; + self.* = .{ + .builder = builder, + .step = Step.init(.check_file, "CheckObject", gpa, make), + .source = source.dupe(builder), + .checks = std.ArrayList(Check).init(gpa), + .obj_format = obj_format, + }; + self.source.addStepDependencies(&self.step); + return self; +} + +const Action = union(enum) { + match: MatchAction, + compute_eq: ComputeEqAction, +}; + +/// MatchAction is the main building block of standard matchers with optional eat-all token `{*}` +/// and extractors by name such as `{n_value}`. Please note this action is very simplistic in nature +/// i.e., it won't really handle edge cases/nontrivial examples. But given that we do want to use +/// it mainly to test the output of our object format parser-dumpers when testing the linkers, etc. +/// it should be plenty useful in its current form. +const MatchAction = struct { + needle: []const u8, + + /// Will return true if the `needle` was found in the `haystack`. + /// Some examples include: + /// + /// LC 0 => will match in its entirety + /// vmaddr {vmaddr} => will match `vmaddr` and then extract the following value as u64 + /// and save under `vmaddr` global name (see `global_vars` param) + /// name {*}libobjc{*}.dylib => will match `name` followed by a token which contains `libobjc` and `.dylib` + /// in that order with other letters in between + fn match(act: MatchAction, haystack: []const u8, global_vars: anytype) !bool { + var hay_it = mem.tokenize(u8, mem.trim(u8, haystack, " "), " "); + var needle_it = mem.tokenize(u8, mem.trim(u8, act.needle, " "), " "); + + while (needle_it.next()) |needle_tok| { + const hay_tok = hay_it.next() orelse return false; + + if (mem.indexOf(u8, needle_tok, "{*}")) |index| { + // We have fuzzy matchers within the search pattern, so we match substrings. + var start = index; + var n_tok = needle_tok; + var h_tok = hay_tok; + while (true) { + n_tok = n_tok[start + 3 ..]; + const inner = if (mem.indexOf(u8, n_tok, "{*}")) |sub_end| + n_tok[0..sub_end] + else + n_tok; + if (mem.indexOf(u8, h_tok, inner) == null) return false; + start = mem.indexOf(u8, n_tok, "{*}") orelse break; + } + } else if (mem.startsWith(u8, needle_tok, "{")) { + const closing_brace = mem.indexOf(u8, needle_tok, "}") orelse return error.MissingClosingBrace; + if (closing_brace != needle_tok.len - 1) return error.ClosingBraceNotLast; + + const name = needle_tok[1..closing_brace]; + if (name.len == 0) return error.MissingBraceValue; + const value = try std.fmt.parseInt(u64, hay_tok, 16); + try global_vars.putNoClobber(name, value); + } else { + if (!mem.eql(u8, hay_tok, needle_tok)) return false; + } + } + + return true; + } +}; + +/// ComputeEqAction can be used to perform an operation on the extracted global variables +/// using the MatchAction. It currently only supports an addition. The operation is required +/// to be specified in Reverse Polish Notation to ease in operator-precedence parsing (well, +/// to avoid any parsing really). +/// For example, if the two extracted values were saved as `vmaddr` and `entryoff` respectively +/// they could then be added with this simple program `vmaddr entryoff +`. +const ComputeEqAction = struct { + expected: []const u8, + var_stack: std.ArrayList([]const u8), + op_stack: std.ArrayList(Op), + + const Op = enum { + add, + }; +}; + +const Check = struct { + builder: *Builder, + actions: std.ArrayList(Action), + + fn create(b: *Builder) Check { + return .{ + .builder = b, + .actions = std.ArrayList(Action).init(b.allocator), + }; + } + + fn match(self: *Check, needle: []const u8) void { + self.actions.append(.{ + .match = .{ .needle = self.builder.dupe(needle) }, + }) catch unreachable; + } + + fn computeEq(self: *Check, act: ComputeEqAction) void { + self.actions.append(.{ + .compute_eq = act, + }) catch unreachable; + } +}; + +/// Creates a new sequence of actions with `phrase` as the first anchor searched phrase. +pub fn checkStart(self: *CheckObjectStep, phrase: []const u8) void { + var new_check = Check.create(self.builder); + new_check.match(phrase); + self.checks.append(new_check) catch unreachable; +} + +/// Adds another searched phrase to the latest created Check with `CheckObjectStep.checkStart(...)`. +/// Asserts at least one check already exists. +pub fn checkNext(self: *CheckObjectStep, phrase: []const u8) void { + assert(self.checks.items.len > 0); + const last = &self.checks.items[self.checks.items.len - 1]; + last.match(phrase); +} + +/// Creates a new check checking specifically symbol table parsed and dumped from the object +/// file. +/// Issuing this check will force parsing and dumping of the symbol table. +pub fn checkInSymtab(self: *CheckObjectStep) void { + self.dump_symtab = true; + const symtab_label = switch (self.obj_format) { + .macho => MachODumper.symtab_label, + else => @panic("TODO other parsers"), + }; + self.checkStart(symtab_label); +} + +/// Creates a new standalone, singular check which allows running simple binary operations +/// on the extracted variables. It will then compare the reduced program with the value of +/// the expected variable. +pub fn checkComputeEq(self: *CheckObjectStep, program: []const u8, expected: []const u8) void { + const gpa = self.builder.allocator; + var ca = ComputeEqAction{ + .expected = expected, + .var_stack = std.ArrayList([]const u8).init(gpa), + .op_stack = std.ArrayList(ComputeEqAction.Op).init(gpa), + }; + + var it = mem.tokenize(u8, program, " "); + while (it.next()) |next| { + if (mem.eql(u8, next, "+")) { + ca.op_stack.append(.add) catch unreachable; + } else { + ca.var_stack.append(self.builder.dupe(next)) catch unreachable; + } + } + + var new_check = Check.create(self.builder); + new_check.computeEq(ca); + self.checks.append(new_check) catch unreachable; +} + +fn make(step: *Step) !void { + const self = @fieldParentPtr(CheckObjectStep, "step", step); + + const gpa = self.builder.allocator; + const src_path = self.source.getPath(self.builder); + const contents = try fs.cwd().readFileAlloc(gpa, src_path, self.max_bytes); + + const output = switch (self.obj_format) { + .macho => try MachODumper.parseAndDump(contents, .{ + .gpa = gpa, + .dump_symtab = self.dump_symtab, + }), + .elf => @panic("TODO elf parser"), + .coff => @panic("TODO coff parser"), + .wasm => @panic("TODO wasm parser"), + else => unreachable, + }; + + var vars = std.StringHashMap(u64).init(gpa); + + for (self.checks.items) |chk| { + var it = mem.tokenize(u8, output, "\r\n"); + for (chk.actions.items) |act| { + switch (act) { + .match => |match_act| { + while (it.next()) |line| { + if (try match_act.match(line, &vars)) break; + } else { + std.debug.print( + \\ + \\========= Expected to find: ========================== + \\{s} + \\========= But parsed file does not contain it: ======= + \\{s} + \\ + , .{ match_act.needle, output }); + return error.TestFailed; + } + }, + .compute_eq => |c_eq| { + var values = std.ArrayList(u64).init(gpa); + try values.ensureTotalCapacity(c_eq.var_stack.items.len); + for (c_eq.var_stack.items) |vv| { + const val = vars.get(vv) orelse { + std.debug.print( + \\ + \\========= Variable was not extracted: =========== + \\{s} + \\========= From parsed file: ===================== + \\{s} + \\ + , .{ vv, output }); + return error.TestFailed; + }; + values.appendAssumeCapacity(val); + } + + var op_i: usize = 1; + var reduced: u64 = values.items[0]; + for (c_eq.op_stack.items) |op| { + const other = values.items[op_i]; + switch (op) { + .add => { + reduced += other; + }, + } + } + + const expected = vars.get(c_eq.expected) orelse { + std.debug.print( + \\ + \\========= Variable was not extracted: =========== + \\{s} + \\========= From parsed file: ===================== + \\{s} + \\ + , .{ c_eq.expected, output }); + return error.TestFailed; + }; + try testing.expectEqual(reduced, expected); + }, + } + } + } +} + +const Opts = struct { + gpa: ?Allocator = null, + dump_symtab: bool = false, +}; + +const MachODumper = struct { + const symtab_label = "symtab"; + + fn parseAndDump(bytes: []const u8, opts: Opts) ![]const u8 { + const gpa = opts.gpa orelse unreachable; // MachO dumper requires an allocator + var stream = std.io.fixedBufferStream(bytes); + const reader = stream.reader(); + + const hdr = try reader.readStruct(macho.mach_header_64); + if (hdr.magic != macho.MH_MAGIC_64) { + return error.InvalidMagicNumber; + } + + var output = std.ArrayList(u8).init(gpa); + const writer = output.writer(); + + var symtab_cmd: ?macho.symtab_command = null; + var i: u16 = 0; + while (i < hdr.ncmds) : (i += 1) { + var cmd = try macho.LoadCommand.read(gpa, reader); + + if (opts.dump_symtab and cmd.cmd() == .SYMTAB) { + symtab_cmd = cmd.symtab; + } + + try dumpLoadCommand(cmd, i, writer); + try writer.writeByte('\n'); + } + + if (symtab_cmd) |cmd| { + try writer.writeAll(symtab_label ++ "\n"); + const strtab = bytes[cmd.stroff..][0..cmd.strsize]; + const raw_symtab = bytes[cmd.symoff..][0 .. cmd.nsyms * @sizeOf(macho.nlist_64)]; + const symtab = mem.bytesAsSlice(macho.nlist_64, raw_symtab); + + for (symtab) |sym| { + if (sym.stab()) continue; + const sym_name = mem.sliceTo(@ptrCast([*:0]const u8, strtab.ptr + sym.n_strx), 0); + try writer.print("{s} {x}\n", .{ sym_name, sym.n_value }); + } + } + + return output.toOwnedSlice(); + } + + fn dumpLoadCommand(lc: macho.LoadCommand, index: u16, writer: anytype) !void { + // print header first + try writer.print( + \\LC {d} + \\cmd {s} + \\cmdsize {d} + , .{ index, @tagName(lc.cmd()), lc.cmdsize() }); + + switch (lc.cmd()) { + .SEGMENT_64 => { + // TODO dump section headers + const seg = lc.segment.inner; + try writer.writeByte('\n'); + try writer.print( + \\segname {s} + \\vmaddr {x} + \\vmsize {x} + \\fileoff {x} + \\filesz {x} + , .{ + seg.segName(), + seg.vmaddr, + seg.vmsize, + seg.fileoff, + seg.filesize, + }); + }, + + .ID_DYLIB, + .LOAD_DYLIB, + => { + const dylib = lc.dylib.inner.dylib; + try writer.writeByte('\n'); + try writer.print( + \\name {s} + \\timestamp {d} + \\current version {x} + \\compatibility version {x} + , .{ + mem.sliceTo(lc.dylib.data, 0), + dylib.timestamp, + dylib.current_version, + dylib.compatibility_version, + }); + }, + + .MAIN => { + try writer.writeByte('\n'); + try writer.print( + \\entryoff {x} + \\stacksize {x} + , .{ lc.main.entryoff, lc.main.stacksize }); + }, + + .RPATH => { + try writer.writeByte('\n'); + try writer.print( + \\path {s} + , .{ + mem.sliceTo(lc.rpath.data, 0), + }); + }, + + else => {}, + } + } +}; |
