// From 33de937fd91c64cd65894369cf7d92665a8e582e Mon Sep 17 00:00:00 2001
// From: Andrew Kelley
// Date: Sat, 2 Mar 2024 16:01:29 -0700
// Subject: move `zig objcopy` command to be lazily built
//
// part of #19063
// ---
// src/objcopy.zig | 1357 -------------------------------------------------------
// 1 file changed, 1357 deletions(-)
// delete mode 100644 src/objcopy.zig
// (limited to 'src/objcopy.zig')
//
// diff --git a/src/objcopy.zig b/src/objcopy.zig
// deleted file mode 100644
// index 81638d4af0..0000000000
// --- a/src/objcopy.zig
// +++ /dev/null
// @@ -1,1357 +0,0 @@

const std = @import("std");
const mem = std.mem;
const fs = std.fs;
const elf = std.elf;
const Allocator = std.mem.Allocator;
const File = std.fs.File;
const assert = std.debug.assert;

const main = @import("main.zig");
const fatal = main.fatal;
const Server = std.zig.Server;
const build_options = @import("build_options");

/// Maps the value of `-O` / `--output-target` to an object format.
/// "binary" is accepted as an alias for `.raw`; any other value must be the
/// name of a `std.Target.ObjectFormat` tag, otherwise the process exits via `fatal`.
fn parseOutputFormat(name: []const u8) std.Target.ObjectFormat {
    if (mem.eql(u8, name, "binary")) return .raw;
    return std.meta.stringToEnum(std.Target.ObjectFormat, name) orelse
        fatal("invalid output format: '{s}'", .{name});
}

/// Entry point of the `zig objcopy` sub-command.
///
/// Parses `args`, opens the input ELF file and writes the output either as a
/// raw/hex image (`emitElf`) or as a stripped ELF file (`stripElf`).
/// When `--listen=-` was given and the output is raw/hex, a `std.zig.Server`
/// is served on stdin/stdout afterwards.
/// Invalid command lines and unsupported inputs terminate through `fatal`.
pub fn cmdObjCopy(
    gpa: Allocator,
    arena: Allocator,
    args: []const []const u8,
) !void {
    var i: usize = 0;
    var opt_out_fmt: ?std.Target.ObjectFormat = null;
    var opt_input: ?[]const u8 = null;
    var opt_output: ?[]const u8 = null;
    var opt_extract: ?[]const u8 = null;
    var opt_add_debuglink: ?[]const u8 = null;
    var only_section: ?[]const u8 = null;
    var pad_to: ?u64 = null;
    var strip_all: bool = false;
    var strip_debug: bool = false;
    var only_keep_debug: bool = false;
    var compress_debug_sections: bool = false;
    var listen = false;
    while (i < args.len) : (i += 1) {
        const arg = args[i];
        if (!mem.startsWith(u8, arg, "-")) {
            // positional arguments: first the input, then the output
            if (opt_input == null) {
                opt_input = arg;
            } else if (opt_output == null) {
                opt_output = arg;
            } else {
                fatal("unexpected positional argument: '{s}'", .{arg});
            }
        } else if (mem.eql(u8, arg, "-h") or mem.eql(u8, arg, "--help")) {
            return std.io.getStdOut().writeAll(usage);
        } else if (mem.eql(u8, arg, "-O") or mem.eql(u8, arg, "--output-target")) {
            i += 1;
            if (i >= args.len) fatal("expected another argument after '{s}'", .{arg});
            opt_out_fmt = parseOutputFormat(args[i]);
        } else if (mem.startsWith(u8, arg, "--output-target=")) {
            opt_out_fmt = parseOutputFormat(arg["--output-target=".len..]);
        } else if (mem.eql(u8, arg, "-j") or mem.eql(u8, arg, "--only-section")) {
            i += 1;
            if (i >= args.len) fatal("expected another argument after '{s}'", .{arg});
            only_section = args[i];
        } else if (mem.eql(u8, arg, "--listen=-")) {
            listen = true;
        } else if (mem.startsWith(u8, arg, "--only-section=")) {
            only_section = arg["--only-section=".len..];
        } else if (mem.eql(u8, arg, "--pad-to")) {
            i += 1;
            if (i >= args.len) fatal("expected another argument after '{s}'", .{arg});
            pad_to = std.fmt.parseInt(u64, args[i], 0) catch |err| {
                fatal("unable to parse: '{s}': {s}", .{ args[i], @errorName(err) });
            };
        } else if (mem.eql(u8, arg, "-g") or mem.eql(u8, arg, "--strip-debug")) {
            strip_debug = true;
        } else if (mem.eql(u8, arg, "-S") or mem.eql(u8, arg, "--strip-all")) {
            strip_all = true;
        } else if (mem.eql(u8, arg, "--only-keep-debug")) {
            only_keep_debug = true;
        } else if (mem.eql(u8, arg, "--compress-debug-sections")) {
            compress_debug_sections = true;
        } else if (mem.startsWith(u8, arg, "--add-gnu-debuglink=")) {
            opt_add_debuglink = arg["--add-gnu-debuglink=".len..];
        } else if (mem.eql(u8, arg, "--add-gnu-debuglink")) {
            i += 1;
            if (i >= args.len) fatal("expected another argument after '{s}'", .{arg});
            opt_add_debuglink = args[i];
        } else if (mem.startsWith(u8, arg, "--extract-to=")) {
            opt_extract = arg["--extract-to=".len..];
        } else if (mem.eql(u8, arg, "--extract-to")) {
            i += 1;
            if (i >= args.len) fatal("expected another argument after '{s}'", .{arg});
            opt_extract = args[i];
        } else {
            fatal("unrecognized argument: '{s}'", .{arg});
        }
    }
    const input = opt_input orelse fatal("expected input parameter", .{});
    const output = opt_output orelse fatal("expected output parameter", .{});

    var in_file = fs.cwd().openFile(input, .{}) catch |err|
        fatal("unable to open '{s}': {s}", .{ input, @errorName(err) });
    defer in_file.close();

    const elf_hdr = std.elf.Header.read(in_file) catch |err| switch (err) {
        error.InvalidElfMagic => fatal("not an ELF file: '{s}'", .{input}),
        else => fatal("unable to read '{s}': {s}", .{ input, @errorName(err) }),
    };

    const in_ofmt = .elf;

    // Without an explicit --output-target, guess the format from the output file extension.
    const out_fmt: std.Target.ObjectFormat = opt_out_fmt orelse ofmt: {
        if (mem.endsWith(u8, output, ".hex") or std.mem.endsWith(u8, output, ".ihex")) {
            break :ofmt .hex;
        } else if (mem.endsWith(u8, output, ".bin")) {
            break :ofmt .raw;
        } else if (mem.endsWith(u8, output, ".elf")) {
            break :ofmt .elf;
        } else {
            break :ofmt in_ofmt;
        }
    };

    // An ELF output inherits the file mode of the input, unless it only holds debug info.
    const mode = mode: {
        if (out_fmt != .elf or only_keep_debug)
            break :mode fs.File.default_mode;
        if (in_file.stat()) |stat|
            break :mode stat.mode
        else |_|
            break :mode fs.File.default_mode;
    };
    var out_file = try fs.cwd().createFile(output, .{ .mode = mode });
    defer out_file.close();

    switch (out_fmt) {
        .hex, .raw => {
            if (strip_debug or strip_all or only_keep_debug)
                fatal("zig objcopy: ELF to RAW or HEX copying does not support --strip", .{});
            if (opt_extract != null)
                fatal("zig objcopy: ELF to RAW or HEX copying does not support --extract-to", .{});

            try emitElf(arena, in_file, out_file, elf_hdr, .{
                .ofmt = out_fmt,
                .only_section = only_section,
                .pad_to = pad_to,
            });
        },
        .elf => {
            if (elf_hdr.endian != @import("builtin").target.cpu.arch.endian())
                fatal("zig objcopy: ELF to ELF copying only supports native endian", .{});
            if (elf_hdr.phoff == 0) // no program header
                fatal("zig objcopy: ELF to ELF copying only supports programs", .{});
            if (only_section) |_|
                fatal("zig objcopy: ELF to ELF copying does not support --only-section", .{});
            if (pad_to) |_|
                fatal("zig objcopy: ELF to ELF copying does not support --pad-to", .{});

            try stripElf(arena, in_file, out_file, elf_hdr, .{
                .strip_debug = strip_debug,
                .strip_all = strip_all,
                .only_keep_debug = only_keep_debug,
                .add_debuglink = opt_add_debuglink,
                .extract_to = opt_extract,
                .compress_debug = compress_debug_sections,
            });
            // NOTE(review): this returns before the `listen` handling below, so
            // `--listen=-` is never served for ELF output - confirm this is intended.
            return std.process.cleanExit();
        },
        else => fatal("unsupported output object format: {s}", .{@tagName(out_fmt)}),
    }

    if (listen) {
        var server = try Server.init(.{
            .gpa = gpa,
            .in = std.io.getStdIn(),
            .out = std.io.getStdOut(),
            .zig_version = build_options.version,
        });
        defer server.deinit();

        var seen_update = false;
        while (true) {
            const hdr = try server.receiveMessage();
            switch (hdr.tag) {
                .exit => {
                    return std.process.cleanExit();
                },
                .update => {
                    if (seen_update) {
                        std.debug.print("zig objcopy only supports 1 update for now\n", .{});
                        std.process.exit(1);
                    }
                    seen_update = true;

                    try server.serveEmitBinPath(output, .{
                        .flags = .{ .cache_hit = false },
                    });
                },
                else => {
                    std.debug.print("unsupported message: {s}\n", .{@tagName(hdr.tag)});
                    std.process.exit(1);
                },
            }
        }
    }
    return std.process.cleanExit();
}

const usage =
    \\Usage: zig objcopy [options] input output
    \\
    \\Options:
    \\  -h, --help                              Print this help and exit
    \\  --output-target=<value>                 Format of the output file
    \\  -O <value>                              Alias for --output-target
    \\  --only-section=<section>                Remove all but <section>
    \\  -j <value>                              Alias for --only-section
    \\  --pad-to <addr>                         Pad the last section up to address <addr>
    \\  --strip-debug, -g                       Remove all debug sections from the output.
    \\  --strip-all, -S                         Remove all debug sections and symbol table from the output.
    \\  --only-keep-debug                       Strip a file, removing contents of any sections that would not be stripped by --strip-debug and leaving the debugging sections intact.
    \\  --add-gnu-debuglink=<file>              Creates a .gnu_debuglink section which contains a reference to <file> and adds it to the output file.
    \\  --extract-to <file>                     Extract the removed sections into <file>, and add a .gnu-debuglink section.
    \\  --compress-debug-sections               Compress DWARF debug sections with zlib
    \\
;

/// Options for `emitElf` (ELF to raw binary / Intel HEX conversion).
pub const EmitRawElfOptions = struct {
    ofmt: std.Target.ObjectFormat,
    only_section: ?[]const u8 = null,
    pad_to: ?u64 = null,
};

/// Converts the ELF file `in_file` to a raw binary or Intel HEX image written to `out_file`.
///
/// Errors:
///  - `error.SectionNotFound` when `options.only_section` names no output section,
///  - `error.InvalidHexfileAddressRange` when a segment does not fit 32-bit addressing,
///  - `error.InvalidArgument` when `options.pad_to` is combined with hex output.
fn emitElf(
    arena: Allocator,
    in_file: File,
    out_file: File,
    elf_hdr: elf.Header,
    options: EmitRawElfOptions,
) !void {
    var binary_elf_output = try BinaryElfOutput.parse(arena, in_file, elf_hdr);
    defer binary_elf_output.deinit();

    if (options.ofmt == .elf) {
        fatal("zig objcopy: ELF to ELF copying is not implemented yet", .{});
    }

    if (options.only_section) |target_name| {
        switch (options.ofmt) {
            .hex => fatal("zig objcopy: hex format with sections is not implemented yet", .{}),
            .raw => {
                for (binary_elf_output.sections.items) |section| {
                    if (section.name) |curr_name| {
                        if (!std.mem.eql(u8, curr_name, target_name))
                            continue;
                    } else {
                        continue;
                    }

                    try writeBinaryElfSection(in_file, out_file, section);
                    try padFile(out_file, options.pad_to);
                    return;
                }
            },
            else => unreachable,
        }

        return error.SectionNotFound;
    }

    switch (options.ofmt) {
        .raw => {
            for (binary_elf_output.sections.items) |section| {
                try out_file.seekTo(section.binaryOffset);
                try writeBinaryElfSection(in_file, out_file, section);
            }
            try padFile(out_file, options.pad_to);
        },
        .hex => {
            if (binary_elf_output.segments.items.len == 0) return;
            if (!containsValidAddressRange(binary_elf_output.segments.items)) {
                return error.InvalidHexfileAddressRange;
            }
            if (options.pad_to) |_| {
                // Padding to a size in hex files isn't applicable.
                // Rejected before anything is written to the output file.
                return error.InvalidArgument;
            }

            var hex_writer = HexWriter{ .out_file = out_file };
            for (binary_elf_output.segments.items) |segment| {
                try hex_writer.writeSegment(segment, in_file);
            }
            try hex_writer.writeEOF();
        },
        else => unreachable,
    }
}

/// A section of the input file that is copied to the raw binary output.
const BinaryElfSection = struct {
    elfOffset: u64,
    binaryOffset: u64,
    fileSize: usize,
    name: ?[]const u8,
    segment: ?*BinaryElfSegment,
};

/// A `PT_LOAD` segment of the input file.
const BinaryElfSegment = struct {
    physicalAddress: u64,
    virtualAddress: u64,
    elfOffset: u64,
    binaryOffset: u64,
    fileSize: u64,
    firstSection: ?*BinaryElfSection,
};

/// Output layout (sections and loadable segments) computed from an ELF file
/// for the raw and hex writers.
const BinaryElfOutput = struct {
    segments: std.ArrayListUnmanaged(*BinaryElfSegment),
    sections: std.ArrayListUnmanaged(*BinaryElfSection),
    allocator: Allocator,
    shstrtab: ?[]const u8,

    const Self = @This();

    /// Frees the string table and both lists.
    /// The individual section/segment objects are not destroyed here.
    pub fn deinit(self: *Self) void {
        if (self.shstrtab) |shstrtab|
            self.allocator.free(shstrtab);
        self.sections.deinit(self.allocator);
        self.segments.deinit(self.allocator);
    }

    /// Reads the section and program headers of `elf_file` and computes, for every
    /// allocated section with file contents, its offset in the binary output.
    pub fn parse(allocator: Allocator, elf_file: File, elf_hdr: elf.Header) !Self {
        var self: Self = .{
            .segments = .{},
            .sections = .{},
            .allocator = allocator,
            .shstrtab = null,
        };
        errdefer self.sections.deinit(allocator);
        errdefer self.segments.deinit(allocator);

        // load the section name string table, if the file has one
        self.shstrtab = blk: {
            if (elf_hdr.shstrndx >= elf_hdr.shnum) break :blk null;

            var section_headers = elf_hdr.section_header_iterator(&elf_file);

            var section_counter: usize = 0;
            while (section_counter < elf_hdr.shstrndx) : (section_counter += 1) {
                _ = (try section_headers.next()).?;
            }

            const shstrtab_shdr = (try section_headers.next()).?;

            const buffer = try allocator.alloc(u8, @intCast(shstrtab_shdr.sh_size));
            errdefer allocator.free(buffer);

            const num_read = try elf_file.preadAll(buffer, shstrtab_shdr.sh_offset);
            if (num_read != buffer.len) return error.EndOfStream;

            break :blk buffer;
        };

        errdefer if (self.shstrtab) |shstrtab| allocator.free(shstrtab);

        var section_headers = elf_hdr.section_header_iterator(&elf_file);
        while (try section_headers.next()) |section| {
            if (sectionValidForOutput(section)) {
                const newSection = try allocator.create(BinaryElfSection);

                newSection.binaryOffset = 0;
                newSection.elfOffset = section.sh_offset;
                newSection.fileSize = @intCast(section.sh_size);
                newSection.segment = null;

                // Bounds-checked lookup: a name offset outside of the string table,
                // or a table without a terminating NUL, must not read out of bounds.
                newSection.name = if (self.shstrtab) |shstrtab| name: {
                    if (section.sh_name >= shstrtab.len) break :name null;
                    break :name mem.sliceTo(shstrtab[section.sh_name..], 0);
                } else null;

                try self.sections.append(allocator, newSection);
            }
        }

        var program_headers = elf_hdr.program_header_iterator(&elf_file);
        while (try program_headers.next()) |phdr| {
            if (phdr.p_type == elf.PT_LOAD) {
                const newSegment = try allocator.create(BinaryElfSegment);

                newSegment.physicalAddress = if (phdr.p_paddr != 0) phdr.p_paddr else phdr.p_vaddr;
                newSegment.virtualAddress = phdr.p_vaddr;
                newSegment.fileSize = @intCast(phdr.p_filesz);
                newSegment.elfOffset = phdr.p_offset;
                newSegment.binaryOffset = 0;
                newSegment.firstSection = null;

                for (self.sections.items) |section| {
                    if (sectionWithinSegment(section, phdr)) {
                        // a section covered by several segments is attached to the
                        // one with the lowest file offset
                        if (section.segment) |sectionSegment| {
                            if (sectionSegment.elfOffset > newSegment.elfOffset) {
                                section.segment = newSegment;
                            }
                        } else {
                            section.segment = newSegment;
                        }

                        if (newSegment.firstSection == null) {
                            newSegment.firstSection = section;
                        }
                    }
                }

                try self.segments.append(allocator, newSegment);
            }
        }

        mem.sort(*BinaryElfSegment, self.segments.items, {}, segmentSortCompare);

        // The first segment that contains a section defines the base address of the
        // output; the binary offsets of the following segments are relative to it.
        for (self.segments.items, 0..) |firstSegment, i| {
            if (firstSegment.firstSection) |firstSection| {
                const diff = firstSection.elfOffset - firstSegment.elfOffset;

                firstSegment.elfOffset += diff;
                firstSegment.fileSize += diff;
                firstSegment.physicalAddress += diff;

                const basePhysicalAddress = firstSegment.physicalAddress;

                for (self.segments.items[i + 1 ..]) |segment| {
                    segment.binaryOffset = segment.physicalAddress - basePhysicalAddress;
                }
                break;
            }
        }

        for (self.sections.items) |section| {
            if (section.segment) |segment| {
                section.binaryOffset = segment.binaryOffset + (section.elfOffset - segment.elfOffset);
            }
        }

        mem.sort(*BinaryElfSection, self.sections.items, {}, sectionSortCompare);

        return self;
    }

    /// True when the file range of `section` lies entirely inside the file range of `segment`.
    fn sectionWithinSegment(section: *BinaryElfSection, segment: elf.Elf64_Phdr) bool {
        return segment.p_offset <= section.elfOffset and (segment.p_offset + segment.p_filesz) >= (section.elfOffset + section.fileSize);
    }

    /// Only allocated sections (`SHF_ALLOC`) with file contents (not `SHT_NOBITS`) are emitted.
    fn sectionValidForOutput(shdr: anytype) bool {
        return shdr.sh_type != elf.SHT_NOBITS and
            ((shdr.sh_flags & elf.SHF_ALLOC) == elf.SHF_ALLOC);
    }

    /// Orders segments by increasing physical address.
    fn segmentSortCompare(context: void, left: *BinaryElfSegment, right: *BinaryElfSegment) bool {
        _ = context;
        return left.physicalAddress < right.physicalAddress;
    }

    /// Orders sections by increasing offset in the binary output.
    fn sectionSortCompare(context: void, left: *BinaryElfSection, right: *BinaryElfSection) bool {
        _ = context;
        return left.binaryOffset < right.binaryOffset;
    }
};

/// Copies the file contents of `section` from `elf_file` to the current position of `out_file`.
fn writeBinaryElfSection(elf_file: File, out_file: File, section: *BinaryElfSection) !void {
    try out_file.writeFileAll(elf_file, .{
        .in_offset = section.elfOffset,
        .in_len = section.fileSize,
    });
}

/// Writes Intel HEX records to `out_file`.
const HexWriter = struct {
    /// Low 16 bits of the address of the previous data row, plus its length.
    prev_addr: ?u32 = null,
    /// Upper 16 bits of the address of the previous data row.
    prev_upper: u16 = 0,
    out_file: File,

    /// Max data bytes per line of output
    const MAX_PAYLOAD_LEN: u8 = 16;

    /// Splits a 16-bit value into its big-endian bytes.
    fn addressParts(address: u16) [2]u8 {
        const msb: u8 = @truncate(address >> 8);
        const lsb: u8 = @truncate(address);
        return [2]u8{ msb, lsb };
    }

    const Record = struct {
        const Type = enum(u8) {
            Data = 0,
            EOF = 1,
            ExtendedSegmentAddress = 2,
            ExtendedLinearAddress = 4,
        };

        address: u16,
        payload: union(Type) {
            Data: []const u8,
            EOF: void,
            ExtendedSegmentAddress: [2]u8,
            ExtendedLinearAddress: [2]u8,
        },

        fn EOF() Record {
            return Record{
                .address = 0,
                .payload = .EOF,
            };
        }

        /// Data record; only the low 16 bits of `address` are stored in the record.
        fn Data(address: u32, data: []const u8) Record {
            return Record{
                .address = @intCast(address % 0x10000),
                .payload = .{ .Data = data },
            };
        }

        /// Extended address record selecting the 64 KiB bank that contains `address`.
        /// `address` must be above 0xFFFF.
        fn Address(address: u32) Record {
            assert(address > 0xFFFF);
            const segment: u16 = @intCast(address / 0x10000);
            if (address > 0xFFFFF) {
                return Record{
                    .address = 0,
                    .payload = .{ .ExtendedLinearAddress = addressParts(segment) },
                };
            } else {
                return Record{
                    .address = 0,
                    .payload = .{ .ExtendedSegmentAddress = addressParts(segment << 12) },
                };
            }
        }

        fn getPayloadBytes(self: *const Record) []const u8 {
            return switch (self.payload) {
                .Data => |d| d,
                .EOF => @as([]const u8, &.{}),
                .ExtendedSegmentAddress, .ExtendedLinearAddress => |*seg| seg,
            };
        }

        /// Two's complement of the wrapping sum of the length, address, type and payload bytes.
        fn checksum(self: Record) u8 {
            const payload_bytes = self.getPayloadBytes();

            var sum: u8 = @intCast(payload_bytes.len);
            const parts = addressParts(self.address);
            sum +%= parts[0];
            sum +%= parts[1];
            sum +%= @intFromEnum(self.payload);
            for (payload_bytes) |byte| {
                sum +%= byte;
            }
            return (sum ^ 0xFF) +% 1;
        }

        fn write(self: Record, file: File) File.WriteError!void {
            const linesep = "\r\n";
            // colon, (length, address, type, payload, checksum) as hex, CRLF
            const BUFSIZE = 1 + (1 + 2 + 1 + MAX_PAYLOAD_LEN + 1) * 2 + linesep.len;
            var outbuf: [BUFSIZE]u8 = undefined;
            const payload_bytes = self.getPayloadBytes();
            assert(payload_bytes.len <= MAX_PAYLOAD_LEN);

            const line = try std.fmt.bufPrint(&outbuf, ":{0X:0>2}{1X:0>4}{2X:0>2}{3s}{4X:0>2}" ++ linesep, .{
                @as(u8, @intCast(payload_bytes.len)),
                self.address,
                @intFromEnum(self.payload),
                std.fmt.fmtSliceHexUpper(payload_bytes),
                self.checksum(),
            });
            try file.writeAll(line);
        }
    };

    /// Writes the file contents of `segment` as data rows of at most `MAX_PAYLOAD_LEN` bytes.
    pub fn writeSegment(self: *HexWriter, segment: *const BinaryElfSegment, elf_file: File) !void {
        var buf: [MAX_PAYLOAD_LEN]u8 = undefined;
        var bytes_read: usize = 0;
        while (bytes_read < segment.fileSize) {
            const row_address: u32 = @intCast(segment.physicalAddress + bytes_read);

            const remaining = segment.fileSize - bytes_read;
            const to_read: usize = @intCast(@min(remaining, MAX_PAYLOAD_LEN));
            const did_read = try elf_file.preadAll(buf[0..to_read], segment.elfOffset + bytes_read);
            if (did_read < to_read) return error.UnexpectedEOF;

            try self.writeDataRow(row_address, buf[0..did_read]);

            bytes_read += did_read;
        }
    }

    /// Writes one data row, preceded by an extended address record when needed.
    fn writeDataRow(self: *HexWriter, address: u32, data: []const u8) File.WriteError!void {
        const record = Record.Data(address, data);
        const upper: u16 = @intCast(address >> 16);
        const discontinuous = self.prev_addr == null or record.address != self.prev_addr.?;
        // Comparing only the low 16 bits is not enough: a row in another 64 KiB bank
        // may start at the low offset where the previous row ended, and would then be
        // written without the address record that selects its bank.
        if (address > 0xFFFF and (discontinuous or upper != self.prev_upper)) {
            try Record.Address(address).write(self.out_file);
        }
        try record.write(self.out_file);
        self.prev_addr = @intCast(record.address + data.len);
        self.prev_upper = upper;
    }

    fn writeEOF(self: HexWriter) File.WriteError!void {
        try Record.EOF().write(self.out_file);
    }
};

/// True when every segment, start to end, is addressable with 32 bits.
fn containsValidAddressRange(segments: []*BinaryElfSegment) bool {
    const max_address = std.math.maxInt(u32);
    for (segments) |segment| {
        if (segment.fileSize > max_address or
            segment.physicalAddress > max_address - segment.fileSize) return false;
    }
    return true;
}

/// Sets the end position of `f` to `opt_size`; does nothing when it is null.
fn padFile(f: File, opt_size: ?u64) !void {
    const size = opt_size orelse return;
    try f.setEndPos(size);
}

test "HexWriter.Record.Address has correct payload and checksum" {
    const record = HexWriter.Record.Address(0x0800_0000);
    const payload = record.getPayloadBytes();
    const sum = record.checksum();
    try std.testing.expect(sum == 0xF2);
    try std.testing.expect(payload.len == 2);
    try std.testing.expect(payload[0] == 8);
    try std.testing.expect(payload[1] == 0);
}

test "containsValidAddressRange" {
    var segment = BinaryElfSegment{
        .physicalAddress = 0,
        .virtualAddress = 0,
        .elfOffset = 0,
        .binaryOffset = 0,
        .fileSize = 0,
        .firstSection = null,
    };
    var buf: [1]*BinaryElfSegment = .{&segment};

    // segment too big
    segment.fileSize = std.math.maxInt(u32) + 1;
    try std.testing.expect(!containsValidAddressRange(&buf));

    // start address too big
    segment.physicalAddress = std.math.maxInt(u32) + 1;
    segment.fileSize = 2;
    try std.testing.expect(!containsValidAddressRange(&buf));

    // max address too big
    segment.physicalAddress = std.math.maxInt(u32) - 1;
    segment.fileSize = 2;
    try std.testing.expect(!containsValidAddressRange(&buf));

    // is ok
    segment.physicalAddress = std.math.maxInt(u32) - 1;
    segment.fileSize = 1;
    try std.testing.expect(containsValidAddressRange(&buf));
}

// -------------
// ELF to ELF stripping

/// Options for `stripElf`.
const StripElfOptions = struct {
    extract_to: ?[]const u8 = null,
    add_debuglink: ?[]const u8 = null,
    strip_all: bool = false,
    strip_debug: bool = false,
    only_keep_debug: bool = false,
    compress_debug: bool = false,
};

/// Writes a filtered copy of the ELF file `in_file` to `out_file`.
///
/// The section filter is derived from the strip options. With `extract_to`, the
/// complementary set of sections is first written to that path, and the output
/// gets a debuglink that refers to it (or to `add_debuglink` when that is set).
fn stripElf(
    allocator: Allocator,
    in_file: File,
    out_file: File,
    elf_hdr: elf.Header,
    options: StripElfOptions,
) !void {
    const Filter = ElfFileHelper.Filter;
    const DebugLink = ElfFileHelper.DebugLink;

    const filter: Filter = filter: {
        if (options.only_keep_debug) break :filter .debug;
        if (options.strip_all) break :filter .program;
        if (options.strip_debug) break :filter .program_and_symbols;
        break :filter .all;
    };

    const filter_complement: ?Filter = blk: {
        if (options.extract_to) |_| {
            break :blk switch (filter) {
                .program => .debug_and_symbols,
                .debug => .program_and_symbols,
                .program_and_symbols => .debug,
                .debug_and_symbols => .program,
                .all => fatal("zig objcopy: nothing to extract", .{}),
            };
        } else {
            break :blk null;
        }
    };
    const debuglink_path = path: {
        if (options.add_debuglink) |path| break :path path;
        if (options.extract_to) |path| break :path path;
        break :path null;
    };

    switch (elf_hdr.is_64) {
        inline else => |is_64| {
            var elf_file = try ElfFile(is_64).parse(allocator, in_file, elf_hdr);
            defer elf_file.deinit();

            if (filter_complement) |flt| {
                // write the .dbg file and close it, so it can be read back to compute the debuglink checksum.
                const path = options.extract_to.?;
                const dbg_file = std.fs.cwd().createFile(path, .{}) catch |err| {
                    fatal("zig objcopy: unable to create '{s}': {s}", .{ path, @errorName(err) });
                };
                defer dbg_file.close();

                try elf_file.emit(allocator, dbg_file, in_file, .{ .section_filter = flt, .compress_debug = options.compress_debug });
            }

            const debuglink: ?DebugLink = if (debuglink_path) |path| ElfFileHelper.createDebugLink(path) else null;
            try elf_file.emit(allocator, out_file, in_file, .{ .section_filter = filter, .debuglink = debuglink, .compress_debug = options.compress_debug });
        },
    }
}

// note: this is "a minimal effort implementation"
//  It doesn't support all possible elf files: some section types may need fixups, the program header may need fix up, ...
//  It was written for a specific use case (strip debug info to a separate file, for linux 64-bits executables built with `zig` or `zig c++` )
//  It moves and reorders the sections as little as possible to avoid having to do fixups.
-// TODO: support non-native endianess - -fn ElfFile(comptime is_64: bool) type { - const Elf_Ehdr = if (is_64) elf.Elf64_Ehdr else elf.Elf32_Ehdr; - const Elf_Phdr = if (is_64) elf.Elf64_Phdr else elf.Elf32_Phdr; - const Elf_Shdr = if (is_64) elf.Elf64_Shdr else elf.Elf32_Shdr; - const Elf_Chdr = if (is_64) elf.Elf64_Chdr else elf.Elf32_Chdr; - const Elf_Sym = if (is_64) elf.Elf64_Sym else elf.Elf32_Sym; - const Elf_Verdef = if (is_64) elf.Elf64_Verdef else elf.Elf32_Verdef; - const Elf_OffSize = if (is_64) elf.Elf64_Off else elf.Elf32_Off; - - return struct { - raw_elf_header: Elf_Ehdr, - program_segments: []const Elf_Phdr, - sections: []const Section, - arena: std.heap.ArenaAllocator, - - const SectionCategory = ElfFileHelper.SectionCategory; - const section_memory_align = @alignOf(Elf_Sym); // most restrictive of what we may load in memory - const Section = struct { - section: Elf_Shdr, - name: []const u8 = "", - segment: ?*const Elf_Phdr = null, // if the section is used by a program segment (there can be more than one) - payload: ?[]align(section_memory_align) const u8 = null, // if we need the data in memory - category: SectionCategory = .none, // should the section be kept in the exe or stripped to the debug database, or both. 
- }; - - const Self = @This(); - - pub fn parse(gpa: Allocator, in_file: File, header: elf.Header) !Self { - var arena = std.heap.ArenaAllocator.init(gpa); - errdefer arena.deinit(); - const allocator = arena.allocator(); - - var raw_header: Elf_Ehdr = undefined; - { - const bytes_read = try in_file.preadAll(std.mem.asBytes(&raw_header), 0); - if (bytes_read < @sizeOf(Elf_Ehdr)) - return error.TRUNCATED_ELF; - } - - // program header: list of segments - const program_segments = blk: { - if (@sizeOf(Elf_Phdr) != header.phentsize) - fatal("zig objcopy: unsuported ELF file, unexpected phentsize ({d})", .{header.phentsize}); - - const program_header = try allocator.alloc(Elf_Phdr, header.phnum); - const bytes_read = try in_file.preadAll(std.mem.sliceAsBytes(program_header), header.phoff); - if (bytes_read < @sizeOf(Elf_Phdr) * header.phnum) - return error.TRUNCATED_ELF; - break :blk program_header; - }; - - // section header - const sections = blk: { - if (@sizeOf(Elf_Shdr) != header.shentsize) - fatal("zig objcopy: unsuported ELF file, unexpected shentsize ({d})", .{header.shentsize}); - - const section_header = try allocator.alloc(Section, header.shnum); - - const raw_section_header = try allocator.alloc(Elf_Shdr, header.shnum); - defer allocator.free(raw_section_header); - const bytes_read = try in_file.preadAll(std.mem.sliceAsBytes(raw_section_header), header.shoff); - if (bytes_read < @sizeOf(Elf_Phdr) * header.shnum) - return error.TRUNCATED_ELF; - - for (section_header, raw_section_header) |*section, hdr| { - section.* = .{ .section = hdr }; - } - break :blk section_header; - }; - - // load data to memory for some sections: - // string tables for access - // sections than need modifications when other sections move. - for (sections, 0..) 
|*section, idx| { - const need_data = switch (section.section.sh_type) { - elf.DT_VERSYM => true, - elf.SHT_SYMTAB, elf.SHT_DYNSYM => true, - else => false, - }; - const need_strings = (idx == header.shstrndx); - - if (need_data or need_strings) { - const buffer = try allocator.alignedAlloc(u8, section_memory_align, @intCast(section.section.sh_size)); - const bytes_read = try in_file.preadAll(buffer, section.section.sh_offset); - if (bytes_read != section.section.sh_size) return error.TRUNCATED_ELF; - section.payload = buffer; - } - } - - // fill-in sections info: - // resolve the name - // find if a program segment uses the section - // categorize sections usage (used by program segments, debug datadase, common metadata, symbol table) - for (sections) |*section| { - section.segment = for (program_segments) |*seg| { - if (sectionWithinSegment(section.section, seg.*)) break seg; - } else null; - - if (section.section.sh_name != 0 and header.shstrndx != elf.SHN_UNDEF) - section.name = std.mem.span(@as([*:0]const u8, @ptrCast(§ions[header.shstrndx].payload.?[section.section.sh_name]))); - - const category_from_program: SectionCategory = if (section.segment != null) .exe else .debug; - section.category = switch (section.section.sh_type) { - elf.SHT_NOTE => .common, - elf.SHT_SYMTAB => .symbols, // "strip all" vs "strip only debug" - elf.SHT_DYNSYM => .exe, - elf.SHT_PROGBITS => cat: { - if (std.mem.eql(u8, section.name, ".comment")) break :cat .exe; - if (std.mem.eql(u8, section.name, ".gnu_debuglink")) break :cat .none; - break :cat category_from_program; - }, - elf.SHT_LOPROC...elf.SHT_HIPROC => .common, // don't strip unknown sections - elf.SHT_LOUSER...elf.SHT_HIUSER => .common, // don't strip unknown sections - else => category_from_program, - }; - } - - sections[0].category = .common; // mandatory null section - if (header.shstrndx != elf.SHN_UNDEF) - sections[header.shstrndx].category = .common; // string table for the headers - - // recursively propagate 
section categories to their linked sections, so that they are kept together - var dirty: u1 = 1; - while (dirty != 0) { - dirty = 0; - - for (sections) |*section| { - if (section.section.sh_link != elf.SHN_UNDEF) - dirty |= ElfFileHelper.propagateCategory(§ions[section.section.sh_link].category, section.category); - if ((section.section.sh_flags & elf.SHF_INFO_LINK) != 0 and section.section.sh_info != elf.SHN_UNDEF) - dirty |= ElfFileHelper.propagateCategory(§ions[section.section.sh_info].category, section.category); - } - } - - return Self{ - .arena = arena, - .raw_elf_header = raw_header, - .program_segments = program_segments, - .sections = sections, - }; - } - - pub fn deinit(self: *Self) void { - self.arena.deinit(); - } - - const Filter = ElfFileHelper.Filter; - const DebugLink = ElfFileHelper.DebugLink; - const EmitElfOptions = struct { - section_filter: Filter = .all, - debuglink: ?DebugLink = null, - compress_debug: bool = false, - }; - fn emit(self: *const Self, gpa: Allocator, out_file: File, in_file: File, options: EmitElfOptions) !void { - var arena = std.heap.ArenaAllocator.init(gpa); - defer arena.deinit(); - const allocator = arena.allocator(); - - // when emitting the stripped exe: - // - unused sections are removed - // when emitting the debug file: - // - all sections are kept, but some are emptied and their types is changed to SHT_NOBITS - // the program header is kept unchanged. 
(`strip` does update it, but `eu-strip` does not, and it still works) - - const Update = struct { - action: ElfFileHelper.Action, - - // remap the indexs after omitting the filtered sections - remap_idx: u16, - - // optionally overrides the payload from the source file - payload: ?[]align(section_memory_align) const u8 = null, - section: ?Elf_Shdr = null, - }; - const sections_update = try allocator.alloc(Update, self.sections.len); - const new_shnum = blk: { - var next_idx: u16 = 0; - for (self.sections, sections_update) |section, *update| { - const action = ElfFileHelper.selectAction(section.category, options.section_filter); - const remap_idx = idx: { - if (action == .strip) break :idx elf.SHN_UNDEF; - next_idx += 1; - break :idx next_idx - 1; - }; - update.* = Update{ .action = action, .remap_idx = remap_idx }; - } - - if (options.debuglink != null) - next_idx += 1; - - break :blk next_idx; - }; - - // add a ".gnu_debuglink" to the string table if needed - const debuglink_name: u32 = blk: { - if (options.debuglink == null) break :blk elf.SHN_UNDEF; - if (self.raw_elf_header.e_shstrndx == elf.SHN_UNDEF) - fatal("zig objcopy: no strtab, cannot add the debuglink section", .{}); // TODO add the section if needed? 
- - const strtab = &self.sections[self.raw_elf_header.e_shstrndx]; - const update = §ions_update[self.raw_elf_header.e_shstrndx]; - - const name: []const u8 = ".gnu_debuglink"; - const new_offset: u32 = @intCast(strtab.payload.?.len); - const buf = try allocator.alignedAlloc(u8, section_memory_align, new_offset + name.len + 1); - @memcpy(buf[0..new_offset], strtab.payload.?); - @memcpy(buf[new_offset..][0..name.len], name); - buf[new_offset + name.len] = 0; - - assert(update.action == .keep); - update.payload = buf; - - break :blk new_offset; - }; - - // maybe compress .debug sections - if (options.compress_debug) { - for (self.sections[1..], sections_update[1..]) |section, *update| { - if (update.action != .keep) continue; - if (!std.mem.startsWith(u8, section.name, ".debug_")) continue; - if ((section.section.sh_flags & elf.SHF_COMPRESSED) != 0) continue; // already compressed - - const chdr = Elf_Chdr{ - .ch_type = elf.COMPRESS.ZLIB, - .ch_size = section.section.sh_size, - .ch_addralign = section.section.sh_addralign, - }; - - const compressed_payload = try ElfFileHelper.tryCompressSection(allocator, in_file, section.section.sh_offset, section.section.sh_size, std.mem.asBytes(&chdr)); - if (compressed_payload) |payload| { - update.payload = payload; - update.section = section.section; - update.section.?.sh_addralign = @alignOf(Elf_Chdr); - update.section.?.sh_size = @intCast(payload.len); - update.section.?.sh_flags |= elf.SHF_COMPRESSED; - } - } - } - - var cmdbuf = std.ArrayList(ElfFileHelper.WriteCmd).init(allocator); - defer cmdbuf.deinit(); - try cmdbuf.ensureUnusedCapacity(3 + new_shnum); - var eof_offset: Elf_OffSize = 0; // track the end of the data written so far. 
- - // build the updated headers - // nb: updated_elf_header will be updated before the actual write - var updated_elf_header = self.raw_elf_header; - if (updated_elf_header.e_shstrndx != elf.SHN_UNDEF) - updated_elf_header.e_shstrndx = sections_update[updated_elf_header.e_shstrndx].remap_idx; - cmdbuf.appendAssumeCapacity(.{ .write_data = .{ .data = std.mem.asBytes(&updated_elf_header), .out_offset = 0 } }); - eof_offset = @sizeOf(Elf_Ehdr); - - // program header as-is. - // nb: for only-debug files, removing it appears to work, but is invalid by ELF specifcation. - { - assert(updated_elf_header.e_phoff == @sizeOf(Elf_Ehdr)); - const data = std.mem.sliceAsBytes(self.program_segments); - assert(data.len == @as(usize, updated_elf_header.e_phentsize) * updated_elf_header.e_phnum); - cmdbuf.appendAssumeCapacity(.{ .write_data = .{ .data = data, .out_offset = updated_elf_header.e_phoff } }); - eof_offset = updated_elf_header.e_phoff + @as(Elf_OffSize, @intCast(data.len)); - } - - // update sections and queue payload writes - const updated_section_header = blk: { - const dest_sections = try allocator.alloc(Elf_Shdr, new_shnum); - - { - // the ELF format doesn't specify the order for all sections. - // this code only supports when they are in increasing file order. 
- var offset: u64 = eof_offset; - for (self.sections[1..]) |section| { - if (section.section.sh_type == elf.SHT_NOBITS) - continue; - if (section.section.sh_offset < offset) { - fatal("zig objcopy: unsuported ELF file", .{}); - } - offset = section.section.sh_offset; - } - } - - dest_sections[0] = self.sections[0].section; - - var dest_section_idx: u32 = 1; - for (self.sections[1..], sections_update[1..]) |section, update| { - if (update.action == .strip) continue; - assert(update.remap_idx == dest_section_idx); - - const src = if (update.section) |*s| s else §ion.section; - const dest = &dest_sections[dest_section_idx]; - const payload = if (update.payload) |data| data else section.payload; - dest_section_idx += 1; - - dest.* = src.*; - - if (src.sh_link != elf.SHN_UNDEF) - dest.sh_link = sections_update[src.sh_link].remap_idx; - if ((src.sh_flags & elf.SHF_INFO_LINK) != 0 and src.sh_info != elf.SHN_UNDEF) - dest.sh_info = sections_update[src.sh_info].remap_idx; - - if (payload) |data| - dest.sh_size = @intCast(data.len); - - const addralign = if (src.sh_addralign == 0 or dest.sh_type == elf.SHT_NOBITS) 1 else src.sh_addralign; - dest.sh_offset = std.mem.alignForward(Elf_OffSize, eof_offset, addralign); - if (src.sh_offset != dest.sh_offset and section.segment != null and update.action != .empty and dest.sh_type != elf.SHT_NOTE and dest.sh_type != elf.SHT_NOBITS) { - if (src.sh_offset > dest.sh_offset) { - dest.sh_offset = src.sh_offset; // add padding to avoid modifing the program segments - } else { - fatal("zig objcopy: cannot adjust program segments", .{}); - } - } - assert(dest.sh_addr % addralign == dest.sh_offset % addralign); - - if (update.action == .empty) - dest.sh_type = elf.SHT_NOBITS; - - if (dest.sh_type != elf.SHT_NOBITS) { - if (payload) |src_data| { - // update sections payload and write - const dest_data = switch (src.sh_type) { - elf.DT_VERSYM => dst_data: { - const data = try allocator.alignedAlloc(u8, section_memory_align, src_data.len); - 
@memcpy(data, src_data); - - const defs = @as([*]Elf_Verdef, @ptrCast(data))[0 .. @as(usize, @intCast(src.sh_size)) / @sizeOf(Elf_Verdef)]; - for (defs) |*def| { - if (def.vd_ndx != elf.SHN_UNDEF) - def.vd_ndx = sections_update[src.sh_info].remap_idx; - } - - break :dst_data data; - }, - elf.SHT_SYMTAB, elf.SHT_DYNSYM => dst_data: { - const data = try allocator.alignedAlloc(u8, section_memory_align, src_data.len); - @memcpy(data, src_data); - - const syms = @as([*]Elf_Sym, @ptrCast(data))[0 .. @as(usize, @intCast(src.sh_size)) / @sizeOf(Elf_Sym)]; - for (syms) |*sym| { - if (sym.st_shndx != elf.SHN_UNDEF and sym.st_shndx < elf.SHN_LORESERVE) - sym.st_shndx = sections_update[sym.st_shndx].remap_idx; - } - - break :dst_data data; - }, - else => src_data, - }; - - assert(dest_data.len == dest.sh_size); - cmdbuf.appendAssumeCapacity(.{ .write_data = .{ .data = dest_data, .out_offset = dest.sh_offset } }); - eof_offset = dest.sh_offset + dest.sh_size; - } else { - // direct contents copy - cmdbuf.appendAssumeCapacity(.{ .copy_range = .{ .in_offset = src.sh_offset, .len = dest.sh_size, .out_offset = dest.sh_offset } }); - eof_offset = dest.sh_offset + dest.sh_size; - } - } else { - // account for alignment padding even in empty sections to keep logical section order - eof_offset = dest.sh_offset; - } - } - - // add a ".gnu_debuglink" section - if (options.debuglink) |link| { - const payload = payload: { - const crc_offset = std.mem.alignForward(usize, link.name.len + 1, 4); - const buf = try allocator.alignedAlloc(u8, 4, crc_offset + 4); - @memcpy(buf[0..link.name.len], link.name); - @memset(buf[link.name.len..crc_offset], 0); - @memcpy(buf[crc_offset..], std.mem.asBytes(&link.crc32)); - break :payload buf; - }; - - dest_sections[dest_section_idx] = Elf_Shdr{ - .sh_name = debuglink_name, - .sh_type = elf.SHT_PROGBITS, - .sh_flags = 0, - .sh_addr = 0, - .sh_offset = eof_offset, - .sh_size = @intCast(payload.len), - .sh_link = elf.SHN_UNDEF, - .sh_info = elf.SHN_UNDEF, - 
.sh_addralign = 4, - .sh_entsize = 0, - }; - dest_section_idx += 1; - - cmdbuf.appendAssumeCapacity(.{ .write_data = .{ .data = payload, .out_offset = eof_offset } }); - eof_offset += @as(Elf_OffSize, @intCast(payload.len)); - } - - assert(dest_section_idx == new_shnum); - break :blk dest_sections; - }; - - // write the section header at the tail - { - const offset = std.mem.alignForward(Elf_OffSize, eof_offset, @alignOf(Elf_Shdr)); - - const data = std.mem.sliceAsBytes(updated_section_header); - assert(data.len == @as(usize, updated_elf_header.e_shentsize) * new_shnum); - updated_elf_header.e_shoff = offset; - updated_elf_header.e_shnum = new_shnum; - - cmdbuf.appendAssumeCapacity(.{ .write_data = .{ .data = data, .out_offset = updated_elf_header.e_shoff } }); - } - - try ElfFileHelper.write(allocator, out_file, in_file, cmdbuf.items); - } - - fn sectionWithinSegment(section: Elf_Shdr, segment: Elf_Phdr) bool { - const file_size = if (section.sh_type == elf.SHT_NOBITS) 0 else section.sh_size; - return segment.p_offset <= section.sh_offset and (segment.p_offset + segment.p_filesz) >= (section.sh_offset + file_size); - } - }; -} -
/// Namespace of helper types and routines used by the ELF objcopy code
/// (section classification, output-file assembly, debug-link creation).
const ElfFileHelper = struct {
    /// File name and CRC-32 of a separate debug file.
    const DebugLink = struct { name: []const u8, crc32: u32 };

    /// Selects which categories of sections survive in the output; see `selectAction`.
    const Filter = enum { all, program, debug, program_and_symbols, debug_and_symbols };

    const SectionCategory = enum { common, exe, debug, symbols, none };

    /// Merges `new` into `cur.*` following the table below.
    /// Returns 1 when `cur.*` was modified and 0 when it already held the merged value.
    /// NOTE(review): the `u1` result is presumably summed by callers to detect
    /// whether another propagation pass is needed; confirm against the call sites.
    fn propagateCategory(cur: *SectionCategory, new: SectionCategory) u1 {
        const cat: SectionCategory = switch (cur.*) {
            .none => new,
            .common => .common,
            .debug => switch (new) {
                .none, .debug => .debug,
                .common, .exe, .symbols => new,
            },
            .exe => switch (new) {
                .common => .common,
                .none, .debug, .exe => .exe,
                .symbols => .exe,
            },
            .symbols => switch (new) {
                // a `.symbols` section is only ever expected to be merged with `.symbols`
                .none, .common, .debug, .exe => unreachable,
                .symbols => .symbols,
            },
        };

        if (cur.* == cat) return 0;
        cur.* = cat;
        return 1;
    }

    const Action = enum { keep, strip, empty };

    /// Maps a section category to the action to apply under `filter`.
    /// Uncategorized (`.none`) sections are stripped under every filter.
    /// All switches are exhaustive so that adding a category or a filter is a
    /// compile error until this table is updated.
    fn selectAction(category: SectionCategory, filter: Filter) Action {
        return switch (filter) {
            .all => switch (category) {
                .none => .strip,
                .common, .exe, .debug, .symbols => .keep,
            },
            .program => switch (category) {
                .common, .exe => .keep,
                .debug, .symbols, .none => .strip,
            },
            .program_and_symbols => switch (category) {
                .common, .exe, .symbols => .keep,
                .debug, .none => .strip,
            },
            .debug => switch (category) {
                .exe, .symbols => .empty,
                .none => .strip,
                .common, .debug => .keep,
            },
            .debug_and_symbols => switch (category) {
                .exe => .empty,
                .none => .strip,
                .common, .debug, .symbols => .keep,
            },
        };
    }

    /// One step of output-file assembly.
    const WriteCmd = union(enum) {
        /// Copy `len` bytes from `in_offset` of the input file to `out_offset` of the output file.
        copy_range: struct { in_offset: u64, len: u64, out_offset: u64 },
        /// Write `data` at `out_offset` of the output file.
        write_data: struct { data: []const u8, out_offset: u64 },
    };

    /// Backing storage for zero-fill commands; holes are filled in chunks of at most this size.
    const zero_page = [1]u8{0} ** 4096;

    /// Appends `write_data` commands that cover `[start, end)` with zeroes.
    /// Does nothing when `start >= end`. Holes larger than `zero_page` are split
    /// into several commands instead of slicing past the end of the buffer.
    fn appendZeroFill(list: *std.ArrayList(WriteCmd), start: u64, end: u64) !void {
        var pos = start;
        while (pos < end) {
            const remaining = end - pos;
            const chunk_len: usize = if (remaining > zero_page.len) zero_page.len else @intCast(remaining);
            try list.append(.{ .write_data = .{ .data = zero_page[0..chunk_len], .out_offset = pos } });
            pos += chunk_len;
        }
    }

    /// Executes `cmds` against `out_file`, reading `copy_range` data from `in_file`.
    ///
    /// `cmds` must be ordered by non-overlapping, increasing `out_offset` (asserted).
    /// Holes between consecutive commands are consolidated:
    ///   - by copying the original padding data from `in_file` when two copy ranges
    ///     are displaced by the same amount in both files (they are fused into one), or
    ///   - by writing zeroes otherwise.
    ///
    /// Returns `error.TRUNCATED_ELF` when `in_file` is shorter than a requested range.
    fn write(allocator: Allocator, out_file: File, in_file: File, cmds: []const WriteCmd) !void {
        var consolidated = std.ArrayList(WriteCmd).init(allocator);
        defer consolidated.deinit();
        // Enough for the common case (one command plus one zero-fill each); appends
        // below stay fallible because large holes need more than one zero-fill chunk.
        try consolidated.ensureUnusedCapacity(cmds.len * 2);

        var offset: u64 = 0;
        var fused_cmd: ?WriteCmd = null;
        for (cmds) |cmd| {
            switch (cmd) {
                .write_data => |data| {
                    assert(data.out_offset >= offset);
                    if (fused_cmd) |prev| {
                        try consolidated.append(prev);
                        fused_cmd = null;
                    }
                    try appendZeroFill(&consolidated, offset, data.out_offset);
                    try consolidated.append(cmd);
                    offset = data.out_offset + data.data.len;
                },
                .copy_range => |range| {
                    assert(range.out_offset >= offset);
                    if (fused_cmd) |prev| {
                        // `fused_cmd` only ever holds `copy_range` commands.
                        const pending = prev.copy_range;
                        const same_displacement = range.in_offset >= pending.in_offset + pending.len and
                            (range.out_offset - pending.out_offset == range.in_offset - pending.in_offset);
                        if (same_displacement) {
                            // extend the pending copy over the gap and this range
                            fused_cmd = .{ .copy_range = .{
                                .in_offset = pending.in_offset,
                                .out_offset = pending.out_offset,
                                .len = (range.out_offset + range.len) - pending.out_offset,
                            } };
                        } else {
                            try consolidated.append(prev);
                            try appendZeroFill(&consolidated, offset, range.out_offset);
                            fused_cmd = cmd;
                        }
                    } else {
                        // nothing to fuse with: zero the gap explicitly, like every other path
                        try appendZeroFill(&consolidated, offset, range.out_offset);
                        fused_cmd = cmd;
                    }
                    offset = range.out_offset + range.len;
                },
            }
        }
        if (fused_cmd) |cmd| {
            try consolidated.append(cmd);
        }

        // write the output file
        for (consolidated.items) |cmd| {
            switch (cmd) {
                .write_data => |data| {
                    try out_file.pwriteAll(data.data, data.out_offset);
                },
                .copy_range => |range| {
                    const copied_bytes = try in_file.copyRangeAll(range.in_offset, out_file, range.out_offset, range.len);
                    if (copied_bytes < range.len) return error.TRUNCATED_ELF;
                },
            }
        }
    }

    /// Reads `size` bytes at `offset` of `in_file` and returns `prefix` followed by
    /// their zlib-compressed form.
    ///
    /// Returns null when the result would not be smaller than or equal to `size`
    /// bytes (including when `size < prefix.len`), in which case the caller should
    /// keep the data uncompressed. Moves the seek position of `in_file`.
    /// The returned slice is allocated with `allocator`.
    fn tryCompressSection(allocator: Allocator, in_file: File, offset: u64, size: u64, prefix: []const u8) !?[]align(8) const u8 {
        if (size < prefix.len) return null;

        try in_file.seekTo(offset);
        var section_reader = std.io.limitedReader(in_file.reader(), size);

        // allocate as large as decompressed data. if the compression doesn't fit, keep the data uncompressed.
        const compressed_data = try allocator.alignedAlloc(u8, 8, @intCast(size));
        // released on every error return below; the `return null` paths free explicitly
        errdefer allocator.free(compressed_data);
        var compressed_stream = std.io.fixedBufferStream(compressed_data);

        try compressed_stream.writer().writeAll(prefix);

        {
            var compressor = try std.compress.zlib.compressor(compressed_stream.writer(), .{});

            var buf: [8000]u8 = undefined;
            while (true) {
                const bytes_read = try section_reader.read(&buf);
                if (bytes_read == 0) break;
                const bytes_written = compressor.write(buf[0..bytes_read]) catch |err| switch (err) {
                    error.NoSpaceLeft => {
                        allocator.free(compressed_data);
                        return null;
                    },
                    else => return err,
                };
                assert(bytes_written == bytes_read);
            }
            compressor.finish() catch |err| switch (err) {
                error.NoSpaceLeft => {
                    allocator.free(compressed_data);
                    return null;
                },
                else => return err,
            };
        }

        const compressed_len: usize = @intCast(compressed_stream.getPos() catch unreachable);
        // shrinking is best-effort: on failure the original, larger allocation is kept
        const data = allocator.realloc(compressed_data, compressed_len) catch compressed_data;
        return data[0..compressed_len];
    }

    /// Builds the debug-link description for the file at `path`: its base name and
    /// the CRC-32 of its contents. Exits through `fatal` when the file cannot be
    /// opened or read. `name` in the result is a sub-slice of `path`, so `path`
    /// must outlive the returned value.
    fn createDebugLink(path: []const u8) DebugLink {
        const file = std.fs.cwd().openFile(path, .{}) catch |err| {
            fatal("zig objcopy: could not open `{s}`: {s}\n", .{ path, @errorName(err) });
        };
        defer file.close();

        const crc = ElfFileHelper.computeFileCrc(file) catch |err| {
            fatal("zig objcopy: could not read `{s}`: {s}\n", .{ path, @errorName(err) });
        };
        return .{
            .name = std.fs.path.basename(path),
            .crc32 = crc,
        };
    }

    /// Returns the CRC-32 of the whole content of `file`, read from offset 0 in
    /// fixed-size chunks. Moves the seek position of `file`.
    fn computeFileCrc(file: File) !u32 {
        var buf: [8000]u8 = undefined;

        try file.seekTo(0);
        var hasher = std.hash.Crc32.init();
        while (true) {
            const bytes_read = try file.read(&buf);
            if (bytes_read == 0) break;
            hasher.update(buf[0..bytes_read]);
        }
        return hasher.final();
    }
};
-- cgit v1.2.3