From 808306f49ac8f7fd57c10a48a0126fefd07ab690 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Tue, 15 Oct 2024 15:33:10 +0200 Subject: macho: rename dwarf.zig to Dwarf.zig Separate commit since macOS is case-insensitive by default and so I had to do it from Linux. --- src/link/MachO/Dwarf.zig | 286 +++++++++++++++++++++++++++++++++++++++++++++++ src/link/MachO/dwarf.zig | 286 ----------------------------------------------- 2 files changed, 286 insertions(+), 286 deletions(-) create mode 100644 src/link/MachO/Dwarf.zig delete mode 100644 src/link/MachO/dwarf.zig (limited to 'src/link') diff --git a/src/link/MachO/Dwarf.zig b/src/link/MachO/Dwarf.zig new file mode 100644 index 0000000000..c9db7ed7b7 --- /dev/null +++ b/src/link/MachO/Dwarf.zig @@ -0,0 +1,286 @@ +pub const InfoReader = struct { + bytes: []const u8, + strtab: []const u8, + pos: usize = 0, + + pub fn readCompileUnitHeader(p: *InfoReader) !CompileUnitHeader { + var length: u64 = try p.readInt(u32); + const is_64bit = length == 0xffffffff; + if (is_64bit) { + length = try p.readInt(u64); + } + const dw_fmt: DwarfFormat = if (is_64bit) .dwarf64 else .dwarf32; + return .{ + .format = dw_fmt, + .length = length, + .version = try p.readInt(u16), + .debug_abbrev_offset = try p.readOffset(dw_fmt), + .address_size = try p.readByte(), + }; + } + + pub fn seekToDie(p: *InfoReader, code: Code, cuh: CompileUnitHeader, abbrev_reader: *AbbrevReader) !void { + const cuh_length = math.cast(usize, cuh.length) orelse return error.Overflow; + const end_pos = p.pos + switch (cuh.format) { + .dwarf32 => @as(usize, 4), + .dwarf64 => 12, + } + cuh_length; + while (p.pos < end_pos) { + const di_code = try p.readUleb128(u64); + if (di_code == 0) return error.Eof; + if (di_code == code) return; + + while (try abbrev_reader.readAttr()) |attr| switch (attr.at) { + dwarf.FORM.sec_offset, + dwarf.FORM.ref_addr, + => { + _ = try p.readOffset(cuh.format); + }, + + dwarf.FORM.addr => { + _ = try p.readNBytes(cuh.address_size); + }, + + dwarf.FORM.block1, + dwarf.FORM.block2, + dwarf.FORM.block4, + dwarf.FORM.block, + => { + _ = try p.readBlock(attr.form); + }, + + dwarf.FORM.exprloc => { + _ = try p.readExprLoc(); + }, + + dwarf.FORM.flag_present => {}, + + dwarf.FORM.data1, + dwarf.FORM.ref1, + dwarf.FORM.flag, + dwarf.FORM.data2, + dwarf.FORM.ref2, + dwarf.FORM.data4, + dwarf.FORM.ref4, + dwarf.FORM.data8, + dwarf.FORM.ref8, + dwarf.FORM.ref_sig8, + dwarf.FORM.udata, + dwarf.FORM.ref_udata, + dwarf.FORM.sdata, + => { + _ = try p.readConstant(attr.form); + }, + + dwarf.FORM.strp, + dwarf.FORM.string, + => { + _ = try p.readString(attr.form, cuh); + }, + + else => { + // TODO better errors + log.err("unhandled DW_FORM_* value with identifier {x}", .{attr.form}); + return error.UnhandledDwFormValue; + }, + }; + } + } + + pub fn readBlock(p: *InfoReader, form: Form) ![]const u8 { + const len: u64 = switch (form) { + dwarf.FORM.block1 => try p.readByte(), + dwarf.FORM.block2 => try p.readInt(u16), + dwarf.FORM.block4 => try p.readInt(u32), + dwarf.FORM.block => try p.readUleb128(u64), + else => unreachable, + }; + return p.readNBytes(len); + } + + pub fn readExprLoc(p: *InfoReader) ![]const u8 { + const len: u64 = try p.readUleb128(u64); + return p.readNBytes(len); + } + + pub fn readConstant(p: *InfoReader, form: Form) !u64 { + return switch (form) { + dwarf.FORM.data1, dwarf.FORM.ref1, dwarf.FORM.flag => try p.readByte(), + dwarf.FORM.data2, dwarf.FORM.ref2 => try p.readInt(u16), + dwarf.FORM.data4, dwarf.FORM.ref4 => try p.readInt(u32), + dwarf.FORM.data8, dwarf.FORM.ref8, dwarf.FORM.ref_sig8 => try p.readInt(u64), + dwarf.FORM.udata, dwarf.FORM.ref_udata => try p.readUleb128(u64), + dwarf.FORM.sdata => @bitCast(try p.readIleb128(i64)), + else => return error.UnhandledConstantForm, + }; + } + + pub fn readString(p: *InfoReader, form: Form, cuh: CompileUnitHeader) ![:0]const u8 { + switch (form) { + dwarf.FORM.strp => { + const off = try p.readOffset(cuh.format); + const off_u = math.cast(usize, off) orelse return error.Overflow; + return mem.sliceTo(@as([*:0]const u8, @ptrCast(p.strtab.ptr + off_u)), 0); + }, + dwarf.FORM.string => { + const start = p.pos; + while (p.pos < p.bytes.len) : (p.pos += 1) { + if (p.bytes[p.pos] == 0) break; + } + if (p.bytes[p.pos] != 0) return error.Eof; + return p.bytes[start..p.pos :0]; + }, + else => unreachable, + } + } + + pub fn readByte(p: *InfoReader) !u8 { + if (p.pos + 1 > p.bytes.len) return error.Eof; + defer p.pos += 1; + return p.bytes[p.pos]; + } + + pub fn readNBytes(p: *InfoReader, num: u64) ![]const u8 { + const num_usize = math.cast(usize, num) orelse return error.Overflow; + if (p.pos + num_usize > p.bytes.len) return error.Eof; + defer p.pos += num_usize; + return p.bytes[p.pos..][0..num_usize]; + } + + pub fn readInt(p: *InfoReader, comptime Int: type) !Int { + if (p.pos + @sizeOf(Int) > p.bytes.len) return error.Eof; + defer p.pos += @sizeOf(Int); + return mem.readInt(Int, p.bytes[p.pos..][0..@sizeOf(Int)], .little); + } + + pub fn readOffset(p: *InfoReader, dw_fmt: DwarfFormat) !u64 { + return switch (dw_fmt) { + .dwarf32 => try p.readInt(u32), + .dwarf64 => try p.readInt(u64), + }; + } + + pub fn readUleb128(p: *InfoReader, comptime Type: type) !Type { + var stream = std.io.fixedBufferStream(p.bytes[p.pos..]); + var creader = std.io.countingReader(stream.reader()); + const value: Type = try leb.readUleb128(Type, creader.reader()); + p.pos += math.cast(usize, creader.bytes_read) orelse return error.Overflow; + return value; + } + + pub fn readIleb128(p: *InfoReader, comptime Type: type) !Type { + var stream = std.io.fixedBufferStream(p.bytes[p.pos..]); + var creader = std.io.countingReader(stream.reader()); + const value: Type = try leb.readIleb128(Type, creader.reader()); + p.pos += math.cast(usize, creader.bytes_read) orelse return error.Overflow; + return value; + } + + pub fn seekTo(p: *InfoReader, off: u64) !void { + p.pos = math.cast(usize, off) orelse return error.Overflow; + } +}; + +pub const AbbrevReader = struct { + bytes: []const u8, + pos: usize = 0, + + pub fn hasMore(p: AbbrevReader) bool { + return p.pos < p.bytes.len; + } + + pub fn readDecl(p: *AbbrevReader) !?AbbrevDecl { + const pos = p.pos; + const code = try p.readUleb128(Code); + if (code == 0) return null; + + const tag = try p.readUleb128(Tag); + const has_children = (try p.readByte()) > 0; + return .{ + .code = code, + .pos = pos, + .len = p.pos - pos, + .tag = tag, + .has_children = has_children, + }; + } + + pub fn readAttr(p: *AbbrevReader) !?AbbrevAttr { + const pos = p.pos; + const at = try p.readUleb128(At); + const form = try p.readUleb128(Form); + return if (at == 0 and form == 0) null else .{ + .at = at, + .form = form, + .pos = pos, + .len = p.pos - pos, + }; + } + + pub fn readByte(p: *AbbrevReader) !u8 { + if (p.pos + 1 > p.bytes.len) return error.Eof; + defer p.pos += 1; + return p.bytes[p.pos]; + } + + pub fn readUleb128(p: *AbbrevReader, comptime Type: type) !Type { + var stream = std.io.fixedBufferStream(p.bytes[p.pos..]); + var creader = std.io.countingReader(stream.reader()); + const value: Type = try leb.readUleb128(Type, creader.reader()); + p.pos += math.cast(usize, creader.bytes_read) orelse return error.Overflow; + return value; + } + + pub fn seekTo(p: *AbbrevReader, off: u64) !void { + p.pos = math.cast(usize, off) orelse return error.Overflow; + } +}; + +const AbbrevDecl = struct { + code: Code, + pos: usize, + len: usize, + tag: Tag, + has_children: bool, +}; + +const AbbrevAttr = struct { + at: At, + form: Form, + pos: usize, + len: usize, +}; + +const CompileUnitHeader = struct { + format: DwarfFormat, + length: u64, + version: u16, + debug_abbrev_offset: u64, + address_size: u8, +}; + +const Die = struct { + pos: usize, + len: usize, +}; + +const DwarfFormat = enum { + dwarf32, + dwarf64, +}; + +const dwarf = std.dwarf; +const leb = std.leb; +const log = std.log.scoped(.link); +const math = std.math; +const mem = std.mem; +const std = @import("std"); + +const At = u64; +const Code = u64; +const Form = u64; +const Tag = u64; + +pub const AT = dwarf.AT; +pub const FORM = dwarf.FORM; +pub const TAG = dwarf.TAG; diff --git a/src/link/MachO/dwarf.zig b/src/link/MachO/dwarf.zig deleted file mode 100644 index c9db7ed7b7..0000000000 --- a/src/link/MachO/dwarf.zig +++ /dev/null @@ -1,286 +0,0 @@ -pub const InfoReader = struct { - bytes: []const u8, - strtab: []const u8, - pos: usize = 0, - - pub fn readCompileUnitHeader(p: *InfoReader) !CompileUnitHeader { - var length: u64 = try p.readInt(u32); - const is_64bit = length == 0xffffffff; - if (is_64bit) { - length = try p.readInt(u64); - } - const dw_fmt: DwarfFormat = if (is_64bit) .dwarf64 else .dwarf32; - return .{ - .format = dw_fmt, - .length = length, - .version = try p.readInt(u16), - .debug_abbrev_offset = try p.readOffset(dw_fmt), - .address_size = try p.readByte(), - }; - } - - pub fn seekToDie(p: *InfoReader, code: Code, cuh: CompileUnitHeader, abbrev_reader: *AbbrevReader) !void { - const cuh_length = math.cast(usize, cuh.length) orelse return error.Overflow; - const end_pos = p.pos + switch (cuh.format) { - .dwarf32 => @as(usize, 4), - .dwarf64 => 12, - } + cuh_length; - while (p.pos < end_pos) { - const di_code = try p.readUleb128(u64); - if (di_code == 0) return error.Eof; - if (di_code == code) return; - - while (try abbrev_reader.readAttr()) |attr| switch (attr.at) { - dwarf.FORM.sec_offset, - dwarf.FORM.ref_addr, - => { - _ = try p.readOffset(cuh.format); - }, - - dwarf.FORM.addr => { - _ = try p.readNBytes(cuh.address_size); - }, - - dwarf.FORM.block1, - dwarf.FORM.block2, - dwarf.FORM.block4, - dwarf.FORM.block, - => { - _ = try p.readBlock(attr.form); - }, - - dwarf.FORM.exprloc => { - _ = try p.readExprLoc(); - }, - - dwarf.FORM.flag_present => {}, - - dwarf.FORM.data1, - dwarf.FORM.ref1, - dwarf.FORM.flag, - dwarf.FORM.data2, - dwarf.FORM.ref2, - dwarf.FORM.data4, - dwarf.FORM.ref4, - dwarf.FORM.data8, - dwarf.FORM.ref8, - dwarf.FORM.ref_sig8, - dwarf.FORM.udata, - dwarf.FORM.ref_udata, - dwarf.FORM.sdata, - => { - _ = try p.readConstant(attr.form); - }, - - dwarf.FORM.strp, - dwarf.FORM.string, - => { - _ = try p.readString(attr.form, cuh); - }, - - else => { - // TODO better errors - log.err("unhandled DW_FORM_* value with identifier {x}", .{attr.form}); - return error.UnhandledDwFormValue; - }, - }; - } - } - - pub fn readBlock(p: *InfoReader, form: Form) ![]const u8 { - const len: u64 = switch (form) { - dwarf.FORM.block1 => try p.readByte(), - dwarf.FORM.block2 => try p.readInt(u16), - dwarf.FORM.block4 => try p.readInt(u32), - dwarf.FORM.block => try p.readUleb128(u64), - else => unreachable, - }; - return p.readNBytes(len); - } - - pub fn readExprLoc(p: *InfoReader) ![]const u8 { - const len: u64 = try p.readUleb128(u64); - return p.readNBytes(len); - } - - pub fn readConstant(p: *InfoReader, form: Form) !u64 { - return switch (form) { - dwarf.FORM.data1, dwarf.FORM.ref1, dwarf.FORM.flag => try p.readByte(), - dwarf.FORM.data2, dwarf.FORM.ref2 => try p.readInt(u16), - dwarf.FORM.data4, dwarf.FORM.ref4 => try p.readInt(u32), - dwarf.FORM.data8, dwarf.FORM.ref8, dwarf.FORM.ref_sig8 => try p.readInt(u64), - dwarf.FORM.udata, dwarf.FORM.ref_udata => try p.readUleb128(u64), - dwarf.FORM.sdata => @bitCast(try p.readIleb128(i64)), - else => return error.UnhandledConstantForm, - }; - } - - pub fn readString(p: *InfoReader, form: Form, cuh: CompileUnitHeader) ![:0]const u8 { - switch (form) { - dwarf.FORM.strp => { - const off = try p.readOffset(cuh.format); - const off_u = math.cast(usize, off) orelse return error.Overflow; - return mem.sliceTo(@as([*:0]const u8, @ptrCast(p.strtab.ptr + off_u)), 0); - }, - dwarf.FORM.string => { - const start = p.pos; - while (p.pos < p.bytes.len) : (p.pos += 1) { - if (p.bytes[p.pos] == 0) break; - } - if (p.bytes[p.pos] != 0) return error.Eof; - return p.bytes[start..p.pos :0]; - }, - else => unreachable, - } - } - - pub fn readByte(p: *InfoReader) !u8 { - if (p.pos + 1 > p.bytes.len) return error.Eof; - defer p.pos += 1; - return p.bytes[p.pos]; - } - - pub fn readNBytes(p: *InfoReader, num: u64) ![]const u8 { - const num_usize = math.cast(usize, num) orelse return error.Overflow; - if (p.pos + num_usize > p.bytes.len) return error.Eof; - defer p.pos += num_usize; - return p.bytes[p.pos..][0..num_usize]; - } - - pub fn readInt(p: *InfoReader, comptime Int: type) !Int { - if (p.pos + @sizeOf(Int) > p.bytes.len) return error.Eof; - defer p.pos += @sizeOf(Int); - return mem.readInt(Int, p.bytes[p.pos..][0..@sizeOf(Int)], .little); - } - - pub fn readOffset(p: *InfoReader, dw_fmt: DwarfFormat) !u64 { - return switch (dw_fmt) { - .dwarf32 => try p.readInt(u32), - .dwarf64 => try p.readInt(u64), - }; - } - - pub fn readUleb128(p: *InfoReader, comptime Type: type) !Type { - var stream = std.io.fixedBufferStream(p.bytes[p.pos..]); - var creader = std.io.countingReader(stream.reader()); - const value: Type = try leb.readUleb128(Type, creader.reader()); - p.pos += math.cast(usize, creader.bytes_read) orelse return error.Overflow; - return value; - } - - pub fn readIleb128(p: *InfoReader, comptime Type: type) !Type { - var stream = std.io.fixedBufferStream(p.bytes[p.pos..]); - var creader = std.io.countingReader(stream.reader()); - const value: Type = try leb.readIleb128(Type, creader.reader()); - p.pos += math.cast(usize, creader.bytes_read) orelse return error.Overflow; - return value; - } - - pub fn seekTo(p: *InfoReader, off: u64) !void { - p.pos = math.cast(usize, off) orelse return error.Overflow; - } -}; - -pub const AbbrevReader = struct { - bytes: []const u8, - pos: usize = 0, - - pub fn hasMore(p: AbbrevReader) bool { - return p.pos < p.bytes.len; - } - - pub fn readDecl(p: *AbbrevReader) !?AbbrevDecl { - const pos = p.pos; - const code = try p.readUleb128(Code); - if (code == 0) return null; - - const tag = try p.readUleb128(Tag); - const has_children = (try p.readByte()) > 0; - return .{ - .code = code, - .pos = pos, - .len = p.pos - pos, - .tag = tag, - .has_children = has_children, - }; - } - - pub fn readAttr(p: *AbbrevReader) !?AbbrevAttr { - const pos = p.pos; - const at = try p.readUleb128(At); - const form = try p.readUleb128(Form); - return if (at == 0 and form == 0) null else .{ - .at = at, - .form = form, - .pos = pos, - .len = p.pos - pos, - }; - } - - pub fn readByte(p: *AbbrevReader) !u8 { - if (p.pos + 1 > p.bytes.len) return error.Eof; - defer p.pos += 1; - return p.bytes[p.pos]; - } - - pub fn readUleb128(p: *AbbrevReader, comptime Type: type) !Type { - var stream = std.io.fixedBufferStream(p.bytes[p.pos..]); - var creader = std.io.countingReader(stream.reader()); - const value: Type = try leb.readUleb128(Type, creader.reader()); - p.pos += math.cast(usize, creader.bytes_read) orelse return error.Overflow; - return value; - } - - pub fn seekTo(p: *AbbrevReader, off: u64) !void { - p.pos = math.cast(usize, off) orelse return error.Overflow; - } -}; - -const AbbrevDecl = struct { - code: Code, - pos: usize, - len: usize, - tag: Tag, - has_children: bool, -}; - -const AbbrevAttr = struct { - at: At, - form: Form, - pos: usize, - len: usize, -}; - -const CompileUnitHeader = struct { - format: DwarfFormat, - length: u64, - version: u16, - debug_abbrev_offset: u64, - address_size: u8, -}; - -const Die = struct { - pos: usize, - len: usize, -}; - -const DwarfFormat = enum { - dwarf32, - dwarf64, -}; - -const dwarf = std.dwarf; -const leb = std.leb; -const log = std.log.scoped(.link); -const math = std.math; -const mem = std.mem; -const std = @import("std"); - -const At = u64; -const Code = u64; -const Form = u64; -const Tag = u64; - -pub const AT = dwarf.AT; -pub const FORM = dwarf.FORM; -pub const TAG = dwarf.TAG; -- cgit v1.2.3 From 8e815000515182f06fa436668664e4329c407a3e Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Wed, 16 Oct 2024 12:28:13 +0200 Subject: macho: handle DWARFv5 when parsing debug info in objects --- src/link/MachO/Dwarf.zig | 300 ++++++++++++++++++++++++++++++++-------------- src/link/MachO/Object.zig | 226 +++++++++++++++------------------- 2 files changed, 301 insertions(+), 225 deletions(-) (limited to 'src/link') diff --git a/src/link/MachO/Dwarf.zig b/src/link/MachO/Dwarf.zig index c9db7ed7b7..d337605410 100644 --- a/src/link/MachO/Dwarf.zig +++ b/src/link/MachO/Dwarf.zig @@ -1,25 +1,73 @@ +debug_info: []u8 = &[0]u8{}, +debug_abbrev: []u8 = &[0]u8{}, +debug_str: []u8 = &[0]u8{}, +debug_str_offsets: []u8 = &[0]u8{}, + +pub fn deinit(dwarf: *Dwarf, allocator: Allocator) void { + allocator.free(dwarf.debug_info); + allocator.free(dwarf.debug_abbrev); + allocator.free(dwarf.debug_str); + allocator.free(dwarf.debug_str_offsets); +} + +/// Pulls an offset into __debug_str section from a __debug_str_offs section. +/// This is new in DWARFv5 and requires the producer to specify DW_FORM_strx* (`index` arg) +/// but also DW_AT_str_offsets_base with DW_FORM_sec_offset (`base` arg) in the opening header +/// of a "referencing entity" such as DW_TAG_compile_unit. +fn getOffset(debug_str_offsets: []const u8, base: u64, index: u64, dw_fmt: DwarfFormat) u64 { + return switch (dw_fmt) { + .dwarf32 => @as(*align(1) const u32, @ptrCast(debug_str_offsets.ptr + base + index * @sizeOf(u32))).*, + .dwarf64 => @as(*align(1) const u64, @ptrCast(debug_str_offsets.ptr + base + index * @sizeOf(u64))).*, + }; +} + pub const InfoReader = struct { - bytes: []const u8, - strtab: []const u8, + ctx: Dwarf, pos: usize = 0, - pub fn readCompileUnitHeader(p: *InfoReader) !CompileUnitHeader { + fn bytes(p: InfoReader) []const u8 { + return p.ctx.debug_info; + } + + pub fn readCompileUnitHeader(p: *InfoReader, macho_file: *MachO) !CompileUnitHeader { + _ = macho_file; var length: u64 = try p.readInt(u32); const is_64bit = length == 0xffffffff; if (is_64bit) { length = try p.readInt(u64); } const dw_fmt: DwarfFormat = if (is_64bit) .dwarf64 else .dwarf32; + const version = try p.readInt(Version); + const rest: struct { + debug_abbrev_offset: u64, + address_size: u8, + unit_type: u8, + } = switch (version) { + 4 => .{ + .debug_abbrev_offset = try p.readOffset(dw_fmt), + .address_size = try p.readByte(), + .unit_type = 0, + }, + 5 => .{ + // According to the spec, version 5 introduced .unit_type field in the header, and + // it reordered .debug_abbrev_offset with .address_size fields. + .unit_type = try p.readByte(), + .address_size = try p.readByte(), + .debug_abbrev_offset = try p.readOffset(dw_fmt), + }, + else => return error.InvalidVersion, + }; return .{ .format = dw_fmt, .length = length, - .version = try p.readInt(u16), - .debug_abbrev_offset = try p.readOffset(dw_fmt), - .address_size = try p.readByte(), + .version = version, + .debug_abbrev_offset = rest.debug_abbrev_offset, + .address_size = rest.address_size, + .unit_type = rest.unit_type, }; } - pub fn seekToDie(p: *InfoReader, code: Code, cuh: CompileUnitHeader, abbrev_reader: *AbbrevReader) !void { + pub fn seekToDie(p: *InfoReader, code: Code, cuh: CompileUnitHeader, abbrev_reader: *AbbrevReader, macho_file: *MachO) !void { const cuh_length = math.cast(usize, cuh.length) orelse return error.Overflow; const end_pos = p.pos + switch (cuh.format) { .dwarf32 => @as(usize, 4), @@ -27,72 +75,100 @@ pub const InfoReader = struct { } + cuh_length; while (p.pos < end_pos) { const di_code = try p.readUleb128(u64); - if (di_code == 0) return error.Eof; + if (di_code == 0) return error.UnexpectedEndOfFile; if (di_code == code) return; - while (try abbrev_reader.readAttr()) |attr| switch (attr.at) { - dwarf.FORM.sec_offset, - dwarf.FORM.ref_addr, - => { - _ = try p.readOffset(cuh.format); - }, + while (try abbrev_reader.readAttr()) |attr| { + try p.skip(attr.form, cuh, macho_file); + } + } + return error.UnexpectedEndOfFile; + } - dwarf.FORM.addr => { - _ = try p.readNBytes(cuh.address_size); - }, + /// When skipping attributes, we don't really need to be able to handle them all + /// since we only ever care about the DW_TAG_compile_unit. + pub fn skip(p: *InfoReader, form: Form, cuh: CompileUnitHeader, macho_file: *MachO) !void { + _ = macho_file; + switch (form) { + dw.FORM.sec_offset, + dw.FORM.ref_addr, + => { + _ = try p.readOffset(cuh.format); + }, - dwarf.FORM.block1, - dwarf.FORM.block2, - dwarf.FORM.block4, - dwarf.FORM.block, - => { - _ = try p.readBlock(attr.form); - }, + dw.FORM.addr => { + _ = try p.readNBytes(cuh.address_size); + }, - dwarf.FORM.exprloc => { - _ = try p.readExprLoc(); - }, + dw.FORM.block1, + dw.FORM.block2, + dw.FORM.block4, + dw.FORM.block, + => { + _ = try p.readBlock(form); + }, + + dw.FORM.exprloc => { + _ = try p.readExprLoc(); + }, - dwarf.FORM.flag_present => {}, - - dwarf.FORM.data1, - dwarf.FORM.ref1, - dwarf.FORM.flag, - dwarf.FORM.data2, - dwarf.FORM.ref2, - dwarf.FORM.data4, - dwarf.FORM.ref4, - dwarf.FORM.data8, - dwarf.FORM.ref8, - dwarf.FORM.ref_sig8, - dwarf.FORM.udata, - dwarf.FORM.ref_udata, - dwarf.FORM.sdata, + dw.FORM.flag_present => {}, + + dw.FORM.data1, + dw.FORM.ref1, + dw.FORM.flag, + dw.FORM.data2, + dw.FORM.ref2, + dw.FORM.data4, + dw.FORM.ref4, + dw.FORM.data8, + dw.FORM.ref8, + dw.FORM.ref_sig8, + dw.FORM.udata, + dw.FORM.ref_udata, + dw.FORM.sdata, + => { + _ = try p.readConstant(form); + }, + + dw.FORM.strp, + dw.FORM.string, + => { + _ = try p.readString(form, cuh); + }, + + else => if (cuh.version >= 5) switch (form) { + dw.FORM.strx, + dw.FORM.strx1, + dw.FORM.strx2, + dw.FORM.strx3, + dw.FORM.strx4, => { - _ = try p.readConstant(attr.form); + // We are just iterating over the __debug_info data, so we don't care about an actual + // string, therefore we set the `base = 0`. + _ = try p.readStringIndexed(form, cuh, 0); }, - dwarf.FORM.strp, - dwarf.FORM.string, + dw.FORM.addrx, + dw.FORM.addrx1, + dw.FORM.addrx2, + dw.FORM.addrx3, + dw.FORM.addrx4, => { - _ = try p.readString(attr.form, cuh); + _ = try p.readIndex(form); }, - else => { - // TODO better errors - log.err("unhandled DW_FORM_* value with identifier {x}", .{attr.form}); - return error.UnhandledDwFormValue; - }, - }; + else => return error.UnknownForm, + } else return error.UnknownForm, } } pub fn readBlock(p: *InfoReader, form: Form) ![]const u8 { const len: u64 = switch (form) { - dwarf.FORM.block1 => try p.readByte(), - dwarf.FORM.block2 => try p.readInt(u16), - dwarf.FORM.block4 => try p.readInt(u32), - dwarf.FORM.block => try p.readUleb128(u64), + dw.FORM.block1 => try p.readByte(), + dw.FORM.block2 => try p.readInt(u16), + dw.FORM.block4 => try p.readInt(u32), + dw.FORM.block => try p.readUleb128(u64), else => unreachable, }; return p.readNBytes(len); @@ -105,52 +181,79 @@ pub const InfoReader = struct { pub fn readConstant(p: *InfoReader, form: Form) !u64 { return switch (form) { - dwarf.FORM.data1, dwarf.FORM.ref1, dwarf.FORM.flag => try p.readByte(), - dwarf.FORM.data2, dwarf.FORM.ref2 => try p.readInt(u16), - dwarf.FORM.data4, dwarf.FORM.ref4 => try p.readInt(u32), - dwarf.FORM.data8, dwarf.FORM.ref8, dwarf.FORM.ref_sig8 => try p.readInt(u64), - dwarf.FORM.udata, dwarf.FORM.ref_udata => try p.readUleb128(u64), - dwarf.FORM.sdata => @bitCast(try p.readIleb128(i64)), + dw.FORM.data1, dw.FORM.ref1, dw.FORM.flag => try p.readByte(), + dw.FORM.data2, dw.FORM.ref2 => try p.readInt(u16), + dw.FORM.data4, dw.FORM.ref4 => try p.readInt(u32), + dw.FORM.data8, dw.FORM.ref8, dw.FORM.ref_sig8 => try p.readInt(u64), + dw.FORM.udata, dw.FORM.ref_udata => try p.readUleb128(u64), + dw.FORM.sdata => @bitCast(try p.readIleb128(i64)), else => return error.UnhandledConstantForm, }; } + pub fn readIndex(p: *InfoReader, form: Form) !u64 { + return switch (form) { + dw.FORM.strx1, dw.FORM.addrx1 => try p.readByte(), + dw.FORM.strx2, dw.FORM.addrx2 => try p.readInt(u16), + dw.FORM.strx3, dw.FORM.addrx3 => error.UnhandledDwForm, + dw.FORM.strx4, dw.FORM.addrx4 => try p.readInt(u32), + dw.FORM.strx, dw.FORM.addrx => try p.readUleb128(u64), + else => return error.UnhandledIndexForm, + }; + } + pub fn readString(p: *InfoReader, form: Form, cuh: CompileUnitHeader) ![:0]const u8 { switch (form) { - dwarf.FORM.strp => { + dw.FORM.strp => { const off = try p.readOffset(cuh.format); const off_u = math.cast(usize, off) orelse return error.Overflow; - return mem.sliceTo(@as([*:0]const u8, @ptrCast(p.strtab.ptr + off_u)), 0); + return mem.sliceTo(@as([*:0]const u8, @ptrCast(p.ctx.debug_str.ptr + off_u)), 0); }, - dwarf.FORM.string => { + dw.FORM.string => { const start = p.pos; - while (p.pos < p.bytes.len) : (p.pos += 1) { - if (p.bytes[p.pos] == 0) break; + while (p.pos < p.bytes().len) : (p.pos += 1) { + if (p.bytes()[p.pos] == 0) break; } - if (p.bytes[p.pos] != 0) return error.Eof; - return p.bytes[start..p.pos :0]; + if (p.bytes()[p.pos] != 0) return error.UnexpectedEndOfFile; + return p.bytes()[start..p.pos :0]; + }, + else => unreachable, + } + } + + pub fn readStringIndexed(p: *InfoReader, form: Form, cuh: CompileUnitHeader, base: u64) ![:0]const u8 { + switch (form) { + dw.FORM.strx, + dw.FORM.strx1, + dw.FORM.strx2, + dw.FORM.strx3, + dw.FORM.strx4, + => { + const index = try p.readIndex(form); + const off = getOffset(p.ctx.debug_str_offsets, base, index, cuh.format); + return mem.sliceTo(@as([*:0]const u8, @ptrCast(p.ctx.debug_str.ptr + off)), 0); }, else => unreachable, } } pub fn readByte(p: *InfoReader) !u8 { - if (p.pos + 1 > p.bytes.len) return error.Eof; + if (p.pos + 1 > p.bytes().len) return error.UnexpectedEndOfFile; defer p.pos += 1; - return p.bytes[p.pos]; + return p.bytes()[p.pos]; } pub fn readNBytes(p: *InfoReader, num: u64) ![]const u8 { const num_usize = math.cast(usize, num) orelse return error.Overflow; - if (p.pos + num_usize > p.bytes.len) return error.Eof; + if (p.pos + num_usize > p.bytes().len) return error.UnexpectedEndOfFile; defer p.pos += num_usize; - return p.bytes[p.pos..][0..num_usize]; + return p.bytes()[p.pos..][0..num_usize]; } pub fn readInt(p: *InfoReader, comptime Int: type) !Int { - if (p.pos + @sizeOf(Int) > p.bytes.len) return error.Eof; + if (p.pos + @sizeOf(Int) > p.bytes().len) return error.UnexpectedEndOfFile; defer p.pos += @sizeOf(Int); - return mem.readInt(Int, p.bytes[p.pos..][0..@sizeOf(Int)], .little); + return mem.readInt(Int, p.bytes()[p.pos..][0..@sizeOf(Int)], .little); } pub fn readOffset(p: *InfoReader, dw_fmt: DwarfFormat) !u64 { @@ -161,7 +264,7 @@ pub const InfoReader = struct { } pub fn readUleb128(p: *InfoReader, comptime Type: type) !Type { - var stream = std.io.fixedBufferStream(p.bytes[p.pos..]); + var stream = std.io.fixedBufferStream(p.bytes()[p.pos..]); var creader = std.io.countingReader(stream.reader()); const value: Type = try leb.readUleb128(Type, creader.reader()); p.pos += math.cast(usize, creader.bytes_read) orelse return error.Overflow; @@ -169,7 +272,7 @@ pub const InfoReader = struct { } pub fn readIleb128(p: *InfoReader, comptime Type: type) !Type { - var stream = std.io.fixedBufferStream(p.bytes[p.pos..]); + var stream = std.io.fixedBufferStream(p.bytes()[p.pos..]); var creader = std.io.countingReader(stream.reader()); const value: Type = try leb.readIleb128(Type, creader.reader()); p.pos += math.cast(usize, creader.bytes_read) orelse return error.Overflow; @@ -182,11 +285,15 @@ pub const InfoReader = struct { }; pub const AbbrevReader = struct { - bytes: []const u8, + ctx: Dwarf, pos: usize = 0, + fn bytes(p: AbbrevReader) []const u8 { + return p.ctx.debug_abbrev; + } + pub fn hasMore(p: AbbrevReader) bool { - return p.pos < p.bytes.len; + return p.pos < p.bytes().len; } pub fn readDecl(p: *AbbrevReader) !?AbbrevDecl { @@ -218,13 +325,13 @@ pub const AbbrevReader = struct { } pub fn readByte(p: *AbbrevReader) !u8 { - if (p.pos + 1 > p.bytes.len) return error.Eof; + if (p.pos + 1 > p.bytes().len) return error.Eof; defer p.pos += 1; - return p.bytes[p.pos]; + return p.bytes()[p.pos]; } pub fn readUleb128(p: *AbbrevReader, comptime Type: type) !Type { - var stream = std.io.fixedBufferStream(p.bytes[p.pos..]); + var stream = std.io.fixedBufferStream(p.bytes()[p.pos..]); var creader = std.io.countingReader(stream.reader()); const value: Type = try leb.readUleb128(Type, creader.reader()); p.pos += math.cast(usize, creader.bytes_read) orelse return error.Overflow; @@ -254,9 +361,10 @@ const AbbrevAttr = struct { const CompileUnitHeader = struct { format: DwarfFormat, length: u64, - version: u16, + version: Version, debug_abbrev_offset: u64, address_size: u8, + unit_type: u8, }; const Die = struct { @@ -269,18 +377,24 @@ const DwarfFormat = enum { dwarf64, }; -const dwarf = std.dwarf; +const dw = std.dwarf; const leb = std.leb; const log = std.log.scoped(.link); const math = std.math; const mem = std.mem; const std = @import("std"); - -const At = u64; -const Code = u64; -const Form = u64; -const Tag = u64; - -pub const AT = dwarf.AT; -pub const FORM = dwarf.FORM; -pub const TAG = dwarf.TAG; +const Allocator = mem.Allocator; +const Dwarf = @This(); +const File = @import("file.zig").File; +const MachO = @import("../MachO.zig"); +const Object = @import("Object.zig"); + +pub const At = u64; +pub const Code = u64; +pub const Form = u64; +pub const Tag = u64; +pub const Version = u16; + +pub const AT = dw.AT; +pub const FORM = dw.FORM; +pub const TAG = dw.TAG; diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig index 3efc0a1e5a..23c35a4181 100644 --- a/src/link/MachO/Object.zig +++ b/src/link/MachO/Object.zig @@ -1359,151 +1359,102 @@ fn parseDebugInfo(self: *Object, macho_file: *MachO) !void { defer tracy.end(); const gpa = macho_file.base.comp.gpa; + const file = macho_file.getFileHandle(self.file_handle); - var debug_info_index: ?usize = null; - var debug_abbrev_index: ?usize = null; - var debug_str_index: ?usize = null; + var dwarf: Dwarf = .{}; + defer dwarf.deinit(gpa); for (self.sections.items(.header), 0..) |sect, index| { + const n_sect: u8 = @intCast(index); if (sect.attrs() & macho.S_ATTR_DEBUG == 0) continue; - if (mem.eql(u8, sect.sectName(), "__debug_info")) debug_info_index = index; - if (mem.eql(u8, sect.sectName(), "__debug_abbrev")) debug_abbrev_index = index; - if (mem.eql(u8, sect.sectName(), "__debug_str")) debug_str_index = index; + if (mem.eql(u8, sect.sectName(), "__debug_info")) { + dwarf.debug_info = try self.readSectionData(gpa, file, n_sect); + } + if (mem.eql(u8, sect.sectName(), "__debug_abbrev")) { + dwarf.debug_abbrev = try self.readSectionData(gpa, file, n_sect); + } + if (mem.eql(u8, sect.sectName(), "__debug_str")) { + dwarf.debug_str = try self.readSectionData(gpa, file, n_sect); + } + if (mem.eql(u8, sect.sectName(), "__debug_str_offs")) { + dwarf.debug_str_offsets = try self.readSectionData(gpa, file, n_sect); + } } - if (debug_info_index == null or debug_abbrev_index == null) return; - - const slice = self.sections.slice(); - const file = macho_file.getFileHandle(self.file_handle); - const debug_info = blk: { - const sect = slice.items(.header)[debug_info_index.?]; - const size = math.cast(usize, sect.size) orelse return error.Overflow; - const data = try gpa.alloc(u8, size); - const amt = try file.preadAll(data, sect.offset + self.offset); - if (amt != data.len) return error.InputOutput; - break :blk data; - }; - defer gpa.free(debug_info); - const debug_abbrev = blk: { - const sect = slice.items(.header)[debug_abbrev_index.?]; - const size = math.cast(usize, sect.size) orelse return error.Overflow; - const data = try gpa.alloc(u8, size); - const amt = try file.preadAll(data, sect.offset + self.offset); - if (amt != data.len) return error.InputOutput; - break :blk data; - }; - defer gpa.free(debug_abbrev); - const debug_str = if (debug_str_index) |sid| blk: { - const sect = slice.items(.header)[sid]; - const size = math.cast(usize, sect.size) orelse return error.Overflow; - const data = try gpa.alloc(u8, size); - const amt = try file.preadAll(data, sect.offset + self.offset); - if (amt != data.len) return error.InputOutput; - break :blk data; - } else &[0]u8{}; - defer gpa.free(debug_str); - - self.compile_unit = self.findCompileUnit(.{ - .gpa = gpa, - .debug_info = debug_info, - .debug_abbrev = debug_abbrev, - .debug_str = debug_str, - }) catch null; // TODO figure out what errors are fatal, and when we silently fail -} - -fn findCompileUnit(self: *Object, args: struct { - gpa: Allocator, - debug_info: []const u8, - debug_abbrev: []const u8, - debug_str: []const u8, -}) !CompileUnit { - var cu_wip: struct { - comp_dir: ?[:0]const u8 = null, - tu_name: ?[:0]const u8 = null, - } = .{}; - - const gpa = args.gpa; - var info_reader = dwarf.InfoReader{ .bytes = args.debug_info, .strtab = args.debug_str }; - var abbrev_reader = dwarf.AbbrevReader{ .bytes = args.debug_abbrev }; - - const cuh = try info_reader.readCompileUnitHeader(); - try abbrev_reader.seekTo(cuh.debug_abbrev_offset); - - const cu_decl = (try abbrev_reader.readDecl()) orelse return error.Eof; - if (cu_decl.tag != dwarf.TAG.compile_unit) return error.UnexpectedTag; - - try info_reader.seekToDie(cu_decl.code, cuh, &abbrev_reader); - - while (try abbrev_reader.readAttr()) |attr| switch (attr.at) { - dwarf.AT.name => { - cu_wip.tu_name = try info_reader.readString(attr.form, cuh); - }, - dwarf.AT.comp_dir => { - cu_wip.comp_dir = try info_reader.readString(attr.form, cuh); - }, - else => switch (attr.form) { - dwarf.FORM.sec_offset, - dwarf.FORM.ref_addr, - => { - _ = try info_reader.readOffset(cuh.format); - }, + if (dwarf.debug_info.len == 0) return; - dwarf.FORM.addr => { - _ = try info_reader.readNBytes(cuh.address_size); - }, + self.compile_unit = try self.findCompileUnit(gpa, dwarf, macho_file); +} - dwarf.FORM.block1, - dwarf.FORM.block2, - dwarf.FORM.block4, - dwarf.FORM.block, - => { - _ = try info_reader.readBlock(attr.form); - }, +fn findCompileUnit(self: *Object, gpa: Allocator, ctx: Dwarf, macho_file: *MachO) !CompileUnit { + var info_reader = Dwarf.InfoReader{ .ctx = ctx }; + var abbrev_reader = Dwarf.AbbrevReader{ .ctx = ctx }; - dwarf.FORM.exprloc => { - _ = try info_reader.readExprLoc(); - }, + const cuh = try info_reader.readCompileUnitHeader(macho_file); + try abbrev_reader.seekTo(cuh.debug_abbrev_offset); - dwarf.FORM.flag_present => {}, - - dwarf.FORM.data1, - dwarf.FORM.ref1, - dwarf.FORM.flag, - dwarf.FORM.data2, - dwarf.FORM.ref2, - dwarf.FORM.data4, - dwarf.FORM.ref4, - dwarf.FORM.data8, - dwarf.FORM.ref8, - dwarf.FORM.ref_sig8, - dwarf.FORM.udata, - dwarf.FORM.ref_udata, - dwarf.FORM.sdata, - => { - _ = try info_reader.readConstant(attr.form); - }, + const cu_decl = (try abbrev_reader.readDecl()) orelse return error.UnexpectedEndOfFile; + if (cu_decl.tag != Dwarf.TAG.compile_unit) return error.UnexpectedTag; - dwarf.FORM.strp, - dwarf.FORM.string, - => { - _ = try info_reader.readString(attr.form, cuh); - }, + try info_reader.seekToDie(cu_decl.code, cuh, &abbrev_reader, macho_file); - else => { - // TODO actual errors? - log.err("unhandled DW_FORM_* value with identifier {x}", .{attr.form}); - return error.UnhandledForm; - }, - }, + const Pos = struct { + pos: usize, + form: Dwarf.Form, }; - - if (cu_wip.comp_dir == null) return error.MissingCompDir; - if (cu_wip.tu_name == null) return error.MissingTuName; - - return .{ - .comp_dir = try self.addString(gpa, cu_wip.comp_dir.?), - .tu_name = try self.addString(gpa, cu_wip.tu_name.?), + var saved: struct { + tu_name: ?Pos, + comp_dir: ?Pos, + str_offsets_base: ?Pos, + } = .{ + .tu_name = null, + .comp_dir = null, + .str_offsets_base = null, }; + while (try abbrev_reader.readAttr()) |attr| { + const pos: Pos = .{ .pos = info_reader.pos, .form = attr.form }; + switch (attr.at) { + Dwarf.AT.name => saved.tu_name = pos, + Dwarf.AT.comp_dir => saved.comp_dir = pos, + Dwarf.AT.str_offsets_base => saved.str_offsets_base = pos, + else => {}, + } + try info_reader.skip(attr.form, cuh, macho_file); + } + + if (saved.comp_dir == null) return error.MissingCompDir; + if (saved.tu_name == null) return error.MissingTuName; + + const str_offsets_base: ?u64 = if (saved.str_offsets_base) |str_offsets_base| str_offsets_base: { + try info_reader.seekTo(str_offsets_base.pos); + break :str_offsets_base try info_reader.readOffset(cuh.format); + } else null; + + var cu: CompileUnit = .{ .comp_dir = .{}, .tu_name = .{} }; + for (&[_]struct { Pos, *MachO.String }{ + .{ saved.comp_dir.?, &cu.comp_dir }, + .{ saved.tu_name.?, &cu.tu_name }, + }) |tuple| { + const pos, const str_offset_ptr = tuple; + try info_reader.seekTo(pos.pos); + str_offset_ptr.* = switch (pos.form) { + Dwarf.FORM.strp, + Dwarf.FORM.string, + => try self.addString(gpa, try info_reader.readString(pos.form, cuh)), + Dwarf.FORM.strx, + Dwarf.FORM.strx1, + Dwarf.FORM.strx2, + Dwarf.FORM.strx3, + Dwarf.FORM.strx4, + => blk: { + const base = str_offsets_base orelse return error.MalformedDwarf; + break :blk try self.addString(gpa, try info_reader.readStringIndexed(pos.form, cuh, base)); + }, + else => return error.InvalidForm, + }; + } + + return cu; } pub fn resolveSymbols(self: *Object, macho_file: *MachO) !void { @@ -2561,6 +2512,17 @@ pub fn getUnwindRecord(self: *Object, index: UnwindInfo.Record.Index) *UnwindInf return &self.unwind_records.items[index]; } +/// Caller owns the memory. +pub fn readSectionData(self: Object, allocator: Allocator, file: File.Handle, n_sect: u8) ![]u8 { + const header = self.sections.items(.header)[n_sect]; + const size = math.cast(usize, header.size) orelse return error.Overflow; + const data = try allocator.alloc(u8, size); + const amt = try file.preadAll(data, header.offset + self.offset); + errdefer allocator.free(data); + if (amt != data.len) return error.InputOutput; + return data; +} + pub fn format( self: *Object, comptime unused_fmt_string: []const u8, @@ -3219,7 +3181,6 @@ const aarch64 = struct { }; const assert = std.debug.assert; -const dwarf = @import("dwarf.zig"); const eh_frame = @import("eh_frame.zig"); const log = std.log.scoped(.link); const macho = std.macho; @@ -3233,6 +3194,7 @@ const Allocator = mem.Allocator; const Archive = @import("Archive.zig"); const Atom = @import("Atom.zig"); const Cie = eh_frame.Cie; +const Dwarf = @import("Dwarf.zig"); const Fde = eh_frame.Fde; const File = @import("file.zig").File; const LoadCommandIterator = macho.LoadCommandIterator; -- cgit v1.2.3 From c824b350511780581c0e5c1da85d0d9d769701ea Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Wed, 16 Oct 2024 12:43:31 +0200 Subject: macho: move things around in MachO/Object.zig and refactor --- src/link/MachO/Object.zig | 58 +++++++++++++---------------------------------- 1 file changed, 16 insertions(+), 42 deletions(-) (limited to 'src/link') diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig index 23c35a4181..eb997c01e4 100644 --- a/src/link/MachO/Object.zig +++ b/src/link/MachO/Object.zig @@ -443,11 +443,8 @@ fn initCstringLiterals(self: *Object, allocator: Allocator, file: File.Handle, m for (slice.items(.header), 0..) |sect, n_sect| { if (!isCstringLiteral(sect)) continue; - const sect_size = math.cast(usize, sect.size) orelse return error.Overflow; - const data = try allocator.alloc(u8, sect_size); + const data = try self.readSectionData(allocator, file, @intCast(n_sect)); defer allocator.free(data); - const amt = try file.preadAll(data, sect.offset + self.offset); - if (amt != data.len) return error.InputOutput; var count: u32 = 0; var start: u32 = 0; @@ -646,13 +643,10 @@ pub fn resolveLiterals(self: *Object, lp: *MachO.LiteralPool, macho_file: *MachO } const slice = self.sections.slice(); - for (slice.items(.header), slice.items(.subsections)) |header, subs| { + for (slice.items(.header), slice.items(.subsections), 0..) |header, subs, n_sect| { if (isCstringLiteral(header) or isFixedSizeLiteral(header)) { - const sect_size = math.cast(usize, header.size) orelse return error.Overflow; - const data = try gpa.alloc(u8, sect_size); + const data = try self.readSectionData(gpa, file, @intCast(n_sect)); defer gpa.free(data); - const amt = try file.preadAll(data, header.offset + self.offset); - if (amt != data.len) return error.InputOutput; for (subs.items) |sub| { const atom = self.getAtom(sub.atom).?; @@ -686,12 +680,7 @@ pub fn resolveLiterals(self: *Object, lp: *MachO.LiteralPool, macho_file: *MachO buffer.resize(target_size) catch unreachable; const gop = try sections_data.getOrPut(target.n_sect); if (!gop.found_existing) { - const target_sect = slice.items(.header)[target.n_sect]; - const target_sect_size = math.cast(usize, target_sect.size) orelse return error.Overflow; - const data = try gpa.alloc(u8, target_sect_size); - const amt = try file.preadAll(data, target_sect.offset + self.offset); - if (amt != data.len) return error.InputOutput; - gop.value_ptr.* = data; + gop.value_ptr.* = try self.readSectionData(gpa, file, @intCast(target.n_sect)); } const data = gop.value_ptr.*; const target_off = math.cast(usize, target.off) orelse return error.Overflow; @@ -1000,7 +989,7 @@ fn initRelocs(self: *Object, file: File.Handle, cpu_arch: std.Target.Cpu.Arch, m defer tracy.end(); const slice = self.sections.slice(); - for (slice.items(.header), slice.items(.relocs)) |sect, *out| { + for (slice.items(.header), slice.items(.relocs), 0..) |sect, *out, n_sect| { if (sect.nreloc == 0) continue; // We skip relocs for __DWARF since even in -r mode, the linker is expected to emit // debug symbol stabs in the relocatable. This made me curious why that is. For now, @@ -1009,8 +998,8 @@ fn initRelocs(self: *Object, file: File.Handle, cpu_arch: std.Target.Cpu.Arch, m !mem.eql(u8, sect.sectName(), "__compact_unwind")) continue; switch (cpu_arch) { - .x86_64 => try x86_64.parseRelocs(self, sect, out, file, macho_file), - .aarch64 => try aarch64.parseRelocs(self, sect, out, file, macho_file), + .x86_64 => try x86_64.parseRelocs(self, @intCast(n_sect), sect, out, file, macho_file), + .aarch64 => try aarch64.parseRelocs(self, @intCast(n_sect), sect, out, file, macho_file), else => unreachable, } @@ -1146,11 +1135,8 @@ fn initUnwindRecords(self: *Object, allocator: Allocator, sect_id: u8, file: Fil }; const header = self.sections.items(.header)[sect_id]; - const size = math.cast(usize, header.size) orelse return error.Overflow; - const data = try allocator.alloc(u8, size); + const data = try self.readSectionData(allocator, file, sect_id); defer allocator.free(data); - const amt = try file.preadAll(data, header.offset + self.offset); - if (amt != data.len) return error.InputOutput; const nrecs = @divExact(data.len, @sizeOf(macho.compact_unwind_entry)); const recs = @as([*]align(1) const macho.compact_unwind_entry, @ptrCast(data.ptr))[0..nrecs]; @@ -2810,6 +2796,7 @@ const CompactUnwindCtx = struct { const x86_64 = struct { fn parseRelocs( self: *Object, + n_sect: u8, sect: macho.section_64, out: *std.ArrayListUnmanaged(Relocation), handle: File.Handle, @@ -2819,19 +2806,12 @@ const x86_64 = struct { const relocs_buffer = try gpa.alloc(u8, sect.nreloc * @sizeOf(macho.relocation_info)); defer gpa.free(relocs_buffer); - { - const amt = try handle.preadAll(relocs_buffer, sect.reloff + self.offset); - if (amt != relocs_buffer.len) return error.InputOutput; - } + const amt = try handle.preadAll(relocs_buffer, sect.reloff + self.offset); + if (amt != relocs_buffer.len) return error.InputOutput; const relocs = @as([*]align(1) const macho.relocation_info, @ptrCast(relocs_buffer.ptr))[0..sect.nreloc]; - const sect_size = math.cast(usize, sect.size) orelse return error.Overflow; - const code = try gpa.alloc(u8, sect_size); + const code = try self.readSectionData(gpa, handle, n_sect); defer gpa.free(code); - { - const amt = try handle.preadAll(code, sect.offset + self.offset); - if (amt != code.len) return error.InputOutput; - } try out.ensureTotalCapacityPrecise(gpa, relocs.len); @@ -2983,6 +2963,7 @@ const x86_64 = struct { const aarch64 = struct { fn parseRelocs( self: *Object, + n_sect: u8, sect: macho.section_64, out: *std.ArrayListUnmanaged(Relocation), handle: File.Handle, @@ -2992,19 +2973,12 @@ const aarch64 = struct { const relocs_buffer = try gpa.alloc(u8, sect.nreloc * @sizeOf(macho.relocation_info)); defer gpa.free(relocs_buffer); - { - const amt = try handle.preadAll(relocs_buffer, sect.reloff + self.offset); - if (amt != relocs_buffer.len) return error.InputOutput; - } + const amt = try handle.preadAll(relocs_buffer, sect.reloff + self.offset); + if (amt != relocs_buffer.len) return error.InputOutput; const relocs = @as([*]align(1) const macho.relocation_info, @ptrCast(relocs_buffer.ptr))[0..sect.nreloc]; - const sect_size = math.cast(usize, sect.size) orelse return error.Overflow; - const code = try gpa.alloc(u8, sect_size); + const code = try self.readSectionData(gpa, handle, n_sect); defer gpa.free(code); - { - const amt = try handle.preadAll(code, sect.offset + self.offset); - if (amt != code.len) return error.InputOutput; - } try out.ensureTotalCapacityPrecise(gpa, relocs.len); -- cgit v1.2.3 From 0769afbb0f2fbb9c72d97ec0bcdcaba0ac916341 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Wed, 16 Oct 2024 13:20:19 +0200 Subject: macho: refactors errors from parsing DWARF Currently we don't report any errors to the user due to a bug in self-hosted x86_64-macos backend. --- src/link/MachO/Dwarf.zig | 16 +++++++--------- src/link/MachO/Object.zig | 18 +++++++++++------- 2 files changed, 18 insertions(+), 16 deletions(-) (limited to 'src/link') diff --git a/src/link/MachO/Dwarf.zig b/src/link/MachO/Dwarf.zig index d337605410..0d4beadcd2 100644 --- a/src/link/MachO/Dwarf.zig +++ b/src/link/MachO/Dwarf.zig @@ -29,8 +29,7 @@ pub const InfoReader = struct { return p.ctx.debug_info; } - pub fn readCompileUnitHeader(p: *InfoReader, macho_file: *MachO) !CompileUnitHeader { - _ = macho_file; + pub fn readCompileUnitHeader(p: *InfoReader) !CompileUnitHeader { var length: u64 = try p.readInt(u32); const is_64bit = length == 0xffffffff; if (is_64bit) { @@ -67,7 +66,7 @@ pub const InfoReader = struct { }; } - pub fn seekToDie(p: *InfoReader, code: Code, cuh: CompileUnitHeader, abbrev_reader: *AbbrevReader, macho_file: *MachO) !void { + pub fn seekToDie(p: *InfoReader, code: Code, cuh: CompileUnitHeader, abbrev_reader: *AbbrevReader) !void { const cuh_length = math.cast(usize, cuh.length) orelse return error.Overflow; const end_pos = p.pos + switch (cuh.format) { .dwarf32 => @as(usize, 4), @@ -79,7 +78,7 @@ pub const InfoReader = struct { if (di_code == code) return; while (try abbrev_reader.readAttr()) |attr| { - try p.skip(attr.form, cuh, macho_file); + try p.skip(attr.form, cuh); } } return error.UnexpectedEndOfFile; @@ -87,8 +86,7 @@ pub const InfoReader = struct { /// When skipping attributes, we don't really need to be able to handle them all /// since we only ever care about the DW_TAG_compile_unit. - pub fn skip(p: *InfoReader, form: Form, cuh: CompileUnitHeader, macho_file: *MachO) !void { - _ = macho_file; + pub fn skip(p: *InfoReader, form: Form, cuh: CompileUnitHeader) !void { switch (form) { dw.FORM.sec_offset, dw.FORM.ref_addr, @@ -158,8 +156,8 @@ pub const InfoReader = struct { _ = try p.readIndex(form); }, - else => return error.UnknownForm, - } else return error.UnknownForm, + else => return error.UnhandledForm, + } else return error.UnhandledForm, } } @@ -195,7 +193,7 @@ pub const InfoReader = struct { return switch (form) { dw.FORM.strx1, dw.FORM.addrx1 => try p.readByte(), dw.FORM.strx2, dw.FORM.addrx2 => try p.readInt(u16), - dw.FORM.strx3, dw.FORM.addrx3 => error.UnhandledDwForm, + dw.FORM.strx3, dw.FORM.addrx3 => error.UnhandledForm, dw.FORM.strx4, dw.FORM.addrx4 => try p.readInt(u32), dw.FORM.strx, dw.FORM.addrx => try p.readUleb128(u64), else => return error.UnhandledIndexForm, diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig index eb997c01e4..349ee99ca4 100644 --- a/src/link/MachO/Object.zig +++ b/src/link/MachO/Object.zig @@ -1362,6 +1362,8 @@ fn parseDebugInfo(self: *Object, macho_file: *MachO) !void { if (mem.eql(u8, sect.sectName(), "__debug_str")) { dwarf.debug_str = try self.readSectionData(gpa, file, n_sect); } + // __debug_str_offs[ets] section is a new addition in DWARFv5 and is generally + // required in order to correctly parse strings. if (mem.eql(u8, sect.sectName(), "__debug_str_offs")) { dwarf.debug_str_offsets = try self.readSectionData(gpa, file, n_sect); } @@ -1369,20 +1371,22 @@ fn parseDebugInfo(self: *Object, macho_file: *MachO) !void { if (dwarf.debug_info.len == 0) return; - self.compile_unit = try self.findCompileUnit(gpa, dwarf, macho_file); + // TODO return error once we fix emitting DWARF in self-hosted backend. + // https://github.com/ziglang/zig/issues/21719 + self.compile_unit = self.findCompileUnit(gpa, dwarf) catch null; } -fn findCompileUnit(self: *Object, gpa: Allocator, ctx: Dwarf, macho_file: *MachO) !CompileUnit { +fn findCompileUnit(self: *Object, gpa: Allocator, ctx: Dwarf) !CompileUnit { var info_reader = Dwarf.InfoReader{ .ctx = ctx }; var abbrev_reader = Dwarf.AbbrevReader{ .ctx = ctx }; - const cuh = try info_reader.readCompileUnitHeader(macho_file); + const cuh = try info_reader.readCompileUnitHeader(); try abbrev_reader.seekTo(cuh.debug_abbrev_offset); const cu_decl = (try abbrev_reader.readDecl()) orelse return error.UnexpectedEndOfFile; if (cu_decl.tag != Dwarf.TAG.compile_unit) return error.UnexpectedTag; - try info_reader.seekToDie(cu_decl.code, cuh, &abbrev_reader, macho_file); + try info_reader.seekToDie(cu_decl.code, cuh, &abbrev_reader); const Pos = struct { pos: usize, @@ -1405,10 +1409,10 @@ fn findCompileUnit(self: *Object, gpa: Allocator, ctx: Dwarf, macho_file: *MachO Dwarf.AT.str_offsets_base => saved.str_offsets_base = pos, else => {}, } - try info_reader.skip(attr.form, cuh, macho_file); + try info_reader.skip(attr.form, cuh); } - if (saved.comp_dir == null) return error.MissingCompDir; + if (saved.comp_dir == null) return error.MissingCompileDir; if (saved.tu_name == null) return error.MissingTuName; const str_offsets_base: ?u64 = if (saved.str_offsets_base) |str_offsets_base| str_offsets_base: { @@ -1433,7 +1437,7 @@ fn findCompileUnit(self: *Object, gpa: Allocator, ctx: Dwarf, macho_file: *MachO Dwarf.FORM.strx3, Dwarf.FORM.strx4, => blk: { - const base = str_offsets_base orelse return error.MalformedDwarf; + const base = str_offsets_base orelse return error.MissingStrOffsetsBase; break :blk try self.addString(gpa, try info_reader.readStringIndexed(pos.form, cuh, base)); }, else => return error.InvalidForm, -- cgit v1.2.3 From 29c7f6810fe00cf69adf81d635d3410402b530e8 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Fri, 18 Oct 2024 10:40:05 +0200 Subject: macho: fix 32bit builds --- src/link/MachO/Dwarf.zig | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) (limited to 'src/link') diff --git a/src/link/MachO/Dwarf.zig b/src/link/MachO/Dwarf.zig index 0d4beadcd2..fdc3f33bbc 100644 --- a/src/link/MachO/Dwarf.zig +++ b/src/link/MachO/Dwarf.zig @@ -14,10 +14,18 @@ pub fn deinit(dwarf: *Dwarf, allocator: Allocator) void { /// This is new in DWARFv5 and requires the producer to specify DW_FORM_strx* (`index` arg) /// but also DW_AT_str_offsets_base with DW_FORM_sec_offset (`base` arg) in the opening header /// of a "referencing entity" such as DW_TAG_compile_unit. -fn getOffset(debug_str_offsets: []const u8, base: u64, index: u64, dw_fmt: DwarfFormat) u64 { +fn getOffset(debug_str_offsets: []const u8, base: u64, index: u64, dw_fmt: DwarfFormat) error{Overflow}!u64 { + const base_as_usize = math.cast(usize, base) orelse return error.Overflow; + const index_as_usize = math.cast(usize, index) orelse return error.Overflow; return switch (dw_fmt) { - .dwarf32 => @as(*align(1) const u32, @ptrCast(debug_str_offsets.ptr + base + index * @sizeOf(u32))).*, - .dwarf64 => @as(*align(1) const u64, @ptrCast(debug_str_offsets.ptr + base + index * @sizeOf(u64))).*, + .dwarf32 => @as( + *align(1) const u32, + @ptrCast(debug_str_offsets.ptr + base_as_usize + index_as_usize * @sizeOf(u32)), + ).*, + .dwarf64 => @as( + *align(1) const u64, + @ptrCast(debug_str_offsets.ptr + base_as_usize + index_as_usize * @sizeOf(u64)), + ).*, }; } @@ -228,7 +236,10 @@ pub const InfoReader = struct { dw.FORM.strx4, => { const index = try p.readIndex(form); - const off = getOffset(p.ctx.debug_str_offsets, base, index, cuh.format); + const off = math.cast( + usize, + try getOffset(p.ctx.debug_str_offsets, base, index, cuh.format), + ) orelse return error.Overflow; return mem.sliceTo(@as([*:0]const u8, @ptrCast(p.ctx.debug_str.ptr + off)), 0); }, else => unreachable, -- cgit v1.2.3