diff options
| author | Andrew Kelley <superjoe30@gmail.com> | 2018-08-25 04:50:51 -0400 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2018-08-25 04:50:51 -0400 |
| commit | 4003cd4747019d79ff50aaa22415d2d3dfc15cf4 (patch) | |
| tree | 1f77690a5fb7ccbef75bcab9c8c1e008ef3c5068 /std | |
| parent | bf1f91595d4d3b5911632c671ef16e44d70dc9a6 (diff) | |
| parent | 815950996dcc92ac6ac285f2005dbac51b9cb6f8 (diff) | |
| download | zig-4003cd4747019d79ff50aaa22415d2d3dfc15cf4.tar.gz zig-4003cd4747019d79ff50aaa22415d2d3dfc15cf4.zip | |
Merge pull request #1406 from ziglang/macos-stack-traces
MacOS stack traces
closes #1365
Diffstat (limited to 'std')
| -rw-r--r-- | std/c/darwin.zig | 12 | ||||
| -rw-r--r-- | std/c/linux.zig | 3 | ||||
| -rw-r--r-- | std/debug/index.zig | 779 | ||||
| -rw-r--r-- | std/elf.zig | 5 | ||||
| -rw-r--r-- | std/hash_map.zig | 16 | ||||
| -rw-r--r-- | std/index.zig | 1 | ||||
| -rw-r--r-- | std/io.zig | 6 | ||||
| -rw-r--r-- | std/macho.zig | 468 | ||||
| -rw-r--r-- | std/os/index.zig | 29 |
9 files changed, 1023 insertions, 296 deletions
diff --git a/std/c/darwin.zig b/std/c/darwin.zig index 437b081cac..2e238e40eb 100644 --- a/std/c/darwin.zig +++ b/std/c/darwin.zig @@ -1,5 +1,8 @@ +const macho = @import("../macho.zig"); + extern "c" fn __error() *c_int; pub extern "c" fn _NSGetExecutablePath(buf: [*]u8, bufsize: *u32) c_int; +pub extern "c" fn _dyld_get_image_header(image_index: u32) ?*mach_header; pub extern "c" fn __getdirentries64(fd: c_int, buf_ptr: [*]u8, buf_len: usize, basep: *i64) usize; @@ -33,6 +36,15 @@ pub extern "c" fn sysctlnametomib(name: [*]const u8, mibp: ?*c_int, sizep: ?*usi pub extern "c" fn bind(socket: c_int, address: ?*const sockaddr, address_len: socklen_t) c_int; pub extern "c" fn socket(domain: c_int, type: c_int, protocol: c_int) c_int; +/// The value of the link editor defined symbol _MH_EXECUTE_SYM is the address +/// of the mach header in a Mach-O executable file type. It does not appear in +/// any file type other than a MH_EXECUTE file type. The type of the symbol is +/// absolute as the header is not part of any section. 
+pub extern "c" var _mh_execute_header: if (@sizeOf(usize) == 8) mach_header_64 else mach_header; + +pub const mach_header_64 = macho.mach_header_64; +pub const mach_header = macho.mach_header; + pub use @import("../os/darwin/errno.zig"); pub const _errno = __error; diff --git a/std/c/linux.zig b/std/c/linux.zig index 2699e9bd09..b0dadf071d 100644 --- a/std/c/linux.zig +++ b/std/c/linux.zig @@ -8,3 +8,6 @@ pub const pthread_attr_t = extern struct { __size: [56]u8, __align: c_long, }; + +/// See std.elf for constants for this +pub extern fn getauxval(__type: c_ulong) c_ulong; diff --git a/std/debug/index.zig b/std/debug/index.zig index 3477d1b6d3..39c41d4bc1 100644 --- a/std/debug/index.zig +++ b/std/debug/index.zig @@ -4,8 +4,8 @@ const mem = std.mem; const io = std.io; const os = std.os; const elf = std.elf; -const DW = std.dwarf; const macho = std.macho; +const DW = std.dwarf; const ArrayList = std.ArrayList; const builtin = @import("builtin"); @@ -19,9 +19,10 @@ pub const runtime_safety = switch (builtin.mode) { /// Tries to write to stderr, unbuffered, and ignores any error returned. /// Does not append a newline. -/// TODO atomic/multithread support var stderr_file: os.File = undefined; var stderr_file_out_stream: io.FileOutStream = undefined; + +/// TODO multithreaded awareness var stderr_stream: ?*io.OutStream(io.FileOutStream.Error) = null; var stderr_mutex = std.Mutex.init(); pub fn warn(comptime fmt: []const u8, args: ...) void { @@ -30,6 +31,7 @@ pub fn warn(comptime fmt: []const u8, args: ...) 
void { const stderr = getStderrStream() catch return; stderr.print(fmt, args) catch return; } + pub fn getStderrStream() !*io.OutStream(io.FileOutStream.Error) { if (stderr_stream) |st| { return st; @@ -42,14 +44,15 @@ pub fn getStderrStream() !*io.OutStream(io.FileOutStream.Error) { } } -var self_debug_info: ?*ElfStackTrace = null; -pub fn getSelfDebugInfo() !*ElfStackTrace { - if (self_debug_info) |info| { +/// TODO multithreaded awareness +var self_debug_info: ?DebugInfo = null; + +pub fn getSelfDebugInfo() !*DebugInfo { + if (self_debug_info) |*info| { return info; } else { - const info = try openSelfDebugInfo(getDebugInfoAllocator()); - self_debug_info = info; - return info; + self_debug_info = try openSelfDebugInfo(getDebugInfoAllocator()); + return &self_debug_info.?; } } @@ -60,6 +63,7 @@ fn wantTtyColor() bool { } /// Tries to print the current stack trace to stderr, unbuffered, and ignores any error returned. +/// TODO multithreaded awareness pub fn dumpCurrentStackTrace(start_addr: ?usize) void { const stderr = getStderrStream() catch return; const debug_info = getSelfDebugInfo() catch |err| { @@ -73,6 +77,7 @@ pub fn dumpCurrentStackTrace(start_addr: ?usize) void { } /// Tries to print a stack trace to stderr, unbuffered, and ignores any error returned. +/// TODO multithreaded awareness pub fn dumpStackTrace(stack_trace: *const builtin.StackTrace) void { const stderr = getStderrStream() catch return; const debug_info = getSelfDebugInfo() catch |err| { @@ -127,6 +132,7 @@ pub fn panic(comptime format: []const u8, args: ...) noreturn { panicExtra(null, first_trace_addr, format, args); } +/// TODO multithreaded awareness var panicking: u8 = 0; // TODO make this a bool pub fn panicExtra(trace: ?*const builtin.StackTrace, first_trace_addr: ?usize, comptime format: []const u8, args: ...) 
noreturn { @@ -155,7 +161,7 @@ const WHITE = "\x1b[37;1m"; const DIM = "\x1b[2m"; const RESET = "\x1b[0m"; -pub fn writeStackTrace(stack_trace: *const builtin.StackTrace, out_stream: var, allocator: *mem.Allocator, debug_info: *ElfStackTrace, tty_color: bool) !void { +pub fn writeStackTrace(stack_trace: *const builtin.StackTrace, out_stream: var, allocator: *mem.Allocator, debug_info: *DebugInfo, tty_color: bool) !void { var frame_index: usize = undefined; var frames_left: usize = undefined; if (stack_trace.index < stack_trace.instruction_addresses.len) { @@ -185,7 +191,7 @@ pub inline fn getReturnAddress(frame_count: usize) usize { return @intToPtr(*const usize, fp + @sizeOf(usize)).*; } -pub fn writeCurrentStackTrace(out_stream: var, allocator: *mem.Allocator, debug_info: *ElfStackTrace, tty_color: bool, start_addr: ?usize) !void { +pub fn writeCurrentStackTrace(out_stream: var, allocator: *mem.Allocator, debug_info: *DebugInfo, tty_color: bool, start_addr: ?usize) !void { const AddressState = union(enum) { NotLookingForStartAddress, LookingForStartAddress: usize, @@ -218,128 +224,290 @@ pub fn writeCurrentStackTrace(out_stream: var, allocator: *mem.Allocator, debug_ } } -pub fn printSourceAtAddress(debug_info: *ElfStackTrace, out_stream: var, address: usize, tty_color: bool) !void { +pub fn printSourceAtAddress(debug_info: *DebugInfo, out_stream: var, address: usize, tty_color: bool) !void { switch (builtin.os) { - builtin.Os.windows => return error.UnsupportedDebugInfo, - builtin.Os.macosx => { - // TODO(bnoordhuis) It's theoretically possible to obtain the - // compilation unit from the symbtab but it's not that useful - // in practice because the compiler dumps everything in a single - // object file. Future improvement: use external dSYM data when - // available. 
- const unknown = macho.Symbol{ - .name = "???", - .address = address, - }; - const symbol = debug_info.symbol_table.search(address) orelse &unknown; - try out_stream.print(WHITE ++ "{}" ++ RESET ++ ": " ++ DIM ++ "0x{x}" ++ " in ??? (???)" ++ RESET ++ "\n", symbol.name, address); + builtin.Os.macosx => return printSourceAtAddressMacOs(debug_info, out_stream, address, tty_color), + builtin.Os.linux => return printSourceAtAddressLinux(debug_info, out_stream, address, tty_color), + builtin.Os.windows => { + // TODO https://github.com/ziglang/zig/issues/721 + return error.UnsupportedOperatingSystem; }, - else => { - const compile_unit = findCompileUnit(debug_info, address) catch { - if (tty_color) { - try out_stream.print("???:?:?: " ++ DIM ++ "0x{x} in ??? (???)" ++ RESET ++ "\n ???\n\n", address); - } else { - try out_stream.print("???:?:?: 0x{x} in ??? (???)\n ???\n\n", address); - } - return; - }; - const compile_unit_name = try compile_unit.die.getAttrString(debug_info, DW.AT_name); - if (getLineNumberInfo(debug_info, compile_unit, address - 1)) |line_info| { - defer line_info.deinit(); - if (tty_color) { - try out_stream.print( - WHITE ++ "{}:{}:{}" ++ RESET ++ ": " ++ DIM ++ "0x{x} in ??? ({})" ++ RESET ++ "\n", - line_info.file_name, - line_info.line, - line_info.column, - address, - compile_unit_name, - ); - if (printLineFromFile(out_stream, line_info)) { - if (line_info.column == 0) { - try out_stream.write("\n"); - } else { - { - var col_i: usize = 1; - while (col_i < line_info.column) : (col_i += 1) { - try out_stream.writeByte(' '); - } - } - try out_stream.write(GREEN ++ "^" ++ RESET ++ "\n"); - } - } else |err| switch (err) { - error.EndOfFile => {}, - else => return err, - } - } else { - try out_stream.print( - "{}:{}:{}: 0x{x} in ??? 
({})\n", - line_info.file_name, - line_info.line, - line_info.column, - address, - compile_unit_name, - ); - } - } else |err| switch (err) { - error.MissingDebugInfo, error.InvalidDebugInfo => { - try out_stream.print("0x{x} in ??? ({})\n", address, compile_unit_name); - }, - else => return err, + else => return error.UnsupportedOperatingSystem, + } +} + +fn machoSearchSymbols(symbols: []const MachoSymbol, address: usize) ?*const MachoSymbol { + var min: usize = 0; + var max: usize = symbols.len - 1; // Exclude sentinel. + while (min < max) { + const mid = min + (max - min) / 2; + const curr = &symbols[mid]; + const next = &symbols[mid + 1]; + if (address >= next.address()) { + min = mid + 1; + } else if (address < curr.address()) { + max = mid; + } else { + return curr; + } + } + return null; +} + +fn printSourceAtAddressMacOs(di: *DebugInfo, out_stream: var, address: usize, tty_color: bool) !void { + const base_addr = @ptrToInt(&std.c._mh_execute_header); + const adjusted_addr = 0x100000000 + (address - base_addr); + + const symbol = machoSearchSymbols(di.symbols, adjusted_addr) orelse { + if (tty_color) { + try out_stream.print("???:?:?: " ++ DIM ++ "0x{x} in ??? (???)" ++ RESET ++ "\n\n\n", address); + } else { + try out_stream.print("???:?:?: 0x{x} in ??? 
(???)\n\n\n", address); + } + return; + }; + + const symbol_name = mem.toSliceConst(u8, di.strings.ptr + symbol.nlist.n_strx); + const compile_unit_name = if (symbol.ofile) |ofile| blk: { + const ofile_path = mem.toSliceConst(u8, di.strings.ptr + ofile.n_strx); + break :blk os.path.basename(ofile_path); + } else "???"; + if (getLineNumberInfoMacOs(di, symbol.*, adjusted_addr)) |line_info| { + defer line_info.deinit(); + try printLineInfo(di, out_stream, line_info, address, symbol_name, compile_unit_name, tty_color); + } else |err| switch (err) { + error.MissingDebugInfo, error.InvalidDebugInfo => { + if (tty_color) { + try out_stream.print("???:?:?: " ++ DIM ++ "0x{x} in {} ({})" ++ RESET ++ "\n\n\n", address, symbol_name, compile_unit_name); + } else { + try out_stream.print("???:?:?: 0x{x} in {} ({})\n\n\n", address, symbol_name, compile_unit_name); } }, + else => return err, } } -pub fn openSelfDebugInfo(allocator: *mem.Allocator) !*ElfStackTrace { - switch (builtin.object_format) { - builtin.ObjectFormat.elf => { - const st = try allocator.create(ElfStackTrace{ - .self_exe_file = undefined, - .elf = undefined, - .debug_info = undefined, - .debug_abbrev = undefined, - .debug_str = undefined, - .debug_line = undefined, - .debug_ranges = null, - .abbrev_table_list = ArrayList(AbbrevTableHeader).init(allocator), - .compile_unit_list = ArrayList(CompileUnit).init(allocator), - }); - errdefer allocator.destroy(st); - st.self_exe_file = try os.openSelfExe(); - errdefer st.self_exe_file.close(); - - try st.elf.openFile(allocator, &st.self_exe_file); - errdefer st.elf.close(); - - st.debug_info = (try st.elf.findSection(".debug_info")) orelse return error.MissingDebugInfo; - st.debug_abbrev = (try st.elf.findSection(".debug_abbrev")) orelse return error.MissingDebugInfo; - st.debug_str = (try st.elf.findSection(".debug_str")) orelse return error.MissingDebugInfo; - st.debug_line = (try st.elf.findSection(".debug_line")) orelse return error.MissingDebugInfo; - 
st.debug_ranges = (try st.elf.findSection(".debug_ranges")); - try scanAllCompileUnits(st); - return st; +pub fn printSourceAtAddressLinux(debug_info: *DebugInfo, out_stream: var, address: usize, tty_color: bool) !void { + const compile_unit = findCompileUnit(debug_info, address) catch { + if (tty_color) { + try out_stream.print("???:?:?: " ++ DIM ++ "0x{x} in ??? (???)" ++ RESET ++ "\n\n\n", address); + } else { + try out_stream.print("???:?:?: 0x{x} in ??? (???)\n\n\n", address); + } + return; + }; + const compile_unit_name = try compile_unit.die.getAttrString(debug_info, DW.AT_name); + if (getLineNumberInfoLinux(debug_info, compile_unit, address - 1)) |line_info| { + defer line_info.deinit(); + const symbol_name = "???"; + try printLineInfo(debug_info, out_stream, line_info, address, symbol_name, compile_unit_name, tty_color); + } else |err| switch (err) { + error.MissingDebugInfo, error.InvalidDebugInfo => { + if (tty_color) { + try out_stream.print("???:?:?: " ++ DIM ++ "0x{x} in ??? ({})" ++ RESET ++ "\n\n\n", address, compile_unit_name); + } else { + try out_stream.print("???:?:?: 0x{x} in ??? 
({})\n\n\n", address, compile_unit_name); + } }, - builtin.ObjectFormat.macho => { - var exe_file = try os.openSelfExe(); - defer exe_file.close(); + else => return err, + } +} - const st = try allocator.create(ElfStackTrace{ .symbol_table = try macho.loadSymbols(allocator, &io.FileInStream.init(&exe_file)) }); - errdefer allocator.destroy(st); - return st; - }, - builtin.ObjectFormat.coff => { - return error.TodoSupportCoffDebugInfo; - }, - builtin.ObjectFormat.wasm => { - return error.TodoSupportCOFFDebugInfo; - }, - builtin.ObjectFormat.unknown => { - return error.UnknownObjectFormat; +fn printLineInfo( + debug_info: *DebugInfo, + out_stream: var, + line_info: LineInfo, + address: usize, + symbol_name: []const u8, + compile_unit_name: []const u8, + tty_color: bool, +) !void { + if (tty_color) { + try out_stream.print( + WHITE ++ "{}:{}:{}" ++ RESET ++ ": " ++ DIM ++ "0x{x} in {} ({})" ++ RESET ++ "\n", + line_info.file_name, + line_info.line, + line_info.column, + address, + symbol_name, + compile_unit_name, + ); + if (printLineFromFile(out_stream, line_info)) { + if (line_info.column == 0) { + try out_stream.write("\n"); + } else { + { + var col_i: usize = 1; + while (col_i < line_info.column) : (col_i += 1) { + try out_stream.writeByte(' '); + } + } + try out_stream.write(GREEN ++ "^" ++ RESET ++ "\n"); + } + } else |err| switch (err) { + error.EndOfFile => {}, + else => return err, + } + } else { + try out_stream.print( + "{}:{}:{}: 0x{x} in {} ({})\n", + line_info.file_name, + line_info.line, + line_info.column, + address, + symbol_name, + compile_unit_name, + ); + } +} + +// TODO use this +pub const OpenSelfDebugInfoError = error{ + MissingDebugInfo, + OutOfMemory, + UnsupportedOperatingSystem, +}; + +pub fn openSelfDebugInfo(allocator: *mem.Allocator) !DebugInfo { + switch (builtin.os) { + builtin.Os.linux => return openSelfDebugInfoLinux(allocator), + builtin.Os.macosx, builtin.Os.ios => return openSelfDebugInfoMacOs(allocator), + builtin.Os.windows => { 
+ // TODO: https://github.com/ziglang/zig/issues/721 + return error.UnsupportedOperatingSystem; }, + else => return error.UnsupportedOperatingSystem, } } +fn openSelfDebugInfoLinux(allocator: *mem.Allocator) !DebugInfo { + var di = DebugInfo{ + .self_exe_file = undefined, + .elf = undefined, + .debug_info = undefined, + .debug_abbrev = undefined, + .debug_str = undefined, + .debug_line = undefined, + .debug_ranges = null, + .abbrev_table_list = ArrayList(AbbrevTableHeader).init(allocator), + .compile_unit_list = ArrayList(CompileUnit).init(allocator), + }; + di.self_exe_file = try os.openSelfExe(); + errdefer di.self_exe_file.close(); + + try di.elf.openFile(allocator, &di.self_exe_file); + errdefer di.elf.close(); + + di.debug_info = (try di.elf.findSection(".debug_info")) orelse return error.MissingDebugInfo; + di.debug_abbrev = (try di.elf.findSection(".debug_abbrev")) orelse return error.MissingDebugInfo; + di.debug_str = (try di.elf.findSection(".debug_str")) orelse return error.MissingDebugInfo; + di.debug_line = (try di.elf.findSection(".debug_line")) orelse return error.MissingDebugInfo; + di.debug_ranges = (try di.elf.findSection(".debug_ranges")); + try scanAllCompileUnits(&di); + return di; +} + +pub fn findElfSection(elf: *Elf, name: []const u8) ?*elf.Shdr { + var file_stream = io.FileInStream.init(elf.in_file); + const in = &file_stream.stream; + + section_loop: for (elf.section_headers) |*elf_section| { + if (elf_section.sh_type == SHT_NULL) continue; + + const name_offset = elf.string_section.offset + elf_section.name; + try elf.in_file.seekTo(name_offset); + + for (name) |expected_c| { + const target_c = try in.readByte(); + if (target_c == 0 or expected_c != target_c) continue :section_loop; + } + + { + const null_byte = try in.readByte(); + if (null_byte == 0) return elf_section; + } + } + + return null; +} + +fn openSelfDebugInfoMacOs(allocator: *mem.Allocator) !DebugInfo { + const hdr = &std.c._mh_execute_header; + assert(hdr.magic == 
std.macho.MH_MAGIC_64); + + const hdr_base = @ptrCast([*]u8, hdr); + var ptr = hdr_base + @sizeOf(macho.mach_header_64); + var ncmd: u32 = hdr.ncmds; + const symtab = while (ncmd != 0) : (ncmd -= 1) { + const lc = @ptrCast(*std.macho.load_command, ptr); + switch (lc.cmd) { + std.macho.LC_SYMTAB => break @ptrCast(*std.macho.symtab_command, ptr), + else => {}, + } + ptr += lc.cmdsize; // TODO https://github.com/ziglang/zig/issues/1403 + } else { + return error.MissingDebugInfo; + }; + const syms = @ptrCast([*]macho.nlist_64, hdr_base + symtab.symoff)[0..symtab.nsyms]; + const strings = @ptrCast([*]u8, hdr_base + symtab.stroff)[0..symtab.strsize]; + + const symbols_buf = try allocator.alloc(MachoSymbol, syms.len); + + var ofile: ?*macho.nlist_64 = null; + var reloc: u64 = 0; + var symbol_index: usize = 0; + var last_len: u64 = 0; + for (syms) |*sym| { + if (sym.n_type & std.macho.N_STAB != 0) { + switch (sym.n_type) { + std.macho.N_OSO => { + ofile = sym; + reloc = 0; + }, + std.macho.N_FUN => { + if (sym.n_sect == 0) { + last_len = sym.n_value; + } else { + symbols_buf[symbol_index] = MachoSymbol{ + .nlist = sym, + .ofile = ofile, + .reloc = reloc, + }; + symbol_index += 1; + } + }, + std.macho.N_BNSYM => { + if (reloc == 0) { + reloc = sym.n_value; + } + }, + else => continue, + } + } + } + const sentinel = try allocator.createOne(macho.nlist_64); + sentinel.* = macho.nlist_64{ + .n_strx = 0, + .n_type = 36, + .n_sect = 0, + .n_desc = 0, + .n_value = symbols_buf[symbol_index - 1].nlist.n_value + last_len, + }; + + const symbols = allocator.shrink(MachoSymbol, symbols_buf, symbol_index); + + // Even though lld emits symbols in ascending order, this debug code + // should work for programs linked in any valid way. + // This sort is so that we can binary search later. 
+ std.sort.sort(MachoSymbol, symbols, MachoSymbol.addressLessThan); + + return DebugInfo{ + .ofiles = DebugInfo.OFileTable.init(allocator), + .symbols = symbols, + .strings = strings, + }; +} + fn printLineFromFile(out_stream: var, line_info: *const LineInfo) !void { var f = try os.File.openRead(line_info.file_name); defer f.close(); @@ -372,12 +540,42 @@ fn printLineFromFile(out_stream: var, line_info: *const LineInfo) !void { } } -pub const ElfStackTrace = switch (builtin.os) { - builtin.Os.macosx => struct { - symbol_table: macho.SymbolTable, +const MachoSymbol = struct { + nlist: *macho.nlist_64, + ofile: ?*macho.nlist_64, + reloc: u64, + + /// Returns the address from the macho file + fn address(self: MachoSymbol) u64 { + return self.nlist.n_value; + } + + fn addressLessThan(lhs: MachoSymbol, rhs: MachoSymbol) bool { + return lhs.address() < rhs.address(); + } +}; + +const MachOFile = struct { + bytes: []align(@alignOf(macho.mach_header_64)) const u8, + sect_debug_info: ?*const macho.section_64, + sect_debug_line: ?*const macho.section_64, +}; - pub fn close(self: *ElfStackTrace) void { - self.symbol_table.deinit(); +pub const DebugInfo = switch (builtin.os) { + builtin.Os.macosx => struct { + symbols: []const MachoSymbol, + strings: []const u8, + ofiles: OFileTable, + + const OFileTable = std.HashMap( + *macho.nlist_64, + MachOFile, + std.hash_map.getHashPtrAddrFn(*macho.nlist_64), + std.hash_map.getTrivialEqlFn(*macho.nlist_64), + ); + + pub fn allocator(self: DebugInfo) *mem.Allocator { + return self.ofiles.allocator; } }, else => struct { @@ -391,17 +589,17 @@ pub const ElfStackTrace = switch (builtin.os) { abbrev_table_list: ArrayList(AbbrevTableHeader), compile_unit_list: ArrayList(CompileUnit), - pub fn allocator(self: *const ElfStackTrace) *mem.Allocator { + pub fn allocator(self: DebugInfo) *mem.Allocator { return self.abbrev_table_list.allocator; } - pub fn readString(self: *ElfStackTrace) ![]u8 { + pub fn readString(self: *DebugInfo) ![]u8 { var 
in_file_stream = io.FileInStream.init(&self.self_exe_file); const in_stream = &in_file_stream.stream; return readStringRaw(self.allocator(), in_stream); } - pub fn close(self: *ElfStackTrace) void { + pub fn close(self: *DebugInfo) void { self.self_exe_file.close(); self.elf.close(); } @@ -508,7 +706,7 @@ const Die = struct { }; } - fn getAttrString(self: *const Die, st: *ElfStackTrace, id: u64) ![]u8 { + fn getAttrString(self: *const Die, st: *DebugInfo, id: u64) ![]u8 { const form_value = self.getAttr(id) orelse return error.MissingDebugInfo; return switch (form_value.*) { FormValue.String => |value| value, @@ -623,7 +821,7 @@ fn readStringRaw(allocator: *mem.Allocator, in_stream: var) ![]u8 { return buf.toSlice(); } -fn getString(st: *ElfStackTrace, offset: u64) ![]u8 { +fn getString(st: *DebugInfo, offset: u64) ![]u8 { const pos = st.debug_str.offset + offset; try st.self_exe_file.seekTo(pos); return st.readString(); @@ -730,7 +928,7 @@ fn parseFormValue(allocator: *mem.Allocator, in_stream: var, form_id: u64, is_64 }; } -fn parseAbbrevTable(st: *ElfStackTrace) !AbbrevTable { +fn parseAbbrevTable(st: *DebugInfo) !AbbrevTable { const in_file = &st.self_exe_file; var in_file_stream = io.FileInStream.init(in_file); const in_stream = &in_file_stream.stream; @@ -760,7 +958,7 @@ fn parseAbbrevTable(st: *ElfStackTrace) !AbbrevTable { /// Gets an already existing AbbrevTable given the abbrev_offset, or if not found, /// seeks in the stream and parses it. 
-fn getAbbrevTable(st: *ElfStackTrace, abbrev_offset: u64) !*const AbbrevTable { +fn getAbbrevTable(st: *DebugInfo, abbrev_offset: u64) !*const AbbrevTable { for (st.abbrev_table_list.toSlice()) |*header| { if (header.offset == abbrev_offset) { return &header.table; @@ -781,7 +979,7 @@ fn getAbbrevTableEntry(abbrev_table: *const AbbrevTable, abbrev_code: u64) ?*con return null; } -fn parseDie(st: *ElfStackTrace, abbrev_table: *const AbbrevTable, is_64: bool) !Die { +fn parseDie(st: *DebugInfo, abbrev_table: *const AbbrevTable, is_64: bool) !Die { const in_file = &st.self_exe_file; var in_file_stream = io.FileInStream.init(in_file); const in_stream = &in_file_stream.stream; @@ -803,12 +1001,210 @@ fn parseDie(st: *ElfStackTrace, abbrev_table: *const AbbrevTable, is_64: bool) ! return result; } -fn getLineNumberInfo(st: *ElfStackTrace, compile_unit: *const CompileUnit, target_address: usize) !LineInfo { - const compile_unit_cwd = try compile_unit.die.getAttrString(st, DW.AT_comp_dir); +fn getLineNumberInfoMacOs(di: *DebugInfo, symbol: MachoSymbol, target_address: usize) !LineInfo { + const ofile = symbol.ofile orelse return error.MissingDebugInfo; + const gop = try di.ofiles.getOrPut(ofile); + const mach_o_file = if (gop.found_existing) &gop.kv.value else blk: { + errdefer _ = di.ofiles.remove(ofile); + const ofile_path = mem.toSliceConst(u8, di.strings.ptr + ofile.n_strx); + + gop.kv.value = MachOFile{ + .bytes = try std.io.readFileAllocAligned(di.ofiles.allocator, ofile_path, @alignOf(macho.mach_header_64)), + .sect_debug_info = null, + .sect_debug_line = null, + }; + const hdr = @ptrCast(*const macho.mach_header_64, gop.kv.value.bytes.ptr); + if (hdr.magic != std.macho.MH_MAGIC_64) return error.InvalidDebugInfo; + + const hdr_base = @ptrCast([*]const u8, hdr); + var ptr = hdr_base + @sizeOf(macho.mach_header_64); + var ncmd: u32 = hdr.ncmds; + const segcmd = while (ncmd != 0) : (ncmd -= 1) { + const lc = @ptrCast(*const std.macho.load_command, ptr); + switch 
(lc.cmd) { + std.macho.LC_SEGMENT_64 => break @ptrCast(*const std.macho.segment_command_64, ptr), + else => {}, + } + ptr += lc.cmdsize; // TODO https://github.com/ziglang/zig/issues/1403 + } else { + return error.MissingDebugInfo; + }; + const sections = @alignCast(@alignOf(macho.section_64), @ptrCast([*]const macho.section_64, ptr + @sizeOf(std.macho.segment_command_64)))[0..segcmd.nsects]; + for (sections) |*sect| { + if (sect.flags & macho.SECTION_TYPE == macho.S_REGULAR and + (sect.flags & macho.SECTION_ATTRIBUTES) & macho.S_ATTR_DEBUG == macho.S_ATTR_DEBUG) + { + const sect_name = mem.toSliceConst(u8, &sect.sectname); + if (mem.eql(u8, sect_name, "__debug_line")) { + gop.kv.value.sect_debug_line = sect; + } else if (mem.eql(u8, sect_name, "__debug_info")) { + gop.kv.value.sect_debug_info = sect; + } + } + } - const in_file = &st.self_exe_file; - const debug_line_end = st.debug_line.offset + st.debug_line.size; - var this_offset = st.debug_line.offset; + break :blk &gop.kv.value; + }; + + const sect_debug_line = mach_o_file.sect_debug_line orelse return error.MissingDebugInfo; + var ptr = mach_o_file.bytes.ptr + sect_debug_line.offset; + + var is_64: bool = undefined; + const unit_length = try readInitialLengthMem(&ptr, &is_64); + if (unit_length == 0) return error.MissingDebugInfo; + + const version = readIntMem(&ptr, u16, builtin.Endian.Little); + // TODO support 3 and 5 + if (version != 2 and version != 4) return error.InvalidDebugInfo; + + const prologue_length = if (is_64) + readIntMem(&ptr, u64, builtin.Endian.Little) + else + readIntMem(&ptr, u32, builtin.Endian.Little); + const prog_start = ptr + prologue_length; + + const minimum_instruction_length = readByteMem(&ptr); + if (minimum_instruction_length == 0) return error.InvalidDebugInfo; + + if (version >= 4) { + // maximum_operations_per_instruction + ptr += 1; + } + + const default_is_stmt = readByteMem(&ptr) != 0; + const line_base = readByteSignedMem(&ptr); + + const line_range = readByteMem(&ptr); + 
if (line_range == 0) return error.InvalidDebugInfo; + + const opcode_base = readByteMem(&ptr); + + const standard_opcode_lengths = ptr[0 .. opcode_base - 1]; + ptr += opcode_base - 1; + + var include_directories = ArrayList([]const u8).init(di.allocator()); + try include_directories.append(""); + while (true) { + const dir = readStringMem(&ptr); + if (dir.len == 0) break; + try include_directories.append(dir); + } + + var file_entries = ArrayList(FileEntry).init(di.allocator()); + var prog = LineNumberProgram.init(default_is_stmt, include_directories.toSliceConst(), &file_entries, target_address); + + while (true) { + const file_name = readStringMem(&ptr); + if (file_name.len == 0) break; + const dir_index = try readULeb128Mem(&ptr); + const mtime = try readULeb128Mem(&ptr); + const len_bytes = try readULeb128Mem(&ptr); + try file_entries.append(FileEntry{ + .file_name = file_name, + .dir_index = dir_index, + .mtime = mtime, + .len_bytes = len_bytes, + }); + } + + ptr = prog_start; + while (true) { + const opcode = readByteMem(&ptr); + + if (opcode == DW.LNS_extended_op) { + const op_size = try readULeb128Mem(&ptr); + if (op_size < 1) return error.InvalidDebugInfo; + var sub_op = readByteMem(&ptr); + switch (sub_op) { + DW.LNE_end_sequence => { + prog.end_sequence = true; + if (try prog.checkLineMatch()) |info| return info; + return error.MissingDebugInfo; + }, + DW.LNE_set_address => { + const addr = readIntMem(&ptr, usize, builtin.Endian.Little); + prog.address = symbol.reloc + addr; + }, + DW.LNE_define_file => { + const file_name = readStringMem(&ptr); + const dir_index = try readULeb128Mem(&ptr); + const mtime = try readULeb128Mem(&ptr); + const len_bytes = try readULeb128Mem(&ptr); + try file_entries.append(FileEntry{ + .file_name = file_name, + .dir_index = dir_index, + .mtime = mtime, + .len_bytes = len_bytes, + }); + }, + else => { + ptr += op_size - 1; + }, + } + } else if (opcode >= opcode_base) { + // special opcodes + const adjusted_opcode = opcode - 
opcode_base; + const inc_addr = minimum_instruction_length * (adjusted_opcode / line_range); + const inc_line = i32(line_base) + i32(adjusted_opcode % line_range); + prog.line += inc_line; + prog.address += inc_addr; + if (try prog.checkLineMatch()) |info| return info; + prog.basic_block = false; + } else { + switch (opcode) { + DW.LNS_copy => { + if (try prog.checkLineMatch()) |info| return info; + prog.basic_block = false; + }, + DW.LNS_advance_pc => { + const arg = try readULeb128Mem(&ptr); + prog.address += arg * minimum_instruction_length; + }, + DW.LNS_advance_line => { + const arg = try readILeb128Mem(&ptr); + prog.line += arg; + }, + DW.LNS_set_file => { + const arg = try readULeb128Mem(&ptr); + prog.file = arg; + }, + DW.LNS_set_column => { + const arg = try readULeb128Mem(&ptr); + prog.column = arg; + }, + DW.LNS_negate_stmt => { + prog.is_stmt = !prog.is_stmt; + }, + DW.LNS_set_basic_block => { + prog.basic_block = true; + }, + DW.LNS_const_add_pc => { + const inc_addr = minimum_instruction_length * ((255 - opcode_base) / line_range); + prog.address += inc_addr; + }, + DW.LNS_fixed_advance_pc => { + const arg = readIntMem(&ptr, u16, builtin.Endian.Little); + prog.address += arg; + }, + DW.LNS_set_prologue_end => {}, + else => { + if (opcode - 1 >= standard_opcode_lengths.len) return error.InvalidDebugInfo; + const len_bytes = standard_opcode_lengths[opcode - 1]; + ptr += len_bytes; + }, + } + } + } + + return error.MissingDebugInfo; +} + +fn getLineNumberInfoLinux(di: *DebugInfo, compile_unit: *const CompileUnit, target_address: usize) !LineInfo { + const compile_unit_cwd = try compile_unit.die.getAttrString(di, DW.AT_comp_dir); + + const in_file = &di.self_exe_file; + const debug_line_end = di.debug_line.offset + di.debug_line.size; + var this_offset = di.debug_line.offset; var this_index: usize = 0; var in_file_stream = io.FileInStream.init(in_file); @@ -827,11 +1223,11 @@ fn getLineNumberInfo(st: *ElfStackTrace, compile_unit: *const CompileUnit, targe 
continue; } - const version = try in_stream.readInt(st.elf.endian, u16); + const version = try in_stream.readInt(di.elf.endian, u16); // TODO support 3 and 5 if (version != 2 and version != 4) return error.InvalidDebugInfo; - const prologue_length = if (is_64) try in_stream.readInt(st.elf.endian, u64) else try in_stream.readInt(st.elf.endian, u32); + const prologue_length = if (is_64) try in_stream.readInt(di.elf.endian, u64) else try in_stream.readInt(di.elf.endian, u32); const prog_start_offset = (try in_file.getPos()) + prologue_length; const minimum_instruction_length = try in_stream.readByte(); @@ -850,7 +1246,7 @@ fn getLineNumberInfo(st: *ElfStackTrace, compile_unit: *const CompileUnit, targe const opcode_base = try in_stream.readByte(); - const standard_opcode_lengths = try st.allocator().alloc(u8, opcode_base - 1); + const standard_opcode_lengths = try di.allocator().alloc(u8, opcode_base - 1); { var i: usize = 0; @@ -859,19 +1255,19 @@ fn getLineNumberInfo(st: *ElfStackTrace, compile_unit: *const CompileUnit, targe } } - var include_directories = ArrayList([]u8).init(st.allocator()); + var include_directories = ArrayList([]u8).init(di.allocator()); try include_directories.append(compile_unit_cwd); while (true) { - const dir = try st.readString(); + const dir = try di.readString(); if (dir.len == 0) break; try include_directories.append(dir); } - var file_entries = ArrayList(FileEntry).init(st.allocator()); + var file_entries = ArrayList(FileEntry).init(di.allocator()); var prog = LineNumberProgram.init(default_is_stmt, include_directories.toSliceConst(), &file_entries, target_address); while (true) { - const file_name = try st.readString(); + const file_name = try di.readString(); if (file_name.len == 0) break; const dir_index = try readULeb128(in_stream); const mtime = try readULeb128(in_stream); @@ -900,11 +1296,11 @@ fn getLineNumberInfo(st: *ElfStackTrace, compile_unit: *const CompileUnit, targe return error.MissingDebugInfo; }, DW.LNE_set_address => 
{ - const addr = try in_stream.readInt(st.elf.endian, usize); + const addr = try in_stream.readInt(di.elf.endian, usize); prog.address = addr; }, DW.LNE_define_file => { - const file_name = try st.readString(); + const file_name = try di.readString(); const dir_index = try readULeb128(in_stream); const mtime = try readULeb128(in_stream); const len_bytes = try readULeb128(in_stream); @@ -962,7 +1358,7 @@ fn getLineNumberInfo(st: *ElfStackTrace, compile_unit: *const CompileUnit, targe prog.address += inc_addr; }, DW.LNS_fixed_advance_pc => { - const arg = try in_stream.readInt(st.elf.endian, u16); + const arg = try in_stream.readInt(di.elf.endian, u16); prog.address += arg; }, DW.LNS_set_prologue_end => {}, @@ -981,7 +1377,7 @@ fn getLineNumberInfo(st: *ElfStackTrace, compile_unit: *const CompileUnit, targe return error.MissingDebugInfo; } -fn scanAllCompileUnits(st: *ElfStackTrace) !void { +fn scanAllCompileUnits(st: *DebugInfo) !void { const debug_info_end = st.debug_info.offset + st.debug_info.size; var this_unit_offset = st.debug_info.offset; var cu_index: usize = 0; @@ -1051,7 +1447,7 @@ fn scanAllCompileUnits(st: *ElfStackTrace) !void { } } -fn findCompileUnit(st: *ElfStackTrace, target_address: u64) !*const CompileUnit { +fn findCompileUnit(st: *DebugInfo, target_address: u64) !*const CompileUnit { var in_file_stream = io.FileInStream.init(&st.self_exe_file); const in_stream = &in_file_stream.stream; for (st.compile_unit_list.toSlice()) |*compile_unit| { @@ -1085,6 +1481,89 @@ fn findCompileUnit(st: *ElfStackTrace, target_address: u64) !*const CompileUnit return error.MissingDebugInfo; } +fn readIntMem(ptr: *[*]const u8, comptime T: type, endian: builtin.Endian) T { + const result = mem.readInt(ptr.*[0..@sizeOf(T)], T, endian); + ptr.* += @sizeOf(T); + return result; +} + +fn readByteMem(ptr: *[*]const u8) u8 { + const result = ptr.*[0]; + ptr.* += 1; + return result; +} + +fn readByteSignedMem(ptr: *[*]const u8) i8 { + return @bitCast(i8, readByteMem(ptr)); 
+} + +fn readInitialLengthMem(ptr: *[*]const u8, is_64: *bool) !u64 { + const first_32_bits = mem.readIntLE(u32, ptr.*[0..4]); + is_64.* = (first_32_bits == 0xffffffff); + if (is_64.*) { + ptr.* += 4; + const result = mem.readIntLE(u64, ptr.*[0..8]); + ptr.* += 8; + return result; + } else { + if (first_32_bits >= 0xfffffff0) return error.InvalidDebugInfo; + ptr.* += 4; + return u64(first_32_bits); + } +} + +fn readStringMem(ptr: *[*]const u8) []const u8 { + const result = mem.toSliceConst(u8, ptr.*); + ptr.* += result.len + 1; + return result; +} + +fn readULeb128Mem(ptr: *[*]const u8) !u64 { + var result: u64 = 0; + var shift: usize = 0; + var i: usize = 0; + + while (true) { + const byte = ptr.*[i]; + i += 1; + + var operand: u64 = undefined; + + if (@shlWithOverflow(u64, byte & 0b01111111, @intCast(u6, shift), &operand)) return error.InvalidDebugInfo; + + result |= operand; + + if ((byte & 0b10000000) == 0) { + ptr.* += i; + return result; + } + + shift += 7; + } +} +fn readILeb128Mem(ptr: *[*]const u8) !i64 { + var result: i64 = 0; + var shift: usize = 0; + var i: usize = 0; + + while (true) { + const byte = ptr.*[i]; + i += 1; + + var operand: i64 = undefined; + if (@shlWithOverflow(i64, byte & 0b01111111, @intCast(u6, shift), &operand)) return error.InvalidDebugInfo; + + result |= operand; + shift += 7; + + if ((byte & 0b10000000) == 0) { + if (shift < @sizeOf(i64) * 8 and (byte & 0b01000000) != 0) result |= -(i64(1) << @intCast(u6, shift)); + ptr.* += i; + return result; + } + } +} + fn readInitialLength(comptime E: type, in_stream: *io.InStream(E), is_64: *bool) !u64 { const first_32_bits = try in_stream.readIntLe(u32); is_64.* = (first_32_bits == 0xffffffff); @@ -1141,7 +1620,7 @@ pub const global_allocator = &global_fixed_allocator.allocator; var global_fixed_allocator = std.heap.ThreadSafeFixedBufferAllocator.init(global_allocator_mem[0..]); var global_allocator_mem: [100 * 1024]u8 = undefined; -// TODO make thread safe +/// TODO multithreaded awareness 
var debug_info_allocator: ?*mem.Allocator = null; var debug_info_direct_allocator: std.heap.DirectAllocator = undefined; var debug_info_arena_allocator: std.heap.ArenaAllocator = undefined; diff --git a/std/elf.zig b/std/elf.zig index 8e6445c631..3d81555319 100644 --- a/std/elf.zig +++ b/std/elf.zig @@ -869,6 +869,11 @@ pub const Phdr = switch (@sizeOf(usize)) { 8 => Elf64_Phdr, else => @compileError("expected pointer size of 32 or 64"), }; +pub const Shdr = switch (@sizeOf(usize)) { + 4 => Elf32_Shdr, + 8 => Elf64_Shdr, + else => @compileError("expected pointer size of 32 or 64"), +}; pub const Sym = switch (@sizeOf(usize)) { 4 => Elf32_Sym, 8 => Elf64_Sym, diff --git a/std/hash_map.zig b/std/hash_map.zig index 0c100e15d9..9654d612a5 100644 --- a/std/hash_map.zig +++ b/std/hash_map.zig @@ -408,6 +408,22 @@ test "iterator hash map" { assert(entry.value == values[0]); } +pub fn getHashPtrAddrFn(comptime K: type) (fn (K) u32) { + return struct { + fn hash(key: K) u32 { + return getAutoHashFn(usize)(@ptrToInt(key)); + } + }.hash; +} + +pub fn getTrivialEqlFn(comptime K: type) (fn (K, K) bool) { + return struct { + fn eql(a: K, b: K) bool { + return a == b; + } + }.eql; +} + pub fn getAutoHashFn(comptime K: type) (fn (K) u32) { return struct { fn hash(key: K) u32 { diff --git a/std/index.zig b/std/index.zig index 59f54fa9d7..8dfc59b1d2 100644 --- a/std/index.zig +++ b/std/index.zig @@ -24,6 +24,7 @@ pub const empty_import = @import("empty.zig"); pub const event = @import("event.zig"); pub const fmt = @import("fmt/index.zig"); pub const hash = @import("hash/index.zig"); +pub const hash_map = @import("hash_map.zig"); pub const heap = @import("heap.zig"); pub const io = @import("io.zig"); pub const json = @import("json.zig"); diff --git a/std/io.zig b/std/io.zig index c7154065cb..369f6eede3 100644 --- a/std/io.zig +++ b/std/io.zig @@ -207,6 +207,12 @@ pub fn InStream(comptime ReadError: type) type { _ = try self.readByte(); } } + + pub fn readStruct(self: *Self, comptime 
T: type, ptr: *T) !void { + // Only extern and packed structs have defined in-memory layout. + assert(@typeInfo(T).Struct.layout != builtin.TypeInfo.ContainerLayout.Auto); + return self.readNoEof(@sliceToBytes((*[1]T)(ptr)[0..])); + } }; } diff --git a/std/macho.zig b/std/macho.zig index ddc4d334e4..4325810b03 100644 --- a/std/macho.zig +++ b/std/macho.zig @@ -1,16 +1,18 @@ -const builtin = @import("builtin"); -const std = @import("index.zig"); -const io = std.io; -const mem = std.mem; -const MH_MAGIC_64 = 0xFEEDFACF; -const MH_PIE = 0x200000; -const LC_SYMTAB = 2; +pub const mach_header = extern struct { + magic: u32, + cputype: cpu_type_t, + cpusubtype: cpu_subtype_t, + filetype: u32, + ncmds: u32, + sizeofcmds: u32, + flags: u32, +}; -const MachHeader64 = packed struct { +pub const mach_header_64 = extern struct { magic: u32, - cputype: u32, - cpusubtype: u32, + cputype: cpu_type_t, + cpusubtype: cpu_subtype_t, filetype: u32, ncmds: u32, sizeofcmds: u32, @@ -18,19 +20,138 @@ const MachHeader64 = packed struct { reserved: u32, }; -const LoadCommand = packed struct { +pub const load_command = extern struct { cmd: u32, cmdsize: u32, }; -const SymtabCommand = packed struct { - symoff: u32, - nsyms: u32, - stroff: u32, - strsize: u32, + +/// The symtab_command contains the offsets and sizes of the link-edit 4.3BSD +/// "stab" style symbol table information as described in the header files +/// <nlist.h> and <stab.h>. +pub const symtab_command = extern struct { + cmd: u32, /// LC_SYMTAB + cmdsize: u32, /// sizeof(struct symtab_command) + symoff: u32, /// symbol table offset + nsyms: u32, /// number of symbol table entries + stroff: u32, /// string table offset + strsize: u32, /// string table size in bytes +}; + +/// The linkedit_data_command contains the offsets and sizes of a blob +/// of data in the __LINKEDIT segment. 
+const linkedit_data_command = extern struct { + cmd: u32,/// LC_CODE_SIGNATURE, LC_SEGMENT_SPLIT_INFO, LC_FUNCTION_STARTS, LC_DATA_IN_CODE, LC_DYLIB_CODE_SIGN_DRS or LC_LINKER_OPTIMIZATION_HINT. + cmdsize: u32, /// sizeof(struct linkedit_data_command) + dataoff: u32 , /// file offset of data in __LINKEDIT segment + datasize: u32 , /// file size of data in __LINKEDIT segment +}; + +/// The segment load command indicates that a part of this file is to be +/// mapped into the task's address space. The size of this segment in memory, +/// vmsize, may be equal to or larger than the amount to map from this file, +/// filesize. The file is mapped starting at fileoff to the beginning of +/// the segment in memory, vmaddr. The rest of the memory of the segment, +/// if any, is allocated zero fill on demand. The segment's maximum virtual +/// memory protection and initial virtual memory protection are specified +/// by the maxprot and initprot fields. If the segment has sections then the +/// section structures directly follow the segment command and their size is +/// reflected in cmdsize. +pub const segment_command = extern struct { + cmd: u32,/// LC_SEGMENT + cmdsize: u32,/// includes sizeof section structs + segname: [16]u8,/// segment name + vmaddr: u32,/// memory address of this segment + vmsize: u32,/// memory size of this segment + fileoff: u32,/// file offset of this segment + filesize: u32,/// amount to map from the file + maxprot: vm_prot_t,/// maximum VM protection + initprot: vm_prot_t,/// initial VM protection + nsects: u32,/// number of sections in segment + flags: u32, +}; + +/// The 64-bit segment load command indicates that a part of this file is to be +/// mapped into a 64-bit task's address space. If the 64-bit segment has +/// sections then section_64 structures directly follow the 64-bit segment +/// command and their size is reflected in cmdsize. 
+pub const segment_command_64 = extern struct { + cmd: u32, /// LC_SEGMENT_64 + cmdsize: u32, /// includes sizeof section_64 structs + segname: [16]u8, /// segment name + vmaddr: u64, /// memory address of this segment + vmsize: u64, /// memory size of this segment + fileoff: u64, /// file offset of this segment + filesize: u64, /// amount to map from the file + maxprot: vm_prot_t, /// maximum VM protection + initprot: vm_prot_t, /// initial VM protection + nsects: u32, /// number of sections in segment + flags: u32, +}; + +/// A segment is made up of zero or more sections. Non-MH_OBJECT files have +/// all of their segments with the proper sections in each, and padded to the +/// specified segment alignment when produced by the link editor. The first +/// segment of a MH_EXECUTE and MH_FVMLIB format file contains the mach_header +/// and load commands of the object file before its first section. The zero +/// fill sections are always last in their segment (in all formats). This +/// allows the zeroed segment padding to be mapped into memory where zero fill +/// sections might be. The gigabyte zero fill sections, those with the section +/// type S_GB_ZEROFILL, can only be in a segment with sections of this type. +/// These segments are then placed after all other segments. +/// +/// The MH_OBJECT format has all of its sections in one segment for +/// compactness. There is no padding to a specified segment boundary and the +/// mach_header and load commands are not part of the segment. +/// +/// Sections with the same section name, sectname, going into the same segment, +/// segname, are combined by the link editor. The resulting section is aligned +/// to the maximum alignment of the combined sections and is the new section's +/// alignment. The combined sections are aligned to their original alignment in +/// the combined section. Any padded bytes to get the specified alignment are +/// zeroed. 
+/// +/// The format of the relocation entries referenced by the reloff and nreloc +/// fields of the section structure for mach object files is described in the +/// header file <reloc.h>. +pub const @"section" = extern struct { + sectname: [16]u8, /// name of this section + segname: [16]u8, /// segment this section goes in + addr: u32, /// memory address of this section + size: u32, /// size in bytes of this section + offset: u32, /// file offset of this section + @"align": u32, /// section alignment (power of 2) + reloff: u32, /// file offset of relocation entries + nreloc: u32, /// number of relocation entries + flags: u32, /// flags (section type and attributes) + reserved1: u32, /// reserved (for offset or index) + reserved2: u32, /// reserved (for count or sizeof) +}; + +pub const section_64 = extern struct { + sectname: [16]u8, /// name of this section + segname: [16]u8, /// segment this section goes in + addr: u64, /// memory address of this section + size: u64, /// size in bytes of this section + offset: u32, /// file offset of this section + @"align": u32, /// section alignment (power of 2) + reloff: u32, /// file offset of relocation entries + nreloc: u32, /// number of relocation entries + flags: u32, /// flags (section type and attributes) + reserved1: u32, /// reserved (for offset or index) + reserved2: u32, /// reserved (for count or sizeof) + reserved3: u32, /// reserved +}; + +pub const nlist = extern struct { + n_strx: u32, + n_type: u8, + n_sect: u8, + n_desc: i16, + n_value: u32, }; -const Nlist64 = packed struct { +pub const nlist_64 = extern struct { n_strx: u32, n_type: u8, n_sect: u8, @@ -38,135 +159,190 @@ const Nlist64 = packed struct { n_value: u64, }; -pub const Symbol = struct { - name: []const u8, - address: u64, +/// After MacOS X 10.1 when a new load command is added that is required to be +/// understood by the dynamic linker for the image to execute properly the +/// LC_REQ_DYLD bit will be or'ed into the load command constant. 
If the dynamic +/// linker sees such a load command that it does not understand, it will issue an +/// "unknown load command required for execution" error and refuse to use the +/// image. Other load commands without this bit that are not understood will +/// simply be ignored. +pub const LC_REQ_DYLD = 0x80000000; - fn addressLessThan(lhs: Symbol, rhs: Symbol) bool { - return lhs.address < rhs.address; - } -}; +pub const LC_SEGMENT = 0x1; /// segment of this file to be mapped +pub const LC_SYMTAB = 0x2; /// link-edit stab symbol table info +pub const LC_SYMSEG = 0x3; /// link-edit gdb symbol table info (obsolete) +pub const LC_THREAD = 0x4; /// thread +pub const LC_UNIXTHREAD = 0x5; /// unix thread (includes a stack) +pub const LC_LOADFVMLIB = 0x6; /// load a specified fixed VM shared library +pub const LC_IDFVMLIB = 0x7; /// fixed VM shared library identification +pub const LC_IDENT = 0x8; /// object identification info (obsolete) +pub const LC_FVMFILE = 0x9; /// fixed VM file inclusion (internal use) +pub const LC_PREPAGE = 0xa; /// prepage command (internal use) +pub const LC_DYSYMTAB = 0xb; /// dynamic link-edit symbol table info +pub const LC_LOAD_DYLIB = 0xc; /// load a dynamically linked shared library +pub const LC_ID_DYLIB = 0xd; /// dynamically linked shared lib ident +pub const LC_LOAD_DYLINKER = 0xe; /// load a dynamic linker +pub const LC_ID_DYLINKER = 0xf; /// dynamic linker identification +pub const LC_PREBOUND_DYLIB = 0x10; /// modules prebound for a dynamically linked shared library +pub const LC_ROUTINES = 0x11; /// image routines +pub const LC_SUB_FRAMEWORK = 0x12; /// sub framework +pub const LC_SUB_UMBRELLA = 0x13; /// sub umbrella +pub const LC_SUB_CLIENT = 0x14; /// sub client +pub const LC_SUB_LIBRARY = 0x15; /// sub library +pub const LC_TWOLEVEL_HINTS = 0x16; /// two-level namespace lookup hints +pub const LC_PREBIND_CKSUM = 0x17; /// prebind checksum -pub const SymbolTable = struct { - allocator: *mem.Allocator, - symbols: []const Symbol, - strings: []const u8, - - 
// Doubles as an eyecatcher to calculate the PIE slide, see loadSymbols(). - // Ideally we'd use _mh_execute_header because it's always at 0x100000000 - // in the image but as it's located in a different section than executable - // code, its displacement is different. - pub fn deinit(self: *SymbolTable) void { - self.allocator.free(self.symbols); - self.symbols = []const Symbol{}; - - self.allocator.free(self.strings); - self.strings = []const u8{}; - } - - pub fn search(self: *const SymbolTable, address: usize) ?*const Symbol { - var min: usize = 0; - var max: usize = self.symbols.len - 1; // Exclude sentinel. - while (min < max) { - const mid = min + (max - min) / 2; - const curr = &self.symbols[mid]; - const next = &self.symbols[mid + 1]; - if (address >= next.address) { - min = mid + 1; - } else if (address < curr.address) { - max = mid; - } else { - return curr; - } - } - return null; - } -}; +/// load a dynamically linked shared library that is allowed to be missing +/// (all symbols are weak imported). 
+pub const LC_LOAD_WEAK_DYLIB = (0x18 | LC_REQ_DYLD); + +pub const LC_SEGMENT_64 = 0x19; /// 64-bit segment of this file to be mapped +pub const LC_ROUTINES_64 = 0x1a; /// 64-bit image routines +pub const LC_UUID = 0x1b; /// the uuid +pub const LC_RPATH = (0x1c | LC_REQ_DYLD); /// runpath additions +pub const LC_CODE_SIGNATURE = 0x1d; /// location of code signature +pub const LC_SEGMENT_SPLIT_INFO = 0x1e; /// location of info to split segments +pub const LC_REEXPORT_DYLIB = (0x1f | LC_REQ_DYLD); /// load and re-export dylib +pub const LC_LAZY_LOAD_DYLIB = 0x20; /// delay load of dylib until first use +pub const LC_ENCRYPTION_INFO = 0x21; /// encrypted segment information +pub const LC_DYLD_INFO = 0x22; /// compressed dyld information +pub const LC_DYLD_INFO_ONLY = (0x22|LC_REQ_DYLD); /// compressed dyld information only +pub const LC_LOAD_UPWARD_DYLIB = (0x23 | LC_REQ_DYLD); /// load upward dylib +pub const LC_VERSION_MIN_MACOSX = 0x24; /// build for MacOSX min OS version +pub const LC_VERSION_MIN_IPHONEOS = 0x25; /// build for iPhoneOS min OS version +pub const LC_FUNCTION_STARTS = 0x26; /// compressed table of function start addresses +pub const LC_DYLD_ENVIRONMENT = 0x27; /// string for dyld to treat like environment variable +pub const LC_MAIN = (0x28|LC_REQ_DYLD); /// replacement for LC_UNIXTHREAD +pub const LC_DATA_IN_CODE = 0x29; /// table of non-instructions in __text +pub const LC_SOURCE_VERSION = 0x2A; /// source version used to build binary +pub const LC_DYLIB_CODE_SIGN_DRS = 0x2B; /// Code signing DRs copied from linked dylibs +pub const LC_ENCRYPTION_INFO_64 = 0x2C; /// 64-bit encrypted segment information +pub const LC_LINKER_OPTION = 0x2D; /// linker options in MH_OBJECT files +pub const LC_LINKER_OPTIMIZATION_HINT = 0x2E; /// optimization hints in MH_OBJECT files +pub const LC_VERSION_MIN_TVOS = 0x2F; /// build for AppleTV min OS version +pub const LC_VERSION_MIN_WATCHOS = 0x30; /// build for Watch min OS version +pub const LC_NOTE = 0x31; /// arbitrary 
data included within a Mach-O file +pub const LC_BUILD_VERSION = 0x32; /// build for platform min OS version + +pub const MH_MAGIC = 0xfeedface; /// the mach magic number +pub const MH_CIGAM = 0xcefaedfe; /// NXSwapInt(MH_MAGIC) + +pub const MH_MAGIC_64 = 0xfeedfacf; /// the 64-bit mach magic number +pub const MH_CIGAM_64 = 0xcffaedfe; /// NXSwapInt(MH_MAGIC_64) + +pub const MH_OBJECT = 0x1; /// relocatable object file +pub const MH_EXECUTE = 0x2; /// demand paged executable file +pub const MH_FVMLIB = 0x3; /// fixed VM shared library file +pub const MH_CORE = 0x4; /// core file +pub const MH_PRELOAD = 0x5; /// preloaded executable file +pub const MH_DYLIB = 0x6; /// dynamically bound shared library +pub const MH_DYLINKER = 0x7; /// dynamic link editor +pub const MH_BUNDLE = 0x8; /// dynamically bound bundle file +pub const MH_DYLIB_STUB = 0x9; /// shared library stub for static linking only, no section contents +pub const MH_DSYM = 0xa; /// companion file with only debug sections +pub const MH_KEXT_BUNDLE = 0xb; /// x86_64 kexts + +// Constants for the flags field of the mach_header + +pub const MH_NOUNDEFS = 0x1; /// the object file has no undefined references +pub const MH_INCRLINK = 0x2; /// the object file is the output of an incremental link against a base file and can't be link edited again +pub const MH_DYLDLINK = 0x4; /// the object file is input for the dynamic linker and can't be statically link edited again +pub const MH_BINDATLOAD = 0x8; /// the object file's undefined references are bound by the dynamic linker when loaded. +pub const MH_PREBOUND = 0x10; /// the file has its dynamic undefined references prebound. 
+pub const MH_SPLIT_SEGS = 0x20; /// the file has its read-only and read-write segments split +pub const MH_LAZY_INIT = 0x40; /// the shared library init routine is to be run lazily via catching memory faults to its writeable segments (obsolete) +pub const MH_TWOLEVEL = 0x80; /// the image is using two-level name space bindings +pub const MH_FORCE_FLAT = 0x100; /// the executable is forcing all images to use flat name space bindings +pub const MH_NOMULTIDEFS = 0x200; /// this umbrella guarantees no multiple definitions of symbols in its sub-images so the two-level namespace hints can always be used. +pub const MH_NOFIXPREBINDING = 0x400; /// do not have dyld notify the prebinding agent about this executable +pub const MH_PREBINDABLE = 0x800; /// the binary is not prebound but can have its prebinding redone. only used when MH_PREBOUND is not set. +pub const MH_ALLMODSBOUND = 0x1000; /// indicates that this binary binds to all two-level namespace modules of its dependent libraries. only used when MH_PREBINDABLE and MH_TWOLEVEL are both set. +pub const MH_SUBSECTIONS_VIA_SYMBOLS = 0x2000;/// safe to divide up the sections into sub-sections via symbols for dead code stripping +pub const MH_CANONICAL = 0x4000; /// the binary has been canonicalized via the unprebind operation +pub const MH_WEAK_DEFINES = 0x8000; /// the final linked image contains external weak symbols +pub const MH_BINDS_TO_WEAK = 0x10000; /// the final linked image uses weak symbols + +pub const MH_ALLOW_STACK_EXECUTION = 0x20000;/// When this bit is set, all stacks in the task will be given stack execution privilege. Only used in MH_EXECUTE filetypes. 
+pub const MH_ROOT_SAFE = 0x40000; /// When this bit is set, the binary declares it is safe for use in processes with uid zero + +pub const MH_SETUID_SAFE = 0x80000; /// When this bit is set, the binary declares it is safe for use in processes when issetugid() is true + +pub const MH_NO_REEXPORTED_DYLIBS = 0x100000; /// When this bit is set on a dylib, the static linker does not need to examine dependent dylibs to see if any are re-exported +pub const MH_PIE = 0x200000; /// When this bit is set, the OS will load the main executable at a random address. Only used in MH_EXECUTE filetypes. +pub const MH_DEAD_STRIPPABLE_DYLIB = 0x400000; /// Only for use on dylibs. When linking against a dylib that has this bit set, the static linker will automatically not create a LC_LOAD_DYLIB load command to the dylib if no symbols are being referenced from the dylib. +pub const MH_HAS_TLV_DESCRIPTORS = 0x800000; /// Contains a section of type S_THREAD_LOCAL_VARIABLES + +pub const MH_NO_HEAP_EXECUTION = 0x1000000; /// When this bit is set, the OS will run the main executable with a non-executable heap even on platforms (e.g. i386) that don't require it. Only used in MH_EXECUTE filetypes. + +pub const MH_APP_EXTENSION_SAFE = 0x02000000; /// The code was linked for use in an application extension. + +pub const MH_NLIST_OUTOFSYNC_WITH_DYLDINFO = 0x04000000; /// The external symbols listed in the nlist symbol table do not include all the symbols listed in the dyld info. + + +/// The flags field of a section structure is separated into two parts a section +/// type and section attributes. The section types are mutually exclusive (it +/// can only have one type) but the section attributes are not (it may have more +/// than one attribute). 
+/// 256 section types +pub const SECTION_TYPE = 0x000000ff; +pub const SECTION_ATTRIBUTES = 0xffffff00; /// 24 section attributes + +pub const S_REGULAR = 0x0; /// regular section +pub const S_ZEROFILL = 0x1; /// zero fill on demand section +pub const S_CSTRING_LITERALS = 0x2; /// section with only literal C string +pub const S_4BYTE_LITERALS = 0x3; /// section with only 4 byte literals +pub const S_8BYTE_LITERALS = 0x4; /// section with only 8 byte literals +pub const S_LITERAL_POINTERS = 0x5; /// section with only pointers to literals + + +pub const N_STAB = 0xe0; /// if any of these bits set, a symbolic debugging entry +pub const N_PEXT = 0x10; /// private external symbol bit +pub const N_TYPE = 0x0e; /// mask for the type bits +pub const N_EXT = 0x01; /// external symbol bit, set for external symbols + + +pub const N_GSYM = 0x20; /// global symbol: name,,NO_SECT,type,0 +pub const N_FNAME = 0x22; /// procedure name (f77 kludge): name,,NO_SECT,0,0 +pub const N_FUN = 0x24; /// procedure: name,,n_sect,linenumber,address +pub const N_STSYM = 0x26; /// static symbol: name,,n_sect,type,address +pub const N_LCSYM = 0x28; /// .lcomm symbol: name,,n_sect,type,address +pub const N_BNSYM = 0x2e; /// begin nsect sym: 0,,n_sect,0,address +pub const N_AST = 0x32; /// AST file path: name,,NO_SECT,0,0 +pub const N_OPT = 0x3c; /// emitted with gcc2_compiled and in gcc source +pub const N_RSYM = 0x40; /// register sym: name,,NO_SECT,type,register +pub const N_SLINE = 0x44; /// src line: 0,,n_sect,linenumber,address +pub const N_ENSYM = 0x4e; /// end nsect sym: 0,,n_sect,0,address +pub const N_SSYM = 0x60; /// structure elt: name,,NO_SECT,type,struct_offset +pub const N_SO = 0x64; /// source file name: name,,n_sect,0,address +pub const N_OSO = 0x66; /// object file name: name,,0,0,st_mtime +pub const N_LSYM = 0x80; /// local sym: name,,NO_SECT,type,offset +pub const N_BINCL = 0x82; /// include file beginning: name,,NO_SECT,0,sum +pub const N_SOL = 0x84; /// #included file name: 
name,,n_sect,0,address +pub const N_PARAMS = 0x86; /// compiler parameters: name,,NO_SECT,0,0 +pub const N_VERSION = 0x88; /// compiler version: name,,NO_SECT,0,0 +pub const N_OLEVEL = 0x8A; /// compiler -O level: name,,NO_SECT,0,0 +pub const N_PSYM = 0xa0; /// parameter: name,,NO_SECT,type,offset +pub const N_EINCL = 0xa2; /// include file end: name,,NO_SECT,0,0 +pub const N_ENTRY = 0xa4; /// alternate entry: name,,n_sect,linenumber,address +pub const N_LBRAC = 0xc0; /// left bracket: 0,,NO_SECT,nesting level,address +pub const N_EXCL = 0xc2; /// deleted include file: name,,NO_SECT,0,sum +pub const N_RBRAC = 0xe0; /// right bracket: 0,,NO_SECT,nesting level,address +pub const N_BCOMM = 0xe2; /// begin common: name,,NO_SECT,0,0 +pub const N_ECOMM = 0xe4; /// end common: name,,n_sect,0,0 +pub const N_ECOML = 0xe8; /// end common (local name): 0,,n_sect,0,address +pub const N_LENG = 0xfe; /// second stab entry with length information + +/// If a segment contains any sections marked with S_ATTR_DEBUG then all +/// sections in that segment must have this attribute. No section other than +/// a section marked with this attribute may reference the contents of this +/// section. A section with this attribute may contain no symbols and must have +/// a section type S_REGULAR. The static linker will not copy section contents +/// from sections with this attribute into its output file. These sections +/// generally contain DWARF debugging info. 
+pub const S_ATTR_DEBUG = 0x02000000; /// a debug section + +pub const cpu_type_t = integer_t; +pub const cpu_subtype_t = integer_t; +pub const integer_t = c_int; +pub const vm_prot_t = c_int; -pub fn loadSymbols(allocator: *mem.Allocator, in: *io.FileInStream) !SymbolTable { - var file = in.file; - try file.seekTo(0); - - var hdr: MachHeader64 = undefined; - try readOneNoEof(in, MachHeader64, &hdr); - if (hdr.magic != MH_MAGIC_64) return error.MissingDebugInfo; - const is_pie = MH_PIE == (hdr.flags & MH_PIE); - - var pos: usize = @sizeOf(@typeOf(hdr)); - var ncmd: u32 = hdr.ncmds; - while (ncmd != 0) : (ncmd -= 1) { - try file.seekTo(pos); - var lc: LoadCommand = undefined; - try readOneNoEof(in, LoadCommand, &lc); - if (lc.cmd == LC_SYMTAB) break; - pos += lc.cmdsize; - } else { - return error.MissingDebugInfo; - } - - var cmd: SymtabCommand = undefined; - try readOneNoEof(in, SymtabCommand, &cmd); - - try file.seekTo(cmd.symoff); - var syms = try allocator.alloc(Nlist64, cmd.nsyms); - defer allocator.free(syms); - try readNoEof(in, Nlist64, syms); - - try file.seekTo(cmd.stroff); - var strings = try allocator.alloc(u8, cmd.strsize); - errdefer allocator.free(strings); - try in.stream.readNoEof(strings); - - var nsyms: usize = 0; - for (syms) |sym| - if (isSymbol(sym)) nsyms += 1; - if (nsyms == 0) return error.MissingDebugInfo; - - var symbols = try allocator.alloc(Symbol, nsyms + 1); // Room for sentinel. - errdefer allocator.free(symbols); - - var pie_slide: usize = 0; - var nsym: usize = 0; - for (syms) |sym| { - if (!isSymbol(sym)) continue; - const start = sym.n_strx; - const end = mem.indexOfScalarPos(u8, strings, start, 0).?; - const name = strings[start..end]; - const address = sym.n_value; - symbols[nsym] = Symbol{ .name = name, .address = address }; - nsym += 1; - if (is_pie and mem.eql(u8, name, "_SymbolTable_deinit")) { - pie_slide = @ptrToInt(SymbolTable.deinit) - address; - } - } - - // Effectively a no-op, lld emits symbols in ascending order. 
- std.sort.sort(Symbol, symbols[0..nsyms], Symbol.addressLessThan); - - // Insert the sentinel. Since we don't know where the last function ends, - // we arbitrarily limit it to the start address + 4 KB. - const top = symbols[nsyms - 1].address + 4096; - symbols[nsyms] = Symbol{ .name = "", .address = top }; - - if (pie_slide != 0) { - for (symbols) |*symbol| - symbol.address += pie_slide; - } - - return SymbolTable{ - .allocator = allocator, - .symbols = symbols, - .strings = strings, - }; -} - -fn readNoEof(in: *io.FileInStream, comptime T: type, result: []T) !void { - return in.stream.readNoEof(@sliceToBytes(result)); -} -fn readOneNoEof(in: *io.FileInStream, comptime T: type, result: *T) !void { - return readNoEof(in, T, (*[1]T)(result)[0..]); -} - -fn isSymbol(sym: *const Nlist64) bool { - return sym.n_value != 0 and sym.n_desc == 0; -} diff --git a/std/os/index.zig b/std/os/index.zig index a0b6e6bf45..29d887e214 100644 --- a/std/os/index.zig +++ b/std/os/index.zig @@ -635,6 +635,35 @@ fn posixExecveErrnoToErr(err: usize) PosixExecveError { pub var linux_aux_raw = []usize{0} ** 38; pub var posix_environ_raw: [][*]u8 = undefined; +/// See std.elf for the constants. +pub fn linuxGetAuxVal(index: usize) usize { + if (builtin.link_libc) { + return usize(std.c.getauxval(index)); + } else { + return linux_aux_raw[index]; + } +} + +pub fn getBaseAddress() usize { + switch (builtin.os) { + builtin.Os.linux => { + const base = linuxGetAuxVal(std.elf.AT_BASE); + if (base != 0) { + return base; + } + const phdr = linuxGetAuxVal(std.elf.AT_PHDR); + const ElfHeader = switch (@sizeOf(usize)) { + 4 => std.elf.Elf32_Ehdr, + 8 => std.elf.Elf64_Ehdr, + else => @compileError("Unsupported architecture"), + }; + return phdr - @sizeOf(ElfHeader); + }, + builtin.Os.macosx => return @ptrToInt(&std.c._mh_execute_header), + else => @compileError("Unsupported OS"), + } +} + /// Caller must free result when done. 
/// TODO make this go through libc when we have it pub fn getEnvMap(allocator: *Allocator) !BufMap { |
