| field | value | date |
|---|---|---|
| author | Andrew Kelley <andrew@ziglang.org> | 2024-08-07 11:55:30 -0700 |
| committer | GitHub <noreply@github.com> | 2024-08-07 11:55:30 -0700 |
| commit | 0e99f517f2a1c8c19d7350a221539521b365230e | |
| tree | f487716661a5771d7c41f45106bf264da1ea133f /lib/std | |
| parent | f9f894200891c8af6ce3a3ad222cd0bf1ee15587 | |
| parent | d721d9af69220c059a7be825d295a0b53081c4a0 | |
Merge pull request #20958 from ziglang/fuzz
introduce a fuzz testing web interface
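
This change reruns selected unit tests in fuzz mode and serves live coverage over a local web server. For orientation, here is a minimal sketch of the kind of test such a worker would rerun; it assumes the `std.testing.fuzzInput` API from the same development cycle and a build-runner `--fuzz` flag, neither of which appears in this lib/std-only diff:

```zig
const std = @import("std");

// A unit test doubling as a fuzz target: the fuzz workers added in this
// change rerun such tests in fuzz mode and stream coverage to the web UI.
test "does not crash on arbitrary input" {
    // Assumption: std.testing.fuzzInput yields the fuzzer-provided bytes
    // when the test is rerun in fuzz mode.
    const input = std.testing.fuzzInput(.{});
    try std.testing.expect(!std.mem.eql(u8, input, "crash me"));
}
```

With the accompanying build-runner support, an invocation along the lines of `zig build test --fuzz` would start the workers and the web interface introduced below.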
Diffstat (limited to 'lib/std')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | lib/std/Build.zig | 12 |
| -rw-r--r-- | lib/std/Build/Cache/Path.zig | 8 |
| -rw-r--r-- | lib/std/Build/Fuzz.zig | 96 |
| -rw-r--r-- | lib/std/Build/Fuzz/WebServer.zig | 679 |
| -rw-r--r-- | lib/std/Build/Fuzz/abi.zig | 69 |
| -rw-r--r-- | lib/std/Build/Step.zig | 3 |
| -rw-r--r-- | lib/std/Build/Step/Run.zig | 71 |
| -rw-r--r-- | lib/std/debug.zig | 137 |
| -rw-r--r-- | lib/std/debug/Coverage.zig | 244 |
| -rw-r--r-- | lib/std/debug/Dwarf.zig | 760 |
| -rw-r--r-- | lib/std/debug/FixedBufferReader.zig | 93 |
| -rw-r--r-- | lib/std/debug/Info.zig | 62 |
| -rw-r--r-- | lib/std/debug/SelfInfo.zig | 287 |
| -rw-r--r-- | lib/std/http.zig | 2 |
| -rw-r--r-- | lib/std/http/WebSocket.zig | 243 |
| -rw-r--r-- | lib/std/posix.zig | 5 |
| -rw-r--r-- | lib/std/zig/Server.zig | 28 |
| -rw-r--r-- | lib/std/zig/tokenizer.zig | 45 |
18 files changed, 2228 insertions, 616 deletions
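
Several of the new files (`Fuzz/abi.zig`, `Fuzz/WebServer.zig`, `Build/Step/Run.zig`) communicate through a memory-mapped coverage file whose layout is declared by `abi.SeenPcsHeader` in the diff below: the header is followed by `pcs_len` program-counter addresses, then one seen/not-seen bit per PC packed into `usize` elements. A minimal sketch of reading that layout follows; the struct is copied from the diff, while the helper is illustrative and not part of the change:

```zig
const std = @import("std");

// Copied from lib/std/Build/Fuzz/abi.zig below.
const SeenPcsHeader = extern struct {
    n_runs: usize,
    unique_runs: usize,
    pcs_len: usize,
    lowest_stack: usize,
};

/// Illustrative helper: counts how many PCs have their "seen" bit set in an
/// already-mapped coverage file.
fn countSeenPcs(mapped: []align(@alignOf(usize)) const u8) usize {
    const header: *const SeenPcsHeader = @ptrCast(mapped[0..@sizeOf(SeenPcsHeader)]);
    const pcs_len = header.pcs_len;
    // Trailing data: pcs_len usize PC addresses, then the packed seen bits.
    const bits_offset = @sizeOf(SeenPcsHeader) + pcs_len * @sizeOf(usize);
    const n_words = std.math.divCeil(usize, pcs_len, @bitSizeOf(usize)) catch unreachable;
    const words = std.mem.bytesAsSlice(usize, mapped[bits_offset..][0 .. n_words * @sizeOf(usize)]);
    var seen: usize = 0;
    var i: usize = 0;
    while (i < pcs_len) : (i += 1) {
        const word = words[i / @bitSizeOf(usize)];
        if (word & (@as(usize, 1) << @intCast(i % @bitSizeOf(usize))) != 0) seen += 1;
    }
    return seen;
}
```

The server side in `WebServer.zig` reads the same mapping: `prepareTables` resolves the trailing PC list to source locations once, and `sendCoverageContext` forwards the header counters and the seen-bit set to connected web clients.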
diff --git a/lib/std/Build.zig b/lib/std/Build.zig index 7612ad0d6d..03743cf52e 100644 --- a/lib/std/Build.zig +++ b/lib/std/Build.zig @@ -2300,22 +2300,26 @@ pub const LazyPath = union(enum) { } pub fn path(lazy_path: LazyPath, b: *Build, sub_path: []const u8) LazyPath { + return lazy_path.join(b.allocator, sub_path) catch @panic("OOM"); + } + + pub fn join(lazy_path: LazyPath, arena: Allocator, sub_path: []const u8) Allocator.Error!LazyPath { return switch (lazy_path) { .src_path => |src| .{ .src_path = .{ .owner = src.owner, - .sub_path = b.pathResolve(&.{ src.sub_path, sub_path }), + .sub_path = try fs.path.resolve(arena, &.{ src.sub_path, sub_path }), } }, .generated => |gen| .{ .generated = .{ .file = gen.file, .up = gen.up, - .sub_path = b.pathResolve(&.{ gen.sub_path, sub_path }), + .sub_path = try fs.path.resolve(arena, &.{ gen.sub_path, sub_path }), } }, .cwd_relative => |cwd_relative| .{ - .cwd_relative = b.pathResolve(&.{ cwd_relative, sub_path }), + .cwd_relative = try fs.path.resolve(arena, &.{ cwd_relative, sub_path }), }, .dependency => |dep| .{ .dependency = .{ .dependency = dep.dependency, - .sub_path = b.pathResolve(&.{ dep.sub_path, sub_path }), + .sub_path = try fs.path.resolve(arena, &.{ dep.sub_path, sub_path }), } }, }; } diff --git a/lib/std/Build/Cache/Path.zig b/lib/std/Build/Cache/Path.zig index b81786d0a8..65c6f6a9bc 100644 --- a/lib/std/Build/Cache/Path.zig +++ b/lib/std/Build/Cache/Path.zig @@ -32,16 +32,16 @@ pub fn resolvePosix(p: Path, arena: Allocator, sub_path: []const u8) Allocator.E }; } -pub fn joinString(p: Path, allocator: Allocator, sub_path: []const u8) Allocator.Error![]u8 { +pub fn joinString(p: Path, gpa: Allocator, sub_path: []const u8) Allocator.Error![]u8 { const parts: []const []const u8 = if (p.sub_path.len == 0) &.{sub_path} else &.{ p.sub_path, sub_path }; - return p.root_dir.join(allocator, parts); + return p.root_dir.join(gpa, parts); } -pub fn joinStringZ(p: Path, allocator: Allocator, sub_path: []const u8) Allocator.Error![:0]u8 { +pub fn joinStringZ(p: Path, gpa: Allocator, sub_path: []const u8) Allocator.Error![:0]u8 { const parts: []const []const u8 = if (p.sub_path.len == 0) &.{sub_path} else &.{ p.sub_path, sub_path }; - return p.root_dir.joinZ(allocator, parts); + return p.root_dir.joinZ(gpa, parts); } pub fn openFile( diff --git a/lib/std/Build/Fuzz.zig b/lib/std/Build/Fuzz.zig index 2628b92516..9857db5a1f 100644 --- a/lib/std/Build/Fuzz.zig +++ b/lib/std/Build/Fuzz.zig @@ -1,57 +1,102 @@ +const builtin = @import("builtin"); const std = @import("../std.zig"); -const Fuzz = @This(); +const Build = std.Build; const Step = std.Build.Step; const assert = std.debug.assert; const fatal = std.process.fatal; +const Allocator = std.mem.Allocator; +const log = std.log; + +const Fuzz = @This(); const build_runner = @import("root"); +pub const WebServer = @import("Fuzz/WebServer.zig"); +pub const abi = @import("Fuzz/abi.zig"); + pub fn start( + gpa: Allocator, + arena: Allocator, + global_cache_directory: Build.Cache.Directory, + zig_lib_directory: Build.Cache.Directory, + zig_exe_path: []const u8, thread_pool: *std.Thread.Pool, all_steps: []const *Step, ttyconf: std.io.tty.Config, + listen_address: std.net.Address, prog_node: std.Progress.Node, -) void { - const count = block: { +) Allocator.Error!void { + const fuzz_run_steps = block: { const rebuild_node = prog_node.start("Rebuilding Unit Tests", 0); defer rebuild_node.end(); - var count: usize = 0; var wait_group: std.Thread.WaitGroup = .{}; defer wait_group.wait(); + var 
fuzz_run_steps: std.ArrayListUnmanaged(*Step.Run) = .{}; + defer fuzz_run_steps.deinit(gpa); for (all_steps) |step| { const run = step.cast(Step.Run) orelse continue; if (run.fuzz_tests.items.len > 0 and run.producer != null) { thread_pool.spawnWg(&wait_group, rebuildTestsWorkerRun, .{ run, ttyconf, rebuild_node }); - count += 1; + try fuzz_run_steps.append(gpa, run); } } - if (count == 0) fatal("no fuzz tests found", .{}); - rebuild_node.setEstimatedTotalItems(count); - break :block count; + if (fuzz_run_steps.items.len == 0) fatal("no fuzz tests found", .{}); + rebuild_node.setEstimatedTotalItems(fuzz_run_steps.items.len); + break :block try arena.dupe(*Step.Run, fuzz_run_steps.items); }; // Detect failure. - for (all_steps) |step| { - const run = step.cast(Step.Run) orelse continue; - if (run.fuzz_tests.items.len > 0 and run.rebuilt_executable == null) + for (fuzz_run_steps) |run| { + assert(run.fuzz_tests.items.len > 0); + if (run.rebuilt_executable == null) fatal("one or more unit tests failed to be rebuilt in fuzz mode", .{}); } + var web_server: WebServer = .{ + .gpa = gpa, + .global_cache_directory = global_cache_directory, + .zig_lib_directory = zig_lib_directory, + .zig_exe_path = zig_exe_path, + .listen_address = listen_address, + .fuzz_run_steps = fuzz_run_steps, + + .msg_queue = .{}, + .mutex = .{}, + .condition = .{}, + + .coverage_files = .{}, + .coverage_mutex = .{}, + .coverage_condition = .{}, + }; + + // For accepting HTTP connections. + const web_server_thread = std.Thread.spawn(.{}, WebServer.run, .{&web_server}) catch |err| { + fatal("unable to spawn web server thread: {s}", .{@errorName(err)}); + }; + defer web_server_thread.join(); + + // For polling messages and sending updates to subscribers. + const coverage_thread = std.Thread.spawn(.{}, WebServer.coverageRun, .{&web_server}) catch |err| { + fatal("unable to spawn coverage thread: {s}", .{@errorName(err)}); + }; + defer coverage_thread.join(); + { - const fuzz_node = prog_node.start("Fuzzing", count); + const fuzz_node = prog_node.start("Fuzzing", fuzz_run_steps.len); defer fuzz_node.end(); var wait_group: std.Thread.WaitGroup = .{}; defer wait_group.wait(); - for (all_steps) |step| { - const run = step.cast(Step.Run) orelse continue; + for (fuzz_run_steps) |run| { for (run.fuzz_tests.items) |unit_test_index| { assert(run.rebuilt_executable != null); - thread_pool.spawnWg(&wait_group, fuzzWorkerRun, .{ run, unit_test_index, ttyconf, fuzz_node }); + thread_pool.spawnWg(&wait_group, fuzzWorkerRun, .{ + run, &web_server, unit_test_index, ttyconf, fuzz_node, + }); } } } - fatal("all fuzz workers crashed", .{}); + log.err("all fuzz workers crashed", .{}); } fn rebuildTestsWorkerRun(run: *Step.Run, ttyconf: std.io.tty.Config, parent_prog_node: std.Progress.Node) void { @@ -74,20 +119,21 @@ fn rebuildTestsWorkerRun(run: *Step.Run, ttyconf: std.io.tty.Config, parent_prog build_runner.printErrorMessages(gpa, &compile.step, ttyconf, stderr, false) catch {}; } - if (result) |rebuilt_bin_path| { - run.rebuilt_executable = rebuilt_bin_path; - } else |err| switch (err) { - error.MakeFailed => {}, + const rebuilt_bin_path = result catch |err| switch (err) { + error.MakeFailed => return, else => { - std.debug.print("step '{s}': failed to rebuild in fuzz mode: {s}\n", .{ + log.err("step '{s}': failed to rebuild in fuzz mode: {s}", .{ compile.step.name, @errorName(err), }); + return; }, - } + }; + run.rebuilt_executable = rebuilt_bin_path; } fn fuzzWorkerRun( run: *Step.Run, + web_server: *WebServer, unit_test_index: u32, 
ttyconf: std.io.tty.Config, parent_prog_node: std.Progress.Node, @@ -98,17 +144,19 @@ fn fuzzWorkerRun( const prog_node = parent_prog_node.start(test_name, 0); defer prog_node.end(); - run.rerunInFuzzMode(unit_test_index, prog_node) catch |err| switch (err) { + run.rerunInFuzzMode(web_server, unit_test_index, prog_node) catch |err| switch (err) { error.MakeFailed => { const stderr = std.io.getStdErr(); std.debug.lockStdErr(); defer std.debug.unlockStdErr(); build_runner.printErrorMessages(gpa, &run.step, ttyconf, stderr, false) catch {}; + return; }, else => { - std.debug.print("step '{s}': failed to rebuild '{s}' in fuzz mode: {s}\n", .{ + log.err("step '{s}': failed to rerun '{s}' in fuzz mode: {s}", .{ run.step.name, test_name, @errorName(err), }); + return; }, }; } diff --git a/lib/std/Build/Fuzz/WebServer.zig b/lib/std/Build/Fuzz/WebServer.zig new file mode 100644 index 0000000000..c0dfddacd5 --- /dev/null +++ b/lib/std/Build/Fuzz/WebServer.zig @@ -0,0 +1,679 @@ +const builtin = @import("builtin"); + +const std = @import("../../std.zig"); +const Allocator = std.mem.Allocator; +const Build = std.Build; +const Step = std.Build.Step; +const Coverage = std.debug.Coverage; +const abi = std.Build.Fuzz.abi; +const log = std.log; + +const WebServer = @This(); + +gpa: Allocator, +global_cache_directory: Build.Cache.Directory, +zig_lib_directory: Build.Cache.Directory, +zig_exe_path: []const u8, +listen_address: std.net.Address, +fuzz_run_steps: []const *Step.Run, + +/// Messages from fuzz workers. Protected by mutex. +msg_queue: std.ArrayListUnmanaged(Msg), +/// Protects `msg_queue` only. +mutex: std.Thread.Mutex, +/// Signaled when there is a message in `msg_queue`. +condition: std.Thread.Condition, + +coverage_files: std.AutoArrayHashMapUnmanaged(u64, CoverageMap), +/// Protects `coverage_files` only. +coverage_mutex: std.Thread.Mutex, +/// Signaled when `coverage_files` changes. +coverage_condition: std.Thread.Condition, + +const CoverageMap = struct { + mapped_memory: []align(std.mem.page_size) const u8, + coverage: Coverage, + source_locations: []Coverage.SourceLocation, + /// Elements are indexes into `source_locations` pointing to the unit tests that are being fuzz tested. 
+ entry_points: std.ArrayListUnmanaged(u32), + + fn deinit(cm: *CoverageMap, gpa: Allocator) void { + std.posix.munmap(cm.mapped_memory); + cm.coverage.deinit(gpa); + cm.* = undefined; + } +}; + +const Msg = union(enum) { + coverage: struct { + id: u64, + run: *Step.Run, + }, + entry_point: struct { + coverage_id: u64, + addr: u64, + }, +}; + +pub fn run(ws: *WebServer) void { + var http_server = ws.listen_address.listen(.{ + .reuse_address = true, + }) catch |err| { + log.err("failed to listen to port {d}: {s}", .{ ws.listen_address.in.getPort(), @errorName(err) }); + return; + }; + const port = http_server.listen_address.in.getPort(); + log.info("web interface listening at http://127.0.0.1:{d}/", .{port}); + + while (true) { + const connection = http_server.accept() catch |err| { + log.err("failed to accept connection: {s}", .{@errorName(err)}); + return; + }; + _ = std.Thread.spawn(.{}, accept, .{ ws, connection }) catch |err| { + log.err("unable to spawn connection thread: {s}", .{@errorName(err)}); + connection.stream.close(); + continue; + }; + } +} + +fn accept(ws: *WebServer, connection: std.net.Server.Connection) void { + defer connection.stream.close(); + + var read_buffer: [0x4000]u8 = undefined; + var server = std.http.Server.init(connection, &read_buffer); + var web_socket: std.http.WebSocket = undefined; + var send_buffer: [0x4000]u8 = undefined; + var ws_recv_buffer: [0x4000]u8 align(4) = undefined; + while (server.state == .ready) { + var request = server.receiveHead() catch |err| switch (err) { + error.HttpConnectionClosing => return, + else => { + log.err("closing http connection: {s}", .{@errorName(err)}); + return; + }, + }; + if (web_socket.init(&request, &send_buffer, &ws_recv_buffer) catch |err| { + log.err("initializing web socket: {s}", .{@errorName(err)}); + return; + }) { + serveWebSocket(ws, &web_socket) catch |err| { + log.err("unable to serve web socket connection: {s}", .{@errorName(err)}); + return; + }; + } else { + serveRequest(ws, &request) catch |err| switch (err) { + error.AlreadyReported => return, + else => |e| { + log.err("unable to serve {s}: {s}", .{ request.head.target, @errorName(e) }); + return; + }, + }; + } + } +} + +fn serveRequest(ws: *WebServer, request: *std.http.Server.Request) !void { + if (std.mem.eql(u8, request.head.target, "/") or + std.mem.eql(u8, request.head.target, "/debug") or + std.mem.eql(u8, request.head.target, "/debug/")) + { + try serveFile(ws, request, "fuzzer/index.html", "text/html"); + } else if (std.mem.eql(u8, request.head.target, "/main.js") or + std.mem.eql(u8, request.head.target, "/debug/main.js")) + { + try serveFile(ws, request, "fuzzer/main.js", "application/javascript"); + } else if (std.mem.eql(u8, request.head.target, "/main.wasm")) { + try serveWasm(ws, request, .ReleaseFast); + } else if (std.mem.eql(u8, request.head.target, "/debug/main.wasm")) { + try serveWasm(ws, request, .Debug); + } else if (std.mem.eql(u8, request.head.target, "/sources.tar") or + std.mem.eql(u8, request.head.target, "/debug/sources.tar")) + { + try serveSourcesTar(ws, request); + } else { + try request.respond("not found", .{ + .status = .not_found, + .extra_headers = &.{ + .{ .name = "content-type", .value = "text/plain" }, + }, + }); + } +} + +fn serveFile( + ws: *WebServer, + request: *std.http.Server.Request, + name: []const u8, + content_type: []const u8, +) !void { + const gpa = ws.gpa; + // The desired API is actually sendfile, which will require enhancing std.http.Server. 
+ // We load the file with every request so that the user can make changes to the file + // and refresh the HTML page without restarting this server. + const file_contents = ws.zig_lib_directory.handle.readFileAlloc(gpa, name, 10 * 1024 * 1024) catch |err| { + log.err("failed to read '{}{s}': {s}", .{ ws.zig_lib_directory, name, @errorName(err) }); + return error.AlreadyReported; + }; + defer gpa.free(file_contents); + try request.respond(file_contents, .{ + .extra_headers = &.{ + .{ .name = "content-type", .value = content_type }, + cache_control_header, + }, + }); +} + +fn serveWasm( + ws: *WebServer, + request: *std.http.Server.Request, + optimize_mode: std.builtin.OptimizeMode, +) !void { + const gpa = ws.gpa; + + var arena_instance = std.heap.ArenaAllocator.init(gpa); + defer arena_instance.deinit(); + const arena = arena_instance.allocator(); + + // Do the compilation every request, so that the user can edit the files + // and see the changes without restarting the server. + const wasm_binary_path = try buildWasmBinary(ws, arena, optimize_mode); + // std.http.Server does not have a sendfile API yet. + const file_contents = try std.fs.cwd().readFileAlloc(gpa, wasm_binary_path, 10 * 1024 * 1024); + defer gpa.free(file_contents); + try request.respond(file_contents, .{ + .extra_headers = &.{ + .{ .name = "content-type", .value = "application/wasm" }, + cache_control_header, + }, + }); +} + +fn buildWasmBinary( + ws: *WebServer, + arena: Allocator, + optimize_mode: std.builtin.OptimizeMode, +) ![]const u8 { + const gpa = ws.gpa; + + const main_src_path: Build.Cache.Path = .{ + .root_dir = ws.zig_lib_directory, + .sub_path = "fuzzer/wasm/main.zig", + }; + const walk_src_path: Build.Cache.Path = .{ + .root_dir = ws.zig_lib_directory, + .sub_path = "docs/wasm/Walk.zig", + }; + const html_render_src_path: Build.Cache.Path = .{ + .root_dir = ws.zig_lib_directory, + .sub_path = "docs/wasm/html_render.zig", + }; + + var argv: std.ArrayListUnmanaged([]const u8) = .{}; + + try argv.appendSlice(arena, &.{ + ws.zig_exe_path, "build-exe", // + "-fno-entry", // + "-O", @tagName(optimize_mode), // + "-target", "wasm32-freestanding", // + "-mcpu", "baseline+atomics+bulk_memory+multivalue+mutable_globals+nontrapping_fptoint+reference_types+sign_ext", // + "--cache-dir", ws.global_cache_directory.path orelse ".", // + "--global-cache-dir", ws.global_cache_directory.path orelse ".", // + "--name", "fuzzer", // + "-rdynamic", // + "-fsingle-threaded", // + "--dep", "Walk", // + "--dep", "html_render", // + try std.fmt.allocPrint(arena, "-Mroot={}", .{main_src_path}), // + try std.fmt.allocPrint(arena, "-MWalk={}", .{walk_src_path}), // + "--dep", "Walk", // + try std.fmt.allocPrint(arena, "-Mhtml_render={}", .{html_render_src_path}), // + "--listen=-", + }); + + var child = std.process.Child.init(argv.items, gpa); + child.stdin_behavior = .Pipe; + child.stdout_behavior = .Pipe; + child.stderr_behavior = .Pipe; + try child.spawn(); + + var poller = std.io.poll(gpa, enum { stdout, stderr }, .{ + .stdout = child.stdout.?, + .stderr = child.stderr.?, + }); + defer poller.deinit(); + + try sendMessage(child.stdin.?, .update); + try sendMessage(child.stdin.?, .exit); + + const Header = std.zig.Server.Message.Header; + var result: ?[]const u8 = null; + var result_error_bundle = std.zig.ErrorBundle.empty; + + const stdout = poller.fifo(.stdout); + + poll: while (true) { + while (stdout.readableLength() < @sizeOf(Header)) { + if (!(try poller.poll())) break :poll; + } + const header = 
stdout.reader().readStruct(Header) catch unreachable; + while (stdout.readableLength() < header.bytes_len) { + if (!(try poller.poll())) break :poll; + } + const body = stdout.readableSliceOfLen(header.bytes_len); + + switch (header.tag) { + .zig_version => { + if (!std.mem.eql(u8, builtin.zig_version_string, body)) { + return error.ZigProtocolVersionMismatch; + } + }, + .error_bundle => { + const EbHdr = std.zig.Server.Message.ErrorBundle; + const eb_hdr = @as(*align(1) const EbHdr, @ptrCast(body)); + const extra_bytes = + body[@sizeOf(EbHdr)..][0 .. @sizeOf(u32) * eb_hdr.extra_len]; + const string_bytes = + body[@sizeOf(EbHdr) + extra_bytes.len ..][0..eb_hdr.string_bytes_len]; + // TODO: use @ptrCast when the compiler supports it + const unaligned_extra = std.mem.bytesAsSlice(u32, extra_bytes); + const extra_array = try arena.alloc(u32, unaligned_extra.len); + @memcpy(extra_array, unaligned_extra); + result_error_bundle = .{ + .string_bytes = try arena.dupe(u8, string_bytes), + .extra = extra_array, + }; + }, + .emit_bin_path => { + const EbpHdr = std.zig.Server.Message.EmitBinPath; + const ebp_hdr = @as(*align(1) const EbpHdr, @ptrCast(body)); + if (!ebp_hdr.flags.cache_hit) { + log.info("source changes detected; rebuilt wasm component", .{}); + } + result = try arena.dupe(u8, body[@sizeOf(EbpHdr)..]); + }, + else => {}, // ignore other messages + } + + stdout.discard(body.len); + } + + const stderr = poller.fifo(.stderr); + if (stderr.readableLength() > 0) { + const owned_stderr = try stderr.toOwnedSlice(); + defer gpa.free(owned_stderr); + std.debug.print("{s}", .{owned_stderr}); + } + + // Send EOF to stdin. + child.stdin.?.close(); + child.stdin = null; + + switch (try child.wait()) { + .Exited => |code| { + if (code != 0) { + log.err( + "the following command exited with error code {d}:\n{s}", + .{ code, try Build.Step.allocPrintCmd(arena, null, argv.items) }, + ); + return error.WasmCompilationFailed; + } + }, + .Signal, .Stopped, .Unknown => { + log.err( + "the following command terminated unexpectedly:\n{s}", + .{try Build.Step.allocPrintCmd(arena, null, argv.items)}, + ); + return error.WasmCompilationFailed; + }, + } + + if (result_error_bundle.errorMessageCount() > 0) { + const color = std.zig.Color.auto; + result_error_bundle.renderToStdErr(color.renderOptions()); + log.err("the following command failed with {d} compilation errors:\n{s}", .{ + result_error_bundle.errorMessageCount(), + try Build.Step.allocPrintCmd(arena, null, argv.items), + }); + return error.WasmCompilationFailed; + } + + return result orelse { + log.err("child process failed to report result\n{s}", .{ + try Build.Step.allocPrintCmd(arena, null, argv.items), + }); + return error.WasmCompilationFailed; + }; +} + +fn sendMessage(file: std.fs.File, tag: std.zig.Client.Message.Tag) !void { + const header: std.zig.Client.Message.Header = .{ + .tag = tag, + .bytes_len = 0, + }; + try file.writeAll(std.mem.asBytes(&header)); +} + +fn serveWebSocket(ws: *WebServer, web_socket: *std.http.WebSocket) !void { + ws.coverage_mutex.lock(); + defer ws.coverage_mutex.unlock(); + + // On first connection, the client needs all the coverage information + // so that subsequent updates can contain only the updated bits. 
+ var prev_unique_runs: usize = 0; + var prev_entry_points: usize = 0; + try sendCoverageContext(ws, web_socket, &prev_unique_runs, &prev_entry_points); + while (true) { + ws.coverage_condition.timedWait(&ws.coverage_mutex, std.time.ns_per_ms * 500) catch {}; + try sendCoverageContext(ws, web_socket, &prev_unique_runs, &prev_entry_points); + } +} + +fn sendCoverageContext( + ws: *WebServer, + web_socket: *std.http.WebSocket, + prev_unique_runs: *usize, + prev_entry_points: *usize, +) !void { + const coverage_maps = ws.coverage_files.values(); + if (coverage_maps.len == 0) return; + // TODO: make each events URL correspond to one coverage map + const coverage_map = &coverage_maps[0]; + const cov_header: *const abi.SeenPcsHeader = @ptrCast(coverage_map.mapped_memory[0..@sizeOf(abi.SeenPcsHeader)]); + const seen_pcs = coverage_map.mapped_memory[@sizeOf(abi.SeenPcsHeader) + coverage_map.source_locations.len * @sizeOf(usize) ..]; + const n_runs = @atomicLoad(usize, &cov_header.n_runs, .monotonic); + const unique_runs = @atomicLoad(usize, &cov_header.unique_runs, .monotonic); + const lowest_stack = @atomicLoad(usize, &cov_header.lowest_stack, .monotonic); + if (prev_unique_runs.* != unique_runs) { + // There has been an update. + if (prev_unique_runs.* == 0) { + // We need to send initial context. + const header: abi.SourceIndexHeader = .{ + .flags = .{}, + .directories_len = @intCast(coverage_map.coverage.directories.entries.len), + .files_len = @intCast(coverage_map.coverage.files.entries.len), + .source_locations_len = @intCast(coverage_map.source_locations.len), + .string_bytes_len = @intCast(coverage_map.coverage.string_bytes.items.len), + }; + const iovecs: [5]std.posix.iovec_const = .{ + makeIov(std.mem.asBytes(&header)), + makeIov(std.mem.sliceAsBytes(coverage_map.coverage.directories.keys())), + makeIov(std.mem.sliceAsBytes(coverage_map.coverage.files.keys())), + makeIov(std.mem.sliceAsBytes(coverage_map.source_locations)), + makeIov(coverage_map.coverage.string_bytes.items), + }; + try web_socket.writeMessagev(&iovecs, .binary); + } + + const header: abi.CoverageUpdateHeader = .{ + .n_runs = n_runs, + .unique_runs = unique_runs, + .lowest_stack = lowest_stack, + }; + const iovecs: [2]std.posix.iovec_const = .{ + makeIov(std.mem.asBytes(&header)), + makeIov(seen_pcs), + }; + try web_socket.writeMessagev(&iovecs, .binary); + + prev_unique_runs.* = unique_runs; + } + + if (prev_entry_points.* != coverage_map.entry_points.items.len) { + const header: abi.EntryPointHeader = .{ + .flags = .{ + .locs_len = @intCast(coverage_map.entry_points.items.len), + }, + }; + const iovecs: [2]std.posix.iovec_const = .{ + makeIov(std.mem.asBytes(&header)), + makeIov(std.mem.sliceAsBytes(coverage_map.entry_points.items)), + }; + try web_socket.writeMessagev(&iovecs, .binary); + + prev_entry_points.* = coverage_map.entry_points.items.len; + } +} + +fn serveSourcesTar(ws: *WebServer, request: *std.http.Server.Request) !void { + const gpa = ws.gpa; + + var arena_instance = std.heap.ArenaAllocator.init(gpa); + defer arena_instance.deinit(); + const arena = arena_instance.allocator(); + + var send_buffer: [0x4000]u8 = undefined; + var response = request.respondStreaming(.{ + .send_buffer = &send_buffer, + .respond_options = .{ + .extra_headers = &.{ + .{ .name = "content-type", .value = "application/x-tar" }, + cache_control_header, + }, + }, + }); + const w = response.writer(); + + const DedupeTable = std.ArrayHashMapUnmanaged(Build.Cache.Path, void, Build.Cache.Path.TableAdapter, false); + var dedupe_table: 
DedupeTable = .{}; + defer dedupe_table.deinit(gpa); + + for (ws.fuzz_run_steps) |run_step| { + const compile_step_inputs = run_step.producer.?.step.inputs.table; + for (compile_step_inputs.keys(), compile_step_inputs.values()) |dir_path, *file_list| { + try dedupe_table.ensureUnusedCapacity(gpa, file_list.items.len); + for (file_list.items) |sub_path| { + // Special file "." means the entire directory. + if (std.mem.eql(u8, sub_path, ".")) continue; + const joined_path = try dir_path.join(arena, sub_path); + _ = dedupe_table.getOrPutAssumeCapacity(joined_path); + } + } + } + + const deduped_paths = dedupe_table.keys(); + const SortContext = struct { + pub fn lessThan(this: @This(), lhs: Build.Cache.Path, rhs: Build.Cache.Path) bool { + _ = this; + return switch (std.mem.order(u8, lhs.root_dir.path orelse ".", rhs.root_dir.path orelse ".")) { + .lt => true, + .gt => false, + .eq => std.mem.lessThan(u8, lhs.sub_path, rhs.sub_path), + }; + } + }; + std.mem.sortUnstable(Build.Cache.Path, deduped_paths, SortContext{}, SortContext.lessThan); + + var cwd_cache: ?[]const u8 = null; + + for (deduped_paths) |joined_path| { + var file = joined_path.root_dir.handle.openFile(joined_path.sub_path, .{}) catch |err| { + log.err("failed to open {}: {s}", .{ joined_path, @errorName(err) }); + continue; + }; + defer file.close(); + + const stat = file.stat() catch |err| { + log.err("failed to stat {}: {s}", .{ joined_path, @errorName(err) }); + continue; + }; + if (stat.kind != .file) + continue; + + const padding = p: { + const remainder = stat.size % 512; + break :p if (remainder > 0) 512 - remainder else 0; + }; + + var file_header = std.tar.output.Header.init(); + file_header.typeflag = .regular; + try file_header.setPath( + joined_path.root_dir.path orelse try memoizedCwd(arena, &cwd_cache), + joined_path.sub_path, + ); + try file_header.setSize(stat.size); + try file_header.updateChecksum(); + try w.writeAll(std.mem.asBytes(&file_header)); + try w.writeFile(file); + try w.writeByteNTimes(0, padding); + } + + // intentionally omitting the pointless trailer + //try w.writeByteNTimes(0, 512 * 2); + try response.end(); +} + +fn memoizedCwd(arena: Allocator, opt_ptr: *?[]const u8) ![]const u8 { + if (opt_ptr.*) |cached| return cached; + const result = try std.process.getCwdAlloc(arena); + opt_ptr.* = result; + return result; +} + +const cache_control_header: std.http.Header = .{ + .name = "cache-control", + .value = "max-age=0, must-revalidate", +}; + +pub fn coverageRun(ws: *WebServer) void { + ws.mutex.lock(); + defer ws.mutex.unlock(); + + while (true) { + ws.condition.wait(&ws.mutex); + for (ws.msg_queue.items) |msg| switch (msg) { + .coverage => |coverage| prepareTables(ws, coverage.run, coverage.id) catch |err| switch (err) { + error.AlreadyReported => continue, + else => |e| log.err("failed to prepare code coverage tables: {s}", .{@errorName(e)}), + }, + .entry_point => |entry_point| addEntryPoint(ws, entry_point.coverage_id, entry_point.addr) catch |err| switch (err) { + error.AlreadyReported => continue, + else => |e| log.err("failed to prepare code coverage tables: {s}", .{@errorName(e)}), + }, + }; + ws.msg_queue.clearRetainingCapacity(); + } +} + +fn prepareTables( + ws: *WebServer, + run_step: *Step.Run, + coverage_id: u64, +) error{ OutOfMemory, AlreadyReported }!void { + const gpa = ws.gpa; + + ws.coverage_mutex.lock(); + defer ws.coverage_mutex.unlock(); + + const gop = try ws.coverage_files.getOrPut(gpa, coverage_id); + if (gop.found_existing) { + // We are fuzzing the same executable with 
multiple threads. + // Perhaps the same unit test; perhaps a different one. In any + // case, since the coverage file is the same, we only have to + // notice changes to that one file in order to learn coverage for + // this particular executable. + return; + } + errdefer _ = ws.coverage_files.pop(); + + gop.value_ptr.* = .{ + .coverage = std.debug.Coverage.init, + .mapped_memory = undefined, // populated below + .source_locations = undefined, // populated below + .entry_points = .{}, + }; + errdefer gop.value_ptr.coverage.deinit(gpa); + + const rebuilt_exe_path: Build.Cache.Path = .{ + .root_dir = Build.Cache.Directory.cwd(), + .sub_path = run_step.rebuilt_executable.?, + }; + var debug_info = std.debug.Info.load(gpa, rebuilt_exe_path, &gop.value_ptr.coverage) catch |err| { + log.err("step '{s}': failed to load debug information for '{}': {s}", .{ + run_step.step.name, rebuilt_exe_path, @errorName(err), + }); + return error.AlreadyReported; + }; + defer debug_info.deinit(gpa); + + const coverage_file_path: Build.Cache.Path = .{ + .root_dir = run_step.step.owner.cache_root, + .sub_path = "v/" ++ std.fmt.hex(coverage_id), + }; + var coverage_file = coverage_file_path.root_dir.handle.openFile(coverage_file_path.sub_path, .{}) catch |err| { + log.err("step '{s}': failed to load coverage file '{}': {s}", .{ + run_step.step.name, coverage_file_path, @errorName(err), + }); + return error.AlreadyReported; + }; + defer coverage_file.close(); + + const file_size = coverage_file.getEndPos() catch |err| { + log.err("unable to check len of coverage file '{}': {s}", .{ coverage_file_path, @errorName(err) }); + return error.AlreadyReported; + }; + + const mapped_memory = std.posix.mmap( + null, + file_size, + std.posix.PROT.READ, + .{ .TYPE = .SHARED }, + coverage_file.handle, + 0, + ) catch |err| { + log.err("failed to map coverage file '{}': {s}", .{ coverage_file_path, @errorName(err) }); + return error.AlreadyReported; + }; + gop.value_ptr.mapped_memory = mapped_memory; + + const header: *const abi.SeenPcsHeader = @ptrCast(mapped_memory[0..@sizeOf(abi.SeenPcsHeader)]); + const pcs_bytes = mapped_memory[@sizeOf(abi.SeenPcsHeader)..][0 .. header.pcs_len * @sizeOf(usize)]; + const pcs = std.mem.bytesAsSlice(usize, pcs_bytes); + const source_locations = try gpa.alloc(Coverage.SourceLocation, pcs.len); + errdefer gpa.free(source_locations); + debug_info.resolveAddresses(gpa, pcs, source_locations) catch |err| { + log.err("failed to resolve addresses to source locations: {s}", .{@errorName(err)}); + return error.AlreadyReported; + }; + gop.value_ptr.source_locations = source_locations; + + ws.coverage_condition.broadcast(); +} + +fn addEntryPoint(ws: *WebServer, coverage_id: u64, addr: u64) error{ AlreadyReported, OutOfMemory }!void { + ws.coverage_mutex.lock(); + defer ws.coverage_mutex.unlock(); + + const coverage_map = ws.coverage_files.getPtr(coverage_id).?; + const ptr = coverage_map.mapped_memory; + const pcs_bytes = ptr[@sizeOf(abi.SeenPcsHeader)..][0 .. 
coverage_map.source_locations.len * @sizeOf(usize)]; + const pcs: []const usize = @alignCast(std.mem.bytesAsSlice(usize, pcs_bytes)); + const index = std.sort.upperBound(usize, pcs, addr, struct { + fn order(context: usize, item: usize) std.math.Order { + return std.math.order(item, context); + } + }.order); + if (index >= pcs.len) { + log.err("unable to find unit test entry address 0x{x} in source locations (range: 0x{x} to 0x{x})", .{ + addr, pcs[0], pcs[pcs.len - 1], + }); + return error.AlreadyReported; + } + if (false) { + const sl = coverage_map.source_locations[index]; + const file_name = coverage_map.coverage.stringAt(coverage_map.coverage.fileAt(sl.file).basename); + log.debug("server found entry point {s}:{d}:{d}", .{ + file_name, sl.line, sl.column, + }); + } + const gpa = ws.gpa; + try coverage_map.entry_points.append(gpa, @intCast(index)); +} + +fn makeIov(s: []const u8) std.posix.iovec_const { + return .{ + .base = s.ptr, + .len = s.len, + }; +} diff --git a/lib/std/Build/Fuzz/abi.zig b/lib/std/Build/Fuzz/abi.zig new file mode 100644 index 0000000000..f385f9b08a --- /dev/null +++ b/lib/std/Build/Fuzz/abi.zig @@ -0,0 +1,69 @@ +//! This file is shared among Zig code running in wildly different contexts: +//! libfuzzer, compiled alongside unit tests, the build runner, running on the +//! host computer, and the fuzzing web interface webassembly code running in +//! the browser. All of these components interface to some degree via an ABI. + +/// libfuzzer uses this and its usize is the one that counts. To match the ABI, +/// make the ints be the size of the target used with libfuzzer. +/// +/// Trailing: +/// * pc_addr: usize for each pcs_len +/// * 1 bit per pc_addr, usize elements +pub const SeenPcsHeader = extern struct { + n_runs: usize, + unique_runs: usize, + pcs_len: usize, + lowest_stack: usize, +}; + +pub const ToClientTag = enum(u8) { + source_index, + coverage_update, + entry_points, + _, +}; + +/// Sent to the fuzzer web client on first connection to the websocket URL. +/// +/// Trailing: +/// * std.debug.Coverage.String for each directories_len +/// * std.debug.Coverage.File for each files_len +/// * std.debug.Coverage.SourceLocation for each source_locations_len +/// * u8 for each string_bytes_len +pub const SourceIndexHeader = extern struct { + flags: Flags, + directories_len: u32, + files_len: u32, + source_locations_len: u32, + string_bytes_len: u32, + + pub const Flags = packed struct(u32) { + tag: ToClientTag = .source_index, + _: u24 = 0, + }; +}; + +/// Sent to the fuzzer web client whenever the set of covered source locations +/// changes. +/// +/// Trailing: +/// * one bit per source_locations_len, contained in u8 elements +pub const CoverageUpdateHeader = extern struct { + tag: ToClientTag = .coverage_update, + n_runs: u64 align(1), + unique_runs: u64 align(1), + lowest_stack: u64 align(1), +}; + +/// Sent to the fuzzer web client when the set of entry points is updated. 
+/// +/// Trailing: +/// * one u32 index of source_locations per locs_len +pub const EntryPointHeader = extern struct { + flags: Flags, + + pub const Flags = packed struct(u32) { + tag: ToClientTag = .entry_points, + locs_len: u24, + }; +}; diff --git a/lib/std/Build/Step.zig b/lib/std/Build/Step.zig index 8f3236d867..47a6e49a82 100644 --- a/lib/std/Build/Step.zig +++ b/lib/std/Build/Step.zig @@ -559,7 +559,8 @@ fn zigProcessUpdate(s: *Step, zp: *ZigProcess, watch: bool) !?[]const u8 { }, .zig_lib => zl: { if (s.cast(Step.Compile)) |compile| { - if (compile.zig_lib_dir) |lp| { + if (compile.zig_lib_dir) |zig_lib_dir| { + const lp = try zig_lib_dir.join(arena, sub_path); try addWatchInput(s, lp); break :zl; } diff --git a/lib/std/Build/Step/Run.zig b/lib/std/Build/Step/Run.zig index c2d25cd82c..5d9ebce9aa 100644 --- a/lib/std/Build/Step/Run.zig +++ b/lib/std/Build/Step/Run.zig @@ -205,6 +205,7 @@ pub fn enableTestRunnerMode(run: *Run) void { run.stdio = .zig_test; run.addArgs(&.{ std.fmt.allocPrint(arena, "--seed=0x{x}", .{b.graph.random_seed}) catch @panic("OOM"), + std.fmt.allocPrint(arena, "--cache-dir={s}", .{b.cache_root.path orelse ""}) catch @panic("OOM"), "--listen=-", }); } @@ -845,7 +846,12 @@ fn make(step: *Step, options: Step.MakeOptions) !void { ); } -pub fn rerunInFuzzMode(run: *Run, unit_test_index: u32, prog_node: std.Progress.Node) !void { +pub fn rerunInFuzzMode( + run: *Run, + web_server: *std.Build.Fuzz.WebServer, + unit_test_index: u32, + prog_node: std.Progress.Node, +) !void { const step = &run.step; const b = step.owner; const arena = b.allocator; @@ -877,7 +883,10 @@ pub fn rerunInFuzzMode(run: *Run, unit_test_index: u32, prog_node: std.Progress. const has_side_effects = false; const rand_int = std.crypto.random.int(u64); const tmp_dir_path = "tmp" ++ fs.path.sep_str ++ std.fmt.hex(rand_int); - try runCommand(run, argv_list.items, has_side_effects, tmp_dir_path, prog_node, unit_test_index); + try runCommand(run, argv_list.items, has_side_effects, tmp_dir_path, prog_node, .{ + .unit_test_index = unit_test_index, + .web_server = web_server, + }); } fn populateGeneratedPaths( @@ -952,13 +961,18 @@ fn termMatches(expected: ?std.process.Child.Term, actual: std.process.Child.Term }; } +const FuzzContext = struct { + web_server: *std.Build.Fuzz.WebServer, + unit_test_index: u32, +}; + fn runCommand( run: *Run, argv: []const []const u8, has_side_effects: bool, output_dir_path: []const u8, prog_node: std.Progress.Node, - fuzz_unit_test_index: ?u32, + fuzz_context: ?FuzzContext, ) !void { const step = &run.step; const b = step.owner; @@ -977,7 +991,7 @@ fn runCommand( var interp_argv = std.ArrayList([]const u8).init(b.allocator); defer interp_argv.deinit(); - const result = spawnChildAndCollect(run, argv, has_side_effects, prog_node, fuzz_unit_test_index) catch |err| term: { + const result = spawnChildAndCollect(run, argv, has_side_effects, prog_node, fuzz_context) catch |err| term: { // InvalidExe: cpu arch mismatch // FileNotFound: can happen with a wrong dynamic linker path if (err == error.InvalidExe or err == error.FileNotFound) interpret: { @@ -1113,7 +1127,7 @@ fn runCommand( try Step.handleVerbose2(step.owner, cwd, run.env_map, interp_argv.items); - break :term spawnChildAndCollect(run, interp_argv.items, has_side_effects, prog_node, fuzz_unit_test_index) catch |e| { + break :term spawnChildAndCollect(run, interp_argv.items, has_side_effects, prog_node, fuzz_context) catch |e| { if (!run.failing_to_execute_foreign_is_an_error) return error.MakeSkipped; return 
step.fail("unable to spawn interpreter {s}: {s}", .{ @@ -1133,7 +1147,7 @@ fn runCommand( const final_argv = if (interp_argv.items.len == 0) argv else interp_argv.items; - if (fuzz_unit_test_index != null) { + if (fuzz_context != null) { try step.handleChildProcessTerm(result.term, cwd, final_argv); return; } @@ -1298,12 +1312,12 @@ fn spawnChildAndCollect( argv: []const []const u8, has_side_effects: bool, prog_node: std.Progress.Node, - fuzz_unit_test_index: ?u32, + fuzz_context: ?FuzzContext, ) !ChildProcResult { const b = run.step.owner; const arena = b.allocator; - if (fuzz_unit_test_index != null) { + if (fuzz_context != null) { assert(!has_side_effects); assert(run.stdio == .zig_test); } @@ -1357,7 +1371,7 @@ fn spawnChildAndCollect( var timer = try std.time.Timer.start(); const result = if (run.stdio == .zig_test) - evalZigTest(run, &child, prog_node, fuzz_unit_test_index) + evalZigTest(run, &child, prog_node, fuzz_context) else evalGeneric(run, &child); @@ -1383,7 +1397,7 @@ fn evalZigTest( run: *Run, child: *std.process.Child, prog_node: std.Progress.Node, - fuzz_unit_test_index: ?u32, + fuzz_context: ?FuzzContext, ) !StdIoResult { const gpa = run.step.owner.allocator; const arena = run.step.owner.allocator; @@ -1394,8 +1408,8 @@ fn evalZigTest( }); defer poller.deinit(); - if (fuzz_unit_test_index) |index| { - try sendRunTestMessage(child.stdin.?, .start_fuzzing, index); + if (fuzz_context) |fuzz| { + try sendRunTestMessage(child.stdin.?, .start_fuzzing, fuzz.unit_test_index); } else { run.fuzz_tests.clearRetainingCapacity(); try sendMessage(child.stdin.?, .query_test_metadata); @@ -1413,6 +1427,7 @@ fn evalZigTest( var log_err_count: u32 = 0; var metadata: ?TestMetadata = null; + var coverage_id: ?u64 = null; var sub_prog_node: ?std.Progress.Node = null; defer if (sub_prog_node) |n| n.end(); @@ -1437,7 +1452,7 @@ fn evalZigTest( } }, .test_metadata => { - assert(fuzz_unit_test_index == null); + assert(fuzz_context == null); const TmHdr = std.zig.Server.Message.TestMetadata; const tm_hdr = @as(*align(1) const TmHdr, @ptrCast(body)); test_count = tm_hdr.tests_len; @@ -1466,7 +1481,7 @@ fn evalZigTest( try requestNextTest(child.stdin.?, &metadata.?, &sub_prog_node); }, .test_results => { - assert(fuzz_unit_test_index == null); + assert(fuzz_context == null); const md = metadata.?; const TrHdr = std.zig.Server.Message.TestResults; @@ -1500,6 +1515,34 @@ fn evalZigTest( try requestNextTest(child.stdin.?, &metadata.?, &sub_prog_node); }, + .coverage_id => { + const web_server = fuzz_context.?.web_server; + const msg_ptr: *align(1) const u64 = @ptrCast(body); + coverage_id = msg_ptr.*; + { + web_server.mutex.lock(); + defer web_server.mutex.unlock(); + try web_server.msg_queue.append(web_server.gpa, .{ .coverage = .{ + .id = coverage_id.?, + .run = run, + } }); + web_server.condition.signal(); + } + }, + .fuzz_start_addr => { + const web_server = fuzz_context.?.web_server; + const msg_ptr: *align(1) const u64 = @ptrCast(body); + const addr = msg_ptr.*; + { + web_server.mutex.lock(); + defer web_server.mutex.unlock(); + try web_server.msg_queue.append(web_server.gpa, .{ .entry_point = .{ + .addr = addr, + .coverage_id = coverage_id.?, + } }); + web_server.condition.signal(); + } + }, else => {}, // ignore other messages } diff --git a/lib/std/debug.zig b/lib/std/debug.zig index 4d3437f665..a3a8a533ee 100644 --- a/lib/std/debug.zig +++ b/lib/std/debug.zig @@ -14,9 +14,12 @@ const native_os = builtin.os.tag; const native_endian = native_arch.endian(); pub const MemoryAccessor = 
@import("debug/MemoryAccessor.zig"); +pub const FixedBufferReader = @import("debug/FixedBufferReader.zig"); pub const Dwarf = @import("debug/Dwarf.zig"); pub const Pdb = @import("debug/Pdb.zig"); pub const SelfInfo = @import("debug/SelfInfo.zig"); +pub const Info = @import("debug/Info.zig"); +pub const Coverage = @import("debug/Coverage.zig"); /// Unresolved source locations can be represented with a single `usize` that /// corresponds to a virtual memory address of the program counter. Combined @@ -26,6 +29,18 @@ pub const SourceLocation = struct { line: u64, column: u64, file_name: []const u8, + + pub const invalid: SourceLocation = .{ + .line = 0, + .column = 0, + .file_name = &.{}, + }; +}; + +pub const Symbol = struct { + name: []const u8 = "???", + compile_unit_name: []const u8 = "???", + source_location: ?SourceLocation = null, }; /// Deprecated because it returns the optimization mode of the standard @@ -748,7 +763,7 @@ pub fn writeCurrentStackTrace( // an overflow. We do not need to signal `StackIterator` as it will correctly detect this // condition on the subsequent iteration and return `null` thus terminating the loop. // same behaviour for x86-windows-msvc - const address = if (return_address == 0) return_address else return_address - 1; + const address = return_address -| 1; try printSourceAtAddress(debug_info, out_stream, address, tty_config); } else printLastUnwindError(&it, debug_info, out_stream, tty_config); } @@ -871,13 +886,13 @@ pub fn printSourceAtAddress(debug_info: *SelfInfo, out_stream: anytype, address: error.MissingDebugInfo, error.InvalidDebugInfo => return printUnknownSource(debug_info, out_stream, address, tty_config), else => return err, }; - defer symbol_info.deinit(debug_info.allocator); + defer if (symbol_info.source_location) |sl| debug_info.allocator.free(sl.file_name); return printLineInfo( out_stream, - symbol_info.line_info, + symbol_info.source_location, address, - symbol_info.symbol_name, + symbol_info.name, symbol_info.compile_unit_name, tty_config, printLineFromFileAnyOs, @@ -886,7 +901,7 @@ pub fn printSourceAtAddress(debug_info: *SelfInfo, out_stream: anytype, address: fn printLineInfo( out_stream: anytype, - line_info: ?SourceLocation, + source_location: ?SourceLocation, address: usize, symbol_name: []const u8, compile_unit_name: []const u8, @@ -896,8 +911,8 @@ fn printLineInfo( nosuspend { try tty_config.setColor(out_stream, .bold); - if (line_info) |*li| { - try out_stream.print("{s}:{d}:{d}", .{ li.file_name, li.line, li.column }); + if (source_location) |*sl| { + try out_stream.print("{s}:{d}:{d}", .{ sl.file_name, sl.line, sl.column }); } else { try out_stream.writeAll("???:?:?"); } @@ -910,11 +925,11 @@ fn printLineInfo( try out_stream.writeAll("\n"); // Show the matching source code line if possible - if (line_info) |li| { - if (printLineFromFile(out_stream, li)) { - if (li.column > 0) { + if (source_location) |sl| { + if (printLineFromFile(out_stream, sl)) { + if (sl.column > 0) { // The caret already takes one char - const space_needed = @as(usize, @intCast(li.column - 1)); + const space_needed = @as(usize, @intCast(sl.column - 1)); try out_stream.writeByteNTimes(' ', space_needed); try tty_config.setColor(out_stream, .green); @@ -932,10 +947,10 @@ fn printLineInfo( } } -fn printLineFromFileAnyOs(out_stream: anytype, line_info: SourceLocation) !void { +fn printLineFromFileAnyOs(out_stream: anytype, source_location: SourceLocation) !void { // Need this to always block even in async I/O mode, because this could potentially // be called 
from e.g. the event loop code crashing. - var f = try fs.cwd().openFile(line_info.file_name, .{}); + var f = try fs.cwd().openFile(source_location.file_name, .{}); defer f.close(); // TODO fstat and make sure that the file has the correct size @@ -944,7 +959,7 @@ fn printLineFromFileAnyOs(out_stream: anytype, line_info: SourceLocation) !void const line_start = seek: { var current_line_start: usize = 0; var next_line: usize = 1; - while (next_line != line_info.line) { + while (next_line != source_location.line) { const slice = buf[current_line_start..amt_read]; if (mem.indexOfScalar(u8, slice, '\n')) |pos| { next_line += 1; @@ -1481,99 +1496,6 @@ pub const SafetyLock = struct { } }; -/// Deprecated. Don't use this, just read from your memory directly. -/// -/// This only exists because someone was too lazy to rework logic that used to -/// operate on an open file to operate on a memory buffer instead. -pub const DeprecatedFixedBufferReader = struct { - buf: []const u8, - pos: usize = 0, - endian: std.builtin.Endian, - - pub const Error = error{ EndOfBuffer, Overflow, InvalidBuffer }; - - pub fn seekTo(fbr: *DeprecatedFixedBufferReader, pos: u64) Error!void { - if (pos > fbr.buf.len) return error.EndOfBuffer; - fbr.pos = @intCast(pos); - } - - pub fn seekForward(fbr: *DeprecatedFixedBufferReader, amount: u64) Error!void { - if (fbr.buf.len - fbr.pos < amount) return error.EndOfBuffer; - fbr.pos += @intCast(amount); - } - - pub inline fn readByte(fbr: *DeprecatedFixedBufferReader) Error!u8 { - if (fbr.pos >= fbr.buf.len) return error.EndOfBuffer; - defer fbr.pos += 1; - return fbr.buf[fbr.pos]; - } - - pub fn readByteSigned(fbr: *DeprecatedFixedBufferReader) Error!i8 { - return @bitCast(try fbr.readByte()); - } - - pub fn readInt(fbr: *DeprecatedFixedBufferReader, comptime T: type) Error!T { - const size = @divExact(@typeInfo(T).Int.bits, 8); - if (fbr.buf.len - fbr.pos < size) return error.EndOfBuffer; - defer fbr.pos += size; - return std.mem.readInt(T, fbr.buf[fbr.pos..][0..size], fbr.endian); - } - - pub fn readIntChecked( - fbr: *DeprecatedFixedBufferReader, - comptime T: type, - ma: *MemoryAccessor, - ) Error!T { - if (ma.load(T, @intFromPtr(fbr.buf[fbr.pos..].ptr)) == null) - return error.InvalidBuffer; - - return fbr.readInt(T); - } - - pub fn readUleb128(fbr: *DeprecatedFixedBufferReader, comptime T: type) Error!T { - return std.leb.readUleb128(T, fbr); - } - - pub fn readIleb128(fbr: *DeprecatedFixedBufferReader, comptime T: type) Error!T { - return std.leb.readIleb128(T, fbr); - } - - pub fn readAddress(fbr: *DeprecatedFixedBufferReader, format: std.dwarf.Format) Error!u64 { - return switch (format) { - .@"32" => try fbr.readInt(u32), - .@"64" => try fbr.readInt(u64), - }; - } - - pub fn readAddressChecked( - fbr: *DeprecatedFixedBufferReader, - format: std.dwarf.Format, - ma: *MemoryAccessor, - ) Error!u64 { - return switch (format) { - .@"32" => try fbr.readIntChecked(u32, ma), - .@"64" => try fbr.readIntChecked(u64, ma), - }; - } - - pub fn readBytes(fbr: *DeprecatedFixedBufferReader, len: usize) Error![]const u8 { - if (fbr.buf.len - fbr.pos < len) return error.EndOfBuffer; - defer fbr.pos += len; - return fbr.buf[fbr.pos..][0..len]; - } - - pub fn readBytesTo(fbr: *DeprecatedFixedBufferReader, comptime sentinel: u8) Error![:sentinel]const u8 { - const end = @call(.always_inline, std.mem.indexOfScalarPos, .{ - u8, - fbr.buf, - fbr.pos, - sentinel, - }) orelse return error.EndOfBuffer; - defer fbr.pos = end + 1; - return fbr.buf[fbr.pos..end :sentinel]; - } -}; - /// Detect 
whether the program is being executed in the Valgrind virtual machine. /// /// When Valgrind integrations are disabled, this returns comptime-known false. @@ -1587,6 +1509,7 @@ pub inline fn inValgrind() bool { test { _ = &Dwarf; _ = &MemoryAccessor; + _ = &FixedBufferReader; _ = &Pdb; _ = &SelfInfo; _ = &dumpHex; diff --git a/lib/std/debug/Coverage.zig b/lib/std/debug/Coverage.zig new file mode 100644 index 0000000000..f341efaffb --- /dev/null +++ b/lib/std/debug/Coverage.zig @@ -0,0 +1,244 @@ +const std = @import("../std.zig"); +const Allocator = std.mem.Allocator; +const Hash = std.hash.Wyhash; +const Dwarf = std.debug.Dwarf; +const assert = std.debug.assert; + +const Coverage = @This(); + +/// Provides a globally-scoped integer index for directories. +/// +/// As opposed to, for example, a directory index that is compilation-unit +/// scoped inside a single ELF module. +/// +/// String memory references the memory-mapped debug information. +/// +/// Protected by `mutex`. +directories: std.ArrayHashMapUnmanaged(String, void, String.MapContext, false), +/// Provides a globally-scoped integer index for files. +/// +/// String memory references the memory-mapped debug information. +/// +/// Protected by `mutex`. +files: std.ArrayHashMapUnmanaged(File, void, File.MapContext, false), +string_bytes: std.ArrayListUnmanaged(u8), +/// Protects the other fields. +mutex: std.Thread.Mutex, + +pub const init: Coverage = .{ + .directories = .{}, + .files = .{}, + .mutex = .{}, + .string_bytes = .{}, +}; + +pub const String = enum(u32) { + _, + + pub const MapContext = struct { + string_bytes: []const u8, + + pub fn eql(self: @This(), a: String, b: String, b_index: usize) bool { + _ = b_index; + const a_slice = span(self.string_bytes[@intFromEnum(a)..]); + const b_slice = span(self.string_bytes[@intFromEnum(b)..]); + return std.mem.eql(u8, a_slice, b_slice); + } + + pub fn hash(self: @This(), a: String) u32 { + return @truncate(Hash.hash(0, span(self.string_bytes[@intFromEnum(a)..]))); + } + }; + + pub const SliceAdapter = struct { + string_bytes: []const u8, + + pub fn eql(self: @This(), a_slice: []const u8, b: String, b_index: usize) bool { + _ = b_index; + const b_slice = span(self.string_bytes[@intFromEnum(b)..]); + return std.mem.eql(u8, a_slice, b_slice); + } + pub fn hash(self: @This(), a: []const u8) u32 { + _ = self; + return @truncate(Hash.hash(0, a)); + } + }; +}; + +pub const SourceLocation = extern struct { + file: File.Index, + line: u32, + column: u32, + + pub const invalid: SourceLocation = .{ + .file = .invalid, + .line = 0, + .column = 0, + }; +}; + +pub const File = extern struct { + directory_index: u32, + basename: String, + + pub const Index = enum(u32) { + invalid = std.math.maxInt(u32), + _, + }; + + pub const MapContext = struct { + string_bytes: []const u8, + + pub fn hash(self: MapContext, a: File) u32 { + const a_basename = span(self.string_bytes[@intFromEnum(a.basename)..]); + return @truncate(Hash.hash(a.directory_index, a_basename)); + } + + pub fn eql(self: MapContext, a: File, b: File, b_index: usize) bool { + _ = b_index; + if (a.directory_index != b.directory_index) return false; + const a_basename = span(self.string_bytes[@intFromEnum(a.basename)..]); + const b_basename = span(self.string_bytes[@intFromEnum(b.basename)..]); + return std.mem.eql(u8, a_basename, b_basename); + } + }; + + pub const SliceAdapter = struct { + string_bytes: []const u8, + + pub const Entry = struct { + directory_index: u32, + basename: []const u8, + }; + + pub fn hash(self: @This(), a: 
Entry) u32 { + _ = self; + return @truncate(Hash.hash(a.directory_index, a.basename)); + } + + pub fn eql(self: @This(), a: Entry, b: File, b_index: usize) bool { + _ = b_index; + if (a.directory_index != b.directory_index) return false; + const b_basename = span(self.string_bytes[@intFromEnum(b.basename)..]); + return std.mem.eql(u8, a.basename, b_basename); + } + }; +}; + +pub fn deinit(cov: *Coverage, gpa: Allocator) void { + cov.directories.deinit(gpa); + cov.files.deinit(gpa); + cov.string_bytes.deinit(gpa); + cov.* = undefined; +} + +pub fn fileAt(cov: *Coverage, index: File.Index) *File { + return &cov.files.keys()[@intFromEnum(index)]; +} + +pub fn stringAt(cov: *Coverage, index: String) [:0]const u8 { + return span(cov.string_bytes.items[@intFromEnum(index)..]); +} + +pub const ResolveAddressesDwarfError = Dwarf.ScanError; + +pub fn resolveAddressesDwarf( + cov: *Coverage, + gpa: Allocator, + sorted_pc_addrs: []const u64, + /// Asserts its length equals length of `sorted_pc_addrs`. + output: []SourceLocation, + d: *Dwarf, +) ResolveAddressesDwarfError!void { + assert(sorted_pc_addrs.len == output.len); + assert(d.compile_units_sorted); + + var cu_i: usize = 0; + var line_table_i: usize = 0; + var cu: *Dwarf.CompileUnit = &d.compile_unit_list.items[0]; + var range = cu.pc_range.?; + // Protects directories and files tables from other threads. + cov.mutex.lock(); + defer cov.mutex.unlock(); + next_pc: for (sorted_pc_addrs, output) |pc, *out| { + while (pc >= range.end) { + cu_i += 1; + if (cu_i >= d.compile_unit_list.items.len) { + out.* = SourceLocation.invalid; + continue :next_pc; + } + cu = &d.compile_unit_list.items[cu_i]; + line_table_i = 0; + range = cu.pc_range orelse { + out.* = SourceLocation.invalid; + continue :next_pc; + }; + } + if (pc < range.start) { + out.* = SourceLocation.invalid; + continue :next_pc; + } + if (line_table_i == 0) { + line_table_i = 1; + cov.mutex.unlock(); + defer cov.mutex.lock(); + d.populateSrcLocCache(gpa, cu) catch |err| switch (err) { + error.MissingDebugInfo, error.InvalidDebugInfo => { + out.* = SourceLocation.invalid; + cu_i += 1; + if (cu_i < d.compile_unit_list.items.len) { + cu = &d.compile_unit_list.items[cu_i]; + line_table_i = 0; + if (cu.pc_range) |r| range = r; + } + continue :next_pc; + }, + else => |e| return e, + }; + } + const slc = &cu.src_loc_cache.?; + const table_addrs = slc.line_table.keys(); + while (line_table_i < table_addrs.len and table_addrs[line_table_i] < pc) line_table_i += 1; + + const entry = slc.line_table.values()[line_table_i - 1]; + const corrected_file_index = entry.file - @intFromBool(slc.version < 5); + const file_entry = slc.files[corrected_file_index]; + const dir_path = slc.directories[file_entry.dir_index].path; + try cov.string_bytes.ensureUnusedCapacity(gpa, dir_path.len + file_entry.path.len + 2); + const dir_gop = try cov.directories.getOrPutContextAdapted(gpa, dir_path, String.SliceAdapter{ + .string_bytes = cov.string_bytes.items, + }, String.MapContext{ + .string_bytes = cov.string_bytes.items, + }); + if (!dir_gop.found_existing) + dir_gop.key_ptr.* = addStringAssumeCapacity(cov, dir_path); + const file_gop = try cov.files.getOrPutContextAdapted(gpa, File.SliceAdapter.Entry{ + .directory_index = @intCast(dir_gop.index), + .basename = file_entry.path, + }, File.SliceAdapter{ + .string_bytes = cov.string_bytes.items, + }, File.MapContext{ + .string_bytes = cov.string_bytes.items, + }); + if (!file_gop.found_existing) file_gop.key_ptr.* = .{ + .directory_index = @intCast(dir_gop.index), + 
.basename = addStringAssumeCapacity(cov, file_entry.path), + }; + out.* = .{ + .file = @enumFromInt(file_gop.index), + .line = entry.line, + .column = entry.column, + }; + } +} + +pub fn addStringAssumeCapacity(cov: *Coverage, s: []const u8) String { + const result: String = @enumFromInt(cov.string_bytes.items.len); + cov.string_bytes.appendSliceAssumeCapacity(s); + cov.string_bytes.appendAssumeCapacity(0); + return result; +} + +fn span(s: []const u8) [:0]const u8 { + return std.mem.sliceTo(@as([:0]const u8, @ptrCast(s)), 0); +} diff --git a/lib/std/debug/Dwarf.zig b/lib/std/debug/Dwarf.zig index 991c731549..e3d4ab1a8f 100644 --- a/lib/std/debug/Dwarf.zig +++ b/lib/std/debug/Dwarf.zig @@ -12,6 +12,8 @@ const native_endian = builtin.cpu.arch.endian(); const std = @import("../std.zig"); const Allocator = std.mem.Allocator; +const elf = std.elf; +const mem = std.mem; const DW = std.dwarf; const AT = DW.AT; const EH = DW.EH; @@ -22,11 +24,10 @@ const UT = DW.UT; const assert = std.debug.assert; const cast = std.math.cast; const maxInt = std.math.maxInt; -const readInt = std.mem.readInt; const MemoryAccessor = std.debug.MemoryAccessor; +const Path = std.Build.Cache.Path; -/// Did I mention this is deprecated? -const DeprecatedFixedBufferReader = std.debug.DeprecatedFixedBufferReader; +const FixedBufferReader = std.debug.FixedBufferReader; const Dwarf = @This(); @@ -37,6 +38,7 @@ pub const call_frame = @import("Dwarf/call_frame.zig"); endian: std.builtin.Endian, sections: SectionArray = null_section_array, is_macho: bool, +compile_units_sorted: bool, // Filled later by the initializer abbrev_table_list: std.ArrayListUnmanaged(Abbrev.Table) = .{}, @@ -136,6 +138,34 @@ pub const CompileUnit = struct { rnglists_base: usize, loclists_base: usize, frame_base: ?*const FormValue, + + src_loc_cache: ?SrcLocCache, + + pub const SrcLocCache = struct { + line_table: LineTable, + directories: []const FileEntry, + files: []FileEntry, + version: u16, + + pub const LineTable = std.AutoArrayHashMapUnmanaged(u64, LineEntry); + + pub const LineEntry = struct { + line: u32, + column: u32, + /// Offset by 1 depending on whether Dwarf version is >= 5. 
+ file: u32, + }; + + pub fn findSource(slc: *const SrcLocCache, address: u64) !LineEntry { + const index = std.sort.upperBound(u64, slc.line_table.keys(), address, struct { + fn order(context: u64, item: u64) std.math.Order { + return std.math.order(item, context); + } + }.order); + if (index == 0) return missing(); + return slc.line_table.values()[index - 1]; + } + }; }; pub const FormValue = union(enum) { @@ -252,13 +282,13 @@ pub const Die = struct { .@"32" => { const byte_offset = compile_unit.str_offsets_base + 4 * index; if (byte_offset + 4 > debug_str_offsets.len) return bad(); - const offset = readInt(u32, debug_str_offsets[byte_offset..][0..4], di.endian); + const offset = mem.readInt(u32, debug_str_offsets[byte_offset..][0..4], di.endian); return getStringGeneric(opt_str, offset); }, .@"64" => { const byte_offset = compile_unit.str_offsets_base + 8 * index; if (byte_offset + 8 > debug_str_offsets.len) return bad(); - const offset = readInt(u64, debug_str_offsets[byte_offset..][0..8], di.endian); + const offset = mem.readInt(u64, debug_str_offsets[byte_offset..][0..8], di.endian); return getStringGeneric(opt_str, offset); }, } @@ -325,7 +355,7 @@ pub const ExceptionFrameHeader = struct { var left: usize = 0; var len: usize = self.fde_count; - var fbr: DeprecatedFixedBufferReader = .{ .buf = self.entries, .endian = native_endian }; + var fbr: FixedBufferReader = .{ .buf = self.entries, .endian = native_endian }; while (len > 1) { const mid = left + len / 2; @@ -368,7 +398,7 @@ pub const ExceptionFrameHeader = struct { const eh_frame = @as([*]const u8, @ptrFromInt(self.eh_frame_ptr))[0 .. eh_frame_len orelse maxInt(u32)]; const fde_offset = fde_ptr - self.eh_frame_ptr; - var eh_frame_fbr: DeprecatedFixedBufferReader = .{ + var eh_frame_fbr: FixedBufferReader = .{ .buf = eh_frame, .pos = fde_offset, .endian = native_endian, @@ -426,9 +456,9 @@ pub const EntryHeader = struct { } /// Reads a header for either an FDE or a CIE, then advances the fbr to the position after the trailing structure. - /// `fbr` must be a DeprecatedFixedBufferReader backed by either the .eh_frame or .debug_frame sections. + /// `fbr` must be a FixedBufferReader backed by either the .eh_frame or .debug_frame sections. pub fn read( - fbr: *DeprecatedFixedBufferReader, + fbr: *FixedBufferReader, opt_ma: ?*MemoryAccessor, dwarf_section: Section.Id, ) !EntryHeader { @@ -541,7 +571,7 @@ pub const CommonInformationEntry = struct { ) !CommonInformationEntry { if (addr_size_bytes > 8) return error.UnsupportedAddrSize; - var fbr: DeprecatedFixedBufferReader = .{ .buf = cie_bytes, .endian = endian }; + var fbr: FixedBufferReader = .{ .buf = cie_bytes, .endian = endian }; const version = try fbr.readByte(); switch (dwarf_section) { @@ -675,7 +705,7 @@ pub const FrameDescriptionEntry = struct { ) !FrameDescriptionEntry { if (addr_size_bytes > 8) return error.InvalidAddrSize; - var fbr: DeprecatedFixedBufferReader = .{ .buf = fde_bytes, .endian = endian }; + var fbr: FixedBufferReader = .{ .buf = fde_bytes, .endian = endian }; const pc_begin = try readEhPointer(&fbr, cie.fde_pointer_enc, addr_size_bytes, .{ .pc_rel_base = try pcRelBase(@intFromPtr(&fde_bytes[fbr.pos]), pc_rel_offset), @@ -721,12 +751,14 @@ const num_sections = std.enums.directEnumArrayLen(Section.Id, 0); pub const SectionArray = [num_sections]?Section; pub const null_section_array = [_]?Section{null} ** num_sections; +pub const OpenError = ScanError; + /// Initialize DWARF info. 
The caller has the responsibility to initialize most /// the `Dwarf` fields before calling. `binary_mem` is the raw bytes of the /// main binary file (not the secondary debug info file). -pub fn open(di: *Dwarf, allocator: Allocator) !void { - try di.scanAllFunctions(allocator); - try di.scanAllCompileUnits(allocator); +pub fn open(d: *Dwarf, gpa: Allocator) OpenError!void { + try d.scanAllFunctions(gpa); + try d.scanAllCompileUnits(gpa); } const PcRange = struct { @@ -747,21 +779,26 @@ pub fn sectionVirtualOffset(di: Dwarf, dwarf_section: Section.Id, base_address: return if (di.sections[@intFromEnum(dwarf_section)]) |s| s.virtualOffset(base_address) else null; } -pub fn deinit(di: *Dwarf, allocator: Allocator) void { +pub fn deinit(di: *Dwarf, gpa: Allocator) void { for (di.sections) |opt_section| { - if (opt_section) |s| if (s.owned) allocator.free(s.data); + if (opt_section) |s| if (s.owned) gpa.free(s.data); } for (di.abbrev_table_list.items) |*abbrev| { - abbrev.deinit(allocator); + abbrev.deinit(gpa); } - di.abbrev_table_list.deinit(allocator); + di.abbrev_table_list.deinit(gpa); for (di.compile_unit_list.items) |*cu| { - cu.die.deinit(allocator); + if (cu.src_loc_cache) |*slc| { + slc.line_table.deinit(gpa); + gpa.free(slc.directories); + gpa.free(slc.files); + } + cu.die.deinit(gpa); } - di.compile_unit_list.deinit(allocator); - di.func_list.deinit(allocator); - di.cie_map.deinit(allocator); - di.fde_list.deinit(allocator); + di.compile_unit_list.deinit(gpa); + di.func_list.deinit(gpa); + di.cie_map.deinit(gpa); + di.fde_list.deinit(gpa); di.* = undefined; } @@ -777,8 +814,13 @@ pub fn getSymbolName(di: *Dwarf, address: u64) ?[]const u8 { return null; } -fn scanAllFunctions(di: *Dwarf, allocator: Allocator) !void { - var fbr: DeprecatedFixedBufferReader = .{ .buf = di.section(.debug_info).?, .endian = di.endian }; +pub const ScanError = error{ + InvalidDebugInfo, + MissingDebugInfo, +} || Allocator.Error || std.debug.FixedBufferReader.Error; + +fn scanAllFunctions(di: *Dwarf, allocator: Allocator) ScanError!void { + var fbr: FixedBufferReader = .{ .buf = di.section(.debug_info).?, .endian = di.endian }; var this_unit_offset: u64 = 0; while (this_unit_offset < fbr.buf.len) { @@ -837,6 +879,7 @@ fn scanAllFunctions(di: *Dwarf, allocator: Allocator) !void { .rnglists_base = 0, .loclists_base = 0, .frame_base = null, + .src_loc_cache = null, }; while (true) { @@ -964,8 +1007,8 @@ fn scanAllFunctions(di: *Dwarf, allocator: Allocator) !void { } } -fn scanAllCompileUnits(di: *Dwarf, allocator: Allocator) !void { - var fbr: DeprecatedFixedBufferReader = .{ .buf = di.section(.debug_info).?, .endian = di.endian }; +fn scanAllCompileUnits(di: *Dwarf, allocator: Allocator) ScanError!void { + var fbr: FixedBufferReader = .{ .buf = di.section(.debug_info).?, .endian = di.endian }; var this_unit_offset: u64 = 0; var attrs_buf = std.ArrayList(Die.Attr).init(allocator); @@ -1023,6 +1066,7 @@ fn scanAllCompileUnits(di: *Dwarf, allocator: Allocator) !void { .rnglists_base = if (compile_unit_die.getAttr(AT.rnglists_base)) |fv| try fv.getUInt(usize) else 0, .loclists_base = if (compile_unit_die.getAttr(AT.loclists_base)) |fv| try fv.getUInt(usize) else 0, .frame_base = compile_unit_die.getAttr(AT.frame_base), + .src_loc_cache = null, }; compile_unit.pc_range = x: { @@ -1052,12 +1096,45 @@ fn scanAllCompileUnits(di: *Dwarf, allocator: Allocator) !void { } } +/// Populate missing PC ranges in compilation units, and then sort them by start address. 
+/// Does not guarantee pc_range to be non-null because there could be missing debug info. +pub fn sortCompileUnits(d: *Dwarf) ScanError!void { + assert(!d.compile_units_sorted); + + for (d.compile_unit_list.items) |*cu| { + if (cu.pc_range != null) continue; + const ranges_value = cu.die.getAttr(AT.ranges) orelse continue; + var iter = DebugRangeIterator.init(ranges_value, d, cu) catch continue; + var start: u64 = maxInt(u64); + var end: u64 = 0; + while (try iter.next()) |range| { + start = @min(start, range.start_addr); + end = @max(end, range.end_addr); + } + if (end != 0) cu.pc_range = .{ + .start = start, + .end = end, + }; + } + + std.mem.sortUnstable(CompileUnit, d.compile_unit_list.items, {}, struct { + pub fn lessThan(ctx: void, a: CompileUnit, b: CompileUnit) bool { + _ = ctx; + const a_range = a.pc_range orelse return false; + const b_range = b.pc_range orelse return true; + return a_range.start < b_range.start; + } + }.lessThan); + + d.compile_units_sorted = true; +} + const DebugRangeIterator = struct { base_address: u64, section_type: Section.Id, di: *const Dwarf, compile_unit: *const CompileUnit, - fbr: DeprecatedFixedBufferReader, + fbr: FixedBufferReader, pub fn init(ranges_value: *const FormValue, di: *const Dwarf, compile_unit: *const CompileUnit) !@This() { const section_type = if (compile_unit.version >= 5) Section.Id.debug_rnglists else Section.Id.debug_ranges; @@ -1070,13 +1147,13 @@ const DebugRangeIterator = struct { .@"32" => { const offset_loc = @as(usize, @intCast(compile_unit.rnglists_base + 4 * idx)); if (offset_loc + 4 > debug_ranges.len) return bad(); - const offset = readInt(u32, debug_ranges[offset_loc..][0..4], di.endian); + const offset = mem.readInt(u32, debug_ranges[offset_loc..][0..4], di.endian); break :off compile_unit.rnglists_base + offset; }, .@"64" => { const offset_loc = @as(usize, @intCast(compile_unit.rnglists_base + 8 * idx)); if (offset_loc + 8 > debug_ranges.len) return bad(); - const offset = readInt(u64, debug_ranges[offset_loc..][0..8], di.endian); + const offset = mem.readInt(u64, debug_ranges[offset_loc..][0..8], di.endian); break :off compile_unit.rnglists_base + offset; }, } @@ -1199,7 +1276,8 @@ const DebugRangeIterator = struct { } }; -pub fn findCompileUnit(di: *const Dwarf, target_address: u64) !*const CompileUnit { +/// TODO: change this to binary searching the sorted compile unit list +pub fn findCompileUnit(di: *const Dwarf, target_address: u64) !*CompileUnit { for (di.compile_unit_list.items) |*compile_unit| { if (compile_unit.pc_range) |range| { if (target_address >= range.start and target_address < range.end) return compile_unit; @@ -1231,7 +1309,7 @@ fn getAbbrevTable(di: *Dwarf, allocator: Allocator, abbrev_offset: u64) !*const } fn parseAbbrevTable(di: *Dwarf, allocator: Allocator, offset: u64) !Abbrev.Table { - var fbr: DeprecatedFixedBufferReader = .{ + var fbr: FixedBufferReader = .{ .buf = di.section(.debug_abbrev).?, .pos = cast(usize, offset) orelse return bad(), .endian = di.endian, @@ -1283,11 +1361,11 @@ fn parseAbbrevTable(di: *Dwarf, allocator: Allocator, offset: u64) !Abbrev.Table } fn parseDie( - fbr: *DeprecatedFixedBufferReader, + fbr: *FixedBufferReader, attrs_buf: []Die.Attr, abbrev_table: *const Abbrev.Table, format: Format, -) !?Die { +) ScanError!?Die { const abbrev_code = try fbr.readUleb128(u64); if (abbrev_code == 0) return null; const table_entry = abbrev_table.get(abbrev_code) orelse return bad(); @@ -1309,34 +1387,36 @@ fn parseDie( }; } -pub fn getLineNumberInfo( - di: *Dwarf, - allocator: 
Allocator, - compile_unit: CompileUnit, - target_address: u64, -) !std.debug.SourceLocation { - const compile_unit_cwd = try compile_unit.die.getAttrString(di, AT.comp_dir, di.section(.debug_line_str), compile_unit); +fn runLineNumberProgram(d: *Dwarf, gpa: Allocator, compile_unit: *CompileUnit) !CompileUnit.SrcLocCache { + const compile_unit_cwd = try compile_unit.die.getAttrString(d, AT.comp_dir, d.section(.debug_line_str), compile_unit.*); const line_info_offset = try compile_unit.die.getAttrSecOffset(AT.stmt_list); - var fbr: DeprecatedFixedBufferReader = .{ .buf = di.section(.debug_line).?, .endian = di.endian }; + var fbr: FixedBufferReader = .{ + .buf = d.section(.debug_line).?, + .endian = d.endian, + }; try fbr.seekTo(line_info_offset); const unit_header = try readUnitHeader(&fbr, null); if (unit_header.unit_length == 0) return missing(); + const next_offset = unit_header.header_length + unit_header.unit_length; const version = try fbr.readInt(u16); if (version < 2) return bad(); - var addr_size: u8 = switch (unit_header.format) { - .@"32" => 4, - .@"64" => 8, + const addr_size: u8, const seg_size: u8 = if (version >= 5) .{ + try fbr.readByte(), + try fbr.readByte(), + } else .{ + switch (unit_header.format) { + .@"32" => 4, + .@"64" => 8, + }, + 0, }; - var seg_size: u8 = 0; - if (version >= 5) { - addr_size = try fbr.readByte(); - seg_size = try fbr.readByte(); - } + _ = addr_size; + _ = seg_size; const prologue_length = try fbr.readAddress(unit_header.format); const prog_start_offset = fbr.pos + prologue_length; @@ -1345,8 +1425,8 @@ pub fn getLineNumberInfo( if (minimum_instruction_length == 0) return bad(); if (version >= 4) { - // maximum_operations_per_instruction - _ = try fbr.readByte(); + const maximum_operations_per_instruction = try fbr.readByte(); + _ = maximum_operations_per_instruction; } const default_is_stmt = (try fbr.readByte()) != 0; @@ -1359,18 +1439,18 @@ pub fn getLineNumberInfo( const standard_opcode_lengths = try fbr.readBytes(opcode_base - 1); - var include_directories = std.ArrayList(FileEntry).init(allocator); - defer include_directories.deinit(); - var file_entries = std.ArrayList(FileEntry).init(allocator); - defer file_entries.deinit(); + var directories: std.ArrayListUnmanaged(FileEntry) = .{}; + defer directories.deinit(gpa); + var file_entries: std.ArrayListUnmanaged(FileEntry) = .{}; + defer file_entries.deinit(gpa); if (version < 5) { - try include_directories.append(.{ .path = compile_unit_cwd }); + try directories.append(gpa, .{ .path = compile_unit_cwd }); while (true) { const dir = try fbr.readBytesTo(0); if (dir.len == 0) break; - try include_directories.append(.{ .path = dir }); + try directories.append(gpa, .{ .path = dir }); } while (true) { @@ -1379,7 +1459,7 @@ pub fn getLineNumberInfo( const dir_index = try fbr.readUleb128(u32); const mtime = try fbr.readUleb128(u64); const size = try fbr.readUleb128(u64); - try file_entries.append(.{ + try file_entries.append(gpa, .{ .path = file_name, .dir_index = dir_index, .mtime = mtime, @@ -1403,52 +1483,10 @@ pub fn getLineNumberInfo( } const directories_count = try fbr.readUleb128(usize); - try include_directories.ensureUnusedCapacity(directories_count); - { - var i: usize = 0; - while (i < directories_count) : (i += 1) { - var e: FileEntry = .{ .path = &.{} }; - for (dir_ent_fmt_buf[0..directory_entry_format_count]) |ent_fmt| { - const form_value = try parseFormValue( - &fbr, - ent_fmt.form_code, - unit_header.format, - null, - ); - switch (ent_fmt.content_type_code) { - DW.LNCT.path => 
e.path = try form_value.getString(di.*), - DW.LNCT.directory_index => e.dir_index = try form_value.getUInt(u32), - DW.LNCT.timestamp => e.mtime = try form_value.getUInt(u64), - DW.LNCT.size => e.size = try form_value.getUInt(u64), - DW.LNCT.MD5 => e.md5 = switch (form_value) { - .data16 => |data16| data16.*, - else => return bad(), - }, - else => continue, - } - } - include_directories.appendAssumeCapacity(e); - } - } - } - - var file_ent_fmt_buf: [10]FileEntFmt = undefined; - const file_name_entry_format_count = try fbr.readByte(); - if (file_name_entry_format_count > file_ent_fmt_buf.len) return bad(); - for (file_ent_fmt_buf[0..file_name_entry_format_count]) |*ent_fmt| { - ent_fmt.* = .{ - .content_type_code = try fbr.readUleb128(u8), - .form_code = try fbr.readUleb128(u16), - }; - } - const file_names_count = try fbr.readUleb128(usize); - try file_entries.ensureUnusedCapacity(file_names_count); - { - var i: usize = 0; - while (i < file_names_count) : (i += 1) { - var e: FileEntry = .{ .path = &.{} }; - for (file_ent_fmt_buf[0..file_name_entry_format_count]) |ent_fmt| { + for (try directories.addManyAsSlice(gpa, directories_count)) |*e| { + e.* = .{ .path = &.{} }; + for (dir_ent_fmt_buf[0..directory_entry_format_count]) |ent_fmt| { const form_value = try parseFormValue( &fbr, ent_fmt.form_code, @@ -1456,7 +1494,7 @@ pub fn getLineNumberInfo( null, ); switch (ent_fmt.content_type_code) { - DW.LNCT.path => e.path = try form_value.getString(di.*), + DW.LNCT.path => e.path = try form_value.getString(d.*), DW.LNCT.directory_index => e.dir_index = try form_value.getUInt(u32), DW.LNCT.timestamp => e.mtime = try form_value.getUInt(u64), DW.LNCT.size => e.size = try form_value.getUInt(u64), @@ -1467,17 +1505,49 @@ pub fn getLineNumberInfo( else => continue, } } - file_entries.appendAssumeCapacity(e); + } + } + + var file_ent_fmt_buf: [10]FileEntFmt = undefined; + const file_name_entry_format_count = try fbr.readByte(); + if (file_name_entry_format_count > file_ent_fmt_buf.len) return bad(); + for (file_ent_fmt_buf[0..file_name_entry_format_count]) |*ent_fmt| { + ent_fmt.* = .{ + .content_type_code = try fbr.readUleb128(u8), + .form_code = try fbr.readUleb128(u16), + }; + } + + const file_names_count = try fbr.readUleb128(usize); + try file_entries.ensureUnusedCapacity(gpa, file_names_count); + + for (try file_entries.addManyAsSlice(gpa, file_names_count)) |*e| { + e.* = .{ .path = &.{} }; + for (file_ent_fmt_buf[0..file_name_entry_format_count]) |ent_fmt| { + const form_value = try parseFormValue( + &fbr, + ent_fmt.form_code, + unit_header.format, + null, + ); + switch (ent_fmt.content_type_code) { + DW.LNCT.path => e.path = try form_value.getString(d.*), + DW.LNCT.directory_index => e.dir_index = try form_value.getUInt(u32), + DW.LNCT.timestamp => e.mtime = try form_value.getUInt(u64), + DW.LNCT.size => e.size = try form_value.getUInt(u64), + DW.LNCT.MD5 => e.md5 = switch (form_value) { + .data16 => |data16| data16.*, + else => return bad(), + }, + else => continue, + } } } } - var prog = LineNumberProgram.init( - default_is_stmt, - include_directories.items, - target_address, - version, - ); + var prog = LineNumberProgram.init(default_is_stmt, version); + var line_table: CompileUnit.SrcLocCache.LineTable = .{}; + errdefer line_table.deinit(gpa); try fbr.seekTo(prog_start_offset); @@ -1493,7 +1563,7 @@ pub fn getLineNumberInfo( switch (sub_op) { DW.LNE.end_sequence => { prog.end_sequence = true; - if (try prog.checkLineMatch(allocator, file_entries.items)) |info| return info; + try 
prog.addRow(gpa, &line_table); prog.reset(); }, DW.LNE.set_address => { @@ -1505,7 +1575,7 @@ pub fn getLineNumberInfo( const dir_index = try fbr.readUleb128(u32); const mtime = try fbr.readUleb128(u64); const size = try fbr.readUleb128(u64); - try file_entries.append(.{ + try file_entries.append(gpa, .{ .path = path, .dir_index = dir_index, .mtime = mtime, @@ -1521,12 +1591,12 @@ pub fn getLineNumberInfo( const inc_line = @as(i32, line_base) + @as(i32, adjusted_opcode % line_range); prog.line += inc_line; prog.address += inc_addr; - if (try prog.checkLineMatch(allocator, file_entries.items)) |info| return info; + try prog.addRow(gpa, &line_table); prog.basic_block = false; } else { switch (opcode) { DW.LNS.copy => { - if (try prog.checkLineMatch(allocator, file_entries.items)) |info| return info; + try prog.addRow(gpa, &line_table); prog.basic_block = false; }, DW.LNS.advance_pc => { @@ -1568,7 +1638,39 @@ pub fn getLineNumberInfo( } } - return missing(); + return .{ + .line_table = line_table, + .directories = try directories.toOwnedSlice(gpa), + .files = try file_entries.toOwnedSlice(gpa), + .version = version, + }; +} + +pub fn populateSrcLocCache(d: *Dwarf, gpa: Allocator, cu: *CompileUnit) ScanError!void { + if (cu.src_loc_cache != null) return; + cu.src_loc_cache = try runLineNumberProgram(d, gpa, cu); +} + +pub fn getLineNumberInfo( + d: *Dwarf, + gpa: Allocator, + compile_unit: *CompileUnit, + target_address: u64, +) !std.debug.SourceLocation { + try populateSrcLocCache(d, gpa, compile_unit); + const slc = &compile_unit.src_loc_cache.?; + const entry = try slc.findSource(target_address); + const file_index = entry.file - @intFromBool(slc.version < 5); + if (file_index >= slc.files.len) return bad(); + const file_entry = &slc.files[file_index]; + if (file_entry.dir_index >= slc.directories.len) return bad(); + const dir_name = slc.directories[file_entry.dir_index].path; + const file_name = try std.fs.path.join(gpa, &.{ dir_name, file_entry.path }); + return .{ + .line = entry.line, + .column = entry.column, + .file_name = file_name, + }; } fn getString(di: Dwarf, offset: u64) ![:0]const u8 { @@ -1588,7 +1690,7 @@ fn readDebugAddr(di: Dwarf, compile_unit: CompileUnit, index: u64) !u64 { // The header is 8 or 12 bytes depending on is_64. if (compile_unit.addr_base < 8) return bad(); - const version = readInt(u16, debug_addr[compile_unit.addr_base - 4 ..][0..2], di.endian); + const version = mem.readInt(u16, debug_addr[compile_unit.addr_base - 4 ..][0..2], di.endian); if (version != 5) return bad(); const addr_size = debug_addr[compile_unit.addr_base - 2]; @@ -1598,9 +1700,9 @@ fn readDebugAddr(di: Dwarf, compile_unit: CompileUnit, index: u64) !u64 { if (byte_offset + addr_size > debug_addr.len) return bad(); return switch (addr_size) { 1 => debug_addr[byte_offset], - 2 => readInt(u16, debug_addr[byte_offset..][0..2], di.endian), - 4 => readInt(u32, debug_addr[byte_offset..][0..4], di.endian), - 8 => readInt(u64, debug_addr[byte_offset..][0..8], di.endian), + 2 => mem.readInt(u16, debug_addr[byte_offset..][0..2], di.endian), + 4 => mem.readInt(u32, debug_addr[byte_offset..][0..4], di.endian), + 8 => mem.readInt(u64, debug_addr[byte_offset..][0..8], di.endian), else => bad(), }; } @@ -1611,7 +1713,7 @@ fn readDebugAddr(di: Dwarf, compile_unit: CompileUnit, index: u64) !u64 { /// of FDEs is built for binary searching during unwinding. 
pub fn scanAllUnwindInfo(di: *Dwarf, allocator: Allocator, base_address: usize) !void { if (di.section(.eh_frame_hdr)) |eh_frame_hdr| blk: { - var fbr: DeprecatedFixedBufferReader = .{ .buf = eh_frame_hdr, .endian = native_endian }; + var fbr: FixedBufferReader = .{ .buf = eh_frame_hdr, .endian = native_endian }; const version = try fbr.readByte(); if (version != 1) break :blk; @@ -1651,7 +1753,7 @@ pub fn scanAllUnwindInfo(di: *Dwarf, allocator: Allocator, base_address: usize) const frame_sections = [2]Section.Id{ .eh_frame, .debug_frame }; for (frame_sections) |frame_section| { if (di.section(frame_section)) |section_data| { - var fbr: DeprecatedFixedBufferReader = .{ .buf = section_data, .endian = di.endian }; + var fbr: FixedBufferReader = .{ .buf = section_data, .endian = di.endian }; while (fbr.pos < fbr.buf.len) { const entry_header = try EntryHeader.read(&fbr, null, frame_section); switch (entry_header.type) { @@ -1695,11 +1797,11 @@ pub fn scanAllUnwindInfo(di: *Dwarf, allocator: Allocator, base_address: usize) } fn parseFormValue( - fbr: *DeprecatedFixedBufferReader, + fbr: *FixedBufferReader, form_id: u64, format: Format, implicit_const: ?i64, -) anyerror!FormValue { +) ScanError!FormValue { return switch (form_id) { FORM.addr => .{ .addr = try fbr.readAddress(switch (@bitSizeOf(usize)) { 32 => .@"32", @@ -1783,17 +1885,6 @@ const LineNumberProgram = struct { end_sequence: bool, default_is_stmt: bool, - target_address: u64, - include_dirs: []const FileEntry, - - prev_valid: bool, - prev_address: u64, - prev_file: usize, - prev_line: i64, - prev_column: u64, - prev_is_stmt: bool, - prev_basic_block: bool, - prev_end_sequence: bool, // Reset the state machine following the DWARF specification pub fn reset(self: *LineNumberProgram) void { @@ -1804,24 +1895,10 @@ const LineNumberProgram = struct { self.is_stmt = self.default_is_stmt; self.basic_block = false; self.end_sequence = false; - // Invalidate all the remaining fields - self.prev_valid = false; - self.prev_address = 0; - self.prev_file = undefined; - self.prev_line = undefined; - self.prev_column = undefined; - self.prev_is_stmt = undefined; - self.prev_basic_block = undefined; - self.prev_end_sequence = undefined; } - pub fn init( - is_stmt: bool, - include_dirs: []const FileEntry, - target_address: u64, - version: u16, - ) LineNumberProgram { - return LineNumberProgram{ + pub fn init(is_stmt: bool, version: u16) LineNumberProgram { + return .{ .address = 0, .file = 1, .line = 1, @@ -1830,60 +1907,17 @@ const LineNumberProgram = struct { .is_stmt = is_stmt, .basic_block = false, .end_sequence = false, - .include_dirs = include_dirs, .default_is_stmt = is_stmt, - .target_address = target_address, - .prev_valid = false, - .prev_address = 0, - .prev_file = undefined, - .prev_line = undefined, - .prev_column = undefined, - .prev_is_stmt = undefined, - .prev_basic_block = undefined, - .prev_end_sequence = undefined, }; } - pub fn checkLineMatch( - self: *LineNumberProgram, - allocator: Allocator, - file_entries: []const FileEntry, - ) !?std.debug.SourceLocation { - if (self.prev_valid and - self.target_address >= self.prev_address and - self.target_address < self.address) - { - const file_index = if (self.version >= 5) self.prev_file else i: { - if (self.prev_file == 0) return missing(); - break :i self.prev_file - 1; - }; - - if (file_index >= file_entries.len) return bad(); - const file_entry = &file_entries[file_index]; - - if (file_entry.dir_index >= self.include_dirs.len) return bad(); - const dir_name = 
self.include_dirs[file_entry.dir_index].path; - - const file_name = try std.fs.path.join(allocator, &[_][]const u8{ - dir_name, file_entry.path, - }); - - return std.debug.SourceLocation{ - .line = if (self.prev_line >= 0) @as(u64, @intCast(self.prev_line)) else 0, - .column = self.prev_column, - .file_name = file_name, - }; - } - - self.prev_valid = true; - self.prev_address = self.address; - self.prev_file = self.file; - self.prev_line = self.line; - self.prev_column = self.column; - self.prev_is_stmt = self.is_stmt; - self.prev_basic_block = self.basic_block; - self.prev_end_sequence = self.end_sequence; - return null; + pub fn addRow(prog: *LineNumberProgram, gpa: Allocator, table: *CompileUnit.SrcLocCache.LineTable) !void { + if (prog.line == 0) return; // garbage data + try table.put(gpa, prog.address, .{ + .line = cast(u32, prog.line) orelse maxInt(u32), + .column = cast(u32, prog.column) orelse maxInt(u32), + .file = cast(u32, prog.file) orelse return bad(), + }); } }; @@ -1892,7 +1926,8 @@ const UnitHeader = struct { header_length: u4, unit_length: u64, }; -fn readUnitHeader(fbr: *DeprecatedFixedBufferReader, opt_ma: ?*MemoryAccessor) !UnitHeader { + +fn readUnitHeader(fbr: *FixedBufferReader, opt_ma: ?*MemoryAccessor) ScanError!UnitHeader { return switch (try if (opt_ma) |ma| fbr.readIntChecked(u32, ma) else fbr.readInt(u32)) { 0...0xfffffff0 - 1 => |unit_length| .{ .format = .@"32", @@ -1957,7 +1992,7 @@ const EhPointerContext = struct { text_rel_base: ?u64 = null, function_rel_base: ?u64 = null, }; -fn readEhPointer(fbr: *DeprecatedFixedBufferReader, enc: u8, addr_size_bytes: u8, ctx: EhPointerContext) !?u64 { +fn readEhPointer(fbr: *FixedBufferReader, enc: u8, addr_size_bytes: u8, ctx: EhPointerContext) !?u64 { if (enc == EH.PE.omit) return null; const value: union(enum) { @@ -2023,3 +2058,320 @@ fn pcRelBase(field_ptr: usize, pc_rel_offset: i64) !usize { return std.math.add(usize, field_ptr, @as(usize, @intCast(pc_rel_offset))); } } + +pub const ElfModule = struct { + base_address: usize, + dwarf: Dwarf, + mapped_memory: []align(std.mem.page_size) const u8, + external_mapped_memory: ?[]align(std.mem.page_size) const u8, + + pub fn deinit(self: *@This(), allocator: Allocator) void { + self.dwarf.deinit(allocator); + std.posix.munmap(self.mapped_memory); + if (self.external_mapped_memory) |m| std.posix.munmap(m); + } + + pub fn getSymbolAtAddress(self: *@This(), allocator: Allocator, address: usize) !std.debug.Symbol { + // Translate the VA into an address into this object + const relocated_address = address - self.base_address; + return self.dwarf.getSymbol(allocator, relocated_address); + } + + pub fn getDwarfInfoForAddress(self: *@This(), allocator: Allocator, address: usize) !?*const Dwarf { + _ = allocator; + _ = address; + return &self.dwarf; + } + + pub const LoadError = error{ + InvalidDebugInfo, + MissingDebugInfo, + InvalidElfMagic, + InvalidElfVersion, + InvalidElfEndian, + /// TODO: implement this and then remove this error code + UnimplementedDwarfForeignEndian, + /// The debug info may be valid but this implementation uses memory + /// mapping which limits things to usize. If the target debug info is + /// 64-bit and host is 32-bit, there may be debug info that is not + /// supportable using this method. + Overflow, + + PermissionDenied, + LockedMemoryLimitExceeded, + MemoryMappingNotSupported, + } || Allocator.Error || std.fs.File.OpenError || OpenError; + + /// Reads debug info from an already mapped ELF file. 
+ /// + /// If the required sections aren't present but a reference to external debug + /// info is, then this this function will recurse to attempt to load the debug + /// sections from an external file. + pub fn load( + gpa: Allocator, + mapped_mem: []align(std.mem.page_size) const u8, + build_id: ?[]const u8, + expected_crc: ?u32, + parent_sections: *Dwarf.SectionArray, + parent_mapped_mem: ?[]align(std.mem.page_size) const u8, + elf_filename: ?[]const u8, + ) LoadError!Dwarf.ElfModule { + if (expected_crc) |crc| if (crc != std.hash.crc.Crc32.hash(mapped_mem)) return error.InvalidDebugInfo; + + const hdr: *const elf.Ehdr = @ptrCast(&mapped_mem[0]); + if (!mem.eql(u8, hdr.e_ident[0..4], elf.MAGIC)) return error.InvalidElfMagic; + if (hdr.e_ident[elf.EI_VERSION] != 1) return error.InvalidElfVersion; + + const endian: std.builtin.Endian = switch (hdr.e_ident[elf.EI_DATA]) { + elf.ELFDATA2LSB => .little, + elf.ELFDATA2MSB => .big, + else => return error.InvalidElfEndian, + }; + if (endian != native_endian) return error.UnimplementedDwarfForeignEndian; + + const shoff = hdr.e_shoff; + const str_section_off = shoff + @as(u64, hdr.e_shentsize) * @as(u64, hdr.e_shstrndx); + const str_shdr: *const elf.Shdr = @ptrCast(@alignCast(&mapped_mem[cast(usize, str_section_off) orelse return error.Overflow])); + const header_strings = mapped_mem[str_shdr.sh_offset..][0..str_shdr.sh_size]; + const shdrs = @as( + [*]const elf.Shdr, + @ptrCast(@alignCast(&mapped_mem[shoff])), + )[0..hdr.e_shnum]; + + var sections: Dwarf.SectionArray = Dwarf.null_section_array; + + // Combine section list. This takes ownership over any owned sections from the parent scope. + for (parent_sections, §ions) |*parent, *section_elem| { + if (parent.*) |*p| { + section_elem.* = p.*; + p.owned = false; + } + } + errdefer for (sections) |opt_section| if (opt_section) |s| if (s.owned) gpa.free(s.data); + + var separate_debug_filename: ?[]const u8 = null; + var separate_debug_crc: ?u32 = null; + + for (shdrs) |*shdr| { + if (shdr.sh_type == elf.SHT_NULL or shdr.sh_type == elf.SHT_NOBITS) continue; + const name = mem.sliceTo(header_strings[shdr.sh_name..], 0); + + if (mem.eql(u8, name, ".gnu_debuglink")) { + const gnu_debuglink = try chopSlice(mapped_mem, shdr.sh_offset, shdr.sh_size); + const debug_filename = mem.sliceTo(@as([*:0]const u8, @ptrCast(gnu_debuglink.ptr)), 0); + const crc_offset = mem.alignForward(usize, @intFromPtr(&debug_filename[debug_filename.len]) + 1, 4) - @intFromPtr(gnu_debuglink.ptr); + const crc_bytes = gnu_debuglink[crc_offset..][0..4]; + separate_debug_crc = mem.readInt(u32, crc_bytes, native_endian); + separate_debug_filename = debug_filename; + continue; + } + + var section_index: ?usize = null; + inline for (@typeInfo(Dwarf.Section.Id).Enum.fields, 0..) |sect, i| { + if (mem.eql(u8, "." ++ sect.name, name)) section_index = i; + } + if (section_index == null) continue; + if (sections[section_index.?] != null) continue; + + const section_bytes = try chopSlice(mapped_mem, shdr.sh_offset, shdr.sh_size); + sections[section_index.?] 
= if ((shdr.sh_flags & elf.SHF_COMPRESSED) > 0) blk: { + var section_stream = std.io.fixedBufferStream(section_bytes); + const section_reader = section_stream.reader(); + const chdr = section_reader.readStruct(elf.Chdr) catch continue; + if (chdr.ch_type != .ZLIB) continue; + + var zlib_stream = std.compress.zlib.decompressor(section_reader); + + const decompressed_section = try gpa.alloc(u8, chdr.ch_size); + errdefer gpa.free(decompressed_section); + + const read = zlib_stream.reader().readAll(decompressed_section) catch continue; + assert(read == decompressed_section.len); + + break :blk .{ + .data = decompressed_section, + .virtual_address = shdr.sh_addr, + .owned = true, + }; + } else .{ + .data = section_bytes, + .virtual_address = shdr.sh_addr, + .owned = false, + }; + } + + const missing_debug_info = + sections[@intFromEnum(Dwarf.Section.Id.debug_info)] == null or + sections[@intFromEnum(Dwarf.Section.Id.debug_abbrev)] == null or + sections[@intFromEnum(Dwarf.Section.Id.debug_str)] == null or + sections[@intFromEnum(Dwarf.Section.Id.debug_line)] == null; + + // Attempt to load debug info from an external file + // See: https://sourceware.org/gdb/onlinedocs/gdb/Separate-Debug-Files.html + if (missing_debug_info) { + + // Only allow one level of debug info nesting + if (parent_mapped_mem) |_| { + return error.MissingDebugInfo; + } + + const global_debug_directories = [_][]const u8{ + "/usr/lib/debug", + }; + + // <global debug directory>/.build-id/<2-character id prefix>/<id remainder>.debug + if (build_id) |id| blk: { + if (id.len < 3) break :blk; + + // Either md5 (16 bytes) or sha1 (20 bytes) are used here in practice + const extension = ".debug"; + var id_prefix_buf: [2]u8 = undefined; + var filename_buf: [38 + extension.len]u8 = undefined; + + _ = std.fmt.bufPrint(&id_prefix_buf, "{s}", .{std.fmt.fmtSliceHexLower(id[0..1])}) catch unreachable; + const filename = std.fmt.bufPrint( + &filename_buf, + "{s}" ++ extension, + .{std.fmt.fmtSliceHexLower(id[1..])}, + ) catch break :blk; + + for (global_debug_directories) |global_directory| { + const path: Path = .{ + .root_dir = std.Build.Cache.Directory.cwd(), + .sub_path = try std.fs.path.join(gpa, &.{ + global_directory, ".build-id", &id_prefix_buf, filename, + }), + }; + defer gpa.free(path.sub_path); + + return loadPath(gpa, path, null, separate_debug_crc, §ions, mapped_mem) catch continue; + } + } + + // use the path from .gnu_debuglink, in the same search order as gdb + if (separate_debug_filename) |separate_filename| blk: { + if (elf_filename != null and mem.eql(u8, elf_filename.?, separate_filename)) + return error.MissingDebugInfo; + + // <cwd>/<gnu_debuglink> + if (loadPath( + gpa, + .{ + .root_dir = std.Build.Cache.Directory.cwd(), + .sub_path = separate_filename, + }, + null, + separate_debug_crc, + §ions, + mapped_mem, + )) |debug_info| { + return debug_info; + } else |_| {} + + // <cwd>/.debug/<gnu_debuglink> + { + const path: Path = .{ + .root_dir = std.Build.Cache.Directory.cwd(), + .sub_path = try std.fs.path.join(gpa, &.{ ".debug", separate_filename }), + }; + defer gpa.free(path.sub_path); + + if (loadPath(gpa, path, null, separate_debug_crc, §ions, mapped_mem)) |debug_info| return debug_info else |_| {} + } + + var cwd_buf: [std.fs.max_path_bytes]u8 = undefined; + const cwd_path = std.posix.realpath(".", &cwd_buf) catch break :blk; + + // <global debug directory>/<absolute folder of current binary>/<gnu_debuglink> + for (global_debug_directories) |global_directory| { + const path: Path = .{ + .root_dir = 
std.Build.Cache.Directory.cwd(), + .sub_path = try std.fs.path.join(gpa, &.{ global_directory, cwd_path, separate_filename }), + }; + defer gpa.free(path.sub_path); + if (loadPath(gpa, path, null, separate_debug_crc, §ions, mapped_mem)) |debug_info| return debug_info else |_| {} + } + } + + return error.MissingDebugInfo; + } + + var di: Dwarf = .{ + .endian = endian, + .sections = sections, + .is_macho = false, + .compile_units_sorted = false, + }; + + try Dwarf.open(&di, gpa); + + return .{ + .base_address = 0, + .dwarf = di, + .mapped_memory = parent_mapped_mem orelse mapped_mem, + .external_mapped_memory = if (parent_mapped_mem != null) mapped_mem else null, + }; + } + + pub fn loadPath( + gpa: Allocator, + elf_file_path: Path, + build_id: ?[]const u8, + expected_crc: ?u32, + parent_sections: *Dwarf.SectionArray, + parent_mapped_mem: ?[]align(std.mem.page_size) const u8, + ) LoadError!Dwarf.ElfModule { + const elf_file = elf_file_path.root_dir.handle.openFile(elf_file_path.sub_path, .{}) catch |err| switch (err) { + error.FileNotFound => return missing(), + else => return err, + }; + defer elf_file.close(); + + const end_pos = elf_file.getEndPos() catch return bad(); + const file_len = cast(usize, end_pos) orelse return error.Overflow; + + const mapped_mem = try std.posix.mmap( + null, + file_len, + std.posix.PROT.READ, + .{ .TYPE = .SHARED }, + elf_file.handle, + 0, + ); + errdefer std.posix.munmap(mapped_mem); + + return load( + gpa, + mapped_mem, + build_id, + expected_crc, + parent_sections, + parent_mapped_mem, + elf_file_path.sub_path, + ); + } +}; + +pub fn getSymbol(di: *Dwarf, allocator: Allocator, address: u64) !std.debug.Symbol { + if (di.findCompileUnit(address)) |compile_unit| { + return .{ + .name = di.getSymbolName(address) orelse "???", + .compile_unit_name = compile_unit.die.getAttrString(di, std.dwarf.AT.name, di.section(.debug_str), compile_unit.*) catch |err| switch (err) { + error.MissingDebugInfo, error.InvalidDebugInfo => "???", + }, + .source_location = di.getLineNumberInfo(allocator, compile_unit, address) catch |err| switch (err) { + error.MissingDebugInfo, error.InvalidDebugInfo => null, + else => return err, + }, + }; + } else |err| switch (err) { + error.MissingDebugInfo, error.InvalidDebugInfo => return .{}, + else => return err, + } +} + +pub fn chopSlice(ptr: []const u8, offset: u64, size: u64) error{Overflow}![]const u8 { + const start = cast(usize, offset) orelse return error.Overflow; + const end = start + (cast(usize, size) orelse return error.Overflow); + return ptr[start..end]; +} diff --git a/lib/std/debug/FixedBufferReader.zig b/lib/std/debug/FixedBufferReader.zig new file mode 100644 index 0000000000..494245a9e9 --- /dev/null +++ b/lib/std/debug/FixedBufferReader.zig @@ -0,0 +1,93 @@ +//! Optimized for performance in debug builds. 
+ +const std = @import("../std.zig"); +const MemoryAccessor = std.debug.MemoryAccessor; + +const FixedBufferReader = @This(); + +buf: []const u8, +pos: usize = 0, +endian: std.builtin.Endian, + +pub const Error = error{ EndOfBuffer, Overflow, InvalidBuffer }; + +pub fn seekTo(fbr: *FixedBufferReader, pos: u64) Error!void { + if (pos > fbr.buf.len) return error.EndOfBuffer; + fbr.pos = @intCast(pos); +} + +pub fn seekForward(fbr: *FixedBufferReader, amount: u64) Error!void { + if (fbr.buf.len - fbr.pos < amount) return error.EndOfBuffer; + fbr.pos += @intCast(amount); +} + +pub inline fn readByte(fbr: *FixedBufferReader) Error!u8 { + if (fbr.pos >= fbr.buf.len) return error.EndOfBuffer; + defer fbr.pos += 1; + return fbr.buf[fbr.pos]; +} + +pub fn readByteSigned(fbr: *FixedBufferReader) Error!i8 { + return @bitCast(try fbr.readByte()); +} + +pub fn readInt(fbr: *FixedBufferReader, comptime T: type) Error!T { + const size = @divExact(@typeInfo(T).Int.bits, 8); + if (fbr.buf.len - fbr.pos < size) return error.EndOfBuffer; + defer fbr.pos += size; + return std.mem.readInt(T, fbr.buf[fbr.pos..][0..size], fbr.endian); +} + +pub fn readIntChecked( + fbr: *FixedBufferReader, + comptime T: type, + ma: *MemoryAccessor, +) Error!T { + if (ma.load(T, @intFromPtr(fbr.buf[fbr.pos..].ptr)) == null) + return error.InvalidBuffer; + + return fbr.readInt(T); +} + +pub fn readUleb128(fbr: *FixedBufferReader, comptime T: type) Error!T { + return std.leb.readUleb128(T, fbr); +} + +pub fn readIleb128(fbr: *FixedBufferReader, comptime T: type) Error!T { + return std.leb.readIleb128(T, fbr); +} + +pub fn readAddress(fbr: *FixedBufferReader, format: std.dwarf.Format) Error!u64 { + return switch (format) { + .@"32" => try fbr.readInt(u32), + .@"64" => try fbr.readInt(u64), + }; +} + +pub fn readAddressChecked( + fbr: *FixedBufferReader, + format: std.dwarf.Format, + ma: *MemoryAccessor, +) Error!u64 { + return switch (format) { + .@"32" => try fbr.readIntChecked(u32, ma), + .@"64" => try fbr.readIntChecked(u64, ma), + }; +} + +pub fn readBytes(fbr: *FixedBufferReader, len: usize) Error![]const u8 { + if (fbr.buf.len - fbr.pos < len) return error.EndOfBuffer; + defer fbr.pos += len; + return fbr.buf[fbr.pos..][0..len]; +} + +pub fn readBytesTo(fbr: *FixedBufferReader, comptime sentinel: u8) Error![:sentinel]const u8 { + const end = @call(.always_inline, std.mem.indexOfScalarPos, .{ + u8, + fbr.buf, + fbr.pos, + sentinel, + }) orelse return error.EndOfBuffer; + defer fbr.pos = end + 1; + return fbr.buf[fbr.pos..end :sentinel]; +} diff --git a/lib/std/debug/Info.zig b/lib/std/debug/Info.zig new file mode 100644 index 0000000000..ee191d2c12 --- /dev/null +++ b/lib/std/debug/Info.zig @@ -0,0 +1,62 @@ +//! Cross-platform abstraction for loading debug information into an in-memory +//! format that supports queries such as "what is the source location of this +//! virtual memory address?" +//! +//! Unlike `std.debug.SelfInfo`, this API does not assume the debug information +//! in question happens to match the host CPU architecture, OS, or other target +//! properties. + +const std = @import("../std.zig"); +const Allocator = std.mem.Allocator; +const Path = std.Build.Cache.Path; +const Dwarf = std.debug.Dwarf; +const page_size = std.mem.page_size; +const assert = std.debug.assert; +const Coverage = std.debug.Coverage; +const SourceLocation = std.debug.Coverage.SourceLocation; + +const Info = @This(); + +/// Sorted by key, ascending. 
+address_map: std.AutoArrayHashMapUnmanaged(u64, Dwarf.ElfModule), +/// Externally managed, outlives this `Info` instance. +coverage: *Coverage, + +pub const LoadError = Dwarf.ElfModule.LoadError; + +pub fn load(gpa: Allocator, path: Path, coverage: *Coverage) LoadError!Info { + var sections: Dwarf.SectionArray = Dwarf.null_section_array; + var elf_module = try Dwarf.ElfModule.loadPath(gpa, path, null, null, §ions, null); + try elf_module.dwarf.sortCompileUnits(); + var info: Info = .{ + .address_map = .{}, + .coverage = coverage, + }; + try info.address_map.put(gpa, elf_module.base_address, elf_module); + return info; +} + +pub fn deinit(info: *Info, gpa: Allocator) void { + for (info.address_map.values()) |*elf_module| { + elf_module.dwarf.deinit(gpa); + } + info.address_map.deinit(gpa); + info.* = undefined; +} + +pub const ResolveAddressesError = Coverage.ResolveAddressesDwarfError; + +/// Given an array of virtual memory addresses, sorted ascending, outputs a +/// corresponding array of source locations. +pub fn resolveAddresses( + info: *Info, + gpa: Allocator, + sorted_pc_addrs: []const u64, + /// Asserts its length equals length of `sorted_pc_addrs`. + output: []SourceLocation, +) ResolveAddressesError!void { + assert(sorted_pc_addrs.len == output.len); + if (info.address_map.entries.len != 1) @panic("TODO"); + const elf_module = &info.address_map.values()[0]; + return info.coverage.resolveAddressesDwarf(gpa, sorted_pc_addrs, output, &elf_module.dwarf); +} diff --git a/lib/std/debug/SelfInfo.zig b/lib/std/debug/SelfInfo.zig index f9747a088e..2d87243c5d 100644 --- a/lib/std/debug/SelfInfo.zig +++ b/lib/std/debug/SelfInfo.zig @@ -587,7 +587,7 @@ pub const Module = switch (native_os) { } if (section_index == null) continue; - const section_bytes = try chopSlice(mapped_mem, sect.offset, sect.size); + const section_bytes = try Dwarf.chopSlice(mapped_mem, sect.offset, sect.size); sections[section_index.?] 
= .{ .data = section_bytes, .virtual_address = sect.addr, @@ -602,10 +602,11 @@ pub const Module = switch (native_os) { sections[@intFromEnum(Dwarf.Section.Id.debug_line)] == null; if (missing_debug_info) return error.MissingDebugInfo; - var di = Dwarf{ + var di: Dwarf = .{ .endian = .little, .sections = sections, .is_macho = true, + .compile_units_sorted = false, }; try Dwarf.open(&di, allocator); @@ -622,7 +623,7 @@ pub const Module = switch (native_os) { return result.value_ptr; } - pub fn getSymbolAtAddress(self: *@This(), allocator: Allocator, address: usize) !SymbolInfo { + pub fn getSymbolAtAddress(self: *@This(), allocator: Allocator, address: usize) !std.debug.Symbol { nosuspend { const result = try self.getOFileInfoForAddress(allocator, address); if (result.symbol == null) return .{}; @@ -630,19 +631,19 @@ pub const Module = switch (native_os) { // Take the symbol name from the N_FUN STAB entry, we're going to // use it if we fail to find the DWARF infos const stab_symbol = mem.sliceTo(self.strings[result.symbol.?.strx..], 0); - if (result.o_file_info == null) return .{ .symbol_name = stab_symbol }; + if (result.o_file_info == null) return .{ .name = stab_symbol }; // Translate again the address, this time into an address inside the // .o file const relocated_address_o = result.o_file_info.?.addr_table.get(stab_symbol) orelse return .{ - .symbol_name = "???", + .name = "???", }; const addr_off = result.relocated_address - result.symbol.?.addr; const o_file_di = &result.o_file_info.?.di; if (o_file_di.findCompileUnit(relocated_address_o)) |compile_unit| { - return SymbolInfo{ - .symbol_name = o_file_di.getSymbolName(relocated_address_o) orelse "???", + return .{ + .name = o_file_di.getSymbolName(relocated_address_o) orelse "???", .compile_unit_name = compile_unit.die.getAttrString( o_file_di, std.dwarf.AT.name, @@ -651,9 +652,9 @@ pub const Module = switch (native_os) { ) catch |err| switch (err) { error.MissingDebugInfo, error.InvalidDebugInfo => "???", }, - .line_info = o_file_di.getLineNumberInfo( + .source_location = o_file_di.getLineNumberInfo( allocator, - compile_unit.*, + compile_unit, relocated_address_o + addr_off, ) catch |err| switch (err) { error.MissingDebugInfo, error.InvalidDebugInfo => null, @@ -662,7 +663,7 @@ pub const Module = switch (native_os) { }; } else |err| switch (err) { error.MissingDebugInfo, error.InvalidDebugInfo => { - return SymbolInfo{ .symbol_name = stab_symbol }; + return .{ .name = stab_symbol }; }, else => return err, } @@ -729,7 +730,7 @@ pub const Module = switch (native_os) { } } - fn getSymbolFromPdb(self: *@This(), relocated_address: usize) !?SymbolInfo { + fn getSymbolFromPdb(self: *@This(), relocated_address: usize) !?std.debug.Symbol { var coff_section: *align(1) const coff.SectionHeader = undefined; const mod_index = for (self.pdb.?.sect_contribs) |sect_contrib| { if (sect_contrib.Section > self.coff_section_headers.len) continue; @@ -759,14 +760,14 @@ pub const Module = switch (native_os) { relocated_address - coff_section.virtual_address, ); - return SymbolInfo{ - .symbol_name = symbol_name, + return .{ + .name = symbol_name, .compile_unit_name = obj_basename, - .line_info = opt_line_info, + .source_location = opt_line_info, }; } - pub fn getSymbolAtAddress(self: *@This(), allocator: Allocator, address: usize) !SymbolInfo { + pub fn getSymbolAtAddress(self: *@This(), allocator: Allocator, address: usize) !std.debug.Symbol { // Translate the VA into an address into this object const relocated_address = address - self.base_address; @@ 
-776,10 +777,10 @@ pub const Module = switch (native_os) { if (self.dwarf) |*dwarf| { const dwarf_address = relocated_address + self.coff_image_base; - return getSymbolFromDwarf(allocator, dwarf_address, dwarf); + return dwarf.getSymbol(allocator, dwarf_address); } - return SymbolInfo{}; + return .{}; } pub fn getDwarfInfoForAddress(self: *@This(), allocator: Allocator, address: usize) !?*const Dwarf { @@ -792,41 +793,18 @@ pub const Module = switch (native_os) { }; } }, - .linux, .netbsd, .freebsd, .dragonfly, .openbsd, .haiku, .solaris, .illumos => struct { - base_address: usize, - dwarf: Dwarf, - mapped_memory: []align(mem.page_size) const u8, - external_mapped_memory: ?[]align(mem.page_size) const u8, - - pub fn deinit(self: *@This(), allocator: Allocator) void { - self.dwarf.deinit(allocator); - posix.munmap(self.mapped_memory); - if (self.external_mapped_memory) |m| posix.munmap(m); - } - - pub fn getSymbolAtAddress(self: *@This(), allocator: Allocator, address: usize) !SymbolInfo { - // Translate the VA into an address into this object - const relocated_address = address - self.base_address; - return getSymbolFromDwarf(allocator, relocated_address, &self.dwarf); - } - - pub fn getDwarfInfoForAddress(self: *@This(), allocator: Allocator, address: usize) !?*const Dwarf { - _ = allocator; - _ = address; - return &self.dwarf; - } - }, + .linux, .netbsd, .freebsd, .dragonfly, .openbsd, .haiku, .solaris, .illumos => Dwarf.ElfModule, .wasi, .emscripten => struct { pub fn deinit(self: *@This(), allocator: Allocator) void { _ = self; _ = allocator; } - pub fn getSymbolAtAddress(self: *@This(), allocator: Allocator, address: usize) !SymbolInfo { + pub fn getSymbolAtAddress(self: *@This(), allocator: Allocator, address: usize) !std.debug.Symbol { _ = self; _ = allocator; _ = address; - return SymbolInfo{}; + return .{}; } pub fn getDwarfInfoForAddress(self: *@This(), allocator: Allocator, address: usize) !?*const Dwarf { @@ -1014,10 +992,11 @@ fn readCoffDebugInfo(allocator: Allocator, coff_obj: *coff.Coff) !Module { } else null; } - var dwarf = Dwarf{ + var dwarf: Dwarf = .{ .endian = native_endian, .sections = sections, .is_macho = false, + .compile_units_sorted = false, }; try Dwarf.open(&dwarf, allocator); @@ -1068,7 +1047,7 @@ pub fn readElfDebugInfo( expected_crc: ?u32, parent_sections: *Dwarf.SectionArray, parent_mapped_mem: ?[]align(mem.page_size) const u8, -) !Module { +) !Dwarf.ElfModule { nosuspend { const elf_file = (if (elf_filename) |filename| blk: { break :blk fs.cwd().openFile(filename, .{}); @@ -1078,176 +1057,15 @@ pub fn readElfDebugInfo( }; const mapped_mem = try mapWholeFile(elf_file); - if (expected_crc) |crc| if (crc != std.hash.crc.Crc32.hash(mapped_mem)) return error.InvalidDebugInfo; - - const hdr: *const elf.Ehdr = @ptrCast(&mapped_mem[0]); - if (!mem.eql(u8, hdr.e_ident[0..4], elf.MAGIC)) return error.InvalidElfMagic; - if (hdr.e_ident[elf.EI_VERSION] != 1) return error.InvalidElfVersion; - - const endian: std.builtin.Endian = switch (hdr.e_ident[elf.EI_DATA]) { - elf.ELFDATA2LSB => .little, - elf.ELFDATA2MSB => .big, - else => return error.InvalidElfEndian, - }; - assert(endian == native_endian); // this is our own debug info - - const shoff = hdr.e_shoff; - const str_section_off = shoff + @as(u64, hdr.e_shentsize) * @as(u64, hdr.e_shstrndx); - const str_shdr: *const elf.Shdr = @ptrCast(@alignCast(&mapped_mem[math.cast(usize, str_section_off) orelse return error.Overflow])); - const header_strings = mapped_mem[str_shdr.sh_offset..][0..str_shdr.sh_size]; - const 
shdrs = @as(
-            [*]const elf.Shdr,
-            @ptrCast(@alignCast(&mapped_mem[shoff])),
-        )[0..hdr.e_shnum];
-
-        var sections: Dwarf.SectionArray = Dwarf.null_section_array;
-
-        // Combine section list. This takes ownership over any owned sections from the parent scope.
-        for (parent_sections, &sections) |*parent, *section| {
-            if (parent.*) |*p| {
-                section.* = p.*;
-                p.owned = false;
-            }
-        }
-        errdefer for (sections) |section| if (section) |s| if (s.owned) allocator.free(s.data);
-
-        var separate_debug_filename: ?[]const u8 = null;
-        var separate_debug_crc: ?u32 = null;
-
-        for (shdrs) |*shdr| {
-            if (shdr.sh_type == elf.SHT_NULL or shdr.sh_type == elf.SHT_NOBITS) continue;
-            const name = mem.sliceTo(header_strings[shdr.sh_name..], 0);
-
-            if (mem.eql(u8, name, ".gnu_debuglink")) {
-                const gnu_debuglink = try chopSlice(mapped_mem, shdr.sh_offset, shdr.sh_size);
-                const debug_filename = mem.sliceTo(@as([*:0]const u8, @ptrCast(gnu_debuglink.ptr)), 0);
-                const crc_offset = mem.alignForward(usize, @intFromPtr(&debug_filename[debug_filename.len]) + 1, 4) - @intFromPtr(gnu_debuglink.ptr);
-                const crc_bytes = gnu_debuglink[crc_offset..][0..4];
-                separate_debug_crc = mem.readInt(u32, crc_bytes, native_endian);
-                separate_debug_filename = debug_filename;
-                continue;
-            }
-
-            var section_index: ?usize = null;
-            inline for (@typeInfo(Dwarf.Section.Id).Enum.fields, 0..) |section, i| {
-                if (mem.eql(u8, "." ++ section.name, name)) section_index = i;
-            }
-            if (section_index == null) continue;
-            if (sections[section_index.?] != null) continue;
-
-            const section_bytes = try chopSlice(mapped_mem, shdr.sh_offset, shdr.sh_size);
-            sections[section_index.?] = if ((shdr.sh_flags & elf.SHF_COMPRESSED) > 0) blk: {
-                var section_stream = std.io.fixedBufferStream(section_bytes);
-                var section_reader = section_stream.reader();
-                const chdr = section_reader.readStruct(elf.Chdr) catch continue;
-                if (chdr.ch_type != .ZLIB) continue;
-
-                var zlib_stream = std.compress.zlib.decompressor(section_stream.reader());
-
-                const decompressed_section = try allocator.alloc(u8, chdr.ch_size);
-                errdefer allocator.free(decompressed_section);
-
-                const read = zlib_stream.reader().readAll(decompressed_section) catch continue;
-                assert(read == decompressed_section.len);
-
-                break :blk .{
-                    .data = decompressed_section,
-                    .virtual_address = shdr.sh_addr,
-                    .owned = true,
-                };
-            } else .{
-                .data = section_bytes,
-                .virtual_address = shdr.sh_addr,
-                .owned = false,
-            };
-        }
-
-        const missing_debug_info =
-            sections[@intFromEnum(Dwarf.Section.Id.debug_info)] == null or
-            sections[@intFromEnum(Dwarf.Section.Id.debug_abbrev)] == null or
-            sections[@intFromEnum(Dwarf.Section.Id.debug_str)] == null or
-            sections[@intFromEnum(Dwarf.Section.Id.debug_line)] == null;
-
-        // Attempt to load debug info from an external file
-        // See: https://sourceware.org/gdb/onlinedocs/gdb/Separate-Debug-Files.html
-        if (missing_debug_info) {
-
-            // Only allow one level of debug info nesting
-            if (parent_mapped_mem) |_| {
-                return error.MissingDebugInfo;
-            }
-
-            const global_debug_directories = [_][]const u8{
-                "/usr/lib/debug",
-            };
-
-            // <global debug directory>/.build-id/<2-character id prefix>/<id remainder>.debug
-            if (build_id) |id| blk: {
-                if (id.len < 3) break :blk;
-
-                // Either md5 (16 bytes) or sha1 (20 bytes) are used here in practice
-                const extension = ".debug";
-                var id_prefix_buf: [2]u8 = undefined;
-                var filename_buf: [38 + extension.len]u8 = undefined;
-
-                _ = std.fmt.bufPrint(&id_prefix_buf, "{s}", .{std.fmt.fmtSliceHexLower(id[0..1])}) catch unreachable;
-                const filename = std.fmt.bufPrint(
-                    &filename_buf,
-                    "{s}" ++ extension,
-                    .{std.fmt.fmtSliceHexLower(id[1..])},
-                ) catch break :blk;
-
-                for (global_debug_directories) |global_directory| {
-                    const path = try fs.path.join(allocator, &.{ global_directory, ".build-id", &id_prefix_buf, filename });
-                    defer allocator.free(path);
-
-                    return readElfDebugInfo(allocator, path, null, separate_debug_crc, &sections, mapped_mem) catch continue;
-                }
-            }
-
-            // use the path from .gnu_debuglink, in the same search order as gdb
-            if (separate_debug_filename) |separate_filename| blk: {
-                if (elf_filename != null and mem.eql(u8, elf_filename.?, separate_filename)) return error.MissingDebugInfo;
-
-                // <cwd>/<gnu_debuglink>
-                if (readElfDebugInfo(allocator, separate_filename, null, separate_debug_crc, &sections, mapped_mem)) |debug_info| return debug_info else |_| {}
-
-                // <cwd>/.debug/<gnu_debuglink>
-                {
-                    const path = try fs.path.join(allocator, &.{ ".debug", separate_filename });
-                    defer allocator.free(path);
-
-                    if (readElfDebugInfo(allocator, path, null, separate_debug_crc, &sections, mapped_mem)) |debug_info| return debug_info else |_| {}
-                }
-
-                var cwd_buf: [fs.max_path_bytes]u8 = undefined;
-                const cwd_path = posix.realpath(".", &cwd_buf) catch break :blk;
-
-                // <global debug directory>/<absolute folder of current binary>/<gnu_debuglink>
-                for (global_debug_directories) |global_directory| {
-                    const path = try fs.path.join(allocator, &.{ global_directory, cwd_path, separate_filename });
-                    defer allocator.free(path);
-                    if (readElfDebugInfo(allocator, path, null, separate_debug_crc, &sections, mapped_mem)) |debug_info| return debug_info else |_| {}
-                }
-            }
-
-            return error.MissingDebugInfo;
-        }
-
-        var di = Dwarf{
-            .endian = endian,
-            .sections = sections,
-            .is_macho = false,
-        };
-
-        try Dwarf.open(&di, allocator);
-
-        return .{
-            .base_address = undefined,
-            .dwarf = di,
-            .mapped_memory = parent_mapped_mem orelse mapped_mem,
-            .external_mapped_memory = if (parent_mapped_mem != null) mapped_mem else null,
-        };
+        return Dwarf.ElfModule.load(
+            allocator,
+            mapped_mem,
+            build_id,
+            expected_crc,
+            parent_sections,
+            parent_mapped_mem,
+            elf_filename,
+        );
     }
 }
@@ -1289,22 +1107,6 @@ fn mapWholeFile(file: File) ![]align(mem.page_size) const u8 {
     }
 }
 
-fn chopSlice(ptr: []const u8, offset: u64, size: u64) error{Overflow}![]const u8 {
-    const start = math.cast(usize, offset) orelse return error.Overflow;
-    const end = start + (math.cast(usize, size) orelse return error.Overflow);
-    return ptr[start..end];
-}
-
-pub const SymbolInfo = struct {
-    symbol_name: []const u8 = "???",
-    compile_unit_name: []const u8 = "???",
-    line_info: ?std.debug.SourceLocation = null,
-
-    pub fn deinit(self: SymbolInfo, allocator: Allocator) void {
-        if (self.line_info) |li| allocator.free(li.file_name);
-    }
-};
-
 fn machoSearchSymbols(symbols: []const MachoSymbol, address: usize) ?*const MachoSymbol {
     var min: usize = 0;
     var max: usize = symbols.len - 1;
@@ -1350,26 +1152,6 @@ test machoSearchSymbols {
     try testing.expectEqual(&symbols[2], machoSearchSymbols(&symbols, 5000).?);
 }
 
-fn getSymbolFromDwarf(allocator: Allocator, address: u64, di: *Dwarf) !SymbolInfo {
-    if (nosuspend di.findCompileUnit(address)) |compile_unit| {
-        return SymbolInfo{
-            .symbol_name = nosuspend di.getSymbolName(address) orelse "???",
-            .compile_unit_name = compile_unit.die.getAttrString(di, std.dwarf.AT.name, di.section(.debug_str), compile_unit.*) catch |err| switch (err) {
-                error.MissingDebugInfo, error.InvalidDebugInfo => "???",
-            },
-            .line_info = nosuspend di.getLineNumberInfo(allocator, compile_unit.*, address) catch |err| switch (err) {
-                error.MissingDebugInfo, error.InvalidDebugInfo => null,
-                else => return err,
-            },
-        };
-    } else |err| switch (err) {
-        error.MissingDebugInfo, error.InvalidDebugInfo => {
-            return SymbolInfo{};
-        },
-        else => return err,
-    }
-}
-
 /// Unwind a frame using MachO compact unwind info (from __unwind_info).
 /// If the compact encoding can't encode a way to unwind a frame, it will
 /// defer unwinding to DWARF, in which case `.eh_frame` will be used if available.
@@ -1796,7 +1578,7 @@ pub fn unwindFrameDwarf(
     const frame_section = di.section(dwarf_section) orelse return error.MissingFDE;
     if (fde_offset >= frame_section.len) return error.MissingFDE;
 
-    var fbr: std.debug.DeprecatedFixedBufferReader = .{
+    var fbr: std.debug.FixedBufferReader = .{
         .buf = frame_section,
         .pos = fde_offset,
         .endian = di.endian,
@@ -2028,6 +1810,7 @@ fn unwindFrameMachODwarf(
     var di: Dwarf = .{
         .endian = native_endian,
         .is_macho = true,
+        .compile_units_sorted = false,
     };
 
     defer di.deinit(context.allocator);
diff --git a/lib/std/http.zig b/lib/std/http.zig
index 621c7a5f0d..d5d5583299 100644
--- a/lib/std/http.zig
+++ b/lib/std/http.zig
@@ -4,6 +4,7 @@ pub const protocol = @import("http/protocol.zig");
 pub const HeadParser = @import("http/HeadParser.zig");
 pub const ChunkParser = @import("http/ChunkParser.zig");
 pub const HeaderIterator = @import("http/HeaderIterator.zig");
+pub const WebSocket = @import("http/WebSocket.zig");
 
 pub const Version = enum {
     @"HTTP/1.0",
@@ -318,6 +319,7 @@ test {
         _ = Status;
         _ = HeadParser;
         _ = ChunkParser;
+        _ = WebSocket;
         _ = @import("http/test.zig");
     }
 }
diff --git a/lib/std/http/WebSocket.zig b/lib/std/http/WebSocket.zig
new file mode 100644
index 0000000000..ad513fddf8
--- /dev/null
+++ b/lib/std/http/WebSocket.zig
@@ -0,0 +1,243 @@
+//! See https://tools.ietf.org/html/rfc6455
+
+const builtin = @import("builtin");
+const std = @import("std");
+const WebSocket = @This();
+const assert = std.debug.assert;
+const native_endian = builtin.cpu.arch.endian();
+
+key: []const u8,
+request: *std.http.Server.Request,
+recv_fifo: std.fifo.LinearFifo(u8, .Slice),
+reader: std.io.AnyReader,
+response: std.http.Server.Response,
+/// Number of bytes that have been peeked but not discarded yet.
+outstanding_len: usize,
+
+pub const InitError = error{WebSocketUpgradeMissingKey} ||
+    std.http.Server.Request.ReaderError;
+
+pub fn init(
+    ws: *WebSocket,
+    request: *std.http.Server.Request,
+    send_buffer: []u8,
+    recv_buffer: []align(4) u8,
+) InitError!bool {
+    var sec_websocket_key: ?[]const u8 = null;
+    var upgrade_websocket: bool = false;
+    var it = request.iterateHeaders();
+    while (it.next()) |header| {
+        if (std.ascii.eqlIgnoreCase(header.name, "sec-websocket-key")) {
+            sec_websocket_key = header.value;
+        } else if (std.ascii.eqlIgnoreCase(header.name, "upgrade")) {
+            if (!std.mem.eql(u8, header.value, "websocket"))
+                return false;
+            upgrade_websocket = true;
+        }
+    }
+    if (!upgrade_websocket)
+        return false;
+
+    const key = sec_websocket_key orelse return error.WebSocketUpgradeMissingKey;
+
+    var sha1 = std.crypto.hash.Sha1.init(.{});
+    sha1.update(key);
+    sha1.update("258EAFA5-E914-47DA-95CA-C5AB0DC85B11");
+    var digest: [std.crypto.hash.Sha1.digest_length]u8 = undefined;
+    sha1.final(&digest);
+    var base64_digest: [28]u8 = undefined;
+    assert(std.base64.standard.Encoder.encode(&base64_digest, &digest).len == base64_digest.len);
+
+    request.head.content_length = std.math.maxInt(u64);
+
+    ws.* = .{
+        .key = key,
+        .recv_fifo = std.fifo.LinearFifo(u8, .Slice).init(recv_buffer),
+        .reader = try request.reader(),
+        .response = request.respondStreaming(.{
+            .send_buffer = send_buffer,
+            .respond_options = .{
+                .status = .switching_protocols,
+                .extra_headers = &.{
+                    .{ .name = "upgrade", .value = "websocket" },
+                    .{ .name = "connection", .value = "upgrade" },
+                    .{ .name = "sec-websocket-accept", .value = &base64_digest },
+                },
+                .transfer_encoding = .none,
+            },
+        }),
+        .request = request,
+        .outstanding_len = 0,
+    };
+    return true;
+}
+
+pub const Header0 = packed struct(u8) {
+    opcode: Opcode,
+    rsv3: u1 = 0,
+    rsv2: u1 = 0,
+    rsv1: u1 = 0,
+    fin: bool,
+};
+
+pub const Header1 = packed struct(u8) {
+    payload_len: enum(u7) {
+        len16 = 126,
+        len64 = 127,
+        _,
+    },
+    mask: bool,
+};
+
+pub const Opcode = enum(u4) {
+    continuation = 0,
+    text = 1,
+    binary = 2,
+    connection_close = 8,
+    ping = 9,
+    /// "A Pong frame MAY be sent unsolicited. This serves as a unidirectional
+    /// heartbeat. A response to an unsolicited Pong frame is not expected."
+    pong = 10,
+    _,
+};
+
+pub const ReadSmallTextMessageError = error{
+    ConnectionClose,
+    UnexpectedOpCode,
+    MessageTooBig,
+    MissingMaskBit,
+} || RecvError;
+
+pub const SmallMessage = struct {
+    /// Can be text, binary, or ping.
+    opcode: Opcode,
+    data: []u8,
+};
+
+/// Reads the next message from the WebSocket stream, failing if the message does not fit
+/// into `recv_buffer`.
+pub fn readSmallMessage(ws: *WebSocket) ReadSmallTextMessageError!SmallMessage {
+    while (true) {
+        const header_bytes = (try recv(ws, 2))[0..2];
+        const h0: Header0 = @bitCast(header_bytes[0]);
+        const h1: Header1 = @bitCast(header_bytes[1]);
+
+        switch (h0.opcode) {
+            .text, .binary, .pong, .ping => {},
+            .connection_close => return error.ConnectionClose,
+            .continuation => return error.UnexpectedOpCode,
+            _ => return error.UnexpectedOpCode,
+        }
+
+        if (!h0.fin) return error.MessageTooBig;
+        if (!h1.mask) return error.MissingMaskBit;
+
+        const len: usize = switch (h1.payload_len) {
+            .len16 => try recvReadInt(ws, u16),
+            .len64 => std.math.cast(usize, try recvReadInt(ws, u64)) orelse return error.MessageTooBig,
+            else => @intFromEnum(h1.payload_len),
+        };
+        if (len > ws.recv_fifo.buf.len) return error.MessageTooBig;
+
+        const mask: u32 = @bitCast((try recv(ws, 4))[0..4].*);
+        const payload = try recv(ws, len);
+
+        // Skip pongs.
+        if (h0.opcode == .pong) continue;
+
+        // The last item may contain a partial word of unused data.
+        const floored_len = (payload.len / 4) * 4;
+        const u32_payload: []align(1) u32 = @alignCast(std.mem.bytesAsSlice(u32, payload[0..floored_len]));
+        for (u32_payload) |*elem| elem.* ^= mask;
+        const mask_bytes = std.mem.asBytes(&mask)[0 .. payload.len - floored_len];
+        for (payload[floored_len..], mask_bytes) |*leftover, m| leftover.* ^= m;
+
+        return .{
+            .opcode = h0.opcode,
+            .data = payload,
+        };
+    }
+}
+
+const RecvError = std.http.Server.Request.ReadError || error{EndOfStream};
+
+fn recv(ws: *WebSocket, len: usize) RecvError![]u8 {
+    ws.recv_fifo.discard(ws.outstanding_len);
+    assert(len <= ws.recv_fifo.buf.len);
+    if (len > ws.recv_fifo.count) {
+        const small_buf = ws.recv_fifo.writableSlice(0);
+        const needed = len - ws.recv_fifo.count;
+        const buf = if (small_buf.len >= needed) small_buf else b: {
+            ws.recv_fifo.realign();
+            break :b ws.recv_fifo.writableSlice(0);
+        };
+        const n = try @as(RecvError!usize, @errorCast(ws.reader.readAtLeast(buf, needed)));
+        if (n < needed) return error.EndOfStream;
+        ws.recv_fifo.update(n);
+    }
+    ws.outstanding_len = len;
+    // TODO: improve the std lib API so this cast isn't necessary.
+    return @constCast(ws.recv_fifo.readableSliceOfLen(len));
+}
+
+fn recvReadInt(ws: *WebSocket, comptime I: type) !I {
+    const unswapped: I = @bitCast((try recv(ws, @sizeOf(I)))[0..@sizeOf(I)].*);
+    return switch (native_endian) {
+        .little => @byteSwap(unswapped),
+        .big => unswapped,
+    };
+}
+
+pub const WriteError = std.http.Server.Response.WriteError;
+
+pub fn writeMessage(ws: *WebSocket, message: []const u8, opcode: Opcode) WriteError!void {
+    const iovecs: [1]std.posix.iovec_const = .{
+        .{ .base = message.ptr, .len = message.len },
+    };
+    return writeMessagev(ws, &iovecs, opcode);
+}
+
+pub fn writeMessagev(ws: *WebSocket, message: []const std.posix.iovec_const, opcode: Opcode) WriteError!void {
+    const total_len = l: {
+        var total_len: u64 = 0;
+        for (message) |iovec| total_len += iovec.len;
+        break :l total_len;
+    };
+
+    var header_buf: [2 + 8]u8 = undefined;
+    header_buf[0] = @bitCast(@as(Header0, .{
+        .opcode = opcode,
+        .fin = true,
+    }));
+    const header = switch (total_len) {
+        0...125 => blk: {
+            header_buf[1] = @bitCast(@as(Header1, .{
+                .payload_len = @enumFromInt(total_len),
+                .mask = false,
+            }));
+            break :blk header_buf[0..2];
+        },
+        126...0xffff => blk: {
+            header_buf[1] = @bitCast(@as(Header1, .{
+                .payload_len = .len16,
+                .mask = false,
+            }));
+            std.mem.writeInt(u16, header_buf[2..4], @intCast(total_len), .big);
+            break :blk header_buf[0..4];
+        },
+        else => blk: {
+            header_buf[1] = @bitCast(@as(Header1, .{
+                .payload_len = .len64,
+                .mask = false,
+            }));
+            std.mem.writeInt(u64, header_buf[2..10], total_len, .big);
+            break :blk header_buf[0..10];
+        },
+    };
+
+    const response = &ws.response;
+    try response.writeAll(header);
+    for (message) |iovec|
+        try response.writeAll(iovec.base[0..iovec.len]);
+    try response.flush();
+}
diff --git a/lib/std/posix.zig b/lib/std/posix.zig
index e04efbbcc0..02f2d975dd 100644
--- a/lib/std/posix.zig
+++ b/lib/std/posix.zig
@@ -47,6 +47,11 @@ else switch (native_os) {
     .plan9 => std.os.plan9,
     else => struct {
         pub const ucontext_t = void;
+        pub const pid_t = void;
+        pub const pollfd = void;
+        pub const fd_t = void;
+        pub const uid_t = void;
+        pub const gid_t = void;
     },
 };
diff --git a/lib/std/zig/Server.zig b/lib/std/zig/Server.zig
index f1e564d43e..7ce017045d 100644
--- a/lib/std/zig/Server.zig
+++ b/lib/std/zig/Server.zig
@@ -28,6 +28,14 @@ pub const Message = struct {
         /// The remaining bytes is the file path relative to that prefix.
         /// The prefixes are hard-coded in Compilation.create (cwd, zig lib dir, local cache dir)
         file_system_inputs,
+        /// Body is a u64le that indicates the file path within the cache used
+        /// to store coverage information. The integer is a hash of the PCs
+        /// stored within that file.
+        coverage_id,
+        /// Body is a u64le that indicates the function pointer virtual memory
+        /// address of the fuzz unit test. This is used to provide a starting
+        /// point to view coverage.
+        fuzz_start_addr,
         _,
     };
 
@@ -180,6 +188,14 @@ pub fn serveMessage(
     try s.out.writevAll(iovecs[0 .. bufs.len + 1]);
 }
 
+pub fn serveU64Message(s: *Server, tag: OutMessage.Tag, int: u64) !void {
+    const msg_le = bswap(int);
+    return s.serveMessage(.{
+        .tag = tag,
+        .bytes_len = @sizeOf(u64),
+    }, &.{std.mem.asBytes(&msg_le)});
+}
+
 pub fn serveEmitBinPath(
     s: *Server,
     fs_path: []const u8,
@@ -187,7 +203,7 @@ pub fn serveEmitBinPath(
 ) !void {
     try s.serveMessage(.{
         .tag = .emit_bin_path,
-        .bytes_len = @as(u32, @intCast(fs_path.len + @sizeOf(OutMessage.EmitBinPath))),
+        .bytes_len = @intCast(fs_path.len + @sizeOf(OutMessage.EmitBinPath)),
     }, &.{
         std.mem.asBytes(&header),
         fs_path,
@@ -201,7 +217,7 @@ pub fn serveTestResults(
     const msg_le = bswap(msg);
     try s.serveMessage(.{
        .tag = .test_results,
-        .bytes_len = @as(u32, @intCast(@sizeOf(OutMessage.TestResults))),
+        .bytes_len = @intCast(@sizeOf(OutMessage.TestResults)),
     }, &.{
         std.mem.asBytes(&msg_le),
     });
@@ -209,14 +225,14 @@ pub fn serveErrorBundle(s: *Server, error_bundle: std.zig.ErrorBundle) !void {
     const eb_hdr: OutMessage.ErrorBundle = .{
-        .extra_len = @as(u32, @intCast(error_bundle.extra.len)),
-        .string_bytes_len = @as(u32, @intCast(error_bundle.string_bytes.len)),
+        .extra_len = @intCast(error_bundle.extra.len),
+        .string_bytes_len = @intCast(error_bundle.string_bytes.len),
     };
     const bytes_len = @sizeOf(OutMessage.ErrorBundle) + 4 * error_bundle.extra.len + error_bundle.string_bytes.len;
     try s.serveMessage(.{
         .tag = .error_bundle,
-        .bytes_len = @as(u32, @intCast(bytes_len)),
+        .bytes_len = @intCast(bytes_len),
     }, &.{
         std.mem.asBytes(&eb_hdr),
         // TODO: implement @ptrCast between slices changing the length
@@ -251,7 +267,7 @@ pub fn serveTestMetadata(s: *Server, test_metadata: TestMetadata) !void {
 
     return s.serveMessage(.{
         .tag = .test_metadata,
-        .bytes_len = @as(u32, @intCast(bytes_len)),
+        .bytes_len = @intCast(bytes_len),
     }, &.{
         std.mem.asBytes(&header),
         // TODO: implement @ptrCast between slices changing the length
diff --git a/lib/std/zig/tokenizer.zig b/lib/std/zig/tokenizer.zig
index c375818770..b63bde5633 100644
--- a/lib/std/zig/tokenizer.zig
+++ b/lib/std/zig/tokenizer.zig
@@ -1840,3 +1840,48 @@ fn testTokenize(source: [:0]const u8, expected_token_tags: []const Token.Tag) !v
     try std.testing.expectEqual(source.len, last_token.loc.start);
     try std.testing.expectEqual(source.len, last_token.loc.end);
 }
+
+test "fuzzable properties upheld" {
+    const source = std.testing.fuzzInput(.{});
+    const source0 = try std.testing.allocator.dupeZ(u8, source);
+    defer std.testing.allocator.free(source0);
+    var tokenizer = Tokenizer.init(source0);
+    var tokenization_failed = false;
+    while (true) {
+        const token = tokenizer.next();
+
+        // Property: token end location after start location (or equal)
+        try std.testing.expect(token.loc.end >= token.loc.start);
+
+        switch (token.tag) {
+            .invalid => {
+                tokenization_failed = true;
+
+                // Property: invalid token always ends at newline or eof
+                try std.testing.expect(source0[token.loc.end] == '\n' or source0[token.loc.end] == 0);
+            },
+            .eof => {
+                // Property: EOF token is always 0-length at end of source.
+                try std.testing.expectEqual(source0.len, token.loc.start);
+                try std.testing.expectEqual(source0.len, token.loc.end);
+                break;
+            },
+            else => continue,
+        }
+    }
+
+    if (source0.len > 0) for (source0, source0[1..][0..source0.len]) |cur, next| {
+        // Property: No null byte allowed except at end.
+        if (cur == 0) {
+            try std.testing.expect(tokenization_failed);
+        }
+        // Property: No ASCII control characters other than \n and \t are allowed.
+        if (std.ascii.isControl(cur) and cur != '\n' and cur != '\t') {
+            try std.testing.expect(tokenization_failed);
+        }
+        // Property: All '\r' must be followed by '\n'.
+        if (cur == '\r' and next != '\n') {
+            try std.testing.expect(tokenization_failed);
+        }
+    };
+}
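The tokenizer test above is driven through `std.testing.fuzzInput`. As a rough sketch (not part of this commit) of what a user-facing fuzz test looks like with that entry point: `parseSomething` is a hypothetical stand-in for any parser, and the `zig build test --fuzz` invocation mentioned in the comments is an assumption based on this branch's build runner changes.

const std = @import("std");

// Hypothetical function under test; any parser or decoder works here.
fn parseSomething(bytes: []const u8) error{Invalid}!void {
    if (bytes.len > 0 and bytes[0] == 0xff) return error.Invalid;
}

test "fuzz example" {
    // Same entry point as the tokenizer test above; when fuzzing is enabled
    // (assumed to be via `zig build test --fuzz`), the fuzzer supplies the bytes.
    const input = std.testing.fuzzInput(.{});
    // Property: parsing arbitrary bytes never crashes; returning an error is fine.
    parseSomething(input) catch {};
}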

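For the new `std.http.WebSocket` added above (presumably consumed by the new Build/Fuzz/WebServer.zig to push live updates to the browser), here is a minimal echo-handler sketch. The `std.net`/`std.http.Server` setup, buffer sizes, and echo behavior are illustrative assumptions, not code from this commit.

const std = @import("std");

// Sketch: upgrade one accepted connection to a WebSocket and echo messages back.
fn serveWebSocket(connection: std.net.Server.Connection) !void {
    var read_buffer: [0x4000]u8 = undefined;
    var http_server = std.http.Server.init(connection, &read_buffer);
    var request = try http_server.receiveHead();

    var send_buffer: [0x4000]u8 = undefined;
    var recv_buffer: [0x4000]u8 align(4) = undefined;
    var ws: std.http.WebSocket = undefined;
    if (!try ws.init(&request, &send_buffer, &recv_buffer)) {
        // Not an upgrade request; answer it as plain HTTP instead.
        return request.respond("websocket upgrade required\n", .{ .status = .bad_request });
    }

    while (true) {
        const msg = try ws.readSmallMessage();
        switch (msg.opcode) {
            // readSmallMessage only returns text, binary, or ping frames;
            // connection_close surfaces as error.ConnectionClose.
            .ping => try ws.writeMessage(msg.data, .pong),
            .text, .binary => try ws.writeMessage(msg.data, msg.opcode),
            else => return,
        }
    }
}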