diff options
Diffstat (limited to 'src/Compilation.zig')
| -rw-r--r-- | src/Compilation.zig | 196 |
1 files changed, 191 insertions, 5 deletions
diff --git a/src/Compilation.zig b/src/Compilation.zig index 3796ed6acc..419c56019f 100644 --- a/src/Compilation.zig +++ b/src/Compilation.zig @@ -173,7 +173,6 @@ verbose_cimport: bool, verbose_llvm_cpu_features: bool, verbose_link: bool, disable_c_depfile: bool, -time_report: bool, stack_report: bool, debug_compiler_runtime_libs: bool, debug_compile_errors: bool, @@ -263,6 +262,8 @@ link_prog_node: std.Progress.Node = std.Progress.Node.none, llvm_opt_bisect_limit: c_int, +time_report: ?TimeReport, + file_system_inputs: ?*std.ArrayListUnmanaged(u8), /// This is the digest of the cache for the current compilation. @@ -322,6 +323,72 @@ const QueuedJobs = struct { zigc_lib: bool = false, }; +pub const Timer = union(enum) { + unused, + active: struct { + start: std.time.Instant, + saved_ns: u64, + }, + paused: u64, + stopped, + + pub fn pause(t: *Timer) void { + switch (t.*) { + .unused => return, + .active => |a| { + const current = std.time.Instant.now() catch unreachable; + const new_ns = switch (current.order(a.start)) { + .lt, .eq => 0, + .gt => current.since(a.start), + }; + t.* = .{ .paused = a.saved_ns + new_ns }; + }, + .paused => unreachable, + .stopped => unreachable, + } + } + pub fn @"resume"(t: *Timer) void { + switch (t.*) { + .unused => return, + .active => unreachable, + .paused => |saved_ns| t.* = .{ .active = .{ + .start = std.time.Instant.now() catch unreachable, + .saved_ns = saved_ns, + } }, + .stopped => unreachable, + } + } + pub fn finish(t: *Timer) ?u64 { + defer t.* = .stopped; + switch (t.*) { + .unused => return null, + .active => |a| { + const current = std.time.Instant.now() catch unreachable; + const new_ns = switch (current.order(a.start)) { + .lt, .eq => 0, + .gt => current.since(a.start), + }; + return a.saved_ns + new_ns; + }, + .paused => |ns| return ns, + .stopped => unreachable, + } + } +}; + +/// Starts a timer for measuring a `--time-report` value. If `comp.time_report` is `null`, the +/// returned timer does nothing. When the thing being timed is done, call `Timer.finish`. If that +/// function returns non-`null`, then the value is a number of nanoseconds, and `comp.time_report` +/// is set. +pub fn startTimer(comp: *Compilation) Timer { + if (comp.time_report == null) return .unused; + const now = std.time.Instant.now() catch @panic("std.time.Timer unsupported; cannot emit time report"); + return .{ .active = .{ + .start = now, + .saved_ns = 0, + } }; +} + /// A filesystem path, represented relative to one of a few specific directories where possible. /// Every path (considering symlinks as distinct paths) has a canonical representation in this form. /// This abstraction allows us to: @@ -787,6 +854,58 @@ pub inline fn debugIncremental(comp: *const Compilation) bool { return comp.debug_incremental; } +pub const TimeReport = struct { + stats: std.Build.abi.time_report.CompileResult.Stats, + + /// Allocated into `gpa`. The pass time statistics emitted by LLVM's "time-passes" option. + /// LLVM provides this data in ASCII form as a table, which can be directly shown to users. + /// + /// Ideally, we would be able to use `printAllJSONValues` to get *structured* data which we can + /// then display more nicely. Unfortunately, that function seems to trip an assertion on one of + /// the pass timer names at the time of writing. + llvm_pass_timings: []u8, + + /// Key is a ZIR `declaration` instruction; value is the number of nanoseconds spent analyzing + /// it. This is the total across all instances of the generic parent namespace, and (if this is + /// a function) all generic instances of this function. It also includes time spent analyzing + /// function bodies if this is a function (generic or otherwise). + /// An entry not existing means the declaration has not been analyzed (so far). + decl_sema_info: std.AutoArrayHashMapUnmanaged(InternPool.TrackedInst.Index, struct { + ns: u64, + count: u32, + }), + + /// Key is a ZIR `declaration` instruction which is a function or test; value is the number of + /// nanoseconds spent running codegen on it. As above, this is the total across all generic + /// instances, both of this function itself and of its parent namespace. + /// An entry not existing means the declaration has not been codegenned (so far). + /// Every key in `decl_codegen_ns` is also in `decl_sema_ns`. + decl_codegen_ns: std.AutoArrayHashMapUnmanaged(InternPool.TrackedInst.Index, u64), + + /// Key is a ZIR `declaration` instruction which is anything other than a `comptime` decl; value + /// is the number of nanoseconds spent linking it into the binary. As above, this is the total + /// across all generic instances. + /// An entry not existing means the declaration has not been linked (so far). + /// Every key in `decl_link_ns` is also in `decl_sema_ns`. + decl_link_ns: std.AutoArrayHashMapUnmanaged(InternPool.TrackedInst.Index, u64), + + pub fn deinit(tr: *TimeReport, gpa: Allocator) void { + tr.stats = undefined; + gpa.free(tr.llvm_pass_timings); + tr.decl_sema_info.deinit(gpa); + tr.decl_codegen_ns.deinit(gpa); + tr.decl_link_ns.deinit(gpa); + } + + pub const init: TimeReport = .{ + .stats = .init, + .llvm_pass_timings = &.{}, + .decl_sema_info = .empty, + .decl_codegen_ns = .empty, + .decl_link_ns = .empty, + }; +}; + pub const default_stack_protector_buffer_size = target_util.default_stack_protector_buffer_size; pub const SemaError = Zcu.SemaError; @@ -2027,7 +2146,7 @@ pub fn create(gpa: Allocator, arena: Allocator, options: CreateOptions) !*Compil .verbose_link = options.verbose_link, .disable_c_depfile = options.disable_c_depfile, .reference_trace = options.reference_trace, - .time_report = options.time_report, + .time_report = if (options.time_report) .init else null, .stack_report = options.stack_report, .test_filters = options.test_filters, .test_name_prefix = options.test_name_prefix, @@ -2561,6 +2680,8 @@ pub fn destroy(comp: *Compilation) void { } comp.failed_win32_resources.deinit(gpa); + if (comp.time_report) |*tr| tr.deinit(gpa); + comp.link_diags.deinit(); comp.clearMiscFailures(); @@ -2657,6 +2778,10 @@ pub fn update(comp: *Compilation, main_progress_node: std.Progress.Node) !void { comp.clearMiscFailures(); comp.last_update_was_cache_hit = false; + if (comp.time_report) |*tr| { + tr.deinit(gpa); // this is information about an old update + tr.* = .init; + } var tmp_dir_rand_int: u64 = undefined; var man: Cache.Manifest = undefined; @@ -2688,6 +2813,14 @@ pub fn update(comp: *Compilation, main_progress_node: std.Progress.Node) !void { whole.cache_manifest = &man; try addNonIncrementalStuffToCacheManifest(comp, arena, &man); + // Under `--time-report`, ignore cache hits; do the work anyway for those juicy numbers. + const ignore_hit = comp.time_report != null; + + if (ignore_hit) { + // We're going to do the work regardless of whether this is a hit or a miss. + man.want_shared_lock = false; + } + const is_hit = man.hit() catch |err| switch (err) { error.CacheCheckFailed => switch (man.diagnostic) { .none => unreachable, @@ -2713,7 +2846,7 @@ pub fn update(comp: *Compilation, main_progress_node: std.Progress.Node) !void { .{}, ), }; - if (is_hit) { + if (is_hit and !ignore_hit) { // In this case the cache hit contains the full set of file system inputs. Nice! if (comp.file_system_inputs) |buf| try man.populateFileSystemInputs(buf); if (comp.parent_whole_cache) |pwc| { @@ -2734,6 +2867,11 @@ pub fn update(comp: *Compilation, main_progress_node: std.Progress.Node) !void { } log.debug("CacheMode.whole cache miss for {s}", .{comp.root_name}); + if (ignore_hit) { + // Okay, now set this back so that `writeManifest` will downgrade our lock later. + man.want_shared_lock = true; + } + // Compile the artifacts to a temporary directory. whole.tmp_artifact_directory = d: { tmp_dir_rand_int = std.crypto.random.int(u64); @@ -2786,6 +2924,8 @@ pub fn update(comp: *Compilation, main_progress_node: std.Progress.Node) !void { const pt: Zcu.PerThread = .activate(zcu, .main); defer pt.deactivate(); + assert(zcu.cur_analysis_timer == null); + zcu.skip_analysis_this_update = false; // TODO: doing this in `resolveReferences` later could avoid adding inputs for dead embedfiles. Investigate! @@ -2829,6 +2969,8 @@ pub fn update(comp: *Compilation, main_progress_node: std.Progress.Node) !void { const pt: Zcu.PerThread = .activate(zcu, .main); defer pt.deactivate(); + assert(zcu.cur_analysis_timer == null); + if (!zcu.skip_analysis_this_update) { if (comp.config.is_test) { // The `test_functions` decl has been intentionally postponed until now, @@ -3040,11 +3182,22 @@ fn flush( ) !void { if (comp.zcu) |zcu| { if (zcu.llvm_object) |llvm_object| { + const pt: Zcu.PerThread = .activate(zcu, tid); + defer pt.deactivate(); + // Emit the ZCU object from LLVM now; it's required to flush the output file. // If there's an output file, it wants to decide where the LLVM object goes! const sub_prog_node = comp.link_prog_node.start("LLVM Emit Object", 0); defer sub_prog_node.end(); - try llvm_object.emit(.{ .zcu = zcu, .tid = tid }, .{ + + var timer = comp.startTimer(); + defer if (timer.finish()) |ns| { + comp.mutex.lock(); + defer comp.mutex.unlock(); + comp.time_report.?.stats.real_ns_llvm_emit = ns; + }; + + try llvm_object.emit(pt, .{ .pre_ir_path = comp.verbose_llvm_ir, .pre_bc_path = comp.verbose_llvm_bc, @@ -3071,7 +3224,7 @@ fn flush( .is_debug = comp.root_mod.optimize_mode == .Debug, .is_small = comp.root_mod.optimize_mode == .ReleaseSmall, - .time_report = comp.time_report, + .time_report = if (comp.time_report) |*p| p else null, .sanitize_thread = comp.config.any_sanitize_thread, .fuzz = comp.config.any_fuzz, .lto = comp.config.lto, @@ -3079,6 +3232,12 @@ fn flush( } } if (comp.bin_file) |lf| { + var timer = comp.startTimer(); + defer if (timer.finish()) |ns| { + comp.mutex.lock(); + defer comp.mutex.unlock(); + comp.time_report.?.stats.real_ns_link_flush = ns; + }; // This is needed before reading the error flags. lf.flush(arena, tid, comp.link_prog_node) catch |err| switch (err) { error.LinkFailure => {}, // Already reported. @@ -4223,6 +4382,17 @@ fn performAllTheWork( zcu.generation += 1; }; + // This is awkward: we don't want to start the timer until later, but we won't want to stop it + // until the wait groups finish. That means we need do do this. + var decl_work_timer: ?Timer = null; + defer commit_timer: { + const t = &(decl_work_timer orelse break :commit_timer); + const ns = t.finish() orelse break :commit_timer; + comp.mutex.lock(); + defer comp.mutex.unlock(); + comp.time_report.?.stats.real_ns_decls = ns; + } + // Here we queue up all the AstGen tasks first, followed by C object compilation. // We wait until the AstGen tasks are all completed before proceeding to the // (at least for now) single-threaded main work queue. However, C object compilation @@ -4431,6 +4601,13 @@ fn performAllTheWork( const zir_prog_node = main_progress_node.start("AST Lowering", 0); defer zir_prog_node.end(); + var timer = comp.startTimer(); + defer if (timer.finish()) |ns| { + comp.mutex.lock(); + defer comp.mutex.unlock(); + comp.time_report.?.stats.real_ns_files = ns; + }; + var astgen_wait_group: WaitGroup = .{}; defer astgen_wait_group.wait(); @@ -4556,6 +4733,10 @@ fn performAllTheWork( return; } + if (comp.time_report) |*tr| { + tr.stats.n_reachable_files = @intCast(zcu.alive_files.count()); + } + if (comp.incremental) { const update_zir_refs_node = main_progress_node.start("Update ZIR References", 0); defer update_zir_refs_node.end(); @@ -4599,6 +4780,11 @@ fn performAllTheWork( } } + if (comp.zcu != null) { + // Start the timer for the "decls" part of the pipeline (Sema, CodeGen, link). + decl_work_timer = comp.startTimer(); + } + work: while (true) { for (&comp.work_queues) |*work_queue| if (work_queue.readItem()) |job| { try processOneJob(@intFromEnum(Zcu.PerThread.Id.main), comp, job); |
