diff options
| author | Andrew Kelley <andrew@ziglang.org> | 2022-08-04 14:24:00 -0700 |
|---|---|---|
| committer | Andrew Kelley <andrew@ziglang.org> | 2022-08-04 14:24:00 -0700 |
| commit | ba70eee8bb81da0d44982a84395aee660635e5ba (patch) | |
| tree | 40f4ef036ab60572f9b19e94dc84415993ed3fa8 /src | |
| parent | a3045b8abbba896da34a02266f2be89dd6c90ecc (diff) | |
| parent | 616f65df750f53e6334cc5ed2c8f4b5668d573f2 (diff) | |
| download | zig-ba70eee8bb81da0d44982a84395aee660635e5ba.tar.gz zig-ba70eee8bb81da0d44982a84395aee660635e5ba.zip | |
Merge remote-tracking branch 'origin/master' into llvm15
Diffstat (limited to 'src')
| -rw-r--r-- | src/AstGen.zig | 137 | ||||
| -rw-r--r-- | src/Autodoc.zig | 27 | ||||
| -rw-r--r-- | src/Module.zig | 107 | ||||
| -rw-r--r-- | src/Sema.zig | 746 | ||||
| -rw-r--r-- | src/Zir.zig | 6 | ||||
| -rw-r--r-- | src/link/Dwarf.zig | 44 | ||||
| -rw-r--r-- | src/link/MachO.zig | 4113 | ||||
| -rw-r--r-- | src/link/MachO/Archive.zig | 58 | ||||
| -rw-r--r-- | src/link/MachO/Atom.zig | 40 | ||||
| -rw-r--r-- | src/link/MachO/CodeSignature.zig | 12 | ||||
| -rw-r--r-- | src/link/MachO/DebugSymbols.zig | 530 | ||||
| -rw-r--r-- | src/link/MachO/Dylib.zig | 159 | ||||
| -rw-r--r-- | src/link/MachO/Object.zig | 327 | ||||
| -rw-r--r-- | src/link/MachO/dead_strip.zig | 50 | ||||
| -rw-r--r-- | src/link/MachO/fat.zig | 4 |
15 files changed, 2995 insertions, 3365 deletions
diff --git a/src/AstGen.zig b/src/AstGen.zig index b6a7450f3a..e30913ac76 100644 --- a/src/AstGen.zig +++ b/src/AstGen.zig @@ -768,12 +768,12 @@ fn expr(gz: *GenZir, scope: *Scope, rl: ResultLoc, node: Ast.Node.Index) InnerEr .if_simple => return ifExpr(gz, scope, rl.br(), node, tree.ifSimple(node)), .@"if" => return ifExpr(gz, scope, rl.br(), node, tree.ifFull(node)), - .while_simple => return whileExpr(gz, scope, rl.br(), node, tree.whileSimple(node)), - .while_cont => return whileExpr(gz, scope, rl.br(), node, tree.whileCont(node)), - .@"while" => return whileExpr(gz, scope, rl.br(), node, tree.whileFull(node)), + .while_simple => return whileExpr(gz, scope, rl.br(), node, tree.whileSimple(node), false), + .while_cont => return whileExpr(gz, scope, rl.br(), node, tree.whileCont(node), false), + .@"while" => return whileExpr(gz, scope, rl.br(), node, tree.whileFull(node), false), - .for_simple => return forExpr(gz, scope, rl.br(), node, tree.forSimple(node)), - .@"for" => return forExpr(gz, scope, rl.br(), node, tree.forFull(node)), + .for_simple => return forExpr(gz, scope, rl.br(), node, tree.forSimple(node), false), + .@"for" => return forExpr(gz, scope, rl.br(), node, tree.forFull(node), false), .slice_open => { const lhs = try expr(gz, scope, .ref, node_datas[node].lhs); @@ -1899,6 +1899,17 @@ fn breakExpr(parent_gz: *GenZir, parent_scope: *Scope, node: Ast.Node.Index) Inn .local_ptr => scope = scope.cast(Scope.LocalPtr).?.parent, .namespace => break, .defer_normal, .defer_error => scope = scope.cast(Scope.Defer).?.parent, + .defer_gen => { + const defer_gen = scope.cast(Scope.DeferGen).?; + + return astgen.failNodeNotes(node, "cannot break out of defer expression", .{}, &.{ + try astgen.errNoteNode( + defer_gen.defer_node, + "defer expression here", + .{}, + ), + }); + }, .top => unreachable, } } @@ -1958,6 +1969,17 @@ fn continueExpr(parent_gz: *GenZir, parent_scope: *Scope, node: Ast.Node.Index) try unusedResultDeferExpr(parent_gz, defer_scope, 
defer_scope.parent, expr_node); }, .defer_error => scope = scope.cast(Scope.Defer).?.parent, + .defer_gen => { + const defer_gen = scope.cast(Scope.DeferGen).?; + + return astgen.failNodeNotes(node, "cannot continue out of defer expression", .{}, &.{ + try astgen.errNoteNode( + defer_gen.defer_node, + "defer expression here", + .{}, + ), + }); + }, .namespace => break, .top => unreachable, } @@ -2022,6 +2044,7 @@ fn checkLabelRedefinition(astgen: *AstGen, parent_scope: *Scope, label: Ast.Toke .local_val => scope = scope.cast(Scope.LocalVal).?.parent, .local_ptr => scope = scope.cast(Scope.LocalPtr).?.parent, .defer_normal, .defer_error => scope = scope.cast(Scope.Defer).?.parent, + .defer_gen => scope = scope.cast(Scope.DeferGen).?.parent, .namespace => break, .top => unreachable, } @@ -2129,6 +2152,7 @@ fn blockExprStmts(gz: *GenZir, parent_scope: *Scope, statements: []const Ast.Nod const astgen = gz.astgen; const tree = astgen.tree; const node_tags = tree.nodes.items(.tag); + const node_data = tree.nodes.items(.data); if (statements.len == 0) return; @@ -2155,8 +2179,10 @@ fn blockExprStmts(gz: *GenZir, parent_scope: *Scope, statements: []const Ast.Nod }, ); } - switch (node_tags[statement]) { - // zig fmt: off + var inner_node = statement; + while (true) { + switch (node_tags[inner_node]) { + // zig fmt: off .global_var_decl => scope = try varDecl(gz, scope, statement, block_arena_allocator, tree.globalVarDecl(statement)), .local_var_decl => scope = try varDecl(gz, scope, statement, block_arena_allocator, tree.localVarDecl(statement)), .simple_var_decl => scope = try varDecl(gz, scope, statement, block_arena_allocator, tree.simpleVarDecl(statement)), @@ -2181,9 +2207,23 @@ fn blockExprStmts(gz: *GenZir, parent_scope: *Scope, statements: []const Ast.Nod .assign_add_wrap => try assignOp(gz, scope, statement, .addwrap), .assign_mul => try assignOp(gz, scope, statement, .mul), .assign_mul_wrap => try assignOp(gz, scope, statement, .mulwrap), + + .grouped_expression 
=> { + inner_node = node_data[statement].lhs; + continue; + }, + + .while_simple => _ = try whileExpr(gz, scope, .discard, inner_node, tree.whileSimple(inner_node), true), + .while_cont => _ = try whileExpr(gz, scope, .discard, inner_node, tree.whileCont(inner_node), true), + .@"while" => _ = try whileExpr(gz, scope, .discard, inner_node, tree.whileFull(inner_node), true), + + .for_simple => _ = try forExpr(gz, scope, .discard, inner_node, tree.forSimple(inner_node), true), + .@"for" => _ = try forExpr(gz, scope, .discard, inner_node, tree.forFull(inner_node), true), - else => noreturn_src_node = try unusedResultExpr(gz, scope, statement), + else => noreturn_src_node = try unusedResultExpr(gz, scope, inner_node), // zig fmt: on + } + break; } } @@ -2206,7 +2246,13 @@ fn unusedResultDeferExpr(gz: *GenZir, defer_scope: *Scope.Defer, expr_scope: *Sc astgen.source_offset = defer_scope.source_offset; astgen.source_line = defer_scope.source_line; astgen.source_column = defer_scope.source_column; - _ = try unusedResultExpr(gz, expr_scope, expr_node); + + var defer_gen: Scope.DeferGen = .{ + .parent = expr_scope, + .defer_node = defer_scope.defer_node, + }; + + _ = try unusedResultExpr(gz, &defer_gen.base, expr_node); } /// Returns AST source node of the thing that is noreturn if the statement is @@ -2216,6 +2262,10 @@ fn unusedResultExpr(gz: *GenZir, scope: *Scope, statement: Ast.Node.Index) Inner // We need to emit an error if the result is not `noreturn` or `void`, but // we want to avoid adding the ZIR instruction if possible for performance. 
const maybe_unused_result = try expr(gz, scope, .none, statement); + return addEnsureResult(gz, maybe_unused_result, statement); +} + +fn addEnsureResult(gz: *GenZir, maybe_unused_result: Zir.Inst.Ref, statement: Ast.Node.Index) InnerError!Ast.Node.Index { var noreturn_src_node: Ast.Node.Index = 0; const elide_check = if (refToIndex(maybe_unused_result)) |inst| b: { // Note that this array becomes invalid after appending more items to it @@ -2553,6 +2603,7 @@ fn countDefers(astgen: *AstGen, outer_scope: *Scope, inner_scope: *Scope) struct .gen_zir => scope = scope.cast(GenZir).?.parent, .local_val => scope = scope.cast(Scope.LocalVal).?.parent, .local_ptr => scope = scope.cast(Scope.LocalPtr).?.parent, + .defer_gen => scope = scope.cast(Scope.DeferGen).?.parent, .defer_normal => { const defer_scope = scope.cast(Scope.Defer).?; scope = defer_scope.parent; @@ -2602,6 +2653,7 @@ fn genDefers( .gen_zir => scope = scope.cast(GenZir).?.parent, .local_val => scope = scope.cast(Scope.LocalVal).?.parent, .local_ptr => scope = scope.cast(Scope.LocalPtr).?.parent, + .defer_gen => scope = scope.cast(Scope.DeferGen).?.parent, .defer_normal => { const defer_scope = scope.cast(Scope.Defer).?; scope = defer_scope.parent; @@ -2644,6 +2696,7 @@ fn genDefers( break :blk &local_val_scope.base; }; try unusedResultDeferExpr(gz, defer_scope, sub_scope, expr_node); + try checkUsed(gz, scope, sub_scope); try gz.addDbgBlockEnd(); }, .normal_only => continue, @@ -2681,6 +2734,7 @@ fn checkUsed( scope = s.parent; }, .defer_normal, .defer_error => scope = scope.cast(Scope.Defer).?.parent, + .defer_gen => scope = scope.cast(Scope.DeferGen).?.parent, .namespace => unreachable, .top => unreachable, } @@ -4040,6 +4094,7 @@ fn testDecl( .local_val, .local_ptr => unreachable, // a test cannot be in a local scope .gen_zir => s = s.cast(GenZir).?.parent, .defer_normal, .defer_error => s = s.cast(Scope.Defer).?.parent, + .defer_gen => s = s.cast(Scope.DeferGen).?.parent, .namespace => { const ns = 
s.cast(Scope.Namespace).?; if (ns.decls.get(name_str_index)) |i| { @@ -5330,7 +5385,7 @@ fn ifExpr( const tag: Zir.Inst.Tag = if (payload_is_ref) .is_non_err_ptr else .is_non_err; break :c .{ .inst = err_union, - .bool_bit = try block_scope.addUnNode(tag, err_union, node), + .bool_bit = try block_scope.addUnNode(tag, err_union, if_full.ast.cond_expr), }; } else if (if_full.payload_token) |_| { const cond_rl: ResultLoc = if (payload_is_ref) .ref else .none; @@ -5338,7 +5393,7 @@ fn ifExpr( const tag: Zir.Inst.Tag = if (payload_is_ref) .is_non_null_ptr else .is_non_null; break :c .{ .inst = optional, - .bool_bit = try block_scope.addUnNode(tag, optional, node), + .bool_bit = try block_scope.addUnNode(tag, optional, if_full.ast.cond_expr), }; } else { const cond = try expr(&block_scope, &block_scope.base, bool_rl, if_full.ast.cond_expr); @@ -5369,7 +5424,7 @@ fn ifExpr( .err_union_payload_unsafe_ptr else .err_union_payload_unsafe; - const payload_inst = try then_scope.addUnNode(tag, cond.inst, node); + const payload_inst = try then_scope.addUnNode(tag, cond.inst, if_full.ast.then_expr); const token_name_index = payload_token + @boolToInt(payload_is_ref); const ident_name = try astgen.identAsString(token_name_index); const token_name_str = tree.tokenSlice(token_name_index); @@ -5398,7 +5453,7 @@ fn ifExpr( const ident_bytes = tree.tokenSlice(ident_token); if (mem.eql(u8, "_", ident_bytes)) break :s &then_scope.base; - const payload_inst = try then_scope.addUnNode(tag, cond.inst, node); + const payload_inst = try then_scope.addUnNode(tag, cond.inst, if_full.ast.then_expr); const ident_name = try astgen.identAsString(ident_token); try astgen.detectLocalShadowing(&then_scope.base, ident_name, ident_token, ident_bytes); payload_val_scope = .{ @@ -5441,7 +5496,7 @@ fn ifExpr( .err_union_code_ptr else .err_union_code; - const payload_inst = try else_scope.addUnNode(tag, cond.inst, node); + const payload_inst = try else_scope.addUnNode(tag, cond.inst, if_full.ast.cond_expr); 
const ident_name = try astgen.identAsString(error_token); const error_token_str = tree.tokenSlice(error_token); if (mem.eql(u8, "_", error_token_str)) @@ -5615,6 +5670,7 @@ fn whileExpr( rl: ResultLoc, node: Ast.Node.Index, while_full: Ast.full.While, + is_statement: bool, ) InnerError!Zir.Inst.Ref { const astgen = parent_gz.astgen; const tree = astgen.tree; @@ -5654,7 +5710,7 @@ fn whileExpr( const tag: Zir.Inst.Tag = if (payload_is_ref) .is_non_err_ptr else .is_non_err; break :c .{ .inst = err_union, - .bool_bit = try continue_scope.addUnNode(tag, err_union, node), + .bool_bit = try continue_scope.addUnNode(tag, err_union, while_full.ast.then_expr), }; } else if (while_full.payload_token) |_| { const cond_rl: ResultLoc = if (payload_is_ref) .ref else .none; @@ -5662,7 +5718,7 @@ fn whileExpr( const tag: Zir.Inst.Tag = if (payload_is_ref) .is_non_null_ptr else .is_non_null; break :c .{ .inst = optional, - .bool_bit = try continue_scope.addUnNode(tag, optional, node), + .bool_bit = try continue_scope.addUnNode(tag, optional, while_full.ast.then_expr), }; } else { const cond = try expr(&continue_scope, &continue_scope.base, bool_rl, while_full.ast.cond_expr); @@ -5700,7 +5756,7 @@ fn whileExpr( else .err_union_payload_unsafe; // will add this instruction to then_scope.instructions below - payload_inst = try then_scope.makeUnNode(tag, cond.inst, node); + payload_inst = try then_scope.makeUnNode(tag, cond.inst, while_full.ast.cond_expr); const ident_token = if (payload_is_ref) payload_token + 1 else payload_token; const ident_bytes = tree.tokenSlice(ident_token); if (mem.eql(u8, "_", ident_bytes)) @@ -5729,7 +5785,7 @@ fn whileExpr( else .optional_payload_unsafe; // will add this instruction to then_scope.instructions below - payload_inst = try then_scope.makeUnNode(tag, cond.inst, node); + payload_inst = try then_scope.makeUnNode(tag, cond.inst, while_full.ast.cond_expr); const ident_name = try astgen.identAsString(ident_token); const ident_bytes = 
tree.tokenSlice(ident_token); if (mem.eql(u8, "_", ident_bytes)) @@ -5785,6 +5841,8 @@ fn whileExpr( try then_scope.addDbgVar(.dbg_var_val, some, dbg_var_inst); } const then_result = try expr(&then_scope, then_sub_scope, loop_scope.break_result_loc, while_full.ast.then_expr); + _ = try addEnsureResult(&then_scope, then_result, while_full.ast.then_expr); + try checkUsed(parent_gz, &then_scope.base, then_sub_scope); try then_scope.addDbgBlockEnd(); @@ -5803,7 +5861,7 @@ fn whileExpr( .err_union_code_ptr else .err_union_code; - const else_payload_inst = try else_scope.addUnNode(tag, cond.inst, node); + const else_payload_inst = try else_scope.addUnNode(tag, cond.inst, while_full.ast.cond_expr); const ident_name = try astgen.identAsString(error_token); const ident_bytes = tree.tokenSlice(error_token); if (mem.eql(u8, ident_bytes, "_")) @@ -5827,7 +5885,11 @@ fn whileExpr( // control flow apply to outer loops; not this one. loop_scope.continue_block = 0; loop_scope.break_block = 0; - const e = try expr(&else_scope, sub_scope, loop_scope.break_result_loc, else_node); + const else_result = try expr(&else_scope, sub_scope, loop_scope.break_result_loc, else_node); + if (is_statement) { + _ = try addEnsureResult(&else_scope, else_result, else_node); + } + if (!else_scope.endsWithNoReturn()) { loop_scope.break_count += 1; } @@ -5835,7 +5897,7 @@ fn whileExpr( try else_scope.addDbgBlockEnd(); break :blk .{ .src = else_node, - .result = e, + .result = else_result, }; } else .{ .src = while_full.ast.then_expr, @@ -5848,7 +5910,7 @@ fn whileExpr( } } const break_tag: Zir.Inst.Tag = if (is_inline) .break_inline else .@"break"; - return finishThenElseBlock( + const result = try finishThenElseBlock( parent_gz, rl, node, @@ -5863,6 +5925,10 @@ fn whileExpr( cond_block, break_tag, ); + if (is_statement) { + _ = try parent_gz.addUnNode(.ensure_result_used, result, node); + } + return result; } fn forExpr( @@ -5871,6 +5937,7 @@ fn forExpr( rl: ResultLoc, node: Ast.Node.Index, for_full: 
Ast.full.While, + is_statement: bool, ) InnerError!Zir.Inst.Ref { const astgen = parent_gz.astgen; @@ -6014,6 +6081,8 @@ fn forExpr( }; const then_result = try expr(&then_scope, then_sub_scope, loop_scope.break_result_loc, for_full.ast.then_expr); + _ = try addEnsureResult(&then_scope, then_result, for_full.ast.then_expr); + try checkUsed(parent_gz, &then_scope.base, then_sub_scope); try then_scope.addDbgBlockEnd(); @@ -6031,6 +6100,10 @@ fn forExpr( loop_scope.continue_block = 0; loop_scope.break_block = 0; const else_result = try expr(&else_scope, sub_scope, loop_scope.break_result_loc, else_node); + if (is_statement) { + _ = try addEnsureResult(&else_scope, else_result, else_node); + } + if (!else_scope.endsWithNoReturn()) { loop_scope.break_count += 1; } @@ -6049,7 +6122,7 @@ fn forExpr( } } const break_tag: Zir.Inst.Tag = if (is_inline) .break_inline else .@"break"; - return finishThenElseBlock( + const result = try finishThenElseBlock( parent_gz, rl, node, @@ -6064,6 +6137,10 @@ fn forExpr( cond_block, break_tag, ); + if (is_statement) { + _ = try parent_gz.addUnNode(.ensure_result_used, result, node); + } + return result; } fn switchExpr( @@ -6730,6 +6807,7 @@ fn localVarRef( }, .gen_zir => s = s.cast(GenZir).?.parent, .defer_normal, .defer_error => s = s.cast(Scope.Defer).?.parent, + .defer_gen => s = s.cast(Scope.DeferGen).?.parent, .namespace => { const ns = s.cast(Scope.Namespace).?; if (ns.decls.get(name_str_index)) |i| { @@ -7351,6 +7429,7 @@ fn builtinCall( }, .gen_zir => s = s.cast(GenZir).?.parent, .defer_normal, .defer_error => s = s.cast(Scope.Defer).?.parent, + .defer_gen => s = s.cast(Scope.DeferGen).?.parent, .namespace => { const ns = s.cast(Scope.Namespace).?; if (ns.decls.get(decl_name)) |i| { @@ -7424,7 +7503,8 @@ fn builtinCall( const token_starts = tree.tokens.items(.start); const node_start = token_starts[tree.firstToken(node)]; astgen.advanceSourceCursor(node_start); - const result = try gz.addExtendedPayload(.builtin_src, 
Zir.Inst.LineColumn{ + const result = try gz.addExtendedPayload(.builtin_src, Zir.Inst.Src{ + .node = gz.nodeIndexToRelative(node), .line = astgen.source_line, .column = astgen.source_column, }); @@ -9807,6 +9887,7 @@ const Scope = struct { local_ptr, defer_normal, defer_error, + defer_gen, namespace, top, }; @@ -9904,6 +9985,13 @@ const Scope = struct { const base_tag: Scope.Tag = .top; base: Scope = Scope{ .tag = base_tag }, }; + + const DeferGen = struct { + const base_tag: Scope.Tag = .defer_gen; + base: Scope = Scope{ .tag = base_tag }, + parent: *Scope, + defer_node: Ast.Node.Index, + }; }; /// This is a temporary structure; references to it are valid only @@ -11414,6 +11502,7 @@ fn detectLocalShadowing( }, .gen_zir => s = s.cast(GenZir).?.parent, .defer_normal, .defer_error => s = s.cast(Scope.Defer).?.parent, + .defer_gen => s = s.cast(Scope.DeferGen).?.parent, .top => break, }; } diff --git a/src/Autodoc.zig b/src/Autodoc.zig index 2d163bf4f0..a98ad7aee0 100644 --- a/src/Autodoc.zig +++ b/src/Autodoc.zig @@ -468,7 +468,7 @@ const DocData = struct { child: Expr, }, ErrorUnion: struct { lhs: Expr, rhs: Expr }, - // ErrorUnion: struct { name: []const u8 }, + InferredErrorUnion: struct { payload: Expr }, ErrorSet: struct { name: []const u8, fields: ?[]const Field = null, @@ -580,8 +580,9 @@ const DocData = struct { enumLiteral: []const u8, // direct value alignOf: usize, // index in `exprs` typeOf: usize, // index in `exprs` + typeInfo: usize, // index in `exprs` typeOf_peer: []usize, - errorUnion: usize, // index in `exprs` + errorUnion: usize, // index in `types` as: As, sizeOf: usize, // index in `exprs` bitSizeOf: usize, // index in `exprs` @@ -1398,7 +1399,6 @@ fn walkInstruction( const extra = file.zir.extraData(Zir.Inst.PtrType, ptr.payload_index); var extra_index = extra.end; - const type_slot_index = self.types.items.len; const elem_type_ref = try self.walkRef( file, parent_scope, @@ -1445,6 +1445,7 @@ fn walkInstruction( host_size = ref_result.expr; 
} + const type_slot_index = self.types.items.len; try self.types.append(self.arena, .{ .Pointer = .{ .size = ptr.size, @@ -1788,7 +1789,7 @@ fn walkInstruction( return DocData.WalkResult{ .typeRef = operand.typeRef, - .expr = .{ .typeOf = operand_index }, + .expr = .{ .typeInfo = operand_index }, }; }, .as_node => { @@ -1928,7 +1929,7 @@ fn walkInstruction( .comptimeExpr = self.comptime_exprs.items.len, } }; try self.comptime_exprs.append(self.arena, .{ - .code = "if(banana) 1 else 0", + .code = "if (...) { ... }", }); return res; }, @@ -2118,6 +2119,7 @@ fn walkInstruction( inst_index, self_ast_node_index, type_slot_index, + tags[inst_index] == .func_inferred, ); return result; @@ -2993,7 +2995,7 @@ fn tryResolveRefPath( "TODO: handle `{s}`in tryResolveRefPath\nInfo: {}", .{ @tagName(resolved_parent), resolved_parent }, ); - path[i + 1] = (try self.cteTodo("match failure")).expr; + path[i + 1] = (try self.cteTodo("<match failure>")).expr; continue :outer; }, .comptimeExpr, .call, .typeOf => { @@ -3415,6 +3417,7 @@ fn analyzeFunction( inst_index: usize, self_ast_node_index: usize, type_slot_index: usize, + ret_is_inferred_error_set: bool, ) AutodocErrors!DocData.WalkResult { const tags = file.zir.instructions.items(.tag); const data = file.zir.instructions.items(.data); @@ -3521,13 +3524,23 @@ fn analyzeFunction( else => null, }; + const ret_type: DocData.Expr = blk: { + if (ret_is_inferred_error_set) { + const ret_type_slot_index = self.types.items.len; + try self.types.append(self.arena, .{ + .InferredErrorUnion = .{ .payload = ret_type_ref }, + }); + break :blk .{ .type = ret_type_slot_index }; + } else break :blk ret_type_ref; + }; + self.ast_nodes.items[self_ast_node_index].fields = param_ast_indexes.items; self.types.items[type_slot_index] = .{ .Fn = .{ .name = "todo_name func", .src = self_ast_node_index, .params = param_type_refs.items, - .ret = ret_type_ref, + .ret = ret_type, .generic_ret = generic_ret, }, }; diff --git a/src/Module.zig b/src/Module.zig 
index 4ac2775515..6122b417e4 100644 --- a/src/Module.zig +++ b/src/Module.zig @@ -2161,6 +2161,10 @@ pub const SrcLoc = struct { .local_var_decl => tree.localVarDecl(node), .simple_var_decl => tree.simpleVarDecl(node), .aligned_var_decl => tree.alignedVarDecl(node), + .@"usingnamespace" => { + const node_data = tree.nodes.items(.data); + return nodeToSpan(tree, node_data[node].lhs); + }, else => unreachable, }; if (full.ast.type_node != 0) { @@ -2171,6 +2175,58 @@ pub const SrcLoc = struct { const end = start + @intCast(u32, tree.tokenSlice(tok_index).len); return Span{ .start = start, .end = end, .main = start }; }, + .node_offset_var_decl_align => |node_off| { + const tree = try src_loc.file_scope.getTree(gpa); + const node = src_loc.declRelativeToNodeIndex(node_off); + const node_tags = tree.nodes.items(.tag); + const full: Ast.full.VarDecl = switch (node_tags[node]) { + .global_var_decl => tree.globalVarDecl(node), + .local_var_decl => tree.localVarDecl(node), + .simple_var_decl => tree.simpleVarDecl(node), + .aligned_var_decl => tree.alignedVarDecl(node), + else => unreachable, + }; + return nodeToSpan(tree, full.ast.align_node); + }, + .node_offset_var_decl_section => |node_off| { + const tree = try src_loc.file_scope.getTree(gpa); + const node = src_loc.declRelativeToNodeIndex(node_off); + const node_tags = tree.nodes.items(.tag); + const full: Ast.full.VarDecl = switch (node_tags[node]) { + .global_var_decl => tree.globalVarDecl(node), + .local_var_decl => tree.localVarDecl(node), + .simple_var_decl => tree.simpleVarDecl(node), + .aligned_var_decl => tree.alignedVarDecl(node), + else => unreachable, + }; + return nodeToSpan(tree, full.ast.section_node); + }, + .node_offset_var_decl_addrspace => |node_off| { + const tree = try src_loc.file_scope.getTree(gpa); + const node = src_loc.declRelativeToNodeIndex(node_off); + const node_tags = tree.nodes.items(.tag); + const full: Ast.full.VarDecl = switch (node_tags[node]) { + .global_var_decl => 
tree.globalVarDecl(node), + .local_var_decl => tree.localVarDecl(node), + .simple_var_decl => tree.simpleVarDecl(node), + .aligned_var_decl => tree.alignedVarDecl(node), + else => unreachable, + }; + return nodeToSpan(tree, full.ast.addrspace_node); + }, + .node_offset_var_decl_init => |node_off| { + const tree = try src_loc.file_scope.getTree(gpa); + const node = src_loc.declRelativeToNodeIndex(node_off); + const node_tags = tree.nodes.items(.tag); + const full: Ast.full.VarDecl = switch (node_tags[node]) { + .global_var_decl => tree.globalVarDecl(node), + .local_var_decl => tree.localVarDecl(node), + .simple_var_decl => tree.simpleVarDecl(node), + .aligned_var_decl => tree.alignedVarDecl(node), + else => unreachable, + }; + return nodeToSpan(tree, full.ast.init_node); + }, .node_offset_builtin_call_arg0 => |n| return src_loc.byteOffsetBuiltinCallArg(gpa, n, 0), .node_offset_builtin_call_arg1 => |n| return src_loc.byteOffsetBuiltinCallArg(gpa, n, 1), .node_offset_builtin_call_arg2 => |n| return src_loc.byteOffsetBuiltinCallArg(gpa, n, 2), @@ -2857,6 +2913,18 @@ pub const LazySrcLoc = union(enum) { /// to the type expression. /// The Decl is determined contextually. node_offset_var_decl_ty: i32, + /// The source location points to the alignment expression of a var decl. + /// The Decl is determined contextually. + node_offset_var_decl_align: i32, + /// The source location points to the linksection expression of a var decl. + /// The Decl is determined contextually. + node_offset_var_decl_section: i32, + /// The source location points to the addrspace expression of a var decl. + /// The Decl is determined contextually. + node_offset_var_decl_addrspace: i32, + /// The source location points to the initializer of a var decl. + /// The Decl is determined contextually. 
+ node_offset_var_decl_init: i32, /// The source location points to a for loop condition expression, /// found by taking this AST node index offset from the containing /// Decl AST node, which points to a for loop AST node. Next, navigate @@ -3098,6 +3166,10 @@ pub const LazySrcLoc = union(enum) { .node_offset, .node_offset_initializer, .node_offset_var_decl_ty, + .node_offset_var_decl_align, + .node_offset_var_decl_section, + .node_offset_var_decl_addrspace, + .node_offset_var_decl_init, .node_offset_for_cond, .node_offset_builtin_call_arg0, .node_offset_builtin_call_arg1, @@ -4414,17 +4486,26 @@ fn semaDecl(mod: *Module, decl_index: Decl.Index) !bool { const body = zir.extra[extra.end..][0..extra.data.body_len]; const result_ref = (try sema.analyzeBodyBreak(&block_scope, body)).?.operand; try wip_captures.finalize(); - const src = LazySrcLoc.nodeOffset(0); - const decl_tv = try sema.resolveInstValue(&block_scope, .unneeded, result_ref, undefined); + const align_src: LazySrcLoc = .{ .node_offset_var_decl_align = 0 }; + const section_src: LazySrcLoc = .{ .node_offset_var_decl_section = 0 }; + const address_space_src: LazySrcLoc = .{ .node_offset_var_decl_addrspace = 0 }; + const ty_src: LazySrcLoc = .{ .node_offset_var_decl_ty = 0 }; + const init_src: LazySrcLoc = .{ .node_offset_var_decl_init = 0 }; + const decl_tv = try sema.resolveInstValue(&block_scope, init_src, result_ref, undefined); const decl_align: u32 = blk: { const align_ref = decl.zirAlignRef(); if (align_ref == .none) break :blk 0; - break :blk try sema.resolveAlign(&block_scope, src, align_ref); + break :blk try sema.resolveAlign(&block_scope, align_src, align_ref); }; const decl_linksection: ?[*:0]const u8 = blk: { const linksection_ref = decl.zirLinksectionRef(); if (linksection_ref == .none) break :blk null; - const bytes = try sema.resolveConstString(&block_scope, src, linksection_ref, "linksection must be comptime known"); + const bytes = try sema.resolveConstString(&block_scope, section_src, 
linksection_ref, "linksection must be comptime known"); + if (mem.indexOfScalar(u8, bytes, 0) != null) { + return sema.fail(&block_scope, section_src, "linksection cannot contain null bytes", .{}); + } else if (bytes.len == 0) { + return sema.fail(&block_scope, section_src, "linksection cannot be empty", .{}); + } break :blk (try decl_arena_allocator.dupeZ(u8, bytes)).ptr; }; const target = sema.mod.getTarget(); @@ -4442,27 +4523,27 @@ fn semaDecl(mod: *Module, decl_index: Decl.Index) !bool { .constant => target_util.defaultAddressSpace(target, .global_constant), else => unreachable, }, - else => |addrspace_ref| try sema.analyzeAddrspace(&block_scope, src, addrspace_ref, addrspace_ctx), + else => |addrspace_ref| try sema.analyzeAddrspace(&block_scope, address_space_src, addrspace_ref, addrspace_ctx), }; }; // Note this resolves the type of the Decl, not the value; if this Decl // is a struct, for example, this resolves `type` (which needs no resolution), // not the struct itself. - try sema.resolveTypeLayout(&block_scope, src, decl_tv.ty); + try sema.resolveTypeLayout(&block_scope, ty_src, decl_tv.ty); const decl_arena_state = try decl_arena_allocator.create(std.heap.ArenaAllocator.State); if (decl.is_usingnamespace) { if (!decl_tv.ty.eql(Type.type, mod)) { - return sema.fail(&block_scope, src, "expected type, found {}", .{ + return sema.fail(&block_scope, ty_src, "expected type, found {}", .{ decl_tv.ty.fmt(mod), }); } var buffer: Value.ToTypeBuffer = undefined; const ty = try decl_tv.val.toType(&buffer).copy(decl_arena_allocator); if (ty.getNamespace() == null) { - return sema.fail(&block_scope, src, "type {} has no namespace", .{ty.fmt(mod)}); + return sema.fail(&block_scope, ty_src, "type {} has no namespace", .{ty.fmt(mod)}); } decl.ty = Type.type; @@ -4508,7 +4589,7 @@ fn semaDecl(mod: *Module, decl_index: Decl.Index) !bool { decl.analysis = .complete; decl.generation = mod.generation; - const has_runtime_bits = try sema.fnHasRuntimeBits(&block_scope, src, 
decl.ty); + const has_runtime_bits = try sema.fnHasRuntimeBits(&block_scope, ty_src, decl.ty); if (has_runtime_bits) { // We don't fully codegen the decl until later, but we do need to reserve a global @@ -4525,7 +4606,7 @@ fn semaDecl(mod: *Module, decl_index: Decl.Index) !bool { const is_inline = decl.ty.fnCallingConvention() == .Inline; if (decl.is_exported) { - const export_src = src; // TODO make this point at `export` token + const export_src: LazySrcLoc = .{ .token_offset = @boolToInt(decl.is_pub) }; if (is_inline) { return sema.fail(&block_scope, export_src, "export of inline function", .{}); } @@ -4588,14 +4669,14 @@ fn semaDecl(mod: *Module, decl_index: Decl.Index) !bool { decl.generation = mod.generation; const has_runtime_bits = is_extern or - (queue_linker_work and try sema.typeHasRuntimeBits(&block_scope, src, decl.ty)); + (queue_linker_work and try sema.typeHasRuntimeBits(&block_scope, ty_src, decl.ty)); if (has_runtime_bits) { log.debug("queue linker work for {*} ({s})", .{ decl, decl.name }); // Needed for codegen_decl which will call updateDecl and then the // codegen backend wants full access to the Decl Type. - try sema.resolveTypeFully(&block_scope, src, decl.ty); + try sema.resolveTypeFully(&block_scope, ty_src, decl.ty); try mod.comp.bin_file.allocateDeclIndexes(decl_index); try mod.comp.work_queue.writeItem(.{ .codegen_decl = decl_index }); @@ -4606,7 +4687,7 @@ fn semaDecl(mod: *Module, decl_index: Decl.Index) !bool { } if (decl.is_exported) { - const export_src = src; // TODO point to the export token + const export_src: LazySrcLoc = .{ .token_offset = @boolToInt(decl.is_pub) }; // The scope needs to have the decl in it. 
const options: std.builtin.ExportOptions = .{ .name = mem.sliceTo(decl.name, 0) }; try sema.analyzeExport(&block_scope, export_src, options, decl_index); diff --git a/src/Sema.zig b/src/Sema.zig index a0829d6eb7..2721ed5179 100644 --- a/src/Sema.zig +++ b/src/Sema.zig @@ -74,6 +74,8 @@ types_to_resolve: std.ArrayListUnmanaged(Air.Inst.Ref) = .{}, /// Sema must convert comptime control flow to runtime control flow, which means /// breaking from a block. post_hoc_blocks: std.AutoHashMapUnmanaged(Air.Inst.Index, *LabeledBlock) = .{}, +/// Populated with the last compile error created. +err: ?*Module.ErrorMsg = null, const std = @import("std"); const mem = std.mem; @@ -174,7 +176,6 @@ pub const Block = struct { pub const Inlining = struct { comptime_result: Air.Inst.Ref, merges: Merges, - err: ?*Module.ErrorMsg = null, }; pub const Merges = struct { @@ -1159,7 +1160,7 @@ fn analyzeBodyInner( try sema.errNote(block, runtime_src, msg, "runtime control flow here", .{}); break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); } } i += 1; @@ -1738,7 +1739,7 @@ fn failWithNeededComptime(sema: *Sema, block: *Block, src: LazySrcLoc, reason: [ try sema.errNote(block, src, msg, "{s}", .{reason}); break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); } fn failWithUseOfUndef(sema: *Sema, block: *Block, src: LazySrcLoc) CompileError { @@ -1770,7 +1771,7 @@ fn failWithArrayInitNotSupported(sema: *Sema, block: *Block, src: LazySrcLoc, ty } break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); } fn failWithStructInitNotSupported(sema: *Sema, block: *Block, src: LazySrcLoc, ty: Type) CompileError { @@ -1801,7 +1802,7 @@ fn failWithIntegerOverflow(sema: *Sema, block: *Block, src: LazySrcLoc, int_ty: try sema.errNote(block, src, msg, "when computing vector element at index '{d}'", .{vector_index}); break :msg msg; }; - 
return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); } return sema.fail(block, src, "overflow of integer type '{}' with value '{}'", .{ int_ty.fmt(sema.mod), val.fmtValue(int_ty, sema.mod), @@ -1823,7 +1824,7 @@ fn failWithInvalidComptimeFieldStore(sema: *Sema, block: *Block, init_src: LazyS try sema.errNote(block, default_value_src, msg, "default value set here", .{}); break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); } /// We don't return a pointer to the new error note because the pointer @@ -1878,10 +1879,10 @@ pub fn fail( args: anytype, ) CompileError { const err_msg = try sema.errMsg(block, src, format, args); - return sema.failWithOwnedErrorMsg(block, err_msg); + return sema.failWithOwnedErrorMsg(err_msg); } -fn failWithOwnedErrorMsg(sema: *Sema, block: *Block, err_msg: *Module.ErrorMsg) CompileError { +fn failWithOwnedErrorMsg(sema: *Sema, err_msg: *Module.ErrorMsg) CompileError { @setCold(true); if (crash_report.is_enabled and sema.mod.comp.debug_compile_errors) { @@ -1894,7 +1895,7 @@ fn failWithOwnedErrorMsg(sema: *Sema, block: *Block, err_msg: *Module.ErrorMsg) } const mod = sema.mod; - if (block.inlining) |some| some.err = err_msg; + sema.err = err_msg; { errdefer err_msg.destroy(mod.gpa); @@ -2591,7 +2592,7 @@ fn zirEnumDecl( try sema.errNote(block, other_tag_src, msg, "other field here", .{}); break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); } if (has_tag_value) { @@ -2886,7 +2887,7 @@ fn ensureResultUsed( try sema.errNote(block, src, msg, "consider using `try`, `catch`, or `if`", .{}); break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); }, else => { const msg = msg: { @@ -2896,7 +2897,7 @@ fn ensureResultUsed( try sema.errNote(block, src, msg, "this error can be suppressed by assigning the value to '_'", .{}); break :msg msg; }; - return 
sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); }, } } @@ -2917,7 +2918,7 @@ fn zirEnsureResultNonError(sema: *Sema, block: *Block, inst: Zir.Inst.Index) Com try sema.errNote(block, src, msg, "consider using `try`, `catch`, or `if`", .{}); break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); }, else => return, } @@ -2957,7 +2958,7 @@ fn zirIndexablePtrLen(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileE ); break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); } return sema.fieldVal(block, src, object, "len", src); @@ -2971,7 +2972,7 @@ fn zirAllocExtended( const extra = sema.code.extraData(Zir.Inst.AllocExtended, extended.operand); const src = LazySrcLoc.nodeOffset(extra.data.src_node); const ty_src: LazySrcLoc = .{ .node_offset_var_decl_ty = extra.data.src_node }; - const align_src = src; // TODO better source location + const align_src: LazySrcLoc = .{ .node_offset_var_decl_align = extra.data.src_node }; const small = @bitCast(Zir.Inst.AllocExtended.Small, extended.small); var extra_index: usize = extra.end; @@ -3615,7 +3616,7 @@ fn validateUnionInit( try sema.addDeclaredHereNote(msg, union_ty); break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); } if ((is_comptime or block.is_comptime) and @@ -3747,7 +3748,7 @@ fn validateStructInit( try sema.errNote(block, other_field_src, msg, "other field here", .{}); break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); } found_fields[field_index] = field_ptr; } @@ -3808,7 +3809,7 @@ fn validateStructInit( .{fqn}, ); } - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); } return; @@ -3938,7 +3939,7 @@ fn validateStructInit( .{fqn}, ); } - return sema.failWithOwnedErrorMsg(block, msg); + return 
sema.failWithOwnedErrorMsg(msg); } if (struct_is_comptime) { @@ -4000,7 +4001,7 @@ fn zirValidateArrayInit( } if (root_msg) |msg| { - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); } } @@ -4180,7 +4181,7 @@ fn zirValidateDeref(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileErr try sema.explainWhyTypeIsComptime(block, src, msg, src.toSrcLoc(src_decl), elem_ty); break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); } } @@ -4206,7 +4207,7 @@ fn failWithBadMemberAccess( try sema.addDeclaredHereNote(msg, agg_ty); break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); } fn failWithBadStructFieldAccess( @@ -4232,7 +4233,7 @@ fn failWithBadStructFieldAccess( try sema.mod.errNoteNonLazy(struct_obj.srcLoc(sema.mod), msg, "struct declared here", .{}); break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); } fn failWithBadUnionFieldAccess( @@ -4258,7 +4259,7 @@ fn failWithBadUnionFieldAccess( try sema.mod.errNoteNonLazy(union_obj.srcLoc(sema.mod), msg, "union declared here", .{}); break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); } fn addDeclaredHereNote(sema: *Sema, parent: *Module.ErrorMsg, decl_ty: Type) !void { @@ -4747,7 +4748,7 @@ fn zirCImport(sema: *Sema, parent_block: *Block, inst: Zir.Inst.Index) CompileEr @import("clang.zig").Stage2ErrorMsg.delete(c_import_res.errors.ptr, c_import_res.errors.len); break :msg msg; }; - return sema.failWithOwnedErrorMsg(parent_block, msg); + return sema.failWithOwnedErrorMsg(msg); } const c_import_pkg = Package.create( sema.gpa, @@ -4921,7 +4922,7 @@ fn analyzeBlockBody( break :msg msg; }; - return sema.failWithOwnedErrorMsg(child_block, msg); + return sema.failWithOwnedErrorMsg(msg); } const ty_inst = try sema.addType(resolved_ty); try 
sema.air_extra.ensureUnusedCapacity(gpa, @typeInfo(Air.Block).Struct.fields.len + @@ -5049,18 +5050,18 @@ pub fn analyzeExport( try mod.ensureDeclAnalyzed(exported_decl_index); const exported_decl = mod.declPtr(exported_decl_index); - if (!(try sema.validateExternType(exported_decl.ty, .other))) { + if (!sema.validateExternType(exported_decl.ty, .other)) { const msg = msg: { const msg = try sema.errMsg(block, src, "unable to export type '{}'", .{exported_decl.ty.fmt(sema.mod)}); errdefer msg.destroy(sema.gpa); const src_decl = sema.mod.declPtr(block.src_decl); - try sema.explainWhyTypeIsNotExtern(block, src, msg, src.toSrcLoc(src_decl), exported_decl.ty, .other); + try sema.explainWhyTypeIsNotExtern(msg, src.toSrcLoc(src_decl), exported_decl.ty, .other); try sema.addDeclaredHereNote(msg, exported_decl.ty); break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); } const gpa = mod.gpa; @@ -5150,7 +5151,7 @@ fn zirSetAlignStack(sema: *Sema, block: *Block, extended: Zir.Inst.Extended.Inst try sema.errNote(block, gop.value_ptr.src, msg, "other instance here", .{}); break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); } gop.value_ptr.* = .{ .alignment = alignment, .src = src }; } @@ -5311,7 +5312,14 @@ fn zirDeclRef(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!Air const src = inst_data.src(); const decl_name = inst_data.get(sema.code); const decl_index = try sema.lookupIdentifier(block, src, decl_name); - return sema.analyzeDeclRef(decl_index); + return sema.analyzeDeclRef(decl_index) catch |err| switch (err) { + error.AnalysisFail => { + const msg = sema.err orelse return err; + try sema.errNote(block, src, msg, "referenced here", .{}); + return err; + }, + else => return err, + }; } fn zirDeclVal(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!Air.Inst.Ref { @@ -5413,7 +5421,7 @@ fn lookupInNamespace( } break :msg msg; }; - 
return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); }, } } else if (namespace.decls.getKeyAdapted(ident_name, Module.DeclAdapter{ .mod = mod })) |decl_index| { @@ -5872,9 +5880,8 @@ fn analyzeCall( sema.analyzeBody(&child_block, fn_info.body) catch |err| switch (err) { error.ComptimeReturn => break :result inlining.comptime_result, error.AnalysisFail => { - const err_msg = inlining.err orelse return err; + const err_msg = sema.err orelse return err; try sema.errNote(block, call_src, err_msg, "called from here", .{}); - if (block.inlining) |some| some.err = err_msg; return err; }, else => |e| return e, @@ -6802,7 +6809,7 @@ fn zirMergeErrorSets(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileEr try sema.errNote(block, src, msg, "'||' merges error sets; 'or' performs boolean OR", .{}); break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); } const lhs_ty = try sema.analyzeAsType(block, lhs_src, lhs); const rhs_ty = try sema.analyzeAsType(block, rhs_src, rhs); @@ -6927,7 +6934,7 @@ fn zirIntToEnum(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!A try sema.addDeclaredHereNote(msg, dest_ty); break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); } return sema.addConstant(dest_ty, int_val); } @@ -7632,9 +7639,9 @@ fn funcCommon( try sema.addDeclaredHereNote(msg, bare_return_type); break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); } - if (!Type.fnCallingConventionAllowsZigTypes(cc_workaround) and !(try sema.validateExternType(return_type, .ret_ty))) { + if (!Type.fnCallingConventionAllowsZigTypes(cc_workaround) and !sema.validateExternType(return_type, .ret_ty)) { const msg = msg: { const msg = try sema.errMsg(block, ret_ty_src, "return type '{}' not allowed in function with calling convention '{s}'", .{ return_type.fmt(sema.mod), 
@tagName(cc_workaround), @@ -7642,12 +7649,12 @@ fn funcCommon( errdefer msg.destroy(sema.gpa); const src_decl = sema.mod.declPtr(block.src_decl); - try sema.explainWhyTypeIsNotExtern(block, ret_ty_src, msg, ret_ty_src.toSrcLoc(src_decl), return_type, .ret_ty); + try sema.explainWhyTypeIsNotExtern(msg, ret_ty_src.toSrcLoc(src_decl), return_type, .ret_ty); try sema.addDeclaredHereNote(msg, return_type); break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); } const arch = sema.mod.getTarget().cpu.arch; @@ -7812,7 +7819,7 @@ fn analyzeParameter( try sema.errNote(block, param_src, msg, "function is generic because of this parameter", .{}); break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); } if (this_generic and !Type.fnCallingConventionAllowsZigTypes(cc)) { return sema.fail(block, param_src, "generic parameters not allowed in function with calling convention '{s}'", .{@tagName(cc)}); @@ -7828,9 +7835,9 @@ fn analyzeParameter( try sema.addDeclaredHereNote(msg, param.ty); break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); } - if (!Type.fnCallingConventionAllowsZigTypes(cc) and !(try sema.validateExternType(param.ty, .param_ty))) { + if (!Type.fnCallingConventionAllowsZigTypes(cc) and !sema.validateExternType(param.ty, .param_ty)) { const msg = msg: { const msg = try sema.errMsg(block, param_src, "parameter of type '{}' not allowed in function with calling convention '{s}'", .{ param.ty.fmt(sema.mod), @tagName(cc), @@ -7838,12 +7845,12 @@ fn analyzeParameter( errdefer msg.destroy(sema.gpa); const src_decl = sema.mod.declPtr(block.src_decl); - try sema.explainWhyTypeIsNotExtern(block, param_src, msg, param_src.toSrcLoc(src_decl), param.ty, .param_ty); + try sema.explainWhyTypeIsNotExtern(msg, param_src.toSrcLoc(src_decl), param.ty, .param_ty); try sema.addDeclaredHereNote(msg, param.ty); break :msg 
msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); } if (requires_comptime and !param.is_comptime) { const msg = msg: { @@ -7855,7 +7862,7 @@ fn analyzeParameter( try sema.addDeclaredHereNote(msg, param.ty); break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); } } @@ -8046,7 +8053,7 @@ fn zirPtrToInt(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!Ai if (try sema.resolveMaybeUndefValIntable(block, ptr_src, ptr)) |ptr_val| { return sema.addConstant(Type.usize, ptr_val); } - try sema.requireRuntimeBlock(block, ptr_src, ptr_src); + try sema.requireRuntimeBlock(block, inst_data.src(), ptr_src); return block.addUnOp(.ptrtoint, ptr); } @@ -8288,6 +8295,7 @@ fn zirBitcast(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!Air const dest_ty = try sema.resolveType(block, dest_ty_src, extra.lhs); const operand = try sema.resolveInst(extra.rhs); + const operand_ty = sema.typeOf(operand); switch (dest_ty.zigTypeTag()) { .AnyFrame, .ComptimeFloat, @@ -8310,19 +8318,30 @@ fn zirBitcast(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!Air const msg = msg: { const msg = try sema.errMsg(block, dest_ty_src, "cannot @bitCast to '{}'", .{dest_ty.fmt(sema.mod)}); errdefer msg.destroy(sema.gpa); - switch (sema.typeOf(operand).zigTypeTag()) { - .Int, .ComptimeInt => try sema.errNote(block, dest_ty_src, msg, "use @intToEnum for type coercion", .{}), + switch (operand_ty.zigTypeTag()) { + .Int, .ComptimeInt => try sema.errNote(block, dest_ty_src, msg, "use @intToEnum to cast from '{}'", .{operand_ty.fmt(sema.mod)}), else => {}, } break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); }, - .Pointer => return sema.fail(block, dest_ty_src, "cannot @bitCast to '{}', use @ptrCast to cast to a pointer", .{ - dest_ty.fmt(sema.mod), - }), + .Pointer => { + const msg = msg: { + const msg = try 
sema.errMsg(block, dest_ty_src, "cannot @bitCast to '{}'", .{dest_ty.fmt(sema.mod)}); + errdefer msg.destroy(sema.gpa); + switch (operand_ty.zigTypeTag()) { + .Int, .ComptimeInt => try sema.errNote(block, dest_ty_src, msg, "use @intToPtr to cast from '{}'", .{operand_ty.fmt(sema.mod)}), + .Pointer => try sema.errNote(block, dest_ty_src, msg, "use @ptrCast to cast from '{}'", .{operand_ty.fmt(sema.mod)}), + else => {}, + } + + break :msg msg; + }; + return sema.failWithOwnedErrorMsg(msg); + }, .Struct, .Union => if (dest_ty.containerLayout() == .Auto) { const container = switch (dest_ty.zigTypeTag()) { .Struct => "struct", @@ -8342,6 +8361,70 @@ fn zirBitcast(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!Air .Vector, => {}, } + switch (operand_ty.zigTypeTag()) { + .AnyFrame, + .ComptimeFloat, + .ComptimeInt, + .EnumLiteral, + .ErrorSet, + .ErrorUnion, + .Fn, + .Frame, + .NoReturn, + .Null, + .Opaque, + .Optional, + .Type, + .Undefined, + .Void, + => return sema.fail(block, operand_src, "cannot @bitCast from '{}'", .{operand_ty.fmt(sema.mod)}), + + .Enum => { + const msg = msg: { + const msg = try sema.errMsg(block, operand_src, "cannot @bitCast from '{}'", .{operand_ty.fmt(sema.mod)}); + errdefer msg.destroy(sema.gpa); + switch (dest_ty.zigTypeTag()) { + .Int, .ComptimeInt => try sema.errNote(block, operand_src, msg, "use @enumToInt to cast to '{}'", .{dest_ty.fmt(sema.mod)}), + else => {}, + } + + break :msg msg; + }; + return sema.failWithOwnedErrorMsg(msg); + }, + .Pointer => { + const msg = msg: { + const msg = try sema.errMsg(block, operand_src, "cannot @bitCast from '{}'", .{operand_ty.fmt(sema.mod)}); + errdefer msg.destroy(sema.gpa); + switch (dest_ty.zigTypeTag()) { + .Int, .ComptimeInt => try sema.errNote(block, operand_src, msg, "use @ptrToInt to cast to '{}'", .{dest_ty.fmt(sema.mod)}), + .Pointer => try sema.errNote(block, operand_src, msg, "use @ptrCast to cast to '{}'", .{dest_ty.fmt(sema.mod)}), + else => {}, + } + + break :msg msg; 
+ }; + return sema.failWithOwnedErrorMsg(msg); + }, + .Struct, .Union => if (operand_ty.containerLayout() == .Auto) { + const container = switch (operand_ty.zigTypeTag()) { + .Struct => "struct", + .Union => "union", + else => unreachable, + }; + return sema.fail(block, operand_src, "cannot @bitCast from '{}', {s} does not have a guaranteed in-memory layout", .{ + operand_ty.fmt(sema.mod), container, + }); + }, + .BoundFn => @panic("TODO remove this type from the language and compiler"), + + .Array, + .Bool, + .Float, + .Int, + .Vector, + => {}, + } return sema.bitCast(block, dest_ty, operand, operand_src); } @@ -8587,7 +8670,7 @@ fn zirSwitchCapture( try sema.errNote(block, item_src, msg, "type '{}' here", .{field.ty.fmt(sema.mod)}); break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); } } @@ -8710,7 +8793,7 @@ fn zirSwitchCond( } break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); }; return sema.unionToTag(block, enum_ty, operand, src); }, @@ -8799,7 +8882,7 @@ fn zirSwitchBlock(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError ); break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); } const target = sema.mod.getTarget(); @@ -8903,7 +8986,7 @@ fn zirSwitchBlock(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError ); break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); } else if (special_prong == .none and operand_ty.isNonexhaustiveEnum() and !union_originally) { return sema.fail( block, @@ -9003,7 +9086,7 @@ fn zirSwitchBlock(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError if (maybe_msg) |msg| { maybe_msg = null; try sema.addDeclaredHereNote(msg, operand_ty); - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); } if (special_prong == .@"else" and 
seen_errors.count() == operand_ty.errorSetNames().len) { @@ -9812,7 +9895,7 @@ fn validateSwitchDupe( ); break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); } fn validateSwitchItemBool( @@ -9882,7 +9965,7 @@ fn validateSwitchNoRange( ); break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); } fn zirHasField(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!Air.Inst.Ref { @@ -12806,7 +12889,7 @@ fn analyzeCmpUnionTag( try sema.mod.errNoteNonLazy(union_ty.declSrcLoc(sema.mod), msg, "union '{}' is not a tagged union", .{union_ty.fmt(sema.mod)}); break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); }; // Coerce both the union and the tag to the union's tag type, and then execute the // enum comparison codepath. @@ -13067,6 +13150,36 @@ fn zirClosureGet( scope = scope.parent.?; } else unreachable; + if (tv.val.tag() == .generic_poison and !block.is_typeof and !block.is_comptime and sema.func != null) { + const msg = msg: { + const name = name: { + const file = sema.owner_decl.getFileScope(); + const tree = file.getTree(sema.mod.gpa) catch |err| { + // In this case we emit a warning + a less precise source location. 
+ log.warn("unable to load {s}: {s}", .{ + file.sub_file_path, @errorName(err), + }); + break :name null; + }; + const node = sema.owner_decl.relativeToNodeIndex(inst_data.src_node); + const token = tree.nodes.items(.main_token)[node]; + break :name tree.tokenSlice(token); + }; + + const msg = if (name) |some| + try sema.errMsg(block, inst_data.src(), "'{s}' not accessible from inner function", .{some}) + else + try sema.errMsg(block, inst_data.src(), "variable not accessible from inner function", .{}); + errdefer msg.destroy(sema.gpa); + + try sema.errNote(block, LazySrcLoc.nodeOffset(0), msg, "crossed function definition here", .{}); + + // TODO add "declared here" note + break :msg msg; + }; + return sema.failWithOwnedErrorMsg(msg); + } + return sema.addConstant(tv.ty, tv.val); } @@ -13098,8 +13211,8 @@ fn zirBuiltinSrc( const tracy = trace(@src()); defer tracy.end(); - const src = sema.src; // TODO better source location - const extra = sema.code.extraData(Zir.Inst.LineColumn, extended.operand).data; + const extra = sema.code.extraData(Zir.Inst.Src, extended.operand).data; + const src = LazySrcLoc.nodeOffset(extra.node); const func = sema.func orelse return sema.fail(block, src, "@src outside function", .{}); const fn_owner_decl = sema.mod.declPtr(func.owner_decl); @@ -14790,18 +14903,18 @@ fn zirPtrType(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!Air } else if (inst_data.size == .Many and elem_ty.zigTypeTag() == .Opaque) { return sema.fail(block, elem_ty_src, "unknown-length pointer to opaque not allowed", .{}); } else if (inst_data.size == .C) { - if (!(try sema.validateExternType(elem_ty, .other))) { + if (!sema.validateExternType(elem_ty, .other)) { const msg = msg: { const msg = try sema.errMsg(block, elem_ty_src, "C pointers cannot point to non-C-ABI-compatible type '{}'", .{elem_ty.fmt(sema.mod)}); errdefer msg.destroy(sema.gpa); const src_decl = sema.mod.declPtr(block.src_decl); - try sema.explainWhyTypeIsNotExtern(block, elem_ty_src, 
msg, elem_ty_src.toSrcLoc(src_decl), elem_ty, .other); + try sema.explainWhyTypeIsNotExtern(msg, elem_ty_src.toSrcLoc(src_decl), elem_ty, .other); try sema.addDeclaredHereNote(msg, elem_ty); break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); } if (elem_ty.zigTypeTag() == .Opaque) { return sema.fail(block, elem_ty_src, "C pointers cannot point to opaque types", .{}); @@ -14972,7 +15085,7 @@ fn zirStructInit( try sema.errNote(block, other_field_src, msg, "other field here", .{}); break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); } found_fields[field_index] = item.data.field_type; field_inits[field_index] = try sema.resolveInst(item.data.init); @@ -15113,7 +15226,7 @@ fn finishStructInit( .{fqn}, ); } - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); } const is_comptime = for (field_inits) |field_init| { @@ -15188,7 +15301,7 @@ fn zirStructInitAnon( try sema.errNote(block, prev_source, msg, "other field here", .{}); break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); } gop.value_ptr.* = @intCast(u32, i); @@ -15204,7 +15317,7 @@ fn zirStructInitAnon( try sema.addDeclaredHereNote(msg, types[i]); break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); } const init_src = src; // TODO better source location if (try sema.resolveMaybeUndefVal(block, init_src, init)) |init_val| { @@ -15402,7 +15515,7 @@ fn zirArrayInitAnon( try sema.addDeclaredHereNote(msg, types[i]); break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); } if (try sema.resolveMaybeUndefVal(block, operand_src, elem)) |val| { values[i] = val; @@ -15716,7 +15829,7 @@ fn zirTagName(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!Air try sema.addDeclaredHereNote(msg, 
operand_ty); break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); }, else => return sema.fail(block, operand_src, "expected enum or union; found '{}'", .{ operand_ty.fmt(mod), @@ -15735,7 +15848,7 @@ fn zirTagName(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!Air try mod.errNoteNonLazy(enum_decl.srcLoc(), msg, "declared here", .{}); break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); }; const field_name = enum_ty.enumFieldName(field_index); return sema.addStrLit(block, field_name); @@ -15874,18 +15987,18 @@ fn zirReify(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!Air.I } else if (ptr_size == .Many and elem_ty.zigTypeTag() == .Opaque) { return sema.fail(block, src, "unknown-length pointer to opaque not allowed", .{}); } else if (ptr_size == .C) { - if (!(try sema.validateExternType(elem_ty, .other))) { + if (!sema.validateExternType(elem_ty, .other)) { const msg = msg: { const msg = try sema.errMsg(block, src, "C pointers cannot point to non-C-ABI-compatible type '{}'", .{elem_ty.fmt(sema.mod)}); errdefer msg.destroy(sema.gpa); const src_decl = sema.mod.declPtr(block.src_decl); - try sema.explainWhyTypeIsNotExtern(block, src, msg, src.toSrcLoc(src_decl), elem_ty, .other); + try sema.explainWhyTypeIsNotExtern(msg, src.toSrcLoc(src_decl), elem_ty, .other); try sema.addDeclaredHereNote(msg, elem_ty); break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); } if (elem_ty.zigTypeTag() == .Opaque) { return sema.fail(block, src, "C pointers cannot point to opaque types", .{}); @@ -16288,7 +16401,7 @@ fn zirReify(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!Air.I try sema.addDeclaredHereNote(msg, union_obj.tag_ty); break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); } } @@ -16322,7 +16435,7 @@ fn 
zirReify(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!Air.I try sema.addDeclaredHereNote(msg, union_obj.tag_ty); break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); } } @@ -16353,10 +16466,24 @@ fn zirReify(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!Air.I return sema.fail(block, src, "varargs functions must have C calling convention", .{}); } - const alignment = @intCast(u29, alignment_val.toUnsignedInt(target)); // TODO: Validate this value. + const alignment = alignment: { + if (!try sema.intFitsInType(block, src, alignment_val, Type.u32, null)) { + return sema.fail(block, src, "alignment must fit in 'u32'", .{}); + } + const alignment = @intCast(u29, alignment_val.toUnsignedInt(target)); + if (alignment == target_util.defaultFunctionAlignment(target)) { + break :alignment 0; + } else { + break :alignment alignment; + } + }; var buf: Value.ToTypeBuffer = undefined; - const args: []Value = if (args_val.castTag(.aggregate)) |some| some.data else &.{}; + const args_slice_val = args_val.castTag(.slice).?.data; + const args_decl_index = args_slice_val.ptr.pointerDecl().?; + try sema.ensureDeclAnalyzed(args_decl_index); + const args_decl = mod.declPtr(args_decl_index); + const args: []Value = if (args_decl.val.castTag(.aggregate)) |some| some.data else &.{}; var param_types = try sema.arena.alloc(Type, args.len); var comptime_params = try sema.arena.alloc(bool, args.len); var noalias_bits: u32 = 0; @@ -16778,7 +16905,7 @@ fn zirErrSetCast(sema: *Sema, block: *Block, extended: Zir.Inst.Extended.InstDat try sema.addDeclaredHereNote(msg, dest_ty); break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); } if (maybe_operand_val) |val| { @@ -16796,7 +16923,7 @@ fn zirErrSetCast(sema: *Sema, block: *Block, extended: Zir.Inst.Extended.InstDat try sema.addDeclaredHereNote(msg, dest_ty); break :msg msg; }; - return 
sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); } } @@ -16934,7 +17061,7 @@ fn zirTruncate(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!Ai }); break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); } } @@ -17225,7 +17352,7 @@ fn bitOffsetOf(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!u6 try sema.addDeclaredHereNote(msg, ty); break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); }, } @@ -17329,7 +17456,7 @@ fn checkPtrOperand( break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); }, .Optional => if (ty.isPtrLikeOptional()) return, else => {}, @@ -17359,7 +17486,7 @@ fn checkPtrType( break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); }, .Optional => if (ty.isPtrLikeOptional()) return, else => {}, @@ -17495,7 +17622,7 @@ fn checkComptimeVarStore( try sema.errNote(block, cond_src, msg, "runtime condition here", .{}); break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); } if (block.runtime_loop) |loop_src| { const msg = msg: { @@ -17504,7 +17631,7 @@ fn checkComptimeVarStore( try sema.errNote(block, loop_src, msg, "non-inline loop here", .{}); break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); } unreachable; } @@ -17641,7 +17768,7 @@ fn checkVectorizableBinaryOperands( try sema.errNote(block, rhs_src, msg, "length {d} here", .{rhs_len}); break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); } } else { const msg = msg: { @@ -17658,7 +17785,7 @@ fn checkVectorizableBinaryOperands( } break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); } } @@ -18058,7 
+18185,7 @@ fn analyzeShuffle( break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); } } @@ -18654,7 +18781,7 @@ fn zirFieldParentPtr(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileEr try sema.addDeclaredHereNote(msg, struct_ty); break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); } return sema.addConstant(result_ptr, payload.data.container_ptr); } @@ -18883,10 +19010,8 @@ fn zirVarExtended( extended: Zir.Inst.Extended.InstData, ) CompileError!Air.Inst.Ref { const extra = sema.code.extraData(Zir.Inst.ExtendedVar, extended.operand); - const src = sema.src; - const ty_src: LazySrcLoc = src; // TODO add a LazySrcLoc that points at type - const name_src: LazySrcLoc = src; // TODO add a LazySrcLoc that points at the name token - const init_src: LazySrcLoc = src; // TODO add a LazySrcLoc that points at init expr + const ty_src: LazySrcLoc = .{ .node_offset_var_decl_ty = 0 }; + const init_src: LazySrcLoc = .{ .node_offset_var_decl_init = 0 }; const small = @bitCast(Zir.Inst.ExtendedVar.Small, extended.small); var extra_index: usize = extra.end; @@ -18900,12 +19025,6 @@ fn zirVarExtended( // ZIR supports encoding this information but it is not used; the information // is encoded via the Decl entry. 
assert(!small.has_align); - //const align_val: Value = if (small.has_align) blk: { - // const align_ref = @intToEnum(Zir.Inst.Ref, sema.code.extra[extra_index]); - // extra_index += 1; - // const align_tv = try sema.resolveInstConst(block, align_src, align_ref); - // break :blk align_tv.val; - //} else Value.@"null"; const uncasted_init: Air.Inst.Ref = if (small.has_init) blk: { const init_ref = @intToEnum(Zir.Inst.Ref, sema.code.extra[extra_index]); @@ -18929,7 +19048,7 @@ fn zirVarExtended( return sema.failWithNeededComptime(block, init_src, "container level variable initializers must be comptime known"); } else Value.initTag(.unreachable_value); - try sema.validateVarType(block, name_src, var_ty, small.is_extern); + try sema.validateVarType(block, ty_src, var_ty, small.is_extern); const new_var = try sema.gpa.create(Module.Var); errdefer sema.gpa.destroy(new_var); @@ -19462,7 +19581,7 @@ fn requireRuntimeBlock(sema: *Sema, block: *Block, src: LazySrcLoc, runtime_src: } break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); } try sema.requireFunctionBlock(block, src); } @@ -19491,7 +19610,7 @@ fn validateVarType( break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); } fn validateRunTimeType( @@ -19668,7 +19787,7 @@ const ExternPosition = enum { /// Returns true if `ty` is allowed in extern types. /// Does *NOT* require `ty` to be resolved in any way. 
-fn validateExternType(sema: *Sema, ty: Type, position: ExternPosition) CompileError!bool { +fn validateExternType(sema: *Sema, ty: Type, position: ExternPosition) bool { switch (ty.zigTypeTag()) { .Type, .ComptimeFloat, @@ -19713,8 +19832,6 @@ fn validateExternType(sema: *Sema, ty: Type, position: ExternPosition) CompileEr fn explainWhyTypeIsNotExtern( sema: *Sema, - block: *Block, - src: LazySrcLoc, msg: *Module.ErrorMsg, src_loc: Module.SrcLoc, ty: Type, @@ -19758,7 +19875,7 @@ fn explainWhyTypeIsNotExtern( var buf: Type.Payload.Bits = undefined; const tag_ty = ty.intTagType(&buf); try mod.errNoteNonLazy(src_loc, msg, "enum tag type '{}' is not extern compatible", .{tag_ty.fmt(sema.mod)}); - try sema.explainWhyTypeIsNotExtern(block, src, msg, src_loc, tag_ty, position); + try sema.explainWhyTypeIsNotExtern(msg, src_loc, tag_ty, position); }, .Struct => try mod.errNoteNonLazy(src_loc, msg, "only structs with packed or extern layout are extern compatible", .{}), .Union => try mod.errNoteNonLazy(src_loc, msg, "only unions with packed or extern layout are extern compatible", .{}), @@ -19768,13 +19885,87 @@ fn explainWhyTypeIsNotExtern( } else if (position == .param_ty) { return mod.errNoteNonLazy(src_loc, msg, "arrays are not allowed as a parameter type", .{}); } - try sema.explainWhyTypeIsNotExtern(block, src, msg, src_loc, ty.elemType2(), position); + try sema.explainWhyTypeIsNotExtern(msg, src_loc, ty.elemType2(), position); }, - .Vector => try sema.explainWhyTypeIsNotExtern(block, src, msg, src_loc, ty.elemType2(), position), + .Vector => try sema.explainWhyTypeIsNotExtern(msg, src_loc, ty.elemType2(), position), .Optional => try mod.errNoteNonLazy(src_loc, msg, "only pointer like optionals are extern compatible", .{}), } } +/// Returns true if `ty` is allowed in packed types. +/// Does *NOT* require `ty` to be resolved in any way. 
+fn validatePackedType(ty: Type) bool { + switch (ty.zigTypeTag()) { + .Type, + .ComptimeFloat, + .ComptimeInt, + .EnumLiteral, + .Undefined, + .Null, + .ErrorUnion, + .ErrorSet, + .BoundFn, + .Frame, + .NoReturn, + .Opaque, + .AnyFrame, + .Fn, + .Array, + .Optional, + => return false, + .Void, + .Bool, + .Float, + .Pointer, + .Int, + .Vector, + .Enum, + => return true, + .Struct, .Union => return ty.containerLayout() == .Packed, + } +} + +fn explainWhyTypeIsNotPacked( + sema: *Sema, + msg: *Module.ErrorMsg, + src_loc: Module.SrcLoc, + ty: Type, +) CompileError!void { + const mod = sema.mod; + switch (ty.zigTypeTag()) { + .Void, + .Bool, + .Float, + .Pointer, + .Int, + .Vector, + .Enum, + => return, + .Type, + .ComptimeFloat, + .ComptimeInt, + .EnumLiteral, + .Undefined, + .Null, + .BoundFn, + .Frame, + .NoReturn, + .Opaque, + .ErrorUnion, + .ErrorSet, + .AnyFrame, + .Optional, + .Array, + => try mod.errNoteNonLazy(src_loc, msg, "type has no guaranteed in-memory representation", .{}), + .Fn => { + try mod.errNoteNonLazy(src_loc, msg, "type has no guaranteed in-memory representation", .{}); + try mod.errNoteNonLazy(src_loc, msg, "use '*const ' to make a function pointer type", .{}); + }, + .Struct => try mod.errNoteNonLazy(src_loc, msg, "only packed structs layout are allowed in packed types", .{}), + .Union => try mod.errNoteNonLazy(src_loc, msg, "only packed unions layout are allowed in packed types", .{}), + } +} + pub const PanicId = enum { unreach, unwrap_null, @@ -19994,6 +20185,77 @@ fn panicIndexOutOfBounds( try sema.addSafetyCheckExtra(parent_block, ok, &fail_block); } +fn panicSentinelMismatch( + sema: *Sema, + parent_block: *Block, + src: LazySrcLoc, + maybe_sentinel: ?Value, + sentinel_ty: Type, + ptr: Air.Inst.Ref, + sentinel_index: Air.Inst.Ref, +) !void { + const expected_sentinel_val = maybe_sentinel orelse return; + const expected_sentinel = try sema.addConstant(sentinel_ty, expected_sentinel_val); + + const ptr_ty = sema.typeOf(ptr); + const 
actual_sentinel = if (ptr_ty.isSlice()) + try parent_block.addBinOp(.slice_elem_val, ptr, sentinel_index) + else blk: { + const elem_ptr_ty = try sema.elemPtrType(ptr_ty, null); + const sentinel_ptr = try parent_block.addPtrElemPtr(ptr, sentinel_index, elem_ptr_ty); + break :blk try parent_block.addTyOp(.load, sentinel_ty, sentinel_ptr); + }; + + const ok = if (sentinel_ty.zigTypeTag() == .Vector) ok: { + const eql = + try parent_block.addCmpVector(expected_sentinel, actual_sentinel, .eq, try sema.addType(sentinel_ty)); + break :ok try parent_block.addInst(.{ + .tag = .reduce, + .data = .{ .reduce = .{ + .operand = eql, + .operation = .And, + } }, + }); + } else if (sentinel_ty.isSelfComparable(true)) + try parent_block.addBinOp(.cmp_eq, expected_sentinel, actual_sentinel) + else { + const panic_fn = try sema.getBuiltin(parent_block, src, "checkNonScalarSentinel"); + const args: [2]Air.Inst.Ref = .{ expected_sentinel, actual_sentinel }; + _ = try sema.analyzeCall(parent_block, panic_fn, src, src, .auto, false, &args, null); + return; + }; + const gpa = sema.gpa; + + var fail_block: Block = .{ + .parent = parent_block, + .sema = sema, + .src_decl = parent_block.src_decl, + .namespace = parent_block.namespace, + .wip_capture_scope = parent_block.wip_capture_scope, + .instructions = .{}, + .inlining = parent_block.inlining, + .is_comptime = parent_block.is_comptime, + }; + + defer fail_block.instructions.deinit(gpa); + + { + const this_feature_is_implemented_in_the_backend = + sema.mod.comp.bin_file.options.use_llvm; + + if (!this_feature_is_implemented_in_the_backend) { + // TODO implement this feature in all the backends and then delete this branch + _ = try fail_block.addNoOp(.breakpoint); + _ = try fail_block.addNoOp(.unreach); + } else { + const panic_fn = try sema.getBuiltin(&fail_block, src, "panicSentinelMismatch"); + const args: [2]Air.Inst.Ref = .{ expected_sentinel, actual_sentinel }; + _ = try sema.analyzeCall(&fail_block, panic_fn, src, src, .auto, false, 
&args, null); + } + } + try sema.addSafetyCheckExtra(parent_block, ok, &fail_block); +} + fn safetyPanic( sema: *Sema, block: *Block, @@ -20050,7 +20312,7 @@ fn emitBackwardBranch(sema: *Sema, block: *Block, src: LazySrcLoc) !void { "use @setEvalBranchQuota() to raise the branch limit from {d}", .{sema.branch_quota}, ); - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); } } @@ -20158,7 +20420,7 @@ fn fieldVal( try sema.addDeclaredHereNote(msg, child_type); break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); } else (try sema.mod.getErrorValue(field_name)).key; return sema.addConstant( @@ -20213,7 +20475,7 @@ fn fieldVal( if (child_type.zigTypeTag() == .Array) try sema.errNote(block, src, msg, "array values have 'len' member", .{}); break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); }, } }, @@ -20551,7 +20813,7 @@ fn fieldCallBind( try sema.addDeclaredHereNote(msg, concrete_ty); break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); } fn finishFieldCallBind( @@ -20606,7 +20868,7 @@ fn namespaceLookup( try sema.mod.errNoteNonLazy(decl.srcLoc(), msg, "declared here", .{}); break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); } return decl_index; } @@ -20621,7 +20883,14 @@ fn namespaceLookupRef( decl_name: []const u8, ) CompileError!?Air.Inst.Ref { const decl = (try sema.namespaceLookup(block, src, namespace, decl_name)) orelse return null; - return try sema.analyzeDeclRef(decl); + return sema.analyzeDeclRef(decl) catch |err| switch (err) { + error.AnalysisFail => { + const msg = sema.err orelse return err; + try sema.errNote(block, src, msg, "referenced here", .{}); + return err; + }, + else => return err, + }; } fn namespaceLookupVal( @@ -20939,7 +21208,7 @@ fn unionFieldPtr( try 
sema.addDeclaredHereNote(msg, union_ty); break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); } }, .Packed, .Extern => {}, @@ -21009,7 +21278,7 @@ fn unionFieldVal( try sema.addDeclaredHereNote(msg, union_ty); break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); } }, .Packed, .Extern => { @@ -21183,7 +21452,7 @@ fn validateRuntimeElemAccess( break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); } } @@ -21959,7 +22228,7 @@ fn coerceExtra( try sema.addDeclaredHereNote(msg, dest_ty); break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); }; return sema.addConstant( dest_ty, @@ -22087,7 +22356,7 @@ fn coerceExtra( try sema.mod.errNoteNonLazy(ret_ty_src.toSrcLoc(src_decl), msg, "'noreturn' declared here", .{}); break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); } const msg = msg: { @@ -22128,7 +22397,7 @@ fn coerceExtra( break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); } const InMemoryCoercionResult = union(enum) { @@ -24193,7 +24462,7 @@ fn coerceEnumToUnion( try sema.addDeclaredHereNote(msg, union_ty); break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); }; const enum_tag = try sema.coerce(block, tag_ty, inst, inst_src); @@ -24208,7 +24477,7 @@ fn coerceEnumToUnion( try sema.addDeclaredHereNote(msg, union_ty); break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); }; const field = union_obj.fields.values()[field_index]; const field_ty = try sema.resolveTypeFields(block, inst_src, field.ty); @@ -24224,7 +24493,7 @@ fn coerceEnumToUnion( try sema.addDeclaredHereNote(msg, union_ty); break :msg msg; }; - return 
sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); }; return sema.addConstant(union_ty, try Value.Tag.@"union".create(sema.arena, .{ @@ -24244,7 +24513,7 @@ fn coerceEnumToUnion( try sema.addDeclaredHereNote(msg, tag_ty); break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); } // If the union has all fields 0 bits, the union value is just the enum value. @@ -24273,7 +24542,7 @@ fn coerceEnumToUnion( try sema.addDeclaredHereNote(msg, union_ty); break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); } fn coerceAnonStructToUnion( @@ -24302,7 +24571,7 @@ fn coerceAnonStructToUnion( try sema.addDeclaredHereNote(msg, union_ty); break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); } const field_name = anon_struct.names[0]; @@ -24362,7 +24631,7 @@ fn coerceArrayLike( try sema.errNote(block, inst_src, msg, "source has length {d}", .{inst_len}); break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); } const dest_elem_ty = dest_ty.childType(); @@ -24434,7 +24703,7 @@ fn coerceTupleToArray( try sema.errNote(block, inst_src, msg, "source has length {d}", .{inst_len}); break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); } const dest_elems = try sema.usizeCast(block, dest_ty_src, dest_ty.arrayLenIncludingSentinel()); @@ -24597,7 +24866,7 @@ fn coerceTupleToStruct( if (root_msg) |msg| { try sema.addDeclaredHereNote(msg, struct_ty); - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); } if (runtime_src) |rs| { @@ -24701,7 +24970,7 @@ fn coerceTupleToTuple( if (root_msg) |msg| { try sema.addDeclaredHereNote(msg, tuple_ty); - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); } if (runtime_src) |rs| { 
@@ -24724,7 +24993,14 @@ fn analyzeDeclVal( if (sema.decl_val_table.get(decl_index)) |result| { return result; } - const decl_ref = try sema.analyzeDeclRef(decl_index); + const decl_ref = sema.analyzeDeclRef(decl_index) catch |err| switch (err) { + error.AnalysisFail => { + const msg = sema.err orelse return err; + try sema.errNote(block, src, msg, "referenced here", .{}); + return err; + }, + else => return err, + }; const result = try sema.analyzeLoad(block, src, decl_ref, src); if (Air.refToIndex(result)) |index| { if (sema.air_instructions.items(.tag)[index] == .constant and !block.is_typeof) { @@ -24735,6 +25011,12 @@ fn analyzeDeclVal( } fn ensureDeclAnalyzed(sema: *Sema, decl_index: Decl.Index) CompileError!void { + const decl = sema.mod.declPtr(decl_index); + if (decl.analysis == .in_progress) { + const msg = try Module.ErrorMsg.create(sema.gpa, decl.srcLoc(), "dependency loop detected", .{}); + return sema.failWithOwnedErrorMsg(msg); + } + sema.mod.ensureDeclAnalyzed(decl_index) catch |err| { if (sema.owner_func) |owner_func| { owner_func.state = .dependency_failure; @@ -25214,6 +25496,7 @@ fn analyzeSlice( } break :s null; }; + const slice_sentinel = if (sentinel_opt != .none) sentinel else null; // requirement: start <= end if (try sema.resolveDefinedValue(block, end_src, end)) |end_val| { @@ -25265,7 +25548,7 @@ fn analyzeSlice( break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); } } } @@ -25293,7 +25576,12 @@ fn analyzeSlice( const opt_new_ptr_val = try sema.resolveMaybeUndefVal(block, ptr_src, new_ptr); const new_ptr_val = opt_new_ptr_val orelse { - return block.addBitCast(return_ty, new_ptr); + const result = try block.addBitCast(return_ty, new_ptr); + if (block.wantSafety()) { + // requirement: result[new_len] == slice_sentinel + try sema.panicSentinelMismatch(block, src, slice_sentinel, elem_ty, result, new_len); + } + return result; }; if (!new_ptr_val.isUndef()) { @@ -25357,7 +25645,7 @@ 
fn analyzeSlice( // requirement: start <= end try sema.panicIndexOutOfBounds(block, src, start, end, .cmp_lte); } - return block.addInst(.{ + const result = try block.addInst(.{ .tag = .slice, .data = .{ .ty_pl = .{ .ty = try sema.addType(return_ty), @@ -25367,6 +25655,11 @@ fn analyzeSlice( }), } }, }); + if (block.wantSafety()) { + // requirement: result[new_len] == slice_sentinel + try sema.panicSentinelMismatch(block, src, slice_sentinel, elem_ty, result, new_len); + } + return result; } /// Asserts that lhs and rhs types are both numeric. @@ -26204,7 +26497,7 @@ fn resolvePeerTypes( break :msg msg; }; - return sema.failWithOwnedErrorMsg(block, msg); + return sema.failWithOwnedErrorMsg(msg); } const chosen_ty = sema.typeOf(chosen); @@ -26363,21 +26656,41 @@ fn resolveStructLayout( switch (struct_obj.status) { .none, .have_field_types => {}, .field_types_wip, .layout_wip => { - return sema.fail(block, src, "struct '{}' depends on itself", .{ty.fmt(sema.mod)}); + const msg = try Module.ErrorMsg.create( + sema.gpa, + struct_obj.srcLoc(sema.mod), + "struct '{}' depends on itself", + .{ty.fmt(sema.mod)}, + ); + return sema.failWithOwnedErrorMsg(msg); }, .have_layout, .fully_resolved_wip, .fully_resolved => return, } struct_obj.status = .layout_wip; - for (struct_obj.fields.values()) |field| { - try sema.resolveTypeLayout(block, src, field.ty); + for (struct_obj.fields.values()) |field, i| { + sema.resolveTypeLayout(block, src, field.ty) catch |err| switch (err) { + error.AnalysisFail => { + const msg = sema.err orelse return err; + try sema.addFieldErrNote(block, ty, i, msg, "while checking this field", .{}); + return err; + }, + else => return err, + }; } struct_obj.status = .have_layout; // In case of querying the ABI alignment of this struct, we will ask // for hasRuntimeBits() of each field, so we need "requires comptime" // to be known already before this function returns. 
- for (struct_obj.fields.values()) |field| { - _ = try sema.typeRequiresComptime(block, src, field.ty); + for (struct_obj.fields.values()) |field, i| { + _ = sema.typeRequiresComptime(block, src, field.ty) catch |err| switch (err) { + error.AnalysisFail => { + const msg = sema.err orelse return err; + try sema.addFieldErrNote(block, ty, i, msg, "while checking this field", .{}); + return err; + }, + else => return err, + }; } } // otherwise it's a tuple; no need to resolve anything @@ -26394,13 +26707,26 @@ fn resolveUnionLayout( switch (union_obj.status) { .none, .have_field_types => {}, .field_types_wip, .layout_wip => { - return sema.fail(block, src, "union '{}' depends on itself", .{ty.fmt(sema.mod)}); + const msg = try Module.ErrorMsg.create( + sema.gpa, + union_obj.srcLoc(sema.mod), + "union '{}' depends on itself", + .{ty.fmt(sema.mod)}, + ); + return sema.failWithOwnedErrorMsg(msg); }, .have_layout, .fully_resolved_wip, .fully_resolved => return, } union_obj.status = .layout_wip; - for (union_obj.fields.values()) |field| { - try sema.resolveTypeLayout(block, src, field.ty); + for (union_obj.fields.values()) |field, i| { + sema.resolveTypeLayout(block, src, field.ty) catch |err| switch (err) { + error.AnalysisFail => { + const msg = sema.err orelse return err; + try sema.addFieldErrNote(block, ty, i, msg, "while checking this field", .{}); + return err; + }, + else => return err, + }; } union_obj.status = .have_layout; } @@ -26528,12 +26854,12 @@ pub fn resolveTypeFields(sema: *Sema, block: *Block, src: LazySrcLoc, ty: Type) switch (ty.tag()) { .@"struct" => { const struct_obj = ty.castTag(.@"struct").?.data; - try sema.resolveTypeFieldsStruct(block, src, ty, struct_obj); + try sema.resolveTypeFieldsStruct(ty, struct_obj); return ty; }, .@"union", .union_safety_tagged, .union_tagged => { const union_obj = ty.cast(Type.Payload.Union).?.data; - try sema.resolveTypeFieldsUnion(block, src, ty, union_obj); + try sema.resolveTypeFieldsUnion(ty, union_obj); return 
ty; }, .type_info => return sema.resolveBuiltinTypeFields(block, src, "Type"), @@ -26554,15 +26880,19 @@ pub fn resolveTypeFields(sema: *Sema, block: *Block, src: LazySrcLoc, ty: Type) fn resolveTypeFieldsStruct( sema: *Sema, - block: *Block, - src: LazySrcLoc, ty: Type, struct_obj: *Module.Struct, ) CompileError!void { switch (struct_obj.status) { .none => {}, .field_types_wip => { - return sema.fail(block, src, "struct '{}' depends on itself", .{ty.fmt(sema.mod)}); + const msg = try Module.ErrorMsg.create( + sema.gpa, + struct_obj.srcLoc(sema.mod), + "struct '{}' depends on itself", + .{ty.fmt(sema.mod)}, + ); + return sema.failWithOwnedErrorMsg(msg); }, .have_field_types, .have_layout, @@ -26576,17 +26906,17 @@ fn resolveTypeFieldsStruct( try semaStructFields(sema.mod, struct_obj); } -fn resolveTypeFieldsUnion( - sema: *Sema, - block: *Block, - src: LazySrcLoc, - ty: Type, - union_obj: *Module.Union, -) CompileError!void { +fn resolveTypeFieldsUnion(sema: *Sema, ty: Type, union_obj: *Module.Union) CompileError!void { switch (union_obj.status) { .none => {}, .field_types_wip => { - return sema.fail(block, src, "union '{}' depends on itself", .{ty.fmt(sema.mod)}); + const msg = try Module.ErrorMsg.create( + sema.gpa, + union_obj.srcLoc(sema.mod), + "union '{}' depends on itself", + .{ty.fmt(sema.mod)}, + ); + return sema.failWithOwnedErrorMsg(msg); }, .have_field_types, .have_layout, @@ -26802,7 +27132,7 @@ fn semaStructFields(mod: *Module, struct_obj: *Module.Struct) CompileError!void try sema.errNote(&block_scope, src, msg, "struct declared here", .{}); break :msg msg; }; - return sema.failWithOwnedErrorMsg(&block_scope, msg); + return sema.failWithOwnedErrorMsg(msg); } gop.value_ptr.* = .{ .ty = Type.initTag(.noreturn), @@ -26851,31 +27181,44 @@ fn semaStructFields(mod: *Module, struct_obj: *Module.Struct) CompileError!void const field = &struct_obj.fields.values()[i]; field.ty = try field_ty.copy(decl_arena_allocator); - if (struct_obj.layout == .Extern and 
!(try sema.validateExternType(field.ty, .other))) { + if (field_ty.zigTypeTag() == .Opaque) { + const msg = msg: { + const tree = try sema.getAstTree(&block_scope); + const field_src = enumFieldSrcLoc(decl, tree.*, struct_obj.node_offset, i); + const msg = try sema.errMsg(&block_scope, field_src, "opaque types have unknown size and therefore cannot be directly embedded in structs", .{}); + errdefer msg.destroy(sema.gpa); + + try sema.addDeclaredHereNote(msg, field_ty); + break :msg msg; + }; + return sema.failWithOwnedErrorMsg(msg); + } + if (struct_obj.layout == .Extern and !sema.validateExternType(field.ty, .other)) { const msg = msg: { const tree = try sema.getAstTree(&block_scope); const fields_src = enumFieldSrcLoc(decl, tree.*, struct_obj.node_offset, i); const msg = try sema.errMsg(&block_scope, fields_src, "extern structs cannot contain fields of type '{}'", .{field.ty.fmt(sema.mod)}); errdefer msg.destroy(sema.gpa); - try sema.explainWhyTypeIsNotExtern(&block_scope, fields_src, msg, fields_src.toSrcLoc(decl), field.ty, .other); + try sema.explainWhyTypeIsNotExtern(msg, fields_src.toSrcLoc(decl), field.ty, .other); try sema.addDeclaredHereNote(msg, field.ty); break :msg msg; }; - return sema.failWithOwnedErrorMsg(&block_scope, msg); - } - if (field_ty.zigTypeTag() == .Opaque) { + return sema.failWithOwnedErrorMsg(msg); + } else if (struct_obj.layout == .Packed and !(validatePackedType(field.ty))) { const msg = msg: { const tree = try sema.getAstTree(&block_scope); - const field_src = enumFieldSrcLoc(decl, tree.*, struct_obj.node_offset, i); - const msg = try sema.errMsg(&block_scope, field_src, "opaque types have unknown size and therefore cannot be directly embedded in structs", .{}); + const fields_src = enumFieldSrcLoc(decl, tree.*, struct_obj.node_offset, i); + const msg = try sema.errMsg(&block_scope, fields_src, "packed structs cannot contain fields of type '{}'", .{field.ty.fmt(sema.mod)}); errdefer msg.destroy(sema.gpa); - try 
sema.addDeclaredHereNote(msg, field_ty); + try sema.explainWhyTypeIsNotPacked(msg, fields_src.toSrcLoc(decl), field.ty); + + try sema.addDeclaredHereNote(msg, field.ty); break :msg msg; }; - return sema.failWithOwnedErrorMsg(&block_scope, msg); + return sema.failWithOwnedErrorMsg(msg); } if (zir_field.align_body_len > 0) { @@ -27157,7 +27500,7 @@ fn semaUnionFields(mod: *Module, union_obj: *Module.Union) CompileError!void { try sema.errNote(&block_scope, src, msg, "union declared here", .{}); break :msg msg; }; - return sema.failWithOwnedErrorMsg(&block_scope, msg); + return sema.failWithOwnedErrorMsg(msg); } if (tag_ty_field_names) |*names| { @@ -27171,35 +27514,48 @@ fn semaUnionFields(mod: *Module, union_obj: *Module.Union) CompileError!void { try sema.addDeclaredHereNote(msg, union_obj.tag_ty); break :msg msg; }; - return sema.failWithOwnedErrorMsg(&block_scope, msg); + return sema.failWithOwnedErrorMsg(msg); } } - if (union_obj.layout == .Extern and !(try sema.validateExternType(field_ty, .union_field))) { + if (field_ty.zigTypeTag() == .Opaque) { const msg = msg: { const tree = try sema.getAstTree(&block_scope); const field_src = enumFieldSrcLoc(decl, tree.*, union_obj.node_offset, field_i); - const msg = try sema.errMsg(&block_scope, field_src, "extern unions cannot contain fields of type '{}'", .{field_ty.fmt(sema.mod)}); + const msg = try sema.errMsg(&block_scope, field_src, "opaque types have unknown size and therefore cannot be directly embedded in unions", .{}); errdefer msg.destroy(sema.gpa); - try sema.explainWhyTypeIsNotExtern(&block_scope, field_src, msg, field_src.toSrcLoc(decl), field_ty, .union_field); - try sema.addDeclaredHereNote(msg, field_ty); break :msg msg; }; - return sema.failWithOwnedErrorMsg(&block_scope, msg); + return sema.failWithOwnedErrorMsg(msg); } - if (field_ty.zigTypeTag() == .Opaque) { + if (union_obj.layout == .Extern and !sema.validateExternType(field_ty, .union_field)) { const msg = msg: { const tree = try 
sema.getAstTree(&block_scope); const field_src = enumFieldSrcLoc(decl, tree.*, union_obj.node_offset, field_i); - const msg = try sema.errMsg(&block_scope, field_src, "opaque types have unknown size and therefore cannot be directly embedded in unions", .{}); + const msg = try sema.errMsg(&block_scope, field_src, "extern unions cannot contain fields of type '{}'", .{field_ty.fmt(sema.mod)}); errdefer msg.destroy(sema.gpa); + try sema.explainWhyTypeIsNotExtern(msg, field_src.toSrcLoc(decl), field_ty, .union_field); + try sema.addDeclaredHereNote(msg, field_ty); break :msg msg; }; - return sema.failWithOwnedErrorMsg(&block_scope, msg); + return sema.failWithOwnedErrorMsg(msg); + } else if (union_obj.layout == .Packed and !(validatePackedType(field_ty))) { + const msg = msg: { + const tree = try sema.getAstTree(&block_scope); + const fields_src = enumFieldSrcLoc(decl, tree.*, union_obj.node_offset, field_i); + const msg = try sema.errMsg(&block_scope, fields_src, "packed unions cannot contain fields of type '{}'", .{field_ty.fmt(sema.mod)}); + errdefer msg.destroy(sema.gpa); + + try sema.explainWhyTypeIsNotPacked(msg, fields_src.toSrcLoc(decl), field_ty); + + try sema.addDeclaredHereNote(msg, field_ty); + break :msg msg; + }; + return sema.failWithOwnedErrorMsg(msg); } gop.value_ptr.* = .{ @@ -27231,7 +27587,7 @@ fn semaUnionFields(mod: *Module, union_obj: *Module.Union) CompileError!void { try sema.addDeclaredHereNote(msg, union_obj.tag_ty); break :msg msg; }; - return sema.failWithOwnedErrorMsg(&block_scope, msg); + return sema.failWithOwnedErrorMsg(msg); } } } @@ -27494,9 +27850,19 @@ pub fn typeHasOnePossibleValue( .@"struct" => { const resolved_ty = try sema.resolveTypeFields(block, src, ty); const s = resolved_ty.castTag(.@"struct").?.data; - for (s.fields.values()) |value| { - if (value.is_comptime) continue; - if ((try sema.typeHasOnePossibleValue(block, src, value.ty)) == null) { + for (s.fields.values()) |field, i| { + if (field.is_comptime) continue; + if 
(field.ty.eql(resolved_ty, sema.mod)) { + const msg = try Module.ErrorMsg.create( + sema.gpa, + s.srcLoc(sema.mod), + "struct '{}' depends on itself", + .{ty.fmt(sema.mod)}, + ); + try sema.addFieldErrNote(block, resolved_ty, i, msg, "while checking this field", .{}); + return sema.failWithOwnedErrorMsg(msg); + } + if ((try sema.typeHasOnePossibleValue(block, src, field.ty)) == null) { return null; } } @@ -27562,6 +27928,16 @@ pub fn typeHasOnePossibleValue( const tag_val = (try sema.typeHasOnePossibleValue(block, src, union_obj.tag_ty)) orelse return null; const only_field = union_obj.fields.values()[0]; + if (only_field.ty.eql(resolved_ty, sema.mod)) { + const msg = try Module.ErrorMsg.create( + sema.gpa, + union_obj.srcLoc(sema.mod), + "union '{}' depends on itself", + .{ty.fmt(sema.mod)}, + ); + try sema.addFieldErrNote(block, resolved_ty, 0, msg, "while checking this field", .{}); + return sema.failWithOwnedErrorMsg(msg); + } const val_val = (try sema.typeHasOnePossibleValue(block, src, only_field.ty)) orelse return null; // TODO make this not allocate. 
The function in `Type.onePossibleValue` @@ -28201,7 +28577,7 @@ pub fn typeRequiresComptime(sema: *Sema, block: *Block, src: LazySrcLoc, ty: Typ if (struct_obj.status == .field_types_wip) return false; - try sema.resolveTypeFieldsStruct(block, src, ty, struct_obj); + try sema.resolveTypeFieldsStruct(ty, struct_obj); struct_obj.requires_comptime = .wip; for (struct_obj.fields.values()) |field| { @@ -28226,7 +28602,7 @@ pub fn typeRequiresComptime(sema: *Sema, block: *Block, src: LazySrcLoc, ty: Typ if (union_obj.status == .field_types_wip) return false; - try sema.resolveTypeFieldsUnion(block, src, ty, union_obj); + try sema.resolveTypeFieldsUnion(ty, union_obj); union_obj.requires_comptime = .wip; for (union_obj.fields.values()) |field| { diff --git a/src/Zir.zig b/src/Zir.zig index ccd677df0b..3aa2378697 100644 --- a/src/Zir.zig +++ b/src/Zir.zig @@ -3548,6 +3548,12 @@ pub const Inst = struct { ty: Ref, init_count: u32, }; + + pub const Src = struct { + node: i32, + line: u32, + column: u32, + }; }; pub const SpecialProng = enum { none, @"else", under }; diff --git a/src/link/Dwarf.zig b/src/link/Dwarf.zig index 03ba53801b..627f946e36 100644 --- a/src/link/Dwarf.zig +++ b/src/link/Dwarf.zig @@ -853,8 +853,7 @@ pub fn commitDeclState( .macho => { const macho_file = file.cast(File.MachO).?; const d_sym = &macho_file.d_sym.?; - const dwarf_segment = &d_sym.load_commands.items[d_sym.dwarf_segment_cmd_index.?].segment; - const debug_line_sect = &dwarf_segment.sections.items[d_sym.debug_line_section_index.?]; + const debug_line_sect = &d_sym.sections.items[d_sym.debug_line_section_index.?]; const file_pos = debug_line_sect.offset + src_fn.off; try pwriteDbgLineNops(d_sym.file, file_pos, 0, &[0]u8{}, src_fn.len); }, @@ -933,8 +932,8 @@ pub fn commitDeclState( .macho => { const macho_file = file.cast(File.MachO).?; const d_sym = &macho_file.d_sym.?; - const dwarf_segment = &d_sym.load_commands.items[d_sym.dwarf_segment_cmd_index.?].segment; - const debug_line_sect = 
&dwarf_segment.sections.items[d_sym.debug_line_section_index.?]; + const dwarf_segment = d_sym.segments.items[d_sym.dwarf_segment_cmd_index.?]; + const debug_line_sect = &d_sym.sections.items[d_sym.debug_line_section_index.?]; if (needed_size != debug_line_sect.size) { if (needed_size > d_sym.allocatedSize(debug_line_sect.offset)) { const new_offset = d_sym.findFreeSpace(needed_size, 1); @@ -955,10 +954,9 @@ pub fn commitDeclState( ); debug_line_sect.offset = @intCast(u32, new_offset); - debug_line_sect.addr = dwarf_segment.inner.vmaddr + new_offset - dwarf_segment.inner.fileoff; + debug_line_sect.addr = dwarf_segment.vmaddr + new_offset - dwarf_segment.fileoff; } debug_line_sect.size = needed_size; - d_sym.load_commands_dirty = true; // TODO look into making only the one section dirty d_sym.debug_line_header_dirty = true; } const file_pos = debug_line_sect.offset + src_fn.off; @@ -1137,8 +1135,7 @@ fn updateDeclDebugInfoAllocation(self: *Dwarf, file: *File, atom: *Atom, len: u3 .macho => { const macho_file = file.cast(File.MachO).?; const d_sym = &macho_file.d_sym.?; - const dwarf_segment = &d_sym.load_commands.items[d_sym.dwarf_segment_cmd_index.?].segment; - const debug_info_sect = &dwarf_segment.sections.items[d_sym.debug_info_section_index.?]; + const debug_info_sect = &d_sym.sections.items[d_sym.debug_info_section_index.?]; const file_pos = debug_info_sect.offset + atom.off; try pwriteDbgInfoNops(d_sym.file, file_pos, 0, &[0]u8{}, atom.len, false); }, @@ -1235,8 +1232,8 @@ fn writeDeclDebugInfo(self: *Dwarf, file: *File, atom: *Atom, dbg_info_buf: []co .macho => { const macho_file = file.cast(File.MachO).?; const d_sym = &macho_file.d_sym.?; - const dwarf_segment = &d_sym.load_commands.items[d_sym.dwarf_segment_cmd_index.?].segment; - const debug_info_sect = &dwarf_segment.sections.items[d_sym.debug_info_section_index.?]; + const dwarf_segment = d_sym.segments.items[d_sym.dwarf_segment_cmd_index.?]; + const debug_info_sect = 
&d_sym.sections.items[d_sym.debug_info_section_index.?]; if (needed_size != debug_info_sect.size) { if (needed_size > d_sym.allocatedSize(debug_info_sect.offset)) { const new_offset = d_sym.findFreeSpace(needed_size, 1); @@ -1257,10 +1254,9 @@ fn writeDeclDebugInfo(self: *Dwarf, file: *File, atom: *Atom, dbg_info_buf: []co ); debug_info_sect.offset = @intCast(u32, new_offset); - debug_info_sect.addr = dwarf_segment.inner.vmaddr + new_offset - dwarf_segment.inner.fileoff; + debug_info_sect.addr = dwarf_segment.vmaddr + new_offset - dwarf_segment.fileoff; } debug_info_sect.size = needed_size; - d_sym.load_commands_dirty = true; // TODO look into making only the one section dirty d_sym.debug_line_header_dirty = true; } const file_pos = debug_info_sect.offset + atom.off; @@ -1330,8 +1326,7 @@ pub fn updateDeclLineNumber(self: *Dwarf, file: *File, decl: *const Module.Decl) .macho => { const macho_file = file.cast(File.MachO).?; const d_sym = macho_file.d_sym.?; - const dwarf_seg = d_sym.load_commands.items[d_sym.dwarf_segment_cmd_index.?].segment; - const sect = dwarf_seg.sections.items[d_sym.debug_line_section_index.?]; + const sect = d_sym.sections.items[d_sym.debug_line_section_index.?]; const file_pos = sect.offset + decl.fn_link.macho.off + self.getRelocDbgLineOff(); try d_sym.file.pwriteAll(&data, file_pos); }, @@ -1557,14 +1552,14 @@ pub fn writeDbgAbbrev(self: *Dwarf, file: *File) !void { .macho => { const macho_file = file.cast(File.MachO).?; const d_sym = &macho_file.d_sym.?; - const dwarf_segment = &d_sym.load_commands.items[d_sym.dwarf_segment_cmd_index.?].segment; - const debug_abbrev_sect = &dwarf_segment.sections.items[d_sym.debug_abbrev_section_index.?]; + const dwarf_segment = d_sym.segments.items[d_sym.dwarf_segment_cmd_index.?]; + const debug_abbrev_sect = &d_sym.sections.items[d_sym.debug_abbrev_section_index.?]; const allocated_size = d_sym.allocatedSize(debug_abbrev_sect.offset); if (needed_size > allocated_size) { debug_abbrev_sect.size = 0; // 
free the space const offset = d_sym.findFreeSpace(needed_size, 1); debug_abbrev_sect.offset = @intCast(u32, offset); - debug_abbrev_sect.addr = dwarf_segment.inner.vmaddr + offset - dwarf_segment.inner.fileoff; + debug_abbrev_sect.addr = dwarf_segment.vmaddr + offset - dwarf_segment.fileoff; } debug_abbrev_sect.size = needed_size; log.debug("__debug_abbrev start=0x{x} end=0x{x}", .{ @@ -1681,8 +1676,7 @@ pub fn writeDbgInfoHeader(self: *Dwarf, file: *File, module: *Module, low_pc: u6 .macho => { const macho_file = file.cast(File.MachO).?; const d_sym = &macho_file.d_sym.?; - const dwarf_seg = d_sym.load_commands.items[d_sym.dwarf_segment_cmd_index.?].segment; - const debug_info_sect = dwarf_seg.sections.items[d_sym.debug_info_section_index.?]; + const debug_info_sect = d_sym.sections.items[d_sym.debug_info_section_index.?]; const file_pos = debug_info_sect.offset; try pwriteDbgInfoNops(d_sym.file, file_pos, 0, di_buf.items, jmp_amt, false); }, @@ -1998,13 +1992,13 @@ pub fn writeDbgAranges(self: *Dwarf, file: *File, addr: u64, size: u64) !void { .macho => { const macho_file = file.cast(File.MachO).?; const d_sym = &macho_file.d_sym.?; - const dwarf_seg = &d_sym.load_commands.items[d_sym.dwarf_segment_cmd_index.?].segment; - const debug_aranges_sect = &dwarf_seg.sections.items[d_sym.debug_aranges_section_index.?]; + const dwarf_seg = d_sym.segments.items[d_sym.dwarf_segment_cmd_index.?]; + const debug_aranges_sect = &d_sym.sections.items[d_sym.debug_aranges_section_index.?]; const allocated_size = d_sym.allocatedSize(debug_aranges_sect.offset); if (needed_size > allocated_size) { debug_aranges_sect.size = 0; // free the space const new_offset = d_sym.findFreeSpace(needed_size, 16); - debug_aranges_sect.addr = dwarf_seg.inner.vmaddr + new_offset - dwarf_seg.inner.fileoff; + debug_aranges_sect.addr = dwarf_seg.vmaddr + new_offset - dwarf_seg.fileoff; debug_aranges_sect.offset = @intCast(u32, new_offset); } debug_aranges_sect.size = needed_size; @@ -2134,8 +2128,7 @@ 
pub fn writeDbgLineHeader(self: *Dwarf, file: *File, module: *Module) !void { .macho => { const macho_file = file.cast(File.MachO).?; const d_sym = &macho_file.d_sym.?; - const dwarf_seg = d_sym.load_commands.items[d_sym.dwarf_segment_cmd_index.?].segment; - const debug_line_sect = dwarf_seg.sections.items[d_sym.debug_line_section_index.?]; + const debug_line_sect = d_sym.sections.items[d_sym.debug_line_section_index.?]; const file_pos = debug_line_sect.offset; try pwriteDbgLineNops(d_sym.file, file_pos, 0, di_buf.items, jmp_amt); }, @@ -2264,8 +2257,7 @@ pub fn flushModule(self: *Dwarf, file: *File, module: *Module) !void { .macho => { const macho_file = file.cast(File.MachO).?; const d_sym = &macho_file.d_sym.?; - const dwarf_segment = &d_sym.load_commands.items[d_sym.dwarf_segment_cmd_index.?].segment; - const debug_info_sect = &dwarf_segment.sections.items[d_sym.debug_info_section_index.?]; + const debug_info_sect = &d_sym.sections.items[d_sym.debug_info_section_index.?]; break :blk debug_info_sect.offset; }, // for wasm, the offset is always 0 as we write to memory first diff --git a/src/link/MachO.zig b/src/link/MachO.zig index a247b3e6c5..db207af5f5 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -17,6 +17,7 @@ const aarch64 = @import("../arch/aarch64/bits.zig"); const bind = @import("MachO/bind.zig"); const codegen = @import("../codegen.zig"); const dead_strip = @import("MachO/dead_strip.zig"); +const fat = @import("MachO/fat.zig"); const link = @import("../link.zig"); const llvm_backend = @import("../codegen/llvm.zig"); const target_util = @import("../target.zig"); @@ -60,6 +61,32 @@ const SystemLib = struct { weak: bool = false, }; +const Section = struct { + header: macho.section_64, + segment_index: u8, + + // TODO is null here necessary, or can we do away with tracking via section + // size in incremental context? + last_atom: ?*Atom = null, + + /// A list of atoms that have surplus capacity. 
This list can have false + /// positives, as functions grow and shrink over time, only sometimes being added + /// or removed from the freelist. + /// + /// An atom has surplus capacity when its overcapacity value is greater than + /// padToIdeal(minimum_atom_size). That is, when it has so + /// much extra capacity, that we could fit a small new symbol in it, itself with + /// ideal_capacity or more. + /// + /// Ideal capacity is defined by size + (size / ideal_factor). + /// + /// Overcapacity is measured by actual_capacity - ideal_capacity. Note that + /// overcapacity can be negative. A simple way to have negative overcapacity is to + /// allocate a fresh atom, which will have ideal capacity, and then grow it + /// by 1 byte. It will then have -1 overcapacity. + free_list: std.ArrayListUnmanaged(*Atom) = .{}, +}; + base: File, /// If this is not null, an object file is created by LLVM and linked with LLD afterwards. @@ -77,81 +104,32 @@ page_size: u16, /// fashion (default for LLVM backend). mode: enum { incremental, one_shot }, -/// The absolute address of the entry point. 
-entry_addr: ?u64 = null, - -/// Code signature (if any) -code_signature: ?CodeSignature = null, +uuid: macho.uuid_command = .{ + .cmdsize = @sizeOf(macho.uuid_command), + .uuid = undefined, +}, objects: std.ArrayListUnmanaged(Object) = .{}, archives: std.ArrayListUnmanaged(Archive) = .{}, - dylibs: std.ArrayListUnmanaged(Dylib) = .{}, dylibs_map: std.StringHashMapUnmanaged(u16) = .{}, referenced_dylibs: std.AutoArrayHashMapUnmanaged(u16, void) = .{}, -load_commands: std.ArrayListUnmanaged(macho.LoadCommand) = .{}, - -pagezero_segment_cmd_index: ?u16 = null, -text_segment_cmd_index: ?u16 = null, -data_const_segment_cmd_index: ?u16 = null, -data_segment_cmd_index: ?u16 = null, -linkedit_segment_cmd_index: ?u16 = null, -dyld_info_cmd_index: ?u16 = null, -symtab_cmd_index: ?u16 = null, -dysymtab_cmd_index: ?u16 = null, -dylinker_cmd_index: ?u16 = null, -data_in_code_cmd_index: ?u16 = null, -function_starts_cmd_index: ?u16 = null, -main_cmd_index: ?u16 = null, -dylib_id_cmd_index: ?u16 = null, -source_version_cmd_index: ?u16 = null, -build_version_cmd_index: ?u16 = null, -uuid_cmd_index: ?u16 = null, -code_signature_cmd_index: ?u16 = null, - -// __TEXT segment sections -text_section_index: ?u16 = null, -stubs_section_index: ?u16 = null, -stub_helper_section_index: ?u16 = null, -text_const_section_index: ?u16 = null, -cstring_section_index: ?u16 = null, -ustring_section_index: ?u16 = null, -gcc_except_tab_section_index: ?u16 = null, -unwind_info_section_index: ?u16 = null, -eh_frame_section_index: ?u16 = null, - -objc_methlist_section_index: ?u16 = null, -objc_methname_section_index: ?u16 = null, -objc_methtype_section_index: ?u16 = null, -objc_classname_section_index: ?u16 = null, - -// __DATA_CONST segment sections -got_section_index: ?u16 = null, -mod_init_func_section_index: ?u16 = null, -mod_term_func_section_index: ?u16 = null, -data_const_section_index: ?u16 = null, - -objc_cfstring_section_index: ?u16 = null, -objc_classlist_section_index: ?u16 = null, 
-objc_imageinfo_section_index: ?u16 = null, - -// __DATA segment sections -tlv_section_index: ?u16 = null, -tlv_data_section_index: ?u16 = null, -tlv_bss_section_index: ?u16 = null, -tlv_ptrs_section_index: ?u16 = null, -la_symbol_ptr_section_index: ?u16 = null, -data_section_index: ?u16 = null, -bss_section_index: ?u16 = null, - -objc_const_section_index: ?u16 = null, -objc_selrefs_section_index: ?u16 = null, -objc_classrefs_section_index: ?u16 = null, -objc_data_section_index: ?u16 = null, - -rustc_section_index: ?u16 = null, -rustc_section_size: u64 = 0, +segments: std.ArrayListUnmanaged(macho.segment_command_64) = .{}, +sections: std.MultiArrayList(Section) = .{}, + +pagezero_segment_cmd_index: ?u8 = null, +text_segment_cmd_index: ?u8 = null, +data_const_segment_cmd_index: ?u8 = null, +data_segment_cmd_index: ?u8 = null, +linkedit_segment_cmd_index: ?u8 = null, + +text_section_index: ?u8 = null, +stubs_section_index: ?u8 = null, +stub_helper_section_index: ?u8 = null, +got_section_index: ?u8 = null, +la_symbol_ptr_section_index: ?u8 = null, +data_section_index: ?u8 = null, locals: std.ArrayListUnmanaged(macho.nlist_64) = .{}, globals: std.StringArrayHashMapUnmanaged(SymbolWithLoc) = .{}, @@ -188,37 +166,12 @@ stubs_table: std.AutoHashMapUnmanaged(SymbolWithLoc, u32) = .{}, error_flags: File.ErrorFlags = File.ErrorFlags{}, -load_commands_dirty: bool = false, -sections_order_dirty: bool = false, - /// A helper var to indicate if we are at the start of the incremental updates, or /// already somewhere further along the update-and-run chain. /// TODO once we add opening a prelinked output binary from file, this will become /// obsolete as we will carry on where we left off. cold_start: bool = true, -section_ordinals: std.AutoArrayHashMapUnmanaged(MatchingSection, void) = .{}, - -/// A list of atoms that have surplus capacity. 
This list can have false -/// positives, as functions grow and shrink over time, only sometimes being added -/// or removed from the freelist. -/// -/// An atom has surplus capacity when its overcapacity value is greater than -/// padToIdeal(minimum_atom_size). That is, when it has so -/// much extra capacity, that we could fit a small new symbol in it, itself with -/// ideal_capacity or more. -/// -/// Ideal capacity is defined by size + (size / ideal_factor). -/// -/// Overcapacity is measured by actual_capacity - ideal_capacity. Note that -/// overcapacity can be negative. A simple way to have negative overcapacity is to -/// allocate a fresh atom, which will have ideal capacity, and then grow it -/// by 1 byte. It will then have -1 overcapacity. -atom_free_lists: std.AutoHashMapUnmanaged(MatchingSection, std.ArrayListUnmanaged(*Atom)) = .{}, - -/// Pointer to the last allocated atom -atoms: std.AutoHashMapUnmanaged(MatchingSection, *Atom) = .{}, - /// List of atoms that are either synthetic or map directly to the Zig source program. managed_atoms: std.ArrayListUnmanaged(*Atom) = .{}, @@ -250,7 +203,7 @@ unnamed_const_atoms: UnnamedConstTable = .{}, /// We store them here so that we can properly dispose of any allocated /// memory within the atom in the incremental linker. /// TODO consolidate this. 
-decls: std.AutoArrayHashMapUnmanaged(Module.Decl.Index, ?MatchingSection) = .{}, +decls: std.AutoArrayHashMapUnmanaged(Module.Decl.Index, ?u8) = .{}, const Entry = struct { target: SymbolWithLoc, @@ -408,12 +361,7 @@ pub fn openPath(allocator: Allocator, options: link.Options) !*MachO { pub fn createEmpty(gpa: Allocator, options: link.Options) !*MachO { const cpu_arch = options.target.cpu.arch; - const os_tag = options.target.os.tag; - const abi = options.target.abi; const page_size: u16 = if (cpu_arch == .aarch64) 0x4000 else 0x1000; - // Adhoc code signature is required when targeting aarch64-macos either directly or indirectly via the simulator - // ABI such as aarch64-ios-simulator, etc. - const requires_adhoc_codesig = cpu_arch == .aarch64 and (os_tag == .macos or abi == .simulator); const use_llvm = build_options.have_llvm and options.use_llvm; const use_stage1 = build_options.is_stage1 and options.use_stage1; @@ -428,10 +376,6 @@ pub fn createEmpty(gpa: Allocator, options: link.Options) !*MachO { .file = null, }, .page_size = page_size, - .code_signature = if (requires_adhoc_codesig) - CodeSignature.init(page_size) - else - null, .mode = if (use_stage1 or use_llvm or options.module == null or options.cache_mode == .whole) .one_shot else @@ -562,8 +506,8 @@ pub fn flushModule(self: *MachO, comp: *Compilation, prog_node: *std.Progress.No var dependent_libs = std.fifo.LinearFifo(struct { id: Dylib.Id, parent: u16, - }, .Dynamic).init(self.base.allocator); - defer dependent_libs.deinit(); + }, .Dynamic).init(arena); + try self.parseLibs(libs.keys(), libs.values(), self.base.options.sysroot, &dependent_libs); try self.parseDependentLibs(self.base.options.sysroot, &dependent_libs); } @@ -573,7 +517,6 @@ pub fn flushModule(self: *MachO, comp: *Compilation, prog_node: *std.Progress.No try self.createDyldPrivateAtom(); try self.createStubHelperPreambleAtom(); try self.resolveSymbolsInDylibs(); - try self.addCodeSignatureLC(); if (self.unresolved.count() > 0) { 
return error.UndefinedSymbolReference; @@ -583,67 +526,92 @@ pub fn flushModule(self: *MachO, comp: *Compilation, prog_node: *std.Progress.No if (build_options.enable_logging) { self.logSymtab(); - self.logSectionOrdinals(); + self.logSections(); self.logAtoms(); } try self.writeAtomsIncremental(); - try self.setEntryPoint(); - try self.updateSectionOrdinals(); - try self.writeLinkeditSegment(); + var lc_buffer = std.ArrayList(u8).init(arena); + const lc_writer = lc_buffer.writer(); + var ncmds: u32 = 0; - if (self.d_sym) |*d_sym| { - // Flush debug symbols bundle. - try d_sym.flushModule(self.base.allocator, self.base.options); + try self.writeLinkeditSegmentData(&ncmds, lc_writer); + try writeDylinkerLC(&ncmds, lc_writer); + + self.writeMainLC(&ncmds, lc_writer) catch |err| switch (err) { + error.MissingMainEntrypoint => { + self.error_flags.no_entry_point_found = true; + }, + else => |e| return e, + }; + + try self.writeDylibIdLC(&ncmds, lc_writer); + try self.writeRpathLCs(&ncmds, lc_writer); + + { + try lc_writer.writeStruct(macho.source_version_command{ + .cmdsize = @sizeOf(macho.source_version_command), + .version = 0x0, + }); + ncmds += 1; } - // code signature and entitlements - if (self.base.options.entitlements) |path| { - if (self.code_signature) |*csig| { - try csig.addEntitlements(self.base.allocator, path); - csig.code_directory.ident = self.base.options.emit.?.sub_path; - } else { - var csig = CodeSignature.init(self.page_size); - try csig.addEntitlements(self.base.allocator, path); - csig.code_directory.ident = self.base.options.emit.?.sub_path; - self.code_signature = csig; - } + try self.writeBuildVersionLC(&ncmds, lc_writer); + + { + std.crypto.random.bytes(&self.uuid.uuid); + try lc_writer.writeStruct(self.uuid); + ncmds += 1; } - if (self.code_signature) |*csig| { - csig.clear(self.base.allocator); - csig.code_directory.ident = self.base.options.emit.?.sub_path; + try self.writeLoadDylibLCs(&ncmds, lc_writer); + + const target = 
self.base.options.target; + const requires_codesig = blk: { + if (self.base.options.entitlements) |_| break :blk true; + if (target.cpu.arch == .aarch64 and (target.os.tag == .macos or target.abi == .simulator)) + break :blk true; + break :blk false; + }; + var codesig_offset: ?u32 = null; + var codesig: ?CodeSignature = if (requires_codesig) blk: { // Preallocate space for the code signature. // We need to do this at this stage so that we have the load commands with proper values // written out to the file. // The most important here is to have the correct vm and filesize of the __LINKEDIT segment // where the code signature goes into. - try self.writeCodeSignaturePadding(csig); - } + var codesig = CodeSignature.init(self.page_size); + codesig.code_directory.ident = self.base.options.emit.?.sub_path; + if (self.base.options.entitlements) |path| { + try codesig.addEntitlements(arena, path); + } + codesig_offset = try self.writeCodeSignaturePadding(&codesig, &ncmds, lc_writer); + break :blk codesig; + } else null; - try self.writeLoadCommands(); - try self.writeHeader(); + var headers_buf = std.ArrayList(u8).init(arena); + try self.writeSegmentHeaders(&ncmds, headers_buf.writer()); - if (self.entry_addr == null and self.base.options.output_mode == .Exe) { - log.debug("flushing. no_entry_point_found = true", .{}); - self.error_flags.no_entry_point_found = true; - } else { - log.debug("flushing. 
no_entry_point_found = false", .{}); - self.error_flags.no_entry_point_found = false; - } + try self.base.file.?.pwriteAll(headers_buf.items, @sizeOf(macho.mach_header_64)); + try self.base.file.?.pwriteAll(lc_buffer.items, @sizeOf(macho.mach_header_64) + headers_buf.items.len); - assert(!self.load_commands_dirty); + try self.writeHeader(ncmds, @intCast(u32, lc_buffer.items.len + headers_buf.items.len)); - if (self.code_signature) |*csig| { - try self.writeCodeSignature(csig); // code signing always comes last + if (codesig) |*csig| { + try self.writeCodeSignature(csig, codesig_offset.?); // code signing always comes last } - if (build_options.enable_link_snapshots) { - if (self.base.options.enable_link_snapshots) - try self.snapshotState(); + if (self.d_sym) |*d_sym| { + // Flush debug symbols bundle. + try d_sym.flushModule(self.base.allocator, self.base.options); } + // if (build_options.enable_link_snapshots) { + // if (self.base.options.enable_link_snapshots) + // try self.snapshotState(); + // } + if (cache_miss) { // Update the file with the digest. If it fails we can continue; it only // means that the next invocation will have an unnecessary cache miss. 
@@ -708,6 +676,9 @@ fn linkOneShot(self: *MachO, comp: *Compilation, prog_node: *std.Progress.Node) sub_prog_node.context.refresh(); defer sub_prog_node.end(); + const cpu_arch = self.base.options.target.cpu.arch; + const os_tag = self.base.options.target.os.tag; + const abi = self.base.options.target.abi; const is_lib = self.base.options.output_mode == .Lib; const is_dyn_lib = self.base.options.link_mode == .Dynamic and is_lib; const is_exe_or_dyn_lib = is_dyn_lib or self.base.options.output_mode == .Exe; @@ -990,40 +961,6 @@ fn linkOneShot(self: *MachO, comp: *Compilation, prog_node: *std.Progress.Node) } } - // rpaths - var rpath_table = std.StringArrayHashMap(void).init(arena); - for (self.base.options.rpath_list) |rpath| { - if (rpath_table.contains(rpath)) continue; - const cmdsize = @intCast(u32, mem.alignForwardGeneric( - u64, - @sizeOf(macho.rpath_command) + rpath.len + 1, - @sizeOf(u64), - )); - var rpath_cmd = macho.emptyGenericCommandWithData(macho.rpath_command{ - .cmdsize = cmdsize, - .path = @sizeOf(macho.rpath_command), - }); - rpath_cmd.data = try gpa.alloc(u8, cmdsize - rpath_cmd.inner.path); - mem.set(u8, rpath_cmd.data, 0); - mem.copy(u8, rpath_cmd.data, rpath); - try self.load_commands.append(gpa, .{ .rpath = rpath_cmd }); - try rpath_table.putNoClobber(rpath, {}); - self.load_commands_dirty = true; - } - - // code signature and entitlements - if (self.base.options.entitlements) |path| { - if (self.code_signature) |*csig| { - try csig.addEntitlements(gpa, path); - csig.code_directory.ident = self.base.options.emit.?.sub_path; - } else { - var csig = CodeSignature.init(self.page_size); - try csig.addEntitlements(gpa, path); - csig.code_directory.ident = self.base.options.emit.?.sub_path; - self.code_signature = csig; - } - } - if (self.base.options.verbose_link) { var argv = std.ArrayList([]const u8).init(arena); @@ -1048,7 +985,7 @@ fn linkOneShot(self: *MachO, comp: *Compilation, prog_node: *std.Progress.Node) try argv.append(syslibroot); } - 
for (rpath_table.keys()) |rpath| { + for (self.base.options.rpath_list) |rpath| { try argv.append("-rpath"); try argv.append(rpath); } @@ -1157,25 +1094,22 @@ fn linkOneShot(self: *MachO, comp: *Compilation, prog_node: *std.Progress.Node) var dependent_libs = std.fifo.LinearFifo(struct { id: Dylib.Id, parent: u16, - }, .Dynamic).init(gpa); - defer dependent_libs.deinit(); + }, .Dynamic).init(arena); + try self.parseInputFiles(positionals.items, self.base.options.sysroot, &dependent_libs); try self.parseAndForceLoadStaticArchives(must_link_archives.keys()); try self.parseLibs(libs.keys(), libs.values(), self.base.options.sysroot, &dependent_libs); try self.parseDependentLibs(self.base.options.sysroot, &dependent_libs); - for (self.objects.items) |*object, object_id| { - try self.resolveSymbolsInObject(object, @intCast(u16, object_id)); + for (self.objects.items) |_, object_id| { + try self.resolveSymbolsInObject(@intCast(u16, object_id)); } try self.resolveSymbolsInArchives(); try self.resolveDyldStubBinder(); - try self.createDyldPrivateAtom(); - try self.createStubHelperPreambleAtom(); try self.resolveSymbolsInDylibs(); try self.createMhExecuteHeaderSymbol(); try self.createDsoHandleSymbol(); - try self.addCodeSignatureLC(); try self.resolveSymbolsAtLoading(); if (self.unresolved.count() > 0) { @@ -1188,7 +1122,13 @@ fn linkOneShot(self: *MachO, comp: *Compilation, prog_node: *std.Progress.Node) return error.FrameworkNotFound; } + for (self.objects.items) |*object| { + try object.scanInputSections(self); + } + + try self.createDyldPrivateAtom(); try self.createTentativeDefAtoms(); + try self.createStubHelperPreambleAtom(); for (self.objects.items) |*object, object_id| { try object.splitIntoAtomsOneShot(self, @intCast(u32, object_id)); @@ -1198,49 +1138,82 @@ fn linkOneShot(self: *MachO, comp: *Compilation, prog_node: *std.Progress.Node) try dead_strip.gcAtoms(self); } - try self.pruneAndSortSections(); try self.allocateSegments(); try self.allocateSymbols(); try 
self.allocateSpecialSymbols(); - if (build_options.enable_logging) { + if (build_options.enable_logging or true) { self.logSymtab(); - self.logSectionOrdinals(); + self.logSections(); self.logAtoms(); } try self.writeAtomsOneShot(); - if (self.rustc_section_index) |id| { - const sect = self.getSectionPtr(.{ - .seg = self.data_segment_cmd_index.?, - .sect = id, + var lc_buffer = std.ArrayList(u8).init(arena); + const lc_writer = lc_buffer.writer(); + var ncmds: u32 = 0; + + try self.writeLinkeditSegmentData(&ncmds, lc_writer); + try writeDylinkerLC(&ncmds, lc_writer); + try self.writeMainLC(&ncmds, lc_writer); + try self.writeDylibIdLC(&ncmds, lc_writer); + try self.writeRpathLCs(&ncmds, lc_writer); + + { + try lc_writer.writeStruct(macho.source_version_command{ + .cmdsize = @sizeOf(macho.source_version_command), + .version = 0x0, }); - sect.size = self.rustc_section_size; + ncmds += 1; + } + + try self.writeBuildVersionLC(&ncmds, lc_writer); + + { + var uuid_lc = macho.uuid_command{ + .cmdsize = @sizeOf(macho.uuid_command), + .uuid = undefined, + }; + std.crypto.random.bytes(&uuid_lc.uuid); + try lc_writer.writeStruct(uuid_lc); + ncmds += 1; } - try self.setEntryPoint(); - try self.writeLinkeditSegment(); + try self.writeLoadDylibLCs(&ncmds, lc_writer); - if (self.code_signature) |*csig| { - csig.clear(gpa); - csig.code_directory.ident = self.base.options.emit.?.sub_path; + const requires_codesig = blk: { + if (self.base.options.entitlements) |_| break :blk true; + if (cpu_arch == .aarch64 and (os_tag == .macos or abi == .simulator)) break :blk true; + break :blk false; + }; + var codesig_offset: ?u32 = null; + var codesig: ?CodeSignature = if (requires_codesig) blk: { // Preallocate space for the code signature. // We need to do this at this stage so that we have the load commands with proper values // written out to the file. // The most important here is to have the correct vm and filesize of the __LINKEDIT segment // where the code signature goes into. 
- try self.writeCodeSignaturePadding(csig); - } + var codesig = CodeSignature.init(self.page_size); + codesig.code_directory.ident = self.base.options.emit.?.sub_path; + if (self.base.options.entitlements) |path| { + try codesig.addEntitlements(arena, path); + } + codesig_offset = try self.writeCodeSignaturePadding(&codesig, &ncmds, lc_writer); + break :blk codesig; + } else null; + + var headers_buf = std.ArrayList(u8).init(arena); + try self.writeSegmentHeaders(&ncmds, headers_buf.writer()); - try self.writeLoadCommands(); - try self.writeHeader(); + try self.base.file.?.pwriteAll(headers_buf.items, @sizeOf(macho.mach_header_64)); + try self.base.file.?.pwriteAll(lc_buffer.items, @sizeOf(macho.mach_header_64) + headers_buf.items.len); - assert(!self.load_commands_dirty); + try self.writeHeader(ncmds, @intCast(u32, lc_buffer.items.len + headers_buf.items.len)); - if (self.code_signature) |*csig| { - try self.writeCodeSignature(csig); // code signing always comes last + if (codesig) |*csig| { + try self.writeCodeSignature(csig, codesig_offset.?); // code signing always comes last } } @@ -1395,66 +1368,77 @@ fn resolveFramework( } fn parseObject(self: *MachO, path: []const u8) !bool { + const gpa = self.base.allocator; const file = fs.cwd().openFile(path, .{}) catch |err| switch (err) { error.FileNotFound => return false, else => |e| return e, }; - errdefer file.close(); - - const name = try self.base.allocator.dupe(u8, path); - errdefer self.base.allocator.free(name); + defer file.close(); + const name = try gpa.dupe(u8, path); + errdefer gpa.free(name); + const cpu_arch = self.base.options.target.cpu.arch; const mtime: u64 = mtime: { const stat = file.stat() catch break :mtime 0; break :mtime @intCast(u64, @divFloor(stat.mtime, 1_000_000_000)); }; + const file_stat = try file.stat(); + const file_size = math.cast(usize, file_stat.size) orelse return error.Overflow; + const contents = try file.readToEndAllocOptions(gpa, file_size, file_size, @alignOf(u64), null); 
var object = Object{ .name = name, - .file = file, .mtime = mtime, + .contents = contents, }; - object.parse(self.base.allocator, self.base.options.target.cpu.arch) catch |err| switch (err) { + object.parse(gpa, cpu_arch) catch |err| switch (err) { error.EndOfStream, error.NotObject => { - object.deinit(self.base.allocator); + object.deinit(gpa); return false; }, else => |e| return e, }; - try self.objects.append(self.base.allocator, object); + try self.objects.append(gpa, object); return true; } fn parseArchive(self: *MachO, path: []const u8, force_load: bool) !bool { + const gpa = self.base.allocator; const file = fs.cwd().openFile(path, .{}) catch |err| switch (err) { error.FileNotFound => return false, else => |e| return e, }; errdefer file.close(); - const name = try self.base.allocator.dupe(u8, path); - errdefer self.base.allocator.free(name); + const name = try gpa.dupe(u8, path); + errdefer gpa.free(name); + const cpu_arch = self.base.options.target.cpu.arch; + const reader = file.reader(); + const fat_offset = try fat.getLibraryOffset(reader, cpu_arch); + try reader.context.seekTo(fat_offset); var archive = Archive{ .name = name, + .fat_offset = fat_offset, .file = file, }; - archive.parse(self.base.allocator, self.base.options.target.cpu.arch) catch |err| switch (err) { + archive.parse(gpa, reader) catch |err| switch (err) { error.EndOfStream, error.NotArchive => { - archive.deinit(self.base.allocator); + archive.deinit(gpa); return false; }, else => |e| return e, }; if (force_load) { - defer archive.deinit(self.base.allocator); + defer archive.deinit(gpa); + defer file.close(); // Get all offsets from the ToC - var offsets = std.AutoArrayHashMap(u32, void).init(self.base.allocator); + var offsets = std.AutoArrayHashMap(u32, void).init(gpa); defer offsets.deinit(); for (archive.toc.values()) |offs| { for (offs.items) |off| { @@ -1462,15 +1446,11 @@ fn parseArchive(self: *MachO, path: []const u8, force_load: bool) !bool { } } for (offsets.keys()) |off| { - 
const object = try self.objects.addOne(self.base.allocator); - object.* = try archive.parseObject( - self.base.allocator, - self.base.options.target.cpu.arch, - off, - ); + const object = try archive.parseObject(gpa, cpu_arch, off); + try self.objects.append(gpa, object); } } else { - try self.archives.append(self.base.allocator, archive); + try self.archives.append(gpa, archive); } return true; @@ -1481,6 +1461,7 @@ const ParseDylibError = error{ EmptyStubFile, MismatchedCpuArchitecture, UnsupportedCpuArchitecture, + EndOfStream, } || fs.File.OpenError || std.os.PReadError || Dylib.Id.ParseError; const DylibCreateOpts = struct { @@ -1497,43 +1478,53 @@ pub fn parseDylib( dependent_libs: anytype, opts: DylibCreateOpts, ) ParseDylibError!bool { + const gpa = self.base.allocator; const file = fs.cwd().openFile(path, .{}) catch |err| switch (err) { error.FileNotFound => return false, else => |e| return e, }; - errdefer file.close(); + defer file.close(); + + const cpu_arch = self.base.options.target.cpu.arch; + const file_stat = try file.stat(); + var file_size = math.cast(usize, file_stat.size) orelse return error.Overflow; + + const reader = file.reader(); + const fat_offset = math.cast(usize, try fat.getLibraryOffset(reader, cpu_arch)) orelse + return error.Overflow; + try file.seekTo(fat_offset); + file_size -= fat_offset; - const name = try self.base.allocator.dupe(u8, path); - errdefer self.base.allocator.free(name); + const contents = try file.readToEndAllocOptions(gpa, file_size, file_size, @alignOf(u64), null); + defer gpa.free(contents); const dylib_id = @intCast(u16, self.dylibs.items.len); - var dylib = Dylib{ - .name = name, - .file = file, - .weak = opts.weak, - }; + var dylib = Dylib{ .weak = opts.weak }; - dylib.parse( - self.base.allocator, - self.base.options.target.cpu.arch, + dylib.parseFromBinary( + gpa, + cpu_arch, dylib_id, dependent_libs, + path, + contents, ) catch |err| switch (err) { error.EndOfStream, error.NotDylib => { try file.seekTo(0); 
- var lib_stub = LibStub.loadFromFile(self.base.allocator, file) catch { - dylib.deinit(self.base.allocator); + var lib_stub = LibStub.loadFromFile(gpa, file) catch { + dylib.deinit(gpa); return false; }; defer lib_stub.deinit(); try dylib.parseFromStub( - self.base.allocator, + gpa, self.base.options.target, lib_stub, dylib_id, dependent_libs, + path, ); }, else => |e| return e, @@ -1547,13 +1538,13 @@ pub fn parseDylib( log.warn(" dylib version: {}", .{dylib.id.?.current_version}); // TODO maybe this should be an error and facilitate auto-cleanup? - dylib.deinit(self.base.allocator); + dylib.deinit(gpa); return false; } } - try self.dylibs.append(self.base.allocator, dylib); - try self.dylibs_map.putNoClobber(self.base.allocator, dylib.id.?.name, dylib_id); + try self.dylibs.append(gpa, dylib); + try self.dylibs_map.putNoClobber(gpa, dylib.id.?.name, dylib_id); const should_link_dylib_even_if_unreachable = blk: { if (self.base.options.dead_strip_dylibs and !opts.needed) break :blk false; @@ -1561,8 +1552,7 @@ pub fn parseDylib( }; if (should_link_dylib_even_if_unreachable) { - try self.addLoadDylibLC(dylib_id); - try self.referenced_dylibs.putNoClobber(self.base.allocator, dylib_id, {}); + try self.referenced_dylibs.putNoClobber(gpa, dylib_id, {}); } return true; @@ -1572,10 +1562,8 @@ fn parseInputFiles(self: *MachO, files: []const []const u8, syslibroot: ?[]const for (files) |file_name| { const full_path = full_path: { var buffer: [fs.MAX_PATH_BYTES]u8 = undefined; - const path = try fs.realpath(file_name, &buffer); - break :full_path try self.base.allocator.dupe(u8, path); + break :full_path try fs.realpath(file_name, &buffer); }; - defer self.base.allocator.free(full_path); log.debug("parsing input file path '{s}'", .{full_path}); if (try self.parseObject(full_path)) continue; @@ -1592,10 +1580,8 @@ fn parseAndForceLoadStaticArchives(self: *MachO, files: []const []const u8) !voi for (files) |file_name| { const full_path = full_path: { var buffer: 
[fs.MAX_PATH_BYTES]u8 = undefined; - const path = try fs.realpath(file_name, &buffer); - break :full_path try self.base.allocator.dupe(u8, path); + break :full_path try fs.realpath(file_name, &buffer); }; - defer self.base.allocator.free(full_path); log.debug("parsing and force loading static archive '{s}'", .{full_path}); if (try self.parseArchive(full_path, true)) continue; @@ -1669,574 +1655,161 @@ fn parseDependentLibs(self: *MachO, syslibroot: ?[]const u8, dependent_libs: any } } -pub const MatchingSection = struct { - seg: u16, - sect: u16, - - pub fn eql(this: MatchingSection, other: struct { - seg: ?u16, - sect: ?u16, - }) bool { - const seg = other.seg orelse return false; - const sect = other.sect orelse return false; - return this.seg == seg and this.sect == sect; - } -}; - -pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSection { +pub fn getOutputSection(self: *MachO, sect: macho.section_64) !?u8 { const segname = sect.segName(); const sectname = sect.sectName(); - const res: ?MatchingSection = blk: { - switch (sect.type_()) { - macho.S_4BYTE_LITERALS, macho.S_8BYTE_LITERALS, macho.S_16BYTE_LITERALS => { - if (self.text_const_section_index == null) { - self.text_const_section_index = try self.initSection( - self.text_segment_cmd_index.?, - "__const", - sect.size, - sect.@"align", - .{}, - ); - } - - break :blk .{ - .seg = self.text_segment_cmd_index.?, - .sect = self.text_const_section_index.?, - }; - }, - macho.S_CSTRING_LITERALS => { - if (mem.eql(u8, sectname, "__objc_methname")) { - // TODO it seems the common values within the sections in objects are deduplicated/merged - // on merging the sections' contents. 
- if (self.objc_methname_section_index == null) { - self.objc_methname_section_index = try self.initSection( - self.text_segment_cmd_index.?, - "__objc_methname", - sect.size, - sect.@"align", - .{}, - ); - } - - break :blk .{ - .seg = self.text_segment_cmd_index.?, - .sect = self.objc_methname_section_index.?, - }; - } else if (mem.eql(u8, sectname, "__objc_methtype")) { - if (self.objc_methtype_section_index == null) { - self.objc_methtype_section_index = try self.initSection( - self.text_segment_cmd_index.?, - "__objc_methtype", - sect.size, - sect.@"align", - .{}, - ); - } - - break :blk .{ - .seg = self.text_segment_cmd_index.?, - .sect = self.objc_methtype_section_index.?, - }; - } else if (mem.eql(u8, sectname, "__objc_classname")) { - if (self.objc_classname_section_index == null) { - self.objc_classname_section_index = try self.initSection( - self.text_segment_cmd_index.?, - "__objc_classname", - sect.size, - sect.@"align", - .{}, - ); - } - - break :blk .{ - .seg = self.text_segment_cmd_index.?, - .sect = self.objc_classname_section_index.?, - }; - } - - if (self.cstring_section_index == null) { - self.cstring_section_index = try self.initSection( - self.text_segment_cmd_index.?, - "__cstring", - sect.size, - sect.@"align", - .{ - .flags = macho.S_CSTRING_LITERALS, - }, - ); - } - - break :blk .{ - .seg = self.text_segment_cmd_index.?, - .sect = self.cstring_section_index.?, - }; - }, - macho.S_LITERAL_POINTERS => { - if (mem.eql(u8, segname, "__DATA") and mem.eql(u8, sectname, "__objc_selrefs")) { - if (self.objc_selrefs_section_index == null) { - self.objc_selrefs_section_index = try self.initSection( - self.data_segment_cmd_index.?, - "__objc_selrefs", - sect.size, - sect.@"align", - .{ - .flags = macho.S_LITERAL_POINTERS, - }, - ); - } - - break :blk .{ - .seg = self.data_segment_cmd_index.?, - .sect = self.objc_selrefs_section_index.?, - }; - } else { - // TODO investigate - break :blk null; - } - }, - macho.S_MOD_INIT_FUNC_POINTERS => { - if 
(self.mod_init_func_section_index == null) { - self.mod_init_func_section_index = try self.initSection( - self.data_const_segment_cmd_index.?, - "__mod_init_func", - sect.size, - sect.@"align", - .{ - .flags = macho.S_MOD_INIT_FUNC_POINTERS, - }, - ); - } - - break :blk .{ - .seg = self.data_const_segment_cmd_index.?, - .sect = self.mod_init_func_section_index.?, - }; - }, - macho.S_MOD_TERM_FUNC_POINTERS => { - if (self.mod_term_func_section_index == null) { - self.mod_term_func_section_index = try self.initSection( - self.data_const_segment_cmd_index.?, - "__mod_term_func", - sect.size, - sect.@"align", - .{ - .flags = macho.S_MOD_TERM_FUNC_POINTERS, - }, - ); - } + const res: ?u8 = blk: { + if (mem.eql(u8, "__LLVM", segname)) { + log.debug("TODO LLVM section: type 0x{x}, name '{s},{s}'", .{ + sect.flags, segname, sectname, + }); + break :blk null; + } - break :blk .{ - .seg = self.data_const_segment_cmd_index.?, - .sect = self.mod_term_func_section_index.?, - }; - }, - macho.S_ZEROFILL => { - if (self.bss_section_index == null) { - self.bss_section_index = try self.initSection( - self.data_segment_cmd_index.?, - "__bss", - sect.size, - sect.@"align", - .{ - .flags = macho.S_ZEROFILL, - }, - ); - } + if (sect.isCode()) { + if (self.text_section_index == null) { + self.text_section_index = try self.initSection( + "__TEXT", + "__text", + sect.size, + sect.@"align", + .{ + .flags = macho.S_REGULAR | + macho.S_ATTR_PURE_INSTRUCTIONS | + macho.S_ATTR_SOME_INSTRUCTIONS, + }, + ); + } + break :blk self.text_section_index.?; + } - break :blk .{ - .seg = self.data_segment_cmd_index.?, - .sect = self.bss_section_index.?, - }; - }, - macho.S_THREAD_LOCAL_VARIABLES => { - if (self.tlv_section_index == null) { - self.tlv_section_index = try self.initSection( - self.data_segment_cmd_index.?, - "__thread_vars", - sect.size, - sect.@"align", - .{ - .flags = macho.S_THREAD_LOCAL_VARIABLES, - }, - ); - } + if (sect.isDebug()) { + // TODO debug attributes + if (mem.eql(u8, "__LD", 
segname) and mem.eql(u8, "__compact_unwind", sectname)) { + log.debug("TODO compact unwind section: type 0x{x}, name '{s},{s}'", .{ + sect.flags, segname, sectname, + }); + } + break :blk null; + } - break :blk .{ - .seg = self.data_segment_cmd_index.?, - .sect = self.tlv_section_index.?, - }; + switch (sect.@"type"()) { + macho.S_4BYTE_LITERALS, + macho.S_8BYTE_LITERALS, + macho.S_16BYTE_LITERALS, + => { + break :blk self.getSectionByName("__TEXT", "__const") orelse try self.initSection( + "__TEXT", + "__const", + sect.size, + sect.@"align", + .{}, + ); }, - macho.S_THREAD_LOCAL_VARIABLE_POINTERS => { - if (self.tlv_ptrs_section_index == null) { - self.tlv_ptrs_section_index = try self.initSection( - self.data_segment_cmd_index.?, - "__thread_ptrs", + macho.S_CSTRING_LITERALS => { + if (mem.startsWith(u8, sectname, "__objc")) { + break :blk self.getSectionByName(segname, sectname) orelse try self.initSection( + segname, + sectname, sect.size, sect.@"align", - .{ - .flags = macho.S_THREAD_LOCAL_VARIABLE_POINTERS, - }, + .{}, ); } - - break :blk .{ - .seg = self.data_segment_cmd_index.?, - .sect = self.tlv_ptrs_section_index.?, - }; + break :blk self.getSectionByName("__TEXT", "__cstring") orelse try self.initSection( + "__TEXT", + "__cstring", + sect.size, + sect.@"align", + .{ .flags = macho.S_CSTRING_LITERALS }, + ); }, - macho.S_THREAD_LOCAL_REGULAR => { - if (self.tlv_data_section_index == null) { - self.tlv_data_section_index = try self.initSection( - self.data_segment_cmd_index.?, - "__thread_data", - sect.size, - sect.@"align", - .{ - .flags = macho.S_THREAD_LOCAL_REGULAR, - }, - ); - } - - break :blk .{ - .seg = self.data_segment_cmd_index.?, - .sect = self.tlv_data_section_index.?, - }; + macho.S_MOD_INIT_FUNC_POINTERS, + macho.S_MOD_TERM_FUNC_POINTERS, + => { + break :blk self.getSectionByName("__DATA_CONST", sectname) orelse try self.initSection( + "__DATA_CONST", + sectname, + sect.size, + sect.@"align", + .{ .flags = sect.flags }, + ); }, - 
macho.S_THREAD_LOCAL_ZEROFILL => { - if (self.tlv_bss_section_index == null) { - self.tlv_bss_section_index = try self.initSection( - self.data_segment_cmd_index.?, - "__thread_bss", - sect.size, - sect.@"align", - .{ - .flags = macho.S_THREAD_LOCAL_ZEROFILL, - }, - ); - } - - break :blk .{ - .seg = self.data_segment_cmd_index.?, - .sect = self.tlv_bss_section_index.?, - }; + macho.S_LITERAL_POINTERS, + macho.S_ZEROFILL, + macho.S_THREAD_LOCAL_VARIABLES, + macho.S_THREAD_LOCAL_VARIABLE_POINTERS, + macho.S_THREAD_LOCAL_REGULAR, + macho.S_THREAD_LOCAL_ZEROFILL, + => { + break :blk self.getSectionByName(segname, sectname) orelse try self.initSection( + segname, + sectname, + sect.size, + sect.@"align", + .{ .flags = sect.flags }, + ); }, macho.S_COALESCED => { - if (mem.eql(u8, "__TEXT", segname) and mem.eql(u8, "__eh_frame", sectname)) { - // TODO I believe __eh_frame is currently part of __unwind_info section - // in the latest ld64 output. - if (self.eh_frame_section_index == null) { - self.eh_frame_section_index = try self.initSection( - self.text_segment_cmd_index.?, - "__eh_frame", - sect.size, - sect.@"align", - .{}, - ); - } - - break :blk .{ - .seg = self.text_segment_cmd_index.?, - .sect = self.eh_frame_section_index.?, - }; - } - - // TODO audit this: is this the right mapping? 
- if (self.data_const_section_index == null) { - self.data_const_section_index = try self.initSection( - self.data_const_segment_cmd_index.?, - "__const", - sect.size, - sect.@"align", - .{}, - ); - } - - break :blk .{ - .seg = self.data_const_segment_cmd_index.?, - .sect = self.data_const_section_index.?, - }; + break :blk self.getSectionByName(segname, sectname) orelse try self.initSection( + segname, + sectname, + sect.size, + sect.@"align", + .{}, + ); }, macho.S_REGULAR => { - if (sect.isCode()) { - if (self.text_section_index == null) { - self.text_section_index = try self.initSection( - self.text_segment_cmd_index.?, - "__text", - sect.size, - sect.@"align", - .{ - .flags = macho.S_REGULAR | - macho.S_ATTR_PURE_INSTRUCTIONS | - macho.S_ATTR_SOME_INSTRUCTIONS, - }, - ); - } - - break :blk .{ - .seg = self.text_segment_cmd_index.?, - .sect = self.text_section_index.?, - }; - } - if (sect.isDebug()) { - // TODO debug attributes - if (mem.eql(u8, "__LD", segname) and mem.eql(u8, "__compact_unwind", sectname)) { - log.debug("TODO compact unwind section: type 0x{x}, name '{s},{s}'", .{ - sect.flags, segname, sectname, - }); - } - break :blk null; - } - if (mem.eql(u8, segname, "__TEXT")) { - if (mem.eql(u8, sectname, "__ustring")) { - if (self.ustring_section_index == null) { - self.ustring_section_index = try self.initSection( - self.text_segment_cmd_index.?, - "__ustring", - sect.size, - sect.@"align", - .{}, - ); - } - - break :blk .{ - .seg = self.text_segment_cmd_index.?, - .sect = self.ustring_section_index.?, - }; - } else if (mem.eql(u8, sectname, "__gcc_except_tab")) { - if (self.gcc_except_tab_section_index == null) { - self.gcc_except_tab_section_index = try self.initSection( - self.text_segment_cmd_index.?, - "__gcc_except_tab", - sect.size, - sect.@"align", - .{}, - ); - } - - break :blk .{ - .seg = self.text_segment_cmd_index.?, - .sect = self.gcc_except_tab_section_index.?, - }; - } else if (mem.eql(u8, sectname, "__objc_methlist")) { - if 
(self.objc_methlist_section_index == null) { - self.objc_methlist_section_index = try self.initSection( - self.text_segment_cmd_index.?, - "__objc_methlist", - sect.size, - sect.@"align", - .{}, - ); - } - - break :blk .{ - .seg = self.text_segment_cmd_index.?, - .sect = self.objc_methlist_section_index.?, - }; - } else if (mem.eql(u8, sectname, "__rodata") or + if (mem.eql(u8, sectname, "__rodata") or mem.eql(u8, sectname, "__typelink") or mem.eql(u8, sectname, "__itablink") or mem.eql(u8, sectname, "__gosymtab") or mem.eql(u8, sectname, "__gopclntab")) { - if (self.data_const_section_index == null) { - self.data_const_section_index = try self.initSection( - self.data_const_segment_cmd_index.?, - "__const", - sect.size, - sect.@"align", - .{}, - ); - } - - break :blk .{ - .seg = self.data_const_segment_cmd_index.?, - .sect = self.data_const_section_index.?, - }; - } else { - if (self.text_const_section_index == null) { - self.text_const_section_index = try self.initSection( - self.text_segment_cmd_index.?, - "__const", - sect.size, - sect.@"align", - .{}, - ); - } - - break :blk .{ - .seg = self.text_segment_cmd_index.?, - .sect = self.text_const_section_index.?, - }; - } - } - - if (mem.eql(u8, segname, "__DATA_CONST")) { - if (self.data_const_section_index == null) { - self.data_const_section_index = try self.initSection( - self.data_const_segment_cmd_index.?, + break :blk self.getSectionByName("__DATA_CONST", "__const") orelse try self.initSection( + "__DATA_CONST", "__const", sect.size, sect.@"align", .{}, ); } - - break :blk .{ - .seg = self.data_const_segment_cmd_index.?, - .sect = self.data_const_section_index.?, - }; } - if (mem.eql(u8, segname, "__DATA")) { - if (mem.eql(u8, sectname, "__const")) { - if (self.data_const_section_index == null) { - self.data_const_section_index = try self.initSection( - self.data_const_segment_cmd_index.?, - "__const", - sect.size, - sect.@"align", - .{}, - ); - } - - break :blk .{ - .seg = 
self.data_const_segment_cmd_index.?, - .sect = self.data_const_section_index.?, - }; - } else if (mem.eql(u8, sectname, "__cfstring")) { - if (self.objc_cfstring_section_index == null) { - self.objc_cfstring_section_index = try self.initSection( - self.data_const_segment_cmd_index.?, - "__cfstring", - sect.size, - sect.@"align", - .{}, - ); - } - - break :blk .{ - .seg = self.data_const_segment_cmd_index.?, - .sect = self.objc_cfstring_section_index.?, - }; - } else if (mem.eql(u8, sectname, "__objc_classlist")) { - if (self.objc_classlist_section_index == null) { - self.objc_classlist_section_index = try self.initSection( - self.data_const_segment_cmd_index.?, - "__objc_classlist", - sect.size, - sect.@"align", - .{}, - ); - } - - break :blk .{ - .seg = self.data_const_segment_cmd_index.?, - .sect = self.objc_classlist_section_index.?, - }; - } else if (mem.eql(u8, sectname, "__objc_imageinfo")) { - if (self.objc_imageinfo_section_index == null) { - self.objc_imageinfo_section_index = try self.initSection( - self.data_const_segment_cmd_index.?, - "__objc_imageinfo", - sect.size, - sect.@"align", - .{}, - ); - } - - break :blk .{ - .seg = self.data_const_segment_cmd_index.?, - .sect = self.objc_imageinfo_section_index.?, - }; - } else if (mem.eql(u8, sectname, "__objc_const")) { - if (self.objc_const_section_index == null) { - self.objc_const_section_index = try self.initSection( - self.data_segment_cmd_index.?, - "__objc_const", - sect.size, - sect.@"align", - .{}, - ); - } - - break :blk .{ - .seg = self.data_segment_cmd_index.?, - .sect = self.objc_const_section_index.?, - }; - } else if (mem.eql(u8, sectname, "__objc_classrefs")) { - if (self.objc_classrefs_section_index == null) { - self.objc_classrefs_section_index = try self.initSection( - self.data_segment_cmd_index.?, - "__objc_classrefs", - sect.size, - sect.@"align", - .{}, - ); - } - - break :blk .{ - .seg = self.data_segment_cmd_index.?, - .sect = self.objc_classrefs_section_index.?, - }; - } else if 
(mem.eql(u8, sectname, "__objc_data")) { - if (self.objc_data_section_index == null) { - self.objc_data_section_index = try self.initSection( - self.data_segment_cmd_index.?, - "__objc_data", - sect.size, - sect.@"align", - .{}, - ); - } - - break :blk .{ - .seg = self.data_segment_cmd_index.?, - .sect = self.objc_data_section_index.?, - }; - } else if (mem.eql(u8, sectname, ".rustc")) { - if (self.rustc_section_index == null) { - self.rustc_section_index = try self.initSection( - self.data_segment_cmd_index.?, - ".rustc", - sect.size, - sect.@"align", - .{}, - ); - // We need to preserve the section size for rustc to properly - // decompress the metadata. - self.rustc_section_size = sect.size; - } - - break :blk .{ - .seg = self.data_segment_cmd_index.?, - .sect = self.rustc_section_index.?, - }; - } else { + if (mem.eql(u8, sectname, "__const") or + mem.eql(u8, sectname, "__cfstring") or + mem.eql(u8, sectname, "__objc_classlist") or + mem.eql(u8, sectname, "__objc_imageinfo")) + { + break :blk self.getSectionByName("__DATA_CONST", sectname) orelse + try self.initSection( + "__DATA_CONST", + sectname, + sect.size, + sect.@"align", + .{}, + ); + } else if (mem.eql(u8, sectname, "__data")) { if (self.data_section_index == null) { self.data_section_index = try self.initSection( - self.data_segment_cmd_index.?, - "__data", + segname, + sectname, sect.size, sect.@"align", .{}, ); } - - break :blk .{ - .seg = self.data_segment_cmd_index.?, - .sect = self.data_section_index.?, - }; + break :blk self.data_section_index.?; } } - - if (mem.eql(u8, "__LLVM", segname) and mem.eql(u8, "__asm", sectname)) { - log.debug("TODO LLVM asm section: type 0x{x}, name '{s},{s}'", .{ - sect.flags, segname, sectname, - }); - } - - break :blk null; + break :blk self.getSectionByName(segname, sectname) orelse try self.initSection( + segname, + sectname, + sect.size, + sect.@"align", + .{}, + ); }, else => break :blk null, } @@ -2259,30 +1832,33 @@ pub fn createEmptyAtom(gpa: Allocator, 
sym_index: u32, size: u64, alignment: u32 return atom; } -pub fn writeAtom(self: *MachO, atom: *Atom, match: MatchingSection) !void { - const sect = self.getSection(match); +pub fn writeAtom(self: *MachO, atom: *Atom, sect_id: u8) !void { + const section = self.sections.get(sect_id); const sym = atom.getSymbol(self); - const file_offset = sect.offset + sym.n_value - sect.addr; + const file_offset = section.header.offset + sym.n_value - section.header.addr; try atom.resolveRelocs(self); log.debug("writing atom for symbol {s} at file offset 0x{x}", .{ atom.getName(self), file_offset }); try self.base.file.?.pwriteAll(atom.code.items, file_offset); } fn allocateSymbols(self: *MachO) !void { - var it = self.atoms.iterator(); - while (it.next()) |entry| { - const match = entry.key_ptr.*; - var atom = entry.value_ptr.*; + const slice = self.sections.slice(); + for (slice.items(.last_atom)) |last_atom, sect_id| { + const header = slice.items(.header)[sect_id]; + var atom = last_atom orelse continue; while (atom.prev) |prev| { atom = prev; } - const n_sect = self.getSectionOrdinal(match); - const sect = self.getSection(match); - var base_vaddr = sect.addr; + const n_sect = @intCast(u8, sect_id + 1); + var base_vaddr = header.addr; - log.debug("allocating local symbols in sect({d}, '{s},{s}')", .{ n_sect, sect.segName(), sect.sectName() }); + log.debug("allocating local symbols in sect({d}, '{s},{s}')", .{ + n_sect, + header.segName(), + header.sectName(), + }); while (true) { const alignment = try math.powi(u32, 2, atom.alignment); @@ -2296,7 +1872,10 @@ fn allocateSymbols(self: *MachO) !void { // Update each symbol contained within the atom for (atom.contained.items) |sym_at_off| { - const contained_sym = self.getSymbolPtr(.{ .sym_index = sym_at_off.sym_index, .file = atom.file }); + const contained_sym = self.getSymbolPtr(.{ + .sym_index = sym_at_off.sym_index, + .file = atom.file, + }); contained_sym.n_value = base_vaddr + sym_at_off.offset; contained_sym.n_sect = 
n_sect; } @@ -2310,24 +1889,6 @@ fn allocateSymbols(self: *MachO) !void { } } -fn shiftLocalsByOffset(self: *MachO, match: MatchingSection, offset: i64) !void { - var atom = self.atoms.get(match) orelse return; - - while (true) { - const atom_sym = atom.getSymbolPtr(self); - atom_sym.n_value = @intCast(u64, @intCast(i64, atom_sym.n_value) + offset); - - for (atom.contained.items) |sym_at_off| { - const contained_sym = self.getSymbolPtr(.{ .sym_index = sym_at_off.sym_index, .file = atom.file }); - contained_sym.n_value = @intCast(u64, @intCast(i64, contained_sym.n_value) + offset); - } - - if (atom.prev) |prev| { - atom = prev; - } else break; - } -} - fn allocateSpecialSymbols(self: *MachO) !void { for (&[_][]const u8{ "___dso_handle", @@ -2336,16 +1897,13 @@ fn allocateSpecialSymbols(self: *MachO) !void { const global = self.globals.get(name) orelse continue; if (global.file != null) continue; const sym = self.getSymbolPtr(global); - const seg = self.load_commands.items[self.text_segment_cmd_index.?].segment; - sym.n_sect = self.getSectionOrdinal(.{ - .seg = self.text_segment_cmd_index.?, - .sect = 0, - }); - sym.n_value = seg.inner.vmaddr; + const seg = self.segments.items[self.text_segment_cmd_index.?]; + sym.n_sect = 1; + sym.n_value = seg.vmaddr; log.debug("allocating {s} at the start of {s}", .{ name, - seg.inner.segName(), + seg.segName(), }); } } @@ -2353,18 +1911,21 @@ fn allocateSpecialSymbols(self: *MachO) !void { fn writeAtomsOneShot(self: *MachO) !void { assert(self.mode == .one_shot); - var it = self.atoms.iterator(); - while (it.next()) |entry| { - const sect = self.getSection(entry.key_ptr.*); - var atom: *Atom = entry.value_ptr.*; + const gpa = self.base.allocator; + const slice = self.sections.slice(); + + for (slice.items(.last_atom)) |last_atom, sect_id| { + const header = slice.items(.header)[sect_id]; + if (header.size == 0) continue; + var atom = last_atom.?; - if (sect.flags == macho.S_ZEROFILL or sect.flags == macho.S_THREAD_LOCAL_ZEROFILL) 
continue; + if (header.isZerofill()) continue; - var buffer = std.ArrayList(u8).init(self.base.allocator); + var buffer = std.ArrayList(u8).init(gpa); defer buffer.deinit(); - try buffer.ensureTotalCapacity(math.cast(usize, sect.size) orelse return error.Overflow); + try buffer.ensureTotalCapacity(math.cast(usize, header.size) orelse return error.Overflow); - log.debug("writing atoms in {s},{s}", .{ sect.segName(), sect.sectName() }); + log.debug("writing atoms in {s},{s}", .{ header.segName(), header.sectName() }); while (atom.prev) |prev| { atom = prev; @@ -2399,18 +1960,18 @@ fn writeAtomsOneShot(self: *MachO) !void { if (atom.next) |next| { atom = next; } else { - assert(buffer.items.len == sect.size); - log.debug(" (writing at file offset 0x{x})", .{sect.offset}); - try self.base.file.?.pwriteAll(buffer.items, sect.offset); + assert(buffer.items.len == header.size); + log.debug(" (writing at file offset 0x{x})", .{header.offset}); + try self.base.file.?.pwriteAll(buffer.items, header.offset); break; } } } } -fn writePadding(self: *MachO, match: MatchingSection, size: usize, writer: anytype) !void { - const is_code = match.seg == self.text_segment_cmd_index.? 
and match.sect == self.text_section_index.?; - const min_alignment: u3 = if (!is_code) +fn writePadding(self: *MachO, sect_id: u8, size: usize, writer: anytype) !void { + const header = self.sections.items(.header)[sect_id]; + const min_alignment: u3 = if (!header.isCode()) 1 else switch (self.base.options.target.cpu.arch) { .aarch64 => @sizeOf(u32), @@ -2421,7 +1982,7 @@ fn writePadding(self: *MachO, match: MatchingSection, size: usize, writer: anyty const len = @divExact(size, min_alignment); var i: usize = 0; while (i < len) : (i += 1) { - if (!is_code) { + if (!header.isCode()) { try writer.writeByte(0); } else switch (self.base.options.target.cpu.arch) { .aarch64 => { @@ -2439,20 +2000,19 @@ fn writePadding(self: *MachO, match: MatchingSection, size: usize, writer: anyty fn writeAtomsIncremental(self: *MachO) !void { assert(self.mode == .incremental); - var it = self.atoms.iterator(); - while (it.next()) |entry| { - const match = entry.key_ptr.*; - const sect = self.getSection(match); - var atom: *Atom = entry.value_ptr.*; + const slice = self.sections.slice(); + for (slice.items(.last_atom)) |last, i| { + var atom: *Atom = last orelse continue; + const sect_i = @intCast(u8, i); + const header = slice.items(.header)[sect_i]; - // TODO handle zerofill in stage2 - // if (sect.flags == macho.S_ZEROFILL or sect.flags == macho.S_THREAD_LOCAL_ZEROFILL) continue; + if (header.isZerofill()) continue; - log.debug("writing atoms in {s},{s}", .{ sect.segName(), sect.sectName() }); + log.debug("writing atoms in {s},{s}", .{ header.segName(), header.sectName() }); while (true) { if (atom.dirty) { - try self.writeAtom(atom, match); + try self.writeAtom(atom, sect_i); atom.dirty = false; } @@ -2503,10 +2063,7 @@ pub fn createGotAtom(self: *MachO, target: SymbolWithLoc) !*Atom { try self.managed_atoms.append(gpa, atom); try self.atom_by_index_table.putNoClobber(gpa, sym_index, atom); - try self.allocateAtomCommon(atom, .{ - .seg = self.data_const_segment_cmd_index.?, - .sect 
= self.got_section_index.?, - }); + try self.allocateAtomCommon(atom, self.got_section_index.?); return atom; } @@ -2535,7 +2092,7 @@ pub fn createTlvPtrAtom(self: *MachO, target: SymbolWithLoc) !*Atom { try self.managed_atoms.append(gpa, atom); try self.atom_by_index_table.putNoClobber(gpa, sym_index, atom); - const match = (try self.getMatchingSection(.{ + const match = (try self.getOutputSection(.{ .segname = makeStaticString("__DATA"), .sectname = makeStaticString("__thread_ptrs"), .flags = macho.S_THREAD_LOCAL_VARIABLE_POINTERS, @@ -2561,10 +2118,7 @@ fn createDyldPrivateAtom(self: *MachO) !void { const atom = try MachO.createEmptyAtom(gpa, sym_index, @sizeOf(u64), 3); self.dyld_private_atom = atom; - try self.allocateAtomCommon(atom, .{ - .seg = self.data_segment_cmd_index.?, - .sect = self.data_section_index.?, - }); + try self.allocateAtomCommon(atom, self.data_section_index.?); try self.managed_atoms.append(gpa, atom); try self.atom_by_index_table.putNoClobber(gpa, sym_index, atom); @@ -2692,10 +2246,7 @@ fn createStubHelperPreambleAtom(self: *MachO) !void { } self.stub_helper_preamble_atom = atom; - try self.allocateAtomCommon(atom, .{ - .seg = self.text_segment_cmd_index.?, - .sect = self.stub_helper_section_index.?, - }); + try self.allocateAtomCommon(atom, self.stub_helper_section_index.?); try self.managed_atoms.append(gpa, atom); try self.atom_by_index_table.putNoClobber(gpa, sym_index, atom); @@ -2771,10 +2322,7 @@ pub fn createStubHelperAtom(self: *MachO) !*Atom { try self.managed_atoms.append(gpa, atom); try self.atom_by_index_table.putNoClobber(gpa, sym_index, atom); - try self.allocateAtomCommon(atom, .{ - .seg = self.text_segment_cmd_index.?, - .sect = self.stub_helper_section_index.?, - }); + try self.allocateAtomCommon(atom, self.stub_helper_section_index.?); return atom; } @@ -2814,10 +2362,7 @@ pub fn createLazyPointerAtom(self: *MachO, stub_sym_index: u32, target: SymbolWi try self.managed_atoms.append(gpa, atom); try 
self.atom_by_index_table.putNoClobber(gpa, sym_index, atom); - try self.allocateAtomCommon(atom, .{ - .seg = self.data_segment_cmd_index.?, - .sect = self.la_symbol_ptr_section_index.?, - }); + try self.allocateAtomCommon(atom, self.la_symbol_ptr_section_index.?); return atom; } @@ -2896,10 +2441,7 @@ pub fn createStubAtom(self: *MachO, laptr_sym_index: u32) !*Atom { try self.managed_atoms.append(gpa, atom); try self.atom_by_index_table.putNoClobber(gpa, sym_index, atom); - try self.allocateAtomCommon(atom, .{ - .seg = self.text_segment_cmd_index.?, - .sect = self.stubs_section_index.?, - }); + try self.allocateAtomCommon(atom, self.stubs_section_index.?); return atom; } @@ -2917,19 +2459,18 @@ fn createTentativeDefAtoms(self: *MachO) !void { // Convert any tentative definition into a regular symbol and allocate // text blocks for each tentative definition. - const match = MatchingSection{ - .seg = self.data_segment_cmd_index.?, - .sect = self.bss_section_index.?, - }; - _ = try self.section_ordinals.getOrPut(gpa, match); - const size = sym.n_value; const alignment = (sym.n_desc >> 8) & 0x0f; + const n_sect = (try self.getOutputSection(.{ + .segname = makeStaticString("__DATA"), + .sectname = makeStaticString("__bss"), + .flags = macho.S_ZEROFILL, + })).?; sym.* = .{ .n_strx = sym.n_strx, .n_type = macho.N_SECT | macho.N_EXT, - .n_sect = 0, + .n_sect = n_sect, .n_desc = 0, .n_value = 0, }; @@ -2937,7 +2478,7 @@ fn createTentativeDefAtoms(self: *MachO) !void { const atom = try MachO.createEmptyAtom(gpa, global.sym_index, size, alignment); atom.file = global.file; - try self.allocateAtomCommon(atom, match); + try self.allocateAtomCommon(atom, n_sect); if (global.file) |file| { const object = &self.objects.items[file]; @@ -3060,7 +2601,8 @@ fn resolveGlobalSymbol(self: *MachO, current: SymbolWithLoc) !void { gop.value_ptr.* = current; } -fn resolveSymbolsInObject(self: *MachO, object: *Object, object_id: u16) !void { +fn resolveSymbolsInObject(self: *MachO, object_id: 
u16) !void { + const object = &self.objects.items[object_id]; log.debug("resolving symbols in '{s}'", .{object.name}); for (object.symtab.items) |sym, index| { @@ -3115,6 +2657,8 @@ fn resolveSymbolsInObject(self: *MachO, object: *Object, object_id: u16) !void { fn resolveSymbolsInArchives(self: *MachO) !void { if (self.archives.items.len == 0) return; + const gpa = self.base.allocator; + const cpu_arch = self.base.options.target.cpu.arch; var next_sym: usize = 0; loop: while (next_sym < self.unresolved.count()) { const global = self.globals.values()[self.unresolved.keys()[next_sym]]; @@ -3129,13 +2673,9 @@ fn resolveSymbolsInArchives(self: *MachO) !void { assert(offsets.items.len > 0); const object_id = @intCast(u16, self.objects.items.len); - const object = try self.objects.addOne(self.base.allocator); - object.* = try archive.parseObject( - self.base.allocator, - self.base.options.target.cpu.arch, - offsets.items[0], - ); - try self.resolveSymbolsInObject(object, object_id); + const object = try archive.parseObject(gpa, cpu_arch, offsets.items[0]); + try self.objects.append(gpa, object); + try self.resolveSymbolsInObject(object_id); continue :loop; } @@ -3159,7 +2699,6 @@ fn resolveSymbolsInDylibs(self: *MachO) !void { const dylib_id = @intCast(u16, id); if (!self.referenced_dylibs.contains(dylib_id)) { - try self.addLoadDylibLC(dylib_id); try self.referenced_dylibs.putNoClobber(self.base.allocator, dylib_id, {}); } @@ -3257,7 +2796,6 @@ fn resolveDyldStubBinder(self: *MachO) !void { const dylib_id = @intCast(u16, id); if (!self.referenced_dylibs.contains(dylib_id)) { - try self.addLoadDylibLC(dylib_id); try self.referenced_dylibs.putNoClobber(self.base.allocator, dylib_id, {}); } @@ -3280,47 +2818,192 @@ fn resolveDyldStubBinder(self: *MachO) !void { self.got_entries.items[got_index].sym_index = got_atom.sym_index; } -fn addLoadDylibLC(self: *MachO, id: u16) !void { - const dylib = self.dylibs.items[id]; - const dylib_id = dylib.id orelse unreachable; - var 
dylib_cmd = try macho.createLoadDylibCommand( - self.base.allocator, - if (dylib.weak) .LOAD_WEAK_DYLIB else .LOAD_DYLIB, - dylib_id.name, - dylib_id.timestamp, - dylib_id.current_version, - dylib_id.compatibility_version, - ); - errdefer dylib_cmd.deinit(self.base.allocator); - try self.load_commands.append(self.base.allocator, .{ .dylib = dylib_cmd }); - self.load_commands_dirty = true; +fn writeDylinkerLC(ncmds: *u32, lc_writer: anytype) !void { + const name_len = mem.sliceTo(default_dyld_path, 0).len; + const cmdsize = @intCast(u32, mem.alignForwardGeneric( + u64, + @sizeOf(macho.dylinker_command) + name_len, + @sizeOf(u64), + )); + try lc_writer.writeStruct(macho.dylinker_command{ + .cmd = .LOAD_DYLINKER, + .cmdsize = cmdsize, + .name = @sizeOf(macho.dylinker_command), + }); + try lc_writer.writeAll(mem.sliceTo(default_dyld_path, 0)); + const padding = cmdsize - @sizeOf(macho.dylinker_command) - name_len; + if (padding > 0) { + try lc_writer.writeByteNTimes(0, padding); + } + ncmds.* += 1; } -fn addCodeSignatureLC(self: *MachO) !void { - if (self.code_signature_cmd_index != null or self.code_signature == null) return; - self.code_signature_cmd_index = @intCast(u16, self.load_commands.items.len); - try self.load_commands.append(self.base.allocator, .{ - .linkedit_data = .{ - .cmd = .CODE_SIGNATURE, - .cmdsize = @sizeOf(macho.linkedit_data_command), - .dataoff = 0, - .datasize = 0, +fn writeMainLC(self: *MachO, ncmds: *u32, lc_writer: anytype) !void { + if (self.base.options.output_mode != .Exe) return; + const seg = self.segments.items[self.text_segment_cmd_index.?]; + const global = try self.getEntryPoint(); + const sym = self.getSymbol(global); + try lc_writer.writeStruct(macho.entry_point_command{ + .cmd = .MAIN, + .cmdsize = @sizeOf(macho.entry_point_command), + .entryoff = @intCast(u32, sym.n_value - seg.vmaddr), + .stacksize = self.base.options.stack_size_override orelse 0, + }); + ncmds.* += 1; +} + +const WriteDylibLCCtx = struct { + cmd: macho.LC, + 
name: []const u8, + timestamp: u32 = 2, + current_version: u32 = 0x10000, + compatibility_version: u32 = 0x10000, +}; + +fn writeDylibLC(ctx: WriteDylibLCCtx, ncmds: *u32, lc_writer: anytype) !void { + const name_len = ctx.name.len + 1; + const cmdsize = @intCast(u32, mem.alignForwardGeneric( + u64, + @sizeOf(macho.dylib_command) + name_len, + @sizeOf(u64), + )); + try lc_writer.writeStruct(macho.dylib_command{ + .cmd = ctx.cmd, + .cmdsize = cmdsize, + .dylib = .{ + .name = @sizeOf(macho.dylib_command), + .timestamp = ctx.timestamp, + .current_version = ctx.current_version, + .compatibility_version = ctx.compatibility_version, }, }); - self.load_commands_dirty = true; + try lc_writer.writeAll(ctx.name); + try lc_writer.writeByte(0); + const padding = cmdsize - @sizeOf(macho.dylib_command) - name_len; + if (padding > 0) { + try lc_writer.writeByteNTimes(0, padding); + } + ncmds.* += 1; } -fn setEntryPoint(self: *MachO) !void { - if (self.base.options.output_mode != .Exe) return; +fn writeDylibIdLC(self: *MachO, ncmds: *u32, lc_writer: anytype) !void { + if (self.base.options.output_mode != .Lib) return; + const install_name = self.base.options.install_name orelse self.base.options.emit.?.sub_path; + const curr = self.base.options.version orelse std.builtin.Version{ + .major = 1, + .minor = 0, + .patch = 0, + }; + const compat = self.base.options.compatibility_version orelse std.builtin.Version{ + .major = 1, + .minor = 0, + .patch = 0, + }; + try writeDylibLC(.{ + .cmd = .ID_DYLIB, + .name = install_name, + .current_version = curr.major << 16 | curr.minor << 8 | curr.patch, + .compatibility_version = compat.major << 16 | compat.minor << 8 | compat.patch, + }, ncmds, lc_writer); +} - const seg = self.load_commands.items[self.text_segment_cmd_index.?].segment; - const global = try self.getEntryPoint(); - const sym = self.getSymbol(global); - const ec = &self.load_commands.items[self.main_cmd_index.?].main; - ec.entryoff = @intCast(u32, sym.n_value - seg.inner.vmaddr); 
- ec.stacksize = self.base.options.stack_size_override orelse 0; - self.entry_addr = sym.n_value; - self.load_commands_dirty = true; +const RpathIterator = struct { + buffer: []const []const u8, + table: std.StringHashMap(void), + count: usize = 0, + + fn init(gpa: Allocator, rpaths: []const []const u8) RpathIterator { + return .{ .buffer = rpaths, .table = std.StringHashMap(void).init(gpa) }; + } + + fn deinit(it: *RpathIterator) void { + it.table.deinit(); + } + + fn next(it: *RpathIterator) !?[]const u8 { + while (true) { + if (it.count >= it.buffer.len) return null; + const rpath = it.buffer[it.count]; + it.count += 1; + const gop = try it.table.getOrPut(rpath); + if (gop.found_existing) continue; + return rpath; + } + } +}; + +fn writeRpathLCs(self: *MachO, ncmds: *u32, lc_writer: anytype) !void { + const gpa = self.base.allocator; + + var it = RpathIterator.init(gpa, self.base.options.rpath_list); + defer it.deinit(); + + while (try it.next()) |rpath| { + const rpath_len = rpath.len + 1; + const cmdsize = @intCast(u32, mem.alignForwardGeneric( + u64, + @sizeOf(macho.rpath_command) + rpath_len, + @sizeOf(u64), + )); + try lc_writer.writeStruct(macho.rpath_command{ + .cmdsize = cmdsize, + .path = @sizeOf(macho.rpath_command), + }); + try lc_writer.writeAll(rpath); + try lc_writer.writeByte(0); + const padding = cmdsize - @sizeOf(macho.rpath_command) - rpath_len; + if (padding > 0) { + try lc_writer.writeByteNTimes(0, padding); + } + ncmds.* += 1; + } +} + +fn writeBuildVersionLC(self: *MachO, ncmds: *u32, lc_writer: anytype) !void { + const cmdsize = @sizeOf(macho.build_version_command) + @sizeOf(macho.build_tool_version); + const platform_version = blk: { + const ver = self.base.options.target.os.version_range.semver.min; + const platform_version = ver.major << 16 | ver.minor << 8; + break :blk platform_version; + }; + const sdk_version = if (self.base.options.native_darwin_sdk) |sdk| blk: { + const ver = sdk.version; + const sdk_version = ver.major << 16 | 
ver.minor << 8; + break :blk sdk_version; + } else platform_version; + const is_simulator_abi = self.base.options.target.abi == .simulator; + try lc_writer.writeStruct(macho.build_version_command{ + .cmdsize = cmdsize, + .platform = switch (self.base.options.target.os.tag) { + .macos => .MACOS, + .ios => if (is_simulator_abi) macho.PLATFORM.IOSSIMULATOR else macho.PLATFORM.IOS, + .watchos => if (is_simulator_abi) macho.PLATFORM.WATCHOSSIMULATOR else macho.PLATFORM.WATCHOS, + .tvos => if (is_simulator_abi) macho.PLATFORM.TVOSSIMULATOR else macho.PLATFORM.TVOS, + else => unreachable, + }, + .minos = platform_version, + .sdk = sdk_version, + .ntools = 1, + }); + try lc_writer.writeAll(mem.asBytes(&macho.build_tool_version{ + .tool = .LD, + .version = 0x0, + })); + ncmds.* += 1; +} + +fn writeLoadDylibLCs(self: *MachO, ncmds: *u32, lc_writer: anytype) !void { + for (self.referenced_dylibs.keys()) |id| { + const dylib = self.dylibs.items[id]; + const dylib_id = dylib.id orelse unreachable; + try writeDylibLC(.{ + .cmd = if (dylib.weak) .LOAD_WEAK_DYLIB else .LOAD_DYLIB, + .name = dylib_id.name, + .timestamp = dylib_id.timestamp, + .current_version = dylib_id.current_version, + .compatibility_version = dylib_id.compatibility_version, + }, ncmds, lc_writer); + } } pub fn deinit(self: *MachO) void { @@ -3334,7 +3017,6 @@ pub fn deinit(self: *MachO) void { d_sym.deinit(gpa); } - self.section_ordinals.deinit(gpa); self.tlv_ptr_entries.deinit(gpa); self.tlv_ptr_entries_free_list.deinit(gpa); self.tlv_ptr_entries_table.deinit(gpa); @@ -3371,24 +3053,19 @@ pub fn deinit(self: *MachO) void { self.dylibs_map.deinit(gpa); self.referenced_dylibs.deinit(gpa); - for (self.load_commands.items) |*lc| { - lc.deinit(gpa); + self.segments.deinit(gpa); + + for (self.sections.items(.free_list)) |*list| { + list.deinit(gpa); } - self.load_commands.deinit(gpa); + self.sections.deinit(gpa); for (self.managed_atoms.items) |atom| { atom.deinit(gpa); gpa.destroy(atom); } 
self.managed_atoms.deinit(gpa); - self.atoms.deinit(gpa); - { - var it = self.atom_free_lists.valueIterator(); - while (it.next()) |free_list| { - free_list.deinit(gpa); - } - self.atom_free_lists.deinit(gpa); - } + if (self.base.options.module) |mod| { for (self.decls.keys()) |decl_index| { const decl = mod.declPtr(decl_index); @@ -3408,34 +3085,24 @@ pub fn deinit(self: *MachO) void { } self.atom_by_index_table.deinit(gpa); - - if (self.code_signature) |*csig| { - csig.deinit(gpa); - } } pub fn closeFiles(self: MachO) void { - for (self.objects.items) |object| { - object.file.close(); - } for (self.archives.items) |archive| { archive.file.close(); } - for (self.dylibs.items) |dylib| { - dylib.file.close(); - } if (self.d_sym) |ds| { ds.file.close(); } } -fn freeAtom(self: *MachO, atom: *Atom, match: MatchingSection, owns_atom: bool) void { +fn freeAtom(self: *MachO, atom: *Atom, sect_id: u8, owns_atom: bool) void { log.debug("freeAtom {*}", .{atom}); if (!owns_atom) { atom.deinit(self.base.allocator); } - const free_list = self.atom_free_lists.getPtr(match).?; + const free_list = &self.sections.items(.free_list)[sect_id]; var already_have_free_list_node = false; { var i: usize = 0; @@ -3452,13 +3119,14 @@ fn freeAtom(self: *MachO, atom: *Atom, match: MatchingSection, owns_atom: bool) } } - if (self.atoms.getPtr(match)) |last_atom| { - if (last_atom.* == atom) { + const maybe_last_atom = &self.sections.items(.last_atom)[sect_id]; + if (maybe_last_atom.*) |last_atom| { + if (last_atom == atom) { if (atom.prev) |prev| { // TODO shrink the section size here - last_atom.* = prev; + maybe_last_atom.* = prev; } else { - _ = self.atoms.fetchRemove(match); + maybe_last_atom.* = null; } } } @@ -3486,21 +3154,21 @@ fn freeAtom(self: *MachO, atom: *Atom, match: MatchingSection, owns_atom: bool) } } -fn shrinkAtom(self: *MachO, atom: *Atom, new_block_size: u64, match: MatchingSection) void { +fn shrinkAtom(self: *MachO, atom: *Atom, new_block_size: u64, sect_id: u8) void { _ 
= self; _ = atom; _ = new_block_size; - _ = match; + _ = sect_id; // TODO check the new capacity, and if it crosses the size threshold into a big enough // capacity, insert a free list node for it. } -fn growAtom(self: *MachO, atom: *Atom, new_atom_size: u64, alignment: u64, match: MatchingSection) !u64 { +fn growAtom(self: *MachO, atom: *Atom, new_atom_size: u64, alignment: u64, sect_id: u8) !u64 { const sym = atom.getSymbol(self); const align_ok = mem.alignBackwardGeneric(u64, sym.n_value, alignment) == sym.n_value; const need_realloc = !align_ok or new_atom_size > atom.capacity(self); if (!need_realloc) return sym.n_value; - return self.allocateAtom(atom, new_atom_size, alignment, match); + return self.allocateAtom(atom, new_atom_size, alignment, sect_id); } fn allocateSymbol(self: *MachO) !u32 { @@ -3671,10 +3339,11 @@ pub fn updateFunc(self: *MachO, module: *Module, func: *Module.Fn, air: Air, liv } pub fn lowerUnnamedConst(self: *MachO, typed_value: TypedValue, decl_index: Module.Decl.Index) !u32 { - var code_buffer = std.ArrayList(u8).init(self.base.allocator); + const gpa = self.base.allocator; + + var code_buffer = std.ArrayList(u8).init(gpa); defer code_buffer.deinit(); - const gpa = self.base.allocator; const module = self.base.options.module.?; const gop = try self.unnamed_const_atoms.getOrPut(gpa, decl_index); if (!gop.found_existing) { @@ -3725,25 +3394,25 @@ pub fn lowerUnnamedConst(self: *MachO, typed_value: TypedValue, decl_index: Modu atom.code.clearRetainingCapacity(); try atom.code.appendSlice(gpa, code); - const match = try self.getMatchingSectionAtom( + const sect_id = try self.getOutputSectionAtom( atom, decl_name, typed_value.ty, typed_value.val, required_alignment, ); - const addr = try self.allocateAtom(atom, code.len, required_alignment, match); + const addr = try self.allocateAtom(atom, code.len, required_alignment, sect_id); log.debug("allocated atom for {?s} at 0x{x}", .{ name, addr }); log.debug(" (required alignment 0x{x})", 
.{required_alignment}); - errdefer self.freeAtom(atom, match, true); + errdefer self.freeAtom(atom, sect_id, true); const symbol = atom.getSymbolPtr(self); symbol.* = .{ .n_strx = name_str_index, .n_type = macho.N_SECT, - .n_sect = self.getSectionOrdinal(match), + .n_sect = sect_id + 1, .n_desc = 0, .n_value = addr, }; @@ -3894,44 +3563,40 @@ fn needsPointerRebase(ty: Type, val: Value, mod: *Module) bool { } } -fn getMatchingSectionAtom( +fn getOutputSectionAtom( self: *MachO, atom: *Atom, name: []const u8, ty: Type, val: Value, alignment: u32, -) !MatchingSection { +) !u8 { const code = atom.code.items; const mod = self.base.options.module.?; const align_log_2 = math.log2(alignment); const zig_ty = ty.zigTypeTag(); const mode = self.base.options.optimize_mode; - const match: MatchingSection = blk: { + const sect_id: u8 = blk: { // TODO finish and audit this function if (val.isUndefDeep()) { if (mode == .ReleaseFast or mode == .ReleaseSmall) { - break :blk MatchingSection{ - .seg = self.data_segment_cmd_index.?, - .sect = self.bss_section_index.?, - }; + break :blk (try self.getOutputSection(.{ + .segname = makeStaticString("__DATA"), + .sectname = makeStaticString("__bss"), + .size = code.len, + .@"align" = align_log_2, + })).?; } else { - break :blk MatchingSection{ - .seg = self.data_segment_cmd_index.?, - .sect = self.data_section_index.?, - }; + break :blk self.data_section_index.?; } } if (val.castTag(.variable)) |_| { - break :blk MatchingSection{ - .seg = self.data_segment_cmd_index.?, - .sect = self.data_section_index.?, - }; + break :blk self.data_section_index.?; } if (needsPointerRebase(ty, val, mod)) { - break :blk (try self.getMatchingSection(.{ + break :blk (try self.getOutputSection(.{ .segname = makeStaticString("__DATA_CONST"), .sectname = makeStaticString("__const"), .size = code.len, @@ -3941,10 +3606,7 @@ fn getMatchingSectionAtom( switch (zig_ty) { .Fn => { - break :blk MatchingSection{ - .seg = self.text_segment_cmd_index.?, - .sect = 
self.text_section_index.?, - }; + break :blk self.text_section_index.?; }, .Array => { if (val.tag() == .bytes) { @@ -3953,7 +3615,7 @@ fn getMatchingSectionAtom( .const_slice_u8_sentinel_0, .manyptr_const_u8_sentinel_0, => { - break :blk (try self.getMatchingSection(.{ + break :blk (try self.getOutputSection(.{ .segname = makeStaticString("__TEXT"), .sectname = makeStaticString("__cstring"), .flags = macho.S_CSTRING_LITERALS, @@ -3967,22 +3629,21 @@ fn getMatchingSectionAtom( }, else => {}, } - break :blk (try self.getMatchingSection(.{ + break :blk (try self.getOutputSection(.{ .segname = makeStaticString("__TEXT"), .sectname = makeStaticString("__const"), .size = code.len, .@"align" = align_log_2, })).?; }; - const sect = self.getSection(match); - log.debug(" allocating atom '{s}' in '{s},{s}' ({d},{d})", .{ + const header = self.sections.items(.header)[sect_id]; + log.debug(" allocating atom '{s}' in '{s},{s}', ord({d})", .{ name, - sect.segName(), - sect.sectName(), - match.seg, - match.sect, + header.segName(), + header.sectName(), + sect_id, }); - return match; + return sect_id; } fn placeDecl(self: *MachO, decl_index: Module.Decl.Index, code_len: usize) !u64 { @@ -3996,7 +3657,7 @@ fn placeDecl(self: *MachO, decl_index: Module.Decl.Index, code_len: usize) !u64 const decl_ptr = self.decls.getPtr(decl_index).?; if (decl_ptr.* == null) { - decl_ptr.* = try self.getMatchingSectionAtom( + decl_ptr.* = try self.getOutputSectionAtom( &decl.link.macho, sym_name, decl.ty, @@ -4045,7 +3706,7 @@ fn placeDecl(self: *MachO, decl_index: Module.Decl.Index, code_len: usize) !u64 symbol.* = .{ .n_strx = name_str_index, .n_type = macho.N_SECT, - .n_sect = self.getSectionOrdinal(match), + .n_sect = match + 1, .n_desc = 0, .n_value = addr, }; @@ -4134,10 +3795,7 @@ pub fn updateDeclExports( sym.* = .{ .n_strx = try self.strtab.insert(gpa, exp_name), .n_type = macho.N_SECT | macho.N_EXT, - .n_sect = self.getSectionOrdinal(.{ - .seg = self.text_segment_cmd_index.?, - .sect = 
self.text_section_index.?, // TODO what if we export a variable? - }), + .n_sect = self.text_section_index.? + 1, // TODO what if we export a variable? .n_desc = 0, .n_value = decl_sym.n_value, }; @@ -4208,10 +3866,10 @@ pub fn deleteExport(self: *MachO, exp: Export) void { fn freeUnnamedConsts(self: *MachO, decl_index: Module.Decl.Index) void { const unnamed_consts = self.unnamed_const_atoms.getPtr(decl_index) orelse return; for (unnamed_consts.items) |atom| { - self.freeAtom(atom, .{ - .seg = self.text_segment_cmd_index.?, - .sect = self.text_const_section_index.?, - }, true); + // TODO + // const sect_id = atom.getSymbol(self).n_sect; + const sect_id = self.getSectionByName("__TEXT", "__const").?; + self.freeAtom(atom, sect_id, true); self.locals_free_list.append(self.base.allocator, atom.sym_index) catch {}; self.locals.items[atom.sym_index].n_type = 0; _ = self.atom_by_index_table.remove(atom.sym_index); @@ -4294,6 +3952,7 @@ pub fn getDeclVAddr(self: *MachO, decl_index: Module.Decl.Index, reloc_info: Fil } fn populateMissingMetadata(self: *MachO) !void { + const gpa = self.base.allocator; const cpu_arch = self.base.options.target.cpu.arch; const pagezero_vmsize = self.base.options.pagezero_size orelse default_pagezero_vmsize; const aligned_pagezero_vmsize = mem.alignBackwardGeneric(u64, pagezero_vmsize, self.page_size); @@ -4305,21 +3964,16 @@ fn populateMissingMetadata(self: *MachO) !void { log.warn("requested __PAGEZERO size (0x{x}) is not page aligned", .{pagezero_vmsize}); log.warn(" rounding down to 0x{x}", .{aligned_pagezero_vmsize}); } - self.pagezero_segment_cmd_index = @intCast(u16, self.load_commands.items.len); - try self.load_commands.append(self.base.allocator, .{ - .segment = .{ - .inner = .{ - .segname = makeStaticString("__PAGEZERO"), - .vmsize = aligned_pagezero_vmsize, - .cmdsize = @sizeOf(macho.segment_command_64), - }, - }, + self.pagezero_segment_cmd_index = @intCast(u8, self.segments.items.len); + try self.segments.append(gpa, .{ + 
.segname = makeStaticString("__PAGEZERO"), + .vmsize = aligned_pagezero_vmsize, + .cmdsize = @sizeOf(macho.segment_command_64), }); - self.load_commands_dirty = true; } if (self.text_segment_cmd_index == null) { - self.text_segment_cmd_index = @intCast(u16, self.load_commands.items.len); + self.text_segment_cmd_index = @intCast(u8, self.segments.items.len); const needed_size = if (self.mode == .incremental) blk: { const headerpad_size = @maximum(self.base.options.headerpad_size orelse 0, default_headerpad_size); const program_code_size_hint = self.base.options.program_code_size_hint; @@ -4329,20 +3983,15 @@ fn populateMissingMetadata(self: *MachO) !void { log.debug("found __TEXT segment free space 0x{x} to 0x{x}", .{ 0, needed_size }); break :blk needed_size; } else 0; - try self.load_commands.append(self.base.allocator, .{ - .segment = .{ - .inner = .{ - .segname = makeStaticString("__TEXT"), - .vmaddr = aligned_pagezero_vmsize, - .vmsize = needed_size, - .filesize = needed_size, - .maxprot = macho.PROT.READ | macho.PROT.EXEC, - .initprot = macho.PROT.READ | macho.PROT.EXEC, - .cmdsize = @sizeOf(macho.segment_command_64), - }, - }, + try self.segments.append(gpa, .{ + .segname = makeStaticString("__TEXT"), + .vmaddr = aligned_pagezero_vmsize, + .vmsize = needed_size, + .filesize = needed_size, + .maxprot = macho.PROT.READ | macho.PROT.EXEC, + .initprot = macho.PROT.READ | macho.PROT.EXEC, + .cmdsize = @sizeOf(macho.segment_command_64), }); - self.load_commands_dirty = true; } if (self.text_section_index == null) { @@ -4353,7 +4002,7 @@ fn populateMissingMetadata(self: *MachO) !void { }; const needed_size = if (self.mode == .incremental) self.base.options.program_code_size_hint else 0; self.text_section_index = try self.initSection( - self.text_segment_cmd_index.?, + "__TEXT", "__text", needed_size, alignment, @@ -4376,7 +4025,7 @@ fn populateMissingMetadata(self: *MachO) !void { }; const needed_size = if (self.mode == .incremental) stub_size * 
self.base.options.symbol_count_hint else 0; self.stubs_section_index = try self.initSection( - self.text_segment_cmd_index.?, + "__TEXT", "__stubs", needed_size, alignment, @@ -4408,7 +4057,7 @@ fn populateMissingMetadata(self: *MachO) !void { else 0; self.stub_helper_section_index = try self.initSection( - self.text_segment_cmd_index.?, + "__TEXT", "__stub_helper", needed_size, alignment, @@ -4419,7 +4068,7 @@ fn populateMissingMetadata(self: *MachO) !void { } if (self.data_const_segment_cmd_index == null) { - self.data_const_segment_cmd_index = @intCast(u16, self.load_commands.items.len); + self.data_const_segment_cmd_index = @intCast(u8, self.segments.items.len); var vmaddr: u64 = 0; var fileoff: u64 = 0; var needed_size: u64 = 0; @@ -4434,21 +4083,16 @@ fn populateMissingMetadata(self: *MachO) !void { fileoff + needed_size, }); } - try self.load_commands.append(self.base.allocator, .{ - .segment = .{ - .inner = .{ - .segname = makeStaticString("__DATA_CONST"), - .vmaddr = vmaddr, - .vmsize = needed_size, - .fileoff = fileoff, - .filesize = needed_size, - .maxprot = macho.PROT.READ | macho.PROT.WRITE, - .initprot = macho.PROT.READ | macho.PROT.WRITE, - .cmdsize = @sizeOf(macho.segment_command_64), - }, - }, + try self.segments.append(gpa, .{ + .segname = makeStaticString("__DATA_CONST"), + .vmaddr = vmaddr, + .vmsize = needed_size, + .fileoff = fileoff, + .filesize = needed_size, + .maxprot = macho.PROT.READ | macho.PROT.WRITE, + .initprot = macho.PROT.READ | macho.PROT.WRITE, + .cmdsize = @sizeOf(macho.segment_command_64), }); - self.load_commands_dirty = true; } if (self.got_section_index == null) { @@ -4458,7 +4102,7 @@ fn populateMissingMetadata(self: *MachO) !void { 0; const alignment: u16 = 3; // 2^3 = @sizeOf(u64) self.got_section_index = try self.initSection( - self.data_const_segment_cmd_index.?, + "__DATA_CONST", "__got", needed_size, alignment, @@ -4469,7 +4113,7 @@ fn populateMissingMetadata(self: *MachO) !void { } if (self.data_segment_cmd_index == 
null) { - self.data_segment_cmd_index = @intCast(u16, self.load_commands.items.len); + self.data_segment_cmd_index = @intCast(u8, self.segments.items.len); var vmaddr: u64 = 0; var fileoff: u64 = 0; var needed_size: u64 = 0; @@ -4484,21 +4128,16 @@ fn populateMissingMetadata(self: *MachO) !void { fileoff + needed_size, }); } - try self.load_commands.append(self.base.allocator, .{ - .segment = .{ - .inner = .{ - .segname = makeStaticString("__DATA"), - .vmaddr = vmaddr, - .vmsize = needed_size, - .fileoff = fileoff, - .filesize = needed_size, - .maxprot = macho.PROT.READ | macho.PROT.WRITE, - .initprot = macho.PROT.READ | macho.PROT.WRITE, - .cmdsize = @sizeOf(macho.segment_command_64), - }, - }, + try self.segments.append(gpa, .{ + .segname = makeStaticString("__DATA"), + .vmaddr = vmaddr, + .vmsize = needed_size, + .fileoff = fileoff, + .filesize = needed_size, + .maxprot = macho.PROT.READ | macho.PROT.WRITE, + .initprot = macho.PROT.READ | macho.PROT.WRITE, + .cmdsize = @sizeOf(macho.segment_command_64), }); - self.load_commands_dirty = true; } if (self.la_symbol_ptr_section_index == null) { @@ -4508,7 +4147,7 @@ fn populateMissingMetadata(self: *MachO) !void { 0; const alignment: u16 = 3; // 2^3 = @sizeOf(u64) self.la_symbol_ptr_section_index = try self.initSection( - self.data_segment_cmd_index.?, + "__DATA", "__la_symbol_ptr", needed_size, alignment, @@ -4525,7 +4164,7 @@ fn populateMissingMetadata(self: *MachO) !void { 0; const alignment: u16 = 3; // 2^3 = @sizeOf(u64) self.data_section_index = try self.initSection( - self.data_segment_cmd_index.?, + "__DATA", "__data", needed_size, alignment, @@ -4533,76 +4172,8 @@ fn populateMissingMetadata(self: *MachO) !void { ); } - if (self.tlv_section_index == null) { - const needed_size = if (self.mode == .incremental) - @sizeOf(u64) * self.base.options.symbol_count_hint - else - 0; - const alignment: u16 = 3; // 2^3 = @sizeOf(u64) - self.tlv_section_index = try self.initSection( - self.data_segment_cmd_index.?, - 
"__thread_vars", - needed_size, - alignment, - .{ - .flags = macho.S_THREAD_LOCAL_VARIABLES, - }, - ); - } - - if (self.tlv_data_section_index == null) { - const needed_size = if (self.mode == .incremental) - @sizeOf(u64) * self.base.options.symbol_count_hint - else - 0; - const alignment: u16 = 3; // 2^3 = @sizeOf(u64) - self.tlv_data_section_index = try self.initSection( - self.data_segment_cmd_index.?, - "__thread_data", - needed_size, - alignment, - .{ - .flags = macho.S_THREAD_LOCAL_REGULAR, - }, - ); - } - - if (self.tlv_bss_section_index == null) { - const needed_size = if (self.mode == .incremental) - @sizeOf(u64) * self.base.options.symbol_count_hint - else - 0; - const alignment: u16 = 3; // 2^3 = @sizeOf(u64) - self.tlv_bss_section_index = try self.initSection( - self.data_segment_cmd_index.?, - "__thread_bss", - needed_size, - alignment, - .{ - .flags = macho.S_THREAD_LOCAL_ZEROFILL, - }, - ); - } - - if (self.bss_section_index == null) { - const needed_size = if (self.mode == .incremental) - @sizeOf(u64) * self.base.options.symbol_count_hint - else - 0; - const alignment: u16 = 3; // 2^3 = @sizeOf(u64) - self.bss_section_index = try self.initSection( - self.data_segment_cmd_index.?, - "__bss", - needed_size, - alignment, - .{ - .flags = macho.S_ZEROFILL, - }, - ); - } - if (self.linkedit_segment_cmd_index == null) { - self.linkedit_segment_cmd_index = @intCast(u16, self.load_commands.items.len); + self.linkedit_segment_cmd_index = @intCast(u8, self.segments.items.len); var vmaddr: u64 = 0; var fileoff: u64 = 0; if (self.mode == .incremental) { @@ -4611,249 +4182,113 @@ fn populateMissingMetadata(self: *MachO) !void { fileoff = base.fileoff; log.debug("found __LINKEDIT segment free space at 0x{x}", .{fileoff}); } - try self.load_commands.append(self.base.allocator, .{ - .segment = .{ - .inner = .{ - .segname = makeStaticString("__LINKEDIT"), - .vmaddr = vmaddr, - .fileoff = fileoff, - .maxprot = macho.PROT.READ, - .initprot = macho.PROT.READ, - .cmdsize 
= @sizeOf(macho.segment_command_64), - }, - }, - }); - self.load_commands_dirty = true; - } - - if (self.dyld_info_cmd_index == null) { - self.dyld_info_cmd_index = @intCast(u16, self.load_commands.items.len); - try self.load_commands.append(self.base.allocator, .{ - .dyld_info_only = .{ - .cmd = .DYLD_INFO_ONLY, - .cmdsize = @sizeOf(macho.dyld_info_command), - .rebase_off = 0, - .rebase_size = 0, - .bind_off = 0, - .bind_size = 0, - .weak_bind_off = 0, - .weak_bind_size = 0, - .lazy_bind_off = 0, - .lazy_bind_size = 0, - .export_off = 0, - .export_size = 0, - }, + try self.segments.append(gpa, .{ + .segname = makeStaticString("__LINKEDIT"), + .vmaddr = vmaddr, + .fileoff = fileoff, + .maxprot = macho.PROT.READ, + .initprot = macho.PROT.READ, + .cmdsize = @sizeOf(macho.segment_command_64), }); - self.load_commands_dirty = true; - } - - if (self.symtab_cmd_index == null) { - self.symtab_cmd_index = @intCast(u16, self.load_commands.items.len); - try self.load_commands.append(self.base.allocator, .{ - .symtab = .{ - .cmdsize = @sizeOf(macho.symtab_command), - .symoff = 0, - .nsyms = 0, - .stroff = 0, - .strsize = 0, - }, - }); - self.load_commands_dirty = true; - } - - if (self.dysymtab_cmd_index == null) { - self.dysymtab_cmd_index = @intCast(u16, self.load_commands.items.len); - try self.load_commands.append(self.base.allocator, .{ - .dysymtab = .{ - .cmdsize = @sizeOf(macho.dysymtab_command), - .ilocalsym = 0, - .nlocalsym = 0, - .iextdefsym = 0, - .nextdefsym = 0, - .iundefsym = 0, - .nundefsym = 0, - .tocoff = 0, - .ntoc = 0, - .modtaboff = 0, - .nmodtab = 0, - .extrefsymoff = 0, - .nextrefsyms = 0, - .indirectsymoff = 0, - .nindirectsyms = 0, - .extreloff = 0, - .nextrel = 0, - .locreloff = 0, - .nlocrel = 0, - }, - }); - self.load_commands_dirty = true; } +} - if (self.dylinker_cmd_index == null) { - self.dylinker_cmd_index = @intCast(u16, self.load_commands.items.len); - const cmdsize = @intCast(u32, mem.alignForwardGeneric( - u64, - 
@sizeOf(macho.dylinker_command) + mem.sliceTo(default_dyld_path, 0).len, - @sizeOf(u64), - )); - var dylinker_cmd = macho.emptyGenericCommandWithData(macho.dylinker_command{ - .cmd = .LOAD_DYLINKER, - .cmdsize = cmdsize, - .name = @sizeOf(macho.dylinker_command), - }); - dylinker_cmd.data = try self.base.allocator.alloc(u8, cmdsize - dylinker_cmd.inner.name); - mem.set(u8, dylinker_cmd.data, 0); - mem.copy(u8, dylinker_cmd.data, mem.sliceTo(default_dyld_path, 0)); - try self.load_commands.append(self.base.allocator, .{ .dylinker = dylinker_cmd }); - self.load_commands_dirty = true; - } - - if (self.main_cmd_index == null and self.base.options.output_mode == .Exe) { - self.main_cmd_index = @intCast(u16, self.load_commands.items.len); - try self.load_commands.append(self.base.allocator, .{ - .main = .{ - .cmdsize = @sizeOf(macho.entry_point_command), - .entryoff = 0x0, - .stacksize = 0, - }, - }); - self.load_commands_dirty = true; - } +inline fn calcInstallNameLen(cmd_size: u64, name: []const u8, assume_max_path_len: bool) u64 { + const name_len = if (assume_max_path_len) std.os.PATH_MAX else std.mem.len(name) + 1; + return mem.alignForwardGeneric(u64, cmd_size + name_len, @alignOf(u64)); +} - if (self.dylib_id_cmd_index == null and self.base.options.output_mode == .Lib) { - self.dylib_id_cmd_index = @intCast(u16, self.load_commands.items.len); - const install_name = self.base.options.install_name orelse self.base.options.emit.?.sub_path; - const current_version = self.base.options.version orelse - std.builtin.Version{ .major = 1, .minor = 0, .patch = 0 }; - const compat_version = self.base.options.compatibility_version orelse - std.builtin.Version{ .major = 1, .minor = 0, .patch = 0 }; - var dylib_cmd = try macho.createLoadDylibCommand( - self.base.allocator, - .ID_DYLIB, - install_name, - 2, - current_version.major << 16 | current_version.minor << 8 | current_version.patch, - compat_version.major << 16 | compat_version.minor << 8 | compat_version.patch, - ); - 
errdefer dylib_cmd.deinit(self.base.allocator); - try self.load_commands.append(self.base.allocator, .{ .dylib = dylib_cmd }); - self.load_commands_dirty = true; +fn calcLCsSize(self: *MachO, assume_max_path_len: bool) !u32 { + const gpa = self.base.allocator; + var sizeofcmds: u64 = 0; + for (self.segments.items) |seg| { + sizeofcmds += seg.nsects * @sizeOf(macho.section_64) + @sizeOf(macho.segment_command_64); + } + + // LC_DYLD_INFO_ONLY + sizeofcmds += @sizeOf(macho.dyld_info_command); + // LC_FUNCTION_STARTS + if (self.text_section_index != null) { + sizeofcmds += @sizeOf(macho.linkedit_data_command); + } + // LC_DATA_IN_CODE + sizeofcmds += @sizeOf(macho.linkedit_data_command); + // LC_SYMTAB + sizeofcmds += @sizeOf(macho.symtab_command); + // LC_DYSYMTAB + sizeofcmds += @sizeOf(macho.dysymtab_command); + // LC_LOAD_DYLINKER + sizeofcmds += calcInstallNameLen( + @sizeOf(macho.dylinker_command), + mem.sliceTo(default_dyld_path, 0), + false, + ); + // LC_MAIN + if (self.base.options.output_mode == .Exe) { + sizeofcmds += @sizeOf(macho.entry_point_command); + } + // LC_ID_DYLIB + if (self.base.options.output_mode == .Lib) { + sizeofcmds += blk: { + const install_name = self.base.options.install_name orelse self.base.options.emit.?.sub_path; + break :blk calcInstallNameLen( + @sizeOf(macho.dylib_command), + install_name, + assume_max_path_len, + ); + }; } - - if (self.source_version_cmd_index == null) { - self.source_version_cmd_index = @intCast(u16, self.load_commands.items.len); - try self.load_commands.append(self.base.allocator, .{ - .source_version = .{ - .cmdsize = @sizeOf(macho.source_version_command), - .version = 0x0, - }, - }); - self.load_commands_dirty = true; + // LC_RPATH + { + var it = RpathIterator.init(gpa, self.base.options.rpath_list); + defer it.deinit(); + while (try it.next()) |rpath| { + sizeofcmds += calcInstallNameLen( + @sizeOf(macho.rpath_command), + rpath, + assume_max_path_len, + ); + } } - - if (self.build_version_cmd_index == null) 
{ - self.build_version_cmd_index = @intCast(u16, self.load_commands.items.len); - const cmdsize = @intCast(u32, mem.alignForwardGeneric( - u64, - @sizeOf(macho.build_version_command) + @sizeOf(macho.build_tool_version), - @sizeOf(u64), - )); - const platform_version = blk: { - const ver = self.base.options.target.os.version_range.semver.min; - const platform_version = ver.major << 16 | ver.minor << 8; - break :blk platform_version; - }; - const sdk_version = if (self.base.options.native_darwin_sdk) |sdk| blk: { - const ver = sdk.version; - const sdk_version = ver.major << 16 | ver.minor << 8; - break :blk sdk_version; - } else platform_version; - const is_simulator_abi = self.base.options.target.abi == .simulator; - var cmd = macho.emptyGenericCommandWithData(macho.build_version_command{ - .cmdsize = cmdsize, - .platform = switch (self.base.options.target.os.tag) { - .macos => .MACOS, - .ios => if (is_simulator_abi) macho.PLATFORM.IOSSIMULATOR else macho.PLATFORM.IOS, - .watchos => if (is_simulator_abi) macho.PLATFORM.WATCHOSSIMULATOR else macho.PLATFORM.WATCHOS, - .tvos => if (is_simulator_abi) macho.PLATFORM.TVOSSIMULATOR else macho.PLATFORM.TVOS, - else => unreachable, - }, - .minos = platform_version, - .sdk = sdk_version, - .ntools = 1, - }); - const ld_ver = macho.build_tool_version{ - .tool = .LD, - .version = 0x0, - }; - cmd.data = try self.base.allocator.alloc(u8, cmdsize - @sizeOf(macho.build_version_command)); - mem.set(u8, cmd.data, 0); - mem.copy(u8, cmd.data, mem.asBytes(&ld_ver)); - try self.load_commands.append(self.base.allocator, .{ .build_version = cmd }); - self.load_commands_dirty = true; - } - - if (self.uuid_cmd_index == null) { - self.uuid_cmd_index = @intCast(u16, self.load_commands.items.len); - var uuid_cmd: macho.uuid_command = .{ - .cmdsize = @sizeOf(macho.uuid_command), - .uuid = undefined, + // LC_SOURCE_VERSION + sizeofcmds += @sizeOf(macho.source_version_command); + // LC_BUILD_VERSION + sizeofcmds += 
@sizeOf(macho.build_version_command) + @sizeOf(macho.build_tool_version); + // LC_UUID + sizeofcmds += @sizeOf(macho.uuid_command); + // LC_LOAD_DYLIB + for (self.referenced_dylibs.keys()) |id| { + const dylib = self.dylibs.items[id]; + const dylib_id = dylib.id orelse unreachable; + sizeofcmds += calcInstallNameLen( + @sizeOf(macho.dylib_command), + dylib_id.name, + assume_max_path_len, + ); + } + // LC_CODE_SIGNATURE + { + const target = self.base.options.target; + const requires_codesig = blk: { + if (self.base.options.entitlements) |_| break :blk true; + if (target.cpu.arch == .aarch64 and (target.os.tag == .macos or target.abi == .simulator)) + break :blk true; + break :blk false; }; - std.crypto.random.bytes(&uuid_cmd.uuid); - try self.load_commands.append(self.base.allocator, .{ .uuid = uuid_cmd }); - self.load_commands_dirty = true; - } - - if (self.function_starts_cmd_index == null) { - self.function_starts_cmd_index = @intCast(u16, self.load_commands.items.len); - try self.load_commands.append(self.base.allocator, .{ - .linkedit_data = .{ - .cmd = .FUNCTION_STARTS, - .cmdsize = @sizeOf(macho.linkedit_data_command), - .dataoff = 0, - .datasize = 0, - }, - }); - self.load_commands_dirty = true; + if (requires_codesig) { + sizeofcmds += @sizeOf(macho.linkedit_data_command); + } } - if (self.data_in_code_cmd_index == null) { - self.data_in_code_cmd_index = @intCast(u16, self.load_commands.items.len); - try self.load_commands.append(self.base.allocator, .{ - .linkedit_data = .{ - .cmd = .DATA_IN_CODE, - .cmdsize = @sizeOf(macho.linkedit_data_command), - .dataoff = 0, - .datasize = 0, - }, - }); - self.load_commands_dirty = true; - } + return @intCast(u32, sizeofcmds); } -fn calcMinHeaderpad(self: *MachO) u64 { - var sizeofcmds: u32 = 0; - for (self.load_commands.items) |lc| { - if (lc.cmd() == .NONE) continue; - sizeofcmds += lc.cmdsize(); - } - - var padding: u32 = sizeofcmds + (self.base.options.headerpad_size orelse 0); +fn calcMinHeaderPad(self: *MachO) 
!u64 { + var padding: u32 = (try self.calcLCsSize(false)) + (self.base.options.headerpad_size orelse 0); log.debug("minimum requested headerpad size 0x{x}", .{padding + @sizeOf(macho.mach_header_64)}); if (self.base.options.headerpad_max_install_names) { - var min_headerpad_size: u32 = 0; - for (self.load_commands.items) |lc| switch (lc.cmd()) { - .ID_DYLIB, - .LOAD_WEAK_DYLIB, - .LOAD_DYLIB, - .REEXPORT_DYLIB, - => { - min_headerpad_size += @sizeOf(macho.dylib_command) + std.os.PATH_MAX + 1; - }, - - else => {}, - }; + var min_headerpad_size: u32 = try self.calcLCsSize(true); log.debug("headerpad_max_install_names minimum headerpad size 0x{x}", .{ min_headerpad_size + @sizeOf(macho.mach_header_64), }); @@ -4868,32 +4303,32 @@ fn calcMinHeaderpad(self: *MachO) u64 { fn allocateSegments(self: *MachO) !void { try self.allocateSegment(self.text_segment_cmd_index, &.{ self.pagezero_segment_cmd_index, - }, self.calcMinHeaderpad()); + }, try self.calcMinHeaderPad()); if (self.text_segment_cmd_index) |index| blk: { - const seg = &self.load_commands.items[index].segment; - if (seg.sections.items.len == 0) break :blk; + const indexes = self.getSectionIndexes(index); + if (indexes.start == indexes.end) break :blk; + const seg = self.segments.items[index]; // Shift all sections to the back to minimize jump size between __TEXT and __DATA segments. 
var min_alignment: u32 = 0; - for (seg.sections.items) |sect| { - const alignment = try math.powi(u32, 2, sect.@"align"); + for (self.sections.items(.header)[indexes.start..indexes.end]) |header| { + const alignment = try math.powi(u32, 2, header.@"align"); min_alignment = math.max(min_alignment, alignment); } assert(min_alignment > 0); - const last_sect_idx = seg.sections.items.len - 1; - const last_sect = seg.sections.items[last_sect_idx]; + const last_header = self.sections.items(.header)[indexes.end - 1]; const shift: u32 = shift: { - const diff = seg.inner.filesize - last_sect.offset - last_sect.size; + const diff = seg.filesize - last_header.offset - last_header.size; const factor = @divTrunc(diff, min_alignment); break :shift @intCast(u32, factor * min_alignment); }; if (shift > 0) { - for (seg.sections.items) |*sect| { - sect.offset += shift; - sect.addr += shift; + for (self.sections.items(.header)[indexes.start..indexes.end]) |*header| { + header.offset += shift; + header.addr += shift; } } } @@ -4917,42 +4352,40 @@ fn allocateSegments(self: *MachO) !void { }, 0); } -fn allocateSegment(self: *MachO, maybe_index: ?u16, indices: []const ?u16, init_size: u64) !void { +fn allocateSegment(self: *MachO, maybe_index: ?u8, indices: []const ?u8, init_size: u64) !void { const index = maybe_index orelse return; - const seg = &self.load_commands.items[index].segment; + const seg = &self.segments.items[index]; const base = self.getSegmentAllocBase(indices); - seg.inner.vmaddr = base.vmaddr; - seg.inner.fileoff = base.fileoff; - seg.inner.filesize = init_size; - seg.inner.vmsize = init_size; + seg.vmaddr = base.vmaddr; + seg.fileoff = base.fileoff; + seg.filesize = init_size; + seg.vmsize = init_size; // Allocate the sections according to their alignment at the beginning of the segment. 
+ const indexes = self.getSectionIndexes(index); var start = init_size; - for (seg.sections.items) |*sect| { - const is_zerofill = sect.flags == macho.S_ZEROFILL or sect.flags == macho.S_THREAD_LOCAL_ZEROFILL; - const use_llvm = build_options.have_llvm and self.base.options.use_llvm; - const use_stage1 = build_options.is_stage1 and self.base.options.use_stage1; - const alignment = try math.powi(u32, 2, sect.@"align"); + const slice = self.sections.slice(); + for (slice.items(.header)[indexes.start..indexes.end]) |*header| { + const alignment = try math.powi(u32, 2, header.@"align"); const start_aligned = mem.alignForwardGeneric(u64, start, alignment); - // TODO handle zerofill sections in stage2 - sect.offset = if (is_zerofill and (use_stage1 or use_llvm)) + header.offset = if (header.isZerofill()) 0 else - @intCast(u32, seg.inner.fileoff + start_aligned); - sect.addr = seg.inner.vmaddr + start_aligned; + @intCast(u32, seg.fileoff + start_aligned); + header.addr = seg.vmaddr + start_aligned; - start = start_aligned + sect.size; + start = start_aligned + header.size; - if (!(is_zerofill and (use_stage1 or use_llvm))) { - seg.inner.filesize = start; + if (!header.isZerofill()) { + seg.filesize = start; } - seg.inner.vmsize = start; + seg.vmsize = start; } - seg.inner.filesize = mem.alignForwardGeneric(u64, seg.inner.filesize, self.page_size); - seg.inner.vmsize = mem.alignForwardGeneric(u64, seg.inner.vmsize, self.page_size); + seg.filesize = mem.alignForwardGeneric(u64, seg.filesize, self.page_size); + seg.vmsize = mem.alignForwardGeneric(u64, seg.vmsize, self.page_size); } const InitSectionOpts = struct { @@ -4963,184 +4396,270 @@ const InitSectionOpts = struct { fn initSection( self: *MachO, - segment_id: u16, + segname: []const u8, sectname: []const u8, size: u64, alignment: u32, opts: InitSectionOpts, -) !u16 { - const seg = &self.load_commands.items[segment_id].segment; - var sect = macho.section_64{ +) !u8 { + const segment_id = 
self.getSegmentByName(segname).?; + const seg = &self.segments.items[segment_id]; + const index = try self.insertSection(segment_id, .{ .sectname = makeStaticString(sectname), - .segname = seg.inner.segname, - .size = if (self.mode == .incremental) @intCast(u32, size) else 0, - .@"align" = alignment, + .segname = seg.segname, .flags = opts.flags, .reserved1 = opts.reserved1, .reserved2 = opts.reserved2, - }; + }); + seg.cmdsize += @sizeOf(macho.section_64); + seg.nsects += 1; if (self.mode == .incremental) { + const header = &self.sections.items(.header)[index]; + header.size = size; + header.@"align" = alignment; + + const prev_end_off = if (index > 0) blk: { + const prev_section = self.sections.get(index - 1); + if (prev_section.segment_index == segment_id) { + const prev_header = prev_section.header; + break :blk prev_header.offset + padToIdeal(prev_header.size); + } else break :blk seg.fileoff; + } else 0; const alignment_pow_2 = try math.powi(u32, 2, alignment); - const padding: ?u32 = if (segment_id == self.text_segment_cmd_index.?) 
- @maximum(self.base.options.headerpad_size orelse 0, default_headerpad_size) - else - null; - const off = self.findFreeSpace(segment_id, alignment_pow_2, padding); - log.debug("allocating {s},{s} section from 0x{x} to 0x{x}", .{ - sect.segName(), - sect.sectName(), - off, - off + size, - }); + // TODO better prealloc for __text section + // const padding: u64 = if (index == 0) try self.calcMinHeaderPad() else 0; + const padding: u64 = if (index == 0) 0x1000 else 0; + const off = mem.alignForwardGeneric(u64, padding + prev_end_off, alignment_pow_2); - sect.addr = seg.inner.vmaddr + off - seg.inner.fileoff; + if (!header.isZerofill()) { + header.offset = @intCast(u32, off); + } + header.addr = seg.vmaddr + off - seg.fileoff; - const is_zerofill = opts.flags == macho.S_ZEROFILL or opts.flags == macho.S_THREAD_LOCAL_ZEROFILL; - const use_llvm = build_options.have_llvm and self.base.options.use_llvm; - const use_stage1 = build_options.is_stage1 and self.base.options.use_stage1; + // TODO Will this break if we are inserting section that is not the last section + // in a segment? 
+ const max_size = self.allocatedSize(segment_id, off); - // TODO handle zerofill in stage2 - if (!(is_zerofill and (use_stage1 or use_llvm))) { - sect.offset = @intCast(u32, off); + if (size > max_size) { + try self.growSection(index, @intCast(u32, size)); } + + log.debug("allocating {s},{s} section at 0x{x}", .{ header.segName(), header.sectName(), off }); + + self.updateSectionOrdinals(index + 1); } - const index = @intCast(u16, seg.sections.items.len); - try seg.sections.append(self.base.allocator, sect); - seg.inner.cmdsize += @sizeOf(macho.section_64); - seg.inner.nsects += 1; + return index; +} - const match = MatchingSection{ - .seg = segment_id, - .sect = index, - }; - _ = try self.section_ordinals.getOrPut(self.base.allocator, match); - try self.atom_free_lists.putNoClobber(self.base.allocator, match, .{}); +fn getSectionPrecedence(header: macho.section_64) u4 { + if (header.isCode()) { + if (mem.eql(u8, "__text", header.sectName())) return 0x0; + if (header.@"type"() == macho.S_SYMBOL_STUBS) return 0x1; + return 0x2; + } + switch (header.@"type"()) { + macho.S_NON_LAZY_SYMBOL_POINTERS, + macho.S_LAZY_SYMBOL_POINTERS, + => return 0x0, + macho.S_MOD_INIT_FUNC_POINTERS => return 0x1, + macho.S_MOD_TERM_FUNC_POINTERS => return 0x2, + macho.S_ZEROFILL => return 0xf, + macho.S_THREAD_LOCAL_REGULAR => return 0xd, + macho.S_THREAD_LOCAL_ZEROFILL => return 0xe, + else => if (mem.eql(u8, "__eh_frame", header.sectName())) + return 0xf + else + return 0x3, + } +} - self.load_commands_dirty = true; - self.sections_order_dirty = true; +fn insertSection(self: *MachO, segment_index: u8, header: macho.section_64) !u8 { + const precedence = getSectionPrecedence(header); + const indexes = self.getSectionIndexes(segment_index); + const insertion_index = for (self.sections.items(.header)[indexes.start..indexes.end]) |hdr, i| { + if (getSectionPrecedence(hdr) > precedence) break @intCast(u8, i + indexes.start); + } else indexes.end; + log.debug("inserting section '{s},{s}' at 
index {d}", .{ + header.segName(), + header.sectName(), + insertion_index, + }); + for (&[_]*?u8{ + &self.text_section_index, + &self.stubs_section_index, + &self.stub_helper_section_index, + &self.got_section_index, + &self.la_symbol_ptr_section_index, + &self.data_section_index, + }) |maybe_index| { + const index = maybe_index.* orelse continue; + if (insertion_index <= index) maybe_index.* = index + 1; + } + try self.sections.insert(self.base.allocator, insertion_index, .{ + .segment_index = segment_index, + .header = header, + }); + return insertion_index; +} - return index; +fn updateSectionOrdinals(self: *MachO, start: u8) void { + const tracy = trace(@src()); + defer tracy.end(); + + const slice = self.sections.slice(); + for (slice.items(.last_atom)[start..]) |last_atom| { + var atom = last_atom orelse continue; + + while (true) { + const sym = atom.getSymbolPtr(self); + sym.n_sect = start + 1; + + for (atom.contained.items) |sym_at_off| { + const contained_sym = self.getSymbolPtr(.{ + .sym_index = sym_at_off.sym_index, + .file = atom.file, + }); + contained_sym.n_sect = start + 1; + } + + if (atom.prev) |prev| { + atom = prev; + } else break; + } + } } -fn findFreeSpace(self: MachO, segment_id: u16, alignment: u64, start: ?u32) u64 { - const seg = self.load_commands.items[segment_id].segment; - if (seg.sections.items.len == 0) { - return if (start) |v| v else seg.inner.fileoff; +fn shiftLocalsByOffset(self: *MachO, sect_id: u8, offset: i64) !void { + var atom = self.sections.items(.last_atom)[sect_id] orelse return; + + while (true) { + const atom_sym = atom.getSymbolPtr(self); + atom_sym.n_value = @intCast(u64, @intCast(i64, atom_sym.n_value) + offset); + + for (atom.contained.items) |sym_at_off| { + const contained_sym = self.getSymbolPtr(.{ + .sym_index = sym_at_off.sym_index, + .file = atom.file, + }); + contained_sym.n_value = @intCast(u64, @intCast(i64, contained_sym.n_value) + offset); + } + + if (atom.prev) |prev| { + atom = prev; + } else break; } 
- const last_sect = seg.sections.items[seg.sections.items.len - 1]; - const final_off = last_sect.offset + padToIdeal(last_sect.size); - return mem.alignForwardGeneric(u64, final_off, alignment); } -fn growSegment(self: *MachO, seg_id: u16, new_size: u64) !void { - const seg = &self.load_commands.items[seg_id].segment; - const new_seg_size = mem.alignForwardGeneric(u64, new_size, self.page_size); - assert(new_seg_size > seg.inner.filesize); - const offset_amt = new_seg_size - seg.inner.filesize; +fn growSegment(self: *MachO, segment_index: u8, new_size: u64) !void { + const segment = &self.segments.items[segment_index]; + const new_segment_size = mem.alignForwardGeneric(u64, new_size, self.page_size); + assert(new_segment_size > segment.filesize); + const offset_amt = new_segment_size - segment.filesize; log.debug("growing segment {s} from 0x{x} to 0x{x}", .{ - seg.inner.segname, - seg.inner.filesize, - new_seg_size, + segment.segname, + segment.filesize, + new_segment_size, }); - seg.inner.filesize = new_seg_size; - seg.inner.vmsize = new_seg_size; + segment.filesize = new_segment_size; + segment.vmsize = new_segment_size; log.debug(" (new segment file offsets from 0x{x} to 0x{x} (in memory 0x{x} to 0x{x}))", .{ - seg.inner.fileoff, - seg.inner.fileoff + seg.inner.filesize, - seg.inner.vmaddr, - seg.inner.vmaddr + seg.inner.vmsize, + segment.fileoff, + segment.fileoff + segment.filesize, + segment.vmaddr, + segment.vmaddr + segment.vmsize, }); - var next: usize = seg_id + 1; + var next: u8 = segment_index + 1; while (next < self.linkedit_segment_cmd_index.? 
+ 1) : (next += 1) { - const next_seg = &self.load_commands.items[next].segment; + const next_segment = &self.segments.items[next]; try MachO.copyRangeAllOverlappingAlloc( self.base.allocator, self.base.file.?, - next_seg.inner.fileoff, - next_seg.inner.fileoff + offset_amt, - math.cast(usize, next_seg.inner.filesize) orelse return error.Overflow, + next_segment.fileoff, + next_segment.fileoff + offset_amt, + math.cast(usize, next_segment.filesize) orelse return error.Overflow, ); - next_seg.inner.fileoff += offset_amt; - next_seg.inner.vmaddr += offset_amt; + next_segment.fileoff += offset_amt; + next_segment.vmaddr += offset_amt; log.debug(" (new {s} segment file offsets from 0x{x} to 0x{x} (in memory 0x{x} to 0x{x}))", .{ - next_seg.inner.segname, - next_seg.inner.fileoff, - next_seg.inner.fileoff + next_seg.inner.filesize, - next_seg.inner.vmaddr, - next_seg.inner.vmaddr + next_seg.inner.vmsize, + next_segment.segname, + next_segment.fileoff, + next_segment.fileoff + next_segment.filesize, + next_segment.vmaddr, + next_segment.vmaddr + next_segment.vmsize, }); - for (next_seg.sections.items) |*moved_sect, moved_sect_id| { - moved_sect.offset += @intCast(u32, offset_amt); - moved_sect.addr += offset_amt; + const indexes = self.getSectionIndexes(next); + for (self.sections.items(.header)[indexes.start..indexes.end]) |*header, i| { + header.offset += @intCast(u32, offset_amt); + header.addr += offset_amt; log.debug(" (new {s},{s} file offsets from 0x{x} to 0x{x} (in memory 0x{x} to 0x{x}))", .{ - moved_sect.segName(), - moved_sect.sectName(), - moved_sect.offset, - moved_sect.offset + moved_sect.size, - moved_sect.addr, - moved_sect.addr + moved_sect.size, + header.segName(), + header.sectName(), + header.offset, + header.offset + header.size, + header.addr, + header.addr + header.size, }); - try self.shiftLocalsByOffset(.{ - .seg = @intCast(u16, next), - .sect = @intCast(u16, moved_sect_id), - }, @intCast(i64, offset_amt)); + try 
self.shiftLocalsByOffset(@intCast(u8, i + indexes.start), @intCast(i64, offset_amt)); } } } -fn growSection(self: *MachO, match: MatchingSection, new_size: u32) !void { +fn growSection(self: *MachO, sect_id: u8, new_size: u32) !void { const tracy = trace(@src()); defer tracy.end(); - const seg = &self.load_commands.items[match.seg].segment; - const sect = &seg.sections.items[match.sect]; + const section = self.sections.get(sect_id); + const segment_index = section.segment_index; + const header = section.header; + const segment = self.segments.items[segment_index]; - const alignment = try math.powi(u32, 2, sect.@"align"); - const max_size = self.allocatedSize(match.seg, sect.offset); + const alignment = try math.powi(u32, 2, header.@"align"); + const max_size = self.allocatedSize(segment_index, header.offset); const ideal_size = padToIdeal(new_size); const needed_size = mem.alignForwardGeneric(u32, ideal_size, alignment); if (needed_size > max_size) blk: { log.debug(" (need to grow! needed 0x{x}, max 0x{x})", .{ needed_size, max_size }); - if (match.sect == seg.sections.items.len - 1) { + const indexes = self.getSectionIndexes(segment_index); + if (sect_id == indexes.end - 1) { // Last section, just grow segments - try self.growSegment(match.seg, seg.inner.filesize + needed_size - max_size); + try self.growSegment(segment_index, segment.filesize + needed_size - max_size); break :blk; } // Need to move all sections below in file and address spaces. const offset_amt = offset: { - const max_alignment = try self.getSectionMaxAlignment(match.seg, match.sect + 1); + const max_alignment = try self.getSectionMaxAlignment(sect_id + 1, indexes.end); break :offset mem.alignForwardGeneric(u64, needed_size - max_size, max_alignment); }; // Before we commit to this, check if the segment needs to grow too. // We assume that each section header is growing linearly with the increasing // file offset / virtual memory address space. 
- const last_sect = seg.sections.items[seg.sections.items.len - 1]; - const last_sect_off = last_sect.offset + last_sect.size; - const seg_off = seg.inner.fileoff + seg.inner.filesize; + const last_sect_header = self.sections.items(.header)[indexes.end - 1]; + const last_sect_off = last_sect_header.offset + last_sect_header.size; + const seg_off = segment.fileoff + segment.filesize; if (last_sect_off + offset_amt > seg_off) { // Need to grow segment first. const spill_size = (last_sect_off + offset_amt) - seg_off; - try self.growSegment(match.seg, seg.inner.filesize + spill_size); + try self.growSegment(segment_index, segment.filesize + spill_size); } // We have enough space to expand within the segment, so move all sections by // the required amount and update their header offsets. - const next_sect = seg.sections.items[match.sect + 1]; + const next_sect = self.sections.items(.header)[sect_id + 1]; const total_size = last_sect_off - next_sect.offset; try MachO.copyRangeAllOverlappingAlloc( @@ -5151,9 +4670,7 @@ fn growSection(self: *MachO, match: MatchingSection, new_size: u32) !void { math.cast(usize, total_size) orelse return error.Overflow, ); - var next = match.sect + 1; - while (next < seg.sections.items.len) : (next += 1) { - const moved_sect = &seg.sections.items[next]; + for (self.sections.items(.header)[sect_id + 1 .. 
indexes.end]) |*moved_sect, i| { moved_sect.offset += @intCast(u32, offset_amt); moved_sect.addr += offset_amt; @@ -5166,49 +4683,45 @@ fn growSection(self: *MachO, match: MatchingSection, new_size: u32) !void { moved_sect.addr + moved_sect.size, }); - try self.shiftLocalsByOffset(.{ - .seg = match.seg, - .sect = next, - }, @intCast(i64, offset_amt)); + try self.shiftLocalsByOffset(@intCast(u8, sect_id + 1 + i), @intCast(i64, offset_amt)); } } } -fn allocatedSize(self: MachO, segment_id: u16, start: u64) u64 { - const seg = self.load_commands.items[segment_id].segment; - assert(start >= seg.inner.fileoff); - var min_pos: u64 = seg.inner.fileoff + seg.inner.filesize; +fn allocatedSize(self: MachO, segment_id: u8, start: u64) u64 { + const segment = self.segments.items[segment_id]; + const indexes = self.getSectionIndexes(segment_id); + assert(start >= segment.fileoff); + var min_pos: u64 = segment.fileoff + segment.filesize; if (start > min_pos) return 0; - for (seg.sections.items) |section| { - if (section.offset <= start) continue; - if (section.offset < min_pos) min_pos = section.offset; + for (self.sections.items(.header)[indexes.start..indexes.end]) |header| { + if (header.offset <= start) continue; + if (header.offset < min_pos) min_pos = header.offset; } return min_pos - start; } -fn getSectionMaxAlignment(self: *MachO, segment_id: u16, start_sect_id: u16) !u32 { - const seg = self.load_commands.items[segment_id].segment; +fn getSectionMaxAlignment(self: *MachO, start: u8, end: u8) !u32 { var max_alignment: u32 = 1; - var next = start_sect_id; - while (next < seg.sections.items.len) : (next += 1) { - const sect = seg.sections.items[next]; - const alignment = try math.powi(u32, 2, sect.@"align"); + const slice = self.sections.slice(); + for (slice.items(.header)[start..end]) |header| { + const alignment = try math.powi(u32, 2, header.@"align"); max_alignment = math.max(max_alignment, alignment); } return max_alignment; } -fn allocateAtomCommon(self: *MachO, 
atom: *Atom, match: MatchingSection) !void { +fn allocateAtomCommon(self: *MachO, atom: *Atom, sect_id: u8) !void { const sym = atom.getSymbolPtr(self); if (self.mode == .incremental) { const size = atom.size; const alignment = try math.powi(u32, 2, atom.alignment); - const vaddr = try self.allocateAtom(atom, size, alignment, match); + const vaddr = try self.allocateAtom(atom, size, alignment, sect_id); const sym_name = atom.getName(self); log.debug("allocated {s} atom at 0x{x}", .{ sym_name, vaddr }); sym.n_value = vaddr; - } else try self.addAtomToSection(atom, match); - sym.n_sect = self.getSectionOrdinal(match); + } else try self.addAtomToSection(atom, sect_id); + sym.n_sect = sect_id + 1; } fn allocateAtom( @@ -5216,15 +4729,15 @@ fn allocateAtom( atom: *Atom, new_atom_size: u64, alignment: u64, - match: MatchingSection, + sect_id: u8, ) !u64 { const tracy = trace(@src()); defer tracy.end(); - const sect = self.getSectionPtr(match); - var free_list = self.atom_free_lists.get(match).?; - const needs_padding = match.seg == self.text_segment_cmd_index.? and match.sect == self.text_section_index.?; - const new_atom_ideal_capacity = if (needs_padding) padToIdeal(new_atom_size) else new_atom_size; + const header = &self.sections.items(.header)[sect_id]; + const free_list = &self.sections.items(.free_list)[sect_id]; + const maybe_last_atom = &self.sections.items(.last_atom)[sect_id]; + const new_atom_ideal_capacity = if (header.isCode()) padToIdeal(new_atom_size) else new_atom_size; // We use these to indicate our intention to update metadata, placing the new atom, // and possibly removing a free list node. @@ -5244,7 +4757,7 @@ fn allocateAtom( // Is it enough that we could fit this new atom? 
const sym = big_atom.getSymbol(self); const capacity = big_atom.capacity(self); - const ideal_capacity = if (needs_padding) padToIdeal(capacity) else capacity; + const ideal_capacity = if (header.isCode()) padToIdeal(capacity) else capacity; const ideal_capacity_end_vaddr = math.add(u64, sym.n_value, ideal_capacity) catch ideal_capacity; const capacity_end_vaddr = sym.n_value + capacity; const new_start_vaddr_unaligned = capacity_end_vaddr - new_atom_ideal_capacity; @@ -5272,30 +4785,28 @@ fn allocateAtom( free_list_removal = i; } break :blk new_start_vaddr; - } else if (self.atoms.get(match)) |last| { + } else if (maybe_last_atom.*) |last| { const last_symbol = last.getSymbol(self); - const ideal_capacity = if (needs_padding) padToIdeal(last.size) else last.size; + const ideal_capacity = if (header.isCode()) padToIdeal(last.size) else last.size; const ideal_capacity_end_vaddr = last_symbol.n_value + ideal_capacity; const new_start_vaddr = mem.alignForwardGeneric(u64, ideal_capacity_end_vaddr, alignment); atom_placement = last; break :blk new_start_vaddr; } else { - break :blk mem.alignForwardGeneric(u64, sect.addr, alignment); + break :blk mem.alignForwardGeneric(u64, header.addr, alignment); } }; const expand_section = atom_placement == null or atom_placement.?.next == null; if (expand_section) { - const needed_size = @intCast(u32, (vaddr + new_atom_size) - sect.addr); - try self.growSection(match, needed_size); - _ = try self.atoms.put(self.base.allocator, match, atom); - sect.size = needed_size; - self.load_commands_dirty = true; + const needed_size = @intCast(u32, (vaddr + new_atom_size) - header.addr); + try self.growSection(sect_id, needed_size); + maybe_last_atom.* = atom; + header.size = needed_size; } const align_pow = @intCast(u32, math.log2(alignment)); - if (sect.@"align" < align_pow) { - sect.@"align" = align_pow; - self.load_commands_dirty = true; + if (header.@"align" < align_pow) { + header.@"align" = align_pow; } atom.size = new_atom_size; 
atom.alignment = align_pow; @@ -5322,20 +4833,19 @@ fn allocateAtom( return vaddr; } -pub fn addAtomToSection(self: *MachO, atom: *Atom, match: MatchingSection) !void { - if (self.atoms.getPtr(match)) |last| { - last.*.next = atom; - atom.prev = last.*; - last.* = atom; - } else { - try self.atoms.putNoClobber(self.base.allocator, match, atom); +pub fn addAtomToSection(self: *MachO, atom: *Atom, sect_id: u8) !void { + var section = self.sections.get(sect_id); + if (section.header.size > 0) { + section.last_atom.?.next = atom; + atom.prev = section.last_atom.?; } - const sect = self.getSectionPtr(match); + section.last_atom = atom; const atom_alignment = try math.powi(u32, 2, atom.alignment); - const aligned_end_addr = mem.alignForwardGeneric(u64, sect.size, atom_alignment); - const padding = aligned_end_addr - sect.size; - sect.size += padding + atom.size; - sect.@"align" = @maximum(sect.@"align", atom.alignment); + const aligned_end_addr = mem.alignForwardGeneric(u64, section.header.size, atom_alignment); + const padding = aligned_end_addr - section.header.size; + section.header.size += padding + atom.size; + section.header.@"align" = @maximum(section.header.@"align", atom.alignment); + self.sections.set(sect_id, section); } pub fn getGlobalSymbol(self: *MachO, name: []const u8) !u32 { @@ -5368,208 +4878,61 @@ pub fn getGlobalSymbol(self: *MachO, name: []const u8) !u32 { return sym_index; } -fn getSegmentAllocBase(self: MachO, indices: []const ?u16) struct { vmaddr: u64, fileoff: u64 } { +fn getSegmentAllocBase(self: MachO, indices: []const ?u8) struct { vmaddr: u64, fileoff: u64 } { for (indices) |maybe_prev_id| { const prev_id = maybe_prev_id orelse continue; - const prev = self.load_commands.items[prev_id].segment; + const prev = self.segments.items[prev_id]; return .{ - .vmaddr = prev.inner.vmaddr + prev.inner.vmsize, - .fileoff = prev.inner.fileoff + prev.inner.filesize, + .vmaddr = prev.vmaddr + prev.vmsize, + .fileoff = prev.fileoff + prev.filesize, }; } 
return .{ .vmaddr = 0, .fileoff = 0 }; } -fn pruneAndSortSectionsInSegment(self: *MachO, maybe_seg_id: *?u16, indices: []*?u16) !void { - const seg_id = maybe_seg_id.* orelse return; - - var mapping = std.AutoArrayHashMap(u16, ?u16).init(self.base.allocator); - defer mapping.deinit(); - - const seg = &self.load_commands.items[seg_id].segment; - var sections = seg.sections.toOwnedSlice(self.base.allocator); - defer self.base.allocator.free(sections); - try seg.sections.ensureTotalCapacity(self.base.allocator, sections.len); - - for (indices) |maybe_index| { - const old_idx = maybe_index.* orelse continue; - const sect = §ions[old_idx]; - if (sect.size == 0) { - log.debug("pruning section {s},{s}", .{ sect.segName(), sect.sectName() }); - maybe_index.* = null; - seg.inner.cmdsize -= @sizeOf(macho.section_64); - seg.inner.nsects -= 1; - } else { - maybe_index.* = @intCast(u16, seg.sections.items.len); - seg.sections.appendAssumeCapacity(sect.*); +fn writeSegmentHeaders(self: *MachO, ncmds: *u32, writer: anytype) !void { + for (self.segments.items) |seg, i| { + const indexes = self.getSectionIndexes(@intCast(u8, i)); + var out_seg = seg; + out_seg.cmdsize = @sizeOf(macho.segment_command_64); + out_seg.nsects = 0; + + // Update section headers count; any section with size of 0 is excluded + // since it doesn't have any data in the final binary file. + for (self.sections.items(.header)[indexes.start..indexes.end]) |header| { + if (header.size == 0) continue; + out_seg.cmdsize += @sizeOf(macho.section_64); + out_seg.nsects += 1; } - try mapping.putNoClobber(old_idx, maybe_index.*); - } - var atoms = std.ArrayList(struct { match: MatchingSection, atom: *Atom }).init(self.base.allocator); - defer atoms.deinit(); - try atoms.ensureTotalCapacity(mapping.count()); + if (out_seg.nsects == 0 and + (mem.eql(u8, out_seg.segName(), "__DATA_CONST") or + mem.eql(u8, out_seg.segName(), "__DATA"))) continue; - for (mapping.keys()) |old_sect| { - const new_sect = mapping.get(old_sect).? 
orelse { - _ = self.atoms.remove(.{ .seg = seg_id, .sect = old_sect }); - continue; - }; - const kv = self.atoms.fetchRemove(.{ .seg = seg_id, .sect = old_sect }).?; - atoms.appendAssumeCapacity(.{ - .match = .{ .seg = seg_id, .sect = new_sect }, - .atom = kv.value, - }); - } - - while (atoms.popOrNull()) |next| { - try self.atoms.putNoClobber(self.base.allocator, next.match, next.atom); - } - - if (seg.inner.nsects == 0 and !mem.eql(u8, "__TEXT", seg.inner.segName())) { - // Segment has now become empty, so mark it as such - log.debug("marking segment {s} as dead", .{seg.inner.segName()}); - seg.inner.cmd = @intToEnum(macho.LC, 0); - maybe_seg_id.* = null; - } -} - -fn pruneAndSortSections(self: *MachO) !void { - try self.pruneAndSortSectionsInSegment(&self.text_segment_cmd_index, &.{ - &self.text_section_index, - &self.stubs_section_index, - &self.stub_helper_section_index, - &self.gcc_except_tab_section_index, - &self.cstring_section_index, - &self.ustring_section_index, - &self.text_const_section_index, - &self.objc_methlist_section_index, - &self.objc_methname_section_index, - &self.objc_methtype_section_index, - &self.objc_classname_section_index, - &self.eh_frame_section_index, - }); - - try self.pruneAndSortSectionsInSegment(&self.data_const_segment_cmd_index, &.{ - &self.got_section_index, - &self.mod_init_func_section_index, - &self.mod_term_func_section_index, - &self.data_const_section_index, - &self.objc_cfstring_section_index, - &self.objc_classlist_section_index, - &self.objc_imageinfo_section_index, - }); - - try self.pruneAndSortSectionsInSegment(&self.data_segment_cmd_index, &.{ - &self.rustc_section_index, - &self.la_symbol_ptr_section_index, - &self.objc_const_section_index, - &self.objc_selrefs_section_index, - &self.objc_classrefs_section_index, - &self.objc_data_section_index, - &self.data_section_index, - &self.tlv_section_index, - &self.tlv_ptrs_section_index, - &self.tlv_data_section_index, - &self.tlv_bss_section_index, - 
&self.bss_section_index, - }); - - // Create new section ordinals. - self.section_ordinals.clearRetainingCapacity(); - if (self.text_segment_cmd_index) |seg_id| { - const seg = self.load_commands.items[seg_id].segment; - for (seg.sections.items) |_, sect_id| { - const res = self.section_ordinals.getOrPutAssumeCapacity(.{ - .seg = seg_id, - .sect = @intCast(u16, sect_id), - }); - assert(!res.found_existing); - } - } - if (self.data_const_segment_cmd_index) |seg_id| { - const seg = self.load_commands.items[seg_id].segment; - for (seg.sections.items) |_, sect_id| { - const res = self.section_ordinals.getOrPutAssumeCapacity(.{ - .seg = seg_id, - .sect = @intCast(u16, sect_id), - }); - assert(!res.found_existing); - } - } - if (self.data_segment_cmd_index) |seg_id| { - const seg = self.load_commands.items[seg_id].segment; - for (seg.sections.items) |_, sect_id| { - const res = self.section_ordinals.getOrPutAssumeCapacity(.{ - .seg = seg_id, - .sect = @intCast(u16, sect_id), - }); - assert(!res.found_existing); + try writer.writeStruct(out_seg); + for (self.sections.items(.header)[indexes.start..indexes.end]) |header| { + if (header.size == 0) continue; + try writer.writeStruct(header); } + + ncmds.* += 1; } - self.sections_order_dirty = false; } -fn updateSectionOrdinals(self: *MachO) !void { - if (!self.sections_order_dirty) return; - - const tracy = trace(@src()); - defer tracy.end(); - - log.debug("updating section ordinals", .{}); - - const gpa = self.base.allocator; - - var ordinal_remap = std.AutoHashMap(u8, u8).init(gpa); - defer ordinal_remap.deinit(); - var ordinals: std.AutoArrayHashMapUnmanaged(MatchingSection, void) = .{}; - - var new_ordinal: u8 = 0; - for (&[_]?u16{ - self.text_segment_cmd_index, - self.data_const_segment_cmd_index, - self.data_segment_cmd_index, - }) |maybe_index| { - const index = maybe_index orelse continue; - const seg = self.load_commands.items[index].segment; - for (seg.sections.items) |sect, sect_id| { - const match = 
MatchingSection{ - .seg = @intCast(u16, index), - .sect = @intCast(u16, sect_id), - }; - const old_ordinal = self.getSectionOrdinal(match); - new_ordinal += 1; - log.debug("'{s},{s}': sect({d}, '_,_') => sect({d}, '_,_')", .{ - sect.segName(), - sect.sectName(), - old_ordinal, - new_ordinal, - }); - try ordinal_remap.putNoClobber(old_ordinal, new_ordinal); - try ordinals.putNoClobber(gpa, match, {}); - } - } +fn writeLinkeditSegmentData(self: *MachO, ncmds: *u32, lc_writer: anytype) !void { + const seg = &self.segments.items[self.linkedit_segment_cmd_index.?]; + seg.filesize = 0; + seg.vmsize = 0; - // FIXME Jakub - // TODO no need for duping work here; simply walk the atom graph - for (self.locals.items) |*sym| { - if (sym.undf()) continue; - if (sym.n_sect == 0) continue; - sym.n_sect = ordinal_remap.get(sym.n_sect).?; - } - for (self.objects.items) |*object| { - for (object.symtab.items) |*sym| { - if (sym.undf()) continue; - if (sym.n_sect == 0) continue; - sym.n_sect = ordinal_remap.get(sym.n_sect).?; - } - } + try self.writeDyldInfoData(ncmds, lc_writer); + try self.writeFunctionStarts(ncmds, lc_writer); + try self.writeDataInCode(ncmds, lc_writer); + try self.writeSymtabs(ncmds, lc_writer); - self.section_ordinals.deinit(gpa); - self.section_ordinals = ordinals; + seg.vmsize = mem.alignForwardGeneric(u64, seg.filesize, self.page_size); } -fn writeDyldInfoData(self: *MachO) !void { +fn writeDyldInfoData(self: *MachO, ncmds: *u32, lc_writer: anytype) !void { const tracy = trace(@src()); defer tracy.end(); @@ -5582,89 +4945,86 @@ fn writeDyldInfoData(self: *MachO) !void { var lazy_bind_pointers = std.ArrayList(bind.Pointer).init(gpa); defer lazy_bind_pointers.deinit(); - { - var it = self.atoms.iterator(); - while (it.next()) |entry| { - const match = entry.key_ptr.*; - var atom: *Atom = entry.value_ptr.*; + const slice = self.sections.slice(); + for (slice.items(.last_atom)) |last_atom, sect_id| { + var atom = last_atom orelse continue; + const segment_index = 
slice.items(.segment_index)[sect_id]; + const header = slice.items(.header)[sect_id]; - if (self.text_segment_cmd_index) |seg| { - if (match.seg == seg) continue; // __TEXT is non-writable - } + if (mem.eql(u8, header.segName(), "__TEXT")) continue; // __TEXT is non-writable - const seg = self.getSegment(match); - const sect = self.getSection(match); - log.debug("dyld info for {s},{s}", .{ sect.segName(), sect.sectName() }); - - while (true) { - log.debug(" ATOM(%{d}, '{s}')", .{ atom.sym_index, atom.getName(self) }); - const sym = atom.getSymbol(self); - const base_offset = sym.n_value - seg.inner.vmaddr; - - for (atom.rebases.items) |offset| { - log.debug(" | rebase at {x}", .{base_offset + offset}); - try rebase_pointers.append(.{ - .offset = base_offset + offset, - .segment_id = match.seg, - }); - } + log.debug("dyld info for {s},{s}", .{ header.segName(), header.sectName() }); - for (atom.bindings.items) |binding| { - const bind_sym = self.getSymbol(binding.target); - const bind_sym_name = self.getSymbolName(binding.target); - const dylib_ordinal = @divTrunc( - @bitCast(i16, bind_sym.n_desc), - macho.N_SYMBOL_RESOLVER, - ); - var flags: u4 = 0; - log.debug(" | bind at {x}, import('{s}') in dylib({d})", .{ - binding.offset + base_offset, - bind_sym_name, - dylib_ordinal, - }); - if (bind_sym.weakRef()) { - log.debug(" | marking as weak ref ", .{}); - flags |= @truncate(u4, macho.BIND_SYMBOL_FLAGS_WEAK_IMPORT); - } - try bind_pointers.append(.{ - .offset = binding.offset + base_offset, - .segment_id = match.seg, - .dylib_ordinal = dylib_ordinal, - .name = bind_sym_name, - .bind_flags = flags, - }); - } + const seg = self.segments.items[segment_index]; - for (atom.lazy_bindings.items) |binding| { - const bind_sym = self.getSymbol(binding.target); - const bind_sym_name = self.getSymbolName(binding.target); - const dylib_ordinal = @divTrunc( - @bitCast(i16, bind_sym.n_desc), - macho.N_SYMBOL_RESOLVER, - ); - var flags: u4 = 0; - log.debug(" | lazy bind at {x} 
import('{s}') ord({d})", .{ - binding.offset + base_offset, - bind_sym_name, - dylib_ordinal, - }); - if (bind_sym.weakRef()) { - log.debug(" | marking as weak ref ", .{}); - flags |= @truncate(u4, macho.BIND_SYMBOL_FLAGS_WEAK_IMPORT); - } - try lazy_bind_pointers.append(.{ - .offset = binding.offset + base_offset, - .segment_id = match.seg, - .dylib_ordinal = dylib_ordinal, - .name = bind_sym_name, - .bind_flags = flags, - }); + while (true) { + log.debug(" ATOM(%{d}, '{s}')", .{ atom.sym_index, atom.getName(self) }); + const sym = atom.getSymbol(self); + const base_offset = sym.n_value - seg.vmaddr; + + for (atom.rebases.items) |offset| { + log.debug(" | rebase at {x}", .{base_offset + offset}); + try rebase_pointers.append(.{ + .offset = base_offset + offset, + .segment_id = segment_index, + }); + } + + for (atom.bindings.items) |binding| { + const bind_sym = self.getSymbol(binding.target); + const bind_sym_name = self.getSymbolName(binding.target); + const dylib_ordinal = @divTrunc( + @bitCast(i16, bind_sym.n_desc), + macho.N_SYMBOL_RESOLVER, + ); + var flags: u4 = 0; + log.debug(" | bind at {x}, import('{s}') in dylib({d})", .{ + binding.offset + base_offset, + bind_sym_name, + dylib_ordinal, + }); + if (bind_sym.weakRef()) { + log.debug(" | marking as weak ref ", .{}); + flags |= @truncate(u4, macho.BIND_SYMBOL_FLAGS_WEAK_IMPORT); } + try bind_pointers.append(.{ + .offset = binding.offset + base_offset, + .segment_id = segment_index, + .dylib_ordinal = dylib_ordinal, + .name = bind_sym_name, + .bind_flags = flags, + }); + } - if (atom.prev) |prev| { - atom = prev; - } else break; + for (atom.lazy_bindings.items) |binding| { + const bind_sym = self.getSymbol(binding.target); + const bind_sym_name = self.getSymbolName(binding.target); + const dylib_ordinal = @divTrunc( + @bitCast(i16, bind_sym.n_desc), + macho.N_SYMBOL_RESOLVER, + ); + var flags: u4 = 0; + log.debug(" | lazy bind at {x} import('{s}') ord({d})", .{ + binding.offset + base_offset, + 
bind_sym_name, + dylib_ordinal, + }); + if (bind_sym.weakRef()) { + log.debug(" | marking as weak ref ", .{}); + flags |= @truncate(u4, macho.BIND_SYMBOL_FLAGS_WEAK_IMPORT); + } + try lazy_bind_pointers.append(.{ + .offset = binding.offset + base_offset, + .segment_id = segment_index, + .dylib_ordinal = dylib_ordinal, + .name = bind_sym_name, + .bind_flags = flags, + }); } + + if (atom.prev) |prev| { + atom = prev; + } else break; } } @@ -5675,8 +5035,8 @@ fn writeDyldInfoData(self: *MachO) !void { // TODO handle macho.EXPORT_SYMBOL_FLAGS_REEXPORT and macho.EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER. log.debug("generating export trie", .{}); - const text_segment = self.load_commands.items[self.text_segment_cmd_index.?].segment; - const base_address = text_segment.inner.vmaddr; + const text_segment = self.segments.items[self.text_segment_cmd_index.?]; + const base_address = text_segment.vmaddr; if (self.base.options.output_mode == .Exe) { for (&[_]SymbolWithLoc{ @@ -5714,103 +5074,91 @@ fn writeDyldInfoData(self: *MachO) !void { try trie.finalize(gpa); } - const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].segment; - const dyld_info = &self.load_commands.items[self.dyld_info_cmd_index.?].dyld_info_only; - - const rebase_off = mem.alignForwardGeneric(u64, seg.inner.fileoff, @alignOf(u64)); + const link_seg = &self.segments.items[self.linkedit_segment_cmd_index.?]; + const rebase_off = mem.alignForwardGeneric(u64, link_seg.fileoff, @alignOf(u64)); + assert(rebase_off == link_seg.fileoff); const rebase_size = try bind.rebaseInfoSize(rebase_pointers.items); - dyld_info.rebase_off = @intCast(u32, rebase_off); - dyld_info.rebase_size = @intCast(u32, rebase_size); - log.debug("writing rebase info from 0x{x} to 0x{x}", .{ - dyld_info.rebase_off, - dyld_info.rebase_off + dyld_info.rebase_size, - }); + log.debug("writing rebase info from 0x{x} to 0x{x}", .{ rebase_off, rebase_off + rebase_size }); - const bind_off = mem.alignForwardGeneric(u64, 
dyld_info.rebase_off + dyld_info.rebase_size, @alignOf(u64)); + const bind_off = mem.alignForwardGeneric(u64, rebase_off + rebase_size, @alignOf(u64)); const bind_size = try bind.bindInfoSize(bind_pointers.items); - dyld_info.bind_off = @intCast(u32, bind_off); - dyld_info.bind_size = @intCast(u32, bind_size); - log.debug("writing bind info from 0x{x} to 0x{x}", .{ - dyld_info.bind_off, - dyld_info.bind_off + dyld_info.bind_size, - }); + log.debug("writing bind info from 0x{x} to 0x{x}", .{ bind_off, bind_off + bind_size }); - const lazy_bind_off = mem.alignForwardGeneric(u64, dyld_info.bind_off + dyld_info.bind_size, @alignOf(u64)); + const lazy_bind_off = mem.alignForwardGeneric(u64, bind_off + bind_size, @alignOf(u64)); const lazy_bind_size = try bind.lazyBindInfoSize(lazy_bind_pointers.items); - dyld_info.lazy_bind_off = @intCast(u32, lazy_bind_off); - dyld_info.lazy_bind_size = @intCast(u32, lazy_bind_size); - log.debug("writing lazy bind info from 0x{x} to 0x{x}", .{ - dyld_info.lazy_bind_off, - dyld_info.lazy_bind_off + dyld_info.lazy_bind_size, - }); + log.debug("writing lazy bind info from 0x{x} to 0x{x}", .{ lazy_bind_off, lazy_bind_off + lazy_bind_size }); - const export_off = mem.alignForwardGeneric(u64, dyld_info.lazy_bind_off + dyld_info.lazy_bind_size, @alignOf(u64)); + const export_off = mem.alignForwardGeneric(u64, lazy_bind_off + lazy_bind_size, @alignOf(u64)); const export_size = trie.size; - dyld_info.export_off = @intCast(u32, export_off); - dyld_info.export_size = @intCast(u32, export_size); - log.debug("writing export trie from 0x{x} to 0x{x}", .{ - dyld_info.export_off, - dyld_info.export_off + dyld_info.export_size, - }); + log.debug("writing export trie from 0x{x} to 0x{x}", .{ export_off, export_off + export_size }); - seg.inner.filesize = dyld_info.export_off + dyld_info.export_size - seg.inner.fileoff; + const needed_size = export_off + export_size - rebase_off; + link_seg.filesize = needed_size; - const needed_size = 
dyld_info.export_off + dyld_info.export_size - dyld_info.rebase_off; - var buffer = try gpa.alloc(u8, needed_size); + var buffer = try gpa.alloc(u8, math.cast(usize, needed_size) orelse return error.Overflow); defer gpa.free(buffer); mem.set(u8, buffer, 0); var stream = std.io.fixedBufferStream(buffer); const writer = stream.writer(); - const base_off = dyld_info.rebase_off; try bind.writeRebaseInfo(rebase_pointers.items, writer); - try stream.seekTo(dyld_info.bind_off - base_off); + try stream.seekTo(bind_off - rebase_off); try bind.writeBindInfo(bind_pointers.items, writer); - try stream.seekTo(dyld_info.lazy_bind_off - base_off); + try stream.seekTo(lazy_bind_off - rebase_off); try bind.writeLazyBindInfo(lazy_bind_pointers.items, writer); - try stream.seekTo(dyld_info.export_off - base_off); + try stream.seekTo(export_off - rebase_off); _ = try trie.write(writer); log.debug("writing dyld info from 0x{x} to 0x{x}", .{ - dyld_info.rebase_off, - dyld_info.rebase_off + needed_size, + rebase_off, + rebase_off + needed_size, }); - try self.base.file.?.pwriteAll(buffer, dyld_info.rebase_off); - try self.populateLazyBindOffsetsInStubHelper( - buffer[dyld_info.lazy_bind_off - base_off ..][0..dyld_info.lazy_bind_size], - ); - - self.load_commands_dirty = true; + try self.base.file.?.pwriteAll(buffer, rebase_off); + const start = math.cast(usize, lazy_bind_off - rebase_off) orelse return error.Overflow; + const end = start + (math.cast(usize, lazy_bind_size) orelse return error.Overflow); + try self.populateLazyBindOffsetsInStubHelper(buffer[start..end]); + + try lc_writer.writeStruct(macho.dyld_info_command{ + .cmd = .DYLD_INFO_ONLY, + .cmdsize = @sizeOf(macho.dyld_info_command), + .rebase_off = @intCast(u32, rebase_off), + .rebase_size = @intCast(u32, rebase_size), + .bind_off = @intCast(u32, bind_off), + .bind_size = @intCast(u32, bind_size), + .weak_bind_off = 0, + .weak_bind_size = 0, + .lazy_bind_off = @intCast(u32, lazy_bind_off), + .lazy_bind_size = @intCast(u32, 
lazy_bind_size), + .export_off = @intCast(u32, export_off), + .export_size = @intCast(u32, export_size), + }); + ncmds.* += 1; } fn populateLazyBindOffsetsInStubHelper(self: *MachO, buffer: []const u8) !void { const gpa = self.base.allocator; - const text_segment_cmd_index = self.text_segment_cmd_index orelse return; + const stub_helper_section_index = self.stub_helper_section_index orelse return; - const last_atom = self.atoms.get(.{ - .seg = text_segment_cmd_index, - .sect = stub_helper_section_index, - }) orelse return; if (self.stub_helper_preamble_atom == null) return; - if (last_atom == self.stub_helper_preamble_atom.?) return; + + const section = self.sections.get(stub_helper_section_index); + const last_atom = section.last_atom orelse return; + if (last_atom == self.stub_helper_preamble_atom.?) return; // TODO is this a redundant check? var table = std.AutoHashMap(i64, *Atom).init(gpa); defer table.deinit(); { var stub_atom = last_atom; - var laptr_atom = self.atoms.get(.{ - .seg = self.data_segment_cmd_index.?, - .sect = self.la_symbol_ptr_section_index.?, - }).?; + var laptr_atom = self.sections.items(.last_atom)[self.la_symbol_ptr_section_index.?].?; const base_addr = blk: { - const seg = self.load_commands.items[self.data_segment_cmd_index.?].segment; - break :blk seg.inner.vmaddr; + const seg = self.segments.items[self.data_segment_cmd_index.?]; + break :blk seg.vmaddr; }; while (true) { @@ -5871,10 +5219,7 @@ fn populateLazyBindOffsetsInStubHelper(self: *MachO, buffer: []const u8) !void { } } - const sect = self.getSection(.{ - .seg = text_segment_cmd_index, - .sect = stub_helper_section_index, - }); + const header = self.sections.items(.header)[stub_helper_section_index]; const stub_offset: u4 = switch (self.base.options.target.cpu.arch) { .x86_64 => 1, .aarch64 => 2 * @sizeOf(u32), @@ -5886,7 +5231,7 @@ fn populateLazyBindOffsetsInStubHelper(self: *MachO, buffer: []const u8) !void { while (offsets.popOrNull()) |bind_offset| { const atom = 
table.get(bind_offset.sym_offset).?; const sym = atom.getSymbol(self); - const file_offset = sect.offset + sym.n_value - sect.addr + stub_offset; + const file_offset = header.offset + sym.n_value - header.addr + stub_offset; mem.writeIntLittle(u32, &buf, bind_offset.offset); log.debug("writing lazy bind offset in stub helper of 0x{x} for symbol {s} at offset 0x{x}", .{ bind_offset.offset, @@ -5899,14 +5244,14 @@ fn populateLazyBindOffsetsInStubHelper(self: *MachO, buffer: []const u8) !void { const asc_u64 = std.sort.asc(u64); -fn writeFunctionStarts(self: *MachO) !void { - const text_seg_index = self.text_segment_cmd_index orelse return; - const text_sect_index = self.text_section_index orelse return; - const text_seg = self.load_commands.items[text_seg_index].segment; - +fn writeFunctionStarts(self: *MachO, ncmds: *u32, lc_writer: anytype) !void { const tracy = trace(@src()); defer tracy.end(); + const text_seg_index = self.text_segment_cmd_index orelse return; + const text_sect_index = self.text_section_index orelse return; + const text_seg = self.segments.items[text_seg_index]; + const gpa = self.base.allocator; // We need to sort by address first @@ -5918,8 +5263,8 @@ fn writeFunctionStarts(self: *MachO) !void { const sym = self.getSymbol(global); if (sym.undf()) continue; if (sym.n_desc == N_DESC_GCED) continue; - const match = self.getMatchingSectionFromOrdinal(sym.n_sect); - if (match.seg != text_seg_index or match.sect != text_sect_index) continue; + const sect_id = sym.n_sect - 1; + if (sect_id != text_sect_index) continue; addresses.appendAssumeCapacity(sym.n_value); } @@ -5932,7 +5277,7 @@ fn writeFunctionStarts(self: *MachO) !void { var last_off: u32 = 0; for (addresses.items) |addr| { - const offset = @intCast(u32, addr - text_seg.inner.vmaddr); + const offset = @intCast(u32, addr - text_seg.vmaddr); const diff = offset - last_off; if (diff == 0) continue; @@ -5951,22 +5296,22 @@ fn writeFunctionStarts(self: *MachO) !void { try 
std.leb.writeULEB128(buffer.writer(), offset); } - const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].segment; - const fn_cmd = &self.load_commands.items[self.function_starts_cmd_index.?].linkedit_data; + const link_seg = &self.segments.items[self.linkedit_segment_cmd_index.?]; + const offset = mem.alignForwardGeneric(u64, link_seg.fileoff + link_seg.filesize, @alignOf(u64)); + const needed_size = buffer.items.len; + link_seg.filesize = offset + needed_size - link_seg.fileoff; - const dataoff = mem.alignForwardGeneric(u64, seg.inner.fileoff + seg.inner.filesize, @alignOf(u64)); - const datasize = buffer.items.len; - fn_cmd.dataoff = @intCast(u32, dataoff); - fn_cmd.datasize = @intCast(u32, datasize); - seg.inner.filesize = fn_cmd.dataoff + fn_cmd.datasize - seg.inner.fileoff; + log.debug("writing function starts info from 0x{x} to 0x{x}", .{ offset, offset + needed_size }); - log.debug("writing function starts info from 0x{x} to 0x{x}", .{ - fn_cmd.dataoff, - fn_cmd.dataoff + fn_cmd.datasize, - }); + try self.base.file.?.pwriteAll(buffer.items, offset); - try self.base.file.?.pwriteAll(buffer.items, fn_cmd.dataoff); - self.load_commands_dirty = true; + try lc_writer.writeStruct(macho.linkedit_data_command{ + .cmd = .FUNCTION_STARTS, + .cmdsize = @sizeOf(macho.linkedit_data_command), + .dataoff = @intCast(u32, offset), + .datasize = @intCast(u32, needed_size), + }); + ncmds.* += 1; } fn filterDataInCode( @@ -5988,17 +5333,15 @@ fn filterDataInCode( return dices[start..end]; } -fn writeDataInCode(self: *MachO) !void { +fn writeDataInCode(self: *MachO, ncmds: *u32, lc_writer: anytype) !void { const tracy = trace(@src()); defer tracy.end(); var out_dice = std.ArrayList(macho.data_in_code_entry).init(self.base.allocator); defer out_dice.deinit(); - const text_sect = self.getSection(.{ - .seg = self.text_segment_cmd_index orelse return, - .sect = self.text_section_index orelse return, - }); + const text_sect_id = self.text_section_index orelse 
return; + const text_sect_header = self.sections.items(.header)[text_sect_id]; for (self.objects.items) |object| { const dice = object.parseDataInCode() orelse continue; @@ -6008,15 +5351,15 @@ fn writeDataInCode(self: *MachO) !void { const sym = atom.getSymbol(self); if (sym.n_desc == N_DESC_GCED) continue; - const match = self.getMatchingSectionFromOrdinal(sym.n_sect); - if (match.seg != self.text_segment_cmd_index.? and match.sect != self.text_section_index.?) { + const sect_id = sym.n_sect - 1; + if (sect_id != self.text_section_index.?) { continue; } const source_sym = object.getSourceSymbol(atom.sym_index) orelse continue; const source_addr = math.cast(u32, source_sym.n_value) orelse return error.Overflow; const filtered_dice = filterDataInCode(dice, source_addr, source_addr + atom.size); - const base = math.cast(u32, sym.n_value - text_sect.addr + text_sect.offset) orelse + const base = math.cast(u32, sym.n_value - text_sect_header.addr + text_sect_header.offset) orelse return error.Overflow; for (filtered_dice) |single| { @@ -6030,33 +5373,63 @@ fn writeDataInCode(self: *MachO) !void { } } - const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].segment; - const dice_cmd = &self.load_commands.items[self.data_in_code_cmd_index.?].linkedit_data; + const seg = &self.segments.items[self.linkedit_segment_cmd_index.?]; + const offset = mem.alignForwardGeneric(u64, seg.fileoff + seg.filesize, @alignOf(u64)); + const needed_size = out_dice.items.len * @sizeOf(macho.data_in_code_entry); + seg.filesize = offset + needed_size - seg.fileoff; - const dataoff = mem.alignForwardGeneric(u64, seg.inner.fileoff + seg.inner.filesize, @alignOf(u64)); - const datasize = out_dice.items.len * @sizeOf(macho.data_in_code_entry); - dice_cmd.dataoff = @intCast(u32, dataoff); - dice_cmd.datasize = @intCast(u32, datasize); - seg.inner.filesize = dice_cmd.dataoff + dice_cmd.datasize - seg.inner.fileoff; + log.debug("writing data-in-code from 0x{x} to 0x{x}", .{ offset, 
offset + needed_size }); - log.debug("writing data-in-code from 0x{x} to 0x{x}", .{ - dice_cmd.dataoff, - dice_cmd.dataoff + dice_cmd.datasize, + try self.base.file.?.pwriteAll(mem.sliceAsBytes(out_dice.items), offset); + try lc_writer.writeStruct(macho.linkedit_data_command{ + .cmd = .DATA_IN_CODE, + .cmdsize = @sizeOf(macho.linkedit_data_command), + .dataoff = @intCast(u32, offset), + .datasize = @intCast(u32, needed_size), }); - - try self.base.file.?.pwriteAll(mem.sliceAsBytes(out_dice.items), dice_cmd.dataoff); - self.load_commands_dirty = true; + ncmds.* += 1; } -fn writeSymtab(self: *MachO) !void { - const tracy = trace(@src()); - defer tracy.end(); +fn writeSymtabs(self: *MachO, ncmds: *u32, lc_writer: anytype) !void { + var symtab_cmd = macho.symtab_command{ + .cmdsize = @sizeOf(macho.symtab_command), + .symoff = 0, + .nsyms = 0, + .stroff = 0, + .strsize = 0, + }; + var dysymtab_cmd = macho.dysymtab_command{ + .cmdsize = @sizeOf(macho.dysymtab_command), + .ilocalsym = 0, + .nlocalsym = 0, + .iextdefsym = 0, + .nextdefsym = 0, + .iundefsym = 0, + .nundefsym = 0, + .tocoff = 0, + .ntoc = 0, + .modtaboff = 0, + .nmodtab = 0, + .extrefsymoff = 0, + .nextrefsyms = 0, + .indirectsymoff = 0, + .nindirectsyms = 0, + .extreloff = 0, + .nextrel = 0, + .locreloff = 0, + .nlocrel = 0, + }; + var ctx = try self.writeSymtab(&symtab_cmd); + defer ctx.imports_table.deinit(); + try self.writeDysymtab(ctx, &dysymtab_cmd); + try self.writeStrtab(&symtab_cmd); + try lc_writer.writeStruct(symtab_cmd); + try lc_writer.writeStruct(dysymtab_cmd); + ncmds.* += 2; +} +fn writeSymtab(self: *MachO, lc: *macho.symtab_command) !SymtabCtx { const gpa = self.base.allocator; - const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].segment; - const symtab = &self.load_commands.items[self.symtab_cmd_index.?].symtab; - const symoff = mem.alignForwardGeneric(u64, seg.inner.fileoff + seg.inner.filesize, @alignOf(macho.nlist_64)); - symtab.symoff = @intCast(u32, symoff); var 
locals = std.ArrayList(macho.nlist_64).init(gpa); defer locals.deinit(); @@ -6101,8 +5474,8 @@ fn writeSymtab(self: *MachO) !void { var imports = std.ArrayList(macho.nlist_64).init(gpa); defer imports.deinit(); + var imports_table = std.AutoHashMap(SymbolWithLoc, u32).init(gpa); - defer imports_table.deinit(); for (self.globals.values()) |global| { const sym = self.getSymbol(global); @@ -6115,56 +5488,84 @@ fn writeSymtab(self: *MachO) !void { try imports_table.putNoClobber(global, new_index); } - const nlocals = locals.items.len; - const nexports = exports.items.len; - const nimports = imports.items.len; - symtab.nsyms = @intCast(u32, nlocals + nexports + nimports); + const nlocals = @intCast(u32, locals.items.len); + const nexports = @intCast(u32, exports.items.len); + const nimports = @intCast(u32, imports.items.len); + const nsyms = nlocals + nexports + nimports; + + const seg = &self.segments.items[self.linkedit_segment_cmd_index.?]; + const offset = mem.alignForwardGeneric( + u64, + seg.fileoff + seg.filesize, + @alignOf(macho.nlist_64), + ); + const needed_size = nsyms * @sizeOf(macho.nlist_64); + seg.filesize = offset + needed_size - seg.fileoff; var buffer = std.ArrayList(u8).init(gpa); defer buffer.deinit(); - try buffer.ensureTotalCapacityPrecise(symtab.nsyms * @sizeOf(macho.nlist_64)); + try buffer.ensureTotalCapacityPrecise(needed_size); buffer.appendSliceAssumeCapacity(mem.sliceAsBytes(locals.items)); buffer.appendSliceAssumeCapacity(mem.sliceAsBytes(exports.items)); buffer.appendSliceAssumeCapacity(mem.sliceAsBytes(imports.items)); - log.debug("writing symtab from 0x{x} to 0x{x}", .{ symtab.symoff, symtab.symoff + buffer.items.len }); - try self.base.file.?.pwriteAll(buffer.items, symtab.symoff); + log.debug("writing symtab from 0x{x} to 0x{x}", .{ offset, offset + needed_size }); + try self.base.file.?.pwriteAll(buffer.items, offset); - seg.inner.filesize = symtab.symoff + buffer.items.len - seg.inner.fileoff; + lc.symoff = @intCast(u32, offset); + 
lc.nsyms = nsyms; - // Update dynamic symbol table. - const dysymtab = &self.load_commands.items[self.dysymtab_cmd_index.?].dysymtab; - dysymtab.nlocalsym = @intCast(u32, nlocals); - dysymtab.iextdefsym = dysymtab.nlocalsym; - dysymtab.nextdefsym = @intCast(u32, nexports); - dysymtab.iundefsym = dysymtab.nlocalsym + dysymtab.nextdefsym; - dysymtab.nundefsym = @intCast(u32, nimports); + return SymtabCtx{ + .nlocalsym = nlocals, + .nextdefsym = nexports, + .nundefsym = nimports, + .imports_table = imports_table, + }; +} + +fn writeStrtab(self: *MachO, lc: *macho.symtab_command) !void { + const seg = &self.segments.items[self.linkedit_segment_cmd_index.?]; + const offset = mem.alignForwardGeneric(u64, seg.fileoff + seg.filesize, @alignOf(u64)); + const needed_size = self.strtab.buffer.items.len; + seg.filesize = offset + needed_size - seg.fileoff; + + log.debug("writing string table from 0x{x} to 0x{x}", .{ offset, offset + needed_size }); + + try self.base.file.?.pwriteAll(self.strtab.buffer.items, offset); + + lc.stroff = @intCast(u32, offset); + lc.strsize = @intCast(u32, needed_size); +} +const SymtabCtx = struct { + nlocalsym: u32, + nextdefsym: u32, + nundefsym: u32, + imports_table: std.AutoHashMap(SymbolWithLoc, u32), +}; + +fn writeDysymtab(self: *MachO, ctx: SymtabCtx, lc: *macho.dysymtab_command) !void { + const gpa = self.base.allocator; const nstubs = @intCast(u32, self.stubs_table.count()); const ngot_entries = @intCast(u32, self.got_entries_table.count()); + const nindirectsyms = nstubs * 2 + ngot_entries; + const iextdefsym = ctx.nlocalsym; + const iundefsym = iextdefsym + ctx.nextdefsym; - const indirectsymoff = mem.alignForwardGeneric(u64, seg.inner.fileoff + seg.inner.filesize, @alignOf(u64)); - dysymtab.indirectsymoff = @intCast(u32, indirectsymoff); - dysymtab.nindirectsyms = nstubs * 2 + ngot_entries; + const seg = &self.segments.items[self.linkedit_segment_cmd_index.?]; + const offset = mem.alignForwardGeneric(u64, seg.fileoff + seg.filesize, 
@alignOf(u64)); + const needed_size = nindirectsyms * @sizeOf(u32); + seg.filesize = offset + needed_size - seg.fileoff; - seg.inner.filesize = dysymtab.indirectsymoff + dysymtab.nindirectsyms * @sizeOf(u32) - seg.inner.fileoff; - - log.debug("writing indirect symbol table from 0x{x} to 0x{x}", .{ - dysymtab.indirectsymoff, - dysymtab.indirectsymoff + dysymtab.nindirectsyms * @sizeOf(u32), - }); + log.debug("writing indirect symbol table from 0x{x} to 0x{x}", .{ offset, offset + needed_size }); var buf = std.ArrayList(u8).init(gpa); defer buf.deinit(); - try buf.ensureTotalCapacity(dysymtab.nindirectsyms * @sizeOf(u32)); + try buf.ensureTotalCapacity(needed_size); const writer = buf.writer(); - if (self.text_segment_cmd_index) |text_segment_cmd_index| blk: { - const stubs_section_index = self.stubs_section_index orelse break :blk; - const stubs = self.getSectionPtr(.{ - .seg = text_segment_cmd_index, - .sect = stubs_section_index, - }); + if (self.stubs_section_index) |sect_id| { + const stubs = &self.sections.items(.header)[sect_id]; stubs.reserved1 = 0; for (self.stubs.items) |entry| { if (entry.sym_index == 0) continue; @@ -6172,16 +5573,12 @@ fn writeSymtab(self: *MachO) !void { if (atom_sym.n_desc == N_DESC_GCED) continue; const target_sym = self.getSymbol(entry.target); assert(target_sym.undf()); - try writer.writeIntLittle(u32, dysymtab.iundefsym + imports_table.get(entry.target).?); + try writer.writeIntLittle(u32, iundefsym + ctx.imports_table.get(entry.target).?); } } - if (self.data_const_segment_cmd_index) |data_const_segment_cmd_index| blk: { - const got_section_index = self.got_section_index orelse break :blk; - const got = self.getSectionPtr(.{ - .seg = data_const_segment_cmd_index, - .sect = got_section_index, - }); + if (self.got_section_index) |sect_id| { + const got = &self.sections.items(.header)[sect_id]; got.reserved1 = nstubs; for (self.got_entries.items) |entry| { if (entry.sym_index == 0) continue; @@ -6189,19 +5586,15 @@ fn 
writeSymtab(self: *MachO) !void { if (atom_sym.n_desc == N_DESC_GCED) continue; const target_sym = self.getSymbol(entry.target); if (target_sym.undf()) { - try writer.writeIntLittle(u32, dysymtab.iundefsym + imports_table.get(entry.target).?); + try writer.writeIntLittle(u32, iundefsym + ctx.imports_table.get(entry.target).?); } else { try writer.writeIntLittle(u32, macho.INDIRECT_SYMBOL_LOCAL); } } } - if (self.data_segment_cmd_index) |data_segment_cmd_index| blk: { - const la_symbol_ptr_section_index = self.la_symbol_ptr_section_index orelse break :blk; - const la_symbol_ptr = self.getSectionPtr(.{ - .seg = data_segment_cmd_index, - .sect = la_symbol_ptr_section_index, - }); + if (self.la_symbol_ptr_section_index) |sect_id| { + const la_symbol_ptr = &self.sections.items(.header)[sect_id]; la_symbol_ptr.reserved1 = nstubs + ngot_entries; for (self.stubs.items) |entry| { if (entry.sym_index == 0) continue; @@ -6209,131 +5602,76 @@ fn writeSymtab(self: *MachO) !void { if (atom_sym.n_desc == N_DESC_GCED) continue; const target_sym = self.getSymbol(entry.target); assert(target_sym.undf()); - try writer.writeIntLittle(u32, dysymtab.iundefsym + imports_table.get(entry.target).?); + try writer.writeIntLittle(u32, iundefsym + ctx.imports_table.get(entry.target).?); } } - assert(buf.items.len == dysymtab.nindirectsyms * @sizeOf(u32)); - - try self.base.file.?.pwriteAll(buf.items, dysymtab.indirectsymoff); - self.load_commands_dirty = true; -} - -fn writeStrtab(self: *MachO) !void { - const tracy = trace(@src()); - defer tracy.end(); - - const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].segment; - const symtab = &self.load_commands.items[self.symtab_cmd_index.?].symtab; - const stroff = mem.alignForwardGeneric(u64, seg.inner.fileoff + seg.inner.filesize, @alignOf(u64)); - - const strsize = self.strtab.buffer.items.len; - symtab.stroff = @intCast(u32, stroff); - symtab.strsize = @intCast(u32, strsize); - seg.inner.filesize = symtab.stroff + 
symtab.strsize - seg.inner.fileoff; - - log.debug("writing string table from 0x{x} to 0x{x}", .{ symtab.stroff, symtab.stroff + symtab.strsize }); - - try self.base.file.?.pwriteAll(self.strtab.buffer.items, symtab.stroff); - - self.load_commands_dirty = true; -} - -fn writeLinkeditSegment(self: *MachO) !void { - const tracy = trace(@src()); - defer tracy.end(); - - const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].segment; - seg.inner.filesize = 0; + assert(buf.items.len == needed_size); + try self.base.file.?.pwriteAll(buf.items, offset); - try self.writeDyldInfoData(); - try self.writeFunctionStarts(); - try self.writeDataInCode(); - try self.writeSymtab(); - try self.writeStrtab(); - - seg.inner.vmsize = mem.alignForwardGeneric(u64, seg.inner.filesize, self.page_size); + lc.nlocalsym = ctx.nlocalsym; + lc.iextdefsym = iextdefsym; + lc.nextdefsym = ctx.nextdefsym; + lc.iundefsym = iundefsym; + lc.nundefsym = ctx.nundefsym; + lc.indirectsymoff = @intCast(u32, offset); + lc.nindirectsyms = nindirectsyms; } -fn writeCodeSignaturePadding(self: *MachO, code_sig: *CodeSignature) !void { - const tracy = trace(@src()); - defer tracy.end(); - - const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].segment; - const cs_cmd = &self.load_commands.items[self.code_signature_cmd_index.?].linkedit_data; +fn writeCodeSignaturePadding( + self: *MachO, + code_sig: *CodeSignature, + ncmds: *u32, + lc_writer: anytype, +) !u32 { + const seg = &self.segments.items[self.linkedit_segment_cmd_index.?]; // Code signature data has to be 16-bytes aligned for Apple tools to recognize the file // https://github.com/opensource-apple/cctools/blob/fdb4825f303fd5c0751be524babd32958181b3ed/libstuff/checkout.c#L271 - const dataoff = mem.alignForwardGeneric(u64, seg.inner.fileoff + seg.inner.filesize, 16); - const datasize = code_sig.estimateSize(dataoff); - cs_cmd.dataoff = @intCast(u32, dataoff); - cs_cmd.datasize = @intCast(u32, 
code_sig.estimateSize(dataoff)); - - // Advance size of __LINKEDIT segment - seg.inner.filesize = cs_cmd.dataoff + cs_cmd.datasize - seg.inner.fileoff; - seg.inner.vmsize = mem.alignForwardGeneric(u64, seg.inner.filesize, self.page_size); - log.debug("writing code signature padding from 0x{x} to 0x{x}", .{ dataoff, dataoff + datasize }); + const offset = mem.alignForwardGeneric(u64, seg.fileoff + seg.filesize, 16); + const needed_size = code_sig.estimateSize(offset); + seg.filesize = offset + needed_size - seg.fileoff; + seg.vmsize = mem.alignForwardGeneric(u64, seg.filesize, self.page_size); + log.debug("writing code signature padding from 0x{x} to 0x{x}", .{ offset, offset + needed_size }); // Pad out the space. We need to do this to calculate valid hashes for everything in the file // except for code signature data. - try self.base.file.?.pwriteAll(&[_]u8{0}, dataoff + datasize - 1); - self.load_commands_dirty = true; -} + try self.base.file.?.pwriteAll(&[_]u8{0}, offset + needed_size - 1); -fn writeCodeSignature(self: *MachO, code_sig: *CodeSignature) !void { - const tracy = trace(@src()); - defer tracy.end(); + try lc_writer.writeStruct(macho.linkedit_data_command{ + .cmd = .CODE_SIGNATURE, + .cmdsize = @sizeOf(macho.linkedit_data_command), + .dataoff = @intCast(u32, offset), + .datasize = @intCast(u32, needed_size), + }); + ncmds.* += 1; - const code_sig_cmd = self.load_commands.items[self.code_signature_cmd_index.?].linkedit_data; - const seg = self.load_commands.items[self.text_segment_cmd_index.?].segment; + return @intCast(u32, offset); +} + +fn writeCodeSignature(self: *MachO, code_sig: *CodeSignature, offset: u32) !void { + const seg = self.segments.items[self.text_segment_cmd_index.?]; var buffer = std.ArrayList(u8).init(self.base.allocator); defer buffer.deinit(); try buffer.ensureTotalCapacityPrecise(code_sig.size()); try code_sig.writeAdhocSignature(self.base.allocator, .{ .file = self.base.file.?, - .exec_seg_base = seg.inner.fileoff, - 
.exec_seg_limit = seg.inner.filesize, - .code_sig_cmd = code_sig_cmd, + .exec_seg_base = seg.fileoff, + .exec_seg_limit = seg.filesize, + .file_size = offset, .output_mode = self.base.options.output_mode, }, buffer.writer()); assert(buffer.items.len == code_sig.size()); log.debug("writing code signature from 0x{x} to 0x{x}", .{ - code_sig_cmd.dataoff, - code_sig_cmd.dataoff + buffer.items.len, + offset, + offset + buffer.items.len, }); - try self.base.file.?.pwriteAll(buffer.items, code_sig_cmd.dataoff); -} - -/// Writes all load commands and section headers. -fn writeLoadCommands(self: *MachO) !void { - if (!self.load_commands_dirty) return; - - var sizeofcmds: u32 = 0; - for (self.load_commands.items) |lc| { - if (lc.cmd() == .NONE) continue; - sizeofcmds += lc.cmdsize(); - } - - var buffer = try self.base.allocator.alloc(u8, sizeofcmds); - defer self.base.allocator.free(buffer); - var fib = std.io.fixedBufferStream(buffer); - const writer = fib.writer(); - for (self.load_commands.items) |lc| { - if (lc.cmd() == .NONE) continue; - try lc.write(writer); - } - - const off = @sizeOf(macho.mach_header_64); - - log.debug("writing load commands from 0x{x} to 0x{x}", .{ off, off + sizeofcmds }); - - try self.base.file.?.pwriteAll(buffer, off); - self.load_commands_dirty = false; + try self.base.file.?.pwriteAll(buffer.items, offset); } /// Writes Mach-O file header. 
-fn writeHeader(self: *MachO) !void { +fn writeHeader(self: *MachO, ncmds: u32, sizeofcmds: u32) !void { var header: macho.mach_header_64 = .{}; header.flags = macho.MH_NOUNDEFS | macho.MH_DYLDLINK | macho.MH_PIE | macho.MH_TWOLEVEL; @@ -6361,18 +5699,12 @@ fn writeHeader(self: *MachO) !void { else => unreachable, } - if (self.tlv_section_index) |_| { + if (self.getSectionByName("__DATA", "__thread_vars")) |_| { header.flags |= macho.MH_HAS_TLV_DESCRIPTORS; } - header.ncmds = 0; - header.sizeofcmds = 0; - - for (self.load_commands.items) |cmd| { - if (cmd.cmd() == .NONE) continue; - header.sizeofcmds += cmd.cmdsize(); - header.ncmds += 1; - } + header.ncmds = ncmds; + header.sizeofcmds = sizeofcmds; log.debug("writing Mach-O header {}", .{header}); @@ -6392,33 +5724,27 @@ pub fn makeStaticString(bytes: []const u8) [16]u8 { return buf; } -pub fn getSectionOrdinal(self: *MachO, match: MatchingSection) u8 { - return @intCast(u8, self.section_ordinals.getIndex(match).?) + 1; +fn getSegmentByName(self: MachO, segname: []const u8) ?u8 { + for (self.segments.items) |seg, i| { + if (mem.eql(u8, segname, seg.segName())) return @intCast(u8, i); + } else return null; } -pub fn getMatchingSectionFromOrdinal(self: *MachO, ord: u8) MatchingSection { - const index = ord - 1; - assert(index < self.section_ordinals.count()); - return self.section_ordinals.keys()[index]; +pub fn getSectionByName(self: MachO, segname: []const u8, sectname: []const u8) ?u8 { + // TODO investigate caching with a hashmap + for (self.sections.items(.header)) |header, i| { + if (mem.eql(u8, header.segName(), segname) and mem.eql(u8, header.sectName(), sectname)) + return @intCast(u8, i); + } else return null; } -pub fn getSegmentPtr(self: *MachO, match: MatchingSection) *macho.SegmentCommand { - assert(match.seg < self.load_commands.items.len); - return &self.load_commands.items[match.seg].segment; -} - -pub fn getSegment(self: *MachO, match: MatchingSection) macho.SegmentCommand { - return 
self.getSegmentPtr(match).*; -} - -pub fn getSectionPtr(self: *MachO, match: MatchingSection) *macho.section_64 { - const seg = self.getSegmentPtr(match); - assert(match.sect < seg.sections.items.len); - return &seg.sections.items[match.sect]; -} - -pub fn getSection(self: *MachO, match: MatchingSection) macho.section_64 { - return self.getSectionPtr(match).*; +pub fn getSectionIndexes(self: MachO, segment_index: u8) struct { start: u8, end: u8 } { + var start: u8 = 0; + const nsects = for (self.segments.items) |seg, i| { + if (i == segment_index) break @intCast(u8, seg.nsects); + start += @intCast(u8, seg.nsects); + } else 0; + return .{ .start = start, .end = start + nsects }; } pub fn symbolIsTemp(self: *MachO, sym_with_loc: SymbolWithLoc) bool { @@ -6512,72 +5838,6 @@ pub fn findFirst(comptime T: type, haystack: []const T, start: usize, predicate: return i; } -const DebugInfo = struct { - inner: dwarf.DwarfInfo, - debug_info: []const u8, - debug_abbrev: []const u8, - debug_str: []const u8, - debug_line: []const u8, - debug_line_str: []const u8, - debug_ranges: []const u8, - - pub fn parse(allocator: Allocator, object: Object) !?DebugInfo { - var debug_info = blk: { - const index = object.dwarf_debug_info_index orelse return null; - break :blk try object.getSectionContents(index); - }; - var debug_abbrev = blk: { - const index = object.dwarf_debug_abbrev_index orelse return null; - break :blk try object.getSectionContents(index); - }; - var debug_str = blk: { - const index = object.dwarf_debug_str_index orelse return null; - break :blk try object.getSectionContents(index); - }; - var debug_line = blk: { - const index = object.dwarf_debug_line_index orelse return null; - break :blk try object.getSectionContents(index); - }; - var debug_line_str = blk: { - if (object.dwarf_debug_line_str_index) |ind| { - break :blk try object.getSectionContents(ind); - } - break :blk &[0]u8{}; - }; - var debug_ranges = blk: { - if (object.dwarf_debug_ranges_index) |ind| { - break 
:blk try object.getSectionContents(ind); - } - break :blk &[0]u8{}; - }; - - var inner: dwarf.DwarfInfo = .{ - .endian = .Little, - .debug_info = debug_info, - .debug_abbrev = debug_abbrev, - .debug_str = debug_str, - .debug_line = debug_line, - .debug_line_str = debug_line_str, - .debug_ranges = debug_ranges, - }; - try dwarf.openDwarfDebugInfo(&inner, allocator); - - return DebugInfo{ - .inner = inner, - .debug_info = debug_info, - .debug_abbrev = debug_abbrev, - .debug_str = debug_str, - .debug_line = debug_line, - .debug_line_str = debug_line_str, - .debug_ranges = debug_ranges, - }; - } - - pub fn deinit(self: *DebugInfo, allocator: Allocator) void { - self.inner.deinit(allocator); - } -}; - pub fn generateSymbolStabs( self: *MachO, object: Object, @@ -6585,14 +5845,15 @@ pub fn generateSymbolStabs( ) !void { assert(!self.base.options.strip); - const gpa = self.base.allocator; - log.debug("parsing debug info in '{s}'", .{object.name}); - var debug_info = (try DebugInfo.parse(gpa, object)) orelse return; + const gpa = self.base.allocator; + var debug_info = try object.parseDwarfInfo(); + defer debug_info.deinit(gpa); + try dwarf.openDwarfDebugInfo(&debug_info, gpa); // We assume there is only one CU. - const compile_unit = debug_info.inner.findCompileUnit(0x0) catch |err| switch (err) { + const compile_unit = debug_info.findCompileUnit(0x0) catch |err| switch (err) { error.MissingDebugInfo => { // TODO audit cases with missing debug info and audit our dwarf.zig module. 
log.debug("invalid or missing debug info in {s}; skipping", .{object.name}); @@ -6600,8 +5861,8 @@ pub fn generateSymbolStabs( }, else => |e| return e, }; - const tu_name = try compile_unit.die.getAttrString(&debug_info.inner, dwarf.AT.name); - const tu_comp_dir = try compile_unit.die.getAttrString(&debug_info.inner, dwarf.AT.comp_dir); + const tu_name = try compile_unit.die.getAttrString(&debug_info, dwarf.AT.name); + const tu_comp_dir = try compile_unit.die.getAttrString(&debug_info, dwarf.AT.comp_dir); // Open scope try locals.ensureUnusedCapacity(3); @@ -6664,7 +5925,7 @@ pub fn generateSymbolStabs( fn generateSymbolStabsForSymbol( self: *MachO, sym_loc: SymbolWithLoc, - debug_info: DebugInfo, + debug_info: dwarf.DwarfInfo, buf: *[4]macho.nlist_64, ) ![]const macho.nlist_64 { const gpa = self.base.allocator; @@ -6679,7 +5940,7 @@ fn generateSymbolStabsForSymbol( const source_sym = object.getSourceSymbol(sym_loc.sym_index) orelse return buf[0..0]; const size: ?u64 = size: { if (source_sym.tentative()) break :size null; - for (debug_info.inner.func_list.items) |func| { + for (debug_info.func_list.items) |func| { if (func.pc_range) |range| { if (source_sym.n_value >= range.start and source_sym.n_value < range.end) { break :size range.end - range.start; @@ -6731,259 +5992,272 @@ fn generateSymbolStabsForSymbol( } } -fn snapshotState(self: *MachO) !void { - const emit = self.base.options.emit orelse { - log.debug("no emit directory found; skipping snapshot...", .{}); - return; - }; - - const Snapshot = struct { - const Node = struct { - const Tag = enum { - section_start, - section_end, - atom_start, - atom_end, - relocation, - - pub fn jsonStringify( - tag: Tag, - options: std.json.StringifyOptions, - out_stream: anytype, - ) !void { - _ = options; - switch (tag) { - .section_start => try out_stream.writeAll("\"section_start\""), - .section_end => try out_stream.writeAll("\"section_end\""), - .atom_start => try out_stream.writeAll("\"atom_start\""), - .atom_end => 
try out_stream.writeAll("\"atom_end\""), - .relocation => try out_stream.writeAll("\"relocation\""), - } - } - }; - const Payload = struct { - name: []const u8 = "", - aliases: [][]const u8 = &[0][]const u8{}, - is_global: bool = false, - target: u64 = 0, - }; - address: u64, - tag: Tag, - payload: Payload, - }; - timestamp: i128, - nodes: []Node, - }; - - var arena_allocator = std.heap.ArenaAllocator.init(self.base.allocator); - defer arena_allocator.deinit(); - const arena = arena_allocator.allocator(); - - const out_file = try emit.directory.handle.createFile("snapshots.json", .{ - .truncate = false, - .read = true, - }); - defer out_file.close(); - - if (out_file.seekFromEnd(-1)) { - try out_file.writer().writeByte(','); - } else |err| switch (err) { - error.Unseekable => try out_file.writer().writeByte('['), - else => |e| return e, - } - const writer = out_file.writer(); - - var snapshot = Snapshot{ - .timestamp = std.time.nanoTimestamp(), - .nodes = undefined, - }; - var nodes = std.ArrayList(Snapshot.Node).init(arena); - - for (self.section_ordinals.keys()) |key| { - const sect = self.getSection(key); - const sect_name = try std.fmt.allocPrint(arena, "{s},{s}", .{ sect.segName(), sect.sectName() }); - try nodes.append(.{ - .address = sect.addr, - .tag = .section_start, - .payload = .{ .name = sect_name }, - }); - - const is_tlv = sect.type_() == macho.S_THREAD_LOCAL_VARIABLES; - - var atom: *Atom = self.atoms.get(key) orelse { - try nodes.append(.{ - .address = sect.addr + sect.size, - .tag = .section_end, - .payload = .{}, - }); - continue; - }; - - while (atom.prev) |prev| { - atom = prev; - } - - while (true) { - const atom_sym = atom.getSymbol(self); - var node = Snapshot.Node{ - .address = atom_sym.n_value, - .tag = .atom_start, - .payload = .{ - .name = atom.getName(self), - .is_global = self.globals.contains(atom.getName(self)), - }, - }; - - var aliases = std.ArrayList([]const u8).init(arena); - for (atom.contained.items) |sym_off| { - if 
(sym_off.offset == 0) { - try aliases.append(self.getSymbolName(.{ - .sym_index = sym_off.sym_index, - .file = atom.file, - })); - } - } - node.payload.aliases = aliases.toOwnedSlice(); - try nodes.append(node); - - var relocs = try std.ArrayList(Snapshot.Node).initCapacity(arena, atom.relocs.items.len); - for (atom.relocs.items) |rel| { - const source_addr = blk: { - const source_sym = atom.getSymbol(self); - break :blk source_sym.n_value + rel.offset; - }; - const target_addr = blk: { - const target_atom = rel.getTargetAtom(self) orelse { - // If there is no atom for target, we still need to check for special, atom-less - // symbols such as `___dso_handle`. - const target_name = self.getSymbolName(rel.target); - if (self.globals.contains(target_name)) { - const atomless_sym = self.getSymbol(rel.target); - break :blk atomless_sym.n_value; - } - break :blk 0; - }; - const target_sym = if (target_atom.isSymbolContained(rel.target, self)) - self.getSymbol(rel.target) - else - target_atom.getSymbol(self); - const base_address: u64 = if (is_tlv) base_address: { - const sect_id: u16 = sect_id: { - if (self.tlv_data_section_index) |i| { - break :sect_id i; - } else if (self.tlv_bss_section_index) |i| { - break :sect_id i; - } else unreachable; - }; - break :base_address self.getSection(.{ - .seg = self.data_segment_cmd_index.?, - .sect = sect_id, - }).addr; - } else 0; - break :blk target_sym.n_value - base_address; - }; - - relocs.appendAssumeCapacity(.{ - .address = source_addr, - .tag = .relocation, - .payload = .{ .target = target_addr }, - }); - } - - if (atom.contained.items.len == 0) { - try nodes.appendSlice(relocs.items); - } else { - // Need to reverse iteration order of relocs since by default for relocatable sources - // they come in reverse. For linking, this doesn't matter in any way, however, for - // arranging the memoryline for displaying it does. 
- std.mem.reverse(Snapshot.Node, relocs.items); - - var next_i: usize = 0; - var last_rel: usize = 0; - while (next_i < atom.contained.items.len) : (next_i += 1) { - const loc = SymbolWithLoc{ - .sym_index = atom.contained.items[next_i].sym_index, - .file = atom.file, - }; - const cont_sym = self.getSymbol(loc); - const cont_sym_name = self.getSymbolName(loc); - var contained_node = Snapshot.Node{ - .address = cont_sym.n_value, - .tag = .atom_start, - .payload = .{ - .name = cont_sym_name, - .is_global = self.globals.contains(cont_sym_name), - }, - }; - - // Accumulate aliases - var inner_aliases = std.ArrayList([]const u8).init(arena); - while (true) { - if (next_i + 1 >= atom.contained.items.len) break; - const next_sym_loc = SymbolWithLoc{ - .sym_index = atom.contained.items[next_i + 1].sym_index, - .file = atom.file, - }; - const next_sym = self.getSymbol(next_sym_loc); - if (next_sym.n_value != cont_sym.n_value) break; - const next_sym_name = self.getSymbolName(next_sym_loc); - if (self.globals.contains(next_sym_name)) { - try inner_aliases.append(contained_node.payload.name); - contained_node.payload.name = next_sym_name; - contained_node.payload.is_global = true; - } else try inner_aliases.append(next_sym_name); - next_i += 1; - } - - const cont_size = if (next_i + 1 < atom.contained.items.len) - self.getSymbol(.{ - .sym_index = atom.contained.items[next_i + 1].sym_index, - .file = atom.file, - }).n_value - cont_sym.n_value - else - atom_sym.n_value + atom.size - cont_sym.n_value; - - contained_node.payload.aliases = inner_aliases.toOwnedSlice(); - try nodes.append(contained_node); - - for (relocs.items[last_rel..]) |rel| { - if (rel.address >= cont_sym.n_value + cont_size) { - break; - } - try nodes.append(rel); - last_rel += 1; - } - - try nodes.append(.{ - .address = cont_sym.n_value + cont_size, - .tag = .atom_end, - .payload = .{}, - }); - } - } - - try nodes.append(.{ - .address = atom_sym.n_value + atom.size, - .tag = .atom_end, - .payload = .{}, - 
}); - - if (atom.next) |next| { - atom = next; - } else break; - } - - try nodes.append(.{ - .address = sect.addr + sect.size, - .tag = .section_end, - .payload = .{}, +// fn snapshotState(self: *MachO) !void { +// const emit = self.base.options.emit orelse { +// log.debug("no emit directory found; skipping snapshot...", .{}); +// return; +// }; + +// const Snapshot = struct { +// const Node = struct { +// const Tag = enum { +// section_start, +// section_end, +// atom_start, +// atom_end, +// relocation, + +// pub fn jsonStringify( +// tag: Tag, +// options: std.json.StringifyOptions, +// out_stream: anytype, +// ) !void { +// _ = options; +// switch (tag) { +// .section_start => try out_stream.writeAll("\"section_start\""), +// .section_end => try out_stream.writeAll("\"section_end\""), +// .atom_start => try out_stream.writeAll("\"atom_start\""), +// .atom_end => try out_stream.writeAll("\"atom_end\""), +// .relocation => try out_stream.writeAll("\"relocation\""), +// } +// } +// }; +// const Payload = struct { +// name: []const u8 = "", +// aliases: [][]const u8 = &[0][]const u8{}, +// is_global: bool = false, +// target: u64 = 0, +// }; +// address: u64, +// tag: Tag, +// payload: Payload, +// }; +// timestamp: i128, +// nodes: []Node, +// }; + +// var arena_allocator = std.heap.ArenaAllocator.init(self.base.allocator); +// defer arena_allocator.deinit(); +// const arena = arena_allocator.allocator(); + +// const out_file = try emit.directory.handle.createFile("snapshots.json", .{ +// .truncate = false, +// .read = true, +// }); +// defer out_file.close(); + +// if (out_file.seekFromEnd(-1)) { +// try out_file.writer().writeByte(','); +// } else |err| switch (err) { +// error.Unseekable => try out_file.writer().writeByte('['), +// else => |e| return e, +// } +// const writer = out_file.writer(); + +// var snapshot = Snapshot{ +// .timestamp = std.time.nanoTimestamp(), +// .nodes = undefined, +// }; +// var nodes = std.ArrayList(Snapshot.Node).init(arena); + 
+// for (self.section_ordinals.keys()) |key| { +// const sect = self.getSection(key); +// const sect_name = try std.fmt.allocPrint(arena, "{s},{s}", .{ sect.segName(), sect.sectName() }); +// try nodes.append(.{ +// .address = sect.addr, +// .tag = .section_start, +// .payload = .{ .name = sect_name }, +// }); + +// const is_tlv = sect.type_() == macho.S_THREAD_LOCAL_VARIABLES; + +// var atom: *Atom = self.atoms.get(key) orelse { +// try nodes.append(.{ +// .address = sect.addr + sect.size, +// .tag = .section_end, +// .payload = .{}, +// }); +// continue; +// }; + +// while (atom.prev) |prev| { +// atom = prev; +// } + +// while (true) { +// const atom_sym = atom.getSymbol(self); +// var node = Snapshot.Node{ +// .address = atom_sym.n_value, +// .tag = .atom_start, +// .payload = .{ +// .name = atom.getName(self), +// .is_global = self.globals.contains(atom.getName(self)), +// }, +// }; + +// var aliases = std.ArrayList([]const u8).init(arena); +// for (atom.contained.items) |sym_off| { +// if (sym_off.offset == 0) { +// try aliases.append(self.getSymbolName(.{ +// .sym_index = sym_off.sym_index, +// .file = atom.file, +// })); +// } +// } +// node.payload.aliases = aliases.toOwnedSlice(); +// try nodes.append(node); + +// var relocs = try std.ArrayList(Snapshot.Node).initCapacity(arena, atom.relocs.items.len); +// for (atom.relocs.items) |rel| { +// const source_addr = blk: { +// const source_sym = atom.getSymbol(self); +// break :blk source_sym.n_value + rel.offset; +// }; +// const target_addr = blk: { +// const target_atom = rel.getTargetAtom(self) orelse { +// // If there is no atom for target, we still need to check for special, atom-less +// // symbols such as `___dso_handle`. 
+// const target_name = self.getSymbolName(rel.target); +// if (self.globals.contains(target_name)) { +// const atomless_sym = self.getSymbol(rel.target); +// break :blk atomless_sym.n_value; +// } +// break :blk 0; +// }; +// const target_sym = if (target_atom.isSymbolContained(rel.target, self)) +// self.getSymbol(rel.target) +// else +// target_atom.getSymbol(self); +// const base_address: u64 = if (is_tlv) base_address: { +// const sect_id: u16 = sect_id: { +// if (self.tlv_data_section_index) |i| { +// break :sect_id i; +// } else if (self.tlv_bss_section_index) |i| { +// break :sect_id i; +// } else unreachable; +// }; +// break :base_address self.getSection(.{ +// .seg = self.data_segment_cmd_index.?, +// .sect = sect_id, +// }).addr; +// } else 0; +// break :blk target_sym.n_value - base_address; +// }; + +// relocs.appendAssumeCapacity(.{ +// .address = source_addr, +// .tag = .relocation, +// .payload = .{ .target = target_addr }, +// }); +// } + +// if (atom.contained.items.len == 0) { +// try nodes.appendSlice(relocs.items); +// } else { +// // Need to reverse iteration order of relocs since by default for relocatable sources +// // they come in reverse. For linking, this doesn't matter in any way, however, for +// // arranging the memoryline for displaying it does. 
+// std.mem.reverse(Snapshot.Node, relocs.items); + +// var next_i: usize = 0; +// var last_rel: usize = 0; +// while (next_i < atom.contained.items.len) : (next_i += 1) { +// const loc = SymbolWithLoc{ +// .sym_index = atom.contained.items[next_i].sym_index, +// .file = atom.file, +// }; +// const cont_sym = self.getSymbol(loc); +// const cont_sym_name = self.getSymbolName(loc); +// var contained_node = Snapshot.Node{ +// .address = cont_sym.n_value, +// .tag = .atom_start, +// .payload = .{ +// .name = cont_sym_name, +// .is_global = self.globals.contains(cont_sym_name), +// }, +// }; + +// // Accumulate aliases +// var inner_aliases = std.ArrayList([]const u8).init(arena); +// while (true) { +// if (next_i + 1 >= atom.contained.items.len) break; +// const next_sym_loc = SymbolWithLoc{ +// .sym_index = atom.contained.items[next_i + 1].sym_index, +// .file = atom.file, +// }; +// const next_sym = self.getSymbol(next_sym_loc); +// if (next_sym.n_value != cont_sym.n_value) break; +// const next_sym_name = self.getSymbolName(next_sym_loc); +// if (self.globals.contains(next_sym_name)) { +// try inner_aliases.append(contained_node.payload.name); +// contained_node.payload.name = next_sym_name; +// contained_node.payload.is_global = true; +// } else try inner_aliases.append(next_sym_name); +// next_i += 1; +// } + +// const cont_size = if (next_i + 1 < atom.contained.items.len) +// self.getSymbol(.{ +// .sym_index = atom.contained.items[next_i + 1].sym_index, +// .file = atom.file, +// }).n_value - cont_sym.n_value +// else +// atom_sym.n_value + atom.size - cont_sym.n_value; + +// contained_node.payload.aliases = inner_aliases.toOwnedSlice(); +// try nodes.append(contained_node); + +// for (relocs.items[last_rel..]) |rel| { +// if (rel.address >= cont_sym.n_value + cont_size) { +// break; +// } +// try nodes.append(rel); +// last_rel += 1; +// } + +// try nodes.append(.{ +// .address = cont_sym.n_value + cont_size, +// .tag = .atom_end, +// .payload = .{}, +// }); +// 
} +// } + +// try nodes.append(.{ +// .address = atom_sym.n_value + atom.size, +// .tag = .atom_end, +// .payload = .{}, +// }); + +// if (atom.next) |next| { +// atom = next; +// } else break; +// } + +// try nodes.append(.{ +// .address = sect.addr + sect.size, +// .tag = .section_end, +// .payload = .{}, +// }); +// } + +// snapshot.nodes = nodes.toOwnedSlice(); + +// try std.json.stringify(snapshot, .{}, writer); +// try writer.writeByte(']'); +// } + +fn logSections(self: *MachO) void { + log.debug("sections:", .{}); + for (self.sections.items(.header)) |header, i| { + log.debug(" sect({d}): {s},{s} @{x}, sizeof({x})", .{ + i + 1, + header.segName(), + header.sectName(), + header.offset, + header.size, }); } - - snapshot.nodes = nodes.toOwnedSlice(); - - try std.json.stringify(snapshot, .{}, writer); - try writer.writeByte(']'); } fn logSymAttributes(sym: macho.nlist_64, buf: *[9]u8) []const u8 { @@ -7104,26 +6378,19 @@ fn logSymtab(self: *MachO) void { } } -fn logSectionOrdinals(self: *MachO) void { - for (self.section_ordinals.keys()) |match, i| { - const sect = self.getSection(match); - log.debug("sect({d}, '{s},{s}')", .{ i + 1, sect.segName(), sect.sectName() }); - } -} - fn logAtoms(self: *MachO) void { log.debug("atoms:", .{}); - var it = self.atoms.iterator(); - while (it.next()) |entry| { - const match = entry.key_ptr.*; - var atom = entry.value_ptr.*; + + const slice = self.sections.slice(); + for (slice.items(.last_atom)) |last, i| { + var atom = last orelse continue; + const header = slice.items(.header)[i]; while (atom.prev) |prev| { atom = prev; } - const sect = self.getSection(match); - log.debug("{s},{s}", .{ sect.segName(), sect.sectName() }); + log.debug("{s},{s}", .{ header.segName(), header.sectName() }); while (true) { self.logAtom(atom); diff --git a/src/link/MachO/Archive.zig b/src/link/MachO/Archive.zig index ee43e5b2a2..054f75fff3 100644 --- a/src/link/MachO/Archive.zig +++ b/src/link/MachO/Archive.zig @@ -6,19 +6,14 @@ const fs = 
std.fs; const log = std.log.scoped(.link); const macho = std.macho; const mem = std.mem; -const fat = @import("fat.zig"); const Allocator = mem.Allocator; const Object = @import("Object.zig"); file: fs.File, +fat_offset: u64, name: []const u8, - -header: ?ar_hdr = null, - -// The actual contents we care about linking with will be embedded at -// an offset within a file if we are linking against a fat lib -library_offset: u64 = 0, +header: ar_hdr = undefined, /// Parsed table of contents. /// Each symbol name points to a list of all definition @@ -103,11 +98,7 @@ pub fn deinit(self: *Archive, allocator: Allocator) void { allocator.free(self.name); } -pub fn parse(self: *Archive, allocator: Allocator, cpu_arch: std.Target.Cpu.Arch) !void { - const reader = self.file.reader(); - self.library_offset = try fat.getLibraryOffset(reader, cpu_arch); - try self.file.seekTo(self.library_offset); - +pub fn parse(self: *Archive, allocator: Allocator, reader: anytype) !void { const magic = try reader.readBytesNoEof(SARMAG); if (!mem.eql(u8, &magic, ARMAG)) { log.debug("invalid magic: expected '{s}', found '{s}'", .{ ARMAG, magic }); @@ -115,21 +106,23 @@ pub fn parse(self: *Archive, allocator: Allocator, cpu_arch: std.Target.Cpu.Arch } self.header = try reader.readStruct(ar_hdr); - if (!mem.eql(u8, &self.header.?.ar_fmag, ARFMAG)) { - log.debug("invalid header delimiter: expected '{s}', found '{s}'", .{ ARFMAG, self.header.?.ar_fmag }); + if (!mem.eql(u8, &self.header.ar_fmag, ARFMAG)) { + log.debug("invalid header delimiter: expected '{s}', found '{s}'", .{ + ARFMAG, + self.header.ar_fmag, + }); return error.NotArchive; } - var embedded_name = try parseName(allocator, self.header.?, reader); + const name_or_length = try self.header.nameOrLength(); + var embedded_name = try parseName(allocator, name_or_length, reader); log.debug("parsing archive '{s}' at '{s}'", .{ embedded_name, self.name }); defer allocator.free(embedded_name); try self.parseTableOfContents(allocator, reader); 
- try reader.context.seekTo(0); } -fn parseName(allocator: Allocator, header: ar_hdr, reader: anytype) ![]u8 { - const name_or_length = try header.nameOrLength(); +fn parseName(allocator: Allocator, name_or_length: ar_hdr.NameOrLength, reader: anytype) ![]u8 { var name: []u8 = undefined; switch (name_or_length) { .Name => |n| { @@ -187,9 +180,14 @@ fn parseTableOfContents(self: *Archive, allocator: Allocator, reader: anytype) ! } } -pub fn parseObject(self: Archive, allocator: Allocator, cpu_arch: std.Target.Cpu.Arch, offset: u32) !Object { +pub fn parseObject( + self: Archive, + allocator: Allocator, + cpu_arch: std.Target.Cpu.Arch, + offset: u32, +) !Object { const reader = self.file.reader(); - try reader.context.seekTo(offset + self.library_offset); + try reader.context.seekTo(self.fat_offset + offset); const object_header = try reader.readStruct(ar_hdr); @@ -198,7 +196,8 @@ pub fn parseObject(self: Archive, allocator: Allocator, cpu_arch: std.Target.Cpu return error.MalformedArchive; } - const object_name = try parseName(allocator, object_header, reader); + const name_or_length = try object_header.nameOrLength(); + const object_name = try parseName(allocator, name_or_length, reader); defer allocator.free(object_name); log.debug("extracting object '{s}' from archive '{s}'", .{ object_name, self.name }); @@ -209,15 +208,24 @@ pub fn parseObject(self: Archive, allocator: Allocator, cpu_arch: std.Target.Cpu break :name try std.fmt.allocPrint(allocator, "{s}({s})", .{ path, object_name }); }; + const object_name_len = switch (name_or_length) { + .Name => 0, + .Length => |len| len, + }; + const object_size = (try object_header.size()) - object_name_len; + const contents = try allocator.allocWithOptions(u8, object_size, @alignOf(u64), null); + const amt = try reader.readAll(contents); + if (amt != object_size) { + return error.InputOutput; + } + var object = Object{ - .file = try fs.cwd().openFile(self.name, .{}), .name = name, - .file_offset = @intCast(u32, try 
reader.context.getPos()), - .mtime = try self.header.?.date(), + .mtime = try self.header.date(), + .contents = contents, }; try object.parse(allocator, cpu_arch); - try reader.context.seekTo(0); return object; } diff --git a/src/link/MachO/Atom.zig b/src/link/MachO/Atom.zig index ba00764127..4871276f3c 100644 --- a/src/link/MachO/Atom.zig +++ b/src/link/MachO/Atom.zig @@ -246,7 +246,7 @@ pub fn parseRelocs(self: *Atom, relocs: []const macho.relocation_info, context: else => { log.err("unexpected relocation type after ARM64_RELOC_ADDEND", .{}); log.err(" expected ARM64_RELOC_PAGE21 or ARM64_RELOC_PAGEOFF12", .{}); - log.err(" found {}", .{next}); + log.err(" found {s}", .{@tagName(next)}); return error.UnexpectedRelocationType; }, } @@ -285,7 +285,9 @@ pub fn parseRelocs(self: *Atom, relocs: []const macho.relocation_info, context: else => { log.err("unexpected relocation type after ARM64_RELOC_ADDEND", .{}); log.err(" expected ARM64_RELOC_UNSIGNED", .{}); - log.err(" found {}", .{@intToEnum(macho.reloc_type_arm64, relocs[i + 1].r_type)}); + log.err(" found {s}", .{ + @tagName(@intToEnum(macho.reloc_type_arm64, relocs[i + 1].r_type)), + }); return error.UnexpectedRelocationType; }, }, @@ -294,7 +296,9 @@ pub fn parseRelocs(self: *Atom, relocs: []const macho.relocation_info, context: else => { log.err("unexpected relocation type after X86_64_RELOC_ADDEND", .{}); log.err(" expected X86_64_RELOC_UNSIGNED", .{}); - log.err(" found {}", .{@intToEnum(macho.reloc_type_x86_64, relocs[i + 1].r_type)}); + log.err(" found {s}", .{ + @tagName(@intToEnum(macho.reloc_type_x86_64, relocs[i + 1].r_type)), + }); return error.UnexpectedRelocationType; }, }, @@ -309,13 +313,13 @@ pub fn parseRelocs(self: *Atom, relocs: []const macho.relocation_info, context: const sect_id = @intCast(u16, rel.r_symbolnum - 1); const sym_index = object.sections_as_symbols.get(sect_id) orelse blk: { const sect = object.getSourceSection(sect_id); - const match = (try 
context.macho_file.getMatchingSection(sect)) orelse + const match = (try context.macho_file.getOutputSection(sect)) orelse unreachable; const sym_index = @intCast(u32, object.symtab.items.len); try object.symtab.append(gpa, .{ .n_strx = 0, .n_type = macho.N_SECT, - .n_sect = context.macho_file.getSectionOrdinal(match), + .n_sect = match + 1, .n_desc = 0, .n_value = sect.addr, }); @@ -459,9 +463,10 @@ fn addPtrBindingOrRebase( }); } else { const source_sym = self.getSymbol(context.macho_file); - const match = context.macho_file.getMatchingSectionFromOrdinal(source_sym.n_sect); - const sect = context.macho_file.getSection(match); - const sect_type = sect.type_(); + const section = context.macho_file.sections.get(source_sym.n_sect - 1); + const header = section.header; + const segment_index = section.segment_index; + const sect_type = header.@"type"(); const should_rebase = rebase: { if (rel.r_length != 3) break :rebase false; @@ -470,12 +475,12 @@ fn addPtrBindingOrRebase( // that the segment is writable should be enough here. 
const is_right_segment = blk: { if (context.macho_file.data_segment_cmd_index) |idx| { - if (match.seg == idx) { + if (segment_index == idx) { break :blk true; } } if (context.macho_file.data_const_segment_cmd_index) |idx| { - if (match.seg == idx) { + if (segment_index == idx) { break :blk true; } } @@ -524,6 +529,7 @@ fn addStub(target: MachO.SymbolWithLoc, context: RelocContext) !void { if (context.macho_file.stubs_table.contains(target)) return; const stub_index = try context.macho_file.allocateStubEntry(target); + const stub_helper_atom = try context.macho_file.createStubHelperAtom(); const laptr_atom = try context.macho_file.createLazyPointerAtom(stub_helper_atom.sym_index, target); const stub_atom = try context.macho_file.createStubAtom(laptr_atom.sym_index); @@ -565,9 +571,8 @@ pub fn resolveRelocs(self: *Atom, macho_file: *MachO) !void { }; const is_tlv = is_tlv: { const source_sym = self.getSymbol(macho_file); - const match = macho_file.getMatchingSectionFromOrdinal(source_sym.n_sect); - const sect = macho_file.getSection(match); - break :is_tlv sect.type_() == macho.S_THREAD_LOCAL_VARIABLES; + const header = macho_file.sections.items(.header)[source_sym.n_sect - 1]; + break :is_tlv header.@"type"() == macho.S_THREAD_LOCAL_VARIABLES; }; const target_addr = blk: { const target_atom = rel.getTargetAtom(macho_file) orelse { @@ -597,9 +602,9 @@ pub fn resolveRelocs(self: *Atom, macho_file: *MachO) !void { // * wrt to __thread_data if defined, then // * wrt to __thread_bss const sect_id: u16 = sect_id: { - if (macho_file.tlv_data_section_index) |i| { + if (macho_file.getSectionByName("__DATA", "__thread_data")) |i| { break :sect_id i; - } else if (macho_file.tlv_bss_section_index) |i| { + } else if (macho_file.getSectionByName("__DATA", "__thread_bss")) |i| { break :sect_id i; } else { log.err("threadlocal variables present but no initializer sections found", .{}); @@ -608,10 +613,7 @@ pub fn resolveRelocs(self: *Atom, macho_file: *MachO) !void { return 
error.FailedToResolveRelocationTarget; } }; - break :base_address macho_file.getSection(.{ - .seg = macho_file.data_segment_cmd_index.?, - .sect = sect_id, - }).addr; + break :base_address macho_file.sections.items(.header)[sect_id].addr; } else 0; break :blk target_sym.n_value - base_address; }; diff --git a/src/link/MachO/CodeSignature.zig b/src/link/MachO/CodeSignature.zig index fbfd487ce2..530a13dc51 100644 --- a/src/link/MachO/CodeSignature.zig +++ b/src/link/MachO/CodeSignature.zig @@ -252,7 +252,7 @@ pub const WriteOpts = struct { file: fs.File, exec_seg_base: u64, exec_seg_limit: u64, - code_sig_cmd: macho.linkedit_data_command, + file_size: u32, output_mode: std.builtin.OutputMode, }; @@ -274,10 +274,9 @@ pub fn writeAdhocSignature( self.code_directory.inner.execSegBase = opts.exec_seg_base; self.code_directory.inner.execSegLimit = opts.exec_seg_limit; self.code_directory.inner.execSegFlags = if (opts.output_mode == .Exe) macho.CS_EXECSEG_MAIN_BINARY else 0; - const file_size = opts.code_sig_cmd.dataoff; - self.code_directory.inner.codeLimit = file_size; + self.code_directory.inner.codeLimit = opts.file_size; - const total_pages = mem.alignForward(file_size, self.page_size) / self.page_size; + const total_pages = mem.alignForward(opts.file_size, self.page_size) / self.page_size; var buffer = try allocator.alloc(u8, self.page_size); defer allocator.free(buffer); @@ -289,7 +288,10 @@ pub fn writeAdhocSignature( var i: usize = 0; while (i < total_pages) : (i += 1) { const fstart = i * self.page_size; - const fsize = if (fstart + self.page_size > file_size) file_size - fstart else self.page_size; + const fsize = if (fstart + self.page_size > opts.file_size) + opts.file_size - fstart + else + self.page_size; const len = try opts.file.preadAll(buffer, fstart); assert(fsize <= len); diff --git a/src/link/MachO/DebugSymbols.zig b/src/link/MachO/DebugSymbols.zig index 4da106eca1..3bfe334302 100644 --- a/src/link/MachO/DebugSymbols.zig +++ 
b/src/link/MachO/DebugSymbols.zig @@ -25,35 +25,18 @@ base: *MachO, dwarf: Dwarf, file: fs.File, -/// Table of all load commands -load_commands: std.ArrayListUnmanaged(macho.LoadCommand) = .{}, -/// __PAGEZERO segment -pagezero_segment_cmd_index: ?u16 = null, -/// __TEXT segment -text_segment_cmd_index: ?u16 = null, -/// __DATA_CONST segment -data_const_segment_cmd_index: ?u16 = null, -/// __DATA segment -data_segment_cmd_index: ?u16 = null, -/// __LINKEDIT segment -linkedit_segment_cmd_index: ?u16 = null, -/// __DWARF segment -dwarf_segment_cmd_index: ?u16 = null, -/// Symbol table -symtab_cmd_index: ?u16 = null, -/// UUID load command -uuid_cmd_index: ?u16 = null, - -/// Index into __TEXT,__text section. -text_section_index: ?u16 = null, - -debug_info_section_index: ?u16 = null, -debug_abbrev_section_index: ?u16 = null, -debug_str_section_index: ?u16 = null, -debug_aranges_section_index: ?u16 = null, -debug_line_section_index: ?u16 = null, - -load_commands_dirty: bool = false, +segments: std.ArrayListUnmanaged(macho.segment_command_64) = .{}, +sections: std.ArrayListUnmanaged(macho.section_64) = .{}, + +linkedit_segment_cmd_index: ?u8 = null, +dwarf_segment_cmd_index: ?u8 = null, + +debug_info_section_index: ?u8 = null, +debug_abbrev_section_index: ?u8 = null, +debug_str_section_index: ?u8 = null, +debug_aranges_section_index: ?u8 = null, +debug_line_section_index: ?u8 = null, + debug_string_table_dirty: bool = false, debug_abbrev_section_dirty: bool = false, debug_aranges_section_dirty: bool = false, @@ -78,98 +61,44 @@ pub const Reloc = struct { /// You must call this function *after* `MachO.populateMissingMetadata()` /// has been called to get a viable debug symbols output. 
pub fn populateMissingMetadata(self: *DebugSymbols, allocator: Allocator) !void { - if (self.uuid_cmd_index == null) { - const base_cmd = self.base.load_commands.items[self.base.uuid_cmd_index.?]; - self.uuid_cmd_index = @intCast(u16, self.load_commands.items.len); - try self.load_commands.append(allocator, base_cmd); - self.load_commands_dirty = true; - } - - if (self.symtab_cmd_index == null) { - self.symtab_cmd_index = @intCast(u16, self.load_commands.items.len); - try self.load_commands.append(self.base.base.allocator, .{ - .symtab = .{ - .cmdsize = @sizeOf(macho.symtab_command), - .symoff = 0, - .nsyms = 0, - .stroff = 0, - .strsize = 0, - }, - }); - try self.strtab.buffer.append(allocator, 0); - self.load_commands_dirty = true; - } - - if (self.pagezero_segment_cmd_index == null) { - self.pagezero_segment_cmd_index = @intCast(u16, self.load_commands.items.len); - const base_cmd = self.base.load_commands.items[self.base.pagezero_segment_cmd_index.?].segment; - const cmd = try self.copySegmentCommand(allocator, base_cmd); - try self.load_commands.append(allocator, .{ .segment = cmd }); - self.load_commands_dirty = true; - } - - if (self.text_segment_cmd_index == null) { - self.text_segment_cmd_index = @intCast(u16, self.load_commands.items.len); - const base_cmd = self.base.load_commands.items[self.base.text_segment_cmd_index.?].segment; - const cmd = try self.copySegmentCommand(allocator, base_cmd); - try self.load_commands.append(allocator, .{ .segment = cmd }); - self.load_commands_dirty = true; - } - - if (self.data_const_segment_cmd_index == null) outer: { - if (self.base.data_const_segment_cmd_index == null) break :outer; // __DATA_CONST is optional - self.data_const_segment_cmd_index = @intCast(u16, self.load_commands.items.len); - const base_cmd = self.base.load_commands.items[self.base.data_const_segment_cmd_index.?].segment; - const cmd = try self.copySegmentCommand(allocator, base_cmd); - try self.load_commands.append(allocator, .{ .segment = cmd }); 
- self.load_commands_dirty = true; - } - - if (self.data_segment_cmd_index == null) outer: { - if (self.base.data_segment_cmd_index == null) break :outer; // __DATA is optional - self.data_segment_cmd_index = @intCast(u16, self.load_commands.items.len); - const base_cmd = self.base.load_commands.items[self.base.data_segment_cmd_index.?].segment; - const cmd = try self.copySegmentCommand(allocator, base_cmd); - try self.load_commands.append(allocator, .{ .segment = cmd }); - self.load_commands_dirty = true; - } - if (self.linkedit_segment_cmd_index == null) { - self.linkedit_segment_cmd_index = @intCast(u16, self.load_commands.items.len); - const base_cmd = self.base.load_commands.items[self.base.linkedit_segment_cmd_index.?].segment; - var cmd = try self.copySegmentCommand(allocator, base_cmd); + self.linkedit_segment_cmd_index = @intCast(u8, self.segments.items.len); + log.debug("found __LINKEDIT segment free space 0x{x} to 0x{x}", .{ + self.base.page_size, + self.base.page_size * 2, + }); // TODO this needs reworking - cmd.inner.vmsize = self.base.page_size; - cmd.inner.fileoff = self.base.page_size; - cmd.inner.filesize = self.base.page_size; - try self.load_commands.append(allocator, .{ .segment = cmd }); - self.load_commands_dirty = true; + try self.segments.append(allocator, .{ + .segname = makeStaticString("__LINKEDIT"), + .vmaddr = self.base.page_size, + .vmsize = self.base.page_size, + .fileoff = self.base.page_size, + .filesize = self.base.page_size, + .maxprot = macho.PROT.READ, + .initprot = macho.PROT.READ, + .cmdsize = @sizeOf(macho.segment_command_64), + }); } if (self.dwarf_segment_cmd_index == null) { - self.dwarf_segment_cmd_index = @intCast(u16, self.load_commands.items.len); + self.dwarf_segment_cmd_index = @intCast(u8, self.segments.items.len); - const linkedit = self.load_commands.items[self.linkedit_segment_cmd_index.?].segment; + const linkedit = self.segments.items[self.linkedit_segment_cmd_index.?]; const ideal_size: u16 = 200 + 128 + 160 
+ 250; const needed_size = mem.alignForwardGeneric(u64, padToIdeal(ideal_size), self.base.page_size); - const fileoff = linkedit.inner.fileoff + linkedit.inner.filesize; - const vmaddr = linkedit.inner.vmaddr + linkedit.inner.vmsize; + const fileoff = linkedit.fileoff + linkedit.filesize; + const vmaddr = linkedit.vmaddr + linkedit.vmsize; log.debug("found __DWARF segment free space 0x{x} to 0x{x}", .{ fileoff, fileoff + needed_size }); - try self.load_commands.append(allocator, .{ - .segment = .{ - .inner = .{ - .segname = makeStaticString("__DWARF"), - .vmaddr = vmaddr, - .vmsize = needed_size, - .fileoff = fileoff, - .filesize = needed_size, - .cmdsize = @sizeOf(macho.segment_command_64), - }, - }, + try self.segments.append(allocator, .{ + .segname = makeStaticString("__DWARF"), + .vmaddr = vmaddr, + .vmsize = needed_size, + .fileoff = fileoff, + .filesize = needed_size, + .cmdsize = @sizeOf(macho.segment_command_64), }); - self.load_commands_dirty = true; } if (self.debug_str_section_index == null) { @@ -203,18 +132,18 @@ pub fn populateMissingMetadata(self: *DebugSymbols, allocator: Allocator) !void } } -fn allocateSection(self: *DebugSymbols, sectname: []const u8, size: u64, alignment: u16) !u16 { - const seg = &self.load_commands.items[self.dwarf_segment_cmd_index.?].segment; +fn allocateSection(self: *DebugSymbols, sectname: []const u8, size: u64, alignment: u16) !u8 { + const segment = &self.segments.items[self.dwarf_segment_cmd_index.?]; var sect = macho.section_64{ .sectname = makeStaticString(sectname), - .segname = seg.inner.segname, + .segname = segment.segname, .size = @intCast(u32, size), .@"align" = alignment, }; const alignment_pow_2 = try math.powi(u32, 2, alignment); const off = self.findFreeSpace(size, alignment_pow_2); - assert(off + size <= seg.inner.fileoff + seg.inner.filesize); // TODO expand + assert(off + size <= segment.fileoff + segment.filesize); // TODO expand log.debug("found {s},{s} section free space 0x{x} to 0x{x}", .{ 
sect.segName(), @@ -223,31 +152,20 @@ fn allocateSection(self: *DebugSymbols, sectname: []const u8, size: u64, alignme off + size, }); - sect.addr = seg.inner.vmaddr + off - seg.inner.fileoff; + sect.addr = segment.vmaddr + off - segment.fileoff; sect.offset = @intCast(u32, off); - const index = @intCast(u16, seg.sections.items.len); - try seg.sections.append(self.base.base.allocator, sect); - seg.inner.cmdsize += @sizeOf(macho.section_64); - seg.inner.nsects += 1; - - // TODO - // const match = MatchingSection{ - // .seg = segment_id, - // .sect = index, - // }; - // _ = try self.section_ordinals.getOrPut(self.base.allocator, match); - // try self.block_free_lists.putNoClobber(self.base.allocator, match, .{}); - - self.load_commands_dirty = true; + const index = @intCast(u8, self.sections.items.len); + try self.sections.append(self.base.base.allocator, sect); + segment.cmdsize += @sizeOf(macho.section_64); + segment.nsects += 1; return index; } fn detectAllocCollision(self: *DebugSymbols, start: u64, size: u64) ?u64 { - const seg = self.load_commands.items[self.dwarf_segment_cmd_index.?].segment; const end = start + padToIdeal(size); - for (seg.sections.items) |section| { + for (self.sections.items) |section| { const increased_size = padToIdeal(section.size); const test_end = section.offset + increased_size; if (end > section.offset and start < test_end) { @@ -258,8 +176,8 @@ fn detectAllocCollision(self: *DebugSymbols, start: u64, size: u64) ?u64 { } pub fn findFreeSpace(self: *DebugSymbols, object_size: u64, min_alignment: u64) u64 { - const seg = self.load_commands.items[self.dwarf_segment_cmd_index.?].segment; - var offset: u64 = seg.inner.fileoff; + const segment = self.segments.items[self.dwarf_segment_cmd_index.?]; + var offset: u64 = segment.fileoff; while (self.detectAllocCollision(offset, object_size)) |item_end| { offset = mem.alignForwardGeneric(u64, item_end, min_alignment); } @@ -296,8 +214,7 @@ pub fn flushModule(self: *DebugSymbols, allocator: 
Allocator, options: link.Opti break :blk got_entry.getName(self.base); }, }; - const seg = &self.load_commands.items[self.dwarf_segment_cmd_index.?].segment; - const sect = &seg.sections.items[self.debug_info_section_index.?]; + const sect = &self.sections.items[self.debug_info_section_index.?]; const file_offset = sect.offset + reloc.offset; log.debug("resolving relocation: {d}@{x} ('{s}') at offset {x}", .{ reloc.target, @@ -311,15 +228,13 @@ pub fn flushModule(self: *DebugSymbols, allocator: Allocator, options: link.Opti if (self.debug_abbrev_section_dirty) { try self.dwarf.writeDbgAbbrev(&self.base.base); - self.load_commands_dirty = true; self.debug_abbrev_section_dirty = false; } if (self.debug_info_header_dirty) { // Currently only one compilation unit is supported, so the address range is simply // identical to the main program header virtual address and memory size. - const text_segment = self.load_commands.items[self.text_segment_cmd_index.?].segment; - const text_section = text_segment.sections.items[self.text_section_index.?]; + const text_section = self.base.sections.items(.header)[self.base.text_section_index.?]; const low_pc = text_section.addr; const high_pc = text_section.addr + text_section.size; try self.dwarf.writeDbgInfoHeader(&self.base.base, module, low_pc, high_pc); @@ -329,10 +244,8 @@ pub fn flushModule(self: *DebugSymbols, allocator: Allocator, options: link.Opti if (self.debug_aranges_section_dirty) { // Currently only one compilation unit is supported, so the address range is simply // identical to the main program header virtual address and memory size. 
- const text_segment = self.load_commands.items[self.text_segment_cmd_index.?].segment; - const text_section = text_segment.sections.items[self.text_section_index.?]; + const text_section = self.base.sections.items(.header)[self.base.text_section_index.?]; try self.dwarf.writeDbgAranges(&self.base.base, text_section.addr, text_section.size); - self.load_commands_dirty = true; self.debug_aranges_section_dirty = false; } @@ -342,8 +255,8 @@ pub fn flushModule(self: *DebugSymbols, allocator: Allocator, options: link.Opti } { - const dwarf_segment = &self.load_commands.items[self.dwarf_segment_cmd_index.?].segment; - const debug_strtab_sect = &dwarf_segment.sections.items[self.debug_str_section_index.?]; + const dwarf_segment = &self.segments.items[self.dwarf_segment_cmd_index.?]; + const debug_strtab_sect = &self.sections.items[self.debug_str_section_index.?]; if (self.debug_string_table_dirty or self.dwarf.strtab.items.len != debug_strtab_sect.size) { const allocated_size = self.allocatedSize(debug_strtab_sect.offset); const needed_size = self.dwarf.strtab.items.len; @@ -351,7 +264,7 @@ pub fn flushModule(self: *DebugSymbols, allocator: Allocator, options: link.Opti if (needed_size > allocated_size) { debug_strtab_sect.size = 0; // free the space const new_offset = self.findFreeSpace(needed_size, 1); - debug_strtab_sect.addr = dwarf_segment.inner.vmaddr + new_offset - dwarf_segment.inner.fileoff; + debug_strtab_sect.addr = dwarf_segment.vmaddr + new_offset - dwarf_segment.fileoff; debug_strtab_sect.offset = @intCast(u32, new_offset); } debug_strtab_sect.size = @intCast(u32, needed_size); @@ -362,28 +275,40 @@ pub fn flushModule(self: *DebugSymbols, allocator: Allocator, options: link.Opti }); try self.file.pwriteAll(self.dwarf.strtab.items, debug_strtab_sect.offset); - self.load_commands_dirty = true; self.debug_string_table_dirty = false; } } + var lc_buffer = std.ArrayList(u8).init(allocator); + defer lc_buffer.deinit(); + const lc_writer = lc_buffer.writer(); + 
var ncmds: u32 = 0; + + try self.writeLinkeditSegmentData(&ncmds, lc_writer); self.updateDwarfSegment(); - try self.writeLinkeditSegment(); - try self.updateVirtualMemoryMapping(); - try self.writeLoadCommands(allocator); - try self.writeHeader(); - assert(!self.load_commands_dirty); + { + try lc_writer.writeStruct(self.base.uuid); + ncmds += 1; + } + + var headers_buf = std.ArrayList(u8).init(allocator); + defer headers_buf.deinit(); + try self.writeSegmentHeaders(&ncmds, headers_buf.writer()); + + try self.file.pwriteAll(headers_buf.items, @sizeOf(macho.mach_header_64)); + try self.file.pwriteAll(lc_buffer.items, @sizeOf(macho.mach_header_64) + headers_buf.items.len); + + try self.writeHeader(ncmds, @intCast(u32, lc_buffer.items.len + headers_buf.items.len)); + assert(!self.debug_abbrev_section_dirty); assert(!self.debug_aranges_section_dirty); assert(!self.debug_string_table_dirty); } pub fn deinit(self: *DebugSymbols, allocator: Allocator) void { - for (self.load_commands.items) |*lc| { - lc.deinit(allocator); - } - self.load_commands.deinit(allocator); + self.segments.deinit(allocator); + self.sections.deinit(allocator); self.dwarf.deinit(); self.strtab.deinit(allocator); self.relocs.deinit(allocator); @@ -402,59 +327,20 @@ pub fn swapRemoveRelocs(self: *DebugSymbols, target: u32) void { } } -fn copySegmentCommand( - self: *DebugSymbols, - allocator: Allocator, - base_cmd: macho.SegmentCommand, -) !macho.SegmentCommand { - var cmd = macho.SegmentCommand{ - .inner = .{ - .segname = undefined, - .cmdsize = base_cmd.inner.cmdsize, - .vmaddr = base_cmd.inner.vmaddr, - .vmsize = base_cmd.inner.vmsize, - .maxprot = base_cmd.inner.maxprot, - .initprot = base_cmd.inner.initprot, - .nsects = base_cmd.inner.nsects, - .flags = base_cmd.inner.flags, - }, - }; - mem.copy(u8, &cmd.inner.segname, &base_cmd.inner.segname); - - try cmd.sections.ensureTotalCapacity(allocator, cmd.inner.nsects); - for (base_cmd.sections.items) |base_sect, i| { - var sect = macho.section_64{ - 
.sectname = undefined, - .segname = undefined, - .addr = base_sect.addr, - .size = base_sect.size, - .offset = 0, - .@"align" = base_sect.@"align", - .reloff = 0, - .nreloc = 0, - .flags = base_sect.flags, - .reserved1 = base_sect.reserved1, - .reserved2 = base_sect.reserved2, - .reserved3 = base_sect.reserved3, - }; - mem.copy(u8, &sect.sectname, &base_sect.sectname); - mem.copy(u8, &sect.segname, &base_sect.segname); - - if (self.base.text_section_index.? == i) { - self.text_section_index = @intCast(u16, i); - } +fn updateDwarfSegment(self: *DebugSymbols) void { + const linkedit = self.segments.items[self.linkedit_segment_cmd_index.?]; + const dwarf_segment = &self.segments.items[self.dwarf_segment_cmd_index.?]; - cmd.sections.appendAssumeCapacity(sect); + const new_start_aligned = linkedit.vmaddr + linkedit.vmsize; + const old_start_aligned = dwarf_segment.vmaddr; + const diff = new_start_aligned - old_start_aligned; + if (diff > 0) { + dwarf_segment.vmaddr = new_start_aligned; } - return cmd; -} - -fn updateDwarfSegment(self: *DebugSymbols) void { - const dwarf_segment = &self.load_commands.items[self.dwarf_segment_cmd_index.?].segment; - var max_offset: u64 = 0; - for (dwarf_segment.sections.items) |sect| { + for (self.sections.items) |*sect| { + sect.addr += diff; log.debug(" {s},{s} - 0x{x}-0x{x} - 0x{x}-0x{x}", .{ sect.segName(), sect.sectName(), @@ -468,42 +354,59 @@ fn updateDwarfSegment(self: *DebugSymbols) void { } } - const file_size = max_offset - dwarf_segment.inner.fileoff; + const file_size = max_offset - dwarf_segment.fileoff; log.debug("__DWARF size 0x{x}", .{file_size}); - if (file_size != dwarf_segment.inner.filesize) { - dwarf_segment.inner.filesize = file_size; - if (dwarf_segment.inner.vmsize < dwarf_segment.inner.filesize) { - dwarf_segment.inner.vmsize = mem.alignForwardGeneric(u64, dwarf_segment.inner.filesize, self.base.page_size); - } - self.load_commands_dirty = true; + if (file_size != dwarf_segment.filesize) { + dwarf_segment.filesize =
file_size; + dwarf_segment.vmsize = mem.alignForwardGeneric(u64, dwarf_segment.filesize, self.base.page_size); } } -/// Writes all load commands and section headers. -fn writeLoadCommands(self: *DebugSymbols, allocator: Allocator) !void { - if (!self.load_commands_dirty) return; +fn writeSegmentHeaders(self: *DebugSymbols, ncmds: *u32, writer: anytype) !void { + // Write segment/section headers from the binary file first. + const end = self.base.linkedit_segment_cmd_index.?; + for (self.base.segments.items[0..end]) |seg, i| { + const indexes = self.base.getSectionIndexes(@intCast(u8, i)); + var out_seg = seg; + out_seg.fileoff = 0; + out_seg.filesize = 0; + out_seg.cmdsize = @sizeOf(macho.segment_command_64); + out_seg.nsects = 0; + + // Update section headers count; any section with size of 0 is excluded + // since it doesn't have any data in the final binary file. + for (self.base.sections.items(.header)[indexes.start..indexes.end]) |header| { + if (header.size == 0) continue; + out_seg.cmdsize += @sizeOf(macho.section_64); + out_seg.nsects += 1; + } - var sizeofcmds: u32 = 0; - for (self.load_commands.items) |lc| { - sizeofcmds += lc.cmdsize(); - } + if (out_seg.nsects == 0 and + (mem.eql(u8, out_seg.segName(), "__DATA_CONST") or + mem.eql(u8, out_seg.segName(), "__DATA"))) continue; - var buffer = try allocator.alloc(u8, sizeofcmds); - defer allocator.free(buffer); - var fib = std.io.fixedBufferStream(buffer); - const writer = fib.writer(); - for (self.load_commands.items) |lc| { - try lc.write(writer); - } + try writer.writeStruct(out_seg); + for (self.base.sections.items(.header)[indexes.start..indexes.end]) |header| { + if (header.size == 0) continue; + var out_header = header; + out_header.offset = 0; + try writer.writeStruct(out_header); + } - const off = @sizeOf(macho.mach_header_64); - log.debug("writing {} load commands from 0x{x} to 0x{x}", .{ self.load_commands.items.len, off, off + sizeofcmds }); - try self.file.pwriteAll(buffer, off); - 
self.load_commands_dirty = false; + ncmds.* += 1; + } + // Next, commit DSYM's __LINKEDIT and __DWARF segments headers. + for (self.segments.items) |seg| { + try writer.writeStruct(seg); + ncmds.* += 1; + } + for (self.sections.items) |header| { + try writer.writeStruct(header); + } } -fn writeHeader(self: *DebugSymbols) !void { +fn writeHeader(self: *DebugSymbols, ncmds: u32, sizeofcmds: u32) !void { var header: macho.mach_header_64 = .{}; header.filetype = macho.MH_DSYM; @@ -519,12 +422,8 @@ fn writeHeader(self: *DebugSymbols) !void { else => return error.UnsupportedCpuArchitecture, } - header.ncmds = @intCast(u32, self.load_commands.items.len); - header.sizeofcmds = 0; - - for (self.load_commands.items) |cmd| { - header.sizeofcmds += cmd.cmdsize(); - } + header.ncmds = ncmds; + header.sizeofcmds = sizeofcmds; log.debug("writing Mach-O header {}", .{header}); @@ -532,79 +431,46 @@ fn writeHeader(self: *DebugSymbols) !void { } pub fn allocatedSize(self: *DebugSymbols, start: u64) u64 { - const seg = self.load_commands.items[self.dwarf_segment_cmd_index.?].segment; - assert(start >= seg.inner.fileoff); + const seg = self.segments.items[self.dwarf_segment_cmd_index.?]; + assert(start >= seg.fileoff); var min_pos: u64 = std.math.maxInt(u64); - for (seg.sections.items) |section| { + for (self.sections.items) |section| { if (section.offset <= start) continue; if (section.offset < min_pos) min_pos = section.offset; } return min_pos - start; } -fn updateVirtualMemoryMapping(self: *DebugSymbols) !void { - const macho_file = self.base; - const allocator = macho_file.base.allocator; - - const IndexTuple = std.meta.Tuple(&[_]type{ *?u16, *?u16 }); - const indices = &[_]IndexTuple{ - .{ &macho_file.text_segment_cmd_index, &self.text_segment_cmd_index }, - .{ &macho_file.data_const_segment_cmd_index, &self.data_const_segment_cmd_index }, - .{ &macho_file.data_segment_cmd_index, &self.data_segment_cmd_index }, - }; - - for (indices) |tuple| { - const orig_cmd = 
macho_file.load_commands.items[tuple[0].*.?].segment; - const cmd = try self.copySegmentCommand(allocator, orig_cmd); - const comp_cmd = &self.load_commands.items[tuple[1].*.?]; - comp_cmd.deinit(allocator); - self.load_commands.items[tuple[1].*.?] = .{ .segment = cmd }; - } - - // TODO should we set the linkedit vmsize to that of the binary? - const orig_cmd = macho_file.load_commands.items[macho_file.linkedit_segment_cmd_index.?].segment; - const orig_vmaddr = orig_cmd.inner.vmaddr; - const linkedit_cmd = &self.load_commands.items[self.linkedit_segment_cmd_index.?].segment; - linkedit_cmd.inner.vmaddr = orig_vmaddr; - - // Update VM address for the DWARF segment and sections including re-running relocations. - // TODO re-run relocations - const dwarf_cmd = &self.load_commands.items[self.dwarf_segment_cmd_index.?].segment; - const new_start_aligned = orig_vmaddr + linkedit_cmd.inner.vmsize; - const old_start_aligned = dwarf_cmd.inner.vmaddr; - const diff = new_start_aligned - old_start_aligned; - if (diff > 0) { - dwarf_cmd.inner.vmaddr = new_start_aligned; - - for (dwarf_cmd.sections.items) |*sect| { - sect.addr += (new_start_aligned - old_start_aligned); - } - } - - self.load_commands_dirty = true; -} - -fn writeLinkeditSegment(self: *DebugSymbols) !void { +fn writeLinkeditSegmentData(self: *DebugSymbols, ncmds: *u32, lc_writer: anytype) !void { const tracy = trace(@src()); defer tracy.end(); - try self.writeSymbolTable(); - try self.writeStringTable(); + const source_vmaddr = self.base.segments.items[self.base.linkedit_segment_cmd_index.?].vmaddr; + const seg = &self.segments.items[self.linkedit_segment_cmd_index.?]; + seg.vmaddr = source_vmaddr; - const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].segment; - const aligned_size = mem.alignForwardGeneric(u64, seg.inner.filesize, self.base.page_size); - seg.inner.filesize = aligned_size; - seg.inner.vmsize = aligned_size; + var symtab_cmd = macho.symtab_command{ + .cmdsize = 
@sizeOf(macho.symtab_command), + .symoff = 0, + .nsyms = 0, + .stroff = 0, + .strsize = 0, + }; + try self.writeSymtab(&symtab_cmd); + try self.writeStrtab(&symtab_cmd); + try lc_writer.writeStruct(symtab_cmd); + ncmds.* += 1; + + const aligned_size = mem.alignForwardGeneric(u64, seg.filesize, self.base.page_size); + seg.filesize = aligned_size; + seg.vmsize = aligned_size; } -fn writeSymbolTable(self: *DebugSymbols) !void { +fn writeSymtab(self: *DebugSymbols, lc: *macho.symtab_command) !void { const tracy = trace(@src()); defer tracy.end(); const gpa = self.base.base.allocator; - const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].segment; - const symtab = &self.load_commands.items[self.symtab_cmd_index.?].symtab; - symtab.symoff = @intCast(u32, seg.inner.fileoff); var locals = std.ArrayList(macho.nlist_64).init(gpa); defer locals.deinit(); @@ -634,34 +500,32 @@ fn writeSymbolTable(self: *DebugSymbols) !void { const nlocals = locals.items.len; const nexports = exports.items.len; - const locals_off = symtab.symoff; - const locals_size = nlocals * @sizeOf(macho.nlist_64); - const exports_off = locals_off + locals_size; - const exports_size = nexports * @sizeOf(macho.nlist_64); + const nsyms = nlocals + nexports; - symtab.nsyms = @intCast(u32, nlocals + nexports); - const needed_size = (nlocals + nexports) * @sizeOf(macho.nlist_64); + const seg = &self.segments.items[self.linkedit_segment_cmd_index.?]; + const offset = mem.alignForwardGeneric(u64, seg.fileoff, @alignOf(macho.nlist_64)); + const needed_size = nsyms * @sizeOf(macho.nlist_64); - if (needed_size > seg.inner.filesize) { + if (needed_size > seg.filesize) { const aligned_size = mem.alignForwardGeneric(u64, needed_size, self.base.page_size); - const diff = @intCast(u32, aligned_size - seg.inner.filesize); - const dwarf_seg = &self.load_commands.items[self.dwarf_segment_cmd_index.?].segment; - seg.inner.filesize = aligned_size; + const diff = @intCast(u32, aligned_size - seg.filesize); + 
const dwarf_seg = &self.segments.items[self.dwarf_segment_cmd_index.?]; + seg.filesize = aligned_size; try MachO.copyRangeAllOverlappingAlloc( self.base.base.allocator, self.file, - dwarf_seg.inner.fileoff, - dwarf_seg.inner.fileoff + diff, - math.cast(usize, dwarf_seg.inner.filesize) orelse return error.Overflow, + dwarf_seg.fileoff, + dwarf_seg.fileoff + diff, + math.cast(usize, dwarf_seg.filesize) orelse return error.Overflow, ); - const old_seg_fileoff = dwarf_seg.inner.fileoff; - dwarf_seg.inner.fileoff += diff; + const old_seg_fileoff = dwarf_seg.fileoff; + dwarf_seg.fileoff += diff; - log.debug(" (moving __DWARF segment from 0x{x} to 0x{x})", .{ old_seg_fileoff, dwarf_seg.inner.fileoff }); + log.debug(" (moving __DWARF segment from 0x{x} to 0x{x})", .{ old_seg_fileoff, dwarf_seg.fileoff }); - for (dwarf_seg.sections.items) |*sect| { + for (self.sections.items) |*sect| { const old_offset = sect.offset; sect.offset += diff; @@ -674,47 +538,53 @@ fn writeSymbolTable(self: *DebugSymbols) !void { } } + lc.symoff = @intCast(u32, offset); + lc.nsyms = @intCast(u32, nsyms); + + const locals_off = lc.symoff; + const locals_size = nlocals * @sizeOf(macho.nlist_64); + const exports_off = locals_off + locals_size; + const exports_size = nexports * @sizeOf(macho.nlist_64); + log.debug("writing local symbols from 0x{x} to 0x{x}", .{ locals_off, locals_size + locals_off }); try self.file.pwriteAll(mem.sliceAsBytes(locals.items), locals_off); log.debug("writing exported symbols from 0x{x} to 0x{x}", .{ exports_off, exports_size + exports_off }); try self.file.pwriteAll(mem.sliceAsBytes(exports.items), exports_off); - - self.load_commands_dirty = true; } -fn writeStringTable(self: *DebugSymbols) !void { +fn writeStrtab(self: *DebugSymbols, lc: *macho.symtab_command) !void { const tracy = trace(@src()); defer tracy.end(); - const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].segment; - const symtab = 
&self.load_commands.items[self.symtab_cmd_index.?].symtab; - const symtab_size = @intCast(u32, symtab.nsyms * @sizeOf(macho.nlist_64)); - symtab.stroff = symtab.symoff + symtab_size; + const seg = &self.segments.items[self.linkedit_segment_cmd_index.?]; + const symtab_size = @intCast(u32, lc.nsyms * @sizeOf(macho.nlist_64)); + const offset = mem.alignForwardGeneric(u64, lc.symoff + symtab_size, @alignOf(u64)); + lc.stroff = @intCast(u32, offset); const needed_size = mem.alignForwardGeneric(u64, self.strtab.buffer.items.len, @alignOf(u64)); - symtab.strsize = @intCast(u32, needed_size); + lc.strsize = @intCast(u32, needed_size); - if (symtab_size + needed_size > seg.inner.filesize) { - const aligned_size = mem.alignForwardGeneric(u64, symtab_size + needed_size, self.base.page_size); - const diff = @intCast(u32, aligned_size - seg.inner.filesize); - const dwarf_seg = &self.load_commands.items[self.dwarf_segment_cmd_index.?].segment; - seg.inner.filesize = aligned_size; + if (symtab_size + needed_size > seg.filesize) { + const aligned_size = mem.alignForwardGeneric(u64, offset + needed_size, self.base.page_size); + const diff = @intCast(u32, aligned_size - seg.filesize); + const dwarf_seg = &self.segments.items[self.dwarf_segment_cmd_index.?]; + seg.filesize = aligned_size; try MachO.copyRangeAllOverlappingAlloc( self.base.base.allocator, self.file, - dwarf_seg.inner.fileoff, - dwarf_seg.inner.fileoff + diff, - math.cast(usize, dwarf_seg.inner.filesize) orelse return error.Overflow, + dwarf_seg.fileoff, + dwarf_seg.fileoff + diff, + math.cast(usize, dwarf_seg.filesize) orelse return error.Overflow, ); - const old_seg_fileoff = dwarf_seg.inner.fileoff; - dwarf_seg.inner.fileoff += diff; + const old_seg_fileoff = dwarf_seg.fileoff; + dwarf_seg.fileoff += diff; - log.debug(" (moving __DWARF segment from 0x{x} to 0x{x})", .{ old_seg_fileoff, dwarf_seg.inner.fileoff }); + log.debug(" (moving __DWARF segment from 0x{x} to 0x{x})", .{ old_seg_fileoff, dwarf_seg.fileoff }); - 
for (dwarf_seg.sections.items) |*sect| { + for (self.sections.items) |*sect| { const old_offset = sect.offset; sect.offset += diff; @@ -727,9 +597,7 @@ fn writeStringTable(self: *DebugSymbols) !void { } } - log.debug("writing string table from 0x{x} to 0x{x}", .{ symtab.stroff, symtab.stroff + symtab.strsize }); - - try self.file.pwriteAll(self.strtab.buffer.items, symtab.stroff); + log.debug("writing string table from 0x{x} to 0x{x}", .{ lc.stroff, lc.stroff + lc.strsize }); - self.load_commands_dirty = true; + try self.file.pwriteAll(self.strtab.buffer.items, lc.stroff); } diff --git a/src/link/MachO/Dylib.zig b/src/link/MachO/Dylib.zig index ffc0b2cca6..0f16eada61 100644 --- a/src/link/MachO/Dylib.zig +++ b/src/link/MachO/Dylib.zig @@ -13,23 +13,9 @@ const fat = @import("fat.zig"); const Allocator = mem.Allocator; const CrossTarget = std.zig.CrossTarget; const LibStub = @import("../tapi.zig").LibStub; +const LoadCommandIterator = macho.LoadCommandIterator; const MachO = @import("../MachO.zig"); -file: fs.File, -name: []const u8, - -header: ?macho.mach_header_64 = null, - -// The actual dylib contents we care about linking with will be embedded at -// an offset within a file if we are linking against a fat lib -library_offset: u64 = 0, - -load_commands: std.ArrayListUnmanaged(macho.LoadCommand) = .{}, - -symtab_cmd_index: ?u16 = null, -dysymtab_cmd_index: ?u16 = null, -id_cmd_index: ?u16 = null, - id: ?Id = null, weak: bool = false, @@ -53,16 +39,12 @@ pub const Id = struct { }; } - pub fn fromLoadCommand(allocator: Allocator, lc: macho.GenericCommandWithData(macho.dylib_command)) !Id { - const dylib = lc.inner.dylib; - const dylib_name = @ptrCast([*:0]const u8, lc.data[dylib.name - @sizeOf(macho.dylib_command) ..]); - const name = try allocator.dupe(u8, mem.sliceTo(dylib_name, 0)); - + pub fn fromLoadCommand(allocator: Allocator, lc: macho.dylib_command, name: []const u8) !Id { return Id{ - .name = name, - .timestamp = dylib.timestamp, - .current_version = 
dylib.current_version, - .compatibility_version = dylib.compatibility_version, + .name = try allocator.dupe(u8, name), + .timestamp = lc.dylib.timestamp, + .current_version = lc.dylib.current_version, + .compatibility_version = lc.dylib.compatibility_version, }; } @@ -126,125 +108,89 @@ pub const Id = struct { }; pub fn deinit(self: *Dylib, allocator: Allocator) void { - for (self.load_commands.items) |*lc| { - lc.deinit(allocator); - } - self.load_commands.deinit(allocator); - for (self.symbols.keys()) |key| { allocator.free(key); } self.symbols.deinit(allocator); - - allocator.free(self.name); - if (self.id) |*id| { id.deinit(allocator); } } -pub fn parse( +pub fn parseFromBinary( self: *Dylib, allocator: Allocator, cpu_arch: std.Target.Cpu.Arch, dylib_id: u16, dependent_libs: anytype, + name: []const u8, + data: []align(@alignOf(u64)) const u8, ) !void { - log.debug("parsing shared library '{s}'", .{self.name}); - - self.library_offset = try fat.getLibraryOffset(self.file.reader(), cpu_arch); + var stream = std.io.fixedBufferStream(data); + const reader = stream.reader(); - try self.file.seekTo(self.library_offset); + log.debug("parsing shared library '{s}'", .{name}); - var reader = self.file.reader(); - self.header = try reader.readStruct(macho.mach_header_64); + const header = try reader.readStruct(macho.mach_header_64); - if (self.header.?.filetype != macho.MH_DYLIB) { - log.debug("invalid filetype: expected 0x{x}, found 0x{x}", .{ macho.MH_DYLIB, self.header.?.filetype }); + if (header.filetype != macho.MH_DYLIB) { + log.debug("invalid filetype: expected 0x{x}, found 0x{x}", .{ macho.MH_DYLIB, header.filetype }); return error.NotDylib; } - const this_arch: std.Target.Cpu.Arch = try fat.decodeArch(self.header.?.cputype, true); + const this_arch: std.Target.Cpu.Arch = try fat.decodeArch(header.cputype, true); if (this_arch != cpu_arch) { - log.err("mismatched cpu architecture: expected {}, found {}", .{ cpu_arch, this_arch }); + log.err("mismatched cpu 
architecture: expected {s}, found {s}", .{ + @tagName(cpu_arch), + @tagName(this_arch), + }); return error.MismatchedCpuArchitecture; } - try self.readLoadCommands(allocator, reader, dylib_id, dependent_libs); - try self.parseId(allocator); - try self.parseSymbols(allocator); -} - -fn readLoadCommands( - self: *Dylib, - allocator: Allocator, - reader: anytype, - dylib_id: u16, - dependent_libs: anytype, -) !void { - const should_lookup_reexports = self.header.?.flags & macho.MH_NO_REEXPORTED_DYLIBS == 0; - - try self.load_commands.ensureUnusedCapacity(allocator, self.header.?.ncmds); - - var i: u16 = 0; - while (i < self.header.?.ncmds) : (i += 1) { - var cmd = try macho.LoadCommand.read(allocator, reader); + const should_lookup_reexports = header.flags & macho.MH_NO_REEXPORTED_DYLIBS == 0; + var it = LoadCommandIterator{ + .ncmds = header.ncmds, + .buffer = data[@sizeOf(macho.mach_header_64)..][0..header.sizeofcmds], + }; + while (it.next()) |cmd| { switch (cmd.cmd()) { .SYMTAB => { - self.symtab_cmd_index = i; - }, - .DYSYMTAB => { - self.dysymtab_cmd_index = i; + const symtab_cmd = cmd.cast(macho.symtab_command).?; + const symtab = @ptrCast( + [*]const macho.nlist_64, + @alignCast(@alignOf(macho.nlist_64), &data[symtab_cmd.symoff]), + )[0..symtab_cmd.nsyms]; + const strtab = data[symtab_cmd.stroff..][0..symtab_cmd.strsize]; + + for (symtab) |sym| { + const add_to_symtab = sym.ext() and (sym.sect() or sym.indr()); + if (!add_to_symtab) continue; + + const sym_name = mem.sliceTo(@ptrCast([*:0]const u8, strtab.ptr + sym.n_strx), 0); + try self.symbols.putNoClobber(allocator, try allocator.dupe(u8, sym_name), {}); + } }, .ID_DYLIB => { - self.id_cmd_index = i; + self.id = try Id.fromLoadCommand( + allocator, + cmd.cast(macho.dylib_command).?, + cmd.getDylibPathName(), + ); }, .REEXPORT_DYLIB => { if (should_lookup_reexports) { // Parse install_name to dependent dylib. 
- var id = try Id.fromLoadCommand(allocator, cmd.dylib); + var id = try Id.fromLoadCommand( + allocator, + cmd.cast(macho.dylib_command).?, + cmd.getDylibPathName(), + ); try dependent_libs.writeItem(.{ .id = id, .parent = dylib_id }); } }, - else => { - log.debug("Unknown load command detected: 0x{x}.", .{@enumToInt(cmd.cmd())}); - }, + else => {}, } - self.load_commands.appendAssumeCapacity(cmd); - } -} - -fn parseId(self: *Dylib, allocator: Allocator) !void { - const index = self.id_cmd_index orelse { - log.debug("no LC_ID_DYLIB load command found; using hard-coded defaults...", .{}); - self.id = try Id.default(allocator, self.name); - return; - }; - self.id = try Id.fromLoadCommand(allocator, self.load_commands.items[index].dylib); -} - -fn parseSymbols(self: *Dylib, allocator: Allocator) !void { - const index = self.symtab_cmd_index orelse return; - const symtab_cmd = self.load_commands.items[index].symtab; - - const symtab = try allocator.alloc(u8, @sizeOf(macho.nlist_64) * symtab_cmd.nsyms); - defer allocator.free(symtab); - _ = try self.file.preadAll(symtab, symtab_cmd.symoff + self.library_offset); - const slice = @alignCast(@alignOf(macho.nlist_64), mem.bytesAsSlice(macho.nlist_64, symtab)); - - const strtab = try allocator.alloc(u8, symtab_cmd.strsize); - defer allocator.free(strtab); - _ = try self.file.preadAll(strtab, symtab_cmd.stroff + self.library_offset); - - for (slice) |sym| { - const add_to_symtab = sym.ext() and (sym.sect() or sym.indr()); - - if (!add_to_symtab) continue; - - const sym_name = mem.sliceTo(@ptrCast([*:0]const u8, strtab.ptr + sym.n_strx), 0); - const name = try allocator.dupe(u8, sym_name); - try self.symbols.putNoClobber(allocator, name, {}); } } @@ -356,10 +302,11 @@ pub fn parseFromStub( lib_stub: LibStub, dylib_id: u16, dependent_libs: anytype, + name: []const u8, ) !void { if (lib_stub.inner.len == 0) return error.EmptyStubFile; - log.debug("parsing shared library from stub '{s}'", .{self.name}); + log.debug("parsing 
shared library from stub '{s}'", .{name}); const umbrella_lib = lib_stub.inner[0]; diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig index 0d929627cd..996a85ed4b 100644 --- a/src/link/MachO/Object.zig +++ b/src/link/MachO/Object.zig @@ -3,6 +3,7 @@ const Object = @This(); const std = @import("std"); const build_options = @import("build_options"); const assert = std.debug.assert; +const dwarf = std.dwarf; const fs = std.fs; const io = std.io; const log = std.log.scoped(.link); @@ -14,43 +15,20 @@ const trace = @import("../../tracy.zig").trace; const Allocator = mem.Allocator; const Atom = @import("Atom.zig"); +const LoadCommandIterator = macho.LoadCommandIterator; const MachO = @import("../MachO.zig"); -const MatchingSection = MachO.MatchingSection; const SymbolWithLoc = MachO.SymbolWithLoc; -file: fs.File, name: []const u8, mtime: u64, - -/// Data contents of the file. Includes sections, and data of load commands. -/// Excludes the backing memory for the header and load commands. -/// Initialized in `parse`. 
-contents: []const u8 = undefined, - -file_offset: ?u32 = null, +contents: []align(@alignOf(u64)) const u8, header: macho.mach_header_64 = undefined, - -load_commands: std.ArrayListUnmanaged(macho.LoadCommand) = .{}, - -segment_cmd_index: ?u16 = null, -text_section_index: ?u16 = null, -symtab_cmd_index: ?u16 = null, -dysymtab_cmd_index: ?u16 = null, -build_version_cmd_index: ?u16 = null, -data_in_code_cmd_index: ?u16 = null, - -// __DWARF segment sections -dwarf_debug_info_index: ?u16 = null, -dwarf_debug_abbrev_index: ?u16 = null, -dwarf_debug_str_index: ?u16 = null, -dwarf_debug_line_index: ?u16 = null, -dwarf_debug_line_str_index: ?u16 = null, -dwarf_debug_ranges_index: ?u16 = null, +in_symtab: []const macho.nlist_64 = undefined, +in_strtab: []const u8 = undefined, symtab: std.ArrayListUnmanaged(macho.nlist_64) = .{}, -strtab: []const u8 = &.{}, -data_in_code_entries: []const macho.data_in_code_entry = &.{}, +sections: std.ArrayListUnmanaged(macho.section_64) = .{}, sections_as_symbols: std.AutoHashMapUnmanaged(u16, u32) = .{}, @@ -61,12 +39,8 @@ managed_atoms: std.ArrayListUnmanaged(*Atom) = .{}, atom_by_index_table: std.AutoHashMapUnmanaged(u32, *Atom) = .{}, pub fn deinit(self: *Object, gpa: Allocator) void { - for (self.load_commands.items) |*lc| { - lc.deinit(gpa); - } - self.load_commands.deinit(gpa); - gpa.free(self.contents); self.symtab.deinit(gpa); + self.sections.deinit(gpa); self.sections_as_symbols.deinit(gpa); self.atom_by_index_table.deinit(gpa); @@ -77,22 +51,15 @@ pub fn deinit(self: *Object, gpa: Allocator) void { self.managed_atoms.deinit(gpa); gpa.free(self.name); + gpa.free(self.contents); } pub fn parse(self: *Object, allocator: Allocator, cpu_arch: std.Target.Cpu.Arch) !void { - const file_stat = try self.file.stat(); - const file_size = math.cast(usize, file_stat.size) orelse return error.Overflow; - self.contents = try self.file.readToEndAlloc(allocator, file_size); - var stream = std.io.fixedBufferStream(self.contents); const reader = 
stream.reader(); - const file_offset = self.file_offset orelse 0; - if (file_offset > 0) { - try reader.context.seekTo(file_offset); - } - self.header = try reader.readStruct(macho.mach_header_64); + if (self.header.filetype != macho.MH_OBJECT) { log.debug("invalid filetype: expected 0x{x}, found 0x{x}", .{ macho.MH_OBJECT, @@ -110,92 +77,54 @@ pub fn parse(self: *Object, allocator: Allocator, cpu_arch: std.Target.Cpu.Arch) }, }; if (this_arch != cpu_arch) { - log.err("mismatched cpu architecture: expected {}, found {}", .{ cpu_arch, this_arch }); + log.err("mismatched cpu architecture: expected {s}, found {s}", .{ + @tagName(cpu_arch), + @tagName(this_arch), + }); return error.MismatchedCpuArchitecture; } - try self.load_commands.ensureUnusedCapacity(allocator, self.header.ncmds); - - var i: u16 = 0; - while (i < self.header.ncmds) : (i += 1) { - var cmd = try macho.LoadCommand.read(allocator, reader); + var it = LoadCommandIterator{ + .ncmds = self.header.ncmds, + .buffer = self.contents[@sizeOf(macho.mach_header_64)..][0..self.header.sizeofcmds], + }; + while (it.next()) |cmd| { switch (cmd.cmd()) { .SEGMENT_64 => { - self.segment_cmd_index = i; - var seg = cmd.segment; - for (seg.sections.items) |*sect, j| { - const index = @intCast(u16, j); - const segname = sect.segName(); - const sectname = sect.sectName(); - if (mem.eql(u8, segname, "__DWARF")) { - if (mem.eql(u8, sectname, "__debug_info")) { - self.dwarf_debug_info_index = index; - } else if (mem.eql(u8, sectname, "__debug_abbrev")) { - self.dwarf_debug_abbrev_index = index; - } else if (mem.eql(u8, sectname, "__debug_str")) { - self.dwarf_debug_str_index = index; - } else if (mem.eql(u8, sectname, "__debug_line")) { - self.dwarf_debug_line_index = index; - } else if (mem.eql(u8, sectname, "__debug_line_str")) { - self.dwarf_debug_line_str_index = index; - } else if (mem.eql(u8, sectname, "__debug_ranges")) { - self.dwarf_debug_ranges_index = index; - } - } else if (mem.eql(u8, segname, "__TEXT")) { - if 
(mem.eql(u8, sectname, "__text")) { - self.text_section_index = index; - } - } - - sect.offset += file_offset; - if (sect.reloff > 0) { - sect.reloff += file_offset; - } + const segment = cmd.cast(macho.segment_command_64).?; + try self.sections.ensureUnusedCapacity(allocator, segment.nsects); + for (cmd.getSections()) |sect| { + self.sections.appendAssumeCapacity(sect); } - - seg.inner.fileoff += file_offset; }, .SYMTAB => { - self.symtab_cmd_index = i; - cmd.symtab.symoff += file_offset; - cmd.symtab.stroff += file_offset; - }, - .DYSYMTAB => { - self.dysymtab_cmd_index = i; - }, - .BUILD_VERSION => { - self.build_version_cmd_index = i; - }, - .DATA_IN_CODE => { - self.data_in_code_cmd_index = i; - cmd.linkedit_data.dataoff += file_offset; - }, - else => { - log.debug("Unknown load command detected: 0x{x}.", .{@enumToInt(cmd.cmd())}); + const symtab = cmd.cast(macho.symtab_command).?; + self.in_symtab = @ptrCast( + [*]const macho.nlist_64, + @alignCast(@alignOf(macho.nlist_64), &self.contents[symtab.symoff]), + )[0..symtab.nsyms]; + self.in_strtab = self.contents[symtab.stroff..][0..symtab.strsize]; + try self.symtab.appendSlice(allocator, self.in_symtab); }, + else => {}, } - self.load_commands.appendAssumeCapacity(cmd); } - - try self.parseSymtab(allocator); } const Context = struct { - symtab: []const macho.nlist_64, - strtab: []const u8, + object: *const Object, }; const SymbolAtIndex = struct { index: u32, fn getSymbol(self: SymbolAtIndex, ctx: Context) macho.nlist_64 { - return ctx.symtab[self.index]; + return ctx.object.getSourceSymbol(self.index).?; } fn getSymbolName(self: SymbolAtIndex, ctx: Context) []const u8 { const sym = self.getSymbol(ctx); - assert(sym.n_strx < ctx.strtab.len); - return mem.sliceTo(@ptrCast([*:0]const u8, ctx.strtab.ptr + sym.n_strx), 0); + return ctx.object.getString(sym.n_strx); } /// Returns whether lhs is less than rhs by allocated address in object file. 
@@ -285,6 +214,23 @@ fn filterRelocs( return relocs[start..end]; } +pub fn scanInputSections(self: Object, macho_file: *MachO) !void { + for (self.sections.items) |sect| { + const match = (try macho_file.getOutputSection(sect)) orelse { + log.debug(" unhandled section", .{}); + continue; + }; + const output = macho_file.sections.items(.header)[match]; + log.debug("mapping '{s},{s}' into output sect({d}, '{s},{s}')", .{ + sect.segName(), + sect.sectName(), + match + 1, + output.segName(), + output.sectName(), + }); + } +} + /// Splits object into atoms assuming one-shot linking mode. pub fn splitIntoAtomsOneShot(self: *Object, macho_file: *MachO, object_id: u32) !void { assert(macho_file.mode == .one_shot); @@ -293,7 +239,6 @@ pub fn splitIntoAtomsOneShot(self: *Object, macho_file: *MachO, object_id: u32) defer tracy.end(); const gpa = macho_file.base.allocator; - const seg = self.load_commands.items[self.segment_cmd_index.?].segment; log.debug("splitting object({d}, {s}) into atoms: one-shot mode", .{ object_id, self.name }); @@ -302,13 +247,12 @@ pub fn splitIntoAtomsOneShot(self: *Object, macho_file: *MachO, object_id: u32) // the GO compiler does not necessarily respect that therefore we sort immediately by type // and address within. const context = Context{ - .symtab = self.getSourceSymtab(), - .strtab = self.strtab, + .object = self, }; - var sorted_all_syms = try std.ArrayList(SymbolAtIndex).initCapacity(gpa, context.symtab.len); + var sorted_all_syms = try std.ArrayList(SymbolAtIndex).initCapacity(gpa, self.in_symtab.len); defer sorted_all_syms.deinit(); - for (context.symtab) |_, index| { + for (self.in_symtab) |_, index| { sorted_all_syms.appendAssumeCapacity(.{ .index = @intCast(u32, index) }); } @@ -320,53 +264,48 @@ pub fn splitIntoAtomsOneShot(self: *Object, macho_file: *MachO, object_id: u32) // Well, shit, sometimes compilers skip the dysymtab load command altogether, meaning we // have to infer the start of undef section in the symtab ourselves. 
- const iundefsym = if (self.dysymtab_cmd_index) |cmd_index| blk: { - const dysymtab = self.load_commands.items[cmd_index].dysymtab; + const iundefsym = blk: { + const dysymtab = self.parseDysymtab() orelse { + var iundefsym: usize = sorted_all_syms.items.len; + while (iundefsym > 0) : (iundefsym -= 1) { + const sym = sorted_all_syms.items[iundefsym - 1].getSymbol(context); + if (sym.sect()) break; + } + break :blk iundefsym; + }; break :blk dysymtab.iundefsym; - } else blk: { - var iundefsym: usize = sorted_all_syms.items.len; - while (iundefsym > 0) : (iundefsym -= 1) { - const sym = sorted_all_syms.items[iundefsym - 1].getSymbol(context); - if (sym.sect()) break; - } - break :blk iundefsym; }; // We only care about defined symbols, so filter every other out. const sorted_syms = sorted_all_syms.items[0..iundefsym]; const subsections_via_symbols = self.header.flags & macho.MH_SUBSECTIONS_VIA_SYMBOLS != 0; - for (seg.sections.items) |sect, id| { + for (self.sections.items) |sect, id| { const sect_id = @intCast(u8, id); log.debug("splitting section '{s},{s}' into atoms", .{ sect.segName(), sect.sectName() }); // Get matching segment/section in the final artifact. 
- const match = (try macho_file.getMatchingSection(sect)) orelse { + const match = (try macho_file.getOutputSection(sect)) orelse { log.debug(" unhandled section", .{}); continue; }; log.debug(" output sect({d}, '{s},{s}')", .{ - macho_file.getSectionOrdinal(match), - macho_file.getSection(match).segName(), - macho_file.getSection(match).sectName(), + match + 1, + macho_file.sections.items(.header)[match].segName(), + macho_file.sections.items(.header)[match].sectName(), }); const cpu_arch = macho_file.base.options.target.cpu.arch; - const is_zerofill = blk: { - const section_type = sect.type_(); - break :blk section_type == macho.S_ZEROFILL or section_type == macho.S_THREAD_LOCAL_ZEROFILL; - }; // Read section's code - const code: ?[]const u8 = if (!is_zerofill) try self.getSectionContents(sect_id) else null; + const code: ?[]const u8 = if (!sect.isZerofill()) try self.getSectionContents(sect) else null; // Read section's list of relocations - const raw_relocs = self.contents[sect.reloff..][0 .. sect.nreloc * @sizeOf(macho.relocation_info)]; - const relocs = mem.bytesAsSlice( - macho.relocation_info, - @alignCast(@alignOf(macho.relocation_info), raw_relocs), - ); + const relocs = @ptrCast( + [*]const macho.relocation_info, + @alignCast(@alignOf(macho.relocation_info), &self.contents[sect.reloff]), + )[0..sect.nreloc]; // Symbols within this section only. 
const filtered_syms = filterSymbolsByAddress( @@ -387,7 +326,7 @@ pub fn splitIntoAtomsOneShot(self: *Object, macho_file: *MachO, object_id: u32) try self.symtab.append(gpa, .{ .n_strx = 0, .n_type = macho.N_SECT, - .n_sect = macho_file.getSectionOrdinal(match), + .n_sect = match + 1, .n_desc = 0, .n_value = sect.addr, }); @@ -476,7 +415,7 @@ pub fn splitIntoAtomsOneShot(self: *Object, macho_file: *MachO, object_id: u32) try self.symtab.append(gpa, .{ .n_strx = 0, .n_type = macho.N_SECT, - .n_sect = macho_file.getSectionOrdinal(match), + .n_sect = match + 1, .n_desc = 0, .n_value = addr, }); @@ -501,7 +440,7 @@ pub fn splitIntoAtomsOneShot(self: *Object, macho_file: *MachO, object_id: u32) try self.symtab.append(gpa, .{ .n_strx = 0, .n_type = macho.N_SECT, - .n_sect = macho_file.getSectionOrdinal(match), + .n_sect = match + 1, .n_desc = 0, .n_value = sect.addr, }); @@ -535,21 +474,21 @@ fn createAtomFromSubsection( code: ?[]const u8, relocs: []const macho.relocation_info, indexes: []const SymbolAtIndex, - match: MatchingSection, + match: u8, sect: macho.section_64, ) !*Atom { const gpa = macho_file.base.allocator; const sym = self.symtab.items[sym_index]; const atom = try MachO.createEmptyAtom(gpa, sym_index, size, alignment); atom.file = object_id; - self.symtab.items[sym_index].n_sect = macho_file.getSectionOrdinal(match); + self.symtab.items[sym_index].n_sect = match + 1; log.debug("creating ATOM(%{d}, '{s}') in sect({d}, '{s},{s}') in object({d})", .{ sym_index, self.getString(sym.n_strx), - macho_file.getSectionOrdinal(match), - macho_file.getSection(match).segName(), - macho_file.getSection(match).sectName(), + match + 1, + macho_file.sections.items(.header)[match].segName(), + macho_file.sections.items(.header)[match].sectName(), object_id, }); @@ -577,7 +516,7 @@ fn createAtomFromSubsection( try atom.contained.ensureTotalCapacity(gpa, indexes.len); for (indexes) |inner_sym_index| { const inner_sym = &self.symtab.items[inner_sym_index.index]; - 
inner_sym.n_sect = macho_file.getSectionOrdinal(match); + inner_sym.n_sect = match + 1; atom.contained.appendAssumeCapacity(.{ .sym_index = inner_sym_index.index, .offset = inner_sym.n_value - sym.n_value, @@ -589,48 +528,84 @@ fn createAtomFromSubsection( return atom; } -fn parseSymtab(self: *Object, allocator: Allocator) !void { - const index = self.symtab_cmd_index orelse return; - const symtab = self.load_commands.items[index].symtab; - try self.symtab.appendSlice(allocator, self.getSourceSymtab()); - self.strtab = self.contents[symtab.stroff..][0..symtab.strsize]; +pub fn getSourceSymbol(self: Object, index: u32) ?macho.nlist_64 { + if (index >= self.in_symtab.len) return null; + return self.in_symtab[index]; } -pub fn getSourceSymtab(self: Object) []const macho.nlist_64 { - const index = self.symtab_cmd_index orelse return &[0]macho.nlist_64{}; - const symtab = self.load_commands.items[index].symtab; - const symtab_size = @sizeOf(macho.nlist_64) * symtab.nsyms; - const raw_symtab = self.contents[symtab.symoff..][0..symtab_size]; - return mem.bytesAsSlice( - macho.nlist_64, - @alignCast(@alignOf(macho.nlist_64), raw_symtab), - ); +pub fn getSourceSection(self: Object, index: u16) macho.section_64 { + assert(index < self.sections.items.len); + return self.sections.items[index]; } -pub fn getSourceSymbol(self: Object, index: u32) ?macho.nlist_64 { - const symtab = self.getSourceSymtab(); - if (index >= symtab.len) return null; - return symtab[index]; +pub fn parseDataInCode(self: Object) ?[]const macho.data_in_code_entry { + var it = LoadCommandIterator{ + .ncmds = self.header.ncmds, + .buffer = self.contents[@sizeOf(macho.mach_header_64)..][0..self.header.sizeofcmds], + }; + while (it.next()) |cmd| { + switch (cmd.cmd()) { + .DATA_IN_CODE => { + const dice = cmd.cast(macho.linkedit_data_command).?; + const ndice = @divExact(dice.datasize, @sizeOf(macho.data_in_code_entry)); + return @ptrCast( + [*]const macho.data_in_code_entry, + 
@alignCast(@alignOf(macho.data_in_code_entry), &self.contents[dice.dataoff]), + )[0..ndice]; + }, + else => {}, + } + } else return null; } -pub fn getSourceSection(self: Object, index: u16) macho.section_64 { - const seg = self.load_commands.items[self.segment_cmd_index.?].segment; - assert(index < seg.sections.items.len); - return seg.sections.items[index]; +fn parseDysymtab(self: Object) ?macho.dysymtab_command { + var it = LoadCommandIterator{ + .ncmds = self.header.ncmds, + .buffer = self.contents[@sizeOf(macho.mach_header_64)..][0..self.header.sizeofcmds], + }; + while (it.next()) |cmd| { + switch (cmd.cmd()) { + .DYSYMTAB => { + return cmd.cast(macho.dysymtab_command).?; + }, + else => {}, + } + } else return null; } -pub fn parseDataInCode(self: Object) ?[]const macho.data_in_code_entry { - const index = self.data_in_code_cmd_index orelse return null; - const data_in_code = self.load_commands.items[index].linkedit_data; - const raw_dice = self.contents[data_in_code.dataoff..][0..data_in_code.datasize]; - return mem.bytesAsSlice( - macho.data_in_code_entry, - @alignCast(@alignOf(macho.data_in_code_entry), raw_dice), - ); +pub fn parseDwarfInfo(self: Object) error{Overflow}!dwarf.DwarfInfo { + var di = dwarf.DwarfInfo{ + .endian = .Little, + .debug_info = &[0]u8{}, + .debug_abbrev = &[0]u8{}, + .debug_str = &[0]u8{}, + .debug_line = &[0]u8{}, + .debug_line_str = &[0]u8{}, + .debug_ranges = &[0]u8{}, + }; + for (self.sections.items) |sect| { + const segname = sect.segName(); + const sectname = sect.sectName(); + if (mem.eql(u8, segname, "__DWARF")) { + if (mem.eql(u8, sectname, "__debug_info")) { + di.debug_info = try self.getSectionContents(sect); + } else if (mem.eql(u8, sectname, "__debug_abbrev")) { + di.debug_abbrev = try self.getSectionContents(sect); + } else if (mem.eql(u8, sectname, "__debug_str")) { + di.debug_str = try self.getSectionContents(sect); + } else if (mem.eql(u8, sectname, "__debug_line")) { + di.debug_line = try 
self.getSectionContents(sect); + } else if (mem.eql(u8, sectname, "__debug_line_str")) { + di.debug_line_str = try self.getSectionContents(sect); + } else if (mem.eql(u8, sectname, "__debug_ranges")) { + di.debug_ranges = try self.getSectionContents(sect); + } + } + } + return di; } -pub fn getSectionContents(self: Object, index: u16) error{Overflow}![]const u8 { - const sect = self.getSourceSection(index); +pub fn getSectionContents(self: Object, sect: macho.section_64) error{Overflow}![]const u8 { const size = math.cast(usize, sect.size) orelse return error.Overflow; log.debug("getting {s},{s} data at 0x{x} - 0x{x}", .{ sect.segName(), @@ -642,8 +617,8 @@ pub fn getSectionContents(self: Object, index: u16) error{Overflow}![]const u8 { } pub fn getString(self: Object, off: u32) []const u8 { - assert(off < self.strtab.len); - return mem.sliceTo(@ptrCast([*:0]const u8, self.strtab.ptr + off), 0); + assert(off < self.in_strtab.len); + return mem.sliceTo(@ptrCast([*:0]const u8, self.in_strtab.ptr + off), 0); } pub fn getAtomForSymbol(self: Object, sym_index: u32) ?*Atom { diff --git a/src/link/MachO/dead_strip.zig b/src/link/MachO/dead_strip.zig index 909a0450d6..eb2be6e5fe 100644 --- a/src/link/MachO/dead_strip.zig +++ b/src/link/MachO/dead_strip.zig @@ -8,7 +8,6 @@ const mem = std.mem; const Allocator = mem.Allocator; const Atom = @import("Atom.zig"); const MachO = @import("../MachO.zig"); -const MatchingSection = MachO.MatchingSection; pub fn gcAtoms(macho_file: *MachO) !void { const gpa = macho_file.base.allocator; @@ -25,12 +24,12 @@ pub fn gcAtoms(macho_file: *MachO) !void { try prune(arena, alive, macho_file); } -fn removeAtomFromSection(atom: *Atom, match: MatchingSection, macho_file: *MachO) void { - const sect = macho_file.getSectionPtr(match); +fn removeAtomFromSection(atom: *Atom, match: u8, macho_file: *MachO) void { + var section = macho_file.sections.get(match); // If we want to enable GC for incremental codepath, we need to take into // account any 
padding that might have been left here. - sect.size -= atom.size; + section.header.size -= atom.size; if (atom.prev) |prev| { prev.next = atom.next; @@ -38,15 +37,16 @@ fn removeAtomFromSection(atom: *Atom, match: MatchingSection, macho_file: *MachO if (atom.next) |next| { next.prev = atom.prev; } else { - const last = macho_file.atoms.getPtr(match).?; if (atom.prev) |prev| { - last.* = prev; + section.last_atom = prev; } else { // The section will be GCed in the next step. - last.* = undefined; - sect.size = 0; + section.last_atom = null; + section.header.size = 0; } } + + macho_file.sections.set(match, section); } fn collectRoots(roots: *std.AutoHashMap(*Atom, void), macho_file: *MachO) !void { @@ -93,7 +93,7 @@ fn collectRoots(roots: *std.AutoHashMap(*Atom, void), macho_file: *MachO) !void const is_gc_root = blk: { if (source_sect.isDontDeadStrip()) break :blk true; if (mem.eql(u8, "__StaticInit", source_sect.sectName())) break :blk true; - switch (source_sect.type_()) { + switch (source_sect.@"type"()) { macho.S_MOD_INIT_FUNC_POINTERS, macho.S_MOD_TERM_FUNC_POINTERS, => break :blk true, @@ -173,19 +173,19 @@ fn mark( fn prune(arena: Allocator, alive: std.AutoHashMap(*Atom, void), macho_file: *MachO) !void { // Any section that ends up here will be updated, that is, // its size and alignment recalculated. 
- var gc_sections = std.AutoHashMap(MatchingSection, void).init(arena); + var gc_sections = std.AutoHashMap(u8, void).init(arena); var loop: bool = true; while (loop) { loop = false; for (macho_file.objects.items) |object| { - for (object.getSourceSymtab()) |_, source_index| { + for (object.in_symtab) |_, source_index| { const atom = object.getAtomForSymbol(@intCast(u32, source_index)) orelse continue; if (alive.contains(atom)) continue; const global = atom.getSymbolWithLoc(); const sym = atom.getSymbolPtr(macho_file); - const match = macho_file.getMatchingSectionFromOrdinal(sym.n_sect); + const match = sym.n_sect - 1; if (sym.n_desc == MachO.N_DESC_GCED) continue; if (!sym.ext() and !refersDead(atom, macho_file)) continue; @@ -232,7 +232,7 @@ fn prune(arena: Allocator, alive: std.AutoHashMap(*Atom, void), macho_file: *Mac // TODO tombstone const atom = entry.getAtom(macho_file); - const match = macho_file.getMatchingSectionFromOrdinal(sym.n_sect); + const match = sym.n_sect - 1; removeAtomFromSection(atom, match, macho_file); _ = try gc_sections.put(match, {}); _ = macho_file.got_entries_table.remove(entry.target); @@ -244,7 +244,7 @@ fn prune(arena: Allocator, alive: std.AutoHashMap(*Atom, void), macho_file: *Mac // TODO tombstone const atom = entry.getAtom(macho_file); - const match = macho_file.getMatchingSectionFromOrdinal(sym.n_sect); + const match = sym.n_sect - 1; removeAtomFromSection(atom, match, macho_file); _ = try gc_sections.put(match, {}); _ = macho_file.stubs_table.remove(entry.target); @@ -256,7 +256,7 @@ fn prune(arena: Allocator, alive: std.AutoHashMap(*Atom, void), macho_file: *Mac // TODO tombstone const atom = entry.getAtom(macho_file); - const match = macho_file.getMatchingSectionFromOrdinal(sym.n_sect); + const match = sym.n_sect - 1; removeAtomFromSection(atom, match, macho_file); _ = try gc_sections.put(match, {}); _ = macho_file.tlv_ptr_entries_table.remove(entry.target); @@ -265,13 +265,13 @@ fn prune(arena: Allocator, alive: 
std.AutoHashMap(*Atom, void), macho_file: *Mac var gc_sections_it = gc_sections.iterator(); while (gc_sections_it.next()) |entry| { const match = entry.key_ptr.*; - const sect = macho_file.getSectionPtr(match); - if (sect.size == 0) continue; // Pruning happens automatically in next step. + var section = macho_file.sections.get(match); + if (section.header.size == 0) continue; // Pruning happens automatically in next step. - sect.@"align" = 0; - sect.size = 0; + section.header.@"align" = 0; + section.header.size = 0; - var atom = macho_file.atoms.get(match).?; + var atom = section.last_atom.?; while (atom.prev) |prev| { atom = prev; @@ -279,14 +279,16 @@ fn prune(arena: Allocator, alive: std.AutoHashMap(*Atom, void), macho_file: *Mac while (true) { const atom_alignment = try math.powi(u32, 2, atom.alignment); - const aligned_end_addr = mem.alignForwardGeneric(u64, sect.size, atom_alignment); - const padding = aligned_end_addr - sect.size; - sect.size += padding + atom.size; - sect.@"align" = @maximum(sect.@"align", atom.alignment); + const aligned_end_addr = mem.alignForwardGeneric(u64, section.header.size, atom_alignment); + const padding = aligned_end_addr - section.header.size; + section.header.size += padding + atom.size; + section.header.@"align" = @maximum(section.header.@"align", atom.alignment); if (atom.next) |next| { atom = next; } else break; } + + macho_file.sections.set(match, section); } } diff --git a/src/link/MachO/fat.zig b/src/link/MachO/fat.zig index 1511f274a8..7c328c1418 100644 --- a/src/link/MachO/fat.zig +++ b/src/link/MachO/fat.zig @@ -46,7 +46,9 @@ pub fn getLibraryOffset(reader: anytype, cpu_arch: std.Target.Cpu.Arch) !u64 { return fat_arch.offset; } } else { - log.err("Could not find matching cpu architecture in fat library: expected {}", .{cpu_arch}); + log.err("Could not find matching cpu architecture in fat library: expected {s}", .{ + @tagName(cpu_arch), + }); return error.MismatchedCpuArchitecture; } } |
