From 5ccee4c986aa9ed73d3deab3145f43689aa58ee4 Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Tue, 28 Jul 2020 17:27:44 -0700 Subject: stage2: more progress towards mutable local variables * implement sema for runtime deref, store pointer, coerce_to_ptr_elem, and store * identifiers support being lvalues, except for decls is still TODO * codegen supports load, store, ref, alloc * introduce more MCValue union tags to support pointers * add load, ref, store typed IR instructions * add Type.isVolatilePtr --- src-self-hosted/codegen.zig | 289 ++++++++++++++++++++++++++++++++++++++------ 1 file changed, 253 insertions(+), 36 deletions(-) (limited to 'src-self-hosted/codegen.zig') diff --git a/src-self-hosted/codegen.zig b/src-self-hosted/codegen.zig index 75b042308d..777c9ee5f2 100644 --- a/src-self-hosted/codegen.zig +++ b/src-self-hosted/codegen.zig @@ -209,6 +209,7 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { err_msg: ?*ErrorMsg, args: []MCValue, ret_mcv: MCValue, + fn_type: Type, arg_index: usize, src: usize, stack_align: u32, @@ -230,15 +231,23 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { /// No more references to this value remain. dead, /// A pointer-sized integer that fits in a register. + /// If the type is a pointer, this is the pointer address in virtual address space. immediate: u64, /// The constant was emitted into the code, at this offset. + /// If the type is a pointer, it means the pointer address is embedded in the code. embedded_in_code: usize, + /// The value is a pointer to a constant which was emitted into the code, at this offset. + ptr_embedded_in_code: usize, /// The value is in a target-specific register. register: Register, /// The value is in memory at a hard-coded address. + /// If the type is a pointer, it means the pointer address is at this memory location. memory: u64, /// The value is one of the stack variables. - stack_offset: u64, + /// If the type is a pointer, it means the pointer address is in the stack at this offset. + stack_offset: u32, + /// The value is a pointer to one of the stack variables (payload is stack offset). + ptr_stack_offset: u32, /// The value is in the compare flags assuming an unsigned operation, /// with this operator applied on top of it. 
compare_flags_unsigned: math.CompareOperator, @@ -271,6 +280,8 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { .memory, .compare_flags_unsigned, .compare_flags_signed, + .ptr_stack_offset, + .ptr_embedded_in_code, => false, .register, @@ -356,6 +367,7 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { .err_msg = null, .args = undefined, // populated after `resolveCallingConventionValues` .ret_mcv = undefined, // populated after `resolveCallingConventionValues` + .fn_type = fn_type, .arg_index = 0, .branch_stack = &branch_stack, .src = src, @@ -459,26 +471,23 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { .cmp_neq => return self.genCmp(inst.castTag(.cmp_neq).?, .neq), .condbr => return self.genCondBr(inst.castTag(.condbr).?), .constant => unreachable, // excluded from function bodies + .floatcast => return self.genFloatCast(inst.castTag(.floatcast).?), + .intcast => return self.genIntCast(inst.castTag(.intcast).?), .isnonnull => return self.genIsNonNull(inst.castTag(.isnonnull).?), .isnull => return self.genIsNull(inst.castTag(.isnull).?), + .load => return self.genLoad(inst.castTag(.load).?), + .not => return self.genNot(inst.castTag(.not).?), .ptrtoint => return self.genPtrToInt(inst.castTag(.ptrtoint).?), + .ref => return self.genRef(inst.castTag(.ref).?), .ret => return self.genRet(inst.castTag(.ret).?), .retvoid => return self.genRetVoid(inst.castTag(.retvoid).?), + .store => return self.genStore(inst.castTag(.store).?), .sub => return self.genSub(inst.castTag(.sub).?), .unreach => return MCValue{ .unreach = {} }, - .not => return self.genNot(inst.castTag(.not).?), - .floatcast => return self.genFloatCast(inst.castTag(.floatcast).?), - .intcast => return self.genIntCast(inst.castTag(.intcast).?), } } - fn genAlloc(self: *Self, inst: *ir.Inst.NoOp) !MCValue { - const elem_ty = inst.base.ty.elemType(); - const abi_size = math.cast(u32, elem_ty.abiSize(self.target.*)) catch { - return self.fail(inst.base.src, "type '{}' too big to fit into stack frame", .{elem_ty}); - }; - // TODO swap this for inst.base.ty.ptrAlign - const abi_align = elem_ty.abiAlignment(self.target.*); + fn allocMem(self: *Self, inst: *ir.Inst, abi_size: u32, abi_align: u32) !u32 { if (abi_align > self.stack_align) self.stack_align = abi_align; const branch = &self.branch_stack.items[self.branch_stack.items.len - 1]; @@ -488,10 +497,66 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { if (branch.next_stack_offset > branch.max_end_stack) branch.max_end_stack = branch.next_stack_offset; try branch.stack.putNoClobber(self.gpa, offset, .{ - .inst = &inst.base, + .inst = inst, .size = abi_size, }); - return MCValue{ .stack_offset = offset }; + return offset; + } + + /// Use a pointer instruction as the basis for allocating stack memory. 
+ fn allocMemPtr(self: *Self, inst: *ir.Inst) !u32 { + const elem_ty = inst.ty.elemType(); + const abi_size = math.cast(u32, elem_ty.abiSize(self.target.*)) catch { + return self.fail(inst.src, "type '{}' too big to fit into stack frame", .{elem_ty}); + }; + // TODO swap this for inst.ty.ptrAlign + const abi_align = elem_ty.abiAlignment(self.target.*); + return self.allocMem(inst, abi_size, abi_align); + } + + fn allocRegOrMem(self: *Self, inst: *ir.Inst) !MCValue { + const elem_ty = inst.ty; + const abi_size = math.cast(u32, elem_ty.abiSize(self.target.*)) catch { + return self.fail(inst.src, "type '{}' too big to fit into stack frame", .{elem_ty}); + }; + const abi_align = elem_ty.abiAlignment(self.target.*); + if (abi_align > self.stack_align) + self.stack_align = abi_align; + const branch = &self.branch_stack.items[self.branch_stack.items.len - 1]; + + // TODO Make sure the type can fit in a register before we try to allocate one. + const free_index = @ctz(FreeRegInt, branch.free_registers); + if (free_index >= callee_preserved_regs.len) { + const stack_offset = try self.allocMem(inst, abi_size, abi_align); + return MCValue{ .stack_offset = stack_offset }; + } + branch.free_registers &= ~(@as(FreeRegInt, 1) << free_index); + const reg = callee_preserved_regs[free_index]; + try branch.registers.putNoClobber(self.gpa, reg, .{ .inst = inst }); + return MCValue{ .register = reg }; + } + + /// Does not "move" the instruction. + fn copyToNewRegister(self: *Self, inst: *ir.Inst) !MCValue { + const branch = &self.branch_stack.items[self.branch_stack.items.len - 1]; + try branch.registers.ensureCapacity(self.gpa, branch.registers.items().len + 1); + try branch.inst_table.ensureCapacity(self.gpa, branch.inst_table.items().len + 1); + + const free_index = @ctz(FreeRegInt, branch.free_registers); + if (free_index >= callee_preserved_regs.len) + return self.fail(inst.src, "TODO implement spilling register to stack", .{}); + branch.free_registers &= ~(@as(FreeRegInt, 1) << free_index); + const reg = callee_preserved_regs[free_index]; + branch.registers.putAssumeCapacityNoClobber(reg, .{ .inst = inst }); + const old_mcv = branch.inst_table.get(inst).?; + const new_mcv: MCValue = .{ .register = reg }; + try self.genSetReg(inst.src, reg, old_mcv); + return new_mcv; + } + + fn genAlloc(self: *Self, inst: *ir.Inst.NoOp) !MCValue { + const stack_offset = try self.allocMemPtr(&inst.base); + return MCValue{ .ptr_stack_offset = stack_offset }; } fn genFloatCast(self: *Self, inst: *ir.Inst.UnOp) !MCValue { @@ -572,6 +637,85 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { } } + fn genLoad(self: *Self, inst: *ir.Inst.UnOp) !MCValue { + const elem_ty = inst.base.ty; + if (!elem_ty.hasCodeGenBits()) + return MCValue.none; + const ptr = try self.resolveInst(inst.operand); + const is_volatile = inst.operand.ty.isVolatilePtr(); + if (inst.base.isUnused() and !is_volatile) + return MCValue.dead; + const dst_mcv: MCValue = blk: { + if (inst.base.operandDies(0) and ptr.isMutable()) { + // The MCValue that holds the pointer can be re-used as the value. + // TODO track this in the register/stack allocation metadata. 
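As an aside, the register allocation introduced above (allocRegOrMem, copyToNewRegister) treats free_registers as a bitmask over the callee-preserved registers and grabs the lowest set bit with @ctz, falling back to a stack slot once every register is taken. The following is only a minimal standalone sketch of that scheme, written in present-day Zig syntax (single-argument @ctz, one-argument casts) with made-up register names rather than the real per-architecture tables in codegen.zig:

const std = @import("std");

// Hypothetical stand-ins for the real callee_preserved_regs table and
// FreeRegInt in codegen.zig; the actual tables are per-architecture.
const callee_preserved_regs = [_][]const u8{ "rbx", "r12", "r13", "r14", "r15" };
const FreeRegInt = std.meta.Int(.unsigned, callee_preserved_regs.len);

/// Returns the index of the lowest free register and marks it used, or
/// null if every callee-preserved register is already taken (the point
/// at which the real code falls back to allocating a stack slot).
fn takeFreeReg(free_registers: *FreeRegInt) ?usize {
    const free_index = @ctz(free_registers.*);
    if (free_index >= callee_preserved_regs.len) return null;
    free_registers.* &= ~(@as(FreeRegInt, 1) << free_index);
    return free_index;
}

test "registers are handed out lowest-index first, then exhausted" {
    var free: FreeRegInt = std.math.maxInt(FreeRegInt);
    try std.testing.expectEqual(@as(?usize, 0), takeFreeReg(&free));
    try std.testing.expectEqual(@as(?usize, 1), takeFreeReg(&free));
    var remaining: usize = 0;
    while (takeFreeReg(&free)) |_| remaining += 1;
    try std.testing.expectEqual(@as(usize, 3), remaining);
    try std.testing.expectEqual(@as(?usize, null), takeFreeReg(&free));
}

Keeping the free set as a bitmask means both "find a free register" and "mark it used" are a couple of integer instructions, which matters since this runs for essentially every IR instruction.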
+ break :blk ptr; + } else { + break :blk try self.allocRegOrMem(&inst.base); + } + }; + switch (ptr) { + .none => unreachable, + .unreach => unreachable, + .dead => unreachable, + .compare_flags_unsigned => unreachable, + .compare_flags_signed => unreachable, + .immediate => |imm| try self.setRegOrMem(inst.base.src, elem_ty, dst_mcv, .{ .memory = imm }), + .ptr_stack_offset => |off| try self.setRegOrMem(inst.base.src, elem_ty, dst_mcv, .{ .stack_offset = off }), + .ptr_embedded_in_code => |off| { + try self.setRegOrMem(inst.base.src, elem_ty, dst_mcv, .{ .embedded_in_code = off }); + }, + .embedded_in_code => { + return self.fail(inst.base.src, "TODO implement loading from MCValue.embedded_in_code", .{}); + }, + .register => { + return self.fail(inst.base.src, "TODO implement loading from MCValue.register", .{}); + }, + .memory => { + return self.fail(inst.base.src, "TODO implement loading from MCValue.memory", .{}); + }, + .stack_offset => { + return self.fail(inst.base.src, "TODO implement loading from MCValue.stack_offset", .{}); + }, + } + return dst_mcv; + } + + fn genStore(self: *Self, inst: *ir.Inst.BinOp) !MCValue { + const ptr = try self.resolveInst(inst.lhs); + const value = try self.resolveInst(inst.rhs); + const elem_ty = inst.rhs.ty; + switch (ptr) { + .none => unreachable, + .unreach => unreachable, + .dead => unreachable, + .compare_flags_unsigned => unreachable, + .compare_flags_signed => unreachable, + .immediate => |imm| { + try self.setRegOrMem(inst.base.src, elem_ty, .{ .memory = imm }, value); + }, + .ptr_stack_offset => |off| { + try self.genSetStack(inst.base.src, elem_ty, off, value); + }, + .ptr_embedded_in_code => |off| { + try self.setRegOrMem(inst.base.src, elem_ty, .{ .embedded_in_code = off }, value); + }, + .embedded_in_code => { + return self.fail(inst.base.src, "TODO implement storing to MCValue.embedded_in_code", .{}); + }, + .register => { + return self.fail(inst.base.src, "TODO implement storing to MCValue.register", .{}); + }, + .memory => { + return self.fail(inst.base.src, "TODO implement storing to MCValue.memory", .{}); + }, + .stack_offset => { + return self.fail(inst.base.src, "TODO implement storing to MCValue.stack_offset", .{}); + }, + } + return .none; + } + fn genSub(self: *Self, inst: *ir.Inst.BinOp) !MCValue { // No side effects, so if it's unreferenced, do nothing. if (inst.base.isUnused()) @@ -657,10 +801,14 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { .dead, .unreach, .immediate => unreachable, .compare_flags_unsigned => unreachable, .compare_flags_signed => unreachable, + .ptr_stack_offset => unreachable, + .ptr_embedded_in_code => unreachable, .register => |dst_reg| { switch (src_mcv) { .none => unreachable, .dead, .unreach => unreachable, + .ptr_stack_offset => unreachable, + .ptr_embedded_in_code => unreachable, .register => |src_reg| { self.rex(.{ .b = dst_reg.isExtended(), .r = src_reg.isExtended(), .w = dst_reg.size() == 64 }); self.code.appendSliceAssumeCapacity(&[_]u8{ mr + 0x1, 0xC0 | (@as(u8, src_reg.id() & 0b111) << 3) | @as(u8, dst_reg.id() & 0b111) }); @@ -743,6 +891,8 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { for (info.args) |mc_arg, arg_i| { const arg = inst.args[arg_i]; const arg_mcv = try self.resolveInst(inst.args[arg_i]); + // Here we do not use setRegOrMem even though the logic is similar, because + // the function call will move the stack pointer, so the offsets are different. 
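The genSetStack and genSetReg changes below (and in the follow-up commit) address stack slots as negative displacements off rbp, so the unsigned stack_offset has to be re-encoded as a two's-complement disp8 (or disp32 for larger frames). A small sketch of just that conversion, using present-day single-argument @intCast/@bitCast rather than the 2020-era two-argument forms used in the patch:

const std = @import("std");

/// Stack slots live below rbp, so a positive stack_offset is emitted as
/// the two's-complement byte for [rbp + disp8], i.e. [rbp - stack_offset].
fn rbpDisp8(stack_offset: u32) u8 {
    std.debug.assert(stack_offset <= 128); // disp8 range; larger offsets need disp32
    const negative_offset: i8 = @intCast(-@as(i32, @intCast(stack_offset)));
    return @bitCast(negative_offset);
}

test "offset 8 encodes as 0xF8, i.e. [rbp-0x8]" {
    try std.testing.expectEqual(@as(u8, 0xf8), rbpDisp8(8));
    try std.testing.expectEqual(@as(u8, 0x80), rbpDisp8(128));
}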
switch (mc_arg) { .none => continue, .register => |reg| { @@ -754,6 +904,12 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { // mov qword ptr [rsp + stack_offset], x return self.fail(inst.base.src, "TODO implement calling with parameters in memory", .{}); }, + .ptr_stack_offset => { + return self.fail(inst.base.src, "TODO implement calling with MCValue.ptr_stack_offset", .{}); + }, + .ptr_embedded_in_code => { + return self.fail(inst.base.src, "TODO implement calling with MCValue.ptr_embedded_in_code", .{}); + }, .immediate => unreachable, .unreach => unreachable, .dead => unreachable, @@ -788,8 +944,34 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { return info.return_value; } + fn genRef(self: *Self, inst: *ir.Inst.UnOp) !MCValue { + const operand = try self.resolveInst(inst.operand); + switch (operand) { + .unreach => unreachable, + .dead => unreachable, + .none => return .none, + + .immediate, + .register, + .ptr_stack_offset, + .ptr_embedded_in_code, + .compare_flags_unsigned, + .compare_flags_signed, + => { + const stack_offset = try self.allocMemPtr(&inst.base); + try self.genSetStack(inst.base.src, inst.operand.ty, stack_offset, operand); + return MCValue{ .ptr_stack_offset = stack_offset }; + }, + + .stack_offset => |offset| return MCValue{ .ptr_stack_offset = offset }, + .embedded_in_code => |offset| return MCValue{ .ptr_embedded_in_code = offset }, + .memory => |vaddr| return MCValue{ .immediate = vaddr }, + } + } + fn ret(self: *Self, src: usize, mcv: MCValue) !MCValue { - try self.setRegOrStack(src, self.ret_mcv, mcv); + const ret_ty = self.fn_type.fnReturnType(); + try self.setRegOrMem(src, ret_ty, self.ret_mcv, mcv); switch (arch) { .i386 => { try self.code.append(0xc3); // ret @@ -1042,21 +1224,74 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { } /// Sets the value without any modifications to register allocation metadata or stack allocation metadata. - fn setRegOrStack(self: *Self, src: usize, loc: MCValue, val: MCValue) !void { + fn setRegOrMem(self: *Self, src: usize, ty: Type, loc: MCValue, val: MCValue) !void { switch (loc) { .none => return, .register => |reg| return self.genSetReg(src, reg, val), - .stack_offset => { - return self.fail(src, "TODO implement setRegOrStack for stack offset", .{}); + .stack_offset => |off| return self.genSetStack(src, ty, off, val), + .memory => { + return self.fail(src, "TODO implement setRegOrMem for memory", .{}); }, else => unreachable, } } - fn genSetReg(self: *Self, src: usize, reg: Register, mcv: MCValue) error{ CodegenFail, OutOfMemory }!void { + fn genSetStack(self: *Self, src: usize, ty: Type, stack_offset: u32, mcv: MCValue) InnerError!void { + switch (arch) { + .x86_64 => switch (mcv) { + .dead => unreachable, + .ptr_stack_offset => unreachable, + .ptr_embedded_in_code => unreachable, + .unreach, .none => return, // Nothing to do. 
+ .compare_flags_unsigned => |op| { + return self.fail(src, "TODO implement set stack variable with compare flags value (unsigned)", .{}); + }, + .compare_flags_signed => |op| { + return self.fail(src, "TODO implement set stack variable with compare flags value (signed)", .{}); + }, + .immediate => |x_big| { + try self.code.ensureCapacity(self.code.items.len + 7); + if (x_big <= math.maxInt(u32)) { + const x = @intCast(u32, x_big); + if (stack_offset > 128) { + return self.fail(src, "TODO implement set stack variable with large stack offset", .{}); + } + // We have a positive stack offset value but we want a twos complement negative + // offset from rbp, which is at the top of the stack frame. + const negative_offset = @intCast(i8, -@intCast(i32, stack_offset)); + const twos_comp = @bitCast(u8, negative_offset); + // mov DWORD PTR [rbp+offset], immediate + self.code.appendSliceAssumeCapacity(&[_]u8{ 0xc7, 0x45, twos_comp }); + mem.writeIntLittle(u32, self.code.addManyAsArrayAssumeCapacity(4), x); + } else { + return self.fail(src, "TODO implement set stack variable with large immediate", .{}); + } + }, + .embedded_in_code => |code_offset| { + return self.fail(src, "TODO implement set stack variable from embedded_in_code", .{}); + }, + .register => |reg| { + return self.fail(src, "TODO implement set stack variable from register", .{}); + }, + .memory => |vaddr| { + return self.fail(src, "TODO implement set stack variable from memory vaddr", .{}); + }, + .stack_offset => |off| { + if (stack_offset == off) + return; // Copy stack variable to itself; nothing to do. + return self.fail(src, "TODO implement copy stack variable to stack variable", .{}); + }, + }, + else => return self.fail(src, "TODO implement getSetStack for {}", .{self.target.cpu.arch}), + } + } + + fn genSetReg(self: *Self, src: usize, reg: Register, mcv: MCValue) InnerError!void { switch (arch) { .x86_64 => switch (mcv) { .dead => unreachable, + .ptr_stack_offset => unreachable, + .ptr_embedded_in_code => unreachable, .unreach, .none => return, // Nothing to do. .compare_flags_unsigned => |op| { try self.code.ensureCapacity(self.code.items.len + 3); @@ -1279,24 +1514,6 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { } } - /// Does not "move" the instruction. - fn copyToNewRegister(self: *Self, inst: *ir.Inst) !MCValue { - const branch = &self.branch_stack.items[self.branch_stack.items.len - 1]; - try branch.registers.ensureCapacity(self.gpa, branch.registers.items().len + 1); - try branch.inst_table.ensureCapacity(self.gpa, branch.inst_table.items().len + 1); - - const free_index = @ctz(FreeRegInt, branch.free_registers); - if (free_index >= callee_preserved_regs.len) - return self.fail(inst.src, "TODO implement spilling register to stack", .{}); - branch.free_registers &= ~(@as(FreeRegInt, 1) << free_index); - const reg = callee_preserved_regs[free_index]; - branch.registers.putAssumeCapacityNoClobber(reg, .{ .inst = inst }); - const old_mcv = branch.inst_table.get(inst).?; - const new_mcv: MCValue = .{ .register = reg }; - try self.genSetReg(inst.src, reg, old_mcv); - return new_mcv; - } - /// If the MCValue is an immediate, and it does not fit within this type, /// we put it in a register. 
/// A potential opportunity for future optimization here would be keeping track -- cgit v1.2.3 From cb3e8e323dc6846e16e868fcfb4d4bf135f56f48 Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Tue, 28 Jul 2020 19:11:23 -0700 Subject: stage2: x86_64 codegen for movs to/from stack variables --- src-self-hosted/codegen.zig | 43 +++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 41 insertions(+), 2 deletions(-) (limited to 'src-self-hosted/codegen.zig') diff --git a/src-self-hosted/codegen.zig b/src-self-hosted/codegen.zig index 777c9ee5f2..51a59596c4 100644 --- a/src-self-hosted/codegen.zig +++ b/src-self-hosted/codegen.zig @@ -1271,7 +1271,25 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { return self.fail(src, "TODO implement set stack variable from embedded_in_code", .{}); }, .register => |reg| { - return self.fail(src, "TODO implement set stack variable from register", .{}); + try self.code.ensureCapacity(self.code.items.len + 7); + self.rex(.{ .w = reg.size() == 64, .b = reg.isExtended() }); + const reg_id: u8 = @truncate(u3, reg.id()); + if (stack_offset <= 128) { + // example: 48 89 55 7f mov QWORD PTR [rbp+0x7f],rdx + const RM = @as(u8, 0b01_101_000) | reg_id; + const negative_offset = @intCast(i8, -@intCast(i32, stack_offset)); + const twos_comp = @bitCast(u8, negative_offset); + self.code.appendSliceAssumeCapacity(&[_]u8{ 0x89, RM, twos_comp }); + } else if (stack_offset <= 2147483648) { + // example: 48 89 95 80 00 00 00 mov QWORD PTR [rbp+0x80],rdx + const RM = @as(u8, 0b10_101_000) | reg_id; + const negative_offset = @intCast(i32, -@intCast(i33, stack_offset)); + const twos_comp = @bitCast(u32, negative_offset); + self.code.appendSliceAssumeCapacity(&[_]u8{ 0x89, RM }); + mem.writeIntLittle(u32, self.code.addManyAsArrayAssumeCapacity(4), twos_comp); + } else { + return self.fail(src, "stack offset too large", .{}); + } }, .memory => |vaddr| { return self.fail(src, "TODO implement set stack variable from memory vaddr", .{}); @@ -1475,7 +1493,28 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { } }, .stack_offset => |off| { - return self.fail(src, "TODO implement genSetReg for stack variables", .{}); + if (reg.size() != 64) { + return self.fail(src, "TODO decide whether to implement non-64-bit loads", .{}); + } + try self.code.ensureCapacity(self.code.items.len + 7); + self.rex(.{ .w = true, .r = reg.isExtended() }); + const reg_id: u8 = @truncate(u3, reg.id()); + if (off <= 128) { + // Example: 48 8b 4d 7f mov rcx,QWORD PTR [rbp+0x7f] + const RM = @as(u8, 0b01_000_101) | (reg_id << 3); + const negative_offset = @intCast(i8, -@intCast(i32, off)); + const twos_comp = @bitCast(u8, negative_offset); + self.code.appendSliceAssumeCapacity(&[_]u8{ 0x8b, RM, twos_comp }); + } else if (off <= 2147483648) { + // Example: 48 8b 8d 80 00 00 00 mov rcx,QWORD PTR [rbp+0x80] + const RM = @as(u8, 0b10_000_101) | (reg_id << 3); + const negative_offset = @intCast(i32, -@intCast(i33, off)); + const twos_comp = @bitCast(u32, negative_offset); + self.code.appendSliceAssumeCapacity(&[_]u8{ 0x8b, RM }); + mem.writeIntLittle(u32, self.code.addManyAsArrayAssumeCapacity(4), twos_comp); + } else { + return self.fail(src, "stack offset too large", .{}); + } }, }, else => return self.fail(src, "TODO implement getSetReg for {}", .{self.target.cpu.arch}), -- cgit v1.2.3 From 4beff80b2f30ad85f2127b1281053b8b25b0cc33 Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Tue, 28 Jul 2020 21:57:13 -0700 Subject: stage2: codegen handles undefined values * `optimize_mode` is passed to 
`link.File` and stored there * improve the debugging function `Module.dumpInst` * get rid of `Value.the_one_possible_value` in favor of a few more specific values for different types. This is less buggy, one less footgun. * `Type.onePossibleValue` now returns a `?Value` instead of `bool`. * codegen handles undefined values. `undef` is a new `MCValue` tag. It uses 0xaa values depending on optimization mode. However optimization mode does not yet support scope overrides. * link.zig: move the `Options` field from `File.Elf` and `File.C` to the base struct. - fix the Tag enum to adhere to style conventions * ZIR now supports emitting undefined values. * Fix the logic of comptime math to properly compare against zero using the `compareWithZero` function. --- src-self-hosted/Module.zig | 62 +++++++++++++++------- src-self-hosted/codegen.zig | 58 ++++++++++++++++++--- src-self-hosted/ir.zig | 3 +- src-self-hosted/link.zig | 120 +++++++++++++++++++++++-------------------- src-self-hosted/type.zig | 33 ++++++++---- src-self-hosted/value.zig | 72 ++++++++++++++++++-------- src-self-hosted/zir.zig | 17 +++++- src-self-hosted/zir_sema.zig | 3 +- 8 files changed, 248 insertions(+), 120 deletions(-) (limited to 'src-self-hosted/codegen.zig') diff --git a/src-self-hosted/Module.zig b/src-self-hosted/Module.zig index 426b28488f..864cd66d32 100644 --- a/src-self-hosted/Module.zig +++ b/src-self-hosted/Module.zig @@ -47,7 +47,6 @@ export_owners: std.AutoHashMapUnmanaged(*Decl, []*Export) = .{}, /// Maps fully qualified namespaced names to the Decl struct for them. decl_table: std.HashMapUnmanaged(Scope.NameHash, *Decl, Scope.name_hash_hash, Scope.name_hash_eql, false) = .{}, -optimize_mode: std.builtin.Mode, link_error_flags: link.File.ErrorFlags = .{}, work_queue: std.fifo.LinearFifo(WorkItem, .Dynamic), @@ -385,18 +384,6 @@ pub const Scope = struct { }; } - pub fn dumpInst(self: *Scope, inst: *Inst) void { - const zir_module = self.namespace(); - const loc = std.zig.findLineColumn(zir_module.source.bytes, inst.src); - std.debug.warn("{}:{}:{}: {}: ty={}\n", .{ - zir_module.sub_file_path, - loc.line + 1, - loc.column + 1, - @tagName(inst.tag), - inst.ty, - }); - } - /// Asserts the scope has a parent which is a ZIRModule or File and /// returns the sub_file_path field. pub fn subFilePath(base: *Scope) []const u8 { @@ -802,6 +789,7 @@ pub fn init(gpa: *Allocator, options: InitOptions) !Module { .output_mode = options.output_mode, .link_mode = options.link_mode orelse .Static, .object_format = options.object_format orelse options.target.getObjectFormat(), + .optimize_mode = options.optimize_mode, }); errdefer bin_file.destroy(); @@ -838,7 +826,6 @@ pub fn init(gpa: *Allocator, options: InitOptions) !Module { .bin_file_dir = bin_file_dir, .bin_file_path = options.bin_file_path, .bin_file = bin_file, - .optimize_mode = options.optimize_mode, .work_queue = std.fifo.LinearFifo(WorkItem, .Dynamic).init(gpa), .keep_source_files_loaded = options.keep_source_files_loaded, }; @@ -894,7 +881,11 @@ fn freeExportList(gpa: *Allocator, export_list: []*Export) void { } pub fn target(self: Module) std.Target { - return self.bin_file.options().target; + return self.bin_file.options.target; +} + +pub fn optimizeMode(self: Module) std.builtin.Mode { + return self.bin_file.options.optimize_mode; } /// Detect changes to source files, perform semantic analysis, and update the output files. 
@@ -1991,14 +1982,14 @@ pub fn constType(self: *Module, scope: *Scope, src: usize, ty: Type) !*Inst { pub fn constVoid(self: *Module, scope: *Scope, src: usize) !*Inst { return self.constInst(scope, src, .{ .ty = Type.initTag(.void), - .val = Value.initTag(.the_one_possible_value), + .val = Value.initTag(.void_value), }); } pub fn constNoReturn(self: *Module, scope: *Scope, src: usize) !*Inst { return self.constInst(scope, src, .{ .ty = Type.initTag(.noreturn), - .val = Value.initTag(.the_one_possible_value), + .val = Value.initTag(.unreachable_value), }); } @@ -2162,7 +2153,8 @@ pub fn analyzeDeclRefByName(self: *Module, scope: *Scope, src: usize, decl_name: } pub fn wantSafety(self: *Module, scope: *Scope) bool { - return switch (self.optimize_mode) { + // TODO take into account scope's safety overrides + return switch (self.optimizeMode()) { .Debug => true, .ReleaseSafe => true, .ReleaseFast => false, @@ -2511,7 +2503,7 @@ pub fn storePtr(self: *Module, scope: *Scope, src: usize, ptr: *Inst, uncasted_v const elem_ty = ptr.ty.elemType(); const value = try self.coerce(scope, elem_ty, uncasted_value); - if (elem_ty.onePossibleValue()) + if (elem_ty.onePossibleValue() != null) return self.constVoid(scope, src); // TODO handle comptime pointer writes @@ -2803,3 +2795,35 @@ pub fn singleConstPtrType(self: *Module, scope: *Scope, src: usize, elem_ty: Typ type_payload.* = .{ .pointee_type = elem_ty }; return Type.initPayload(&type_payload.base); } + +pub fn dumpInst(self: *Module, scope: *Scope, inst: *Inst) void { + const zir_module = scope.namespace(); + const source = zir_module.getSource(self) catch @panic("dumpInst failed to get source"); + const loc = std.zig.findLineColumn(source, inst.src); + if (inst.tag == .constant) { + std.debug.warn("constant ty={} val={} src={}:{}:{}\n", .{ + inst.ty, + inst.castTag(.constant).?.val, + zir_module.subFilePath(), + loc.line + 1, + loc.column + 1, + }); + } else if (inst.deaths == 0) { + std.debug.warn("{} ty={} src={}:{}:{}\n", .{ + @tagName(inst.tag), + inst.ty, + zir_module.subFilePath(), + loc.line + 1, + loc.column + 1, + }); + } else { + std.debug.warn("{} ty={} deaths={b} src={}:{}:{}\n", .{ + @tagName(inst.tag), + inst.ty, + inst.deaths, + zir_module.subFilePath(), + loc.line + 1, + loc.column + 1, + }); + } +} diff --git a/src-self-hosted/codegen.zig b/src-self-hosted/codegen.zig index 51a59596c4..40fb6c5407 100644 --- a/src-self-hosted/codegen.zig +++ b/src-self-hosted/codegen.zig @@ -50,7 +50,7 @@ pub fn generateSymbol( switch (typed_value.ty.zigTypeTag()) { .Fn => { - switch (bin_file.options.target.cpu.arch) { + switch (bin_file.base.options.target.cpu.arch) { //.arm => return Function(.arm).generateSymbol(bin_file, src, typed_value, code), //.armeb => return Function(.armeb).generateSymbol(bin_file, src, typed_value, code), //.aarch64 => return Function(.aarch64).generateSymbol(bin_file, src, typed_value, code), @@ -143,7 +143,7 @@ pub fn generateSymbol( // TODO handle the dependency of this symbol on the decl's vaddr. // If the decl changes vaddr, then this symbol needs to get regenerated. 
const vaddr = bin_file.local_symbols.items[decl.link.local_sym_index].st_value; - const endian = bin_file.options.target.cpu.arch.endian(); + const endian = bin_file.base.options.target.cpu.arch.endian(); switch (bin_file.ptr_width) { .p32 => { try code.resize(4); @@ -166,7 +166,7 @@ pub fn generateSymbol( }; }, .Int => { - const info = typed_value.ty.intInfo(bin_file.options.target); + const info = typed_value.ty.intInfo(bin_file.base.options.target); if (info.bits == 8 and !info.signed) { const x = typed_value.val.toUnsignedInt(); try code.append(@intCast(u8, x)); @@ -230,6 +230,8 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { unreach, /// No more references to this value remain. dead, + /// The value is undefined. + undef, /// A pointer-sized integer that fits in a register. /// If the type is a pointer, this is the pointer address in virtual address space. immediate: u64, @@ -282,6 +284,7 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { .compare_flags_signed, .ptr_stack_offset, .ptr_embedded_in_code, + .undef, => false, .register, @@ -360,7 +363,7 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { var function = Self{ .gpa = bin_file.allocator, - .target = &bin_file.options.target, + .target = &bin_file.base.options.target, .bin_file = bin_file, .mod_fn = module_fn, .code = code, @@ -656,6 +659,7 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { }; switch (ptr) { .none => unreachable, + .undef => unreachable, .unreach => unreachable, .dead => unreachable, .compare_flags_unsigned => unreachable, @@ -687,6 +691,7 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { const elem_ty = inst.rhs.ty; switch (ptr) { .none => unreachable, + .undef => unreachable, .unreach => unreachable, .dead => unreachable, .compare_flags_unsigned => unreachable, @@ -798,6 +803,7 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { fn genX8664BinMathCode(self: *Self, src: usize, dst_mcv: MCValue, src_mcv: MCValue, opx: u8, mr: u8) !void { switch (dst_mcv) { .none => unreachable, + .undef => unreachable, .dead, .unreach, .immediate => unreachable, .compare_flags_unsigned => unreachable, .compare_flags_signed => unreachable, @@ -806,6 +812,7 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { .register => |dst_reg| { switch (src_mcv) { .none => unreachable, + .undef => try self.genSetReg(src, dst_reg, .undef), .dead, .unreach => unreachable, .ptr_stack_offset => unreachable, .ptr_embedded_in_code => unreachable, @@ -905,11 +912,12 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { return self.fail(inst.base.src, "TODO implement calling with parameters in memory", .{}); }, .ptr_stack_offset => { - return self.fail(inst.base.src, "TODO implement calling with MCValue.ptr_stack_offset", .{}); + return self.fail(inst.base.src, "TODO implement calling with MCValue.ptr_stack_offset arg", .{}); }, .ptr_embedded_in_code => { - return self.fail(inst.base.src, "TODO implement calling with MCValue.ptr_embedded_in_code", .{}); + return self.fail(inst.base.src, "TODO implement calling with MCValue.ptr_embedded_in_code arg", .{}); }, + .undef => unreachable, .immediate => unreachable, .unreach => unreachable, .dead => unreachable, @@ -966,6 +974,8 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { .stack_offset => |offset| return MCValue{ .ptr_stack_offset = offset }, .embedded_in_code => |offset| return MCValue{ .ptr_embedded_in_code = offset }, .memory => |vaddr| return MCValue{ .immediate = vaddr }, + + .undef => return self.fail(inst.base.src, "TODO implement 
ref on an undefined value", .{}), } } @@ -1243,6 +1253,12 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { .ptr_stack_offset => unreachable, .ptr_embedded_in_code => unreachable, .unreach, .none => return, // Nothing to do. + .undef => { + if (!self.wantSafety()) + return; // The already existing value will do just fine. + // TODO Upgrade this to a memset call when we have that available. + return self.genSetStack(src, ty, stack_offset, .{ .immediate = 0xaaaaaaaa }); + }, .compare_flags_unsigned => |op| { return self.fail(src, "TODO implement set stack variable with compare flags value (unsigned)", .{}); }, @@ -1250,6 +1266,10 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { return self.fail(src, "TODO implement set stack variable with compare flags value (signed)", .{}); }, .immediate => |x_big| { + if (ty.abiSize(self.target.*) != 4) { + // TODO after fixing this, need to update the undef case above + return self.fail(src, "TODO implement set non 4 abi size stack variable with immediate", .{}); + } try self.code.ensureCapacity(self.code.items.len + 7); if (x_big <= math.maxInt(u32)) { const x = @intCast(u32, x_big); @@ -1311,6 +1331,18 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { .ptr_stack_offset => unreachable, .ptr_embedded_in_code => unreachable, .unreach, .none => return, // Nothing to do. + .undef => { + if (!self.wantSafety()) + return; // The already existing value will do just fine. + // Write the debug undefined value. + switch (reg.size()) { + 8 => return self.genSetReg(src, reg, .{ .immediate = 0xaa }), + 16 => return self.genSetReg(src, reg, .{ .immediate = 0xaaaa }), + 32 => return self.genSetReg(src, reg, .{ .immediate = 0xaaaaaaaa }), + 64 => return self.genSetReg(src, reg, .{ .immediate = 0xaaaaaaaaaaaaaaaa }), + else => unreachable, + } + }, .compare_flags_unsigned => |op| { try self.code.ensureCapacity(self.code.items.len + 3); self.rex(.{ .b = reg.isExtended(), .w = reg.size() == 64 }); @@ -1471,7 +1503,7 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { // is no way to possibly encode it. This means that RSP, RBP, R12, and R13 cannot be used with // this instruction. const id3 = @truncate(u3, reg.id()); - std.debug.assert(id3 != 4 and id3 != 5); + assert(id3 != 4 and id3 != 5); // Rather than duplicate the logic used for the move, we just use a self-call with a new MCValue. try self.genSetReg(src, reg, MCValue{ .immediate = x }); @@ -1580,6 +1612,8 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { } fn genTypedValue(self: *Self, src: usize, typed_value: TypedValue) !MCValue { + if (typed_value.val.isUndef()) + return MCValue.undef; const ptr_bits = self.target.cpu.arch.ptrBitWidth(); const ptr_bytes: u64 = @divExact(ptr_bits, 8); switch (typed_value.ty.zigTypeTag()) { @@ -1691,6 +1725,16 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { return result; } + /// TODO support scope overrides. Also note this logic is duplicated with `Module.wantSafety`. 
+ fn wantSafety(self: *Self) bool { + return switch (self.bin_file.base.options.optimize_mode) { + .Debug => true, + .ReleaseSafe => true, + .ReleaseFast => false, + .ReleaseSmall => false, + }; + } + fn fail(self: *Self, src: usize, comptime format: []const u8, args: anytype) error{ CodegenFail, OutOfMemory } { @setCold(true); assert(self.err_msg == null); diff --git a/src-self-hosted/ir.zig b/src-self-hosted/ir.zig index 3965a2ea93..deb0a91cec 100644 --- a/src-self-hosted/ir.zig +++ b/src-self-hosted/ir.zig @@ -165,8 +165,7 @@ pub const Inst = struct { /// Returns `null` if runtime-known. pub fn value(base: *Inst) ?Value { - if (base.ty.onePossibleValue()) - return Value.initTag(.the_one_possible_value); + if (base.ty.onePossibleValue()) |opv| return opv; const inst = base.cast(Constant) orelse return null; return inst.val; diff --git a/src-self-hosted/link.zig b/src-self-hosted/link.zig index cde91cdc01..7cd6876cb2 100644 --- a/src-self-hosted/link.zig +++ b/src-self-hosted/link.zig @@ -16,6 +16,7 @@ pub const Options = struct { output_mode: std.builtin.OutputMode, link_mode: std.builtin.LinkMode, object_format: std.builtin.ObjectFormat, + optimize_mode: std.builtin.Mode, /// Used for calculating how much space to reserve for symbols in case the binary file /// does not already have a symbol table. symbol_count_hint: u64 = 32, @@ -66,6 +67,7 @@ pub fn writeFilePath( .link_mode = module.link_mode, .object_format = module.object_format, .symbol_count_hint = module.decls.items.len, + .optimize_mode = module.optimize_mode, }; const af = try dir.atomicFile(sub_path, .{ .mode = determineMode(options) }); defer af.deinit(); @@ -88,9 +90,12 @@ pub fn writeFilePath( fn openCFile(allocator: *Allocator, file: fs.File, options: Options) !File.C { return File.C{ + .base = .{ + .tag = .c, + .options = options, + }, .allocator = allocator, .file = file, - .options = options, .main = std.ArrayList(u8).init(allocator), .header = std.ArrayList(u8).init(allocator), .constants = std.ArrayList(u8).init(allocator), @@ -114,6 +119,8 @@ pub fn openBinFile(allocator: *Allocator, file: fs.File, options: Options) !File pub const File = struct { tag: Tag, + options: Options, + pub fn cast(base: *File, comptime T: type) ?*T { if (base.tag != T.base_tag) return null; @@ -123,47 +130,47 @@ pub const File = struct { pub fn makeWritable(base: *File, dir: fs.Dir, sub_path: []const u8) !void { switch (base.tag) { - .Elf => return @fieldParentPtr(Elf, "base", base).makeWritable(dir, sub_path), - .C => {}, + .elf => return @fieldParentPtr(Elf, "base", base).makeWritable(dir, sub_path), + .c => {}, } } pub fn makeExecutable(base: *File) !void { switch (base.tag) { - .Elf => return @fieldParentPtr(Elf, "base", base).makeExecutable(), - .C => unreachable, + .elf => return @fieldParentPtr(Elf, "base", base).makeExecutable(), + .c => unreachable, } } pub fn updateDecl(base: *File, module: *Module, decl: *Module.Decl) !void { switch (base.tag) { - .Elf => return @fieldParentPtr(Elf, "base", base).updateDecl(module, decl), - .C => return @fieldParentPtr(C, "base", base).updateDecl(module, decl), + .elf => return @fieldParentPtr(Elf, "base", base).updateDecl(module, decl), + .c => return @fieldParentPtr(C, "base", base).updateDecl(module, decl), } } pub fn allocateDeclIndexes(base: *File, decl: *Module.Decl) !void { switch (base.tag) { - .Elf => return @fieldParentPtr(Elf, "base", base).allocateDeclIndexes(decl), - .C => {}, + .elf => return @fieldParentPtr(Elf, "base", base).allocateDeclIndexes(decl), + .c => {}, } } pub fn 
deinit(base: *File) void { switch (base.tag) { - .Elf => @fieldParentPtr(Elf, "base", base).deinit(), - .C => @fieldParentPtr(C, "base", base).deinit(), + .elf => @fieldParentPtr(Elf, "base", base).deinit(), + .c => @fieldParentPtr(C, "base", base).deinit(), } } pub fn destroy(base: *File) void { switch (base.tag) { - .Elf => { + .elf => { const parent = @fieldParentPtr(Elf, "base", base); parent.deinit(); parent.allocator.destroy(parent); }, - .C => { + .c => { const parent = @fieldParentPtr(C, "base", base); parent.deinit(); parent.allocator.destroy(parent); @@ -173,29 +180,22 @@ pub const File = struct { pub fn flush(base: *File) !void { try switch (base.tag) { - .Elf => @fieldParentPtr(Elf, "base", base).flush(), - .C => @fieldParentPtr(C, "base", base).flush(), + .elf => @fieldParentPtr(Elf, "base", base).flush(), + .c => @fieldParentPtr(C, "base", base).flush(), }; } pub fn freeDecl(base: *File, decl: *Module.Decl) void { switch (base.tag) { - .Elf => @fieldParentPtr(Elf, "base", base).freeDecl(decl), - .C => unreachable, + .elf => @fieldParentPtr(Elf, "base", base).freeDecl(decl), + .c => unreachable, } } pub fn errorFlags(base: *File) ErrorFlags { return switch (base.tag) { - .Elf => @fieldParentPtr(Elf, "base", base).error_flags, - .C => return .{ .no_entry_point_found = false }, - }; - } - - pub fn options(base: *File) Options { - return switch (base.tag) { - .Elf => @fieldParentPtr(Elf, "base", base).options, - .C => @fieldParentPtr(C, "base", base).options, + .elf => @fieldParentPtr(Elf, "base", base).error_flags, + .c => return .{ .no_entry_point_found = false }, }; } @@ -207,14 +207,14 @@ pub const File = struct { exports: []const *Module.Export, ) !void { switch (base.tag) { - .Elf => return @fieldParentPtr(Elf, "base", base).updateDeclExports(module, decl, exports), - .C => return {}, + .elf => return @fieldParentPtr(Elf, "base", base).updateDeclExports(module, decl, exports), + .c => return {}, } } pub const Tag = enum { - Elf, - C, + elf, + c, }; pub const ErrorFlags = struct { @@ -222,15 +222,15 @@ pub const File = struct { }; pub const C = struct { - pub const base_tag: Tag = .C; - base: File = File{ .tag = base_tag }, + pub const base_tag: Tag = .c; + + base: File, allocator: *Allocator, header: std.ArrayList(u8), constants: std.ArrayList(u8), main: std.ArrayList(u8), file: ?fs.File, - options: Options, called: std.StringHashMap(void), need_stddef: bool = false, need_stdint: bool = false, @@ -294,13 +294,13 @@ pub const File = struct { }; pub const Elf = struct { - pub const base_tag: Tag = .Elf; - base: File = File{ .tag = base_tag }, + pub const base_tag: Tag = .elf; + + base: File, allocator: *Allocator, file: ?fs.File, owns_file_handle: bool, - options: Options, ptr_width: enum { p32, p64 }, /// Stored in native-endian format, depending on target endianness needs to be bswapped on read/write. @@ -460,13 +460,13 @@ pub const File = struct { self.file = try dir.createFile(sub_path, .{ .truncate = false, .read = true, - .mode = determineMode(self.options), + .mode = determineMode(self.base.options), }); } /// Returns end pos of collision, if any. 
fn detectAllocCollision(self: *Elf, start: u64, size: u64) ?u64 { - const small_ptr = self.options.target.cpu.arch.ptrBitWidth() == 32; + const small_ptr = self.base.options.target.cpu.arch.ptrBitWidth() == 32; const ehdr_size: u64 = if (small_ptr) @sizeOf(elf.Elf32_Ehdr) else @sizeOf(elf.Elf64_Ehdr); if (start < ehdr_size) return ehdr_size; @@ -569,7 +569,7 @@ pub const File = struct { }; if (self.phdr_load_re_index == null) { self.phdr_load_re_index = @intCast(u16, self.program_headers.items.len); - const file_size = self.options.program_code_size_hint; + const file_size = self.base.options.program_code_size_hint; const p_align = 0x1000; const off = self.findFreeSpace(file_size, p_align); std.log.debug(.link, "found PT_LOAD free space 0x{x} to 0x{x}\n", .{ off, off + file_size }); @@ -588,7 +588,7 @@ pub const File = struct { } if (self.phdr_got_index == null) { self.phdr_got_index = @intCast(u16, self.program_headers.items.len); - const file_size = @as(u64, ptr_size) * self.options.symbol_count_hint; + const file_size = @as(u64, ptr_size) * self.base.options.symbol_count_hint; // We really only need ptr alignment but since we are using PROGBITS, linux requires // page align. const p_align = 0x1000; @@ -671,7 +671,7 @@ pub const File = struct { self.symtab_section_index = @intCast(u16, self.sections.items.len); const min_align: u16 = if (small_ptr) @alignOf(elf.Elf32_Sym) else @alignOf(elf.Elf64_Sym); const each_size: u64 = if (small_ptr) @sizeOf(elf.Elf32_Sym) else @sizeOf(elf.Elf64_Sym); - const file_size = self.options.symbol_count_hint * each_size; + const file_size = self.base.options.symbol_count_hint * each_size; const off = self.findFreeSpace(file_size, min_align); std.log.debug(.link, "found symtab free space 0x{x} to 0x{x}\n", .{ off, off + file_size }); @@ -726,7 +726,7 @@ pub const File = struct { /// Commit pending changes and write headers. pub fn flush(self: *Elf) !void { - const foreign_endian = self.options.target.cpu.arch.endian() != std.Target.current.cpu.arch.endian(); + const foreign_endian = self.base.options.target.cpu.arch.endian() != std.Target.current.cpu.arch.endian(); // Unfortunately these have to be buffered and done at the end because ELF does not allow // mixing local and global symbols within a symbol table. 
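The link.File reshuffle above is the usual Zig "embedded base struct" interface: each backend embeds a base: File carrying the tag (and now the Options), and dispatch recovers the outer struct with @fieldParentPtr. A stripped-down sketch of that pattern follows, in current Zig where @fieldParentPtr infers the parent type from the result location (the compiler at the time used the three-argument form); the flush methods and return values are made up for illustration:

const std = @import("std");

const File = struct {
    tag: Tag,

    const Tag = enum { elf, c };

    fn flush(base: *File) []const u8 {
        return switch (base.tag) {
            // Recover the outer struct from a pointer to its embedded base field.
            .elf => @as(*Elf, @fieldParentPtr("base", base)).flushElf(),
            .c => @as(*C, @fieldParentPtr("base", base)).flushC(),
        };
    }
};

const Elf = struct {
    base: File = .{ .tag = .elf },
    fn flushElf(self: *Elf) []const u8 {
        _ = self;
        return "elf";
    }
};

const C = struct {
    base: File = .{ .tag = .c },
    fn flushC(self: *C) []const u8 {
        _ = self;
        return "c";
    }
};

test "dispatch through the embedded base" {
    var elf = Elf{};
    var c = C{};
    try std.testing.expectEqualStrings("elf", File.flush(&elf.base));
    try std.testing.expectEqualStrings("c", File.flush(&c.base));
}

Moving Options into the base struct is what lets shared code write self.base.options instead of dispatching on the tag just to read configuration.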
@@ -845,7 +845,7 @@ pub const File = struct { } self.shdr_table_dirty = false; } - if (self.entry_addr == null and self.options.output_mode == .Exe) { + if (self.entry_addr == null and self.base.options.output_mode == .Exe) { std.log.debug(.link, "no_entry_point_found = true\n", .{}); self.error_flags.no_entry_point_found = true; } else { @@ -875,7 +875,7 @@ pub const File = struct { }; index += 1; - const endian = self.options.target.cpu.arch.endian(); + const endian = self.base.options.target.cpu.arch.endian(); hdr_buf[index] = switch (endian) { .Little => elf.ELFDATA2LSB, .Big => elf.ELFDATA2MSB, @@ -893,10 +893,10 @@ pub const File = struct { assert(index == 16); - const elf_type = switch (self.options.output_mode) { + const elf_type = switch (self.base.options.output_mode) { .Exe => elf.ET.EXEC, .Obj => elf.ET.REL, - .Lib => switch (self.options.link_mode) { + .Lib => switch (self.base.options.link_mode) { .Static => elf.ET.REL, .Dynamic => elf.ET.DYN, }, @@ -904,7 +904,7 @@ pub const File = struct { mem.writeInt(u16, hdr_buf[index..][0..2], @enumToInt(elf_type), endian); index += 2; - const machine = self.options.target.cpu.arch.toElfMachine(); + const machine = self.base.options.target.cpu.arch.toElfMachine(); mem.writeInt(u16, hdr_buf[index..][0..2], @enumToInt(machine), endian); index += 2; @@ -1216,7 +1216,7 @@ pub const File = struct { }, }; - const required_alignment = typed_value.ty.abiAlignment(self.options.target); + const required_alignment = typed_value.ty.abiAlignment(self.base.options.target); const stt_bits: u8 = switch (typed_value.ty.zigTypeTag()) { .Fn => elf.STT_FUNC, @@ -1361,9 +1361,9 @@ pub const File = struct { } fn writeProgHeader(self: *Elf, index: usize) !void { - const foreign_endian = self.options.target.cpu.arch.endian() != std.Target.current.cpu.arch.endian(); + const foreign_endian = self.base.options.target.cpu.arch.endian() != std.Target.current.cpu.arch.endian(); const offset = self.program_headers.items[index].p_offset; - switch (self.options.target.cpu.arch.ptrBitWidth()) { + switch (self.base.options.target.cpu.arch.ptrBitWidth()) { 32 => { var phdr = [1]elf.Elf32_Phdr{progHeaderTo32(self.program_headers.items[index])}; if (foreign_endian) { @@ -1383,9 +1383,9 @@ pub const File = struct { } fn writeSectHeader(self: *Elf, index: usize) !void { - const foreign_endian = self.options.target.cpu.arch.endian() != std.Target.current.cpu.arch.endian(); + const foreign_endian = self.base.options.target.cpu.arch.endian() != std.Target.current.cpu.arch.endian(); const offset = self.sections.items[index].sh_offset; - switch (self.options.target.cpu.arch.ptrBitWidth()) { + switch (self.base.options.target.cpu.arch.ptrBitWidth()) { 32 => { var shdr: [1]elf.Elf32_Shdr = undefined; shdr[0] = sectHeaderTo32(self.sections.items[index]); @@ -1433,7 +1433,7 @@ pub const File = struct { self.offset_table_count_dirty = false; } - const endian = self.options.target.cpu.arch.endian(); + const endian = self.base.options.target.cpu.arch.endian(); const off = shdr.sh_offset + @as(u64, entry_size) * index; switch (self.ptr_width) { .p32 => { @@ -1475,7 +1475,7 @@ pub const File = struct { syms_sect.sh_size = needed_size; // anticipating adding the global symbols later self.shdr_table_dirty = true; // TODO look into only writing one section } - const foreign_endian = self.options.target.cpu.arch.endian() != std.Target.current.cpu.arch.endian(); + const foreign_endian = self.base.options.target.cpu.arch.endian() != std.Target.current.cpu.arch.endian(); switch (self.ptr_width) { 
.p32 => { var sym = [1]elf.Elf32_Sym{ @@ -1511,7 +1511,7 @@ pub const File = struct { .p32 => @sizeOf(elf.Elf32_Sym), .p64 => @sizeOf(elf.Elf64_Sym), }; - const foreign_endian = self.options.target.cpu.arch.endian() != std.Target.current.cpu.arch.endian(); + const foreign_endian = self.base.options.target.cpu.arch.endian() != std.Target.current.cpu.arch.endian(); const global_syms_off = syms_sect.sh_offset + self.local_symbols.items.len * sym_size; switch (self.ptr_width) { .p32 => { @@ -1577,9 +1577,12 @@ pub fn createElfFile(allocator: *Allocator, file: fs.File, options: Options) !Fi } var self: File.Elf = .{ + .base = .{ + .tag = .elf, + .options = options, + }, .allocator = allocator, .file = file, - .options = options, .ptr_width = switch (options.target.cpu.arch.ptrBitWidth()) { 32 => .p32, 64 => .p64, @@ -1637,10 +1640,13 @@ fn openBinFileInner(allocator: *Allocator, file: fs.File, options: Options) !Fil .raw => return error.IncrFailed, } var self: File.Elf = .{ + .base = .{ + .tag = .elf, + .options = options, + }, .allocator = allocator, .file = file, .owns_file_handle = false, - .options = options, .ptr_width = switch (options.target.cpu.arch.ptrBitWidth()) { 32 => .p32, 64 => .p64, diff --git a/src-self-hosted/type.zig b/src-self-hosted/type.zig index 729292f6ab..457a69ac6d 100644 --- a/src-self-hosted/type.zig +++ b/src-self-hosted/type.zig @@ -1653,7 +1653,7 @@ pub const Type = extern union { }; } - pub fn onePossibleValue(self: Type) bool { + pub fn onePossibleValue(self: Type) ?Value { var ty = self; while (true) switch (ty.tag()) { .f16, @@ -1692,21 +1692,32 @@ pub const Type = extern union { .single_const_pointer_to_comptime_int, .array_u8_sentinel_0, .const_slice_u8, - => return false, - .c_void, - .void, - .noreturn, - .@"null", - .@"undefined", - => return true, + => return null, + + .void => return Value.initTag(.void_value), + .noreturn => return Value.initTag(.unreachable_value), + .@"null" => return Value.initTag(.null_value), + .@"undefined" => return Value.initTag(.undef), - .int_unsigned => return ty.cast(Payload.IntUnsigned).?.bits == 0, - .int_signed => return ty.cast(Payload.IntSigned).?.bits == 0, + .int_unsigned => { + if (ty.cast(Payload.IntUnsigned).?.bits == 0) { + return Value.initTag(.zero); + } else { + return null; + } + }, + .int_signed => { + if (ty.cast(Payload.IntSigned).?.bits == 0) { + return Value.initTag(.zero); + } else { + return null; + } + }, .array => { const array = ty.cast(Payload.Array).?; if (array.len == 0) - return true; + return Value.initTag(.empty_array); ty = array.elem_type; continue; }, diff --git a/src-self-hosted/value.zig b/src-self-hosted/value.zig index 881602d76a..eff7c95be7 100644 --- a/src-self-hosted/value.zig +++ b/src-self-hosted/value.zig @@ -63,7 +63,9 @@ pub const Value = extern union { undef, zero, - the_one_possible_value, // when the type only has one possible value + void_value, + unreachable_value, + empty_array, null_value, bool_true, bool_false, // See last_no_payload_tag below. 
@@ -164,7 +166,9 @@ pub const Value = extern union { .const_slice_u8_type, .undef, .zero, - .the_one_possible_value, + .void_value, + .unreachable_value, + .empty_array, .null_value, .bool_true, .bool_false, @@ -285,7 +289,8 @@ pub const Value = extern union { .null_value => return out_stream.writeAll("null"), .undef => return out_stream.writeAll("undefined"), .zero => return out_stream.writeAll("0"), - .the_one_possible_value => return out_stream.writeAll("(one possible value)"), + .void_value => return out_stream.writeAll("{}"), + .unreachable_value => return out_stream.writeAll("unreachable"), .bool_true => return out_stream.writeAll("true"), .bool_false => return out_stream.writeAll("false"), .ty => return val.cast(Payload.Ty).?.ty.format("", options, out_stream), @@ -312,6 +317,7 @@ pub const Value = extern union { try out_stream.print("&[{}] ", .{elem_ptr.index}); val = elem_ptr.array_ptr; }, + .empty_array => return out_stream.writeAll(".{}"), .bytes => return std.zig.renderStringLiteral(self.cast(Payload.Bytes).?.data, out_stream), .repeated => { try out_stream.writeAll("(repeated) "); @@ -388,7 +394,9 @@ pub const Value = extern union { .undef, .zero, - .the_one_possible_value, + .void_value, + .unreachable_value, + .empty_array, .bool_true, .bool_false, .null_value, @@ -460,15 +468,18 @@ pub const Value = extern union { .decl_ref, .elem_ptr, .bytes, - .undef, .repeated, .float_16, .float_32, .float_64, .float_128, + .void_value, + .unreachable_value, + .empty_array, => unreachable, - .the_one_possible_value, // An integer with one possible value is always zero. + .undef => unreachable, + .zero, .bool_false, => return BigIntMutable.init(&space.limbs, 0).toConst(), @@ -532,16 +543,19 @@ pub const Value = extern union { .decl_ref, .elem_ptr, .bytes, - .undef, .repeated, .float_16, .float_32, .float_64, .float_128, + .void_value, + .unreachable_value, + .empty_array, => unreachable, + .undef => unreachable, + .zero, - .the_one_possible_value, // an integer with one possible value is always zero .bool_false, => return 0, @@ -570,7 +584,7 @@ pub const Value = extern union { .float_64 => @floatCast(T, self.cast(Payload.Float_64).?.val), .float_128 => @floatCast(T, self.cast(Payload.Float_128).?.val), - .zero, .the_one_possible_value => 0, + .zero => 0, .int_u64 => @intToFloat(T, self.cast(Payload.Int_u64).?.int), // .int_i64 => @intToFloat(f128, self.cast(Payload.Int_i64).?.int), .int_i64 => @panic("TODO lld: error: undefined symbol: __floatditf"), @@ -637,9 +651,11 @@ pub const Value = extern union { .float_32, .float_64, .float_128, + .void_value, + .unreachable_value, + .empty_array, => unreachable, - .the_one_possible_value, // an integer with one possible value is always zero .zero, .bool_false, => return 0, @@ -714,11 +730,13 @@ pub const Value = extern union { .float_32, .float_64, .float_128, + .void_value, + .unreachable_value, + .empty_array, => unreachable, .zero, .undef, - .the_one_possible_value, // an integer with one possible value is always zero .bool_false, => return true, @@ -797,13 +815,13 @@ pub const Value = extern union { // return Value.initPayload(&res_payload.base).copy(allocator); }, 32 => { - var res_payload = Value.Payload.Float_32{.val = self.toFloat(f32)}; + var res_payload = Value.Payload.Float_32{ .val = self.toFloat(f32) }; if (!self.eql(Value.initPayload(&res_payload.base))) return error.Overflow; return Value.initPayload(&res_payload.base).copy(allocator); }, 64 => { - var res_payload = Value.Payload.Float_64{.val = self.toFloat(f64)}; + var 
res_payload = Value.Payload.Float_64{ .val = self.toFloat(f64) }; if (!self.eql(Value.initPayload(&res_payload.base))) return error.Overflow; return Value.initPayload(&res_payload.base).copy(allocator); @@ -875,7 +893,9 @@ pub const Value = extern union { .int_i64, .int_big_positive, .int_big_negative, - .the_one_possible_value, + .empty_array, + .void_value, + .unreachable_value, => unreachable, .zero => false, @@ -939,10 +959,12 @@ pub const Value = extern union { .bytes, .repeated, .undef, + .void_value, + .unreachable_value, + .empty_array, => unreachable, .zero, - .the_one_possible_value, // an integer with one possible value is always zero .bool_false, => .eq, @@ -964,8 +986,8 @@ pub const Value = extern union { pub fn order(lhs: Value, rhs: Value) std.math.Order { const lhs_tag = lhs.tag(); const rhs_tag = rhs.tag(); - const lhs_is_zero = lhs_tag == .zero or lhs_tag == .the_one_possible_value; - const rhs_is_zero = rhs_tag == .zero or rhs_tag == .the_one_possible_value; + const lhs_is_zero = lhs_tag == .zero; + const rhs_is_zero = rhs_tag == .zero; if (lhs_is_zero) return rhs.orderAgainstZero().invert(); if (rhs_is_zero) return lhs.orderAgainstZero(); @@ -1071,9 +1093,11 @@ pub const Value = extern union { .float_32, .float_64, .float_128, + .void_value, + .unreachable_value, + .empty_array, => unreachable, - .the_one_possible_value => Value.initTag(.the_one_possible_value), .ref_val => self.cast(Payload.RefVal).?.val, .decl_ref => self.cast(Payload.DeclRef).?.decl.value(), .elem_ptr => { @@ -1130,7 +1154,6 @@ pub const Value = extern union { .single_const_pointer_to_comptime_int_type, .const_slice_u8_type, .zero, - .the_one_possible_value, .bool_true, .bool_false, .null_value, @@ -1147,8 +1170,12 @@ pub const Value = extern union { .float_32, .float_64, .float_128, + .void_value, + .unreachable_value, => unreachable, + .empty_array => unreachable, // out of bounds array index + .bytes => { const int_payload = try allocator.create(Payload.Int_u64); int_payload.* = .{ .int = self.cast(Payload.Bytes).?.data[index] }; @@ -1175,8 +1202,7 @@ pub const Value = extern union { return self.tag() == .undef; } - /// Valid for all types. Asserts the value is not undefined. - /// `.the_one_possible_value` is reported as not null. + /// Valid for all types. Asserts the value is not undefined and not unreachable. 
pub fn isNull(self: Value) bool { return switch (self.tag()) { .ty, @@ -1221,7 +1247,7 @@ pub const Value = extern union { .single_const_pointer_to_comptime_int_type, .const_slice_u8_type, .zero, - .the_one_possible_value, + .empty_array, .bool_true, .bool_false, .function, @@ -1238,9 +1264,11 @@ pub const Value = extern union { .float_32, .float_64, .float_128, + .void_value, => false, .undef => unreachable, + .unreachable_value => unreachable, .null_value => true, }; } diff --git a/src-self-hosted/zir.zig b/src-self-hosted/zir.zig index 38751e22a3..318c4bdc8e 100644 --- a/src-self-hosted/zir.zig +++ b/src-self-hosted/zir.zig @@ -742,7 +742,7 @@ pub const Inst = struct { .@"false" => .{ .ty = Type.initTag(.bool), .val = Value.initTag(.bool_false) }, .@"null" => .{ .ty = Type.initTag(.@"null"), .val = Value.initTag(.null_value) }, .@"undefined" => .{ .ty = Type.initTag(.@"undefined"), .val = Value.initTag(.undef) }, - .void_value => .{ .ty = Type.initTag(.void), .val = Value.initTag(.the_one_possible_value) }, + .void_value => .{ .ty = Type.initTag(.void), .val = Value.initTag(.void_value) }, }; } }; @@ -1598,6 +1598,21 @@ const EmitZIR = struct { const decl = decl_ref.decl; return try self.emitUnnamedDecl(try self.emitDeclRef(src, decl)); } + if (typed_value.val.isUndef()) { + const as_inst = try self.arena.allocator.create(Inst.BinOp); + as_inst.* = .{ + .base = .{ + .tag = .as, + .src = src, + }, + .positionals = .{ + .lhs = (try self.emitType(src, typed_value.ty)).inst, + .rhs = (try self.emitPrimitive(src, .@"undefined")).inst, + }, + .kw_args = .{}, + }; + return self.emitUnnamedDecl(&as_inst.base); + } switch (typed_value.ty.zigTypeTag()) { .Pointer => { const ptr_elem_type = typed_value.ty.elemType(); diff --git a/src-self-hosted/zir_sema.zig b/src-self-hosted/zir_sema.zig index 7db2811384..6bd4159e36 100644 --- a/src-self-hosted/zir_sema.zig +++ b/src-self-hosted/zir_sema.zig @@ -882,7 +882,7 @@ fn analyzeInstArithmetic(mod: *Module, scope: *Scope, inst: *zir.Inst.BinOp) Inn fn analyzeInstComptimeOp(mod: *Module, scope: *Scope, res_type: Type, inst: *zir.Inst.BinOp, lhs_val: Value, rhs_val: Value) InnerError!*Inst { // incase rhs is 0, simply return lhs without doing any calculations // TODO Once division is implemented we should throw an error when dividing by 0. - if (rhs_val.tag() == .zero or rhs_val.tag() == .the_one_possible_value) { + if (rhs_val.compareWithZero(.eq)) { return mod.constInst(scope, inst.base.src, .{ .ty = res_type, .val = lhs_val, @@ -1083,6 +1083,7 @@ fn analyzeInstUnreachNoChk(mod: *Module, scope: *Scope, unreach: *zir.Inst.NoOp) fn analyzeInstUnreachable(mod: *Module, scope: *Scope, unreach: *zir.Inst.NoOp) InnerError!*Inst { const b = try mod.requireRuntimeBlock(scope, unreach.base.src); + // TODO Add compile error for @optimizeFor occurring too late in a scope. if (mod.wantSafety(scope)) { // TODO Once we have a panic function to call, call it here instead of this. _ = try mod.addNoOp(b, unreach.base.src, Type.initTag(.void), .breakpoint); -- cgit v1.2.3 From 1bbfa36b76271e907cac88e83cec8dee1e3d69f7 Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Wed, 29 Jul 2020 00:08:43 -0700 Subject: stage2: improved codegen * multiple returns jump to one canonical function exitlude. This is in preparation for the defer feature. * simple elision of trivial jump relocs. * omit prelude/exitlude for naked calling convention functions. 
* fix not switching on arch for prelude/exitlude * fix swapped registers when setting stack mem from a register --- src-self-hosted/codegen.zig | 118 +++++++++++++++++++++++++++++++++----------- 1 file changed, 90 insertions(+), 28 deletions(-) (limited to 'src-self-hosted/codegen.zig') diff --git a/src-self-hosted/codegen.zig b/src-self-hosted/codegen.zig index 40fb6c5407..6880d6dbf3 100644 --- a/src-self-hosted/codegen.zig +++ b/src-self-hosted/codegen.zig @@ -214,6 +214,11 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { src: usize, stack_align: u32, + /// The value is an offset into the `Function` `code` from the beginning. + /// To perform the reloc, write 32-bit signed little-endian integer + /// which is a relative jump, based on the address following the reloc. + exitlude_jump_relocs: std.ArrayListUnmanaged(usize) = .{}, + /// Whenever there is a runtime branch, we push a Branch onto this stack, /// and pop it off when the runtime branch joins. This provides an "overlay" /// of the table of mappings from instructions to `MCValue` from within the branch. @@ -376,6 +381,7 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { .src = src, .stack_align = undefined, }; + defer function.exitlude_jump_relocs.deinit(bin_file.allocator); var call_info = function.resolveCallingConventionValues(src, fn_type) catch |err| switch (err) { error.CodegenFail => return Result{ .fail = function.err_msg.? }, @@ -401,29 +407,78 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { } fn gen(self: *Self) !void { - try self.code.ensureCapacity(self.code.items.len + 11); - - // TODO omit this for naked functions - // push rbp - // mov rbp, rsp - self.code.appendSliceAssumeCapacity(&[_]u8{ 0x55, 0x48, 0x89, 0xe5 }); - - // sub rsp, x - const stack_end = self.branch_stack.items[0].max_end_stack; - if (stack_end > math.maxInt(i32)) { - return self.fail(self.src, "too much stack used in call parameters", .{}); - } else if (stack_end > math.maxInt(i8)) { - // 48 83 ec xx sub rsp,0x10 - self.code.appendSliceAssumeCapacity(&[_]u8{ 0x48, 0x81, 0xec }); - const x = @intCast(u32, stack_end); - mem.writeIntLittle(u32, self.code.addManyAsArrayAssumeCapacity(4), x); - } else if (stack_end != 0) { - // 48 81 ec xx xx xx xx sub rsp,0x80 - const x = @intCast(u8, stack_end); - self.code.appendSliceAssumeCapacity(&[_]u8{ 0x48, 0x83, 0xec, x }); - } + switch (arch) { + .x86_64 => { + try self.code.ensureCapacity(self.code.items.len + 11); + + const cc = self.fn_type.fnCallingConvention(); + if (cc != .Naked) { + // We want to subtract the aligned stack frame size from rsp here, but we don't + // yet know how big it will be, so we leave room for a 4-byte stack size. + // TODO During semantic analysis, check if there are no function calls. If there + // are none, here we can omit the part where we subtract and then add rsp. 
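// Aside: a minimal sketch (not part of the patch) of the reserve-then-backpatch
// pattern described in the comment above. The frame size operand of
// `sub rsp, imm32` is not known until after the body is generated, so the
// emitter records the offset of a 4-byte placeholder and fills it in later.
// The helper names below are hypothetical; only std is assumed.

const std = @import("std");

/// Append a 4-byte placeholder and return its offset so it can be patched later.
fn reserveImm32(code: *std.ArrayList(u8)) !usize {
    const index = code.items.len;
    try code.appendSlice(&[_]u8{ 0, 0, 0, 0 });
    return index;
}

/// Write the now-known value over the placeholder, little-endian.
fn patchImm32(code: *std.ArrayList(u8), index: usize, value: u32) void {
    std.mem.writeIntLittle(u32, code.items[index..][0..4], value);
}

// The patch does the same thing inline: it records `reloc_index`, generates the
// body, and only then writes the aligned stack frame size into those 4 bytes.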
+ self.code.appendSliceAssumeCapacity(&[_]u8{ + // push rbp + 0x55, + // mov rbp, rsp + 0x48, + 0x89, + 0xe5, + // sub rsp, imm32 (with reloc) + 0x48, + 0x81, + 0xec, + }); + const reloc_index = self.code.items.len; + self.code.items.len += 4; + + try self.genBody(self.mod_fn.analysis.success); + + const stack_end = self.branch_stack.items[0].max_end_stack; + if (stack_end > math.maxInt(i32)) + return self.fail(self.src, "too much stack used in call parameters", .{}); + const aligned_stack_end = mem.alignForward(stack_end, self.stack_align); + mem.writeIntLittle(u32, self.code.items[reloc_index..][0..4], @intCast(u32, aligned_stack_end)); + + if (self.code.items.len >= math.maxInt(i32)) { + return self.fail(self.src, "unable to perform relocation: jump too far", .{}); + } + for (self.exitlude_jump_relocs.items) |jmp_reloc| { + const amt = self.code.items.len - (jmp_reloc + 4); + // If it wouldn't jump at all, elide it. + if (amt == 0) { + self.code.items.len -= 5; + continue; + } + const s32_amt = @intCast(i32, amt); + mem.writeIntLittle(i32, self.code.items[jmp_reloc..][0..4], s32_amt); + } + + try self.code.ensureCapacity(self.code.items.len + 9); + // add rsp, x + if (aligned_stack_end > math.maxInt(i8)) { + // example: 48 81 c4 ff ff ff 7f add rsp,0x7fffffff + self.code.appendSliceAssumeCapacity(&[_]u8{ 0x48, 0x81, 0xc4 }); + const x = @intCast(u32, aligned_stack_end); + mem.writeIntLittle(u32, self.code.addManyAsArrayAssumeCapacity(4), x); + } else if (aligned_stack_end != 0) { + // example: 48 83 c4 7f add rsp,0x7f + const x = @intCast(u8, aligned_stack_end); + self.code.appendSliceAssumeCapacity(&[_]u8{ 0x48, 0x83, 0xc4, x }); + } - try self.genBody(self.mod_fn.analysis.success); + self.code.appendSliceAssumeCapacity(&[_]u8{ + 0x5d, // pop rbp + 0xc3, // ret + }); + } else { + try self.genBody(self.mod_fn.analysis.success); + } + }, + else => { + try self.genBody(self.mod_fn.analysis.success); + }, + } } fn genBody(self: *Self, body: ir.Body) InnerError!void { @@ -987,10 +1042,12 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { try self.code.append(0xc3); // ret }, .x86_64 => { - try self.code.appendSlice(&[_]u8{ - 0x5d, // pop rbp - 0xc3, // ret - }); + // TODO when implementing defer, this will need to jump to the appropriate defer expression. + // TODO optimization opportunity: figure out when we can emit this as a 2 byte instruction + // which is available if the jump is 127 bytes or less forward. + try self.code.resize(self.code.items.len + 5); + self.code.items[self.code.items.len - 5] = 0xe9; // jmp rel32 + try self.exitlude_jump_relocs.append(self.gpa, self.code.items.len - 4); }, else => return self.fail(src, "TODO implement return for {}", .{self.target.cpu.arch}), } @@ -1130,6 +1187,11 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { switch (reloc) { .rel32 => |pos| { const amt = self.code.items.len - (pos + 4); + // If it wouldn't jump at all, elide it. 
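// Aside: a sketch (not from the patch) of the rel32 arithmetic used for these
// jump relocations. A `jmp rel32` is 5 bytes (0xE9 plus a 4-byte displacement),
// and the displacement is measured from the address *after* those 4 bytes, so a
// jump whose target immediately follows the instruction has displacement 0 and
// can simply be dropped, which is the elision the code below performs. Only
// std is assumed; the helper name is made up and handles forward jumps only,
// as the exitlude relocs do.

const std = @import("std");

fn rel32Displacement(target: usize, reloc_pos: usize) !i32 {
    // `reloc_pos` is the offset of the 4-byte displacement field.
    const amt = target - (reloc_pos + 4);
    return std.math.cast(i32, amt);
}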
+ if (amt == 0) { + self.code.items.len -= 5; + return; + } const s32_amt = math.cast(i32, amt) catch return self.fail(src, "unable to perform relocation: jump too far", .{}); mem.writeIntLittle(i32, self.code.items[pos..][0..4], s32_amt); @@ -1296,13 +1358,13 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { const reg_id: u8 = @truncate(u3, reg.id()); if (stack_offset <= 128) { // example: 48 89 55 7f mov QWORD PTR [rbp+0x7f],rdx - const RM = @as(u8, 0b01_101_000) | reg_id; + const RM = @as(u8, 0b01_000_101) | (reg_id << 3); const negative_offset = @intCast(i8, -@intCast(i32, stack_offset)); const twos_comp = @bitCast(u8, negative_offset); self.code.appendSliceAssumeCapacity(&[_]u8{ 0x89, RM, twos_comp }); } else if (stack_offset <= 2147483648) { // example: 48 89 95 80 00 00 00 mov QWORD PTR [rbp+0x80],rdx - const RM = @as(u8, 0b10_101_000) | reg_id; + const RM = @as(u8, 0b10_000_101) | (reg_id << 3); const negative_offset = @intCast(i32, -@intCast(i33, stack_offset)); const twos_comp = @bitCast(u32, negative_offset); self.code.appendSliceAssumeCapacity(&[_]u8{ 0x89, RM }); -- cgit v1.2.3 From 606f157a6b6001b2623d28275a892c1a8ee3a646 Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Wed, 29 Jul 2020 02:10:35 -0700 Subject: stage2: register-aliasing-aware codegen * unify duplicated register allocation codepath * support the x86_64 concept of register aliasing * slightly improved memset codegen, supports sizes 1, 2, 4, 8 --- src-self-hosted/codegen.zig | 161 +++++++++++++++++++++++-------------- src-self-hosted/codegen/x86_64.zig | 20 +++++ test/stage2/compare_output.zig | 34 ++++++++ 3 files changed, 153 insertions(+), 62 deletions(-) (limited to 'src-self-hosted/codegen.zig') diff --git a/src-self-hosted/codegen.zig b/src-self-hosted/codegen.zig index 6880d6dbf3..be88dc67d8 100644 --- a/src-self-hosted/codegen.zig +++ b/src-self-hosted/codegen.zig @@ -328,6 +328,19 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { self.free_registers |= @as(FreeRegInt, 1) << shift; } + /// Before calling, must ensureCapacity + 1 on branch.registers. + /// Returns `null` if all registers are allocated. + fn allocReg(self: *Branch, inst: *ir.Inst) ?Register { + const free_index = @ctz(FreeRegInt, self.free_registers); + if (free_index >= callee_preserved_regs.len) { + return null; + } + self.free_registers &= ~(@as(FreeRegInt, 1) << free_index); + const reg = callee_preserved_regs[free_index]; + self.registers.putAssumeCapacityNoClobber(reg, .{ .inst = inst }); + return reg; + } + fn deinit(self: *Branch, gpa: *Allocator) void { self.inst_table.deinit(gpa); self.registers.deinit(gpa); @@ -502,8 +515,9 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { entry.value = .dead; switch (prev_value) { .register => |reg| { - _ = branch.registers.remove(reg); - branch.markRegFree(reg); + const reg64 = reg.to64(); + _ = branch.registers.remove(reg64); + branch.markRegFree(reg64); }, else => {}, // TODO process stack allocation death } @@ -582,30 +596,26 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { self.stack_align = abi_align; const branch = &self.branch_stack.items[self.branch_stack.items.len - 1]; - // TODO Make sure the type can fit in a register before we try to allocate one. - const free_index = @ctz(FreeRegInt, branch.free_registers); - if (free_index >= callee_preserved_regs.len) { - const stack_offset = try self.allocMem(inst, abi_size, abi_align); - return MCValue{ .stack_offset = stack_offset }; + // Make sure the type can fit in a register before we try to allocate one. 
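// Aside: a self-contained sketch (not part of the patch) of the free-register
// bookkeeping that `allocReg` performs: the free registers live in a bitmask,
// @ctz finds the lowest set bit (the first free register), and allocating
// clears that bit. The toy u8 mask and names here are made up; the real code
// sizes the mask to `callee_preserved_regs.len`.

const std = @import("std");

fn allocIndex(free: *u8) ?u3 {
    const index = @ctz(u8, free.*);
    if (index >= 8) return null; // mask is zero: every register is taken
    const shift = @intCast(u3, index);
    free.* &= ~(@as(u8, 1) << shift); // mark it allocated
    return shift;
}

test "toy register bitmask" {
    var free: u8 = 0b0000_0110; // registers 1 and 2 are free
    std.debug.assert(allocIndex(&free).? == 1);
    std.debug.assert(allocIndex(&free).? == 2);
    std.debug.assert(allocIndex(&free) == null);
}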
+ const ptr_bits = arch.ptrBitWidth(); + const ptr_bytes: u64 = @divExact(ptr_bits, 8); + if (abi_size <= ptr_bytes) { + try branch.registers.ensureCapacity(self.gpa, branch.registers.items().len + 1); + if (branch.allocReg(inst)) |reg| { + return MCValue{ .register = registerAlias(reg, abi_size) }; + } } - branch.free_registers &= ~(@as(FreeRegInt, 1) << free_index); - const reg = callee_preserved_regs[free_index]; - try branch.registers.putNoClobber(self.gpa, reg, .{ .inst = inst }); - return MCValue{ .register = reg }; + const stack_offset = try self.allocMem(inst, abi_size, abi_align); + return MCValue{ .stack_offset = stack_offset }; } /// Does not "move" the instruction. fn copyToNewRegister(self: *Self, inst: *ir.Inst) !MCValue { const branch = &self.branch_stack.items[self.branch_stack.items.len - 1]; try branch.registers.ensureCapacity(self.gpa, branch.registers.items().len + 1); - try branch.inst_table.ensureCapacity(self.gpa, branch.inst_table.items().len + 1); - const free_index = @ctz(FreeRegInt, branch.free_registers); - if (free_index >= callee_preserved_regs.len) + const reg = branch.allocReg(inst) orelse return self.fail(inst.src, "TODO implement spilling register to stack", .{}); - branch.free_registers &= ~(@as(FreeRegInt, 1) << free_index); - const reg = callee_preserved_regs[free_index]; - branch.registers.putAssumeCapacityNoClobber(reg, .{ .inst = inst }); const old_mcv = branch.inst_table.get(inst).?; const new_mcv: MCValue = .{ .register = reg }; try self.genSetReg(inst.src, reg, old_mcv); @@ -1131,7 +1141,9 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { // test reg, 1 // TODO detect al, ax, eax try self.code.ensureCapacity(self.code.items.len + 4); - self.rex(.{ .b = reg.isExtended(), .w = reg.size() == 64 }); + // TODO audit this codegen: we force w = true here to make + // the value affect the big register + self.rex(.{ .b = reg.isExtended(), .w = true }); self.code.appendSliceAssumeCapacity(&[_]u8{ 0xf6, @as(u8, 0xC0) | (0 << 3) | @truncate(u3, reg.id()), @@ -1319,7 +1331,13 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { if (!self.wantSafety()) return; // The already existing value will do just fine. // TODO Upgrade this to a memset call when we have that available. 
- return self.genSetStack(src, ty, stack_offset, .{ .immediate = 0xaaaaaaaa }); + switch (ty.abiSize(self.target.*)) { + 1 => return self.genSetStack(src, ty, stack_offset, .{ .immediate = 0xaa }), + 2 => return self.genSetStack(src, ty, stack_offset, .{ .immediate = 0xaaaa }), + 4 => return self.genSetStack(src, ty, stack_offset, .{ .immediate = 0xaaaaaaaa }), + 8 => return self.genSetStack(src, ty, stack_offset, .{ .immediate = 0xaaaaaaaaaaaaaaaa }), + else => return self.fail(src, "TODO implement memset", .{}), + } }, .compare_flags_unsigned => |op| { return self.fail(src, "TODO implement set stack variable with compare flags value (unsigned)", .{}); @@ -1328,24 +1346,35 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { return self.fail(src, "TODO implement set stack variable with compare flags value (signed)", .{}); }, .immediate => |x_big| { - if (ty.abiSize(self.target.*) != 4) { - // TODO after fixing this, need to update the undef case above - return self.fail(src, "TODO implement set non 4 abi size stack variable with immediate", .{}); + if (stack_offset > 128) { + return self.fail(src, "TODO implement set stack variable with large stack offset", .{}); } - try self.code.ensureCapacity(self.code.items.len + 7); - if (x_big <= math.maxInt(u32)) { - const x = @intCast(u32, x_big); - if (stack_offset > 128) { - return self.fail(src, "TODO implement set stack variable with large stack offset", .{}); - } - // We have a positive stack offset value but we want a twos complement negative - // offset from rbp, which is at the top of the stack frame. - const negative_offset = @intCast(i8, -@intCast(i32, stack_offset)); - const twos_comp = @bitCast(u8, negative_offset); - // mov DWORD PTR [rbp+offset], immediate - self.code.appendSliceAssumeCapacity(&[_]u8{ 0xc7, 0x45, twos_comp }); - mem.writeIntLittle(u32, self.code.addManyAsArrayAssumeCapacity(4), x); - } else { + try self.code.ensureCapacity(self.code.items.len + 8); + switch (ty.abiSize(self.target.*)) { + 1 => { + return self.fail(src, "TODO implement set abi_size=1 stack variable with immediate", .{}); + }, + 2 => { + return self.fail(src, "TODO implement set abi_size=2 stack variable with immediate", .{}); + }, + 4 => { + const x = @intCast(u32, x_big); + // We have a positive stack offset value but we want a twos complement negative + // offset from rbp, which is at the top of the stack frame. 
+ const negative_offset = @intCast(i8, -@intCast(i32, stack_offset)); + const twos_comp = @bitCast(u8, negative_offset); + // mov DWORD PTR [rbp+offset], immediate + self.code.appendSliceAssumeCapacity(&[_]u8{ 0xc7, 0x45, twos_comp }); + mem.writeIntLittle(u32, self.code.addManyAsArrayAssumeCapacity(4), x); + }, + 8 => { + return self.fail(src, "TODO implement set abi_size=8 stack variable with immediate", .{}); + }, + else => { + return self.fail(src, "TODO implement set abi_size=large stack variable with immediate", .{}); + }, + } + if (x_big <= math.maxInt(u32)) {} else { return self.fail(src, "TODO implement set stack variable with large immediate", .{}); } }, @@ -1407,7 +1436,9 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { }, .compare_flags_unsigned => |op| { try self.code.ensureCapacity(self.code.items.len + 3); - self.rex(.{ .b = reg.isExtended(), .w = reg.size() == 64 }); + // TODO audit this codegen: we force w = true here to make + // the value affect the big register + self.rex(.{ .b = reg.isExtended(), .w = true }); const opcode: u8 = switch (op) { .gte => 0x93, .gt => 0x97, @@ -1423,9 +1454,6 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { return self.fail(src, "TODO set register with compare flags value (signed)", .{}); }, .immediate => |x| { - if (reg.size() != 64) { - return self.fail(src, "TODO decide whether to implement non-64-bit loads", .{}); - } // 32-bit moves zero-extend to 64-bit, so xoring the 32-bit // register is the fastest way to zero a register. if (x == 0) { @@ -1478,16 +1506,13 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { // // In this case, the encoding of the REX byte is 0b0100100B try self.code.ensureCapacity(self.code.items.len + 10); - self.rex(.{ .w = true, .b = reg.isExtended() }); + self.rex(.{ .w = reg.size() == 64, .b = reg.isExtended() }); self.code.items.len += 9; self.code.items[self.code.items.len - 9] = 0xB8 | @as(u8, reg.id() & 0b111); const imm_ptr = self.code.items[self.code.items.len - 8 ..][0..8]; mem.writeIntLittle(u64, imm_ptr, x); }, .embedded_in_code => |code_offset| { - if (reg.size() != 64) { - return self.fail(src, "TODO decide whether to implement non-64-bit loads", .{}); - } // We need the offset from RIP in a signed i32 twos complement. // The instruction is 7 bytes long and RIP points to the next instruction. try self.code.ensureCapacity(self.code.items.len + 7); @@ -1495,7 +1520,7 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { // but the operation size is unchanged. Since we're using a disp32, we want mode 0 and lower three // bits as five. // REX 0x8D 0b00RRR101, where RRR is the lower three bits of the id. - self.rex(.{ .w = true, .b = reg.isExtended() }); + self.rex(.{ .w = reg.size() == 64, .b = reg.isExtended() }); self.code.items.len += 6; const rip = self.code.items.len; const big_offset = @intCast(i64, code_offset) - @intCast(i64, rip); @@ -1507,12 +1532,9 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { }, .register => |src_reg| { // If the registers are the same, nothing to do. - if (src_reg == reg) + if (src_reg.id() == reg.id()) return; - if (reg.size() != 64) { - return self.fail(src, "TODO decide whether to implement non-64-bit loads", .{}); - } // This is a variant of 8B /r. Since we're using 64-bit moves, we require a REX. // This is thus three bytes: REX 0x8B R/M. // If the destination is extended, the R field must be 1. 
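// Aside: a sketch (not from the patch) of what the `.w = reg.size() == 64`
// change buys. With register aliasing, genSetReg may now be handed ecx rather
// than rcx, and REX.W is only emitted for the 64-bit aliases. For the
// register-to-register case (opcode 0x8B /r, ModRM mode 0b11) that means:
//
//   mov rcx, rdx  =>  48 8B CA   (REX.W set)
//   mov ecx, edx  =>  8B CA      (no REX needed for the low eight registers)
//
// A hypothetical helper with the same ModRM math, valid only for non-extended
// (id < 8) registers; only std is assumed.

const std = @import("std");

fn movRegReg(code: *std.ArrayList(u8), wide: bool, dst_id: u3, src_id: u3) !void {
    if (wide) try code.append(0x48); // REX.W: 64-bit operand size
    const modrm = 0xC0 | (@as(u8, dst_id) << 3) | src_id; // mode 3, reg = dst, r/m = src
    try code.appendSlice(&[_]u8{ 0x8B, modrm });
}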
@@ -1520,14 +1542,11 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { // Since the register is being accessed directly, the R/M mode is three. The reg field (the middle // three bits) contain the destination, and the R/M field (the lower three bits) contain the source. try self.code.ensureCapacity(self.code.items.len + 3); - self.rex(.{ .w = true, .r = reg.isExtended(), .b = src_reg.isExtended() }); + self.rex(.{ .w = reg.size() == 64, .r = reg.isExtended(), .b = src_reg.isExtended() }); const R = 0xC0 | (@as(u8, reg.id() & 0b111) << 3) | @as(u8, src_reg.id() & 0b111); self.code.appendSliceAssumeCapacity(&[_]u8{ 0x8B, R }); }, .memory => |x| { - if (reg.size() != 64) { - return self.fail(src, "TODO decide whether to implement non-64-bit loads", .{}); - } if (x <= math.maxInt(u32)) { // Moving from memory to a register is a variant of `8B /r`. // Since we're using 64-bit moves, we require a REX. @@ -1537,7 +1556,7 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { // 0b00RRR100, where RRR is the lower three bits of the register ID. // The instruction is thus eight bytes; REX 0x8B 0b00RRR100 0x25 followed by a four-byte disp32. try self.code.ensureCapacity(self.code.items.len + 8); - self.rex(.{ .w = true, .b = reg.isExtended() }); + self.rex(.{ .w = reg.size() == 64, .b = reg.isExtended() }); self.code.appendSliceAssumeCapacity(&[_]u8{ 0x8B, 0x04 | (@as(u8, reg.id() & 0b111) << 3), // R @@ -1580,18 +1599,15 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { // // Furthermore, if this is an extended register, both B and R must be set in the REX byte, as *both* // register operands need to be marked as extended. - self.rex(.{ .w = true, .b = reg.isExtended(), .r = reg.isExtended() }); + self.rex(.{ .w = reg.size() == 64, .b = reg.isExtended(), .r = reg.isExtended() }); const RM = (@as(u8, reg.id() & 0b111) << 3) | @truncate(u3, reg.id()); self.code.appendSliceAssumeCapacity(&[_]u8{ 0x8B, RM }); } } }, .stack_offset => |off| { - if (reg.size() != 64) { - return self.fail(src, "TODO decide whether to implement non-64-bit loads", .{}); - } try self.code.ensureCapacity(self.code.items.len + 7); - self.rex(.{ .w = true, .r = reg.isExtended() }); + self.rex(.{ .w = reg.size() == 64, .r = reg.isExtended() }); const reg_id: u8 = @truncate(u3, reg.id()); if (off <= 128) { // Example: 48 8b 4d 7f mov rcx,QWORD PTR [rbp+0x7f] @@ -1750,11 +1766,16 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { for (param_types) |ty, i| { switch (ty.zigTypeTag()) { .Bool, .Int => { + const param_size = @intCast(u32, ty.abiSize(self.target.*)); if (next_int_reg >= c_abi_int_param_regs.len) { result.args[i] = .{ .stack_offset = next_stack_offset }; - next_stack_offset += @intCast(u32, ty.abiSize(self.target.*)); + next_stack_offset += param_size; } else { - result.args[i] = .{ .register = c_abi_int_param_regs[next_int_reg] }; + const aliased_reg = registerAlias( + c_abi_int_param_regs[next_int_reg], + param_size, + ); + result.args[i] = .{ .register = aliased_reg }; next_int_reg += 1; } }, @@ -1778,7 +1799,9 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { .x86_64 => switch (cc) { .Naked => unreachable, .Unspecified, .C => { - result.return_value = .{ .register = c_abi_int_return_regs[0] }; + const ret_ty_size = @intCast(u32, ret_ty.abiSize(self.target.*)); + const aliased_reg = registerAlias(c_abi_int_return_regs[0], ret_ty_size); + result.return_value = .{ .register = aliased_reg }; }, else => return self.fail(src, "TODO implement function return values for {}", .{cc}), }, @@ 
-1825,5 +1848,19 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { fn parseRegName(name: []const u8) ?Register { return std.meta.stringToEnum(Register, name); } + + fn registerAlias(reg: Register, size_bytes: u32) Register { + switch (arch) { + // For x86_64 we have to pick a smaller register alias depending on abi size. + .x86_64 => switch (size_bytes) { + 1 => return reg.to8(), + 2 => return reg.to16(), + 4 => return reg.to32(), + 8 => return reg.to64(), + else => unreachable, + }, + else => return reg, + } + } }; } diff --git a/src-self-hosted/codegen/x86_64.zig b/src-self-hosted/codegen/x86_64.zig index f6bad45809..c149613ae9 100644 --- a/src-self-hosted/codegen/x86_64.zig +++ b/src-self-hosted/codegen/x86_64.zig @@ -81,6 +81,26 @@ pub const Register = enum(u8) { else => null, }; } + + /// Convert from any register to its 64 bit alias. + pub fn to64(self: Register) Register { + return @intToEnum(Register, self.id()); + } + + /// Convert from any register to its 32 bit alias. + pub fn to32(self: Register) Register { + return @intToEnum(Register, @as(u8, self.id()) + 16); + } + + /// Convert from any register to its 16 bit alias. + pub fn to16(self: Register) Register { + return @intToEnum(Register, @as(u8, self.id()) + 32); + } + + /// Convert from any register to its 8 bit alias. + pub fn to8(self: Register) Register { + return @intToEnum(Register, @as(u8, self.id()) + 48); + } }; // zig fmt: on diff --git a/test/stage2/compare_output.zig b/test/stage2/compare_output.zig index 2e7c6317b6..bf6a01f483 100644 --- a/test/stage2/compare_output.zig +++ b/test/stage2/compare_output.zig @@ -363,5 +363,39 @@ pub fn addCases(ctx: *TestContext) !void { , "", ); + + // Local mutable variables. + case.addCompareOutput( + \\export fn _start() noreturn { + \\ assert(add(3, 4) == 7); + \\ assert(add(20, 10) == 30); + \\ + \\ exit(); + \\} + \\ + \\fn add(a: u32, b: u32) u32 { + \\ var x: u32 = undefined; + \\ x = 0; + \\ x += a; + \\ x += b; + \\ return x; + \\} + \\ + \\pub fn assert(ok: bool) void { + \\ if (!ok) unreachable; // assertion failure + \\} + \\ + \\fn exit() noreturn { + \\ asm volatile ("syscall" + \\ : + \\ : [number] "{rax}" (231), + \\ [arg1] "{rdi}" (0) + \\ : "rcx", "r11", "memory" + \\ ); + \\ unreachable; + \\} + , + "", + ); } } -- cgit v1.2.3 From 8899e6e334758f2e101399075d0456195035c372 Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Wed, 29 Jul 2020 02:28:35 -0700 Subject: stage2: codegen: fix off-by-one stack variable offsets --- src-self-hosted/codegen.zig | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) (limited to 'src-self-hosted/codegen.zig') diff --git a/src-self-hosted/codegen.zig b/src-self-hosted/codegen.zig index be88dc67d8..2ea255bf7f 100644 --- a/src-self-hosted/codegen.zig +++ b/src-self-hosted/codegen.zig @@ -1346,11 +1346,13 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { return self.fail(src, "TODO implement set stack variable with compare flags value (signed)", .{}); }, .immediate => |x_big| { - if (stack_offset > 128) { + const abi_size = ty.abiSize(self.target.*); + const adj_off = stack_offset + abi_size; + if (adj_off > 128) { return self.fail(src, "TODO implement set stack variable with large stack offset", .{}); } try self.code.ensureCapacity(self.code.items.len + 8); - switch (ty.abiSize(self.target.*)) { + switch (abi_size) { 1 => { return self.fail(src, "TODO implement set abi_size=1 stack variable with immediate", .{}); }, @@ -1361,7 +1363,7 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { 
const x = @intCast(u32, x_big); // We have a positive stack offset value but we want a twos complement negative // offset from rbp, which is at the top of the stack frame. - const negative_offset = @intCast(i8, -@intCast(i32, stack_offset)); + const negative_offset = @intCast(i8, -@intCast(i32, adj_off)); const twos_comp = @bitCast(u8, negative_offset); // mov DWORD PTR [rbp+offset], immediate self.code.appendSliceAssumeCapacity(&[_]u8{ 0xc7, 0x45, twos_comp }); @@ -1382,19 +1384,21 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { return self.fail(src, "TODO implement set stack variable from embedded_in_code", .{}); }, .register => |reg| { + const abi_size = ty.abiSize(self.target.*); + const adj_off = stack_offset + abi_size; try self.code.ensureCapacity(self.code.items.len + 7); self.rex(.{ .w = reg.size() == 64, .b = reg.isExtended() }); const reg_id: u8 = @truncate(u3, reg.id()); - if (stack_offset <= 128) { + if (adj_off <= 128) { // example: 48 89 55 7f mov QWORD PTR [rbp+0x7f],rdx const RM = @as(u8, 0b01_000_101) | (reg_id << 3); - const negative_offset = @intCast(i8, -@intCast(i32, stack_offset)); + const negative_offset = @intCast(i8, -@intCast(i32, adj_off)); const twos_comp = @bitCast(u8, negative_offset); self.code.appendSliceAssumeCapacity(&[_]u8{ 0x89, RM, twos_comp }); - } else if (stack_offset <= 2147483648) { + } else if (adj_off <= 2147483648) { // example: 48 89 95 80 00 00 00 mov QWORD PTR [rbp+0x80],rdx const RM = @as(u8, 0b10_000_101) | (reg_id << 3); - const negative_offset = @intCast(i32, -@intCast(i33, stack_offset)); + const negative_offset = @intCast(i32, -@intCast(i33, adj_off)); const twos_comp = @bitCast(u32, negative_offset); self.code.appendSliceAssumeCapacity(&[_]u8{ 0x89, RM }); mem.writeIntLittle(u32, self.code.addManyAsArrayAssumeCapacity(4), twos_comp); @@ -1605,8 +1609,10 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { } } }, - .stack_offset => |off| { + .stack_offset => |unadjusted_off| { try self.code.ensureCapacity(self.code.items.len + 7); + const size_bytes = @divExact(reg.size(), 8); + const off = unadjusted_off + size_bytes; self.rex(.{ .w = reg.size() == 64, .r = reg.isExtended() }); const reg_id: u8 = @truncate(u3, reg.id()); if (off <= 128) { -- cgit v1.2.3
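The off-by-one fix in this last commit adjusts the rbp-relative addressing: a slot returned by allocMem at stack_offset with size abi_size occupies the bytes from rbp - (stack_offset + abi_size) up to rbp - stack_offset, so the mov has to target that lower bound rather than rbp - stack_offset itself. For the first 8-byte slot that is [rbp-8], not [rbp-0], which would overlap the saved rbp. Below is a small sketch of that displacement math, assuming the layout produced by allocMem above; the helper name is made up and only std is assumed.

const std = @import("std");

/// rbp-relative disp8 for a stack slot: the slot's low byte sits
/// `stack_offset + abi_size` bytes below rbp.
fn rbpDisp8(stack_offset: u32, abi_size: u32) ?u8 {
    const adj_off = stack_offset + abi_size;
    if (adj_off > 128) return null; // needs a 32-bit displacement instead
    const negative_offset = @intCast(i8, -@intCast(i32, adj_off));
    return @bitCast(u8, negative_offset);
}

test "first 8-byte slot lands at [rbp-8]" {
    std.debug.assert(rbpDisp8(0, 8).? == 0xf8); // -8 in two's complement
    std.debug.assert(rbpDisp8(8, 4).? == 0xf4); // next slot, 4 bytes: [rbp-12]
}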