From 5ccee4c986aa9ed73d3deab3145f43689aa58ee4 Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Tue, 28 Jul 2020 17:27:44 -0700 Subject: stage2: more progress towards mutable local variables * implement sema for runtime deref, store pointer, coerce_to_ptr_elem, and store * identifiers support being lvalues, except for decls is still TODO * codegen supports load, store, ref, alloc * introduce more MCValue union tags to support pointers * add load, ref, store typed IR instructions * add Type.isVolatilePtr --- src-self-hosted/codegen.zig | 289 ++++++++++++++++++++++++++++++++++++++------ 1 file changed, 253 insertions(+), 36 deletions(-) (limited to 'src-self-hosted/codegen.zig') diff --git a/src-self-hosted/codegen.zig b/src-self-hosted/codegen.zig index 75b042308d..777c9ee5f2 100644 --- a/src-self-hosted/codegen.zig +++ b/src-self-hosted/codegen.zig @@ -209,6 +209,7 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { err_msg: ?*ErrorMsg, args: []MCValue, ret_mcv: MCValue, + fn_type: Type, arg_index: usize, src: usize, stack_align: u32, @@ -230,15 +231,23 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { /// No more references to this value remain. dead, /// A pointer-sized integer that fits in a register. + /// If the type is a pointer, this is the pointer address in virtual address space. immediate: u64, /// The constant was emitted into the code, at this offset. + /// If the type is a pointer, it means the pointer address is embedded in the code. embedded_in_code: usize, + /// The value is a pointer to a constant which was emitted into the code, at this offset. + ptr_embedded_in_code: usize, /// The value is in a target-specific register. register: Register, /// The value is in memory at a hard-coded address. + /// If the type is a pointer, it means the pointer address is at this memory location. memory: u64, /// The value is one of the stack variables. - stack_offset: u64, + /// If the type is a pointer, it means the pointer address is in the stack at this offset. + stack_offset: u32, + /// The value is a pointer to one of the stack variables (payload is stack offset). + ptr_stack_offset: u32, /// The value is in the compare flags assuming an unsigned operation, /// with this operator applied on top of it. 
compare_flags_unsigned: math.CompareOperator, @@ -271,6 +280,8 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { .memory, .compare_flags_unsigned, .compare_flags_signed, + .ptr_stack_offset, + .ptr_embedded_in_code, => false, .register, @@ -356,6 +367,7 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { .err_msg = null, .args = undefined, // populated after `resolveCallingConventionValues` .ret_mcv = undefined, // populated after `resolveCallingConventionValues` + .fn_type = fn_type, .arg_index = 0, .branch_stack = &branch_stack, .src = src, @@ -459,26 +471,23 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { .cmp_neq => return self.genCmp(inst.castTag(.cmp_neq).?, .neq), .condbr => return self.genCondBr(inst.castTag(.condbr).?), .constant => unreachable, // excluded from function bodies + .floatcast => return self.genFloatCast(inst.castTag(.floatcast).?), + .intcast => return self.genIntCast(inst.castTag(.intcast).?), .isnonnull => return self.genIsNonNull(inst.castTag(.isnonnull).?), .isnull => return self.genIsNull(inst.castTag(.isnull).?), + .load => return self.genLoad(inst.castTag(.load).?), + .not => return self.genNot(inst.castTag(.not).?), .ptrtoint => return self.genPtrToInt(inst.castTag(.ptrtoint).?), + .ref => return self.genRef(inst.castTag(.ref).?), .ret => return self.genRet(inst.castTag(.ret).?), .retvoid => return self.genRetVoid(inst.castTag(.retvoid).?), + .store => return self.genStore(inst.castTag(.store).?), .sub => return self.genSub(inst.castTag(.sub).?), .unreach => return MCValue{ .unreach = {} }, - .not => return self.genNot(inst.castTag(.not).?), - .floatcast => return self.genFloatCast(inst.castTag(.floatcast).?), - .intcast => return self.genIntCast(inst.castTag(.intcast).?), } } - fn genAlloc(self: *Self, inst: *ir.Inst.NoOp) !MCValue { - const elem_ty = inst.base.ty.elemType(); - const abi_size = math.cast(u32, elem_ty.abiSize(self.target.*)) catch { - return self.fail(inst.base.src, "type '{}' too big to fit into stack frame", .{elem_ty}); - }; - // TODO swap this for inst.base.ty.ptrAlign - const abi_align = elem_ty.abiAlignment(self.target.*); + fn allocMem(self: *Self, inst: *ir.Inst, abi_size: u32, abi_align: u32) !u32 { if (abi_align > self.stack_align) self.stack_align = abi_align; const branch = &self.branch_stack.items[self.branch_stack.items.len - 1]; @@ -488,10 +497,66 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { if (branch.next_stack_offset > branch.max_end_stack) branch.max_end_stack = branch.next_stack_offset; try branch.stack.putNoClobber(self.gpa, offset, .{ - .inst = &inst.base, + .inst = inst, .size = abi_size, }); - return MCValue{ .stack_offset = offset }; + return offset; + } + + /// Use a pointer instruction as the basis for allocating stack memory. 
+ fn allocMemPtr(self: *Self, inst: *ir.Inst) !u32 { + const elem_ty = inst.ty.elemType(); + const abi_size = math.cast(u32, elem_ty.abiSize(self.target.*)) catch { + return self.fail(inst.src, "type '{}' too big to fit into stack frame", .{elem_ty}); + }; + // TODO swap this for inst.ty.ptrAlign + const abi_align = elem_ty.abiAlignment(self.target.*); + return self.allocMem(inst, abi_size, abi_align); + } + + fn allocRegOrMem(self: *Self, inst: *ir.Inst) !MCValue { + const elem_ty = inst.ty; + const abi_size = math.cast(u32, elem_ty.abiSize(self.target.*)) catch { + return self.fail(inst.src, "type '{}' too big to fit into stack frame", .{elem_ty}); + }; + const abi_align = elem_ty.abiAlignment(self.target.*); + if (abi_align > self.stack_align) + self.stack_align = abi_align; + const branch = &self.branch_stack.items[self.branch_stack.items.len - 1]; + + // TODO Make sure the type can fit in a register before we try to allocate one. + const free_index = @ctz(FreeRegInt, branch.free_registers); + if (free_index >= callee_preserved_regs.len) { + const stack_offset = try self.allocMem(inst, abi_size, abi_align); + return MCValue{ .stack_offset = stack_offset }; + } + branch.free_registers &= ~(@as(FreeRegInt, 1) << free_index); + const reg = callee_preserved_regs[free_index]; + try branch.registers.putNoClobber(self.gpa, reg, .{ .inst = inst }); + return MCValue{ .register = reg }; + } + + /// Does not "move" the instruction. + fn copyToNewRegister(self: *Self, inst: *ir.Inst) !MCValue { + const branch = &self.branch_stack.items[self.branch_stack.items.len - 1]; + try branch.registers.ensureCapacity(self.gpa, branch.registers.items().len + 1); + try branch.inst_table.ensureCapacity(self.gpa, branch.inst_table.items().len + 1); + + const free_index = @ctz(FreeRegInt, branch.free_registers); + if (free_index >= callee_preserved_regs.len) + return self.fail(inst.src, "TODO implement spilling register to stack", .{}); + branch.free_registers &= ~(@as(FreeRegInt, 1) << free_index); + const reg = callee_preserved_regs[free_index]; + branch.registers.putAssumeCapacityNoClobber(reg, .{ .inst = inst }); + const old_mcv = branch.inst_table.get(inst).?; + const new_mcv: MCValue = .{ .register = reg }; + try self.genSetReg(inst.src, reg, old_mcv); + return new_mcv; + } + + fn genAlloc(self: *Self, inst: *ir.Inst.NoOp) !MCValue { + const stack_offset = try self.allocMemPtr(&inst.base); + return MCValue{ .ptr_stack_offset = stack_offset }; } fn genFloatCast(self: *Self, inst: *ir.Inst.UnOp) !MCValue { @@ -572,6 +637,85 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { } } + fn genLoad(self: *Self, inst: *ir.Inst.UnOp) !MCValue { + const elem_ty = inst.base.ty; + if (!elem_ty.hasCodeGenBits()) + return MCValue.none; + const ptr = try self.resolveInst(inst.operand); + const is_volatile = inst.operand.ty.isVolatilePtr(); + if (inst.base.isUnused() and !is_volatile) + return MCValue.dead; + const dst_mcv: MCValue = blk: { + if (inst.base.operandDies(0) and ptr.isMutable()) { + // The MCValue that holds the pointer can be re-used as the value. + // TODO track this in the register/stack allocation metadata. 
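As an aside, the register allocation introduced above (allocRegOrMem, copyToNewRegister) treats free_registers as a bitmask over the callee-preserved registers and grabs the lowest set bit with @ctz, falling back to a stack slot once every register is taken. The following is only a minimal standalone sketch of that scheme, written in present-day Zig syntax (single-argument @ctz, one-argument casts) with made-up register names rather than the real per-architecture tables in codegen.zig:

const std = @import("std");

// Hypothetical stand-ins for the real callee_preserved_regs table and
// FreeRegInt in codegen.zig; the actual tables are per-architecture.
const callee_preserved_regs = [_][]const u8{ "rbx", "r12", "r13", "r14", "r15" };
const FreeRegInt = std.meta.Int(.unsigned, callee_preserved_regs.len);

/// Returns the index of the lowest free register and marks it used, or
/// null if every callee-preserved register is already taken (the point
/// at which the real code falls back to allocating a stack slot).
fn takeFreeReg(free_registers: *FreeRegInt) ?usize {
    const free_index = @ctz(free_registers.*);
    if (free_index >= callee_preserved_regs.len) return null;
    free_registers.* &= ~(@as(FreeRegInt, 1) << free_index);
    return free_index;
}

test "registers are handed out lowest-index first, then exhausted" {
    var free: FreeRegInt = std.math.maxInt(FreeRegInt);
    try std.testing.expectEqual(@as(?usize, 0), takeFreeReg(&free));
    try std.testing.expectEqual(@as(?usize, 1), takeFreeReg(&free));
    var remaining: usize = 0;
    while (takeFreeReg(&free)) |_| remaining += 1;
    try std.testing.expectEqual(@as(usize, 3), remaining);
    try std.testing.expectEqual(@as(?usize, null), takeFreeReg(&free));
}

Keeping the free set as a bitmask means both "find a free register" and "mark it used" are a couple of integer instructions, which matters since this runs for essentially every IR instruction.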
+ break :blk ptr; + } else { + break :blk try self.allocRegOrMem(&inst.base); + } + }; + switch (ptr) { + .none => unreachable, + .unreach => unreachable, + .dead => unreachable, + .compare_flags_unsigned => unreachable, + .compare_flags_signed => unreachable, + .immediate => |imm| try self.setRegOrMem(inst.base.src, elem_ty, dst_mcv, .{ .memory = imm }), + .ptr_stack_offset => |off| try self.setRegOrMem(inst.base.src, elem_ty, dst_mcv, .{ .stack_offset = off }), + .ptr_embedded_in_code => |off| { + try self.setRegOrMem(inst.base.src, elem_ty, dst_mcv, .{ .embedded_in_code = off }); + }, + .embedded_in_code => { + return self.fail(inst.base.src, "TODO implement loading from MCValue.embedded_in_code", .{}); + }, + .register => { + return self.fail(inst.base.src, "TODO implement loading from MCValue.register", .{}); + }, + .memory => { + return self.fail(inst.base.src, "TODO implement loading from MCValue.memory", .{}); + }, + .stack_offset => { + return self.fail(inst.base.src, "TODO implement loading from MCValue.stack_offset", .{}); + }, + } + return dst_mcv; + } + + fn genStore(self: *Self, inst: *ir.Inst.BinOp) !MCValue { + const ptr = try self.resolveInst(inst.lhs); + const value = try self.resolveInst(inst.rhs); + const elem_ty = inst.rhs.ty; + switch (ptr) { + .none => unreachable, + .unreach => unreachable, + .dead => unreachable, + .compare_flags_unsigned => unreachable, + .compare_flags_signed => unreachable, + .immediate => |imm| { + try self.setRegOrMem(inst.base.src, elem_ty, .{ .memory = imm }, value); + }, + .ptr_stack_offset => |off| { + try self.genSetStack(inst.base.src, elem_ty, off, value); + }, + .ptr_embedded_in_code => |off| { + try self.setRegOrMem(inst.base.src, elem_ty, .{ .embedded_in_code = off }, value); + }, + .embedded_in_code => { + return self.fail(inst.base.src, "TODO implement storing to MCValue.embedded_in_code", .{}); + }, + .register => { + return self.fail(inst.base.src, "TODO implement storing to MCValue.register", .{}); + }, + .memory => { + return self.fail(inst.base.src, "TODO implement storing to MCValue.memory", .{}); + }, + .stack_offset => { + return self.fail(inst.base.src, "TODO implement storing to MCValue.stack_offset", .{}); + }, + } + return .none; + } + fn genSub(self: *Self, inst: *ir.Inst.BinOp) !MCValue { // No side effects, so if it's unreferenced, do nothing. if (inst.base.isUnused()) @@ -657,10 +801,14 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { .dead, .unreach, .immediate => unreachable, .compare_flags_unsigned => unreachable, .compare_flags_signed => unreachable, + .ptr_stack_offset => unreachable, + .ptr_embedded_in_code => unreachable, .register => |dst_reg| { switch (src_mcv) { .none => unreachable, .dead, .unreach => unreachable, + .ptr_stack_offset => unreachable, + .ptr_embedded_in_code => unreachable, .register => |src_reg| { self.rex(.{ .b = dst_reg.isExtended(), .r = src_reg.isExtended(), .w = dst_reg.size() == 64 }); self.code.appendSliceAssumeCapacity(&[_]u8{ mr + 0x1, 0xC0 | (@as(u8, src_reg.id() & 0b111) << 3) | @as(u8, dst_reg.id() & 0b111) }); @@ -743,6 +891,8 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { for (info.args) |mc_arg, arg_i| { const arg = inst.args[arg_i]; const arg_mcv = try self.resolveInst(inst.args[arg_i]); + // Here we do not use setRegOrMem even though the logic is similar, because + // the function call will move the stack pointer, so the offsets are different. 
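The genSetStack and genSetReg changes below (and in the follow-up commit) address stack slots as negative displacements off rbp, so the unsigned stack_offset has to be re-encoded as a two's-complement disp8 (or disp32 for larger frames). A small sketch of just that conversion, using present-day single-argument @intCast/@bitCast rather than the 2020-era two-argument forms used in the patch:

const std = @import("std");

/// Stack slots live below rbp, so a positive stack_offset is emitted as
/// the two's-complement byte for [rbp + disp8], i.e. [rbp - stack_offset].
fn rbpDisp8(stack_offset: u32) u8 {
    std.debug.assert(stack_offset <= 128); // disp8 range; larger offsets need disp32
    const negative_offset: i8 = @intCast(-@as(i32, @intCast(stack_offset)));
    return @bitCast(negative_offset);
}

test "offset 8 encodes as 0xF8, i.e. [rbp-0x8]" {
    try std.testing.expectEqual(@as(u8, 0xf8), rbpDisp8(8));
    try std.testing.expectEqual(@as(u8, 0x80), rbpDisp8(128));
}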
switch (mc_arg) { .none => continue, .register => |reg| { @@ -754,6 +904,12 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { // mov qword ptr [rsp + stack_offset], x return self.fail(inst.base.src, "TODO implement calling with parameters in memory", .{}); }, + .ptr_stack_offset => { + return self.fail(inst.base.src, "TODO implement calling with MCValue.ptr_stack_offset", .{}); + }, + .ptr_embedded_in_code => { + return self.fail(inst.base.src, "TODO implement calling with MCValue.ptr_embedded_in_code", .{}); + }, .immediate => unreachable, .unreach => unreachable, .dead => unreachable, @@ -788,8 +944,34 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { return info.return_value; } + fn genRef(self: *Self, inst: *ir.Inst.UnOp) !MCValue { + const operand = try self.resolveInst(inst.operand); + switch (operand) { + .unreach => unreachable, + .dead => unreachable, + .none => return .none, + + .immediate, + .register, + .ptr_stack_offset, + .ptr_embedded_in_code, + .compare_flags_unsigned, + .compare_flags_signed, + => { + const stack_offset = try self.allocMemPtr(&inst.base); + try self.genSetStack(inst.base.src, inst.operand.ty, stack_offset, operand); + return MCValue{ .ptr_stack_offset = stack_offset }; + }, + + .stack_offset => |offset| return MCValue{ .ptr_stack_offset = offset }, + .embedded_in_code => |offset| return MCValue{ .ptr_embedded_in_code = offset }, + .memory => |vaddr| return MCValue{ .immediate = vaddr }, + } + } + fn ret(self: *Self, src: usize, mcv: MCValue) !MCValue { - try self.setRegOrStack(src, self.ret_mcv, mcv); + const ret_ty = self.fn_type.fnReturnType(); + try self.setRegOrMem(src, ret_ty, self.ret_mcv, mcv); switch (arch) { .i386 => { try self.code.append(0xc3); // ret @@ -1042,21 +1224,74 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { } /// Sets the value without any modifications to register allocation metadata or stack allocation metadata. - fn setRegOrStack(self: *Self, src: usize, loc: MCValue, val: MCValue) !void { + fn setRegOrMem(self: *Self, src: usize, ty: Type, loc: MCValue, val: MCValue) !void { switch (loc) { .none => return, .register => |reg| return self.genSetReg(src, reg, val), - .stack_offset => { - return self.fail(src, "TODO implement setRegOrStack for stack offset", .{}); + .stack_offset => |off| return self.genSetStack(src, ty, off, val), + .memory => { + return self.fail(src, "TODO implement setRegOrMem for memory", .{}); }, else => unreachable, } } - fn genSetReg(self: *Self, src: usize, reg: Register, mcv: MCValue) error{ CodegenFail, OutOfMemory }!void { + fn genSetStack(self: *Self, src: usize, ty: Type, stack_offset: u32, mcv: MCValue) InnerError!void { + switch (arch) { + .x86_64 => switch (mcv) { + .dead => unreachable, + .ptr_stack_offset => unreachable, + .ptr_embedded_in_code => unreachable, + .unreach, .none => return, // Nothing to do. 
+ .compare_flags_unsigned => |op| { + return self.fail(src, "TODO implement set stack variable with compare flags value (unsigned)", .{}); + }, + .compare_flags_signed => |op| { + return self.fail(src, "TODO implement set stack variable with compare flags value (signed)", .{}); + }, + .immediate => |x_big| { + try self.code.ensureCapacity(self.code.items.len + 7); + if (x_big <= math.maxInt(u32)) { + const x = @intCast(u32, x_big); + if (stack_offset > 128) { + return self.fail(src, "TODO implement set stack variable with large stack offset", .{}); + } + // We have a positive stack offset value but we want a twos complement negative + // offset from rbp, which is at the top of the stack frame. + const negative_offset = @intCast(i8, -@intCast(i32, stack_offset)); + const twos_comp = @bitCast(u8, negative_offset); + // mov DWORD PTR [rbp+offset], immediate + self.code.appendSliceAssumeCapacity(&[_]u8{ 0xc7, 0x45, twos_comp }); + mem.writeIntLittle(u32, self.code.addManyAsArrayAssumeCapacity(4), x); + } else { + return self.fail(src, "TODO implement set stack variable with large immediate", .{}); + } + }, + .embedded_in_code => |code_offset| { + return self.fail(src, "TODO implement set stack variable from embedded_in_code", .{}); + }, + .register => |reg| { + return self.fail(src, "TODO implement set stack variable from register", .{}); + }, + .memory => |vaddr| { + return self.fail(src, "TODO implement set stack variable from memory vaddr", .{}); + }, + .stack_offset => |off| { + if (stack_offset == off) + return; // Copy stack variable to itself; nothing to do. + return self.fail(src, "TODO implement copy stack variable to stack variable", .{}); + }, + }, + else => return self.fail(src, "TODO implement getSetStack for {}", .{self.target.cpu.arch}), + } + } + + fn genSetReg(self: *Self, src: usize, reg: Register, mcv: MCValue) InnerError!void { switch (arch) { .x86_64 => switch (mcv) { .dead => unreachable, + .ptr_stack_offset => unreachable, + .ptr_embedded_in_code => unreachable, .unreach, .none => return, // Nothing to do. .compare_flags_unsigned => |op| { try self.code.ensureCapacity(self.code.items.len + 3); @@ -1279,24 +1514,6 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { } } - /// Does not "move" the instruction. - fn copyToNewRegister(self: *Self, inst: *ir.Inst) !MCValue { - const branch = &self.branch_stack.items[self.branch_stack.items.len - 1]; - try branch.registers.ensureCapacity(self.gpa, branch.registers.items().len + 1); - try branch.inst_table.ensureCapacity(self.gpa, branch.inst_table.items().len + 1); - - const free_index = @ctz(FreeRegInt, branch.free_registers); - if (free_index >= callee_preserved_regs.len) - return self.fail(inst.src, "TODO implement spilling register to stack", .{}); - branch.free_registers &= ~(@as(FreeRegInt, 1) << free_index); - const reg = callee_preserved_regs[free_index]; - branch.registers.putAssumeCapacityNoClobber(reg, .{ .inst = inst }); - const old_mcv = branch.inst_table.get(inst).?; - const new_mcv: MCValue = .{ .register = reg }; - try self.genSetReg(inst.src, reg, old_mcv); - return new_mcv; - } - /// If the MCValue is an immediate, and it does not fit within this type, /// we put it in a register. 
/// A potential opportunity for future optimization here would be keeping track -- cgit v1.2.3 From cb3e8e323dc6846e16e868fcfb4d4bf135f56f48 Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Tue, 28 Jul 2020 19:11:23 -0700 Subject: stage2: x86_64 codegen for movs to/from stack variables --- src-self-hosted/codegen.zig | 43 +++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 41 insertions(+), 2 deletions(-) (limited to 'src-self-hosted/codegen.zig') diff --git a/src-self-hosted/codegen.zig b/src-self-hosted/codegen.zig index 777c9ee5f2..51a59596c4 100644 --- a/src-self-hosted/codegen.zig +++ b/src-self-hosted/codegen.zig @@ -1271,7 +1271,25 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { return self.fail(src, "TODO implement set stack variable from embedded_in_code", .{}); }, .register => |reg| { - return self.fail(src, "TODO implement set stack variable from register", .{}); + try self.code.ensureCapacity(self.code.items.len + 7); + self.rex(.{ .w = reg.size() == 64, .b = reg.isExtended() }); + const reg_id: u8 = @truncate(u3, reg.id()); + if (stack_offset <= 128) { + // example: 48 89 55 7f mov QWORD PTR [rbp+0x7f],rdx + const RM = @as(u8, 0b01_101_000) | reg_id; + const negative_offset = @intCast(i8, -@intCast(i32, stack_offset)); + const twos_comp = @bitCast(u8, negative_offset); + self.code.appendSliceAssumeCapacity(&[_]u8{ 0x89, RM, twos_comp }); + } else if (stack_offset <= 2147483648) { + // example: 48 89 95 80 00 00 00 mov QWORD PTR [rbp+0x80],rdx + const RM = @as(u8, 0b10_101_000) | reg_id; + const negative_offset = @intCast(i32, -@intCast(i33, stack_offset)); + const twos_comp = @bitCast(u32, negative_offset); + self.code.appendSliceAssumeCapacity(&[_]u8{ 0x89, RM }); + mem.writeIntLittle(u32, self.code.addManyAsArrayAssumeCapacity(4), twos_comp); + } else { + return self.fail(src, "stack offset too large", .{}); + } }, .memory => |vaddr| { return self.fail(src, "TODO implement set stack variable from memory vaddr", .{}); @@ -1475,7 +1493,28 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { } }, .stack_offset => |off| { - return self.fail(src, "TODO implement genSetReg for stack variables", .{}); + if (reg.size() != 64) { + return self.fail(src, "TODO decide whether to implement non-64-bit loads", .{}); + } + try self.code.ensureCapacity(self.code.items.len + 7); + self.rex(.{ .w = true, .r = reg.isExtended() }); + const reg_id: u8 = @truncate(u3, reg.id()); + if (off <= 128) { + // Example: 48 8b 4d 7f mov rcx,QWORD PTR [rbp+0x7f] + const RM = @as(u8, 0b01_000_101) | (reg_id << 3); + const negative_offset = @intCast(i8, -@intCast(i32, off)); + const twos_comp = @bitCast(u8, negative_offset); + self.code.appendSliceAssumeCapacity(&[_]u8{ 0x8b, RM, twos_comp }); + } else if (off <= 2147483648) { + // Example: 48 8b 8d 80 00 00 00 mov rcx,QWORD PTR [rbp+0x80] + const RM = @as(u8, 0b10_000_101) | (reg_id << 3); + const negative_offset = @intCast(i32, -@intCast(i33, off)); + const twos_comp = @bitCast(u32, negative_offset); + self.code.appendSliceAssumeCapacity(&[_]u8{ 0x8b, RM }); + mem.writeIntLittle(u32, self.code.addManyAsArrayAssumeCapacity(4), twos_comp); + } else { + return self.fail(src, "stack offset too large", .{}); + } }, }, else => return self.fail(src, "TODO implement getSetReg for {}", .{self.target.cpu.arch}), -- cgit v1.2.3 From 4beff80b2f30ad85f2127b1281053b8b25b0cc33 Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Tue, 28 Jul 2020 21:57:13 -0700 Subject: stage2: codegen handles undefined values * `optimize_mode` is passed to 
`link.File` and stored there * improve the debugging function `Module.dumpInst` * get rid of `Value.the_one_possible_value` in favor of a few more specific values for different types. This is less buggy, one less footgun. * `Type.onePossibleValue` now returns a `?Value` instead of `bool`. * codegen handles undefined values. `undef` is a new `MCValue` tag. It uses 0xaa values depending on optimization mode. However optimization mode does not yet support scope overrides. * link.zig: move the `Options` field from `File.Elf` and `File.C` to the base struct. - fix the Tag enum to adhere to style conventions * ZIR now supports emitting undefined values. * Fix the logic of comptime math to properly compare against zero using the `compareWithZero` function. --- src-self-hosted/Module.zig | 62 +++++++++++++++------- src-self-hosted/codegen.zig | 58 ++++++++++++++++++--- src-self-hosted/ir.zig | 3 +- src-self-hosted/link.zig | 120 +++++++++++++++++++++++-------------------- src-self-hosted/type.zig | 33 ++++++++---- src-self-hosted/value.zig | 72 ++++++++++++++++++-------- src-self-hosted/zir.zig | 17 +++++- src-self-hosted/zir_sema.zig | 3 +- 8 files changed, 248 insertions(+), 120 deletions(-) (limited to 'src-self-hosted/codegen.zig') diff --git a/src-self-hosted/Module.zig b/src-self-hosted/Module.zig index 426b28488f..864cd66d32 100644 --- a/src-self-hosted/Module.zig +++ b/src-self-hosted/Module.zig @@ -47,7 +47,6 @@ export_owners: std.AutoHashMapUnmanaged(*Decl, []*Export) = .{}, /// Maps fully qualified namespaced names to the Decl struct for them. decl_table: std.HashMapUnmanaged(Scope.NameHash, *Decl, Scope.name_hash_hash, Scope.name_hash_eql, false) = .{}, -optimize_mode: std.builtin.Mode, link_error_flags: link.File.ErrorFlags = .{}, work_queue: std.fifo.LinearFifo(WorkItem, .Dynamic), @@ -385,18 +384,6 @@ pub const Scope = struct { }; } - pub fn dumpInst(self: *Scope, inst: *Inst) void { - const zir_module = self.namespace(); - const loc = std.zig.findLineColumn(zir_module.source.bytes, inst.src); - std.debug.warn("{}:{}:{}: {}: ty={}\n", .{ - zir_module.sub_file_path, - loc.line + 1, - loc.column + 1, - @tagName(inst.tag), - inst.ty, - }); - } - /// Asserts the scope has a parent which is a ZIRModule or File and /// returns the sub_file_path field. pub fn subFilePath(base: *Scope) []const u8 { @@ -802,6 +789,7 @@ pub fn init(gpa: *Allocator, options: InitOptions) !Module { .output_mode = options.output_mode, .link_mode = options.link_mode orelse .Static, .object_format = options.object_format orelse options.target.getObjectFormat(), + .optimize_mode = options.optimize_mode, }); errdefer bin_file.destroy(); @@ -838,7 +826,6 @@ pub fn init(gpa: *Allocator, options: InitOptions) !Module { .bin_file_dir = bin_file_dir, .bin_file_path = options.bin_file_path, .bin_file = bin_file, - .optimize_mode = options.optimize_mode, .work_queue = std.fifo.LinearFifo(WorkItem, .Dynamic).init(gpa), .keep_source_files_loaded = options.keep_source_files_loaded, }; @@ -894,7 +881,11 @@ fn freeExportList(gpa: *Allocator, export_list: []*Export) void { } pub fn target(self: Module) std.Target { - return self.bin_file.options().target; + return self.bin_file.options.target; +} + +pub fn optimizeMode(self: Module) std.builtin.Mode { + return self.bin_file.options.optimize_mode; } /// Detect changes to source files, perform semantic analysis, and update the output files. 
@@ -1991,14 +1982,14 @@ pub fn constType(self: *Module, scope: *Scope, src: usize, ty: Type) !*Inst { pub fn constVoid(self: *Module, scope: *Scope, src: usize) !*Inst { return self.constInst(scope, src, .{ .ty = Type.initTag(.void), - .val = Value.initTag(.the_one_possible_value), + .val = Value.initTag(.void_value), }); } pub fn constNoReturn(self: *Module, scope: *Scope, src: usize) !*Inst { return self.constInst(scope, src, .{ .ty = Type.initTag(.noreturn), - .val = Value.initTag(.the_one_possible_value), + .val = Value.initTag(.unreachable_value), }); } @@ -2162,7 +2153,8 @@ pub fn analyzeDeclRefByName(self: *Module, scope: *Scope, src: usize, decl_name: } pub fn wantSafety(self: *Module, scope: *Scope) bool { - return switch (self.optimize_mode) { + // TODO take into account scope's safety overrides + return switch (self.optimizeMode()) { .Debug => true, .ReleaseSafe => true, .ReleaseFast => false, @@ -2511,7 +2503,7 @@ pub fn storePtr(self: *Module, scope: *Scope, src: usize, ptr: *Inst, uncasted_v const elem_ty = ptr.ty.elemType(); const value = try self.coerce(scope, elem_ty, uncasted_value); - if (elem_ty.onePossibleValue()) + if (elem_ty.onePossibleValue() != null) return self.constVoid(scope, src); // TODO handle comptime pointer writes @@ -2803,3 +2795,35 @@ pub fn singleConstPtrType(self: *Module, scope: *Scope, src: usize, elem_ty: Typ type_payload.* = .{ .pointee_type = elem_ty }; return Type.initPayload(&type_payload.base); } + +pub fn dumpInst(self: *Module, scope: *Scope, inst: *Inst) void { + const zir_module = scope.namespace(); + const source = zir_module.getSource(self) catch @panic("dumpInst failed to get source"); + const loc = std.zig.findLineColumn(source, inst.src); + if (inst.tag == .constant) { + std.debug.warn("constant ty={} val={} src={}:{}:{}\n", .{ + inst.ty, + inst.castTag(.constant).?.val, + zir_module.subFilePath(), + loc.line + 1, + loc.column + 1, + }); + } else if (inst.deaths == 0) { + std.debug.warn("{} ty={} src={}:{}:{}\n", .{ + @tagName(inst.tag), + inst.ty, + zir_module.subFilePath(), + loc.line + 1, + loc.column + 1, + }); + } else { + std.debug.warn("{} ty={} deaths={b} src={}:{}:{}\n", .{ + @tagName(inst.tag), + inst.ty, + inst.deaths, + zir_module.subFilePath(), + loc.line + 1, + loc.column + 1, + }); + } +} diff --git a/src-self-hosted/codegen.zig b/src-self-hosted/codegen.zig index 51a59596c4..40fb6c5407 100644 --- a/src-self-hosted/codegen.zig +++ b/src-self-hosted/codegen.zig @@ -50,7 +50,7 @@ pub fn generateSymbol( switch (typed_value.ty.zigTypeTag()) { .Fn => { - switch (bin_file.options.target.cpu.arch) { + switch (bin_file.base.options.target.cpu.arch) { //.arm => return Function(.arm).generateSymbol(bin_file, src, typed_value, code), //.armeb => return Function(.armeb).generateSymbol(bin_file, src, typed_value, code), //.aarch64 => return Function(.aarch64).generateSymbol(bin_file, src, typed_value, code), @@ -143,7 +143,7 @@ pub fn generateSymbol( // TODO handle the dependency of this symbol on the decl's vaddr. // If the decl changes vaddr, then this symbol needs to get regenerated. 
const vaddr = bin_file.local_symbols.items[decl.link.local_sym_index].st_value; - const endian = bin_file.options.target.cpu.arch.endian(); + const endian = bin_file.base.options.target.cpu.arch.endian(); switch (bin_file.ptr_width) { .p32 => { try code.resize(4); @@ -166,7 +166,7 @@ pub fn generateSymbol( }; }, .Int => { - const info = typed_value.ty.intInfo(bin_file.options.target); + const info = typed_value.ty.intInfo(bin_file.base.options.target); if (info.bits == 8 and !info.signed) { const x = typed_value.val.toUnsignedInt(); try code.append(@intCast(u8, x)); @@ -230,6 +230,8 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { unreach, /// No more references to this value remain. dead, + /// The value is undefined. + undef, /// A pointer-sized integer that fits in a register. /// If the type is a pointer, this is the pointer address in virtual address space. immediate: u64, @@ -282,6 +284,7 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { .compare_flags_signed, .ptr_stack_offset, .ptr_embedded_in_code, + .undef, => false, .register, @@ -360,7 +363,7 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { var function = Self{ .gpa = bin_file.allocator, - .target = &bin_file.options.target, + .target = &bin_file.base.options.target, .bin_file = bin_file, .mod_fn = module_fn, .code = code, @@ -656,6 +659,7 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { }; switch (ptr) { .none => unreachable, + .undef => unreachable, .unreach => unreachable, .dead => unreachable, .compare_flags_unsigned => unreachable, @@ -687,6 +691,7 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { const elem_ty = inst.rhs.ty; switch (ptr) { .none => unreachable, + .undef => unreachable, .unreach => unreachable, .dead => unreachable, .compare_flags_unsigned => unreachable, @@ -798,6 +803,7 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { fn genX8664BinMathCode(self: *Self, src: usize, dst_mcv: MCValue, src_mcv: MCValue, opx: u8, mr: u8) !void { switch (dst_mcv) { .none => unreachable, + .undef => unreachable, .dead, .unreach, .immediate => unreachable, .compare_flags_unsigned => unreachable, .compare_flags_signed => unreachable, @@ -806,6 +812,7 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { .register => |dst_reg| { switch (src_mcv) { .none => unreachable, + .undef => try self.genSetReg(src, dst_reg, .undef), .dead, .unreach => unreachable, .ptr_stack_offset => unreachable, .ptr_embedded_in_code => unreachable, @@ -905,11 +912,12 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { return self.fail(inst.base.src, "TODO implement calling with parameters in memory", .{}); }, .ptr_stack_offset => { - return self.fail(inst.base.src, "TODO implement calling with MCValue.ptr_stack_offset", .{}); + return self.fail(inst.base.src, "TODO implement calling with MCValue.ptr_stack_offset arg", .{}); }, .ptr_embedded_in_code => { - return self.fail(inst.base.src, "TODO implement calling with MCValue.ptr_embedded_in_code", .{}); + return self.fail(inst.base.src, "TODO implement calling with MCValue.ptr_embedded_in_code arg", .{}); }, + .undef => unreachable, .immediate => unreachable, .unreach => unreachable, .dead => unreachable, @@ -966,6 +974,8 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { .stack_offset => |offset| return MCValue{ .ptr_stack_offset = offset }, .embedded_in_code => |offset| return MCValue{ .ptr_embedded_in_code = offset }, .memory => |vaddr| return MCValue{ .immediate = vaddr }, + + .undef => return self.fail(inst.base.src, "TODO implement 
ref on an undefined value", .{}), } } @@ -1243,6 +1253,12 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { .ptr_stack_offset => unreachable, .ptr_embedded_in_code => unreachable, .unreach, .none => return, // Nothing to do. + .undef => { + if (!self.wantSafety()) + return; // The already existing value will do just fine. + // TODO Upgrade this to a memset call when we have that available. + return self.genSetStack(src, ty, stack_offset, .{ .immediate = 0xaaaaaaaa }); + }, .compare_flags_unsigned => |op| { return self.fail(src, "TODO implement set stack variable with compare flags value (unsigned)", .{}); }, @@ -1250,6 +1266,10 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { return self.fail(src, "TODO implement set stack variable with compare flags value (signed)", .{}); }, .immediate => |x_big| { + if (ty.abiSize(self.target.*) != 4) { + // TODO after fixing this, need to update the undef case above + return self.fail(src, "TODO implement set non 4 abi size stack variable with immediate", .{}); + } try self.code.ensureCapacity(self.code.items.len + 7); if (x_big <= math.maxInt(u32)) { const x = @intCast(u32, x_big); @@ -1311,6 +1331,18 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { .ptr_stack_offset => unreachable, .ptr_embedded_in_code => unreachable, .unreach, .none => return, // Nothing to do. + .undef => { + if (!self.wantSafety()) + return; // The already existing value will do just fine. + // Write the debug undefined value. + switch (reg.size()) { + 8 => return self.genSetReg(src, reg, .{ .immediate = 0xaa }), + 16 => return self.genSetReg(src, reg, .{ .immediate = 0xaaaa }), + 32 => return self.genSetReg(src, reg, .{ .immediate = 0xaaaaaaaa }), + 64 => return self.genSetReg(src, reg, .{ .immediate = 0xaaaaaaaaaaaaaaaa }), + else => unreachable, + } + }, .compare_flags_unsigned => |op| { try self.code.ensureCapacity(self.code.items.len + 3); self.rex(.{ .b = reg.isExtended(), .w = reg.size() == 64 }); @@ -1471,7 +1503,7 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { // is no way to possibly encode it. This means that RSP, RBP, R12, and R13 cannot be used with // this instruction. const id3 = @truncate(u3, reg.id()); - std.debug.assert(id3 != 4 and id3 != 5); + assert(id3 != 4 and id3 != 5); // Rather than duplicate the logic used for the move, we just use a self-call with a new MCValue. try self.genSetReg(src, reg, MCValue{ .immediate = x }); @@ -1580,6 +1612,8 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { } fn genTypedValue(self: *Self, src: usize, typed_value: TypedValue) !MCValue { + if (typed_value.val.isUndef()) + return MCValue.undef; const ptr_bits = self.target.cpu.arch.ptrBitWidth(); const ptr_bytes: u64 = @divExact(ptr_bits, 8); switch (typed_value.ty.zigTypeTag()) { @@ -1691,6 +1725,16 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { return result; } + /// TODO support scope overrides. Also note this logic is duplicated with `Module.wantSafety`. 
+ fn wantSafety(self: *Self) bool { + return switch (self.bin_file.base.options.optimize_mode) { + .Debug => true, + .ReleaseSafe => true, + .ReleaseFast => false, + .ReleaseSmall => false, + }; + } + fn fail(self: *Self, src: usize, comptime format: []const u8, args: anytype) error{ CodegenFail, OutOfMemory } { @setCold(true); assert(self.err_msg == null); diff --git a/src-self-hosted/ir.zig b/src-self-hosted/ir.zig index 3965a2ea93..deb0a91cec 100644 --- a/src-self-hosted/ir.zig +++ b/src-self-hosted/ir.zig @@ -165,8 +165,7 @@ pub const Inst = struct { /// Returns `null` if runtime-known. pub fn value(base: *Inst) ?Value { - if (base.ty.onePossibleValue()) - return Value.initTag(.the_one_possible_value); + if (base.ty.onePossibleValue()) |opv| return opv; const inst = base.cast(Constant) orelse return null; return inst.val; diff --git a/src-self-hosted/link.zig b/src-self-hosted/link.zig index cde91cdc01..7cd6876cb2 100644 --- a/src-self-hosted/link.zig +++ b/src-self-hosted/link.zig @@ -16,6 +16,7 @@ pub const Options = struct { output_mode: std.builtin.OutputMode, link_mode: std.builtin.LinkMode, object_format: std.builtin.ObjectFormat, + optimize_mode: std.builtin.Mode, /// Used for calculating how much space to reserve for symbols in case the binary file /// does not already have a symbol table. symbol_count_hint: u64 = 32, @@ -66,6 +67,7 @@ pub fn writeFilePath( .link_mode = module.link_mode, .object_format = module.object_format, .symbol_count_hint = module.decls.items.len, + .optimize_mode = module.optimize_mode, }; const af = try dir.atomicFile(sub_path, .{ .mode = determineMode(options) }); defer af.deinit(); @@ -88,9 +90,12 @@ pub fn writeFilePath( fn openCFile(allocator: *Allocator, file: fs.File, options: Options) !File.C { return File.C{ + .base = .{ + .tag = .c, + .options = options, + }, .allocator = allocator, .file = file, - .options = options, .main = std.ArrayList(u8).init(allocator), .header = std.ArrayList(u8).init(allocator), .constants = std.ArrayList(u8).init(allocator), @@ -114,6 +119,8 @@ pub fn openBinFile(allocator: *Allocator, file: fs.File, options: Options) !File pub const File = struct { tag: Tag, + options: Options, + pub fn cast(base: *File, comptime T: type) ?*T { if (base.tag != T.base_tag) return null; @@ -123,47 +130,47 @@ pub const File = struct { pub fn makeWritable(base: *File, dir: fs.Dir, sub_path: []const u8) !void { switch (base.tag) { - .Elf => return @fieldParentPtr(Elf, "base", base).makeWritable(dir, sub_path), - .C => {}, + .elf => return @fieldParentPtr(Elf, "base", base).makeWritable(dir, sub_path), + .c => {}, } } pub fn makeExecutable(base: *File) !void { switch (base.tag) { - .Elf => return @fieldParentPtr(Elf, "base", base).makeExecutable(), - .C => unreachable, + .elf => return @fieldParentPtr(Elf, "base", base).makeExecutable(), + .c => unreachable, } } pub fn updateDecl(base: *File, module: *Module, decl: *Module.Decl) !void { switch (base.tag) { - .Elf => return @fieldParentPtr(Elf, "base", base).updateDecl(module, decl), - .C => return @fieldParentPtr(C, "base", base).updateDecl(module, decl), + .elf => return @fieldParentPtr(Elf, "base", base).updateDecl(module, decl), + .c => return @fieldParentPtr(C, "base", base).updateDecl(module, decl), } } pub fn allocateDeclIndexes(base: *File, decl: *Module.Decl) !void { switch (base.tag) { - .Elf => return @fieldParentPtr(Elf, "base", base).allocateDeclIndexes(decl), - .C => {}, + .elf => return @fieldParentPtr(Elf, "base", base).allocateDeclIndexes(decl), + .c => {}, } } pub fn 
deinit(base: *File) void { switch (base.tag) { - .Elf => @fieldParentPtr(Elf, "base", base).deinit(), - .C => @fieldParentPtr(C, "base", base).deinit(), + .elf => @fieldParentPtr(Elf, "base", base).deinit(), + .c => @fieldParentPtr(C, "base", base).deinit(), } } pub fn destroy(base: *File) void { switch (base.tag) { - .Elf => { + .elf => { const parent = @fieldParentPtr(Elf, "base", base); parent.deinit(); parent.allocator.destroy(parent); }, - .C => { + .c => { const parent = @fieldParentPtr(C, "base", base); parent.deinit(); parent.allocator.destroy(parent); @@ -173,29 +180,22 @@ pub const File = struct { pub fn flush(base: *File) !void { try switch (base.tag) { - .Elf => @fieldParentPtr(Elf, "base", base).flush(), - .C => @fieldParentPtr(C, "base", base).flush(), + .elf => @fieldParentPtr(Elf, "base", base).flush(), + .c => @fieldParentPtr(C, "base", base).flush(), }; } pub fn freeDecl(base: *File, decl: *Module.Decl) void { switch (base.tag) { - .Elf => @fieldParentPtr(Elf, "base", base).freeDecl(decl), - .C => unreachable, + .elf => @fieldParentPtr(Elf, "base", base).freeDecl(decl), + .c => unreachable, } } pub fn errorFlags(base: *File) ErrorFlags { return switch (base.tag) { - .Elf => @fieldParentPtr(Elf, "base", base).error_flags, - .C => return .{ .no_entry_point_found = false }, - }; - } - - pub fn options(base: *File) Options { - return switch (base.tag) { - .Elf => @fieldParentPtr(Elf, "base", base).options, - .C => @fieldParentPtr(C, "base", base).options, + .elf => @fieldParentPtr(Elf, "base", base).error_flags, + .c => return .{ .no_entry_point_found = false }, }; } @@ -207,14 +207,14 @@ pub const File = struct { exports: []const *Module.Export, ) !void { switch (base.tag) { - .Elf => return @fieldParentPtr(Elf, "base", base).updateDeclExports(module, decl, exports), - .C => return {}, + .elf => return @fieldParentPtr(Elf, "base", base).updateDeclExports(module, decl, exports), + .c => return {}, } } pub const Tag = enum { - Elf, - C, + elf, + c, }; pub const ErrorFlags = struct { @@ -222,15 +222,15 @@ pub const File = struct { }; pub const C = struct { - pub const base_tag: Tag = .C; - base: File = File{ .tag = base_tag }, + pub const base_tag: Tag = .c; + + base: File, allocator: *Allocator, header: std.ArrayList(u8), constants: std.ArrayList(u8), main: std.ArrayList(u8), file: ?fs.File, - options: Options, called: std.StringHashMap(void), need_stddef: bool = false, need_stdint: bool = false, @@ -294,13 +294,13 @@ pub const File = struct { }; pub const Elf = struct { - pub const base_tag: Tag = .Elf; - base: File = File{ .tag = base_tag }, + pub const base_tag: Tag = .elf; + + base: File, allocator: *Allocator, file: ?fs.File, owns_file_handle: bool, - options: Options, ptr_width: enum { p32, p64 }, /// Stored in native-endian format, depending on target endianness needs to be bswapped on read/write. @@ -460,13 +460,13 @@ pub const File = struct { self.file = try dir.createFile(sub_path, .{ .truncate = false, .read = true, - .mode = determineMode(self.options), + .mode = determineMode(self.base.options), }); } /// Returns end pos of collision, if any. 
fn detectAllocCollision(self: *Elf, start: u64, size: u64) ?u64 { - const small_ptr = self.options.target.cpu.arch.ptrBitWidth() == 32; + const small_ptr = self.base.options.target.cpu.arch.ptrBitWidth() == 32; const ehdr_size: u64 = if (small_ptr) @sizeOf(elf.Elf32_Ehdr) else @sizeOf(elf.Elf64_Ehdr); if (start < ehdr_size) return ehdr_size; @@ -569,7 +569,7 @@ pub const File = struct { }; if (self.phdr_load_re_index == null) { self.phdr_load_re_index = @intCast(u16, self.program_headers.items.len); - const file_size = self.options.program_code_size_hint; + const file_size = self.base.options.program_code_size_hint; const p_align = 0x1000; const off = self.findFreeSpace(file_size, p_align); std.log.debug(.link, "found PT_LOAD free space 0x{x} to 0x{x}\n", .{ off, off + file_size }); @@ -588,7 +588,7 @@ pub const File = struct { } if (self.phdr_got_index == null) { self.phdr_got_index = @intCast(u16, self.program_headers.items.len); - const file_size = @as(u64, ptr_size) * self.options.symbol_count_hint; + const file_size = @as(u64, ptr_size) * self.base.options.symbol_count_hint; // We really only need ptr alignment but since we are using PROGBITS, linux requires // page align. const p_align = 0x1000; @@ -671,7 +671,7 @@ pub const File = struct { self.symtab_section_index = @intCast(u16, self.sections.items.len); const min_align: u16 = if (small_ptr) @alignOf(elf.Elf32_Sym) else @alignOf(elf.Elf64_Sym); const each_size: u64 = if (small_ptr) @sizeOf(elf.Elf32_Sym) else @sizeOf(elf.Elf64_Sym); - const file_size = self.options.symbol_count_hint * each_size; + const file_size = self.base.options.symbol_count_hint * each_size; const off = self.findFreeSpace(file_size, min_align); std.log.debug(.link, "found symtab free space 0x{x} to 0x{x}\n", .{ off, off + file_size }); @@ -726,7 +726,7 @@ pub const File = struct { /// Commit pending changes and write headers. pub fn flush(self: *Elf) !void { - const foreign_endian = self.options.target.cpu.arch.endian() != std.Target.current.cpu.arch.endian(); + const foreign_endian = self.base.options.target.cpu.arch.endian() != std.Target.current.cpu.arch.endian(); // Unfortunately these have to be buffered and done at the end because ELF does not allow // mixing local and global symbols within a symbol table. 
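The link.File reshuffle above is the usual Zig "embedded base struct" interface: each backend embeds a base: File carrying the tag (and now the Options), and dispatch recovers the outer struct with @fieldParentPtr. A stripped-down sketch of that pattern follows, in current Zig where @fieldParentPtr infers the parent type from the result location (the compiler at the time used the three-argument form); the flush methods and return values are made up for illustration:

const std = @import("std");

const File = struct {
    tag: Tag,

    const Tag = enum { elf, c };

    fn flush(base: *File) []const u8 {
        return switch (base.tag) {
            // Recover the outer struct from a pointer to its embedded base field.
            .elf => @as(*Elf, @fieldParentPtr("base", base)).flushElf(),
            .c => @as(*C, @fieldParentPtr("base", base)).flushC(),
        };
    }
};

const Elf = struct {
    base: File = .{ .tag = .elf },
    fn flushElf(self: *Elf) []const u8 {
        _ = self;
        return "elf";
    }
};

const C = struct {
    base: File = .{ .tag = .c },
    fn flushC(self: *C) []const u8 {
        _ = self;
        return "c";
    }
};

test "dispatch through the embedded base" {
    var elf = Elf{};
    var c = C{};
    try std.testing.expectEqualStrings("elf", File.flush(&elf.base));
    try std.testing.expectEqualStrings("c", File.flush(&c.base));
}

Moving Options into the base struct is what lets shared code write self.base.options instead of dispatching on the tag just to read configuration.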
@@ -845,7 +845,7 @@ pub const File = struct { } self.shdr_table_dirty = false; } - if (self.entry_addr == null and self.options.output_mode == .Exe) { + if (self.entry_addr == null and self.base.options.output_mode == .Exe) { std.log.debug(.link, "no_entry_point_found = true\n", .{}); self.error_flags.no_entry_point_found = true; } else { @@ -875,7 +875,7 @@ pub const File = struct { }; index += 1; - const endian = self.options.target.cpu.arch.endian(); + const endian = self.base.options.target.cpu.arch.endian(); hdr_buf[index] = switch (endian) { .Little => elf.ELFDATA2LSB, .Big => elf.ELFDATA2MSB, @@ -893,10 +893,10 @@ pub const File = struct { assert(index == 16); - const elf_type = switch (self.options.output_mode) { + const elf_type = switch (self.base.options.output_mode) { .Exe => elf.ET.EXEC, .Obj => elf.ET.REL, - .Lib => switch (self.options.link_mode) { + .Lib => switch (self.base.options.link_mode) { .Static => elf.ET.REL, .Dynamic => elf.ET.DYN, }, @@ -904,7 +904,7 @@ pub const File = struct { mem.writeInt(u16, hdr_buf[index..][0..2], @enumToInt(elf_type), endian); index += 2; - const machine = self.options.target.cpu.arch.toElfMachine(); + const machine = self.base.options.target.cpu.arch.toElfMachine(); mem.writeInt(u16, hdr_buf[index..][0..2], @enumToInt(machine), endian); index += 2; @@ -1216,7 +1216,7 @@ pub const File = struct { }, }; - const required_alignment = typed_value.ty.abiAlignment(self.options.target); + const required_alignment = typed_value.ty.abiAlignment(self.base.options.target); const stt_bits: u8 = switch (typed_value.ty.zigTypeTag()) { .Fn => elf.STT_FUNC, @@ -1361,9 +1361,9 @@ pub const File = struct { } fn writeProgHeader(self: *Elf, index: usize) !void { - const foreign_endian = self.options.target.cpu.arch.endian() != std.Target.current.cpu.arch.endian(); + const foreign_endian = self.base.options.target.cpu.arch.endian() != std.Target.current.cpu.arch.endian(); const offset = self.program_headers.items[index].p_offset; - switch (self.options.target.cpu.arch.ptrBitWidth()) { + switch (self.base.options.target.cpu.arch.ptrBitWidth()) { 32 => { var phdr = [1]elf.Elf32_Phdr{progHeaderTo32(self.program_headers.items[index])}; if (foreign_endian) { @@ -1383,9 +1383,9 @@ pub const File = struct { } fn writeSectHeader(self: *Elf, index: usize) !void { - const foreign_endian = self.options.target.cpu.arch.endian() != std.Target.current.cpu.arch.endian(); + const foreign_endian = self.base.options.target.cpu.arch.endian() != std.Target.current.cpu.arch.endian(); const offset = self.sections.items[index].sh_offset; - switch (self.options.target.cpu.arch.ptrBitWidth()) { + switch (self.base.options.target.cpu.arch.ptrBitWidth()) { 32 => { var shdr: [1]elf.Elf32_Shdr = undefined; shdr[0] = sectHeaderTo32(self.sections.items[index]); @@ -1433,7 +1433,7 @@ pub const File = struct { self.offset_table_count_dirty = false; } - const endian = self.options.target.cpu.arch.endian(); + const endian = self.base.options.target.cpu.arch.endian(); const off = shdr.sh_offset + @as(u64, entry_size) * index; switch (self.ptr_width) { .p32 => { @@ -1475,7 +1475,7 @@ pub const File = struct { syms_sect.sh_size = needed_size; // anticipating adding the global symbols later self.shdr_table_dirty = true; // TODO look into only writing one section } - const foreign_endian = self.options.target.cpu.arch.endian() != std.Target.current.cpu.arch.endian(); + const foreign_endian = self.base.options.target.cpu.arch.endian() != std.Target.current.cpu.arch.endian(); switch (self.ptr_width) { 
.p32 => { var sym = [1]elf.Elf32_Sym{ @@ -1511,7 +1511,7 @@ pub const File = struct { .p32 => @sizeOf(elf.Elf32_Sym), .p64 => @sizeOf(elf.Elf64_Sym), }; - const foreign_endian = self.options.target.cpu.arch.endian() != std.Target.current.cpu.arch.endian(); + const foreign_endian = self.base.options.target.cpu.arch.endian() != std.Target.current.cpu.arch.endian(); const global_syms_off = syms_sect.sh_offset + self.local_symbols.items.len * sym_size; switch (self.ptr_width) { .p32 => { @@ -1577,9 +1577,12 @@ pub fn createElfFile(allocator: *Allocator, file: fs.File, options: Options) !Fi } var self: File.Elf = .{ + .base = .{ + .tag = .elf, + .options = options, + }, .allocator = allocator, .file = file, - .options = options, .ptr_width = switch (options.target.cpu.arch.ptrBitWidth()) { 32 => .p32, 64 => .p64, @@ -1637,10 +1640,13 @@ fn openBinFileInner(allocator: *Allocator, file: fs.File, options: Options) !Fil .raw => return error.IncrFailed, } var self: File.Elf = .{ + .base = .{ + .tag = .elf, + .options = options, + }, .allocator = allocator, .file = file, .owns_file_handle = false, - .options = options, .ptr_width = switch (options.target.cpu.arch.ptrBitWidth()) { 32 => .p32, 64 => .p64, diff --git a/src-self-hosted/type.zig b/src-self-hosted/type.zig index 729292f6ab..457a69ac6d 100644 --- a/src-self-hosted/type.zig +++ b/src-self-hosted/type.zig @@ -1653,7 +1653,7 @@ pub const Type = extern union { }; } - pub fn onePossibleValue(self: Type) bool { + pub fn onePossibleValue(self: Type) ?Value { var ty = self; while (true) switch (ty.tag()) { .f16, @@ -1692,21 +1692,32 @@ pub const Type = extern union { .single_const_pointer_to_comptime_int, .array_u8_sentinel_0, .const_slice_u8, - => return false, - .c_void, - .void, - .noreturn, - .@"null", - .@"undefined", - => return true, + => return null, + + .void => return Value.initTag(.void_value), + .noreturn => return Value.initTag(.unreachable_value), + .@"null" => return Value.initTag(.null_value), + .@"undefined" => return Value.initTag(.undef), - .int_unsigned => return ty.cast(Payload.IntUnsigned).?.bits == 0, - .int_signed => return ty.cast(Payload.IntSigned).?.bits == 0, + .int_unsigned => { + if (ty.cast(Payload.IntUnsigned).?.bits == 0) { + return Value.initTag(.zero); + } else { + return null; + } + }, + .int_signed => { + if (ty.cast(Payload.IntSigned).?.bits == 0) { + return Value.initTag(.zero); + } else { + return null; + } + }, .array => { const array = ty.cast(Payload.Array).?; if (array.len == 0) - return true; + return Value.initTag(.empty_array); ty = array.elem_type; continue; }, diff --git a/src-self-hosted/value.zig b/src-self-hosted/value.zig index 881602d76a..eff7c95be7 100644 --- a/src-self-hosted/value.zig +++ b/src-self-hosted/value.zig @@ -63,7 +63,9 @@ pub const Value = extern union { undef, zero, - the_one_possible_value, // when the type only has one possible value + void_value, + unreachable_value, + empty_array, null_value, bool_true, bool_false, // See last_no_payload_tag below. 
@@ -164,7 +166,9 @@ pub const Value = extern union { .const_slice_u8_type, .undef, .zero, - .the_one_possible_value, + .void_value, + .unreachable_value, + .empty_array, .null_value, .bool_true, .bool_false, @@ -285,7 +289,8 @@ pub const Value = extern union { .null_value => return out_stream.writeAll("null"), .undef => return out_stream.writeAll("undefined"), .zero => return out_stream.writeAll("0"), - .the_one_possible_value => return out_stream.writeAll("(one possible value)"), + .void_value => return out_stream.writeAll("{}"), + .unreachable_value => return out_stream.writeAll("unreachable"), .bool_true => return out_stream.writeAll("true"), .bool_false => return out_stream.writeAll("false"), .ty => return val.cast(Payload.Ty).?.ty.format("", options, out_stream), @@ -312,6 +317,7 @@ pub const Value = extern union { try out_stream.print("&[{}] ", .{elem_ptr.index}); val = elem_ptr.array_ptr; }, + .empty_array => return out_stream.writeAll(".{}"), .bytes => return std.zig.renderStringLiteral(self.cast(Payload.Bytes).?.data, out_stream), .repeated => { try out_stream.writeAll("(repeated) "); @@ -388,7 +394,9 @@ pub const Value = extern union { .undef, .zero, - .the_one_possible_value, + .void_value, + .unreachable_value, + .empty_array, .bool_true, .bool_false, .null_value, @@ -460,15 +468,18 @@ pub const Value = extern union { .decl_ref, .elem_ptr, .bytes, - .undef, .repeated, .float_16, .float_32, .float_64, .float_128, + .void_value, + .unreachable_value, + .empty_array, => unreachable, - .the_one_possible_value, // An integer with one possible value is always zero. + .undef => unreachable, + .zero, .bool_false, => return BigIntMutable.init(&space.limbs, 0).toConst(), @@ -532,16 +543,19 @@ pub const Value = extern union { .decl_ref, .elem_ptr, .bytes, - .undef, .repeated, .float_16, .float_32, .float_64, .float_128, + .void_value, + .unreachable_value, + .empty_array, => unreachable, + .undef => unreachable, + .zero, - .the_one_possible_value, // an integer with one possible value is always zero .bool_false, => return 0, @@ -570,7 +584,7 @@ pub const Value = extern union { .float_64 => @floatCast(T, self.cast(Payload.Float_64).?.val), .float_128 => @floatCast(T, self.cast(Payload.Float_128).?.val), - .zero, .the_one_possible_value => 0, + .zero => 0, .int_u64 => @intToFloat(T, self.cast(Payload.Int_u64).?.int), // .int_i64 => @intToFloat(f128, self.cast(Payload.Int_i64).?.int), .int_i64 => @panic("TODO lld: error: undefined symbol: __floatditf"), @@ -637,9 +651,11 @@ pub const Value = extern union { .float_32, .float_64, .float_128, + .void_value, + .unreachable_value, + .empty_array, => unreachable, - .the_one_possible_value, // an integer with one possible value is always zero .zero, .bool_false, => return 0, @@ -714,11 +730,13 @@ pub const Value = extern union { .float_32, .float_64, .float_128, + .void_value, + .unreachable_value, + .empty_array, => unreachable, .zero, .undef, - .the_one_possible_value, // an integer with one possible value is always zero .bool_false, => return true, @@ -797,13 +815,13 @@ pub const Value = extern union { // return Value.initPayload(&res_payload.base).copy(allocator); }, 32 => { - var res_payload = Value.Payload.Float_32{.val = self.toFloat(f32)}; + var res_payload = Value.Payload.Float_32{ .val = self.toFloat(f32) }; if (!self.eql(Value.initPayload(&res_payload.base))) return error.Overflow; return Value.initPayload(&res_payload.base).copy(allocator); }, 64 => { - var res_payload = Value.Payload.Float_64{.val = self.toFloat(f64)}; + var 
res_payload = Value.Payload.Float_64{ .val = self.toFloat(f64) }; if (!self.eql(Value.initPayload(&res_payload.base))) return error.Overflow; return Value.initPayload(&res_payload.base).copy(allocator); @@ -875,7 +893,9 @@ pub const Value = extern union { .int_i64, .int_big_positive, .int_big_negative, - .the_one_possible_value, + .empty_array, + .void_value, + .unreachable_value, => unreachable, .zero => false, @@ -939,10 +959,12 @@ pub const Value = extern union { .bytes, .repeated, .undef, + .void_value, + .unreachable_value, + .empty_array, => unreachable, .zero, - .the_one_possible_value, // an integer with one possible value is always zero .bool_false, => .eq, @@ -964,8 +986,8 @@ pub const Value = extern union { pub fn order(lhs: Value, rhs: Value) std.math.Order { const lhs_tag = lhs.tag(); const rhs_tag = rhs.tag(); - const lhs_is_zero = lhs_tag == .zero or lhs_tag == .the_one_possible_value; - const rhs_is_zero = rhs_tag == .zero or rhs_tag == .the_one_possible_value; + const lhs_is_zero = lhs_tag == .zero; + const rhs_is_zero = rhs_tag == .zero; if (lhs_is_zero) return rhs.orderAgainstZero().invert(); if (rhs_is_zero) return lhs.orderAgainstZero(); @@ -1071,9 +1093,11 @@ pub const Value = extern union { .float_32, .float_64, .float_128, + .void_value, + .unreachable_value, + .empty_array, => unreachable, - .the_one_possible_value => Value.initTag(.the_one_possible_value), .ref_val => self.cast(Payload.RefVal).?.val, .decl_ref => self.cast(Payload.DeclRef).?.decl.value(), .elem_ptr => { @@ -1130,7 +1154,6 @@ pub const Value = extern union { .single_const_pointer_to_comptime_int_type, .const_slice_u8_type, .zero, - .the_one_possible_value, .bool_true, .bool_false, .null_value, @@ -1147,8 +1170,12 @@ pub const Value = extern union { .float_32, .float_64, .float_128, + .void_value, + .unreachable_value, => unreachable, + .empty_array => unreachable, // out of bounds array index + .bytes => { const int_payload = try allocator.create(Payload.Int_u64); int_payload.* = .{ .int = self.cast(Payload.Bytes).?.data[index] }; @@ -1175,8 +1202,7 @@ pub const Value = extern union { return self.tag() == .undef; } - /// Valid for all types. Asserts the value is not undefined. - /// `.the_one_possible_value` is reported as not null. + /// Valid for all types. Asserts the value is not undefined and not unreachable. 
pub fn isNull(self: Value) bool { return switch (self.tag()) { .ty, @@ -1221,7 +1247,7 @@ pub const Value = extern union { .single_const_pointer_to_comptime_int_type, .const_slice_u8_type, .zero, - .the_one_possible_value, + .empty_array, .bool_true, .bool_false, .function, @@ -1238,9 +1264,11 @@ pub const Value = extern union { .float_32, .float_64, .float_128, + .void_value, => false, .undef => unreachable, + .unreachable_value => unreachable, .null_value => true, }; } diff --git a/src-self-hosted/zir.zig b/src-self-hosted/zir.zig index 38751e22a3..318c4bdc8e 100644 --- a/src-self-hosted/zir.zig +++ b/src-self-hosted/zir.zig @@ -742,7 +742,7 @@ pub const Inst = struct { .@"false" => .{ .ty = Type.initTag(.bool), .val = Value.initTag(.bool_false) }, .@"null" => .{ .ty = Type.initTag(.@"null"), .val = Value.initTag(.null_value) }, .@"undefined" => .{ .ty = Type.initTag(.@"undefined"), .val = Value.initTag(.undef) }, - .void_value => .{ .ty = Type.initTag(.void), .val = Value.initTag(.the_one_possible_value) }, + .void_value => .{ .ty = Type.initTag(.void), .val = Value.initTag(.void_value) }, }; } }; @@ -1598,6 +1598,21 @@ const EmitZIR = struct { const decl = decl_ref.decl; return try self.emitUnnamedDecl(try self.emitDeclRef(src, decl)); } + if (typed_value.val.isUndef()) { + const as_inst = try self.arena.allocator.create(Inst.BinOp); + as_inst.* = .{ + .base = .{ + .tag = .as, + .src = src, + }, + .positionals = .{ + .lhs = (try self.emitType(src, typed_value.ty)).inst, + .rhs = (try self.emitPrimitive(src, .@"undefined")).inst, + }, + .kw_args = .{}, + }; + return self.emitUnnamedDecl(&as_inst.base); + } switch (typed_value.ty.zigTypeTag()) { .Pointer => { const ptr_elem_type = typed_value.ty.elemType(); diff --git a/src-self-hosted/zir_sema.zig b/src-self-hosted/zir_sema.zig index 7db2811384..6bd4159e36 100644 --- a/src-self-hosted/zir_sema.zig +++ b/src-self-hosted/zir_sema.zig @@ -882,7 +882,7 @@ fn analyzeInstArithmetic(mod: *Module, scope: *Scope, inst: *zir.Inst.BinOp) Inn fn analyzeInstComptimeOp(mod: *Module, scope: *Scope, res_type: Type, inst: *zir.Inst.BinOp, lhs_val: Value, rhs_val: Value) InnerError!*Inst { // incase rhs is 0, simply return lhs without doing any calculations // TODO Once division is implemented we should throw an error when dividing by 0. - if (rhs_val.tag() == .zero or rhs_val.tag() == .the_one_possible_value) { + if (rhs_val.compareWithZero(.eq)) { return mod.constInst(scope, inst.base.src, .{ .ty = res_type, .val = lhs_val, @@ -1083,6 +1083,7 @@ fn analyzeInstUnreachNoChk(mod: *Module, scope: *Scope, unreach: *zir.Inst.NoOp) fn analyzeInstUnreachable(mod: *Module, scope: *Scope, unreach: *zir.Inst.NoOp) InnerError!*Inst { const b = try mod.requireRuntimeBlock(scope, unreach.base.src); + // TODO Add compile error for @optimizeFor occurring too late in a scope. if (mod.wantSafety(scope)) { // TODO Once we have a panic function to call, call it here instead of this. _ = try mod.addNoOp(b, unreach.base.src, Type.initTag(.void), .breakpoint); -- cgit v1.2.3 From 1bbfa36b76271e907cac88e83cec8dee1e3d69f7 Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Wed, 29 Jul 2020 00:08:43 -0700 Subject: stage2: improved codegen * multiple returns jump to one canonical function exitlude. This is in preparation for the defer feature. * simple elision of trivial jump relocs. * omit prelude/exitlude for naked calling convention functions. 
* fix not switching on arch for prelude/exitlude * fix swapped registers when setting stack mem from a register --- src-self-hosted/codegen.zig | 118 +++++++++++++++++++++++++++++++++----------- 1 file changed, 90 insertions(+), 28 deletions(-) (limited to 'src-self-hosted/codegen.zig') diff --git a/src-self-hosted/codegen.zig b/src-self-hosted/codegen.zig index 40fb6c5407..6880d6dbf3 100644 --- a/src-self-hosted/codegen.zig +++ b/src-self-hosted/codegen.zig @@ -214,6 +214,11 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { src: usize, stack_align: u32, + /// The value is an offset into the `Function` `code` from the beginning. + /// To perform the reloc, write 32-bit signed little-endian integer + /// which is a relative jump, based on the address following the reloc. + exitlude_jump_relocs: std.ArrayListUnmanaged(usize) = .{}, + /// Whenever there is a runtime branch, we push a Branch onto this stack, /// and pop it off when the runtime branch joins. This provides an "overlay" /// of the table of mappings from instructions to `MCValue` from within the branch. @@ -376,6 +381,7 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { .src = src, .stack_align = undefined, }; + defer function.exitlude_jump_relocs.deinit(bin_file.allocator); var call_info = function.resolveCallingConventionValues(src, fn_type) catch |err| switch (err) { error.CodegenFail => return Result{ .fail = function.err_msg.? }, @@ -401,29 +407,78 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { } fn gen(self: *Self) !void { - try self.code.ensureCapacity(self.code.items.len + 11); - - // TODO omit this for naked functions - // push rbp - // mov rbp, rsp - self.code.appendSliceAssumeCapacity(&[_]u8{ 0x55, 0x48, 0x89, 0xe5 }); - - // sub rsp, x - const stack_end = self.branch_stack.items[0].max_end_stack; - if (stack_end > math.maxInt(i32)) { - return self.fail(self.src, "too much stack used in call parameters", .{}); - } else if (stack_end > math.maxInt(i8)) { - // 48 83 ec xx sub rsp,0x10 - self.code.appendSliceAssumeCapacity(&[_]u8{ 0x48, 0x81, 0xec }); - const x = @intCast(u32, stack_end); - mem.writeIntLittle(u32, self.code.addManyAsArrayAssumeCapacity(4), x); - } else if (stack_end != 0) { - // 48 81 ec xx xx xx xx sub rsp,0x80 - const x = @intCast(u8, stack_end); - self.code.appendSliceAssumeCapacity(&[_]u8{ 0x48, 0x83, 0xec, x }); - } + switch (arch) { + .x86_64 => { + try self.code.ensureCapacity(self.code.items.len + 11); + + const cc = self.fn_type.fnCallingConvention(); + if (cc != .Naked) { + // We want to subtract the aligned stack frame size from rsp here, but we don't + // yet know how big it will be, so we leave room for a 4-byte stack size. + // TODO During semantic analysis, check if there are no function calls. If there + // are none, here we can omit the part where we subtract and then add rsp. 
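// Aside: a minimal sketch (not part of the patch) of the reserve-then-backpatch
// pattern described in the comment above. The frame size operand of
// `sub rsp, imm32` is not known until after the body is generated, so the
// emitter records the offset of a 4-byte placeholder and fills it in later.
// The helper names below are hypothetical; only std is assumed.

const std = @import("std");

/// Append a 4-byte placeholder and return its offset so it can be patched later.
fn reserveImm32(code: *std.ArrayList(u8)) !usize {
    const index = code.items.len;
    try code.appendSlice(&[_]u8{ 0, 0, 0, 0 });
    return index;
}

/// Write the now-known value over the placeholder, little-endian.
fn patchImm32(code: *std.ArrayList(u8), index: usize, value: u32) void {
    std.mem.writeIntLittle(u32, code.items[index..][0..4], value);
}

// The patch does the same thing inline: it records `reloc_index`, generates the
// body, and only then writes the aligned stack frame size into those 4 bytes.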
+ self.code.appendSliceAssumeCapacity(&[_]u8{ + // push rbp + 0x55, + // mov rbp, rsp + 0x48, + 0x89, + 0xe5, + // sub rsp, imm32 (with reloc) + 0x48, + 0x81, + 0xec, + }); + const reloc_index = self.code.items.len; + self.code.items.len += 4; + + try self.genBody(self.mod_fn.analysis.success); + + const stack_end = self.branch_stack.items[0].max_end_stack; + if (stack_end > math.maxInt(i32)) + return self.fail(self.src, "too much stack used in call parameters", .{}); + const aligned_stack_end = mem.alignForward(stack_end, self.stack_align); + mem.writeIntLittle(u32, self.code.items[reloc_index..][0..4], @intCast(u32, aligned_stack_end)); + + if (self.code.items.len >= math.maxInt(i32)) { + return self.fail(self.src, "unable to perform relocation: jump too far", .{}); + } + for (self.exitlude_jump_relocs.items) |jmp_reloc| { + const amt = self.code.items.len - (jmp_reloc + 4); + // If it wouldn't jump at all, elide it. + if (amt == 0) { + self.code.items.len -= 5; + continue; + } + const s32_amt = @intCast(i32, amt); + mem.writeIntLittle(i32, self.code.items[jmp_reloc..][0..4], s32_amt); + } + + try self.code.ensureCapacity(self.code.items.len + 9); + // add rsp, x + if (aligned_stack_end > math.maxInt(i8)) { + // example: 48 81 c4 ff ff ff 7f add rsp,0x7fffffff + self.code.appendSliceAssumeCapacity(&[_]u8{ 0x48, 0x81, 0xc4 }); + const x = @intCast(u32, aligned_stack_end); + mem.writeIntLittle(u32, self.code.addManyAsArrayAssumeCapacity(4), x); + } else if (aligned_stack_end != 0) { + // example: 48 83 c4 7f add rsp,0x7f + const x = @intCast(u8, aligned_stack_end); + self.code.appendSliceAssumeCapacity(&[_]u8{ 0x48, 0x83, 0xc4, x }); + } - try self.genBody(self.mod_fn.analysis.success); + self.code.appendSliceAssumeCapacity(&[_]u8{ + 0x5d, // pop rbp + 0xc3, // ret + }); + } else { + try self.genBody(self.mod_fn.analysis.success); + } + }, + else => { + try self.genBody(self.mod_fn.analysis.success); + }, + } } fn genBody(self: *Self, body: ir.Body) InnerError!void { @@ -987,10 +1042,12 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { try self.code.append(0xc3); // ret }, .x86_64 => { - try self.code.appendSlice(&[_]u8{ - 0x5d, // pop rbp - 0xc3, // ret - }); + // TODO when implementing defer, this will need to jump to the appropriate defer expression. + // TODO optimization opportunity: figure out when we can emit this as a 2 byte instruction + // which is available if the jump is 127 bytes or less forward. + try self.code.resize(self.code.items.len + 5); + self.code.items[self.code.items.len - 5] = 0xe9; // jmp rel32 + try self.exitlude_jump_relocs.append(self.gpa, self.code.items.len - 4); }, else => return self.fail(src, "TODO implement return for {}", .{self.target.cpu.arch}), } @@ -1130,6 +1187,11 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { switch (reloc) { .rel32 => |pos| { const amt = self.code.items.len - (pos + 4); + // If it wouldn't jump at all, elide it. 
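// Aside: a sketch (not from the patch) of the rel32 arithmetic used for these
// jump relocations. A `jmp rel32` is 5 bytes (0xE9 plus a 4-byte displacement),
// and the displacement is measured from the address *after* those 4 bytes, so a
// jump whose target immediately follows the instruction has displacement 0 and
// can simply be dropped, which is the elision the code below performs. Only
// std is assumed; the helper name is made up and handles forward jumps only,
// as the exitlude relocs do.

const std = @import("std");

fn rel32Displacement(target: usize, reloc_pos: usize) !i32 {
    // `reloc_pos` is the offset of the 4-byte displacement field.
    const amt = target - (reloc_pos + 4);
    return std.math.cast(i32, amt);
}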
+ if (amt == 0) { + self.code.items.len -= 5; + return; + } const s32_amt = math.cast(i32, amt) catch return self.fail(src, "unable to perform relocation: jump too far", .{}); mem.writeIntLittle(i32, self.code.items[pos..][0..4], s32_amt); @@ -1296,13 +1358,13 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { const reg_id: u8 = @truncate(u3, reg.id()); if (stack_offset <= 128) { // example: 48 89 55 7f mov QWORD PTR [rbp+0x7f],rdx - const RM = @as(u8, 0b01_101_000) | reg_id; + const RM = @as(u8, 0b01_000_101) | (reg_id << 3); const negative_offset = @intCast(i8, -@intCast(i32, stack_offset)); const twos_comp = @bitCast(u8, negative_offset); self.code.appendSliceAssumeCapacity(&[_]u8{ 0x89, RM, twos_comp }); } else if (stack_offset <= 2147483648) { // example: 48 89 95 80 00 00 00 mov QWORD PTR [rbp+0x80],rdx - const RM = @as(u8, 0b10_101_000) | reg_id; + const RM = @as(u8, 0b10_000_101) | (reg_id << 3); const negative_offset = @intCast(i32, -@intCast(i33, stack_offset)); const twos_comp = @bitCast(u32, negative_offset); self.code.appendSliceAssumeCapacity(&[_]u8{ 0x89, RM }); -- cgit v1.2.3 From 606f157a6b6001b2623d28275a892c1a8ee3a646 Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Wed, 29 Jul 2020 02:10:35 -0700 Subject: stage2: register-aliasing-aware codegen * unify duplicated register allocation codepath * support the x86_64 concept of register aliasing * slightly improved memset codegen, supports sizes 1, 2, 4, 8 --- src-self-hosted/codegen.zig | 161 +++++++++++++++++++++++-------------- src-self-hosted/codegen/x86_64.zig | 20 +++++ test/stage2/compare_output.zig | 34 ++++++++ 3 files changed, 153 insertions(+), 62 deletions(-) (limited to 'src-self-hosted/codegen.zig') diff --git a/src-self-hosted/codegen.zig b/src-self-hosted/codegen.zig index 6880d6dbf3..be88dc67d8 100644 --- a/src-self-hosted/codegen.zig +++ b/src-self-hosted/codegen.zig @@ -328,6 +328,19 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { self.free_registers |= @as(FreeRegInt, 1) << shift; } + /// Before calling, must ensureCapacity + 1 on branch.registers. + /// Returns `null` if all registers are allocated. + fn allocReg(self: *Branch, inst: *ir.Inst) ?Register { + const free_index = @ctz(FreeRegInt, self.free_registers); + if (free_index >= callee_preserved_regs.len) { + return null; + } + self.free_registers &= ~(@as(FreeRegInt, 1) << free_index); + const reg = callee_preserved_regs[free_index]; + self.registers.putAssumeCapacityNoClobber(reg, .{ .inst = inst }); + return reg; + } + fn deinit(self: *Branch, gpa: *Allocator) void { self.inst_table.deinit(gpa); self.registers.deinit(gpa); @@ -502,8 +515,9 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { entry.value = .dead; switch (prev_value) { .register => |reg| { - _ = branch.registers.remove(reg); - branch.markRegFree(reg); + const reg64 = reg.to64(); + _ = branch.registers.remove(reg64); + branch.markRegFree(reg64); }, else => {}, // TODO process stack allocation death } @@ -582,30 +596,26 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { self.stack_align = abi_align; const branch = &self.branch_stack.items[self.branch_stack.items.len - 1]; - // TODO Make sure the type can fit in a register before we try to allocate one. - const free_index = @ctz(FreeRegInt, branch.free_registers); - if (free_index >= callee_preserved_regs.len) { - const stack_offset = try self.allocMem(inst, abi_size, abi_align); - return MCValue{ .stack_offset = stack_offset }; + // Make sure the type can fit in a register before we try to allocate one. 
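// Aside: a self-contained sketch (not part of the patch) of the free-register
// bookkeeping that `allocReg` performs: the free registers live in a bitmask,
// @ctz finds the lowest set bit (the first free register), and allocating
// clears that bit. The toy u8 mask and names here are made up; the real code
// sizes the mask to `callee_preserved_regs.len`.

const std = @import("std");

fn allocIndex(free: *u8) ?u3 {
    const index = @ctz(u8, free.*);
    if (index >= 8) return null; // mask is zero: every register is taken
    const shift = @intCast(u3, index);
    free.* &= ~(@as(u8, 1) << shift); // mark it allocated
    return shift;
}

test "toy register bitmask" {
    var free: u8 = 0b0000_0110; // registers 1 and 2 are free
    std.debug.assert(allocIndex(&free).? == 1);
    std.debug.assert(allocIndex(&free).? == 2);
    std.debug.assert(allocIndex(&free) == null);
}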
+ const ptr_bits = arch.ptrBitWidth(); + const ptr_bytes: u64 = @divExact(ptr_bits, 8); + if (abi_size <= ptr_bytes) { + try branch.registers.ensureCapacity(self.gpa, branch.registers.items().len + 1); + if (branch.allocReg(inst)) |reg| { + return MCValue{ .register = registerAlias(reg, abi_size) }; + } } - branch.free_registers &= ~(@as(FreeRegInt, 1) << free_index); - const reg = callee_preserved_regs[free_index]; - try branch.registers.putNoClobber(self.gpa, reg, .{ .inst = inst }); - return MCValue{ .register = reg }; + const stack_offset = try self.allocMem(inst, abi_size, abi_align); + return MCValue{ .stack_offset = stack_offset }; } /// Does not "move" the instruction. fn copyToNewRegister(self: *Self, inst: *ir.Inst) !MCValue { const branch = &self.branch_stack.items[self.branch_stack.items.len - 1]; try branch.registers.ensureCapacity(self.gpa, branch.registers.items().len + 1); - try branch.inst_table.ensureCapacity(self.gpa, branch.inst_table.items().len + 1); - const free_index = @ctz(FreeRegInt, branch.free_registers); - if (free_index >= callee_preserved_regs.len) + const reg = branch.allocReg(inst) orelse return self.fail(inst.src, "TODO implement spilling register to stack", .{}); - branch.free_registers &= ~(@as(FreeRegInt, 1) << free_index); - const reg = callee_preserved_regs[free_index]; - branch.registers.putAssumeCapacityNoClobber(reg, .{ .inst = inst }); const old_mcv = branch.inst_table.get(inst).?; const new_mcv: MCValue = .{ .register = reg }; try self.genSetReg(inst.src, reg, old_mcv); @@ -1131,7 +1141,9 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { // test reg, 1 // TODO detect al, ax, eax try self.code.ensureCapacity(self.code.items.len + 4); - self.rex(.{ .b = reg.isExtended(), .w = reg.size() == 64 }); + // TODO audit this codegen: we force w = true here to make + // the value affect the big register + self.rex(.{ .b = reg.isExtended(), .w = true }); self.code.appendSliceAssumeCapacity(&[_]u8{ 0xf6, @as(u8, 0xC0) | (0 << 3) | @truncate(u3, reg.id()), @@ -1319,7 +1331,13 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { if (!self.wantSafety()) return; // The already existing value will do just fine. // TODO Upgrade this to a memset call when we have that available. 
- return self.genSetStack(src, ty, stack_offset, .{ .immediate = 0xaaaaaaaa }); + switch (ty.abiSize(self.target.*)) { + 1 => return self.genSetStack(src, ty, stack_offset, .{ .immediate = 0xaa }), + 2 => return self.genSetStack(src, ty, stack_offset, .{ .immediate = 0xaaaa }), + 4 => return self.genSetStack(src, ty, stack_offset, .{ .immediate = 0xaaaaaaaa }), + 8 => return self.genSetStack(src, ty, stack_offset, .{ .immediate = 0xaaaaaaaaaaaaaaaa }), + else => return self.fail(src, "TODO implement memset", .{}), + } }, .compare_flags_unsigned => |op| { return self.fail(src, "TODO implement set stack variable with compare flags value (unsigned)", .{}); @@ -1328,24 +1346,35 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { return self.fail(src, "TODO implement set stack variable with compare flags value (signed)", .{}); }, .immediate => |x_big| { - if (ty.abiSize(self.target.*) != 4) { - // TODO after fixing this, need to update the undef case above - return self.fail(src, "TODO implement set non 4 abi size stack variable with immediate", .{}); + if (stack_offset > 128) { + return self.fail(src, "TODO implement set stack variable with large stack offset", .{}); } - try self.code.ensureCapacity(self.code.items.len + 7); - if (x_big <= math.maxInt(u32)) { - const x = @intCast(u32, x_big); - if (stack_offset > 128) { - return self.fail(src, "TODO implement set stack variable with large stack offset", .{}); - } - // We have a positive stack offset value but we want a twos complement negative - // offset from rbp, which is at the top of the stack frame. - const negative_offset = @intCast(i8, -@intCast(i32, stack_offset)); - const twos_comp = @bitCast(u8, negative_offset); - // mov DWORD PTR [rbp+offset], immediate - self.code.appendSliceAssumeCapacity(&[_]u8{ 0xc7, 0x45, twos_comp }); - mem.writeIntLittle(u32, self.code.addManyAsArrayAssumeCapacity(4), x); - } else { + try self.code.ensureCapacity(self.code.items.len + 8); + switch (ty.abiSize(self.target.*)) { + 1 => { + return self.fail(src, "TODO implement set abi_size=1 stack variable with immediate", .{}); + }, + 2 => { + return self.fail(src, "TODO implement set abi_size=2 stack variable with immediate", .{}); + }, + 4 => { + const x = @intCast(u32, x_big); + // We have a positive stack offset value but we want a twos complement negative + // offset from rbp, which is at the top of the stack frame. 
+ const negative_offset = @intCast(i8, -@intCast(i32, stack_offset)); + const twos_comp = @bitCast(u8, negative_offset); + // mov DWORD PTR [rbp+offset], immediate + self.code.appendSliceAssumeCapacity(&[_]u8{ 0xc7, 0x45, twos_comp }); + mem.writeIntLittle(u32, self.code.addManyAsArrayAssumeCapacity(4), x); + }, + 8 => { + return self.fail(src, "TODO implement set abi_size=8 stack variable with immediate", .{}); + }, + else => { + return self.fail(src, "TODO implement set abi_size=large stack variable with immediate", .{}); + }, + } + if (x_big <= math.maxInt(u32)) {} else { return self.fail(src, "TODO implement set stack variable with large immediate", .{}); } }, @@ -1407,7 +1436,9 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { }, .compare_flags_unsigned => |op| { try self.code.ensureCapacity(self.code.items.len + 3); - self.rex(.{ .b = reg.isExtended(), .w = reg.size() == 64 }); + // TODO audit this codegen: we force w = true here to make + // the value affect the big register + self.rex(.{ .b = reg.isExtended(), .w = true }); const opcode: u8 = switch (op) { .gte => 0x93, .gt => 0x97, @@ -1423,9 +1454,6 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { return self.fail(src, "TODO set register with compare flags value (signed)", .{}); }, .immediate => |x| { - if (reg.size() != 64) { - return self.fail(src, "TODO decide whether to implement non-64-bit loads", .{}); - } // 32-bit moves zero-extend to 64-bit, so xoring the 32-bit // register is the fastest way to zero a register. if (x == 0) { @@ -1478,16 +1506,13 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { // // In this case, the encoding of the REX byte is 0b0100100B try self.code.ensureCapacity(self.code.items.len + 10); - self.rex(.{ .w = true, .b = reg.isExtended() }); + self.rex(.{ .w = reg.size() == 64, .b = reg.isExtended() }); self.code.items.len += 9; self.code.items[self.code.items.len - 9] = 0xB8 | @as(u8, reg.id() & 0b111); const imm_ptr = self.code.items[self.code.items.len - 8 ..][0..8]; mem.writeIntLittle(u64, imm_ptr, x); }, .embedded_in_code => |code_offset| { - if (reg.size() != 64) { - return self.fail(src, "TODO decide whether to implement non-64-bit loads", .{}); - } // We need the offset from RIP in a signed i32 twos complement. // The instruction is 7 bytes long and RIP points to the next instruction. try self.code.ensureCapacity(self.code.items.len + 7); @@ -1495,7 +1520,7 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { // but the operation size is unchanged. Since we're using a disp32, we want mode 0 and lower three // bits as five. // REX 0x8D 0b00RRR101, where RRR is the lower three bits of the id. - self.rex(.{ .w = true, .b = reg.isExtended() }); + self.rex(.{ .w = reg.size() == 64, .b = reg.isExtended() }); self.code.items.len += 6; const rip = self.code.items.len; const big_offset = @intCast(i64, code_offset) - @intCast(i64, rip); @@ -1507,12 +1532,9 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { }, .register => |src_reg| { // If the registers are the same, nothing to do. - if (src_reg == reg) + if (src_reg.id() == reg.id()) return; - if (reg.size() != 64) { - return self.fail(src, "TODO decide whether to implement non-64-bit loads", .{}); - } // This is a variant of 8B /r. Since we're using 64-bit moves, we require a REX. // This is thus three bytes: REX 0x8B R/M. // If the destination is extended, the R field must be 1. 
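// Aside: a sketch (not from the patch) of what the `.w = reg.size() == 64`
// change buys. With register aliasing, genSetReg may now be handed ecx rather
// than rcx, and REX.W is only emitted for the 64-bit aliases. For the
// register-to-register case (opcode 0x8B /r, ModRM mode 0b11) that means:
//
//   mov rcx, rdx  =>  48 8B CA   (REX.W set)
//   mov ecx, edx  =>  8B CA      (no REX needed for the low eight registers)
//
// A hypothetical helper with the same ModRM math, valid only for non-extended
// (id < 8) registers; only std is assumed.

const std = @import("std");

fn movRegReg(code: *std.ArrayList(u8), wide: bool, dst_id: u3, src_id: u3) !void {
    if (wide) try code.append(0x48); // REX.W: 64-bit operand size
    const modrm = 0xC0 | (@as(u8, dst_id) << 3) | src_id; // mode 3, reg = dst, r/m = src
    try code.appendSlice(&[_]u8{ 0x8B, modrm });
}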
@@ -1520,14 +1542,11 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { // Since the register is being accessed directly, the R/M mode is three. The reg field (the middle // three bits) contain the destination, and the R/M field (the lower three bits) contain the source. try self.code.ensureCapacity(self.code.items.len + 3); - self.rex(.{ .w = true, .r = reg.isExtended(), .b = src_reg.isExtended() }); + self.rex(.{ .w = reg.size() == 64, .r = reg.isExtended(), .b = src_reg.isExtended() }); const R = 0xC0 | (@as(u8, reg.id() & 0b111) << 3) | @as(u8, src_reg.id() & 0b111); self.code.appendSliceAssumeCapacity(&[_]u8{ 0x8B, R }); }, .memory => |x| { - if (reg.size() != 64) { - return self.fail(src, "TODO decide whether to implement non-64-bit loads", .{}); - } if (x <= math.maxInt(u32)) { // Moving from memory to a register is a variant of `8B /r`. // Since we're using 64-bit moves, we require a REX. @@ -1537,7 +1556,7 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { // 0b00RRR100, where RRR is the lower three bits of the register ID. // The instruction is thus eight bytes; REX 0x8B 0b00RRR100 0x25 followed by a four-byte disp32. try self.code.ensureCapacity(self.code.items.len + 8); - self.rex(.{ .w = true, .b = reg.isExtended() }); + self.rex(.{ .w = reg.size() == 64, .b = reg.isExtended() }); self.code.appendSliceAssumeCapacity(&[_]u8{ 0x8B, 0x04 | (@as(u8, reg.id() & 0b111) << 3), // R @@ -1580,18 +1599,15 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { // // Furthermore, if this is an extended register, both B and R must be set in the REX byte, as *both* // register operands need to be marked as extended. - self.rex(.{ .w = true, .b = reg.isExtended(), .r = reg.isExtended() }); + self.rex(.{ .w = reg.size() == 64, .b = reg.isExtended(), .r = reg.isExtended() }); const RM = (@as(u8, reg.id() & 0b111) << 3) | @truncate(u3, reg.id()); self.code.appendSliceAssumeCapacity(&[_]u8{ 0x8B, RM }); } } }, .stack_offset => |off| { - if (reg.size() != 64) { - return self.fail(src, "TODO decide whether to implement non-64-bit loads", .{}); - } try self.code.ensureCapacity(self.code.items.len + 7); - self.rex(.{ .w = true, .r = reg.isExtended() }); + self.rex(.{ .w = reg.size() == 64, .r = reg.isExtended() }); const reg_id: u8 = @truncate(u3, reg.id()); if (off <= 128) { // Example: 48 8b 4d 7f mov rcx,QWORD PTR [rbp+0x7f] @@ -1750,11 +1766,16 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { for (param_types) |ty, i| { switch (ty.zigTypeTag()) { .Bool, .Int => { + const param_size = @intCast(u32, ty.abiSize(self.target.*)); if (next_int_reg >= c_abi_int_param_regs.len) { result.args[i] = .{ .stack_offset = next_stack_offset }; - next_stack_offset += @intCast(u32, ty.abiSize(self.target.*)); + next_stack_offset += param_size; } else { - result.args[i] = .{ .register = c_abi_int_param_regs[next_int_reg] }; + const aliased_reg = registerAlias( + c_abi_int_param_regs[next_int_reg], + param_size, + ); + result.args[i] = .{ .register = aliased_reg }; next_int_reg += 1; } }, @@ -1778,7 +1799,9 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { .x86_64 => switch (cc) { .Naked => unreachable, .Unspecified, .C => { - result.return_value = .{ .register = c_abi_int_return_regs[0] }; + const ret_ty_size = @intCast(u32, ret_ty.abiSize(self.target.*)); + const aliased_reg = registerAlias(c_abi_int_return_regs[0], ret_ty_size); + result.return_value = .{ .register = aliased_reg }; }, else => return self.fail(src, "TODO implement function return values for {}", .{cc}), }, @@ 
-1825,5 +1848,19 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { fn parseRegName(name: []const u8) ?Register { return std.meta.stringToEnum(Register, name); } + + fn registerAlias(reg: Register, size_bytes: u32) Register { + switch (arch) { + // For x86_64 we have to pick a smaller register alias depending on abi size. + .x86_64 => switch (size_bytes) { + 1 => return reg.to8(), + 2 => return reg.to16(), + 4 => return reg.to32(), + 8 => return reg.to64(), + else => unreachable, + }, + else => return reg, + } + } }; } diff --git a/src-self-hosted/codegen/x86_64.zig b/src-self-hosted/codegen/x86_64.zig index f6bad45809..c149613ae9 100644 --- a/src-self-hosted/codegen/x86_64.zig +++ b/src-self-hosted/codegen/x86_64.zig @@ -81,6 +81,26 @@ pub const Register = enum(u8) { else => null, }; } + + /// Convert from any register to its 64 bit alias. + pub fn to64(self: Register) Register { + return @intToEnum(Register, self.id()); + } + + /// Convert from any register to its 32 bit alias. + pub fn to32(self: Register) Register { + return @intToEnum(Register, @as(u8, self.id()) + 16); + } + + /// Convert from any register to its 16 bit alias. + pub fn to16(self: Register) Register { + return @intToEnum(Register, @as(u8, self.id()) + 32); + } + + /// Convert from any register to its 8 bit alias. + pub fn to8(self: Register) Register { + return @intToEnum(Register, @as(u8, self.id()) + 48); + } }; // zig fmt: on diff --git a/test/stage2/compare_output.zig b/test/stage2/compare_output.zig index 2e7c6317b6..bf6a01f483 100644 --- a/test/stage2/compare_output.zig +++ b/test/stage2/compare_output.zig @@ -363,5 +363,39 @@ pub fn addCases(ctx: *TestContext) !void { , "", ); + + // Local mutable variables. + case.addCompareOutput( + \\export fn _start() noreturn { + \\ assert(add(3, 4) == 7); + \\ assert(add(20, 10) == 30); + \\ + \\ exit(); + \\} + \\ + \\fn add(a: u32, b: u32) u32 { + \\ var x: u32 = undefined; + \\ x = 0; + \\ x += a; + \\ x += b; + \\ return x; + \\} + \\ + \\pub fn assert(ok: bool) void { + \\ if (!ok) unreachable; // assertion failure + \\} + \\ + \\fn exit() noreturn { + \\ asm volatile ("syscall" + \\ : + \\ : [number] "{rax}" (231), + \\ [arg1] "{rdi}" (0) + \\ : "rcx", "r11", "memory" + \\ ); + \\ unreachable; + \\} + , + "", + ); } } -- cgit v1.2.3 From 8899e6e334758f2e101399075d0456195035c372 Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Wed, 29 Jul 2020 02:28:35 -0700 Subject: stage2: codegen: fix off-by-one stack variable offsets --- src-self-hosted/codegen.zig | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) (limited to 'src-self-hosted/codegen.zig') diff --git a/src-self-hosted/codegen.zig b/src-self-hosted/codegen.zig index be88dc67d8..2ea255bf7f 100644 --- a/src-self-hosted/codegen.zig +++ b/src-self-hosted/codegen.zig @@ -1346,11 +1346,13 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { return self.fail(src, "TODO implement set stack variable with compare flags value (signed)", .{}); }, .immediate => |x_big| { - if (stack_offset > 128) { + const abi_size = ty.abiSize(self.target.*); + const adj_off = stack_offset + abi_size; + if (adj_off > 128) { return self.fail(src, "TODO implement set stack variable with large stack offset", .{}); } try self.code.ensureCapacity(self.code.items.len + 8); - switch (ty.abiSize(self.target.*)) { + switch (abi_size) { 1 => { return self.fail(src, "TODO implement set abi_size=1 stack variable with immediate", .{}); }, @@ -1361,7 +1363,7 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { 
const x = @intCast(u32, x_big); // We have a positive stack offset value but we want a twos complement negative // offset from rbp, which is at the top of the stack frame. - const negative_offset = @intCast(i8, -@intCast(i32, stack_offset)); + const negative_offset = @intCast(i8, -@intCast(i32, adj_off)); const twos_comp = @bitCast(u8, negative_offset); // mov DWORD PTR [rbp+offset], immediate self.code.appendSliceAssumeCapacity(&[_]u8{ 0xc7, 0x45, twos_comp }); @@ -1382,19 +1384,21 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { return self.fail(src, "TODO implement set stack variable from embedded_in_code", .{}); }, .register => |reg| { + const abi_size = ty.abiSize(self.target.*); + const adj_off = stack_offset + abi_size; try self.code.ensureCapacity(self.code.items.len + 7); self.rex(.{ .w = reg.size() == 64, .b = reg.isExtended() }); const reg_id: u8 = @truncate(u3, reg.id()); - if (stack_offset <= 128) { + if (adj_off <= 128) { // example: 48 89 55 7f mov QWORD PTR [rbp+0x7f],rdx const RM = @as(u8, 0b01_000_101) | (reg_id << 3); - const negative_offset = @intCast(i8, -@intCast(i32, stack_offset)); + const negative_offset = @intCast(i8, -@intCast(i32, adj_off)); const twos_comp = @bitCast(u8, negative_offset); self.code.appendSliceAssumeCapacity(&[_]u8{ 0x89, RM, twos_comp }); - } else if (stack_offset <= 2147483648) { + } else if (adj_off <= 2147483648) { // example: 48 89 95 80 00 00 00 mov QWORD PTR [rbp+0x80],rdx const RM = @as(u8, 0b10_000_101) | (reg_id << 3); - const negative_offset = @intCast(i32, -@intCast(i33, stack_offset)); + const negative_offset = @intCast(i32, -@intCast(i33, adj_off)); const twos_comp = @bitCast(u32, negative_offset); self.code.appendSliceAssumeCapacity(&[_]u8{ 0x89, RM }); mem.writeIntLittle(u32, self.code.addManyAsArrayAssumeCapacity(4), twos_comp); @@ -1605,8 +1609,10 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { } } }, - .stack_offset => |off| { + .stack_offset => |unadjusted_off| { try self.code.ensureCapacity(self.code.items.len + 7); + const size_bytes = @divExact(reg.size(), 8); + const off = unadjusted_off + size_bytes; self.rex(.{ .w = reg.size() == 64, .r = reg.isExtended() }); const reg_id: u8 = @truncate(u3, reg.id()); if (off <= 128) { -- cgit v1.2.3
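The off-by-one fix in this last commit adjusts the rbp-relative addressing: a slot returned by allocMem at stack_offset with size abi_size occupies the bytes from rbp - (stack_offset + abi_size) up to rbp - stack_offset, so the mov has to target that lower bound rather than rbp - stack_offset itself. For the first 8-byte slot that is [rbp-8], not [rbp-0], which would overlap the saved rbp. Below is a small sketch of that displacement math, assuming the layout produced by allocMem above; the helper name is made up and only std is assumed.

const std = @import("std");

/// rbp-relative disp8 for a stack slot: the slot's low byte sits
/// `stack_offset + abi_size` bytes below rbp.
fn rbpDisp8(stack_offset: u32, abi_size: u32) ?u8 {
    const adj_off = stack_offset + abi_size;
    if (adj_off > 128) return null; // needs a 32-bit displacement instead
    const negative_offset = @intCast(i8, -@intCast(i32, adj_off));
    return @bitCast(u8, negative_offset);
}

test "first 8-byte slot lands at [rbp-8]" {
    std.debug.assert(rbpDisp8(0, 8).? == 0xf8); // -8 in two's complement
    std.debug.assert(rbpDisp8(8, 4).? == 0xf4); // next slot, 4 bytes: [rbp-12]
}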