author     Andrew Kelley <andrew@ziglang.org>    2020-07-29 02:29:46 -0700
committer  Andrew Kelley <andrew@ziglang.org>    2020-07-29 02:29:46 -0700
commit     b3b00ec62f518875a486b4da532f74e304c3aba2 (patch)
tree       5ae6f6dd8afba7cacd3d5b1532d4c25b3661189d /src-self-hosted/codegen.zig
parent     4fdfaf69c8c55ebac4c5b3c00025c0ac51281b5b (diff)
parent     8899e6e334758f2e101399075d0456195035c372 (diff)
Merge branch 'more-stage2-locals'
Diffstat (limited to 'src-self-hosted/codegen.zig')
-rw-r--r--    src-self-hosted/codegen.zig    593
1 file changed, 499 insertions, 94 deletions
diff --git a/src-self-hosted/codegen.zig b/src-self-hosted/codegen.zig
index 75b042308d..2ea255bf7f 100644
--- a/src-self-hosted/codegen.zig
+++ b/src-self-hosted/codegen.zig
@@ -50,7 +50,7 @@ pub fn generateSymbol(
switch (typed_value.ty.zigTypeTag()) {
.Fn => {
- switch (bin_file.options.target.cpu.arch) {
+ switch (bin_file.base.options.target.cpu.arch) {
//.arm => return Function(.arm).generateSymbol(bin_file, src, typed_value, code),
//.armeb => return Function(.armeb).generateSymbol(bin_file, src, typed_value, code),
//.aarch64 => return Function(.aarch64).generateSymbol(bin_file, src, typed_value, code),
@@ -143,7 +143,7 @@ pub fn generateSymbol(
// TODO handle the dependency of this symbol on the decl's vaddr.
// If the decl changes vaddr, then this symbol needs to get regenerated.
const vaddr = bin_file.local_symbols.items[decl.link.local_sym_index].st_value;
- const endian = bin_file.options.target.cpu.arch.endian();
+ const endian = bin_file.base.options.target.cpu.arch.endian();
switch (bin_file.ptr_width) {
.p32 => {
try code.resize(4);
@@ -166,7 +166,7 @@ pub fn generateSymbol(
};
},
.Int => {
- const info = typed_value.ty.intInfo(bin_file.options.target);
+ const info = typed_value.ty.intInfo(bin_file.base.options.target);
if (info.bits == 8 and !info.signed) {
const x = typed_value.val.toUnsignedInt();
try code.append(@intCast(u8, x));
@@ -209,10 +209,16 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
err_msg: ?*ErrorMsg,
args: []MCValue,
ret_mcv: MCValue,
+ fn_type: Type,
arg_index: usize,
src: usize,
stack_align: u32,
+ /// The value is an offset into the `Function` `code` from the beginning.
+ /// To perform the reloc, write a 32-bit signed little-endian integer
+ /// which is a relative jump, based on the address following the reloc.
+ exitlude_jump_relocs: std.ArrayListUnmanaged(usize) = .{},
+
/// Whenever there is a runtime branch, we push a Branch onto this stack,
/// and pop it off when the runtime branch joins. This provides an "overlay"
/// of the table of mappings from instructions to `MCValue` from within the branch.
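
The exitlude_jump_relocs doc comment in the hunk above can be made concrete: each recorded offset points at four placeholder bytes, and the value written there is the distance from the byte just past those four bytes to the jump target. A minimal sketch of that math (performJumpReloc and the offsets in the test are made up for illustration):

    const std = @import("std");
    const mem = std.mem;

    /// Resolve one recorded reloc: `reloc_offset` is where the 4 placeholder bytes
    /// live inside `code`, and `target_offset` is where the jump should land.
    fn performJumpReloc(code: []u8, reloc_offset: usize, target_offset: usize) void {
        // The CPU adds the rel32 to the address of the *next* instruction,
        // i.e. the byte just past the 4-byte displacement.
        const next_inst_offset = reloc_offset + 4;
        const delta = @intCast(i32, @intCast(i64, target_offset) - @intCast(i64, next_inst_offset));
        mem.writeIntLittle(i32, code[reloc_offset..][0..4], delta);
    }

    test "a jump that lands 16 bytes past its displacement field" {
        var code = [_]u8{0} ** 40;
        performJumpReloc(&code, 10, 30);
        std.debug.assert(mem.readIntLittle(i32, code[10..][0..4]) == 16); // 30 - (10 + 4)
    }
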
@@ -229,16 +235,26 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
unreach,
/// No more references to this value remain.
dead,
+ /// The value is undefined.
+ undef,
/// A pointer-sized integer that fits in a register.
+ /// If the type is a pointer, this is the pointer address in virtual address space.
immediate: u64,
/// The constant was emitted into the code, at this offset.
+ /// If the type is a pointer, it means the pointer address is embedded in the code.
embedded_in_code: usize,
+ /// The value is a pointer to a constant which was emitted into the code, at this offset.
+ ptr_embedded_in_code: usize,
/// The value is in a target-specific register.
register: Register,
/// The value is in memory at a hard-coded address.
+ /// If the type is a pointer, it means the pointer address is at this memory location.
memory: u64,
/// The value is one of the stack variables.
- stack_offset: u64,
+ /// If the type is a pointer, it means the pointer address is in the stack at this offset.
+ stack_offset: u32,
+ /// The value is a pointer to one of the stack variables (payload is stack offset).
+ ptr_stack_offset: u32,
/// The value is in the compare flags assuming an unsigned operation,
/// with this operator applied on top of it.
compare_flags_unsigned: math.CompareOperator,
@@ -271,6 +287,9 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
.memory,
.compare_flags_unsigned,
.compare_flags_signed,
+ .ptr_stack_offset,
+ .ptr_embedded_in_code,
+ .undef,
=> false,
.register,
@@ -309,6 +328,19 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
self.free_registers |= @as(FreeRegInt, 1) << shift;
}
+ /// Before calling, must ensureCapacity + 1 on branch.registers.
+ /// Returns `null` if all registers are allocated.
+ fn allocReg(self: *Branch, inst: *ir.Inst) ?Register {
+ const free_index = @ctz(FreeRegInt, self.free_registers);
+ if (free_index >= callee_preserved_regs.len) {
+ return null;
+ }
+ self.free_registers &= ~(@as(FreeRegInt, 1) << free_index);
+ const reg = callee_preserved_regs[free_index];
+ self.registers.putAssumeCapacityNoClobber(reg, .{ .inst = inst });
+ return reg;
+ }
+
fn deinit(self: *Branch, gpa: *Allocator) void {
self.inst_table.deinit(gpa);
self.registers.deinit(gpa);
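
allocReg above works off a free-register bitmask: bit N set means callee_preserved_regs[N] is still free, and @ctz picks the lowest free slot. The same idea in isolation, with a hypothetical four-register mask (FreeRegInt4 and allocIndex are illustration-only names):

    const std = @import("std");

    const FreeRegInt4 = u4; // one bit per register in a hypothetical 4-register set

    /// Hand out the lowest free register index, or null when the mask is empty.
    fn allocIndex(free_registers: *FreeRegInt4) ?u2 {
        // @ctz counts trailing zero bits, i.e. finds the lowest set bit.
        const free_index = @ctz(FreeRegInt4, free_registers.*);
        if (free_index >= 4) return null; // no bits set: everything is allocated
        free_registers.* &= ~(@as(FreeRegInt4, 1) << @intCast(u2, free_index));
        return @intCast(u2, free_index);
    }

    test "registers come back lowest-index first until the mask is exhausted" {
        var mask: FreeRegInt4 = 0b1011; // registers 0, 1 and 3 are free
        std.debug.assert(allocIndex(&mask).? == 0);
        std.debug.assert(allocIndex(&mask).? == 1);
        std.debug.assert(allocIndex(&mask).? == 3);
        std.debug.assert(allocIndex(&mask) == null);
    }
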
@@ -349,18 +381,20 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
var function = Self{
.gpa = bin_file.allocator,
- .target = &bin_file.options.target,
+ .target = &bin_file.base.options.target,
.bin_file = bin_file,
.mod_fn = module_fn,
.code = code,
.err_msg = null,
.args = undefined, // populated after `resolveCallingConventionValues`
.ret_mcv = undefined, // populated after `resolveCallingConventionValues`
+ .fn_type = fn_type,
.arg_index = 0,
.branch_stack = &branch_stack,
.src = src,
.stack_align = undefined,
};
+ defer function.exitlude_jump_relocs.deinit(bin_file.allocator);
var call_info = function.resolveCallingConventionValues(src, fn_type) catch |err| switch (err) {
error.CodegenFail => return Result{ .fail = function.err_msg.? },
@@ -386,29 +420,78 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
}
fn gen(self: *Self) !void {
- try self.code.ensureCapacity(self.code.items.len + 11);
-
- // TODO omit this for naked functions
- // push rbp
- // mov rbp, rsp
- self.code.appendSliceAssumeCapacity(&[_]u8{ 0x55, 0x48, 0x89, 0xe5 });
-
- // sub rsp, x
- const stack_end = self.branch_stack.items[0].max_end_stack;
- if (stack_end > math.maxInt(i32)) {
- return self.fail(self.src, "too much stack used in call parameters", .{});
- } else if (stack_end > math.maxInt(i8)) {
- // 48 83 ec xx sub rsp,0x10
- self.code.appendSliceAssumeCapacity(&[_]u8{ 0x48, 0x81, 0xec });
- const x = @intCast(u32, stack_end);
- mem.writeIntLittle(u32, self.code.addManyAsArrayAssumeCapacity(4), x);
- } else if (stack_end != 0) {
- // 48 81 ec xx xx xx xx sub rsp,0x80
- const x = @intCast(u8, stack_end);
- self.code.appendSliceAssumeCapacity(&[_]u8{ 0x48, 0x83, 0xec, x });
- }
+ switch (arch) {
+ .x86_64 => {
+ try self.code.ensureCapacity(self.code.items.len + 11);
+
+ const cc = self.fn_type.fnCallingConvention();
+ if (cc != .Naked) {
+ // We want to subtract the aligned stack frame size from rsp here, but we don't
+ // yet know how big it will be, so we leave room for a 4-byte stack size.
+ // TODO During semantic analysis, check if there are no function calls. If there
+ // are none, here we can omit the part where we subtract and then add rsp.
+ self.code.appendSliceAssumeCapacity(&[_]u8{
+ // push rbp
+ 0x55,
+ // mov rbp, rsp
+ 0x48,
+ 0x89,
+ 0xe5,
+ // sub rsp, imm32 (with reloc)
+ 0x48,
+ 0x81,
+ 0xec,
+ });
+ const reloc_index = self.code.items.len;
+ self.code.items.len += 4;
+
+ try self.genBody(self.mod_fn.analysis.success);
+
+ const stack_end = self.branch_stack.items[0].max_end_stack;
+ if (stack_end > math.maxInt(i32))
+ return self.fail(self.src, "too much stack used in call parameters", .{});
+ const aligned_stack_end = mem.alignForward(stack_end, self.stack_align);
+ mem.writeIntLittle(u32, self.code.items[reloc_index..][0..4], @intCast(u32, aligned_stack_end));
+
+ if (self.code.items.len >= math.maxInt(i32)) {
+ return self.fail(self.src, "unable to perform relocation: jump too far", .{});
+ }
+ for (self.exitlude_jump_relocs.items) |jmp_reloc| {
+ const amt = self.code.items.len - (jmp_reloc + 4);
+ // If it wouldn't jump at all, elide it.
+ if (amt == 0) {
+ self.code.items.len -= 5;
+ continue;
+ }
+ const s32_amt = @intCast(i32, amt);
+ mem.writeIntLittle(i32, self.code.items[jmp_reloc..][0..4], s32_amt);
+ }
+
+ try self.code.ensureCapacity(self.code.items.len + 9);
+ // add rsp, x
+ if (aligned_stack_end > math.maxInt(i8)) {
+ // example: 48 81 c4 ff ff ff 7f add rsp,0x7fffffff
+ self.code.appendSliceAssumeCapacity(&[_]u8{ 0x48, 0x81, 0xc4 });
+ const x = @intCast(u32, aligned_stack_end);
+ mem.writeIntLittle(u32, self.code.addManyAsArrayAssumeCapacity(4), x);
+ } else if (aligned_stack_end != 0) {
+ // example: 48 83 c4 7f add rsp,0x7f
+ const x = @intCast(u8, aligned_stack_end);
+ self.code.appendSliceAssumeCapacity(&[_]u8{ 0x48, 0x83, 0xc4, x });
+ }
- try self.genBody(self.mod_fn.analysis.success);
+ self.code.appendSliceAssumeCapacity(&[_]u8{
+ 0x5d, // pop rbp
+ 0xc3, // ret
+ });
+ } else {
+ try self.genBody(self.mod_fn.analysis.success);
+ }
+ },
+ else => {
+ try self.genBody(self.mod_fn.analysis.success);
+ },
+ }
}
fn genBody(self: *Self, body: ir.Body) InnerError!void {
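
The prologue logic in gen() above emits `sub rsp, imm32` with a four-byte hole, runs genBody, and only then backpatches the aligned frame size. A worked example of just the backpatch step, assuming the body ends up needing 72 bytes of locals and a 16-byte stack alignment (the buffer and index here are made up for illustration):

    const std = @import("std");
    const mem = std.mem;

    test "backpatching the sub rsp, imm32 placeholder" {
        // push rbp; mov rbp, rsp; sub rsp, imm32 -- the last 4 bytes start as a placeholder.
        var code = [_]u8{ 0x55, 0x48, 0x89, 0xe5, 0x48, 0x81, 0xec, 0, 0, 0, 0 };
        const reloc_index = 7;

        // ... genBody would run here; afterwards max_end_stack is known, say 72 bytes ...
        const stack_end: usize = 72;
        const aligned_stack_end = mem.alignForward(stack_end, 16); // rounds up to 80

        mem.writeIntLittle(u32, code[reloc_index..][0..4], @intCast(u32, aligned_stack_end));
        std.debug.assert(mem.eql(u8, code[reloc_index..], &[_]u8{ 0x50, 0, 0, 0 }));
    }
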
@@ -432,8 +515,9 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
entry.value = .dead;
switch (prev_value) {
.register => |reg| {
- _ = branch.registers.remove(reg);
- branch.markRegFree(reg);
+ const reg64 = reg.to64();
+ _ = branch.registers.remove(reg64);
+ branch.markRegFree(reg64);
},
else => {}, // TODO process stack allocation death
}
@@ -459,26 +543,23 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
.cmp_neq => return self.genCmp(inst.castTag(.cmp_neq).?, .neq),
.condbr => return self.genCondBr(inst.castTag(.condbr).?),
.constant => unreachable, // excluded from function bodies
+ .floatcast => return self.genFloatCast(inst.castTag(.floatcast).?),
+ .intcast => return self.genIntCast(inst.castTag(.intcast).?),
.isnonnull => return self.genIsNonNull(inst.castTag(.isnonnull).?),
.isnull => return self.genIsNull(inst.castTag(.isnull).?),
+ .load => return self.genLoad(inst.castTag(.load).?),
+ .not => return self.genNot(inst.castTag(.not).?),
.ptrtoint => return self.genPtrToInt(inst.castTag(.ptrtoint).?),
+ .ref => return self.genRef(inst.castTag(.ref).?),
.ret => return self.genRet(inst.castTag(.ret).?),
.retvoid => return self.genRetVoid(inst.castTag(.retvoid).?),
+ .store => return self.genStore(inst.castTag(.store).?),
.sub => return self.genSub(inst.castTag(.sub).?),
.unreach => return MCValue{ .unreach = {} },
- .not => return self.genNot(inst.castTag(.not).?),
- .floatcast => return self.genFloatCast(inst.castTag(.floatcast).?),
- .intcast => return self.genIntCast(inst.castTag(.intcast).?),
}
}
- fn genAlloc(self: *Self, inst: *ir.Inst.NoOp) !MCValue {
- const elem_ty = inst.base.ty.elemType();
- const abi_size = math.cast(u32, elem_ty.abiSize(self.target.*)) catch {
- return self.fail(inst.base.src, "type '{}' too big to fit into stack frame", .{elem_ty});
- };
- // TODO swap this for inst.base.ty.ptrAlign
- const abi_align = elem_ty.abiAlignment(self.target.*);
+ fn allocMem(self: *Self, inst: *ir.Inst, abi_size: u32, abi_align: u32) !u32 {
if (abi_align > self.stack_align)
self.stack_align = abi_align;
const branch = &self.branch_stack.items[self.branch_stack.items.len - 1];
@@ -488,10 +569,62 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
if (branch.next_stack_offset > branch.max_end_stack)
branch.max_end_stack = branch.next_stack_offset;
try branch.stack.putNoClobber(self.gpa, offset, .{
- .inst = &inst.base,
+ .inst = inst,
.size = abi_size,
});
- return MCValue{ .stack_offset = offset };
+ return offset;
+ }
+
+ /// Use a pointer instruction as the basis for allocating stack memory.
+ fn allocMemPtr(self: *Self, inst: *ir.Inst) !u32 {
+ const elem_ty = inst.ty.elemType();
+ const abi_size = math.cast(u32, elem_ty.abiSize(self.target.*)) catch {
+ return self.fail(inst.src, "type '{}' too big to fit into stack frame", .{elem_ty});
+ };
+ // TODO swap this for inst.ty.ptrAlign
+ const abi_align = elem_ty.abiAlignment(self.target.*);
+ return self.allocMem(inst, abi_size, abi_align);
+ }
+
+ fn allocRegOrMem(self: *Self, inst: *ir.Inst) !MCValue {
+ const elem_ty = inst.ty;
+ const abi_size = math.cast(u32, elem_ty.abiSize(self.target.*)) catch {
+ return self.fail(inst.src, "type '{}' too big to fit into stack frame", .{elem_ty});
+ };
+ const abi_align = elem_ty.abiAlignment(self.target.*);
+ if (abi_align > self.stack_align)
+ self.stack_align = abi_align;
+ const branch = &self.branch_stack.items[self.branch_stack.items.len - 1];
+
+ // Make sure the type can fit in a register before we try to allocate one.
+ const ptr_bits = arch.ptrBitWidth();
+ const ptr_bytes: u64 = @divExact(ptr_bits, 8);
+ if (abi_size <= ptr_bytes) {
+ try branch.registers.ensureCapacity(self.gpa, branch.registers.items().len + 1);
+ if (branch.allocReg(inst)) |reg| {
+ return MCValue{ .register = registerAlias(reg, abi_size) };
+ }
+ }
+ const stack_offset = try self.allocMem(inst, abi_size, abi_align);
+ return MCValue{ .stack_offset = stack_offset };
+ }
+
+ /// Does not "move" the instruction.
+ fn copyToNewRegister(self: *Self, inst: *ir.Inst) !MCValue {
+ const branch = &self.branch_stack.items[self.branch_stack.items.len - 1];
+ try branch.registers.ensureCapacity(self.gpa, branch.registers.items().len + 1);
+
+ const reg = branch.allocReg(inst) orelse
+ return self.fail(inst.src, "TODO implement spilling register to stack", .{});
+ const old_mcv = branch.inst_table.get(inst).?;
+ const new_mcv: MCValue = .{ .register = reg };
+ try self.genSetReg(inst.src, reg, old_mcv);
+ return new_mcv;
+ }
+
+ fn genAlloc(self: *Self, inst: *ir.Inst.NoOp) !MCValue {
+ const stack_offset = try self.allocMemPtr(&inst.base);
+ return MCValue{ .ptr_stack_offset = stack_offset };
}
fn genFloatCast(self: *Self, inst: *ir.Inst.UnOp) !MCValue {
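
The stack-slot arithmetic shared by allocMem and allocRegOrMem is worth checking with concrete numbers: the recorded offset is alignForward(next_stack_offset + abi_size, abi_align), so an 8-byte value allocated after 4 used bytes lands at offset 16 rather than 12. A small sketch of just that formula:

    const std = @import("std");
    const mem = std.mem;

    test "an 8-byte slot allocated after 4 used bytes lands at offset 16" {
        var next_stack_offset: u32 = 4; // bytes already reserved in this frame
        const abi_size: u32 = 8;
        const abi_align: u32 = 8;

        // Same formula as allocMem: the end of the new slot is what gets aligned,
        // and that aligned value becomes the slot's recorded stack offset.
        const offset = @intCast(u32, mem.alignForward(next_stack_offset + abi_size, abi_align));
        next_stack_offset = offset;

        std.debug.assert(offset == 16);
    }
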
@@ -572,6 +705,87 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
}
}
+ fn genLoad(self: *Self, inst: *ir.Inst.UnOp) !MCValue {
+ const elem_ty = inst.base.ty;
+ if (!elem_ty.hasCodeGenBits())
+ return MCValue.none;
+ const ptr = try self.resolveInst(inst.operand);
+ const is_volatile = inst.operand.ty.isVolatilePtr();
+ if (inst.base.isUnused() and !is_volatile)
+ return MCValue.dead;
+ const dst_mcv: MCValue = blk: {
+ if (inst.base.operandDies(0) and ptr.isMutable()) {
+ // The MCValue that holds the pointer can be re-used as the value.
+ // TODO track this in the register/stack allocation metadata.
+ break :blk ptr;
+ } else {
+ break :blk try self.allocRegOrMem(&inst.base);
+ }
+ };
+ switch (ptr) {
+ .none => unreachable,
+ .undef => unreachable,
+ .unreach => unreachable,
+ .dead => unreachable,
+ .compare_flags_unsigned => unreachable,
+ .compare_flags_signed => unreachable,
+ .immediate => |imm| try self.setRegOrMem(inst.base.src, elem_ty, dst_mcv, .{ .memory = imm }),
+ .ptr_stack_offset => |off| try self.setRegOrMem(inst.base.src, elem_ty, dst_mcv, .{ .stack_offset = off }),
+ .ptr_embedded_in_code => |off| {
+ try self.setRegOrMem(inst.base.src, elem_ty, dst_mcv, .{ .embedded_in_code = off });
+ },
+ .embedded_in_code => {
+ return self.fail(inst.base.src, "TODO implement loading from MCValue.embedded_in_code", .{});
+ },
+ .register => {
+ return self.fail(inst.base.src, "TODO implement loading from MCValue.register", .{});
+ },
+ .memory => {
+ return self.fail(inst.base.src, "TODO implement loading from MCValue.memory", .{});
+ },
+ .stack_offset => {
+ return self.fail(inst.base.src, "TODO implement loading from MCValue.stack_offset", .{});
+ },
+ }
+ return dst_mcv;
+ }
+
+ fn genStore(self: *Self, inst: *ir.Inst.BinOp) !MCValue {
+ const ptr = try self.resolveInst(inst.lhs);
+ const value = try self.resolveInst(inst.rhs);
+ const elem_ty = inst.rhs.ty;
+ switch (ptr) {
+ .none => unreachable,
+ .undef => unreachable,
+ .unreach => unreachable,
+ .dead => unreachable,
+ .compare_flags_unsigned => unreachable,
+ .compare_flags_signed => unreachable,
+ .immediate => |imm| {
+ try self.setRegOrMem(inst.base.src, elem_ty, .{ .memory = imm }, value);
+ },
+ .ptr_stack_offset => |off| {
+ try self.genSetStack(inst.base.src, elem_ty, off, value);
+ },
+ .ptr_embedded_in_code => |off| {
+ try self.setRegOrMem(inst.base.src, elem_ty, .{ .embedded_in_code = off }, value);
+ },
+ .embedded_in_code => {
+ return self.fail(inst.base.src, "TODO implement storing to MCValue.embedded_in_code", .{});
+ },
+ .register => {
+ return self.fail(inst.base.src, "TODO implement storing to MCValue.register", .{});
+ },
+ .memory => {
+ return self.fail(inst.base.src, "TODO implement storing to MCValue.memory", .{});
+ },
+ .stack_offset => {
+ return self.fail(inst.base.src, "TODO implement storing to MCValue.stack_offset", .{});
+ },
+ }
+ return .none;
+ }
+
fn genSub(self: *Self, inst: *ir.Inst.BinOp) !MCValue {
// No side effects, so if it's unreferenced, do nothing.
if (inst.base.isUnused())
@@ -654,13 +868,19 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
fn genX8664BinMathCode(self: *Self, src: usize, dst_mcv: MCValue, src_mcv: MCValue, opx: u8, mr: u8) !void {
switch (dst_mcv) {
.none => unreachable,
+ .undef => unreachable,
.dead, .unreach, .immediate => unreachable,
.compare_flags_unsigned => unreachable,
.compare_flags_signed => unreachable,
+ .ptr_stack_offset => unreachable,
+ .ptr_embedded_in_code => unreachable,
.register => |dst_reg| {
switch (src_mcv) {
.none => unreachable,
+ .undef => try self.genSetReg(src, dst_reg, .undef),
.dead, .unreach => unreachable,
+ .ptr_stack_offset => unreachable,
+ .ptr_embedded_in_code => unreachable,
.register => |src_reg| {
self.rex(.{ .b = dst_reg.isExtended(), .r = src_reg.isExtended(), .w = dst_reg.size() == 64 });
self.code.appendSliceAssumeCapacity(&[_]u8{ mr + 0x1, 0xC0 | (@as(u8, src_reg.id() & 0b111) << 3) | @as(u8, dst_reg.id() & 0b111) });
@@ -743,6 +963,8 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
for (info.args) |mc_arg, arg_i| {
const arg = inst.args[arg_i];
const arg_mcv = try self.resolveInst(inst.args[arg_i]);
+ // Here we do not use setRegOrMem even though the logic is similar, because
+ // the function call will move the stack pointer, so the offsets are different.
switch (mc_arg) {
.none => continue,
.register => |reg| {
@@ -754,6 +976,13 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
// mov qword ptr [rsp + stack_offset], x
return self.fail(inst.base.src, "TODO implement calling with parameters in memory", .{});
},
+ .ptr_stack_offset => {
+ return self.fail(inst.base.src, "TODO implement calling with MCValue.ptr_stack_offset arg", .{});
+ },
+ .ptr_embedded_in_code => {
+ return self.fail(inst.base.src, "TODO implement calling with MCValue.ptr_embedded_in_code arg", .{});
+ },
+ .undef => unreachable,
.immediate => unreachable,
.unreach => unreachable,
.dead => unreachable,
@@ -788,17 +1017,47 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
return info.return_value;
}
+ fn genRef(self: *Self, inst: *ir.Inst.UnOp) !MCValue {
+ const operand = try self.resolveInst(inst.operand);
+ switch (operand) {
+ .unreach => unreachable,
+ .dead => unreachable,
+ .none => return .none,
+
+ .immediate,
+ .register,
+ .ptr_stack_offset,
+ .ptr_embedded_in_code,
+ .compare_flags_unsigned,
+ .compare_flags_signed,
+ => {
+ const stack_offset = try self.allocMemPtr(&inst.base);
+ try self.genSetStack(inst.base.src, inst.operand.ty, stack_offset, operand);
+ return MCValue{ .ptr_stack_offset = stack_offset };
+ },
+
+ .stack_offset => |offset| return MCValue{ .ptr_stack_offset = offset },
+ .embedded_in_code => |offset| return MCValue{ .ptr_embedded_in_code = offset },
+ .memory => |vaddr| return MCValue{ .immediate = vaddr },
+
+ .undef => return self.fail(inst.base.src, "TODO implement ref on an undefined value", .{}),
+ }
+ }
+
fn ret(self: *Self, src: usize, mcv: MCValue) !MCValue {
- try self.setRegOrStack(src, self.ret_mcv, mcv);
+ const ret_ty = self.fn_type.fnReturnType();
+ try self.setRegOrMem(src, ret_ty, self.ret_mcv, mcv);
switch (arch) {
.i386 => {
try self.code.append(0xc3); // ret
},
.x86_64 => {
- try self.code.appendSlice(&[_]u8{
- 0x5d, // pop rbp
- 0xc3, // ret
- });
+ // TODO when implementing defer, this will need to jump to the appropriate defer expression.
+ // TODO optimization opportunity: figure out when we can emit this as a 2 byte instruction
+ // which is available if the jump is 127 bytes or less forward.
+ try self.code.resize(self.code.items.len + 5);
+ self.code.items[self.code.items.len - 5] = 0xe9; // jmp rel32
+ try self.exitlude_jump_relocs.append(self.gpa, self.code.items.len - 4);
},
else => return self.fail(src, "TODO implement return for {}", .{self.target.cpu.arch}),
}
@@ -882,7 +1141,9 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
// test reg, 1
// TODO detect al, ax, eax
try self.code.ensureCapacity(self.code.items.len + 4);
- self.rex(.{ .b = reg.isExtended(), .w = reg.size() == 64 });
+ // TODO audit this codegen: we force w = true here to make
+ // the value affect the big register
+ self.rex(.{ .b = reg.isExtended(), .w = true });
self.code.appendSliceAssumeCapacity(&[_]u8{
0xf6,
@as(u8, 0xC0) | (0 << 3) | @truncate(u3, reg.id()),
@@ -938,6 +1199,11 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
switch (reloc) {
.rel32 => |pos| {
const amt = self.code.items.len - (pos + 4);
+ // If it wouldn't jump at all, elide it.
+ if (amt == 0) {
+ self.code.items.len -= 5;
+ return;
+ }
const s32_amt = math.cast(i32, amt) catch
return self.fail(src, "unable to perform relocation: jump too far", .{});
mem.writeIntLittle(i32, self.code.items[pos..][0..4], s32_amt);
@@ -1042,25 +1308,141 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
}
/// Sets the value without any modifications to register allocation metadata or stack allocation metadata.
- fn setRegOrStack(self: *Self, src: usize, loc: MCValue, val: MCValue) !void {
+ fn setRegOrMem(self: *Self, src: usize, ty: Type, loc: MCValue, val: MCValue) !void {
switch (loc) {
.none => return,
.register => |reg| return self.genSetReg(src, reg, val),
- .stack_offset => {
- return self.fail(src, "TODO implement setRegOrStack for stack offset", .{});
+ .stack_offset => |off| return self.genSetStack(src, ty, off, val),
+ .memory => {
+ return self.fail(src, "TODO implement setRegOrMem for memory", .{});
},
else => unreachable,
}
}
- fn genSetReg(self: *Self, src: usize, reg: Register, mcv: MCValue) error{ CodegenFail, OutOfMemory }!void {
+ fn genSetStack(self: *Self, src: usize, ty: Type, stack_offset: u32, mcv: MCValue) InnerError!void {
switch (arch) {
.x86_64 => switch (mcv) {
.dead => unreachable,
+ .ptr_stack_offset => unreachable,
+ .ptr_embedded_in_code => unreachable,
.unreach, .none => return, // Nothing to do.
+ .undef => {
+ if (!self.wantSafety())
+ return; // The already existing value will do just fine.
+ // TODO Upgrade this to a memset call when we have that available.
+ switch (ty.abiSize(self.target.*)) {
+ 1 => return self.genSetStack(src, ty, stack_offset, .{ .immediate = 0xaa }),
+ 2 => return self.genSetStack(src, ty, stack_offset, .{ .immediate = 0xaaaa }),
+ 4 => return self.genSetStack(src, ty, stack_offset, .{ .immediate = 0xaaaaaaaa }),
+ 8 => return self.genSetStack(src, ty, stack_offset, .{ .immediate = 0xaaaaaaaaaaaaaaaa }),
+ else => return self.fail(src, "TODO implement memset", .{}),
+ }
+ },
+ .compare_flags_unsigned => |op| {
+ return self.fail(src, "TODO implement set stack variable with compare flags value (unsigned)", .{});
+ },
+ .compare_flags_signed => |op| {
+ return self.fail(src, "TODO implement set stack variable with compare flags value (signed)", .{});
+ },
+ .immediate => |x_big| {
+ const abi_size = ty.abiSize(self.target.*);
+ const adj_off = stack_offset + abi_size;
+ if (adj_off > 128) {
+ return self.fail(src, "TODO implement set stack variable with large stack offset", .{});
+ }
+ try self.code.ensureCapacity(self.code.items.len + 8);
+ switch (abi_size) {
+ 1 => {
+ return self.fail(src, "TODO implement set abi_size=1 stack variable with immediate", .{});
+ },
+ 2 => {
+ return self.fail(src, "TODO implement set abi_size=2 stack variable with immediate", .{});
+ },
+ 4 => {
+ const x = @intCast(u32, x_big);
+ // We have a positive stack offset value but we want a twos complement negative
+ // offset from rbp, which is at the top of the stack frame.
+ const negative_offset = @intCast(i8, -@intCast(i32, adj_off));
+ const twos_comp = @bitCast(u8, negative_offset);
+ // mov DWORD PTR [rbp+offset], immediate
+ self.code.appendSliceAssumeCapacity(&[_]u8{ 0xc7, 0x45, twos_comp });
+ mem.writeIntLittle(u32, self.code.addManyAsArrayAssumeCapacity(4), x);
+ },
+ 8 => {
+ return self.fail(src, "TODO implement set abi_size=8 stack variable with immediate", .{});
+ },
+ else => {
+ return self.fail(src, "TODO implement set abi_size=large stack variable with immediate", .{});
+ },
+ }
+ if (x_big <= math.maxInt(u32)) {} else {
+ return self.fail(src, "TODO implement set stack variable with large immediate", .{});
+ }
+ },
+ .embedded_in_code => |code_offset| {
+ return self.fail(src, "TODO implement set stack variable from embedded_in_code", .{});
+ },
+ .register => |reg| {
+ const abi_size = ty.abiSize(self.target.*);
+ const adj_off = stack_offset + abi_size;
+ try self.code.ensureCapacity(self.code.items.len + 7);
+ self.rex(.{ .w = reg.size() == 64, .b = reg.isExtended() });
+ const reg_id: u8 = @truncate(u3, reg.id());
+ if (adj_off <= 128) {
+ // example: 48 89 55 7f mov QWORD PTR [rbp+0x7f],rdx
+ const RM = @as(u8, 0b01_000_101) | (reg_id << 3);
+ const negative_offset = @intCast(i8, -@intCast(i32, adj_off));
+ const twos_comp = @bitCast(u8, negative_offset);
+ self.code.appendSliceAssumeCapacity(&[_]u8{ 0x89, RM, twos_comp });
+ } else if (adj_off <= 2147483648) {
+ // example: 48 89 95 80 00 00 00 mov QWORD PTR [rbp+0x80],rdx
+ const RM = @as(u8, 0b10_000_101) | (reg_id << 3);
+ const negative_offset = @intCast(i32, -@intCast(i33, adj_off));
+ const twos_comp = @bitCast(u32, negative_offset);
+ self.code.appendSliceAssumeCapacity(&[_]u8{ 0x89, RM });
+ mem.writeIntLittle(u32, self.code.addManyAsArrayAssumeCapacity(4), twos_comp);
+ } else {
+ return self.fail(src, "stack offset too large", .{});
+ }
+ },
+ .memory => |vaddr| {
+ return self.fail(src, "TODO implement set stack variable from memory vaddr", .{});
+ },
+ .stack_offset => |off| {
+ if (stack_offset == off)
+ return; // Copy stack variable to itself; nothing to do.
+ return self.fail(src, "TODO implement copy stack variable to stack variable", .{});
+ },
+ },
+ else => return self.fail(src, "TODO implement getSetStack for {}", .{self.target.cpu.arch}),
+ }
+ }
+
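
Both genSetStack above and genSetReg below turn a positive frame offset into a negative rbp-relative displacement: adj_off = stack_offset + abi_size, and the emitted byte is the two's complement of -adj_off. A sketch of just that conversion for the disp8 fast path (rbpDisp8 is an illustration-only helper):

    const std = @import("std");

    /// Build the disp8 byte for a [rbp + disp8] operand from a positive frame offset,
    /// mirroring the adj_off computation used in genSetStack/genSetReg.
    fn rbpDisp8(stack_offset: u32, abi_size: u32) u8 {
        const adj_off = stack_offset + abi_size;
        std.debug.assert(adj_off <= 128); // larger frames use the disp32 form instead
        const negative_offset = @intCast(i8, -@intCast(i32, adj_off));
        return @bitCast(u8, negative_offset);
    }

    test "frame offsets become negative rbp displacements" {
        std.debug.assert(rbpDisp8(0, 8) == 0xf8); // first 8-byte slot: [rbp-0x8]
        std.debug.assert(rbpDisp8(120, 8) == 0x80); // adj_off 128 is the i8 limit: [rbp-0x80]
    }
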
+ fn genSetReg(self: *Self, src: usize, reg: Register, mcv: MCValue) InnerError!void {
+ switch (arch) {
+ .x86_64 => switch (mcv) {
+ .dead => unreachable,
+ .ptr_stack_offset => unreachable,
+ .ptr_embedded_in_code => unreachable,
+ .unreach, .none => return, // Nothing to do.
+ .undef => {
+ if (!self.wantSafety())
+ return; // The already existing value will do just fine.
+ // Write the debug undefined value.
+ switch (reg.size()) {
+ 8 => return self.genSetReg(src, reg, .{ .immediate = 0xaa }),
+ 16 => return self.genSetReg(src, reg, .{ .immediate = 0xaaaa }),
+ 32 => return self.genSetReg(src, reg, .{ .immediate = 0xaaaaaaaa }),
+ 64 => return self.genSetReg(src, reg, .{ .immediate = 0xaaaaaaaaaaaaaaaa }),
+ else => unreachable,
+ }
+ },
.compare_flags_unsigned => |op| {
try self.code.ensureCapacity(self.code.items.len + 3);
- self.rex(.{ .b = reg.isExtended(), .w = reg.size() == 64 });
+ // TODO audit this codegen: we force w = true here to make
+ // the value affect the big register
+ self.rex(.{ .b = reg.isExtended(), .w = true });
const opcode: u8 = switch (op) {
.gte => 0x93,
.gt => 0x97,
@@ -1076,9 +1458,6 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
return self.fail(src, "TODO set register with compare flags value (signed)", .{});
},
.immediate => |x| {
- if (reg.size() != 64) {
- return self.fail(src, "TODO decide whether to implement non-64-bit loads", .{});
- }
// 32-bit moves zero-extend to 64-bit, so xoring the 32-bit
// register is the fastest way to zero a register.
if (x == 0) {
@@ -1131,16 +1510,13 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
//
// In this case, the encoding of the REX byte is 0b0100100B
try self.code.ensureCapacity(self.code.items.len + 10);
- self.rex(.{ .w = true, .b = reg.isExtended() });
+ self.rex(.{ .w = reg.size() == 64, .b = reg.isExtended() });
self.code.items.len += 9;
self.code.items[self.code.items.len - 9] = 0xB8 | @as(u8, reg.id() & 0b111);
const imm_ptr = self.code.items[self.code.items.len - 8 ..][0..8];
mem.writeIntLittle(u64, imm_ptr, x);
},
.embedded_in_code => |code_offset| {
- if (reg.size() != 64) {
- return self.fail(src, "TODO decide whether to implement non-64-bit loads", .{});
- }
// We need the offset from RIP in a signed i32 twos complement.
// The instruction is 7 bytes long and RIP points to the next instruction.
try self.code.ensureCapacity(self.code.items.len + 7);
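
The REX comments in the hunks around here (for example 0b0100100B) all describe the same one-byte prefix: a fixed 0b0100 high nibble followed by the W, R, X and B bits. The rex(...) helper itself is not part of this diff, so the sketch below uses assumed field names, but the bit layout is the standard x86-64 one:

    const std = @import("std");

    const RexFields = struct {
        w: bool = false, // 1 = 64-bit operand size
        r: bool = false, // extends the ModRM.reg field
        x: bool = false, // extends the SIB.index field
        b: bool = false, // extends ModRM.rm / SIB.base / the opcode register field
    };

    /// Compose a REX prefix byte: fixed high nibble 0b0100 followed by W, R, X, B.
    fn rexByte(fields: RexFields) u8 {
        var byte: u8 = 0x40; // 0b0100_0000
        if (fields.w) byte |= 0b1000;
        if (fields.r) byte |= 0b0100;
        if (fields.x) byte |= 0b0010;
        if (fields.b) byte |= 0b0001;
        return byte;
    }

    test "0b0100100B with B = 0 is the familiar 0x48 prefix" {
        std.debug.assert(rexByte(.{ .w = true }) == 0x48);
        std.debug.assert(rexByte(.{ .w = true, .b = true }) == 0x49);
    }
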
@@ -1148,7 +1524,7 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
// but the operation size is unchanged. Since we're using a disp32, we want mode 0 and lower three
// bits as five.
// REX 0x8D 0b00RRR101, where RRR is the lower three bits of the id.
- self.rex(.{ .w = true, .b = reg.isExtended() });
+ self.rex(.{ .w = reg.size() == 64, .b = reg.isExtended() });
self.code.items.len += 6;
const rip = self.code.items.len;
const big_offset = @intCast(i64, code_offset) - @intCast(i64, rip);
@@ -1160,12 +1536,9 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
},
.register => |src_reg| {
// If the registers are the same, nothing to do.
- if (src_reg == reg)
+ if (src_reg.id() == reg.id())
return;
- if (reg.size() != 64) {
- return self.fail(src, "TODO decide whether to implement non-64-bit loads", .{});
- }
// This is a variant of 8B /r. Since we're using 64-bit moves, we require a REX.
// This is thus three bytes: REX 0x8B R/M.
// If the destination is extended, the R field must be 1.
@@ -1173,14 +1546,11 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
// Since the register is being accessed directly, the R/M mode is three. The reg field (the middle
// three bits) contain the destination, and the R/M field (the lower three bits) contain the source.
try self.code.ensureCapacity(self.code.items.len + 3);
- self.rex(.{ .w = true, .r = reg.isExtended(), .b = src_reg.isExtended() });
+ self.rex(.{ .w = reg.size() == 64, .r = reg.isExtended(), .b = src_reg.isExtended() });
const R = 0xC0 | (@as(u8, reg.id() & 0b111) << 3) | @as(u8, src_reg.id() & 0b111);
self.code.appendSliceAssumeCapacity(&[_]u8{ 0x8B, R });
},
.memory => |x| {
- if (reg.size() != 64) {
- return self.fail(src, "TODO decide whether to implement non-64-bit loads", .{});
- }
if (x <= math.maxInt(u32)) {
// Moving from memory to a register is a variant of `8B /r`.
// Since we're using 64-bit moves, we require a REX.
@@ -1190,7 +1560,7 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
// 0b00RRR100, where RRR is the lower three bits of the register ID.
// The instruction is thus eight bytes; REX 0x8B 0b00RRR100 0x25 followed by a four-byte disp32.
try self.code.ensureCapacity(self.code.items.len + 8);
- self.rex(.{ .w = true, .b = reg.isExtended() });
+ self.rex(.{ .w = reg.size() == 64, .b = reg.isExtended() });
self.code.appendSliceAssumeCapacity(&[_]u8{
0x8B,
0x04 | (@as(u8, reg.id() & 0b111) << 3), // R
@@ -1218,7 +1588,7 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
// is no way to possibly encode it. This means that RSP, RBP, R12, and R13 cannot be used with
// this instruction.
const id3 = @truncate(u3, reg.id());
- std.debug.assert(id3 != 4 and id3 != 5);
+ assert(id3 != 4 and id3 != 5);
// Rather than duplicate the logic used for the move, we just use a self-call with a new MCValue.
try self.genSetReg(src, reg, MCValue{ .immediate = x });
@@ -1233,14 +1603,34 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
//
// Furthermore, if this is an extended register, both B and R must be set in the REX byte, as *both*
// register operands need to be marked as extended.
- self.rex(.{ .w = true, .b = reg.isExtended(), .r = reg.isExtended() });
+ self.rex(.{ .w = reg.size() == 64, .b = reg.isExtended(), .r = reg.isExtended() });
const RM = (@as(u8, reg.id() & 0b111) << 3) | @truncate(u3, reg.id());
self.code.appendSliceAssumeCapacity(&[_]u8{ 0x8B, RM });
}
}
},
- .stack_offset => |off| {
- return self.fail(src, "TODO implement genSetReg for stack variables", .{});
+ .stack_offset => |unadjusted_off| {
+ try self.code.ensureCapacity(self.code.items.len + 7);
+ const size_bytes = @divExact(reg.size(), 8);
+ const off = unadjusted_off + size_bytes;
+ self.rex(.{ .w = reg.size() == 64, .r = reg.isExtended() });
+ const reg_id: u8 = @truncate(u3, reg.id());
+ if (off <= 128) {
+ // Example: 48 8b 4d 7f mov rcx,QWORD PTR [rbp+0x7f]
+ const RM = @as(u8, 0b01_000_101) | (reg_id << 3);
+ const negative_offset = @intCast(i8, -@intCast(i32, off));
+ const twos_comp = @bitCast(u8, negative_offset);
+ self.code.appendSliceAssumeCapacity(&[_]u8{ 0x8b, RM, twos_comp });
+ } else if (off <= 2147483648) {
+ // Example: 48 8b 8d 80 00 00 00 mov rcx,QWORD PTR [rbp+0x80]
+ const RM = @as(u8, 0b10_000_101) | (reg_id << 3);
+ const negative_offset = @intCast(i32, -@intCast(i33, off));
+ const twos_comp = @bitCast(u32, negative_offset);
+ self.code.appendSliceAssumeCapacity(&[_]u8{ 0x8b, RM });
+ mem.writeIntLittle(u32, self.code.addManyAsArrayAssumeCapacity(4), twos_comp);
+ } else {
+ return self.fail(src, "stack offset too large", .{});
+ }
},
},
else => return self.fail(src, "TODO implement getSetReg for {}", .{self.target.cpu.arch}),
@@ -1279,24 +1669,6 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
}
}
- /// Does not "move" the instruction.
- fn copyToNewRegister(self: *Self, inst: *ir.Inst) !MCValue {
- const branch = &self.branch_stack.items[self.branch_stack.items.len - 1];
- try branch.registers.ensureCapacity(self.gpa, branch.registers.items().len + 1);
- try branch.inst_table.ensureCapacity(self.gpa, branch.inst_table.items().len + 1);
-
- const free_index = @ctz(FreeRegInt, branch.free_registers);
- if (free_index >= callee_preserved_regs.len)
- return self.fail(inst.src, "TODO implement spilling register to stack", .{});
- branch.free_registers &= ~(@as(FreeRegInt, 1) << free_index);
- const reg = callee_preserved_regs[free_index];
- branch.registers.putAssumeCapacityNoClobber(reg, .{ .inst = inst });
- const old_mcv = branch.inst_table.get(inst).?;
- const new_mcv: MCValue = .{ .register = reg };
- try self.genSetReg(inst.src, reg, old_mcv);
- return new_mcv;
- }
-
/// If the MCValue is an immediate, and it does not fit within this type,
/// we put it in a register.
/// A potential opportunity for future optimization here would be keeping track
@@ -1324,6 +1696,8 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
}
fn genTypedValue(self: *Self, src: usize, typed_value: TypedValue) !MCValue {
+ if (typed_value.val.isUndef())
+ return MCValue.undef;
const ptr_bits = self.target.cpu.arch.ptrBitWidth();
const ptr_bytes: u64 = @divExact(ptr_bits, 8);
switch (typed_value.ty.zigTypeTag()) {
@@ -1398,11 +1772,16 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
for (param_types) |ty, i| {
switch (ty.zigTypeTag()) {
.Bool, .Int => {
+ const param_size = @intCast(u32, ty.abiSize(self.target.*));
if (next_int_reg >= c_abi_int_param_regs.len) {
result.args[i] = .{ .stack_offset = next_stack_offset };
- next_stack_offset += @intCast(u32, ty.abiSize(self.target.*));
+ next_stack_offset += param_size;
} else {
- result.args[i] = .{ .register = c_abi_int_param_regs[next_int_reg] };
+ const aliased_reg = registerAlias(
+ c_abi_int_param_regs[next_int_reg],
+ param_size,
+ );
+ result.args[i] = .{ .register = aliased_reg };
next_int_reg += 1;
}
},
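
The parameter classification above hands each integer argument either the next free C ABI register or a growing stack offset once the registers run out. A simplified, self-contained model of that loop (ParamLoc and classifyIntParams are illustration-only names, and the size list stands in for ty.abiSize):

    const std = @import("std");

    const ParamLoc = union(enum) {
        register_index: u8, // index into the ABI's integer parameter registers
        stack_offset: u32,
    };

    /// Assign each parameter a register index while registers remain, then stack offsets.
    fn classifyIntParams(param_sizes: []const u32, reg_count: u8, out: []ParamLoc) void {
        var next_int_reg: u8 = 0;
        var next_stack_offset: u32 = 0;
        for (param_sizes) |param_size, i| {
            if (next_int_reg >= reg_count) {
                out[i] = ParamLoc{ .stack_offset = next_stack_offset };
                next_stack_offset += param_size;
            } else {
                out[i] = ParamLoc{ .register_index = next_int_reg };
                next_int_reg += 1;
            }
        }
    }

    test "the seventh and eighth integer arguments spill to the stack" {
        var locs: [8]ParamLoc = undefined;
        classifyIntParams(&[_]u32{ 8, 8, 8, 8, 8, 8, 8, 4 }, 6, &locs);
        std.debug.assert(locs[5].register_index == 5); // last one that fits in a register
        std.debug.assert(locs[6].stack_offset == 0); // first stack parameter
        std.debug.assert(locs[7].stack_offset == 8); // placed after the previous 8-byte one
    }
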
@@ -1426,7 +1805,9 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
.x86_64 => switch (cc) {
.Naked => unreachable,
.Unspecified, .C => {
- result.return_value = .{ .register = c_abi_int_return_regs[0] };
+ const ret_ty_size = @intCast(u32, ret_ty.abiSize(self.target.*));
+ const aliased_reg = registerAlias(c_abi_int_return_regs[0], ret_ty_size);
+ result.return_value = .{ .register = aliased_reg };
},
else => return self.fail(src, "TODO implement function return values for {}", .{cc}),
},
@@ -1435,6 +1816,16 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
return result;
}
+ /// TODO support scope overrides. Also note this logic is duplicated with `Module.wantSafety`.
+ fn wantSafety(self: *Self) bool {
+ return switch (self.bin_file.base.options.optimize_mode) {
+ .Debug => true,
+ .ReleaseSafe => true,
+ .ReleaseFast => false,
+ .ReleaseSmall => false,
+ };
+ }
+
fn fail(self: *Self, src: usize, comptime format: []const u8, args: anytype) error{ CodegenFail, OutOfMemory } {
@setCold(true);
assert(self.err_msg == null);
@@ -1463,5 +1854,19 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
fn parseRegName(name: []const u8) ?Register {
return std.meta.stringToEnum(Register, name);
}
+
+ fn registerAlias(reg: Register, size_bytes: u32) Register {
+ switch (arch) {
+ // For x86_64 we have to pick a smaller register alias depending on abi size.
+ .x86_64 => switch (size_bytes) {
+ 1 => return reg.to8(),
+ 2 => return reg.to16(),
+ 4 => return reg.to32(),
+ 8 => return reg.to64(),
+ else => unreachable,
+ },
+ else => return reg,
+ }
+ }
};
}
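
As a usage note for registerAlias: resolveCallingConventionValues narrows the C ABI registers to the width of the value, so for example a 2-byte return value is reported in ax rather than rax. A self-contained sketch of that size-to-alias mapping for the rax family (RaxAlias and raxAliasForSize are illustration-only stand-ins for the real Register.to8/to16/to32/to64 calls):

    const std = @import("std");

    /// The size-to-alias pattern registerAlias applies on x86_64, shown for rax;
    /// the other general-purpose registers follow the same 1/2/4/8-byte scheme.
    const RaxAlias = enum {
        al, // 1 byte
        ax, // 2 bytes
        eax, // 4 bytes
        rax, // 8 bytes
    };

    fn raxAliasForSize(size_bytes: u32) RaxAlias {
        return switch (size_bytes) {
            1 => .al,
            2 => .ax,
            4 => .eax,
            8 => .rax,
            else => unreachable, // anything bigger never lives in a single register
        };
    }

    test "a 2-byte return value comes back in ax, not rax" {
        std.debug.assert(raxAliasForSize(2) == .ax);
        std.debug.assert(raxAliasForSize(8) == .rax);
    }
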