| field | value | date |
|---|---|---|
| author | Andrew Kelley <andrew@ziglang.org> | 2023-04-27 15:45:01 -0700 |
| committer | GitHub <noreply@github.com> | 2023-04-27 15:45:01 -0700 |
| commit | 011bc59e8aa4b62a286e4dd6e92371af5dac8b9f (patch) | |
| tree | 9205ef1625f18435be9dfbffcc709dee73b27724 /src/arch | |
| parent | aa3405aabc539c66a9f110bc044924db07e892b6 (diff) | |
| parent | f56054d129d19a006ff19c56b48bc9c7d9c04593 (diff) | |
Merge pull request #15435 from jacobly0/x86_64-frame
x86_64: add frame indices
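
The core idea of the patch: instead of assigning each stack slot a concrete byte offset the moment it is allocated (the old `next_stack_offset`/`max_end_stack` scheme), codegen hands out symbolic `FrameIndex` values, records size/alignment in a `FrameAlloc` table, and only after the whole function body is generated does `computeFrameLayout` resolve every index to a base register plus displacement. The sketch below is a hypothetical, heavily simplified illustration of that two-phase pattern — `Frame` and `FrameLoc` here are invented stand-ins for the diff's `FrameIndex`/`FrameAlloc`/`Mir.FrameLoc`, the layout walks slots in insertion order rather than sorting by alignment, and only an `rsp` base is modeled. It is written against the same Zig-0.11-dev dialect as the patch (hence `@intToEnum`/`@enumToInt` and two-argument `@intCast`).

```zig
const std = @import("std");

// Invented for illustration; the real FrameIndex (src/arch/x86_64/bits.zig)
// also reserves named indices such as stack_frame, call_frame, ret_addr.
const FrameIndex = enum(u32) { _ };

const FrameAlloc = struct { abi_size: u32, abi_align: u32 };

/// A resolved location: base register name + displacement.
const FrameLoc = struct { base: []const u8, disp: i32 };

const Frame = struct {
    allocs: std.ArrayListUnmanaged(FrameAlloc) = .{},

    /// Phase 1 (during instruction selection): hand out a stable symbolic
    /// index. No byte offset is chosen yet, so generated instructions can
    /// refer to the slot before the final frame size is known.
    fn alloc(self: *Frame, gpa: std.mem.Allocator, a: FrameAlloc) !FrameIndex {
        const index = @intToEnum(FrameIndex, @intCast(u32, self.allocs.items.len));
        try self.allocs.append(gpa, a);
        return index;
    }

    /// Phase 2 (after the whole body is generated): lay every slot out in
    /// one pass and resolve each index to a base + displacement.
    fn layout(self: Frame, gpa: std.mem.Allocator) ![]FrameLoc {
        const locs = try gpa.alloc(FrameLoc, self.allocs.items.len);
        var offset: i32 = 0;
        for (self.allocs.items, 0..) |a, i| {
            offset = std.mem.alignForwardGeneric(i32, offset, @intCast(i32, a.abi_align));
            locs[i] = .{ .base = "rsp", .disp = offset };
            offset += @intCast(i32, a.abi_size);
        }
        return locs;
    }
};

pub fn main() !void {
    var gpa_state = std.heap.GeneralPurposeAllocator(.{}){};
    defer _ = gpa_state.deinit();
    const gpa = gpa_state.allocator();

    var frame = Frame{};
    defer frame.allocs.deinit(gpa);

    // During codegen, instructions only ever see these symbolic indices.
    const a = try frame.alloc(gpa, .{ .abi_size = 8, .abi_align = 8 });
    const b = try frame.alloc(gpa, .{ .abi_size = 4, .abi_align = 4 });

    // After codegen, all indices are resolved at once.
    const locs = try frame.layout(gpa);
    defer gpa.free(locs);
    std.debug.print("a -> [{s} + {d}], b -> [{s} + {d}]\n", .{
        locs[@enumToInt(a)].base, locs[@enumToInt(a)].disp,
        locs[@enumToInt(b)].base, locs[@enumToInt(b)].disp,
    });
}
```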
Diffstat (limited to 'src/arch')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | src/arch/x86_64/CodeGen.zig | 4693 |
| -rw-r--r-- | src/arch/x86_64/Emit.zig | 15 |
| -rw-r--r-- | src/arch/x86_64/Encoding.zig | 55 |
| -rw-r--r-- | src/arch/x86_64/Lower.zig | 33 |
| -rw-r--r-- | src/arch/x86_64/Mir.zig | 108 |
| -rw-r--r-- | src/arch/x86_64/bits.zig | 78 |
| -rw-r--r-- | src/arch/x86_64/encoder.zig | 211 |
| -rw-r--r-- | src/arch/x86_64/encodings.zig | 6 |
8 files changed, 2619 insertions, 2580 deletions
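
A second mechanism the diff leans on is prologue backpatching: the prologue cannot be emitted correctly until after the body is generated, because the frame size, stack-alignment mask, and saved-register list are only known then. So `gen` emits placeholder (`.dead`) MIR instructions via the new `asmPlaceholder` helper and overwrites them after `computeFrameLayout` runs. Here is a hypothetical, self-contained sketch of that pattern — the `Inst` and `Prologue` types are invented for illustration, not the compiler's `Mir.Inst`:

```zig
const std = @import("std");

// Invented stand-ins; the real Mir.Inst tags include .dead, .sub, .@"and".
const Inst = union(enum) {
    dead, // placeholder, skipped by the emitter
    sub_rsp: u32, // sub rsp, imm (frame allocation)
    and_rsp: u32, // and rsp, imm (stack realignment mask)
};

const Prologue = struct {
    insts: std.ArrayListUnmanaged(Inst) = .{},

    /// Reserve a slot whose final contents are unknown during codegen.
    fn placeholder(self: *Prologue, gpa: std.mem.Allocator) !usize {
        const index = self.insts.items.len;
        try self.insts.append(gpa, .dead);
        return index;
    }
};

pub fn main() !void {
    var gpa_state = std.heap.GeneralPurposeAllocator(.{}){};
    defer _ = gpa_state.deinit();
    const gpa = gpa_state.allocator();

    var pro = Prologue{};
    defer pro.insts.deinit(gpa);

    // During codegen: reserve the slot before the frame size is known.
    const backpatch_stack_alloc = try pro.placeholder(gpa);

    // ... generate the body, allocating frame indices as needed ...

    // After layout: fill the slot in, or leave it .dead to be skipped.
    const frame_size: u32 = 48; // would come from a computeFrameLayout() analogue
    if (frame_size > 0) pro.insts.items[backpatch_stack_alloc] = .{ .sub_rsp = frame_size };

    for (pro.insts.items) |inst| switch (inst) {
        .dead => {}, // the emitter skips placeholders
        .sub_rsp => |imm| std.debug.print("sub rsp, {d}\n", .{imm}),
        .and_rsp => |imm| std.debug.print("and rsp, 0x{x}\n", .{imm}),
    };
}
```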
diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index c857e65ec9..df0db882ba 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -6,6 +6,7 @@ const codegen = @import("../../codegen.zig"); const leb128 = std.leb; const link = @import("../../link.zig"); const log = std.log.scoped(.codegen); +const tracking_log = std.log.scoped(.tracking); const math = std.math; const mem = std.mem; const trace = @import("../../tracy.zig").trace; @@ -40,6 +41,7 @@ const Memory = bits.Memory; const Register = bits.Register; const RegisterManager = abi.RegisterManager; const RegisterLock = RegisterManager.RegisterLock; +const FrameIndex = bits.FrameIndex; const gp = abi.RegisterClass.gp; const sse = abi.RegisterClass.sse; @@ -47,6 +49,7 @@ const sse = abi.RegisterClass.sse; const InnerError = CodeGenError || error{OutOfRegisters}; const debug_wip_mir = false; +const debug_tracking = false; gpa: Allocator, air: Air, @@ -57,11 +60,10 @@ target: *const std.Target, mod_fn: *const Module.Fn, err_msg: ?*ErrorMsg, args: []MCValue, -ret_mcv: MCValue, +ret_mcv: InstTracking, fn_type: Type, arg_index: u32, src_loc: Module.SrcLoc, -stack_align: u32, eflags_inst: ?Air.Inst.Index = null, @@ -86,17 +88,13 @@ inst_tracking: InstTrackingMap = .{}, blocks: std.AutoHashMapUnmanaged(Air.Inst.Index, BlockData) = .{}, register_manager: RegisterManager = .{}, -/// Maps offset to what is stored there. -stack: std.AutoHashMapUnmanaged(u32, StackAllocation) = .{}, /// Generation of the current scope, increments by 1 for every entered scope. scope_generation: u32 = 0, -/// Offset from the stack base, representing the end of the stack frame. -max_end_stack: u32 = 0, -/// Represents the current end stack offset. If there is no existing slot -/// to place a new stack allocation, it goes here, and then bumps `max_end_stack`. -next_stack_offset: u32 = 0, +frame_allocs: std.MultiArrayList(FrameAlloc) = .{}, +free_frame_indices: std.AutoArrayHashMapUnmanaged(FrameIndex, void) = .{}, +frame_locs: std.MultiArrayList(Mir.FrameLoc) = .{}, /// Debug field, used to find bugs in the compiler. air_bookkeeping: @TypeOf(air_bookkeeping_init) = air_bookkeeping_init, @@ -108,6 +106,9 @@ const air_bookkeeping_init = if (std.debug.runtime_safety) @as(usize, 0) else {} const mir_to_air_map_init = if (builtin.mode == .Debug) std.AutoHashMapUnmanaged(Mir.Inst.Index, Air.Inst.Index){} else {}; +const FrameAddr = struct { index: FrameIndex, off: i32 = 0 }; +const RegisterOffset = struct { reg: Register, off: i32 = 0 }; + pub const MCValue = union(enum) { /// No runtime bits. `void` types, empty structs, u0, enums with 1 tag, etc. /// TODO Look into deleting this tag and using `dead` instead, since every use @@ -123,46 +124,59 @@ pub const MCValue = union(enum) { /// A pointer-sized integer that fits in a register. /// If the type is a pointer, this is the pointer address in virtual address space. immediate: u64, - /// The value is in a GP register. + /// The value resides in the EFLAGS register. + eflags: Condition, + /// The value is in a register. register: Register, + /// The value is a constant offset from the value in a register. + register_offset: RegisterOffset, /// The value is a tuple { wrapped, overflow } where wrapped value is stored in the GP register. register_overflow: struct { reg: Register, eflags: Condition }, /// The value is in memory at a hard-coded address. /// If the type is a pointer, it means the pointer address is at this memory location. 
memory: u64, + /// The value is in memory at a constant offset from the address in a register. + indirect: RegisterOffset, /// The value is in memory. /// Payload is a symbol index. load_direct: u32, - /// The value is a pointer to value in memory. + /// The value is a pointer to a value in memory. /// Payload is a symbol index. lea_direct: u32, /// The value is in memory referenced indirectly via GOT. /// Payload is a symbol index. load_got: u32, + /// The value is a pointer to a value referenced indirectly via GOT. + /// Payload is a symbol index. + lea_got: u32, /// The value is a threadlocal variable. /// Payload is a symbol index. load_tlv: u32, - /// The value is a pointer to threadlocal variable. + /// The value is a pointer to a threadlocal variable. /// Payload is a symbol index. lea_tlv: u32, - /// The value is one of the stack variables. - /// If the type is a pointer, it means the pointer address is in the stack at this offset. - stack_offset: i32, - /// The value is a pointer to one of the stack variables (payload is stack offset). - ptr_stack_offset: i32, - /// The value resides in the EFLAGS register. - eflags: Condition, + /// The value stored at an offset from a frame index + /// Payload is a frame address. + load_frame: FrameAddr, + /// The address of an offset from a frame index + /// Payload is a frame address. + lea_frame: FrameAddr, + /// This indicates that we have already allocated a frame index for this instruction, + /// but it has not been spilled there yet in the current control flow. + /// Payload is a frame index. + reserved_frame: FrameIndex, fn isMemory(mcv: MCValue) bool { return switch (mcv) { .memory, - .stack_offset, - .ptr_stack_offset, .load_direct, .lea_direct, .load_got, + .lea_got, .load_tlv, .lea_tlv, + .load_frame, + .lea_frame, => true, else => false, }; @@ -181,6 +195,134 @@ pub const MCValue = union(enum) { else => false, }; } + + fn getReg(mcv: MCValue) ?Register { + return switch (mcv) { + .register => |reg| reg, + .register_offset, .indirect => |ro| ro.reg, + .register_overflow => |ro| ro.reg, + else => null, + }; + } + + fn getCondition(mcv: MCValue) ?Condition { + return switch (mcv) { + .eflags => |cc| cc, + .register_overflow => |reg_ov| reg_ov.eflags, + else => null, + }; + } + + fn address(mcv: MCValue) MCValue { + return switch (mcv) { + .none, + .unreach, + .dead, + .undef, + .immediate, + .register, + .register_offset, + .eflags, + .register_overflow, + .lea_direct, + .lea_got, + .lea_tlv, + .lea_frame, + .reserved_frame, + => unreachable, // not in memory + .memory => |addr| .{ .immediate = addr }, + .indirect => |reg_off| switch (reg_off.off) { + 0 => .{ .register = reg_off.reg }, + else => .{ .register_offset = reg_off }, + }, + .load_direct => |sym_index| .{ .lea_direct = sym_index }, + .load_got => |sym_index| .{ .lea_got = sym_index }, + .load_tlv => |sym_index| .{ .lea_tlv = sym_index }, + .load_frame => |frame_addr| .{ .lea_frame = frame_addr }, + }; + } + + fn deref(mcv: MCValue) MCValue { + return switch (mcv) { + .none, + .unreach, + .dead, + .undef, + .eflags, + .register_overflow, + .memory, + .indirect, + .load_direct, + .load_got, + .load_tlv, + .load_frame, + .reserved_frame, + => unreachable, // not a dereferenceable + .immediate => |addr| .{ .memory = addr }, + .register => |reg| .{ .indirect = .{ .reg = reg } }, + .register_offset => |reg_off| .{ .indirect = reg_off }, + .lea_direct => |sym_index| .{ .load_direct = sym_index }, + .lea_got => |sym_index| .{ .load_got = sym_index }, + .lea_tlv => |sym_index| .{ 
.load_tlv = sym_index }, + .lea_frame => |frame_addr| .{ .load_frame = frame_addr }, + }; + } + + fn offset(mcv: MCValue, off: i32) MCValue { + return switch (mcv) { + .none, + .unreach, + .dead, + .undef, + .eflags, + .register_overflow, + .memory, + .indirect, + .load_direct, + .lea_direct, + .load_got, + .lea_got, + .load_tlv, + .lea_tlv, + .load_frame, + .reserved_frame, + => unreachable, // not offsettable + .immediate => |imm| .{ .immediate = @bitCast(u64, @bitCast(i64, imm) +% off) }, + .register => |reg| .{ .register_offset = .{ .reg = reg, .off = off } }, + .register_offset => |reg_off| .{ + .register_offset = .{ .reg = reg_off.reg, .off = reg_off.off + off }, + }, + .lea_frame => |frame_addr| .{ + .lea_frame = .{ .index = frame_addr.index, .off = frame_addr.off + off }, + }, + }; + } + + pub fn format( + mcv: MCValue, + comptime _: []const u8, + _: std.fmt.FormatOptions, + writer: anytype, + ) @TypeOf(writer).Error!void { + switch (mcv) { + .none, .unreach, .dead, .undef => try writer.print("({s})", .{@tagName(mcv)}), + .immediate => |pl| try writer.print("0x{x}", .{pl}), + .memory => |pl| try writer.print("[ds:0x{x}]", .{pl}), + inline .eflags, .register => |pl| try writer.print("{s}", .{@tagName(pl)}), + .register_offset => |pl| try writer.print("{s} + 0x{x}", .{ @tagName(pl.reg), pl.off }), + .register_overflow => |pl| try writer.print("{s}:{s}", .{ @tagName(pl.eflags), @tagName(pl.reg) }), + .indirect => |pl| try writer.print("[{s} + 0x{x}]", .{ @tagName(pl.reg), pl.off }), + .load_direct => |pl| try writer.print("[direct:{d}]", .{pl}), + .lea_direct => |pl| try writer.print("direct:{d}", .{pl}), + .load_got => |pl| try writer.print("[got:{d}]", .{pl}), + .lea_got => |pl| try writer.print("got:{d}", .{pl}), + .load_tlv => |pl| try writer.print("[tlv:{d}]", .{pl}), + .lea_tlv => |pl| try writer.print("tlv:{d}", .{pl}), + .load_frame => |pl| try writer.print("[{} + 0x{x}]", .{ pl.index, pl.off }), + .lea_frame => |pl| try writer.print("{} + 0x{x}", .{ pl.index, pl.off }), + .reserved_frame => |pl| try writer.print("(dead:{})", .{pl}), + } + } }; const InstTrackingMap = std.AutoArrayHashMapUnmanaged(Air.Inst.Index, InstTracking); @@ -189,89 +331,211 @@ const InstTracking = struct { short: MCValue, fn init(result: MCValue) InstTracking { - return .{ .long = result, .short = result }; + return .{ .long = switch (result) { + .none, + .unreach, + .undef, + .immediate, + .memory, + .load_direct, + .lea_direct, + .load_got, + .lea_got, + .load_tlv, + .lea_tlv, + .load_frame, + .lea_frame, + => result, + .dead, + .reserved_frame, + => unreachable, + .eflags, + .register, + .register_offset, + .register_overflow, + .indirect, + => .none, + }, .short = result }; } fn getReg(self: InstTracking) ?Register { - return switch (self.short) { - .register => |reg| reg, - .register_overflow => |ro| ro.reg, - else => null, - }; + return self.short.getReg(); } fn getCondition(self: InstTracking) ?Condition { - return switch (self.short) { - .eflags => |eflags| eflags, - .register_overflow => |ro| ro.eflags, - else => null, - }; + return self.short.getCondition(); } fn spill(self: *InstTracking, function: *Self, inst: Air.Inst.Index) !void { + if (std.meta.eql(self.long, self.short)) return; // Already spilled + // Allocate or reuse frame index switch (self.long) { + .none => self.long = try function.allocRegOrMem(inst, false), + .load_frame => {}, + .reserved_frame => |index| self.long = .{ .load_frame = .{ .index = index } }, + else => unreachable, + } + tracking_log.debug("spill %{d} from {} to 
{}", .{ inst, self.short, self.long }); + try function.genCopy(function.air.typeOfIndex(inst), self.long, self.short); + } + + fn reuseFrame(self: *InstTracking) void { + switch (self.long) { + .reserved_frame => |index| self.long = .{ .load_frame = .{ .index = index } }, + else => {}, + } + self.short = switch (self.long) { .none, - .dead, .unreach, - => unreachable, + .undef, + .immediate, + .memory, + .load_direct, + .lea_direct, + .load_got, + .lea_got, + .load_tlv, + .lea_tlv, + .load_frame, + .lea_frame, + => self.long, + .dead, + .eflags, .register, + .register_offset, .register_overflow, - .eflags, - => self.long = try function.allocRegOrMem(inst, self.short == .eflags), - .stack_offset => {}, + .indirect, + .reserved_frame, + => unreachable, + }; + } + + fn trackSpill(self: *InstTracking, function: *Self, inst: Air.Inst.Index) void { + function.freeValue(self.short); + self.reuseFrame(); + tracking_log.debug("%{d} => {} (spilled)", .{ inst, self.* }); + } + + fn verifyMaterialize(self: *InstTracking, target: InstTracking) void { + switch (self.long) { + .none, + .unreach, .undef, .immediate, .memory, .load_direct, .lea_direct, .load_got, + .lea_got, .load_tlv, .lea_tlv, - .ptr_stack_offset, - => return, // these can be rematerialized without using a stack slot + .lea_frame, + => assert(std.meta.eql(self.long, target.long)), + .load_frame, + .reserved_frame, + => switch (target.long) { + .none, + .load_frame, + .reserved_frame, + => {}, + else => unreachable, + }, + .dead, + .eflags, + .register, + .register_offset, + .register_overflow, + .indirect, + => unreachable, } - log.debug("spilling %{d} from {} to {}", .{ inst, self.short, self.long }); - const ty = function.air.typeOfIndex(inst); - try function.setRegOrMem(ty, self.long, self.short); } - fn trackSpill(self: *InstTracking, function: *Self) void { - if (self.getReg()) |reg| function.register_manager.freeReg(reg); - switch (self.short) { - .none, .dead, .unreach => unreachable, - else => {}, - } - self.short = self.long; + fn materialize( + self: *InstTracking, + function: *Self, + inst: Air.Inst.Index, + target: InstTracking, + ) !void { + self.verifyMaterialize(target); + try self.materializeUnsafe(function, inst, target); } - fn materialize(self: *InstTracking, function: *Self, inst: Air.Inst.Index, reg: Register) !void { + fn materializeUnsafe( + self: *InstTracking, + function: *Self, + inst: Air.Inst.Index, + target: InstTracking, + ) !void { const ty = function.air.typeOfIndex(inst); - try function.genSetReg(ty, reg, self.long); + if ((self.long == .none or self.long == .reserved_frame) and target.long == .load_frame) + try function.genCopy(ty, target.long, self.short); + try function.genCopy(ty, target.short, self.short); } - fn trackMaterialize(self: *InstTracking, function: *Self, inst: Air.Inst.Index, reg: Register) void { - assert(inst == function.register_manager.registers[ - RegisterManager.indexOfRegIntoTracked(reg).? 
- ]); - self.short = .{ .register = reg }; + fn trackMaterialize(self: *InstTracking, inst: Air.Inst.Index, target: InstTracking) void { + self.verifyMaterialize(target); + // Don't clobber reserved frame indices + self.long = if (target.long == .none) switch (self.long) { + .load_frame => |addr| .{ .reserved_frame = addr.index }, + .reserved_frame => self.long, + else => target.long, + } else target.long; + self.short = target.short; + tracking_log.debug("%{d} => {} (materialize)", .{ inst, self.* }); } - fn resurrect(self: *InstTracking, scope_generation: u32) void { + fn resurrect(self: *InstTracking, inst: Air.Inst.Index, scope_generation: u32) void { switch (self.short) { .dead => |die_generation| if (die_generation >= scope_generation) { - self.short = self.long; + self.reuseFrame(); + tracking_log.debug("%{d} => {} (resurrect)", .{ inst, self.* }); }, else => {}, } } - fn die(self: *InstTracking, function: *Self) void { + fn die(self: *InstTracking, function: *Self, inst: Air.Inst.Index) void { function.freeValue(self.short); - self.reuse(function); + self.short = .{ .dead = function.scope_generation }; + tracking_log.debug("%{d} => {} (death)", .{ inst, self.* }); } - fn reuse(self: *InstTracking, function: *Self) void { + fn reuse( + self: *InstTracking, + function: *Self, + new_inst: Air.Inst.Index, + old_inst: Air.Inst.Index, + ) void { self.short = .{ .dead = function.scope_generation }; + tracking_log.debug("%{d} => {} (reuse %{d})", .{ new_inst, self.*, old_inst }); + } + + pub fn format( + self: InstTracking, + comptime _: []const u8, + _: std.fmt.FormatOptions, + writer: anytype, + ) @TypeOf(writer).Error!void { + if (!std.meta.eql(self.long, self.short)) try writer.print("|{}| ", .{self.long}); + try writer.print("{}", .{self.short}); + } +}; + +const FrameAlloc = struct { + abi_size: u31, + abi_align: u5, + ref_count: u16, + + fn init(alloc_abi: struct { size: u64, alignment: u32 }) FrameAlloc { + assert(math.isPowerOfTwo(alloc_abi.alignment)); + return .{ + .abi_size = @intCast(u31, alloc_abi.size), + .abi_align = math.log2_int(u32, alloc_abi.alignment), + .ref_count = 0, + }; + } + fn initType(ty: Type, target: Target) FrameAlloc { + return init(.{ .size = ty.abiSize(target), .alignment = ty.abiAlignment(target) }); } }; @@ -332,12 +596,13 @@ pub fn generate( .fn_type = fn_type, .arg_index = 0, .src_loc = src_loc, - .stack_align = undefined, .end_di_line = module_fn.rbrace_line, .end_di_column = module_fn.rbrace_column, }; defer { - function.stack.deinit(gpa); + function.frame_allocs.deinit(gpa); + function.free_frame_indices.deinit(gpa); + function.frame_locs.deinit(gpa); var block_it = function.blocks.valueIterator(); while (block_it.next()) |block| block.deinit(gpa); function.blocks.deinit(gpa); @@ -349,7 +614,23 @@ pub fn generate( if (builtin.mode == .Debug) function.mir_to_air_map.deinit(gpa); } - var call_info = function.resolveCallingConventionValues(fn_type, &.{}) catch |err| switch (err) { + try function.frame_allocs.resize(gpa, FrameIndex.named_count); + function.frame_allocs.set( + @enumToInt(FrameIndex.stack_frame), + FrameAlloc.init(.{ + .size = 0, + .alignment = if (mod.align_stack_fns.get(module_fn)) |set_align_stack| + set_align_stack.alignment + else + 1, + }), + ); + function.frame_allocs.set( + @enumToInt(FrameIndex.call_frame), + FrameAlloc.init(.{ .size = 0, .alignment = 1 }), + ); + + var call_info = function.resolveCallingConventionValues(fn_type, &.{}, .args_frame) catch |err| switch (err) { error.CodegenFail => return Result{ .fail = 
function.err_msg.? }, error.OutOfRegisters => return Result{ .fail = try ErrorMsg.create( @@ -365,8 +646,18 @@ pub fn generate( function.args = call_info.args; function.ret_mcv = call_info.return_value; - function.stack_align = call_info.stack_align; - function.max_end_stack = call_info.stack_byte_count; + function.frame_allocs.set(@enumToInt(FrameIndex.ret_addr), FrameAlloc.init(.{ + .size = Type.usize.abiSize(function.target.*), + .alignment = @min(Type.usize.abiAlignment(function.target.*), call_info.stack_align), + })); + function.frame_allocs.set(@enumToInt(FrameIndex.base_ptr), FrameAlloc.init(.{ + .size = Type.usize.abiSize(function.target.*), + .alignment = @min(Type.usize.abiAlignment(function.target.*) * 2, call_info.stack_align), + })); + function.frame_allocs.set( + @enumToInt(FrameIndex.args_frame), + FrameAlloc.init(.{ .size = call_info.stack_byte_count, .alignment = call_info.stack_align }), + ); function.gen() catch |err| switch (err) { error.CodegenFail => return Result{ .fail = function.err_msg.? }, @@ -379,6 +670,7 @@ pub fn generate( var mir = Mir{ .instructions = function.mir_instructions.toOwnedSlice(), .extra = try function.mir_extra.toOwnedSlice(bin_file.allocator), + .frame_locs = function.frame_locs.toOwnedSlice(), }; defer mir.deinit(bin_file.allocator); @@ -432,6 +724,7 @@ fn dumpWipMir(self: *Self, inst: Mir.Inst) !void { .mir = .{ .instructions = self.mir_instructions.slice(), .extra = self.mir_extra.items, + .frame_locs = (std.MultiArrayList(Mir.FrameLoc){}).slice(), }, .target = self.target, .src_loc = self.src_loc, @@ -458,6 +751,14 @@ fn dumpWipMir(self: *Self, inst: Mir.Inst) !void { } } +fn dumpTracking(self: *Self) !void { + if (!debug_tracking) return; + const stderr = std.io.getStdErr().writer(); + + var it = self.inst_tracking.iterator(); + while (it.next()) |entry| try stderr.print("%{d} = {}\n", .{ entry.key_ptr.*, entry.value_ptr.* }); +} + fn addInst(self: *Self, inst: Mir.Inst) error{OutOfMemory}!Mir.Inst.Index { const gpa = self.gpa; try self.mir_instructions.ensureUnusedCapacity(gpa, 1); @@ -550,6 +851,14 @@ fn asmJccReloc(self: *Self, target: Mir.Inst.Index, cc: bits.Condition) !Mir.Ins }); } +fn asmPlaceholder(self: *Self) !Mir.Inst.Index { + return self.addInst(.{ + .tag = .dead, + .ops = undefined, + .data = undefined, + }); +} + fn asmOpOnly(self: *Self, tag: Mir.Inst.Tag) !void { _ = try self.addInst(.{ .tag = tag, @@ -765,44 +1074,32 @@ fn gen(self: *Self) InnerError!void { const cc = self.fn_type.fnCallingConvention(); if (cc != .Naked) { try self.asmRegister(.push, .rbp); + const backpatch_push_callee_preserved_regs = try self.asmPlaceholder(); try self.asmRegisterRegister(.mov, .rbp, .rsp); - - // We want to subtract the aligned stack frame size from rsp here, but we don't - // yet know how big it will be, so we leave room for a 4-byte stack size. - // TODO During semantic analysis, check if there are no function calls. If there - // are none, here we can omit the part where we subtract and then add rsp. - const backpatch_stack_sub = try self.addInst(.{ - .tag = .dead, - .ops = undefined, - .data = undefined, - }); - - if (self.ret_mcv == .stack_offset) { - // The address where to store the return value for the caller is in a - // register which the callee is free to clobber. Therefore, we purposely - // spill it to stack immediately. 
- const stack_offset = mem.alignForwardGeneric(u32, self.next_stack_offset + 8, 8); - self.next_stack_offset = stack_offset; - self.max_end_stack = @max(self.max_end_stack, self.next_stack_offset); - - const ret_reg = abi.getCAbiIntParamRegs(self.target.*)[0]; - try self.genSetStack(Type.usize, @intCast(i32, stack_offset), MCValue{ .register = ret_reg }, .{}); - self.ret_mcv = MCValue{ .stack_offset = @intCast(i32, stack_offset) }; - log.debug("gen: spilling {s} to stack at offset {}", .{ @tagName(ret_reg), stack_offset }); + const backpatch_frame_align = try self.asmPlaceholder(); + const backpatch_stack_alloc = try self.asmPlaceholder(); + + switch (self.ret_mcv.long) { + .none, .unreach => {}, + .indirect => { + // The address where to store the return value for the caller is in a + // register which the callee is free to clobber. Therefore, we purposely + // spill it to stack immediately. + const frame_index = + try self.allocFrameIndex(FrameAlloc.initType(Type.usize, self.target.*)); + try self.genSetMem( + .{ .frame = frame_index }, + 0, + Type.usize, + self.ret_mcv.long.address().offset(-self.ret_mcv.short.indirect.off), + ); + self.ret_mcv.long = .{ .load_frame = .{ .index = frame_index } }; + tracking_log.debug("spill {} to {}", .{ self.ret_mcv.long, frame_index }); + }, + else => unreachable, } - _ = try self.addInst(.{ - .tag = .dbg_prologue_end, - .ops = undefined, - .data = undefined, - }); - - // Push callee-preserved regs that were used actually in use. - const backpatch_push_callee_preserved_regs = try self.addInst(.{ - .tag = .dead, - .ops = undefined, - .data = undefined, - }); + try self.asmOpOnly(.dbg_prologue_end); try self.genBody(self.air.getMainBody()); @@ -814,69 +1111,43 @@ fn gen(self: *Self) InnerError!void { // } // Eliding the reloc will cause a miscompilation in this case. for (self.exitlude_jump_relocs.items) |jmp_reloc| { - self.mir_instructions.items(.data)[jmp_reloc].inst = @intCast(u32, self.mir_instructions.len); + self.mir_instructions.items(.data)[jmp_reloc].inst = + @intCast(u32, self.mir_instructions.len); } - // Create list of registers to save in the prologue. - // TODO handle register classes - var reg_list = Mir.RegisterList{}; - const callee_preserved_regs = abi.getCalleePreservedRegs(self.target.*); - for (callee_preserved_regs) |reg| { - if (self.register_manager.isRegAllocated(reg)) { - reg_list.push(callee_preserved_regs, reg); - } - } - const saved_regs_stack_space: u32 = reg_list.count() * 8; - - // Pop saved callee-preserved regs. - const backpatch_pop_callee_preserved_regs = try self.addInst(.{ - .tag = .dead, - .ops = undefined, - .data = undefined, - }); - - _ = try self.addInst(.{ - .tag = .dbg_epilogue_begin, - .ops = undefined, - .data = undefined, - }); - - // Maybe add rsp, x if required. This is backpatched later. 
- const backpatch_stack_add = try self.addInst(.{ - .tag = .dead, - .ops = undefined, - .data = undefined, - }); - + try self.asmOpOnly(.dbg_epilogue_begin); + const backpatch_stack_dealloc = try self.asmPlaceholder(); + const backpatch_pop_callee_preserved_regs = try self.asmPlaceholder(); try self.asmRegister(.pop, .rbp); try self.asmOpOnly(.ret); - // Adjust the stack - if (self.max_end_stack > math.maxInt(i32)) { - return self.failSymbol("too much stack used in call parameters", .{}); + const frame_layout = try self.computeFrameLayout(); + const need_frame_align = frame_layout.stack_mask != math.maxInt(u32); + const need_stack_adjust = frame_layout.stack_adjust > 0; + const need_save_reg = frame_layout.save_reg_list.count() > 0; + if (need_frame_align) { + self.mir_instructions.set(backpatch_frame_align, .{ + .tag = .@"and", + .ops = .ri_s, + .data = .{ .ri = .{ .r = .rsp, .i = frame_layout.stack_mask } }, + }); } - - const aligned_stack_end = @intCast( - u32, - mem.alignForward(self.max_end_stack + saved_regs_stack_space, self.stack_align), - ); - if (aligned_stack_end > 0) { - self.mir_instructions.set(backpatch_stack_sub, .{ + if (need_stack_adjust) { + self.mir_instructions.set(backpatch_stack_alloc, .{ .tag = .sub, - .ops = .ri_u, - .data = .{ .ri = .{ .r = .rsp, .i = aligned_stack_end } }, - }); - self.mir_instructions.set(backpatch_stack_add, .{ - .tag = .add, - .ops = .ri_u, - .data = .{ .ri = .{ .r = .rsp, .i = aligned_stack_end } }, + .ops = .ri_s, + .data = .{ .ri = .{ .r = .rsp, .i = frame_layout.stack_adjust } }, }); - - const save_reg_list = try self.addExtra(Mir.SaveRegisterList{ - .base_reg = @enumToInt(Register.rbp), - .register_list = reg_list.asInt(), - .stack_end = aligned_stack_end, + } + if (need_frame_align or need_stack_adjust) { + self.mir_instructions.set(backpatch_stack_dealloc, .{ + .tag = .mov, + .ops = .rr, + .data = .{ .rr = .{ .r1 = .rsp, .r2 = .rbp } }, }); + } + if (need_save_reg) { + const save_reg_list = frame_layout.save_reg_list.asInt(); self.mir_instructions.set(backpatch_push_callee_preserved_regs, .{ .tag = .push_regs, .ops = undefined, @@ -889,30 +1160,19 @@ fn gen(self: *Self) InnerError!void { }); } } else { - _ = try self.addInst(.{ - .tag = .dbg_prologue_end, - .ops = undefined, - .data = undefined, - }); - + try self.asmOpOnly(.dbg_prologue_end); try self.genBody(self.air.getMainBody()); - - _ = try self.addInst(.{ - .tag = .dbg_epilogue_begin, - .ops = undefined, - .data = undefined, - }); + try self.asmOpOnly(.dbg_epilogue_begin); } // Drop them off at the rbrace. - const payload = try self.addExtra(Mir.DbgLineColumn{ - .line = self.end_di_line, - .column = self.end_di_column, - }); _ = try self.addInst(.{ .tag = .dbg_line, .ops = undefined, - .data = .{ .payload = payload }, + .data = .{ .line_column = .{ + .line = self.end_di_line, + .column = self.end_di_column, + } }, }); } @@ -932,6 +1192,7 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void { self.air, self.liveness, ); + self.dumpTracking() catch {}; const old_air_bookkeeping = self.air_bookkeeping; try self.inst_tracking.ensureUnusedCapacity(self.gpa, 1); @@ -1187,23 +1448,16 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void { while (it.next()) |index| { const tracked_inst = self.register_manager.registers[index]; const tracking = self.getResolvedInstValue(tracked_inst); - assert(RegisterManager.indexOfRegIntoTracked(switch (tracking.short) { - .register => |reg| reg, - .register_overflow => |ro| ro.reg, - else => unreachable, - }).? 
== index); + assert(RegisterManager.indexOfRegIntoTracked(tracking.getReg().?).? == index); } } } } + self.dumpTracking() catch {}; } fn getValue(self: *Self, value: MCValue, inst: ?Air.Inst.Index) void { - const reg = switch (value) { - .register => |reg| reg, - .register_overflow => |ro| ro.reg, - else => return, - }; + const reg = value.getReg() orelse return; if (self.register_manager.isRegFree(reg)) { self.register_manager.getRegAssumeFree(reg, inst); } @@ -1214,8 +1468,11 @@ fn freeValue(self: *Self, value: MCValue) void { .register => |reg| { self.register_manager.freeReg(reg); }, - .register_overflow => |ro| { - self.register_manager.freeReg(ro.reg); + .register_offset => |reg_off| { + self.register_manager.freeReg(reg_off.reg); + }, + .register_overflow => |reg_ov| { + self.register_manager.freeReg(reg_ov.reg); self.eflags_inst = null; }, .eflags => { @@ -1231,10 +1488,10 @@ fn feed(self: *Self, bt: *Liveness.BigTomb, operand: Air.Inst.Ref) void { /// Asserts there is already capacity to insert into top branch inst_table. fn processDeath(self: *Self, inst: Air.Inst.Index) void { - const air_tags = self.air.instructions.items(.tag); - if (air_tags[inst] == .constant) return; - log.debug("%{d} => {}", .{ inst, MCValue.dead }); - self.inst_tracking.getPtr(inst).?.die(self); + switch (self.air.instructions.items(.tag)[inst]) { + .constant, .const_ty => unreachable, + else => self.inst_tracking.getPtr(inst).?.die(self, inst), + } } /// Called when there are no operands, and the instruction is always unreferenced. @@ -1249,7 +1506,7 @@ fn finishAirResult(self: *Self, inst: Air.Inst.Index, result: MCValue) void { .none, .dead, .unreach => {}, else => unreachable, // Why didn't the result die? } else { - log.debug("%{d} => {}", .{ inst, result }); + tracking_log.debug("%{d} => {} (birth)", .{ inst, result }); self.inst_tracking.putAssumeCapacityNoClobber(inst, InstTracking.init(result)); // In some cases, an operand may be reused as the result. 
// If that operand died and was a register, it was freed by @@ -1273,36 +1530,123 @@ fn finishAir(self: *Self, inst: Air.Inst.Index, result: MCValue, operands: [Live self.finishAirResult(inst, result); } -fn allocMem(self: *Self, inst: ?Air.Inst.Index, abi_size: u32, abi_align: u32) !u32 { - if (abi_align > self.stack_align) - self.stack_align = abi_align; - // TODO find a free slot instead of always appending - const offset = mem.alignForwardGeneric(u32, self.next_stack_offset + abi_size, abi_align); - self.next_stack_offset = offset; - self.max_end_stack = @max(self.max_end_stack, self.next_stack_offset); - try self.stack.putNoClobber(self.gpa, offset, .{ - .inst = inst, - .size = abi_size, - }); - return offset; +const FrameLayout = struct { + stack_mask: u32, + stack_adjust: u32, + save_reg_list: Mir.RegisterList, +}; + +fn setFrameLoc( + self: *Self, + frame_index: FrameIndex, + base: Register, + offset: *i32, + comptime aligned: bool, +) void { + const frame_i = @enumToInt(frame_index); + if (aligned) { + const alignment = @as(i32, 1) << self.frame_allocs.items(.abi_align)[frame_i]; + offset.* = mem.alignForwardGeneric(i32, offset.*, alignment); + } + self.frame_locs.set(frame_i, .{ .base = base, .disp = offset.* }); + offset.* += self.frame_allocs.items(.abi_size)[frame_i]; +} + +fn computeFrameLayout(self: *Self) !FrameLayout { + const frame_allocs_len = self.frame_allocs.len; + try self.frame_locs.resize(self.gpa, frame_allocs_len); + const stack_frame_order = try self.gpa.alloc(FrameIndex, frame_allocs_len - FrameIndex.named_count); + defer self.gpa.free(stack_frame_order); + + const frame_size = self.frame_allocs.items(.abi_size); + const frame_align = self.frame_allocs.items(.abi_align); + const frame_offset = self.frame_locs.items(.disp); + + for (stack_frame_order, FrameIndex.named_count..) |*frame_order, frame_index| + frame_order.* = @intToEnum(FrameIndex, frame_index); + { + const SortContext = struct { + frame_align: @TypeOf(frame_align), + pub fn lessThan(context: @This(), lhs: FrameIndex, rhs: FrameIndex) bool { + return context.frame_align[@enumToInt(lhs)] > context.frame_align[@enumToInt(rhs)]; + } + }; + const sort_context = SortContext{ .frame_align = frame_align }; + std.sort.sort(FrameIndex, stack_frame_order, sort_context, SortContext.lessThan); + } + + const call_frame_align = frame_align[@enumToInt(FrameIndex.call_frame)]; + const stack_frame_align = frame_align[@enumToInt(FrameIndex.stack_frame)]; + const args_frame_align = frame_align[@enumToInt(FrameIndex.args_frame)]; + const needed_align = @max(call_frame_align, stack_frame_align); + const need_align_stack = needed_align > args_frame_align; + + // Create list of registers to save in the prologue. 
+ // TODO handle register classes + var save_reg_list = Mir.RegisterList{}; + const callee_preserved_regs = abi.getCalleePreservedRegs(self.target.*); + for (callee_preserved_regs) |reg| { + if (self.register_manager.isRegAllocated(reg)) { + save_reg_list.push(callee_preserved_regs, reg); + } + } + + var rbp_offset = @intCast(i32, save_reg_list.count() * 8); + self.setFrameLoc(.base_ptr, .rbp, &rbp_offset, false); + self.setFrameLoc(.ret_addr, .rbp, &rbp_offset, false); + self.setFrameLoc(.args_frame, .rbp, &rbp_offset, false); + const stack_frame_align_offset = + if (need_align_stack) 0 else frame_offset[@enumToInt(FrameIndex.args_frame)]; + + var rsp_offset: i32 = 0; + self.setFrameLoc(.call_frame, .rsp, &rsp_offset, true); + self.setFrameLoc(.stack_frame, .rsp, &rsp_offset, true); + for (stack_frame_order) |frame_index| self.setFrameLoc(frame_index, .rsp, &rsp_offset, true); + rsp_offset += stack_frame_align_offset; + rsp_offset = mem.alignForwardGeneric(i32, rsp_offset, @as(i32, 1) << needed_align); + rsp_offset -= stack_frame_align_offset; + frame_size[@enumToInt(FrameIndex.call_frame)] = + @intCast(u31, rsp_offset - frame_offset[@enumToInt(FrameIndex.stack_frame)]); + + return .{ + .stack_mask = @as(u32, math.maxInt(u32)) << (if (need_align_stack) needed_align else 0), + .stack_adjust = @intCast(u32, rsp_offset - frame_offset[@enumToInt(FrameIndex.call_frame)]), + .save_reg_list = save_reg_list, + }; } -/// Use a pointer instruction as the basis for allocating stack memory. -fn allocMemPtr(self: *Self, inst: Air.Inst.Index) !u32 { - const ptr_ty = self.air.typeOfIndex(inst); - const elem_ty = ptr_ty.elemType(); +fn allocFrameIndex(self: *Self, alloc: FrameAlloc) !FrameIndex { + const frame_allocs_slice = self.frame_allocs.slice(); + const frame_size = frame_allocs_slice.items(.abi_size); + const frame_align = frame_allocs_slice.items(.abi_align); + for (self.free_frame_indices.keys(), 0..) |frame_index, free_i| { + const abi_size = frame_size[@enumToInt(frame_index)]; + if (abi_size != alloc.abi_size) continue; + const abi_align = &frame_align[@enumToInt(frame_index)]; + abi_align.* = @max(abi_align.*, alloc.abi_align); + + const stack_frame_align = &frame_align[@enumToInt(FrameIndex.stack_frame)]; + stack_frame_align.* = @max(stack_frame_align.*, alloc.abi_align); - if (!elem_ty.hasRuntimeBitsIgnoreComptime()) { - return self.allocMem(inst, @sizeOf(usize), @alignOf(usize)); + _ = self.free_frame_indices.swapRemoveAt(free_i); + return frame_index; } + const frame_index = @intToEnum(FrameIndex, self.frame_allocs.len); + try self.frame_allocs.append(self.gpa, alloc); + return frame_index; +} - const abi_size = math.cast(u32, elem_ty.abiSize(self.target.*)) orelse { - const mod = self.bin_file.options.module.?; - return self.fail("type '{}' too big to fit into stack frame", .{elem_ty.fmt(mod)}); - }; - // TODO swap this for inst.ty.ptrAlign - const abi_align = ptr_ty.ptrAlignment(self.target.*); - return self.allocMem(inst, abi_size, abi_align); +/// Use a pointer instruction as the basis for allocating stack memory. 
+fn allocMemPtr(self: *Self, inst: Air.Inst.Index) !FrameIndex { + const ptr_ty = self.air.typeOfIndex(inst); + const val_ty = ptr_ty.childType(); + return self.allocFrameIndex(FrameAlloc.init(.{ + .size = math.cast(u32, val_ty.abiSize(self.target.*)) orelse { + const mod = self.bin_file.options.module.?; + return self.fail("type '{}' too big to fit into stack frame", .{val_ty.fmt(mod)}); + }, + .alignment = @max(ptr_ty.ptrAlignment(self.target.*), 1), + })); } fn allocRegOrMem(self: *Self, inst: Air.Inst.Index, reg_ok: bool) !MCValue { @@ -1320,42 +1664,35 @@ fn allocRegOrMemAdvanced(self: *Self, elem_ty: Type, inst: ?Air.Inst.Index, reg_ }; if (reg_ok) { - switch (elem_ty.zigTypeTag()) { - .Vector => return self.fail("TODO allocRegOrMem for Vector type", .{}), - .Float => { - if (intrinsicsAllowed(self.target.*, elem_ty)) { - const ptr_bytes: u64 = 32; - if (abi_size <= ptr_bytes) { - if (self.register_manager.tryAllocReg(inst, sse)) |reg| { - return MCValue{ .register = registerAlias(reg, abi_size) }; - } - } - } - - return self.fail("TODO allocRegOrMem for Float type without SSE/AVX support", .{}); - }, - else => { - // Make sure the type can fit in a register before we try to allocate one. - const ptr_bits = self.target.cpu.arch.ptrBitWidth(); - const ptr_bytes: u64 = @divExact(ptr_bits, 8); - if (abi_size <= ptr_bytes) { - if (self.register_manager.tryAllocReg(inst, gp)) |reg| { - return MCValue{ .register = registerAlias(reg, abi_size) }; - } - } - }, + // Make sure the type can fit in a register before we try to allocate one. + const ptr_bits = self.target.cpu.arch.ptrBitWidth(); + const ptr_bytes: u64 = @divExact(ptr_bits, 8); + if (abi_size <= ptr_bytes) { + if (self.register_manager.tryAllocReg(inst, try self.regClassForType(elem_ty))) |reg| { + return MCValue{ .register = registerAlias(reg, abi_size) }; + } } } - const abi_align = elem_ty.abiAlignment(self.target.*); - if (abi_align > self.stack_align) - self.stack_align = abi_align; - const stack_offset = try self.allocMem(inst, abi_size, abi_align); - return MCValue{ .stack_offset = @intCast(i32, stack_offset) }; + const frame_index = try self.allocFrameIndex(FrameAlloc.initType(elem_ty, self.target.*)); + return .{ .load_frame = .{ .index = frame_index } }; +} + +fn regClassForType(self: *Self, ty: Type) !RegisterManager.RegisterBitSet { + return switch (ty.zigTypeTag()) { + .Vector => self.fail("TODO regClassForType for {}", .{ty.fmt(self.bin_file.options.module.?)}), + .Float => switch (ty.floatBits(self.target.*)) { + 32 => if (Target.x86.featureSetHas(self.target.cpu.features, .sse)) sse else gp, + 64 => if (Target.x86.featureSetHas(self.target.cpu.features, .sse2)) sse else gp, + else => gp, + }, + else => gp, + }; } const State = struct { registers: RegisterManager.TrackedRegisters, + reg_tracking: [RegisterManager.RegisterBitSet.bit_length]InstTracking, free_registers: RegisterManager.RegisterBitSet, inst_tracking_len: u32, scope_generation: u32, @@ -1370,8 +1707,14 @@ fn initRetroactiveState(self: *Self) State { fn saveRetroactiveState(self: *Self, state: *State) !void { try self.spillEflagsIfOccupied(); - state.registers = self.register_manager.registers; - state.free_registers = self.register_manager.free_registers; + const free_registers = self.register_manager.free_registers; + var it = free_registers.iterator(.{ .kind = .unset }); + while (it.next()) |index| { + const tracked_inst = self.register_manager.registers[index]; + state.registers[index] = tracked_inst; + state.reg_tracking[index] = 
self.inst_tracking.get(tracked_inst).?; + } + state.free_registers = free_registers; } fn saveState(self: *Self) !State { @@ -1387,18 +1730,18 @@ fn restoreState(self: *Self, state: State, deaths: []const Air.Inst.Index, compt close_scope: bool, }) !void { if (opts.close_scope) { - for (self.inst_tracking.values()[state.inst_tracking_len..]) |*tracking| tracking.die(self); + for ( + self.inst_tracking.keys()[state.inst_tracking_len..], + self.inst_tracking.values()[state.inst_tracking_len..], + ) |inst, *tracking| tracking.die(self, inst); self.inst_tracking.shrinkRetainingCapacity(state.inst_tracking_len); } - if (opts.resurrect) for (self.inst_tracking.values()[0..state.inst_tracking_len]) |*tracking| - tracking.resurrect(state.scope_generation); - const air_tags = self.air.instructions.items(.tag); - for (deaths) |death| switch (air_tags[death]) { - .constant => {}, - .const_ty => unreachable, - else => self.inst_tracking.getPtr(death).?.die(self), - }; + if (opts.resurrect) for ( + self.inst_tracking.keys()[0..state.inst_tracking_len], + self.inst_tracking.values()[0..state.inst_tracking_len], + ) |inst, *tracking| tracking.resurrect(inst, state.scope_generation); + for (deaths) |death| self.processDeath(death); for (0..state.registers.len) |index| { const current_maybe_inst = if (self.register_manager.free_registers.isSet(index)) @@ -1411,26 +1754,34 @@ fn restoreState(self: *Self, state: State, deaths: []const Air.Inst.Index, compt state.registers[index]; if (std.debug.runtime_safety) if (target_maybe_inst) |target_inst| assert(self.inst_tracking.getIndex(target_inst).? < state.inst_tracking_len); - if (current_maybe_inst == target_maybe_inst) continue; - const reg = RegisterManager.regAtTrackedIndex( - @intCast(RegisterManager.RegisterBitSet.ShiftInt, index), - ); if (opts.emit_instructions) { if (current_maybe_inst) |current_inst| { try self.inst_tracking.getPtr(current_inst).?.spill(self, current_inst); } if (target_maybe_inst) |target_inst| { - try self.inst_tracking.getPtr(target_inst).?.materialize(self, target_inst, reg); + try self.inst_tracking.getPtr(target_inst).?.materialize( + self, + target_inst, + state.reg_tracking[index], + ); } } if (opts.update_tracking) { if (current_maybe_inst) |current_inst| { - self.inst_tracking.getPtr(current_inst).?.trackSpill(self); + self.inst_tracking.getPtr(current_inst).?.trackSpill(self, current_inst); + } + { + const reg = RegisterManager.regAtTrackedIndex( + @intCast(RegisterManager.RegisterBitSet.ShiftInt, index), + ); + self.register_manager.freeReg(reg); + self.register_manager.getRegAssumeFree(reg, target_maybe_inst); } - self.register_manager.freeReg(reg); - self.register_manager.getRegAssumeFree(reg, target_maybe_inst); if (target_maybe_inst) |target_inst| { - self.inst_tracking.getPtr(target_inst).?.trackMaterialize(self, target_inst, reg); + self.inst_tracking.getPtr(target_inst).?.trackMaterialize( + target_inst, + state.reg_tracking[index], + ); } } } @@ -1438,7 +1789,7 @@ fn restoreState(self: *Self, state: State, deaths: []const Air.Inst.Index, compt try self.inst_tracking.getPtr(inst).?.spill(self, inst); if (opts.update_tracking) if (self.eflags_inst) |inst| { self.eflags_inst = null; - self.inst_tracking.getPtr(inst).?.trackSpill(self); + self.inst_tracking.getPtr(inst).?.trackSpill(self, inst); }; if (opts.update_tracking and std.debug.runtime_safety) { @@ -1452,9 +1803,9 @@ fn restoreState(self: *Self, state: State, deaths: []const Air.Inst.Index, compt pub fn spillInstruction(self: *Self, reg: Register, inst: 
Air.Inst.Index) !void { const tracking = self.inst_tracking.getPtr(inst).?; - assert(tracking.getReg().?.to64() == reg.to64()); + assert(tracking.getReg().?.id() == reg.id()); try tracking.spill(self, inst); - tracking.trackSpill(self); + tracking.trackSpill(self, inst); } pub fn spillEflagsIfOccupied(self: *Self) !void { @@ -1463,7 +1814,7 @@ pub fn spillEflagsIfOccupied(self: *Self) !void { const tracking = self.inst_tracking.getPtr(inst).?; assert(tracking.getCondition() != null); try tracking.spill(self, inst); - tracking.trackSpill(self); + tracking.trackSpill(self, inst); } } @@ -1477,15 +1828,8 @@ pub fn spillRegisters(self: *Self, registers: []const Register) !void { /// allocated. A second call to `copyToTmpRegister` may return the same register. /// This can have a side effect of spilling instructions to the stack to free up a register. fn copyToTmpRegister(self: *Self, ty: Type, mcv: MCValue) !Register { - const reg_class: RegisterManager.RegisterBitSet = switch (ty.zigTypeTag()) { - .Float => blk: { - if (intrinsicsAllowed(self.target.*, ty)) break :blk sse; - return self.fail("TODO copy {} to register", .{ty.fmtDebug()}); - }, - else => gp, - }; - const reg: Register = try self.register_manager.allocReg(null, reg_class); - try self.genSetReg(ty, reg, mcv); + const reg: Register = try self.register_manager.allocReg(null, try self.regClassForType(ty)); + try self.genSetReg(reg, ty, mcv); return reg; } @@ -1493,28 +1837,35 @@ fn copyToTmpRegister(self: *Self, ty: Type, mcv: MCValue) !Register { /// `reg_owner` is the instruction that gets associated with the register in the register table. /// This can have a side effect of spilling instructions to the stack to free up a register. /// WARNING make sure that the allocated register matches the returned MCValue from an instruction! 
-fn copyToRegisterWithInstTracking(self: *Self, reg_owner: Air.Inst.Index, ty: Type, mcv: MCValue) !MCValue { - const reg_class: RegisterManager.RegisterBitSet = switch (ty.zigTypeTag()) { - .Float => blk: { - if (intrinsicsAllowed(self.target.*, ty)) break :blk sse; - return self.fail("TODO copy {} to register", .{ty.fmtDebug()}); - }, - else => gp, - }; - const reg: Register = try self.register_manager.allocReg(reg_owner, reg_class); - try self.genSetReg(ty, reg, mcv); +fn copyToRegisterWithInstTracking( + self: *Self, + reg_owner: Air.Inst.Index, + ty: Type, + mcv: MCValue, +) !MCValue { + const reg: Register = try self.register_manager.allocReg(reg_owner, try self.regClassForType(ty)); + try self.genSetReg(reg, ty, mcv); return MCValue{ .register = reg }; } fn airAlloc(self: *Self, inst: Air.Inst.Index) !void { - const stack_offset = try self.allocMemPtr(inst); - const result = MCValue{ .ptr_stack_offset = @intCast(i32, stack_offset) }; + const result = MCValue{ .lea_frame = .{ .index = try self.allocMemPtr(inst) } }; return self.finishAir(inst, result, .{ .none, .none, .none }); } fn airRetPtr(self: *Self, inst: Air.Inst.Index) !void { - const stack_offset = try self.allocMemPtr(inst); - const result = MCValue{ .ptr_stack_offset = @intCast(i32, stack_offset) }; + const result: MCValue = switch (self.ret_mcv.long) { + else => unreachable, + .none => .{ .lea_frame = .{ .index = try self.allocMemPtr(inst) } }, + .load_frame => .{ .register_offset = .{ + .reg = (try self.copyToRegisterWithInstTracking( + inst, + self.air.typeOfIndex(inst), + self.ret_mcv.long, + )).register, + .off = self.ret_mcv.short.indirect.off, + } }, + }; return self.finishAir(inst, result, .{ .none, .none, .none }); } @@ -1577,12 +1928,14 @@ fn airIntCast(self: *Self, inst: Air.Inst.Index) !void { registerAlias(src_reg, min_abi_size), ); }, - .stack_offset => |src_off| { - try self.asmRegisterMemory(tag, dst_alias, Memory.sib( - Memory.PtrSize.fromSize(min_abi_size), - .{ .base = .rbp, .disp = -src_off }, - )); - }, + .load_frame => |frame_addr| try self.asmRegisterMemory( + tag, + dst_alias, + Memory.sib(Memory.PtrSize.fromSize(min_abi_size), .{ + .base = .{ .frame = frame_addr.index }, + .disp = frame_addr.off, + }), + ), else => return self.fail("TODO airIntCast from {s} to {s}", .{ @tagName(src_mcv), @tagName(dst_mcv), @@ -1591,7 +1944,7 @@ fn airIntCast(self: *Self, inst: Air.Inst.Index) !void { if (self.regExtraBits(min_ty) > 0) try self.truncateRegister(min_ty, dst_reg); }, else => { - try self.setRegOrMem(min_ty, dst_mcv, src_mcv); + try self.genCopy(min_ty, dst_mcv, src_mcv); const extra = dst_abi_size * 8 - dst_int_info.bits; if (extra > 0) { try self.genShiftBinOpMir(switch (signedness) { @@ -1653,16 +2006,22 @@ fn airSlice(self: *Self, inst: Air.Inst.Index) !void { const ty_pl = self.air.instructions.items(.data)[inst].ty_pl; const bin_op = self.air.extraData(Air.Bin, ty_pl.payload).data; + const slice_ty = self.air.typeOfIndex(inst); const ptr = try self.resolveInst(bin_op.lhs); const ptr_ty = self.air.typeOf(bin_op.lhs); const len = try self.resolveInst(bin_op.rhs); const len_ty = self.air.typeOf(bin_op.rhs); - const stack_offset = @intCast(i32, try self.allocMem(inst, 16, 16)); - try self.genSetStack(ptr_ty, stack_offset, ptr, .{}); - try self.genSetStack(len_ty, stack_offset - 8, len, .{}); - const result = MCValue{ .stack_offset = stack_offset }; + const frame_index = try self.allocFrameIndex(FrameAlloc.initType(slice_ty, self.target.*)); + try self.genSetMem(.{ .frame = frame_index }, 0, ptr_ty, 
ptr); + try self.genSetMem( + .{ .frame = frame_index }, + @intCast(i32, ptr_ty.abiSize(self.target.*)), + len_ty, + len, + ); + const result = MCValue{ .load_frame = .{ .index = frame_index } }; return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none }); } @@ -1778,14 +2137,14 @@ fn airAddSat(self: *Self, inst: Air.Inst.Index) !void { const reg_bits = self.regBitSize(ty); const cc: Condition = if (ty.isSignedInt()) cc: { - try self.genSetReg(ty, limit_reg, dst_mcv); + try self.genSetReg(limit_reg, ty, dst_mcv); try self.genShiftBinOpMir(.sar, ty, limit_mcv, .{ .immediate = reg_bits - 1 }); try self.genBinOpMir(.xor, ty, limit_mcv, .{ .immediate = (@as(u64, 1) << @intCast(u6, reg_bits - 1)) - 1, }); break :cc .o; } else cc: { - try self.genSetReg(ty, limit_reg, .{ + try self.genSetReg(limit_reg, ty, .{ .immediate = @as(u64, math.maxInt(u64)) >> @intCast(u6, 64 - reg_bits), }); break :cc .c; @@ -1829,14 +2188,14 @@ fn airSubSat(self: *Self, inst: Air.Inst.Index) !void { const reg_bits = self.regBitSize(ty); const cc: Condition = if (ty.isSignedInt()) cc: { - try self.genSetReg(ty, limit_reg, dst_mcv); + try self.genSetReg(limit_reg, ty, dst_mcv); try self.genShiftBinOpMir(.sar, ty, limit_mcv, .{ .immediate = reg_bits - 1 }); try self.genBinOpMir(.xor, ty, limit_mcv, .{ .immediate = (@as(u64, 1) << @intCast(u6, reg_bits - 1)) - 1, }); break :cc .o; } else cc: { - try self.genSetReg(ty, limit_reg, .{ .immediate = 0 }); + try self.genSetReg(limit_reg, ty, .{ .immediate = 0 }); break :cc .c; }; try self.genBinOpMir(.sub, ty, dst_mcv, rhs_mcv); @@ -1880,7 +2239,7 @@ fn airMulSat(self: *Self, inst: Air.Inst.Index) !void { const reg_bits = self.regBitSize(ty); const cc: Condition = if (ty.isSignedInt()) cc: { - try self.genSetReg(ty, limit_reg, lhs_mcv); + try self.genSetReg(limit_reg, ty, lhs_mcv); try self.genBinOpMir(.xor, ty, limit_mcv, rhs_mcv); try self.genShiftBinOpMir(.sar, ty, limit_mcv, .{ .immediate = reg_bits - 1 }); try self.genBinOpMir(.xor, ty, limit_mcv, .{ @@ -1888,7 +2247,7 @@ fn airMulSat(self: *Self, inst: Air.Inst.Index) !void { }); break :cc .o; } else cc: { - try self.genSetReg(ty, limit_reg, .{ + try self.genSetReg(limit_reg, ty, .{ .immediate = @as(u64, math.maxInt(u64)) >> @intCast(u6, 64 - reg_bits), }); break :cc .c; @@ -1916,17 +2275,18 @@ fn airAddSubWithOverflow(self: *Self, inst: Air.Inst.Index) !void { .Int => { try self.spillEflagsIfOccupied(); - const partial_mcv = switch (tag) { - .add_with_overflow => try self.genBinOp(null, .add, bin_op.lhs, bin_op.rhs), - .sub_with_overflow => try self.genBinOp(null, .sub, bin_op.lhs, bin_op.rhs), + const partial_mcv = try self.genBinOp(null, switch (tag) { + .add_with_overflow => .add, + .sub_with_overflow => .sub, else => unreachable, - }; + }, bin_op.lhs, bin_op.rhs); const int_info = ty.intInfo(self.target.*); const cc: Condition = switch (int_info.signedness) { .unsigned => .c, .signed => .o, }; + const tuple_ty = self.air.typeOfIndex(inst); if (int_info.bits >= 8 and math.isPowerOfTwo(int_info.bits)) { switch (partial_mcv) { .register => |reg| { @@ -1936,27 +2296,32 @@ fn airAddSubWithOverflow(self: *Self, inst: Air.Inst.Index) !void { else => {}, } - const abi_size = @intCast(i32, ty.abiSize(self.target.*)); - const dst_mcv = try self.allocRegOrMem(inst, false); - try self.genSetStack( + const frame_index = + try self.allocFrameIndex(FrameAlloc.initType(tuple_ty, self.target.*)); + try self.genSetMem( + .{ .frame = frame_index }, + @intCast(i32, tuple_ty.structFieldOffset(1, self.target.*)), Type.u1, - 
dst_mcv.stack_offset - abi_size, .{ .eflags = cc }, - .{}, ); - try self.genSetStack(ty, dst_mcv.stack_offset, partial_mcv, .{}); - break :result dst_mcv; + try self.genSetMem( + .{ .frame = frame_index }, + @intCast(i32, tuple_ty.structFieldOffset(0, self.target.*)), + ty, + partial_mcv, + ); + break :result .{ .load_frame = .{ .index = frame_index } }; } - const tuple_ty = self.air.typeOfIndex(inst); - const tuple_size = @intCast(u32, tuple_ty.abiSize(self.target.*)); - const tuple_align = tuple_ty.abiAlignment(self.target.*); - const overflow_bit_offset = @intCast(i32, tuple_ty.structFieldOffset(1, self.target.*)); - const stack_offset = @intCast(i32, try self.allocMem(inst, tuple_size, tuple_align)); - - try self.genSetStackTruncatedOverflowCompare(ty, stack_offset, overflow_bit_offset, partial_mcv.register, cc); - - break :result .{ .stack_offset = stack_offset }; + const frame_index = + try self.allocFrameIndex(FrameAlloc.initType(tuple_ty, self.target.*)); + try self.genSetFrameTruncatedOverflowCompare( + tuple_ty, + frame_index, + partial_mcv.register, + cc, + ); + break :result .{ .load_frame = .{ .index = frame_index } }; }, else => unreachable, } @@ -1998,6 +2363,7 @@ fn airShlWithOverflow(self: *Self, inst: Air.Inst.Index) !void { try self.genBinOpMir(.cmp, lhs_ty, tmp_mcv, lhs); const cc = Condition.ne; + const tuple_ty = self.air.typeOfIndex(inst); if (int_info.bits >= 8 and math.isPowerOfTwo(int_info.bits)) { switch (partial_mcv) { .register => |reg| { @@ -2007,27 +2373,32 @@ fn airShlWithOverflow(self: *Self, inst: Air.Inst.Index) !void { else => {}, } - const abi_size = @intCast(i32, lhs_ty.abiSize(self.target.*)); - const dst_mcv = try self.allocRegOrMem(inst, false); - try self.genSetStack( - Type.u1, - dst_mcv.stack_offset - abi_size, + const frame_index = + try self.allocFrameIndex(FrameAlloc.initType(tuple_ty, self.target.*)); + try self.genSetMem( + .{ .frame = frame_index }, + @intCast(i32, tuple_ty.structFieldOffset(1, self.target.*)), + tuple_ty.structFieldType(1), .{ .eflags = cc }, - .{}, ); - try self.genSetStack(lhs_ty, dst_mcv.stack_offset, partial_mcv, .{}); - break :result dst_mcv; + try self.genSetMem( + .{ .frame = frame_index }, + @intCast(i32, tuple_ty.structFieldOffset(0, self.target.*)), + tuple_ty.structFieldType(0), + partial_mcv, + ); + break :result .{ .load_frame = .{ .index = frame_index } }; } - const tuple_ty = self.air.typeOfIndex(inst); - const tuple_size = @intCast(u32, tuple_ty.abiSize(self.target.*)); - const tuple_align = tuple_ty.abiAlignment(self.target.*); - const overflow_bit_offset = @intCast(i32, tuple_ty.structFieldOffset(1, self.target.*)); - const stack_offset = @intCast(i32, try self.allocMem(inst, tuple_size, tuple_align)); - - try self.genSetStackTruncatedOverflowCompare(lhs_ty, stack_offset, overflow_bit_offset, partial_mcv.register, cc); - - break :result .{ .stack_offset = stack_offset }; + const frame_index = + try self.allocFrameIndex(FrameAlloc.initType(tuple_ty, self.target.*)); + try self.genSetFrameTruncatedOverflowCompare( + tuple_ty, + frame_index, + partial_mcv.register, + cc, + ); + break :result .{ .load_frame = .{ .index = frame_index } }; }, else => unreachable, } @@ -2035,17 +2406,17 @@ fn airShlWithOverflow(self: *Self, inst: Air.Inst.Index) !void { return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none }); } -fn genSetStackTruncatedOverflowCompare( +fn genSetFrameTruncatedOverflowCompare( self: *Self, - ty: Type, - stack_offset: i32, - overflow_bit_offset: i32, + tuple_ty: Type, + frame_index: 
FrameIndex, reg: Register, cc: Condition, ) !void { const reg_lock = self.register_manager.lockReg(reg); defer if (reg_lock) |lock| self.register_manager.unlockReg(lock); + const ty = tuple_ty.structFieldType(0); const int_info = ty.intInfo(self.target.*); const extended_ty = switch (int_info.signedness) { .signed => Type.isize, @@ -2062,7 +2433,7 @@ fn genSetStackTruncatedOverflowCompare( try self.asmSetccRegister(overflow_reg.to8(), cc); const scratch_reg = temp_regs[1]; - try self.genSetReg(extended_ty, scratch_reg, .{ .register = reg }); + try self.genSetReg(scratch_reg, extended_ty, .{ .register = reg }); try self.truncateRegister(ty, scratch_reg); try self.genBinOpMir( .cmp, @@ -2080,10 +2451,18 @@ fn genSetStackTruncatedOverflowCompare( .{ .register = eq_reg }, ); - try self.genSetStack(ty, stack_offset, .{ .register = scratch_reg }, .{}); - try self.genSetStack(Type.u1, stack_offset - overflow_bit_offset, .{ - .register = overflow_reg.to8(), - }, .{}); + try self.genSetMem( + .{ .frame = frame_index }, + @intCast(i32, tuple_ty.structFieldOffset(1, self.target.*)), + tuple_ty.structFieldType(1), + .{ .register = overflow_reg.to8() }, + ); + try self.genSetMem( + .{ .frame = frame_index }, + @intCast(i32, tuple_ty.structFieldOffset(0, self.target.*)), + ty, + .{ .register = scratch_reg }, + ); } fn airMulWithOverflow(self: *Self, inst: Air.Inst.Index) !void { @@ -2101,6 +2480,8 @@ fn airMulWithOverflow(self: *Self, inst: Air.Inst.Index) !void { .unsigned => .c, .signed => .o, }; + + const tuple_ty = self.air.typeOfIndex(inst); if (dst_info.bits >= 8 and math.isPowerOfTwo(dst_info.bits)) { var src_pl = Type.Payload.Bits{ .base = .{ .tag = switch (dst_info.signedness) { .signed => .int_signed, @@ -2127,16 +2508,21 @@ fn airMulWithOverflow(self: *Self, inst: Air.Inst.Index) !void { // For now, this is the only supported multiply that doesn't fit in a register. 
assert(dst_info.bits == 128 and src_pl.data == 64); - const dst_abi_size = @intCast(i32, dst_ty.abiSize(self.target.*)); - const dst_mcv = try self.allocRegOrMem(inst, false); - try self.genSetStack( - Type.u1, - dst_mcv.stack_offset - dst_abi_size, - .{ .immediate = 0 }, // 64x64 -> 128 never overflows - .{}, + const frame_index = + try self.allocFrameIndex(FrameAlloc.initType(tuple_ty, self.target.*)); + try self.genSetMem( + .{ .frame = frame_index }, + @intCast(i32, tuple_ty.structFieldOffset(1, self.target.*)), + tuple_ty.structFieldType(1), + .{ .immediate = 0 }, // overflow is impossible for 64-bit*64-bit -> 128-bit ); - try self.genSetStack(dst_ty, dst_mcv.stack_offset, partial_mcv, .{}); - break :result dst_mcv; + try self.genSetMem( + .{ .frame = frame_index }, + @intCast(i32, tuple_ty.structFieldOffset(0, self.target.*)), + tuple_ty.structFieldType(0), + partial_mcv, + ); + break :result .{ .load_frame = .{ .index = frame_index } }; } const dst_reg: Register = dst_reg: { @@ -2184,15 +2570,10 @@ fn airMulWithOverflow(self: *Self, inst: Air.Inst.Index) !void { } }; - const tuple_ty = self.air.typeOfIndex(inst); - const tuple_size = @intCast(u32, tuple_ty.abiSize(self.target.*)); - const tuple_align = tuple_ty.abiAlignment(self.target.*); - const overflow_bit_offset = @intCast(i32, tuple_ty.structFieldOffset(1, self.target.*)); - const stack_offset = @intCast(i32, try self.allocMem(inst, tuple_size, tuple_align)); - - try self.genSetStackTruncatedOverflowCompare(dst_ty, stack_offset, overflow_bit_offset, dst_reg, cc); - - break :result .{ .stack_offset = stack_offset }; + const frame_index = + try self.allocFrameIndex(FrameAlloc.initType(tuple_ty, self.target.*)); + try self.genSetFrameTruncatedOverflowCompare(tuple_ty, frame_index, dst_reg, cc); + break :result .{ .load_frame = .{ .index = frame_index } }; }, else => unreachable, } @@ -2215,31 +2596,40 @@ fn genIntMulDivOpMir( return self.fail("TODO implement genIntMulDivOpMir for ABI size larger than 8", .{}); } - lhs: { - switch (lhs) { - .register => |reg| if (reg.to64() == .rax) break :lhs, - else => {}, - } - try self.genSetReg(ty, .rax, lhs); - } - + try self.genSetReg(.rax, ty, lhs); switch (tag) { else => unreachable, .mul, .imul => {}, .div => try self.asmRegisterRegister(.xor, .edx, .edx), - .idiv => try self.asmOpOnly(.cqo), + .idiv => switch (self.regBitSize(ty)) { + 8 => try self.asmOpOnly(.cbw), + 16 => try self.asmOpOnly(.cwd), + 32 => try self.asmOpOnly(.cdq), + 64 => try self.asmOpOnly(.cqo), + else => unreachable, + }, } - const factor: MCValue = switch (rhs) { - .register, .stack_offset => rhs, + const mat_rhs: MCValue = switch (rhs) { + .register, .indirect, .load_frame => rhs, else => .{ .register = try self.copyToTmpRegister(ty, rhs) }, }; - switch (factor) { - .register => |reg| try self.asmRegister(tag, reg), - .stack_offset => |off| try self.asmMemory(tag, Memory.sib( - Memory.PtrSize.fromSize(abi_size), - .{ .base = .rbp, .disp = -off }, - )), + switch (mat_rhs) { + .register => |reg| try self.asmRegister(tag, registerAlias(reg, abi_size)), + .indirect, .load_frame => try self.asmMemory( + tag, + Memory.sib(Memory.PtrSize.fromSize(abi_size), switch (mat_rhs) { + .indirect => |reg_off| .{ + .base = .{ .reg = reg_off.reg }, + .disp = reg_off.off, + }, + .load_frame => |frame_addr| .{ + .base = .{ .frame = frame_addr.index }, + .disp = frame_addr.off, + }, + else => unreachable, + }), + ), else => unreachable, } } @@ -2247,7 +2637,8 @@ fn genIntMulDivOpMir( /// Always returns a register. 
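
One behavioral fix hidden in the `genIntMulDivOpMir` hunk above: `.idiv` previously always emitted `cqo`, but x86 signed division consumes a double-width dividend pair (`ah:al`, `dx:ax`, `edx:eax`, `rdx:rax`), so the sign extension must match the operand's own width. A sketch of the mapping, illustrative only:

```zig
fn signExtendMnemonic(reg_bit_size: u32) []const u8 {
    return switch (reg_bit_size) {
        8 => "cbw", // al -> ax
        16 => "cwd", // ax -> dx:ax
        32 => "cdq", // eax -> edx:eax
        64 => "cqo", // rax -> rdx:rax
        else => unreachable,
    };
}

test "sign extension matches operand width" {
    const std = @import("std");
    try std.testing.expectEqualStrings("cdq", signExtendMnemonic(32));
    try std.testing.expectEqualStrings("cqo", signExtendMnemonic(64));
}
```
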
/// Clobbers .rax and .rdx registers. fn genInlineIntDivFloor(self: *Self, ty: Type, lhs: MCValue, rhs: MCValue) !MCValue { - const signedness = ty.intInfo(self.target.*).signedness; + const abi_size = @intCast(u32, ty.abiSize(self.target.*)); + const int_info = ty.intInfo(self.target.*); const dividend: Register = switch (lhs) { .register => |reg| reg, else => try self.copyToTmpRegister(ty, lhs), @@ -2262,16 +2653,32 @@ fn genInlineIntDivFloor(self: *Self, ty: Type, lhs: MCValue, rhs: MCValue) !MCVa const divisor_lock = self.register_manager.lockReg(divisor); defer if (divisor_lock) |lock| self.register_manager.unlockReg(lock); - try self.genIntMulDivOpMir(switch (signedness) { + try self.genIntMulDivOpMir(switch (int_info.signedness) { .signed => .idiv, .unsigned => .div, - }, Type.isize, .{ .register = dividend }, .{ .register = divisor }); + }, ty, .{ .register = dividend }, .{ .register = divisor }); - try self.asmRegisterRegister(.xor, divisor.to64(), dividend.to64()); - try self.asmRegisterImmediate(.sar, divisor.to64(), Immediate.u(63)); - try self.asmRegisterRegister(.@"test", .rdx, .rdx); - try self.asmCmovccRegisterRegister(divisor.to64(), .rdx, .e); - try self.genBinOpMir(.add, Type.isize, .{ .register = divisor }, .{ .register = .rax }); + try self.asmRegisterRegister( + .xor, + registerAlias(divisor, abi_size), + registerAlias(dividend, abi_size), + ); + try self.asmRegisterImmediate( + .sar, + registerAlias(divisor, abi_size), + Immediate.u(int_info.bits - 1), + ); + try self.asmRegisterRegister( + .@"test", + registerAlias(.rdx, abi_size), + registerAlias(.rdx, abi_size), + ); + try self.asmCmovccRegisterRegister( + registerAlias(divisor, abi_size), + registerAlias(.rdx, abi_size), + .z, + ); + try self.genBinOpMir(.add, ty, .{ .register = divisor }, .{ .register = .rax }); return MCValue{ .register = divisor }; } @@ -2315,7 +2722,7 @@ fn airOptionalPayload(self: *Self, inst: Air.Inst.Index) !void { } const pl_mcv = try self.allocRegOrMem(inst, true); - try self.setRegOrMem(pl_ty, pl_mcv, switch (opt_mcv) { + try self.genCopy(pl_ty, pl_mcv, switch (opt_mcv) { else => opt_mcv, .register_overflow => |ro| .{ .register = ro.reg }, }); @@ -2361,11 +2768,7 @@ fn airOptionalPayloadPtrSet(self: *Self, inst: Air.Inst.Index) !void { const pl_ty = dst_ty.childType(); const pl_abi_size = @intCast(i32, pl_ty.abiSize(self.target.*)); - try self.asmMemoryImmediate( - .mov, - Memory.sib(.byte, .{ .base = dst_mcv.register, .disp = pl_abi_size }), - Immediate.u(1), - ); + try self.genSetMem(.{ .reg = dst_mcv.register }, pl_abi_size, Type.bool, .{ .immediate = 1 }); break :result if (self.liveness.isUnused(inst)) .unreach else dst_mcv; }; return self.finishAir(inst, result, .{ ty_op.operand, .none, .none }); @@ -2389,10 +2792,6 @@ fn airUnwrapErrUnionErr(self: *Self, inst: Air.Inst.Index) !void { const err_off = errUnionErrorOffset(payload_ty, self.target.*); switch (operand) { - .stack_offset => |off| { - const offset = off - @intCast(i32, err_off); - break :result MCValue{ .stack_offset = offset }; - }, .register => |reg| { // TODO reuse operand const eu_lock = self.register_manager.lockReg(reg); @@ -2407,6 +2806,10 @@ fn airUnwrapErrUnionErr(self: *Self, inst: Air.Inst.Index) !void { } break :result result; }, + .load_frame => |frame_addr| break :result .{ .load_frame = .{ + .index = frame_addr.index, + .off = frame_addr.off + @intCast(i32, err_off), + } }, else => return self.fail("TODO implement unwrap_err_err for {}", .{operand}), } }; @@ -2434,10 +2837,10 @@ fn 
genUnwrapErrorUnionPayloadMir( const payload_off = errUnionPayloadOffset(payload_ty, self.target.*); switch (err_union) { - .stack_offset => |off| { - const offset = off - @intCast(i32, payload_off); - break :result MCValue{ .stack_offset = offset }; - }, + .load_frame => |frame_addr| break :result .{ .load_frame = .{ + .index = frame_addr.index, + .off = frame_addr.off + @intCast(i32, payload_off), + } }, .register => |reg| { // TODO reuse operand const eu_lock = self.register_manager.lockReg(reg); @@ -2488,7 +2891,10 @@ fn airUnwrapErrUnionErrPtr(self: *Self, inst: Air.Inst.Index) !void { try self.asmRegisterMemory( .mov, registerAlias(dst_reg, err_abi_size), - Memory.sib(Memory.PtrSize.fromSize(err_abi_size), .{ .base = src_reg, .disp = err_off }), + Memory.sib(Memory.PtrSize.fromSize(err_abi_size), .{ + .base = .{ .reg = src_reg }, + .disp = err_off, + }), ); return self.finishAir(inst, dst_mcv, .{ ty_op.operand, .none, .none }); @@ -2523,7 +2929,7 @@ fn airUnwrapErrUnionPayloadPtr(self: *Self, inst: Air.Inst.Index) !void { try self.asmRegisterMemory( .lea, registerAlias(dst_reg, dst_abi_size), - Memory.sib(.qword, .{ .base = src_reg, .disp = pl_off }), + Memory.sib(.qword, .{ .base = .{ .reg = src_reg }, .disp = pl_off }), ); return self.finishAir(inst, dst_mcv, .{ ty_op.operand, .none, .none }); @@ -2548,7 +2954,10 @@ fn airErrUnionPayloadPtrSet(self: *Self, inst: Air.Inst.Index) !void { const err_abi_size = @intCast(u32, err_ty.abiSize(self.target.*)); try self.asmMemoryImmediate( .mov, - Memory.sib(Memory.PtrSize.fromSize(err_abi_size), .{ .base = src_reg, .disp = err_off }), + Memory.sib(Memory.PtrSize.fromSize(err_abi_size), .{ + .base = .{ .reg = src_reg }, + .disp = err_off, + }), Immediate.u(0), ); @@ -2567,7 +2976,7 @@ fn airErrUnionPayloadPtrSet(self: *Self, inst: Air.Inst.Index) !void { try self.asmRegisterMemory( .lea, registerAlias(dst_reg, dst_abi_size), - Memory.sib(.qword, .{ .base = src_reg, .disp = pl_off }), + Memory.sib(.qword, .{ .base = .{ .reg = src_reg }, .disp = pl_off }), ); break :result .{ .register = dst_reg }; }; @@ -2608,7 +3017,7 @@ fn airWrapOptional(self: *Self, inst: Air.Inst.Index) !void { defer if (pl_lock) |lock| self.register_manager.unlockReg(lock); const opt_mcv = try self.allocRegOrMem(inst, true); - try self.setRegOrMem(pl_ty, opt_mcv, pl_mcv); + try self.genCopy(pl_ty, opt_mcv, pl_mcv); if (!same_repr) { const pl_abi_size = @intCast(i32, pl_ty.abiSize(self.target.*)); @@ -2621,9 +3030,12 @@ fn airWrapOptional(self: *Self, inst: Air.Inst.Index) !void { Immediate.u(@intCast(u6, pl_abi_size * 8)), ), - .stack_offset => |off| try self.asmMemoryImmediate( + .load_frame => |frame_addr| try self.asmMemoryImmediate( .mov, - Memory.sib(.byte, .{ .base = .rbp, .disp = pl_abi_size - off }), + Memory.sib(.byte, .{ + .base = .{ .frame = frame_addr.index }, + .disp = frame_addr.off + pl_abi_size, + }), Immediate.u(1), ), } @@ -2637,26 +3049,21 @@ fn airWrapOptional(self: *Self, inst: Air.Inst.Index) !void { fn airWrapErrUnionPayload(self: *Self, inst: Air.Inst.Index) !void { const ty_op = self.air.instructions.items(.data)[inst].ty_op; - const error_union_ty = self.air.getRefType(ty_op.ty); - const payload_ty = error_union_ty.errorUnionPayload(); + const eu_ty = self.air.getRefType(ty_op.ty); + const pl_ty = eu_ty.errorUnionPayload(); + const err_ty = eu_ty.errorUnionSet(); const operand = try self.resolveInst(ty_op.operand); const result: MCValue = result: { - if (!payload_ty.hasRuntimeBitsIgnoreComptime()) { - break :result operand; - } - - const 
abi_size = @intCast(u32, error_union_ty.abiSize(self.target.*)); - const abi_align = error_union_ty.abiAlignment(self.target.*); - const stack_offset = @intCast(i32, try self.allocMem(inst, abi_size, abi_align)); - const payload_off = errUnionPayloadOffset(payload_ty, self.target.*); - const err_off = errUnionErrorOffset(payload_ty, self.target.*); - try self.genSetStack(payload_ty, stack_offset - @intCast(i32, payload_off), operand, .{}); - try self.genSetStack(Type.anyerror, stack_offset - @intCast(i32, err_off), .{ .immediate = 0 }, .{}); + if (!pl_ty.hasRuntimeBitsIgnoreComptime()) break :result .{ .immediate = 0 }; - break :result MCValue{ .stack_offset = stack_offset }; + const frame_index = try self.allocFrameIndex(FrameAlloc.initType(eu_ty, self.target.*)); + const pl_off = @intCast(i32, errUnionPayloadOffset(pl_ty, self.target.*)); + const err_off = @intCast(i32, errUnionErrorOffset(pl_ty, self.target.*)); + try self.genSetMem(.{ .frame = frame_index }, pl_off, pl_ty, operand); + try self.genSetMem(.{ .frame = frame_index }, err_off, err_ty, .{ .immediate = 0 }); + break :result .{ .load_frame = .{ .index = frame_index } }; }; - return self.finishAir(inst, result, .{ ty_op.operand, .none, .none }); } @@ -2664,26 +3071,21 @@ fn airWrapErrUnionPayload(self: *Self, inst: Air.Inst.Index) !void { fn airWrapErrUnionErr(self: *Self, inst: Air.Inst.Index) !void { const ty_op = self.air.instructions.items(.data)[inst].ty_op; - const error_union_ty = self.air.getRefType(ty_op.ty); - const payload_ty = error_union_ty.errorUnionPayload(); - const operand = try self.resolveInst(ty_op.operand); + const eu_ty = self.air.getRefType(ty_op.ty); + const pl_ty = eu_ty.errorUnionPayload(); + const err_ty = eu_ty.errorUnionSet(); const result: MCValue = result: { - if (!payload_ty.hasRuntimeBitsIgnoreComptime()) { - break :result operand; - } - - const abi_size = @intCast(u32, error_union_ty.abiSize(self.target.*)); - const abi_align = error_union_ty.abiAlignment(self.target.*); - const stack_offset = @intCast(i32, try self.allocMem(inst, abi_size, abi_align)); - const payload_off = errUnionPayloadOffset(payload_ty, self.target.*); - const err_off = errUnionErrorOffset(payload_ty, self.target.*); - try self.genSetStack(Type.anyerror, stack_offset - @intCast(i32, err_off), operand, .{}); - try self.genSetStack(payload_ty, stack_offset - @intCast(i32, payload_off), .undef, .{}); + if (!pl_ty.hasRuntimeBitsIgnoreComptime()) break :result try self.resolveInst(ty_op.operand); - break :result MCValue{ .stack_offset = stack_offset }; + const frame_index = try self.allocFrameIndex(FrameAlloc.initType(eu_ty, self.target.*)); + const pl_off = @intCast(i32, errUnionPayloadOffset(pl_ty, self.target.*)); + const err_off = @intCast(i32, errUnionErrorOffset(pl_ty, self.target.*)); + try self.genSetMem(.{ .frame = frame_index }, pl_off, pl_ty, .undef); + const operand = try self.resolveInst(ty_op.operand); + try self.genSetMem(.{ .frame = frame_index }, err_off, err_ty, operand); + break :result .{ .load_frame = .{ .index = frame_index } }; }; - return self.finishAir(inst, result, .{ ty_op.operand, .none, .none }); } @@ -2695,7 +3097,7 @@ fn airSlicePtr(self: *Self, inst: Air.Inst.Index) !void { const dst_mcv = try self.allocRegOrMem(inst, true); const dst_ty = self.air.typeOfIndex(inst); - try self.setRegOrMem(dst_ty, dst_mcv, src_mcv); + try self.genCopy(dst_ty, dst_mcv, src_mcv); break :result dst_mcv; }; return self.finishAir(inst, result, .{ ty_op.operand, .none, .none }); @@ -2707,9 +3109,10 @@ fn 
airSliceLen(self: *Self, inst: Air.Inst.Index) !void { const operand = try self.resolveInst(ty_op.operand); const dst_mcv: MCValue = blk: { switch (operand) { - .stack_offset => |off| { - break :blk MCValue{ .stack_offset = off - 8 }; - }, + .load_frame => |frame_addr| break :blk .{ .load_frame = .{ + .index = frame_addr.index, + .off = frame_addr.off + 8, + } }, else => return self.fail("TODO implement slice_len for {}", .{operand}), } }; @@ -2743,7 +3146,7 @@ fn airPtrSliceLenPtr(self: *Self, inst: Air.Inst.Index) !void { .lea, registerAlias(dst_reg, dst_abi_size), Memory.sib(.qword, .{ - .base = src_reg, + .base = .{ .reg = src_reg }, .disp = @divExact(self.target.cpu.arch.ptrBitWidth(), 8), }), ); @@ -2771,7 +3174,7 @@ fn elemOffset(self: *Self, index_ty: Type, index: MCValue, elem_size: u64) !Regi // Optimisation: if index MCValue is an immediate, we can multiply in `comptime` // and set the register directly to the scaled offset as an immediate. const reg = try self.register_manager.allocReg(null, gp); - try self.genSetReg(index_ty, reg, .{ .immediate = imm * elem_size }); + try self.genSetReg(reg, index_ty, .{ .immediate = imm * elem_size }); break :blk reg; }, else => { @@ -2812,10 +3215,14 @@ fn genSliceElemPtr(self: *Self, lhs: Air.Inst.Ref, rhs: Air.Inst.Ref) !MCValue { const addr_reg = try self.register_manager.allocReg(null, gp); switch (slice_mcv) { - .stack_offset => |off| try self.asmRegisterMemory(.mov, addr_reg.to64(), Memory.sib(.qword, .{ - .base = .rbp, - .disp = -off, - })), + .load_frame => |frame_addr| try self.asmRegisterMemory( + .mov, + addr_reg.to64(), + Memory.sib(.qword, .{ + .base = .{ .frame = frame_addr.index }, + .disp = frame_addr.off, + }), + ), else => return self.fail("TODO implement slice_elem_ptr when slice is {}", .{slice_mcv}), } // TODO we could allocate register here, but need to expect addr register and potentially @@ -2834,7 +3241,7 @@ fn airSliceElemVal(self: *Self, inst: Air.Inst.Index) !void { const slice_ptr_field_type = slice_ty.slicePtrFieldType(&buf); const elem_ptr = try self.genSliceElemPtr(bin_op.lhs, bin_op.rhs); const dst_mcv = try self.allocRegOrMem(inst, false); - try self.load(dst_mcv, elem_ptr, slice_ptr_field_type); + try self.load(dst_mcv, slice_ptr_field_type, elem_ptr); return self.finishAir(inst, dst_mcv, .{ bin_op.lhs, bin_op.rhs, .none }); } @@ -2872,33 +3279,27 @@ fn airArrayElemVal(self: *Self, inst: Air.Inst.Index) !void { const offset_reg_lock = self.register_manager.lockRegAssumeUnused(offset_reg); defer self.register_manager.unlockReg(offset_reg_lock); - const addr_reg = (try self.register_manager.allocReg(null, gp)).to64(); + const addr_reg = try self.register_manager.allocReg(null, gp); switch (array) { .register => { - const off = @intCast(i32, try self.allocMem( - inst, - @intCast(u32, array_ty.abiSize(self.target.*)), - array_ty.abiAlignment(self.target.*), - )); - try self.genSetStack(array_ty, off, array, .{}); - try self.asmRegisterMemory(.lea, addr_reg, Memory.sib(.qword, .{ - .base = .rbp, - .disp = -off, - })); - }, - .stack_offset => |off| { - try self.asmRegisterMemory(.lea, addr_reg, Memory.sib(.qword, .{ - .base = .rbp, - .disp = -off, - })); + const frame_index = try self.allocFrameIndex(FrameAlloc.initType(array_ty, self.target.*)); + try self.genSetMem(.{ .frame = frame_index }, 0, array_ty, array); + try self.asmRegisterMemory( + .lea, + addr_reg, + Memory.sib(.qword, .{ .base = .{ .frame = frame_index } }), + ); }, - .load_got => try self.genSetReg(array_ty, addr_reg, array), - .memory, 
.load_direct, .load_tlv => try self.genSetReg(Type.usize, addr_reg, switch (array) { - .memory => |addr| .{ .immediate = addr }, - .load_direct => |sym_index| .{ .lea_direct = sym_index }, - .load_tlv => |sym_index| .{ .lea_tlv = sym_index }, - else => unreachable, - }), + .load_frame => |frame_addr| try self.asmRegisterMemory( + .lea, + addr_reg, + Memory.sib(.qword, .{ .base = .{ .frame = frame_addr.index }, .disp = frame_addr.off }), + ), + .memory, + .load_direct, + .load_got, + .load_tlv, + => try self.genSetReg(addr_reg, Type.usize, array.address()), .lea_direct, .lea_tlv => unreachable, else => return self.fail("TODO implement array_elem_val when array is {}", .{array}), } @@ -2907,7 +3308,7 @@ fn airArrayElemVal(self: *Self, inst: Air.Inst.Index) !void { // offset register. const dst_mcv = try self.allocRegOrMem(inst, false); try self.genBinOpMir(.add, Type.usize, .{ .register = addr_reg }, .{ .register = offset_reg }); - try self.load(dst_mcv, .{ .register = addr_reg.to64() }, array_ty); + try self.genCopy(elem_ty, dst_mcv, .{ .indirect = .{ .reg = addr_reg } }); return self.finishAir(inst, dst_mcv, .{ bin_op.lhs, bin_op.rhs, .none }); } @@ -2948,7 +3349,7 @@ fn airPtrElemVal(self: *Self, inst: Air.Inst.Index) !void { else => null, }; defer if (dst_lock) |lock| self.register_manager.unlockReg(lock); - try self.load(dst_mcv, .{ .register = elem_ptr_reg }, ptr_ty); + try self.load(dst_mcv, ptr_ty, .{ .register = elem_ptr_reg }); return self.finishAir(inst, dst_mcv, .{ bin_op.lhs, bin_op.rhs, .none }); } @@ -2987,8 +3388,8 @@ fn airPtrElemPtr(self: *Self, inst: Air.Inst.Index) !void { fn airSetUnionTag(self: *Self, inst: Air.Inst.Index) !void { const bin_op = self.air.instructions.items(.data)[inst].bin_op; - const ptr_ty = self.air.typeOf(bin_op.lhs); - const union_ty = ptr_ty.childType(); + const ptr_union_ty = self.air.typeOf(bin_op.lhs); + const union_ty = ptr_union_ty.childType(); const tag_ty = self.air.typeOf(bin_op.rhs); const layout = union_ty.unionGetLayout(self.target.*); @@ -3012,12 +3413,15 @@ fn airSetUnionTag(self: *Self, inst: Air.Inst.Index) !void { const adjusted_ptr: MCValue = if (layout.payload_size > 0 and layout.tag_align < layout.payload_align) blk: { // TODO reusing the operand - const reg = try self.copyToTmpRegister(ptr_ty, ptr); - try self.genBinOpMir(.add, ptr_ty, .{ .register = reg }, .{ .immediate = layout.payload_size }); + const reg = try self.copyToTmpRegister(ptr_union_ty, ptr); + try self.genBinOpMir(.add, ptr_union_ty, .{ .register = reg }, .{ .immediate = layout.payload_size }); break :blk MCValue{ .register = reg }; } else ptr; - try self.store(adjusted_ptr, tag, ptr_ty, tag_ty); + var ptr_tag_pl = ptr_union_ty.ptrInfo(); + ptr_tag_pl.data.pointee_type = tag_ty; + const ptr_tag_ty = Type.initPayload(&ptr_tag_pl.base); + try self.store(ptr_tag_ty, adjusted_ptr, tag); return self.finishAir(inst, .none, .{ bin_op.lhs, bin_op.rhs, .none }); } @@ -3044,11 +3448,14 @@ fn airGetUnionTag(self: *Self, inst: Air.Inst.Index) !void { const tag_abi_size = tag_ty.abiSize(self.target.*); const dst_mcv: MCValue = blk: { switch (operand) { - .stack_offset => |off| { + .load_frame => |frame_addr| { if (tag_abi_size <= 8) { - const offset: i32 = if (layout.tag_align < layout.payload_align) @intCast(i32, layout.payload_size) else 0; + const off: i32 = if (layout.tag_align < layout.payload_align) + @intCast(i32, layout.payload_size) + else + 0; break :blk try self.copyToRegisterWithInstTracking(inst, tag_ty, .{ - .stack_offset = off - offset, + .load_frame = .{ 
.index = frame_addr.index, .off = frame_addr.off + off }, }); } @@ -3131,7 +3538,7 @@ fn airClz(self: *Self, inst: Air.Inst.Index) !void { .nz, ); - try self.genSetReg(dst_ty, dst_reg, .{ .immediate = src_bits - 1 }); + try self.genSetReg(dst_reg, dst_ty, .{ .immediate = src_bits - 1 }); try self.genBinOpMir(.sub, dst_ty, dst_mcv, .{ .register = imm_reg }); } break :result dst_mcv; @@ -3171,7 +3578,7 @@ fn airCtz(self: *Self, inst: Air.Inst.Index) !void { }; const tmp_mcv = tmp: { if (src_mcv.isImmediate() or self.liveness.operandDies(inst, 0)) break :tmp src_mcv; - try self.genSetReg(src_ty, dst_reg, src_mcv); + try self.genSetReg(dst_reg, src_ty, src_mcv); break :tmp dst_mcv; }; try self.genBinOpMir(.@"or", src_ty, tmp_mcv, mask_mcv); @@ -3345,7 +3752,7 @@ fn byteSwap(self: *Self, inst: Air.Inst.Index, src_ty: Type, src_mcv: MCValue, m const dst_lock = self.register_manager.lockRegAssumeUnused(dst_mcv.register); defer self.register_manager.unlockReg(dst_lock); - try self.genSetReg(src_ty, dst_mcv.register, src_mcv); + try self.genSetReg(dst_mcv.register, src_ty, src_mcv); switch (src_bits) { else => unreachable, 16 => try self.genBinOpMir(.rol, src_ty, dst_mcv, .{ .immediate = 8 }), @@ -3450,7 +3857,7 @@ fn airBitReverse(self: *Self, inst: Air.Inst.Index) !void { .lea, if (src_abi_size > 4) tmp.to64() else tmp.to32(), Memory.sib(.qword, .{ - .base = dst.to64(), + .base = .{ .reg = dst.to64() }, .scale_index = .{ .index = tmp.to64(), .scale = 1 << 2 }, }), ); @@ -3473,7 +3880,7 @@ fn airBitReverse(self: *Self, inst: Air.Inst.Index) !void { .lea, if (src_abi_size > 4) dst.to64() else dst.to32(), Memory.sib(.qword, .{ - .base = tmp.to64(), + .base = .{ .reg = tmp.to64() }, .scale_index = .{ .index = dst.to64(), .scale = 1 << 1 }, }), ); @@ -3507,6 +3914,17 @@ fn reuseOperand( op_index: Liveness.OperandInt, mcv: MCValue, ) bool { + return self.reuseOperandAdvanced(inst, operand, op_index, mcv, inst); +} + +fn reuseOperandAdvanced( + self: *Self, + inst: Air.Inst.Index, + operand: Air.Inst.Ref, + op_index: Liveness.OperandInt, + mcv: MCValue, + tracked_inst: Air.Inst.Index, +) bool { if (!self.liveness.operandDies(inst, op_index)) return false; @@ -3516,25 +3934,23 @@ fn reuseOperand( // new instruction. if (!self.register_manager.isRegFree(reg)) { if (RegisterManager.indexOfRegIntoTracked(reg)) |index| { - self.register_manager.registers[index] = inst; + self.register_manager.registers[index] = tracked_inst; } } - log.debug("%{d} => {} (reused)", .{ inst, reg }); - }, - .stack_offset => |off| { - log.debug("%{d} => stack offset {d} (reused)", .{ inst, off }); }, + .load_frame => |frame_addr| if (frame_addr.index.isNamed()) return false, else => return false, } // Prevent the operand deaths processing code from deallocating it. 
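
The `reuseOperandAdvanced` split above lets a caller reuse a dying operand's location while attributing it to a different tracked instruction, with one new restriction: frame slots with named indices (fixed frame members such as arguments or the return slot) are never handed over as a result location. A toy model of that decision, using a hypothetical simplified `MCValue`:

```zig
const std = @import("std");

const MCValue = union(enum) {
    register: u8,
    load_frame: struct { index: u32, named: bool },
    immediate: u64,
};

fn mayReuse(operand_dies: bool, mcv: MCValue) bool {
    if (!operand_dies) return false;
    return switch (mcv) {
        .register => true,
        // New in this change: named frame slots belong to the frame layout
        // itself, so they are never recycled for an arbitrary instruction.
        .load_frame => |frame_addr| !frame_addr.named,
        else => false,
    };
}

test "named frame slots are never reused" {
    try std.testing.expect(!mayReuse(true, .{ .load_frame = .{ .index = 0, .named = true } }));
    try std.testing.expect(mayReuse(true, .{ .load_frame = .{ .index = 5, .named = false } }));
    try std.testing.expect(!mayReuse(false, .{ .register = 0 }));
}
```
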
self.liveness.clearOperandDeath(inst, op_index); - self.getResolvedInstValue(Air.refToIndex(operand).?).reuse(self); + const op_inst = Air.refToIndex(operand).?; + self.getResolvedInstValue(op_inst).reuse(self, tracked_inst, op_inst); return true; } -fn packedLoad(self: *Self, dst_mcv: MCValue, ptr_mcv: MCValue, ptr_ty: Type) InnerError!void { +fn packedLoad(self: *Self, dst_mcv: MCValue, ptr_ty: Type, ptr_mcv: MCValue) InnerError!void { const ptr_info = ptr_ty.ptrInfo().data; const val_ty = ptr_info.pointee_type; @@ -3564,10 +3980,14 @@ fn packedLoad(self: *Self, dst_mcv: MCValue, ptr_mcv: MCValue, ptr_ty: Type) Inn if (val_bit_off < val_extra_bits) val_abi_size else val_abi_size * 2; if (load_abi_size <= 8) { const load_reg = registerAlias(dst_reg, load_abi_size); - try self.asmRegisterMemory(.mov, load_reg, Memory.sib( - Memory.PtrSize.fromSize(load_abi_size), - .{ .base = ptr_reg, .disp = val_byte_off }, - )); + try self.asmRegisterMemory( + .mov, + load_reg, + Memory.sib(Memory.PtrSize.fromSize(load_abi_size), .{ + .base = .{ .reg = ptr_reg }, + .disp = val_byte_off, + }), + ); try self.asmRegisterImmediate(.shr, load_reg, Immediate.u(val_bit_off)); } else { const tmp_reg = registerAlias(try self.register_manager.allocReg(null, gp), val_abi_size); @@ -3575,85 +3995,60 @@ fn packedLoad(self: *Self, dst_mcv: MCValue, ptr_mcv: MCValue, ptr_ty: Type) Inn defer self.register_manager.unlockReg(tmp_lock); const dst_alias = registerAlias(dst_reg, val_abi_size); - try self.asmRegisterMemory(.mov, dst_alias, Memory.sib( - Memory.PtrSize.fromSize(val_abi_size), - .{ .base = ptr_reg, .disp = val_byte_off }, - )); - try self.asmRegisterMemory(.mov, tmp_reg, Memory.sib( - Memory.PtrSize.fromSize(val_abi_size), - .{ .base = ptr_reg, .disp = val_byte_off + 1 }, - )); + try self.asmRegisterMemory( + .mov, + dst_alias, + Memory.sib(Memory.PtrSize.fromSize(val_abi_size), .{ + .base = .{ .reg = ptr_reg }, + .disp = val_byte_off, + }), + ); + try self.asmRegisterMemory( + .mov, + tmp_reg, + Memory.sib(Memory.PtrSize.fromSize(val_abi_size), .{ + .base = .{ .reg = ptr_reg }, + .disp = val_byte_off + 1, + }), + ); try self.asmRegisterRegisterImmediate(.shrd, dst_alias, tmp_reg, Immediate.u(val_bit_off)); } if (val_extra_bits > 0) try self.truncateRegister(val_ty, dst_reg); - try self.setRegOrMem(val_ty, dst_mcv, .{ .register = dst_reg }); -} - -fn load(self: *Self, dst_mcv: MCValue, ptr: MCValue, ptr_ty: Type) InnerError!void { - const elem_ty = ptr_ty.elemType(); - const abi_size = @intCast(u32, elem_ty.abiSize(self.target.*)); - switch (ptr) { - .none => unreachable, - .undef => unreachable, - .unreach => unreachable, - .dead => unreachable, - .eflags => unreachable, - .register_overflow => unreachable, - .immediate => |imm| { - try self.setRegOrMem(elem_ty, dst_mcv, .{ .memory = imm }); - }, - .stack_offset => { - const reg = try self.copyToTmpRegister(ptr_ty, ptr); - try self.load(dst_mcv, .{ .register = reg }, ptr_ty); - }, - .ptr_stack_offset => |off| { - try self.setRegOrMem(elem_ty, dst_mcv, .{ .stack_offset = off }); - }, - .register => |reg| { - const reg_lock = self.register_manager.lockReg(reg); - defer if (reg_lock) |lock| self.register_manager.unlockReg(lock); + try self.genCopy(val_ty, dst_mcv, .{ .register = dst_reg }); +} - switch (dst_mcv) { - .dead => unreachable, - .undef => unreachable, - .eflags => unreachable, - .register => |dst_reg| { - try self.asmRegisterMemory( - .mov, - registerAlias(dst_reg, abi_size), - Memory.sib(Memory.PtrSize.fromSize(abi_size), .{ .base = reg }), - ); - }, 
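
For the `packedLoad` hunk above: the generated `mov`+`shr` (or the two-load `mov`/`mov`+`shrd` pair when the field straddles the load width) computes an ordinary bitfield extraction. The same arithmetic in plain Zig, as a reference model only:

```zig
const std = @import("std");

fn extractBits(bytes: []const u8, bit_off: u16, bit_len: u16) u64 {
    var acc: u64 = 0;
    var i: u16 = 0;
    while (i < bit_len) : (i += 1) {
        const bit = bit_off + i;
        const b = (bytes[bit / 8] >> @intCast(u3, bit % 8)) & 1;
        acc |= @as(u64, b) << @intCast(u6, i);
    }
    return acc;
}

test "field straddling a byte boundary" {
    // A 6-bit field starting at bit 5 spans two bytes, like the shrd path.
    const mem = [_]u8{ 0b1110_0000, 0b0000_0101 };
    try std.testing.expectEqual(@as(u64, 0b101_111), extractBits(&mem, 5, 6));
}
```
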
- .stack_offset => |off| { - if (abi_size <= 8) { - const tmp_reg = try self.register_manager.allocReg(null, gp); - try self.load(.{ .register = tmp_reg }, ptr, ptr_ty); - return self.genSetStack(elem_ty, off, MCValue{ .register = tmp_reg }, .{}); - } +fn load(self: *Self, dst_mcv: MCValue, ptr_ty: Type, ptr_mcv: MCValue) InnerError!void { + const dst_ty = ptr_ty.childType(); + switch (ptr_mcv) { + .none, + .unreach, + .dead, + .undef, + .eflags, + .register_overflow, + .reserved_frame, + => unreachable, // not a valid pointer + .immediate, + .register, + .register_offset, + .lea_direct, + .lea_got, + .lea_tlv, + .lea_frame, + => try self.genCopy(dst_ty, dst_mcv, ptr_mcv.deref()), + .memory, + .indirect, + .load_direct, + .load_got, + .load_tlv, + .load_frame, + => { + const addr_reg = try self.copyToTmpRegister(ptr_ty, ptr_mcv); + const addr_lock = self.register_manager.lockRegAssumeUnused(addr_reg); + defer self.register_manager.unlockReg(addr_lock); - try self.genInlineMemcpy( - .{ .ptr_stack_offset = off }, - ptr, - .{ .immediate = abi_size }, - .{}, - ); - }, - else => return self.fail("TODO implement loading from register into {}", .{dst_mcv}), - } - }, - .load_direct => |sym_index| { - const addr_reg = try self.copyToTmpRegister(Type.usize, .{ .lea_direct = sym_index }); - const addr_reg_lock = self.register_manager.lockRegAssumeUnused(addr_reg); - defer self.register_manager.unlockReg(addr_reg_lock); - // Load the pointer, which is stored in memory - try self.asmRegisterMemory(.mov, addr_reg, Memory.sib(.qword, .{ .base = addr_reg })); - try self.load(dst_mcv, .{ .register = addr_reg }, ptr_ty); - }, - .load_tlv => |sym_index| try self.load(dst_mcv, .{ .lea_tlv = sym_index }, ptr_ty), - .memory, .load_got, .lea_direct, .lea_tlv => { - const reg = try self.copyToTmpRegister(ptr_ty, ptr); - try self.load(dst_mcv, .{ .register = reg }, ptr_ty); + try self.genCopy(dst_ty, dst_mcv, .{ .indirect = .{ .reg = addr_reg } }); }, } } @@ -3669,53 +4064,48 @@ fn airLoad(self: *Self, inst: Air.Inst.Index) !void { const reg_locks = self.register_manager.lockRegsAssumeUnused(3, .{ .rdi, .rsi, .rcx }); defer for (reg_locks) |lock| self.register_manager.unlockReg(lock); - const ptr = try self.resolveInst(ty_op.operand); - const dst_mcv: MCValue = if (elem_size <= 8 and self.reuseOperand(inst, ty_op.operand, 0, ptr)) + const ptr_mcv = try self.resolveInst(ty_op.operand); + const dst_mcv = if (elem_size <= 8 and self.reuseOperand(inst, ty_op.operand, 0, ptr_mcv)) // The MCValue that holds the pointer can be re-used as the value. 
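
The rewritten `load` above replaces the old per-tag recursion with a two-way split over the pointer's MCValue: address-like values dereference directly, while pointers that themselves sit in memory are first copied into a temporary register and read through `.indirect`. A minimal model of that split (hypothetical tags, not the compiler's full `MCValue`):

```zig
const std = @import("std");

const MCValue = union(enum) {
    immediate: u64, // hard-coded address
    register: u8, // address held in a register
    lea_frame: u32, // address of a frame slot
    load_frame: u32, // pointer *stored in* a frame slot
    memory: u64, // value at a hard-coded address
    indirect: u8, // value pointed to by a register
};

fn deref(ptr: MCValue) ?MCValue {
    return switch (ptr) {
        .immediate => |addr| .{ .memory = addr },
        .register => |reg| .{ .indirect = reg },
        .lea_frame => |index| .{ .load_frame = index },
        // Pointer lives in memory: no direct deref; the real code first
        // materializes it in a temp register, then loads via `.indirect`.
        .load_frame, .memory, .indirect => null,
    };
}

test "address-like pointers deref for free" {
    try std.testing.expect(deref(.{ .lea_frame = 2 }).? == .load_frame);
    try std.testing.expect(deref(.{ .load_frame = 2 }) == null);
}
```
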
- ptr + ptr_mcv else try self.allocRegOrMem(inst, true); const ptr_ty = self.air.typeOf(ty_op.operand); if (ptr_ty.ptrInfo().data.host_size > 0) { - try self.packedLoad(dst_mcv, ptr, ptr_ty); + try self.packedLoad(dst_mcv, ptr_ty, ptr_mcv); } else { - try self.load(dst_mcv, ptr, ptr_ty); + try self.load(dst_mcv, ptr_ty, ptr_mcv); } break :result dst_mcv; }; return self.finishAir(inst, result, .{ ty_op.operand, .none, .none }); } -fn packedStore( - self: *Self, - ptr_mcv: MCValue, - val_mcv: MCValue, - ptr_ty: Type, - val_ty: Type, -) InnerError!void { +fn packedStore(self: *Self, ptr_ty: Type, ptr_mcv: MCValue, src_mcv: MCValue) InnerError!void { const ptr_info = ptr_ty.ptrInfo().data; + const src_ty = ptr_ty.childType(); const limb_abi_size = @min(ptr_info.host_size, 8); const limb_abi_bits = limb_abi_size * 8; - const val_bit_size = val_ty.bitSize(self.target.*); - const val_byte_off = @intCast(i32, ptr_info.bit_offset / limb_abi_bits * limb_abi_size); - const val_bit_off = ptr_info.bit_offset % limb_abi_bits; + const src_bit_size = src_ty.bitSize(self.target.*); + const src_byte_off = @intCast(i32, ptr_info.bit_offset / limb_abi_bits * limb_abi_size); + const src_bit_off = ptr_info.bit_offset % limb_abi_bits; const ptr_reg = try self.copyToTmpRegister(ptr_ty, ptr_mcv); const ptr_lock = self.register_manager.lockRegAssumeUnused(ptr_reg); defer self.register_manager.unlockReg(ptr_lock); var limb_i: u16 = 0; - while (limb_i * limb_abi_bits < val_bit_off + val_bit_size) : (limb_i += 1) { - const part_bit_off = if (limb_i == 0) val_bit_off else 0; + while (limb_i * limb_abi_bits < src_bit_off + src_bit_size) : (limb_i += 1) { + const part_bit_off = if (limb_i == 0) src_bit_off else 0; const part_bit_size = - @min(val_bit_off + val_bit_size - limb_i * limb_abi_bits, limb_abi_bits) - part_bit_off; - const limb_mem = Memory.sib( - Memory.PtrSize.fromSize(limb_abi_size), - .{ .base = ptr_reg, .disp = val_byte_off + limb_i * limb_abi_bits }, - ); + @min(src_bit_off + src_bit_size - limb_i * limb_abi_bits, limb_abi_bits) - part_bit_off; + const limb_mem = Memory.sib(Memory.PtrSize.fromSize(limb_abi_size), .{ + .base = .{ .reg = ptr_reg }, + .disp = src_byte_off + limb_i * limb_abi_bits, + }); const part_mask = (@as(u64, math.maxInt(u64)) >> @intCast(u6, 64 - part_bit_size)) << @intCast(u6, part_bit_off); @@ -3731,208 +4121,59 @@ fn packedStore( try self.asmMemoryRegister(.@"and", limb_mem, part_mask_reg); } - if (val_bit_size <= 64) { + if (src_bit_size <= 64) { const tmp_reg = try self.register_manager.allocReg(null, gp); const tmp_mcv = MCValue{ .register = tmp_reg }; const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg); defer self.register_manager.unlockReg(tmp_lock); - try self.genSetReg(val_ty, tmp_reg, val_mcv); + try self.genSetReg(tmp_reg, src_ty, src_mcv); switch (limb_i) { - 0 => try self.genShiftBinOpMir(.shl, val_ty, tmp_mcv, .{ .immediate = val_bit_off }), - 1 => try self.genShiftBinOpMir(.shr, val_ty, tmp_mcv, .{ - .immediate = limb_abi_bits - val_bit_off, + 0 => try self.genShiftBinOpMir(.shl, src_ty, tmp_mcv, .{ .immediate = src_bit_off }), + 1 => try self.genShiftBinOpMir(.shr, src_ty, tmp_mcv, .{ + .immediate = limb_abi_bits - src_bit_off, }), else => unreachable, } - try self.genBinOpMir(.@"and", val_ty, tmp_mcv, .{ .immediate = part_mask }); + try self.genBinOpMir(.@"and", src_ty, tmp_mcv, .{ .immediate = part_mask }); try self.asmMemoryRegister(.@"or", limb_mem, registerAlias(tmp_reg, limb_abi_size)); } else return self.fail("TODO: implement packed store of {}", 
.{ - val_ty.fmt(self.bin_file.options.module.?), + src_ty.fmt(self.bin_file.options.module.?), }); } } -fn store(self: *Self, ptr: MCValue, value: MCValue, ptr_ty: Type, value_ty: Type) InnerError!void { - const abi_size = @intCast(u32, value_ty.abiSize(self.target.*)); - switch (ptr) { - .none => unreachable, - .undef => unreachable, - .unreach => unreachable, - .dead => unreachable, - .eflags => unreachable, - .register_overflow => unreachable, - .immediate, .stack_offset => { - const reg = try self.copyToTmpRegister(ptr_ty, ptr); - try self.store(.{ .register = reg }, value, ptr_ty, value_ty); - }, - .ptr_stack_offset => |off| { - try self.genSetStack(value_ty, off, value, .{}); - }, - .register => |reg| { - const reg_lock = self.register_manager.lockReg(reg); - defer if (reg_lock) |lock| self.register_manager.unlockReg(lock); - - switch (value) { - .none => unreachable, - .dead => unreachable, - .unreach => unreachable, - .eflags => |cc| try self.asmSetccMemory( - Memory.sib(Memory.PtrSize.fromSize(abi_size), .{ .base = reg.to64() }), - cc, - ), - .undef => if (self.wantSafety()) switch (abi_size) { - 1 => try self.store(ptr, .{ .immediate = 0xaa }, ptr_ty, value_ty), - 2 => try self.store(ptr, .{ .immediate = 0xaaaa }, ptr_ty, value_ty), - 4 => try self.store(ptr, .{ .immediate = 0xaaaaaaaa }, ptr_ty, value_ty), - 8 => try self.store(ptr, .{ .immediate = 0xaaaaaaaaaaaaaaaa }, ptr_ty, value_ty), - else => try self.genInlineMemset( - ptr, - .{ .immediate = 0xaa }, - .{ .immediate = abi_size }, - .{}, - ), - }, - .immediate => |imm| switch (self.regBitSize(value_ty)) { - 8 => try self.asmMemoryImmediate( - .mov, - Memory.sib(Memory.PtrSize.fromSize(abi_size), .{ .base = reg.to64() }), - if (math.cast(i8, @bitCast(i64, imm))) |small| - Immediate.s(small) - else - Immediate.u(@intCast(u8, imm)), - ), - 16 => try self.asmMemoryImmediate( - .mov, - Memory.sib(Memory.PtrSize.fromSize(abi_size), .{ .base = reg.to64() }), - if (math.cast(i16, @bitCast(i64, imm))) |small| - Immediate.s(small) - else - Immediate.u(@intCast(u16, imm)), - ), - 32 => try self.asmMemoryImmediate( - .mov, - Memory.sib(Memory.PtrSize.fromSize(abi_size), .{ .base = reg.to64() }), - if (math.cast(i32, @bitCast(i64, imm))) |small| - Immediate.s(small) - else - Immediate.u(@intCast(u32, imm)), - ), - 64 => if (math.cast(i32, @bitCast(i64, imm))) |small| - try self.asmMemoryImmediate( - .mov, - Memory.sib(Memory.PtrSize.fromSize(abi_size), .{ .base = reg.to64() }), - Immediate.s(small), - ) - else - try self.asmMemoryRegister( - .mov, - Memory.sib(Memory.PtrSize.fromSize(abi_size), .{ .base = reg.to64() }), - registerAlias(try self.copyToTmpRegister(value_ty, value), abi_size), - ), - else => unreachable, - }, - .register => |src_reg| try self.genInlineMemcpyRegisterRegister( - value_ty, - reg, - src_reg, - 0, - ), - .register_overflow => |ro| { - const ro_reg_lock = self.register_manager.lockReg(ro.reg); - defer if (ro_reg_lock) |lock| self.register_manager.unlockReg(lock); - - const wrapped_ty = value_ty.structFieldType(0); - try self.genInlineMemcpyRegisterRegister(wrapped_ty, reg, ro.reg, 0); - - const overflow_bit_ty = value_ty.structFieldType(1); - const overflow_bit_offset = value_ty.structFieldOffset(1, self.target.*); - const tmp_reg = try self.register_manager.allocReg(null, gp); - try self.asmSetccRegister(tmp_reg.to8(), ro.eflags); - try self.genInlineMemcpyRegisterRegister( - overflow_bit_ty, - reg, - tmp_reg, - -@intCast(i32, overflow_bit_offset), - ); - }, - .memory, .load_tlv, .load_direct => if (abi_size <= 
8) { - const tmp_reg = try self.copyToTmpRegister(value_ty, value); - const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg); - defer self.register_manager.unlockReg(tmp_lock); - - try self.store(ptr, .{ .register = tmp_reg }, ptr_ty, value_ty); - } else { - const addr_reg = (try self.register_manager.allocReg(null, gp)).to64(); - const addr_lock = self.register_manager.lockRegAssumeUnused(addr_reg); - defer self.register_manager.unlockReg(addr_lock); - - try self.genSetReg(Type.usize, addr_reg, switch (value) { - .memory => |addr| .{ .immediate = addr }, - .load_direct => |sym_index| .{ .lea_direct = sym_index }, - .load_tlv => |sym_index| .{ .lea_tlv = sym_index }, - else => unreachable, - }); - try self.genInlineMemcpy( - ptr, - .{ .register = addr_reg }, - .{ .immediate = abi_size }, - .{}, - ); - }, - .stack_offset => |off| if (abi_size <= 8) { - const tmp_reg = try self.copyToTmpRegister(value_ty, value); - const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg); - defer self.register_manager.unlockReg(tmp_lock); - - try self.store(ptr, .{ .register = tmp_reg }, ptr_ty, value_ty); - } else try self.genInlineMemcpy( - ptr, - .{ .ptr_stack_offset = off }, - .{ .immediate = abi_size }, - .{}, - ), - .ptr_stack_offset, .load_got, .lea_direct, .lea_tlv => { - const tmp_reg = try self.copyToTmpRegister(value_ty, value); - const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg); - defer self.register_manager.unlockReg(tmp_lock); - - try self.store(ptr, .{ .register = tmp_reg }, ptr_ty, value_ty); - }, - } - }, - .memory, .load_direct, .load_tlv => { - const value_lock: ?RegisterLock = switch (value) { - .register => |reg| self.register_manager.lockReg(reg), - else => null, - }; - defer if (value_lock) |lock| self.register_manager.unlockReg(lock); - - const addr_reg = (try self.register_manager.allocReg(null, gp)).to64(); - const addr_reg_lock = self.register_manager.lockRegAssumeUnused(addr_reg); - defer self.register_manager.unlockReg(addr_reg_lock); - - switch (ptr) { - .memory => |addr| try self.genSetReg(ptr_ty, addr_reg, .{ .immediate = addr }), - .load_direct => |sym_index| try self.genSetReg(ptr_ty, addr_reg, .{ .lea_direct = sym_index }), - .load_tlv => |sym_index| try self.genSetReg(ptr_ty, addr_reg, .{ .lea_tlv = sym_index }), - else => unreachable, - } - if (ptr != .load_tlv) { - // Load the pointer, which is stored in memory - try self.asmRegisterMemory(.mov, addr_reg, Memory.sib(.qword, .{ .base = addr_reg })); - } - - const new_ptr = MCValue{ .register = addr_reg }; - try self.store(new_ptr, value, ptr_ty, value_ty); - }, - .load_got, .lea_direct, .lea_tlv => { - const addr_reg = try self.copyToTmpRegister(ptr_ty, ptr); - const addr_reg_lock = self.register_manager.lockRegAssumeUnused(addr_reg); - defer self.register_manager.unlockReg(addr_reg_lock); +fn store(self: *Self, ptr_ty: Type, ptr_mcv: MCValue, src_mcv: MCValue) InnerError!void { + const src_ty = ptr_ty.childType(); + switch (ptr_mcv) { + .none, + .unreach, + .dead, + .undef, + .eflags, + .register_overflow, + .reserved_frame, + => unreachable, // not a valid pointer + .immediate, + .register, + .register_offset, + .lea_direct, + .lea_got, + .lea_tlv, + .lea_frame, + => try self.genCopy(src_ty, ptr_mcv.deref(), src_mcv), + .memory, + .indirect, + .load_direct, + .load_got, + .load_tlv, + .load_frame, + => { + const addr_reg = try self.copyToTmpRegister(ptr_ty, ptr_mcv); + const addr_lock = self.register_manager.lockRegAssumeUnused(addr_reg); + defer 
self.register_manager.unlockReg(addr_lock); - const new_ptr = MCValue{ .register = addr_reg }; - try self.store(new_ptr, value, ptr_ty, value_ty); + try self.genCopy(src_ty, .{ .indirect = .{ .reg = addr_reg } }, src_mcv); }, } } @@ -3944,15 +4185,13 @@ fn airStore(self: *Self, inst: Air.Inst.Index, safety: bool) !void { // TODO if the value is undef, don't lower this instruction } const bin_op = self.air.instructions.items(.data)[inst].bin_op; - const ptr = try self.resolveInst(bin_op.lhs); + const ptr_mcv = try self.resolveInst(bin_op.lhs); const ptr_ty = self.air.typeOf(bin_op.lhs); - const value = try self.resolveInst(bin_op.rhs); - const value_ty = self.air.typeOf(bin_op.rhs); - log.debug("airStore(%{d}): {} <- {}", .{ inst, ptr, value }); + const src_mcv = try self.resolveInst(bin_op.rhs); if (ptr_ty.ptrInfo().data.host_size > 0) { - try self.packedStore(ptr, value, ptr_ty, value_ty); + try self.packedStore(ptr_ty, ptr_mcv, src_mcv); } else { - try self.store(ptr, value, ptr_ty, value_ty); + try self.store(ptr_ty, ptr_mcv, src_mcv); } return self.finishAir(inst, .none, .{ bin_op.lhs, bin_op.rhs, .none }); } @@ -3971,67 +4210,65 @@ fn airStructFieldPtrIndex(self: *Self, inst: Air.Inst.Index, index: u8) !void { } fn fieldPtr(self: *Self, inst: Air.Inst.Index, operand: Air.Inst.Ref, index: u32) !MCValue { + const ptr_field_ty = self.air.typeOfIndex(inst); const mcv = try self.resolveInst(operand); - const ptr_ty = self.air.typeOf(operand); - const container_ty = ptr_ty.childType(); + const ptr_container_ty = self.air.typeOf(operand); + const container_ty = ptr_container_ty.childType(); const field_offset = switch (container_ty.containerLayout()) { .Auto, .Extern => @intCast(u32, container_ty.structFieldOffset(index, self.target.*)), - .Packed => if (container_ty.zigTypeTag() == .Struct and ptr_ty.ptrInfo().data.host_size == 0) + .Packed => if (container_ty.zigTypeTag() == .Struct and + ptr_field_ty.ptrInfo().data.host_size == 0) container_ty.packedStructFieldByteOffset(index, self.target.*) else 0, }; - const dst_mcv: MCValue = result: { + const result: MCValue = result: { switch (mcv) { - .stack_offset, .lea_tlv, .load_tlv => { - const offset_reg = try self.copyToTmpRegister(ptr_ty, .{ + .load_frame, .lea_tlv, .load_tlv => { + const offset_reg = try self.copyToTmpRegister(Type.usize, .{ .immediate = field_offset, }); const offset_reg_lock = self.register_manager.lockRegAssumeUnused(offset_reg); defer self.register_manager.unlockReg(offset_reg_lock); - const dst_mcv = try self.copyToRegisterWithInstTracking(inst, ptr_ty, switch (mcv) { + const dst_mcv = try self.copyToRegisterWithInstTracking(inst, Type.usize, switch (mcv) { .load_tlv => |sym_index| .{ .lea_tlv = sym_index }, else => mcv, }); - try self.genBinOpMir(.add, ptr_ty, dst_mcv, .{ .register = offset_reg }); + try self.genBinOpMir(.add, Type.usize, dst_mcv, .{ .register = offset_reg }); break :result dst_mcv; }, - .ptr_stack_offset => |off| { - const ptr_stack_offset = off - @intCast(i32, field_offset); - break :result MCValue{ .ptr_stack_offset = ptr_stack_offset }; - }, - .register => |reg| { - const reg_lock = self.register_manager.lockRegAssumeUnused(reg); - defer self.register_manager.unlockReg(reg_lock); - - const offset_reg = try self.copyToTmpRegister(ptr_ty, .{ - .immediate = field_offset, - }); - const offset_reg_lock = self.register_manager.lockRegAssumeUnused(offset_reg); - defer self.register_manager.unlockReg(offset_reg_lock); - - const can_reuse_operand = self.reuseOperand(inst, operand, 0, mcv); - const 
result_reg: Register = blk: { - if (can_reuse_operand) { - break :blk reg; - } else { - const result_reg = try self.register_manager.allocReg(inst, gp); - try self.genSetReg(ptr_ty, result_reg, mcv); - break :blk result_reg; - } - }; - const result_reg_lock = self.register_manager.lockReg(result_reg); - defer if (result_reg_lock) |lock| self.register_manager.unlockReg(lock); + .indirect => |reg_off| break :result .{ .indirect = .{ + .reg = reg_off.reg, + .off = reg_off.off + @intCast(i32, field_offset), + } }, + .lea_frame => |frame_addr| break :result .{ .lea_frame = .{ + .index = frame_addr.index, + .off = frame_addr.off + @intCast(i32, field_offset), + } }, + .register, .register_offset => { + const src_reg = mcv.getReg().?; + const src_lock = self.register_manager.lockRegAssumeUnused(src_reg); + defer self.register_manager.unlockReg(src_lock); - try self.genBinOpMir(.add, ptr_ty, .{ .register = result_reg }, .{ .register = offset_reg }); - break :result MCValue{ .register = result_reg }; + const dst_mcv: MCValue = if (self.reuseOperand(inst, operand, 0, mcv)) + mcv + else + .{ .register = try self.copyToTmpRegister(ptr_field_ty, mcv) }; + break :result .{ .register_offset = .{ + .reg = dst_mcv.getReg().?, + .off = switch (dst_mcv) { + .register => 0, + .register_offset => |reg_off| reg_off.off, + else => unreachable, + } + @intCast(i32, field_offset), + } }; }, else => return self.fail("TODO implement fieldPtr for {}", .{mcv}), } }; - return dst_mcv; + return result; } fn airStructFieldVal(self: *Self, inst: Air.Inst.Index) !void { @@ -4055,7 +4292,7 @@ fn airStructFieldVal(self: *Self, inst: Air.Inst.Index) !void { }; switch (src_mcv) { - .stack_offset => |src_off| { + .load_frame => |frame_addr| { const field_abi_size = @intCast(u32, field_ty.abiSize(self.target.*)); const limb_abi_size = @min(field_abi_size, 8); const limb_abi_bits = limb_abi_size * 8; @@ -4063,11 +4300,14 @@ fn airStructFieldVal(self: *Self, inst: Air.Inst.Index) !void { const field_bit_off = field_off % limb_abi_bits; if (field_bit_off == 0) { - const off_mcv = MCValue{ .stack_offset = src_off - field_byte_off }; + const off_mcv = MCValue{ .load_frame = .{ + .index = frame_addr.index, + .off = frame_addr.off + field_byte_off, + } }; if (self.reuseOperand(inst, operand, 0, src_mcv)) break :result off_mcv; const dst_mcv = try self.allocRegOrMem(inst, true); - try self.setRegOrMem(field_ty, dst_mcv, off_mcv); + try self.genCopy(field_ty, dst_mcv, off_mcv); break :result dst_mcv; } @@ -4081,10 +4321,14 @@ fn airStructFieldVal(self: *Self, inst: Air.Inst.Index) !void { if (field_bit_off < field_extra_bits) field_abi_size else field_abi_size * 2; if (load_abi_size <= 8) { const load_reg = registerAlias(dst_reg, load_abi_size); - try self.asmRegisterMemory(.mov, load_reg, Memory.sib( - Memory.PtrSize.fromSize(load_abi_size), - .{ .base = .rbp, .disp = field_byte_off - src_off }, - )); + try self.asmRegisterMemory( + .mov, + load_reg, + Memory.sib(Memory.PtrSize.fromSize(load_abi_size), .{ + .base = .{ .frame = frame_addr.index }, + .disp = frame_addr.off + field_byte_off, + }), + ); try self.asmRegisterImmediate(.shr, load_reg, Immediate.u(field_bit_off)); } else { const tmp_reg = registerAlias( @@ -4095,14 +4339,22 @@ fn airStructFieldVal(self: *Self, inst: Air.Inst.Index) !void { defer self.register_manager.unlockReg(tmp_lock); const dst_alias = registerAlias(dst_reg, field_abi_size); - try self.asmRegisterMemory(.mov, dst_alias, Memory.sib( - Memory.PtrSize.fromSize(field_abi_size), - .{ .base = .rbp, .disp = 
field_byte_off - src_off }, - )); - try self.asmRegisterMemory(.mov, tmp_reg, Memory.sib( - Memory.PtrSize.fromSize(field_abi_size), - .{ .base = .rbp, .disp = field_byte_off + 1 - src_off }, - )); + try self.asmRegisterMemory( + .mov, + dst_alias, + Memory.sib(Memory.PtrSize.fromSize(field_abi_size), .{ + .base = .{ .frame = frame_addr.index }, + .disp = frame_addr.off + field_byte_off, + }), + ); + try self.asmRegisterMemory( + .mov, + tmp_reg, + Memory.sib(Memory.PtrSize.fromSize(field_abi_size), .{ + .base = .{ .frame = frame_addr.index }, + .disp = frame_addr.off + field_byte_off + @intCast(i32, limb_abi_size), + }), + ); try self.asmRegisterRegisterImmediate( .shrd, dst_alias, @@ -4249,41 +4501,51 @@ fn genUnOp(self: *Self, maybe_inst: ?Air.Inst.Index, tag: Air.Inst.Tag, src_air: fn genUnOpMir(self: *Self, mir_tag: Mir.Inst.Tag, dst_ty: Type, dst_mcv: MCValue) !void { const abi_size = @intCast(u32, dst_ty.abiSize(self.target.*)); + if (abi_size > 8) return self.fail("TODO implement {} for {}", .{ + mir_tag, + dst_ty.fmt(self.bin_file.options.module.?), + }); switch (dst_mcv) { - .none => unreachable, - .undef => unreachable, - .dead, .unreach, .immediate => unreachable, - .eflags => unreachable, - .register_overflow => unreachable, + .none, + .unreach, + .dead, + .undef, + .immediate, + .register_offset, + .eflags, + .register_overflow, + .lea_direct, + .lea_got, + .lea_tlv, + .lea_frame, + .reserved_frame, + => unreachable, // unmodifiable destination .register => |dst_reg| try self.asmRegister(mir_tag, registerAlias(dst_reg, abi_size)), - .stack_offset => |off| { - if (abi_size > 8) { - return self.fail("TODO implement {} for stack dst with large ABI", .{mir_tag}); - } - - try self.asmMemory(mir_tag, Memory.sib(Memory.PtrSize.fromSize(abi_size), .{ - .base = .rbp, - .disp = -off, - })); - }, - .ptr_stack_offset => unreachable, - .load_got, .lea_direct, .lea_tlv => unreachable, - .memory, .load_direct, .load_tlv => { - const addr_reg = (try self.register_manager.allocReg(null, gp)).to64(); + .memory, .load_got, .load_direct, .load_tlv => { + const addr_reg = try self.register_manager.allocReg(null, gp); const addr_reg_lock = self.register_manager.lockRegAssumeUnused(addr_reg); defer self.register_manager.unlockReg(addr_reg_lock); - try self.genSetReg(Type.usize, addr_reg, switch (dst_mcv) { - .memory => |addr| .{ .immediate = addr }, - .load_direct => |sym_index| .{ .lea_direct = sym_index }, - .load_tlv => |sym_index| .{ .lea_tlv = sym_index }, - else => unreachable, - }); + try self.genSetReg(addr_reg, Type.usize, dst_mcv.address()); try self.asmMemory( mir_tag, - Memory.sib(Memory.PtrSize.fromSize(abi_size), .{ .base = addr_reg }), + Memory.sib(Memory.PtrSize.fromSize(abi_size), .{ .base = .{ .reg = addr_reg } }), ); }, + .indirect, .load_frame => try self.asmMemory( + mir_tag, + Memory.sib(Memory.PtrSize.fromSize(abi_size), switch (dst_mcv) { + .indirect => |reg_off| .{ + .base = .{ .reg = reg_off.reg }, + .disp = reg_off.off, + }, + .load_frame => |frame_addr| .{ + .base = .{ .frame = frame_addr.index }, + .disp = frame_addr.off, + }, + else => unreachable, + }), + ), } } @@ -4305,7 +4567,7 @@ fn genShiftBinOpMir( else => {}, } self.register_manager.getRegAssumeFree(.rcx, null); - try self.genSetReg(Type.u8, .rcx, shift_mcv); + try self.genSetReg(.cl, Type.u8, shift_mcv); break :rhs .{ .register = .rcx }; }; @@ -4328,21 +4590,42 @@ fn genShiftBinOpMir( @tagName(rhs_mcv), }), }, - .stack_offset => |lhs_off| switch (rhs_mcv) { - .immediate => |rhs_imm| try 
self.asmMemoryImmediate( - tag, - Memory.sib(Memory.PtrSize.fromSize(abi_size), .{ .base = .rbp, .disp = -lhs_off }), - Immediate.u(rhs_imm), - ), - .register => |rhs_reg| try self.asmMemoryRegister( - tag, - Memory.sib(Memory.PtrSize.fromSize(abi_size), .{ .base = .rbp, .disp = -lhs_off }), - registerAlias(rhs_reg, 1), - ), - else => return self.fail("TODO genShiftBinOpMir between {s} and {s}", .{ - @tagName(lhs_mcv), - @tagName(rhs_mcv), - }), + .memory, .indirect, .load_frame => { + const lhs_mem = Memory.sib(Memory.PtrSize.fromSize(abi_size), switch (lhs_mcv) { + .memory => |addr| .{ + .base = .{ .reg = .ds }, + .disp = math.cast(i32, @bitCast(i64, addr)) orelse + return self.fail("TODO genShiftBinOpMir between {s} and {s}", .{ + @tagName(lhs_mcv), + @tagName(rhs_mcv), + }), + }, + .indirect => |reg_off| .{ + .base = .{ .reg = reg_off.reg }, + .disp = reg_off.off, + }, + .load_frame => |frame_addr| .{ + .base = .{ .frame = frame_addr.index }, + .disp = frame_addr.off, + }, + else => unreachable, + }); + switch (rhs_mcv) { + .immediate => |rhs_imm| try self.asmMemoryImmediate( + tag, + lhs_mem, + Immediate.u(rhs_imm), + ), + .register => |rhs_reg| try self.asmMemoryRegister( + tag, + lhs_mem, + registerAlias(rhs_reg, 1), + ), + else => return self.fail("TODO genShiftBinOpMir between {s} and {s}", .{ + @tagName(lhs_mcv), + @tagName(rhs_mcv), + }), + } }, else => return self.fail("TODO genShiftBinOpMir between {s} and {s}", .{ @tagName(lhs_mcv), @@ -4360,22 +4643,31 @@ fn genShiftBinOpMir( else => unreachable, }; switch (lhs_mcv) { - .stack_offset => |dst_off| switch (rhs_mcv) { + .load_frame => |dst_frame_addr| switch (rhs_mcv) { .immediate => |rhs_imm| if (rhs_imm == 0) {} else if (rhs_imm < 64) { try self.asmRegisterMemory( .mov, tmp_reg, - Memory.sib(.qword, .{ .base = .rbp, .disp = info.offsets[0] - dst_off }), + Memory.sib(.qword, .{ + .base = .{ .frame = dst_frame_addr.index }, + .disp = dst_frame_addr.off + info.offsets[0], + }), ); try self.asmMemoryRegisterImmediate( info.double_tag, - Memory.sib(.qword, .{ .base = .rbp, .disp = info.offsets[1] - dst_off }), + Memory.sib(.qword, .{ + .base = .{ .frame = dst_frame_addr.index }, + .disp = dst_frame_addr.off + info.offsets[1], + }), tmp_reg, Immediate.u(rhs_imm), ); try self.asmMemoryImmediate( tag, - Memory.sib(.qword, .{ .base = .rbp, .disp = info.offsets[0] - dst_off }), + Memory.sib(.qword, .{ + .base = .{ .frame = dst_frame_addr.index }, + .disp = dst_frame_addr.off + info.offsets[0], + }), Immediate.u(rhs_imm), ); } else { @@ -4383,14 +4675,20 @@ fn genShiftBinOpMir( try self.asmRegisterMemory( .mov, tmp_reg, - Memory.sib(.qword, .{ .base = .rbp, .disp = info.offsets[0] - dst_off }), + Memory.sib(.qword, .{ + .base = .{ .frame = dst_frame_addr.index }, + .disp = dst_frame_addr.off + info.offsets[0], + }), ); if (rhs_imm > 64) { try self.asmRegisterImmediate(tag, tmp_reg, Immediate.u(rhs_imm - 64)); } try self.asmMemoryRegister( .mov, - Memory.sib(.qword, .{ .base = .rbp, .disp = info.offsets[1] - dst_off }), + Memory.sib(.qword, .{ + .base = .{ .frame = dst_frame_addr.index }, + .disp = dst_frame_addr.off + info.offsets[1], + }), tmp_reg, ); switch (tag) { @@ -4398,13 +4696,19 @@ fn genShiftBinOpMir( try self.asmRegisterRegister(.xor, tmp_reg.to32(), tmp_reg.to32()); try self.asmMemoryRegister( .mov, - Memory.sib(.qword, .{ .base = .rbp, .disp = info.offsets[0] - dst_off }), + Memory.sib(.qword, .{ + .base = .{ .frame = dst_frame_addr.index }, + .disp = dst_frame_addr.off + info.offsets[0], + }), tmp_reg, ); }, .sar => try 
self.asmMemoryImmediate( tag, - Memory.sib(.qword, .{ .base = .rbp, .disp = info.offsets[0] - dst_off }), + Memory.sib(.qword, .{ + .base = .{ .frame = dst_frame_addr.index }, + .disp = dst_frame_addr.off + info.offsets[0], + }), Immediate.u(63), ), else => unreachable, @@ -4419,16 +4723,22 @@ fn genShiftBinOpMir( const second_lock = self.register_manager.lockRegAssumeUnused(second_reg); defer self.register_manager.unlockReg(second_lock); - try self.genSetReg(Type.u8, .cl, rhs_mcv); + try self.genSetReg(.cl, Type.u8, rhs_mcv); try self.asmRegisterMemory( .mov, first_reg, - Memory.sib(.qword, .{ .base = .rbp, .disp = info.offsets[0] - dst_off }), + Memory.sib(.qword, .{ + .base = .{ .frame = dst_frame_addr.index }, + .disp = dst_frame_addr.off + info.offsets[0], + }), ); try self.asmRegisterMemory( .mov, second_reg, - Memory.sib(.qword, .{ .base = .rbp, .disp = info.offsets[1] - dst_off }), + Memory.sib(.qword, .{ + .base = .{ .frame = dst_frame_addr.index }, + .disp = dst_frame_addr.off + info.offsets[1], + }), ); switch (tag) { .shl, .sal, .shr => try self.asmRegisterRegister( @@ -4449,12 +4759,18 @@ fn genShiftBinOpMir( try self.asmCmovccRegisterRegister(first_reg, tmp_reg, .ae); try self.asmMemoryRegister( .mov, - Memory.sib(.qword, .{ .base = .rbp, .disp = info.offsets[1] - dst_off }), + Memory.sib(.qword, .{ + .base = .{ .frame = dst_frame_addr.index }, + .disp = dst_frame_addr.off + info.offsets[1], + }), second_reg, ); try self.asmMemoryRegister( .mov, - Memory.sib(.qword, .{ .base = .rbp, .disp = info.offsets[0] - dst_off }), + Memory.sib(.qword, .{ + .base = .{ .frame = dst_frame_addr.index }, + .disp = dst_frame_addr.off + info.offsets[0], + }), first_reg, ); }, @@ -4515,7 +4831,7 @@ fn genShiftBinOp( if (self.reuseOperand(inst, bin_op.lhs, 0, lhs_mcv)) break :dst lhs_mcv; } const dst_mcv = try self.allocRegOrMemAdvanced(lhs_ty, maybe_inst, true); - try self.setRegOrMem(lhs_ty, dst_mcv, lhs_mcv); + try self.genCopy(lhs_ty, dst_mcv, lhs_mcv); break :dst dst_mcv; }; @@ -4610,14 +4926,22 @@ fn genMulDivBinOp( }, dst_abi_size) }; const dst_mcv = try self.allocRegOrMemAdvanced(dst_ty, maybe_inst, false); - try self.asmMemoryRegister(.mov, Memory.sib(.qword, .{ - .base = .rbp, - .disp = 0 - dst_mcv.stack_offset, - }), .rax); - try self.asmMemoryRegister(.mov, Memory.sib(.qword, .{ - .base = .rbp, - .disp = 8 - dst_mcv.stack_offset, - }), .rdx); + try self.asmMemoryRegister( + .mov, + Memory.sib(.qword, .{ + .base = .{ .frame = dst_mcv.load_frame.index }, + .disp = dst_mcv.load_frame.off, + }), + .rax, + ); + try self.asmMemoryRegister( + .mov, + Memory.sib(.qword, .{ + .base = .{ .frame = dst_mcv.load_frame.index }, + .disp = dst_mcv.load_frame.off + 8, + }), + .rdx, + ); return dst_mcv; }, @@ -4627,8 +4951,24 @@ fn genMulDivBinOp( switch (signedness) { .signed => { + const lhs_lock = switch (lhs) { + .register => |reg| self.register_manager.lockReg(reg), + else => null, + }; + defer if (lhs_lock) |lock| self.register_manager.unlockReg(lock); + const rhs_lock = switch (rhs) { + .register => |reg| self.register_manager.lockReg(reg), + else => null, + }; + defer if (rhs_lock) |lock| self.register_manager.unlockReg(lock); + + // hack around hazard between rhs and div_floor by copying rhs to another register + const rhs_copy = try self.copyToTmpRegister(ty, rhs); + const rhs_copy_lock = self.register_manager.lockRegAssumeUnused(rhs_copy); + defer self.register_manager.unlockReg(rhs_copy_lock); + const div_floor = try self.genInlineIntDivFloor(ty, lhs, rhs); - try 
self.genIntMulComplexOpMir(ty, div_floor, rhs); + try self.genIntMulComplexOpMir(ty, div_floor, .{ .register = rhs_copy }); const div_floor_lock = self.register_manager.lockReg(div_floor.register); defer if (div_floor_lock) |lock| self.register_manager.unlockReg(lock); @@ -4775,18 +5115,14 @@ fn genBinOp( } } const dst_mcv = try self.allocRegOrMemAdvanced(lhs_ty, maybe_inst, true); - try self.setRegOrMem(lhs_ty, dst_mcv, switch (lhs) { - .load_direct => |sym_index| .{ .lea_direct = sym_index }, - .load_tlv => |sym_index| .{ .lea_tlv = sym_index }, - else => lhs, - }); + try self.genCopy(lhs_ty, dst_mcv, lhs); break :dst dst_mcv; }; - const dst_mcv_lock: ?RegisterLock = switch (dst_mcv) { + const dst_lock: ?RegisterLock = switch (dst_mcv) { .register => |reg| self.register_manager.lockReg(reg), else => null, }; - defer if (dst_mcv_lock) |lock| self.register_manager.unlockReg(lock); + defer if (dst_lock) |lock| self.register_manager.unlockReg(lock); const src_mcv = if (flipped) lhs else rhs; switch (tag) { @@ -4878,10 +5214,21 @@ fn genBinOp( .max, => switch (lhs_ty.zigTypeTag()) { .Int => { - const mat_src_mcv = switch (src_mcv) { - .immediate => MCValue{ .register = try self.copyToTmpRegister(rhs_ty, src_mcv) }, - else => src_mcv, - }; + const mat_src_mcv: MCValue = if (switch (src_mcv) { + .immediate, + .eflags, + .register_offset, + .load_direct, + .lea_direct, + .load_got, + .lea_got, + .load_tlv, + .lea_tlv, + .lea_frame, + => true, + .memory => |addr| math.cast(i32, @bitCast(i64, addr)) == null, + else => false, + }) .{ .register = try self.copyToTmpRegister(rhs_ty, src_mcv) } else src_mcv; const mat_mcv_lock = switch (mat_src_mcv) { .register => |reg| self.register_manager.lockReg(reg), else => null, @@ -4913,57 +5260,56 @@ fn genBinOp( defer if (tmp_lock) |lock| self.register_manager.unlockReg(lock); switch (mat_src_mcv) { .none, - .undef, - .dead, .unreach, + .dead, + .undef, .immediate, .eflags, + .register_offset, .register_overflow, - .ptr_stack_offset, - .load_got, + .load_direct, .lea_direct, + .load_got, + .lea_got, + .load_tlv, .lea_tlv, + .lea_frame, + .reserved_frame, => unreachable, .register => |src_reg| try self.asmCmovccRegisterRegister( registerAlias(tmp_reg, cmov_abi_size), registerAlias(src_reg, cmov_abi_size), cc, ), - .stack_offset => |off| try self.asmCmovccRegisterMemory( + .memory, .indirect, .load_frame => try self.asmCmovccRegisterMemory( registerAlias(tmp_reg, cmov_abi_size), - Memory.sib(Memory.PtrSize.fromSize(cmov_abi_size), .{ - .base = .rbp, - .disp = -off, + Memory.sib(Memory.PtrSize.fromSize(cmov_abi_size), switch (mat_src_mcv) { + .memory => |addr| .{ + .base = .{ .reg = .ds }, + .disp = @intCast(i32, @bitCast(i64, addr)), + }, + .indirect => |reg_off| .{ + .base = .{ .reg = reg_off.reg }, + .disp = reg_off.off, + }, + .load_frame => |frame_addr| .{ + .base = .{ .frame = frame_addr.index }, + .disp = frame_addr.off, + }, + else => unreachable, }), cc, ), - .memory, .load_direct, .load_tlv => { - const addr_reg = (try self.register_manager.allocReg(null, gp)).to64(); - const addr_reg_lock = self.register_manager.lockRegAssumeUnused(addr_reg); - defer self.register_manager.unlockReg(addr_reg_lock); - - try self.genSetReg(Type.usize, addr_reg, switch (mat_src_mcv) { - .memory => |addr| .{ .immediate = addr }, - .load_direct => |sym_index| .{ .lea_direct = sym_index }, - .load_tlv => |sym_index| .{ .lea_tlv = sym_index }, - else => unreachable, - }); - try self.asmCmovccRegisterMemory( - registerAlias(tmp_reg, cmov_abi_size), - 
Memory.sib(Memory.PtrSize.fromSize(cmov_abi_size), .{ .base = addr_reg }), - cc, - ); - }, } - try self.setRegOrMem(lhs_ty, dst_mcv, .{ .register = tmp_reg }); + try self.genCopy(lhs_ty, dst_mcv, .{ .register = tmp_reg }); }, - .Float => try self.genBinOpMir(switch (lhs_ty.tag()) { - .f32 => switch (tag) { + .Float => try self.genBinOpMir(switch (lhs_ty.floatBits(self.target.*)) { + 32 => switch (tag) { .min => .minss, .max => .maxss, else => unreachable, }, - .f64 => switch (tag) { + 64 => switch (tag) { .min => .minsd, .max => .maxsd, else => unreachable, @@ -4981,25 +5327,38 @@ fn genBinOp( fn genBinOpMir(self: *Self, mir_tag: Mir.Inst.Tag, ty: Type, dst_mcv: MCValue, src_mcv: MCValue) !void { const abi_size = @intCast(u32, ty.abiSize(self.target.*)); switch (dst_mcv) { - .none => unreachable, - .undef => unreachable, - .dead, .unreach, .immediate => unreachable, - .eflags => unreachable, - .register_overflow => unreachable, + .none, + .unreach, + .dead, + .undef, + .immediate, + .register_offset, + .eflags, + .register_overflow, + .lea_direct, + .lea_got, + .lea_tlv, + .lea_frame, + .reserved_frame, + => unreachable, // unmodifiable destination .register => |dst_reg| { const dst_alias = registerAlias(dst_reg, abi_size); switch (src_mcv) { - .none => unreachable, - .undef => unreachable, - .dead, .unreach => unreachable, - .register_overflow => unreachable, + .none, + .unreach, + .dead, + .undef, + .register_overflow, + .reserved_frame, + => unreachable, .register => |src_reg| switch (ty.zigTypeTag()) { .Float => { - if (intrinsicsAllowed(self.target.*, ty)) { - return self.asmRegisterRegister(mir_tag, dst_reg.to128(), src_reg.to128()); - } - - return self.fail("TODO genBinOpMir for float register-register and no intrinsics", .{}); + if (!Target.x86.featureSetHas(self.target.cpu.features, .sse)) + return self.fail("TODO genBinOpMir for {s} {} without sse", .{ + @tagName(mir_tag), + ty.fmt(self.bin_file.options.module.?), + }); + return self.asmRegisterRegister(mir_tag, dst_reg.to128(), src_reg.to128()); }, else => try self.asmRegisterRegister( mir_tag, @@ -5041,14 +5400,16 @@ fn genBinOpMir(self: *Self, mir_tag: Mir.Inst.Tag, ty: Type, dst_mcv: MCValue, s )), else => unreachable, }, - .ptr_stack_offset, + .register_offset, + .eflags, .memory, - .load_got, - .lea_direct, .load_direct, - .lea_tlv, + .lea_direct, + .load_got, + .lea_got, .load_tlv, - .eflags, + .lea_tlv, + .lea_frame, => { assert(abi_size <= 8); const dst_reg_lock = self.register_manager.lockReg(dst_reg); @@ -5057,75 +5418,84 @@ fn genBinOpMir(self: *Self, mir_tag: Mir.Inst.Tag, ty: Type, dst_mcv: MCValue, s const reg = try self.copyToTmpRegister(ty, src_mcv); return self.genBinOpMir(mir_tag, ty, dst_mcv, .{ .register = reg }); }, - .stack_offset => |off| try self.asmRegisterMemory( + .indirect, .load_frame => try self.asmRegisterMemory( mir_tag, registerAlias(dst_reg, abi_size), - Memory.sib(Memory.PtrSize.fromSize(abi_size), .{ .base = .rbp, .disp = -off }), + Memory.sib(Memory.PtrSize.fromSize(abi_size), switch (src_mcv) { + .indirect => |reg_off| .{ + .base = .{ .reg = reg_off.reg }, + .disp = reg_off.off, + }, + .load_frame => |frame_addr| .{ + .base = .{ .frame = frame_addr.index }, + .disp = frame_addr.off, + }, + else => unreachable, + }), ), } }, - .memory, .load_got, .load_direct, .load_tlv, .stack_offset => { - const dst: ?struct { - addr_reg: Register, - addr_lock: RegisterLock, - } = switch (dst_mcv) { + .memory, .indirect, .load_got, .load_direct, .load_tlv, .load_frame => { + const OpInfo = ?struct { 
addr_reg: Register, addr_lock: RegisterLock }; + const limb_abi_size = @min(abi_size, 8); + + const dst_info: OpInfo = switch (dst_mcv) { else => unreachable, .memory, .load_got, .load_direct, .load_tlv => dst: { const dst_addr_reg = (try self.register_manager.allocReg(null, gp)).to64(); const dst_addr_lock = self.register_manager.lockRegAssumeUnused(dst_addr_reg); errdefer self.register_manager.unlockReg(dst_addr_lock); - try self.genSetReg(Type.usize, dst_addr_reg, switch (dst_mcv) { - .memory => |addr| .{ .immediate = addr }, - .load_direct => |sym_index| .{ .lea_direct = sym_index }, - .load_tlv => |sym_index| .{ .lea_tlv = sym_index }, - else => dst_mcv, - }); - + try self.genSetReg(dst_addr_reg, Type.usize, dst_mcv.address()); break :dst .{ .addr_reg = dst_addr_reg, .addr_lock = dst_addr_lock, }; }, - .stack_offset => null, + .load_frame => null, }; - defer if (dst) |lock| self.register_manager.unlockReg(lock.addr_lock); - - const src: ?struct { - limb_reg: Register, - limb_lock: RegisterLock, - addr_reg: Register, - addr_lock: RegisterLock, - } = switch (src_mcv) { - else => null, + defer if (dst_info) |info| self.register_manager.unlockReg(info.addr_lock); + + const src_info: OpInfo = switch (src_mcv) { + .none, + .unreach, + .dead, + .undef, + .register_overflow, + .reserved_frame, + => unreachable, + .immediate, + .register, + .register_offset, + .eflags, + .indirect, + .lea_direct, + .lea_got, + .lea_tlv, + .load_frame, + .lea_frame, + => null, .memory, .load_got, .load_direct, .load_tlv => src: { - const src_limb_reg = try self.register_manager.allocReg(null, gp); - const src_limb_lock = self.register_manager.lockRegAssumeUnused(src_limb_reg); - errdefer self.register_manager.unlockReg(src_limb_lock); + switch (src_mcv) { + .memory => |addr| if (math.cast(i32, @bitCast(i64, addr)) != null and + math.cast(i32, @bitCast(i64, addr) + abi_size - limb_abi_size) != null) + break :src null, + .load_got, .load_direct, .load_tlv => {}, + else => unreachable, + } const src_addr_reg = (try self.register_manager.allocReg(null, gp)).to64(); const src_addr_lock = self.register_manager.lockRegAssumeUnused(src_addr_reg); errdefer self.register_manager.unlockReg(src_addr_lock); - try self.genSetReg(Type.usize, src_addr_reg, switch (src_mcv) { - .memory => |addr| .{ .immediate = addr }, - .load_direct => |sym_index| .{ .lea_direct = sym_index }, - .load_tlv => |sym_index| .{ .lea_tlv = sym_index }, - else => src_mcv, - }); - + try self.genSetReg(src_addr_reg, Type.usize, src_mcv.address()); break :src .{ .addr_reg = src_addr_reg, .addr_lock = src_addr_lock, - .limb_reg = src_limb_reg, - .limb_lock = src_limb_lock, }; }, }; - defer if (src) |locks| { - self.register_manager.unlockReg(locks.limb_lock); - self.register_manager.unlockReg(locks.addr_lock); - }; + defer if (src_info) |info| self.register_manager.unlockReg(info.addr_lock); const ty_signedness = if (ty.isAbiInt()) ty.intInfo(self.target.*).signedness else .unsigned; @@ -5133,7 +5503,6 @@ fn genBinOpMir(self: *Self, mir_tag: Mir.Inst.Tag, ty: Type, dst_mcv: MCValue, s .signed => Type.usize, .unsigned => Type.isize, }; - const limb_abi_size = @min(abi_size, 8); var off: i32 = 0; while (off < abi_size) : (off += 8) { const mir_limb_tag = switch (off) { @@ -5150,35 +5519,45 @@ fn genBinOpMir(self: *Self, mir_tag: Mir.Inst.Tag, ty: Type, dst_mcv: MCValue, s const dst_limb_mem = Memory.sib( Memory.PtrSize.fromSize(limb_abi_size), switch (dst_mcv) { - else => unreachable, - .stack_offset => |dst_off| .{ - .base = .rbp, - .disp = off - dst_off, 
- }, .memory, .load_got, .load_direct, .load_tlv, - => .{ .base = dst.?.addr_reg, .disp = off }, + => .{ .base = .{ .reg = dst_info.?.addr_reg }, .disp = off }, + .indirect => |reg_off| .{ + .base = .{ .reg = reg_off.reg }, + .disp = reg_off.off + off, + }, + .load_frame => |frame_addr| .{ + .base = .{ .frame = frame_addr.index }, + .disp = frame_addr.off + off, + }, + else => unreachable, }, ); switch (src_mcv) { - .none => unreachable, - .undef => unreachable, - .dead, .unreach => unreachable, - .register_overflow => unreachable, - .register => |src_reg| { - assert(off == 0); - try self.asmMemoryRegister( + .none, + .unreach, + .dead, + .undef, + .register_overflow, + .reserved_frame, + => unreachable, + .register => |src_reg| switch (off) { + 0 => try self.asmMemoryRegister( mir_limb_tag, dst_limb_mem, registerAlias(src_reg, limb_abi_size), - ); + ), + else => unreachable, }, .immediate => |src_imm| { - const imm = if (off == 0) src_imm else switch (ty_signedness) { - .signed => @bitCast(u64, @bitCast(i64, src_imm) >> 63), - .unsigned => 0, + const imm = switch (off) { + 0 => src_imm, + else => switch (ty_signedness) { + .signed => @bitCast(u64, @bitCast(i64, src_imm) >> 63), + .unsigned => 0, + }, }; switch (self.regBitSize(limb_ty)) { 8 => try self.asmMemoryImmediate( @@ -5223,47 +5602,43 @@ fn genBinOpMir(self: *Self, mir_tag: Mir.Inst.Tag, ty: Type, dst_mcv: MCValue, s else => unreachable, } }, + .register_offset, + .eflags, .memory, - .load_got, + .indirect, .load_direct, - .load_tlv, - => { - try self.asmRegisterMemory( - .mov, - registerAlias(src.?.limb_reg, limb_abi_size), - Memory.sib( - Memory.PtrSize.fromSize(limb_abi_size), - .{ .base = src.?.addr_reg, .disp = off }, - ), - ); - try self.asmMemoryRegister( - mir_limb_tag, - dst_limb_mem, - registerAlias(src.?.limb_reg, limb_abi_size), - ); - }, - .stack_offset, - .ptr_stack_offset, - .eflags, .lea_direct, + .load_got, + .lea_got, + .load_tlv, .lea_tlv, + .load_frame, + .lea_frame, => { - const src_limb_reg = try self.copyToTmpRegister(limb_ty, switch (src_mcv) { - .stack_offset => |src_off| .{ .stack_offset = src_off - off }, - .ptr_stack_offset, + const src_limb_reg = try self.copyToTmpRegister(limb_ty, if (src_info) |info| .{ + .indirect = .{ .reg = info.addr_reg, .off = off }, + } else switch (src_mcv) { .eflags, - .load_got, + .register_offset, .lea_direct, + .lea_got, .lea_tlv, - => off: { - assert(off == 0); - break :off src_mcv; + .lea_frame, + => switch (off) { + 0 => src_mcv, + else => .{ .immediate = 0 }, }, + .memory => |addr| .{ .memory = @bitCast(u64, @bitCast(i64, addr) + off) }, + .indirect => |reg_off| .{ .indirect = .{ + .reg = reg_off.reg, + .off = reg_off.off + off, + } }, + .load_frame => |frame_addr| .{ .load_frame = .{ + .index = frame_addr.index, + .off = frame_addr.off + off, + } }, else => unreachable, }); - const src_limb_lock = self.register_manager.lockReg(src_limb_reg); - defer if (src_limb_lock) |lock| self.register_manager.unlockReg(lock); - try self.asmMemoryRegister( mir_limb_tag, dst_limb_mem, @@ -5273,9 +5648,6 @@ fn genBinOpMir(self: *Self, mir_tag: Mir.Inst.Tag, ty: Type, dst_mcv: MCValue, s } } }, - .ptr_stack_offset => unreachable, - .lea_tlv => unreachable, - .lea_direct => unreachable, } } @@ -5284,27 +5656,33 @@ fn genBinOpMir(self: *Self, mir_tag: Mir.Inst.Tag, ty: Type, dst_mcv: MCValue, s fn genIntMulComplexOpMir(self: *Self, dst_ty: Type, dst_mcv: MCValue, src_mcv: MCValue) InnerError!void { const abi_size = @intCast(u32, dst_ty.abiSize(self.target.*)); switch (dst_mcv) { - .none 
=> unreachable, - .undef => unreachable, - .dead, .unreach, .immediate => unreachable, - .eflags => unreachable, - .ptr_stack_offset => unreachable, - .lea_direct => unreachable, - .lea_tlv => unreachable, - .register_overflow => unreachable, + .none, + .unreach, + .dead, + .undef, + .immediate, + .register_offset, + .eflags, + .register_overflow, + .lea_direct, + .lea_got, + .lea_tlv, + .lea_frame, + .reserved_frame, + => unreachable, // unmodifiable destination .register => |dst_reg| { const dst_alias = registerAlias(dst_reg, abi_size); const dst_lock = self.register_manager.lockReg(dst_reg); defer if (dst_lock) |lock| self.register_manager.unlockReg(lock); switch (src_mcv) { - .none => unreachable, - .undef => try self.genSetReg(dst_ty, dst_reg, .undef), - .dead, .unreach => unreachable, - .ptr_stack_offset => unreachable, - .lea_direct => unreachable, - .lea_tlv => unreachable, - .register_overflow => unreachable, + .none, + .unreach, + .dead, + .undef, + .register_overflow, + .reserved_frame, + => unreachable, .register => |src_reg| try self.asmRegisterRegister( .imul, dst_alias, @@ -5323,74 +5701,54 @@ fn genIntMulComplexOpMir(self: *Self, dst_ty: Type, dst_mcv: MCValue, src_mcv: M return self.genIntMulComplexOpMir(dst_ty, dst_mcv, MCValue{ .register = src_reg }); } }, - .stack_offset => |off| { - try self.asmRegisterMemory( - .imul, - dst_alias, - Memory.sib(Memory.PtrSize.fromSize(abi_size), .{ .base = .rbp, .disp = -off }), - ); - }, - .memory, - .load_got, + .register_offset, + .eflags, .load_direct, - .load_tlv, - => { - return self.fail("TODO implement x86 multiply source memory", .{}); - }, - .eflags => { - return self.fail("TODO implement x86 multiply source eflags", .{}); - }, - } - }, - .stack_offset => |off| { - switch (src_mcv) { - .none => unreachable, - .undef => return self.genSetStack(dst_ty, off, .undef, .{}), - .dead, .unreach => unreachable, - .ptr_stack_offset => unreachable, - .lea_direct => unreachable, - .lea_tlv => unreachable, - .register_overflow => unreachable, - .register => |src_reg| { - // copy dst to a register - const dst_reg = try self.copyToTmpRegister(dst_ty, dst_mcv); - try self.asmRegisterRegister( - .imul, - registerAlias(dst_reg, abi_size), - registerAlias(src_reg, abi_size), - ); - // copy dst_reg back out - return self.genSetStack(dst_ty, off, .{ .register = dst_reg }, .{}); - }, - .immediate => { - // copy dst to a register - const dst_reg = try self.copyToTmpRegister(dst_ty, dst_mcv); - const dst_reg_lock = self.register_manager.lockRegAssumeUnused(dst_reg); - defer self.register_manager.unlockReg(dst_reg_lock); - - try self.genIntMulComplexOpMir(dst_ty, .{ .register = dst_reg }, src_mcv); - - return self.genSetStack(dst_ty, off, .{ .register = dst_reg }, .{}); - }, - .memory, + .lea_direct, .load_got, - .load_direct, + .lea_got, .load_tlv, - .stack_offset, - => { - return self.fail("TODO implement x86 multiply source memory", .{}); - }, - .eflags => { - return self.fail("TODO implement x86 multiply source eflags", .{}); - }, + .lea_tlv, + .lea_frame, + => try self.asmRegisterRegister( + .imul, + dst_alias, + registerAlias(try self.copyToTmpRegister(dst_ty, src_mcv), abi_size), + ), + .memory, .indirect, .load_frame => try self.asmRegisterMemory( + .imul, + dst_alias, + Memory.sib(Memory.PtrSize.fromSize(abi_size), switch (src_mcv) { + .memory => |addr| .{ + .base = .{ .reg = .ds }, + .disp = math.cast(i32, @bitCast(i64, addr)) orelse + return self.asmRegisterRegister( + .imul, + dst_alias, + registerAlias(try self.copyToTmpRegister(dst_ty, 
src_mcv), abi_size), + ), + }, + .indirect => |reg_off| .{ + .base = .{ .reg = reg_off.reg }, + .disp = reg_off.off, + }, + .load_frame => |frame_addr| .{ + .base = .{ .frame = frame_addr.index }, + .disp = frame_addr.off, + }, + else => unreachable, + }), + ), } }, - .memory, - .load_got, - .load_direct, - .load_tlv, - => { - return self.fail("TODO implement x86 multiply destination memory", .{}); + .memory, .indirect, .load_direct, .load_got, .load_tlv, .load_frame => { + const tmp_reg = try self.copyToTmpRegister(dst_ty, dst_mcv); + const tmp_mcv = MCValue{ .register = tmp_reg }; + const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg); + defer self.register_manager.unlockReg(tmp_lock); + + try self.genIntMulComplexOpMir(dst_ty, tmp_mcv, src_mcv); + try self.genCopy(dst_ty, dst_mcv, tmp_mcv); }, } } @@ -5401,24 +5759,19 @@ fn airArg(self: *Self, inst: Air.Inst.Index) !void { while (self.args[arg_index] == .none) arg_index += 1; self.arg_index = arg_index + 1; - const ty = self.air.typeOfIndex(inst); - const mcv = self.args[arg_index]; - const src_index = self.air.instructions.items(.data)[inst].arg.src_index; - const name = self.mod_fn.getParamName(self.bin_file.options.module.?, src_index); - const result: MCValue = if (self.liveness.isUnused(inst)) .unreach else result: { - const dst_mcv: MCValue = switch (mcv) { - .register => |reg| blk: { - self.register_manager.getRegAssumeFree(reg.to64(), inst); - break :blk MCValue{ .register = reg }; - }, - .stack_offset => |off| blk: { - const offset = @intCast(i32, self.max_end_stack) - off + 16; - break :blk MCValue{ .stack_offset = -offset }; - }, - else => return self.fail("TODO implement arg for {}", .{mcv}), - }; + const dst_mcv = self.args[arg_index]; + switch (dst_mcv) { + .register => |reg| self.register_manager.getRegAssumeFree(reg, inst), + .load_frame => {}, + else => return self.fail("TODO implement arg for {}", .{dst_mcv}), + } + + const ty = self.air.typeOfIndex(inst); + const src_index = self.air.instructions.items(.data)[inst].arg.src_index; + const name = self.mod_fn.getParamName(self.bin_file.options.module.?, src_index); try self.genArgDbgInfo(ty, name, dst_mcv); + break :result dst_mcv; }; return self.finishAir(inst, result, .{ .none, .none, .none }); @@ -5429,13 +5782,15 @@ fn genArgDbgInfo(self: Self, ty: Type, name: [:0]const u8, mcv: MCValue) !void { .dwarf => |dw| { const loc: link.File.Dwarf.DeclState.DbgInfoLoc = switch (mcv) { .register => |reg| .{ .register = reg.dwarfLocOp() }, - .stack_offset => |off| .{ - .stack = .{ - // TODO handle -fomit-frame-pointer - .fp_register = Register.rbp.dwarfLocOpDeref(), - .offset = -off, - }, - }, + // TODO use a frame index + .load_frame => return, + //.stack_offset => |off| .{ + // .stack = .{ + // // TODO handle -fomit-frame-pointer + // .fp_register = Register.rbp.dwarfLocOpDeref(), + // .offset = -off, + // }, + //}, else => unreachable, // not a valid function parameter }; try dw.genArgDbgInfo(name, ty, self.mod_fn.owner_decl, loc); @@ -5462,12 +5817,12 @@ fn genVarDbgInfo( .dwarf => |dw| { const loc: link.File.Dwarf.DeclState.DbgInfoLoc = switch (mcv) { .register => |reg| .{ .register = reg.dwarfLocOp() }, - .ptr_stack_offset, - .stack_offset, - => |off| .{ .stack = .{ - .fp_register = Register.rbp.dwarfLocOpDeref(), - .offset = -off, - } }, + // TODO use a frame index + .load_frame, .lea_frame => return, + //=> |off| .{ .stack = .{ + // .fp_register = Register.rbp.dwarfLocOpDeref(), + // .offset = -off, + //} }, .memory => |address| .{ .memory = address }, 
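// Note on the frame-index TODOs above: .load_frame/.lea_frame return early
// because a FrameIndex has no concrete frame-pointer offset at this point in
// codegen; offsets are only fixed later, when frame layout populates
// frame_locs (Mir.FrameLoc). A sketch of what the mapping could become once
// a resolved offset is available (resolved_off is hypothetical, not in this
// commit):
//
//     .load_frame, .lea_frame => .{ .stack = .{
//         .fp_register = Register.rbp.dwarfLocOpDeref(),
//         .offset = resolved_off,
//     } },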
.load_got => |sym_index| .{ .linker_load = .{ .type = .got, .sym_index = sym_index } }, .load_direct => |sym_index| .{ .linker_load = .{ .type = .direct, .sym_index = sym_index } }, @@ -5498,15 +5853,13 @@ fn airBreakpoint(self: *Self) !void { fn airRetAddr(self: *Self, inst: Air.Inst.Index) !void { const dst_mcv = try self.allocRegOrMem(inst, true); - try self.setRegOrMem(Type.usize, dst_mcv, .{ - .stack_offset = -@as(i32, @divExact(self.target.cpu.arch.ptrBitWidth(), 8)), - }); + try self.genCopy(Type.usize, dst_mcv, .{ .load_frame = .{ .index = .ret_addr } }); return self.finishAir(inst, dst_mcv, .{ .none, .none, .none }); } fn airFrameAddress(self: *Self, inst: Air.Inst.Index) !void { const dst_mcv = try self.allocRegOrMem(inst, true); - try self.setRegOrMem(Type.usize, dst_mcv, .{ .register = .rbp }); + try self.genCopy(Type.usize, dst_mcv, .{ .lea_frame = .{ .index = .base_ptr } }); return self.finishAir(inst, dst_mcv, .{ .none, .none, .none }); } @@ -5534,94 +5887,69 @@ fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallModifier else => unreachable, }; - var info = try self.resolveCallingConventionValues(fn_ty, args[fn_ty.fnParamLen()..]); + var info = try self.resolveCallingConventionValues(fn_ty, args[fn_ty.fnParamLen()..], .call_frame); defer info.deinit(self); + // We need a properly aligned and sized call frame to be able to call this function. + { + const needed_call_frame = + FrameAlloc.init(.{ .size = info.stack_byte_count, .alignment = info.stack_align }); + const frame_allocs_slice = self.frame_allocs.slice(); + const stack_frame_size = + &frame_allocs_slice.items(.abi_size)[@enumToInt(FrameIndex.call_frame)]; + stack_frame_size.* = @max(stack_frame_size.*, needed_call_frame.abi_size); + const stack_frame_align = + &frame_allocs_slice.items(.abi_align)[@enumToInt(FrameIndex.call_frame)]; + stack_frame_align.* = @max(stack_frame_align.*, needed_call_frame.abi_align); + } + try self.spillEflagsIfOccupied(); try self.spillRegisters(abi.getCallerPreservedRegs(self.target.*)); // set stack arguments first because this can clobber registers // also clobber spill arguments as we go - if (info.return_value == .stack_offset) { - try self.spillRegisters(&.{abi.getCAbiIntParamRegs(self.target.*)[0]}); + switch (info.return_value.long) { + .none, .unreach => {}, + .indirect => |reg_off| try self.spillRegisters(&.{reg_off.reg}), + else => unreachable, } for (args, info.args) |arg, mc_arg| { const arg_ty = self.air.typeOf(arg); const arg_mcv = try self.resolveInst(arg); - // Here we do not use setRegOrMem even though the logic is similar, because - // the function call will move the stack pointer, so the offsets are different. 
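// The needed_call_frame block above is how variable per-call stack usage
// becomes one static allocation: each call site merges its byte count and
// alignment into the shared FrameIndex.call_frame slot with @max, and the
// prologue later reserves the largest requirement once for the whole
// function. Minimal sketch of the size merge, reusing the names above (the
// alignment merges the same way):
//
//     const needed = FrameAlloc.init(.{ .size = info.stack_byte_count, .alignment = info.stack_align });
//     const sizes = self.frame_allocs.slice().items(.abi_size);
//     sizes[@enumToInt(FrameIndex.call_frame)] =
//         @max(sizes[@enumToInt(FrameIndex.call_frame)], needed.abi_size);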
switch (mc_arg) { .none => {}, .register => |reg| try self.spillRegisters(&.{reg}), - .stack_offset => |off| { - // TODO rewrite using `genSetStack` - try self.genSetStackArg(arg_ty, off, arg_mcv); - }, - .ptr_stack_offset => { - return self.fail("TODO implement calling with MCValue.ptr_stack_offset arg", .{}); - }, - .undef => unreachable, - .immediate => unreachable, - .unreach => unreachable, - .dead => unreachable, - .memory => unreachable, - .eflags => unreachable, - .register_overflow => unreachable, - .load_got => unreachable, - .lea_direct => unreachable, - .load_direct => unreachable, - .lea_tlv => unreachable, - .load_tlv => unreachable, + .load_frame => try self.genCopy(arg_ty, mc_arg, arg_mcv), + else => unreachable, } } // now we are free to set register arguments - const ret_reg_lock: ?RegisterLock = blk: { - if (info.return_value == .stack_offset) { + const ret_lock = switch (info.return_value.long) { + .none, .unreach => null, + .indirect => |reg_off| lock: { const ret_ty = fn_ty.fnReturnType(); - const ret_abi_size = @intCast(u32, ret_ty.abiSize(self.target.*)); - const ret_abi_align = @intCast(u32, ret_ty.abiAlignment(self.target.*)); - const stack_offset = @intCast(i32, try self.allocMem(inst, ret_abi_size, ret_abi_align)); - log.debug("airCall: return value on stack at offset {}", .{stack_offset}); - - const ret_reg = abi.getCAbiIntParamRegs(self.target.*)[0]; - try self.genSetReg(Type.usize, ret_reg, .{ .ptr_stack_offset = stack_offset }); - const ret_reg_lock = self.register_manager.lockRegAssumeUnused(ret_reg); - - info.return_value.stack_offset = stack_offset; - - break :blk ret_reg_lock; - } - break :blk null; + const frame_index = try self.allocFrameIndex(FrameAlloc.initType(ret_ty, self.target.*)); + try self.genSetReg(reg_off.reg, Type.usize, .{ + .lea_frame = .{ .index = frame_index, .off = -reg_off.off }, + }); + info.return_value.short = .{ .load_frame = .{ .index = frame_index } }; + break :lock self.register_manager.lockRegAssumeUnused(reg_off.reg); + }, + else => unreachable, }; - defer if (ret_reg_lock) |lock| self.register_manager.unlockReg(lock); + defer if (ret_lock) |lock| self.register_manager.unlockReg(lock); for (args, info.args) |arg, mc_arg| { const arg_ty = self.air.typeOf(arg); const arg_mcv = try self.resolveInst(arg); switch (mc_arg) { - .none, .stack_offset, .ptr_stack_offset => {}, - .register => |reg| try self.genSetReg(arg_ty, reg, arg_mcv), - .undef => unreachable, - .immediate => unreachable, - .unreach => unreachable, - .dead => unreachable, - .memory => unreachable, - .eflags => unreachable, - .register_overflow => unreachable, - .load_got => unreachable, - .lea_direct => unreachable, - .load_direct => unreachable, - .lea_tlv => unreachable, - .load_tlv => unreachable, + .none, .load_frame => {}, + .register => try self.genCopy(arg_ty, mc_arg, arg_mcv), + else => unreachable, } } - if (info.stack_byte_count > 0) { - // Adjust the stack - try self.asmRegisterImmediate(.sub, .rsp, Immediate.u(info.stack_byte_count)); - } - // Due to incremental compilation, how function calls are generated depends // on linking. 
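// Note what is gone compared to the old code: the per-call
// `sub rsp` / `add rsp` adjustment around the call. Outgoing stack arguments
// are now plain frame stores (the .load_frame arm above), and the prologue
// reserves the maximal call frame up front, so the stack pointer never moves
// at a call site. A stack-argument copy reduces to something like this
// (arg_off standing in for whatever offset resolveCallingConventionValues
// assigned):
//
//     try self.genCopy(arg_ty, .{ .load_frame = .{ .index = .call_frame, .off = arg_off } }, arg_mcv);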
const mod = self.bin_file.options.module.?; @@ -5635,16 +5963,16 @@ fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallModifier _ = try atom.getOrCreateOffsetTableEntry(elf_file); const got_addr = atom.getOffsetTableAddress(elf_file); try self.asmMemory(.call, Memory.sib(.qword, .{ - .base = .ds, + .base = .{ .reg = .ds }, .disp = @intCast(i32, got_addr), })); } else if (self.bin_file.cast(link.File.Coff)) |_| { const sym_index = try self.getSymbolIndexForDecl(func.owner_decl); - try self.genSetReg(Type.usize, .rax, .{ .load_got = sym_index }); + try self.genSetReg(.rax, Type.usize, .{ .lea_got = sym_index }); try self.asmRegister(.call, .rax); } else if (self.bin_file.cast(link.File.MachO)) |_| { const sym_index = try self.getSymbolIndexForDecl(func.owner_decl); - try self.genSetReg(Type.usize, .rax, .{ .load_got = sym_index }); + try self.genSetReg(.rax, Type.usize, .{ .lea_got = sym_index }); try self.asmRegister(.call, .rax); } else if (self.bin_file.cast(link.File.Plan9)) |p9| { const decl_block_index = try p9.seeDecl(func.owner_decl); @@ -5655,7 +5983,7 @@ fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallModifier const got_index = decl_block.got_index.?; const fn_got_addr = got_addr + got_index * ptr_bytes; try self.asmMemory(.call, Memory.sib(.qword, .{ - .base = .ds, + .base = .{ .reg = .ds }, .disp = @intCast(i32, fn_got_addr), })); } else unreachable; @@ -5696,41 +6024,15 @@ fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallModifier } else { assert(ty.zigTypeTag() == .Pointer); const mcv = try self.resolveInst(callee); - try self.genSetReg(Type.usize, .rax, mcv); + try self.genSetReg(.rax, Type.usize, mcv); try self.asmRegister(.call, .rax); } - if (info.stack_byte_count > 0) { - // Readjust the stack - try self.asmRegisterImmediate(.add, .rsp, Immediate.u(info.stack_byte_count)); - } - - const result: MCValue = result: { - if (self.liveness.isUnused(inst)) break :result .unreach; - - switch (info.return_value) { - .register => { - // Save function return value in a new register - break :result try self.copyToRegisterWithInstTracking( - inst, - self.air.typeOfIndex(inst), - info.return_value, - ); - }, - else => {}, - } - break :result info.return_value; - }; - - if (args.len <= Liveness.bpi - 2) { - var buf = [1]Air.Inst.Ref{.none} ** (Liveness.bpi - 1); - buf[0] = callee; - std.mem.copy(Air.Inst.Ref, buf[1..], args); - return self.finishAir(inst, result, buf); - } var bt = self.liveness.iterateBigTomb(inst); self.feed(&bt, callee); for (args) |arg| self.feed(&bt, arg); + + const result = if (self.liveness.isUnused(inst)) .unreach else info.return_value.short; return self.finishAirResult(inst, result); } @@ -5738,18 +6040,19 @@ fn airRet(self: *Self, inst: Air.Inst.Index) !void { const un_op = self.air.instructions.items(.data)[inst].un_op; const operand = try self.resolveInst(un_op); const ret_ty = self.fn_type.fnReturnType(); - switch (self.ret_mcv) { - .immediate => { - assert(ret_ty.isError()); - }, - .stack_offset => { - try self.store(self.ret_mcv, operand, Type.usize, ret_ty); - }, - else => { - try self.setRegOrMem(ret_ty, self.ret_mcv, operand); + switch (self.ret_mcv.short) { + .none => {}, + .register => try self.genCopy(ret_ty, self.ret_mcv.short, operand), + .indirect => |reg_off| { + try self.register_manager.getReg(reg_off.reg, null); + const lock = self.register_manager.lockRegAssumeUnused(reg_off.reg); + defer self.register_manager.unlockReg(lock); + + try self.genSetReg(reg_off.reg, Type.usize, 
self.ret_mcv.long); + try self.genSetMem(.{ .reg = reg_off.reg }, reg_off.off, ret_ty, operand); }, + else => unreachable, } - // TODO when implementing defer, this will need to jump to the appropriate defer expression. // TODO optimization opportunity: figure out when we can emit this as a 2 byte instruction // which is available if the jump is 127 bytes or less forward. const jmp_reloc = try self.asmJmpReloc(undefined); @@ -5761,25 +6064,12 @@ fn airRetLoad(self: *Self, inst: Air.Inst.Index) !void { const un_op = self.air.instructions.items(.data)[inst].un_op; const ptr = try self.resolveInst(un_op); const ptr_ty = self.air.typeOf(un_op); - const elem_ty = ptr_ty.elemType(); - const abi_size = elem_ty.abiSize(self.target.*); - switch (self.ret_mcv) { - .immediate => { - assert(elem_ty.isError()); - }, - .stack_offset => { - const reg = try self.copyToTmpRegister(Type.usize, self.ret_mcv); - const reg_lock = self.register_manager.lockRegAssumeUnused(reg); - defer self.register_manager.unlockReg(reg_lock); - - try self.genInlineMemcpy(.{ .register = reg }, ptr, .{ .immediate = abi_size }, .{}); - }, - else => { - try self.load(self.ret_mcv, ptr, ptr_ty); - try self.setRegOrMem(elem_ty, self.ret_mcv, self.ret_mcv); - }, + switch (self.ret_mcv.short) { + .none => {}, + .register => try self.load(self.ret_mcv.short, ptr_ty, ptr), + .indirect => |reg_off| try self.genSetReg(reg_off.reg, ptr_ty, ptr), + else => unreachable, } - // TODO when implementing defer, this will need to jump to the appropriate defer expression. // TODO optimization opportunity: figure out when we can emit this as a 2 byte instruction // which is available if the jump is 127 bytes or less forward. const jmp_reloc = try self.asmJmpReloc(undefined); @@ -5822,7 +6112,7 @@ fn airCmp(self: *Self, inst: Air.Inst.Index, op: math.CompareOperator) !void { break :dst rhs_mcv; } else if (dst_mem_ok) dst: { const dst_mcv = try self.allocTempRegOrMem(ty, true); - try self.setRegOrMem(ty, dst_mcv, lhs_mcv); + try self.genCopy(ty, dst_mcv, lhs_mcv); break :dst dst_mcv; } else .{ .register = try self.copyToTmpRegister(ty, lhs_mcv) }; const dst_lock = switch (dst_mcv) { @@ -5865,26 +6155,27 @@ fn airCmpLtErrorsLen(self: *Self, inst: Air.Inst.Index) !void { const atom = elf_file.getAtom(atom_index); _ = try atom.getOrCreateOffsetTableEntry(elf_file); const got_addr = atom.getOffsetTableAddress(elf_file); - try self.asmRegisterMemory(.mov, addr_reg.to64(), Memory.sib(.qword, .{ - .base = .ds, - .disp = @intCast(i32, got_addr), - })); + try self.asmRegisterMemory( + .mov, + addr_reg.to64(), + Memory.sib(.qword, .{ .base = .{ .reg = .ds }, .disp = @intCast(i32, got_addr) }), + ); } else if (self.bin_file.cast(link.File.Coff)) |coff_file| { const atom_index = try coff_file.getOrCreateAtomForLazySymbol( .{ .kind = .const_data, .ty = Type.anyerror }, 4, // dword alignment ); const sym_index = coff_file.getAtom(atom_index).getSymbolIndex().?; - try self.genSetReg(Type.usize, addr_reg, .{ .load_got = sym_index }); + try self.genSetReg(addr_reg, Type.usize, .{ .lea_got = sym_index }); } else if (self.bin_file.cast(link.File.MachO)) |macho_file| { const atom_index = try macho_file.getOrCreateAtomForLazySymbol( .{ .kind = .const_data, .ty = Type.anyerror }, 4, // dword alignment ); const sym_index = macho_file.getAtom(atom_index).getSymbolIndex().?; - try self.genSetReg(Type.usize, addr_reg, .{ .load_got = sym_index }); + try self.genSetReg(addr_reg, Type.usize, .{ .lea_got = sym_index }); } else { - return self.fail("TODO implement airErrorName for 
x86_64 {s}", .{@tagName(self.bin_file.tag)}); + return self.fail("TODO implement airCmpLtErrorsLen for x86_64 {s}", .{@tagName(self.bin_file.tag)}); } try self.spillEflagsIfOccupied(); @@ -5900,7 +6191,7 @@ fn airCmpLtErrorsLen(self: *Self, inst: Air.Inst.Index) !void { try self.asmRegisterMemory( .cmp, registerAlias(dst_reg, op_abi_size), - Memory.sib(Memory.PtrSize.fromSize(op_abi_size), .{ .base = addr_reg }), + Memory.sib(Memory.PtrSize.fromSize(op_abi_size), .{ .base = .{ .reg = addr_reg } }), ); const result = MCValue{ .eflags = .b }; return self.finishAir(inst, result, .{ un_op, .none, .none }); @@ -5971,14 +6262,13 @@ fn genTry( fn airDbgStmt(self: *Self, inst: Air.Inst.Index) !void { const dbg_stmt = self.air.instructions.items(.data)[inst].dbg_stmt; - const payload = try self.addExtra(Mir.DbgLineColumn{ - .line = dbg_stmt.line, - .column = dbg_stmt.column, - }); _ = try self.addInst(.{ .tag = .dbg_line, .ops = undefined, - .data = .{ .payload = payload }, + .data = .{ .line_column = .{ + .line = dbg_stmt.line, + .column = dbg_stmt.column, + } }, }); return self.finishAirBookkeeping(); } @@ -6002,8 +6292,6 @@ fn airDbgVar(self: *Self, inst: Air.Inst.Index) !void { const ty = self.air.typeOf(operand); const mcv = try self.resolveInst(operand); - log.debug("airDbgVar: %{d}: {}, {}", .{ inst, ty.fmtDebug(), mcv }); - const name = self.air.nullTerminatedString(pl_op.payload); const tag = self.air.instructions.items(.tag)[inst]; @@ -6025,7 +6313,7 @@ fn genCondBrMir(self: *Self, ty: Type, mcv: MCValue) !u32 { return self.asmJccReloc(undefined, .e); }, .immediate, - .stack_offset, + .load_frame, => { try self.spillEflagsIfOccupied(); if (abi_size <= 8) { @@ -6118,11 +6406,14 @@ fn isNull(self: *Self, inst: Air.Inst.Index, opt_ty: Type, opt_mcv: MCValue) !MC .dead, .undef, .immediate, - .register_overflow, - .ptr_stack_offset, .eflags, + .register_offset, + .register_overflow, .lea_direct, + .lea_got, .lea_tlv, + .lea_frame, + .reserved_frame, => unreachable, .register => |opt_reg| { @@ -6152,27 +6443,36 @@ fn isNull(self: *Self, inst: Air.Inst.Index, opt_ty: Type, opt_mcv: MCValue) !MC const addr_reg_lock = self.register_manager.lockRegAssumeUnused(addr_reg); defer self.register_manager.unlockReg(addr_reg_lock); - try self.genSetReg(Type.usize, addr_reg, switch (opt_mcv) { - .memory => |addr| .{ .immediate = addr }, - .load_direct => |sym_index| .{ .lea_direct = sym_index }, - .load_tlv => |sym_index| .{ .lea_tlv = sym_index }, - else => opt_mcv, - }); - + try self.genSetReg(addr_reg, Type.usize, opt_mcv.address()); const some_abi_size = @intCast(u32, some_info.ty.abiSize(self.target.*)); - try self.asmMemoryImmediate(.cmp, Memory.sib( - Memory.PtrSize.fromSize(some_abi_size), - .{ .base = addr_reg, .disp = some_info.off }, - ), Immediate.u(0)); + try self.asmMemoryImmediate( + .cmp, + Memory.sib(Memory.PtrSize.fromSize(some_abi_size), .{ + .base = .{ .reg = addr_reg }, + .disp = some_info.off, + }), + Immediate.u(0), + ); return .{ .eflags = .e }; }, - .stack_offset => |off| { + .indirect, .load_frame => { const some_abi_size = @intCast(u32, some_info.ty.abiSize(self.target.*)); - try self.asmMemoryImmediate(.cmp, Memory.sib( - Memory.PtrSize.fromSize(some_abi_size), - .{ .base = .rbp, .disp = some_info.off - off }, - ), Immediate.u(0)); + try self.asmMemoryImmediate( + .cmp, + Memory.sib(Memory.PtrSize.fromSize(some_abi_size), switch (opt_mcv) { + .indirect => |reg_off| .{ + .base = .{ .reg = reg_off.reg }, + .disp = reg_off.off + some_info.off, + }, + .load_frame => |frame_addr| .{ + 
.base = .{ .frame = frame_addr.index }, + .disp = frame_addr.off + some_info.off, + }, + else => unreachable, + }), + Immediate.u(0), + ); return .{ .eflags = .e }; }, } @@ -6200,10 +6500,14 @@ fn isNullPtr(self: *Self, inst: Air.Inst.Index, ptr_ty: Type, ptr_mcv: MCValue) defer if (ptr_lock) |lock| self.register_manager.unlockReg(lock); const some_abi_size = @intCast(u32, some_info.ty.abiSize(self.target.*)); - try self.asmMemoryImmediate(.cmp, Memory.sib( - Memory.PtrSize.fromSize(some_abi_size), - .{ .base = ptr_reg, .disp = some_info.off }, - ), Immediate.u(0)); + try self.asmMemoryImmediate( + .cmp, + Memory.sib(Memory.PtrSize.fromSize(some_abi_size), .{ + .base = .{ .reg = ptr_reg }, + .disp = some_info.off, + }), + Immediate.u(0), + ); return .{ .eflags = .e }; } @@ -6221,10 +6525,6 @@ fn isErr(self: *Self, maybe_inst: ?Air.Inst.Index, ty: Type, operand: MCValue) ! const err_off = errUnionErrorOffset(ty.errorUnionPayload(), self.target.*); switch (operand) { - .stack_offset => |off| { - const offset = off - @intCast(i32, err_off); - try self.genBinOpMir(.cmp, Type.anyerror, .{ .stack_offset = offset }, .{ .immediate = 0 }); - }, .register => |reg| { const eu_lock = self.register_manager.lockReg(reg); defer if (eu_lock) |lock| self.register_manager.unlockReg(lock); @@ -6238,6 +6538,15 @@ fn isErr(self: *Self, maybe_inst: ?Air.Inst.Index, ty: Type, operand: MCValue) ! } try self.genBinOpMir(.cmp, Type.anyerror, .{ .register = tmp_reg }, .{ .immediate = 0 }); }, + .load_frame => |frame_addr| try self.genBinOpMir( + .cmp, + Type.anyerror, + .{ .load_frame = .{ + .index = frame_addr.index, + .off = frame_addr.off + @intCast(i32, err_off), + } }, + .{ .immediate = 0 }, + ), else => return self.fail("TODO implement isErr for {}", .{operand}), } @@ -6324,9 +6633,9 @@ fn airIsErrPtr(self: *Self, inst: Air.Inst.Index) !void { } }; const ptr_ty = self.air.typeOf(un_op); - try self.load(operand, operand_ptr, ptr_ty); + try self.load(operand, ptr_ty, operand_ptr); - const result = try self.isErr(inst, ptr_ty.elemType(), operand); + const result = try self.isErr(inst, ptr_ty.childType(), operand); return self.finishAir(inst, result, .{ un_op, .none, .none }); } @@ -6358,9 +6667,9 @@ fn airIsNonErrPtr(self: *Self, inst: Air.Inst.Index) !void { } }; const ptr_ty = self.air.typeOf(un_op); - try self.load(operand, operand_ptr, ptr_ty); + try self.load(operand, ptr_ty, operand_ptr); - const result = try self.isNonErr(inst, ptr_ty.elemType(), operand); + const result = try self.isNonErr(inst, ptr_ty.childType(), operand); return self.finishAir(inst, result, .{ un_op, .none, .none }); } @@ -6389,6 +6698,7 @@ fn airLoop(self: *Self, inst: Air.Inst.Index) !void { fn airBlock(self: *Self, inst: Air.Inst.Index) !void { // A block is a setup to be able to jump to the end. + const inst_tracking_i = self.inst_tracking.count(); self.inst_tracking.putAssumeCapacityNoClobber(inst, InstTracking.init(.unreach)); self.scope_generation += 1; @@ -6412,8 +6722,9 @@ fn airBlock(self: *Self, inst: Air.Inst.Index) !void { for (block_data.value.relocs.items) |reloc| try self.performReloc(reloc); } - const tracking = self.inst_tracking.getPtr(inst).?; - if (self.liveness.isUnused(inst)) tracking.die(self); + if (std.debug.runtime_safety) assert(self.inst_tracking.getIndex(inst).? 
== inst_tracking_i); + const tracking = &self.inst_tracking.values()[inst_tracking_i]; + if (self.liveness.isUnused(inst)) tracking.die(self, inst); self.getValue(tracking.short, inst); self.finishAirBookkeeping(); } @@ -6520,35 +6831,38 @@ fn airBr(self: *Self, inst: Air.Inst.Index) !void { !block_ty.hasRuntimeBitsIgnoreComptime() or self.liveness.isUnused(br.block_inst); const block_tracking = self.inst_tracking.getPtr(br.block_inst).?; const block_data = self.blocks.getPtr(br.block_inst).?; + const first_br = block_data.relocs.items.len == 0; + const block_result = result: { + if (block_unused) break :result .none; + + if (self.reuseOperandAdvanced(inst, br.operand, 0, src_mcv, br.block_inst)) { + if (first_br) break :result src_mcv; + + if (block_tracking.getReg()) |block_reg| + try self.register_manager.getReg(block_reg, br.block_inst); + // .long = .none to avoid merging operand and block result stack frames. + var current_tracking = InstTracking{ .long = .none, .short = src_mcv }; + try current_tracking.materializeUnsafe(self, br.block_inst, block_tracking.*); + if (src_mcv.getReg()) |src_reg| self.register_manager.freeReg(src_reg); + break :result block_tracking.short; + } - if (block_data.relocs.items.len == 0) { - block_tracking.* = InstTracking.init(result: { - if (block_unused) break :result .none; - if (self.reuseOperand(inst, br.operand, 0, src_mcv)) { - // Fix instruction tracking - switch (src_mcv) { - .register => |reg| if (RegisterManager.indexOfRegIntoTracked(reg)) |index| { - self.register_manager.registers[index] = br.block_inst; - }, - else => {}, - } - break :result src_mcv; - } - - const new_mcv = try self.allocRegOrMem(br.block_inst, true); - try self.setRegOrMem(block_ty, new_mcv, src_mcv); - break :result new_mcv; - }); - } else if (!block_unused) try self.setRegOrMem(block_ty, block_tracking.short, src_mcv); + const dst_mcv = if (first_br) try self.allocRegOrMem(br.block_inst, true) else dst: { + self.getValue(block_tracking.short, br.block_inst); + break :dst block_tracking.short; + }; + try self.genCopy(block_ty, dst_mcv, src_mcv); + break :result dst_mcv; + }; // Process operand death so that it is properly accounted for in the State below. if (self.liveness.operandDies(inst, 0)) { if (Air.refToIndex(br.operand)) |op_inst| self.processDeath(op_inst); } - if (block_data.relocs.items.len == 0) { + if (first_br) { + block_tracking.* = InstTracking.init(block_result); try self.saveRetroactiveState(&block_data.state); - block_tracking.die(self); } else try self.restoreState(block_data.state, &.{}, .{ .emit_instructions = true, .update_tracking = false, @@ -6556,6 +6870,9 @@ fn airBr(self: *Self, inst: Air.Inst.Index) !void { .close_scope = false, }); + // Stop tracking block result without forgetting tracking info + self.freeValue(block_tracking.short); + // Emit a jump with a relocation. It will be patched up after the block ends. // Leave the jump offset undefined const jmp_reloc = try self.asmJmpReloc(undefined); @@ -6634,7 +6951,7 @@ fn airAsm(self: *Self, inst: Air.Inst.Index) !void { const arg_mcv = try self.resolveInst(input); try self.register_manager.getReg(reg, null); - try self.genSetReg(self.air.typeOf(input), reg, arg_mcv); + try self.genSetReg(reg, self.air.typeOf(input), arg_mcv); } { @@ -6666,11 +6983,15 @@ fn airAsm(self: *Self, inst: Air.Inst.Index) !void { .qword else null; - const mnem = std.meta.stringToEnum(Mir.Inst.Tag, mnem_str) orelse - (if (mnem_size) |_| - std.meta.stringToEnum(Mir.Inst.Tag, mnem_str[0 .. 
mnem_str.len - 1]) - else - null) orelse return self.fail("Invalid mnemonic: '{s}'", .{mnem_str}); + const mnem = mnem: { + if (mnem_size) |_| { + if (std.meta.stringToEnum(Mir.Inst.Tag, mnem_str[0 .. mnem_str.len - 1])) |mnem| { + break :mnem mnem; + } + } + break :mnem std.meta.stringToEnum(Mir.Inst.Tag, mnem_str) orelse + return self.fail("Invalid mnemonic: '{s}'", .{mnem_str}); + }; var op_it = mem.tokenize(u8, mnem_it.rest(), ","); var ops = [1]encoder.Instruction.Operand{.none} ** 4; @@ -6686,7 +7007,7 @@ fn airAsm(self: *Self, inst: Air.Inst.Index) !void { return self.fail("Invalid displacement: '{s}'", .{op_str}); op.* = .{ .mem = Memory.sib( mnem_size orelse return self.fail("Unknown size: '{s}'", .{op_str}), - .{ .base = reg, .disp = disp }, + .{ .base = .{ .reg = reg }, .disp = disp }, ) }; } else { if (mnem_size) |size| if (reg.bitSize() != size.bitSize()) @@ -6805,335 +7126,326 @@ fn airAsm(self: *Self, inst: Air.Inst.Index) !void { return self.finishAirResult(inst, result); } -/// Sets the value without any modifications to register allocation metadata or stack allocation metadata. -fn setRegOrMem(self: *Self, ty: Type, loc: MCValue, val: MCValue) !void { - switch (loc) { - .none => return, - .immediate => unreachable, - .register => |reg| return self.genSetReg(ty, reg, val), - .stack_offset => |off| return self.genSetStack(ty, off, val, .{}), - .memory => { - return self.fail("TODO implement setRegOrMem for memory", .{}); +fn movMirTag(self: *Self, ty: Type) !Mir.Inst.Tag { + return switch (ty.zigTypeTag()) { + else => .mov, + .Float => switch (ty.floatBits(self.target.*)) { + 16 => .mov, + 32 => if (Target.x86.featureSetHas(self.target.cpu.features, .sse)) .movss else .mov, + 64 => if (Target.x86.featureSetHas(self.target.cpu.features, .sse2)) .movsd else .mov, + else => return self.fail("TODO movMirTag for {}", .{ + ty.fmt(self.bin_file.options.module.?), + }), }, - else => { - return self.fail("TODO implement setRegOrMem for {}", .{loc}); + }; +} + +fn genCopy(self: *Self, ty: Type, dst_mcv: MCValue, src_mcv: MCValue) InnerError!void { + const src_lock = switch (src_mcv) { + .register => |reg| self.register_manager.lockReg(reg), + .register_overflow => |ro| self.register_manager.lockReg(ro.reg), + else => null, + }; + defer if (src_lock) |lock| self.register_manager.unlockReg(lock); + + switch (dst_mcv) { + .none, + .unreach, + .dead, + .undef, + .immediate, + .eflags, + .register_overflow, + .lea_direct, + .lea_got, + .lea_tlv, + .lea_frame, + .reserved_frame, + => unreachable, // unmodifiable destination + .register => |reg| try self.genSetReg(reg, ty, src_mcv), + .register_offset => |dst_reg_off| try self.genSetReg(dst_reg_off.reg, ty, switch (src_mcv) { + .none, + .unreach, + .dead, + .undef, + .register_overflow, + .reserved_frame, + => unreachable, + .immediate, + .register, + .register_offset, + .lea_frame, + => src_mcv.offset(-dst_reg_off.off), + else => .{ .register_offset = .{ + .reg = try self.copyToTmpRegister(ty, src_mcv), + .off = -dst_reg_off.off, + } }, + }), + .indirect => |reg_off| try self.genSetMem(.{ .reg = reg_off.reg }, reg_off.off, ty, src_mcv), + .memory, .load_direct, .load_got, .load_tlv => { + switch (dst_mcv) { + .memory => |addr| if (math.cast(i32, @bitCast(i64, addr))) |small_addr| + return self.genSetMem(.{ .reg = .ds }, small_addr, ty, src_mcv), + .load_direct, .load_got, .load_tlv => {}, + else => unreachable, + } + + const addr_reg = try self.copyToTmpRegister(Type.usize, dst_mcv.address()); + const addr_lock = 
self.register_manager.lockRegAssumeUnused(addr_reg); + defer self.register_manager.unlockReg(addr_lock); + + try self.genSetMem(.{ .reg = addr_reg }, 0, ty, src_mcv); }, + .load_frame => |frame_addr| try self.genSetMem( + .{ .frame = frame_addr.index }, + frame_addr.off, + ty, + src_mcv, + ), } } -fn genSetStackArg(self: *Self, ty: Type, stack_offset: i32, mcv: MCValue) InnerError!void { +fn genSetReg(self: *Self, dst_reg: Register, ty: Type, src_mcv: MCValue) InnerError!void { const abi_size = @intCast(u32, ty.abiSize(self.target.*)); - switch (mcv) { - .dead => unreachable, - .unreach, .none => return, - .undef => { - if (!self.wantSafety()) return; // The already existing value will do just fine. - if (abi_size <= 8) { - const reg = try self.copyToTmpRegister(ty, mcv); - return self.genSetStackArg(ty, stack_offset, MCValue{ .register = reg }); + if (abi_size > 8) return self.fail("genSetReg called with a value larger than one register", .{}); + switch (src_mcv) { + .none, + .unreach, + .dead, + .register_overflow, + .reserved_frame, + => unreachable, + .undef => if (self.wantSafety()) + try self.genSetReg(dst_reg.to64(), Type.usize, .{ .immediate = 0xaaaaaaaaaaaaaaaa }), + .eflags => |cc| try self.asmSetccRegister(dst_reg.to8(), cc), + .immediate => |imm| { + if (imm == 0) { + // 32-bit moves zero-extend to 64-bit, so xoring the 32-bit + // register is the fastest way to zero a register. + try self.asmRegisterRegister(.xor, dst_reg.to32(), dst_reg.to32()); + } else if (abi_size > 4 and math.cast(u32, imm) != null) { + // 32-bit moves zero-extend to 64-bit. + try self.asmRegisterImmediate(.mov, dst_reg.to32(), Immediate.u(imm)); + } else if (abi_size <= 4 and @bitCast(i64, imm) < 0) { + try self.asmRegisterImmediate( + .mov, + registerAlias(dst_reg, abi_size), + Immediate.s(@intCast(i32, @bitCast(i64, imm))), + ); + } else { + try self.asmRegisterImmediate( + .mov, + registerAlias(dst_reg, abi_size), + Immediate.u(imm), + ); } - try self.genInlineMemset( - .{ .ptr_stack_offset = stack_offset }, - .{ .immediate = 0xaa }, - .{ .immediate = abi_size }, - .{ .dest_stack_base = .rsp }, - ); }, - .register_overflow => return self.fail("TODO genSetStackArg for register with overflow bit", .{}), - .eflags => { - const reg = try self.copyToTmpRegister(ty, mcv); - return self.genSetStackArg(ty, stack_offset, .{ .register = reg }); - }, - .immediate => |imm| { - switch (abi_size) { - 1, 2, 4 => { - // TODO - // We have a positive stack offset value but we want a twos complement negative - // offset from rbp, which is at the top of the stack frame. 
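// The .immediate cases above pick the cheapest x86-64 encoding: `xor r32, r32`
// for zero (32-bit operations implicitly clear the upper half), a 32-bit
// `mov` for 64-bit values that fit in u32, a signed immediate for negative
// values at sub-8-byte sizes, and a full-width `mov` otherwise. For example:
//
//     try self.genSetReg(.rax, Type.usize, .{ .immediate = 0 });          // xor eax, eax
//     try self.genSetReg(.rax, Type.usize, .{ .immediate = 0xdeadbeef }); // mov eax, 0xdeadbeef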
- const immediate = if (ty.isSignedInt()) - Immediate.s(@intCast(i32, @bitCast(i64, imm))) - else - Immediate.u(@intCast(u32, imm)); - try self.asmMemoryImmediate(.mov, Memory.sib(Memory.PtrSize.fromSize(abi_size), .{ - .base = .rsp, - .disp = -stack_offset, - }), immediate); + .register => |src_reg| if (dst_reg.id() != src_reg.id()) try self.asmRegisterRegister( + if ((dst_reg.class() == .floating_point) == (src_reg.class() == .floating_point)) + try self.movMirTag(ty) + else switch (abi_size) { + 4 => .movd, + 8 => .movq, + else => return self.fail( + "unsupported register copy from {s} to {s}", + .{ @tagName(src_reg), @tagName(dst_reg) }, + ), + }, + registerAlias(dst_reg, abi_size), + registerAlias(src_reg, abi_size), + ), + .register_offset, .indirect, .load_frame, .lea_frame => try self.asmRegisterMemory( + switch (src_mcv) { + .register_offset => |reg_off| switch (reg_off.off) { + 0 => return self.genSetReg(dst_reg, ty, .{ .register = reg_off.reg }), + else => .lea, }, - 8 => { - const reg = try self.copyToTmpRegister(ty, mcv); - return self.genSetStackArg(ty, stack_offset, MCValue{ .register = reg }); + .indirect, .load_frame => try self.movMirTag(ty), + .lea_frame => .lea, + else => unreachable, + }, + registerAlias(dst_reg, abi_size), + Memory.sib(Memory.PtrSize.fromSize(abi_size), switch (src_mcv) { + .register_offset, .indirect => |reg_off| .{ + .base = .{ .reg = reg_off.reg }, + .disp = reg_off.off, }, - else => return self.fail("TODO implement inputs on stack for {} with abi size > 8", .{mcv}), - } - }, - .memory, - .load_direct, - .load_tlv, - => { - if (abi_size <= 8) { - const reg = try self.copyToTmpRegister(ty, mcv); - return self.genSetStackArg(ty, stack_offset, MCValue{ .register = reg }); + .load_frame, .lea_frame => |frame_addr| .{ + .base = .{ .frame = frame_addr.index }, + .disp = frame_addr.off, + }, + else => unreachable, + }), + ), + .memory, .load_direct, .load_got, .load_tlv => { + switch (src_mcv) { + .memory => |addr| if (math.cast(i32, @bitCast(i64, addr))) |small_addr| + return self.asmRegisterMemory( + try self.movMirTag(ty), + registerAlias(dst_reg, abi_size), + Memory.sib(Memory.PtrSize.fromSize(abi_size), .{ + .base = .{ .reg = .ds }, + .disp = small_addr, + }), + ), + .load_direct => |sym_index| if (try self.movMirTag(ty) == .mov) { + const atom_index = try self.getSymbolIndexForDecl(self.mod_fn.owner_decl); + _ = try self.addInst(.{ + .tag = .mov_linker, + .ops = .direct_reloc, + .data = .{ .payload = try self.addExtra(Mir.LeaRegisterReloc{ + .reg = @enumToInt(dst_reg.to64()), + .atom_index = atom_index, + .sym_index = sym_index, + }) }, + }); + return; + }, + .load_got, .load_tlv => {}, + else => unreachable, } - const addr_reg = (try self.register_manager.allocReg(null, gp)).to64(); + const addr_reg = try self.copyToTmpRegister(Type.usize, src_mcv.address()); const addr_lock = self.register_manager.lockRegAssumeUnused(addr_reg); defer self.register_manager.unlockReg(addr_lock); - try self.genSetReg(Type.usize, addr_reg, switch (mcv) { - .memory => |addr| .{ .immediate = addr }, - .load_direct => |sym_index| .{ .lea_direct = sym_index }, - .load_tlv => |sym_index| .{ .lea_tlv = sym_index }, - else => unreachable, - }); - try self.genInlineMemcpy( - .{ .ptr_stack_offset = stack_offset }, - .{ .register = addr_reg }, - .{ .immediate = abi_size }, - .{ .dest_stack_base = .rsp }, + try self.asmRegisterMemory( + try self.movMirTag(ty), + registerAlias(dst_reg, abi_size), + Memory.sib(Memory.PtrSize.fromSize(abi_size), .{ .base = .{ .reg = addr_reg } }), 
); }, - .register => |reg| { - switch (ty.zigTypeTag()) { - .Float => { - if (intrinsicsAllowed(self.target.*, ty)) { - const tag: Mir.Inst.Tag = switch (ty.tag()) { - .f32 => .movss, - .f64 => .movsd, - else => return self.fail( - "TODO genSetStackArg for register for type {}", - .{ty.fmtDebug()}, - ), - }; - // TODO verify this - const ptr_size: Memory.PtrSize = switch (ty.tag()) { - .f32 => .dword, - .f64 => .qword, - else => unreachable, - }; - return self.asmMemoryRegister(tag, Memory.sib(ptr_size, .{ - .base = .rsp, - .disp = -stack_offset, - }), reg.to128()); - } - - return self.fail("TODO genSetStackArg for register with no intrinsics", .{}); + .lea_direct, .lea_got => |sym_index| { + const atom_index = try self.getSymbolIndexForDecl(self.mod_fn.owner_decl); + _ = try self.addInst(.{ + .tag = switch (src_mcv) { + .lea_direct => .lea_linker, + .lea_got => .mov_linker, + else => unreachable, }, - else => { - try self.asmMemoryRegister(.mov, Memory.sib(Memory.PtrSize.fromSize(abi_size), .{ - .base = .rsp, - .disp = -stack_offset, - }), registerAlias(reg, abi_size)); + .ops = switch (src_mcv) { + .lea_direct => .direct_reloc, + .lea_got => .got_reloc, + else => unreachable, }, - } - }, - .ptr_stack_offset, - .load_got, - .lea_direct, - .lea_tlv, - => { - const reg = try self.copyToTmpRegister(ty, mcv); - return self.genSetStackArg(ty, stack_offset, MCValue{ .register = reg }); + .data = .{ .payload = try self.addExtra(Mir.LeaRegisterReloc{ + .reg = @enumToInt(dst_reg.to64()), + .atom_index = atom_index, + .sym_index = sym_index, + }) }, + }); }, - .stack_offset => |mcv_off| { - if (abi_size <= 8) { - const reg = try self.copyToTmpRegister(ty, mcv); - return self.genSetStackArg(ty, stack_offset, MCValue{ .register = reg }); - } - - try self.genInlineMemcpy( - .{ .ptr_stack_offset = stack_offset }, - .{ .ptr_stack_offset = mcv_off }, - .{ .immediate = abi_size }, - .{ .dest_stack_base = .rsp }, - ); + .lea_tlv => |sym_index| { + const atom_index = try self.getSymbolIndexForDecl(self.mod_fn.owner_decl); + if (self.bin_file.cast(link.File.MachO)) |_| { + _ = try self.addInst(.{ + .tag = .lea_linker, + .ops = .tlv_reloc, + .data = .{ .payload = try self.addExtra(Mir.LeaRegisterReloc{ + .reg = @enumToInt(Register.rdi), + .atom_index = atom_index, + .sym_index = sym_index, + }) }, + }); + // TODO: spill registers before calling + try self.asmMemory(.call, Memory.sib(.qword, .{ .base = .{ .reg = .rdi } })); + try self.genSetReg(dst_reg.to64(), Type.usize, .{ .register = .rax }); + } else return self.fail("TODO emit ptr to TLV sequence on {s}", .{ + @tagName(self.bin_file.tag), + }); }, } } -fn genSetStack(self: *Self, ty: Type, stack_offset: i32, mcv: MCValue, opts: InlineMemcpyOpts) InnerError!void { - const base_reg = opts.dest_stack_base orelse .rbp; +fn genSetMem(self: *Self, base: Memory.Base, disp: i32, ty: Type, src_mcv: MCValue) InnerError!void { const abi_size = @intCast(u32, ty.abiSize(self.target.*)); - switch (mcv) { - .dead => unreachable, - .unreach, .none => return, // Nothing to do. - .undef => { - if (!self.wantSafety()) return; // The already existing value will do just fine. - // TODO Upgrade this to a memset call when we have that available. 
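// genSetMem (new signature above) first folds its (base, disp) pair into a
// single pointer-valued MCValue, as the dst_ptr_mcv switch below shows: an
// absolute base becomes .immediate, a register base .register_offset, and a
// frame base .lea_frame. Fallback paths such as genInlineMemset then take one
// uniform destination operand; for a frame slot the undef safety fill is,
// in effect (frame_index standing in for the captured base):
//
//     const dst_ptr: MCValue = .{ .lea_frame = .{ .index = frame_index, .off = disp } };
//     try self.genInlineMemset(dst_ptr, .{ .immediate = 0xaa }, .{ .immediate = abi_size });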
- switch (abi_size) { - 1, 2, 4 => { - const value: u64 = switch (abi_size) { - 1 => 0xaa, - 2 => 0xaaaa, - 4 => 0xaaaaaaaa, - else => unreachable, - }; - return self.asmMemoryImmediate(.mov, Memory.sib( - Memory.PtrSize.fromSize(abi_size), - .{ .base = base_reg, .disp = -stack_offset }, - ), Immediate.u(value)); - }, - 8 => return self.genSetStack( - ty, - stack_offset, - .{ .immediate = 0xaaaaaaaaaaaaaaaa }, - opts, - ), - else => |x| return self.genInlineMemset( - .{ .ptr_stack_offset = stack_offset }, - .{ .immediate = 0xaa }, - .{ .immediate = x }, - opts, - ), - } + const dst_ptr_mcv: MCValue = switch (base) { + .none => .{ .immediate = @bitCast(u64, @as(i64, disp)) }, + .reg => |base_reg| .{ .register_offset = .{ .reg = base_reg, .off = disp } }, + .frame => |base_frame_index| .{ .lea_frame = .{ .index = base_frame_index, .off = disp } }, + }; + switch (src_mcv) { + .none, .unreach, .dead, .reserved_frame => unreachable, + .undef => if (self.wantSafety()) + try self.genInlineMemset(dst_ptr_mcv, .{ .immediate = 0xaa }, .{ .immediate = abi_size }), + .immediate => |imm| switch (abi_size) { + 1, 2, 4 => { + const immediate = if (ty.isSignedInt()) + Immediate.s(@truncate(i32, @bitCast(i64, imm))) + else + Immediate.u(@intCast(u32, imm)); + try self.asmMemoryImmediate( + .mov, + Memory.sib(Memory.PtrSize.fromSize(abi_size), .{ .base = base, .disp = disp }), + immediate, + ); + }, + 3, 5...7 => unreachable, + else => if (math.cast(i32, @bitCast(i64, imm))) |small| { + try self.asmMemoryImmediate( + .mov, + Memory.sib(Memory.PtrSize.fromSize(abi_size), .{ .base = base, .disp = disp }), + Immediate.s(small), + ); + } else { + var offset: i32 = 0; + while (offset < abi_size) : (offset += 4) try self.asmMemoryImmediate( + .mov, + Memory.sib(.dword, .{ .base = base, .disp = disp + offset }), + if (ty.isSignedInt()) + Immediate.s(@truncate( + i32, + @bitCast(i64, imm) >> (math.cast(u6, offset * 8) orelse 63), + )) + else + Immediate.u(@truncate( + u32, + if (math.cast(u6, offset * 8)) |shift| imm >> shift else 0, + )), + ); + }, }, + .eflags => |cc| try self.asmSetccMemory(Memory.sib(.byte, .{ .base = base, .disp = disp }), cc), + .register => |reg| try self.asmMemoryRegister( + try self.movMirTag(ty), + Memory.sib(Memory.PtrSize.fromSize(abi_size), .{ .base = base, .disp = disp }), + registerAlias(reg, abi_size), + ), .register_overflow => |ro| { - const reg_lock = self.register_manager.lockReg(ro.reg); - defer if (reg_lock) |lock| self.register_manager.unlockReg(lock); - - const wrapped_ty = ty.structFieldType(0); - try self.genSetStack(wrapped_ty, stack_offset, .{ .register = ro.reg }, .{}); - - const overflow_bit_ty = ty.structFieldType(1); - const overflow_bit_offset = ty.structFieldOffset(1, self.target.*); - const tmp_reg = try self.register_manager.allocReg(null, gp); - try self.asmSetccRegister(tmp_reg.to8(), ro.eflags); - - return self.genSetStack( - overflow_bit_ty, - stack_offset - @intCast(i32, overflow_bit_offset), - .{ .register = tmp_reg.to8() }, - .{}, + try self.genSetMem( + base, + disp + @intCast(i32, ty.structFieldOffset(0, self.target.*)), + ty.structFieldType(0), + .{ .register = ro.reg }, + ); + try self.genSetMem( + base, + disp + @intCast(i32, ty.structFieldOffset(1, self.target.*)), + ty.structFieldType(1), + .{ .eflags = ro.eflags }, ); }, - .eflags => |cc| try self.asmSetccMemory( - Memory.sib(.byte, .{ .base = base_reg, .disp = -stack_offset }), - cc, - ), - .immediate => |imm| { - // TODO - switch (abi_size) { - 0 => { - assert(ty.isError()); - try 
self.asmMemoryImmediate(.mov, Memory.sib(.byte, .{ - .base = base_reg, - .disp = -stack_offset, - }), Immediate.u(@truncate(u8, imm))); - }, - 1, 2, 4 => { - const immediate = if (ty.isSignedInt()) - Immediate.s(@truncate(i32, @bitCast(i64, imm))) - else - Immediate.u(@intCast(u32, imm)); - try self.asmMemoryImmediate(.mov, Memory.sib(Memory.PtrSize.fromSize(abi_size), .{ - .base = base_reg, - .disp = -stack_offset, - }), immediate); - }, - 3, 5...7 => unreachable, - else => { - // 64 bit write to memory would take two mov's anyways so we - // insted just use two 32 bit writes to avoid register allocation - if (math.cast(i32, @bitCast(i64, imm))) |small| { - try self.asmMemoryImmediate(.mov, Memory.sib( - Memory.PtrSize.fromSize(abi_size), - .{ .base = base_reg, .disp = -stack_offset }, - ), Immediate.s(small)); - } else { - var offset: i32 = 0; - while (offset < abi_size) : (offset += 4) try self.asmMemoryImmediate( - .mov, - Memory.sib(.dword, .{ .base = base_reg, .disp = offset - stack_offset }), - if (ty.isSignedInt()) - Immediate.s(@truncate( - i32, - @bitCast(i64, imm) >> (math.cast(u6, offset * 8) orelse 63), - )) - else - Immediate.u(@truncate( - u32, - if (math.cast(u6, offset * 8)) |shift| imm >> shift else 0, - )), - ); - } - }, - } - }, - .register => |reg| { - switch (ty.zigTypeTag()) { - .Float => { - if (intrinsicsAllowed(self.target.*, ty)) { - const tag: Mir.Inst.Tag = switch (ty.tag()) { - .f32 => .movss, - .f64 => .movsd, - else => return self.fail( - "TODO genSetStack for register for type {}", - .{ty.fmtDebug()}, - ), - }; - const ptr_size: Memory.PtrSize = switch (ty.tag()) { - .f32 => .dword, - .f64 => .qword, - else => unreachable, - }; - return self.asmMemoryRegister(tag, Memory.sib(ptr_size, .{ - .base = base_reg.to64(), - .disp = -stack_offset, - }), reg.to128()); - } - - return self.fail("TODO genSetStack for register for type float with no intrinsics", .{}); - }, - else => { - try self.genInlineMemcpyRegisterRegister(ty, base_reg, reg, stack_offset); - }, - } - }, + .register_offset, .memory, + .indirect, .load_direct, - .load_tlv, .lea_direct, - .lea_tlv, - => if (abi_size <= 8) { - const reg = try self.copyToTmpRegister(ty, mcv); - return self.genSetStack(ty, stack_offset, MCValue{ .register = reg }, opts); - } else { - const addr_reg = (try self.register_manager.allocReg(null, gp)).to64(); - const addr_lock = self.register_manager.lockRegAssumeUnused(addr_reg); - defer self.register_manager.unlockReg(addr_lock); - - try self.genSetReg(Type.usize, addr_reg, switch (mcv) { - .memory => |addr| .{ .immediate = addr }, - .load_direct => |sym_index| .{ .lea_direct = sym_index }, - .load_tlv => |sym_index| .{ .lea_tlv = sym_index }, - else => mcv, - }); - try self.genInlineMemcpy( - .{ .ptr_stack_offset = stack_offset }, - .{ .register = addr_reg }, - .{ .immediate = abi_size }, - .{}, - ); - }, - .stack_offset => |off| if (abi_size <= 8) { - const tmp_reg = try self.copyToTmpRegister(ty, mcv); - const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg); - defer self.register_manager.unlockReg(tmp_lock); - - try self.genSetStack(ty, stack_offset, .{ .register = tmp_reg }, opts); - } else try self.genInlineMemcpy( - .{ .ptr_stack_offset = stack_offset }, - .{ .ptr_stack_offset = off }, - .{ .immediate = abi_size }, - .{}, - ), - .ptr_stack_offset, .load_got, - => { - const tmp_reg = try self.copyToTmpRegister(ty, mcv); - const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg); - defer self.register_manager.unlockReg(tmp_lock); - - try 
self.genSetStack(ty, stack_offset, .{ .register = tmp_reg }, opts); + .lea_got, + .load_tlv, + .lea_tlv, + .load_frame, + .lea_frame, + => switch (abi_size) { + 0 => {}, + 1, 2, 4, 8 => { + const src_reg = try self.copyToTmpRegister(ty, src_mcv); + const src_lock = self.register_manager.lockRegAssumeUnused(src_reg); + defer self.register_manager.unlockReg(src_lock); + + try self.genSetMem(base, disp, ty, .{ .register = src_reg }); + }, + else => try self.genInlineMemcpy(dst_ptr_mcv, src_mcv.address(), .{ .immediate = abi_size }), }, } } @@ -7165,10 +7477,14 @@ fn genInlineMemcpyRegisterRegister( var remainder = abi_size; while (remainder > 0) { const nearest_power_of_two = @as(u6, 1) << math.log2_int(u3, @intCast(u3, remainder)); - try self.asmMemoryRegister(.mov, Memory.sib(Memory.PtrSize.fromSize(nearest_power_of_two), .{ - .base = dst_reg, - .disp = -next_offset, - }), registerAlias(tmp_reg, nearest_power_of_two)); + try self.asmMemoryRegister( + .mov, + Memory.sib(Memory.PtrSize.fromSize(nearest_power_of_two), .{ + .base = dst_reg, + .disp = -next_offset, + }), + registerAlias(tmp_reg, nearest_power_of_two), + ); if (nearest_power_of_two > 1) { try self.genShiftBinOpMir(.shr, ty, .{ .register = tmp_reg }, .{ @@ -7191,121 +7507,11 @@ fn genInlineMemcpyRegisterRegister( } } -const InlineMemcpyOpts = struct { - source_stack_base: ?Register = null, - dest_stack_base: ?Register = null, -}; - -fn genInlineMemcpy( - self: *Self, - dst_ptr: MCValue, - src_ptr: MCValue, - len: MCValue, - opts: InlineMemcpyOpts, -) InnerError!void { - const ssbase_lock: ?RegisterLock = if (opts.source_stack_base) |reg| - self.register_manager.lockReg(reg) - else - null; - defer if (ssbase_lock) |reg| self.register_manager.unlockReg(reg); - - const dsbase_lock: ?RegisterLock = if (opts.dest_stack_base) |reg| - self.register_manager.lockReg(reg) - else - null; - defer if (dsbase_lock) |lock| self.register_manager.unlockReg(lock); - +fn genInlineMemcpy(self: *Self, dst_ptr: MCValue, src_ptr: MCValue, len: MCValue) InnerError!void { try self.spillRegisters(&.{ .rdi, .rsi, .rcx }); - - switch (dst_ptr) { - .lea_tlv, - .load_tlv, - => { - try self.genSetReg(Type.usize, .rdi, switch (dst_ptr) { - .load_tlv => |sym_index| .{ .lea_tlv = sym_index }, - else => dst_ptr, - }); - }, - .memory, - .load_got, - .lea_direct, - .load_direct, - => { - try self.genSetReg(Type.usize, .rdi, switch (dst_ptr) { - .memory => |addr| .{ .immediate = addr }, - .load_direct => |sym_index| .{ .lea_direct = sym_index }, - else => dst_ptr, - }); - // Load the pointer, which is stored in memory - try self.asmRegisterMemory(.mov, .rdi, Memory.sib(.qword, .{ .base = .rdi })); - }, - .stack_offset, .ptr_stack_offset => |off| { - try self.asmRegisterMemory(switch (dst_ptr) { - .stack_offset => .mov, - .ptr_stack_offset => .lea, - else => unreachable, - }, .rdi, Memory.sib(.qword, .{ - .base = opts.dest_stack_base orelse .rbp, - .disp = -off, - })); - }, - .register => |reg| { - try self.asmRegisterRegister( - .mov, - registerAlias(.rdi, @intCast(u32, @divExact(reg.bitSize(), 8))), - reg, - ); - }, - else => { - return self.fail("TODO implement memcpy for setting stack when dest is {}", .{dst_ptr}); - }, - } - - switch (src_ptr) { - .lea_tlv, - .load_tlv, - => { - try self.genSetReg(Type.usize, .rsi, switch (src_ptr) { - .load_tlv => |sym_index| .{ .lea_tlv = sym_index }, - else => dst_ptr, - }); - }, - .memory, - .load_got, - .lea_direct, - .load_direct, - => { - try self.genSetReg(Type.usize, .rsi, switch (src_ptr) { - .memory => |addr| .{ 
.immediate = addr }, - .load_direct => |sym_index| .{ .lea_direct = sym_index }, - else => src_ptr, - }); - // Load the pointer, which is stored in memory - try self.asmRegisterMemory(.mov, .rsi, Memory.sib(.qword, .{ .base = .rsi })); - }, - .stack_offset, .ptr_stack_offset => |off| { - try self.asmRegisterMemory(switch (src_ptr) { - .stack_offset => .mov, - .ptr_stack_offset => .lea, - else => unreachable, - }, .rsi, Memory.sib(.qword, .{ - .base = opts.source_stack_base orelse .rbp, - .disp = -off, - })); - }, - .register => |reg| { - try self.asmRegisterRegister( - .mov, - registerAlias(.rsi, @intCast(u32, @divExact(reg.bitSize(), 8))), - reg, - ); - }, - else => { - return self.fail("TODO implement memcpy for setting stack when src is {}", .{src_ptr}); - }, - } - - try self.genSetReg(Type.usize, .rcx, len); + try self.genSetReg(.rdi, Type.usize, dst_ptr); + try self.genSetReg(.rsi, Type.usize, src_ptr); + try self.genSetReg(.rcx, Type.usize, len); _ = try self.addInst(.{ .tag = .movs, .ops = .string, @@ -7313,67 +7519,11 @@ fn genInlineMemcpy( }); } -fn genInlineMemset( - self: *Self, - dst_ptr: MCValue, - value: MCValue, - len: MCValue, - opts: InlineMemcpyOpts, -) InnerError!void { - const dsbase_lock: ?RegisterLock = if (opts.dest_stack_base) |reg| - self.register_manager.lockReg(reg) - else - null; - defer if (dsbase_lock) |lock| self.register_manager.unlockReg(lock); - +fn genInlineMemset(self: *Self, dst_ptr: MCValue, value: MCValue, len: MCValue) InnerError!void { try self.spillRegisters(&.{ .rdi, .al, .rcx }); - - switch (dst_ptr) { - .lea_tlv, - .load_tlv, - => { - try self.genSetReg(Type.usize, .rdi, switch (dst_ptr) { - .load_tlv => |sym_index| .{ .lea_tlv = sym_index }, - else => dst_ptr, - }); - }, - .load_got => try self.genSetReg(Type.usize, .rdi, dst_ptr), - .memory, - .lea_direct, - .load_direct, - => { - try self.genSetReg(Type.usize, .rdi, switch (dst_ptr) { - .memory => |addr| .{ .immediate = addr }, - .load_direct => |sym_index| .{ .lea_direct = sym_index }, - else => dst_ptr, - }); - // Load the pointer, which is stored in memory - try self.asmRegisterMemory(.mov, .rdi, Memory.sib(.qword, .{ .base = .rdi })); - }, - .stack_offset, .ptr_stack_offset => |off| { - try self.asmRegisterMemory(switch (dst_ptr) { - .stack_offset => .mov, - .ptr_stack_offset => .lea, - else => unreachable, - }, .rdi, Memory.sib(.qword, .{ - .base = opts.dest_stack_base orelse .rbp, - .disp = -off, - })); - }, - .register => |reg| { - try self.asmRegisterRegister( - .mov, - registerAlias(.rdi, @intCast(u32, @divExact(reg.bitSize(), 8))), - reg, - ); - }, - else => { - return self.fail("TODO implement memset for setting stack when dest is {}", .{dst_ptr}); - }, - } - - try self.genSetReg(Type.u8, .al, value); - try self.genSetReg(Type.usize, .rcx, len); + try self.genSetReg(.rdi, Type.usize, dst_ptr); + try self.genSetReg(.al, Type.u8, value); + try self.genSetReg(.rcx, Type.usize, len); _ = try self.addInst(.{ .tag = .stos, .ops = .string, @@ -7381,307 +7531,6 @@ fn genInlineMemset( }); } -fn genSetReg(self: *Self, ty: Type, reg: Register, mcv: MCValue) InnerError!void { - const abi_size = @intCast(u32, ty.abiSize(self.target.*)); - if (abi_size > 8) return self.fail("genSetReg called with a value larger than one register", .{}); - switch (mcv) { - .dead => unreachable, - .register_overflow => unreachable, - .ptr_stack_offset => |off| { - try self.asmRegisterMemory( - .lea, - registerAlias(reg, abi_size), - Memory.sib(Memory.PtrSize.fromSize(abi_size), .{ .base = .rbp, .disp = -off }), - 
); - }, - .unreach, .none => return, // Nothing to do. - .undef => { - if (!self.wantSafety()) return; // The already existing value will do just fine. - // Write the debug undefined value. - switch (self.regBitSize(ty)) { - 8 => return self.genSetReg(ty, reg, .{ .immediate = 0xaa }), - 16 => return self.genSetReg(ty, reg, .{ .immediate = 0xaaaa }), - 32 => return self.genSetReg(ty, reg, .{ .immediate = 0xaaaaaaaa }), - 64 => return self.genSetReg(ty, reg, .{ .immediate = 0xaaaaaaaaaaaaaaaa }), - else => unreachable, - } - }, - .eflags => |cc| { - return self.asmSetccRegister(reg.to8(), cc); - }, - .immediate => |imm| { - if (imm == 0) { - // 32-bit moves zero-extend to 64-bit, so xoring the 32-bit - // register is the fastest way to zero a register. - try self.asmRegisterRegister(.xor, reg.to32(), reg.to32()); - } else if (abi_size > 4 and math.cast(u32, imm) != null) { - // 32-bit moves zero-extend to 64-bit. - try self.asmRegisterImmediate(.mov, reg.to32(), Immediate.u(imm)); - } else if (abi_size <= 4 and @bitCast(i64, imm) < 0) { - try self.asmRegisterImmediate( - .mov, - registerAlias(reg, abi_size), - Immediate.s(@intCast(i32, @bitCast(i64, imm))), - ); - } else { - try self.asmRegisterImmediate( - .mov, - registerAlias(reg, abi_size), - Immediate.u(imm), - ); - } - }, - .register => |src_reg| { - // If the registers are the same, nothing to do. - if (src_reg.id() == reg.id()) - return; - - switch (ty.zigTypeTag()) { - .Int => switch (ty.intInfo(self.target.*).signedness) { - .signed => { - if (abi_size <= 4) { - return self.asmRegisterRegister( - .movsx, - reg.to64(), - registerAlias(src_reg, abi_size), - ); - } - }, - .unsigned => { - if (abi_size <= 2) { - return self.asmRegisterRegister( - .movzx, - reg.to64(), - registerAlias(src_reg, abi_size), - ); - } - }, - }, - .Float => { - if (intrinsicsAllowed(self.target.*, ty)) { - const tag: Mir.Inst.Tag = switch (ty.tag()) { - .f32 => .movss, - .f64 => .movsd, - else => return self.fail("TODO genSetReg from register for {}", .{ty.fmtDebug()}), - }; - return self.asmRegisterRegister(tag, reg.to128(), src_reg.to128()); - } - return self.fail("TODO genSetReg from register for float with no intrinsics", .{}); - }, - else => {}, - } - - try self.asmRegisterRegister(.mov, registerAlias(reg, abi_size), registerAlias(src_reg, abi_size)); - }, - .memory => |addr| switch (ty.zigTypeTag()) { - .Float => { - const base_reg = (try self.register_manager.allocReg(null, gp)).to64(); - try self.genSetReg(Type.usize, base_reg, .{ .immediate = addr }); - - if (intrinsicsAllowed(self.target.*, ty)) { - return self.asmRegisterMemory( - switch (ty.tag()) { - .f32 => .movss, - .f64 => .movsd, - else => return self.fail("TODO genSetReg from memory for {}", .{ - ty.fmt(self.bin_file.options.module.?), - }), - }, - reg.to128(), - Memory.sib(Memory.PtrSize.fromSize(abi_size), .{ .base = base_reg }), - ); - } - - return self.fail("TODO genSetReg from memory for float with no intrinsics", .{}); - }, - else => { - if (addr <= math.maxInt(i32)) { - try self.asmRegisterMemory( - .mov, - registerAlias(reg, abi_size), - Memory.sib(Memory.PtrSize.fromSize(abi_size), .{ - .base = .ds, - .disp = @intCast(i32, addr), - }), - ); - } else { - if (reg.to64() == .rax) { - // If this is RAX, we can use a direct load. - // Otherwise, we need to load the address, then indirectly load the value. 
- _ = try self.addInst(.{ - .tag = .mov_moffs, - .ops = .rax_moffs, - .data = .{ .payload = try self.addExtra(Mir.MemoryMoffs.encode(.ds, addr)) }, - }); - } else { - // Rather than duplicate the logic used for the move, we just use a self-call with a new MCValue. - try self.genSetReg(Type.usize, reg, MCValue{ .immediate = addr }); - try self.asmRegisterMemory( - .mov, - registerAlias(reg, abi_size), - Memory.sib(Memory.PtrSize.fromSize(abi_size), .{ .base = reg.to64() }), - ); - } - } - }, - }, - .load_got, .lea_direct => |sym_index| { - const atom_index = try self.getSymbolIndexForDecl(self.mod_fn.owner_decl); - _ = try self.addInst(.{ - .tag = switch (mcv) { - .load_got => .mov_linker, - .lea_direct => .lea_linker, - else => unreachable, - }, - .ops = switch (mcv) { - .load_got => .got_reloc, - .lea_direct => .direct_reloc, - else => unreachable, - }, - .data = .{ .payload = try self.addExtra(Mir.LeaRegisterReloc{ - .reg = @enumToInt(reg), - .atom_index = atom_index, - .sym_index = sym_index, - }) }, - }); - }, - .load_direct => |sym_index| { - switch (ty.zigTypeTag()) { - .Float => { - const addr_reg = (try self.register_manager.allocReg(null, gp)).to64(); - try self.genSetReg(Type.usize, addr_reg, .{ .lea_direct = sym_index }); - - if (intrinsicsAllowed(self.target.*, ty)) { - return self.asmRegisterMemory( - switch (ty.tag()) { - .f32 => .movss, - .f64 => .movsd, - else => return self.fail("TODO genSetReg from memory for {}", .{ - ty.fmt(self.bin_file.options.module.?), - }), - }, - reg.to128(), - Memory.sib(Memory.PtrSize.fromSize(abi_size), .{ .base = addr_reg }), - ); - } - - return self.fail("TODO genSetReg from memory for float with no intrinsics", .{}); - }, - else => { - const atom_index = try self.getSymbolIndexForDecl(self.mod_fn.owner_decl); - _ = try self.addInst(.{ - .tag = .mov_linker, - .ops = .direct_reloc, - .data = .{ .payload = try self.addExtra(Mir.LeaRegisterReloc{ - .reg = @enumToInt(registerAlias(reg, abi_size)), - .atom_index = atom_index, - .sym_index = sym_index, - }) }, - }); - }, - } - }, - .lea_tlv => |sym_index| { - const atom_index = try self.getSymbolIndexForDecl(self.mod_fn.owner_decl); - if (self.bin_file.cast(link.File.MachO)) |_| { - _ = try self.addInst(.{ - .tag = .lea_linker, - .ops = .tlv_reloc, - .data = .{ .payload = try self.addExtra(Mir.LeaRegisterReloc{ - .reg = @enumToInt(Register.rdi), - .atom_index = atom_index, - .sym_index = sym_index, - }) }, - }); - // TODO: spill registers before calling - try self.asmMemory(.call, Memory.sib(.qword, .{ .base = .rdi })); - try self.genSetReg(Type.usize, reg, .{ .register = .rax }); - } else return self.fail("TODO emit ptr to TLV sequence on {s}", .{@tagName(self.bin_file.tag)}); - }, - .load_tlv => |sym_index| { - const base_reg = switch (ty.zigTypeTag()) { - .Float => (try self.register_manager.allocReg(null, gp)).to64(), - else => reg.to64(), - }; - try self.genSetReg(Type.usize, base_reg, .{ .lea_tlv = sym_index }); - switch (ty.zigTypeTag()) { - .Float => if (intrinsicsAllowed(self.target.*, ty)) { - return self.asmRegisterMemory( - switch (ty.tag()) { - .f32 => .movss, - .f64 => .movsd, - else => return self.fail("TODO genSetReg from memory for {}", .{ - ty.fmt(self.bin_file.options.module.?), - }), - }, - reg.to128(), - Memory.sib(Memory.PtrSize.fromSize(abi_size), .{ .base = base_reg }), - ); - } else return self.fail("TODO genSetReg from memory for float with no intrinsics", .{}), - else => try self.asmRegisterMemory( - .mov, - registerAlias(reg, abi_size), - 
Memory.sib(Memory.PtrSize.fromSize(abi_size), .{ .base = base_reg }), - ), - } - }, - .stack_offset => |off| { - switch (ty.zigTypeTag()) { - .Int => switch (ty.intInfo(self.target.*).signedness) { - .signed => { - if (abi_size <= 4) { - return self.asmRegisterMemory( - .movsx, - reg.to64(), - Memory.sib(Memory.PtrSize.fromSize(abi_size), .{ - .base = .rbp, - .disp = -off, - }), - ); - } - }, - .unsigned => { - if (abi_size <= 2) { - return self.asmRegisterMemory( - .movzx, - reg.to64(), - Memory.sib(Memory.PtrSize.fromSize(abi_size), .{ - .base = .rbp, - .disp = -off, - }), - ); - } - }, - }, - .Float => { - if (intrinsicsAllowed(self.target.*, ty)) { - const tag: Mir.Inst.Tag = switch (ty.tag()) { - .f32 => .movss, - .f64 => .movsd, - else => return self.fail( - "TODO genSetReg from stack offset for {}", - .{ty.fmtDebug()}, - ), - }; - return self.asmRegisterMemory(tag, reg.to128(), Memory.sib( - Memory.PtrSize.fromSize(abi_size), - .{ .base = .rbp, .disp = -off }, - )); - } - return self.fail("TODO genSetReg from stack offset for float with no intrinsics", .{}); - }, - else => {}, - } - - try self.asmRegisterMemory( - .mov, - registerAlias(reg, abi_size), - Memory.sib(Memory.PtrSize.fromSize(abi_size), .{ .base = .rbp, .disp = -off }), - ); - }, - } -} - fn airPtrToInt(self: *Self, inst: Air.Inst.Index) !void { const un_op = self.air.instructions.items(.data)[inst].un_op; const result = result: { @@ -7691,7 +7540,7 @@ fn airPtrToInt(self: *Self, inst: Air.Inst.Index) !void { const dst_mcv = try self.allocRegOrMem(inst, true); const dst_ty = self.air.typeOfIndex(inst); - try self.setRegOrMem(dst_ty, dst_mcv, src_mcv); + try self.genCopy(dst_ty, dst_mcv, src_mcv); break :result dst_mcv; }; return self.finishAir(inst, result, .{ un_op, .none, .none }); @@ -7699,9 +7548,14 @@ fn airPtrToInt(self: *Self, inst: Air.Inst.Index) !void { fn airBitCast(self: *Self, inst: Air.Inst.Index) !void { const ty_op = self.air.instructions.items(.data)[inst].ty_op; + const dst_ty = self.air.typeOfIndex(inst); + const src_ty = self.air.typeOf(ty_op.operand); + const result = result: { + const dst_rc = try self.regClassForType(dst_ty); + const src_rc = try self.regClassForType(src_ty); const operand = try self.resolveInst(ty_op.operand); - if (self.reuseOperand(inst, ty_op.operand, 0, operand)) break :result operand; + if (dst_rc.eql(src_rc) and self.reuseOperand(inst, ty_op.operand, 0, operand)) break :result operand; const operand_lock = switch (operand) { .register => |reg| self.register_manager.lockReg(reg), @@ -7711,25 +7565,31 @@ fn airBitCast(self: *Self, inst: Air.Inst.Index) !void { defer if (operand_lock) |lock| self.register_manager.unlockReg(lock); const dest = try self.allocRegOrMem(inst, true); - try self.setRegOrMem(self.air.typeOfIndex(inst), dest, operand); + try self.genCopy(self.air.typeOfIndex(inst), dest, operand); break :result dest; }; - log.debug("airBitCast(%{d}): {}", .{ inst, result }); return self.finishAir(inst, result, .{ ty_op.operand, .none, .none }); } fn airArrayToSlice(self: *Self, inst: Air.Inst.Index) !void { const ty_op = self.air.instructions.items(.data)[inst].ty_op; + + const slice_ty = self.air.typeOfIndex(inst); const ptr_ty = self.air.typeOf(ty_op.operand); const ptr = try self.resolveInst(ty_op.operand); const array_ty = ptr_ty.childType(); const array_len = array_ty.arrayLen(); - const stack_offset = @intCast(i32, try self.allocMem(inst, 16, 16)); - try self.genSetStack(ptr_ty, stack_offset, ptr, .{}); - try self.genSetStack(Type.u64, stack_offset - 8, .{ 
.immediate = array_len }, .{}); + const frame_index = try self.allocFrameIndex(FrameAlloc.initType(slice_ty, self.target.*)); + try self.genSetMem(.{ .frame = frame_index }, 0, ptr_ty, ptr); + try self.genSetMem( + .{ .frame = frame_index }, + @intCast(i32, ptr_ty.abiSize(self.target.*)), + Type.usize, + .{ .immediate = array_len }, + ); - const result = MCValue{ .stack_offset = stack_offset }; + const result = MCValue{ .load_frame = .{ .index = frame_index } }; return self.finishAir(inst, result, .{ ty_op.operand, .none, .none }); } @@ -7758,29 +7618,31 @@ fn airFloatToInt(self: *Self, inst: Air.Inst.Index) !void { } // move float src to ST(0) - const stack_offset = switch (operand) { - .stack_offset, .ptr_stack_offset => |offset| offset, - else => blk: { - const offset = @intCast(i32, try self.allocMem( - inst, - src_abi_size, - src_ty.abiAlignment(self.target.*), - )); - try self.genSetStack(src_ty, offset, operand, .{}); - break :blk offset; + const frame_addr: FrameAddr = switch (operand) { + .load_frame => |frame_addr| frame_addr, + else => frame_addr: { + const frame_index = try self.allocFrameIndex(FrameAlloc.initType(src_ty, self.target.*)); + try self.genSetMem(.{ .frame = frame_index }, 0, src_ty, operand); + break :frame_addr .{ .index = frame_index }; }, }; - try self.asmMemory(.fld, Memory.sib(Memory.PtrSize.fromSize(src_abi_size), .{ - .base = .rbp, - .disp = -stack_offset, - })); + try self.asmMemory( + .fld, + Memory.sib(Memory.PtrSize.fromSize(src_abi_size), .{ + .base = .{ .frame = frame_addr.index }, + .disp = frame_addr.off, + }), + ); // convert const stack_dst = try self.allocRegOrMem(inst, false); - try self.asmMemory(.fisttp, Memory.sib(Memory.PtrSize.fromSize(dst_abi_size), .{ - .base = .rbp, - .disp = -stack_dst.stack_offset, - })); + try self.asmMemory( + .fisttp, + Memory.sib(Memory.PtrSize.fromSize(dst_abi_size), .{ + .base = .{ .frame = stack_dst.load_frame.index }, + .disp = stack_dst.load_frame.off, + }), + ); return self.finishAir(inst, stack_dst, .{ ty_op.operand, .none, .none }); } @@ -7800,20 +7662,32 @@ fn airCmpxchg(self: *Self, inst: Air.Inst.Index) !void { const exp_mcv = try self.resolveInst(extra.expected_value); if (val_abi_size > 8) switch (exp_mcv) { - .stack_offset => |exp_off| { - try self.genSetReg(Type.usize, .rax, .{ .stack_offset = exp_off - 0 }); - try self.genSetReg(Type.usize, .rdx, .{ .stack_offset = exp_off - 8 }); + .load_frame => |frame_addr| { + try self.genSetReg(.rax, Type.usize, .{ .load_frame = .{ + .index = frame_addr.index, + .off = frame_addr.off + 0, + } }); + try self.genSetReg(.rdx, Type.usize, .{ .load_frame = .{ + .index = frame_addr.index, + .off = frame_addr.off + 8, + } }); }, else => return self.fail("TODO implement cmpxchg for {s}", .{@tagName(exp_mcv)}), - } else try self.genSetReg(val_ty, .rax, exp_mcv); + } else try self.genSetReg(.rax, val_ty, exp_mcv); const rax_lock = self.register_manager.lockRegAssumeUnused(.rax); defer self.register_manager.unlockReg(rax_lock); const new_mcv = try self.resolveInst(extra.new_value); const new_reg: Register = if (val_abi_size > 8) switch (new_mcv) { - .stack_offset => |new_off| new: { - try self.genSetReg(Type.usize, .rbx, .{ .stack_offset = new_off - 0 }); - try self.genSetReg(Type.usize, .rcx, .{ .stack_offset = new_off - 8 }); + .load_frame => |frame_addr| new: { + try self.genSetReg(.rbx, Type.usize, .{ .load_frame = .{ + .index = frame_addr.index, + .off = frame_addr.off + 0, + } }); + try self.genSetReg(.rcx, Type.usize, .{ .load_frame = .{ + .index = 
frame_addr.index, + .off = frame_addr.off + 8, + } }); break :new undefined; }, else => return self.fail("TODO implement cmpxchg for {s}", .{@tagName(exp_mcv)}), @@ -7823,11 +7697,19 @@ fn airCmpxchg(self: *Self, inst: Air.Inst.Index) !void { const ptr_size = Memory.PtrSize.fromSize(val_abi_size); const ptr_mem = switch (ptr_mcv) { - .register => |reg| Memory.sib(ptr_size, .{ .base = reg }), - .ptr_stack_offset => |off| Memory.sib(ptr_size, .{ .base = .rbp, .disp = -off }), - else => Memory.sib(ptr_size, .{ .base = try self.copyToTmpRegister(ptr_ty, ptr_mcv) }), + .register => |reg| Memory.sib(ptr_size, .{ .base = .{ .reg = reg } }), + .lea_frame => |frame_addr| Memory.sib(ptr_size, .{ + .base = .{ .frame = frame_addr.index }, + .disp = frame_addr.off, + }), + else => Memory.sib(ptr_size, .{ .base = .{ + .reg = try self.copyToTmpRegister(ptr_ty, ptr_mcv), + } }), + }; + const mem_lock = switch (ptr_mem.base()) { + .none, .frame => null, + .reg => |reg| self.register_manager.lockReg(reg), }; - const mem_lock = if (ptr_mem.base()) |reg| self.register_manager.lockReg(reg) else null; defer if (mem_lock) |lock| self.register_manager.unlockReg(lock); try self.spillEflagsIfOccupied(); @@ -7851,9 +7733,24 @@ fn airCmpxchg(self: *Self, inst: Air.Inst.Index) !void { } const dst_mcv = try self.allocRegOrMem(inst, false); - try self.genSetStack(Type.bool, dst_mcv.stack_offset - 16, .{ .eflags = .ne }, .{}); - try self.genSetStack(Type.usize, dst_mcv.stack_offset - 8, .{ .register = .rdx }, .{}); - try self.genSetStack(Type.usize, dst_mcv.stack_offset - 0, .{ .register = .rax }, .{}); + try self.genSetMem( + .{ .frame = dst_mcv.load_frame.index }, + dst_mcv.load_frame.off + 16, + Type.bool, + .{ .eflags = .ne }, + ); + try self.genSetMem( + .{ .frame = dst_mcv.load_frame.index }, + dst_mcv.load_frame.off + 8, + Type.usize, + .{ .register = .rdx }, + ); + try self.genSetMem( + .{ .frame = dst_mcv.load_frame.index }, + dst_mcv.load_frame.off + 0, + Type.usize, + .{ .register = .rax }, + ); break :result dst_mcv; }; return self.finishAir(inst, result, .{ extra.ptr, extra.expected_value, extra.new_value }); @@ -7884,11 +7781,19 @@ fn atomicOp( const val_abi_size = @intCast(u32, val_ty.abiSize(self.target.*)); const ptr_size = Memory.PtrSize.fromSize(val_abi_size); const ptr_mem = switch (ptr_mcv) { - .register => |reg| Memory.sib(ptr_size, .{ .base = reg }), - .ptr_stack_offset => |off| Memory.sib(ptr_size, .{ .base = .rbp, .disp = -off }), - else => Memory.sib(ptr_size, .{ .base = try self.copyToTmpRegister(ptr_ty, ptr_mcv) }), + .register => |reg| Memory.sib(ptr_size, .{ .base = .{ .reg = reg } }), + .lea_frame => |frame_addr| Memory.sib(ptr_size, .{ + .base = .{ .frame = frame_addr.index }, + .disp = frame_addr.off, + }), + else => Memory.sib(ptr_size, .{ .base = .{ + .reg = try self.copyToTmpRegister(ptr_ty, ptr_mcv), + } }), + }; + const mem_lock = switch (ptr_mem.base()) { + .none, .frame => null, + .reg => |reg| self.register_manager.lockReg(reg), }; - const mem_lock = if (ptr_mem.base()) |reg| self.register_manager.lockReg(reg) else null; defer if (mem_lock) |lock| self.register_manager.unlockReg(lock); const method: enum { lock, loop, libcall } = if (val_ty.isRuntimeFloat()) @@ -7928,7 +7833,7 @@ fn atomicOp( const dst_lock = self.register_manager.lockRegAssumeUnused(dst_reg); defer self.register_manager.unlockReg(dst_lock); - try self.genSetReg(val_ty, dst_reg, val_mcv); + try self.genSetReg(dst_reg, val_ty, val_mcv); if (rmw_op == std.builtin.AtomicRmwOp.Sub and tag == .xadd) { try 
self.genUnOpMir(.neg, val_ty, dst_mcv); } @@ -7952,10 +7857,10 @@ fn atomicOp( try self.asmRegisterMemory(.mov, registerAlias(.rax, val_abi_size), ptr_mem); const loop = @intCast(u32, self.mir_instructions.len); if (rmw_op != std.builtin.AtomicRmwOp.Xchg) { - try self.genSetReg(val_ty, tmp_reg, .{ .register = .rax }); + try self.genSetReg(tmp_reg, val_ty, .{ .register = .rax }); } if (rmw_op) |op| switch (op) { - .Xchg => try self.genSetReg(val_ty, tmp_reg, val_mcv), + .Xchg => try self.genSetReg(tmp_reg, val_ty, val_mcv), .Add => try self.genBinOpMir(.add, val_ty, tmp_mcv, val_mcv), .Sub => try self.genBinOpMir(.sub, val_ty, tmp_mcv, val_mcv), .And => try self.genBinOpMir(.@"and", val_ty, tmp_mcv, val_mcv), @@ -7990,12 +7895,12 @@ fn atomicOp( registerAlias(val_reg, cmov_abi_size), cc, ), - .stack_offset => |val_off| try self.asmCmovccRegisterMemory( + .load_frame => |frame_addr| try self.asmCmovccRegisterMemory( registerAlias(tmp_reg, cmov_abi_size), - Memory.sib( - Memory.PtrSize.fromSize(cmov_abi_size), - .{ .base = .rbp, .disp = -val_off }, - ), + Memory.sib(Memory.PtrSize.fromSize(cmov_abi_size), .{ + .base = .{ .frame = frame_addr.index }, + .disp = frame_addr.off, + }), cc, ), else => { @@ -8028,9 +7933,15 @@ fn atomicOp( })); const loop = @intCast(u32, self.mir_instructions.len); switch (val_mcv) { - .stack_offset => |val_off| { - const val_lo_mem = Memory.sib(.qword, .{ .base = .rbp, .disp = 0 - val_off }); - const val_hi_mem = Memory.sib(.qword, .{ .base = .rbp, .disp = 8 - val_off }); + .load_frame => |frame_addr| { + const val_lo_mem = Memory.sib(.qword, .{ + .base = .{ .frame = frame_addr.index }, + .disp = frame_addr.off + 0, + }); + const val_hi_mem = Memory.sib(.qword, .{ + .base = .{ .frame = frame_addr.index }, + .disp = frame_addr.off + 8, + }); if (rmw_op != std.builtin.AtomicRmwOp.Xchg) { try self.asmRegisterRegister(.mov, .rbx, .rax); @@ -8087,12 +7998,18 @@ fn atomicOp( const dst_mcv = try self.allocTempRegOrMem(val_ty, false); try self.asmMemoryRegister( .mov, - Memory.sib(.qword, .{ .base = .rbp, .disp = 0 - dst_mcv.stack_offset }), + Memory.sib(.qword, .{ + .base = .{ .frame = dst_mcv.load_frame.index }, + .disp = dst_mcv.load_frame.off + 0, + }), .rax, ); try self.asmMemoryRegister( .mov, - Memory.sib(.qword, .{ .base = .rbp, .disp = 8 - dst_mcv.stack_offset }), + Memory.sib(.qword, .{ + .base = .{ .frame = dst_mcv.load_frame.index }, + .disp = dst_mcv.load_frame.off + 8, + }), .rdx, ); return dst_mcv; @@ -8139,7 +8056,7 @@ fn airAtomicLoad(self: *Self, inst: Air.Inst.Index) !void { else try self.allocRegOrMem(inst, true); - try self.load(dst_mcv, ptr_mcv, ptr_ty); + try self.load(dst_mcv, ptr_ty, ptr_mcv); return self.finishAir(inst, dst_mcv, .{ atomic_load.ptr, .none, .none }); } @@ -8184,16 +8101,16 @@ fn airMemset(self: *Self, inst: Air.Inst.Index, safety: bool) !void { const elem_abi_size = @intCast(u31, elem_ty.abiSize(self.target.*)); if (elem_abi_size == 1) { - const ptr = switch (dst_ptr_ty.ptrSize()) { + const ptr: MCValue = switch (dst_ptr_ty.ptrSize()) { // TODO: this only handles slices stored in the stack - .Slice => @as(MCValue, .{ .stack_offset = dst_ptr.stack_offset - 0 }), + .Slice => dst_ptr, .One => dst_ptr, .C, .Many => unreachable, }; - const len = switch (dst_ptr_ty.ptrSize()) { + const len: MCValue = switch (dst_ptr_ty.ptrSize()) { // TODO: this only handles slices stored in the stack - .Slice => @as(MCValue, .{ .stack_offset = dst_ptr.stack_offset - 8 }), - .One => @as(MCValue, .{ .immediate = dst_ptr_ty.childType().arrayLen() }), + 
.Slice => dst_ptr.address().offset(8).deref(), + .One => .{ .immediate = dst_ptr_ty.childType().arrayLen() }, .C, .Many => unreachable, }; const len_lock: ?RegisterLock = switch (len) { @@ -8202,7 +8119,7 @@ fn airMemset(self: *Self, inst: Air.Inst.Index, safety: bool) !void { }; defer if (len_lock) |lock| self.register_manager.unlockReg(lock); - try self.genInlineMemset(ptr, src_val, len, .{}); + try self.genInlineMemset(ptr, src_val, len); return self.finishAir(inst, .unreach, .{ bin_op.lhs, bin_op.rhs, .none }); } @@ -8215,8 +8132,8 @@ fn airMemset(self: *Self, inst: Air.Inst.Index, safety: bool) !void { const slice_ptr_ty = dst_ptr_ty.slicePtrFieldType(&buf); // TODO: this only handles slices stored in the stack - const ptr = @as(MCValue, .{ .stack_offset = dst_ptr.stack_offset - 0 }); - const len = @as(MCValue, .{ .stack_offset = dst_ptr.stack_offset - 8 }); + const ptr = dst_ptr; + const len = dst_ptr.address().offset(8).deref(); // Used to store the number of elements for comparison. // After comparison, updated to store number of bytes needed to copy. @@ -8225,55 +8142,51 @@ fn airMemset(self: *Self, inst: Air.Inst.Index, safety: bool) !void { const len_lock = self.register_manager.lockRegAssumeUnused(len_reg); defer self.register_manager.unlockReg(len_lock); - try self.asmRegisterMemory(.mov, len_reg, Memory.sib(.qword, .{ - .base = .rbp, - .disp = -len.stack_offset, - })); + try self.genSetReg(len_reg, Type.usize, len); const skip_reloc = try self.asmJccReloc(undefined, .z); - try self.store(ptr, src_val, slice_ptr_ty, elem_ty); + try self.store(slice_ptr_ty, ptr, src_val); const second_elem_ptr_reg = try self.register_manager.allocReg(null, gp); const second_elem_ptr_mcv: MCValue = .{ .register = second_elem_ptr_reg }; const second_elem_ptr_lock = self.register_manager.lockRegAssumeUnused(second_elem_ptr_reg); defer self.register_manager.unlockReg(second_elem_ptr_lock); - try self.asmRegisterMemory( - .lea, - second_elem_ptr_reg, - Memory.sib(.qword, .{ - .base = try self.copyToTmpRegister(Type.usize, ptr), - .disp = elem_abi_size, - }), - ); + try self.genSetReg(second_elem_ptr_reg, Type.usize, .{ .register_offset = .{ + .reg = try self.copyToTmpRegister(Type.usize, ptr), + .off = elem_abi_size, + } }); try self.genBinOpMir(.sub, Type.usize, len_mcv, .{ .immediate = 1 }); try self.asmRegisterRegisterImmediate(.imul, len_reg, len_reg, Immediate.u(elem_abi_size)); - try self.genInlineMemcpy(second_elem_ptr_mcv, ptr, len_mcv, .{}); + try self.genInlineMemcpy(second_elem_ptr_mcv, ptr, len_mcv); try self.performReloc(skip_reloc); }, .One => { + var elem_ptr_pl = Type.Payload.ElemType{ + .base = .{ .tag = .single_mut_pointer }, + .data = elem_ty, + }; + const elem_ptr_ty = Type.initPayload(&elem_ptr_pl.base); + const len = dst_ptr_ty.childType().arrayLen(); + assert(len != 0); // prevented by Sema - try self.store(dst_ptr, src_val, dst_ptr_ty, elem_ty); + try self.store(elem_ptr_ty, dst_ptr, src_val); const second_elem_ptr_reg = try self.register_manager.allocReg(null, gp); const second_elem_ptr_mcv: MCValue = .{ .register = second_elem_ptr_reg }; const second_elem_ptr_lock = self.register_manager.lockRegAssumeUnused(second_elem_ptr_reg); defer self.register_manager.unlockReg(second_elem_ptr_lock); - try self.asmRegisterMemory( - .lea, - second_elem_ptr_reg, - Memory.sib(.qword, .{ - .base = try self.copyToTmpRegister(Type.usize, dst_ptr), - .disp = elem_abi_size, - }), - ); + try self.genSetReg(second_elem_ptr_reg, Type.usize, .{ .register_offset = .{ + .reg = try 
self.copyToTmpRegister(Type.usize, dst_ptr), + .off = elem_abi_size, + } }); const bytes_to_copy: MCValue = .{ .immediate = elem_abi_size * (len - 1) }; - try self.genInlineMemcpy(second_elem_ptr_mcv, dst_ptr, bytes_to_copy, .{}); + try self.genInlineMemcpy(second_elem_ptr_mcv, dst_ptr, bytes_to_copy); }, .C, .Many => unreachable, } @@ -8299,9 +8212,9 @@ fn airMemcpy(self: *Self, inst: Air.Inst.Index) !void { }; defer if (src_ptr_lock) |lock| self.register_manager.unlockReg(lock); - const len = switch (dst_ptr_ty.ptrSize()) { - .Slice => @as(MCValue, .{ .stack_offset = dst_ptr.stack_offset - 8 }), - .One => @as(MCValue, .{ .immediate = dst_ptr_ty.childType().arrayLen() }), + const len: MCValue = switch (dst_ptr_ty.ptrSize()) { + .Slice => dst_ptr.address().offset(8).deref(), + .One => .{ .immediate = dst_ptr_ty.childType().arrayLen() }, .C, .Many => unreachable, }; const len_lock: ?RegisterLock = switch (len) { @@ -8311,7 +8224,7 @@ fn airMemcpy(self: *Self, inst: Air.Inst.Index) !void { defer if (len_lock) |lock| self.register_manager.unlockReg(lock); // TODO: dst_ptr and src_ptr could be slices rather than raw pointers - try self.genInlineMemcpy(dst_ptr, src_ptr, len, .{}); + try self.genInlineMemcpy(dst_ptr, src_ptr, len); return self.finishAir(inst, .unreach, .{ bin_op.lhs, bin_op.rhs, .none }); } @@ -8345,24 +8258,25 @@ fn airErrorName(self: *Self, inst: Air.Inst.Index) !void { const atom = elf_file.getAtom(atom_index); _ = try atom.getOrCreateOffsetTableEntry(elf_file); const got_addr = atom.getOffsetTableAddress(elf_file); - try self.asmRegisterMemory(.mov, addr_reg.to64(), Memory.sib(.qword, .{ - .base = .ds, - .disp = @intCast(i32, got_addr), - })); + try self.asmRegisterMemory( + .mov, + addr_reg.to64(), + Memory.sib(.qword, .{ .base = .{ .reg = .ds }, .disp = @intCast(i32, got_addr) }), + ); } else if (self.bin_file.cast(link.File.Coff)) |coff_file| { const atom_index = try coff_file.getOrCreateAtomForLazySymbol( .{ .kind = .const_data, .ty = Type.anyerror }, 4, // dword alignment ); const sym_index = coff_file.getAtom(atom_index).getSymbolIndex().?; - try self.genSetReg(Type.usize, addr_reg, .{ .load_got = sym_index }); + try self.genSetReg(addr_reg, Type.usize, .{ .lea_got = sym_index }); } else if (self.bin_file.cast(link.File.MachO)) |macho_file| { const atom_index = try macho_file.getOrCreateAtomForLazySymbol( .{ .kind = .const_data, .ty = Type.anyerror }, 4, // dword alignment ); const sym_index = macho_file.getAtom(atom_index).getSymbolIndex().?; - try self.genSetReg(Type.usize, addr_reg, .{ .load_got = sym_index }); + try self.genSetReg(addr_reg, Type.usize, .{ .lea_got = sym_index }); } else { return self.fail("TODO implement airErrorName for x86_64 {s}", .{@tagName(self.bin_file.tag)}); } @@ -8377,36 +8291,60 @@ fn airErrorName(self: *Self, inst: Air.Inst.Index) !void { try self.truncateRegister(err_ty, err_reg.to32()); - try self.asmRegisterMemory(.mov, start_reg.to32(), Memory.sib(.dword, .{ - .base = addr_reg.to64(), - .scale_index = .{ .scale = 4, .index = err_reg.to64() }, - .disp = 4, - })); - try self.asmRegisterMemory(.mov, end_reg.to32(), Memory.sib(.dword, .{ - .base = addr_reg.to64(), - .scale_index = .{ .scale = 4, .index = err_reg.to64() }, - .disp = 8, - })); + try self.asmRegisterMemory( + .mov, + start_reg.to32(), + Memory.sib(.dword, .{ + .base = .{ .reg = addr_reg.to64() }, + .scale_index = .{ .scale = 4, .index = err_reg.to64() }, + .disp = 4, + }), + ); + try self.asmRegisterMemory( + .mov, + end_reg.to32(), + Memory.sib(.dword, .{ + .base = .{ .reg 
= addr_reg.to64() }, + .scale_index = .{ .scale = 4, .index = err_reg.to64() }, + .disp = 8, + }), + ); try self.asmRegisterRegister(.sub, end_reg.to32(), start_reg.to32()); - try self.asmRegisterMemory(.lea, start_reg.to64(), Memory.sib(.byte, .{ - .base = addr_reg.to64(), - .scale_index = .{ .scale = 1, .index = start_reg.to64() }, - .disp = 0, - })); - try self.asmRegisterMemory(.lea, end_reg.to32(), Memory.sib(.byte, .{ - .base = end_reg.to64(), - .disp = -1, - })); + try self.asmRegisterMemory( + .lea, + start_reg.to64(), + Memory.sib(.byte, .{ + .base = .{ .reg = addr_reg.to64() }, + .scale_index = .{ .scale = 1, .index = start_reg.to64() }, + .disp = 0, + }), + ); + try self.asmRegisterMemory( + .lea, + end_reg.to32(), + Memory.sib(.byte, .{ + .base = .{ .reg = end_reg.to64() }, + .disp = -1, + }), + ); const dst_mcv = try self.allocRegOrMem(inst, false); - try self.asmMemoryRegister(.mov, Memory.sib(.qword, .{ - .base = .rbp, - .disp = 0 - dst_mcv.stack_offset, - }), start_reg.to64()); - try self.asmMemoryRegister(.mov, Memory.sib(.qword, .{ - .base = .rbp, - .disp = 8 - dst_mcv.stack_offset, - }), end_reg.to64()); + try self.asmMemoryRegister( + .mov, + Memory.sib(.qword, .{ + .base = .{ .frame = dst_mcv.load_frame.index }, + .disp = dst_mcv.load_frame.off, + }), + start_reg.to64(), + ); + try self.asmMemoryRegister( + .mov, + Memory.sib(.qword, .{ + .base = .{ .frame = dst_mcv.load_frame.index }, + .disp = dst_mcv.load_frame.off + 8, + }), + end_reg.to64(), + ); return self.finishAir(inst, dst_mcv, .{ un_op, .none, .none }); } @@ -8445,19 +8383,17 @@ fn airAggregateInit(self: *Self, inst: Air.Inst.Index) !void { const len = @intCast(usize, result_ty.arrayLen()); const ty_pl = self.air.instructions.items(.data)[inst].ty_pl; const elements = @ptrCast([]const Air.Inst.Ref, self.air.extra[ty_pl.payload..][0..len]); - const abi_size = @intCast(u32, result_ty.abiSize(self.target.*)); - const abi_align = result_ty.abiAlignment(self.target.*); const result: MCValue = result: { switch (result_ty.zigTypeTag()) { .Struct => { - const stack_offset = @intCast(i32, try self.allocMem(inst, abi_size, abi_align)); + const frame_index = + try self.allocFrameIndex(FrameAlloc.initType(result_ty, self.target.*)); if (result_ty.containerLayout() == .Packed) { const struct_obj = result_ty.castTag(.@"struct").?.data; try self.genInlineMemset( - .{ .ptr_stack_offset = stack_offset }, + .{ .lea_frame = .{ .index = frame_index } }, .{ .immediate = 0 }, - .{ .immediate = abi_size }, - .{}, + .{ .immediate = result_ty.abiSize(self.target.*) }, ); for (elements, 0..) 
|elem, elem_i| { if (result_ty.structFieldValueComptime(elem_i) != null) continue; @@ -8465,7 +8401,10 @@ fn airAggregateInit(self: *Self, inst: Air.Inst.Index) !void { const elem_ty = result_ty.structFieldType(elem_i); const elem_bit_size = @intCast(u32, elem_ty.bitSize(self.target.*)); if (elem_bit_size > 64) { - return self.fail("TODO airAggregateInit implement packed structs with large fields", .{}); + return self.fail( + "TODO airAggregateInit implement packed structs with large fields", + .{}, + ); } const elem_abi_size = @intCast(u32, elem_ty.abiSize(self.target.*)); const elem_abi_bits = elem_abi_size * 8; @@ -8503,7 +8442,7 @@ fn airAggregateInit(self: *Self, inst: Air.Inst.Index) !void { try self.genBinOpMir( .@"or", elem_ty, - .{ .stack_offset = stack_offset - elem_byte_off }, + .{ .load_frame = .{ .index = frame_index, .off = elem_byte_off } }, .{ .register = elem_reg }, ); if (elem_bit_off > elem_extra_bits) { @@ -8520,8 +8459,10 @@ fn airAggregateInit(self: *Self, inst: Air.Inst.Index) !void { try self.genBinOpMir( .@"or", elem_ty, - .{ .stack_offset = stack_offset - elem_byte_off - - @intCast(i32, elem_abi_size) }, + .{ .load_frame = .{ + .index = frame_index, + .off = elem_byte_off + @intCast(i32, elem_abi_size), + } }, .{ .register = reg }, ); } @@ -8536,12 +8477,13 @@ fn airAggregateInit(self: *Self, inst: Air.Inst.Index) !void { .load_tlv => |sym_index| MCValue{ .lea_tlv = sym_index }, else => elem_mcv, }; - try self.genSetStack(elem_ty, stack_offset - elem_off, mat_elem_mcv, .{}); + try self.genSetMem(.{ .frame = frame_index }, elem_off, elem_ty, mat_elem_mcv); } - break :result .{ .stack_offset = stack_offset }; + break :result .{ .load_frame = .{ .index = frame_index } }; }, .Array => { - const stack_offset = @intCast(i32, try self.allocMem(inst, abi_size, abi_align)); + const frame_index = + try self.allocFrameIndex(FrameAlloc.initType(result_ty, self.target.*)); const elem_ty = result_ty.childType(); const elem_size = @intCast(u32, elem_ty.abiSize(self.target.*)); @@ -8552,9 +8494,9 @@ fn airAggregateInit(self: *Self, inst: Air.Inst.Index) !void { else => elem_mcv, }; const elem_off = @intCast(i32, elem_size * elem_i); - try self.genSetStack(elem_ty, stack_offset - elem_off, mat_elem_mcv, .{}); + try self.genSetMem(.{ .frame = frame_index }, elem_off, elem_ty, mat_elem_mcv); } - break :result MCValue{ .stack_offset = stack_offset }; + break :result .{ .load_frame = .{ .index = frame_index } }; }, .Vector => return self.fail("TODO implement aggregate_init for vectors", .{}), else => unreachable, @@ -8596,7 +8538,7 @@ fn resolveInst(self: *Self, ref: Air.Inst.Ref) InnerError!MCValue { const ty = self.air.typeOf(ref); // If the type has no codegen bits, no need to store it. 
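// (Aside on the aggregate_init hunk above: element initialization now goes
// through a single frame allocation instead of ad-hoc stack offsets; the
// pattern, taken from this hunk's call sites, is:
//     const frame_index = try self.allocFrameIndex(FrameAlloc.initType(result_ty, self.target.*));
//     try self.genSetMem(.{ .frame = frame_index }, elem_off, elem_ty, mat_elem_mcv);
// after which the whole aggregate is referred to as
//     .{ .load_frame = .{ .index = frame_index } }
// and the frame index is mapped to a concrete offset only during lowering.)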
-    if (!ty.hasRuntimeBitsIgnoreComptime() and !ty.isError()) return .none;
+    if (!ty.hasRuntimeBitsIgnoreComptime()) return .none;
 
     if (Air.refToIndex(ref)) |inst| {
         const mcv = switch (self.air.instructions.items(.tag)[inst]) {
@@ -8666,8 +8608,8 @@ fn genTypedValue(self: *Self, arg_tv: TypedValue) InnerError!MCValue {
             .immediate => |imm| .{ .immediate = imm },
             .memory => |addr| .{ .memory = addr },
             .load_direct => |sym_index| .{ .load_direct = sym_index },
-            .load_got => |sym_index| .{ .load_got = sym_index },
-            .load_tlv => |sym_index| .{ .load_tlv = sym_index },
+            .load_got => |sym_index| .{ .lea_got = sym_index },
+            .load_tlv => |sym_index| .{ .lea_tlv = sym_index },
         },
         .fail => |msg| {
             self.err_msg = msg;
@@ -8679,9 +8621,9 @@
 const CallMCValues = struct {
     args: []MCValue,
-    return_value: MCValue,
-    stack_byte_count: u32,
-    stack_align: u32,
+    return_value: InstTracking,
+    stack_byte_count: u31,
+    stack_align: u31,
 
     fn deinit(self: *CallMCValues, func: *Self) void {
         func.gpa.free(self.args);
@@ -8694,6 +8636,7 @@ fn resolveCallingConventionValues(
     self: *Self,
     fn_ty: Type,
     var_args: []const Air.Inst.Ref,
+    stack_frame_base: FrameIndex,
 ) !CallMCValues {
     const cc = fn_ty.fnCallingConvention();
     const param_len = fn_ty.fnParamLen();
@@ -8706,7 +8649,7 @@
         .args = try self.gpa.alloc(MCValue, param_types.len),
         // These undefined values must be populated before returning from this function.
         .return_value = undefined,
-        .stack_byte_count = undefined,
+        .stack_byte_count = 0,
         .stack_align = undefined,
     };
     errdefer self.gpa.free(result.args);
@@ -8716,40 +8659,46 @@
     switch (cc) {
         .Naked => {
             assert(result.args.len == 0);
-            result.return_value = .unreach;
-            result.stack_byte_count = 0;
-            result.stack_align = 1;
-            return result;
+            result.return_value = InstTracking.init(.unreach);
+            result.stack_align = 8;
         },
         .C => {
+            var param_reg_i: usize = 0;
+            result.stack_align = 16;
+
+            switch (self.target.os.tag) {
+                .windows => {
+                    // Align the stack to 16 bytes before allocating shadow stack space (if any).
+                    result.stack_byte_count += @intCast(u31, 4 * Type.usize.abiSize(self.target.*));
+                },
+                else => {},
+            }
+
             // Return values
             if (ret_ty.zigTypeTag() == .NoReturn) {
-                result.return_value = .unreach;
-            } else if (!ret_ty.hasRuntimeBitsIgnoreComptime() and !ret_ty.isError()) {
+                result.return_value = InstTracking.init(.unreach);
+            } else if (!ret_ty.hasRuntimeBitsIgnoreComptime()) {
                 // TODO: is this even possible for C calling convention?
- result.return_value = .none; + result.return_value = InstTracking.init(.none); } else { - const ret_ty_size = @intCast(u32, ret_ty.abiSize(self.target.*)); - if (ret_ty_size == 0) { - assert(ret_ty.isError()); - result.return_value = .{ .immediate = 0 }; - } else if (ret_ty_size <= 8) { - const aliased_reg = registerAlias(abi.getCAbiIntReturnRegs(self.target.*)[0], ret_ty_size); - result.return_value = .{ .register = aliased_reg }; + const ret_reg = abi.getCAbiIntReturnRegs(self.target.*)[0]; + const ret_ty_size = @intCast(u31, ret_ty.abiSize(self.target.*)); + if (ret_ty_size <= 8) { + const aliased_reg = registerAlias(ret_reg, ret_ty_size); + result.return_value = .{ .short = .{ .register = aliased_reg }, .long = .none }; } else { - // TODO: return argument cell should go first - result.return_value = .{ .stack_offset = 0 }; + const ret_indirect_reg = abi.getCAbiIntParamRegs(self.target.*)[param_reg_i]; + param_reg_i += 1; + result.return_value = .{ + .short = .{ .indirect = .{ .reg = ret_reg } }, + .long = .{ .indirect = .{ .reg = ret_indirect_reg } }, + }; } } // Input params - var next_stack_offset: u32 = switch (result.return_value) { - .stack_offset => |off| @intCast(u32, off), - else => 0, - }; - - for (param_types, result.args, 0..) |ty, *arg, i| { - assert(ty.hasRuntimeBits()); + for (param_types, result.args) |ty, *arg| { + assert(ty.hasRuntimeBitsIgnoreComptime()); const classes: []const abi.Class = switch (self.target.os.tag) { .windows => &[1]abi.Class{abi.classifyWindows(ty, self.target.*)}, @@ -8760,8 +8709,10 @@ fn resolveCallingConventionValues( } switch (classes[0]) { .integer => blk: { - if (i >= abi.getCAbiIntParamRegs(self.target.*).len) break :blk; // fallthrough - arg.* = .{ .register = abi.getCAbiIntParamRegs(self.target.*)[i] }; + if (param_reg_i >= abi.getCAbiIntParamRegs(self.target.*).len) break :blk; + const param_reg = abi.getCAbiIntParamRegs(self.target.*)[param_reg_i]; + param_reg_i += 1; + arg.* = .{ .register = param_reg }; continue; }, .memory => {}, // fallthrough @@ -8770,82 +8721,61 @@ fn resolveCallingConventionValues( }), } - const param_size = @intCast(u32, ty.abiSize(self.target.*)); - const param_align = @intCast(u32, ty.abiAlignment(self.target.*)); - const offset = mem.alignForwardGeneric(u32, next_stack_offset + param_size, param_align); - arg.* = .{ .stack_offset = @intCast(i32, offset) }; - next_stack_offset = offset; + const param_size = @intCast(u31, ty.abiSize(self.target.*)); + const param_align = @intCast(u31, ty.abiAlignment(self.target.*)); + result.stack_byte_count = + mem.alignForwardGeneric(u31, result.stack_byte_count, param_align); + arg.* = .{ .load_frame = .{ + .index = stack_frame_base, + .off = result.stack_byte_count, + } }; + result.stack_byte_count += param_size; } - - // Align the stack to 16bytes before allocating shadow stack space (if any). - const aligned_next_stack_offset = mem.alignForwardGeneric(u32, next_stack_offset, 16); - const padding = aligned_next_stack_offset - next_stack_offset; - if (padding > 0) { - for (result.args) |*arg| { - if (arg.isRegister()) continue; - arg.stack_offset += @intCast(i32, padding); - } - } - - const shadow_stack_space: u32 = switch (self.target.os.tag) { - .windows => @intCast(u32, 4 * @sizeOf(u64)), - else => 0, - }; - - // alignment padding | args ... 
| shadow stack space (if any) | ret addr | $rbp | - result.stack_byte_count = aligned_next_stack_offset + shadow_stack_space; - result.stack_align = 16; }, .Unspecified => { + result.stack_align = 16; + // Return values if (ret_ty.zigTypeTag() == .NoReturn) { - result.return_value = .unreach; - } else if (!ret_ty.hasRuntimeBitsIgnoreComptime() and !ret_ty.isError()) { - result.return_value = .none; + result.return_value = InstTracking.init(.unreach); + } else if (!ret_ty.hasRuntimeBitsIgnoreComptime()) { + result.return_value = InstTracking.init(.none); } else { - const ret_ty_size = @intCast(u32, ret_ty.abiSize(self.target.*)); - if (ret_ty_size == 0) { - assert(ret_ty.isError()); - result.return_value = .{ .immediate = 0 }; - } else if (ret_ty_size <= 8 and !ret_ty.isRuntimeFloat()) { - const aliased_reg = registerAlias(abi.getCAbiIntReturnRegs(self.target.*)[0], ret_ty_size); - result.return_value = .{ .register = aliased_reg }; + const ret_reg = abi.getCAbiIntReturnRegs(self.target.*)[0]; + const ret_ty_size = @intCast(u31, ret_ty.abiSize(self.target.*)); + if (ret_ty_size <= 8 and !ret_ty.isRuntimeFloat()) { + const aliased_reg = registerAlias(ret_reg, ret_ty_size); + result.return_value = .{ .short = .{ .register = aliased_reg }, .long = .none }; } else { - // We simply make the return MCValue a stack offset. However, the actual value - // for the offset will be populated later. We will also push the stack offset - // value into an appropriate register when we resolve the offset. - result.return_value = .{ .stack_offset = 0 }; + const ret_indirect_reg = abi.getCAbiIntParamRegs(self.target.*)[0]; + result.return_value = .{ + .short = .{ .indirect = .{ .reg = ret_reg } }, + .long = .{ .indirect = .{ .reg = ret_indirect_reg } }, + }; } } // Input params - var next_stack_offset: u32 = switch (result.return_value) { - .stack_offset => |off| @intCast(u32, off), - else => 0, - }; - for (param_types, result.args) |ty, *arg| { - if (!ty.hasRuntimeBits()) { + if (!ty.hasRuntimeBitsIgnoreComptime()) { arg.* = .none; continue; } - const param_size = @intCast(u32, ty.abiSize(self.target.*)); - const param_align = @intCast(u32, ty.abiAlignment(self.target.*)); - const offset = mem.alignForwardGeneric(u32, next_stack_offset + param_size, param_align); - arg.* = .{ .stack_offset = @intCast(i32, offset) }; - next_stack_offset = offset; + const param_size = @intCast(u31, ty.abiSize(self.target.*)); + const param_align = @intCast(u31, ty.abiAlignment(self.target.*)); + result.stack_byte_count = + mem.alignForwardGeneric(u31, result.stack_byte_count, param_align); + arg.* = .{ .load_frame = .{ + .index = stack_frame_base, + .off = result.stack_byte_count, + } }; + result.stack_byte_count += param_size; } - - result.stack_align = 16; - // TODO fix this so that the 16byte alignment padding is at the current value of $rsp, and push - // the args onto the stack so that there is no padding between the first argument and - // the standard preamble. - // alignment padding | args ... 
| ret addr | $rbp | - result.stack_byte_count = mem.alignForwardGeneric(u32, next_stack_offset, result.stack_align); }, else => return self.fail("TODO implement function parameters and return values for {} on x86_64", .{cc}), } + result.stack_byte_count = mem.alignForwardGeneric(u31, result.stack_byte_count, result.stack_align); return result; } @@ -8953,15 +8883,6 @@ fn regExtraBits(self: *Self, ty: Type) u64 { return self.regBitSize(ty) - ty.bitSize(self.target.*); } -fn intrinsicsAllowed(target: Target, ty: Type) bool { - return switch (ty.tag()) { - .f32, - .f64, - => Target.x86.featureSetHasAny(target.cpu.features, .{ .sse2, .avx, .avx2 }), - else => unreachable, // TODO finish this off - }; -} - fn hasAvxSupport(target: Target) bool { return Target.x86.featureSetHasAny(target.cpu.features, .{ .avx, .avx2 }); } diff --git a/src/arch/x86_64/Emit.zig b/src/arch/x86_64/Emit.zig index a81840be47..c6c8f7995c 100644 --- a/src/arch/x86_64/Emit.zig +++ b/src/arch/x86_64/Emit.zig @@ -13,7 +13,9 @@ prev_di_pc: usize, code_offset_mapping: std.AutoHashMapUnmanaged(Mir.Inst.Index, usize) = .{}, relocs: std.ArrayListUnmanaged(Reloc) = .{}, -pub const Error = Lower.Error || error{EmitFail}; +pub const Error = Lower.Error || error{ + EmitFail, +}; pub fn emitMir(emit: *Emit) Error!void { for (0..emit.lower.mir.instructions.len) |i| { @@ -22,7 +24,7 @@ pub fn emitMir(emit: *Emit) Error!void { const start_offset = @intCast(u32, emit.code.items.len); try emit.code_offset_mapping.putNoClobber(emit.lower.allocator, index, start_offset); - for (try emit.lower.lowerMir(inst)) |lower_inst| try lower_inst.encode(emit.code.writer()); + for (try emit.lower.lowerMir(inst)) |lower_inst| try lower_inst.encode(emit.code.writer(), .{}); const end_offset = @intCast(u32, emit.code.items.len); switch (inst.tag) { @@ -120,11 +122,10 @@ pub fn emitMir(emit: *Emit) Error!void { .length = 6, }), - .dbg_line => { - const dbg_line_column = - emit.lower.mir.extraData(Mir.DbgLineColumn, inst.data.payload).data; - try emit.dbgAdvancePCAndLine(dbg_line_column.line, dbg_line_column.column); - }, + .dbg_line => try emit.dbgAdvancePCAndLine( + inst.data.line_column.line, + inst.data.line_column.column, + ), .dbg_prologue_end => { switch (emit.debug_output) { diff --git a/src/arch/x86_64/Encoding.zig b/src/arch/x86_64/Encoding.zig index 1361570405..21899b912b 100644 --- a/src/arch/x86_64/Encoding.zig +++ b/src/arch/x86_64/Encoding.zig @@ -47,17 +47,8 @@ pub fn findByMnemonic( }, else => {}, } else false; - const rex_extended = for (ops) |op| switch (op) { - .reg => |r| if (r.isExtended()) break true, - .mem => |m| { - if (m.base()) |base| { - if (base.isExtended()) break true; - } - if (m.scaleIndex()) |si| { - if (si.index.isExtended()) break true; - } - }, - else => {}, + const rex_extended = for (ops) |op| { + if (op.isBaseExtended() or op.isIndexExtended()) break true; } else false; if ((rex_required or rex_extended) and rex_invalid) return error.CannotEncode; @@ -67,11 +58,11 @@ pub fn findByMnemonic( next: for (mnemonic_to_encodings_map[@enumToInt(mnemonic)]) |data| { switch (data.mode) { .rex => if (!rex_required) continue, - .long => {}, + .long, .sse2_long => {}, else => if (rex_required) continue, } for (input_ops, data.ops) |input_op, data_op| - if (!input_op.isSubset(data_op, data.mode)) continue :next; + if (!input_op.isSubset(data_op)) continue :next; const enc = Encoding{ .mnemonic = mnemonic, .data = data }; if (shortest_enc) |previous_shortest_enc| { @@ -99,7 +90,7 @@ pub fn findByOpcode(opc: []const u8, prefixes: 
struct { if (prefixes.rex.w) { switch (data.mode) { .short, .fpu, .sse, .sse2, .sse4_1, .none => continue, - .long, .rex => {}, + .long, .sse2_long, .rex => {}, } } else if (prefixes.rex.present and !prefixes.rex.isSet()) { switch (data.mode) { @@ -147,7 +138,7 @@ pub fn modRmExt(encoding: Encoding) u3 { pub fn operandBitSize(encoding: Encoding) u64 { switch (encoding.data.mode) { .short => return 16, - .long => return 64, + .long, .sse2_long => return 64, else => {}, } const bit_size: u64 = switch (encoding.data.op_en) { @@ -172,7 +163,7 @@ pub fn format( _ = options; _ = fmt; switch (encoding.data.mode) { - .long => try writer.writeAll("REX.W + "), + .long, .sse2_long => try writer.writeAll("REX.W + "), else => {}, } @@ -273,6 +264,8 @@ pub const Mnemonic = enum { @"test", tzcnt, ud2, xadd, xchg, xor, + // MMX + movd, // SSE addss, cmpss, @@ -287,7 +280,7 @@ pub const Mnemonic = enum { //cmpsd, divsd, maxsd, minsd, - movq, //movsd, + movq, //movd, movsd, mulsd, subsd, ucomisd, @@ -470,6 +463,17 @@ pub const Op = enum { }; } + pub fn class(op: Op) bits.Register.Class { + return switch (op) { + else => unreachable, + .al, .ax, .eax, .rax, .cl => .general_purpose, + .r8, .r16, .r32, .r64 => .general_purpose, + .rm8, .rm16, .rm32, .rm64 => .general_purpose, + .sreg => .segment, + .xmm, .xmm_m32, .xmm_m64 => .floating_point, + }; + } + pub fn isFloatingPointRegister(op: Op) bool { return switch (op) { .xmm, .xmm_m32, .xmm_m64 => true, @@ -478,7 +482,7 @@ pub const Op = enum { } /// Given an operand `op` checks if `target` is a subset for the purposes of the encoding. - pub fn isSubset(op: Op, target: Op, mode: Mode) bool { + pub fn isSubset(op: Op, target: Op) bool { switch (op) { .m, .o16, .o32, .o64 => unreachable, .moffs, .sreg => return op == target, @@ -488,13 +492,13 @@ pub const Op = enum { }, else => { if (op.isRegister() and target.isRegister()) { - switch (mode) { - .sse, .sse2, .sse4_1 => return op.isFloatingPointRegister() and target.isFloatingPointRegister(), - else => switch (target) { - .cl, .al, .ax, .eax, .rax => return op == target, - else => return op.bitSize() == target.bitSize(), + return switch (target) { + .cl, .al, .ax, .eax, .rax => op == target, + else => op.class() == target.class() and switch (target.class()) { + .floating_point => true, + else => op.bitSize() == target.bitSize(), }, - } + }; } if (op.isMemory() and target.isMemory()) { switch (target) { @@ -532,6 +536,7 @@ pub const Mode = enum { long, sse, sse2, + sse2_long, sse4_1, }; @@ -544,7 +549,7 @@ fn estimateInstructionLength(prefix: Prefix, encoding: Encoding, ops: []const Op std.mem.copy(Operand, &inst.ops, ops); var cwriter = std.io.countingWriter(std.io.null_writer); - inst.encode(cwriter.writer()) catch unreachable; // Not allowed to fail here unless OOM. + inst.encode(cwriter.writer(), .{ .allow_frame_loc = true }) catch unreachable; // Not allowed to fail here unless OOM. 
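+    // Frame-index operands have no concrete base register or displacement at
+    // this point, so `.allow_frame_loc = true` lets them encode as a ModRM +
+    // disp32 placeholder, purely so candidate encodings can be length-compared
+    // before the frame is laid out.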
return @intCast(usize, cwriter.bytes_written); } diff --git a/src/arch/x86_64/Lower.zig b/src/arch/x86_64/Lower.zig index e88484c9da..af0146c6e1 100644 --- a/src/arch/x86_64/Lower.zig +++ b/src/arch/x86_64/Lower.zig @@ -60,6 +60,8 @@ pub fn lowerMir(lower: *Lower, inst: Mir.Inst) Error![]const Instruction { .mfence, .mov, .movbe, + .movd, + .movq, .movzx, .mul, .neg, @@ -136,7 +138,8 @@ pub fn lowerMir(lower: *Lower, inst: Mir.Inst) Error![]const Instruction { .setcc => try lower.mirSetcc(inst), .jcc => try lower.emit(.none, mnem_cc(.j, inst.data.inst_cc.cc), &.{.{ .imm = Immediate.s(0) }}), - .push_regs, .pop_regs => try lower.mirPushPopRegisterList(inst), + .push_regs => try lower.mirPushPopRegisterList(inst, .push), + .pop_regs => try lower.mirPushPopRegisterList(inst, .pop), .dbg_line, .dbg_prologue_end, @@ -190,7 +193,7 @@ fn imm(lower: Lower, ops: Mir.Inst.Ops, i: u32) Immediate { } fn mem(lower: Lower, ops: Mir.Inst.Ops, payload: u32) Memory { - return switch (ops) { + return lower.mir.resolveFrameLoc(switch (ops) { .rm_sib, .rm_sib_cc, .m_sib, @@ -227,7 +230,7 @@ fn mem(lower: Lower, ops: Mir.Inst.Ops, payload: u32) Memory { => lower.mir.extraData(Mir.MemoryMoffs, payload).data.decode(), else => unreachable, - }; + }); } fn emit(lower: *Lower, prefix: Prefix, mnemonic: Mnemonic, ops: []const Operand) Error!void { @@ -417,23 +420,15 @@ fn mirSetcc(lower: *Lower, inst: Mir.Inst) Error!void { } } -fn mirPushPopRegisterList(lower: *Lower, inst: Mir.Inst) Error!void { - const save_reg_list = lower.mir.extraData(Mir.SaveRegisterList, inst.data.payload).data; - const base = @intToEnum(Register, save_reg_list.base_reg); - var disp: i32 = -@intCast(i32, save_reg_list.stack_end); - const reg_list = Mir.RegisterList.fromInt(save_reg_list.register_list); +fn mirPushPopRegisterList(lower: *Lower, inst: Mir.Inst, comptime mnemonic: Mnemonic) Error!void { + const reg_list = Mir.RegisterList.fromInt(inst.data.payload); const callee_preserved_regs = abi.getCalleePreservedRegs(lower.target.*); - for (callee_preserved_regs) |callee_preserved_reg| { - if (!reg_list.isSet(callee_preserved_regs, callee_preserved_reg)) continue; - const reg_op = Operand{ .reg = callee_preserved_reg }; - const mem_op = Operand{ .mem = Memory.sib(.qword, .{ .base = base, .disp = disp }) }; - try lower.emit(.none, .mov, switch (inst.tag) { - .push_regs => &.{ mem_op, reg_op }, - .pop_regs => &.{ reg_op, mem_op }, - else => unreachable, - }); - disp += 8; - } + var it = reg_list.iterator(.{ .direction = switch (mnemonic) { + .push => .reverse, + .pop => .forward, + else => unreachable, + } }); + while (it.next()) |i| try lower.emit(.none, mnemonic, &.{.{ .reg = callee_preserved_regs[i] }}); } fn mirLeaLinker(lower: *Lower, inst: Mir.Inst) Error!void { diff --git a/src/arch/x86_64/Mir.zig b/src/arch/x86_64/Mir.zig index 8772b835dd..c8703373d2 100644 --- a/src/arch/x86_64/Mir.zig +++ b/src/arch/x86_64/Mir.zig @@ -23,6 +23,7 @@ const Register = bits.Register; instructions: std.MultiArrayList(Inst).Slice, /// The meaning of this data is determined by `Inst.Tag` value. 
extra: []const u32, +frame_locs: std.MultiArrayList(FrameLoc).Slice, pub const Inst = struct { tag: Tag, @@ -98,6 +99,10 @@ pub const Inst = struct { mov, /// Move data after swapping bytes movbe, + /// Move doubleword + movd, + /// Move quadword + movq, /// Move with sign extension movsx, /// Move with zero extension @@ -241,13 +246,13 @@ pub const Inst = struct { /// Start of epilogue dbg_epilogue_begin, /// Update debug line - /// Uses `payload` payload with data of type `DbgLineColumn`. + /// Uses `line_column` payload containing the line and column. dbg_line, /// Push registers - /// Uses `payload` payload with data of type `SaveRegisterList`. + /// Uses `payload` payload containing `RegisterList.asInt` directly. push_regs, /// Pop registers - /// Uses `payload` payload with data of type `SaveRegisterList`. + /// Uses `payload` payload containing `RegisterList.asInt` directly. pop_regs, /// Tombstone @@ -500,6 +505,11 @@ pub const Inst = struct { /// Index into the linker's symbol table. sym_index: u32, }, + /// Debug line and column position + line_column: struct { + line: u32, + column: u32, + }, /// Index into `extra`. Meaning of what can be found there is context-dependent. payload: u32, }; @@ -522,12 +532,11 @@ pub const LeaRegisterReloc = struct { sym_index: u32, }; -/// Used in conjunction with `SaveRegisterList` payload to transfer a list of used registers -/// in a compact manner. +/// Used in conjunction with payload to transfer a list of used registers in a compact manner. pub const RegisterList = struct { bitset: BitSet = BitSet.initEmpty(), - const BitSet = IntegerBitSet(@ctz(@as(u32, 0))); + const BitSet = IntegerBitSet(32); const Self = @This(); fn getIndexForReg(registers: []const Register, reg: Register) BitSet.MaskInt { @@ -547,6 +556,10 @@ pub const RegisterList = struct { return self.bitset.isSet(index); } + pub fn iterator(self: Self, comptime options: std.bit_set.IteratorOptions) BitSet.Iterator(options) { + return self.bitset.iterator(options); + } + pub fn asInt(self: Self) u32 { return self.bitset.mask; } @@ -562,14 +575,6 @@ pub const RegisterList = struct { } }; -pub const SaveRegisterList = struct { - /// Base register - base_reg: u32, - /// Use `RegisterList` to populate. - register_list: u32, - stack_end: u32, -}; - pub const Imm64 = struct { msb: u32, lsb: u32, @@ -593,41 +598,51 @@ pub const Imm64 = struct { pub const MemorySib = struct { /// Size of the pointer. ptr_size: u32, - /// Base register. -1 means null, or no base register. - base: i32, - /// Scale for index register. -1 means null, or no scale. - /// This has to be in sync with `index` field. - scale: i32, - /// Index register. -1 means null, or no index register. - /// This has to be in sync with `scale` field. - index: i32, + /// Base register tag of type Memory.Base.Tag + base_tag: u32, + /// Base register of type Register or FrameIndex + base: u32, + /// Scale starting at bit 0 and index register starting at bit 4. + scale_index: u32, /// Displacement value. 
disp: i32, pub fn encode(mem: Memory) MemorySib { const sib = mem.sib; + assert(sib.scale_index.scale == 0 or std.math.isPowerOfTwo(sib.scale_index.scale)); return .{ .ptr_size = @enumToInt(sib.ptr_size), - .base = if (sib.base) |r| @enumToInt(r) else -1, - .scale = if (sib.scale_index) |si| si.scale else -1, - .index = if (sib.scale_index) |si| @enumToInt(si.index) else -1, + .base_tag = @enumToInt(@as(Memory.Base.Tag, sib.base)), + .base = switch (sib.base) { + .none => undefined, + .reg => |r| @enumToInt(r), + .frame => |fi| @enumToInt(fi), + }, + .scale_index = @as(u32, sib.scale_index.scale) << 0 | + @as(u32, if (sib.scale_index.scale > 0) + @enumToInt(sib.scale_index.index) + else + undefined) << 4, .disp = sib.disp, }; } pub fn decode(msib: MemorySib) Memory { - const base: ?Register = if (msib.base == -1) null else @intToEnum(Register, msib.base); - const scale_index: ?Memory.ScaleIndex = if (msib.index == -1) null else .{ - .scale = @intCast(u4, msib.scale), - .index = @intToEnum(Register, msib.index), - }; - const mem: Memory = .{ .sib = .{ + const scale = @truncate(u4, msib.scale_index); + assert(scale == 0 or std.math.isPowerOfTwo(scale)); + return .{ .sib = .{ .ptr_size = @intToEnum(Memory.PtrSize, msib.ptr_size), - .base = base, - .scale_index = scale_index, + .base = switch (@intToEnum(Memory.Base.Tag, msib.base_tag)) { + .none => .none, + .reg => .{ .reg = @intToEnum(Register, msib.base) }, + .frame => .{ .frame = @intToEnum(bits.FrameIndex, msib.base) }, + }, + .scale_index = .{ + .scale = scale, + .index = if (scale > 0) @intToEnum(Register, msib.scale_index >> 4) else undefined, + }, .disp = msib.disp, } }; - return mem; } }; @@ -676,14 +691,10 @@ pub const MemoryMoffs = struct { } }; -pub const DbgLineColumn = struct { - line: u32, - column: u32, -}; - pub fn deinit(mir: *Mir, gpa: std.mem.Allocator) void { mir.instructions.deinit(gpa); gpa.free(mir.extra); + mir.frame_locs.deinit(gpa); mir.* = undefined; } @@ -704,3 +715,22 @@ pub fn extraData(mir: Mir, comptime T: type, index: u32) struct { data: T, end: .end = i, }; } + +pub const FrameLoc = struct { + base: Register, + disp: i32, +}; + +pub fn resolveFrameLoc(mir: Mir, mem: Memory) Memory { + return switch (mem) { + .sib => |sib| switch (sib.base) { + .none, .reg => mem, + .frame => |index| if (mir.frame_locs.len > 0) Memory.sib(sib.ptr_size, .{ + .base = .{ .reg = mir.frame_locs.items(.base)[@enumToInt(index)] }, + .disp = mir.frame_locs.items(.disp)[@enumToInt(index)] + sib.disp, + .scale_index = mem.scaleIndex(), + }) else mem, + }, + .rip, .moffs => mem, + }; +} diff --git a/src/arch/x86_64/bits.zig b/src/arch/x86_64/bits.zig index 38e4a7e717..5d06865566 100644 --- a/src/arch/x86_64/bits.zig +++ b/src/arch/x86_64/bits.zig @@ -405,14 +405,69 @@ test "Register classes" { try expect(Register.fs.class() == .segment); } +pub const FrameIndex = enum(u32) { + // This index refers to the start of the arguments passed to this function + args_frame, + // This index refers to the return address pushed by a `call` and popped by a `ret`. + ret_addr, + // This index refers to the base pointer pushed in the prologue and popped in the epilogue. + base_ptr, + // This index refers to the entire stack frame. 
+ stack_frame, + // This index refers to the start of the call frame for arguments passed to called functions + call_frame, + // Other indices are used for local variable stack slots + _, + + pub const named_count = @typeInfo(FrameIndex).Enum.fields.len; + + pub fn isNamed(fi: FrameIndex) bool { + return @enumToInt(fi) < named_count; + } + + pub fn format( + fi: FrameIndex, + comptime fmt: []const u8, + options: std.fmt.FormatOptions, + writer: anytype, + ) @TypeOf(writer).Error!void { + try writer.writeAll("FrameIndex"); + if (fi.isNamed()) { + try writer.writeByte('.'); + try writer.writeAll(@tagName(fi)); + } else { + try writer.writeByte('('); + try std.fmt.formatType(@enumToInt(fi), fmt, options, writer, 0); + try writer.writeByte(')'); + } + } +}; + pub const Memory = union(enum) { sib: Sib, rip: Rip, moffs: Moffs, - pub const ScaleIndex = packed struct { + pub const Base = union(enum) { + none, + reg: Register, + frame: FrameIndex, + + pub const Tag = @typeInfo(Base).Union.tag_type.?; + + pub fn isExtended(self: Base) bool { + return switch (self) { + .none, .frame => false, // neither rsp nor rbp are extended + .reg => |reg| reg.isExtended(), + }; + } + }; + + pub const ScaleIndex = struct { scale: u4, index: Register, + + const none = ScaleIndex{ .scale = 0, .index = undefined }; }; pub const PtrSize = enum { @@ -460,8 +515,8 @@ pub const Memory = union(enum) { pub const Sib = struct { ptr_size: PtrSize, - base: ?Register, - scale_index: ?ScaleIndex, + base: Base, + scale_index: ScaleIndex, disp: i32, }; @@ -482,7 +537,7 @@ pub const Memory = union(enum) { pub fn sib(ptr_size: PtrSize, args: struct { disp: i32 = 0, - base: ?Register = null, + base: Base = .none, scale_index: ?ScaleIndex = null, }) Memory { if (args.scale_index) |si| assert(std.math.isPowerOfTwo(si.scale)); @@ -490,7 +545,7 @@ pub const Memory = union(enum) { .base = args.base, .disp = args.disp, .ptr_size = ptr_size, - .scale_index = args.scale_index, + .scale_index = if (args.scale_index) |si| si else ScaleIndex.none, } }; } @@ -502,22 +557,25 @@ pub const Memory = union(enum) { return switch (mem) { .moffs => true, .rip => false, - .sib => |s| if (s.base) |r| r.class() == .segment else false, + .sib => |s| switch (s.base) { + .none, .frame => false, + .reg => |reg| reg.class() == .segment, + }, }; } - pub fn base(mem: Memory) ?Register { + pub fn base(mem: Memory) Base { return switch (mem) { - .moffs => |m| m.seg, + .moffs => |m| .{ .reg = m.seg }, .sib => |s| s.base, - .rip => null, + .rip => .none, }; } pub fn scaleIndex(mem: Memory) ?ScaleIndex { return switch (mem) { .moffs, .rip => null, - .sib => |s| s.scale_index, + .sib => |s| if (s.scale_index.scale > 0) s.scale_index else null, }; } diff --git a/src/arch/x86_64/encoder.zig b/src/arch/x86_64/encoder.zig index 578dba5f5d..73b40ea3be 100644 --- a/src/arch/x86_64/encoder.zig +++ b/src/arch/x86_64/encoder.zig @@ -54,6 +54,21 @@ pub const Instruction = struct { }; } + pub fn isBaseExtended(op: Operand) bool { + return switch (op) { + .none, .imm => false, + .reg => |reg| reg.isExtended(), + .mem => |mem| mem.base().isExtended(), + }; + } + + pub fn isIndexExtended(op: Operand) bool { + return switch (op) { + .none, .reg, .imm => false, + .mem => |mem| if (mem.scaleIndex()) |si| si.index.isExtended() else false, + }; + } + fn format( op: Operand, comptime unused_format_string: []const u8, @@ -98,17 +113,24 @@ pub const Instruction = struct { try writer.print("{s} ptr ", .{@tagName(sib.ptr_size)}); if (mem.isSegmentRegister()) { - return 
writer.print("{s}:0x{x}", .{ @tagName(sib.base.?), sib.disp }); + return writer.print("{s}:0x{x}", .{ @tagName(sib.base.reg), sib.disp }); } try writer.writeByte('['); var any = false; - if (sib.base) |base| { - try writer.print("{s}", .{@tagName(base)}); - any = true; + switch (sib.base) { + .none => {}, + .reg => |reg| { + try writer.print("{s}", .{@tagName(reg)}); + any = true; + }, + .frame => |frame| { + try writer.print("{}", .{frame}); + any = true; + }, } - if (sib.scale_index) |si| { + if (mem.scaleIndex()) |si| { if (any) try writer.writeAll(" + "); try writer.print("{s} * {d}", .{ @tagName(si.index), si.scale }); any = true; @@ -182,8 +204,8 @@ pub const Instruction = struct { } } - pub fn encode(inst: Instruction, writer: anytype) !void { - const encoder = Encoder(@TypeOf(writer)){ .writer = writer }; + pub fn encode(inst: Instruction, writer: anytype, comptime opts: Options) !void { + const encoder = Encoder(@TypeOf(writer), opts){ .writer = writer }; const enc = inst.encoding; const data = enc.data; @@ -269,22 +291,24 @@ pub const Instruction = struct { const segment_override: ?Register = switch (op_en) { .i, .zi, .o, .oi, .d, .np => null, - .fd => inst.ops[1].mem.base().?, - .td => inst.ops[0].mem.base().?, - .rm, .rmi => if (inst.ops[1].isSegmentRegister()) blk: { - break :blk switch (inst.ops[1]) { - .reg => |r| r, - .mem => |m| m.base().?, + .fd => inst.ops[1].mem.base().reg, + .td => inst.ops[0].mem.base().reg, + .rm, .rmi => if (inst.ops[1].isSegmentRegister()) + switch (inst.ops[1]) { + .reg => |reg| reg, + .mem => |mem| mem.base().reg, else => unreachable, - }; - } else null, - .m, .mi, .m1, .mc, .mr, .mri, .mrc => if (inst.ops[0].isSegmentRegister()) blk: { - break :blk switch (inst.ops[0]) { - .reg => |r| r, - .mem => |m| m.base().?, + } + else + null, + .m, .mi, .m1, .mc, .mr, .mri, .mrc => if (inst.ops[0].isSegmentRegister()) + switch (inst.ops[0]) { + .reg => |reg| reg, + .mem => |mem| mem.base().reg, else => unreachable, - }; - } else null, + } + else + null, }; if (segment_override) |seg| { legacy.setSegmentOverride(seg); @@ -298,7 +322,10 @@ pub const Instruction = struct { var rex = Rex{}; rex.present = inst.encoding.data.mode == .rex; - rex.w = inst.encoding.data.mode == .long; + switch (inst.encoding.data.mode) { + .long, .sse2_long => rex.w = true, + else => {}, + } switch (op_en) { .np, .i, .zi, .fd, .td, .d => {}, @@ -307,27 +334,17 @@ pub const Instruction = struct { const r_op = switch (op_en) { .rm, .rmi => inst.ops[0], .mr, .mri, .mrc => inst.ops[1], - else => null, + else => .none, }; - if (r_op) |op| { - rex.r = op.reg.isExtended(); - } + rex.r = r_op.isBaseExtended(); const b_x_op = switch (op_en) { .rm, .rmi => inst.ops[1], .m, .mi, .m1, .mc, .mr, .mri, .mrc => inst.ops[0], else => unreachable, }; - switch (b_x_op) { - .reg => |r| { - rex.b = r.isExtended(); - }, - .mem => |mem| { - rex.b = if (mem.base()) |base| base.isExtended() else false; - rex.x = if (mem.scaleIndex()) |si| si.index.isExtended() else false; - }, - else => unreachable, - } + rex.b = b_x_op.isBaseExtended(); + rex.x = b_x_op.isIndexExtended(); }, } @@ -348,72 +365,75 @@ pub const Instruction = struct { switch (mem) { .moffs => unreachable, - .sib => |sib| { - if (sib.base) |base| { - if (base.class() == .segment) { - // TODO audit this wrt SIB - try encoder.modRm_SIBDisp0(operand_enc); - if (sib.scale_index) |si| { - const scale = math.log2_int(u4, si.scale); - try encoder.sib_scaleIndexDisp32(scale, si.index.lowEnc()); - } else { - try encoder.sib_disp32(); - } - try 
encoder.disp32(sib.disp); + .sib => |sib| switch (sib.base) { + .none => { + try encoder.modRm_SIBDisp0(operand_enc); + if (mem.scaleIndex()) |si| { + const scale = math.log2_int(u4, si.scale); + try encoder.sib_scaleIndexDisp32(scale, si.index.lowEnc()); } else { - assert(base.class() == .general_purpose); - const dst = base.lowEnc(); - const src = operand_enc; - if (dst == 4 or sib.scale_index != null) { - if (sib.disp == 0 and dst != 5) { - try encoder.modRm_SIBDisp0(src); - if (sib.scale_index) |si| { - const scale = math.log2_int(u4, si.scale); - try encoder.sib_scaleIndexBase(scale, si.index.lowEnc(), dst); - } else { - try encoder.sib_base(dst); - } - } else if (math.cast(i8, sib.disp)) |_| { - try encoder.modRm_SIBDisp8(src); - if (sib.scale_index) |si| { - const scale = math.log2_int(u4, si.scale); - try encoder.sib_scaleIndexBaseDisp8(scale, si.index.lowEnc(), dst); - } else { - try encoder.sib_baseDisp8(dst); - } - try encoder.disp8(@truncate(i8, sib.disp)); - } else { - try encoder.modRm_SIBDisp32(src); - if (sib.scale_index) |si| { - const scale = math.log2_int(u4, si.scale); - try encoder.sib_scaleIndexBaseDisp32(scale, si.index.lowEnc(), dst); - } else { - try encoder.sib_baseDisp32(dst); - } - try encoder.disp32(sib.disp); - } - } else { - if (sib.disp == 0 and dst != 5) { - try encoder.modRm_indirectDisp0(src, dst); - } else if (math.cast(i8, sib.disp)) |_| { - try encoder.modRm_indirectDisp8(src, dst); - try encoder.disp8(@truncate(i8, sib.disp)); - } else { - try encoder.modRm_indirectDisp32(src, dst); - try encoder.disp32(sib.disp); - } - } + try encoder.sib_disp32(); } - } else { + try encoder.disp32(sib.disp); + }, + .reg => |base| if (base.class() == .segment) { + // TODO audit this wrt SIB try encoder.modRm_SIBDisp0(operand_enc); - if (sib.scale_index) |si| { + if (mem.scaleIndex()) |si| { const scale = math.log2_int(u4, si.scale); try encoder.sib_scaleIndexDisp32(scale, si.index.lowEnc()); } else { try encoder.sib_disp32(); } try encoder.disp32(sib.disp); - } + } else { + assert(base.class() == .general_purpose); + const dst = base.lowEnc(); + const src = operand_enc; + if (dst == 4 or mem.scaleIndex() != null) { + if (sib.disp == 0 and dst != 5) { + try encoder.modRm_SIBDisp0(src); + if (mem.scaleIndex()) |si| { + const scale = math.log2_int(u4, si.scale); + try encoder.sib_scaleIndexBase(scale, si.index.lowEnc(), dst); + } else { + try encoder.sib_base(dst); + } + } else if (math.cast(i8, sib.disp)) |_| { + try encoder.modRm_SIBDisp8(src); + if (mem.scaleIndex()) |si| { + const scale = math.log2_int(u4, si.scale); + try encoder.sib_scaleIndexBaseDisp8(scale, si.index.lowEnc(), dst); + } else { + try encoder.sib_baseDisp8(dst); + } + try encoder.disp8(@truncate(i8, sib.disp)); + } else { + try encoder.modRm_SIBDisp32(src); + if (mem.scaleIndex()) |si| { + const scale = math.log2_int(u4, si.scale); + try encoder.sib_scaleIndexBaseDisp32(scale, si.index.lowEnc(), dst); + } else { + try encoder.sib_baseDisp32(dst); + } + try encoder.disp32(sib.disp); + } + } else { + if (sib.disp == 0 and dst != 5) { + try encoder.modRm_indirectDisp0(src, dst); + } else if (math.cast(i8, sib.disp)) |_| { + try encoder.modRm_indirectDisp8(src, dst); + try encoder.disp8(@truncate(i8, sib.disp)); + } else { + try encoder.modRm_indirectDisp32(src, dst); + try encoder.disp32(sib.disp); + } + } + }, + .frame => if (@TypeOf(encoder).options.allow_frame_loc) { + try encoder.modRm_indirectDisp32(operand_enc, undefined); + try encoder.disp32(undefined); + } else return error.CannotEncode, }, 
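+            // The `.frame` placeholder above is only reachable while estimating
+            // instruction lengths; Emit encodes with default options, after
+            // `Mir.resolveFrameLoc` has rewritten frame bases into a concrete
+            // register and displacement.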
.rip => |rip| { try encoder.modRm_RIPDisp32(operand_enc); @@ -482,11 +502,14 @@ pub const LegacyPrefixes = packed struct { } }; -fn Encoder(comptime T: type) type { +pub const Options = struct { allow_frame_loc: bool = false }; + +fn Encoder(comptime T: type, comptime opts: Options) type { return struct { writer: T, const Self = @This(); + pub const options = opts; // -------- // Prefixes diff --git a/src/arch/x86_64/encodings.zig b/src/arch/x86_64/encodings.zig index 05933b68bb..333bdceea8 100644 --- a/src/arch/x86_64/encodings.zig +++ b/src/arch/x86_64/encodings.zig @@ -860,6 +860,12 @@ pub const table = [_]Entry{ .{ .minsd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x5d }, 0, .sse2 }, + .{ .movd, .rm, &.{ .xmm, .rm32 }, &.{ 0x66, 0x0f, 0x6e }, 0, .sse2 }, + .{ .movd, .mr, &.{ .rm32, .xmm }, &.{ 0x66, 0x0f, 0x7e }, 0, .sse2 }, + + .{ .movq, .rm, &.{ .xmm, .rm64 }, &.{ 0x66, 0x0f, 0x6e }, 0, .sse2_long }, + .{ .movq, .mr, &.{ .rm64, .xmm }, &.{ 0x66, 0x0f, 0x7e }, 0, .sse2_long }, + .{ .movq, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf3, 0x0f, 0x7e }, 0, .sse2 }, .{ .movq, .mr, &.{ .xmm_m64, .xmm }, &.{ 0x66, 0x0f, 0xd6 }, 0, .sse2 }, |
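
The core idea of the frame-index scheme is easiest to see in isolation. Below is a minimal, self-contained sketch (illustrative names only, not the compiler's actual types) of how codegen can record symbolic frame slots and later resolve them to a concrete base register and displacement once frame layout is known, mirroring what `Mir.resolveFrameLoc` does in the hunk above:

    const std = @import("std");

    // A symbolic stack slot: codegen refers to frame memory by index while the
    // final frame layout is still unknown.
    const FrameIndex = enum(u32) { ret_addr, base_ptr, stack_frame, _ };

    const Register = enum { rbp, rsp };

    // The concrete location chosen once every allocation's size and alignment
    // is known.
    const FrameLoc = struct { base: Register, disp: i32 };

    // Substitute the resolved base and displacement, folding in any extra
    // displacement the operand itself carried.
    fn resolveFrameLoc(frame_locs: []const FrameLoc, index: FrameIndex, extra_disp: i32) FrameLoc {
        const loc = frame_locs[@enumToInt(index)];
        return .{ .base = loc.base, .disp = loc.disp + extra_disp };
    }

    test "frame indices resolve after layout" {
        // Hypothetical layout: [rbp+8] return address, [rbp+0] saved rbp,
        // locals starting 32 bytes below the base pointer.
        const locs = [_]FrameLoc{
            .{ .base = .rbp, .disp = 8 }, // ret_addr
            .{ .base = .rbp, .disp = 0 }, // base_ptr
            .{ .base = .rbp, .disp = -32 }, // stack_frame
        };
        const loc = resolveFrameLoc(&locs, .stack_frame, 16);
        try std.testing.expectEqual(Register.rbp, loc.base);
        try std.testing.expectEqual(@as(i32, -16), loc.disp);
    }

In the real backend the resolved locations live in `Mir.frame_locs`, indexed by `FrameIndex`, and `Lower.mem` runs every frame-based memory operand through `resolveFrameLoc` before encoding, which is why `Emit` can encode with default options and treat an unresolved frame operand as an error.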
