diff options
| author | Andrew Kelley <andrew@ziglang.org> | 2023-05-02 08:25:57 -0700 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2023-05-02 08:25:57 -0700 |
| commit | a2e2e25165b51ef92dbfbd9c46b6a01c90b250b3 (patch) | |
| tree | 7a3889971c17987ed1644945f1399e3c4309ed92 /src | |
| parent | 28923474401051a9aa0bddd60904b9be64943dba (diff) | |
| parent | 3b1ea390a301dbdc992043d97cf618a94e8801de (diff) | |
| download | zig-a2e2e25165b51ef92dbfbd9c46b6a01c90b250b3.tar.gz zig-a2e2e25165b51ef92dbfbd9c46b6a01c90b250b3.zip | |
Merge pull request #15505 from jacobly0/x86_64-behavior
x86_64: fixes for behavior tests
Diffstat (limited to 'src')
| -rw-r--r-- | src/Sema.zig | 9 | ||||
| -rw-r--r-- | src/arch/x86_64/CodeGen.zig | 1732 | ||||
| -rw-r--r-- | src/arch/x86_64/Encoding.zig | 27 | ||||
| -rw-r--r-- | src/arch/x86_64/Lower.zig | 6 | ||||
| -rw-r--r-- | src/arch/x86_64/Mir.zig | 12 | ||||
| -rw-r--r-- | src/arch/x86_64/encoder.zig | 2 | ||||
| -rw-r--r-- | src/arch/x86_64/encodings.zig | 14 | ||||
| -rw-r--r-- | src/codegen.zig | 157 | ||||
| -rw-r--r-- | src/link.zig | 4 | ||||
| -rw-r--r-- | src/link/Coff.zig | 111 | ||||
| -rw-r--r-- | src/link/Elf.zig | 107 | ||||
| -rw-r--r-- | src/link/MachO.zig | 109 | ||||
| -rw-r--r-- | src/print_air.zig | 12 |
13 files changed, 1488 insertions, 814 deletions
diff --git a/src/Sema.zig b/src/Sema.zig index 71a1215dcd..79f2fd7fca 100644 --- a/src/Sema.zig +++ b/src/Sema.zig @@ -23287,8 +23287,7 @@ fn panicWithMsg( const arena = sema.arena; if (!mod.backendSupportsFeature(.panic_fn)) { - _ = try block.addNoOp(.breakpoint); - _ = try block.addNoOp(.unreach); + _ = try block.addNoOp(.trap); return; } const panic_fn = try sema.getBuiltin("panic"); @@ -23336,8 +23335,7 @@ fn panicUnwrapError( { if (!sema.mod.backendSupportsFeature(.panic_unwrap_error)) { - _ = try fail_block.addNoOp(.breakpoint); - _ = try fail_block.addNoOp(.unreach); + _ = try fail_block.addNoOp(.trap); } else { const panic_fn = try sema.getBuiltin("panicUnwrapError"); const err = try fail_block.addTyOp(unwrap_err_tag, Type.anyerror, operand); @@ -23462,8 +23460,7 @@ fn safetyCheckFormatted( defer fail_block.instructions.deinit(gpa); if (!sema.mod.backendSupportsFeature(.safety_check_formatted)) { - _ = try fail_block.addNoOp(.breakpoint); - _ = try fail_block.addNoOp(.unreach); + _ = try fail_block.addNoOp(.trap); } else { const panic_fn = try sema.getBuiltin(func); _ = try sema.analyzeCall(&fail_block, panic_fn, sema.src, sema.src, .auto, false, args, null); diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index be972d7aea..a658103c1a 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -7,6 +7,8 @@ const leb128 = std.leb; const link = @import("../../link.zig"); const log = std.log.scoped(.codegen); const tracking_log = std.log.scoped(.tracking); +const verbose_tracking_log = std.log.scoped(.verbose_tracking); +const wip_mir_log = std.log.scoped(.wip_mir); const math = std.math; const mem = std.mem; const trace = @import("../../tracy.zig").trace; @@ -48,16 +50,13 @@ const sse = abi.RegisterClass.sse; const InnerError = CodeGenError || error{OutOfRegisters}; -const debug_wip_mir = false; -const debug_tracking = false; - gpa: Allocator, air: Air, liveness: Liveness, bin_file: *link.File, debug_output: DebugInfoOutput, target: *const std.Target, -mod_fn: *const Module.Fn, +owner: Owner, err_msg: ?*ErrorMsg, args: []MCValue, ret_mcv: InstTracking, @@ -109,6 +108,44 @@ const mir_to_air_map_init = if (builtin.mode == .Debug) std.AutoHashMapUnmanaged const FrameAddr = struct { index: FrameIndex, off: i32 = 0 }; const RegisterOffset = struct { reg: Register, off: i32 = 0 }; +const Owner = union(enum) { + mod_fn: *const Module.Fn, + lazy_sym: link.File.LazySymbol, + + fn getDecl(owner: Owner) Module.Decl.Index { + return switch (owner) { + .mod_fn => |mod_fn| mod_fn.owner_decl, + .lazy_sym => |lazy_sym| lazy_sym.ty.getOwnerDecl(), + }; + } + + fn getSymbolIndex(owner: Owner, ctx: *Self) !u32 { + switch (owner) { + .mod_fn => |mod_fn| { + const decl_index = mod_fn.owner_decl; + if (ctx.bin_file.cast(link.File.MachO)) |macho_file| { + const atom = try macho_file.getOrCreateAtomForDecl(decl_index); + return macho_file.getAtom(atom).getSymbolIndex().?; + } else if (ctx.bin_file.cast(link.File.Coff)) |coff_file| { + const atom = try coff_file.getOrCreateAtomForDecl(decl_index); + return coff_file.getAtom(atom).getSymbolIndex().?; + } else unreachable; + }, + .lazy_sym => |lazy_sym| { + if (ctx.bin_file.cast(link.File.MachO)) |macho_file| { + const atom = macho_file.getOrCreateAtomForLazySymbol(lazy_sym) catch |err| + return ctx.fail("{s} creating lazy symbol", .{@errorName(err)}); + return macho_file.getAtom(atom).getSymbolIndex().?; + } else if (ctx.bin_file.cast(link.File.Coff)) |coff_file| { + const atom = coff_file.getOrCreateAtomForLazySymbol(lazy_sym) catch |err| + return ctx.fail("{s} creating lazy symbol", .{@errorName(err)}); + return coff_file.getAtom(atom).getSymbolIndex().?; + } else unreachable; + }, + } + } +}; + pub const MCValue = union(enum) { /// No runtime bits. `void` types, empty structs, u0, enums with 1 tag, etc. /// TODO Look into deleting this tag and using `dead` instead, since every use @@ -220,9 +257,9 @@ pub const MCValue = union(enum) { .dead, .undef, .immediate, + .eflags, .register, .register_offset, - .eflags, .register_overflow, .lea_direct, .lea_got, @@ -298,6 +335,41 @@ pub const MCValue = union(enum) { }; } + fn mem(mcv: MCValue, ptr_size: Memory.PtrSize) Memory { + return switch (mcv) { + .none, + .unreach, + .dead, + .undef, + .immediate, + .eflags, + .register, + .register_offset, + .register_overflow, + .load_direct, + .lea_direct, + .load_got, + .lea_got, + .load_tlv, + .lea_tlv, + .lea_frame, + .reserved_frame, + => unreachable, + .memory => |addr| if (math.cast(i32, @bitCast(i64, addr))) |small_addr| + Memory.sib(ptr_size, .{ .base = .{ .reg = .ds }, .disp = small_addr }) + else + Memory.moffs(.ds, addr), + .indirect => |reg_off| Memory.sib(ptr_size, .{ + .base = .{ .reg = reg_off.reg }, + .disp = reg_off.off, + }), + .load_frame => |frame_addr| Memory.sib(ptr_size, .{ + .base = .{ .frame = frame_addr.index }, + .disp = frame_addr.off, + }), + }; + } + pub fn format( mcv: MCValue, comptime _: []const u8, @@ -575,12 +647,6 @@ pub fn generate( assert(fn_owner_decl.has_tv); const fn_type = fn_owner_decl.ty; - if (debug_wip_mir) { - const stderr = std.io.getStdErr().writer(); - fn_owner_decl.renderFullyQualifiedName(mod, stderr) catch {}; - stderr.writeAll(":\n") catch {}; - } - const gpa = bin_file.allocator; var function = Self{ .gpa = gpa, @@ -589,7 +655,7 @@ pub fn generate( .target = &bin_file.options.target, .bin_file = bin_file, .debug_output = debug_output, - .mod_fn = module_fn, + .owner = .{ .mod_fn = module_fn }, .err_msg = null, .args = undefined, // populated after `resolveCallingConventionValues` .ret_mcv = undefined, // populated after `resolveCallingConventionValues` @@ -614,6 +680,8 @@ pub fn generate( if (builtin.mode == .Debug) function.mir_to_air_map.deinit(gpa); } + wip_mir_log.debug("{}:", .{function.fmtDecl(module_fn.owner_decl)}); + try function.frame_allocs.resize(gpa, FrameIndex.named_count); function.frame_allocs.set( @enumToInt(FrameIndex.stack_frame), @@ -715,48 +783,190 @@ pub fn generate( } } -fn dumpWipMir(self: *Self, inst: Mir.Inst) !void { - if (!debug_wip_mir) return; - const stderr = std.io.getStdErr().writer(); +pub fn generateLazy( + bin_file: *link.File, + src_loc: Module.SrcLoc, + lazy_sym: link.File.LazySymbol, + code: *std.ArrayList(u8), + debug_output: DebugInfoOutput, +) CodeGenError!Result { + const gpa = bin_file.allocator; + var function = Self{ + .gpa = gpa, + .air = undefined, + .liveness = undefined, + .target = &bin_file.options.target, + .bin_file = bin_file, + .debug_output = debug_output, + .owner = .{ .lazy_sym = lazy_sym }, + .err_msg = null, + .args = undefined, + .ret_mcv = undefined, + .fn_type = undefined, + .arg_index = undefined, + .src_loc = src_loc, + .end_di_line = undefined, // no debug info yet + .end_di_column = undefined, // no debug info yet + }; + defer { + function.mir_instructions.deinit(gpa); + function.mir_extra.deinit(gpa); + } + + function.genLazy(lazy_sym) catch |err| switch (err) { + error.CodegenFail => return Result{ .fail = function.err_msg.? }, + error.OutOfRegisters => return Result{ + .fail = try ErrorMsg.create(bin_file.allocator, src_loc, "CodeGen ran out of registers. This is a bug in the Zig compiler.", .{}), + }, + else => |e| return e, + }; + + var mir = Mir{ + .instructions = function.mir_instructions.toOwnedSlice(), + .extra = try function.mir_extra.toOwnedSlice(bin_file.allocator), + .frame_locs = function.frame_locs.toOwnedSlice(), + }; + defer mir.deinit(bin_file.allocator); + var emit = Emit{ + .lower = .{ + .allocator = bin_file.allocator, + .mir = mir, + .target = &bin_file.options.target, + .src_loc = src_loc, + }, + .bin_file = bin_file, + .debug_output = debug_output, + .code = code, + .prev_di_pc = undefined, // no debug info yet + .prev_di_line = undefined, // no debug info yet + .prev_di_column = undefined, // no debug info yet + }; + defer emit.deinit(); + emit.emitMir() catch |err| switch (err) { + error.LowerFail, error.EmitFail => return Result{ .fail = emit.lower.err_msg.? }, + error.InvalidInstruction, error.CannotEncode => |e| { + const msg = switch (e) { + error.InvalidInstruction => "CodeGen failed to find a viable instruction.", + error.CannotEncode => "CodeGen failed to encode the instruction.", + }; + return Result{ + .fail = try ErrorMsg.create( + bin_file.allocator, + src_loc, + "{s} This is a bug in the Zig compiler.", + .{msg}, + ), + }; + }, + else => |e| return e, + }; + + if (function.err_msg) |em| { + return Result{ .fail = em }; + } else { + return Result.ok; + } +} + +const FormatDeclData = struct { + mod: *Module, + decl_index: Module.Decl.Index, +}; +fn formatDecl( + data: FormatDeclData, + comptime _: []const u8, + _: std.fmt.FormatOptions, + writer: anytype, +) @TypeOf(writer).Error!void { + try data.mod.declPtr(data.decl_index).renderFullyQualifiedName(data.mod, writer); +} +fn fmtDecl(self: *Self, decl_index: Module.Decl.Index) std.fmt.Formatter(formatDecl) { + return .{ .data = .{ + .mod = self.bin_file.options.module.?, + .decl_index = decl_index, + } }; +} + +const FormatAirData = struct { + self: *Self, + inst: Air.Inst.Index, +}; +fn formatAir( + data: FormatAirData, + comptime _: []const u8, + _: std.fmt.FormatOptions, + writer: anytype, +) @TypeOf(writer).Error!void { + @import("../../print_air.zig").dumpInst( + data.inst, + data.self.bin_file.options.module.?, + data.self.air, + data.self.liveness, + ); +} +fn fmtAir(self: *Self, inst: Air.Inst.Index) std.fmt.Formatter(formatAir) { + return .{ .data = .{ .self = self, .inst = inst } }; +} + +const FormatWipMirData = struct { + self: *Self, + inst: Mir.Inst.Index, +}; +fn formatWipMir( + data: FormatWipMirData, + comptime _: []const u8, + _: std.fmt.FormatOptions, + writer: anytype, +) @TypeOf(writer).Error!void { var lower = Lower{ - .allocator = self.gpa, + .allocator = data.self.gpa, .mir = .{ - .instructions = self.mir_instructions.slice(), - .extra = self.mir_extra.items, + .instructions = data.self.mir_instructions.slice(), + .extra = data.self.mir_extra.items, .frame_locs = (std.MultiArrayList(Mir.FrameLoc){}).slice(), }, - .target = self.target, - .src_loc = self.src_loc, + .target = data.self.target, + .src_loc = data.self.src_loc, }; - for (lower.lowerMir(inst) catch |err| switch (err) { + for (lower.lowerMir(data.self.mir_instructions.get(data.inst)) catch |err| switch (err) { error.LowerFail => { defer { - lower.err_msg.?.deinit(self.gpa); + lower.err_msg.?.deinit(data.self.gpa); lower.err_msg = null; } - try stderr.print("{s}\n", .{lower.err_msg.?.msg}); + try writer.writeAll(lower.err_msg.?.msg); return; }, - error.InvalidInstruction, error.CannotEncode => |e| { - try stderr.writeAll(switch (e) { - error.InvalidInstruction => "CodeGen failed to find a viable instruction.\n", - error.CannotEncode => "CodeGen failed to encode the instruction.\n", + error.OutOfMemory, error.InvalidInstruction, error.CannotEncode => |e| { + try writer.writeAll(switch (e) { + error.OutOfMemory => "Out of memory", + error.InvalidInstruction => "CodeGen failed to find a viable instruction.", + error.CannotEncode => "CodeGen failed to encode the instruction.", }); return; }, else => |e| return e, - }) |lower_inst| { - try stderr.print(" | {}\n", .{lower_inst}); - } + }) |lower_inst| try writer.print(" | {}", .{lower_inst}); +} +fn fmtWipMir(self: *Self, inst: Mir.Inst.Index) std.fmt.Formatter(formatWipMir) { + return .{ .data = .{ .self = self, .inst = inst } }; } -fn dumpTracking(self: *Self) !void { - if (!debug_tracking) return; - const stderr = std.io.getStdErr().writer(); - - var it = self.inst_tracking.iterator(); - while (it.next()) |entry| try stderr.print("%{d} = {}\n", .{ entry.key_ptr.*, entry.value_ptr.* }); +const FormatTrackingData = struct { + self: *Self, +}; +fn formatTracking( + data: FormatTrackingData, + comptime _: []const u8, + _: std.fmt.FormatOptions, + writer: anytype, +) @TypeOf(writer).Error!void { + var it = data.self.inst_tracking.iterator(); + while (it.next()) |entry| try writer.print("\n%{d} = {}", .{ entry.key_ptr.*, entry.value_ptr.* }); +} +fn fmtTracking(self: *Self) std.fmt.Formatter(formatTracking) { + return .{ .data = .{ .self = self } }; } fn addInst(self: *Self, inst: Mir.Inst) error{OutOfMemory}!Mir.Inst.Index { @@ -764,7 +974,14 @@ fn addInst(self: *Self, inst: Mir.Inst) error{OutOfMemory}!Mir.Inst.Index { try self.mir_instructions.ensureUnusedCapacity(gpa, 1); const result_index = @intCast(Mir.Inst.Index, self.mir_instructions.len); self.mir_instructions.appendAssumeCapacity(inst); - self.dumpWipMir(inst) catch {}; + switch (inst.tag) { + else => wip_mir_log.debug("{}", .{self.fmtWipMir(result_index)}), + .dbg_line, + .dbg_prologue_end, + .dbg_epilogue_begin, + .dead, + => {}, + } return result_index; } @@ -1186,13 +1403,8 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void { } if (self.liveness.isUnused(inst) and !self.air.mustLower(inst)) continue; - if (debug_wip_mir) @import("../../print_air.zig").dumpInst( - inst, - self.bin_file.options.module.?, - self.air, - self.liveness, - ); - self.dumpTracking() catch {}; + wip_mir_log.debug("{}", .{self.fmtAir(inst)}); + verbose_tracking_log.debug("{}", .{self.fmtTracking()}); const old_air_bookkeeping = self.air_bookkeeping; try self.inst_tracking.ensureUnusedCapacity(self.gpa, 1); @@ -1244,9 +1456,10 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void { .ceil, .round, .trunc_float, - .neg, => try self.airUnaryMath(inst), + .neg => try self.airNeg(inst), + .add_with_overflow => try self.airAddSubWithOverflow(inst), .sub_with_overflow => try self.airAddSubWithOverflow(inst), .mul_with_overflow => try self.airMulWithOverflow(inst), @@ -1453,7 +1666,69 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void { } } } - self.dumpTracking() catch {}; + verbose_tracking_log.debug("{}", .{self.fmtTracking()}); +} + +fn genLazy(self: *Self, lazy_sym: link.File.LazySymbol) InnerError!void { + switch (lazy_sym.ty.zigTypeTag()) { + .Enum => { + const enum_ty = lazy_sym.ty; + wip_mir_log.debug("{}.@tagName:", .{enum_ty.fmt(self.bin_file.options.module.?)}); + + const param_regs = abi.getCAbiIntParamRegs(self.target.*); + const param_locks = self.register_manager.lockRegsAssumeUnused(2, param_regs[0..2].*); + defer for (param_locks) |lock| self.register_manager.unlockReg(lock); + + const ret_reg = param_regs[0]; + const enum_mcv = MCValue{ .register = param_regs[1] }; + + var exitlude_jump_relocs = try self.gpa.alloc(u32, enum_ty.enumFieldCount()); + defer self.gpa.free(exitlude_jump_relocs); + + const data_reg = try self.register_manager.allocReg(null, gp); + const data_lock = self.register_manager.lockRegAssumeUnused(data_reg); + defer self.register_manager.unlockReg(data_lock); + try self.genLazySymbolRef(.lea, data_reg, .{ .kind = .const_data, .ty = enum_ty }); + + var data_off: i32 = 0; + for ( + exitlude_jump_relocs, + enum_ty.enumFields().keys(), + 0.., + ) |*exitlude_jump_reloc, tag_name, index| { + var tag_pl = Value.Payload.U32{ + .base = .{ .tag = .enum_field_index }, + .data = @intCast(u32, index), + }; + const tag_val = Value.initPayload(&tag_pl.base); + const tag_mcv = try self.genTypedValue(.{ .ty = enum_ty, .val = tag_val }); + try self.genBinOpMir(.cmp, enum_ty, enum_mcv, tag_mcv); + const skip_reloc = try self.asmJccReloc(undefined, .ne); + + try self.genSetMem( + .{ .reg = ret_reg }, + 0, + Type.usize, + .{ .register_offset = .{ .reg = data_reg, .off = data_off } }, + ); + try self.genSetMem(.{ .reg = ret_reg }, 8, Type.usize, .{ .immediate = tag_name.len }); + + exitlude_jump_reloc.* = try self.asmJmpReloc(undefined); + try self.performReloc(skip_reloc); + + data_off += @intCast(i32, tag_name.len + 1); + } + + try self.airTrap(); + + for (exitlude_jump_relocs) |reloc| try self.performReloc(reloc); + try self.asmOpOnly(.ret); + }, + else => return self.fail( + "TODO implement {s} for {}", + .{ @tagName(lazy_sym.kind), lazy_sym.ty.fmt(self.bin_file.options.module.?) }, + ), + } } fn getValue(self: *Self, value: MCValue, inst: ?Air.Inst.Index) void { @@ -1619,15 +1894,16 @@ fn allocFrameIndex(self: *Self, alloc: FrameAlloc) !FrameIndex { const frame_allocs_slice = self.frame_allocs.slice(); const frame_size = frame_allocs_slice.items(.abi_size); const frame_align = frame_allocs_slice.items(.abi_align); + + const stack_frame_align = &frame_align[@enumToInt(FrameIndex.stack_frame)]; + stack_frame_align.* = @max(stack_frame_align.*, alloc.abi_align); + for (self.free_frame_indices.keys(), 0..) |frame_index, free_i| { const abi_size = frame_size[@enumToInt(frame_index)]; if (abi_size != alloc.abi_size) continue; const abi_align = &frame_align[@enumToInt(frame_index)]; abi_align.* = @max(abi_align.*, alloc.abi_align); - const stack_frame_align = &frame_align[@enumToInt(FrameIndex.stack_frame)]; - stack_frame_align.* = @max(stack_frame_align.*, alloc.abi_align); - _ = self.free_frame_indices.swapRemoveAt(free_i); return frame_index; } @@ -1828,7 +2104,7 @@ pub fn spillRegisters(self: *Self, registers: []const Register) !void { /// allocated. A second call to `copyToTmpRegister` may return the same register. /// This can have a side effect of spilling instructions to the stack to free up a register. fn copyToTmpRegister(self: *Self, ty: Type, mcv: MCValue) !Register { - const reg: Register = try self.register_manager.allocReg(null, try self.regClassForType(ty)); + const reg = try self.register_manager.allocReg(null, try self.regClassForType(ty)); try self.genSetReg(reg, ty, mcv); return reg; } @@ -1871,16 +2147,48 @@ fn airRetPtr(self: *Self, inst: Air.Inst.Index) !void { fn airFptrunc(self: *Self, inst: Air.Inst.Index) !void { const ty_op = self.air.instructions.items(.data)[inst].ty_op; - _ = ty_op; - return self.fail("TODO implement airFptrunc for {}", .{self.target.cpu.arch}); - // return self.finishAir(inst, result, .{ ty_op.operand, .none, .none }); + const dst_ty = self.air.typeOfIndex(inst); + const src_ty = self.air.typeOf(ty_op.operand); + if (dst_ty.floatBits(self.target.*) != 32 or src_ty.floatBits(self.target.*) != 64 or + !Target.x86.featureSetHas(self.target.cpu.features, .sse2)) + return self.fail("TODO implement airFptrunc from {} to {}", .{ + src_ty.fmt(self.bin_file.options.module.?), + dst_ty.fmt(self.bin_file.options.module.?), + }); + + const src_mcv = try self.resolveInst(ty_op.operand); + const dst_mcv = if (src_mcv.isRegister() and self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) + src_mcv + else + try self.copyToRegisterWithInstTracking(inst, dst_ty, src_mcv); + const dst_lock = self.register_manager.lockReg(dst_mcv.register); + defer if (dst_lock) |lock| self.register_manager.unlockReg(lock); + + try self.genBinOpMir(.cvtsd2ss, src_ty, dst_mcv, src_mcv); + return self.finishAir(inst, dst_mcv, .{ ty_op.operand, .none, .none }); } fn airFpext(self: *Self, inst: Air.Inst.Index) !void { const ty_op = self.air.instructions.items(.data)[inst].ty_op; - _ = ty_op; - return self.fail("TODO implement airFpext for {}", .{self.target.cpu.arch}); - // return self.finishAir(inst, result, .{ ty_op.operand, .none, .none }); + const dst_ty = self.air.typeOfIndex(inst); + const src_ty = self.air.typeOf(ty_op.operand); + if (dst_ty.floatBits(self.target.*) != 64 or src_ty.floatBits(self.target.*) != 32 or + !Target.x86.featureSetHas(self.target.cpu.features, .sse2)) + return self.fail("TODO implement airFpext from {} to {}", .{ + src_ty.fmt(self.bin_file.options.module.?), + dst_ty.fmt(self.bin_file.options.module.?), + }); + + const src_mcv = try self.resolveInst(ty_op.operand); + const dst_mcv = if (src_mcv.isRegister() and self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) + src_mcv + else + try self.copyToRegisterWithInstTracking(inst, dst_ty, src_mcv); + const dst_lock = self.register_manager.lockReg(dst_mcv.register); + defer if (dst_lock) |lock| self.register_manager.unlockReg(lock); + + try self.genBinOpMir(.cvtss2sd, src_ty, dst_mcv, src_mcv); + return self.finishAir(inst, dst_mcv, .{ ty_op.operand, .none, .none }); } fn airIntCast(self: *Self, inst: Air.Inst.Index) !void { @@ -1928,13 +2236,10 @@ fn airIntCast(self: *Self, inst: Air.Inst.Index) !void { registerAlias(src_reg, min_abi_size), ); }, - .load_frame => |frame_addr| try self.asmRegisterMemory( + .memory, .indirect, .load_frame => try self.asmRegisterMemory( tag, dst_alias, - Memory.sib(Memory.PtrSize.fromSize(min_abi_size), .{ - .base = .{ .frame = frame_addr.index }, - .disp = frame_addr.off, - }), + src_mcv.mem(Memory.PtrSize.fromSize(min_abi_size)), ), else => return self.fail("TODO airIntCast from {s} to {s}", .{ @tagName(src_mcv), @@ -2102,6 +2407,7 @@ fn airMulDivBinOp(self: *Self, inst: Air.Inst.Index) !void { } }; const src_ty = Type.initPayload(&src_pl.base); + try self.spillEflagsIfOccupied(); try self.spillRegisters(&.{ .rax, .rdx }); const lhs = try self.resolveInst(bin_op.lhs); const rhs = try self.resolveInst(bin_op.rhs); @@ -2315,12 +2621,7 @@ fn airAddSubWithOverflow(self: *Self, inst: Air.Inst.Index) !void { const frame_index = try self.allocFrameIndex(FrameAlloc.initType(tuple_ty, self.target.*)); - try self.genSetFrameTruncatedOverflowCompare( - tuple_ty, - frame_index, - partial_mcv.register, - cc, - ); + try self.genSetFrameTruncatedOverflowCompare(tuple_ty, frame_index, partial_mcv, cc); break :result .{ .load_frame = .{ .index = frame_index } }; }, else => unreachable, @@ -2392,12 +2693,7 @@ fn airShlWithOverflow(self: *Self, inst: Air.Inst.Index) !void { const frame_index = try self.allocFrameIndex(FrameAlloc.initType(tuple_ty, self.target.*)); - try self.genSetFrameTruncatedOverflowCompare( - tuple_ty, - frame_index, - partial_mcv.register, - cc, - ); + try self.genSetFrameTruncatedOverflowCompare(tuple_ty, frame_index, partial_mcv, cc); break :result .{ .load_frame = .{ .index = frame_index } }; }, else => unreachable, @@ -2410,173 +2706,175 @@ fn genSetFrameTruncatedOverflowCompare( self: *Self, tuple_ty: Type, frame_index: FrameIndex, - reg: Register, - cc: Condition, + src_mcv: MCValue, + overflow_cc: ?Condition, ) !void { - const reg_lock = self.register_manager.lockReg(reg); - defer if (reg_lock) |lock| self.register_manager.unlockReg(lock); + const src_lock = switch (src_mcv) { + .register => |reg| self.register_manager.lockReg(reg), + else => null, + }; + defer if (src_lock) |lock| self.register_manager.unlockReg(lock); const ty = tuple_ty.structFieldType(0); const int_info = ty.intInfo(self.target.*); - const extended_ty = switch (int_info.signedness) { - .signed => Type.isize, - .unsigned => ty, + + var hi_limb_pl = Type.Payload.Bits{ + .base = .{ .tag = switch (int_info.signedness) { + .signed => .int_signed, + .unsigned => .int_unsigned, + } }, + .data = (int_info.bits - 1) % 64 + 1, }; + const hi_limb_ty = Type.initPayload(&hi_limb_pl.base); - const temp_regs = try self.register_manager.allocRegs(3, .{ null, null, null }, gp); - const temp_regs_locks = self.register_manager.lockRegsAssumeUnused(3, temp_regs); - defer for (temp_regs_locks) |rreg| { - self.register_manager.unlockReg(rreg); + var rest_pl = Type.Payload.Bits{ + .base = .{ .tag = .int_unsigned }, + .data = int_info.bits - hi_limb_pl.data, }; + const rest_ty = Type.initPayload(&rest_pl.base); + + const temp_regs = try self.register_manager.allocRegs(3, .{ null, null, null }, gp); + const temp_locks = self.register_manager.lockRegsAssumeUnused(3, temp_regs); + defer for (temp_locks) |lock| self.register_manager.unlockReg(lock); const overflow_reg = temp_regs[0]; - try self.asmSetccRegister(overflow_reg.to8(), cc); + if (overflow_cc) |cc| try self.asmSetccRegister(overflow_reg.to8(), cc); const scratch_reg = temp_regs[1]; - try self.genSetReg(scratch_reg, extended_ty, .{ .register = reg }); - try self.truncateRegister(ty, scratch_reg); - try self.genBinOpMir( - .cmp, - extended_ty, - .{ .register = reg }, - .{ .register = scratch_reg }, - ); + const hi_limb_off = if (int_info.bits <= 64) 0 else (int_info.bits - 1) / 64 * 8; + const hi_limb_mcv = if (hi_limb_off > 0) + src_mcv.address().offset(int_info.bits / 64 * 8).deref() + else + src_mcv; + try self.genSetReg(scratch_reg, hi_limb_ty, hi_limb_mcv); + try self.truncateRegister(hi_limb_ty, scratch_reg); + try self.genBinOpMir(.cmp, hi_limb_ty, .{ .register = scratch_reg }, hi_limb_mcv); const eq_reg = temp_regs[2]; - try self.asmSetccRegister(eq_reg.to8(), .ne); - try self.genBinOpMir( - .@"or", - Type.u8, - .{ .register = overflow_reg }, - .{ .register = eq_reg }, - ); + if (overflow_cc) |_| { + try self.asmSetccRegister(eq_reg.to8(), .ne); + try self.genBinOpMir(.@"or", Type.u8, .{ .register = overflow_reg }, .{ .register = eq_reg }); + } + const payload_off = @intCast(i32, tuple_ty.structFieldOffset(0, self.target.*)); + if (hi_limb_off > 0) try self.genSetMem(.{ .frame = frame_index }, payload_off, rest_ty, src_mcv); try self.genSetMem( .{ .frame = frame_index }, - @intCast(i32, tuple_ty.structFieldOffset(1, self.target.*)), - tuple_ty.structFieldType(1), - .{ .register = overflow_reg.to8() }, + payload_off + hi_limb_off, + hi_limb_ty, + .{ .register = scratch_reg }, ); try self.genSetMem( .{ .frame = frame_index }, - @intCast(i32, tuple_ty.structFieldOffset(0, self.target.*)), - ty, - .{ .register = scratch_reg }, + @intCast(i32, tuple_ty.structFieldOffset(1, self.target.*)), + tuple_ty.structFieldType(1), + if (overflow_cc) |_| .{ .register = overflow_reg.to8() } else .{ .eflags = .ne }, ); } fn airMulWithOverflow(self: *Self, inst: Air.Inst.Index) !void { const ty_pl = self.air.instructions.items(.data)[inst].ty_pl; const bin_op = self.air.extraData(Air.Bin, ty_pl.payload).data; - const result: MCValue = result: { - const dst_ty = self.air.typeOf(bin_op.lhs); - switch (dst_ty.zigTypeTag()) { - .Vector => return self.fail("TODO implement mul_with_overflow for Vector type", .{}), - .Int => { - try self.spillEflagsIfOccupied(); + const dst_ty = self.air.typeOf(bin_op.lhs); + const result: MCValue = switch (dst_ty.zigTypeTag()) { + .Vector => return self.fail("TODO implement mul_with_overflow for Vector type", .{}), + .Int => result: { + try self.spillEflagsIfOccupied(); + try self.spillRegisters(&.{ .rax, .rdx }); - const dst_info = dst_ty.intInfo(self.target.*); - const cc: Condition = switch (dst_info.signedness) { - .unsigned => .c, - .signed => .o, + const dst_info = dst_ty.intInfo(self.target.*); + const cc: Condition = switch (dst_info.signedness) { + .unsigned => .c, + .signed => .o, + }; + + const lhs_active_bits = self.activeIntBits(bin_op.lhs); + const rhs_active_bits = self.activeIntBits(bin_op.rhs); + var src_pl = Type.Payload.Bits{ .base = .{ .tag = switch (dst_info.signedness) { + .signed => .int_signed, + .unsigned => .int_unsigned, + } }, .data = math.max3(lhs_active_bits, rhs_active_bits, dst_info.bits / 2) }; + const src_ty = Type.initPayload(&src_pl.base); + + const lhs = try self.resolveInst(bin_op.lhs); + const rhs = try self.resolveInst(bin_op.rhs); + + const tuple_ty = self.air.typeOfIndex(inst); + const extra_bits = if (dst_info.bits <= 64) + self.regExtraBits(dst_ty) + else + dst_info.bits % 64; + const partial_mcv = if (dst_info.signedness == .signed and extra_bits > 0) dst: { + const rhs_lock: ?RegisterLock = switch (rhs) { + .register => |reg| self.register_manager.lockRegAssumeUnused(reg), + else => null, }; + defer if (rhs_lock) |lock| self.register_manager.unlockReg(lock); - const tuple_ty = self.air.typeOfIndex(inst); - if (dst_info.bits >= 8 and math.isPowerOfTwo(dst_info.bits)) { - var src_pl = Type.Payload.Bits{ .base = .{ .tag = switch (dst_info.signedness) { - .signed => .int_signed, - .unsigned => .int_unsigned, - } }, .data = math.max3( - self.activeIntBits(bin_op.lhs), - self.activeIntBits(bin_op.rhs), - dst_info.bits / 2, - ) }; - const src_ty = Type.initPayload(&src_pl.base); + const dst_reg: Register = blk: { + if (lhs.isRegister()) break :blk lhs.register; + break :blk try self.copyToTmpRegister(dst_ty, lhs); + }; + const dst_mcv = MCValue{ .register = dst_reg }; + const dst_reg_lock = self.register_manager.lockRegAssumeUnused(dst_reg); + defer self.register_manager.unlockReg(dst_reg_lock); - try self.spillRegisters(&.{ .rax, .rdx }); - const lhs = try self.resolveInst(bin_op.lhs); - const rhs = try self.resolveInst(bin_op.rhs); + const rhs_mcv: MCValue = blk: { + if (rhs.isRegister() or rhs.isMemory()) break :blk rhs; + break :blk MCValue{ .register = try self.copyToTmpRegister(dst_ty, rhs) }; + }; + const rhs_mcv_lock: ?RegisterLock = switch (rhs_mcv) { + .register => |reg| self.register_manager.lockReg(reg), + else => null, + }; + defer if (rhs_mcv_lock) |lock| self.register_manager.unlockReg(lock); - const partial_mcv = try self.genMulDivBinOp(.mul, null, dst_ty, src_ty, lhs, rhs); - switch (partial_mcv) { - .register => |reg| { - self.eflags_inst = inst; - break :result .{ .register_overflow = .{ .reg = reg, .eflags = cc } }; - }, - else => {}, - } + try self.genIntMulComplexOpMir(Type.isize, dst_mcv, rhs_mcv); + break :dst dst_mcv; + } else try self.genMulDivBinOp(.mul, null, dst_ty, src_ty, lhs, rhs); - // For now, this is the only supported multiply that doesn't fit in a register. - assert(dst_info.bits == 128 and src_pl.data == 64); + switch (partial_mcv) { + .register => |reg| if (extra_bits == 0) { + self.eflags_inst = inst; + break :result .{ .register_overflow = .{ .reg = reg, .eflags = cc } }; + } else { const frame_index = try self.allocFrameIndex(FrameAlloc.initType(tuple_ty, self.target.*)); - try self.genSetMem( - .{ .frame = frame_index }, - @intCast(i32, tuple_ty.structFieldOffset(1, self.target.*)), - tuple_ty.structFieldType(1), - .{ .immediate = 0 }, // overflow is impossible for 64-bit*64-bit -> 128-bit - ); - try self.genSetMem( - .{ .frame = frame_index }, - @intCast(i32, tuple_ty.structFieldOffset(0, self.target.*)), - tuple_ty.structFieldType(0), - partial_mcv, - ); + try self.genSetFrameTruncatedOverflowCompare(tuple_ty, frame_index, partial_mcv, cc); break :result .{ .load_frame = .{ .index = frame_index } }; - } - - const dst_reg: Register = dst_reg: { - switch (dst_info.signedness) { - .signed => { - const lhs = try self.resolveInst(bin_op.lhs); - const rhs = try self.resolveInst(bin_op.rhs); - - const rhs_lock: ?RegisterLock = switch (rhs) { - .register => |reg| self.register_manager.lockRegAssumeUnused(reg), - else => null, - }; - defer if (rhs_lock) |lock| self.register_manager.unlockReg(lock); - - const dst_reg: Register = blk: { - if (lhs.isRegister()) break :blk lhs.register; - break :blk try self.copyToTmpRegister(dst_ty, lhs); - }; - const dst_reg_lock = self.register_manager.lockRegAssumeUnused(dst_reg); - defer self.register_manager.unlockReg(dst_reg_lock); - - const rhs_mcv: MCValue = blk: { - if (rhs.isRegister() or rhs.isMemory()) break :blk rhs; - break :blk MCValue{ .register = try self.copyToTmpRegister(dst_ty, rhs) }; - }; - const rhs_mcv_lock: ?RegisterLock = switch (rhs_mcv) { - .register => |reg| self.register_manager.lockReg(reg), - else => null, - }; - defer if (rhs_mcv_lock) |lock| self.register_manager.unlockReg(lock); - - try self.genIntMulComplexOpMir(Type.isize, .{ .register = dst_reg }, rhs_mcv); - - break :dst_reg dst_reg; - }, - .unsigned => { - try self.spillRegisters(&.{ .rax, .rdx }); - - const lhs = try self.resolveInst(bin_op.lhs); - const rhs = try self.resolveInst(bin_op.rhs); + }, + else => { + // For now, this is the only supported multiply that doesn't fit in a register, + // so cc being set is impossible. - const dst_mcv = try self.genMulDivBinOp(.mul, null, dst_ty, dst_ty, lhs, rhs); - break :dst_reg dst_mcv.register; - }, - } - }; + assert(dst_info.bits <= 128 and src_pl.data == 64); - const frame_index = - try self.allocFrameIndex(FrameAlloc.initType(tuple_ty, self.target.*)); - try self.genSetFrameTruncatedOverflowCompare(tuple_ty, frame_index, dst_reg, cc); - break :result .{ .load_frame = .{ .index = frame_index } }; - }, - else => unreachable, - } + const frame_index = + try self.allocFrameIndex(FrameAlloc.initType(tuple_ty, self.target.*)); + if (dst_info.bits >= lhs_active_bits + rhs_active_bits) { + try self.genSetMem( + .{ .frame = frame_index }, + @intCast(i32, tuple_ty.structFieldOffset(0, self.target.*)), + tuple_ty.structFieldType(0), + partial_mcv, + ); + try self.genSetMem( + .{ .frame = frame_index }, + @intCast(i32, tuple_ty.structFieldOffset(1, self.target.*)), + tuple_ty.structFieldType(1), + .{ .immediate = 0 }, + ); + } else try self.genSetFrameTruncatedOverflowCompare( + tuple_ty, + frame_index, + partial_mcv, + null, + ); + break :result .{ .load_frame = .{ .index = frame_index } }; + }, + } + }, + else => unreachable, }; return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none }); } @@ -2616,19 +2914,9 @@ fn genIntMulDivOpMir( }; switch (mat_rhs) { .register => |reg| try self.asmRegister(tag, registerAlias(reg, abi_size)), - .indirect, .load_frame => try self.asmMemory( + .memory, .indirect, .load_frame => try self.asmMemory( tag, - Memory.sib(Memory.PtrSize.fromSize(abi_size), switch (mat_rhs) { - .indirect => |reg_off| .{ - .base = .{ .reg = reg_off.reg }, - .disp = reg_off.off, - }, - .load_frame => |frame_addr| .{ - .base = .{ .frame = frame_addr.index }, - .disp = frame_addr.off, - }, - else => unreachable, - }), + mat_rhs.mem(Memory.PtrSize.fromSize(abi_size)), ), else => unreachable, } @@ -3900,10 +4188,65 @@ fn airBitReverse(self: *Self, inst: Air.Inst.Index) !void { return self.finishAir(inst, dst_mcv, .{ ty_op.operand, .none, .none }); } +fn airNeg(self: *Self, inst: Air.Inst.Index) !void { + const un_op = self.air.instructions.items(.data)[inst].un_op; + const ty = self.air.typeOf(un_op); + const ty_bits = ty.floatBits(self.target.*); + + var arena = std.heap.ArenaAllocator.init(self.gpa); + defer arena.deinit(); + + const ExpectedContents = union { + f16: Value.Payload.Float_16, + f32: Value.Payload.Float_32, + f64: Value.Payload.Float_64, + f80: Value.Payload.Float_80, + f128: Value.Payload.Float_128, + }; + var stack align(@alignOf(ExpectedContents)) = + std.heap.stackFallback(@sizeOf(ExpectedContents), arena.allocator()); + + var vec_pl = Type.Payload.Array{ + .base = .{ .tag = .vector }, + .data = .{ + .len = @divExact(128, ty_bits), + .elem_type = ty, + }, + }; + const vec_ty = Type.initPayload(&vec_pl.base); + + var sign_pl = Value.Payload.SubValue{ + .base = .{ .tag = .repeated }, + .data = try Value.floatToValue(-0.0, stack.get(), ty, self.target.*), + }; + const sign_val = Value.initPayload(&sign_pl.base); + + const sign_mcv = try self.genTypedValue(.{ .ty = vec_ty, .val = sign_val }); + + const src_mcv = try self.resolveInst(un_op); + const dst_mcv = if (src_mcv.isRegister() and self.reuseOperand(inst, un_op, 0, src_mcv)) + src_mcv + else + try self.copyToRegisterWithInstTracking(inst, ty, src_mcv); + const dst_lock = self.register_manager.lockReg(dst_mcv.register); + defer if (dst_lock) |lock| self.register_manager.unlockReg(lock); + + try self.genBinOpMir(switch (ty_bits) { + 32 => .xorps, + 64 => .xorpd, + else => return self.fail("TODO implement airNeg for {}", .{ + ty.fmt(self.bin_file.options.module.?), + }), + }, vec_ty, dst_mcv, sign_mcv); + return self.finishAir(inst, dst_mcv, .{ un_op, .none, .none }); +} + fn airUnaryMath(self: *Self, inst: Air.Inst.Index) !void { const un_op = self.air.instructions.items(.data)[inst].un_op; _ = un_op; - return self.fail("TODO implement airUnaryMath for {}", .{self.target.cpu.arch}); + return self.fail("TODO implement airUnaryMath for {}", .{ + self.air.instructions.items(.tag)[inst], + }); //return self.finishAir(inst, result, .{ un_op, .none, .none }); } @@ -4056,7 +4399,6 @@ fn load(self: *Self, dst_mcv: MCValue, ptr_ty: Type, ptr_mcv: MCValue) InnerErro fn airLoad(self: *Self, inst: Air.Inst.Index) !void { const ty_op = self.air.instructions.items(.data)[inst].ty_op; const elem_ty = self.air.typeOfIndex(inst); - const elem_size = elem_ty.abiSize(self.target.*); const result: MCValue = result: { if (!elem_ty.hasRuntimeBitsIgnoreComptime()) break :result .none; @@ -4064,14 +4406,20 @@ fn airLoad(self: *Self, inst: Air.Inst.Index) !void { const reg_locks = self.register_manager.lockRegsAssumeUnused(3, .{ .rdi, .rsi, .rcx }); defer for (reg_locks) |lock| self.register_manager.unlockReg(lock); + const ptr_ty = self.air.typeOf(ty_op.operand); + const elem_size = elem_ty.abiSize(self.target.*); + + const elem_rc = try self.regClassForType(elem_ty); + const ptr_rc = try self.regClassForType(ptr_ty); + const ptr_mcv = try self.resolveInst(ty_op.operand); - const dst_mcv = if (elem_size <= 8 and self.reuseOperand(inst, ty_op.operand, 0, ptr_mcv)) + const dst_mcv = if (elem_size <= 8 and elem_rc.supersetOf(ptr_rc) and + self.reuseOperand(inst, ty_op.operand, 0, ptr_mcv)) // The MCValue that holds the pointer can be re-used as the value. ptr_mcv else try self.allocRegOrMem(inst, true); - const ptr_ty = self.air.typeOf(ty_op.operand); if (ptr_ty.ptrInfo().data.host_size > 0) { try self.packedLoad(dst_mcv, ptr_ty, ptr_mcv); } else { @@ -4293,17 +4641,9 @@ fn airStructFieldVal(self: *Self, inst: Air.Inst.Index) !void { switch (src_mcv) { .load_frame => |frame_addr| { - const field_abi_size = @intCast(u32, field_ty.abiSize(self.target.*)); - const limb_abi_size = @min(field_abi_size, 8); - const limb_abi_bits = limb_abi_size * 8; - const field_byte_off = @intCast(i32, field_off / limb_abi_bits * limb_abi_size); - const field_bit_off = field_off % limb_abi_bits; - - if (field_bit_off == 0) { - const off_mcv = MCValue{ .load_frame = .{ - .index = frame_addr.index, - .off = frame_addr.off + field_byte_off, - } }; + if (field_off % 8 == 0) { + const off_mcv = + src_mcv.address().offset(@intCast(i32, @divExact(field_off, 8))).deref(); if (self.reuseOperand(inst, operand, 0, src_mcv)) break :result off_mcv; const dst_mcv = try self.allocRegOrMem(inst, true); @@ -4311,6 +4651,12 @@ fn airStructFieldVal(self: *Self, inst: Air.Inst.Index) !void { break :result dst_mcv; } + const field_abi_size = @intCast(u32, field_ty.abiSize(self.target.*)); + const limb_abi_size = @min(field_abi_size, 8); + const limb_abi_bits = limb_abi_size * 8; + const field_byte_off = @intCast(i32, field_off / limb_abi_bits * limb_abi_size); + const field_bit_off = field_off % limb_abi_bits; + if (field_abi_size > 8) { return self.fail("TODO implement struct_field_val with large packed field", .{}); } @@ -4448,9 +4794,6 @@ fn genUnOp(self: *Self, maybe_inst: ?Air.Inst.Index, tag: Air.Inst.Tag, src_air: if (src_ty.zigTypeTag() == .Vector) { return self.fail("TODO implement genUnOp for {}", .{src_ty.fmt(self.bin_file.options.module.?)}); } - if (src_ty.abiSize(self.target.*) > 8) { - return self.fail("TODO implement genUnOp for {}", .{src_ty.fmt(self.bin_file.options.module.?)}); - } switch (src_mcv) { .eflags => |cc| switch (tag) { @@ -4466,13 +4809,13 @@ fn genUnOp(self: *Self, maybe_inst: ?Air.Inst.Index, tag: Air.Inst.Tag, src_air: }; defer if (src_lock) |lock| self.register_manager.unlockReg(lock); - const dst_mcv: MCValue = if (maybe_inst) |inst| - if (self.reuseOperand(inst, src_air, 0, src_mcv)) - src_mcv - else - try self.copyToRegisterWithInstTracking(inst, src_ty, src_mcv) - else - .{ .register = try self.copyToTmpRegister(src_ty, src_mcv) }; + const dst_mcv: MCValue = dst: { + if (maybe_inst) |inst| if (self.reuseOperand(inst, src_air, 0, src_mcv)) break :dst src_mcv; + + const dst_mcv = try self.allocRegOrMemAdvanced(src_ty, maybe_inst, true); + try self.genCopy(src_ty, dst_mcv, src_mcv); + break :dst dst_mcv; + }; const dst_lock = switch (dst_mcv) { .register => |reg| self.register_manager.lockReg(reg), else => null, @@ -4481,19 +4824,33 @@ fn genUnOp(self: *Self, maybe_inst: ?Air.Inst.Index, tag: Air.Inst.Tag, src_air: switch (tag) { .not => { + const limb_abi_size = @intCast(u16, @min(src_ty.abiSize(self.target.*), 8)); const int_info = if (src_ty.tag() == .bool) std.builtin.Type.Int{ .signedness = .unsigned, .bits = 1 } else src_ty.intInfo(self.target.*); - const extra_bits = self.regExtraBits(src_ty); - if (int_info.signedness == .unsigned and extra_bits > 0) { - const mask = (@as(u64, 1) << @intCast(u6, src_ty.bitSize(self.target.*))) - 1; - try self.genBinOpMir(.xor, src_ty, dst_mcv, .{ .immediate = mask }); - } else try self.genUnOpMir(.not, src_ty, dst_mcv); - }, + var byte_off: i32 = 0; + while (byte_off * 8 < int_info.bits) : (byte_off += limb_abi_size) { + var limb_pl = Type.Payload.Bits{ + .base = .{ .tag = switch (int_info.signedness) { + .signed => .int_signed, + .unsigned => .int_unsigned, + } }, + .data = @intCast(u16, @min(int_info.bits - byte_off * 8, limb_abi_size * 8)), + }; + const limb_ty = Type.initPayload(&limb_pl.base); + const limb_mcv = switch (byte_off) { + 0 => dst_mcv, + else => dst_mcv.address().offset(byte_off).deref(), + }; + if (limb_pl.base.tag == .int_unsigned and self.regExtraBits(limb_ty) > 0) { + const mask = @as(u64, math.maxInt(u64)) >> @intCast(u6, 64 - limb_pl.data); + try self.genBinOpMir(.xor, limb_ty, limb_mcv, .{ .immediate = mask }); + } else try self.genUnOpMir(.not, limb_ty, limb_mcv); + } + }, .neg => try self.genUnOpMir(.neg, src_ty, dst_mcv), - else => unreachable, } return dst_mcv; @@ -4534,17 +4891,7 @@ fn genUnOpMir(self: *Self, mir_tag: Mir.Inst.Tag, dst_ty: Type, dst_mcv: MCValue }, .indirect, .load_frame => try self.asmMemory( mir_tag, - Memory.sib(Memory.PtrSize.fromSize(abi_size), switch (dst_mcv) { - .indirect => |reg_off| .{ - .base = .{ .reg = reg_off.reg }, - .disp = reg_off.off, - }, - .load_frame => |frame_addr| .{ - .base = .{ .frame = frame_addr.index }, - .disp = frame_addr.off, - }, - else => unreachable, - }), + dst_mcv.mem(Memory.PtrSize.fromSize(abi_size)), ), } } @@ -5128,24 +5475,69 @@ fn genBinOp( switch (tag) { .add, .addwrap, - => try self.genBinOpMir(switch (lhs_ty.tag()) { + => try self.genBinOpMir(switch (lhs_ty.zigTypeTag()) { else => .add, - .f32 => .addss, - .f64 => .addsd, + .Float => switch (lhs_ty.floatBits(self.target.*)) { + 32 => if (Target.x86.featureSetHas(self.target.cpu.features, .sse)) + .addss + else + return self.fail("TODO implement genBinOp for {s} {} without sse", .{ + @tagName(tag), lhs_ty.fmt(self.bin_file.options.module.?), + }), + 64 => if (Target.x86.featureSetHas(self.target.cpu.features, .sse2)) + .addsd + else + return self.fail("TODO implement genBinOp for {s} {} without sse2", .{ + @tagName(tag), lhs_ty.fmt(self.bin_file.options.module.?), + }), + else => return self.fail("TODO implement genBinOp for {s} {}", .{ + @tagName(tag), lhs_ty.fmt(self.bin_file.options.module.?), + }), + }, }, lhs_ty, dst_mcv, src_mcv), .sub, .subwrap, - => try self.genBinOpMir(switch (lhs_ty.tag()) { + => try self.genBinOpMir(switch (lhs_ty.zigTypeTag()) { else => .sub, - .f32 => .subss, - .f64 => .subsd, + .Float => switch (lhs_ty.floatBits(self.target.*)) { + 32 => if (Target.x86.featureSetHas(self.target.cpu.features, .sse)) + .subss + else + return self.fail("TODO implement genBinOp for {s} {} without sse", .{ + @tagName(tag), lhs_ty.fmt(self.bin_file.options.module.?), + }), + 64 => if (Target.x86.featureSetHas(self.target.cpu.features, .sse2)) + .subsd + else + return self.fail("TODO implement genBinOp for {s} {} without sse2", .{ + @tagName(tag), lhs_ty.fmt(self.bin_file.options.module.?), + }), + else => return self.fail("TODO implement genBinOp for {s} {}", .{ + @tagName(tag), lhs_ty.fmt(self.bin_file.options.module.?), + }), + }, }, lhs_ty, dst_mcv, src_mcv), - .mul => try self.genBinOpMir(switch (lhs_ty.tag()) { - .f32 => .mulss, - .f64 => .mulsd, + .mul => try self.genBinOpMir(switch (lhs_ty.zigTypeTag()) { else => return self.fail("TODO implement genBinOp for {s} {}", .{ @tagName(tag), lhs_ty.fmt(self.bin_file.options.module.?) }), + .Float => switch (lhs_ty.floatBits(self.target.*)) { + 32 => if (Target.x86.featureSetHas(self.target.cpu.features, .sse)) + .mulss + else + return self.fail("TODO implement genBinOp for {s} {} without sse", .{ + @tagName(tag), lhs_ty.fmt(self.bin_file.options.module.?), + }), + 64 => if (Target.x86.featureSetHas(self.target.cpu.features, .sse2)) + .mulsd + else + return self.fail("TODO implement genBinOp for {s} {} without sse2", .{ + @tagName(tag), lhs_ty.fmt(self.bin_file.options.module.?), + }), + else => return self.fail("TODO implement genBinOp for {s} {}", .{ + @tagName(tag), lhs_ty.fmt(self.bin_file.options.module.?), + }), + }, }, lhs_ty, dst_mcv, src_mcv), .div_float, @@ -5153,12 +5545,27 @@ fn genBinOp( .div_trunc, .div_floor, => { - try self.genBinOpMir(switch (lhs_ty.tag()) { - .f32 => .divss, - .f64 => .divsd, + try self.genBinOpMir(switch (lhs_ty.zigTypeTag()) { else => return self.fail("TODO implement genBinOp for {s} {}", .{ @tagName(tag), lhs_ty.fmt(self.bin_file.options.module.?), }), + .Float => switch (lhs_ty.floatBits(self.target.*)) { + 32 => if (Target.x86.featureSetHas(self.target.cpu.features, .sse)) + .divss + else + return self.fail("TODO implement genBinOp for {s} {} without sse", .{ + @tagName(tag), lhs_ty.fmt(self.bin_file.options.module.?), + }), + 64 => if (Target.x86.featureSetHas(self.target.cpu.features, .sse2)) + .divsd + else + return self.fail("TODO implement genBinOp for {s} {} without sse2", .{ + @tagName(tag), lhs_ty.fmt(self.bin_file.options.module.?), + }), + else => return self.fail("TODO implement genBinOp for {s} {}", .{ + @tagName(tag), lhs_ty.fmt(self.bin_file.options.module.?), + }), + }, }, lhs_ty, dst_mcv, src_mcv); switch (tag) { .div_float, @@ -5169,16 +5576,18 @@ fn genBinOp( => if (Target.x86.featureSetHas(self.target.cpu.features, .sse4_1)) { const abi_size = @intCast(u32, lhs_ty.abiSize(self.target.*)); const dst_alias = registerAlias(dst_mcv.register, abi_size); - try self.asmRegisterRegisterImmediate(switch (lhs_ty.tag()) { - .f32 => .roundss, - .f64 => .roundsd, + try self.asmRegisterRegisterImmediate(switch (lhs_ty.floatBits(self.target.*)) { + 32 => .roundss, + 64 => .roundsd, else => unreachable, }, dst_alias, dst_alias, Immediate.u(switch (tag) { .div_trunc => 0b1_0_11, .div_floor => 0b1_0_01, else => unreachable, })); - } else return self.fail("TODO implement round without sse4_1", .{}), + } else return self.fail("TODO implement genBinOp for {s} {} without sse4_1", .{ + @tagName(tag), lhs_ty.fmt(self.bin_file.options.module.?), + }), else => unreachable, } }, @@ -5400,39 +5809,68 @@ fn genBinOpMir(self: *Self, mir_tag: Mir.Inst.Tag, ty: Type, dst_mcv: MCValue, s )), else => unreachable, }, - .register_offset, .eflags, + .register_offset, .memory, + .indirect, .load_direct, .lea_direct, .load_got, .lea_got, .load_tlv, .lea_tlv, + .load_frame, .lea_frame, => { - assert(abi_size <= 8); + blk: { + return self.asmRegisterMemory( + mir_tag, + registerAlias(dst_reg, abi_size), + Memory.sib(Memory.PtrSize.fromSize(abi_size), switch (src_mcv) { + .memory => |addr| .{ + .base = .{ .reg = .ds }, + .disp = math.cast(i32, addr) orelse break :blk, + }, + .indirect => |reg_off| .{ + .base = .{ .reg = reg_off.reg }, + .disp = reg_off.off, + }, + .load_frame => |frame_addr| .{ + .base = .{ .frame = frame_addr.index }, + .disp = frame_addr.off, + }, + else => break :blk, + }), + ); + } + const dst_reg_lock = self.register_manager.lockReg(dst_reg); defer if (dst_reg_lock) |lock| self.register_manager.unlockReg(lock); - const reg = try self.copyToTmpRegister(ty, src_mcv); - return self.genBinOpMir(mir_tag, ty, dst_mcv, .{ .register = reg }); - }, - .indirect, .load_frame => try self.asmRegisterMemory( - mir_tag, - registerAlias(dst_reg, abi_size), - Memory.sib(Memory.PtrSize.fromSize(abi_size), switch (src_mcv) { - .indirect => |reg_off| .{ - .base = .{ .reg = reg_off.reg }, - .disp = reg_off.off, + switch (src_mcv) { + .eflags, + .register_offset, + .lea_direct, + .lea_got, + .lea_tlv, + .lea_frame, + => { + const reg = try self.copyToTmpRegister(ty, src_mcv); + return self.genBinOpMir(mir_tag, ty, dst_mcv, .{ .register = reg }); }, - .load_frame => |frame_addr| .{ - .base = .{ .frame = frame_addr.index }, - .disp = frame_addr.off, + .memory, + .load_direct, + .load_got, + .load_tlv, + => { + const addr_reg = try self.copyToTmpRegister(ty, src_mcv.address()); + return self.genBinOpMir(mir_tag, ty, dst_mcv, .{ + .indirect = .{ .reg = addr_reg }, + }); }, else => unreachable, - }), - ), + } + }, } }, .memory, .indirect, .load_got, .load_direct, .load_tlv, .load_frame => { @@ -5769,7 +6207,7 @@ fn airArg(self: *Self, inst: Air.Inst.Index) !void { const ty = self.air.typeOfIndex(inst); const src_index = self.air.instructions.items(.data)[inst].arg.src_index; - const name = self.mod_fn.getParamName(self.bin_file.options.module.?, src_index); + const name = self.owner.mod_fn.getParamName(self.bin_file.options.module.?, src_index); try self.genArgDbgInfo(ty, name, dst_mcv); break :result dst_mcv; @@ -5793,7 +6231,10 @@ fn genArgDbgInfo(self: Self, ty: Type, name: [:0]const u8, mcv: MCValue) !void { //}, else => unreachable, // not a valid function parameter }; - try dw.genArgDbgInfo(name, ty, self.mod_fn.owner_decl, loc); + // TODO: this might need adjusting like the linkers do. + // Instead of flattening the owner and passing Decl.Index here we may + // want to special case LazySymbol in DWARF linker too. + try dw.genArgDbgInfo(name, ty, self.owner.getDecl(), loc); }, .plan9 => {}, .none => {}, @@ -5834,7 +6275,10 @@ fn genVarDbgInfo( break :blk .nop; }, }; - try dw.genVarDbgInfo(name, ty, self.mod_fn.owner_decl, is_ptr, loc); + // TODO: this might need adjusting like the linkers do. + // Instead of flattening the owner and passing Decl.Index here we may + // want to special case LazySymbol in DWARF linker too. + try dw.genVarDbgInfo(name, ty, self.owner.getDecl(), is_ptr, loc); }, .plan9 => {}, .none => {}, @@ -5966,12 +6410,14 @@ fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallModifier .base = .{ .reg = .ds }, .disp = @intCast(i32, got_addr), })); - } else if (self.bin_file.cast(link.File.Coff)) |_| { - const sym_index = try self.getSymbolIndexForDecl(func.owner_decl); + } else if (self.bin_file.cast(link.File.Coff)) |coff_file| { + const atom = try coff_file.getOrCreateAtomForDecl(func.owner_decl); + const sym_index = coff_file.getAtom(atom).getSymbolIndex().?; try self.genSetReg(.rax, Type.usize, .{ .lea_got = sym_index }); try self.asmRegister(.call, .rax); - } else if (self.bin_file.cast(link.File.MachO)) |_| { - const sym_index = try self.getSymbolIndexForDecl(func.owner_decl); + } else if (self.bin_file.cast(link.File.MachO)) |macho_file| { + const atom = try macho_file.getOrCreateAtomForDecl(func.owner_decl); + const sym_index = macho_file.getAtom(atom).getSymbolIndex().?; try self.genSetReg(.rax, Type.usize, .{ .lea_got = sym_index }); try self.asmRegister(.call, .rax); } else if (self.bin_file.cast(link.File.Plan9)) |p9| { @@ -5992,7 +6438,7 @@ fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallModifier const decl_name = mem.sliceTo(mod.declPtr(extern_fn.owner_decl).name, 0); const lib_name = mem.sliceTo(extern_fn.lib_name, 0); if (self.bin_file.cast(link.File.Coff)) |coff_file| { - const atom_index = try self.getSymbolIndexForDecl(self.mod_fn.owner_decl); + const atom_index = try self.owner.getSymbolIndex(self); const sym_index = try coff_file.getGlobalSymbol(decl_name, lib_name); _ = try self.addInst(.{ .tag = .mov_linker, @@ -6005,8 +6451,8 @@ fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallModifier }); try self.asmRegister(.call, .rax); } else if (self.bin_file.cast(link.File.MachO)) |macho_file| { + const atom_index = try self.owner.getSymbolIndex(self); const sym_index = try macho_file.getGlobalSymbol(decl_name, lib_name); - const atom_index = try self.getSymbolIndexForDecl(self.mod_fn.owner_decl); _ = try self.addInst(.{ .tag = .call_extern, .ops = undefined, @@ -6122,10 +6568,25 @@ fn airCmp(self: *Self, inst: Air.Inst.Index, op: math.CompareOperator) !void { defer if (dst_lock) |lock| self.register_manager.unlockReg(lock); const src_mcv = if (flipped) lhs_mcv else rhs_mcv; - try self.genBinOpMir(switch (ty.tag()) { + try self.genBinOpMir(switch (ty.zigTypeTag()) { else => .cmp, - .f32 => .ucomiss, - .f64 => .ucomisd, + .Float => switch (ty.floatBits(self.target.*)) { + 32 => if (Target.x86.featureSetHas(self.target.cpu.features, .sse)) + .ucomiss + else + return self.fail("TODO implement airCmp for {} without sse", .{ + ty.fmt(self.bin_file.options.module.?), + }), + 64 => if (Target.x86.featureSetHas(self.target.cpu.features, .sse2)) + .ucomisd + else + return self.fail("TODO implement airCmp for {} without sse2", .{ + ty.fmt(self.bin_file.options.module.?), + }), + else => return self.fail("TODO implement airCmp for {}", .{ + ty.fmt(self.bin_file.options.module.?), + }), + }, }, ty, dst_mcv, src_mcv); const signedness = if (ty.isAbiInt()) ty.intInfo(self.target.*).signedness else .unsigned; @@ -6141,42 +6602,13 @@ fn airCmpVector(self: *Self, inst: Air.Inst.Index) !void { } fn airCmpLtErrorsLen(self: *Self, inst: Air.Inst.Index) !void { + const mod = self.bin_file.options.module.?; const un_op = self.air.instructions.items(.data)[inst].un_op; const addr_reg = try self.register_manager.allocReg(null, gp); const addr_lock = self.register_manager.lockRegAssumeUnused(addr_reg); defer self.register_manager.unlockReg(addr_lock); - - if (self.bin_file.cast(link.File.Elf)) |elf_file| { - const atom_index = try elf_file.getOrCreateAtomForLazySymbol( - .{ .kind = .const_data, .ty = Type.anyerror }, - 4, // dword alignment - ); - const atom = elf_file.getAtom(atom_index); - _ = try atom.getOrCreateOffsetTableEntry(elf_file); - const got_addr = atom.getOffsetTableAddress(elf_file); - try self.asmRegisterMemory( - .mov, - addr_reg.to64(), - Memory.sib(.qword, .{ .base = .{ .reg = .ds }, .disp = @intCast(i32, got_addr) }), - ); - } else if (self.bin_file.cast(link.File.Coff)) |coff_file| { - const atom_index = try coff_file.getOrCreateAtomForLazySymbol( - .{ .kind = .const_data, .ty = Type.anyerror }, - 4, // dword alignment - ); - const sym_index = coff_file.getAtom(atom_index).getSymbolIndex().?; - try self.genSetReg(addr_reg, Type.usize, .{ .lea_got = sym_index }); - } else if (self.bin_file.cast(link.File.MachO)) |macho_file| { - const atom_index = try macho_file.getOrCreateAtomForLazySymbol( - .{ .kind = .const_data, .ty = Type.anyerror }, - 4, // dword alignment - ); - const sym_index = macho_file.getAtom(atom_index).getSymbolIndex().?; - try self.genSetReg(addr_reg, Type.usize, .{ .lea_got = sym_index }); - } else { - return self.fail("TODO implement airCmpLtErrorsLen for x86_64 {s}", .{@tagName(self.bin_file.tag)}); - } + try self.genLazySymbolRef(.lea, addr_reg, link.File.LazySymbol.initDecl(.const_data, null, mod)); try self.spillEflagsIfOccupied(); self.eflags_inst = inst; @@ -6345,35 +6777,26 @@ fn airCondBr(self: *Self, inst: Air.Inst.Index) !void { if (Air.refToIndex(pl_op.operand)) |op_inst| self.processDeath(op_inst); } - const outer_state = try self.saveState(); - { - self.scope_generation += 1; - const inner_state = try self.saveState(); + self.scope_generation += 1; + const state = try self.saveState(); - for (liveness_cond_br.then_deaths) |operand| self.processDeath(operand); - try self.genBody(then_body); - try self.restoreState(inner_state, &.{}, .{ - .emit_instructions = false, - .update_tracking = true, - .resurrect = true, - .close_scope = true, - }); + for (liveness_cond_br.then_deaths) |operand| self.processDeath(operand); + try self.genBody(then_body); + try self.restoreState(state, &.{}, .{ + .emit_instructions = false, + .update_tracking = true, + .resurrect = true, + .close_scope = true, + }); - try self.performReloc(reloc); + try self.performReloc(reloc); - for (liveness_cond_br.else_deaths) |operand| self.processDeath(operand); - try self.genBody(else_body); - try self.restoreState(inner_state, &.{}, .{ - .emit_instructions = false, - .update_tracking = true, - .resurrect = true, - .close_scope = true, - }); - } - try self.restoreState(outer_state, &.{}, .{ + for (liveness_cond_br.else_deaths) |operand| self.processDeath(operand); + try self.genBody(else_body); + try self.restoreState(state, &.{}, .{ .emit_instructions = false, - .update_tracking = false, - .resurrect = false, + .update_tracking = true, + .resurrect = true, .close_scope = true, }); @@ -6746,64 +7169,56 @@ fn airSwitchBr(self: *Self, inst: Air.Inst.Index) !void { if (Air.refToIndex(pl_op.operand)) |op_inst| self.processDeath(op_inst); } - const outer_state = try self.saveState(); - { - self.scope_generation += 1; - const inner_state = try self.saveState(); - - while (case_i < switch_br.data.cases_len) : (case_i += 1) { - const case = self.air.extraData(Air.SwitchBr.Case, extra_index); - const items = @ptrCast( - []const Air.Inst.Ref, - self.air.extra[case.end..][0..case.data.items_len], - ); - const case_body = self.air.extra[case.end + items.len ..][0..case.data.body_len]; - extra_index = case.end + items.len + case_body.len; + self.scope_generation += 1; + const state = try self.saveState(); - var relocs = try self.gpa.alloc(u32, items.len); - defer self.gpa.free(relocs); + while (case_i < switch_br.data.cases_len) : (case_i += 1) { + const case = self.air.extraData(Air.SwitchBr.Case, extra_index); + const items = @ptrCast( + []const Air.Inst.Ref, + self.air.extra[case.end..][0..case.data.items_len], + ); + const case_body = self.air.extra[case.end + items.len ..][0..case.data.body_len]; + extra_index = case.end + items.len + case_body.len; - for (items, relocs) |item, *reloc| { - try self.spillEflagsIfOccupied(); - const item_mcv = try self.resolveInst(item); - try self.genBinOpMir(.cmp, condition_ty, condition, item_mcv); - reloc.* = try self.asmJccReloc(undefined, .ne); - } + var relocs = try self.gpa.alloc(u32, items.len); + defer self.gpa.free(relocs); - for (liveness.deaths[case_i]) |operand| self.processDeath(operand); + try self.spillEflagsIfOccupied(); + for (items, relocs, 0..) |item, *reloc, i| { + const item_mcv = try self.resolveInst(item); + try self.genBinOpMir(.cmp, condition_ty, condition, item_mcv); + reloc.* = try self.asmJccReloc(undefined, if (i < relocs.len - 1) .e else .ne); + } - try self.genBody(case_body); - try self.restoreState(inner_state, &.{}, .{ - .emit_instructions = false, - .update_tracking = true, - .resurrect = true, - .close_scope = true, - }); + for (liveness.deaths[case_i]) |operand| self.processDeath(operand); - for (relocs) |reloc| try self.performReloc(reloc); - } + for (relocs[0 .. relocs.len - 1]) |reloc| try self.performReloc(reloc); + try self.genBody(case_body); + try self.restoreState(state, &.{}, .{ + .emit_instructions = false, + .update_tracking = true, + .resurrect = true, + .close_scope = true, + }); - if (switch_br.data.else_body_len > 0) { - const else_body = self.air.extra[extra_index..][0..switch_br.data.else_body_len]; + try self.performReloc(relocs[relocs.len - 1]); + } - const else_deaths = liveness.deaths.len - 1; - for (liveness.deaths[else_deaths]) |operand| self.processDeath(operand); + if (switch_br.data.else_body_len > 0) { + const else_body = self.air.extra[extra_index..][0..switch_br.data.else_body_len]; - try self.genBody(else_body); - try self.restoreState(inner_state, &.{}, .{ - .emit_instructions = false, - .update_tracking = true, - .resurrect = true, - .close_scope = true, - }); - } + const else_deaths = liveness.deaths.len - 1; + for (liveness.deaths[else_deaths]) |operand| self.processDeath(operand); + + try self.genBody(else_body); + try self.restoreState(state, &.{}, .{ + .emit_instructions = false, + .update_tracking = true, + .resurrect = true, + .close_scope = true, + }); } - try self.restoreState(outer_state, &.{}, .{ - .emit_instructions = false, - .update_tracking = false, - .resurrect = false, - .close_scope = true, - }); // We already took care of pl_op.operand earlier, so we're going to pass .none here return self.finishAir(inst, .unreach, .{ .none, .none, .none }); @@ -7289,7 +7704,7 @@ fn genSetReg(self: *Self, dst_reg: Register, ty: Type, src_mcv: MCValue) InnerEr }), ), .load_direct => |sym_index| if (try self.movMirTag(ty) == .mov) { - const atom_index = try self.getSymbolIndexForDecl(self.mod_fn.owner_decl); + const atom_index = try self.owner.getSymbolIndex(self); _ = try self.addInst(.{ .tag = .mov_linker, .ops = .direct_reloc, @@ -7316,7 +7731,7 @@ fn genSetReg(self: *Self, dst_reg: Register, ty: Type, src_mcv: MCValue) InnerEr ); }, .lea_direct, .lea_got => |sym_index| { - const atom_index = try self.getSymbolIndexForDecl(self.mod_fn.owner_decl); + const atom_index = try self.owner.getSymbolIndex(self); _ = try self.addInst(.{ .tag = switch (src_mcv) { .lea_direct => .lea_linker, @@ -7336,7 +7751,7 @@ fn genSetReg(self: *Self, dst_reg: Register, ty: Type, src_mcv: MCValue) InnerEr }); }, .lea_tlv => |sym_index| { - const atom_index = try self.getSymbolIndexForDecl(self.mod_fn.owner_decl); + const atom_index = try self.owner.getSymbolIndex(self); if (self.bin_file.cast(link.File.MachO)) |_| { _ = try self.addInst(.{ .tag = .lea_linker, @@ -7531,6 +7946,67 @@ fn genInlineMemset(self: *Self, dst_ptr: MCValue, value: MCValue, len: MCValue) }); } +fn genLazySymbolRef( + self: *Self, + comptime tag: Mir.Inst.Tag, + reg: Register, + lazy_sym: link.File.LazySymbol, +) InnerError!void { + if (self.bin_file.cast(link.File.Elf)) |elf_file| { + const atom_index = elf_file.getOrCreateAtomForLazySymbol(lazy_sym) catch |err| + return self.fail("{s} creating lazy symbol", .{@errorName(err)}); + const atom = elf_file.getAtom(atom_index); + _ = try atom.getOrCreateOffsetTableEntry(elf_file); + const got_addr = atom.getOffsetTableAddress(elf_file); + const got_mem = + Memory.sib(.qword, .{ .base = .{ .reg = .ds }, .disp = @intCast(i32, got_addr) }); + switch (tag) { + .lea, .mov => try self.asmRegisterMemory(.mov, reg.to64(), got_mem), + .call => try self.asmMemory(.call, got_mem), + else => unreachable, + } + switch (tag) { + .lea, .call => {}, + .mov => try self.asmRegisterMemory( + tag, + reg.to64(), + Memory.sib(.qword, .{ .base = .{ .reg = reg.to64() } }), + ), + else => unreachable, + } + } else if (self.bin_file.cast(link.File.Coff)) |coff_file| { + const atom_index = coff_file.getOrCreateAtomForLazySymbol(lazy_sym) catch |err| + return self.fail("{s} creating lazy symbol", .{@errorName(err)}); + const sym_index = coff_file.getAtom(atom_index).getSymbolIndex().?; + switch (tag) { + .lea, .call => try self.genSetReg(reg, Type.usize, .{ .lea_got = sym_index }), + .mov => try self.genSetReg(reg, Type.usize, .{ .load_got = sym_index }), + else => unreachable, + } + switch (tag) { + .lea, .mov => {}, + .call => try self.asmRegister(.call, reg), + else => unreachable, + } + } else if (self.bin_file.cast(link.File.MachO)) |macho_file| { + const atom_index = macho_file.getOrCreateAtomForLazySymbol(lazy_sym) catch |err| + return self.fail("{s} creating lazy symbol", .{@errorName(err)}); + const sym_index = macho_file.getAtom(atom_index).getSymbolIndex().?; + switch (tag) { + .lea, .call => try self.genSetReg(reg, Type.usize, .{ .lea_got = sym_index }), + .mov => try self.genSetReg(reg, Type.usize, .{ .load_got = sym_index }), + else => unreachable, + } + switch (tag) { + .lea, .mov => {}, + .call => try self.asmRegister(.call, reg), + else => unreachable, + } + } else { + return self.fail("TODO implement genLazySymbol for x86_64 {s}", .{@tagName(self.bin_file.tag)}); + } +} + fn airPtrToInt(self: *Self, inst: Air.Inst.Index) !void { const un_op = self.air.instructions.items(.data)[inst].un_op; const result = result: { @@ -7555,7 +8031,8 @@ fn airBitCast(self: *Self, inst: Air.Inst.Index) !void { const dst_rc = try self.regClassForType(dst_ty); const src_rc = try self.regClassForType(src_ty); const operand = try self.resolveInst(ty_op.operand); - if (dst_rc.eql(src_rc) and self.reuseOperand(inst, ty_op.operand, 0, operand)) break :result operand; + if (dst_rc.supersetOf(src_rc) and self.reuseOperand(inst, ty_op.operand, 0, operand)) + break :result operand; const operand_lock = switch (operand) { .register => |reg| self.register_manager.lockReg(reg), @@ -7595,9 +8072,59 @@ fn airArrayToSlice(self: *Self, inst: Air.Inst.Index) !void { fn airIntToFloat(self: *Self, inst: Air.Inst.Index) !void { const ty_op = self.air.instructions.items(.data)[inst].ty_op; - _ = ty_op; - return self.fail("TODO implement airIntToFloat for {}", .{self.target.cpu.arch}); - //return self.finishAir(inst, result, .{ ty_op.operand, .none, .none }); + + const src_ty = self.air.typeOf(ty_op.operand); + const src_bits = @intCast(u32, src_ty.bitSize(self.target.*)); + const src_signedness = + if (src_ty.isAbiInt()) src_ty.intInfo(self.target.*).signedness else .unsigned; + const dst_ty = self.air.typeOfIndex(inst); + + const src_size = std.math.divCeil(u32, @max(switch (src_signedness) { + .signed => src_bits, + .unsigned => src_bits + 1, + }, 32), 8) catch unreachable; + if (src_size > 8) return self.fail("TODO implement airIntToFloat from {} to {}", .{ + src_ty.fmt(self.bin_file.options.module.?), + dst_ty.fmt(self.bin_file.options.module.?), + }); + + const src_mcv = try self.resolveInst(ty_op.operand); + const src_reg = switch (src_mcv) { + .register => |reg| reg, + else => try self.copyToTmpRegister(src_ty, src_mcv), + }; + const src_lock = self.register_manager.lockRegAssumeUnused(src_reg); + defer self.register_manager.unlockReg(src_lock); + + if (src_bits < src_size * 8) try self.truncateRegister(src_ty, src_reg); + + const dst_reg = try self.register_manager.allocReg(inst, try self.regClassForType(dst_ty)); + const dst_mcv = MCValue{ .register = dst_reg }; + const dst_lock = self.register_manager.lockRegAssumeUnused(dst_reg); + defer self.register_manager.unlockReg(dst_lock); + + try self.asmRegisterRegister(switch (dst_ty.floatBits(self.target.*)) { + 32 => if (Target.x86.featureSetHas(self.target.cpu.features, .sse)) + .cvtsi2ss + else + return self.fail("TODO implement airIntToFloat from {} to {} without sse", .{ + src_ty.fmt(self.bin_file.options.module.?), + dst_ty.fmt(self.bin_file.options.module.?), + }), + 64 => if (Target.x86.featureSetHas(self.target.cpu.features, .sse2)) + .cvtsi2sd + else + return self.fail("TODO implement airIntToFloat from {} to {} without sse2", .{ + src_ty.fmt(self.bin_file.options.module.?), + dst_ty.fmt(self.bin_file.options.module.?), + }), + else => return self.fail("TODO implement airIntToFloat from {} to {}", .{ + src_ty.fmt(self.bin_file.options.module.?), + dst_ty.fmt(self.bin_file.options.module.?), + }), + }, dst_reg.to128(), registerAlias(src_reg, src_size)); + + return self.finishAir(inst, dst_mcv, .{ ty_op.operand, .none, .none }); } fn airFloatToInt(self: *Self, inst: Air.Inst.Index) !void { @@ -7652,70 +8179,50 @@ fn airCmpxchg(self: *Self, inst: Air.Inst.Index) !void { const extra = self.air.extraData(Air.Cmpxchg, ty_pl.payload).data; const ptr_ty = self.air.typeOf(extra.ptr); - const ptr_mcv = try self.resolveInst(extra.ptr); const val_ty = self.air.typeOf(extra.expected_value); const val_abi_size = @intCast(u32, val_ty.abiSize(self.target.*)); try self.spillRegisters(&.{ .rax, .rdx, .rbx, .rcx }); const regs_lock = self.register_manager.lockRegsAssumeUnused(4, .{ .rax, .rdx, .rbx, .rcx }); - for (regs_lock) |lock| self.register_manager.unlockReg(lock); + defer for (regs_lock) |lock| self.register_manager.unlockReg(lock); const exp_mcv = try self.resolveInst(extra.expected_value); - if (val_abi_size > 8) switch (exp_mcv) { - .load_frame => |frame_addr| { - try self.genSetReg(.rax, Type.usize, .{ .load_frame = .{ - .index = frame_addr.index, - .off = frame_addr.off + 0, - } }); - try self.genSetReg(.rdx, Type.usize, .{ .load_frame = .{ - .index = frame_addr.index, - .off = frame_addr.off + 8, - } }); - }, - else => return self.fail("TODO implement cmpxchg for {s}", .{@tagName(exp_mcv)}), + if (val_abi_size > 8) { + try self.genSetReg(.rax, Type.usize, exp_mcv); + try self.genSetReg(.rdx, Type.usize, exp_mcv.address().offset(8).deref()); } else try self.genSetReg(.rax, val_ty, exp_mcv); - const rax_lock = self.register_manager.lockRegAssumeUnused(.rax); - defer self.register_manager.unlockReg(rax_lock); const new_mcv = try self.resolveInst(extra.new_value); - const new_reg: Register = if (val_abi_size > 8) switch (new_mcv) { - .load_frame => |frame_addr| new: { - try self.genSetReg(.rbx, Type.usize, .{ .load_frame = .{ - .index = frame_addr.index, - .off = frame_addr.off + 0, - } }); - try self.genSetReg(.rcx, Type.usize, .{ .load_frame = .{ - .index = frame_addr.index, - .off = frame_addr.off + 8, - } }); - break :new undefined; - }, - else => return self.fail("TODO implement cmpxchg for {s}", .{@tagName(exp_mcv)}), + const new_reg = if (val_abi_size > 8) new: { + try self.genSetReg(.rbx, Type.usize, new_mcv); + try self.genSetReg(.rcx, Type.usize, new_mcv.address().offset(8).deref()); + break :new null; } else try self.copyToTmpRegister(val_ty, new_mcv); - const new_lock = self.register_manager.lockRegAssumeUnused(new_reg); - defer self.register_manager.unlockReg(new_lock); + const new_lock = if (new_reg) |reg| self.register_manager.lockRegAssumeUnused(reg) else null; + defer if (new_lock) |lock| self.register_manager.unlockReg(lock); + const ptr_mcv = try self.resolveInst(extra.ptr); const ptr_size = Memory.PtrSize.fromSize(val_abi_size); const ptr_mem = switch (ptr_mcv) { - .register => |reg| Memory.sib(ptr_size, .{ .base = .{ .reg = reg } }), - .lea_frame => |frame_addr| Memory.sib(ptr_size, .{ - .base = .{ .frame = frame_addr.index }, - .disp = frame_addr.off, + .immediate, .register, .register_offset, .lea_frame => ptr_mcv.deref().mem(ptr_size), + else => Memory.sib(ptr_size, .{ + .base = .{ .reg = try self.copyToTmpRegister(ptr_ty, ptr_mcv) }, }), - else => Memory.sib(ptr_size, .{ .base = .{ - .reg = try self.copyToTmpRegister(ptr_ty, ptr_mcv), - } }), }; - const mem_lock = switch (ptr_mem.base()) { + switch (ptr_mem) { + .sib, .rip => {}, + .moffs => return self.fail("TODO airCmpxchg with {s}", .{@tagName(ptr_mcv)}), + } + const ptr_lock = switch (ptr_mem.base()) { .none, .frame => null, .reg => |reg| self.register_manager.lockReg(reg), }; - defer if (mem_lock) |lock| self.register_manager.unlockReg(lock); + defer if (ptr_lock) |lock| self.register_manager.unlockReg(lock); try self.spillEflagsIfOccupied(); if (val_abi_size <= 8) { _ = try self.addInst(.{ .tag = .cmpxchg, .ops = .lock_mr_sib, .data = .{ .rx = .{ - .r = registerAlias(new_reg, val_abi_size), + .r = registerAlias(new_reg.?, val_abi_size), .payload = try self.addExtra(Mir.MemorySib.encode(ptr_mem)), } } }); } else { @@ -7733,24 +8240,9 @@ fn airCmpxchg(self: *Self, inst: Air.Inst.Index) !void { } const dst_mcv = try self.allocRegOrMem(inst, false); - try self.genSetMem( - .{ .frame = dst_mcv.load_frame.index }, - dst_mcv.load_frame.off + 16, - Type.bool, - .{ .eflags = .ne }, - ); - try self.genSetMem( - .{ .frame = dst_mcv.load_frame.index }, - dst_mcv.load_frame.off + 8, - Type.usize, - .{ .register = .rdx }, - ); - try self.genSetMem( - .{ .frame = dst_mcv.load_frame.index }, - dst_mcv.load_frame.off + 0, - Type.usize, - .{ .register = .rax }, - ); + try self.genCopy(Type.usize, dst_mcv, .{ .register = .rax }); + try self.genCopy(Type.usize, dst_mcv.address().offset(8).deref(), .{ .register = .rdx }); + try self.genCopy(Type.bool, dst_mcv.address().offset(16).deref(), .{ .eflags = .ne }); break :result dst_mcv; }; return self.finishAir(inst, result, .{ extra.ptr, extra.expected_value, extra.new_value }); @@ -7781,15 +8273,15 @@ fn atomicOp( const val_abi_size = @intCast(u32, val_ty.abiSize(self.target.*)); const ptr_size = Memory.PtrSize.fromSize(val_abi_size); const ptr_mem = switch (ptr_mcv) { - .register => |reg| Memory.sib(ptr_size, .{ .base = .{ .reg = reg } }), - .lea_frame => |frame_addr| Memory.sib(ptr_size, .{ - .base = .{ .frame = frame_addr.index }, - .disp = frame_addr.off, + .immediate, .register, .register_offset, .lea_frame => ptr_mcv.deref().mem(ptr_size), + else => Memory.sib(ptr_size, .{ + .base = .{ .reg = try self.copyToTmpRegister(ptr_ty, ptr_mcv) }, }), - else => Memory.sib(ptr_size, .{ .base = .{ - .reg = try self.copyToTmpRegister(ptr_ty, ptr_mcv), - } }), }; + switch (ptr_mem) { + .sib, .rip => {}, + .moffs => return self.fail("TODO airCmpxchg with {s}", .{@tagName(ptr_mcv)}), + } const mem_lock = switch (ptr_mem.base()) { .none, .frame => null, .reg => |reg| self.register_manager.lockReg(reg), @@ -7895,12 +8387,9 @@ fn atomicOp( registerAlias(val_reg, cmov_abi_size), cc, ), - .load_frame => |frame_addr| try self.asmCmovccRegisterMemory( + .memory, .indirect, .load_frame => try self.asmCmovccRegisterMemory( registerAlias(tmp_reg, cmov_abi_size), - Memory.sib(Memory.PtrSize.fromSize(cmov_abi_size), .{ - .base = .{ .frame = frame_addr.index }, - .disp = frame_addr.off, - }), + val_mcv.mem(Memory.PtrSize.fromSize(cmov_abi_size)), cc, ), else => { @@ -7923,72 +8412,62 @@ fn atomicOp( } else { try self.asmRegisterMemory(.mov, .rax, Memory.sib(.qword, .{ .base = ptr_mem.sib.base, - .scale_index = ptr_mem.sib.scale_index, + .scale_index = ptr_mem.scaleIndex(), .disp = ptr_mem.sib.disp + 0, })); try self.asmRegisterMemory(.mov, .rdx, Memory.sib(.qword, .{ .base = ptr_mem.sib.base, - .scale_index = ptr_mem.sib.scale_index, + .scale_index = ptr_mem.scaleIndex(), .disp = ptr_mem.sib.disp + 8, })); const loop = @intCast(u32, self.mir_instructions.len); - switch (val_mcv) { - .load_frame => |frame_addr| { - const val_lo_mem = Memory.sib(.qword, .{ - .base = .{ .frame = frame_addr.index }, - .disp = frame_addr.off + 0, - }); - const val_hi_mem = Memory.sib(.qword, .{ - .base = .{ .frame = frame_addr.index }, - .disp = frame_addr.off + 8, - }); - - if (rmw_op != std.builtin.AtomicRmwOp.Xchg) { - try self.asmRegisterRegister(.mov, .rbx, .rax); - try self.asmRegisterRegister(.mov, .rcx, .rdx); - } - if (rmw_op) |op| switch (op) { - .Xchg => { - try self.asmRegisterMemory(.mov, .rbx, val_lo_mem); - try self.asmRegisterMemory(.mov, .rcx, val_hi_mem); - }, - .Add => { - try self.asmRegisterMemory(.add, .rbx, val_lo_mem); - try self.asmRegisterMemory(.adc, .rcx, val_hi_mem); - }, - .Sub => { - try self.asmRegisterMemory(.sub, .rbx, val_lo_mem); - try self.asmRegisterMemory(.sbb, .rcx, val_hi_mem); - }, - .And => { - try self.asmRegisterMemory(.@"and", .rbx, val_lo_mem); - try self.asmRegisterMemory(.@"and", .rcx, val_hi_mem); - }, - .Nand => { - try self.asmRegisterMemory(.@"and", .rbx, val_lo_mem); - try self.asmRegisterMemory(.@"and", .rcx, val_hi_mem); - try self.asmRegister(.not, .rbx); - try self.asmRegister(.not, .rcx); - }, - .Or => { - try self.asmRegisterMemory(.@"or", .rbx, val_lo_mem); - try self.asmRegisterMemory(.@"or", .rcx, val_hi_mem); - }, - .Xor => { - try self.asmRegisterMemory(.xor, .rbx, val_lo_mem); - try self.asmRegisterMemory(.xor, .rcx, val_hi_mem); - }, - else => return self.fail( - "TODO implement x86 atomic loop for large abi {s}", - .{@tagName(op)}, - ), - }; - }, - else => return self.fail( - "TODO implement x86 atomic loop for large abi {s}", - .{@tagName(val_mcv)}, - ), + const val_mem_mcv: MCValue = switch (val_mcv) { + .memory, .indirect, .load_frame => val_mcv, + else => .{ .indirect = .{ + .reg = try self.copyToTmpRegister(Type.usize, val_mcv.address()), + } }, + }; + const val_lo_mem = val_mem_mcv.mem(.qword); + const val_hi_mem = val_mem_mcv.address().offset(8).deref().mem(.qword); + if (rmw_op != std.builtin.AtomicRmwOp.Xchg) { + try self.asmRegisterRegister(.mov, .rbx, .rax); + try self.asmRegisterRegister(.mov, .rcx, .rdx); } + if (rmw_op) |op| switch (op) { + .Xchg => { + try self.asmRegisterMemory(.mov, .rbx, val_lo_mem); + try self.asmRegisterMemory(.mov, .rcx, val_hi_mem); + }, + .Add => { + try self.asmRegisterMemory(.add, .rbx, val_lo_mem); + try self.asmRegisterMemory(.adc, .rcx, val_hi_mem); + }, + .Sub => { + try self.asmRegisterMemory(.sub, .rbx, val_lo_mem); + try self.asmRegisterMemory(.sbb, .rcx, val_hi_mem); + }, + .And => { + try self.asmRegisterMemory(.@"and", .rbx, val_lo_mem); + try self.asmRegisterMemory(.@"and", .rcx, val_hi_mem); + }, + .Nand => { + try self.asmRegisterMemory(.@"and", .rbx, val_lo_mem); + try self.asmRegisterMemory(.@"and", .rcx, val_hi_mem); + try self.asmRegister(.not, .rbx); + try self.asmRegister(.not, .rcx); + }, + .Or => { + try self.asmRegisterMemory(.@"or", .rbx, val_lo_mem); + try self.asmRegisterMemory(.@"or", .rcx, val_hi_mem); + }, + .Xor => { + try self.asmRegisterMemory(.xor, .rbx, val_lo_mem); + try self.asmRegisterMemory(.xor, .rcx, val_hi_mem); + }, + else => return self.fail("TODO implement x86 atomic loop for {} {s}", .{ + val_ty.fmt(self.bin_file.options.module.?), @tagName(op), + }), + }; _ = try self.addInst(.{ .tag = .cmpxchgb, .ops = .lock_m_sib, .data = .{ .payload = try self.addExtra(Mir.MemorySib.encode(ptr_mem)), } }); @@ -8230,14 +8709,48 @@ fn airMemcpy(self: *Self, inst: Air.Inst.Index) !void { } fn airTagName(self: *Self, inst: Air.Inst.Index) !void { + const mod = self.bin_file.options.module.?; const un_op = self.air.instructions.items(.data)[inst].un_op; + const inst_ty = self.air.typeOfIndex(inst); + const enum_ty = self.air.typeOf(un_op); + + // We need a properly aligned and sized call frame to be able to call this function. + { + const needed_call_frame = FrameAlloc.init(.{ + .size = inst_ty.abiSize(self.target.*), + .alignment = inst_ty.abiAlignment(self.target.*), + }); + const frame_allocs_slice = self.frame_allocs.slice(); + const stack_frame_size = + &frame_allocs_slice.items(.abi_size)[@enumToInt(FrameIndex.call_frame)]; + stack_frame_size.* = @max(stack_frame_size.*, needed_call_frame.abi_size); + const stack_frame_align = + &frame_allocs_slice.items(.abi_align)[@enumToInt(FrameIndex.call_frame)]; + stack_frame_align.* = @max(stack_frame_align.*, needed_call_frame.abi_align); + } + + try self.spillEflagsIfOccupied(); + try self.spillRegisters(abi.getCallerPreservedRegs(self.target.*)); + + const param_regs = abi.getCAbiIntParamRegs(self.target.*); + + const dst_mcv = try self.allocRegOrMem(inst, false); + try self.genSetReg(param_regs[0], Type.usize, dst_mcv.address()); + const operand = try self.resolveInst(un_op); - _ = operand; - return self.fail("TODO implement airTagName for x86_64", .{}); - //return self.finishAir(inst, result, .{ un_op, .none, .none }); + try self.genSetReg(param_regs[1], enum_ty, operand); + + try self.genLazySymbolRef( + .call, + .rax, + link.File.LazySymbol.initDecl(.code, enum_ty.getOwnerDecl(), mod), + ); + + return self.finishAir(inst, dst_mcv, .{ un_op, .none, .none }); } fn airErrorName(self: *Self, inst: Air.Inst.Index) !void { + const mod = self.bin_file.options.module.?; const un_op = self.air.instructions.items(.data)[inst].un_op; const err_ty = self.air.typeOf(un_op); @@ -8249,37 +8762,7 @@ fn airErrorName(self: *Self, inst: Air.Inst.Index) !void { const addr_reg = try self.register_manager.allocReg(null, gp); const addr_lock = self.register_manager.lockRegAssumeUnused(addr_reg); defer self.register_manager.unlockReg(addr_lock); - - if (self.bin_file.cast(link.File.Elf)) |elf_file| { - const atom_index = try elf_file.getOrCreateAtomForLazySymbol( - .{ .kind = .const_data, .ty = Type.anyerror }, - 4, // dword alignment - ); - const atom = elf_file.getAtom(atom_index); - _ = try atom.getOrCreateOffsetTableEntry(elf_file); - const got_addr = atom.getOffsetTableAddress(elf_file); - try self.asmRegisterMemory( - .mov, - addr_reg.to64(), - Memory.sib(.qword, .{ .base = .{ .reg = .ds }, .disp = @intCast(i32, got_addr) }), - ); - } else if (self.bin_file.cast(link.File.Coff)) |coff_file| { - const atom_index = try coff_file.getOrCreateAtomForLazySymbol( - .{ .kind = .const_data, .ty = Type.anyerror }, - 4, // dword alignment - ); - const sym_index = coff_file.getAtom(atom_index).getSymbolIndex().?; - try self.genSetReg(addr_reg, Type.usize, .{ .lea_got = sym_index }); - } else if (self.bin_file.cast(link.File.MachO)) |macho_file| { - const atom_index = try macho_file.getOrCreateAtomForLazySymbol( - .{ .kind = .const_data, .ty = Type.anyerror }, - 4, // dword alignment - ); - const sym_index = macho_file.getAtom(atom_index).getSymbolIndex().?; - try self.genSetReg(addr_reg, Type.usize, .{ .lea_got = sym_index }); - } else { - return self.fail("TODO implement airErrorName for x86_64 {s}", .{@tagName(self.bin_file.tag)}); - } + try self.genLazySymbolRef(.lea, addr_reg, link.File.LazySymbol.initDecl(.const_data, null, mod)); const start_reg = try self.register_manager.allocReg(null, gp); const start_lock = self.register_manager.lockRegAssumeUnused(start_reg); @@ -8596,12 +9079,7 @@ fn limitImmediateType(self: *Self, operand: Air.Inst.Ref, comptime T: type) !MCV } fn genTypedValue(self: *Self, arg_tv: TypedValue) InnerError!MCValue { - const mcv: MCValue = switch (try codegen.genTypedValue( - self.bin_file, - self.src_loc, - arg_tv, - self.mod_fn.owner_decl, - )) { + return switch (try codegen.genTypedValue(self.bin_file, self.src_loc, arg_tv, self.owner.getDecl())) { .mcv => |mcv| switch (mcv) { .none => .none, .undef => .undef, @@ -8616,7 +9094,6 @@ fn genTypedValue(self: *Self, arg_tv: TypedValue) InnerError!MCValue { return error.CodegenFail; }, }; - return mcv; } const CallMCValues = struct { @@ -8664,6 +9141,7 @@ fn resolveCallingConventionValues( }, .C => { var param_reg_i: usize = 0; + var param_sse_reg_i: usize = 0; result.stack_align = 16; switch (self.target.os.tag) { @@ -8681,26 +9159,39 @@ fn resolveCallingConventionValues( // TODO: is this even possible for C calling convention? result.return_value = InstTracking.init(.none); } else { - const ret_reg = abi.getCAbiIntReturnRegs(self.target.*)[0]; - const ret_ty_size = @intCast(u31, ret_ty.abiSize(self.target.*)); - if (ret_ty_size <= 8) { - const aliased_reg = registerAlias(ret_reg, ret_ty_size); - result.return_value = .{ .short = .{ .register = aliased_reg }, .long = .none }; - } else { - const ret_indirect_reg = abi.getCAbiIntParamRegs(self.target.*)[param_reg_i]; - param_reg_i += 1; - result.return_value = .{ - .short = .{ .indirect = .{ .reg = ret_reg } }, - .long = .{ .indirect = .{ .reg = ret_indirect_reg } }, - }; + const classes = switch (self.target.os.tag) { + .windows => &[1]abi.Class{abi.classifyWindows(ret_ty, self.target.*)}, + else => mem.sliceTo(&abi.classifySystemV(ret_ty, self.target.*, .ret), .none), + }; + if (classes.len > 1) { + return self.fail("TODO handle multiple classes per type", .{}); } + const ret_reg = abi.getCAbiIntReturnRegs(self.target.*)[0]; + result.return_value = switch (classes[0]) { + .integer => InstTracking.init(.{ .register = registerAlias( + ret_reg, + @intCast(u32, ret_ty.abiSize(self.target.*)), + ) }), + .float, .sse => InstTracking.init(.{ .register = .xmm0 }), + .memory => ret: { + const ret_indirect_reg = abi.getCAbiIntParamRegs(self.target.*)[param_reg_i]; + param_reg_i += 1; + break :ret .{ + .short = .{ .indirect = .{ .reg = ret_reg } }, + .long = .{ .indirect = .{ .reg = ret_indirect_reg } }, + }; + }, + else => |class| return self.fail("TODO handle calling convention class {s}", .{ + @tagName(class), + }), + }; } // Input params for (param_types, result.args) |ty, *arg| { assert(ty.hasRuntimeBitsIgnoreComptime()); - const classes: []const abi.Class = switch (self.target.os.tag) { + const classes = switch (self.target.os.tag) { .windows => &[1]abi.Class{abi.classifyWindows(ty, self.target.*)}, else => mem.sliceTo(&abi.classifySystemV(ty, self.target.*, .arg), .none), }; @@ -8708,13 +9199,29 @@ fn resolveCallingConventionValues( return self.fail("TODO handle multiple classes per type", .{}); } switch (classes[0]) { - .integer => blk: { - if (param_reg_i >= abi.getCAbiIntParamRegs(self.target.*).len) break :blk; - const param_reg = abi.getCAbiIntParamRegs(self.target.*)[param_reg_i]; + .integer => if (param_reg_i < abi.getCAbiIntParamRegs(self.target.*).len) { + arg.* = .{ .register = abi.getCAbiIntParamRegs(self.target.*)[param_reg_i] }; param_reg_i += 1; - arg.* = .{ .register = param_reg }; continue; }, + .float, .sse => switch (self.target.os.tag) { + .windows => if (param_reg_i < 4) { + arg.* = .{ .register = @intToEnum( + Register, + @enumToInt(Register.xmm0) + param_reg_i, + ) }; + param_reg_i += 1; + continue; + }, + else => if (param_sse_reg_i < 8) { + arg.* = .{ .register = @intToEnum( + Register, + @enumToInt(Register.xmm0) + param_sse_reg_i, + ) }; + param_sse_reg_i += 1; + continue; + }, + }, .memory => {}, // fallthrough else => |class| return self.fail("TODO handle calling convention class {s}", .{ @tagName(class), @@ -8863,15 +9370,16 @@ fn truncateRegister(self: *Self, ty: Type, reg: Register) !void { } fn regBitSize(self: *Self, ty: Type) u64 { + const abi_size = ty.abiSize(self.target.*); return switch (ty.zigTypeTag()) { - else => switch (ty.abiSize(self.target.*)) { + else => switch (abi_size) { 1 => 8, 2 => 16, 3...4 => 32, 5...8 => 64, else => unreachable, }, - .Float => switch (ty.abiSize(self.target.*)) { + .Float => switch (abi_size) { 1...16 => 128, 17...32 => 256, else => unreachable, @@ -8882,17 +9390,3 @@ fn regBitSize(self: *Self, ty: Type) u64 { fn regExtraBits(self: *Self, ty: Type) u64 { return self.regBitSize(ty) - ty.bitSize(self.target.*); } - -fn hasAvxSupport(target: Target) bool { - return Target.x86.featureSetHasAny(target.cpu.features, .{ .avx, .avx2 }); -} - -fn getSymbolIndexForDecl(self: *Self, decl_index: Module.Decl.Index) !u32 { - if (self.bin_file.cast(link.File.MachO)) |macho_file| { - const atom = try macho_file.getOrCreateAtomForDecl(decl_index); - return macho_file.getAtom(atom).getSymbolIndex().?; - } else if (self.bin_file.cast(link.File.Coff)) |coff_file| { - const atom = try coff_file.getOrCreateAtomForDecl(decl_index); - return coff_file.getAtom(atom).getSymbolIndex().?; - } else unreachable; -} diff --git a/src/arch/x86_64/Encoding.zig b/src/arch/x86_64/Encoding.zig index a977af7842..5cb7f7a2d9 100644 --- a/src/arch/x86_64/Encoding.zig +++ b/src/arch/x86_64/Encoding.zig @@ -58,7 +58,7 @@ pub fn findByMnemonic( next: for (mnemonic_to_encodings_map[@enumToInt(mnemonic)]) |data| { switch (data.mode) { .rex => if (!rex_required) continue, - .long, .sse2_long => {}, + .long, .sse_long, .sse2_long => {}, else => if (rex_required) continue, } for (input_ops, data.ops) |input_op, data_op| @@ -90,7 +90,7 @@ pub fn findByOpcode(opc: []const u8, prefixes: struct { if (prefixes.rex.w) { switch (data.mode) { .short, .fpu, .sse, .sse2, .sse4_1, .none => continue, - .long, .sse2_long, .rex => {}, + .long, .sse_long, .sse2_long, .rex => {}, } } else if (prefixes.rex.present and !prefixes.rex.isSet()) { switch (data.mode) { @@ -138,7 +138,7 @@ pub fn modRmExt(encoding: Encoding) u3 { pub fn operandBitSize(encoding: Encoding) u64 { switch (encoding.data.mode) { .short => return 16, - .long, .sse2_long => return 64, + .long, .sse_long, .sse2_long => return 64, else => {}, } const bit_size: u64 = switch (encoding.data.op_en) { @@ -163,7 +163,7 @@ pub fn format( _ = options; _ = fmt; switch (encoding.data.mode) { - .long, .sse2_long => try writer.writeAll("REX.W + "), + .long, .sse_long, .sse2_long => try writer.writeAll("REX.W + "), else => {}, } @@ -269,21 +269,25 @@ pub const Mnemonic = enum { // SSE addss, cmpss, + cvtsi2ss, divss, maxss, minss, movss, mulss, subss, ucomiss, + xorps, // SSE2 addsd, //cmpsd, + cvtsd2ss, cvtsi2sd, cvtss2sd, divsd, maxsd, minsd, movq, //movd, movsd, mulsd, subsd, ucomisd, + xorpd, // SSE4.1 roundss, roundsd, @@ -318,7 +322,7 @@ pub const Op = enum { m, moffs, sreg, - xmm, xmm_m32, xmm_m64, + xmm, xmm_m32, xmm_m64, xmm_m128, // zig fmt: on pub fn fromOperand(operand: Instruction.Operand) Op { @@ -400,7 +404,7 @@ pub const Op = enum { .imm32, .imm32s, .eax, .r32, .m32, .rm32, .rel32, .xmm_m32 => 32, .imm64, .rax, .r64, .m64, .rm64, .xmm_m64 => 64, .m80 => 80, - .m128, .xmm => 128, + .m128, .xmm, .xmm_m128 => 128, }; } @@ -423,8 +427,8 @@ pub const Op = enum { .al, .ax, .eax, .rax, .r8, .r16, .r32, .r64, .rm8, .rm16, .rm32, .rm64, - .xmm, .xmm_m32, .xmm_m64, - => true, + .xmm, .xmm_m32, .xmm_m64, .xmm_m128, + => true, else => false, }; // zig fmt: on @@ -449,7 +453,7 @@ pub const Op = enum { .rm8, .rm16, .rm32, .rm64, .m8, .m16, .m32, .m64, .m80, .m128, .m, - .xmm_m32, .xmm_m64, + .xmm_m32, .xmm_m64, .xmm_m128, => true, else => false, }; @@ -470,13 +474,13 @@ pub const Op = enum { .r8, .r16, .r32, .r64 => .general_purpose, .rm8, .rm16, .rm32, .rm64 => .general_purpose, .sreg => .segment, - .xmm, .xmm_m32, .xmm_m64 => .floating_point, + .xmm, .xmm_m32, .xmm_m64, .xmm_m128 => .floating_point, }; } pub fn isFloatingPointRegister(op: Op) bool { return switch (op) { - .xmm, .xmm_m32, .xmm_m64 => true, + .xmm, .xmm_m32, .xmm_m64, .xmm_m128 => true, else => false, }; } @@ -535,6 +539,7 @@ pub const Mode = enum { rex, long, sse, + sse_long, sse2, sse2_long, sse4_1, diff --git a/src/arch/x86_64/Lower.zig b/src/arch/x86_64/Lower.zig index af0146c6e1..a961100687 100644 --- a/src/arch/x86_64/Lower.zig +++ b/src/arch/x86_64/Lower.zig @@ -95,6 +95,7 @@ pub fn lowerMir(lower: *Lower, inst: Mir.Inst) Error![]const Instruction { .addss, .cmpss, + .cvtsi2ss, .divss, .maxss, .minss, @@ -103,8 +104,12 @@ pub fn lowerMir(lower: *Lower, inst: Mir.Inst) Error![]const Instruction { .roundss, .subss, .ucomiss, + .xorps, .addsd, .cmpsd, + .cvtsd2ss, + .cvtsi2sd, + .cvtss2sd, .divsd, .maxsd, .minsd, @@ -113,6 +118,7 @@ pub fn lowerMir(lower: *Lower, inst: Mir.Inst) Error![]const Instruction { .roundsd, .subsd, .ucomisd, + .xorpd, => try lower.mirGeneric(inst), .cmps, diff --git a/src/arch/x86_64/Mir.zig b/src/arch/x86_64/Mir.zig index c8703373d2..c14338b13d 100644 --- a/src/arch/x86_64/Mir.zig +++ b/src/arch/x86_64/Mir.zig @@ -170,6 +170,8 @@ pub const Inst = struct { addss, /// Compare scalar single-precision floating-point values cmpss, + /// Convert doubleword integer to scalar single-precision floating-point value + cvtsi2ss, /// Divide scalar single-precision floating-point values divss, /// Return maximum single-precision floating-point value @@ -186,10 +188,18 @@ pub const Inst = struct { subss, /// Unordered compare scalar single-precision floating-point values ucomiss, + /// Bitwise logical xor of packed single precision floating-point values + xorps, /// Add double precision floating point values addsd, /// Compare scalar double-precision floating-point values cmpsd, + /// Convert scalar double-precision floating-point value to scalar single-precision floating-point value + cvtsd2ss, + /// Convert doubleword integer to scalar double-precision floating-point value + cvtsi2sd, + /// Convert scalar single-precision floating-point value to scalar double-precision floating-point value + cvtss2sd, /// Divide scalar double-precision floating-point values divsd, /// Return maximum double-precision floating-point value @@ -206,6 +216,8 @@ pub const Inst = struct { subsd, /// Unordered compare scalar double-precision floating-point values ucomisd, + /// Bitwise logical xor of packed double precision floating-point values + xorpd, /// Compare string operands cmps, diff --git a/src/arch/x86_64/encoder.zig b/src/arch/x86_64/encoder.zig index 329dfca924..4c900697f5 100644 --- a/src/arch/x86_64/encoder.zig +++ b/src/arch/x86_64/encoder.zig @@ -323,7 +323,7 @@ pub const Instruction = struct { var rex = Rex{}; rex.present = inst.encoding.data.mode == .rex; switch (inst.encoding.data.mode) { - .long, .sse2_long => rex.w = true, + .long, .sse_long, .sse2_long => rex.w = true, else => {}, } diff --git a/src/arch/x86_64/encodings.zig b/src/arch/x86_64/encodings.zig index 333bdceea8..ac427c3633 100644 --- a/src/arch/x86_64/encodings.zig +++ b/src/arch/x86_64/encodings.zig @@ -834,6 +834,9 @@ pub const table = [_]Entry{ .{ .cmpss, .rmi, &.{ .xmm, .xmm_m32, .imm8 }, &.{ 0xf3, 0x0f, 0xc2 }, 0, .sse }, + .{ .cvtsi2ss, .rm, &.{ .xmm, .rm32 }, &.{ 0xf3, 0x0f, 0x2a }, 0, .sse }, + .{ .cvtsi2ss, .rm, &.{ .xmm, .rm64 }, &.{ 0xf3, 0x0f, 0x2a }, 0, .sse_long }, + .{ .divss, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x5e }, 0, .sse }, .{ .maxss, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x5f }, 0, .sse }, @@ -849,11 +852,20 @@ pub const table = [_]Entry{ .{ .ucomiss, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0x0f, 0x2e }, 0, .sse }, + .{ .xorps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x57 }, 0, .sse }, + // SSE2 .{ .addsd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x58 }, 0, .sse2 }, .{ .cmpsd, .rmi, &.{ .xmm, .xmm_m64, .imm8 }, &.{ 0xf2, 0x0f, 0xc2 }, 0, .sse2 }, + .{ .cvtsd2ss, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x5a }, 0, .sse2 }, + + .{ .cvtsi2sd, .rm, &.{ .xmm, .rm32 }, &.{ 0xf2, 0x0f, 0x2a }, 0, .sse2 }, + .{ .cvtsi2sd, .rm, &.{ .xmm, .rm64 }, &.{ 0xf2, 0x0f, 0x2a }, 0, .sse2_long }, + + .{ .cvtss2sd, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x5a }, 0, .sse2 }, + .{ .divsd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x5e }, 0, .sse2 }, .{ .maxsd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x5f }, 0, .sse2 }, @@ -878,6 +890,8 @@ pub const table = [_]Entry{ .{ .ucomisd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0x66, 0x0f, 0x2e }, 0, .sse2 }, + .{ .xorpd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x57 }, 0, .sse2 }, + // SSE4.1 .{ .roundss, .rmi, &.{ .xmm, .xmm_m32, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x0a }, 0, .sse4_1 }, .{ .roundsd, .rmi, &.{ .xmm, .xmm_m64, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x0b }, 0, .sse4_1 }, diff --git a/src/codegen.zig b/src/codegen.zig index bf80a90cc3..078feb409d 100644 --- a/src/codegen.zig +++ b/src/codegen.zig @@ -7,6 +7,7 @@ const link = @import("link.zig"); const log = std.log.scoped(.codegen); const mem = std.mem; const math = std.math; +const target_util = @import("target.zig"); const trace = @import("tracy.zig").trace; const Air = @import("Air.zig"); @@ -89,25 +90,36 @@ pub fn generateFunction( } } +pub fn generateLazyFunction( + bin_file: *link.File, + src_loc: Module.SrcLoc, + lazy_sym: link.File.LazySymbol, + code: *std.ArrayList(u8), + debug_output: DebugInfoOutput, +) CodeGenError!Result { + switch (bin_file.options.target.cpu.arch) { + .x86_64 => return @import("arch/x86_64/CodeGen.zig").generateLazy(bin_file, src_loc, lazy_sym, code, debug_output), + else => unreachable, + } +} + fn writeFloat(comptime F: type, f: F, target: Target, endian: std.builtin.Endian, code: []u8) void { _ = target; - const Int = @Type(.{ .Int = .{ - .signedness = .unsigned, - .bits = @typeInfo(F).Float.bits, - } }); + const bits = @typeInfo(F).Float.bits; + const Int = @Type(.{ .Int = .{ .signedness = .unsigned, .bits = bits } }); const int = @bitCast(Int, f); - mem.writeInt(Int, code[0..@sizeOf(Int)], int, endian); + mem.writeInt(Int, code[0..@divExact(bits, 8)], int, endian); } pub fn generateLazySymbol( bin_file: *link.File, src_loc: Module.SrcLoc, lazy_sym: link.File.LazySymbol, + alignment: *u32, code: *std.ArrayList(u8), debug_output: DebugInfoOutput, reloc_info: RelocInfo, ) CodeGenError!Result { - _ = debug_output; _ = reloc_info; const tracy = trace(@src()); @@ -122,7 +134,13 @@ pub fn generateLazySymbol( lazy_sym.ty.fmt(mod), }); - if (lazy_sym.kind == .const_data and lazy_sym.ty.isAnyError()) { + if (lazy_sym.kind == .code) { + alignment.* = target_util.defaultFunctionAlignment(target); + return generateLazyFunction(bin_file, src_loc, lazy_sym, code, debug_output); + } + + if (lazy_sym.ty.isAnyError()) { + alignment.* = 4; const err_names = mod.error_name_list.items; mem.writeInt(u32, try code.addManyAsArray(4), @intCast(u32, err_names.len), endian); var offset = code.items.len; @@ -136,6 +154,14 @@ pub fn generateLazySymbol( } mem.writeInt(u32, code.items[offset..][0..4], @intCast(u32, code.items.len), endian); return Result.ok; + } else if (lazy_sym.ty.zigTypeTag() == .Enum) { + alignment.* = 1; + for (lazy_sym.ty.enumFields().keys()) |tag_name| { + try code.ensureUnusedCapacity(tag_name.len + 1); + code.appendSliceAssumeCapacity(tag_name); + code.appendAssumeCapacity(0); + } + return Result.ok; } else return .{ .fail = try ErrorMsg.create( bin_file.allocator, src_loc, @@ -187,18 +213,14 @@ pub fn generateSymbol( }; }, .Float => { - const float_bits = typed_value.ty.floatBits(target); - switch (float_bits) { + switch (typed_value.ty.floatBits(target)) { 16 => writeFloat(f16, typed_value.val.toFloat(f16), target, endian, try code.addManyAsArray(2)), 32 => writeFloat(f32, typed_value.val.toFloat(f32), target, endian, try code.addManyAsArray(4)), 64 => writeFloat(f64, typed_value.val.toFloat(f64), target, endian, try code.addManyAsArray(8)), - 80 => return Result{ - .fail = try ErrorMsg.create( - bin_file.allocator, - src_loc, - "TODO handle f80 in generateSymbol", - .{}, - ), + 80 => { + writeFloat(f80, typed_value.val.toFloat(f80), target, endian, try code.addManyAsArray(10)); + const abi_size = math.cast(usize, typed_value.ty.abiSize(target)) orelse return error.Overflow; + try code.appendNTimes(0, abi_size - 10); }, 128 => writeFloat(f128, typed_value.val.toFloat(f128), target, endian, try code.addManyAsArray(16)), else => unreachable, @@ -291,6 +313,20 @@ pub fn generateSymbol( }, }, .Pointer => switch (typed_value.val.tag()) { + .null_value => { + switch (target.cpu.arch.ptrBitWidth()) { + 32 => { + mem.writeInt(u32, try code.addManyAsArray(4), 0, endian); + if (typed_value.ty.isSlice()) try code.appendNTimes(0xaa, 4); + }, + 64 => { + mem.writeInt(u64, try code.addManyAsArray(8), 0, endian); + if (typed_value.ty.isSlice()) try code.appendNTimes(0xaa, 8); + }, + else => unreachable, + } + return Result.ok; + }, .zero, .one, .int_u64, .int_big_positive => { switch (target.cpu.arch.ptrBitWidth()) { 32 => { @@ -397,30 +433,15 @@ pub fn generateSymbol( }, } }, - .elem_ptr => { - const elem_ptr = typed_value.val.castTag(.elem_ptr).?.data; - const elem_size = typed_value.ty.childType().abiSize(target); - const addend = @intCast(u32, elem_ptr.index * elem_size); - const array_ptr = elem_ptr.array_ptr; - - switch (array_ptr.tag()) { - .decl_ref => { - const decl_index = array_ptr.castTag(.decl_ref).?.data; - return lowerDeclRef(bin_file, src_loc, typed_value, decl_index, code, debug_output, .{ - .parent_atom_index = reloc_info.parent_atom_index, - .addend = (reloc_info.addend orelse 0) + addend, - }); - }, - else => return Result{ - .fail = try ErrorMsg.create( - bin_file.allocator, - src_loc, - "TODO implement generateSymbol for pointer type value: '{s}'", - .{@tagName(typed_value.val.tag())}, - ), - }, - } - }, + .elem_ptr => return lowerParentPtr( + bin_file, + src_loc, + typed_value, + typed_value.val, + code, + debug_output, + reloc_info, + ), else => return Result{ .fail = try ErrorMsg.create( bin_file.allocator, @@ -838,9 +859,62 @@ pub fn generateSymbol( } } +fn lowerParentPtr( + bin_file: *link.File, + src_loc: Module.SrcLoc, + typed_value: TypedValue, + parent_ptr: Value, + code: *std.ArrayList(u8), + debug_output: DebugInfoOutput, + reloc_info: RelocInfo, +) CodeGenError!Result { + const target = bin_file.options.target; + + switch (parent_ptr.tag()) { + .elem_ptr => { + const elem_ptr = parent_ptr.castTag(.elem_ptr).?.data; + return lowerParentPtr( + bin_file, + src_loc, + typed_value, + elem_ptr.array_ptr, + code, + debug_output, + reloc_info.offset(@intCast(u32, elem_ptr.index * elem_ptr.elem_ty.abiSize(target))), + ); + }, + .decl_ref => { + const decl_index = parent_ptr.castTag(.decl_ref).?.data; + return lowerDeclRef( + bin_file, + src_loc, + typed_value, + decl_index, + code, + debug_output, + reloc_info, + ); + }, + else => |t| { + return Result{ + .fail = try ErrorMsg.create( + bin_file.allocator, + src_loc, + "TODO implement lowerParentPtr for type '{s}'", + .{@tagName(t)}, + ), + }; + }, + } +} + const RelocInfo = struct { parent_atom_index: u32, addend: ?u32 = null, + + fn offset(ri: RelocInfo, addend: u32) RelocInfo { + return .{ .parent_atom_index = ri.parent_atom_index, .addend = (ri.addend orelse 0) + addend }; + } }; fn lowerDeclRef( @@ -1095,6 +1169,9 @@ pub fn genTypedValue( .Slice => {}, else => { switch (typed_value.val.tag()) { + .null_value => { + return GenResult.mcv(.{ .immediate = 0 }); + }, .int_u64 => { return GenResult.mcv(.{ .immediate = typed_value.val.toUnsignedInt(target) }); }, diff --git a/src/link.zig b/src/link.zig index 672a53999f..74e3ca85fc 100644 --- a/src/link.zig +++ b/src/link.zig @@ -1120,8 +1120,8 @@ pub const File = struct { kind: Kind, ty: Type, - pub fn initDecl(kind: Kind, decl: Module.Decl.OptionalIndex, mod: *Module) LazySymbol { - return .{ .kind = kind, .ty = if (decl.unwrap()) |decl_index| + pub fn initDecl(kind: Kind, decl: ?Module.Decl.Index, mod: *Module) LazySymbol { + return .{ .kind = kind, .ty = if (decl) |decl_index| mod.declPtr(decl_index).val.castTag(.ty).?.data else Type.anyerror }; diff --git a/src/link/Coff.zig b/src/link/Coff.zig index 0af681bb5e..81e8c57bdd 100644 --- a/src/link/Coff.zig +++ b/src/link/Coff.zig @@ -143,9 +143,11 @@ const Section = struct { const LazySymbolTable = std.AutoArrayHashMapUnmanaged(Module.Decl.OptionalIndex, LazySymbolMetadata); const LazySymbolMetadata = struct { - text_atom: ?Atom.Index = null, - rdata_atom: ?Atom.Index = null, - alignment: u32, + const State = enum { unused, pending_flush, flushed }; + text_atom: Atom.Index = undefined, + rdata_atom: Atom.Index = undefined, + text_state: State = .unused, + rdata_state: State = .unused, }; const DeclMetadata = struct { @@ -1137,7 +1139,11 @@ pub fn lowerUnnamedConst(self: *Coff, tv: TypedValue, decl_index: Module.Decl.In return atom.getSymbolIndex().?; } -pub fn updateDecl(self: *Coff, module: *Module, decl_index: Module.Decl.Index) !void { +pub fn updateDecl( + self: *Coff, + module: *Module, + decl_index: Module.Decl.Index, +) link.File.UpdateDeclError!void { if (build_options.skip_non_native and builtin.object_format != .coff) { @panic("Attempted to compile for object format that was disabled by build configuration"); } @@ -1189,32 +1195,16 @@ pub fn updateDecl(self: *Coff, module: *Module, decl_index: Module.Decl.Index) ! return self.updateDeclExports(module, decl_index, module.getDeclExports(decl_index)); } -fn updateLazySymbol(self: *Coff, decl: Module.Decl.OptionalIndex, metadata: LazySymbolMetadata) !void { - const mod = self.base.options.module.?; - if (metadata.text_atom) |atom| try self.updateLazySymbolAtom( - link.File.LazySymbol.initDecl(.code, decl, mod), - atom, - self.text_section_index.?, - metadata.alignment, - ); - if (metadata.rdata_atom) |atom| try self.updateLazySymbolAtom( - link.File.LazySymbol.initDecl(.const_data, decl, mod), - atom, - self.rdata_section_index.?, - metadata.alignment, - ); -} - fn updateLazySymbolAtom( self: *Coff, sym: link.File.LazySymbol, atom_index: Atom.Index, section_index: u16, - required_alignment: u32, ) !void { const gpa = self.base.allocator; const mod = self.base.options.module.?; + var required_alignment: u32 = undefined; var code_buffer = std.ArrayList(u8).init(gpa); defer code_buffer.deinit(); @@ -1235,9 +1225,15 @@ fn updateLazySymbolAtom( .parent_decl_node = undefined, .lazy = .unneeded, }; - const res = try codegen.generateLazySymbol(&self.base, src, sym, &code_buffer, .none, .{ - .parent_atom_index = local_sym_index, - }); + const res = try codegen.generateLazySymbol( + &self.base, + src, + sym, + &required_alignment, + &code_buffer, + .none, + .{ .parent_atom_index = local_sym_index }, + ); const code = switch (res) { .ok => code_buffer.items, .fail => |em| { @@ -1265,20 +1261,27 @@ fn updateLazySymbolAtom( try self.writeAtom(atom_index, code); } -pub fn getOrCreateAtomForLazySymbol( - self: *Coff, - sym: link.File.LazySymbol, - alignment: u32, -) !Atom.Index { +pub fn getOrCreateAtomForLazySymbol(self: *Coff, sym: link.File.LazySymbol) !Atom.Index { const gop = try self.lazy_syms.getOrPut(self.base.allocator, sym.getDecl()); - errdefer _ = self.lazy_syms.pop(); - if (!gop.found_existing) gop.value_ptr.* = .{ .alignment = alignment }; - const atom = switch (sym.kind) { - .code => &gop.value_ptr.text_atom, - .const_data => &gop.value_ptr.rdata_atom, + errdefer _ = if (!gop.found_existing) self.lazy_syms.pop(); + if (!gop.found_existing) gop.value_ptr.* = .{}; + const metadata: struct { atom: *Atom.Index, state: *LazySymbolMetadata.State } = switch (sym.kind) { + .code => .{ .atom = &gop.value_ptr.text_atom, .state = &gop.value_ptr.text_state }, + .const_data => .{ .atom = &gop.value_ptr.rdata_atom, .state = &gop.value_ptr.rdata_state }, }; - if (atom.* == null) atom.* = try self.createAtom(); - return atom.*.?; + switch (metadata.state.*) { + .unused => metadata.atom.* = try self.createAtom(), + .pending_flush => return metadata.atom.*, + .flushed => {}, + } + metadata.state.* = .pending_flush; + const atom = metadata.atom.*; + // anyerror needs to be deferred until flushModule + if (sym.getDecl() != .none) try self.updateLazySymbolAtom(sym, atom, switch (sym.kind) { + .code => self.text_section_index.?, + .const_data => self.rdata_section_index.?, + }); + return atom; } pub fn getOrCreateAtomForDecl(self: *Coff, decl_index: Module.Decl.Index) !Atom.Index { @@ -1410,7 +1413,7 @@ pub fn updateDeclExports( module: *Module, decl_index: Module.Decl.Index, exports: []const *Module.Export, -) !void { +) link.File.UpdateDeclExportsError!void { if (build_options.skip_non_native and builtin.object_format != .coff) { @panic("Attempted to compile for object format that was disabled by build configuration"); } @@ -1605,16 +1608,34 @@ pub fn flushModule(self: *Coff, comp: *Compilation, prog_node: *std.Progress.Nod sub_prog_node.activate(); defer sub_prog_node.end(); - // Most lazy symbols can be updated when the corresponding decl is, - // so we only have to worry about the one without an associated decl. - if (self.lazy_syms.get(.none)) |metadata| { - self.updateLazySymbol(.none, metadata) catch |err| switch (err) { - error.CodegenFail => return error.FlushFailure, - else => |e| return e, + const gpa = self.base.allocator; + + const module = self.base.options.module orelse return error.LinkingWithoutZigSourceUnimplemented; + + if (self.lazy_syms.getPtr(.none)) |metadata| { + // Most lazy symbols can be updated on first use, but + // anyerror needs to wait for everything to be flushed. + if (metadata.text_state != .unused) self.updateLazySymbolAtom( + link.File.LazySymbol.initDecl(.code, null, module), + metadata.text_atom, + self.text_section_index.?, + ) catch |err| return switch (err) { + error.CodegenFail => error.FlushFailure, + else => |e| e, + }; + if (metadata.rdata_state != .unused) self.updateLazySymbolAtom( + link.File.LazySymbol.initDecl(.const_data, null, module), + metadata.rdata_atom, + self.rdata_section_index.?, + ) catch |err| return switch (err) { + error.CodegenFail => error.FlushFailure, + else => |e| e, }; } - - const gpa = self.base.allocator; + for (self.lazy_syms.values()) |*metadata| { + if (metadata.text_state != .unused) metadata.text_state = .flushed; + if (metadata.rdata_state != .unused) metadata.rdata_state = .flushed; + } while (self.unresolved.popOrNull()) |entry| { assert(entry.value); // We only expect imports generated by the incremental linker for now. diff --git a/src/link/Elf.zig b/src/link/Elf.zig index 48d952b6cc..724ec76500 100644 --- a/src/link/Elf.zig +++ b/src/link/Elf.zig @@ -65,9 +65,11 @@ const Section = struct { }; const LazySymbolMetadata = struct { - text_atom: ?Atom.Index = null, - rodata_atom: ?Atom.Index = null, - alignment: u32, + const State = enum { unused, pending_flush, flushed }; + text_atom: Atom.Index = undefined, + rodata_atom: Atom.Index = undefined, + text_state: State = .unused, + rodata_state: State = .unused, }; const DeclMetadata = struct { @@ -1033,19 +1035,35 @@ pub fn flushModule(self: *Elf, comp: *Compilation, prog_node: *std.Progress.Node sub_prog_node.activate(); defer sub_prog_node.end(); - // Most lazy symbols can be updated when the corresponding decl is, - // so we only have to worry about the one without an associated decl. - if (self.lazy_syms.get(.none)) |metadata| { - self.updateLazySymbol(.none, metadata) catch |err| switch (err) { - error.CodegenFail => return error.FlushFailure, - else => |e| return e, - }; - } - // TODO This linker code currently assumes there is only 1 compilation unit and it // corresponds to the Zig source code. const module = self.base.options.module orelse return error.LinkingWithoutZigSourceUnimplemented; + if (self.lazy_syms.getPtr(.none)) |metadata| { + // Most lazy symbols can be updated on first use, but + // anyerror needs to wait for everything to be flushed. + if (metadata.text_state != .unused) self.updateLazySymbolAtom( + File.LazySymbol.initDecl(.code, null, module), + metadata.text_atom, + self.text_section_index.?, + ) catch |err| return switch (err) { + error.CodegenFail => error.FlushFailure, + else => |e| e, + }; + if (metadata.rodata_state != .unused) self.updateLazySymbolAtom( + File.LazySymbol.initDecl(.const_data, null, module), + metadata.rodata_atom, + self.rodata_section_index.?, + ) catch |err| return switch (err) { + error.CodegenFail => error.FlushFailure, + else => |e| e, + }; + } + for (self.lazy_syms.values()) |*metadata| { + if (metadata.text_state != .unused) metadata.text_state = .flushed; + if (metadata.rodata_state != .unused) metadata.rodata_state = .flushed; + } + const target_endian = self.base.options.target.cpu.arch.endian(); const foreign_endian = target_endian != builtin.cpu.arch.endian(); @@ -2377,16 +2395,27 @@ pub fn freeDecl(self: *Elf, decl_index: Module.Decl.Index) void { } } -pub fn getOrCreateAtomForLazySymbol(self: *Elf, sym: File.LazySymbol, alignment: u32) !Atom.Index { +pub fn getOrCreateAtomForLazySymbol(self: *Elf, sym: File.LazySymbol) !Atom.Index { const gop = try self.lazy_syms.getOrPut(self.base.allocator, sym.getDecl()); - errdefer _ = self.lazy_syms.pop(); - if (!gop.found_existing) gop.value_ptr.* = .{ .alignment = alignment }; - const atom = switch (sym.kind) { - .code => &gop.value_ptr.text_atom, - .const_data => &gop.value_ptr.rodata_atom, + errdefer _ = if (!gop.found_existing) self.lazy_syms.pop(); + if (!gop.found_existing) gop.value_ptr.* = .{}; + const metadata: struct { atom: *Atom.Index, state: *LazySymbolMetadata.State } = switch (sym.kind) { + .code => .{ .atom = &gop.value_ptr.text_atom, .state = &gop.value_ptr.text_state }, + .const_data => .{ .atom = &gop.value_ptr.rodata_atom, .state = &gop.value_ptr.rodata_state }, }; - if (atom.* == null) atom.* = try self.createAtom(); - return atom.*.?; + switch (metadata.state.*) { + .unused => metadata.atom.* = try self.createAtom(), + .pending_flush => return metadata.atom.*, + .flushed => {}, + } + metadata.state.* = .pending_flush; + const atom = metadata.atom.*; + // anyerror needs to be deferred until flushModule + if (sym.getDecl() != .none) try self.updateLazySymbolAtom(sym, atom, switch (sym.kind) { + .code => self.text_section_index.?, + .const_data => self.rodata_section_index.?, + }); + return atom; } pub fn getOrCreateAtomForDecl(self: *Elf, decl_index: Module.Decl.Index) !Atom.Index { @@ -2580,7 +2609,11 @@ pub fn updateFunc(self: *Elf, module: *Module, func: *Module.Fn, air: Air, liven return self.updateDeclExports(module, decl_index, module.getDeclExports(decl_index)); } -pub fn updateDecl(self: *Elf, module: *Module, decl_index: Module.Decl.Index) !void { +pub fn updateDecl( + self: *Elf, + module: *Module, + decl_index: Module.Decl.Index, +) File.UpdateDeclError!void { if (build_options.skip_non_native and builtin.object_format != .elf) { @panic("Attempted to compile for object format that was disabled by build configuration"); } @@ -2657,32 +2690,16 @@ pub fn updateDecl(self: *Elf, module: *Module, decl_index: Module.Decl.Index) !v return self.updateDeclExports(module, decl_index, module.getDeclExports(decl_index)); } -fn updateLazySymbol(self: *Elf, decl: Module.Decl.OptionalIndex, metadata: LazySymbolMetadata) !void { - const mod = self.base.options.module.?; - if (metadata.text_atom) |atom| try self.updateLazySymbolAtom( - File.LazySymbol.initDecl(.code, decl, mod), - atom, - self.text_section_index.?, - metadata.alignment, - ); - if (metadata.rodata_atom) |atom| try self.updateLazySymbolAtom( - File.LazySymbol.initDecl(.const_data, decl, mod), - atom, - self.rodata_section_index.?, - metadata.alignment, - ); -} - fn updateLazySymbolAtom( self: *Elf, sym: File.LazySymbol, atom_index: Atom.Index, shdr_index: u16, - required_alignment: u32, ) !void { const gpa = self.base.allocator; const mod = self.base.options.module.?; + var required_alignment: u32 = undefined; var code_buffer = std.ArrayList(u8).init(gpa); defer code_buffer.deinit(); @@ -2707,9 +2724,15 @@ fn updateLazySymbolAtom( .parent_decl_node = undefined, .lazy = .unneeded, }; - const res = try codegen.generateLazySymbol(&self.base, src, sym, &code_buffer, .none, .{ - .parent_atom_index = local_sym_index, - }); + const res = try codegen.generateLazySymbol( + &self.base, + src, + sym, + &required_alignment, + &code_buffer, + .none, + .{ .parent_atom_index = local_sym_index }, + ); const code = switch (res) { .ok => code_buffer.items, .fail => |em| { @@ -2814,7 +2837,7 @@ pub fn updateDeclExports( module: *Module, decl_index: Module.Decl.Index, exports: []const *Module.Export, -) !void { +) File.UpdateDeclExportsError!void { if (build_options.skip_non_native and builtin.object_format != .elf) { @panic("Attempted to compile for object format that was disabled by build configuration"); } diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 21633dea64..a346ec756f 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -236,9 +236,11 @@ const is_hot_update_compatible = switch (builtin.target.os.tag) { const LazySymbolTable = std.AutoArrayHashMapUnmanaged(Module.Decl.OptionalIndex, LazySymbolMetadata); const LazySymbolMetadata = struct { - text_atom: ?Atom.Index = null, - data_const_atom: ?Atom.Index = null, - alignment: u32, + const State = enum { unused, pending_flush, flushed }; + text_atom: Atom.Index = undefined, + data_const_atom: Atom.Index = undefined, + text_state: State = .unused, + data_const_state: State = .unused, }; const TlvSymbolTable = std.AutoArrayHashMapUnmanaged(SymbolWithLoc, Atom.Index); @@ -494,16 +496,32 @@ pub fn flushModule(self: *MachO, comp: *Compilation, prog_node: *std.Progress.No sub_prog_node.activate(); defer sub_prog_node.end(); - // Most lazy symbols can be updated when the corresponding decl is, - // so we only have to worry about the one without an associated decl. - if (self.lazy_syms.get(.none)) |metadata| { - self.updateLazySymbol(.none, metadata) catch |err| switch (err) { - error.CodegenFail => return error.FlushFailure, - else => |e| return e, + const module = self.base.options.module orelse return error.LinkingWithoutZigSourceUnimplemented; + + if (self.lazy_syms.getPtr(.none)) |metadata| { + // Most lazy symbols can be updated on first use, but + // anyerror needs to wait for everything to be flushed. + if (metadata.text_state != .unused) self.updateLazySymbolAtom( + File.LazySymbol.initDecl(.code, null, module), + metadata.text_atom, + self.text_section_index.?, + ) catch |err| return switch (err) { + error.CodegenFail => error.FlushFailure, + else => |e| e, + }; + if (metadata.data_const_state != .unused) self.updateLazySymbolAtom( + File.LazySymbol.initDecl(.const_data, null, module), + metadata.data_const_atom, + self.data_const_section_index.?, + ) catch |err| return switch (err) { + error.CodegenFail => error.FlushFailure, + else => |e| e, }; } - - const module = self.base.options.module orelse return error.LinkingWithoutZigSourceUnimplemented; + for (self.lazy_syms.values()) |*metadata| { + if (metadata.text_state != .unused) metadata.text_state = .flushed; + if (metadata.data_const_state != .unused) metadata.data_const_state = .flushed; + } if (self.d_sym) |*d_sym| { try d_sym.dwarf.flushModule(module); @@ -2037,32 +2055,16 @@ pub fn updateDecl(self: *MachO, module: *Module, decl_index: Module.Decl.Index) try self.updateDeclExports(module, decl_index, module.getDeclExports(decl_index)); } -fn updateLazySymbol(self: *MachO, decl: Module.Decl.OptionalIndex, metadata: LazySymbolMetadata) !void { - const mod = self.base.options.module.?; - if (metadata.text_atom) |atom| try self.updateLazySymbolAtom( - File.LazySymbol.initDecl(.code, decl, mod), - atom, - self.text_section_index.?, - metadata.alignment, - ); - if (metadata.data_const_atom) |atom| try self.updateLazySymbolAtom( - File.LazySymbol.initDecl(.const_data, decl, mod), - atom, - self.data_const_section_index.?, - metadata.alignment, - ); -} - fn updateLazySymbolAtom( self: *MachO, sym: File.LazySymbol, atom_index: Atom.Index, section_index: u8, - required_alignment: u32, ) !void { const gpa = self.base.allocator; const mod = self.base.options.module.?; + var required_alignment: u32 = undefined; var code_buffer = std.ArrayList(u8).init(gpa); defer code_buffer.deinit(); @@ -2087,9 +2089,15 @@ fn updateLazySymbolAtom( .parent_decl_node = undefined, .lazy = .unneeded, }; - const res = try codegen.generateLazySymbol(&self.base, src, sym, &code_buffer, .none, .{ - .parent_atom_index = local_sym_index, - }); + const res = try codegen.generateLazySymbol( + &self.base, + src, + sym, + &required_alignment, + &code_buffer, + .none, + .{ .parent_atom_index = local_sym_index }, + ); const code = switch (res) { .ok => code_buffer.items, .fail => |em| { @@ -2108,7 +2116,7 @@ fn updateLazySymbolAtom( errdefer self.freeAtom(atom_index); log.debug("allocated atom for {s} at 0x{x}", .{ name, vaddr }); - log.debug(" (required alignment 0x{x}", .{required_alignment}); + log.debug(" (required alignment 0x{x})", .{required_alignment}); atom.size = code.len; symbol.n_value = vaddr; @@ -2117,16 +2125,30 @@ fn updateLazySymbolAtom( try self.writeAtom(atom_index, code); } -pub fn getOrCreateAtomForLazySymbol(self: *MachO, sym: File.LazySymbol, alignment: u32) !Atom.Index { +pub fn getOrCreateAtomForLazySymbol(self: *MachO, sym: File.LazySymbol) !Atom.Index { const gop = try self.lazy_syms.getOrPut(self.base.allocator, sym.getDecl()); - errdefer _ = self.lazy_syms.pop(); - if (!gop.found_existing) gop.value_ptr.* = .{ .alignment = alignment }; - const atom = switch (sym.kind) { - .code => &gop.value_ptr.text_atom, - .const_data => &gop.value_ptr.data_const_atom, + errdefer _ = if (!gop.found_existing) self.lazy_syms.pop(); + if (!gop.found_existing) gop.value_ptr.* = .{}; + const metadata: struct { atom: *Atom.Index, state: *LazySymbolMetadata.State } = switch (sym.kind) { + .code => .{ .atom = &gop.value_ptr.text_atom, .state = &gop.value_ptr.text_state }, + .const_data => .{ + .atom = &gop.value_ptr.data_const_atom, + .state = &gop.value_ptr.data_const_state, + }, }; - if (atom.* == null) atom.* = try self.createAtom(); - return atom.*.?; + switch (metadata.state.*) { + .unused => metadata.atom.* = try self.createAtom(), + .pending_flush => return metadata.atom.*, + .flushed => {}, + } + metadata.state.* = .pending_flush; + const atom = metadata.atom.*; + // anyerror needs to be deferred until flushModule + if (sym.getDecl() != .none) try self.updateLazySymbolAtom(sym, atom, switch (sym.kind) { + .code => self.text_section_index.?, + .const_data => self.data_const_section_index.?, + }); + return atom; } fn updateThreadlocalVariable(self: *MachO, module: *Module, decl_index: Module.Decl.Index) !void { @@ -2357,7 +2379,7 @@ pub fn updateDeclExports( module: *Module, decl_index: Module.Decl.Index, exports: []const *Module.Export, -) !void { +) File.UpdateDeclExportsError!void { if (build_options.skip_non_native and builtin.object_format != .macho) { @panic("Attempted to compile for object format that was disabled by build configuration"); } @@ -4147,9 +4169,6 @@ pub fn logSymtab(self: *MachO) void { log.debug("stubs entries:", .{}); log.debug("{}", .{self.stub_table}); - - // log.debug("threadlocal entries:", .{}); - // log.debug("{}", .{self.tlv_table}); } pub fn logAtoms(self: *MachO) void { @@ -4189,6 +4208,6 @@ pub fn logAtom(self: *MachO, atom_index: Atom.Index) void { sym.n_value, atom.size, atom.file, - sym.n_sect, + sym.n_sect + 1, }); } diff --git a/src/print_air.zig b/src/print_air.zig index 2d7995842f..d90d31ec67 100644 --- a/src/print_air.zig +++ b/src/print_air.zig @@ -94,14 +94,20 @@ const Writer = struct { for (w.air.instructions.items(.tag), 0..) |tag, i| { const inst = @intCast(Air.Inst.Index, i); switch (tag) { - .constant, .const_ty => try w.writeInst(s, inst), + .constant, .const_ty => { + try w.writeInst(s, inst); + try s.writeByte('\n'); + }, else => continue, } } } fn writeBody(w: *Writer, s: anytype, body: []const Air.Inst.Index) @TypeOf(s).Error!void { - for (body) |inst| try w.writeInst(s, inst); + for (body) |inst| { + try w.writeInst(s, inst); + try s.writeByte('\n'); + } } fn writeInst(w: *Writer, s: anytype, inst: Air.Inst.Index) @TypeOf(s).Error!void { @@ -336,7 +342,7 @@ const Writer = struct { .work_group_id, => try w.writeWorkDimension(s, inst), } - try s.writeAll(")\n"); + try s.writeByte(')'); } fn writeBinOp(w: *Writer, s: anytype, inst: Air.Inst.Index) @TypeOf(s).Error!void { |
