diff options
Diffstat (limited to 'src/codegen')
| -rw-r--r-- | src/codegen/x86_64.zig | 728 |
1 files changed, 402 insertions, 326 deletions
diff --git a/src/codegen/x86_64.zig b/src/codegen/x86_64.zig index 745d1b13cf..dd0b74d46a 100644 --- a/src/codegen/x86_64.zig +++ b/src/codegen/x86_64.zig @@ -3,6 +3,7 @@ const testing = std.testing; const mem = std.mem; const assert = std.debug.assert; const ArrayList = std.ArrayList; +const Allocator = std.mem.Allocator; const Type = @import("../Type.zig"); const DW = std.dwarf; @@ -145,51 +146,57 @@ pub const callee_preserved_regs = [_]Register{ .rax, .rcx, .rdx, .rsi, .rdi, .r8 pub const c_abi_int_param_regs = [_]Register{ .rdi, .rsi, .rdx, .rcx, .r8, .r9 }; pub const c_abi_int_return_regs = [_]Register{ .rax, .rdx }; -/// Represents an unencoded x86 instruction. +/// Encoding helper functions for x86_64 instructions /// -/// Roughly based on the table headings at http://ref.x86asm.net/coder64.html -pub const Instruction = struct { - /// Opcode prefix, needed for certain rare ops (e.g. MOVSS) - opcode_prefix: ?u8 = null, - - /// One-byte primary opcode - primary_opcode_1b: ?u8 = null, - /// Two-byte primary opcode (always prefixed with 0f) - primary_opcode_2b: ?u8 = null, - // TODO: Support 3-byte opcodes - - /// Secondary opcode - secondary_opcode: ?u8 = null, - - /// Opcode extension (to be placed in the ModR/M byte in place of reg) - opcode_extension: ?u3 = null, - - /// Legacy prefixes to use with this instruction - /// Most of the time, this field will be 0 and no prefixes are added. - /// Otherwise, a prefix will be added for each field set. - legacy_prefixes: LegacyPrefixes = .{}, - - /// 64-bit operand size - operand_size_64: bool = false, - - /// The opcode-reg field, - /// stored in the 3 least significant bits of the opcode - /// on certain instructions + REX if extended - opcode_reg: ?Register = null, - - /// The reg field - reg: ?Register = null, - /// The mod + r/m field - modrm: ?ModrmEffectiveAddress = null, - /// Location of the 3rd operand, if applicable - sib: ?SibEffectiveAddress = null, - - /// Number of bytes of immediate - immediate_bytes: u8 = 0, - /// The value of the immediate - immediate: u64 = 0, - - /// See legacy_prefixes +/// Many of these helpers do very little, but they can help make things +/// slightly more readable with more descriptive field names / function names. +/// +/// Some of them also have asserts to ensure that we aren't doing dumb things. +/// For example, trying to use register 4 (esp) in an indirect modr/m byte is illegal, +/// you need to encode it with an SIB byte. +/// +/// Note that ALL of these helper functions will assume capacity, +/// so ensure that the `code` has sufficient capacity before using them. +/// The `init` method is the recommended way to ensure capacity. +pub const Encoder = struct { + /// Non-owning reference to the code array + code: *ArrayList(u8), + + const Self = @This(); + + /// Wrap `code` in Encoder to make it easier to call these helper functions + /// + /// maximum_inst_size should contain the maximum number of bytes + /// that the encoded instruction will take. + /// This is because the helper functions will assume capacity + /// in order to avoid bounds checking. + pub fn init(code: *ArrayList(u8), maximum_inst_size: u8) !Self { + try code.ensureCapacity(code.items.len + maximum_inst_size); + return Self{ .code = code }; + } + + /// Directly write a number to the code array with big endianness + pub fn writeIntBig(self: Self, comptime T: type, value: T) void { + mem.writeIntBig( + T, + self.code.addManyAsArrayAssumeCapacity(@divExact(@typeInfo(T).Int.bits, 8)), + value, + ); + } + + /// Directly write a number to the code array with little endianness + pub fn writeIntLittle(self: Self, comptime T: type, value: T) void { + mem.writeIntLittle( + T, + self.code.addManyAsArrayAssumeCapacity(@divExact(@typeInfo(T).Int.bits, 8)), + value, + ); + } + + // -------- + // Prefixes + // -------- + pub const LegacyPrefixes = packed struct { /// LOCK prefix_f0: bool = false, @@ -212,322 +219,391 @@ pub const Instruction = struct { /// Branch taken prefix_3e: bool = false, - /// Operand size override + /// Operand size override (enables 16 bit operation) prefix_66: bool = false, - /// Address size override + /// Address size override (enables 16 bit address size) prefix_67: bool = false, padding: u5 = 0, }; - /// Encodes an effective address for the Mod + R/M part of the ModR/M byte - /// - /// Note that depending on the instruction, not all effective addresses are allowed. - /// - /// Examples: - /// eax: .reg = .eax - /// [eax]: .mem = .eax - /// [eax + 8]: .mem_disp = .{ .reg = .eax, .disp = 8 } - /// [eax - 8]: .mem_disp = .{ .reg = .eax, .disp = -8 } - /// [55]: .disp32 = 55 - pub const ModrmEffectiveAddress = union(enum) { - reg: Register, - mem: Register, - mem_disp: struct { - reg: Register, - disp: i32, - }, - disp32: u32, - - pub fn isExtended(self: @This()) bool { - return switch (self) { - .reg => |reg| reg.isExtended(), - .mem => |memea| memea.isExtended(), - .mem_disp => |mem_disp| mem_disp.reg.isExtended(), - .disp32 => false, - }; - } - }; - - /// Encodes an effective address for the SIB byte - /// - /// Note that depending on the instruction, not all effective addresses are allowed. - /// - /// Examples: - /// [eax + ebx * 2]: .base_index = .{ .base = .eax, .index = .ebx, .scale = 2 } - /// [eax]: .base_index = .{ .base = .eax, .index = null, .scale = 1 } - /// [ebx * 2 + 256]: .index_disp = .{ .index = .ebx, .scale = 2, .disp = 256 } - /// [[ebp] + ebx * 2 + 8]: .ebp_index_disp = .{ .index = .ebx, .scale = 2, .disp = 8 } - pub const SibEffectiveAddress = union(enum) { - base_index: struct { - base: Register, - index: ?Register, - scale: u8, // 1, 2, 4, or 8 - }, - index_disp: struct { - index: ?Register, - scale: u8, // 1, 2, 4, or 8 - disp: u32, - }, - ebp_index_disp: struct { - index: ?Register, - scale: u8, // 1, 2, 4, or 8 - disp: u32, - }, - - pub fn baseIsExtended(self: @This()) bool { - return switch (self) { - .base_index => |base_index| base_index.base.isExtended(), - .index_disp, .ebp_index_disp => false, - }; - } - - pub fn indexIsExtended(self: @This()) bool { - return switch (self) { - .base_index => |base_index| if (base_index.index) |idx| idx.isExtended() else false, - .index_disp => |index_disp| if (index_disp.index) |idx| idx.isExtended() else false, - .ebp_index_disp => |ebp_index_disp| if (ebp_index_disp.index) |idx| idx.isExtended() else false, - }; - } - }; - - /// Writes the encoded Instruction to the code ArrayList - pub fn encodeInto(inst: Instruction, code: *ArrayList(u8)) !void { - // We need to write the following, in that order: - // - Legacy prefixes (0 to 13 bytes) - // - REX prefix (0 to 1 byte) - // - Opcode (1, 2, or 3 bytes) - // - ModR/M (0 or 1 byte) - // - SIB (0 or 1 byte) - // - Displacement (0, 1, 2, or 4 bytes) - // - Immediate (0, 1, 2, 4, or 8 bytes) - - // By this calculation, an instruction could be up to 31 bytes long (will probably not happen) - try code.ensureCapacity(code.items.len + 31); - - // Legacy prefixes - if (@bitCast(u16, inst.legacy_prefixes) != 0) { + /// Encodes legacy prefixes + pub fn legacyPrefixes(self: Self, prefixes: LegacyPrefixes) void { + if (@bitCast(u16, prefixes) != 0) { // Hopefully this path isn't taken very often, so we'll do it the slow way for now // LOCK - if (inst.legacy_prefixes.prefix_f0) code.appendAssumeCapacity(0xf0); + if (prefixes.prefix_f0) self.code.appendAssumeCapacity(0xf0); // REPNZ, REPNE, REP, Scalar Double-precision - if (inst.legacy_prefixes.prefix_f2) code.appendAssumeCapacity(0xf2); + if (prefixes.prefix_f2) self.code.appendAssumeCapacity(0xf2); // REPZ, REPE, REP, Scalar Single-precision - if (inst.legacy_prefixes.prefix_f3) code.appendAssumeCapacity(0xf3); + if (prefixes.prefix_f3) self.code.appendAssumeCapacity(0xf3); // CS segment override or Branch not taken - if (inst.legacy_prefixes.prefix_2e) code.appendAssumeCapacity(0x2e); + if (prefixes.prefix_2e) self.code.appendAssumeCapacity(0x2e); // DS segment override - if (inst.legacy_prefixes.prefix_36) code.appendAssumeCapacity(0x36); + if (prefixes.prefix_36) self.code.appendAssumeCapacity(0x36); // ES segment override - if (inst.legacy_prefixes.prefix_26) code.appendAssumeCapacity(0x26); + if (prefixes.prefix_26) self.code.appendAssumeCapacity(0x26); // FS segment override - if (inst.legacy_prefixes.prefix_64) code.appendAssumeCapacity(0x64); + if (prefixes.prefix_64) self.code.appendAssumeCapacity(0x64); // GS segment override - if (inst.legacy_prefixes.prefix_65) code.appendAssumeCapacity(0x65); + if (prefixes.prefix_65) self.code.appendAssumeCapacity(0x65); // Branch taken - if (inst.legacy_prefixes.prefix_3e) code.appendAssumeCapacity(0x3e); + if (prefixes.prefix_3e) self.code.appendAssumeCapacity(0x3e); // Operand size override - if (inst.legacy_prefixes.prefix_66) code.appendAssumeCapacity(0x66); + if (prefixes.prefix_66) self.code.appendAssumeCapacity(0x66); // Address size override - if (inst.legacy_prefixes.prefix_67) code.appendAssumeCapacity(0x67); + if (prefixes.prefix_67) self.code.appendAssumeCapacity(0x67); } + } - // REX prefix - // - // A REX prefix has the following form: - // 0b0100_WRXB - // 0100: fixed bits - // W: stands for "wide", indicates that the instruction uses 64-bit operands. - // R, X, and B each contain the 4th bit of a register - // these have to be set when using registers 8-15. - // R: stands for "reg", extends the reg field in the ModR/M byte. - // X: stands for "index", extends the index field in the SIB byte. - // B: stands for "base", extends either the r/m field in the ModR/M byte, - // the base field in the SIB byte, - // or the opcode reg field in the Opcode byte. - { - var value: u8 = 0x40; - if (inst.opcode_reg) |opcode_reg| { - if (opcode_reg.isExtended()) { - value |= 0x1; - } - } - if (inst.modrm) |modrm| { - if (modrm.isExtended()) { - value |= 0x1; - } - } - if (inst.sib) |sib| { - if (sib.baseIsExtended()) { - value |= 0x1; - } - if (sib.indexIsExtended()) { - value |= 0x2; - } - } - if (inst.reg) |reg| { - if (reg.isExtended()) { - value |= 0x4; - } - } - if (inst.operand_size_64) { - value |= 0x8; - } - if (value != 0x40) { - code.appendAssumeCapacity(value); - } - } + /// Use 16 bit operand size + /// + /// Note that this flag is overridden by REX.W, if both are present. + pub fn prefix16BitMode(self: Self) void { + self.code.appendAssumeCapacity(0x66); + } - // Opcode - if (inst.primary_opcode_1b) |opcode| { - var value = opcode; - if (inst.opcode_reg) |opcode_reg| { - value |= opcode_reg.low_id(); - } - code.appendAssumeCapacity(value); - } else if (inst.primary_opcode_2b) |opcode| { - code.appendAssumeCapacity(0x0f); - var value = opcode; - if (inst.opcode_reg) |opcode_reg| { - value |= opcode_reg.low_id(); - } - code.appendAssumeCapacity(value); - } + /// From section 2.2.1.2 of the manual, REX is encoded as b0100WRXB + pub const Rex = struct { + /// Wide, enables 64-bit operation + w: bool = false, + /// Extends the reg field in the ModR/M byte + r: bool = false, + /// Extends the index field in the SIB byte + x: bool = false, + /// Extends the r/m field in the ModR/M byte, + /// or the base field in the SIB byte, + /// or the reg field in the Opcode byte + b: bool = false, + }; - var disp8: ?u8 = null; - var disp16: ?u16 = null; - var disp32: ?u32 = null; - - // ModR/M - // - // Example ModR/M byte: - // c7: ModR/M byte that contains: - // 11 000 111: - // ^ ^ ^ - // mod | | - // reg | - // r/m - // where mod = 11 indicates that both operands are registers, - // reg = 000 indicates that the first operand is register EAX - // r/m = 111 indicates that the second operand is register EDI (since mod = 11) - if (inst.modrm != null or inst.reg != null or inst.opcode_extension != null) { - var value: u8 = 0; - - // mod + rm - if (inst.modrm) |modrm| { - switch (modrm) { - .reg => |reg| { - value |= reg.low_id(); - value |= 0b11_000_000; - }, - .mem => |memea| { - assert(memea.low_id() != 4 and memea.low_id() != 5); - value |= memea.low_id(); - // value |= 0b00_000_000; - }, - .mem_disp => |mem_disp| { - assert(mem_disp.reg.low_id() != 4); - value |= mem_disp.reg.low_id(); - if (mem_disp.disp < 128) { - // Use 1 byte of displacement - value |= 0b01_000_000; - disp8 = @bitCast(u8, @intCast(i8, mem_disp.disp)); - } else { - // Use all 4 bytes of displacement - value |= 0b10_000_000; - disp32 = @bitCast(u32, mem_disp.disp); - } - }, - .disp32 => |d| { - value |= 0b00_000_101; - disp32 = d; - }, - } - } - - // reg - if (inst.reg) |reg| { - value |= @as(u8, reg.low_id()) << 3; - } else if (inst.opcode_extension) |ext| { - value |= @as(u8, ext) << 3; - } - - code.appendAssumeCapacity(value); - } + /// Encodes a REX prefix byte given all the fields + /// + /// Use this byte whenever you need 64 bit operation, + /// or one of reg, index, r/m, base, or opcode-reg might be extended. + /// + /// See struct `Rex` for a description of each field. + /// + /// Does not add a prefix byte if none of the fields are set! + pub fn rex(self: Self, byte: Rex) void { + var value: u8 = 0b0100_0000; - // SIB - { - if (inst.sib) |sib| { - return error.TODOSIBByteForX8664; - } - } + if (byte.w) value |= 0b1000; + if (byte.r) value |= 0b0100; + if (byte.x) value |= 0b0010; + if (byte.b) value |= 0b0001; - // Displacement - // - // The size of the displacement depends on the instruction used and is very fragile. - // The bytes are simply written in LE order. - { - - // These writes won't fail because we ensured capacity earlier. - if (disp8) |d| - code.appendAssumeCapacity(d) - else if (disp16) |d| - mem.writeIntLittle(u16, code.addManyAsArrayAssumeCapacity(2), d) - else if (disp32) |d| - mem.writeIntLittle(u32, code.addManyAsArrayAssumeCapacity(4), d); + if (value != 0b0100_0000) { + self.code.appendAssumeCapacity(value); } + } - // Immediate - // - // The size of the immediate depends on the instruction used and is very fragile. - // The bytes are simply written in LE order. - { - // These writes won't fail because we ensured capacity earlier. - if (inst.immediate_bytes == 1) - code.appendAssumeCapacity(@intCast(u8, inst.immediate)) - else if (inst.immediate_bytes == 2) - mem.writeIntLittle(u16, code.addManyAsArrayAssumeCapacity(2), @intCast(u16, inst.immediate)) - else if (inst.immediate_bytes == 4) - mem.writeIntLittle(u32, code.addManyAsArrayAssumeCapacity(4), @intCast(u32, inst.immediate)) - else if (inst.immediate_bytes == 8) - mem.writeIntLittle(u64, code.addManyAsArrayAssumeCapacity(8), inst.immediate); - } + // ------ + // Opcode + // ------ + + /// Encodes a 1 byte opcode + pub fn opcode_1byte(self: Self, opcode: u8) void { + self.code.appendAssumeCapacity(opcode); + } + + /// Encodes a 2 byte opcode + /// + /// e.g. IMUL has the opcode 0x0f 0xaf, so you use + /// + /// encoder.opcode_2byte(0x0f, 0xaf); + pub fn opcode_2byte(self: Self, prefix: u8, opcode: u8) void { + self.code.appendAssumeCapacity(prefix); + self.code.appendAssumeCapacity(opcode); + } + + /// Encodes a 1 byte opcode with a reg field + /// + /// Remember to add a REX prefix byte if reg is extended! + pub fn opcode_withReg(self: Self, opcode: u8, reg: u3) void { + assert(opcode & 0b111 == 0); + self.code.appendAssumeCapacity(opcode | reg); + } + + // ------ + // ModR/M + // ------ + + /// Construct a ModR/M byte given all the fields + /// + /// Remember to add a REX prefix byte if reg or rm are extended! + pub fn modRm(self: Self, mod: u2, reg_or_opx: u3, rm: u3) void { + self.code.appendAssumeCapacity( + @as(u8, mod) << 6 | @as(u8, reg_or_opx) << 3 | rm, + ); + } + + /// Construct a ModR/M byte using direct r/m addressing + /// r/m effective address: r/m + /// + /// Note reg's effective address is always just reg for the ModR/M byte. + /// Remember to add a REX prefix byte if reg or rm are extended! + pub fn modRm_direct(self: Self, reg_or_opx: u3, rm: u3) void { + self.modRm(0b11, reg_or_opx, rm); + } + + /// Construct a ModR/M byte using indirect r/m addressing + /// r/m effective address: [r/m] + /// + /// Note reg's effective address is always just reg for the ModR/M byte. + /// Remember to add a REX prefix byte if reg or rm are extended! + pub fn modRm_indirectDisp0(self: Self, reg_or_opx: u3, rm: u3) void { + assert(rm != 4 and rm != 5); + self.modRm(0b00, reg_or_opx, rm); + } + + /// Construct a ModR/M byte using indirect SIB addressing + /// r/m effective address: [SIB] + /// + /// Note reg's effective address is always just reg for the ModR/M byte. + /// Remember to add a REX prefix byte if reg or rm are extended! + pub fn modRm_SIBDisp0(self: Self, reg_or_opx: u3) void { + self.modRm(0b00, reg_or_opx, 0b100); + } + + /// Construct a ModR/M byte using RIP-relative addressing + /// r/m effective address: [RIP + disp32] + /// + /// Note reg's effective address is always just reg for the ModR/M byte. + /// Remember to add a REX prefix byte if reg or rm are extended! + pub fn modRm_RIPDisp32(self: Self, reg_or_opx: u3) void { + self.modRm(0b00, reg_or_opx, 0b101); + } + + /// Construct a ModR/M byte using indirect r/m with a 8bit displacement + /// r/m effective address: [r/m + disp8] + /// + /// Note reg's effective address is always just reg for the ModR/M byte. + /// Remember to add a REX prefix byte if reg or rm are extended! + pub fn modRm_indirectDisp8(self: Self, reg_or_opx: u3, rm: u3) void { + assert(rm != 4); + self.modRm(0b01, reg_or_opx, rm); + } + + /// Construct a ModR/M byte using indirect SIB with a 8bit displacement + /// r/m effective address: [SIB + disp8] + /// + /// Note reg's effective address is always just reg for the ModR/M byte. + /// Remember to add a REX prefix byte if reg or rm are extended! + pub fn modRm_SIBDisp8(self: Self, reg_or_opx: u3) void { + self.modRm(0b01, reg_or_opx, 0b100); + } + + /// Construct a ModR/M byte using indirect r/m with a 32bit displacement + /// r/m effective address: [r/m + disp32] + /// + /// Note reg's effective address is always just reg for the ModR/M byte. + /// Remember to add a REX prefix byte if reg or rm are extended! + pub fn modRm_indirectDisp32(self: Self, reg_or_opx: u3, rm: u3) void { + assert(rm != 4); + self.modRm(0b10, reg_or_opx, rm); + } + + /// Construct a ModR/M byte using indirect SIB with a 32bit displacement + /// r/m effective address: [SIB + disp32] + /// + /// Note reg's effective address is always just reg for the ModR/M byte. + /// Remember to add a REX prefix byte if reg or rm are extended! + pub fn modRm_SIBDisp32(self: Self, reg_or_opx: u3) void { + self.modRm(0b10, reg_or_opx, 0b100); + } + + // --- + // SIB + // --- + + /// Construct a SIB byte given all the fields + /// + /// Remember to add a REX prefix byte if index or base are extended! + pub fn sib(self: Self, scale: u2, index: u3, base: u3) void { + self.code.appendAssumeCapacity( + @as(u8, scale) << 6 | @as(u8, index) << 3 | base, + ); + } + + /// Construct a SIB byte with scale * index + base, no frills. + /// r/m effective address: [base + scale * index] + /// + /// Remember to add a REX prefix byte if index or base are extended! + pub fn sib_scaleIndexBase(self: Self, scale: u2, index: u3, base: u3) void { + assert(base != 5); + + self.sib(scale, index, base); + } + + /// Construct a SIB byte with scale * index + disp32 + /// r/m effective address: [scale * index + disp32] + /// + /// Remember to add a REX prefix byte if index or base are extended! + pub fn sib_scaleIndexDisp32(self: Self, scale: u2, index: u3) void { + assert(index != 4); + + // scale is actually ignored + // index = 4 means no index + // base = 5 means no base, if mod == 0. + self.sib(scale, index, 5); + } + + /// Construct a SIB byte with just base + /// r/m effective address: [base] + /// + /// Remember to add a REX prefix byte if index or base are extended! + pub fn sib_base(self: Self, base: u3) void { + assert(base != 5); + + // scale is actually ignored + // index = 4 means no index + self.sib(0, 4, base); + } + + /// Construct a SIB byte with just disp32 + /// r/m effective address: [disp32] + /// + /// Remember to add a REX prefix byte if index or base are extended! + pub fn sib_disp32(self: Self) void { + // scale is actually ignored + // index = 4 means no index + // base = 5 means no base, if mod == 0. + self.sib(0, 4, 5); + } + + /// Construct a SIB byte with scale * index + base + disp8 + /// r/m effective address: [base + scale * index + disp8] + /// + /// Remember to add a REX prefix byte if index or base are extended! + pub fn sib_scaleIndexBaseDisp8(self: Self, scale: u2, index: u3, base: u3) void { + self.sib(scale, index, base); + } + + /// Construct a SIB byte with base + disp8, no index + /// r/m effective address: [base + disp8] + /// + /// Remember to add a REX prefix byte if index or base are extended! + pub fn sib_baseDisp8(self: Self, base: u3) void { + // scale is ignored + // index = 4 means no index + self.sib(0, 4, base); + } + + /// Construct a SIB byte with scale * index + base + disp32 + /// r/m effective address: [base + scale * index + disp32] + /// + /// Remember to add a REX prefix byte if index or base are extended! + pub fn sib_scaleIndexBaseDisp32(self: Self, scale: u2, index: u3, base: u3) void { + self.sib(scale, index, base); + } + + /// Construct a SIB byte with base + disp32, no index + /// r/m effective address: [base + disp32] + /// + /// Remember to add a REX prefix byte if index or base are extended! + pub fn sib_baseDisp32(self: Self, base: u3) void { + // scale is ignored + // index = 4 means no index + self.sib(0, 4, base); + } + + // ------------------------- + // Trivial (no bit fiddling) + // ------------------------- + + /// Encode an 8 bit immediate + /// + /// It is sign-extended to 64 bits by the cpu. + pub fn imm8(self: Self, imm: i8) void { + self.code.appendAssumeCapacity(@bitCast(u8, imm)); + } + + /// Encode an 8 bit displacement + /// + /// It is sign-extended to 64 bits by the cpu. + pub fn disp8(self: Self, disp: i8) void { + self.code.appendAssumeCapacity(@bitCast(u8, disp)); + } + + /// Encode an 16 bit immediate + /// + /// It is sign-extended to 64 bits by the cpu. + pub fn imm16(self: Self, imm: i16) void { + self.writeIntLittle(i16, imm); + } + + /// Encode an 32 bit immediate + /// + /// It is sign-extended to 64 bits by the cpu. + pub fn imm32(self: Self, imm: i32) void { + self.writeIntLittle(i32, imm); + } + + /// Encode an 32 bit displacement + /// + /// It is sign-extended to 64 bits by the cpu. + pub fn disp32(self: Self, disp: i32) void { + self.writeIntLittle(i32, disp); + } + + /// Encode an 64 bit immediate + /// + /// It is sign-extended to 64 bits by the cpu. + pub fn imm64(self: Self, imm: u64) void { + self.writeIntLittle(u64, imm); } }; -fn expectEncoded(inst: Instruction, expected: []const u8) !void { +test "x86_64 Encoder helpers" { var code = ArrayList(u8).init(testing.allocator); defer code.deinit(); - try inst.encodeInto(&code); - testing.expectEqualSlices(u8, expected, code.items); -} -test "x86_64 Instruction.encodeInto" { // simple integer multiplication // imul eax,edi // 0faf c7 - try expectEncoded(Instruction{ - .primary_opcode_2b = 0xaf, // imul - .reg = .eax, // destination - .modrm = .{ .reg = .edi }, // source - }, &[_]u8{ 0x0f, 0xaf, 0xc7 }); + { + try code.resize(0); + const encoder = try Encoder.init(&code, 4); + encoder.rex(.{ + .r = Register.eax.isExtended(), + .b = Register.edi.isExtended(), + }); + encoder.opcode_2byte(0x0f, 0xaf); + encoder.modRm_direct( + Register.eax.low_id(), + Register.edi.low_id(), + ); + + testing.expectEqualSlices(u8, &[_]u8{ 0x0f, 0xaf, 0xc7 }, code.items); + } // simple mov // mov eax,edi // 89 f8 - try expectEncoded(Instruction{ - .primary_opcode_1b = 0x89, // mov (with rm as destination) - .reg = .edi, // source - .modrm = .{ .reg = .eax }, // destination - }, &[_]u8{ 0x89, 0xf8 }); + { + try code.resize(0); + const encoder = try Encoder.init(&code, 3); + encoder.rex(.{ + .r = Register.edi.isExtended(), + .b = Register.eax.isExtended(), + }); + encoder.opcode_1byte(0x89); + encoder.modRm_direct( + Register.edi.low_id(), + Register.eax.low_id(), + ); + + testing.expectEqualSlices(u8, &[_]u8{ 0x89, 0xf8 }, code.items); + } // signed integer addition of 32-bit sign extended immediate to 64 bit register @@ -542,19 +618,19 @@ test "x86_64 Instruction.encodeInto" { // : 000 <-- opcode_extension = 0 because opcode extension is /0. /0 specifies ADD // : 001 <-- 001 is rcx // ffffff7f : 2147483647 - try expectEncoded(Instruction{ - // REX.W + - .operand_size_64 = true, - // 81 - .primary_opcode_1b = 0x81, - // /0 - .opcode_extension = 0, - // rcx - .modrm = .{ .reg = .rcx }, - // immediate - .immediate_bytes = 4, - .immediate = 2147483647, - }, &[_]u8{ 0x48, 0x81, 0xc1, 0xff, 0xff, 0xff, 0x7f }); + { + try code.resize(0); + const encoder = try Encoder.init(&code, 7); + encoder.rex(.{ .w = true }); // use 64 bit operation + encoder.opcode_1byte(0x81); + encoder.modRm_direct( + 0, + Register.rcx.low_id(), + ); + encoder.imm32(2147483647); + + testing.expectEqualSlices(u8, &[_]u8{ 0x48, 0x81, 0xc1, 0xff, 0xff, 0xff, 0x7f }, code.items); + } } // TODO add these registers to the enum and populate dwarfLocOp |
