diff options
| author | Jakub Konka <kubkon@jakubkonka.com> | 2023-03-12 08:41:44 +0100 |
|---|---|---|
| committer | Jakub Konka <kubkon@jakubkonka.com> | 2023-03-12 08:41:44 +0100 |
| commit | 707a74655be9fd702cb1be84baa719b29435fcf7 (patch) | |
| tree | 5a01bc7f9c6e93e8b505ecadd463e316c7045e59 /src/arch/x86_64/encoder.zig | |
| parent | 433558a92f005d3ad68528c62ffd6006c48b80bd (diff) | |
| download | zig-707a74655be9fd702cb1be84baa719b29435fcf7.tar.gz zig-707a74655be9fd702cb1be84baa719b29435fcf7.zip | |
x86_64: downstream encoder/assembler tests
Diffstat (limited to 'src/arch/x86_64/encoder.zig')
| -rw-r--r-- | src/arch/x86_64/encoder.zig | 1489 |
1 files changed, 1489 insertions, 0 deletions
diff --git a/src/arch/x86_64/encoder.zig b/src/arch/x86_64/encoder.zig index 9206b621bc..7e29f95069 100644 --- a/src/arch/x86_64/encoder.zig +++ b/src/arch/x86_64/encoder.zig @@ -2,6 +2,7 @@ const std = @import("std"); const assert = std.debug.assert; const log = std.log.scoped(.x86_64_encoder); const math = std.math; +const testing = std.testing; const bits = @import("bits.zig"); const Encoding = @import("Encoding.zig"); @@ -784,3 +785,1491 @@ pub const Rex = struct { return rex.w or rex.r or rex.x or rex.b; } }; + +// Tests +fn expectEqualHexStrings(expected: []const u8, given: []const u8, assembly: []const u8) !void { + assert(expected.len > 0); + if (std.mem.eql(u8, expected, given)) return; + const expected_fmt = try std.fmt.allocPrint(testing.allocator, "{x}", .{std.fmt.fmtSliceHexLower(expected)}); + defer testing.allocator.free(expected_fmt); + const given_fmt = try std.fmt.allocPrint(testing.allocator, "{x}", .{std.fmt.fmtSliceHexLower(given)}); + defer testing.allocator.free(given_fmt); + const idx = std.mem.indexOfDiff(u8, expected_fmt, given_fmt).?; + var padding = try testing.allocator.alloc(u8, idx + 5); + defer testing.allocator.free(padding); + std.mem.set(u8, padding, ' '); + std.debug.print("\nASM: {s}\nEXP: {s}\nGIV: {s}\n{s}^ -- first differing byte\n", .{ + assembly, + expected_fmt, + given_fmt, + padding, + }); + return error.TestFailed; +} + +const TestEncode = struct { + buffer: [32]u8 = undefined, + index: usize = 0, + + fn encode(enc: *TestEncode, mnemonic: Instruction.Mnemonic, args: struct { + op1: Instruction.Operand = .none, + op2: Instruction.Operand = .none, + op3: Instruction.Operand = .none, + op4: Instruction.Operand = .none, + }) !void { + var stream = std.io.fixedBufferStream(&enc.buffer); + var count_writer = std.io.countingWriter(stream.writer()); + const inst = try Instruction.new(mnemonic, .{ + .op1 = args.op1, + .op2 = args.op2, + .op3 = args.op3, + .op4 = args.op4, + }); + try inst.encode(count_writer.writer()); + enc.index = count_writer.bytes_written; + } + + fn code(enc: TestEncode) []const u8 { + return enc.buffer[0..enc.index]; + } +}; + +test "encode" { + var buf = std.ArrayList(u8).init(testing.allocator); + defer buf.deinit(); + + const inst = try Instruction.new(.mov, .{ + .op1 = .{ .reg = .rbx }, + .op2 = .{ .imm = Immediate.u(4) }, + }); + try inst.encode(buf.writer()); + try testing.expectEqualSlices(u8, &.{ 0x48, 0xc7, 0xc3, 0x4, 0x0, 0x0, 0x0 }, buf.items); +} + +test "lower I encoding" { + var enc = TestEncode{}; + + try enc.encode(.push, .{ .op1 = .{ .imm = Immediate.u(0x10) } }); + try expectEqualHexStrings("\x6A\x10", enc.code(), "push 0x10"); + + try enc.encode(.push, .{ .op1 = .{ .imm = Immediate.u(0x1000) } }); + try expectEqualHexStrings("\x66\x68\x00\x10", enc.code(), "push 0x1000"); + + try enc.encode(.push, .{ .op1 = .{ .imm = Immediate.u(0x10000000) } }); + try expectEqualHexStrings("\x68\x00\x00\x00\x10", enc.code(), "push 0x10000000"); + + try enc.encode(.adc, .{ .op1 = .{ .reg = .rax }, .op2 = .{ .imm = Immediate.u(0x10000000) } }); + try expectEqualHexStrings("\x48\x15\x00\x00\x00\x10", enc.code(), "adc rax, 0x10000000"); + + try enc.encode(.add, .{ .op1 = .{ .reg = .al }, .op2 = .{ .imm = Immediate.u(0x10) } }); + try expectEqualHexStrings("\x04\x10", enc.code(), "add al, 0x10"); + + try enc.encode(.add, .{ .op1 = .{ .reg = .rax }, .op2 = .{ .imm = Immediate.u(0x10) } }); + try expectEqualHexStrings("\x48\x83\xC0\x10", enc.code(), "add rax, 0x10"); + + try enc.encode(.sbb, .{ .op1 = .{ .reg = .ax }, .op2 = .{ .imm = Immediate.u(0x10) } }); + try expectEqualHexStrings("\x66\x1D\x10\x00", enc.code(), "sbb ax, 0x10"); + + try enc.encode(.xor, .{ .op1 = .{ .reg = .al }, .op2 = .{ .imm = Immediate.u(0x10) } }); + try expectEqualHexStrings("\x34\x10", enc.code(), "xor al, 0x10"); +} + +test "lower MI encoding" { + var enc = TestEncode{}; + + try enc.encode(.mov, .{ .op1 = .{ .reg = .r12 }, .op2 = .{ .imm = Immediate.u(0x1000) } }); + try expectEqualHexStrings("\x49\xC7\xC4\x00\x10\x00\x00", enc.code(), "mov r12, 0x1000"); + + try enc.encode(.mov, .{ .op1 = .{ .mem = Memory.sib(.byte, .{ + .base = .r12, + .disp = 0, + }) }, .op2 = .{ .imm = Immediate.u(0x10) } }); + try expectEqualHexStrings("\x41\xC6\x04\x24\x10", enc.code(), "mov BYTE PTR [r12], 0x10"); + + try enc.encode(.mov, .{ .op1 = .{ .reg = .r12 }, .op2 = .{ .imm = Immediate.u(0x1000) } }); + try expectEqualHexStrings("\x49\xC7\xC4\x00\x10\x00\x00", enc.code(), "mov r12, 0x1000"); + + try enc.encode(.mov, .{ .op1 = .{ .reg = .r12 }, .op2 = .{ .imm = Immediate.u(0x1000) } }); + try expectEqualHexStrings("\x49\xC7\xC4\x00\x10\x00\x00", enc.code(), "mov r12, 0x1000"); + + try enc.encode(.mov, .{ .op1 = .{ .reg = .rax }, .op2 = .{ .imm = Immediate.u(0x10) } }); + try expectEqualHexStrings("\x48\xc7\xc0\x10\x00\x00\x00", enc.code(), "mov rax, 0x10"); + + try enc.encode(.mov, .{ .op1 = .{ .mem = Memory.sib(.dword, .{ + .base = .r11, + .disp = 0, + }) }, .op2 = .{ .imm = Immediate.u(0x10) } }); + try expectEqualHexStrings("\x41\xc7\x03\x10\x00\x00\x00", enc.code(), "mov DWORD PTR [r11], 0x10"); + + try enc.encode(.mov, .{ + .op1 = .{ .mem = Memory.rip(.qword, 0x10) }, + .op2 = .{ .imm = Immediate.u(0x10) }, + }); + try expectEqualHexStrings( + "\x48\xC7\x05\x10\x00\x00\x00\x10\x00\x00\x00", + enc.code(), + "mov QWORD PTR [rip + 0x10], 0x10", + ); + + try enc.encode(.mov, .{ .op1 = .{ .mem = Memory.sib(.qword, .{ + .base = .rbp, + .disp = -8, + }) }, .op2 = .{ .imm = Immediate.u(0x10) } }); + try expectEqualHexStrings("\x48\xc7\x45\xf8\x10\x00\x00\x00", enc.code(), "mov QWORD PTR [rbp - 8], 0x10"); + + try enc.encode(.mov, .{ .op1 = .{ .mem = Memory.sib(.word, .{ + .base = .rbp, + .disp = -2, + }) }, .op2 = .{ .imm = Immediate.s(-16) } }); + try expectEqualHexStrings("\x66\xC7\x45\xFE\xF0\xFF", enc.code(), "mov WORD PTR [rbp - 2], -16"); + + try enc.encode(.mov, .{ .op1 = .{ .mem = Memory.sib(.byte, .{ + .base = .rbp, + .disp = -1, + }) }, .op2 = .{ .imm = Immediate.u(0x10) } }); + try expectEqualHexStrings("\xC6\x45\xFF\x10", enc.code(), "mov BYTE PTR [rbp - 1], 0x10"); + + try enc.encode(.mov, .{ .op1 = .{ .mem = Memory.sib(.qword, .{ + .base = .ds, + .disp = 0x10000000, + .scale_index = .{ + .scale = 2, + .index = .rcx, + }, + }) }, .op2 = .{ .imm = Immediate.u(0x10) } }); + try expectEqualHexStrings( + "\x48\xC7\x04\x4D\x00\x00\x00\x10\x10\x00\x00\x00", + enc.code(), + "mov QWORD PTR [rcx*2 + 0x10000000], 0x10", + ); + + try enc.encode(.adc, .{ .op1 = .{ .mem = Memory.sib(.byte, .{ + .base = .rbp, + .disp = -0x10, + }) }, .op2 = .{ .imm = Immediate.u(0x10) } }); + try expectEqualHexStrings("\x80\x55\xF0\x10", enc.code(), "adc BYTE PTR [rbp - 0x10], 0x10"); + + try enc.encode(.adc, .{ .op1 = .{ .mem = Memory.rip(.qword, 0) }, .op2 = .{ .imm = Immediate.u(0x10) } }); + try expectEqualHexStrings("\x48\x83\x15\x00\x00\x00\x00\x10", enc.code(), "adc QWORD PTR [rip], 0x10"); + + try enc.encode(.adc, .{ .op1 = .{ .reg = .rax }, .op2 = .{ .imm = Immediate.u(0x10) } }); + try expectEqualHexStrings("\x48\x83\xD0\x10", enc.code(), "adc rax, 0x10"); + + try enc.encode(.add, .{ .op1 = .{ .mem = Memory.sib(.dword, .{ + .base = .rdx, + .disp = -8, + }) }, .op2 = .{ .imm = Immediate.u(0x10) } }); + try expectEqualHexStrings("\x83\x42\xF8\x10", enc.code(), "add DWORD PTR [rdx - 8], 0x10"); + + try enc.encode(.add, .{ .op1 = .{ .reg = .rax }, .op2 = .{ .imm = Immediate.u(0x10) } }); + try expectEqualHexStrings("\x48\x83\xC0\x10", enc.code(), "add rax, 0x10"); + + try enc.encode(.add, .{ .op1 = .{ .mem = Memory.sib(.qword, .{ + .base = .rbp, + .disp = -0x10, + }) }, .op2 = .{ .imm = Immediate.s(-0x10) } }); + try expectEqualHexStrings("\x48\x83\x45\xF0\xF0", enc.code(), "add QWORD PTR [rbp - 0x10], -0x10"); + + try enc.encode(.@"and", .{ .op1 = .{ .mem = Memory.sib(.dword, .{ + .base = .ds, + .disp = 0x10000000, + }) }, .op2 = .{ .imm = Immediate.u(0x10) } }); + try expectEqualHexStrings( + "\x83\x24\x25\x00\x00\x00\x10\x10", + enc.code(), + "and DWORD PTR ds:0x10000000, 0x10", + ); + + try enc.encode(.@"and", .{ .op1 = .{ .mem = Memory.sib(.dword, .{ + .base = .es, + .disp = 0x10000000, + }) }, .op2 = .{ .imm = Immediate.u(0x10) } }); + try expectEqualHexStrings( + "\x26\x83\x24\x25\x00\x00\x00\x10\x10", + enc.code(), + "and DWORD PTR es:0x10000000, 0x10", + ); + + try enc.encode(.@"and", .{ .op1 = .{ .mem = Memory.sib(.dword, .{ + .base = .r12, + .disp = 0x10000000, + }) }, .op2 = .{ .imm = Immediate.u(0x10) } }); + try expectEqualHexStrings( + "\x41\x83\xA4\x24\x00\x00\x00\x10\x10", + enc.code(), + "and DWORD PTR [r12 + 0x10000000], 0x10", + ); + + try enc.encode(.sub, .{ .op1 = .{ .mem = Memory.sib(.dword, .{ + .base = .r11, + .disp = 0x10000000, + }) }, .op2 = .{ .imm = Immediate.u(0x10) } }); + try expectEqualHexStrings( + "\x41\x83\xAB\x00\x00\x00\x10\x10", + enc.code(), + "sub DWORD PTR [r11 + 0x10000000], 0x10", + ); +} + +test "lower RM encoding" { + var enc = TestEncode{}; + + try enc.encode(.mov, .{ .op1 = .{ .reg = .rax }, .op2 = .{ .mem = Memory.sib(.qword, .{ + .base = .r11, + .disp = 0, + }) } }); + try expectEqualHexStrings("\x49\x8b\x03", enc.code(), "mov rax, QWORD PTR [r11]"); + + try enc.encode(.mov, .{ .op1 = .{ .reg = .rbx }, .op2 = .{ .mem = Memory.sib(.qword, .{ + .base = .ds, + .disp = 0x10, + }) } }); + try expectEqualHexStrings("\x48\x8B\x1C\x25\x10\x00\x00\x00", enc.code(), "mov rbx, QWORD PTR ds:0x10"); + + try enc.encode(.mov, .{ .op1 = .{ .reg = .rax }, .op2 = .{ .mem = Memory.sib(.qword, .{ + .base = .rbp, + .disp = -4, + }) } }); + try expectEqualHexStrings("\x48\x8B\x45\xFC", enc.code(), "mov rax, QWORD PTR [rbp - 4]"); + + try enc.encode(.mov, .{ .op1 = .{ .reg = .rax }, .op2 = .{ .mem = Memory.sib(.qword, .{ + .base = .rbp, + .scale_index = .{ + .scale = 1, + .index = .rcx, + }, + .disp = -8, + }) } }); + try expectEqualHexStrings("\x48\x8B\x44\x0D\xF8", enc.code(), "mov rax, QWORD PTR [rbp + rcx*1 - 8]"); + + try enc.encode(.mov, .{ .op1 = .{ .reg = .eax }, .op2 = .{ .mem = Memory.sib(.dword, .{ + .base = .rbp, + .scale_index = .{ + .scale = 4, + .index = .rdx, + }, + .disp = -4, + }) } }); + try expectEqualHexStrings("\x8B\x44\x95\xFC", enc.code(), "mov eax, dword ptr [rbp + rdx*4 - 4]"); + + try enc.encode(.mov, .{ .op1 = .{ .reg = .rax }, .op2 = .{ .mem = Memory.sib(.qword, .{ + .base = .rbp, + .scale_index = .{ + .scale = 8, + .index = .rcx, + }, + .disp = -8, + }) } }); + try expectEqualHexStrings("\x48\x8B\x44\xCD\xF8", enc.code(), "mov rax, QWORD PTR [rbp + rcx*8 - 8]"); + + try enc.encode(.mov, .{ .op1 = .{ .reg = .r8b }, .op2 = .{ .mem = Memory.sib(.byte, .{ + .base = .rsi, + .scale_index = .{ + .scale = 1, + .index = .rcx, + }, + .disp = -24, + }) } }); + try expectEqualHexStrings("\x44\x8A\x44\x0E\xE8", enc.code(), "mov r8b, BYTE PTR [rsi + rcx*1 - 24]"); + + // TODO this mnemonic needs cleanup as some prefixes are obsolete. + try enc.encode(.mov, .{ .op1 = .{ .reg = .rax }, .op2 = .{ .reg = .cs } }); + try expectEqualHexStrings("\x48\x8C\xC8", enc.code(), "mov rax, cs"); + + try enc.encode(.mov, .{ .op1 = .{ .mem = Memory.sib(.qword, .{ + .base = .rbp, + .disp = -16, + }) }, .op2 = .{ .reg = .fs } }); + try expectEqualHexStrings("\x48\x8C\x65\xF0", enc.code(), "mov QWORD PTR [rbp - 16], fs"); + + try enc.encode(.mov, .{ .op1 = .{ .reg = .r12w }, .op2 = .{ .reg = .cs } }); + try expectEqualHexStrings("\x66\x41\x8C\xCC", enc.code(), "mov r12w, cs"); + + try enc.encode(.mov, .{ .op1 = .{ .mem = Memory.sib(.word, .{ + .base = .rbp, + .disp = -16, + }) }, .op2 = .{ .reg = .fs } }); + try expectEqualHexStrings("\x66\x8C\x65\xF0", enc.code(), "mov WORD PTR [rbp - 16], fs"); + + try enc.encode(.movsx, .{ .op1 = .{ .reg = .eax }, .op2 = .{ .reg = .bx } }); + try expectEqualHexStrings("\x0F\xBF\xC3", enc.code(), "movsx eax, bx"); + + try enc.encode(.movsx, .{ .op1 = .{ .reg = .eax }, .op2 = .{ .reg = .bl } }); + try expectEqualHexStrings("\x0F\xBE\xC3", enc.code(), "movsx eax, bl"); + + try enc.encode(.movsx, .{ .op1 = .{ .reg = .ax }, .op2 = .{ .reg = .bl } }); + try expectEqualHexStrings("\x66\x0F\xBE\xC3", enc.code(), "movsx ax, bl"); + + try enc.encode(.movsx, .{ .op1 = .{ .reg = .eax }, .op2 = .{ .mem = Memory.sib(.word, .{ + .base = .rbp, + .disp = 0, + }) } }); + try expectEqualHexStrings("\x0F\xBF\x45\x00", enc.code(), "movsx eax, BYTE PTR [rbp]"); + + try enc.encode(.movsx, .{ .op1 = .{ .reg = .eax }, .op2 = .{ .mem = Memory.sib(.byte, .{ + .base = null, + .scale_index = .{ + .index = .rax, + .scale = 2, + }, + .disp = 0, + }) } }); + try expectEqualHexStrings("\x0F\xBE\x04\x45\x00\x00\x00\x00", enc.code(), "movsx eax, BYTE PTR [rax * 2]"); + + try enc.encode(.movsx, .{ .op1 = .{ .reg = .ax }, .op2 = .{ .mem = Memory.rip(.byte, 0x10) } }); + try expectEqualHexStrings("\x66\x0F\xBE\x05\x10\x00\x00\x00", enc.code(), "movsx ax, BYTE PTR [rip + 0x10]"); + + try enc.encode(.movsx, .{ .op1 = .{ .reg = .rax }, .op2 = .{ .reg = .bx } }); + try expectEqualHexStrings("\x48\x0F\xBF\xC3", enc.code(), "movsx rax, bx"); + + try enc.encode(.movsxd, .{ .op1 = .{ .reg = .rax }, .op2 = .{ .reg = .ebx } }); + try expectEqualHexStrings("\x48\x63\xC3", enc.code(), "movsxd rax, ebx"); + + try enc.encode(.lea, .{ .op1 = .{ .reg = .rax }, .op2 = .{ .mem = Memory.rip(.qword, 0x10) } }); + try expectEqualHexStrings("\x48\x8D\x05\x10\x00\x00\x00", enc.code(), "lea rax, QWORD PTR [rip + 0x10]"); + + try enc.encode(.lea, .{ .op1 = .{ .reg = .rax }, .op2 = .{ .mem = Memory.rip(.dword, 0x10) } }); + try expectEqualHexStrings("\x48\x8D\x05\x10\x00\x00\x00", enc.code(), "lea rax, DWORD PTR [rip + 0x10]"); + + try enc.encode(.lea, .{ .op1 = .{ .reg = .eax }, .op2 = .{ .mem = Memory.rip(.dword, 0x10) } }); + try expectEqualHexStrings("\x8D\x05\x10\x00\x00\x00", enc.code(), "lea eax, DWORD PTR [rip + 0x10]"); + + try enc.encode(.lea, .{ .op1 = .{ .reg = .eax }, .op2 = .{ .mem = Memory.rip(.word, 0x10) } }); + try expectEqualHexStrings("\x8D\x05\x10\x00\x00\x00", enc.code(), "lea eax, WORD PTR [rip + 0x10]"); + + try enc.encode(.lea, .{ .op1 = .{ .reg = .ax }, .op2 = .{ .mem = Memory.rip(.byte, 0x10) } }); + try expectEqualHexStrings("\x66\x8D\x05\x10\x00\x00\x00", enc.code(), "lea ax, BYTE PTR [rip + 0x10]"); + + try enc.encode(.lea, .{ .op1 = .{ .reg = .rsi }, .op2 = .{ .mem = Memory.sib(.qword, .{ + .base = .rbp, + .scale_index = .{ + .scale = 1, + .index = .rcx, + }, + .disp = 0, + }) } }); + try expectEqualHexStrings("\x48\x8D\x74\x0D\x00", enc.code(), "lea rsi, QWORD PTR [rbp + rcx*1 + 0]"); + + try enc.encode(.add, .{ .op1 = .{ .reg = .r11 }, .op2 = .{ .mem = Memory.sib(.qword, .{ + .base = .ds, + .disp = 0x10000000, + }) } }); + try expectEqualHexStrings("\x4C\x03\x1C\x25\x00\x00\x00\x10", enc.code(), "add r11, QWORD PTR ds:0x10000000"); + + try enc.encode(.add, .{ .op1 = .{ .reg = .r12b }, .op2 = .{ .mem = Memory.sib(.byte, .{ + .base = .ds, + .disp = 0x10000000, + }) } }); + try expectEqualHexStrings("\x44\x02\x24\x25\x00\x00\x00\x10", enc.code(), "add r11b, BYTE PTR ds:0x10000000"); + + try enc.encode(.add, .{ .op1 = .{ .reg = .r12b }, .op2 = .{ .mem = Memory.sib(.byte, .{ + .base = .fs, + .disp = 0x10000000, + }) } }); + try expectEqualHexStrings("\x64\x44\x02\x24\x25\x00\x00\x00\x10", enc.code(), "add r11b, BYTE PTR fs:0x10000000"); + + try enc.encode(.sub, .{ .op1 = .{ .reg = .r11 }, .op2 = .{ .mem = Memory.sib(.qword, .{ + .base = .r13, + .disp = 0x10000000, + }) } }); + try expectEqualHexStrings("\x4D\x2B\x9D\x00\x00\x00\x10", enc.code(), "sub r11, QWORD PTR [r13 + 0x10000000]"); + + try enc.encode(.sub, .{ .op1 = .{ .reg = .r11 }, .op2 = .{ .mem = Memory.sib(.qword, .{ + .base = .r12, + .disp = 0x10000000, + }) } }); + try expectEqualHexStrings("\x4D\x2B\x9C\x24\x00\x00\x00\x10", enc.code(), "sub r11, QWORD PTR [r12 + 0x10000000]"); + + try enc.encode(.imul, .{ .op1 = .{ .reg = .r11 }, .op2 = .{ .reg = .r12 } }); + try expectEqualHexStrings("\x4D\x0F\xAF\xDC", enc.code(), "mov r11, r12"); +} + +test "lower RMI encoding" { + var enc = TestEncode{}; + + try enc.encode(.imul, .{ + .op1 = .{ .reg = .r11 }, + .op2 = .{ .reg = .r12 }, + .op3 = .{ .imm = Immediate.s(-2) }, + }); + try expectEqualHexStrings("\x4D\x6B\xDC\xFE", enc.code(), "imul r11, r12, -2"); + + try enc.encode(.imul, .{ + .op1 = .{ .reg = .r11 }, + .op2 = .{ .mem = Memory.rip(.qword, -16) }, + .op3 = .{ .imm = Immediate.s(-1024) }, + }); + try expectEqualHexStrings( + "\x4C\x69\x1D\xF0\xFF\xFF\xFF\x00\xFC\xFF\xFF", + enc.code(), + "imul r11, QWORD PTR [rip - 16], -1024", + ); + + try enc.encode(.imul, .{ + .op1 = .{ .reg = .bx }, + .op2 = .{ .mem = Memory.sib(.word, .{ + .base = .rbp, + .disp = -16, + }) }, + .op3 = .{ .imm = Immediate.s(-1024) }, + }); + try expectEqualHexStrings( + "\x66\x69\x5D\xF0\x00\xFC", + enc.code(), + "imul bx, WORD PTR [rbp - 16], -1024", + ); + + try enc.encode(.imul, .{ + .op1 = .{ .reg = .bx }, + .op2 = .{ .mem = Memory.sib(.word, .{ + .base = .rbp, + .disp = -16, + }) }, + .op3 = .{ .imm = Immediate.u(1024) }, + }); + try expectEqualHexStrings( + "\x66\x69\x5D\xF0\x00\x04", + enc.code(), + "imul bx, WORD PTR [rbp - 16], 1024", + ); +} + +test "lower MR encoding" { + var enc = TestEncode{}; + + try enc.encode(.mov, .{ .op1 = .{ .reg = .rax }, .op2 = .{ .reg = .rbx } }); + try expectEqualHexStrings("\x48\x89\xD8", enc.code(), "mov rax, rbx"); + + try enc.encode(.mov, .{ .op1 = .{ .mem = Memory.sib(.qword, .{ + .base = .rbp, + .disp = -4, + }) }, .op2 = .{ .reg = .r11 } }); + try expectEqualHexStrings("\x4c\x89\x5d\xfc", enc.code(), "mov QWORD PTR [rbp - 4], r11"); + + try enc.encode(.mov, .{ .op1 = .{ .mem = Memory.rip(.qword, 0x10) }, .op2 = .{ .reg = .r12 } }); + try expectEqualHexStrings("\x4C\x89\x25\x10\x00\x00\x00", enc.code(), "mov QWORD PTR [rip + 0x10], r12"); + + try enc.encode(.mov, .{ .op1 = .{ .mem = Memory.sib(.qword, .{ + .base = .r11, + .scale_index = .{ + .scale = 2, + .index = .r12, + }, + .disp = 0x10, + }) }, .op2 = .{ .reg = .r13 } }); + try expectEqualHexStrings("\x4F\x89\x6C\x63\x10", enc.code(), "mov QWORD PTR [r11 + 2 * r12 + 0x10], r13"); + + try enc.encode(.mov, .{ .op1 = .{ .mem = Memory.rip(.word, -0x10) }, .op2 = .{ .reg = .r12w } }); + try expectEqualHexStrings("\x66\x44\x89\x25\xF0\xFF\xFF\xFF", enc.code(), "mov WORD PTR [rip - 0x10], r12w"); + + try enc.encode(.mov, .{ .op1 = .{ .mem = Memory.sib(.byte, .{ + .base = .r11, + .scale_index = .{ + .scale = 2, + .index = .r12, + }, + .disp = 0x10, + }) }, .op2 = .{ .reg = .r13b } }); + try expectEqualHexStrings("\x47\x88\x6C\x63\x10", enc.code(), "mov BYTE PTR [r11 + 2 * r12 + 0x10], r13b"); + + try enc.encode(.add, .{ .op1 = .{ .mem = Memory.sib(.byte, .{ + .base = .ds, + .disp = 0x10000000, + }) }, .op2 = .{ .reg = .r12b } }); + try expectEqualHexStrings("\x44\x00\x24\x25\x00\x00\x00\x10", enc.code(), "add BYTE PTR ds:0x10000000, r12b"); + + try enc.encode(.add, .{ .op1 = .{ .mem = Memory.sib(.dword, .{ + .base = .ds, + .disp = 0x10000000, + }) }, .op2 = .{ .reg = .r12d } }); + try expectEqualHexStrings("\x44\x01\x24\x25\x00\x00\x00\x10", enc.code(), "add DWORD PTR [ds:0x10000000], r12d"); + + try enc.encode(.add, .{ .op1 = .{ .mem = Memory.sib(.dword, .{ + .base = .gs, + .disp = 0x10000000, + }) }, .op2 = .{ .reg = .r12d } }); + try expectEqualHexStrings("\x65\x44\x01\x24\x25\x00\x00\x00\x10", enc.code(), "add DWORD PTR [gs:0x10000000], r12d"); + + try enc.encode(.sub, .{ .op1 = .{ .mem = Memory.sib(.qword, .{ + .base = .r11, + .disp = 0x10000000, + }) }, .op2 = .{ .reg = .r12 } }); + try expectEqualHexStrings("\x4D\x29\xA3\x00\x00\x00\x10", enc.code(), "sub QWORD PTR [r11 + 0x10000000], r12"); +} + +test "lower M encoding" { + var enc = TestEncode{}; + + try enc.encode(.call, .{ .op1 = .{ .reg = .r12 } }); + try expectEqualHexStrings("\x41\xFF\xD4", enc.code(), "call r12"); + + try enc.encode(.call, .{ .op1 = .{ .mem = Memory.sib(.qword, .{ + .base = .r12, + .disp = 0, + }) } }); + try expectEqualHexStrings("\x41\xFF\x14\x24", enc.code(), "call QWORD PTR [r12]"); + + try enc.encode(.call, .{ .op1 = .{ .mem = Memory.sib(.qword, .{ + .base = null, + .scale_index = .{ + .index = .r11, + .scale = 2, + }, + .disp = 0, + }) } }); + try expectEqualHexStrings("\x42\xFF\x14\x5D\x00\x00\x00\x00", enc.code(), "call QWORD PTR [r11 * 2]"); + + try enc.encode(.call, .{ .op1 = .{ .mem = Memory.sib(.qword, .{ + .base = null, + .scale_index = .{ + .index = .r12, + .scale = 2, + }, + .disp = 0, + }) } }); + try expectEqualHexStrings("\x42\xFF\x14\x65\x00\x00\x00\x00", enc.code(), "call QWORD PTR [r12 * 2]"); + + try enc.encode(.call, .{ .op1 = .{ .mem = Memory.sib(.qword, .{ + .base = .gs, + .disp = 0, + }) } }); + try expectEqualHexStrings("\x65\xFF\x14\x25\x00\x00\x00\x00", enc.code(), "call gs:0x0"); + + try enc.encode(.call, .{ .op1 = .{ .imm = Immediate.s(0) } }); + try expectEqualHexStrings("\xE8\x00\x00\x00\x00", enc.code(), "call 0x0"); + + try enc.encode(.push, .{ .op1 = .{ .mem = Memory.sib(.qword, .{ + .base = .rbp, + .disp = 0, + }) } }); + try expectEqualHexStrings("\xFF\x75\x00", enc.code(), "push QWORD PTR [rbp]"); + + try enc.encode(.push, .{ .op1 = .{ .mem = Memory.sib(.word, .{ + .base = .rbp, + .disp = 0, + }) } }); + try expectEqualHexStrings("\x66\xFF\x75\x00", enc.code(), "push QWORD PTR [rbp]"); + + try enc.encode(.pop, .{ .op1 = .{ .mem = Memory.rip(.qword, 0) } }); + try expectEqualHexStrings("\x8F\x05\x00\x00\x00\x00", enc.code(), "pop QWORD PTR [rip]"); + + try enc.encode(.pop, .{ .op1 = .{ .mem = Memory.rip(.word, 0) } }); + try expectEqualHexStrings("\x66\x8F\x05\x00\x00\x00\x00", enc.code(), "pop WORD PTR [rbp]"); + + try enc.encode(.imul, .{ .op1 = .{ .reg = .rax } }); + try expectEqualHexStrings("\x48\xF7\xE8", enc.code(), "imul rax"); + + try enc.encode(.imul, .{ .op1 = .{ .reg = .r12 } }); + try expectEqualHexStrings("\x49\xF7\xEC", enc.code(), "imul r12"); +} + +test "lower O encoding" { + var enc = TestEncode{}; + + try enc.encode(.push, .{ .op1 = .{ .reg = .rax } }); + try expectEqualHexStrings("\x50", enc.code(), "push rax"); + + try enc.encode(.push, .{ .op1 = .{ .reg = .r12w } }); + try expectEqualHexStrings("\x66\x41\x54", enc.code(), "push r12w"); + + try enc.encode(.pop, .{ .op1 = .{ .reg = .r12 } }); + try expectEqualHexStrings("\x41\x5c", enc.code(), "pop r12"); +} + +test "lower OI encoding" { + var enc = TestEncode{}; + + try enc.encode(.mov, .{ .op1 = .{ .reg = .rax }, .op2 = .{ .imm = Immediate.u(0x1000000000000000) } }); + try expectEqualHexStrings( + "\x48\xB8\x00\x00\x00\x00\x00\x00\x00\x10", + enc.code(), + "movabs rax, 0x1000000000000000", + ); + + try enc.encode(.mov, .{ .op1 = .{ .reg = .r11 }, .op2 = .{ .imm = Immediate.u(0x1000000000000000) } }); + try expectEqualHexStrings( + "\x49\xBB\x00\x00\x00\x00\x00\x00\x00\x10", + enc.code(), + "movabs r11, 0x1000000000000000", + ); + + try enc.encode(.mov, .{ .op1 = .{ .reg = .r11d }, .op2 = .{ .imm = Immediate.u(0x10000000) } }); + try expectEqualHexStrings("\x41\xBB\x00\x00\x00\x10", enc.code(), "mov r11d, 0x10000000"); + + try enc.encode(.mov, .{ .op1 = .{ .reg = .r11w }, .op2 = .{ .imm = Immediate.u(0x1000) } }); + try expectEqualHexStrings("\x66\x41\xBB\x00\x10", enc.code(), "mov r11w, 0x1000"); + + try enc.encode(.mov, .{ .op1 = .{ .reg = .r11b }, .op2 = .{ .imm = Immediate.u(0x10) } }); + try expectEqualHexStrings("\x41\xB3\x10", enc.code(), "mov r11b, 0x10"); +} + +test "lower FD/TD encoding" { + var enc = TestEncode{}; + + try enc.encode(.mov, .{ .op1 = .{ .reg = .rax }, .op2 = .{ .mem = Memory.moffs(.cs, 0x10) } }); + try expectEqualHexStrings("\x2E\x48\xA1\x10\x00\x00\x00\x00\x00\x00\x00", enc.code(), "movabs rax, cs:0x10"); + + try enc.encode(.mov, .{ .op1 = .{ .reg = .eax }, .op2 = .{ .mem = Memory.moffs(.fs, 0x10) } }); + try expectEqualHexStrings("\x64\xA1\x10\x00\x00\x00\x00\x00\x00\x00", enc.code(), "movabs eax, fs:0x10"); + + try enc.encode(.mov, .{ .op1 = .{ .reg = .ax }, .op2 = .{ .mem = Memory.moffs(.gs, 0x10) } }); + try expectEqualHexStrings("\x65\x66\xA1\x10\x00\x00\x00\x00\x00\x00\x00", enc.code(), "movabs ax, gs:0x10"); + + try enc.encode(.mov, .{ .op1 = .{ .reg = .al }, .op2 = .{ .mem = Memory.moffs(.ds, 0x10) } }); + try expectEqualHexStrings("\xA0\x10\x00\x00\x00\x00\x00\x00\x00", enc.code(), "movabs al, ds:0x10"); + + try enc.encode(.mov, .{ .op1 = .{ .mem = Memory.moffs(.cs, 0x10) }, .op2 = .{ .reg = .rax } }); + try expectEqualHexStrings("\x2E\x48\xA3\x10\x00\x00\x00\x00\x00\x00\x00", enc.code(), "movabs cs:0x10, rax"); + + try enc.encode(.mov, .{ .op1 = .{ .mem = Memory.moffs(.fs, 0x10) }, .op2 = .{ .reg = .eax } }); + try expectEqualHexStrings("\x64\xA3\x10\x00\x00\x00\x00\x00\x00\x00", enc.code(), "movabs fs:0x10, eax"); + + try enc.encode(.mov, .{ .op1 = .{ .mem = Memory.moffs(.gs, 0x10) }, .op2 = .{ .reg = .ax } }); + try expectEqualHexStrings("\x65\x66\xA3\x10\x00\x00\x00\x00\x00\x00\x00", enc.code(), "movabs gs:0x10, ax"); + + try enc.encode(.mov, .{ .op1 = .{ .mem = Memory.moffs(.ds, 0x10) }, .op2 = .{ .reg = .al } }); + try expectEqualHexStrings("\xA2\x10\x00\x00\x00\x00\x00\x00\x00", enc.code(), "movabs ds:0x10, al"); +} + +test "lower NP encoding" { + var enc = TestEncode{}; + + try enc.encode(.int3, .{}); + try expectEqualHexStrings("\xCC", enc.code(), "int3"); + + try enc.encode(.nop, .{}); + try expectEqualHexStrings("\x90", enc.code(), "nop"); + + try enc.encode(.ret, .{}); + try expectEqualHexStrings("\xC3", enc.code(), "ret"); + + try enc.encode(.syscall, .{}); + try expectEqualHexStrings("\x0f\x05", enc.code(), "syscall"); +} + +fn invalidInstruction(mnemonic: Instruction.Mnemonic, args: struct { + op1: Instruction.Operand = .none, + op2: Instruction.Operand = .none, + op3: Instruction.Operand = .none, + op4: Instruction.Operand = .none, +}) !void { + const err = Instruction.new(mnemonic, .{ + .op1 = args.op1, + .op2 = args.op2, + .op3 = args.op3, + .op4 = args.op4, + }); + try testing.expectError(error.InvalidInstruction, err); +} + +test "invalid instruction" { + try invalidInstruction(.call, .{ .op1 = .{ .reg = .eax } }); + try invalidInstruction(.call, .{ .op1 = .{ .reg = .ax } }); + try invalidInstruction(.call, .{ .op1 = .{ .reg = .al } }); + try invalidInstruction(.call, .{ .op1 = .{ .mem = Memory.rip(.dword, 0) } }); + try invalidInstruction(.call, .{ .op1 = .{ .mem = Memory.rip(.word, 0) } }); + try invalidInstruction(.call, .{ .op1 = .{ .mem = Memory.rip(.byte, 0) } }); + try invalidInstruction(.mov, .{ .op1 = .{ .mem = Memory.rip(.word, 0x10) }, .op2 = .{ .reg = .r12 } }); + try invalidInstruction(.lea, .{ .op1 = .{ .reg = .rax }, .op2 = .{ .reg = .rbx } }); + try invalidInstruction(.lea, .{ .op1 = .{ .reg = .al }, .op2 = .{ .mem = Memory.rip(.byte, 0) } }); + try invalidInstruction(.pop, .{ .op1 = .{ .reg = .r12b } }); + try invalidInstruction(.pop, .{ .op1 = .{ .reg = .r12d } }); + try invalidInstruction(.push, .{ .op1 = .{ .reg = .r12b } }); + try invalidInstruction(.push, .{ .op1 = .{ .reg = .r12d } }); + try invalidInstruction(.push, .{ .op1 = .{ .imm = Immediate.u(0x1000000000000000) } }); +} + +fn cannotEncode(mnemonic: Instruction.Mnemonic, args: struct { + op1: Instruction.Operand = .none, + op2: Instruction.Operand = .none, + op3: Instruction.Operand = .none, + op4: Instruction.Operand = .none, +}) !void { + try testing.expectError(error.CannotEncode, Instruction.new(mnemonic, .{ + .op1 = args.op1, + .op2 = args.op2, + .op3 = args.op3, + .op4 = args.op4, + })); +} + +test "cannot encode" { + try cannotEncode(.@"test", .{ + .op1 = .{ .mem = Memory.sib(.byte, .{ .base = .r12, .disp = 0 }) }, + .op2 = .{ .reg = .ah }, + }); + try cannotEncode(.@"test", .{ + .op1 = .{ .reg = .r11b }, + .op2 = .{ .reg = .bh }, + }); + try cannotEncode(.mov, .{ + .op1 = .{ .reg = .sil }, + .op2 = .{ .reg = .ah }, + }); +} + +const Assembler = struct { + it: Tokenizer, + + const Tokenizer = struct { + input: []const u8, + pos: usize = 0, + + const Error = error{InvalidToken}; + + const Token = struct { + id: Id, + start: usize, + end: usize, + + const Id = enum { + eof, + + space, + new_line, + + colon, + comma, + open_br, + close_br, + plus, + minus, + star, + + string, + numeral, + }; + }; + + const Iterator = struct {}; + + fn next(it: *Tokenizer) !Token { + var result = Token{ + .id = .eof, + .start = it.pos, + .end = it.pos, + }; + + var state: enum { + start, + space, + new_line, + string, + numeral, + numeral_hex, + } = .start; + + while (it.pos < it.input.len) : (it.pos += 1) { + const ch = it.input[it.pos]; + switch (state) { + .start => switch (ch) { + ',' => { + result.id = .comma; + it.pos += 1; + break; + }, + ':' => { + result.id = .colon; + it.pos += 1; + break; + }, + '[' => { + result.id = .open_br; + it.pos += 1; + break; + }, + ']' => { + result.id = .close_br; + it.pos += 1; + break; + }, + '+' => { + result.id = .plus; + it.pos += 1; + break; + }, + '-' => { + result.id = .minus; + it.pos += 1; + break; + }, + '*' => { + result.id = .star; + it.pos += 1; + break; + }, + ' ', '\t' => state = .space, + '\n', '\r' => state = .new_line, + 'a'...'z', 'A'...'Z' => state = .string, + '0'...'9' => state = .numeral, + else => return error.InvalidToken, + }, + + .space => switch (ch) { + ' ', '\t' => {}, + else => { + result.id = .space; + break; + }, + }, + + .new_line => switch (ch) { + '\n', '\r', ' ', '\t' => {}, + else => { + result.id = .new_line; + break; + }, + }, + + .string => switch (ch) { + 'a'...'z', 'A'...'Z', '0'...'9' => {}, + else => { + result.id = .string; + break; + }, + }, + + .numeral => switch (ch) { + 'x' => state = .numeral_hex, + '0'...'9' => {}, + else => { + result.id = .numeral; + break; + }, + }, + + .numeral_hex => switch (ch) { + 'a'...'f' => {}, + '0'...'9' => {}, + else => { + result.id = .numeral; + break; + }, + }, + } + } + + if (it.pos >= it.input.len) { + switch (state) { + .string => result.id = .string, + .numeral, .numeral_hex => result.id = .numeral, + else => {}, + } + } + + result.end = it.pos; + return result; + } + + fn seekTo(it: *Tokenizer, pos: usize) void { + it.pos = pos; + } + }; + + pub fn init(input: []const u8) Assembler { + return .{ + .it = Tokenizer{ .input = input }, + }; + } + + pub fn assemble(as: *Assembler, writer: anytype) !void { + while (try as.next()) |parsed_inst| { + const inst = try Instruction.new(parsed_inst.mnemonic, .{ + .op1 = parsed_inst.ops[0], + .op2 = parsed_inst.ops[1], + .op3 = parsed_inst.ops[2], + .op4 = parsed_inst.ops[3], + }); + try inst.encode(writer); + } + } + + const ParseResult = struct { + mnemonic: Instruction.Mnemonic, + ops: [4]Instruction.Operand, + }; + + const ParseError = error{ + UnexpectedToken, + InvalidMnemonic, + InvalidOperand, + InvalidRegister, + InvalidPtrSize, + InvalidMemoryOperand, + InvalidScaleIndex, + } || Tokenizer.Error || std.fmt.ParseIntError; + + fn next(as: *Assembler) ParseError!?ParseResult { + try as.skip(2, .{ .space, .new_line }); + const mnemonic_tok = as.expect(.string) catch |err| switch (err) { + error.UnexpectedToken => return if (try as.peek() == .eof) null else err, + else => return err, + }; + const mnemonic = mnemonicFromString(as.source(mnemonic_tok)) orelse + return error.InvalidMnemonic; + try as.skip(1, .{.space}); + + const rules = .{ + .{}, + .{.register}, + .{.memory}, + .{.immediate}, + .{ .register, .register }, + .{ .register, .memory }, + .{ .memory, .register }, + .{ .register, .immediate }, + .{ .memory, .immediate }, + .{ .register, .register, .immediate }, + .{ .register, .memory, .immediate }, + }; + + const pos = as.it.pos; + inline for (rules) |rule| { + var ops = [4]Instruction.Operand{ .none, .none, .none, .none }; + if (as.parseOperandRule(rule, &ops)) { + return .{ + .mnemonic = mnemonic, + .ops = ops, + }; + } else |_| { + as.it.seekTo(pos); + } + } + + return error.InvalidOperand; + } + + fn source(as: *Assembler, token: Tokenizer.Token) []const u8 { + return as.it.input[token.start..token.end]; + } + + fn peek(as: *Assembler) Tokenizer.Error!Tokenizer.Token.Id { + const pos = as.it.pos; + const next_tok = try as.it.next(); + const id = next_tok.id; + as.it.seekTo(pos); + return id; + } + + fn expect(as: *Assembler, id: Tokenizer.Token.Id) ParseError!Tokenizer.Token { + const next_tok_id = try as.peek(); + if (next_tok_id == id) return as.it.next(); + return error.UnexpectedToken; + } + + fn skip(as: *Assembler, comptime num: comptime_int, tok_ids: [num]Tokenizer.Token.Id) Tokenizer.Error!void { + outer: while (true) { + const pos = as.it.pos; + const next_tok = try as.it.next(); + inline for (tok_ids) |tok_id| { + if (next_tok.id == tok_id) continue :outer; + } + as.it.seekTo(pos); + break; + } + } + + fn mnemonicFromString(bytes: []const u8) ?Instruction.Mnemonic { + const ti = @typeInfo(Instruction.Mnemonic).Enum; + inline for (ti.fields) |field| { + if (std.mem.eql(u8, bytes, field.name)) { + return @field(Instruction.Mnemonic, field.name); + } + } + return null; + } + + fn parseOperandRule(as: *Assembler, rule: anytype, ops: *[4]Instruction.Operand) ParseError!void { + inline for (rule, 0..) |cond, i| { + comptime assert(i < 4); + if (i > 0) { + _ = try as.expect(.comma); + try as.skip(1, .{.space}); + } + if (@typeInfo(@TypeOf(cond)) != .EnumLiteral) { + @compileError("invalid condition in the rule: " ++ @typeName(@TypeOf(cond))); + } + switch (cond) { + .register => { + const reg_tok = try as.expect(.string); + const reg = registerFromString(as.source(reg_tok)) orelse + return error.InvalidOperand; + ops[i] = .{ .reg = reg }; + }, + .memory => { + const mem = try as.parseMemory(); + ops[i] = .{ .mem = mem }; + }, + .immediate => { + const is_neg = if (as.expect(.minus)) |_| true else |_| false; + const imm_tok = try as.expect(.numeral); + const imm: Immediate = if (is_neg) blk: { + const imm = try std.fmt.parseInt(i32, as.source(imm_tok), 0); + break :blk .{ .signed = imm * -1 }; + } else .{ .unsigned = try std.fmt.parseInt(u64, as.source(imm_tok), 0) }; + ops[i] = .{ .imm = imm }; + }, + else => @compileError("unhandled enum literal " ++ @tagName(cond)), + } + try as.skip(1, .{.space}); + } + + try as.skip(1, .{.space}); + const tok = try as.it.next(); + switch (tok.id) { + .new_line, .eof => {}, + else => return error.InvalidOperand, + } + } + + fn registerFromString(bytes: []const u8) ?Register { + const ti = @typeInfo(Register).Enum; + inline for (ti.fields) |field| { + if (std.mem.eql(u8, bytes, field.name)) { + return @field(Register, field.name); + } + } + return null; + } + + fn parseMemory(as: *Assembler) ParseError!Memory { + const ptr_size: ?Memory.PtrSize = blk: { + const pos = as.it.pos; + const ptr_size = as.parsePtrSize() catch |err| switch (err) { + error.UnexpectedToken => { + as.it.seekTo(pos); + break :blk null; + }, + else => return err, + }; + break :blk ptr_size; + }; + + try as.skip(1, .{.space}); + + // Supported rules and orderings. + const rules = .{ + .{ .open_br, .base, .close_br }, // [ base ] + .{ .open_br, .base, .plus, .disp, .close_br }, // [ base + disp ] + .{ .open_br, .base, .minus, .disp, .close_br }, // [ base - disp ] + .{ .open_br, .disp, .plus, .base, .close_br }, // [ disp + base ] + .{ .open_br, .base, .plus, .index, .close_br }, // [ base + index ] + .{ .open_br, .base, .plus, .index, .star, .scale, .close_br }, // [ base + index * scale ] + .{ .open_br, .index, .star, .scale, .plus, .base, .close_br }, // [ index * scale + base ] + .{ .open_br, .base, .plus, .index, .star, .scale, .plus, .disp, .close_br }, // [ base + index * scale + disp ] + .{ .open_br, .base, .plus, .index, .star, .scale, .minus, .disp, .close_br }, // [ base + index * scale - disp ] + .{ .open_br, .index, .star, .scale, .plus, .base, .plus, .disp, .close_br }, // [ index * scale + base + disp ] + .{ .open_br, .index, .star, .scale, .plus, .base, .minus, .disp, .close_br }, // [ index * scale + base - disp ] + .{ .open_br, .disp, .plus, .index, .star, .scale, .plus, .base, .close_br }, // [ disp + index * scale + base ] + .{ .open_br, .disp, .plus, .base, .plus, .index, .star, .scale, .close_br }, // [ disp + base + index * scale ] + .{ .open_br, .base, .plus, .disp, .plus, .index, .star, .scale, .close_br }, // [ base + disp + index * scale ] + .{ .open_br, .base, .minus, .disp, .plus, .index, .star, .scale, .close_br }, // [ base - disp + index * scale ] + .{ .open_br, .base, .plus, .disp, .plus, .scale, .star, .index, .close_br }, // [ base + disp + scale * index ] + .{ .open_br, .base, .minus, .disp, .plus, .scale, .star, .index, .close_br }, // [ base - disp + scale * index ] + .{ .open_br, .rip, .plus, .disp, .close_br }, // [ rip + disp ] + .{ .open_br, .rip, .minus, .disp, .close_br }, // [ rig - disp ] + .{ .base, .colon, .disp }, // seg:disp + }; + + const pos = as.it.pos; + inline for (rules) |rule| { + if (as.parseMemoryRule(rule)) |res| { + if (res.rip) { + if (res.base != null or res.scale_index != null or res.offset != null) + return error.InvalidMemoryOperand; + return Memory.rip(ptr_size orelse .qword, res.disp orelse 0); + } + if (res.base) |base| { + if (res.rip) + return error.InvalidMemoryOperand; + if (res.offset) |offset| { + if (res.scale_index != null or res.disp != null) + return error.InvalidMemoryOperand; + return Memory.moffs(base, offset); + } + return Memory.sib(ptr_size orelse .qword, .{ + .base = base, + .scale_index = res.scale_index, + .disp = res.disp orelse 0, + }); + } + return error.InvalidMemoryOperand; + } else |_| { + as.it.seekTo(pos); + } + } + + return error.InvalidOperand; + } + + const MemoryParseResult = struct { + rip: bool = false, + base: ?Register = null, + scale_index: ?Memory.ScaleIndex = null, + disp: ?i32 = null, + offset: ?u64 = null, + }; + + fn parseMemoryRule(as: *Assembler, rule: anytype) ParseError!MemoryParseResult { + var res: MemoryParseResult = .{}; + inline for (rule, 0..) |cond, i| { + if (@typeInfo(@TypeOf(cond)) != .EnumLiteral) { + @compileError("unsupported condition type in the rule: " ++ @typeName(@TypeOf(cond))); + } + switch (cond) { + .open_br, .close_br, .plus, .minus, .star, .colon => { + _ = try as.expect(cond); + }, + .base => { + const tok = try as.expect(.string); + res.base = registerFromString(as.source(tok)) orelse return error.InvalidMemoryOperand; + }, + .rip => { + const tok = try as.expect(.string); + if (!std.mem.eql(u8, as.source(tok), "rip")) return error.InvalidMemoryOperand; + res.rip = true; + }, + .index => { + const tok = try as.expect(.string); + const index = registerFromString(as.source(tok)) orelse + return error.InvalidMemoryOperand; + if (res.scale_index) |*si| { + si.index = index; + } else { + res.scale_index = .{ .scale = 1, .index = index }; + } + }, + .scale => { + const tok = try as.expect(.numeral); + const scale = try std.fmt.parseInt(u2, as.source(tok), 0); + if (res.scale_index) |*si| { + si.scale = scale; + } else { + res.scale_index = .{ .scale = scale, .index = undefined }; + } + }, + .disp => { + const tok = try as.expect(.numeral); + const is_neg = blk: { + if (i > 0) { + if (rule[i - 1] == .minus) break :blk true; + } + break :blk false; + }; + if (std.fmt.parseInt(i32, as.source(tok), 0)) |disp| { + res.disp = if (is_neg) -1 * disp else disp; + } else |err| switch (err) { + error.Overflow => { + if (is_neg) return err; + if (res.base) |base| { + if (base.class() != .segment) return err; + } + const offset = try std.fmt.parseInt(u64, as.source(tok), 0); + res.offset = offset; + }, + else => return err, + } + }, + else => @compileError("unhandled operand output type: " ++ @tagName(cond)), + } + try as.skip(1, .{.space}); + } + return res; + } + + fn parsePtrSize(as: *Assembler) ParseError!Memory.PtrSize { + const size = try as.expect(.string); + try as.skip(1, .{.space}); + const ptr = try as.expect(.string); + + const size_raw = as.source(size); + const ptr_raw = as.source(ptr); + const len = size_raw.len + ptr_raw.len + 1; + var buf: ["qword ptr".len]u8 = undefined; + if (len > buf.len) return error.InvalidPtrSize; + + for (size_raw, 0..) |c, i| { + buf[i] = std.ascii.toLower(c); + } + buf[size_raw.len] = ' '; + for (ptr_raw, 0..) |c, i| { + buf[size_raw.len + i + 1] = std.ascii.toLower(c); + } + + const slice = buf[0..len]; + if (std.mem.eql(u8, slice, "qword ptr")) return .qword; + if (std.mem.eql(u8, slice, "dword ptr")) return .dword; + if (std.mem.eql(u8, slice, "word ptr")) return .word; + if (std.mem.eql(u8, slice, "byte ptr")) return .byte; + if (std.mem.eql(u8, slice, "tbyte ptr")) return .tbyte; + return error.InvalidPtrSize; + } +}; + +test "assemble" { + const input = + \\int3 + \\mov rax, rbx + \\mov qword ptr [rbp], rax + \\mov qword ptr [rbp - 16], rax + \\mov qword ptr [16 + rbp], rax + \\mov rax, 0x10 + \\mov byte ptr [rbp - 0x10], 0x10 + \\mov word ptr [rbp + r12], r11w + \\mov word ptr [rbp + r12 * 2], r11w + \\mov word ptr [rbp + r12 * 2 - 16], r11w + \\mov dword ptr [rip - 16], r12d + \\mov rax, fs:0x0 + \\mov rax, gs:0x1000000000000000 + \\movzx r12, al + \\imul r12, qword ptr [rbp - 16], 6 + \\jmp 0x0 + \\jc 0x0 + \\jb 0x0 + \\sal rax, 1 + \\sal rax, 63 + \\shl rax, 63 + \\sar rax, 63 + \\shr rax, 63 + \\test byte ptr [rbp - 16], r12b + \\sal r12, cl + \\mul qword ptr [rip - 16] + \\div r12 + \\idiv byte ptr [rbp - 16] + \\cwde + \\cbw + \\cdqe + \\test byte ptr [rbp], ah + \\test byte ptr [r12], spl + \\cdq + \\cwd + \\cqo + \\test bl, 0x1 + \\mov rbx,0x8000000000000000 + \\movss xmm0, dword ptr [rbp] + \\movss xmm0, xmm1 + \\movss dword ptr [rbp - 16 + rax * 2], xmm7 + \\movss dword ptr [rbp - 16 + rax * 2], xmm8 + \\movss xmm15, xmm9 + \\movsd xmm8, qword ptr [rbp - 16] + \\movsd qword ptr [rbp - 8], xmm0 + \\movq xmm8, qword ptr [rbp - 16] + \\movq qword ptr [rbp - 16], xmm8 + \\ucomisd xmm0, qword ptr [rbp - 16] + \\fisttp qword ptr [rbp - 16] + \\fisttp word ptr [rip + 32] + \\fisttp dword ptr [rax] + \\fld tbyte ptr [rbp] + \\fld dword ptr [rbp] + \\xor bl, 0xff + \\ud2 + \\add rsp, -1 + \\add rsp, 0xff + \\mov sil, byte ptr [rax + rcx * 1] + \\ + ; + + // zig fmt: off + const expected = &[_]u8{ + 0xCC, + 0x48, 0x89, 0xD8, + 0x48, 0x89, 0x45, 0x00, + 0x48, 0x89, 0x45, 0xF0, + 0x48, 0x89, 0x45, 0x10, + 0x48, 0xC7, 0xC0, 0x10, 0x00, 0x00, 0x00, + 0xC6, 0x45, 0xF0, 0x10, + 0x66, 0x46, 0x89, 0x5C, 0x25, 0x00, + 0x66, 0x46, 0x89, 0x5C, 0x65, 0x00, + 0x66, 0x46, 0x89, 0x5C, 0x65, 0xF0, + 0x44, 0x89, 0x25, 0xF0, 0xFF, 0xFF, 0xFF, + 0x64, 0x48, 0x8B, 0x04, 0x25, 0x00, 0x00, 0x00, 0x00, + 0x65, 0x48, 0xA1, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, + 0x4C, 0x0F, 0xB6, 0xE0, + 0x4C, 0x6B, 0x65, 0xF0, 0x06, + 0xE9, 0x00, 0x00, 0x00, 0x00, + 0x0F, 0x82, 0x00, 0x00, 0x00, 0x00, + 0x0F, 0x82, 0x00, 0x00, 0x00, 0x00, + 0x48, 0xD1, 0xE0, + 0x48, 0xC1, 0xE0, 0x3F, + 0x48, 0xC1, 0xE0, 0x3F, + 0x48, 0xC1, 0xF8, 0x3F, + 0x48, 0xC1, 0xE8, 0x3F, + 0x44, 0x84, 0x65, 0xF0, + 0x49, 0xD3, 0xE4, + 0x48, 0xF7, 0x25, 0xF0, 0xFF, 0xFF, 0xFF, + 0x49, 0xF7, 0xF4, + 0xF6, 0x7D, 0xF0, + 0x98, + 0x66, 0x98, + 0x48, 0x98, + 0x84, 0x65, 0x00, + 0x41, 0x84, 0x24, 0x24, + 0x99, + 0x66, 0x99, + 0x48, 0x99, + 0xF6, 0xC3, 0x01, + 0x48, 0xBB, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, + 0xF3, 0x0F, 0x10, 0x45, 0x00, + 0xF3, 0x0F, 0x10, 0xC1, + 0xF3, 0x0F, 0x11, 0x7C, 0x45, 0xF0, + 0xF3, 0x44, 0x0F, 0x11, 0x44, 0x45, 0xF0, + 0xF3, 0x45, 0x0F, 0x10, 0xF9, + 0xF2, 0x44, 0x0F, 0x10, 0x45, 0xF0, + 0xF2, 0x0F, 0x11, 0x45, 0xF8, + 0xF3, 0x44, 0x0F, 0x7E, 0x45, 0xF0, + 0x66, 0x44, 0x0F, 0xD6, 0x45, 0xF0, + 0x66, 0x0F, 0x2E, 0x45, 0xF0, + 0xDD, 0x4D, 0xF0, + 0xDF, 0x0D, 0x20, 0x00, 0x00, 0x00, + 0xDB, 0x08, + 0xDB, 0x6D, 0x00, + 0xD9, 0x45, 0x00, + 0x80, 0xF3, 0xFF, + 0x0F, 0x0B, + 0x48, 0x83, 0xC4, 0xFF, + 0x48, 0x81, 0xC4, 0xFF, 0x00, 0x00, 0x00, + 0x40, 0x8A, 0x34, 0x08, + }; + // zig fmt: on + + var as = Assembler.init(input); + var output = std.ArrayList(u8).init(testing.allocator); + defer output.deinit(); + try as.assemble(output.writer()); + try expectEqualHexStrings(expected, output.items, input); +} + +test "assemble - Jcc" { + const mnemonics = [_]struct { Instruction.Mnemonic, u8 }{ + .{ .ja, 0x87 }, + .{ .jae, 0x83 }, + .{ .jb, 0x82 }, + .{ .jbe, 0x86 }, + .{ .jc, 0x82 }, + .{ .je, 0x84 }, + .{ .jg, 0x8f }, + .{ .jge, 0x8d }, + .{ .jl, 0x8c }, + .{ .jle, 0x8e }, + .{ .jna, 0x86 }, + .{ .jnae, 0x82 }, + .{ .jnb, 0x83 }, + .{ .jnbe, 0x87 }, + .{ .jnc, 0x83 }, + .{ .jne, 0x85 }, + .{ .jng, 0x8e }, + .{ .jnge, 0x8c }, + .{ .jnl, 0x8d }, + .{ .jnle, 0x8f }, + .{ .jno, 0x81 }, + .{ .jnp, 0x8b }, + .{ .jns, 0x89 }, + .{ .jnz, 0x85 }, + .{ .jo, 0x80 }, + .{ .jp, 0x8a }, + .{ .jpe, 0x8a }, + .{ .jpo, 0x8b }, + .{ .js, 0x88 }, + .{ .jz, 0x84 }, + }; + + inline for (&mnemonics) |mnemonic| { + const input = @tagName(mnemonic[0]) ++ " 0x0"; + const expected = [_]u8{ 0x0f, mnemonic[1], 0x0, 0x0, 0x0, 0x0 }; + var as = Assembler.init(input); + var output = std.ArrayList(u8).init(testing.allocator); + defer output.deinit(); + try as.assemble(output.writer()); + try expectEqualHexStrings(&expected, output.items, input); + } +} + +test "assemble - SETcc" { + const mnemonics = [_]struct { Instruction.Mnemonic, u8 }{ + .{ .seta, 0x97 }, + .{ .setae, 0x93 }, + .{ .setb, 0x92 }, + .{ .setbe, 0x96 }, + .{ .setc, 0x92 }, + .{ .sete, 0x94 }, + .{ .setg, 0x9f }, + .{ .setge, 0x9d }, + .{ .setl, 0x9c }, + .{ .setle, 0x9e }, + .{ .setna, 0x96 }, + .{ .setnae, 0x92 }, + .{ .setnb, 0x93 }, + .{ .setnbe, 0x97 }, + .{ .setnc, 0x93 }, + .{ .setne, 0x95 }, + .{ .setng, 0x9e }, + .{ .setnge, 0x9c }, + .{ .setnl, 0x9d }, + .{ .setnle, 0x9f }, + .{ .setno, 0x91 }, + .{ .setnp, 0x9b }, + .{ .setns, 0x99 }, + .{ .setnz, 0x95 }, + .{ .seto, 0x90 }, + .{ .setp, 0x9a }, + .{ .setpe, 0x9a }, + .{ .setpo, 0x9b }, + .{ .sets, 0x98 }, + .{ .setz, 0x94 }, + }; + + inline for (&mnemonics) |mnemonic| { + const input = @tagName(mnemonic[0]) ++ " al"; + const expected = [_]u8{ 0x0f, mnemonic[1], 0xC0 }; + var as = Assembler.init(input); + var output = std.ArrayList(u8).init(testing.allocator); + defer output.deinit(); + try as.assemble(output.writer()); + try expectEqualHexStrings(&expected, output.items, input); + } +} + +test "assemble - CMOVcc" { + const mnemonics = [_]struct { Instruction.Mnemonic, u8 }{ + .{ .cmova, 0x47 }, + .{ .cmovae, 0x43 }, + .{ .cmovb, 0x42 }, + .{ .cmovbe, 0x46 }, + .{ .cmovc, 0x42 }, + .{ .cmove, 0x44 }, + .{ .cmovg, 0x4f }, + .{ .cmovge, 0x4d }, + .{ .cmovl, 0x4c }, + .{ .cmovle, 0x4e }, + .{ .cmovna, 0x46 }, + .{ .cmovnae, 0x42 }, + .{ .cmovnb, 0x43 }, + .{ .cmovnbe, 0x47 }, + .{ .cmovnc, 0x43 }, + .{ .cmovne, 0x45 }, + .{ .cmovng, 0x4e }, + .{ .cmovnge, 0x4c }, + .{ .cmovnl, 0x4d }, + .{ .cmovnle, 0x4f }, + .{ .cmovno, 0x41 }, + .{ .cmovnp, 0x4b }, + .{ .cmovns, 0x49 }, + .{ .cmovnz, 0x45 }, + .{ .cmovo, 0x40 }, + .{ .cmovp, 0x4a }, + .{ .cmovpe, 0x4a }, + .{ .cmovpo, 0x4b }, + .{ .cmovs, 0x48 }, + .{ .cmovz, 0x44 }, + }; + + inline for (&mnemonics) |mnemonic| { + const input = @tagName(mnemonic[0]) ++ " rax, rbx"; + const expected = [_]u8{ 0x48, 0x0f, mnemonic[1], 0xC3 }; + var as = Assembler.init(input); + var output = std.ArrayList(u8).init(testing.allocator); + defer output.deinit(); + try as.assemble(output.writer()); + try expectEqualHexStrings(&expected, output.items, input); + } +} |
