aboutsummaryrefslogtreecommitdiff
path: root/src/arch/x86_64/encoder.zig
diff options
context:
space:
mode:
authorJakub Konka <kubkon@jakubkonka.com>2023-03-12 08:41:44 +0100
committerJakub Konka <kubkon@jakubkonka.com>2023-03-12 08:41:44 +0100
commit707a74655be9fd702cb1be84baa719b29435fcf7 (patch)
tree5a01bc7f9c6e93e8b505ecadd463e316c7045e59 /src/arch/x86_64/encoder.zig
parent433558a92f005d3ad68528c62ffd6006c48b80bd (diff)
downloadzig-707a74655be9fd702cb1be84baa719b29435fcf7.tar.gz
zig-707a74655be9fd702cb1be84baa719b29435fcf7.zip
x86_64: downstream encoder/assembler tests
Diffstat (limited to 'src/arch/x86_64/encoder.zig')
-rw-r--r--src/arch/x86_64/encoder.zig1489
1 files changed, 1489 insertions, 0 deletions
diff --git a/src/arch/x86_64/encoder.zig b/src/arch/x86_64/encoder.zig
index 9206b621bc..7e29f95069 100644
--- a/src/arch/x86_64/encoder.zig
+++ b/src/arch/x86_64/encoder.zig
@@ -2,6 +2,7 @@ const std = @import("std");
const assert = std.debug.assert;
const log = std.log.scoped(.x86_64_encoder);
const math = std.math;
+const testing = std.testing;
const bits = @import("bits.zig");
const Encoding = @import("Encoding.zig");
@@ -784,3 +785,1491 @@ pub const Rex = struct {
return rex.w or rex.r or rex.x or rex.b;
}
};
+
+// Tests
+/// Compares two byte sequences and fails the test when they differ.
+///
+/// On mismatch, prints `assembly` together with lowercase hex renderings of
+/// `expected` and `given`, puts a caret under the first hex character at which
+/// the two renderings diverge, and returns `error.TestFailed`.
+/// `expected` must be non-empty. Scratch strings come from `testing.allocator`.
+fn expectEqualHexStrings(expected: []const u8, given: []const u8, assembly: []const u8) !void {
+    assert(expected.len > 0);
+    if (!std.mem.eql(u8, expected, given)) {
+        const gpa = testing.allocator;
+
+        const exp_hex = try std.fmt.allocPrint(gpa, "{x}", .{std.fmt.fmtSliceHexLower(expected)});
+        defer gpa.free(exp_hex);
+        const giv_hex = try std.fmt.allocPrint(gpa, "{x}", .{std.fmt.fmtSliceHexLower(given)});
+        defer gpa.free(giv_hex);
+
+        // The inputs are known to differ at this point, so the lookup is unwrapped directly.
+        const diff_at = std.mem.indexOfDiff(u8, exp_hex, giv_hex).?;
+
+        // Five extra columns line the caret up past the "EXP: " / "GIV: " prefixes.
+        const caret_indent = try gpa.alloc(u8, diff_at + 5);
+        defer gpa.free(caret_indent);
+        std.mem.set(u8, caret_indent, ' ');
+
+        std.debug.print("\nASM: {s}\nEXP: {s}\nGIV: {s}\n{s}^ -- first differing byte\n", .{
+            assembly,
+            exp_hex,
+            giv_hex,
+            caret_indent,
+        });
+        return error.TestFailed;
+    }
+}
+
+/// Test helper that encodes one instruction at a time into a fixed scratch buffer.
+const TestEncode = struct {
+    /// Scratch storage holding the most recently encoded instruction.
+    buffer: [32]u8 = undefined,
+    /// Number of valid bytes at the start of `buffer`.
+    index: usize = 0,
+
+    /// Builds `mnemonic` with the given operands through `Instruction.new` and
+    /// encodes it starting at the beginning of `buffer`, replacing any earlier
+    /// result. Errors from `Instruction.new` and from encoding are propagated.
+    fn encode(enc: *TestEncode, mnemonic: Instruction.Mnemonic, args: struct {
+        op1: Instruction.Operand = .none,
+        op2: Instruction.Operand = .none,
+        op3: Instruction.Operand = .none,
+        op4: Instruction.Operand = .none,
+    }) !void {
+        var stream = std.io.fixedBufferStream(&enc.buffer);
+        var count_writer = std.io.countingWriter(stream.writer());
+        const inst = try Instruction.new(mnemonic, .{
+            .op1 = args.op1,
+            .op2 = args.op2,
+            .op3 = args.op3,
+            .op4 = args.op4,
+        });
+        try inst.encode(count_writer.writer());
+        enc.index = count_writer.bytes_written;
+    }
+
+    /// Returns the bytes produced by the last `encode` call.
+    ///
+    /// The receiver is taken by pointer so that the returned slice refers to
+    /// the caller's `buffer`. With a by-value receiver the slice would point
+    /// into parameter storage, whose lifetime ends when this function returns.
+    fn code(enc: *const TestEncode) []const u8 {
+        return enc.buffer[0..enc.index];
+    }
+};
+
+// Encodes `mov rbx, 4` into a growable byte list and checks the emitted bytes.
+test "encode" {
+    var bytes = std.ArrayList(u8).init(testing.allocator);
+    defer bytes.deinit();
+
+    const mov_rbx_4 = try Instruction.new(.mov, .{
+        .op1 = .{ .reg = .rbx },
+        .op2 = .{ .imm = Immediate.u(4) },
+    });
+    try mov_rbx_4.encode(bytes.writer());
+
+    const want = [_]u8{ 0x48, 0xc7, 0xc3, 0x4, 0x0, 0x0, 0x0 };
+    try testing.expectEqualSlices(u8, &want, bytes.items);
+}
+
+// Exercises `push` and several arithmetic instructions that take an immediate.
+test "lower I encoding" {
+    var t = TestEncode{};
+
+    // push with immediates of increasing magnitude
+    try t.encode(.push, .{
+        .op1 = .{ .imm = Immediate.u(0x10) },
+    });
+    try expectEqualHexStrings("\x6A\x10", t.code(), "push 0x10");
+
+    try t.encode(.push, .{
+        .op1 = .{ .imm = Immediate.u(0x1000) },
+    });
+    try expectEqualHexStrings("\x66\x68\x00\x10", t.code(), "push 0x1000");
+
+    try t.encode(.push, .{
+        .op1 = .{ .imm = Immediate.u(0x10000000) },
+    });
+    try expectEqualHexStrings("\x68\x00\x00\x00\x10", t.code(), "push 0x10000000");
+
+    // arithmetic with a register destination and an immediate source
+    try t.encode(.adc, .{
+        .op1 = .{ .reg = .rax },
+        .op2 = .{ .imm = Immediate.u(0x10000000) },
+    });
+    try expectEqualHexStrings("\x48\x15\x00\x00\x00\x10", t.code(), "adc rax, 0x10000000");
+
+    try t.encode(.add, .{
+        .op1 = .{ .reg = .al },
+        .op2 = .{ .imm = Immediate.u(0x10) },
+    });
+    try expectEqualHexStrings("\x04\x10", t.code(), "add al, 0x10");
+
+    try t.encode(.add, .{
+        .op1 = .{ .reg = .rax },
+        .op2 = .{ .imm = Immediate.u(0x10) },
+    });
+    try expectEqualHexStrings("\x48\x83\xC0\x10", t.code(), "add rax, 0x10");
+
+    try t.encode(.sbb, .{
+        .op1 = .{ .reg = .ax },
+        .op2 = .{ .imm = Immediate.u(0x10) },
+    });
+    try expectEqualHexStrings("\x66\x1D\x10\x00", t.code(), "sbb ax, 0x10");
+
+    try t.encode(.xor, .{
+        .op1 = .{ .reg = .al },
+        .op2 = .{ .imm = Immediate.u(0x10) },
+    });
+    try expectEqualHexStrings("\x34\x10", t.code(), "xor al, 0x10");
+}
+
+// Exercises instructions with a register or memory destination and an immediate source.
+test "lower MI encoding" {
+    var t = TestEncode{};
+
+    try t.encode(.mov, .{
+        .op1 = .{ .reg = .r12 },
+        .op2 = .{ .imm = Immediate.u(0x1000) },
+    });
+    try expectEqualHexStrings("\x49\xC7\xC4\x00\x10\x00\x00", t.code(), "mov r12, 0x1000");
+
+    try t.encode(.mov, .{
+        .op1 = .{ .mem = Memory.sib(.byte, .{ .base = .r12, .disp = 0 }) },
+        .op2 = .{ .imm = Immediate.u(0x10) },
+    });
+    try expectEqualHexStrings("\x41\xC6\x04\x24\x10", t.code(), "mov BYTE PTR [r12], 0x10");
+
+    // The same `mov r12, 0x1000` case is encoded twice more, reusing the helper's buffer.
+    try t.encode(.mov, .{
+        .op1 = .{ .reg = .r12 },
+        .op2 = .{ .imm = Immediate.u(0x1000) },
+    });
+    try expectEqualHexStrings("\x49\xC7\xC4\x00\x10\x00\x00", t.code(), "mov r12, 0x1000");
+
+    try t.encode(.mov, .{
+        .op1 = .{ .reg = .r12 },
+        .op2 = .{ .imm = Immediate.u(0x1000) },
+    });
+    try expectEqualHexStrings("\x49\xC7\xC4\x00\x10\x00\x00", t.code(), "mov r12, 0x1000");
+
+    try t.encode(.mov, .{
+        .op1 = .{ .reg = .rax },
+        .op2 = .{ .imm = Immediate.u(0x10) },
+    });
+    try expectEqualHexStrings("\x48\xc7\xc0\x10\x00\x00\x00", t.code(), "mov rax, 0x10");
+
+    try t.encode(.mov, .{
+        .op1 = .{ .mem = Memory.sib(.dword, .{ .base = .r11, .disp = 0 }) },
+        .op2 = .{ .imm = Immediate.u(0x10) },
+    });
+    try expectEqualHexStrings("\x41\xc7\x03\x10\x00\x00\x00", t.code(), "mov DWORD PTR [r11], 0x10");
+
+    try t.encode(.mov, .{
+        .op1 = .{ .mem = Memory.rip(.qword, 0x10) },
+        .op2 = .{ .imm = Immediate.u(0x10) },
+    });
+    try expectEqualHexStrings(
+        "\x48\xC7\x05\x10\x00\x00\x00\x10\x00\x00\x00",
+        t.code(),
+        "mov QWORD PTR [rip + 0x10], 0x10",
+    );
+
+    try t.encode(.mov, .{
+        .op1 = .{ .mem = Memory.sib(.qword, .{ .base = .rbp, .disp = -8 }) },
+        .op2 = .{ .imm = Immediate.u(0x10) },
+    });
+    try expectEqualHexStrings("\x48\xc7\x45\xf8\x10\x00\x00\x00", t.code(), "mov QWORD PTR [rbp - 8], 0x10");
+
+    try t.encode(.mov, .{
+        .op1 = .{ .mem = Memory.sib(.word, .{ .base = .rbp, .disp = -2 }) },
+        .op2 = .{ .imm = Immediate.s(-16) },
+    });
+    try expectEqualHexStrings("\x66\xC7\x45\xFE\xF0\xFF", t.code(), "mov WORD PTR [rbp - 2], -16");
+
+    try t.encode(.mov, .{
+        .op1 = .{ .mem = Memory.sib(.byte, .{ .base = .rbp, .disp = -1 }) },
+        .op2 = .{ .imm = Immediate.u(0x10) },
+    });
+    try expectEqualHexStrings("\xC6\x45\xFF\x10", t.code(), "mov BYTE PTR [rbp - 1], 0x10");
+
+    try t.encode(.mov, .{
+        .op1 = .{ .mem = Memory.sib(.qword, .{
+            .base = .ds,
+            .disp = 0x10000000,
+            .scale_index = .{ .scale = 2, .index = .rcx },
+        }) },
+        .op2 = .{ .imm = Immediate.u(0x10) },
+    });
+    try expectEqualHexStrings(
+        "\x48\xC7\x04\x4D\x00\x00\x00\x10\x10\x00\x00\x00",
+        t.code(),
+        "mov QWORD PTR [rcx*2 + 0x10000000], 0x10",
+    );
+
+    try t.encode(.adc, .{
+        .op1 = .{ .mem = Memory.sib(.byte, .{ .base = .rbp, .disp = -0x10 }) },
+        .op2 = .{ .imm = Immediate.u(0x10) },
+    });
+    try expectEqualHexStrings("\x80\x55\xF0\x10", t.code(), "adc BYTE PTR [rbp - 0x10], 0x10");
+
+    try t.encode(.adc, .{
+        .op1 = .{ .mem = Memory.rip(.qword, 0) },
+        .op2 = .{ .imm = Immediate.u(0x10) },
+    });
+    try expectEqualHexStrings("\x48\x83\x15\x00\x00\x00\x00\x10", t.code(), "adc QWORD PTR [rip], 0x10");
+
+    try t.encode(.adc, .{
+        .op1 = .{ .reg = .rax },
+        .op2 = .{ .imm = Immediate.u(0x10) },
+    });
+    try expectEqualHexStrings("\x48\x83\xD0\x10", t.code(), "adc rax, 0x10");
+
+    try t.encode(.add, .{
+        .op1 = .{ .mem = Memory.sib(.dword, .{ .base = .rdx, .disp = -8 }) },
+        .op2 = .{ .imm = Immediate.u(0x10) },
+    });
+    try expectEqualHexStrings("\x83\x42\xF8\x10", t.code(), "add DWORD PTR [rdx - 8], 0x10");
+
+    try t.encode(.add, .{
+        .op1 = .{ .reg = .rax },
+        .op2 = .{ .imm = Immediate.u(0x10) },
+    });
+    try expectEqualHexStrings("\x48\x83\xC0\x10", t.code(), "add rax, 0x10");
+
+    try t.encode(.add, .{
+        .op1 = .{ .mem = Memory.sib(.qword, .{ .base = .rbp, .disp = -0x10 }) },
+        .op2 = .{ .imm = Immediate.s(-0x10) },
+    });
+    try expectEqualHexStrings("\x48\x83\x45\xF0\xF0", t.code(), "add QWORD PTR [rbp - 0x10], -0x10");
+
+    try t.encode(.@"and", .{
+        .op1 = .{ .mem = Memory.sib(.dword, .{ .base = .ds, .disp = 0x10000000 }) },
+        .op2 = .{ .imm = Immediate.u(0x10) },
+    });
+    try expectEqualHexStrings(
+        "\x83\x24\x25\x00\x00\x00\x10\x10",
+        t.code(),
+        "and DWORD PTR ds:0x10000000, 0x10",
+    );
+
+    try t.encode(.@"and", .{
+        .op1 = .{ .mem = Memory.sib(.dword, .{ .base = .es, .disp = 0x10000000 }) },
+        .op2 = .{ .imm = Immediate.u(0x10) },
+    });
+    try expectEqualHexStrings(
+        "\x26\x83\x24\x25\x00\x00\x00\x10\x10",
+        t.code(),
+        "and DWORD PTR es:0x10000000, 0x10",
+    );
+
+    try t.encode(.@"and", .{
+        .op1 = .{ .mem = Memory.sib(.dword, .{ .base = .r12, .disp = 0x10000000 }) },
+        .op2 = .{ .imm = Immediate.u(0x10) },
+    });
+    try expectEqualHexStrings(
+        "\x41\x83\xA4\x24\x00\x00\x00\x10\x10",
+        t.code(),
+        "and DWORD PTR [r12 + 0x10000000], 0x10",
+    );
+
+    try t.encode(.sub, .{
+        .op1 = .{ .mem = Memory.sib(.dword, .{ .base = .r11, .disp = 0x10000000 }) },
+        .op2 = .{ .imm = Immediate.u(0x10) },
+    });
+    try expectEqualHexStrings(
+        "\x41\x83\xAB\x00\x00\x00\x10\x10",
+        t.code(),
+        "sub DWORD PTR [r11 + 0x10000000], 0x10",
+    );
+}
+
+// Exercises instructions with a register destination and a register or memory source
+// (plus the segment-register `mov` forms grouped here).
+//
+// The third argument of `expectEqualHexStrings` is only printed when a check fails, so
+// each label must describe the operands that were actually encoded.
+test "lower RM encoding" {
+    var enc = TestEncode{};
+
+    try enc.encode(.mov, .{ .op1 = .{ .reg = .rax }, .op2 = .{ .mem = Memory.sib(.qword, .{
+        .base = .r11,
+        .disp = 0,
+    }) } });
+    try expectEqualHexStrings("\x49\x8b\x03", enc.code(), "mov rax, QWORD PTR [r11]");
+
+    try enc.encode(.mov, .{ .op1 = .{ .reg = .rbx }, .op2 = .{ .mem = Memory.sib(.qword, .{
+        .base = .ds,
+        .disp = 0x10,
+    }) } });
+    try expectEqualHexStrings("\x48\x8B\x1C\x25\x10\x00\x00\x00", enc.code(), "mov rbx, QWORD PTR ds:0x10");
+
+    try enc.encode(.mov, .{ .op1 = .{ .reg = .rax }, .op2 = .{ .mem = Memory.sib(.qword, .{
+        .base = .rbp,
+        .disp = -4,
+    }) } });
+    try expectEqualHexStrings("\x48\x8B\x45\xFC", enc.code(), "mov rax, QWORD PTR [rbp - 4]");
+
+    try enc.encode(.mov, .{ .op1 = .{ .reg = .rax }, .op2 = .{ .mem = Memory.sib(.qword, .{
+        .base = .rbp,
+        .scale_index = .{
+            .scale = 1,
+            .index = .rcx,
+        },
+        .disp = -8,
+    }) } });
+    try expectEqualHexStrings("\x48\x8B\x44\x0D\xF8", enc.code(), "mov rax, QWORD PTR [rbp + rcx*1 - 8]");
+
+    try enc.encode(.mov, .{ .op1 = .{ .reg = .eax }, .op2 = .{ .mem = Memory.sib(.dword, .{
+        .base = .rbp,
+        .scale_index = .{
+            .scale = 4,
+            .index = .rdx,
+        },
+        .disp = -4,
+    }) } });
+    try expectEqualHexStrings("\x8B\x44\x95\xFC", enc.code(), "mov eax, dword ptr [rbp + rdx*4 - 4]");
+
+    try enc.encode(.mov, .{ .op1 = .{ .reg = .rax }, .op2 = .{ .mem = Memory.sib(.qword, .{
+        .base = .rbp,
+        .scale_index = .{
+            .scale = 8,
+            .index = .rcx,
+        },
+        .disp = -8,
+    }) } });
+    try expectEqualHexStrings("\x48\x8B\x44\xCD\xF8", enc.code(), "mov rax, QWORD PTR [rbp + rcx*8 - 8]");
+
+    try enc.encode(.mov, .{ .op1 = .{ .reg = .r8b }, .op2 = .{ .mem = Memory.sib(.byte, .{
+        .base = .rsi,
+        .scale_index = .{
+            .scale = 1,
+            .index = .rcx,
+        },
+        .disp = -24,
+    }) } });
+    try expectEqualHexStrings("\x44\x8A\x44\x0E\xE8", enc.code(), "mov r8b, BYTE PTR [rsi + rcx*1 - 24]");
+
+    // TODO this mnemonic needs cleanup as some prefixes are obsolete.
+    try enc.encode(.mov, .{ .op1 = .{ .reg = .rax }, .op2 = .{ .reg = .cs } });
+    try expectEqualHexStrings("\x48\x8C\xC8", enc.code(), "mov rax, cs");
+
+    try enc.encode(.mov, .{ .op1 = .{ .mem = Memory.sib(.qword, .{
+        .base = .rbp,
+        .disp = -16,
+    }) }, .op2 = .{ .reg = .fs } });
+    try expectEqualHexStrings("\x48\x8C\x65\xF0", enc.code(), "mov QWORD PTR [rbp - 16], fs");
+
+    try enc.encode(.mov, .{ .op1 = .{ .reg = .r12w }, .op2 = .{ .reg = .cs } });
+    try expectEqualHexStrings("\x66\x41\x8C\xCC", enc.code(), "mov r12w, cs");
+
+    try enc.encode(.mov, .{ .op1 = .{ .mem = Memory.sib(.word, .{
+        .base = .rbp,
+        .disp = -16,
+    }) }, .op2 = .{ .reg = .fs } });
+    try expectEqualHexStrings("\x66\x8C\x65\xF0", enc.code(), "mov WORD PTR [rbp - 16], fs");
+
+    try enc.encode(.movsx, .{ .op1 = .{ .reg = .eax }, .op2 = .{ .reg = .bx } });
+    try expectEqualHexStrings("\x0F\xBF\xC3", enc.code(), "movsx eax, bx");
+
+    try enc.encode(.movsx, .{ .op1 = .{ .reg = .eax }, .op2 = .{ .reg = .bl } });
+    try expectEqualHexStrings("\x0F\xBE\xC3", enc.code(), "movsx eax, bl");
+
+    try enc.encode(.movsx, .{ .op1 = .{ .reg = .ax }, .op2 = .{ .reg = .bl } });
+    try expectEqualHexStrings("\x66\x0F\xBE\xC3", enc.code(), "movsx ax, bl");
+
+    // The memory operand is `.word`-sized, so the label reads WORD PTR.
+    try enc.encode(.movsx, .{ .op1 = .{ .reg = .eax }, .op2 = .{ .mem = Memory.sib(.word, .{
+        .base = .rbp,
+        .disp = 0,
+    }) } });
+    try expectEqualHexStrings("\x0F\xBF\x45\x00", enc.code(), "movsx eax, WORD PTR [rbp]");
+
+    try enc.encode(.movsx, .{ .op1 = .{ .reg = .eax }, .op2 = .{ .mem = Memory.sib(.byte, .{
+        .base = null,
+        .scale_index = .{
+            .index = .rax,
+            .scale = 2,
+        },
+        .disp = 0,
+    }) } });
+    try expectEqualHexStrings("\x0F\xBE\x04\x45\x00\x00\x00\x00", enc.code(), "movsx eax, BYTE PTR [rax * 2]");
+
+    try enc.encode(.movsx, .{ .op1 = .{ .reg = .ax }, .op2 = .{ .mem = Memory.rip(.byte, 0x10) } });
+    try expectEqualHexStrings("\x66\x0F\xBE\x05\x10\x00\x00\x00", enc.code(), "movsx ax, BYTE PTR [rip + 0x10]");
+
+    try enc.encode(.movsx, .{ .op1 = .{ .reg = .rax }, .op2 = .{ .reg = .bx } });
+    try expectEqualHexStrings("\x48\x0F\xBF\xC3", enc.code(), "movsx rax, bx");
+
+    try enc.encode(.movsxd, .{ .op1 = .{ .reg = .rax }, .op2 = .{ .reg = .ebx } });
+    try expectEqualHexStrings("\x48\x63\xC3", enc.code(), "movsxd rax, ebx");
+
+    try enc.encode(.lea, .{ .op1 = .{ .reg = .rax }, .op2 = .{ .mem = Memory.rip(.qword, 0x10) } });
+    try expectEqualHexStrings("\x48\x8D\x05\x10\x00\x00\x00", enc.code(), "lea rax, QWORD PTR [rip + 0x10]");
+
+    try enc.encode(.lea, .{ .op1 = .{ .reg = .rax }, .op2 = .{ .mem = Memory.rip(.dword, 0x10) } });
+    try expectEqualHexStrings("\x48\x8D\x05\x10\x00\x00\x00", enc.code(), "lea rax, DWORD PTR [rip + 0x10]");
+
+    try enc.encode(.lea, .{ .op1 = .{ .reg = .eax }, .op2 = .{ .mem = Memory.rip(.dword, 0x10) } });
+    try expectEqualHexStrings("\x8D\x05\x10\x00\x00\x00", enc.code(), "lea eax, DWORD PTR [rip + 0x10]");
+
+    try enc.encode(.lea, .{ .op1 = .{ .reg = .eax }, .op2 = .{ .mem = Memory.rip(.word, 0x10) } });
+    try expectEqualHexStrings("\x8D\x05\x10\x00\x00\x00", enc.code(), "lea eax, WORD PTR [rip + 0x10]");
+
+    try enc.encode(.lea, .{ .op1 = .{ .reg = .ax }, .op2 = .{ .mem = Memory.rip(.byte, 0x10) } });
+    try expectEqualHexStrings("\x66\x8D\x05\x10\x00\x00\x00", enc.code(), "lea ax, BYTE PTR [rip + 0x10]");
+
+    try enc.encode(.lea, .{ .op1 = .{ .reg = .rsi }, .op2 = .{ .mem = Memory.sib(.qword, .{
+        .base = .rbp,
+        .scale_index = .{
+            .scale = 1,
+            .index = .rcx,
+        },
+        .disp = 0,
+    }) } });
+    try expectEqualHexStrings("\x48\x8D\x74\x0D\x00", enc.code(), "lea rsi, QWORD PTR [rbp + rcx*1 + 0]");
+
+    try enc.encode(.add, .{ .op1 = .{ .reg = .r11 }, .op2 = .{ .mem = Memory.sib(.qword, .{
+        .base = .ds,
+        .disp = 0x10000000,
+    }) } });
+    try expectEqualHexStrings("\x4C\x03\x1C\x25\x00\x00\x00\x10", enc.code(), "add r11, QWORD PTR ds:0x10000000");
+
+    // The destination register is r12b in the next two cases; the labels name it accordingly.
+    try enc.encode(.add, .{ .op1 = .{ .reg = .r12b }, .op2 = .{ .mem = Memory.sib(.byte, .{
+        .base = .ds,
+        .disp = 0x10000000,
+    }) } });
+    try expectEqualHexStrings("\x44\x02\x24\x25\x00\x00\x00\x10", enc.code(), "add r12b, BYTE PTR ds:0x10000000");
+
+    try enc.encode(.add, .{ .op1 = .{ .reg = .r12b }, .op2 = .{ .mem = Memory.sib(.byte, .{
+        .base = .fs,
+        .disp = 0x10000000,
+    }) } });
+    try expectEqualHexStrings("\x64\x44\x02\x24\x25\x00\x00\x00\x10", enc.code(), "add r12b, BYTE PTR fs:0x10000000");
+
+    try enc.encode(.sub, .{ .op1 = .{ .reg = .r11 }, .op2 = .{ .mem = Memory.sib(.qword, .{
+        .base = .r13,
+        .disp = 0x10000000,
+    }) } });
+    try expectEqualHexStrings("\x4D\x2B\x9D\x00\x00\x00\x10", enc.code(), "sub r11, QWORD PTR [r13 + 0x10000000]");
+
+    try enc.encode(.sub, .{ .op1 = .{ .reg = .r11 }, .op2 = .{ .mem = Memory.sib(.qword, .{
+        .base = .r12,
+        .disp = 0x10000000,
+    }) } });
+    try expectEqualHexStrings("\x4D\x2B\x9C\x24\x00\x00\x00\x10", enc.code(), "sub r11, QWORD PTR [r12 + 0x10000000]");
+
+    // The encoded mnemonic is `imul`, so the label says imul rather than mov.
+    try enc.encode(.imul, .{ .op1 = .{ .reg = .r11 }, .op2 = .{ .reg = .r12 } });
+    try expectEqualHexStrings("\x4D\x0F\xAF\xDC", enc.code(), "imul r11, r12");
+}
+
+// Exercises three-operand `imul`: register destination, register or memory source, immediate.
+test "lower RMI encoding" {
+    var t = TestEncode{};
+
+    try t.encode(.imul, .{
+        .op1 = .{ .reg = .r11 },
+        .op2 = .{ .reg = .r12 },
+        .op3 = .{ .imm = Immediate.s(-2) },
+    });
+    try expectEqualHexStrings("\x4D\x6B\xDC\xFE", t.code(), "imul r11, r12, -2");
+
+    try t.encode(.imul, .{
+        .op1 = .{ .reg = .r11 },
+        .op2 = .{ .mem = Memory.rip(.qword, -16) },
+        .op3 = .{ .imm = Immediate.s(-1024) },
+    });
+    try expectEqualHexStrings(
+        "\x4C\x69\x1D\xF0\xFF\xFF\xFF\x00\xFC\xFF\xFF",
+        t.code(),
+        "imul r11, QWORD PTR [rip - 16], -1024",
+    );
+
+    // Same word-sized memory source, once with a signed and once with an unsigned immediate.
+    try t.encode(.imul, .{
+        .op1 = .{ .reg = .bx },
+        .op2 = .{ .mem = Memory.sib(.word, .{ .base = .rbp, .disp = -16 }) },
+        .op3 = .{ .imm = Immediate.s(-1024) },
+    });
+    try expectEqualHexStrings(
+        "\x66\x69\x5D\xF0\x00\xFC",
+        t.code(),
+        "imul bx, WORD PTR [rbp - 16], -1024",
+    );
+
+    try t.encode(.imul, .{
+        .op1 = .{ .reg = .bx },
+        .op2 = .{ .mem = Memory.sib(.word, .{ .base = .rbp, .disp = -16 }) },
+        .op3 = .{ .imm = Immediate.u(1024) },
+    });
+    try expectEqualHexStrings(
+        "\x66\x69\x5D\xF0\x00\x04",
+        t.code(),
+        "imul bx, WORD PTR [rbp - 16], 1024",
+    );
+}
+
+// Exercises instructions with a register or memory destination and a register source.
+test "lower MR encoding" {
+    var t = TestEncode{};
+
+    try t.encode(.mov, .{
+        .op1 = .{ .reg = .rax },
+        .op2 = .{ .reg = .rbx },
+    });
+    try expectEqualHexStrings("\x48\x89\xD8", t.code(), "mov rax, rbx");
+
+    try t.encode(.mov, .{
+        .op1 = .{ .mem = Memory.sib(.qword, .{ .base = .rbp, .disp = -4 }) },
+        .op2 = .{ .reg = .r11 },
+    });
+    try expectEqualHexStrings("\x4c\x89\x5d\xfc", t.code(), "mov QWORD PTR [rbp - 4], r11");
+
+    try t.encode(.mov, .{
+        .op1 = .{ .mem = Memory.rip(.qword, 0x10) },
+        .op2 = .{ .reg = .r12 },
+    });
+    try expectEqualHexStrings("\x4C\x89\x25\x10\x00\x00\x00", t.code(), "mov QWORD PTR [rip + 0x10], r12");
+
+    try t.encode(.mov, .{
+        .op1 = .{ .mem = Memory.sib(.qword, .{
+            .base = .r11,
+            .scale_index = .{ .scale = 2, .index = .r12 },
+            .disp = 0x10,
+        }) },
+        .op2 = .{ .reg = .r13 },
+    });
+    try expectEqualHexStrings("\x4F\x89\x6C\x63\x10", t.code(), "mov QWORD PTR [r11 + 2 * r12 + 0x10], r13");
+
+    try t.encode(.mov, .{
+        .op1 = .{ .mem = Memory.rip(.word, -0x10) },
+        .op2 = .{ .reg = .r12w },
+    });
+    try expectEqualHexStrings("\x66\x44\x89\x25\xF0\xFF\xFF\xFF", t.code(), "mov WORD PTR [rip - 0x10], r12w");
+
+    try t.encode(.mov, .{
+        .op1 = .{ .mem = Memory.sib(.byte, .{
+            .base = .r11,
+            .scale_index = .{ .scale = 2, .index = .r12 },
+            .disp = 0x10,
+        }) },
+        .op2 = .{ .reg = .r13b },
+    });
+    try expectEqualHexStrings("\x47\x88\x6C\x63\x10", t.code(), "mov BYTE PTR [r11 + 2 * r12 + 0x10], r13b");
+
+    try t.encode(.add, .{
+        .op1 = .{ .mem = Memory.sib(.byte, .{ .base = .ds, .disp = 0x10000000 }) },
+        .op2 = .{ .reg = .r12b },
+    });
+    try expectEqualHexStrings("\x44\x00\x24\x25\x00\x00\x00\x10", t.code(), "add BYTE PTR ds:0x10000000, r12b");
+
+    try t.encode(.add, .{
+        .op1 = .{ .mem = Memory.sib(.dword, .{ .base = .ds, .disp = 0x10000000 }) },
+        .op2 = .{ .reg = .r12d },
+    });
+    try expectEqualHexStrings("\x44\x01\x24\x25\x00\x00\x00\x10", t.code(), "add DWORD PTR [ds:0x10000000], r12d");
+
+    try t.encode(.add, .{
+        .op1 = .{ .mem = Memory.sib(.dword, .{ .base = .gs, .disp = 0x10000000 }) },
+        .op2 = .{ .reg = .r12d },
+    });
+    try expectEqualHexStrings("\x65\x44\x01\x24\x25\x00\x00\x00\x10", t.code(), "add DWORD PTR [gs:0x10000000], r12d");
+
+    try t.encode(.sub, .{
+        .op1 = .{ .mem = Memory.sib(.qword, .{ .base = .r11, .disp = 0x10000000 }) },
+        .op2 = .{ .reg = .r12 },
+    });
+    try expectEqualHexStrings("\x4D\x29\xA3\x00\x00\x00\x10", t.code(), "sub QWORD PTR [r11 + 0x10000000], r12");
+}
+
+// Exercises single-operand instructions (`call`, `push`, `pop`, one-operand `imul`)
+// with register, memory and immediate operands.
+//
+// The third argument of `expectEqualHexStrings` is only printed when a check fails, so
+// each label must describe the operand that was actually encoded.
+test "lower M encoding" {
+    var enc = TestEncode{};
+
+    try enc.encode(.call, .{ .op1 = .{ .reg = .r12 } });
+    try expectEqualHexStrings("\x41\xFF\xD4", enc.code(), "call r12");
+
+    try enc.encode(.call, .{ .op1 = .{ .mem = Memory.sib(.qword, .{
+        .base = .r12,
+        .disp = 0,
+    }) } });
+    try expectEqualHexStrings("\x41\xFF\x14\x24", enc.code(), "call QWORD PTR [r12]");
+
+    try enc.encode(.call, .{ .op1 = .{ .mem = Memory.sib(.qword, .{
+        .base = null,
+        .scale_index = .{
+            .index = .r11,
+            .scale = 2,
+        },
+        .disp = 0,
+    }) } });
+    try expectEqualHexStrings("\x42\xFF\x14\x5D\x00\x00\x00\x00", enc.code(), "call QWORD PTR [r11 * 2]");
+
+    try enc.encode(.call, .{ .op1 = .{ .mem = Memory.sib(.qword, .{
+        .base = null,
+        .scale_index = .{
+            .index = .r12,
+            .scale = 2,
+        },
+        .disp = 0,
+    }) } });
+    try expectEqualHexStrings("\x42\xFF\x14\x65\x00\x00\x00\x00", enc.code(), "call QWORD PTR [r12 * 2]");
+
+    try enc.encode(.call, .{ .op1 = .{ .mem = Memory.sib(.qword, .{
+        .base = .gs,
+        .disp = 0,
+    }) } });
+    try expectEqualHexStrings("\x65\xFF\x14\x25\x00\x00\x00\x00", enc.code(), "call gs:0x0");
+
+    try enc.encode(.call, .{ .op1 = .{ .imm = Immediate.s(0) } });
+    try expectEqualHexStrings("\xE8\x00\x00\x00\x00", enc.code(), "call 0x0");
+
+    try enc.encode(.push, .{ .op1 = .{ .mem = Memory.sib(.qword, .{
+        .base = .rbp,
+        .disp = 0,
+    }) } });
+    try expectEqualHexStrings("\xFF\x75\x00", enc.code(), "push QWORD PTR [rbp]");
+
+    // The memory operand is `.word`-sized, so the label reads WORD PTR.
+    try enc.encode(.push, .{ .op1 = .{ .mem = Memory.sib(.word, .{
+        .base = .rbp,
+        .disp = 0,
+    }) } });
+    try expectEqualHexStrings("\x66\xFF\x75\x00", enc.code(), "push WORD PTR [rbp]");
+
+    try enc.encode(.pop, .{ .op1 = .{ .mem = Memory.rip(.qword, 0) } });
+    try expectEqualHexStrings("\x8F\x05\x00\x00\x00\x00", enc.code(), "pop QWORD PTR [rip]");
+
+    // The operand is built with `Memory.rip`, so the label names rip rather than rbp.
+    try enc.encode(.pop, .{ .op1 = .{ .mem = Memory.rip(.word, 0) } });
+    try expectEqualHexStrings("\x66\x8F\x05\x00\x00\x00\x00", enc.code(), "pop WORD PTR [rip]");
+
+    try enc.encode(.imul, .{ .op1 = .{ .reg = .rax } });
+    try expectEqualHexStrings("\x48\xF7\xE8", enc.code(), "imul rax");
+
+    try enc.encode(.imul, .{ .op1 = .{ .reg = .r12 } });
+    try expectEqualHexStrings("\x49\xF7\xEC", enc.code(), "imul r12");
+}
+
+// Exercises `push` and `pop` with a single register operand.
+test "lower O encoding" {
+    var t = TestEncode{};
+
+    try t.encode(.push, .{
+        .op1 = .{ .reg = .rax },
+    });
+    try expectEqualHexStrings("\x50", t.code(), "push rax");
+
+    try t.encode(.push, .{
+        .op1 = .{ .reg = .r12w },
+    });
+    try expectEqualHexStrings("\x66\x41\x54", t.code(), "push r12w");
+
+    try t.encode(.pop, .{
+        .op1 = .{ .reg = .r12 },
+    });
+    try expectEqualHexStrings("\x41\x5c", t.code(), "pop r12");
+}
+
+// Exercises `mov` of an immediate into registers of each width.
+test "lower OI encoding" {
+    var t = TestEncode{};
+
+    // 64-bit immediates
+    try t.encode(.mov, .{
+        .op1 = .{ .reg = .rax },
+        .op2 = .{ .imm = Immediate.u(0x1000000000000000) },
+    });
+    try expectEqualHexStrings(
+        "\x48\xB8\x00\x00\x00\x00\x00\x00\x00\x10",
+        t.code(),
+        "movabs rax, 0x1000000000000000",
+    );
+
+    try t.encode(.mov, .{
+        .op1 = .{ .reg = .r11 },
+        .op2 = .{ .imm = Immediate.u(0x1000000000000000) },
+    });
+    try expectEqualHexStrings(
+        "\x49\xBB\x00\x00\x00\x00\x00\x00\x00\x10",
+        t.code(),
+        "movabs r11, 0x1000000000000000",
+    );
+
+    // narrower views of r11
+    try t.encode(.mov, .{
+        .op1 = .{ .reg = .r11d },
+        .op2 = .{ .imm = Immediate.u(0x10000000) },
+    });
+    try expectEqualHexStrings("\x41\xBB\x00\x00\x00\x10", t.code(), "mov r11d, 0x10000000");
+
+    try t.encode(.mov, .{
+        .op1 = .{ .reg = .r11w },
+        .op2 = .{ .imm = Immediate.u(0x1000) },
+    });
+    try expectEqualHexStrings("\x66\x41\xBB\x00\x10", t.code(), "mov r11w, 0x1000");
+
+    try t.encode(.mov, .{
+        .op1 = .{ .reg = .r11b },
+        .op2 = .{ .imm = Immediate.u(0x10) },
+    });
+    try expectEqualHexStrings("\x41\xB3\x10", t.code(), "mov r11b, 0x10");
+}
+
+// Exercises `mov` between an accumulator register and a `Memory.moffs` operand, in both directions.
+test "lower FD/TD encoding" {
+    var t = TestEncode{};
+
+    // register <- moffs
+    try t.encode(.mov, .{
+        .op1 = .{ .reg = .rax },
+        .op2 = .{ .mem = Memory.moffs(.cs, 0x10) },
+    });
+    try expectEqualHexStrings("\x2E\x48\xA1\x10\x00\x00\x00\x00\x00\x00\x00", t.code(), "movabs rax, cs:0x10");
+
+    try t.encode(.mov, .{
+        .op1 = .{ .reg = .eax },
+        .op2 = .{ .mem = Memory.moffs(.fs, 0x10) },
+    });
+    try expectEqualHexStrings("\x64\xA1\x10\x00\x00\x00\x00\x00\x00\x00", t.code(), "movabs eax, fs:0x10");
+
+    try t.encode(.mov, .{
+        .op1 = .{ .reg = .ax },
+        .op2 = .{ .mem = Memory.moffs(.gs, 0x10) },
+    });
+    try expectEqualHexStrings("\x65\x66\xA1\x10\x00\x00\x00\x00\x00\x00\x00", t.code(), "movabs ax, gs:0x10");
+
+    try t.encode(.mov, .{
+        .op1 = .{ .reg = .al },
+        .op2 = .{ .mem = Memory.moffs(.ds, 0x10) },
+    });
+    try expectEqualHexStrings("\xA0\x10\x00\x00\x00\x00\x00\x00\x00", t.code(), "movabs al, ds:0x10");
+
+    // moffs <- register
+    try t.encode(.mov, .{
+        .op1 = .{ .mem = Memory.moffs(.cs, 0x10) },
+        .op2 = .{ .reg = .rax },
+    });
+    try expectEqualHexStrings("\x2E\x48\xA3\x10\x00\x00\x00\x00\x00\x00\x00", t.code(), "movabs cs:0x10, rax");
+
+    try t.encode(.mov, .{
+        .op1 = .{ .mem = Memory.moffs(.fs, 0x10) },
+        .op2 = .{ .reg = .eax },
+    });
+    try expectEqualHexStrings("\x64\xA3\x10\x00\x00\x00\x00\x00\x00\x00", t.code(), "movabs fs:0x10, eax");
+
+    try t.encode(.mov, .{
+        .op1 = .{ .mem = Memory.moffs(.gs, 0x10) },
+        .op2 = .{ .reg = .ax },
+    });
+    try expectEqualHexStrings("\x65\x66\xA3\x10\x00\x00\x00\x00\x00\x00\x00", t.code(), "movabs gs:0x10, ax");
+
+    try t.encode(.mov, .{
+        .op1 = .{ .mem = Memory.moffs(.ds, 0x10) },
+        .op2 = .{ .reg = .al },
+    });
+    try expectEqualHexStrings("\xA2\x10\x00\x00\x00\x00\x00\x00\x00", t.code(), "movabs ds:0x10, al");
+}
+
+// Exercises instructions that take no operands.
+test "lower NP encoding" {
+    var t = TestEncode{};
+
+    // Each entry is: mnemonic, expected bytes, label printed on failure.
+    const cases = .{
+        .{ .int3, "\xCC", "int3" },
+        .{ .nop, "\x90", "nop" },
+        .{ .ret, "\xC3", "ret" },
+        .{ .syscall, "\x0f\x05", "syscall" },
+    };
+
+    inline for (cases) |case| {
+        try t.encode(case[0], .{});
+        try expectEqualHexStrings(case[1], t.code(), case[2]);
+    }
+}
+
+/// Asserts that `Instruction.new` rejects the given mnemonic/operand combination
+/// with `error.InvalidInstruction`. Operands left unspecified default to `.none`.
+fn invalidInstruction(mnemonic: Instruction.Mnemonic, args: struct {
+    op1: Instruction.Operand = .none,
+    op2: Instruction.Operand = .none,
+    op3: Instruction.Operand = .none,
+    op4: Instruction.Operand = .none,
+}) !void {
+    try testing.expectError(error.InvalidInstruction, Instruction.new(mnemonic, .{
+        .op1 = args.op1,
+        .op2 = args.op2,
+        .op3 = args.op3,
+        .op4 = args.op4,
+    }));
+}
+
+// Operand combinations that `Instruction.new` is expected to reject.
+test "invalid instruction" {
+    // call with register operands narrower than 64 bits
+    try invalidInstruction(.call, .{ .op1 = .{ .reg = .eax } });
+    try invalidInstruction(.call, .{ .op1 = .{ .reg = .ax } });
+    try invalidInstruction(.call, .{ .op1 = .{ .reg = .al } });
+
+    // call with memory operands narrower than a qword
+    try invalidInstruction(.call, .{ .op1 = .{ .mem = Memory.rip(.dword, 0) } });
+    try invalidInstruction(.call, .{ .op1 = .{ .mem = Memory.rip(.word, 0) } });
+    try invalidInstruction(.call, .{ .op1 = .{ .mem = Memory.rip(.byte, 0) } });
+
+    // mov of a 64-bit register into a word-sized memory operand
+    try invalidInstruction(.mov, .{
+        .op1 = .{ .mem = Memory.rip(.word, 0x10) },
+        .op2 = .{ .reg = .r12 },
+    });
+
+    // lea with a register source, and lea into a byte register
+    try invalidInstruction(.lea, .{
+        .op1 = .{ .reg = .rax },
+        .op2 = .{ .reg = .rbx },
+    });
+    try invalidInstruction(.lea, .{
+        .op1 = .{ .reg = .al },
+        .op2 = .{ .mem = Memory.rip(.byte, 0) },
+    });
+
+    // pop/push with byte- and dword-sized registers
+    try invalidInstruction(.pop, .{ .op1 = .{ .reg = .r12b } });
+    try invalidInstruction(.pop, .{ .op1 = .{ .reg = .r12d } });
+    try invalidInstruction(.push, .{ .op1 = .{ .reg = .r12b } });
+    try invalidInstruction(.push, .{ .op1 = .{ .reg = .r12d } });
+
+    // push with a 64-bit immediate
+    try invalidInstruction(.push, .{ .op1 = .{ .imm = Immediate.u(0x1000000000000000) } });
+}
+
+/// Asserts that `Instruction.new` fails with `error.CannotEncode` for the given
+/// mnemonic/operand combination. Operands left unspecified default to `.none`.
+fn cannotEncode(mnemonic: Instruction.Mnemonic, args: struct {
+    op1: Instruction.Operand = .none,
+    op2: Instruction.Operand = .none,
+    op3: Instruction.Operand = .none,
+    op4: Instruction.Operand = .none,
+}) !void {
+    const result = Instruction.new(mnemonic, .{
+        .op1 = args.op1,
+        .op2 = args.op2,
+        .op3 = args.op3,
+        .op4 = args.op4,
+    });
+    try testing.expectError(error.CannotEncode, result);
+}
+
+// Each case pairs a high-byte register (`ah`/`bh`) with another operand and expects
+// `error.CannotEncode`.
+test "cannot encode" {
+    try cannotEncode(.@"test", .{
+        .op1 = .{ .mem = Memory.sib(.byte, .{
+            .base = .r12,
+            .disp = 0,
+        }) },
+        .op2 = .{ .reg = .ah },
+    });
+
+    try cannotEncode(.@"test", .{ .op1 = .{ .reg = .r11b }, .op2 = .{ .reg = .bh } });
+
+    try cannotEncode(.mov, .{ .op1 = .{ .reg = .sil }, .op2 = .{ .reg = .ah } });
+}
+
+const Assembler = struct {
+ it: Tokenizer,
+
+    /// Splits assembly source text into tokens, one per call to `next`.
+    const Tokenizer = struct {
+        /// Text being tokenized.
+        input: []const u8,
+        /// Offset into `input` of the next byte to examine.
+        pos: usize = 0,
+
+        const Error = error{InvalidToken};
+
+        /// A token kind together with the byte range `input[start..end]` it covers.
+        const Token = struct {
+            id: Id,
+            start: usize,
+            end: usize,
+
+            const Id = enum {
+                eof,
+
+                space,
+                new_line,
+
+                colon,
+                comma,
+                open_br,
+                close_br,
+                plus,
+                minus,
+                star,
+
+                string,
+                numeral,
+            };
+        };
+
+        const Iterator = struct {};
+
+        /// Scans one token starting at `pos` and leaves `pos` just past it.
+        ///
+        /// Returns an `.eof` token when no input is left; a run of whitespace or
+        /// newlines that reaches the end of the input is also reported as `.eof`.
+        /// Fails with `error.InvalidToken` when the first byte cannot start a token.
+        fn next(it: *Tokenizer) !Token {
+            var result = Token{ .id = .eof, .start = it.pos, .end = it.pos };
+
+            var state: enum { start, space, new_line, string, numeral, numeral_hex } = .start;
+
+            while (it.pos < it.input.len) : (it.pos += 1) {
+                const ch = it.input[it.pos];
+                switch (state) {
+                    .start => {
+                        // Single-character tokens are complete as soon as they are seen.
+                        const punct: ?Token.Id = switch (ch) {
+                            ',' => Token.Id.comma,
+                            ':' => Token.Id.colon,
+                            '[' => Token.Id.open_br,
+                            ']' => Token.Id.close_br,
+                            '+' => Token.Id.plus,
+                            '-' => Token.Id.minus,
+                            '*' => Token.Id.star,
+                            else => null,
+                        };
+                        if (punct) |id| {
+                            result.id = id;
+                            // `break` skips the loop's increment, so step past the byte here.
+                            it.pos += 1;
+                            break;
+                        }
+
+                        // Anything else opens a multi-byte token or is rejected.
+                        switch (ch) {
+                            ' ', '\t' => state = .space,
+                            '\n', '\r' => state = .new_line,
+                            'a'...'z', 'A'...'Z' => state = .string,
+                            '0'...'9' => state = .numeral,
+                            else => return error.InvalidToken,
+                        }
+                    },
+
+                    .space => {
+                        if (ch != ' ' and ch != '\t') {
+                            result.id = .space;
+                            break;
+                        }
+                    },
+
+                    .new_line => {
+                        const continues = ch == '\n' or ch == '\r' or ch == ' ' or ch == '\t';
+                        if (!continues) {
+                            result.id = .new_line;
+                            break;
+                        }
+                    },
+
+                    .string => switch (ch) {
+                        'a'...'z', 'A'...'Z', '0'...'9' => {},
+                        else => {
+                            result.id = .string;
+                            break;
+                        },
+                    },
+
+                    .numeral => switch (ch) {
+                        '0'...'9' => {},
+                        'x' => state = .numeral_hex,
+                        else => {
+                            result.id = .numeral;
+                            break;
+                        },
+                    },
+
+                    .numeral_hex => switch (ch) {
+                        'a'...'f', '0'...'9' => {},
+                        else => {
+                            result.id = .numeral;
+                            break;
+                        },
+                    },
+                }
+            }
+
+            if (it.pos >= it.input.len) {
+                // A string or numeral that runs to the end of the input was never
+                // closed by a terminating byte inside the loop; classify it here.
+                result.id = switch (state) {
+                    .string => .string,
+                    .numeral, .numeral_hex => .numeral,
+                    else => result.id,
+                };
+            }
+
+            result.end = it.pos;
+            return result;
+        }
+
+        /// Moves the read position to `pos`.
+        fn seekTo(it: *Tokenizer, pos: usize) void {
+            it.pos = pos;
+        }
+    };
+
+    /// Creates an assembler that reads instructions from `input`.
+    pub fn init(input: []const u8) Assembler {
+        const tokenizer = Tokenizer{ .input = input };
+        return Assembler{ .it = tokenizer };
+    }
+
+    /// Parses instructions one at a time until `next` reports the end of input,
+    /// building each with `Instruction.new` and encoding it to `writer`.
+    /// Parse, construction and write errors are propagated.
+    pub fn assemble(as: *Assembler, writer: anytype) !void {
+        while (try as.next()) |parsed| {
+            const ops = parsed.ops;
+            const inst = try Instruction.new(parsed.mnemonic, .{
+                .op1 = ops[0],
+                .op2 = ops[1],
+                .op3 = ops[2],
+                .op4 = ops[3],
+            });
+            try inst.encode(writer);
+        }
+    }
+
+    /// One parsed instruction: its mnemonic and up to four operands.
+    const ParseResult = struct {
+        /// Mnemonic resolved from the instruction's leading identifier.
+        mnemonic: Instruction.Mnemonic,
+        /// Operands in source order.
+        ops: [4]Instruction.Operand,
+    };
+
+    /// Errors the parser can return: tokenizer errors, integer-parsing errors,
+    /// and the parser's own diagnostics.
+    const ParseError = Tokenizer.Error || std.fmt.ParseIntError || error{
+        UnexpectedToken,
+        InvalidMnemonic,
+        InvalidOperand,
+        InvalidRegister,
+        InvalidPtrSize,
+        InvalidMemoryOperand,
+        InvalidScaleIndex,
+    };
+
+    /// Parses the next instruction from the input.
+    ///
+    /// Skips leading spaces and newlines, reads the mnemonic, then tries each
+    /// operand pattern in `operand_rules` in order, rewinding the tokenizer
+    /// after every failed attempt. Returns `null` when only end-of-input is
+    /// left, and `error.InvalidOperand` when no pattern matches.
+    fn next(as: *Assembler) ParseError!?ParseResult {
+        try as.skip(2, .{ .space, .new_line });
+
+        const mnemonic_tok = as.expect(.string) catch |err| {
+            // Running out of input where a mnemonic would start is a clean stop, not an error.
+            if (err == error.UnexpectedToken and (try as.peek()) == .eof) return null;
+            return err;
+        };
+        const name = as.source(mnemonic_tok);
+        const mnemonic = mnemonicFromString(name) orelse return error.InvalidMnemonic;
+        try as.skip(1, .{.space});
+
+        const operand_rules = .{
+            .{},
+            .{.register},
+            .{.memory},
+            .{.immediate},
+            .{ .register, .register },
+            .{ .register, .memory },
+            .{ .memory, .register },
+            .{ .register, .immediate },
+            .{ .memory, .immediate },
+            .{ .register, .register, .immediate },
+            .{ .register, .memory, .immediate },
+        };
+
+        const operands_start = as.it.pos;
+        inline for (operand_rules) |rule| {
+            var ops = [_]Instruction.Operand{.none} ** 4;
+            if (as.parseOperandRule(rule, &ops)) {
+                return ParseResult{ .mnemonic = mnemonic, .ops = ops };
+            } else |_| {
+                // This pattern did not match; rewind and try the next one.
+                as.it.seekTo(operands_start);
+            }
+        }
+
+        return error.InvalidOperand;
+    }
+
+    /// Returns the input text covered by `token`, as a slice into the
+    /// tokenizer's input (no copy is made).
+    fn source(as: *Assembler, token: Tokenizer.Token) []const u8 {
+        const text = as.it.input;
+        return text[token.start..token.end];
+    }
+
+    /// Reports the id of the upcoming token without consuming it.
+    /// The tokenizer position is restored after a successful read; when the
+    /// tokenizer itself fails, the error is propagated as is.
+    fn peek(as: *Assembler) Tokenizer.Error!Tokenizer.Token.Id {
+        const saved = as.it.pos;
+        const upcoming = try as.it.next();
+        as.it.seekTo(saved);
+        return upcoming.id;
+    }
+
+    /// Consumes and returns the next token if its id is `id`.
+    /// Otherwise nothing is consumed and `error.UnexpectedToken` is returned.
+    fn expect(as: *Assembler, id: Tokenizer.Token.Id) ParseError!Tokenizer.Token {
+        const upcoming = try as.peek();
+        if (upcoming != id) return error.UnexpectedToken;
+        return as.it.next();
+    }
+
+    /// Consumes consecutive tokens for as long as their id is one of
+    /// `tok_ids`. The first token with any other id is left unconsumed.
+    fn skip(as: *Assembler, comptime num: comptime_int, tok_ids: [num]Tokenizer.Token.Id) Tokenizer.Error!void {
+        while (true) {
+            const mark = as.it.pos;
+            const tok = try as.it.next();
+            var skippable = false;
+            inline for (tok_ids) |candidate| {
+                if (tok.id == candidate) skippable = true;
+            }
+            if (!skippable) {
+                // Not ours to swallow: put it back and stop.
+                as.it.seekTo(mark);
+                return;
+            }
+        }
+    }
+
+    /// Looks up the `Instruction.Mnemonic` whose tag name equals `bytes`.
+    /// The comparison is exact (case-sensitive); returns `null` when no tag
+    /// matches.
+    fn mnemonicFromString(bytes: []const u8) ?Instruction.Mnemonic {
+        const Mnemonic = Instruction.Mnemonic;
+        inline for (@typeInfo(Mnemonic).Enum.fields) |candidate| {
+            const matches = std.mem.eql(u8, bytes, candidate.name);
+            if (matches) return @field(Mnemonic, candidate.name);
+        }
+        return null;
+    }
+
+    /// Tries to parse the rest of the current line as the operand list
+    /// described by `rule`, a comptime tuple of `.register`, `.memory` and
+    /// `.immediate` enum literals (at most four).
+    ///
+    /// Operands are separated by commas, with optional spaces around them. On
+    /// success `ops[0..rule.len]` holds the parsed operands and the line
+    /// terminator (new line or end of input) has been consumed. On failure an
+    /// error is returned and both `ops` and the tokenizer position may have
+    /// been partially updated; the caller is expected to rewind.
+    fn parseOperandRule(as: *Assembler, rule: anytype, ops: *[4]Instruction.Operand) ParseError!void {
+        inline for (rule, 0..) |cond, i| {
+            comptime assert(i < 4);
+            if (i > 0) {
+                _ = try as.expect(.comma);
+                try as.skip(1, .{.space});
+            }
+            if (@typeInfo(@TypeOf(cond)) != .EnumLiteral) {
+                @compileError("invalid condition in the rule: " ++ @typeName(@TypeOf(cond)));
+            }
+            switch (cond) {
+                .register => {
+                    const reg_tok = try as.expect(.string);
+                    const reg = registerFromString(as.source(reg_tok)) orelse
+                        return error.InvalidOperand;
+                    ops[i] = .{ .reg = reg };
+                },
+                .memory => {
+                    const mem = try as.parseMemory();
+                    ops[i] = .{ .mem = mem };
+                },
+                .immediate => {
+                    // An optional leading `-` selects the signed form.
+                    const is_neg = if (as.expect(.minus)) |_| true else |_| false;
+                    const imm_tok = try as.expect(.numeral);
+                    const imm: Immediate = if (is_neg) blk: {
+                        // The numeral token carries no sign, so parse its
+                        // magnitude in a wider type and negate afterwards.
+                        // Parsing as i32 first would reject -2147483648,
+                        // whose magnitude does not fit in an i32.
+                        const magnitude = try std.fmt.parseInt(i64, as.source(imm_tok), 0);
+                        const value = std.math.cast(i32, -magnitude) orelse return error.Overflow;
+                        break :blk .{ .signed = value };
+                    } else .{ .unsigned = try std.fmt.parseInt(u64, as.source(imm_tok), 0) };
+                    ops[i] = .{ .imm = imm };
+                },
+                else => @compileError("unhandled enum literal " ++ @tagName(cond)),
+            }
+            try as.skip(1, .{.space});
+        }
+
+        // The operand list must be the last thing on the line.
+        try as.skip(1, .{.space});
+        const tok = try as.it.next();
+        switch (tok.id) {
+            .new_line, .eof => {},
+            else => return error.InvalidOperand,
+        }
+    }
+
+    /// Looks up the `Register` whose tag name equals `bytes`.
+    /// The comparison is exact (case-sensitive); returns `null` when no tag
+    /// matches.
+    fn registerFromString(bytes: []const u8) ?Register {
+        inline for (@typeInfo(Register).Enum.fields) |candidate| {
+            const matches = std.mem.eql(u8, bytes, candidate.name);
+            if (matches) return @field(Register, candidate.name);
+        }
+        return null;
+    }
+
+    /// Parses a memory operand: an optional `<size> ptr` prefix followed by
+    /// one of the addressing forms listed in `rules`.
+    ///
+    /// The forms are tried in order and the tokenizer is rewound after each
+    /// one that does not match. The first form that matches decides the
+    /// result:
+    ///  * a `rip` form becomes `Memory.rip`,
+    ///  * a segment base with a numeral too large for an i32 becomes
+    ///    `Memory.moffs`,
+    ///  * any other form with a base becomes `Memory.sib`.
+    /// When no size prefix is present, `.qword` is used.
+    ///
+    /// Returns `error.InvalidMemoryOperand` when a form matched but its parts
+    /// cannot be combined, and `error.InvalidOperand` when no form matched.
+    fn parseMemory(as: *Assembler) ParseError!Memory {
+        const ptr_size: ?Memory.PtrSize = blk: {
+            const prefix_start = as.it.pos;
+            const parsed_size = as.parsePtrSize() catch |err| switch (err) {
+                // No `<size> ptr` prefix here: rewind and carry on without one.
+                error.UnexpectedToken => {
+                    as.it.seekTo(prefix_start);
+                    break :blk null;
+                },
+                else => return err,
+            };
+            break :blk parsed_size;
+        };
+
+        try as.skip(1, .{.space});
+
+        // Supported rules and orderings.
+        const rules = .{
+            .{ .open_br, .base, .close_br }, // [ base ]
+            .{ .open_br, .base, .plus, .disp, .close_br }, // [ base + disp ]
+            .{ .open_br, .base, .minus, .disp, .close_br }, // [ base - disp ]
+            .{ .open_br, .disp, .plus, .base, .close_br }, // [ disp + base ]
+            .{ .open_br, .base, .plus, .index, .close_br }, // [ base + index ]
+            .{ .open_br, .base, .plus, .index, .star, .scale, .close_br }, // [ base + index * scale ]
+            .{ .open_br, .index, .star, .scale, .plus, .base, .close_br }, // [ index * scale + base ]
+            .{ .open_br, .base, .plus, .index, .star, .scale, .plus, .disp, .close_br }, // [ base + index * scale + disp ]
+            .{ .open_br, .base, .plus, .index, .star, .scale, .minus, .disp, .close_br }, // [ base + index * scale - disp ]
+            .{ .open_br, .index, .star, .scale, .plus, .base, .plus, .disp, .close_br }, // [ index * scale + base + disp ]
+            .{ .open_br, .index, .star, .scale, .plus, .base, .minus, .disp, .close_br }, // [ index * scale + base - disp ]
+            .{ .open_br, .disp, .plus, .index, .star, .scale, .plus, .base, .close_br }, // [ disp + index * scale + base ]
+            .{ .open_br, .disp, .plus, .base, .plus, .index, .star, .scale, .close_br }, // [ disp + base + index * scale ]
+            .{ .open_br, .base, .plus, .disp, .plus, .index, .star, .scale, .close_br }, // [ base + disp + index * scale ]
+            .{ .open_br, .base, .minus, .disp, .plus, .index, .star, .scale, .close_br }, // [ base - disp + index * scale ]
+            .{ .open_br, .base, .plus, .disp, .plus, .scale, .star, .index, .close_br }, // [ base + disp + scale * index ]
+            .{ .open_br, .base, .minus, .disp, .plus, .scale, .star, .index, .close_br }, // [ base - disp + scale * index ]
+            .{ .open_br, .rip, .plus, .disp, .close_br }, // [ rip + disp ]
+            .{ .open_br, .rip, .minus, .disp, .close_br }, // [ rip - disp ]
+            .{ .base, .colon, .disp }, // seg:disp
+        };
+
+        const rules_start = as.it.pos;
+        inline for (rules) |rule| {
+            if (as.parseMemoryRule(rule)) |res| {
+                if (res.rip) {
+                    if (res.base != null or res.scale_index != null or res.offset != null)
+                        return error.InvalidMemoryOperand;
+                    return Memory.rip(ptr_size orelse .qword, res.disp orelse 0);
+                }
+                const base = res.base orelse return error.InvalidMemoryOperand;
+                if (res.offset) |offset| {
+                    if (res.scale_index != null or res.disp != null)
+                        return error.InvalidMemoryOperand;
+                    // A 64-bit offset is only meaningful with a segment
+                    // register. Forms that read the numeral before the base
+                    // (e.g. `[ disp + base ]`) can arrive here with any
+                    // register, so reject those instead of handing a
+                    // non-segment register to `Memory.moffs`.
+                    if (base.class() != .segment) return error.InvalidMemoryOperand;
+                    return Memory.moffs(base, offset);
+                }
+                return Memory.sib(ptr_size orelse .qword, .{
+                    .base = base,
+                    .scale_index = res.scale_index,
+                    .disp = res.disp orelse 0,
+                });
+            } else |_| {
+                // This form did not match; rewind and try the next one.
+                as.it.seekTo(rules_start);
+            }
+        }
+
+        return error.InvalidOperand;
+    }
+
+ const MemoryParseResult = struct { // Pieces collected by `parseMemoryRule` while matching one memory-operand rule.
+ rip: bool = false, // set when the rule matched the literal word `rip`
+ base: ?Register = null,
+ scale_index: ?Memory.ScaleIndex = null,
+ disp: ?i32 = null, // numeral that fits in an i32, negated when the rule puts `-` in front of it
+ offset: ?u64 = null, // numeral too large for an i32, kept as a u64 instead of `disp`
+ };
+
+    /// Tries to match the upcoming tokens against one memory-operand `rule`,
+    /// a comptime tuple of enum literals. Punctuation literals (`.open_br`,
+    /// `.close_br`, `.plus`, `.minus`, `.star`, `.colon`) must appear
+    /// verbatim; `.base`, `.rip`, `.index`, `.scale` and `.disp` are parsed
+    /// and stored in the result. Spaces between elements are skipped.
+    ///
+    /// On failure an error is returned and the tokenizer position is left
+    /// wherever matching stopped; the caller is expected to rewind.
+    ///
+    /// Errors:
+    ///  * `error.UnexpectedToken` when a token of the wrong kind is found,
+    ///  * `error.InvalidMemoryOperand` for an unknown register or a word
+    ///    other than `rip` where `rip` is required,
+    ///  * `error.InvalidScaleIndex` for a scale other than 1, 2, 4 or 8,
+    ///  * integer parse errors for malformed or out-of-range numerals.
+    fn parseMemoryRule(as: *Assembler, rule: anytype) ParseError!MemoryParseResult {
+        var res: MemoryParseResult = .{};
+        inline for (rule, 0..) |cond, i| {
+            if (@typeInfo(@TypeOf(cond)) != .EnumLiteral) {
+                @compileError("unsupported condition type in the rule: " ++ @typeName(@TypeOf(cond)));
+            }
+            switch (cond) {
+                .open_br, .close_br, .plus, .minus, .star, .colon => {
+                    _ = try as.expect(cond);
+                },
+                .base => {
+                    const tok = try as.expect(.string);
+                    res.base = registerFromString(as.source(tok)) orelse return error.InvalidMemoryOperand;
+                },
+                .rip => {
+                    const tok = try as.expect(.string);
+                    if (!std.mem.eql(u8, as.source(tok), "rip")) return error.InvalidMemoryOperand;
+                    res.rip = true;
+                },
+                .index => {
+                    const tok = try as.expect(.string);
+                    const index = registerFromString(as.source(tok)) orelse
+                        return error.InvalidMemoryOperand;
+                    if (res.scale_index) |*si| {
+                        si.index = index;
+                    } else {
+                        // No explicit scale seen (yet): default to 1.
+                        res.scale_index = .{ .scale = 1, .index = index };
+                    }
+                },
+                .scale => {
+                    const tok = try as.expect(.numeral);
+                    // The scale is the literal multiplier written in the
+                    // source, so it has to admit 1, 2, 4 and 8. Parse it in a
+                    // type wide enough for all of them (a u2 tops out at 3
+                    // and would reject 4 and 8), then narrow it to whatever
+                    // type the `scale` field actually has.
+                    const Scale = @TypeOf(@as(Memory.ScaleIndex, undefined).scale);
+                    const raw = try std.fmt.parseInt(u8, as.source(tok), 0);
+                    const scale: Scale = switch (raw) {
+                        1, 2, 4, 8 => std.math.cast(Scale, raw) orelse return error.InvalidScaleIndex,
+                        else => return error.InvalidScaleIndex,
+                    };
+                    if (res.scale_index) |*si| {
+                        si.scale = scale;
+                    } else {
+                        // The index register follows the scale in this rule
+                        // and is filled in when it is parsed.
+                        res.scale_index = .{ .scale = scale, .index = undefined };
+                    }
+                },
+                .disp => {
+                    const tok = try as.expect(.numeral);
+                    const text = as.source(tok);
+                    // The sign is not part of the numeral token; it comes
+                    // from the rule element right before this one.
+                    const is_neg = blk: {
+                        if (i > 0) {
+                            if (rule[i - 1] == .minus) break :blk true;
+                        }
+                        break :blk false;
+                    };
+                    if (is_neg) {
+                        // Parse the magnitude in a wider type and negate
+                        // afterwards, so that -2147483648 (whose magnitude
+                        // does not fit in an i32) is accepted.
+                        const magnitude = try std.fmt.parseInt(i64, text, 0);
+                        res.disp = std.math.cast(i32, -magnitude) orelse return error.Overflow;
+                    } else if (std.fmt.parseInt(i32, text, 0)) |disp| {
+                        res.disp = disp;
+                    } else |err| switch (err) {
+                        error.Overflow => {
+                            // Too large for a displacement. It may still be
+                            // a 64-bit offset, but not next to a base that is
+                            // already known to be a non-segment register.
+                            if (res.base) |base| {
+                                if (base.class() != .segment) return err;
+                            }
+                            res.offset = try std.fmt.parseInt(u64, text, 0);
+                        },
+                        else => return err,
+                    }
+                },
+                else => @compileError("unhandled operand output type: " ++ @tagName(cond)),
+            }
+            try as.skip(1, .{.space});
+        }
+        return res;
+    }
+
+    /// Parses a `<size> ptr` prefix such as `qword ptr`, ignoring ASCII case.
+    /// Both words are consumed before they are checked. Returns
+    /// `error.UnexpectedToken` if the next two tokens are not both words, and
+    /// `error.InvalidPtrSize` if they are words but not a known prefix.
+    fn parsePtrSize(as: *Assembler) ParseError!Memory.PtrSize {
+        const size_tok = try as.expect(.string);
+        try as.skip(1, .{.space});
+        const ptr_tok = try as.expect(.string);
+
+        const size_word = as.source(size_tok);
+        const ptr_word = as.source(ptr_tok);
+
+        const eql = std.ascii.eqlIgnoreCase;
+        if (!eql(ptr_word, "ptr")) return error.InvalidPtrSize;
+        if (eql(size_word, "qword")) return .qword;
+        if (eql(size_word, "dword")) return .dword;
+        if (eql(size_word, "word")) return .word;
+        if (eql(size_word, "byte")) return .byte;
+        if (eql(size_word, "tbyte")) return .tbyte;
+        return error.InvalidPtrSize;
+    }
+};
+
+test "assemble" { // Assembles a multi-line listing in one go and compares the output with the concatenated expected encodings.
+ const input =
+ \\int3
+ \\mov rax, rbx
+ \\mov qword ptr [rbp], rax
+ \\mov qword ptr [rbp - 16], rax
+ \\mov qword ptr [16 + rbp], rax
+ \\mov rax, 0x10
+ \\mov byte ptr [rbp - 0x10], 0x10
+ \\mov word ptr [rbp + r12], r11w
+ \\mov word ptr [rbp + r12 * 2], r11w
+ \\mov word ptr [rbp + r12 * 2 - 16], r11w
+ \\mov dword ptr [rip - 16], r12d
+ \\mov rax, fs:0x0
+ \\mov rax, gs:0x1000000000000000
+ \\movzx r12, al
+ \\imul r12, qword ptr [rbp - 16], 6
+ \\jmp 0x0
+ \\jc 0x0
+ \\jb 0x0
+ \\sal rax, 1
+ \\sal rax, 63
+ \\shl rax, 63
+ \\sar rax, 63
+ \\shr rax, 63
+ \\test byte ptr [rbp - 16], r12b
+ \\sal r12, cl
+ \\mul qword ptr [rip - 16]
+ \\div r12
+ \\idiv byte ptr [rbp - 16]
+ \\cwde
+ \\cbw
+ \\cdqe
+ \\test byte ptr [rbp], ah
+ \\test byte ptr [r12], spl
+ \\cdq
+ \\cwd
+ \\cqo
+ \\test bl, 0x1
+ \\mov rbx,0x8000000000000000
+ \\movss xmm0, dword ptr [rbp]
+ \\movss xmm0, xmm1
+ \\movss dword ptr [rbp - 16 + rax * 2], xmm7
+ \\movss dword ptr [rbp - 16 + rax * 2], xmm8
+ \\movss xmm15, xmm9
+ \\movsd xmm8, qword ptr [rbp - 16]
+ \\movsd qword ptr [rbp - 8], xmm0
+ \\movq xmm8, qword ptr [rbp - 16]
+ \\movq qword ptr [rbp - 16], xmm8
+ \\ucomisd xmm0, qword ptr [rbp - 16]
+ \\fisttp qword ptr [rbp - 16]
+ \\fisttp word ptr [rip + 32]
+ \\fisttp dword ptr [rax]
+ \\fld tbyte ptr [rbp]
+ \\fld dword ptr [rbp]
+ \\xor bl, 0xff
+ \\ud2
+ \\add rsp, -1
+ \\add rsp, 0xff
+ \\mov sil, byte ptr [rax + rcx * 1]
+ \\
+ ;
+
+ // zig fmt: off
+ const expected = &[_]u8{ // one row per input line, in the same order
+ 0xCC, // int3
+ 0x48, 0x89, 0xD8, // mov rax, rbx
+ 0x48, 0x89, 0x45, 0x00, // mov qword ptr [rbp], rax
+ 0x48, 0x89, 0x45, 0xF0, // mov qword ptr [rbp - 16], rax
+ 0x48, 0x89, 0x45, 0x10, // mov qword ptr [16 + rbp], rax
+ 0x48, 0xC7, 0xC0, 0x10, 0x00, 0x00, 0x00, // mov rax, 0x10
+ 0xC6, 0x45, 0xF0, 0x10, // mov byte ptr [rbp - 0x10], 0x10
+ 0x66, 0x46, 0x89, 0x5C, 0x25, 0x00, // mov word ptr [rbp + r12], r11w
+ 0x66, 0x46, 0x89, 0x5C, 0x65, 0x00, // mov word ptr [rbp + r12 * 2], r11w
+ 0x66, 0x46, 0x89, 0x5C, 0x65, 0xF0, // mov word ptr [rbp + r12 * 2 - 16], r11w
+ 0x44, 0x89, 0x25, 0xF0, 0xFF, 0xFF, 0xFF, // mov dword ptr [rip - 16], r12d
+ 0x64, 0x48, 0x8B, 0x04, 0x25, 0x00, 0x00, 0x00, 0x00, // mov rax, fs:0x0
+ 0x65, 0x48, 0xA1, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, // mov rax, gs:0x1000000000000000
+ 0x4C, 0x0F, 0xB6, 0xE0, // movzx r12, al
+ 0x4C, 0x6B, 0x65, 0xF0, 0x06, // imul r12, qword ptr [rbp - 16], 6
+ 0xE9, 0x00, 0x00, 0x00, 0x00, // jmp 0x0
+ 0x0F, 0x82, 0x00, 0x00, 0x00, 0x00, // jc 0x0
+ 0x0F, 0x82, 0x00, 0x00, 0x00, 0x00, // jb 0x0
+ 0x48, 0xD1, 0xE0, // sal rax, 1
+ 0x48, 0xC1, 0xE0, 0x3F, // sal rax, 63
+ 0x48, 0xC1, 0xE0, 0x3F, // shl rax, 63
+ 0x48, 0xC1, 0xF8, 0x3F, // sar rax, 63
+ 0x48, 0xC1, 0xE8, 0x3F, // shr rax, 63
+ 0x44, 0x84, 0x65, 0xF0, // test byte ptr [rbp - 16], r12b
+ 0x49, 0xD3, 0xE4, // sal r12, cl
+ 0x48, 0xF7, 0x25, 0xF0, 0xFF, 0xFF, 0xFF, // mul qword ptr [rip - 16]
+ 0x49, 0xF7, 0xF4, // div r12
+ 0xF6, 0x7D, 0xF0, // idiv byte ptr [rbp - 16]
+ 0x98, // cwde
+ 0x66, 0x98, // cbw
+ 0x48, 0x98, // cdqe
+ 0x84, 0x65, 0x00, // test byte ptr [rbp], ah
+ 0x41, 0x84, 0x24, 0x24, // test byte ptr [r12], spl
+ 0x99, // cdq
+ 0x66, 0x99, // cwd
+ 0x48, 0x99, // cqo
+ 0xF6, 0xC3, 0x01, // test bl, 0x1
+ 0x48, 0xBB, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, // mov rbx,0x8000000000000000
+ 0xF3, 0x0F, 0x10, 0x45, 0x00, // movss xmm0, dword ptr [rbp]
+ 0xF3, 0x0F, 0x10, 0xC1, // movss xmm0, xmm1
+ 0xF3, 0x0F, 0x11, 0x7C, 0x45, 0xF0, // movss dword ptr [rbp - 16 + rax * 2], xmm7
+ 0xF3, 0x44, 0x0F, 0x11, 0x44, 0x45, 0xF0, // movss dword ptr [rbp - 16 + rax * 2], xmm8
+ 0xF3, 0x45, 0x0F, 0x10, 0xF9, // movss xmm15, xmm9
+ 0xF2, 0x44, 0x0F, 0x10, 0x45, 0xF0, // movsd xmm8, qword ptr [rbp - 16]
+ 0xF2, 0x0F, 0x11, 0x45, 0xF8, // movsd qword ptr [rbp - 8], xmm0
+ 0xF3, 0x44, 0x0F, 0x7E, 0x45, 0xF0, // movq xmm8, qword ptr [rbp - 16]
+ 0x66, 0x44, 0x0F, 0xD6, 0x45, 0xF0, // movq qword ptr [rbp - 16], xmm8
+ 0x66, 0x0F, 0x2E, 0x45, 0xF0, // ucomisd xmm0, qword ptr [rbp - 16]
+ 0xDD, 0x4D, 0xF0, // fisttp qword ptr [rbp - 16]
+ 0xDF, 0x0D, 0x20, 0x00, 0x00, 0x00, // fisttp word ptr [rip + 32]
+ 0xDB, 0x08, // fisttp dword ptr [rax]
+ 0xDB, 0x6D, 0x00, // fld tbyte ptr [rbp]
+ 0xD9, 0x45, 0x00, // fld dword ptr [rbp]
+ 0x80, 0xF3, 0xFF, // xor bl, 0xff
+ 0x0F, 0x0B, // ud2
+ 0x48, 0x83, 0xC4, 0xFF, // add rsp, -1
+ 0x48, 0x81, 0xC4, 0xFF, 0x00, 0x00, 0x00, // add rsp, 0xff
+ 0x40, 0x8A, 0x34, 0x08, // mov sil, byte ptr [rax + rcx * 1]
+ };
+ // zig fmt: on
+
+ var as = Assembler.init(input);
+ var output = std.ArrayList(u8).init(testing.allocator);
+ defer output.deinit();
+ try as.assemble(output.writer()); // encodes every line of `input` back to back into `output`
+ try expectEqualHexStrings(expected, output.items, input);
+}
+
+test "assemble - Jcc" {
+    // Each case pairs a conditional-jump mnemonic with the opcode byte that
+    // is expected to follow the 0x0f prefix in its encoding.
+    const cases = [_]struct { Instruction.Mnemonic, u8 }{
+        .{ .ja, 0x87 },
+        .{ .jae, 0x83 },
+        .{ .jb, 0x82 },
+        .{ .jbe, 0x86 },
+        .{ .jc, 0x82 },
+        .{ .je, 0x84 },
+        .{ .jg, 0x8f },
+        .{ .jge, 0x8d },
+        .{ .jl, 0x8c },
+        .{ .jle, 0x8e },
+        .{ .jna, 0x86 },
+        .{ .jnae, 0x82 },
+        .{ .jnb, 0x83 },
+        .{ .jnbe, 0x87 },
+        .{ .jnc, 0x83 },
+        .{ .jne, 0x85 },
+        .{ .jng, 0x8e },
+        .{ .jnge, 0x8c },
+        .{ .jnl, 0x8d },
+        .{ .jnle, 0x8f },
+        .{ .jno, 0x81 },
+        .{ .jnp, 0x8b },
+        .{ .jns, 0x89 },
+        .{ .jnz, 0x85 },
+        .{ .jo, 0x80 },
+        .{ .jp, 0x8a },
+        .{ .jpe, 0x8a },
+        .{ .jpo, 0x8b },
+        .{ .js, 0x88 },
+        .{ .jz, 0x84 },
+    };
+
+    inline for (&cases) |case| {
+        // Assemble "<mnemonic> 0x0" and expect 0x0f, the opcode byte and four zero bytes.
+        const asm_text = @tagName(case[0]) ++ " 0x0";
+        const want = [_]u8{ 0x0f, case[1], 0x0, 0x0, 0x0, 0x0 };
+        var encoded = std.ArrayList(u8).init(testing.allocator);
+        defer encoded.deinit();
+        var assembler = Assembler.init(asm_text);
+        try assembler.assemble(encoded.writer());
+        try expectEqualHexStrings(&want, encoded.items, asm_text);
+    }
+}
+
+test "assemble - SETcc" {
+    // Each case pairs a set-on-condition mnemonic with the opcode byte that
+    // is expected to follow the 0x0f prefix in its encoding.
+    const cases = [_]struct { Instruction.Mnemonic, u8 }{
+        .{ .seta, 0x97 },
+        .{ .setae, 0x93 },
+        .{ .setb, 0x92 },
+        .{ .setbe, 0x96 },
+        .{ .setc, 0x92 },
+        .{ .sete, 0x94 },
+        .{ .setg, 0x9f },
+        .{ .setge, 0x9d },
+        .{ .setl, 0x9c },
+        .{ .setle, 0x9e },
+        .{ .setna, 0x96 },
+        .{ .setnae, 0x92 },
+        .{ .setnb, 0x93 },
+        .{ .setnbe, 0x97 },
+        .{ .setnc, 0x93 },
+        .{ .setne, 0x95 },
+        .{ .setng, 0x9e },
+        .{ .setnge, 0x9c },
+        .{ .setnl, 0x9d },
+        .{ .setnle, 0x9f },
+        .{ .setno, 0x91 },
+        .{ .setnp, 0x9b },
+        .{ .setns, 0x99 },
+        .{ .setnz, 0x95 },
+        .{ .seto, 0x90 },
+        .{ .setp, 0x9a },
+        .{ .setpe, 0x9a },
+        .{ .setpo, 0x9b },
+        .{ .sets, 0x98 },
+        .{ .setz, 0x94 },
+    };
+
+    inline for (&cases) |case| {
+        // Assemble "<mnemonic> al" and expect 0x0f, the opcode byte and 0xC0.
+        const asm_text = @tagName(case[0]) ++ " al";
+        const want = [_]u8{ 0x0f, case[1], 0xC0 };
+        var encoded = std.ArrayList(u8).init(testing.allocator);
+        defer encoded.deinit();
+        var assembler = Assembler.init(asm_text);
+        try assembler.assemble(encoded.writer());
+        try expectEqualHexStrings(&want, encoded.items, asm_text);
+    }
+}
+
+test "assemble - CMOVcc" {
+    // Each case pairs a conditional-move mnemonic with the opcode byte that
+    // is expected to follow the 0x48 0x0f prefix bytes in its encoding.
+    const cases = [_]struct { Instruction.Mnemonic, u8 }{
+        .{ .cmova, 0x47 },
+        .{ .cmovae, 0x43 },
+        .{ .cmovb, 0x42 },
+        .{ .cmovbe, 0x46 },
+        .{ .cmovc, 0x42 },
+        .{ .cmove, 0x44 },
+        .{ .cmovg, 0x4f },
+        .{ .cmovge, 0x4d },
+        .{ .cmovl, 0x4c },
+        .{ .cmovle, 0x4e },
+        .{ .cmovna, 0x46 },
+        .{ .cmovnae, 0x42 },
+        .{ .cmovnb, 0x43 },
+        .{ .cmovnbe, 0x47 },
+        .{ .cmovnc, 0x43 },
+        .{ .cmovne, 0x45 },
+        .{ .cmovng, 0x4e },
+        .{ .cmovnge, 0x4c },
+        .{ .cmovnl, 0x4d },
+        .{ .cmovnle, 0x4f },
+        .{ .cmovno, 0x41 },
+        .{ .cmovnp, 0x4b },
+        .{ .cmovns, 0x49 },
+        .{ .cmovnz, 0x45 },
+        .{ .cmovo, 0x40 },
+        .{ .cmovp, 0x4a },
+        .{ .cmovpe, 0x4a },
+        .{ .cmovpo, 0x4b },
+        .{ .cmovs, 0x48 },
+        .{ .cmovz, 0x44 },
+    };
+
+    inline for (&cases) |case| {
+        // Assemble "<mnemonic> rax, rbx" and expect 0x48, 0x0f, the opcode byte and 0xC3.
+        const asm_text = @tagName(case[0]) ++ " rax, rbx";
+        const want = [_]u8{ 0x48, 0x0f, case[1], 0xC3 };
+        var encoded = std.ArrayList(u8).init(testing.allocator);
+        defer encoded.deinit();
+        var assembler = Assembler.init(asm_text);
+        try assembler.assemble(encoded.writer());
+        try expectEqualHexStrings(&want, encoded.items, asm_text);
+    }
+}