diff options
56 files changed, 5494 insertions, 2978 deletions
diff --git a/doc/langref.html.in b/doc/langref.html.in index 69ad0624c4..057bb91f71 100644 --- a/doc/langref.html.in +++ b/doc/langref.html.in @@ -2983,7 +2983,7 @@ test "using slices for strings" { const world: []const u8 = "世界"; var all_together: [100]u8 = undefined; - // You can use slice syntax with at least one runtime-know index on an + // You can use slice syntax with at least one runtime-known index on an // array to convert an array into a slice. var start : usize = 0; const all_together_slice = all_together[start..]; diff --git a/lib/docs/main.js b/lib/docs/main.js index 9b650643e9..f9bb9b9fb8 100644 --- a/lib/docs/main.js +++ b/lib/docs/main.js @@ -1845,18 +1845,19 @@ const NAV_MODES = { let structObj = typeObj; let name = ""; if (opts.wantHtml) { - if (structObj.is_tuple) { - name = "<span class='tok-kw'>tuple</span> { "; - } else { - name = "<span class='tok-kw'>struct</span> { "; - } + name = "<span class='tok-kw'>struct</span>"; } else { - if (structObj.is_tuple) { - name = "tuple { "; + name = "struct"; + } + if (structObj.backing_int !== null) { + if (opts.wantHtml) { + name = "<span class='tok-kw'>packed</span> " + name; } else { - name = "struct { "; + name = "packed " + name; } + name += " (" + exprName(structObj.backing_int, opts) + ")"; } + name += " { "; if (structObj.field_types.length > 1 && opts.wantHtml) { name += "</br>"; } let indent = ""; if (structObj.field_types.length > 1 && opts.wantHtml) { @@ -1982,11 +1983,11 @@ const NAV_MODES = { name += " (" + exprName(unionObj.tag, opts) + ")"; } name += " { "; - if (unionObj.fields.length > 1 && opts.wantHtml) { + if (unionObj.field_types.length > 1 && opts.wantHtml) { name += "</br>"; } let indent = ""; - if (unionObj.fields.length > 1 && opts.wantHtml) { + if (unionObj.field_types.length > 1 && opts.wantHtml) { indent = " " } if (opts.indent) { @@ -1994,17 +1995,17 @@ const NAV_MODES = { } let unionNode = getAstNode(unionObj.src); let field_end = ","; - if (unionObj.fields.length > 1 && 
opts.wantHtml) { + if (unionObj.field_types.length > 1 && opts.wantHtml) { field_end += "</br>"; } else { field_end += " "; } - for (let i = 0; i < unionObj.fields.length; i += 1) { + for (let i = 0; i < unionObj.field_types.length; i += 1) { let fieldNode = getAstNode(unionNode.fields[i]); let fieldName = fieldNode.name; let html = indent + escapeHtml(fieldName); - let fieldTypeExpr = unionObj.fields[i]; + let fieldTypeExpr = unionObj.field_types[i]; html += ": "; html += exprName(fieldTypeExpr, { ...opts, indent: indent }); @@ -4494,9 +4495,10 @@ function addDeclToSearchResults(decl, declIndex, modNames, item, list, stack) { pubDecls: ty[4], field_types: ty[5], field_defaults: ty[6], - is_tuple: ty[7], - line_number: ty[8], - parent_container: ty[9], + backing_int: ty[7], + is_tuple: ty[8], + line_number: ty[9], + parent_container: ty[10], }; case 10: // ComptimeExpr case 11: // ComptimeFloat diff --git a/lib/std/Build/Step/ConfigHeader.zig b/lib/std/Build/Step/ConfigHeader.zig index a17784c96a..f6939e0e38 100644 --- a/lib/std/Build/Step/ConfigHeader.zig +++ b/lib/std/Build/Step/ConfigHeader.zig @@ -306,7 +306,9 @@ fn render_cmake( } var it = std.mem.tokenize(u8, line[1..], " \t\r"); const cmakedefine = it.next().?; - if (!std.mem.eql(u8, cmakedefine, "cmakedefine")) { + if (!std.mem.eql(u8, cmakedefine, "cmakedefine") and + !std.mem.eql(u8, cmakedefine, "cmakedefine01")) + { try output.appendSlice(line); try output.appendSlice("\n"); continue; diff --git a/lib/std/Build/Step/Run.zig b/lib/std/Build/Step/Run.zig index c506e23f90..ba2c084e24 100644 --- a/lib/std/Build/Step/Run.zig +++ b/lib/std/Build/Step/Run.zig @@ -822,7 +822,7 @@ fn runCommand( .zig_test => { const prefix: []const u8 = p: { if (result.stdio.test_metadata) |tm| { - if (tm.next_index <= tm.names.len) { + if (tm.next_index > 0 and tm.next_index <= tm.names.len) { const name = tm.testName(tm.next_index - 1); break :p b.fmt("while executing test '{s}', ", .{name}); } diff --git 
a/lib/std/c/darwin.zig b/lib/std/c/darwin.zig index 7879eddc4b..e5ee8a432a 100644 --- a/lib/std/c/darwin.zig +++ b/lib/std/c/darwin.zig @@ -3916,3 +3916,89 @@ pub const THREAD_AFFINITY = struct { /// individual cpus (high performance cpus group and low consumption one), thus the pthread QOS api is more appropriate in this case. pub extern "c" fn thread_affinity_get(thread: thread_act_t, flavor: thread_policy_flavor_t, info: thread_policy_t, infocnt: [*]mach_msg_type_number_t, default: *boolean_t) kern_return_t; pub extern "c" fn thread_affinity_set(thread: thread_act_t, flavor: thread_policy_flavor_t, info: thread_policy_t, infocnt: mach_msg_type_number_t) kern_return_t; + +pub const cpu_type_t = integer_t; +pub const cpu_subtype_t = integer_t; +pub const cpu_threadtype_t = integer_t; +pub const host_flavor_t = integer_t; +pub const host_info_t = *integer_t; +pub const host_can_has_debugger_info = extern struct { + can_has_debugger: boolean_t, +}; +pub const host_can_has_debugger_info_data_t = host_can_has_debugger_info; +pub const host_can_has_debugger_info_t = *host_can_has_debugger_info; + +pub const host_sched_info = extern struct { + min_timeout: integer_t, + min_quantum: integer_t, +}; +pub const host_sched_info_data_t = host_sched_info; +pub const host_sched_info_t = *host_sched_info; + +pub const kernel_resource_sizes = extern struct { + task: natural_t, + thread: natural_t, + port: natural_t, + memory_region: natural_t, + memory_object: natural_t, +}; + +pub const kernel_resource_sizes_data_t = kernel_resource_sizes; +pub const kernel_resource_sizes_t = *kernel_resource_sizes; + +pub const host_priority_info = extern struct { + kernel_priority: integer_t, + system_priority: integer_t, + server_priority: integer_t, + user_priority: integer_t, + depress_priority: integer_t, + idle_priority: integer_t, + minimum_priority: integer_t, + maximum_priority: integer_t, +}; + +pub const host_priority_info_data_t = host_priority_info; +pub const host_priority_info_t 
= *host_priority_info; + +pub const CPU_STATE_MAX = 4; + +pub const host_cpu_load_info = extern struct { + cpu_ticks: [CPU_STATE_MAX]natural_t, +}; + +pub const host_cpu_load_info_data_t = host_cpu_load_info; +pub const host_cpu_load_info_t = *host_cpu_load_info; + +pub const HOST = struct { + pub const BASIC_INFO = 1; + pub const SCHED_INFO = 3; + pub const RESOURCE_SIZES = 4; + pub const PRIORITY_INFO = 5; + pub const SEMAPHORE_TRAPS = 7; + pub const MACH_MSG_TRAPS = 8; + pub const VM_PURGEABLE = 9; + pub const DEBUG_INFO_INTERNAL = 10; + pub const CAN_HAS_DEBUGGER = 11; + pub const PREFERRED_USER_ARCH = 12; + pub const CAN_HAS_DEBUGGER_COUNT = @intCast(mach_msg_type_number_t, @sizeOf(host_can_has_debugger_info_data_t) / @sizeOf(integer_t)); + pub const SCHED_INFO_COUNT = @intCast(mach_msg_type_number_t, @sizeOf(host_sched_info_data_t) / @sizeOf(integer_t)); + pub const RESOURCES_SIZES_COUNT = @intCast(mach_msg_type_number_t, @sizeOf(kernel_resource_sizes_data_t) / @sizeOf(integer_t)); + pub const PRIORITY_INFO_COUNT = @intCast(mach_msg_type_number_t, @sizeOf(host_priority_info_data_t) / @sizeOf(integer_t)); + pub const CPU_LOAD_INFO_COUNT = @intCast(mach_msg_type_number_t, @sizeOf(host_cpu_load_info_data_t) / @sizeOf(integer_t)); +}; + +pub const host_basic_info = packed struct(u32) { + max_cpus: integer_t, + avail_cpus: integer_t, + memory_size: natural_t, + cpu_type: cpu_type_t, + cpu_subtype: cpu_subtype_t, + cpu_threadtype: cpu_threadtype_t, + physical_cpu: integer_t, + physical_cpu_max: integer_t, + logical_cpu: integer_t, + logical_cpu_max: integer_t, + max_mem: u64, +}; + +pub extern "c" fn host_info(host: host_t, flavor: host_flavor_t, info_out: host_info_t, info_outCnt: [*]mach_msg_type_number_t) kern_return_t; diff --git a/lib/std/c/freebsd.zig b/lib/std/c/freebsd.zig index 352c12c695..3b93ccc0c0 100644 --- a/lib/std/c/freebsd.zig +++ b/lib/std/c/freebsd.zig @@ -10,7 +10,6 @@ pub const cpuset_t = extern struct { __bits: [(CPU_SETSIZE + 
(@bitSizeOf(c_long) - 1)) / @bitSizeOf(c_long)]c_long, }; -// TODO: can eventually serve for the domainset_t's type too. fn __BIT_COUNT(bits: []const c_long) c_long { var count: c_long = 0; for (bits) |b| { @@ -64,6 +63,9 @@ pub extern "c" fn getpid() pid_t; pub extern "c" fn kinfo_getfile(pid: pid_t, cntp: *c_int) ?[*]kinfo_file; pub extern "c" fn kinfo_getvmmap(pid: pid_t, cntp: *c_int) ?[*]kinfo_vmentry; +pub extern "c" fn kinfo_getproc(pid: pid_t) ?[*]kinfo_proc; +pub extern "c" fn kinfo_getvmobject(cntp: *c_int) ?[*]kinfo_vmobject; +pub extern "c" fn kinfo_getswapvmobject(cntp: *c_int) ?[*]kinfo_vmobject; pub extern "c" fn cpuset_getaffinity(level: cpulevel_t, which: cpuwhich_t, id: id_t, setsize: usize, mask: *cpuset_t) c_int; pub extern "c" fn cpuset_setaffinity(level: cpulevel_t, which: cpuwhich_t, id: id_t, setsize: usize, mask: *const cpuset_t) c_int; @@ -676,6 +678,214 @@ comptime { std.debug.assert(@sizeOf(kinfo_vmentry) == KINFO_VMENTRY_SIZE); } +pub const WMESGLEN = 8; +pub const LOCKNAMELEN = 8; +pub const TDNAMLEN = 16; +pub const COMMLEN = 19; +pub const MAXCOMLEN = 19; +pub const KI_EMULNAMELEN = 16; +pub const KI_NGROUPS = 16; +pub const LOGNAMELEN = 17; +pub const LOGINCLASSLEN = 17; + +pub const KI_NSPARE_INT = 2; +pub const KI_NSPARE_LONG = 12; +pub const KI_NSPARE_PTR = 5; + +pub const RUSAGE_SELF = 0; +pub const RUSAGE_CHILDREN = -1; +pub const RUSAGE_THREAD = 1; + +pub const proc = opaque {}; +pub const thread = opaque {}; +pub const vnode = opaque {}; +pub const filedesc = opaque {}; +pub const pwddesc = opaque {}; +pub const vmspace = opaque {}; +pub const pcb = opaque {}; +pub const lwpid_t = i32; +pub const fixpt_t = u32; +pub const vm_size_t = usize; +pub const segsz_t = isize; + +pub const itimerval = extern struct { + interval: timeval, + value: timeval, +}; + +pub const pstats = extern struct { + cru: rusage, + timer: [3]itimerval, + prof: extern struct { + base: u8, + size: c_ulong, + off: c_ulong, + scale: c_ulong, + }, + start: 
timeval, +}; + +pub const user = extern struct { + stats: pstats, + kproc: kinfo_proc, +}; + +pub const pargs = extern struct { + ref: c_uint, + length: c_uint, + args: [1]u8, +}; + +pub const priority = extern struct { + class: u8, + level: u8, + native: u8, + user: u8, +}; + +pub const rusage = extern struct { + utime: timeval, + stime: timeval, + maxrss: c_long, + ixrss: c_long, + idrss: c_long, + isrss: c_long, + minflt: c_long, + majflt: c_long, + nswap: c_long, + inblock: c_long, + oublock: c_long, + msgsnd: c_long, + msgrcv: c_long, + nsignals: c_long, + nvcsw: c_long, + nivcsw: c_long, +}; + +pub const kinfo_proc = extern struct { + structsize: c_int, + layout: c_int, + args: *pargs, + paddr: *proc, + addr: *user, + tracep: *vnode, + textvp: *vnode, + fd: *filedesc, + vmspace: *vmspace, + wchan: ?*const anyopaque, + pid: pid_t, + ppid: pid_t, + pgid: pid_t, + tpgid: pid_t, + sid: pid_t, + tsid: pid_t, + jobc: c_short, + spare_short1: c_short, + tdev_freebsd11: u32, + siglist: sigset_t, + sigmask: sigset_t, + sigignore: sigset_t, + sigcatch: sigset_t, + uid: uid_t, + ruid: uid_t, + svuid: uid_t, + rgid: gid_t, + svgid: gid_t, + ngroups: c_short, + spare_short2: c_short, + groups: [KI_NGROUPS]gid_t, + size: vm_size_t, + rssize: segsz_t, + swrss: segsz_t, + tsize: segsz_t, + dsize: segsz_t, + ssize: segsz_t, + xstat: c_ushort, + acflag: c_ushort, + pctcpu: fixpt_t, + estcpu: c_uint, + slptime: c_uint, + swtime: c_uint, + cow: c_uint, + runtime: u64, + start: timeval, + childtime: timeval, + flag: c_long, + kiflag: c_long, + traceflag: c_int, + stat: u8, + nice: i8, + lock: u8, + rqindex: u8, + oncpu_old: u8, + lastcpu_old: u8, + tdname: [TDNAMLEN + 1]u8, + wmesg: [WMESGLEN + 1]u8, + login: [LOGNAMELEN + 1]u8, + lockname: [LOCKNAMELEN + 1]u8, + comm: [COMMLEN + 1]u8, + emul: [KI_EMULNAMELEN + 1]u8, + loginclass: [LOGINCLASSLEN + 1]u8, + moretdname: [MAXCOMLEN - TDNAMLEN + 1]u8, + sparestrings: [46]u8, + spareints: [KI_NSPARE_INT]c_int, + tdev: u64, + oncpu: 
c_int, + lastcpu: c_int, + tracer: c_int, + flag2: c_int, + fibnum: c_int, + cr_flags: c_uint, + jid: c_int, + numthreads: c_int, + tid: lwpid_t, + pri: priority, + rusage: rusage, + rusage_ch: rusage, + pcb: *pcb, + stack: ?*anyopaque, + udata: ?*anyopaque, + tdaddr: *thread, + pd: *pwddesc, + spareptrs: [KI_NSPARE_PTR]?*anyopaque, + sparelongs: [KI_NSPARE_LONG]c_long, + sflag: c_long, + tdflag: c_long, +}; + +pub const KINFO_PROC_SIZE = switch (builtin.cpu.arch) { + .x86 => 768, + .arm => 816, + else => 1088, +}; + +comptime { + assert(@sizeOf(kinfo_proc) == KINFO_PROC_SIZE); +} + +pub const kinfo_vmobject = extern struct { + structsize: c_int, + tpe: c_int, + size: u64, + vn_fileid: u64, + vn_fsid_freebsd11: u32, + ref_count: c_int, + shadow_count: c_int, + memattr: c_int, + resident: u64, + active: u64, + inactive: u64, + type_spec: extern union { + _vn_fsid: u64, + _backing_obj: u64, + }, + me: u64, + _qspare: [6]u64, + swapped: u32, + _ispare: [7]u32, + path: [PATH_MAX]u8, +}; + pub const CTL = struct { pub const KERN = 1; pub const DEBUG = 5; @@ -2362,3 +2572,38 @@ pub extern "c" fn mincore( length: usize, vec: [*]u8, ) c_int; + +pub const MAXMEMDOM = 8; +pub const domainid_t = u8; + +pub const LIST_ENTRY = opaque {}; + +pub const DOMAINSET = struct { + pub const POLICY_INVALID = 0; + pub const POLICY_ROUNDROBIN = 1; + pub const POLICY_FIRSTOUCH = 2; + pub const POLICY_PREFER = 3; + pub const POLICY_INTERLEAVE = 4; + pub const POLICY_MAX = DOMAINSET.POLICY_INTERLEAVE; +}; + +pub const DOMAINSET_SIZE = 256; +pub const domainset_t = extern struct { + __bits: [(DOMAINSET_SIZE + (@sizeOf(domainset) - 1)) / @bitSizeOf(domainset)]domainset, +}; + +pub fn DOMAINSET_COUNT(set: domainset_t) c_int { + return @intCast(c_int, __BIT_COUNT(set.__bits[0..])); +} + +pub const domainset = extern struct { + link: LIST_ENTRY, + mask: domainset_t, + policy: u16, + prefer: domainid_t, + cnt: domainid_t, + order: [MAXMEMDOM]domainid_t, +}; + +pub extern "c" fn 
cpuset_getdomain(level: cpulevel_t, which: cpuwhich_t, id: id_t, len: usize, domain: *domainset_t, r: *c_int) c_int; +pub extern "c" fn cpuset_setdomain(level: cpulevel_t, which: cpuwhich_t, id: id_t, len: usize, domain: *const domainset_t, r: c_int) c_int; diff --git a/lib/std/c/haiku.zig b/lib/std/c/haiku.zig index 9b693a59c2..b0fcb710af 100644 --- a/lib/std/c/haiku.zig +++ b/lib/std/c/haiku.zig @@ -11,9 +11,9 @@ extern "c" fn _errnop() *c_int; pub const _errno = _errnop; -pub extern "c" fn find_directory(which: c_int, volume: i32, createIt: bool, path_ptr: [*]u8, length: i32) status_t; +pub extern "c" fn find_directory(which: directory_which, volume: i32, createIt: bool, path_ptr: [*]u8, length: i32) status_t; -pub extern "c" fn find_path(codePointer: *const u8, baseDirectory: c_int, subPath: [*:0]const u8, pathBuffer: [*:0]u8, bufferSize: usize) status_t; +pub extern "c" fn find_path(codePointer: *const u8, baseDirectory: path_base_directory, subPath: [*:0]const u8, pathBuffer: [*:0]u8, bufferSize: usize) status_t; pub extern "c" fn find_thread(thread_name: ?*anyopaque) i32; @@ -1024,6 +1024,13 @@ pub const directory_which = enum(c_int) { _, }; +// TODO fill out if needed +pub const path_base_directory = enum(c_int) { + B_FIND_PATH_IMAGE_PATH = 1000, +}; + +pub const B_APP_IMAGE_SYMBOL = null; + pub const cc_t = u8; pub const speed_t = u8; pub const tcflag_t = u32; diff --git a/lib/std/c/openbsd.zig b/lib/std/c/openbsd.zig index 51c4bcb6dd..3551f50020 100644 --- a/lib/std/c/openbsd.zig +++ b/lib/std/c/openbsd.zig @@ -1127,6 +1127,18 @@ pub usingnamespace switch (builtin.cpu.arch) { sc_cookie: c_long, }; }, + .arm64 => struct { + pub const ucontext_t = extern struct { + __sc_unused: c_int, + sc_mask: c_int, + sc_sp: c_ulong, + sc_lr: c_ulong, + sc_elr: c_ulong, + sc_spsr: c_ulong, + sc_x: [30]c_ulong, + sc_cookie: c_long, + }; + }, else => struct {}, }; diff --git a/lib/std/crypto/tls/Client.zig b/lib/std/crypto/tls/Client.zig index e67fd20925..cd505afa6e 100644 
--- a/lib/std/crypto/tls/Client.zig +++ b/lib/std/crypto/tls/Client.zig @@ -1233,7 +1233,7 @@ fn finishRead2(c: *Client, first: []const u8, frag1: []const u8, out: usize) usi c.partial_cleartext_idx = 0; c.partial_ciphertext_idx = 0; c.partial_ciphertext_end = @intCast(@TypeOf(c.partial_ciphertext_end), first.len + frag1.len); - @memcpy(c.partially_read_buffer[0..first.len], first); + std.mem.copyForwards(u8, c.partially_read_buffer[0..first.len], first); @memcpy(c.partially_read_buffer[first.len..][0..frag1.len], frag1); } return out; diff --git a/lib/std/elf.zig b/lib/std/elf.zig index e2cad5640e..751f82a9ea 100644 --- a/lib/std/elf.zig +++ b/lib/std/elf.zig @@ -296,10 +296,14 @@ pub const SHT_GROUP = 17; pub const SHT_SYMTAB_SHNDX = 18; /// Start of OS-specific pub const SHT_LOOS = 0x60000000; +/// LLVM address-significance table +pub const SHT_LLVM_ADDRSIG = 0x6fff4c03; /// End of OS-specific pub const SHT_HIOS = 0x6fffffff; /// Start of processor-specific pub const SHT_LOPROC = 0x70000000; +/// Unwind information +pub const SHT_X86_64_UNWIND = 0x70000001; /// End of processor-specific pub const SHT_HIPROC = 0x7fffffff; /// Start of application-specific @@ -1632,6 +1636,9 @@ pub const SHF_TLS = 0x400; /// Identifies a section containing compressed data. pub const SHF_COMPRESSED = 0x800; +/// Not to be GCed by the linker +pub const SHF_GNU_RETAIN = 0x200000; + /// This section is excluded from the final executable or shared library. pub const SHF_EXCLUDE = 0x80000000; diff --git a/lib/std/enums.zig b/lib/std/enums.zig index 8e67c358b7..f44f8fd89d 100644 --- a/lib/std/enums.zig +++ b/lib/std/enums.zig @@ -48,6 +48,22 @@ pub fn values(comptime E: type) []const E { return comptime valuesFromFields(E, @typeInfo(E).Enum.fields); } +/// A safe alternative to @tagName() for non-exhaustive enums that doesn't +/// panic when `e` has no tagged value. +/// Returns the tag name for `e` or null if no tag exists. 
+pub fn tagName(comptime E: type, e: E) ?[]const u8 { + return inline for (@typeInfo(E).Enum.fields) |f| { + if (@enumToInt(e) == f.value) break f.name; + } else null; +} + +test tagName { + const E = enum(u8) { a, b, _ }; + try testing.expect(tagName(E, .a) != null); + try testing.expectEqualStrings("a", tagName(E, .a).?); + try testing.expect(tagName(E, @intToEnum(E, 42)) == null); +} + /// Determines the length of a direct-mapped enum array, indexed by /// @intCast(usize, @enumToInt(enum_value)). /// If the enum is non-exhaustive, the resulting length will only be enough diff --git a/lib/std/fmt.zig b/lib/std/fmt.zig index cf791df1a6..c3ccd75d27 100644 --- a/lib/std/fmt.zig +++ b/lib/std/fmt.zig @@ -2244,8 +2244,8 @@ test "struct" { field: u8, }; const value = Struct{ .field = 42 }; - try expectFmt("struct: Struct{ .field = 42 }\n", "struct: {}\n", .{value}); - try expectFmt("struct: Struct{ .field = 42 }\n", "struct: {}\n", .{&value}); + try expectFmt("struct: fmt.test.struct.Struct{ .field = 42 }\n", "struct: {}\n", .{value}); + try expectFmt("struct: fmt.test.struct.Struct{ .field = 42 }\n", "struct: {}\n", .{&value}); } { const Struct = struct { @@ -2253,8 +2253,24 @@ test "struct" { b: u1, }; const value = Struct{ .a = 0, .b = 1 }; - try expectFmt("struct: Struct{ .a = 0, .b = 1 }\n", "struct: {}\n", .{value}); + try expectFmt("struct: fmt.test.struct.Struct{ .a = 0, .b = 1 }\n", "struct: {}\n", .{value}); } + + const S = struct { + a: u32, + b: anyerror, + }; + + const inst = S{ + .a = 456, + .b = error.Unused, + }; + + try expectFmt("fmt.test.struct.S{ .a = 456, .b = error.Unused }", "{}", .{inst}); + // Tuples + try expectFmt("{ }", "{}", .{.{}}); + try expectFmt("{ -1 }", "{}", .{.{-1}}); + try expectFmt("{ -1, 42, 2.5e+04 }", "{}", .{.{ -1, 42, 0.25e5 }}); } test "enum" { @@ -2263,13 +2279,26 @@ test "enum" { Two, }; const value = Enum.Two; - try expectFmt("enum: Enum.Two\n", "enum: {}\n", .{value}); - try expectFmt("enum: Enum.Two\n", "enum: {}\n", 
.{&value}); - try expectFmt("enum: Enum.One\n", "enum: {}\n", .{Enum.One}); - try expectFmt("enum: Enum.Two\n", "enum: {}\n", .{Enum.Two}); + try expectFmt("enum: fmt.test.enum.Enum.Two\n", "enum: {}\n", .{value}); + try expectFmt("enum: fmt.test.enum.Enum.Two\n", "enum: {}\n", .{&value}); + try expectFmt("enum: fmt.test.enum.Enum.One\n", "enum: {}\n", .{Enum.One}); + try expectFmt("enum: fmt.test.enum.Enum.Two\n", "enum: {}\n", .{Enum.Two}); // test very large enum to verify ct branch quota is large enough - try expectFmt("enum: os.windows.win32error.Win32Error.INVALID_FUNCTION\n", "enum: {}\n", .{std.os.windows.Win32Error.INVALID_FUNCTION}); + // TODO: https://github.com/ziglang/zig/issues/15609 + if (!((builtin.cpu.arch == .wasm32) and builtin.mode == .Debug)) { + try expectFmt("enum: os.windows.win32error.Win32Error.INVALID_FUNCTION\n", "enum: {}\n", .{std.os.windows.Win32Error.INVALID_FUNCTION}); + } + + const E = enum { + One, + Two, + Three, + }; + + const inst = E.Two; + + try expectFmt("fmt.test.enum.E.Two", "{}", .{inst}); } test "non-exhaustive enum" { @@ -2445,24 +2474,6 @@ test "custom" { try expectFmt("dim: 10.200x2.220\n", "dim: {d}\n", .{value}); } -test "struct" { - const S = struct { - a: u32, - b: anyerror, - }; - - const inst = S{ - .a = 456, - .b = error.Unused, - }; - - try expectFmt("fmt.test.struct.S{ .a = 456, .b = error.Unused }", "{}", .{inst}); - // Tuples - try expectFmt("{ }", "{}", .{.{}}); - try expectFmt("{ -1 }", "{}", .{.{-1}}); - try expectFmt("{ -1, 42, 2.5e+04 }", "{}", .{.{ -1, 42, 0.25e5 }}); -} - test "union" { const TU = union(enum) { float: f32, @@ -2493,18 +2504,6 @@ test "union" { try std.testing.expect(mem.eql(u8, eu_result[0..18], "fmt.test.union.EU@")); } -test "enum" { - const E = enum { - One, - Two, - Three, - }; - - const inst = E.Two; - - try expectFmt("fmt.test.enum.E.Two", "{}", .{inst}); -} - test "struct.self-referential" { const S = struct { const SelfType = @This(); diff --git 
a/lib/std/fmt/parse_float/parse_float.zig b/lib/std/fmt/parse_float/parse_float.zig index 6d77346e9b..08d1c55862 100644 --- a/lib/std/fmt/parse_float/parse_float.zig +++ b/lib/std/fmt/parse_float/parse_float.zig @@ -12,6 +12,14 @@ pub const ParseFloatError = error{ }; pub fn parseFloat(comptime T: type, s: []const u8) ParseFloatError!T { + if (@typeInfo(T) != .Float) { + @compileError("Cannot parse a float into a non-floating point type."); + } + + if (T == f80) { + @compileError("TODO support parsing float to f80"); + } + if (s.len == 0) { return error.InvalidCharacter; } diff --git a/lib/std/fs.zig b/lib/std/fs.zig index 9f64387bd8..7327a3a913 100644 --- a/lib/std/fs.zig +++ b/lib/std/fs.zig @@ -2999,8 +2999,7 @@ pub fn selfExePath(out_buffer: []u8) SelfExePathError![]u8 { .haiku => { // The only possible issue when looking for the self image path is // when the buffer is too short. - // TODO replace with proper constants - if (os.find_path(null, 1000, null, out_buffer.ptr, out_buffer.len) != 0) + if (os.find_path(os.B_APP_IMAGE_SYMBOL, os.path_base_directory.B_FIND_IMAGE_PATH, null, out_buffer.ptr, out_buffer.len) != 0) return error.Overflow; return mem.sliceTo(out_buffer, 0); }, diff --git a/lib/std/hash_map.zig b/lib/std/hash_map.zig index df3446a2c0..91f5682831 100644 --- a/lib/std/hash_map.zig +++ b/lib/std/hash_map.zig @@ -1741,6 +1741,22 @@ test "std.hash_map clone" { try expectEqual(b.get(1).?, 1); try expectEqual(b.get(2).?, 2); try expectEqual(b.get(3).?, 3); + + var original = AutoHashMap(i32, i32).init(std.testing.allocator); + defer original.deinit(); + + var i: u8 = 0; + while (i < 10) : (i += 1) { + try original.putNoClobber(i, i * 10); + } + + var copy = try original.clone(); + defer copy.deinit(); + + i = 0; + while (i < 10) : (i += 1) { + try testing.expect(copy.get(i).? 
== i * 10); + } } test "std.hash_map ensureTotalCapacity with existing elements" { @@ -2072,24 +2088,6 @@ test "std.hash_map basic hash map usage" { try testing.expect(map.remove(3) == true); } -test "std.hash_map clone" { - var original = AutoHashMap(i32, i32).init(std.testing.allocator); - defer original.deinit(); - - var i: u8 = 0; - while (i < 10) : (i += 1) { - try original.putNoClobber(i, i * 10); - } - - var copy = try original.clone(); - defer copy.deinit(); - - i = 0; - while (i < 10) : (i += 1) { - try testing.expect(copy.get(i).? == i * 10); - } -} - test "std.hash_map getOrPutAdapted" { const AdaptedContext = struct { fn eql(self: @This(), adapted_key: []const u8, test_key: u64) bool { diff --git a/lib/std/io/reader.zig b/lib/std/io/reader.zig index 65b7a086c5..0b88e6b31a 100644 --- a/lib/std/io/reader.zig +++ b/lib/std/io/reader.zig @@ -553,10 +553,18 @@ test "Reader.readUntilDelimiter returns StreamTooLong, then bytes read until the } test "Reader.readUntilDelimiter returns EndOfStream" { - var buf: [5]u8 = undefined; - var fis = std.io.fixedBufferStream(""); - const reader = fis.reader(); - try std.testing.expectError(error.EndOfStream, reader.readUntilDelimiter(&buf, '\n')); + { + var buf: [5]u8 = undefined; + var fis = std.io.fixedBufferStream(""); + const reader = fis.reader(); + try std.testing.expectError(error.EndOfStream, reader.readUntilDelimiter(&buf, '\n')); + } + { + var buf: [5]u8 = undefined; + var fis = std.io.fixedBufferStream("1234"); + const reader = fis.reader(); + try std.testing.expectError(error.EndOfStream, reader.readUntilDelimiter(&buf, '\n')); + } } test "Reader.readUntilDelimiter returns bytes read until delimiter, then EndOfStream" { @@ -567,13 +575,6 @@ test "Reader.readUntilDelimiter returns bytes read until delimiter, then EndOfSt try std.testing.expectError(error.EndOfStream, reader.readUntilDelimiter(&buf, '\n')); } -test "Reader.readUntilDelimiter returns EndOfStream" { - var buf: [5]u8 = undefined; - var fis = 
std.io.fixedBufferStream("1234"); - const reader = fis.reader(); - try std.testing.expectError(error.EndOfStream, reader.readUntilDelimiter(&buf, '\n')); -} - test "Reader.readUntilDelimiter returns StreamTooLong, then EndOfStream" { var buf: [5]u8 = undefined; var fis = std.io.fixedBufferStream("12345"); diff --git a/lib/std/math/big/int.zig b/lib/std/math/big/int.zig index b01d9b04ff..686a3fdbda 100644 --- a/lib/std/math/big/int.zig +++ b/lib/std/math/big/int.zig @@ -1519,7 +1519,7 @@ pub const Mutable = struct { r.positive = r_positive; } - if (xy_trailing != 0) { + if (xy_trailing != 0 and r.limbs[r.len - 1] != 0) { // Manually shift here since we know its limb aligned. mem.copyBackwards(Limb, r.limbs[xy_trailing..], r.limbs[0..r.len]); @memset(r.limbs[0..xy_trailing], 0); diff --git a/lib/std/math/big/int_test.zig b/lib/std/math/big/int_test.zig index 0066ce9940..0514453cf4 100644 --- a/lib/std/math/big/int_test.zig +++ b/lib/std/math/big/int_test.zig @@ -1373,6 +1373,19 @@ test "big.int div trunc single-single -/-" { try testing.expect((try r.to(i32)) == er); } +test "big.int divTrunc #15535" { + var one = try Managed.initSet(testing.allocator, 1); + defer one.deinit(); + var x = try Managed.initSet(testing.allocator, std.math.pow(u128, 2, 64)); + defer x.deinit(); + var r = try Managed.init(testing.allocator); + defer r.deinit(); + var q = try Managed.init(testing.allocator); + defer q.deinit(); + try q.divTrunc(&r, &x, &x); + try testing.expect(r.order(one) == std.math.Order.lt); +} + test "big.int divFloor #10932" { var a = try Managed.init(testing.allocator); defer a.deinit(); @@ -2012,15 +2025,10 @@ test "big.int shift-right negative" { defer arg2.deinit(); try a.shiftRight(&arg2, 10); try testing.expect((try a.to(i32)) == -1); // -5 >> 10 == -1 -} -test "big.int shift-right negative" { - var a = try Managed.init(testing.allocator); - defer a.deinit(); - - var arg = try Managed.initSet(testing.allocator, -10); - defer arg.deinit(); - try 
a.shiftRight(&arg, 1232); + var arg3 = try Managed.initSet(testing.allocator, -10); + defer arg3.deinit(); + try a.shiftRight(&arg3, 1232); try testing.expect((try a.to(i32)) == -1); // -10 >> 1232 == -1 } @@ -2483,7 +2491,7 @@ test "big.int gcd non-one small" { try testing.expect((try r.to(u32)) == 1); } -test "big.int gcd non-one small" { +test "big.int gcd non-one medium" { var a = try Managed.initSet(testing.allocator, 4864); defer a.deinit(); var b = try Managed.initSet(testing.allocator, 3458); diff --git a/lib/std/math/big/rational.zig b/lib/std/math/big/rational.zig index c3609a6fa2..cdc33e351d 100644 --- a/lib/std/math/big/rational.zig +++ b/lib/std/math/big/rational.zig @@ -782,36 +782,38 @@ test "big.rational mul" { } test "big.rational div" { - var a = try Rational.init(testing.allocator); - defer a.deinit(); - var b = try Rational.init(testing.allocator); - defer b.deinit(); - var r = try Rational.init(testing.allocator); - defer r.deinit(); + { + var a = try Rational.init(testing.allocator); + defer a.deinit(); + var b = try Rational.init(testing.allocator); + defer b.deinit(); + var r = try Rational.init(testing.allocator); + defer r.deinit(); - try a.setRatio(78923, 23341); - try b.setRatio(123097, 12441414); - try a.div(a, b); + try a.setRatio(78923, 23341); + try b.setRatio(123097, 12441414); + try a.div(a, b); - try r.setRatio(75531824394, 221015929); - try testing.expect((try a.order(r)) == .eq); -} + try r.setRatio(75531824394, 221015929); + try testing.expect((try a.order(r)) == .eq); + } -test "big.rational div" { - var a = try Rational.init(testing.allocator); - defer a.deinit(); - var r = try Rational.init(testing.allocator); - defer r.deinit(); + { + var a = try Rational.init(testing.allocator); + defer a.deinit(); + var r = try Rational.init(testing.allocator); + defer r.deinit(); - try a.setRatio(78923, 23341); - a.invert(); + try a.setRatio(78923, 23341); + a.invert(); - try r.setRatio(23341, 78923); - try testing.expect((try 
a.order(r)) == .eq); + try r.setRatio(23341, 78923); + try testing.expect((try a.order(r)) == .eq); - try a.setRatio(-78923, 23341); - a.invert(); + try a.setRatio(-78923, 23341); + a.invert(); - try r.setRatio(-23341, 78923); - try testing.expect((try a.order(r)) == .eq); + try r.setRatio(-23341, 78923); + try testing.expect((try a.order(r)) == .eq); + } } diff --git a/lib/std/meta.zig b/lib/std/meta.zig index 7be3b71347..cd83061d53 100644 --- a/lib/std/meta.zig +++ b/lib/std/meta.zig @@ -14,48 +14,7 @@ test { _ = TrailerFlags; } -pub fn tagName(v: anytype) []const u8 { - const T = @TypeOf(v); - switch (@typeInfo(T)) { - .ErrorSet => return @errorName(v), - else => return @tagName(v), - } -} - -test "std.meta.tagName" { - const E1 = enum { - A, - B, - }; - const E2 = enum(u8) { - C = 33, - D, - }; - const U1 = union(enum) { - G: u8, - H: u16, - }; - const U2 = union(E2) { - C: u8, - D: u16, - }; - - var u1g = U1{ .G = 0 }; - var u1h = U1{ .H = 0 }; - var u2a = U2{ .C = 0 }; - var u2b = U2{ .D = 0 }; - - try testing.expect(mem.eql(u8, tagName(E1.A), "A")); - try testing.expect(mem.eql(u8, tagName(E1.B), "B")); - try testing.expect(mem.eql(u8, tagName(E2.C), "C")); - try testing.expect(mem.eql(u8, tagName(E2.D), "D")); - try testing.expect(mem.eql(u8, tagName(error.E), "E")); - try testing.expect(mem.eql(u8, tagName(error.F), "F")); - try testing.expect(mem.eql(u8, tagName(u1g), "G")); - try testing.expect(mem.eql(u8, tagName(u1h), "H")); - try testing.expect(mem.eql(u8, tagName(u2a), "C")); - try testing.expect(mem.eql(u8, tagName(u2b), "D")); -} +pub const tagName = @compileError("deprecated; use @tagName or @errorName directly"); /// Given an enum or tagged union, returns true if the comptime-supplied /// string matches the name of the tag value. 
This match process should diff --git a/lib/std/net/test.zig b/lib/std/net/test.zig index 9923e4932b..817d6c2593 100644 --- a/lib/std/net/test.zig +++ b/lib/std/net/test.zig @@ -182,7 +182,7 @@ test "listen on a port, send bytes, receive bytes" { try testing.expectEqualSlices(u8, "Hello world!", buf[0..n]); } -test "listen on a port, send bytes, receive bytes" { +test "listen on a port, send bytes, receive bytes, async-only" { if (!std.io.is_async) return error.SkipZigTest; if (builtin.os.tag != .linux and !builtin.os.tag.isDarwin()) { diff --git a/lib/std/priority_dequeue.zig b/lib/std/priority_dequeue.zig index db55be3804..05e3d7e58b 100644 --- a/lib/std/priority_dequeue.zig +++ b/lib/std/priority_dequeue.zig @@ -633,7 +633,7 @@ test "std.PriorityDequeue: peekMax" { try expect(queue.peekMax().? == 9); } -test "std.PriorityDequeue: sift up with odd indices" { +test "std.PriorityDequeue: sift up with odd indices, removeMin" { var queue = PDQ.init(testing.allocator, {}); defer queue.deinit(); const items = [_]u32{ 15, 7, 21, 14, 13, 22, 12, 6, 7, 25, 5, 24, 11, 16, 15, 24, 2, 1 }; @@ -647,7 +647,7 @@ test "std.PriorityDequeue: sift up with odd indices" { } } -test "std.PriorityDequeue: sift up with odd indices" { +test "std.PriorityDequeue: sift up with odd indices, removeMax" { var queue = PDQ.init(testing.allocator, {}); defer queue.deinit(); const items = [_]u32{ 15, 7, 21, 14, 13, 22, 12, 6, 7, 25, 5, 24, 11, 16, 15, 24, 2, 1 }; diff --git a/lib/std/target/x86.zig b/lib/std/target/x86.zig index c46367e755..bf3b8cb953 100644 --- a/lib/std/target/x86.zig +++ b/lib/std/target/x86.zig @@ -326,7 +326,7 @@ pub const all_features = blk: { }; result[@enumToInt(Feature.avx512ifma)] = .{ .llvm_name = "avx512ifma", - .description = "Enable AVX-512 Integer Fused Multiple-Add", + .description = "Enable AVX-512 Integer Fused Multiply-Add", .dependencies = featureSet(&[_]Feature{ .avx512f, }), @@ -599,14 +599,14 @@ pub const all_features = blk: { }; 
result[@enumToInt(Feature.fma)] = .{ .llvm_name = "fma", - .description = "Enable three-operand fused multiple-add", + .description = "Enable three-operand fused multiply-add", .dependencies = featureSet(&[_]Feature{ .avx, }), }; result[@enumToInt(Feature.fma4)] = .{ .llvm_name = "fma4", - .description = "Enable four-operand fused multiple-add", + .description = "Enable four-operand fused multiply-add", .dependencies = featureSet(&[_]Feature{ .avx, .sse4a, diff --git a/lib/std/zig/Ast.zig b/lib/std/zig/Ast.zig index cb86696e13..7bc78c17da 100644 --- a/lib/std/zig/Ast.zig +++ b/lib/std/zig/Ast.zig @@ -3511,3 +3511,7 @@ const Token = std.zig.Token; const Ast = @This(); const Allocator = std.mem.Allocator; const Parse = @import("Parse.zig"); + +test { + testing.refAllDecls(@This()); +} diff --git a/lib/std/zig/number_literal.zig b/lib/std/zig/number_literal.zig index 1b41908371..b021190ad9 100644 --- a/lib/std/zig/number_literal.zig +++ b/lib/std/zig/number_literal.zig @@ -44,8 +44,6 @@ pub const Error = union(enum) { duplicate_period, /// Float literal has multiple exponents. duplicate_exponent: usize, - /// Decimal float has hexadecimal exponent. - invalid_hex_exponent: usize, /// Exponent comes directly after '_' digit separator. exponent_after_underscore: usize, /// Special character (+-.) comes directly after exponent. 
@@ -103,7 +101,6 @@ pub fn parseNumberLiteral(bytes: []const u8) Result { }, 'e', 'E' => if (base == 10) { float = true; - if (base != 10 and base != 16) return .{ .failure = .{ .invalid_float_base = 2 } }; if (exponent) return .{ .failure = .{ .duplicate_exponent = i } }; if (underscore) return .{ .failure = .{ .exponent_after_underscore = i } }; special = c; @@ -112,10 +109,8 @@ pub fn parseNumberLiteral(bytes: []const u8) Result { }, 'p', 'P' => if (base == 16) { float = true; - if (base != 10 and base != 16) return .{ .failure = .{ .invalid_float_base = 2 } }; if (exponent) return .{ .failure = .{ .duplicate_exponent = i } }; if (underscore) return .{ .failure = .{ .exponent_after_underscore = i } }; - if (base != 16) return .{ .failure = .{ .invalid_hex_exponent = i } }; special = c; exponent = true; continue; @@ -123,7 +118,7 @@ pub fn parseNumberLiteral(bytes: []const u8) Result { '.' => { float = true; if (base != 10 and base != 16) return .{ .failure = .{ .invalid_float_base = 2 } }; - if (period) return .{ .failure = .{ .duplicate_exponent = i } }; + if (period) return .{ .failure = .duplicate_period }; period = true; if (underscore) return .{ .failure = .{ .special_after_underscore = i } }; special = c; @@ -131,7 +126,8 @@ pub fn parseNumberLiteral(bytes: []const u8) Result { }, '+', '-' => { switch (special) { - 'p', 'P', 'e', 'E' => {}, + 'p', 'P' => {}, + 'e', 'E' => if (base != 10) return .{ .failure = .{ .invalid_exponent_sign = i } }, else => return .{ .failure = .{ .invalid_exponent_sign = i } }, } special = c; diff --git a/lib/std/zig/parser_test.zig b/lib/std/zig/parser_test.zig index 9176e14480..21785278ec 100644 --- a/lib/std/zig/parser_test.zig +++ b/lib/std/zig/parser_test.zig @@ -1240,7 +1240,7 @@ test "zig fmt: infix operator and then multiline string literal" { ); } -test "zig fmt: infix operator and then multiline string literal" { +test "zig fmt: infix operator and then multiline string literal over multiple lines" { try testCanonical( 
\\const x = "" ++ \\ \\ hi0 @@ -4310,7 +4310,7 @@ test "zig fmt: comptime before comptime field" { }); } -test "zig fmt: invalid else branch statement" { +test "zig fmt: invalid doc comments on comptime and test blocks" { try testError( \\/// This is a doc comment for a comptime block. \\comptime {} @@ -5191,7 +5191,7 @@ test "zig fmt: preserve container doc comment in container without trailing comm ); } -test "zig fmt: make single-line if no trailing comma" { +test "zig fmt: make single-line if no trailing comma, fmt: off" { try testCanonical( \\// Test trailing comma syntax \\// zig fmt: off @@ -5270,7 +5270,7 @@ test "zig fmt: variable initialized with ==" { , &.{.wrong_equal_var_decl}); } -test "zig fmt: missing const/var before local variable" { +test "zig fmt: missing const/var before local variable in comptime block" { try testError( \\comptime { \\ z: u32; @@ -5732,6 +5732,62 @@ test "zig fmt: canonicalize symbols (asm)" { ); } +test "zig fmt: don't canonicalize _ in enums" { + try testTransform( + \\const A = enum { + \\ first, + \\ second, + \\ third, + \\ _, + \\}; + \\const B = enum { + \\ @"_", + \\ @"__", + \\ @"___", + \\ @"____", + \\}; + \\const C = struct { + \\ @"_": u8, + \\ @"__": u8, + \\ @"___": u8, + \\ @"____": u8, + \\}; + \\const D = union { + \\ @"_": u8, + \\ @"__": u8, + \\ @"___": u8, + \\ @"____": u8, + \\}; + \\ + , + \\const A = enum { + \\ first, + \\ second, + \\ third, + \\ _, + \\}; + \\const B = enum { + \\ @"_", + \\ __, + \\ ___, + \\ ____, + \\}; + \\const C = struct { + \\ _: u8, + \\ __: u8, + \\ ___: u8, + \\ ____: u8, + \\}; + \\const D = union { + \\ _: u8, + \\ __: u8, + \\ ___: u8, + \\ ____: u8, + \\}; + \\ + ); +} + test "zig fmt: error for missing sentinel value in sentinel slice" { try testError( \\const foo = foo[0..:]; diff --git a/lib/std/zig/render.zig b/lib/std/zig/render.zig index 367d06f7c6..e1ccc8e0e8 100644 --- a/lib/std/zig/render.zig +++ b/lib/std/zig/render.zig @@ -40,27 +40,28 @@ pub fn 
renderTree(buffer: *std.ArrayList(u8), tree: Ast) Error!void { /// Render all members in the given slice, keeping empty lines where appropriate fn renderMembers(gpa: Allocator, ais: *Ais, tree: Ast, members: []const Ast.Node.Index) Error!void { if (members.len == 0) return; - var is_tuple = true; - for (members) |member| { - const container_field = tree.fullContainerField(member) orelse continue; - if (!container_field.ast.tuple_like) { - is_tuple = false; - break; - } - } - try renderMember(gpa, ais, tree, members[0], is_tuple, .newline); + const container: Container = for (members) |member| { + if (tree.fullContainerField(member)) |field| if (!field.ast.tuple_like) break .other; + } else .tuple; + try renderMember(gpa, ais, tree, container, members[0], .newline); for (members[1..]) |member| { try renderExtraNewline(ais, tree, member); - try renderMember(gpa, ais, tree, member, is_tuple, .newline); + try renderMember(gpa, ais, tree, container, member, .newline); } } +const Container = enum { + @"enum", + tuple, + other, +}; + fn renderMember( gpa: Allocator, ais: *Ais, tree: Ast, + container: Container, decl: Ast.Node.Index, - is_tuple: bool, space: Space, ) Error!void { const token_tags = tree.tokens.items(.tag); @@ -180,7 +181,7 @@ fn renderMember( .container_field_init, .container_field_align, .container_field, - => return renderContainerField(gpa, ais, tree, tree.fullContainerField(decl).?, is_tuple, space), + => return renderContainerField(gpa, ais, tree, container, tree.fullContainerField(decl).?, space), .@"comptime" => return renderExpression(gpa, ais, tree, decl, space), @@ -1279,19 +1280,23 @@ fn renderContainerField( gpa: Allocator, ais: *Ais, tree: Ast, + container: Container, field_param: Ast.full.ContainerField, - is_tuple: bool, space: Space, ) Error!void { var field = field_param; - if (!is_tuple) field.convertToNonTupleLike(tree.nodes); + if (container != .tuple) field.convertToNonTupleLike(tree.nodes); + const quote: QuoteBehavior = switch 
(container) { + .@"enum" => .eagerly_unquote_except_underscore, + .tuple, .other => .eagerly_unquote, + }; if (field.comptime_token) |t| { try renderToken(ais, tree, t, .space); // comptime } if (field.ast.type_expr == 0 and field.ast.value_expr == 0) { if (field.ast.align_expr != 0) { - try renderIdentifier(ais, tree, field.ast.main_token, .space, .eagerly_unquote); // name + try renderIdentifier(ais, tree, field.ast.main_token, .space, quote); // name const lparen_token = tree.firstToken(field.ast.align_expr) - 1; const align_kw = lparen_token - 1; const rparen_token = tree.lastToken(field.ast.align_expr) + 1; @@ -1300,11 +1305,11 @@ fn renderContainerField( try renderExpression(gpa, ais, tree, field.ast.align_expr, .none); // alignment return renderToken(ais, tree, rparen_token, .space); // ) } - return renderIdentifierComma(ais, tree, field.ast.main_token, space, .eagerly_unquote); // name + return renderIdentifierComma(ais, tree, field.ast.main_token, space, quote); // name } if (field.ast.type_expr != 0 and field.ast.value_expr == 0) { if (!field.ast.tuple_like) { - try renderIdentifier(ais, tree, field.ast.main_token, .none, .eagerly_unquote); // name + try renderIdentifier(ais, tree, field.ast.main_token, .none, quote); // name try renderToken(ais, tree, field.ast.main_token + 1, .space); // : } @@ -1321,7 +1326,7 @@ fn renderContainerField( } } if (field.ast.type_expr == 0 and field.ast.value_expr != 0) { - try renderIdentifier(ais, tree, field.ast.main_token, .space, .eagerly_unquote); // name + try renderIdentifier(ais, tree, field.ast.main_token, .space, quote); // name if (field.ast.align_expr != 0) { const lparen_token = tree.firstToken(field.ast.align_expr) - 1; const align_kw = lparen_token - 1; @@ -1335,7 +1340,7 @@ fn renderContainerField( return renderExpressionComma(gpa, ais, tree, field.ast.value_expr, space); // value } if (!field.ast.tuple_like) { - try renderIdentifier(ais, tree, field.ast.main_token, .none, .eagerly_unquote); // name + try 
renderIdentifier(ais, tree, field.ast.main_token, .none, quote); // name try renderToken(ais, tree, field.ast.main_token + 1, .space); // : } try renderExpression(gpa, ais, tree, field.ast.type_expr, .space); // type @@ -2054,13 +2059,12 @@ fn renderContainerDecl( try renderToken(ais, tree, layout_token, .space); } - var is_tuple = token_tags[container_decl.ast.main_token] == .keyword_struct; - if (is_tuple) for (container_decl.ast.members) |member| { - const container_field = tree.fullContainerField(member) orelse continue; - if (!container_field.ast.tuple_like) { - is_tuple = false; - break; - } + const container: Container = switch (token_tags[container_decl.ast.main_token]) { + .keyword_enum => .@"enum", + .keyword_struct => for (container_decl.ast.members) |member| { + if (tree.fullContainerField(member)) |field| if (!field.ast.tuple_like) break .other; + } else .tuple, + else => .other, }; var lbrace: Ast.TokenIndex = undefined; @@ -2129,7 +2133,7 @@ fn renderContainerDecl( // Print all the declarations on the same line. 
try renderToken(ais, tree, lbrace, .space); // lbrace for (container_decl.ast.members) |member| { - try renderMember(gpa, ais, tree, member, is_tuple, .space); + try renderMember(gpa, ais, tree, container, member, .space); } return renderToken(ais, tree, rbrace, space); // rbrace } @@ -2147,9 +2151,9 @@ fn renderContainerDecl( .container_field_init, .container_field_align, .container_field, - => try renderMember(gpa, ais, tree, member, is_tuple, .comma), + => try renderMember(gpa, ais, tree, container, member, .comma), - else => try renderMember(gpa, ais, tree, member, is_tuple, .newline), + else => try renderMember(gpa, ais, tree, container, member, .newline), } } ais.popIndent(); @@ -2565,6 +2569,7 @@ fn renderSpace(ais: *Ais, tree: Ast, token_index: Ast.TokenIndex, lexeme_len: us const QuoteBehavior = enum { preserve_when_shadowing, eagerly_unquote, + eagerly_unquote_except_underscore, }; fn renderIdentifier(ais: *Ais, tree: Ast, token_index: Ast.TokenIndex, space: Space, quote: QuoteBehavior) Error!void { @@ -2589,7 +2594,9 @@ fn renderIdentifier(ais: *Ais, tree: Ast, token_index: Ast.TokenIndex, space: Sp // Special case for _ which would incorrectly be rejected by isValidId below. 
if (contents.len == 1 and contents[0] == '_') switch (quote) { .eagerly_unquote => return renderQuotedIdentifier(ais, tree, token_index, space, true), - .preserve_when_shadowing => return renderQuotedIdentifier(ais, tree, token_index, space, false), + .eagerly_unquote_except_underscore, + .preserve_when_shadowing, + => return renderQuotedIdentifier(ais, tree, token_index, space, false), }; // Scan the entire name for characters that would (after un-escaping) be illegal in a symbol, @@ -2653,7 +2660,9 @@ fn renderIdentifier(ais: *Ais, tree: Ast, token_index: Ast.TokenIndex, space: Sp return renderQuotedIdentifier(ais, tree, token_index, space, false); } if (primitives.isPrimitive(buf[0..buf_i])) switch (quote) { - .eagerly_unquote => return renderQuotedIdentifier(ais, tree, token_index, space, true), + .eagerly_unquote, + .eagerly_unquote_except_underscore, + => return renderQuotedIdentifier(ais, tree, token_index, space, true), .preserve_when_shadowing => return renderQuotedIdentifier(ais, tree, token_index, space, false), }; } diff --git a/src/AstGen.zig b/src/AstGen.zig index 749e3d28c4..300aae8d77 100644 --- a/src/AstGen.zig +++ b/src/AstGen.zig @@ -7622,14 +7622,16 @@ fn failWithNumberError(astgen: *AstGen, err: std.zig.number_literal.Error, token .invalid_digit => |info| return astgen.failOff(token, @intCast(u32, info.i), "invalid digit '{c}' for {s} base", .{ bytes[info.i], @tagName(info.base) }), .invalid_digit_exponent => |i| return astgen.failOff(token, @intCast(u32, i), "invalid digit '{c}' in exponent", .{bytes[i]}), .duplicate_exponent => |i| return astgen.failOff(token, @intCast(u32, i), "duplicate exponent", .{}), - .invalid_hex_exponent => |i| return astgen.failOff(token, @intCast(u32, i), "hex exponent in decimal float", .{}), .exponent_after_underscore => |i| return astgen.failOff(token, @intCast(u32, i), "expected digit before exponent", .{}), .special_after_underscore => |i| return astgen.failOff(token, @intCast(u32, i), "expected digit before 
'{c}'", .{bytes[i]}), .trailing_special => |i| return astgen.failOff(token, @intCast(u32, i), "expected digit after '{c}'", .{bytes[i - 1]}), .trailing_underscore => |i| return astgen.failOff(token, @intCast(u32, i), "trailing digit separator", .{}), .duplicate_period => unreachable, // Validated by tokenizer .invalid_character => unreachable, // Validated by tokenizer - .invalid_exponent_sign => unreachable, // Validated by tokenizer + .invalid_exponent_sign => |i| { + assert(bytes.len >= 2 and bytes[0] == '0' and bytes[1] == 'x'); // Validated by tokenizer + return astgen.failOff(token, @intCast(u32, i), "sign '{c}' cannot follow digit '{c}' in hex base", .{ bytes[i], bytes[i - 1] }); + }, } } diff --git a/src/Autodoc.zig b/src/Autodoc.zig index 42813f7614..e9fb8ab630 100644 --- a/src/Autodoc.zig +++ b/src/Autodoc.zig @@ -604,6 +604,7 @@ const DocData = struct { pubDecls: []usize = &.{}, // index into decls field_types: []Expr = &.{}, // (use src->fields to find names) field_defaults: []?Expr = &.{}, // default values is specified + backing_int: ?Expr = null, // backing integer if specified is_tuple: bool, line_number: usize, parent_container: ?usize, // index into `types` @@ -996,6 +997,12 @@ fn walkInstruction( }; } + const maybe_tldoc_comment = try self.getTLDocComment(new_file.file); + try self.ast_nodes.append(self.arena, .{ + .name = path, + .docs = maybe_tldoc_comment, + }); + result.value_ptr.* = self.types.items.len; var new_scope = Scope{ @@ -2587,12 +2594,12 @@ fn walkInstruction( // We delay analysis because union tags can refer to // decls defined inside the union itself. 
- const tag_type_ref: Ref = if (small.has_tag_type) blk: { + const tag_type_ref: ?Ref = if (small.has_tag_type) blk: { const tag_type = file.zir.extra[extra_index]; extra_index += 1; const tag_ref = @intToEnum(Ref, tag_type); break :blk tag_ref; - } else .none; + } else null; const body_len = if (small.has_body_len) blk: { const body_len = file.zir.extra[extra_index]; @@ -2619,13 +2626,13 @@ fn walkInstruction( ); // Analyze the tag once all decls have been analyzed - const tag_type = try self.walkRef( + const tag_type = if (tag_type_ref) |tt_ref| (try self.walkRef( file, &scope, parent_src, - tag_type_ref, + tt_ref, false, - ); + )).expr else null; // Fields extra_index += body_len; @@ -2657,7 +2664,7 @@ fn walkInstruction( .privDecls = priv_decl_indexes.items, .pubDecls = decl_indexes.items, .fields = field_type_refs.items, - .tag = tag_type.expr, + .tag = tag_type, .auto_enum = small.auto_enum_tag, .parent_container = parent_scope.enclosing_type, }, @@ -2848,13 +2855,24 @@ fn walkInstruction( break :blk fields_len; } else 0; - // TODO: Expose explicit backing integer types in some way. 
+ // We don't care about decls yet + if (small.has_decls_len) extra_index += 1; + + var backing_int: ?DocData.Expr = null; if (small.has_backing_int) { const backing_int_body_len = file.zir.extra[extra_index]; extra_index += 1; // backing_int_body_len if (backing_int_body_len == 0) { + const backing_int_ref = @intToEnum(Ref, file.zir.extra[extra_index]); + const backing_int_res = try self.walkRef(file, &scope, src_info, backing_int_ref, true); + backing_int = backing_int_res.expr; extra_index += 1; // backing_int_ref } else { + const backing_int_body = file.zir.extra[extra_index..][0..backing_int_body_len]; + const break_inst = backing_int_body[backing_int_body.len - 1]; + const operand = data[break_inst].@"break".operand; + const backing_int_res = try self.walkRef(file, &scope, src_info, operand, true); + backing_int = backing_int_res.expr; extra_index += backing_int_body_len; // backing_int_body_inst } } @@ -2897,6 +2915,7 @@ fn walkInstruction( .field_types = field_type_refs.items, .field_defaults = field_default_refs.items, .is_tuple = small.is_tuple, + .backing_int = backing_int, .line_number = self.ast_nodes.items[self_ast_node_index].line, .parent_container = parent_scope.enclosing_type, }, diff --git a/src/Module.zig b/src/Module.zig index 538c716bc3..6a33990463 100644 --- a/src/Module.zig +++ b/src/Module.zig @@ -5280,6 +5280,9 @@ fn scanDecl(iter: *ScanDeclIter, decl_sub_index: usize, flags: u4) Allocator.Err } }, }; + var must_free_decl_name = true; + defer if (must_free_decl_name) gpa.free(decl_name); + const is_exported = export_bit and decl_name_index != 0; if (kind == .@"usingnamespace") try namespace.usingnamespace_set.ensureUnusedCapacity(gpa, 1); @@ -5296,6 +5299,7 @@ fn scanDecl(iter: *ScanDeclIter, decl_sub_index: usize, flags: u4) Allocator.Err const new_decl = mod.declPtr(new_decl_index); new_decl.kind = kind; new_decl.name = decl_name; + must_free_decl_name = false; if (kind == .@"usingnamespace") { 
namespace.usingnamespace_set.putAssumeCapacity(new_decl_index, is_pub); } @@ -5339,9 +5343,29 @@ fn scanDecl(iter: *ScanDeclIter, decl_sub_index: usize, flags: u4) Allocator.Err new_decl.alive = true; // This Decl corresponds to an AST node and therefore always alive. return; } - gpa.free(decl_name); const decl_index = gop.key_ptr.*; const decl = mod.declPtr(decl_index); + if (kind == .@"test") { + const src_loc = SrcLoc{ + .file_scope = decl.getFileScope(), + .parent_decl_node = decl.src_node, + .lazy = .{ .token_offset = 1 }, + }; + const msg = try ErrorMsg.create( + gpa, + src_loc, + "found test declaration with duplicate name: {s}", + .{decl_name}, + ); + errdefer msg.destroy(gpa); + try mod.failed_decls.putNoClobber(gpa, decl_index, msg); + const other_src_loc = SrcLoc{ + .file_scope = namespace.file_scope, + .parent_decl_node = decl_node, + .lazy = .{ .token_offset = 1 }, + }; + try mod.errNoteNonLazy(other_src_loc, msg, "other test here", .{}); + } log.debug("scan existing {*} ({s}) of {*}", .{ decl, decl.name, namespace }); // Update the AST node of the decl; even if its contents are unchanged, it may // have been re-ordered. 
diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index 97e672b71f..2dc1cc8ee4 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -205,16 +205,7 @@ pub const MCValue = union(enum) { fn isMemory(mcv: MCValue) bool { return switch (mcv) { - .memory, - .load_direct, - .lea_direct, - .load_got, - .lea_got, - .load_tlv, - .lea_tlv, - .load_frame, - .lea_frame, - => true, + .memory, .indirect, .load_frame => true, else => false, }; } @@ -937,7 +928,7 @@ fn formatWipMir( .target = data.self.target, .src_loc = data.self.src_loc, }; - for (lower.lowerMir(data.self.mir_instructions.get(data.inst)) catch |err| switch (err) { + for ((lower.lowerMir(data.inst) catch |err| switch (err) { error.LowerFail => { defer { lower.err_msg.?.deinit(data.self.gpa); @@ -955,7 +946,7 @@ fn formatWipMir( return; }, else => |e| return e, - }) |lower_inst| try writer.print(" | {}", .{lower_inst}); + }).insts) |lowered_inst| try writer.print(" | {}", .{lowered_inst}); } fn fmtWipMir(self: *Self, inst: Mir.Inst.Index) std.fmt.Formatter(formatWipMir) { return .{ .data = .{ .self = self, .inst = inst } }; @@ -982,14 +973,14 @@ fn addInst(self: *Self, inst: Mir.Inst) error{OutOfMemory}!Mir.Inst.Index { try self.mir_instructions.ensureUnusedCapacity(gpa, 1); const result_index = @intCast(Mir.Inst.Index, self.mir_instructions.len); self.mir_instructions.appendAssumeCapacity(inst); - switch (inst.tag) { - else => wip_mir_log.debug("{}", .{self.fmtWipMir(result_index)}), - .dbg_line, - .dbg_prologue_end, - .dbg_epilogue_begin, - .dead, - => {}, - } + if (inst.tag != .pseudo or switch (inst.ops) { + else => true, + .pseudo_dbg_prologue_end_none, + .pseudo_dbg_line_line_column, + .pseudo_dbg_epilogue_begin_none, + .pseudo_dead_none, + => false, + }) wip_mir_log.debug("{}", .{self.fmtWipMir(result_index)}); return result_index; } @@ -1012,131 +1003,248 @@ fn addExtraAssumeCapacity(self: *Self, extra: anytype) u32 { return result; } -fn asmSetccRegister(self: 
*Self, reg: Register, cc: bits.Condition) !void { +/// A `cc` of `.z_and_np` clobbers `reg2`! +fn asmCmovccRegisterRegister(self: *Self, reg1: Register, reg2: Register, cc: bits.Condition) !void { _ = try self.addInst(.{ - .tag = .setcc, - .ops = .r_cc, - .data = .{ .r_cc = .{ .r = reg, .cc = cc } }, + .tag = switch (cc) { + else => .cmov, + .z_and_np, .nz_or_p => .pseudo, + }, + .ops = switch (cc) { + else => .rr, + .z_and_np => .pseudo_cmov_z_and_np_rr, + .nz_or_p => .pseudo_cmov_nz_or_p_rr, + }, + .data = .{ .rr = .{ + .fixes = switch (cc) { + else => Mir.Inst.Fixes.fromCondition(cc), + .z_and_np, .nz_or_p => ._, + }, + .r1 = reg1, + .r2 = reg2, + } }, }); } -fn asmSetccMemory(self: *Self, m: Memory, cc: bits.Condition) !void { +/// A `cc` of `.z_and_np` is not supported by this encoding! +fn asmCmovccRegisterMemory(self: *Self, reg: Register, m: Memory, cc: bits.Condition) !void { _ = try self.addInst(.{ - .tag = .setcc, - .ops = switch (m) { - .sib => .m_sib_cc, - .rip => .m_rip_cc, - else => unreachable, + .tag = switch (cc) { + else => .cmov, + .z_and_np => unreachable, + .nz_or_p => .pseudo, }, - .data = .{ .x_cc = .{ .cc = cc, .payload = switch (m) { - .sib => try self.addExtra(Mir.MemorySib.encode(m)), - .rip => try self.addExtra(Mir.MemoryRip.encode(m)), - else => unreachable, - } } }, + .ops = switch (cc) { + else => switch (m) { + .sib => .rm_sib, + .rip => .rm_rip, + else => unreachable, + }, + .z_and_np => unreachable, + .nz_or_p => switch (m) { + .sib => .pseudo_cmov_nz_or_p_rm_sib, + .rip => .pseudo_cmov_nz_or_p_rm_rip, + else => unreachable, + }, + }, + .data = .{ .rx = .{ + .fixes = switch (cc) { + else => Mir.Inst.Fixes.fromCondition(cc), + .z_and_np => unreachable, + .nz_or_p => ._, + }, + .r1 = reg, + .payload = switch (m) { + .sib => try self.addExtra(Mir.MemorySib.encode(m)), + .rip => try self.addExtra(Mir.MemoryRip.encode(m)), + else => unreachable, + }, + } }, }); } -fn asmCmovccRegisterRegister(self: *Self, reg1: Register, reg2: 
Register, cc: bits.Condition) !void { +fn asmSetccRegister(self: *Self, reg: Register, cc: bits.Condition) !void { _ = try self.addInst(.{ - .tag = .cmovcc, - .ops = .rr_cc, - .data = .{ .rr_cc = .{ .r1 = reg1, .r2 = reg2, .cc = cc } }, + .tag = switch (cc) { + else => .set, + .z_and_np, .nz_or_p => .pseudo, + }, + .ops = switch (cc) { + else => .r, + .z_and_np => .pseudo_set_z_and_np_r, + .nz_or_p => .pseudo_set_nz_or_p_r, + }, + .data = switch (cc) { + else => .{ .r = .{ + .fixes = Mir.Inst.Fixes.fromCondition(cc), + .r1 = reg, + } }, + .z_and_np, .nz_or_p => .{ .r_scratch = .{ + .r1 = reg, + .scratch_reg = (try self.register_manager.allocReg(null, gp)).to8(), + } }, + }, }); } -fn asmCmovccRegisterMemory(self: *Self, reg: Register, m: Memory, cc: bits.Condition) !void { +fn asmSetccMemory(self: *Self, m: Memory, cc: bits.Condition) !void { + const payload = switch (m) { + .sib => try self.addExtra(Mir.MemorySib.encode(m)), + .rip => try self.addExtra(Mir.MemoryRip.encode(m)), + else => unreachable, + }; _ = try self.addInst(.{ - .tag = .cmovcc, - .ops = switch (m) { - .sib => .rm_sib_cc, - .rip => .rm_rip_cc, - else => unreachable, + .tag = switch (cc) { + else => .set, + .z_and_np, .nz_or_p => .pseudo, + }, + .ops = switch (cc) { + else => switch (m) { + .sib => .m_sib, + .rip => .m_rip, + else => unreachable, + }, + .z_and_np => switch (m) { + .sib => .pseudo_set_z_and_np_m_sib, + .rip => .pseudo_set_z_and_np_m_rip, + else => unreachable, + }, + .nz_or_p => switch (m) { + .sib => .pseudo_set_nz_or_p_m_sib, + .rip => .pseudo_set_nz_or_p_m_rip, + else => unreachable, + }, + }, + .data = switch (cc) { + else => .{ .x = .{ + .fixes = Mir.Inst.Fixes.fromCondition(cc), + .payload = payload, + } }, + .z_and_np, .nz_or_p => .{ .x_scratch = .{ + .scratch_reg = (try self.register_manager.allocReg(null, gp)).to8(), + .payload = payload, + } }, }, - .data = .{ .rx_cc = .{ .r = reg, .cc = cc, .payload = switch (m) { - .sib => try self.addExtra(Mir.MemorySib.encode(m)), - 
.rip => try self.addExtra(Mir.MemoryRip.encode(m)), - else => unreachable, - } } }, }); } fn asmJmpReloc(self: *Self, target: Mir.Inst.Index) !Mir.Inst.Index { return self.addInst(.{ - .tag = .jmp_reloc, - .ops = undefined, - .data = .{ .inst = target }, + .tag = .jmp, + .ops = .inst, + .data = .{ .inst = .{ + .inst = target, + } }, }); } fn asmJccReloc(self: *Self, target: Mir.Inst.Index, cc: bits.Condition) !Mir.Inst.Index { return self.addInst(.{ - .tag = .jcc, - .ops = .inst_cc, - .data = .{ .inst_cc = .{ .inst = target, .cc = cc } }, + .tag = switch (cc) { + else => .j, + .z_and_np, .nz_or_p => .pseudo, + }, + .ops = switch (cc) { + else => .inst, + .z_and_np => .pseudo_j_z_and_np_inst, + .nz_or_p => .pseudo_j_nz_or_p_inst, + }, + .data = .{ .inst = .{ + .fixes = switch (cc) { + else => Mir.Inst.Fixes.fromCondition(cc), + .z_and_np, .nz_or_p => ._, + }, + .inst = target, + } }, }); } fn asmPlaceholder(self: *Self) !Mir.Inst.Index { return self.addInst(.{ - .tag = .dead, - .ops = undefined, + .tag = .pseudo, + .ops = .pseudo_dead_none, .data = undefined, }); } -fn asmOpOnly(self: *Self, tag: Mir.Inst.Tag) !void { +fn asmOpOnly(self: *Self, tag: Mir.Inst.FixedTag) !void { _ = try self.addInst(.{ - .tag = tag, + .tag = tag[1], .ops = .none, + .data = .{ .none = .{ + .fixes = tag[0], + } }, + }); +} + +fn asmPseudo(self: *Self, ops: Mir.Inst.Ops) !void { + _ = try self.addInst(.{ + .tag = .pseudo, + .ops = ops, .data = undefined, }); } -fn asmRegister(self: *Self, tag: Mir.Inst.Tag, reg: Register) !void { +fn asmRegister(self: *Self, tag: Mir.Inst.FixedTag, reg: Register) !void { _ = try self.addInst(.{ - .tag = tag, + .tag = tag[1], .ops = .r, - .data = .{ .r = reg }, + .data = .{ .r = .{ + .fixes = tag[0], + .r1 = reg, + } }, }); } -fn asmImmediate(self: *Self, tag: Mir.Inst.Tag, imm: Immediate) !void { +fn asmImmediate(self: *Self, tag: Mir.Inst.FixedTag, imm: Immediate) !void { _ = try self.addInst(.{ - .tag = tag, + .tag = tag[1], .ops = switch (imm) { 
.signed => .i_s, .unsigned => .i_u, }, - .data = .{ .i = switch (imm) { - .signed => |s| @bitCast(u32, s), - .unsigned => |u| @intCast(u32, u), + .data = .{ .i = .{ + .fixes = tag[0], + .i = switch (imm) { + .signed => |s| @bitCast(u32, s), + .unsigned => |u| @intCast(u32, u), + }, } }, }); } -fn asmRegisterRegister(self: *Self, tag: Mir.Inst.Tag, reg1: Register, reg2: Register) !void { +fn asmRegisterRegister(self: *Self, tag: Mir.Inst.FixedTag, reg1: Register, reg2: Register) !void { _ = try self.addInst(.{ - .tag = tag, + .tag = tag[1], .ops = .rr, - .data = .{ .rr = .{ .r1 = reg1, .r2 = reg2 } }, + .data = .{ .rr = .{ + .fixes = tag[0], + .r1 = reg1, + .r2 = reg2, + } }, }); } -fn asmRegisterImmediate(self: *Self, tag: Mir.Inst.Tag, reg: Register, imm: Immediate) !void { +fn asmRegisterImmediate(self: *Self, tag: Mir.Inst.FixedTag, reg: Register, imm: Immediate) !void { const ops: Mir.Inst.Ops = switch (imm) { .signed => .ri_s, .unsigned => |u| if (math.cast(u32, u)) |_| .ri_u else .ri64, }; _ = try self.addInst(.{ - .tag = tag, + .tag = tag[1], .ops = ops, .data = switch (ops) { - .ri_s, .ri_u => .{ .ri = .{ .r = reg, .i = switch (imm) { - .signed => |s| @bitCast(u32, s), - .unsigned => |u| @intCast(u32, u), - } } }, + .ri_s, .ri_u => .{ .ri = .{ + .fixes = tag[0], + .r1 = reg, + .i = switch (imm) { + .signed => |s| @bitCast(u32, s), + .unsigned => |u| @intCast(u32, u), + }, + } }, .ri64 => .{ .rx = .{ - .r = reg, + .fixes = tag[0], + .r1 = reg, .payload = try self.addExtra(Mir.Imm64.encode(imm.unsigned)), } }, else => unreachable, @@ -1146,111 +1254,214 @@ fn asmRegisterImmediate(self: *Self, tag: Mir.Inst.Tag, reg: Register, imm: Imme fn asmRegisterRegisterRegister( self: *Self, - tag: Mir.Inst.Tag, + tag: Mir.Inst.FixedTag, reg1: Register, reg2: Register, reg3: Register, ) !void { _ = try self.addInst(.{ - .tag = tag, + .tag = tag[1], .ops = .rrr, - .data = .{ .rrr = .{ .r1 = reg1, .r2 = reg2, .r3 = reg3 } }, + .data = .{ .rrr = .{ + .fixes = tag[0], + .r1 
= reg1, + .r2 = reg2, + .r3 = reg3, + } }, + }); +} + +fn asmRegisterRegisterRegisterImmediate( + self: *Self, + tag: Mir.Inst.FixedTag, + reg1: Register, + reg2: Register, + reg3: Register, + imm: Immediate, +) !void { + _ = try self.addInst(.{ + .tag = tag[1], + .ops = .rrri, + .data = .{ .rrri = .{ + .fixes = tag[0], + .r1 = reg1, + .r2 = reg2, + .r3 = reg3, + .i = @intCast(u8, imm.unsigned), + } }, }); } fn asmRegisterRegisterImmediate( self: *Self, - tag: Mir.Inst.Tag, + tag: Mir.Inst.FixedTag, reg1: Register, reg2: Register, imm: Immediate, ) !void { _ = try self.addInst(.{ - .tag = tag, + .tag = tag[1], .ops = switch (imm) { .signed => .rri_s, .unsigned => .rri_u, }, - .data = .{ .rri = .{ .r1 = reg1, .r2 = reg2, .i = switch (imm) { - .signed => |s| @bitCast(u32, s), - .unsigned => |u| @intCast(u32, u), - } } }, + .data = .{ .rri = .{ + .fixes = tag[0], + .r1 = reg1, + .r2 = reg2, + .i = switch (imm) { + .signed => |s| @bitCast(u32, s), + .unsigned => |u| @intCast(u32, u), + }, + } }, + }); +} + +fn asmRegisterRegisterMemory( + self: *Self, + tag: Mir.Inst.FixedTag, + reg1: Register, + reg2: Register, + m: Memory, +) !void { + _ = try self.addInst(.{ + .tag = tag[1], + .ops = switch (m) { + .sib => .rrm_sib, + .rip => .rrm_rip, + else => unreachable, + }, + .data = .{ .rrx = .{ + .fixes = tag[0], + .r1 = reg1, + .r2 = reg2, + .payload = switch (m) { + .sib => try self.addExtra(Mir.MemorySib.encode(m)), + .rip => try self.addExtra(Mir.MemoryRip.encode(m)), + else => unreachable, + }, + } }, }); } -fn asmMemory(self: *Self, tag: Mir.Inst.Tag, m: Memory) !void { +fn asmMemory(self: *Self, tag: Mir.Inst.FixedTag, m: Memory) !void { _ = try self.addInst(.{ - .tag = tag, + .tag = tag[1], .ops = switch (m) { .sib => .m_sib, .rip => .m_rip, else => unreachable, }, - .data = .{ .payload = switch (m) { - .sib => try self.addExtra(Mir.MemorySib.encode(m)), - .rip => try self.addExtra(Mir.MemoryRip.encode(m)), - else => unreachable, + .data = .{ .x = .{ + .fixes = 
tag[0], + .payload = switch (m) { + .sib => try self.addExtra(Mir.MemorySib.encode(m)), + .rip => try self.addExtra(Mir.MemoryRip.encode(m)), + else => unreachable, + }, } }, }); } -fn asmRegisterMemory(self: *Self, tag: Mir.Inst.Tag, reg: Register, m: Memory) !void { +fn asmRegisterMemory(self: *Self, tag: Mir.Inst.FixedTag, reg: Register, m: Memory) !void { _ = try self.addInst(.{ - .tag = tag, + .tag = tag[1], .ops = switch (m) { .sib => .rm_sib, .rip => .rm_rip, else => unreachable, }, - .data = .{ .rx = .{ .r = reg, .payload = switch (m) { - .sib => try self.addExtra(Mir.MemorySib.encode(m)), - .rip => try self.addExtra(Mir.MemoryRip.encode(m)), - else => unreachable, - } } }, + .data = .{ .rx = .{ + .fixes = tag[0], + .r1 = reg, + .payload = switch (m) { + .sib => try self.addExtra(Mir.MemorySib.encode(m)), + .rip => try self.addExtra(Mir.MemoryRip.encode(m)), + else => unreachable, + }, + } }, }); } fn asmRegisterMemoryImmediate( self: *Self, - tag: Mir.Inst.Tag, + tag: Mir.Inst.FixedTag, reg: Register, m: Memory, imm: Immediate, ) !void { _ = try self.addInst(.{ - .tag = tag, + .tag = tag[1], .ops = switch (m) { .sib => .rmi_sib, .rip => .rmi_rip, else => unreachable, }, - .data = .{ .rix = .{ .r = reg, .i = @intCast(u8, imm.unsigned), .payload = switch (m) { - .sib => try self.addExtra(Mir.MemorySib.encode(m)), - .rip => try self.addExtra(Mir.MemoryRip.encode(m)), + .data = .{ .rix = .{ + .fixes = tag[0], + .r1 = reg, + .i = @intCast(u8, imm.unsigned), + .payload = switch (m) { + .sib => try self.addExtra(Mir.MemorySib.encode(m)), + .rip => try self.addExtra(Mir.MemoryRip.encode(m)), + else => unreachable, + }, + } }, + }); +} + +fn asmRegisterRegisterMemoryImmediate( + self: *Self, + tag: Mir.Inst.FixedTag, + reg1: Register, + reg2: Register, + m: Memory, + imm: Immediate, +) !void { + _ = try self.addInst(.{ + .tag = tag[1], + .ops = switch (m) { + .sib => .rrmi_sib, + .rip => .rrmi_rip, else => unreachable, - } } }, + }, + .data = .{ .rrix = .{ + .fixes 
= tag[0], + .r1 = reg1, + .r2 = reg2, + .i = @intCast(u8, imm.unsigned), + .payload = switch (m) { + .sib => try self.addExtra(Mir.MemorySib.encode(m)), + .rip => try self.addExtra(Mir.MemoryRip.encode(m)), + else => unreachable, + }, + } }, }); } -fn asmMemoryRegister(self: *Self, tag: Mir.Inst.Tag, m: Memory, reg: Register) !void { +fn asmMemoryRegister(self: *Self, tag: Mir.Inst.FixedTag, m: Memory, reg: Register) !void { _ = try self.addInst(.{ - .tag = tag, + .tag = tag[1], .ops = switch (m) { .sib => .mr_sib, .rip => .mr_rip, else => unreachable, }, - .data = .{ .rx = .{ .r = reg, .payload = switch (m) { - .sib => try self.addExtra(Mir.MemorySib.encode(m)), - .rip => try self.addExtra(Mir.MemoryRip.encode(m)), - else => unreachable, - } } }, + .data = .{ .rx = .{ + .fixes = tag[0], + .r1 = reg, + .payload = switch (m) { + .sib => try self.addExtra(Mir.MemorySib.encode(m)), + .rip => try self.addExtra(Mir.MemoryRip.encode(m)), + else => unreachable, + }, + } }, }); } -fn asmMemoryImmediate(self: *Self, tag: Mir.Inst.Tag, m: Memory, imm: Immediate) !void { +fn asmMemoryImmediate(self: *Self, tag: Mir.Inst.FixedTag, m: Memory, imm: Immediate) !void { _ = try self.addInst(.{ - .tag = tag, + .tag = tag[1], .ops = switch (m) { .sib => switch (imm) { .signed => .mi_sib_s, @@ -1262,67 +1473,81 @@ fn asmMemoryImmediate(self: *Self, tag: Mir.Inst.Tag, m: Memory, imm: Immediate) }, else => unreachable, }, - .data = .{ .ix = .{ .i = switch (imm) { - .signed => |s| @bitCast(u32, s), - .unsigned => |u| @intCast(u32, u), - }, .payload = switch (m) { - .sib => try self.addExtra(Mir.MemorySib.encode(m)), - .rip => try self.addExtra(Mir.MemoryRip.encode(m)), - else => unreachable, - } } }, + .data = .{ .x = .{ + .fixes = tag[0], + .payload = try self.addExtra(Mir.Imm32{ .imm = switch (imm) { + .signed => |s| @bitCast(u32, s), + .unsigned => |u| @intCast(u32, u), + } }), + } }, }); + _ = switch (m) { + .sib => try self.addExtra(Mir.MemorySib.encode(m)), + .rip => try 
self.addExtra(Mir.MemoryRip.encode(m)), + else => unreachable, + }; } fn asmMemoryRegisterRegister( self: *Self, - tag: Mir.Inst.Tag, + tag: Mir.Inst.FixedTag, m: Memory, reg1: Register, reg2: Register, ) !void { _ = try self.addInst(.{ - .tag = tag, + .tag = tag[1], .ops = switch (m) { .sib => .mrr_sib, .rip => .mrr_rip, else => unreachable, }, - .data = .{ .rrx = .{ .r1 = reg1, .r2 = reg2, .payload = switch (m) { - .sib => try self.addExtra(Mir.MemorySib.encode(m)), - .rip => try self.addExtra(Mir.MemoryRip.encode(m)), - else => unreachable, - } } }, + .data = .{ .rrx = .{ + .fixes = tag[0], + .r1 = reg1, + .r2 = reg2, + .payload = switch (m) { + .sib => try self.addExtra(Mir.MemorySib.encode(m)), + .rip => try self.addExtra(Mir.MemoryRip.encode(m)), + else => unreachable, + }, + } }, }); } fn asmMemoryRegisterImmediate( self: *Self, - tag: Mir.Inst.Tag, + tag: Mir.Inst.FixedTag, m: Memory, reg: Register, imm: Immediate, ) !void { _ = try self.addInst(.{ - .tag = tag, + .tag = tag[1], .ops = switch (m) { .sib => .mri_sib, .rip => .mri_rip, else => unreachable, }, - .data = .{ .rix = .{ .r = reg, .i = @intCast(u8, imm.unsigned), .payload = switch (m) { - .sib => try self.addExtra(Mir.MemorySib.encode(m)), - .rip => try self.addExtra(Mir.MemoryRip.encode(m)), - else => unreachable, - } } }, + .data = .{ .rix = .{ + .fixes = tag[0], + .r1 = reg, + .i = @intCast(u8, imm.unsigned), + .payload = switch (m) { + .sib => try self.addExtra(Mir.MemorySib.encode(m)), + .rip => try self.addExtra(Mir.MemoryRip.encode(m)), + else => unreachable, + }, + } }, }); } fn gen(self: *Self) InnerError!void { const cc = self.fn_type.fnCallingConvention(); if (cc != .Naked) { - try self.asmRegister(.push, .rbp); + try self.asmRegister(.{ ._, .push }, .rbp); const backpatch_push_callee_preserved_regs = try self.asmPlaceholder(); - try self.asmRegisterRegister(.mov, .rbp, .rsp); + try self.asmRegisterRegister(.{ ._, .mov }, .rbp, .rsp); const backpatch_frame_align = try 
self.asmPlaceholder(); const backpatch_stack_alloc = try self.asmPlaceholder(); @@ -1346,7 +1571,7 @@ fn gen(self: *Self) InnerError!void { else => unreachable, } - try self.asmOpOnly(.dbg_prologue_end); + try self.asmPseudo(.pseudo_dbg_prologue_end_none); try self.genBody(self.air.getMainBody()); @@ -1358,15 +1583,15 @@ fn gen(self: *Self) InnerError!void { // } // Eliding the reloc will cause a miscompilation in this case. for (self.exitlude_jump_relocs.items) |jmp_reloc| { - self.mir_instructions.items(.data)[jmp_reloc].inst = + self.mir_instructions.items(.data)[jmp_reloc].inst.inst = @intCast(u32, self.mir_instructions.len); } - try self.asmOpOnly(.dbg_epilogue_begin); + try self.asmPseudo(.pseudo_dbg_epilogue_begin_none); const backpatch_stack_dealloc = try self.asmPlaceholder(); const backpatch_pop_callee_preserved_regs = try self.asmPlaceholder(); - try self.asmRegister(.pop, .rbp); - try self.asmOpOnly(.ret); + try self.asmRegister(.{ ._, .pop }, .rbp); + try self.asmOpOnly(.{ ._, .ret }); const frame_layout = try self.computeFrameLayout(); const need_frame_align = frame_layout.stack_mask != math.maxInt(u32); @@ -1376,46 +1601,54 @@ fn gen(self: *Self) InnerError!void { self.mir_instructions.set(backpatch_frame_align, .{ .tag = .@"and", .ops = .ri_s, - .data = .{ .ri = .{ .r = .rsp, .i = frame_layout.stack_mask } }, + .data = .{ .ri = .{ + .r1 = .rsp, + .i = frame_layout.stack_mask, + } }, }); } if (need_stack_adjust) { self.mir_instructions.set(backpatch_stack_alloc, .{ .tag = .sub, .ops = .ri_s, - .data = .{ .ri = .{ .r = .rsp, .i = frame_layout.stack_adjust } }, + .data = .{ .ri = .{ + .r1 = .rsp, + .i = frame_layout.stack_adjust, + } }, }); } if (need_frame_align or need_stack_adjust) { self.mir_instructions.set(backpatch_stack_dealloc, .{ .tag = .mov, .ops = .rr, - .data = .{ .rr = .{ .r1 = .rsp, .r2 = .rbp } }, + .data = .{ .rr = .{ + .r1 = .rsp, + .r2 = .rbp, + } }, }); } if (need_save_reg) { - const save_reg_list = 
frame_layout.save_reg_list.asInt(); self.mir_instructions.set(backpatch_push_callee_preserved_regs, .{ - .tag = .push_regs, - .ops = undefined, - .data = .{ .payload = save_reg_list }, + .tag = .pseudo, + .ops = .pseudo_push_reg_list, + .data = .{ .reg_list = frame_layout.save_reg_list }, }); self.mir_instructions.set(backpatch_pop_callee_preserved_regs, .{ - .tag = .pop_regs, - .ops = undefined, - .data = .{ .payload = save_reg_list }, + .tag = .pseudo, + .ops = .pseudo_pop_reg_list, + .data = .{ .reg_list = frame_layout.save_reg_list }, }); } } else { - try self.asmOpOnly(.dbg_prologue_end); + try self.asmPseudo(.pseudo_dbg_prologue_end_none); try self.genBody(self.air.getMainBody()); - try self.asmOpOnly(.dbg_epilogue_begin); + try self.asmPseudo(.pseudo_dbg_epilogue_begin_none); } // Drop them off at the rbrace. _ = try self.addInst(.{ - .tag = .dbg_line, - .ops = undefined, + .tag = .pseudo, + .ops = .pseudo_dbg_line_line_column, .data = .{ .line_column = .{ .line = self.end_di_line, .column = self.end_di_column, @@ -1480,12 +1713,12 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void { .log, .log2, .log10, - .floor, - .ceil, .round, - .trunc_float, => try self.airUnaryMath(inst), + .floor => try self.airRound(inst, 0b1_0_01), + .ceil => try self.airRound(inst, 0b1_0_10), + .trunc_float => try self.airRound(inst, 0b1_0_11), .sqrt => try self.airSqrt(inst), .neg, .fabs => try self.airFloatSign(inst), @@ -1731,7 +1964,7 @@ fn genLazy(self: *Self, lazy_sym: link.File.LazySymbol) InnerError!void { }; const tag_val = Value.initPayload(&tag_pl.base); const tag_mcv = try self.genTypedValue(.{ .ty = enum_ty, .val = tag_val }); - try self.genBinOpMir(.cmp, enum_ty, enum_mcv, tag_mcv); + try self.genBinOpMir(.{ ._, .cmp }, enum_ty, enum_mcv, tag_mcv); const skip_reloc = try self.asmJccReloc(undefined, .ne); try self.genSetMem( @@ -1751,7 +1984,7 @@ fn genLazy(self: *Self, lazy_sym: link.File.LazySymbol) InnerError!void { try self.airTrap(); for 
(exitlude_jump_relocs) |reloc| try self.performReloc(reloc); - try self.asmOpOnly(.ret); + try self.asmOpOnly(.{ ._, .ret }); }, else => return self.fail( "TODO implement {s} for {}", @@ -1919,6 +2152,11 @@ fn computeFrameLayout(self: *Self) !FrameLayout { }; } +fn getFrameAddrAlignment(self: *Self, frame_addr: FrameAddr) u32 { + const alloc_align = @as(u32, 1) << self.frame_allocs.get(@enumToInt(frame_addr.index)).abi_align; + return @min(alloc_align, @bitCast(u32, frame_addr.off) & (alloc_align - 1)); +} + fn allocFrameIndex(self: *Self, alloc: FrameAlloc) !FrameIndex { const frame_allocs_slice = self.frame_allocs.slice(); const frame_size = frame_allocs_slice.items(.abi_size); @@ -1962,24 +2200,36 @@ fn allocTempRegOrMem(self: *Self, elem_ty: Type, reg_ok: bool) !MCValue { return self.allocRegOrMemAdvanced(elem_ty, null, reg_ok); } -fn allocRegOrMemAdvanced(self: *Self, elem_ty: Type, inst: ?Air.Inst.Index, reg_ok: bool) !MCValue { - const abi_size = math.cast(u32, elem_ty.abiSize(self.target.*)) orelse { +fn allocRegOrMemAdvanced(self: *Self, ty: Type, inst: ?Air.Inst.Index, reg_ok: bool) !MCValue { + const abi_size = math.cast(u32, ty.abiSize(self.target.*)) orelse { const mod = self.bin_file.options.module.?; - return self.fail("type '{}' too big to fit into stack frame", .{elem_ty.fmt(mod)}); + return self.fail("type '{}' too big to fit into stack frame", .{ty.fmt(mod)}); }; - if (reg_ok) { - // Make sure the type can fit in a register before we try to allocate one. 
- const ptr_bits = self.target.cpu.arch.ptrBitWidth(); - const ptr_bytes: u64 = @divExact(ptr_bits, 8); - if (abi_size <= ptr_bytes) { - if (self.register_manager.tryAllocReg(inst, regClassForType(elem_ty))) |reg| { + if (reg_ok) need_mem: { + if (abi_size <= @as(u32, switch (ty.zigTypeTag()) { + .Float => switch (ty.floatBits(self.target.*)) { + 16, 32, 64, 128 => 16, + 80 => break :need_mem, + else => unreachable, + }, + .Vector => switch (ty.childType().zigTypeTag()) { + .Float => switch (ty.childType().floatBits(self.target.*)) { + 16, 32, 64 => if (self.hasFeature(.avx)) 32 else 16, + 80, 128 => break :need_mem, + else => unreachable, + }, + else => break :need_mem, + }, + else => 8, + })) { + if (self.register_manager.tryAllocReg(inst, regClassForType(ty))) |reg| { return MCValue{ .register = registerAlias(reg, abi_size) }; } } } - const frame_index = try self.allocFrameIndex(FrameAlloc.initType(elem_ty, self.target.*)); + const frame_index = try self.allocFrameIndex(FrameAlloc.initType(ty, self.target.*)); return .{ .load_frame = .{ .index = frame_index } }; } @@ -2172,44 +2422,127 @@ fn airRetPtr(self: *Self, inst: Air.Inst.Index) !void { fn airFptrunc(self: *Self, inst: Air.Inst.Index) !void { const ty_op = self.air.instructions.items(.data)[inst].ty_op; const dst_ty = self.air.typeOfIndex(inst); + const dst_bits = dst_ty.floatBits(self.target.*); const src_ty = self.air.typeOf(ty_op.operand); - if (dst_ty.floatBits(self.target.*) != 32 or src_ty.floatBits(self.target.*) != 64 or - !Target.x86.featureSetHas(self.target.cpu.features, .sse2)) - return self.fail("TODO implement airFptrunc from {} to {}", .{ - src_ty.fmt(self.bin_file.options.module.?), dst_ty.fmt(self.bin_file.options.module.?), - }); + const src_bits = src_ty.floatBits(self.target.*); const src_mcv = try self.resolveInst(ty_op.operand); const dst_mcv = if (src_mcv.isRegister() and self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) src_mcv else try self.copyToRegisterWithInstTracking(inst, 
dst_ty, src_mcv); - const dst_lock = self.register_manager.lockReg(dst_mcv.register); + const dst_reg = dst_mcv.getReg().?.to128(); + const dst_lock = self.register_manager.lockReg(dst_reg); defer if (dst_lock) |lock| self.register_manager.unlockReg(lock); - try self.genBinOpMir(.cvtsd2ss, src_ty, dst_mcv, src_mcv); + if (dst_bits == 16 and self.hasFeature(.f16c)) { + switch (src_bits) { + 32 => { + const mat_src_reg = if (src_mcv.isRegister()) + src_mcv.getReg().? + else + try self.copyToTmpRegister(src_ty, src_mcv); + try self.asmRegisterRegisterImmediate( + .{ .v_, .cvtps2ph }, + dst_reg, + mat_src_reg.to128(), + Immediate.u(0b1_00), + ); + }, + else => return self.fail("TODO implement airFptrunc from {} to {}", .{ + src_ty.fmt(self.bin_file.options.module.?), dst_ty.fmt(self.bin_file.options.module.?), + }), + } + } else if (src_bits == 64 and dst_bits == 32) { + if (self.hasFeature(.avx)) if (src_mcv.isMemory()) try self.asmRegisterRegisterMemory( + .{ .v_, .cvtsd2ss }, + dst_reg, + dst_reg, + src_mcv.mem(.qword), + ) else try self.asmRegisterRegisterRegister( + .{ .v_, .cvtsd2ss }, + dst_reg, + dst_reg, + (if (src_mcv.isRegister()) + src_mcv.getReg().? + else + try self.copyToTmpRegister(src_ty, src_mcv)).to128(), + ) else if (src_mcv.isMemory()) try self.asmRegisterMemory( + .{ ._, .cvtsd2ss }, + dst_reg, + src_mcv.mem(.qword), + ) else try self.asmRegisterRegister( + .{ ._, .cvtsd2ss }, + dst_reg, + (if (src_mcv.isRegister()) + src_mcv.getReg().? 
+ else + try self.copyToTmpRegister(src_ty, src_mcv)).to128(), + ); + } else return self.fail("TODO implement airFptrunc from {} to {}", .{ + src_ty.fmt(self.bin_file.options.module.?), dst_ty.fmt(self.bin_file.options.module.?), + }); return self.finishAir(inst, dst_mcv, .{ ty_op.operand, .none, .none }); } fn airFpext(self: *Self, inst: Air.Inst.Index) !void { const ty_op = self.air.instructions.items(.data)[inst].ty_op; const dst_ty = self.air.typeOfIndex(inst); + const dst_bits = dst_ty.floatBits(self.target.*); const src_ty = self.air.typeOf(ty_op.operand); - if (dst_ty.floatBits(self.target.*) != 64 or src_ty.floatBits(self.target.*) != 32 or - !Target.x86.featureSetHas(self.target.cpu.features, .sse2)) - return self.fail("TODO implement airFpext from {} to {}", .{ - src_ty.fmt(self.bin_file.options.module.?), dst_ty.fmt(self.bin_file.options.module.?), - }); + const src_bits = src_ty.floatBits(self.target.*); const src_mcv = try self.resolveInst(ty_op.operand); const dst_mcv = if (src_mcv.isRegister() and self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) src_mcv else try self.copyToRegisterWithInstTracking(inst, dst_ty, src_mcv); - const dst_lock = self.register_manager.lockReg(dst_mcv.register); + const dst_reg = dst_mcv.getReg().?.to128(); + const dst_lock = self.register_manager.lockReg(dst_reg); defer if (dst_lock) |lock| self.register_manager.unlockReg(lock); - try self.genBinOpMir(.cvtss2sd, src_ty, dst_mcv, src_mcv); + if (src_bits == 16 and self.hasFeature(.f16c)) { + const mat_src_reg = if (src_mcv.isRegister()) + src_mcv.getReg().? 
+ else + try self.copyToTmpRegister(src_ty, src_mcv); + try self.asmRegisterRegister(.{ .v_, .cvtph2ps }, dst_reg, mat_src_reg.to128()); + switch (dst_bits) { + 32 => {}, + 64 => try self.asmRegisterRegisterRegister(.{ .v_, .cvtss2sd }, dst_reg, dst_reg, dst_reg), + else => return self.fail("TODO implement airFpext from {} to {}", .{ + src_ty.fmt(self.bin_file.options.module.?), dst_ty.fmt(self.bin_file.options.module.?), + }), + } + } else if (src_bits == 32 and dst_bits == 64) { + if (self.hasFeature(.avx)) if (src_mcv.isMemory()) try self.asmRegisterRegisterMemory( + .{ .v_, .cvtss2sd }, + dst_reg, + dst_reg, + src_mcv.mem(.dword), + ) else try self.asmRegisterRegisterRegister( + .{ .v_, .cvtss2sd }, + dst_reg, + dst_reg, + (if (src_mcv.isRegister()) + src_mcv.getReg().? + else + try self.copyToTmpRegister(src_ty, src_mcv)).to128(), + ) else if (src_mcv.isMemory()) try self.asmRegisterMemory( + .{ ._, .cvtss2sd }, + dst_reg, + src_mcv.mem(.dword), + ) else try self.asmRegisterRegister( + .{ ._, .cvtss2sd }, + dst_reg, + (if (src_mcv.isRegister()) + src_mcv.getReg().? 
+ else + try self.copyToTmpRegister(src_ty, src_mcv)).to128(), + ); + } else return self.fail("TODO implement airFpext from {} to {}", .{ + src_ty.fmt(self.bin_file.options.module.?), dst_ty.fmt(self.bin_file.options.module.?), + }); return self.finishAir(inst, dst_mcv, .{ ty_op.operand, .none, .none }); } @@ -2241,11 +2574,11 @@ fn airIntCast(self: *Self, inst: Air.Inst.Index) !void { switch (dst_mcv) { .register => |dst_reg| { const min_abi_size = @min(dst_abi_size, src_abi_size); - const tag: Mir.Inst.Tag = switch (signedness) { - .signed => .movsx, - .unsigned => if (min_abi_size > 2) .mov else .movzx, + const tag: Mir.Inst.FixedTag = switch (signedness) { + .signed => if (min_abi_size >= 4) .{ ._d, .movsx } else .{ ._, .movsx }, + .unsigned => if (min_abi_size >= 4) .{ ._, .mov } else .{ ._, .movzx }, }; - const dst_alias = switch (tag) { + const dst_alias = switch (tag[1]) { .movsx => dst_reg.to64(), .mov, .movzx => if (min_abi_size > 4) dst_reg.to64() else dst_reg.to32(), else => unreachable, @@ -2274,14 +2607,24 @@ fn airIntCast(self: *Self, inst: Air.Inst.Index) !void { try self.genCopy(min_ty, dst_mcv, src_mcv); const extra = dst_abi_size * 8 - dst_int_info.bits; if (extra > 0) { - try self.genShiftBinOpMir(switch (signedness) { - .signed => .sal, - .unsigned => .shl, - }, dst_ty, dst_mcv, .{ .immediate = extra }); - try self.genShiftBinOpMir(switch (signedness) { - .signed => .sar, - .unsigned => .shr, - }, dst_ty, dst_mcv, .{ .immediate = extra }); + try self.genShiftBinOpMir( + switch (signedness) { + .signed => .{ ._l, .sa }, + .unsigned => .{ ._l, .sh }, + }, + dst_ty, + dst_mcv, + .{ .immediate = extra }, + ); + try self.genShiftBinOpMir( + switch (signedness) { + .signed => .{ ._r, .sa }, + .unsigned => .{ ._r, .sh }, + }, + dst_ty, + dst_mcv, + .{ .immediate = extra }, + ); } }, } @@ -2466,8 +2809,8 @@ fn airAddSat(self: *Self, inst: Air.Inst.Index) !void { const reg_bits = self.regBitSize(ty); const cc: Condition = if (ty.isSignedInt()) cc: { try 
self.genSetReg(limit_reg, ty, dst_mcv); - try self.genShiftBinOpMir(.sar, ty, limit_mcv, .{ .immediate = reg_bits - 1 }); - try self.genBinOpMir(.xor, ty, limit_mcv, .{ + try self.genShiftBinOpMir(.{ ._r, .sa }, ty, limit_mcv, .{ .immediate = reg_bits - 1 }); + try self.genBinOpMir(.{ ._, .xor }, ty, limit_mcv, .{ .immediate = (@as(u64, 1) << @intCast(u6, reg_bits - 1)) - 1, }); break :cc .o; @@ -2477,7 +2820,7 @@ fn airAddSat(self: *Self, inst: Air.Inst.Index) !void { }); break :cc .c; }; - try self.genBinOpMir(.add, ty, dst_mcv, rhs_mcv); + try self.genBinOpMir(.{ ._, .add }, ty, dst_mcv, rhs_mcv); const cmov_abi_size = @max(@intCast(u32, ty.abiSize(self.target.*)), 2); try self.asmCmovccRegisterRegister( @@ -2517,8 +2860,8 @@ fn airSubSat(self: *Self, inst: Air.Inst.Index) !void { const reg_bits = self.regBitSize(ty); const cc: Condition = if (ty.isSignedInt()) cc: { try self.genSetReg(limit_reg, ty, dst_mcv); - try self.genShiftBinOpMir(.sar, ty, limit_mcv, .{ .immediate = reg_bits - 1 }); - try self.genBinOpMir(.xor, ty, limit_mcv, .{ + try self.genShiftBinOpMir(.{ ._r, .sa }, ty, limit_mcv, .{ .immediate = reg_bits - 1 }); + try self.genBinOpMir(.{ ._, .xor }, ty, limit_mcv, .{ .immediate = (@as(u64, 1) << @intCast(u6, reg_bits - 1)) - 1, }); break :cc .o; @@ -2526,7 +2869,7 @@ fn airSubSat(self: *Self, inst: Air.Inst.Index) !void { try self.genSetReg(limit_reg, ty, .{ .immediate = 0 }); break :cc .c; }; - try self.genBinOpMir(.sub, ty, dst_mcv, rhs_mcv); + try self.genBinOpMir(.{ ._, .sub }, ty, dst_mcv, rhs_mcv); const cmov_abi_size = @max(@intCast(u32, ty.abiSize(self.target.*)), 2); try self.asmCmovccRegisterRegister( @@ -2568,9 +2911,9 @@ fn airMulSat(self: *Self, inst: Air.Inst.Index) !void { const reg_bits = self.regBitSize(ty); const cc: Condition = if (ty.isSignedInt()) cc: { try self.genSetReg(limit_reg, ty, lhs_mcv); - try self.genBinOpMir(.xor, ty, limit_mcv, rhs_mcv); - try self.genShiftBinOpMir(.sar, ty, limit_mcv, .{ .immediate = reg_bits - 1 
}); - try self.genBinOpMir(.xor, ty, limit_mcv, .{ + try self.genBinOpMir(.{ ._, .xor }, ty, limit_mcv, rhs_mcv); + try self.genShiftBinOpMir(.{ ._r, .sa }, ty, limit_mcv, .{ .immediate = reg_bits - 1 }); + try self.genBinOpMir(.{ ._, .xor }, ty, limit_mcv, .{ .immediate = (@as(u64, 1) << @intCast(u6, reg_bits - 1)) - 1, }); break :cc .o; @@ -2683,7 +3026,7 @@ fn airShlWithOverflow(self: *Self, inst: Air.Inst.Index) !void { }; defer if (tmp_lock) |lock| self.register_manager.unlockReg(lock); - try self.genBinOpMir(.cmp, lhs_ty, tmp_mcv, lhs); + try self.genBinOpMir(.{ ._, .cmp }, lhs_ty, tmp_mcv, lhs); const cc = Condition.ne; const tuple_ty = self.air.typeOfIndex(inst); @@ -2770,12 +3113,17 @@ fn genSetFrameTruncatedOverflowCompare( src_mcv; try self.genSetReg(scratch_reg, hi_limb_ty, hi_limb_mcv); try self.truncateRegister(hi_limb_ty, scratch_reg); - try self.genBinOpMir(.cmp, hi_limb_ty, .{ .register = scratch_reg }, hi_limb_mcv); + try self.genBinOpMir(.{ ._, .cmp }, hi_limb_ty, .{ .register = scratch_reg }, hi_limb_mcv); const eq_reg = temp_regs[2]; if (overflow_cc) |_| { try self.asmSetccRegister(eq_reg.to8(), .ne); - try self.genBinOpMir(.@"or", Type.u8, .{ .register = overflow_reg }, .{ .register = eq_reg }); + try self.genBinOpMir( + .{ ._, .@"or" }, + Type.u8, + .{ .register = overflow_reg }, + .{ .register = eq_reg }, + ); } const payload_off = @intCast(i32, tuple_ty.structFieldOffset(0, self.target.*)); @@ -2904,28 +3252,25 @@ fn airMulWithOverflow(self: *Self, inst: Air.Inst.Index) !void { /// Generates signed or unsigned integer multiplication/division. /// Clobbers .rax and .rdx registers. /// Quotient is saved in .rax and remainder in .rdx. 
-fn genIntMulDivOpMir( - self: *Self, - tag: Mir.Inst.Tag, - ty: Type, - lhs: MCValue, - rhs: MCValue, -) !void { +fn genIntMulDivOpMir(self: *Self, tag: Mir.Inst.FixedTag, ty: Type, lhs: MCValue, rhs: MCValue) !void { const abi_size = @intCast(u32, ty.abiSize(self.target.*)); if (abi_size > 8) { return self.fail("TODO implement genIntMulDivOpMir for ABI size larger than 8", .{}); } try self.genSetReg(.rax, ty, lhs); - switch (tag) { + switch (tag[1]) { else => unreachable, - .mul, .imul => {}, - .div => try self.asmRegisterRegister(.xor, .edx, .edx), - .idiv => switch (self.regBitSize(ty)) { - 8 => try self.asmOpOnly(.cbw), - 16 => try self.asmOpOnly(.cwd), - 32 => try self.asmOpOnly(.cdq), - 64 => try self.asmOpOnly(.cqo), + .mul => {}, + .div => switch (tag[0]) { + ._ => try self.asmRegisterRegister(.{ ._, .xor }, .edx, .edx), + .i_ => switch (self.regBitSize(ty)) { + 8 => try self.asmOpOnly(.{ ._, .cbw }), + 16 => try self.asmOpOnly(.{ ._, .cwd }), + 32 => try self.asmOpOnly(.{ ._, .cdq }), + 64 => try self.asmOpOnly(.{ ._, .cqo }), + else => unreachable, + }, else => unreachable, }, } @@ -2963,23 +3308,28 @@ fn genInlineIntDivFloor(self: *Self, ty: Type, lhs: MCValue, rhs: MCValue) !MCVa const divisor_lock = self.register_manager.lockReg(divisor); defer if (divisor_lock) |lock| self.register_manager.unlockReg(lock); - try self.genIntMulDivOpMir(switch (int_info.signedness) { - .signed => .idiv, - .unsigned => .div, - }, ty, .{ .register = dividend }, .{ .register = divisor }); + try self.genIntMulDivOpMir( + switch (int_info.signedness) { + .signed => .{ .i_, .div }, + .unsigned => .{ ._, .div }, + }, + ty, + .{ .register = dividend }, + .{ .register = divisor }, + ); try self.asmRegisterRegister( - .xor, + .{ ._, .xor }, registerAlias(divisor, abi_size), registerAlias(dividend, abi_size), ); try self.asmRegisterImmediate( - .sar, + .{ ._r, .sa }, registerAlias(divisor, abi_size), Immediate.u(int_info.bits - 1), ); try self.asmRegisterRegister( - .@"test", + 
.{ ._, .@"test" }, registerAlias(.rdx, abi_size), registerAlias(.rdx, abi_size), ); @@ -2988,7 +3338,7 @@ fn genInlineIntDivFloor(self: *Self, ty: Type, lhs: MCValue, rhs: MCValue) !MCVa registerAlias(.rdx, abi_size), .z, ); - try self.genBinOpMir(.add, ty, .{ .register = divisor }, .{ .register = .rax }); + try self.genBinOpMir(.{ ._, .add }, ty, .{ .register = divisor }, .{ .register = .rax }); return MCValue{ .register = divisor }; } @@ -3110,7 +3460,12 @@ fn airUnwrapErrUnionErr(self: *Self, inst: Air.Inst.Index) !void { const result = try self.copyToRegisterWithInstTracking(inst, err_union_ty, operand); if (err_off > 0) { const shift = @intCast(u6, err_off * 8); - try self.genShiftBinOpMir(.shr, err_union_ty, result, .{ .immediate = shift }); + try self.genShiftBinOpMir( + .{ ._r, .sh }, + err_union_ty, + result, + .{ .immediate = shift }, + ); } else { try self.truncateRegister(Type.anyerror, result.register); } @@ -3162,7 +3517,12 @@ fn genUnwrapErrorUnionPayloadMir( .{ .register = try self.copyToTmpRegister(err_union_ty, err_union) }; if (payload_off > 0) { const shift = @intCast(u6, payload_off * 8); - try self.genShiftBinOpMir(.shr, err_union_ty, result_mcv, .{ .immediate = shift }); + try self.genShiftBinOpMir( + .{ ._r, .sh }, + err_union_ty, + result_mcv, + .{ .immediate = shift }, + ); } else { try self.truncateRegister(payload_ty, result_mcv.register); } @@ -3199,7 +3559,7 @@ fn airUnwrapErrUnionErrPtr(self: *Self, inst: Air.Inst.Index) !void { const err_off = @intCast(i32, errUnionErrorOffset(pl_ty, self.target.*)); const err_abi_size = @intCast(u32, err_ty.abiSize(self.target.*)); try self.asmRegisterMemory( - .mov, + .{ ._, .mov }, registerAlias(dst_reg, err_abi_size), Memory.sib(Memory.PtrSize.fromSize(err_abi_size), .{ .base = .{ .reg = src_reg }, @@ -3237,7 +3597,7 @@ fn airUnwrapErrUnionPayloadPtr(self: *Self, inst: Air.Inst.Index) !void { const pl_off = @intCast(i32, errUnionPayloadOffset(pl_ty, self.target.*)); const dst_abi_size = 
@intCast(u32, dst_ty.abiSize(self.target.*)); try self.asmRegisterMemory( - .lea, + .{ ._, .lea }, registerAlias(dst_reg, dst_abi_size), Memory.sib(.qword, .{ .base = .{ .reg = src_reg }, .disp = pl_off }), ); @@ -3263,7 +3623,7 @@ fn airErrUnionPayloadPtrSet(self: *Self, inst: Air.Inst.Index) !void { const err_off = @intCast(i32, errUnionErrorOffset(pl_ty, self.target.*)); const err_abi_size = @intCast(u32, err_ty.abiSize(self.target.*)); try self.asmMemoryImmediate( - .mov, + .{ ._, .mov }, Memory.sib(Memory.PtrSize.fromSize(err_abi_size), .{ .base = .{ .reg = src_reg }, .disp = err_off, @@ -3284,7 +3644,7 @@ fn airErrUnionPayloadPtrSet(self: *Self, inst: Air.Inst.Index) !void { const pl_off = @intCast(i32, errUnionPayloadOffset(pl_ty, self.target.*)); const dst_abi_size = @intCast(u32, dst_ty.abiSize(self.target.*)); try self.asmRegisterMemory( - .lea, + .{ ._, .lea }, registerAlias(dst_reg, dst_abi_size), Memory.sib(.qword, .{ .base = .{ .reg = src_reg }, .disp = pl_off }), ); @@ -3335,13 +3695,13 @@ fn airWrapOptional(self: *Self, inst: Air.Inst.Index) !void { else => unreachable, .register => |opt_reg| try self.asmRegisterImmediate( - .bts, + .{ ._s, .bt }, opt_reg, Immediate.u(@intCast(u6, pl_abi_size * 8)), ), .load_frame => |frame_addr| try self.asmMemoryImmediate( - .mov, + .{ ._, .mov }, Memory.sib(.byte, .{ .base = .{ .frame = frame_addr.index }, .disp = frame_addr.off + pl_abi_size, @@ -3453,7 +3813,7 @@ fn airPtrSliceLenPtr(self: *Self, inst: Air.Inst.Index) !void { const dst_abi_size = @intCast(u32, dst_ty.abiSize(self.target.*)); try self.asmRegisterMemory( - .lea, + .{ ._, .lea }, registerAlias(dst_reg, dst_abi_size), Memory.sib(.qword, .{ .base = .{ .reg = src_reg }, @@ -3527,7 +3887,7 @@ fn genSliceElemPtr(self: *Self, lhs: Air.Inst.Ref, rhs: Air.Inst.Ref) !MCValue { try self.genSetReg(addr_reg, Type.usize, slice_mcv); // TODO we could allocate register here, but need to expect addr register and potentially // offset register. 
- try self.genBinOpMir(.add, slice_ptr_field_type, .{ .register = addr_reg }, .{ + try self.genBinOpMir(.{ ._, .add }, slice_ptr_field_type, .{ .register = addr_reg }, .{ .register = offset_reg, }); return MCValue{ .register = addr_reg.to64() }; @@ -3585,13 +3945,13 @@ fn airArrayElemVal(self: *Self, inst: Air.Inst.Index) !void { const frame_index = try self.allocFrameIndex(FrameAlloc.initType(array_ty, self.target.*)); try self.genSetMem(.{ .frame = frame_index }, 0, array_ty, array); try self.asmRegisterMemory( - .lea, + .{ ._, .lea }, addr_reg, Memory.sib(.qword, .{ .base = .{ .frame = frame_index } }), ); }, .load_frame => |frame_addr| try self.asmRegisterMemory( - .lea, + .{ ._, .lea }, addr_reg, Memory.sib(.qword, .{ .base = .{ .frame = frame_addr.index }, .disp = frame_addr.off }), ), @@ -3607,7 +3967,12 @@ fn airArrayElemVal(self: *Self, inst: Air.Inst.Index) !void { // TODO we could allocate register here, but need to expect addr register and potentially // offset register. const dst_mcv = try self.allocRegOrMem(inst, false); - try self.genBinOpMir(.add, Type.usize, .{ .register = addr_reg }, .{ .register = offset_reg }); + try self.genBinOpMir( + .{ ._, .add }, + Type.usize, + .{ .register = addr_reg }, + .{ .register = offset_reg }, + ); try self.genCopy(elem_ty, dst_mcv, .{ .indirect = .{ .reg = addr_reg } }); return self.finishAir(inst, dst_mcv, .{ bin_op.lhs, bin_op.rhs, .none }); @@ -3641,7 +4006,11 @@ fn airPtrElemVal(self: *Self, inst: Air.Inst.Index) !void { try self.copyToTmpRegister(ptr_ty, ptr_mcv); const elem_ptr_lock = self.register_manager.lockRegAssumeUnused(elem_ptr_reg); defer self.register_manager.unlockReg(elem_ptr_lock); - try self.asmRegisterRegister(.add, elem_ptr_reg, offset_reg); + try self.asmRegisterRegister( + .{ ._, .add }, + elem_ptr_reg, + offset_reg, + ); const dst_mcv = try self.allocRegOrMem(inst, true); const dst_lock = switch (dst_mcv) { @@ -3681,7 +4050,7 @@ fn airPtrElemPtr(self: *Self, inst: Air.Inst.Index) !void { 
defer self.register_manager.unlockReg(offset_reg_lock); const dst_mcv = try self.copyToRegisterWithInstTracking(inst, ptr_ty, ptr); - try self.genBinOpMir(.add, ptr_ty, dst_mcv, .{ .register = offset_reg }); + try self.genBinOpMir(.{ ._, .add }, ptr_ty, dst_mcv, .{ .register = offset_reg }); return self.finishAir(inst, dst_mcv, .{ extra.lhs, extra.rhs, .none }); } @@ -3714,7 +4083,12 @@ fn airSetUnionTag(self: *Self, inst: Air.Inst.Index) !void { const adjusted_ptr: MCValue = if (layout.payload_size > 0 and layout.tag_align < layout.payload_align) blk: { // TODO reusing the operand const reg = try self.copyToTmpRegister(ptr_union_ty, ptr); - try self.genBinOpMir(.add, ptr_union_ty, .{ .register = reg }, .{ .immediate = layout.payload_size }); + try self.genBinOpMir( + .{ ._, .add }, + ptr_union_ty, + .{ .register = reg }, + .{ .immediate = layout.payload_size }, + ); break :blk MCValue{ .register = reg }; } else ptr; @@ -3767,7 +4141,7 @@ fn airGetUnionTag(self: *Self, inst: Air.Inst.Index) !void { else 0; const result = try self.copyToRegisterWithInstTracking(inst, union_ty, operand); - try self.genShiftBinOpMir(.shr, Type.usize, result, .{ .immediate = shift }); + try self.genShiftBinOpMir(.{ ._r, .sh }, Type.usize, result, .{ .immediate = shift }); break :blk MCValue{ .register = registerAlias(result.register, @intCast(u32, layout.tag_size)), }; @@ -3798,24 +4172,53 @@ fn airClz(self: *Self, inst: Air.Inst.Index) !void { const dst_reg = try self.register_manager.allocReg(inst, gp); const dst_mcv = MCValue{ .register = dst_reg }; - const dst_lock = self.register_manager.lockReg(dst_reg); - defer if (dst_lock) |lock| self.register_manager.unlockReg(lock); + const dst_lock = self.register_manager.lockRegAssumeUnused(dst_reg); + defer self.register_manager.unlockReg(dst_lock); - if (Target.x86.featureSetHas(self.target.cpu.features, .lzcnt)) { - try self.genBinOpMir(.lzcnt, src_ty, dst_mcv, mat_src_mcv); - const extra_bits = self.regExtraBits(src_ty); - if 
(extra_bits > 0) { - try self.genBinOpMir(.sub, dst_ty, dst_mcv, .{ .immediate = extra_bits }); - } + const src_bits = src_ty.bitSize(self.target.*); + if (self.hasFeature(.lzcnt)) { + if (src_bits <= 64) { + try self.genBinOpMir(.{ ._, .lzcnt }, src_ty, dst_mcv, mat_src_mcv); + + const extra_bits = self.regExtraBits(src_ty); + if (extra_bits > 0) { + try self.genBinOpMir(.{ ._, .sub }, dst_ty, dst_mcv, .{ .immediate = extra_bits }); + } + } else if (src_bits <= 128) { + const tmp_reg = try self.register_manager.allocReg(null, gp); + const tmp_mcv = MCValue{ .register = tmp_reg }; + const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg); + defer self.register_manager.unlockReg(tmp_lock); + + try self.genBinOpMir(.{ ._, .lzcnt }, Type.u64, dst_mcv, mat_src_mcv); + try self.genBinOpMir(.{ ._, .add }, dst_ty, dst_mcv, .{ .immediate = 64 }); + try self.genBinOpMir( + .{ ._, .lzcnt }, + Type.u64, + tmp_mcv, + mat_src_mcv.address().offset(8).deref(), + ); + try self.asmCmovccRegisterRegister(dst_reg.to32(), tmp_reg.to32(), .nc); + + if (src_bits < 128) { + try self.genBinOpMir( + .{ ._, .sub }, + dst_ty, + dst_mcv, + .{ .immediate = 128 - src_bits }, + ); + } + } else return self.fail("TODO airClz of {}", .{src_ty.fmt(self.bin_file.options.module.?)}); break :result dst_mcv; } - const src_bits = src_ty.bitSize(self.target.*); + if (src_bits > 64) + return self.fail("TODO airClz of {}", .{src_ty.fmt(self.bin_file.options.module.?)}); if (math.isPowerOfTwo(src_bits)) { const imm_reg = try self.copyToTmpRegister(dst_ty, .{ .immediate = src_bits ^ (src_bits - 1), }); - try self.genBinOpMir(.bsr, src_ty, dst_mcv, mat_src_mcv); + try self.genBinOpMir(.{ ._, .bsr }, src_ty, dst_mcv, mat_src_mcv); const cmov_abi_size = @max(@intCast(u32, dst_ty.abiSize(self.target.*)), 2); try self.asmCmovccRegisterRegister( @@ -3824,12 +4227,12 @@ fn airClz(self: *Self, inst: Air.Inst.Index) !void { .z, ); - try self.genBinOpMir(.xor, dst_ty, dst_mcv, .{ .immediate = src_bits - 1 
}); + try self.genBinOpMir(.{ ._, .xor }, dst_ty, dst_mcv, .{ .immediate = src_bits - 1 }); } else { const imm_reg = try self.copyToTmpRegister(dst_ty, .{ .immediate = @as(u64, math.maxInt(u64)) >> @intCast(u6, 64 - self.regBitSize(dst_ty)), }); - try self.genBinOpMir(.bsr, src_ty, dst_mcv, mat_src_mcv); + try self.genBinOpMir(.{ ._, .bsr }, src_ty, dst_mcv, mat_src_mcv); const cmov_abi_size = @max(@intCast(u32, dst_ty.abiSize(self.target.*)), 2); try self.asmCmovccRegisterRegister( @@ -3839,7 +4242,7 @@ fn airClz(self: *Self, inst: Air.Inst.Index) !void { ); try self.genSetReg(dst_reg, dst_ty, .{ .immediate = src_bits - 1 }); - try self.genBinOpMir(.sub, dst_ty, dst_mcv, .{ .register = imm_reg }); + try self.genBinOpMir(.{ ._, .sub }, dst_ty, dst_mcv, .{ .register = imm_reg }); } break :result dst_mcv; }; @@ -3869,27 +4272,55 @@ fn airCtz(self: *Self, inst: Air.Inst.Index) !void { const dst_lock = self.register_manager.lockReg(dst_reg); defer if (dst_lock) |lock| self.register_manager.unlockReg(lock); - if (Target.x86.featureSetHas(self.target.cpu.features, .bmi)) { - const extra_bits = self.regExtraBits(src_ty); - const masked_mcv = if (extra_bits > 0) masked: { - const mask_mcv = MCValue{ - .immediate = ((@as(u64, 1) << @intCast(u6, extra_bits)) - 1) << - @intCast(u6, src_bits), - }; - const tmp_mcv = tmp: { - if (src_mcv.isImmediate() or self.liveness.operandDies(inst, 0)) break :tmp src_mcv; - try self.genSetReg(dst_reg, src_ty, src_mcv); - break :tmp dst_mcv; - }; - try self.genBinOpMir(.@"or", src_ty, tmp_mcv, mask_mcv); - break :masked tmp_mcv; - } else mat_src_mcv; - try self.genBinOpMir(.tzcnt, src_ty, dst_mcv, masked_mcv); + if (self.hasFeature(.bmi)) { + if (src_bits <= 64) { + const extra_bits = self.regExtraBits(src_ty); + const masked_mcv = if (extra_bits > 0) masked: { + const tmp_mcv = tmp: { + if (src_mcv.isImmediate() or self.liveness.operandDies(inst, 0)) + break :tmp src_mcv; + try self.genSetReg(dst_reg, src_ty, src_mcv); + break :tmp dst_mcv; 
+ }; + try self.genBinOpMir( + .{ ._, .@"or" }, + src_ty, + tmp_mcv, + .{ .immediate = (@as(u64, math.maxInt(u64)) >> @intCast(u6, 64 - extra_bits)) << + @intCast(u6, src_bits) }, + ); + break :masked tmp_mcv; + } else mat_src_mcv; + try self.genBinOpMir(.{ ._, .tzcnt }, src_ty, dst_mcv, masked_mcv); + } else if (src_bits <= 128) { + const tmp_reg = try self.register_manager.allocReg(null, gp); + const tmp_mcv = MCValue{ .register = tmp_reg }; + const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg); + defer self.register_manager.unlockReg(tmp_lock); + + const masked_mcv = if (src_bits < 128) masked: { + try self.genCopy(Type.u64, dst_mcv, mat_src_mcv.address().offset(8).deref()); + try self.genBinOpMir( + .{ ._, .@"or" }, + Type.u64, + dst_mcv, + .{ .immediate = @as(u64, math.maxInt(u64)) << @intCast(u6, src_bits - 64) }, + ); + break :masked dst_mcv; + } else mat_src_mcv.address().offset(8).deref(); + try self.genBinOpMir(.{ ._, .tzcnt }, Type.u64, dst_mcv, masked_mcv); + try self.genBinOpMir(.{ ._, .add }, dst_ty, dst_mcv, .{ .immediate = 64 }); + try self.genBinOpMir(.{ ._, .tzcnt }, Type.u64, tmp_mcv, mat_src_mcv); + try self.asmCmovccRegisterRegister(dst_reg.to32(), tmp_reg.to32(), .nc); + } else return self.fail("TODO airCtz of {}", .{src_ty.fmt(self.bin_file.options.module.?)}); break :result dst_mcv; } + if (src_bits > 64) + return self.fail("TODO airCtz of {}", .{src_ty.fmt(self.bin_file.options.module.?)}); + const width_reg = try self.copyToTmpRegister(dst_ty, .{ .immediate = src_bits }); - try self.genBinOpMir(.bsf, src_ty, dst_mcv, mat_src_mcv); + try self.genBinOpMir(.{ ._, .bsf }, src_ty, dst_mcv, mat_src_mcv); const cmov_abi_size = @max(@intCast(u32, dst_ty.abiSize(self.target.*)), 2); try self.asmCmovccRegisterRegister( @@ -3909,7 +4340,7 @@ fn airPopcount(self: *Self, inst: Air.Inst.Index) !void { const src_abi_size = @intCast(u32, src_ty.abiSize(self.target.*)); const src_mcv = try self.resolveInst(ty_op.operand); - if 
(Target.x86.featureSetHas(self.target.cpu.features, .popcnt)) { + if (self.hasFeature(.popcnt)) { const mat_src_mcv = switch (src_mcv) { .immediate => MCValue{ .register = try self.copyToTmpRegister(src_ty, src_mcv) }, else => src_mcv, @@ -3927,7 +4358,7 @@ fn airPopcount(self: *Self, inst: Air.Inst.Index) !void { .{ .register = try self.register_manager.allocReg(inst, gp) }; const popcnt_ty = if (src_abi_size > 1) src_ty else Type.u16; - try self.genBinOpMir(.popcnt, popcnt_ty, dst_mcv, mat_src_mcv); + try self.genBinOpMir(.{ ._, .popcnt }, popcnt_ty, dst_mcv, mat_src_mcv); break :result dst_mcv; } @@ -3958,54 +4389,54 @@ fn airPopcount(self: *Self, inst: Air.Inst.Index) !void { undefined; // dst = operand - try self.asmRegisterRegister(.mov, tmp, dst); + try self.asmRegisterRegister(.{ ._, .mov }, tmp, dst); // tmp = operand - try self.asmRegisterImmediate(.shr, tmp, Immediate.u(1)); + try self.asmRegisterImmediate(.{ ._r, .sh }, tmp, Immediate.u(1)); // tmp = operand >> 1 if (src_abi_size > 4) { - try self.asmRegisterImmediate(.mov, imm, imm_0_1); - try self.asmRegisterRegister(.@"and", tmp, imm); - } else try self.asmRegisterImmediate(.@"and", tmp, imm_0_1); + try self.asmRegisterImmediate(.{ ._, .mov }, imm, imm_0_1); + try self.asmRegisterRegister(.{ ._, .@"and" }, tmp, imm); + } else try self.asmRegisterImmediate(.{ ._, .@"and" }, tmp, imm_0_1); // tmp = (operand >> 1) & 0x55...55 - try self.asmRegisterRegister(.sub, dst, tmp); + try self.asmRegisterRegister(.{ ._, .sub }, dst, tmp); // dst = temp1 = operand - ((operand >> 1) & 0x55...55) - try self.asmRegisterRegister(.mov, tmp, dst); + try self.asmRegisterRegister(.{ ._, .mov }, tmp, dst); // tmp = temp1 - try self.asmRegisterImmediate(.shr, dst, Immediate.u(2)); + try self.asmRegisterImmediate(.{ ._r, .sh }, dst, Immediate.u(2)); // dst = temp1 >> 2 if (src_abi_size > 4) { - try self.asmRegisterImmediate(.mov, imm, imm_00_11); - try self.asmRegisterRegister(.@"and", tmp, imm); - try 
self.asmRegisterRegister(.@"and", dst, imm); + try self.asmRegisterImmediate(.{ ._, .mov }, imm, imm_00_11); + try self.asmRegisterRegister(.{ ._, .@"and" }, tmp, imm); + try self.asmRegisterRegister(.{ ._, .@"and" }, dst, imm); } else { - try self.asmRegisterImmediate(.@"and", tmp, imm_00_11); - try self.asmRegisterImmediate(.@"and", dst, imm_00_11); + try self.asmRegisterImmediate(.{ ._, .@"and" }, tmp, imm_00_11); + try self.asmRegisterImmediate(.{ ._, .@"and" }, dst, imm_00_11); } // tmp = temp1 & 0x33...33 // dst = (temp1 >> 2) & 0x33...33 - try self.asmRegisterRegister(.add, tmp, dst); + try self.asmRegisterRegister(.{ ._, .add }, tmp, dst); // tmp = temp2 = (temp1 & 0x33...33) + ((temp1 >> 2) & 0x33...33) - try self.asmRegisterRegister(.mov, dst, tmp); + try self.asmRegisterRegister(.{ ._, .mov }, dst, tmp); // dst = temp2 - try self.asmRegisterImmediate(.shr, tmp, Immediate.u(4)); + try self.asmRegisterImmediate(.{ ._r, .sh }, tmp, Immediate.u(4)); // tmp = temp2 >> 4 - try self.asmRegisterRegister(.add, dst, tmp); + try self.asmRegisterRegister(.{ ._, .add }, dst, tmp); // dst = temp2 + (temp2 >> 4) if (src_abi_size > 4) { - try self.asmRegisterImmediate(.mov, imm, imm_0000_1111); - try self.asmRegisterImmediate(.mov, tmp, imm_0000_0001); - try self.asmRegisterRegister(.@"and", dst, imm); - try self.asmRegisterRegister(.imul, dst, tmp); + try self.asmRegisterImmediate(.{ ._, .mov }, imm, imm_0000_1111); + try self.asmRegisterImmediate(.{ ._, .mov }, tmp, imm_0000_0001); + try self.asmRegisterRegister(.{ ._, .@"and" }, dst, imm); + try self.asmRegisterRegister(.{ .i_, .mul }, dst, tmp); } else { - try self.asmRegisterImmediate(.@"and", dst, imm_0000_1111); + try self.asmRegisterImmediate(.{ ._, .@"and" }, dst, imm_0000_1111); if (src_abi_size > 1) { - try self.asmRegisterRegisterImmediate(.imul, dst, dst, imm_0000_0001); + try self.asmRegisterRegisterImmediate(.{ .i_, .mul }, dst, dst, imm_0000_0001); } } // dst = temp3 = (temp2 + (temp2 >> 4)) & 0x0f...0f 
// dst = temp3 * 0x01...01 if (src_abi_size > 1) { - try self.asmRegisterImmediate(.shr, dst, Immediate.u((src_abi_size - 1) * 8)); + try self.asmRegisterImmediate(.{ ._r, .sh }, dst, Immediate.u((src_abi_size - 1) * 8)); } // dst = (temp3 * 0x01...01) >> (bits - 8) } @@ -4034,11 +4465,11 @@ fn byteSwap(self: *Self, inst: Air.Inst.Index, src_ty: Type, src_mcv: MCValue, m 16 => if ((mem_ok or src_mcv.isRegister()) and self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) { - try self.genBinOpMir(.rol, src_ty, src_mcv, .{ .immediate = 8 }); + try self.genBinOpMir(.{ ._l, .ro }, src_ty, src_mcv, .{ .immediate = 8 }); return src_mcv; }, 32, 64 => if (src_mcv.isRegister() and self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) { - try self.genUnOpMir(.bswap, src_ty, src_mcv); + try self.genUnOpMir(.{ ._, .bswap }, src_ty, src_mcv); return src_mcv; }, } @@ -4055,10 +4486,10 @@ fn byteSwap(self: *Self, inst: Air.Inst.Index, src_ty: Type, src_mcv: MCValue, m try self.genSetReg(dst_mcv.register, src_ty, src_mcv); switch (src_bits) { else => unreachable, - 16 => try self.genBinOpMir(.rol, src_ty, dst_mcv, .{ .immediate = 8 }), - 32, 64 => try self.genUnOpMir(.bswap, src_ty, dst_mcv), + 16 => try self.genBinOpMir(.{ ._l, .ro }, src_ty, dst_mcv, .{ .immediate = 8 }), + 32, 64 => try self.genUnOpMir(.{ ._, .bswap }, src_ty, dst_mcv), } - } else try self.genBinOpMir(.movbe, src_ty, dst_mcv, src_mcv); + } else try self.genBinOpMir(.{ ._, .movbe }, src_ty, dst_mcv, src_mcv); return dst_mcv; } @@ -4067,7 +4498,7 @@ fn byteSwap(self: *Self, inst: Air.Inst.Index, src_ty: Type, src_mcv: MCValue, m const dst_lock = self.register_manager.lockRegAssumeUnused(dst_reg); defer self.register_manager.unlockReg(dst_lock); - try self.genBinOpMir(.movbe, src_ty, dst_mcv, src_mcv); + try self.genBinOpMir(.{ ._, .movbe }, src_ty, dst_mcv, src_mcv); return dst_mcv; } @@ -4081,7 +4512,7 @@ fn airByteSwap(self: *Self, inst: Air.Inst.Index) !void { switch (self.regExtraBits(src_ty)) { 0 => {}, else => 
|extra| try self.genBinOpMir( - if (src_ty.isSignedInt()) .sar else .shr, + if (src_ty.isSignedInt()) .{ ._r, .sa } else .{ ._r, .sh }, src_ty, dst_mcv, .{ .immediate = extra }, @@ -4121,40 +4552,40 @@ fn airBitReverse(self: *Self, inst: Air.Inst.Index) !void { const imm_0_1 = Immediate.u(mask / 0b1_1); // dst = temp1 = bswap(operand) - try self.asmRegisterRegister(.mov, tmp, dst); + try self.asmRegisterRegister(.{ ._, .mov }, tmp, dst); // tmp = temp1 - try self.asmRegisterImmediate(.shr, dst, Immediate.u(4)); + try self.asmRegisterImmediate(.{ ._r, .sh }, dst, Immediate.u(4)); // dst = temp1 >> 4 if (src_abi_size > 4) { - try self.asmRegisterImmediate(.mov, imm, imm_0000_1111); - try self.asmRegisterRegister(.@"and", tmp, imm); - try self.asmRegisterRegister(.@"and", dst, imm); + try self.asmRegisterImmediate(.{ ._, .mov }, imm, imm_0000_1111); + try self.asmRegisterRegister(.{ ._, .@"and" }, tmp, imm); + try self.asmRegisterRegister(.{ ._, .@"and" }, dst, imm); } else { - try self.asmRegisterImmediate(.@"and", tmp, imm_0000_1111); - try self.asmRegisterImmediate(.@"and", dst, imm_0000_1111); + try self.asmRegisterImmediate(.{ ._, .@"and" }, tmp, imm_0000_1111); + try self.asmRegisterImmediate(.{ ._, .@"and" }, dst, imm_0000_1111); } // tmp = temp1 & 0x0F...0F // dst = (temp1 >> 4) & 0x0F...0F - try self.asmRegisterImmediate(.shl, tmp, Immediate.u(4)); + try self.asmRegisterImmediate(.{ ._l, .sh }, tmp, Immediate.u(4)); // tmp = (temp1 & 0x0F...0F) << 4 - try self.asmRegisterRegister(.@"or", dst, tmp); + try self.asmRegisterRegister(.{ ._, .@"or" }, dst, tmp); // dst = temp2 = ((temp1 >> 4) & 0x0F...0F) | ((temp1 & 0x0F...0F) << 4) - try self.asmRegisterRegister(.mov, tmp, dst); + try self.asmRegisterRegister(.{ ._, .mov }, tmp, dst); // tmp = temp2 - try self.asmRegisterImmediate(.shr, dst, Immediate.u(2)); + try self.asmRegisterImmediate(.{ ._r, .sh }, dst, Immediate.u(2)); // dst = temp2 >> 2 if (src_abi_size > 4) { - try self.asmRegisterImmediate(.mov, imm, 
imm_00_11); - try self.asmRegisterRegister(.@"and", tmp, imm); - try self.asmRegisterRegister(.@"and", dst, imm); + try self.asmRegisterImmediate(.{ ._, .mov }, imm, imm_00_11); + try self.asmRegisterRegister(.{ ._, .@"and" }, tmp, imm); + try self.asmRegisterRegister(.{ ._, .@"and" }, dst, imm); } else { - try self.asmRegisterImmediate(.@"and", tmp, imm_00_11); - try self.asmRegisterImmediate(.@"and", dst, imm_00_11); + try self.asmRegisterImmediate(.{ ._, .@"and" }, tmp, imm_00_11); + try self.asmRegisterImmediate(.{ ._, .@"and" }, dst, imm_00_11); } // tmp = temp2 & 0x33...33 // dst = (temp2 >> 2) & 0x33...33 try self.asmRegisterMemory( - .lea, + .{ ._, .lea }, if (src_abi_size > 4) tmp.to64() else tmp.to32(), Memory.sib(.qword, .{ .base = .{ .reg = dst.to64() }, @@ -4162,22 +4593,22 @@ fn airBitReverse(self: *Self, inst: Air.Inst.Index) !void { }), ); // tmp = temp3 = ((temp2 >> 2) & 0x33...33) + ((temp2 & 0x33...33) << 2) - try self.asmRegisterRegister(.mov, dst, tmp); + try self.asmRegisterRegister(.{ ._, .mov }, dst, tmp); // dst = temp3 - try self.asmRegisterImmediate(.shr, tmp, Immediate.u(1)); + try self.asmRegisterImmediate(.{ ._r, .sh }, tmp, Immediate.u(1)); // tmp = temp3 >> 1 if (src_abi_size > 4) { - try self.asmRegisterImmediate(.mov, imm, imm_0_1); - try self.asmRegisterRegister(.@"and", dst, imm); - try self.asmRegisterRegister(.@"and", tmp, imm); + try self.asmRegisterImmediate(.{ ._, .mov }, imm, imm_0_1); + try self.asmRegisterRegister(.{ ._, .@"and" }, dst, imm); + try self.asmRegisterRegister(.{ ._, .@"and" }, tmp, imm); } else { - try self.asmRegisterImmediate(.@"and", dst, imm_0_1); - try self.asmRegisterImmediate(.@"and", tmp, imm_0_1); + try self.asmRegisterImmediate(.{ ._, .@"and" }, dst, imm_0_1); + try self.asmRegisterImmediate(.{ ._, .@"and" }, tmp, imm_0_1); } // dst = temp3 & 0x55...55 // tmp = (temp3 >> 1) & 0x55...55 try self.asmRegisterMemory( - .lea, + .{ ._, .lea }, if (src_abi_size > 4) dst.to64() else dst.to32(), 
Memory.sib(.qword, .{ .base = .{ .reg = tmp.to64() }, @@ -4190,7 +4621,7 @@ fn airBitReverse(self: *Self, inst: Air.Inst.Index) !void { switch (self.regExtraBits(src_ty)) { 0 => {}, else => |extra| try self.genBinOpMir( - if (src_ty.isSignedInt()) .sar else .shr, + if (src_ty.isSignedInt()) .{ ._r, .sa } else .{ ._r, .sh }, src_ty, dst_mcv, .{ .immediate = extra }, @@ -4246,19 +4677,20 @@ fn airFloatSign(self: *Self, inst: Air.Inst.Index) !void { const tag = self.air.instructions.items(.tag)[inst]; try self.genBinOpMir(switch (ty_bits) { // No point using an extra prefix byte for *pd which performs the same operation. - 32, 64 => switch (tag) { - .neg => .xorps, - .fabs => .andnps, + 16, 32, 64, 128 => switch (tag) { + .neg => .{ ._ps, .xor }, + .fabs => .{ ._ps, .andn }, else => unreachable, }, - else => return self.fail("TODO implement airFloatSign for {}", .{ + 80 => return self.fail("TODO implement airFloatSign for {}", .{ ty.fmt(self.bin_file.options.module.?), }), + else => unreachable, }, vec_ty, dst_mcv, sign_mcv); return self.finishAir(inst, dst_mcv, .{ un_op, .none, .none }); } -fn airSqrt(self: *Self, inst: Air.Inst.Index) !void { +fn airRound(self: *Self, inst: Air.Inst.Index, mode: u4) !void { const un_op = self.air.instructions.items(.data)[inst].un_op; const ty = self.air.typeOf(un_op); @@ -4267,20 +4699,226 @@ fn airSqrt(self: *Self, inst: Air.Inst.Index) !void { src_mcv else try self.copyToRegisterWithInstTracking(inst, ty, src_mcv); + const dst_reg = dst_mcv.getReg().?; + const dst_lock = self.register_manager.lockReg(dst_reg); + defer if (dst_lock) |lock| self.register_manager.unlockReg(lock); + try self.genRound(ty, dst_reg, src_mcv, mode); + return self.finishAir(inst, dst_mcv, .{ un_op, .none, .none }); +} + +fn genRound(self: *Self, ty: Type, dst_reg: Register, src_mcv: MCValue, mode: u4) !void { + if (!self.hasFeature(.sse4_1)) + return self.fail("TODO implement genRound without sse4_1 feature", .{}); - try self.genBinOpMir(switch 
(ty.zigTypeTag()) { + const mir_tag = if (@as(?Mir.Inst.FixedTag, switch (ty.zigTypeTag()) { .Float => switch (ty.floatBits(self.target.*)) { - 32 => .sqrtss, - 64 => .sqrtsd, - else => return self.fail("TODO implement airSqrt for {}", .{ - ty.fmt(self.bin_file.options.module.?), - }), + 32 => if (self.hasFeature(.avx)) .{ .v_ss, .round } else .{ ._ss, .round }, + 64 => if (self.hasFeature(.avx)) .{ .v_sd, .round } else .{ ._sd, .round }, + 16, 80, 128 => null, + else => unreachable, + }, + .Vector => switch (ty.childType().zigTypeTag()) { + .Float => switch (ty.childType().floatBits(self.target.*)) { + 32 => switch (ty.vectorLen()) { + 1 => if (self.hasFeature(.avx)) .{ .v_ss, .round } else .{ ._ss, .round }, + 2...4 => if (self.hasFeature(.avx)) .{ .v_ps, .round } else .{ ._ps, .round }, + 5...8 => if (self.hasFeature(.avx)) .{ .v_ps, .round } else null, + else => null, + }, + 64 => switch (ty.vectorLen()) { + 1 => if (self.hasFeature(.avx)) .{ .v_sd, .round } else .{ ._sd, .round }, + 2 => if (self.hasFeature(.avx)) .{ .v_pd, .round } else .{ ._pd, .round }, + 3...4 => if (self.hasFeature(.avx)) .{ .v_pd, .round } else null, + else => null, + }, + 16, 80, 128 => null, + else => unreachable, + }, + else => null, }, - else => return self.fail("TODO implement airSqrt for {}", .{ + else => unreachable, + })) |tag| tag else return self.fail("TODO implement genRound for {}", .{ + ty.fmt(self.bin_file.options.module.?), + }); + + const abi_size = @intCast(u32, ty.abiSize(self.target.*)); + const dst_alias = registerAlias(dst_reg, abi_size); + switch (mir_tag[0]) { + .v_ss, .v_sd => if (src_mcv.isMemory()) try self.asmRegisterRegisterMemoryImmediate( + mir_tag, + dst_alias, + dst_alias, + src_mcv.mem(Memory.PtrSize.fromSize(abi_size)), + Immediate.u(mode), + ) else try self.asmRegisterRegisterRegisterImmediate( + mir_tag, + dst_alias, + dst_alias, + registerAlias(if (src_mcv.isRegister()) + src_mcv.getReg().? 
+ else + try self.copyToTmpRegister(ty, src_mcv), abi_size), + Immediate.u(mode), + ), + else => if (src_mcv.isMemory()) try self.asmRegisterMemoryImmediate( + mir_tag, + dst_alias, + src_mcv.mem(Memory.PtrSize.fromSize(abi_size)), + Immediate.u(mode), + ) else try self.asmRegisterRegisterImmediate( + mir_tag, + dst_alias, + registerAlias(if (src_mcv.isRegister()) + src_mcv.getReg().? + else + try self.copyToTmpRegister(ty, src_mcv), abi_size), + Immediate.u(mode), + ), + } +} + +fn airSqrt(self: *Self, inst: Air.Inst.Index) !void { + const un_op = self.air.instructions.items(.data)[inst].un_op; + const ty = self.air.typeOf(un_op); + const abi_size = @intCast(u32, ty.abiSize(self.target.*)); + + const src_mcv = try self.resolveInst(un_op); + const dst_mcv = if (src_mcv.isRegister() and self.reuseOperand(inst, un_op, 0, src_mcv)) + src_mcv + else + try self.copyToRegisterWithInstTracking(inst, ty, src_mcv); + const dst_reg = registerAlias(dst_mcv.getReg().?, abi_size); + const dst_lock = self.register_manager.lockReg(dst_reg); + defer if (dst_lock) |lock| self.register_manager.unlockReg(lock); + + const result: MCValue = result: { + const mir_tag = if (@as(?Mir.Inst.FixedTag, switch (ty.zigTypeTag()) { + .Float => switch (ty.floatBits(self.target.*)) { + 16 => if (self.hasFeature(.f16c)) { + const mat_src_reg = if (src_mcv.isRegister()) + src_mcv.getReg().? 
+ else + try self.copyToTmpRegister(ty, src_mcv); + try self.asmRegisterRegister(.{ .v_, .cvtph2ps }, dst_reg, mat_src_reg.to128()); + try self.asmRegisterRegisterRegister(.{ .v_ss, .sqrt }, dst_reg, dst_reg, dst_reg); + try self.asmRegisterRegisterImmediate( + .{ .v_, .cvtps2ph }, + dst_reg, + dst_reg, + Immediate.u(0b1_00), + ); + break :result dst_mcv; + } else null, + 32 => if (self.hasFeature(.avx)) .{ .v_ss, .sqrt } else .{ ._ss, .sqrt }, + 64 => if (self.hasFeature(.avx)) .{ .v_sd, .sqrt } else .{ ._sd, .sqrt }, + 80, 128 => null, + else => unreachable, + }, + .Vector => switch (ty.childType().zigTypeTag()) { + .Float => switch (ty.childType().floatBits(self.target.*)) { + 16 => if (self.hasFeature(.f16c)) switch (ty.vectorLen()) { + 1 => { + try self.asmRegisterRegister( + .{ .v_, .cvtph2ps }, + dst_reg, + (if (src_mcv.isRegister()) + src_mcv.getReg().? + else + try self.copyToTmpRegister(ty, src_mcv)).to128(), + ); + try self.asmRegisterRegisterRegister( + .{ .v_ss, .sqrt }, + dst_reg, + dst_reg, + dst_reg, + ); + try self.asmRegisterRegisterImmediate( + .{ .v_, .cvtps2ph }, + dst_reg, + dst_reg, + Immediate.u(0b1_00), + ); + break :result dst_mcv; + }, + 2...8 => { + const wide_reg = registerAlias(dst_reg, abi_size * 2); + if (src_mcv.isMemory()) try self.asmRegisterMemory( + .{ .v_, .cvtph2ps }, + wide_reg, + src_mcv.mem(Memory.PtrSize.fromSize( + @intCast(u32, @divExact(wide_reg.bitSize(), 16)), + )), + ) else try self.asmRegisterRegister( + .{ .v_, .cvtph2ps }, + wide_reg, + (if (src_mcv.isRegister()) + src_mcv.getReg().? 
+ else + try self.copyToTmpRegister(ty, src_mcv)).to128(), + ); + try self.asmRegisterRegister(.{ .v_ps, .sqrt }, wide_reg, wide_reg); + try self.asmRegisterRegisterImmediate( + .{ .v_, .cvtps2ph }, + dst_reg, + wide_reg, + Immediate.u(0b1_00), + ); + break :result dst_mcv; + }, + else => null, + } else null, + 32 => switch (ty.vectorLen()) { + 1 => if (self.hasFeature(.avx)) .{ .v_ss, .sqrt } else .{ ._ss, .sqrt }, + 2...4 => if (self.hasFeature(.avx)) .{ .v_ps, .sqrt } else .{ ._ps, .sqrt }, + 5...8 => if (self.hasFeature(.avx)) .{ .v_ps, .sqrt } else null, + else => null, + }, + 64 => switch (ty.vectorLen()) { + 1 => if (self.hasFeature(.avx)) .{ .v_sd, .sqrt } else .{ ._sd, .sqrt }, + 2 => if (self.hasFeature(.avx)) .{ .v_pd, .sqrt } else .{ ._pd, .sqrt }, + 3...4 => if (self.hasFeature(.avx)) .{ .v_pd, .sqrt } else null, + else => null, + }, + 80, 128 => null, + else => unreachable, + }, + else => unreachable, + }, + else => unreachable, + })) |tag| tag else return self.fail("TODO implement airSqrt for {}", .{ ty.fmt(self.bin_file.options.module.?), - }), - }, ty, dst_mcv, src_mcv); - return self.finishAir(inst, dst_mcv, .{ un_op, .none, .none }); + }); + switch (mir_tag[0]) { + .v_ss, .v_sd => if (src_mcv.isMemory()) try self.asmRegisterRegisterMemory( + mir_tag, + dst_reg, + dst_reg, + src_mcv.mem(Memory.PtrSize.fromSize(abi_size)), + ) else try self.asmRegisterRegisterRegister( + mir_tag, + dst_reg, + dst_reg, + registerAlias(if (src_mcv.isRegister()) + src_mcv.getReg().? + else + try self.copyToTmpRegister(ty, src_mcv), abi_size), + ), + else => if (src_mcv.isMemory()) try self.asmRegisterMemory( + mir_tag, + dst_reg, + src_mcv.mem(Memory.PtrSize.fromSize(abi_size)), + ) else try self.asmRegisterRegister( + mir_tag, + dst_reg, + registerAlias(if (src_mcv.isRegister()) + src_mcv.getReg().? 
+ else + try self.copyToTmpRegister(ty, src_mcv), abi_size), + ), + } + break :result dst_mcv; + }; + return self.finishAir(inst, result, .{ un_op, .none, .none }); } fn airUnaryMath(self: *Self, inst: Air.Inst.Index) !void { @@ -4366,14 +5004,14 @@ fn packedLoad(self: *Self, dst_mcv: MCValue, ptr_ty: Type, ptr_mcv: MCValue) Inn if (load_abi_size <= 8) { const load_reg = registerAlias(dst_reg, load_abi_size); try self.asmRegisterMemory( - .mov, + .{ ._, .mov }, load_reg, Memory.sib(Memory.PtrSize.fromSize(load_abi_size), .{ .base = .{ .reg = ptr_reg }, .disp = val_byte_off, }), ); - try self.asmRegisterImmediate(.shr, load_reg, Immediate.u(val_bit_off)); + try self.asmRegisterImmediate(.{ ._r, .sh }, load_reg, Immediate.u(val_bit_off)); } else { const tmp_reg = registerAlias(try self.register_manager.allocReg(null, gp), val_abi_size); const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg); @@ -4381,7 +5019,7 @@ fn packedLoad(self: *Self, dst_mcv: MCValue, ptr_ty: Type, ptr_mcv: MCValue) Inn const dst_alias = registerAlias(dst_reg, val_abi_size); try self.asmRegisterMemory( - .mov, + .{ ._, .mov }, dst_alias, Memory.sib(Memory.PtrSize.fromSize(val_abi_size), .{ .base = .{ .reg = ptr_reg }, @@ -4389,14 +5027,19 @@ fn packedLoad(self: *Self, dst_mcv: MCValue, ptr_ty: Type, ptr_mcv: MCValue) Inn }), ); try self.asmRegisterMemory( - .mov, + .{ ._, .mov }, tmp_reg, Memory.sib(Memory.PtrSize.fromSize(val_abi_size), .{ .base = .{ .reg = ptr_reg }, .disp = val_byte_off + 1, }), ); - try self.asmRegisterRegisterImmediate(.shrd, dst_alias, tmp_reg, Immediate.u(val_bit_off)); + try self.asmRegisterRegisterImmediate( + .{ ._rd, .sh }, + dst_alias, + tmp_reg, + Immediate.u(val_bit_off), + ); } if (val_extra_bits > 0) try self.truncateRegister(val_ty, dst_reg); @@ -4502,13 +5145,13 @@ fn packedStore(self: *Self, ptr_ty: Type, ptr_mcv: MCValue, src_mcv: MCValue) In const part_mask_not = part_mask ^ (@as(u64, math.maxInt(u64)) >> @intCast(u6, 64 - limb_abi_bits)); if 
(limb_abi_size <= 4) { - try self.asmMemoryImmediate(.@"and", limb_mem, Immediate.u(part_mask_not)); + try self.asmMemoryImmediate(.{ ._, .@"and" }, limb_mem, Immediate.u(part_mask_not)); } else if (math.cast(i32, @bitCast(i64, part_mask_not))) |small| { - try self.asmMemoryImmediate(.@"and", limb_mem, Immediate.s(small)); + try self.asmMemoryImmediate(.{ ._, .@"and" }, limb_mem, Immediate.s(small)); } else { const part_mask_reg = try self.register_manager.allocReg(null, gp); - try self.asmRegisterImmediate(.mov, part_mask_reg, Immediate.u(part_mask_not)); - try self.asmMemoryRegister(.@"and", limb_mem, part_mask_reg); + try self.asmRegisterImmediate(.{ ._, .mov }, part_mask_reg, Immediate.u(part_mask_not)); + try self.asmMemoryRegister(.{ ._, .@"and" }, limb_mem, part_mask_reg); } if (src_bit_size <= 64) { @@ -4519,14 +5162,26 @@ fn packedStore(self: *Self, ptr_ty: Type, ptr_mcv: MCValue, src_mcv: MCValue) In try self.genSetReg(tmp_reg, src_ty, src_mcv); switch (limb_i) { - 0 => try self.genShiftBinOpMir(.shl, src_ty, tmp_mcv, .{ .immediate = src_bit_off }), - 1 => try self.genShiftBinOpMir(.shr, src_ty, tmp_mcv, .{ - .immediate = limb_abi_bits - src_bit_off, - }), + 0 => try self.genShiftBinOpMir( + .{ ._l, .sh }, + src_ty, + tmp_mcv, + .{ .immediate = src_bit_off }, + ), + 1 => try self.genShiftBinOpMir( + .{ ._r, .sh }, + src_ty, + tmp_mcv, + .{ .immediate = limb_abi_bits - src_bit_off }, + ), else => unreachable, } - try self.genBinOpMir(.@"and", src_ty, tmp_mcv, .{ .immediate = part_mask }); - try self.asmMemoryRegister(.@"or", limb_mem, registerAlias(tmp_reg, limb_abi_size)); + try self.genBinOpMir(.{ ._, .@"and" }, src_ty, tmp_mcv, .{ .immediate = part_mask }); + try self.asmMemoryRegister( + .{ ._, .@"or" }, + limb_mem, + registerAlias(tmp_reg, limb_abi_size), + ); } else return self.fail("TODO: implement packed store of {}", .{ src_ty.fmt(self.bin_file.options.module.?), }); @@ -4626,7 +5281,7 @@ fn fieldPtr(self: *Self, inst: Air.Inst.Index, operand: 
Air.Inst.Ref, index: u32 .load_tlv => |sym_index| .{ .lea_tlv = sym_index }, else => mcv, }); - try self.genBinOpMir(.add, Type.usize, dst_mcv, .{ .register = offset_reg }); + try self.genBinOpMir(.{ ._, .add }, Type.usize, dst_mcv, .{ .register = offset_reg }); break :result dst_mcv; }, .indirect => |reg_off| break :result .{ .indirect = .{ @@ -4710,14 +5365,14 @@ fn airStructFieldVal(self: *Self, inst: Air.Inst.Index) !void { if (load_abi_size <= 8) { const load_reg = registerAlias(dst_reg, load_abi_size); try self.asmRegisterMemory( - .mov, + .{ ._, .mov }, load_reg, Memory.sib(Memory.PtrSize.fromSize(load_abi_size), .{ .base = .{ .frame = frame_addr.index }, .disp = frame_addr.off + field_byte_off, }), ); - try self.asmRegisterImmediate(.shr, load_reg, Immediate.u(field_bit_off)); + try self.asmRegisterImmediate(.{ ._r, .sh }, load_reg, Immediate.u(field_bit_off)); } else { const tmp_reg = registerAlias( try self.register_manager.allocReg(null, gp), @@ -4728,7 +5383,7 @@ fn airStructFieldVal(self: *Self, inst: Air.Inst.Index) !void { const dst_alias = registerAlias(dst_reg, field_abi_size); try self.asmRegisterMemory( - .mov, + .{ ._, .mov }, dst_alias, Memory.sib(Memory.PtrSize.fromSize(field_abi_size), .{ .base = .{ .frame = frame_addr.index }, @@ -4736,7 +5391,7 @@ fn airStructFieldVal(self: *Self, inst: Air.Inst.Index) !void { }), ); try self.asmRegisterMemory( - .mov, + .{ ._, .mov }, tmp_reg, Memory.sib(Memory.PtrSize.fromSize(field_abi_size), .{ .base = .{ .frame = frame_addr.index }, @@ -4744,7 +5399,7 @@ fn airStructFieldVal(self: *Self, inst: Air.Inst.Index) !void { }), ); try self.asmRegisterRegisterImmediate( - .shrd, + .{ ._rd, .sh }, dst_alias, tmp_reg, Immediate.u(field_bit_off), @@ -4752,7 +5407,14 @@ fn airStructFieldVal(self: *Self, inst: Air.Inst.Index) !void { } if (field_extra_bits > 0) try self.truncateRegister(field_ty, dst_reg); - break :result .{ .register = dst_reg }; + + const dst_mcv = MCValue{ .register = dst_reg }; + const dst_rc = 
regClassForType(field_ty); + if (dst_rc.eql(gp)) break :result dst_mcv; + + const result_reg = try self.register_manager.allocReg(inst, dst_rc); + try self.genSetReg(result_reg, field_ty, dst_mcv); + break :result .{ .register = result_reg }; }, .register => |reg| { const reg_lock = self.register_manager.lockRegAssumeUnused(reg); @@ -4773,21 +5435,26 @@ fn airStructFieldVal(self: *Self, inst: Air.Inst.Index) !void { defer if (dst_mcv_lock) |lock| self.register_manager.unlockReg(lock); // Shift by struct_field_offset. - try self.genShiftBinOpMir(.shr, Type.usize, dst_mcv, .{ .immediate = field_off }); + try self.genShiftBinOpMir( + .{ ._r, .sh }, + Type.usize, + dst_mcv, + .{ .immediate = field_off }, + ); // Mask to field_bit_size bits const field_bit_size = field_ty.bitSize(self.target.*); const mask = ~@as(u64, 0) >> @intCast(u6, 64 - field_bit_size); const tmp_reg = try self.copyToTmpRegister(Type.usize, .{ .immediate = mask }); - try self.genBinOpMir(.@"and", Type.usize, dst_mcv, .{ .register = tmp_reg }); + try self.genBinOpMir(.{ ._, .@"and" }, Type.usize, dst_mcv, .{ .register = tmp_reg }); const signedness = if (field_ty.isAbiInt()) field_ty.intInfo(self.target.*).signedness else .unsigned; const field_byte_size = @intCast(u32, field_ty.abiSize(self.target.*)); if (signedness == .signed and field_byte_size < 8) { try self.asmRegisterRegister( - .movsx, + if (field_byte_size >= 4) .{ ._d, .movsx } else .{ ._, .movsx }, dst_mcv.register, registerAlias(dst_mcv.register, field_byte_size), ); @@ -4899,17 +5566,17 @@ fn genUnOp(self: *Self, maybe_inst: ?Air.Inst.Index, tag: Air.Inst.Tag, src_air: if (limb_pl.base.tag == .int_unsigned and self.regExtraBits(limb_ty) > 0) { const mask = @as(u64, math.maxInt(u64)) >> @intCast(u6, 64 - limb_pl.data); - try self.genBinOpMir(.xor, limb_ty, limb_mcv, .{ .immediate = mask }); - } else try self.genUnOpMir(.not, limb_ty, limb_mcv); + try self.genBinOpMir(.{ ._, .xor }, limb_ty, limb_mcv, .{ .immediate = mask }); + } else 
try self.genUnOpMir(.{ ._, .not }, limb_ty, limb_mcv); } }, - .neg => try self.genUnOpMir(.neg, src_ty, dst_mcv), + .neg => try self.genUnOpMir(.{ ._, .neg }, src_ty, dst_mcv), else => unreachable, } return dst_mcv; } -fn genUnOpMir(self: *Self, mir_tag: Mir.Inst.Tag, dst_ty: Type, dst_mcv: MCValue) !void { +fn genUnOpMir(self: *Self, mir_tag: Mir.Inst.FixedTag, dst_ty: Type, dst_mcv: MCValue) !void { const abi_size = @intCast(u32, dst_ty.abiSize(self.target.*)); if (abi_size > 8) return self.fail("TODO implement {} for {}", .{ mir_tag, @@ -4952,7 +5619,7 @@ fn genUnOpMir(self: *Self, mir_tag: Mir.Inst.Tag, dst_ty: Type, dst_mcv: MCValue /// Clobbers .rcx for non-immediate shift value. fn genShiftBinOpMir( self: *Self, - tag: Mir.Inst.Tag, + tag: Mir.Inst.FixedTag, ty: Type, lhs_mcv: MCValue, shift_mcv: MCValue, @@ -5037,16 +5704,16 @@ fn genShiftBinOpMir( const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg); defer self.register_manager.unlockReg(tmp_lock); - const info: struct { offsets: [2]i32, double_tag: Mir.Inst.Tag } = switch (tag) { - .shl, .sal => .{ .offsets = .{ 0, 8 }, .double_tag = .shld }, - .shr, .sar => .{ .offsets = .{ 8, 0 }, .double_tag = .shrd }, + const info: struct { offsets: [2]i32, double_tag: Mir.Inst.FixedTag } = switch (tag[0]) { + ._l => .{ .offsets = .{ 0, 8 }, .double_tag = .{ ._ld, .sh } }, + ._r => .{ .offsets = .{ 8, 0 }, .double_tag = .{ ._rd, .sh } }, else => unreachable, }; switch (lhs_mcv) { .load_frame => |dst_frame_addr| switch (rhs_mcv) { .immediate => |rhs_imm| if (rhs_imm == 0) {} else if (rhs_imm < 64) { try self.asmRegisterMemory( - .mov, + .{ ._, .mov }, tmp_reg, Memory.sib(.qword, .{ .base = .{ .frame = dst_frame_addr.index }, @@ -5073,7 +5740,7 @@ fn genShiftBinOpMir( } else { assert(rhs_imm < 128); try self.asmRegisterMemory( - .mov, + .{ ._, .mov }, tmp_reg, Memory.sib(.qword, .{ .base = .{ .frame = dst_frame_addr.index }, @@ -5084,34 +5751,30 @@ fn genShiftBinOpMir( try self.asmRegisterImmediate(tag, 
tmp_reg, Immediate.u(rhs_imm - 64)); } try self.asmMemoryRegister( - .mov, + .{ ._, .mov }, Memory.sib(.qword, .{ .base = .{ .frame = dst_frame_addr.index }, .disp = dst_frame_addr.off + info.offsets[1], }), tmp_reg, ); - switch (tag) { - .shl, .sal, .shr => { - try self.asmRegisterRegister(.xor, tmp_reg.to32(), tmp_reg.to32()); - try self.asmMemoryRegister( - .mov, - Memory.sib(.qword, .{ - .base = .{ .frame = dst_frame_addr.index }, - .disp = dst_frame_addr.off + info.offsets[0], - }), - tmp_reg, - ); - }, - .sar => try self.asmMemoryImmediate( - tag, + if (tag[0] == ._r and tag[1] == .sa) try self.asmMemoryImmediate( + tag, + Memory.sib(.qword, .{ + .base = .{ .frame = dst_frame_addr.index }, + .disp = dst_frame_addr.off + info.offsets[0], + }), + Immediate.u(63), + ) else { + try self.asmRegisterRegister(.{ ._, .xor }, tmp_reg.to32(), tmp_reg.to32()); + try self.asmMemoryRegister( + .{ ._, .mov }, Memory.sib(.qword, .{ .base = .{ .frame = dst_frame_addr.index }, .disp = dst_frame_addr.off + info.offsets[0], }), - Immediate.u(63), - ), - else => unreachable, + tmp_reg, + ); } }, else => { @@ -5125,7 +5788,7 @@ fn genShiftBinOpMir( try self.genSetReg(.cl, Type.u8, rhs_mcv); try self.asmRegisterMemory( - .mov, + .{ ._, .mov }, first_reg, Memory.sib(.qword, .{ .base = .{ .frame = dst_frame_addr.index }, @@ -5133,32 +5796,28 @@ fn genShiftBinOpMir( }), ); try self.asmRegisterMemory( - .mov, + .{ ._, .mov }, second_reg, Memory.sib(.qword, .{ .base = .{ .frame = dst_frame_addr.index }, .disp = dst_frame_addr.off + info.offsets[1], }), ); - switch (tag) { - .shl, .sal, .shr => try self.asmRegisterRegister( - .xor, - tmp_reg.to32(), - tmp_reg.to32(), - ), - .sar => { - try self.asmRegisterRegister(.mov, tmp_reg, first_reg); - try self.asmRegisterImmediate(tag, tmp_reg, Immediate.u(63)); - }, - else => unreachable, - } + if (tag[0] == ._r and tag[1] == .sa) { + try self.asmRegisterRegister(.{ ._, .mov }, tmp_reg, first_reg); + try self.asmRegisterImmediate(tag, tmp_reg, 
Immediate.u(63)); + } else try self.asmRegisterRegister( + .{ ._, .xor }, + tmp_reg.to32(), + tmp_reg.to32(), + ); try self.asmRegisterRegisterRegister(info.double_tag, second_reg, first_reg, .cl); try self.asmRegisterRegister(tag, first_reg, .cl); - try self.asmRegisterImmediate(.cmp, .cl, Immediate.u(64)); + try self.asmRegisterImmediate(.{ ._, .cmp }, .cl, Immediate.u(64)); try self.asmCmovccRegisterRegister(second_reg, first_reg, .ae); try self.asmCmovccRegisterRegister(first_reg, tmp_reg, .ae); try self.asmMemoryRegister( - .mov, + .{ ._, .mov }, Memory.sib(.qword, .{ .base = .{ .frame = dst_frame_addr.index }, .disp = dst_frame_addr.off + info.offsets[1], @@ -5166,7 +5825,7 @@ fn genShiftBinOpMir( second_reg, ); try self.asmMemoryRegister( - .mov, + .{ ._, .mov }, Memory.sib(.qword, .{ .base = .{ .frame = dst_frame_addr.index }, .disp = dst_frame_addr.off + info.offsets[0], @@ -5191,7 +5850,7 @@ fn genShiftBinOpMir( /// Asserts .rcx is free. fn genShiftBinOp( self: *Self, - tag: Air.Inst.Tag, + air_tag: Air.Inst.Tag, maybe_inst: ?Air.Inst.Index, lhs_mcv: MCValue, rhs_mcv: MCValue, @@ -5236,14 +5895,14 @@ fn genShiftBinOp( }; const signedness = lhs_ty.intInfo(self.target.*).signedness; - try self.genShiftBinOpMir(switch (tag) { + try self.genShiftBinOpMir(switch (air_tag) { .shl, .shl_exact => switch (signedness) { - .signed => .sal, - .unsigned => .shl, + .signed => .{ ._l, .sa }, + .unsigned => .{ ._l, .sh }, }, .shr, .shr_exact => switch (signedness) { - .signed => .sar, - .unsigned => .shr, + .signed => .{ ._r, .sa }, + .unsigned => .{ ._r, .sh }, }, else => unreachable, }, lhs_ty, dst_mcv, rhs_mcv); @@ -5303,20 +5962,18 @@ fn genMulDivBinOp( try self.register_manager.getReg(.rax, track_inst_rax); try self.register_manager.getReg(.rdx, track_inst_rdx); - const mir_tag: Mir.Inst.Tag = switch (signedness) { + try self.genIntMulDivOpMir(switch (signedness) { .signed => switch (tag) { - .mul, .mulwrap => .imul, - .div_trunc, .div_exact, .rem => .idiv, + .mul, 
.mulwrap => .{ .i_, .mul }, + .div_trunc, .div_exact, .rem => .{ .i_, .div }, else => unreachable, }, .unsigned => switch (tag) { - .mul, .mulwrap => .mul, - .div_trunc, .div_exact, .rem => .div, + .mul, .mulwrap => .{ ._, .mul }, + .div_trunc, .div_exact, .rem => .{ ._, .div }, else => unreachable, }, - }; - - try self.genIntMulDivOpMir(mir_tag, ty, lhs, rhs); + }, ty, lhs, rhs); if (dst_abi_size <= 8) return .{ .register = registerAlias(switch (tag) { .mul, .mulwrap, .div_trunc, .div_exact => .rax, @@ -5326,7 +5983,7 @@ fn genMulDivBinOp( const dst_mcv = try self.allocRegOrMemAdvanced(dst_ty, maybe_inst, false); try self.asmMemoryRegister( - .mov, + .{ ._, .mov }, Memory.sib(.qword, .{ .base = .{ .frame = dst_mcv.load_frame.index }, .disp = dst_mcv.load_frame.off, @@ -5334,7 +5991,7 @@ fn genMulDivBinOp( .rax, ); try self.asmMemoryRegister( - .mov, + .{ ._, .mov }, Memory.sib(.qword, .{ .base = .{ .frame = dst_mcv.load_frame.index }, .disp = dst_mcv.load_frame.off + 8, @@ -5375,12 +6032,12 @@ fn genMulDivBinOp( try self.copyToRegisterWithInstTracking(inst, ty, lhs) else .{ .register = try self.copyToTmpRegister(ty, lhs) }; - try self.genBinOpMir(.sub, ty, result, div_floor); + try self.genBinOpMir(.{ ._, .sub }, ty, result, div_floor); return result; }, .unsigned => { - try self.genIntMulDivOpMir(.div, ty, lhs, rhs); + try self.genIntMulDivOpMir(.{ ._, .div }, ty, lhs, rhs); return .{ .register = registerAlias(.rdx, abi_size) }; }, } @@ -5422,7 +6079,7 @@ fn genMulDivBinOp( switch (signedness) { .signed => return try self.genInlineIntDivFloor(ty, lhs, actual_rhs), .unsigned => { - try self.genIntMulDivOpMir(.div, ty, lhs, actual_rhs); + try self.genIntMulDivOpMir(.{ ._, .div }, ty, lhs, actual_rhs); return .{ .register = registerAlias(.rax, abi_size) }; }, } @@ -5432,25 +6089,22 @@ fn genMulDivBinOp( } } -/// Result is always a register. 
fn genBinOp( self: *Self, maybe_inst: ?Air.Inst.Index, - tag: Air.Inst.Tag, + air_tag: Air.Inst.Tag, lhs_air: Air.Inst.Ref, rhs_air: Air.Inst.Ref, ) !MCValue { - const lhs = try self.resolveInst(lhs_air); - const rhs = try self.resolveInst(rhs_air); + const lhs_mcv = try self.resolveInst(lhs_air); + const rhs_mcv = try self.resolveInst(rhs_air); const lhs_ty = self.air.typeOf(lhs_air); const rhs_ty = self.air.typeOf(rhs_air); - if (lhs_ty.zigTypeTag() == .Vector) { - return self.fail("TODO implement genBinOp for {}", .{lhs_ty.fmt(self.bin_file.options.module.?)}); - } + const abi_size = @intCast(u32, lhs_ty.abiSize(self.target.*)); - switch (lhs) { + switch (lhs_mcv) { .immediate => |imm| switch (imm) { - 0 => switch (tag) { + 0 => switch (air_tag) { .sub, .subwrap => return self.genUnOp(maybe_inst, .neg, rhs_air), else => {}, }, @@ -5459,9 +6113,10 @@ fn genBinOp( else => {}, } - const is_commutative = switch (tag) { + const is_commutative = switch (air_tag) { .add, .addwrap, + .mul, .bool_or, .bit_or, .bool_and, @@ -5473,48 +6128,42 @@ fn genBinOp( else => false, }; - const dst_mem_ok = switch (tag) { - .add, - .addwrap, - .sub, - .subwrap, - .mul, - .div_float, - .div_exact, - .div_trunc, - .div_floor, - => !lhs_ty.isRuntimeFloat(), - - else => true, + const vec_op = switch (lhs_ty.zigTypeTag()) { + else => false, + .Float, .Vector => true, }; - const lhs_lock: ?RegisterLock = switch (lhs) { + const lhs_lock: ?RegisterLock = switch (lhs_mcv) { .register => |reg| self.register_manager.lockRegAssumeUnused(reg), else => null, }; defer if (lhs_lock) |lock| self.register_manager.unlockReg(lock); - const rhs_lock: ?RegisterLock = switch (rhs) { + const rhs_lock: ?RegisterLock = switch (rhs_mcv) { .register => |reg| self.register_manager.lockReg(reg), else => null, }; defer if (rhs_lock) |lock| self.register_manager.unlockReg(lock); - var flipped: bool = false; + var flipped = false; + var copied_to_dst = true; const dst_mcv: MCValue = dst: { if (maybe_inst) |inst| { - 
if ((dst_mem_ok or lhs.isRegister()) and self.reuseOperand(inst, lhs_air, 0, lhs)) { - break :dst lhs; + if ((!vec_op or lhs_mcv.isRegister()) and self.reuseOperand(inst, lhs_air, 0, lhs_mcv)) { + break :dst lhs_mcv; } - if (is_commutative and (dst_mem_ok or rhs.isRegister()) and - self.reuseOperand(inst, rhs_air, 1, rhs)) + if (is_commutative and (!vec_op or rhs_mcv.isRegister()) and + self.reuseOperand(inst, rhs_air, 1, rhs_mcv)) { flipped = true; - break :dst rhs; + break :dst rhs_mcv; } } const dst_mcv = try self.allocRegOrMemAdvanced(lhs_ty, maybe_inst, true); - try self.genCopy(lhs_ty, dst_mcv, lhs); + if (vec_op and lhs_mcv.isRegister() and self.hasFeature(.avx)) + copied_to_dst = false + else + try self.genCopy(lhs_ty, dst_mcv, lhs_mcv); break :dst dst_mcv; }; const dst_lock: ?RegisterLock = switch (dst_mcv) { @@ -5523,160 +6172,52 @@ fn genBinOp( }; defer if (dst_lock) |lock| self.register_manager.unlockReg(lock); - const src_mcv = if (flipped) lhs else rhs; - switch (tag) { - .add, - .addwrap, - => try self.genBinOpMir(switch (lhs_ty.zigTypeTag()) { - else => .add, - .Float => switch (lhs_ty.floatBits(self.target.*)) { - 32 => if (Target.x86.featureSetHas(self.target.cpu.features, .sse)) - .addss - else - return self.fail("TODO implement genBinOp for {s} {} without sse", .{ - @tagName(tag), lhs_ty.fmt(self.bin_file.options.module.?), - }), - 64 => if (Target.x86.featureSetHas(self.target.cpu.features, .sse2)) - .addsd - else - return self.fail("TODO implement genBinOp for {s} {} without sse2", .{ - @tagName(tag), lhs_ty.fmt(self.bin_file.options.module.?), - }), - else => return self.fail("TODO implement genBinOp for {s} {}", .{ - @tagName(tag), lhs_ty.fmt(self.bin_file.options.module.?), - }), - }, - }, lhs_ty, dst_mcv, src_mcv), - - .sub, - .subwrap, - => try self.genBinOpMir(switch (lhs_ty.zigTypeTag()) { - else => .sub, - .Float => switch (lhs_ty.floatBits(self.target.*)) { - 32 => if (Target.x86.featureSetHas(self.target.cpu.features, .sse)) - .subss 
- else - return self.fail("TODO implement genBinOp for {s} {} without sse", .{ - @tagName(tag), lhs_ty.fmt(self.bin_file.options.module.?), - }), - 64 => if (Target.x86.featureSetHas(self.target.cpu.features, .sse2)) - .subsd - else - return self.fail("TODO implement genBinOp for {s} {} without sse2", .{ - @tagName(tag), lhs_ty.fmt(self.bin_file.options.module.?), - }), - else => return self.fail("TODO implement genBinOp for {s} {}", .{ - @tagName(tag), lhs_ty.fmt(self.bin_file.options.module.?), - }), - }, - }, lhs_ty, dst_mcv, src_mcv), - - .mul => try self.genBinOpMir(switch (lhs_ty.zigTypeTag()) { - else => return self.fail("TODO implement genBinOp for {s} {}", .{ - @tagName(tag), lhs_ty.fmt(self.bin_file.options.module.?), - }), - .Float => switch (lhs_ty.floatBits(self.target.*)) { - 32 => if (Target.x86.featureSetHas(self.target.cpu.features, .sse)) - .mulss - else - return self.fail("TODO implement genBinOp for {s} {} without sse", .{ - @tagName(tag), lhs_ty.fmt(self.bin_file.options.module.?), - }), - 64 => if (Target.x86.featureSetHas(self.target.cpu.features, .sse2)) - .mulsd - else - return self.fail("TODO implement genBinOp for {s} {} without sse2", .{ - @tagName(tag), lhs_ty.fmt(self.bin_file.options.module.?), - }), - else => return self.fail("TODO implement genBinOp for {s} {}", .{ - @tagName(tag), lhs_ty.fmt(self.bin_file.options.module.?), - }), - }, - }, lhs_ty, dst_mcv, src_mcv), + const src_mcv = if (flipped) lhs_mcv else rhs_mcv; + if (!vec_op) { + switch (air_tag) { + .add, + .addwrap, + => try self.genBinOpMir(.{ ._, .add }, lhs_ty, dst_mcv, src_mcv), - .div_float, - .div_exact, - .div_trunc, - .div_floor, - => { - try self.genBinOpMir(switch (lhs_ty.zigTypeTag()) { - else => return self.fail("TODO implement genBinOp for {s} {}", .{ - @tagName(tag), lhs_ty.fmt(self.bin_file.options.module.?), - }), - .Float => switch (lhs_ty.floatBits(self.target.*)) { - 32 => if (Target.x86.featureSetHas(self.target.cpu.features, .sse)) - .divss - else - 
return self.fail("TODO implement genBinOp for {s} {} without sse", .{ - @tagName(tag), lhs_ty.fmt(self.bin_file.options.module.?), - }), - 64 => if (Target.x86.featureSetHas(self.target.cpu.features, .sse2)) - .divsd - else - return self.fail("TODO implement genBinOp for {s} {} without sse2", .{ - @tagName(tag), lhs_ty.fmt(self.bin_file.options.module.?), - }), - else => return self.fail("TODO implement genBinOp for {s} {}", .{ - @tagName(tag), lhs_ty.fmt(self.bin_file.options.module.?), - }), - }, - }, lhs_ty, dst_mcv, src_mcv); - switch (tag) { - .div_float, - .div_exact, - => {}, - .div_trunc, - .div_floor, - => if (Target.x86.featureSetHas(self.target.cpu.features, .sse4_1)) { - const abi_size = @intCast(u32, lhs_ty.abiSize(self.target.*)); - const dst_alias = registerAlias(dst_mcv.register, abi_size); - try self.asmRegisterRegisterImmediate(switch (lhs_ty.floatBits(self.target.*)) { - 32 => .roundss, - 64 => .roundsd, - else => unreachable, - }, dst_alias, dst_alias, Immediate.u(switch (tag) { - .div_trunc => 0b1_0_11, - .div_floor => 0b1_0_01, + .sub, + .subwrap, + => try self.genBinOpMir(.{ ._, .sub }, lhs_ty, dst_mcv, src_mcv), + + .ptr_add, + .ptr_sub, + => { + const tmp_reg = try self.copyToTmpRegister(rhs_ty, src_mcv); + const tmp_mcv = MCValue{ .register = tmp_reg }; + const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg); + defer self.register_manager.unlockReg(tmp_lock); + + const elem_size = lhs_ty.elemType2().abiSize(self.target.*); + try self.genIntMulComplexOpMir(rhs_ty, tmp_mcv, .{ .immediate = elem_size }); + try self.genBinOpMir( + switch (air_tag) { + .ptr_add => .{ ._, .add }, + .ptr_sub => .{ ._, .sub }, else => unreachable, - })); - } else return self.fail("TODO implement genBinOp for {s} {} without sse4_1", .{ - @tagName(tag), lhs_ty.fmt(self.bin_file.options.module.?), - }), - else => unreachable, - } - }, - - .ptr_add, - .ptr_sub, - => { - const tmp_reg = try self.copyToTmpRegister(rhs_ty, src_mcv); - const tmp_mcv = 
MCValue{ .register = tmp_reg }; - const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg); - defer self.register_manager.unlockReg(tmp_lock); - - const elem_size = lhs_ty.elemType2().abiSize(self.target.*); - try self.genIntMulComplexOpMir(rhs_ty, tmp_mcv, .{ .immediate = elem_size }); - try self.genBinOpMir(switch (tag) { - .ptr_add => .add, - .ptr_sub => .sub, - else => unreachable, - }, lhs_ty, dst_mcv, tmp_mcv); - }, + }, + lhs_ty, + dst_mcv, + tmp_mcv, + ); + }, - .bool_or, - .bit_or, - => try self.genBinOpMir(.@"or", lhs_ty, dst_mcv, src_mcv), + .bool_or, + .bit_or, + => try self.genBinOpMir(.{ ._, .@"or" }, lhs_ty, dst_mcv, src_mcv), - .bool_and, - .bit_and, - => try self.genBinOpMir(.@"and", lhs_ty, dst_mcv, src_mcv), + .bool_and, + .bit_and, + => try self.genBinOpMir(.{ ._, .@"and" }, lhs_ty, dst_mcv, src_mcv), - .xor => try self.genBinOpMir(.xor, lhs_ty, dst_mcv, src_mcv), + .xor => try self.genBinOpMir(.{ ._, .xor }, lhs_ty, dst_mcv, src_mcv), - .min, - .max, - => switch (lhs_ty.zigTypeTag()) { - .Int => { + .min, + .max, + => { const mat_src_mcv: MCValue = if (switch (src_mcv) { .immediate, .eflags, @@ -5698,16 +6239,16 @@ fn genBinOp( }; defer if (mat_mcv_lock) |lock| self.register_manager.unlockReg(lock); - try self.genBinOpMir(.cmp, lhs_ty, dst_mcv, mat_src_mcv); + try self.genBinOpMir(.{ ._, .cmp }, lhs_ty, dst_mcv, mat_src_mcv); const int_info = lhs_ty.intInfo(self.target.*); const cc: Condition = switch (int_info.signedness) { - .unsigned => switch (tag) { + .unsigned => switch (air_tag) { .min => .a, .max => .b, else => unreachable, }, - .signed => switch (tag) { + .signed => switch (air_tag) { .min => .g, .max => .l, else => unreachable, @@ -5766,32 +6307,404 @@ fn genBinOp( } try self.genCopy(lhs_ty, dst_mcv, .{ .register = tmp_reg }); }, - .Float => try self.genBinOpMir(switch (lhs_ty.floatBits(self.target.*)) { - 32 => switch (tag) { - .min => .minss, - .max => .maxss, - else => unreachable, - }, - 64 => switch (tag) { - .min => 
.minsd, - .max => .maxsd, - else => unreachable, - }, - else => return self.fail("TODO implement genBinOp for {s} {}", .{ - @tagName(tag), lhs_ty.fmt(self.bin_file.options.module.?), - }), - }, lhs_ty, dst_mcv, src_mcv), + else => return self.fail("TODO implement genBinOp for {s} {}", .{ - @tagName(tag), lhs_ty.fmt(self.bin_file.options.module.?), + @tagName(air_tag), lhs_ty.fmt(self.bin_file.options.module.?), }), - }, + } + return dst_mcv; + } + const dst_reg = registerAlias(dst_mcv.getReg().?, abi_size); + const mir_tag = if (@as(?Mir.Inst.FixedTag, switch (lhs_ty.zigTypeTag()) { + else => unreachable, + .Float => switch (lhs_ty.floatBits(self.target.*)) { + 16 => if (self.hasFeature(.f16c)) { + const tmp_reg = (try self.register_manager.allocReg(null, sse)).to128(); + const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg); + defer self.register_manager.unlockReg(tmp_lock); + + if (src_mcv.isMemory()) try self.asmRegisterRegisterMemoryImmediate( + .{ .vp_w, .insr }, + dst_reg, + dst_reg, + src_mcv.mem(.word), + Immediate.u(1), + ) else try self.asmRegisterRegisterRegister( + .{ .vp_, .unpcklwd }, + dst_reg, + dst_reg, + (if (src_mcv.isRegister()) + src_mcv.getReg().? 
+ else + try self.copyToTmpRegister(rhs_ty, src_mcv)).to128(), + ); + try self.asmRegisterRegister(.{ .v_, .cvtph2ps }, dst_reg, dst_reg); + try self.asmRegisterRegister(.{ .v_, .movshdup }, tmp_reg, dst_reg); + try self.asmRegisterRegisterRegister( + switch (air_tag) { + .add => .{ .v_ss, .add }, + .sub => .{ .v_ss, .sub }, + .div_float, .div_trunc, .div_floor, .div_exact => .{ .v_ss, .div }, + .max => .{ .v_ss, .max }, + .min => .{ .v_ss, .max }, + else => unreachable, + }, + dst_reg, + dst_reg, + tmp_reg, + ); + try self.asmRegisterRegisterImmediate( + .{ .v_, .cvtps2ph }, + dst_reg, + dst_reg, + Immediate.u(0b1_00), + ); + return dst_mcv; + } else null, + 32 => switch (air_tag) { + .add => if (self.hasFeature(.avx)) .{ .v_ss, .add } else .{ ._ss, .add }, + .sub => if (self.hasFeature(.avx)) .{ .v_ss, .sub } else .{ ._ss, .sub }, + .mul => if (self.hasFeature(.avx)) .{ .v_ss, .mul } else .{ ._ss, .mul }, + .div_float, + .div_trunc, + .div_floor, + .div_exact, + => if (self.hasFeature(.avx)) .{ .v_ss, .div } else .{ ._ss, .div }, + .max => if (self.hasFeature(.avx)) .{ .v_ss, .max } else .{ ._ss, .max }, + .min => if (self.hasFeature(.avx)) .{ .v_ss, .min } else .{ ._ss, .min }, + else => unreachable, + }, + 64 => switch (air_tag) { + .add => if (self.hasFeature(.avx)) .{ .v_sd, .add } else .{ ._sd, .add }, + .sub => if (self.hasFeature(.avx)) .{ .v_sd, .sub } else .{ ._sd, .sub }, + .mul => if (self.hasFeature(.avx)) .{ .v_sd, .mul } else .{ ._sd, .mul }, + .div_float, + .div_trunc, + .div_floor, + .div_exact, + => if (self.hasFeature(.avx)) .{ .v_sd, .div } else .{ ._sd, .div }, + .max => if (self.hasFeature(.avx)) .{ .v_sd, .max } else .{ ._sd, .max }, + .min => if (self.hasFeature(.avx)) .{ .v_sd, .min } else .{ ._sd, .min }, + else => unreachable, + }, + 80, 128 => null, + else => unreachable, + }, + .Vector => switch (lhs_ty.childType().zigTypeTag()) { + else => null, + .Float => switch (lhs_ty.childType().floatBits(self.target.*)) { + 16 => if 
(self.hasFeature(.f16c)) switch (lhs_ty.vectorLen()) { + 1 => { + const tmp_reg = (try self.register_manager.allocReg(null, sse)).to128(); + const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg); + defer self.register_manager.unlockReg(tmp_lock); + + if (src_mcv.isMemory()) try self.asmRegisterRegisterMemoryImmediate( + .{ .vp_w, .insr }, + dst_reg, + dst_reg, + src_mcv.mem(.word), + Immediate.u(1), + ) else try self.asmRegisterRegisterRegister( + .{ .vp_, .unpcklwd }, + dst_reg, + dst_reg, + (if (src_mcv.isRegister()) + src_mcv.getReg().? + else + try self.copyToTmpRegister(rhs_ty, src_mcv)).to128(), + ); + try self.asmRegisterRegister(.{ .v_, .cvtph2ps }, dst_reg, dst_reg); + try self.asmRegisterRegister(.{ .v_, .movshdup }, tmp_reg, dst_reg); + try self.asmRegisterRegisterRegister( + switch (air_tag) { + .add => .{ .v_ss, .add }, + .sub => .{ .v_ss, .sub }, + .div_float, .div_trunc, .div_floor, .div_exact => .{ .v_ss, .div }, + .max => .{ .v_ss, .max }, + .min => .{ .v_ss, .max }, + else => unreachable, + }, + dst_reg, + dst_reg, + tmp_reg, + ); + try self.asmRegisterRegisterImmediate( + .{ .v_, .cvtps2ph }, + dst_reg, + dst_reg, + Immediate.u(0b1_00), + ); + return dst_mcv; + }, + 2 => { + const tmp_reg = (try self.register_manager.allocReg(null, sse)).to128(); + const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg); + defer self.register_manager.unlockReg(tmp_lock); + + if (src_mcv.isMemory()) try self.asmRegisterMemoryImmediate( + .{ .vp_d, .insr }, + dst_reg, + src_mcv.mem(.dword), + Immediate.u(1), + ) else try self.asmRegisterRegisterRegister( + .{ .v_ps, .unpckl }, + dst_reg, + dst_reg, + (if (src_mcv.isRegister()) + src_mcv.getReg().? 
+ else + try self.copyToTmpRegister(rhs_ty, src_mcv)).to128(), + ); + try self.asmRegisterRegister(.{ .v_, .cvtph2ps }, dst_reg, dst_reg); + try self.asmRegisterRegisterRegister( + .{ .v_ps, .movhl }, + tmp_reg, + dst_reg, + dst_reg, + ); + try self.asmRegisterRegisterRegister( + switch (air_tag) { + .add => .{ .v_ps, .add }, + .sub => .{ .v_ps, .sub }, + .div_float, .div_trunc, .div_floor, .div_exact => .{ .v_ps, .div }, + .max => .{ .v_ps, .max }, + .min => .{ .v_ps, .max }, + else => unreachable, + }, + dst_reg, + dst_reg, + tmp_reg, + ); + try self.asmRegisterRegisterImmediate( + .{ .v_, .cvtps2ph }, + dst_reg, + dst_reg, + Immediate.u(0b1_00), + ); + return dst_mcv; + }, + 3...4 => { + const tmp_reg = (try self.register_manager.allocReg(null, sse)).to128(); + const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg); + defer self.register_manager.unlockReg(tmp_lock); + + try self.asmRegisterRegister(.{ .v_, .cvtph2ps }, dst_reg, dst_reg); + if (src_mcv.isMemory()) try self.asmRegisterMemory( + .{ .v_, .cvtph2ps }, + tmp_reg, + src_mcv.mem(.qword), + ) else try self.asmRegisterRegister( + .{ .v_, .cvtph2ps }, + tmp_reg, + (if (src_mcv.isRegister()) + src_mcv.getReg().? 
+ else + try self.copyToTmpRegister(rhs_ty, src_mcv)).to128(), + ); + try self.asmRegisterRegisterRegister( + switch (air_tag) { + .add => .{ .v_ps, .add }, + .sub => .{ .v_ps, .sub }, + .div_float, .div_trunc, .div_floor, .div_exact => .{ .v_ps, .div }, + .max => .{ .v_ps, .max }, + .min => .{ .v_ps, .max }, + else => unreachable, + }, + dst_reg, + dst_reg, + tmp_reg, + ); + try self.asmRegisterRegisterImmediate( + .{ .v_, .cvtps2ph }, + dst_reg, + dst_reg, + Immediate.u(0b1_00), + ); + return dst_mcv; + }, + 5...8 => { + const tmp_reg = (try self.register_manager.allocReg(null, sse)).to256(); + const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg); + defer self.register_manager.unlockReg(tmp_lock); + + try self.asmRegisterRegister(.{ .v_, .cvtph2ps }, dst_reg.to256(), dst_reg); + if (src_mcv.isMemory()) try self.asmRegisterMemory( + .{ .v_, .cvtph2ps }, + tmp_reg, + src_mcv.mem(.xword), + ) else try self.asmRegisterRegister( + .{ .v_, .cvtph2ps }, + tmp_reg, + (if (src_mcv.isRegister()) + src_mcv.getReg().? 
+ else + try self.copyToTmpRegister(rhs_ty, src_mcv)).to128(), + ); + try self.asmRegisterRegisterRegister( + switch (air_tag) { + .add => .{ .v_ps, .add }, + .sub => .{ .v_ps, .sub }, + .div_float, .div_trunc, .div_floor, .div_exact => .{ .v_ps, .div }, + .max => .{ .v_ps, .max }, + .min => .{ .v_ps, .max }, + else => unreachable, + }, + dst_reg.to256(), + dst_reg.to256(), + tmp_reg, + ); + try self.asmRegisterRegisterImmediate( + .{ .v_, .cvtps2ph }, + dst_reg, + dst_reg.to256(), + Immediate.u(0b1_00), + ); + return dst_mcv; + }, + else => null, + } else null, + 32 => switch (lhs_ty.vectorLen()) { + 1 => switch (air_tag) { + .add => if (self.hasFeature(.avx)) .{ .v_ss, .add } else .{ ._ss, .add }, + .sub => if (self.hasFeature(.avx)) .{ .v_ss, .sub } else .{ ._ss, .sub }, + .mul => if (self.hasFeature(.avx)) .{ .v_ss, .mul } else .{ ._ss, .mul }, + .div_float, + .div_trunc, + .div_floor, + .div_exact, + => if (self.hasFeature(.avx)) .{ .v_ss, .div } else .{ ._ss, .div }, + .max => if (self.hasFeature(.avx)) .{ .v_ss, .max } else .{ ._ss, .max }, + .min => if (self.hasFeature(.avx)) .{ .v_ss, .min } else .{ ._ss, .min }, + else => unreachable, + }, + 2...4 => switch (air_tag) { + .add => if (self.hasFeature(.avx)) .{ .v_ps, .add } else .{ ._ps, .add }, + .sub => if (self.hasFeature(.avx)) .{ .v_ps, .sub } else .{ ._ps, .sub }, + .mul => if (self.hasFeature(.avx)) .{ .v_ps, .mul } else .{ ._ps, .mul }, + .div_float, + .div_trunc, + .div_floor, + .div_exact, + => if (self.hasFeature(.avx)) .{ .v_ps, .div } else .{ ._ps, .div }, + .max => if (self.hasFeature(.avx)) .{ .v_ps, .max } else .{ ._ps, .max }, + .min => if (self.hasFeature(.avx)) .{ .v_ps, .min } else .{ ._ps, .min }, + else => unreachable, + }, + 5...8 => if (self.hasFeature(.avx)) switch (air_tag) { + .add => .{ .v_ps, .add }, + .sub => .{ .v_ps, .sub }, + .mul => .{ .v_ps, .mul }, + .div_float, .div_trunc, .div_floor, .div_exact => .{ .v_ps, .div }, + .max => .{ .v_ps, .max }, + .min => .{ .v_ps, .min }, 
+ else => unreachable, + } else null, + else => null, + }, + 64 => switch (lhs_ty.vectorLen()) { + 1 => switch (air_tag) { + .add => if (self.hasFeature(.avx)) .{ .v_sd, .add } else .{ ._sd, .add }, + .sub => if (self.hasFeature(.avx)) .{ .v_sd, .sub } else .{ ._sd, .sub }, + .mul => if (self.hasFeature(.avx)) .{ .v_sd, .mul } else .{ ._sd, .mul }, + .div_float, + .div_trunc, + .div_floor, + .div_exact, + => if (self.hasFeature(.avx)) .{ .v_sd, .div } else .{ ._sd, .div }, + .max => if (self.hasFeature(.avx)) .{ .v_sd, .max } else .{ ._sd, .max }, + .min => if (self.hasFeature(.avx)) .{ .v_sd, .min } else .{ ._sd, .min }, + else => unreachable, + }, + 2 => switch (air_tag) { + .add => if (self.hasFeature(.avx)) .{ .v_pd, .add } else .{ ._pd, .add }, + .sub => if (self.hasFeature(.avx)) .{ .v_pd, .sub } else .{ ._pd, .sub }, + .mul => if (self.hasFeature(.avx)) .{ .v_pd, .mul } else .{ ._pd, .mul }, + .div_float, + .div_trunc, + .div_floor, + .div_exact, + => if (self.hasFeature(.avx)) .{ .v_pd, .div } else .{ ._pd, .div }, + .max => if (self.hasFeature(.avx)) .{ .v_pd, .max } else .{ ._pd, .max }, + .min => if (self.hasFeature(.avx)) .{ .v_pd, .min } else .{ ._pd, .min }, + else => unreachable, + }, + 3...4 => if (self.hasFeature(.avx)) switch (air_tag) { + .add => .{ .v_pd, .add }, + .sub => .{ .v_pd, .sub }, + .mul => .{ .v_pd, .mul }, + .div_float, .div_trunc, .div_floor, .div_exact => .{ .v_pd, .div }, + .max => .{ .v_pd, .max }, + .min => .{ .v_pd, .min }, + else => unreachable, + } else null, + else => null, + }, + 80, 128 => null, + else => unreachable, + }, + }, + })) |tag| tag else return self.fail("TODO implement genBinOp for {s} {}", .{ + @tagName(air_tag), lhs_ty.fmt(self.bin_file.options.module.?), + }); + if (self.hasFeature(.avx)) { + const src1_alias = + if (copied_to_dst) dst_reg else registerAlias(lhs_mcv.getReg().?, abi_size); + if (src_mcv.isMemory()) try self.asmRegisterRegisterMemory( + mir_tag, + dst_reg, + src1_alias, + 
src_mcv.mem(Memory.PtrSize.fromSize(abi_size)), + ) else try self.asmRegisterRegisterRegister( + mir_tag, + dst_reg, + src1_alias, + registerAlias(if (src_mcv.isRegister()) + src_mcv.getReg().? + else + try self.copyToTmpRegister(rhs_ty, src_mcv), abi_size), + ); + } else { + assert(copied_to_dst); + if (src_mcv.isMemory()) try self.asmRegisterMemory( + mir_tag, + dst_reg, + src_mcv.mem(Memory.PtrSize.fromSize(abi_size)), + ) else try self.asmRegisterRegister( + mir_tag, + dst_reg, + registerAlias(if (src_mcv.isRegister()) + src_mcv.getReg().? + else + try self.copyToTmpRegister(rhs_ty, src_mcv), abi_size), + ); + } + switch (air_tag) { + .add, .sub, .mul, .div_float, .div_exact => {}, + .div_trunc, .div_floor => try self.genRound( + lhs_ty, + dst_reg, + .{ .register = dst_reg }, + switch (air_tag) { + .div_trunc => 0b1_0_11, + .div_floor => 0b1_0_01, + else => unreachable, + }, + ), + .max, .min => {}, // TODO: unordered select else => unreachable, } return dst_mcv; } -fn genBinOpMir(self: *Self, mir_tag: Mir.Inst.Tag, ty: Type, dst_mcv: MCValue, src_mcv: MCValue) !void { +fn genBinOpMir( + self: *Self, + mir_tag: Mir.Inst.FixedTag, + ty: Type, + dst_mcv: MCValue, + src_mcv: MCValue, +) !void { const abi_size = @intCast(u32, ty.abiSize(self.target.*)); switch (dst_mcv) { .none, @@ -5818,20 +6731,11 @@ fn genBinOpMir(self: *Self, mir_tag: Mir.Inst.Tag, ty: Type, dst_mcv: MCValue, s .register_overflow, .reserved_frame, => unreachable, - .register => |src_reg| switch (ty.zigTypeTag()) { - .Float => { - if (!Target.x86.featureSetHas(self.target.cpu.features, .sse)) - return self.fail("TODO genBinOpMir for {s} {} without sse", .{ - @tagName(mir_tag), ty.fmt(self.bin_file.options.module.?), - }); - return self.asmRegisterRegister(mir_tag, dst_reg.to128(), src_reg.to128()); - }, - else => try self.asmRegisterRegister( - mir_tag, - dst_alias, - registerAlias(src_reg, abi_size), - ), - }, + .register => |src_reg| try self.asmRegisterRegister( + mir_tag, + dst_alias, + 
registerAlias(src_reg, abi_size), + ), .immediate => |imm| switch (self.regBitSize(ty)) { 8 => try self.asmRegisterImmediate( mir_tag, @@ -6005,14 +6909,14 @@ fn genBinOpMir(self: *Self, mir_tag: Mir.Inst.Tag, ty: Type, dst_mcv: MCValue, s }; var off: i32 = 0; while (off < abi_size) : (off += 8) { - const mir_limb_tag = switch (off) { + const mir_limb_tag: Mir.Inst.FixedTag = switch (off) { 0 => mir_tag, - else => switch (mir_tag) { - .add => .adc, - .sub, .cmp => .sbb, + else => switch (mir_tag[1]) { + .add => .{ ._, .adc }, + .sub, .cmp => .{ ._, .sbb }, .@"or", .@"and", .xor => mir_tag, else => return self.fail("TODO genBinOpMir implement large ABI for {s}", .{ - @tagName(mir_tag), + @tagName(mir_tag[1]), }), }, }; @@ -6184,14 +7088,14 @@ fn genIntMulComplexOpMir(self: *Self, dst_ty: Type, dst_mcv: MCValue, src_mcv: M .reserved_frame, => unreachable, .register => |src_reg| try self.asmRegisterRegister( - .imul, + .{ .i_, .mul }, dst_alias, registerAlias(src_reg, abi_size), ), .immediate => |imm| { if (math.cast(i32, imm)) |small| { try self.asmRegisterRegisterImmediate( - .imul, + .{ .i_, .mul }, dst_alias, dst_alias, Immediate.s(small), @@ -6211,19 +7115,19 @@ fn genIntMulComplexOpMir(self: *Self, dst_ty: Type, dst_mcv: MCValue, src_mcv: M .lea_tlv, .lea_frame, => try self.asmRegisterRegister( - .imul, + .{ .i_, .mul }, dst_alias, registerAlias(try self.copyToTmpRegister(dst_ty, src_mcv), abi_size), ), .memory, .indirect, .load_frame => try self.asmRegisterMemory( - .imul, + .{ .i_, .mul }, dst_alias, Memory.sib(Memory.PtrSize.fromSize(abi_size), switch (src_mcv) { .memory => |addr| .{ .base = .{ .reg = .ds }, .disp = math.cast(i32, @bitCast(i64, addr)) orelse return self.asmRegisterRegister( - .imul, + .{ .i_, .mul }, dst_alias, registerAlias(try self.copyToTmpRegister(dst_ty, src_mcv), abi_size), ), @@ -6348,12 +7252,12 @@ fn genVarDbgInfo( } fn airTrap(self: *Self) !void { - try self.asmOpOnly(.ud2); + try self.asmOpOnly(.{ ._, .ud2 }); return 
self.finishAirBookkeeping(); } fn airBreakpoint(self: *Self) !void { - try self.asmOpOnly(.int3); + try self.asmOpOnly(.{ ._, .int3 }); return self.finishAirBookkeeping(); } @@ -6374,7 +7278,7 @@ fn airFence(self: *Self, inst: Air.Inst.Index) !void { switch (order) { .Unordered, .Monotonic => unreachable, .Acquire, .Release, .AcqRel => {}, - .SeqCst => try self.asmOpOnly(.mfence), + .SeqCst => try self.asmOpOnly(.{ ._, .mfence }), } return self.finishAirBookkeeping(); } @@ -6468,7 +7372,7 @@ fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallModifier const atom = elf_file.getAtom(atom_index); _ = try atom.getOrCreateOffsetTableEntry(elf_file); const got_addr = atom.getOffsetTableAddress(elf_file); - try self.asmMemory(.call, Memory.sib(.qword, .{ + try self.asmMemory(.{ ._, .call }, Memory.sib(.qword, .{ .base = .{ .reg = .ds }, .disp = @intCast(i32, got_addr), })); @@ -6476,12 +7380,12 @@ fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallModifier const atom = try coff_file.getOrCreateAtomForDecl(func.owner_decl); const sym_index = coff_file.getAtom(atom).getSymbolIndex().?; try self.genSetReg(.rax, Type.usize, .{ .lea_got = sym_index }); - try self.asmRegister(.call, .rax); + try self.asmRegister(.{ ._, .call }, .rax); } else if (self.bin_file.cast(link.File.MachO)) |macho_file| { const atom = try macho_file.getOrCreateAtomForDecl(func.owner_decl); const sym_index = macho_file.getAtom(atom).getSymbolIndex().?; try self.genSetReg(.rax, Type.usize, .{ .lea_got = sym_index }); - try self.asmRegister(.call, .rax); + try self.asmRegister(.{ ._, .call }, .rax); } else if (self.bin_file.cast(link.File.Plan9)) |p9| { const decl_block_index = try p9.seeDecl(func.owner_decl); const decl_block = p9.getDeclBlock(decl_block_index); @@ -6490,7 +7394,7 @@ fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallModifier const got_addr = p9.bases.data; const got_index = decl_block.got_index.?; const fn_got_addr = got_addr 
+ got_index * ptr_bytes; - try self.asmMemory(.call, Memory.sib(.qword, .{ + try self.asmMemory(.{ ._, .call }, Memory.sib(.qword, .{ .base = .{ .reg = .ds }, .disp = @intCast(i32, fn_got_addr), })); @@ -6503,22 +7407,24 @@ fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallModifier const atom_index = try self.owner.getSymbolIndex(self); const sym_index = try coff_file.getGlobalSymbol(decl_name, lib_name); _ = try self.addInst(.{ - .tag = .mov_linker, + .tag = .mov, .ops = .import_reloc, - .data = .{ .payload = try self.addExtra(Mir.LeaRegisterReloc{ - .reg = @enumToInt(Register.rax), - .atom_index = atom_index, - .sym_index = sym_index, - }) }, + .data = .{ .rx = .{ + .r1 = .rax, + .payload = try self.addExtra(Mir.Reloc{ + .atom_index = atom_index, + .sym_index = sym_index, + }), + } }, }); - try self.asmRegister(.call, .rax); + try self.asmRegister(.{ ._, .call }, .rax); } else if (self.bin_file.cast(link.File.MachO)) |macho_file| { const atom_index = try self.owner.getSymbolIndex(self); const sym_index = try macho_file.getGlobalSymbol(decl_name, lib_name); _ = try self.addInst(.{ - .tag = .call_extern, - .ops = undefined, - .data = .{ .relocation = .{ + .tag = .call, + .ops = .extern_fn_reloc, + .data = .{ .reloc = .{ .atom_index = atom_index, .sym_index = sym_index, } }, @@ -6533,7 +7439,7 @@ fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallModifier assert(ty.zigTypeTag() == .Pointer); const mcv = try self.resolveInst(callee); try self.genSetReg(.rax, Type.usize, mcv); - try self.asmRegister(.call, .rax); + try self.asmRegister(.{ ._, .call }, .rax); } var bt = self.liveness.iterateBigTomb(inst); @@ -6588,8 +7494,6 @@ fn airRetLoad(self: *Self, inst: Air.Inst.Index) !void { fn airCmp(self: *Self, inst: Air.Inst.Index, op: math.CompareOperator) !void { const bin_op = self.air.instructions.items(.data)[inst].bin_op; const ty = self.air.typeOf(bin_op.lhs); - const ty_abi_size = ty.abiSize(self.target.*); - const 
can_reuse = ty_abi_size <= 8; try self.spillEflagsIfOccupied(); self.eflags_inst = inst; @@ -6608,52 +7512,103 @@ fn airCmp(self: *Self, inst: Air.Inst.Index, op: math.CompareOperator) !void { }; defer if (rhs_lock) |lock| self.register_manager.unlockReg(lock); - const dst_mem_ok = !ty.isRuntimeFloat(); - var flipped = false; - const dst_mcv: MCValue = if (can_reuse and !lhs_mcv.isImmediate() and - (dst_mem_ok or lhs_mcv.isRegister()) and self.liveness.operandDies(inst, 0)) - lhs_mcv - else if (can_reuse and !rhs_mcv.isImmediate() and - (dst_mem_ok or rhs_mcv.isRegister()) and self.liveness.operandDies(inst, 1)) - dst: { - flipped = true; - break :dst rhs_mcv; - } else if (dst_mem_ok) dst: { - const dst_mcv = try self.allocTempRegOrMem(ty, true); - try self.genCopy(ty, dst_mcv, lhs_mcv); - break :dst dst_mcv; - } else .{ .register = try self.copyToTmpRegister(ty, lhs_mcv) }; - const dst_lock = switch (dst_mcv) { - .register => |reg| self.register_manager.lockReg(reg), - else => null, - }; - defer if (dst_lock) |lock| self.register_manager.unlockReg(lock); + const result = MCValue{ + .eflags = switch (ty.zigTypeTag()) { + else => result: { + var flipped = false; + const dst_mcv: MCValue = if (lhs_mcv.isRegister() or lhs_mcv.isMemory()) + lhs_mcv + else if (rhs_mcv.isRegister() or rhs_mcv.isMemory()) dst: { + flipped = true; + break :dst rhs_mcv; + } else .{ .register = try self.copyToTmpRegister(ty, lhs_mcv) }; + const dst_lock = switch (dst_mcv) { + .register => |reg| self.register_manager.lockReg(reg), + else => null, + }; + defer if (dst_lock) |lock| self.register_manager.unlockReg(lock); + const src_mcv = if (flipped) lhs_mcv else rhs_mcv; - const src_mcv = if (flipped) lhs_mcv else rhs_mcv; - try self.genBinOpMir(switch (ty.zigTypeTag()) { - else => .cmp, - .Float => switch (ty.floatBits(self.target.*)) { - 32 => if (Target.x86.featureSetHas(self.target.cpu.features, .sse)) - .ucomiss - else - return self.fail("TODO implement airCmp for {} without sse", .{ - 
ty.fmt(self.bin_file.options.module.?), - }), - 64 => if (Target.x86.featureSetHas(self.target.cpu.features, .sse2)) - .ucomisd - else - return self.fail("TODO implement airCmp for {} without sse2", .{ - ty.fmt(self.bin_file.options.module.?), - }), - else => return self.fail("TODO implement airCmp for {}", .{ - ty.fmt(self.bin_file.options.module.?), - }), - }, - }, ty, dst_mcv, src_mcv); + try self.genBinOpMir(.{ ._, .cmp }, ty, dst_mcv, src_mcv); + break :result Condition.fromCompareOperator( + if (ty.isAbiInt()) ty.intInfo(self.target.*).signedness else .unsigned, + if (flipped) op.reverse() else op, + ); + }, + .Float => result: { + const flipped = switch (op) { + .lt, .lte => true, + .eq, .gte, .gt, .neq => false, + }; - const signedness = if (ty.isAbiInt()) ty.intInfo(self.target.*).signedness else .unsigned; - const result = MCValue{ - .eflags = Condition.fromCompareOperator(signedness, if (flipped) op.reverse() else op), + const dst_mcv = if (flipped) rhs_mcv else lhs_mcv; + const dst_reg = if (dst_mcv.isRegister()) + dst_mcv.getReg().? 
+ else + try self.copyToTmpRegister(ty, dst_mcv); + const dst_lock = self.register_manager.lockReg(dst_reg); + defer if (dst_lock) |lock| self.register_manager.unlockReg(lock); + const src_mcv = if (flipped) lhs_mcv else rhs_mcv; + + switch (ty.floatBits(self.target.*)) { + 16 => if (self.hasFeature(.f16c)) { + const tmp1_reg = (try self.register_manager.allocReg(null, sse)).to128(); + const tmp1_mcv = MCValue{ .register = tmp1_reg }; + const tmp1_lock = self.register_manager.lockRegAssumeUnused(tmp1_reg); + defer self.register_manager.unlockReg(tmp1_lock); + + const tmp2_reg = (try self.register_manager.allocReg(null, sse)).to128(); + const tmp2_mcv = MCValue{ .register = tmp2_reg }; + const tmp2_lock = self.register_manager.lockRegAssumeUnused(tmp2_reg); + defer self.register_manager.unlockReg(tmp2_lock); + + if (src_mcv.isMemory()) try self.asmRegisterRegisterMemoryImmediate( + .{ .vp_w, .insr }, + tmp1_reg, + dst_reg.to128(), + src_mcv.mem(.word), + Immediate.u(1), + ) else try self.asmRegisterRegisterRegister( + .{ .vp_, .unpcklwd }, + tmp1_reg, + dst_reg.to128(), + (if (src_mcv.isRegister()) + src_mcv.getReg().? 
+ else + try self.copyToTmpRegister(ty, src_mcv)).to128(), + ); + try self.asmRegisterRegister(.{ .v_, .cvtph2ps }, tmp1_reg, tmp1_reg); + try self.asmRegisterRegister(.{ .v_, .movshdup }, tmp2_reg, tmp1_reg); + try self.genBinOpMir(.{ ._ss, .ucomi }, ty, tmp1_mcv, tmp2_mcv); + } else return self.fail("TODO implement airCmp for {}", .{ + ty.fmt(self.bin_file.options.module.?), + }), + 32 => try self.genBinOpMir( + .{ ._ss, .ucomi }, + ty, + .{ .register = dst_reg }, + src_mcv, + ), + 64 => try self.genBinOpMir( + .{ ._sd, .ucomi }, + ty, + .{ .register = dst_reg }, + src_mcv, + ), + else => return self.fail("TODO implement airCmp for {}", .{ + ty.fmt(self.bin_file.options.module.?), + }), + } + + break :result switch (if (flipped) op.reverse() else op) { + .lt, .lte => unreachable, // required to have been canonicalized to gt(e) + .gt => .a, + .gte => .ae, + .eq => .z_and_np, + .neq => .nz_or_p, + }; + }, + }, }; return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none }); } @@ -6683,7 +7638,7 @@ fn airCmpLtErrorsLen(self: *Self, inst: Air.Inst.Index) !void { else => try self.copyToTmpRegister(op_ty, op_mcv), }; try self.asmRegisterMemory( - .cmp, + .{ ._, .cmp }, registerAlias(dst_reg, op_abi_size), Memory.sib(Memory.PtrSize.fromSize(op_abi_size), .{ .base = .{ .reg = addr_reg } }), ); @@ -6757,8 +7712,8 @@ fn genTry( fn airDbgStmt(self: *Self, inst: Air.Inst.Index) !void { const dbg_stmt = self.air.instructions.items(.data)[inst].dbg_stmt; _ = try self.addInst(.{ - .tag = .dbg_line, - .ops = undefined, + .tag = .pseudo, + .ops = .pseudo_dbg_line_line_column, .data = .{ .line_column = .{ .line = dbg_stmt.line, .column = dbg_stmt.column, @@ -6803,7 +7758,7 @@ fn genCondBrMir(self: *Self, ty: Type, mcv: MCValue) !u32 { }, .register => |reg| { try self.spillEflagsIfOccupied(); - try self.asmRegisterImmediate(.@"test", reg, Immediate.u(1)); + try self.asmRegisterImmediate(.{ ._, .@"test" }, reg, Immediate.u(1)); return self.asmJccReloc(undefined, .e); }, 
.immediate, @@ -6906,13 +7861,13 @@ fn isNull(self: *Self, inst: Air.Inst.Index, opt_ty: Type, opt_mcv: MCValue) !MC const some_abi_size = @intCast(u32, some_info.ty.abiSize(self.target.*)); const alias_reg = registerAlias(opt_reg, some_abi_size); assert(some_abi_size * 8 == alias_reg.bitSize()); - try self.asmRegisterRegister(.@"test", alias_reg, alias_reg); + try self.asmRegisterRegister(.{ ._, .@"test" }, alias_reg, alias_reg); return .{ .eflags = .z }; } assert(some_info.ty.tag() == .bool); const opt_abi_size = @intCast(u32, opt_ty.abiSize(self.target.*)); try self.asmRegisterImmediate( - .bt, + .{ ._, .bt }, registerAlias(opt_reg, opt_abi_size), Immediate.u(@intCast(u6, some_info.off * 8)), ); @@ -6931,7 +7886,7 @@ fn isNull(self: *Self, inst: Air.Inst.Index, opt_ty: Type, opt_mcv: MCValue) !MC try self.genSetReg(addr_reg, Type.usize, opt_mcv.address()); const some_abi_size = @intCast(u32, some_info.ty.abiSize(self.target.*)); try self.asmMemoryImmediate( - .cmp, + .{ ._, .cmp }, Memory.sib(Memory.PtrSize.fromSize(some_abi_size), .{ .base = .{ .reg = addr_reg }, .disp = some_info.off, @@ -6944,7 +7899,7 @@ fn isNull(self: *Self, inst: Air.Inst.Index, opt_ty: Type, opt_mcv: MCValue) !MC .indirect, .load_frame => { const some_abi_size = @intCast(u32, some_info.ty.abiSize(self.target.*)); try self.asmMemoryImmediate( - .cmp, + .{ ._, .cmp }, Memory.sib(Memory.PtrSize.fromSize(some_abi_size), switch (opt_mcv) { .indirect => |reg_off| .{ .base = .{ .reg = reg_off.reg }, @@ -6986,7 +7941,7 @@ fn isNullPtr(self: *Self, inst: Air.Inst.Index, ptr_ty: Type, ptr_mcv: MCValue) const some_abi_size = @intCast(u32, some_info.ty.abiSize(self.target.*)); try self.asmMemoryImmediate( - .cmp, + .{ ._, .cmp }, Memory.sib(Memory.PtrSize.fromSize(some_abi_size), .{ .base = .{ .reg = ptr_reg }, .disp = some_info.off, @@ -7017,14 +7972,24 @@ fn isErr(self: *Self, maybe_inst: ?Air.Inst.Index, ty: Type, operand: MCValue) ! 
const tmp_reg = try self.copyToTmpRegister(ty, operand); if (err_off > 0) { const shift = @intCast(u6, err_off * 8); - try self.genShiftBinOpMir(.shr, ty, .{ .register = tmp_reg }, .{ .immediate = shift }); + try self.genShiftBinOpMir( + .{ ._r, .sh }, + ty, + .{ .register = tmp_reg }, + .{ .immediate = shift }, + ); } else { try self.truncateRegister(Type.anyerror, tmp_reg); } - try self.genBinOpMir(.cmp, Type.anyerror, .{ .register = tmp_reg }, .{ .immediate = 0 }); + try self.genBinOpMir( + .{ ._, .cmp }, + Type.anyerror, + .{ .register = tmp_reg }, + .{ .immediate = 0 }, + ); }, .load_frame => |frame_addr| try self.genBinOpMir( - .cmp, + .{ ._, .cmp }, Type.anyerror, .{ .load_frame = .{ .index = frame_addr.index, @@ -7249,7 +8214,7 @@ fn airSwitchBr(self: *Self, inst: Air.Inst.Index) !void { try self.spillEflagsIfOccupied(); for (items, relocs, 0..) |item, *reloc, i| { const item_mcv = try self.resolveInst(item); - try self.genBinOpMir(.cmp, condition_ty, condition, item_mcv); + try self.genBinOpMir(.{ ._, .cmp }, condition_ty, condition, item_mcv); reloc.* = try self.asmJccReloc(undefined, if (i < relocs.len - 1) .e else .ne); } @@ -7289,14 +8254,14 @@ fn airSwitchBr(self: *Self, inst: Air.Inst.Index) !void { fn performReloc(self: *Self, reloc: Mir.Inst.Index) !void { const next_inst = @intCast(u32, self.mir_instructions.len); switch (self.mir_instructions.items(.tag)[reloc]) { - .jcc => { - self.mir_instructions.items(.data)[reloc].inst_cc.inst = next_inst; - }, - .jmp_reloc => { - self.mir_instructions.items(.data)[reloc].inst = next_inst; + .j, .jmp => {}, + .pseudo => switch (self.mir_instructions.items(.ops)[reloc]) { + .pseudo_j_z_and_np_inst, .pseudo_j_nz_or_p_inst => {}, + else => unreachable, }, else => unreachable, } + self.mir_instructions.items(.data)[reloc].inst.inst = next_inst; } fn airBr(self: *Self, inst: Air.Inst.Index) !void { @@ -7460,7 +8425,7 @@ fn airAsm(self: *Self, inst: Air.Inst.Index) !void { .qword else null; - const mnem = mnem: { 
+ const mnem_tag = Mir.Inst.FixedTag{ ._, mnem: { if (mnem_size) |_| { if (std.meta.stringToEnum(Mir.Inst.Tag, mnem_str[0 .. mnem_str.len - 1])) |mnem| { break :mnem mnem; @@ -7468,7 +8433,7 @@ fn airAsm(self: *Self, inst: Air.Inst.Index) !void { } break :mnem std.meta.stringToEnum(Mir.Inst.Tag, mnem_str) orelse return self.fail("Invalid mnemonic: '{s}'", .{mnem_str}); - }; + } }; var op_it = mem.tokenize(u8, mnem_it.rest(), ","); var ops = [1]encoder.Instruction.Operand{.none} ** 4; @@ -7519,51 +8484,51 @@ fn airAsm(self: *Self, inst: Air.Inst.Index) !void { } else if (op_it.next()) |op_str| return self.fail("Extra operand: '{s}'", .{op_str}); (switch (ops[0]) { - .none => self.asmOpOnly(mnem), + .none => self.asmOpOnly(mnem_tag), .reg => |reg0| switch (ops[1]) { - .none => self.asmRegister(mnem, reg0), + .none => self.asmRegister(mnem_tag, reg0), .reg => |reg1| switch (ops[2]) { - .none => self.asmRegisterRegister(mnem, reg1, reg0), + .none => self.asmRegisterRegister(mnem_tag, reg1, reg0), .reg => |reg2| switch (ops[3]) { - .none => self.asmRegisterRegisterRegister(mnem, reg2, reg1, reg0), + .none => self.asmRegisterRegisterRegister(mnem_tag, reg2, reg1, reg0), else => error.InvalidInstruction, }, .mem => |mem2| switch (ops[3]) { - .none => self.asmMemoryRegisterRegister(mnem, mem2, reg1, reg0), + .none => self.asmMemoryRegisterRegister(mnem_tag, mem2, reg1, reg0), else => error.InvalidInstruction, }, else => error.InvalidInstruction, }, .mem => |mem1| switch (ops[2]) { - .none => self.asmMemoryRegister(mnem, mem1, reg0), + .none => self.asmMemoryRegister(mnem_tag, mem1, reg0), else => error.InvalidInstruction, }, else => error.InvalidInstruction, }, .mem => |mem0| switch (ops[1]) { - .none => self.asmMemory(mnem, mem0), + .none => self.asmMemory(mnem_tag, mem0), .reg => |reg1| switch (ops[2]) { - .none => self.asmRegisterMemory(mnem, reg1, mem0), + .none => self.asmRegisterMemory(mnem_tag, reg1, mem0), else => error.InvalidInstruction, }, else => 
error.InvalidInstruction, }, .imm => |imm0| switch (ops[1]) { - .none => self.asmImmediate(mnem, imm0), + .none => self.asmImmediate(mnem_tag, imm0), .reg => |reg1| switch (ops[2]) { - .none => self.asmRegisterImmediate(mnem, reg1, imm0), + .none => self.asmRegisterImmediate(mnem_tag, reg1, imm0), .reg => |reg2| switch (ops[3]) { - .none => self.asmRegisterRegisterImmediate(mnem, reg2, reg1, imm0), + .none => self.asmRegisterRegisterImmediate(mnem_tag, reg2, reg1, imm0), else => error.InvalidInstruction, }, .mem => |mem2| switch (ops[3]) { - .none => self.asmMemoryRegisterImmediate(mnem, mem2, reg1, imm0), + .none => self.asmMemoryRegisterImmediate(mnem_tag, mem2, reg1, imm0), else => error.InvalidInstruction, }, else => error.InvalidInstruction, }, .mem => |mem1| switch (ops[2]) { - .none => self.asmMemoryImmediate(mnem, mem1, imm0), + .none => self.asmMemoryImmediate(mnem_tag, mem1, imm0), else => error.InvalidInstruction, }, else => error.InvalidInstruction, @@ -7572,7 +8537,7 @@ fn airAsm(self: *Self, inst: Air.Inst.Index) !void { error.InvalidInstruction => return self.fail( "Invalid instruction: '{s} {s} {s} {s} {s}'", .{ - @tagName(mnem), + @tagName(mnem_tag[1]), @tagName(ops[0]), @tagName(ops[1]), @tagName(ops[2]), @@ -7603,19 +8568,55 @@ fn airAsm(self: *Self, inst: Air.Inst.Index) !void { return self.finishAirResult(inst, result); } -fn movMirTag(self: *Self, ty: Type) !Mir.Inst.Tag { - return switch (ty.zigTypeTag()) { - else => .mov, +fn movMirTag(self: *Self, ty: Type, aligned: bool) !Mir.Inst.FixedTag { + switch (ty.zigTypeTag()) { + else => return .{ ._, .mov }, .Float => switch (ty.floatBits(self.target.*)) { 16 => unreachable, // needs special handling - 32 => .movss, - 64 => .movsd, - 128 => .movaps, - else => return self.fail("TODO movMirTag from {}", .{ - ty.fmt(self.bin_file.options.module.?), - }), + 32 => return if (self.hasFeature(.avx)) .{ .v_ss, .mov } else .{ ._ss, .mov }, + 64 => return if (self.hasFeature(.avx)) .{ .v_sd, .mov } else .{ 
._sd, .mov }, + 128 => return if (self.hasFeature(.avx)) + if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu } + else if (aligned) .{ ._ps, .mova } else .{ ._ps, .movu }, + else => {}, }, - }; + .Vector => switch (ty.childType().zigTypeTag()) { + .Float => switch (ty.childType().floatBits(self.target.*)) { + 16 => switch (ty.vectorLen()) { + 1 => unreachable, // needs special handling + 2 => return if (self.hasFeature(.avx)) .{ .v_ss, .mov } else .{ ._ss, .mov }, + 3...4 => return if (self.hasFeature(.avx)) .{ .v_sd, .mov } else .{ ._sd, .mov }, + 5...8 => return if (self.hasFeature(.avx)) + if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu } + else if (aligned) .{ ._ps, .mova } else .{ ._ps, .movu }, + 9...16 => if (self.hasFeature(.avx)) + return if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu }, + else => {}, + }, + 32 => switch (ty.vectorLen()) { + 1 => return if (self.hasFeature(.avx)) .{ .v_ss, .mov } else .{ ._ss, .mov }, + 2...4 => return if (self.hasFeature(.avx)) + if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu } + else if (aligned) .{ ._ps, .mova } else .{ ._ps, .movu }, + 5...8 => if (self.hasFeature(.avx)) + return if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu }, + else => {}, + }, + 64 => switch (ty.vectorLen()) { + 1 => return if (self.hasFeature(.avx)) .{ .v_sd, .mov } else .{ ._sd, .mov }, + 2 => return if (self.hasFeature(.avx)) + if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu } + else if (aligned) .{ ._ps, .mova } else .{ ._ps, .movu }, + 3...4 => if (self.hasFeature(.avx)) + return if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu }, + else => {}, + }, + else => {}, + }, + else => {}, + }, + } + return self.fail("TODO movMirTag for {}", .{ty.fmt(self.bin_file.options.module.?)}); } fn genCopy(self: *Self, ty: Type, dst_mcv: MCValue, src_mcv: MCValue) InnerError!void { @@ -7685,7 +8686,8 @@ fn genCopy(self: *Self, ty: Type, dst_mcv: MCValue, src_mcv: MCValue) InnerError fn genSetReg(self: *Self, dst_reg: Register, ty: 
Type, src_mcv: MCValue) InnerError!void { const abi_size = @intCast(u32, ty.abiSize(self.target.*)); - if (abi_size > 8) return self.fail("genSetReg called with a value larger than one register", .{}); + if (abi_size * 8 > dst_reg.bitSize()) + return self.fail("genSetReg called with a value larger than dst_reg", .{}); switch (src_mcv) { .none, .unreach, @@ -7700,19 +8702,19 @@ fn genSetReg(self: *Self, dst_reg: Register, ty: Type, src_mcv: MCValue) InnerEr if (imm == 0) { // 32-bit moves zero-extend to 64-bit, so xoring the 32-bit // register is the fastest way to zero a register. - try self.asmRegisterRegister(.xor, dst_reg.to32(), dst_reg.to32()); + try self.asmRegisterRegister(.{ ._, .xor }, dst_reg.to32(), dst_reg.to32()); } else if (abi_size > 4 and math.cast(u32, imm) != null) { // 32-bit moves zero-extend to 64-bit. - try self.asmRegisterImmediate(.mov, dst_reg.to32(), Immediate.u(imm)); + try self.asmRegisterImmediate(.{ ._, .mov }, dst_reg.to32(), Immediate.u(imm)); } else if (abi_size <= 4 and @bitCast(i64, imm) < 0) { try self.asmRegisterImmediate( - .mov, + .{ ._, .mov }, registerAlias(dst_reg, abi_size), Immediate.s(@intCast(i32, @bitCast(i64, imm))), ); } else { try self.asmRegisterImmediate( - .mov, + .{ ._, .mov }, registerAlias(dst_reg, abi_size), Immediate.u(imm), ); @@ -7721,18 +8723,18 @@ fn genSetReg(self: *Self, dst_reg: Register, ty: Type, src_mcv: MCValue) InnerEr .register => |src_reg| if (dst_reg.id() != src_reg.id()) try self.asmRegisterRegister( if ((dst_reg.class() == .floating_point) == (src_reg.class() == .floating_point)) switch (ty.zigTypeTag()) { - else => .mov, - .Float, .Vector => .movaps, + else => .{ ._, .mov }, + .Float, .Vector => .{ ._ps, .mova }, } else switch (abi_size) { 2 => return try self.asmRegisterRegisterImmediate( - if (dst_reg.class() == .floating_point) .pinsrw else .pextrw, - registerAlias(dst_reg, abi_size), - registerAlias(src_reg, abi_size), + if (dst_reg.class() == .floating_point) .{ .p_w, .insr } else .{ 
.p_w, .extr }, + registerAlias(dst_reg, 4), + registerAlias(src_reg, 4), Immediate.u(0), ), - 4 => .movd, - 8 => .movq, + 4 => .{ ._d, .mov }, + 8 => .{ ._q, .mov }, else => return self.fail( "unsupported register copy from {s} to {s}", .{ @tagName(src_reg), @tagName(dst_reg) }, @@ -7759,7 +8761,7 @@ fn genSetReg(self: *Self, dst_reg: Register, ty: Type, src_mcv: MCValue) InnerEr }); if (ty.isRuntimeFloat() and ty.floatBits(self.target.*) == 16) try self.asmRegisterMemoryImmediate( - .pinsrw, + .{ .p_w, .insr }, registerAlias(dst_reg, abi_size), src_mem, Immediate.u(0), @@ -7769,10 +8771,14 @@ fn genSetReg(self: *Self, dst_reg: Register, ty: Type, src_mcv: MCValue) InnerEr switch (src_mcv) { .register_offset => |reg_off| switch (reg_off.off) { 0 => return self.genSetReg(dst_reg, ty, .{ .register = reg_off.reg }), - else => .lea, + else => .{ ._, .lea }, }, - .indirect, .load_frame => try self.movMirTag(ty), - .lea_frame => .lea, + .indirect => try self.movMirTag(ty, false), + .load_frame => |frame_addr| try self.movMirTag( + ty, + self.getFrameAddrAlignment(frame_addr) >= ty.abiAlignment(self.target.*), + ), + .lea_frame => .{ ._, .lea }, else => unreachable, }, registerAlias(dst_reg, abi_size), @@ -7788,14 +8794,18 @@ fn genSetReg(self: *Self, dst_reg: Register, ty: Type, src_mcv: MCValue) InnerEr }); return if (ty.isRuntimeFloat() and ty.floatBits(self.target.*) == 16) self.asmRegisterMemoryImmediate( - .pinsrw, + .{ .p_w, .insr }, registerAlias(dst_reg, abi_size), src_mem, Immediate.u(0), ) else self.asmRegisterMemory( - try self.movMirTag(ty), + try self.movMirTag(ty, mem.isAlignedGeneric( + u32, + @bitCast(u32, small_addr), + ty.abiAlignment(self.target.*), + )), registerAlias(dst_reg, abi_size), src_mem, ); @@ -7803,13 +8813,15 @@ fn genSetReg(self: *Self, dst_reg: Register, ty: Type, src_mcv: MCValue) InnerEr .load_direct => |sym_index| if (!ty.isRuntimeFloat()) { const atom_index = try self.owner.getSymbolIndex(self); _ = try self.addInst(.{ - .tag = 
.mov_linker, + .tag = .mov, .ops = .direct_reloc, - .data = .{ .payload = try self.addExtra(Mir.LeaRegisterReloc{ - .reg = @enumToInt(dst_reg.to64()), - .atom_index = atom_index, - .sym_index = sym_index, - }) }, + .data = .{ .rx = .{ + .r1 = dst_reg.to64(), + .payload = try self.addExtra(Mir.Reloc{ + .atom_index = atom_index, + .sym_index = sym_index, + }), + } }, }); return; }, @@ -7826,14 +8838,14 @@ fn genSetReg(self: *Self, dst_reg: Register, ty: Type, src_mcv: MCValue) InnerEr }); if (ty.isRuntimeFloat() and ty.floatBits(self.target.*) == 16) try self.asmRegisterMemoryImmediate( - .pinsrw, + .{ .p_w, .insr }, registerAlias(dst_reg, abi_size), src_mem, Immediate.u(0), ) else try self.asmRegisterMemory( - try self.movMirTag(ty), + try self.movMirTag(ty, false), registerAlias(dst_reg, abi_size), src_mem, ); @@ -7842,8 +8854,8 @@ fn genSetReg(self: *Self, dst_reg: Register, ty: Type, src_mcv: MCValue) InnerEr const atom_index = try self.owner.getSymbolIndex(self); _ = try self.addInst(.{ .tag = switch (src_mcv) { - .lea_direct => .lea_linker, - .lea_got => .mov_linker, + .lea_direct => .lea, + .lea_got => .mov, else => unreachable, }, .ops = switch (src_mcv) { @@ -7851,27 +8863,31 @@ fn genSetReg(self: *Self, dst_reg: Register, ty: Type, src_mcv: MCValue) InnerEr .lea_got => .got_reloc, else => unreachable, }, - .data = .{ .payload = try self.addExtra(Mir.LeaRegisterReloc{ - .reg = @enumToInt(dst_reg.to64()), - .atom_index = atom_index, - .sym_index = sym_index, - }) }, + .data = .{ .rx = .{ + .r1 = dst_reg.to64(), + .payload = try self.addExtra(Mir.Reloc{ + .atom_index = atom_index, + .sym_index = sym_index, + }), + } }, }); }, .lea_tlv => |sym_index| { const atom_index = try self.owner.getSymbolIndex(self); if (self.bin_file.cast(link.File.MachO)) |_| { _ = try self.addInst(.{ - .tag = .lea_linker, + .tag = .lea, .ops = .tlv_reloc, - .data = .{ .payload = try self.addExtra(Mir.LeaRegisterReloc{ - .reg = @enumToInt(Register.rdi), - .atom_index = atom_index, - 
.sym_index = sym_index, - }) }, + .data = .{ .rx = .{ + .r1 = .rdi, + .payload = try self.addExtra(Mir.Reloc{ + .atom_index = atom_index, + .sym_index = sym_index, + }), + } }, }); // TODO: spill registers before calling - try self.asmMemory(.call, Memory.sib(.qword, .{ .base = .{ .reg = .rdi } })); + try self.asmMemory(.{ ._, .call }, Memory.sib(.qword, .{ .base = .{ .reg = .rdi } })); try self.genSetReg(dst_reg.to64(), Type.usize, .{ .register = .rax }); } else return self.fail("TODO emit ptr to TLV sequence on {s}", .{ @tagName(self.bin_file.tag), @@ -7898,7 +8914,7 @@ fn genSetMem(self: *Self, base: Memory.Base, disp: i32, ty: Type, src_mcv: MCVal else Immediate.u(@intCast(u32, imm)); try self.asmMemoryImmediate( - .mov, + .{ ._, .mov }, Memory.sib(Memory.PtrSize.fromSize(abi_size), .{ .base = base, .disp = disp }), immediate, ); @@ -7906,14 +8922,14 @@ fn genSetMem(self: *Self, base: Memory.Base, disp: i32, ty: Type, src_mcv: MCVal 3, 5...7 => unreachable, else => if (math.cast(i32, @bitCast(i64, imm))) |small| { try self.asmMemoryImmediate( - .mov, + .{ ._, .mov }, Memory.sib(Memory.PtrSize.fromSize(abi_size), .{ .base = base, .disp = disp }), Immediate.s(small), ); } else { var offset: i32 = 0; while (offset < abi_size) : (offset += 4) try self.asmMemoryImmediate( - .mov, + .{ ._, .mov }, Memory.sib(.dword, .{ .base = base, .disp = disp + offset }), if (ty.isSignedInt()) Immediate.s(@truncate( @@ -7936,14 +8952,31 @@ fn genSetMem(self: *Self, base: Memory.Base, disp: i32, ty: Type, src_mcv: MCVal ); if (ty.isRuntimeFloat() and ty.floatBits(self.target.*) == 16) try self.asmMemoryRegisterImmediate( - .pextrw, + .{ .p_w, .extr }, dst_mem, - registerAlias(src_reg, abi_size), + src_reg.to128(), Immediate.u(0), ) else try self.asmMemoryRegister( - try self.movMirTag(ty), + try self.movMirTag(ty, switch (base) { + .none => mem.isAlignedGeneric( + u32, + @bitCast(u32, disp), + ty.abiAlignment(self.target.*), + ), + .reg => |reg| switch (reg) { + .es, .cs, .ss, .ds 
=> mem.isAlignedGeneric( + u32, + @bitCast(u32, disp), + ty.abiAlignment(self.target.*), + ), + else => false, + }, + .frame => |frame_index| self.getFrameAddrAlignment( + .{ .index = frame_index, .off = disp }, + ) >= ty.abiAlignment(self.target.*), + }), dst_mem, registerAlias(src_reg, abi_size), ); @@ -8015,7 +9048,7 @@ fn genInlineMemcpyRegisterRegister( while (remainder > 0) { const nearest_power_of_two = @as(u6, 1) << math.log2_int(u3, @intCast(u3, remainder)); try self.asmMemoryRegister( - .mov, + .{ ._, .mov }, Memory.sib(Memory.PtrSize.fromSize(nearest_power_of_two), .{ .base = dst_reg, .disp = -next_offset, @@ -8024,7 +9057,7 @@ fn genInlineMemcpyRegisterRegister( ); if (nearest_power_of_two > 1) { - try self.genShiftBinOpMir(.shr, ty, .{ .register = tmp_reg }, .{ + try self.genShiftBinOpMir(.{ ._r, .sh }, ty, .{ .register = tmp_reg }, .{ .immediate = nearest_power_of_two * 8, }); } @@ -8035,8 +9068,8 @@ fn genInlineMemcpyRegisterRegister( } else { try self.asmMemoryRegister( switch (src_reg.class()) { - .general_purpose, .segment => .mov, - .floating_point => .movss, + .general_purpose, .segment => .{ ._, .mov }, + .floating_point => .{ ._ss, .mov }, }, Memory.sib(Memory.PtrSize.fromSize(abi_size), .{ .base = dst_reg, .disp = -offset }), registerAlias(src_reg, abi_size), @@ -8049,11 +9082,7 @@ fn genInlineMemcpy(self: *Self, dst_ptr: MCValue, src_ptr: MCValue, len: MCValue try self.genSetReg(.rdi, Type.usize, dst_ptr); try self.genSetReg(.rsi, Type.usize, src_ptr); try self.genSetReg(.rcx, Type.usize, len); - _ = try self.addInst(.{ - .tag = .movs, - .ops = .string, - .data = .{ .string = .{ .repeat = .rep, .width = .b } }, - }); + try self.asmOpOnly(.{ .@"rep _sb", .mov }); } fn genInlineMemset(self: *Self, dst_ptr: MCValue, value: MCValue, len: MCValue) InnerError!void { @@ -8061,11 +9090,7 @@ fn genInlineMemset(self: *Self, dst_ptr: MCValue, value: MCValue, len: MCValue) try self.genSetReg(.rdi, Type.usize, dst_ptr); try self.genSetReg(.al, Type.u8, 
value); try self.genSetReg(.rcx, Type.usize, len); - _ = try self.addInst(.{ - .tag = .stos, - .ops = .string, - .data = .{ .string = .{ .repeat = .rep, .width = .b } }, - }); + try self.asmOpOnly(.{ .@"rep _sb", .sto }); } fn genLazySymbolRef( @@ -8083,14 +9108,14 @@ fn genLazySymbolRef( const got_mem = Memory.sib(.qword, .{ .base = .{ .reg = .ds }, .disp = @intCast(i32, got_addr) }); switch (tag) { - .lea, .mov => try self.asmRegisterMemory(.mov, reg.to64(), got_mem), - .call => try self.asmMemory(.call, got_mem), + .lea, .mov => try self.asmRegisterMemory(.{ ._, .mov }, reg.to64(), got_mem), + .call => try self.asmMemory(.{ ._, .call }, got_mem), else => unreachable, } switch (tag) { .lea, .call => {}, .mov => try self.asmRegisterMemory( - tag, + .{ ._, tag }, reg.to64(), Memory.sib(.qword, .{ .base = .{ .reg = reg.to64() } }), ), @@ -8107,7 +9132,7 @@ fn genLazySymbolRef( } switch (tag) { .lea, .mov => {}, - .call => try self.asmRegister(.call, reg), + .call => try self.asmRegister(.{ ._, .call }, reg), else => unreachable, } } else if (self.bin_file.cast(link.File.MachO)) |macho_file| { @@ -8121,7 +9146,7 @@ fn genLazySymbolRef( } switch (tag) { .lea, .mov => {}, - .call => try self.asmRegister(.call, reg), + .call => try self.asmRegister(.{ ._, .call }, reg), else => unreachable, } } else { @@ -8164,7 +9189,7 @@ fn airBitCast(self: *Self, inst: Air.Inst.Index) !void { defer if (operand_lock) |lock| self.register_manager.unlockReg(lock); const dest = try self.allocRegOrMem(inst, true); - try self.genCopy(self.air.typeOfIndex(inst), dest, operand); + try self.genCopy(if (!dest.isMemory() or operand.isMemory()) dst_ty else src_ty, dest, operand); break :result dest; }; return self.finishAir(inst, result, .{ ty_op.operand, .none, .none }); @@ -8226,13 +9251,13 @@ fn airIntToFloat(self: *Self, inst: Air.Inst.Index) !void { try self.asmRegisterRegister(switch (dst_ty.floatBits(self.target.*)) { 32 => if (Target.x86.featureSetHas(self.target.cpu.features, .sse)) - 
.cvtsi2ss + .{ ._, .cvtsi2ss } else return self.fail("TODO implement airIntToFloat from {} to {} without sse", .{ src_ty.fmt(self.bin_file.options.module.?), dst_ty.fmt(self.bin_file.options.module.?), }), 64 => if (Target.x86.featureSetHas(self.target.cpu.features, .sse2)) - .cvtsi2sd + .{ ._, .cvtsi2sd } else return self.fail("TODO implement airIntToFloat from {} to {} without sse2", .{ src_ty.fmt(self.bin_file.options.module.?), dst_ty.fmt(self.bin_file.options.module.?), @@ -8272,7 +9297,7 @@ fn airFloatToInt(self: *Self, inst: Air.Inst.Index) !void { }, }; try self.asmMemory( - .fld, + .{ .f_, .ld }, Memory.sib(Memory.PtrSize.fromSize(src_abi_size), .{ .base = .{ .frame = frame_addr.index }, .disp = frame_addr.off, @@ -8282,7 +9307,7 @@ fn airFloatToInt(self: *Self, inst: Air.Inst.Index) !void { // convert const stack_dst = try self.allocRegOrMem(inst, false); try self.asmMemory( - .fisttp, + .{ .f_p, .istt }, Memory.sib(Memory.PtrSize.fromSize(dst_abi_size), .{ .base = .{ .frame = stack_dst.load_frame.index }, .disp = stack_dst.load_frame.off, @@ -8338,16 +9363,11 @@ fn airCmpxchg(self: *Self, inst: Air.Inst.Index) !void { defer if (ptr_lock) |lock| self.register_manager.unlockReg(lock); try self.spillEflagsIfOccupied(); - if (val_abi_size <= 8) { - _ = try self.addInst(.{ .tag = .cmpxchg, .ops = .lock_mr_sib, .data = .{ .rx = .{ - .r = registerAlias(new_reg.?, val_abi_size), - .payload = try self.addExtra(Mir.MemorySib.encode(ptr_mem)), - } } }); - } else { - _ = try self.addInst(.{ .tag = .cmpxchgb, .ops = .lock_m_sib, .data = .{ - .payload = try self.addExtra(Mir.MemorySib.encode(ptr_mem)), - } }); - } + if (val_abi_size <= 8) try self.asmMemoryRegister( + .{ .@"lock _", .cmpxchg }, + ptr_mem, + registerAlias(new_reg.?, val_abi_size), + ) else try self.asmMemory(.{ .@"lock _16b", .cmpxchg }, ptr_mem); const result: MCValue = result: { if (self.liveness.isUnused(inst)) break :result .unreach; @@ -8445,16 +9465,17 @@ fn atomicOp( try self.genSetReg(dst_reg, 
val_ty, val_mcv); if (rmw_op == std.builtin.AtomicRmwOp.Sub and tag == .xadd) { - try self.genUnOpMir(.neg, val_ty, dst_mcv); + try self.genUnOpMir(.{ ._, .neg }, val_ty, dst_mcv); } - _ = try self.addInst(.{ .tag = tag, .ops = switch (tag) { - .mov, .xchg => .mr_sib, - .xadd, .add, .sub, .@"and", .@"or", .xor => .lock_mr_sib, - else => unreachable, - }, .data = .{ .rx = .{ - .r = registerAlias(dst_reg, val_abi_size), - .payload = try self.addExtra(Mir.MemorySib.encode(ptr_mem)), - } } }); + try self.asmMemoryRegister( + switch (tag) { + .mov, .xchg => .{ ._, tag }, + .xadd, .add, .sub, .@"and", .@"or", .xor => .{ .@"lock _", tag }, + else => unreachable, + }, + ptr_mem, + registerAlias(dst_reg, val_abi_size), + ); return if (unused) .unreach else dst_mcv; }, @@ -8464,22 +9485,22 @@ fn atomicOp( const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg); defer self.register_manager.unlockReg(tmp_lock); - try self.asmRegisterMemory(.mov, registerAlias(.rax, val_abi_size), ptr_mem); + try self.asmRegisterMemory(.{ ._, .mov }, registerAlias(.rax, val_abi_size), ptr_mem); const loop = @intCast(u32, self.mir_instructions.len); if (rmw_op != std.builtin.AtomicRmwOp.Xchg) { try self.genSetReg(tmp_reg, val_ty, .{ .register = .rax }); } if (rmw_op) |op| switch (op) { .Xchg => try self.genSetReg(tmp_reg, val_ty, val_mcv), - .Add => try self.genBinOpMir(.add, val_ty, tmp_mcv, val_mcv), - .Sub => try self.genBinOpMir(.sub, val_ty, tmp_mcv, val_mcv), - .And => try self.genBinOpMir(.@"and", val_ty, tmp_mcv, val_mcv), + .Add => try self.genBinOpMir(.{ ._, .add }, val_ty, tmp_mcv, val_mcv), + .Sub => try self.genBinOpMir(.{ ._, .sub }, val_ty, tmp_mcv, val_mcv), + .And => try self.genBinOpMir(.{ ._, .@"and" }, val_ty, tmp_mcv, val_mcv), .Nand => { - try self.genBinOpMir(.@"and", val_ty, tmp_mcv, val_mcv); - try self.genUnOpMir(.not, val_ty, tmp_mcv); + try self.genBinOpMir(.{ ._, .@"and" }, val_ty, tmp_mcv, val_mcv); + try self.genUnOpMir(.{ ._, .not }, val_ty, tmp_mcv); 
}, - .Or => try self.genBinOpMir(.@"or", val_ty, tmp_mcv, val_mcv), - .Xor => try self.genBinOpMir(.xor, val_ty, tmp_mcv, val_mcv), + .Or => try self.genBinOpMir(.{ ._, .@"or" }, val_ty, tmp_mcv, val_mcv), + .Xor => try self.genBinOpMir(.{ ._, .xor }, val_ty, tmp_mcv, val_mcv), .Min, .Max => { const cc: Condition = switch (if (val_ty.isAbiInt()) val_ty.intInfo(self.target.*).signedness @@ -8497,7 +9518,7 @@ fn atomicOp( }, }; - try self.genBinOpMir(.cmp, val_ty, tmp_mcv, val_mcv); + try self.genBinOpMir(.{ ._, .cmp }, val_ty, tmp_mcv, val_mcv); const cmov_abi_size = @max(val_abi_size, 2); switch (val_mcv) { .register => |val_reg| try self.asmCmovccRegisterRegister( @@ -8521,19 +9542,20 @@ fn atomicOp( } }, }; - _ = try self.addInst(.{ .tag = .cmpxchg, .ops = .lock_mr_sib, .data = .{ .rx = .{ - .r = registerAlias(tmp_reg, val_abi_size), - .payload = try self.addExtra(Mir.MemorySib.encode(ptr_mem)), - } } }); + try self.asmMemoryRegister( + .{ .@"lock _", .cmpxchg }, + ptr_mem, + registerAlias(tmp_reg, val_abi_size), + ); _ = try self.asmJccReloc(loop, .ne); return if (unused) .unreach else .{ .register = .rax }; } else { - try self.asmRegisterMemory(.mov, .rax, Memory.sib(.qword, .{ + try self.asmRegisterMemory(.{ ._, .mov }, .rax, Memory.sib(.qword, .{ .base = ptr_mem.sib.base, .scale_index = ptr_mem.scaleIndex(), .disp = ptr_mem.sib.disp + 0, })); - try self.asmRegisterMemory(.mov, .rdx, Memory.sib(.qword, .{ + try self.asmRegisterMemory(.{ ._, .mov }, .rdx, Memory.sib(.qword, .{ .base = ptr_mem.sib.base, .scale_index = ptr_mem.scaleIndex(), .disp = ptr_mem.sib.disp + 8, @@ -8548,53 +9570,51 @@ fn atomicOp( const val_lo_mem = val_mem_mcv.mem(.qword); const val_hi_mem = val_mem_mcv.address().offset(8).deref().mem(.qword); if (rmw_op != std.builtin.AtomicRmwOp.Xchg) { - try self.asmRegisterRegister(.mov, .rbx, .rax); - try self.asmRegisterRegister(.mov, .rcx, .rdx); + try self.asmRegisterRegister(.{ ._, .mov }, .rbx, .rax); + try self.asmRegisterRegister(.{ ._, .mov 
}, .rcx, .rdx); } if (rmw_op) |op| switch (op) { .Xchg => { - try self.asmRegisterMemory(.mov, .rbx, val_lo_mem); - try self.asmRegisterMemory(.mov, .rcx, val_hi_mem); + try self.asmRegisterMemory(.{ ._, .mov }, .rbx, val_lo_mem); + try self.asmRegisterMemory(.{ ._, .mov }, .rcx, val_hi_mem); }, .Add => { - try self.asmRegisterMemory(.add, .rbx, val_lo_mem); - try self.asmRegisterMemory(.adc, .rcx, val_hi_mem); + try self.asmRegisterMemory(.{ ._, .add }, .rbx, val_lo_mem); + try self.asmRegisterMemory(.{ ._, .adc }, .rcx, val_hi_mem); }, .Sub => { - try self.asmRegisterMemory(.sub, .rbx, val_lo_mem); - try self.asmRegisterMemory(.sbb, .rcx, val_hi_mem); + try self.asmRegisterMemory(.{ ._, .sub }, .rbx, val_lo_mem); + try self.asmRegisterMemory(.{ ._, .sbb }, .rcx, val_hi_mem); }, .And => { - try self.asmRegisterMemory(.@"and", .rbx, val_lo_mem); - try self.asmRegisterMemory(.@"and", .rcx, val_hi_mem); + try self.asmRegisterMemory(.{ ._, .@"and" }, .rbx, val_lo_mem); + try self.asmRegisterMemory(.{ ._, .@"and" }, .rcx, val_hi_mem); }, .Nand => { - try self.asmRegisterMemory(.@"and", .rbx, val_lo_mem); - try self.asmRegisterMemory(.@"and", .rcx, val_hi_mem); - try self.asmRegister(.not, .rbx); - try self.asmRegister(.not, .rcx); + try self.asmRegisterMemory(.{ ._, .@"and" }, .rbx, val_lo_mem); + try self.asmRegisterMemory(.{ ._, .@"and" }, .rcx, val_hi_mem); + try self.asmRegister(.{ ._, .not }, .rbx); + try self.asmRegister(.{ ._, .not }, .rcx); }, .Or => { - try self.asmRegisterMemory(.@"or", .rbx, val_lo_mem); - try self.asmRegisterMemory(.@"or", .rcx, val_hi_mem); + try self.asmRegisterMemory(.{ ._, .@"or" }, .rbx, val_lo_mem); + try self.asmRegisterMemory(.{ ._, .@"or" }, .rcx, val_hi_mem); }, .Xor => { - try self.asmRegisterMemory(.xor, .rbx, val_lo_mem); - try self.asmRegisterMemory(.xor, .rcx, val_hi_mem); + try self.asmRegisterMemory(.{ ._, .xor }, .rbx, val_lo_mem); + try self.asmRegisterMemory(.{ ._, .xor }, .rcx, val_hi_mem); }, else => return 
self.fail("TODO implement x86 atomic loop for {} {s}", .{ val_ty.fmt(self.bin_file.options.module.?), @tagName(op), }), }; - _ = try self.addInst(.{ .tag = .cmpxchgb, .ops = .lock_m_sib, .data = .{ - .payload = try self.addExtra(Mir.MemorySib.encode(ptr_mem)), - } }); + try self.asmMemory(.{ .@"lock _16b", .cmpxchg }, ptr_mem); _ = try self.asmJccReloc(loop, .ne); if (unused) return .unreach; const dst_mcv = try self.allocTempRegOrMem(val_ty, false); try self.asmMemoryRegister( - .mov, + .{ ._, .mov }, Memory.sib(.qword, .{ .base = .{ .frame = dst_mcv.load_frame.index }, .disp = dst_mcv.load_frame.off + 0, @@ -8602,7 +9622,7 @@ fn atomicOp( .rax, ); try self.asmMemoryRegister( - .mov, + .{ ._, .mov }, Memory.sib(.qword, .{ .base = .{ .frame = dst_mcv.load_frame.index }, .disp = dst_mcv.load_frame.off + 8, @@ -8754,8 +9774,13 @@ fn airMemset(self: *Self, inst: Air.Inst.Index, safety: bool) !void { .off = elem_abi_size, } }); - try self.genBinOpMir(.sub, Type.usize, len_mcv, .{ .immediate = 1 }); - try self.asmRegisterRegisterImmediate(.imul, len_reg, len_reg, Immediate.u(elem_abi_size)); + try self.genBinOpMir(.{ ._, .sub }, Type.usize, len_mcv, .{ .immediate = 1 }); + try self.asmRegisterRegisterImmediate( + .{ .i_, .mul }, + len_reg, + len_reg, + Immediate.u(elem_abi_size), + ); try self.genInlineMemcpy(second_elem_ptr_mcv, ptr, len_mcv); try self.performReloc(skip_reloc); @@ -8893,7 +9918,7 @@ fn airErrorName(self: *Self, inst: Air.Inst.Index) !void { try self.truncateRegister(err_ty, err_reg.to32()); try self.asmRegisterMemory( - .mov, + .{ ._, .mov }, start_reg.to32(), Memory.sib(.dword, .{ .base = .{ .reg = addr_reg.to64() }, @@ -8902,7 +9927,7 @@ fn airErrorName(self: *Self, inst: Air.Inst.Index) !void { }), ); try self.asmRegisterMemory( - .mov, + .{ ._, .mov }, end_reg.to32(), Memory.sib(.dword, .{ .base = .{ .reg = addr_reg.to64() }, @@ -8910,9 +9935,9 @@ fn airErrorName(self: *Self, inst: Air.Inst.Index) !void { .disp = 8, }), ); - try 
self.asmRegisterRegister(.sub, end_reg.to32(), start_reg.to32()); + try self.asmRegisterRegister(.{ ._, .sub }, end_reg.to32(), start_reg.to32()); try self.asmRegisterMemory( - .lea, + .{ ._, .lea }, start_reg.to64(), Memory.sib(.byte, .{ .base = .{ .reg = addr_reg.to64() }, @@ -8921,7 +9946,7 @@ fn airErrorName(self: *Self, inst: Air.Inst.Index) !void { }), ); try self.asmRegisterMemory( - .lea, + .{ ._, .lea }, end_reg.to32(), Memory.sib(.byte, .{ .base = .{ .reg = end_reg.to64() }, @@ -8931,7 +9956,7 @@ fn airErrorName(self: *Self, inst: Air.Inst.Index) !void { const dst_mcv = try self.allocRegOrMem(inst, false); try self.asmMemoryRegister( - .mov, + .{ ._, .mov }, Memory.sib(.qword, .{ .base = .{ .frame = dst_mcv.load_frame.index }, .disp = dst_mcv.load_frame.off, @@ -8939,7 +9964,7 @@ fn airErrorName(self: *Self, inst: Air.Inst.Index) !void { start_reg.to64(), ); try self.asmMemoryRegister( - .mov, + .{ ._, .mov }, Memory.sib(.qword, .{ .base = .{ .frame = dst_mcv.load_frame.index }, .disp = dst_mcv.load_frame.off + 8, @@ -9035,13 +10060,13 @@ fn airAggregateInit(self: *Self, inst: Air.Inst.Index) !void { try self.truncateRegister(elem_ty, elem_reg); } if (elem_bit_off > 0) try self.genShiftBinOpMir( - .shl, + .{ ._l, .sh }, elem_ty, .{ .register = elem_reg }, .{ .immediate = elem_bit_off }, ); try self.genBinOpMir( - .@"or", + .{ ._, .@"or" }, elem_ty, .{ .load_frame = .{ .index = frame_index, .off = elem_byte_off } }, .{ .register = elem_reg }, @@ -9052,13 +10077,13 @@ fn airAggregateInit(self: *Self, inst: Air.Inst.Index) !void { try self.truncateRegister(elem_ty, registerAlias(reg, elem_abi_size)); } try self.genShiftBinOpMir( - .shr, + .{ ._r, .sh }, elem_ty, .{ .register = reg }, .{ .immediate = elem_abi_bits - elem_bit_off }, ); try self.genBinOpMir( - .@"or", + .{ ._, .@"or" }, elem_ty, .{ .load_frame = .{ .index = frame_index, @@ -9130,9 +10155,150 @@ fn airPrefetch(self: *Self, inst: Air.Inst.Index) !void { fn airMulAdd(self: *Self, inst: 
Air.Inst.Index) !void { const pl_op = self.air.instructions.items(.data)[inst].pl_op; const extra = self.air.extraData(Air.Bin, pl_op.payload).data; - _ = extra; - return self.fail("TODO implement airMulAdd for x86_64", .{}); - //return self.finishAir(inst, result, .{ extra.lhs, extra.rhs, pl_op.operand }); + const ty = self.air.typeOfIndex(inst); + + if (!self.hasFeature(.fma)) return self.fail("TODO implement airMulAdd for {}", .{ + ty.fmt(self.bin_file.options.module.?), + }); + + const ops = [3]Air.Inst.Ref{ extra.lhs, extra.rhs, pl_op.operand }; + var mcvs: [3]MCValue = undefined; + var locks = [1]?RegisterManager.RegisterLock{null} ** 3; + defer for (locks) |reg_lock| if (reg_lock) |lock| self.register_manager.unlockReg(lock); + var order = [1]u2{0} ** 3; + var unused = std.StaticBitSet(3).initFull(); + for (ops, &mcvs, &locks, 0..) |op, *mcv, *lock, op_i| { + const op_index = @intCast(u2, op_i); + mcv.* = try self.resolveInst(op); + if (unused.isSet(0) and mcv.isRegister() and self.reuseOperand(inst, op, op_index, mcv.*)) { + order[op_index] = 1; + unused.unset(0); + } else if (unused.isSet(2) and mcv.isMemory()) { + order[op_index] = 3; + unused.unset(2); + } + switch (mcv.*) { + .register => |reg| lock.* = self.register_manager.lockReg(reg), + else => {}, + } + } + for (&order, &mcvs, &locks) |*mop_index, *mcv, *lock| { + if (mop_index.* != 0) continue; + mop_index.* = 1 + @intCast(u2, unused.toggleFirstSet().?); + if (mop_index.* > 1 and mcv.isRegister()) continue; + const reg = try self.copyToTmpRegister(ty, mcv.*); + mcv.* = .{ .register = reg }; + if (lock.*) |old_lock| self.register_manager.unlockReg(old_lock); + lock.* = self.register_manager.lockRegAssumeUnused(reg); + } + + const mir_tag = if (@as( + ?Mir.Inst.FixedTag, + if (mem.eql(u2, &order, &.{ 1, 3, 2 }) or mem.eql(u2, &order, &.{ 3, 1, 2 })) + switch (ty.zigTypeTag()) { + .Float => switch (ty.floatBits(self.target.*)) { + 32 => .{ .v_ss, .fmadd132 }, + 64 => .{ .v_sd, .fmadd132 }, + 16, 80, 
128 => null, + else => unreachable, + }, + .Vector => switch (ty.childType().zigTypeTag()) { + .Float => switch (ty.childType().floatBits(self.target.*)) { + 32 => switch (ty.vectorLen()) { + 1 => .{ .v_ss, .fmadd132 }, + 2...8 => .{ .v_ps, .fmadd132 }, + else => null, + }, + 64 => switch (ty.vectorLen()) { + 1 => .{ .v_sd, .fmadd132 }, + 2...4 => .{ .v_pd, .fmadd132 }, + else => null, + }, + 16, 80, 128 => null, + else => unreachable, + }, + else => unreachable, + }, + else => unreachable, + } + else if (mem.eql(u2, &order, &.{ 2, 1, 3 }) or mem.eql(u2, &order, &.{ 1, 2, 3 })) + switch (ty.zigTypeTag()) { + .Float => switch (ty.floatBits(self.target.*)) { + 32 => .{ .v_ss, .fmadd213 }, + 64 => .{ .v_sd, .fmadd213 }, + 16, 80, 128 => null, + else => unreachable, + }, + .Vector => switch (ty.childType().zigTypeTag()) { + .Float => switch (ty.childType().floatBits(self.target.*)) { + 32 => switch (ty.vectorLen()) { + 1 => .{ .v_ss, .fmadd213 }, + 2...8 => .{ .v_ps, .fmadd213 }, + else => null, + }, + 64 => switch (ty.vectorLen()) { + 1 => .{ .v_sd, .fmadd213 }, + 2...4 => .{ .v_pd, .fmadd213 }, + else => null, + }, + 16, 80, 128 => null, + else => unreachable, + }, + else => unreachable, + }, + else => unreachable, + } + else if (mem.eql(u2, &order, &.{ 2, 3, 1 }) or mem.eql(u2, &order, &.{ 3, 2, 1 })) + switch (ty.zigTypeTag()) { + .Float => switch (ty.floatBits(self.target.*)) { + 32 => .{ .v_ss, .fmadd231 }, + 64 => .{ .v_sd, .fmadd231 }, + 16, 80, 128 => null, + else => unreachable, + }, + .Vector => switch (ty.childType().zigTypeTag()) { + .Float => switch (ty.childType().floatBits(self.target.*)) { + 32 => switch (ty.vectorLen()) { + 1 => .{ .v_ss, .fmadd231 }, + 2...8 => .{ .v_ps, .fmadd231 }, + else => null, + }, + 64 => switch (ty.vectorLen()) { + 1 => .{ .v_sd, .fmadd231 }, + 2...4 => .{ .v_pd, .fmadd231 }, + else => null, + }, + 16, 80, 128 => null, + else => unreachable, + }, + else => unreachable, + }, + else => unreachable, + } + else + unreachable, + 
)) |tag| tag else return self.fail("TODO implement airMulAdd for {}", .{ + ty.fmt(self.bin_file.options.module.?), + }); + + var mops: [3]MCValue = undefined; + for (order, mcvs) |mop_index, mcv| mops[mop_index - 1] = mcv; + + const abi_size = @intCast(u32, ty.abiSize(self.target.*)); + const mop1_reg = registerAlias(mops[0].getReg().?, abi_size); + const mop2_reg = registerAlias(mops[1].getReg().?, abi_size); + if (mops[2].isRegister()) try self.asmRegisterRegisterRegister( + mir_tag, + mop1_reg, + mop2_reg, + registerAlias(mops[2].getReg().?, abi_size), + ) else try self.asmRegisterRegisterMemory( + mir_tag, + mop1_reg, + mop2_reg, + mops[2].mem(Memory.PtrSize.fromSize(abi_size)), + ); + return self.finishAir(inst, mops[0], ops); } fn resolveInst(self: *Self, ref: Air.Inst.Ref) InnerError!MCValue { @@ -9471,17 +10637,37 @@ fn truncateRegister(self: *Self, ty: Type, reg: Register) !void { switch (int_info.signedness) { .signed => { const shift = @intCast(u6, max_reg_bit_width - int_info.bits); - try self.genShiftBinOpMir(.sal, Type.isize, .{ .register = reg }, .{ .immediate = shift }); - try self.genShiftBinOpMir(.sar, Type.isize, .{ .register = reg }, .{ .immediate = shift }); + try self.genShiftBinOpMir( + .{ ._l, .sa }, + Type.isize, + .{ .register = reg }, + .{ .immediate = shift }, + ); + try self.genShiftBinOpMir( + .{ ._r, .sa }, + Type.isize, + .{ .register = reg }, + .{ .immediate = shift }, + ); }, .unsigned => { const shift = @intCast(u6, max_reg_bit_width - int_info.bits); const mask = (~@as(u64, 0)) >> shift; if (int_info.bits <= 32) { - try self.genBinOpMir(.@"and", Type.u32, .{ .register = reg }, .{ .immediate = mask }); + try self.genBinOpMir( + .{ ._, .@"and" }, + Type.u32, + .{ .register = reg }, + .{ .immediate = mask }, + ); } else { const tmp_reg = try self.copyToTmpRegister(Type.usize, .{ .immediate = mask }); - try self.genBinOpMir(.@"and", Type.usize, .{ .register = reg }, .{ .register = tmp_reg }); + try self.genBinOpMir( + .{ ._, .@"and" 
}, + Type.usize, + .{ .register = reg }, + .{ .register = tmp_reg }, + ); } }, } @@ -9508,3 +10694,13 @@ fn regBitSize(self: *Self, ty: Type) u64 { fn regExtraBits(self: *Self, ty: Type) u64 { return self.regBitSize(ty) - ty.bitSize(self.target.*); } + +fn hasFeature(self: *Self, feature: Target.x86.Feature) bool { + return Target.x86.featureSetHas(self.target.cpu.features, feature); +} +fn hasAnyFeatures(self: *Self, features: anytype) bool { + return Target.x86.featureSetHasAny(self.target.cpu.features, features); +} +fn hasAllFeatures(self: *Self, features: anytype) bool { + return Target.x86.featureSetHasAll(self.target.cpu.features, features); +} diff --git a/src/arch/x86_64/Emit.zig b/src/arch/x86_64/Emit.zig index c6c8f7995c..506092ff17 100644 --- a/src/arch/x86_64/Emit.zig +++ b/src/arch/x86_64/Emit.zig @@ -18,142 +18,152 @@ pub const Error = Lower.Error || error{ }; pub fn emitMir(emit: *Emit) Error!void { - for (0..emit.lower.mir.instructions.len) |i| { - const index = @intCast(Mir.Inst.Index, i); - const inst = emit.lower.mir.instructions.get(index); - - const start_offset = @intCast(u32, emit.code.items.len); - try emit.code_offset_mapping.putNoClobber(emit.lower.allocator, index, start_offset); - for (try emit.lower.lowerMir(inst)) |lower_inst| try lower_inst.encode(emit.code.writer(), .{}); - const end_offset = @intCast(u32, emit.code.items.len); - - switch (inst.tag) { - else => {}, - - .jmp_reloc => try emit.relocs.append(emit.lower.allocator, .{ - .source = start_offset, - .target = inst.data.inst, - .offset = end_offset - 4, - .length = 5, - }), - - .call_extern => if (emit.bin_file.cast(link.File.MachO)) |macho_file| { - // Add relocation to the decl. 
- const atom_index = macho_file.getAtomIndexForSymbol( - .{ .sym_index = inst.data.relocation.atom_index, .file = null }, - ).?; - const target = macho_file.getGlobalByIndex(inst.data.relocation.sym_index); - try link.File.MachO.Atom.addRelocation(macho_file, atom_index, .{ - .type = .branch, + for (0..emit.lower.mir.instructions.len) |mir_i| { + const mir_index = @intCast(Mir.Inst.Index, mir_i); + try emit.code_offset_mapping.putNoClobber( + emit.lower.allocator, + mir_index, + @intCast(u32, emit.code.items.len), + ); + const lowered = try emit.lower.lowerMir(mir_index); + var lowered_relocs = lowered.relocs; + for (lowered.insts, 0..) |lowered_inst, lowered_index| { + const start_offset = @intCast(u32, emit.code.items.len); + try lowered_inst.encode(emit.code.writer(), .{}); + const end_offset = @intCast(u32, emit.code.items.len); + while (lowered_relocs.len > 0 and + lowered_relocs[0].lowered_inst_index == lowered_index) : ({ + lowered_relocs = lowered_relocs[1..]; + }) switch (lowered_relocs[0].target) { + .inst => |target| try emit.relocs.append(emit.lower.allocator, .{ + .source = start_offset, .target = target, .offset = end_offset - 4, - .addend = 0, - .pcrel = true, - .length = 2, - }); - } else if (emit.bin_file.cast(link.File.Coff)) |coff_file| { - // Add relocation to the decl. 
- const atom_index = coff_file.getAtomIndexForSymbol( - .{ .sym_index = inst.data.relocation.atom_index, .file = null }, - ).?; - const target = coff_file.getGlobalByIndex(inst.data.relocation.sym_index); - try link.File.Coff.Atom.addRelocation(coff_file, atom_index, .{ - .type = .direct, - .target = target, - .offset = end_offset - 4, - .addend = 0, - .pcrel = true, - .length = 2, - }); - } else return emit.fail("TODO implement {} for {}", .{ inst.tag, emit.bin_file.tag }), - - .mov_linker, .lea_linker => if (emit.bin_file.cast(link.File.MachO)) |macho_file| { - const metadata = - emit.lower.mir.extraData(Mir.LeaRegisterReloc, inst.data.payload).data; - const atom_index = macho_file.getAtomIndexForSymbol(.{ - .sym_index = metadata.atom_index, - .file = null, - }).?; - try link.File.MachO.Atom.addRelocation(macho_file, atom_index, .{ - .type = switch (inst.ops) { - .got_reloc => .got, - .direct_reloc => .signed, - .tlv_reloc => .tlv, - else => unreachable, - }, - .target = .{ .sym_index = metadata.sym_index, .file = null }, - .offset = @intCast(u32, end_offset - 4), - .addend = 0, - .pcrel = true, - .length = 2, - }); - } else if (emit.bin_file.cast(link.File.Coff)) |coff_file| { - const metadata = - emit.lower.mir.extraData(Mir.LeaRegisterReloc, inst.data.payload).data; - const atom_index = coff_file.getAtomIndexForSymbol(.{ - .sym_index = metadata.atom_index, - .file = null, - }).?; - try link.File.Coff.Atom.addRelocation(coff_file, atom_index, .{ - .type = switch (inst.ops) { - .got_reloc => .got, - .direct_reloc => .direct, - .import_reloc => .import, - else => unreachable, - }, - .target = switch (inst.ops) { - .got_reloc, - .direct_reloc, - => .{ .sym_index = metadata.sym_index, .file = null }, - .import_reloc => coff_file.getGlobalByIndex(metadata.sym_index), - else => unreachable, - }, - .offset = @intCast(u32, end_offset - 4), - .addend = 0, - .pcrel = true, - .length = 2, - }); - } else return emit.fail("TODO implement {} for {}", .{ inst.tag, 
emit.bin_file.tag }), - - .jcc => try emit.relocs.append(emit.lower.allocator, .{ - .source = start_offset, - .target = inst.data.inst_cc.inst, - .offset = end_offset - 4, - .length = 6, - }), - - .dbg_line => try emit.dbgAdvancePCAndLine( - inst.data.line_column.line, - inst.data.line_column.column, - ), + .length = @intCast(u5, end_offset - start_offset), + }), + .linker_extern_fn => |symbol| if (emit.bin_file.cast(link.File.MachO)) |macho_file| { + // Add relocation to the decl. + const atom_index = macho_file.getAtomIndexForSymbol( + .{ .sym_index = symbol.atom_index, .file = null }, + ).?; + const target = macho_file.getGlobalByIndex(symbol.sym_index); + try link.File.MachO.Atom.addRelocation(macho_file, atom_index, .{ + .type = .branch, + .target = target, + .offset = end_offset - 4, + .addend = 0, + .pcrel = true, + .length = 2, + }); + } else if (emit.bin_file.cast(link.File.Coff)) |coff_file| { + // Add relocation to the decl. + const atom_index = coff_file.getAtomIndexForSymbol( + .{ .sym_index = symbol.atom_index, .file = null }, + ).?; + const target = coff_file.getGlobalByIndex(symbol.sym_index); + try link.File.Coff.Atom.addRelocation(coff_file, atom_index, .{ + .type = .direct, + .target = target, + .offset = end_offset - 4, + .addend = 0, + .pcrel = true, + .length = 2, + }); + } else return emit.fail("TODO implement extern reloc for {s}", .{ + @tagName(emit.bin_file.tag), + }), + .linker_got, + .linker_direct, + .linker_import, + .linker_tlv, + => |symbol| if (emit.bin_file.cast(link.File.MachO)) |macho_file| { + const atom_index = macho_file.getAtomIndexForSymbol(.{ + .sym_index = symbol.atom_index, + .file = null, + }).?; + try link.File.MachO.Atom.addRelocation(macho_file, atom_index, .{ + .type = switch (lowered_relocs[0].target) { + .linker_got => .got, + .linker_direct => .signed, + .linker_tlv => .tlv, + else => unreachable, + }, + .target = .{ .sym_index = symbol.sym_index, .file = null }, + .offset = @intCast(u32, end_offset - 4), + 
.addend = 0, + .pcrel = true, + .length = 2, + }); + } else if (emit.bin_file.cast(link.File.Coff)) |coff_file| { + const atom_index = coff_file.getAtomIndexForSymbol(.{ + .sym_index = symbol.atom_index, + .file = null, + }).?; + try link.File.Coff.Atom.addRelocation(coff_file, atom_index, .{ + .type = switch (lowered_relocs[0].target) { + .linker_got => .got, + .linker_direct => .direct, + .linker_import => .import, + else => unreachable, + }, + .target = switch (lowered_relocs[0].target) { + .linker_got, + .linker_direct, + => .{ .sym_index = symbol.sym_index, .file = null }, + .linker_import => coff_file.getGlobalByIndex(symbol.sym_index), + else => unreachable, + }, + .offset = @intCast(u32, end_offset - 4), + .addend = 0, + .pcrel = true, + .length = 2, + }); + } else return emit.fail("TODO implement linker reloc for {s}", .{ + @tagName(emit.bin_file.tag), + }), + }; + } + std.debug.assert(lowered_relocs.len == 0); - .dbg_prologue_end => { - switch (emit.debug_output) { - .dwarf => |dw| { - try dw.setPrologueEnd(); - log.debug("mirDbgPrologueEnd (line={d}, col={d})", .{ - emit.prev_di_line, emit.prev_di_column, - }); - try emit.dbgAdvancePCAndLine(emit.prev_di_line, emit.prev_di_column); + if (lowered.insts.len == 0) { + const mir_inst = emit.lower.mir.instructions.get(mir_index); + switch (mir_inst.tag) { + else => unreachable, + .pseudo => switch (mir_inst.ops) { + else => unreachable, + .pseudo_dbg_prologue_end_none => { + switch (emit.debug_output) { + .dwarf => |dw| { + try dw.setPrologueEnd(); + log.debug("mirDbgPrologueEnd (line={d}, col={d})", .{ + emit.prev_di_line, emit.prev_di_column, + }); + try emit.dbgAdvancePCAndLine(emit.prev_di_line, emit.prev_di_column); + }, + .plan9 => {}, + .none => {}, + } }, - .plan9 => {}, - .none => {}, - } - }, - - .dbg_epilogue_begin => { - switch (emit.debug_output) { - .dwarf => |dw| { - try dw.setEpilogueBegin(); - log.debug("mirDbgEpilogueBegin (line={d}, col={d})", .{ - emit.prev_di_line, emit.prev_di_column, - 
}); - try emit.dbgAdvancePCAndLine(emit.prev_di_line, emit.prev_di_column); + .pseudo_dbg_line_line_column => try emit.dbgAdvancePCAndLine( + mir_inst.data.line_column.line, + mir_inst.data.line_column.column, + ), + .pseudo_dbg_epilogue_begin_none => { + switch (emit.debug_output) { + .dwarf => |dw| { + try dw.setEpilogueBegin(); + log.debug("mirDbgEpilogueBegin (line={d}, col={d})", .{ + emit.prev_di_line, emit.prev_di_column, + }); + try emit.dbgAdvancePCAndLine(emit.prev_di_line, emit.prev_di_column); + }, + .plan9 => {}, + .none => {}, + } }, - .plan9 => {}, - .none => {}, - } - }, + .pseudo_dead_none => {}, + }, + } } } try emit.fixupRelocs(); diff --git a/src/arch/x86_64/Encoding.zig b/src/arch/x86_64/Encoding.zig index 944fe85458..b6b49e8939 100644 --- a/src/arch/x86_64/Encoding.zig +++ b/src/arch/x86_64/Encoding.zig @@ -23,6 +23,7 @@ const Data = struct { opc: [7]u8, modrm_ext: u3, mode: Mode, + feature: Feature, }; pub fn findByMnemonic( @@ -57,9 +58,9 @@ pub fn findByMnemonic( var shortest_len: ?usize = null; next: for (mnemonic_to_encodings_map[@enumToInt(mnemonic)]) |data| { switch (data.mode) { - .rex => if (!rex_required) continue, - .long, .sse_long, .sse2_long => {}, - else => if (rex_required) continue, + .none, .short => if (rex_required) continue, + .rex, .rex_short => if (!rex_required) continue, + else => {}, } for (input_ops, data.ops) |input_op, data_op| if (!input_op.isSubset(data_op)) continue :next; @@ -88,28 +89,13 @@ pub fn findByOpcode(opc: []const u8, prefixes: struct { if (modrm_ext) |ext| if (ext != data.modrm_ext) continue; if (!std.mem.eql(u8, opc, enc.opcode())) continue; if (prefixes.rex.w) { - switch (data.mode) { - .short, .fpu, .sse, .sse2, .sse4_1, .none => continue, - .long, .sse_long, .sse2_long, .rex => {}, - } + if (!data.mode.isLong()) continue; } else if (prefixes.rex.present and !prefixes.rex.isSet()) { - switch (data.mode) { - .rex => {}, - else => continue, - } + if (!data.mode.isRex()) continue; } else if 
(prefixes.legacy.prefix_66) { - switch (enc.operandBitSize()) { - 16 => {}, - else => continue, - } + if (!data.mode.isShort()) continue; } else { - switch (data.mode) { - .none => switch (enc.operandBitSize()) { - 16 => continue, - else => {}, - }, - else => continue, - } + if (data.mode.isShort()) continue; } return enc; }; @@ -130,30 +116,11 @@ pub fn mandatoryPrefix(encoding: *const Encoding) ?u8 { pub fn modRmExt(encoding: Encoding) u3 { return switch (encoding.data.op_en) { - .m, .mi, .m1, .mc => encoding.data.modrm_ext, + .m, .mi, .m1, .mc, .vmi => encoding.data.modrm_ext, else => unreachable, }; } -pub fn operandBitSize(encoding: Encoding) u64 { - switch (encoding.data.mode) { - .short => return 16, - .long, .sse_long, .sse2_long => return 64, - else => {}, - } - const bit_size: u64 = switch (encoding.data.op_en) { - .np => switch (encoding.data.ops[0]) { - .o16 => 16, - .o32 => 32, - .o64 => 64, - else => 32, - }, - .td => encoding.data.ops[1].bitSize(), - else => encoding.data.ops[0].bitSize(), - }; - return bit_size; -} - pub fn format( encoding: Encoding, comptime fmt: []const u8, @@ -162,14 +129,41 @@ pub fn format( ) !void { _ = options; _ = fmt; - switch (encoding.data.mode) { - .long, .sse_long, .sse2_long => try writer.writeAll("REX.W + "), - else => {}, - } - for (encoding.opcode()) |byte| { - try writer.print("{x:0>2} ", .{byte}); - } + var opc = encoding.opcode(); + if (encoding.data.mode.isVex()) { + try writer.writeAll("VEX."); + + try writer.writeAll(switch (encoding.data.mode) { + .vex_128_w0, .vex_128_w1, .vex_128_wig => "128", + .vex_256_w0, .vex_256_w1, .vex_256_wig => "256", + .vex_lig_w0, .vex_lig_w1, .vex_lig_wig => "LIG", + .vex_lz_w0, .vex_lz_w1, .vex_lz_wig => "LZ", + else => unreachable, + }); + + switch (opc[0]) { + else => {}, + 0x66, 0xf3, 0xf2 => { + try writer.print(".{X:0>2}", .{opc[0]}); + opc = opc[1..]; + }, + } + + try writer.print(".{}", .{std.fmt.fmtSliceHexUpper(opc[0 .. 
opc.len - 1])}); + opc = opc[opc.len - 1 ..]; + + try writer.writeAll(".W"); + try writer.writeAll(switch (encoding.data.mode) { + .vex_128_w0, .vex_256_w0, .vex_lig_w0, .vex_lz_w0 => "0", + .vex_128_w1, .vex_256_w1, .vex_lig_w1, .vex_lz_w1 => "1", + .vex_128_wig, .vex_256_wig, .vex_lig_wig, .vex_lz_wig => "IG", + else => unreachable, + }); + + try writer.writeByte(' '); + } else if (encoding.data.mode.isLong()) try writer.writeAll("REX.W + "); + for (opc) |byte| try writer.print("{x:0>2} ", .{byte}); switch (encoding.data.op_en) { .np, .fd, .td, .i, .zi, .d => {}, @@ -183,16 +177,17 @@ pub fn format( }; try writer.print("+{s} ", .{tag}); }, - .m, .mi, .m1, .mc => try writer.print("/{d} ", .{encoding.modRmExt()}), - .mr, .rm, .rmi, .mri, .mrc => try writer.writeAll("/r "), + .m, .mi, .m1, .mc, .vmi => try writer.print("/{d} ", .{encoding.modRmExt()}), + .mr, .rm, .rmi, .mri, .mrc, .rvm, .rvmi, .mvr => try writer.writeAll("/r "), } switch (encoding.data.op_en) { - .i, .d, .zi, .oi, .mi, .rmi, .mri => { + .i, .d, .zi, .oi, .mi, .rmi, .mri, .vmi, .rvmi => { const op = switch (encoding.data.op_en) { .i, .d => encoding.data.ops[0], .zi, .oi, .mi => encoding.data.ops[1], - .rmi, .mri => encoding.data.ops[2], + .rmi, .mri, .vmi => encoding.data.ops[2], + .rvmi => encoding.data.ops[3], else => unreachable, }; const tag = switch (op) { @@ -207,7 +202,7 @@ pub fn format( }; try writer.print("{s} ", .{tag}); }, - .np, .fd, .td, .o, .m, .m1, .mc, .mr, .rm, .mrc => {}, + .np, .fd, .td, .o, .m, .m1, .mc, .mr, .rm, .mrc, .rvm, .mvr => {}, } try writer.print("{s} ", .{@tagName(encoding.mnemonic)}); @@ -267,44 +262,79 @@ pub const Mnemonic = enum { // MMX movd, // SSE - addss, + addps, addss, andps, andnps, cmpss, cvtsi2ss, - divss, - maxss, minss, - movaps, movss, movups, - mulss, + divps, divss, + maxps, maxss, + minps, minss, + movaps, movhlps, movss, movups, + mulps, mulss, orps, - pextrw, - pinsrw, - sqrtps, - sqrtss, - subss, + pextrw, pinsrw, + sqrtps, sqrtss, + subps, 
subss, ucomiss, xorps, // SSE2 - addsd, + addpd, addsd, andpd, andnpd, //cmpsd, cvtsd2ss, cvtsi2sd, cvtss2sd, - divsd, - maxsd, minsd, + divpd, divsd, + maxpd, maxsd, + minpd, minsd, movapd, movq, //movd, movsd, movupd, - mulsd, + mulpd, mulsd, orpd, - sqrtpd, - sqrtsd, - subsd, + pshufhw, pshuflw, + psrld, psrlq, psrlw, + punpckhbw, punpckhdq, punpckhqdq, punpckhwd, + punpcklbw, punpckldq, punpcklqdq, punpcklwd, + sqrtpd, sqrtsd, + subpd, subsd, ucomisd, xorpd, + // SSE3 + movddup, movshdup, movsldup, // SSE4.1 - roundss, - roundsd, + pextrb, pextrd, pextrq, + pinsrb, pinsrd, pinsrq, + roundpd, roundps, roundsd, roundss, + // AVX + vaddpd, vaddps, vaddsd, vaddss, + vcvtsd2ss, vcvtsi2sd, vcvtsi2ss, vcvtss2sd, + vdivpd, vdivps, vdivsd, vdivss, + vmaxpd, vmaxps, vmaxsd, vmaxss, + vminpd, vminps, vminsd, vminss, + vmovapd, vmovaps, + vmovddup, vmovhlps, + vmovsd, + vmovshdup, vmovsldup, + vmovss, + vmovupd, vmovups, + vmulpd, vmulps, vmulsd, vmulss, + vpextrb, vpextrd, vpextrq, vpextrw, + vpinsrb, vpinsrd, vpinsrq, vpinsrw, + vpshufhw, vpshuflw, + vpsrld, vpsrlq, vpsrlw, + vpunpckhbw, vpunpckhdq, vpunpckhqdq, vpunpckhwd, + vpunpcklbw, vpunpckldq, vpunpcklqdq, vpunpcklwd, + vroundpd, vroundps, vroundsd, vroundss, + vsqrtpd, vsqrtps, vsqrtsd, vsqrtss, + vsubpd, vsubps, vsubsd, vsubss, + // F16C + vcvtph2ps, vcvtps2ph, + // FMA + vfmadd132pd, vfmadd213pd, vfmadd231pd, + vfmadd132ps, vfmadd213ps, vfmadd231ps, + vfmadd132sd, vfmadd213sd, vfmadd231sd, + vfmadd132ss, vfmadd213ss, vfmadd231ss, // zig fmt: on }; @@ -317,6 +347,7 @@ pub const OpEn = enum { fd, td, m1, mc, mi, mr, rm, rmi, mri, mrc, + vmi, rvm, rvmi, mvr, // zig fmt: on }; @@ -331,12 +362,14 @@ pub const Op = enum { cl, r8, r16, r32, r64, rm8, rm16, rm32, rm64, - m8, m16, m32, m64, m80, m128, + r32_m8, r32_m16, r64_m16, + m8, m16, m32, m64, m80, m128, m256, rel8, rel16, rel32, m, moffs, sreg, xmm, xmm_m32, xmm_m64, xmm_m128, + ymm, ymm_m256, // zig fmt: on pub fn fromOperand(operand: Instruction.Operand) Op { @@ 
-348,6 +381,7 @@ pub const Op = enum { .segment => return .sreg, .floating_point => return switch (reg.bitSize()) { 128 => .xmm, + 256 => .ymm, else => unreachable, }, .general_purpose => { @@ -381,6 +415,7 @@ pub const Op = enum { 64 => .m64, 80 => .m80, 128 => .m128, + 256 => .m256, else => unreachable, }; }, @@ -409,16 +444,52 @@ pub const Op = enum { } } - pub fn bitSize(op: Op) u64 { + pub fn immBitSize(op: Op) u64 { return switch (op) { .none, .o16, .o32, .o64, .moffs, .m, .sreg => unreachable, + .al, .cl, .r8, .rm8, .r32_m8 => unreachable, + .ax, .r16, .rm16 => unreachable, + .eax, .r32, .rm32, .r32_m16 => unreachable, + .rax, .r64, .rm64, .r64_m16 => unreachable, + .xmm, .xmm_m32, .xmm_m64, .xmm_m128 => unreachable, + .ymm, .ymm_m256 => unreachable, + .m8, .m16, .m32, .m64, .m80, .m128, .m256 => unreachable, .unity => 1, - .imm8, .imm8s, .al, .cl, .r8, .m8, .rm8, .rel8 => 8, - .imm16, .imm16s, .ax, .r16, .m16, .rm16, .rel16 => 16, - .imm32, .imm32s, .eax, .r32, .m32, .rm32, .rel32, .xmm_m32 => 32, - .imm64, .rax, .r64, .m64, .rm64, .xmm_m64 => 64, + .imm8, .imm8s, .rel8 => 8, + .imm16, .imm16s, .rel16 => 16, + .imm32, .imm32s, .rel32 => 32, + .imm64 => 64, + }; + } + + pub fn regBitSize(op: Op) u64 { + return switch (op) { + .none, .o16, .o32, .o64, .moffs, .m, .sreg => unreachable, + .unity, .imm8, .imm8s, .imm16, .imm16s, .imm32, .imm32s, .imm64 => unreachable, + .rel8, .rel16, .rel32 => unreachable, + .m8, .m16, .m32, .m64, .m80, .m128, .m256 => unreachable, + .al, .cl, .r8, .rm8 => 8, + .ax, .r16, .rm16 => 16, + .eax, .r32, .rm32, .r32_m8, .r32_m16 => 32, + .rax, .r64, .rm64, .r64_m16 => 64, + .xmm, .xmm_m32, .xmm_m64, .xmm_m128 => 128, + .ymm, .ymm_m256 => 256, + }; + } + + pub fn memBitSize(op: Op) u64 { + return switch (op) { + .none, .o16, .o32, .o64, .moffs, .m, .sreg => unreachable, + .unity, .imm8, .imm8s, .imm16, .imm16s, .imm32, .imm32s, .imm64 => unreachable, + .rel8, .rel16, .rel32 => unreachable, + .al, .cl, .r8, .ax, .r16, .eax, .r32, .rax, 
.r64, .xmm, .ymm => unreachable, + .m8, .rm8, .r32_m8 => 8, + .m16, .rm16, .r32_m16, .r64_m16 => 16, + .m32, .rm32, .xmm_m32 => 32, + .m64, .rm64, .xmm_m64 => 64, .m80 => 80, - .m128, .xmm, .xmm_m128 => 128, + .m128, .xmm_m128 => 128, + .m256, .ymm_m256 => 256, }; } @@ -441,7 +512,9 @@ pub const Op = enum { .al, .ax, .eax, .rax, .r8, .r16, .r32, .r64, .rm8, .rm16, .rm32, .rm64, + .r32_m8, .r32_m16, .r64_m16, .xmm, .xmm_m32, .xmm_m64, .xmm_m128, + .ymm, .ymm_m256, => true, else => false, }; @@ -465,9 +538,11 @@ pub const Op = enum { // zig fmt: off return switch (op) { .rm8, .rm16, .rm32, .rm64, - .m8, .m16, .m32, .m64, .m80, .m128, + .r32_m8, .r32_m16, .r64_m16, + .m8, .m16, .m32, .m64, .m80, .m128, .m256, .m, .xmm_m32, .xmm_m64, .xmm_m128, + .ymm_m256, => true, else => false, }; @@ -487,15 +562,10 @@ pub const Op = enum { .al, .ax, .eax, .rax, .cl => .general_purpose, .r8, .r16, .r32, .r64 => .general_purpose, .rm8, .rm16, .rm32, .rm64 => .general_purpose, + .r32_m8, .r32_m16, .r64_m16 => .general_purpose, .sreg => .segment, .xmm, .xmm_m32, .xmm_m64, .xmm_m128 => .floating_point, - }; - } - - pub fn isFloatingPointRegister(op: Op) bool { - return switch (op) { - .xmm, .xmm_m32, .xmm_m64, .xmm_m128 => true, - else => false, + .ymm, .ymm_m256 => .floating_point, }; } @@ -512,30 +582,27 @@ pub const Op = enum { if (op.isRegister() and target.isRegister()) { return switch (target) { .cl, .al, .ax, .eax, .rax => op == target, - else => op.class() == target.class() and switch (target.class()) { - .floating_point => true, - else => op.bitSize() == target.bitSize(), - }, + else => op.class() == target.class() and op.regBitSize() == target.regBitSize(), }; } if (op.isMemory() and target.isMemory()) { switch (target) { .m => return true, - else => return op.bitSize() == target.bitSize(), + else => return op.memBitSize() == target.memBitSize(), } } if (op.isImmediate() and target.isImmediate()) { switch (target) { - .imm64 => if (op.bitSize() <= 64) return true, - .imm32s, 
.rel32 => if (op.bitSize() < 32 or (op.bitSize() == 32 and op.isSigned())) + .imm64 => if (op.immBitSize() <= 64) return true, + .imm32s, .rel32 => if (op.immBitSize() < 32 or (op.immBitSize() == 32 and op.isSigned())) return true, - .imm32 => if (op.bitSize() <= 32) return true, - .imm16s, .rel16 => if (op.bitSize() < 16 or (op.bitSize() == 16 and op.isSigned())) + .imm32 => if (op.immBitSize() <= 32) return true, + .imm16s, .rel16 => if (op.immBitSize() < 16 or (op.immBitSize() == 16 and op.isSigned())) return true, - .imm16 => if (op.bitSize() <= 16) return true, - .imm8s, .rel8 => if (op.bitSize() < 8 or (op.bitSize() == 8 and op.isSigned())) + .imm16 => if (op.immBitSize() <= 16) return true, + .imm8s, .rel8 => if (op.immBitSize() < 8 or (op.immBitSize() == 8 and op.isSigned())) return true, - .imm8 => if (op.bitSize() <= 8) return true, + .imm8 => if (op.immBitSize() <= 8) return true, else => {}, } return op == target; @@ -547,16 +614,81 @@ pub const Op = enum { }; pub const Mode = enum { + // zig fmt: off + none, + short, long, + rex, rex_short, + vex_128_w0, vex_128_w1, vex_128_wig, + vex_256_w0, vex_256_w1, vex_256_wig, + vex_lig_w0, vex_lig_w1, vex_lig_wig, + vex_lz_w0, vex_lz_w1, vex_lz_wig, + // zig fmt: on + + pub fn isShort(mode: Mode) bool { + return switch (mode) { + .short, .rex_short => true, + else => false, + }; + } + + pub fn isLong(mode: Mode) bool { + return switch (mode) { + .long, + .vex_128_w1, + .vex_256_w1, + .vex_lig_w1, + .vex_lz_w1, + => true, + else => false, + }; + } + + pub fn isRex(mode: Mode) bool { + return switch (mode) { + else => false, + .rex, .rex_short => true, + }; + } + + pub fn isVex(mode: Mode) bool { + return switch (mode) { + // zig fmt: off + else => false, + .vex_128_w0, .vex_128_w1, .vex_128_wig, + .vex_256_w0, .vex_256_w1, .vex_256_wig, + .vex_lig_w0, .vex_lig_w1, .vex_lig_wig, + .vex_lz_w0, .vex_lz_w1, .vex_lz_wig, + => true, + // zig fmt: on + }; + } + + pub fn isVecLong(mode: Mode) bool { + return switch 
(mode) { + // zig fmt: off + else => unreachable, + .vex_128_w0, .vex_128_w1, .vex_128_wig, + .vex_lig_w0, .vex_lig_w1, .vex_lig_wig, + .vex_lz_w0, .vex_lz_w1, .vex_lz_wig, + => false, + .vex_256_w0, .vex_256_w1, .vex_256_wig, + => true, + // zig fmt: on + }; + } +}; + +pub const Feature = enum { none, - short, - fpu, - rex, - long, + avx, + avx2, + f16c, + fma, sse, - sse_long, sse2, - sse2_long, + sse3, sse4_1, + x87, }; fn estimateInstructionLength(prefix: Prefix, encoding: Encoding, ops: []const Operand) usize { @@ -573,7 +705,7 @@ fn estimateInstructionLength(prefix: Prefix, encoding: Encoding, ops: []const Op } const mnemonic_to_encodings_map = init: { - @setEvalBranchQuota(100_000); + @setEvalBranchQuota(20_000); const encodings = @import("encodings.zig"); var entries = encodings.table; std.sort.sort(encodings.Entry, &entries, {}, struct { @@ -593,6 +725,7 @@ const mnemonic_to_encodings_map = init: { .opc = undefined, .modrm_ext = entry[4], .mode = entry[5], + .feature = entry[6], }; // TODO: use `@memcpy` for these. When I did that, I got a false positive // compile error for this copy happening at compile time. 
diff --git a/src/arch/x86_64/Lower.zig b/src/arch/x86_64/Lower.zig index 4289cfaf2a..c32e7fc974 100644 --- a/src/arch/x86_64/Lower.zig +++ b/src/arch/x86_64/Lower.zig @@ -5,13 +5,22 @@ mir: Mir, target: *const std.Target, err_msg: ?*ErrorMsg = null, src_loc: Module.SrcLoc, -result: [ +result_insts_len: u8 = undefined, +result_relocs_len: u8 = undefined, +result_insts: [ std.mem.max(usize, &.{ - abi.Win64.callee_preserved_regs.len, - abi.SysV.callee_preserved_regs.len, + 2, // cmovcc: cmovcc \ cmovcc + 3, // setcc: setcc \ setcc \ logicop + 2, // jcc: jcc \ jcc + abi.Win64.callee_preserved_regs.len, // push_regs/pop_regs + abi.SysV.callee_preserved_regs.len, // push_regs/pop_regs }) ]Instruction = undefined, -result_len: usize = undefined, +result_relocs: [ + std.mem.max(usize, &.{ + 2, // jcc: jcc \ jcc + }) +]Reloc = undefined, pub const Error = error{ OutOfMemory, @@ -20,155 +29,155 @@ pub const Error = error{ CannotEncode, }; -/// The returned slice is overwritten by the next call to lowerMir. -pub fn lowerMir(lower: *Lower, inst: Mir.Inst) Error![]const Instruction { - lower.result = undefined; - errdefer lower.result = undefined; - lower.result_len = 0; - defer lower.result_len = undefined; +pub const Reloc = struct { + lowered_inst_index: u8, + target: Target, + + const Target = union(enum) { + inst: Mir.Inst.Index, + linker_extern_fn: Mir.Reloc, + linker_got: Mir.Reloc, + linker_direct: Mir.Reloc, + linker_import: Mir.Reloc, + linker_tlv: Mir.Reloc, + }; +}; +/// The returned slice is overwritten by the next call to lowerMir. 
+pub fn lowerMir(lower: *Lower, index: Mir.Inst.Index) Error!struct { + insts: []const Instruction, + relocs: []const Reloc, +} { + lower.result_insts = undefined; + lower.result_relocs = undefined; + errdefer lower.result_insts = undefined; + errdefer lower.result_relocs = undefined; + lower.result_insts_len = 0; + lower.result_relocs_len = 0; + defer lower.result_insts_len = undefined; + defer lower.result_relocs_len = undefined; + + const inst = lower.mir.instructions.get(index); switch (inst.tag) { - .adc, - .add, - .@"and", - .bsf, - .bsr, - .bswap, - .bt, - .btc, - .btr, - .bts, - .call, - .cbw, - .cwde, - .cdqe, - .cwd, - .cdq, - .cqo, - .cmp, - .cmpxchg, - .div, - .fisttp, - .fld, - .idiv, - .imul, - .int3, - .jmp, - .lea, - .lfence, - .lzcnt, - .mfence, - .mov, - .movbe, - .movd, - .movq, - .movzx, - .mul, - .neg, - .nop, - .not, - .@"or", - .pop, - .popcnt, - .push, - .rcl, - .rcr, - .ret, - .rol, - .ror, - .sal, - .sar, - .sbb, - .sfence, - .shl, - .shld, - .shr, - .shrd, - .sub, - .syscall, - .@"test", - .tzcnt, - .ud2, - .xadd, - .xchg, - .xor, - - .addss, - .andnps, - .andps, - .cmpss, - .cvtsi2ss, - .divss, - .maxss, - .minss, - .movaps, - .movss, - .movups, - .mulss, - .orps, - .pextrw, - .pinsrw, - .roundss, - .sqrtps, - .sqrtss, - .subss, - .ucomiss, - .xorps, - .addsd, - .andnpd, - .andpd, - .cmpsd, - .cvtsd2ss, - .cvtsi2sd, - .cvtss2sd, - .divsd, - .maxsd, - .minsd, - .movsd, - .mulsd, - .orpd, - .roundsd, - .sqrtpd, - .sqrtsd, - .subsd, - .ucomisd, - .xorpd, - => try lower.mirGeneric(inst), - - .cmps, - .lods, - .movs, - .scas, - .stos, - => try lower.mirString(inst), - - .cmpxchgb => try lower.mirCmpxchgBytes(inst), - - .jmp_reloc => try lower.emit(.none, .jmp, &.{.{ .imm = Immediate.s(0) }}), - - .call_extern => try lower.emit(.none, .call, &.{.{ .imm = Immediate.s(0) }}), - - .lea_linker => try lower.mirLeaLinker(inst), - .mov_linker => try lower.mirMovLinker(inst), - - .mov_moffs => try lower.mirMovMoffs(inst), - - .movsx => try 
lower.mirMovsx(inst), - .cmovcc => try lower.mirCmovcc(inst), - .setcc => try lower.mirSetcc(inst), - .jcc => try lower.emit(.none, mnem_cc(.j, inst.data.inst_cc.cc), &.{.{ .imm = Immediate.s(0) }}), + else => try lower.generic(inst), + .pseudo => switch (inst.ops) { + .pseudo_cmov_z_and_np_rr => { + try lower.emit(.none, .cmovnz, &.{ + .{ .reg = inst.data.rr.r2 }, + .{ .reg = inst.data.rr.r1 }, + }); + try lower.emit(.none, .cmovnp, &.{ + .{ .reg = inst.data.rr.r1 }, + .{ .reg = inst.data.rr.r2 }, + }); + }, + .pseudo_cmov_nz_or_p_rr => { + try lower.emit(.none, .cmovnz, &.{ + .{ .reg = inst.data.rr.r1 }, + .{ .reg = inst.data.rr.r2 }, + }); + try lower.emit(.none, .cmovp, &.{ + .{ .reg = inst.data.rr.r1 }, + .{ .reg = inst.data.rr.r2 }, + }); + }, + .pseudo_cmov_nz_or_p_rm_sib, + .pseudo_cmov_nz_or_p_rm_rip, + => { + try lower.emit(.none, .cmovnz, &.{ + .{ .reg = inst.data.rx.r1 }, + .{ .mem = lower.mem(inst.ops, inst.data.rx.payload) }, + }); + try lower.emit(.none, .cmovp, &.{ + .{ .reg = inst.data.rx.r1 }, + .{ .mem = lower.mem(inst.ops, inst.data.rx.payload) }, + }); + }, + .pseudo_set_z_and_np_r => { + try lower.emit(.none, .setz, &.{ + .{ .reg = inst.data.r_scratch.r1 }, + }); + try lower.emit(.none, .setnp, &.{ + .{ .reg = inst.data.r_scratch.scratch_reg }, + }); + try lower.emit(.none, .@"and", &.{ + .{ .reg = inst.data.r_scratch.r1 }, + .{ .reg = inst.data.r_scratch.scratch_reg }, + }); + }, + .pseudo_set_z_and_np_m_sib, + .pseudo_set_z_and_np_m_rip, + => { + try lower.emit(.none, .setz, &.{ + .{ .mem = lower.mem(inst.ops, inst.data.x_scratch.payload) }, + }); + try lower.emit(.none, .setnp, &.{ + .{ .reg = inst.data.x_scratch.scratch_reg }, + }); + try lower.emit(.none, .@"and", &.{ + .{ .mem = lower.mem(inst.ops, inst.data.x_scratch.payload) }, + .{ .reg = inst.data.x_scratch.scratch_reg }, + }); + }, + .pseudo_set_nz_or_p_r => { + try lower.emit(.none, .setnz, &.{ + .{ .reg = inst.data.r_scratch.r1 }, + }); + try lower.emit(.none, .setp, &.{ + .{ .reg 
= inst.data.r_scratch.scratch_reg }, + }); + try lower.emit(.none, .@"or", &.{ + .{ .reg = inst.data.r_scratch.r1 }, + .{ .reg = inst.data.r_scratch.scratch_reg }, + }); + }, + .pseudo_set_nz_or_p_m_sib, + .pseudo_set_nz_or_p_m_rip, + => { + try lower.emit(.none, .setnz, &.{ + .{ .mem = lower.mem(inst.ops, inst.data.x_scratch.payload) }, + }); + try lower.emit(.none, .setp, &.{ + .{ .reg = inst.data.x_scratch.scratch_reg }, + }); + try lower.emit(.none, .@"or", &.{ + .{ .mem = lower.mem(inst.ops, inst.data.x_scratch.payload) }, + .{ .reg = inst.data.x_scratch.scratch_reg }, + }); + }, + .pseudo_j_z_and_np_inst => { + try lower.emit(.none, .jnz, &.{ + .{ .imm = lower.reloc(.{ .inst = index + 1 }) }, + }); + try lower.emit(.none, .jnp, &.{ + .{ .imm = lower.reloc(.{ .inst = inst.data.inst.inst }) }, + }); + }, + .pseudo_j_nz_or_p_inst => { + try lower.emit(.none, .jnz, &.{ + .{ .imm = lower.reloc(.{ .inst = inst.data.inst.inst }) }, + }); + try lower.emit(.none, .jp, &.{ + .{ .imm = lower.reloc(.{ .inst = inst.data.inst.inst }) }, + }); + }, - .push_regs => try lower.mirPushPopRegisterList(inst, .push), - .pop_regs => try lower.mirPushPopRegisterList(inst, .pop), + .pseudo_push_reg_list => try lower.pushPopRegList(.push, inst), + .pseudo_pop_reg_list => try lower.pushPopRegList(.pop, inst), - .dbg_line, - .dbg_prologue_end, - .dbg_epilogue_begin, - .dead, - => {}, + .pseudo_dbg_prologue_end_none, + .pseudo_dbg_line_line_column, + .pseudo_dbg_epilogue_begin_none, + .pseudo_dead_none, + => {}, + else => unreachable, + }, } - return lower.result[0..lower.result_len]; + return .{ + .insts = lower.result_insts[0..lower.result_insts_len], + .relocs = lower.result_relocs[0..lower.result_relocs_len], + }; } pub fn fail(lower: *Lower, comptime format: []const u8, args: anytype) Error { @@ -178,12 +187,6 @@ pub fn fail(lower: *Lower, comptime format: []const u8, args: anytype) Error { return error.LowerFail; } -fn mnem_cc(comptime base: @Type(.EnumLiteral), cc: bits.Condition) 
Mnemonic { - return switch (cc) { - inline else => |c| @field(Mnemonic, @tagName(base) ++ @tagName(c)), - }; -} - fn imm(lower: Lower, ops: Mir.Inst.Ops, i: u32) Immediate { return switch (ops) { .rri_s, @@ -191,21 +194,22 @@ fn imm(lower: Lower, ops: Mir.Inst.Ops, i: u32) Immediate { .i_s, .mi_sib_s, .mi_rip_s, - .lock_mi_sib_s, - .lock_mi_rip_s, => Immediate.s(@bitCast(i32, i)), + .rrri, .rri_u, .ri_u, .i_u, .mi_sib_u, .mi_rip_u, - .lock_mi_sib_u, - .lock_mi_rip_u, .rmi_sib, .rmi_rip, .mri_sib, .mri_rip, + .rrm_sib, + .rrm_rip, + .rrmi_sib, + .rrmi_rip, => Immediate.u(i), .ri64 => Immediate.u(lower.mir.extraData(Mir.Imm64, i).data.decode()), @@ -217,76 +221,108 @@ fn imm(lower: Lower, ops: Mir.Inst.Ops, i: u32) Immediate { fn mem(lower: Lower, ops: Mir.Inst.Ops, payload: u32) Memory { return lower.mir.resolveFrameLoc(switch (ops) { .rm_sib, - .rm_sib_cc, .rmi_sib, .m_sib, - .m_sib_cc, .mi_sib_u, .mi_sib_s, .mr_sib, .mrr_sib, .mri_sib, - .lock_m_sib, - .lock_mi_sib_u, - .lock_mi_sib_s, - .lock_mr_sib, + .rrm_sib, + .rrmi_sib, + + .pseudo_cmov_nz_or_p_rm_sib, + .pseudo_set_z_and_np_m_sib, + .pseudo_set_nz_or_p_m_sib, => lower.mir.extraData(Mir.MemorySib, payload).data.decode(), .rm_rip, - .rm_rip_cc, .rmi_rip, .m_rip, - .m_rip_cc, .mi_rip_u, .mi_rip_s, .mr_rip, .mrr_rip, .mri_rip, - .lock_m_rip, - .lock_mi_rip_u, - .lock_mi_rip_s, - .lock_mr_rip, + .rrm_rip, + .rrmi_rip, + + .pseudo_cmov_nz_or_p_rm_rip, + .pseudo_set_z_and_np_m_rip, + .pseudo_set_nz_or_p_m_rip, => lower.mir.extraData(Mir.MemoryRip, payload).data.decode(), .rax_moffs, .moffs_rax, - .lock_moffs_rax, => lower.mir.extraData(Mir.MemoryMoffs, payload).data.decode(), else => unreachable, }); } +fn reloc(lower: *Lower, target: Reloc.Target) Immediate { + lower.result_relocs[lower.result_relocs_len] = .{ + .lowered_inst_index = lower.result_insts_len, + .target = target, + }; + lower.result_relocs_len += 1; + return Immediate.s(0); +} + fn emit(lower: *Lower, prefix: Prefix, mnemonic: Mnemonic, ops: []const 
Operand) Error!void { - lower.result[lower.result_len] = try Instruction.new(prefix, mnemonic, ops); - lower.result_len += 1; + lower.result_insts[lower.result_insts_len] = try Instruction.new(prefix, mnemonic, ops); + lower.result_insts_len += 1; } -fn mirGeneric(lower: *Lower, inst: Mir.Inst) Error!void { - try lower.emit(switch (inst.ops) { - else => .none, - .lock_m_sib, - .lock_m_rip, - .lock_mi_sib_u, - .lock_mi_rip_u, - .lock_mi_sib_s, - .lock_mi_rip_s, - .lock_mr_sib, - .lock_mr_rip, - .lock_moffs_rax, - => .lock, - }, switch (inst.tag) { - inline else => |tag| if (@hasField(Mnemonic, @tagName(tag))) - @field(Mnemonic, @tagName(tag)) +fn generic(lower: *Lower, inst: Mir.Inst) Error!void { + const fixes = switch (inst.ops) { + .none => inst.data.none.fixes, + .inst => inst.data.inst.fixes, + .i_s, .i_u => inst.data.i.fixes, + .r => inst.data.r.fixes, + .rr => inst.data.rr.fixes, + .rrr => inst.data.rrr.fixes, + .rrri => inst.data.rrri.fixes, + .rri_s, .rri_u => inst.data.rri.fixes, + .ri_s, .ri_u => inst.data.ri.fixes, + .ri64, .rm_sib, .rm_rip, .mr_sib, .mr_rip => inst.data.rx.fixes, + .mrr_sib, .mrr_rip, .rrm_sib, .rrm_rip => inst.data.rrx.fixes, + .rmi_sib, .rmi_rip, .mri_sib, .mri_rip => inst.data.rix.fixes, + .rrmi_sib, .rrmi_rip => inst.data.rrix.fixes, + .mi_sib_u, .mi_rip_u, .mi_sib_s, .mi_rip_s => inst.data.x.fixes, + .m_sib, .m_rip, .rax_moffs, .moffs_rax => inst.data.x.fixes, + .extern_fn_reloc, .got_reloc, .direct_reloc, .import_reloc, .tlv_reloc => ._, + else => return lower.fail("TODO lower .{s}", .{@tagName(inst.ops)}), + }; + try lower.emit(switch (fixes) { + inline else => |tag| comptime if (std.mem.indexOfScalar(u8, @tagName(tag), ' ')) |space| + @field(Prefix, @tagName(tag)[0..space]) else - unreachable, + .none, + }, mnemonic: { + comptime var max_len = 0; + inline for (@typeInfo(Mnemonic).Enum.fields) |field| max_len = @max(field.name.len, max_len); + var buf: [max_len]u8 = undefined; + + const fixes_name = @tagName(fixes); + const 
pattern = fixes_name[if (std.mem.indexOfScalar(u8, fixes_name, ' ')) |i| i + 1 else 0..]; + const wildcard_i = std.mem.indexOfScalar(u8, pattern, '_').?; + const parts = .{ pattern[0..wildcard_i], @tagName(inst.tag), pattern[wildcard_i + 1 ..] }; + const err_msg = "unsupported mnemonic: "; + const mnemonic = std.fmt.bufPrint(&buf, "{s}{s}{s}", parts) catch + return lower.fail(err_msg ++ "'{s}{s}{s}'", parts); + break :mnemonic std.meta.stringToEnum(Mnemonic, mnemonic) orelse + return lower.fail(err_msg ++ "'{s}'", .{mnemonic}); }, switch (inst.ops) { .none => &.{}, + .inst => &.{ + .{ .imm = lower.reloc(.{ .inst = inst.data.inst.inst }) }, + }, .i_s, .i_u => &.{ - .{ .imm = lower.imm(inst.ops, inst.data.i) }, + .{ .imm = lower.imm(inst.ops, inst.data.i.i) }, }, .r => &.{ - .{ .reg = inst.data.r }, + .{ .reg = inst.data.r.r1 }, }, .rr => &.{ .{ .reg = inst.data.rr.r1 }, @@ -297,12 +333,18 @@ fn mirGeneric(lower: *Lower, inst: Mir.Inst) Error!void { .{ .reg = inst.data.rrr.r2 }, .{ .reg = inst.data.rrr.r3 }, }, + .rrri => &.{ + .{ .reg = inst.data.rrri.r1 }, + .{ .reg = inst.data.rrri.r2 }, + .{ .reg = inst.data.rrri.r3 }, + .{ .imm = lower.imm(inst.ops, inst.data.rrri.i) }, + }, .ri_s, .ri_u => &.{ - .{ .reg = inst.data.ri.r }, + .{ .reg = inst.data.ri.r1 }, .{ .imm = lower.imm(inst.ops, inst.data.ri.i) }, }, .ri64 => &.{ - .{ .reg = inst.data.rx.r }, + .{ .reg = inst.data.rx.r1 }, .{ .imm = lower.imm(inst.ops, inst.data.rx.payload) }, }, .rri_s, .rri_u => &.{ @@ -310,33 +352,28 @@ fn mirGeneric(lower: *Lower, inst: Mir.Inst) Error!void { .{ .reg = inst.data.rri.r2 }, .{ .imm = lower.imm(inst.ops, inst.data.rri.i) }, }, - .m_sib, .lock_m_sib, .m_rip, .lock_m_rip => &.{ - .{ .mem = lower.mem(inst.ops, inst.data.payload) }, + .m_sib, .m_rip => &.{ + .{ .mem = lower.mem(inst.ops, inst.data.x.payload) }, }, - .mi_sib_s, - .lock_mi_sib_s, - .mi_sib_u, - .lock_mi_sib_u, - .mi_rip_u, - .lock_mi_rip_u, - .mi_rip_s, - .lock_mi_rip_s, - => &.{ - .{ .mem = lower.mem(inst.ops, 
inst.data.ix.payload) }, - .{ .imm = lower.imm(inst.ops, inst.data.ix.i) }, + .mi_sib_s, .mi_sib_u, .mi_rip_u, .mi_rip_s => &.{ + .{ .mem = lower.mem(inst.ops, inst.data.x.payload + 1) }, + .{ .imm = lower.imm( + inst.ops, + lower.mir.extraData(Mir.Imm32, inst.data.x.payload).data.imm, + ) }, }, .rm_sib, .rm_rip => &.{ - .{ .reg = inst.data.rx.r }, + .{ .reg = inst.data.rx.r1 }, .{ .mem = lower.mem(inst.ops, inst.data.rx.payload) }, }, .rmi_sib, .rmi_rip => &.{ - .{ .reg = inst.data.rix.r }, + .{ .reg = inst.data.rix.r1 }, .{ .mem = lower.mem(inst.ops, inst.data.rix.payload) }, .{ .imm = lower.imm(inst.ops, inst.data.rix.i) }, }, - .mr_sib, .lock_mr_sib, .mr_rip, .lock_mr_rip => &.{ + .mr_sib, .mr_rip => &.{ .{ .mem = lower.mem(inst.ops, inst.data.rx.payload) }, - .{ .reg = inst.data.rx.r }, + .{ .reg = inst.data.rx.r1 }, }, .mrr_sib, .mrr_rip => &.{ .{ .mem = lower.mem(inst.ops, inst.data.rrx.payload) }, @@ -345,137 +382,60 @@ fn mirGeneric(lower: *Lower, inst: Mir.Inst) Error!void { }, .mri_sib, .mri_rip => &.{ .{ .mem = lower.mem(inst.ops, inst.data.rix.payload) }, - .{ .reg = inst.data.rix.r }, + .{ .reg = inst.data.rix.r1 }, .{ .imm = lower.imm(inst.ops, inst.data.rix.i) }, }, - else => return lower.fail("TODO lower {s} {s}", .{ @tagName(inst.tag), @tagName(inst.ops) }), - }); -} - -fn mirString(lower: *Lower, inst: Mir.Inst) Error!void { - switch (inst.ops) { - .string => try lower.emit(switch (inst.data.string.repeat) { - inline else => |repeat| @field(Prefix, @tagName(repeat)), - }, switch (inst.tag) { - inline .cmps, .lods, .movs, .scas, .stos => |tag| switch (inst.data.string.width) { - inline else => |width| @field(Mnemonic, @tagName(tag) ++ @tagName(width)), - }, - else => unreachable, - }, &.{}), - else => return lower.fail("TODO lower {s} {s}", .{ @tagName(inst.tag), @tagName(inst.ops) }), - } -} - -fn mirCmpxchgBytes(lower: *Lower, inst: Mir.Inst) Error!void { - const ops: [1]Operand = switch (inst.ops) { - .m_sib, .lock_m_sib, .m_rip, .lock_m_rip => 
.{ - .{ .mem = lower.mem(inst.ops, inst.data.payload) }, + .rrm_sib, .rrm_rip => &.{ + .{ .reg = inst.data.rrx.r1 }, + .{ .reg = inst.data.rrx.r2 }, + .{ .mem = lower.mem(inst.ops, inst.data.rrx.payload) }, + }, + .rrmi_sib, .rrmi_rip => &.{ + .{ .reg = inst.data.rrix.r1 }, + .{ .reg = inst.data.rrix.r2 }, + .{ .mem = lower.mem(inst.ops, inst.data.rrix.payload) }, + .{ .imm = lower.imm(inst.ops, inst.data.rrix.i) }, }, - else => return lower.fail("TODO lower {s} {s}", .{ @tagName(inst.tag), @tagName(inst.ops) }), - }; - try lower.emit(switch (inst.ops) { - .m_sib, .m_rip => .none, - .lock_m_sib, .lock_m_rip => .lock, - else => unreachable, - }, switch (@divExact(ops[0].bitSize(), 8)) { - 8 => .cmpxchg8b, - 16 => .cmpxchg16b, - else => return lower.fail("invalid operand for {s}", .{@tagName(inst.tag)}), - }, &ops); -} - -fn mirMovMoffs(lower: *Lower, inst: Mir.Inst) Error!void { - try lower.emit(switch (inst.ops) { - .rax_moffs, .moffs_rax => .none, - .lock_moffs_rax => .lock, - else => return lower.fail("TODO lower {s} {s}", .{ @tagName(inst.tag), @tagName(inst.ops) }), - }, .mov, switch (inst.ops) { .rax_moffs => &.{ .{ .reg = .rax }, - .{ .mem = lower.mem(inst.ops, inst.data.payload) }, + .{ .mem = lower.mem(inst.ops, inst.data.x.payload) }, }, - .moffs_rax, .lock_moffs_rax => &.{ - .{ .mem = lower.mem(inst.ops, inst.data.payload) }, + .moffs_rax => &.{ + .{ .mem = lower.mem(inst.ops, inst.data.x.payload) }, .{ .reg = .rax }, }, - else => unreachable, - }); -} - -fn mirMovsx(lower: *Lower, inst: Mir.Inst) Error!void { - const ops: [2]Operand = switch (inst.ops) { - .rr => .{ - .{ .reg = inst.data.rr.r1 }, - .{ .reg = inst.data.rr.r2 }, - }, - .rm_sib, .rm_rip => .{ - .{ .reg = inst.data.rx.r }, - .{ .mem = lower.mem(inst.ops, inst.data.rx.payload) }, + .extern_fn_reloc => &.{ + .{ .imm = lower.reloc(.{ .linker_extern_fn = inst.data.reloc }) }, }, - else => return lower.fail("TODO lower {s} {s}", .{ @tagName(inst.tag), @tagName(inst.ops) }), - }; - try 
lower.emit(.none, switch (ops[0].bitSize()) { - 32, 64 => switch (ops[1].bitSize()) { - 32 => .movsxd, - else => .movsx, + .got_reloc, .direct_reloc, .import_reloc, .tlv_reloc => ops: { + const reg = inst.data.rx.r1; + const extra = lower.mir.extraData(Mir.Reloc, inst.data.rx.payload).data; + _ = lower.reloc(switch (inst.ops) { + .got_reloc => .{ .linker_got = extra }, + .direct_reloc => .{ .linker_direct = extra }, + .import_reloc => .{ .linker_import = extra }, + .tlv_reloc => .{ .linker_tlv = extra }, + else => unreachable, + }); + break :ops &.{ + .{ .reg = reg }, + .{ .mem = Memory.rip(Memory.PtrSize.fromBitSize(reg.bitSize()), 0) }, + }; }, - else => .movsx, - }, &ops); -} - -fn mirCmovcc(lower: *Lower, inst: Mir.Inst) Error!void { - switch (inst.ops) { - .rr_cc => try lower.emit(.none, mnem_cc(.cmov, inst.data.rr_cc.cc), &.{ - .{ .reg = inst.data.rr_cc.r1 }, - .{ .reg = inst.data.rr_cc.r2 }, - }), - .rm_sib_cc, .rm_rip_cc => try lower.emit(.none, mnem_cc(.cmov, inst.data.rx_cc.cc), &.{ - .{ .reg = inst.data.rx_cc.r }, - .{ .mem = lower.mem(inst.ops, inst.data.rx_cc.payload) }, - }), else => return lower.fail("TODO lower {s} {s}", .{ @tagName(inst.tag), @tagName(inst.ops) }), - } -} - -fn mirSetcc(lower: *Lower, inst: Mir.Inst) Error!void { - switch (inst.ops) { - .r_cc => try lower.emit(.none, mnem_cc(.set, inst.data.r_cc.cc), &.{ - .{ .reg = inst.data.r_cc.r }, - }), - .m_sib_cc, .m_rip_cc => try lower.emit(.none, mnem_cc(.set, inst.data.x_cc.cc), &.{ - .{ .mem = lower.mem(inst.ops, inst.data.x_cc.payload) }, - }), - else => return lower.fail("TODO lower {s} {s}", .{ @tagName(inst.tag), @tagName(inst.ops) }), - } + }); } -fn mirPushPopRegisterList(lower: *Lower, inst: Mir.Inst, comptime mnemonic: Mnemonic) Error!void { - const reg_list = Mir.RegisterList.fromInt(inst.data.payload); +fn pushPopRegList(lower: *Lower, comptime mnemonic: Mnemonic, inst: Mir.Inst) Error!void { const callee_preserved_regs = abi.getCalleePreservedRegs(lower.target.*); - var it = 
reg_list.iterator(.{ .direction = switch (mnemonic) { + var it = inst.data.reg_list.iterator(.{ .direction = switch (mnemonic) { .push => .reverse, .pop => .forward, else => unreachable, } }); - while (it.next()) |i| try lower.emit(.none, mnemonic, &.{.{ .reg = callee_preserved_regs[i] }}); -} - -fn mirLeaLinker(lower: *Lower, inst: Mir.Inst) Error!void { - const metadata = lower.mir.extraData(Mir.LeaRegisterReloc, inst.data.payload).data; - const reg = @intToEnum(Register, metadata.reg); - try lower.emit(.none, .lea, &.{ - .{ .reg = reg }, - .{ .mem = Memory.rip(Memory.PtrSize.fromBitSize(reg.bitSize()), 0) }, - }); -} - -fn mirMovLinker(lower: *Lower, inst: Mir.Inst) Error!void { - const metadata = lower.mir.extraData(Mir.LeaRegisterReloc, inst.data.payload).data; - const reg = @intToEnum(Register, metadata.reg); - try lower.emit(.none, .mov, &.{ - .{ .reg = reg }, - .{ .mem = Memory.rip(Memory.PtrSize.fromBitSize(reg.bitSize()), 0) }, - }); + while (it.next()) |i| try lower.emit(.none, mnemonic, &.{.{ + .reg = callee_preserved_regs[i], + }}); } const abi = @import("abi.zig"); diff --git a/src/arch/x86_64/Mir.zig b/src/arch/x86_64/Mir.zig index 6b2db1b696..0a7b5597b3 100644 --- a/src/arch/x86_64/Mir.zig +++ b/src/arch/x86_64/Mir.zig @@ -32,12 +32,260 @@ pub const Inst = struct { pub const Index = u32; + pub const Fixes = enum(u8) { + /// ___ + @"_", + + /// Integer __ + i_, + + /// ___ Left + _l, + /// ___ Left Double + _ld, + /// ___ Right + _r, + /// ___ Right Double + _rd, + + /// ___ Above + _a, + /// ___ Above Or Equal + _ae, + /// ___ Below + _b, + /// ___ Below Or Equal + _be, + /// ___ Carry + _c, + /// ___ Equal + _e, + /// ___ Greater + _g, + /// ___ Greater Or Equal + _ge, + /// ___ Less + //_l, + /// ___ Less Or Equal + _le, + /// ___ Not Above + _na, + /// ___ Not Above Or Equal + _nae, + /// ___ Not Below + _nb, + /// ___ Not Below Or Equal + _nbe, + /// ___ Not Carry + _nc, + /// ___ Not Equal + _ne, + /// ___ Not Greater + _ng, + /// ___ Not 
Greater Or Equal + _nge, + /// ___ Not Less + _nl, + /// ___ Not Less Or Equal + _nle, + /// ___ Not Overflow + _no, + /// ___ Not Parity + _np, + /// ___ Not Sign + _ns, + /// ___ Not Zero + _nz, + /// ___ Overflow + _o, + /// ___ Parity + _p, + /// ___ Parity Even + _pe, + /// ___ Parity Odd + _po, + /// ___ Sign + _s, + /// ___ Zero + _z, + + /// ___ Byte + //_b, + /// ___ Word + _w, + /// ___ Doubleword + _d, + /// ___ QuadWord + _q, + + /// ___ String + //_s, + /// ___ String Byte + _sb, + /// ___ String Word + _sw, + /// ___ String Doubleword + _sd, + /// ___ String Quadword + _sq, + + /// Repeat ___ String + @"rep _s", + /// Repeat ___ String Byte + @"rep _sb", + /// Repeat ___ String Word + @"rep _sw", + /// Repeat ___ String Doubleword + @"rep _sd", + /// Repeat ___ String Quadword + @"rep _sq", + + /// Repeat Equal ___ String + @"repe _s", + /// Repeat Equal ___ String Byte + @"repe _sb", + /// Repeat Equal ___ String Word + @"repe _sw", + /// Repeat Equal ___ String Doubleword + @"repe _sd", + /// Repeat Equal ___ String Quadword + @"repe _sq", + + /// Repeat Not Equal ___ String + @"repne _s", + /// Repeat Not Equal ___ String Byte + @"repne _sb", + /// Repeat Not Equal ___ String Word + @"repne _sw", + /// Repeat Not Equal ___ String Doubleword + @"repne _sd", + /// Repeat Not Equal ___ String Quadword + @"repne _sq", + + /// Repeat Not Zero ___ String + @"repnz _s", + /// Repeat Not Zero ___ String Byte + @"repnz _sb", + /// Repeat Not Zero ___ String Word + @"repnz _sw", + /// Repeat Not Zero ___ String Doubleword + @"repnz _sd", + /// Repeat Not Zero ___ String Quadword + @"repnz _sq", + + /// Repeat Zero ___ String + @"repz _s", + /// Repeat Zero ___ String Byte + @"repz _sb", + /// Repeat Zero ___ String Word + @"repz _sw", + /// Repeat Zero ___ String Doubleword + @"repz _sd", + /// Repeat Zero ___ String Quadword + @"repz _sq", + + /// Locked ___ + @"lock _", + /// ___ And Complement + //_c, + /// Locked ___ And Complement + @"lock _c", + /// 
___ And Reset + //_r, + /// Locked ___ And Reset + @"lock _r", + /// ___ And Set + //_s, + /// Locked ___ And Set + @"lock _s", + /// ___ 8 Bytes + _8b, + /// Locked ___ 8 Bytes + @"lock _8b", + /// ___ 16 Bytes + _16b, + /// Locked ___ 16 Bytes + @"lock _16b", + + /// Float ___ + f_, + /// Float ___ Pop + f_p, + + /// Packed ___ + p_, + /// Packed ___ Byte + p_b, + /// Packed ___ Word + p_w, + /// Packed ___ Doubleword + p_d, + /// Packed ___ Quadword + p_q, + /// Packed ___ Double Quadword + p_dq, + + /// ___ Scalar Single-Precision Values + _ss, + /// ___ Packed Single-Precision Values + _ps, + /// ___ Scalar Double-Precision Values + //_sd, + /// ___ Packed Double-Precision Values + _pd, + + /// VEX-Encoded ___ + v_, + /// VEX-Encoded Packed ___ + vp_, + /// VEX-Encoded Packed ___ Byte + vp_b, + /// VEX-Encoded Packed ___ Word + vp_w, + /// VEX-Encoded Packed ___ Doubleword + vp_d, + /// VEX-Encoded Packed ___ Quadword + vp_q, + /// VEX-Encoded Packed ___ Double Quadword + vp_dq, + /// VEX-Encoded ___ Scalar Single-Precision Values + v_ss, + /// VEX-Encoded ___ Packed Single-Precision Values + v_ps, + /// VEX-Encoded ___ Scalar Double-Precision Values + v_sd, + /// VEX-Encoded ___ Packed Double-Precision Values + v_pd, + + /// Mask ___ Byte + k_b, + /// Mask ___ Word + k_w, + /// Mask ___ Doubleword + k_d, + /// Mask ___ Quadword + k_q, + + pub fn fromCondition(cc: bits.Condition) Fixes { + return switch (cc) { + inline else => |cc_tag| @field(Fixes, "_" ++ @tagName(cc_tag)), + .z_and_np, .nz_or_p => unreachable, + }; + } + }; + pub const Tag = enum(u8) { /// Add with carry adc, /// Add + /// Add packed single-precision floating-point values + /// Add scalar single-precision floating-point values + /// Add packed double-precision floating-point values + /// Add scalar double-precision floating-point values add, /// Logical and + /// Bitwise logical and of packed single-precision floating-point values + /// Bitwise logical and of packed double-precision 
floating-point values @"and", /// Bit scan forward bsf, @@ -46,49 +294,55 @@ pub const Inst = struct { /// Byte swap bswap, /// Bit test - bt, /// Bit test and complement - btc, /// Bit test and reset - btr, /// Bit test and set - bts, + bt, /// Call call, /// Convert byte to word cbw, - /// Convert word to doubleword - cwde, - /// Convert doubleword to quadword - cdqe, - /// Convert word to doubleword - cwd, /// Convert doubleword to quadword cdq, /// Convert doubleword to quadword - cqo, + cdqe, + /// Conditional move + cmov, /// Logical compare + /// Compare string + /// Compare scalar single-precision floating-point values + /// Compare scalar double-precision floating-point values cmp, /// Compare and exchange - cmpxchg, /// Compare and exchange bytes - cmpxchgb, + cmpxchg, + /// Convert doubleword to quadword + cqo, + /// Convert word to doubleword + cwd, + /// Convert word to doubleword + cwde, /// Unsigned division - div, - /// Store integer with truncation - fisttp, - /// Load floating-point value - fld, /// Signed division - idiv, - /// Signed multiplication - imul, + /// Divide packed single-precision floating-point values + /// Divide scalar single-precision floating-point values + /// Divide packed double-precision floating-point values + /// Divide scalar double-precision floating-point values + div, /// int3, + /// Store integer with truncation + istt, + /// Conditional jump + j, /// Jump jmp, + /// Load floating-point value + ld, /// Load effective address lea, + /// Load string + lod, /// Load fence lfence, /// Count the number of leading zero bits @@ -96,18 +350,24 @@ pub const Inst = struct { /// Memory fence mfence, /// Move + /// Move data from string to string + /// Move scalar single-precision floating-point value + /// Move scalar double-precision floating-point value + /// Move doubleword + /// Move quadword mov, /// Move data after swapping bytes movbe, - /// Move doubleword - movd, - /// Move quadword - movq, /// Move with sign extension 
movsx, /// Move with zero extension movzx, /// Multiply + /// Signed multiplication + /// Multiply packed single-precision floating-point values + /// Multiply scalar single-precision floating-point values + /// Multiply packed double-precision floating-point values + /// Multiply scalar double-precision floating-point values mul, /// Two's complement negation neg, @@ -116,6 +376,8 @@ pub const Inst = struct { /// One's complement negation not, /// Logical or + /// Bitwise logical or of packed single-precision floating-point values + /// Bitwise logical or of packed double-precision floating-point values @"or", /// Pop pop, @@ -124,33 +386,37 @@ pub const Inst = struct { /// Push push, /// Rotate left through carry - rcl, /// Rotate right through carry - rcr, + rc, /// Return ret, /// Rotate left - rol, /// Rotate right - ror, + ro, /// Arithmetic shift left - sal, /// Arithmetic shift right - sar, + sa, /// Integer subtraction with borrow sbb, + /// Scan string + sca, + /// Set byte on condition + set, /// Store fence sfence, /// Logical shift left - shl, /// Double precision shift left - shld, /// Logical shift right - shr, /// Double precision shift right - shrd, + sh, /// Subtract + /// Subtract packed single-precision floating-point values + /// Subtract scalar single-precision floating-point values + /// Subtract packed double-precision floating-point values + /// Subtract scalar double-precision floating-point values sub, + /// Store string + sto, /// Syscall syscall, /// Test condition @@ -164,142 +430,131 @@ pub const Inst = struct { /// Exchange register/memory with register xchg, /// Logical exclusive-or + /// Bitwise logical xor of packed single-precision floating-point values + /// Bitwise logical xor of packed double-precision floating-point values xor, - /// Add single precision floating point values - addss, - /// Bitwise logical and of packed single precision floating-point values - andps, - /// Bitwise logical and not of packed single precision 
floating-point values - andnps, - /// Compare scalar single-precision floating-point values - cmpss, + /// Bitwise logical and not of packed single-precision floating-point values + /// Bitwise logical and not of packed double-precision floating-point values + andn, /// Convert doubleword integer to scalar single-precision floating-point value cvtsi2ss, - /// Divide scalar single-precision floating-point values - divss, - /// Return maximum single-precision floating-point value - maxss, - /// Return minimum single-precision floating-point value - minss, + /// Maximum of packed single-precision floating-point values + /// Maximum of scalar single-precision floating-point values + /// Maximum of packed double-precision floating-point values + /// Maximum of scalar double-precision floating-point values + max, + /// Minimum of packed single-precision floating-point values + /// Minimum of scalar single-precision floating-point values + /// Minimum of packed double-precision floating-point values + /// Minimum of scalar double-precision floating-point values + min, /// Move aligned packed single-precision floating-point values - movaps, - /// Move scalar single-precision floating-point value - movss, + /// Move aligned packed double-precision floating-point values + mova, + /// Move packed single-precision floating-point values high to low + movhl, /// Move unaligned packed single-precision floating-point values - movups, - /// Multiply scalar single-precision floating-point values - mulss, - /// Bitwise logical or of packed single precision floating-point values - orps, + /// Move unaligned packed double-precision floating-point values + movu, + /// Extract byte /// Extract word - pextrw, + /// Extract doubleword + /// Extract quadword + extr, + /// Insert byte /// Insert word - pinsrw, - /// Round scalar single-precision floating-point values - roundss, - /// Square root of scalar single precision floating-point value - sqrtps, - /// Subtract scalar single-precision 
floating-point values - sqrtss, - /// Square root of single precision floating-point values - subss, + /// Insert doubleword + /// Insert quadword + insr, + /// Square root of packed single-precision floating-point values + /// Square root of scalar single-precision floating-point value + /// Square root of packed double-precision floating-point values + /// Square root of scalar double-precision floating-point value + sqrt, /// Unordered compare scalar single-precision floating-point values - ucomiss, - /// Bitwise logical xor of packed single precision floating-point values - xorps, - /// Add double precision floating point values - addsd, - /// Bitwise logical and not of packed double precision floating-point values - andnpd, - /// Bitwise logical and of packed double precision floating-point values - andpd, - /// Compare scalar double-precision floating-point values - cmpsd, + /// Unordered compare scalar double-precision floating-point values + ucomi, + /// Unpack and interleave high packed single-precision floating-point values + /// Unpack and interleave high packed double-precision floating-point values + unpckh, + /// Unpack and interleave low packed single-precision floating-point values + /// Unpack and interleave low packed double-precision floating-point values + unpckl, + /// Convert scalar double-precision floating-point value to scalar single-precision floating-point value cvtsd2ss, /// Convert doubleword integer to scalar double-precision floating-point value cvtsi2sd, /// Convert scalar single-precision floating-point value to scalar double-precision floating-point value cvtss2sd, - /// Divide scalar double-precision floating-point values - divsd, - /// Return maximum double-precision floating-point value - maxsd, - /// Return minimum double-precision floating-point value - minsd, - /// Move scalar double-precision floating-point value - movsd, - /// Multiply scalar double-precision floating-point values - mulsd, - /// Bitwise logical or of packed 
double precision floating-point values - orpd, - /// Round scalar double-precision floating-point values - roundsd, - /// Square root of double precision floating-point values - sqrtpd, - /// Square root of scalar double precision floating-point value - sqrtsd, - /// Subtract scalar double-precision floating-point values - subsd, - /// Unordered compare scalar double-precision floating-point values - ucomisd, - /// Bitwise logical xor of packed double precision floating-point values - xorpd, + /// Shuffle packed high words + shufh, + /// Shuffle packed low words + shufl, + /// Shift packed data right logical + /// Shift packed data right logical + /// Shift packed data right logical + srl, + /// Unpack high data + unpckhbw, + /// Unpack high data + unpckhdq, + /// Unpack high data + unpckhqdq, + /// Unpack high data + unpckhwd, + /// Unpack low data + unpcklbw, + /// Unpack low data + unpckldq, + /// Unpack low data + unpcklqdq, + /// Unpack low data + unpcklwd, - /// Compare string operands - cmps, - /// Load string - lods, - /// Move data from string to string - movs, - /// Scan string - scas, - /// Store string - stos, - - /// Conditional move - cmovcc, - /// Conditional jump - jcc, - /// Set byte on condition - setcc, - - /// Mov absolute to/from memory wrt segment register to/from rax - mov_moffs, - - /// Jump with relocation to another local MIR instruction - /// Uses `inst` payload. - jmp_reloc, + /// Replicate double floating-point values + movddup, + /// Replicate single floating-point values + movshdup, + /// Replicate single floating-point values + movsldup, - /// Call to an extern symbol via linker relocation. - /// Uses `relocation` payload. 
- call_extern, + /// Round packed single-precision floating-point values + /// Round scalar single-precision floating-point value + /// Round packed double-precision floating-point values + /// Round scalar double-precision floating-point value + round, - /// Load effective address of a symbol not yet allocated in VM. - lea_linker, - /// Move address of a symbol not yet allocated in VM. - mov_linker, + /// Convert 16-bit floating-point values to single-precision floating-point values + cvtph2ps, + /// Convert single-precision floating-point values to 16-bit floating-point values + cvtps2ph, - /// End of prologue - dbg_prologue_end, - /// Start of epilogue - dbg_epilogue_begin, - /// Update debug line - /// Uses `line_column` payload containing the line and column. - dbg_line, - /// Push registers - /// Uses `payload` payload containing `RegisterList.asInt` directly. - push_regs, - /// Pop registers - /// Uses `payload` payload containing `RegisterList.asInt` directly. - pop_regs, + /// Fused multiply-add of packed single-precision floating-point values + /// Fused multiply-add of scalar single-precision floating-point values + /// Fused multiply-add of packed double-precision floating-point values + /// Fused multiply-add of scalar double-precision floating-point values + fmadd132, + /// Fused multiply-add of packed single-precision floating-point values + /// Fused multiply-add of scalar single-precision floating-point values + /// Fused multiply-add of packed double-precision floating-point values + /// Fused multiply-add of scalar double-precision floating-point values + fmadd213, + /// Fused multiply-add of packed single-precision floating-point values + /// Fused multiply-add of scalar single-precision floating-point values + /// Fused multiply-add of packed double-precision floating-point values + /// Fused multiply-add of scalar double-precision floating-point values + fmadd231, - /// Tombstone - /// Emitter should skip this instruction. 
- dead, + /// A pseudo instruction that requires special lowering. + /// This should be the only tag in this enum that doesn't + /// directly correspond to one or more instruction mnemonics. + pseudo, }; + pub const FixedTag = struct { Fixes, Tag }; + pub const Ops = enum(u8) { /// No data associated with this instruction (only mnemonic is used). none, @@ -312,18 +567,15 @@ pub const Inst = struct { /// Register, register, register operands. /// Uses `rrr` payload. rrr, + /// Register, register, register, immediate (byte) operands. + /// Uses `rrri` payload. + rrri, /// Register, register, immediate (sign-extended) operands. /// Uses `rri` payload. rri_s, /// Register, register, immediate (unsigned) operands. /// Uses `rri` payload. rri_u, - /// Register with condition code (CC). - /// Uses `r_cc` payload. - r_cc, - /// Register, register with condition code (CC). - /// Uses `rr_cc` payload. - rr_cc, /// Register, immediate (sign-extended) operands. /// Uses `ri` payload. ri_s, @@ -348,41 +600,41 @@ pub const Inst = struct { /// Register, memory (RIP) operands. /// Uses `rx` payload. rm_rip, - /// Register, memory (SIB) operands with condition code (CC). - /// Uses `rx_cc` payload. - rm_sib_cc, - /// Register, memory (RIP) operands with condition code (CC). - /// Uses `rx_cc` payload. - rm_rip_cc, /// Register, memory (SIB), immediate (byte) operands. /// Uses `rix` payload with extra data of type `MemorySib`. rmi_sib, + /// Register, register, memory (RIP). + /// Uses `rrix` payload with extra data of type `MemoryRip`. + rrm_rip, + /// Register, register, memory (SIB). + /// Uses `rrix` payload with extra data of type `MemorySib`. + rrm_sib, + /// Register, register, memory (RIP), immediate (byte) operands. + /// Uses `rrix` payload with extra data of type `MemoryRip`. + rrmi_rip, + /// Register, register, memory (SIB), immediate (byte) operands. + /// Uses `rrix` payload with extra data of type `MemorySib`. 
+ rrmi_sib, /// Register, memory (RIP), immediate (byte) operands. /// Uses `rix` payload with extra data of type `MemoryRip`. rmi_rip, /// Single memory (SIB) operand. - /// Uses `payload` with extra data of type `MemorySib`. + /// Uses `x` with extra data of type `MemorySib`. m_sib, /// Single memory (RIP) operand. - /// Uses `payload` with extra data of type `MemoryRip`. + /// Uses `x` with extra data of type `MemoryRip`. m_rip, - /// Single memory (SIB) operand with condition code (CC). - /// Uses `x_cc` with extra data of type `MemorySib`. - m_sib_cc, - /// Single memory (RIP) operand with condition code (CC). - /// Uses `x_cc` with extra data of type `MemoryRip`. - m_rip_cc, /// Memory (SIB), immediate (unsigned) operands. - /// Uses `ix` payload with extra data of type `MemorySib`. + /// Uses `x` payload with extra data of type `Imm32` followed by `MemorySib`. mi_sib_u, /// Memory (RIP), immediate (unsigned) operands. - /// Uses `ix` payload with extra data of type `MemoryRip`. + /// Uses `x` payload with extra data of type `Imm32` followed by `MemoryRip`. mi_rip_u, /// Memory (SIB), immediate (sign-extend) operands. - /// Uses `ix` payload with extra data of type `MemorySib`. + /// Uses `x` payload with extra data of type `Imm32` followed by `MemorySib`. mi_sib_s, /// Memory (RIP), immediate (sign-extend) operands. - /// Uses `ix` payload with extra data of type `MemoryRip`. + /// Uses `x` payload with extra data of type `Imm32` followed by `MemoryRip`. mi_rip_s, /// Memory (SIB), register operands. /// Uses `rx` payload with extra data of type `MemorySib`. @@ -403,161 +655,200 @@ pub const Inst = struct { /// Uses `rix` payload with extra data of type `MemoryRip`. mri_rip, /// Rax, Memory moffs. - /// Uses `payload` with extra data of type `MemoryMoffs`. + /// Uses `x` with extra data of type `MemoryMoffs`. rax_moffs, /// Memory moffs, rax. - /// Uses `payload` with extra data of type `MemoryMoffs`. + /// Uses `x` with extra data of type `MemoryMoffs`. 
moffs_rax, - /// Single memory (SIB) operand with lock prefix. - /// Uses `payload` with extra data of type `MemorySib`. - lock_m_sib, - /// Single memory (RIP) operand with lock prefix. - /// Uses `payload` with extra data of type `MemoryRip`. - lock_m_rip, - /// Memory (SIB), immediate (unsigned) operands with lock prefix. - /// Uses `xi` payload with extra data of type `MemorySib`. - lock_mi_sib_u, - /// Memory (RIP), immediate (unsigned) operands with lock prefix. - /// Uses `xi` payload with extra data of type `MemoryRip`. - lock_mi_rip_u, - /// Memory (SIB), immediate (sign-extend) operands with lock prefix. - /// Uses `xi` payload with extra data of type `MemorySib`. - lock_mi_sib_s, - /// Memory (RIP), immediate (sign-extend) operands with lock prefix. - /// Uses `xi` payload with extra data of type `MemoryRip`. - lock_mi_rip_s, - /// Memory (SIB), register operands with lock prefix. - /// Uses `rx` payload with extra data of type `MemorySib`. - lock_mr_sib, - /// Memory (RIP), register operands with lock prefix. - /// Uses `rx` payload with extra data of type `MemoryRip`. - lock_mr_rip, - /// Memory moffs, rax with lock prefix. - /// Uses `payload` with extra data of type `MemoryMoffs`. - lock_moffs_rax, /// References another Mir instruction directly. /// Uses `inst` payload. inst, - /// References another Mir instruction directly with condition code (CC). - /// Uses `inst_cc` payload. - inst_cc, - /// String repeat and width - /// Uses `string` payload. - string, + /// Linker relocation - external function. /// Uses `reloc` payload. - reloc, + extern_fn_reloc, /// Linker relocation - GOT indirection. - /// Uses `payload` payload with extra data of type `LeaRegisterReloc`. + /// Uses `rx` payload with extra data of type `Reloc`. got_reloc, /// Linker relocation - direct reference. - /// Uses `payload` payload with extra data of type `LeaRegisterReloc`. + /// Uses `rx` payload with extra data of type `Reloc`. 
direct_reloc, /// Linker relocation - imports table indirection (binding). - /// Uses `payload` payload with extra data of type `LeaRegisterReloc`. + /// Uses `rx` payload with extra data of type `Reloc`. import_reloc, /// Linker relocation - threadlocal variable via GOT indirection. - /// Uses `payload` payload with extra data of type `LeaRegisterReloc`. + /// Uses `rx` payload with extra data of type `Reloc`. tlv_reloc, + + // Pseudo instructions: + + /// Conditional move if zero flag set and parity flag not set + /// Clobbers the source operand! + /// Uses `rr` payload. + pseudo_cmov_z_and_np_rr, + /// Conditional move if zero flag not set or parity flag set + /// Uses `rr` payload. + pseudo_cmov_nz_or_p_rr, + /// Conditional move if zero flag not set or parity flag set + /// Uses `rx` payload. + pseudo_cmov_nz_or_p_rm_sib, + /// Conditional move if zero flag not set or parity flag set + /// Uses `rx` payload. + pseudo_cmov_nz_or_p_rm_rip, + /// Set byte if zero flag set and parity flag not set + /// Requires a scratch register! + /// Uses `r_scratch` payload. + pseudo_set_z_and_np_r, + /// Set byte if zero flag set and parity flag not set + /// Requires a scratch register! + /// Uses `x_scratch` payload. + pseudo_set_z_and_np_m_sib, + /// Set byte if zero flag set and parity flag not set + /// Requires a scratch register! + /// Uses `x_scratch` payload. + pseudo_set_z_and_np_m_rip, + /// Set byte if zero flag not set or parity flag set + /// Requires a scratch register! + /// Uses `r_scratch` payload. + pseudo_set_nz_or_p_r, + /// Set byte if zero flag not set or parity flag set + /// Requires a scratch register! + /// Uses `x_scratch` payload. + pseudo_set_nz_or_p_m_sib, + /// Set byte if zero flag not set or parity flag set + /// Requires a scratch register! + /// Uses `x_scratch` payload. + pseudo_set_nz_or_p_m_rip, + /// Jump if zero flag set and parity flag not set + /// Uses `inst` payload. 
+ pseudo_j_z_and_np_inst, + /// Jump if zero flag not set or parity flag set + /// Uses `inst` payload. + pseudo_j_nz_or_p_inst, + + /// Push registers + /// Uses `reg_list` payload. + pseudo_push_reg_list, + /// Pop registers + /// Uses `reg_list` payload. + pseudo_pop_reg_list, + + /// End of prologue + pseudo_dbg_prologue_end_none, + /// Update debug line + /// Uses `line_column` payload. + pseudo_dbg_line_line_column, + /// Start of epilogue + pseudo_dbg_epilogue_begin_none, + + /// Tombstone + /// Emitter should skip this instruction. + pseudo_dead_none, }; pub const Data = union { + none: struct { + fixes: Fixes = ._, + }, /// References another Mir instruction. - inst: Index, - /// Another instruction with condition code (CC). - /// Used by `jcc`. - inst_cc: struct { - /// Another instruction. + inst: struct { + fixes: Fixes = ._, inst: Index, - /// A condition code for use with EFLAGS register. - cc: bits.Condition, }, /// A 32-bit immediate value. - i: u32, - r: Register, + i: struct { + fixes: Fixes = ._, + i: u32, + }, + r: struct { + fixes: Fixes = ._, + r1: Register, + }, rr: struct { + fixes: Fixes = ._, r1: Register, r2: Register, }, rrr: struct { + fixes: Fixes = ._, r1: Register, r2: Register, r3: Register, }, - rri: struct { + rrri: struct { + fixes: Fixes = ._, r1: Register, r2: Register, - i: u32, - }, - /// Condition code (CC), followed by custom payload found in extra. - x_cc: struct { - cc: bits.Condition, - payload: u32, - }, - /// Register with condition code (CC). - r_cc: struct { - r: Register, - cc: bits.Condition, + r3: Register, + i: u8, }, - /// Register, register with condition code (CC). - rr_cc: struct { + rri: struct { + fixes: Fixes = ._, r1: Register, r2: Register, - cc: bits.Condition, + i: u32, }, /// Register, immediate. ri: struct { - r: Register, + fixes: Fixes = ._, + r1: Register, i: u32, }, /// Register, followed by custom payload found in extra. 
rx: struct { - r: Register, - payload: u32, - }, - /// Register with condition code (CC), followed by custom payload found in extra. - rx_cc: struct { - r: Register, - cc: bits.Condition, - payload: u32, - }, - /// Immediate, followed by Custom payload found in extra. - ix: struct { - i: u32, + fixes: Fixes = ._, + r1: Register, payload: u32, }, /// Register, register, followed by Custom payload found in extra. rrx: struct { + fixes: Fixes = ._, r1: Register, r2: Register, payload: u32, }, /// Register, byte immediate, followed by Custom payload found in extra. rix: struct { - r: Register, + fixes: Fixes = ._, + r1: Register, + i: u8, + payload: u32, + }, + /// Register, register, byte immediate, followed by Custom payload found in extra. + rrix: struct { + fixes: Fixes = ._, + r1: Register, + r2: Register, i: u8, payload: u32, }, - /// String instruction prefix and width. - string: struct { - repeat: bits.StringRepeat, - width: bits.StringWidth, + /// Register, scratch register + r_scratch: struct { + fixes: Fixes = ._, + r1: Register, + scratch_reg: Register, + }, + /// Scratch register, followed by Custom payload found in extra. + x_scratch: struct { + fixes: Fixes = ._, + scratch_reg: Register, + payload: u32, + }, + /// Custom payload found in extra. + x: struct { + fixes: Fixes = ._, + payload: u32, }, /// Relocation for the linker where: /// * `atom_index` is the index of the source /// * `sym_index` is the index of the target - relocation: struct { - /// Index of the containing atom. - atom_index: u32, - /// Index into the linker's symbol table. - sym_index: u32, - }, + reloc: Reloc, /// Debug line and column position line_column: struct { line: u32, column: u32, }, - /// Index into `extra`. Meaning of what can be found there is context-dependent. - payload: u32, + /// Register list + reg_list: RegisterList, }; // Make sure we don't accidentally make instructions bigger than expected. 
@@ -569,9 +860,8 @@ pub const Inst = struct { } }; -pub const LeaRegisterReloc = struct { - /// Destination register. - reg: u32, +/// A linker symbol not yet allocated in VM. +pub const Reloc = struct { /// Index of the containing atom. atom_index: u32, /// Index into the linker's symbol table. @@ -606,21 +896,15 @@ pub const RegisterList = struct { return self.bitset.iterator(options); } - pub fn asInt(self: Self) u32 { - return self.bitset.mask; - } - - pub fn fromInt(mask: u32) Self { - return .{ - .bitset = BitSet{ .mask = @intCast(BitSet.MaskInt, mask) }, - }; - } - pub fn count(self: Self) u32 { return @intCast(u32, self.bitset.count()); } }; +pub const Imm32 = struct { + imm: u32, +}; + pub const Imm64 = struct { msb: u32, lsb: u32, diff --git a/src/arch/x86_64/bits.zig b/src/arch/x86_64/bits.zig index 5d06865566..3343f280b9 100644 --- a/src/arch/x86_64/bits.zig +++ b/src/arch/x86_64/bits.zig @@ -6,9 +6,6 @@ const Allocator = std.mem.Allocator; const ArrayList = std.ArrayList; const DW = std.dwarf; -pub const StringRepeat = enum(u3) { none, rep, repe, repz, repne, repnz }; -pub const StringWidth = enum(u2) { b, w, d, q }; - /// EFLAGS condition codes pub const Condition = enum(u5) { /// above @@ -72,6 +69,12 @@ pub const Condition = enum(u5) { /// zero z, + // Pseudo conditions + /// zero and not parity + z_and_np, + /// not zero or parity + nz_or_p, + /// Converts a std.math.CompareOperator into a condition flag, /// i.e. returns the condition that is true iff the result of the /// comparison is true. 
Assumes signed comparison @@ -143,6 +146,9 @@ pub const Condition = enum(u5) { .po => .pe, .s => .ns, .z => .nz, + + .z_and_np => .nz_or_p, + .nz_or_p => .z_and_np, }; } }; @@ -476,7 +482,9 @@ pub const Memory = union(enum) { dword, qword, tbyte, - dqword, + xword, + yword, + zword, pub fn fromSize(size: u32) PtrSize { return switch (size) { @@ -484,7 +492,9 @@ pub const Memory = union(enum) { 2...2 => .word, 3...4 => .dword, 5...8 => .qword, - 9...16 => .dqword, + 9...16 => .xword, + 17...32 => .yword, + 33...64 => .zword, else => unreachable, }; } @@ -496,7 +506,9 @@ pub const Memory = union(enum) { 32 => .dword, 64 => .qword, 80 => .tbyte, - 128 => .dqword, + 128 => .xword, + 256 => .yword, + 512 => .zword, else => unreachable, }; } @@ -508,7 +520,9 @@ pub const Memory = union(enum) { .dword => 32, .qword => 64, .tbyte => 80, - .dqword => 128, + .xword => 128, + .yword => 256, + .zword => 512, }; } }; diff --git a/src/arch/x86_64/encoder.zig b/src/arch/x86_64/encoder.zig index 4c900697f5..0ce875240d 100644 --- a/src/arch/x86_64/encoder.zig +++ b/src/arch/x86_64/encoder.zig @@ -151,15 +151,12 @@ pub const Instruction = struct { moffs.offset, }), }, - .imm => |imm| try writer.print("0x{x}", .{imm.asUnsigned(enc_op.bitSize())}), + .imm => |imm| try writer.print("0x{x}", .{imm.asUnsigned(enc_op.immBitSize())}), } } pub fn fmtPrint(op: Operand, enc_op: Encoding.Op) std.fmt.Formatter(fmt) { - return .{ .data = .{ - .op = op, - .enc_op = enc_op, - } }; + return .{ .data = .{ .op = op, .enc_op = enc_op } }; } }; @@ -209,10 +206,16 @@ pub const Instruction = struct { const enc = inst.encoding; const data = enc.data; - try inst.encodeLegacyPrefixes(encoder); - try inst.encodeMandatoryPrefix(encoder); - try inst.encodeRexPrefix(encoder); - try inst.encodeOpcode(encoder); + if (data.mode.isVex()) { + try inst.encodeVexPrefix(encoder); + const opc = inst.encoding.opcode(); + try encoder.opcode_1byte(opc[opc.len - 1]); + } else { + try inst.encodeLegacyPrefixes(encoder); + 
try inst.encodeMandatoryPrefix(encoder); + try inst.encodeRexPrefix(encoder); + try inst.encodeOpcode(encoder); + } switch (data.op_en) { .np, .o => {}, @@ -222,25 +225,28 @@ pub const Instruction = struct { .td => try encoder.imm64(inst.ops[0].mem.moffs.offset), else => { const mem_op = switch (data.op_en) { - .m, .mi, .m1, .mc, .mr, .mri, .mrc => inst.ops[0], - .rm, .rmi => inst.ops[1], + .m, .mi, .m1, .mc, .mr, .mri, .mrc, .mvr => inst.ops[0], + .rm, .rmi, .vmi => inst.ops[1], + .rvm, .rvmi => inst.ops[2], else => unreachable, }; switch (mem_op) { .reg => |reg| { const rm = switch (data.op_en) { - .m, .mi, .m1, .mc => enc.modRmExt(), + .m, .mi, .m1, .mc, .vmi => enc.modRmExt(), .mr, .mri, .mrc => inst.ops[1].reg.lowEnc(), - .rm, .rmi => inst.ops[0].reg.lowEnc(), + .rm, .rmi, .rvm, .rvmi => inst.ops[0].reg.lowEnc(), + .mvr => inst.ops[2].reg.lowEnc(), else => unreachable, }; try encoder.modRm_direct(rm, reg.lowEnc()); }, .mem => |mem| { const op = switch (data.op_en) { - .m, .mi, .m1, .mc => .none, + .m, .mi, .m1, .mc, .vmi => .none, .mr, .mri, .mrc => inst.ops[1], - .rm, .rmi => inst.ops[0], + .rm, .rmi, .rvm, .rvmi => inst.ops[0], + .mvr => inst.ops[2], else => unreachable, }; try encodeMemory(enc, mem, op, encoder); @@ -250,7 +256,8 @@ pub const Instruction = struct { switch (data.op_en) { .mi => try encodeImm(inst.ops[1].imm, data.ops[1], encoder), - .rmi, .mri => try encodeImm(inst.ops[2].imm, data.ops[2], encoder), + .rmi, .mri, .vmi => try encodeImm(inst.ops[2].imm, data.ops[2], encoder), + .rvmi => try encodeImm(inst.ops[3].imm, data.ops[3], encoder), else => {}, } }, @@ -282,11 +289,9 @@ pub const Instruction = struct { .rep, .repe, .repz => legacy.prefix_f3 = true, } - if (data.mode == .none) { - const bit_size = enc.operandBitSize(); - if (bit_size == 16) { - legacy.set16BitOverride(); - } + switch (data.mode) { + .short, .rex_short => legacy.set16BitOverride(), + else => {}, } const segment_override: ?Register = switch (op_en) { @@ -309,6 +314,7 @@ 
pub const Instruction = struct { } else null, + .vmi, .rvm, .rvmi, .mvr => unreachable, }; if (segment_override) |seg| { legacy.setSegmentOverride(seg); @@ -322,10 +328,7 @@ pub const Instruction = struct { var rex = Rex{}; rex.present = inst.encoding.data.mode == .rex; - switch (inst.encoding.data.mode) { - .long, .sse_long, .sse2_long => rex.w = true, - else => {}, - } + rex.w = inst.encoding.data.mode == .long; switch (op_en) { .np, .i, .zi, .fd, .td, .d => {}, @@ -346,11 +349,71 @@ pub const Instruction = struct { rex.b = b_x_op.isBaseExtended(); rex.x = b_x_op.isIndexExtended(); }, + .vmi, .rvm, .rvmi, .mvr => unreachable, } try encoder.rex(rex); } + fn encodeVexPrefix(inst: Instruction, encoder: anytype) !void { + const op_en = inst.encoding.data.op_en; + const opc = inst.encoding.opcode(); + const mand_pre = inst.encoding.mandatoryPrefix(); + + var vex = Vex{}; + + vex.w = inst.encoding.data.mode.isLong(); + + switch (op_en) { + .np, .i, .zi, .fd, .td, .d => {}, + .o, .oi => vex.b = inst.ops[0].reg.isExtended(), + .m, .mi, .m1, .mc, .mr, .rm, .rmi, .mri, .mrc, .vmi, .rvm, .rvmi, .mvr => { + const r_op = switch (op_en) { + .rm, .rmi, .rvm, .rvmi => inst.ops[0], + .mr, .mri, .mrc => inst.ops[1], + .mvr => inst.ops[2], + .m, .mi, .m1, .mc, .vmi => .none, + else => unreachable, + }; + vex.r = r_op.isBaseExtended(); + + const b_x_op = switch (op_en) { + .rm, .rmi, .vmi => inst.ops[1], + .m, .mi, .m1, .mc, .mr, .mri, .mrc, .mvr => inst.ops[0], + .rvm, .rvmi => inst.ops[2], + else => unreachable, + }; + vex.b = b_x_op.isBaseExtended(); + vex.x = b_x_op.isIndexExtended(); + }, + } + + vex.l = inst.encoding.data.mode.isVecLong(); + + vex.p = if (mand_pre) |mand| switch (mand) { + 0x66 => .@"66", + 0xf2 => .f2, + 0xf3 => .f3, + else => unreachable, + } else .none; + + const leading: usize = if (mand_pre) |_| 1 else 0; + assert(opc[leading] == 0x0f); + vex.m = switch (opc[leading + 1]) { + else => .@"0f", + 0x38 => .@"0f38", + 0x3a => .@"0f3a", + }; + + switch (op_en) 
{ + else => {}, + .vmi => vex.v = inst.ops[0].reg, + .rvm, .rvmi => vex.v = inst.ops[1].reg, + } + + try encoder.vex(vex); + } + fn encodeMandatoryPrefix(inst: Instruction, encoder: anytype) !void { const prefix = inst.encoding.mandatoryPrefix() orelse return; try encoder.opcode_1byte(prefix); @@ -443,8 +506,8 @@ pub const Instruction = struct { } fn encodeImm(imm: Immediate, kind: Encoding.Op, encoder: anytype) !void { - const raw = imm.asUnsigned(kind.bitSize()); - switch (kind.bitSize()) { + const raw = imm.asUnsigned(kind.immBitSize()); + switch (kind.immBitSize()) { 8 => try encoder.imm8(@intCast(u8, raw)), 16 => try encoder.imm16(@intCast(u16, raw)), 32 => try encoder.imm32(@intCast(u32, raw)), @@ -562,17 +625,48 @@ fn Encoder(comptime T: type, comptime opts: Options) type { /// or one of reg, index, r/m, base, or opcode-reg might be extended. /// /// See struct `Rex` for a description of each field. - pub fn rex(self: Self, byte: Rex) !void { - if (!byte.present and !byte.isSet()) return; + pub fn rex(self: Self, fields: Rex) !void { + if (!fields.present and !fields.isSet()) return; - var value: u8 = 0b0100_0000; + var byte: u8 = 0b0100_0000; - if (byte.w) value |= 0b1000; - if (byte.r) value |= 0b0100; - if (byte.x) value |= 0b0010; - if (byte.b) value |= 0b0001; + if (fields.w) byte |= 0b1000; + if (fields.r) byte |= 0b0100; + if (fields.x) byte |= 0b0010; + if (fields.b) byte |= 0b0001; - try self.writer.writeByte(value); + try self.writer.writeByte(byte); + } + + /// Encodes a VEX prefix given all the fields + /// + /// See struct `Vex` for a description of each field. 
+ pub fn vex(self: Self, fields: Vex) !void { + if (fields.is3Byte()) { + try self.writer.writeByte(0b1100_0100); + + try self.writer.writeByte( + @as(u8, ~@boolToInt(fields.r)) << 7 | + @as(u8, ~@boolToInt(fields.x)) << 6 | + @as(u8, ~@boolToInt(fields.b)) << 5 | + @as(u8, @enumToInt(fields.m)) << 0, + ); + + try self.writer.writeByte( + @as(u8, @boolToInt(fields.w)) << 7 | + @as(u8, ~fields.v.enc()) << 3 | + @as(u8, @boolToInt(fields.l)) << 2 | + @as(u8, @enumToInt(fields.p)) << 0, + ); + } else { + try self.writer.writeByte(0b1100_0101); + try self.writer.writeByte( + @as(u8, ~@boolToInt(fields.r)) << 7 | + @as(u8, ~fields.v.enc()) << 3 | + @as(u8, @boolToInt(fields.l)) << 2 | + @as(u8, @enumToInt(fields.p)) << 0, + ); + } } // ------ @@ -848,6 +942,31 @@ pub const Rex = struct { } }; +pub const Vex = struct { + w: bool = false, + r: bool = false, + x: bool = false, + b: bool = false, + l: bool = false, + p: enum(u2) { + none = 0b00, + @"66" = 0b01, + f3 = 0b10, + f2 = 0b11, + } = .none, + m: enum(u5) { + @"0f" = 0b0_0001, + @"0f38" = 0b0_0010, + @"0f3a" = 0b0_0011, + _, + } = .@"0f", + v: Register = .ymm0, + + pub fn is3Byte(vex: Vex) bool { + return vex.w or vex.x or vex.b or vex.m != .@"0f"; + } +}; + // Tests fn expectEqualHexStrings(expected: []const u8, given: []const u8, assembly: []const u8) !void { assert(expected.len > 0); diff --git a/src/arch/x86_64/encodings.zig b/src/arch/x86_64/encodings.zig index f87a110e99..2b9d530c1e 100644 --- a/src/arch/x86_64/encodings.zig +++ b/src/arch/x86_64/encodings.zig @@ -3,933 +3,1219 @@ const Mnemonic = Encoding.Mnemonic; const OpEn = Encoding.OpEn; const Op = Encoding.Op; const Mode = Encoding.Mode; +const Feature = Encoding.Feature; const modrm_ext = u3; -pub const Entry = struct { Mnemonic, OpEn, []const Op, []const u8, modrm_ext, Mode }; +pub const Entry = struct { Mnemonic, OpEn, []const Op, []const u8, modrm_ext, Mode, Feature }; // TODO move this into a .zon file when Zig is capable of importing .zon files // 
zig fmt: off pub const table = [_]Entry{ // General-purpose - .{ .adc, .zi, &.{ .al, .imm8 }, &.{ 0x14 }, 0, .none }, - .{ .adc, .zi, &.{ .ax, .imm16 }, &.{ 0x15 }, 0, .none }, - .{ .adc, .zi, &.{ .eax, .imm32 }, &.{ 0x15 }, 0, .none }, - .{ .adc, .zi, &.{ .rax, .imm32s }, &.{ 0x15 }, 0, .long }, - .{ .adc, .mi, &.{ .rm8, .imm8 }, &.{ 0x80 }, 2, .none }, - .{ .adc, .mi, &.{ .rm8, .imm8 }, &.{ 0x80 }, 2, .rex }, - .{ .adc, .mi, &.{ .rm16, .imm16 }, &.{ 0x81 }, 2, .none }, - .{ .adc, .mi, &.{ .rm32, .imm32 }, &.{ 0x81 }, 2, .none }, - .{ .adc, .mi, &.{ .rm64, .imm32s }, &.{ 0x81 }, 2, .long }, - .{ .adc, .mi, &.{ .rm16, .imm8s }, &.{ 0x83 }, 2, .none }, - .{ .adc, .mi, &.{ .rm32, .imm8s }, &.{ 0x83 }, 2, .none }, - .{ .adc, .mi, &.{ .rm64, .imm8s }, &.{ 0x83 }, 2, .long }, - .{ .adc, .mr, &.{ .rm8, .r8 }, &.{ 0x10 }, 0, .none }, - .{ .adc, .mr, &.{ .rm8, .r8 }, &.{ 0x10 }, 0, .rex }, - .{ .adc, .mr, &.{ .rm16, .r16 }, &.{ 0x11 }, 0, .none }, - .{ .adc, .mr, &.{ .rm32, .r32 }, &.{ 0x11 }, 0, .none }, - .{ .adc, .mr, &.{ .rm64, .r64 }, &.{ 0x11 }, 0, .long }, - .{ .adc, .rm, &.{ .r8, .rm8 }, &.{ 0x12 }, 0, .none }, - .{ .adc, .rm, &.{ .r8, .rm8 }, &.{ 0x12 }, 0, .rex }, - .{ .adc, .rm, &.{ .r16, .rm16 }, &.{ 0x13 }, 0, .none }, - .{ .adc, .rm, &.{ .r32, .rm32 }, &.{ 0x13 }, 0, .none }, - .{ .adc, .rm, &.{ .r64, .rm64 }, &.{ 0x13 }, 0, .long }, - - .{ .add, .zi, &.{ .al, .imm8 }, &.{ 0x04 }, 0, .none }, - .{ .add, .zi, &.{ .ax, .imm16 }, &.{ 0x05 }, 0, .none }, - .{ .add, .zi, &.{ .eax, .imm32 }, &.{ 0x05 }, 0, .none }, - .{ .add, .zi, &.{ .rax, .imm32s }, &.{ 0x05 }, 0, .long }, - .{ .add, .mi, &.{ .rm8, .imm8 }, &.{ 0x80 }, 0, .none }, - .{ .add, .mi, &.{ .rm8, .imm8 }, &.{ 0x80 }, 0, .rex }, - .{ .add, .mi, &.{ .rm16, .imm16 }, &.{ 0x81 }, 0, .none }, - .{ .add, .mi, &.{ .rm32, .imm32 }, &.{ 0x81 }, 0, .none }, - .{ .add, .mi, &.{ .rm64, .imm32s }, &.{ 0x81 }, 0, .long }, - .{ .add, .mi, &.{ .rm16, .imm8s }, &.{ 0x83 }, 0, .none }, - .{ .add, .mi, &.{ .rm32, .imm8s 
}, &.{ 0x83 }, 0, .none }, - .{ .add, .mi, &.{ .rm64, .imm8s }, &.{ 0x83 }, 0, .long }, - .{ .add, .mr, &.{ .rm8, .r8 }, &.{ 0x00 }, 0, .none }, - .{ .add, .mr, &.{ .rm8, .r8 }, &.{ 0x00 }, 0, .rex }, - .{ .add, .mr, &.{ .rm16, .r16 }, &.{ 0x01 }, 0, .none }, - .{ .add, .mr, &.{ .rm32, .r32 }, &.{ 0x01 }, 0, .none }, - .{ .add, .mr, &.{ .rm64, .r64 }, &.{ 0x01 }, 0, .long }, - .{ .add, .rm, &.{ .r8, .rm8 }, &.{ 0x02 }, 0, .none }, - .{ .add, .rm, &.{ .r8, .rm8 }, &.{ 0x02 }, 0, .rex }, - .{ .add, .rm, &.{ .r16, .rm16 }, &.{ 0x03 }, 0, .none }, - .{ .add, .rm, &.{ .r32, .rm32 }, &.{ 0x03 }, 0, .none }, - .{ .add, .rm, &.{ .r64, .rm64 }, &.{ 0x03 }, 0, .long }, - - .{ .@"and", .zi, &.{ .al, .imm8 }, &.{ 0x24 }, 0, .none }, - .{ .@"and", .zi, &.{ .ax, .imm16 }, &.{ 0x25 }, 0, .none }, - .{ .@"and", .zi, &.{ .eax, .imm32 }, &.{ 0x25 }, 0, .none }, - .{ .@"and", .zi, &.{ .rax, .imm32s }, &.{ 0x25 }, 0, .long }, - .{ .@"and", .mi, &.{ .rm8, .imm8 }, &.{ 0x80 }, 4, .none }, - .{ .@"and", .mi, &.{ .rm8, .imm8 }, &.{ 0x80 }, 4, .rex }, - .{ .@"and", .mi, &.{ .rm16, .imm16 }, &.{ 0x81 }, 4, .none }, - .{ .@"and", .mi, &.{ .rm32, .imm32 }, &.{ 0x81 }, 4, .none }, - .{ .@"and", .mi, &.{ .rm64, .imm32s }, &.{ 0x81 }, 4, .long }, - .{ .@"and", .mi, &.{ .rm16, .imm8s }, &.{ 0x83 }, 4, .none }, - .{ .@"and", .mi, &.{ .rm32, .imm8s }, &.{ 0x83 }, 4, .none }, - .{ .@"and", .mi, &.{ .rm64, .imm8s }, &.{ 0x83 }, 4, .long }, - .{ .@"and", .mr, &.{ .rm8, .r8 }, &.{ 0x20 }, 0, .none }, - .{ .@"and", .mr, &.{ .rm8, .r8 }, &.{ 0x20 }, 0, .rex }, - .{ .@"and", .mr, &.{ .rm16, .r16 }, &.{ 0x21 }, 0, .none }, - .{ .@"and", .mr, &.{ .rm32, .r32 }, &.{ 0x21 }, 0, .none }, - .{ .@"and", .mr, &.{ .rm64, .r64 }, &.{ 0x21 }, 0, .long }, - .{ .@"and", .rm, &.{ .r8, .rm8 }, &.{ 0x22 }, 0, .none }, - .{ .@"and", .rm, &.{ .r8, .rm8 }, &.{ 0x22 }, 0, .rex }, - .{ .@"and", .rm, &.{ .r16, .rm16 }, &.{ 0x23 }, 0, .none }, - .{ .@"and", .rm, &.{ .r32, .rm32 }, &.{ 0x23 }, 0, .none }, - .{ .@"and", .rm, &.{ 
.r64, .rm64 }, &.{ 0x23 }, 0, .long }, - - .{ .bsf, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0xbc }, 0, .none }, - .{ .bsf, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0xbc }, 0, .none }, - .{ .bsf, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0xbc }, 0, .long }, - - .{ .bsr, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0xbd }, 0, .none }, - .{ .bsr, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0xbd }, 0, .none }, - .{ .bsr, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0xbd }, 0, .long }, - - .{ .bswap, .o, &.{ .r32 }, &.{ 0x0f, 0xc8 }, 0, .none }, - .{ .bswap, .o, &.{ .r64 }, &.{ 0x0f, 0xc8 }, 0, .long }, - - .{ .bt, .mr, &.{ .rm16, .r16 }, &.{ 0x0f, 0xa3 }, 0, .none }, - .{ .bt, .mr, &.{ .rm32, .r32 }, &.{ 0x0f, 0xa3 }, 0, .none }, - .{ .bt, .mr, &.{ .rm64, .r64 }, &.{ 0x0f, 0xa3 }, 0, .long }, - .{ .bt, .mi, &.{ .rm16, .imm8 }, &.{ 0x0f, 0xba }, 4, .none }, - .{ .bt, .mi, &.{ .rm32, .imm8 }, &.{ 0x0f, 0xba }, 4, .none }, - .{ .bt, .mi, &.{ .rm64, .imm8 }, &.{ 0x0f, 0xba }, 4, .long }, - - .{ .btc, .mr, &.{ .rm16, .r16 }, &.{ 0x0f, 0xbb }, 0, .none }, - .{ .btc, .mr, &.{ .rm32, .r32 }, &.{ 0x0f, 0xbb }, 0, .none }, - .{ .btc, .mr, &.{ .rm64, .r64 }, &.{ 0x0f, 0xbb }, 0, .long }, - .{ .btc, .mi, &.{ .rm16, .imm8 }, &.{ 0x0f, 0xba }, 7, .none }, - .{ .btc, .mi, &.{ .rm32, .imm8 }, &.{ 0x0f, 0xba }, 7, .none }, - .{ .btc, .mi, &.{ .rm64, .imm8 }, &.{ 0x0f, 0xba }, 7, .long }, - - .{ .btr, .mr, &.{ .rm16, .r16 }, &.{ 0x0f, 0xb3 }, 0, .none }, - .{ .btr, .mr, &.{ .rm32, .r32 }, &.{ 0x0f, 0xb3 }, 0, .none }, - .{ .btr, .mr, &.{ .rm64, .r64 }, &.{ 0x0f, 0xb3 }, 0, .long }, - .{ .btr, .mi, &.{ .rm16, .imm8 }, &.{ 0x0f, 0xba }, 6, .none }, - .{ .btr, .mi, &.{ .rm32, .imm8 }, &.{ 0x0f, 0xba }, 6, .none }, - .{ .btr, .mi, &.{ .rm64, .imm8 }, &.{ 0x0f, 0xba }, 6, .long }, - - .{ .bts, .mr, &.{ .rm16, .r16 }, &.{ 0x0f, 0xab }, 0, .none }, - .{ .bts, .mr, &.{ .rm32, .r32 }, &.{ 0x0f, 0xab }, 0, .none }, - .{ .bts, .mr, &.{ .rm64, .r64 }, &.{ 0x0f, 0xab }, 0, .long }, - .{ .bts, .mi, &.{ .rm16, .imm8 }, &.{ 0x0f, 0xba }, 5, .none }, - .{ 
.bts, .mi, &.{ .rm32, .imm8 }, &.{ 0x0f, 0xba }, 5, .none }, - .{ .bts, .mi, &.{ .rm64, .imm8 }, &.{ 0x0f, 0xba }, 5, .long }, + .{ .adc, .zi, &.{ .al, .imm8 }, &.{ 0x14 }, 0, .none, .none }, + .{ .adc, .zi, &.{ .ax, .imm16 }, &.{ 0x15 }, 0, .short, .none }, + .{ .adc, .zi, &.{ .eax, .imm32 }, &.{ 0x15 }, 0, .none, .none }, + .{ .adc, .zi, &.{ .rax, .imm32s }, &.{ 0x15 }, 0, .long, .none }, + .{ .adc, .mi, &.{ .rm8, .imm8 }, &.{ 0x80 }, 2, .none, .none }, + .{ .adc, .mi, &.{ .rm8, .imm8 }, &.{ 0x80 }, 2, .rex, .none }, + .{ .adc, .mi, &.{ .rm16, .imm16 }, &.{ 0x81 }, 2, .short, .none }, + .{ .adc, .mi, &.{ .rm32, .imm32 }, &.{ 0x81 }, 2, .none, .none }, + .{ .adc, .mi, &.{ .rm64, .imm32s }, &.{ 0x81 }, 2, .long, .none }, + .{ .adc, .mi, &.{ .rm16, .imm8s }, &.{ 0x83 }, 2, .short, .none }, + .{ .adc, .mi, &.{ .rm32, .imm8s }, &.{ 0x83 }, 2, .none, .none }, + .{ .adc, .mi, &.{ .rm64, .imm8s }, &.{ 0x83 }, 2, .long, .none }, + .{ .adc, .mr, &.{ .rm8, .r8 }, &.{ 0x10 }, 0, .none, .none }, + .{ .adc, .mr, &.{ .rm8, .r8 }, &.{ 0x10 }, 0, .rex, .none }, + .{ .adc, .mr, &.{ .rm16, .r16 }, &.{ 0x11 }, 0, .short, .none }, + .{ .adc, .mr, &.{ .rm32, .r32 }, &.{ 0x11 }, 0, .none, .none }, + .{ .adc, .mr, &.{ .rm64, .r64 }, &.{ 0x11 }, 0, .long, .none }, + .{ .adc, .rm, &.{ .r8, .rm8 }, &.{ 0x12 }, 0, .none, .none }, + .{ .adc, .rm, &.{ .r8, .rm8 }, &.{ 0x12 }, 0, .rex, .none }, + .{ .adc, .rm, &.{ .r16, .rm16 }, &.{ 0x13 }, 0, .short, .none }, + .{ .adc, .rm, &.{ .r32, .rm32 }, &.{ 0x13 }, 0, .none, .none }, + .{ .adc, .rm, &.{ .r64, .rm64 }, &.{ 0x13 }, 0, .long, .none }, + + .{ .add, .zi, &.{ .al, .imm8 }, &.{ 0x04 }, 0, .none, .none }, + .{ .add, .zi, &.{ .ax, .imm16 }, &.{ 0x05 }, 0, .short, .none }, + .{ .add, .zi, &.{ .eax, .imm32 }, &.{ 0x05 }, 0, .none, .none }, + .{ .add, .zi, &.{ .rax, .imm32s }, &.{ 0x05 }, 0, .long, .none }, + .{ .add, .mi, &.{ .rm8, .imm8 }, &.{ 0x80 }, 0, .none, .none }, + .{ .add, .mi, &.{ .rm8, .imm8 }, &.{ 0x80 }, 0, .rex, .none }, + .{ .add, 
.mi, &.{ .rm16, .imm16 }, &.{ 0x81 }, 0, .short, .none }, + .{ .add, .mi, &.{ .rm32, .imm32 }, &.{ 0x81 }, 0, .none, .none }, + .{ .add, .mi, &.{ .rm64, .imm32s }, &.{ 0x81 }, 0, .long, .none }, + .{ .add, .mi, &.{ .rm16, .imm8s }, &.{ 0x83 }, 0, .short, .none }, + .{ .add, .mi, &.{ .rm32, .imm8s }, &.{ 0x83 }, 0, .none, .none }, + .{ .add, .mi, &.{ .rm64, .imm8s }, &.{ 0x83 }, 0, .long, .none }, + .{ .add, .mr, &.{ .rm8, .r8 }, &.{ 0x00 }, 0, .none, .none }, + .{ .add, .mr, &.{ .rm8, .r8 }, &.{ 0x00 }, 0, .rex, .none }, + .{ .add, .mr, &.{ .rm16, .r16 }, &.{ 0x01 }, 0, .short, .none }, + .{ .add, .mr, &.{ .rm32, .r32 }, &.{ 0x01 }, 0, .none, .none }, + .{ .add, .mr, &.{ .rm64, .r64 }, &.{ 0x01 }, 0, .long, .none }, + .{ .add, .rm, &.{ .r8, .rm8 }, &.{ 0x02 }, 0, .none, .none }, + .{ .add, .rm, &.{ .r8, .rm8 }, &.{ 0x02 }, 0, .rex, .none }, + .{ .add, .rm, &.{ .r16, .rm16 }, &.{ 0x03 }, 0, .short, .none }, + .{ .add, .rm, &.{ .r32, .rm32 }, &.{ 0x03 }, 0, .none, .none }, + .{ .add, .rm, &.{ .r64, .rm64 }, &.{ 0x03 }, 0, .long, .none }, + + .{ .@"and", .zi, &.{ .al, .imm8 }, &.{ 0x24 }, 0, .none, .none }, + .{ .@"and", .zi, &.{ .ax, .imm16 }, &.{ 0x25 }, 0, .short, .none }, + .{ .@"and", .zi, &.{ .eax, .imm32 }, &.{ 0x25 }, 0, .none, .none }, + .{ .@"and", .zi, &.{ .rax, .imm32s }, &.{ 0x25 }, 0, .long, .none }, + .{ .@"and", .mi, &.{ .rm8, .imm8 }, &.{ 0x80 }, 4, .none, .none }, + .{ .@"and", .mi, &.{ .rm8, .imm8 }, &.{ 0x80 }, 4, .rex, .none }, + .{ .@"and", .mi, &.{ .rm16, .imm16 }, &.{ 0x81 }, 4, .short, .none }, + .{ .@"and", .mi, &.{ .rm32, .imm32 }, &.{ 0x81 }, 4, .none, .none }, + .{ .@"and", .mi, &.{ .rm64, .imm32s }, &.{ 0x81 }, 4, .long, .none }, + .{ .@"and", .mi, &.{ .rm16, .imm8s }, &.{ 0x83 }, 4, .short, .none }, + .{ .@"and", .mi, &.{ .rm32, .imm8s }, &.{ 0x83 }, 4, .none, .none }, + .{ .@"and", .mi, &.{ .rm64, .imm8s }, &.{ 0x83 }, 4, .long, .none }, + .{ .@"and", .mr, &.{ .rm8, .r8 }, &.{ 0x20 }, 0, .none, .none }, + .{ .@"and", .mr, &.{ .rm8, .r8 
}, &.{ 0x20 }, 0, .rex, .none }, + .{ .@"and", .mr, &.{ .rm16, .r16 }, &.{ 0x21 }, 0, .short, .none }, + .{ .@"and", .mr, &.{ .rm32, .r32 }, &.{ 0x21 }, 0, .none, .none }, + .{ .@"and", .mr, &.{ .rm64, .r64 }, &.{ 0x21 }, 0, .long, .none }, + .{ .@"and", .rm, &.{ .r8, .rm8 }, &.{ 0x22 }, 0, .none, .none }, + .{ .@"and", .rm, &.{ .r8, .rm8 }, &.{ 0x22 }, 0, .rex, .none }, + .{ .@"and", .rm, &.{ .r16, .rm16 }, &.{ 0x23 }, 0, .short, .none }, + .{ .@"and", .rm, &.{ .r32, .rm32 }, &.{ 0x23 }, 0, .none, .none }, + .{ .@"and", .rm, &.{ .r64, .rm64 }, &.{ 0x23 }, 0, .long, .none }, + + .{ .bsf, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0xbc }, 0, .short, .none }, + .{ .bsf, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0xbc }, 0, .none, .none }, + .{ .bsf, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0xbc }, 0, .long, .none }, + + .{ .bsr, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0xbd }, 0, .short, .none }, + .{ .bsr, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0xbd }, 0, .none, .none }, + .{ .bsr, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0xbd }, 0, .long, .none }, + + .{ .bswap, .o, &.{ .r32 }, &.{ 0x0f, 0xc8 }, 0, .none, .none }, + .{ .bswap, .o, &.{ .r64 }, &.{ 0x0f, 0xc8 }, 0, .long, .none }, + + .{ .bt, .mr, &.{ .rm16, .r16 }, &.{ 0x0f, 0xa3 }, 0, .short, .none }, + .{ .bt, .mr, &.{ .rm32, .r32 }, &.{ 0x0f, 0xa3 }, 0, .none, .none }, + .{ .bt, .mr, &.{ .rm64, .r64 }, &.{ 0x0f, 0xa3 }, 0, .long, .none }, + .{ .bt, .mi, &.{ .rm16, .imm8 }, &.{ 0x0f, 0xba }, 4, .short, .none }, + .{ .bt, .mi, &.{ .rm32, .imm8 }, &.{ 0x0f, 0xba }, 4, .none, .none }, + .{ .bt, .mi, &.{ .rm64, .imm8 }, &.{ 0x0f, 0xba }, 4, .long, .none }, + + .{ .btc, .mr, &.{ .rm16, .r16 }, &.{ 0x0f, 0xbb }, 0, .short, .none }, + .{ .btc, .mr, &.{ .rm32, .r32 }, &.{ 0x0f, 0xbb }, 0, .none, .none }, + .{ .btc, .mr, &.{ .rm64, .r64 }, &.{ 0x0f, 0xbb }, 0, .long, .none }, + .{ .btc, .mi, &.{ .rm16, .imm8 }, &.{ 0x0f, 0xba }, 7, .short, .none }, + .{ .btc, .mi, &.{ .rm32, .imm8 }, &.{ 0x0f, 0xba }, 7, .none, .none }, + .{ .btc, .mi, &.{ .rm64, .imm8 }, &.{ 0x0f, 
0xba }, 7, .long, .none }, + + .{ .btr, .mr, &.{ .rm16, .r16 }, &.{ 0x0f, 0xb3 }, 0, .short, .none }, + .{ .btr, .mr, &.{ .rm32, .r32 }, &.{ 0x0f, 0xb3 }, 0, .none, .none }, + .{ .btr, .mr, &.{ .rm64, .r64 }, &.{ 0x0f, 0xb3 }, 0, .long, .none }, + .{ .btr, .mi, &.{ .rm16, .imm8 }, &.{ 0x0f, 0xba }, 6, .short, .none }, + .{ .btr, .mi, &.{ .rm32, .imm8 }, &.{ 0x0f, 0xba }, 6, .none, .none }, + .{ .btr, .mi, &.{ .rm64, .imm8 }, &.{ 0x0f, 0xba }, 6, .long, .none }, + + .{ .bts, .mr, &.{ .rm16, .r16 }, &.{ 0x0f, 0xab }, 0, .short, .none }, + .{ .bts, .mr, &.{ .rm32, .r32 }, &.{ 0x0f, 0xab }, 0, .none, .none }, + .{ .bts, .mr, &.{ .rm64, .r64 }, &.{ 0x0f, 0xab }, 0, .long, .none }, + .{ .bts, .mi, &.{ .rm16, .imm8 }, &.{ 0x0f, 0xba }, 5, .short, .none }, + .{ .bts, .mi, &.{ .rm32, .imm8 }, &.{ 0x0f, 0xba }, 5, .none, .none }, + .{ .bts, .mi, &.{ .rm64, .imm8 }, &.{ 0x0f, 0xba }, 5, .long, .none }, // This is M encoding according to Intel, but D makes more sense here. - .{ .call, .d, &.{ .rel32 }, &.{ 0xe8 }, 0, .none }, - .{ .call, .m, &.{ .rm64 }, &.{ 0xff }, 2, .none }, - - .{ .cbw, .np, &.{ .o16 }, &.{ 0x98 }, 0, .none }, - .{ .cwde, .np, &.{ .o32 }, &.{ 0x98 }, 0, .none }, - .{ .cdqe, .np, &.{ .o64 }, &.{ 0x98 }, 0, .long }, - - .{ .cwd, .np, &.{ .o16 }, &.{ 0x99 }, 0, .none }, - .{ .cdq, .np, &.{ .o32 }, &.{ 0x99 }, 0, .none }, - .{ .cqo, .np, &.{ .o64 }, &.{ 0x99 }, 0, .long }, - - .{ .cmova, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x47 }, 0, .none }, - .{ .cmova, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x47 }, 0, .none }, - .{ .cmova, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x47 }, 0, .long }, - .{ .cmovae, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x43 }, 0, .none }, - .{ .cmovae, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x43 }, 0, .none }, - .{ .cmovae, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x43 }, 0, .long }, - .{ .cmovb, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x42 }, 0, .none }, - .{ .cmovb, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x42 }, 0, .none }, - .{ .cmovb, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x42 }, 0, 
.long }, - .{ .cmovbe, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x46 }, 0, .none }, - .{ .cmovbe, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x46 }, 0, .none }, - .{ .cmovbe, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x46 }, 0, .long }, - .{ .cmovc, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x42 }, 0, .none }, - .{ .cmovc, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x42 }, 0, .none }, - .{ .cmovc, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x42 }, 0, .long }, - .{ .cmove, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x44 }, 0, .none }, - .{ .cmove, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x44 }, 0, .none }, - .{ .cmove, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x44 }, 0, .long }, - .{ .cmovg, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4f }, 0, .none }, - .{ .cmovg, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4f }, 0, .none }, - .{ .cmovg, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4f }, 0, .long }, - .{ .cmovge, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4d }, 0, .none }, - .{ .cmovge, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4d }, 0, .none }, - .{ .cmovge, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4d }, 0, .long }, - .{ .cmovl, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4c }, 0, .none }, - .{ .cmovl, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4c }, 0, .none }, - .{ .cmovl, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4c }, 0, .long }, - .{ .cmovle, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4e }, 0, .none }, - .{ .cmovle, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4e }, 0, .none }, - .{ .cmovle, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4e }, 0, .long }, - .{ .cmovna, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x46 }, 0, .none }, - .{ .cmovna, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x46 }, 0, .none }, - .{ .cmovna, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x46 }, 0, .long }, - .{ .cmovnae, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x42 }, 0, .none }, - .{ .cmovnae, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x42 }, 0, .none }, - .{ .cmovnae, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x42 }, 0, .long }, - .{ .cmovnb, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x43 }, 0, .none }, - .{ .cmovnb, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x43 }, 0, .none }, - .{ .cmovnb, .rm, &.{ 
.r64, .rm64 }, &.{ 0x0f, 0x43 }, 0, .long }, - .{ .cmovnbe, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x47 }, 0, .none }, - .{ .cmovnbe, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x47 }, 0, .none }, - .{ .cmovnbe, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x47 }, 0, .long }, - .{ .cmovnc, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x43 }, 0, .none }, - .{ .cmovnc, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x43 }, 0, .none }, - .{ .cmovnc, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x43 }, 0, .long }, - .{ .cmovne, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x45 }, 0, .none }, - .{ .cmovne, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x45 }, 0, .none }, - .{ .cmovne, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x45 }, 0, .long }, - .{ .cmovng, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4e }, 0, .none }, - .{ .cmovng, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4e }, 0, .none }, - .{ .cmovng, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4e }, 0, .long }, - .{ .cmovnge, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4c }, 0, .none }, - .{ .cmovnge, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4c }, 0, .none }, - .{ .cmovnge, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4c }, 0, .long }, - .{ .cmovnl, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4d }, 0, .none }, - .{ .cmovnl, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4d }, 0, .none }, - .{ .cmovnl, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4d }, 0, .long }, - .{ .cmovnle, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4f }, 0, .none }, - .{ .cmovnle, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4f }, 0, .none }, - .{ .cmovnle, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4f }, 0, .long }, - .{ .cmovno, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x41 }, 0, .none }, - .{ .cmovno, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x41 }, 0, .none }, - .{ .cmovno, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x41 }, 0, .long }, - .{ .cmovnp, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4b }, 0, .none }, - .{ .cmovnp, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4b }, 0, .none }, - .{ .cmovnp, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4b }, 0, .long }, - .{ .cmovns, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x49 }, 0, .none }, - .{ .cmovns, .rm, &.{ .r32, .rm32 }, &.{ 
0x0f, 0x49 }, 0, .none }, - .{ .cmovns, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x49 }, 0, .long }, - .{ .cmovnz, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x45 }, 0, .none }, - .{ .cmovnz, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x45 }, 0, .none }, - .{ .cmovnz, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x45 }, 0, .long }, - .{ .cmovo, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x40 }, 0, .none }, - .{ .cmovo, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x40 }, 0, .none }, - .{ .cmovo, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x40 }, 0, .long }, - .{ .cmovp, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4a }, 0, .none }, - .{ .cmovp, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4a }, 0, .none }, - .{ .cmovp, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4a }, 0, .long }, - .{ .cmovpe, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4a }, 0, .none }, - .{ .cmovpe, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4a }, 0, .none }, - .{ .cmovpe, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4a }, 0, .long }, - .{ .cmovpo, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4b }, 0, .none }, - .{ .cmovpo, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4b }, 0, .none }, - .{ .cmovpo, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4b }, 0, .long }, - .{ .cmovs, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x48 }, 0, .none }, - .{ .cmovs, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x48 }, 0, .none }, - .{ .cmovs, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x48 }, 0, .long }, - .{ .cmovz, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x44 }, 0, .none }, - .{ .cmovz, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x44 }, 0, .none }, - .{ .cmovz, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x44 }, 0, .long }, - - .{ .cmp, .zi, &.{ .al, .imm8 }, &.{ 0x3c }, 0, .none }, - .{ .cmp, .zi, &.{ .ax, .imm16 }, &.{ 0x3d }, 0, .none }, - .{ .cmp, .zi, &.{ .eax, .imm32 }, &.{ 0x3d }, 0, .none }, - .{ .cmp, .zi, &.{ .rax, .imm32s }, &.{ 0x3d }, 0, .long }, - .{ .cmp, .mi, &.{ .rm8, .imm8 }, &.{ 0x80 }, 7, .none }, - .{ .cmp, .mi, &.{ .rm8, .imm8 }, &.{ 0x80 }, 7, .rex }, - .{ .cmp, .mi, &.{ .rm16, .imm16 }, &.{ 0x81 }, 7, .none }, - .{ .cmp, .mi, &.{ .rm32, .imm32 }, &.{ 0x81 }, 7, .none }, - .{ .cmp, 
.mi, &.{ .rm64, .imm32s }, &.{ 0x81 }, 7, .long }, - .{ .cmp, .mi, &.{ .rm16, .imm8s }, &.{ 0x83 }, 7, .none }, - .{ .cmp, .mi, &.{ .rm32, .imm8s }, &.{ 0x83 }, 7, .none }, - .{ .cmp, .mi, &.{ .rm64, .imm8s }, &.{ 0x83 }, 7, .long }, - .{ .cmp, .mr, &.{ .rm8, .r8 }, &.{ 0x38 }, 0, .none }, - .{ .cmp, .mr, &.{ .rm8, .r8 }, &.{ 0x38 }, 0, .rex }, - .{ .cmp, .mr, &.{ .rm16, .r16 }, &.{ 0x39 }, 0, .none }, - .{ .cmp, .mr, &.{ .rm32, .r32 }, &.{ 0x39 }, 0, .none }, - .{ .cmp, .mr, &.{ .rm64, .r64 }, &.{ 0x39 }, 0, .long }, - .{ .cmp, .rm, &.{ .r8, .rm8 }, &.{ 0x3a }, 0, .none }, - .{ .cmp, .rm, &.{ .r8, .rm8 }, &.{ 0x3a }, 0, .rex }, - .{ .cmp, .rm, &.{ .r16, .rm16 }, &.{ 0x3b }, 0, .none }, - .{ .cmp, .rm, &.{ .r32, .rm32 }, &.{ 0x3b }, 0, .none }, - .{ .cmp, .rm, &.{ .r64, .rm64 }, &.{ 0x3b }, 0, .long }, - - .{ .cmps, .np, &.{ .m8, .m8 }, &.{ 0xa6 }, 0, .none }, - .{ .cmps, .np, &.{ .m16, .m16 }, &.{ 0xa7 }, 0, .none }, - .{ .cmps, .np, &.{ .m32, .m32 }, &.{ 0xa7 }, 0, .none }, - .{ .cmps, .np, &.{ .m64, .m64 }, &.{ 0xa7 }, 0, .long }, - - .{ .cmpsb, .np, &.{}, &.{ 0xa6 }, 0, .none }, - .{ .cmpsw, .np, &.{}, &.{ 0xa7 }, 0, .short }, - .{ .cmpsd, .np, &.{}, &.{ 0xa7 }, 0, .none }, - .{ .cmpsq, .np, &.{}, &.{ 0xa7 }, 0, .long }, - - .{ .cmpxchg, .mr, &.{ .rm8, .r8 }, &.{ 0x0f, 0xb0 }, 0, .none }, - .{ .cmpxchg, .mr, &.{ .rm8, .r8 }, &.{ 0x0f, 0xb0 }, 0, .rex }, - .{ .cmpxchg, .mr, &.{ .rm16, .r16 }, &.{ 0x0f, 0xb1 }, 0, .none }, - .{ .cmpxchg, .mr, &.{ .rm32, .r32 }, &.{ 0x0f, 0xb1 }, 0, .none }, - .{ .cmpxchg, .mr, &.{ .rm64, .r64 }, &.{ 0x0f, 0xb1 }, 0, .long }, - - .{ .cmpxchg8b , .m, &.{ .m64 }, &.{ 0x0f, 0xc7 }, 1, .none }, - .{ .cmpxchg16b, .m, &.{ .m128 }, &.{ 0x0f, 0xc7 }, 1, .long }, - - .{ .div, .m, &.{ .rm8 }, &.{ 0xf6 }, 6, .none }, - .{ .div, .m, &.{ .rm8 }, &.{ 0xf6 }, 6, .rex }, - .{ .div, .m, &.{ .rm16 }, &.{ 0xf7 }, 6, .none }, - .{ .div, .m, &.{ .rm32 }, &.{ 0xf7 }, 6, .none }, - .{ .div, .m, &.{ .rm64 }, &.{ 0xf7 }, 6, .long }, - - .{ .fisttp, .m, 
&.{ .m16 }, &.{ 0xdf }, 1, .fpu }, - .{ .fisttp, .m, &.{ .m32 }, &.{ 0xdb }, 1, .fpu }, - .{ .fisttp, .m, &.{ .m64 }, &.{ 0xdd }, 1, .fpu }, - - .{ .fld, .m, &.{ .m32 }, &.{ 0xd9 }, 0, .fpu }, - .{ .fld, .m, &.{ .m64 }, &.{ 0xdd }, 0, .fpu }, - .{ .fld, .m, &.{ .m80 }, &.{ 0xdb }, 5, .fpu }, - - .{ .idiv, .m, &.{ .rm8 }, &.{ 0xf6 }, 7, .none }, - .{ .idiv, .m, &.{ .rm8 }, &.{ 0xf6 }, 7, .rex }, - .{ .idiv, .m, &.{ .rm16 }, &.{ 0xf7 }, 7, .none }, - .{ .idiv, .m, &.{ .rm32 }, &.{ 0xf7 }, 7, .none }, - .{ .idiv, .m, &.{ .rm64 }, &.{ 0xf7 }, 7, .long }, - - .{ .imul, .m, &.{ .rm8 }, &.{ 0xf6 }, 5, .none }, - .{ .imul, .m, &.{ .rm8 }, &.{ 0xf6 }, 5, .rex }, - .{ .imul, .m, &.{ .rm16, }, &.{ 0xf7 }, 5, .none }, - .{ .imul, .m, &.{ .rm32, }, &.{ 0xf7 }, 5, .none }, - .{ .imul, .m, &.{ .rm64, }, &.{ 0xf7 }, 5, .long }, - .{ .imul, .rm, &.{ .r16, .rm16, }, &.{ 0x0f, 0xaf }, 0, .none }, - .{ .imul, .rm, &.{ .r32, .rm32, }, &.{ 0x0f, 0xaf }, 0, .none }, - .{ .imul, .rm, &.{ .r64, .rm64, }, &.{ 0x0f, 0xaf }, 0, .long }, - .{ .imul, .rmi, &.{ .r16, .rm16, .imm8s }, &.{ 0x6b }, 0, .none }, - .{ .imul, .rmi, &.{ .r32, .rm32, .imm8s }, &.{ 0x6b }, 0, .none }, - .{ .imul, .rmi, &.{ .r64, .rm64, .imm8s }, &.{ 0x6b }, 0, .long }, - .{ .imul, .rmi, &.{ .r16, .rm16, .imm16 }, &.{ 0x69 }, 0, .none }, - .{ .imul, .rmi, &.{ .r32, .rm32, .imm32 }, &.{ 0x69 }, 0, .none }, - .{ .imul, .rmi, &.{ .r64, .rm64, .imm32 }, &.{ 0x69 }, 0, .long }, - - .{ .int3, .np, &.{}, &.{ 0xcc }, 0, .none }, - - .{ .ja, .d, &.{ .rel32 }, &.{ 0x0f, 0x87 }, 0, .none }, - .{ .jae, .d, &.{ .rel32 }, &.{ 0x0f, 0x83 }, 0, .none }, - .{ .jb, .d, &.{ .rel32 }, &.{ 0x0f, 0x82 }, 0, .none }, - .{ .jbe, .d, &.{ .rel32 }, &.{ 0x0f, 0x86 }, 0, .none }, - .{ .jc, .d, &.{ .rel32 }, &.{ 0x0f, 0x82 }, 0, .none }, - .{ .jrcxz, .d, &.{ .rel32 }, &.{ 0xe3 }, 0, .none }, - .{ .je, .d, &.{ .rel32 }, &.{ 0x0f, 0x84 }, 0, .none }, - .{ .jg, .d, &.{ .rel32 }, &.{ 0x0f, 0x8f }, 0, .none }, - .{ .jge, .d, &.{ .rel32 }, &.{ 0x0f, 0x8d }, 
0, .none }, - .{ .jl, .d, &.{ .rel32 }, &.{ 0x0f, 0x8c }, 0, .none }, - .{ .jle, .d, &.{ .rel32 }, &.{ 0x0f, 0x8e }, 0, .none }, - .{ .jna, .d, &.{ .rel32 }, &.{ 0x0f, 0x86 }, 0, .none }, - .{ .jnae, .d, &.{ .rel32 }, &.{ 0x0f, 0x82 }, 0, .none }, - .{ .jnb, .d, &.{ .rel32 }, &.{ 0x0f, 0x83 }, 0, .none }, - .{ .jnbe, .d, &.{ .rel32 }, &.{ 0x0f, 0x87 }, 0, .none }, - .{ .jnc, .d, &.{ .rel32 }, &.{ 0x0f, 0x83 }, 0, .none }, - .{ .jne, .d, &.{ .rel32 }, &.{ 0x0f, 0x85 }, 0, .none }, - .{ .jng, .d, &.{ .rel32 }, &.{ 0x0f, 0x8e }, 0, .none }, - .{ .jnge, .d, &.{ .rel32 }, &.{ 0x0f, 0x8c }, 0, .none }, - .{ .jnl, .d, &.{ .rel32 }, &.{ 0x0f, 0x8d }, 0, .none }, - .{ .jnle, .d, &.{ .rel32 }, &.{ 0x0f, 0x8f }, 0, .none }, - .{ .jno, .d, &.{ .rel32 }, &.{ 0x0f, 0x81 }, 0, .none }, - .{ .jnp, .d, &.{ .rel32 }, &.{ 0x0f, 0x8b }, 0, .none }, - .{ .jns, .d, &.{ .rel32 }, &.{ 0x0f, 0x89 }, 0, .none }, - .{ .jnz, .d, &.{ .rel32 }, &.{ 0x0f, 0x85 }, 0, .none }, - .{ .jo, .d, &.{ .rel32 }, &.{ 0x0f, 0x80 }, 0, .none }, - .{ .jp, .d, &.{ .rel32 }, &.{ 0x0f, 0x8a }, 0, .none }, - .{ .jpe, .d, &.{ .rel32 }, &.{ 0x0f, 0x8a }, 0, .none }, - .{ .jpo, .d, &.{ .rel32 }, &.{ 0x0f, 0x8b }, 0, .none }, - .{ .js, .d, &.{ .rel32 }, &.{ 0x0f, 0x88 }, 0, .none }, - .{ .jz, .d, &.{ .rel32 }, &.{ 0x0f, 0x84 }, 0, .none }, - - .{ .jmp, .d, &.{ .rel32 }, &.{ 0xe9 }, 0, .none }, - .{ .jmp, .m, &.{ .rm64 }, &.{ 0xff }, 4, .none }, - - .{ .lea, .rm, &.{ .r16, .m }, &.{ 0x8d }, 0, .none }, - .{ .lea, .rm, &.{ .r32, .m }, &.{ 0x8d }, 0, .none }, - .{ .lea, .rm, &.{ .r64, .m }, &.{ 0x8d }, 0, .long }, - - .{ .lfence, .np, &.{}, &.{ 0x0f, 0xae, 0xe8 }, 0, .none }, - - .{ .lods, .np, &.{ .m8 }, &.{ 0xac }, 0, .none }, - .{ .lods, .np, &.{ .m16 }, &.{ 0xad }, 0, .none }, - .{ .lods, .np, &.{ .m32 }, &.{ 0xad }, 0, .none }, - .{ .lods, .np, &.{ .m64 }, &.{ 0xad }, 0, .long }, - - .{ .lodsb, .np, &.{}, &.{ 0xac }, 0, .none }, - .{ .lodsw, .np, &.{}, &.{ 0xad }, 0, .short }, - .{ .lodsd, .np, &.{}, &.{ 0xad }, 0, 
.none }, - .{ .lodsq, .np, &.{}, &.{ 0xad }, 0, .long }, - - .{ .lzcnt, .rm, &.{ .r16, .rm16 }, &.{ 0xf3, 0x0f, 0xbd }, 0, .none }, - .{ .lzcnt, .rm, &.{ .r32, .rm32 }, &.{ 0xf3, 0x0f, 0xbd }, 0, .none }, - .{ .lzcnt, .rm, &.{ .r64, .rm64 }, &.{ 0xf3, 0x0f, 0xbd }, 0, .long }, - - .{ .mfence, .np, &.{}, &.{ 0x0f, 0xae, 0xf0 }, 0, .none }, - - .{ .mov, .mr, &.{ .rm8, .r8 }, &.{ 0x88 }, 0, .none }, - .{ .mov, .mr, &.{ .rm8, .r8 }, &.{ 0x88 }, 0, .rex }, - .{ .mov, .mr, &.{ .rm16, .r16 }, &.{ 0x89 }, 0, .none }, - .{ .mov, .mr, &.{ .rm32, .r32 }, &.{ 0x89 }, 0, .none }, - .{ .mov, .mr, &.{ .rm64, .r64 }, &.{ 0x89 }, 0, .long }, - .{ .mov, .rm, &.{ .r8, .rm8 }, &.{ 0x8a }, 0, .none }, - .{ .mov, .rm, &.{ .r8, .rm8 }, &.{ 0x8a }, 0, .rex }, - .{ .mov, .rm, &.{ .r16, .rm16 }, &.{ 0x8b }, 0, .none }, - .{ .mov, .rm, &.{ .r32, .rm32 }, &.{ 0x8b }, 0, .none }, - .{ .mov, .rm, &.{ .r64, .rm64 }, &.{ 0x8b }, 0, .long }, - .{ .mov, .mr, &.{ .rm16, .sreg }, &.{ 0x8c }, 0, .none }, - .{ .mov, .mr, &.{ .rm64, .sreg }, &.{ 0x8c }, 0, .long }, - .{ .mov, .rm, &.{ .sreg, .rm16 }, &.{ 0x8e }, 0, .none }, - .{ .mov, .rm, &.{ .sreg, .rm64 }, &.{ 0x8e }, 0, .long }, - .{ .mov, .fd, &.{ .al, .moffs }, &.{ 0xa0 }, 0, .none }, - .{ .mov, .fd, &.{ .ax, .moffs }, &.{ 0xa1 }, 0, .none }, - .{ .mov, .fd, &.{ .eax, .moffs }, &.{ 0xa1 }, 0, .none }, - .{ .mov, .fd, &.{ .rax, .moffs }, &.{ 0xa1 }, 0, .long }, - .{ .mov, .td, &.{ .moffs, .al }, &.{ 0xa2 }, 0, .none }, - .{ .mov, .td, &.{ .moffs, .ax }, &.{ 0xa3 }, 0, .none }, - .{ .mov, .td, &.{ .moffs, .eax }, &.{ 0xa3 }, 0, .none }, - .{ .mov, .td, &.{ .moffs, .rax }, &.{ 0xa3 }, 0, .long }, - .{ .mov, .oi, &.{ .r8, .imm8 }, &.{ 0xb0 }, 0, .none }, - .{ .mov, .oi, &.{ .r8, .imm8 }, &.{ 0xb0 }, 0, .rex }, - .{ .mov, .oi, &.{ .r16, .imm16 }, &.{ 0xb8 }, 0, .none }, - .{ .mov, .oi, &.{ .r32, .imm32 }, &.{ 0xb8 }, 0, .none }, - .{ .mov, .oi, &.{ .r64, .imm64 }, &.{ 0xb8 }, 0, .long }, - .{ .mov, .mi, &.{ .rm8, .imm8 }, &.{ 0xc6 }, 0, .none }, - .{ 
.mov, .mi, &.{ .rm8, .imm8 }, &.{ 0xc6 }, 0, .rex }, - .{ .mov, .mi, &.{ .rm16, .imm16 }, &.{ 0xc7 }, 0, .none }, - .{ .mov, .mi, &.{ .rm32, .imm32 }, &.{ 0xc7 }, 0, .none }, - .{ .mov, .mi, &.{ .rm64, .imm32s }, &.{ 0xc7 }, 0, .long }, - - .{ .movbe, .rm, &.{ .r16, .m16 }, &.{ 0x0f, 0x38, 0xf0 }, 0, .none }, - .{ .movbe, .rm, &.{ .r32, .m32 }, &.{ 0x0f, 0x38, 0xf0 }, 0, .none }, - .{ .movbe, .rm, &.{ .r64, .m64 }, &.{ 0x0f, 0x38, 0xf0 }, 0, .long }, - .{ .movbe, .mr, &.{ .m16, .r16 }, &.{ 0x0f, 0x38, 0xf1 }, 0, .none }, - .{ .movbe, .mr, &.{ .m32, .r32 }, &.{ 0x0f, 0x38, 0xf1 }, 0, .none }, - .{ .movbe, .mr, &.{ .m64, .r64 }, &.{ 0x0f, 0x38, 0xf1 }, 0, .long }, - - .{ .movs, .np, &.{ .m8, .m8 }, &.{ 0xa4 }, 0, .none }, - .{ .movs, .np, &.{ .m16, .m16 }, &.{ 0xa5 }, 0, .none }, - .{ .movs, .np, &.{ .m32, .m32 }, &.{ 0xa5 }, 0, .none }, - .{ .movs, .np, &.{ .m64, .m64 }, &.{ 0xa5 }, 0, .long }, - - .{ .movsb, .np, &.{}, &.{ 0xa4 }, 0, .none }, - .{ .movsw, .np, &.{}, &.{ 0xa5 }, 0, .short }, - .{ .movsd, .np, &.{}, &.{ 0xa5 }, 0, .none }, - .{ .movsq, .np, &.{}, &.{ 0xa5 }, 0, .long }, - - .{ .movsx, .rm, &.{ .r16, .rm8 }, &.{ 0x0f, 0xbe }, 0, .none }, - .{ .movsx, .rm, &.{ .r16, .rm8 }, &.{ 0x0f, 0xbe }, 0, .rex }, - .{ .movsx, .rm, &.{ .r32, .rm8 }, &.{ 0x0f, 0xbe }, 0, .none }, - .{ .movsx, .rm, &.{ .r32, .rm8 }, &.{ 0x0f, 0xbe }, 0, .rex }, - .{ .movsx, .rm, &.{ .r64, .rm8 }, &.{ 0x0f, 0xbe }, 0, .long }, - .{ .movsx, .rm, &.{ .r32, .rm16 }, &.{ 0x0f, 0xbf }, 0, .none }, - .{ .movsx, .rm, &.{ .r64, .rm16 }, &.{ 0x0f, 0xbf }, 0, .long }, + .{ .call, .d, &.{ .rel32 }, &.{ 0xe8 }, 0, .none, .none }, + .{ .call, .m, &.{ .rm64 }, &.{ 0xff }, 2, .none, .none }, + + .{ .cbw, .np, &.{ .o16 }, &.{ 0x98 }, 0, .short, .none }, + .{ .cwde, .np, &.{ .o32 }, &.{ 0x98 }, 0, .none, .none }, + .{ .cdqe, .np, &.{ .o64 }, &.{ 0x98 }, 0, .long, .none }, + + .{ .cwd, .np, &.{ .o16 }, &.{ 0x99 }, 0, .short, .none }, + .{ .cdq, .np, &.{ .o32 }, &.{ 0x99 }, 0, .none, .none }, + .{ 
.cqo, .np, &.{ .o64 }, &.{ 0x99 }, 0, .long, .none }, + + .{ .cmova, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x47 }, 0, .short, .none }, + .{ .cmova, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x47 }, 0, .none, .none }, + .{ .cmova, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x47 }, 0, .long, .none }, + .{ .cmovae, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x43 }, 0, .short, .none }, + .{ .cmovae, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x43 }, 0, .none, .none }, + .{ .cmovae, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x43 }, 0, .long, .none }, + .{ .cmovb, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x42 }, 0, .short, .none }, + .{ .cmovb, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x42 }, 0, .none, .none }, + .{ .cmovb, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x42 }, 0, .long, .none }, + .{ .cmovbe, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x46 }, 0, .short, .none }, + .{ .cmovbe, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x46 }, 0, .none, .none }, + .{ .cmovbe, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x46 }, 0, .long, .none }, + .{ .cmovc, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x42 }, 0, .short, .none }, + .{ .cmovc, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x42 }, 0, .none, .none }, + .{ .cmovc, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x42 }, 0, .long, .none }, + .{ .cmove, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x44 }, 0, .short, .none }, + .{ .cmove, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x44 }, 0, .none, .none }, + .{ .cmove, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x44 }, 0, .long, .none }, + .{ .cmovg, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4f }, 0, .short, .none }, + .{ .cmovg, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4f }, 0, .none, .none }, + .{ .cmovg, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4f }, 0, .long, .none }, + .{ .cmovge, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4d }, 0, .short, .none }, + .{ .cmovge, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4d }, 0, .none, .none }, + .{ .cmovge, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4d }, 0, .long, .none }, + .{ .cmovl, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4c }, 0, .short, .none }, + .{ .cmovl, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4c }, 0, .none, .none }, + 
.{ .cmovl, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4c }, 0, .long, .none }, + .{ .cmovle, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4e }, 0, .short, .none }, + .{ .cmovle, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4e }, 0, .none, .none }, + .{ .cmovle, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4e }, 0, .long, .none }, + .{ .cmovna, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x46 }, 0, .short, .none }, + .{ .cmovna, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x46 }, 0, .none, .none }, + .{ .cmovna, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x46 }, 0, .long, .none }, + .{ .cmovnae, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x42 }, 0, .short, .none }, + .{ .cmovnae, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x42 }, 0, .none, .none }, + .{ .cmovnae, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x42 }, 0, .long, .none }, + .{ .cmovnb, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x43 }, 0, .short, .none }, + .{ .cmovnb, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x43 }, 0, .none, .none }, + .{ .cmovnb, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x43 }, 0, .long, .none }, + .{ .cmovnbe, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x47 }, 0, .short, .none }, + .{ .cmovnbe, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x47 }, 0, .none, .none }, + .{ .cmovnbe, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x47 }, 0, .long, .none }, + .{ .cmovnc, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x43 }, 0, .short, .none }, + .{ .cmovnc, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x43 }, 0, .none, .none }, + .{ .cmovnc, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x43 }, 0, .long, .none }, + .{ .cmovne, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x45 }, 0, .short, .none }, + .{ .cmovne, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x45 }, 0, .none, .none }, + .{ .cmovne, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x45 }, 0, .long, .none }, + .{ .cmovng, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4e }, 0, .short, .none }, + .{ .cmovng, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4e }, 0, .none, .none }, + .{ .cmovng, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4e }, 0, .long, .none }, + .{ .cmovnge, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4c }, 0, .short, .none }, + .{ .cmovnge, .rm, &.{ .r32, .rm32 
}, &.{ 0x0f, 0x4c }, 0, .none, .none }, + .{ .cmovnge, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4c }, 0, .long, .none }, + .{ .cmovnl, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4d }, 0, .short, .none }, + .{ .cmovnl, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4d }, 0, .none, .none }, + .{ .cmovnl, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4d }, 0, .long, .none }, + .{ .cmovnle, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4f }, 0, .short, .none }, + .{ .cmovnle, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4f }, 0, .none, .none }, + .{ .cmovnle, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4f }, 0, .long, .none }, + .{ .cmovno, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x41 }, 0, .short, .none }, + .{ .cmovno, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x41 }, 0, .none, .none }, + .{ .cmovno, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x41 }, 0, .long, .none }, + .{ .cmovnp, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4b }, 0, .short, .none }, + .{ .cmovnp, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4b }, 0, .none, .none }, + .{ .cmovnp, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4b }, 0, .long, .none }, + .{ .cmovns, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x49 }, 0, .short, .none }, + .{ .cmovns, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x49 }, 0, .none, .none }, + .{ .cmovns, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x49 }, 0, .long, .none }, + .{ .cmovnz, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x45 }, 0, .short, .none }, + .{ .cmovnz, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x45 }, 0, .none, .none }, + .{ .cmovnz, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x45 }, 0, .long, .none }, + .{ .cmovo, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x40 }, 0, .short, .none }, + .{ .cmovo, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x40 }, 0, .none, .none }, + .{ .cmovo, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x40 }, 0, .long, .none }, + .{ .cmovp, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4a }, 0, .short, .none }, + .{ .cmovp, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4a }, 0, .none, .none }, + .{ .cmovp, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4a }, 0, .long, .none }, + .{ .cmovpe, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4a }, 0, .short, .none }, + 
.{ .cmovpe, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4a }, 0, .none, .none }, + .{ .cmovpe, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4a }, 0, .long, .none }, + .{ .cmovpo, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4b }, 0, .short, .none }, + .{ .cmovpo, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4b }, 0, .none, .none }, + .{ .cmovpo, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4b }, 0, .long, .none }, + .{ .cmovs, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x48 }, 0, .short, .none }, + .{ .cmovs, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x48 }, 0, .none, .none }, + .{ .cmovs, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x48 }, 0, .long, .none }, + .{ .cmovz, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x44 }, 0, .short, .none }, + .{ .cmovz, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x44 }, 0, .none, .none }, + .{ .cmovz, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x44 }, 0, .long, .none }, + + .{ .cmp, .zi, &.{ .al, .imm8 }, &.{ 0x3c }, 0, .none, .none }, + .{ .cmp, .zi, &.{ .ax, .imm16 }, &.{ 0x3d }, 0, .short, .none }, + .{ .cmp, .zi, &.{ .eax, .imm32 }, &.{ 0x3d }, 0, .none, .none }, + .{ .cmp, .zi, &.{ .rax, .imm32s }, &.{ 0x3d }, 0, .long, .none }, + .{ .cmp, .mi, &.{ .rm8, .imm8 }, &.{ 0x80 }, 7, .none, .none }, + .{ .cmp, .mi, &.{ .rm8, .imm8 }, &.{ 0x80 }, 7, .rex, .none }, + .{ .cmp, .mi, &.{ .rm16, .imm16 }, &.{ 0x81 }, 7, .short, .none }, + .{ .cmp, .mi, &.{ .rm32, .imm32 }, &.{ 0x81 }, 7, .none, .none }, + .{ .cmp, .mi, &.{ .rm64, .imm32s }, &.{ 0x81 }, 7, .long, .none }, + .{ .cmp, .mi, &.{ .rm16, .imm8s }, &.{ 0x83 }, 7, .short, .none }, + .{ .cmp, .mi, &.{ .rm32, .imm8s }, &.{ 0x83 }, 7, .none, .none }, + .{ .cmp, .mi, &.{ .rm64, .imm8s }, &.{ 0x83 }, 7, .long, .none }, + .{ .cmp, .mr, &.{ .rm8, .r8 }, &.{ 0x38 }, 0, .none, .none }, + .{ .cmp, .mr, &.{ .rm8, .r8 }, &.{ 0x38 }, 0, .rex, .none }, + .{ .cmp, .mr, &.{ .rm16, .r16 }, &.{ 0x39 }, 0, .short, .none }, + .{ .cmp, .mr, &.{ .rm32, .r32 }, &.{ 0x39 }, 0, .none, .none }, + .{ .cmp, .mr, &.{ .rm64, .r64 }, &.{ 0x39 }, 0, .long, .none }, + .{ .cmp, .rm, &.{ .r8, .rm8 }, &.{ 0x3a 
}, 0, .none, .none }, + .{ .cmp, .rm, &.{ .r8, .rm8 }, &.{ 0x3a }, 0, .rex, .none }, + .{ .cmp, .rm, &.{ .r16, .rm16 }, &.{ 0x3b }, 0, .short, .none }, + .{ .cmp, .rm, &.{ .r32, .rm32 }, &.{ 0x3b }, 0, .none, .none }, + .{ .cmp, .rm, &.{ .r64, .rm64 }, &.{ 0x3b }, 0, .long, .none }, + + .{ .cmps, .np, &.{ .m8, .m8 }, &.{ 0xa6 }, 0, .none, .none }, + .{ .cmps, .np, &.{ .m16, .m16 }, &.{ 0xa7 }, 0, .short, .none }, + .{ .cmps, .np, &.{ .m32, .m32 }, &.{ 0xa7 }, 0, .none, .none }, + .{ .cmps, .np, &.{ .m64, .m64 }, &.{ 0xa7 }, 0, .long, .none }, + + .{ .cmpsb, .np, &.{}, &.{ 0xa6 }, 0, .none, .none }, + .{ .cmpsw, .np, &.{}, &.{ 0xa7 }, 0, .short, .none }, + .{ .cmpsd, .np, &.{}, &.{ 0xa7 }, 0, .none, .none }, + .{ .cmpsq, .np, &.{}, &.{ 0xa7 }, 0, .long, .none }, + + .{ .cmpxchg, .mr, &.{ .rm8, .r8 }, &.{ 0x0f, 0xb0 }, 0, .none, .none }, + .{ .cmpxchg, .mr, &.{ .rm8, .r8 }, &.{ 0x0f, 0xb0 }, 0, .rex, .none }, + .{ .cmpxchg, .mr, &.{ .rm16, .r16 }, &.{ 0x0f, 0xb1 }, 0, .short, .none }, + .{ .cmpxchg, .mr, &.{ .rm32, .r32 }, &.{ 0x0f, 0xb1 }, 0, .none, .none }, + .{ .cmpxchg, .mr, &.{ .rm64, .r64 }, &.{ 0x0f, 0xb1 }, 0, .long, .none }, + + .{ .cmpxchg8b, .m, &.{ .m64 }, &.{ 0x0f, 0xc7 }, 1, .none, .none }, + .{ .cmpxchg16b, .m, &.{ .m128 }, &.{ 0x0f, 0xc7 }, 1, .long, .none }, + + .{ .div, .m, &.{ .rm8 }, &.{ 0xf6 }, 6, .none, .none }, + .{ .div, .m, &.{ .rm8 }, &.{ 0xf6 }, 6, .rex, .none }, + .{ .div, .m, &.{ .rm16 }, &.{ 0xf7 }, 6, .short, .none }, + .{ .div, .m, &.{ .rm32 }, &.{ 0xf7 }, 6, .none, .none }, + .{ .div, .m, &.{ .rm64 }, &.{ 0xf7 }, 6, .long, .none }, + + .{ .fisttp, .m, &.{ .m16 }, &.{ 0xdf }, 1, .none, .x87 }, + .{ .fisttp, .m, &.{ .m32 }, &.{ 0xdb }, 1, .none, .x87 }, + .{ .fisttp, .m, &.{ .m64 }, &.{ 0xdd }, 1, .none, .x87 }, + + .{ .fld, .m, &.{ .m32 }, &.{ 0xd9 }, 0, .none, .x87 }, + .{ .fld, .m, &.{ .m64 }, &.{ 0xdd }, 0, .none, .x87 }, + .{ .fld, .m, &.{ .m80 }, &.{ 0xdb }, 5, .none, .x87 }, + + .{ .idiv, .m, &.{ .rm8 }, &.{ 0xf6 }, 7, .none, 
.none }, + .{ .idiv, .m, &.{ .rm8 }, &.{ 0xf6 }, 7, .rex, .none }, + .{ .idiv, .m, &.{ .rm16 }, &.{ 0xf7 }, 7, .short, .none }, + .{ .idiv, .m, &.{ .rm32 }, &.{ 0xf7 }, 7, .none, .none }, + .{ .idiv, .m, &.{ .rm64 }, &.{ 0xf7 }, 7, .long, .none }, + + .{ .imul, .m, &.{ .rm8 }, &.{ 0xf6 }, 5, .none, .none }, + .{ .imul, .m, &.{ .rm8 }, &.{ 0xf6 }, 5, .rex, .none }, + .{ .imul, .m, &.{ .rm16, }, &.{ 0xf7 }, 5, .short, .none }, + .{ .imul, .m, &.{ .rm32, }, &.{ 0xf7 }, 5, .none, .none }, + .{ .imul, .m, &.{ .rm64, }, &.{ 0xf7 }, 5, .long, .none }, + .{ .imul, .rm, &.{ .r16, .rm16, }, &.{ 0x0f, 0xaf }, 0, .short, .none }, + .{ .imul, .rm, &.{ .r32, .rm32, }, &.{ 0x0f, 0xaf }, 0, .none, .none }, + .{ .imul, .rm, &.{ .r64, .rm64, }, &.{ 0x0f, 0xaf }, 0, .long, .none }, + .{ .imul, .rmi, &.{ .r16, .rm16, .imm8s }, &.{ 0x6b }, 0, .short, .none }, + .{ .imul, .rmi, &.{ .r32, .rm32, .imm8s }, &.{ 0x6b }, 0, .none, .none }, + .{ .imul, .rmi, &.{ .r64, .rm64, .imm8s }, &.{ 0x6b }, 0, .long, .none }, + .{ .imul, .rmi, &.{ .r16, .rm16, .imm16 }, &.{ 0x69 }, 0, .short, .none }, + .{ .imul, .rmi, &.{ .r32, .rm32, .imm32 }, &.{ 0x69 }, 0, .none, .none }, + .{ .imul, .rmi, &.{ .r64, .rm64, .imm32 }, &.{ 0x69 }, 0, .long, .none }, + + .{ .int3, .np, &.{}, &.{ 0xcc }, 0, .none, .none }, + + .{ .ja, .d, &.{ .rel32 }, &.{ 0x0f, 0x87 }, 0, .none, .none }, + .{ .jae, .d, &.{ .rel32 }, &.{ 0x0f, 0x83 }, 0, .none, .none }, + .{ .jb, .d, &.{ .rel32 }, &.{ 0x0f, 0x82 }, 0, .none, .none }, + .{ .jbe, .d, &.{ .rel32 }, &.{ 0x0f, 0x86 }, 0, .none, .none }, + .{ .jc, .d, &.{ .rel32 }, &.{ 0x0f, 0x82 }, 0, .none, .none }, + .{ .jrcxz, .d, &.{ .rel32 }, &.{ 0xe3 }, 0, .none, .none }, + .{ .je, .d, &.{ .rel32 }, &.{ 0x0f, 0x84 }, 0, .none, .none }, + .{ .jg, .d, &.{ .rel32 }, &.{ 0x0f, 0x8f }, 0, .none, .none }, + .{ .jge, .d, &.{ .rel32 }, &.{ 0x0f, 0x8d }, 0, .none, .none }, + .{ .jl, .d, &.{ .rel32 }, &.{ 0x0f, 0x8c }, 0, .none, .none }, + .{ .jle, .d, &.{ .rel32 }, &.{ 0x0f, 0x8e }, 0, .none, 
.none }, + .{ .jna, .d, &.{ .rel32 }, &.{ 0x0f, 0x86 }, 0, .none, .none }, + .{ .jnae, .d, &.{ .rel32 }, &.{ 0x0f, 0x82 }, 0, .none, .none }, + .{ .jnb, .d, &.{ .rel32 }, &.{ 0x0f, 0x83 }, 0, .none, .none }, + .{ .jnbe, .d, &.{ .rel32 }, &.{ 0x0f, 0x87 }, 0, .none, .none }, + .{ .jnc, .d, &.{ .rel32 }, &.{ 0x0f, 0x83 }, 0, .none, .none }, + .{ .jne, .d, &.{ .rel32 }, &.{ 0x0f, 0x85 }, 0, .none, .none }, + .{ .jng, .d, &.{ .rel32 }, &.{ 0x0f, 0x8e }, 0, .none, .none }, + .{ .jnge, .d, &.{ .rel32 }, &.{ 0x0f, 0x8c }, 0, .none, .none }, + .{ .jnl, .d, &.{ .rel32 }, &.{ 0x0f, 0x8d }, 0, .none, .none }, + .{ .jnle, .d, &.{ .rel32 }, &.{ 0x0f, 0x8f }, 0, .none, .none }, + .{ .jno, .d, &.{ .rel32 }, &.{ 0x0f, 0x81 }, 0, .none, .none }, + .{ .jnp, .d, &.{ .rel32 }, &.{ 0x0f, 0x8b }, 0, .none, .none }, + .{ .jns, .d, &.{ .rel32 }, &.{ 0x0f, 0x89 }, 0, .none, .none }, + .{ .jnz, .d, &.{ .rel32 }, &.{ 0x0f, 0x85 }, 0, .none, .none }, + .{ .jo, .d, &.{ .rel32 }, &.{ 0x0f, 0x80 }, 0, .none, .none }, + .{ .jp, .d, &.{ .rel32 }, &.{ 0x0f, 0x8a }, 0, .none, .none }, + .{ .jpe, .d, &.{ .rel32 }, &.{ 0x0f, 0x8a }, 0, .none, .none }, + .{ .jpo, .d, &.{ .rel32 }, &.{ 0x0f, 0x8b }, 0, .none, .none }, + .{ .js, .d, &.{ .rel32 }, &.{ 0x0f, 0x88 }, 0, .none, .none }, + .{ .jz, .d, &.{ .rel32 }, &.{ 0x0f, 0x84 }, 0, .none, .none }, + + .{ .jmp, .d, &.{ .rel32 }, &.{ 0xe9 }, 0, .none, .none }, + .{ .jmp, .m, &.{ .rm64 }, &.{ 0xff }, 4, .none, .none }, + + .{ .lea, .rm, &.{ .r16, .m }, &.{ 0x8d }, 0, .short, .none }, + .{ .lea, .rm, &.{ .r32, .m }, &.{ 0x8d }, 0, .none, .none }, + .{ .lea, .rm, &.{ .r64, .m }, &.{ 0x8d }, 0, .long, .none }, + + .{ .lfence, .np, &.{}, &.{ 0x0f, 0xae, 0xe8 }, 0, .none, .none }, + + .{ .lods, .np, &.{ .m8 }, &.{ 0xac }, 0, .none, .none }, + .{ .lods, .np, &.{ .m16 }, &.{ 0xad }, 0, .short, .none }, + .{ .lods, .np, &.{ .m32 }, &.{ 0xad }, 0, .none, .none }, + .{ .lods, .np, &.{ .m64 }, &.{ 0xad }, 0, .long, .none }, + + .{ .lodsb, .np, &.{}, &.{ 0xac }, 0, 
.none, .none }, + .{ .lodsw, .np, &.{}, &.{ 0xad }, 0, .short, .none }, + .{ .lodsd, .np, &.{}, &.{ 0xad }, 0, .none, .none }, + .{ .lodsq, .np, &.{}, &.{ 0xad }, 0, .long, .none }, + + .{ .lzcnt, .rm, &.{ .r16, .rm16 }, &.{ 0xf3, 0x0f, 0xbd }, 0, .short, .none }, + .{ .lzcnt, .rm, &.{ .r32, .rm32 }, &.{ 0xf3, 0x0f, 0xbd }, 0, .none, .none }, + .{ .lzcnt, .rm, &.{ .r64, .rm64 }, &.{ 0xf3, 0x0f, 0xbd }, 0, .long, .none }, + + .{ .mfence, .np, &.{}, &.{ 0x0f, 0xae, 0xf0 }, 0, .none, .none }, + + .{ .mov, .mr, &.{ .rm8, .r8 }, &.{ 0x88 }, 0, .none, .none }, + .{ .mov, .mr, &.{ .rm8, .r8 }, &.{ 0x88 }, 0, .rex, .none }, + .{ .mov, .mr, &.{ .rm16, .r16 }, &.{ 0x89 }, 0, .short, .none }, + .{ .mov, .mr, &.{ .rm32, .r32 }, &.{ 0x89 }, 0, .none, .none }, + .{ .mov, .mr, &.{ .rm64, .r64 }, &.{ 0x89 }, 0, .long, .none }, + .{ .mov, .rm, &.{ .r8, .rm8 }, &.{ 0x8a }, 0, .none, .none }, + .{ .mov, .rm, &.{ .r8, .rm8 }, &.{ 0x8a }, 0, .rex, .none }, + .{ .mov, .rm, &.{ .r16, .rm16 }, &.{ 0x8b }, 0, .short, .none }, + .{ .mov, .rm, &.{ .r32, .rm32 }, &.{ 0x8b }, 0, .none, .none }, + .{ .mov, .rm, &.{ .r64, .rm64 }, &.{ 0x8b }, 0, .long, .none }, + .{ .mov, .mr, &.{ .rm16, .sreg }, &.{ 0x8c }, 0, .short, .none }, + .{ .mov, .mr, &.{ .r32_m16, .sreg }, &.{ 0x8c }, 0, .none, .none }, + .{ .mov, .mr, &.{ .r64_m16, .sreg }, &.{ 0x8c }, 0, .long, .none }, + .{ .mov, .rm, &.{ .sreg, .rm16 }, &.{ 0x8e }, 0, .short, .none }, + .{ .mov, .rm, &.{ .sreg, .r32_m16 }, &.{ 0x8e }, 0, .none, .none }, + .{ .mov, .rm, &.{ .sreg, .r64_m16 }, &.{ 0x8e }, 0, .long, .none }, + .{ .mov, .fd, &.{ .al, .moffs }, &.{ 0xa0 }, 0, .none, .none }, + .{ .mov, .fd, &.{ .ax, .moffs }, &.{ 0xa1 }, 0, .none, .none }, + .{ .mov, .fd, &.{ .eax, .moffs }, &.{ 0xa1 }, 0, .none, .none }, + .{ .mov, .fd, &.{ .rax, .moffs }, &.{ 0xa1 }, 0, .long, .none }, + .{ .mov, .td, &.{ .moffs, .al }, &.{ 0xa2 }, 0, .none, .none }, + .{ .mov, .td, &.{ .moffs, .ax }, &.{ 0xa3 }, 0, .none, .none }, + .{ .mov, .td, &.{ .moffs, .eax }, 
&.{ 0xa3 }, 0, .none, .none }, + .{ .mov, .td, &.{ .moffs, .rax }, &.{ 0xa3 }, 0, .long, .none }, + .{ .mov, .oi, &.{ .r8, .imm8 }, &.{ 0xb0 }, 0, .none, .none }, + .{ .mov, .oi, &.{ .r8, .imm8 }, &.{ 0xb0 }, 0, .rex, .none }, + .{ .mov, .oi, &.{ .r16, .imm16 }, &.{ 0xb8 }, 0, .short, .none }, + .{ .mov, .oi, &.{ .r32, .imm32 }, &.{ 0xb8 }, 0, .none, .none }, + .{ .mov, .oi, &.{ .r64, .imm64 }, &.{ 0xb8 }, 0, .long, .none }, + .{ .mov, .mi, &.{ .rm8, .imm8 }, &.{ 0xc6 }, 0, .none, .none }, + .{ .mov, .mi, &.{ .rm8, .imm8 }, &.{ 0xc6 }, 0, .rex, .none }, + .{ .mov, .mi, &.{ .rm16, .imm16 }, &.{ 0xc7 }, 0, .short, .none }, + .{ .mov, .mi, &.{ .rm32, .imm32 }, &.{ 0xc7 }, 0, .none, .none }, + .{ .mov, .mi, &.{ .rm64, .imm32s }, &.{ 0xc7 }, 0, .long, .none }, + + .{ .movbe, .rm, &.{ .r16, .m16 }, &.{ 0x0f, 0x38, 0xf0 }, 0, .short, .none }, + .{ .movbe, .rm, &.{ .r32, .m32 }, &.{ 0x0f, 0x38, 0xf0 }, 0, .none, .none }, + .{ .movbe, .rm, &.{ .r64, .m64 }, &.{ 0x0f, 0x38, 0xf0 }, 0, .long, .none }, + .{ .movbe, .mr, &.{ .m16, .r16 }, &.{ 0x0f, 0x38, 0xf1 }, 0, .short, .none }, + .{ .movbe, .mr, &.{ .m32, .r32 }, &.{ 0x0f, 0x38, 0xf1 }, 0, .none, .none }, + .{ .movbe, .mr, &.{ .m64, .r64 }, &.{ 0x0f, 0x38, 0xf1 }, 0, .long, .none }, + + .{ .movs, .np, &.{ .m8, .m8 }, &.{ 0xa4 }, 0, .none, .none }, + .{ .movs, .np, &.{ .m16, .m16 }, &.{ 0xa5 }, 0, .short, .none }, + .{ .movs, .np, &.{ .m32, .m32 }, &.{ 0xa5 }, 0, .none, .none }, + .{ .movs, .np, &.{ .m64, .m64 }, &.{ 0xa5 }, 0, .long, .none }, + + .{ .movsb, .np, &.{}, &.{ 0xa4 }, 0, .none, .none }, + .{ .movsw, .np, &.{}, &.{ 0xa5 }, 0, .short, .none }, + .{ .movsd, .np, &.{}, &.{ 0xa5 }, 0, .none, .none }, + .{ .movsq, .np, &.{}, &.{ 0xa5 }, 0, .long, .none }, + + .{ .movsx, .rm, &.{ .r16, .rm8 }, &.{ 0x0f, 0xbe }, 0, .short, .none }, + .{ .movsx, .rm, &.{ .r16, .rm8 }, &.{ 0x0f, 0xbe }, 0, .rex_short, .none }, + .{ .movsx, .rm, &.{ .r32, .rm8 }, &.{ 0x0f, 0xbe }, 0, .none, .none }, + .{ .movsx, .rm, &.{ .r32, .rm8 }, &.{ 
0x0f, 0xbe }, 0, .rex, .none }, + .{ .movsx, .rm, &.{ .r64, .rm8 }, &.{ 0x0f, 0xbe }, 0, .long, .none }, + .{ .movsx, .rm, &.{ .r32, .rm16 }, &.{ 0x0f, 0xbf }, 0, .none, .none }, + .{ .movsx, .rm, &.{ .r32, .rm16 }, &.{ 0x0f, 0xbf }, 0, .rex, .none }, + .{ .movsx, .rm, &.{ .r64, .rm16 }, &.{ 0x0f, 0xbf }, 0, .long, .none }, // This instruction is discouraged. - .{ .movsxd, .rm, &.{ .r32, .rm32 }, &.{ 0x63 }, 0, .none }, - .{ .movsxd, .rm, &.{ .r64, .rm32 }, &.{ 0x63 }, 0, .long }, - - .{ .movzx, .rm, &.{ .r16, .rm8 }, &.{ 0x0f, 0xb6 }, 0, .none }, - .{ .movzx, .rm, &.{ .r32, .rm8 }, &.{ 0x0f, 0xb6 }, 0, .none }, - .{ .movzx, .rm, &.{ .r64, .rm8 }, &.{ 0x0f, 0xb6 }, 0, .long }, - .{ .movzx, .rm, &.{ .r32, .rm16 }, &.{ 0x0f, 0xb7 }, 0, .none }, - .{ .movzx, .rm, &.{ .r64, .rm16 }, &.{ 0x0f, 0xb7 }, 0, .long }, - - .{ .mul, .m, &.{ .rm8 }, &.{ 0xf6 }, 4, .none }, - .{ .mul, .m, &.{ .rm8 }, &.{ 0xf6 }, 4, .rex }, - .{ .mul, .m, &.{ .rm16 }, &.{ 0xf7 }, 4, .none }, - .{ .mul, .m, &.{ .rm32 }, &.{ 0xf7 }, 4, .none }, - .{ .mul, .m, &.{ .rm64 }, &.{ 0xf7 }, 4, .long }, - - .{ .neg, .m, &.{ .rm8 }, &.{ 0xf6 }, 3, .none }, - .{ .neg, .m, &.{ .rm8 }, &.{ 0xf6 }, 3, .rex }, - .{ .neg, .m, &.{ .rm16 }, &.{ 0xf7 }, 3, .none }, - .{ .neg, .m, &.{ .rm32 }, &.{ 0xf7 }, 3, .none }, - .{ .neg, .m, &.{ .rm64 }, &.{ 0xf7 }, 3, .long }, - - .{ .nop, .np, &.{}, &.{ 0x90 }, 0, .none }, - - .{ .not, .m, &.{ .rm8 }, &.{ 0xf6 }, 2, .none }, - .{ .not, .m, &.{ .rm8 }, &.{ 0xf6 }, 2, .rex }, - .{ .not, .m, &.{ .rm16 }, &.{ 0xf7 }, 2, .none }, - .{ .not, .m, &.{ .rm32 }, &.{ 0xf7 }, 2, .none }, - .{ .not, .m, &.{ .rm64 }, &.{ 0xf7 }, 2, .long }, - - .{ .@"or", .zi, &.{ .al, .imm8 }, &.{ 0x0c }, 0, .none }, - .{ .@"or", .zi, &.{ .ax, .imm16 }, &.{ 0x0d }, 0, .none }, - .{ .@"or", .zi, &.{ .eax, .imm32 }, &.{ 0x0d }, 0, .none }, - .{ .@"or", .zi, &.{ .rax, .imm32s }, &.{ 0x0d }, 0, .long }, - .{ .@"or", .mi, &.{ .rm8, .imm8 }, &.{ 0x80 }, 1, .none }, - .{ .@"or", .mi, &.{ .rm8, .imm8 }, &.{ 0x80 
}, 1, .rex }, - .{ .@"or", .mi, &.{ .rm16, .imm16 }, &.{ 0x81 }, 1, .none }, - .{ .@"or", .mi, &.{ .rm32, .imm32 }, &.{ 0x81 }, 1, .none }, - .{ .@"or", .mi, &.{ .rm64, .imm32s }, &.{ 0x81 }, 1, .long }, - .{ .@"or", .mi, &.{ .rm16, .imm8s }, &.{ 0x83 }, 1, .none }, - .{ .@"or", .mi, &.{ .rm32, .imm8s }, &.{ 0x83 }, 1, .none }, - .{ .@"or", .mi, &.{ .rm64, .imm8s }, &.{ 0x83 }, 1, .long }, - .{ .@"or", .mr, &.{ .rm8, .r8 }, &.{ 0x08 }, 0, .none }, - .{ .@"or", .mr, &.{ .rm8, .r8 }, &.{ 0x08 }, 0, .rex }, - .{ .@"or", .mr, &.{ .rm16, .r16 }, &.{ 0x09 }, 0, .none }, - .{ .@"or", .mr, &.{ .rm32, .r32 }, &.{ 0x09 }, 0, .none }, - .{ .@"or", .mr, &.{ .rm64, .r64 }, &.{ 0x09 }, 0, .long }, - .{ .@"or", .rm, &.{ .r8, .rm8 }, &.{ 0x0a }, 0, .none }, - .{ .@"or", .rm, &.{ .r8, .rm8 }, &.{ 0x0a }, 0, .rex }, - .{ .@"or", .rm, &.{ .r16, .rm16 }, &.{ 0x0b }, 0, .none }, - .{ .@"or", .rm, &.{ .r32, .rm32 }, &.{ 0x0b }, 0, .none }, - .{ .@"or", .rm, &.{ .r64, .rm64 }, &.{ 0x0b }, 0, .long }, - - .{ .pop, .o, &.{ .r16 }, &.{ 0x58 }, 0, .none }, - .{ .pop, .o, &.{ .r64 }, &.{ 0x58 }, 0, .none }, - .{ .pop, .m, &.{ .rm16 }, &.{ 0x8f }, 0, .none }, - .{ .pop, .m, &.{ .rm64 }, &.{ 0x8f }, 0, .none }, - - .{ .popcnt, .rm, &.{ .r16, .rm16 }, &.{ 0xf3, 0x0f, 0xb8 }, 0, .none }, - .{ .popcnt, .rm, &.{ .r32, .rm32 }, &.{ 0xf3, 0x0f, 0xb8 }, 0, .none }, - .{ .popcnt, .rm, &.{ .r64, .rm64 }, &.{ 0xf3, 0x0f, 0xb8 }, 0, .long }, - - .{ .push, .o, &.{ .r16 }, &.{ 0x50 }, 0, .none }, - .{ .push, .o, &.{ .r64 }, &.{ 0x50 }, 0, .none }, - .{ .push, .m, &.{ .rm16 }, &.{ 0xff }, 6, .none }, - .{ .push, .m, &.{ .rm64 }, &.{ 0xff }, 6, .none }, - .{ .push, .i, &.{ .imm8 }, &.{ 0x6a }, 0, .none }, - .{ .push, .i, &.{ .imm16 }, &.{ 0x68 }, 0, .none }, - .{ .push, .i, &.{ .imm32 }, &.{ 0x68 }, 0, .none }, - - .{ .ret, .np, &.{}, &.{ 0xc3 }, 0, .none }, - - .{ .rcl, .m1, &.{ .rm8, .unity }, &.{ 0xd0 }, 2, .none }, - .{ .rcl, .m1, &.{ .rm8, .unity }, &.{ 0xd0 }, 2, .rex }, - .{ .rcl, .mc, &.{ .rm8, .cl }, 
&.{ 0xd2 }, 2, .none }, - .{ .rcl, .mc, &.{ .rm8, .cl }, &.{ 0xd2 }, 2, .rex }, - .{ .rcl, .mi, &.{ .rm8, .imm8 }, &.{ 0xc0 }, 2, .none }, - .{ .rcl, .mi, &.{ .rm8, .imm8 }, &.{ 0xc0 }, 2, .rex }, - .{ .rcl, .m1, &.{ .rm16, .unity }, &.{ 0xd1 }, 2, .none }, - .{ .rcl, .mc, &.{ .rm16, .cl }, &.{ 0xd3 }, 2, .none }, - .{ .rcl, .mi, &.{ .rm16, .imm8 }, &.{ 0xc1 }, 2, .none }, - .{ .rcl, .m1, &.{ .rm32, .unity }, &.{ 0xd1 }, 2, .none }, - .{ .rcl, .m1, &.{ .rm64, .unity }, &.{ 0xd1 }, 2, .long }, - .{ .rcl, .mc, &.{ .rm32, .cl }, &.{ 0xd3 }, 2, .none }, - .{ .rcl, .mc, &.{ .rm64, .cl }, &.{ 0xd3 }, 2, .long }, - .{ .rcl, .mi, &.{ .rm32, .imm8 }, &.{ 0xc1 }, 2, .none }, - .{ .rcl, .mi, &.{ .rm64, .imm8 }, &.{ 0xc1 }, 2, .long }, - - .{ .rcr, .m1, &.{ .rm8, .unity }, &.{ 0xd0 }, 3, .none }, - .{ .rcr, .m1, &.{ .rm8, .unity }, &.{ 0xd0 }, 3, .rex }, - .{ .rcr, .mc, &.{ .rm8, .cl }, &.{ 0xd2 }, 3, .none }, - .{ .rcr, .mc, &.{ .rm8, .cl }, &.{ 0xd2 }, 3, .rex }, - .{ .rcr, .mi, &.{ .rm8, .imm8 }, &.{ 0xc0 }, 3, .none }, - .{ .rcr, .mi, &.{ .rm8, .imm8 }, &.{ 0xc0 }, 3, .rex }, - .{ .rcr, .m1, &.{ .rm16, .unity }, &.{ 0xd1 }, 3, .none }, - .{ .rcr, .mc, &.{ .rm16, .cl }, &.{ 0xd3 }, 3, .none }, - .{ .rcr, .mi, &.{ .rm16, .imm8 }, &.{ 0xc1 }, 3, .none }, - .{ .rcr, .m1, &.{ .rm32, .unity }, &.{ 0xd1 }, 3, .none }, - .{ .rcr, .m1, &.{ .rm64, .unity }, &.{ 0xd1 }, 3, .long }, - .{ .rcr, .mc, &.{ .rm32, .cl }, &.{ 0xd3 }, 3, .none }, - .{ .rcr, .mc, &.{ .rm64, .cl }, &.{ 0xd3 }, 3, .long }, - .{ .rcr, .mi, &.{ .rm32, .imm8 }, &.{ 0xc1 }, 3, .none }, - .{ .rcr, .mi, &.{ .rm64, .imm8 }, &.{ 0xc1 }, 3, .long }, - - .{ .rol, .m1, &.{ .rm8, .unity }, &.{ 0xd0 }, 0, .none }, - .{ .rol, .m1, &.{ .rm8, .unity }, &.{ 0xd0 }, 0, .rex }, - .{ .rol, .mc, &.{ .rm8, .cl }, &.{ 0xd2 }, 0, .none }, - .{ .rol, .mc, &.{ .rm8, .cl }, &.{ 0xd2 }, 0, .rex }, - .{ .rol, .mi, &.{ .rm8, .imm8 }, &.{ 0xc0 }, 0, .none }, - .{ .rol, .mi, &.{ .rm8, .imm8 }, &.{ 0xc0 }, 0, .rex }, - .{ .rol, .m1, &.{ .rm16, 
.unity }, &.{ 0xd1 }, 0, .none }, - .{ .rol, .mc, &.{ .rm16, .cl }, &.{ 0xd3 }, 0, .none }, - .{ .rol, .mi, &.{ .rm16, .imm8 }, &.{ 0xc1 }, 0, .none }, - .{ .rol, .m1, &.{ .rm32, .unity }, &.{ 0xd1 }, 0, .none }, - .{ .rol, .m1, &.{ .rm64, .unity }, &.{ 0xd1 }, 0, .long }, - .{ .rol, .mc, &.{ .rm32, .cl }, &.{ 0xd3 }, 0, .none }, - .{ .rol, .mc, &.{ .rm64, .cl }, &.{ 0xd3 }, 0, .long }, - .{ .rol, .mi, &.{ .rm32, .imm8 }, &.{ 0xc1 }, 0, .none }, - .{ .rol, .mi, &.{ .rm64, .imm8 }, &.{ 0xc1 }, 0, .long }, - - .{ .ror, .m1, &.{ .rm8, .unity }, &.{ 0xd0 }, 1, .none }, - .{ .ror, .m1, &.{ .rm8, .unity }, &.{ 0xd0 }, 1, .rex }, - .{ .ror, .mc, &.{ .rm8, .cl }, &.{ 0xd2 }, 1, .none }, - .{ .ror, .mc, &.{ .rm8, .cl }, &.{ 0xd2 }, 1, .rex }, - .{ .ror, .mi, &.{ .rm8, .imm8 }, &.{ 0xc0 }, 1, .none }, - .{ .ror, .mi, &.{ .rm8, .imm8 }, &.{ 0xc0 }, 1, .rex }, - .{ .ror, .m1, &.{ .rm16, .unity }, &.{ 0xd1 }, 1, .none }, - .{ .ror, .mc, &.{ .rm16, .cl }, &.{ 0xd3 }, 1, .none }, - .{ .ror, .mi, &.{ .rm16, .imm8 }, &.{ 0xc1 }, 1, .none }, - .{ .ror, .m1, &.{ .rm32, .unity }, &.{ 0xd1 }, 1, .none }, - .{ .ror, .m1, &.{ .rm64, .unity }, &.{ 0xd1 }, 1, .long }, - .{ .ror, .mc, &.{ .rm32, .cl }, &.{ 0xd3 }, 1, .none }, - .{ .ror, .mc, &.{ .rm64, .cl }, &.{ 0xd3 }, 1, .long }, - .{ .ror, .mi, &.{ .rm32, .imm8 }, &.{ 0xc1 }, 1, .none }, - .{ .ror, .mi, &.{ .rm64, .imm8 }, &.{ 0xc1 }, 1, .long }, - - .{ .sal, .m1, &.{ .rm8, .unity }, &.{ 0xd0 }, 4, .none }, - .{ .sal, .m1, &.{ .rm8, .unity }, &.{ 0xd0 }, 4, .rex }, - .{ .sal, .m1, &.{ .rm16, .unity }, &.{ 0xd1 }, 4, .none }, - .{ .sal, .m1, &.{ .rm32, .unity }, &.{ 0xd1 }, 4, .none }, - .{ .sal, .m1, &.{ .rm64, .unity }, &.{ 0xd1 }, 4, .long }, - .{ .sal, .mc, &.{ .rm8, .cl }, &.{ 0xd2 }, 4, .none }, - .{ .sal, .mc, &.{ .rm8, .cl }, &.{ 0xd2 }, 4, .rex }, - .{ .sal, .mc, &.{ .rm16, .cl }, &.{ 0xd3 }, 4, .none }, - .{ .sal, .mc, &.{ .rm32, .cl }, &.{ 0xd3 }, 4, .none }, - .{ .sal, .mc, &.{ .rm64, .cl }, &.{ 0xd3 }, 4, .long }, - .{ .sal, 
.mi, &.{ .rm8, .imm8 }, &.{ 0xc0 }, 4, .none }, - .{ .sal, .mi, &.{ .rm8, .imm8 }, &.{ 0xc0 }, 4, .rex }, - .{ .sal, .mi, &.{ .rm16, .imm8 }, &.{ 0xc1 }, 4, .none }, - .{ .sal, .mi, &.{ .rm32, .imm8 }, &.{ 0xc1 }, 4, .none }, - .{ .sal, .mi, &.{ .rm64, .imm8 }, &.{ 0xc1 }, 4, .long }, - - .{ .sar, .m1, &.{ .rm8, .unity }, &.{ 0xd0 }, 7, .none }, - .{ .sar, .m1, &.{ .rm8, .unity }, &.{ 0xd0 }, 7, .rex }, - .{ .sar, .m1, &.{ .rm16, .unity }, &.{ 0xd1 }, 7, .none }, - .{ .sar, .m1, &.{ .rm32, .unity }, &.{ 0xd1 }, 7, .none }, - .{ .sar, .m1, &.{ .rm64, .unity }, &.{ 0xd1 }, 7, .long }, - .{ .sar, .mc, &.{ .rm8, .cl }, &.{ 0xd2 }, 7, .none }, - .{ .sar, .mc, &.{ .rm8, .cl }, &.{ 0xd2 }, 7, .rex }, - .{ .sar, .mc, &.{ .rm16, .cl }, &.{ 0xd3 }, 7, .none }, - .{ .sar, .mc, &.{ .rm32, .cl }, &.{ 0xd3 }, 7, .none }, - .{ .sar, .mc, &.{ .rm64, .cl }, &.{ 0xd3 }, 7, .long }, - .{ .sar, .mi, &.{ .rm8, .imm8 }, &.{ 0xc0 }, 7, .none }, - .{ .sar, .mi, &.{ .rm8, .imm8 }, &.{ 0xc0 }, 7, .rex }, - .{ .sar, .mi, &.{ .rm16, .imm8 }, &.{ 0xc1 }, 7, .none }, - .{ .sar, .mi, &.{ .rm32, .imm8 }, &.{ 0xc1 }, 7, .none }, - .{ .sar, .mi, &.{ .rm64, .imm8 }, &.{ 0xc1 }, 7, .long }, - - .{ .sbb, .zi, &.{ .al, .imm8 }, &.{ 0x1c }, 0, .none }, - .{ .sbb, .zi, &.{ .ax, .imm16 }, &.{ 0x1d }, 0, .none }, - .{ .sbb, .zi, &.{ .eax, .imm32 }, &.{ 0x1d }, 0, .none }, - .{ .sbb, .zi, &.{ .rax, .imm32s }, &.{ 0x1d }, 0, .long }, - .{ .sbb, .mi, &.{ .rm8, .imm8 }, &.{ 0x80 }, 3, .none }, - .{ .sbb, .mi, &.{ .rm8, .imm8 }, &.{ 0x80 }, 3, .rex }, - .{ .sbb, .mi, &.{ .rm16, .imm16 }, &.{ 0x81 }, 3, .none }, - .{ .sbb, .mi, &.{ .rm32, .imm32 }, &.{ 0x81 }, 3, .none }, - .{ .sbb, .mi, &.{ .rm64, .imm32s }, &.{ 0x81 }, 3, .long }, - .{ .sbb, .mi, &.{ .rm16, .imm8s }, &.{ 0x83 }, 3, .none }, - .{ .sbb, .mi, &.{ .rm32, .imm8s }, &.{ 0x83 }, 3, .none }, - .{ .sbb, .mi, &.{ .rm64, .imm8s }, &.{ 0x83 }, 3, .long }, - .{ .sbb, .mr, &.{ .rm8, .r8 }, &.{ 0x18 }, 0, .none }, - .{ .sbb, .mr, &.{ .rm8, .r8 }, &.{ 0x18 }, 
0, .rex }, - .{ .sbb, .mr, &.{ .rm16, .r16 }, &.{ 0x19 }, 0, .none }, - .{ .sbb, .mr, &.{ .rm32, .r32 }, &.{ 0x19 }, 0, .none }, - .{ .sbb, .mr, &.{ .rm64, .r64 }, &.{ 0x19 }, 0, .long }, - .{ .sbb, .rm, &.{ .r8, .rm8 }, &.{ 0x1a }, 0, .none }, - .{ .sbb, .rm, &.{ .r8, .rm8 }, &.{ 0x1a }, 0, .rex }, - .{ .sbb, .rm, &.{ .r16, .rm16 }, &.{ 0x1b }, 0, .none }, - .{ .sbb, .rm, &.{ .r32, .rm32 }, &.{ 0x1b }, 0, .none }, - .{ .sbb, .rm, &.{ .r64, .rm64 }, &.{ 0x1b }, 0, .long }, - - .{ .scas, .np, &.{ .m8 }, &.{ 0xae }, 0, .none }, - .{ .scas, .np, &.{ .m16 }, &.{ 0xaf }, 0, .none }, - .{ .scas, .np, &.{ .m32 }, &.{ 0xaf }, 0, .none }, - .{ .scas, .np, &.{ .m64 }, &.{ 0xaf }, 0, .long }, - - .{ .scasb, .np, &.{}, &.{ 0xae }, 0, .none }, - .{ .scasw, .np, &.{}, &.{ 0xaf }, 0, .short }, - .{ .scasd, .np, &.{}, &.{ 0xaf }, 0, .none }, - .{ .scasq, .np, &.{}, &.{ 0xaf }, 0, .long }, - - .{ .seta, .m, &.{ .rm8 }, &.{ 0x0f, 0x97 }, 0, .none }, - .{ .seta, .m, &.{ .rm8 }, &.{ 0x0f, 0x97 }, 0, .rex }, - .{ .setae, .m, &.{ .rm8 }, &.{ 0x0f, 0x93 }, 0, .none }, - .{ .setae, .m, &.{ .rm8 }, &.{ 0x0f, 0x93 }, 0, .rex }, - .{ .setb, .m, &.{ .rm8 }, &.{ 0x0f, 0x92 }, 0, .none }, - .{ .setb, .m, &.{ .rm8 }, &.{ 0x0f, 0x92 }, 0, .rex }, - .{ .setbe, .m, &.{ .rm8 }, &.{ 0x0f, 0x96 }, 0, .none }, - .{ .setbe, .m, &.{ .rm8 }, &.{ 0x0f, 0x96 }, 0, .rex }, - .{ .setc, .m, &.{ .rm8 }, &.{ 0x0f, 0x92 }, 0, .none }, - .{ .setc, .m, &.{ .rm8 }, &.{ 0x0f, 0x92 }, 0, .rex }, - .{ .sete, .m, &.{ .rm8 }, &.{ 0x0f, 0x94 }, 0, .none }, - .{ .sete, .m, &.{ .rm8 }, &.{ 0x0f, 0x94 }, 0, .rex }, - .{ .setg, .m, &.{ .rm8 }, &.{ 0x0f, 0x9f }, 0, .none }, - .{ .setg, .m, &.{ .rm8 }, &.{ 0x0f, 0x9f }, 0, .rex }, - .{ .setge, .m, &.{ .rm8 }, &.{ 0x0f, 0x9d }, 0, .none }, - .{ .setge, .m, &.{ .rm8 }, &.{ 0x0f, 0x9d }, 0, .rex }, - .{ .setl, .m, &.{ .rm8 }, &.{ 0x0f, 0x9c }, 0, .none }, - .{ .setl, .m, &.{ .rm8 }, &.{ 0x0f, 0x9c }, 0, .rex }, - .{ .setle, .m, &.{ .rm8 }, &.{ 0x0f, 0x9e }, 0, .none }, - .{ 
.setle, .m, &.{ .rm8 }, &.{ 0x0f, 0x9e }, 0, .rex }, - .{ .setna, .m, &.{ .rm8 }, &.{ 0x0f, 0x96 }, 0, .none }, - .{ .setna, .m, &.{ .rm8 }, &.{ 0x0f, 0x96 }, 0, .rex }, - .{ .setnae, .m, &.{ .rm8 }, &.{ 0x0f, 0x92 }, 0, .none }, - .{ .setnae, .m, &.{ .rm8 }, &.{ 0x0f, 0x92 }, 0, .rex }, - .{ .setnb, .m, &.{ .rm8 }, &.{ 0x0f, 0x93 }, 0, .none }, - .{ .setnb, .m, &.{ .rm8 }, &.{ 0x0f, 0x93 }, 0, .rex }, - .{ .setnbe, .m, &.{ .rm8 }, &.{ 0x0f, 0x97 }, 0, .none }, - .{ .setnbe, .m, &.{ .rm8 }, &.{ 0x0f, 0x97 }, 0, .rex }, - .{ .setnc, .m, &.{ .rm8 }, &.{ 0x0f, 0x93 }, 0, .none }, - .{ .setnc, .m, &.{ .rm8 }, &.{ 0x0f, 0x93 }, 0, .rex }, - .{ .setne, .m, &.{ .rm8 }, &.{ 0x0f, 0x95 }, 0, .none }, - .{ .setne, .m, &.{ .rm8 }, &.{ 0x0f, 0x95 }, 0, .rex }, - .{ .setng, .m, &.{ .rm8 }, &.{ 0x0f, 0x9e }, 0, .none }, - .{ .setng, .m, &.{ .rm8 }, &.{ 0x0f, 0x9e }, 0, .rex }, - .{ .setnge, .m, &.{ .rm8 }, &.{ 0x0f, 0x9c }, 0, .none }, - .{ .setnge, .m, &.{ .rm8 }, &.{ 0x0f, 0x9c }, 0, .rex }, - .{ .setnl, .m, &.{ .rm8 }, &.{ 0x0f, 0x9d }, 0, .none }, - .{ .setnl, .m, &.{ .rm8 }, &.{ 0x0f, 0x9d }, 0, .rex }, - .{ .setnle, .m, &.{ .rm8 }, &.{ 0x0f, 0x9f }, 0, .none }, - .{ .setnle, .m, &.{ .rm8 }, &.{ 0x0f, 0x9f }, 0, .rex }, - .{ .setno, .m, &.{ .rm8 }, &.{ 0x0f, 0x91 }, 0, .none }, - .{ .setno, .m, &.{ .rm8 }, &.{ 0x0f, 0x91 }, 0, .rex }, - .{ .setnp, .m, &.{ .rm8 }, &.{ 0x0f, 0x9b }, 0, .none }, - .{ .setnp, .m, &.{ .rm8 }, &.{ 0x0f, 0x9b }, 0, .rex }, - .{ .setns, .m, &.{ .rm8 }, &.{ 0x0f, 0x99 }, 0, .none }, - .{ .setns, .m, &.{ .rm8 }, &.{ 0x0f, 0x99 }, 0, .rex }, - .{ .setnz, .m, &.{ .rm8 }, &.{ 0x0f, 0x95 }, 0, .none }, - .{ .setnz, .m, &.{ .rm8 }, &.{ 0x0f, 0x95 }, 0, .rex }, - .{ .seto, .m, &.{ .rm8 }, &.{ 0x0f, 0x90 }, 0, .none }, - .{ .seto, .m, &.{ .rm8 }, &.{ 0x0f, 0x90 }, 0, .rex }, - .{ .setp, .m, &.{ .rm8 }, &.{ 0x0f, 0x9a }, 0, .none }, - .{ .setp, .m, &.{ .rm8 }, &.{ 0x0f, 0x9a }, 0, .rex }, - .{ .setpe, .m, &.{ .rm8 }, &.{ 0x0f, 0x9a }, 0, .none }, - .{ 
.setpe, .m, &.{ .rm8 }, &.{ 0x0f, 0x9a }, 0, .rex }, - .{ .setpo, .m, &.{ .rm8 }, &.{ 0x0f, 0x9b }, 0, .none }, - .{ .setpo, .m, &.{ .rm8 }, &.{ 0x0f, 0x9b }, 0, .rex }, - .{ .sets, .m, &.{ .rm8 }, &.{ 0x0f, 0x98 }, 0, .none }, - .{ .sets, .m, &.{ .rm8 }, &.{ 0x0f, 0x98 }, 0, .rex }, - .{ .setz, .m, &.{ .rm8 }, &.{ 0x0f, 0x94 }, 0, .none }, - .{ .setz, .m, &.{ .rm8 }, &.{ 0x0f, 0x94 }, 0, .rex }, - - .{ .sfence, .np, &.{}, &.{ 0x0f, 0xae, 0xf8 }, 0, .none }, - - .{ .shl, .m1, &.{ .rm8, .unity }, &.{ 0xd0 }, 4, .none }, - .{ .shl, .m1, &.{ .rm8, .unity }, &.{ 0xd0 }, 4, .rex }, - .{ .shl, .m1, &.{ .rm16, .unity }, &.{ 0xd1 }, 4, .none }, - .{ .shl, .m1, &.{ .rm32, .unity }, &.{ 0xd1 }, 4, .none }, - .{ .shl, .m1, &.{ .rm64, .unity }, &.{ 0xd1 }, 4, .long }, - .{ .shl, .mc, &.{ .rm8, .cl }, &.{ 0xd2 }, 4, .none }, - .{ .shl, .mc, &.{ .rm8, .cl }, &.{ 0xd2 }, 4, .rex }, - .{ .shl, .mc, &.{ .rm16, .cl }, &.{ 0xd3 }, 4, .none }, - .{ .shl, .mc, &.{ .rm32, .cl }, &.{ 0xd3 }, 4, .none }, - .{ .shl, .mc, &.{ .rm64, .cl }, &.{ 0xd3 }, 4, .long }, - .{ .shl, .mi, &.{ .rm8, .imm8 }, &.{ 0xc0 }, 4, .none }, - .{ .shl, .mi, &.{ .rm8, .imm8 }, &.{ 0xc0 }, 4, .rex }, - .{ .shl, .mi, &.{ .rm16, .imm8 }, &.{ 0xc1 }, 4, .none }, - .{ .shl, .mi, &.{ .rm32, .imm8 }, &.{ 0xc1 }, 4, .none }, - .{ .shl, .mi, &.{ .rm64, .imm8 }, &.{ 0xc1 }, 4, .long }, - - .{ .shld, .mri, &.{ .rm16, .r16, .imm8 }, &.{ 0x0f, 0xa4 }, 0, .none }, - .{ .shld, .mrc, &.{ .rm16, .r16, .cl }, &.{ 0x0f, 0xa5 }, 0, .none }, - .{ .shld, .mri, &.{ .rm32, .r32, .imm8 }, &.{ 0x0f, 0xa4 }, 0, .none }, - .{ .shld, .mri, &.{ .rm64, .r64, .imm8 }, &.{ 0x0f, 0xa4 }, 0, .long }, - .{ .shld, .mrc, &.{ .rm32, .r32, .cl }, &.{ 0x0f, 0xa5 }, 0, .none }, - .{ .shld, .mrc, &.{ .rm64, .r64, .cl }, &.{ 0x0f, 0xa5 }, 0, .long }, - - .{ .shr, .m1, &.{ .rm8, .unity }, &.{ 0xd0 }, 5, .none }, - .{ .shr, .m1, &.{ .rm8, .unity }, &.{ 0xd0 }, 5, .rex }, - .{ .shr, .m1, &.{ .rm16, .unity }, &.{ 0xd1 }, 5, .none }, - .{ .shr, .m1, &.{ .rm32, 
.unity }, &.{ 0xd1 }, 5, .none }, - .{ .shr, .m1, &.{ .rm64, .unity }, &.{ 0xd1 }, 5, .long }, - .{ .shr, .mc, &.{ .rm8, .cl }, &.{ 0xd2 }, 5, .none }, - .{ .shr, .mc, &.{ .rm8, .cl }, &.{ 0xd2 }, 5, .rex }, - .{ .shr, .mc, &.{ .rm16, .cl }, &.{ 0xd3 }, 5, .none }, - .{ .shr, .mc, &.{ .rm32, .cl }, &.{ 0xd3 }, 5, .none }, - .{ .shr, .mc, &.{ .rm64, .cl }, &.{ 0xd3 }, 5, .long }, - .{ .shr, .mi, &.{ .rm8, .imm8 }, &.{ 0xc0 }, 5, .none }, - .{ .shr, .mi, &.{ .rm8, .imm8 }, &.{ 0xc0 }, 5, .rex }, - .{ .shr, .mi, &.{ .rm16, .imm8 }, &.{ 0xc1 }, 5, .none }, - .{ .shr, .mi, &.{ .rm32, .imm8 }, &.{ 0xc1 }, 5, .none }, - .{ .shr, .mi, &.{ .rm64, .imm8 }, &.{ 0xc1 }, 5, .long }, - - .{ .shrd, .mri, &.{ .rm16, .r16, .imm8 }, &.{ 0x0f, 0xac }, 0, .none }, - .{ .shrd, .mrc, &.{ .rm16, .r16, .cl }, &.{ 0x0f, 0xad }, 0, .none }, - .{ .shrd, .mri, &.{ .rm32, .r32, .imm8 }, &.{ 0x0f, 0xac }, 0, .none }, - .{ .shrd, .mri, &.{ .rm64, .r64, .imm8 }, &.{ 0x0f, 0xac }, 0, .long }, - .{ .shrd, .mrc, &.{ .rm32, .r32, .cl }, &.{ 0x0f, 0xad }, 0, .none }, - .{ .shrd, .mrc, &.{ .rm64, .r64, .cl }, &.{ 0x0f, 0xad }, 0, .long }, - - .{ .stos, .np, &.{ .m8 }, &.{ 0xaa }, 0, .none }, - .{ .stos, .np, &.{ .m16 }, &.{ 0xab }, 0, .none }, - .{ .stos, .np, &.{ .m32 }, &.{ 0xab }, 0, .none }, - .{ .stos, .np, &.{ .m64 }, &.{ 0xab }, 0, .long }, - - .{ .stosb, .np, &.{}, &.{ 0xaa }, 0, .none }, - .{ .stosw, .np, &.{}, &.{ 0xab }, 0, .short }, - .{ .stosd, .np, &.{}, &.{ 0xab }, 0, .none }, - .{ .stosq, .np, &.{}, &.{ 0xab }, 0, .long }, - - .{ .sub, .zi, &.{ .al, .imm8 }, &.{ 0x2c }, 0, .none }, - .{ .sub, .zi, &.{ .ax, .imm16 }, &.{ 0x2d }, 0, .none }, - .{ .sub, .zi, &.{ .eax, .imm32 }, &.{ 0x2d }, 0, .none }, - .{ .sub, .zi, &.{ .rax, .imm32s }, &.{ 0x2d }, 0, .long }, - .{ .sub, .mi, &.{ .rm8, .imm8 }, &.{ 0x80 }, 5, .none }, - .{ .sub, .mi, &.{ .rm8, .imm8 }, &.{ 0x80 }, 5, .rex }, - .{ .sub, .mi, &.{ .rm16, .imm16 }, &.{ 0x81 }, 5, .none }, - .{ .sub, .mi, &.{ .rm32, .imm32 }, &.{ 0x81 }, 5, 
.none }, - .{ .sub, .mi, &.{ .rm64, .imm32s }, &.{ 0x81 }, 5, .long }, - .{ .sub, .mi, &.{ .rm16, .imm8s }, &.{ 0x83 }, 5, .none }, - .{ .sub, .mi, &.{ .rm32, .imm8s }, &.{ 0x83 }, 5, .none }, - .{ .sub, .mi, &.{ .rm64, .imm8s }, &.{ 0x83 }, 5, .long }, - .{ .sub, .mr, &.{ .rm8, .r8 }, &.{ 0x28 }, 0, .none }, - .{ .sub, .mr, &.{ .rm8, .r8 }, &.{ 0x28 }, 0, .rex }, - .{ .sub, .mr, &.{ .rm16, .r16 }, &.{ 0x29 }, 0, .none }, - .{ .sub, .mr, &.{ .rm32, .r32 }, &.{ 0x29 }, 0, .none }, - .{ .sub, .mr, &.{ .rm64, .r64 }, &.{ 0x29 }, 0, .long }, - .{ .sub, .rm, &.{ .r8, .rm8 }, &.{ 0x2a }, 0, .none }, - .{ .sub, .rm, &.{ .r8, .rm8 }, &.{ 0x2a }, 0, .rex }, - .{ .sub, .rm, &.{ .r16, .rm16 }, &.{ 0x2b }, 0, .none }, - .{ .sub, .rm, &.{ .r32, .rm32 }, &.{ 0x2b }, 0, .none }, - .{ .sub, .rm, &.{ .r64, .rm64 }, &.{ 0x2b }, 0, .long }, - - .{ .syscall, .np, &.{}, &.{ 0x0f, 0x05 }, 0, .none } -, - .{ .@"test", .zi, &.{ .al, .imm8 }, &.{ 0xa8 }, 0, .none }, - .{ .@"test", .zi, &.{ .ax, .imm16 }, &.{ 0xa9 }, 0, .none }, - .{ .@"test", .zi, &.{ .eax, .imm32 }, &.{ 0xa9 }, 0, .none }, - .{ .@"test", .zi, &.{ .rax, .imm32s }, &.{ 0xa9 }, 0, .long }, - .{ .@"test", .mi, &.{ .rm8, .imm8 }, &.{ 0xf6 }, 0, .none }, - .{ .@"test", .mi, &.{ .rm8, .imm8 }, &.{ 0xf6 }, 0, .rex }, - .{ .@"test", .mi, &.{ .rm16, .imm16 }, &.{ 0xf7 }, 0, .none }, - .{ .@"test", .mi, &.{ .rm32, .imm32 }, &.{ 0xf7 }, 0, .none }, - .{ .@"test", .mi, &.{ .rm64, .imm32s }, &.{ 0xf7 }, 0, .long }, - .{ .@"test", .mr, &.{ .rm8, .r8 }, &.{ 0x84 }, 0, .none }, - .{ .@"test", .mr, &.{ .rm8, .r8 }, &.{ 0x84 }, 0, .rex }, - .{ .@"test", .mr, &.{ .rm16, .r16 }, &.{ 0x85 }, 0, .none }, - .{ .@"test", .mr, &.{ .rm32, .r32 }, &.{ 0x85 }, 0, .none }, - .{ .@"test", .mr, &.{ .rm64, .r64 }, &.{ 0x85 }, 0, .long }, - - .{ .tzcnt, .rm, &.{ .r16, .rm16 }, &.{ 0xf3, 0x0f, 0xbc }, 0, .none }, - .{ .tzcnt, .rm, &.{ .r32, .rm32 }, &.{ 0xf3, 0x0f, 0xbc }, 0, .none }, - .{ .tzcnt, .rm, &.{ .r64, .rm64 }, &.{ 0xf3, 0x0f, 0xbc }, 0, .long }, 
- - .{ .ud2, .np, &.{}, &.{ 0x0f, 0x0b }, 0, .none }, - - .{ .xadd, .mr, &.{ .rm8, .r8 }, &.{ 0x0f, 0xc0 }, 0, .none }, - .{ .xadd, .mr, &.{ .rm8, .r8 }, &.{ 0x0f, 0xc0 }, 0, .rex }, - .{ .xadd, .mr, &.{ .rm16, .r16 }, &.{ 0x0f, 0xc1 }, 0, .none }, - .{ .xadd, .mr, &.{ .rm32, .r32 }, &.{ 0x0f, 0xc1 }, 0, .none }, - .{ .xadd, .mr, &.{ .rm64, .r64 }, &.{ 0x0f, 0xc1 }, 0, .long }, - - .{ .xchg, .o, &.{ .ax, .r16 }, &.{ 0x90 }, 0, .none }, - .{ .xchg, .o, &.{ .r16, .ax }, &.{ 0x90 }, 0, .none }, - .{ .xchg, .o, &.{ .eax, .r32 }, &.{ 0x90 }, 0, .none }, - .{ .xchg, .o, &.{ .rax, .r64 }, &.{ 0x90 }, 0, .long }, - .{ .xchg, .o, &.{ .r32, .eax }, &.{ 0x90 }, 0, .none }, - .{ .xchg, .o, &.{ .r64, .rax }, &.{ 0x90 }, 0, .long }, - .{ .xchg, .mr, &.{ .rm8, .r8 }, &.{ 0x86 }, 0, .none }, - .{ .xchg, .mr, &.{ .rm8, .r8 }, &.{ 0x86 }, 0, .rex }, - .{ .xchg, .rm, &.{ .r8, .rm8 }, &.{ 0x86 }, 0, .none }, - .{ .xchg, .rm, &.{ .r8, .rm8 }, &.{ 0x86 }, 0, .rex }, - .{ .xchg, .mr, &.{ .rm16, .r16 }, &.{ 0x87 }, 0, .none }, - .{ .xchg, .rm, &.{ .r16, .rm16 }, &.{ 0x87 }, 0, .none }, - .{ .xchg, .mr, &.{ .rm32, .r32 }, &.{ 0x87 }, 0, .none }, - .{ .xchg, .mr, &.{ .rm64, .r64 }, &.{ 0x87 }, 0, .long }, - .{ .xchg, .rm, &.{ .r32, .rm32 }, &.{ 0x87 }, 0, .none }, - .{ .xchg, .rm, &.{ .r64, .rm64 }, &.{ 0x87 }, 0, .long }, - - .{ .xor, .zi, &.{ .al, .imm8 }, &.{ 0x34 }, 0, .none }, - .{ .xor, .zi, &.{ .ax, .imm16 }, &.{ 0x35 }, 0, .none }, - .{ .xor, .zi, &.{ .eax, .imm32 }, &.{ 0x35 }, 0, .none }, - .{ .xor, .zi, &.{ .rax, .imm32s }, &.{ 0x35 }, 0, .long }, - .{ .xor, .mi, &.{ .rm8, .imm8 }, &.{ 0x80 }, 6, .none }, - .{ .xor, .mi, &.{ .rm8, .imm8 }, &.{ 0x80 }, 6, .rex }, - .{ .xor, .mi, &.{ .rm16, .imm16 }, &.{ 0x81 }, 6, .none }, - .{ .xor, .mi, &.{ .rm32, .imm32 }, &.{ 0x81 }, 6, .none }, - .{ .xor, .mi, &.{ .rm64, .imm32s }, &.{ 0x81 }, 6, .long }, - .{ .xor, .mi, &.{ .rm16, .imm8s }, &.{ 0x83 }, 6, .none }, - .{ .xor, .mi, &.{ .rm32, .imm8s }, &.{ 0x83 }, 6, .none }, - .{ .xor, .mi, 
&.{ .rm64, .imm8s }, &.{ 0x83 }, 6, .long }, - .{ .xor, .mr, &.{ .rm8, .r8 }, &.{ 0x30 }, 0, .none }, - .{ .xor, .mr, &.{ .rm8, .r8 }, &.{ 0x30 }, 0, .rex }, - .{ .xor, .mr, &.{ .rm16, .r16 }, &.{ 0x31 }, 0, .none }, - .{ .xor, .mr, &.{ .rm32, .r32 }, &.{ 0x31 }, 0, .none }, - .{ .xor, .mr, &.{ .rm64, .r64 }, &.{ 0x31 }, 0, .long }, - .{ .xor, .rm, &.{ .r8, .rm8 }, &.{ 0x32 }, 0, .none }, - .{ .xor, .rm, &.{ .r8, .rm8 }, &.{ 0x32 }, 0, .rex }, - .{ .xor, .rm, &.{ .r16, .rm16 }, &.{ 0x33 }, 0, .none }, - .{ .xor, .rm, &.{ .r32, .rm32 }, &.{ 0x33 }, 0, .none }, - .{ .xor, .rm, &.{ .r64, .rm64 }, &.{ 0x33 }, 0, .long }, + .{ .movsxd, .rm, &.{ .r32, .rm32 }, &.{ 0x63 }, 0, .none, .none }, + .{ .movsxd, .rm, &.{ .r64, .rm32 }, &.{ 0x63 }, 0, .long, .none }, + + .{ .movzx, .rm, &.{ .r16, .rm8 }, &.{ 0x0f, 0xb6 }, 0, .short, .none }, + .{ .movzx, .rm, &.{ .r16, .rm8 }, &.{ 0x0f, 0xb6 }, 0, .rex_short, .none }, + .{ .movzx, .rm, &.{ .r32, .rm8 }, &.{ 0x0f, 0xb6 }, 0, .none, .none }, + .{ .movzx, .rm, &.{ .r32, .rm8 }, &.{ 0x0f, 0xb6 }, 0, .rex, .none }, + .{ .movzx, .rm, &.{ .r64, .rm8 }, &.{ 0x0f, 0xb6 }, 0, .long, .none }, + .{ .movzx, .rm, &.{ .r32, .rm16 }, &.{ 0x0f, 0xb7 }, 0, .none, .none }, + .{ .movzx, .rm, &.{ .r32, .rm16 }, &.{ 0x0f, 0xb7 }, 0, .rex, .none }, + .{ .movzx, .rm, &.{ .r64, .rm16 }, &.{ 0x0f, 0xb7 }, 0, .long, .none }, + + .{ .mul, .m, &.{ .rm8 }, &.{ 0xf6 }, 4, .none, .none }, + .{ .mul, .m, &.{ .rm8 }, &.{ 0xf6 }, 4, .rex, .none }, + .{ .mul, .m, &.{ .rm16 }, &.{ 0xf7 }, 4, .short, .none }, + .{ .mul, .m, &.{ .rm32 }, &.{ 0xf7 }, 4, .none, .none }, + .{ .mul, .m, &.{ .rm64 }, &.{ 0xf7 }, 4, .long, .none }, + + .{ .neg, .m, &.{ .rm8 }, &.{ 0xf6 }, 3, .none, .none }, + .{ .neg, .m, &.{ .rm8 }, &.{ 0xf6 }, 3, .rex, .none }, + .{ .neg, .m, &.{ .rm16 }, &.{ 0xf7 }, 3, .short, .none }, + .{ .neg, .m, &.{ .rm32 }, &.{ 0xf7 }, 3, .none, .none }, + .{ .neg, .m, &.{ .rm64 }, &.{ 0xf7 }, 3, .long, .none }, + + .{ .nop, .np, &.{}, &.{ 0x90 }, 0, .none, .none 
}, + + .{ .not, .m, &.{ .rm8 }, &.{ 0xf6 }, 2, .none, .none }, + .{ .not, .m, &.{ .rm8 }, &.{ 0xf6 }, 2, .rex, .none }, + .{ .not, .m, &.{ .rm16 }, &.{ 0xf7 }, 2, .short, .none }, + .{ .not, .m, &.{ .rm32 }, &.{ 0xf7 }, 2, .none, .none }, + .{ .not, .m, &.{ .rm64 }, &.{ 0xf7 }, 2, .long, .none }, + + .{ .@"or", .zi, &.{ .al, .imm8 }, &.{ 0x0c }, 0, .none, .none }, + .{ .@"or", .zi, &.{ .ax, .imm16 }, &.{ 0x0d }, 0, .short, .none }, + .{ .@"or", .zi, &.{ .eax, .imm32 }, &.{ 0x0d }, 0, .none, .none }, + .{ .@"or", .zi, &.{ .rax, .imm32s }, &.{ 0x0d }, 0, .long, .none }, + .{ .@"or", .mi, &.{ .rm8, .imm8 }, &.{ 0x80 }, 1, .none, .none }, + .{ .@"or", .mi, &.{ .rm8, .imm8 }, &.{ 0x80 }, 1, .rex, .none }, + .{ .@"or", .mi, &.{ .rm16, .imm16 }, &.{ 0x81 }, 1, .short, .none }, + .{ .@"or", .mi, &.{ .rm32, .imm32 }, &.{ 0x81 }, 1, .none, .none }, + .{ .@"or", .mi, &.{ .rm64, .imm32s }, &.{ 0x81 }, 1, .long, .none }, + .{ .@"or", .mi, &.{ .rm16, .imm8s }, &.{ 0x83 }, 1, .short, .none }, + .{ .@"or", .mi, &.{ .rm32, .imm8s }, &.{ 0x83 }, 1, .none, .none }, + .{ .@"or", .mi, &.{ .rm64, .imm8s }, &.{ 0x83 }, 1, .long, .none }, + .{ .@"or", .mr, &.{ .rm8, .r8 }, &.{ 0x08 }, 0, .none, .none }, + .{ .@"or", .mr, &.{ .rm8, .r8 }, &.{ 0x08 }, 0, .rex, .none }, + .{ .@"or", .mr, &.{ .rm16, .r16 }, &.{ 0x09 }, 0, .short, .none }, + .{ .@"or", .mr, &.{ .rm32, .r32 }, &.{ 0x09 }, 0, .none, .none }, + .{ .@"or", .mr, &.{ .rm64, .r64 }, &.{ 0x09 }, 0, .long, .none }, + .{ .@"or", .rm, &.{ .r8, .rm8 }, &.{ 0x0a }, 0, .none, .none }, + .{ .@"or", .rm, &.{ .r8, .rm8 }, &.{ 0x0a }, 0, .rex, .none }, + .{ .@"or", .rm, &.{ .r16, .rm16 }, &.{ 0x0b }, 0, .short, .none }, + .{ .@"or", .rm, &.{ .r32, .rm32 }, &.{ 0x0b }, 0, .none, .none }, + .{ .@"or", .rm, &.{ .r64, .rm64 }, &.{ 0x0b }, 0, .long, .none }, + + .{ .pop, .o, &.{ .r16 }, &.{ 0x58 }, 0, .short, .none }, + .{ .pop, .o, &.{ .r64 }, &.{ 0x58 }, 0, .none, .none }, + .{ .pop, .m, &.{ .rm16 }, &.{ 0x8f }, 0, .short, .none }, + .{ .pop, .m, 
&.{ .rm64 }, &.{ 0x8f }, 0, .none, .none }, + + .{ .popcnt, .rm, &.{ .r16, .rm16 }, &.{ 0xf3, 0x0f, 0xb8 }, 0, .short, .none }, + .{ .popcnt, .rm, &.{ .r32, .rm32 }, &.{ 0xf3, 0x0f, 0xb8 }, 0, .none, .none }, + .{ .popcnt, .rm, &.{ .r64, .rm64 }, &.{ 0xf3, 0x0f, 0xb8 }, 0, .long, .none }, + + .{ .push, .o, &.{ .r16 }, &.{ 0x50 }, 0, .short, .none }, + .{ .push, .o, &.{ .r64 }, &.{ 0x50 }, 0, .none, .none }, + .{ .push, .m, &.{ .rm16 }, &.{ 0xff }, 6, .short, .none }, + .{ .push, .m, &.{ .rm64 }, &.{ 0xff }, 6, .none, .none }, + .{ .push, .i, &.{ .imm8 }, &.{ 0x6a }, 0, .none, .none }, + .{ .push, .i, &.{ .imm16 }, &.{ 0x68 }, 0, .short, .none }, + .{ .push, .i, &.{ .imm32 }, &.{ 0x68 }, 0, .none, .none }, + + .{ .ret, .np, &.{}, &.{ 0xc3 }, 0, .none, .none }, + + .{ .rcl, .m1, &.{ .rm8, .unity }, &.{ 0xd0 }, 2, .none, .none }, + .{ .rcl, .m1, &.{ .rm8, .unity }, &.{ 0xd0 }, 2, .rex, .none }, + .{ .rcl, .mc, &.{ .rm8, .cl }, &.{ 0xd2 }, 2, .none, .none }, + .{ .rcl, .mc, &.{ .rm8, .cl }, &.{ 0xd2 }, 2, .rex, .none }, + .{ .rcl, .mi, &.{ .rm8, .imm8 }, &.{ 0xc0 }, 2, .none, .none }, + .{ .rcl, .mi, &.{ .rm8, .imm8 }, &.{ 0xc0 }, 2, .rex, .none }, + .{ .rcl, .m1, &.{ .rm16, .unity }, &.{ 0xd1 }, 2, .short, .none }, + .{ .rcl, .mc, &.{ .rm16, .cl }, &.{ 0xd3 }, 2, .short, .none }, + .{ .rcl, .mi, &.{ .rm16, .imm8 }, &.{ 0xc1 }, 2, .short, .none }, + .{ .rcl, .m1, &.{ .rm32, .unity }, &.{ 0xd1 }, 2, .none, .none }, + .{ .rcl, .m1, &.{ .rm64, .unity }, &.{ 0xd1 }, 2, .long, .none }, + .{ .rcl, .mc, &.{ .rm32, .cl }, &.{ 0xd3 }, 2, .none, .none }, + .{ .rcl, .mc, &.{ .rm64, .cl }, &.{ 0xd3 }, 2, .long, .none }, + .{ .rcl, .mi, &.{ .rm32, .imm8 }, &.{ 0xc1 }, 2, .none, .none }, + .{ .rcl, .mi, &.{ .rm64, .imm8 }, &.{ 0xc1 }, 2, .long, .none }, + + .{ .rcr, .m1, &.{ .rm8, .unity }, &.{ 0xd0 }, 3, .none, .none }, + .{ .rcr, .m1, &.{ .rm8, .unity }, &.{ 0xd0 }, 3, .rex, .none }, + .{ .rcr, .mc, &.{ .rm8, .cl }, &.{ 0xd2 }, 3, .none, .none }, + .{ .rcr, .mc, &.{ .rm8, .cl }, 
&.{ 0xd2 }, 3, .rex, .none }, + .{ .rcr, .mi, &.{ .rm8, .imm8 }, &.{ 0xc0 }, 3, .none, .none }, + .{ .rcr, .mi, &.{ .rm8, .imm8 }, &.{ 0xc0 }, 3, .rex, .none }, + .{ .rcr, .m1, &.{ .rm16, .unity }, &.{ 0xd1 }, 3, .short, .none }, + .{ .rcr, .mc, &.{ .rm16, .cl }, &.{ 0xd3 }, 3, .short, .none }, + .{ .rcr, .mi, &.{ .rm16, .imm8 }, &.{ 0xc1 }, 3, .short, .none }, + .{ .rcr, .m1, &.{ .rm32, .unity }, &.{ 0xd1 }, 3, .none, .none }, + .{ .rcr, .m1, &.{ .rm64, .unity }, &.{ 0xd1 }, 3, .long, .none }, + .{ .rcr, .mc, &.{ .rm32, .cl }, &.{ 0xd3 }, 3, .none, .none }, + .{ .rcr, .mc, &.{ .rm64, .cl }, &.{ 0xd3 }, 3, .long, .none }, + .{ .rcr, .mi, &.{ .rm32, .imm8 }, &.{ 0xc1 }, 3, .none, .none }, + .{ .rcr, .mi, &.{ .rm64, .imm8 }, &.{ 0xc1 }, 3, .long, .none }, + + .{ .rol, .m1, &.{ .rm8, .unity }, &.{ 0xd0 }, 0, .none, .none }, + .{ .rol, .m1, &.{ .rm8, .unity }, &.{ 0xd0 }, 0, .rex, .none }, + .{ .rol, .mc, &.{ .rm8, .cl }, &.{ 0xd2 }, 0, .none, .none }, + .{ .rol, .mc, &.{ .rm8, .cl }, &.{ 0xd2 }, 0, .rex, .none }, + .{ .rol, .mi, &.{ .rm8, .imm8 }, &.{ 0xc0 }, 0, .none, .none }, + .{ .rol, .mi, &.{ .rm8, .imm8 }, &.{ 0xc0 }, 0, .rex, .none }, + .{ .rol, .m1, &.{ .rm16, .unity }, &.{ 0xd1 }, 0, .short, .none }, + .{ .rol, .mc, &.{ .rm16, .cl }, &.{ 0xd3 }, 0, .short, .none }, + .{ .rol, .mi, &.{ .rm16, .imm8 }, &.{ 0xc1 }, 0, .short, .none }, + .{ .rol, .m1, &.{ .rm32, .unity }, &.{ 0xd1 }, 0, .none, .none }, + .{ .rol, .m1, &.{ .rm64, .unity }, &.{ 0xd1 }, 0, .long, .none }, + .{ .rol, .mc, &.{ .rm32, .cl }, &.{ 0xd3 }, 0, .none, .none }, + .{ .rol, .mc, &.{ .rm64, .cl }, &.{ 0xd3 }, 0, .long, .none }, + .{ .rol, .mi, &.{ .rm32, .imm8 }, &.{ 0xc1 }, 0, .none, .none }, + .{ .rol, .mi, &.{ .rm64, .imm8 }, &.{ 0xc1 }, 0, .long, .none }, + + .{ .ror, .m1, &.{ .rm8, .unity }, &.{ 0xd0 }, 1, .none, .none }, + .{ .ror, .m1, &.{ .rm8, .unity }, &.{ 0xd0 }, 1, .rex, .none }, + .{ .ror, .mc, &.{ .rm8, .cl }, &.{ 0xd2 }, 1, .none, .none }, + .{ .ror, .mc, &.{ .rm8, .cl }, &.{ 
0xd2 }, 1, .rex, .none }, + .{ .ror, .mi, &.{ .rm8, .imm8 }, &.{ 0xc0 }, 1, .none, .none }, + .{ .ror, .mi, &.{ .rm8, .imm8 }, &.{ 0xc0 }, 1, .rex, .none }, + .{ .ror, .m1, &.{ .rm16, .unity }, &.{ 0xd1 }, 1, .short, .none }, + .{ .ror, .mc, &.{ .rm16, .cl }, &.{ 0xd3 }, 1, .short, .none }, + .{ .ror, .mi, &.{ .rm16, .imm8 }, &.{ 0xc1 }, 1, .short, .none }, + .{ .ror, .m1, &.{ .rm32, .unity }, &.{ 0xd1 }, 1, .none, .none }, + .{ .ror, .m1, &.{ .rm64, .unity }, &.{ 0xd1 }, 1, .long, .none }, + .{ .ror, .mc, &.{ .rm32, .cl }, &.{ 0xd3 }, 1, .none, .none }, + .{ .ror, .mc, &.{ .rm64, .cl }, &.{ 0xd3 }, 1, .long, .none }, + .{ .ror, .mi, &.{ .rm32, .imm8 }, &.{ 0xc1 }, 1, .none, .none }, + .{ .ror, .mi, &.{ .rm64, .imm8 }, &.{ 0xc1 }, 1, .long, .none }, + + .{ .sal, .m1, &.{ .rm8, .unity }, &.{ 0xd0 }, 4, .none, .none }, + .{ .sal, .m1, &.{ .rm8, .unity }, &.{ 0xd0 }, 4, .rex, .none }, + .{ .sal, .m1, &.{ .rm16, .unity }, &.{ 0xd1 }, 4, .short, .none }, + .{ .sal, .m1, &.{ .rm32, .unity }, &.{ 0xd1 }, 4, .none, .none }, + .{ .sal, .m1, &.{ .rm64, .unity }, &.{ 0xd1 }, 4, .long, .none }, + .{ .sal, .mc, &.{ .rm8, .cl }, &.{ 0xd2 }, 4, .none, .none }, + .{ .sal, .mc, &.{ .rm8, .cl }, &.{ 0xd2 }, 4, .rex, .none }, + .{ .sal, .mc, &.{ .rm16, .cl }, &.{ 0xd3 }, 4, .short, .none }, + .{ .sal, .mc, &.{ .rm32, .cl }, &.{ 0xd3 }, 4, .none, .none }, + .{ .sal, .mc, &.{ .rm64, .cl }, &.{ 0xd3 }, 4, .long, .none }, + .{ .sal, .mi, &.{ .rm8, .imm8 }, &.{ 0xc0 }, 4, .none, .none }, + .{ .sal, .mi, &.{ .rm8, .imm8 }, &.{ 0xc0 }, 4, .rex, .none }, + .{ .sal, .mi, &.{ .rm16, .imm8 }, &.{ 0xc1 }, 4, .short, .none }, + .{ .sal, .mi, &.{ .rm32, .imm8 }, &.{ 0xc1 }, 4, .none, .none }, + .{ .sal, .mi, &.{ .rm64, .imm8 }, &.{ 0xc1 }, 4, .long, .none }, + + .{ .sar, .m1, &.{ .rm8, .unity }, &.{ 0xd0 }, 7, .none, .none }, + .{ .sar, .m1, &.{ .rm8, .unity }, &.{ 0xd0 }, 7, .rex, .none }, + .{ .sar, .m1, &.{ .rm16, .unity }, &.{ 0xd1 }, 7, .short, .none }, + .{ .sar, .m1, &.{ .rm32, .unity }, 
&.{ 0xd1 }, 7, .none, .none }, + .{ .sar, .m1, &.{ .rm64, .unity }, &.{ 0xd1 }, 7, .long, .none }, + .{ .sar, .mc, &.{ .rm8, .cl }, &.{ 0xd2 }, 7, .none, .none }, + .{ .sar, .mc, &.{ .rm8, .cl }, &.{ 0xd2 }, 7, .rex, .none }, + .{ .sar, .mc, &.{ .rm16, .cl }, &.{ 0xd3 }, 7, .short, .none }, + .{ .sar, .mc, &.{ .rm32, .cl }, &.{ 0xd3 }, 7, .none, .none }, + .{ .sar, .mc, &.{ .rm64, .cl }, &.{ 0xd3 }, 7, .long, .none }, + .{ .sar, .mi, &.{ .rm8, .imm8 }, &.{ 0xc0 }, 7, .none, .none }, + .{ .sar, .mi, &.{ .rm8, .imm8 }, &.{ 0xc0 }, 7, .rex, .none }, + .{ .sar, .mi, &.{ .rm16, .imm8 }, &.{ 0xc1 }, 7, .short, .none }, + .{ .sar, .mi, &.{ .rm32, .imm8 }, &.{ 0xc1 }, 7, .none, .none }, + .{ .sar, .mi, &.{ .rm64, .imm8 }, &.{ 0xc1 }, 7, .long, .none }, + + .{ .sbb, .zi, &.{ .al, .imm8 }, &.{ 0x1c }, 0, .none, .none }, + .{ .sbb, .zi, &.{ .ax, .imm16 }, &.{ 0x1d }, 0, .short, .none }, + .{ .sbb, .zi, &.{ .eax, .imm32 }, &.{ 0x1d }, 0, .none, .none }, + .{ .sbb, .zi, &.{ .rax, .imm32s }, &.{ 0x1d }, 0, .long, .none }, + .{ .sbb, .mi, &.{ .rm8, .imm8 }, &.{ 0x80 }, 3, .none, .none }, + .{ .sbb, .mi, &.{ .rm8, .imm8 }, &.{ 0x80 }, 3, .rex, .none }, + .{ .sbb, .mi, &.{ .rm16, .imm16 }, &.{ 0x81 }, 3, .short, .none }, + .{ .sbb, .mi, &.{ .rm32, .imm32 }, &.{ 0x81 }, 3, .none, .none }, + .{ .sbb, .mi, &.{ .rm64, .imm32s }, &.{ 0x81 }, 3, .long, .none }, + .{ .sbb, .mi, &.{ .rm16, .imm8s }, &.{ 0x83 }, 3, .short, .none }, + .{ .sbb, .mi, &.{ .rm32, .imm8s }, &.{ 0x83 }, 3, .none, .none }, + .{ .sbb, .mi, &.{ .rm64, .imm8s }, &.{ 0x83 }, 3, .long, .none }, + .{ .sbb, .mr, &.{ .rm8, .r8 }, &.{ 0x18 }, 0, .none, .none }, + .{ .sbb, .mr, &.{ .rm8, .r8 }, &.{ 0x18 }, 0, .rex, .none }, + .{ .sbb, .mr, &.{ .rm16, .r16 }, &.{ 0x19 }, 0, .short, .none }, + .{ .sbb, .mr, &.{ .rm32, .r32 }, &.{ 0x19 }, 0, .none, .none }, + .{ .sbb, .mr, &.{ .rm64, .r64 }, &.{ 0x19 }, 0, .long, .none }, + .{ .sbb, .rm, &.{ .r8, .rm8 }, &.{ 0x1a }, 0, .none, .none }, + .{ .sbb, .rm, &.{ .r8, .rm8 }, &.{ 0x1a 
}, 0, .rex, .none }, + .{ .sbb, .rm, &.{ .r16, .rm16 }, &.{ 0x1b }, 0, .short, .none }, + .{ .sbb, .rm, &.{ .r32, .rm32 }, &.{ 0x1b }, 0, .none, .none }, + .{ .sbb, .rm, &.{ .r64, .rm64 }, &.{ 0x1b }, 0, .long, .none }, + + .{ .scas, .np, &.{ .m8 }, &.{ 0xae }, 0, .none, .none }, + .{ .scas, .np, &.{ .m16 }, &.{ 0xaf }, 0, .short, .none }, + .{ .scas, .np, &.{ .m32 }, &.{ 0xaf }, 0, .none, .none }, + .{ .scas, .np, &.{ .m64 }, &.{ 0xaf }, 0, .long, .none }, + + .{ .scasb, .np, &.{}, &.{ 0xae }, 0, .none, .none }, + .{ .scasw, .np, &.{}, &.{ 0xaf }, 0, .short, .none }, + .{ .scasd, .np, &.{}, &.{ 0xaf }, 0, .none, .none }, + .{ .scasq, .np, &.{}, &.{ 0xaf }, 0, .long, .none }, + + .{ .seta, .m, &.{ .rm8 }, &.{ 0x0f, 0x97 }, 0, .none, .none }, + .{ .seta, .m, &.{ .rm8 }, &.{ 0x0f, 0x97 }, 0, .rex, .none }, + .{ .setae, .m, &.{ .rm8 }, &.{ 0x0f, 0x93 }, 0, .none, .none }, + .{ .setae, .m, &.{ .rm8 }, &.{ 0x0f, 0x93 }, 0, .rex, .none }, + .{ .setb, .m, &.{ .rm8 }, &.{ 0x0f, 0x92 }, 0, .none, .none }, + .{ .setb, .m, &.{ .rm8 }, &.{ 0x0f, 0x92 }, 0, .rex, .none }, + .{ .setbe, .m, &.{ .rm8 }, &.{ 0x0f, 0x96 }, 0, .none, .none }, + .{ .setbe, .m, &.{ .rm8 }, &.{ 0x0f, 0x96 }, 0, .rex, .none }, + .{ .setc, .m, &.{ .rm8 }, &.{ 0x0f, 0x92 }, 0, .none, .none }, + .{ .setc, .m, &.{ .rm8 }, &.{ 0x0f, 0x92 }, 0, .rex, .none }, + .{ .sete, .m, &.{ .rm8 }, &.{ 0x0f, 0x94 }, 0, .none, .none }, + .{ .sete, .m, &.{ .rm8 }, &.{ 0x0f, 0x94 }, 0, .rex, .none }, + .{ .setg, .m, &.{ .rm8 }, &.{ 0x0f, 0x9f }, 0, .none, .none }, + .{ .setg, .m, &.{ .rm8 }, &.{ 0x0f, 0x9f }, 0, .rex, .none }, + .{ .setge, .m, &.{ .rm8 }, &.{ 0x0f, 0x9d }, 0, .none, .none }, + .{ .setge, .m, &.{ .rm8 }, &.{ 0x0f, 0x9d }, 0, .rex, .none }, + .{ .setl, .m, &.{ .rm8 }, &.{ 0x0f, 0x9c }, 0, .none, .none }, + .{ .setl, .m, &.{ .rm8 }, &.{ 0x0f, 0x9c }, 0, .rex, .none }, + .{ .setle, .m, &.{ .rm8 }, &.{ 0x0f, 0x9e }, 0, .none, .none }, + .{ .setle, .m, &.{ .rm8 }, &.{ 0x0f, 0x9e }, 0, .rex, .none }, + .{ .setna, 
.m, &.{ .rm8 }, &.{ 0x0f, 0x96 }, 0, .none, .none }, + .{ .setna, .m, &.{ .rm8 }, &.{ 0x0f, 0x96 }, 0, .rex, .none }, + .{ .setnae, .m, &.{ .rm8 }, &.{ 0x0f, 0x92 }, 0, .none, .none }, + .{ .setnae, .m, &.{ .rm8 }, &.{ 0x0f, 0x92 }, 0, .rex, .none }, + .{ .setnb, .m, &.{ .rm8 }, &.{ 0x0f, 0x93 }, 0, .none, .none }, + .{ .setnb, .m, &.{ .rm8 }, &.{ 0x0f, 0x93 }, 0, .rex, .none }, + .{ .setnbe, .m, &.{ .rm8 }, &.{ 0x0f, 0x97 }, 0, .none, .none }, + .{ .setnbe, .m, &.{ .rm8 }, &.{ 0x0f, 0x97 }, 0, .rex, .none }, + .{ .setnc, .m, &.{ .rm8 }, &.{ 0x0f, 0x93 }, 0, .none, .none }, + .{ .setnc, .m, &.{ .rm8 }, &.{ 0x0f, 0x93 }, 0, .rex, .none }, + .{ .setne, .m, &.{ .rm8 }, &.{ 0x0f, 0x95 }, 0, .none, .none }, + .{ .setne, .m, &.{ .rm8 }, &.{ 0x0f, 0x95 }, 0, .rex, .none }, + .{ .setng, .m, &.{ .rm8 }, &.{ 0x0f, 0x9e }, 0, .none, .none }, + .{ .setng, .m, &.{ .rm8 }, &.{ 0x0f, 0x9e }, 0, .rex, .none }, + .{ .setnge, .m, &.{ .rm8 }, &.{ 0x0f, 0x9c }, 0, .none, .none }, + .{ .setnge, .m, &.{ .rm8 }, &.{ 0x0f, 0x9c }, 0, .rex, .none }, + .{ .setnl, .m, &.{ .rm8 }, &.{ 0x0f, 0x9d }, 0, .none, .none }, + .{ .setnl, .m, &.{ .rm8 }, &.{ 0x0f, 0x9d }, 0, .rex, .none }, + .{ .setnle, .m, &.{ .rm8 }, &.{ 0x0f, 0x9f }, 0, .none, .none }, + .{ .setnle, .m, &.{ .rm8 }, &.{ 0x0f, 0x9f }, 0, .rex, .none }, + .{ .setno, .m, &.{ .rm8 }, &.{ 0x0f, 0x91 }, 0, .none, .none }, + .{ .setno, .m, &.{ .rm8 }, &.{ 0x0f, 0x91 }, 0, .rex, .none }, + .{ .setnp, .m, &.{ .rm8 }, &.{ 0x0f, 0x9b }, 0, .none, .none }, + .{ .setnp, .m, &.{ .rm8 }, &.{ 0x0f, 0x9b }, 0, .rex, .none }, + .{ .setns, .m, &.{ .rm8 }, &.{ 0x0f, 0x99 }, 0, .none, .none }, + .{ .setns, .m, &.{ .rm8 }, &.{ 0x0f, 0x99 }, 0, .rex, .none }, + .{ .setnz, .m, &.{ .rm8 }, &.{ 0x0f, 0x95 }, 0, .none, .none }, + .{ .setnz, .m, &.{ .rm8 }, &.{ 0x0f, 0x95 }, 0, .rex, .none }, + .{ .seto, .m, &.{ .rm8 }, &.{ 0x0f, 0x90 }, 0, .none, .none }, + .{ .seto, .m, &.{ .rm8 }, &.{ 0x0f, 0x90 }, 0, .rex, .none }, + .{ .setp, .m, &.{ .rm8 }, &.{ 0x0f, 
0x9a }, 0, .none, .none }, + .{ .setp, .m, &.{ .rm8 }, &.{ 0x0f, 0x9a }, 0, .rex, .none }, + .{ .setpe, .m, &.{ .rm8 }, &.{ 0x0f, 0x9a }, 0, .none, .none }, + .{ .setpe, .m, &.{ .rm8 }, &.{ 0x0f, 0x9a }, 0, .rex, .none }, + .{ .setpo, .m, &.{ .rm8 }, &.{ 0x0f, 0x9b }, 0, .none, .none }, + .{ .setpo, .m, &.{ .rm8 }, &.{ 0x0f, 0x9b }, 0, .rex, .none }, + .{ .sets, .m, &.{ .rm8 }, &.{ 0x0f, 0x98 }, 0, .none, .none }, + .{ .sets, .m, &.{ .rm8 }, &.{ 0x0f, 0x98 }, 0, .rex, .none }, + .{ .setz, .m, &.{ .rm8 }, &.{ 0x0f, 0x94 }, 0, .none, .none }, + .{ .setz, .m, &.{ .rm8 }, &.{ 0x0f, 0x94 }, 0, .rex, .none }, + + .{ .sfence, .np, &.{}, &.{ 0x0f, 0xae, 0xf8 }, 0, .none, .none }, + + .{ .shl, .m1, &.{ .rm8, .unity }, &.{ 0xd0 }, 4, .none, .none }, + .{ .shl, .m1, &.{ .rm8, .unity }, &.{ 0xd0 }, 4, .rex, .none }, + .{ .shl, .m1, &.{ .rm16, .unity }, &.{ 0xd1 }, 4, .short, .none }, + .{ .shl, .m1, &.{ .rm32, .unity }, &.{ 0xd1 }, 4, .none, .none }, + .{ .shl, .m1, &.{ .rm64, .unity }, &.{ 0xd1 }, 4, .long, .none }, + .{ .shl, .mc, &.{ .rm8, .cl }, &.{ 0xd2 }, 4, .none, .none }, + .{ .shl, .mc, &.{ .rm8, .cl }, &.{ 0xd2 }, 4, .rex, .none }, + .{ .shl, .mc, &.{ .rm16, .cl }, &.{ 0xd3 }, 4, .short, .none }, + .{ .shl, .mc, &.{ .rm32, .cl }, &.{ 0xd3 }, 4, .none, .none }, + .{ .shl, .mc, &.{ .rm64, .cl }, &.{ 0xd3 }, 4, .long, .none }, + .{ .shl, .mi, &.{ .rm8, .imm8 }, &.{ 0xc0 }, 4, .none, .none }, + .{ .shl, .mi, &.{ .rm8, .imm8 }, &.{ 0xc0 }, 4, .rex, .none }, + .{ .shl, .mi, &.{ .rm16, .imm8 }, &.{ 0xc1 }, 4, .short, .none }, + .{ .shl, .mi, &.{ .rm32, .imm8 }, &.{ 0xc1 }, 4, .none, .none }, + .{ .shl, .mi, &.{ .rm64, .imm8 }, &.{ 0xc1 }, 4, .long, .none }, + + .{ .shld, .mri, &.{ .rm16, .r16, .imm8 }, &.{ 0x0f, 0xa4 }, 0, .short, .none }, + .{ .shld, .mrc, &.{ .rm16, .r16, .cl }, &.{ 0x0f, 0xa5 }, 0, .short, .none }, + .{ .shld, .mri, &.{ .rm32, .r32, .imm8 }, &.{ 0x0f, 0xa4 }, 0, .none, .none }, + .{ .shld, .mri, &.{ .rm64, .r64, .imm8 }, &.{ 0x0f, 0xa4 }, 0, .long, .none 
}, + .{ .shld, .mrc, &.{ .rm32, .r32, .cl }, &.{ 0x0f, 0xa5 }, 0, .none, .none }, + .{ .shld, .mrc, &.{ .rm64, .r64, .cl }, &.{ 0x0f, 0xa5 }, 0, .long, .none }, + + .{ .shr, .m1, &.{ .rm8, .unity }, &.{ 0xd0 }, 5, .none, .none }, + .{ .shr, .m1, &.{ .rm8, .unity }, &.{ 0xd0 }, 5, .rex, .none }, + .{ .shr, .m1, &.{ .rm16, .unity }, &.{ 0xd1 }, 5, .short, .none }, + .{ .shr, .m1, &.{ .rm32, .unity }, &.{ 0xd1 }, 5, .none, .none }, + .{ .shr, .m1, &.{ .rm64, .unity }, &.{ 0xd1 }, 5, .long, .none }, + .{ .shr, .mc, &.{ .rm8, .cl }, &.{ 0xd2 }, 5, .none, .none }, + .{ .shr, .mc, &.{ .rm8, .cl }, &.{ 0xd2 }, 5, .rex, .none }, + .{ .shr, .mc, &.{ .rm16, .cl }, &.{ 0xd3 }, 5, .short, .none }, + .{ .shr, .mc, &.{ .rm32, .cl }, &.{ 0xd3 }, 5, .none, .none }, + .{ .shr, .mc, &.{ .rm64, .cl }, &.{ 0xd3 }, 5, .long, .none }, + .{ .shr, .mi, &.{ .rm8, .imm8 }, &.{ 0xc0 }, 5, .none, .none }, + .{ .shr, .mi, &.{ .rm8, .imm8 }, &.{ 0xc0 }, 5, .rex, .none }, + .{ .shr, .mi, &.{ .rm16, .imm8 }, &.{ 0xc1 }, 5, .short, .none }, + .{ .shr, .mi, &.{ .rm32, .imm8 }, &.{ 0xc1 }, 5, .none, .none }, + .{ .shr, .mi, &.{ .rm64, .imm8 }, &.{ 0xc1 }, 5, .long, .none }, + + .{ .shrd, .mri, &.{ .rm16, .r16, .imm8 }, &.{ 0x0f, 0xac }, 0, .short, .none }, + .{ .shrd, .mrc, &.{ .rm16, .r16, .cl }, &.{ 0x0f, 0xad }, 0, .short, .none }, + .{ .shrd, .mri, &.{ .rm32, .r32, .imm8 }, &.{ 0x0f, 0xac }, 0, .none, .none }, + .{ .shrd, .mri, &.{ .rm64, .r64, .imm8 }, &.{ 0x0f, 0xac }, 0, .long, .none }, + .{ .shrd, .mrc, &.{ .rm32, .r32, .cl }, &.{ 0x0f, 0xad }, 0, .none, .none }, + .{ .shrd, .mrc, &.{ .rm64, .r64, .cl }, &.{ 0x0f, 0xad }, 0, .long, .none }, + + .{ .stos, .np, &.{ .m8 }, &.{ 0xaa }, 0, .none, .none }, + .{ .stos, .np, &.{ .m16 }, &.{ 0xab }, 0, .short, .none }, + .{ .stos, .np, &.{ .m32 }, &.{ 0xab }, 0, .none, .none }, + .{ .stos, .np, &.{ .m64 }, &.{ 0xab }, 0, .long, .none }, + + .{ .stosb, .np, &.{}, &.{ 0xaa }, 0, .none, .none }, + .{ .stosw, .np, &.{}, &.{ 0xab }, 0, .short, .none }, + 
.{ .stosd, .np, &.{}, &.{ 0xab }, 0, .none, .none }, + .{ .stosq, .np, &.{}, &.{ 0xab }, 0, .long, .none }, + + .{ .sub, .zi, &.{ .al, .imm8 }, &.{ 0x2c }, 0, .none, .none }, + .{ .sub, .zi, &.{ .ax, .imm16 }, &.{ 0x2d }, 0, .short, .none }, + .{ .sub, .zi, &.{ .eax, .imm32 }, &.{ 0x2d }, 0, .none, .none }, + .{ .sub, .zi, &.{ .rax, .imm32s }, &.{ 0x2d }, 0, .long, .none }, + .{ .sub, .mi, &.{ .rm8, .imm8 }, &.{ 0x80 }, 5, .none, .none }, + .{ .sub, .mi, &.{ .rm8, .imm8 }, &.{ 0x80 }, 5, .rex, .none }, + .{ .sub, .mi, &.{ .rm16, .imm16 }, &.{ 0x81 }, 5, .short, .none }, + .{ .sub, .mi, &.{ .rm32, .imm32 }, &.{ 0x81 }, 5, .none, .none }, + .{ .sub, .mi, &.{ .rm64, .imm32s }, &.{ 0x81 }, 5, .long, .none }, + .{ .sub, .mi, &.{ .rm16, .imm8s }, &.{ 0x83 }, 5, .short, .none }, + .{ .sub, .mi, &.{ .rm32, .imm8s }, &.{ 0x83 }, 5, .none, .none }, + .{ .sub, .mi, &.{ .rm64, .imm8s }, &.{ 0x83 }, 5, .long, .none }, + .{ .sub, .mr, &.{ .rm8, .r8 }, &.{ 0x28 }, 0, .none, .none }, + .{ .sub, .mr, &.{ .rm8, .r8 }, &.{ 0x28 }, 0, .rex, .none }, + .{ .sub, .mr, &.{ .rm16, .r16 }, &.{ 0x29 }, 0, .short, .none }, + .{ .sub, .mr, &.{ .rm32, .r32 }, &.{ 0x29 }, 0, .none, .none }, + .{ .sub, .mr, &.{ .rm64, .r64 }, &.{ 0x29 }, 0, .long, .none }, + .{ .sub, .rm, &.{ .r8, .rm8 }, &.{ 0x2a }, 0, .none, .none }, + .{ .sub, .rm, &.{ .r8, .rm8 }, &.{ 0x2a }, 0, .rex, .none }, + .{ .sub, .rm, &.{ .r16, .rm16 }, &.{ 0x2b }, 0, .short, .none }, + .{ .sub, .rm, &.{ .r32, .rm32 }, &.{ 0x2b }, 0, .none, .none }, + .{ .sub, .rm, &.{ .r64, .rm64 }, &.{ 0x2b }, 0, .long, .none }, + + .{ .syscall, .np, &.{}, &.{ 0x0f, 0x05 }, 0, .none, .none }, + + .{ .@"test", .zi, &.{ .al, .imm8 }, &.{ 0xa8 }, 0, .none, .none }, + .{ .@"test", .zi, &.{ .ax, .imm16 }, &.{ 0xa9 }, 0, .short, .none }, + .{ .@"test", .zi, &.{ .eax, .imm32 }, &.{ 0xa9 }, 0, .none, .none }, + .{ .@"test", .zi, &.{ .rax, .imm32s }, &.{ 0xa9 }, 0, .long, .none }, + .{ .@"test", .mi, &.{ .rm8, .imm8 }, &.{ 0xf6 }, 0, .none, .none }, + .{ 
.@"test", .mi, &.{ .rm8, .imm8 }, &.{ 0xf6 }, 0, .rex, .none }, + .{ .@"test", .mi, &.{ .rm16, .imm16 }, &.{ 0xf7 }, 0, .short, .none }, + .{ .@"test", .mi, &.{ .rm32, .imm32 }, &.{ 0xf7 }, 0, .none, .none }, + .{ .@"test", .mi, &.{ .rm64, .imm32s }, &.{ 0xf7 }, 0, .long, .none }, + .{ .@"test", .mr, &.{ .rm8, .r8 }, &.{ 0x84 }, 0, .none, .none }, + .{ .@"test", .mr, &.{ .rm8, .r8 }, &.{ 0x84 }, 0, .rex, .none }, + .{ .@"test", .mr, &.{ .rm16, .r16 }, &.{ 0x85 }, 0, .short, .none }, + .{ .@"test", .mr, &.{ .rm32, .r32 }, &.{ 0x85 }, 0, .none, .none }, + .{ .@"test", .mr, &.{ .rm64, .r64 }, &.{ 0x85 }, 0, .long, .none }, + + .{ .tzcnt, .rm, &.{ .r16, .rm16 }, &.{ 0xf3, 0x0f, 0xbc }, 0, .short, .none }, + .{ .tzcnt, .rm, &.{ .r32, .rm32 }, &.{ 0xf3, 0x0f, 0xbc }, 0, .none, .none }, + .{ .tzcnt, .rm, &.{ .r64, .rm64 }, &.{ 0xf3, 0x0f, 0xbc }, 0, .long, .none }, + + .{ .ud2, .np, &.{}, &.{ 0x0f, 0x0b }, 0, .none, .none }, + + .{ .xadd, .mr, &.{ .rm8, .r8 }, &.{ 0x0f, 0xc0 }, 0, .none, .none }, + .{ .xadd, .mr, &.{ .rm8, .r8 }, &.{ 0x0f, 0xc0 }, 0, .rex, .none }, + .{ .xadd, .mr, &.{ .rm16, .r16 }, &.{ 0x0f, 0xc1 }, 0, .short, .none }, + .{ .xadd, .mr, &.{ .rm32, .r32 }, &.{ 0x0f, 0xc1 }, 0, .none, .none }, + .{ .xadd, .mr, &.{ .rm64, .r64 }, &.{ 0x0f, 0xc1 }, 0, .long, .none }, + + .{ .xchg, .o, &.{ .ax, .r16 }, &.{ 0x90 }, 0, .short, .none }, + .{ .xchg, .o, &.{ .r16, .ax }, &.{ 0x90 }, 0, .short, .none }, + .{ .xchg, .o, &.{ .eax, .r32 }, &.{ 0x90 }, 0, .none, .none }, + .{ .xchg, .o, &.{ .rax, .r64 }, &.{ 0x90 }, 0, .long, .none }, + .{ .xchg, .o, &.{ .r32, .eax }, &.{ 0x90 }, 0, .none, .none }, + .{ .xchg, .o, &.{ .r64, .rax }, &.{ 0x90 }, 0, .long, .none }, + .{ .xchg, .mr, &.{ .rm8, .r8 }, &.{ 0x86 }, 0, .none, .none }, + .{ .xchg, .mr, &.{ .rm8, .r8 }, &.{ 0x86 }, 0, .rex, .none }, + .{ .xchg, .rm, &.{ .r8, .rm8 }, &.{ 0x86 }, 0, .none, .none }, + .{ .xchg, .rm, &.{ .r8, .rm8 }, &.{ 0x86 }, 0, .rex, .none }, + .{ .xchg, .mr, &.{ .rm16, .r16 }, &.{ 0x87 }, 0, 
.short, .none }, + .{ .xchg, .rm, &.{ .r16, .rm16 }, &.{ 0x87 }, 0, .short, .none }, + .{ .xchg, .mr, &.{ .rm32, .r32 }, &.{ 0x87 }, 0, .none, .none }, + .{ .xchg, .mr, &.{ .rm64, .r64 }, &.{ 0x87 }, 0, .long, .none }, + .{ .xchg, .rm, &.{ .r32, .rm32 }, &.{ 0x87 }, 0, .none, .none }, + .{ .xchg, .rm, &.{ .r64, .rm64 }, &.{ 0x87 }, 0, .long, .none }, + + .{ .xor, .zi, &.{ .al, .imm8 }, &.{ 0x34 }, 0, .none, .none }, + .{ .xor, .zi, &.{ .ax, .imm16 }, &.{ 0x35 }, 0, .short, .none }, + .{ .xor, .zi, &.{ .eax, .imm32 }, &.{ 0x35 }, 0, .none, .none }, + .{ .xor, .zi, &.{ .rax, .imm32s }, &.{ 0x35 }, 0, .long, .none }, + .{ .xor, .mi, &.{ .rm8, .imm8 }, &.{ 0x80 }, 6, .none, .none }, + .{ .xor, .mi, &.{ .rm8, .imm8 }, &.{ 0x80 }, 6, .rex, .none }, + .{ .xor, .mi, &.{ .rm16, .imm16 }, &.{ 0x81 }, 6, .short, .none }, + .{ .xor, .mi, &.{ .rm32, .imm32 }, &.{ 0x81 }, 6, .none, .none }, + .{ .xor, .mi, &.{ .rm64, .imm32s }, &.{ 0x81 }, 6, .long, .none }, + .{ .xor, .mi, &.{ .rm16, .imm8s }, &.{ 0x83 }, 6, .short, .none }, + .{ .xor, .mi, &.{ .rm32, .imm8s }, &.{ 0x83 }, 6, .none, .none }, + .{ .xor, .mi, &.{ .rm64, .imm8s }, &.{ 0x83 }, 6, .long, .none }, + .{ .xor, .mr, &.{ .rm8, .r8 }, &.{ 0x30 }, 0, .none, .none }, + .{ .xor, .mr, &.{ .rm8, .r8 }, &.{ 0x30 }, 0, .rex, .none }, + .{ .xor, .mr, &.{ .rm16, .r16 }, &.{ 0x31 }, 0, .short, .none }, + .{ .xor, .mr, &.{ .rm32, .r32 }, &.{ 0x31 }, 0, .none, .none }, + .{ .xor, .mr, &.{ .rm64, .r64 }, &.{ 0x31 }, 0, .long, .none }, + .{ .xor, .rm, &.{ .r8, .rm8 }, &.{ 0x32 }, 0, .none, .none }, + .{ .xor, .rm, &.{ .r8, .rm8 }, &.{ 0x32 }, 0, .rex, .none }, + .{ .xor, .rm, &.{ .r16, .rm16 }, &.{ 0x33 }, 0, .short, .none }, + .{ .xor, .rm, &.{ .r32, .rm32 }, &.{ 0x33 }, 0, .none, .none }, + .{ .xor, .rm, &.{ .r64, .rm64 }, &.{ 0x33 }, 0, .long, .none }, // SSE - .{ .addss, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x58 }, 0, .sse }, + .{ .addps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x58 }, 0, .none, .sse }, - .{ .andnps, .rm, &.{ 
.xmm, .xmm_m128 }, &.{ 0x0f, 0x55 }, 0, .sse }, + .{ .addss, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x58 }, 0, .none, .sse }, - .{ .andps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x54 }, 0, .sse }, + .{ .andnps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x55 }, 0, .none, .sse }, - .{ .cmpss, .rmi, &.{ .xmm, .xmm_m32, .imm8 }, &.{ 0xf3, 0x0f, 0xc2 }, 0, .sse }, + .{ .andps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x54 }, 0, .none, .sse }, - .{ .cvtsi2ss, .rm, &.{ .xmm, .rm32 }, &.{ 0xf3, 0x0f, 0x2a }, 0, .sse }, - .{ .cvtsi2ss, .rm, &.{ .xmm, .rm64 }, &.{ 0xf3, 0x0f, 0x2a }, 0, .sse_long }, + .{ .cmpss, .rmi, &.{ .xmm, .xmm_m32, .imm8 }, &.{ 0xf3, 0x0f, 0xc2 }, 0, .none, .sse }, - .{ .divss, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x5e }, 0, .sse }, + .{ .cvtsi2ss, .rm, &.{ .xmm, .rm32 }, &.{ 0xf3, 0x0f, 0x2a }, 0, .none, .sse }, + .{ .cvtsi2ss, .rm, &.{ .xmm, .rm64 }, &.{ 0xf3, 0x0f, 0x2a }, 0, .long, .sse }, - .{ .maxss, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x5f }, 0, .sse }, + .{ .divps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x5e }, 0, .none, .sse }, - .{ .minss, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x5d }, 0, .sse }, + .{ .divss, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x5e }, 0, .none, .sse }, - .{ .movaps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x28 }, 0, .sse }, - .{ .movaps, .mr, &.{ .xmm_m128, .xmm }, &.{ 0x0f, 0x29 }, 0, .sse }, + .{ .maxps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x5f }, 0, .none, .sse }, - .{ .movss, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x10 }, 0, .sse }, - .{ .movss, .mr, &.{ .xmm_m32, .xmm }, &.{ 0xf3, 0x0f, 0x11 }, 0, .sse }, + .{ .maxss, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x5f }, 0, .none, .sse }, - .{ .movups, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x10 }, 0, .sse }, - .{ .movups, .mr, &.{ .xmm_m128, .xmm }, &.{ 0x0f, 0x11 }, 0, .sse }, + .{ .minps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x5d }, 0, .none, .sse }, - .{ .mulss, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x59 }, 0, .sse }, + .{ .minss, .rm, &.{ 
.xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x5d }, 0, .none, .sse }, - .{ .orps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x56 }, 0, .sse }, + .{ .movaps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x28 }, 0, .none, .sse }, + .{ .movaps, .mr, &.{ .xmm_m128, .xmm }, &.{ 0x0f, 0x29 }, 0, .none, .sse }, - .{ .subss, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x5c }, 0, .sse }, + .{ .movhlps, .rm, &.{ .xmm, .xmm }, &.{ 0x0f, 0x12 }, 0, .none, .sse }, - .{ .sqrtps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x51 }, 0, .sse }, - .{ .sqrtss, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x51 }, 0, .sse }, + .{ .movss, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x10 }, 0, .none, .sse }, + .{ .movss, .mr, &.{ .xmm_m32, .xmm }, &.{ 0xf3, 0x0f, 0x11 }, 0, .none, .sse }, - .{ .ucomiss, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0x0f, 0x2e }, 0, .sse }, + .{ .movups, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x10 }, 0, .none, .sse }, + .{ .movups, .mr, &.{ .xmm_m128, .xmm }, &.{ 0x0f, 0x11 }, 0, .none, .sse }, - .{ .xorps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x57 }, 0, .sse }, + .{ .mulps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x59 }, 0, .none, .sse }, + + .{ .mulss, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x59 }, 0, .none, .sse }, + + .{ .orps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x56 }, 0, .none, .sse }, + + .{ .subps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x5c }, 0, .none, .sse }, + + .{ .subss, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x5c }, 0, .none, .sse }, + + .{ .sqrtps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x51 }, 0, .none, .sse }, + + .{ .sqrtss, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x51 }, 0, .none, .sse }, + + .{ .ucomiss, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0x0f, 0x2e }, 0, .none, .sse }, + + .{ .xorps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x57 }, 0, .none, .sse }, // SSE2 - .{ .addsd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x58 }, 0, .sse2 }, + .{ .addpd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x58 }, 0, .none, .sse2 }, + + .{ .addsd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x58 
}, 0, .none, .sse2 }, + + .{ .andnpd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x55 }, 0, .none, .sse2 }, + + .{ .andpd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x54 }, 0, .none, .sse2 }, + + .{ .cmpsd, .rmi, &.{ .xmm, .xmm_m64, .imm8 }, &.{ 0xf2, 0x0f, 0xc2 }, 0, .none, .sse2 }, + + .{ .cvtsd2ss, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x5a }, 0, .none, .sse2 }, + + .{ .cvtsi2sd, .rm, &.{ .xmm, .rm32 }, &.{ 0xf2, 0x0f, 0x2a }, 0, .none, .sse2 }, + .{ .cvtsi2sd, .rm, &.{ .xmm, .rm64 }, &.{ 0xf2, 0x0f, 0x2a }, 0, .long, .sse2 }, + + .{ .cvtss2sd, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x5a }, 0, .none, .sse2 }, + + .{ .divpd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x5e }, 0, .none, .sse2 }, + + .{ .divsd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x5e }, 0, .none, .sse2 }, + + .{ .maxpd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x5f }, 0, .none, .sse2 }, + + .{ .maxsd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x5f }, 0, .none, .sse2 }, + + .{ .minpd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x5d }, 0, .none, .sse2 }, + + .{ .minsd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x5d }, 0, .none, .sse2 }, + + .{ .movapd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x28 }, 0, .none, .sse2 }, + .{ .movapd, .mr, &.{ .xmm_m128, .xmm }, &.{ 0x66, 0x0f, 0x29 }, 0, .none, .sse2 }, + + .{ .movd, .rm, &.{ .xmm, .rm32 }, &.{ 0x66, 0x0f, 0x6e }, 0, .none, .sse2 }, + .{ .movd, .mr, &.{ .rm32, .xmm }, &.{ 0x66, 0x0f, 0x7e }, 0, .none, .sse2 }, - .{ .andnpd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x55 }, 0, .sse2 }, + .{ .movq, .rm, &.{ .xmm, .rm64 }, &.{ 0x66, 0x0f, 0x6e }, 0, .long, .sse2 }, + .{ .movq, .mr, &.{ .rm64, .xmm }, &.{ 0x66, 0x0f, 0x7e }, 0, .long, .sse2 }, - .{ .andpd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x54 }, 0, .sse2 }, + .{ .movq, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf3, 0x0f, 0x7e }, 0, .none, .sse2 }, + .{ .movq, .mr, &.{ .xmm_m64, .xmm }, &.{ 0x66, 0x0f, 0xd6 }, 0, .none, .sse2 }, - .{ .cmpsd, .rmi, &.{ .xmm, .xmm_m64, .imm8 
}, &.{ 0xf2, 0x0f, 0xc2 }, 0, .sse2 }, + .{ .movupd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x10 }, 0, .none, .sse2 }, + .{ .movupd, .mr, &.{ .xmm_m128, .xmm }, &.{ 0x66, 0x0f, 0x11 }, 0, .none, .sse2 }, - .{ .cvtsd2ss, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x5a }, 0, .sse2 }, + .{ .mulpd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x59 }, 0, .none, .sse2 }, - .{ .cvtsi2sd, .rm, &.{ .xmm, .rm32 }, &.{ 0xf2, 0x0f, 0x2a }, 0, .sse2 }, - .{ .cvtsi2sd, .rm, &.{ .xmm, .rm64 }, &.{ 0xf2, 0x0f, 0x2a }, 0, .sse2_long }, + .{ .mulsd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x59 }, 0, .none, .sse2 }, - .{ .cvtss2sd, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x5a }, 0, .sse2 }, + .{ .orpd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x56 }, 0, .none, .sse2 }, - .{ .divsd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x5e }, 0, .sse2 }, + .{ .pextrw, .rmi, &.{ .r32, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0xc5 }, 0, .none, .sse2 }, - .{ .maxsd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x5f }, 0, .sse2 }, + .{ .pinsrw, .rmi, &.{ .xmm, .r32_m16, .imm8 }, &.{ 0x66, 0x0f, 0xc4 }, 0, .none, .sse2 }, - .{ .minsd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x5d }, 0, .sse2 }, + .{ .pshufhw, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0xf3, 0x0f, 0x70 }, 0, .none, .sse2 }, - .{ .movapd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x28 }, 0, .sse2 }, - .{ .movapd, .mr, &.{ .xmm_m128, .xmm }, &.{ 0x66, 0x0f, 0x29 }, 0, .sse2 }, + .{ .pshuflw, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0xf2, 0x0f, 0x70 }, 0, .none, .sse2 }, - .{ .movd, .rm, &.{ .xmm, .rm32 }, &.{ 0x66, 0x0f, 0x6e }, 0, .sse2 }, - .{ .movd, .mr, &.{ .rm32, .xmm }, &.{ 0x66, 0x0f, 0x7e }, 0, .sse2 }, + .{ .psrlw, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd1 }, 0, .none, .sse2 }, + .{ .psrlw, .mi, &.{ .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x71 }, 2, .none, .sse2 }, + .{ .psrld, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd2 }, 0, .none, .sse2 }, + .{ .psrld, .mi, &.{ .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x72 }, 2, .none, .sse2 }, + 
.{ .psrlq, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd3 }, 0, .none, .sse2 }, + .{ .psrlq, .mi, &.{ .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x73 }, 2, .none, .sse2 }, - .{ .movq, .rm, &.{ .xmm, .rm64 }, &.{ 0x66, 0x0f, 0x6e }, 0, .sse2_long }, - .{ .movq, .mr, &.{ .rm64, .xmm }, &.{ 0x66, 0x0f, 0x7e }, 0, .sse2_long }, + .{ .punpckhbw, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x68 }, 0, .none, .sse2 }, + .{ .punpckhwd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x69 }, 0, .none, .sse2 }, + .{ .punpckhdq, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x6a }, 0, .none, .sse2 }, + .{ .punpckhqdq, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x6d }, 0, .none, .sse2 }, - .{ .movq, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf3, 0x0f, 0x7e }, 0, .sse2 }, - .{ .movq, .mr, &.{ .xmm_m64, .xmm }, &.{ 0x66, 0x0f, 0xd6 }, 0, .sse2 }, + .{ .punpcklbw, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x60 }, 0, .none, .sse2 }, + .{ .punpcklwd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x61 }, 0, .none, .sse2 }, + .{ .punpckldq, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x62 }, 0, .none, .sse2 }, + .{ .punpcklqdq, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x6c }, 0, .none, .sse2 }, - .{ .movupd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x10 }, 0, .sse2 }, - .{ .movupd, .mr, &.{ .xmm_m128, .xmm }, &.{ 0x66, 0x0f, 0x11 }, 0, .sse2 }, + .{ .sqrtpd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x51 }, 0, .none, .sse2 }, - .{ .mulsd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x59 }, 0, .sse2 }, + .{ .sqrtsd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x51 }, 0, .none, .sse2 }, - .{ .orpd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x56 }, 0, .sse2 }, + .{ .subpd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x5c }, 0, .none, .sse2 }, - .{ .pextrw, .mri, &.{ .r16, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0xc5 }, 0, .sse2 }, + .{ .subsd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x5c }, 0, .none, .sse2 }, - .{ .pinsrw, .rmi, &.{ .xmm, .rm16, .imm8 }, &.{ 0x66, 0x0f, 0xc4 }, 0, .sse2 }, + .{ .movsd, .rm, 
&.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x10 }, 0, .none, .sse2 }, + .{ .movsd, .mr, &.{ .xmm_m64, .xmm }, &.{ 0xf2, 0x0f, 0x11 }, 0, .none, .sse2 }, - .{ .sqrtpd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x51 }, 0, .sse2 }, - .{ .sqrtsd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x51 }, 0, .sse2 }, + .{ .ucomisd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0x66, 0x0f, 0x2e }, 0, .none, .sse2 }, - .{ .subsd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x5c }, 0, .sse2 }, + .{ .xorpd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x57 }, 0, .none, .sse2 }, - .{ .movsd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x10 }, 0, .sse2 }, - .{ .movsd, .mr, &.{ .xmm_m64, .xmm }, &.{ 0xf2, 0x0f, 0x11 }, 0, .sse2 }, + // SSE3 + .{ .movddup, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x12 }, 0, .none, .sse3 }, - .{ .ucomisd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0x66, 0x0f, 0x2e }, 0, .sse2 }, + .{ .movshdup, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0xf3, 0x0f, 0x16 }, 0, .none, .sse3 }, - .{ .xorpd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x57 }, 0, .sse2 }, + .{ .movsldup, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0xf3, 0x0f, 0x12 }, 0, .none, .sse3 }, // SSE4.1 - .{ .pextrw, .mri, &.{ .rm16, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x15 }, 0, .sse4_1 }, + .{ .pextrb, .mri, &.{ .r32_m8, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x14 }, 0, .none, .sse4_1 }, + .{ .pextrd, .mri, &.{ .rm32, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x16 }, 0, .none, .sse4_1 }, + .{ .pextrq, .mri, &.{ .rm64, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x16 }, 0, .long, .sse4_1 }, + + .{ .pextrw, .mri, &.{ .r32_m16, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x15 }, 0, .none, .sse4_1 }, + + .{ .pinsrb, .rmi, &.{ .xmm, .r32_m8, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x20 }, 0, .none, .sse4_1 }, + .{ .pinsrd, .rmi, &.{ .xmm, .rm32, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x22 }, 0, .none, .sse4_1 }, + .{ .pinsrq, .rmi, &.{ .xmm, .rm64, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x22 }, 0, .long, .sse4_1 }, + + .{ .roundpd, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x09 }, 0, 
.none, .sse4_1 }, + + .{ .roundps, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x08 }, 0, .none, .sse4_1 }, + + .{ .roundsd, .rmi, &.{ .xmm, .xmm_m64, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x0b }, 0, .none, .sse4_1 }, + + .{ .roundss, .rmi, &.{ .xmm, .xmm_m32, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x0a }, 0, .none, .sse4_1 }, + + // AVX + .{ .vaddpd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x58 }, 0, .vex_128_wig, .avx }, + .{ .vaddpd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x58 }, 0, .vex_256_wig, .avx }, + + .{ .vaddps, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x0f, 0x58 }, 0, .vex_128_wig, .avx }, + .{ .vaddps, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x0f, 0x58 }, 0, .vex_256_wig, .avx }, + + .{ .vaddsd, .rvm, &.{ .xmm, .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x58 }, 0, .vex_lig_wig, .avx }, + + .{ .vaddss, .rvm, &.{ .xmm, .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x58 }, 0, .vex_lig_wig, .avx }, + + .{ .vcvtsd2ss, .rvm, &.{ .xmm, .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x5a }, 0, .vex_lig_wig, .avx }, + + .{ .vcvtsi2sd, .rvm, &.{ .xmm, .xmm, .rm32 }, &.{ 0xf2, 0x0f, 0x2a }, 0, .vex_lig_w0, .avx }, + .{ .vcvtsi2sd, .rvm, &.{ .xmm, .xmm, .rm64 }, &.{ 0xf2, 0x0f, 0x2a }, 0, .vex_lig_w1, .avx }, + + .{ .vcvtsi2ss, .rvm, &.{ .xmm, .xmm, .rm32 }, &.{ 0xf3, 0x0f, 0x2a }, 0, .vex_lig_w0, .avx }, + .{ .vcvtsi2ss, .rvm, &.{ .xmm, .xmm, .rm64 }, &.{ 0xf3, 0x0f, 0x2a }, 0, .vex_lig_w1, .avx }, + + .{ .vcvtss2sd, .rvm, &.{ .xmm, .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x5a }, 0, .vex_lig_wig, .avx }, + + .{ .vdivpd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x5e }, 0, .vex_128_wig, .avx }, + .{ .vdivpd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x5e }, 0, .vex_256_wig, .avx }, + + .{ .vdivps, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x0f, 0x5e }, 0, .vex_128_wig, .avx }, + .{ .vdivps, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x0f, 0x5e }, 0, .vex_256_wig, .avx }, + + .{ .vdivsd, .rvm, &.{ .xmm, .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x5e }, 0, .vex_lig_wig, .avx }, + +
.{ .vdivss, .rvm, &.{ .xmm, .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x5e }, 0, .vex_lig_wig, .avx }, + + .{ .vmaxpd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x5f }, 0, .vex_128_wig, .avx }, + .{ .vmaxpd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x5f }, 0, .vex_256_wig, .avx }, + + .{ .vmaxps, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x0f, 0x5f }, 0, .vex_128_wig, .avx }, + .{ .vmaxps, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x0f, 0x5f }, 0, .vex_256_wig, .avx }, + + .{ .vmaxsd, .rvm, &.{ .xmm, .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x5f }, 0, .vex_lig_wig, .avx }, + + .{ .vmaxss, .rvm, &.{ .xmm, .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x5f }, 0, .vex_lig_wig, .avx }, + + .{ .vminpd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x5d }, 0, .vex_128_wig, .avx }, + .{ .vminpd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x5d }, 0, .vex_256_wig, .avx }, + + .{ .vminps, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x0f, 0x5d }, 0, .vex_128_wig, .avx }, + .{ .vminps, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x0f, 0x5d }, 0, .vex_256_wig, .avx }, + + .{ .vminsd, .rvm, &.{ .xmm, .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x5d }, 0, .vex_lig_wig, .avx }, + + .{ .vminss, .rvm, &.{ .xmm, .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x5d }, 0, .vex_lig_wig, .avx }, + + .{ .vmovapd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x28 }, 0, .vex_128_wig, .avx }, + .{ .vmovapd, .mr, &.{ .xmm_m128, .xmm }, &.{ 0x66, 0x0f, 0x29 }, 0, .vex_128_wig, .avx }, + .{ .vmovapd, .rm, &.{ .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x28 }, 0, .vex_256_wig, .avx }, + .{ .vmovapd, .mr, &.{ .ymm_m256, .ymm }, &.{ 0x66, 0x0f, 0x29 }, 0, .vex_256_wig, .avx }, + + .{ .vmovaps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x28 }, 0, .vex_128_wig, .avx }, + .{ .vmovaps, .mr, &.{ .xmm_m128, .xmm }, &.{ 0x0f, 0x29 }, 0, .vex_128_wig, .avx }, + .{ .vmovaps, .rm, &.{ .ymm, .ymm_m256 }, &.{ 0x0f, 0x28 }, 0, .vex_256_wig, .avx }, + .{ .vmovaps, .mr, &.{ .ymm_m256, .ymm }, &.{ 0x0f, 0x29 }, 0, .vex_256_wig, .avx }, + + .{ .vmovddup, .rm, 
&.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x12 }, 0, .vex_128_wig, .avx }, + .{ .vmovddup, .rm, &.{ .ymm, .ymm_m256 }, &.{ 0xf2, 0x0f, 0x12 }, 0, .vex_256_wig, .avx }, + + .{ .vmovhlps, .rvm, &.{ .xmm, .xmm, .xmm }, &.{ 0x0f, 0x12 }, 0, .vex_128_wig, .avx }, + + .{ .vmovsd, .rvm, &.{ .xmm, .xmm, .xmm }, &.{ 0xf2, 0x0f, 0x10 }, 0, .vex_lig_wig, .avx }, + .{ .vmovsd, .rm, &.{ .xmm, .m64 }, &.{ 0xf2, 0x0f, 0x10 }, 0, .vex_lig_wig, .avx }, + .{ .vmovsd, .mvr, &.{ .xmm, .xmm, .xmm }, &.{ 0xf2, 0x0f, 0x11 }, 0, .vex_lig_wig, .avx }, + .{ .vmovsd, .mr, &.{ .m64, .xmm }, &.{ 0xf2, 0x0f, 0x11 }, 0, .vex_lig_wig, .avx }, + + .{ .vmovshdup, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0xf3, 0x0f, 0x16 }, 0, .vex_128_wig, .avx }, + .{ .vmovshdup, .rm, &.{ .ymm, .ymm_m256 }, &.{ 0xf3, 0x0f, 0x16 }, 0, .vex_256_wig, .avx }, + + .{ .vmovsldup, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0xf3, 0x0f, 0x12 }, 0, .vex_128_wig, .avx }, + .{ .vmovsldup, .rm, &.{ .ymm, .ymm_m256 }, &.{ 0xf3, 0x0f, 0x12 }, 0, .vex_256_wig, .avx }, + + .{ .vmovss, .rvm, &.{ .xmm, .xmm, .xmm }, &.{ 0xf3, 0x0f, 0x10 }, 0, .vex_lig_wig, .avx }, + .{ .vmovss, .rm, &.{ .xmm, .m32 }, &.{ 0xf3, 0x0f, 0x10 }, 0, .vex_lig_wig, .avx }, + .{ .vmovss, .mvr, &.{ .xmm, .xmm, .xmm }, &.{ 0xf3, 0x0f, 0x11 }, 0, .vex_lig_wig, .avx }, + .{ .vmovss, .mr, &.{ .m32, .xmm }, &.{ 0xf3, 0x0f, 0x11 }, 0, .vex_lig_wig, .avx }, + + .{ .vmovupd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x10 }, 0, .vex_128_wig, .avx }, + .{ .vmovupd, .mr, &.{ .xmm_m128, .xmm }, &.{ 0x66, 0x0f, 0x11 }, 0, .vex_128_wig, .avx }, + .{ .vmovupd, .rm, &.{ .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x10 }, 0, .vex_256_wig, .avx }, + .{ .vmovupd, .mr, &.{ .ymm_m256, .ymm }, &.{ 0x66, 0x0f, 0x11 }, 0, .vex_256_wig, .avx }, + + .{ .vmovups, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x10 }, 0, .vex_128_wig, .avx }, + .{ .vmovups, .mr, &.{ .xmm_m128, .xmm }, &.{ 0x0f, 0x11 }, 0, .vex_128_wig, .avx }, + .{ .vmovups, .rm, &.{ .ymm, .ymm_m256 }, &.{ 0x0f, 0x10 }, 0, .vex_256_wig, .avx }, + .{ .vmovups, 
.mr, &.{ .ymm_m256, .ymm }, &.{ 0x0f, 0x11 }, 0, .vex_256_wig, .avx }, + + .{ .vmulpd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x59 }, 0, .vex_128_wig, .avx }, + .{ .vmulpd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x59 }, 0, .vex_256_wig, .avx }, + + .{ .vmulps, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x0f, 0x59 }, 0, .vex_128_wig, .avx }, + .{ .vmulps, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x0f, 0x59 }, 0, .vex_256_wig, .avx }, + + .{ .vmulsd, .rvm, &.{ .xmm, .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x59 }, 0, .vex_lig_wig, .avx }, + + .{ .vmulss, .rvm, &.{ .xmm, .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x59 }, 0, .vex_lig_wig, .avx }, + + .{ .vpextrb, .mri, &.{ .r32_m8, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x14 }, 0, .vex_128_w0, .avx }, + .{ .vpextrd, .mri, &.{ .rm32, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x16 }, 0, .vex_128_w0, .avx }, + .{ .vpextrq, .mri, &.{ .rm64, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x16 }, 0, .vex_128_w1, .avx }, + + .{ .vpextrw, .rmi, &.{ .r32, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0xc5 }, 0, .vex_128_wig, .avx }, + .{ .vpextrw, .mri, &.{ .r32_m16, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x15 }, 0, .vex_128_wig, .avx }, + + .{ .vpinsrb, .rmi, &.{ .xmm, .r32_m8, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x20 }, 0, .vex_128_w0, .avx }, + .{ .vpinsrd, .rmi, &.{ .xmm, .rm32, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x22 }, 0, .vex_128_w0, .avx }, + .{ .vpinsrq, .rmi, &.{ .xmm, .rm64, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x22 }, 0, .vex_128_w1, .avx }, + + .{ .vpinsrw, .rvmi, &.{ .xmm, .xmm, .r32_m16, .imm8 }, &.{ 0x66, 0x0f, 0xc4 }, 0, .vex_128_wig, .avx }, + + .{ .vpsrlw, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd1 }, 0, .vex_128_wig, .avx }, + .{ .vpsrlw, .vmi, &.{ .xmm, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x71 }, 2, .vex_128_wig, .avx }, + .{ .vpsrld, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd2 }, 0, .vex_128_wig, .avx }, + .{ .vpsrld, .vmi, &.{ .xmm, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x72 }, 2, .vex_128_wig, .avx }, + .{ .vpsrlq, .rvm, &.{ .xmm, .xmm,
.xmm_m128 }, &.{ 0x66, 0x0f, 0xd3 }, 0, .vex_128_wig, .avx }, + .{ .vpsrlq, .vmi, &.{ .xmm, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x73 }, 2, .vex_128_wig, .avx }, + + .{ .vpunpckhbw, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x68 }, 0, .vex_128_wig, .avx }, + .{ .vpunpckhwd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x69 }, 0, .vex_128_wig, .avx }, + .{ .vpunpckhdq, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x6a }, 0, .vex_128_wig, .avx }, + .{ .vpunpckhqdq, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x6d }, 0, .vex_128_wig, .avx }, + + .{ .vpunpcklbw, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x60 }, 0, .vex_128_wig, .avx }, + .{ .vpunpcklwd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x61 }, 0, .vex_128_wig, .avx }, + .{ .vpunpckldq, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x62 }, 0, .vex_128_wig, .avx }, + .{ .vpunpcklqdq, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x6c }, 0, .vex_128_wig, .avx }, + + .{ .vroundpd, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x09 }, 0, .vex_128_wig, .avx }, + .{ .vroundpd, .rmi, &.{ .ymm, .ymm_m256, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x09 }, 0, .vex_256_wig, .avx }, + + .{ .vroundps, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x08 }, 0, .vex_128_wig, .avx }, + .{ .vroundps, .rmi, &.{ .ymm, .ymm_m256, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x08 }, 0, .vex_256_wig, .avx }, + + .{ .vroundsd, .rvmi, &.{ .xmm, .xmm, .xmm_m64, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x0b }, 0, .vex_lig_wig, .avx }, + + .{ .vroundss, .rvmi, &.{ .xmm, .xmm, .xmm_m32, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x0a }, 0, .vex_lig_wig, .avx }, + + .{ .vsqrtpd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x51 }, 0, .vex_128_wig, .avx }, + .{ .vsqrtpd, .rm, &.{ .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x51 }, 0, .vex_256_wig, .avx }, + + .{ .vsqrtps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x51 }, 0, .vex_128_wig, .avx }, + .{ .vsqrtps, .rm, &.{ .ymm, .ymm_m256 }, &.{ 0x0f, 0x51 }, 0, .vex_256_wig, .avx }, 
+ + .{ .vsqrtsd, .rvm, &.{ .xmm, .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x51 }, 0, .vex_lig_wig, .avx }, + + .{ .vsqrtss, .rvm, &.{ .xmm, .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x51 }, 0, .vex_lig_wig, .avx }, + + .{ .vsubpd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x5c }, 0, .vex_128_wig, .avx }, + .{ .vsubpd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x5c }, 0, .vex_256_wig, .avx }, + + .{ .vsubps, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x0f, 0x5c }, 0, .vex_128_wig, .avx }, + .{ .vsubps, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x0f, 0x5c }, 0, .vex_256_wig, .avx }, + + .{ .vsubsd, .rvm, &.{ .xmm, .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x5c }, 0, .vex_lig_wig, .avx }, + + .{ .vsubss, .rvm, &.{ .xmm, .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x5c }, 0, .vex_lig_wig, .avx }, + + // F16C + .{ .vcvtph2ps, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0x66, 0x0f, 0x38, 0x13 }, 0, .vex_128_w0, .f16c }, + .{ .vcvtph2ps, .rm, &.{ .ymm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x13 }, 0, .vex_256_w0, .f16c }, + + .{ .vcvtps2ph, .mri, &.{ .xmm_m64, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x1d }, 0, .vex_128_w0, .f16c }, + .{ .vcvtps2ph, .mri, &.{ .xmm_m128, .ymm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x1d }, 0, .vex_256_w0, .f16c }, + + // FMA + .{ .vfmadd132pd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x98 }, 0, .vex_128_w1, .fma }, + .{ .vfmadd213pd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0xa8 }, 0, .vex_128_w1, .fma }, + .{ .vfmadd231pd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0xb8 }, 0, .vex_128_w1, .fma }, + .{ .vfmadd132pd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x98 }, 0, .vex_256_w1, .fma }, + .{ .vfmadd213pd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0xa8 }, 0, .vex_256_w1, .fma }, + .{ .vfmadd231pd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0xb8 }, 0, .vex_256_w1, .fma }, + + .{ .vfmadd132ps, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x98 }, 0, .vex_128_w0, .fma }, + .{ .vfmadd213ps, 
.rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0xa8 }, 0, .vex_128_w0, .fma }, + .{ .vfmadd231ps, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0xb8 }, 0, .vex_128_w0, .fma }, + .{ .vfmadd132ps, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x98 }, 0, .vex_256_w0, .fma }, + .{ .vfmadd213ps, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0xa8 }, 0, .vex_256_w0, .fma }, + .{ .vfmadd231ps, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0xb8 }, 0, .vex_256_w0, .fma }, + + .{ .vfmadd132sd, .rvm, &.{ .xmm, .xmm, .xmm_m64 }, &.{ 0x66, 0x0f, 0x38, 0x99 }, 0, .vex_lig_w1, .fma }, + .{ .vfmadd213sd, .rvm, &.{ .xmm, .xmm, .xmm_m64 }, &.{ 0x66, 0x0f, 0x38, 0xa9 }, 0, .vex_lig_w1, .fma }, + .{ .vfmadd231sd, .rvm, &.{ .xmm, .xmm, .xmm_m64 }, &.{ 0x66, 0x0f, 0x38, 0xb9 }, 0, .vex_lig_w1, .fma }, + + .{ .vfmadd132ss, .rvm, &.{ .xmm, .xmm, .xmm_m32 }, &.{ 0x66, 0x0f, 0x38, 0x99 }, 0, .vex_lig_w0, .fma }, + .{ .vfmadd213ss, .rvm, &.{ .xmm, .xmm, .xmm_m32 }, &.{ 0x66, 0x0f, 0x38, 0xa9 }, 0, .vex_lig_w0, .fma }, + .{ .vfmadd231ss, .rvm, &.{ .xmm, .xmm, .xmm_m32 }, &.{ 0x66, 0x0f, 0x38, 0xb9 }, 0, .vex_lig_w0, .fma }, + + // AVX2 + .{ .vpsrlw, .rvm, &.{ .ymm, .ymm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd1 }, 0, .vex_256_wig, .avx2 }, + .{ .vpsrlw, .vmi, &.{ .ymm, .ymm, .imm8 }, &.{ 0x66, 0x0f, 0x71 }, 2, .vex_256_wig, .avx2 }, + .{ .vpsrld, .rvm, &.{ .ymm, .ymm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd2 }, 0, .vex_256_wig, .avx2 }, + .{ .vpsrld, .vmi, &.{ .ymm, .ymm, .imm8 }, &.{ 0x66, 0x0f, 0x72 }, 2, .vex_256_wig, .avx2 }, + .{ .vpsrlq, .rvm, &.{ .ymm, .ymm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd3 }, 0, .vex_256_wig, .avx2 }, + .{ .vpsrlq, .vmi, &.{ .ymm, .ymm, .imm8 }, &.{ 0x66, 0x0f, 0x73 }, 2, .vex_256_wig, .avx2 }, + + .{ .vpunpckhbw, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x68 }, 0, .vex_256_wig, .avx2 }, + .{ .vpunpckhwd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x69 }, 0, .vex_256_wig, .avx2 }, + .{ .vpunpckhdq, .rvm, &.{ 
.ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x6a }, 0, .vex_256_wig, .avx2 }, + .{ .vpunpckhqdq, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x6d }, 0, .vex_256_wig, .avx2 }, - .{ .roundss, .rmi, &.{ .xmm, .xmm_m32, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x0a }, 0, .sse4_1 }, - .{ .roundsd, .rmi, &.{ .xmm, .xmm_m64, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x0b }, 0, .sse4_1 }, + .{ .vpunpcklbw, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x60 }, 0, .vex_256_wig, .avx2 }, + .{ .vpunpcklwd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x61 }, 0, .vex_256_wig, .avx2 }, + .{ .vpunpckldq, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x62 }, 0, .vex_256_wig, .avx2 }, + .{ .vpunpcklqdq, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x6c }, 0, .vex_256_wig, .avx2 }, }; // zig fmt: on diff --git a/src/autodoc/render_source.zig b/src/autodoc/render_source.zig index 4777ba399b..439b43dce3 100644 --- a/src/autodoc/render_source.zig +++ b/src/autodoc/render_source.zig @@ -25,7 +25,7 @@ pub fn genHtml( ); try out.print(" <title>{s} - source view</title>\n", .{src.sub_file_path}); try out.writeAll( - \\ <link rel="icon" href=""/> + \\ <link rel="icon" href=""/> \\ <style> \\ body{ \\ font-family: system-ui, -apple-system, Roboto, "Segoe UI", sans-serif; diff --git a/src/codegen/c.zig b/src/codegen/c.zig index f69cec960e..7da99de5c1 100644 --- a/src/codegen/c.zig +++ b/src/codegen/c.zig @@ -159,7 +159,7 @@ const reserved_idents = std.ComptimeStringMap(void, .{ .{ "double", {} }, .{ "else", {} }, .{ "enum", {} }, - .{ "extern ", {} }, + .{ "extern", {} }, .{ "float", {} }, .{ "for", {} }, .{ "fortran", {} }, @@ -198,7 +198,7 @@ const reserved_idents = std.ComptimeStringMap(void, .{ .{ "unsigned", {} }, .{ "void", {} }, .{ "volatile", {} }, - .{ "while ", {} }, + .{ "while", {} }, // stdarg.h .{ "va_start", {} }, diff --git a/src/link/Dwarf.zig b/src/link/Dwarf.zig index 7a008ca732..2ec0dedc6f 100644 --- a/src/link/Dwarf.zig +++ b/src/link/Dwarf.zig @@ -526,7 +526,7 @@ 
pub const DeclState = struct { .ErrorUnion => { const error_ty = ty.errorUnionSet(); const payload_ty = ty.errorUnionPayload(); - const payload_align = payload_ty.abiAlignment(target); + const payload_align = if (payload_ty.isNoReturn()) 0 else payload_ty.abiAlignment(target); const error_align = Type.anyerror.abiAlignment(target); const abi_size = ty.abiSize(target); const payload_off = if (error_align >= payload_align) Type.anyerror.abiSize(target) else 0; @@ -540,31 +540,35 @@ pub const DeclState = struct { const name = try ty.nameAllocArena(arena, module); try dbg_info_buffer.writer().print("{s}\x00", .{name}); - // DW.AT.member - try dbg_info_buffer.ensureUnusedCapacity(7); - dbg_info_buffer.appendAssumeCapacity(@enumToInt(AbbrevKind.struct_member)); - // DW.AT.name, DW.FORM.string - dbg_info_buffer.appendSliceAssumeCapacity("value"); - dbg_info_buffer.appendAssumeCapacity(0); - // DW.AT.type, DW.FORM.ref4 - var index = dbg_info_buffer.items.len; - try dbg_info_buffer.resize(index + 4); - try self.addTypeRelocGlobal(atom_index, payload_ty, @intCast(u32, index)); - // DW.AT.data_member_location, DW.FORM.sdata - try leb128.writeULEB128(dbg_info_buffer.writer(), payload_off); + if (!payload_ty.isNoReturn()) { + // DW.AT.member + try dbg_info_buffer.ensureUnusedCapacity(7); + dbg_info_buffer.appendAssumeCapacity(@enumToInt(AbbrevKind.struct_member)); + // DW.AT.name, DW.FORM.string + dbg_info_buffer.appendSliceAssumeCapacity("value"); + dbg_info_buffer.appendAssumeCapacity(0); + // DW.AT.type, DW.FORM.ref4 + const index = dbg_info_buffer.items.len; + try dbg_info_buffer.resize(index + 4); + try self.addTypeRelocGlobal(atom_index, payload_ty, @intCast(u32, index)); + // DW.AT.data_member_location, DW.FORM.sdata + try leb128.writeULEB128(dbg_info_buffer.writer(), payload_off); + } - // DW.AT.member - try dbg_info_buffer.ensureUnusedCapacity(5); - dbg_info_buffer.appendAssumeCapacity(@enumToInt(AbbrevKind.struct_member)); - // DW.AT.name, DW.FORM.string - 
dbg_info_buffer.appendSliceAssumeCapacity("err"); - dbg_info_buffer.appendAssumeCapacity(0); - // DW.AT.type, DW.FORM.ref4 - index = dbg_info_buffer.items.len; - try dbg_info_buffer.resize(index + 4); - try self.addTypeRelocGlobal(atom_index, error_ty, @intCast(u32, index)); - // DW.AT.data_member_location, DW.FORM.sdata - try leb128.writeULEB128(dbg_info_buffer.writer(), error_off); + { + // DW.AT.member + try dbg_info_buffer.ensureUnusedCapacity(5); + dbg_info_buffer.appendAssumeCapacity(@enumToInt(AbbrevKind.struct_member)); + // DW.AT.name, DW.FORM.string + dbg_info_buffer.appendSliceAssumeCapacity("err"); + dbg_info_buffer.appendAssumeCapacity(0); + // DW.AT.type, DW.FORM.ref4 + const index = dbg_info_buffer.items.len; + try dbg_info_buffer.resize(index + 4); + try self.addTypeRelocGlobal(atom_index, error_ty, @intCast(u32, index)); + // DW.AT.data_member_location, DW.FORM.sdata + try leb128.writeULEB128(dbg_info_buffer.writer(), error_off); + } // DW.AT.structure_type delimit children try dbg_info_buffer.append(0); diff --git a/src/type.zig b/src/type.zig index c9a6f49d3e..6122afda62 100644 --- a/src/type.zig +++ b/src/type.zig @@ -3596,12 +3596,12 @@ pub const Type = extern union { fn intAbiSize(bits: u16, target: Target) u64 { const alignment = intAbiAlignment(bits, target); - return std.mem.alignForwardGeneric(u64, (bits + 7) / 8, alignment); + return std.mem.alignForwardGeneric(u64, @intCast(u16, (@as(u17, bits) + 7) / 8), alignment); } fn intAbiAlignment(bits: u16, target: Target) u32 { return @min( - std.math.ceilPowerOfTwoPromote(u16, (bits + 7) / 8), + std.math.ceilPowerOfTwoPromote(u16, @intCast(u16, (@as(u17, bits) + 7) / 8)), target.maxIntAlignment(), ); } diff --git a/test/behavior.zig b/test/behavior.zig index 70293bf45d..24652b6612 100644 --- a/test/behavior.zig +++ b/test/behavior.zig @@ -150,6 +150,7 @@ test { _ = @import("behavior/comptime_memory.zig"); _ = @import("behavior/const_slice_child.zig"); _ = @import("behavior/decltest.zig"); + _ = 
@import("behavior/duplicated_test_names.zig"); _ = @import("behavior/defer.zig"); _ = @import("behavior/empty_tuple_fields.zig"); _ = @import("behavior/empty_union.zig"); diff --git a/test/behavior/basic.zig b/test/behavior/basic.zig index 6fdd309371..073be26288 100644 --- a/test/behavior/basic.zig +++ b/test/behavior/basic.zig @@ -203,7 +203,7 @@ test "multiline string comments at multiple places" { try expect(mem.eql(u8, s1, s2)); } -test "string concatenation" { +test "string concatenation simple" { try expect(mem.eql(u8, "OK" ++ " IT " ++ "WORKED", "OK IT WORKED")); } @@ -1124,3 +1124,24 @@ test "runtime-known globals initialized with undefined" { try expect(S.s[0] == 1); try expect(S.s[4] == 5); } + +test "arrays and vectors with big integers" { + if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; + if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; + if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; + if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; + if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; + + // TODO: only aarch64-windows didn't pass in the PR that added this code. + // figure out why if you can run this target. 
+ if (builtin.os.tag == .windows and builtin.cpu.arch == .aarch64) return error.SkipZigTest; + + inline for (.{ u65528, u65529, u65535 }) |Int| { + var a: [1]Int = undefined; + a[0] = std.math.maxInt(Int); + try expect(a[0] == comptime std.math.maxInt(Int)); + var b: @Vector(1, Int) = undefined; + b[0] = std.math.maxInt(Int); + try expect(b[0] == comptime std.math.maxInt(Int)); + } +} diff --git a/test/behavior/bugs/12891.zig b/test/behavior/bugs/12891.zig index e558783705..354d9e856e 100644 --- a/test/behavior/bugs/12891.zig +++ b/test/behavior/bugs/12891.zig @@ -29,7 +29,6 @@ test "inf >= 1" { test "isNan(nan * 1)" { if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO const nan_times_one = comptime std.math.nan(f64) * 1; try std.testing.expect(std.math.isNan(nan_times_one)); @@ -37,7 +36,6 @@ test "isNan(nan * 1)" { test "runtime isNan(nan * 1)" { if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO const nan_times_one = std.math.nan(f64) * 1; try std.testing.expect(std.math.isNan(nan_times_one)); @@ -45,7 +43,6 @@ test "runtime isNan(nan * 1)" { test "isNan(nan * 0)" { if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO const nan_times_zero = comptime std.math.nan(f64) * 0; try std.testing.expect(std.math.isNan(nan_times_zero)); @@ -55,7 +52,6 @@ test "isNan(nan * 0)" { test "isNan(inf * 0)" { if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; 
// TODO - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO const inf_times_zero = comptime std.math.inf(f64) * 0; try std.testing.expect(std.math.isNan(inf_times_zero)); @@ -65,7 +61,6 @@ test "isNan(inf * 0)" { test "runtime isNan(nan * 0)" { if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO const nan_times_zero = std.math.nan(f64) * 0; try std.testing.expect(std.math.isNan(nan_times_zero)); @@ -75,7 +70,6 @@ test "runtime isNan(nan * 0)" { test "runtime isNan(inf * 0)" { if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO const inf_times_zero = std.math.inf(f64) * 0; try std.testing.expect(std.math.isNan(inf_times_zero)); diff --git a/test/behavior/bugs/2114.zig b/test/behavior/bugs/2114.zig index f92728eff6..3ad4a97b80 100644 --- a/test/behavior/bugs/2114.zig +++ b/test/behavior/bugs/2114.zig @@ -9,7 +9,8 @@ fn ctz(x: anytype) usize { test "fixed" { if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO + if (builtin.zig_backend == .stage2_x86_64 and + !comptime std.Target.x86.featureSetHas(builtin.cpu.features, .bmi)) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO diff --git a/test/behavior/comptime_memory.zig b/test/behavior/comptime_memory.zig index 5c3012d1dc..ea7a67651c 100644 --- a/test/behavior/comptime_memory.zig +++ b/test/behavior/comptime_memory.zig @@ -45,7 
+45,7 @@ test "type pun signed and unsigned as offset many pointer" { } } -test "type pun signed and unsigned as array pointer" { +test "type pun signed and unsigned as array pointer with pointer arithemtic" { if (true) { // TODO https://github.com/ziglang/zig/issues/9646 return error.SkipZigTest; diff --git a/test/behavior/duplicated_test_names.zig b/test/behavior/duplicated_test_names.zig new file mode 100644 index 0000000000..d59945ce30 --- /dev/null +++ b/test/behavior/duplicated_test_names.zig @@ -0,0 +1,17 @@ +const Namespace = struct { + test "thingy" {} +}; + +fn thingy(a: usize, b: usize) usize { + return a + b; +} + +comptime { + _ = Namespace; +} + +test "thingy" {} + +test thingy { + if (thingy(1, 2) != 3) unreachable; +} diff --git a/test/behavior/error.zig b/test/behavior/error.zig index 0cd9be05ca..91b5561d62 100644 --- a/test/behavior/error.zig +++ b/test/behavior/error.zig @@ -757,7 +757,6 @@ test "error union of noreturn used with if" { if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO NoReturn.a = 64; @@ -772,7 +771,6 @@ test "error union of noreturn used with try" { if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO NoReturn.a = 64; @@ -784,7 +782,6 @@ test "error union of noreturn used with catch" { if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if 
(builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO NoReturn.a = 64; diff --git a/test/behavior/field_parent_ptr.zig b/test/behavior/field_parent_ptr.zig index bf99fd1795..c56bcad0d2 100644 --- a/test/behavior/field_parent_ptr.zig +++ b/test/behavior/field_parent_ptr.zig @@ -2,7 +2,6 @@ const expect = @import("std").testing.expect; const builtin = @import("builtin"); test "@fieldParentPtr non-first field" { - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO try testParentFieldPtr(&foo.c); diff --git a/test/behavior/floatop.zig b/test/behavior/floatop.zig index b98d782da1..242c8dabe5 100644 --- a/test/behavior/floatop.zig +++ b/test/behavior/floatop.zig @@ -8,6 +8,8 @@ const has_f80_rt = switch (builtin.cpu.arch) { .x86_64, .x86 => true, else => false, }; +const no_x86_64_hardware_f16_support = builtin.zig_backend == .stage2_x86_64 and + !std.Target.x86.featureSetHas(builtin.cpu.features, .f16c); const epsilon_16 = 0.001; const epsilon = 0.000001; @@ -52,7 +54,7 @@ fn testFloatComparisons() !void { } test "different sized float comparisons" { - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO + if (no_x86_64_hardware_f16_support) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO @@ -134,7 +136,6 @@ fn testSqrt() !void { test "@sqrt with vectors" { if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO 
- if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO @@ -152,7 +153,7 @@ fn testSqrtWithVectors() !void { } test "more @sqrt f16 tests" { - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO + if (no_x86_64_hardware_f16_support) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO @@ -202,7 +203,7 @@ fn testSqrtLegacy(comptime T: type, x: T) !void { } test "@sin" { - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO + if (no_x86_64_hardware_f16_support) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO @@ -241,7 +242,7 @@ fn testSinWithVectors() !void { } test "@cos" { - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO + if (no_x86_64_hardware_f16_support) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO @@ -280,7 +281,7 @@ fn testCosWithVectors() !void { } test "@exp" { - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO + if (no_x86_64_hardware_f16_support) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if 
(builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO @@ -318,7 +319,7 @@ fn testExpWithVectors() !void { } test "@exp2" { - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO + if (no_x86_64_hardware_f16_support) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO @@ -403,7 +404,7 @@ test "@log with @vectors" { } test "@log2" { - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO + if (no_x86_64_hardware_f16_support) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO @@ -445,7 +446,7 @@ fn testLog2WithVectors() !void { } test "@log10" { - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO + if (no_x86_64_hardware_f16_support) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO @@ -617,7 +618,8 @@ fn testFloor() !void { test "@floor with vectors" { if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO + if (builtin.zig_backend == .stage2_x86_64 and + !comptime std.Target.x86.featureSetHas(builtin.cpu.features, .sse4_1)) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO @@ -707,7 +709,8 @@ fn testCeil() !void { 
test "@ceil with vectors" { if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO + if (builtin.zig_backend == .stage2_x86_64 and + !comptime std.Target.x86.featureSetHas(builtin.cpu.features, .sse4_1)) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO @@ -797,7 +800,8 @@ fn testTrunc() !void { test "@trunc with vectors" { if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO + if (builtin.zig_backend == .stage2_x86_64 and + !comptime std.Target.x86.featureSetHas(builtin.cpu.features, .sse4_1)) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO @@ -878,7 +882,7 @@ fn testTruncLegacy(comptime T: type, x: T) !void { } test "negation f16" { - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO + if (no_x86_64_hardware_f16_support) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO @@ -1037,7 +1041,6 @@ test "comptime_float zero divided by zero produces zero" { } test "nan negation f16" { - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO diff --git a/test/behavior/math.zig b/test/behavior/math.zig index 
f9c9f43927..7e16111059 100644 --- a/test/behavior/math.zig +++ b/test/behavior/math.zig @@ -77,7 +77,8 @@ fn testClz() !void { } test "@clz big ints" { - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO + if (builtin.zig_backend == .stage2_x86_64 and + !comptime std.Target.x86.featureSetHas(builtin.cpu.features, .lzcnt)) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO @@ -398,7 +399,8 @@ fn testBinaryNot128(comptime Type: type, x: Type) !void { test "division" { if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO + if (builtin.zig_backend == .stage2_x86_64 and + !comptime std.Target.x86.featureSetHas(builtin.cpu.features, .sse4_1)) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO diff --git a/test/behavior/muladd.zig b/test/behavior/muladd.zig index aa36c99784..199f117e7b 100644 --- a/test/behavior/muladd.zig +++ b/test/behavior/muladd.zig @@ -1,8 +1,12 @@ +const std = @import("std"); const builtin = @import("builtin"); -const expect = @import("std").testing.expect; +const expect = std.testing.expect; + +const no_x86_64_hardware_fma_support = builtin.zig_backend == .stage2_x86_64 and + !std.Target.x86.featureSetHas(builtin.cpu.features, .fma); test "@mulAdd" { - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO + if (no_x86_64_hardware_fma_support) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == 
.stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO @@ -116,7 +120,7 @@ fn vector32() !void { test "vector f32" { if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO + if (no_x86_64_hardware_fma_support) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO @@ -139,7 +143,7 @@ fn vector64() !void { test "vector f64" { if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO + if (no_x86_64_hardware_fma_support) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO diff --git a/test/behavior/vector.zig b/test/behavior/vector.zig index 2c55af5f85..41b0bfc39b 100644 --- a/test/behavior/vector.zig +++ b/test/behavior/vector.zig @@ -168,7 +168,8 @@ test "array to vector" { test "array to vector with element type coercion" { if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO + if (builtin.zig_backend == .stage2_x86_64 and + !comptime std.Target.x86.featureSetHas(builtin.cpu.features, .f16c)) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO @@ -1130,20 +1131,6 @@ test "array of 
vectors is copied" { } test "byte vector initialized in inline function" { - const S = struct { - inline fn boolx4(e0: bool, e1: bool, e2: bool, e3: bool) @Vector(4, bool) { - return .{ e0, e1, e2, e3 }; - } - - fn all(vb: @Vector(4, bool)) bool { - return @reduce(.And, vb); - } - }; - - try expect(S.all(S.boolx4(true, true, true, true))); -} - -test "byte vector initialized in inline function" { if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO diff --git a/test/cases/compile_errors/invalid_duplicate_test_decl_name.zig b/test/cases/compile_errors/invalid_duplicate_test_decl_name.zig new file mode 100644 index 0000000000..e27bbe7c1a --- /dev/null +++ b/test/cases/compile_errors/invalid_duplicate_test_decl_name.zig @@ -0,0 +1,10 @@ +test "thingy" {} +test "thingy" {} + +// error +// backend=stage2 +// target=native +// is_test=1 +// +// :1:6: error: found test declaration with duplicate name: test.thingy +// :2:6: note: other test here diff --git a/test/cases/compile_errors/number_literal_bad_exponent.zig b/test/cases/compile_errors/number_literal_bad_exponent.zig new file mode 100644 index 0000000000..158c205018 --- /dev/null +++ b/test/cases/compile_errors/number_literal_bad_exponent.zig @@ -0,0 +1,13 @@ +const a = 0x1e-4; +const b = 0x1e+4; +const c = 0x1E-4; +const d = 0x1E+4; + +// error +// backend=stage2 +// target=native +// +// :1:15: error: sign '-' cannot follow digit 'e' in hex base +// :2:15: error: sign '+' cannot follow digit 'e' in hex base +// :3:15: error: sign '-' cannot follow digit 'E' in hex base +// :4:15: error: sign '+' cannot follow digit 'E' in hex base |
