Diffstat (limited to 'src')
| -rw-r--r-- | src/Compilation.zig | 10 |
| -rw-r--r-- | src/InternPool.zig | 11 |
| -rw-r--r-- | src/Module.zig | 252 |
| -rw-r--r-- | src/Package/Fetch.zig | 2 |
| -rw-r--r-- | src/Sema.zig | 81 |
| -rw-r--r-- | src/arch/wasm/CodeGen.zig | 2 |
| -rw-r--r-- | src/arch/x86_64/CodeGen.zig | 1958 |
| -rw-r--r-- | src/arch/x86_64/Encoding.zig | 15 |
| -rw-r--r-- | src/arch/x86_64/Lower.zig | 14 |
| -rw-r--r-- | src/arch/x86_64/Mir.zig | 30 |
| -rw-r--r-- | src/arch/x86_64/encodings.zig | 69 |
| -rw-r--r-- | src/codegen.zig | 57 |
| -rw-r--r-- | src/codegen/c.zig | 98 |
| -rw-r--r-- | src/codegen/llvm.zig | 34 |
| -rw-r--r-- | src/codegen/spirv.zig | 4 |
| -rw-r--r-- | src/link/Coff.zig | 4 |
| -rw-r--r-- | src/link/Dwarf.zig | 2 |
| -rw-r--r-- | src/link/Elf/ZigObject.zig | 8 |
| -rw-r--r-- | src/link/MachO.zig | 13 |
| -rw-r--r-- | src/link/MachO/Atom.zig | 29 |
| -rw-r--r-- | src/link/MachO/InternalObject.zig | 2 |
| -rw-r--r-- | src/link/MachO/ZigObject.zig | 6 |
| -rw-r--r-- | src/link/Plan9.zig | 2 |
| -rw-r--r-- | src/link/Wasm.zig | 8 |
| -rw-r--r-- | src/main.zig | 33 |
| -rw-r--r-- | src/type.zig | 49 |
26 files changed, 2147 insertions, 646 deletions
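The headline frontend change below replaces the single optional `test_filter` with `test_filters: []const []const u8`: a named test is now kept when any filter is a substring of its fully qualified name, and an empty filter list keeps everything. A minimal standalone sketch of that matching rule, mirroring the for/else loop in the `scanDecl` hunk (hypothetical helper, not part of the compiler):

const std = @import("std");

/// Keep the test when any filter matches its fully qualified name;
/// no filters means keep all tests.
fn matchesAnyFilter(fqn: []const u8, filters: []const []const u8) bool {
    if (filters.len == 0) return true;
    for (filters) |filter| {
        if (std.mem.indexOf(u8, fqn, filter) != null) return true;
    }
    return false;
}

test matchesAnyFilter {
    try std.testing.expect(matchesAnyFilter("std.fs.test.read", &.{ "fs", "net" }));
    try std.testing.expect(!matchesAnyFilter("std.heap.test.alloc", &.{ "fs", "net" }));
}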
diff --git a/src/Compilation.zig b/src/Compilation.zig index 91879094dd..5fa93fa677 100644 --- a/src/Compilation.zig +++ b/src/Compilation.zig @@ -217,7 +217,7 @@ libcxx_abi_version: libcxx.AbiVersion = libcxx.AbiVersion.default, /// This mutex guards all `Compilation` mutable state. mutex: std.Thread.Mutex = .{}, -test_filter: ?[]const u8, +test_filters: []const []const u8, test_name_prefix: ?[]const u8, emit_asm: ?EmitLoc, @@ -1097,7 +1097,7 @@ pub const CreateOptions = struct { native_system_include_paths: []const []const u8 = &.{}, clang_preprocessor_mode: ClangPreprocessorMode = .no, reference_trace: ?u32 = null, - test_filter: ?[]const u8 = null, + test_filters: []const []const u8 = &.{}, test_name_prefix: ?[]const u8 = null, test_runner_path: ?[]const u8 = null, subsystem: ?std.Target.SubSystem = null, @@ -1506,7 +1506,7 @@ pub fn create(gpa: Allocator, arena: Allocator, options: CreateOptions) !*Compil .formatted_panics = formatted_panics, .time_report = options.time_report, .stack_report = options.stack_report, - .test_filter = options.test_filter, + .test_filters = options.test_filters, .test_name_prefix = options.test_name_prefix, .debug_compiler_runtime_libs = options.debug_compiler_runtime_libs, .debug_compile_errors = options.debug_compile_errors, @@ -1613,7 +1613,7 @@ pub fn create(gpa: Allocator, arena: Allocator, options: CreateOptions) !*Compil hash.add(options.config.use_lib_llvm); hash.add(options.config.dll_export_fns); hash.add(options.config.is_test); - hash.addOptionalBytes(options.test_filter); + hash.addListOfBytes(options.test_filters); hash.addOptionalBytes(options.test_name_prefix); hash.add(options.skip_linker_dependencies); hash.add(formatted_panics); @@ -2475,7 +2475,7 @@ fn addNonIncrementalStuffToCacheManifest( try addModuleTableToCacheHash(gpa, arena, &man.hash, mod.root_mod, mod.main_mod, .{ .files = man }); // Synchronize with other matching comments: ZigOnlyHashStuff - man.hash.addOptionalBytes(comp.test_filter); + man.hash.addListOfBytes(comp.test_filters); man.hash.addOptionalBytes(comp.test_name_prefix); man.hash.add(comp.skip_linker_dependencies); man.hash.add(comp.formatted_panics); diff --git a/src/InternPool.zig b/src/InternPool.zig index 19be12c129..46676097bf 100644 --- a/src/InternPool.zig +++ b/src/InternPool.zig @@ -3587,6 +3587,7 @@ pub const Alignment = enum(u6) { @"8" = 3, @"16" = 4, @"32" = 5, + @"64" = 6, none = std.math.maxInt(u6), _, @@ -7403,10 +7404,14 @@ pub fn isIntegerType(ip: *const InternPool, ty: Index) bool { .c_ulong_type, .c_longlong_type, .c_ulonglong_type, - .c_longdouble_type, .comptime_int_type, => true, - else => ip.indexToKey(ty) == .int_type, + else => switch (ip.items.items(.tag)[@intFromEnum(ty)]) { + .type_int_signed, + .type_int_unsigned, + => true, + else => false, + }, }; } @@ -7904,7 +7909,7 @@ pub fn destroyNamespace(ip: *InternPool, gpa: Allocator, index: NamespaceIndex) ip.namespacePtr(index).* = .{ .parent = undefined, .file_scope = undefined, - .ty = undefined, + .decl_index = undefined, }; ip.namespaces_free_list.append(gpa, index) catch { // In order to keep `destroyNamespace` a non-fallible function, we ignore memory diff --git a/src/Module.zig b/src/Module.zig index c27b8ea4be..a4cedd9077 100644 --- a/src/Module.zig +++ b/src/Module.zig @@ -411,15 +411,15 @@ pub const Decl = struct { /// This state detects dependency loops. in_progress, /// The file corresponding to this Decl had a parse error or ZIR error. - /// There will be a corresponding ErrorMsg in Module.failed_files. 
+ /// There will be a corresponding ErrorMsg in Zcu.failed_files. file_failure, /// This Decl might be OK but it depends on another one which did not /// successfully complete semantic analysis. dependency_failure, /// Semantic analysis failure. - /// There will be a corresponding ErrorMsg in Module.failed_decls. + /// There will be a corresponding ErrorMsg in Zcu.failed_decls. sema_failure, - /// There will be a corresponding ErrorMsg in Module.failed_decls. + /// There will be a corresponding ErrorMsg in Zcu.failed_decls. codegen_failure, /// Sematic analysis and constant value codegen of this Decl has /// succeeded. However, the Decl may be outdated due to an in-progress @@ -494,77 +494,45 @@ pub const Decl = struct { return LazySrcLoc.nodeOffset(decl.nodeIndexToRelative(node_index)); } - pub fn srcLoc(decl: Decl, mod: *Module) SrcLoc { - return decl.nodeOffsetSrcLoc(0, mod); + pub fn srcLoc(decl: Decl, zcu: *Zcu) SrcLoc { + return decl.nodeOffsetSrcLoc(0, zcu); } - pub fn nodeOffsetSrcLoc(decl: Decl, node_offset: i32, mod: *Module) SrcLoc { + pub fn nodeOffsetSrcLoc(decl: Decl, node_offset: i32, zcu: *Zcu) SrcLoc { return .{ - .file_scope = decl.getFileScope(mod), + .file_scope = decl.getFileScope(zcu), .parent_decl_node = decl.src_node, .lazy = LazySrcLoc.nodeOffset(node_offset), }; } - pub fn srcToken(decl: Decl, mod: *Module) Ast.TokenIndex { - const tree = &decl.getFileScope(mod).tree; + pub fn srcToken(decl: Decl, zcu: *Zcu) Ast.TokenIndex { + const tree = &decl.getFileScope(zcu).tree; return tree.firstToken(decl.src_node); } - pub fn srcByteOffset(decl: Decl, mod: *Module) u32 { - const tree = &decl.getFileScope(mod).tree; + pub fn srcByteOffset(decl: Decl, zcu: *Zcu) u32 { + const tree = &decl.getFileScope(zcu).tree; return tree.tokens.items(.start)[decl.srcToken()]; } - pub fn renderFullyQualifiedName(decl: Decl, mod: *Module, writer: anytype) !void { + pub fn renderFullyQualifiedName(decl: Decl, zcu: *Zcu, writer: anytype) !void { if (decl.name_fully_qualified) { - try writer.print("{}", .{decl.name.fmt(&mod.intern_pool)}); + try writer.print("{}", .{decl.name.fmt(&zcu.intern_pool)}); } else { - try mod.namespacePtr(decl.src_namespace).renderFullyQualifiedName(mod, decl.name, writer); + try zcu.namespacePtr(decl.src_namespace).renderFullyQualifiedName(zcu, decl.name, writer); } } - pub fn renderFullyQualifiedDebugName(decl: Decl, mod: *Module, writer: anytype) !void { - return mod.namespacePtr(decl.src_namespace).renderFullyQualifiedDebugName(mod, decl.name, writer); + pub fn renderFullyQualifiedDebugName(decl: Decl, zcu: *Zcu, writer: anytype) !void { + return zcu.namespacePtr(decl.src_namespace).renderFullyQualifiedDebugName(zcu, decl.name, writer); } - pub fn getFullyQualifiedName(decl: Decl, mod: *Module) !InternPool.NullTerminatedString { - if (decl.name_fully_qualified) return decl.name; - - const ip = &mod.intern_pool; - const count = count: { - var count: usize = ip.stringToSlice(decl.name).len + 1; - var ns: Namespace.Index = decl.src_namespace; - while (true) { - const namespace = mod.namespacePtr(ns); - const ns_decl = mod.declPtr(namespace.getDeclIndex(mod)); - count += ip.stringToSlice(ns_decl.name).len + 1; - ns = namespace.parent.unwrap() orelse { - count += namespace.file_scope.sub_file_path.len; - break :count count; - }; - } - }; - - const gpa = mod.gpa; - const start = ip.string_bytes.items.len; - // Protects reads of interned strings from being reallocated during the call to - // renderFullyQualifiedName. 
- try ip.string_bytes.ensureUnusedCapacity(gpa, count); - decl.renderFullyQualifiedName(mod, ip.string_bytes.writer(gpa)) catch unreachable; - - // Sanitize the name for nvptx which is more restrictive. - // TODO This should be handled by the backend, not the frontend. Have a - // look at how the C backend does it for inspiration. - const cpu_arch = mod.root_mod.resolved_target.result.cpu.arch; - if (cpu_arch.isNvptx()) { - for (ip.string_bytes.items[start..]) |*byte| switch (byte.*) { - '{', '}', '*', '[', ']', '(', ')', ',', ' ', '\'' => byte.* = '_', - else => {}, - }; - } - - return ip.getOrPutTrailingString(gpa, ip.string_bytes.items.len - start); + pub fn fullyQualifiedName(decl: Decl, zcu: *Zcu) !InternPool.NullTerminatedString { + return if (decl.name_fully_qualified) + decl.name + else + zcu.namespacePtr(decl.src_namespace).fullyQualifiedName(zcu, decl.name); } pub fn typedValue(decl: Decl) error{AnalysisFail}!TypedValue { @@ -572,38 +540,38 @@ pub const Decl = struct { return TypedValue{ .ty = decl.ty, .val = decl.val }; } - pub fn internValue(decl: *Decl, mod: *Module) Allocator.Error!InternPool.Index { + pub fn internValue(decl: *Decl, zcu: *Zcu) Allocator.Error!InternPool.Index { assert(decl.has_tv); - const ip_index = try decl.val.intern(decl.ty, mod); + const ip_index = try decl.val.intern(decl.ty, zcu); decl.val = Value.fromInterned(ip_index); return ip_index; } - pub fn isFunction(decl: Decl, mod: *const Module) !bool { + pub fn isFunction(decl: Decl, zcu: *const Zcu) !bool { const tv = try decl.typedValue(); - return tv.ty.zigTypeTag(mod) == .Fn; + return tv.ty.zigTypeTag(zcu) == .Fn; } /// If the Decl owns its value and it is a struct, return it, /// otherwise null. - pub fn getOwnedStruct(decl: Decl, mod: *Module) ?InternPool.Key.StructType { + pub fn getOwnedStruct(decl: Decl, zcu: *Zcu) ?InternPool.Key.StructType { if (!decl.owns_tv) return null; if (decl.val.ip_index == .none) return null; - return mod.typeToStruct(decl.val.toType()); + return zcu.typeToStruct(decl.val.toType()); } /// If the Decl owns its value and it is a union, return it, /// otherwise null. - pub fn getOwnedUnion(decl: Decl, mod: *Module) ?InternPool.UnionType { + pub fn getOwnedUnion(decl: Decl, zcu: *Zcu) ?InternPool.UnionType { if (!decl.owns_tv) return null; if (decl.val.ip_index == .none) return null; - return mod.typeToUnion(decl.val.toType()); + return zcu.typeToUnion(decl.val.toType()); } - pub fn getOwnedFunction(decl: Decl, mod: *Module) ?InternPool.Key.Func { + pub fn getOwnedFunction(decl: Decl, zcu: *Zcu) ?InternPool.Key.Func { const i = decl.getOwnedFunctionIndex(); if (i == .none) return null; - return switch (mod.intern_pool.indexToKey(i)) { + return switch (zcu.intern_pool.indexToKey(i)) { .func => |func| func, else => null, }; @@ -616,24 +584,24 @@ pub const Decl = struct { /// If the Decl owns its value and it is an extern function, returns it, /// otherwise null. - pub fn getOwnedExternFunc(decl: Decl, mod: *Module) ?InternPool.Key.ExternFunc { - return if (decl.owns_tv) decl.val.getExternFunc(mod) else null; + pub fn getOwnedExternFunc(decl: Decl, zcu: *Zcu) ?InternPool.Key.ExternFunc { + return if (decl.owns_tv) decl.val.getExternFunc(zcu) else null; } /// If the Decl owns its value and it is a variable, returns it, /// otherwise null. 
- pub fn getOwnedVariable(decl: Decl, mod: *Module) ?InternPool.Key.Variable { - return if (decl.owns_tv) decl.val.getVariable(mod) else null; + pub fn getOwnedVariable(decl: Decl, zcu: *Zcu) ?InternPool.Key.Variable { + return if (decl.owns_tv) decl.val.getVariable(zcu) else null; } /// Gets the namespace that this Decl creates by being a struct, union, /// enum, or opaque. - pub fn getInnerNamespaceIndex(decl: Decl, mod: *Module) Namespace.OptionalIndex { + pub fn getInnerNamespaceIndex(decl: Decl, zcu: *Zcu) Namespace.OptionalIndex { if (!decl.has_tv) return .none; return switch (decl.val.ip_index) { .empty_struct_type => .none, .none => .none, - else => switch (mod.intern_pool.indexToKey(decl.val.toIntern())) { + else => switch (zcu.intern_pool.indexToKey(decl.val.toIntern())) { .opaque_type => |opaque_type| opaque_type.namespace.toOptional(), .struct_type => |struct_type| struct_type.namespace, .union_type => |union_type| union_type.namespace.toOptional(), @@ -644,19 +612,19 @@ pub const Decl = struct { } /// Like `getInnerNamespaceIndex`, but only returns it if the Decl is the owner. - pub fn getOwnedInnerNamespaceIndex(decl: Decl, mod: *Module) Namespace.OptionalIndex { + pub fn getOwnedInnerNamespaceIndex(decl: Decl, zcu: *Zcu) Namespace.OptionalIndex { if (!decl.owns_tv) return .none; - return decl.getInnerNamespaceIndex(mod); + return decl.getInnerNamespaceIndex(zcu); } /// Same as `getOwnedInnerNamespaceIndex` but additionally obtains the pointer. - pub fn getOwnedInnerNamespace(decl: Decl, mod: *Module) ?*Namespace { - return mod.namespacePtrUnwrap(decl.getOwnedInnerNamespaceIndex(mod)); + pub fn getOwnedInnerNamespace(decl: Decl, zcu: *Zcu) ?*Namespace { + return zcu.namespacePtrUnwrap(decl.getOwnedInnerNamespaceIndex(zcu)); } /// Same as `getInnerNamespaceIndex` but additionally obtains the pointer. 
- pub fn getInnerNamespace(decl: Decl, mod: *Module) ?*Namespace { - return mod.namespacePtrUnwrap(decl.getInnerNamespaceIndex(mod)); + pub fn getInnerNamespace(decl: Decl, zcu: *Zcu) ?*Namespace { + return zcu.namespacePtrUnwrap(decl.getInnerNamespaceIndex(zcu)); } pub fn dump(decl: *Decl) void { @@ -674,27 +642,27 @@ pub const Decl = struct { std.debug.print("\n", .{}); } - pub fn getFileScope(decl: Decl, mod: *Module) *File { - return mod.namespacePtr(decl.src_namespace).file_scope; + pub fn getFileScope(decl: Decl, zcu: *Zcu) *File { + return zcu.namespacePtr(decl.src_namespace).file_scope; } - pub fn getExternDecl(decl: Decl, mod: *Module) OptionalIndex { + pub fn getExternDecl(decl: Decl, zcu: *Zcu) OptionalIndex { assert(decl.has_tv); - return switch (mod.intern_pool.indexToKey(decl.val.toIntern())) { + return switch (zcu.intern_pool.indexToKey(decl.val.toIntern())) { .variable => |variable| if (variable.is_extern) variable.decl.toOptional() else .none, .extern_func => |extern_func| extern_func.decl.toOptional(), else => .none, }; } - pub fn isExtern(decl: Decl, mod: *Module) bool { - return decl.getExternDecl(mod) != .none; + pub fn isExtern(decl: Decl, zcu: *Zcu) bool { + return decl.getExternDecl(zcu) != .none; } - pub fn getAlignment(decl: Decl, mod: *Module) Alignment { + pub fn getAlignment(decl: Decl, zcu: *Zcu) Alignment { assert(decl.has_tv); if (decl.alignment != .none) return decl.alignment; - return decl.ty.abiAlignment(mod); + return decl.ty.abiAlignment(zcu); } }; @@ -704,7 +672,7 @@ pub const EmitH = struct { }; pub const DeclAdapter = struct { - mod: *Module, + zcu: *Zcu, pub fn hash(self: @This(), s: InternPool.NullTerminatedString) u32 { _ = self; @@ -713,8 +681,7 @@ pub const DeclAdapter = struct { pub fn eql(self: @This(), a: InternPool.NullTerminatedString, b_decl_index: Decl.Index, b_index: usize) bool { _ = b_index; - const b_decl = self.mod.declPtr(b_decl_index); - return a == b_decl.name; + return a == self.zcu.declPtr(b_decl_index).name; } }; @@ -723,7 +690,7 @@ pub const Namespace = struct { parent: OptionalIndex, file_scope: *File, /// Will be a struct, enum, union, or opaque. - ty: Type, + decl_index: Decl.Index, /// Direct children of the namespace. /// Declaration order is preserved via entry order. /// These are only declarations named directly by the AST; anonymous @@ -739,7 +706,7 @@ pub const Namespace = struct { const OptionalIndex = InternPool.OptionalNamespaceIndex; const DeclContext = struct { - module: *Module, + zcu: *Zcu, pub fn hash(ctx: @This(), decl_index: Decl.Index) u32 { const decl = ctx.module.declPtr(decl_index); @@ -757,39 +724,87 @@ pub const Namespace = struct { // This renders e.g. "std.fs.Dir.OpenOptions" pub fn renderFullyQualifiedName( ns: Namespace, - mod: *Module, + zcu: *Zcu, name: InternPool.NullTerminatedString, writer: anytype, ) @TypeOf(writer).Error!void { if (ns.parent.unwrap()) |parent| { - const decl = mod.declPtr(ns.getDeclIndex(mod)); - try mod.namespacePtr(parent).renderFullyQualifiedName(mod, decl.name, writer); + try zcu.namespacePtr(parent).renderFullyQualifiedName( + zcu, + zcu.declPtr(ns.decl_index).name, + writer, + ); } else { try ns.file_scope.renderFullyQualifiedName(writer); } - if (name != .empty) try writer.print(".{}", .{name.fmt(&mod.intern_pool)}); + if (name != .empty) try writer.print(".{}", .{name.fmt(&zcu.intern_pool)}); } /// This renders e.g. 
"std/fs.zig:Dir.OpenOptions" pub fn renderFullyQualifiedDebugName( ns: Namespace, - mod: *Module, + zcu: *Zcu, name: InternPool.NullTerminatedString, writer: anytype, ) @TypeOf(writer).Error!void { - const separator_char: u8 = if (ns.parent.unwrap()) |parent| sep: { - const decl = mod.declPtr(ns.getDeclIndex(mod)); - try mod.namespacePtr(parent).renderFullyQualifiedDebugName(mod, decl.name, writer); + const sep: u8 = if (ns.parent.unwrap()) |parent| sep: { + try zcu.namespacePtr(parent).renderFullyQualifiedDebugName( + zcu, + zcu.declPtr(ns.decl_index).name, + writer, + ); break :sep '.'; } else sep: { try ns.file_scope.renderFullyQualifiedDebugName(writer); break :sep ':'; }; - if (name != .empty) try writer.print("{c}{}", .{ separator_char, name.fmt(&mod.intern_pool) }); + if (name != .empty) try writer.print("{c}{}", .{ sep, name.fmt(&zcu.intern_pool) }); } - pub fn getDeclIndex(ns: Namespace, mod: *Module) Decl.Index { - return ns.ty.getOwnerDecl(mod); + pub fn fullyQualifiedName( + ns: Namespace, + zcu: *Zcu, + name: InternPool.NullTerminatedString, + ) !InternPool.NullTerminatedString { + const ip = &zcu.intern_pool; + const count = count: { + var count: usize = ip.stringToSlice(name).len + 1; + var cur_ns = &ns; + while (true) { + const decl = zcu.declPtr(cur_ns.decl_index); + count += ip.stringToSlice(decl.name).len + 1; + cur_ns = zcu.namespacePtr(cur_ns.parent.unwrap() orelse { + count += ns.file_scope.sub_file_path.len; + break :count count; + }); + } + }; + + const gpa = zcu.gpa; + const start = ip.string_bytes.items.len; + // Protects reads of interned strings from being reallocated during the call to + // renderFullyQualifiedName. + try ip.string_bytes.ensureUnusedCapacity(gpa, count); + ns.renderFullyQualifiedName(zcu, name, ip.string_bytes.writer(gpa)) catch unreachable; + + // Sanitize the name for nvptx which is more restrictive. + // TODO This should be handled by the backend, not the frontend. Have a + // look at how the C backend does it for inspiration. 
+ const cpu_arch = zcu.root_mod.resolved_target.result.cpu.arch; + if (cpu_arch.isNvptx()) { + for (ip.string_bytes.items[start..]) |*byte| switch (byte.*) { + '{', '}', '*', '[', ']', '(', ')', ',', ' ', '\'' => byte.* = '_', + else => {}, + }; + } + + return ip.getOrPutTrailingString(gpa, ip.string_bytes.items.len - start); + } + + pub fn getType(ns: Namespace, zcu: *Zcu) Type { + const decl = zcu.declPtr(ns.decl_index); + assert(decl.has_tv); + return decl.val.toType(); } }; @@ -2559,9 +2574,8 @@ pub fn namespacePtrUnwrap(mod: *Module, index: Namespace.OptionalIndex) ?*Namesp pub fn declIsRoot(mod: *Module, decl_index: Decl.Index) bool { const decl = mod.declPtr(decl_index); const namespace = mod.namespacePtr(decl.src_namespace); - if (namespace.parent != .none) - return false; - return decl_index == namespace.getDeclIndex(mod); + if (namespace.parent != .none) return false; + return decl_index == namespace.decl_index; } fn freeExportList(gpa: Allocator, export_list: *ArrayListUnmanaged(*Export)) void { @@ -3592,7 +3606,7 @@ pub fn ensureFuncBodyAnalyzed(zcu: *Zcu, func_index: InternPool.Index) SemaError defer liveness.deinit(gpa); if (dump_air) { - const fqn = try decl.getFullyQualifiedName(zcu); + const fqn = try decl.fullyQualifiedName(zcu); std.debug.print("# Begin Function AIR: {}:\n", .{fqn.fmt(ip)}); @import("print_air.zig").dump(zcu, air, liveness); std.debug.print("# End Function AIR: {}\n\n", .{fqn.fmt(ip)}); @@ -3738,7 +3752,7 @@ pub fn semaFile(mod: *Module, file: *File) SemaError!void { // InternPool index. const new_namespace_index = try mod.createNamespace(.{ .parent = .none, - .ty = undefined, + .decl_index = undefined, .file_scope = file, }); const new_namespace = mod.namespacePtr(new_namespace_index); @@ -3749,6 +3763,7 @@ pub fn semaFile(mod: *Module, file: *File) SemaError!void { errdefer @panic("TODO error handling"); file.root_decl = new_decl_index.toOptional(); + new_namespace.decl_index = new_decl_index; new_decl.name = try file.fullyQualifiedName(mod); new_decl.name_fully_qualified = true; @@ -3808,7 +3823,6 @@ pub fn semaFile(mod: *Module, file: *File) SemaError!void { _ = try decl.internValue(mod); } - new_namespace.ty = Type.fromInterned(struct_ty); new_decl.val = Value.fromInterned(struct_ty); new_decl.has_tv = true; new_decl.owns_tv = true; @@ -3881,7 +3895,7 @@ fn semaDecl(mod: *Module, decl_index: Decl.Index) !SemaDeclResult { const std_decl = mod.declPtr(std_file.root_decl.unwrap().?); const std_namespace = std_decl.getInnerNamespace(mod).?; const builtin_str = try ip.getOrPutString(gpa, "builtin"); - const builtin_decl = mod.declPtr(std_namespace.decls.getKeyAdapted(builtin_str, DeclAdapter{ .mod = mod }) orelse break :blk .none); + const builtin_decl = mod.declPtr(std_namespace.decls.getKeyAdapted(builtin_str, DeclAdapter{ .zcu = mod }) orelse break :blk .none); const builtin_namespace = builtin_decl.getInnerNamespaceIndex(mod).unwrap() orelse break :blk .none; if (decl.src_namespace != builtin_namespace) break :blk .none; // We're in builtin.zig. This could be a builtin we need to add to a specific InternPool index. 
@@ -4576,8 +4590,8 @@ fn scanDecl(iter: *ScanDeclIter, decl_inst: Zir.Inst.Index) Allocator.Error!void const gop = try namespace.decls.getOrPutContextAdapted( gpa, decl_name, - DeclAdapter{ .mod = zcu }, - Namespace.DeclContext{ .module = zcu }, + DeclAdapter{ .zcu = zcu }, + Namespace.DeclContext{ .zcu = zcu }, ); const comp = zcu.comp; if (!gop.found_existing) { @@ -4600,12 +4614,11 @@ fn scanDecl(iter: *ScanDeclIter, decl_inst: Zir.Inst.Index) Allocator.Error!void .@"test" => a: { if (!comp.config.is_test) break :a false; if (decl_mod != zcu.main_mod) break :a false; - if (is_named_test) { - if (comp.test_filter) |test_filter| { - if (mem.indexOf(u8, ip.stringToSlice(decl_name), test_filter) == null) { - break :a false; - } - } + if (is_named_test and comp.test_filters.len > 0) { + const decl_fqn = ip.stringToSlice(try namespace.fullyQualifiedName(zcu, decl_name)); + for (comp.test_filters) |test_filter| { + if (mem.indexOf(u8, decl_fqn, test_filter)) |_| break; + } else break :a false; } try zcu.test_functions.put(gpa, new_decl_index, {}); break :a true; @@ -5622,7 +5635,7 @@ pub fn populateTestFunctions( const test_functions_str = try ip.getOrPutString(gpa, "test_functions"); const decl_index = builtin_namespace.decls.getKeyAdapted( test_functions_str, - DeclAdapter{ .mod = mod }, + DeclAdapter{ .zcu = mod }, ).?; { // We have to call `ensureDeclAnalyzed` here in case `builtin.test_functions` @@ -5646,8 +5659,7 @@ pub fn populateTestFunctions( for (test_fn_vals, mod.test_functions.keys()) |*test_fn_val, test_decl_index| { const test_decl = mod.declPtr(test_decl_index); - // TODO: write something like getCoercedInts to avoid needing to dupe - const test_decl_name = try gpa.dupe(u8, ip.stringToSlice(test_decl.name)); + const test_decl_name = try gpa.dupe(u8, ip.stringToSlice(try test_decl.fullyQualifiedName(mod))); defer gpa.free(test_decl_name); const test_name_decl_index = n: { const test_name_decl_ty = try mod.arrayType(.{ @@ -6359,17 +6371,13 @@ pub fn opaqueSrcLoc(mod: *Module, opaque_type: InternPool.Key.OpaqueType) SrcLoc } pub fn opaqueFullyQualifiedName(mod: *Module, opaque_type: InternPool.Key.OpaqueType) !InternPool.NullTerminatedString { - return mod.declPtr(opaque_type.decl).getFullyQualifiedName(mod); + return mod.declPtr(opaque_type.decl).fullyQualifiedName(mod); } pub fn declFileScope(mod: *Module, decl_index: Decl.Index) *File { return mod.declPtr(decl_index).getFileScope(mod); } -pub fn namespaceDeclIndex(mod: *Module, namespace_index: Namespace.Index) Decl.Index { - return mod.namespacePtr(namespace_index).getDeclIndex(mod); -} - /// Returns null in the following cases: /// * `@TypeOf(.{})` /// * A struct which has no fields (`struct {}`). 
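One detail preserved in the move of fully-qualified-name building from `Decl.getFullyQualifiedName` to `Namespace.fullyQualifiedName` above: the exact output length is counted first and `string_bytes` capacity is reserved up front, so the subsequent render uses an infallible writer and cannot reallocate the interned-string buffer it is concurrently reading. A minimal sketch of that reserve-then-write pattern under simplified assumptions (hypothetical helper using a plain `ArrayList` instead of the intern pool):

const std = @import("std");

/// Join name components with '.', reserving all bytes before writing so the
/// append loop itself cannot fail (and cannot move already-written bytes).
fn appendQualifiedName(list: *std.ArrayList(u8), parts: []const []const u8) !void {
    var count: usize = 0;
    for (parts) |part| count += part.len + 1; // component plus separator
    try list.ensureUnusedCapacity(count); // the only fallible step
    for (parts, 0..) |part, i| {
        if (i != 0) list.appendAssumeCapacity('.');
        list.appendSliceAssumeCapacity(part);
    }
}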
diff --git a/src/Package/Fetch.zig b/src/Package/Fetch.zig index 8fbaf79ea5..e4e944d186 100644 --- a/src/Package/Fetch.zig +++ b/src/Package/Fetch.zig @@ -402,7 +402,7 @@ pub fn run(f: *Fetch) RunError!void { return error.FetchFailed; }, } - } else { + } else if (f.job_queue.read_only) { try eb.addRootErrorMessage(.{ .msg = try eb.addString("dependency is missing hash field"), .src_loc = try f.srcLoc(f.location_tok), diff --git a/src/Sema.zig b/src/Sema.zig index 972faff75f..741c4e2fba 100644 --- a/src/Sema.zig +++ b/src/Sema.zig @@ -2801,10 +2801,9 @@ fn zirStructDecl( const new_namespace_index = try mod.createNamespace(.{ .parent = block.namespace.toOptional(), - .ty = undefined, + .decl_index = new_decl_index, .file_scope = block.getFileScope(mod), }); - const new_namespace = mod.namespacePtr(new_namespace_index); errdefer mod.destroyNamespace(new_namespace_index); const struct_ty = ty: { @@ -2821,7 +2820,6 @@ fn zirStructDecl( new_decl.ty = Type.type; new_decl.val = Value.fromInterned(struct_ty); - new_namespace.ty = Type.fromInterned(struct_ty); const decl_val = sema.analyzeDeclVal(block, src, new_decl_index); try mod.finalizeAnonDecl(new_decl_index); @@ -2990,10 +2988,9 @@ fn zirEnumDecl( const new_namespace_index = try mod.createNamespace(.{ .parent = block.namespace.toOptional(), - .ty = undefined, + .decl_index = new_decl_index, .file_scope = block.getFileScope(mod), }); - const new_namespace = mod.namespacePtr(new_namespace_index); errdefer if (!done) mod.destroyNamespace(new_namespace_index); const decls = sema.code.bodySlice(extra_index, decls_len); @@ -3036,7 +3033,6 @@ fn zirEnumDecl( new_decl.ty = Type.type; new_decl.val = Value.fromInterned(incomplete_enum.index); - new_namespace.ty = Type.fromInterned(incomplete_enum.index); const decl_val = try sema.analyzeDeclVal(block, src, new_decl_index); try mod.finalizeAnonDecl(new_decl_index); @@ -3248,10 +3244,9 @@ fn zirUnionDecl( const new_namespace_index = try mod.createNamespace(.{ .parent = block.namespace.toOptional(), - .ty = undefined, + .decl_index = new_decl_index, .file_scope = block.getFileScope(mod), }); - const new_namespace = mod.namespacePtr(new_namespace_index); errdefer mod.destroyNamespace(new_namespace_index); const union_ty = ty: { @@ -3292,7 +3287,6 @@ fn zirUnionDecl( new_decl.ty = Type.type; new_decl.val = Value.fromInterned(union_ty); - new_namespace.ty = Type.fromInterned(union_ty); const decls = sema.code.bodySlice(extra_index, decls_len); try mod.scanNamespace(new_namespace_index, decls, new_decl); @@ -3346,10 +3340,9 @@ fn zirOpaqueDecl( const new_namespace_index = try mod.createNamespace(.{ .parent = block.namespace.toOptional(), - .ty = undefined, + .decl_index = new_decl_index, .file_scope = block.getFileScope(mod), }); - const new_namespace = mod.namespacePtr(new_namespace_index); errdefer mod.destroyNamespace(new_namespace_index); const opaque_ty = try mod.intern(.{ .opaque_type = .{ @@ -3362,7 +3355,6 @@ fn zirOpaqueDecl( new_decl.ty = Type.type; new_decl.val = Value.fromInterned(opaque_ty); - new_namespace.ty = Type.fromInterned(opaque_ty); const decls = sema.code.bodySlice(extra_index, decls_len); try mod.scanNamespace(new_namespace_index, decls, new_decl); @@ -4834,7 +4826,7 @@ fn validateStructInit( if (root_msg) |msg| { if (mod.typeToStruct(struct_ty)) |struct_type| { const decl = mod.declPtr(struct_type.decl.unwrap().?); - const fqn = try decl.getFullyQualifiedName(mod); + const fqn = try decl.fullyQualifiedName(mod); try mod.errNoteNonLazy( decl.srcLoc(mod), msg, @@ -4961,7 +4953,7 @@ fn 
validateStructInit( if (root_msg) |msg| { if (mod.typeToStruct(struct_ty)) |struct_type| { const decl = mod.declPtr(struct_type.decl.unwrap().?); - const fqn = try decl.getFullyQualifiedName(mod); + const fqn = try decl.fullyQualifiedName(mod); try mod.errNoteNonLazy( decl.srcLoc(mod), msg, @@ -5355,7 +5347,7 @@ fn failWithBadStructFieldAccess( const mod = sema.mod; const gpa = sema.gpa; const decl = mod.declPtr(struct_type.decl.unwrap().?); - const fqn = try decl.getFullyQualifiedName(mod); + const fqn = try decl.fullyQualifiedName(mod); const msg = msg: { const msg = try sema.errMsg( @@ -5382,7 +5374,7 @@ fn failWithBadUnionFieldAccess( const gpa = sema.gpa; const decl = mod.declPtr(union_obj.decl); - const fqn = try decl.getFullyQualifiedName(mod); + const fqn = try decl.fullyQualifiedName(mod); const msg = msg: { const msg = try sema.errMsg( @@ -6504,8 +6496,7 @@ fn lookupInNamespace( const mod = sema.mod; const namespace = mod.namespacePtr(namespace_index); - const namespace_decl_index = namespace.getDeclIndex(mod); - const namespace_decl = mod.declPtr(namespace_decl_index); + const namespace_decl = mod.declPtr(namespace.decl_index); if (namespace_decl.analysis == .file_failure) { return error.AnalysisFail; } @@ -6526,7 +6517,7 @@ fn lookupInNamespace( while (check_i < checked_namespaces.count()) : (check_i += 1) { const check_ns = checked_namespaces.keys()[check_i]; - if (check_ns.decls.getKeyAdapted(ident_name, Module.DeclAdapter{ .mod = mod })) |decl_index| { + if (check_ns.decls.getKeyAdapted(ident_name, Module.DeclAdapter{ .zcu = mod })) |decl_index| { // Skip decls which are not marked pub, which are in a different // file than the `a.b`/`@hasDecl` syntax. const decl = mod.declPtr(decl_index); @@ -6584,7 +6575,7 @@ fn lookupInNamespace( return sema.failWithOwnedErrorMsg(block, msg); }, } - } else if (namespace.decls.getKeyAdapted(ident_name, Module.DeclAdapter{ .mod = mod })) |decl_index| { + } else if (namespace.decls.getKeyAdapted(ident_name, Module.DeclAdapter{ .zcu = mod })) |decl_index| { return decl_index; } @@ -17210,7 +17201,7 @@ fn zirThis( extended: Zir.Inst.Extended.InstData, ) CompileError!Air.Inst.Ref { const mod = sema.mod; - const this_decl_index = mod.namespaceDeclIndex(block.namespace); + const this_decl_index = mod.namespacePtr(block.namespace).decl_index; const src = LazySrcLoc.nodeOffset(@bitCast(extended.operand)); return sema.analyzeDeclVal(block, src, this_decl_index); } @@ -20075,7 +20066,7 @@ fn finishStructInit( if (root_msg) |msg| { if (mod.typeToStruct(struct_ty)) |struct_type| { const decl = mod.declPtr(struct_type.decl.unwrap().?); - const fqn = try decl.getFullyQualifiedName(mod); + const fqn = try decl.fullyQualifiedName(mod); try mod.errNoteNonLazy( decl.srcLoc(mod), msg, @@ -21404,10 +21395,9 @@ fn zirReify( const new_namespace_index = try mod.createNamespace(.{ .parent = block.namespace.toOptional(), - .ty = undefined, + .decl_index = new_decl_index, .file_scope = block.getFileScope(mod), }); - const new_namespace = mod.namespacePtr(new_namespace_index); errdefer mod.destroyNamespace(new_namespace_index); const opaque_ty = try mod.intern(.{ .opaque_type = .{ @@ -21420,7 +21410,6 @@ fn zirReify( new_decl.ty = Type.type; new_decl.val = Value.fromInterned(opaque_ty); - new_namespace.ty = Type.fromInterned(opaque_ty); const decl_val = sema.analyzeDeclVal(block, src, new_decl_index); try mod.finalizeAnonDecl(new_decl_index); @@ -21614,10 +21603,9 @@ fn zirReify( const new_namespace_index = try mod.createNamespace(.{ .parent = 
block.namespace.toOptional(), - .ty = undefined, + .decl_index = new_decl_index, .file_scope = block.getFileScope(mod), }); - const new_namespace = mod.namespacePtr(new_namespace_index); errdefer mod.destroyNamespace(new_namespace_index); const union_ty = try ip.getUnionType(gpa, .{ @@ -21649,7 +21637,6 @@ fn zirReify( new_decl.ty = Type.type; new_decl.val = Value.fromInterned(union_ty); - new_namespace.ty = Type.fromInterned(union_ty); const decl_val = sema.analyzeDeclVal(block, src, new_decl_index); try mod.finalizeAnonDecl(new_decl_index); @@ -23328,7 +23315,8 @@ fn checkVectorElemType( const mod = sema.mod; switch (ty.zigTypeTag(mod)) { .Int, .Float, .Bool => return, - else => if (ty.isPtrAtRuntime(mod)) return, + .Optional, .Pointer => if (ty.isPtrAtRuntime(mod)) return, + else => {}, } return sema.fail(block, ty_src, "expected integer, float, bool, or pointer for the vector element type; found '{}'", .{ty.fmt(mod)}); } @@ -28455,7 +28443,7 @@ const CoerceOpts = struct { report_err: bool = true, /// Ignored if `report_err == false`. is_ret: bool = false, - /// Should coercion to comptime_int ermit an error message. + /// Should coercion to comptime_int emit an error message. no_cast_to_comptime_int: bool = false, param_src: struct { @@ -31858,6 +31846,34 @@ fn coerceArrayLike( } const dest_elem_ty = dest_ty.childType(mod); + if (dest_ty.isVector(mod) and inst_ty.isVector(mod) and (try sema.resolveValue(inst)) == null) { + const inst_elem_ty = inst_ty.childType(mod); + switch (dest_elem_ty.zigTypeTag(mod)) { + .Int => if (inst_elem_ty.isInt(mod)) { + // integer widening + const dst_info = dest_elem_ty.intInfo(mod); + const src_info = inst_elem_ty.intInfo(mod); + if ((src_info.signedness == dst_info.signedness and dst_info.bits >= src_info.bits) or + // small enough unsigned ints can get casted to large enough signed ints + (dst_info.signedness == .signed and dst_info.bits > src_info.bits)) + { + try sema.requireRuntimeBlock(block, inst_src, null); + return block.addTyOp(.intcast, dest_ty, inst); + } + }, + .Float => if (inst_elem_ty.isRuntimeFloat()) { + // float widening + const src_bits = inst_elem_ty.floatBits(target); + const dst_bits = dest_elem_ty.floatBits(target); + if (dst_bits >= src_bits) { + try sema.requireRuntimeBlock(block, inst_src, null); + return block.addTyOp(.fpext, dest_ty, inst); + } + }, + else => {}, + } + } + const element_vals = try sema.arena.alloc(InternPool.Index, dest_len); const element_refs = try sema.arena.alloc(Air.Inst.Ref, dest_len); var runtime_src: ?LazySrcLoc = null; @@ -37260,7 +37276,7 @@ fn generateUnionTagTypeNumbered( const src_decl = mod.declPtr(block.src_decl); const new_decl_index = try mod.allocateNewDecl(block.namespace, src_decl.src_node, block.wip_capture_scope); errdefer mod.destroyDecl(new_decl_index); - const fqn = try decl.getFullyQualifiedName(mod); + const fqn = try decl.fullyQualifiedName(mod); const name = try ip.getOrPutStringFmt(gpa, "@typeInfo({}).Union.tag_type.?", .{fqn.fmt(ip)}); try mod.initNewAnonDecl(new_decl_index, src_decl.src_line, .{ .ty = Type.noreturn, @@ -37269,7 +37285,6 @@ fn generateUnionTagTypeNumbered( errdefer mod.abortAnonDecl(new_decl_index); const new_decl = mod.declPtr(new_decl_index); - new_decl.name_fully_qualified = true; new_decl.owns_tv = true; new_decl.name_fully_qualified = true; @@ -37310,7 +37325,7 @@ fn generateUnionTagTypeSimple( .val = Value.@"unreachable", }); }; - const fqn = try mod.declPtr(decl_index).getFullyQualifiedName(mod); + const fqn = try 
mod.declPtr(decl_index).fullyQualifiedName(mod); const src_decl = mod.declPtr(block.src_decl); const new_decl_index = try mod.allocateNewDecl(block.namespace, src_decl.src_node, block.wip_capture_scope); errdefer mod.destroyDecl(new_decl_index); diff --git a/src/arch/wasm/CodeGen.zig b/src/arch/wasm/CodeGen.zig index 4540724778..781190e13b 100644 --- a/src/arch/wasm/CodeGen.zig +++ b/src/arch/wasm/CodeGen.zig @@ -7223,7 +7223,7 @@ fn getTagNameFunction(func: *CodeGen, enum_ty: Type) InnerError!u32 { defer arena_allocator.deinit(); const arena = arena_allocator.allocator(); - const fqn = ip.stringToSlice(try mod.declPtr(enum_decl_index).getFullyQualifiedName(mod)); + const fqn = ip.stringToSlice(try mod.declPtr(enum_decl_index).fullyQualifiedName(mod)); const func_name = try std.fmt.allocPrintZ(arena, "__zig_tag_name_{s}", .{fqn}); // check if we already generated code for this. diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index f9a291f40b..4ca2ae44bb 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -1547,6 +1547,27 @@ fn asmRegisterRegisterMemory( }); } +fn asmRegisterRegisterMemoryRegister( + self: *Self, + tag: Mir.Inst.FixedTag, + reg1: Register, + reg2: Register, + m: Memory, + reg3: Register, +) !void { + _ = try self.addInst(.{ + .tag = tag[1], + .ops = .rrmr, + .data = .{ .rrrx = .{ + .fixes = tag[0], + .r1 = reg1, + .r2 = reg2, + .r3 = reg3, + .payload = try self.addExtra(Mir.Memory.encode(m)), + } }, + }); +} + fn asmMemory(self: *Self, tag: Mir.Inst.FixedTag, m: Memory) !void { _ = try self.addInst(.{ .tag = tag[1], @@ -1570,6 +1591,25 @@ fn asmRegisterMemory(self: *Self, tag: Mir.Inst.FixedTag, reg: Register, m: Memo }); } +fn asmRegisterMemoryRegister( + self: *Self, + tag: Mir.Inst.FixedTag, + reg1: Register, + m: Memory, + reg2: Register, +) !void { + _ = try self.addInst(.{ + .tag = tag[1], + .ops = .rmr, + .data = .{ .rrx = .{ + .fixes = tag[0], + .r1 = reg1, + .r2 = reg2, + .payload = try self.addExtra(Mir.Memory.encode(m)), + } }, + }); +} + fn asmRegisterMemoryImmediate( self: *Self, tag: Mir.Inst.FixedTag, @@ -2570,7 +2610,8 @@ fn restoreState(self: *Self, state: State, deaths: []const Air.Inst.Index, compt const ExpectedContents = [@typeInfo(RegisterManager.TrackedRegisters).Array.len]RegisterLock; var stack align(@max(@alignOf(ExpectedContents), @alignOf(std.heap.StackFallbackAllocator(0)))) = - if (opts.update_tracking) ({}) else std.heap.stackFallback(@sizeOf(ExpectedContents), self.gpa); + if (opts.update_tracking) + {} else std.heap.stackFallback(@sizeOf(ExpectedContents), self.gpa); var reg_locks = if (opts.update_tracking) {} else try std.ArrayList(RegisterLock).initCapacity( stack.get(), @@ -2812,11 +2853,14 @@ fn airFptrunc(self: *Self, inst: Air.Inst.Index) !void { } fn airFpext(self: *Self, inst: Air.Inst.Index) !void { + const mod = self.bin_file.comp.module.?; const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op; const dst_ty = self.typeOfIndex(inst); - const dst_bits = dst_ty.floatBits(self.target.*); + const dst_scalar_ty = dst_ty.scalarType(mod); + const dst_bits = dst_scalar_ty.floatBits(self.target.*); const src_ty = self.typeOf(ty_op.operand); - const src_bits = src_ty.floatBits(self.target.*); + const src_scalar_ty = src_ty.scalarType(mod); + const src_bits = src_scalar_ty.floatBits(self.target.*); const result = result: { if (switch (src_bits) { @@ -2840,94 +2884,290 @@ fn airFpext(self: *Self, inst: Air.Inst.Index) !void { }, else => unreachable, }) { + if 
(dst_ty.isVector(mod)) break :result null; var callee_buf: ["__extend?f?f2".len]u8 = undefined; break :result try self.genCall(.{ .lib = .{ - .return_type = self.floatCompilerRtAbiType(dst_ty, src_ty).toIntern(), - .param_types = &.{self.floatCompilerRtAbiType(src_ty, dst_ty).toIntern()}, + .return_type = self.floatCompilerRtAbiType(dst_scalar_ty, src_scalar_ty).toIntern(), + .param_types = &.{self.floatCompilerRtAbiType(src_scalar_ty, dst_scalar_ty).toIntern()}, .callee = std.fmt.bufPrint(&callee_buf, "__extend{c}f{c}f2", .{ floatCompilerRtAbiName(src_bits), floatCompilerRtAbiName(dst_bits), }) catch unreachable, - } }, &.{src_ty}, &.{.{ .air_ref = ty_op.operand }}); + } }, &.{src_scalar_ty}, &.{.{ .air_ref = ty_op.operand }}); } + const src_abi_size: u32 = @intCast(src_ty.abiSize(mod)); const src_mcv = try self.resolveInst(ty_op.operand); const dst_mcv = if (src_mcv.isRegister() and self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) src_mcv else try self.copyToRegisterWithInstTracking(inst, dst_ty, src_mcv); - const dst_reg = dst_mcv.getReg().?.to128(); + const dst_reg = dst_mcv.getReg().?; + const dst_alias = registerAlias(dst_reg, @intCast(@max(dst_ty.abiSize(mod), 16))); const dst_lock = self.register_manager.lockReg(dst_reg); defer if (dst_lock) |lock| self.register_manager.unlockReg(lock); + const vec_len = if (dst_ty.isVector(mod)) dst_ty.vectorLen(mod) else 1; if (src_bits == 16) { assert(self.hasFeature(.f16c)); const mat_src_reg = if (src_mcv.isRegister()) src_mcv.getReg().? else try self.copyToTmpRegister(src_ty, src_mcv); - try self.asmRegisterRegister(.{ .v_ps, .cvtph2 }, dst_reg, mat_src_reg.to128()); + try self.asmRegisterRegister( + .{ .v_ps, .cvtph2 }, + dst_alias, + registerAlias(mat_src_reg, src_abi_size), + ); switch (dst_bits) { 32 => {}, 64 => try self.asmRegisterRegisterRegister( .{ .v_sd, .cvtss2 }, - dst_reg, - dst_reg, - dst_reg, + dst_alias, + dst_alias, + dst_alias, ), else => unreachable, } } else { assert(src_bits == 32 and dst_bits == 64); - if (self.hasFeature(.avx)) if (src_mcv.isMemory()) try self.asmRegisterRegisterMemory( - .{ .v_sd, .cvtss2 }, - dst_reg, - dst_reg, - try src_mcv.mem(self, .dword), - ) else try self.asmRegisterRegisterRegister( - .{ .v_sd, .cvtss2 }, - dst_reg, - dst_reg, - (if (src_mcv.isRegister()) - src_mcv.getReg().? - else - try self.copyToTmpRegister(src_ty, src_mcv)).to128(), - ) else if (src_mcv.isMemory()) try self.asmRegisterMemory( - .{ ._sd, .cvtss2 }, - dst_reg, - try src_mcv.mem(self, .dword), + if (self.hasFeature(.avx)) switch (vec_len) { + 1 => if (src_mcv.isMemory()) try self.asmRegisterRegisterMemory( + .{ .v_sd, .cvtss2 }, + dst_alias, + dst_alias, + try src_mcv.mem(self, self.memSize(src_ty)), + ) else try self.asmRegisterRegisterRegister( + .{ .v_sd, .cvtss2 }, + dst_alias, + dst_alias, + registerAlias(if (src_mcv.isRegister()) + src_mcv.getReg().? + else + try self.copyToTmpRegister(src_ty, src_mcv), src_abi_size), + ), + 2...4 => if (src_mcv.isMemory()) try self.asmRegisterMemory( + .{ .v_pd, .cvtps2 }, + dst_alias, + try src_mcv.mem(self, self.memSize(src_ty)), + ) else try self.asmRegisterRegister( + .{ .v_pd, .cvtps2 }, + dst_alias, + registerAlias(if (src_mcv.isRegister()) + src_mcv.getReg().? 
+ else + try self.copyToTmpRegister(src_ty, src_mcv), src_abi_size), + ), + else => break :result null, + } else if (src_mcv.isMemory()) try self.asmRegisterMemory( + switch (vec_len) { + 1 => .{ ._sd, .cvtss2 }, + 2 => .{ ._pd, .cvtps2 }, + else => break :result null, + }, + dst_alias, + try src_mcv.mem(self, self.memSize(src_ty)), ) else try self.asmRegisterRegister( - .{ ._sd, .cvtss2 }, - dst_reg, - (if (src_mcv.isRegister()) + switch (vec_len) { + 1 => .{ ._sd, .cvtss2 }, + 2 => .{ ._pd, .cvtps2 }, + else => break :result null, + }, + dst_alias, + registerAlias(if (src_mcv.isRegister()) src_mcv.getReg().? else - try self.copyToTmpRegister(src_ty, src_mcv)).to128(), + try self.copyToTmpRegister(src_ty, src_mcv), src_abi_size), ); } break :result dst_mcv; - }; + } orelse return self.fail("TODO implement airFpext from {} to {}", .{ + src_ty.fmt(mod), dst_ty.fmt(mod), + }); return self.finishAir(inst, result, .{ ty_op.operand, .none, .none }); } fn airIntCast(self: *Self, inst: Air.Inst.Index) !void { const mod = self.bin_file.comp.module.?; const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op; - const result: MCValue = result: { - const src_ty = self.typeOf(ty_op.operand); - const src_int_info = src_ty.intInfo(mod); + const src_ty = self.typeOf(ty_op.operand); + const dst_ty = self.typeOfIndex(inst); - const dst_ty = self.typeOfIndex(inst); - const dst_int_info = dst_ty.intInfo(mod); - const abi_size: u32 = @intCast(dst_ty.abiSize(mod)); + const result = @as(?MCValue, result: { + const dst_abi_size: u32 = @intCast(dst_ty.abiSize(mod)); - const min_ty = if (dst_int_info.bits < src_int_info.bits) dst_ty else src_ty; + const src_int_info = src_ty.intInfo(mod); + const dst_int_info = dst_ty.intInfo(mod); const extend = switch (src_int_info.signedness) { .signed => dst_int_info, .unsigned => src_int_info, }.signedness; const src_mcv = try self.resolveInst(ty_op.operand); + if (dst_ty.isVector(mod)) { + const src_abi_size: u32 = @intCast(src_ty.abiSize(mod)); + const max_abi_size = @max(dst_abi_size, src_abi_size); + if (max_abi_size > @as(u32, if (self.hasFeature(.avx2)) 32 else 16)) break :result null; + const has_avx = self.hasFeature(.avx); + + const dst_elem_abi_size = dst_ty.childType(mod).abiSize(mod); + const src_elem_abi_size = src_ty.childType(mod).abiSize(mod); + switch (math.order(dst_elem_abi_size, src_elem_abi_size)) { + .lt => { + const mir_tag: Mir.Inst.FixedTag = switch (dst_elem_abi_size) { + else => break :result null, + 1 => switch (src_elem_abi_size) { + else => break :result null, + 2 => switch (dst_int_info.signedness) { + .signed => if (has_avx) .{ .vp_b, .ackssw } else .{ .p_b, .ackssw }, + .unsigned => if (has_avx) .{ .vp_b, .ackusw } else .{ .p_b, .ackusw }, + }, + }, + 2 => switch (src_elem_abi_size) { + else => break :result null, + 4 => switch (dst_int_info.signedness) { + .signed => if (has_avx) .{ .vp_w, .ackssd } else .{ .p_w, .ackssd }, + .unsigned => if (has_avx) + .{ .vp_w, .ackusd } + else if (self.hasFeature(.sse4_1)) + .{ .p_w, .ackusd } + else + break :result null, + }, + }, + }; + + const dst_mcv: MCValue = if (src_mcv.isRegister() and + self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) + src_mcv + else if (has_avx and src_mcv.isRegister()) + .{ .register = try self.register_manager.allocReg(inst, abi.RegisterClass.sse) } + else + try self.copyToRegisterWithInstTracking(inst, src_ty, src_mcv); + const dst_reg = dst_mcv.getReg().?; + const dst_alias = registerAlias(dst_reg, dst_abi_size); + + if (has_avx) try 
self.asmRegisterRegisterRegister( + mir_tag, + dst_alias, + registerAlias(if (src_mcv.isRegister()) + src_mcv.getReg().? + else + dst_reg, src_abi_size), + dst_alias, + ) else try self.asmRegisterRegister( + mir_tag, + dst_alias, + dst_alias, + ); + break :result dst_mcv; + }, + .eq => if (self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) + break :result src_mcv + else { + const dst_mcv = try self.allocRegOrMem(inst, true); + try self.genCopy(dst_ty, dst_mcv, src_mcv, .{}); + break :result dst_mcv; + }, + .gt => if (self.hasFeature(.sse4_1)) { + const mir_tag: Mir.Inst.FixedTag = .{ switch (dst_elem_abi_size) { + else => break :result null, + 2 => if (has_avx) .vp_w else .p_w, + 4 => if (has_avx) .vp_d else .p_d, + 8 => if (has_avx) .vp_q else .p_q, + }, switch (src_elem_abi_size) { + else => break :result null, + 1 => switch (extend) { + .signed => .movsxb, + .unsigned => .movzxb, + }, + 2 => switch (extend) { + .signed => .movsxw, + .unsigned => .movzxw, + }, + 4 => switch (extend) { + .signed => .movsxd, + .unsigned => .movzxd, + }, + } }; + + const dst_mcv: MCValue = if (src_mcv.isRegister() and + self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) + src_mcv + else + .{ .register = try self.register_manager.allocReg(inst, abi.RegisterClass.sse) }; + const dst_reg = dst_mcv.getReg().?; + const dst_alias = registerAlias(dst_reg, dst_abi_size); + + if (src_mcv.isMemory()) try self.asmRegisterMemory( + mir_tag, + dst_alias, + try src_mcv.mem(self, self.memSize(src_ty)), + ) else try self.asmRegisterRegister( + mir_tag, + dst_alias, + registerAlias(if (src_mcv.isRegister()) + src_mcv.getReg().? + else + try self.copyToTmpRegister(src_ty, src_mcv), src_abi_size), + ); + break :result dst_mcv; + } else { + const mir_tag: Mir.Inst.FixedTag = switch (dst_elem_abi_size) { + else => break :result null, + 2 => switch (src_elem_abi_size) { + else => break :result null, + 1 => .{ .p_, .unpcklbw }, + }, + 4 => switch (src_elem_abi_size) { + else => break :result null, + 2 => .{ .p_, .unpcklwd }, + }, + 8 => switch (src_elem_abi_size) { + else => break :result null, + 2 => .{ .p_, .unpckldq }, + }, + }; + + const dst_mcv: MCValue = if (src_mcv.isRegister() and + self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) + src_mcv + else + try self.copyToRegisterWithInstTracking(inst, dst_ty, src_mcv); + const dst_reg = dst_mcv.getReg().?; + + const ext_reg = try self.register_manager.allocReg(null, abi.RegisterClass.sse); + const ext_alias = registerAlias(ext_reg, src_abi_size); + const ext_lock = self.register_manager.lockRegAssumeUnused(ext_reg); + defer self.register_manager.unlockReg(ext_lock); + + try self.asmRegisterRegister(.{ .p_, .xor }, ext_alias, ext_alias); + switch (extend) { + .signed => try self.asmRegisterRegister( + .{ switch (src_elem_abi_size) { + else => unreachable, + 1 => .p_b, + 2 => .p_w, + 4 => .p_d, + }, .cmpgt }, + ext_alias, + registerAlias(dst_reg, src_abi_size), + ), + .unsigned => {}, + } + try self.asmRegisterRegister( + mir_tag, + registerAlias(dst_reg, dst_abi_size), + registerAlias(ext_reg, dst_abi_size), + ); + break :result dst_mcv; + }, + } + @compileError("unreachable"); + } + + const min_ty = if (dst_int_info.bits < src_int_info.bits) dst_ty else src_ty; + const src_storage_bits: u16 = switch (src_mcv) { .register, .register_offset => 64, .register_pair => 128, @@ -2945,13 +3185,13 @@ fn airIntCast(self: *Self, inst: Air.Inst.Index) !void { }; if (dst_int_info.bits <= src_int_info.bits) break :result if (dst_mcv.isRegister()) - .{ .register = 
registerAlias(dst_mcv.getReg().?, abi_size) } + .{ .register = registerAlias(dst_mcv.getReg().?, dst_abi_size) } else dst_mcv; if (dst_mcv.isRegister()) { try self.truncateRegister(src_ty, dst_mcv.getReg().?); - break :result .{ .register = registerAlias(dst_mcv.getReg().?, abi_size) }; + break :result .{ .register = registerAlias(dst_mcv.getReg().?, dst_abi_size) }; } const src_limbs_len = math.divCeil(u16, src_int_info.bits, 64) catch unreachable; @@ -2999,7 +3239,9 @@ fn airIntCast(self: *Self, inst: Air.Inst.Index) !void { ); break :result dst_mcv; - }; + }) orelse return self.fail("TODO implement airIntCast from {} to {}", .{ + src_ty.fmt(mod), dst_ty.fmt(mod), + }); return self.finishAir(inst, result, .{ ty_op.operand, .none, .none }); } @@ -3022,7 +3264,7 @@ fn airTrunc(self: *Self, inst: Air.Inst.Index) !void { src_mcv else if (dst_abi_size <= 8) try self.copyToRegisterWithInstTracking(inst, dst_ty, src_mcv) - else if (dst_abi_size <= 16) dst: { + else if (dst_abi_size <= 16 and !dst_ty.isVector(mod)) dst: { const dst_regs = try self.register_manager.allocRegs(2, .{ inst, inst }, abi.RegisterClass.gp); const dst_mcv: MCValue = .{ .register_pair = dst_regs }; @@ -3032,26 +3274,29 @@ fn airTrunc(self: *Self, inst: Air.Inst.Index) !void { try self.genCopy(dst_ty, dst_mcv, src_mcv, .{}); break :dst dst_mcv; } else dst: { - const dst_mcv = try self.allocRegOrMem(inst, true); - try self.genCopy(dst_ty, dst_mcv, src_mcv, .{}); + const dst_mcv = try self.allocRegOrMemAdvanced(src_ty, inst, true); + try self.genCopy(src_ty, dst_mcv, src_mcv, .{}); break :dst dst_mcv; }; if (dst_ty.zigTypeTag(mod) == .Vector) { assert(src_ty.zigTypeTag(mod) == .Vector and dst_ty.vectorLen(mod) == src_ty.vectorLen(mod)); - const dst_info = dst_ty.childType(mod).intInfo(mod); - const src_info = src_ty.childType(mod).intInfo(mod); - const mir_tag = @as(?Mir.Inst.FixedTag, switch (dst_info.bits) { - 8 => switch (src_info.bits) { - 16 => switch (dst_ty.vectorLen(mod)) { + const dst_elem_ty = dst_ty.childType(mod); + const dst_elem_abi_size: u32 = @intCast(dst_elem_ty.abiSize(mod)); + const src_elem_ty = src_ty.childType(mod); + const src_elem_abi_size: u32 = @intCast(src_elem_ty.abiSize(mod)); + + const mir_tag = @as(?Mir.Inst.FixedTag, switch (dst_elem_abi_size) { + 1 => switch (src_elem_abi_size) { + 2 => switch (dst_ty.vectorLen(mod)) { 1...8 => if (self.hasFeature(.avx)) .{ .vp_b, .ackusw } else .{ .p_b, .ackusw }, 9...16 => if (self.hasFeature(.avx2)) .{ .vp_b, .ackusw } else null, else => null, }, else => null, }, - 16 => switch (src_info.bits) { - 32 => switch (dst_ty.vectorLen(mod)) { + 2 => switch (src_elem_abi_size) { + 4 => switch (dst_ty.vectorLen(mod)) { 1...4 => if (self.hasFeature(.avx)) .{ .vp_w, .ackusd } else if (self.hasFeature(.sse4_1)) @@ -3066,12 +3311,14 @@ fn airTrunc(self: *Self, inst: Air.Inst.Index) !void { else => null, }) orelse return self.fail("TODO implement airTrunc for {}", .{dst_ty.fmt(mod)}); - const elem_ty = src_ty.childType(mod); - const mask_val = try mod.intValue(elem_ty, @as(u64, math.maxInt(u64)) >> @intCast(64 - dst_info.bits)); + const dst_info = dst_elem_ty.intInfo(mod); + const src_info = src_elem_ty.intInfo(mod); + + const mask_val = try mod.intValue(src_elem_ty, @as(u64, math.maxInt(u64)) >> @intCast(64 - dst_info.bits)); const splat_ty = try mod.vectorType(.{ .len = @intCast(@divExact(@as(u64, if (src_abi_size > 16) 256 else 128), src_info.bits)), - .child = elem_ty.ip_index, + .child = src_elem_ty.ip_index, }); const splat_abi_size: u32 = 
@intCast(splat_ty.abiSize(mod)); @@ -3086,22 +3333,40 @@ fn airTrunc(self: *Self, inst: Air.Inst.Index) !void { else => .{ .register = try self.copyToTmpRegister(Type.usize, splat_mcv.address()) }, }; - const dst_reg = registerAlias(dst_mcv.getReg().?, src_abi_size); + const dst_reg = dst_mcv.getReg().?; + const dst_alias = registerAlias(dst_reg, src_abi_size); if (self.hasFeature(.avx)) { try self.asmRegisterRegisterMemory( .{ .vp_, .@"and" }, - dst_reg, - dst_reg, + dst_alias, + dst_alias, try splat_addr_mcv.deref().mem(self, Memory.Size.fromSize(splat_abi_size)), ); - try self.asmRegisterRegisterRegister(mir_tag, dst_reg, dst_reg, dst_reg); + if (src_abi_size > 16) { + const temp_reg = try self.register_manager.allocReg(null, abi.RegisterClass.sse); + const temp_lock = self.register_manager.lockRegAssumeUnused(temp_reg); + defer self.register_manager.unlockReg(temp_lock); + + try self.asmRegisterRegisterImmediate( + .{ if (self.hasFeature(.avx2)) .v_i128 else .v_f128, .extract }, + registerAlias(temp_reg, dst_abi_size), + dst_alias, + Immediate.u(1), + ); + try self.asmRegisterRegisterRegister( + mir_tag, + registerAlias(dst_reg, dst_abi_size), + registerAlias(dst_reg, dst_abi_size), + registerAlias(temp_reg, dst_abi_size), + ); + } else try self.asmRegisterRegisterRegister(mir_tag, dst_alias, dst_alias, dst_alias); } else { try self.asmRegisterMemory( .{ .p_, .@"and" }, - dst_reg, + dst_alias, try splat_addr_mcv.deref().mem(self, Memory.Size.fromSize(splat_abi_size)), ); - try self.asmRegisterRegister(mir_tag, dst_reg, dst_reg); + try self.asmRegisterRegister(mir_tag, dst_alias, dst_alias); } break :result dst_mcv; } @@ -4045,7 +4310,7 @@ fn airMulWithOverflow(self: *Self, inst: Air.Inst.Index) !void { if (dst_info.bits > 128 and dst_info.signedness == .unsigned) { const slow_inc = self.hasFeature(.slow_incdec); const abi_size: u32 = @intCast(dst_ty.abiSize(mod)); - const limb_len = std.math.divCeil(u32, abi_size, 8) catch unreachable; + const limb_len = math.divCeil(u32, abi_size, 8) catch unreachable; try self.spillRegisters(&.{ .rax, .rcx, .rdx }); const reg_locks = self.register_manager.lockRegsAssumeUnused(3, .{ .rax, .rcx, .rdx }); @@ -4534,7 +4799,7 @@ fn airShlShrBinOp(self: *Self, inst: Air.Inst.Index) !void { switch (lhs_ty.zigTypeTag(mod)) { .Int => { try self.spillRegisters(&.{.rcx}); - try self.register_manager.getReg(.rcx, null); + try self.register_manager.getKnownReg(.rcx, null); const lhs_mcv = try self.resolveInst(bin_op.lhs); const rhs_mcv = try self.resolveInst(bin_op.rhs); @@ -6560,7 +6825,7 @@ fn floatSign(self: *Self, inst: Air.Inst.Index, operand: Air.Inst.Ref, ty: Type) const dst_mcv: MCValue = .{ .register = .st0 }; if (!std.meta.eql(src_mcv, dst_mcv) or !self.reuseOperand(inst, operand, 0, src_mcv)) - try self.register_manager.getReg(.st0, inst); + try self.register_manager.getKnownReg(.st0, inst); try self.genCopy(ty, dst_mcv, src_mcv, .{}); switch (tag) { @@ -6894,7 +7159,7 @@ fn airAbs(self: *Self, inst: Air.Inst.Index) !void { }, else => { const abi_size: u31 = @intCast(ty.abiSize(mod)); - const limb_len = std.math.divCeil(u31, abi_size, 8) catch unreachable; + const limb_len = math.divCeil(u31, abi_size, 8) catch unreachable; const tmp_regs = try self.register_manager.allocRegs(3, .{null} ** 3, abi.RegisterClass.gp); @@ -8181,7 +8446,7 @@ fn genShiftBinOpMir( try self.asmRegisterImmediate( .{ ._, .@"and" }, .cl, - Immediate.u(std.math.maxInt(u6)), + Immediate.u(math.maxInt(u6)), ); try self.asmRegisterImmediate( .{ ._r, .sh }, @@ -8218,7 +8483,7 @@ fn 
genShiftBinOpMir( try self.asmRegisterImmediate( .{ ._, .@"and" }, .cl, - Immediate.u(std.math.maxInt(u6)), + Immediate.u(math.maxInt(u6)), ); try self.asmRegisterImmediate( .{ ._r, .sh }, @@ -8283,7 +8548,7 @@ fn genShiftBinOpMir( }, .sh }, temp_regs[2].to64(), temp_regs[3].to64(), - Immediate.u(shift_imm & std.math.maxInt(u6)), + Immediate.u(shift_imm & math.maxInt(u6)), ), else => try self.asmRegisterRegisterRegister(.{ switch (tag[0]) { ._l => ._ld, @@ -8338,7 +8603,7 @@ fn genShiftBinOpMir( .immediate => |shift_imm| try self.asmRegisterImmediate( tag, temp_regs[2].to64(), - Immediate.u(shift_imm & std.math.maxInt(u6)), + Immediate.u(shift_imm & math.maxInt(u6)), ), else => try self.asmRegisterRegister(tag, temp_regs[2].to64(), .cl), } @@ -8794,7 +9059,7 @@ fn genShiftBinOp( lhs_ty.fmt(mod), }); - try self.register_manager.getReg(.rcx, null); + try self.register_manager.getKnownReg(.rcx, null); const rcx_lock = self.register_manager.lockReg(.rcx); defer if (rcx_lock) |lock| self.register_manager.unlockReg(lock); @@ -8933,7 +9198,7 @@ fn genMulDivBinOp( switch (tag) { .mul, .mul_wrap => { const slow_inc = self.hasFeature(.slow_incdec); - const limb_len = std.math.divCeil(u32, src_abi_size, 8) catch unreachable; + const limb_len = math.divCeil(u32, src_abi_size, 8) catch unreachable; try self.spillRegisters(&.{ .rax, .rcx, .rdx }); const reg_locks = self.register_manager.lockRegs(3, .{ .rax, .rcx, .rdx }); @@ -9117,8 +9382,8 @@ fn genMulDivBinOp( .rem => maybe_inst, else => null, }; - try self.register_manager.getReg(.rax, track_inst_rax); - try self.register_manager.getReg(.rdx, track_inst_rdx); + try self.register_manager.getKnownReg(.rax, track_inst_rax); + try self.register_manager.getKnownReg(.rdx, track_inst_rdx); try self.genIntMulDivOpMir(switch (signedness) { .signed => switch (tag) { @@ -9158,8 +9423,11 @@ fn genMulDivBinOp( }, .mod => { - try self.register_manager.getReg(.rax, null); - try self.register_manager.getReg(.rdx, if (signedness == .unsigned) maybe_inst else null); + try self.register_manager.getKnownReg(.rax, null); + try self.register_manager.getKnownReg( + .rdx, + if (signedness == .unsigned) maybe_inst else null, + ); switch (signedness) { .signed => { @@ -9200,8 +9468,11 @@ fn genMulDivBinOp( }, .div_floor => { - try self.register_manager.getReg(.rax, if (signedness == .unsigned) maybe_inst else null); - try self.register_manager.getReg(.rdx, null); + try self.register_manager.getKnownReg( + .rax, + if (signedness == .unsigned) maybe_inst else null, + ); + try self.register_manager.getKnownReg(.rdx, null); const lhs_lock: ?RegisterLock = switch (lhs_mcv) { .register => |reg| self.register_manager.lockRegAssumeUnused(reg), @@ -9445,7 +9716,7 @@ fn genBinOp( .rem, .mod => unreachable, .max, .min => if (lhs_ty.scalarType(mod).isRuntimeFloat()) registerAlias( if (!self.hasFeature(.avx) and self.hasFeature(.sse4_1)) mask: { - try self.register_manager.getReg(.xmm0, null); + try self.register_manager.getKnownReg(.xmm0, null); break :mask .xmm0; } else try self.register_manager.allocReg(null, abi.RegisterClass.sse), abi_size, @@ -10820,96 +11091,35 @@ fn genBinOp( lhs_copy_reg.?, mask_reg, ) else { - try self.asmRegisterRegister( - @as(?Mir.Inst.FixedTag, switch (lhs_ty.zigTypeTag(mod)) { - .Float => switch (lhs_ty.floatBits(self.target.*)) { - 32 => .{ ._ps, .@"and" }, - 64 => .{ ._pd, .@"and" }, - 16, 80, 128 => null, - else => unreachable, - }, - .Vector => switch (lhs_ty.childType(mod).zigTypeTag(mod)) { - .Float => switch 
(lhs_ty.childType(mod).floatBits(self.target.*)) { - 32 => switch (lhs_ty.vectorLen(mod)) { - 1...4 => .{ ._ps, .@"and" }, - else => null, - }, - 64 => switch (lhs_ty.vectorLen(mod)) { - 1...2 => .{ ._pd, .@"and" }, - else => null, - }, - 16, 80, 128 => null, - else => unreachable, - }, - else => unreachable, - }, + const mir_fixes = @as(?Mir.Inst.Fixes, switch (lhs_ty.zigTypeTag(mod)) { + .Float => switch (lhs_ty.floatBits(self.target.*)) { + 32 => ._ps, + 64 => ._pd, + 16, 80, 128 => null, else => unreachable, - }) orelse return self.fail("TODO implement genBinOp for {s} {}", .{ - @tagName(air_tag), lhs_ty.fmt(mod), - }), - dst_reg, - mask_reg, - ); - try self.asmRegisterRegister( - @as(?Mir.Inst.FixedTag, switch (lhs_ty.zigTypeTag(mod)) { - .Float => switch (lhs_ty.floatBits(self.target.*)) { - 32 => .{ ._ps, .andn }, - 64 => .{ ._pd, .andn }, - 16, 80, 128 => null, - else => unreachable, - }, - .Vector => switch (lhs_ty.childType(mod).zigTypeTag(mod)) { - .Float => switch (lhs_ty.childType(mod).floatBits(self.target.*)) { - 32 => switch (lhs_ty.vectorLen(mod)) { - 1...4 => .{ ._ps, .andn }, - else => null, - }, - 64 => switch (lhs_ty.vectorLen(mod)) { - 1...2 => .{ ._pd, .andn }, - else => null, - }, - 16, 80, 128 => null, - else => unreachable, + }, + .Vector => switch (lhs_ty.childType(mod).zigTypeTag(mod)) { + .Float => switch (lhs_ty.childType(mod).floatBits(self.target.*)) { + 32 => switch (lhs_ty.vectorLen(mod)) { + 1...4 => ._ps, + else => null, }, - else => unreachable, - }, - else => unreachable, - }) orelse return self.fail("TODO implement genBinOp for {s} {}", .{ - @tagName(air_tag), lhs_ty.fmt(mod), - }), - mask_reg, - lhs_copy_reg.?, - ); - try self.asmRegisterRegister( - @as(?Mir.Inst.FixedTag, switch (lhs_ty.zigTypeTag(mod)) { - .Float => switch (lhs_ty.floatBits(self.target.*)) { - 32 => .{ ._ps, .@"or" }, - 64 => .{ ._pd, .@"or" }, - 16, 80, 128 => null, - else => unreachable, - }, - .Vector => switch (lhs_ty.childType(mod).zigTypeTag(mod)) { - .Float => switch (lhs_ty.childType(mod).floatBits(self.target.*)) { - 32 => switch (lhs_ty.vectorLen(mod)) { - 1...4 => .{ ._ps, .@"or" }, - else => null, - }, - 64 => switch (lhs_ty.vectorLen(mod)) { - 1...2 => .{ ._pd, .@"or" }, - else => null, - }, - 16, 80, 128 => null, - else => unreachable, + 64 => switch (lhs_ty.vectorLen(mod)) { + 1...2 => ._pd, + else => null, }, + 16, 80, 128 => null, else => unreachable, }, else => unreachable, - }) orelse return self.fail("TODO implement genBinOp for {s} {}", .{ - @tagName(air_tag), lhs_ty.fmt(mod), - }), - dst_reg, - mask_reg, - ); + }, + else => unreachable, + }) orelse return self.fail("TODO implement genBinOp for {s} {}", .{ + @tagName(air_tag), lhs_ty.fmt(mod), + }); + try self.asmRegisterRegister(.{ mir_fixes, .@"and" }, dst_reg, mask_reg); + try self.asmRegisterRegister(.{ mir_fixes, .andn }, mask_reg, lhs_copy_reg.?); + try self.asmRegisterRegister(.{ mir_fixes, .@"or" }, dst_reg, mask_reg); } }, .cmp_lt, .cmp_lte, .cmp_eq, .cmp_gte, .cmp_gt, .cmp_neq => { @@ -12192,9 +12402,36 @@ fn airRetLoad(self: *Self, inst: Air.Inst.Index) !void { fn airCmp(self: *Self, inst: Air.Inst.Index, op: math.CompareOperator) !void { const mod = self.bin_file.comp.module.?; const bin_op = self.air.instructions.items(.data)[@intFromEnum(inst)].bin_op; - const ty = self.typeOf(bin_op.lhs); + var ty = self.typeOf(bin_op.lhs); + var null_compare: ?Mir.Inst.Index = null; const result: Condition = result: { + try self.spillEflagsIfOccupied(); + + const lhs_mcv = try self.resolveInst(bin_op.lhs); + 
const lhs_locks: [2]?RegisterLock = switch (lhs_mcv) { + .register => |lhs_reg| .{ self.register_manager.lockRegAssumeUnused(lhs_reg), null }, + .register_pair => |lhs_regs| locks: { + const locks = self.register_manager.lockRegsAssumeUnused(2, lhs_regs); + break :locks .{ locks[0], locks[1] }; + }, + .register_offset => |lhs_ro| .{ + self.register_manager.lockRegAssumeUnused(lhs_ro.reg), + null, + }, + else => .{null} ** 2, + }; + defer for (lhs_locks) |lhs_lock| if (lhs_lock) |lock| self.register_manager.unlockReg(lock); + + const rhs_mcv = try self.resolveInst(bin_op.rhs); + const rhs_locks: [2]?RegisterLock = switch (rhs_mcv) { + .register => |rhs_reg| .{ self.register_manager.lockReg(rhs_reg), null }, + .register_pair => |rhs_regs| self.register_manager.lockRegs(2, rhs_regs), + .register_offset => |rhs_ro| .{ self.register_manager.lockReg(rhs_ro.reg), null }, + else => .{null} ** 2, + }; + defer for (rhs_locks) |rhs_lock| if (rhs_lock) |lock| self.register_manager.unlockReg(lock); + switch (ty.zigTypeTag(mod)) { .Float => { const float_bits = ty.floatBits(self.target.*); @@ -12231,34 +12468,66 @@ fn airCmp(self: *Self, inst: Air.Inst.Index, op: math.CompareOperator) !void { }; } }, - else => {}, - } + .Optional => if (!ty.optionalReprIsPayload(mod)) { + const opt_ty = ty; + const opt_abi_size: u31 = @intCast(opt_ty.abiSize(mod)); + ty = opt_ty.optionalChild(mod); + const payload_abi_size: u31 = @intCast(ty.abiSize(mod)); - try self.spillEflagsIfOccupied(); + const temp_lhs_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp); + const temp_lhs_lock = self.register_manager.lockRegAssumeUnused(temp_lhs_reg); + defer self.register_manager.unlockReg(temp_lhs_lock); - const lhs_mcv = try self.resolveInst(bin_op.lhs); - const lhs_locks: [2]?RegisterLock = switch (lhs_mcv) { - .register => |lhs_reg| .{ self.register_manager.lockRegAssumeUnused(lhs_reg), null }, - .register_pair => |lhs_regs| locks: { - const locks = self.register_manager.lockRegsAssumeUnused(2, lhs_regs); - break :locks .{ locks[0], locks[1] }; - }, - .register_offset => |lhs_ro| .{ - self.register_manager.lockRegAssumeUnused(lhs_ro.reg), - null, - }, - else => .{null} ** 2, - }; - defer for (lhs_locks) |lhs_lock| if (lhs_lock) |lock| self.register_manager.unlockReg(lock); + if (lhs_mcv.isMemory()) try self.asmRegisterMemory( + .{ ._, .mov }, + temp_lhs_reg.to8(), + try lhs_mcv.address().offset(payload_abi_size).deref().mem(self, .byte), + ) else { + try self.genSetReg(temp_lhs_reg, opt_ty, lhs_mcv, .{}); + try self.asmRegisterImmediate( + .{ ._r, .sh }, + registerAlias(temp_lhs_reg, opt_abi_size), + Immediate.u(payload_abi_size * 8), + ); + } - const rhs_mcv = try self.resolveInst(bin_op.rhs); - const rhs_locks: [2]?RegisterLock = switch (rhs_mcv) { - .register => |rhs_reg| .{ self.register_manager.lockReg(rhs_reg), null }, - .register_pair => |rhs_regs| self.register_manager.lockRegs(2, rhs_regs), - .register_offset => |rhs_ro| .{ self.register_manager.lockReg(rhs_ro.reg), null }, - else => .{null} ** 2, - }; - defer for (rhs_locks) |rhs_lock| if (rhs_lock) |lock| self.register_manager.unlockReg(lock); + const payload_compare = payload_compare: { + if (rhs_mcv.isMemory()) { + const rhs_mem = + try rhs_mcv.address().offset(payload_abi_size).deref().mem(self, .byte); + try self.asmMemoryRegister(.{ ._, .@"test" }, rhs_mem, temp_lhs_reg.to8()); + const payload_compare = try self.asmJccReloc(.nz, undefined); + try self.asmRegisterMemory(.{ ._, .cmp }, temp_lhs_reg.to8(), rhs_mem); + break :payload_compare 
payload_compare; + } + + const temp_rhs_reg = try self.copyToTmpRegister(opt_ty, rhs_mcv); + const temp_rhs_lock = self.register_manager.lockRegAssumeUnused(temp_rhs_reg); + defer self.register_manager.unlockReg(temp_rhs_lock); + + try self.asmRegisterImmediate( + .{ ._r, .sh }, + registerAlias(temp_rhs_reg, opt_abi_size), + Immediate.u(payload_abi_size * 8), + ); + try self.asmRegisterRegister( + .{ ._, .@"test" }, + temp_lhs_reg.to8(), + temp_rhs_reg.to8(), + ); + const payload_compare = try self.asmJccReloc(.nz, undefined); + try self.asmRegisterRegister( + .{ ._, .cmp }, + temp_lhs_reg.to8(), + temp_rhs_reg.to8(), + ); + break :payload_compare payload_compare; + }; + null_compare = try self.asmJmpReloc(undefined); + self.performReloc(payload_compare); + }, + else => {}, + } switch (ty.zigTypeTag(mod)) { else => { @@ -12571,6 +12840,7 @@ fn airCmp(self: *Self, inst: Air.Inst.Index, op: math.CompareOperator) !void { } }; + if (null_compare) |reloc| self.performReloc(reloc); self.eflags_inst = inst; return self.finishAir(inst, .{ .eflags = result }, .{ bin_op.lhs, bin_op.rhs, .none }); } @@ -13521,6 +13791,7 @@ fn airAsm(self: *Self, inst: Air.Inst.Index) !void { } else if (constraint.len == 1 and std.ascii.isDigit(constraint[0])) arg: { const index = std.fmt.charToDigit(constraint[0], 10) catch unreachable; if (index >= args.items.len) return self.fail("constraint out of bounds: '{s}'", .{constraint}); + try self.genCopy(ty, args.items[index], input_mcv, .{}); break :arg args.items[index]; } else return self.fail("invalid constraint: '{s}'", .{constraint}); if (arg_mcv.getReg()) |reg| if (RegisterManager.indexOfRegIntoTracked(reg)) |_| { @@ -13619,25 +13890,26 @@ fn airAsm(self: *Self, inst: Air.Inst.Index) !void { label_gop.value_ptr.target = @intCast(self.mir_instructions.len); } else continue; - var mnem_size: ?Memory.Size = null; - const mnem_tag = mnem: { - mnem_size = if (mem.endsWith(u8, mnem_str, "b")) - .byte - else if (mem.endsWith(u8, mnem_str, "w")) - .word - else if (mem.endsWith(u8, mnem_str, "l")) - .dword - else if (mem.endsWith(u8, mnem_str, "q")) - .qword - else if (mem.endsWith(u8, mnem_str, "t")) - .tbyte - else - break :mnem null; - break :mnem std.meta.stringToEnum(Instruction.Mnemonic, mnem_str[0 .. mnem_str.len - 1]); - } orelse mnem: { + var mnem_size: ?Memory.Size = if (mem.endsWith(u8, mnem_str, "b")) + .byte + else if (mem.endsWith(u8, mnem_str, "w")) + .word + else if (mem.endsWith(u8, mnem_str, "l")) + .dword + else if (mem.endsWith(u8, mnem_str, "q") and + (std.mem.indexOfScalar(u8, "vp", mnem_str[0]) == null or !mem.endsWith(u8, mnem_str, "dq"))) + .qword + else if (mem.endsWith(u8, mnem_str, "t")) + .tbyte + else + null; + const mnem_tag = while (true) break std.meta.stringToEnum( + Instruction.Mnemonic, + mnem_str[0 .. 
mnem_str.len - @intFromBool(mnem_size != null)], + ) orelse if (mnem_size) |_| { mnem_size = null; - break :mnem std.meta.stringToEnum(Instruction.Mnemonic, mnem_str); - } orelse return self.fail("invalid mnemonic: '{s}'", .{mnem_str}); + continue; + } else return self.fail("invalid mnemonic: '{s}'", .{mnem_str}); if (@as(?Memory.Size, switch (mnem_tag) { .clflush => .byte, .fldenv, .fnstenv, .fstenv => .none, @@ -14135,30 +14407,8 @@ fn moveStrategy(self: *Self, ty: Type, class: Register.Class, aligned: bool) !Mo else => {}, }, .Int => switch (ty.childType(mod).intInfo(mod).bits) { - 8 => switch (ty.vectorLen(mod)) { - 1 => if (self.hasFeature(.avx)) return .{ .vex_insert_extract = .{ - .insert = .{ .vp_b, .insr }, - .extract = .{ .vp_b, .extr }, - } } else if (self.hasFeature(.sse4_2)) return .{ .insert_extract = .{ - .insert = .{ .p_b, .insr }, - .extract = .{ .p_b, .extr }, - } }, - 2 => return if (self.hasFeature(.avx)) .{ .vex_insert_extract = .{ - .insert = .{ .vp_w, .insr }, - .extract = .{ .vp_w, .extr }, - } } else .{ .insert_extract = .{ - .insert = .{ .p_w, .insr }, - .extract = .{ .p_w, .extr }, - } }, - 3...4 => return .{ .move = if (self.hasFeature(.avx)) - .{ .v_d, .mov } - else - .{ ._d, .mov } }, - 5...8 => return .{ .move = if (self.hasFeature(.avx)) - .{ .v_q, .mov } - else - .{ ._q, .mov } }, - 9...16 => return .{ .move = if (self.hasFeature(.avx)) + 1...8 => switch (ty.vectorLen(mod)) { + 1...16 => return .{ .move = if (self.hasFeature(.avx)) if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu } else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } }, 17...32 => if (self.hasFeature(.avx)) @@ -14168,23 +14418,8 @@ fn moveStrategy(self: *Self, ty: Type, class: Register.Class, aligned: bool) !Mo .{ .v_, .movdqu } }, else => {}, }, - 16 => switch (ty.vectorLen(mod)) { - 1 => return if (self.hasFeature(.avx)) .{ .vex_insert_extract = .{ - .insert = .{ .vp_w, .insr }, - .extract = .{ .vp_w, .extr }, - } } else .{ .insert_extract = .{ - .insert = .{ .p_w, .insr }, - .extract = .{ .p_w, .extr }, - } }, - 2 => return .{ .move = if (self.hasFeature(.avx)) - .{ .v_d, .mov } - else - .{ ._d, .mov } }, - 3...4 => return .{ .move = if (self.hasFeature(.avx)) - .{ .v_q, .mov } - else - .{ ._q, .mov } }, - 5...8 => return .{ .move = if (self.hasFeature(.avx)) + 9...16 => switch (ty.vectorLen(mod)) { + 1...8 => return .{ .move = if (self.hasFeature(.avx)) if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu } else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } }, 9...16 => if (self.hasFeature(.avx)) @@ -14194,16 +14429,8 @@ fn moveStrategy(self: *Self, ty: Type, class: Register.Class, aligned: bool) !Mo .{ .v_, .movdqu } }, else => {}, }, - 32 => switch (ty.vectorLen(mod)) { - 1 => return .{ .move = if (self.hasFeature(.avx)) - .{ .v_d, .mov } - else - .{ ._d, .mov } }, - 2 => return .{ .move = if (self.hasFeature(.avx)) - .{ .v_q, .mov } - else - .{ ._q, .mov } }, - 3...4 => return .{ .move = if (self.hasFeature(.avx)) + 17...32 => switch (ty.vectorLen(mod)) { + 1...4 => return .{ .move = if (self.hasFeature(.avx)) if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu } else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } }, 5...8 => if (self.hasFeature(.avx)) @@ -14213,12 +14440,8 @@ fn moveStrategy(self: *Self, ty: Type, class: Register.Class, aligned: bool) !Mo .{ .v_, .movdqu } }, else => {}, }, - 64 => switch (ty.vectorLen(mod)) { - 1 => return .{ .move = if (self.hasFeature(.avx)) - .{ .v_q, .mov } - else - .{ ._q, .mov } }, - 2 => return .{ .move = if 
(self.hasFeature(.avx)) + 33...64 => switch (ty.vectorLen(mod)) { + 1...2 => return .{ .move = if (self.hasFeature(.avx)) if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu } else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } }, 3...4 => if (self.hasFeature(.avx)) @@ -14228,7 +14451,7 @@ fn moveStrategy(self: *Self, ty: Type, class: Register.Class, aligned: bool) !Mo .{ .v_, .movdqu } }, else => {}, }, - 128 => switch (ty.vectorLen(mod)) { + 65...128 => switch (ty.vectorLen(mod)) { 1 => return .{ .move = if (self.hasFeature(.avx)) if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu } else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } }, @@ -14239,7 +14462,7 @@ fn moveStrategy(self: *Self, ty: Type, class: Register.Class, aligned: bool) !Mo .{ .v_, .movdqu } }, else => {}, }, - 256 => switch (ty.vectorLen(mod)) { + 129...256 => switch (ty.vectorLen(mod)) { 1 => if (self.hasFeature(.avx)) return .{ .move = if (aligned) .{ .v_, .movdqa } @@ -14251,11 +14474,7 @@ fn moveStrategy(self: *Self, ty: Type, class: Register.Class, aligned: bool) !Mo }, .Pointer, .Optional => if (ty.childType(mod).isPtrAtRuntime(mod)) switch (ty.vectorLen(mod)) { - 1 => return .{ .move = if (self.hasFeature(.avx)) - .{ .v_q, .mov } - else - .{ ._q, .mov } }, - 2 => return .{ .move = if (self.hasFeature(.avx)) + 1...2 => return .{ .move = if (self.hasFeature(.avx)) if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu } else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } }, 3...4 => if (self.hasFeature(.avx)) @@ -14269,22 +14488,7 @@ fn moveStrategy(self: *Self, ty: Type, class: Register.Class, aligned: bool) !Mo unreachable, .Float => switch (ty.childType(mod).floatBits(self.target.*)) { 16 => switch (ty.vectorLen(mod)) { - 1 => return if (self.hasFeature(.avx)) .{ .vex_insert_extract = .{ - .insert = .{ .vp_w, .insr }, - .extract = .{ .vp_w, .extr }, - } } else .{ .insert_extract = .{ - .insert = .{ .p_w, .insr }, - .extract = .{ .p_w, .extr }, - } }, - 2 => return .{ .move = if (self.hasFeature(.avx)) - .{ .v_d, .mov } - else - .{ ._d, .mov } }, - 3...4 => return .{ .move = if (self.hasFeature(.avx)) - .{ .v_q, .mov } - else - .{ ._q, .mov } }, - 5...8 => return .{ .move = if (self.hasFeature(.avx)) + 1...8 => return .{ .move = if (self.hasFeature(.avx)) if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu } else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } }, 9...16 => if (self.hasFeature(.avx)) @@ -14295,15 +14499,7 @@ fn moveStrategy(self: *Self, ty: Type, class: Register.Class, aligned: bool) !Mo else => {}, }, 32 => switch (ty.vectorLen(mod)) { - 1 => return .{ .move = if (self.hasFeature(.avx)) - .{ .v_ss, .mov } - else - .{ ._ss, .mov } }, - 2 => return .{ .move = if (self.hasFeature(.avx)) - .{ .v_sd, .mov } - else - .{ ._sd, .mov } }, - 3...4 => return .{ .move = if (self.hasFeature(.avx)) + 1...4 => return .{ .move = if (self.hasFeature(.avx)) if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu } else if (aligned) .{ ._ps, .mova } else .{ ._ps, .movu } }, 5...8 => if (self.hasFeature(.avx)) @@ -14314,11 +14510,7 @@ fn moveStrategy(self: *Self, ty: Type, class: Register.Class, aligned: bool) !Mo else => {}, }, 64 => switch (ty.vectorLen(mod)) { - 1 => return .{ .move = if (self.hasFeature(.avx)) - .{ .v_sd, .mov } - else - .{ ._sd, .mov } }, - 2 => return .{ .move = if (self.hasFeature(.avx)) + 1...2 => return .{ .move = if (self.hasFeature(.avx)) if (aligned) .{ .v_pd, .mova } else .{ .v_pd, .movu } else if (aligned) .{ ._pd, .mova } else .{ ._pd, .movu } }, 3...4 => if 
(self.hasFeature(.avx)) @@ -14633,7 +14825,7 @@ fn genSetReg( ty, dst_reg.class(), self.getFrameAddrAlignment(frame_addr).compare(.gte, Alignment.fromLog2Units( - std.math.log2_int_ceil(u10, @divExact(dst_reg.bitSize(), 8)), + math.log2_int_ceil(u10, @divExact(dst_reg.bitSize(), 8)), )), ), .lea_frame => .{ .move = .{ ._, .lea } }, @@ -16296,7 +16488,7 @@ fn airSplat(self: *Self, inst: Air.Inst.Index) !void { }, 65...128 => switch (vector_len) { else => null, - 1...2 => .{ .vp_i128, .broadcast }, + 1...2 => .{ .v_i128, .broadcast }, }, }) orelse break :avx2; @@ -16310,7 +16502,7 @@ fn airSplat(self: *Self, inst: Air.Inst.Index) !void { registerAlias(dst_reg, @intCast(vector_ty.abiSize(mod))), try src_mcv.mem(self, self.memSize(scalar_ty)), ) else { - if (mir_tag[0] == .vp_i128) break :avx2; + if (mir_tag[0] == .v_i128) break :avx2; try self.genSetReg(dst_reg, scalar_ty, src_mcv, .{}); try self.asmRegisterRegister( mir_tag, @@ -16352,7 +16544,7 @@ fn airSplat(self: *Self, inst: Air.Inst.Index) !void { .{ if (self.hasFeature(.avx)) .vp_w else .p_w, .shufl }, dst_alias, dst_alias, - Immediate.u(0), + Immediate.u(0b00_00_00_00), ); if (switch (scalar_bits) { 1...8 => vector_len > 4, @@ -16563,18 +16755,1158 @@ fn airSplat(self: *Self, inst: Air.Inst.Index) !void { } fn airSelect(self: *Self, inst: Air.Inst.Index) !void { + const mod = self.bin_file.comp.module.?; const pl_op = self.air.instructions.items(.data)[@intFromEnum(inst)].pl_op; const extra = self.air.extraData(Air.Bin, pl_op.payload).data; - _ = extra; - return self.fail("TODO implement airSelect for x86_64", .{}); - //return self.finishAir(inst, result, .{ pl_op.operand, extra.lhs, extra.rhs }); + const ty = self.typeOfIndex(inst); + const vec_len = ty.vectorLen(mod); + const elem_ty = ty.childType(mod); + const elem_abi_size: u32 = @intCast(elem_ty.abiSize(mod)); + const abi_size: u32 = @intCast(ty.abiSize(mod)); + const pred_ty = self.typeOf(pl_op.operand); + + const result = result: { + const has_blend = self.hasFeature(.sse4_1); + const has_avx = self.hasFeature(.avx); + const need_xmm0 = has_blend and !has_avx; + const pred_mcv = try self.resolveInst(pl_op.operand); + const mask_reg = mask: { + switch (pred_mcv) { + .register => |pred_reg| switch (pred_reg.class()) { + .general_purpose => {}, + .sse => if (need_xmm0 and pred_reg.id() != comptime Register.xmm0.id()) { + try self.register_manager.getKnownReg(.xmm0, null); + try self.genSetReg(.xmm0, pred_ty, pred_mcv, .{}); + break :mask .xmm0; + } else break :mask if (has_blend) + pred_reg + else + try self.copyToTmpRegister(pred_ty, pred_mcv), + else => unreachable, + }, + else => {}, + } + const mask_reg: Register = if (need_xmm0) mask_reg: { + try self.register_manager.getKnownReg(.xmm0, null); + break :mask_reg .xmm0; + } else try self.register_manager.allocReg(null, abi.RegisterClass.sse); + const mask_alias = registerAlias(mask_reg, abi_size); + const mask_lock = self.register_manager.lockRegAssumeUnused(mask_reg); + defer self.register_manager.unlockReg(mask_lock); + + const pred_fits_in_elem = vec_len <= elem_abi_size; + if (self.hasFeature(.avx2) and abi_size <= 32) { + if (pred_mcv.isRegister()) broadcast: { + try self.asmRegisterRegister( + .{ .v_d, .mov }, + mask_reg.to128(), + pred_mcv.getReg().?.to32(), + ); + if (pred_fits_in_elem and vec_len > 1) try self.asmRegisterRegister( + .{ switch (elem_abi_size) { + 1 => .vp_b, + 2 => .vp_w, + 3...4 => .vp_d, + 5...8 => .vp_q, + 9...16 => { + try self.asmRegisterRegisterRegisterImmediate( + .{ .v_f128, .insert }, + 
mask_alias, + mask_alias, + mask_reg.to128(), + Immediate.u(1), + ); + break :broadcast; + }, + 17...32 => break :broadcast, + else => unreachable, + }, .broadcast }, + mask_alias, + mask_reg.to128(), + ); + } else try self.asmRegisterMemory( + .{ switch (vec_len) { + 1...8 => .vp_b, + 9...16 => .vp_w, + 17...32 => .vp_d, + else => unreachable, + }, .broadcast }, + mask_alias, + if (pred_mcv.isMemory()) try pred_mcv.mem(self, .byte) else .{ + .base = .{ .reg = (try self.copyToTmpRegister( + Type.usize, + pred_mcv.address(), + )).to64() }, + .mod = .{ .rm = .{ .size = .byte } }, + }, + ); + } else if (abi_size <= 16) broadcast: { + try self.asmRegisterRegister( + .{ if (has_avx) .v_d else ._d, .mov }, + mask_alias, + (if (pred_mcv.isRegister()) + pred_mcv.getReg().? + else + try self.copyToTmpRegister(pred_ty, pred_mcv.address())).to32(), + ); + if (!pred_fits_in_elem or vec_len == 1) break :broadcast; + if (elem_abi_size <= 1) { + if (has_avx) try self.asmRegisterRegisterRegister( + .{ .vp_, .unpcklbw }, + mask_alias, + mask_alias, + mask_alias, + ) else try self.asmRegisterRegister( + .{ .p_, .unpcklbw }, + mask_alias, + mask_alias, + ); + if (abi_size <= 2) break :broadcast; + } + if (elem_abi_size <= 2) { + try self.asmRegisterRegisterImmediate( + .{ if (has_avx) .vp_w else .p_w, .shufl }, + mask_alias, + mask_alias, + Immediate.u(0b00_00_00_00), + ); + if (abi_size <= 8) break :broadcast; + } + try self.asmRegisterRegisterImmediate( + .{ if (has_avx) .vp_d else .p_d, .shuf }, + mask_alias, + mask_alias, + Immediate.u(switch (elem_abi_size) { + 1...2, 5...8 => 0b01_00_01_00, + 3...4 => 0b00_00_00_00, + else => unreachable, + }), + ); + } else return self.fail("TODO implement airSelect for {}", .{ty.fmt(mod)}); + const elem_bits: u16 = @intCast(elem_abi_size * 8); + const mask_elem_ty = try mod.intType(.unsigned, elem_bits); + const mask_ty = try mod.vectorType(.{ .len = vec_len, .child = mask_elem_ty.toIntern() }); + if (!pred_fits_in_elem) if (self.hasFeature(.ssse3)) { + var mask_elems: [32]InternPool.Index = undefined; + for (mask_elems[0..vec_len], 0..) |*elem, bit| elem.* = try mod.intern(.{ .int = .{ + .ty = mask_elem_ty.toIntern(), + .storage = .{ .u64 = bit / elem_bits }, + } }); + const mask_mcv = try self.genTypedValue(.{ + .ty = mask_ty, + .val = Value.fromInterned(try mod.intern(.{ .aggregate = .{ + .ty = mask_ty.toIntern(), + .storage = .{ .elems = mask_elems[0..vec_len] }, + } })), + }); + const mask_mem: Memory = .{ + .base = .{ .reg = try self.copyToTmpRegister(Type.usize, mask_mcv.address()) }, + .mod = .{ .rm = .{ .size = self.memSize(ty) } }, + }; + if (has_avx) try self.asmRegisterRegisterMemory( + .{ .vp_b, .shuf }, + mask_alias, + mask_alias, + mask_mem, + ) else try self.asmRegisterMemory( + .{ .p_b, .shuf }, + mask_alias, + mask_mem, + ); + } else return self.fail("TODO implement airSelect for {}", .{ty.fmt(mod)}); + { + var mask_elems: [32]InternPool.Index = undefined; + for (mask_elems[0..vec_len], 0..) 
|*elem, bit| elem.* = try mod.intern(.{ .int = .{ + .ty = mask_elem_ty.toIntern(), + .storage = .{ .u64 = @as(u32, 1) << @intCast(bit & (elem_bits - 1)) }, + } }); + const mask_mcv = try self.genTypedValue(.{ + .ty = mask_ty, + .val = Value.fromInterned(try mod.intern(.{ .aggregate = .{ + .ty = mask_ty.toIntern(), + .storage = .{ .elems = mask_elems[0..vec_len] }, + } })), + }); + const mask_mem: Memory = .{ + .base = .{ .reg = try self.copyToTmpRegister(Type.usize, mask_mcv.address()) }, + .mod = .{ .rm = .{ .size = self.memSize(ty) } }, + }; + if (has_avx) { + try self.asmRegisterRegisterMemory( + .{ .vp_, .@"and" }, + mask_alias, + mask_alias, + mask_mem, + ); + try self.asmRegisterRegisterMemory( + .{ .vp_d, .cmpeq }, + mask_alias, + mask_alias, + mask_mem, + ); + } else { + try self.asmRegisterMemory( + .{ .p_, .@"and" }, + mask_alias, + mask_mem, + ); + try self.asmRegisterMemory( + .{ .p_d, .cmpeq }, + mask_alias, + mask_mem, + ); + } + } + break :mask mask_reg; + }; + const mask_alias = registerAlias(mask_reg, abi_size); + const mask_lock = self.register_manager.lockRegAssumeUnused(mask_reg); + defer self.register_manager.unlockReg(mask_lock); + + const lhs_mcv = try self.resolveInst(extra.lhs); + const lhs_lock = switch (lhs_mcv) { + .register => |lhs_reg| self.register_manager.lockRegAssumeUnused(lhs_reg), + else => null, + }; + defer if (lhs_lock) |lock| self.register_manager.unlockReg(lock); + + const rhs_mcv = try self.resolveInst(extra.rhs); + const rhs_lock = switch (rhs_mcv) { + .register => |rhs_reg| self.register_manager.lockReg(rhs_reg), + else => null, + }; + defer if (rhs_lock) |lock| self.register_manager.unlockReg(lock); + + const reuse_mcv = if (has_blend) rhs_mcv else lhs_mcv; + const dst_mcv: MCValue = if (reuse_mcv.isRegister() and self.reuseOperand( + inst, + if (has_blend) extra.rhs else extra.lhs, + @intFromBool(has_blend), + reuse_mcv, + )) reuse_mcv else if (has_avx) + .{ .register = try self.register_manager.allocReg(inst, abi.RegisterClass.sse) } + else + try self.copyToRegisterWithInstTracking(inst, ty, reuse_mcv); + const dst_reg = dst_mcv.getReg().?; + const dst_alias = registerAlias(dst_reg, abi_size); + const dst_lock = self.register_manager.lockReg(dst_reg); + defer if (dst_lock) |lock| self.register_manager.unlockReg(lock); + + const mir_tag = @as(?Mir.Inst.FixedTag, switch (ty.childType(mod).zigTypeTag(mod)) { + else => null, + .Int => switch (abi_size) { + 0 => unreachable, + 1...16 => if (has_avx) + .{ .vp_b, .blendv } + else if (has_blend) + .{ .p_b, .blendv } + else + .{ .p_, undefined }, + 17...32 => if (self.hasFeature(.avx2)) + .{ .vp_b, .blendv } + else + null, + else => null, + }, + .Float => switch (ty.childType(mod).floatBits(self.target.*)) { + else => unreachable, + 16, 80, 128 => null, + 32 => switch (vec_len) { + 0 => unreachable, + 1...4 => if (has_avx) .{ .v_ps, .blendv } else .{ ._ps, .blendv }, + 5...8 => if (has_avx) .{ .v_ps, .blendv } else null, + else => null, + }, + 64 => switch (vec_len) { + 0 => unreachable, + 1...2 => if (has_avx) .{ .v_pd, .blendv } else .{ ._pd, .blendv }, + 3...4 => if (has_avx) .{ .v_pd, .blendv } else null, + else => null, + }, + }, + }) orelse return self.fail("TODO implement airSelect for {}", .{ty.fmt(mod)}); + if (has_avx) { + const rhs_alias = if (rhs_mcv.isRegister()) + registerAlias(rhs_mcv.getReg().?, abi_size) + else rhs: { + try self.genSetReg(dst_reg, ty, rhs_mcv, .{}); + break :rhs dst_alias; + }; + if (lhs_mcv.isMemory()) try self.asmRegisterRegisterMemoryRegister( + mir_tag, + 
dst_alias, + rhs_alias, + try lhs_mcv.mem(self, self.memSize(ty)), + mask_alias, + ) else try self.asmRegisterRegisterRegisterRegister( + mir_tag, + dst_alias, + rhs_alias, + registerAlias(if (lhs_mcv.isRegister()) + lhs_mcv.getReg().? + else + try self.copyToTmpRegister(ty, lhs_mcv), abi_size), + mask_alias, + ); + } else if (has_blend) if (lhs_mcv.isMemory()) try self.asmRegisterMemoryRegister( + mir_tag, + dst_alias, + try lhs_mcv.mem(self, self.memSize(ty)), + mask_alias, + ) else try self.asmRegisterRegisterRegister( + mir_tag, + dst_alias, + registerAlias(if (lhs_mcv.isRegister()) + lhs_mcv.getReg().? + else + try self.copyToTmpRegister(ty, lhs_mcv), abi_size), + mask_alias, + ) else { + const mir_fixes = @as(?Mir.Inst.Fixes, switch (elem_ty.zigTypeTag(mod)) { + else => null, + .Int => .p_, + .Float => switch (elem_ty.floatBits(self.target.*)) { + 32 => ._ps, + 64 => ._pd, + 16, 80, 128 => null, + else => unreachable, + }, + }) orelse return self.fail("TODO implement airSelect for {}", .{ty.fmt(mod)}); + try self.asmRegisterRegister(.{ mir_fixes, .@"and" }, dst_alias, mask_alias); + if (rhs_mcv.isMemory()) try self.asmRegisterMemory( + .{ mir_fixes, .andn }, + mask_alias, + try rhs_mcv.mem(self, Memory.Size.fromSize(abi_size)), + ) else try self.asmRegisterRegister( + .{ mir_fixes, .andn }, + mask_alias, + if (rhs_mcv.isRegister()) + rhs_mcv.getReg().? + else + try self.copyToTmpRegister(ty, rhs_mcv), + ); + try self.asmRegisterRegister(.{ mir_fixes, .@"or" }, dst_alias, mask_alias); + } + break :result dst_mcv; + }; + return self.finishAir(inst, result, .{ pl_op.operand, extra.lhs, extra.rhs }); } fn airShuffle(self: *Self, inst: Air.Inst.Index) !void { + const mod = self.bin_file.comp.module.?; const ty_pl = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl; - _ = ty_pl; - return self.fail("TODO implement airShuffle for x86_64", .{}); - //return self.finishAir(inst, result, .{ ty_op.operand, .none, .none }); + const extra = self.air.extraData(Air.Shuffle, ty_pl.payload).data; + + const dst_ty = self.typeOfIndex(inst); + const elem_ty = dst_ty.childType(mod); + const elem_abi_size: u16 = @intCast(elem_ty.abiSize(mod)); + const dst_abi_size: u32 = @intCast(dst_ty.abiSize(mod)); + const lhs_ty = self.typeOf(extra.a); + const lhs_abi_size: u32 = @intCast(lhs_ty.abiSize(mod)); + const rhs_ty = self.typeOf(extra.b); + const rhs_abi_size: u32 = @intCast(rhs_ty.abiSize(mod)); + const max_abi_size = @max(dst_abi_size, lhs_abi_size, rhs_abi_size); + + const ExpectedContents = [32]?i32; + var stack align(@max(@alignOf(ExpectedContents), @alignOf(std.heap.StackFallbackAllocator(0)))) = + std.heap.stackFallback(@sizeOf(ExpectedContents), self.gpa); + const allocator = stack.get(); + + const mask_elems = try allocator.alloc(?i32, extra.mask_len); + defer allocator.free(mask_elems); + for (mask_elems, 0..) |*mask_elem, elem_index| { + const mask_elem_val = + Value.fromInterned(extra.mask).elemValue(mod, elem_index) catch unreachable; + mask_elem.* = if (mask_elem_val.isUndef(mod)) + null + else + @intCast(mask_elem_val.toSignedInt(mod)); + } + + const has_avx = self.hasFeature(.avx); + const result = @as(?MCValue, result: { + for (mask_elems) |mask_elem| { + if (mask_elem) |_| break; + } else break :result try self.allocRegOrMem(inst, true); + + for (mask_elems, 0..) 
|mask_elem, elem_index| { + if (mask_elem orelse continue != elem_index) break; + } else { + const lhs_mcv = try self.resolveInst(extra.a); + if (self.reuseOperand(inst, extra.a, 0, lhs_mcv)) break :result lhs_mcv; + const dst_mcv = try self.allocRegOrMem(inst, true); + try self.genCopy(dst_ty, dst_mcv, lhs_mcv, .{}); + break :result dst_mcv; + } + + for (mask_elems, 0..) |mask_elem, elem_index| { + if (~(mask_elem orelse continue) != elem_index) break; + } else { + const rhs_mcv = try self.resolveInst(extra.b); + if (self.reuseOperand(inst, extra.b, 1, rhs_mcv)) break :result rhs_mcv; + const dst_mcv = try self.allocRegOrMem(inst, true); + try self.genCopy(dst_ty, dst_mcv, rhs_mcv, .{}); + break :result dst_mcv; + } + + for ([_]Mir.Inst.Tag{ .unpckl, .unpckh }) |variant| unpck: { + if (elem_abi_size > 8) break :unpck; + if (dst_abi_size > @as(u32, if (if (elem_abi_size >= 4) + has_avx + else + self.hasFeature(.avx2)) 32 else 16)) break :unpck; + + var sources = [1]?u1{null} ** 2; + for (mask_elems, 0..) |maybe_mask_elem, elem_index| { + const mask_elem = maybe_mask_elem orelse continue; + const mask_elem_index = + math.cast(u5, if (mask_elem < 0) ~mask_elem else mask_elem) orelse break :unpck; + const elem_byte = (elem_index >> 1) * elem_abi_size; + if (mask_elem_index * elem_abi_size != (elem_byte & 0b0111) | @as(u4, switch (variant) { + .unpckl => 0b0000, + .unpckh => 0b1000, + else => unreachable, + }) | (elem_byte << 1 & 0b10000)) break :unpck; + + const source = @intFromBool(mask_elem < 0); + if (sources[elem_index & 0b00001]) |prev_source| { + if (source != prev_source) break :unpck; + } else sources[elem_index & 0b00001] = source; + } + if (sources[0] orelse break :unpck == sources[1] orelse break :unpck) break :unpck; + + const operands = [2]Air.Inst.Ref{ extra.a, extra.b }; + const operand_tys = [2]Type{ lhs_ty, rhs_ty }; + const lhs_mcv = try self.resolveInst(operands[sources[0].?]); + const rhs_mcv = try self.resolveInst(operands[sources[1].?]); + + const dst_mcv: MCValue = if (lhs_mcv.isRegister() and + self.reuseOperand(inst, operands[sources[0].?], sources[0].?, lhs_mcv)) + lhs_mcv + else if (has_avx and lhs_mcv.isRegister()) + .{ .register = try self.register_manager.allocReg(inst, abi.RegisterClass.sse) } + else + try self.copyToRegisterWithInstTracking(inst, operand_tys[sources[0].?], lhs_mcv); + const dst_reg = dst_mcv.getReg().?; + const dst_alias = registerAlias(dst_reg, max_abi_size); + + const mir_tag: Mir.Inst.FixedTag = if ((elem_abi_size >= 4 and elem_ty.isRuntimeFloat()) or + (dst_abi_size > 16 and !self.hasFeature(.avx2))) .{ switch (elem_abi_size) { + 4 => if (has_avx) .v_ps else ._ps, + 8 => if (has_avx) .v_pd else ._pd, + else => unreachable, + }, variant } else .{ if (has_avx) .vp_ else .p_, switch (variant) { + .unpckl => switch (elem_abi_size) { + 1 => .unpcklbw, + 2 => .unpcklwd, + 4 => .unpckldq, + 8 => .unpcklqdq, + else => unreachable, + }, + .unpckh => switch (elem_abi_size) { + 1 => .unpckhbw, + 2 => .unpckhwd, + 4 => .unpckhdq, + 8 => .unpckhqdq, + else => unreachable, + }, + else => unreachable, + } }; + if (has_avx) if (rhs_mcv.isMemory()) try self.asmRegisterRegisterMemory( + mir_tag, + dst_alias, + registerAlias(lhs_mcv.getReg() orelse dst_reg, max_abi_size), + try rhs_mcv.mem(self, Memory.Size.fromSize(max_abi_size)), + ) else try self.asmRegisterRegisterRegister( + mir_tag, + dst_alias, + registerAlias(lhs_mcv.getReg() orelse dst_reg, max_abi_size), + registerAlias(if (rhs_mcv.isRegister()) + rhs_mcv.getReg().? 
+ else + try self.copyToTmpRegister(operand_tys[sources[1].?], rhs_mcv), max_abi_size), + ) else if (rhs_mcv.isMemory()) try self.asmRegisterMemory( + mir_tag, + dst_alias, + try rhs_mcv.mem(self, Memory.Size.fromSize(max_abi_size)), + ) else try self.asmRegisterRegister( + mir_tag, + dst_alias, + registerAlias(if (rhs_mcv.isRegister()) + rhs_mcv.getReg().? + else + try self.copyToTmpRegister(operand_tys[sources[1].?], rhs_mcv), max_abi_size), + ); + break :result dst_mcv; + } + + pshufd: { + if (elem_abi_size != 4) break :pshufd; + if (max_abi_size > @as(u32, if (has_avx) 32 else 16)) break :pshufd; + + var control: u8 = 0b00_00_00_00; + var sources = [1]?u1{null} ** 1; + for (mask_elems, 0..) |maybe_mask_elem, elem_index| { + const mask_elem = maybe_mask_elem orelse continue; + const mask_elem_index: u3 = @intCast(if (mask_elem < 0) ~mask_elem else mask_elem); + if (mask_elem_index & 0b100 != elem_index & 0b100) break :pshufd; + + const source = @intFromBool(mask_elem < 0); + if (sources[0]) |prev_source| { + if (source != prev_source) break :pshufd; + } else sources[(elem_index & 0b010) >> 1] = source; + + const select_bit: u3 = @intCast((elem_index & 0b011) << 1); + const select = @as(u8, @intCast(mask_elem_index & 0b011)) << select_bit; + if (elem_index & 0b100 == 0) + control |= select + else if (control & @as(u8, 0b11) << select_bit != select) break :pshufd; + } + + const operands = [2]Air.Inst.Ref{ extra.a, extra.b }; + const operand_tys = [2]Type{ lhs_ty, rhs_ty }; + const src_mcv = try self.resolveInst(operands[sources[0] orelse break :pshufd]); + + const dst_reg = if (src_mcv.isRegister() and + self.reuseOperand(inst, operands[sources[0].?], sources[0].?, src_mcv)) + src_mcv.getReg().? + else + try self.register_manager.allocReg(inst, abi.RegisterClass.sse); + const dst_alias = registerAlias(dst_reg, max_abi_size); + + if (src_mcv.isMemory()) try self.asmRegisterMemoryImmediate( + .{ if (has_avx) .vp_d else .p_d, .shuf }, + dst_alias, + try src_mcv.mem(self, Memory.Size.fromSize(max_abi_size)), + Immediate.u(control), + ) else try self.asmRegisterRegisterImmediate( + .{ if (has_avx) .vp_d else .p_d, .shuf }, + dst_alias, + registerAlias(if (src_mcv.isRegister()) + src_mcv.getReg().? + else + try self.copyToTmpRegister(operand_tys[sources[0].?], src_mcv), max_abi_size), + Immediate.u(control), + ); + break :result .{ .register = dst_reg }; + } + + shufps: { + if (elem_abi_size != 4) break :shufps; + if (max_abi_size > @as(u32, if (has_avx) 32 else 16)) break :shufps; + + var control: u8 = 0b00_00_00_00; + var sources = [1]?u1{null} ** 2; + for (mask_elems, 0..) 
|maybe_mask_elem, elem_index| { + const mask_elem = maybe_mask_elem orelse continue; + const mask_elem_index: u3 = @intCast(if (mask_elem < 0) ~mask_elem else mask_elem); + if (mask_elem_index & 0b100 != elem_index & 0b100) break :shufps; + + const source = @intFromBool(mask_elem < 0); + if (sources[(elem_index & 0b010) >> 1]) |prev_source| { + if (source != prev_source) break :shufps; + } else sources[(elem_index & 0b010) >> 1] = source; + + const select_bit: u3 = @intCast((elem_index & 0b011) << 1); + const select = @as(u8, @intCast(mask_elem_index & 0b011)) << select_bit; + if (elem_index & 0b100 == 0) + control |= select + else if (control & @as(u8, 0b11) << select_bit != select) break :shufps; + } + if (sources[0] orelse break :shufps == sources[1] orelse break :shufps) break :shufps; + + const operands = [2]Air.Inst.Ref{ extra.a, extra.b }; + const operand_tys = [2]Type{ lhs_ty, rhs_ty }; + const lhs_mcv = try self.resolveInst(operands[sources[0].?]); + const rhs_mcv = try self.resolveInst(operands[sources[1].?]); + + const dst_mcv: MCValue = if (lhs_mcv.isRegister() and + self.reuseOperand(inst, operands[sources[0].?], sources[0].?, lhs_mcv)) + lhs_mcv + else if (has_avx and lhs_mcv.isRegister()) + .{ .register = try self.register_manager.allocReg(inst, abi.RegisterClass.sse) } + else + try self.copyToRegisterWithInstTracking(inst, operand_tys[sources[0].?], lhs_mcv); + const dst_reg = dst_mcv.getReg().?; + const dst_alias = registerAlias(dst_reg, max_abi_size); + + if (has_avx) if (rhs_mcv.isMemory()) try self.asmRegisterRegisterMemoryImmediate( + .{ .v_ps, .shuf }, + dst_alias, + registerAlias(lhs_mcv.getReg() orelse dst_reg, max_abi_size), + try rhs_mcv.mem(self, Memory.Size.fromSize(max_abi_size)), + Immediate.u(control), + ) else try self.asmRegisterRegisterRegisterImmediate( + .{ .v_ps, .shuf }, + dst_alias, + registerAlias(lhs_mcv.getReg() orelse dst_reg, max_abi_size), + registerAlias(if (rhs_mcv.isRegister()) + rhs_mcv.getReg().? + else + try self.copyToTmpRegister(operand_tys[sources[1].?], rhs_mcv), max_abi_size), + Immediate.u(control), + ) else if (rhs_mcv.isMemory()) try self.asmRegisterMemoryImmediate( + .{ ._ps, .shuf }, + dst_alias, + try rhs_mcv.mem(self, Memory.Size.fromSize(max_abi_size)), + Immediate.u(control), + ) else try self.asmRegisterRegisterImmediate( + .{ ._ps, .shuf }, + dst_alias, + registerAlias(if (rhs_mcv.isRegister()) + rhs_mcv.getReg().? + else + try self.copyToTmpRegister(operand_tys[sources[1].?], rhs_mcv), max_abi_size), + Immediate.u(control), + ); + break :result dst_mcv; + } + + shufpd: { + if (elem_abi_size != 8) break :shufpd; + if (max_abi_size > @as(u32, if (has_avx) 32 else 16)) break :shufpd; + + var control: u4 = 0b0_0_0_0; + var sources = [1]?u1{null} ** 2; + for (mask_elems, 0..) 
|maybe_mask_elem, elem_index| { + const mask_elem = maybe_mask_elem orelse continue; + const mask_elem_index: u2 = @intCast(if (mask_elem < 0) ~mask_elem else mask_elem); + if (mask_elem_index & 0b10 != elem_index & 0b10) break :shufpd; + + const source = @intFromBool(mask_elem < 0); + if (sources[elem_index & 0b01]) |prev_source| { + if (source != prev_source) break :shufpd; + } else sources[elem_index & 0b01] = source; + + control |= @as(u4, @intCast(mask_elem_index & 0b01)) << @intCast(elem_index); + } + if (sources[0] orelse break :shufpd == sources[1] orelse break :shufpd) break :shufpd; + + const operands: [2]Air.Inst.Ref = .{ extra.a, extra.b }; + const operand_tys: [2]Type = .{ lhs_ty, rhs_ty }; + const lhs_mcv = try self.resolveInst(operands[sources[0].?]); + const rhs_mcv = try self.resolveInst(operands[sources[1].?]); + + const dst_mcv: MCValue = if (lhs_mcv.isRegister() and + self.reuseOperand(inst, operands[sources[0].?], sources[0].?, lhs_mcv)) + lhs_mcv + else if (has_avx and lhs_mcv.isRegister()) + .{ .register = try self.register_manager.allocReg(inst, abi.RegisterClass.sse) } + else + try self.copyToRegisterWithInstTracking(inst, operand_tys[sources[0].?], lhs_mcv); + const dst_reg = dst_mcv.getReg().?; + const dst_alias = registerAlias(dst_reg, max_abi_size); + + if (has_avx) if (rhs_mcv.isMemory()) try self.asmRegisterRegisterMemoryImmediate( + .{ .v_pd, .shuf }, + dst_alias, + registerAlias(lhs_mcv.getReg() orelse dst_reg, max_abi_size), + try rhs_mcv.mem(self, Memory.Size.fromSize(max_abi_size)), + Immediate.u(control), + ) else try self.asmRegisterRegisterRegisterImmediate( + .{ .v_pd, .shuf }, + dst_alias, + registerAlias(lhs_mcv.getReg() orelse dst_reg, max_abi_size), + registerAlias(if (rhs_mcv.isRegister()) + rhs_mcv.getReg().? + else + try self.copyToTmpRegister(operand_tys[sources[1].?], rhs_mcv), max_abi_size), + Immediate.u(control), + ) else if (rhs_mcv.isMemory()) try self.asmRegisterMemoryImmediate( + .{ ._pd, .shuf }, + dst_alias, + try rhs_mcv.mem(self, Memory.Size.fromSize(max_abi_size)), + Immediate.u(control), + ) else try self.asmRegisterRegisterImmediate( + .{ ._pd, .shuf }, + dst_alias, + registerAlias(if (rhs_mcv.isRegister()) + rhs_mcv.getReg().? + else + try self.copyToTmpRegister(operand_tys[sources[1].?], rhs_mcv), max_abi_size), + Immediate.u(control), + ); + break :result dst_mcv; + } + + blend: { + if (elem_abi_size < 2) break :blend; + if (dst_abi_size > @as(u32, if (has_avx) 32 else 16)) break :blend; + if (!self.hasFeature(.sse4_1)) break :blend; + + var control: u8 = 0b0_0_0_0_0_0_0_0; + for (mask_elems, 0..) 
|maybe_mask_elem, elem_index| { + const mask_elem = maybe_mask_elem orelse continue; + const mask_elem_index = + math.cast(u4, if (mask_elem < 0) ~mask_elem else mask_elem) orelse break :blend; + if (mask_elem_index != elem_index) break :blend; + + const select = @as(u8, @intFromBool(mask_elem < 0)) << @truncate(elem_index); + if (elem_index & 0b1000 == 0) + control |= select + else if (control & @as(u8, 0b1) << @truncate(elem_index) != select) break :blend; + } + + if (!elem_ty.isRuntimeFloat() and self.hasFeature(.avx2)) vpblendd: { + const expanded_control = switch (elem_abi_size) { + 4 => control, + 8 => @as(u8, if (control & 0b0001 != 0) 0b00_00_00_11 else 0b00_00_00_00) | + @as(u8, if (control & 0b0010 != 0) 0b00_00_11_00 else 0b00_00_00_00) | + @as(u8, if (control & 0b0100 != 0) 0b00_11_00_00 else 0b00_00_00_00) | + @as(u8, if (control & 0b1000 != 0) 0b11_00_00_00 else 0b00_00_00_00), + else => break :vpblendd, + }; + + const lhs_mcv = try self.resolveInst(extra.a); + const lhs_reg = if (lhs_mcv.isRegister()) + lhs_mcv.getReg().? + else + try self.copyToTmpRegister(dst_ty, lhs_mcv); + const lhs_lock = self.register_manager.lockReg(lhs_reg); + defer if (lhs_lock) |lock| self.register_manager.unlockReg(lock); + + const rhs_mcv = try self.resolveInst(extra.b); + const dst_reg = try self.register_manager.allocReg(inst, abi.RegisterClass.sse); + if (rhs_mcv.isMemory()) try self.asmRegisterRegisterMemoryImmediate( + .{ .vp_d, .blend }, + registerAlias(dst_reg, dst_abi_size), + registerAlias(lhs_reg, dst_abi_size), + try rhs_mcv.mem(self, Memory.Size.fromSize(dst_abi_size)), + Immediate.u(expanded_control), + ) else try self.asmRegisterRegisterRegisterImmediate( + .{ .vp_d, .blend }, + registerAlias(dst_reg, dst_abi_size), + registerAlias(lhs_reg, dst_abi_size), + registerAlias(if (rhs_mcv.isRegister()) + rhs_mcv.getReg().? + else + try self.copyToTmpRegister(dst_ty, rhs_mcv), dst_abi_size), + Immediate.u(expanded_control), + ); + break :result .{ .register = dst_reg }; + } + + if (!elem_ty.isRuntimeFloat() or elem_abi_size == 2) pblendw: { + const expanded_control = switch (elem_abi_size) { + 2 => control, + 4 => if (dst_abi_size <= 16 or + @as(u4, @intCast(control >> 4)) == @as(u4, @truncate(control >> 0))) + @as(u8, if (control & 0b0001 != 0) 0b00_00_00_11 else 0b00_00_00_00) | + @as(u8, if (control & 0b0010 != 0) 0b00_00_11_00 else 0b00_00_00_00) | + @as(u8, if (control & 0b0100 != 0) 0b00_11_00_00 else 0b00_00_00_00) | + @as(u8, if (control & 0b1000 != 0) 0b11_00_00_00 else 0b00_00_00_00) + else + break :pblendw, + 8 => if (dst_abi_size <= 16 or + @as(u2, @intCast(control >> 2)) == @as(u2, @truncate(control >> 0))) + @as(u8, if (control & 0b01 != 0) 0b0000_1111 else 0b0000_0000) | + @as(u8, if (control & 0b10 != 0) 0b1111_0000 else 0b0000_0000) + else + break :pblendw, + 16 => break :pblendw, + else => unreachable, + }; + + const lhs_mcv = try self.resolveInst(extra.a); + const rhs_mcv = try self.resolveInst(extra.b); + + const dst_mcv: MCValue = if (lhs_mcv.isRegister() and + self.reuseOperand(inst, extra.a, 0, lhs_mcv)) + lhs_mcv + else if (has_avx and lhs_mcv.isRegister()) + .{ .register = try self.register_manager.allocReg(inst, abi.RegisterClass.sse) } + else + try self.copyToRegisterWithInstTracking(inst, dst_ty, lhs_mcv); + const dst_reg = dst_mcv.getReg().?; + + if (has_avx) if (rhs_mcv.isMemory()) try self.asmRegisterRegisterMemoryImmediate( + .{ .vp_w, .blend }, + registerAlias(dst_reg, dst_abi_size), + registerAlias(if (lhs_mcv.isRegister()) + lhs_mcv.getReg().? 
+ else + dst_reg, dst_abi_size), + try rhs_mcv.mem(self, Memory.Size.fromSize(dst_abi_size)), + Immediate.u(expanded_control), + ) else try self.asmRegisterRegisterRegisterImmediate( + .{ .vp_w, .blend }, + registerAlias(dst_reg, dst_abi_size), + registerAlias(if (lhs_mcv.isRegister()) + lhs_mcv.getReg().? + else + dst_reg, dst_abi_size), + registerAlias(if (rhs_mcv.isRegister()) + rhs_mcv.getReg().? + else + try self.copyToTmpRegister(dst_ty, rhs_mcv), dst_abi_size), + Immediate.u(expanded_control), + ) else if (rhs_mcv.isMemory()) try self.asmRegisterMemoryImmediate( + .{ .p_w, .blend }, + registerAlias(dst_reg, dst_abi_size), + try rhs_mcv.mem(self, Memory.Size.fromSize(dst_abi_size)), + Immediate.u(expanded_control), + ) else try self.asmRegisterRegisterImmediate( + .{ .p_w, .blend }, + registerAlias(dst_reg, dst_abi_size), + registerAlias(if (rhs_mcv.isRegister()) + rhs_mcv.getReg().? + else + try self.copyToTmpRegister(dst_ty, rhs_mcv), dst_abi_size), + Immediate.u(expanded_control), + ); + break :result .{ .register = dst_reg }; + } + + const expanded_control = switch (elem_abi_size) { + 4, 8 => control, + 16 => @as(u4, if (control & 0b01 != 0) 0b00_11 else 0b00_00) | + @as(u4, if (control & 0b10 != 0) 0b11_00 else 0b00_00), + else => unreachable, + }; + + const lhs_mcv = try self.resolveInst(extra.a); + const rhs_mcv = try self.resolveInst(extra.b); + + const dst_mcv: MCValue = if (lhs_mcv.isRegister() and + self.reuseOperand(inst, extra.a, 0, lhs_mcv)) + lhs_mcv + else if (has_avx and lhs_mcv.isRegister()) + .{ .register = try self.register_manager.allocReg(inst, abi.RegisterClass.sse) } + else + try self.copyToRegisterWithInstTracking(inst, dst_ty, lhs_mcv); + const dst_reg = dst_mcv.getReg().?; + + if (has_avx) if (rhs_mcv.isMemory()) try self.asmRegisterRegisterMemoryImmediate( + switch (elem_abi_size) { + 4 => .{ .v_ps, .blend }, + 8, 16 => .{ .v_pd, .blend }, + else => unreachable, + }, + registerAlias(dst_reg, dst_abi_size), + registerAlias(if (lhs_mcv.isRegister()) + lhs_mcv.getReg().? + else + dst_reg, dst_abi_size), + try rhs_mcv.mem(self, Memory.Size.fromSize(dst_abi_size)), + Immediate.u(expanded_control), + ) else try self.asmRegisterRegisterRegisterImmediate( + switch (elem_abi_size) { + 4 => .{ .v_ps, .blend }, + 8, 16 => .{ .v_pd, .blend }, + else => unreachable, + }, + registerAlias(dst_reg, dst_abi_size), + registerAlias(if (lhs_mcv.isRegister()) + lhs_mcv.getReg().? + else + dst_reg, dst_abi_size), + registerAlias(if (rhs_mcv.isRegister()) + rhs_mcv.getReg().? + else + try self.copyToTmpRegister(dst_ty, rhs_mcv), dst_abi_size), + Immediate.u(expanded_control), + ) else if (rhs_mcv.isMemory()) try self.asmRegisterMemoryImmediate( + switch (elem_abi_size) { + 4 => .{ ._ps, .blend }, + 8, 16 => .{ ._pd, .blend }, + else => unreachable, + }, + registerAlias(dst_reg, dst_abi_size), + try rhs_mcv.mem(self, Memory.Size.fromSize(dst_abi_size)), + Immediate.u(expanded_control), + ) else try self.asmRegisterRegisterImmediate( + switch (elem_abi_size) { + 4 => .{ ._ps, .blend }, + 8, 16 => .{ ._pd, .blend }, + else => unreachable, + }, + registerAlias(dst_reg, dst_abi_size), + registerAlias(if (rhs_mcv.isRegister()) + rhs_mcv.getReg().? 
+ else + try self.copyToTmpRegister(dst_ty, rhs_mcv), dst_abi_size), + Immediate.u(expanded_control), + ); + break :result .{ .register = dst_reg }; + } + + blendv: { + if (dst_abi_size > @as(u32, if (if (elem_abi_size >= 4) + has_avx + else + self.hasFeature(.avx2)) 32 else 16)) break :blendv; + + const select_mask_elem_ty = try mod.intType(.unsigned, elem_abi_size * 8); + const select_mask_ty = try mod.vectorType(.{ + .len = @intCast(mask_elems.len), + .child = select_mask_elem_ty.toIntern(), + }); + var select_mask_elems: [32]InternPool.Index = undefined; + for ( + select_mask_elems[0..mask_elems.len], + mask_elems, + 0.., + ) |*select_mask_elem, maybe_mask_elem, elem_index| { + const mask_elem = maybe_mask_elem orelse continue; + const mask_elem_index = + math.cast(u5, if (mask_elem < 0) ~mask_elem else mask_elem) orelse break :blendv; + if (mask_elem_index != elem_index) break :blendv; + + select_mask_elem.* = (if (mask_elem < 0) + try select_mask_elem_ty.maxIntScalar(mod, select_mask_elem_ty) + else + try select_mask_elem_ty.minIntScalar(mod, select_mask_elem_ty)).toIntern(); + } + const select_mask_mcv = try self.genTypedValue(.{ + .ty = select_mask_ty, + .val = Value.fromInterned(try mod.intern(.{ .aggregate = .{ + .ty = select_mask_ty.toIntern(), + .storage = .{ .elems = select_mask_elems[0..mask_elems.len] }, + } })), + }); + + if (self.hasFeature(.sse4_1)) { + const mir_tag: Mir.Inst.FixedTag = .{ + if ((elem_abi_size >= 4 and elem_ty.isRuntimeFloat()) or + (dst_abi_size > 16 and !self.hasFeature(.avx2))) switch (elem_abi_size) { + 4 => if (has_avx) .v_ps else ._ps, + 8 => if (has_avx) .v_pd else ._pd, + else => unreachable, + } else if (has_avx) .vp_b else .p_b, + .blendv, + }; + + const select_mask_reg = if (!has_avx) reg: { + try self.register_manager.getKnownReg(.xmm0, null); + try self.genSetReg(.xmm0, select_mask_elem_ty, select_mask_mcv, .{}); + break :reg .xmm0; + } else try self.copyToTmpRegister(select_mask_ty, select_mask_mcv); + const select_mask_alias = registerAlias(select_mask_reg, dst_abi_size); + const select_mask_lock = self.register_manager.lockRegAssumeUnused(select_mask_reg); + defer self.register_manager.unlockReg(select_mask_lock); + + const lhs_mcv = try self.resolveInst(extra.a); + const rhs_mcv = try self.resolveInst(extra.b); + + const dst_mcv: MCValue = if (lhs_mcv.isRegister() and + self.reuseOperand(inst, extra.a, 0, lhs_mcv)) + lhs_mcv + else if (has_avx and lhs_mcv.isRegister()) + .{ .register = try self.register_manager.allocReg(inst, abi.RegisterClass.sse) } + else + try self.copyToRegisterWithInstTracking(inst, dst_ty, lhs_mcv); + const dst_reg = dst_mcv.getReg().?; + const dst_alias = registerAlias(dst_reg, dst_abi_size); + + if (has_avx) if (rhs_mcv.isMemory()) try self.asmRegisterRegisterMemoryRegister( + mir_tag, + dst_alias, + if (lhs_mcv.isRegister()) + registerAlias(lhs_mcv.getReg().?, dst_abi_size) + else + dst_alias, + try rhs_mcv.mem(self, Memory.Size.fromSize(dst_abi_size)), + select_mask_alias, + ) else try self.asmRegisterRegisterRegisterRegister( + mir_tag, + dst_alias, + if (lhs_mcv.isRegister()) + registerAlias(lhs_mcv.getReg().?, dst_abi_size) + else + dst_alias, + registerAlias(if (rhs_mcv.isRegister()) + rhs_mcv.getReg().? 
+ else + try self.copyToTmpRegister(dst_ty, rhs_mcv), dst_abi_size), + select_mask_alias, + ) else if (rhs_mcv.isMemory()) try self.asmRegisterMemoryRegister( + mir_tag, + dst_alias, + try rhs_mcv.mem(self, Memory.Size.fromSize(dst_abi_size)), + select_mask_alias, + ) else try self.asmRegisterRegisterRegister( + mir_tag, + dst_alias, + registerAlias(if (rhs_mcv.isRegister()) + rhs_mcv.getReg().? + else + try self.copyToTmpRegister(dst_ty, rhs_mcv), dst_abi_size), + select_mask_alias, + ); + break :result dst_mcv; + } + + const lhs_mcv = try self.resolveInst(extra.a); + const rhs_mcv = try self.resolveInst(extra.b); + + const dst_mcv: MCValue = if (rhs_mcv.isRegister() and + self.reuseOperand(inst, extra.b, 1, rhs_mcv)) + rhs_mcv + else + try self.copyToRegisterWithInstTracking(inst, dst_ty, rhs_mcv); + const dst_reg = dst_mcv.getReg().?; + const dst_alias = registerAlias(dst_reg, dst_abi_size); + + const mask_reg = try self.copyToTmpRegister(select_mask_ty, select_mask_mcv); + const mask_alias = registerAlias(mask_reg, dst_abi_size); + const mask_lock = self.register_manager.lockRegAssumeUnused(mask_reg); + defer self.register_manager.unlockReg(mask_lock); + + const mir_fixes: Mir.Inst.Fixes = if (elem_ty.isRuntimeFloat()) + switch (elem_ty.floatBits(self.target.*)) { + 16, 80, 128 => .p_, + 32 => ._ps, + 64 => ._pd, + else => unreachable, + } + else + .p_; + try self.asmRegisterRegister(.{ mir_fixes, .@"and" }, dst_alias, mask_alias); + if (lhs_mcv.isMemory()) try self.asmRegisterMemory( + .{ mir_fixes, .andn }, + mask_alias, + try lhs_mcv.mem(self, Memory.Size.fromSize(dst_abi_size)), + ) else try self.asmRegisterRegister( + .{ mir_fixes, .andn }, + mask_alias, + if (lhs_mcv.isRegister()) + lhs_mcv.getReg().? + else + try self.copyToTmpRegister(dst_ty, lhs_mcv), + ); + try self.asmRegisterRegister(.{ mir_fixes, .@"or" }, dst_alias, mask_alias); + break :result dst_mcv; + } + + pshufb: { + if (max_abi_size > 16) break :pshufb; + if (!self.hasFeature(.ssse3)) break :pshufb; + + const temp_regs = + try self.register_manager.allocRegs(2, .{ inst, null }, abi.RegisterClass.sse); + const temp_locks = self.register_manager.lockRegsAssumeUnused(2, temp_regs); + defer for (temp_locks) |lock| self.register_manager.unlockReg(lock); + + const lhs_temp_alias = registerAlias(temp_regs[0], max_abi_size); + try self.genSetReg(temp_regs[0], lhs_ty, .{ .air_ref = extra.a }, .{}); + + const rhs_temp_alias = registerAlias(temp_regs[1], max_abi_size); + try self.genSetReg(temp_regs[1], rhs_ty, .{ .air_ref = extra.b }, .{}); + + var lhs_mask_elems: [16]InternPool.Index = undefined; + for (lhs_mask_elems[0..max_abi_size], 0..) 
|*lhs_mask_elem, byte_index| { + const elem_index = byte_index / elem_abi_size; + lhs_mask_elem.* = try mod.intern(.{ .int = .{ + .ty = .u8_type, + .storage = .{ .u64 = if (elem_index >= mask_elems.len) 0b1_00_00000 else elem: { + const mask_elem = mask_elems[elem_index] orelse break :elem 0b1_00_00000; + if (mask_elem < 0) break :elem 0b1_00_00000; + const mask_elem_index: u31 = @intCast(mask_elem); + const byte_off: u32 = @intCast(byte_index % elem_abi_size); + break :elem @intCast(mask_elem_index * elem_abi_size + byte_off); + } }, + } }); + } + const lhs_mask_ty = try mod.vectorType(.{ .len = max_abi_size, .child = .u8_type }); + const lhs_mask_mcv = try self.genTypedValue(.{ + .ty = lhs_mask_ty, + .val = Value.fromInterned(try mod.intern(.{ .aggregate = .{ + .ty = lhs_mask_ty.toIntern(), + .storage = .{ .elems = lhs_mask_elems[0..max_abi_size] }, + } })), + }); + const lhs_mask_mem: Memory = .{ + .base = .{ .reg = try self.copyToTmpRegister(Type.usize, lhs_mask_mcv.address()) }, + .mod = .{ .rm = .{ .size = Memory.Size.fromSize(@max(max_abi_size, 16)) } }, + }; + if (has_avx) try self.asmRegisterRegisterMemory( + .{ .vp_b, .shuf }, + lhs_temp_alias, + lhs_temp_alias, + lhs_mask_mem, + ) else try self.asmRegisterMemory( + .{ .p_b, .shuf }, + lhs_temp_alias, + lhs_mask_mem, + ); + + var rhs_mask_elems: [16]InternPool.Index = undefined; + for (rhs_mask_elems[0..max_abi_size], 0..) |*rhs_mask_elem, byte_index| { + const elem_index = byte_index / elem_abi_size; + rhs_mask_elem.* = try mod.intern(.{ .int = .{ + .ty = .u8_type, + .storage = .{ .u64 = if (elem_index >= mask_elems.len) 0b1_00_00000 else elem: { + const mask_elem = mask_elems[elem_index] orelse break :elem 0b1_00_00000; + if (mask_elem >= 0) break :elem 0b1_00_00000; + const mask_elem_index: u31 = @intCast(~mask_elem); + const byte_off: u32 = @intCast(byte_index % elem_abi_size); + break :elem @intCast(mask_elem_index * elem_abi_size + byte_off); + } }, + } }); + } + const rhs_mask_ty = try mod.vectorType(.{ .len = max_abi_size, .child = .u8_type }); + const rhs_mask_mcv = try self.genTypedValue(.{ + .ty = rhs_mask_ty, + .val = Value.fromInterned(try mod.intern(.{ .aggregate = .{ + .ty = rhs_mask_ty.toIntern(), + .storage = .{ .elems = rhs_mask_elems[0..max_abi_size] }, + } })), + }); + const rhs_mask_mem: Memory = .{ + .base = .{ .reg = try self.copyToTmpRegister(Type.usize, rhs_mask_mcv.address()) }, + .mod = .{ .rm = .{ .size = Memory.Size.fromSize(@max(max_abi_size, 16)) } }, + }; + if (has_avx) try self.asmRegisterRegisterMemory( + .{ .vp_b, .shuf }, + rhs_temp_alias, + rhs_temp_alias, + rhs_mask_mem, + ) else try self.asmRegisterMemory( + .{ .p_b, .shuf }, + rhs_temp_alias, + rhs_mask_mem, + ); + + if (has_avx) try self.asmRegisterRegisterRegister( + .{ switch (elem_ty.zigTypeTag(mod)) { + else => break :result null, + .Int => .vp_, + .Float => switch (elem_ty.floatBits(self.target.*)) { + 32 => .v_ps, + 64 => .v_pd, + 16, 80, 128 => break :result null, + else => unreachable, + }, + }, .@"or" }, + lhs_temp_alias, + lhs_temp_alias, + rhs_temp_alias, + ) else try self.asmRegisterRegister( + .{ switch (elem_ty.zigTypeTag(mod)) { + else => break :result null, + .Int => .p_, + .Float => switch (elem_ty.floatBits(self.target.*)) { + 32 => ._ps, + 64 => ._pd, + 16, 80, 128 => break :result null, + else => unreachable, + }, + }, .@"or" }, + lhs_temp_alias, + rhs_temp_alias, + ); + break :result .{ .register = temp_regs[0] }; + } + + break :result null; + }) orelse return self.fail("TODO implement airShuffle from {} and {} to {} 
with {}", .{ + lhs_ty.fmt(mod), rhs_ty.fmt(mod), dst_ty.fmt(mod), + Value.fromInterned(extra.mask).fmtValue( + Type.fromInterned(mod.intern_pool.typeOf(extra.mask)), + mod, + ), + }); + return self.finishAir(inst, result, .{ extra.a, extra.b, .none }); } fn airReduce(self: *Self, inst: Air.Inst.Index) !void { @@ -16751,7 +18083,7 @@ fn airAggregateInit(self: *Self, inst: Air.Inst.Index) !void { }, .Array, .Vector => { const elem_ty = result_ty.childType(mod); - if (result_ty.isVector(mod) and elem_ty.bitSize(mod) == 1) { + if (result_ty.isVector(mod) and elem_ty.toIntern() == .bool_type) { const result_size: u32 = @intCast(result_ty.abiSize(mod)); const dst_reg = try self.register_manager.allocReg(inst, abi.RegisterClass.gp); try self.asmRegisterRegister( @@ -17801,7 +19133,7 @@ fn splitType(self: *Self, ty: Type) ![2]Type { else => unreachable, }, .float => Type.f32, - .float_combine => try mod.vectorType(.{ .len = 2, .child = .f32_type }), + .float_combine => try mod.arrayType(.{ .len = 2, .child = .f32_type }), .sse => Type.f64, else => break, }; diff --git a/src/arch/x86_64/Encoding.zig b/src/arch/x86_64/Encoding.zig index 8b91a20a4f..c4bf71e233 100644 --- a/src/arch/x86_64/Encoding.zig +++ b/src/arch/x86_64/Encoding.zig @@ -324,16 +324,19 @@ pub const Mnemonic = enum { // SSE3 movddup, movshdup, movsldup, // SSSE3 - pabsb, pabsd, pabsw, palignr, + pabsb, pabsd, pabsw, palignr, pshufb, // SSE4.1 blendpd, blendps, blendvpd, blendvps, extractps, insertps, packusdw, + pblendvb, pblendw, pcmpeqq, pextrb, pextrd, pextrq, pinsrb, pinsrd, pinsrq, pmaxsb, pmaxsd, pmaxud, pmaxuw, pminsb, pminsd, pminud, pminuw, + pmovsxbd, pmovsxbq, pmovsxbw, pmovsxdq, pmovsxwd, pmovsxwq, + pmovzxbd, pmovzxbq, pmovzxbw, pmovzxdq, pmovzxwd, pmovzxwq, pmulld, roundpd, roundps, roundsd, roundss, // SSE4.2 @@ -377,7 +380,8 @@ pub const Mnemonic = enum { vpabsb, vpabsd, vpabsw, vpackssdw, vpacksswb, vpackusdw, vpackuswb, vpaddb, vpaddd, vpaddq, vpaddsb, vpaddsw, vpaddusb, vpaddusw, vpaddw, - vpalignr, vpand, vpandn, vpclmulqdq, + vpalignr, vpand, vpandn, + vpblendvb, vpblendw, vpclmulqdq, vpcmpeqb, vpcmpeqd, vpcmpeqq, vpcmpeqw, vpcmpgtb, vpcmpgtd, vpcmpgtq, vpcmpgtw, vpextrb, vpextrd, vpextrq, vpextrw, @@ -385,9 +389,11 @@ pub const Mnemonic = enum { vpmaxsb, vpmaxsd, vpmaxsw, vpmaxub, vpmaxud, vpmaxuw, vpminsb, vpminsd, vpminsw, vpminub, vpminud, vpminuw, vpmovmskb, + vpmovsxbd, vpmovsxbq, vpmovsxbw, vpmovsxdq, vpmovsxwd, vpmovsxwq, + vpmovzxbd, vpmovzxbq, vpmovzxbw, vpmovzxdq, vpmovzxwd, vpmovzxwq, vpmulhw, vpmulld, vpmullw, vpor, - vpshufd, vpshufhw, vpshuflw, + vpshufb, vpshufd, vpshufhw, vpshuflw, vpslld, vpslldq, vpsllq, vpsllw, vpsrad, vpsraq, vpsraw, vpsrld, vpsrldq, vpsrlq, vpsrlw, @@ -409,7 +415,8 @@ pub const Mnemonic = enum { vfmadd132sd, vfmadd213sd, vfmadd231sd, vfmadd132ss, vfmadd213ss, vfmadd231ss, // AVX2 - vpbroadcastb, vpbroadcastd, vpbroadcasti128, vpbroadcastq, vpbroadcastw, + vbroadcasti128, vpbroadcastb, vpbroadcastd, vpbroadcastq, vpbroadcastw, + vextracti128, vinserti128, vpblendd, // zig fmt: on }; diff --git a/src/arch/x86_64/Lower.zig b/src/arch/x86_64/Lower.zig index 4e9c37e5aa..13b97b551a 100644 --- a/src/arch/x86_64/Lower.zig +++ b/src/arch/x86_64/Lower.zig @@ -477,8 +477,9 @@ fn generic(lower: *Lower, inst: Mir.Inst) Error!void { .rri_s, .rri_u => inst.data.rri.fixes, .ri_s, .ri_u => inst.data.ri.fixes, .ri64, .rm, .rmi_s, .mr => inst.data.rx.fixes, - .mrr, .rrm => inst.data.rrx.fixes, + .mrr, .rrm, .rmr => inst.data.rrx.fixes, .rmi, .mri => inst.data.rix.fixes, + .rrmr => 
inst.data.rrrx.fixes, .rrmi => inst.data.rrix.fixes, .mi_u, .mi_s => inst.data.x.fixes, .m => inst.data.x.fixes, @@ -565,6 +566,11 @@ fn generic(lower: *Lower, inst: Mir.Inst) Error!void { .{ .reg = inst.data.rx.r1 }, .{ .mem = lower.mem(inst.data.rx.payload) }, }, + .rmr => &.{ + .{ .reg = inst.data.rrx.r1 }, + .{ .mem = lower.mem(inst.data.rrx.payload) }, + .{ .reg = inst.data.rrx.r2 }, + }, .rmi => &.{ .{ .reg = inst.data.rix.r1 }, .{ .mem = lower.mem(inst.data.rix.payload) }, @@ -597,6 +603,12 @@ fn generic(lower: *Lower, inst: Mir.Inst) Error!void { .{ .reg = inst.data.rrx.r2 }, .{ .mem = lower.mem(inst.data.rrx.payload) }, }, + .rrmr => &.{ + .{ .reg = inst.data.rrrx.r1 }, + .{ .reg = inst.data.rrrx.r2 }, + .{ .mem = lower.mem(inst.data.rrrx.payload) }, + .{ .reg = inst.data.rrrx.r3 }, + }, .rrmi => &.{ .{ .reg = inst.data.rrix.r1 }, .{ .reg = inst.data.rrix.r2 }, diff --git a/src/arch/x86_64/Mir.zig b/src/arch/x86_64/Mir.zig index dea9bb50cb..6cccb34b3e 100644 --- a/src/arch/x86_64/Mir.zig +++ b/src/arch/x86_64/Mir.zig @@ -230,6 +230,8 @@ pub const Inst = struct { v_d, /// VEX-Encoded ___ QuadWord v_q, + /// VEX-Encoded ___ Integer Data + v_i128, /// VEX-Encoded Packed ___ vp_, /// VEX-Encoded Packed ___ Byte @@ -242,8 +244,6 @@ pub const Inst = struct { vp_q, /// VEX-Encoded Packed ___ Double Quadword vp_dq, - /// VEX-Encoded Packed ___ Integer Data - vp_i128, /// VEX-Encoded ___ Scalar Single-Precision Values v_ss, /// VEX-Encoded ___ Packed Single-Precision Values @@ -654,10 +654,19 @@ pub const Inst = struct { /// Variable blend scalar double-precision floating-point values blendv, /// Extract packed floating-point values + /// Extract packed integer values extract, /// Insert scalar single-precision floating-point value /// Insert packed floating-point values insert, + /// Packed move with sign extend + movsxb, + movsxd, + movsxw, + /// Packed move with zero extend + movzxb, + movzxd, + movzxw, /// Round packed single-precision floating-point values /// Round scalar single-precision floating-point value /// Round packed double-precision floating-point values @@ -688,6 +697,7 @@ pub const Inst = struct { sha256rnds2, /// Load with broadcast floating-point data + /// Load integer and broadcast broadcast, /// Convert 16-bit floating-point values to single-precision floating-point values @@ -762,8 +772,11 @@ pub const Inst = struct { /// Uses `imm` payload. rel, /// Register, memory operands. - /// Uses `rx` payload. + /// Uses `rx` payload with extra data of type `Memory`. rm, + /// Register, memory, register operands. + /// Uses `rrx` payload with extra data of type `Memory`. + rmr, /// Register, memory, immediate (word) operands. /// Uses `rix` payload with extra data of type `Memory`. rmi, @@ -776,6 +789,9 @@ pub const Inst = struct { /// Register, register, memory. /// Uses `rrix` payload with extra data of type `Memory`. rrm, + /// Register, register, memory, register. + /// Uses `rrrx` payload with extra data of type `Memory`. + rrmr, /// Register, register, memory, immediate (byte) operands. /// Uses `rrix` payload with extra data of type `Memory`. rrmi, @@ -953,6 +969,14 @@ pub const Inst = struct { r2: Register, payload: u32, }, + /// Register, register, register, followed by Custom payload found in extra. + rrrx: struct { + fixes: Fixes = ._, + r1: Register, + r2: Register, + r3: Register, + payload: u32, + }, /// Register, byte immediate, followed by Custom payload found in extra. 
rix: struct { fixes: Fixes = ._, diff --git a/src/arch/x86_64/encodings.zig b/src/arch/x86_64/encodings.zig index 545e6b23ce..d4a7dcafe7 100644 --- a/src/arch/x86_64/encodings.zig +++ b/src/arch/x86_64/encodings.zig @@ -1185,6 +1185,8 @@ pub const table = [_]Entry{ .{ .palignr, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x0f }, 0, .none, .ssse3 }, + .{ .pshufb, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x00 }, 0, .none, .ssse3 }, + // SSE4.1 .{ .blendpd, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x0d }, 0, .none, .sse4_1 }, @@ -1202,6 +1204,11 @@ pub const table = [_]Entry{ .{ .packusdw, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x2b }, 0, .none, .sse4_1 }, + .{ .pblendvb, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x10 }, 0, .none, .sse4_1 }, + .{ .pblendvb, .rm, &.{ .xmm, .xmm_m128, .xmm0 }, &.{ 0x66, 0x0f, 0x38, 0x10 }, 0, .none, .sse4_1 }, + + .{ .pblendw, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x0e }, 0, .none, .sse4_1 }, + .{ .pcmpeqq, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x29 }, 0, .none, .sse4_1 }, .{ .pextrb, .mri, &.{ .r32_m8, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x14 }, 0, .none, .sse4_1 }, @@ -1228,6 +1235,20 @@ pub const table = [_]Entry{ .{ .pminud, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x3b }, 0, .none, .sse4_1 }, + .{ .pmovsxbw, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0x66, 0x0f, 0x38, 0x20 }, 0, .none, .sse4_1 }, + .{ .pmovsxbd, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0x66, 0x0f, 0x38, 0x21 }, 0, .none, .sse4_1 }, + .{ .pmovsxbq, .rm, &.{ .xmm, .xmm_m16 }, &.{ 0x66, 0x0f, 0x38, 0x22 }, 0, .none, .sse4_1 }, + .{ .pmovsxwd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0x66, 0x0f, 0x38, 0x23 }, 0, .none, .sse4_1 }, + .{ .pmovsxwq, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0x66, 0x0f, 0x38, 0x24 }, 0, .none, .sse4_1 }, + .{ .pmovsxdq, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0x66, 0x0f, 0x38, 0x25 }, 0, .none, .sse4_1 }, + + .{ .pmovzxbw, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0x66, 0x0f, 0x38, 0x30 }, 0, .none, .sse4_1 }, + .{ .pmovzxbd, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0x66, 0x0f, 0x38, 0x31 }, 0, .none, .sse4_1 }, + .{ .pmovzxbq, .rm, &.{ .xmm, .xmm_m16 }, &.{ 0x66, 0x0f, 0x38, 0x32 }, 0, .none, .sse4_1 }, + .{ .pmovzxwd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0x66, 0x0f, 0x38, 0x33 }, 0, .none, .sse4_1 }, + .{ .pmovzxwq, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0x66, 0x0f, 0x38, 0x34 }, 0, .none, .sse4_1 }, + .{ .pmovzxdq, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0x66, 0x0f, 0x38, 0x35 }, 0, .none, .sse4_1 }, + .{ .pmulld, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x40 }, 0, .none, .sse4_1 }, .{ .roundpd, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x09 }, 0, .none, .sse4_1 }, @@ -1528,6 +1549,10 @@ pub const table = [_]Entry{ .{ .vpandn, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xdf }, 0, .vex_128_wig, .avx }, + .{ .vpblendvb, .rvmr, &.{ .xmm, .xmm, .xmm_m128, .xmm }, &.{ 0x66, 0x0f, 0x3a, 0x4c }, 0, .vex_128_w0, .avx }, + + .{ .vpblendw, .rvmi, &.{ .xmm, .xmm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x0e }, 0, .vex_128_wig, .avx }, + .{ .vpclmulqdq, .rvmi, &.{ .xmm, .xmm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x44 }, 0, .vex_128_wig, .@"pclmul avx" }, .{ .vpcmpeqb, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x74 }, 0, .vex_128_wig, .avx }, @@ -1576,6 +1601,20 @@ pub const table = [_]Entry{ .{ .vpmovmskb, .rm, &.{ .r32, .xmm }, &.{ 0x66, 0x0f, 0xd7 }, 0, .vex_128_wig, .avx }, .{ .vpmovmskb, .rm, &.{ .r64, .xmm }, &.{ 0x66, 0x0f, 0xd7 }, 0, .vex_128_wig, .avx }, + .{ .vpmovsxbw, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0x66, 0x0f, 
0x38, 0x20 }, 0, .vex_128_wig, .avx }, + .{ .vpmovsxbd, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0x66, 0x0f, 0x38, 0x21 }, 0, .vex_128_wig, .avx }, + .{ .vpmovsxbq, .rm, &.{ .xmm, .xmm_m16 }, &.{ 0x66, 0x0f, 0x38, 0x22 }, 0, .vex_128_wig, .avx }, + .{ .vpmovsxwd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0x66, 0x0f, 0x38, 0x23 }, 0, .vex_128_wig, .avx }, + .{ .vpmovsxwq, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0x66, 0x0f, 0x38, 0x24 }, 0, .vex_128_wig, .avx }, + .{ .vpmovsxdq, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0x66, 0x0f, 0x38, 0x25 }, 0, .vex_128_wig, .avx }, + + .{ .vpmovzxbw, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0x66, 0x0f, 0x38, 0x30 }, 0, .vex_128_wig, .avx }, + .{ .vpmovzxbd, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0x66, 0x0f, 0x38, 0x31 }, 0, .vex_128_wig, .avx }, + .{ .vpmovzxbq, .rm, &.{ .xmm, .xmm_m16 }, &.{ 0x66, 0x0f, 0x38, 0x32 }, 0, .vex_128_wig, .avx }, + .{ .vpmovzxwd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0x66, 0x0f, 0x38, 0x33 }, 0, .vex_128_wig, .avx }, + .{ .vpmovzxwq, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0x66, 0x0f, 0x38, 0x34 }, 0, .vex_128_wig, .avx }, + .{ .vpmovzxdq, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0x66, 0x0f, 0x38, 0x35 }, 0, .vex_128_wig, .avx }, + .{ .vpmulhw, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xe5 }, 0, .vex_128_wig, .avx }, .{ .vpmulld, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x40 }, 0, .vex_128_wig, .avx }, @@ -1584,6 +1623,8 @@ pub const table = [_]Entry{ .{ .vpor, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xeb }, 0, .vex_128_wig, .avx }, + .{ .vpshufb, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x00 }, 0, .vex_128_wig, .avx }, + .{ .vpshufd, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0x70 }, 0, .vex_128_wig, .avx }, .{ .vpshufhw, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0xf3, 0x0f, 0x70 }, 0, .vex_128_wig, .avx }, @@ -1728,6 +1769,10 @@ pub const table = [_]Entry{ .{ .vbroadcastss, .rm, &.{ .ymm, .xmm }, &.{ 0x66, 0x0f, 0x38, 0x18 }, 0, .vex_256_w0, .avx2 }, .{ .vbroadcastsd, .rm, &.{ .ymm, .xmm }, &.{ 0x66, 0x0f, 0x38, 0x19 }, 0, .vex_256_w0, .avx2 }, + .{ .vextracti128, .mri, &.{ .xmm_m128, .ymm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x39 }, 0, .vex_256_w0, .avx2 }, + + .{ .vinserti128, .rvmi, &.{ .ymm, .ymm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x38 }, 0, .vex_256_w0, .avx2 }, + .{ .vpabsb, .rm, &.{ .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x1c }, 0, .vex_256_wig, .avx2 }, .{ .vpabsd, .rm, &.{ .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x1e }, 0, .vex_256_wig, .avx2 }, .{ .vpabsw, .rm, &.{ .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x1d }, 0, .vex_256_wig, .avx2 }, @@ -1756,6 +1801,13 @@ pub const table = [_]Entry{ .{ .vpandn, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xdf }, 0, .vex_256_wig, .avx2 }, + .{ .vpblendd, .rvmi, &.{ .xmm, .xmm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x02 }, 0, .vex_128_w0, .avx2 }, + .{ .vpblendd, .rvmi, &.{ .ymm, .ymm, .ymm_m256, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x02 }, 0, .vex_256_w0, .avx2 }, + + .{ .vpblendvb, .rvmr, &.{ .ymm, .ymm, .ymm_m256, .ymm }, &.{ 0x66, 0x0f, 0x3a, 0x4c }, 0, .vex_256_w0, .avx2 }, + + .{ .vpblendw, .rvmi, &.{ .ymm, .ymm, .ymm_m256, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x0e }, 0, .vex_256_wig, .avx2 }, + .{ .vpbroadcastb, .rm, &.{ .xmm, .xmm_m8 }, &.{ 0x66, 0x0f, 0x38, 0x78 }, 0, .vex_128_w0, .avx2 }, .{ .vpbroadcastb, .rm, &.{ .ymm, .xmm_m8 }, &.{ 0x66, 0x0f, 0x38, 0x78 }, 0, .vex_256_w0, .avx2 }, .{ .vpbroadcastw, .rm, &.{ .xmm, .xmm_m16 }, &.{ 0x66, 0x0f, 0x38, 0x79 }, 0, .vex_128_w0, .avx2 }, @@ -1764,7 +1816,7 @@ pub const table = [_]Entry{ .{ .vpbroadcastd, .rm, &.{ .ymm, .xmm_m32 }, &.{ 
0x66, 0x0f, 0x38, 0x58 }, 0, .vex_256_w0, .avx2 }, .{ .vpbroadcastq, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0x66, 0x0f, 0x38, 0x59 }, 0, .vex_128_w0, .avx2 }, .{ .vpbroadcastq, .rm, &.{ .ymm, .xmm_m64 }, &.{ 0x66, 0x0f, 0x38, 0x59 }, 0, .vex_256_w0, .avx2 }, - .{ .vpbroadcasti128, .rm, &.{ .ymm, .m128 }, &.{ 0x66, 0x0f, 0x38, 0x5a }, 0, .vex_256_w0, .avx2 }, + .{ .vbroadcasti128, .rm, &.{ .ymm, .m128 }, &.{ 0x66, 0x0f, 0x38, 0x5a }, 0, .vex_256_w0, .avx2 }, .{ .vpcmpeqb, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x74 }, 0, .vex_256_wig, .avx2 }, .{ .vpcmpeqw, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x75 }, 0, .vex_256_wig, .avx2 }, @@ -1799,6 +1851,20 @@ pub const table = [_]Entry{ .{ .vpmovmskb, .rm, &.{ .r32, .ymm }, &.{ 0x66, 0x0f, 0xd7 }, 0, .vex_256_wig, .avx2 }, .{ .vpmovmskb, .rm, &.{ .r64, .ymm }, &.{ 0x66, 0x0f, 0xd7 }, 0, .vex_256_wig, .avx2 }, + .{ .vpmovsxbw, .rm, &.{ .ymm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x20 }, 0, .vex_256_wig, .avx2 }, + .{ .vpmovsxbd, .rm, &.{ .ymm, .xmm_m64 }, &.{ 0x66, 0x0f, 0x38, 0x21 }, 0, .vex_256_wig, .avx2 }, + .{ .vpmovsxbq, .rm, &.{ .ymm, .xmm_m32 }, &.{ 0x66, 0x0f, 0x38, 0x22 }, 0, .vex_256_wig, .avx2 }, + .{ .vpmovsxwd, .rm, &.{ .ymm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x23 }, 0, .vex_256_wig, .avx2 }, + .{ .vpmovsxwq, .rm, &.{ .ymm, .xmm_m64 }, &.{ 0x66, 0x0f, 0x38, 0x24 }, 0, .vex_256_wig, .avx2 }, + .{ .vpmovsxdq, .rm, &.{ .ymm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x25 }, 0, .vex_256_wig, .avx2 }, + + .{ .vpmovzxbw, .rm, &.{ .ymm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x30 }, 0, .vex_256_wig, .avx2 }, + .{ .vpmovzxbd, .rm, &.{ .ymm, .xmm_m64 }, &.{ 0x66, 0x0f, 0x38, 0x31 }, 0, .vex_256_wig, .avx2 }, + .{ .vpmovzxbq, .rm, &.{ .ymm, .xmm_m32 }, &.{ 0x66, 0x0f, 0x38, 0x32 }, 0, .vex_256_wig, .avx2 }, + .{ .vpmovzxwd, .rm, &.{ .ymm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x33 }, 0, .vex_256_wig, .avx2 }, + .{ .vpmovzxwq, .rm, &.{ .ymm, .xmm_m64 }, &.{ 0x66, 0x0f, 0x38, 0x34 }, 0, .vex_256_wig, .avx2 }, + .{ .vpmovzxdq, .rm, &.{ .ymm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x35 }, 0, .vex_256_wig, .avx2 }, + .{ .vpmulhw, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xe5 }, 0, .vex_256_wig, .avx2 }, .{ .vpmulld, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x40 }, 0, .vex_256_wig, .avx2 }, @@ -1807,6 +1873,7 @@ pub const table = [_]Entry{ .{ .vpor, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xeb }, 0, .vex_256_wig, .avx2 }, + .{ .vpshufb, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x00 }, 0, .vex_256_wig, .avx2 }, .{ .vpshufd, .rmi, &.{ .ymm, .ymm_m256, .imm8 }, &.{ 0x66, 0x0f, 0x70 }, 0, .vex_256_wig, .avx2 }, .{ .vpshufhw, .rmi, &.{ .ymm, .ymm_m256, .imm8 }, &.{ 0xf3, 0x0f, 0x70 }, 0, .vex_256_wig, .avx2 }, diff --git a/src/codegen.zig b/src/codegen.zig index 118bab4be9..7bcba80065 100644 --- a/src/codegen.zig +++ b/src/codegen.zig @@ -405,7 +405,7 @@ pub fn generateSymbol( .vector_type => |vector_type| { const abi_size = math.cast(usize, typed_value.ty.abiSize(mod)) orelse return error.Overflow; - if (Type.fromInterned(vector_type.child).bitSize(mod) == 1) { + if (vector_type.child == .bool_type) { const bytes = try code.addManyAsSlice(abi_size); @memset(bytes, 0xaa); var index: usize = 0; @@ -443,37 +443,34 @@ pub fn generateSymbol( }, }) byte.* |= mask else byte.* &= ~mask; } - } else switch (aggregate.storage) { - .bytes => |bytes| try code.appendSlice(bytes), - .elems, .repeated_elem => { - var index: u64 = 0; - while (index < vector_type.len) : (index += 1) { - switch (try generateSymbol(bin_file, 
src_loc, .{ - .ty = Type.fromInterned(vector_type.child), - .val = Value.fromInterned(switch (aggregate.storage) { - .bytes => unreachable, - .elems => |elems| elems[ - math.cast(usize, index) orelse return error.Overflow - ], - .repeated_elem => |elem| elem, - }), - }, code, debug_output, reloc_info)) { - .ok => {}, - .fail => |em| return .{ .fail = em }, + } else { + switch (aggregate.storage) { + .bytes => |bytes| try code.appendSlice(bytes), + .elems, .repeated_elem => { + var index: u64 = 0; + while (index < vector_type.len) : (index += 1) { + switch (try generateSymbol(bin_file, src_loc, .{ + .ty = Type.fromInterned(vector_type.child), + .val = Value.fromInterned(switch (aggregate.storage) { + .bytes => unreachable, + .elems => |elems| elems[ + math.cast(usize, index) orelse return error.Overflow + ], + .repeated_elem => |elem| elem, + }), + }, code, debug_output, reloc_info)) { + .ok => {}, + .fail => |em| return .{ .fail = em }, + } } - } - }, - } + }, + } - const padding = abi_size - (math.cast(usize, math.divCeil( - u64, - Type.fromInterned(vector_type.child).bitSize(mod) * vector_type.len, - 8, - ) catch |err| switch (err) { - error.DivisionByZero => unreachable, - else => |e| return e, - }) orelse return error.Overflow); - if (padding > 0) try code.appendNTimes(0, padding); + const padding = abi_size - + (math.cast(usize, Type.fromInterned(vector_type.child).abiSize(mod) * vector_type.len) orelse + return error.Overflow); + if (padding > 0) try code.appendNTimes(0, padding); + } }, .anon_struct_type => |tuple| { const struct_begin = code.items.len; diff --git a/src/codegen/c.zig b/src/codegen/c.zig index cf372ff5ef..0977acf7fe 100644 --- a/src/codegen/c.zig +++ b/src/codegen/c.zig @@ -4140,9 +4140,7 @@ fn airCmpOp( if (need_cast) try writer.writeAll("(void*)"); try f.writeCValue(writer, lhs, .Other); try v.elem(f, writer); - try writer.writeByte(' '); try writer.writeAll(compareOperatorC(operator)); - try writer.writeByte(' '); if (need_cast) try writer.writeAll("(void*)"); try f.writeCValue(writer, rhs, .Other); try v.elem(f, writer); @@ -4181,41 +4179,28 @@ fn airEquality( const writer = f.object.writer(); const inst_ty = f.typeOfIndex(inst); const local = try f.allocLocal(inst, inst_ty); + const a = try Assignment.start(f, writer, inst_ty); try f.writeCValue(writer, local, .Other); - try writer.writeAll(" = "); + try a.assign(f, writer); if (operand_ty.zigTypeTag(mod) == .Optional and !operand_ty.optionalReprIsPayload(mod)) { - // (A && B) || (C && (A == B)) - // A = lhs.is_null ; B = rhs.is_null ; C = rhs.payload == lhs.payload - - switch (operator) { - .eq => {}, - .neq => try writer.writeByte('!'), - else => unreachable, - } - try writer.writeAll("(("); - try f.writeCValue(writer, lhs, .Other); - try writer.writeAll(".is_null && "); - try f.writeCValue(writer, rhs, .Other); - try writer.writeAll(".is_null) || ("); - try f.writeCValue(writer, lhs, .Other); - try writer.writeAll(".payload == "); - try f.writeCValue(writer, rhs, .Other); - try writer.writeAll(".payload && "); + try f.writeCValueMember(writer, lhs, .{ .identifier = "is_null" }); + try writer.writeAll(" || "); + try f.writeCValueMember(writer, rhs, .{ .identifier = "is_null" }); + try writer.writeAll(" ? 
"); + try f.writeCValueMember(writer, lhs, .{ .identifier = "is_null" }); + try writer.writeAll(compareOperatorC(operator)); + try f.writeCValueMember(writer, rhs, .{ .identifier = "is_null" }); + try writer.writeAll(" : "); + try f.writeCValueMember(writer, lhs, .{ .identifier = "payload" }); + try writer.writeAll(compareOperatorC(operator)); + try f.writeCValueMember(writer, rhs, .{ .identifier = "payload" }); + } else { try f.writeCValue(writer, lhs, .Other); - try writer.writeAll(".is_null == "); + try writer.writeAll(compareOperatorC(operator)); try f.writeCValue(writer, rhs, .Other); - try writer.writeAll(".is_null));\n"); - - return local; } - - try f.writeCValue(writer, lhs, .Other); - try writer.writeByte(' '); - try writer.writeAll(compareOperatorC(operator)); - try writer.writeByte(' '); - try f.writeCValue(writer, rhs, .Other); - try writer.writeAll(";\n"); + try a.end(f, writer); return local; } @@ -6109,41 +6094,48 @@ fn airFloatCast(f: *Function, inst: Air.Inst.Index) !CValue { const ty_op = f.air.instructions.items(.data)[@intFromEnum(inst)].ty_op; const inst_ty = f.typeOfIndex(inst); + const inst_scalar_ty = inst_ty.scalarType(mod); const operand = try f.resolveInst(ty_op.operand); try reap(f, inst, &.{ty_op.operand}); const operand_ty = f.typeOf(ty_op.operand); + const scalar_ty = operand_ty.scalarType(mod); const target = f.object.dg.module.getTarget(); - const operation = if (inst_ty.isRuntimeFloat() and operand_ty.isRuntimeFloat()) - if (inst_ty.floatBits(target) < operand_ty.floatBits(target)) "trunc" else "extend" - else if (inst_ty.isInt(mod) and operand_ty.isRuntimeFloat()) - if (inst_ty.isSignedInt(mod)) "fix" else "fixuns" - else if (inst_ty.isRuntimeFloat() and operand_ty.isInt(mod)) - if (operand_ty.isSignedInt(mod)) "float" else "floatun" + const operation = if (inst_scalar_ty.isRuntimeFloat() and scalar_ty.isRuntimeFloat()) + if (inst_scalar_ty.floatBits(target) < scalar_ty.floatBits(target)) "trunc" else "extend" + else if (inst_scalar_ty.isInt(mod) and scalar_ty.isRuntimeFloat()) + if (inst_scalar_ty.isSignedInt(mod)) "fix" else "fixuns" + else if (inst_scalar_ty.isRuntimeFloat() and scalar_ty.isInt(mod)) + if (scalar_ty.isSignedInt(mod)) "float" else "floatun" else unreachable; const writer = f.object.writer(); const local = try f.allocLocal(inst, inst_ty); + const v = try Vectorize.start(f, inst, writer, operand_ty); + const a = try Assignment.start(f, writer, scalar_ty); try f.writeCValue(writer, local, .Other); - - try writer.writeAll(" = "); - if (inst_ty.isInt(mod) and operand_ty.isRuntimeFloat()) { + try v.elem(f, writer); + try a.assign(f, writer); + if (inst_scalar_ty.isInt(mod) and scalar_ty.isRuntimeFloat()) { try writer.writeAll("zig_wrap_"); - try f.object.dg.renderTypeForBuiltinFnName(writer, inst_ty); + try f.object.dg.renderTypeForBuiltinFnName(writer, inst_scalar_ty); try writer.writeByte('('); } try writer.writeAll("zig_"); try writer.writeAll(operation); - try writer.writeAll(compilerRtAbbrev(operand_ty, mod)); - try writer.writeAll(compilerRtAbbrev(inst_ty, mod)); + try writer.writeAll(compilerRtAbbrev(scalar_ty, mod)); + try writer.writeAll(compilerRtAbbrev(inst_scalar_ty, mod)); try writer.writeByte('('); try f.writeCValue(writer, operand, .FunctionArgument); + try v.elem(f, writer); try writer.writeByte(')'); - if (inst_ty.isInt(mod) and operand_ty.isRuntimeFloat()) { - try f.object.dg.renderBuiltinInfo(writer, inst_ty, .bits); + if (inst_scalar_ty.isInt(mod) and scalar_ty.isRuntimeFloat()) { + try 
f.object.dg.renderBuiltinInfo(writer, inst_scalar_ty, .bits); try writer.writeByte(')'); } - try writer.writeAll(";\n"); + try a.end(f, writer); + try v.end(f, inst, writer); + return local; } @@ -6315,7 +6307,7 @@ fn airCmpBuiltinCall( try v.elem(f, writer); try f.object.dg.renderBuiltinInfo(writer, scalar_ty, info); try writer.writeByte(')'); - if (!ref_ret) try writer.print(" {s} {}", .{ + if (!ref_ret) try writer.print("{s}{}", .{ compareOperatorC(operator), try f.fmtIntLiteral(Type.i32, try mod.intValue(Type.i32, 0)), }); @@ -7661,12 +7653,12 @@ fn compareOperatorAbbrev(operator: std.math.CompareOperator) []const u8 { fn compareOperatorC(operator: std.math.CompareOperator) []const u8 { return switch (operator) { - .lt => "<", - .lte => "<=", - .eq => "==", - .gte => ">=", - .gt => ">", - .neq => "!=", + .lt => " < ", + .lte => " <= ", + .eq => " == ", + .gte => " >= ", + .gt => " > ", + .neq => " != ", }; } diff --git a/src/codegen/llvm.zig b/src/codegen/llvm.zig index 1e7687b96c..bd0c337169 100644 --- a/src/codegen/llvm.zig +++ b/src/codegen/llvm.zig @@ -1165,7 +1165,7 @@ pub const Object = struct { const fwd_ref = self.debug_unresolved_namespace_scopes.values()[i]; const namespace = self.module.namespacePtr(namespace_index); - const debug_type = try self.lowerDebugType(namespace.ty); + const debug_type = try self.lowerDebugType(namespace.getType(self.module)); self.builder.debugForwardReferenceSetType(fwd_ref, debug_type); } @@ -1803,7 +1803,7 @@ pub const Object = struct { return updateExportedGlobal(self, mod, global_index, exports); } else { const fqn = try self.builder.string( - mod.intern_pool.stringToSlice(try decl.getFullyQualifiedName(mod)), + mod.intern_pool.stringToSlice(try decl.fullyQualifiedName(mod)), ); try global_index.rename(fqn, &self.builder); global_index.setLinkage(.internal, &self.builder); @@ -2841,15 +2841,13 @@ pub const Object = struct { const builtin_str = try mod.intern_pool.getOrPutString(mod.gpa, "builtin"); const std_namespace = mod.namespacePtr(mod.declPtr(std_file.root_decl.unwrap().?).src_namespace); - const builtin_decl = std_namespace.decls - .getKeyAdapted(builtin_str, Module.DeclAdapter{ .mod = mod }).?; + const builtin_decl = std_namespace.decls.getKeyAdapted(builtin_str, Module.DeclAdapter{ .zcu = mod }).?; const stack_trace_str = try mod.intern_pool.getOrPutString(mod.gpa, "StackTrace"); // buffer is only used for int_type, `builtin` is a struct. const builtin_ty = mod.declPtr(builtin_decl).val.toType(); const builtin_namespace = builtin_ty.getNamespace(mod).?; - const stack_trace_decl_index = builtin_namespace.decls - .getKeyAdapted(stack_trace_str, Module.DeclAdapter{ .mod = mod }).?; + const stack_trace_decl_index = builtin_namespace.decls.getKeyAdapted(stack_trace_str, Module.DeclAdapter{ .zcu = mod }).?; const stack_trace_decl = mod.declPtr(stack_trace_decl_index); // Sema should have ensured that StackTrace was analyzed. 
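The airEquality and compareOperatorC changes above move operand spacing into the operator string (" == " rather than "==") and collapse the old (A && B) || (C && (A == B)) optional comparison into a single conditional expression. A minimal Zig sketch of the semantics the new lowering encodes for optionals without the pointer-like representation — optionalEq is a hypothetical stand-in for illustration, not compiler code:

    const std = @import("std");

    // Mirrors the C ternary the backend now emits:
    //   lhs.is_null || rhs.is_null ? lhs.is_null == rhs.is_null
    //                              : lhs.payload == rhs.payload
    // Both null compares equal, exactly one null compares unequal,
    // and otherwise only the payloads decide.
    fn optionalEq(a: ?u32, b: ?u32) bool {
        if (a == null or b == null) return (a == null) == (b == null);
        return a.? == b.?;
    }

    test "optionalEq sketch" {
        try std.testing.expect(optionalEq(null, null));
        try std.testing.expect(!optionalEq(null, 1));
        try std.testing.expect(optionalEq(2, 2));
    }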
@@ -2892,7 +2890,7 @@ pub const Object = struct { try o.builder.string(ip.stringToSlice(if (is_extern) decl.name else - try decl.getFullyQualifiedName(zcu))), + try decl.fullyQualifiedName(zcu))), toLlvmAddressSpace(decl.@"addrspace", target), ); gop.value_ptr.* = function_index.ptrConst(&o.builder).global; @@ -3106,7 +3104,7 @@ pub const Object = struct { const variable_index = try o.builder.addVariable( try o.builder.string(mod.intern_pool.stringToSlice( - if (is_extern) decl.name else try decl.getFullyQualifiedName(mod), + if (is_extern) decl.name else try decl.fullyQualifiedName(mod), )), try o.lowerType(decl.ty), toLlvmGlobalAddressSpace(decl.@"addrspace", mod.getTarget()), @@ -3331,7 +3329,7 @@ pub const Object = struct { } const name = try o.builder.string(ip.stringToSlice( - try mod.declPtr(struct_type.decl.unwrap().?).getFullyQualifiedName(mod), + try mod.declPtr(struct_type.decl.unwrap().?).fullyQualifiedName(mod), )); var llvm_field_types = std.ArrayListUnmanaged(Builder.Type){}; @@ -3487,7 +3485,7 @@ pub const Object = struct { } const name = try o.builder.string(ip.stringToSlice( - try mod.declPtr(union_obj.decl).getFullyQualifiedName(mod), + try mod.declPtr(union_obj.decl).fullyQualifiedName(mod), )); const aligned_field_ty = Type.fromInterned(union_obj.field_types.get(ip)[layout.most_aligned_field]); @@ -4605,7 +4603,7 @@ pub const Object = struct { const usize_ty = try o.lowerType(Type.usize); const ret_ty = try o.lowerType(Type.slice_const_u8_sentinel_0); - const fqn = try zcu.declPtr(enum_type.decl).getFullyQualifiedName(zcu); + const fqn = try zcu.declPtr(enum_type.decl).fullyQualifiedName(zcu); const target = zcu.root_mod.resolved_target.result; const function_index = try o.builder.addFunction( try o.builder.fnType(ret_ty, &.{try o.lowerType(Type.fromInterned(enum_type.tag_ty))}, .normal), @@ -6623,7 +6621,7 @@ pub const FuncGen = struct { .base_line = self.base_line, }); - const fqn = try decl.getFullyQualifiedName(zcu); + const fqn = try decl.fullyQualifiedName(zcu); const is_internal_linkage = !zcu.decl_exports.contains(decl_index); const fn_ty = try zcu.funcType(.{ @@ -8653,8 +8651,6 @@ pub const FuncGen = struct { const operand_ty = self.typeOf(ty_op.operand); const dest_ty = self.typeOfIndex(inst); const target = mod.getTarget(); - const dest_bits = dest_ty.floatBits(target); - const src_bits = operand_ty.floatBits(target); if (intrinsicsAllowed(dest_ty, target) and intrinsicsAllowed(operand_ty, target)) { return self.wip.cast(.fpext, operand, try o.lowerType(dest_ty), ""); @@ -8662,11 +8658,19 @@ pub const FuncGen = struct { const operand_llvm_ty = try o.lowerType(operand_ty); const dest_llvm_ty = try o.lowerType(dest_ty); + const dest_bits = dest_ty.scalarType(mod).floatBits(target); + const src_bits = operand_ty.scalarType(mod).floatBits(target); const fn_name = try o.builder.fmt("__extend{s}f{s}f2", .{ compilerRtFloatAbbrev(src_bits), compilerRtFloatAbbrev(dest_bits), }); const libc_fn = try self.getLibcFunction(fn_name, &.{operand_llvm_ty}, dest_llvm_ty); + if (dest_ty.isVector(mod)) return self.buildElementwiseCall( + libc_fn, + &.{operand}, + try o.builder.poisonValue(dest_llvm_ty), + dest_ty.vectorLen(mod), + ); return self.wip.call( .normal, .ccc, @@ -9648,7 +9652,7 @@ pub const FuncGen = struct { if (gop.found_existing) return gop.value_ptr.*; errdefer assert(o.named_enum_map.remove(enum_type.decl)); - const fqn = try zcu.declPtr(enum_type.decl).getFullyQualifiedName(zcu); + const fqn = try zcu.declPtr(enum_type.decl).fullyQualifiedName(zcu); const 
target = zcu.root_mod.resolved_target.result; const function_index = try o.builder.addFunction( try o.builder.fnType(.i1, &.{try o.lowerType(Type.fromInterned(enum_type.tag_ty))}, .normal), diff --git a/src/codegen/spirv.zig b/src/codegen/spirv.zig index cbc6ae1eb3..dc3b646ab7 100644 --- a/src/codegen/spirv.zig +++ b/src/codegen/spirv.zig @@ -2019,7 +2019,7 @@ const DeclGen = struct { // Append the actual code into the functions section. try self.spv.addFunction(spv_decl_index, self.func); - const fqn = ip.stringToSlice(try decl.getFullyQualifiedName(self.module)); + const fqn = ip.stringToSlice(try decl.fullyQualifiedName(self.module)); try self.spv.debugName(decl_id, fqn); // Temporarily generate a test kernel declaration if this is a test function. @@ -2055,7 +2055,7 @@ const DeclGen = struct { .id_result = decl_id, .storage_class = actual_storage_class, }); - const fqn = ip.stringToSlice(try decl.getFullyQualifiedName(self.module)); + const fqn = ip.stringToSlice(try decl.fullyQualifiedName(self.module)); try self.spv.debugName(decl_id, fqn); if (opt_init_val) |init_val| { diff --git a/src/link/Coff.zig b/src/link/Coff.zig index 9be6d18df1..5bf83b52ea 100644 --- a/src/link/Coff.zig +++ b/src/link/Coff.zig @@ -1176,7 +1176,7 @@ pub fn lowerUnnamedConst(self: *Coff, tv: TypedValue, decl_index: InternPool.Dec gop.value_ptr.* = .{}; } const unnamed_consts = gop.value_ptr; - const decl_name = mod.intern_pool.stringToSlice(try decl.getFullyQualifiedName(mod)); + const decl_name = mod.intern_pool.stringToSlice(try decl.fullyQualifiedName(mod)); const index = unnamed_consts.items.len; const sym_name = try std.fmt.allocPrint(gpa, "__unnamed_{s}_{d}", .{ decl_name, index }); defer gpa.free(sym_name); @@ -1427,7 +1427,7 @@ fn updateDeclCode(self: *Coff, decl_index: InternPool.DeclIndex, code: []u8, com const mod = self.base.comp.module.?; const decl = mod.declPtr(decl_index); - const decl_name = mod.intern_pool.stringToSlice(try decl.getFullyQualifiedName(mod)); + const decl_name = mod.intern_pool.stringToSlice(try decl.fullyQualifiedName(mod)); log.debug("updateDeclCode {s}{*}", .{ decl_name, decl }); const required_alignment: u32 = @intCast(decl.getAlignment(mod).toByteUnits(0)); diff --git a/src/link/Dwarf.zig b/src/link/Dwarf.zig index f5f754e03b..a9a6942299 100644 --- a/src/link/Dwarf.zig +++ b/src/link/Dwarf.zig @@ -1082,7 +1082,7 @@ pub fn initDeclState(self: *Dwarf, mod: *Module, decl_index: InternPool.DeclInde defer tracy.end(); const decl = mod.declPtr(decl_index); - const decl_linkage_name = try decl.getFullyQualifiedName(mod); + const decl_linkage_name = try decl.fullyQualifiedName(mod); log.debug("initDeclState {}{*}", .{ decl_linkage_name.fmt(&mod.intern_pool), decl }); diff --git a/src/link/Elf/ZigObject.zig b/src/link/Elf/ZigObject.zig index ea32f93584..b6413f7d45 100644 --- a/src/link/Elf/ZigObject.zig +++ b/src/link/Elf/ZigObject.zig @@ -903,7 +903,7 @@ fn updateDeclCode( const gpa = elf_file.base.comp.gpa; const mod = elf_file.base.comp.module.?; const decl = mod.declPtr(decl_index); - const decl_name = mod.intern_pool.stringToSlice(try decl.getFullyQualifiedName(mod)); + const decl_name = mod.intern_pool.stringToSlice(try decl.fullyQualifiedName(mod)); log.debug("updateDeclCode {s}{*}", .{ decl_name, decl }); @@ -1001,7 +1001,7 @@ fn updateTlv( const gpa = elf_file.base.comp.gpa; const mod = elf_file.base.comp.module.?; const decl = mod.declPtr(decl_index); - const decl_name = mod.intern_pool.stringToSlice(try decl.getFullyQualifiedName(mod)); + const decl_name = 
mod.intern_pool.stringToSlice(try decl.fullyQualifiedName(mod)); log.debug("updateTlv {s} ({*})", .{ decl_name, decl }); @@ -1300,7 +1300,7 @@ pub fn lowerUnnamedConst( } const unnamed_consts = gop.value_ptr; const decl = mod.declPtr(decl_index); - const decl_name = mod.intern_pool.stringToSlice(try decl.getFullyQualifiedName(mod)); + const decl_name = mod.intern_pool.stringToSlice(try decl.fullyQualifiedName(mod)); const index = unnamed_consts.items.len; const name = try std.fmt.allocPrint(gpa, "__unnamed_{s}_{d}", .{ decl_name, index }); defer gpa.free(name); @@ -1482,7 +1482,7 @@ pub fn updateDeclLineNumber( defer tracy.end(); const decl = mod.declPtr(decl_index); - const decl_name = mod.intern_pool.stringToSlice(try decl.getFullyQualifiedName(mod)); + const decl_name = mod.intern_pool.stringToSlice(try decl.fullyQualifiedName(mod)); log.debug("updateDeclLineNumber {s}{*}", .{ decl_name, decl }); diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 69dcf7aba1..a50e4bd6a1 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -2170,8 +2170,7 @@ fn initSegments(self: *MachO) !void { for (slice.items(.header)) |header| { const segname = header.segName(); if (self.getSegmentByName(segname) == null) { - const flags: u32 = if (mem.startsWith(u8, segname, "__DATA_CONST")) macho.SG_READ_ONLY else 0; - _ = try self.addSegment(segname, .{ .prot = getSegmentProt(segname), .flags = flags }); + _ = try self.addSegment(segname, .{ .prot = getSegmentProt(segname) }); } } @@ -2247,6 +2246,12 @@ fn initSegments(self: *MachO) !void { segment.nsects += 1; seg_id.* = segment_id; } + + // Set __DATA_CONST as READ_ONLY + if (self.getSegmentByName("__DATA_CONST")) |seg_id| { + const seg = &self.segments.items[seg_id]; + seg.flags |= macho.SG_READ_ONLY; + } } fn allocateSections(self: *MachO) !void { @@ -2474,6 +2479,9 @@ fn initDyldInfoSections(self: *MachO) !void { nbinds += ctx.bind_relocs; nweak_binds += ctx.weak_bind_relocs; } + if (self.getInternalObject()) |int| { + nrebases += int.num_rebase_relocs; + } try self.rebase.entries.ensureUnusedCapacity(gpa, nrebases); try self.bind.entries.ensureUnusedCapacity(gpa, nbinds); try self.weak_bind.entries.ensureUnusedCapacity(gpa, nweak_binds); @@ -3727,7 +3735,6 @@ pub fn addSegment(self: *MachO, name: []const u8, opts: struct { fileoff: u64 = 0, filesize: u64 = 0, prot: macho.vm_prot_t = macho.PROT.NONE, - flags: u32 = 0, }) error{OutOfMemory}!u8 { const gpa = self.base.comp.gpa; const index = @as(u8, @intCast(self.segments.items.len)); diff --git a/src/link/MachO/Atom.zig b/src/link/MachO/Atom.zig index 2c3f360ec2..d40712046e 100644 --- a/src/link/MachO/Atom.zig +++ b/src/link/MachO/Atom.zig @@ -119,12 +119,9 @@ pub fn getThunk(self: Atom, macho_file: *MachO) *Thunk { pub fn initOutputSection(sect: macho.section_64, macho_file: *MachO) !u8 { const segname, const sectname, const flags = blk: { - const segname = sect.segName(); - const sectname = sect.sectName(); - if (sect.isCode()) break :blk .{ "__TEXT", - sectname, + "__text", macho.S_REGULAR | macho.S_ATTR_PURE_INSTRUCTIONS | macho.S_ATTR_SOME_INSTRUCTIONS, }; @@ -135,32 +132,36 @@ pub fn initOutputSection(sect: macho.section_64, macho_file: *MachO) !u8 { => break :blk .{ "__TEXT", "__const", macho.S_REGULAR }, macho.S_CSTRING_LITERALS => { - if (mem.startsWith(u8, sectname, "__objc")) break :blk .{ - segname, sectname, macho.S_REGULAR, + if (mem.startsWith(u8, sect.sectName(), "__objc")) break :blk .{ + sect.segName(), sect.sectName(), macho.S_REGULAR, }; break :blk .{ "__TEXT", "__cstring", 
macho.S_CSTRING_LITERALS }; }, macho.S_MOD_INIT_FUNC_POINTERS, macho.S_MOD_TERM_FUNC_POINTERS, - macho.S_LITERAL_POINTERS, - => break :blk .{ "__DATA_CONST", sectname, sect.flags }, + => break :blk .{ "__DATA_CONST", sect.sectName(), sect.flags }, + macho.S_LITERAL_POINTERS, macho.S_ZEROFILL, macho.S_GB_ZEROFILL, macho.S_THREAD_LOCAL_VARIABLES, macho.S_THREAD_LOCAL_VARIABLE_POINTERS, macho.S_THREAD_LOCAL_REGULAR, macho.S_THREAD_LOCAL_ZEROFILL, - => break :blk .{ "__DATA", sectname, sect.flags }, + => break :blk .{ sect.segName(), sect.sectName(), sect.flags }, - // TODO: do we need this check here? - macho.S_COALESCED => break :blk .{ segname, sectname, macho.S_REGULAR }, + macho.S_COALESCED => break :blk .{ + sect.segName(), + sect.sectName(), + macho.S_REGULAR, + }, macho.S_REGULAR => { + const segname = sect.segName(); + const sectname = sect.sectName(); if (mem.eql(u8, segname, "__DATA")) { - if (mem.eql(u8, sectname, "__const") or - mem.eql(u8, sectname, "__cfstring") or + if (mem.eql(u8, sectname, "__cfstring") or mem.eql(u8, sectname, "__objc_classlist") or mem.eql(u8, sectname, "__objc_imageinfo")) break :blk .{ "__DATA_CONST", @@ -171,7 +172,7 @@ pub fn initOutputSection(sect: macho.section_64, macho_file: *MachO) !u8 { break :blk .{ segname, sectname, sect.flags }; }, - else => break :blk .{ segname, sectname, sect.flags }, + else => break :blk .{ sect.segName(), sect.sectName(), sect.flags }, } }; const osec = macho_file.getSectionByName(segname, sectname) orelse try macho_file.addSection( diff --git a/src/link/MachO/InternalObject.zig b/src/link/MachO/InternalObject.zig index 30b750260c..db8a8fd939 100644 --- a/src/link/MachO/InternalObject.zig +++ b/src/link/MachO/InternalObject.zig @@ -8,6 +8,7 @@ strtab: std.ArrayListUnmanaged(u8) = .{}, objc_methnames: std.ArrayListUnmanaged(u8) = .{}, objc_selrefs: [@sizeOf(u64)]u8 = [_]u8{0} ** @sizeOf(u64), +num_rebase_relocs: u32 = 0, output_symtab_ctx: MachO.SymtabCtx = .{}, pub fn deinit(self: *InternalObject, allocator: Allocator) void { @@ -115,6 +116,7 @@ fn addObjcSelrefsSection( }, }); atom.relocs = .{ .pos = 0, .len = 1 }; + self.num_rebase_relocs += 1; return atom_index; } diff --git a/src/link/MachO/ZigObject.zig b/src/link/MachO/ZigObject.zig index 3a28e824d5..fadf80b2c0 100644 --- a/src/link/MachO/ZigObject.zig +++ b/src/link/MachO/ZigObject.zig @@ -792,7 +792,7 @@ fn updateDeclCode( const gpa = macho_file.base.comp.gpa; const mod = macho_file.base.comp.module.?; const decl = mod.declPtr(decl_index); - const decl_name = mod.intern_pool.stringToSlice(try decl.getFullyQualifiedName(mod)); + const decl_name = mod.intern_pool.stringToSlice(try decl.fullyQualifiedName(mod)); log.debug("updateDeclCode {s}{*}", .{ decl_name, decl }); @@ -876,7 +876,7 @@ fn updateTlv( ) !void { const mod = macho_file.base.comp.module.?; const decl = mod.declPtr(decl_index); - const decl_name = mod.intern_pool.stringToSlice(try decl.getFullyQualifiedName(mod)); + const decl_name = mod.intern_pool.stringToSlice(try decl.fullyQualifiedName(mod)); log.debug("updateTlv {s} ({*})", .{ decl_name, decl }); @@ -1079,7 +1079,7 @@ pub fn lowerUnnamedConst( } const unnamed_consts = gop.value_ptr; const decl = mod.declPtr(decl_index); - const decl_name = mod.intern_pool.stringToSlice(try decl.getFullyQualifiedName(mod)); + const decl_name = mod.intern_pool.stringToSlice(try decl.fullyQualifiedName(mod)); const index = unnamed_consts.items.len; const name = try std.fmt.allocPrint(gpa, "__unnamed_{s}_{d}", .{ decl_name, index }); defer gpa.free(name); diff --git 
a/src/link/Plan9.zig b/src/link/Plan9.zig index 2e937a3904..be68465af7 100644 --- a/src/link/Plan9.zig +++ b/src/link/Plan9.zig @@ -478,7 +478,7 @@ pub fn lowerUnnamedConst(self: *Plan9, tv: TypedValue, decl_index: InternPool.De } const unnamed_consts = gop.value_ptr; - const decl_name = mod.intern_pool.stringToSlice(try decl.getFullyQualifiedName(mod)); + const decl_name = mod.intern_pool.stringToSlice(try decl.fullyQualifiedName(mod)); const index = unnamed_consts.items.len; // name is freed when the unnamed const is freed diff --git a/src/link/Wasm.zig b/src/link/Wasm.zig index 1a06d0fc6e..f4bc2f8f0f 100644 --- a/src/link/Wasm.zig +++ b/src/link/Wasm.zig @@ -662,7 +662,7 @@ pub fn getOrCreateAtomForDecl(wasm: *Wasm, decl_index: InternPool.DeclIndex) !At const symbol = atom.symbolLoc().getSymbol(wasm); const mod = wasm.base.comp.module.?; const decl = mod.declPtr(decl_index); - const full_name = mod.intern_pool.stringToSlice(try decl.getFullyQualifiedName(mod)); + const full_name = mod.intern_pool.stringToSlice(try decl.fullyQualifiedName(mod)); symbol.name = try wasm.string_table.put(gpa, full_name); } return gop.value_ptr.*; @@ -1598,7 +1598,7 @@ pub fn updateDeclLineNumber(wasm: *Wasm, mod: *Module, decl_index: InternPool.De defer tracy.end(); const decl = mod.declPtr(decl_index); - const decl_name = mod.intern_pool.stringToSlice(try decl.getFullyQualifiedName(mod)); + const decl_name = mod.intern_pool.stringToSlice(try decl.fullyQualifiedName(mod)); log.debug("updateDeclLineNumber {s}{*}", .{ decl_name, decl }); try dw.updateDeclLineNumber(mod, decl_index); @@ -1612,7 +1612,7 @@ fn finishUpdateDecl(wasm: *Wasm, decl_index: InternPool.DeclIndex, code: []const const atom_index = wasm.decls.get(decl_index).?; const atom = wasm.getAtomPtr(atom_index); const symbol = &wasm.symbols.items[atom.sym_index]; - const full_name = mod.intern_pool.stringToSlice(try decl.getFullyQualifiedName(mod)); + const full_name = mod.intern_pool.stringToSlice(try decl.fullyQualifiedName(mod)); symbol.name = try wasm.string_table.put(gpa, full_name); symbol.tag = symbol_tag; try atom.code.appendSlice(gpa, code); @@ -1678,7 +1678,7 @@ pub fn lowerUnnamedConst(wasm: *Wasm, tv: TypedValue, decl_index: InternPool.Dec const parent_atom_index = try wasm.getOrCreateAtomForDecl(decl_index); const parent_atom = wasm.getAtom(parent_atom_index); const local_index = parent_atom.locals.items.len; - const fqn = mod.intern_pool.stringToSlice(try decl.getFullyQualifiedName(mod)); + const fqn = mod.intern_pool.stringToSlice(try decl.fullyQualifiedName(mod)); const name = try std.fmt.allocPrintZ(gpa, "__unnamed_{s}_{d}", .{ fqn, local_index, }); diff --git a/src/main.zig b/src/main.zig index db739ebce7..bb8b25c60e 100644 --- a/src/main.zig +++ b/src/main.zig @@ -596,7 +596,7 @@ const usage_build_generic = \\ --export=[value] (WebAssembly) Force a symbol to be exported \\ \\Test Options: - \\ --test-filter [text] Skip tests that do not match filter + \\ --test-filter [text] Skip tests that do not match any filter \\ --test-name-prefix [text] Add prefix to all tests \\ --test-cmd [arg] Specify test execution command one arg at a time \\ --test-cmd-bin Appends test binary path to test cmd args @@ -869,7 +869,7 @@ fn buildOutputType( var link_emit_relocs = false; var build_id: ?std.zig.BuildId = null; var runtime_args_start: ?usize = null; - var test_filter: ?[]const u8 = null; + var test_filters: std.ArrayListUnmanaged([]const u8) = .{}; var test_name_prefix: ?[]const u8 = null; var test_runner_path: ?[]const u8 = null; var 
override_local_cache_dir: ?[]const u8 = try EnvVar.ZIG_LOCAL_CACHE_DIR.get(arena); @@ -909,7 +909,7 @@ fn buildOutputType( var rc_source_files_owner_index: usize = 0; // null means replace with the test executable binary - var test_exec_args = std.ArrayList(?[]const u8).init(arena); + var test_exec_args: std.ArrayListUnmanaged(?[]const u8) = .{}; // These get set by CLI flags and then snapshotted when a `--mod` flag is // encountered. @@ -1278,13 +1278,13 @@ fn buildOutputType( } else if (mem.eql(u8, arg, "--libc")) { create_module.libc_paths_file = args_iter.nextOrFatal(); } else if (mem.eql(u8, arg, "--test-filter")) { - test_filter = args_iter.nextOrFatal(); + try test_filters.append(arena, args_iter.nextOrFatal()); } else if (mem.eql(u8, arg, "--test-name-prefix")) { test_name_prefix = args_iter.nextOrFatal(); } else if (mem.eql(u8, arg, "--test-runner")) { test_runner_path = args_iter.nextOrFatal(); } else if (mem.eql(u8, arg, "--test-cmd")) { - try test_exec_args.append(args_iter.nextOrFatal()); + try test_exec_args.append(arena, args_iter.nextOrFatal()); } else if (mem.eql(u8, arg, "--cache-dir")) { override_local_cache_dir = args_iter.nextOrFatal(); } else if (mem.eql(u8, arg, "--global-cache-dir")) { @@ -1334,7 +1334,7 @@ fn buildOutputType( } else if (mem.eql(u8, arg, "-fno-each-lib-rpath")) { create_module.each_lib_rpath = false; } else if (mem.eql(u8, arg, "--test-cmd-bin")) { - try test_exec_args.append(null); + try test_exec_args.append(arena, null); } else if (mem.eql(u8, arg, "--test-no-exec")) { test_no_exec = true; } else if (mem.eql(u8, arg, "-ftime-report")) { @@ -3246,7 +3246,7 @@ fn buildOutputType( .time_report = time_report, .stack_report = stack_report, .build_id = build_id, - .test_filter = test_filter, + .test_filters = test_filters.items, .test_name_prefix = test_name_prefix, .test_runner_path = test_runner_path, .disable_lld_caching = disable_lld_caching, @@ -3369,16 +3369,15 @@ fn buildOutputType( const c_code_path = try fs.path.join(arena, &[_][]const u8{ c_code_directory.path orelse ".", c_code_loc.basename, }); - try test_exec_args.append(self_exe_path); - try test_exec_args.append("run"); + try test_exec_args.appendSlice(arena, &.{ self_exe_path, "run" }); if (zig_lib_directory.path) |p| { - try test_exec_args.appendSlice(&.{ "-I", p }); + try test_exec_args.appendSlice(arena, &.{ "-I", p }); } if (create_module.resolved_options.link_libc) { - try test_exec_args.append("-lc"); + try test_exec_args.append(arena, "-lc"); } else if (target.os.tag == .windows) { - try test_exec_args.appendSlice(&.{ + try test_exec_args.appendSlice(arena, &.{ "--subsystem", "console", "-lkernel32", "-lntdll", }); @@ -3386,17 +3385,15 @@ fn buildOutputType( const first_cli_mod = create_module.modules.values()[0]; if (first_cli_mod.target_arch_os_abi) |triple| { - try test_exec_args.append("-target"); - try test_exec_args.append(triple); + try test_exec_args.appendSlice(arena, &.{ "-target", triple }); } if (first_cli_mod.target_mcpu) |mcpu| { - try test_exec_args.append(try std.fmt.allocPrint(arena, "-mcpu={s}", .{mcpu})); + try test_exec_args.append(arena, try std.fmt.allocPrint(arena, "-mcpu={s}", .{mcpu})); } if (create_module.dynamic_linker) |dl| { - try test_exec_args.append("--dynamic-linker"); - try test_exec_args.append(dl); + try test_exec_args.appendSlice(arena, &.{ "--dynamic-linker", dl }); } - try test_exec_args.append(c_code_path); + try test_exec_args.append(arena, c_code_path); } const run_or_test = switch (arg_mode) { diff --git a/src/type.zig b/src/type.zig 
index a6265692c2..a9d1654ba7 100644 --- a/src/type.zig +++ b/src/type.zig @@ -905,11 +905,32 @@ pub const Type = struct { return Type.fromInterned(array_type.child).abiAlignmentAdvanced(mod, strat); }, .vector_type => |vector_type| { - const bits_u64 = try bitSizeAdvanced(Type.fromInterned(vector_type.child), mod, opt_sema); - const bits: u32 = @intCast(bits_u64); - const bytes = ((bits * vector_type.len) + 7) / 8; - const alignment = std.math.ceilPowerOfTwoAssert(u32, bytes); - return .{ .scalar = Alignment.fromByteUnits(alignment) }; + if (vector_type.len == 0) return .{ .scalar = .@"1" }; + switch (mod.comp.getZigBackend()) { + else => { + const elem_bits: u32 = @intCast(try Type.fromInterned(vector_type.child).bitSizeAdvanced(mod, opt_sema)); + if (elem_bits == 0) return .{ .scalar = .@"1" }; + const bytes = ((elem_bits * vector_type.len) + 7) / 8; + const alignment = std.math.ceilPowerOfTwoAssert(u32, bytes); + return .{ .scalar = Alignment.fromByteUnits(alignment) }; + }, + .stage2_x86_64 => { + if (vector_type.child == .bool_type) { + if (vector_type.len > 256 and std.Target.x86.featureSetHas(target.cpu.features, .avx512f)) return .{ .scalar = .@"64" }; + if (vector_type.len > 128 and std.Target.x86.featureSetHas(target.cpu.features, .avx2)) return .{ .scalar = .@"32" }; + if (vector_type.len > 64) return .{ .scalar = .@"16" }; + const bytes = std.math.divCeil(u32, vector_type.len, 8) catch unreachable; + const alignment = std.math.ceilPowerOfTwoAssert(u32, bytes); + return .{ .scalar = Alignment.fromByteUnits(alignment) }; + } + const elem_bytes: u32 = @intCast((try Type.fromInterned(vector_type.child).abiSizeAdvanced(mod, strat)).scalar); + if (elem_bytes == 0) return .{ .scalar = .@"1" }; + const bytes = elem_bytes * vector_type.len; + if (bytes > 32 and std.Target.x86.featureSetHas(target.cpu.features, .avx512f)) return .{ .scalar = .@"64" }; + if (bytes > 16 and std.Target.x86.featureSetHas(target.cpu.features, .avx)) return .{ .scalar = .@"32" }; + return .{ .scalar = .@"16" }; + }, + } }, .opt_type => return abiAlignmentAdvancedOptional(ty, mod, strat), @@ -1237,9 +1258,6 @@ pub const Type = struct { .storage = .{ .lazy_size = ty.toIntern() }, } }))) }, }; - const elem_bits = try Type.fromInterned(vector_type.child).bitSizeAdvanced(mod, opt_sema); - const total_bits = elem_bits * vector_type.len; - const total_bytes = (total_bits + 7) / 8; const alignment = switch (try ty.abiAlignmentAdvanced(mod, strat)) { .scalar => |x| x, .val => return .{ .val = Value.fromInterned((try mod.intern(.{ .int = .{ @@ -1247,6 +1265,18 @@ pub const Type = struct { .storage = .{ .lazy_size = ty.toIntern() }, } }))) }, }; + const total_bytes = switch (mod.comp.getZigBackend()) { + else => total_bytes: { + const elem_bits = try Type.fromInterned(vector_type.child).bitSizeAdvanced(mod, opt_sema); + const total_bits = elem_bits * vector_type.len; + break :total_bytes (total_bits + 7) / 8; + }, + .stage2_x86_64 => total_bytes: { + if (vector_type.child == .bool_type) break :total_bytes std.math.divCeil(u32, vector_type.len, 8) catch unreachable; + const elem_bytes: u32 = @intCast((try Type.fromInterned(vector_type.child).abiSizeAdvanced(mod, strat)).scalar); + break :total_bytes elem_bytes * vector_type.len; + }, + }; return AbiSizeAdvanced{ .scalar = alignment.forward(total_bytes) }; }, @@ -2108,7 +2138,8 @@ pub const Type = struct { /// Returns true if and only if the type is a fixed-width integer. 
pub fn isInt(self: Type, mod: *const Module) bool { - return self.isSignedInt(mod) or self.isUnsignedInt(mod); + return self.toIntern() != .comptime_int_type and + mod.intern_pool.isIntegerType(self.toIntern()); } /// Returns true if and only if the type is a fixed-width, signed integer.
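The type.zig hunks above give the self-hosted x86_64 backend its own vector layout: a bool vector packs to one bit per lane, any other vector occupies elem_bytes * len, and the ABI alignment is clamped to the widest SIMD register the target offers (xmm/ymm/zmm). A stand-alone sketch of those two rules, with hypothetical has_avx512f/has_avx flags standing in for the featureSetHas queries and the bool vector's separate length-based alignment thresholds omitted:

    const std = @import("std");

    // One bit per lane, rounded up to whole bytes
    // (matches the divCeil call in the hunk above).
    fn boolVectorBytes(len: u32) u32 {
        return std.math.divCeil(u32, len, 8) catch unreachable;
    }

    // Non-bool vectors: alignment is capped by the widest available register.
    fn vectorAlignment(byte_size: u32, has_avx512f: bool, has_avx: bool) u32 {
        if (byte_size > 32 and has_avx512f) return 64; // zmm
        if (byte_size > 16 and has_avx) return 32; // ymm
        return 16; // xmm
    }

    test "x86_64 vector layout sketch" {
        try std.testing.expectEqual(@as(u32, 2), boolVectorBytes(16));
        try std.testing.expectEqual(@as(u32, 32), vectorAlignment(32, false, true));
        try std.testing.expectEqual(@as(u32, 16), vectorAlignment(32, false, false));
    }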
