Diffstat (limited to 'src')
-rw-r--r--  src/Compilation.zig                  10
-rw-r--r--  src/InternPool.zig                   11
-rw-r--r--  src/Module.zig                      252
-rw-r--r--  src/Package/Fetch.zig                 2
-rw-r--r--  src/Sema.zig                         81
-rw-r--r--  src/arch/wasm/CodeGen.zig             2
-rw-r--r--  src/arch/x86_64/CodeGen.zig        1958
-rw-r--r--  src/arch/x86_64/Encoding.zig         15
-rw-r--r--  src/arch/x86_64/Lower.zig            14
-rw-r--r--  src/arch/x86_64/Mir.zig              30
-rw-r--r--  src/arch/x86_64/encodings.zig        69
-rw-r--r--  src/codegen.zig                      57
-rw-r--r--  src/codegen/c.zig                    98
-rw-r--r--  src/codegen/llvm.zig                 34
-rw-r--r--  src/codegen/spirv.zig                 4
-rw-r--r--  src/link/Coff.zig                     4
-rw-r--r--  src/link/Dwarf.zig                    2
-rw-r--r--  src/link/Elf/ZigObject.zig            8
-rw-r--r--  src/link/MachO.zig                   13
-rw-r--r--  src/link/MachO/Atom.zig              29
-rw-r--r--  src/link/MachO/InternalObject.zig     2
-rw-r--r--  src/link/MachO/ZigObject.zig          6
-rw-r--r--  src/link/Plan9.zig                    2
-rw-r--r--  src/link/Wasm.zig                     8
-rw-r--r--  src/main.zig                         33
-rw-r--r--  src/type.zig                         49
26 files changed, 2147 insertions, 646 deletions
diff --git a/src/Compilation.zig b/src/Compilation.zig
index 91879094dd..5fa93fa677 100644
--- a/src/Compilation.zig
+++ b/src/Compilation.zig
@@ -217,7 +217,7 @@ libcxx_abi_version: libcxx.AbiVersion = libcxx.AbiVersion.default,
/// This mutex guards all `Compilation` mutable state.
mutex: std.Thread.Mutex = .{},
-test_filter: ?[]const u8,
+test_filters: []const []const u8,
test_name_prefix: ?[]const u8,
emit_asm: ?EmitLoc,
@@ -1097,7 +1097,7 @@ pub const CreateOptions = struct {
native_system_include_paths: []const []const u8 = &.{},
clang_preprocessor_mode: ClangPreprocessorMode = .no,
reference_trace: ?u32 = null,
- test_filter: ?[]const u8 = null,
+ test_filters: []const []const u8 = &.{},
test_name_prefix: ?[]const u8 = null,
test_runner_path: ?[]const u8 = null,
subsystem: ?std.Target.SubSystem = null,
@@ -1506,7 +1506,7 @@ pub fn create(gpa: Allocator, arena: Allocator, options: CreateOptions) !*Compil
.formatted_panics = formatted_panics,
.time_report = options.time_report,
.stack_report = options.stack_report,
- .test_filter = options.test_filter,
+ .test_filters = options.test_filters,
.test_name_prefix = options.test_name_prefix,
.debug_compiler_runtime_libs = options.debug_compiler_runtime_libs,
.debug_compile_errors = options.debug_compile_errors,
@@ -1613,7 +1613,7 @@ pub fn create(gpa: Allocator, arena: Allocator, options: CreateOptions) !*Compil
hash.add(options.config.use_lib_llvm);
hash.add(options.config.dll_export_fns);
hash.add(options.config.is_test);
- hash.addOptionalBytes(options.test_filter);
+ hash.addListOfBytes(options.test_filters);
hash.addOptionalBytes(options.test_name_prefix);
hash.add(options.skip_linker_dependencies);
hash.add(formatted_panics);
@@ -2475,7 +2475,7 @@ fn addNonIncrementalStuffToCacheManifest(
try addModuleTableToCacheHash(gpa, arena, &man.hash, mod.root_mod, mod.main_mod, .{ .files = man });
// Synchronize with other matching comments: ZigOnlyHashStuff
- man.hash.addOptionalBytes(comp.test_filter);
+ man.hash.addListOfBytes(comp.test_filters);
man.hash.addOptionalBytes(comp.test_name_prefix);
man.hash.add(comp.skip_linker_dependencies);
man.hash.add(comp.formatted_panics);
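
Note: `test_filter` becoming `test_filters` changes the CLI semantics from one optional substring to any-of matching over a list (hashed into the cache manifest with `addListOfBytes` instead of `addOptionalBytes`). A minimal sketch of the matching rule, with assumed names rather than the compiler's exact code:

    const std = @import("std");

    /// Keep a test when any filter is a substring of its fully
    /// qualified name; an empty filter list keeps everything.
    fn keepTest(fqn: []const u8, test_filters: []const []const u8) bool {
        if (test_filters.len == 0) return true;
        for (test_filters) |filter| {
            if (std.mem.indexOf(u8, fqn, filter) != null) return true;
        }
        return false;
    }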
diff --git a/src/InternPool.zig b/src/InternPool.zig
index 19be12c129..46676097bf 100644
--- a/src/InternPool.zig
+++ b/src/InternPool.zig
@@ -3587,6 +3587,7 @@ pub const Alignment = enum(u6) {
@"8" = 3,
@"16" = 4,
@"32" = 5,
+ @"64" = 6,
none = std.math.maxInt(u6),
_,
@@ -7403,10 +7404,14 @@ pub fn isIntegerType(ip: *const InternPool, ty: Index) bool {
.c_ulong_type,
.c_longlong_type,
.c_ulonglong_type,
- .c_longdouble_type,
.comptime_int_type,
=> true,
- else => ip.indexToKey(ty) == .int_type,
+ else => switch (ip.items.items(.tag)[@intFromEnum(ty)]) {
+ .type_int_signed,
+ .type_int_unsigned,
+ => true,
+ else => false,
+ },
};
}
@@ -7904,7 +7909,7 @@ pub fn destroyNamespace(ip: *InternPool, gpa: Allocator, index: NamespaceIndex)
ip.namespacePtr(index).* = .{
.parent = undefined,
.file_scope = undefined,
- .ty = undefined,
+ .decl_index = undefined,
};
ip.namespaces_free_list.append(gpa, index) catch {
// In order to keep `destroyNamespace` a non-fallible function, we ignore memory
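
Note: `InternPool.Alignment` stores alignments as log2 values, which is why the new tag is `@"64" = 6` (1 << 6 == 64), and the reworked `isIntegerType` answers from the item tag directly instead of materializing a full key. A sketch of the log2 decoding, assuming a non-`none` value (hypothetical helper, mirroring what the existing accessors do):

    // Each tag is log2(byte alignment), so @"64" = 6 decodes to 64.
    fn byteUnits(a: InternPool.Alignment) u64 {
        return @as(u64, 1) << @intFromEnum(a); // not valid for .none
    }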
diff --git a/src/Module.zig b/src/Module.zig
index c27b8ea4be..a4cedd9077 100644
--- a/src/Module.zig
+++ b/src/Module.zig
@@ -411,15 +411,15 @@ pub const Decl = struct {
/// This state detects dependency loops.
in_progress,
/// The file corresponding to this Decl had a parse error or ZIR error.
- /// There will be a corresponding ErrorMsg in Module.failed_files.
+ /// There will be a corresponding ErrorMsg in Zcu.failed_files.
file_failure,
/// This Decl might be OK but it depends on another one which did not
/// successfully complete semantic analysis.
dependency_failure,
/// Semantic analysis failure.
- /// There will be a corresponding ErrorMsg in Module.failed_decls.
+ /// There will be a corresponding ErrorMsg in Zcu.failed_decls.
sema_failure,
- /// There will be a corresponding ErrorMsg in Module.failed_decls.
+ /// There will be a corresponding ErrorMsg in Zcu.failed_decls.
codegen_failure,
/// Semantic analysis and constant value codegen of this Decl have
/// succeeded. However, the Decl may be outdated due to an in-progress
@@ -494,77 +494,45 @@ pub const Decl = struct {
return LazySrcLoc.nodeOffset(decl.nodeIndexToRelative(node_index));
}
- pub fn srcLoc(decl: Decl, mod: *Module) SrcLoc {
- return decl.nodeOffsetSrcLoc(0, mod);
+ pub fn srcLoc(decl: Decl, zcu: *Zcu) SrcLoc {
+ return decl.nodeOffsetSrcLoc(0, zcu);
}
- pub fn nodeOffsetSrcLoc(decl: Decl, node_offset: i32, mod: *Module) SrcLoc {
+ pub fn nodeOffsetSrcLoc(decl: Decl, node_offset: i32, zcu: *Zcu) SrcLoc {
return .{
- .file_scope = decl.getFileScope(mod),
+ .file_scope = decl.getFileScope(zcu),
.parent_decl_node = decl.src_node,
.lazy = LazySrcLoc.nodeOffset(node_offset),
};
}
- pub fn srcToken(decl: Decl, mod: *Module) Ast.TokenIndex {
- const tree = &decl.getFileScope(mod).tree;
+ pub fn srcToken(decl: Decl, zcu: *Zcu) Ast.TokenIndex {
+ const tree = &decl.getFileScope(zcu).tree;
return tree.firstToken(decl.src_node);
}
- pub fn srcByteOffset(decl: Decl, mod: *Module) u32 {
- const tree = &decl.getFileScope(mod).tree;
+ pub fn srcByteOffset(decl: Decl, zcu: *Zcu) u32 {
+ const tree = &decl.getFileScope(zcu).tree;
return tree.tokens.items(.start)[decl.srcToken()];
}
- pub fn renderFullyQualifiedName(decl: Decl, mod: *Module, writer: anytype) !void {
+ pub fn renderFullyQualifiedName(decl: Decl, zcu: *Zcu, writer: anytype) !void {
if (decl.name_fully_qualified) {
- try writer.print("{}", .{decl.name.fmt(&mod.intern_pool)});
+ try writer.print("{}", .{decl.name.fmt(&zcu.intern_pool)});
} else {
- try mod.namespacePtr(decl.src_namespace).renderFullyQualifiedName(mod, decl.name, writer);
+ try zcu.namespacePtr(decl.src_namespace).renderFullyQualifiedName(zcu, decl.name, writer);
}
}
- pub fn renderFullyQualifiedDebugName(decl: Decl, mod: *Module, writer: anytype) !void {
- return mod.namespacePtr(decl.src_namespace).renderFullyQualifiedDebugName(mod, decl.name, writer);
+ pub fn renderFullyQualifiedDebugName(decl: Decl, zcu: *Zcu, writer: anytype) !void {
+ return zcu.namespacePtr(decl.src_namespace).renderFullyQualifiedDebugName(zcu, decl.name, writer);
}
- pub fn getFullyQualifiedName(decl: Decl, mod: *Module) !InternPool.NullTerminatedString {
- if (decl.name_fully_qualified) return decl.name;
-
- const ip = &mod.intern_pool;
- const count = count: {
- var count: usize = ip.stringToSlice(decl.name).len + 1;
- var ns: Namespace.Index = decl.src_namespace;
- while (true) {
- const namespace = mod.namespacePtr(ns);
- const ns_decl = mod.declPtr(namespace.getDeclIndex(mod));
- count += ip.stringToSlice(ns_decl.name).len + 1;
- ns = namespace.parent.unwrap() orelse {
- count += namespace.file_scope.sub_file_path.len;
- break :count count;
- };
- }
- };
-
- const gpa = mod.gpa;
- const start = ip.string_bytes.items.len;
- // Protects reads of interned strings from being reallocated during the call to
- // renderFullyQualifiedName.
- try ip.string_bytes.ensureUnusedCapacity(gpa, count);
- decl.renderFullyQualifiedName(mod, ip.string_bytes.writer(gpa)) catch unreachable;
-
- // Sanitize the name for nvptx which is more restrictive.
- // TODO This should be handled by the backend, not the frontend. Have a
- // look at how the C backend does it for inspiration.
- const cpu_arch = mod.root_mod.resolved_target.result.cpu.arch;
- if (cpu_arch.isNvptx()) {
- for (ip.string_bytes.items[start..]) |*byte| switch (byte.*) {
- '{', '}', '*', '[', ']', '(', ')', ',', ' ', '\'' => byte.* = '_',
- else => {},
- };
- }
-
- return ip.getOrPutTrailingString(gpa, ip.string_bytes.items.len - start);
+ pub fn fullyQualifiedName(decl: Decl, zcu: *Zcu) !InternPool.NullTerminatedString {
+ return if (decl.name_fully_qualified)
+ decl.name
+ else
+ zcu.namespacePtr(decl.src_namespace).fullyQualifiedName(zcu, decl.name);
}
pub fn typedValue(decl: Decl) error{AnalysisFail}!TypedValue {
@@ -572,38 +540,38 @@ pub const Decl = struct {
return TypedValue{ .ty = decl.ty, .val = decl.val };
}
- pub fn internValue(decl: *Decl, mod: *Module) Allocator.Error!InternPool.Index {
+ pub fn internValue(decl: *Decl, zcu: *Zcu) Allocator.Error!InternPool.Index {
assert(decl.has_tv);
- const ip_index = try decl.val.intern(decl.ty, mod);
+ const ip_index = try decl.val.intern(decl.ty, zcu);
decl.val = Value.fromInterned(ip_index);
return ip_index;
}
- pub fn isFunction(decl: Decl, mod: *const Module) !bool {
+ pub fn isFunction(decl: Decl, zcu: *const Zcu) !bool {
const tv = try decl.typedValue();
- return tv.ty.zigTypeTag(mod) == .Fn;
+ return tv.ty.zigTypeTag(zcu) == .Fn;
}
/// If the Decl owns its value and it is a struct, return it,
/// otherwise null.
- pub fn getOwnedStruct(decl: Decl, mod: *Module) ?InternPool.Key.StructType {
+ pub fn getOwnedStruct(decl: Decl, zcu: *Zcu) ?InternPool.Key.StructType {
if (!decl.owns_tv) return null;
if (decl.val.ip_index == .none) return null;
- return mod.typeToStruct(decl.val.toType());
+ return zcu.typeToStruct(decl.val.toType());
}
/// If the Decl owns its value and it is a union, return it,
/// otherwise null.
- pub fn getOwnedUnion(decl: Decl, mod: *Module) ?InternPool.UnionType {
+ pub fn getOwnedUnion(decl: Decl, zcu: *Zcu) ?InternPool.UnionType {
if (!decl.owns_tv) return null;
if (decl.val.ip_index == .none) return null;
- return mod.typeToUnion(decl.val.toType());
+ return zcu.typeToUnion(decl.val.toType());
}
- pub fn getOwnedFunction(decl: Decl, mod: *Module) ?InternPool.Key.Func {
+ pub fn getOwnedFunction(decl: Decl, zcu: *Zcu) ?InternPool.Key.Func {
const i = decl.getOwnedFunctionIndex();
if (i == .none) return null;
- return switch (mod.intern_pool.indexToKey(i)) {
+ return switch (zcu.intern_pool.indexToKey(i)) {
.func => |func| func,
else => null,
};
@@ -616,24 +584,24 @@ pub const Decl = struct {
/// If the Decl owns its value and it is an extern function, returns it,
/// otherwise null.
- pub fn getOwnedExternFunc(decl: Decl, mod: *Module) ?InternPool.Key.ExternFunc {
- return if (decl.owns_tv) decl.val.getExternFunc(mod) else null;
+ pub fn getOwnedExternFunc(decl: Decl, zcu: *Zcu) ?InternPool.Key.ExternFunc {
+ return if (decl.owns_tv) decl.val.getExternFunc(zcu) else null;
}
/// If the Decl owns its value and it is a variable, returns it,
/// otherwise null.
- pub fn getOwnedVariable(decl: Decl, mod: *Module) ?InternPool.Key.Variable {
- return if (decl.owns_tv) decl.val.getVariable(mod) else null;
+ pub fn getOwnedVariable(decl: Decl, zcu: *Zcu) ?InternPool.Key.Variable {
+ return if (decl.owns_tv) decl.val.getVariable(zcu) else null;
}
/// Gets the namespace that this Decl creates by being a struct, union,
/// enum, or opaque.
- pub fn getInnerNamespaceIndex(decl: Decl, mod: *Module) Namespace.OptionalIndex {
+ pub fn getInnerNamespaceIndex(decl: Decl, zcu: *Zcu) Namespace.OptionalIndex {
if (!decl.has_tv) return .none;
return switch (decl.val.ip_index) {
.empty_struct_type => .none,
.none => .none,
- else => switch (mod.intern_pool.indexToKey(decl.val.toIntern())) {
+ else => switch (zcu.intern_pool.indexToKey(decl.val.toIntern())) {
.opaque_type => |opaque_type| opaque_type.namespace.toOptional(),
.struct_type => |struct_type| struct_type.namespace,
.union_type => |union_type| union_type.namespace.toOptional(),
@@ -644,19 +612,19 @@ pub const Decl = struct {
}
/// Like `getInnerNamespaceIndex`, but only returns it if the Decl is the owner.
- pub fn getOwnedInnerNamespaceIndex(decl: Decl, mod: *Module) Namespace.OptionalIndex {
+ pub fn getOwnedInnerNamespaceIndex(decl: Decl, zcu: *Zcu) Namespace.OptionalIndex {
if (!decl.owns_tv) return .none;
- return decl.getInnerNamespaceIndex(mod);
+ return decl.getInnerNamespaceIndex(zcu);
}
/// Same as `getOwnedInnerNamespaceIndex` but additionally obtains the pointer.
- pub fn getOwnedInnerNamespace(decl: Decl, mod: *Module) ?*Namespace {
- return mod.namespacePtrUnwrap(decl.getOwnedInnerNamespaceIndex(mod));
+ pub fn getOwnedInnerNamespace(decl: Decl, zcu: *Zcu) ?*Namespace {
+ return zcu.namespacePtrUnwrap(decl.getOwnedInnerNamespaceIndex(zcu));
}
/// Same as `getInnerNamespaceIndex` but additionally obtains the pointer.
- pub fn getInnerNamespace(decl: Decl, mod: *Module) ?*Namespace {
- return mod.namespacePtrUnwrap(decl.getInnerNamespaceIndex(mod));
+ pub fn getInnerNamespace(decl: Decl, zcu: *Zcu) ?*Namespace {
+ return zcu.namespacePtrUnwrap(decl.getInnerNamespaceIndex(zcu));
}
pub fn dump(decl: *Decl) void {
@@ -674,27 +642,27 @@ pub const Decl = struct {
std.debug.print("\n", .{});
}
- pub fn getFileScope(decl: Decl, mod: *Module) *File {
- return mod.namespacePtr(decl.src_namespace).file_scope;
+ pub fn getFileScope(decl: Decl, zcu: *Zcu) *File {
+ return zcu.namespacePtr(decl.src_namespace).file_scope;
}
- pub fn getExternDecl(decl: Decl, mod: *Module) OptionalIndex {
+ pub fn getExternDecl(decl: Decl, zcu: *Zcu) OptionalIndex {
assert(decl.has_tv);
- return switch (mod.intern_pool.indexToKey(decl.val.toIntern())) {
+ return switch (zcu.intern_pool.indexToKey(decl.val.toIntern())) {
.variable => |variable| if (variable.is_extern) variable.decl.toOptional() else .none,
.extern_func => |extern_func| extern_func.decl.toOptional(),
else => .none,
};
}
- pub fn isExtern(decl: Decl, mod: *Module) bool {
- return decl.getExternDecl(mod) != .none;
+ pub fn isExtern(decl: Decl, zcu: *Zcu) bool {
+ return decl.getExternDecl(zcu) != .none;
}
- pub fn getAlignment(decl: Decl, mod: *Module) Alignment {
+ pub fn getAlignment(decl: Decl, zcu: *Zcu) Alignment {
assert(decl.has_tv);
if (decl.alignment != .none) return decl.alignment;
- return decl.ty.abiAlignment(mod);
+ return decl.ty.abiAlignment(zcu);
}
};
@@ -704,7 +672,7 @@ pub const EmitH = struct {
};
pub const DeclAdapter = struct {
- mod: *Module,
+ zcu: *Zcu,
pub fn hash(self: @This(), s: InternPool.NullTerminatedString) u32 {
_ = self;
@@ -713,8 +681,7 @@ pub const DeclAdapter = struct {
pub fn eql(self: @This(), a: InternPool.NullTerminatedString, b_decl_index: Decl.Index, b_index: usize) bool {
_ = b_index;
- const b_decl = self.mod.declPtr(b_decl_index);
- return a == b_decl.name;
+ return a == self.zcu.declPtr(b_decl_index).name;
}
};
@@ -723,7 +690,7 @@ pub const Namespace = struct {
parent: OptionalIndex,
file_scope: *File,
/// Will be a struct, enum, union, or opaque.
- ty: Type,
+ decl_index: Decl.Index,
/// Direct children of the namespace.
/// Declaration order is preserved via entry order.
/// These are only declarations named directly by the AST; anonymous
@@ -739,7 +706,7 @@ pub const Namespace = struct {
const OptionalIndex = InternPool.OptionalNamespaceIndex;
const DeclContext = struct {
- module: *Module,
+ zcu: *Zcu,
pub fn hash(ctx: @This(), decl_index: Decl.Index) u32 {
const decl = ctx.module.declPtr(decl_index);
@@ -757,39 +724,87 @@ pub const Namespace = struct {
/// This renders e.g. "std.fs.Dir.OpenOptions"
pub fn renderFullyQualifiedName(
ns: Namespace,
- mod: *Module,
+ zcu: *Zcu,
name: InternPool.NullTerminatedString,
writer: anytype,
) @TypeOf(writer).Error!void {
if (ns.parent.unwrap()) |parent| {
- const decl = mod.declPtr(ns.getDeclIndex(mod));
- try mod.namespacePtr(parent).renderFullyQualifiedName(mod, decl.name, writer);
+ try zcu.namespacePtr(parent).renderFullyQualifiedName(
+ zcu,
+ zcu.declPtr(ns.decl_index).name,
+ writer,
+ );
} else {
try ns.file_scope.renderFullyQualifiedName(writer);
}
- if (name != .empty) try writer.print(".{}", .{name.fmt(&mod.intern_pool)});
+ if (name != .empty) try writer.print(".{}", .{name.fmt(&zcu.intern_pool)});
}
/// This renders e.g. "std/fs.zig:Dir.OpenOptions"
pub fn renderFullyQualifiedDebugName(
ns: Namespace,
- mod: *Module,
+ zcu: *Zcu,
name: InternPool.NullTerminatedString,
writer: anytype,
) @TypeOf(writer).Error!void {
- const separator_char: u8 = if (ns.parent.unwrap()) |parent| sep: {
- const decl = mod.declPtr(ns.getDeclIndex(mod));
- try mod.namespacePtr(parent).renderFullyQualifiedDebugName(mod, decl.name, writer);
+ const sep: u8 = if (ns.parent.unwrap()) |parent| sep: {
+ try zcu.namespacePtr(parent).renderFullyQualifiedDebugName(
+ zcu,
+ zcu.declPtr(ns.decl_index).name,
+ writer,
+ );
break :sep '.';
} else sep: {
try ns.file_scope.renderFullyQualifiedDebugName(writer);
break :sep ':';
};
- if (name != .empty) try writer.print("{c}{}", .{ separator_char, name.fmt(&mod.intern_pool) });
+ if (name != .empty) try writer.print("{c}{}", .{ sep, name.fmt(&zcu.intern_pool) });
}
- pub fn getDeclIndex(ns: Namespace, mod: *Module) Decl.Index {
- return ns.ty.getOwnerDecl(mod);
+ pub fn fullyQualifiedName(
+ ns: Namespace,
+ zcu: *Zcu,
+ name: InternPool.NullTerminatedString,
+ ) !InternPool.NullTerminatedString {
+ const ip = &zcu.intern_pool;
+ const count = count: {
+ var count: usize = ip.stringToSlice(name).len + 1;
+ var cur_ns = &ns;
+ while (true) {
+ const decl = zcu.declPtr(cur_ns.decl_index);
+ count += ip.stringToSlice(decl.name).len + 1;
+ cur_ns = zcu.namespacePtr(cur_ns.parent.unwrap() orelse {
+ count += ns.file_scope.sub_file_path.len;
+ break :count count;
+ });
+ }
+ };
+
+ const gpa = zcu.gpa;
+ const start = ip.string_bytes.items.len;
+ // Protects reads of interned strings from being reallocated during the call to
+ // renderFullyQualifiedName.
+ try ip.string_bytes.ensureUnusedCapacity(gpa, count);
+ ns.renderFullyQualifiedName(zcu, name, ip.string_bytes.writer(gpa)) catch unreachable;
+
+ // Sanitize the name for nvptx which is more restrictive.
+ // TODO This should be handled by the backend, not the frontend. Have a
+ // look at how the C backend does it for inspiration.
+ const cpu_arch = zcu.root_mod.resolved_target.result.cpu.arch;
+ if (cpu_arch.isNvptx()) {
+ for (ip.string_bytes.items[start..]) |*byte| switch (byte.*) {
+ '{', '}', '*', '[', ']', '(', ')', ',', ' ', '\'' => byte.* = '_',
+ else => {},
+ };
+ }
+
+ return ip.getOrPutTrailingString(gpa, ip.string_bytes.items.len - start);
+ }
+
+ pub fn getType(ns: Namespace, zcu: *Zcu) Type {
+ const decl = zcu.declPtr(ns.decl_index);
+ assert(decl.has_tv);
+ return decl.val.toType();
}
};
@@ -2559,9 +2574,8 @@ pub fn namespacePtrUnwrap(mod: *Module, index: Namespace.OptionalIndex) ?*Namesp
pub fn declIsRoot(mod: *Module, decl_index: Decl.Index) bool {
const decl = mod.declPtr(decl_index);
const namespace = mod.namespacePtr(decl.src_namespace);
- if (namespace.parent != .none)
- return false;
- return decl_index == namespace.getDeclIndex(mod);
+ if (namespace.parent != .none) return false;
+ return decl_index == namespace.decl_index;
}
fn freeExportList(gpa: Allocator, export_list: *ArrayListUnmanaged(*Export)) void {
@@ -3592,7 +3606,7 @@ pub fn ensureFuncBodyAnalyzed(zcu: *Zcu, func_index: InternPool.Index) SemaError
defer liveness.deinit(gpa);
if (dump_air) {
- const fqn = try decl.getFullyQualifiedName(zcu);
+ const fqn = try decl.fullyQualifiedName(zcu);
std.debug.print("# Begin Function AIR: {}:\n", .{fqn.fmt(ip)});
@import("print_air.zig").dump(zcu, air, liveness);
std.debug.print("# End Function AIR: {}\n\n", .{fqn.fmt(ip)});
@@ -3738,7 +3752,7 @@ pub fn semaFile(mod: *Module, file: *File) SemaError!void {
// InternPool index.
const new_namespace_index = try mod.createNamespace(.{
.parent = .none,
- .ty = undefined,
+ .decl_index = undefined,
.file_scope = file,
});
const new_namespace = mod.namespacePtr(new_namespace_index);
@@ -3749,6 +3763,7 @@ pub fn semaFile(mod: *Module, file: *File) SemaError!void {
errdefer @panic("TODO error handling");
file.root_decl = new_decl_index.toOptional();
+ new_namespace.decl_index = new_decl_index;
new_decl.name = try file.fullyQualifiedName(mod);
new_decl.name_fully_qualified = true;
@@ -3808,7 +3823,6 @@ pub fn semaFile(mod: *Module, file: *File) SemaError!void {
_ = try decl.internValue(mod);
}
- new_namespace.ty = Type.fromInterned(struct_ty);
new_decl.val = Value.fromInterned(struct_ty);
new_decl.has_tv = true;
new_decl.owns_tv = true;
@@ -3881,7 +3895,7 @@ fn semaDecl(mod: *Module, decl_index: Decl.Index) !SemaDeclResult {
const std_decl = mod.declPtr(std_file.root_decl.unwrap().?);
const std_namespace = std_decl.getInnerNamespace(mod).?;
const builtin_str = try ip.getOrPutString(gpa, "builtin");
- const builtin_decl = mod.declPtr(std_namespace.decls.getKeyAdapted(builtin_str, DeclAdapter{ .mod = mod }) orelse break :blk .none);
+ const builtin_decl = mod.declPtr(std_namespace.decls.getKeyAdapted(builtin_str, DeclAdapter{ .zcu = mod }) orelse break :blk .none);
const builtin_namespace = builtin_decl.getInnerNamespaceIndex(mod).unwrap() orelse break :blk .none;
if (decl.src_namespace != builtin_namespace) break :blk .none;
// We're in builtin.zig. This could be a builtin we need to add to a specific InternPool index.
@@ -4576,8 +4590,8 @@ fn scanDecl(iter: *ScanDeclIter, decl_inst: Zir.Inst.Index) Allocator.Error!void
const gop = try namespace.decls.getOrPutContextAdapted(
gpa,
decl_name,
- DeclAdapter{ .mod = zcu },
- Namespace.DeclContext{ .module = zcu },
+ DeclAdapter{ .zcu = zcu },
+ Namespace.DeclContext{ .zcu = zcu },
);
const comp = zcu.comp;
if (!gop.found_existing) {
@@ -4600,12 +4614,11 @@ fn scanDecl(iter: *ScanDeclIter, decl_inst: Zir.Inst.Index) Allocator.Error!void
.@"test" => a: {
if (!comp.config.is_test) break :a false;
if (decl_mod != zcu.main_mod) break :a false;
- if (is_named_test) {
- if (comp.test_filter) |test_filter| {
- if (mem.indexOf(u8, ip.stringToSlice(decl_name), test_filter) == null) {
- break :a false;
- }
- }
+ if (is_named_test and comp.test_filters.len > 0) {
+ const decl_fqn = ip.stringToSlice(try namespace.fullyQualifiedName(zcu, decl_name));
+ for (comp.test_filters) |test_filter| {
+ if (mem.indexOf(u8, decl_fqn, test_filter)) |_| break;
+ } else break :a false;
}
try zcu.test_functions.put(gpa, new_decl_index, {});
break :a true;
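
The rewritten filter check relies on Zig's loop `else`: it runs only when the loop finishes without `break`, so `break :a false` fires exactly when no filter matched the fully qualified test name. The same idiom in isolation (assumed names):

    // `else` on a loop runs only if the loop was never broken out of.
    const any_match = for (test_filters) |filter| {
        if (std.mem.indexOf(u8, decl_fqn, filter)) |_| break true;
    } else false;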
@@ -5622,7 +5635,7 @@ pub fn populateTestFunctions(
const test_functions_str = try ip.getOrPutString(gpa, "test_functions");
const decl_index = builtin_namespace.decls.getKeyAdapted(
test_functions_str,
- DeclAdapter{ .mod = mod },
+ DeclAdapter{ .zcu = mod },
).?;
{
// We have to call `ensureDeclAnalyzed` here in case `builtin.test_functions`
@@ -5646,8 +5659,7 @@ pub fn populateTestFunctions(
for (test_fn_vals, mod.test_functions.keys()) |*test_fn_val, test_decl_index| {
const test_decl = mod.declPtr(test_decl_index);
- // TODO: write something like getCoercedInts to avoid needing to dupe
- const test_decl_name = try gpa.dupe(u8, ip.stringToSlice(test_decl.name));
+ const test_decl_name = try gpa.dupe(u8, ip.stringToSlice(try test_decl.fullyQualifiedName(mod)));
defer gpa.free(test_decl_name);
const test_name_decl_index = n: {
const test_name_decl_ty = try mod.arrayType(.{
@@ -6359,17 +6371,13 @@ pub fn opaqueSrcLoc(mod: *Module, opaque_type: InternPool.Key.OpaqueType) SrcLoc
}
pub fn opaqueFullyQualifiedName(mod: *Module, opaque_type: InternPool.Key.OpaqueType) !InternPool.NullTerminatedString {
- return mod.declPtr(opaque_type.decl).getFullyQualifiedName(mod);
+ return mod.declPtr(opaque_type.decl).fullyQualifiedName(mod);
}
pub fn declFileScope(mod: *Module, decl_index: Decl.Index) *File {
return mod.declPtr(decl_index).getFileScope(mod);
}
-pub fn namespaceDeclIndex(mod: *Module, namespace_index: Namespace.Index) Decl.Index {
- return mod.namespacePtr(namespace_index).getDeclIndex(mod);
-}
-
/// Returns null in the following cases:
/// * `@TypeOf(.{})`
/// * A struct which has no fields (`struct {}`).
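
Note: `Decl.getFullyQualifiedName` is now a thin `Decl.fullyQualifiedName` wrapper over `Namespace.fullyQualifiedName`, which keeps the two-pass shape: count bytes, reserve capacity in `string_bytes` so rendering cannot reallocate the buffer it is reading interned strings from, then render. A usage sketch with assumed variables:

    // Equivalent when `decl.name_fully_qualified` is not already set:
    const via_ns = try zcu.namespacePtr(decl.src_namespace).fullyQualifiedName(zcu, decl.name);
    const via_decl = try decl.fullyQualifiedName(zcu);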
diff --git a/src/Package/Fetch.zig b/src/Package/Fetch.zig
index 8fbaf79ea5..e4e944d186 100644
--- a/src/Package/Fetch.zig
+++ b/src/Package/Fetch.zig
@@ -402,7 +402,7 @@ pub fn run(f: *Fetch) RunError!void {
return error.FetchFailed;
},
}
- } else {
+ } else if (f.job_queue.read_only) {
try eb.addRootErrorMessage(.{
.msg = try eb.addString("dependency is missing hash field"),
.src_loc = try f.srcLoc(f.location_tok),
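
Note: with the `read_only` guard, a dependency that lacks a `hash` field is rejected only when the fetch job queue is read-only; otherwise the fetch proceeds and the hash can be computed. A hypothetical manifest entry affected by this:

    // build.zig.zon (hypothetical)
    .dependencies = .{
        .example = .{
            .url = "https://example.com/pkg.tar.gz",
            // no .hash field: now an error only in read-only mode
        },
    },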
diff --git a/src/Sema.zig b/src/Sema.zig
index 972faff75f..741c4e2fba 100644
--- a/src/Sema.zig
+++ b/src/Sema.zig
@@ -2801,10 +2801,9 @@ fn zirStructDecl(
const new_namespace_index = try mod.createNamespace(.{
.parent = block.namespace.toOptional(),
- .ty = undefined,
+ .decl_index = new_decl_index,
.file_scope = block.getFileScope(mod),
});
- const new_namespace = mod.namespacePtr(new_namespace_index);
errdefer mod.destroyNamespace(new_namespace_index);
const struct_ty = ty: {
@@ -2821,7 +2820,6 @@ fn zirStructDecl(
new_decl.ty = Type.type;
new_decl.val = Value.fromInterned(struct_ty);
- new_namespace.ty = Type.fromInterned(struct_ty);
const decl_val = sema.analyzeDeclVal(block, src, new_decl_index);
try mod.finalizeAnonDecl(new_decl_index);
@@ -2990,10 +2988,9 @@ fn zirEnumDecl(
const new_namespace_index = try mod.createNamespace(.{
.parent = block.namespace.toOptional(),
- .ty = undefined,
+ .decl_index = new_decl_index,
.file_scope = block.getFileScope(mod),
});
- const new_namespace = mod.namespacePtr(new_namespace_index);
errdefer if (!done) mod.destroyNamespace(new_namespace_index);
const decls = sema.code.bodySlice(extra_index, decls_len);
@@ -3036,7 +3033,6 @@ fn zirEnumDecl(
new_decl.ty = Type.type;
new_decl.val = Value.fromInterned(incomplete_enum.index);
- new_namespace.ty = Type.fromInterned(incomplete_enum.index);
const decl_val = try sema.analyzeDeclVal(block, src, new_decl_index);
try mod.finalizeAnonDecl(new_decl_index);
@@ -3248,10 +3244,9 @@ fn zirUnionDecl(
const new_namespace_index = try mod.createNamespace(.{
.parent = block.namespace.toOptional(),
- .ty = undefined,
+ .decl_index = new_decl_index,
.file_scope = block.getFileScope(mod),
});
- const new_namespace = mod.namespacePtr(new_namespace_index);
errdefer mod.destroyNamespace(new_namespace_index);
const union_ty = ty: {
@@ -3292,7 +3287,6 @@ fn zirUnionDecl(
new_decl.ty = Type.type;
new_decl.val = Value.fromInterned(union_ty);
- new_namespace.ty = Type.fromInterned(union_ty);
const decls = sema.code.bodySlice(extra_index, decls_len);
try mod.scanNamespace(new_namespace_index, decls, new_decl);
@@ -3346,10 +3340,9 @@ fn zirOpaqueDecl(
const new_namespace_index = try mod.createNamespace(.{
.parent = block.namespace.toOptional(),
- .ty = undefined,
+ .decl_index = new_decl_index,
.file_scope = block.getFileScope(mod),
});
- const new_namespace = mod.namespacePtr(new_namespace_index);
errdefer mod.destroyNamespace(new_namespace_index);
const opaque_ty = try mod.intern(.{ .opaque_type = .{
@@ -3362,7 +3355,6 @@ fn zirOpaqueDecl(
new_decl.ty = Type.type;
new_decl.val = Value.fromInterned(opaque_ty);
- new_namespace.ty = Type.fromInterned(opaque_ty);
const decls = sema.code.bodySlice(extra_index, decls_len);
try mod.scanNamespace(new_namespace_index, decls, new_decl);
@@ -4834,7 +4826,7 @@ fn validateStructInit(
if (root_msg) |msg| {
if (mod.typeToStruct(struct_ty)) |struct_type| {
const decl = mod.declPtr(struct_type.decl.unwrap().?);
- const fqn = try decl.getFullyQualifiedName(mod);
+ const fqn = try decl.fullyQualifiedName(mod);
try mod.errNoteNonLazy(
decl.srcLoc(mod),
msg,
@@ -4961,7 +4953,7 @@ fn validateStructInit(
if (root_msg) |msg| {
if (mod.typeToStruct(struct_ty)) |struct_type| {
const decl = mod.declPtr(struct_type.decl.unwrap().?);
- const fqn = try decl.getFullyQualifiedName(mod);
+ const fqn = try decl.fullyQualifiedName(mod);
try mod.errNoteNonLazy(
decl.srcLoc(mod),
msg,
@@ -5355,7 +5347,7 @@ fn failWithBadStructFieldAccess(
const mod = sema.mod;
const gpa = sema.gpa;
const decl = mod.declPtr(struct_type.decl.unwrap().?);
- const fqn = try decl.getFullyQualifiedName(mod);
+ const fqn = try decl.fullyQualifiedName(mod);
const msg = msg: {
const msg = try sema.errMsg(
@@ -5382,7 +5374,7 @@ fn failWithBadUnionFieldAccess(
const gpa = sema.gpa;
const decl = mod.declPtr(union_obj.decl);
- const fqn = try decl.getFullyQualifiedName(mod);
+ const fqn = try decl.fullyQualifiedName(mod);
const msg = msg: {
const msg = try sema.errMsg(
@@ -6504,8 +6496,7 @@ fn lookupInNamespace(
const mod = sema.mod;
const namespace = mod.namespacePtr(namespace_index);
- const namespace_decl_index = namespace.getDeclIndex(mod);
- const namespace_decl = mod.declPtr(namespace_decl_index);
+ const namespace_decl = mod.declPtr(namespace.decl_index);
if (namespace_decl.analysis == .file_failure) {
return error.AnalysisFail;
}
@@ -6526,7 +6517,7 @@ fn lookupInNamespace(
while (check_i < checked_namespaces.count()) : (check_i += 1) {
const check_ns = checked_namespaces.keys()[check_i];
- if (check_ns.decls.getKeyAdapted(ident_name, Module.DeclAdapter{ .mod = mod })) |decl_index| {
+ if (check_ns.decls.getKeyAdapted(ident_name, Module.DeclAdapter{ .zcu = mod })) |decl_index| {
// Skip decls which are not marked pub, which are in a different
// file than the `a.b`/`@hasDecl` syntax.
const decl = mod.declPtr(decl_index);
@@ -6584,7 +6575,7 @@ fn lookupInNamespace(
return sema.failWithOwnedErrorMsg(block, msg);
},
}
- } else if (namespace.decls.getKeyAdapted(ident_name, Module.DeclAdapter{ .mod = mod })) |decl_index| {
+ } else if (namespace.decls.getKeyAdapted(ident_name, Module.DeclAdapter{ .zcu = mod })) |decl_index| {
return decl_index;
}
@@ -17210,7 +17201,7 @@ fn zirThis(
extended: Zir.Inst.Extended.InstData,
) CompileError!Air.Inst.Ref {
const mod = sema.mod;
- const this_decl_index = mod.namespaceDeclIndex(block.namespace);
+ const this_decl_index = mod.namespacePtr(block.namespace).decl_index;
const src = LazySrcLoc.nodeOffset(@bitCast(extended.operand));
return sema.analyzeDeclVal(block, src, this_decl_index);
}
@@ -20075,7 +20066,7 @@ fn finishStructInit(
if (root_msg) |msg| {
if (mod.typeToStruct(struct_ty)) |struct_type| {
const decl = mod.declPtr(struct_type.decl.unwrap().?);
- const fqn = try decl.getFullyQualifiedName(mod);
+ const fqn = try decl.fullyQualifiedName(mod);
try mod.errNoteNonLazy(
decl.srcLoc(mod),
msg,
@@ -21404,10 +21395,9 @@ fn zirReify(
const new_namespace_index = try mod.createNamespace(.{
.parent = block.namespace.toOptional(),
- .ty = undefined,
+ .decl_index = new_decl_index,
.file_scope = block.getFileScope(mod),
});
- const new_namespace = mod.namespacePtr(new_namespace_index);
errdefer mod.destroyNamespace(new_namespace_index);
const opaque_ty = try mod.intern(.{ .opaque_type = .{
@@ -21420,7 +21410,6 @@ fn zirReify(
new_decl.ty = Type.type;
new_decl.val = Value.fromInterned(opaque_ty);
- new_namespace.ty = Type.fromInterned(opaque_ty);
const decl_val = sema.analyzeDeclVal(block, src, new_decl_index);
try mod.finalizeAnonDecl(new_decl_index);
@@ -21614,10 +21603,9 @@ fn zirReify(
const new_namespace_index = try mod.createNamespace(.{
.parent = block.namespace.toOptional(),
- .ty = undefined,
+ .decl_index = new_decl_index,
.file_scope = block.getFileScope(mod),
});
- const new_namespace = mod.namespacePtr(new_namespace_index);
errdefer mod.destroyNamespace(new_namespace_index);
const union_ty = try ip.getUnionType(gpa, .{
@@ -21649,7 +21637,6 @@ fn zirReify(
new_decl.ty = Type.type;
new_decl.val = Value.fromInterned(union_ty);
- new_namespace.ty = Type.fromInterned(union_ty);
const decl_val = sema.analyzeDeclVal(block, src, new_decl_index);
try mod.finalizeAnonDecl(new_decl_index);
@@ -23328,7 +23315,8 @@ fn checkVectorElemType(
const mod = sema.mod;
switch (ty.zigTypeTag(mod)) {
.Int, .Float, .Bool => return,
- else => if (ty.isPtrAtRuntime(mod)) return,
+ .Optional, .Pointer => if (ty.isPtrAtRuntime(mod)) return,
+ else => {},
}
return sema.fail(block, ty_src, "expected integer, float, bool, or pointer for the vector element type; found '{}'", .{ty.fmt(mod)});
}
@@ -28455,7 +28443,7 @@ const CoerceOpts = struct {
report_err: bool = true,
/// Ignored if `report_err == false`.
is_ret: bool = false,
- /// Should coercion to comptime_int ermit an error message.
+ /// Should coercion to comptime_int emit an error message.
no_cast_to_comptime_int: bool = false,
param_src: struct {
@@ -31858,6 +31846,34 @@ fn coerceArrayLike(
}
const dest_elem_ty = dest_ty.childType(mod);
+ if (dest_ty.isVector(mod) and inst_ty.isVector(mod) and (try sema.resolveValue(inst)) == null) {
+ const inst_elem_ty = inst_ty.childType(mod);
+ switch (dest_elem_ty.zigTypeTag(mod)) {
+ .Int => if (inst_elem_ty.isInt(mod)) {
+ // integer widening
+ const dst_info = dest_elem_ty.intInfo(mod);
+ const src_info = inst_elem_ty.intInfo(mod);
+ if ((src_info.signedness == dst_info.signedness and dst_info.bits >= src_info.bits) or
+ // small enough unsigned ints can get casted to large enough signed ints
+ (dst_info.signedness == .signed and dst_info.bits > src_info.bits))
+ {
+ try sema.requireRuntimeBlock(block, inst_src, null);
+ return block.addTyOp(.intcast, dest_ty, inst);
+ }
+ },
+ .Float => if (inst_elem_ty.isRuntimeFloat()) {
+ // float widening
+ const src_bits = inst_elem_ty.floatBits(target);
+ const dst_bits = dest_elem_ty.floatBits(target);
+ if (dst_bits >= src_bits) {
+ try sema.requireRuntimeBlock(block, inst_src, null);
+ return block.addTyOp(.fpext, dest_ty, inst);
+ }
+ },
+ else => {},
+ }
+ }
+
const element_vals = try sema.arena.alloc(InternPool.Index, dest_len);
const element_refs = try sema.arena.alloc(Air.Inst.Ref, dest_len);
var runtime_src: ?LazySrcLoc = null;
@@ -37260,7 +37276,7 @@ fn generateUnionTagTypeNumbered(
const src_decl = mod.declPtr(block.src_decl);
const new_decl_index = try mod.allocateNewDecl(block.namespace, src_decl.src_node, block.wip_capture_scope);
errdefer mod.destroyDecl(new_decl_index);
- const fqn = try decl.getFullyQualifiedName(mod);
+ const fqn = try decl.fullyQualifiedName(mod);
const name = try ip.getOrPutStringFmt(gpa, "@typeInfo({}).Union.tag_type.?", .{fqn.fmt(ip)});
try mod.initNewAnonDecl(new_decl_index, src_decl.src_line, .{
.ty = Type.noreturn,
@@ -37269,7 +37285,6 @@ fn generateUnionTagTypeNumbered(
errdefer mod.abortAnonDecl(new_decl_index);
const new_decl = mod.declPtr(new_decl_index);
- new_decl.name_fully_qualified = true;
new_decl.owns_tv = true;
new_decl.name_fully_qualified = true;
@@ -37310,7 +37325,7 @@ fn generateUnionTagTypeSimple(
.val = Value.@"unreachable",
});
};
- const fqn = try mod.declPtr(decl_index).getFullyQualifiedName(mod);
+ const fqn = try mod.declPtr(decl_index).fullyQualifiedName(mod);
const src_decl = mod.declPtr(block.src_decl);
const new_decl_index = try mod.allocateNewDecl(block.namespace, src_decl.src_node, block.wip_capture_scope);
errdefer mod.destroyDecl(new_decl_index);
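
Note: the new `coerceArrayLike` branch above lets a runtime vector value coerce in one instruction instead of per element: same-length integer vectors widen via a single `intcast`, and float vectors via a single `fpext`. A sketch of coercions this covers:

    fn widen(v: @Vector(4, u16)) @Vector(4, u32) {
        return v; // element-wise integer widening: one runtime intcast
    }

    fn extend(v: @Vector(2, f32)) @Vector(2, f64) {
        return v; // float widening: one runtime fpext
    }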
diff --git a/src/arch/wasm/CodeGen.zig b/src/arch/wasm/CodeGen.zig
index 4540724778..781190e13b 100644
--- a/src/arch/wasm/CodeGen.zig
+++ b/src/arch/wasm/CodeGen.zig
@@ -7223,7 +7223,7 @@ fn getTagNameFunction(func: *CodeGen, enum_ty: Type) InnerError!u32 {
defer arena_allocator.deinit();
const arena = arena_allocator.allocator();
- const fqn = ip.stringToSlice(try mod.declPtr(enum_decl_index).getFullyQualifiedName(mod));
+ const fqn = ip.stringToSlice(try mod.declPtr(enum_decl_index).fullyQualifiedName(mod));
const func_name = try std.fmt.allocPrintZ(arena, "__zig_tag_name_{s}", .{fqn});
// check if we already generated code for this.
diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig
index f9a291f40b..4ca2ae44bb 100644
--- a/src/arch/x86_64/CodeGen.zig
+++ b/src/arch/x86_64/CodeGen.zig
@@ -1547,6 +1547,27 @@ fn asmRegisterRegisterMemory(
});
}
+fn asmRegisterRegisterMemoryRegister(
+ self: *Self,
+ tag: Mir.Inst.FixedTag,
+ reg1: Register,
+ reg2: Register,
+ m: Memory,
+ reg3: Register,
+) !void {
+ _ = try self.addInst(.{
+ .tag = tag[1],
+ .ops = .rrmr,
+ .data = .{ .rrrx = .{
+ .fixes = tag[0],
+ .r1 = reg1,
+ .r2 = reg2,
+ .r3 = reg3,
+ .payload = try self.addExtra(Mir.Memory.encode(m)),
+ } },
+ });
+}
+
fn asmMemory(self: *Self, tag: Mir.Inst.FixedTag, m: Memory) !void {
_ = try self.addInst(.{
.tag = tag[1],
@@ -1570,6 +1591,25 @@ fn asmRegisterMemory(self: *Self, tag: Mir.Inst.FixedTag, reg: Register, m: Memo
});
}
+fn asmRegisterMemoryRegister(
+ self: *Self,
+ tag: Mir.Inst.FixedTag,
+ reg1: Register,
+ m: Memory,
+ reg2: Register,
+) !void {
+ _ = try self.addInst(.{
+ .tag = tag[1],
+ .ops = .rmr,
+ .data = .{ .rrx = .{
+ .fixes = tag[0],
+ .r1 = reg1,
+ .r2 = reg2,
+ .payload = try self.addExtra(Mir.Memory.encode(m)),
+ } },
+ });
+}
+
fn asmRegisterMemoryImmediate(
self: *Self,
tag: Mir.Inst.FixedTag,
@@ -2570,7 +2610,8 @@ fn restoreState(self: *Self, state: State, deaths: []const Air.Inst.Index, compt
const ExpectedContents = [@typeInfo(RegisterManager.TrackedRegisters).Array.len]RegisterLock;
var stack align(@max(@alignOf(ExpectedContents), @alignOf(std.heap.StackFallbackAllocator(0)))) =
- if (opts.update_tracking) ({}) else std.heap.stackFallback(@sizeOf(ExpectedContents), self.gpa);
+ if (opts.update_tracking)
+ {} else std.heap.stackFallback(@sizeOf(ExpectedContents), self.gpa);
var reg_locks = if (opts.update_tracking) {} else try std.ArrayList(RegisterLock).initCapacity(
stack.get(),
@@ -2812,11 +2853,14 @@ fn airFptrunc(self: *Self, inst: Air.Inst.Index) !void {
}
fn airFpext(self: *Self, inst: Air.Inst.Index) !void {
+ const mod = self.bin_file.comp.module.?;
const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
const dst_ty = self.typeOfIndex(inst);
- const dst_bits = dst_ty.floatBits(self.target.*);
+ const dst_scalar_ty = dst_ty.scalarType(mod);
+ const dst_bits = dst_scalar_ty.floatBits(self.target.*);
const src_ty = self.typeOf(ty_op.operand);
- const src_bits = src_ty.floatBits(self.target.*);
+ const src_scalar_ty = src_ty.scalarType(mod);
+ const src_bits = src_scalar_ty.floatBits(self.target.*);
const result = result: {
if (switch (src_bits) {
@@ -2840,94 +2884,290 @@ fn airFpext(self: *Self, inst: Air.Inst.Index) !void {
},
else => unreachable,
}) {
+ if (dst_ty.isVector(mod)) break :result null;
var callee_buf: ["__extend?f?f2".len]u8 = undefined;
break :result try self.genCall(.{ .lib = .{
- .return_type = self.floatCompilerRtAbiType(dst_ty, src_ty).toIntern(),
- .param_types = &.{self.floatCompilerRtAbiType(src_ty, dst_ty).toIntern()},
+ .return_type = self.floatCompilerRtAbiType(dst_scalar_ty, src_scalar_ty).toIntern(),
+ .param_types = &.{self.floatCompilerRtAbiType(src_scalar_ty, dst_scalar_ty).toIntern()},
.callee = std.fmt.bufPrint(&callee_buf, "__extend{c}f{c}f2", .{
floatCompilerRtAbiName(src_bits),
floatCompilerRtAbiName(dst_bits),
}) catch unreachable,
- } }, &.{src_ty}, &.{.{ .air_ref = ty_op.operand }});
+ } }, &.{src_scalar_ty}, &.{.{ .air_ref = ty_op.operand }});
}
+ const src_abi_size: u32 = @intCast(src_ty.abiSize(mod));
const src_mcv = try self.resolveInst(ty_op.operand);
const dst_mcv = if (src_mcv.isRegister() and self.reuseOperand(inst, ty_op.operand, 0, src_mcv))
src_mcv
else
try self.copyToRegisterWithInstTracking(inst, dst_ty, src_mcv);
- const dst_reg = dst_mcv.getReg().?.to128();
+ const dst_reg = dst_mcv.getReg().?;
+ const dst_alias = registerAlias(dst_reg, @intCast(@max(dst_ty.abiSize(mod), 16)));
const dst_lock = self.register_manager.lockReg(dst_reg);
defer if (dst_lock) |lock| self.register_manager.unlockReg(lock);
+ const vec_len = if (dst_ty.isVector(mod)) dst_ty.vectorLen(mod) else 1;
if (src_bits == 16) {
assert(self.hasFeature(.f16c));
const mat_src_reg = if (src_mcv.isRegister())
src_mcv.getReg().?
else
try self.copyToTmpRegister(src_ty, src_mcv);
- try self.asmRegisterRegister(.{ .v_ps, .cvtph2 }, dst_reg, mat_src_reg.to128());
+ try self.asmRegisterRegister(
+ .{ .v_ps, .cvtph2 },
+ dst_alias,
+ registerAlias(mat_src_reg, src_abi_size),
+ );
switch (dst_bits) {
32 => {},
64 => try self.asmRegisterRegisterRegister(
.{ .v_sd, .cvtss2 },
- dst_reg,
- dst_reg,
- dst_reg,
+ dst_alias,
+ dst_alias,
+ dst_alias,
),
else => unreachable,
}
} else {
assert(src_bits == 32 and dst_bits == 64);
- if (self.hasFeature(.avx)) if (src_mcv.isMemory()) try self.asmRegisterRegisterMemory(
- .{ .v_sd, .cvtss2 },
- dst_reg,
- dst_reg,
- try src_mcv.mem(self, .dword),
- ) else try self.asmRegisterRegisterRegister(
- .{ .v_sd, .cvtss2 },
- dst_reg,
- dst_reg,
- (if (src_mcv.isRegister())
- src_mcv.getReg().?
- else
- try self.copyToTmpRegister(src_ty, src_mcv)).to128(),
- ) else if (src_mcv.isMemory()) try self.asmRegisterMemory(
- .{ ._sd, .cvtss2 },
- dst_reg,
- try src_mcv.mem(self, .dword),
+ if (self.hasFeature(.avx)) switch (vec_len) {
+ 1 => if (src_mcv.isMemory()) try self.asmRegisterRegisterMemory(
+ .{ .v_sd, .cvtss2 },
+ dst_alias,
+ dst_alias,
+ try src_mcv.mem(self, self.memSize(src_ty)),
+ ) else try self.asmRegisterRegisterRegister(
+ .{ .v_sd, .cvtss2 },
+ dst_alias,
+ dst_alias,
+ registerAlias(if (src_mcv.isRegister())
+ src_mcv.getReg().?
+ else
+ try self.copyToTmpRegister(src_ty, src_mcv), src_abi_size),
+ ),
+ 2...4 => if (src_mcv.isMemory()) try self.asmRegisterMemory(
+ .{ .v_pd, .cvtps2 },
+ dst_alias,
+ try src_mcv.mem(self, self.memSize(src_ty)),
+ ) else try self.asmRegisterRegister(
+ .{ .v_pd, .cvtps2 },
+ dst_alias,
+ registerAlias(if (src_mcv.isRegister())
+ src_mcv.getReg().?
+ else
+ try self.copyToTmpRegister(src_ty, src_mcv), src_abi_size),
+ ),
+ else => break :result null,
+ } else if (src_mcv.isMemory()) try self.asmRegisterMemory(
+ switch (vec_len) {
+ 1 => .{ ._sd, .cvtss2 },
+ 2 => .{ ._pd, .cvtps2 },
+ else => break :result null,
+ },
+ dst_alias,
+ try src_mcv.mem(self, self.memSize(src_ty)),
) else try self.asmRegisterRegister(
- .{ ._sd, .cvtss2 },
- dst_reg,
- (if (src_mcv.isRegister())
+ switch (vec_len) {
+ 1 => .{ ._sd, .cvtss2 },
+ 2 => .{ ._pd, .cvtps2 },
+ else => break :result null,
+ },
+ dst_alias,
+ registerAlias(if (src_mcv.isRegister())
src_mcv.getReg().?
else
- try self.copyToTmpRegister(src_ty, src_mcv)).to128(),
+ try self.copyToTmpRegister(src_ty, src_mcv), src_abi_size),
);
}
break :result dst_mcv;
- };
+ } orelse return self.fail("TODO implement airFpext from {} to {}", .{
+ src_ty.fmt(mod), dst_ty.fmt(mod),
+ });
return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
}
fn airIntCast(self: *Self, inst: Air.Inst.Index) !void {
const mod = self.bin_file.comp.module.?;
const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
- const result: MCValue = result: {
- const src_ty = self.typeOf(ty_op.operand);
- const src_int_info = src_ty.intInfo(mod);
+ const src_ty = self.typeOf(ty_op.operand);
+ const dst_ty = self.typeOfIndex(inst);
- const dst_ty = self.typeOfIndex(inst);
- const dst_int_info = dst_ty.intInfo(mod);
- const abi_size: u32 = @intCast(dst_ty.abiSize(mod));
+ const result = @as(?MCValue, result: {
+ const dst_abi_size: u32 = @intCast(dst_ty.abiSize(mod));
- const min_ty = if (dst_int_info.bits < src_int_info.bits) dst_ty else src_ty;
+ const src_int_info = src_ty.intInfo(mod);
+ const dst_int_info = dst_ty.intInfo(mod);
const extend = switch (src_int_info.signedness) {
.signed => dst_int_info,
.unsigned => src_int_info,
}.signedness;
const src_mcv = try self.resolveInst(ty_op.operand);
+ if (dst_ty.isVector(mod)) {
+ const src_abi_size: u32 = @intCast(src_ty.abiSize(mod));
+ const max_abi_size = @max(dst_abi_size, src_abi_size);
+ if (max_abi_size > @as(u32, if (self.hasFeature(.avx2)) 32 else 16)) break :result null;
+ const has_avx = self.hasFeature(.avx);
+
+ const dst_elem_abi_size = dst_ty.childType(mod).abiSize(mod);
+ const src_elem_abi_size = src_ty.childType(mod).abiSize(mod);
+ switch (math.order(dst_elem_abi_size, src_elem_abi_size)) {
+ .lt => {
+ const mir_tag: Mir.Inst.FixedTag = switch (dst_elem_abi_size) {
+ else => break :result null,
+ 1 => switch (src_elem_abi_size) {
+ else => break :result null,
+ 2 => switch (dst_int_info.signedness) {
+ .signed => if (has_avx) .{ .vp_b, .ackssw } else .{ .p_b, .ackssw },
+ .unsigned => if (has_avx) .{ .vp_b, .ackusw } else .{ .p_b, .ackusw },
+ },
+ },
+ 2 => switch (src_elem_abi_size) {
+ else => break :result null,
+ 4 => switch (dst_int_info.signedness) {
+ .signed => if (has_avx) .{ .vp_w, .ackssd } else .{ .p_w, .ackssd },
+ .unsigned => if (has_avx)
+ .{ .vp_w, .ackusd }
+ else if (self.hasFeature(.sse4_1))
+ .{ .p_w, .ackusd }
+ else
+ break :result null,
+ },
+ },
+ };
+
+ const dst_mcv: MCValue = if (src_mcv.isRegister() and
+ self.reuseOperand(inst, ty_op.operand, 0, src_mcv))
+ src_mcv
+ else if (has_avx and src_mcv.isRegister())
+ .{ .register = try self.register_manager.allocReg(inst, abi.RegisterClass.sse) }
+ else
+ try self.copyToRegisterWithInstTracking(inst, src_ty, src_mcv);
+ const dst_reg = dst_mcv.getReg().?;
+ const dst_alias = registerAlias(dst_reg, dst_abi_size);
+
+ if (has_avx) try self.asmRegisterRegisterRegister(
+ mir_tag,
+ dst_alias,
+ registerAlias(if (src_mcv.isRegister())
+ src_mcv.getReg().?
+ else
+ dst_reg, src_abi_size),
+ dst_alias,
+ ) else try self.asmRegisterRegister(
+ mir_tag,
+ dst_alias,
+ dst_alias,
+ );
+ break :result dst_mcv;
+ },
+ .eq => if (self.reuseOperand(inst, ty_op.operand, 0, src_mcv))
+ break :result src_mcv
+ else {
+ const dst_mcv = try self.allocRegOrMem(inst, true);
+ try self.genCopy(dst_ty, dst_mcv, src_mcv, .{});
+ break :result dst_mcv;
+ },
+ .gt => if (self.hasFeature(.sse4_1)) {
+ const mir_tag: Mir.Inst.FixedTag = .{ switch (dst_elem_abi_size) {
+ else => break :result null,
+ 2 => if (has_avx) .vp_w else .p_w,
+ 4 => if (has_avx) .vp_d else .p_d,
+ 8 => if (has_avx) .vp_q else .p_q,
+ }, switch (src_elem_abi_size) {
+ else => break :result null,
+ 1 => switch (extend) {
+ .signed => .movsxb,
+ .unsigned => .movzxb,
+ },
+ 2 => switch (extend) {
+ .signed => .movsxw,
+ .unsigned => .movzxw,
+ },
+ 4 => switch (extend) {
+ .signed => .movsxd,
+ .unsigned => .movzxd,
+ },
+ } };
+
+ const dst_mcv: MCValue = if (src_mcv.isRegister() and
+ self.reuseOperand(inst, ty_op.operand, 0, src_mcv))
+ src_mcv
+ else
+ .{ .register = try self.register_manager.allocReg(inst, abi.RegisterClass.sse) };
+ const dst_reg = dst_mcv.getReg().?;
+ const dst_alias = registerAlias(dst_reg, dst_abi_size);
+
+ if (src_mcv.isMemory()) try self.asmRegisterMemory(
+ mir_tag,
+ dst_alias,
+ try src_mcv.mem(self, self.memSize(src_ty)),
+ ) else try self.asmRegisterRegister(
+ mir_tag,
+ dst_alias,
+ registerAlias(if (src_mcv.isRegister())
+ src_mcv.getReg().?
+ else
+ try self.copyToTmpRegister(src_ty, src_mcv), src_abi_size),
+ );
+ break :result dst_mcv;
+ } else {
+ const mir_tag: Mir.Inst.FixedTag = switch (dst_elem_abi_size) {
+ else => break :result null,
+ 2 => switch (src_elem_abi_size) {
+ else => break :result null,
+ 1 => .{ .p_, .unpcklbw },
+ },
+ 4 => switch (src_elem_abi_size) {
+ else => break :result null,
+ 2 => .{ .p_, .unpcklwd },
+ },
+ 8 => switch (src_elem_abi_size) {
+ else => break :result null,
+ 2 => .{ .p_, .unpckldq },
+ },
+ };
+
+ const dst_mcv: MCValue = if (src_mcv.isRegister() and
+ self.reuseOperand(inst, ty_op.operand, 0, src_mcv))
+ src_mcv
+ else
+ try self.copyToRegisterWithInstTracking(inst, dst_ty, src_mcv);
+ const dst_reg = dst_mcv.getReg().?;
+
+ const ext_reg = try self.register_manager.allocReg(null, abi.RegisterClass.sse);
+ const ext_alias = registerAlias(ext_reg, src_abi_size);
+ const ext_lock = self.register_manager.lockRegAssumeUnused(ext_reg);
+ defer self.register_manager.unlockReg(ext_lock);
+
+ try self.asmRegisterRegister(.{ .p_, .xor }, ext_alias, ext_alias);
+ switch (extend) {
+ .signed => try self.asmRegisterRegister(
+ .{ switch (src_elem_abi_size) {
+ else => unreachable,
+ 1 => .p_b,
+ 2 => .p_w,
+ 4 => .p_d,
+ }, .cmpgt },
+ ext_alias,
+ registerAlias(dst_reg, src_abi_size),
+ ),
+ .unsigned => {},
+ }
+ try self.asmRegisterRegister(
+ mir_tag,
+ registerAlias(dst_reg, dst_abi_size),
+ registerAlias(ext_reg, dst_abi_size),
+ );
+ break :result dst_mcv;
+ },
+ }
+ @compileError("unreachable");
+ }
+
+ const min_ty = if (dst_int_info.bits < src_int_info.bits) dst_ty else src_ty;
+
const src_storage_bits: u16 = switch (src_mcv) {
.register, .register_offset => 64,
.register_pair => 128,
@@ -2945,13 +3185,13 @@ fn airIntCast(self: *Self, inst: Air.Inst.Index) !void {
};
if (dst_int_info.bits <= src_int_info.bits) break :result if (dst_mcv.isRegister())
- .{ .register = registerAlias(dst_mcv.getReg().?, abi_size) }
+ .{ .register = registerAlias(dst_mcv.getReg().?, dst_abi_size) }
else
dst_mcv;
if (dst_mcv.isRegister()) {
try self.truncateRegister(src_ty, dst_mcv.getReg().?);
- break :result .{ .register = registerAlias(dst_mcv.getReg().?, abi_size) };
+ break :result .{ .register = registerAlias(dst_mcv.getReg().?, dst_abi_size) };
}
const src_limbs_len = math.divCeil(u16, src_int_info.bits, 64) catch unreachable;
@@ -2999,7 +3239,9 @@ fn airIntCast(self: *Self, inst: Air.Inst.Index) !void {
);
break :result dst_mcv;
- };
+ }) orelse return self.fail("TODO implement airIntCast from {} to {}", .{
+ src_ty.fmt(mod), dst_ty.fmt(mod),
+ });
return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
}
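
Note: the vector path in `airIntCast` dispatches on element-size order: narrowing maps to the pack family with signed/unsigned saturation, equal sizes reduce to a copy, and widening uses `pmovsx`/`pmovzx` on SSE4.1+ with a `punpckl` plus sign-mask fallback otherwise. At the language level this covers casts like the following (a sketch; the exact instruction choice depends on CPU features):

    fn narrow(v: @Vector(8, u16)) @Vector(8, u8) {
        return @intCast(v); // pack-style narrowing
    }

    fn widenSigned(v: @Vector(4, i8)) @Vector(4, i32) {
        return @intCast(v); // pmovsxbd on SSE4.1+, unpack fallback otherwise
    }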
@@ -3022,7 +3264,7 @@ fn airTrunc(self: *Self, inst: Air.Inst.Index) !void {
src_mcv
else if (dst_abi_size <= 8)
try self.copyToRegisterWithInstTracking(inst, dst_ty, src_mcv)
- else if (dst_abi_size <= 16) dst: {
+ else if (dst_abi_size <= 16 and !dst_ty.isVector(mod)) dst: {
const dst_regs =
try self.register_manager.allocRegs(2, .{ inst, inst }, abi.RegisterClass.gp);
const dst_mcv: MCValue = .{ .register_pair = dst_regs };
@@ -3032,26 +3274,29 @@ fn airTrunc(self: *Self, inst: Air.Inst.Index) !void {
try self.genCopy(dst_ty, dst_mcv, src_mcv, .{});
break :dst dst_mcv;
} else dst: {
- const dst_mcv = try self.allocRegOrMem(inst, true);
- try self.genCopy(dst_ty, dst_mcv, src_mcv, .{});
+ const dst_mcv = try self.allocRegOrMemAdvanced(src_ty, inst, true);
+ try self.genCopy(src_ty, dst_mcv, src_mcv, .{});
break :dst dst_mcv;
};
if (dst_ty.zigTypeTag(mod) == .Vector) {
assert(src_ty.zigTypeTag(mod) == .Vector and dst_ty.vectorLen(mod) == src_ty.vectorLen(mod));
- const dst_info = dst_ty.childType(mod).intInfo(mod);
- const src_info = src_ty.childType(mod).intInfo(mod);
- const mir_tag = @as(?Mir.Inst.FixedTag, switch (dst_info.bits) {
- 8 => switch (src_info.bits) {
- 16 => switch (dst_ty.vectorLen(mod)) {
+ const dst_elem_ty = dst_ty.childType(mod);
+ const dst_elem_abi_size: u32 = @intCast(dst_elem_ty.abiSize(mod));
+ const src_elem_ty = src_ty.childType(mod);
+ const src_elem_abi_size: u32 = @intCast(src_elem_ty.abiSize(mod));
+
+ const mir_tag = @as(?Mir.Inst.FixedTag, switch (dst_elem_abi_size) {
+ 1 => switch (src_elem_abi_size) {
+ 2 => switch (dst_ty.vectorLen(mod)) {
1...8 => if (self.hasFeature(.avx)) .{ .vp_b, .ackusw } else .{ .p_b, .ackusw },
9...16 => if (self.hasFeature(.avx2)) .{ .vp_b, .ackusw } else null,
else => null,
},
else => null,
},
- 16 => switch (src_info.bits) {
- 32 => switch (dst_ty.vectorLen(mod)) {
+ 2 => switch (src_elem_abi_size) {
+ 4 => switch (dst_ty.vectorLen(mod)) {
1...4 => if (self.hasFeature(.avx))
.{ .vp_w, .ackusd }
else if (self.hasFeature(.sse4_1))
@@ -3066,12 +3311,14 @@ fn airTrunc(self: *Self, inst: Air.Inst.Index) !void {
else => null,
}) orelse return self.fail("TODO implement airTrunc for {}", .{dst_ty.fmt(mod)});
- const elem_ty = src_ty.childType(mod);
- const mask_val = try mod.intValue(elem_ty, @as(u64, math.maxInt(u64)) >> @intCast(64 - dst_info.bits));
+ const dst_info = dst_elem_ty.intInfo(mod);
+ const src_info = src_elem_ty.intInfo(mod);
+
+ const mask_val = try mod.intValue(src_elem_ty, @as(u64, math.maxInt(u64)) >> @intCast(64 - dst_info.bits));
const splat_ty = try mod.vectorType(.{
.len = @intCast(@divExact(@as(u64, if (src_abi_size > 16) 256 else 128), src_info.bits)),
- .child = elem_ty.ip_index,
+ .child = src_elem_ty.ip_index,
});
const splat_abi_size: u32 = @intCast(splat_ty.abiSize(mod));
@@ -3086,22 +3333,40 @@ fn airTrunc(self: *Self, inst: Air.Inst.Index) !void {
else => .{ .register = try self.copyToTmpRegister(Type.usize, splat_mcv.address()) },
};
- const dst_reg = registerAlias(dst_mcv.getReg().?, src_abi_size);
+ const dst_reg = dst_mcv.getReg().?;
+ const dst_alias = registerAlias(dst_reg, src_abi_size);
if (self.hasFeature(.avx)) {
try self.asmRegisterRegisterMemory(
.{ .vp_, .@"and" },
- dst_reg,
- dst_reg,
+ dst_alias,
+ dst_alias,
try splat_addr_mcv.deref().mem(self, Memory.Size.fromSize(splat_abi_size)),
);
- try self.asmRegisterRegisterRegister(mir_tag, dst_reg, dst_reg, dst_reg);
+ if (src_abi_size > 16) {
+ const temp_reg = try self.register_manager.allocReg(null, abi.RegisterClass.sse);
+ const temp_lock = self.register_manager.lockRegAssumeUnused(temp_reg);
+ defer self.register_manager.unlockReg(temp_lock);
+
+ try self.asmRegisterRegisterImmediate(
+ .{ if (self.hasFeature(.avx2)) .v_i128 else .v_f128, .extract },
+ registerAlias(temp_reg, dst_abi_size),
+ dst_alias,
+ Immediate.u(1),
+ );
+ try self.asmRegisterRegisterRegister(
+ mir_tag,
+ registerAlias(dst_reg, dst_abi_size),
+ registerAlias(dst_reg, dst_abi_size),
+ registerAlias(temp_reg, dst_abi_size),
+ );
+ } else try self.asmRegisterRegisterRegister(mir_tag, dst_alias, dst_alias, dst_alias);
} else {
try self.asmRegisterMemory(
.{ .p_, .@"and" },
- dst_reg,
+ dst_alias,
try splat_addr_mcv.deref().mem(self, Memory.Size.fromSize(splat_abi_size)),
);
- try self.asmRegisterRegister(mir_tag, dst_reg, dst_reg);
+ try self.asmRegisterRegister(mir_tag, dst_alias, dst_alias);
}
break :result dst_mcv;
}
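
Note: for 256-bit sources the AVX path above masks the elements, extracts the high 128-bit lane, and packs it against the low lane, since the pack instructions operate within 128-bit lanes. A user-level sketch of a truncation that takes this path:

    fn truncWide(v: @Vector(16, u16)) @Vector(16, u8) {
        return @truncate(v); // 256-bit source: and-mask, extract high lane, pack
    }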
@@ -4045,7 +4310,7 @@ fn airMulWithOverflow(self: *Self, inst: Air.Inst.Index) !void {
if (dst_info.bits > 128 and dst_info.signedness == .unsigned) {
const slow_inc = self.hasFeature(.slow_incdec);
const abi_size: u32 = @intCast(dst_ty.abiSize(mod));
- const limb_len = std.math.divCeil(u32, abi_size, 8) catch unreachable;
+ const limb_len = math.divCeil(u32, abi_size, 8) catch unreachable;
try self.spillRegisters(&.{ .rax, .rcx, .rdx });
const reg_locks = self.register_manager.lockRegsAssumeUnused(3, .{ .rax, .rcx, .rdx });
@@ -4534,7 +4799,7 @@ fn airShlShrBinOp(self: *Self, inst: Air.Inst.Index) !void {
switch (lhs_ty.zigTypeTag(mod)) {
.Int => {
try self.spillRegisters(&.{.rcx});
- try self.register_manager.getReg(.rcx, null);
+ try self.register_manager.getKnownReg(.rcx, null);
const lhs_mcv = try self.resolveInst(bin_op.lhs);
const rhs_mcv = try self.resolveInst(bin_op.rhs);
@@ -6560,7 +6825,7 @@ fn floatSign(self: *Self, inst: Air.Inst.Index, operand: Air.Inst.Ref, ty: Type)
const dst_mcv: MCValue = .{ .register = .st0 };
if (!std.meta.eql(src_mcv, dst_mcv) or !self.reuseOperand(inst, operand, 0, src_mcv))
- try self.register_manager.getReg(.st0, inst);
+ try self.register_manager.getKnownReg(.st0, inst);
try self.genCopy(ty, dst_mcv, src_mcv, .{});
switch (tag) {
@@ -6894,7 +7159,7 @@ fn airAbs(self: *Self, inst: Air.Inst.Index) !void {
},
else => {
const abi_size: u31 = @intCast(ty.abiSize(mod));
- const limb_len = std.math.divCeil(u31, abi_size, 8) catch unreachable;
+ const limb_len = math.divCeil(u31, abi_size, 8) catch unreachable;
const tmp_regs =
try self.register_manager.allocRegs(3, .{null} ** 3, abi.RegisterClass.gp);
@@ -8181,7 +8446,7 @@ fn genShiftBinOpMir(
try self.asmRegisterImmediate(
.{ ._, .@"and" },
.cl,
- Immediate.u(std.math.maxInt(u6)),
+ Immediate.u(math.maxInt(u6)),
);
try self.asmRegisterImmediate(
.{ ._r, .sh },
@@ -8218,7 +8483,7 @@ fn genShiftBinOpMir(
try self.asmRegisterImmediate(
.{ ._, .@"and" },
.cl,
- Immediate.u(std.math.maxInt(u6)),
+ Immediate.u(math.maxInt(u6)),
);
try self.asmRegisterImmediate(
.{ ._r, .sh },
@@ -8283,7 +8548,7 @@ fn genShiftBinOpMir(
}, .sh },
temp_regs[2].to64(),
temp_regs[3].to64(),
- Immediate.u(shift_imm & std.math.maxInt(u6)),
+ Immediate.u(shift_imm & math.maxInt(u6)),
),
else => try self.asmRegisterRegisterRegister(.{ switch (tag[0]) {
._l => ._ld,
@@ -8338,7 +8603,7 @@ fn genShiftBinOpMir(
.immediate => |shift_imm| try self.asmRegisterImmediate(
tag,
temp_regs[2].to64(),
- Immediate.u(shift_imm & std.math.maxInt(u6)),
+ Immediate.u(shift_imm & math.maxInt(u6)),
),
else => try self.asmRegisterRegister(tag, temp_regs[2].to64(), .cl),
}
@@ -8794,7 +9059,7 @@ fn genShiftBinOp(
lhs_ty.fmt(mod),
});
- try self.register_manager.getReg(.rcx, null);
+ try self.register_manager.getKnownReg(.rcx, null);
const rcx_lock = self.register_manager.lockReg(.rcx);
defer if (rcx_lock) |lock| self.register_manager.unlockReg(lock);
@@ -8933,7 +9198,7 @@ fn genMulDivBinOp(
switch (tag) {
.mul, .mul_wrap => {
const slow_inc = self.hasFeature(.slow_incdec);
- const limb_len = std.math.divCeil(u32, src_abi_size, 8) catch unreachable;
+ const limb_len = math.divCeil(u32, src_abi_size, 8) catch unreachable;
try self.spillRegisters(&.{ .rax, .rcx, .rdx });
const reg_locks = self.register_manager.lockRegs(3, .{ .rax, .rcx, .rdx });
@@ -9117,8 +9382,8 @@ fn genMulDivBinOp(
.rem => maybe_inst,
else => null,
};
- try self.register_manager.getReg(.rax, track_inst_rax);
- try self.register_manager.getReg(.rdx, track_inst_rdx);
+ try self.register_manager.getKnownReg(.rax, track_inst_rax);
+ try self.register_manager.getKnownReg(.rdx, track_inst_rdx);
try self.genIntMulDivOpMir(switch (signedness) {
.signed => switch (tag) {
@@ -9158,8 +9423,11 @@ fn genMulDivBinOp(
},
.mod => {
- try self.register_manager.getReg(.rax, null);
- try self.register_manager.getReg(.rdx, if (signedness == .unsigned) maybe_inst else null);
+ try self.register_manager.getKnownReg(.rax, null);
+ try self.register_manager.getKnownReg(
+ .rdx,
+ if (signedness == .unsigned) maybe_inst else null,
+ );
switch (signedness) {
.signed => {
@@ -9200,8 +9468,11 @@ fn genMulDivBinOp(
},
.div_floor => {
- try self.register_manager.getReg(.rax, if (signedness == .unsigned) maybe_inst else null);
- try self.register_manager.getReg(.rdx, null);
+ try self.register_manager.getKnownReg(
+ .rax,
+ if (signedness == .unsigned) maybe_inst else null,
+ );
+ try self.register_manager.getKnownReg(.rdx, null);
const lhs_lock: ?RegisterLock = switch (lhs_mcv) {
.register => |reg| self.register_manager.lockRegAssumeUnused(reg),
@@ -9445,7 +9716,7 @@ fn genBinOp(
.rem, .mod => unreachable,
.max, .min => if (lhs_ty.scalarType(mod).isRuntimeFloat()) registerAlias(
if (!self.hasFeature(.avx) and self.hasFeature(.sse4_1)) mask: {
- try self.register_manager.getReg(.xmm0, null);
+ try self.register_manager.getKnownReg(.xmm0, null);
break :mask .xmm0;
} else try self.register_manager.allocReg(null, abi.RegisterClass.sse),
abi_size,
@@ -10820,96 +11091,35 @@ fn genBinOp(
lhs_copy_reg.?,
mask_reg,
) else {
- try self.asmRegisterRegister(
- @as(?Mir.Inst.FixedTag, switch (lhs_ty.zigTypeTag(mod)) {
- .Float => switch (lhs_ty.floatBits(self.target.*)) {
- 32 => .{ ._ps, .@"and" },
- 64 => .{ ._pd, .@"and" },
- 16, 80, 128 => null,
- else => unreachable,
- },
- .Vector => switch (lhs_ty.childType(mod).zigTypeTag(mod)) {
- .Float => switch (lhs_ty.childType(mod).floatBits(self.target.*)) {
- 32 => switch (lhs_ty.vectorLen(mod)) {
- 1...4 => .{ ._ps, .@"and" },
- else => null,
- },
- 64 => switch (lhs_ty.vectorLen(mod)) {
- 1...2 => .{ ._pd, .@"and" },
- else => null,
- },
- 16, 80, 128 => null,
- else => unreachable,
- },
- else => unreachable,
- },
+ const mir_fixes = @as(?Mir.Inst.Fixes, switch (lhs_ty.zigTypeTag(mod)) {
+ .Float => switch (lhs_ty.floatBits(self.target.*)) {
+ 32 => ._ps,
+ 64 => ._pd,
+ 16, 80, 128 => null,
else => unreachable,
- }) orelse return self.fail("TODO implement genBinOp for {s} {}", .{
- @tagName(air_tag), lhs_ty.fmt(mod),
- }),
- dst_reg,
- mask_reg,
- );
- try self.asmRegisterRegister(
- @as(?Mir.Inst.FixedTag, switch (lhs_ty.zigTypeTag(mod)) {
- .Float => switch (lhs_ty.floatBits(self.target.*)) {
- 32 => .{ ._ps, .andn },
- 64 => .{ ._pd, .andn },
- 16, 80, 128 => null,
- else => unreachable,
- },
- .Vector => switch (lhs_ty.childType(mod).zigTypeTag(mod)) {
- .Float => switch (lhs_ty.childType(mod).floatBits(self.target.*)) {
- 32 => switch (lhs_ty.vectorLen(mod)) {
- 1...4 => .{ ._ps, .andn },
- else => null,
- },
- 64 => switch (lhs_ty.vectorLen(mod)) {
- 1...2 => .{ ._pd, .andn },
- else => null,
- },
- 16, 80, 128 => null,
- else => unreachable,
+ },
+ .Vector => switch (lhs_ty.childType(mod).zigTypeTag(mod)) {
+ .Float => switch (lhs_ty.childType(mod).floatBits(self.target.*)) {
+ 32 => switch (lhs_ty.vectorLen(mod)) {
+ 1...4 => ._ps,
+ else => null,
},
- else => unreachable,
- },
- else => unreachable,
- }) orelse return self.fail("TODO implement genBinOp for {s} {}", .{
- @tagName(air_tag), lhs_ty.fmt(mod),
- }),
- mask_reg,
- lhs_copy_reg.?,
- );
- try self.asmRegisterRegister(
- @as(?Mir.Inst.FixedTag, switch (lhs_ty.zigTypeTag(mod)) {
- .Float => switch (lhs_ty.floatBits(self.target.*)) {
- 32 => .{ ._ps, .@"or" },
- 64 => .{ ._pd, .@"or" },
- 16, 80, 128 => null,
- else => unreachable,
- },
- .Vector => switch (lhs_ty.childType(mod).zigTypeTag(mod)) {
- .Float => switch (lhs_ty.childType(mod).floatBits(self.target.*)) {
- 32 => switch (lhs_ty.vectorLen(mod)) {
- 1...4 => .{ ._ps, .@"or" },
- else => null,
- },
- 64 => switch (lhs_ty.vectorLen(mod)) {
- 1...2 => .{ ._pd, .@"or" },
- else => null,
- },
- 16, 80, 128 => null,
- else => unreachable,
+ 64 => switch (lhs_ty.vectorLen(mod)) {
+ 1...2 => ._pd,
+ else => null,
},
+ 16, 80, 128 => null,
else => unreachable,
},
else => unreachable,
- }) orelse return self.fail("TODO implement genBinOp for {s} {}", .{
- @tagName(air_tag), lhs_ty.fmt(mod),
- }),
- dst_reg,
- mask_reg,
- );
+ },
+ else => unreachable,
+ }) orelse return self.fail("TODO implement genBinOp for {s} {}", .{
+ @tagName(air_tag), lhs_ty.fmt(mod),
+ });
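+ // Emulate a blend without SSE4.1: dst = (dst & mask) | (lhs_copy & ~mask).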
+ try self.asmRegisterRegister(.{ mir_fixes, .@"and" }, dst_reg, mask_reg);
+ try self.asmRegisterRegister(.{ mir_fixes, .andn }, mask_reg, lhs_copy_reg.?);
+ try self.asmRegisterRegister(.{ mir_fixes, .@"or" }, dst_reg, mask_reg);
}
},
.cmp_lt, .cmp_lte, .cmp_eq, .cmp_gte, .cmp_gt, .cmp_neq => {
@@ -12192,9 +12402,36 @@ fn airRetLoad(self: *Self, inst: Air.Inst.Index) !void {
fn airCmp(self: *Self, inst: Air.Inst.Index, op: math.CompareOperator) !void {
const mod = self.bin_file.comp.module.?;
const bin_op = self.air.instructions.items(.data)[@intFromEnum(inst)].bin_op;
- const ty = self.typeOf(bin_op.lhs);
+ var ty = self.typeOf(bin_op.lhs);
+ var null_compare: ?Mir.Inst.Index = null;
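+ // Optionals that are not represented as just their payload need two compares:
+ // one of the "has value" flag bytes and, when both are non-null, one of the
+ // payloads. null_compare records the jump that skips the payload compare.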
const result: Condition = result: {
+ try self.spillEflagsIfOccupied();
+
+ const lhs_mcv = try self.resolveInst(bin_op.lhs);
+ const lhs_locks: [2]?RegisterLock = switch (lhs_mcv) {
+ .register => |lhs_reg| .{ self.register_manager.lockRegAssumeUnused(lhs_reg), null },
+ .register_pair => |lhs_regs| locks: {
+ const locks = self.register_manager.lockRegsAssumeUnused(2, lhs_regs);
+ break :locks .{ locks[0], locks[1] };
+ },
+ .register_offset => |lhs_ro| .{
+ self.register_manager.lockRegAssumeUnused(lhs_ro.reg),
+ null,
+ },
+ else => .{null} ** 2,
+ };
+ defer for (lhs_locks) |lhs_lock| if (lhs_lock) |lock| self.register_manager.unlockReg(lock);
+
+ const rhs_mcv = try self.resolveInst(bin_op.rhs);
+ const rhs_locks: [2]?RegisterLock = switch (rhs_mcv) {
+ .register => |rhs_reg| .{ self.register_manager.lockReg(rhs_reg), null },
+ .register_pair => |rhs_regs| self.register_manager.lockRegs(2, rhs_regs),
+ .register_offset => |rhs_ro| .{ self.register_manager.lockReg(rhs_ro.reg), null },
+ else => .{null} ** 2,
+ };
+ defer for (rhs_locks) |rhs_lock| if (rhs_lock) |lock| self.register_manager.unlockReg(lock);
+
switch (ty.zigTypeTag(mod)) {
.Float => {
const float_bits = ty.floatBits(self.target.*);
@@ -12231,34 +12468,66 @@ fn airCmp(self: *Self, inst: Air.Inst.Index, op: math.CompareOperator) !void {
};
}
},
- else => {},
- }
+ .Optional => if (!ty.optionalReprIsPayload(mod)) {
+ const opt_ty = ty;
+ const opt_abi_size: u31 = @intCast(opt_ty.abiSize(mod));
+ ty = opt_ty.optionalChild(mod);
+ const payload_abi_size: u31 = @intCast(ty.abiSize(mod));
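+ // The "has value" flag byte lives directly after the payload, so it can be
+ // loaded from memory at that offset or isolated by shifting the register
+ // right by the payload's width in bits.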
- try self.spillEflagsIfOccupied();
+ const temp_lhs_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp);
+ const temp_lhs_lock = self.register_manager.lockRegAssumeUnused(temp_lhs_reg);
+ defer self.register_manager.unlockReg(temp_lhs_lock);
- const lhs_mcv = try self.resolveInst(bin_op.lhs);
- const lhs_locks: [2]?RegisterLock = switch (lhs_mcv) {
- .register => |lhs_reg| .{ self.register_manager.lockRegAssumeUnused(lhs_reg), null },
- .register_pair => |lhs_regs| locks: {
- const locks = self.register_manager.lockRegsAssumeUnused(2, lhs_regs);
- break :locks .{ locks[0], locks[1] };
- },
- .register_offset => |lhs_ro| .{
- self.register_manager.lockRegAssumeUnused(lhs_ro.reg),
- null,
- },
- else => .{null} ** 2,
- };
- defer for (lhs_locks) |lhs_lock| if (lhs_lock) |lock| self.register_manager.unlockReg(lock);
+ if (lhs_mcv.isMemory()) try self.asmRegisterMemory(
+ .{ ._, .mov },
+ temp_lhs_reg.to8(),
+ try lhs_mcv.address().offset(payload_abi_size).deref().mem(self, .byte),
+ ) else {
+ try self.genSetReg(temp_lhs_reg, opt_ty, lhs_mcv, .{});
+ try self.asmRegisterImmediate(
+ .{ ._r, .sh },
+ registerAlias(temp_lhs_reg, opt_abi_size),
+ Immediate.u(payload_abi_size * 8),
+ );
+ }
- const rhs_mcv = try self.resolveInst(bin_op.rhs);
- const rhs_locks: [2]?RegisterLock = switch (rhs_mcv) {
- .register => |rhs_reg| .{ self.register_manager.lockReg(rhs_reg), null },
- .register_pair => |rhs_regs| self.register_manager.lockRegs(2, rhs_regs),
- .register_offset => |rhs_ro| .{ self.register_manager.lockReg(rhs_ro.reg), null },
- else => .{null} ** 2,
- };
- defer for (rhs_locks) |rhs_lock| if (rhs_lock) |lock| self.register_manager.unlockReg(lock);
+ const payload_compare = payload_compare: {
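+ // `test` of the two flag bytes is nonzero only when both operands are
+ // non-null; in that case jump ahead and compare the payloads instead.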
+ if (rhs_mcv.isMemory()) {
+ const rhs_mem =
+ try rhs_mcv.address().offset(payload_abi_size).deref().mem(self, .byte);
+ try self.asmMemoryRegister(.{ ._, .@"test" }, rhs_mem, temp_lhs_reg.to8());
+ const payload_compare = try self.asmJccReloc(.nz, undefined);
+ try self.asmRegisterMemory(.{ ._, .cmp }, temp_lhs_reg.to8(), rhs_mem);
+ break :payload_compare payload_compare;
+ }
+
+ const temp_rhs_reg = try self.copyToTmpRegister(opt_ty, rhs_mcv);
+ const temp_rhs_lock = self.register_manager.lockRegAssumeUnused(temp_rhs_reg);
+ defer self.register_manager.unlockReg(temp_rhs_lock);
+
+ try self.asmRegisterImmediate(
+ .{ ._r, .sh },
+ registerAlias(temp_rhs_reg, opt_abi_size),
+ Immediate.u(payload_abi_size * 8),
+ );
+ try self.asmRegisterRegister(
+ .{ ._, .@"test" },
+ temp_lhs_reg.to8(),
+ temp_rhs_reg.to8(),
+ );
+ const payload_compare = try self.asmJccReloc(.nz, undefined);
+ try self.asmRegisterRegister(
+ .{ ._, .cmp },
+ temp_lhs_reg.to8(),
+ temp_rhs_reg.to8(),
+ );
+ break :payload_compare payload_compare;
+ };
+ null_compare = try self.asmJmpReloc(undefined);
+ self.performReloc(payload_compare);
+ },
+ else => {},
+ }
switch (ty.zigTypeTag(mod)) {
else => {
@@ -12571,6 +12840,7 @@ fn airCmp(self: *Self, inst: Air.Inst.Index, op: math.CompareOperator) !void {
}
};
+ if (null_compare) |reloc| self.performReloc(reloc);
self.eflags_inst = inst;
return self.finishAir(inst, .{ .eflags = result }, .{ bin_op.lhs, bin_op.rhs, .none });
}
@@ -13521,6 +13791,7 @@ fn airAsm(self: *Self, inst: Air.Inst.Index) !void {
} else if (constraint.len == 1 and std.ascii.isDigit(constraint[0])) arg: {
const index = std.fmt.charToDigit(constraint[0], 10) catch unreachable;
if (index >= args.items.len) return self.fail("constraint out of bounds: '{s}'", .{constraint});
+ try self.genCopy(ty, args.items[index], input_mcv, .{});
break :arg args.items[index];
} else return self.fail("invalid constraint: '{s}'", .{constraint});
if (arg_mcv.getReg()) |reg| if (RegisterManager.indexOfRegIntoTracked(reg)) |_| {
@@ -13619,25 +13890,26 @@ fn airAsm(self: *Self, inst: Air.Inst.Index) !void {
label_gop.value_ptr.target = @intCast(self.mir_instructions.len);
} else continue;
- var mnem_size: ?Memory.Size = null;
- const mnem_tag = mnem: {
- mnem_size = if (mem.endsWith(u8, mnem_str, "b"))
- .byte
- else if (mem.endsWith(u8, mnem_str, "w"))
- .word
- else if (mem.endsWith(u8, mnem_str, "l"))
- .dword
- else if (mem.endsWith(u8, mnem_str, "q"))
- .qword
- else if (mem.endsWith(u8, mnem_str, "t"))
- .tbyte
- else
- break :mnem null;
- break :mnem std.meta.stringToEnum(Instruction.Mnemonic, mnem_str[0 .. mnem_str.len - 1]);
- } orelse mnem: {
+ var mnem_size: ?Memory.Size = if (mem.endsWith(u8, mnem_str, "b"))
+ .byte
+ else if (mem.endsWith(u8, mnem_str, "w"))
+ .word
+ else if (mem.endsWith(u8, mnem_str, "l"))
+ .dword
+ else if (mem.endsWith(u8, mnem_str, "q") and
+ (mem.indexOfScalar(u8, "vp", mnem_str[0]) == null or !mem.endsWith(u8, mnem_str, "dq")))
+ .qword
+ else if (mem.endsWith(u8, mnem_str, "t"))
+ .tbyte
+ else
+ null;
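+ // Try the mnemonic with its size suffix stripped first; if that does not name
+ // a known instruction, drop the assumed size and retry with the full string.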
+ const mnem_tag = while (true) break std.meta.stringToEnum(
+ Instruction.Mnemonic,
+ mnem_str[0 .. mnem_str.len - @intFromBool(mnem_size != null)],
+ ) orelse if (mnem_size) |_| {
mnem_size = null;
- break :mnem std.meta.stringToEnum(Instruction.Mnemonic, mnem_str);
- } orelse return self.fail("invalid mnemonic: '{s}'", .{mnem_str});
+ continue;
+ } else return self.fail("invalid mnemonic: '{s}'", .{mnem_str});
if (@as(?Memory.Size, switch (mnem_tag) {
.clflush => .byte,
.fldenv, .fnstenv, .fstenv => .none,
@@ -14135,30 +14407,8 @@ fn moveStrategy(self: *Self, ty: Type, class: Register.Class, aligned: bool) !Mo
else => {},
},
.Int => switch (ty.childType(mod).intInfo(mod).bits) {
- 8 => switch (ty.vectorLen(mod)) {
- 1 => if (self.hasFeature(.avx)) return .{ .vex_insert_extract = .{
- .insert = .{ .vp_b, .insr },
- .extract = .{ .vp_b, .extr },
- } } else if (self.hasFeature(.sse4_2)) return .{ .insert_extract = .{
- .insert = .{ .p_b, .insr },
- .extract = .{ .p_b, .extr },
- } },
- 2 => return if (self.hasFeature(.avx)) .{ .vex_insert_extract = .{
- .insert = .{ .vp_w, .insr },
- .extract = .{ .vp_w, .extr },
- } } else .{ .insert_extract = .{
- .insert = .{ .p_w, .insr },
- .extract = .{ .p_w, .extr },
- } },
- 3...4 => return .{ .move = if (self.hasFeature(.avx))
- .{ .v_d, .mov }
- else
- .{ ._d, .mov } },
- 5...8 => return .{ .move = if (self.hasFeature(.avx))
- .{ .v_q, .mov }
- else
- .{ ._q, .mov } },
- 9...16 => return .{ .move = if (self.hasFeature(.avx))
+ 1...8 => switch (ty.vectorLen(mod)) {
+ 1...16 => return .{ .move = if (self.hasFeature(.avx))
if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu }
else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } },
17...32 => if (self.hasFeature(.avx))
@@ -14168,23 +14418,8 @@ fn moveStrategy(self: *Self, ty: Type, class: Register.Class, aligned: bool) !Mo
.{ .v_, .movdqu } },
else => {},
},
- 16 => switch (ty.vectorLen(mod)) {
- 1 => return if (self.hasFeature(.avx)) .{ .vex_insert_extract = .{
- .insert = .{ .vp_w, .insr },
- .extract = .{ .vp_w, .extr },
- } } else .{ .insert_extract = .{
- .insert = .{ .p_w, .insr },
- .extract = .{ .p_w, .extr },
- } },
- 2 => return .{ .move = if (self.hasFeature(.avx))
- .{ .v_d, .mov }
- else
- .{ ._d, .mov } },
- 3...4 => return .{ .move = if (self.hasFeature(.avx))
- .{ .v_q, .mov }
- else
- .{ ._q, .mov } },
- 5...8 => return .{ .move = if (self.hasFeature(.avx))
+ 9...16 => switch (ty.vectorLen(mod)) {
+ 1...8 => return .{ .move = if (self.hasFeature(.avx))
if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu }
else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } },
9...16 => if (self.hasFeature(.avx))
@@ -14194,16 +14429,8 @@ fn moveStrategy(self: *Self, ty: Type, class: Register.Class, aligned: bool) !Mo
.{ .v_, .movdqu } },
else => {},
},
- 32 => switch (ty.vectorLen(mod)) {
- 1 => return .{ .move = if (self.hasFeature(.avx))
- .{ .v_d, .mov }
- else
- .{ ._d, .mov } },
- 2 => return .{ .move = if (self.hasFeature(.avx))
- .{ .v_q, .mov }
- else
- .{ ._q, .mov } },
- 3...4 => return .{ .move = if (self.hasFeature(.avx))
+ 17...32 => switch (ty.vectorLen(mod)) {
+ 1...4 => return .{ .move = if (self.hasFeature(.avx))
if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu }
else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } },
5...8 => if (self.hasFeature(.avx))
@@ -14213,12 +14440,8 @@ fn moveStrategy(self: *Self, ty: Type, class: Register.Class, aligned: bool) !Mo
.{ .v_, .movdqu } },
else => {},
},
- 64 => switch (ty.vectorLen(mod)) {
- 1 => return .{ .move = if (self.hasFeature(.avx))
- .{ .v_q, .mov }
- else
- .{ ._q, .mov } },
- 2 => return .{ .move = if (self.hasFeature(.avx))
+ 33...64 => switch (ty.vectorLen(mod)) {
+ 1...2 => return .{ .move = if (self.hasFeature(.avx))
if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu }
else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } },
3...4 => if (self.hasFeature(.avx))
@@ -14228,7 +14451,7 @@ fn moveStrategy(self: *Self, ty: Type, class: Register.Class, aligned: bool) !Mo
.{ .v_, .movdqu } },
else => {},
},
- 128 => switch (ty.vectorLen(mod)) {
+ 65...128 => switch (ty.vectorLen(mod)) {
1 => return .{ .move = if (self.hasFeature(.avx))
if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu }
else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } },
@@ -14239,7 +14462,7 @@ fn moveStrategy(self: *Self, ty: Type, class: Register.Class, aligned: bool) !Mo
.{ .v_, .movdqu } },
else => {},
},
- 256 => switch (ty.vectorLen(mod)) {
+ 129...256 => switch (ty.vectorLen(mod)) {
1 => if (self.hasFeature(.avx))
return .{ .move = if (aligned)
.{ .v_, .movdqa }
@@ -14251,11 +14474,7 @@ fn moveStrategy(self: *Self, ty: Type, class: Register.Class, aligned: bool) !Mo
},
.Pointer, .Optional => if (ty.childType(mod).isPtrAtRuntime(mod))
switch (ty.vectorLen(mod)) {
- 1 => return .{ .move = if (self.hasFeature(.avx))
- .{ .v_q, .mov }
- else
- .{ ._q, .mov } },
- 2 => return .{ .move = if (self.hasFeature(.avx))
+ 1...2 => return .{ .move = if (self.hasFeature(.avx))
if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu }
else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } },
3...4 => if (self.hasFeature(.avx))
@@ -14269,22 +14488,7 @@ fn moveStrategy(self: *Self, ty: Type, class: Register.Class, aligned: bool) !Mo
unreachable,
.Float => switch (ty.childType(mod).floatBits(self.target.*)) {
16 => switch (ty.vectorLen(mod)) {
- 1 => return if (self.hasFeature(.avx)) .{ .vex_insert_extract = .{
- .insert = .{ .vp_w, .insr },
- .extract = .{ .vp_w, .extr },
- } } else .{ .insert_extract = .{
- .insert = .{ .p_w, .insr },
- .extract = .{ .p_w, .extr },
- } },
- 2 => return .{ .move = if (self.hasFeature(.avx))
- .{ .v_d, .mov }
- else
- .{ ._d, .mov } },
- 3...4 => return .{ .move = if (self.hasFeature(.avx))
- .{ .v_q, .mov }
- else
- .{ ._q, .mov } },
- 5...8 => return .{ .move = if (self.hasFeature(.avx))
+ 1...8 => return .{ .move = if (self.hasFeature(.avx))
if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu }
else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } },
9...16 => if (self.hasFeature(.avx))
@@ -14295,15 +14499,7 @@ fn moveStrategy(self: *Self, ty: Type, class: Register.Class, aligned: bool) !Mo
else => {},
},
32 => switch (ty.vectorLen(mod)) {
- 1 => return .{ .move = if (self.hasFeature(.avx))
- .{ .v_ss, .mov }
- else
- .{ ._ss, .mov } },
- 2 => return .{ .move = if (self.hasFeature(.avx))
- .{ .v_sd, .mov }
- else
- .{ ._sd, .mov } },
- 3...4 => return .{ .move = if (self.hasFeature(.avx))
+ 1...4 => return .{ .move = if (self.hasFeature(.avx))
if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu }
else if (aligned) .{ ._ps, .mova } else .{ ._ps, .movu } },
5...8 => if (self.hasFeature(.avx))
@@ -14314,11 +14510,7 @@ fn moveStrategy(self: *Self, ty: Type, class: Register.Class, aligned: bool) !Mo
else => {},
},
64 => switch (ty.vectorLen(mod)) {
- 1 => return .{ .move = if (self.hasFeature(.avx))
- .{ .v_sd, .mov }
- else
- .{ ._sd, .mov } },
- 2 => return .{ .move = if (self.hasFeature(.avx))
+ 1...2 => return .{ .move = if (self.hasFeature(.avx))
if (aligned) .{ .v_pd, .mova } else .{ .v_pd, .movu }
else if (aligned) .{ ._pd, .mova } else .{ ._pd, .movu } },
3...4 => if (self.hasFeature(.avx))
@@ -14633,7 +14825,7 @@ fn genSetReg(
ty,
dst_reg.class(),
self.getFrameAddrAlignment(frame_addr).compare(.gte, Alignment.fromLog2Units(
- std.math.log2_int_ceil(u10, @divExact(dst_reg.bitSize(), 8)),
+ math.log2_int_ceil(u10, @divExact(dst_reg.bitSize(), 8)),
)),
),
.lea_frame => .{ .move = .{ ._, .lea } },
@@ -16296,7 +16488,7 @@ fn airSplat(self: *Self, inst: Air.Inst.Index) !void {
},
65...128 => switch (vector_len) {
else => null,
- 1...2 => .{ .vp_i128, .broadcast },
+ 1...2 => .{ .v_i128, .broadcast },
},
}) orelse break :avx2;
@@ -16310,7 +16502,7 @@ fn airSplat(self: *Self, inst: Air.Inst.Index) !void {
registerAlias(dst_reg, @intCast(vector_ty.abiSize(mod))),
try src_mcv.mem(self, self.memSize(scalar_ty)),
) else {
- if (mir_tag[0] == .vp_i128) break :avx2;
+ if (mir_tag[0] == .v_i128) break :avx2;
try self.genSetReg(dst_reg, scalar_ty, src_mcv, .{});
try self.asmRegisterRegister(
mir_tag,
@@ -16352,7 +16544,7 @@ fn airSplat(self: *Self, inst: Air.Inst.Index) !void {
.{ if (self.hasFeature(.avx)) .vp_w else .p_w, .shufl },
dst_alias,
dst_alias,
- Immediate.u(0),
+ Immediate.u(0b00_00_00_00),
);
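+ // An all-zero selector replicates word 0 across the four low words (pshuflw).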
if (switch (scalar_bits) {
1...8 => vector_len > 4,
@@ -16563,18 +16755,1158 @@ fn airSplat(self: *Self, inst: Air.Inst.Index) !void {
}
fn airSelect(self: *Self, inst: Air.Inst.Index) !void {
+ const mod = self.bin_file.comp.module.?;
const pl_op = self.air.instructions.items(.data)[@intFromEnum(inst)].pl_op;
const extra = self.air.extraData(Air.Bin, pl_op.payload).data;
- _ = extra;
- return self.fail("TODO implement airSelect for x86_64", .{});
- //return self.finishAir(inst, result, .{ pl_op.operand, extra.lhs, extra.rhs });
+ const ty = self.typeOfIndex(inst);
+ const vec_len = ty.vectorLen(mod);
+ const elem_ty = ty.childType(mod);
+ const elem_abi_size: u32 = @intCast(elem_ty.abiSize(mod));
+ const abi_size: u32 = @intCast(ty.abiSize(mod));
+ const pred_ty = self.typeOf(pl_op.operand);
+
+ const result = result: {
+ const has_blend = self.hasFeature(.sse4_1);
+ const has_avx = self.hasFeature(.avx);
+ const need_xmm0 = has_blend and !has_avx;
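+ // Non-VEX blendvps/blendvpd/pblendvb use xmm0 as an implicit mask operand,
+ // so without AVX the select mask has to be materialized in xmm0.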
+ const pred_mcv = try self.resolveInst(pl_op.operand);
+ const mask_reg = mask: {
+ switch (pred_mcv) {
+ .register => |pred_reg| switch (pred_reg.class()) {
+ .general_purpose => {},
+ .sse => if (need_xmm0 and pred_reg.id() != comptime Register.xmm0.id()) {
+ try self.register_manager.getKnownReg(.xmm0, null);
+ try self.genSetReg(.xmm0, pred_ty, pred_mcv, .{});
+ break :mask .xmm0;
+ } else break :mask if (has_blend)
+ pred_reg
+ else
+ try self.copyToTmpRegister(pred_ty, pred_mcv),
+ else => unreachable,
+ },
+ else => {},
+ }
+ const mask_reg: Register = if (need_xmm0) mask_reg: {
+ try self.register_manager.getKnownReg(.xmm0, null);
+ break :mask_reg .xmm0;
+ } else try self.register_manager.allocReg(null, abi.RegisterClass.sse);
+ const mask_alias = registerAlias(mask_reg, abi_size);
+ const mask_lock = self.register_manager.lockRegAssumeUnused(mask_reg);
+ defer self.register_manager.unlockReg(mask_lock);
+
+ const pred_fits_in_elem = vec_len <= elem_abi_size;
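+ // The predicate arrives as an integer bitmask with one bit per lane; the code
+ // below broadcasts it and then widens each lane's bit into a whole-lane mask.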
+ if (self.hasFeature(.avx2) and abi_size <= 32) {
+ if (pred_mcv.isRegister()) broadcast: {
+ try self.asmRegisterRegister(
+ .{ .v_d, .mov },
+ mask_reg.to128(),
+ pred_mcv.getReg().?.to32(),
+ );
+ if (pred_fits_in_elem and vec_len > 1) try self.asmRegisterRegister(
+ .{ switch (elem_abi_size) {
+ 1 => .vp_b,
+ 2 => .vp_w,
+ 3...4 => .vp_d,
+ 5...8 => .vp_q,
+ 9...16 => {
+ try self.asmRegisterRegisterRegisterImmediate(
+ .{ .v_f128, .insert },
+ mask_alias,
+ mask_alias,
+ mask_reg.to128(),
+ Immediate.u(1),
+ );
+ break :broadcast;
+ },
+ 17...32 => break :broadcast,
+ else => unreachable,
+ }, .broadcast },
+ mask_alias,
+ mask_reg.to128(),
+ );
+ } else try self.asmRegisterMemory(
+ .{ switch (vec_len) {
+ 1...8 => .vp_b,
+ 9...16 => .vp_w,
+ 17...32 => .vp_d,
+ else => unreachable,
+ }, .broadcast },
+ mask_alias,
+ if (pred_mcv.isMemory()) try pred_mcv.mem(self, .byte) else .{
+ .base = .{ .reg = (try self.copyToTmpRegister(
+ Type.usize,
+ pred_mcv.address(),
+ )).to64() },
+ .mod = .{ .rm = .{ .size = .byte } },
+ },
+ );
+ } else if (abi_size <= 16) broadcast: {
+ try self.asmRegisterRegister(
+ .{ if (has_avx) .v_d else ._d, .mov },
+ mask_alias,
+ (if (pred_mcv.isRegister())
+ pred_mcv.getReg().?
+ else
+ try self.copyToTmpRegister(pred_ty, pred_mcv.address())).to32(),
+ );
+ if (!pred_fits_in_elem or vec_len == 1) break :broadcast;
+ if (elem_abi_size <= 1) {
+ if (has_avx) try self.asmRegisterRegisterRegister(
+ .{ .vp_, .unpcklbw },
+ mask_alias,
+ mask_alias,
+ mask_alias,
+ ) else try self.asmRegisterRegister(
+ .{ .p_, .unpcklbw },
+ mask_alias,
+ mask_alias,
+ );
+ if (abi_size <= 2) break :broadcast;
+ }
+ if (elem_abi_size <= 2) {
+ try self.asmRegisterRegisterImmediate(
+ .{ if (has_avx) .vp_w else .p_w, .shufl },
+ mask_alias,
+ mask_alias,
+ Immediate.u(0b00_00_00_00),
+ );
+ if (abi_size <= 8) break :broadcast;
+ }
+ try self.asmRegisterRegisterImmediate(
+ .{ if (has_avx) .vp_d else .p_d, .shuf },
+ mask_alias,
+ mask_alias,
+ Immediate.u(switch (elem_abi_size) {
+ 1...2, 5...8 => 0b01_00_01_00,
+ 3...4 => 0b00_00_00_00,
+ else => unreachable,
+ }),
+ );
+ } else return self.fail("TODO implement airSelect for {}", .{ty.fmt(mod)});
+ const elem_bits: u16 = @intCast(elem_abi_size * 8);
+ const mask_elem_ty = try mod.intType(.unsigned, elem_bits);
+ const mask_ty = try mod.vectorType(.{ .len = vec_len, .child = mask_elem_ty.toIntern() });
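+ // When the bitmask spans more than one lane, first route the predicate byte
+ // that holds each lane's bit into that lane with pshufb (requires SSSE3).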
+ if (!pred_fits_in_elem) if (self.hasFeature(.ssse3)) {
+ var mask_elems: [32]InternPool.Index = undefined;
+ for (mask_elems[0..vec_len], 0..) |*elem, bit| elem.* = try mod.intern(.{ .int = .{
+ .ty = mask_elem_ty.toIntern(),
+ .storage = .{ .u64 = bit / elem_bits },
+ } });
+ const mask_mcv = try self.genTypedValue(.{
+ .ty = mask_ty,
+ .val = Value.fromInterned(try mod.intern(.{ .aggregate = .{
+ .ty = mask_ty.toIntern(),
+ .storage = .{ .elems = mask_elems[0..vec_len] },
+ } })),
+ });
+ const mask_mem: Memory = .{
+ .base = .{ .reg = try self.copyToTmpRegister(Type.usize, mask_mcv.address()) },
+ .mod = .{ .rm = .{ .size = self.memSize(ty) } },
+ };
+ if (has_avx) try self.asmRegisterRegisterMemory(
+ .{ .vp_b, .shuf },
+ mask_alias,
+ mask_alias,
+ mask_mem,
+ ) else try self.asmRegisterMemory(
+ .{ .p_b, .shuf },
+ mask_alias,
+ mask_mem,
+ );
+ } else return self.fail("TODO implement airSelect for {}", .{ty.fmt(mod)});
+ {
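+ // Keep only each lane's own predicate bit, then compare for equality with
+ // the same one-hot constant: a set bit yields all-ones, a clear bit zeros.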
+ var mask_elems: [32]InternPool.Index = undefined;
+ for (mask_elems[0..vec_len], 0..) |*elem, bit| elem.* = try mod.intern(.{ .int = .{
+ .ty = mask_elem_ty.toIntern(),
+ .storage = .{ .u64 = @as(u32, 1) << @intCast(bit & (elem_bits - 1)) },
+ } });
+ const mask_mcv = try self.genTypedValue(.{
+ .ty = mask_ty,
+ .val = Value.fromInterned(try mod.intern(.{ .aggregate = .{
+ .ty = mask_ty.toIntern(),
+ .storage = .{ .elems = mask_elems[0..vec_len] },
+ } })),
+ });
+ const mask_mem: Memory = .{
+ .base = .{ .reg = try self.copyToTmpRegister(Type.usize, mask_mcv.address()) },
+ .mod = .{ .rm = .{ .size = self.memSize(ty) } },
+ };
+ if (has_avx) {
+ try self.asmRegisterRegisterMemory(
+ .{ .vp_, .@"and" },
+ mask_alias,
+ mask_alias,
+ mask_mem,
+ );
+ try self.asmRegisterRegisterMemory(
+ .{ .vp_d, .cmpeq },
+ mask_alias,
+ mask_alias,
+ mask_mem,
+ );
+ } else {
+ try self.asmRegisterMemory(
+ .{ .p_, .@"and" },
+ mask_alias,
+ mask_mem,
+ );
+ try self.asmRegisterMemory(
+ .{ .p_d, .cmpeq },
+ mask_alias,
+ mask_mem,
+ );
+ }
+ }
+ break :mask mask_reg;
+ };
+ const mask_alias = registerAlias(mask_reg, abi_size);
+ const mask_lock = self.register_manager.lockRegAssumeUnused(mask_reg);
+ defer self.register_manager.unlockReg(mask_lock);
+
+ const lhs_mcv = try self.resolveInst(extra.lhs);
+ const lhs_lock = switch (lhs_mcv) {
+ .register => |lhs_reg| self.register_manager.lockRegAssumeUnused(lhs_reg),
+ else => null,
+ };
+ defer if (lhs_lock) |lock| self.register_manager.unlockReg(lock);
+
+ const rhs_mcv = try self.resolveInst(extra.rhs);
+ const rhs_lock = switch (rhs_mcv) {
+ .register => |rhs_reg| self.register_manager.lockReg(rhs_reg),
+ else => null,
+ };
+ defer if (rhs_lock) |lock| self.register_manager.unlockReg(lock);
+
+ const reuse_mcv = if (has_blend) rhs_mcv else lhs_mcv;
+ const dst_mcv: MCValue = if (reuse_mcv.isRegister() and self.reuseOperand(
+ inst,
+ if (has_blend) extra.rhs else extra.lhs,
+ @intFromBool(has_blend),
+ reuse_mcv,
+ )) reuse_mcv else if (has_avx)
+ .{ .register = try self.register_manager.allocReg(inst, abi.RegisterClass.sse) }
+ else
+ try self.copyToRegisterWithInstTracking(inst, ty, reuse_mcv);
+ const dst_reg = dst_mcv.getReg().?;
+ const dst_alias = registerAlias(dst_reg, abi_size);
+ const dst_lock = self.register_manager.lockReg(dst_reg);
+ defer if (dst_lock) |lock| self.register_manager.unlockReg(lock);
+
+ const mir_tag = @as(?Mir.Inst.FixedTag, switch (ty.childType(mod).zigTypeTag(mod)) {
+ else => null,
+ .Int => switch (abi_size) {
+ 0 => unreachable,
+ 1...16 => if (has_avx)
+ .{ .vp_b, .blendv }
+ else if (has_blend)
+ .{ .p_b, .blendv }
+ else
+ .{ .p_, undefined },
+ 17...32 => if (self.hasFeature(.avx2))
+ .{ .vp_b, .blendv }
+ else
+ null,
+ else => null,
+ },
+ .Float => switch (ty.childType(mod).floatBits(self.target.*)) {
+ else => unreachable,
+ 16, 80, 128 => null,
+ 32 => switch (vec_len) {
+ 0 => unreachable,
+ 1...4 => if (has_avx) .{ .v_ps, .blendv } else .{ ._ps, .blendv },
+ 5...8 => if (has_avx) .{ .v_ps, .blendv } else null,
+ else => null,
+ },
+ 64 => switch (vec_len) {
+ 0 => unreachable,
+ 1...2 => if (has_avx) .{ .v_pd, .blendv } else .{ ._pd, .blendv },
+ 3...4 => if (has_avx) .{ .v_pd, .blendv } else null,
+ else => null,
+ },
+ },
+ }) orelse return self.fail("TODO implement airSelect for {}", .{ty.fmt(mod)});
+ if (has_avx) {
+ const rhs_alias = if (rhs_mcv.isRegister())
+ registerAlias(rhs_mcv.getReg().?, abi_size)
+ else rhs: {
+ try self.genSetReg(dst_reg, ty, rhs_mcv, .{});
+ break :rhs dst_alias;
+ };
+ if (lhs_mcv.isMemory()) try self.asmRegisterRegisterMemoryRegister(
+ mir_tag,
+ dst_alias,
+ rhs_alias,
+ try lhs_mcv.mem(self, self.memSize(ty)),
+ mask_alias,
+ ) else try self.asmRegisterRegisterRegisterRegister(
+ mir_tag,
+ dst_alias,
+ rhs_alias,
+ registerAlias(if (lhs_mcv.isRegister())
+ lhs_mcv.getReg().?
+ else
+ try self.copyToTmpRegister(ty, lhs_mcv), abi_size),
+ mask_alias,
+ );
+ } else if (has_blend) if (lhs_mcv.isMemory()) try self.asmRegisterMemoryRegister(
+ mir_tag,
+ dst_alias,
+ try lhs_mcv.mem(self, self.memSize(ty)),
+ mask_alias,
+ ) else try self.asmRegisterRegisterRegister(
+ mir_tag,
+ dst_alias,
+ registerAlias(if (lhs_mcv.isRegister())
+ lhs_mcv.getReg().?
+ else
+ try self.copyToTmpRegister(ty, lhs_mcv), abi_size),
+ mask_alias,
+ ) else {
+ const mir_fixes = @as(?Mir.Inst.Fixes, switch (elem_ty.zigTypeTag(mod)) {
+ else => null,
+ .Int => .p_,
+ .Float => switch (elem_ty.floatBits(self.target.*)) {
+ 32 => ._ps,
+ 64 => ._pd,
+ 16, 80, 128 => null,
+ else => unreachable,
+ },
+ }) orelse return self.fail("TODO implement airSelect for {}", .{ty.fmt(mod)});
+ try self.asmRegisterRegister(.{ mir_fixes, .@"and" }, dst_alias, mask_alias);
+ if (rhs_mcv.isMemory()) try self.asmRegisterMemory(
+ .{ mir_fixes, .andn },
+ mask_alias,
+ try rhs_mcv.mem(self, Memory.Size.fromSize(abi_size)),
+ ) else try self.asmRegisterRegister(
+ .{ mir_fixes, .andn },
+ mask_alias,
+ if (rhs_mcv.isRegister())
+ rhs_mcv.getReg().?
+ else
+ try self.copyToTmpRegister(ty, rhs_mcv),
+ );
+ try self.asmRegisterRegister(.{ mir_fixes, .@"or" }, dst_alias, mask_alias);
+ }
+ break :result dst_mcv;
+ };
+ return self.finishAir(inst, result, .{ pl_op.operand, extra.lhs, extra.rhs });
}
fn airShuffle(self: *Self, inst: Air.Inst.Index) !void {
+ const mod = self.bin_file.comp.module.?;
const ty_pl = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl;
- _ = ty_pl;
- return self.fail("TODO implement airShuffle for x86_64", .{});
- //return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
+ const extra = self.air.extraData(Air.Shuffle, ty_pl.payload).data;
+
+ const dst_ty = self.typeOfIndex(inst);
+ const elem_ty = dst_ty.childType(mod);
+ const elem_abi_size: u16 = @intCast(elem_ty.abiSize(mod));
+ const dst_abi_size: u32 = @intCast(dst_ty.abiSize(mod));
+ const lhs_ty = self.typeOf(extra.a);
+ const lhs_abi_size: u32 = @intCast(lhs_ty.abiSize(mod));
+ const rhs_ty = self.typeOf(extra.b);
+ const rhs_abi_size: u32 = @intCast(rhs_ty.abiSize(mod));
+ const max_abi_size = @max(dst_abi_size, lhs_abi_size, rhs_abi_size);
+
+ const ExpectedContents = [32]?i32;
+ var stack align(@max(@alignOf(ExpectedContents), @alignOf(std.heap.StackFallbackAllocator(0)))) =
+ std.heap.stackFallback(@sizeOf(ExpectedContents), self.gpa);
+ const allocator = stack.get();
+
+ const mask_elems = try allocator.alloc(?i32, extra.mask_len);
+ defer allocator.free(mask_elems);
+ for (mask_elems, 0..) |*mask_elem, elem_index| {
+ const mask_elem_val =
+ Value.fromInterned(extra.mask).elemValue(mod, elem_index) catch unreachable;
+ mask_elem.* = if (mask_elem_val.isUndef(mod))
+ null
+ else
+ @intCast(mask_elem_val.toSignedInt(mod));
+ }
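+ // AIR encodes shuffle masks with undef lanes as null; a non-negative element
+ // indexes operand a, while a negative element selects lane ~elem of operand b.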
+
+ const has_avx = self.hasFeature(.avx);
+ const result = @as(?MCValue, result: {
+ for (mask_elems) |mask_elem| {
+ if (mask_elem) |_| break;
+ } else break :result try self.allocRegOrMem(inst, true);
+
+ for (mask_elems, 0..) |mask_elem, elem_index| {
+ if (mask_elem orelse continue != elem_index) break;
+ } else {
+ const lhs_mcv = try self.resolveInst(extra.a);
+ if (self.reuseOperand(inst, extra.a, 0, lhs_mcv)) break :result lhs_mcv;
+ const dst_mcv = try self.allocRegOrMem(inst, true);
+ try self.genCopy(dst_ty, dst_mcv, lhs_mcv, .{});
+ break :result dst_mcv;
+ }
+
+ for (mask_elems, 0..) |mask_elem, elem_index| {
+ if (~(mask_elem orelse continue) != elem_index) break;
+ } else {
+ const rhs_mcv = try self.resolveInst(extra.b);
+ if (self.reuseOperand(inst, extra.b, 1, rhs_mcv)) break :result rhs_mcv;
+ const dst_mcv = try self.allocRegOrMem(inst, true);
+ try self.genCopy(dst_ty, dst_mcv, rhs_mcv, .{});
+ break :result dst_mcv;
+ }
+
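+ // Try a single punpckl/punpckh, which interleaves elements from the low or
+ // high half of two sources; every defined mask element must land exactly
+ // where the interleave puts it, with a consistent source for each side.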
+ for ([_]Mir.Inst.Tag{ .unpckl, .unpckh }) |variant| unpck: {
+ if (elem_abi_size > 8) break :unpck;
+ if (dst_abi_size > @as(u32, if (if (elem_abi_size >= 4)
+ has_avx
+ else
+ self.hasFeature(.avx2)) 32 else 16)) break :unpck;
+
+ var sources = [1]?u1{null} ** 2;
+ for (mask_elems, 0..) |maybe_mask_elem, elem_index| {
+ const mask_elem = maybe_mask_elem orelse continue;
+ const mask_elem_index =
+ math.cast(u5, if (mask_elem < 0) ~mask_elem else mask_elem) orelse break :unpck;
+ const elem_byte = (elem_index >> 1) * elem_abi_size;
+ if (mask_elem_index * elem_abi_size != (elem_byte & 0b0111) | @as(u4, switch (variant) {
+ .unpckl => 0b0000,
+ .unpckh => 0b1000,
+ else => unreachable,
+ }) | (elem_byte << 1 & 0b10000)) break :unpck;
+
+ const source = @intFromBool(mask_elem < 0);
+ if (sources[elem_index & 0b00001]) |prev_source| {
+ if (source != prev_source) break :unpck;
+ } else sources[elem_index & 0b00001] = source;
+ }
+ if (sources[0] orelse break :unpck == sources[1] orelse break :unpck) break :unpck;
+
+ const operands = [2]Air.Inst.Ref{ extra.a, extra.b };
+ const operand_tys = [2]Type{ lhs_ty, rhs_ty };
+ const lhs_mcv = try self.resolveInst(operands[sources[0].?]);
+ const rhs_mcv = try self.resolveInst(operands[sources[1].?]);
+
+ const dst_mcv: MCValue = if (lhs_mcv.isRegister() and
+ self.reuseOperand(inst, operands[sources[0].?], sources[0].?, lhs_mcv))
+ lhs_mcv
+ else if (has_avx and lhs_mcv.isRegister())
+ .{ .register = try self.register_manager.allocReg(inst, abi.RegisterClass.sse) }
+ else
+ try self.copyToRegisterWithInstTracking(inst, operand_tys[sources[0].?], lhs_mcv);
+ const dst_reg = dst_mcv.getReg().?;
+ const dst_alias = registerAlias(dst_reg, max_abi_size);
+
+ const mir_tag: Mir.Inst.FixedTag = if ((elem_abi_size >= 4 and elem_ty.isRuntimeFloat()) or
+ (dst_abi_size > 16 and !self.hasFeature(.avx2))) .{ switch (elem_abi_size) {
+ 4 => if (has_avx) .v_ps else ._ps,
+ 8 => if (has_avx) .v_pd else ._pd,
+ else => unreachable,
+ }, variant } else .{ if (has_avx) .vp_ else .p_, switch (variant) {
+ .unpckl => switch (elem_abi_size) {
+ 1 => .unpcklbw,
+ 2 => .unpcklwd,
+ 4 => .unpckldq,
+ 8 => .unpcklqdq,
+ else => unreachable,
+ },
+ .unpckh => switch (elem_abi_size) {
+ 1 => .unpckhbw,
+ 2 => .unpckhwd,
+ 4 => .unpckhdq,
+ 8 => .unpckhqdq,
+ else => unreachable,
+ },
+ else => unreachable,
+ } };
+ if (has_avx) if (rhs_mcv.isMemory()) try self.asmRegisterRegisterMemory(
+ mir_tag,
+ dst_alias,
+ registerAlias(lhs_mcv.getReg() orelse dst_reg, max_abi_size),
+ try rhs_mcv.mem(self, Memory.Size.fromSize(max_abi_size)),
+ ) else try self.asmRegisterRegisterRegister(
+ mir_tag,
+ dst_alias,
+ registerAlias(lhs_mcv.getReg() orelse dst_reg, max_abi_size),
+ registerAlias(if (rhs_mcv.isRegister())
+ rhs_mcv.getReg().?
+ else
+ try self.copyToTmpRegister(operand_tys[sources[1].?], rhs_mcv), max_abi_size),
+ ) else if (rhs_mcv.isMemory()) try self.asmRegisterMemory(
+ mir_tag,
+ dst_alias,
+ try rhs_mcv.mem(self, Memory.Size.fromSize(max_abi_size)),
+ ) else try self.asmRegisterRegister(
+ mir_tag,
+ dst_alias,
+ registerAlias(if (rhs_mcv.isRegister())
+ rhs_mcv.getReg().?
+ else
+ try self.copyToTmpRegister(operand_tys[sources[1].?], rhs_mcv), max_abi_size),
+ );
+ break :result dst_mcv;
+ }
+
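+ // pshufd copies dwords within each 128-bit half using a 2-bit selector per
+ // destination lane, so it only applies when every selected element comes
+ // from a single operand.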
+ pshufd: {
+ if (elem_abi_size != 4) break :pshufd;
+ if (max_abi_size > @as(u32, if (has_avx) 32 else 16)) break :pshufd;
+
+ var control: u8 = 0b00_00_00_00;
+ var sources = [1]?u1{null} ** 1;
+ for (mask_elems, 0..) |maybe_mask_elem, elem_index| {
+ const mask_elem = maybe_mask_elem orelse continue;
+ const mask_elem_index: u3 = @intCast(if (mask_elem < 0) ~mask_elem else mask_elem);
+ if (mask_elem_index & 0b100 != elem_index & 0b100) break :pshufd;
+
+ const source = @intFromBool(mask_elem < 0);
+ if (sources[0]) |prev_source| {
+ if (source != prev_source) break :pshufd;
+ } else sources[0] = source;
+
+ const select_bit: u3 = @intCast((elem_index & 0b011) << 1);
+ const select = @as(u8, @intCast(mask_elem_index & 0b011)) << select_bit;
+ if (elem_index & 0b100 == 0)
+ control |= select
+ else if (control & @as(u8, 0b11) << select_bit != select) break :pshufd;
+ }
+
+ const operands = [2]Air.Inst.Ref{ extra.a, extra.b };
+ const operand_tys = [2]Type{ lhs_ty, rhs_ty };
+ const src_mcv = try self.resolveInst(operands[sources[0] orelse break :pshufd]);
+
+ const dst_reg = if (src_mcv.isRegister() and
+ self.reuseOperand(inst, operands[sources[0].?], sources[0].?, src_mcv))
+ src_mcv.getReg().?
+ else
+ try self.register_manager.allocReg(inst, abi.RegisterClass.sse);
+ const dst_alias = registerAlias(dst_reg, max_abi_size);
+
+ if (src_mcv.isMemory()) try self.asmRegisterMemoryImmediate(
+ .{ if (has_avx) .vp_d else .p_d, .shuf },
+ dst_alias,
+ try src_mcv.mem(self, Memory.Size.fromSize(max_abi_size)),
+ Immediate.u(control),
+ ) else try self.asmRegisterRegisterImmediate(
+ .{ if (has_avx) .vp_d else .p_d, .shuf },
+ dst_alias,
+ registerAlias(if (src_mcv.isRegister())
+ src_mcv.getReg().?
+ else
+ try self.copyToTmpRegister(operand_tys[sources[0].?], src_mcv), max_abi_size),
+ Immediate.u(control),
+ );
+ break :result .{ .register = dst_reg };
+ }
+
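+ // shufps builds each 128-bit half from two low dwords of the first source
+ // and two high dwords of the second, one 2-bit selector per lane, so the
+ // mask must split between the sources accordingly.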
+ shufps: {
+ if (elem_abi_size != 4) break :shufps;
+ if (max_abi_size > @as(u32, if (has_avx) 32 else 16)) break :shufps;
+
+ var control: u8 = 0b00_00_00_00;
+ var sources = [1]?u1{null} ** 2;
+ for (mask_elems, 0..) |maybe_mask_elem, elem_index| {
+ const mask_elem = maybe_mask_elem orelse continue;
+ const mask_elem_index: u3 = @intCast(if (mask_elem < 0) ~mask_elem else mask_elem);
+ if (mask_elem_index & 0b100 != elem_index & 0b100) break :shufps;
+
+ const source = @intFromBool(mask_elem < 0);
+ if (sources[(elem_index & 0b010) >> 1]) |prev_source| {
+ if (source != prev_source) break :shufps;
+ } else sources[(elem_index & 0b010) >> 1] = source;
+
+ const select_bit: u3 = @intCast((elem_index & 0b011) << 1);
+ const select = @as(u8, @intCast(mask_elem_index & 0b011)) << select_bit;
+ if (elem_index & 0b100 == 0)
+ control |= select
+ else if (control & @as(u8, 0b11) << select_bit != select) break :shufps;
+ }
+ if (sources[0] orelse break :shufps == sources[1] orelse break :shufps) break :shufps;
+
+ const operands = [2]Air.Inst.Ref{ extra.a, extra.b };
+ const operand_tys = [2]Type{ lhs_ty, rhs_ty };
+ const lhs_mcv = try self.resolveInst(operands[sources[0].?]);
+ const rhs_mcv = try self.resolveInst(operands[sources[1].?]);
+
+ const dst_mcv: MCValue = if (lhs_mcv.isRegister() and
+ self.reuseOperand(inst, operands[sources[0].?], sources[0].?, lhs_mcv))
+ lhs_mcv
+ else if (has_avx and lhs_mcv.isRegister())
+ .{ .register = try self.register_manager.allocReg(inst, abi.RegisterClass.sse) }
+ else
+ try self.copyToRegisterWithInstTracking(inst, operand_tys[sources[0].?], lhs_mcv);
+ const dst_reg = dst_mcv.getReg().?;
+ const dst_alias = registerAlias(dst_reg, max_abi_size);
+
+ if (has_avx) if (rhs_mcv.isMemory()) try self.asmRegisterRegisterMemoryImmediate(
+ .{ .v_ps, .shuf },
+ dst_alias,
+ registerAlias(lhs_mcv.getReg() orelse dst_reg, max_abi_size),
+ try rhs_mcv.mem(self, Memory.Size.fromSize(max_abi_size)),
+ Immediate.u(control),
+ ) else try self.asmRegisterRegisterRegisterImmediate(
+ .{ .v_ps, .shuf },
+ dst_alias,
+ registerAlias(lhs_mcv.getReg() orelse dst_reg, max_abi_size),
+ registerAlias(if (rhs_mcv.isRegister())
+ rhs_mcv.getReg().?
+ else
+ try self.copyToTmpRegister(operand_tys[sources[1].?], rhs_mcv), max_abi_size),
+ Immediate.u(control),
+ ) else if (rhs_mcv.isMemory()) try self.asmRegisterMemoryImmediate(
+ .{ ._ps, .shuf },
+ dst_alias,
+ try rhs_mcv.mem(self, Memory.Size.fromSize(max_abi_size)),
+ Immediate.u(control),
+ ) else try self.asmRegisterRegisterImmediate(
+ .{ ._ps, .shuf },
+ dst_alias,
+ registerAlias(if (rhs_mcv.isRegister())
+ rhs_mcv.getReg().?
+ else
+ try self.copyToTmpRegister(operand_tys[sources[1].?], rhs_mcv), max_abi_size),
+ Immediate.u(control),
+ );
+ break :result dst_mcv;
+ }
+
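+ // shufpd picks one qword per destination lane with a 1-bit selector: even
+ // lanes come from the first source, odd lanes from the second.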
+ shufpd: {
+ if (elem_abi_size != 8) break :shufpd;
+ if (max_abi_size > @as(u32, if (has_avx) 32 else 16)) break :shufpd;
+
+ var control: u4 = 0b0_0_0_0;
+ var sources = [1]?u1{null} ** 2;
+ for (mask_elems, 0..) |maybe_mask_elem, elem_index| {
+ const mask_elem = maybe_mask_elem orelse continue;
+ const mask_elem_index: u2 = @intCast(if (mask_elem < 0) ~mask_elem else mask_elem);
+ if (mask_elem_index & 0b10 != elem_index & 0b10) break :shufpd;
+
+ const source = @intFromBool(mask_elem < 0);
+ if (sources[elem_index & 0b01]) |prev_source| {
+ if (source != prev_source) break :shufpd;
+ } else sources[elem_index & 0b01] = source;
+
+ control |= @as(u4, @intCast(mask_elem_index & 0b01)) << @intCast(elem_index);
+ }
+ if (sources[0] orelse break :shufpd == sources[1] orelse break :shufpd) break :shufpd;
+
+ const operands: [2]Air.Inst.Ref = .{ extra.a, extra.b };
+ const operand_tys: [2]Type = .{ lhs_ty, rhs_ty };
+ const lhs_mcv = try self.resolveInst(operands[sources[0].?]);
+ const rhs_mcv = try self.resolveInst(operands[sources[1].?]);
+
+ const dst_mcv: MCValue = if (lhs_mcv.isRegister() and
+ self.reuseOperand(inst, operands[sources[0].?], sources[0].?, lhs_mcv))
+ lhs_mcv
+ else if (has_avx and lhs_mcv.isRegister())
+ .{ .register = try self.register_manager.allocReg(inst, abi.RegisterClass.sse) }
+ else
+ try self.copyToRegisterWithInstTracking(inst, operand_tys[sources[0].?], lhs_mcv);
+ const dst_reg = dst_mcv.getReg().?;
+ const dst_alias = registerAlias(dst_reg, max_abi_size);
+
+ if (has_avx) if (rhs_mcv.isMemory()) try self.asmRegisterRegisterMemoryImmediate(
+ .{ .v_pd, .shuf },
+ dst_alias,
+ registerAlias(lhs_mcv.getReg() orelse dst_reg, max_abi_size),
+ try rhs_mcv.mem(self, Memory.Size.fromSize(max_abi_size)),
+ Immediate.u(control),
+ ) else try self.asmRegisterRegisterRegisterImmediate(
+ .{ .v_pd, .shuf },
+ dst_alias,
+ registerAlias(lhs_mcv.getReg() orelse dst_reg, max_abi_size),
+ registerAlias(if (rhs_mcv.isRegister())
+ rhs_mcv.getReg().?
+ else
+ try self.copyToTmpRegister(operand_tys[sources[1].?], rhs_mcv), max_abi_size),
+ Immediate.u(control),
+ ) else if (rhs_mcv.isMemory()) try self.asmRegisterMemoryImmediate(
+ .{ ._pd, .shuf },
+ dst_alias,
+ try rhs_mcv.mem(self, Memory.Size.fromSize(max_abi_size)),
+ Immediate.u(control),
+ ) else try self.asmRegisterRegisterImmediate(
+ .{ ._pd, .shuf },
+ dst_alias,
+ registerAlias(if (rhs_mcv.isRegister())
+ rhs_mcv.getReg().?
+ else
+ try self.copyToTmpRegister(operand_tys[sources[1].?], rhs_mcv), max_abi_size),
+ Immediate.u(control),
+ );
+ break :result dst_mcv;
+ }
+
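+ // blendps/blendpd/pblendw choose each lane statically from one of the two
+ // sources via an immediate bit per lane, so every defined mask element must
+ // select its own lane index from either operand.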
+ blend: {
+ if (elem_abi_size < 2) break :blend;
+ if (dst_abi_size > @as(u32, if (has_avx) 32 else 16)) break :blend;
+ if (!self.hasFeature(.sse4_1)) break :blend;
+
+ var control: u8 = 0b0_0_0_0_0_0_0_0;
+ for (mask_elems, 0..) |maybe_mask_elem, elem_index| {
+ const mask_elem = maybe_mask_elem orelse continue;
+ const mask_elem_index =
+ math.cast(u4, if (mask_elem < 0) ~mask_elem else mask_elem) orelse break :blend;
+ if (mask_elem_index != elem_index) break :blend;
+
+ const select = @as(u8, @intFromBool(mask_elem < 0)) << @truncate(elem_index);
+ if (elem_index & 0b1000 == 0)
+ control |= select
+ else if (control & @as(u8, 0b1) << @truncate(elem_index) != select) break :blend;
+ }
+
+ if (!elem_ty.isRuntimeFloat() and self.hasFeature(.avx2)) vpblendd: {
+ const expanded_control = switch (elem_abi_size) {
+ 4 => control,
+ 8 => @as(u8, if (control & 0b0001 != 0) 0b00_00_00_11 else 0b00_00_00_00) |
+ @as(u8, if (control & 0b0010 != 0) 0b00_00_11_00 else 0b00_00_00_00) |
+ @as(u8, if (control & 0b0100 != 0) 0b00_11_00_00 else 0b00_00_00_00) |
+ @as(u8, if (control & 0b1000 != 0) 0b11_00_00_00 else 0b00_00_00_00),
+ else => break :vpblendd,
+ };
+
+ const lhs_mcv = try self.resolveInst(extra.a);
+ const lhs_reg = if (lhs_mcv.isRegister())
+ lhs_mcv.getReg().?
+ else
+ try self.copyToTmpRegister(dst_ty, lhs_mcv);
+ const lhs_lock = self.register_manager.lockReg(lhs_reg);
+ defer if (lhs_lock) |lock| self.register_manager.unlockReg(lock);
+
+ const rhs_mcv = try self.resolveInst(extra.b);
+ const dst_reg = try self.register_manager.allocReg(inst, abi.RegisterClass.sse);
+ if (rhs_mcv.isMemory()) try self.asmRegisterRegisterMemoryImmediate(
+ .{ .vp_d, .blend },
+ registerAlias(dst_reg, dst_abi_size),
+ registerAlias(lhs_reg, dst_abi_size),
+ try rhs_mcv.mem(self, Memory.Size.fromSize(dst_abi_size)),
+ Immediate.u(expanded_control),
+ ) else try self.asmRegisterRegisterRegisterImmediate(
+ .{ .vp_d, .blend },
+ registerAlias(dst_reg, dst_abi_size),
+ registerAlias(lhs_reg, dst_abi_size),
+ registerAlias(if (rhs_mcv.isRegister())
+ rhs_mcv.getReg().?
+ else
+ try self.copyToTmpRegister(dst_ty, rhs_mcv), dst_abi_size),
+ Immediate.u(expanded_control),
+ );
+ break :result .{ .register = dst_reg };
+ }
+
+ if (!elem_ty.isRuntimeFloat() or elem_abi_size == 2) pblendw: {
+ const expanded_control = switch (elem_abi_size) {
+ 2 => control,
+ 4 => if (dst_abi_size <= 16 or
+ @as(u4, @intCast(control >> 4)) == @as(u4, @truncate(control >> 0)))
+ @as(u8, if (control & 0b0001 != 0) 0b00_00_00_11 else 0b00_00_00_00) |
+ @as(u8, if (control & 0b0010 != 0) 0b00_00_11_00 else 0b00_00_00_00) |
+ @as(u8, if (control & 0b0100 != 0) 0b00_11_00_00 else 0b00_00_00_00) |
+ @as(u8, if (control & 0b1000 != 0) 0b11_00_00_00 else 0b00_00_00_00)
+ else
+ break :pblendw,
+ 8 => if (dst_abi_size <= 16 or
+ @as(u2, @intCast(control >> 2)) == @as(u2, @truncate(control >> 0)))
+ @as(u8, if (control & 0b01 != 0) 0b0000_1111 else 0b0000_0000) |
+ @as(u8, if (control & 0b10 != 0) 0b1111_0000 else 0b0000_0000)
+ else
+ break :pblendw,
+ 16 => break :pblendw,
+ else => unreachable,
+ };
+
+ const lhs_mcv = try self.resolveInst(extra.a);
+ const rhs_mcv = try self.resolveInst(extra.b);
+
+ const dst_mcv: MCValue = if (lhs_mcv.isRegister() and
+ self.reuseOperand(inst, extra.a, 0, lhs_mcv))
+ lhs_mcv
+ else if (has_avx and lhs_mcv.isRegister())
+ .{ .register = try self.register_manager.allocReg(inst, abi.RegisterClass.sse) }
+ else
+ try self.copyToRegisterWithInstTracking(inst, dst_ty, lhs_mcv);
+ const dst_reg = dst_mcv.getReg().?;
+
+ if (has_avx) if (rhs_mcv.isMemory()) try self.asmRegisterRegisterMemoryImmediate(
+ .{ .vp_w, .blend },
+ registerAlias(dst_reg, dst_abi_size),
+ registerAlias(if (lhs_mcv.isRegister())
+ lhs_mcv.getReg().?
+ else
+ dst_reg, dst_abi_size),
+ try rhs_mcv.mem(self, Memory.Size.fromSize(dst_abi_size)),
+ Immediate.u(expanded_control),
+ ) else try self.asmRegisterRegisterRegisterImmediate(
+ .{ .vp_w, .blend },
+ registerAlias(dst_reg, dst_abi_size),
+ registerAlias(if (lhs_mcv.isRegister())
+ lhs_mcv.getReg().?
+ else
+ dst_reg, dst_abi_size),
+ registerAlias(if (rhs_mcv.isRegister())
+ rhs_mcv.getReg().?
+ else
+ try self.copyToTmpRegister(dst_ty, rhs_mcv), dst_abi_size),
+ Immediate.u(expanded_control),
+ ) else if (rhs_mcv.isMemory()) try self.asmRegisterMemoryImmediate(
+ .{ .p_w, .blend },
+ registerAlias(dst_reg, dst_abi_size),
+ try rhs_mcv.mem(self, Memory.Size.fromSize(dst_abi_size)),
+ Immediate.u(expanded_control),
+ ) else try self.asmRegisterRegisterImmediate(
+ .{ .p_w, .blend },
+ registerAlias(dst_reg, dst_abi_size),
+ registerAlias(if (rhs_mcv.isRegister())
+ rhs_mcv.getReg().?
+ else
+ try self.copyToTmpRegister(dst_ty, rhs_mcv), dst_abi_size),
+ Immediate.u(expanded_control),
+ );
+ break :result .{ .register = dst_reg };
+ }
+
+ const expanded_control = switch (elem_abi_size) {
+ 4, 8 => control,
+ 16 => @as(u4, if (control & 0b01 != 0) 0b00_11 else 0b00_00) |
+ @as(u4, if (control & 0b10 != 0) 0b11_00 else 0b00_00),
+ else => unreachable,
+ };
+
+ const lhs_mcv = try self.resolveInst(extra.a);
+ const rhs_mcv = try self.resolveInst(extra.b);
+
+ const dst_mcv: MCValue = if (lhs_mcv.isRegister() and
+ self.reuseOperand(inst, extra.a, 0, lhs_mcv))
+ lhs_mcv
+ else if (has_avx and lhs_mcv.isRegister())
+ .{ .register = try self.register_manager.allocReg(inst, abi.RegisterClass.sse) }
+ else
+ try self.copyToRegisterWithInstTracking(inst, dst_ty, lhs_mcv);
+ const dst_reg = dst_mcv.getReg().?;
+
+ if (has_avx) if (rhs_mcv.isMemory()) try self.asmRegisterRegisterMemoryImmediate(
+ switch (elem_abi_size) {
+ 4 => .{ .v_ps, .blend },
+ 8, 16 => .{ .v_pd, .blend },
+ else => unreachable,
+ },
+ registerAlias(dst_reg, dst_abi_size),
+ registerAlias(if (lhs_mcv.isRegister())
+ lhs_mcv.getReg().?
+ else
+ dst_reg, dst_abi_size),
+ try rhs_mcv.mem(self, Memory.Size.fromSize(dst_abi_size)),
+ Immediate.u(expanded_control),
+ ) else try self.asmRegisterRegisterRegisterImmediate(
+ switch (elem_abi_size) {
+ 4 => .{ .v_ps, .blend },
+ 8, 16 => .{ .v_pd, .blend },
+ else => unreachable,
+ },
+ registerAlias(dst_reg, dst_abi_size),
+ registerAlias(if (lhs_mcv.isRegister())
+ lhs_mcv.getReg().?
+ else
+ dst_reg, dst_abi_size),
+ registerAlias(if (rhs_mcv.isRegister())
+ rhs_mcv.getReg().?
+ else
+ try self.copyToTmpRegister(dst_ty, rhs_mcv), dst_abi_size),
+ Immediate.u(expanded_control),
+ ) else if (rhs_mcv.isMemory()) try self.asmRegisterMemoryImmediate(
+ switch (elem_abi_size) {
+ 4 => .{ ._ps, .blend },
+ 8, 16 => .{ ._pd, .blend },
+ else => unreachable,
+ },
+ registerAlias(dst_reg, dst_abi_size),
+ try rhs_mcv.mem(self, Memory.Size.fromSize(dst_abi_size)),
+ Immediate.u(expanded_control),
+ ) else try self.asmRegisterRegisterImmediate(
+ switch (elem_abi_size) {
+ 4 => .{ ._ps, .blend },
+ 8, 16 => .{ ._pd, .blend },
+ else => unreachable,
+ },
+ registerAlias(dst_reg, dst_abi_size),
+ registerAlias(if (rhs_mcv.isRegister())
+ rhs_mcv.getReg().?
+ else
+ try self.copyToTmpRegister(dst_ty, rhs_mcv), dst_abi_size),
+ Immediate.u(expanded_control),
+ );
+ break :result .{ .register = dst_reg };
+ }
+
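+ // Fall back to a comptime select mask: all-ones lanes take operand b and
+ // all-zero lanes take operand a, applied with blendv when SSE4.1 is
+ // available and with and/andn/or otherwise.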
+ blendv: {
+ if (dst_abi_size > @as(u32, if (if (elem_abi_size >= 4)
+ has_avx
+ else
+ self.hasFeature(.avx2)) 32 else 16)) break :blendv;
+
+ const select_mask_elem_ty = try mod.intType(.unsigned, elem_abi_size * 8);
+ const select_mask_ty = try mod.vectorType(.{
+ .len = @intCast(mask_elems.len),
+ .child = select_mask_elem_ty.toIntern(),
+ });
+ var select_mask_elems: [32]InternPool.Index = undefined;
+ for (
+ select_mask_elems[0..mask_elems.len],
+ mask_elems,
+ 0..,
+ ) |*select_mask_elem, maybe_mask_elem, elem_index| {
+ const mask_elem = maybe_mask_elem orelse continue;
+ const mask_elem_index =
+ math.cast(u5, if (mask_elem < 0) ~mask_elem else mask_elem) orelse break :blendv;
+ if (mask_elem_index != elem_index) break :blendv;
+
+ select_mask_elem.* = (if (mask_elem < 0)
+ try select_mask_elem_ty.maxIntScalar(mod, select_mask_elem_ty)
+ else
+ try select_mask_elem_ty.minIntScalar(mod, select_mask_elem_ty)).toIntern();
+ }
+ const select_mask_mcv = try self.genTypedValue(.{
+ .ty = select_mask_ty,
+ .val = Value.fromInterned(try mod.intern(.{ .aggregate = .{
+ .ty = select_mask_ty.toIntern(),
+ .storage = .{ .elems = select_mask_elems[0..mask_elems.len] },
+ } })),
+ });
+
+ if (self.hasFeature(.sse4_1)) {
+ const mir_tag: Mir.Inst.FixedTag = .{
+ if ((elem_abi_size >= 4 and elem_ty.isRuntimeFloat()) or
+ (dst_abi_size > 16 and !self.hasFeature(.avx2))) switch (elem_abi_size) {
+ 4 => if (has_avx) .v_ps else ._ps,
+ 8 => if (has_avx) .v_pd else ._pd,
+ else => unreachable,
+ } else if (has_avx) .vp_b else .p_b,
+ .blendv,
+ };
+
+ const select_mask_reg = if (!has_avx) reg: {
+ try self.register_manager.getKnownReg(.xmm0, null);
+ try self.genSetReg(.xmm0, select_mask_elem_ty, select_mask_mcv, .{});
+ break :reg .xmm0;
+ } else try self.copyToTmpRegister(select_mask_ty, select_mask_mcv);
+ const select_mask_alias = registerAlias(select_mask_reg, dst_abi_size);
+ const select_mask_lock = self.register_manager.lockRegAssumeUnused(select_mask_reg);
+ defer self.register_manager.unlockReg(select_mask_lock);
+
+ const lhs_mcv = try self.resolveInst(extra.a);
+ const rhs_mcv = try self.resolveInst(extra.b);
+
+ const dst_mcv: MCValue = if (lhs_mcv.isRegister() and
+ self.reuseOperand(inst, extra.a, 0, lhs_mcv))
+ lhs_mcv
+ else if (has_avx and lhs_mcv.isRegister())
+ .{ .register = try self.register_manager.allocReg(inst, abi.RegisterClass.sse) }
+ else
+ try self.copyToRegisterWithInstTracking(inst, dst_ty, lhs_mcv);
+ const dst_reg = dst_mcv.getReg().?;
+ const dst_alias = registerAlias(dst_reg, dst_abi_size);
+
+ if (has_avx) if (rhs_mcv.isMemory()) try self.asmRegisterRegisterMemoryRegister(
+ mir_tag,
+ dst_alias,
+ if (lhs_mcv.isRegister())
+ registerAlias(lhs_mcv.getReg().?, dst_abi_size)
+ else
+ dst_alias,
+ try rhs_mcv.mem(self, Memory.Size.fromSize(dst_abi_size)),
+ select_mask_alias,
+ ) else try self.asmRegisterRegisterRegisterRegister(
+ mir_tag,
+ dst_alias,
+ if (lhs_mcv.isRegister())
+ registerAlias(lhs_mcv.getReg().?, dst_abi_size)
+ else
+ dst_alias,
+ registerAlias(if (rhs_mcv.isRegister())
+ rhs_mcv.getReg().?
+ else
+ try self.copyToTmpRegister(dst_ty, rhs_mcv), dst_abi_size),
+ select_mask_alias,
+ ) else if (rhs_mcv.isMemory()) try self.asmRegisterMemoryRegister(
+ mir_tag,
+ dst_alias,
+ try rhs_mcv.mem(self, Memory.Size.fromSize(dst_abi_size)),
+ select_mask_alias,
+ ) else try self.asmRegisterRegisterRegister(
+ mir_tag,
+ dst_alias,
+ registerAlias(if (rhs_mcv.isRegister())
+ rhs_mcv.getReg().?
+ else
+ try self.copyToTmpRegister(dst_ty, rhs_mcv), dst_abi_size),
+ select_mask_alias,
+ );
+ break :result dst_mcv;
+ }
+
+ const lhs_mcv = try self.resolveInst(extra.a);
+ const rhs_mcv = try self.resolveInst(extra.b);
+
+ const dst_mcv: MCValue = if (rhs_mcv.isRegister() and
+ self.reuseOperand(inst, extra.b, 1, rhs_mcv))
+ rhs_mcv
+ else
+ try self.copyToRegisterWithInstTracking(inst, dst_ty, rhs_mcv);
+ const dst_reg = dst_mcv.getReg().?;
+ const dst_alias = registerAlias(dst_reg, dst_abi_size);
+
+ const mask_reg = try self.copyToTmpRegister(select_mask_ty, select_mask_mcv);
+ const mask_alias = registerAlias(mask_reg, dst_abi_size);
+ const mask_lock = self.register_manager.lockRegAssumeUnused(mask_reg);
+ defer self.register_manager.unlockReg(mask_lock);
+
+ const mir_fixes: Mir.Inst.Fixes = if (elem_ty.isRuntimeFloat())
+ switch (elem_ty.floatBits(self.target.*)) {
+ 16, 80, 128 => .p_,
+ 32 => ._ps,
+ 64 => ._pd,
+ else => unreachable,
+ }
+ else
+ .p_;
+ try self.asmRegisterRegister(.{ mir_fixes, .@"and" }, dst_alias, mask_alias);
+ if (lhs_mcv.isMemory()) try self.asmRegisterMemory(
+ .{ mir_fixes, .andn },
+ mask_alias,
+ try lhs_mcv.mem(self, Memory.Size.fromSize(dst_abi_size)),
+ ) else try self.asmRegisterRegister(
+ .{ mir_fixes, .andn },
+ mask_alias,
+ if (lhs_mcv.isRegister())
+ lhs_mcv.getReg().?
+ else
+ try self.copyToTmpRegister(dst_ty, lhs_mcv),
+ );
+ try self.asmRegisterRegister(.{ mir_fixes, .@"or" }, dst_alias, mask_alias);
+ break :result dst_mcv;
+ }
+
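+ // Generic 128-bit fallback: pshufb each source with a comptime byte map (a
+ // set high bit zeroes the byte), then OR the two halves together.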
+ pshufb: {
+ if (max_abi_size > 16) break :pshufb;
+ if (!self.hasFeature(.ssse3)) break :pshufb;
+
+ const temp_regs =
+ try self.register_manager.allocRegs(2, .{ inst, null }, abi.RegisterClass.sse);
+ const temp_locks = self.register_manager.lockRegsAssumeUnused(2, temp_regs);
+ defer for (temp_locks) |lock| self.register_manager.unlockReg(lock);
+
+ const lhs_temp_alias = registerAlias(temp_regs[0], max_abi_size);
+ try self.genSetReg(temp_regs[0], lhs_ty, .{ .air_ref = extra.a }, .{});
+
+ const rhs_temp_alias = registerAlias(temp_regs[1], max_abi_size);
+ try self.genSetReg(temp_regs[1], rhs_ty, .{ .air_ref = extra.b }, .{});
+
+ var lhs_mask_elems: [16]InternPool.Index = undefined;
+ for (lhs_mask_elems[0..max_abi_size], 0..) |*lhs_mask_elem, byte_index| {
+ const elem_index = byte_index / elem_abi_size;
+ lhs_mask_elem.* = try mod.intern(.{ .int = .{
+ .ty = .u8_type,
+ .storage = .{ .u64 = if (elem_index >= mask_elems.len) 0b1_00_00000 else elem: {
+ const mask_elem = mask_elems[elem_index] orelse break :elem 0b1_00_00000;
+ if (mask_elem < 0) break :elem 0b1_00_00000;
+ const mask_elem_index: u31 = @intCast(mask_elem);
+ const byte_off: u32 = @intCast(byte_index % elem_abi_size);
+ break :elem @intCast(mask_elem_index * elem_abi_size + byte_off);
+ } },
+ } });
+ }
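+            // pshufb control-byte semantics: if bit 7 is set (0b1_00_00000 ==
+            // 0x80) the destination byte is zeroed, otherwise the low bits
+            // index a source byte; e.g. a control of .{ 0x80, 0x00 } produces
+            // .{ 0, src[0] }.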
+ const lhs_mask_ty = try mod.vectorType(.{ .len = max_abi_size, .child = .u8_type });
+ const lhs_mask_mcv = try self.genTypedValue(.{
+ .ty = lhs_mask_ty,
+ .val = Value.fromInterned(try mod.intern(.{ .aggregate = .{
+ .ty = lhs_mask_ty.toIntern(),
+ .storage = .{ .elems = lhs_mask_elems[0..max_abi_size] },
+ } })),
+ });
+ const lhs_mask_mem: Memory = .{
+ .base = .{ .reg = try self.copyToTmpRegister(Type.usize, lhs_mask_mcv.address()) },
+ .mod = .{ .rm = .{ .size = Memory.Size.fromSize(@max(max_abi_size, 16)) } },
+ };
+ if (has_avx) try self.asmRegisterRegisterMemory(
+ .{ .vp_b, .shuf },
+ lhs_temp_alias,
+ lhs_temp_alias,
+ lhs_mask_mem,
+ ) else try self.asmRegisterMemory(
+ .{ .p_b, .shuf },
+ lhs_temp_alias,
+ lhs_mask_mem,
+ );
+
+ var rhs_mask_elems: [16]InternPool.Index = undefined;
+ for (rhs_mask_elems[0..max_abi_size], 0..) |*rhs_mask_elem, byte_index| {
+ const elem_index = byte_index / elem_abi_size;
+ rhs_mask_elem.* = try mod.intern(.{ .int = .{
+ .ty = .u8_type,
+ .storage = .{ .u64 = if (elem_index >= mask_elems.len) 0b1_00_00000 else elem: {
+ const mask_elem = mask_elems[elem_index] orelse break :elem 0b1_00_00000;
+ if (mask_elem >= 0) break :elem 0b1_00_00000;
+ const mask_elem_index: u31 = @intCast(~mask_elem);
+ const byte_off: u32 = @intCast(byte_index % elem_abi_size);
+ break :elem @intCast(mask_elem_index * elem_abi_size + byte_off);
+ } },
+ } });
+ }
+ const rhs_mask_ty = try mod.vectorType(.{ .len = max_abi_size, .child = .u8_type });
+ const rhs_mask_mcv = try self.genTypedValue(.{
+ .ty = rhs_mask_ty,
+ .val = Value.fromInterned(try mod.intern(.{ .aggregate = .{
+ .ty = rhs_mask_ty.toIntern(),
+ .storage = .{ .elems = rhs_mask_elems[0..max_abi_size] },
+ } })),
+ });
+ const rhs_mask_mem: Memory = .{
+ .base = .{ .reg = try self.copyToTmpRegister(Type.usize, rhs_mask_mcv.address()) },
+ .mod = .{ .rm = .{ .size = Memory.Size.fromSize(@max(max_abi_size, 16)) } },
+ };
+ if (has_avx) try self.asmRegisterRegisterMemory(
+ .{ .vp_b, .shuf },
+ rhs_temp_alias,
+ rhs_temp_alias,
+ rhs_mask_mem,
+ ) else try self.asmRegisterMemory(
+ .{ .p_b, .shuf },
+ rhs_temp_alias,
+ rhs_mask_mem,
+ );
+
+ if (has_avx) try self.asmRegisterRegisterRegister(
+ .{ switch (elem_ty.zigTypeTag(mod)) {
+ else => break :result null,
+ .Int => .vp_,
+ .Float => switch (elem_ty.floatBits(self.target.*)) {
+ 32 => .v_ps,
+ 64 => .v_pd,
+ 16, 80, 128 => break :result null,
+ else => unreachable,
+ },
+ }, .@"or" },
+ lhs_temp_alias,
+ lhs_temp_alias,
+ rhs_temp_alias,
+ ) else try self.asmRegisterRegister(
+ .{ switch (elem_ty.zigTypeTag(mod)) {
+ else => break :result null,
+ .Int => .p_,
+ .Float => switch (elem_ty.floatBits(self.target.*)) {
+ 32 => ._ps,
+ 64 => ._pd,
+ 16, 80, 128 => break :result null,
+ else => unreachable,
+ },
+ }, .@"or" },
+ lhs_temp_alias,
+ rhs_temp_alias,
+ );
+ break :result .{ .register = temp_regs[0] };
+ }
+
+ break :result null;
+ }) orelse return self.fail("TODO implement airShuffle from {} and {} to {} with {}", .{
+ lhs_ty.fmt(mod), rhs_ty.fmt(mod), dst_ty.fmt(mod),
+ Value.fromInterned(extra.mask).fmtValue(
+ Type.fromInterned(mod.intern_pool.typeOf(extra.mask)),
+ mod,
+ ),
+ });
+ return self.finishAir(inst, result, .{ extra.a, extra.b, .none });
}

fn airReduce(self: *Self, inst: Air.Inst.Index) !void {
@@ -16751,7 +18083,7 @@ fn airAggregateInit(self: *Self, inst: Air.Inst.Index) !void {
},
.Array, .Vector => {
const elem_ty = result_ty.childType(mod);
- if (result_ty.isVector(mod) and elem_ty.bitSize(mod) == 1) {
+ if (result_ty.isVector(mod) and elem_ty.toIntern() == .bool_type) {
const result_size: u32 = @intCast(result_ty.abiSize(mod));
const dst_reg = try self.register_manager.allocReg(inst, abi.RegisterClass.gp);
try self.asmRegisterRegister(
@@ -17801,7 +19133,7 @@ fn splitType(self: *Self, ty: Type) ![2]Type {
else => unreachable,
},
.float => Type.f32,
- .float_combine => try mod.vectorType(.{ .len = 2, .child = .f32_type }),
+ .float_combine => try mod.arrayType(.{ .len = 2, .child = .f32_type }),
.sse => Type.f64,
else => break,
};
diff --git a/src/arch/x86_64/Encoding.zig b/src/arch/x86_64/Encoding.zig
index 8b91a20a4f..c4bf71e233 100644
--- a/src/arch/x86_64/Encoding.zig
+++ b/src/arch/x86_64/Encoding.zig
@@ -324,16 +324,19 @@ pub const Mnemonic = enum {
// SSE3
movddup, movshdup, movsldup,
// SSSE3
- pabsb, pabsd, pabsw, palignr,
+ pabsb, pabsd, pabsw, palignr, pshufb,
// SSE4.1
blendpd, blendps, blendvpd, blendvps,
extractps,
insertps,
packusdw,
+ pblendvb, pblendw,
pcmpeqq,
pextrb, pextrd, pextrq,
pinsrb, pinsrd, pinsrq,
pmaxsb, pmaxsd, pmaxud, pmaxuw, pminsb, pminsd, pminud, pminuw,
+ pmovsxbd, pmovsxbq, pmovsxbw, pmovsxdq, pmovsxwd, pmovsxwq,
+ pmovzxbd, pmovzxbq, pmovzxbw, pmovzxdq, pmovzxwd, pmovzxwq,
pmulld,
roundpd, roundps, roundsd, roundss,
// SSE4.2
@@ -377,7 +380,8 @@ pub const Mnemonic = enum {
vpabsb, vpabsd, vpabsw,
vpackssdw, vpacksswb, vpackusdw, vpackuswb,
vpaddb, vpaddd, vpaddq, vpaddsb, vpaddsw, vpaddusb, vpaddusw, vpaddw,
- vpalignr, vpand, vpandn, vpclmulqdq,
+ vpalignr, vpand, vpandn,
+ vpblendvb, vpblendw, vpclmulqdq,
vpcmpeqb, vpcmpeqd, vpcmpeqq, vpcmpeqw,
vpcmpgtb, vpcmpgtd, vpcmpgtq, vpcmpgtw,
vpextrb, vpextrd, vpextrq, vpextrw,
@@ -385,9 +389,11 @@ pub const Mnemonic = enum {
vpmaxsb, vpmaxsd, vpmaxsw, vpmaxub, vpmaxud, vpmaxuw,
vpminsb, vpminsd, vpminsw, vpminub, vpminud, vpminuw,
vpmovmskb,
+ vpmovsxbd, vpmovsxbq, vpmovsxbw, vpmovsxdq, vpmovsxwd, vpmovsxwq,
+ vpmovzxbd, vpmovzxbq, vpmovzxbw, vpmovzxdq, vpmovzxwd, vpmovzxwq,
vpmulhw, vpmulld, vpmullw,
vpor,
- vpshufd, vpshufhw, vpshuflw,
+ vpshufb, vpshufd, vpshufhw, vpshuflw,
vpslld, vpslldq, vpsllq, vpsllw,
vpsrad, vpsraq, vpsraw,
vpsrld, vpsrldq, vpsrlq, vpsrlw,
@@ -409,7 +415,8 @@ pub const Mnemonic = enum {
vfmadd132sd, vfmadd213sd, vfmadd231sd,
vfmadd132ss, vfmadd213ss, vfmadd231ss,
// AVX2
- vpbroadcastb, vpbroadcastd, vpbroadcasti128, vpbroadcastq, vpbroadcastw,
+ vbroadcasti128, vpbroadcastb, vpbroadcastd, vpbroadcastq, vpbroadcastw,
+ vextracti128, vinserti128, vpblendd,
// zig fmt: on
};
diff --git a/src/arch/x86_64/Lower.zig b/src/arch/x86_64/Lower.zig
index 4e9c37e5aa..13b97b551a 100644
--- a/src/arch/x86_64/Lower.zig
+++ b/src/arch/x86_64/Lower.zig
@@ -477,8 +477,9 @@ fn generic(lower: *Lower, inst: Mir.Inst) Error!void {
.rri_s, .rri_u => inst.data.rri.fixes,
.ri_s, .ri_u => inst.data.ri.fixes,
.ri64, .rm, .rmi_s, .mr => inst.data.rx.fixes,
- .mrr, .rrm => inst.data.rrx.fixes,
+ .mrr, .rrm, .rmr => inst.data.rrx.fixes,
.rmi, .mri => inst.data.rix.fixes,
+ .rrmr => inst.data.rrrx.fixes,
.rrmi => inst.data.rrix.fixes,
.mi_u, .mi_s => inst.data.x.fixes,
.m => inst.data.x.fixes,
@@ -565,6 +566,11 @@ fn generic(lower: *Lower, inst: Mir.Inst) Error!void {
.{ .reg = inst.data.rx.r1 },
.{ .mem = lower.mem(inst.data.rx.payload) },
},
+ .rmr => &.{
+ .{ .reg = inst.data.rrx.r1 },
+ .{ .mem = lower.mem(inst.data.rrx.payload) },
+ .{ .reg = inst.data.rrx.r2 },
+ },
.rmi => &.{
.{ .reg = inst.data.rix.r1 },
.{ .mem = lower.mem(inst.data.rix.payload) },
@@ -597,6 +603,12 @@ fn generic(lower: *Lower, inst: Mir.Inst) Error!void {
.{ .reg = inst.data.rrx.r2 },
.{ .mem = lower.mem(inst.data.rrx.payload) },
},
+ .rrmr => &.{
+ .{ .reg = inst.data.rrrx.r1 },
+ .{ .reg = inst.data.rrrx.r2 },
+ .{ .mem = lower.mem(inst.data.rrrx.payload) },
+ .{ .reg = inst.data.rrrx.r3 },
+ },
.rrmi => &.{
.{ .reg = inst.data.rrix.r1 },
.{ .reg = inst.data.rrix.r2 },
diff --git a/src/arch/x86_64/Mir.zig b/src/arch/x86_64/Mir.zig
index dea9bb50cb..6cccb34b3e 100644
--- a/src/arch/x86_64/Mir.zig
+++ b/src/arch/x86_64/Mir.zig
@@ -230,6 +230,8 @@ pub const Inst = struct {
v_d,
/// VEX-Encoded ___ QuadWord
v_q,
+ /// VEX-Encoded ___ Integer Data
+ v_i128,
/// VEX-Encoded Packed ___
vp_,
/// VEX-Encoded Packed ___ Byte
@@ -242,8 +244,6 @@ pub const Inst = struct {
vp_q,
/// VEX-Encoded Packed ___ Double Quadword
vp_dq,
- /// VEX-Encoded Packed ___ Integer Data
- vp_i128,
/// VEX-Encoded ___ Scalar Single-Precision Values
v_ss,
/// VEX-Encoded ___ Packed Single-Precision Values
@@ -654,10 +654,19 @@ pub const Inst = struct {
/// Variable blend packed double-precision floating-point values
blendv,
/// Extract packed floating-point values
+ /// Extract packed integer values
extract,
/// Insert scalar single-precision floating-point value
/// Insert packed floating-point values
insert,
+ /// Packed move with sign extend
+ movsxb,
+ movsxd,
+ movsxw,
+ /// Packed move with zero extend
+ movzxb,
+ movzxd,
+ movzxw,
/// Round packed single-precision floating-point values
/// Round scalar single-precision floating-point value
/// Round packed double-precision floating-point values
@@ -688,6 +697,7 @@ pub const Inst = struct {
sha256rnds2,
/// Load with broadcast floating-point data
+ /// Load integer and broadcast
broadcast,
/// Convert 16-bit floating-point values to single-precision floating-point values
@@ -762,8 +772,11 @@ pub const Inst = struct {
/// Uses `imm` payload.
rel,
/// Register, memory operands.
- /// Uses `rx` payload.
+ /// Uses `rx` payload with extra data of type `Memory`.
rm,
+ /// Register, memory, register operands.
+ /// Uses `rrx` payload with extra data of type `Memory`.
+ rmr,
/// Register, memory, immediate (word) operands.
/// Uses `rix` payload with extra data of type `Memory`.
rmi,
@@ -776,6 +789,9 @@ pub const Inst = struct {
/// Register, register, memory.
/// Uses `rrix` payload with extra data of type `Memory`.
rrm,
+ /// Register, register, memory, register.
+ /// Uses `rrrx` payload with extra data of type `Memory`.
+ rrmr,
/// Register, register, memory, immediate (byte) operands.
/// Uses `rrix` payload with extra data of type `Memory`.
rrmi,
@@ -953,6 +969,14 @@ pub const Inst = struct {
r2: Register,
payload: u32,
},
+ /// Register, register, register, followed by Custom payload found in extra.
+ rrrx: struct {
+ fixes: Fixes = ._,
+ r1: Register,
+ r2: Register,
+ r3: Register,
+ payload: u32,
+ },
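+    // e.g. the `rrmr` format above would carry `vpblendvb xmm1, xmm2, [mem],
+    // xmm3` (illustrative operands) as r1/r2 plus a `Memory` payload, with
+    // the trailing blend-mask register in `r3`.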
/// Register, byte immediate, followed by Custom payload found in extra.
rix: struct {
fixes: Fixes = ._,
diff --git a/src/arch/x86_64/encodings.zig b/src/arch/x86_64/encodings.zig
index 545e6b23ce..d4a7dcafe7 100644
--- a/src/arch/x86_64/encodings.zig
+++ b/src/arch/x86_64/encodings.zig
@@ -1185,6 +1185,8 @@ pub const table = [_]Entry{
.{ .palignr, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x0f }, 0, .none, .ssse3 },
+ .{ .pshufb, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x00 }, 0, .none, .ssse3 },
+
// SSE4.1
.{ .blendpd, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x0d }, 0, .none, .sse4_1 },
@@ -1202,6 +1204,11 @@ pub const table = [_]Entry{
.{ .packusdw, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x2b }, 0, .none, .sse4_1 },
+ .{ .pblendvb, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x10 }, 0, .none, .sse4_1 },
+ .{ .pblendvb, .rm, &.{ .xmm, .xmm_m128, .xmm0 }, &.{ 0x66, 0x0f, 0x38, 0x10 }, 0, .none, .sse4_1 },
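+    // pblendvb reads its mask from the implicit xmm0 operand; the two entries
+    // above cover both the implicit form and the spelled-out assembler form.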
+
+ .{ .pblendw, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x0e }, 0, .none, .sse4_1 },
+
.{ .pcmpeqq, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x29 }, 0, .none, .sse4_1 },
.{ .pextrb, .mri, &.{ .r32_m8, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x14 }, 0, .none, .sse4_1 },
@@ -1228,6 +1235,20 @@ pub const table = [_]Entry{
.{ .pminud, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x3b }, 0, .none, .sse4_1 },
+ .{ .pmovsxbw, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0x66, 0x0f, 0x38, 0x20 }, 0, .none, .sse4_1 },
+ .{ .pmovsxbd, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0x66, 0x0f, 0x38, 0x21 }, 0, .none, .sse4_1 },
+ .{ .pmovsxbq, .rm, &.{ .xmm, .xmm_m16 }, &.{ 0x66, 0x0f, 0x38, 0x22 }, 0, .none, .sse4_1 },
+ .{ .pmovsxwd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0x66, 0x0f, 0x38, 0x23 }, 0, .none, .sse4_1 },
+ .{ .pmovsxwq, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0x66, 0x0f, 0x38, 0x24 }, 0, .none, .sse4_1 },
+ .{ .pmovsxdq, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0x66, 0x0f, 0x38, 0x25 }, 0, .none, .sse4_1 },
+
+ .{ .pmovzxbw, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0x66, 0x0f, 0x38, 0x30 }, 0, .none, .sse4_1 },
+ .{ .pmovzxbd, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0x66, 0x0f, 0x38, 0x31 }, 0, .none, .sse4_1 },
+ .{ .pmovzxbq, .rm, &.{ .xmm, .xmm_m16 }, &.{ 0x66, 0x0f, 0x38, 0x32 }, 0, .none, .sse4_1 },
+ .{ .pmovzxwd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0x66, 0x0f, 0x38, 0x33 }, 0, .none, .sse4_1 },
+ .{ .pmovzxwq, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0x66, 0x0f, 0x38, 0x34 }, 0, .none, .sse4_1 },
+ .{ .pmovzxdq, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0x66, 0x0f, 0x38, 0x35 }, 0, .none, .sse4_1 },
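+    // e.g. `pmovzxbw xmm, m64` loads 8 source bytes and zero-extends each to
+    // a word; the m16/m32/m64 operand widths mirror how many source elements
+    // each widening consumes.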
+
.{ .pmulld, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x40 }, 0, .none, .sse4_1 },
.{ .roundpd, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x09 }, 0, .none, .sse4_1 },
@@ -1528,6 +1549,10 @@ pub const table = [_]Entry{
.{ .vpandn, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xdf }, 0, .vex_128_wig, .avx },
+ .{ .vpblendvb, .rvmr, &.{ .xmm, .xmm, .xmm_m128, .xmm }, &.{ 0x66, 0x0f, 0x3a, 0x4c }, 0, .vex_128_w0, .avx },
+
+ .{ .vpblendw, .rvmi, &.{ .xmm, .xmm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x0e }, 0, .vex_128_wig, .avx },
+
.{ .vpclmulqdq, .rvmi, &.{ .xmm, .xmm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x44 }, 0, .vex_128_wig, .@"pclmul avx" },
.{ .vpcmpeqb, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x74 }, 0, .vex_128_wig, .avx },
@@ -1576,6 +1601,20 @@ pub const table = [_]Entry{
.{ .vpmovmskb, .rm, &.{ .r32, .xmm }, &.{ 0x66, 0x0f, 0xd7 }, 0, .vex_128_wig, .avx },
.{ .vpmovmskb, .rm, &.{ .r64, .xmm }, &.{ 0x66, 0x0f, 0xd7 }, 0, .vex_128_wig, .avx },
+ .{ .vpmovsxbw, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0x66, 0x0f, 0x38, 0x20 }, 0, .vex_128_wig, .avx },
+ .{ .vpmovsxbd, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0x66, 0x0f, 0x38, 0x21 }, 0, .vex_128_wig, .avx },
+ .{ .vpmovsxbq, .rm, &.{ .xmm, .xmm_m16 }, &.{ 0x66, 0x0f, 0x38, 0x22 }, 0, .vex_128_wig, .avx },
+ .{ .vpmovsxwd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0x66, 0x0f, 0x38, 0x23 }, 0, .vex_128_wig, .avx },
+ .{ .vpmovsxwq, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0x66, 0x0f, 0x38, 0x24 }, 0, .vex_128_wig, .avx },
+ .{ .vpmovsxdq, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0x66, 0x0f, 0x38, 0x25 }, 0, .vex_128_wig, .avx },
+
+ .{ .vpmovzxbw, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0x66, 0x0f, 0x38, 0x30 }, 0, .vex_128_wig, .avx },
+ .{ .vpmovzxbd, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0x66, 0x0f, 0x38, 0x31 }, 0, .vex_128_wig, .avx },
+ .{ .vpmovzxbq, .rm, &.{ .xmm, .xmm_m16 }, &.{ 0x66, 0x0f, 0x38, 0x32 }, 0, .vex_128_wig, .avx },
+ .{ .vpmovzxwd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0x66, 0x0f, 0x38, 0x33 }, 0, .vex_128_wig, .avx },
+ .{ .vpmovzxwq, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0x66, 0x0f, 0x38, 0x34 }, 0, .vex_128_wig, .avx },
+ .{ .vpmovzxdq, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0x66, 0x0f, 0x38, 0x35 }, 0, .vex_128_wig, .avx },
+
.{ .vpmulhw, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xe5 }, 0, .vex_128_wig, .avx },
.{ .vpmulld, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x40 }, 0, .vex_128_wig, .avx },
@@ -1584,6 +1623,8 @@ pub const table = [_]Entry{
.{ .vpor, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xeb }, 0, .vex_128_wig, .avx },
+ .{ .vpshufb, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x00 }, 0, .vex_128_wig, .avx },
+
.{ .vpshufd, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0x70 }, 0, .vex_128_wig, .avx },
.{ .vpshufhw, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0xf3, 0x0f, 0x70 }, 0, .vex_128_wig, .avx },
@@ -1728,6 +1769,10 @@ pub const table = [_]Entry{
.{ .vbroadcastss, .rm, &.{ .ymm, .xmm }, &.{ 0x66, 0x0f, 0x38, 0x18 }, 0, .vex_256_w0, .avx2 },
.{ .vbroadcastsd, .rm, &.{ .ymm, .xmm }, &.{ 0x66, 0x0f, 0x38, 0x19 }, 0, .vex_256_w0, .avx2 },
+ .{ .vextracti128, .mri, &.{ .xmm_m128, .ymm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x39 }, 0, .vex_256_w0, .avx2 },
+
+ .{ .vinserti128, .rvmi, &.{ .ymm, .ymm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x38 }, 0, .vex_256_w0, .avx2 },
+
.{ .vpabsb, .rm, &.{ .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x1c }, 0, .vex_256_wig, .avx2 },
.{ .vpabsd, .rm, &.{ .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x1e }, 0, .vex_256_wig, .avx2 },
.{ .vpabsw, .rm, &.{ .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x1d }, 0, .vex_256_wig, .avx2 },
@@ -1756,6 +1801,13 @@ pub const table = [_]Entry{
.{ .vpandn, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xdf }, 0, .vex_256_wig, .avx2 },
+ .{ .vpblendd, .rvmi, &.{ .xmm, .xmm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x02 }, 0, .vex_128_w0, .avx2 },
+ .{ .vpblendd, .rvmi, &.{ .ymm, .ymm, .ymm_m256, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x02 }, 0, .vex_256_w0, .avx2 },
+
+ .{ .vpblendvb, .rvmr, &.{ .ymm, .ymm, .ymm_m256, .ymm }, &.{ 0x66, 0x0f, 0x3a, 0x4c }, 0, .vex_256_w0, .avx2 },
+
+ .{ .vpblendw, .rvmi, &.{ .ymm, .ymm, .ymm_m256, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x0e }, 0, .vex_256_wig, .avx2 },
+
.{ .vpbroadcastb, .rm, &.{ .xmm, .xmm_m8 }, &.{ 0x66, 0x0f, 0x38, 0x78 }, 0, .vex_128_w0, .avx2 },
.{ .vpbroadcastb, .rm, &.{ .ymm, .xmm_m8 }, &.{ 0x66, 0x0f, 0x38, 0x78 }, 0, .vex_256_w0, .avx2 },
.{ .vpbroadcastw, .rm, &.{ .xmm, .xmm_m16 }, &.{ 0x66, 0x0f, 0x38, 0x79 }, 0, .vex_128_w0, .avx2 },
@@ -1764,7 +1816,7 @@ pub const table = [_]Entry{
.{ .vpbroadcastd, .rm, &.{ .ymm, .xmm_m32 }, &.{ 0x66, 0x0f, 0x38, 0x58 }, 0, .vex_256_w0, .avx2 },
.{ .vpbroadcastq, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0x66, 0x0f, 0x38, 0x59 }, 0, .vex_128_w0, .avx2 },
.{ .vpbroadcastq, .rm, &.{ .ymm, .xmm_m64 }, &.{ 0x66, 0x0f, 0x38, 0x59 }, 0, .vex_256_w0, .avx2 },
- .{ .vpbroadcasti128, .rm, &.{ .ymm, .m128 }, &.{ 0x66, 0x0f, 0x38, 0x5a }, 0, .vex_256_w0, .avx2 },
+ .{ .vbroadcasti128, .rm, &.{ .ymm, .m128 }, &.{ 0x66, 0x0f, 0x38, 0x5a }, 0, .vex_256_w0, .avx2 },
.{ .vpcmpeqb, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x74 }, 0, .vex_256_wig, .avx2 },
.{ .vpcmpeqw, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x75 }, 0, .vex_256_wig, .avx2 },
@@ -1799,6 +1851,20 @@ pub const table = [_]Entry{
.{ .vpmovmskb, .rm, &.{ .r32, .ymm }, &.{ 0x66, 0x0f, 0xd7 }, 0, .vex_256_wig, .avx2 },
.{ .vpmovmskb, .rm, &.{ .r64, .ymm }, &.{ 0x66, 0x0f, 0xd7 }, 0, .vex_256_wig, .avx2 },
+ .{ .vpmovsxbw, .rm, &.{ .ymm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x20 }, 0, .vex_256_wig, .avx2 },
+ .{ .vpmovsxbd, .rm, &.{ .ymm, .xmm_m64 }, &.{ 0x66, 0x0f, 0x38, 0x21 }, 0, .vex_256_wig, .avx2 },
+ .{ .vpmovsxbq, .rm, &.{ .ymm, .xmm_m32 }, &.{ 0x66, 0x0f, 0x38, 0x22 }, 0, .vex_256_wig, .avx2 },
+ .{ .vpmovsxwd, .rm, &.{ .ymm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x23 }, 0, .vex_256_wig, .avx2 },
+ .{ .vpmovsxwq, .rm, &.{ .ymm, .xmm_m64 }, &.{ 0x66, 0x0f, 0x38, 0x24 }, 0, .vex_256_wig, .avx2 },
+ .{ .vpmovsxdq, .rm, &.{ .ymm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x25 }, 0, .vex_256_wig, .avx2 },
+
+ .{ .vpmovzxbw, .rm, &.{ .ymm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x30 }, 0, .vex_256_wig, .avx2 },
+ .{ .vpmovzxbd, .rm, &.{ .ymm, .xmm_m64 }, &.{ 0x66, 0x0f, 0x38, 0x31 }, 0, .vex_256_wig, .avx2 },
+ .{ .vpmovzxbq, .rm, &.{ .ymm, .xmm_m32 }, &.{ 0x66, 0x0f, 0x38, 0x32 }, 0, .vex_256_wig, .avx2 },
+ .{ .vpmovzxwd, .rm, &.{ .ymm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x33 }, 0, .vex_256_wig, .avx2 },
+ .{ .vpmovzxwq, .rm, &.{ .ymm, .xmm_m64 }, &.{ 0x66, 0x0f, 0x38, 0x34 }, 0, .vex_256_wig, .avx2 },
+ .{ .vpmovzxdq, .rm, &.{ .ymm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x35 }, 0, .vex_256_wig, .avx2 },
+
.{ .vpmulhw, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xe5 }, 0, .vex_256_wig, .avx2 },
.{ .vpmulld, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x40 }, 0, .vex_256_wig, .avx2 },
@@ -1807,6 +1873,7 @@ pub const table = [_]Entry{
.{ .vpor, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xeb }, 0, .vex_256_wig, .avx2 },
+ .{ .vpshufb, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x00 }, 0, .vex_256_wig, .avx2 },
.{ .vpshufd, .rmi, &.{ .ymm, .ymm_m256, .imm8 }, &.{ 0x66, 0x0f, 0x70 }, 0, .vex_256_wig, .avx2 },
.{ .vpshufhw, .rmi, &.{ .ymm, .ymm_m256, .imm8 }, &.{ 0xf3, 0x0f, 0x70 }, 0, .vex_256_wig, .avx2 },
diff --git a/src/codegen.zig b/src/codegen.zig
index 118bab4be9..7bcba80065 100644
--- a/src/codegen.zig
+++ b/src/codegen.zig
@@ -405,7 +405,7 @@ pub fn generateSymbol(
.vector_type => |vector_type| {
const abi_size = math.cast(usize, typed_value.ty.abiSize(mod)) orelse
return error.Overflow;
- if (Type.fromInterned(vector_type.child).bitSize(mod) == 1) {
+ if (vector_type.child == .bool_type) {
const bytes = try code.addManyAsSlice(abi_size);
@memset(bytes, 0xaa);
var index: usize = 0;
@@ -443,37 +443,34 @@ pub fn generateSymbol(
},
}) byte.* |= mask else byte.* &= ~mask;
}
- } else switch (aggregate.storage) {
- .bytes => |bytes| try code.appendSlice(bytes),
- .elems, .repeated_elem => {
- var index: u64 = 0;
- while (index < vector_type.len) : (index += 1) {
- switch (try generateSymbol(bin_file, src_loc, .{
- .ty = Type.fromInterned(vector_type.child),
- .val = Value.fromInterned(switch (aggregate.storage) {
- .bytes => unreachable,
- .elems => |elems| elems[
- math.cast(usize, index) orelse return error.Overflow
- ],
- .repeated_elem => |elem| elem,
- }),
- }, code, debug_output, reloc_info)) {
- .ok => {},
- .fail => |em| return .{ .fail = em },
+ } else {
+ switch (aggregate.storage) {
+ .bytes => |bytes| try code.appendSlice(bytes),
+ .elems, .repeated_elem => {
+ var index: u64 = 0;
+ while (index < vector_type.len) : (index += 1) {
+ switch (try generateSymbol(bin_file, src_loc, .{
+ .ty = Type.fromInterned(vector_type.child),
+ .val = Value.fromInterned(switch (aggregate.storage) {
+ .bytes => unreachable,
+ .elems => |elems| elems[
+ math.cast(usize, index) orelse return error.Overflow
+ ],
+ .repeated_elem => |elem| elem,
+ }),
+ }, code, debug_output, reloc_info)) {
+ .ok => {},
+ .fail => |em| return .{ .fail = em },
+ }
}
- }
- },
- }
+ },
+ }
- const padding = abi_size - (math.cast(usize, math.divCeil(
- u64,
- Type.fromInterned(vector_type.child).bitSize(mod) * vector_type.len,
- 8,
- ) catch |err| switch (err) {
- error.DivisionByZero => unreachable,
- else => |e| return e,
- }) orelse return error.Overflow);
- if (padding > 0) try code.appendNTimes(0, padding);
+ const padding = abi_size -
+ (math.cast(usize, Type.fromInterned(vector_type.child).abiSize(mod) * vector_type.len) orelse
+ return error.Overflow);
+ if (padding > 0) try code.appendNTimes(0, padding);
+ }
},
.anon_struct_type => |tuple| {
const struct_begin = code.items.len;
diff --git a/src/codegen/c.zig b/src/codegen/c.zig
index cf372ff5ef..0977acf7fe 100644
--- a/src/codegen/c.zig
+++ b/src/codegen/c.zig
@@ -4140,9 +4140,7 @@ fn airCmpOp(
if (need_cast) try writer.writeAll("(void*)");
try f.writeCValue(writer, lhs, .Other);
try v.elem(f, writer);
- try writer.writeByte(' ');
try writer.writeAll(compareOperatorC(operator));
- try writer.writeByte(' ');
if (need_cast) try writer.writeAll("(void*)");
try f.writeCValue(writer, rhs, .Other);
try v.elem(f, writer);
@@ -4181,41 +4179,28 @@ fn airEquality(
const writer = f.object.writer();
const inst_ty = f.typeOfIndex(inst);
const local = try f.allocLocal(inst, inst_ty);
+ const a = try Assignment.start(f, writer, inst_ty);
try f.writeCValue(writer, local, .Other);
- try writer.writeAll(" = ");
+ try a.assign(f, writer);
if (operand_ty.zigTypeTag(mod) == .Optional and !operand_ty.optionalReprIsPayload(mod)) {
- // (A && B) || (C && (A == B))
- // A = lhs.is_null ; B = rhs.is_null ; C = rhs.payload == lhs.payload
-
- switch (operator) {
- .eq => {},
- .neq => try writer.writeByte('!'),
- else => unreachable,
- }
- try writer.writeAll("((");
- try f.writeCValue(writer, lhs, .Other);
- try writer.writeAll(".is_null && ");
- try f.writeCValue(writer, rhs, .Other);
- try writer.writeAll(".is_null) || (");
- try f.writeCValue(writer, lhs, .Other);
- try writer.writeAll(".payload == ");
- try f.writeCValue(writer, rhs, .Other);
- try writer.writeAll(".payload && ");
+ try f.writeCValueMember(writer, lhs, .{ .identifier = "is_null" });
+ try writer.writeAll(" || ");
+ try f.writeCValueMember(writer, rhs, .{ .identifier = "is_null" });
+ try writer.writeAll(" ? ");
+ try f.writeCValueMember(writer, lhs, .{ .identifier = "is_null" });
+ try writer.writeAll(compareOperatorC(operator));
+ try f.writeCValueMember(writer, rhs, .{ .identifier = "is_null" });
+ try writer.writeAll(" : ");
+ try f.writeCValueMember(writer, lhs, .{ .identifier = "payload" });
+ try writer.writeAll(compareOperatorC(operator));
+ try f.writeCValueMember(writer, rhs, .{ .identifier = "payload" });
+ } else {
try f.writeCValue(writer, lhs, .Other);
- try writer.writeAll(".is_null == ");
+ try writer.writeAll(compareOperatorC(operator));
try f.writeCValue(writer, rhs, .Other);
- try writer.writeAll(".is_null));\n");
-
- return local;
}
-
- try f.writeCValue(writer, lhs, .Other);
- try writer.writeByte(' ');
- try writer.writeAll(compareOperatorC(operator));
- try writer.writeByte(' ');
- try f.writeCValue(writer, rhs, .Other);
- try writer.writeAll(";\n");
+ try a.end(f, writer);
return local;
}
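+// For a non-pointer-like optional, the emitted C for `.eq` now reads roughly
+// (illustrative names):
+//   (l.is_null || r.is_null) ? l.is_null == r.is_null : l.payload == r.payload
+// which is equivalent to the old `(A && B) || (C && (A == B))` form but skips
+// the payload compare when either side is null.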
@@ -6109,41 +6094,48 @@ fn airFloatCast(f: *Function, inst: Air.Inst.Index) !CValue {
const ty_op = f.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
const inst_ty = f.typeOfIndex(inst);
+ const inst_scalar_ty = inst_ty.scalarType(mod);
const operand = try f.resolveInst(ty_op.operand);
try reap(f, inst, &.{ty_op.operand});
const operand_ty = f.typeOf(ty_op.operand);
+ const scalar_ty = operand_ty.scalarType(mod);
const target = f.object.dg.module.getTarget();
- const operation = if (inst_ty.isRuntimeFloat() and operand_ty.isRuntimeFloat())
- if (inst_ty.floatBits(target) < operand_ty.floatBits(target)) "trunc" else "extend"
- else if (inst_ty.isInt(mod) and operand_ty.isRuntimeFloat())
- if (inst_ty.isSignedInt(mod)) "fix" else "fixuns"
- else if (inst_ty.isRuntimeFloat() and operand_ty.isInt(mod))
- if (operand_ty.isSignedInt(mod)) "float" else "floatun"
+ const operation = if (inst_scalar_ty.isRuntimeFloat() and scalar_ty.isRuntimeFloat())
+ if (inst_scalar_ty.floatBits(target) < scalar_ty.floatBits(target)) "trunc" else "extend"
+ else if (inst_scalar_ty.isInt(mod) and scalar_ty.isRuntimeFloat())
+ if (inst_scalar_ty.isSignedInt(mod)) "fix" else "fixuns"
+ else if (inst_scalar_ty.isRuntimeFloat() and scalar_ty.isInt(mod))
+ if (scalar_ty.isSignedInt(mod)) "float" else "floatun"
else
unreachable;
const writer = f.object.writer();
const local = try f.allocLocal(inst, inst_ty);
+ const v = try Vectorize.start(f, inst, writer, operand_ty);
+ const a = try Assignment.start(f, writer, scalar_ty);
try f.writeCValue(writer, local, .Other);
-
- try writer.writeAll(" = ");
- if (inst_ty.isInt(mod) and operand_ty.isRuntimeFloat()) {
+ try v.elem(f, writer);
+ try a.assign(f, writer);
+ if (inst_scalar_ty.isInt(mod) and scalar_ty.isRuntimeFloat()) {
try writer.writeAll("zig_wrap_");
- try f.object.dg.renderTypeForBuiltinFnName(writer, inst_ty);
+ try f.object.dg.renderTypeForBuiltinFnName(writer, inst_scalar_ty);
try writer.writeByte('(');
}
try writer.writeAll("zig_");
try writer.writeAll(operation);
- try writer.writeAll(compilerRtAbbrev(operand_ty, mod));
- try writer.writeAll(compilerRtAbbrev(inst_ty, mod));
+ try writer.writeAll(compilerRtAbbrev(scalar_ty, mod));
+ try writer.writeAll(compilerRtAbbrev(inst_scalar_ty, mod));
try writer.writeByte('(');
try f.writeCValue(writer, operand, .FunctionArgument);
+ try v.elem(f, writer);
try writer.writeByte(')');
- if (inst_ty.isInt(mod) and operand_ty.isRuntimeFloat()) {
- try f.object.dg.renderBuiltinInfo(writer, inst_ty, .bits);
+ if (inst_scalar_ty.isInt(mod) and scalar_ty.isRuntimeFloat()) {
+ try f.object.dg.renderBuiltinInfo(writer, inst_scalar_ty, .bits);
try writer.writeByte(')');
}
- try writer.writeAll(";\n");
+ try a.end(f, writer);
+ try v.end(f, inst, writer);
+
return local;
}
@@ -6315,7 +6307,7 @@ fn airCmpBuiltinCall(
try v.elem(f, writer);
try f.object.dg.renderBuiltinInfo(writer, scalar_ty, info);
try writer.writeByte(')');
- if (!ref_ret) try writer.print(" {s} {}", .{
+ if (!ref_ret) try writer.print("{s}{}", .{
compareOperatorC(operator),
try f.fmtIntLiteral(Type.i32, try mod.intValue(Type.i32, 0)),
});
@@ -7661,12 +7653,12 @@ fn compareOperatorAbbrev(operator: std.math.CompareOperator) []const u8 {
fn compareOperatorC(operator: std.math.CompareOperator) []const u8 {
return switch (operator) {
- .lt => "<",
- .lte => "<=",
- .eq => "==",
- .gte => ">=",
- .gt => ">",
- .neq => "!=",
+ .lt => " < ",
+ .lte => " <= ",
+ .eq => " == ",
+ .gte => " >= ",
+ .gt => " > ",
+ .neq => " != ",
};
}
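+// The surrounding spaces are now baked into the operator strings, so call
+// sites such as airCmpOp, airEquality, and airCmpBuiltinCall no longer emit
+// their own padding around the operator.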
diff --git a/src/codegen/llvm.zig b/src/codegen/llvm.zig
index 1e7687b96c..bd0c337169 100644
--- a/src/codegen/llvm.zig
+++ b/src/codegen/llvm.zig
@@ -1165,7 +1165,7 @@ pub const Object = struct {
const fwd_ref = self.debug_unresolved_namespace_scopes.values()[i];
const namespace = self.module.namespacePtr(namespace_index);
- const debug_type = try self.lowerDebugType(namespace.ty);
+ const debug_type = try self.lowerDebugType(namespace.getType(self.module));
self.builder.debugForwardReferenceSetType(fwd_ref, debug_type);
}
@@ -1803,7 +1803,7 @@ pub const Object = struct {
return updateExportedGlobal(self, mod, global_index, exports);
} else {
const fqn = try self.builder.string(
- mod.intern_pool.stringToSlice(try decl.getFullyQualifiedName(mod)),
+ mod.intern_pool.stringToSlice(try decl.fullyQualifiedName(mod)),
);
try global_index.rename(fqn, &self.builder);
global_index.setLinkage(.internal, &self.builder);
@@ -2841,15 +2841,13 @@ pub const Object = struct {
const builtin_str = try mod.intern_pool.getOrPutString(mod.gpa, "builtin");
const std_namespace = mod.namespacePtr(mod.declPtr(std_file.root_decl.unwrap().?).src_namespace);
- const builtin_decl = std_namespace.decls
- .getKeyAdapted(builtin_str, Module.DeclAdapter{ .mod = mod }).?;
+ const builtin_decl = std_namespace.decls.getKeyAdapted(builtin_str, Module.DeclAdapter{ .zcu = mod }).?;
const stack_trace_str = try mod.intern_pool.getOrPutString(mod.gpa, "StackTrace");
// The buffer is only used for int_type; `builtin` is a struct.
const builtin_ty = mod.declPtr(builtin_decl).val.toType();
const builtin_namespace = builtin_ty.getNamespace(mod).?;
- const stack_trace_decl_index = builtin_namespace.decls
- .getKeyAdapted(stack_trace_str, Module.DeclAdapter{ .mod = mod }).?;
+ const stack_trace_decl_index = builtin_namespace.decls.getKeyAdapted(stack_trace_str, Module.DeclAdapter{ .zcu = mod }).?;
const stack_trace_decl = mod.declPtr(stack_trace_decl_index);
// Sema should have ensured that StackTrace was analyzed.
@@ -2892,7 +2890,7 @@ pub const Object = struct {
try o.builder.string(ip.stringToSlice(if (is_extern)
decl.name
else
- try decl.getFullyQualifiedName(zcu))),
+ try decl.fullyQualifiedName(zcu))),
toLlvmAddressSpace(decl.@"addrspace", target),
);
gop.value_ptr.* = function_index.ptrConst(&o.builder).global;
@@ -3106,7 +3104,7 @@ pub const Object = struct {
const variable_index = try o.builder.addVariable(
try o.builder.string(mod.intern_pool.stringToSlice(
- if (is_extern) decl.name else try decl.getFullyQualifiedName(mod),
+ if (is_extern) decl.name else try decl.fullyQualifiedName(mod),
)),
try o.lowerType(decl.ty),
toLlvmGlobalAddressSpace(decl.@"addrspace", mod.getTarget()),
@@ -3331,7 +3329,7 @@ pub const Object = struct {
}
const name = try o.builder.string(ip.stringToSlice(
- try mod.declPtr(struct_type.decl.unwrap().?).getFullyQualifiedName(mod),
+ try mod.declPtr(struct_type.decl.unwrap().?).fullyQualifiedName(mod),
));
var llvm_field_types = std.ArrayListUnmanaged(Builder.Type){};
@@ -3487,7 +3485,7 @@ pub const Object = struct {
}
const name = try o.builder.string(ip.stringToSlice(
- try mod.declPtr(union_obj.decl).getFullyQualifiedName(mod),
+ try mod.declPtr(union_obj.decl).fullyQualifiedName(mod),
));
const aligned_field_ty = Type.fromInterned(union_obj.field_types.get(ip)[layout.most_aligned_field]);
@@ -4605,7 +4603,7 @@ pub const Object = struct {
const usize_ty = try o.lowerType(Type.usize);
const ret_ty = try o.lowerType(Type.slice_const_u8_sentinel_0);
- const fqn = try zcu.declPtr(enum_type.decl).getFullyQualifiedName(zcu);
+ const fqn = try zcu.declPtr(enum_type.decl).fullyQualifiedName(zcu);
const target = zcu.root_mod.resolved_target.result;
const function_index = try o.builder.addFunction(
try o.builder.fnType(ret_ty, &.{try o.lowerType(Type.fromInterned(enum_type.tag_ty))}, .normal),
@@ -6623,7 +6621,7 @@ pub const FuncGen = struct {
.base_line = self.base_line,
});
- const fqn = try decl.getFullyQualifiedName(zcu);
+ const fqn = try decl.fullyQualifiedName(zcu);
const is_internal_linkage = !zcu.decl_exports.contains(decl_index);
const fn_ty = try zcu.funcType(.{
@@ -8653,8 +8651,6 @@ pub const FuncGen = struct {
const operand_ty = self.typeOf(ty_op.operand);
const dest_ty = self.typeOfIndex(inst);
const target = mod.getTarget();
- const dest_bits = dest_ty.floatBits(target);
- const src_bits = operand_ty.floatBits(target);
if (intrinsicsAllowed(dest_ty, target) and intrinsicsAllowed(operand_ty, target)) {
return self.wip.cast(.fpext, operand, try o.lowerType(dest_ty), "");
@@ -8662,11 +8658,19 @@ pub const FuncGen = struct {
const operand_llvm_ty = try o.lowerType(operand_ty);
const dest_llvm_ty = try o.lowerType(dest_ty);
+ const dest_bits = dest_ty.scalarType(mod).floatBits(target);
+ const src_bits = operand_ty.scalarType(mod).floatBits(target);
const fn_name = try o.builder.fmt("__extend{s}f{s}f2", .{
compilerRtFloatAbbrev(src_bits), compilerRtFloatAbbrev(dest_bits),
});
const libc_fn = try self.getLibcFunction(fn_name, &.{operand_llvm_ty}, dest_llvm_ty);
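+        // Assumed behavior of buildElementwiseCall: it applies the compiler-rt
+        // helper lane by lane, inserting each result into the poison-vector
+        // accumulator, so vector fpext scalarizes when intrinsics are not
+        // allowed for these types.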
+ if (dest_ty.isVector(mod)) return self.buildElementwiseCall(
+ libc_fn,
+ &.{operand},
+ try o.builder.poisonValue(dest_llvm_ty),
+ dest_ty.vectorLen(mod),
+ );
return self.wip.call(
.normal,
.ccc,
@@ -9648,7 +9652,7 @@ pub const FuncGen = struct {
if (gop.found_existing) return gop.value_ptr.*;
errdefer assert(o.named_enum_map.remove(enum_type.decl));
- const fqn = try zcu.declPtr(enum_type.decl).getFullyQualifiedName(zcu);
+ const fqn = try zcu.declPtr(enum_type.decl).fullyQualifiedName(zcu);
const target = zcu.root_mod.resolved_target.result;
const function_index = try o.builder.addFunction(
try o.builder.fnType(.i1, &.{try o.lowerType(Type.fromInterned(enum_type.tag_ty))}, .normal),
diff --git a/src/codegen/spirv.zig b/src/codegen/spirv.zig
index cbc6ae1eb3..dc3b646ab7 100644
--- a/src/codegen/spirv.zig
+++ b/src/codegen/spirv.zig
@@ -2019,7 +2019,7 @@ const DeclGen = struct {
// Append the actual code into the functions section.
try self.spv.addFunction(spv_decl_index, self.func);
- const fqn = ip.stringToSlice(try decl.getFullyQualifiedName(self.module));
+ const fqn = ip.stringToSlice(try decl.fullyQualifiedName(self.module));
try self.spv.debugName(decl_id, fqn);
// Temporarily generate a test kernel declaration if this is a test function.
@@ -2055,7 +2055,7 @@ const DeclGen = struct {
.id_result = decl_id,
.storage_class = actual_storage_class,
});
- const fqn = ip.stringToSlice(try decl.getFullyQualifiedName(self.module));
+ const fqn = ip.stringToSlice(try decl.fullyQualifiedName(self.module));
try self.spv.debugName(decl_id, fqn);
if (opt_init_val) |init_val| {
diff --git a/src/link/Coff.zig b/src/link/Coff.zig
index 9be6d18df1..5bf83b52ea 100644
--- a/src/link/Coff.zig
+++ b/src/link/Coff.zig
@@ -1176,7 +1176,7 @@ pub fn lowerUnnamedConst(self: *Coff, tv: TypedValue, decl_index: InternPool.Dec
gop.value_ptr.* = .{};
}
const unnamed_consts = gop.value_ptr;
- const decl_name = mod.intern_pool.stringToSlice(try decl.getFullyQualifiedName(mod));
+ const decl_name = mod.intern_pool.stringToSlice(try decl.fullyQualifiedName(mod));
const index = unnamed_consts.items.len;
const sym_name = try std.fmt.allocPrint(gpa, "__unnamed_{s}_{d}", .{ decl_name, index });
defer gpa.free(sym_name);
@@ -1427,7 +1427,7 @@ fn updateDeclCode(self: *Coff, decl_index: InternPool.DeclIndex, code: []u8, com
const mod = self.base.comp.module.?;
const decl = mod.declPtr(decl_index);
- const decl_name = mod.intern_pool.stringToSlice(try decl.getFullyQualifiedName(mod));
+ const decl_name = mod.intern_pool.stringToSlice(try decl.fullyQualifiedName(mod));
log.debug("updateDeclCode {s}{*}", .{ decl_name, decl });
const required_alignment: u32 = @intCast(decl.getAlignment(mod).toByteUnits(0));
diff --git a/src/link/Dwarf.zig b/src/link/Dwarf.zig
index f5f754e03b..a9a6942299 100644
--- a/src/link/Dwarf.zig
+++ b/src/link/Dwarf.zig
@@ -1082,7 +1082,7 @@ pub fn initDeclState(self: *Dwarf, mod: *Module, decl_index: InternPool.DeclInde
defer tracy.end();
const decl = mod.declPtr(decl_index);
- const decl_linkage_name = try decl.getFullyQualifiedName(mod);
+ const decl_linkage_name = try decl.fullyQualifiedName(mod);
log.debug("initDeclState {}{*}", .{ decl_linkage_name.fmt(&mod.intern_pool), decl });
diff --git a/src/link/Elf/ZigObject.zig b/src/link/Elf/ZigObject.zig
index ea32f93584..b6413f7d45 100644
--- a/src/link/Elf/ZigObject.zig
+++ b/src/link/Elf/ZigObject.zig
@@ -903,7 +903,7 @@ fn updateDeclCode(
const gpa = elf_file.base.comp.gpa;
const mod = elf_file.base.comp.module.?;
const decl = mod.declPtr(decl_index);
- const decl_name = mod.intern_pool.stringToSlice(try decl.getFullyQualifiedName(mod));
+ const decl_name = mod.intern_pool.stringToSlice(try decl.fullyQualifiedName(mod));
log.debug("updateDeclCode {s}{*}", .{ decl_name, decl });
@@ -1001,7 +1001,7 @@ fn updateTlv(
const gpa = elf_file.base.comp.gpa;
const mod = elf_file.base.comp.module.?;
const decl = mod.declPtr(decl_index);
- const decl_name = mod.intern_pool.stringToSlice(try decl.getFullyQualifiedName(mod));
+ const decl_name = mod.intern_pool.stringToSlice(try decl.fullyQualifiedName(mod));
log.debug("updateTlv {s} ({*})", .{ decl_name, decl });
@@ -1300,7 +1300,7 @@ pub fn lowerUnnamedConst(
}
const unnamed_consts = gop.value_ptr;
const decl = mod.declPtr(decl_index);
- const decl_name = mod.intern_pool.stringToSlice(try decl.getFullyQualifiedName(mod));
+ const decl_name = mod.intern_pool.stringToSlice(try decl.fullyQualifiedName(mod));
const index = unnamed_consts.items.len;
const name = try std.fmt.allocPrint(gpa, "__unnamed_{s}_{d}", .{ decl_name, index });
defer gpa.free(name);
@@ -1482,7 +1482,7 @@ pub fn updateDeclLineNumber(
defer tracy.end();
const decl = mod.declPtr(decl_index);
- const decl_name = mod.intern_pool.stringToSlice(try decl.getFullyQualifiedName(mod));
+ const decl_name = mod.intern_pool.stringToSlice(try decl.fullyQualifiedName(mod));
log.debug("updateDeclLineNumber {s}{*}", .{ decl_name, decl });
diff --git a/src/link/MachO.zig b/src/link/MachO.zig
index 69dcf7aba1..a50e4bd6a1 100644
--- a/src/link/MachO.zig
+++ b/src/link/MachO.zig
@@ -2170,8 +2170,7 @@ fn initSegments(self: *MachO) !void {
for (slice.items(.header)) |header| {
const segname = header.segName();
if (self.getSegmentByName(segname) == null) {
- const flags: u32 = if (mem.startsWith(u8, segname, "__DATA_CONST")) macho.SG_READ_ONLY else 0;
- _ = try self.addSegment(segname, .{ .prot = getSegmentProt(segname), .flags = flags });
+ _ = try self.addSegment(segname, .{ .prot = getSegmentProt(segname) });
}
}
@@ -2247,6 +2246,12 @@ fn initSegments(self: *MachO) !void {
segment.nsects += 1;
seg_id.* = segment_id;
}
+
+    // Mark the __DATA_CONST segment as read-only (SG_READ_ONLY).
+ if (self.getSegmentByName("__DATA_CONST")) |seg_id| {
+ const seg = &self.segments.items[seg_id];
+ seg.flags |= macho.SG_READ_ONLY;
+ }
}
fn allocateSections(self: *MachO) !void {
@@ -2474,6 +2479,9 @@ fn initDyldInfoSections(self: *MachO) !void {
nbinds += ctx.bind_relocs;
nweak_binds += ctx.weak_bind_relocs;
}
+ if (self.getInternalObject()) |int| {
+ nrebases += int.num_rebase_relocs;
+ }
try self.rebase.entries.ensureUnusedCapacity(gpa, nrebases);
try self.bind.entries.ensureUnusedCapacity(gpa, nbinds);
try self.weak_bind.entries.ensureUnusedCapacity(gpa, nweak_binds);
@@ -3727,7 +3735,6 @@ pub fn addSegment(self: *MachO, name: []const u8, opts: struct {
fileoff: u64 = 0,
filesize: u64 = 0,
prot: macho.vm_prot_t = macho.PROT.NONE,
- flags: u32 = 0,
}) error{OutOfMemory}!u8 {
const gpa = self.base.comp.gpa;
const index = @as(u8, @intCast(self.segments.items.len));
diff --git a/src/link/MachO/Atom.zig b/src/link/MachO/Atom.zig
index 2c3f360ec2..d40712046e 100644
--- a/src/link/MachO/Atom.zig
+++ b/src/link/MachO/Atom.zig
@@ -119,12 +119,9 @@ pub fn getThunk(self: Atom, macho_file: *MachO) *Thunk {
pub fn initOutputSection(sect: macho.section_64, macho_file: *MachO) !u8 {
const segname, const sectname, const flags = blk: {
- const segname = sect.segName();
- const sectname = sect.sectName();
-
if (sect.isCode()) break :blk .{
"__TEXT",
- sectname,
+ "__text",
macho.S_REGULAR | macho.S_ATTR_PURE_INSTRUCTIONS | macho.S_ATTR_SOME_INSTRUCTIONS,
};
@@ -135,32 +132,36 @@ pub fn initOutputSection(sect: macho.section_64, macho_file: *MachO) !u8 {
=> break :blk .{ "__TEXT", "__const", macho.S_REGULAR },
macho.S_CSTRING_LITERALS => {
- if (mem.startsWith(u8, sectname, "__objc")) break :blk .{
- segname, sectname, macho.S_REGULAR,
+ if (mem.startsWith(u8, sect.sectName(), "__objc")) break :blk .{
+ sect.segName(), sect.sectName(), macho.S_REGULAR,
};
break :blk .{ "__TEXT", "__cstring", macho.S_CSTRING_LITERALS };
},
macho.S_MOD_INIT_FUNC_POINTERS,
macho.S_MOD_TERM_FUNC_POINTERS,
- macho.S_LITERAL_POINTERS,
- => break :blk .{ "__DATA_CONST", sectname, sect.flags },
+ => break :blk .{ "__DATA_CONST", sect.sectName(), sect.flags },
+ macho.S_LITERAL_POINTERS,
macho.S_ZEROFILL,
macho.S_GB_ZEROFILL,
macho.S_THREAD_LOCAL_VARIABLES,
macho.S_THREAD_LOCAL_VARIABLE_POINTERS,
macho.S_THREAD_LOCAL_REGULAR,
macho.S_THREAD_LOCAL_ZEROFILL,
- => break :blk .{ "__DATA", sectname, sect.flags },
+ => break :blk .{ sect.segName(), sect.sectName(), sect.flags },
- // TODO: do we need this check here?
- macho.S_COALESCED => break :blk .{ segname, sectname, macho.S_REGULAR },
+ macho.S_COALESCED => break :blk .{
+ sect.segName(),
+ sect.sectName(),
+ macho.S_REGULAR,
+ },
macho.S_REGULAR => {
+ const segname = sect.segName();
+ const sectname = sect.sectName();
if (mem.eql(u8, segname, "__DATA")) {
- if (mem.eql(u8, sectname, "__const") or
- mem.eql(u8, sectname, "__cfstring") or
+ if (mem.eql(u8, sectname, "__cfstring") or
mem.eql(u8, sectname, "__objc_classlist") or
mem.eql(u8, sectname, "__objc_imageinfo")) break :blk .{
"__DATA_CONST",
@@ -171,7 +172,7 @@ pub fn initOutputSection(sect: macho.section_64, macho_file: *MachO) !u8 {
break :blk .{ segname, sectname, sect.flags };
},
- else => break :blk .{ segname, sectname, sect.flags },
+ else => break :blk .{ sect.segName(), sect.sectName(), sect.flags },
}
};
const osec = macho_file.getSectionByName(segname, sectname) orelse try macho_file.addSection(
diff --git a/src/link/MachO/InternalObject.zig b/src/link/MachO/InternalObject.zig
index 30b750260c..db8a8fd939 100644
--- a/src/link/MachO/InternalObject.zig
+++ b/src/link/MachO/InternalObject.zig
@@ -8,6 +8,7 @@ strtab: std.ArrayListUnmanaged(u8) = .{},
objc_methnames: std.ArrayListUnmanaged(u8) = .{},
objc_selrefs: [@sizeOf(u64)]u8 = [_]u8{0} ** @sizeOf(u64),
+num_rebase_relocs: u32 = 0,
output_symtab_ctx: MachO.SymtabCtx = .{},
pub fn deinit(self: *InternalObject, allocator: Allocator) void {
@@ -115,6 +116,7 @@ fn addObjcSelrefsSection(
},
});
atom.relocs = .{ .pos = 0, .len = 1 };
+ self.num_rebase_relocs += 1;
return atom_index;
}
diff --git a/src/link/MachO/ZigObject.zig b/src/link/MachO/ZigObject.zig
index 3a28e824d5..fadf80b2c0 100644
--- a/src/link/MachO/ZigObject.zig
+++ b/src/link/MachO/ZigObject.zig
@@ -792,7 +792,7 @@ fn updateDeclCode(
const gpa = macho_file.base.comp.gpa;
const mod = macho_file.base.comp.module.?;
const decl = mod.declPtr(decl_index);
- const decl_name = mod.intern_pool.stringToSlice(try decl.getFullyQualifiedName(mod));
+ const decl_name = mod.intern_pool.stringToSlice(try decl.fullyQualifiedName(mod));
log.debug("updateDeclCode {s}{*}", .{ decl_name, decl });
@@ -876,7 +876,7 @@ fn updateTlv(
) !void {
const mod = macho_file.base.comp.module.?;
const decl = mod.declPtr(decl_index);
- const decl_name = mod.intern_pool.stringToSlice(try decl.getFullyQualifiedName(mod));
+ const decl_name = mod.intern_pool.stringToSlice(try decl.fullyQualifiedName(mod));
log.debug("updateTlv {s} ({*})", .{ decl_name, decl });
@@ -1079,7 +1079,7 @@ pub fn lowerUnnamedConst(
}
const unnamed_consts = gop.value_ptr;
const decl = mod.declPtr(decl_index);
- const decl_name = mod.intern_pool.stringToSlice(try decl.getFullyQualifiedName(mod));
+ const decl_name = mod.intern_pool.stringToSlice(try decl.fullyQualifiedName(mod));
const index = unnamed_consts.items.len;
const name = try std.fmt.allocPrint(gpa, "__unnamed_{s}_{d}", .{ decl_name, index });
defer gpa.free(name);
diff --git a/src/link/Plan9.zig b/src/link/Plan9.zig
index 2e937a3904..be68465af7 100644
--- a/src/link/Plan9.zig
+++ b/src/link/Plan9.zig
@@ -478,7 +478,7 @@ pub fn lowerUnnamedConst(self: *Plan9, tv: TypedValue, decl_index: InternPool.De
}
const unnamed_consts = gop.value_ptr;
- const decl_name = mod.intern_pool.stringToSlice(try decl.getFullyQualifiedName(mod));
+ const decl_name = mod.intern_pool.stringToSlice(try decl.fullyQualifiedName(mod));
const index = unnamed_consts.items.len;
// name is freed when the unnamed const is freed
diff --git a/src/link/Wasm.zig b/src/link/Wasm.zig
index 1a06d0fc6e..f4bc2f8f0f 100644
--- a/src/link/Wasm.zig
+++ b/src/link/Wasm.zig
@@ -662,7 +662,7 @@ pub fn getOrCreateAtomForDecl(wasm: *Wasm, decl_index: InternPool.DeclIndex) !At
const symbol = atom.symbolLoc().getSymbol(wasm);
const mod = wasm.base.comp.module.?;
const decl = mod.declPtr(decl_index);
- const full_name = mod.intern_pool.stringToSlice(try decl.getFullyQualifiedName(mod));
+ const full_name = mod.intern_pool.stringToSlice(try decl.fullyQualifiedName(mod));
symbol.name = try wasm.string_table.put(gpa, full_name);
}
return gop.value_ptr.*;
@@ -1598,7 +1598,7 @@ pub fn updateDeclLineNumber(wasm: *Wasm, mod: *Module, decl_index: InternPool.De
defer tracy.end();
const decl = mod.declPtr(decl_index);
- const decl_name = mod.intern_pool.stringToSlice(try decl.getFullyQualifiedName(mod));
+ const decl_name = mod.intern_pool.stringToSlice(try decl.fullyQualifiedName(mod));
log.debug("updateDeclLineNumber {s}{*}", .{ decl_name, decl });
try dw.updateDeclLineNumber(mod, decl_index);
@@ -1612,7 +1612,7 @@ fn finishUpdateDecl(wasm: *Wasm, decl_index: InternPool.DeclIndex, code: []const
const atom_index = wasm.decls.get(decl_index).?;
const atom = wasm.getAtomPtr(atom_index);
const symbol = &wasm.symbols.items[atom.sym_index];
- const full_name = mod.intern_pool.stringToSlice(try decl.getFullyQualifiedName(mod));
+ const full_name = mod.intern_pool.stringToSlice(try decl.fullyQualifiedName(mod));
symbol.name = try wasm.string_table.put(gpa, full_name);
symbol.tag = symbol_tag;
try atom.code.appendSlice(gpa, code);
@@ -1678,7 +1678,7 @@ pub fn lowerUnnamedConst(wasm: *Wasm, tv: TypedValue, decl_index: InternPool.Dec
const parent_atom_index = try wasm.getOrCreateAtomForDecl(decl_index);
const parent_atom = wasm.getAtom(parent_atom_index);
const local_index = parent_atom.locals.items.len;
- const fqn = mod.intern_pool.stringToSlice(try decl.getFullyQualifiedName(mod));
+ const fqn = mod.intern_pool.stringToSlice(try decl.fullyQualifiedName(mod));
const name = try std.fmt.allocPrintZ(gpa, "__unnamed_{s}_{d}", .{
fqn, local_index,
});
diff --git a/src/main.zig b/src/main.zig
index db739ebce7..bb8b25c60e 100644
--- a/src/main.zig
+++ b/src/main.zig
@@ -596,7 +596,7 @@ const usage_build_generic =
\\ --export=[value] (WebAssembly) Force a symbol to be exported
\\
\\Test Options:
- \\ --test-filter [text] Skip tests that do not match filter
+ \\ --test-filter [text] Skip tests that do not match any filter
\\ --test-name-prefix [text] Add prefix to all tests
\\ --test-cmd [arg] Specify test execution command one arg at a time
\\ --test-cmd-bin Appends test binary path to test cmd args
@@ -869,7 +869,7 @@ fn buildOutputType(
var link_emit_relocs = false;
var build_id: ?std.zig.BuildId = null;
var runtime_args_start: ?usize = null;
- var test_filter: ?[]const u8 = null;
+ var test_filters: std.ArrayListUnmanaged([]const u8) = .{};
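+    // `--test-filter` is now repeatable, e.g.
+    //   zig test foo.zig --test-filter alpha --test-filter beta
+    // runs the tests whose names match either filter.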
var test_name_prefix: ?[]const u8 = null;
var test_runner_path: ?[]const u8 = null;
var override_local_cache_dir: ?[]const u8 = try EnvVar.ZIG_LOCAL_CACHE_DIR.get(arena);
@@ -909,7 +909,7 @@ fn buildOutputType(
var rc_source_files_owner_index: usize = 0;
// null means replace with the test executable binary
- var test_exec_args = std.ArrayList(?[]const u8).init(arena);
+ var test_exec_args: std.ArrayListUnmanaged(?[]const u8) = .{};
// These get set by CLI flags and then snapshotted when a `--mod` flag is
// encountered.
@@ -1278,13 +1278,13 @@ fn buildOutputType(
} else if (mem.eql(u8, arg, "--libc")) {
create_module.libc_paths_file = args_iter.nextOrFatal();
} else if (mem.eql(u8, arg, "--test-filter")) {
- test_filter = args_iter.nextOrFatal();
+ try test_filters.append(arena, args_iter.nextOrFatal());
} else if (mem.eql(u8, arg, "--test-name-prefix")) {
test_name_prefix = args_iter.nextOrFatal();
} else if (mem.eql(u8, arg, "--test-runner")) {
test_runner_path = args_iter.nextOrFatal();
} else if (mem.eql(u8, arg, "--test-cmd")) {
- try test_exec_args.append(args_iter.nextOrFatal());
+ try test_exec_args.append(arena, args_iter.nextOrFatal());
} else if (mem.eql(u8, arg, "--cache-dir")) {
override_local_cache_dir = args_iter.nextOrFatal();
} else if (mem.eql(u8, arg, "--global-cache-dir")) {
@@ -1334,7 +1334,7 @@ fn buildOutputType(
} else if (mem.eql(u8, arg, "-fno-each-lib-rpath")) {
create_module.each_lib_rpath = false;
} else if (mem.eql(u8, arg, "--test-cmd-bin")) {
- try test_exec_args.append(null);
+ try test_exec_args.append(arena, null);
} else if (mem.eql(u8, arg, "--test-no-exec")) {
test_no_exec = true;
} else if (mem.eql(u8, arg, "-ftime-report")) {
@@ -3246,7 +3246,7 @@ fn buildOutputType(
.time_report = time_report,
.stack_report = stack_report,
.build_id = build_id,
- .test_filter = test_filter,
+ .test_filters = test_filters.items,
.test_name_prefix = test_name_prefix,
.test_runner_path = test_runner_path,
.disable_lld_caching = disable_lld_caching,
@@ -3369,16 +3369,15 @@ fn buildOutputType(
const c_code_path = try fs.path.join(arena, &[_][]const u8{
c_code_directory.path orelse ".", c_code_loc.basename,
});
- try test_exec_args.append(self_exe_path);
- try test_exec_args.append("run");
+ try test_exec_args.appendSlice(arena, &.{ self_exe_path, "run" });
if (zig_lib_directory.path) |p| {
- try test_exec_args.appendSlice(&.{ "-I", p });
+ try test_exec_args.appendSlice(arena, &.{ "-I", p });
}
if (create_module.resolved_options.link_libc) {
- try test_exec_args.append("-lc");
+ try test_exec_args.append(arena, "-lc");
} else if (target.os.tag == .windows) {
- try test_exec_args.appendSlice(&.{
+ try test_exec_args.appendSlice(arena, &.{
"--subsystem", "console",
"-lkernel32", "-lntdll",
});
@@ -3386,17 +3385,15 @@ fn buildOutputType(
const first_cli_mod = create_module.modules.values()[0];
if (first_cli_mod.target_arch_os_abi) |triple| {
- try test_exec_args.append("-target");
- try test_exec_args.append(triple);
+ try test_exec_args.appendSlice(arena, &.{ "-target", triple });
}
if (first_cli_mod.target_mcpu) |mcpu| {
- try test_exec_args.append(try std.fmt.allocPrint(arena, "-mcpu={s}", .{mcpu}));
+ try test_exec_args.append(arena, try std.fmt.allocPrint(arena, "-mcpu={s}", .{mcpu}));
}
if (create_module.dynamic_linker) |dl| {
- try test_exec_args.append("--dynamic-linker");
- try test_exec_args.append(dl);
+ try test_exec_args.appendSlice(arena, &.{ "--dynamic-linker", dl });
}
- try test_exec_args.append(c_code_path);
+ try test_exec_args.append(arena, c_code_path);
}
const run_or_test = switch (arg_mode) {
diff --git a/src/type.zig b/src/type.zig
index a6265692c2..a9d1654ba7 100644
--- a/src/type.zig
+++ b/src/type.zig
@@ -905,11 +905,32 @@ pub const Type = struct {
return Type.fromInterned(array_type.child).abiAlignmentAdvanced(mod, strat);
},
.vector_type => |vector_type| {
- const bits_u64 = try bitSizeAdvanced(Type.fromInterned(vector_type.child), mod, opt_sema);
- const bits: u32 = @intCast(bits_u64);
- const bytes = ((bits * vector_type.len) + 7) / 8;
- const alignment = std.math.ceilPowerOfTwoAssert(u32, bytes);
- return .{ .scalar = Alignment.fromByteUnits(alignment) };
+ if (vector_type.len == 0) return .{ .scalar = .@"1" };
+ switch (mod.comp.getZigBackend()) {
+ else => {
+ const elem_bits: u32 = @intCast(try Type.fromInterned(vector_type.child).bitSizeAdvanced(mod, opt_sema));
+ if (elem_bits == 0) return .{ .scalar = .@"1" };
+ const bytes = ((elem_bits * vector_type.len) + 7) / 8;
+ const alignment = std.math.ceilPowerOfTwoAssert(u32, bytes);
+ return .{ .scalar = Alignment.fromByteUnits(alignment) };
+ },
+ .stage2_x86_64 => {
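+                    // The self-hosted x86_64 backend aligns vectors to whole
+                    // SIMD registers. Worked example (assuming avx but not
+                    // avx512f): @Vector(5, u64) occupies 40 bytes and gets ymm
+                    // (32-byte) alignment, while @Vector(100, bool) has
+                    // len > 64 and gets xmm (16-byte) alignment.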
+ if (vector_type.child == .bool_type) {
+ if (vector_type.len > 256 and std.Target.x86.featureSetHas(target.cpu.features, .avx512f)) return .{ .scalar = .@"64" };
+ if (vector_type.len > 128 and std.Target.x86.featureSetHas(target.cpu.features, .avx2)) return .{ .scalar = .@"32" };
+ if (vector_type.len > 64) return .{ .scalar = .@"16" };
+ const bytes = std.math.divCeil(u32, vector_type.len, 8) catch unreachable;
+ const alignment = std.math.ceilPowerOfTwoAssert(u32, bytes);
+ return .{ .scalar = Alignment.fromByteUnits(alignment) };
+ }
+ const elem_bytes: u32 = @intCast((try Type.fromInterned(vector_type.child).abiSizeAdvanced(mod, strat)).scalar);
+ if (elem_bytes == 0) return .{ .scalar = .@"1" };
+ const bytes = elem_bytes * vector_type.len;
+ if (bytes > 32 and std.Target.x86.featureSetHas(target.cpu.features, .avx512f)) return .{ .scalar = .@"64" };
+ if (bytes > 16 and std.Target.x86.featureSetHas(target.cpu.features, .avx)) return .{ .scalar = .@"32" };
+ return .{ .scalar = .@"16" };
+ },
+ }
},
.opt_type => return abiAlignmentAdvancedOptional(ty, mod, strat),
@@ -1237,9 +1258,6 @@ pub const Type = struct {
.storage = .{ .lazy_size = ty.toIntern() },
} }))) },
};
- const elem_bits = try Type.fromInterned(vector_type.child).bitSizeAdvanced(mod, opt_sema);
- const total_bits = elem_bits * vector_type.len;
- const total_bytes = (total_bits + 7) / 8;
const alignment = switch (try ty.abiAlignmentAdvanced(mod, strat)) {
.scalar => |x| x,
.val => return .{ .val = Value.fromInterned((try mod.intern(.{ .int = .{
@@ -1247,6 +1265,18 @@ pub const Type = struct {
.storage = .{ .lazy_size = ty.toIntern() },
} }))) },
};
+ const total_bytes = switch (mod.comp.getZigBackend()) {
+ else => total_bytes: {
+ const elem_bits = try Type.fromInterned(vector_type.child).bitSizeAdvanced(mod, opt_sema);
+ const total_bits = elem_bits * vector_type.len;
+ break :total_bytes (total_bits + 7) / 8;
+ },
+ .stage2_x86_64 => total_bytes: {
+ if (vector_type.child == .bool_type) break :total_bytes std.math.divCeil(u32, vector_type.len, 8) catch unreachable;
+ const elem_bytes: u32 = @intCast((try Type.fromInterned(vector_type.child).abiSizeAdvanced(mod, strat)).scalar);
+ break :total_bytes elem_bytes * vector_type.len;
+ },
+ };
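+            // e.g. under stage2_x86_64, @Vector(3, u16) has total_bytes
+            // 3 * 2 = 6, which the 16-byte vector alignment forwards to an
+            // ABI size of 16 (one full xmm register).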
return AbiSizeAdvanced{ .scalar = alignment.forward(total_bytes) };
},
@@ -2108,7 +2138,8 @@ pub const Type = struct {
/// Returns true if and only if the type is a fixed-width integer.
pub fn isInt(self: Type, mod: *const Module) bool {
- return self.isSignedInt(mod) or self.isUnsignedInt(mod);
+ return self.toIntern() != .comptime_int_type and
+ mod.intern_pool.isIntegerType(self.toIntern());
}
/// Returns true if and only if the type is a fixed-width, signed integer.