diff options
Diffstat (limited to 'lib')
| -rw-r--r-- | lib/std/c/darwin.zig | 2 | ||||
| -rw-r--r-- | lib/std/os/windows.zig | 3 | ||||
| -rw-r--r-- | lib/std/zig/system/arm.zig | 87 | ||||
| -rw-r--r-- | lib/std/zig/system/darwin/macos.zig | 24 | ||||
| -rw-r--r-- | lib/std/zig/system/x86.zig | 284 |
5 files changed, 314 insertions, 86 deletions
diff --git a/lib/std/c/darwin.zig b/lib/std/c/darwin.zig index 561a4e7ce4..ac083ec9f2 100644 --- a/lib/std/c/darwin.zig +++ b/lib/std/c/darwin.zig @@ -1165,6 +1165,8 @@ pub const CPUFAMILY = enum(u32) { ARM_PALMA = 0x72015832, ARM_DONAN = 0x6f5129ac, ARM_BRAVA = 0x17d5b93a, + ARM_TAHITI = 0x75d4acb9, + ARM_TUPAI = 0x204526d0, _, }; diff --git a/lib/std/os/windows.zig b/lib/std/os/windows.zig index 018f214144..77bd8ea2f4 100644 --- a/lib/std/os/windows.zig +++ b/lib/std/os/windows.zig @@ -5315,6 +5315,9 @@ pub const PF = enum(DWORD) { /// This ARM processor implements the ARM v8.3 JavaScript conversion (JSCVT) instructions. ARM_V83_JSCVT_INSTRUCTIONS_AVAILABLE = 44, + + /// This Arm processor implements the Arm v8.3 LRCPC instructions (for example, LDAPR). Note that certain Arm v8.2 CPUs may optionally support the LRCPC instructions. + ARM_V83_LRCPC_INSTRUCTIONS_AVAILABLE, }; pub const MAX_WOW64_SHARED_ENTRIES = 16; diff --git a/lib/std/zig/system/arm.zig b/lib/std/zig/system/arm.zig index c8c3f6a851..ed3f76d023 100644 --- a/lib/std/zig/system/arm.zig +++ b/lib/std/zig/system/arm.zig @@ -22,32 +22,34 @@ pub const cpu_models = struct { // implementer = 0x41 const ARM = [_]E{ - E{ .part = 0x926, .m32 = &A32.arm926ej_s, .m64 = null }, - E{ .part = 0xb02, .m32 = &A32.mpcore, .m64 = null }, - E{ .part = 0xb36, .m32 = &A32.arm1136j_s, .m64 = null }, - E{ .part = 0xb56, .m32 = &A32.arm1156t2_s, .m64 = null }, - E{ .part = 0xb76, .m32 = &A32.arm1176jz_s, .m64 = null }, - E{ .part = 0xc05, .m32 = &A32.cortex_a5, .m64 = null }, - E{ .part = 0xc07, .m32 = &A32.cortex_a7, .m64 = null }, - E{ .part = 0xc08, .m32 = &A32.cortex_a8, .m64 = null }, - E{ .part = 0xc09, .m32 = &A32.cortex_a9, .m64 = null }, - E{ .part = 0xc0d, .m32 = &A32.cortex_a17, .m64 = null }, - E{ .part = 0xc0f, .m32 = &A32.cortex_a15, .m64 = null }, - E{ .part = 0xc0e, .m32 = &A32.cortex_a17, .m64 = null }, - E{ .part = 0xc14, .m32 = &A32.cortex_r4, .m64 = null }, - E{ .part = 0xc15, .m32 = &A32.cortex_r5, .m64 = null }, - E{ .part = 0xc17, .m32 = &A32.cortex_r7, .m64 = null }, - E{ .part = 0xc18, .m32 = &A32.cortex_r8, .m64 = null }, - E{ .part = 0xc20, .m32 = &A32.cortex_m0, .m64 = null }, - E{ .part = 0xc21, .m32 = &A32.cortex_m1, .m64 = null }, - E{ .part = 0xc23, .m32 = &A32.cortex_m3, .m64 = null }, - E{ .part = 0xc24, .m32 = &A32.cortex_m4, .m64 = null }, - E{ .part = 0xc27, .m32 = &A32.cortex_m7, .m64 = null }, - E{ .part = 0xc60, .m32 = &A32.cortex_m0plus, .m64 = null }, - E{ .part = 0xd01, .m32 = &A32.cortex_a32, .m64 = null }, + E{ .part = 0x926, .m32 = &A32.arm926ej_s }, + E{ .part = 0xb02, .m32 = &A32.mpcore }, + E{ .part = 0xb36, .m32 = &A32.arm1136j_s }, + E{ .part = 0xb56, .m32 = &A32.arm1156t2_s }, + E{ .part = 0xb76, .m32 = &A32.arm1176jz_s }, + E{ .part = 0xc05, .m32 = &A32.cortex_a5 }, + E{ .part = 0xc07, .m32 = &A32.cortex_a7 }, + E{ .part = 0xc08, .m32 = &A32.cortex_a8 }, + E{ .part = 0xc09, .m32 = &A32.cortex_a9 }, + E{ .part = 0xc0d, .m32 = &A32.cortex_a17 }, + E{ .part = 0xc0e, .m32 = &A32.cortex_a17 }, + E{ .part = 0xc0f, .m32 = &A32.cortex_a15 }, + E{ .part = 0xc14, .m32 = &A32.cortex_r4 }, + E{ .part = 0xc15, .m32 = &A32.cortex_r5 }, + E{ .part = 0xc17, .m32 = &A32.cortex_r7 }, + E{ .part = 0xc18, .m32 = &A32.cortex_r8 }, + E{ .part = 0xc20, .m32 = &A32.cortex_m0 }, + E{ .part = 0xc21, .m32 = &A32.cortex_m1 }, + E{ .part = 0xc23, .m32 = &A32.cortex_m3 }, + E{ .part = 0xc24, .m32 = &A32.cortex_m4 }, + E{ .part = 0xc27, .m32 = &A32.cortex_m7 }, + E{ .part = 0xc60, .m32 = &A32.cortex_m0plus }, + E{ .part = 0xd01, .m32 = &A32.cortex_a32 }, + E{ .part = 0xd02, .m64 = &A64.cortex_a34 }, E{ .part = 0xd03, .m32 = &A32.cortex_a53, .m64 = &A64.cortex_a53 }, E{ .part = 0xd04, .m32 = &A32.cortex_a35, .m64 = &A64.cortex_a35 }, E{ .part = 0xd05, .m32 = &A32.cortex_a55, .m64 = &A64.cortex_a55 }, + E{ .part = 0xd06, .m64 = &A64.cortex_a65 }, E{ .part = 0xd07, .m32 = &A32.cortex_a57, .m64 = &A64.cortex_a57 }, E{ .part = 0xd08, .m32 = &A32.cortex_a72, .m64 = &A64.cortex_a72 }, E{ .part = 0xd09, .m32 = &A32.cortex_a73, .m64 = &A64.cortex_a73 }, @@ -55,16 +57,38 @@ pub const cpu_models = struct { E{ .part = 0xd0b, .m32 = &A32.cortex_a76, .m64 = &A64.cortex_a76 }, E{ .part = 0xd0c, .m32 = &A32.neoverse_n1, .m64 = &A64.neoverse_n1 }, E{ .part = 0xd0d, .m32 = &A32.cortex_a77, .m64 = &A64.cortex_a77 }, - E{ .part = 0xd13, .m32 = &A32.cortex_r52, .m64 = null }, - E{ .part = 0xd20, .m32 = &A32.cortex_m23, .m64 = null }, - E{ .part = 0xd21, .m32 = &A32.cortex_m33, .m64 = null }, + E{ .part = 0xd0e, .m32 = &A32.cortex_a76ae, .m64 = &A64.cortex_a76ae }, + E{ .part = 0xd13, .m32 = &A32.cortex_r52 }, + E{ .part = 0xd14, .m64 = &A64.cortex_r82ae }, + E{ .part = 0xd15, .m64 = &A64.cortex_r82 }, + E{ .part = 0xd16, .m32 = &A32.cortex_r52plus }, + E{ .part = 0xd20, .m32 = &A32.cortex_m23 }, + E{ .part = 0xd21, .m32 = &A32.cortex_m33 }, + E{ .part = 0xd40, .m32 = &A32.neoverse_v1, .m64 = &A64.neoverse_v1 }, E{ .part = 0xd41, .m32 = &A32.cortex_a78, .m64 = &A64.cortex_a78 }, + E{ .part = 0xd42, .m32 = &A32.cortex_a78ae, .m64 = &A64.cortex_a78ae }, + E{ .part = 0xd43, .m64 = &A64.cortex_a65ae }, + E{ .part = 0xd44, .m32 = &A32.cortex_x1, .m64 = &A64.cortex_x1 }, + E{ .part = 0xd46, .m64 = &A64.cortex_a510 }, + E{ .part = 0xd47, .m32 = &A32.cortex_a710, .m64 = &A64.cortex_a710 }, + E{ .part = 0xd48, .m64 = &A64.cortex_x2 }, + E{ .part = 0xd49, .m32 = &A32.neoverse_n2, .m64 = &A64.neoverse_n2 }, + E{ .part = 0xd4a, .m64 = &A64.neoverse_e1 }, E{ .part = 0xd4b, .m32 = &A32.cortex_a78c, .m64 = &A64.cortex_a78c }, E{ .part = 0xd4c, .m32 = &A32.cortex_x1c, .m64 = &A64.cortex_x1c }, - E{ .part = 0xd44, .m32 = &A32.cortex_x1, .m64 = &A64.cortex_x1 }, - E{ .part = 0xd02, .m64 = &A64.cortex_a34 }, - E{ .part = 0xd06, .m64 = &A64.cortex_a65 }, - E{ .part = 0xd43, .m64 = &A64.cortex_a65ae }, + E{ .part = 0xd4d, .m64 = &A64.cortex_a715 }, + E{ .part = 0xd4e, .m64 = &A64.cortex_x3 }, + E{ .part = 0xd4f, .m64 = &A64.neoverse_v2 }, + E{ .part = 0xd80, .m64 = &A64.cortex_a520 }, + E{ .part = 0xd81, .m64 = &A64.cortex_a720 }, + E{ .part = 0xd82, .m64 = &A64.cortex_x4 }, + E{ .part = 0xd83, .m64 = &A64.neoverse_v3ae }, + E{ .part = 0xd84, .m64 = &A64.neoverse_v3 }, + E{ .part = 0xd85, .m64 = &A64.cortex_x925 }, + E{ .part = 0xd87, .m64 = &A64.cortex_a725 }, + E{ .part = 0xd88, .m64 = &A64.cortex_a520ae }, + E{ .part = 0xd89, .m64 = &A64.cortex_a720ae }, + E{ .part = 0xd8e, .m64 = &A64.neoverse_n3 }, }; // implementer = 0x42 const Broadcom = [_]E{ @@ -97,6 +121,7 @@ pub const cpu_models = struct { }; // implementer = 0x51 const Qualcomm = [_]E{ + E{ .part = 0x001, .m64 = &A64.oryon_1 }, E{ .part = 0x06f, .m32 = &A32.krait }, E{ .part = 0x201, .m64 = &A64.kryo, .m32 = &A64.kryo }, E{ .part = 0x205, .m64 = &A64.kryo, .m32 = &A64.kryo }, @@ -110,7 +135,7 @@ pub const cpu_models = struct { E{ .part = 0xc00, .m64 = &A64.falkor }, E{ .part = 0xc01, .m64 = &A64.saphira }, }; - + // implementer = 0x61 const Apple = [_]E{ E{ .part = 0x022, .m64 = &A64.apple_m1 }, E{ .part = 0x023, .m64 = &A64.apple_m1 }, diff --git a/lib/std/zig/system/darwin/macos.zig b/lib/std/zig/system/darwin/macos.zig index 8ae3d470c6..eba837fb49 100644 --- a/lib/std/zig/system/darwin/macos.zig +++ b/lib/std/zig/system/darwin/macos.zig @@ -408,22 +408,24 @@ pub fn detectNativeCpuAndFeatures() ?Target.Cpu { switch (current_arch) { .aarch64, .aarch64_be => { const model = switch (cpu_family) { - .ARM_EVEREST_SAWTOOTH => &Target.aarch64.cpu.apple_a16, - .ARM_BLIZZARD_AVALANCHE => &Target.aarch64.cpu.apple_a15, - .ARM_FIRESTORM_ICESTORM => &Target.aarch64.cpu.apple_a14, - .ARM_LIGHTNING_THUNDER => &Target.aarch64.cpu.apple_a13, - .ARM_VORTEX_TEMPEST => &Target.aarch64.cpu.apple_a12, - .ARM_MONSOON_MISTRAL => &Target.aarch64.cpu.apple_a11, - .ARM_HURRICANE => &Target.aarch64.cpu.apple_a10, - .ARM_TWISTER => &Target.aarch64.cpu.apple_a9, + .ARM_CYCLONE => &Target.aarch64.cpu.apple_a7, .ARM_TYPHOON => &Target.aarch64.cpu.apple_a8, - .ARM_CYCLONE => &Target.aarch64.cpu.cyclone, - .ARM_COLL => &Target.aarch64.cpu.apple_a17, + .ARM_TWISTER => &Target.aarch64.cpu.apple_a9, + .ARM_HURRICANE => &Target.aarch64.cpu.apple_a10, + .ARM_MONSOON_MISTRAL => &Target.aarch64.cpu.apple_a11, + .ARM_VORTEX_TEMPEST => &Target.aarch64.cpu.apple_a12, + .ARM_LIGHTNING_THUNDER => &Target.aarch64.cpu.apple_a13, + .ARM_FIRESTORM_ICESTORM => &Target.aarch64.cpu.apple_m1, // a14 + .ARM_BLIZZARD_AVALANCHE => &Target.aarch64.cpu.apple_m2, // a15 + .ARM_EVEREST_SAWTOOTH => &Target.aarch64.cpu.apple_m3, // a16 .ARM_IBIZA => &Target.aarch64.cpu.apple_m3, // base - .ARM_LOBOS => &Target.aarch64.cpu.apple_m3, // pro .ARM_PALMA => &Target.aarch64.cpu.apple_m3, // max + .ARM_LOBOS => &Target.aarch64.cpu.apple_m3, // pro + .ARM_COLL => &Target.aarch64.cpu.apple_a17, // a17 pro .ARM_DONAN => &Target.aarch64.cpu.apple_m4, // base .ARM_BRAVA => &Target.aarch64.cpu.apple_m4, // pro/max + .ARM_TAHITI => &Target.aarch64.cpu.apple_m4, // a18 pro + .ARM_TUPAI => &Target.aarch64.cpu.apple_m4, // a18 else => return null, }; diff --git a/lib/std/zig/system/x86.zig b/lib/std/zig/system/x86.zig index 428561c371..febd677402 100644 --- a/lib/std/zig/system/x86.zig +++ b/lib/std/zig/system/x86.zig @@ -2,11 +2,30 @@ const std = @import("std"); const builtin = @import("builtin"); const Target = std.Target; -const XCR0_XMM = 0x02; -const XCR0_YMM = 0x04; -const XCR0_MASKREG = 0x20; -const XCR0_ZMM0_15 = 0x40; -const XCR0_ZMM16_31 = 0x80; +/// Only covers EAX for now. +const Xcr0 = packed struct(u32) { + x87: bool, + sse: bool, + avx: bool, + bndreg: bool, + bndcsr: bool, + opmask: bool, + zmm_hi256: bool, + hi16_zmm: bool, + pt: bool, + pkru: bool, + pasid: bool, + cet_u: bool, + cet_s: bool, + hdc: bool, + uintr: bool, + lbr: bool, + hwp: bool, + xtilecfg: bool, + xtiledata: bool, + apx: bool, + _reserved: u12, +}; fn setFeature(cpu: *Target.Cpu, feature: Target.x86.Feature, enabled: bool) void { const idx = @as(Target.Cpu.Feature.Set.Index, @intFromEnum(feature)); @@ -339,12 +358,6 @@ fn detectNativeFeatures(cpu: *Target.Cpu, os_tag: Target.Os.Tag) void { leaf = cpuid(1, 0); - setFeature(cpu, .cx8, bit(leaf.edx, 8)); - setFeature(cpu, .cmov, bit(leaf.edx, 15)); - setFeature(cpu, .mmx, bit(leaf.edx, 23)); - setFeature(cpu, .fxsr, bit(leaf.edx, 24)); - setFeature(cpu, .sse, bit(leaf.edx, 25)); - setFeature(cpu, .sse2, bit(leaf.edx, 26)); setFeature(cpu, .sse3, bit(leaf.ecx, 0)); setFeature(cpu, .pclmul, bit(leaf.ecx, 1)); setFeature(cpu, .ssse3, bit(leaf.ecx, 9)); @@ -356,13 +369,20 @@ fn detectNativeFeatures(cpu: *Target.Cpu, os_tag: Target.Os.Tag) void { setFeature(cpu, .aes, bit(leaf.ecx, 25)); setFeature(cpu, .rdrnd, bit(leaf.ecx, 30)); + setFeature(cpu, .cx8, bit(leaf.edx, 8)); + setFeature(cpu, .cmov, bit(leaf.edx, 15)); + setFeature(cpu, .mmx, bit(leaf.edx, 23)); + setFeature(cpu, .fxsr, bit(leaf.edx, 24)); + setFeature(cpu, .sse, bit(leaf.edx, 25)); + setFeature(cpu, .sse2, bit(leaf.edx, 26)); + const has_xsave = bit(leaf.ecx, 27); const has_avx = bit(leaf.ecx, 28); // Make sure not to call xgetbv if xsave is not supported - const xcr0_eax = if (has_xsave and has_avx) getXCR0() else 0; + const xcr0: Xcr0 = if (has_xsave and has_avx) @bitCast(getXCR0()) else @bitCast(@as(u32, 0)); - const has_avx_save = hasMask(xcr0_eax, XCR0_XMM | XCR0_YMM); + const has_avx_save = xcr0.sse and xcr0.avx; // LLVM approaches avx512_save by hardcoding it to true on Darwin, // because the kernel saves the context even if the bit is not set. @@ -384,22 +404,26 @@ fn detectNativeFeatures(cpu: *Target.Cpu, os_tag: Target.Os.Tag) void { // Darwin lazily saves the AVX512 context on first use: trust that the OS will // save the AVX512 context if we use AVX512 instructions, even if the bit is not // set right now. - const has_avx512_save = switch (os_tag.isDarwin()) { - true => true, - false => hasMask(xcr0_eax, XCR0_MASKREG | XCR0_ZMM0_15 | XCR0_ZMM16_31), - }; + const has_avx512_save = if (os_tag.isDarwin()) + true + else + xcr0.zmm_hi256 and xcr0.hi16_zmm; + + // AMX requires additional context to be saved by the OS. + const has_amx_save = xcr0.xtilecfg and xcr0.xtiledata; setFeature(cpu, .avx, has_avx_save); - setFeature(cpu, .fma, has_avx_save and bit(leaf.ecx, 12)); + setFeature(cpu, .fma, bit(leaf.ecx, 12) and has_avx_save); // Only enable XSAVE if OS has enabled support for saving YMM state. - setFeature(cpu, .xsave, has_avx_save and bit(leaf.ecx, 26)); - setFeature(cpu, .f16c, has_avx_save and bit(leaf.ecx, 29)); + setFeature(cpu, .xsave, bit(leaf.ecx, 26) and has_avx_save); + setFeature(cpu, .f16c, bit(leaf.ecx, 29) and has_avx_save); leaf = cpuid(0x80000000, 0); const max_ext_level = leaf.eax; if (max_ext_level >= 0x80000001) { leaf = cpuid(0x80000001, 0); + setFeature(cpu, .sahf, bit(leaf.ecx, 0)); setFeature(cpu, .lzcnt, bit(leaf.ecx, 5)); setFeature(cpu, .sse4a, bit(leaf.ecx, 6)); @@ -409,11 +433,21 @@ fn detectNativeFeatures(cpu: *Target.Cpu, os_tag: Target.Os.Tag) void { setFeature(cpu, .fma4, bit(leaf.ecx, 16) and has_avx_save); setFeature(cpu, .tbm, bit(leaf.ecx, 21)); setFeature(cpu, .mwaitx, bit(leaf.ecx, 29)); + setFeature(cpu, .@"64bit", bit(leaf.edx, 29)); } else { for ([_]Target.x86.Feature{ - .sahf, .lzcnt, .sse4a, .prfchw, .xop, - .lwp, .fma4, .tbm, .mwaitx, .@"64bit", + .sahf, + .lzcnt, + .sse4a, + .prfchw, + .xop, + .lwp, + .fma4, + .tbm, + .mwaitx, + + .@"64bit", }) |feat| { setFeature(cpu, feat, false); } @@ -422,10 +456,16 @@ fn detectNativeFeatures(cpu: *Target.Cpu, os_tag: Target.Os.Tag) void { // Misc. memory-related features. if (max_ext_level >= 0x80000008) { leaf = cpuid(0x80000008, 0); + setFeature(cpu, .clzero, bit(leaf.ebx, 0)); + setFeature(cpu, .rdpru, bit(leaf.ebx, 4)); setFeature(cpu, .wbnoinvd, bit(leaf.ebx, 9)); } else { - for ([_]Target.x86.Feature{ .clzero, .wbnoinvd }) |feat| { + for ([_]Target.x86.Feature{ + .clzero, + .rdpru, + .wbnoinvd, + }) |feat| { setFeature(cpu, feat, false); } } @@ -444,6 +484,7 @@ fn detectNativeFeatures(cpu: *Target.Cpu, os_tag: Target.Os.Tag) void { setFeature(cpu, .rtm, bit(leaf.ebx, 11)); // AVX512 is only supported if the OS supports the context save for it. setFeature(cpu, .avx512f, bit(leaf.ebx, 16) and has_avx512_save); + setFeature(cpu, .evex512, bit(leaf.ebx, 16) and has_avx512_save); setFeature(cpu, .avx512dq, bit(leaf.ebx, 17) and has_avx512_save); setFeature(cpu, .rdseed, bit(leaf.ebx, 18)); setFeature(cpu, .adx, bit(leaf.ebx, 19)); @@ -470,8 +511,8 @@ fn detectNativeFeatures(cpu: *Target.Cpu, os_tag: Target.Os.Tag) void { setFeature(cpu, .avx512vnni, bit(leaf.ecx, 11) and has_avx512_save); setFeature(cpu, .avx512bitalg, bit(leaf.ecx, 12) and has_avx512_save); setFeature(cpu, .avx512vpopcntdq, bit(leaf.ecx, 14) and has_avx512_save); - setFeature(cpu, .avx512vp2intersect, bit(leaf.edx, 8) and has_avx512_save); setFeature(cpu, .rdpid, bit(leaf.ecx, 22)); + setFeature(cpu, .kl, bit(leaf.ecx, 23)); setFeature(cpu, .cldemote, bit(leaf.ecx, 25)); setFeature(cpu, .movdiri, bit(leaf.ecx, 27)); setFeature(cpu, .movdir64b, bit(leaf.ecx, 28)); @@ -487,32 +528,153 @@ fn detectNativeFeatures(cpu: *Target.Cpu, os_tag: Target.Os.Tag) void { // leaves using cpuid, since that information is ignored while // detecting features using the "-march=native" flag. // For more info, see X86 ISA docs. - setFeature(cpu, .pconfig, bit(leaf.edx, 18)); setFeature(cpu, .uintr, bit(leaf.edx, 5)); + setFeature(cpu, .avx512vp2intersect, bit(leaf.edx, 8) and has_avx512_save); + setFeature(cpu, .serialize, bit(leaf.edx, 14)); + setFeature(cpu, .tsxldtrk, bit(leaf.edx, 16)); + setFeature(cpu, .pconfig, bit(leaf.edx, 18)); + setFeature(cpu, .amx_bf16, bit(leaf.edx, 22) and has_amx_save); + setFeature(cpu, .avx512fp16, bit(leaf.edx, 23) and has_avx512_save); + setFeature(cpu, .amx_tile, bit(leaf.edx, 24) and has_amx_save); + setFeature(cpu, .amx_int8, bit(leaf.edx, 25) and has_amx_save); - // TODO I feel unsure about this check. - // It doesn't really seem to check for 7.1, just for 7. - // Is this a sound assumption to make? - // Note that this is what other implementations do, so I kind of trust it. - const has_leaf_7_1 = max_level >= 7; - if (has_leaf_7_1) { + if (leaf.eax >= 1) { leaf = cpuid(0x7, 0x1); + + setFeature(cpu, .sha512, bit(leaf.eax, 0)); + setFeature(cpu, .sm3, bit(leaf.eax, 1)); + setFeature(cpu, .sm4, bit(leaf.eax, 2)); + setFeature(cpu, .raoint, bit(leaf.eax, 3)); + setFeature(cpu, .avxvnni, bit(leaf.eax, 4) and has_avx_save); setFeature(cpu, .avx512bf16, bit(leaf.eax, 5) and has_avx512_save); + setFeature(cpu, .cmpccxadd, bit(leaf.eax, 7)); + setFeature(cpu, .amx_fp16, bit(leaf.eax, 21) and has_amx_save); + setFeature(cpu, .hreset, bit(leaf.eax, 22)); + setFeature(cpu, .avxifma, bit(leaf.eax, 23) and has_avx_save); + + setFeature(cpu, .avxvnniint8, bit(leaf.edx, 4) and has_avx_save); + setFeature(cpu, .avxneconvert, bit(leaf.edx, 5) and has_avx_save); + setFeature(cpu, .amx_complex, bit(leaf.edx, 8) and has_amx_save); + setFeature(cpu, .avxvnniint16, bit(leaf.edx, 10) and has_avx_save); + setFeature(cpu, .prefetchi, bit(leaf.edx, 14)); + setFeature(cpu, .usermsr, bit(leaf.edx, 15)); + setFeature(cpu, .avx10_1_256, bit(leaf.edx, 19)); + // APX + setFeature(cpu, .egpr, bit(leaf.edx, 21)); + setFeature(cpu, .push2pop2, bit(leaf.edx, 21)); + setFeature(cpu, .ppx, bit(leaf.edx, 21)); + setFeature(cpu, .ndd, bit(leaf.edx, 21)); + setFeature(cpu, .ccmp, bit(leaf.edx, 21)); + setFeature(cpu, .cf, bit(leaf.edx, 21)); } else { - setFeature(cpu, .avx512bf16, false); + for ([_]Target.x86.Feature{ + .sha512, + .sm3, + .sm4, + .raoint, + .avxvnni, + .avx512bf16, + .cmpccxadd, + .amx_fp16, + .hreset, + .avxifma, + + .avxvnniint8, + .avxneconvert, + .amx_complex, + .avxvnniint16, + .prefetchi, + .usermsr, + .avx10_1_256, + .egpr, + .push2pop2, + .ppx, + .ndd, + .ccmp, + .cf, + }) |feat| { + setFeature(cpu, feat, false); + } } } else { for ([_]Target.x86.Feature{ - .fsgsbase, .sgx, .bmi, .avx2, - .bmi2, .invpcid, .rtm, .avx512f, - .avx512dq, .rdseed, .adx, .avx512ifma, - .clflushopt, .clwb, .avx512pf, .avx512er, - .avx512cd, .sha, .avx512bw, .avx512vl, - .prefetchwt1, .avx512vbmi, .pku, .waitpkg, - .avx512vbmi2, .shstk, .gfni, .vaes, - .vpclmulqdq, .avx512vnni, .avx512bitalg, .avx512vpopcntdq, - .avx512vp2intersect, .rdpid, .cldemote, .movdiri, - .movdir64b, .enqcmd, .pconfig, .avx512bf16, + .fsgsbase, + .sgx, + .bmi, + .avx2, + .smep, + .bmi2, + .invpcid, + .rtm, + .avx512f, + .evex512, + .avx512dq, + .rdseed, + .adx, + .smap, + .avx512ifma, + .clflushopt, + .clwb, + .avx512pf, + .avx512er, + .avx512cd, + .sha, + .avx512bw, + .avx512vl, + + .prefetchwt1, + .avx512vbmi, + .pku, + .waitpkg, + .avx512vbmi2, + .shstk, + .gfni, + .vaes, + .vpclmulqdq, + .avx512vnni, + .avx512bitalg, + .avx512vpopcntdq, + .rdpid, + .kl, + .cldemote, + .movdiri, + .movdir64b, + .enqcmd, + + .uintr, + .avx512vp2intersect, + .serialize, + .tsxldtrk, + .pconfig, + .amx_bf16, + .avx512fp16, + .amx_tile, + .amx_int8, + + .sha512, + .sm3, + .sm4, + .raoint, + .avxvnni, + .avx512bf16, + .cmpccxadd, + .amx_fp16, + .hreset, + .avxifma, + + .avxvnniint8, + .avxneconvert, + .amx_complex, + .avxvnniint16, + .prefetchi, + .usermsr, + .avx10_1_256, + .egpr, + .push2pop2, + .ppx, + .ndd, + .ccmp, + .cf, }) |feat| { setFeature(cpu, feat, false); } @@ -520,21 +682,55 @@ fn detectNativeFeatures(cpu: *Target.Cpu, os_tag: Target.Os.Tag) void { if (max_level >= 0xD and has_avx_save) { leaf = cpuid(0xD, 0x1); + // Only enable XSAVE if OS has enabled support for saving YMM state. setFeature(cpu, .xsaveopt, bit(leaf.eax, 0)); setFeature(cpu, .xsavec, bit(leaf.eax, 1)); setFeature(cpu, .xsaves, bit(leaf.eax, 3)); } else { - for ([_]Target.x86.Feature{ .xsaveopt, .xsavec, .xsaves }) |feat| { + for ([_]Target.x86.Feature{ + .xsaveopt, + .xsavec, + .xsaves, + }) |feat| { setFeature(cpu, feat, false); } } if (max_level >= 0x14) { leaf = cpuid(0x14, 0); + setFeature(cpu, .ptwrite, bit(leaf.ebx, 4)); } else { - setFeature(cpu, .ptwrite, false); + for ([_]Target.x86.Feature{ + .ptwrite, + }) |feat| { + setFeature(cpu, feat, false); + } + } + + if (max_level >= 0x19) { + leaf = cpuid(0x19, 0); + + setFeature(cpu, .widekl, bit(leaf.ebx, 2)); + } else { + for ([_]Target.x86.Feature{ + .widekl, + }) |feat| { + setFeature(cpu, feat, false); + } + } + + if (max_level >= 0x24) { + leaf = cpuid(0x24, 0); + + setFeature(cpu, .avx10_1_512, bit(leaf.ebx, 18)); + } else { + for ([_]Target.x86.Feature{ + .avx10_1_512, + }) |feat| { + setFeature(cpu, feat, false); + } } } |
