diff options
| author | Jacob Young <jacobly0@users.noreply.github.com> | 2024-12-28 22:51:41 -0500 |
|---|---|---|
| committer | Jacob Young <jacobly0@users.noreply.github.com> | 2025-01-16 20:42:08 -0500 |
| commit | ac1a975f9b5a7d939663fa90556a2f038250c531 (patch) | |
| tree | 9df8cf0677d30645b13249b6357240bdf533a67e /lib/std | |
| parent | a7efc56d8680bb51cc2488bbc0680b2fc080174f (diff) | |
| download | zig-ac1a975f9b5a7d939663fa90556a2f038250c531.tar.gz zig-ac1a975f9b5a7d939663fa90556a2f038250c531.zip | |
x86_64: implement clz and not
Diffstat (limited to 'lib/std')
| -rw-r--r-- | lib/std/Target/Query.zig | 8 | ||||
| -rw-r--r-- | lib/std/Target/x86.zig | 65 | ||||
| -rw-r--r-- | lib/std/math/big/int.zig | 13 | ||||
| -rw-r--r-- | lib/std/zig/system/x86.zig | 2 |
4 files changed, 79 insertions, 9 deletions
diff --git a/lib/std/Target/Query.zig b/lib/std/Target/Query.zig index 50db1fed5e..56387c27b3 100644 --- a/lib/std/Target/Query.zig +++ b/lib/std/Target/Query.zig @@ -6,13 +6,13 @@ /// `null` means native. cpu_arch: ?Target.Cpu.Arch = null, -cpu_model: CpuModel = CpuModel.determined_by_arch_os, +cpu_model: CpuModel = .determined_by_arch_os, /// Sparse set of CPU features to add to the set from `cpu_model`. -cpu_features_add: Target.Cpu.Feature.Set = Target.Cpu.Feature.Set.empty, +cpu_features_add: Target.Cpu.Feature.Set = .empty, /// Sparse set of CPU features to remove from the set from `cpu_model`. -cpu_features_sub: Target.Cpu.Feature.Set = Target.Cpu.Feature.Set.empty, +cpu_features_sub: Target.Cpu.Feature.Set = .empty, /// `null` means native. os_tag: ?Target.Os.Tag = null, @@ -38,7 +38,7 @@ abi: ?Target.Abi = null, /// When `os_tag` is `null`, then `null` means native. Otherwise it means the standard path /// based on the `os_tag`. -dynamic_linker: Target.DynamicLinker = Target.DynamicLinker.none, +dynamic_linker: Target.DynamicLinker = .none, /// `null` means default for the cpu/arch/os combo. ofmt: ?Target.ObjectFormat = null, diff --git a/lib/std/Target/x86.zig b/lib/std/Target/x86.zig index 86c334afda..76dddb222d 100644 --- a/lib/std/Target/x86.zig +++ b/lib/std/Target/x86.zig @@ -47,6 +47,7 @@ pub const Feature = enum { bmi2, branch_hint, branchfusion, + bsf_bsr_0_clobbers_result, ccmp, cf, cldemote, @@ -167,6 +168,8 @@ pub const Feature = enum { slow_unaligned_mem_32, sm3, sm4, + smap, + smep, soft_float, sse, sse2, @@ -497,6 +500,11 @@ pub const all_features = blk: { .description = "CMP/TEST can be fused with conditional branches", .dependencies = featureSet(&[_]Feature{}), }; + result[@intFromEnum(Feature.bsf_bsr_0_clobbers_result)] = .{ + .llvm_name = null, + .description = "BSF/BSR may clobber the lower 32-bits of the result register when the source is zero", + .dependencies = featureSet(&[_]Feature{}), + }; result[@intFromEnum(Feature.ccmp)] = .{ .llvm_name = "ccmp", .description = "Support conditional cmp & test instructions", @@ -1127,6 +1135,16 @@ pub const all_features = blk: { .avx2, }), }; + result[@intFromEnum(Feature.smap)] = .{ + .llvm_name = null, + .description = "Enable Supervisor Mode Access Prevention", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.smep)] = .{ + .llvm_name = null, + .description = "Enable Supervisor Mode Execution Prevention", + .dependencies = featureSet(&[_]Feature{}), + }; result[@intFromEnum(Feature.soft_float)] = .{ .llvm_name = "soft-float", .description = "Use software floating point features", @@ -1371,6 +1389,8 @@ pub const cpu = struct { .sha, .shstk, .slow_3ops_lea, + .smap, + .smep, .tuning_fast_imm_vector_shift, .vaes, .vpclmulqdq, @@ -1467,6 +1487,8 @@ pub const cpu = struct { .sha, .shstk, .slow_3ops_lea, + .smap, + .smep, .tuning_fast_imm_vector_shift, .uintr, .vaes, @@ -1545,6 +1567,8 @@ pub const cpu = struct { .slow_3ops_lea, .sm3, .sm4, + .smap, + .smep, .tuning_fast_imm_vector_shift, .uintr, .vaes, @@ -1783,6 +1807,8 @@ pub const cpu = struct { .sahf, .sbb_dep_breaking, .slow_shld, + .smap, + .smep, .sse4a, .vzeroupper, .x87, @@ -1995,6 +2021,8 @@ pub const cpu = struct { .rdseed, .sahf, .slow_3ops_lea, + .smap, + .smep, .vzeroupper, .x87, .xsaveopt, @@ -2136,6 +2164,8 @@ pub const cpu = struct { .sahf, .sha, .slow_3ops_lea, + .smap, + .smep, .tuning_fast_imm_vector_shift, .vzeroupper, .x87, @@ -2195,6 +2225,8 @@ pub const cpu = struct { .rdseed, .sahf, .slow_3ops_lea, + .smap, + .smep, .tuning_fast_imm_vector_shift, .vzeroupper, .x87, @@ -2450,6 +2482,8 @@ pub const cpu = struct { .serialize, .sha, .shstk, + .smap, + .smep, .tsxldtrk, .tuning_fast_imm_vector_shift, .uintr, @@ -2519,6 +2553,8 @@ pub const cpu = struct { .slow_incdec, .slow_lea, .slow_two_mem_ops, + .smap, + .smep, .sse4_2, .use_glm_div_sqrt_costs, .vzeroupper, @@ -2898,6 +2934,7 @@ pub const cpu = struct { .rdrnd, .sahf, .slow_3ops_lea, + .smep, .vzeroupper, .x87, .xsaveopt, @@ -2907,6 +2944,7 @@ pub const cpu = struct { .name = "i386", .llvm_name = "i386", .features = featureSet(&[_]Feature{ + .bsf_bsr_0_clobbers_result, .slow_unaligned_mem_16, .vzeroupper, .x87, @@ -2916,6 +2954,7 @@ pub const cpu = struct { .name = "i486", .llvm_name = "i486", .features = featureSet(&[_]Feature{ + .bsf_bsr_0_clobbers_result, .slow_unaligned_mem_16, .vzeroupper, .x87, @@ -3096,6 +3135,7 @@ pub const cpu = struct { .sahf, .slow_3ops_lea, .slow_unaligned_mem_32, + .smep, .vzeroupper, .x87, .xsaveopt, @@ -3403,6 +3443,8 @@ pub const cpu = struct { .sha, .shstk, .slow_3ops_lea, + .smap, + .smep, .tuning_fast_imm_vector_shift, .vaes, .vpclmulqdq, @@ -3766,6 +3808,8 @@ pub const cpu = struct { .sha, .shstk, .slow_3ops_lea, + .smap, + .smep, .tuning_fast_imm_vector_shift, .vaes, .vpclmulqdq, @@ -3831,6 +3875,8 @@ pub const cpu = struct { .rdseed, .sahf, .sha, + .smap, + .smep, .tuning_fast_imm_vector_shift, .vaes, .vpclmulqdq, @@ -3939,6 +3985,8 @@ pub const cpu = struct { .serialize, .sha, .shstk, + .smap, + .smep, .tsxldtrk, .tuning_fast_imm_vector_shift, .uintr, @@ -4042,6 +4090,7 @@ pub const cpu = struct { .slow_lea, .slow_pmulld, .slow_two_mem_ops, + .smep, .sse4_2, .use_slm_arith_costs, .vzeroupper, @@ -4098,6 +4147,8 @@ pub const cpu = struct { .rdseed, .sahf, .slow_3ops_lea, + .smap, + .smep, .tuning_fast_imm_vector_shift, .vzeroupper, .x87, @@ -4150,6 +4201,8 @@ pub const cpu = struct { .rdseed, .sahf, .slow_3ops_lea, + .smap, + .smep, .vzeroupper, .x87, .xsavec, @@ -4305,6 +4358,8 @@ pub const cpu = struct { .sahf, .sha, .shstk, + .smap, + .smep, .tuning_fast_imm_vector_shift, .vaes, .vpclmulqdq, @@ -4574,6 +4629,8 @@ pub const cpu = struct { .sbb_dep_breaking, .sha, .slow_shld, + .smap, + .smep, .sse4a, .vzeroupper, .x87, @@ -4629,6 +4686,8 @@ pub const cpu = struct { .sbb_dep_breaking, .sha, .slow_shld, + .smap, + .smep, .sse4a, .vzeroupper, .wbnoinvd, @@ -4686,6 +4745,8 @@ pub const cpu = struct { .sbb_dep_breaking, .sha, .slow_shld, + .smap, + .smep, .sse4a, .vaes, .vpclmulqdq, @@ -4757,6 +4818,8 @@ pub const cpu = struct { .sha, .shstk, .slow_shld, + .smap, + .smep, .sse4a, .vaes, .vpclmulqdq, @@ -4833,6 +4896,8 @@ pub const cpu = struct { .sha, .shstk, .slow_shld, + .smap, + .smep, .sse4a, .vaes, .vpclmulqdq, diff --git a/lib/std/math/big/int.zig b/lib/std/math/big/int.zig index 691ae02280..98d37d8994 100644 --- a/lib/std/math/big/int.zig +++ b/lib/std/math/big/int.zig @@ -2520,12 +2520,13 @@ pub const Const = struct { return order(a, b) == .eq; } + /// Returns the number of leading zeros in twos-complement form. pub fn clz(a: Const, bits: Limb) Limb { - // Limbs are stored in little-endian order but we need - // to iterate big-endian. + // Limbs are stored in little-endian order but we need to iterate big-endian. + if (!a.positive) return 0; var total_limb_lz: Limb = 0; var i: usize = a.limbs.len; - const bits_per_limb = @sizeOf(Limb) * 8; + const bits_per_limb = @bitSizeOf(Limb); while (i != 0) { i -= 1; const limb = a.limbs[i]; @@ -2537,13 +2538,15 @@ pub const Const = struct { return total_limb_lz + bits - total_limb_bits; } + /// Returns the number of trailing zeros in twos-complement form. pub fn ctz(a: Const, bits: Limb) Limb { - // Limbs are stored in little-endian order. + // Limbs are stored in little-endian order. Converting a negative number to twos-complement + // flips all bits above the lowest set bit, which does not affect the trailing zero count. var result: Limb = 0; for (a.limbs) |limb| { const limb_tz = @ctz(limb); result += limb_tz; - if (limb_tz != @sizeOf(Limb) * 8) break; + if (limb_tz != @bitSizeOf(Limb)) break; } return @min(result, bits); } diff --git a/lib/std/zig/system/x86.zig b/lib/std/zig/system/x86.zig index 7bd1148e13..2737c67d0c 100644 --- a/lib/std/zig/system/x86.zig +++ b/lib/std/zig/system/x86.zig @@ -369,6 +369,7 @@ fn detectNativeFeatures(cpu: *Target.Cpu, os_tag: Target.Os.Tag) void { setFeature(cpu, .bmi, bit(leaf.ebx, 3)); // AVX2 is only supported if we have the OS save support from AVX. setFeature(cpu, .avx2, bit(leaf.ebx, 5) and has_avx_save); + setFeature(cpu, .smep, bit(leaf.ebx, 7)); setFeature(cpu, .bmi2, bit(leaf.ebx, 8)); setFeature(cpu, .invpcid, bit(leaf.ebx, 10)); setFeature(cpu, .rtm, bit(leaf.ebx, 11)); @@ -377,6 +378,7 @@ fn detectNativeFeatures(cpu: *Target.Cpu, os_tag: Target.Os.Tag) void { setFeature(cpu, .avx512dq, bit(leaf.ebx, 17) and has_avx512_save); setFeature(cpu, .rdseed, bit(leaf.ebx, 18)); setFeature(cpu, .adx, bit(leaf.ebx, 19)); + setFeature(cpu, .smap, bit(leaf.ebx, 20)); setFeature(cpu, .avx512ifma, bit(leaf.ebx, 21) and has_avx512_save); setFeature(cpu, .clflushopt, bit(leaf.ebx, 23)); setFeature(cpu, .clwb, bit(leaf.ebx, 24)); |
