x86_64: implement clz and not

author: Jacob Young <jacobly0@users.noreply.github.com> 2024-12-28 22:51:41 -0500
committer: Jacob Young <jacobly0@users.noreply.github.com> 2025-01-16 20:42:08 -0500
commit: ac1a975f9b5a7d939663fa90556a2f038250c531 (patch)
tree: 9df8cf0677d30645b13249b6357240bdf533a67e /lib/std
parent: a7efc56d8680bb51cc2488bbc0680b2fc080174f (diff)
download: zig-ac1a975f9b5a7d939663fa90556a2f038250c531.tar.gz
zig-ac1a975f9b5a7d939663fa90556a2f038250c531.zip
4 files changed, 79 insertions, 9 deletions
diff --git a/lib/std/Target/Query.zig b/lib/std/Target/Query.zig
index 50db1fed5e..56387c27b3 100644
--- a/lib/std/Target/Query.zig
+++ b/lib/std/Target/Query.zig
@@ -6,13 +6,13 @@
 /// `null` means native.
 cpu_arch: ?Target.Cpu.Arch = null,
 
-cpu_model: CpuModel = CpuModel.determined_by_arch_os,
+cpu_model: CpuModel = .determined_by_arch_os,
 
 /// Sparse set of CPU features to add to the set from `cpu_model`.
-cpu_features_add: Target.Cpu.Feature.Set = Target.Cpu.Feature.Set.empty,
+cpu_features_add: Target.Cpu.Feature.Set = .empty,
 
 /// Sparse set of CPU features to remove from the set from `cpu_model`.
-cpu_features_sub: Target.Cpu.Feature.Set = Target.Cpu.Feature.Set.empty,
+cpu_features_sub: Target.Cpu.Feature.Set = .empty,
 
 /// `null` means native.
 os_tag: ?Target.Os.Tag = null,
@@ -38,7 +38,7 @@ abi: ?Target.Abi = null,
 
 /// When `os_tag` is `null`, then `null` means native. Otherwise it means the standard path
 /// based on the `os_tag`.
-dynamic_linker: Target.DynamicLinker = Target.DynamicLinker.none,
+dynamic_linker: Target.DynamicLinker = .none,
 
 /// `null` means default for the cpu/arch/os combo.
 ofmt: ?Target.ObjectFormat = null,
diff --git a/lib/std/Target/x86.zig b/lib/std/Target/x86.zig
index 86c334afda..76dddb222d 100644
--- a/lib/std/Target/x86.zig
+++ b/lib/std/Target/x86.zig
@@ -47,6 +47,7 @@ pub const Feature = enum {
     bmi2,
     branch_hint,
     branchfusion,
+    bsf_bsr_0_clobbers_result,
     ccmp,
     cf,
     cldemote,
@@ -167,6 +168,8 @@ pub const Feature = enum {
     slow_unaligned_mem_32,
     sm3,
     sm4,
+    smap,
+    smep,
     soft_float,
     sse,
     sse2,
@@ -497,6 +500,11 @@ pub const all_features = blk: {
         .description = "CMP/TEST can be fused with conditional branches",
         .dependencies = featureSet(&[_]Feature{}),
     };
+    result[@intFromEnum(Feature.bsf_bsr_0_clobbers_result)] = .{
+        .llvm_name = null,
+        .description = "BSF/BSR may clobber the lower 32-bits of the result register when the source is zero",
+        .dependencies = featureSet(&[_]Feature{}),
+    };
     result[@intFromEnum(Feature.ccmp)] = .{
         .llvm_name = "ccmp",
         .description = "Support conditional cmp & test instructions",
@@ -1127,6 +1135,16 @@ pub const all_features = blk: {
             .avx2,
         }),
     };
+    result[@intFromEnum(Feature.smap)] = .{
+        .llvm_name = null,
+        .description = "Enable Supervisor Mode Access Prevention",
+        .dependencies = featureSet(&[_]Feature{}),
+    };
+    result[@intFromEnum(Feature.smep)] = .{
+        .llvm_name = null,
+        .description = "Enable Supervisor Mode Execution Prevention",
+        .dependencies = featureSet(&[_]Feature{}),
+    };
     result[@intFromEnum(Feature.soft_float)] = .{
         .llvm_name = "soft-float",
         .description = "Use software floating point features",
@@ -1371,6 +1389,8 @@ pub const cpu = struct {
             .sha,
             .shstk,
             .slow_3ops_lea,
+            .smap,
+            .smep,
             .tuning_fast_imm_vector_shift,
             .vaes,
             .vpclmulqdq,
@@ -1467,6 +1487,8 @@ pub const cpu = struct {
             .sha,
             .shstk,
             .slow_3ops_lea,
+            .smap,
+            .smep,
             .tuning_fast_imm_vector_shift,
             .uintr,
             .vaes,
@@ -1545,6 +1567,8 @@ pub const cpu = struct {
             .slow_3ops_lea,
             .sm3,
             .sm4,
+            .smap,
+            .smep,
             .tuning_fast_imm_vector_shift,
             .uintr,
             .vaes,
@@ -1783,6 +1807,8 @@ pub const cpu = struct {
             .sahf,
             .sbb_dep_breaking,
             .slow_shld,
+            .smap,
+            .smep,
             .sse4a,
             .vzeroupper,
             .x87,
@@ -1995,6 +2021,8 @@ pub const cpu = struct {
             .rdseed,
             .sahf,
             .slow_3ops_lea,
+            .smap,
+            .smep,
             .vzeroupper,
             .x87,
             .xsaveopt,
@@ -2136,6 +2164,8 @@ pub const cpu = struct {
             .sahf,
             .sha,
             .slow_3ops_lea,
+            .smap,
+            .smep,
             .tuning_fast_imm_vector_shift,
             .vzeroupper,
             .x87,
@@ -2195,6 +2225,8 @@ pub const cpu = struct {
             .rdseed,
             .sahf,
             .slow_3ops_lea,
+            .smap,
+            .smep,
             .tuning_fast_imm_vector_shift,
             .vzeroupper,
             .x87,
@@ -2450,6 +2482,8 @@ pub const cpu = struct {
             .serialize,
             .sha,
             .shstk,
+            .smap,
+            .smep,
             .tsxldtrk,
             .tuning_fast_imm_vector_shift,
             .uintr,
@@ -2519,6 +2553,8 @@ pub const cpu = struct {
             .slow_incdec,
             .slow_lea,
             .slow_two_mem_ops,
+            .smap,
+            .smep,
             .sse4_2,
             .use_glm_div_sqrt_costs,
             .vzeroupper,
@@ -2898,6 +2934,7 @@ pub const cpu = struct {
             .rdrnd,
             .sahf,
             .slow_3ops_lea,
+            .smep,
             .vzeroupper,
             .x87,
             .xsaveopt,
@@ -2907,6 +2944,7 @@ pub const cpu = struct {
         .name = "i386",
         .llvm_name = "i386",
         .features = featureSet(&[_]Feature{
+            .bsf_bsr_0_clobbers_result,
             .slow_unaligned_mem_16,
             .vzeroupper,
             .x87,
@@ -2916,6 +2954,7 @@ pub const cpu = struct {
         .name = "i486",
         .llvm_name = "i486",
         .features = featureSet(&[_]Feature{
+            .bsf_bsr_0_clobbers_result,
             .slow_unaligned_mem_16,
             .vzeroupper,
             .x87,
@@ -3096,6 +3135,7 @@ pub const cpu = struct {
             .sahf,
             .slow_3ops_lea,
             .slow_unaligned_mem_32,
+            .smep,
             .vzeroupper,
             .x87,
             .xsaveopt,
@@ -3403,6 +3443,8 @@ pub const cpu = struct {
             .sha,
             .shstk,
             .slow_3ops_lea,
+            .smap,
+            .smep,
             .tuning_fast_imm_vector_shift,
             .vaes,
             .vpclmulqdq,
@@ -3766,6 +3808,8 @@ pub const cpu = struct {
             .sha,
             .shstk,
             .slow_3ops_lea,
+            .smap,
+            .smep,
             .tuning_fast_imm_vector_shift,
             .vaes,
             .vpclmulqdq,
@@ -3831,6 +3875,8 @@ pub const cpu = struct {
             .rdseed,
             .sahf,
             .sha,
+            .smap,
+            .smep,
             .tuning_fast_imm_vector_shift,
             .vaes,
             .vpclmulqdq,
@@ -3939,6 +3985,8 @@ pub const cpu = struct {
             .serialize,
             .sha,
             .shstk,
+            .smap,
+            .smep,
             .tsxldtrk,
             .tuning_fast_imm_vector_shift,
             .uintr,
@@ -4042,6 +4090,7 @@ pub const cpu = struct {
             .slow_lea,
             .slow_pmulld,
             .slow_two_mem_ops,
+            .smep,
             .sse4_2,
             .use_slm_arith_costs,
             .vzeroupper,
@@ -4098,6 +4147,8 @@ pub const cpu = struct {
             .rdseed,
             .sahf,
             .slow_3ops_lea,
+            .smap,
+            .smep,
             .tuning_fast_imm_vector_shift,
             .vzeroupper,
             .x87,
@@ -4150,6 +4201,8 @@ pub const cpu = struct {
             .rdseed,
             .sahf,
             .slow_3ops_lea,
+            .smap,
+            .smep,
             .vzeroupper,
             .x87,
             .xsavec,
@@ -4305,6 +4358,8 @@ pub const cpu = struct {
             .sahf,
             .sha,
             .shstk,
+            .smap,
+            .smep,
             .tuning_fast_imm_vector_shift,
             .vaes,
             .vpclmulqdq,
@@ -4574,6 +4629,8 @@ pub const cpu = struct {
             .sbb_dep_breaking,
             .sha,
             .slow_shld,
+            .smap,
+            .smep,
             .sse4a,
             .vzeroupper,
             .x87,
@@ -4629,6 +4686,8 @@ pub const cpu = struct {
             .sbb_dep_breaking,
             .sha,
             .slow_shld,
+            .smap,
+            .smep,
             .sse4a,
             .vzeroupper,
             .wbnoinvd,
@@ -4686,6 +4745,8 @@ pub const cpu = struct {
             .sbb_dep_breaking,
             .sha,
             .slow_shld,
+            .smap,
+            .smep,
             .sse4a,
             .vaes,
             .vpclmulqdq,
@@ -4757,6 +4818,8 @@ pub const cpu = struct {
             .sha,
             .shstk,
             .slow_shld,
+            .smap,
+            .smep,
             .sse4a,
             .vaes,
             .vpclmulqdq,
@@ -4833,6 +4896,8 @@ pub const cpu = struct {
             .sha,
             .shstk,
             .slow_shld,
+            .smap,
+            .smep,
             .sse4a,
             .vaes,
             .vpclmulqdq,
diff --git a/lib/std/math/big/int.zig b/lib/std/math/big/int.zig
index 691ae02280..98d37d8994 100644
--- a/lib/std/math/big/int.zig
+++ b/lib/std/math/big/int.zig
@@ -2520,12 +2520,13 @@ pub const Const = struct {
         return order(a, b) == .eq;
     }
 
+    /// Returns the number of leading zeros in two's complement form.
     pub fn clz(a: Const, bits: Limb) Limb {
-        // Limbs are stored in little-endian order but we need
-        // to iterate big-endian.
+        // Limbs are stored in little-endian order but we need to iterate big-endian.
+        if (!a.positive) return 0;
         var total_limb_lz: Limb = 0;
         var i: usize = a.limbs.len;
-        const bits_per_limb = @sizeOf(Limb) * 8;
+        const bits_per_limb = @bitSizeOf(Limb);
         while (i != 0) {
             i -= 1;
             const limb = a.limbs[i];
@@ -2537,13 +2538,15 @@ pub const Const = struct {
         return total_limb_lz + bits - total_limb_bits;
     }
 
+    /// Returns the number of trailing zeros in two's complement form.
     pub fn ctz(a: Const, bits: Limb) Limb {
-        // Limbs are stored in little-endian order.
+        // Limbs are stored in little-endian order. Converting a negative number to two's complement
+        // flips all bits above the lowest set bit, which does not affect the trailing zero count.
         var result: Limb = 0;
         for (a.limbs) |limb| {
             const limb_tz = @ctz(limb);
             result += limb_tz;
-            if (limb_tz != @sizeOf(Limb) * 8) break;
+            if (limb_tz != @bitSizeOf(Limb)) break;
         }
         return @min(result, bits);
     }
diff --git a/lib/std/zig/system/x86.zig b/lib/std/zig/system/x86.zig
index 7bd1148e13..2737c67d0c 100644
--- a/lib/std/zig/system/x86.zig
+++ b/lib/std/zig/system/x86.zig
@@ -369,6 +369,7 @@ fn detectNativeFeatures(cpu: *Target.Cpu, os_tag: Target.Os.Tag) void {
         setFeature(cpu, .bmi, bit(leaf.ebx, 3));
         // AVX2 is only supported if we have the OS save support from AVX.
         setFeature(cpu, .avx2, bit(leaf.ebx, 5) and has_avx_save);
+        setFeature(cpu, .smep, bit(leaf.ebx, 7));
         setFeature(cpu, .bmi2, bit(leaf.ebx, 8));
         setFeature(cpu, .invpcid, bit(leaf.ebx, 10));
         setFeature(cpu, .rtm, bit(leaf.ebx, 11));
@@ -377,6 +378,7 @@ fn detectNativeFeatures(cpu: *Target.Cpu, os_tag: Target.Os.Tag) void {
         setFeature(cpu, .avx512dq, bit(leaf.ebx, 17) and has_avx512_save);
         setFeature(cpu, .rdseed, bit(leaf.ebx, 18));
         setFeature(cpu, .adx, bit(leaf.ebx, 19));
+        setFeature(cpu, .smap, bit(leaf.ebx, 20));
         setFeature(cpu, .avx512ifma, bit(leaf.ebx, 21) and has_avx512_save);
         setFeature(cpu, .clflushopt, bit(leaf.ebx, 23));
         setFeature(cpu, .clwb, bit(leaf.ebx, 24));
author	Jacob Young <jacobly0@users.noreply.github.com>	2024-12-28 22:51:41 -0500
committer	Jacob Young <jacobly0@users.noreply.github.com>	2025-01-16 20:42:08 -0500
commit	ac1a975f9b5a7d939663fa90556a2f038250c531 (patch)
tree	9df8cf0677d30645b13249b6357240bdf533a67e /lib/std
parent	a7efc56d8680bb51cc2488bbc0680b2fc080174f (diff)
download	zig-ac1a975f9b5a7d939663fa90556a2f038250c531.tar.gz zig-ac1a975f9b5a7d939663fa90556a2f038250c531.zip