aboutsummaryrefslogtreecommitdiff
path: root/lib/std
diff options
context:
space:
mode:
author Jacob Young <jacobly0@users.noreply.github.com> 2024-12-28 22:51:41 -0500
committer Jacob Young <jacobly0@users.noreply.github.com> 2025-01-16 20:42:08 -0500
commit ac1a975f9b5a7d939663fa90556a2f038250c531 (patch)
tree 9df8cf0677d30645b13249b6357240bdf533a67e /lib/std
parent a7efc56d8680bb51cc2488bbc0680b2fc080174f (diff)
download zig-ac1a975f9b5a7d939663fa90556a2f038250c531.tar.gz
zig-ac1a975f9b5a7d939663fa90556a2f038250c531.zip
x86_64: implement clz and not
Diffstat (limited to 'lib/std')
-rw-r--r-- lib/std/Target/Query.zig 8
-rw-r--r-- lib/std/Target/x86.zig 65
-rw-r--r-- lib/std/math/big/int.zig 13
-rw-r--r-- lib/std/zig/system/x86.zig 2
4 files changed, 79 insertions, 9 deletions
diff --git a/lib/std/Target/Query.zig b/lib/std/Target/Query.zig
index 50db1fed5e..56387c27b3 100644
--- a/lib/std/Target/Query.zig
+++ b/lib/std/Target/Query.zig
@@ -6,13 +6,13 @@
/// `null` means native.
cpu_arch: ?Target.Cpu.Arch = null,
-cpu_model: CpuModel = CpuModel.determined_by_arch_os,
+cpu_model: CpuModel = .determined_by_arch_os,
/// Sparse set of CPU features to add to the set from `cpu_model`.
-cpu_features_add: Target.Cpu.Feature.Set = Target.Cpu.Feature.Set.empty,
+cpu_features_add: Target.Cpu.Feature.Set = .empty,
/// Sparse set of CPU features to remove from the set from `cpu_model`.
-cpu_features_sub: Target.Cpu.Feature.Set = Target.Cpu.Feature.Set.empty,
+cpu_features_sub: Target.Cpu.Feature.Set = .empty,
/// `null` means native.
os_tag: ?Target.Os.Tag = null,
@@ -38,7 +38,7 @@ abi: ?Target.Abi = null,
/// When `os_tag` is `null`, then `null` means native. Otherwise it means the standard path
/// based on the `os_tag`.
-dynamic_linker: Target.DynamicLinker = Target.DynamicLinker.none,
+dynamic_linker: Target.DynamicLinker = .none,
/// `null` means default for the cpu/arch/os combo.
ofmt: ?Target.ObjectFormat = null,
diff --git a/lib/std/Target/x86.zig b/lib/std/Target/x86.zig
index 86c334afda..76dddb222d 100644
--- a/lib/std/Target/x86.zig
+++ b/lib/std/Target/x86.zig
@@ -47,6 +47,7 @@ pub const Feature = enum {
bmi2,
branch_hint,
branchfusion,
+ bsf_bsr_0_clobbers_result,
ccmp,
cf,
cldemote,
@@ -167,6 +168,8 @@ pub const Feature = enum {
slow_unaligned_mem_32,
sm3,
sm4,
+ smap,
+ smep,
soft_float,
sse,
sse2,
@@ -497,6 +500,11 @@ pub const all_features = blk: {
.description = "CMP/TEST can be fused with conditional branches",
.dependencies = featureSet(&[_]Feature{}),
};
+ result[@intFromEnum(Feature.bsf_bsr_0_clobbers_result)] = .{
+ .llvm_name = null,
+ .description = "BSF/BSR may clobber the lower 32-bits of the result register when the source is zero",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
result[@intFromEnum(Feature.ccmp)] = .{
.llvm_name = "ccmp",
.description = "Support conditional cmp & test instructions",
@@ -1127,6 +1135,16 @@ pub const all_features = blk: {
.avx2,
}),
};
+ result[@intFromEnum(Feature.smap)] = .{
+ .llvm_name = null,
+ .description = "Enable Supervisor Mode Access Prevention",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
+ result[@intFromEnum(Feature.smep)] = .{
+ .llvm_name = null,
+ .description = "Enable Supervisor Mode Execution Prevention",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
result[@intFromEnum(Feature.soft_float)] = .{
.llvm_name = "soft-float",
.description = "Use software floating point features",
@@ -1371,6 +1389,8 @@ pub const cpu = struct {
.sha,
.shstk,
.slow_3ops_lea,
+ .smap,
+ .smep,
.tuning_fast_imm_vector_shift,
.vaes,
.vpclmulqdq,
@@ -1467,6 +1487,8 @@ pub const cpu = struct {
.sha,
.shstk,
.slow_3ops_lea,
+ .smap,
+ .smep,
.tuning_fast_imm_vector_shift,
.uintr,
.vaes,
@@ -1545,6 +1567,8 @@ pub const cpu = struct {
.slow_3ops_lea,
.sm3,
.sm4,
+ .smap,
+ .smep,
.tuning_fast_imm_vector_shift,
.uintr,
.vaes,
@@ -1783,6 +1807,8 @@ pub const cpu = struct {
.sahf,
.sbb_dep_breaking,
.slow_shld,
+ .smap,
+ .smep,
.sse4a,
.vzeroupper,
.x87,
@@ -1995,6 +2021,8 @@ pub const cpu = struct {
.rdseed,
.sahf,
.slow_3ops_lea,
+ .smap,
+ .smep,
.vzeroupper,
.x87,
.xsaveopt,
@@ -2136,6 +2164,8 @@ pub const cpu = struct {
.sahf,
.sha,
.slow_3ops_lea,
+ .smap,
+ .smep,
.tuning_fast_imm_vector_shift,
.vzeroupper,
.x87,
@@ -2195,6 +2225,8 @@ pub const cpu = struct {
.rdseed,
.sahf,
.slow_3ops_lea,
+ .smap,
+ .smep,
.tuning_fast_imm_vector_shift,
.vzeroupper,
.x87,
@@ -2450,6 +2482,8 @@ pub const cpu = struct {
.serialize,
.sha,
.shstk,
+ .smap,
+ .smep,
.tsxldtrk,
.tuning_fast_imm_vector_shift,
.uintr,
@@ -2519,6 +2553,8 @@ pub const cpu = struct {
.slow_incdec,
.slow_lea,
.slow_two_mem_ops,
+ .smap,
+ .smep,
.sse4_2,
.use_glm_div_sqrt_costs,
.vzeroupper,
@@ -2898,6 +2934,7 @@ pub const cpu = struct {
.rdrnd,
.sahf,
.slow_3ops_lea,
+ .smep,
.vzeroupper,
.x87,
.xsaveopt,
@@ -2907,6 +2944,7 @@ pub const cpu = struct {
.name = "i386",
.llvm_name = "i386",
.features = featureSet(&[_]Feature{
+ .bsf_bsr_0_clobbers_result,
.slow_unaligned_mem_16,
.vzeroupper,
.x87,
@@ -2916,6 +2954,7 @@ pub const cpu = struct {
.name = "i486",
.llvm_name = "i486",
.features = featureSet(&[_]Feature{
+ .bsf_bsr_0_clobbers_result,
.slow_unaligned_mem_16,
.vzeroupper,
.x87,
@@ -3096,6 +3135,7 @@ pub const cpu = struct {
.sahf,
.slow_3ops_lea,
.slow_unaligned_mem_32,
+ .smep,
.vzeroupper,
.x87,
.xsaveopt,
@@ -3403,6 +3443,8 @@ pub const cpu = struct {
.sha,
.shstk,
.slow_3ops_lea,
+ .smap,
+ .smep,
.tuning_fast_imm_vector_shift,
.vaes,
.vpclmulqdq,
@@ -3766,6 +3808,8 @@ pub const cpu = struct {
.sha,
.shstk,
.slow_3ops_lea,
+ .smap,
+ .smep,
.tuning_fast_imm_vector_shift,
.vaes,
.vpclmulqdq,
@@ -3831,6 +3875,8 @@ pub const cpu = struct {
.rdseed,
.sahf,
.sha,
+ .smap,
+ .smep,
.tuning_fast_imm_vector_shift,
.vaes,
.vpclmulqdq,
@@ -3939,6 +3985,8 @@ pub const cpu = struct {
.serialize,
.sha,
.shstk,
+ .smap,
+ .smep,
.tsxldtrk,
.tuning_fast_imm_vector_shift,
.uintr,
@@ -4042,6 +4090,7 @@ pub const cpu = struct {
.slow_lea,
.slow_pmulld,
.slow_two_mem_ops,
+ .smep,
.sse4_2,
.use_slm_arith_costs,
.vzeroupper,
@@ -4098,6 +4147,8 @@ pub const cpu = struct {
.rdseed,
.sahf,
.slow_3ops_lea,
+ .smap,
+ .smep,
.tuning_fast_imm_vector_shift,
.vzeroupper,
.x87,
@@ -4150,6 +4201,8 @@ pub const cpu = struct {
.rdseed,
.sahf,
.slow_3ops_lea,
+ .smap,
+ .smep,
.vzeroupper,
.x87,
.xsavec,
@@ -4305,6 +4358,8 @@ pub const cpu = struct {
.sahf,
.sha,
.shstk,
+ .smap,
+ .smep,
.tuning_fast_imm_vector_shift,
.vaes,
.vpclmulqdq,
@@ -4574,6 +4629,8 @@ pub const cpu = struct {
.sbb_dep_breaking,
.sha,
.slow_shld,
+ .smap,
+ .smep,
.sse4a,
.vzeroupper,
.x87,
@@ -4629,6 +4686,8 @@ pub const cpu = struct {
.sbb_dep_breaking,
.sha,
.slow_shld,
+ .smap,
+ .smep,
.sse4a,
.vzeroupper,
.wbnoinvd,
@@ -4686,6 +4745,8 @@ pub const cpu = struct {
.sbb_dep_breaking,
.sha,
.slow_shld,
+ .smap,
+ .smep,
.sse4a,
.vaes,
.vpclmulqdq,
@@ -4757,6 +4818,8 @@ pub const cpu = struct {
.sha,
.shstk,
.slow_shld,
+ .smap,
+ .smep,
.sse4a,
.vaes,
.vpclmulqdq,
@@ -4833,6 +4896,8 @@ pub const cpu = struct {
.sha,
.shstk,
.slow_shld,
+ .smap,
+ .smep,
.sse4a,
.vaes,
.vpclmulqdq,
diff --git a/lib/std/math/big/int.zig b/lib/std/math/big/int.zig
index 691ae02280..98d37d8994 100644
--- a/lib/std/math/big/int.zig
+++ b/lib/std/math/big/int.zig
@@ -2520,12 +2520,13 @@ pub const Const = struct {
return order(a, b) == .eq;
}
+ /// Returns the number of leading zeros in twos-complement form.
pub fn clz(a: Const, bits: Limb) Limb {
- // Limbs are stored in little-endian order but we need
- // to iterate big-endian.
+ // Limbs are stored in little-endian order but we need to iterate big-endian.
+ if (!a.positive) return 0;
var total_limb_lz: Limb = 0;
var i: usize = a.limbs.len;
- const bits_per_limb = @sizeOf(Limb) * 8;
+ const bits_per_limb = @bitSizeOf(Limb);
while (i != 0) {
i -= 1;
const limb = a.limbs[i];
@@ -2537,13 +2538,15 @@ pub const Const = struct {
return total_limb_lz + bits - total_limb_bits;
}
+ /// Returns the number of trailing zeros in twos-complement form.
pub fn ctz(a: Const, bits: Limb) Limb {
- // Limbs are stored in little-endian order.
+ // Limbs are stored in little-endian order. Converting a negative number to twos-complement
+ // flips all bits above the lowest set bit, which does not affect the trailing zero count.
var result: Limb = 0;
for (a.limbs) |limb| {
const limb_tz = @ctz(limb);
result += limb_tz;
- if (limb_tz != @sizeOf(Limb) * 8) break;
+ if (limb_tz != @bitSizeOf(Limb)) break;
}
return @min(result, bits);
}
diff --git a/lib/std/zig/system/x86.zig b/lib/std/zig/system/x86.zig
index 7bd1148e13..2737c67d0c 100644
--- a/lib/std/zig/system/x86.zig
+++ b/lib/std/zig/system/x86.zig
@@ -369,6 +369,7 @@ fn detectNativeFeatures(cpu: *Target.Cpu, os_tag: Target.Os.Tag) void {
setFeature(cpu, .bmi, bit(leaf.ebx, 3));
// AVX2 is only supported if we have the OS save support from AVX.
setFeature(cpu, .avx2, bit(leaf.ebx, 5) and has_avx_save);
+ setFeature(cpu, .smep, bit(leaf.ebx, 7));
setFeature(cpu, .bmi2, bit(leaf.ebx, 8));
setFeature(cpu, .invpcid, bit(leaf.ebx, 10));
setFeature(cpu, .rtm, bit(leaf.ebx, 11));
@@ -377,6 +378,7 @@ fn detectNativeFeatures(cpu: *Target.Cpu, os_tag: Target.Os.Tag) void {
setFeature(cpu, .avx512dq, bit(leaf.ebx, 17) and has_avx512_save);
setFeature(cpu, .rdseed, bit(leaf.ebx, 18));
setFeature(cpu, .adx, bit(leaf.ebx, 19));
+ setFeature(cpu, .smap, bit(leaf.ebx, 20));
setFeature(cpu, .avx512ifma, bit(leaf.ebx, 21) and has_avx512_save);
setFeature(cpu, .clflushopt, bit(leaf.ebx, 23));
setFeature(cpu, .clwb, bit(leaf.ebx, 24));