author    joachimschmidt557 <joachim.schmidt557@outlook.com>  2023-01-03 19:20:55 +0800
committer Jakub Konka <kubkon@jakubkonka.com>  2023-01-03 19:56:09 +0100
commit    09122650bab893c721f4fa1a852af93358ed1af2
tree      2d711729a7bf1ef2e2c95d17cab2af1789dfbfb7
parent    8032ecb73040f837d7232216b16ae7edaacbc867
stage2 AArch64: bump up alignment of stack items fitting in regs
This enables us to use more efficient loads and stores for these small stack items.
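
On AArch64, the unsigned-offset forms of ldr/str scale their 12-bit immediate by the access size, so an N-byte access can only address stack offsets that are multiples of N. Rounding a small item's alignment up to the next power of two (see the allocMem hunk below) guarantees exactly that. A minimal sketch of the rounding rule, under the assumption it is factored into a helper; adjustedAlign is illustrative, only std.math.ceilPowerOfTwoAssert appears in the change:

const std = @import("std");

// Illustrative helper mirroring the allocMem change below: items that
// fit in a register (<= 8 bytes) get their alignment bumped to the
// next power of two; larger items keep their ABI alignment.
fn adjustedAlign(abi_size: u32, abi_align: u32) u32 {
    return if (abi_size > 8)
        abi_align
    else
        std.math.ceilPowerOfTwoAssert(u32, abi_size);
}

test "small stack items become size-aligned" {
    try std.testing.expectEqual(@as(u32, 1), adjustedAlign(1, 1));
    try std.testing.expectEqual(@as(u32, 4), adjustedAlign(3, 1)); // 3-byte item rounds up to 4
    try std.testing.expectEqual(@as(u32, 8), adjustedAlign(8, 8));
    try std.testing.expectEqual(@as(u32, 16), adjustedAlign(32, 16)); // > 8 bytes: ABI align kept
}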
Diffstat (limited to 'src')
-rw-r--r--  src/arch/aarch64/CodeGen.zig | 27
1 file changed, 18 insertions(+), 9 deletions(-)
diff --git a/src/arch/aarch64/CodeGen.zig b/src/arch/aarch64/CodeGen.zig
index 3aee86c687..b333ffc666 100644
--- a/src/arch/aarch64/CodeGen.zig
+++ b/src/arch/aarch64/CodeGen.zig
@@ -433,7 +433,7 @@ pub fn generate(
.prev_di_pc = 0,
.prev_di_line = module_fn.lbrace_line,
.prev_di_column = module_fn.lbrace_column,
- .stack_size = mem.alignForwardGeneric(u32, function.max_end_stack, function.stack_align),
+ .stack_size = function.max_end_stack,
.saved_regs_stack_space = function.saved_regs_stack_space,
};
defer emit.deinit();
@@ -560,6 +560,7 @@ fn gen(self: *Self) !void {
const total_stack_size = self.max_end_stack + self.saved_regs_stack_space;
const aligned_total_stack_end = mem.alignForwardGeneric(u32, total_stack_size, self.stack_align);
const stack_size = aligned_total_stack_end - self.saved_regs_stack_space;
+ self.max_end_stack = stack_size;
if (math.cast(u12, stack_size)) |size| {
self.mir_instructions.set(backpatch_reloc, .{
.tag = .sub_immediate,
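
Since gen() now aligns the frame itself, writing the result back into self.max_end_stack lets Emit take the field as-is (see the first hunk, which drops the second alignForwardGeneric). A minimal worked example of the computation above, with illustrative numbers not taken from the commit:

const std = @import("std");

test "prologue reserves an aligned stack frame" {
    // Assumed inputs: 20 bytes of locals, 16 bytes of callee-saved
    // registers, 16-byte stack alignment.
    const max_end_stack: u32 = 20;
    const saved_regs_stack_space: u32 = 16;
    const stack_align: u32 = 16;

    const total_stack_size = max_end_stack + saved_regs_stack_space; // 36
    const aligned_total_stack_end =
        std.mem.alignForwardGeneric(u32, total_stack_size, stack_align); // 48
    const stack_size = aligned_total_stack_end - saved_regs_stack_space; // 32

    // This value is backpatched into the sub_immediate above and, after
    // this change, also stored back into max_end_stack for Emit.
    try std.testing.expectEqual(@as(u32, 32), stack_size);
}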
@@ -982,11 +983,16 @@ fn allocMem(
assert(abi_size > 0);
assert(abi_align > 0);
- if (abi_align > self.stack_align)
- self.stack_align = abi_align;
+ // In order to efficiently load and store stack items that fit
+ // into registers, we bump up the alignment to the next power of
+ // two.
+ const adjusted_align = if (abi_size > 8)
+ abi_align
+ else
+ std.math.ceilPowerOfTwoAssert(u32, abi_size);
// TODO find a free slot instead of always appending
- const offset = mem.alignForwardGeneric(u32, self.next_stack_offset, abi_align) + abi_size;
+ const offset = mem.alignForwardGeneric(u32, self.next_stack_offset, adjusted_align) + abi_size;
self.next_stack_offset = offset;
self.max_end_stack = @max(self.max_end_stack, self.next_stack_offset);
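
A sketch of the effect on slot placement, using an assumed case not from the commit: an item with abi_size = 4 but abi_align = 2 (e.g. an extern struct of a u16 and a u8, padded to 4 bytes):

const std = @import("std");

test "adjusted alignment keeps register-sized slots addressable" {
    const abi_size: u32 = 4;
    const abi_align: u32 = 2;
    const next_stack_offset: u32 = 6;

    // Old placement: 2-byte alignment yields offset 6 + 4 = 10, which
    // is not a multiple of the 4-byte access size.
    const old = std.mem.alignForwardGeneric(u32, next_stack_offset, abi_align) + abi_size;
    try std.testing.expectEqual(@as(u32, 10), old);

    // New placement: alignment bumped to 4 yields offset 8 + 4 = 12,
    // which the 32-bit str/ldr stack forms can address directly.
    const adjusted_align = std.math.ceilPowerOfTwoAssert(u32, abi_size);
    const new = std.mem.alignForwardGeneric(u32, next_stack_offset, adjusted_align) + abi_size;
    try std.testing.expectEqual(@as(u32, 12), new);
}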
@@ -5396,7 +5402,7 @@ fn setRegOrMem(self: *Self, ty: Type, loc: MCValue, val: MCValue) !void {
}
fn genSetStack(self: *Self, ty: Type, stack_offset: u32, mcv: MCValue) InnerError!void {
- const abi_size = ty.abiSize(self.target.*);
+ const abi_size = @intCast(u32, ty.abiSize(self.target.*));
switch (mcv) {
.dead => unreachable,
.unreach, .none => return, // Nothing to do.
@@ -5404,7 +5410,7 @@ fn genSetStack(self: *Self, ty: Type, stack_offset: u32, mcv: MCValue) InnerErro
if (!self.wantSafety())
return; // The already existing value will do just fine.
// TODO Upgrade this to a memset call when we have that available.
- switch (ty.abiSize(self.target.*)) {
+ switch (abi_size) {
1 => return self.genSetStack(ty, stack_offset, .{ .immediate = 0xaa }),
2 => return self.genSetStack(ty, stack_offset, .{ .immediate = 0xaaaa }),
4 => return self.genSetStack(ty, stack_offset, .{ .immediate = 0xaaaaaaaa }),
@@ -5426,6 +5432,8 @@ fn genSetStack(self: *Self, ty: Type, stack_offset: u32, mcv: MCValue) InnerErro
.register => |reg| {
switch (abi_size) {
1, 2, 4, 8 => {
+ assert(std.mem.isAlignedGeneric(u32, stack_offset, abi_size));
+
const tag: Mir.Inst.Tag = switch (abi_size) {
1 => .strb_stack,
2 => .strh_stack,
@@ -5438,7 +5446,7 @@ fn genSetStack(self: *Self, ty: Type, stack_offset: u32, mcv: MCValue) InnerErro
.tag = tag,
.data = .{ .load_store_stack = .{
.rt = rt,
- .offset = @intCast(u32, stack_offset),
+ .offset = stack_offset,
} },
});
},
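
The new assert records the invariant established in allocMem: every stack offset reaching the strb/strh/str stack forms is a multiple of the access size. A minimal sketch of the same check, with illustrative offsets:

const std = @import("std");

test "register-sized stores require size-aligned offsets" {
    // Offset 12 is valid for a 4-byte access; offset 10 would have
    // tripped the assert before the alignment bump.
    try std.testing.expect(std.mem.isAlignedGeneric(u32, 12, 4));
    try std.testing.expect(!std.mem.isAlignedGeneric(u32, 10, 4));
}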
@@ -6001,9 +6009,10 @@ fn airSelect(self: *Self, inst: Air.Inst.Index) !void {
}
fn airShuffle(self: *Self, inst: Air.Inst.Index) !void {
- const ty_op = self.air.instructions.items(.data)[inst].ty_op;
+ const ty_pl = self.air.instructions.items(.data)[inst].ty_pl;
+ const extra = self.air.extraData(Air.Shuffle, ty_pl.payload).data;
const result: MCValue = if (self.liveness.isUnused(inst)) .dead else return self.fail("TODO implement airShuffle for {}", .{self.target.cpu.arch});
- return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
+ return self.finishAir(inst, result, .{ extra.a, extra.b, .none });
}
fn airReduce(self: *Self, inst: Air.Inst.Index) !void {
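
Aside from the alignment work, the airShuffle hunk fixes the operand decoding: @shuffle is a ty_pl instruction whose Air.Shuffle payload carries both vector operands, so decoding it as ty_op read the wrong data and reported only one operand. finishAir now sees both a and b, keeping liveness tracking correct even while the implementation itself remains a TODO.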