author    Andrew Kelley <andrew@ziglang.org>  2022-05-31 00:19:23 -0700
committer Andrew Kelley <andrew@ziglang.org>  2022-05-31 14:17:54 -0400
commit    d410693dadfe791e616e78239fa0cec707b95cfa (patch)
tree      cc797ccc0b13a5b65d676a7aac78c88f3c42fa38 /src/codegen/llvm.zig
parent    26aea8cfa1ea98329e18458cb23be2d60ef52507 (diff)
LLVM: elide some loads when lowering
Generally, the load instruction may need to make a copy of an isByRef=true value, such as in the case of the following code:

```zig
pub fn swap(comptime T: type, a: *T, b: *T) void {
    const tmp = a.*;
    a.* = b.*;
    b.* = tmp;
}
```

However, it only needs to do so if there are any instructions which can possibly write to memory. When calling functions with isByRef=true parameters, the AIR code that is generated looks like loads followed directly by call. This allows for a peephole optimization when lowering loads: if the load instruction operates on an isByRef=true type and dies before any side effects occur, then we can safely lower the load as a no-op that returns its operand.

This is one out of three changes I intend to make to address #11498. However I will put these changes in separate branches and merge them separately so that we can have three independent points on the perf charts.
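As an illustration of the case that benefits, here is a minimal sketch (the `Big` and `consume` names are hypothetical, not taken from the commit): a large by-ref value is loaded only to be passed to a call, so the load's result dies at the call site with no intervening writes.

```zig
const Big = struct { data: [64]u64 };

fn consume(b: Big) u64 {
    return b.data[0];
}

pub fn caller(p: *const Big) u64 {
    // The AIR for `p.*` is a load whose only use is the call below; it dies
    // there before any instruction can write to memory, so the backend may
    // pass the callee a pointer to the original bytes instead of a copy.
    return consume(p.*);
}
```

The `swap` example above, by contrast, still requires the copy: `tmp` must survive the write to `a.*`, so the forward scan reaches a write before the load's death.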
Diffstat (limited to 'src/codegen/llvm.zig')
-rw-r--r--  src/codegen/llvm.zig  32
1 file changed, 27 insertions, 5 deletions
diff --git a/src/codegen/llvm.zig b/src/codegen/llvm.zig
index 8cfa833cdd..cca83eca9b 100644
--- a/src/codegen/llvm.zig
+++ b/src/codegen/llvm.zig
@@ -3885,7 +3885,7 @@ pub const FuncGen = struct {
 
     fn genBody(self: *FuncGen, body: []const Air.Inst.Index) Error!void {
         const air_tags = self.air.instructions.items(.tag);
-        for (body) |inst| {
+        for (body) |inst, i| {
             const opt_value: ?*const llvm.Value = switch (air_tags[inst]) {
                 // zig fmt: off
                 .add => try self.airAdd(inst),
@@ -3976,7 +3976,7 @@ pub const FuncGen = struct {
                 .fptrunc => try self.airFptrunc(inst),
                 .fpext => try self.airFpext(inst),
                 .ptrtoint => try self.airPtrToInt(inst),
-                .load => try self.airLoad(inst),
+                .load => try self.airLoad(inst, body, i + 1),
                 .loop => try self.airLoop(inst),
                 .not => try self.airNot(inst),
                 .ret => try self.airRet(inst),
@@ -6982,11 +6982,33 @@ pub const FuncGen = struct {
         return null;
     }
 
-    fn airLoad(self: *FuncGen, inst: Air.Inst.Index) !?*const llvm.Value {
+    fn airLoad(
+        self: *FuncGen,
+        inst: Air.Inst.Index,
+        body: []const Air.Inst.Index,
+        body_i: usize,
+    ) !?*const llvm.Value {
         const ty_op = self.air.instructions.items(.data)[inst].ty_op;
         const ptr_ty = self.air.typeOf(ty_op.operand);
-        if (!ptr_ty.isVolatilePtr() and self.liveness.isUnused(inst))
-            return null;
+        elide: {
+            const ptr_info = ptr_ty.ptrInfo().data;
+            if (ptr_info.@"volatile") break :elide;
+            if (self.liveness.isUnused(inst)) return null;
+            if (!isByRef(ptr_info.pointee_type)) break :elide;
+
+            // It would be valid to fall back to the code below here that simply calls
+            // load(). However, as an optimization, we want to avoid unnecessary copies
+            // of isByRef=true types. Here, we scan forward in the current block,
+            // looking to see if this load dies before any side effects occur.
+            // In such case, we can safely return the operand without making a copy.
+            for (body[body_i..]) |body_inst| {
+                switch (self.liveness.categorizeOperand(self.air, body_inst, inst)) {
+                    .none => continue,
+                    .write, .noret, .complex => break :elide,
+                    .tomb => return try self.resolveInst(ty_op.operand),
+                }
+            } else unreachable;
+        }
         const ptr = try self.resolveInst(ty_op.operand);
         return self.load(ptr, ptr_ty);
     }