| field | value | date |
|---|---|---|
| author | Andrew Kelley <andrew@ziglang.org> | 2022-05-31 00:19:23 -0700 |
| committer | Andrew Kelley <andrew@ziglang.org> | 2022-05-31 14:17:54 -0400 |
| commit | d410693dadfe791e616e78239fa0cec707b95cfa (patch) | |
| tree | cc797ccc0b13a5b65d676a7aac78c88f3c42fa38 /src/codegen/llvm.zig | |
| parent | 26aea8cfa1ea98329e18458cb23be2d60ef52507 (diff) | |
| download | zig-d410693dadfe791e616e78239fa0cec707b95cfa.tar.gz zig-d410693dadfe791e616e78239fa0cec707b95cfa.zip | |
LLVM: elide some loads when lowering
Generally, the load instruction may need to make a copy of an
isByRef=true value, such as in the case of the following code:
```zig
pub fn swap(comptime T: type, a: *T, b: *T) void {
const tmp = a.*;
a.* = b.*;
b.* = tmp;
}
```
However, it only needs to do so if some instruction that can possibly write
to memory executes before the loaded value's last use. When calling functions
with isByRef=true parameters, the generated AIR looks like loads followed
directly by the call.
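For illustration, here is a minimal sketch of that pattern: a large by-value
argument whose load feeds directly into a call. (The names `Big`, `consume`,
and `caller` are hypothetical and not from the commit; the assumption is only
that `Big` is large enough to be lowered as isByRef=true.)
```zig
const Big = struct { data: [64]u64 };

fn consume(x: Big) u64 {
    return x.data[0];
}

pub fn caller(p: *const Big) u64 {
    // The load of `p.*` dies immediately in the call, with no instruction
    // in between that could write to memory.
    return consume(p.*);
}
```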
This allows for a peephole optimization when lowering loads: if the load
instruction operates on an isByRef=true type and dies before any side effects
occur, then we can safely lower the load as a no-op that returns its
operand.
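Conversely, a rough sketch of a case where the copy cannot be elided, reusing
the hypothetical `Big` and `consume` from the sketch above: a possible write to
memory happens between the load and the value's last use, so the temporary must
be a genuine copy.
```zig
pub fn mustCopy(p: *Big, q: *Big) u64 {
    const tmp = p.*;     // load of an isByRef=true value
    q.data[0] = 42;      // a write that may alias `p.*` occurs before `tmp` dies
    return consume(tmp); // so the load above must have produced a real copy
}
```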
This is one of three changes I intend to make to address #11498.
However, I will put these changes in separate branches and merge them
separately so that we get three independent points on the perf charts.
Diffstat (limited to 'src/codegen/llvm.zig')
| mode | file | lines |
|---|---|---|
| -rw-r--r-- | src/codegen/llvm.zig | 32 |

1 file changed, 27 insertions, 5 deletions
```diff
diff --git a/src/codegen/llvm.zig b/src/codegen/llvm.zig
index 8cfa833cdd..cca83eca9b 100644
--- a/src/codegen/llvm.zig
+++ b/src/codegen/llvm.zig
@@ -3885,7 +3885,7 @@ pub const FuncGen = struct {
     fn genBody(self: *FuncGen, body: []const Air.Inst.Index) Error!void {
         const air_tags = self.air.instructions.items(.tag);
-        for (body) |inst| {
+        for (body) |inst, i| {
             const opt_value: ?*const llvm.Value = switch (air_tags[inst]) {
                 // zig fmt: off
                 .add => try self.airAdd(inst),
@@ -3976,7 +3976,7 @@ pub const FuncGen = struct {
                 .fptrunc => try self.airFptrunc(inst),
                 .fpext => try self.airFpext(inst),
                 .ptrtoint => try self.airPtrToInt(inst),
-                .load => try self.airLoad(inst),
+                .load => try self.airLoad(inst, body, i + 1),
                 .loop => try self.airLoop(inst),
                 .not => try self.airNot(inst),
                 .ret => try self.airRet(inst),
@@ -6982,11 +6982,33 @@ pub const FuncGen = struct {
         return null;
     }
 
-    fn airLoad(self: *FuncGen, inst: Air.Inst.Index) !?*const llvm.Value {
+    fn airLoad(
+        self: *FuncGen,
+        inst: Air.Inst.Index,
+        body: []const Air.Inst.Index,
+        body_i: usize,
+    ) !?*const llvm.Value {
         const ty_op = self.air.instructions.items(.data)[inst].ty_op;
         const ptr_ty = self.air.typeOf(ty_op.operand);
-        if (!ptr_ty.isVolatilePtr() and self.liveness.isUnused(inst))
-            return null;
+        elide: {
+            const ptr_info = ptr_ty.ptrInfo().data;
+            if (ptr_info.@"volatile") break :elide;
+            if (self.liveness.isUnused(inst)) return null;
+            if (!isByRef(ptr_info.pointee_type)) break :elide;
+
+            // It would be valid to fall back to the code below here that simply calls
+            // load(). However, as an optimization, we want to avoid unnecessary copies
+            // of isByRef=true types. Here, we scan forward in the current block,
+            // looking to see if this load dies before any side effects occur.
+            // In such case, we can safely return the operand without making a copy.
+            for (body[body_i..]) |body_inst| {
+                switch (self.liveness.categorizeOperand(self.air, body_inst, inst)) {
+                    .none => continue,
+                    .write, .noret, .complex => break :elide,
+                    .tomb => return try self.resolveInst(ty_op.operand),
+                }
+            } else unreachable;
+        }
         const ptr = try self.resolveInst(ty_op.operand);
         return self.load(ptr, ptr_ty);
     }
```
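One detail in the patch worth noting is the `} else unreachable;` on the scan
loop: in Zig, a `for` loop's `else` branch runs only when the loop finishes
iterating without a `break`, and here it is asserted unreachable, presumably
because the load's operand must die somewhere in the remaining body, so the
scan always exits via the switch. A minimal, self-contained sketch of the
`for ... else` construct (hypothetical example, not from the commit):
```zig
const std = @import("std");

/// Returns the first even element. The `else` branch runs only when the loop
/// iterates to completion without an early exit.
fn firstEven(xs: []const u32) ?u32 {
    for (xs) |x| {
        if (x % 2 == 0) return x;
    } else return null;
}

test "for-else runs when the loop completes" {
    const a = [_]u32{ 1, 3, 4, 5 };
    const b = [_]u32{ 1, 3, 5 };
    try std.testing.expectEqual(@as(?u32, 4), firstEven(&a));
    try std.testing.expectEqual(@as(?u32, null), firstEven(&b));
}
```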
