aboutsummaryrefslogtreecommitdiff
path: root/src/codegen/llvm.zig
diff options
context:
space:
mode:
authorCody Tapscott <topolarity@tapscott.me>2022-07-28 17:06:57 -0700
committerAndrew Kelley <andrew@ziglang.org>2022-07-31 12:59:19 -0700
commit075f93fa108030fb0dd12faa6e389ace302cfb4c (patch)
tree73cc539ff6dc66dc67cb35ba30af0c9c2a263098 /src/codegen/llvm.zig
parent1ab15b6c9c529c9acc85f4b0bf3fcaea97a5a48e (diff)
downloadzig-075f93fa108030fb0dd12faa6e389ace302cfb4c.tar.gz
zig-075f93fa108030fb0dd12faa6e389ace302cfb4c.zip
stage2 LLVM: Pass inline assembly outputs directly when not targeting memory
This change provides a basic implementation of #2349 for stage2. There's still quite a lot of work before this logic is as complete as what's in Clang (https://github.com/llvm/llvm-project/blob/b3645353041818f61e2580635409ddb81ff5a272/clang/lib/CodeGen/CGStmt.cpp#L2304-L2795), particularly considering the diversity of constraints across targets. It's probably not worth doing the complete work until there's a clearer picture for constraints in Zig's future dedicated ASM syntax, but at least this gives us a small improvement for now. As a bonus, this also fixes a bug with how we were handling `_` identifiers.
Diffstat (limited to 'src/codegen/llvm.zig')
-rw-r--r--src/codegen/llvm.zig141
1 files changed, 109 insertions, 32 deletions
diff --git a/src/codegen/llvm.zig b/src/codegen/llvm.zig
index 6a2e346113..664edb0304 100644
--- a/src/codegen/llvm.zig
+++ b/src/codegen/llvm.zig
@@ -5491,22 +5491,26 @@ pub const FuncGen = struct {
defer arena_allocator.deinit();
const arena = arena_allocator.allocator();
- const return_count: u8 = for (outputs) |output| {
- if (output == .none) break 1;
- } else 0;
- const llvm_params_len = inputs.len + outputs.len - return_count;
- const llvm_param_types = try arena.alloc(*const llvm.Type, llvm_params_len);
- const llvm_param_values = try arena.alloc(*const llvm.Value, llvm_params_len);
- const llvm_param_attrs = try arena.alloc(bool, llvm_params_len);
+ // The exact number of return / parameter values depends on which output values
+ // are passed by reference as indirect outputs (determined below).
+ const max_return_count = outputs.len;
+ const llvm_ret_types = try arena.alloc(*const llvm.Type, max_return_count);
+ const llvm_ret_indirect = try arena.alloc(bool, max_return_count);
+
+ const max_param_count = inputs.len + outputs.len;
+ const llvm_param_types = try arena.alloc(*const llvm.Type, max_param_count);
+ const llvm_param_values = try arena.alloc(*const llvm.Value, max_param_count);
+ const llvm_param_attrs = try arena.alloc(bool, max_param_count);
const target = self.dg.module.getTarget();
+ var llvm_ret_i: usize = 0;
var llvm_param_i: usize = 0;
- var total_i: usize = 0;
+ var total_i: u16 = 0;
- var name_map: std.StringArrayHashMapUnmanaged(void) = .{};
- try name_map.ensureUnusedCapacity(arena, outputs.len + inputs.len);
+ var name_map: std.StringArrayHashMapUnmanaged(u16) = .{};
+ try name_map.ensureUnusedCapacity(arena, max_param_count);
- for (outputs) |output| {
+ for (outputs) |output, i| {
const extra_bytes = std.mem.sliceAsBytes(self.air.extra[extra_i..]);
const constraint = std.mem.sliceTo(std.mem.sliceAsBytes(self.air.extra[extra_i..]), 0);
const name = std.mem.sliceTo(extra_bytes[constraint.len + 1 ..], 0);
@@ -5519,15 +5523,30 @@ pub const FuncGen = struct {
llvm_constraints.appendAssumeCapacity(',');
}
llvm_constraints.appendAssumeCapacity('=');
+
+ // Pass any non-return outputs indirectly, if the constraint accepts a memory location
+ llvm_ret_indirect[i] = (output != .none) and constraintAllowsMemory(constraint);
if (output != .none) {
try llvm_constraints.ensureUnusedCapacity(self.gpa, llvm_constraints.capacity + 1);
- llvm_constraints.appendAssumeCapacity('*');
-
const output_inst = try self.resolveInst(output);
- llvm_param_values[llvm_param_i] = output_inst;
- llvm_param_types[llvm_param_i] = output_inst.typeOf();
- llvm_param_attrs[llvm_param_i] = true;
- llvm_param_i += 1;
+
+ if (llvm_ret_indirect[i]) {
+ // Pass the result by reference as an indirect output (e.g. "=*m")
+ llvm_constraints.appendAssumeCapacity('*');
+
+ llvm_param_values[llvm_param_i] = output_inst;
+ llvm_param_types[llvm_param_i] = output_inst.typeOf();
+ llvm_param_attrs[llvm_param_i] = true;
+ llvm_param_i += 1;
+ } else {
+ // Pass the result directly (e.g. "=r")
+ llvm_ret_types[llvm_ret_i] = output_inst.typeOf().getElementType();
+ llvm_ret_i += 1;
+ }
+ } else {
+ const ret_ty = self.air.typeOfIndex(inst);
+ llvm_ret_types[llvm_ret_i] = try self.dg.lowerType(ret_ty);
+ llvm_ret_i += 1;
}
// LLVM uses commas internally to separate different constraints,
@@ -5536,13 +5555,16 @@ pub const FuncGen = struct {
// to GCC's inline assembly.
// http://llvm.org/docs/LangRef.html#constraint-codes
for (constraint[1..]) |byte| {
- llvm_constraints.appendAssumeCapacity(switch (byte) {
- ',' => '|',
- else => byte,
- });
+ switch (byte) {
+ ',' => llvm_constraints.appendAssumeCapacity('|'),
+ '*' => {}, // Indirect outputs are handled above
+ else => llvm_constraints.appendAssumeCapacity(byte),
+ }
}
- name_map.putAssumeCapacityNoClobber(name, {});
+ if (!std.mem.eql(u8, name, "_")) {
+ name_map.putAssumeCapacityNoClobber(name, total_i);
+ }
total_i += 1;
}
@@ -5594,7 +5616,7 @@ pub const FuncGen = struct {
}
if (!std.mem.eql(u8, name, "_")) {
- name_map.putAssumeCapacityNoClobber(name, {});
+ name_map.putAssumeCapacityNoClobber(name, total_i);
}
// In the case of indirect inputs, LLVM requires the callsite to have
@@ -5625,6 +5647,11 @@ pub const FuncGen = struct {
}
}
+ // We have finished scanning through all inputs/outputs, so the number of
+ // parameters and return values is known.
+ const param_count = llvm_param_i;
+ const return_count = llvm_ret_i;
+
// For some targets, Clang unconditionally adds some clobbers to all inline assembly.
// While this is probably not strictly necessary, if we don't follow Clang's lead
// here then we may risk tripping LLVM bugs since anything not used by Clang tends
@@ -5682,7 +5709,7 @@ pub const FuncGen = struct {
const name = asm_source[name_start..i];
state = .start;
- const index = name_map.getIndex(name) orelse {
+ const index = name_map.get(name) orelse {
// we should validate the assembly in Sema; by now it is too late
return self.todo("unknown input or output name: '{s}'", .{name});
};
@@ -5693,12 +5720,20 @@ pub const FuncGen = struct {
}
}
- const ret_ty = self.air.typeOfIndex(inst);
- const ret_llvm_ty = try self.dg.lowerType(ret_ty);
+ const ret_llvm_ty = switch (return_count) {
+ 0 => self.context.voidType(),
+ 1 => llvm_ret_types[0],
+ else => self.context.structType(
+ llvm_ret_types.ptr,
+ @intCast(c_uint, return_count),
+ .False,
+ ),
+ };
+
const llvm_fn_ty = llvm.functionType(
ret_llvm_ty,
llvm_param_types.ptr,
- @intCast(c_uint, llvm_param_types.len),
+ @intCast(c_uint, param_count),
.False,
);
const asm_fn = llvm.getInlineAsm(
@@ -5715,18 +5750,40 @@ pub const FuncGen = struct {
const call = self.builder.buildCall(
asm_fn,
llvm_param_values.ptr,
- @intCast(c_uint, llvm_param_values.len),
+ @intCast(c_uint, param_count),
.C,
.Auto,
"",
);
- for (llvm_param_attrs) |need_elem_ty, i| {
+ for (llvm_param_attrs[0..param_count]) |need_elem_ty, i| {
if (need_elem_ty) {
const elem_ty = llvm_param_types[i].getElementType();
llvm.setCallElemTypeAttr(call, i, elem_ty);
}
}
- return call;
+
+ var ret_val = call;
+ llvm_ret_i = 0;
+ for (outputs) |output, i| {
+ if (llvm_ret_indirect[i]) continue;
+
+ const output_value = if (return_count > 1) b: {
+ break :b self.builder.buildExtractValue(call, @intCast(c_uint, llvm_ret_i), "");
+ } else call;
+
+ if (output != .none) {
+ const output_ptr = try self.resolveInst(output);
+ const output_ptr_ty = self.air.typeOf(output);
+
+ const store_inst = self.builder.buildStore(output_value, output_ptr);
+ store_inst.setAlignment(output_ptr_ty.ptrAlignment(target));
+ } else {
+ ret_val = output_value;
+ }
+ llvm_ret_i += 1;
+ }
+
+ return ret_val;
}
fn airIsNonNull(
@@ -9709,10 +9766,30 @@ fn errUnionErrorOffset(payload_ty: Type, target: std.Target) u1 {
return @boolToInt(Type.anyerror.abiAlignment(target) <= payload_ty.abiAlignment(target));
}
+/// Returns true for asm constraint (e.g. "=*m", "=r") if it accepts a memory location
+///
+/// See also TargetInfo::validateOutputConstraint, AArch64TargetInfo::validateAsmConstraint, etc. in Clang
fn constraintAllowsMemory(constraint: []const u8) bool {
- return constraint[0] == 'm';
+ // TODO: This implementation is woefully incomplete.
+ for (constraint) |byte| {
+ switch (byte) {
+ '=', '*', ',', '&' => {},
+ 'm', 'o', 'X', 'g' => return true,
+ else => {},
+ }
+ } else return false;
}
+/// Returns true for asm constraint (e.g. "=*m", "=r") if it accepts a register
+///
+/// See also TargetInfo::validateOutputConstraint, AArch64TargetInfo::validateAsmConstraint, etc. in Clang
fn constraintAllowsRegister(constraint: []const u8) bool {
- return constraint[0] != 'm';
+ // TODO: This implementation is woefully incomplete.
+ for (constraint) |byte| {
+ switch (byte) {
+ '=', '*', ',', '&' => {},
+ 'm', 'o' => {},
+ else => return true,
+ }
+ } else return false;
}