From 075f93fa108030fb0dd12faa6e389ace302cfb4c Mon Sep 17 00:00:00 2001 From: Cody Tapscott Date: Thu, 28 Jul 2022 17:06:57 -0700 Subject: stage2 LLVM: Pass inline assembly outputs directly when not targeting memory This change provides a basic implementation of #2349 for stage2. There's still quite a lot of work before this logic is as complete as what's in Clang (https://github.com/llvm/llvm-project/blob/b3645353041818f61e2580635409ddb81ff5a272/clang/lib/CodeGen/CGStmt.cpp#L2304-L2795), particularly considering the diversity of constraints across targets. It's probably not worth doing the complete work until there's a clearer picture for constraints in Zig's future dedicated ASM syntax, but at least this gives us a small improvement for now. As a bonus, this also fixes a bug with how we were handling `_` identifiers. --- src/codegen/llvm.zig | 141 +++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 109 insertions(+), 32 deletions(-) (limited to 'src/codegen') diff --git a/src/codegen/llvm.zig b/src/codegen/llvm.zig index 6a2e346113..664edb0304 100644 --- a/src/codegen/llvm.zig +++ b/src/codegen/llvm.zig @@ -5491,22 +5491,26 @@ pub const FuncGen = struct { defer arena_allocator.deinit(); const arena = arena_allocator.allocator(); - const return_count: u8 = for (outputs) |output| { - if (output == .none) break 1; - } else 0; - const llvm_params_len = inputs.len + outputs.len - return_count; - const llvm_param_types = try arena.alloc(*const llvm.Type, llvm_params_len); - const llvm_param_values = try arena.alloc(*const llvm.Value, llvm_params_len); - const llvm_param_attrs = try arena.alloc(bool, llvm_params_len); + // The exact number of return / parameter values depends on which output values + // are passed by reference as indirect outputs (determined below). + const max_return_count = outputs.len; + const llvm_ret_types = try arena.alloc(*const llvm.Type, max_return_count); + const llvm_ret_indirect = try arena.alloc(bool, max_return_count); + + const max_param_count = inputs.len + outputs.len; + const llvm_param_types = try arena.alloc(*const llvm.Type, max_param_count); + const llvm_param_values = try arena.alloc(*const llvm.Value, max_param_count); + const llvm_param_attrs = try arena.alloc(bool, max_param_count); const target = self.dg.module.getTarget(); + var llvm_ret_i: usize = 0; var llvm_param_i: usize = 0; - var total_i: usize = 0; + var total_i: u16 = 0; - var name_map: std.StringArrayHashMapUnmanaged(void) = .{}; - try name_map.ensureUnusedCapacity(arena, outputs.len + inputs.len); + var name_map: std.StringArrayHashMapUnmanaged(u16) = .{}; + try name_map.ensureUnusedCapacity(arena, max_param_count); - for (outputs) |output| { + for (outputs) |output, i| { const extra_bytes = std.mem.sliceAsBytes(self.air.extra[extra_i..]); const constraint = std.mem.sliceTo(std.mem.sliceAsBytes(self.air.extra[extra_i..]), 0); const name = std.mem.sliceTo(extra_bytes[constraint.len + 1 ..], 0); @@ -5519,15 +5523,30 @@ pub const FuncGen = struct { llvm_constraints.appendAssumeCapacity(','); } llvm_constraints.appendAssumeCapacity('='); + + // Pass any non-return outputs indirectly, if the constraint accepts a memory location + llvm_ret_indirect[i] = (output != .none) and constraintAllowsMemory(constraint); if (output != .none) { try llvm_constraints.ensureUnusedCapacity(self.gpa, llvm_constraints.capacity + 1); - llvm_constraints.appendAssumeCapacity('*'); - const output_inst = try self.resolveInst(output); - llvm_param_values[llvm_param_i] = output_inst; - llvm_param_types[llvm_param_i] = output_inst.typeOf(); - llvm_param_attrs[llvm_param_i] = true; - llvm_param_i += 1; + + if (llvm_ret_indirect[i]) { + // Pass the result by reference as an indirect output (e.g. "=*m") + llvm_constraints.appendAssumeCapacity('*'); + + llvm_param_values[llvm_param_i] = output_inst; + llvm_param_types[llvm_param_i] = output_inst.typeOf(); + llvm_param_attrs[llvm_param_i] = true; + llvm_param_i += 1; + } else { + // Pass the result directly (e.g. "=r") + llvm_ret_types[llvm_ret_i] = output_inst.typeOf().getElementType(); + llvm_ret_i += 1; + } + } else { + const ret_ty = self.air.typeOfIndex(inst); + llvm_ret_types[llvm_ret_i] = try self.dg.lowerType(ret_ty); + llvm_ret_i += 1; } // LLVM uses commas internally to separate different constraints, @@ -5536,13 +5555,16 @@ pub const FuncGen = struct { // to GCC's inline assembly. // http://llvm.org/docs/LangRef.html#constraint-codes for (constraint[1..]) |byte| { - llvm_constraints.appendAssumeCapacity(switch (byte) { - ',' => '|', - else => byte, - }); + switch (byte) { + ',' => llvm_constraints.appendAssumeCapacity('|'), + '*' => {}, // Indirect outputs are handled above + else => llvm_constraints.appendAssumeCapacity(byte), + } } - name_map.putAssumeCapacityNoClobber(name, {}); + if (!std.mem.eql(u8, name, "_")) { + name_map.putAssumeCapacityNoClobber(name, total_i); + } total_i += 1; } @@ -5594,7 +5616,7 @@ pub const FuncGen = struct { } if (!std.mem.eql(u8, name, "_")) { - name_map.putAssumeCapacityNoClobber(name, {}); + name_map.putAssumeCapacityNoClobber(name, total_i); } // In the case of indirect inputs, LLVM requires the callsite to have @@ -5625,6 +5647,11 @@ pub const FuncGen = struct { } } + // We have finished scanning through all inputs/outputs, so the number of + // parameters and return values is known. + const param_count = llvm_param_i; + const return_count = llvm_ret_i; + // For some targets, Clang unconditionally adds some clobbers to all inline assembly. // While this is probably not strictly necessary, if we don't follow Clang's lead // here then we may risk tripping LLVM bugs since anything not used by Clang tends @@ -5682,7 +5709,7 @@ pub const FuncGen = struct { const name = asm_source[name_start..i]; state = .start; - const index = name_map.getIndex(name) orelse { + const index = name_map.get(name) orelse { // we should validate the assembly in Sema; by now it is too late return self.todo("unknown input or output name: '{s}'", .{name}); }; @@ -5693,12 +5720,20 @@ pub const FuncGen = struct { } } - const ret_ty = self.air.typeOfIndex(inst); - const ret_llvm_ty = try self.dg.lowerType(ret_ty); + const ret_llvm_ty = switch (return_count) { + 0 => self.context.voidType(), + 1 => llvm_ret_types[0], + else => self.context.structType( + llvm_ret_types.ptr, + @intCast(c_uint, return_count), + .False, + ), + }; + const llvm_fn_ty = llvm.functionType( ret_llvm_ty, llvm_param_types.ptr, - @intCast(c_uint, llvm_param_types.len), + @intCast(c_uint, param_count), .False, ); const asm_fn = llvm.getInlineAsm( @@ -5715,18 +5750,40 @@ pub const FuncGen = struct { const call = self.builder.buildCall( asm_fn, llvm_param_values.ptr, - @intCast(c_uint, llvm_param_values.len), + @intCast(c_uint, param_count), .C, .Auto, "", ); - for (llvm_param_attrs) |need_elem_ty, i| { + for (llvm_param_attrs[0..param_count]) |need_elem_ty, i| { if (need_elem_ty) { const elem_ty = llvm_param_types[i].getElementType(); llvm.setCallElemTypeAttr(call, i, elem_ty); } } - return call; + + var ret_val = call; + llvm_ret_i = 0; + for (outputs) |output, i| { + if (llvm_ret_indirect[i]) continue; + + const output_value = if (return_count > 1) b: { + break :b self.builder.buildExtractValue(call, @intCast(c_uint, llvm_ret_i), ""); + } else call; + + if (output != .none) { + const output_ptr = try self.resolveInst(output); + const output_ptr_ty = self.air.typeOf(output); + + const store_inst = self.builder.buildStore(output_value, output_ptr); + store_inst.setAlignment(output_ptr_ty.ptrAlignment(target)); + } else { + ret_val = output_value; + } + llvm_ret_i += 1; + } + + return ret_val; } fn airIsNonNull( @@ -9709,10 +9766,30 @@ fn errUnionErrorOffset(payload_ty: Type, target: std.Target) u1 { return @boolToInt(Type.anyerror.abiAlignment(target) <= payload_ty.abiAlignment(target)); } +/// Returns true for asm constraint (e.g. "=*m", "=r") if it accepts a memory location +/// +/// See also TargetInfo::validateOutputConstraint, AArch64TargetInfo::validateAsmConstraint, etc. in Clang fn constraintAllowsMemory(constraint: []const u8) bool { - return constraint[0] == 'm'; + // TODO: This implementation is woefully incomplete. + for (constraint) |byte| { + switch (byte) { + '=', '*', ',', '&' => {}, + 'm', 'o', 'X', 'g' => return true, + else => {}, + } + } else return false; } +/// Returns true for asm constraint (e.g. "=*m", "=r") if it accepts a register +/// +/// See also TargetInfo::validateOutputConstraint, AArch64TargetInfo::validateAsmConstraint, etc. in Clang fn constraintAllowsRegister(constraint: []const u8) bool { - return constraint[0] != 'm'; + // TODO: This implementation is woefully incomplete. + for (constraint) |byte| { + switch (byte) { + '=', '*', ',', '&' => {}, + 'm', 'o' => {}, + else => return true, + } + } else return false; } -- cgit v1.2.3