diff options
| author | Andrew Kelley <andrew@ziglang.org> | 2023-04-29 00:19:55 -0700 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2023-04-29 00:19:55 -0700 |
| commit | d65b42e07caa00dfe2f2fbf221c593ce57882784 (patch) | |
| tree | 7926cbea1499e0affe930bf6d7455dc24adf014e /src/codegen | |
| parent | fd6200eda6d4fe19c34a59430a88a9ce38d6d7a4 (diff) | |
| parent | fa200ca0cad2705bad40eb723dedf4e3bf11f2ff (diff) | |
| download | zig-d65b42e07caa00dfe2f2fbf221c593ce57882784.tar.gz zig-d65b42e07caa00dfe2f2fbf221c593ce57882784.zip | |
Merge pull request #15481 from ziglang/use-mem-intrinsics
actually use the new memory intrinsics
Diffstat (limited to 'src/codegen')
| -rw-r--r-- | src/codegen/c.zig | 117 | ||||
| -rw-r--r-- | src/codegen/llvm.zig | 82 |
2 files changed, 147 insertions, 52 deletions
diff --git a/src/codegen/c.zig b/src/codegen/c.zig index 385094e495..f69cec960e 100644 --- a/src/codegen/c.zig +++ b/src/codegen/c.zig @@ -2411,9 +2411,9 @@ pub fn genErrDecls(o: *Object) !void { const name_buf = try o.dg.gpa.alloc(u8, name_prefix.len + max_name_len); defer o.dg.gpa.free(name_buf); - mem.copy(u8, name_buf, name_prefix); + @memcpy(name_buf[0..name_prefix.len], name_prefix); for (o.dg.module.error_name_list.items) |name| { - mem.copy(u8, name_buf[name_prefix.len..], name); + @memcpy(name_buf[name_prefix.len..][0..name.len], name); const identifier = name_buf[0 .. name_prefix.len + name.len]; var name_ty_pl = Type.Payload.Len{ .base = .{ .tag = .array_u8_sentinel_0 }, .data = name.len }; @@ -3858,7 +3858,7 @@ fn airCmpOp( try reap(f, inst, &.{ data.lhs, data.rhs }); const rhs_ty = f.air.typeOf(data.rhs); - const need_cast = lhs_ty.isSinglePointer() != rhs_ty.isSinglePointer(); + const need_cast = lhs_ty.isSinglePointer() or rhs_ty.isSinglePointer(); const writer = f.object.writer(); const local = try f.allocLocal(inst, inst_ty); const v = try Vectorize.start(f, inst, writer, lhs_ty); @@ -4419,51 +4419,94 @@ fn airBitcast(f: *Function, inst: Air.Inst.Index) !CValue { const dest_ty = f.air.typeOfIndex(inst); const operand = try f.resolveInst(ty_op.operand); - try reap(f, inst, &.{ty_op.operand}); const operand_ty = f.air.typeOf(ty_op.operand); - const target = f.object.dg.module.getTarget(); - const writer = f.object.writer(); - const local = try f.allocLocal(inst, dest_ty); + const bitcasted = try bitcast(f, dest_ty, operand, operand_ty); + try reap(f, inst, &.{ty_op.operand}); + return bitcasted.move(f, inst, dest_ty); +} + +const LocalResult = struct { + c_value: CValue, + need_free: bool, + + fn move(lr: LocalResult, f: *Function, inst: Air.Inst.Index, dest_ty: Type) !CValue { + if (lr.need_free) { + // Move the freshly allocated local to be owned by this instruction, + // by returning it here instead of freeing it. + return lr.c_value; + } + + const local = try f.allocLocal(inst, dest_ty); + try lr.free(f); + const writer = f.object.writer(); + try f.writeCValue(writer, local, .Other); + if (dest_ty.isAbiInt()) { + try writer.writeAll(" = "); + } else { + try writer.writeAll(" = ("); + try f.renderType(writer, dest_ty); + try writer.writeByte(')'); + } + try f.writeCValue(writer, lr.c_value, .Initializer); + try writer.writeAll(";\n"); + return local; + } + + fn free(lr: LocalResult, f: *Function) !void { + if (lr.need_free) { + try freeLocal(f, 0, lr.c_value.new_local, 0); + } + } +}; - // If the assignment looks like 'x = x', we don't need it - const can_elide = operand == .local and operand.local == local.new_local; +fn bitcast(f: *Function, dest_ty: Type, operand: CValue, operand_ty: Type) !LocalResult { + const target = f.object.dg.module.getTarget(); + const writer = f.object.writer(); if (operand_ty.isAbiInt() and dest_ty.isAbiInt()) { - if (can_elide) return local; const src_info = dest_ty.intInfo(target); const dest_info = operand_ty.intInfo(target); if (src_info.signedness == dest_info.signedness and src_info.bits == dest_info.bits) { - try f.writeCValue(writer, local, .Other); - try writer.writeAll(" = "); - try f.writeCValue(writer, operand, .Initializer); - try writer.writeAll(";\n"); - return local; + return .{ + .c_value = operand, + .need_free = false, + }; } } if (dest_ty.isPtrAtRuntime() and operand_ty.isPtrAtRuntime()) { - if (can_elide) return local; + const local = try f.allocLocal(0, dest_ty); try f.writeCValue(writer, local, .Other); try writer.writeAll(" = ("); try f.renderType(writer, dest_ty); try writer.writeByte(')'); try f.writeCValue(writer, operand, .Other); try writer.writeAll(";\n"); - return local; + return .{ + .c_value = local, + .need_free = true, + }; } const operand_lval = if (operand == .constant) blk: { - const operand_local = try f.allocLocal(inst, operand_ty); + const operand_local = try f.allocLocal(0, operand_ty); try f.writeCValue(writer, operand_local, .Other); - try writer.writeAll(" = "); + if (operand_ty.isAbiInt()) { + try writer.writeAll(" = "); + } else { + try writer.writeAll(" = ("); + try f.renderType(writer, operand_ty); + try writer.writeByte(')'); + } try f.writeCValue(writer, operand, .Initializer); try writer.writeAll(";\n"); break :blk operand_local; } else operand; + const local = try f.allocLocal(0, dest_ty); try writer.writeAll("memcpy(&"); try f.writeCValue(writer, local, .Other); try writer.writeAll(", &"); @@ -4528,10 +4571,13 @@ fn airBitcast(f: *Function, inst: Air.Inst.Index) !CValue { } if (operand == .constant) { - try freeLocal(f, inst, operand_lval.new_local, 0); + try freeLocal(f, 0, operand_lval.new_local, 0); } - return local; + return .{ + .c_value = local, + .need_free = true, + }; } fn airTrap(writer: anytype) !CValue { @@ -4831,7 +4877,7 @@ fn airAsm(f: *Function, inst: Air.Inst.Index) !CValue { const literal = mem.sliceTo(asm_source[src_i..], '%'); src_i += literal.len; - mem.copy(u8, fixed_asm_source[dst_i..], literal); + @memcpy(fixed_asm_source[dst_i..][0..literal.len], literal); dst_i += literal.len; if (src_i >= asm_source.len) break; @@ -4856,9 +4902,9 @@ fn airAsm(f: *Function, inst: Air.Inst.Index) !CValue { const name = desc[0..colon]; const modifier = desc[colon + 1 ..]; - mem.copy(u8, fixed_asm_source[dst_i..], modifier); + @memcpy(fixed_asm_source[dst_i..][0..modifier.len], modifier); dst_i += modifier.len; - mem.copy(u8, fixed_asm_source[dst_i..], name); + @memcpy(fixed_asm_source[dst_i..][0..name.len], name); dst_i += name.len; src_i += desc.len; @@ -6288,15 +6334,19 @@ fn airMemset(f: *Function, inst: Air.Inst.Index, safety: bool) !CValue { } try writer.writeAll("; ++"); try f.writeCValue(writer, index, .Other); - try writer.writeAll(") (("); + try writer.writeAll(") "); + + const a = try Assignment.start(f, writer, elem_ty); + try writer.writeAll("(("); try f.renderType(writer, elem_ptr_ty); try writer.writeByte(')'); try writeSliceOrPtr(f, writer, dest_slice, dest_ty); try writer.writeAll(")["); try f.writeCValue(writer, index, .Other); - try writer.writeAll("] = "); - try f.writeCValue(writer, value, .FunctionArgument); - try writer.writeAll(";\n"); + try writer.writeByte(']'); + try a.assign(f, writer); + try f.writeCValue(writer, value, .Other); + try a.end(f, writer); try reap(f, inst, &.{ bin_op.lhs, bin_op.rhs }); try freeLocal(f, inst, index.new_local, 0); @@ -6304,12 +6354,14 @@ fn airMemset(f: *Function, inst: Air.Inst.Index, safety: bool) !CValue { return .none; } + const bitcasted = try bitcast(f, Type.u8, value, elem_ty); + try writer.writeAll("memset("); switch (dest_ty.ptrSize()) { .Slice => { try f.writeCValueMember(writer, dest_slice, .{ .identifier = "ptr" }); try writer.writeAll(", "); - try f.writeCValue(writer, value, .FunctionArgument); + try f.writeCValue(writer, bitcasted.c_value, .FunctionArgument); try writer.writeAll(", "); try f.writeCValueMember(writer, dest_slice, .{ .identifier = "len" }); try writer.writeAll(");\n"); @@ -6320,11 +6372,12 @@ fn airMemset(f: *Function, inst: Air.Inst.Index, safety: bool) !CValue { try f.writeCValue(writer, dest_slice, .FunctionArgument); try writer.writeAll(", "); - try f.writeCValue(writer, value, .FunctionArgument); + try f.writeCValue(writer, bitcasted.c_value, .FunctionArgument); try writer.print(", {d});\n", .{len}); }, .Many, .C => unreachable, } + try bitcasted.free(f); try reap(f, inst, &.{ bin_op.lhs, bin_op.rhs }); return .none; } @@ -7394,7 +7447,7 @@ fn formatIntLiteral( var int_buf: Value.BigIntSpace = undefined; const int = if (data.val.isUndefDeep()) blk: { undef_limbs = try allocator.alloc(BigIntLimb, BigInt.calcTwosCompLimbCount(data.int_info.bits)); - mem.set(BigIntLimb, undef_limbs, undefPattern(BigIntLimb)); + @memset(undef_limbs, undefPattern(BigIntLimb)); var undef_int = BigInt.Mutable{ .limbs = undef_limbs, @@ -7489,7 +7542,7 @@ fn formatIntLiteral( } else { try data.cty.renderLiteralPrefix(writer, data.kind); wrap.convertToTwosComplement(int, data.int_info.signedness, c_bits); - mem.set(BigIntLimb, wrap.limbs[wrap.len..], 0); + @memset(wrap.limbs[wrap.len..], 0); wrap.len = wrap.limbs.len; const limbs_per_c_limb = @divExact(wrap.len, c_limb_info.count); diff --git a/src/codegen/llvm.zig b/src/codegen/llvm.zig index 94f49e801d..d697a41988 100644 --- a/src/codegen/llvm.zig +++ b/src/codegen/llvm.zig @@ -7939,11 +7939,15 @@ pub const FuncGen = struct { return self.builder.buildPtrToInt(operand_ptr, dest_llvm_ty, ""); } - fn airBitCast(self: *FuncGen, inst: Air.Inst.Index) !?*llvm.Value { + fn airBitCast(self: *FuncGen, inst: Air.Inst.Index) !*llvm.Value { const ty_op = self.air.instructions.items(.data)[inst].ty_op; const operand_ty = self.air.typeOf(ty_op.operand); const inst_ty = self.air.typeOfIndex(inst); const operand = try self.resolveInst(ty_op.operand); + return self.bitCast(operand, operand_ty, inst_ty); + } + + fn bitCast(self: *FuncGen, operand: *llvm.Value, operand_ty: Type, inst_ty: Type) !*llvm.Value { const operand_is_ref = isByRef(operand_ty); const result_is_ref = isByRef(inst_ty); const llvm_dest_ty = try self.dg.lowerType(inst_ty); @@ -7954,6 +7958,12 @@ pub const FuncGen = struct { return operand; } + if (llvm_dest_ty.getTypeKind() == .Integer and + operand.typeOf().getTypeKind() == .Integer) + { + return self.builder.buildZExtOrBitCast(operand, llvm_dest_ty, ""); + } + if (operand_ty.zigTypeTag() == .Int and inst_ty.isPtrAtRuntime()) { return self.builder.buildIntToPtr(operand, llvm_dest_ty, ""); } @@ -8414,27 +8424,45 @@ pub const FuncGen = struct { const dest_slice = try self.resolveInst(bin_op.lhs); const ptr_ty = self.air.typeOf(bin_op.lhs); const elem_ty = self.air.typeOf(bin_op.rhs); - const target = self.dg.module.getTarget(); - const val_is_undef = if (self.air.value(bin_op.rhs)) |val| val.isUndefDeep() else false; + const module = self.dg.module; + const target = module.getTarget(); const dest_ptr_align = ptr_ty.ptrAlignment(target); const u8_llvm_ty = self.context.intType(8); const dest_ptr = self.sliceOrArrayPtr(dest_slice, ptr_ty); + const is_volatile = ptr_ty.isVolatilePtr(); + + if (self.air.value(bin_op.rhs)) |elem_val| { + if (elem_val.isUndefDeep()) { + // Even if safety is disabled, we still emit a memset to undefined since it conveys + // extra information to LLVM. However, safety makes the difference between using + // 0xaa or actual undefined for the fill byte. + const fill_byte = if (safety) + u8_llvm_ty.constInt(0xaa, .False) + else + u8_llvm_ty.getUndef(); + const len = self.sliceOrArrayLenInBytes(dest_slice, ptr_ty); + _ = self.builder.buildMemSet(dest_ptr, fill_byte, len, dest_ptr_align, is_volatile); - if (val_is_undef) { - // Even if safety is disabled, we still emit a memset to undefined since it conveys - // extra information to LLVM. However, safety makes the difference between using - // 0xaa or actual undefined for the fill byte. - const fill_byte = if (safety) - u8_llvm_ty.constInt(0xaa, .False) - else - u8_llvm_ty.getUndef(); - const len = self.sliceOrArrayLenInBytes(dest_slice, ptr_ty); - _ = self.builder.buildMemSet(dest_ptr, fill_byte, len, dest_ptr_align, ptr_ty.isVolatilePtr()); + if (safety and module.comp.bin_file.options.valgrind) { + self.valgrindMarkUndef(dest_ptr, len); + } + return null; + } - if (safety and self.dg.module.comp.bin_file.options.valgrind) { - self.valgrindMarkUndef(dest_ptr, len); + // Test if the element value is compile-time known to be a + // repeating byte pattern, for example, `@as(u64, 0)` has a + // repeating byte pattern of 0 bytes. In such case, the memset + // intrinsic can be used. + var value_buffer: Value.Payload.U64 = undefined; + if (try elem_val.hasRepeatedByteRepr(elem_ty, module, &value_buffer)) |byte_val| { + const fill_byte = try self.resolveValue(.{ + .ty = Type.u8, + .val = byte_val, + }); + const len = self.sliceOrArrayLenInBytes(dest_slice, ptr_ty); + _ = self.builder.buildMemSet(dest_ptr, fill_byte, len, dest_ptr_align, is_volatile); + return null; } - return null; } const value = try self.resolveInst(bin_op.rhs); @@ -8442,9 +8470,9 @@ pub const FuncGen = struct { if (elem_abi_size == 1) { // In this case we can take advantage of LLVM's intrinsic. - const fill_byte = self.builder.buildBitCast(value, u8_llvm_ty, ""); + const fill_byte = try self.bitCast(value, elem_ty, Type.u8); const len = self.sliceOrArrayLenInBytes(dest_slice, ptr_ty); - _ = self.builder.buildMemSet(dest_ptr, fill_byte, len, dest_ptr_align, ptr_ty.isVolatilePtr()); + _ = self.builder.buildMemSet(dest_ptr, fill_byte, len, dest_ptr_align, is_volatile); return null; } @@ -8486,8 +8514,22 @@ pub const FuncGen = struct { _ = self.builder.buildCondBr(end, body_block, end_block); self.builder.positionBuilderAtEnd(body_block); - const store_inst = self.builder.buildStore(value, it_ptr); - store_inst.setAlignment(@min(elem_ty.abiAlignment(target), dest_ptr_align)); + const elem_abi_alignment = elem_ty.abiAlignment(target); + const it_ptr_alignment = @min(elem_abi_alignment, dest_ptr_align); + if (isByRef(elem_ty)) { + _ = self.builder.buildMemCpy( + it_ptr, + it_ptr_alignment, + value, + elem_abi_alignment, + llvm_usize_ty.constInt(elem_abi_size, .False), + is_volatile, + ); + } else { + const store_inst = self.builder.buildStore(value, it_ptr); + store_inst.setAlignment(it_ptr_alignment); + store_inst.setVolatile(llvm.Bool.fromBool(is_volatile)); + } const one_gep = [_]*llvm.Value{llvm_usize_ty.constInt(1, .False)}; const next_ptr = self.builder.buildInBoundsGEP(elem_llvm_ty, it_ptr, &one_gep, one_gep.len, ""); _ = self.builder.buildBr(loop_block); |
