From 00b690540e561391d17c65e45f818db6be8fecec Mon Sep 17 00:00:00 2001
From: Andrew Kelley
Date: Wed, 26 Apr 2023 12:49:32 -0700
Subject: llvm backend: fix lowering of memset

The bitcast of elements with ABI size 1 was problematic for some types:
for example, `bool` lowers to LLVM `i1`, which must be zero-extended to
`i8` rather than bitcast.
---
 src/codegen/llvm.zig | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/src/codegen/llvm.zig b/src/codegen/llvm.zig
index 94f49e801d..e5bdec7526 100644
--- a/src/codegen/llvm.zig
+++ b/src/codegen/llvm.zig
@@ -7939,11 +7939,15 @@ pub const FuncGen = struct {
         return self.builder.buildPtrToInt(operand_ptr, dest_llvm_ty, "");
     }
 
-    fn airBitCast(self: *FuncGen, inst: Air.Inst.Index) !?*llvm.Value {
+    fn airBitCast(self: *FuncGen, inst: Air.Inst.Index) !*llvm.Value {
         const ty_op = self.air.instructions.items(.data)[inst].ty_op;
         const operand_ty = self.air.typeOf(ty_op.operand);
         const inst_ty = self.air.typeOfIndex(inst);
         const operand = try self.resolveInst(ty_op.operand);
+        return self.bitCast(operand, operand_ty, inst_ty);
+    }
+
+    fn bitCast(self: *FuncGen, operand: *llvm.Value, operand_ty: Type, inst_ty: Type) !*llvm.Value {
         const operand_is_ref = isByRef(operand_ty);
         const result_is_ref = isByRef(inst_ty);
         const llvm_dest_ty = try self.dg.lowerType(inst_ty);
@@ -7954,6 +7958,12 @@ pub const FuncGen = struct {
             return operand;
         }
 
+        if (llvm_dest_ty.getTypeKind() == .Integer and
+            operand.typeOf().getTypeKind() == .Integer)
+        {
+            return self.builder.buildZExtOrBitCast(operand, llvm_dest_ty, "");
+        }
+
         if (operand_ty.zigTypeTag() == .Int and inst_ty.isPtrAtRuntime()) {
             return self.builder.buildIntToPtr(operand, llvm_dest_ty, "");
         }
@@ -8442,7 +8452,7 @@ pub const FuncGen = struct {
 
         if (elem_abi_size == 1) {
             // In this case we can take advantage of LLVM's intrinsic.
-            const fill_byte = self.builder.buildBitCast(value, u8_llvm_ty, "");
+            const fill_byte = try self.bitCast(value, elem_ty, Type.u8);
             const len = self.sliceOrArrayLenInBytes(dest_slice, ptr_ty);
             _ = self.builder.buildMemSet(dest_ptr, fill_byte, len, dest_ptr_align, ptr_ty.isVolatilePtr());
             return null;
-- 
cgit v1.2.3
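The bug class fixed above can be reached from user code with a `bool` element: `bool` has ABI size 1 but lowers to LLVM `i1`, so the old `buildBitCast` could produce an invalid cast where the new `bitCast` helper emits a zero-extend. A minimal behavior-test sketch in the style of test/behavior/memset.zig (illustrative only, not part of the patch):

    const std = @import("std");
    const expect = std.testing.expect;

    test "memset with bool element" {
        // ABI size 1, so this takes the llvm.memset fast path; the i1 fill
        // value `true` is zero-extended to the i8 fill byte 0x01.
        var buf: [32]bool = undefined;
        @memset(&buf, true);
        for (buf) |b| try expect(b);
    }
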
From 51adbf472bcf9eacc0099e39778a6f9177fea023 Mon Sep 17 00:00:00 2001
From: Andrew Kelley
Date: Wed, 26 Apr 2023 13:04:56 -0700
Subject: llvm backend: fix memset with byref element value

When the element type is lowered by reference, the resolved value is a
pointer to the element's memory, so the loop body must copy it with
memcpy instead of storing the pointer itself. Also forward pointer
volatility to the per-element stores and copies.
---
 src/codegen/llvm.zig | 23 +++++++++++++++++++----
 1 file changed, 19 insertions(+), 4 deletions(-)

diff --git a/src/codegen/llvm.zig b/src/codegen/llvm.zig
index e5bdec7526..a7a1d8f56f 100644
--- a/src/codegen/llvm.zig
+++ b/src/codegen/llvm.zig
@@ -8429,17 +8429,18 @@ pub const FuncGen = struct {
         const dest_ptr_align = ptr_ty.ptrAlignment(target);
         const u8_llvm_ty = self.context.intType(8);
         const dest_ptr = self.sliceOrArrayPtr(dest_slice, ptr_ty);
+        const is_volatile = ptr_ty.isVolatilePtr();
 
         if (val_is_undef) {
             // Even if safety is disabled, we still emit a memset to undefined since it conveys
             // extra information to LLVM. However, safety makes the difference between using
             // 0xaa or actual undefined for the fill byte.
             const fill_byte = if (safety)
                 u8_llvm_ty.constInt(0xaa, .False)
             else
                 u8_llvm_ty.getUndef();
             const len = self.sliceOrArrayLenInBytes(dest_slice, ptr_ty);
-            _ = self.builder.buildMemSet(dest_ptr, fill_byte, len, dest_ptr_align, ptr_ty.isVolatilePtr());
+            _ = self.builder.buildMemSet(dest_ptr, fill_byte, len, dest_ptr_align, is_volatile);
 
             if (safety and self.dg.module.comp.bin_file.options.valgrind) {
                 self.valgrindMarkUndef(dest_ptr, len);
@@ -8454,7 +8455,7 @@ pub const FuncGen = struct {
             // In this case we can take advantage of LLVM's intrinsic.
             const fill_byte = try self.bitCast(value, elem_ty, Type.u8);
             const len = self.sliceOrArrayLenInBytes(dest_slice, ptr_ty);
-            _ = self.builder.buildMemSet(dest_ptr, fill_byte, len, dest_ptr_align, ptr_ty.isVolatilePtr());
+            _ = self.builder.buildMemSet(dest_ptr, fill_byte, len, dest_ptr_align, is_volatile);
             return null;
         }
 
@@ -8496,8 +8497,22 @@ pub const FuncGen = struct {
         _ = self.builder.buildCondBr(end, body_block, end_block);
 
         self.builder.positionBuilderAtEnd(body_block);
-        const store_inst = self.builder.buildStore(value, it_ptr);
-        store_inst.setAlignment(@min(elem_ty.abiAlignment(target), dest_ptr_align));
+        const elem_abi_alignment = elem_ty.abiAlignment(target);
+        const it_ptr_alignment = @min(elem_abi_alignment, dest_ptr_align);
+        if (isByRef(elem_ty)) {
+            _ = self.builder.buildMemCpy(
+                it_ptr,
+                it_ptr_alignment,
+                value,
+                elem_abi_alignment,
+                llvm_usize_ty.constInt(elem_abi_size, .False),
+                is_volatile,
+            );
+        } else {
+            const store_inst = self.builder.buildStore(value, it_ptr);
+            store_inst.setAlignment(it_ptr_alignment);
+            store_inst.setVolatile(llvm.Bool.fromBool(is_volatile));
+        }
         const one_gep = [_]*llvm.Value{llvm_usize_ty.constInt(1, .False)};
         const next_ptr = self.builder.buildInBoundsGEP(elem_llvm_ty, it_ptr, &one_gep, one_gep.len, "");
         _ = self.builder.buildBr(loop_block);
-- 
cgit v1.2.3
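For element types that the backend lowers by reference, the `value` operand in the lowering loop is a pointer to the element's memory rather than the element itself, which is why the loop body above must emit `llvm.memcpy` instead of a plain store. A sketch of the kind of code that exercises this path (illustrative; it assumes a multi-word struct is lowered by reference, as aggregates of this size are in this backend):

    const std = @import("std");
    const expect = std.testing.expect;

    test "memset with runtime-known struct element" {
        const S = struct { a: u64, b: u64, c: u64, d: u64 };
        var buf: [8]S = undefined;
        // Runtime-known element: the lowering cannot use a comptime fill
        // pattern and goes through the per-element loop.
        var elem: S = .{ .a = 1, .b = 2, .c = 3, .d = 4 };
        @memset(&buf, elem);
        for (buf) |s| try expect(s.a == 1 and s.d == 4);
    }
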
From 9295355985202c267b4326b5a6e2ad5158b48e5d Mon Sep 17 00:00:00 2001
From: Andrew Kelley
Date: Wed, 26 Apr 2023 13:41:02 -0700
Subject: LLVM backend: optimize memset with comptime-known element

When the element is comptime-known, we can check whether it has a
repeated byte representation. If it does, `@memset` can be lowered with
the LLVM memset intrinsic rather than with a loop.
---
 src/Sema.zig             |  3 +++
 src/codegen/llvm.zig     | 47 ++++++++++++++++++++++++++++++++---------------
 src/value.zig            | 38 +++++++++++++++++++++++++++++++++++---
 test/behavior/memset.zig | 14 +++++++++++++-
 4 files changed, 83 insertions(+), 19 deletions(-)

diff --git a/src/Sema.zig b/src/Sema.zig
index 8b47f1877b..e05308b6c0 100644
--- a/src/Sema.zig
+++ b/src/Sema.zig
@@ -26953,9 +26953,11 @@ fn storePtrVal(
     defer sema.gpa.free(buffer);
     reinterpret.val_ptr.*.writeToMemory(mut_kit.ty, sema.mod, buffer) catch |err| switch (err) {
         error.ReinterpretDeclRef => unreachable,
+        error.IllDefinedMemoryLayout => unreachable, // Sema was supposed to emit a compile error already
     };
     operand_val.writeToMemory(operand_ty, sema.mod, buffer[reinterpret.byte_offset..]) catch |err| switch (err) {
         error.ReinterpretDeclRef => unreachable,
+        error.IllDefinedMemoryLayout => unreachable, // Sema was supposed to emit a compile error already
     };
 
     const arena = mut_kit.beginArena(sema.mod);
@@ -27905,6 +27907,7 @@ fn bitCastVal(
     defer sema.gpa.free(buffer);
     val.writeToMemory(old_ty, sema.mod, buffer) catch |err| switch (err) {
         error.ReinterpretDeclRef => return null,
+        error.IllDefinedMemoryLayout => unreachable, // Sema was supposed to emit a compile error already
     };
     return try Value.readFromMemory(new_ty, sema.mod, buffer[buffer_offset..], sema.arena);
 }

diff --git a/src/codegen/llvm.zig b/src/codegen/llvm.zig
index a7a1d8f56f..d697a41988 100644
--- a/src/codegen/llvm.zig
+++ b/src/codegen/llvm.zig
@@ -8424,28 +8424,45 @@ pub const FuncGen = struct {
         const dest_slice = try self.resolveInst(bin_op.lhs);
         const ptr_ty = self.air.typeOf(bin_op.lhs);
         const elem_ty = self.air.typeOf(bin_op.rhs);
-        const target = self.dg.module.getTarget();
-        const val_is_undef = if (self.air.value(bin_op.rhs)) |val| val.isUndefDeep() else false;
+        const module = self.dg.module;
+        const target = module.getTarget();
         const dest_ptr_align = ptr_ty.ptrAlignment(target);
         const u8_llvm_ty = self.context.intType(8);
         const dest_ptr = self.sliceOrArrayPtr(dest_slice, ptr_ty);
         const is_volatile = ptr_ty.isVolatilePtr();
 
-        if (val_is_undef) {
-            // Even if safety is disabled, we still emit a memset to undefined since it conveys
-            // extra information to LLVM. However, safety makes the difference between using
-            // 0xaa or actual undefined for the fill byte.
-            const fill_byte = if (safety)
-                u8_llvm_ty.constInt(0xaa, .False)
-            else
-                u8_llvm_ty.getUndef();
-            const len = self.sliceOrArrayLenInBytes(dest_slice, ptr_ty);
-            _ = self.builder.buildMemSet(dest_ptr, fill_byte, len, dest_ptr_align, is_volatile);
+        if (self.air.value(bin_op.rhs)) |elem_val| {
+            if (elem_val.isUndefDeep()) {
+                // Even if safety is disabled, we still emit a memset to undefined since it conveys
+                // extra information to LLVM. However, safety makes the difference between using
+                // 0xaa or actual undefined for the fill byte.
+                const fill_byte = if (safety)
+                    u8_llvm_ty.constInt(0xaa, .False)
+                else
+                    u8_llvm_ty.getUndef();
+                const len = self.sliceOrArrayLenInBytes(dest_slice, ptr_ty);
+                _ = self.builder.buildMemSet(dest_ptr, fill_byte, len, dest_ptr_align, is_volatile);
 
-            if (safety and self.dg.module.comp.bin_file.options.valgrind) {
-                self.valgrindMarkUndef(dest_ptr, len);
+                if (safety and module.comp.bin_file.options.valgrind) {
+                    self.valgrindMarkUndef(dest_ptr, len);
+                }
+                return null;
+            }
+
+            // Test if the element value is compile-time known to be a
+            // repeating byte pattern, for example, `@as(u64, 0)` has a
+            // repeating byte pattern of 0 bytes. In such a case, the memset
+            // intrinsic can be used.
+            var value_buffer: Value.Payload.U64 = undefined;
+            if (try elem_val.hasRepeatedByteRepr(elem_ty, module, &value_buffer)) |byte_val| {
+                const fill_byte = try self.resolveValue(.{
+                    .ty = Type.u8,
+                    .val = byte_val,
+                });
+                const len = self.sliceOrArrayLenInBytes(dest_slice, ptr_ty);
+                _ = self.builder.buildMemSet(dest_ptr, fill_byte, len, dest_ptr_align, is_volatile);
+                return null;
             }
-            return null;
         }
 
         const value = try self.resolveInst(bin_op.rhs);
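The repeated-byte test used above is implemented by `Value.hasRepeatedByteRepr` in the next hunk: it serializes the element with `writeToMemory` and checks that every byte of the representation is identical. Conceptually it reduces to the following standalone sketch (a hypothetical helper over a raw byte slice, independent of the compiler's `Value`/`Type` machinery):

    const std = @import("std");

    /// Returns the fill byte if all bytes in the representation are
    /// identical, otherwise null.
    fn repeatedByte(bytes: []const u8) ?u8 {
        const first = bytes[0];
        for (bytes[1..]) |b| {
            if (b != first) return null;
        }
        return first;
    }

    test "repeated byte detection" {
        // @as(u64, 0) is eight 0x00 bytes: memset-able with fill byte 0x00.
        const zeros = std.mem.toBytes(@as(u64, 0));
        try std.testing.expectEqual(@as(?u8, 0x00), repeatedByte(&zeros));

        // No uniform byte, so such an element would take the loop path.
        const mixed = std.mem.toBytes(@as(u64, 0x0102030405060708));
        try std.testing.expectEqual(@as(?u8, null), repeatedByte(&mixed));
    }
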
diff --git a/src/value.zig b/src/value.zig
index 05e9d24ee2..2b9636f5e9 100644
--- a/src/value.zig
+++ b/src/value.zig
@@ -1278,7 +1278,10 @@ pub const Value = extern union {
     ///
     /// Asserts that buffer.len >= ty.abiSize(). The buffer is allowed to extend past
     /// the end of the value in memory.
-    pub fn writeToMemory(val: Value, ty: Type, mod: *Module, buffer: []u8) error{ReinterpretDeclRef}!void {
+    pub fn writeToMemory(val: Value, ty: Type, mod: *Module, buffer: []u8) error{
+        ReinterpretDeclRef,
+        IllDefinedMemoryLayout,
+    }!void {
         const target = mod.getTarget();
         const endian = target.cpu.arch.endian();
         if (val.isUndef()) {
@@ -1345,7 +1348,7 @@ pub const Value = extern union {
             return writeToPackedMemory(val, ty, mod, buffer[0..byte_count], 0);
         },
         .Struct => switch (ty.containerLayout()) {
-            .Auto => unreachable, // Sema is supposed to have emitted a compile error already
+            .Auto => return error.IllDefinedMemoryLayout,
             .Extern => {
                 const fields = ty.structFields().values();
                 const field_vals = val.castTag(.aggregate).?.data;
@@ -1366,7 +1369,7 @@ pub const Value = extern union {
             std.mem.writeInt(Int, buffer[0..@sizeOf(Int)], @intCast(Int, int), endian);
         },
         .Union => switch (ty.containerLayout()) {
-            .Auto => unreachable,
+            .Auto => return error.IllDefinedMemoryLayout,
             .Extern => @panic("TODO implement writeToMemory for extern unions"),
             .Packed => {
                 const byte_count = (@intCast(usize, ty.bitSize(target)) + 7) / 8;
@@ -5381,6 +5384,35 @@ pub const Value = extern union {
         }
     }
 
+    /// If the value is represented in-memory as a series of bytes that all
+    /// have the same value, return that byte value, otherwise null.
+    pub fn hasRepeatedByteRepr(val: Value, ty: Type, mod: *Module, value_buffer: *Payload.U64) !?Value {
+        const target = mod.getTarget();
+        const abi_size = @intCast(usize, ty.abiSize(target));
+        assert(abi_size >= 1);
+        const byte_buffer = try mod.gpa.alloc(u8, abi_size);
+        defer mod.gpa.free(byte_buffer);
+
+        writeToMemory(val, ty, mod, byte_buffer) catch |err| switch (err) {
+            error.ReinterpretDeclRef => return null,
+            // TODO: The writeToMemory function was originally created for the purpose
+            // of comptime pointer casting. However, it is now additionally being used
+            // for checking the actual memory layout that will be generated by machine
+            // code late in compilation. So, this error handling is too aggressive and
+            // causes some false negatives, resulting in less-than-ideal code generation.
+            error.IllDefinedMemoryLayout => return null,
+        };
+        const first_byte = byte_buffer[0];
+        for (byte_buffer[1..]) |byte| {
+            if (byte != first_byte) return null;
+        }
+        value_buffer.* = .{
+            .base = .{ .tag = .int_u64 },
+            .data = first_byte,
+        };
+        return initPayload(&value_buffer.base);
+    }
+
     /// This type is not copyable since it may contain pointers to its inner data.
pub const Payload = struct { tag: Tag, diff --git a/test/behavior/memset.zig b/test/behavior/memset.zig index 69add499f9..374fd4b6f5 100644 --- a/test/behavior/memset.zig +++ b/test/behavior/memset.zig @@ -94,7 +94,7 @@ test "memset with 1-byte array element" { try expect(buf[4][0]); } -test "memset with large array element" { +test "memset with large array element, runtime known" { const A = [128]u64; var buf: [5]A = undefined; var runtime_known_element = [_]u64{0} ** 128; @@ -106,6 +106,18 @@ test "memset with large array element" { for (buf[4]) |elem| try expect(elem == 0); } +test "memset with large array element, comptime known" { + const A = [128]u64; + var buf: [5]A = undefined; + const comptime_known_element = [_]u64{0} ** 128; + @memset(&buf, comptime_known_element); + for (buf[0]) |elem| try expect(elem == 0); + for (buf[1]) |elem| try expect(elem == 0); + for (buf[2]) |elem| try expect(elem == 0); + for (buf[3]) |elem| try expect(elem == 0); + for (buf[4]) |elem| try expect(elem == 0); +} + test "memcpy and memset intrinsics" { if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; -- cgit v1.2.3
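Taken together, the series means `@memset` lowers to a single `llvm.memset` whenever the fill element's in-memory bytes are uniform, even for multi-byte elements, and only falls back to the per-element loop otherwise. A final illustrative sketch of that boundary (hypothetical test, not part of the series):

    const std = @import("std");
    const expect = std.testing.expect;

    test "memset with comptime-known multi-byte elements" {
        var buf: [64]u64 = undefined;

        // Every byte of the element is 0xAA: eligible for llvm.memset.
        @memset(&buf, 0xAAAAAAAAAAAAAAAA);
        for (buf) |elem| try expect(elem == 0xAAAAAAAAAAAAAAAA);

        // No repeated byte representation: lowered as an element loop.
        @memset(&buf, 0x0102030405060708);
        for (buf) |elem| try expect(elem == 0x0102030405060708);
    }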