diff options
| author | Andrew Kelley <andrew@ziglang.org> | 2023-04-29 00:19:55 -0700 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2023-04-29 00:19:55 -0700 |
| commit | d65b42e07caa00dfe2f2fbf221c593ce57882784 (patch) | |
| tree | 7926cbea1499e0affe930bf6d7455dc24adf014e /src/codegen/llvm.zig | |
| parent | fd6200eda6d4fe19c34a59430a88a9ce38d6d7a4 (diff) | |
| parent | fa200ca0cad2705bad40eb723dedf4e3bf11f2ff (diff) | |
| download | zig-d65b42e07caa00dfe2f2fbf221c593ce57882784.tar.gz zig-d65b42e07caa00dfe2f2fbf221c593ce57882784.zip | |
Merge pull request #15481 from ziglang/use-mem-intrinsics
actually use the new memory intrinsics
Diffstat (limited to 'src/codegen/llvm.zig')
| -rw-r--r-- | src/codegen/llvm.zig | 82 |
1 files changed, 62 insertions, 20 deletions
diff --git a/src/codegen/llvm.zig b/src/codegen/llvm.zig index 94f49e801d..d697a41988 100644 --- a/src/codegen/llvm.zig +++ b/src/codegen/llvm.zig @@ -7939,11 +7939,15 @@ pub const FuncGen = struct { return self.builder.buildPtrToInt(operand_ptr, dest_llvm_ty, ""); } - fn airBitCast(self: *FuncGen, inst: Air.Inst.Index) !?*llvm.Value { + fn airBitCast(self: *FuncGen, inst: Air.Inst.Index) !*llvm.Value { const ty_op = self.air.instructions.items(.data)[inst].ty_op; const operand_ty = self.air.typeOf(ty_op.operand); const inst_ty = self.air.typeOfIndex(inst); const operand = try self.resolveInst(ty_op.operand); + return self.bitCast(operand, operand_ty, inst_ty); + } + + fn bitCast(self: *FuncGen, operand: *llvm.Value, operand_ty: Type, inst_ty: Type) !*llvm.Value { const operand_is_ref = isByRef(operand_ty); const result_is_ref = isByRef(inst_ty); const llvm_dest_ty = try self.dg.lowerType(inst_ty); @@ -7954,6 +7958,12 @@ pub const FuncGen = struct { return operand; } + if (llvm_dest_ty.getTypeKind() == .Integer and + operand.typeOf().getTypeKind() == .Integer) + { + return self.builder.buildZExtOrBitCast(operand, llvm_dest_ty, ""); + } + if (operand_ty.zigTypeTag() == .Int and inst_ty.isPtrAtRuntime()) { return self.builder.buildIntToPtr(operand, llvm_dest_ty, ""); } @@ -8414,27 +8424,45 @@ pub const FuncGen = struct { const dest_slice = try self.resolveInst(bin_op.lhs); const ptr_ty = self.air.typeOf(bin_op.lhs); const elem_ty = self.air.typeOf(bin_op.rhs); - const target = self.dg.module.getTarget(); - const val_is_undef = if (self.air.value(bin_op.rhs)) |val| val.isUndefDeep() else false; + const module = self.dg.module; + const target = module.getTarget(); const dest_ptr_align = ptr_ty.ptrAlignment(target); const u8_llvm_ty = self.context.intType(8); const dest_ptr = self.sliceOrArrayPtr(dest_slice, ptr_ty); + const is_volatile = ptr_ty.isVolatilePtr(); + + if (self.air.value(bin_op.rhs)) |elem_val| { + if (elem_val.isUndefDeep()) { + // Even if safety is disabled, we still emit a memset to undefined since it conveys + // extra information to LLVM. However, safety makes the difference between using + // 0xaa or actual undefined for the fill byte. + const fill_byte = if (safety) + u8_llvm_ty.constInt(0xaa, .False) + else + u8_llvm_ty.getUndef(); + const len = self.sliceOrArrayLenInBytes(dest_slice, ptr_ty); + _ = self.builder.buildMemSet(dest_ptr, fill_byte, len, dest_ptr_align, is_volatile); - if (val_is_undef) { - // Even if safety is disabled, we still emit a memset to undefined since it conveys - // extra information to LLVM. However, safety makes the difference between using - // 0xaa or actual undefined for the fill byte. - const fill_byte = if (safety) - u8_llvm_ty.constInt(0xaa, .False) - else - u8_llvm_ty.getUndef(); - const len = self.sliceOrArrayLenInBytes(dest_slice, ptr_ty); - _ = self.builder.buildMemSet(dest_ptr, fill_byte, len, dest_ptr_align, ptr_ty.isVolatilePtr()); + if (safety and module.comp.bin_file.options.valgrind) { + self.valgrindMarkUndef(dest_ptr, len); + } + return null; + } - if (safety and self.dg.module.comp.bin_file.options.valgrind) { - self.valgrindMarkUndef(dest_ptr, len); + // Test if the element value is compile-time known to be a + // repeating byte pattern, for example, `@as(u64, 0)` has a + // repeating byte pattern of 0 bytes. In such case, the memset + // intrinsic can be used. + var value_buffer: Value.Payload.U64 = undefined; + if (try elem_val.hasRepeatedByteRepr(elem_ty, module, &value_buffer)) |byte_val| { + const fill_byte = try self.resolveValue(.{ + .ty = Type.u8, + .val = byte_val, + }); + const len = self.sliceOrArrayLenInBytes(dest_slice, ptr_ty); + _ = self.builder.buildMemSet(dest_ptr, fill_byte, len, dest_ptr_align, is_volatile); + return null; } - return null; } const value = try self.resolveInst(bin_op.rhs); @@ -8442,9 +8470,9 @@ pub const FuncGen = struct { if (elem_abi_size == 1) { // In this case we can take advantage of LLVM's intrinsic. - const fill_byte = self.builder.buildBitCast(value, u8_llvm_ty, ""); + const fill_byte = try self.bitCast(value, elem_ty, Type.u8); const len = self.sliceOrArrayLenInBytes(dest_slice, ptr_ty); - _ = self.builder.buildMemSet(dest_ptr, fill_byte, len, dest_ptr_align, ptr_ty.isVolatilePtr()); + _ = self.builder.buildMemSet(dest_ptr, fill_byte, len, dest_ptr_align, is_volatile); return null; } @@ -8486,8 +8514,22 @@ pub const FuncGen = struct { _ = self.builder.buildCondBr(end, body_block, end_block); self.builder.positionBuilderAtEnd(body_block); - const store_inst = self.builder.buildStore(value, it_ptr); - store_inst.setAlignment(@min(elem_ty.abiAlignment(target), dest_ptr_align)); + const elem_abi_alignment = elem_ty.abiAlignment(target); + const it_ptr_alignment = @min(elem_abi_alignment, dest_ptr_align); + if (isByRef(elem_ty)) { + _ = self.builder.buildMemCpy( + it_ptr, + it_ptr_alignment, + value, + elem_abi_alignment, + llvm_usize_ty.constInt(elem_abi_size, .False), + is_volatile, + ); + } else { + const store_inst = self.builder.buildStore(value, it_ptr); + store_inst.setAlignment(it_ptr_alignment); + store_inst.setVolatile(llvm.Bool.fromBool(is_volatile)); + } const one_gep = [_]*llvm.Value{llvm_usize_ty.constInt(1, .False)}; const next_ptr = self.builder.buildInBoundsGEP(elem_llvm_ty, it_ptr, &one_gep, one_gep.len, ""); _ = self.builder.buildBr(loop_block); |
