| field | value | date |
|---|---|---|
| author | Andrew Kelley <andrew@ziglang.org> | 2023-04-26 10:01:54 -0700 |
| committer | GitHub <noreply@github.com> | 2023-04-26 10:01:54 -0700 |
| commit | 3c66850e4296ce2e0f9e0d25bc537aa489f4603e | |
| tree | ae4b78d3e1ee15253ee353a8c9d972a1034f6fc6 /src/codegen/llvm.zig | |
| parent | d0311e28b397d173f0d60c403985047ec952a172 | |
| parent | badad16f88ac7e1eb84eadf76e13b4dc346d4ced | |
| download | zig-3c66850e4296ce2e0f9e0d25bc537aa489f4603e.tar.gz, zig-3c66850e4296ce2e0f9e0d25bc537aa489f4603e.zip | |
Merge pull request #15278 from ziglang/memcpy-memset
change semantics of `@memcpy` and `@memset`
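For orientation (not part of the commit itself), here is a minimal sketch of the new call shapes introduced by this change: `@memcpy` now takes a destination and source that each carry their own length (slices or pointers to arrays) and requires the lengths to match, and `@memset` takes a destination plus a typed element value, rather than raw many-item pointers with an explicit byte count. The identifiers below are illustrative, not from this commit.

```zig
const std = @import("std");

// Illustrative sketch only; `buf` and `src` are made-up names.
pub fn main() void {
    var buf: [4]u32 = undefined;
    const src = [_]u32{ 1, 2, 3, 4 };

    // Destination and source lengths are carried by the operands and must match.
    @memcpy(&buf, &src);

    // The fill value is a typed element (a u32 here), not a raw fill byte.
    @memset(&buf, 0);

    std.debug.print("{any}\n", .{buf});
}
```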
Diffstat (limited to 'src/codegen/llvm.zig')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | src/codegen/llvm.zig | 251 |

1 file changed, 180 insertions, 71 deletions
```diff
diff --git a/src/codegen/llvm.zig b/src/codegen/llvm.zig
index 76dabd3e9b..94f49e801d 100644
--- a/src/codegen/llvm.zig
+++ b/src/codegen/llvm.zig
@@ -4649,7 +4649,8 @@ pub const FuncGen = struct {
             .not => try self.airNot(inst),
             .ret => try self.airRet(inst),
             .ret_load => try self.airRetLoad(inst),
-            .store => try self.airStore(inst),
+            .store => try self.airStore(inst, false),
+            .store_safe => try self.airStore(inst, true),
             .assembly => try self.airAssembly(inst),
             .slice_ptr => try self.airSliceField(inst, 0),
             .slice_len => try self.airSliceField(inst, 1),
@@ -4672,7 +4673,8 @@
             .fence => try self.airFence(inst),
             .atomic_rmw => try self.airAtomicRmw(inst),
             .atomic_load => try self.airAtomicLoad(inst),
-            .memset => try self.airMemset(inst),
+            .memset => try self.airMemset(inst, false),
+            .memset_safe => try self.airMemset(inst, true),
             .memcpy => try self.airMemcpy(inst),
             .set_union_tag => try self.airSetUnionTag(inst),
             .get_union_tag => try self.airGetUnionTag(inst),
@@ -5776,6 +5778,36 @@ pub const FuncGen = struct {
         return result;
     }
 
+    fn sliceOrArrayPtr(fg: *FuncGen, ptr: *llvm.Value, ty: Type) *llvm.Value {
+        if (ty.isSlice()) {
+            return fg.builder.buildExtractValue(ptr, 0, "");
+        } else {
+            return ptr;
+        }
+    }
+
+    fn sliceOrArrayLenInBytes(fg: *FuncGen, ptr: *llvm.Value, ty: Type) *llvm.Value {
+        const target = fg.dg.module.getTarget();
+        const llvm_usize_ty = fg.context.intType(target.cpu.arch.ptrBitWidth());
+        switch (ty.ptrSize()) {
+            .Slice => {
+                const len = fg.builder.buildExtractValue(ptr, 1, "");
+                const elem_ty = ty.childType();
+                const abi_size = elem_ty.abiSize(target);
+                if (abi_size == 1) return len;
+                const abi_size_llvm_val = llvm_usize_ty.constInt(abi_size, .False);
+                return fg.builder.buildMul(len, abi_size_llvm_val, "");
+            },
+            .One => {
+                const array_ty = ty.childType();
+                const elem_ty = array_ty.childType();
+                const abi_size = elem_ty.abiSize(target);
+                return llvm_usize_ty.constInt(array_ty.arrayLen() * abi_size, .False);
+            },
+            .Many, .C => unreachable,
+        }
+    }
+
     fn airSliceField(self: *FuncGen, inst: Air.Inst.Index, index: c_uint) !?*llvm.Value {
         const ty_op = self.air.instructions.items(.data)[inst].ty_op;
         const operand = try self.resolveInst(ty_op.operand);
@@ -7261,39 +7293,53 @@
     fn airPtrAdd(self: *FuncGen, inst: Air.Inst.Index) !?*llvm.Value {
         const ty_pl = self.air.instructions.items(.data)[inst].ty_pl;
         const bin_op = self.air.extraData(Air.Bin, ty_pl.payload).data;
-        const base_ptr = try self.resolveInst(bin_op.lhs);
+        const ptr = try self.resolveInst(bin_op.lhs);
         const offset = try self.resolveInst(bin_op.rhs);
         const ptr_ty = self.air.typeOf(bin_op.lhs);
         const llvm_elem_ty = try self.dg.lowerPtrElemTy(ptr_ty.childType());
-        if (ptr_ty.ptrSize() == .One) {
-            // It's a pointer to an array, so according to LLVM we need an extra GEP index.
-            const indices: [2]*llvm.Value = .{
-                self.context.intType(32).constNull(), offset,
-            };
-            return self.builder.buildInBoundsGEP(llvm_elem_ty, base_ptr, &indices, indices.len, "");
-        } else {
-            const indices: [1]*llvm.Value = .{offset};
-            return self.builder.buildInBoundsGEP(llvm_elem_ty, base_ptr, &indices, indices.len, "");
+        switch (ptr_ty.ptrSize()) {
+            .One => {
+                // It's a pointer to an array, so according to LLVM we need an extra GEP index.
+                const indices: [2]*llvm.Value = .{ self.context.intType(32).constNull(), offset };
+                return self.builder.buildInBoundsGEP(llvm_elem_ty, ptr, &indices, indices.len, "");
+            },
+            .C, .Many => {
+                const indices: [1]*llvm.Value = .{offset};
+                return self.builder.buildInBoundsGEP(llvm_elem_ty, ptr, &indices, indices.len, "");
+            },
+            .Slice => {
+                const base = self.builder.buildExtractValue(ptr, 0, "");
+                const indices: [1]*llvm.Value = .{offset};
+                return self.builder.buildInBoundsGEP(llvm_elem_ty, base, &indices, indices.len, "");
+            },
         }
     }
 
     fn airPtrSub(self: *FuncGen, inst: Air.Inst.Index) !?*llvm.Value {
         const ty_pl = self.air.instructions.items(.data)[inst].ty_pl;
         const bin_op = self.air.extraData(Air.Bin, ty_pl.payload).data;
-        const base_ptr = try self.resolveInst(bin_op.lhs);
+        const ptr = try self.resolveInst(bin_op.lhs);
         const offset = try self.resolveInst(bin_op.rhs);
         const negative_offset = self.builder.buildNeg(offset, "");
         const ptr_ty = self.air.typeOf(bin_op.lhs);
         const llvm_elem_ty = try self.dg.lowerPtrElemTy(ptr_ty.childType());
-        if (ptr_ty.ptrSize() == .One) {
-            // It's a pointer to an array, so according to LLVM we need an extra GEP index.
-            const indices: [2]*llvm.Value = .{
-                self.context.intType(32).constNull(), negative_offset,
-            };
-            return self.builder.buildInBoundsGEP(llvm_elem_ty, base_ptr, &indices, indices.len, "");
-        } else {
-            const indices: [1]*llvm.Value = .{negative_offset};
-            return self.builder.buildInBoundsGEP(llvm_elem_ty, base_ptr, &indices, indices.len, "");
+        switch (ptr_ty.ptrSize()) {
+            .One => {
+                // It's a pointer to an array, so according to LLVM we need an extra GEP index.
+                const indices: [2]*llvm.Value = .{
+                    self.context.intType(32).constNull(), negative_offset,
+                };
+                return self.builder.buildInBoundsGEP(llvm_elem_ty, ptr, &indices, indices.len, "");
+            },
+            .C, .Many => {
+                const indices: [1]*llvm.Value = .{negative_offset};
+                return self.builder.buildInBoundsGEP(llvm_elem_ty, ptr, &indices, indices.len, "");
+            },
+            .Slice => {
+                const base = self.builder.buildExtractValue(ptr, 0, "");
+                const indices: [1]*llvm.Value = .{negative_offset};
+                return self.builder.buildInBoundsGEP(llvm_elem_ty, base, &indices, indices.len, "");
+            },
         }
     }
@@ -7887,8 +7933,10 @@
     fn airPtrToInt(self: *FuncGen, inst: Air.Inst.Index) !?*llvm.Value {
         const un_op = self.air.instructions.items(.data)[inst].un_op;
         const operand = try self.resolveInst(un_op);
+        const ptr_ty = self.air.typeOf(un_op);
+        const operand_ptr = self.sliceOrArrayPtr(operand, ptr_ty);
         const dest_llvm_ty = try self.dg.lowerType(self.air.typeOfIndex(inst));
-        return self.builder.buildPtrToInt(operand, dest_llvm_ty, "");
+        return self.builder.buildPtrToInt(operand_ptr, dest_llvm_ty, "");
     }
 
     fn airBitCast(self: *FuncGen, inst: Air.Inst.Index) !?*llvm.Value {
@@ -8082,48 +8130,36 @@
         return buildAllocaInner(self.context, self.builder, self.llvm_func, self.di_scope != null, llvm_ty, alignment, self.dg.module.getTarget());
     }
 
-    fn airStore(self: *FuncGen, inst: Air.Inst.Index) !?*llvm.Value {
+    fn airStore(self: *FuncGen, inst: Air.Inst.Index, safety: bool) !?*llvm.Value {
         const bin_op = self.air.instructions.items(.data)[inst].bin_op;
         const dest_ptr = try self.resolveInst(bin_op.lhs);
         const ptr_ty = self.air.typeOf(bin_op.lhs);
         const operand_ty = ptr_ty.childType();
-        // TODO Sema should emit a different instruction when the store should
-        // possibly do the safety 0xaa bytes for undefined.
         const val_is_undef = if (self.air.value(bin_op.rhs)) |val| val.isUndefDeep() else false;
         if (val_is_undef) {
-            {
-                // TODO let's handle this in AIR rather than by having each backend
-                // check the optimization mode of the compilation because the plan is
-                // to support setting the optimization mode at finer grained scopes
-                // which happens in Sema. Codegen should not be aware of this logic.
-                // I think this comment is basically the same as the other TODO comment just
-                // above but I'm leaving them both here to make it look super messy and
-                // thereby bait contributors (or let's be honest, probably myself) into
-                // fixing this instead of letting it rot.
-                const safety = switch (self.dg.module.comp.bin_file.options.optimize_mode) {
-                    .ReleaseSmall, .ReleaseFast => false,
-                    .Debug, .ReleaseSafe => true,
-                };
-                if (!safety) {
-                    return null;
-                }
-            }
+            // Even if safety is disabled, we still emit a memset to undefined since it conveys
+            // extra information to LLVM. However, safety makes the difference between using
+            // 0xaa or actual undefined for the fill byte.
+            const u8_llvm_ty = self.context.intType(8);
+            const fill_byte = if (safety)
+                u8_llvm_ty.constInt(0xaa, .False)
+            else
+                u8_llvm_ty.getUndef();
             const target = self.dg.module.getTarget();
             const operand_size = operand_ty.abiSize(target);
-            const u8_llvm_ty = self.context.intType(8);
-            const fill_char = u8_llvm_ty.constInt(0xaa, .False);
-            const dest_ptr_align = ptr_ty.ptrAlignment(target);
             const usize_llvm_ty = try self.dg.lowerType(Type.usize);
             const len = usize_llvm_ty.constInt(operand_size, .False);
-            _ = self.builder.buildMemSet(dest_ptr, fill_char, len, dest_ptr_align, ptr_ty.isVolatilePtr());
-            if (self.dg.module.comp.bin_file.options.valgrind) {
+            const dest_ptr_align = ptr_ty.ptrAlignment(target);
+            _ = self.builder.buildMemSet(dest_ptr, fill_byte, len, dest_ptr_align, ptr_ty.isVolatilePtr());
+            if (safety and self.dg.module.comp.bin_file.options.valgrind) {
                 self.valgrindMarkUndef(dest_ptr, len);
             }
-        } else {
-            const src_operand = try self.resolveInst(bin_op.rhs);
-            try self.store(dest_ptr, ptr_ty, src_operand, .NotAtomic);
+            return null;
         }
+
+        const src_operand = try self.resolveInst(bin_op.rhs);
+        try self.store(dest_ptr, ptr_ty, src_operand, .NotAtomic);
         return null;
     }
@@ -8373,34 +8409,107 @@
         return null;
     }
 
-    fn airMemset(self: *FuncGen, inst: Air.Inst.Index) !?*llvm.Value {
-        const pl_op = self.air.instructions.items(.data)[inst].pl_op;
-        const extra = self.air.extraData(Air.Bin, pl_op.payload).data;
-        const dest_ptr = try self.resolveInst(pl_op.operand);
-        const ptr_ty = self.air.typeOf(pl_op.operand);
-        const value = try self.resolveInst(extra.lhs);
-        const val_is_undef = if (self.air.value(extra.lhs)) |val| val.isUndefDeep() else false;
-        const len = try self.resolveInst(extra.rhs);
-        const u8_llvm_ty = self.context.intType(8);
-        const fill_char = if (val_is_undef) u8_llvm_ty.constInt(0xaa, .False) else value;
+    fn airMemset(self: *FuncGen, inst: Air.Inst.Index, safety: bool) !?*llvm.Value {
+        const bin_op = self.air.instructions.items(.data)[inst].bin_op;
+        const dest_slice = try self.resolveInst(bin_op.lhs);
+        const ptr_ty = self.air.typeOf(bin_op.lhs);
+        const elem_ty = self.air.typeOf(bin_op.rhs);
         const target = self.dg.module.getTarget();
+        const val_is_undef = if (self.air.value(bin_op.rhs)) |val| val.isUndefDeep() else false;
         const dest_ptr_align = ptr_ty.ptrAlignment(target);
-        _ = self.builder.buildMemSet(dest_ptr, fill_char, len, dest_ptr_align, ptr_ty.isVolatilePtr());
+        const u8_llvm_ty = self.context.intType(8);
+        const dest_ptr = self.sliceOrArrayPtr(dest_slice, ptr_ty);
+
+        if (val_is_undef) {
+            // Even if safety is disabled, we still emit a memset to undefined since it conveys
+            // extra information to LLVM. However, safety makes the difference between using
+            // 0xaa or actual undefined for the fill byte.
+            const fill_byte = if (safety)
+                u8_llvm_ty.constInt(0xaa, .False)
+            else
+                u8_llvm_ty.getUndef();
+            const len = self.sliceOrArrayLenInBytes(dest_slice, ptr_ty);
+            _ = self.builder.buildMemSet(dest_ptr, fill_byte, len, dest_ptr_align, ptr_ty.isVolatilePtr());
+
+            if (safety and self.dg.module.comp.bin_file.options.valgrind) {
+                self.valgrindMarkUndef(dest_ptr, len);
+            }
+            return null;
+        }
+
+        const value = try self.resolveInst(bin_op.rhs);
+        const elem_abi_size = elem_ty.abiSize(target);
 
-        if (val_is_undef and self.dg.module.comp.bin_file.options.valgrind) {
-            self.valgrindMarkUndef(dest_ptr, len);
+        if (elem_abi_size == 1) {
+            // In this case we can take advantage of LLVM's intrinsic.
+            const fill_byte = self.builder.buildBitCast(value, u8_llvm_ty, "");
+            const len = self.sliceOrArrayLenInBytes(dest_slice, ptr_ty);
+            _ = self.builder.buildMemSet(dest_ptr, fill_byte, len, dest_ptr_align, ptr_ty.isVolatilePtr());
+            return null;
         }
+
+        // non-byte-sized element. lower with a loop. something like this:
+
+        // entry:
+        //   ...
+        //   %end_ptr = getelementptr %ptr, %len
+        //   br loop
+        // loop:
+        //   %it_ptr = phi body %next_ptr, entry %ptr
+        //   %end = cmp eq %it_ptr, %end_ptr
+        //   cond_br %end body, end
+        // body:
+        //   store %it_ptr, %value
+        //   %next_ptr = getelementptr %it_ptr, 1
+        //   br loop
+        // end:
+        //   ...
+        const entry_block = self.builder.getInsertBlock();
+        const loop_block = self.context.appendBasicBlock(self.llvm_func, "InlineMemsetLoop");
+        const body_block = self.context.appendBasicBlock(self.llvm_func, "InlineMemsetBody");
+        const end_block = self.context.appendBasicBlock(self.llvm_func, "InlineMemsetEnd");
+
+        const llvm_usize_ty = self.context.intType(target.cpu.arch.ptrBitWidth());
+        const len = switch (ptr_ty.ptrSize()) {
+            .Slice => self.builder.buildExtractValue(dest_slice, 1, ""),
+            .One => llvm_usize_ty.constInt(ptr_ty.childType().arrayLen(), .False),
+            .Many, .C => unreachable,
+        };
+        const elem_llvm_ty = try self.dg.lowerType(elem_ty);
+        const len_gep = [_]*llvm.Value{len};
+        const end_ptr = self.builder.buildInBoundsGEP(elem_llvm_ty, dest_ptr, &len_gep, len_gep.len, "");
+        _ = self.builder.buildBr(loop_block);
+
+        self.builder.positionBuilderAtEnd(loop_block);
+        const it_ptr = self.builder.buildPhi(self.context.pointerType(0), "");
+        const end = self.builder.buildICmp(.NE, it_ptr, end_ptr, "");
+        _ = self.builder.buildCondBr(end, body_block, end_block);
+
+        self.builder.positionBuilderAtEnd(body_block);
+        const store_inst = self.builder.buildStore(value, it_ptr);
+        store_inst.setAlignment(@min(elem_ty.abiAlignment(target), dest_ptr_align));
+        const one_gep = [_]*llvm.Value{llvm_usize_ty.constInt(1, .False)};
+        const next_ptr = self.builder.buildInBoundsGEP(elem_llvm_ty, it_ptr, &one_gep, one_gep.len, "");
+        _ = self.builder.buildBr(loop_block);
+
+        self.builder.positionBuilderAtEnd(end_block);
+
+        const incoming_values: [2]*llvm.Value = .{ next_ptr, dest_ptr };
+        const incoming_blocks: [2]*llvm.BasicBlock = .{ body_block, entry_block };
+        it_ptr.addIncoming(&incoming_values, &incoming_blocks, 2);
+
         return null;
     }
 
     fn airMemcpy(self: *FuncGen, inst: Air.Inst.Index) !?*llvm.Value {
-        const pl_op = self.air.instructions.items(.data)[inst].pl_op;
-        const extra = self.air.extraData(Air.Bin, pl_op.payload).data;
-        const dest_ptr = try self.resolveInst(pl_op.operand);
-        const dest_ptr_ty = self.air.typeOf(pl_op.operand);
-        const src_ptr = try self.resolveInst(extra.lhs);
-        const src_ptr_ty = self.air.typeOf(extra.lhs);
-        const len = try self.resolveInst(extra.rhs);
+        const bin_op = self.air.instructions.items(.data)[inst].bin_op;
+        const dest_slice = try self.resolveInst(bin_op.lhs);
+        const dest_ptr_ty = self.air.typeOf(bin_op.lhs);
+        const src_slice = try self.resolveInst(bin_op.rhs);
+        const src_ptr_ty = self.air.typeOf(bin_op.rhs);
+        const src_ptr = self.sliceOrArrayPtr(src_slice, src_ptr_ty);
+        const len = self.sliceOrArrayLenInBytes(dest_slice, dest_ptr_ty);
+        const dest_ptr = self.sliceOrArrayPtr(dest_slice, dest_ptr_ty);
         const is_volatile = src_ptr_ty.isVolatilePtr() or dest_ptr_ty.isVolatilePtr();
         const target = self.dg.module.getTarget();
        _ = self.builder.buildMemCpy(
```
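A note on the loop lowering added in `airMemset` above: LLVM's memset intrinsic can only splat a single fill byte, so it applies only when the element's ABI size is 1. Below is a hedged sketch of a call that would take the inline-loop path instead; `words` is an illustrative name, not an identifier from this commit.

```zig
const std = @import("std");

// Because u64 has an ABI size greater than one byte, this @memset cannot be
// lowered to LLVM's byte-fill intrinsic; the backend instead emits the
// phi/store/getelementptr loop sketched in the comment inside airMemset.
pub fn main() void {
    var words: [8]u64 = undefined;
    @memset(&words, 0xdead_beef); // typed fill value, one store per element
    std.debug.print("{any}\n", .{words});
}
```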
