| field | value | date |
|---|---|---|
| author | Luuk de Gram <luuk@degram.dev> | 2022-11-28 19:21:21 +0100 |
| committer | Luuk de Gram <luuk@degram.dev> | 2022-11-30 17:56:02 +0100 |
| commit | 6924f21bbd81683b0889994ce86aa0ae22e5b317 (patch) | |
| tree | a78d5cf5efbe86a6f21856ab9cdbb1a5113b5c49 /src | |
| parent | a7ad1212cb1b127754c7e48ead8d80d66f0f6623 (diff) | |
| download | zig-6924f21bbd81683b0889994ce86aa0ae22e5b317.tar.gz zig-6924f21bbd81683b0889994ce86aa0ae22e5b317.zip | |
wasm: support non-natural alignment in load/store
This implements support for loads and stores where the lhs is a pointer
whose type has host_size != 0, e.g. when loading a specific field from a
packed struct with a non-byte alignment such as (0:1:3).
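For context, the kind of Zig code that exercises this path looks roughly like the sketch below (not part of the commit; the struct layout, field names, and values are illustrative). A pointer to `flags.b` carries a bit offset and a host size in its type, so every load or store through it has to go through the whole host integer.

```zig
const std = @import("std");

// Three bit-packed fields share one byte-sized host integer, so a pointer to
// `b` has host_size != 0 and a non-zero bit_offset in its type.
const Flags = packed struct {
    a: u1,
    b: u3,
    c: u4,
};

test "load and store through a packed-field pointer" {
    var flags = Flags{ .a = 1, .b = 5, .c = 9 };
    const ptr = &flags.b;

    // load: read the host byte, shift right by the bit offset, truncate to u3
    try std.testing.expectEqual(@as(u3, 5), ptr.*);

    // store: read the host byte, mask out the old bits, shift+or the new value, write back
    ptr.* = 6;
    try std.testing.expectEqual(@as(u1, 1), flags.a);
    try std.testing.expectEqual(@as(u3, 6), flags.b);
    try std.testing.expectEqual(@as(u4, 9), flags.c);
}
```

The diff below teaches the wasm backend to lower exactly these accesses, rather than assuming the pointee starts at a byte boundary.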
Diffstat (limited to 'src')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | src/arch/wasm/CodeGen.zig | 222 |
1 file changed, 144 insertions(+), 78 deletions(-)
```diff
diff --git a/src/arch/wasm/CodeGen.zig b/src/arch/wasm/CodeGen.zig
index 9aeaed521d..98dd47b122 100644
--- a/src/arch/wasm/CodeGen.zig
+++ b/src/arch/wasm/CodeGen.zig
@@ -1492,12 +1492,15 @@ fn memcpy(func: *CodeGen, dst: WValue, src: WValue, len: WValue) !void {
     // when the length is comptime-known, rather than a runtime value, we can optimize the generated code by having
     // the loop during codegen, rather than inserting a runtime loop into the binary.
     switch (len) {
-        .imm32, .imm64 => {
+        .imm32, .imm64 => blk: {
             const length = switch (len) {
                 .imm32 => |val| val,
                 .imm64 => |val| val,
                 else => unreachable,
             };
+            // if the size (length) is more than 1024 bytes, we use a runtime loop instead to prevent
+            // binary size bloat.
+            if (length > 1024) break :blk;
             var offset: u32 = 0;
             const lhs_base = dst.offset();
             const rhs_base = src.offset();
@@ -1518,80 +1521,81 @@ fn memcpy(func: *CodeGen, dst: WValue, src: WValue, len: WValue) !void {
                     else => unreachable,
                 }
             }
+            return;
         },
-        else => {
-            // TODO: We should probably lower this to a call to compiler_rt
-            // But for now, we implement it manually
-            var offset = try func.ensureAllocLocal(Type.usize); // local for counter
-            defer offset.free(func);
+        else => {},
+    }
 
-            // outer block to jump to when loop is done
-            try func.startBlock(.block, wasm.block_empty);
-            try func.startBlock(.loop, wasm.block_empty);
+    // TODO: We should probably lower this to a call to compiler_rt
+    // But for now, we implement it manually
+    var offset = try func.ensureAllocLocal(Type.usize); // local for counter
+    defer offset.free(func);
 
-            // loop condition (offset == length -> break)
-            {
-                try func.emitWValue(offset);
-                try func.emitWValue(len);
-                switch (func.arch()) {
-                    .wasm32 => try func.addTag(.i32_eq),
-                    .wasm64 => try func.addTag(.i64_eq),
-                    else => unreachable,
-                }
-                try func.addLabel(.br_if, 1); // jump out of loop into outer block (finished)
-            }
+    // outer block to jump to when loop is done
+    try func.startBlock(.block, wasm.block_empty);
+    try func.startBlock(.loop, wasm.block_empty);
 
-            // get dst ptr
-            {
-                try func.emitWValue(dst);
-                try func.emitWValue(offset);
-                switch (func.arch()) {
-                    .wasm32 => try func.addTag(.i32_add),
-                    .wasm64 => try func.addTag(.i64_add),
-                    else => unreachable,
-                }
-            }
+    // loop condition (offset == length -> break)
+    {
+        try func.emitWValue(offset);
+        try func.emitWValue(len);
+        switch (func.arch()) {
+            .wasm32 => try func.addTag(.i32_eq),
+            .wasm64 => try func.addTag(.i64_eq),
+            else => unreachable,
+        }
+        try func.addLabel(.br_if, 1); // jump out of loop into outer block (finished)
+    }
 
-            // get src value and also store in dst
-            {
-                try func.emitWValue(src);
-                try func.emitWValue(offset);
-                switch (func.arch()) {
-                    .wasm32 => {
-                        try func.addTag(.i32_add);
-                        try func.addMemArg(.i32_load8_u, .{ .offset = src.offset(), .alignment = 1 });
-                        try func.addMemArg(.i32_store8, .{ .offset = dst.offset(), .alignment = 1 });
-                    },
-                    .wasm64 => {
-                        try func.addTag(.i64_add);
-                        try func.addMemArg(.i64_load8_u, .{ .offset = src.offset(), .alignment = 1 });
-                        try func.addMemArg(.i64_store8, .{ .offset = dst.offset(), .alignment = 1 });
-                    },
-                    else => unreachable,
-                }
-            }
+    // get dst ptr
+    {
+        try func.emitWValue(dst);
+        try func.emitWValue(offset);
+        switch (func.arch()) {
+            .wasm32 => try func.addTag(.i32_add),
+            .wasm64 => try func.addTag(.i64_add),
+            else => unreachable,
+        }
+    }
 
-            // increment loop counter
-            {
-                try func.emitWValue(offset);
-                switch (func.arch()) {
-                    .wasm32 => {
-                        try func.addImm32(1);
-                        try func.addTag(.i32_add);
-                    },
-                    .wasm64 => {
-                        try func.addImm64(1);
-                        try func.addTag(.i64_add);
-                    },
-                    else => unreachable,
-                }
-                try func.addLabel(.local_set, offset.local.value);
-                try func.addLabel(.br, 0); // jump to start of loop
-            }
-            try func.endBlock(); // close off loop block
-            try func.endBlock(); // close off outer block
-        },
+    // get src value and also store in dst
+    {
+        try func.emitWValue(src);
+        try func.emitWValue(offset);
+        switch (func.arch()) {
+            .wasm32 => {
+                try func.addTag(.i32_add);
+                try func.addMemArg(.i32_load8_u, .{ .offset = src.offset(), .alignment = 1 });
+                try func.addMemArg(.i32_store8, .{ .offset = dst.offset(), .alignment = 1 });
+            },
+            .wasm64 => {
+                try func.addTag(.i64_add);
+                try func.addMemArg(.i64_load8_u, .{ .offset = src.offset(), .alignment = 1 });
+                try func.addMemArg(.i64_store8, .{ .offset = dst.offset(), .alignment = 1 });
+            },
+            else => unreachable,
+        }
     }
+
+    // increment loop counter
+    {
+        try func.emitWValue(offset);
+        switch (func.arch()) {
+            .wasm32 => {
+                try func.addImm32(1);
+                try func.addTag(.i32_add);
+            },
+            .wasm64 => {
+                try func.addImm64(1);
+                try func.addTag(.i64_add);
+            },
+            else => unreachable,
+        }
+        try func.addLabel(.local_set, offset.local.value);
+        try func.addLabel(.br, 0); // jump to start of loop
+    }
+    try func.endBlock(); // close off loop block
+    try func.endBlock(); // close off outer block
 }
 
 fn ptrSize(func: *const CodeGen) u16 {
@@ -2128,9 +2132,45 @@ fn airStore(func: *CodeGen, inst: Air.Inst.Index) InnerError!void {
 
     const lhs = try func.resolveInst(bin_op.lhs);
     const rhs = try func.resolveInst(bin_op.rhs);
-    const ty = func.air.typeOf(bin_op.lhs).childType();
+    const ptr_ty = func.air.typeOf(bin_op.lhs);
+    const ptr_info = ptr_ty.ptrInfo().data;
+    const ty = ptr_ty.childType();
+    if (ptr_info.host_size == 0) {
+        try func.store(lhs, rhs, ty, 0);
+    } else {
+        // at this point we have a non-natural alignment, we must
+        // load the value, and then shift+or the rhs into the result location.
+        var int_ty_payload: Type.Payload.Bits = .{
+            .base = .{ .tag = .int_unsigned },
+            .data = ptr_info.host_size * 8,
+        };
+        const int_elem_ty = Type.initPayload(&int_ty_payload.base);
+
+        var mask = @intCast(u64, (@as(u65, 1) << @intCast(u7, ty.bitSize(func.target))) - 1);
+        mask <<= @intCast(u6, ptr_info.bit_offset);
+        mask ^= ~@as(u64, 0);
+        const shift_val = if (ptr_info.host_size <= 4)
+            WValue{ .imm32 = ptr_info.bit_offset }
+        else
+            WValue{ .imm64 = ptr_info.bit_offset };
+        const mask_val = if (ptr_info.host_size <= 4)
+            WValue{ .imm32 = @truncate(u32, mask) }
+        else
+            WValue{ .imm64 = mask };
+
+        try func.emitWValue(lhs);
+        const loaded = try func.load(lhs, int_elem_ty, 0);
+        const anded = try func.binOp(loaded, mask_val, int_elem_ty, .@"and");
+        const extended_value = try func.intcast(rhs, ty, int_elem_ty);
+        const shifted_value = if (ptr_info.bit_offset > 0) shifted: {
+            break :shifted try func.binOp(extended_value, shift_val, int_elem_ty, .shl);
+        } else extended_value;
+        const result = try func.binOp(anded, shifted_value, int_elem_ty, .@"or");
+        std.debug.print("Host: {} ty {} ty {}\n", .{ ptr_info.host_size, int_elem_ty.fmtDebug(), ty.fmtDebug() });
+        // lhs is still on the stack
+        try func.store(.stack, result, int_elem_ty, 0);
+    }
 
-    try func.store(lhs, rhs, ty, 0);
     func.finishAir(inst, .none, &.{ bin_op.lhs, bin_op.rhs });
 }
 
@@ -2218,6 +2258,8 @@ fn airLoad(func: *CodeGen, inst: Air.Inst.Index) InnerError!void {
     const ty_op = func.air.instructions.items(.data)[inst].ty_op;
     const operand = try func.resolveInst(ty_op.operand);
     const ty = func.air.getRefType(ty_op.ty);
+    const ptr_ty = func.air.typeOf(ty_op.operand);
+    const ptr_info = ptr_ty.ptrInfo().data;
 
     if (!ty.hasRuntimeBitsIgnoreComptime()) return func.finishAir(inst, .none, &.{ty_op.operand});
 
@@ -2228,8 +2270,28 @@ fn airLoad(func: *CodeGen, inst: Air.Inst.Index) InnerError!void {
             break :result new_local;
         }
 
-        const stack_loaded = try func.load(operand, ty, 0);
-        break :result try stack_loaded.toLocal(func, ty);
+        if (ptr_info.host_size == 0) {
+            const stack_loaded = try func.load(operand, ty, 0);
+            break :result try stack_loaded.toLocal(func, ty);
+        }
+
+        // at this point we have a non-natural alignment, we must
+        // shift the value to obtain the correct bit.
+        var int_ty_payload: Type.Payload.Bits = .{
+            .base = .{ .tag = .int_unsigned },
+            .data = ptr_info.host_size * 8,
+        };
+        const int_elem_ty = Type.initPayload(&int_ty_payload.base);
+        const shift_val = if (ptr_info.host_size <= 4)
+            WValue{ .imm32 = ptr_info.bit_offset }
+        else
+            WValue{ .imm64 = ptr_info.bit_offset };
+
+        const stack_loaded = try func.load(operand, int_elem_ty, 0);
+        const shifted = try func.binOp(stack_loaded, shift_val, int_elem_ty, .shr);
+        const result = try func.trunc(shifted, ty, int_elem_ty);
+        // const wrapped = try func.wrapOperand(shifted, ty);
+        break :result try result.toLocal(func, ty);
     };
     func.finishAir(inst, result, &.{ty_op.operand});
 }
@@ -3151,7 +3213,7 @@ fn airStructFieldPtr(func: *CodeGen, inst: Air.Inst.Index) InnerError!void {
     const struct_ptr = try func.resolveInst(extra.data.struct_operand);
     const struct_ty = func.air.typeOf(extra.data.struct_operand).childType();
 
-    const result = try func.structFieldPtr(struct_ptr, struct_ty, extra.data.field_index);
+    const result = try func.structFieldPtr(extra.data.struct_operand, struct_ptr, struct_ty, extra.data.field_index);
     func.finishAir(inst, result, &.{extra.data.struct_operand});
 }
 
@@ -3161,11 +3223,11 @@ fn airStructFieldPtrIndex(func: *CodeGen, inst: Air.Inst.Index, index: u32) Inne
     const struct_ptr = try func.resolveInst(ty_op.operand);
     const struct_ty = func.air.typeOf(ty_op.operand).childType();
 
-    const result = try func.structFieldPtr(struct_ptr, struct_ty, index);
+    const result = try func.structFieldPtr(ty_op.operand, struct_ptr, struct_ty, index);
     func.finishAir(inst, result, &.{ty_op.operand});
 }
 
-fn structFieldPtr(func: *CodeGen, struct_ptr: WValue, struct_ty: Type, index: u32) InnerError!WValue {
+fn structFieldPtr(func: *CodeGen, ref: Air.Inst.Ref, struct_ptr: WValue, struct_ty: Type, index: u32) InnerError!WValue {
     const offset = switch (struct_ty.containerLayout()) {
         .Packed => switch (struct_ty.zigTypeTag()) {
             .Struct => struct_ty.packedStructFieldByteOffset(index, func.target),
@@ -3174,6 +3236,10 @@ fn structFieldPtr(func: *CodeGen, struct_ptr: WValue, struct_ty: Type, index: u3
         },
        else => struct_ty.structFieldOffset(index, func.target),
     };
+    // save a load and store when we can simply reuse the operand
+    if (offset == 0) {
+        return func.reuseOperand(ref, struct_ptr);
+    }
     switch (struct_ptr) {
         .stack_offset => |stack_offset| {
             return WValue{ .stack_offset = .{ .value = stack_offset.value + @intCast(u32, offset), .references = 1 } };
@@ -3893,9 +3959,9 @@ fn airTrunc(func: *CodeGen, inst: Air.Inst.Index) InnerError!void {
 /// Truncates a given operand to a given type, discarding any overflown bits.
 /// NOTE: Resulting value is left on the stack.
 fn trunc(func: *CodeGen, operand: WValue, wanted_ty: Type, given_ty: Type) InnerError!WValue {
-    const int_info = given_ty.intInfo(func.target);
-    if (toWasmBits(int_info.bits) == null) {
-        return func.fail("TODO: Implement wasm integer truncation for integer bitsize: {d}", .{int_info.bits});
+    const given_bits = @intCast(u16, given_ty.bitSize(func.target));
+    if (toWasmBits(given_bits) == null) {
+        return func.fail("TODO: Implement wasm integer truncation for integer bitsize: {d}", .{given_bits});
     }
 
     var result = try func.intcast(operand, given_ty, wanted_ty);
```
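To follow the new airStore/airLoad paths, here is a scalar sketch (not part of the diff) of the bit manipulation they emit, assuming an 8-bit host integer and a 3-bit field at bit offset 1; the helper names and constants are illustrative only, and the two-argument builtins match the Zig version this commit targets.

```zig
const std = @import("std");

// Mirrors the store path for a pointer with host_size != 0: read the host
// integer, clear the field's bits, shift the new value into position, or it
// in, and write the whole host integer back.
fn storePackedField(host: u8, value: u3, bit_offset: u3) u8 {
    const field_bits = 3;
    var mask: u8 = (1 << field_bits) - 1; // 0b0000_0111
    mask <<= bit_offset; // move the mask over the field's bits
    mask = ~mask; // keep everything except the field
    const shifted = @as(u8, value) << bit_offset;
    return (host & mask) | shifted;
}

// Mirrors the load path: shift the field down to bit 0 and truncate to its width.
fn loadPackedField(host: u8, bit_offset: u3) u3 {
    return @truncate(u3, host >> bit_offset);
}

test "read-modify-write of a bit-packed field" {
    var host: u8 = 0b1001_1011; // bits 4-7 = 0b1001, bits 1-3 = 0b101, bit 0 = 1
    host = storePackedField(host, 6, 1); // overwrite the 3-bit field at bit offset 1
    try std.testing.expectEqual(@as(u8, 0b1001_1101), host);
    try std.testing.expectEqual(@as(u3, 6), loadPackedField(host, 1));
}
```

The store is a read-modify-write: the mask clears only the field's own bits, so neighbouring fields packed into the same host integer survive the write. This mirrors the `and`/`shl`/`or` sequence the backend now builds with `func.binOp`, while the load mirrors the `shr` followed by `trunc`.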
