author    Luuk de Gram <luuk@degram.dev>  2022-11-28 19:21:21 +0100
committer Luuk de Gram <luuk@degram.dev>  2022-11-30 17:56:02 +0100
commit    6924f21bbd81683b0889994ce86aa0ae22e5b317 (patch)
tree      a78d5cf5efbe86a6f21856ab9cdbb1a5113b5c49 /src
parent    a7ad1212cb1b127754c7e48ead8d80d66f0f6623 (diff)
wasm: support non-natural alignment in load/store
This implements support for loading and storing where the lhs is of pointer type with host_size != 0, e.g. when loading a specific field from a packed struct with a non-byte alignment such as (0:1:3).
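For illustration only (this snippet is not part of the change, and the struct layout is made up), such a pointer arises when taking the address of a packed struct field that does not start on a byte boundary; the pointer type then carries a host_size and bit_offset, and loads and stores through it must operate on the whole host integer:

const std = @import("std");

const S = packed struct {
    a: u1,
    b: u22,
    c: u1,
};

var s = S{ .a = 0, .b = 0, .c = 0 };

test "load/store through a packed struct field pointer" {
    const ptr = &s.b; // field starts at bit 1, so the pointer has a non-zero bit_offset
    ptr.* = 5; // store: load the host int, mask out the field, shift+or the new value, store back
    try std.testing.expectEqual(@as(u22, 5), s.b); // load: load the host int, shift right, truncate
}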
Diffstat (limited to 'src')
-rw-r--r--  src/arch/wasm/CodeGen.zig  222
1 file changed, 144 insertions, 78 deletions
diff --git a/src/arch/wasm/CodeGen.zig b/src/arch/wasm/CodeGen.zig
index 9aeaed521d..98dd47b122 100644
--- a/src/arch/wasm/CodeGen.zig
+++ b/src/arch/wasm/CodeGen.zig
@@ -1492,12 +1492,15 @@ fn memcpy(func: *CodeGen, dst: WValue, src: WValue, len: WValue) !void {
// when the length is comptime-known, rather than a runtime value, we can optimize the generated code by unrolling
// the loop during codegen, rather than inserting a runtime loop into the binary.
switch (len) {
- .imm32, .imm64 => {
+ .imm32, .imm64 => blk: {
const length = switch (len) {
.imm32 => |val| val,
.imm64 => |val| val,
else => unreachable,
};
+ // if the size (length) is more than 1024 bytes, we use a runtime loop instead to prevent
+ // binary size bloat.
+ if (length > 1024) break :blk;
var offset: u32 = 0;
const lhs_base = dst.offset();
const rhs_base = src.offset();
@@ -1518,80 +1521,81 @@ fn memcpy(func: *CodeGen, dst: WValue, src: WValue, len: WValue) !void {
else => unreachable,
}
}
+ return;
},
- else => {
- // TODO: We should probably lower this to a call to compiler_rt
- // But for now, we implement it manually
- var offset = try func.ensureAllocLocal(Type.usize); // local for counter
- defer offset.free(func);
+ else => {},
+ }
- // outer block to jump to when loop is done
- try func.startBlock(.block, wasm.block_empty);
- try func.startBlock(.loop, wasm.block_empty);
+ // TODO: We should probably lower this to a call to compiler_rt
+ // But for now, we implement it manually
+ var offset = try func.ensureAllocLocal(Type.usize); // local for counter
+ defer offset.free(func);
- // loop condition (offset == length -> break)
- {
- try func.emitWValue(offset);
- try func.emitWValue(len);
- switch (func.arch()) {
- .wasm32 => try func.addTag(.i32_eq),
- .wasm64 => try func.addTag(.i64_eq),
- else => unreachable,
- }
- try func.addLabel(.br_if, 1); // jump out of loop into outer block (finished)
- }
+ // outer block to jump to when loop is done
+ try func.startBlock(.block, wasm.block_empty);
+ try func.startBlock(.loop, wasm.block_empty);
- // get dst ptr
- {
- try func.emitWValue(dst);
- try func.emitWValue(offset);
- switch (func.arch()) {
- .wasm32 => try func.addTag(.i32_add),
- .wasm64 => try func.addTag(.i64_add),
- else => unreachable,
- }
- }
+ // loop condition (offset == length -> break)
+ {
+ try func.emitWValue(offset);
+ try func.emitWValue(len);
+ switch (func.arch()) {
+ .wasm32 => try func.addTag(.i32_eq),
+ .wasm64 => try func.addTag(.i64_eq),
+ else => unreachable,
+ }
+ try func.addLabel(.br_if, 1); // jump out of loop into outer block (finished)
+ }
- // get src value and also store in dst
- {
- try func.emitWValue(src);
- try func.emitWValue(offset);
- switch (func.arch()) {
- .wasm32 => {
- try func.addTag(.i32_add);
- try func.addMemArg(.i32_load8_u, .{ .offset = src.offset(), .alignment = 1 });
- try func.addMemArg(.i32_store8, .{ .offset = dst.offset(), .alignment = 1 });
- },
- .wasm64 => {
- try func.addTag(.i64_add);
- try func.addMemArg(.i64_load8_u, .{ .offset = src.offset(), .alignment = 1 });
- try func.addMemArg(.i64_store8, .{ .offset = dst.offset(), .alignment = 1 });
- },
- else => unreachable,
- }
- }
+ // get dst ptr
+ {
+ try func.emitWValue(dst);
+ try func.emitWValue(offset);
+ switch (func.arch()) {
+ .wasm32 => try func.addTag(.i32_add),
+ .wasm64 => try func.addTag(.i64_add),
+ else => unreachable,
+ }
+ }
- // increment loop counter
- {
- try func.emitWValue(offset);
- switch (func.arch()) {
- .wasm32 => {
- try func.addImm32(1);
- try func.addTag(.i32_add);
- },
- .wasm64 => {
- try func.addImm64(1);
- try func.addTag(.i64_add);
- },
- else => unreachable,
- }
- try func.addLabel(.local_set, offset.local.value);
- try func.addLabel(.br, 0); // jump to start of loop
- }
- try func.endBlock(); // close off loop block
- try func.endBlock(); // close off outer block
- },
+ // get src value and also store in dst
+ {
+ try func.emitWValue(src);
+ try func.emitWValue(offset);
+ switch (func.arch()) {
+ .wasm32 => {
+ try func.addTag(.i32_add);
+ try func.addMemArg(.i32_load8_u, .{ .offset = src.offset(), .alignment = 1 });
+ try func.addMemArg(.i32_store8, .{ .offset = dst.offset(), .alignment = 1 });
+ },
+ .wasm64 => {
+ try func.addTag(.i64_add);
+ try func.addMemArg(.i64_load8_u, .{ .offset = src.offset(), .alignment = 1 });
+ try func.addMemArg(.i64_store8, .{ .offset = dst.offset(), .alignment = 1 });
+ },
+ else => unreachable,
+ }
}
+
+ // increment loop counter
+ {
+ try func.emitWValue(offset);
+ switch (func.arch()) {
+ .wasm32 => {
+ try func.addImm32(1);
+ try func.addTag(.i32_add);
+ },
+ .wasm64 => {
+ try func.addImm64(1);
+ try func.addTag(.i64_add);
+ },
+ else => unreachable,
+ }
+ try func.addLabel(.local_set, offset.local.value);
+ try func.addLabel(.br, 0); // jump to start of loop
+ }
+ try func.endBlock(); // close off loop block
+ try func.endBlock(); // close off outer block
}
fn ptrSize(func: *const CodeGen) u16 {
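(Aside, not part of the patch: for comptime-known lengths of at most 1024 bytes the copy is unrolled into direct load/store pairs; larger or runtime-known lengths fall through to the runtime loop above. In plain Zig, the emitted loop is roughly equivalent to the following sketch with illustrative names:)

fn memcpySketch(dst: [*]u8, src: [*]const u8, len: usize) void {
    var offset: usize = 0;
    // the loop condition mirrors the emitted br_if: leave the loop once offset == len
    while (offset != len) : (offset += 1) {
        // byte-wise load from src and store into dst at the current offset
        dst[offset] = src[offset];
    }
}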
@@ -2128,9 +2132,45 @@ fn airStore(func: *CodeGen, inst: Air.Inst.Index) InnerError!void {
const lhs = try func.resolveInst(bin_op.lhs);
const rhs = try func.resolveInst(bin_op.rhs);
- const ty = func.air.typeOf(bin_op.lhs).childType();
+ const ptr_ty = func.air.typeOf(bin_op.lhs);
+ const ptr_info = ptr_ty.ptrInfo().data;
+ const ty = ptr_ty.childType();
+ if (ptr_info.host_size == 0) {
+ try func.store(lhs, rhs, ty, 0);
+ } else {
+ // at this point we have a non-natural alignment; we must
+ // load the value, then shift+or the rhs into the result location.
+ var int_ty_payload: Type.Payload.Bits = .{
+ .base = .{ .tag = .int_unsigned },
+ .data = ptr_info.host_size * 8,
+ };
+ const int_elem_ty = Type.initPayload(&int_ty_payload.base);
+
+ var mask = @intCast(u64, (@as(u65, 1) << @intCast(u7, ty.bitSize(func.target))) - 1);
+ mask <<= @intCast(u6, ptr_info.bit_offset);
+ mask ^= ~@as(u64, 0);
+ const shift_val = if (ptr_info.host_size <= 4)
+ WValue{ .imm32 = ptr_info.bit_offset }
+ else
+ WValue{ .imm64 = ptr_info.bit_offset };
+ const mask_val = if (ptr_info.host_size <= 4)
+ WValue{ .imm32 = @truncate(u32, mask) }
+ else
+ WValue{ .imm64 = mask };
+
+ try func.emitWValue(lhs);
+ const loaded = try func.load(lhs, int_elem_ty, 0);
+ const anded = try func.binOp(loaded, mask_val, int_elem_ty, .@"and");
+ const extended_value = try func.intcast(rhs, ty, int_elem_ty);
+ const shifted_value = if (ptr_info.bit_offset > 0) shifted: {
+ break :shifted try func.binOp(extended_value, shift_val, int_elem_ty, .shl);
+ } else extended_value;
+ const result = try func.binOp(anded, shifted_value, int_elem_ty, .@"or");
+ // lhs is still on the stack
+ try func.store(.stack, result, int_elem_ty, 0);
+ }
- try func.store(lhs, rhs, ty, 0);
func.finishAir(inst, .none, &.{ bin_op.lhs, bin_op.rhs });
}
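(Aside, not part of the patch: to make the mask arithmetic above concrete, take a u3 field at bit_offset 1 inside an 8-bit host integer: (1 << 3) - 1 = 0b111, shifted left by 1 gives 0b0000_1110, and the final XOR with all ones turns it into the keep-mask 0b1111_0001. A hand-written sketch of the resulting read-modify-write follows; the names are illustrative and a u8 host is used for brevity, whereas the backend widens the intermediate to u65 so the mask cannot overflow when the field fills the whole host integer.)

fn packedStoreU8(host: u8, value: u3, comptime bit_offset: u3) u8 {
    // ones over the field's bits, e.g. 0b0000_1110 for bit_offset = 1
    const field_mask: u8 = @as(u8, (1 << @bitSizeOf(u3)) - 1) << bit_offset;
    // ones everywhere except the field's bits, e.g. 0b1111_0001
    const keep_mask: u8 = ~field_mask;
    // clear the old field bits, then or in the new value shifted into place;
    // e.g. packedStoreU8(0b1010_0001, 5, 1) == 0b1010_1011
    return (host & keep_mask) | (@as(u8, value) << bit_offset);
}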
@@ -2218,6 +2258,8 @@ fn airLoad(func: *CodeGen, inst: Air.Inst.Index) InnerError!void {
const ty_op = func.air.instructions.items(.data)[inst].ty_op;
const operand = try func.resolveInst(ty_op.operand);
const ty = func.air.getRefType(ty_op.ty);
+ const ptr_ty = func.air.typeOf(ty_op.operand);
+ const ptr_info = ptr_ty.ptrInfo().data;
if (!ty.hasRuntimeBitsIgnoreComptime()) return func.finishAir(inst, .none, &.{ty_op.operand});
@@ -2228,8 +2270,28 @@ fn airLoad(func: *CodeGen, inst: Air.Inst.Index) InnerError!void {
break :result new_local;
}
- const stack_loaded = try func.load(operand, ty, 0);
- break :result try stack_loaded.toLocal(func, ty);
+ if (ptr_info.host_size == 0) {
+ const stack_loaded = try func.load(operand, ty, 0);
+ break :result try stack_loaded.toLocal(func, ty);
+ }
+
+ // at this point we have a non-natural alignment; we must
+ // shift the value to obtain the correct bits.
+ var int_ty_payload: Type.Payload.Bits = .{
+ .base = .{ .tag = .int_unsigned },
+ .data = ptr_info.host_size * 8,
+ };
+ const int_elem_ty = Type.initPayload(&int_ty_payload.base);
+ const shift_val = if (ptr_info.host_size <= 4)
+ WValue{ .imm32 = ptr_info.bit_offset }
+ else
+ WValue{ .imm64 = ptr_info.bit_offset };
+
+ const stack_loaded = try func.load(operand, int_elem_ty, 0);
+ const shifted = try func.binOp(stack_loaded, shift_val, int_elem_ty, .shr);
+ const result = try func.trunc(shifted, ty, int_elem_ty);
+ break :result try result.toLocal(func, ty);
};
func.finishAir(inst, result, &.{ty_op.operand});
}
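(Aside, not part of the patch: the load path is the mirror image of the store; read the whole host integer, shift it right by bit_offset, then truncate to the field's width. A minimal sketch with illustrative names, using a u8 host and a u3 field:)

fn packedLoadU8(host: u8, comptime bit_offset: u3) u3 {
    // move the field's lowest bit down to bit 0, then drop the host's extra bits;
    // e.g. packedLoadU8(0b1010_1011, 1) == 0b101
    return @truncate(u3, host >> bit_offset);
}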
@@ -3151,7 +3213,7 @@ fn airStructFieldPtr(func: *CodeGen, inst: Air.Inst.Index) InnerError!void {
const struct_ptr = try func.resolveInst(extra.data.struct_operand);
const struct_ty = func.air.typeOf(extra.data.struct_operand).childType();
- const result = try func.structFieldPtr(struct_ptr, struct_ty, extra.data.field_index);
+ const result = try func.structFieldPtr(extra.data.struct_operand, struct_ptr, struct_ty, extra.data.field_index);
func.finishAir(inst, result, &.{extra.data.struct_operand});
}
@@ -3161,11 +3223,11 @@ fn airStructFieldPtrIndex(func: *CodeGen, inst: Air.Inst.Index, index: u32) Inne
const struct_ptr = try func.resolveInst(ty_op.operand);
const struct_ty = func.air.typeOf(ty_op.operand).childType();
- const result = try func.structFieldPtr(struct_ptr, struct_ty, index);
+ const result = try func.structFieldPtr(ty_op.operand, struct_ptr, struct_ty, index);
func.finishAir(inst, result, &.{ty_op.operand});
}
-fn structFieldPtr(func: *CodeGen, struct_ptr: WValue, struct_ty: Type, index: u32) InnerError!WValue {
+fn structFieldPtr(func: *CodeGen, ref: Air.Inst.Ref, struct_ptr: WValue, struct_ty: Type, index: u32) InnerError!WValue {
const offset = switch (struct_ty.containerLayout()) {
.Packed => switch (struct_ty.zigTypeTag()) {
.Struct => struct_ty.packedStructFieldByteOffset(index, func.target),
@@ -3174,6 +3236,10 @@ fn structFieldPtr(func: *CodeGen, struct_ptr: WValue, struct_ty: Type, index: u3
},
else => struct_ty.structFieldOffset(index, func.target),
};
+ // save a load and store when we can simply reuse the operand
+ if (offset == 0) {
+ return func.reuseOperand(ref, struct_ptr);
+ }
switch (struct_ptr) {
.stack_offset => |stack_offset| {
return WValue{ .stack_offset = .{ .value = stack_offset.value + @intCast(u32, offset), .references = 1 } };
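(Aside, not part of the patch: the new offset == 0 early return covers cases like the first field of a packed struct, whose byte offset within the host integer is zero, so the struct pointer operand can simply be reused. A hypothetical example:)

const std = @import("std");

const Flags = packed struct {
    first: u3, // starts at bit 0, i.e. byte offset 0 of the host integer
    rest: u5,
};

test "first field sits at byte offset 0" {
    // a pointer to `first` has the same byte address as a pointer to the struct
    // itself, so the backend can reuse the struct pointer operand directly
    try std.testing.expectEqual(@as(usize, 0), @bitOffsetOf(Flags, "first"));
}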
@@ -3893,9 +3959,9 @@ fn airTrunc(func: *CodeGen, inst: Air.Inst.Index) InnerError!void {
/// Truncates a given operand to a given type, discarding any overflown bits.
/// NOTE: Resulting value is left on the stack.
fn trunc(func: *CodeGen, operand: WValue, wanted_ty: Type, given_ty: Type) InnerError!WValue {
- const int_info = given_ty.intInfo(func.target);
- if (toWasmBits(int_info.bits) == null) {
- return func.fail("TODO: Implement wasm integer truncation for integer bitsize: {d}", .{int_info.bits});
+ const given_bits = @intCast(u16, given_ty.bitSize(func.target));
+ if (toWasmBits(given_bits) == null) {
+ return func.fail("TODO: Implement wasm integer truncation for integer bitsize: {d}", .{given_bits});
}
var result = try func.intcast(operand, given_ty, wanted_ty);