diff options
| author | Andrew Kelley <andrew@ziglang.org> | 2019-02-01 13:05:34 -0500 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2019-02-01 13:05:34 -0500 |
| commit | bbe857be96084bae6ca1e5f10e35f3631df50edc (patch) | |
| tree | 00b351b140304365570efc4365ba407c82333d07 /std/io.zig | |
| parent | 8d32d256198589eeaccb92892e5b3145c097514c (diff) | |
| parent | 1a8570403f070933842db7739e7139779b7e04a5 (diff) | |
| download | zig-bbe857be96084bae6ca1e5f10e35f3631df50edc.tar.gz zig-bbe857be96084bae6ca1e5f10e35f3631df50edc.zip | |
Merge pull request #1775 from tgschultz/stdlib-serialization
Added serialization, bitstreams, traits for int sign, TagPayloadType, some fixes to std
Diffstat (limited to 'std/io.zig')
| -rw-r--r-- | std/io.zig | 646 |
1 files changed, 641 insertions, 5 deletions
diff --git a/std/io.zig b/std/io.zig index 428d95725d..57f2ef3df5 100644 --- a/std/io.zig +++ b/std/io.zig @@ -8,6 +8,8 @@ const debug = std.debug; const assert = debug.assert; const os = std.os; const mem = std.mem; +const meta = std.meta; +const trait = meta.trait; const Buffer = std.Buffer; const fmt = std.fmt; const File = std.os.File; @@ -463,6 +465,153 @@ pub const SliceInStream = struct { } }; +/// Creates a stream which allows for reading bit fields from another stream +pub fn BitInStream(endian: builtin.Endian, comptime Error: type) type { + return struct { + const Self = @This(); + + /// Stream that whole bytes are pulled from. + in_stream: *Stream, + /// Bits of the last byte read that have not been handed out yet. + bit_buffer: u7, + /// Number of valid bits currently held in `bit_buffer`. + bit_count: u3, + /// `InStream` interface for this bit stream; its `readFn` is `read`. + stream: Stream, + + pub const Stream = InStream(Error); + const u8_bit_count = comptime meta.bitCount(u8); + const u7_bit_count = comptime meta.bitCount(u7); + // NOTE(review): `u4_bit_count` is not referenced anywhere in this struct. + const u4_bit_count = comptime meta.bitCount(u4); + + /// Wraps `in_stream`, starting with an empty bit buffer. + pub fn init(in_stream: *Stream) Self { + return Self{ + .in_stream = in_stream, + .bit_buffer = 0, + .bit_count = 0, + .stream = Stream{ .readFn = read }, + }; + } + + /// Reads `bits` bits from the stream and returns a specified unsigned int type + /// containing them in the least significant end, returning an error if the + /// specified number of bits could not be read. + pub fn readBitsNoEof(self: *Self, comptime U: type, bits: usize) !U { + var n: usize = undefined; + const result = try self.readBits(U, bits, &n); + if (n < bits) return error.EndOfStream; + return result; + } + + /// Reads `bits` bits from the stream and returns a specified unsigned int type + /// containing them in the least significant end. The number of bits successfully + /// read is placed in `out_bits`, as reaching the end of the stream is not an error. + /// Asserts that `U` is an unsigned int type with at least `bits` bits. + /// Errors other than `error.EndOfStream` from the underlying stream are returned. + pub fn readBits(self: *Self, comptime U: type, bits: usize, out_bits: *usize) Error!U { + debug.assert(trait.isUnsignedInt(U)); + + //by extending the buffer to a minimum of u8 we can cover a number of edge cases + // related to shifting and casting. 
+ const u_bit_count = comptime meta.bitCount(U); + const buf_bit_count = bc: { + debug.assert(u_bit_count >= bits); + break :bc if (u_bit_count <= u8_bit_count) u8_bit_count else u_bit_count; + }; + const Buf = @IntType(false, buf_bit_count); + const BufShift = math.Log2Int(Buf); + + out_bits.* = usize(0); + if (U == u0 or bits == 0) return 0; + var out_buffer = Buf(0); + + //first hand out bits left over from the previously read byte + if (self.bit_count > 0) { + const n = if (self.bit_count >= bits) @intCast(u3, bits) else self.bit_count; + const shift = u7_bit_count - n; + switch (endian) { + builtin.Endian.Big => { + out_buffer = Buf(self.bit_buffer >> shift); + self.bit_buffer <<= n; + }, + builtin.Endian.Little => { + const value = (self.bit_buffer << shift) >> shift; + out_buffer = Buf(value); + self.bit_buffer >>= n; + }, + } + self.bit_count -= n; + out_bits.* = n; + } + //at this point either the request is satisfied or bit_buffer is empty + + //copy bytes until we have enough bits, then leave the rest in bit_buffer + while (out_bits.* < bits) { + const n = bits - out_bits.*; + const next_byte = self.in_stream.readByte() catch |err| { + //end of stream is not an error here: return what was gathered so far + if (err == error.EndOfStream) { + return @intCast(U, out_buffer); + } + //@BUG: See #1810. Not sure if the bug is that I have to do this for some + // streams, or that I don't for streams with empty error sets. 
+ return @errSetCast(Error, err); + }; + + switch (endian) { + builtin.Endian.Big => { + if (n >= u8_bit_count) { + //a whole byte is still needed; the shift by 8 is done as 7 then 1, presumably + // because BufShift cannot represent 8 when Buf is u8 + out_buffer <<= @intCast(u3, u8_bit_count - 1); + out_buffer <<= 1; + out_buffer |= Buf(next_byte); + out_bits.* += u8_bit_count; + continue; + } + + //only the top n bits of this byte are needed; the low (8 - n) bits are kept, + // left-aligned in the u7, for the next call + const shift = @intCast(u3, u8_bit_count - n); + out_buffer <<= @intCast(BufShift, n); + out_buffer |= Buf(next_byte >> shift); + out_bits.* += n; + self.bit_buffer = @truncate(u7, next_byte << @intCast(u3, n - 1)); + self.bit_count = shift; + }, + builtin.Endian.Little => { + if (n >= u8_bit_count) { + out_buffer |= Buf(next_byte) << @intCast(BufShift, out_bits.*); + out_bits.* += u8_bit_count; + continue; + } + + //only the low n bits of this byte are needed; the high (8 - n) bits are kept, + // right-aligned, for the next call + const shift = @intCast(u3, u8_bit_count - n); + const value = (next_byte << shift) >> shift; + out_buffer |= Buf(value) << @intCast(BufShift, out_bits.*); + out_bits.* += n; + self.bit_buffer = @truncate(u7, next_byte >> @intCast(u3, n)); + self.bit_count = shift; + }, + } + } + + return @intCast(U, out_buffer); + } + + /// Discards any buffered bits, so the next read starts with a fresh byte from the + /// underlying stream. + pub fn alignToByte(self: *Self) void { + self.bit_buffer = 0; + self.bit_count = 0; + } + + /// `readFn` implementation for `stream`. With no bits buffered this forwards to the + /// underlying stream's `read`; otherwise every byte of `buffer` is assembled with `readBits`. + /// Returns the number of bytes produced, counting a trailing partial byte as one. + pub fn read(self_stream: *Stream, buffer: []u8) Error!usize { + var self = @fieldParentPtr(Self, "stream", self_stream); + + var out_bits: usize = undefined; + var out_bits_total = usize(0); + //@NOTE: I'm not sure this is a good idea, maybe alignToByte should be forced + if (self.bit_count > 0) { + // NOTE(review): this loop does not stop at end of stream; it keeps calling readBits + // (and storing its result) for every remaining element of `buffer`. The index + // capture `i` is unused. Confirm this is intended. + for (buffer) |*b, i| { + b.* = try self.readBits(u8, u8_bit_count, &out_bits); + out_bits_total += out_bits; + } + const incomplete_byte = @boolToInt(out_bits_total % u8_bit_count > 0); + return (out_bits_total / u8_bit_count) + incomplete_byte; + } + + return self.in_stream.read(buffer); + } + }; +} + /// This is a simple OutStream that writes to a slice, and returns an error /// when it runs out of space. 
pub const SliceOutStream = struct { @@ -656,6 +805,137 @@ pub const BufferOutStream = struct { } }; +/// Creates a stream which allows for writing bit fields to another stream +pub fn BitOutStream(endian: builtin.Endian, comptime Error: type) type { + return struct { + const Self = @This(); + + /// Stream that completed bytes are written to. + out_stream: *Stream, + /// Byte currently being assembled from written bits. + bit_buffer: u8, + /// Number of bits already placed in `bit_buffer`. + bit_count: u4, + /// `OutStream` interface for this bit stream; its `writeFn` is `write`. + stream: Stream, + + pub const Stream = OutStream(Error); + const u8_bit_count = comptime meta.bitCount(u8); + // NOTE(review): `u4_bit_count` is not referenced anywhere in this struct. + const u4_bit_count = comptime meta.bitCount(u4); + + /// Wraps `out_stream`, starting with an empty bit buffer. + pub fn init(out_stream: *Stream) Self { + return Self{ + .out_stream = out_stream, + .bit_buffer = 0, + .bit_count = 0, + .stream = Stream{ .writeFn = write }, + }; + } + + /// Write the specified number of bits to the stream from the least significant bits of + /// the specified unsigned int value. Bits will only be written to the stream when there + /// are enough to fill a byte. + /// Asserts that `value` is an unsigned int with at least `bits` bits. + pub fn writeBits(self: *Self, value: var, bits: usize) Error!void { + if (bits == 0) return; + + const U = @typeOf(value); + debug.assert(trait.isUnsignedInt(U)); + + //by extending the buffer to a minimum of u8 we can cover a number of edge cases + // related to shifting and casting. 
+ const u_bit_count = comptime meta.bitCount(U); + const buf_bit_count = bc: { + debug.assert(u_bit_count >= bits); + break :bc if (u_bit_count <= u8_bit_count) u8_bit_count else u_bit_count; + }; + const Buf = @IntType(false, buf_bit_count); + const BufShift = math.Log2Int(Buf); + + const buf_value = @intCast(Buf, value); + + const high_byte_shift = @intCast(BufShift, buf_bit_count - u8_bit_count); + //Big: left-align the wanted bits so they are consumed from the top of in_buffer; + // Little: bits are consumed from the bottom, so the value is used as is. + // NOTE(review): the Little path never masks `value` down to `bits`, so any bits set + // above `bits` appear to end up in bit_buffer - confirm callers always pass values that fit. + var in_buffer = switch (endian) { + builtin.Endian.Big => buf_value << @intCast(BufShift, buf_bit_count - bits), + builtin.Endian.Little => buf_value, + }; + var in_bits = bits; + + //first top up the partially filled byte left by a previous call + if (self.bit_count > 0) { + const bits_remaining = u8_bit_count - self.bit_count; + const n = @intCast(u3, if (bits_remaining > bits) bits else bits_remaining); + switch (endian) { + builtin.Endian.Big => { + const shift = @intCast(BufShift, high_byte_shift + self.bit_count); + const v = @intCast(u8, in_buffer >> shift); + self.bit_buffer |= v; + in_buffer <<= n; + }, + builtin.Endian.Little => { + const v = @truncate(u8, in_buffer) << @intCast(u3, self.bit_count); + self.bit_buffer |= v; + in_buffer >>= n; + }, + } + self.bit_count += n; + in_bits -= n; + + //if we didn't fill the buffer, it's because bits < bits_remaining; + if (self.bit_count != u8_bit_count) return; + try self.out_stream.writeByte(self.bit_buffer); + self.bit_buffer = 0; + self.bit_count = 0; + } + //at this point we know bit_buffer is empty + + //copy bytes until we can't fill one anymore, then leave the rest in bit_buffer + while (in_bits >= u8_bit_count) { + switch (endian) { + builtin.Endian.Big => { + const v = @intCast(u8, in_buffer >> high_byte_shift); + try self.out_stream.writeByte(v); + in_buffer <<= @intCast(u3, u8_bit_count - 1); + in_buffer <<= 1; + }, + builtin.Endian.Little => { + const v = @truncate(u8, in_buffer); + try self.out_stream.writeByte(v); + in_buffer >>= @intCast(u3, u8_bit_count - 1); + in_buffer >>= 1; + }, + } + in_bits -= u8_bit_count; + } + + if (in_bits > 0) { + self.bit_count = 
+ @intCast(u4, in_bits); + self.bit_buffer = switch (endian) { + builtin.Endian.Big => @truncate(u8, in_buffer >> high_byte_shift), + builtin.Endian.Little => @truncate(u8, in_buffer), + }; + } + } + + /// Flush any remaining bits to the stream. + /// Does nothing when no bits are pending; otherwise writes the partially filled byte as is + /// and resets the buffer. + pub fn flushBits(self: *Self) !void { + if (self.bit_count == 0) return; + try self.out_stream.writeByte(self.bit_buffer); + self.bit_buffer = 0; + self.bit_count = 0; + } + + /// `writeFn` implementation for `stream`. With no bits pending this forwards to the + /// underlying stream's `write`; otherwise each byte of `buffer` goes through `writeBits`. + pub fn write(self_stream: *Stream, buffer: []const u8) Error!void { + var self = @fieldParentPtr(Self, "stream", self_stream); + + //@NOTE: I'm not sure this is a good idea, maybe flushBits should be forced + if (self.bit_count > 0) { + // NOTE(review): the index capture `i` is unused. + for (buffer) |b, i| + try self.writeBits(b, u8_bit_count); + return; + } + + return self.out_stream.write(buffer); + } + }; +} + + + pub const BufferedAtomicFile = struct { atomic_file: os.AtomicFile, file_stream: os.File.OutStream, @@ -696,11 +976,6 @@ pub const BufferedAtomicFile = struct { } }; -test "import io tests" { - comptime { - _ = @import("io_test.zig"); - } -} pub fn readLine(buf: *std.Buffer) ![]u8 { var stdin = try getStdIn(); @@ -772,3 +1047,364 @@ test "io.readLineSliceFrom" { debug.assert(mem.eql(u8, "Line 1", try readLineSliceFrom(stream, buf[0..]))); debug.assertError(readLineSliceFrom(stream, buf[0..]), error.OutOfMemory); } + +/// Creates a deserializer that deserializes types from any stream. +/// If `is_packed` is true, the data stream is treated as bit-packed, +/// otherwise data is expected to be packed to the smallest byte. +/// Types may implement a custom deserialization routine with a +/// function named `deserialize` in the form of: +/// pub fn deserialize(self: *Self, deserializer: var) !void +/// which will be called when the deserializer is used to deserialize +/// that type. It will pass a pointer to the type instance to deserialize +/// into and a pointer to the deserializer struct. 
+pub fn Deserializer(endian: builtin.Endian, is_packed: bool, comptime Error: type) type { + return struct { + const Self = @This(); + + /// A `BitInStream` (held by value) wrapping the source when `is_packed`, + /// otherwise the source stream pointer itself. + in_stream: if (is_packed) BitInStream(endian, Stream.Error) else *Stream, + + pub const Stream = InStream(Error); + + /// Creates a deserializer that reads from `in_stream`. + pub fn init(in_stream: *Stream) Self { + return Self{ .in_stream = switch (is_packed) { + true => BitInStream(endian, Stream.Error).init(in_stream), + else => in_stream, + } }; + } + + /// When `is_packed`, discards buffered bits via the bit stream's `alignToByte`; + /// otherwise does nothing. + pub fn alignToByte(self: *Self) void { + if(!is_packed) return; + self.in_stream.alignToByte(); + } + + //@BUG: inferred error issue. See: #1386 + /// Reads one int or float `T` as an unsigned int of the same bit count and bit-casts it to `T`. + /// Packed: reads exactly that many bits. Unpacked: reads `@sizeOf(U)` bytes and assembles + /// them according to `endian`. Returns `error.EndOfStream` if the stream runs out first. + fn deserializeInt(self: *Self, comptime T: type) (Stream.Error || error{EndOfStream})!T { + debug.assert(trait.is(builtin.TypeId.Int)(T) or trait.is(builtin.TypeId.Float)(T)); + + const u8_bit_count = comptime meta.bitCount(u8); + const t_bit_count = comptime meta.bitCount(T); + + const U = @IntType(false, t_bit_count); + const Log2U = math.Log2Int(U); + const int_size = @sizeOf(U); + + if (is_packed) { + const result = try self.in_stream.readBitsNoEof(U, t_bit_count); + return @bitCast(T, result); + } + + var buffer: [int_size]u8 = undefined; + const read_size = try self.in_stream.read(buffer[0..]); + if (read_size < int_size) return error.EndOfStream; + + if (int_size == 1) return @bitCast(T, buffer[0]); + + var result = U(0); + for (buffer) |byte, i| { + switch (endian) { + builtin.Endian.Big => { + result = (result << @intCast(u4, u8_bit_count)) | byte; + }, + builtin.Endian.Little => { + result |= U(byte) << @intCast(Log2U, u8_bit_count * i); + }, + } + } + + return @bitCast(T, result); + } + + //@TODO: Replace this with @unionInit or whatever when it is added + // see: #1315 + /// Writes `tag` into the tag slot of the tagged union that `ptr` points to; does nothing + /// for untagged unions. + /// NOTE(review): the tag address is computed by hand from alignment and the largest field + /// size, which presumably mirrors the compiler's union layout - confirm before reuse. + fn setTag(ptr: var, tag: var) void { + const T = @typeOf(ptr); + comptime debug.assert(trait.isPtrTo(builtin.TypeId.Union)(T)); + const U = meta.Child(T); + + const info = @typeInfo(U).Union; + if (info.tag_type) |TagType| { + debug.assert(TagType == @typeOf(tag)); + + var ptr_tag = ptr: { + if 
+ (@alignOf(TagType) >= @alignOf(U)) break :ptr @ptrCast(*TagType, ptr); + const offset = comptime max: { + var max_field_size: comptime_int = 0; + for (info.fields) |field_info| { + const field_size = @sizeOf(field_info.field_type); + max_field_size = math.max(max_field_size, field_size); + } + break :max math.max(max_field_size, @alignOf(U)); + }; + break :ptr @intToPtr(*TagType, @ptrToInt(ptr) + offset); + }; + ptr_tag.* = tag; + } + } + + /// Deserializes and returns data of the specified type from the stream + pub fn deserialize(self: *Self, comptime T: type) !T { + var value: T = undefined; + try self.deserializeInto(&value); + return value; + } + + /// Deserializes data into the type pointed to by `ptr` + /// Slices and pointers to arrays are filled element by element. A type with a `deserialize` + /// function is handed to that function. A packed type met by an unpacked deserializer is + /// read through a temporary packed deserializer created on the same stream. + pub fn deserializeInto(self: *Self, ptr: var) !void { + const T = @typeOf(ptr); + debug.assert(trait.is(builtin.TypeId.Pointer)(T)); + + if (comptime trait.isSlice(T) or comptime trait.isPtrTo(builtin.TypeId.Array)(T)) { + for (ptr) |*v| + try self.deserializeInto(v); + return; + } + + comptime debug.assert(trait.isSingleItemPtr(T)); + + const C = comptime meta.Child(T); + const child_type_id = @typeId(C); + + //custom deserializer: fn(self: *Self, deserializer: var) !void + if (comptime trait.hasFn("deserialize")(C)) return C.deserialize(ptr, self); + + if (comptime trait.isPacked(C) and !is_packed) { + var packed_deserializer = Deserializer(endian, true, Error).init(self.in_stream); + return packed_deserializer.deserializeInto(ptr); + } + + switch (child_type_id) { + builtin.TypeId.Void => return, + builtin.TypeId.Bool => ptr.* = (try self.deserializeInt(u1)) > 0, + builtin.TypeId.Float, builtin.TypeId.Int => ptr.* = try self.deserializeInt(C), + builtin.TypeId.Struct => { + const info = @typeInfo(C).Struct; + + inline for (info.fields) |*field_info| { + const name = field_info.name; + const FieldType = field_info.field_type; + + if (FieldType == void or FieldType == u0) continue; + + //it doesn't make any sense to read pointers + if (comptime 
+ trait.is(builtin.TypeId.Pointer)(FieldType)) { + @compileError("Will not " ++ "read field " ++ name ++ " of struct " ++ + @typeName(C) ++ " because it " ++ "is of pointer-type " ++ + @typeName(FieldType) ++ "."); + } + + try self.deserializeInto(&@field(ptr, name)); + } + }, + builtin.TypeId.Union => { + const info = @typeInfo(C).Union; + if (info.tag_type) |TagType| { + //we avoid duplicate iteration over the enum tags + // by getting the int directly and casting it without + // safety. If it is bad, it will be caught anyway. + const TagInt = @TagType(TagType); + const tag = try self.deserializeInt(TagInt); + + { + @setRuntimeSafety(false); + //See: #1315 + setTag(ptr, @intToEnum(TagType, tag)); + } + + inline for (info.fields) |field_info| { + if (field_info.enum_field.?.value == tag) { + const name = field_info.name; + const FieldType = field_info.field_type; + @field(ptr, name) = FieldType(undefined); + try self.deserializeInto(&@field(ptr, name)); + return; + } + } + //This is reachable if the enum data is bad + return error.InvalidEnumTag; + } + // NOTE(review): the message below seems to be missing a period between "union" and "Use". + @compileError("Cannot meaningfully deserialize " ++ @typeName(C) ++ + " because it is an untagged union Use a custom deserialize()."); + }, + builtin.TypeId.Optional => { + const OC = comptime meta.Child(C); + const exists = (try self.deserializeInt(u1)) > 0; + if (!exists) { + ptr.* = null; + return; + } + + //The way non-pointer optionals are implemented ensures a pointer to them + // will point to the value. The flag is stored at the end of that data. + var val_ptr = @ptrCast(*OC, ptr); + try self.deserializeInto(val_ptr); + //This bit ensures the null flag isn't set. Any actual copying should be + // optimized out... I hope. 
+ ptr.* = val_ptr.*; + }, + builtin.TypeId.Enum => { + var value = try self.deserializeInt(@TagType(C)); + ptr.* = try meta.intToEnum(C, value); + }, + else => { + @compileError("Cannot deserialize " ++ @tagName(child_type_id) ++ " types (unimplemented)."); + }, + } + } + }; +} + +/// Creates a serializer that serializes types to any stream. +/// If `is_packed` is true, the data will be bit-packed into the stream. +/// Note that you must call `serializer.flush()` when you are done +/// writing bit-packed data in order to ensure any unwritten bits are committed. +/// If `is_packed` is false, data is packed to the smallest byte. In the case +/// of packed structs, the struct will be written bit-packed and with the specified +/// endianness, after which data will resume being written at the next byte boundary. +/// Types may implement a custom serialization routine with a +/// function named `serialize` in the form of: +/// pub fn serialize(self: Self, serializer: var) !void +/// which will be called when the serializer is used to serialize that type. It will +/// pass a const pointer to the type instance to be serialized and a pointer +/// to the serializer struct. 
+pub fn Serializer(endian: builtin.Endian, is_packed: bool, comptime Error: type) type { + return struct { + const Self = @This(); + + /// A `BitOutStream` (held by value) wrapping the destination when `is_packed`, + /// otherwise the destination stream pointer itself. + out_stream: if (is_packed) BitOutStream(endian, Stream.Error) else *Stream, + + pub const Stream = OutStream(Error); + + /// Creates a serializer that writes to `out_stream`. + pub fn init(out_stream: *Stream) Self { + return Self{ .out_stream = switch (is_packed) { + true => BitOutStream(endian, Stream.Error).init(out_stream), + else => out_stream, + } }; + } + + /// Flushes any unwritten bits to the stream + /// Does nothing unless `is_packed`. + pub fn flush(self: *Self) Stream.Error!void { + if (is_packed) return self.out_stream.flushBits(); + } + + /// Writes one int or float by bit-casting it to an unsigned int of the same bit count. + /// Packed: writes exactly that many bits. Unpacked: writes `@sizeOf(U)` bytes ordered + /// according to `endian`. + fn serializeInt(self: *Self, value: var) !void { + const T = @typeOf(value); + debug.assert(trait.is(builtin.TypeId.Int)(T) or trait.is(builtin.TypeId.Float)(T)); + + const t_bit_count = comptime meta.bitCount(T); + const u8_bit_count = comptime meta.bitCount(u8); + + const U = @IntType(false, t_bit_count); + const Log2U = math.Log2Int(U); + const int_size = @sizeOf(U); + + const u_value = @bitCast(U, value); + + if (is_packed) return self.out_stream.writeBits(u_value, t_bit_count); + + var buffer: [int_size]u8 = undefined; + // NOTE(review): this assignment looks redundant; the loop below writes every byte of + // `buffer`, including buffer[0]. + if (int_size == 1) buffer[0] = u_value; + + for (buffer) |*byte, i| { + const idx = switch (endian) { + builtin.Endian.Big => int_size - i - 1, + builtin.Endian.Little => i, + }; + const shift = @intCast(Log2U, idx * u8_bit_count); + const v = u_value >> shift; + byte.* = if (t_bit_count < u8_bit_count) v else @truncate(u8, v); + } + + try self.out_stream.write(buffer); + } + + /// Serializes the passed value into the stream + /// Indexable values are written element by element. A type with a `serialize` function is + /// handed to that function. A packed type met by an unpacked serializer is written through + /// a temporary packed serializer on the same stream, which is flushed afterwards. + pub fn serialize(self: *Self, value: var) !void { + const T = comptime @typeOf(value); + + if (comptime trait.isIndexable(T)) { + for (value) |v| + try self.serialize(v); + return; + } + + //custom serializer: fn(self: Self, serializer: var) !void + if (comptime trait.hasFn("serialize")(T)) return T.serialize(value, self); + + if (comptime trait.isPacked(T) and !is_packed) { + var packed_serializer = Serializer(endian, true, 
+ Error).init(self.out_stream); + try packed_serializer.serialize(value); + try packed_serializer.flush(); + return; + } + + switch (@typeId(T)) { + builtin.TypeId.Void => return, + builtin.TypeId.Bool => try self.serializeInt(u1(@boolToInt(value))), + builtin.TypeId.Float, builtin.TypeId.Int => try self.serializeInt(value), + builtin.TypeId.Struct => { + const info = @typeInfo(T); + + inline for (info.Struct.fields) |*field_info| { + const name = field_info.name; + const FieldType = field_info.field_type; + + if (FieldType == void or FieldType == u0) continue; + + //It doesn't make sense to write pointers + if (comptime trait.is(builtin.TypeId.Pointer)(FieldType)) { + @compileError("Will not " ++ "serialize field " ++ name ++ + " of struct " ++ @typeName(T) ++ " because it " ++ + "is of pointer-type " ++ @typeName(FieldType) ++ "."); + } + try self.serialize(@field(value, name)); + } + }, + builtin.TypeId.Union => { + const info = @typeInfo(T).Union; + if (info.tag_type) |TagType| { + const active_tag = meta.activeTag(value); + try self.serialize(active_tag); + //This inline loop is necessary because active_tag is a runtime + // value, but @field requires a comptime value. Our alternative + // is to check each field for a match + inline for (info.fields) |field_info| { + if (field_info.enum_field.?.value == @enumToInt(active_tag)) { + const name = field_info.name; + const FieldType = field_info.field_type; + try self.serialize(@field(value, name)); + return; + } + } + unreachable; + } + // NOTE(review): the message below seems to be missing a period between "union" and "Use". + @compileError("Cannot meaningfully serialize " ++ @typeName(T) ++ + " because it is an untagged union Use a custom serialize()."); + }, + builtin.TypeId.Optional => { + if (value == null) { + try self.serializeInt(u1(@boolToInt(false))); + return; + } + try self.serializeInt(u1(@boolToInt(true))); + + const OC = comptime meta.Child(T); + + //The way non-pointer optionals are implemented ensures a pointer to them + // will point to the value. 
The flag is stored at the end of that data. + var val_ptr = @ptrCast(*const OC, &value); + try self.serialize(val_ptr.*); + }, + builtin.TypeId.Enum => { + try self.serializeInt(@enumToInt(value)); + }, + else => @compileError("Cannot serialize " ++ @tagName(@typeId(T)) ++ " types (unimplemented)."), + } + } + }; +} + +test "import io tests" { + comptime { + _ = @import("io_test.zig"); + } +} |
