diff options
| author | Andrew Kelley <andrew@ziglang.org> | 2025-08-25 18:03:48 -0700 |
|---|---|---|
| committer | Andrew Kelley <andrew@ziglang.org> | 2025-08-26 21:00:58 -0700 |
| commit | 58e60697e2930f4311ae9e744ae1c2877e0b69ed (patch) | |
| tree | fd84142e826d2252f23eaae9b002ae0d3f43e341 /lib/std | |
| parent | 6464e0d4fc9937e154c34567891bae84c63732b9 (diff) | |
| download | zig-58e60697e2930f4311ae9e744ae1c2877e0b69ed.tar.gz zig-58e60697e2930f4311ae9e744ae1c2877e0b69ed.zip | |
std.compress.lzma: update for new I/O API
Diffstat (limited to 'lib/std')
| -rw-r--r-- | lib/std/compress/lzma.zig | 446 | ||||
| -rw-r--r-- | lib/std/compress/lzma/test.zig | 26 | ||||
| -rw-r--r-- | lib/std/compress/lzma2.zig | 83 | ||||
| -rw-r--r-- | lib/std/compress/xz.zig | 366 | ||||
| -rw-r--r-- | lib/std/compress/xz/Decompress.zig | 288 |
5 files changed, 556 insertions, 653 deletions
diff --git a/lib/std/compress/lzma.zig b/lib/std/compress/lzma.zig index c40eeeb56e..597e97a94f 100644 --- a/lib/std/compress/lzma.zig +++ b/lib/std/compress/lzma.zig @@ -4,49 +4,34 @@ const mem = std.mem; const Allocator = std.mem.Allocator; const assert = std.debug.assert; const ArrayList = std.ArrayList; +const Writer = std.Io.Writer; +const Reader = std.Io.Reader; pub const RangeDecoder = struct { range: u32, code: u32, - pub fn init(reader: anytype) !RangeDecoder { - const reserved = try reader.readByte(); - if (reserved != 0) { - return error.CorruptInput; - } - return RangeDecoder{ - .range = 0xFFFF_FFFF, - .code = try reader.readInt(u32, .big), - }; - } - - pub fn fromParts( - range: u32, - code: u32, - ) RangeDecoder { + pub fn init(reader: *Reader) !RangeDecoder { + const reserved = try reader.takeByte(); + if (reserved != 0) return error.InvalidRangeCode; return .{ - .range = range, - .code = code, + .range = 0xFFFF_FFFF, + .code = try reader.takeInt(u32, .big), }; } - pub fn set(self: *RangeDecoder, range: u32, code: u32) void { - self.range = range; - self.code = code; - } - - pub inline fn isFinished(self: RangeDecoder) bool { + pub fn isFinished(self: RangeDecoder) bool { return self.code == 0; } - inline fn normalize(self: *RangeDecoder, reader: anytype) !void { + fn normalize(self: *RangeDecoder, reader: *Reader) !void { if (self.range < 0x0100_0000) { self.range <<= 8; - self.code = (self.code << 8) ^ @as(u32, try reader.readByte()); + self.code = (self.code << 8) ^ @as(u32, try reader.takeByte()); } } - inline fn getBit(self: *RangeDecoder, reader: anytype) !bool { + fn getBit(self: *RangeDecoder, reader: *Reader) !bool { self.range >>= 1; const bit = self.code >= self.range; @@ -57,7 +42,7 @@ pub const RangeDecoder = struct { return bit; } - pub fn get(self: *RangeDecoder, reader: anytype, count: usize) !u32 { + pub fn get(self: *RangeDecoder, reader: *Reader, count: usize) !u32 { var result: u32 = 0; var i: usize = 0; while (i < count) : (i += 
1) @@ -65,7 +50,7 @@ pub const RangeDecoder = struct { return result; } - pub inline fn decodeBit(self: *RangeDecoder, reader: anytype, prob: *u16, update: bool) !bool { + pub fn decodeBit(self: *RangeDecoder, reader: *Reader, prob: *u16, update: bool) !bool { const bound = (self.range >> 11) * prob.*; if (self.code < bound) { @@ -88,7 +73,7 @@ pub const RangeDecoder = struct { fn parseBitTree( self: *RangeDecoder, - reader: anytype, + reader: *Reader, num_bits: u5, probs: []u16, update: bool, @@ -104,7 +89,7 @@ pub const RangeDecoder = struct { pub fn parseReverseBitTree( self: *RangeDecoder, - reader: anytype, + reader: *Reader, num_bits: u5, probs: []u16, offset: usize, @@ -123,7 +108,7 @@ pub const RangeDecoder = struct { }; pub const Decode = struct { - lzma_props: Properties, + properties: Properties, unpacked_size: ?u64, literal_probs: Vec2d, pos_slot_decoder: [4]BitTree(6), @@ -141,14 +126,14 @@ pub const Decode = struct { rep_len_decoder: LenDecoder, pub fn init( - allocator: Allocator, - lzma_props: Properties, + gpa: Allocator, + properties: Properties, unpacked_size: ?u64, ) !Decode { return .{ - .lzma_props = lzma_props, + .properties = properties, .unpacked_size = unpacked_size, - .literal_probs = try Vec2d.init(allocator, 0x400, .{ @as(usize, 1) << (lzma_props.lc + lzma_props.lp), 0x300 }), + .literal_probs = try Vec2d.init(gpa, 0x400, .{ @as(usize, 1) << (properties.lc + properties.lp), 0x300 }), .pos_slot_decoder = @splat(.{}), .align_decoder = .{}, .pos_decoders = @splat(0x400), @@ -165,21 +150,21 @@ pub const Decode = struct { }; } - pub fn deinit(self: *Decode, allocator: Allocator) void { - self.literal_probs.deinit(allocator); + pub fn deinit(self: *Decode, gpa: Allocator) void { + self.literal_probs.deinit(gpa); self.* = undefined; } - pub fn resetState(self: *Decode, allocator: Allocator, new_props: Properties) !void { + pub fn resetState(self: *Decode, gpa: Allocator, new_props: Properties) !void { new_props.validate(); - if 
(self.lzma_props.lc + self.lzma_props.lp == new_props.lc + new_props.lp) { + if (self.properties.lc + self.properties.lp == new_props.lc + new_props.lp) { self.literal_probs.fill(0x400); } else { - self.literal_probs.deinit(allocator); - self.literal_probs = try Vec2d.init(allocator, 0x400, .{ @as(usize, 1) << (new_props.lc + new_props.lp), 0x300 }); + self.literal_probs.deinit(gpa); + self.literal_probs = try Vec2d.init(gpa, 0x400, .{ @as(usize, 1) << (new_props.lc + new_props.lp), 0x300 }); } - self.lzma_props = new_props; + self.properties = new_props; for (&self.pos_slot_decoder) |*t| t.reset(); self.align_decoder.reset(); self.pos_decoders = @splat(0x400); @@ -195,26 +180,23 @@ pub const Decode = struct { self.rep_len_decoder.reset(); } - fn processNextInner( + fn processNext( self: *Decode, - allocator: Allocator, - reader: anytype, - writer: anytype, - buffer: anytype, + reader: *Reader, + allocating: *Writer.Allocating, + buffer: *CircularBuffer, decoder: *RangeDecoder, update: bool, ) !ProcessingStatus { - const pos_state = buffer.len & ((@as(usize, 1) << self.lzma_props.pb) - 1); + const gpa = allocating.allocator; + const writer = &allocating.writer; + const pos_state = buffer.len & ((@as(usize, 1) << self.properties.pb) - 1); - if (!try decoder.decodeBit( - reader, - &self.is_match[(self.state << 4) + pos_state], - update, - )) { + if (!try decoder.decodeBit(reader, &self.is_match[(self.state << 4) + pos_state], update)) { const byte: u8 = try self.decodeLiteral(reader, buffer, decoder, update); if (update) { - try buffer.appendLiteral(allocator, byte, writer); + try buffer.appendLiteral(gpa, byte, writer); self.state = if (self.state < 4) 0 @@ -223,7 +205,7 @@ pub const Decode = struct { else self.state - 6; } - return .continue_; + return .more; } var len: usize = undefined; @@ -237,9 +219,9 @@ pub const Decode = struct { if (update) { self.state = if (self.state < 7) 9 else 11; const dist = self.rep[0] + 1; - try buffer.appendLz(allocator, 1, dist, 
writer); + try buffer.appendLz(gpa, 1, dist, writer); } - return .continue_; + return .more; } } else { const idx: usize = if (!try decoder.decodeBit(reader, &self.is_rep_g1[self.state], update)) @@ -293,31 +275,19 @@ pub const Decode = struct { len += 2; const dist = self.rep[0] + 1; - try buffer.appendLz(allocator, len, dist, writer); + try buffer.appendLz(gpa, len, dist, writer); } - return .continue_; - } - - fn processNext( - self: *Decode, - allocator: Allocator, - reader: anytype, - writer: anytype, - buffer: anytype, - decoder: *RangeDecoder, - ) !ProcessingStatus { - return self.processNextInner(allocator, reader, writer, buffer, decoder, true); + return .more; } pub fn process( self: *Decode, - allocator: Allocator, - reader: anytype, - writer: anytype, - buffer: anytype, + reader: *Reader, + allocating: *Writer.Allocating, + buffer: *CircularBuffer, decoder: *RangeDecoder, - ) !ProcessingStatus { + ) !void { process_next: { if (self.unpacked_size) |unpacked_size| { if (buffer.len >= unpacked_size) { @@ -326,26 +296,24 @@ pub const Decode = struct { } else if (decoder.isFinished()) { break :process_next; } - - switch (try self.processNext(allocator, reader, writer, buffer, decoder)) { - .continue_ => return .continue_, - .finished => break :process_next, + switch (try self.processNext(reader, allocating, buffer, decoder, true)) { + .more => return, + .finished => {}, } } if (self.unpacked_size) |unpacked_size| { - if (buffer.len != unpacked_size) { - return error.CorruptInput; - } + if (buffer.len != unpacked_size) return error.DecompressedSizeMismatch; } - return .finished; + try buffer.finish(&allocating.writer); + self.state = math.maxInt(usize); } fn decodeLiteral( self: *Decode, - reader: anytype, - buffer: anytype, + reader: *Reader, + buffer: *CircularBuffer, decoder: *RangeDecoder, update: bool, ) !u8 { @@ -353,9 +321,9 @@ pub const Decode = struct { const prev_byte = @as(usize, buffer.lastOr(def_prev_byte)); var result: usize = 1; - const 
lit_state = ((buffer.len & ((@as(usize, 1) << self.lzma_props.lp) - 1)) << self.lzma_props.lc) + - (prev_byte >> (8 - self.lzma_props.lc)); - const probs = try self.literal_probs.getMut(lit_state); + const lit_state = ((buffer.len & ((@as(usize, 1) << self.properties.lp) - 1)) << self.properties.lc) + + (prev_byte >> (8 - self.properties.lc)); + const probs = try self.literal_probs.get(lit_state); if (self.state >= 7) { var match_byte = @as(usize, try buffer.lastN(self.rep[0] + 1)); @@ -384,7 +352,7 @@ pub const Decode = struct { fn decodeDistance( self: *Decode, - reader: anytype, + reader: *Reader, decoder: *RangeDecoder, length: usize, update: bool, @@ -415,46 +383,40 @@ pub const Decode = struct { } /// A circular buffer for LZ sequences - pub const LzCircularBuffer = struct { + pub const CircularBuffer = struct { /// Circular buffer buf: ArrayList(u8), - /// Length of the buffer dict_size: usize, - /// Buffer memory limit - memlimit: usize, - + mem_limit: usize, /// Current position cursor: usize, - /// Total number of bytes sent through the buffer len: usize, - const Self = @This(); - - pub fn init(dict_size: usize, memlimit: usize) Self { - return Self{ + pub fn init(dict_size: usize, mem_limit: usize) CircularBuffer { + return .{ .buf = .{}, .dict_size = dict_size, - .memlimit = memlimit, + .mem_limit = mem_limit, .cursor = 0, .len = 0, }; } - pub fn get(self: Self, index: usize) u8 { + pub fn get(self: CircularBuffer, index: usize) u8 { return if (0 <= index and index < self.buf.items.len) self.buf.items[index] else 0; } - pub fn set(self: *Self, allocator: Allocator, index: usize, value: u8) !void { - if (index >= self.memlimit) { + pub fn set(self: *CircularBuffer, gpa: Allocator, index: usize, value: u8) !void { + if (index >= self.mem_limit) { return error.CorruptInput; } - try self.buf.ensureTotalCapacity(allocator, index + 1); + try self.buf.ensureTotalCapacity(gpa, index + 1); while (self.buf.items.len < index) { self.buf.appendAssumeCapacity(0); } 
@@ -462,7 +424,7 @@ pub const Decode = struct { } /// Retrieve the last byte or return a default - pub fn lastOr(self: Self, lit: u8) u8 { + pub fn lastOr(self: CircularBuffer, lit: u8) u8 { return if (self.len == 0) lit else @@ -470,7 +432,7 @@ pub const Decode = struct { } /// Retrieve the n-th last byte - pub fn lastN(self: Self, dist: usize) !u8 { + pub fn lastN(self: CircularBuffer, dist: usize) !u8 { if (dist > self.dict_size or dist > self.len) { return error.CorruptInput; } @@ -481,12 +443,12 @@ pub const Decode = struct { /// Append a literal pub fn appendLiteral( - self: *Self, - allocator: Allocator, + self: *CircularBuffer, + gpa: Allocator, lit: u8, - writer: anytype, + writer: *Writer, ) !void { - try self.set(allocator, self.cursor, lit); + try self.set(gpa, self.cursor, lit); self.cursor += 1; self.len += 1; @@ -499,11 +461,11 @@ pub const Decode = struct { /// Fetch an LZ sequence (length, distance) from inside the buffer pub fn appendLz( - self: *Self, - allocator: Allocator, + self: *CircularBuffer, + gpa: Allocator, len: usize, dist: usize, - writer: anytype, + writer: *Writer, ) !void { if (dist > self.dict_size or dist > self.len) { return error.CorruptInput; @@ -513,7 +475,7 @@ pub const Decode = struct { var i: usize = 0; while (i < len) : (i += 1) { const x = self.get(offset); - try self.appendLiteral(allocator, x, writer); + try self.appendLiteral(gpa, x, writer); offset += 1; if (offset == self.dict_size) { offset = 0; @@ -521,15 +483,15 @@ pub const Decode = struct { } } - pub fn finish(self: *Self, writer: anytype) !void { + pub fn finish(self: *CircularBuffer, writer: *Writer) !void { if (self.cursor > 0) { try writer.writeAll(self.buf.items[0..self.cursor]); self.cursor = 0; } } - pub fn deinit(self: *Self, allocator: Allocator) void { - self.buf.deinit(allocator); + pub fn deinit(self: *CircularBuffer, gpa: Allocator) void { + self.buf.deinit(gpa); self.* = undefined; } }; @@ -538,11 +500,9 @@ pub const Decode = struct { return 
struct { probs: [1 << num_bits]u16 = @splat(0x400), - const Self = @This(); - pub fn parse( - self: *Self, - reader: anytype, + self: *@This(), + reader: *Reader, decoder: *RangeDecoder, update: bool, ) !u32 { @@ -550,15 +510,15 @@ pub const Decode = struct { } pub fn parseReverse( - self: *Self, - reader: anytype, + self: *@This(), + reader: *Reader, decoder: *RangeDecoder, update: bool, ) !u32 { return decoder.parseReverseBitTree(reader, num_bits, &self.probs, 0, update); } - pub fn reset(self: *Self) void { + pub fn reset(self: *@This()) void { @memset(&self.probs, 0x400); } }; @@ -573,7 +533,7 @@ pub const Decode = struct { pub fn decode( self: *LenDecoder, - reader: anytype, + reader: *Reader, decoder: *RangeDecoder, pos_state: usize, update: bool, @@ -600,45 +560,35 @@ pub const Decode = struct { data: []u16, cols: usize, - const Self = @This(); - - pub fn init(allocator: Allocator, value: u16, size: struct { usize, usize }) !Self { + pub fn init(gpa: Allocator, value: u16, size: struct { usize, usize }) !Vec2d { const len = try math.mul(usize, size[0], size[1]); - const data = try allocator.alloc(u16, len); + const data = try gpa.alloc(u16, len); @memset(data, value); - return Self{ + return .{ .data = data, .cols = size[1], }; } - pub fn deinit(self: *Self, allocator: Allocator) void { - allocator.free(self.data); + pub fn deinit(self: *Vec2d, gpa: Allocator) void { + gpa.free(self.data); self.* = undefined; } - pub fn fill(self: *Self, value: u16) void { + pub fn fill(self: *Vec2d, value: u16) void { @memset(self.data, value); } - inline fn _get(self: Self, row: usize) ![]u16 { + fn get(self: Vec2d, row: usize) ![]u16 { const start_row = try math.mul(usize, row, self.cols); const end_row = try math.add(usize, start_row, self.cols); return self.data[start_row..end_row]; } - - pub fn get(self: Self, row: usize) ![]const u16 { - return self._get(row); - } - - pub fn getMut(self: *Self, row: usize) ![]u16 { - return self._get(row); - } }; pub const Options = 
struct { unpacked_size: UnpackedSize = .read_from_header, - memlimit: ?usize = null, + mem_limit: ?usize = null, allow_incomplete: bool = false, }; @@ -649,7 +599,7 @@ pub const Decode = struct { }; const ProcessingStatus = enum { - continue_, + more, finished, }; @@ -670,39 +620,34 @@ pub const Decode = struct { dict_size: u32, unpacked_size: ?u64, - pub fn readHeader(reader: anytype, options: Options) !Params { - var props = try reader.readByte(); - if (props >= 225) { - return error.CorruptInput; - } + pub fn readHeader(reader: *Reader, options: Options) !Params { + var props = try reader.takeByte(); + if (props >= 225) return error.CorruptInput; - const lc = @as(u4, @intCast(props % 9)); + const lc: u4 = @intCast(props % 9); props /= 9; - const lp = @as(u3, @intCast(props % 5)); + const lp: u3 = @intCast(props % 5); props /= 5; - const pb = @as(u3, @intCast(props)); + const pb: u3 = @intCast(props); - const dict_size_provided = try reader.readInt(u32, .little); + const dict_size_provided = try reader.takeInt(u32, .little); const dict_size = @max(0x1000, dict_size_provided); const unpacked_size = switch (options.unpacked_size) { .read_from_header => blk: { - const unpacked_size_provided = try reader.readInt(u64, .little); + const unpacked_size_provided = try reader.takeInt(u64, .little); const marker_mandatory = unpacked_size_provided == 0xFFFF_FFFF_FFFF_FFFF; - break :blk if (marker_mandatory) - null - else - unpacked_size_provided; + break :blk if (marker_mandatory) null else unpacked_size_provided; }, .read_header_but_use_provided => |x| blk: { - _ = try reader.readInt(u64, .little); + _ = try reader.takeInt(u64, .little); break :blk x; }, .use_provided => |x| x, }; - return Params{ - .properties = Properties{ .lc = lc, .lp = lp, .pb = pb }, + return .{ + .properties = .{ .lc = lc, .lp = lp, .pb = pb }, .dict_size = dict_size, .unpacked_size = unpacked_size, }; @@ -710,84 +655,121 @@ pub const Decode = struct { }; }; -pub fn decompress( - allocator: 
Allocator, - reader: anytype, -) !Decompress(@TypeOf(reader)) { - return decompressWithOptions(allocator, reader, .{}); -} - -pub fn decompressWithOptions( - allocator: Allocator, - reader: anytype, - options: Decode.Options, -) !Decompress(@TypeOf(reader)) { - const params = try Decode.Params.readHeader(reader, options); - return Decompress(@TypeOf(reader)).init(allocator, reader, params, options.memlimit); -} - -pub fn Decompress(comptime ReaderType: type) type { - return struct { - const Self = @This(); - - pub const Error = - ReaderType.Error || - Allocator.Error || - error{ CorruptInput, EndOfStream, Overflow }; - - pub const Reader = std.io.GenericReader(*Self, Error, read); +pub const Decompress = struct { + gpa: Allocator, + input: *Reader, + reader: Reader, + buffer: Decode.CircularBuffer, + range_decoder: RangeDecoder, + decode: Decode, + err: ?Error, + + pub const Error = error{ + OutOfMemory, + ReadFailed, + CorruptInput, + DecompressedSizeMismatch, + EndOfStream, + Overflow, + }; - allocator: Allocator, - in_reader: ReaderType, - to_read: std.ArrayListUnmanaged(u8), + /// Takes ownership of `buffer` which may be resized with `gpa`. + /// + /// LZMA was explicitly designed to take advantage of large heap memory + /// being available, with a dictionary size anywhere from 4K to 4G. Thus, + /// this API dynamically allocates the dictionary as-needed. 
+ pub fn initParams( + input: *Reader, + gpa: Allocator, + buffer: []u8, + params: Decode.Params, + mem_limit: usize, + ) !Decompress { + return .{ + .gpa = gpa, + .input = input, + .buffer = Decode.CircularBuffer.init(params.dict_size, mem_limit), + .range_decoder = try RangeDecoder.init(input), + .decode = try Decode.init(gpa, params.properties, params.unpacked_size), + .reader = .{ + .buffer = buffer, + .vtable = &.{ + .readVec = readVec, + .stream = stream, + }, + .seek = 0, + .end = 0, + }, + .err = null, + }; + } - buffer: Decode.LzCircularBuffer, - decoder: RangeDecoder, - state: Decode, + /// Takes ownership of `buffer` which may be resized with `gpa`. + /// + /// LZMA was explicitly designed to take advantage of large heap memory + /// being available, with a dictionary size anywhere from 4K to 4G. Thus, + /// this API dynamically allocates the dictionary as-needed. + pub fn initOptions( + input: *Reader, + gpa: Allocator, + buffer: []u8, + options: Decode.Options, + mem_limit: usize, + ) !Decompress { + const params = try Decode.Params.readHeader(input, options); + return initParams(input, gpa, buffer, params, mem_limit); + } - pub fn init(allocator: Allocator, source: ReaderType, params: Decode.Params, memlimit: ?usize) !Self { - return Self{ - .allocator = allocator, - .in_reader = source, - .to_read = .{}, + /// Reclaim ownership of the buffer passed to `init`. 
+ pub fn takeBuffer(d: *Decompress) []u8 { + const buffer = d.reader.buffer; + d.reader.buffer = &.{}; + return buffer; + } - .buffer = Decode.LzCircularBuffer.init(params.dict_size, memlimit orelse math.maxInt(usize)), - .decoder = try RangeDecoder.init(source), - .state = try Decode.init(allocator, params.properties, params.unpacked_size), - }; - } + pub fn deinit(d: *Decompress) void { + const gpa = d.gpa; + gpa.free(d.reader.buffer); + d.buffer.deinit(gpa); + d.decode.deinit(gpa); + d.* = undefined; + } - pub fn reader(self: *Self) Reader { - return .{ .context = self }; - } + fn readVec(r: *Reader, data: [][]u8) Reader.Error!usize { + _ = data; + return readIndirect(r); + } - pub fn deinit(self: *Self) void { - self.to_read.deinit(self.allocator); - self.buffer.deinit(self.allocator); - self.state.deinit(self.allocator); - self.* = undefined; - } + fn stream(r: *Reader, w: *Writer, limit: std.Io.Limit) Reader.StreamError!usize { + _ = w; + _ = limit; + return readIndirect(r); + } - pub fn read(self: *Self, output: []u8) Error!usize { - const writer = self.to_read.writer(self.allocator); - while (self.to_read.items.len < output.len) { - switch (try self.state.process(self.allocator, self.in_reader, writer, &self.buffer, &self.decoder)) { - .continue_ => {}, - .finished => { - try self.buffer.finish(writer); - break; - }, - } - } - const input = self.to_read.items; - const n = @min(input.len, output.len); - @memcpy(output[0..n], input[0..n]); - std.mem.copyForwards(u8, input[0 .. 
input.len - n], input[n..]); - self.to_read.shrinkRetainingCapacity(input.len - n); - return n; - } - }; -} + fn readIndirect(r: *Reader) Reader.Error!usize { + const d: *Decompress = @alignCast(@fieldParentPtr("reader", r)); + const gpa = d.gpa; + var allocating = Writer.Allocating.initOwnedSlice(gpa, r.buffer); + allocating.writer.end = r.end; + defer r.end = allocating.writer.end; + if (d.decode.state == math.maxInt(usize)) return error.EndOfStream; + d.decode.process(d.input, &allocating, &d.buffer, &d.range_decoder) catch |err| switch (err) { + error.WriteFailed => { + d.err = error.OutOfMemory; + return error.ReadFailed; + }, + error.EndOfStream => { + d.err = error.EndOfStream; + return error.ReadFailed; + }, + else => |e| { + d.err = e; + return error.ReadFailed; + }, + }; + return 0; + } +}; test { _ = @import("lzma/test.zig"); diff --git a/lib/std/compress/lzma/test.zig b/lib/std/compress/lzma/test.zig index eafb91b6bb..2514ddb8c3 100644 --- a/lib/std/compress/lzma/test.zig +++ b/lib/std/compress/lzma/test.zig @@ -1,19 +1,19 @@ const std = @import("../../std.zig"); -const lzma = @import("../lzma.zig"); +const lzma = std.compress.lzma; fn testDecompress(compressed: []const u8) ![]u8 { - const allocator = std.testing.allocator; - var stream = std.io.fixedBufferStream(compressed); - var decompressor = try lzma.decompress(allocator, stream.reader()); + const gpa = std.testing.allocator; + var stream: std.Io.Reader = .fixed(compressed); + + var decompressor = try lzma.Decompress.initOptions(&stream, gpa, &.{}, .{}, std.math.maxInt(u32)); defer decompressor.deinit(); - const reader = decompressor.reader(); - return reader.readAllAlloc(allocator, std.math.maxInt(usize)); + return decompressor.reader.allocRemaining(gpa, .unlimited); } fn testDecompressEqual(expected: []const u8, compressed: []const u8) !void { - const allocator = std.testing.allocator; + const gpa = std.testing.allocator; const decomp = try testDecompress(compressed); - defer 
allocator.free(decomp); + defer gpa.free(decomp); try std.testing.expectEqualSlices(u8, expected, decomp); } @@ -89,11 +89,13 @@ test "too small uncompressed size in header" { } test "reading one byte" { + const gpa = std.testing.allocator; const compressed = @embedFile("testdata/good-known_size-with_eopm.lzma"); - var stream = std.io.fixedBufferStream(compressed); - var decompressor = try lzma.decompress(std.testing.allocator, stream.reader()); + var stream: std.Io.Reader = .fixed(compressed); + var decompressor = try lzma.Decompress.initOptions(&stream, gpa, &.{}, .{}, std.math.maxInt(u32)); defer decompressor.deinit(); - var buffer = [1]u8{0}; - _ = try decompressor.read(buffer[0..]); + var buffer: [1]u8 = undefined; + try decompressor.reader.readSliceAll(&buffer); + try std.testing.expectEqual(72, buffer[0]); } diff --git a/lib/std/compress/lzma2.zig b/lib/std/compress/lzma2.zig index e454e6382e..d880bad21d 100644 --- a/lib/std/compress/lzma2.zig +++ b/lib/std/compress/lzma2.zig @@ -2,6 +2,8 @@ const std = @import("../std.zig"); const Allocator = std.mem.Allocator; const ArrayList = std.ArrayList; const lzma = std.compress.lzma; +const Writer = std.Io.Writer; +const Reader = std.Io.Reader; /// An accumulating buffer for LZ sequences pub const LzAccumBuffer = struct { @@ -14,30 +16,28 @@ pub const LzAccumBuffer = struct { /// Total number of bytes sent through the buffer len: usize, - const Self = @This(); - - pub fn init(memlimit: usize) Self { - return Self{ + pub fn init(memlimit: usize) LzAccumBuffer { + return .{ .buf = .{}, .memlimit = memlimit, .len = 0, }; } - pub fn appendByte(self: *Self, allocator: Allocator, byte: u8) !void { + pub fn appendByte(self: *LzAccumBuffer, allocator: Allocator, byte: u8) !void { try self.buf.append(allocator, byte); self.len += 1; } /// Reset the internal dictionary - pub fn reset(self: *Self, writer: anytype) !void { + pub fn reset(self: *LzAccumBuffer, writer: *Writer) !void { try writer.writeAll(self.buf.items); 
self.buf.clearRetainingCapacity(); self.len = 0; } /// Retrieve the last byte or return a default - pub fn lastOr(self: Self, lit: u8) u8 { + pub fn lastOr(self: LzAccumBuffer, lit: u8) u8 { const buf_len = self.buf.items.len; return if (buf_len == 0) lit @@ -46,7 +46,7 @@ pub const LzAccumBuffer = struct { } /// Retrieve the n-th last byte - pub fn lastN(self: Self, dist: usize) !u8 { + pub fn lastN(self: LzAccumBuffer, dist: usize) !u8 { const buf_len = self.buf.items.len; if (dist > buf_len) { return error.CorruptInput; @@ -57,10 +57,10 @@ pub const LzAccumBuffer = struct { /// Append a literal pub fn appendLiteral( - self: *Self, + self: *LzAccumBuffer, allocator: Allocator, lit: u8, - writer: anytype, + writer: *Writer, ) !void { _ = writer; if (self.len >= self.memlimit) { @@ -72,11 +72,11 @@ pub const LzAccumBuffer = struct { /// Fetch an LZ sequence (length, distance) from inside the buffer pub fn appendLz( - self: *Self, + self: *LzAccumBuffer, allocator: Allocator, len: usize, dist: usize, - writer: anytype, + writer: *Writer, ) !void { _ = writer; @@ -95,23 +95,23 @@ pub const LzAccumBuffer = struct { self.len += len; } - pub fn finish(self: *Self, writer: anytype) !void { + pub fn finish(self: *LzAccumBuffer, writer: *Writer) !void { try writer.writeAll(self.buf.items); self.buf.clearRetainingCapacity(); } - pub fn deinit(self: *Self, allocator: Allocator) void { + pub fn deinit(self: *LzAccumBuffer, allocator: Allocator) void { self.buf.deinit(allocator); self.* = undefined; } }; pub const Decode = struct { - lzma_state: lzma.Decode, + lzma_decode: lzma.Decode, pub fn init(allocator: Allocator) !Decode { return Decode{ - .lzma_state = try lzma.Decode.init( + .lzma_decode = try lzma.Decode.init( allocator, .{ .lc = 0, @@ -124,15 +124,15 @@ pub const Decode = struct { } pub fn deinit(self: *Decode, allocator: Allocator) void { - self.lzma_state.deinit(allocator); + self.lzma_decode.deinit(allocator); self.* = undefined; } pub fn decompress( self: 
*Decode, allocator: Allocator, - reader: anytype, - writer: anytype, + reader: *Reader, + writer: *Writer, ) !void { var accum = LzAccumBuffer.init(std.math.maxInt(usize)); defer accum.deinit(allocator); @@ -154,8 +154,8 @@ pub const Decode = struct { fn parseLzma( self: *Decode, allocator: Allocator, - reader: anytype, - writer: anytype, + reader: *Reader, + writer: *Writer, accum: *LzAccumBuffer, status: u8, ) !void { @@ -210,7 +210,7 @@ pub const Decode = struct { } if (reset.state) { - var new_props = self.lzma_state.lzma_props; + var new_props = self.lzma_decode.properties; if (reset.props) { var props = try reader.readByte(); @@ -231,16 +231,16 @@ pub const Decode = struct { new_props = .{ .lc = lc, .lp = lp, .pb = pb }; } - try self.lzma_state.resetState(allocator, new_props); + try self.lzma_decode.resetState(allocator, new_props); } - self.lzma_state.unpacked_size = unpacked_size + accum.len; + self.lzma_decode.unpacked_size = unpacked_size + accum.len; var counter = std.io.countingReader(reader); const counter_reader = counter.reader(); var rangecoder = try lzma.RangeDecoder.init(counter_reader); - while (try self.lzma_state.process(allocator, counter_reader, writer, accum, &rangecoder) == .continue_) {} + while (try self.lzma_decode.process(allocator, counter_reader, writer, accum, &rangecoder) == .continue_) {} if (counter.bytes_read != packed_size) { return error.CorruptInput; @@ -249,8 +249,8 @@ pub const Decode = struct { fn parseUncompressed( allocator: Allocator, - reader: anytype, - writer: anytype, + reader: *Reader, + writer: *Writer, accum: *LzAccumBuffer, reset_dict: bool, ) !void { @@ -267,24 +267,19 @@ pub const Decode = struct { } }; -pub fn decompress( - allocator: Allocator, - reader: anytype, - writer: anytype, -) !void { - var decoder = try Decode.init(allocator); - defer decoder.deinit(allocator); - return decoder.decompress(allocator, reader, writer); -} - -test { +test "decompress hello world stream" { const expected = 
"Hello\nWorld!\n"; const compressed = &[_]u8{ 0x01, 0x00, 0x05, 0x48, 0x65, 0x6C, 0x6C, 0x6F, 0x0A, 0x02, 0x00, 0x06, 0x57, 0x6F, 0x72, 0x6C, 0x64, 0x21, 0x0A, 0x00 }; - const allocator = std.testing.allocator; - var decomp = std.array_list.Managed(u8).init(allocator); - defer decomp.deinit(); - var stream = std.io.fixedBufferStream(compressed); - try decompress(allocator, stream.reader(), decomp.writer()); - try std.testing.expectEqualSlices(u8, expected, decomp.items); + const gpa = std.testing.allocator; + + var stream: std.Io.Reader = .fixed(compressed); + + var decode = try Decode.init(gpa, &stream); + defer decode.deinit(gpa); + + const result = try decode.reader.allocRemaining(gpa, .unlimited); + defer gpa.free(result); + + try std.testing.expectEqualStrings(expected, result); } diff --git a/lib/std/compress/xz.zig b/lib/std/compress/xz.zig index 54a6a38506..eef47cd005 100644 --- a/lib/std/compress/xz.zig +++ b/lib/std/compress/xz.zig @@ -1,368 +1,4 @@ -const std = @import("std"); -const Allocator = std.mem.Allocator; -const ArrayList = std.ArrayList; -const Crc32 = std.hash.Crc32; -const Crc64 = std.hash.crc.Crc64Xz; -const Sha256 = std.crypto.hash.sha2.Sha256; -const lzma2 = std.compress.lzma2; - -pub const Check = enum(u4) { - none = 0x00, - crc32 = 0x01, - crc64 = 0x04, - sha256 = 0x0A, - _, -}; - -fn readStreamFlags(reader: anytype, check: *Check) !void { - const reserved1 = try reader.readByte(); - if (reserved1 != 0) return error.CorruptInput; - const byte = try reader.readByte(); - if ((byte >> 4) != 0) return error.CorruptInput; - check.* = @enumFromInt(@as(u4, @truncate(byte))); -} - -pub fn decompress(allocator: Allocator, reader: anytype) !Decompress(@TypeOf(reader)) { - return Decompress(@TypeOf(reader)).init(allocator, reader); -} - -pub fn Decompress(comptime ReaderType: type) type { - return struct { - const Self = @This(); - - pub const Error = ReaderType.Error || Decoder(ReaderType).Error; - pub const Reader = std.io.GenericReader(*Self, 
Error, read); - - allocator: Allocator, - block_decoder: Decoder(ReaderType), - in_reader: ReaderType, - - fn init(allocator: Allocator, source: ReaderType) !Self { - const magic = try source.readBytesNoEof(6); - if (!std.mem.eql(u8, &magic, &.{ 0xFD, '7', 'z', 'X', 'Z', 0x00 })) - return error.BadHeader; - - var check: Check = undefined; - const hash_a = blk: { - var hasher = hashedReader(source, Crc32.init()); - try readStreamFlags(hasher.reader(), &check); - break :blk hasher.hasher.final(); - }; - - const hash_b = try source.readInt(u32, .little); - if (hash_a != hash_b) - return error.WrongChecksum; - - return Self{ - .allocator = allocator, - .block_decoder = try decoder(allocator, source, check), - .in_reader = source, - }; - } - - pub fn deinit(self: *Self) void { - self.block_decoder.deinit(); - } - - pub fn reader(self: *Self) Reader { - return .{ .context = self }; - } - - pub fn read(self: *Self, buffer: []u8) Error!usize { - if (buffer.len == 0) - return 0; - - const r = try self.block_decoder.read(buffer); - if (r != 0) - return r; - - const index_size = blk: { - var hasher = hashedReader(self.in_reader, Crc32.init()); - hasher.hasher.update(&[1]u8{0x00}); - - var counter = std.io.countingReader(hasher.reader()); - counter.bytes_read += 1; - - const counting_reader = counter.reader(); - - const record_count = try std.leb.readUleb128(u64, counting_reader); - if (record_count != self.block_decoder.block_count) - return error.CorruptInput; - - var i: usize = 0; - while (i < record_count) : (i += 1) { - // TODO: validate records - _ = try std.leb.readUleb128(u64, counting_reader); - _ = try std.leb.readUleb128(u64, counting_reader); - } - - while (counter.bytes_read % 4 != 0) { - if (try counting_reader.readByte() != 0) - return error.CorruptInput; - } - - const hash_a = hasher.hasher.final(); - const hash_b = try counting_reader.readInt(u32, .little); - if (hash_a != hash_b) - return error.WrongChecksum; - - break :blk counter.bytes_read; - }; - - const 
hash_a = try self.in_reader.readInt(u32, .little); - - const hash_b = blk: { - var hasher = hashedReader(self.in_reader, Crc32.init()); - const hashed_reader = hasher.reader(); - - const backward_size = (@as(u64, try hashed_reader.readInt(u32, .little)) + 1) * 4; - if (backward_size != index_size) - return error.CorruptInput; - - var check: Check = undefined; - try readStreamFlags(hashed_reader, &check); - - break :blk hasher.hasher.final(); - }; - - if (hash_a != hash_b) - return error.WrongChecksum; - - const magic = try self.in_reader.readBytesNoEof(2); - if (!std.mem.eql(u8, &magic, &.{ 'Y', 'Z' })) - return error.CorruptInput; - - return 0; - } - }; -} - -pub fn HashedReader(ReaderType: type, HasherType: type) type { - return struct { - child_reader: ReaderType, - hasher: HasherType, - - pub const Error = ReaderType.Error; - pub const Reader = std.io.GenericReader(*@This(), Error, read); - - pub fn read(self: *@This(), buf: []u8) Error!usize { - const amt = try self.child_reader.read(buf); - self.hasher.update(buf[0..amt]); - return amt; - } - - pub fn reader(self: *@This()) Reader { - return .{ .context = self }; - } - }; -} - -pub fn hashedReader( - reader: anytype, - hasher: anytype, -) HashedReader(@TypeOf(reader), @TypeOf(hasher)) { - return .{ .child_reader = reader, .hasher = hasher }; -} - -const DecodeError = error{ - CorruptInput, - EndOfStream, - EndOfStreamWithNoError, - WrongChecksum, - Unsupported, - Overflow, -}; - -pub fn decoder(allocator: Allocator, reader: anytype, check: Check) !Decoder(@TypeOf(reader)) { - return Decoder(@TypeOf(reader)).init(allocator, reader, check); -} - -pub fn Decoder(comptime ReaderType: type) type { - return struct { - const Self = @This(); - pub const Error = - ReaderType.Error || - DecodeError || - Allocator.Error; - pub const Reader = std.io.GenericReader(*Self, Error, read); - - allocator: Allocator, - inner_reader: ReaderType, - check: Check, - err: ?Error, - to_read: ArrayList(u8), - read_pos: usize, - 
block_count: usize, - - fn init(allocator: Allocator, in_reader: ReaderType, check: Check) !Self { - return Self{ - .allocator = allocator, - .inner_reader = in_reader, - .check = check, - .err = null, - .to_read = .{}, - .read_pos = 0, - .block_count = 0, - }; - } - - pub fn deinit(self: *Self) void { - self.to_read.deinit(self.allocator); - } - - pub fn reader(self: *Self) Reader { - return .{ .context = self }; - } - - pub fn read(self: *Self, output: []u8) Error!usize { - while (true) { - const unread_len = self.to_read.items.len - self.read_pos; - if (unread_len > 0) { - const n = @min(unread_len, output.len); - @memcpy(output[0..n], self.to_read.items[self.read_pos..][0..n]); - self.read_pos += n; - return n; - } - if (self.err) |e| { - if (e == DecodeError.EndOfStreamWithNoError) { - return 0; - } - return e; - } - if (self.read_pos > 0) { - self.to_read.shrinkRetainingCapacity(0); - self.read_pos = 0; - } - self.readBlock() catch |e| { - self.err = e; - }; - } - } - - fn readBlock(self: *Self) Error!void { - var block_counter = std.io.countingReader(self.inner_reader); - const block_reader = block_counter.reader(); - - var packed_size: ?u64 = null; - var unpacked_size: ?u64 = null; - - // Block Header - { - var header_hasher = hashedReader(block_reader, Crc32.init()); - const header_reader = header_hasher.reader(); - - const header_size = @as(u64, try header_reader.readByte()) * 4; - if (header_size == 0) - return error.EndOfStreamWithNoError; - - const Flags = packed struct(u8) { - last_filter_index: u2, - reserved: u4, - has_packed_size: bool, - has_unpacked_size: bool, - }; - - const flags = @as(Flags, @bitCast(try header_reader.readByte())); - const filter_count = @as(u3, flags.last_filter_index) + 1; - if (filter_count > 1) - return error.Unsupported; - - if (flags.has_packed_size) - packed_size = try std.leb.readUleb128(u64, header_reader); - - if (flags.has_unpacked_size) - unpacked_size = try std.leb.readUleb128(u64, header_reader); - - const 
FilterId = enum(u64) { - lzma2 = 0x21, - _, - }; - - const filter_id = @as( - FilterId, - @enumFromInt(try std.leb.readUleb128(u64, header_reader)), - ); - - if (@intFromEnum(filter_id) >= 0x4000_0000_0000_0000) - return error.CorruptInput; - - if (filter_id != .lzma2) - return error.Unsupported; - - const properties_size = try std.leb.readUleb128(u64, header_reader); - if (properties_size != 1) - return error.CorruptInput; - - // TODO: use filter properties - _ = try header_reader.readByte(); - - while (block_counter.bytes_read != header_size) { - if (try header_reader.readByte() != 0) - return error.CorruptInput; - } - - const hash_a = header_hasher.hasher.final(); - const hash_b = try header_reader.readInt(u32, .little); - if (hash_a != hash_b) - return error.WrongChecksum; - } - - // Compressed Data - var packed_counter = std.io.countingReader(block_reader); - try lzma2.decompress( - self.allocator, - packed_counter.reader(), - self.to_read.writer(self.allocator), - ); - - if (packed_size) |s| { - if (s != packed_counter.bytes_read) - return error.CorruptInput; - } - - const unpacked_bytes = self.to_read.items; - if (unpacked_size) |s| { - if (s != unpacked_bytes.len) - return error.CorruptInput; - } - - // Block Padding - while (block_counter.bytes_read % 4 != 0) { - if (try block_reader.readByte() != 0) - return error.CorruptInput; - } - - switch (self.check) { - .none => {}, - .crc32 => { - const hash_a = Crc32.hash(unpacked_bytes); - const hash_b = try self.inner_reader.readInt(u32, .little); - if (hash_a != hash_b) - return error.WrongChecksum; - }, - .crc64 => { - const hash_a = Crc64.hash(unpacked_bytes); - const hash_b = try self.inner_reader.readInt(u64, .little); - if (hash_a != hash_b) - return error.WrongChecksum; - }, - .sha256 => { - var hash_a: [Sha256.digest_length]u8 = undefined; - Sha256.hash(unpacked_bytes, &hash_a, .{}); - - var hash_b: [Sha256.digest_length]u8 = undefined; - try self.inner_reader.readNoEof(&hash_b); - - if (!std.mem.eql(u8, 
&hash_a, &hash_b)) - return error.WrongChecksum; - }, - else => return error.Unsupported, - } - - self.block_count += 1; - } - }; -}
+pub const Decompress = @import("xz/Decompress.zig");

 test {
     _ = @import("xz/test.zig");
diff --git a/lib/std/compress/xz/Decompress.zig b/lib/std/compress/xz/Decompress.zig
new file mode 100644
index 0000000000..6b925020d6
--- /dev/null
+++ b/lib/std/compress/xz/Decompress.zig
@@ -0,0 +1,302 @@
+const Decompress = @This();
+const std = @import("../../std.zig");
+const Allocator = std.mem.Allocator;
+const ArrayList = std.ArrayList;
+const Crc32 = std.hash.Crc32;
+const Crc64 = std.hash.crc.Crc64Xz;
+const Sha256 = std.crypto.hash.sha2.Sha256;
+const lzma2 = std.compress.lzma2;
+const Writer = std.Io.Writer;
+const Reader = std.Io.Reader;
+
+/// Underlying compressed data stream to pull bytes from.
+input: *Reader,
+/// Uncompressed bytes output by this stream implementation.
+reader: Reader,
+gpa: Allocator,
+check: Check,
+block_count: usize,
+err: ?Error,
+
+pub const Error = error{
+    ReadFailed,
+    OutOfMemory,
+    CorruptInput,
+    EndOfStream,
+    WrongChecksum,
+    Unsupported,
+    Overflow,
+};
+
+pub const Check = enum(u4) {
+    none = 0x00,
+    crc32 = 0x01,
+    crc64 = 0x04,
+    sha256 = 0x0A,
+    _,
+};
+
+pub const StreamFlags = packed struct(u16) {
+    null: u8 = 0,
+    check: Check,
+    reserved: u4 = 0,
+};
+
+pub const InitError = error{
+    NotXzStream,
+    WrongChecksum,
+};
+
+/// Reads and validates the XZ stream header from `input`, then returns a
+/// `Decompress` whose `check` field holds the check type announced by the
+/// stream flags.
+///
+/// XZ uses a series of LZMA2 blocks which each specify a dictionary size
+/// anywhere from 4K to 4G. Thus, this API dynamically allocates the dictionary
+/// as-needed.
+///
+/// Returns `error.NotXzStream` when the 6-byte magic does not match,
+/// `error.WrongChecksum` when the CRC32 of the stream flags differs from the
+/// stored one, and any `Reader.Error` reported while reading `input`.
+pub fn init(
+    input: *Reader,
+    gpa: Allocator,
+    /// Decompress takes ownership of this buffer and resizes it with `gpa`.
+    buffer: []u8,
+) (InitError || Reader.Error)!Decompress {
+    // `takeArray` yields a pointer to exactly 6 buffered bytes, so it can be
+    // compared against the magic directly.
+    const magic = try input.takeArray(6);
+    if (!std.mem.eql(u8, magic, &.{ 0xFD, '7', 'z', 'X', 'Z', 0x00 }))
+        return error.NotXzStream;
+
+    // The CRC32 covers only the stream flags. Peeking first guarantees the
+    // bytes are buffered, which is why the following `takeStruct` cannot fail.
+    const actual_hash = Crc32.hash(try input.peek(@sizeOf(StreamFlags)));
+    const stream_flags = input.takeStruct(StreamFlags, .little) catch unreachable;
+    const stored_hash = try input.takeInt(u32, .little);
+    if (actual_hash != stored_hash) return error.WrongChecksum;
+
+    return .{
+        .input = input,
+        .reader = .{
+            .vtable = &.{
+                .stream = stream,
+                .readVec = readVec,
+            },
+            .buffer = buffer,
+            .seek = 0,
+            .end = 0,
+        },
+        .gpa = gpa,
+        .check = stream_flags.check,
+        .block_count = 0,
+        .err = null,
+    };
+}
+
+/// `stream` entry of the `reader` vtable. Not implemented yet: always panics.
+fn stream(r: *Reader, w: *Writer, limit: std.Io.Limit) Reader.StreamError!usize {
+    _ = w;
+    _ = limit;
+    const d: *Decompress = @alignCast(@fieldParentPtr("reader", r));
+    _ = d;
+    @panic("TODO");
+}
+
+/// `readVec` entry of the `reader` vtable. Not implemented yet: always panics.
+fn readVec(r: *Reader, data: [][]u8) Reader.Error!usize {
+    _ = data;
+    const d: *Decompress = @alignCast(@fieldParentPtr("reader", r));
+    _ = d;
+    @panic("TODO");
+}
+
+// if (buffer.len == 0) +// return 0; +// +// const r = try self.block_decode.read(buffer); +// if (r != 0) +// return r; +// +// const index_size = blk: { +// var hasher = hashedReader(self.in_reader, Crc32.init()); +// hasher.hasher.update(&[1]u8{0x00}); +// +// var counter = std.io.countingReader(hasher.reader()); +// counter.bytes_read += 1; +// +// const counting_reader = counter.reader(); +// +// const record_count = try std.leb.readUleb128(u64, counting_reader); +// if (record_count != self.block_decode.block_count) +// return error.CorruptInput; +// +// var i: usize = 0; +// while (i < record_count) : (i += 1) { +// // TODO: validate records +// _ = try std.leb.readUleb128(u64, counting_reader); +// _ = try std.leb.readUleb128(u64, counting_reader); +// } +// +// while (counter.bytes_read % 4 != 0) { +// if (try counting_reader.readByte() != 0) +// return error.CorruptInput; +// } +// +// const hash_a = 
hasher.hasher.final(); +// const hash_b = try counting_reader.readInt(u32, .little); +// if (hash_a != hash_b) +// return error.WrongChecksum; +// +// break :blk counter.bytes_read; +// }; +// +// const hash_a = try self.in_reader.readInt(u32, .little); +// +// const hash_b = blk: { +// var hasher = hashedReader(self.in_reader, Crc32.init()); +// const hashed_reader = hasher.reader(); +// +// const backward_size = (@as(u64, try hashed_reader.readInt(u32, .little)) + 1) * 4; +// if (backward_size != index_size) +// return error.CorruptInput; +// +// var check: Check = undefined; +// try readStreamFlags(hashed_reader, &check); +// +// break :blk hasher.hasher.final(); +// }; +// +// if (hash_a != hash_b) +// return error.WrongChecksum; +// +// const magic = try self.in_reader.readBytesNoEof(2); +// if (!std.mem.eql(u8, &magic, &.{ 'Y', 'Z' })) +// return error.CorruptInput; +// +// return 0; +//} + +//fn readBlock(self: *BlockDecode) Error!void { +// var block_counter = std.io.countingReader(self.inner_reader); +// const block_reader = block_counter.reader(); +// +// var packed_size: ?u64 = null; +// var unpacked_size: ?u64 = null; +// +// // Block Header +// { +// var header_hasher = hashedReader(block_reader, Crc32.init()); +// const header_reader = header_hasher.reader(); +// +// const header_size = @as(u64, try header_reader.readByte()) * 4; +// if (header_size == 0) +// return error.EndOfStreamWithNoError; +// +// const Flags = packed struct(u8) { +// last_filter_index: u2, +// reserved: u4, +// has_packed_size: bool, +// has_unpacked_size: bool, +// }; +// +// const flags = @as(Flags, @bitCast(try header_reader.readByte())); +// const filter_count = @as(u3, flags.last_filter_index) + 1; +// if (filter_count > 1) +// return error.Unsupported; +// +// if (flags.has_packed_size) +// packed_size = try std.leb.readUleb128(u64, header_reader); +// +// if (flags.has_unpacked_size) +// unpacked_size = try std.leb.readUleb128(u64, header_reader); +// +// const FilterId 
= enum(u64) { +// lzma2 = 0x21, +// _, +// }; +// +// const filter_id = @as( +// FilterId, +// @enumFromInt(try std.leb.readUleb128(u64, header_reader)), +// ); +// +// if (@intFromEnum(filter_id) >= 0x4000_0000_0000_0000) +// return error.CorruptInput; +// +// if (filter_id != .lzma2) +// return error.Unsupported; +// +// const properties_size = try std.leb.readUleb128(u64, header_reader); +// if (properties_size != 1) +// return error.CorruptInput; +// +// // TODO: use filter properties +// _ = try header_reader.readByte(); +// +// while (block_counter.bytes_read != header_size) { +// if (try header_reader.readByte() != 0) +// return error.CorruptInput; +// } +// +// const hash_a = header_hasher.hasher.final(); +// const hash_b = try header_reader.readInt(u32, .little); +// if (hash_a != hash_b) +// return error.WrongChecksum; +// } +// +// // Compressed Data +// var packed_counter = std.io.countingReader(block_reader); +// try lzma2.decompress( +// self.allocator, +// packed_counter.reader(), +// self.to_read.writer(self.allocator), +// ); +// +// if (packed_size) |s| { +// if (s != packed_counter.bytes_read) +// return error.CorruptInput; +// } +// +// const unpacked_bytes = self.to_read.items; +// if (unpacked_size) |s| { +// if (s != unpacked_bytes.len) +// return error.CorruptInput; +// } +// +// // Block Padding +// while (block_counter.bytes_read % 4 != 0) { +// if (try block_reader.readByte() != 0) +// return error.CorruptInput; +// } +// +// switch (self.check) { +// .none => {}, +// .crc32 => { +// const hash_a = Crc32.hash(unpacked_bytes); +// const hash_b = try self.inner_reader.readInt(u32, .little); +// if (hash_a != hash_b) +// return error.WrongChecksum; +// }, +// .crc64 => { +// const hash_a = Crc64.hash(unpacked_bytes); +// const hash_b = try self.inner_reader.readInt(u64, .little); +// if (hash_a != hash_b) +// return error.WrongChecksum; +// }, +// .sha256 => { +// var hash_a: [Sha256.digest_length]u8 = undefined; +// 
Sha256.hash(unpacked_bytes, &hash_a, .{}); +// +// var hash_b: [Sha256.digest_length]u8 = undefined; +// try self.inner_reader.readNoEof(&hash_b); +// +// if (!std.mem.eql(u8, &hash_a, &hash_b)) +// return error.WrongChecksum; +// }, +// else => return error.Unsupported, +// } +// +// self.block_count += 1; +//} |
