diff options
| author | Andrew Kelley <andrew@ziglang.org> | 2025-08-25 21:33:57 -0700 |
|---|---|---|
| committer | Andrew Kelley <andrew@ziglang.org> | 2025-08-26 21:00:58 -0700 |
| commit | d87eb7d4e4f2ea606a18640fcc019b60cc435cdd (patch) | |
| tree | 437c0e00c1b9a98e92b3045613de39ad36710429 | |
| parent | a8ae6c2f4265a66c7a63d788a13549c48a1dd8c0 (diff) | |
| download | zig-d87eb7d4e4f2ea606a18640fcc019b60cc435cdd.tar.gz zig-d87eb7d4e4f2ea606a18640fcc019b60cc435cdd.zip | |
std.compress.xz: skeleton in place
missing these things:
- implementation of finish()
- detection of packed bytes read for check and block padding
- implementation of discard()
- implementation of block stream checksum
| -rw-r--r-- | lib/std/compress/xz/Decompress.zig | 419 |
| -rw-r--r-- | lib/std/compress/xz/test.zig | 16 |
2 files changed, 234 insertions, 201 deletions
diff --git a/lib/std/compress/xz/Decompress.zig b/lib/std/compress/xz/Decompress.zig index 6b925020d6..99cfa266b1 100644 --- a/lib/std/compress/xz/Decompress.zig +++ b/lib/std/compress/xz/Decompress.zig @@ -26,6 +26,8 @@ pub const Error = error{ WrongChecksum, Unsupported, Overflow, + InvalidRangeCode, + DecompressedSizeMismatch, }; pub const Check = enum(u4) { @@ -55,14 +57,14 @@ pub fn init( gpa: Allocator, /// Decompress takes ownership of this buffer and resizes it with `gpa`. buffer: []u8, -) Decompress { - const magic = try input.takeBytes(6); - if (!std.mem.eql(u8, &magic, &.{ 0xFD, '7', 'z', 'X', 'Z', 0x00 })) +) !Decompress { + const magic = try input.takeArray(6); + if (!std.mem.eql(u8, magic, &.{ 0xFD, '7', 'z', 'X', 'Z', 0x00 })) return error.NotXzStream; const actual_hash = Crc32.hash(try input.peek(@sizeOf(StreamFlags))); const stream_flags = input.takeStruct(StreamFlags, .little) catch unreachable; - const stored_hash = try input.readInt(u32, .little); + const stored_hash = try input.takeInt(u32, .little); if (actual_hash != stored_hash) return error.WrongChecksum; return .{ @@ -71,6 +73,7 @@ pub fn init( .vtable = &.{ .stream = stream, .readVec = readVec, + .discard = discard, }, .buffer = buffer, .seek = 0, @@ -83,206 +86,232 @@ pub fn init( }; } +/// Reclaim ownership of the buffer passed to `init`. 
+pub fn takeBuffer(d: *Decompress) []u8 { + const buffer = d.reader.buffer; + d.reader.buffer = &.{}; + return buffer; +} + +pub fn deinit(d: *Decompress) void { + const gpa = d.gpa; + gpa.free(d.reader.buffer); + d.* = undefined; +} + +fn readVec(r: *Reader, data: [][]u8) Reader.Error!usize { + _ = data; + return readIndirect(r); +} + fn stream(r: *Reader, w: *Writer, limit: std.Io.Limit) Reader.StreamError!usize { _ = w; _ = limit; + return readIndirect(r); +} + +fn discard(r: *Reader, limit: std.Io.Limit) Reader.Error!usize { const d: *Decompress = @alignCast(@fieldParentPtr("reader", r)); _ = d; + _ = limit; @panic("TODO"); } -fn readVec(r: *Reader, data: [][]u8) Reader.Error!usize { - _ = data; +fn readIndirect(r: *Reader) Reader.Error!usize { const d: *Decompress = @alignCast(@fieldParentPtr("reader", r)); + const gpa = d.gpa; + const input = d.input; + + var allocating = Writer.Allocating.initOwnedSlice(gpa, r.buffer); + allocating.writer.end = r.end; + defer { + r.buffer = allocating.writer.buffer; + r.end = allocating.writer.end; + } + + if (d.block_count == std.math.maxInt(usize)) return error.EndOfStream; + + readBlock(input, &allocating) catch |err| switch (err) { + error.WriteFailed => { + d.err = error.OutOfMemory; + return error.ReadFailed; + }, + error.SuccessfulEndOfStream => { + finish(d); + d.block_count = std.math.maxInt(usize); + return error.EndOfStream; + }, + else => |e| { + d.err = e; + return error.ReadFailed; + }, + }; + switch (d.check) { + .none => {}, + .crc32 => { + const declared_checksum = try input.takeInt(u32, .little); + // TODO + //const hash_a = Crc32.hash(unpacked_bytes); + //if (hash_a != hash_b) return error.WrongChecksum; + _ = declared_checksum; + }, + .crc64 => { + const declared_checksum = try input.takeInt(u64, .little); + // TODO + //const hash_a = Crc64.hash(unpacked_bytes); + //if (hash_a != hash_b) return error.WrongChecksum; + _ = declared_checksum; + }, + .sha256 => { + const declared_hash = try 
input.take(Sha256.digest_length); + // TODO + //var hash_a: [Sha256.digest_length]u8 = undefined; + //Sha256.hash(unpacked_bytes, &hash_a, .{}); + //if (!std.mem.eql(u8, &hash_a, &hash_b)) + // return error.WrongChecksum; + _ = declared_hash; + }, + else => { + d.err = error.Unsupported; + return error.ReadFailed; + }, + } + d.block_count += 1; + return 0; +} + +fn readBlock(input: *Reader, allocating: *Writer.Allocating) !void { + var packed_size: ?u64 = null; + var unpacked_size: ?u64 = null; + + { + // Read the block header via peeking so that we can hash the whole thing too. + const first_byte: usize = try input.peekByte(); + if (first_byte == 0) return error.SuccessfulEndOfStream; + + const declared_header_size = first_byte * 4; + try input.fill(declared_header_size); + const header_seek_start = input.seek; + input.toss(1); + + const Flags = packed struct(u8) { + last_filter_index: u2, + reserved: u4, + has_packed_size: bool, + has_unpacked_size: bool, + }; + const flags = try input.takeStruct(Flags, .little); + + const filter_count = @as(u3, flags.last_filter_index) + 1; + if (filter_count > 1) return error.Unsupported; + + if (flags.has_packed_size) packed_size = try input.takeLeb128(u64); + if (flags.has_unpacked_size) unpacked_size = try input.takeLeb128(u64); + + const FilterId = enum(u64) { + lzma2 = 0x21, + _, + }; + + const filter_id: FilterId = @enumFromInt(try input.takeLeb128(u64)); + if (filter_id != .lzma2) return error.Unsupported; + + const properties_size = try input.takeLeb128(u64); + if (properties_size != 1) return error.CorruptInput; + // TODO: use filter properties + _ = try input.takeByte(); + + const actual_header_size = input.seek - header_seek_start; + if (actual_header_size > declared_header_size) return error.CorruptInput; + var remaining_bytes = declared_header_size - actual_header_size; + while (remaining_bytes != 0) { + if (try input.takeByte() != 0) return error.CorruptInput; + remaining_bytes -= 1; + } + + const header_slice = 
input.buffer[header_seek_start..][0..declared_header_size]; + const actual_hash = Crc32.hash(header_slice); + const declared_hash = try input.takeInt(u32, .little); + if (actual_hash != declared_hash) return error.WrongChecksum; + } + + // Compressed Data + + var lzma2_decode = try lzma2.Decode.init(allocating.allocator); + const before_size = allocating.writer.end; + try lzma2_decode.decompress(input, allocating); + const unpacked_bytes = allocating.writer.end - before_size; + + // TODO restore this check + //if (packed_size) |s| { + // if (s != packed_counter.bytes_read) + // return error.CorruptInput; + //} + + if (unpacked_size) |s| { + if (s != unpacked_bytes) return error.CorruptInput; + } + + // Block Padding + if (true) @panic("TODO account for block padding"); + //while (block_counter.bytes_read % 4 != 0) { + // if (try block_reader.takeByte() != 0) + // return error.CorruptInput; + //} + +} + +fn finish(d: *Decompress) void { _ = d; @panic("TODO"); -} + //const input = d.input; + //const index_size = blk: { + // const record_count = try input.takeLeb128(u64); + // if (record_count != d.block_decode.block_count) + // return error.CorruptInput; + + // var i: usize = 0; + // while (i < record_count) : (i += 1) { + // // TODO: validate records + // _ = try std.leb.readUleb128(u64, counting_reader); + // _ = try std.leb.readUleb128(u64, counting_reader); + // } + + // while (counter.bytes_read % 4 != 0) { + // if (try counting_reader.takeByte() != 0) + // return error.CorruptInput; + // } + + // const hash_a = hasher.hasher.final(); + // const hash_b = try counting_reader.takeInt(u32, .little); + // if (hash_a != hash_b) + // return error.WrongChecksum; + + // break :blk counter.bytes_read; + //}; + + //const hash_a = try d.in_reader.takeInt(u32, .little); -// if (buffer.len == 0) -// return 0; -// -// const r = try self.block_decode.read(buffer); -// if (r != 0) -// return r; -// -// const index_size = blk: { -// var hasher = hashedReader(self.in_reader, 
Crc32.init()); -// hasher.hasher.update(&[1]u8{0x00}); -// -// var counter = std.io.countingReader(hasher.reader()); -// counter.bytes_read += 1; -// -// const counting_reader = counter.reader(); -// -// const record_count = try std.leb.readUleb128(u64, counting_reader); -// if (record_count != self.block_decode.block_count) -// return error.CorruptInput; -// -// var i: usize = 0; -// while (i < record_count) : (i += 1) { -// // TODO: validate records -// _ = try std.leb.readUleb128(u64, counting_reader); -// _ = try std.leb.readUleb128(u64, counting_reader); -// } -// -// while (counter.bytes_read % 4 != 0) { -// if (try counting_reader.readByte() != 0) -// return error.CorruptInput; -// } -// -// const hash_a = hasher.hasher.final(); -// const hash_b = try counting_reader.readInt(u32, .little); -// if (hash_a != hash_b) -// return error.WrongChecksum; -// -// break :blk counter.bytes_read; -// }; -// -// const hash_a = try self.in_reader.readInt(u32, .little); -// -// const hash_b = blk: { -// var hasher = hashedReader(self.in_reader, Crc32.init()); -// const hashed_reader = hasher.reader(); -// -// const backward_size = (@as(u64, try hashed_reader.readInt(u32, .little)) + 1) * 4; -// if (backward_size != index_size) -// return error.CorruptInput; -// -// var check: Check = undefined; -// try readStreamFlags(hashed_reader, &check); -// -// break :blk hasher.hasher.final(); -// }; -// -// if (hash_a != hash_b) -// return error.WrongChecksum; -// -// const magic = try self.in_reader.readBytesNoEof(2); -// if (!std.mem.eql(u8, &magic, &.{ 'Y', 'Z' })) -// return error.CorruptInput; -// -// return 0; -//} - -//fn readBlock(self: *BlockDecode) Error!void { -// var block_counter = std.io.countingReader(self.inner_reader); -// const block_reader = block_counter.reader(); -// -// var packed_size: ?u64 = null; -// var unpacked_size: ?u64 = null; -// -// // Block Header -// { -// var header_hasher = hashedReader(block_reader, Crc32.init()); -// const header_reader = 
header_hasher.reader(); -// -// const header_size = @as(u64, try header_reader.readByte()) * 4; -// if (header_size == 0) -// return error.EndOfStreamWithNoError; -// -// const Flags = packed struct(u8) { -// last_filter_index: u2, -// reserved: u4, -// has_packed_size: bool, -// has_unpacked_size: bool, -// }; -// -// const flags = @as(Flags, @bitCast(try header_reader.readByte())); -// const filter_count = @as(u3, flags.last_filter_index) + 1; -// if (filter_count > 1) -// return error.Unsupported; -// -// if (flags.has_packed_size) -// packed_size = try std.leb.readUleb128(u64, header_reader); -// -// if (flags.has_unpacked_size) -// unpacked_size = try std.leb.readUleb128(u64, header_reader); -// -// const FilterId = enum(u64) { -// lzma2 = 0x21, -// _, -// }; -// -// const filter_id = @as( -// FilterId, -// @enumFromInt(try std.leb.readUleb128(u64, header_reader)), -// ); -// -// if (@intFromEnum(filter_id) >= 0x4000_0000_0000_0000) -// return error.CorruptInput; -// -// if (filter_id != .lzma2) -// return error.Unsupported; -// -// const properties_size = try std.leb.readUleb128(u64, header_reader); -// if (properties_size != 1) -// return error.CorruptInput; -// -// // TODO: use filter properties -// _ = try header_reader.readByte(); -// -// while (block_counter.bytes_read != header_size) { -// if (try header_reader.readByte() != 0) -// return error.CorruptInput; -// } -// -// const hash_a = header_hasher.hasher.final(); -// const hash_b = try header_reader.readInt(u32, .little); -// if (hash_a != hash_b) -// return error.WrongChecksum; -// } -// -// // Compressed Data -// var packed_counter = std.io.countingReader(block_reader); -// try lzma2.decompress( -// self.allocator, -// packed_counter.reader(), -// self.to_read.writer(self.allocator), -// ); -// -// if (packed_size) |s| { -// if (s != packed_counter.bytes_read) -// return error.CorruptInput; -// } -// -// const unpacked_bytes = self.to_read.items; -// if (unpacked_size) |s| { -// if (s != 
unpacked_bytes.len) -// return error.CorruptInput; -// } -// -// // Block Padding -// while (block_counter.bytes_read % 4 != 0) { -// if (try block_reader.readByte() != 0) -// return error.CorruptInput; -// } -// -// switch (self.check) { -// .none => {}, -// .crc32 => { -// const hash_a = Crc32.hash(unpacked_bytes); -// const hash_b = try self.inner_reader.readInt(u32, .little); -// if (hash_a != hash_b) -// return error.WrongChecksum; -// }, -// .crc64 => { -// const hash_a = Crc64.hash(unpacked_bytes); -// const hash_b = try self.inner_reader.readInt(u64, .little); -// if (hash_a != hash_b) -// return error.WrongChecksum; -// }, -// .sha256 => { -// var hash_a: [Sha256.digest_length]u8 = undefined; -// Sha256.hash(unpacked_bytes, &hash_a, .{}); -// -// var hash_b: [Sha256.digest_length]u8 = undefined; -// try self.inner_reader.readNoEof(&hash_b); -// -// if (!std.mem.eql(u8, &hash_a, &hash_b)) -// return error.WrongChecksum; -// }, -// else => return error.Unsupported, -// } -// -// self.block_count += 1; -//} + //const hash_b = blk: { + // var hasher = hashedReader(d.in_reader, Crc32.init()); + // const hashed_reader = hasher.reader(); + + // const backward_size = (@as(u64, try hashed_reader.takeInt(u32, .little)) + 1) * 4; + // if (backward_size != index_size) + // return error.CorruptInput; + + // var check: Check = undefined; + // try readStreamFlags(hashed_reader, &check); + + // break :blk hasher.hasher.final(); + //}; + + //if (hash_a != hash_b) + // return error.WrongChecksum; + + //const magic = try d.in_reader.takeBytesNoEof(2); + //if (!std.mem.eql(u8, &magic, &.{ 'Y', 'Z' })) + // return error.CorruptInput; + + //return 0; +} diff --git a/lib/std/compress/xz/test.zig b/lib/std/compress/xz/test.zig index 08180e45c0..a25cc08df0 100644 --- a/lib/std/compress/xz/test.zig +++ b/lib/std/compress/xz/test.zig @@ -3,19 +3,23 @@ const testing = std.testing; const xz = std.compress.xz; fn decompress(data: []const u8) ![]u8 { - var in_stream = 
std.io.fixedBufferStream(data); + const gpa = testing.allocator; - var xz_stream = try xz.decompress(testing.allocator, in_stream.reader()); + var in_stream: std.Io.Reader = .fixed(data); + + var xz_stream = try xz.Decompress.init(&in_stream, gpa, &.{}); defer xz_stream.deinit(); - return xz_stream.reader().readAllAlloc(testing.allocator, std.math.maxInt(usize)); + return xz_stream.reader.allocRemaining(gpa, .unlimited); } fn testReader(data: []const u8, comptime expected: []const u8) !void { - const buf = try decompress(data); - defer testing.allocator.free(buf); + const gpa = testing.allocator; + + const result = try decompress(data); + defer gpa.free(result); - try testing.expectEqualSlices(u8, expected, buf); + try testing.expectEqualSlices(u8, expected, result); } test "compressed data" { |
