diff options
| author | Andrew Kelley <andrew@ziglang.org> | 2025-08-22 14:14:26 -0700 |
|---|---|---|
| committer | Andrew Kelley <andrew@ziglang.org> | 2025-08-26 21:00:58 -0700 |
| commit | 6464e0d4fc9937e154c34567891bae84c63732b9 (patch) | |
| tree | a42d9f4208bbea1dfab3f6e0f1d1b586fc79abb9 | |
| parent | ea0ce7afb59d7c7ed33f707f3fea3e0babd785b6 (diff) | |
| download | zig-6464e0d4fc9937e154c34567891bae84c63732b9.tar.gz zig-6464e0d4fc9937e154c34567891bae84c63732b9.zip | |
std.compress.xz: flatten namespaces
| -rw-r--r-- | lib/std/compress/xz.zig | 211 | ||||
| -rw-r--r-- | lib/std/compress/xz/block.zig | 208 |
2 files changed, 207 insertions, 212 deletions
diff --git a/lib/std/compress/xz.zig b/lib/std/compress/xz.zig index 6c99e9f427..54a6a38506 100644 --- a/lib/std/compress/xz.zig +++ b/lib/std/compress/xz.zig @@ -1,7 +1,10 @@ const std = @import("std"); -const block = @import("xz/block.zig"); const Allocator = std.mem.Allocator; +const ArrayList = std.ArrayList; const Crc32 = std.hash.Crc32; +const Crc64 = std.hash.crc.Crc64Xz; +const Sha256 = std.crypto.hash.sha2.Sha256; +const lzma2 = std.compress.lzma2; pub const Check = enum(u4) { none = 0x00, @@ -27,11 +30,11 @@ pub fn Decompress(comptime ReaderType: type) type { return struct { const Self = @This(); - pub const Error = ReaderType.Error || block.Decoder(ReaderType).Error; + pub const Error = ReaderType.Error || Decoder(ReaderType).Error; pub const Reader = std.io.GenericReader(*Self, Error, read); allocator: Allocator, - block_decoder: block.Decoder(ReaderType), + block_decoder: Decoder(ReaderType), in_reader: ReaderType, fn init(allocator: Allocator, source: ReaderType) !Self { @@ -52,7 +55,7 @@ pub fn Decompress(comptime ReaderType: type) type { return Self{ .allocator = allocator, - .block_decoder = try block.decoder(allocator, source, check), + .block_decoder = try decoder(allocator, source, check), .in_reader = source, }; } @@ -161,6 +164,206 @@ pub fn hashedReader( return .{ .child_reader = reader, .hasher = hasher }; } +const DecodeError = error{ + CorruptInput, + EndOfStream, + EndOfStreamWithNoError, + WrongChecksum, + Unsupported, + Overflow, +}; + +pub fn decoder(allocator: Allocator, reader: anytype, check: Check) !Decoder(@TypeOf(reader)) { + return Decoder(@TypeOf(reader)).init(allocator, reader, check); +} + +pub fn Decoder(comptime ReaderType: type) type { + return struct { + const Self = @This(); + pub const Error = + ReaderType.Error || + DecodeError || + Allocator.Error; + pub const Reader = std.io.GenericReader(*Self, Error, read); + + allocator: Allocator, + inner_reader: ReaderType, + check: Check, + err: ?Error, + to_read: ArrayList(u8), + read_pos: usize, + block_count: usize, + + fn init(allocator: Allocator, in_reader: ReaderType, check: Check) !Self { + return Self{ + .allocator = allocator, + .inner_reader = in_reader, + .check = check, + .err = null, + .to_read = .{}, + .read_pos = 0, + .block_count = 0, + }; + } + + pub fn deinit(self: *Self) void { + self.to_read.deinit(self.allocator); + } + + pub fn reader(self: *Self) Reader { + return .{ .context = self }; + } + + pub fn read(self: *Self, output: []u8) Error!usize { + while (true) { + const unread_len = self.to_read.items.len - self.read_pos; + if (unread_len > 0) { + const n = @min(unread_len, output.len); + @memcpy(output[0..n], self.to_read.items[self.read_pos..][0..n]); + self.read_pos += n; + return n; + } + if (self.err) |e| { + if (e == DecodeError.EndOfStreamWithNoError) { + return 0; + } + return e; + } + if (self.read_pos > 0) { + self.to_read.shrinkRetainingCapacity(0); + self.read_pos = 0; + } + self.readBlock() catch |e| { + self.err = e; + }; + } + } + + fn readBlock(self: *Self) Error!void { + var block_counter = std.io.countingReader(self.inner_reader); + const block_reader = block_counter.reader(); + + var packed_size: ?u64 = null; + var unpacked_size: ?u64 = null; + + // Block Header + { + var header_hasher = hashedReader(block_reader, Crc32.init()); + const header_reader = header_hasher.reader(); + + const header_size = @as(u64, try header_reader.readByte()) * 4; + if (header_size == 0) + return error.EndOfStreamWithNoError; + + const Flags = packed struct(u8) { + last_filter_index: u2, + reserved: u4, + has_packed_size: bool, + has_unpacked_size: bool, + }; + + const flags = @as(Flags, @bitCast(try header_reader.readByte())); + const filter_count = @as(u3, flags.last_filter_index) + 1; + if (filter_count > 1) + return error.Unsupported; + + if (flags.has_packed_size) + packed_size = try std.leb.readUleb128(u64, header_reader); + + if (flags.has_unpacked_size) + unpacked_size = try std.leb.readUleb128(u64, header_reader); + + const FilterId = enum(u64) { + lzma2 = 0x21, + _, + }; + + const filter_id = @as( + FilterId, + @enumFromInt(try std.leb.readUleb128(u64, header_reader)), + ); + + if (@intFromEnum(filter_id) >= 0x4000_0000_0000_0000) + return error.CorruptInput; + + if (filter_id != .lzma2) + return error.Unsupported; + + const properties_size = try std.leb.readUleb128(u64, header_reader); + if (properties_size != 1) + return error.CorruptInput; + + // TODO: use filter properties + _ = try header_reader.readByte(); + + while (block_counter.bytes_read != header_size) { + if (try header_reader.readByte() != 0) + return error.CorruptInput; + } + + const hash_a = header_hasher.hasher.final(); + const hash_b = try header_reader.readInt(u32, .little); + if (hash_a != hash_b) + return error.WrongChecksum; + } + + // Compressed Data + var packed_counter = std.io.countingReader(block_reader); + try lzma2.decompress( + self.allocator, + packed_counter.reader(), + self.to_read.writer(self.allocator), + ); + + if (packed_size) |s| { + if (s != packed_counter.bytes_read) + return error.CorruptInput; + } + + const unpacked_bytes = self.to_read.items; + if (unpacked_size) |s| { + if (s != unpacked_bytes.len) + return error.CorruptInput; + } + + // Block Padding + while (block_counter.bytes_read % 4 != 0) { + if (try block_reader.readByte() != 0) + return error.CorruptInput; + } + + switch (self.check) { + .none => {}, + .crc32 => { + const hash_a = Crc32.hash(unpacked_bytes); + const hash_b = try self.inner_reader.readInt(u32, .little); + if (hash_a != hash_b) + return error.WrongChecksum; + }, + .crc64 => { + const hash_a = Crc64.hash(unpacked_bytes); + const hash_b = try self.inner_reader.readInt(u64, .little); + if (hash_a != hash_b) + return error.WrongChecksum; + }, + .sha256 => { + var hash_a: [Sha256.digest_length]u8 = undefined; + Sha256.hash(unpacked_bytes, &hash_a, .{}); + + var hash_b: [Sha256.digest_length]u8 = undefined; + try self.inner_reader.readNoEof(&hash_b); + + if (!std.mem.eql(u8, &hash_a, &hash_b)) + return error.WrongChecksum; + }, + else => return error.Unsupported, + } + + self.block_count += 1; + } + }; +} + test { _ = @import("xz/test.zig"); } diff --git a/lib/std/compress/xz/block.zig b/lib/std/compress/xz/block.zig deleted file mode 100644 index 505dc543a8..0000000000 --- a/lib/std/compress/xz/block.zig +++ /dev/null @@ -1,208 +0,0 @@ -const std = @import("../../std.zig"); -const lzma2 = std.compress.lzma2; -const Allocator = std.mem.Allocator; -const ArrayListUnmanaged = std.ArrayListUnmanaged; -const Crc32 = std.hash.Crc32; -const Crc64 = std.hash.crc.Crc64Xz; -const Sha256 = std.crypto.hash.sha2.Sha256; -const xz = std.compress.xz; - -const DecodeError = error{ - CorruptInput, - EndOfStream, - EndOfStreamWithNoError, - WrongChecksum, - Unsupported, - Overflow, -}; - -pub fn decoder(allocator: Allocator, reader: anytype, check: xz.Check) !Decoder(@TypeOf(reader)) { - return Decoder(@TypeOf(reader)).init(allocator, reader, check); -} - -pub fn Decoder(comptime ReaderType: type) type { - return struct { - const Self = @This(); - pub const Error = - ReaderType.Error || - DecodeError || - Allocator.Error; - pub const Reader = std.io.GenericReader(*Self, Error, read); - - allocator: Allocator, - inner_reader: ReaderType, - check: xz.Check, - err: ?Error, - to_read: ArrayListUnmanaged(u8), - read_pos: usize, - block_count: usize, - - fn init(allocator: Allocator, in_reader: ReaderType, check: xz.Check) !Self { - return Self{ - .allocator = allocator, - .inner_reader = in_reader, - .check = check, - .err = null, - .to_read = .{}, - .read_pos = 0, - .block_count = 0, - }; - } - - pub fn deinit(self: *Self) void { - self.to_read.deinit(self.allocator); - } - - pub fn reader(self: *Self) Reader { - return .{ .context = self }; - } - - pub fn read(self: *Self, output: []u8) Error!usize { - while (true) { - const unread_len = self.to_read.items.len - self.read_pos; - if (unread_len > 0) { - const n = @min(unread_len, output.len); - @memcpy(output[0..n], self.to_read.items[self.read_pos..][0..n]); - self.read_pos += n; - return n; - } - if (self.err) |e| { - if (e == DecodeError.EndOfStreamWithNoError) { - return 0; - } - return e; - } - if (self.read_pos > 0) { - self.to_read.shrinkRetainingCapacity(0); - self.read_pos = 0; - } - self.readBlock() catch |e| { - self.err = e; - }; - } - } - - fn readBlock(self: *Self) Error!void { - var block_counter = std.io.countingReader(self.inner_reader); - const block_reader = block_counter.reader(); - - var packed_size: ?u64 = null; - var unpacked_size: ?u64 = null; - - // Block Header - { - var header_hasher = xz.hashedReader(block_reader, Crc32.init()); - const header_reader = header_hasher.reader(); - - const header_size = @as(u64, try header_reader.readByte()) * 4; - if (header_size == 0) - return error.EndOfStreamWithNoError; - - const Flags = packed struct(u8) { - last_filter_index: u2, - reserved: u4, - has_packed_size: bool, - has_unpacked_size: bool, - }; - - const flags = @as(Flags, @bitCast(try header_reader.readByte())); - const filter_count = @as(u3, flags.last_filter_index) + 1; - if (filter_count > 1) - return error.Unsupported; - - if (flags.has_packed_size) - packed_size = try std.leb.readUleb128(u64, header_reader); - - if (flags.has_unpacked_size) - unpacked_size = try std.leb.readUleb128(u64, header_reader); - - const FilterId = enum(u64) { - lzma2 = 0x21, - _, - }; - - const filter_id = @as( - FilterId, - @enumFromInt(try std.leb.readUleb128(u64, header_reader)), - ); - - if (@intFromEnum(filter_id) >= 0x4000_0000_0000_0000) - return error.CorruptInput; - - if (filter_id != .lzma2) - return error.Unsupported; - - const properties_size = try std.leb.readUleb128(u64, header_reader); - if (properties_size != 1) - return error.CorruptInput; - - // TODO: use filter properties - _ = try header_reader.readByte(); - - while (block_counter.bytes_read != header_size) { - if (try header_reader.readByte() != 0) - return error.CorruptInput; - } - - const hash_a = header_hasher.hasher.final(); - const hash_b = try header_reader.readInt(u32, .little); - if (hash_a != hash_b) - return error.WrongChecksum; - } - - // Compressed Data - var packed_counter = std.io.countingReader(block_reader); - try lzma2.decompress( - self.allocator, - packed_counter.reader(), - self.to_read.writer(self.allocator), - ); - - if (packed_size) |s| { - if (s != packed_counter.bytes_read) - return error.CorruptInput; - } - - const unpacked_bytes = self.to_read.items; - if (unpacked_size) |s| { - if (s != unpacked_bytes.len) - return error.CorruptInput; - } - - // Block Padding - while (block_counter.bytes_read % 4 != 0) { - if (try block_reader.readByte() != 0) - return error.CorruptInput; - } - - switch (self.check) { - .none => {}, - .crc32 => { - const hash_a = Crc32.hash(unpacked_bytes); - const hash_b = try self.inner_reader.readInt(u32, .little); - if (hash_a != hash_b) - return error.WrongChecksum; - }, - .crc64 => { - const hash_a = Crc64.hash(unpacked_bytes); - const hash_b = try self.inner_reader.readInt(u64, .little); - if (hash_a != hash_b) - return error.WrongChecksum; - }, - .sha256 => { - var hash_a: [Sha256.digest_length]u8 = undefined; - Sha256.hash(unpacked_bytes, &hash_a, .{}); - - var hash_b: [Sha256.digest_length]u8 = undefined; - try self.inner_reader.readNoEof(&hash_b); - - if (!std.mem.eql(u8, &hash_a, &hash_b)) - return error.WrongChecksum; - }, - else => return error.Unsupported, - } - - self.block_count += 1; - } - }; -} |
