std.compress.lzma: update for new I/O API

author: Andrew Kelley <andrew@ziglang.org> 2025-08-25 18:03:48 -0700
committer: Andrew Kelley <andrew@ziglang.org> 2025-08-26 21:00:58 -0700
commit: 58e60697e2930f4311ae9e744ae1c2877e0b69ed (patch)
tree: fd84142e826d2252f23eaae9b002ae0d3f43e341 /lib/std
parent: 6464e0d4fc9937e154c34567891bae84c63732b9 (diff)
download: zig-58e60697e2930f4311ae9e744ae1c2877e0b69ed.tar.gz
zig-58e60697e2930f4311ae9e744ae1c2877e0b69ed.zip
5 files changed, 556 insertions, 653 deletions
diff --git a/lib/std/compress/lzma.zig b/lib/std/compress/lzma.zig
index c40eeeb56e..597e97a94f 100644
--- a/lib/std/compress/lzma.zig
+++ b/lib/std/compress/lzma.zig
@@ -4,49 +4,34 @@ const mem = std.mem;
 const Allocator = std.mem.Allocator;
 const assert = std.debug.assert;
 const ArrayList = std.ArrayList;
+const Writer = std.Io.Writer;
+const Reader = std.Io.Reader;
 
 pub const RangeDecoder = struct {
     range: u32,
     code: u32,
 
-    pub fn init(reader: anytype) !RangeDecoder {
-        const reserved = try reader.readByte();
-        if (reserved != 0) {
-            return error.CorruptInput;
-        }
-        return RangeDecoder{
-            .range = 0xFFFF_FFFF,
-            .code = try reader.readInt(u32, .big),
-        };
-    }
-
-    pub fn fromParts(
-        range: u32,
-        code: u32,
-    ) RangeDecoder {
+    pub fn init(reader: *Reader) !RangeDecoder {
+        const reserved = try reader.takeByte();
+        if (reserved != 0) return error.InvalidRangeCode;
         return .{
-            .range = range,
-            .code = code,
+            .range = 0xFFFF_FFFF,
+            .code = try reader.takeInt(u32, .big),
         };
     }
 
-    pub fn set(self: *RangeDecoder, range: u32, code: u32) void {
-        self.range = range;
-        self.code = code;
-    }
-
-    pub inline fn isFinished(self: RangeDecoder) bool {
+    pub fn isFinished(self: RangeDecoder) bool {
         return self.code == 0;
     }
 
-    inline fn normalize(self: *RangeDecoder, reader: anytype) !void {
+    fn normalize(self: *RangeDecoder, reader: *Reader) !void {
         if (self.range < 0x0100_0000) {
             self.range <<= 8;
-            self.code = (self.code << 8) ^ @as(u32, try reader.readByte());
+            self.code = (self.code << 8) ^ @as(u32, try reader.takeByte());
         }
     }
 
-    inline fn getBit(self: *RangeDecoder, reader: anytype) !bool {
+    fn getBit(self: *RangeDecoder, reader: *Reader) !bool {
         self.range >>= 1;
 
         const bit = self.code >= self.range;
@@ -57,7 +42,7 @@ pub const RangeDecoder = struct {
         return bit;
     }
 
-    pub fn get(self: *RangeDecoder, reader: anytype, count: usize) !u32 {
+    pub fn get(self: *RangeDecoder, reader: *Reader, count: usize) !u32 {
         var result: u32 = 0;
         var i: usize = 0;
         while (i < count) : (i += 1)
@@ -65,7 +50,7 @@ pub const RangeDecoder = struct {
         return result;
     }
 
-    pub inline fn decodeBit(self: *RangeDecoder, reader: anytype, prob: *u16, update: bool) !bool {
+    pub fn decodeBit(self: *RangeDecoder, reader: *Reader, prob: *u16, update: bool) !bool {
         const bound = (self.range >> 11) * prob.*;
 
         if (self.code < bound) {
@@ -88,7 +73,7 @@ pub const RangeDecoder = struct {
 
     fn parseBitTree(
         self: *RangeDecoder,
-        reader: anytype,
+        reader: *Reader,
         num_bits: u5,
         probs: []u16,
         update: bool,
@@ -104,7 +89,7 @@ pub const RangeDecoder = struct {
 
     pub fn parseReverseBitTree(
         self: *RangeDecoder,
-        reader: anytype,
+        reader: *Reader,
         num_bits: u5,
         probs: []u16,
         offset: usize,
@@ -123,7 +108,7 @@ pub const RangeDecoder = struct {
 };
 
 pub const Decode = struct {
-    lzma_props: Properties,
+    properties: Properties,
     unpacked_size: ?u64,
     literal_probs: Vec2d,
     pos_slot_decoder: [4]BitTree(6),
@@ -141,14 +126,14 @@ pub const Decode = struct {
     rep_len_decoder: LenDecoder,
 
     pub fn init(
-        allocator: Allocator,
-        lzma_props: Properties,
+        gpa: Allocator,
+        properties: Properties,
         unpacked_size: ?u64,
     ) !Decode {
         return .{
-            .lzma_props = lzma_props,
+            .properties = properties,
             .unpacked_size = unpacked_size,
-            .literal_probs = try Vec2d.init(allocator, 0x400, .{ @as(usize, 1) << (lzma_props.lc + lzma_props.lp), 0x300 }),
+            .literal_probs = try Vec2d.init(gpa, 0x400, .{ @as(usize, 1) << (properties.lc + properties.lp), 0x300 }),
             .pos_slot_decoder = @splat(.{}),
             .align_decoder = .{},
             .pos_decoders = @splat(0x400),
@@ -165,21 +150,21 @@ pub const Decode = struct {
         };
     }
 
-    pub fn deinit(self: *Decode, allocator: Allocator) void {
-        self.literal_probs.deinit(allocator);
+    pub fn deinit(self: *Decode, gpa: Allocator) void {
+        self.literal_probs.deinit(gpa);
         self.* = undefined;
     }
 
-    pub fn resetState(self: *Decode, allocator: Allocator, new_props: Properties) !void {
+    pub fn resetState(self: *Decode, gpa: Allocator, new_props: Properties) !void {
         new_props.validate();
-        if (self.lzma_props.lc + self.lzma_props.lp == new_props.lc + new_props.lp) {
+        if (self.properties.lc + self.properties.lp == new_props.lc + new_props.lp) {
             self.literal_probs.fill(0x400);
         } else {
-            self.literal_probs.deinit(allocator);
-            self.literal_probs = try Vec2d.init(allocator, 0x400, .{ @as(usize, 1) << (new_props.lc + new_props.lp), 0x300 });
+            self.literal_probs.deinit(gpa);
+            self.literal_probs = try Vec2d.init(gpa, 0x400, .{ @as(usize, 1) << (new_props.lc + new_props.lp), 0x300 });
         }
 
-        self.lzma_props = new_props;
+        self.properties = new_props;
         for (&self.pos_slot_decoder) |*t| t.reset();
         self.align_decoder.reset();
         self.pos_decoders = @splat(0x400);
@@ -195,26 +180,23 @@ pub const Decode = struct {
         self.rep_len_decoder.reset();
     }
 
-    fn processNextInner(
+    fn processNext(
         self: *Decode,
-        allocator: Allocator,
-        reader: anytype,
-        writer: anytype,
-        buffer: anytype,
+        reader: *Reader,
+        allocating: *Writer.Allocating,
+        buffer: *CircularBuffer,
         decoder: *RangeDecoder,
         update: bool,
     ) !ProcessingStatus {
-        const pos_state = buffer.len & ((@as(usize, 1) << self.lzma_props.pb) - 1);
+        const gpa = allocating.allocator;
+        const writer = &allocating.writer;
+        const pos_state = buffer.len & ((@as(usize, 1) << self.properties.pb) - 1);
 
-        if (!try decoder.decodeBit(
-            reader,
-            &self.is_match[(self.state << 4) + pos_state],
-            update,
-        )) {
+        if (!try decoder.decodeBit(reader, &self.is_match[(self.state << 4) + pos_state], update)) {
             const byte: u8 = try self.decodeLiteral(reader, buffer, decoder, update);
 
             if (update) {
-                try buffer.appendLiteral(allocator, byte, writer);
+                try buffer.appendLiteral(gpa, byte, writer);
 
                 self.state = if (self.state < 4)
                     0
@@ -223,7 +205,7 @@ pub const Decode = struct {
                 else
                     self.state - 6;
             }
-            return .continue_;
+            return .more;
         }
 
         var len: usize = undefined;
@@ -237,9 +219,9 @@ pub const Decode = struct {
                     if (update) {
                         self.state = if (self.state < 7) 9 else 11;
                         const dist = self.rep[0] + 1;
-                        try buffer.appendLz(allocator, 1, dist, writer);
+                        try buffer.appendLz(gpa, 1, dist, writer);
                     }
-                    return .continue_;
+                    return .more;
                 }
             } else {
                 const idx: usize = if (!try decoder.decodeBit(reader, &self.is_rep_g1[self.state], update))
@@ -293,31 +275,19 @@ pub const Decode = struct {
             len += 2;
 
             const dist = self.rep[0] + 1;
-            try buffer.appendLz(allocator, len, dist, writer);
+            try buffer.appendLz(gpa, len, dist, writer);
         }
 
-        return .continue_;
-    }
-
-    fn processNext(
-        self: *Decode,
-        allocator: Allocator,
-        reader: anytype,
-        writer: anytype,
-        buffer: anytype,
-        decoder: *RangeDecoder,
-    ) !ProcessingStatus {
-        return self.processNextInner(allocator, reader, writer, buffer, decoder, true);
+        return .more;
     }
 
     pub fn process(
         self: *Decode,
-        allocator: Allocator,
-        reader: anytype,
-        writer: anytype,
-        buffer: anytype,
+        reader: *Reader,
+        allocating: *Writer.Allocating,
+        buffer: *CircularBuffer,
         decoder: *RangeDecoder,
-    ) !ProcessingStatus {
+    ) !void {
         process_next: {
             if (self.unpacked_size) |unpacked_size| {
                 if (buffer.len >= unpacked_size) {
@@ -326,26 +296,24 @@ pub const Decode = struct {
             } else if (decoder.isFinished()) {
                 break :process_next;
             }
-
-            switch (try self.processNext(allocator, reader, writer, buffer, decoder)) {
-                .continue_ => return .continue_,
-                .finished => break :process_next,
+            switch (try self.processNext(reader, allocating, buffer, decoder, true)) {
+                .more => return,
+                .finished => {},
             }
         }
 
         if (self.unpacked_size) |unpacked_size| {
-            if (buffer.len != unpacked_size) {
-                return error.CorruptInput;
-            }
+            if (buffer.len != unpacked_size) return error.DecompressedSizeMismatch;
         }
 
-        return .finished;
+        try buffer.finish(&allocating.writer);
+        self.state = math.maxInt(usize);
     }
 
     fn decodeLiteral(
         self: *Decode,
-        reader: anytype,
-        buffer: anytype,
+        reader: *Reader,
+        buffer: *CircularBuffer,
         decoder: *RangeDecoder,
         update: bool,
     ) !u8 {
@@ -353,9 +321,9 @@ pub const Decode = struct {
         const prev_byte = @as(usize, buffer.lastOr(def_prev_byte));
 
         var result: usize = 1;
-        const lit_state = ((buffer.len & ((@as(usize, 1) << self.lzma_props.lp) - 1)) << self.lzma_props.lc) +
-            (prev_byte >> (8 - self.lzma_props.lc));
-        const probs = try self.literal_probs.getMut(lit_state);
+        const lit_state = ((buffer.len & ((@as(usize, 1) << self.properties.lp) - 1)) << self.properties.lc) +
+            (prev_byte >> (8 - self.properties.lc));
+        const probs = try self.literal_probs.get(lit_state);
 
         if (self.state >= 7) {
             var match_byte = @as(usize, try buffer.lastN(self.rep[0] + 1));
@@ -384,7 +352,7 @@ pub const Decode = struct {
 
     fn decodeDistance(
         self: *Decode,
-        reader: anytype,
+        reader: *Reader,
         decoder: *RangeDecoder,
         length: usize,
         update: bool,
@@ -415,46 +383,40 @@ pub const Decode = struct {
     }
 
     /// A circular buffer for LZ sequences
-    pub const LzCircularBuffer = struct {
+    pub const CircularBuffer = struct {
         /// Circular buffer
         buf: ArrayList(u8),
-
         /// Length of the buffer
         dict_size: usize,
-
         /// Buffer memory limit
-        memlimit: usize,
-
+        mem_limit: usize,
         /// Current position
         cursor: usize,
-
         /// Total number of bytes sent through the buffer
         len: usize,
 
-        const Self = @This();
-
-        pub fn init(dict_size: usize, memlimit: usize) Self {
-            return Self{
+        pub fn init(dict_size: usize, mem_limit: usize) CircularBuffer {
+            return .{
                 .buf = .{},
                 .dict_size = dict_size,
-                .memlimit = memlimit,
+                .mem_limit = mem_limit,
                 .cursor = 0,
                 .len = 0,
             };
         }
 
-        pub fn get(self: Self, index: usize) u8 {
+        pub fn get(self: CircularBuffer, index: usize) u8 {
             return if (0 <= index and index < self.buf.items.len)
                 self.buf.items[index]
             else
                 0;
         }
 
-        pub fn set(self: *Self, allocator: Allocator, index: usize, value: u8) !void {
-            if (index >= self.memlimit) {
+        pub fn set(self: *CircularBuffer, gpa: Allocator, index: usize, value: u8) !void {
+            if (index >= self.mem_limit) {
                 return error.CorruptInput;
             }
-            try self.buf.ensureTotalCapacity(allocator, index + 1);
+            try self.buf.ensureTotalCapacity(gpa, index + 1);
             while (self.buf.items.len < index) {
                 self.buf.appendAssumeCapacity(0);
             }
@@ -462,7 +424,7 @@ pub const Decode = struct {
         }
 
         /// Retrieve the last byte or return a default
-        pub fn lastOr(self: Self, lit: u8) u8 {
+        pub fn lastOr(self: CircularBuffer, lit: u8) u8 {
             return if (self.len == 0)
                 lit
             else
@@ -470,7 +432,7 @@ pub const Decode = struct {
         }
 
         /// Retrieve the n-th last byte
-        pub fn lastN(self: Self, dist: usize) !u8 {
+        pub fn lastN(self: CircularBuffer, dist: usize) !u8 {
             if (dist > self.dict_size or dist > self.len) {
                 return error.CorruptInput;
             }
@@ -481,12 +443,12 @@ pub const Decode = struct {
 
         /// Append a literal
         pub fn appendLiteral(
-            self: *Self,
-            allocator: Allocator,
+            self: *CircularBuffer,
+            gpa: Allocator,
             lit: u8,
-            writer: anytype,
+            writer: *Writer,
         ) !void {
-            try self.set(allocator, self.cursor, lit);
+            try self.set(gpa, self.cursor, lit);
             self.cursor += 1;
             self.len += 1;
 
@@ -499,11 +461,11 @@ pub const Decode = struct {
 
         /// Fetch an LZ sequence (length, distance) from inside the buffer
         pub fn appendLz(
-            self: *Self,
-            allocator: Allocator,
+            self: *CircularBuffer,
+            gpa: Allocator,
             len: usize,
             dist: usize,
-            writer: anytype,
+            writer: *Writer,
         ) !void {
             if (dist > self.dict_size or dist > self.len) {
                 return error.CorruptInput;
@@ -513,7 +475,7 @@ pub const Decode = struct {
             var i: usize = 0;
             while (i < len) : (i += 1) {
                 const x = self.get(offset);
-                try self.appendLiteral(allocator, x, writer);
+                try self.appendLiteral(gpa, x, writer);
                 offset += 1;
                 if (offset == self.dict_size) {
                     offset = 0;
@@ -521,15 +483,15 @@ pub const Decode = struct {
             }
         }
 
-        pub fn finish(self: *Self, writer: anytype) !void {
+        pub fn finish(self: *CircularBuffer, writer: *Writer) !void {
             if (self.cursor > 0) {
                 try writer.writeAll(self.buf.items[0..self.cursor]);
                 self.cursor = 0;
             }
         }
 
-        pub fn deinit(self: *Self, allocator: Allocator) void {
-            self.buf.deinit(allocator);
+        pub fn deinit(self: *CircularBuffer, gpa: Allocator) void {
+            self.buf.deinit(gpa);
             self.* = undefined;
         }
     };
@@ -538,11 +500,9 @@ pub const Decode = struct {
         return struct {
             probs: [1 << num_bits]u16 = @splat(0x400),
 
-            const Self = @This();
-
             pub fn parse(
-                self: *Self,
-                reader: anytype,
+                self: *@This(),
+                reader: *Reader,
                 decoder: *RangeDecoder,
                 update: bool,
             ) !u32 {
@@ -550,15 +510,15 @@ pub const Decode = struct {
             }
 
             pub fn parseReverse(
-                self: *Self,
-                reader: anytype,
+                self: *@This(),
+                reader: *Reader,
                 decoder: *RangeDecoder,
                 update: bool,
             ) !u32 {
                 return decoder.parseReverseBitTree(reader, num_bits, &self.probs, 0, update);
             }
 
-            pub fn reset(self: *Self) void {
+            pub fn reset(self: *@This()) void {
                 @memset(&self.probs, 0x400);
             }
         };
@@ -573,7 +533,7 @@ pub const Decode = struct {
 
         pub fn decode(
             self: *LenDecoder,
-            reader: anytype,
+            reader: *Reader,
             decoder: *RangeDecoder,
             pos_state: usize,
             update: bool,
@@ -600,45 +560,35 @@ pub const Decode = struct {
         data: []u16,
         cols: usize,
 
-        const Self = @This();
-
-        pub fn init(allocator: Allocator, value: u16, size: struct { usize, usize }) !Self {
+        pub fn init(gpa: Allocator, value: u16, size: struct { usize, usize }) !Vec2d {
             const len = try math.mul(usize, size[0], size[1]);
-            const data = try allocator.alloc(u16, len);
+            const data = try gpa.alloc(u16, len);
             @memset(data, value);
-            return Self{
+            return .{
                 .data = data,
                 .cols = size[1],
             };
         }
 
-        pub fn deinit(self: *Self, allocator: Allocator) void {
-            allocator.free(self.data);
+        pub fn deinit(self: *Vec2d, gpa: Allocator) void {
+            gpa.free(self.data);
             self.* = undefined;
         }
 
-        pub fn fill(self: *Self, value: u16) void {
+        pub fn fill(self: *Vec2d, value: u16) void {
             @memset(self.data, value);
         }
 
-        inline fn _get(self: Self, row: usize) ![]u16 {
+        fn get(self: Vec2d, row: usize) ![]u16 {
             const start_row = try math.mul(usize, row, self.cols);
             const end_row = try math.add(usize, start_row, self.cols);
             return self.data[start_row..end_row];
         }
-
-        pub fn get(self: Self, row: usize) ![]const u16 {
-            return self._get(row);
-        }
-
-        pub fn getMut(self: *Self, row: usize) ![]u16 {
-            return self._get(row);
-        }
     };
 
     pub const Options = struct {
         unpacked_size: UnpackedSize = .read_from_header,
-        memlimit: ?usize = null,
+        mem_limit: ?usize = null,
         allow_incomplete: bool = false,
     };
 
@@ -649,7 +599,7 @@ pub const Decode = struct {
     };
 
     const ProcessingStatus = enum {
-        continue_,
+        more,
         finished,
     };
 
@@ -670,39 +620,34 @@ pub const Decode = struct {
         dict_size: u32,
         unpacked_size: ?u64,
 
-        pub fn readHeader(reader: anytype, options: Options) !Params {
-            var props = try reader.readByte();
-            if (props >= 225) {
-                return error.CorruptInput;
-            }
+        pub fn readHeader(reader: *Reader, options: Options) !Params {
+            var props = try reader.takeByte();
+            if (props >= 225) return error.CorruptInput;
 
-            const lc = @as(u4, @intCast(props % 9));
+            const lc: u4 = @intCast(props % 9);
             props /= 9;
-            const lp = @as(u3, @intCast(props % 5));
+            const lp: u3 = @intCast(props % 5);
             props /= 5;
-            const pb = @as(u3, @intCast(props));
+            const pb: u3 = @intCast(props);
 
-            const dict_size_provided = try reader.readInt(u32, .little);
+            const dict_size_provided = try reader.takeInt(u32, .little);
             const dict_size = @max(0x1000, dict_size_provided);
 
             const unpacked_size = switch (options.unpacked_size) {
                 .read_from_header => blk: {
-                    const unpacked_size_provided = try reader.readInt(u64, .little);
+                    const unpacked_size_provided = try reader.takeInt(u64, .little);
                     const marker_mandatory = unpacked_size_provided == 0xFFFF_FFFF_FFFF_FFFF;
-                    break :blk if (marker_mandatory)
-                        null
-                    else
-                        unpacked_size_provided;
+                    break :blk if (marker_mandatory) null else unpacked_size_provided;
                 },
                 .read_header_but_use_provided => |x| blk: {
-                    _ = try reader.readInt(u64, .little);
+                    _ = try reader.takeInt(u64, .little);
                     break :blk x;
                 },
                 .use_provided => |x| x,
             };
 
-            return Params{
-                .properties = Properties{ .lc = lc, .lp = lp, .pb = pb },
+            return .{
+                .properties = .{ .lc = lc, .lp = lp, .pb = pb },
                 .dict_size = dict_size,
                 .unpacked_size = unpacked_size,
             };
@@ -710,84 +655,121 @@ pub const Decode = struct {
     };
 };
 
-pub fn decompress(
-    allocator: Allocator,
-    reader: anytype,
-) !Decompress(@TypeOf(reader)) {
-    return decompressWithOptions(allocator, reader, .{});
-}
-
-pub fn decompressWithOptions(
-    allocator: Allocator,
-    reader: anytype,
-    options: Decode.Options,
-) !Decompress(@TypeOf(reader)) {
-    const params = try Decode.Params.readHeader(reader, options);
-    return Decompress(@TypeOf(reader)).init(allocator, reader, params, options.memlimit);
-}
-
-pub fn Decompress(comptime ReaderType: type) type {
-    return struct {
-        const Self = @This();
-
-        pub const Error =
-            ReaderType.Error ||
-            Allocator.Error ||
-            error{ CorruptInput, EndOfStream, Overflow };
-
-        pub const Reader = std.io.GenericReader(*Self, Error, read);
+pub const Decompress = struct {
+    gpa: Allocator,
+    input: *Reader,
+    reader: Reader,
+    buffer: Decode.CircularBuffer,
+    range_decoder: RangeDecoder,
+    decode: Decode,
+    err: ?Error,
+
+    pub const Error = error{
+        OutOfMemory,
+        ReadFailed,
+        CorruptInput,
+        DecompressedSizeMismatch,
+        EndOfStream,
+        Overflow,
+    };
 
-        allocator: Allocator,
-        in_reader: ReaderType,
-        to_read: std.ArrayListUnmanaged(u8),
+    /// Takes ownership of `buffer` which may be resized with `gpa`.
+    ///
+    /// LZMA was explicitly designed to take advantage of large heap memory
+    /// being available, with a dictionary size anywhere from 4K to 4G. Thus,
+    /// this API dynamically allocates the dictionary as-needed.
+    pub fn initParams(
+        input: *Reader,
+        gpa: Allocator,
+        buffer: []u8,
+        params: Decode.Params,
+        mem_limit: usize,
+    ) !Decompress {
+        return .{
+            .gpa = gpa,
+            .input = input,
+            .buffer = Decode.CircularBuffer.init(params.dict_size, mem_limit),
+            .range_decoder = try RangeDecoder.init(input),
+            .decode = try Decode.init(gpa, params.properties, params.unpacked_size),
+            .reader = .{
+                .buffer = buffer,
+                .vtable = &.{
+                    .readVec = readVec,
+                    .stream = stream,
+                },
+                .seek = 0,
+                .end = 0,
+            },
+            .err = null,
+        };
+    }
 
-        buffer: Decode.LzCircularBuffer,
-        decoder: RangeDecoder,
-        state: Decode,
+    /// Takes ownership of `buffer` which may be resized with `gpa`.
+    ///
+    /// LZMA was explicitly designed to take advantage of large heap memory
+    /// being available, with a dictionary size anywhere from 4K to 4G. Thus,
+    /// this API dynamically allocates the dictionary as-needed.
+    pub fn initOptions(
+        input: *Reader,
+        gpa: Allocator,
+        buffer: []u8,
+        options: Decode.Options,
+        mem_limit: usize,
+    ) !Decompress {
+        const params = try Decode.Params.readHeader(input, options);
+        return initParams(input, gpa, buffer, params, mem_limit);
+    }
 
-        pub fn init(allocator: Allocator, source: ReaderType, params: Decode.Params, memlimit: ?usize) !Self {
-            return Self{
-                .allocator = allocator,
-                .in_reader = source,
-                .to_read = .{},
+    /// Reclaim ownership of the buffer passed to `initParams` or `initOptions`.
+    pub fn takeBuffer(d: *Decompress) []u8 {
+        const buffer = d.reader.buffer;
+        d.reader.buffer = &.{};
+        return buffer;
+    }
 
-                .buffer = Decode.LzCircularBuffer.init(params.dict_size, memlimit orelse math.maxInt(usize)),
-                .decoder = try RangeDecoder.init(source),
-                .state = try Decode.init(allocator, params.properties, params.unpacked_size),
-            };
-        }
+    pub fn deinit(d: *Decompress) void {
+        const gpa = d.gpa;
+        gpa.free(d.reader.buffer);
+        d.buffer.deinit(gpa);
+        d.decode.deinit(gpa);
+        d.* = undefined;
+    }
 
-        pub fn reader(self: *Self) Reader {
-            return .{ .context = self };
-        }
+    fn readVec(r: *Reader, data: [][]u8) Reader.Error!usize {
+        _ = data;
+        return readIndirect(r);
+    }
 
-        pub fn deinit(self: *Self) void {
-            self.to_read.deinit(self.allocator);
-            self.buffer.deinit(self.allocator);
-            self.state.deinit(self.allocator);
-            self.* = undefined;
-        }
+    fn stream(r: *Reader, w: *Writer, limit: std.Io.Limit) Reader.StreamError!usize {
+        _ = w;
+        _ = limit;
+        return readIndirect(r);
+    }
 
-        pub fn read(self: *Self, output: []u8) Error!usize {
-            const writer = self.to_read.writer(self.allocator);
-            while (self.to_read.items.len < output.len) {
-                switch (try self.state.process(self.allocator, self.in_reader, writer, &self.buffer, &self.decoder)) {
-                    .continue_ => {},
-                    .finished => {
-                        try self.buffer.finish(writer);
-                        break;
-                    },
-                }
-            }
-            const input = self.to_read.items;
-            const n = @min(input.len, output.len);
-            @memcpy(output[0..n], input[0..n]);
-            std.mem.copyForwards(u8, input[0 .. input.len - n], input[n..]);
-            self.to_read.shrinkRetainingCapacity(input.len - n);
-            return n;
-        }
-    };
-}
+    fn readIndirect(r: *Reader) Reader.Error!usize {
+        const d: *Decompress = @alignCast(@fieldParentPtr("reader", r));
+        const gpa = d.gpa;
+        var allocating = Writer.Allocating.initOwnedSlice(gpa, r.buffer);
+        allocating.writer.end = r.end;
+        defer r.end = allocating.writer.end;
+        if (d.decode.state == math.maxInt(usize)) return error.EndOfStream;
+        d.decode.process(d.input, &allocating, &d.buffer, &d.range_decoder) catch |err| switch (err) {
+            error.WriteFailed => {
+                d.err = error.OutOfMemory;
+                return error.ReadFailed;
+            },
+            error.EndOfStream => {
+                d.err = error.EndOfStream;
+                return error.ReadFailed;
+            },
+            else => |e| {
+                d.err = e;
+                return error.ReadFailed;
+            },
+        };
+        return 0;
+    }
+};
 
 test {
     _ = @import("lzma/test.zig");
diff --git a/lib/std/compress/lzma/test.zig b/lib/std/compress/lzma/test.zig
index eafb91b6bb..2514ddb8c3 100644
--- a/lib/std/compress/lzma/test.zig
+++ b/lib/std/compress/lzma/test.zig
@@ -1,19 +1,19 @@
 const std = @import("../../std.zig");
-const lzma = @import("../lzma.zig");
+const lzma = std.compress.lzma;
 
 fn testDecompress(compressed: []const u8) ![]u8 {
-    const allocator = std.testing.allocator;
-    var stream = std.io.fixedBufferStream(compressed);
-    var decompressor = try lzma.decompress(allocator, stream.reader());
+    const gpa = std.testing.allocator;
+    var stream: std.Io.Reader = .fixed(compressed);
+
+    var decompressor = try lzma.Decompress.initOptions(&stream, gpa, &.{}, .{}, std.math.maxInt(u32));
     defer decompressor.deinit();
-    const reader = decompressor.reader();
-    return reader.readAllAlloc(allocator, std.math.maxInt(usize));
+    return decompressor.reader.allocRemaining(gpa, .unlimited);
 }
 
 fn testDecompressEqual(expected: []const u8, compressed: []const u8) !void {
-    const allocator = std.testing.allocator;
+    const gpa = std.testing.allocator;
     const decomp = try testDecompress(compressed);
-    defer allocator.free(decomp);
+    defer gpa.free(decomp);
     try std.testing.expectEqualSlices(u8, expected, decomp);
 }
 
@@ -89,11 +89,13 @@ test "too small uncompressed size in header" {
 }
 
 test "reading one byte" {
+    const gpa = std.testing.allocator;
     const compressed = @embedFile("testdata/good-known_size-with_eopm.lzma");
-    var stream = std.io.fixedBufferStream(compressed);
-    var decompressor = try lzma.decompress(std.testing.allocator, stream.reader());
+    var stream: std.Io.Reader = .fixed(compressed);
+    var decompressor = try lzma.Decompress.initOptions(&stream, gpa, &.{}, .{}, std.math.maxInt(u32));
     defer decompressor.deinit();
 
-    var buffer = [1]u8{0};
-    _ = try decompressor.read(buffer[0..]);
+    var buffer: [1]u8 = undefined;
+    try decompressor.reader.readSliceAll(&buffer);
+    try std.testing.expectEqual(72, buffer[0]);
 }
diff --git a/lib/std/compress/lzma2.zig b/lib/std/compress/lzma2.zig
index e454e6382e..d880bad21d 100644
--- a/lib/std/compress/lzma2.zig
+++ b/lib/std/compress/lzma2.zig
@@ -2,6 +2,8 @@ const std = @import("../std.zig");
 const Allocator = std.mem.Allocator;
 const ArrayList = std.ArrayList;
 const lzma = std.compress.lzma;
+const Writer = std.Io.Writer;
+const Reader = std.Io.Reader;
 
 /// An accumulating buffer for LZ sequences
 pub const LzAccumBuffer = struct {
@@ -14,30 +16,28 @@ pub const LzAccumBuffer = struct {
     /// Total number of bytes sent through the buffer
     len: usize,
 
-    const Self = @This();
-
-    pub fn init(memlimit: usize) Self {
-        return Self{
+    pub fn init(memlimit: usize) LzAccumBuffer {
+        return .{
             .buf = .{},
             .memlimit = memlimit,
             .len = 0,
         };
     }
 
-    pub fn appendByte(self: *Self, allocator: Allocator, byte: u8) !void {
+    pub fn appendByte(self: *LzAccumBuffer, allocator: Allocator, byte: u8) !void {
         try self.buf.append(allocator, byte);
         self.len += 1;
     }
 
     /// Reset the internal dictionary
-    pub fn reset(self: *Self, writer: anytype) !void {
+    pub fn reset(self: *LzAccumBuffer, writer: *Writer) !void {
         try writer.writeAll(self.buf.items);
         self.buf.clearRetainingCapacity();
         self.len = 0;
     }
 
     /// Retrieve the last byte or return a default
-    pub fn lastOr(self: Self, lit: u8) u8 {
+    pub fn lastOr(self: LzAccumBuffer, lit: u8) u8 {
         const buf_len = self.buf.items.len;
         return if (buf_len == 0)
             lit
@@ -46,7 +46,7 @@ pub const LzAccumBuffer = struct {
     }
 
     /// Retrieve the n-th last byte
-    pub fn lastN(self: Self, dist: usize) !u8 {
+    pub fn lastN(self: LzAccumBuffer, dist: usize) !u8 {
         const buf_len = self.buf.items.len;
         if (dist > buf_len) {
             return error.CorruptInput;
@@ -57,10 +57,10 @@ pub const LzAccumBuffer = struct {
 
     /// Append a literal
     pub fn appendLiteral(
-        self: *Self,
+        self: *LzAccumBuffer,
         allocator: Allocator,
         lit: u8,
-        writer: anytype,
+        writer: *Writer,
     ) !void {
         _ = writer;
         if (self.len >= self.memlimit) {
@@ -72,11 +72,11 @@ pub const LzAccumBuffer = struct {
 
     /// Fetch an LZ sequence (length, distance) from inside the buffer
     pub fn appendLz(
-        self: *Self,
+        self: *LzAccumBuffer,
         allocator: Allocator,
         len: usize,
         dist: usize,
-        writer: anytype,
+        writer: *Writer,
     ) !void {
         _ = writer;
 
@@ -95,23 +95,23 @@ pub const LzAccumBuffer = struct {
         self.len += len;
     }
 
-    pub fn finish(self: *Self, writer: anytype) !void {
+    pub fn finish(self: *LzAccumBuffer, writer: *Writer) !void {
         try writer.writeAll(self.buf.items);
         self.buf.clearRetainingCapacity();
     }
 
-    pub fn deinit(self: *Self, allocator: Allocator) void {
+    pub fn deinit(self: *LzAccumBuffer, allocator: Allocator) void {
         self.buf.deinit(allocator);
         self.* = undefined;
     }
 };
 
 pub const Decode = struct {
-    lzma_state: lzma.Decode,
+    lzma_decode: lzma.Decode,
 
     pub fn init(allocator: Allocator) !Decode {
         return Decode{
-            .lzma_state = try lzma.Decode.init(
+            .lzma_decode = try lzma.Decode.init(
                 allocator,
                 .{
                     .lc = 0,
@@ -124,15 +124,15 @@ pub const Decode = struct {
     }
 
     pub fn deinit(self: *Decode, allocator: Allocator) void {
-        self.lzma_state.deinit(allocator);
+        self.lzma_decode.deinit(allocator);
         self.* = undefined;
     }
 
     pub fn decompress(
         self: *Decode,
         allocator: Allocator,
-        reader: anytype,
-        writer: anytype,
+        reader: *Reader,
+        writer: *Writer,
     ) !void {
         var accum = LzAccumBuffer.init(std.math.maxInt(usize));
         defer accum.deinit(allocator);
@@ -154,8 +154,8 @@ pub const Decode = struct {
     fn parseLzma(
         self: *Decode,
         allocator: Allocator,
-        reader: anytype,
-        writer: anytype,
+        reader: *Reader,
+        writer: *Writer,
         accum: *LzAccumBuffer,
         status: u8,
     ) !void {
@@ -210,7 +210,7 @@ pub const Decode = struct {
         }
 
         if (reset.state) {
-            var new_props = self.lzma_state.lzma_props;
+            var new_props = self.lzma_decode.properties;
 
             if (reset.props) {
                 var props = try reader.readByte();
@@ -231,16 +231,16 @@ pub const Decode = struct {
                 new_props = .{ .lc = lc, .lp = lp, .pb = pb };
             }
 
-            try self.lzma_state.resetState(allocator, new_props);
+            try self.lzma_decode.resetState(allocator, new_props);
         }
 
-        self.lzma_state.unpacked_size = unpacked_size + accum.len;
+        self.lzma_decode.unpacked_size = unpacked_size + accum.len;
 
         var counter = std.io.countingReader(reader);
         const counter_reader = counter.reader();
 
         var rangecoder = try lzma.RangeDecoder.init(counter_reader);
-        while (try self.lzma_state.process(allocator, counter_reader, writer, accum, &rangecoder) == .continue_) {}
+        while (try self.lzma_decode.process(allocator, counter_reader, writer, accum, &rangecoder) == .continue_) {}
 
         if (counter.bytes_read != packed_size) {
             return error.CorruptInput;
@@ -249,8 +249,8 @@ pub const Decode = struct {
 
     fn parseUncompressed(
         allocator: Allocator,
-        reader: anytype,
-        writer: anytype,
+        reader: *Reader,
+        writer: *Writer,
         accum: *LzAccumBuffer,
         reset_dict: bool,
     ) !void {
@@ -267,24 +267,19 @@ pub const Decode = struct {
     }
 };
 
-pub fn decompress(
-    allocator: Allocator,
-    reader: anytype,
-    writer: anytype,
-) !void {
-    var decoder = try Decode.init(allocator);
-    defer decoder.deinit(allocator);
-    return decoder.decompress(allocator, reader, writer);
-}
-
-test {
+test "decompress hello world stream" {
     const expected = "Hello\nWorld!\n";
     const compressed = &[_]u8{ 0x01, 0x00, 0x05, 0x48, 0x65, 0x6C, 0x6C, 0x6F, 0x0A, 0x02, 0x00, 0x06, 0x57, 0x6F, 0x72, 0x6C, 0x64, 0x21, 0x0A, 0x00 };
 
-    const allocator = std.testing.allocator;
-    var decomp = std.array_list.Managed(u8).init(allocator);
-    defer decomp.deinit();
-    var stream = std.io.fixedBufferStream(compressed);
-    try decompress(allocator, stream.reader(), decomp.writer());
-    try std.testing.expectEqualSlices(u8, expected, decomp.items);
+    const gpa = std.testing.allocator;
+
+    var decode = try Decode.init(gpa);
+    defer decode.deinit(gpa);
+
+    // Decoded bytes are collected in memory by an allocating writer.
+    var stream: std.Io.Reader = .fixed(compressed);
+    var result: std.Io.Writer.Allocating = .init(gpa);
+    defer result.deinit();
+    try decode.decompress(gpa, &stream, &result.writer);
+    try std.testing.expectEqualStrings(expected, result.written());
 }
diff --git a/lib/std/compress/xz.zig b/lib/std/compress/xz.zig
index 54a6a38506..eef47cd005 100644
--- a/lib/std/compress/xz.zig
+++ b/lib/std/compress/xz.zig
@@ -1,368 +1,4 @@
-const std = @import("std");
-const Allocator = std.mem.Allocator;
-const ArrayList = std.ArrayList;
-const Crc32 = std.hash.Crc32;
-const Crc64 = std.hash.crc.Crc64Xz;
-const Sha256 = std.crypto.hash.sha2.Sha256;
-const lzma2 = std.compress.lzma2;
-
-pub const Check = enum(u4) {
-    none = 0x00,
-    crc32 = 0x01,
-    crc64 = 0x04,
-    sha256 = 0x0A,
-    _,
-};
-
-fn readStreamFlags(reader: anytype, check: *Check) !void {
-    const reserved1 = try reader.readByte();
-    if (reserved1 != 0) return error.CorruptInput;
-    const byte = try reader.readByte();
-    if ((byte >> 4) != 0) return error.CorruptInput;
-    check.* = @enumFromInt(@as(u4, @truncate(byte)));
-}
-
-pub fn decompress(allocator: Allocator, reader: anytype) !Decompress(@TypeOf(reader)) {
-    return Decompress(@TypeOf(reader)).init(allocator, reader);
-}
-
-pub fn Decompress(comptime ReaderType: type) type {
-    return struct {
-        const Self = @This();
-
-        pub const Error = ReaderType.Error || Decoder(ReaderType).Error;
-        pub const Reader = std.io.GenericReader(*Self, Error, read);
-
-        allocator: Allocator,
-        block_decoder: Decoder(ReaderType),
-        in_reader: ReaderType,
-
-        fn init(allocator: Allocator, source: ReaderType) !Self {
-            const magic = try source.readBytesNoEof(6);
-            if (!std.mem.eql(u8, &magic, &.{ 0xFD, '7', 'z', 'X', 'Z', 0x00 }))
-                return error.BadHeader;
-
-            var check: Check = undefined;
-            const hash_a = blk: {
-                var hasher = hashedReader(source, Crc32.init());
-                try readStreamFlags(hasher.reader(), &check);
-                break :blk hasher.hasher.final();
-            };
-
-            const hash_b = try source.readInt(u32, .little);
-            if (hash_a != hash_b)
-                return error.WrongChecksum;
-
-            return Self{
-                .allocator = allocator,
-                .block_decoder = try decoder(allocator, source, check),
-                .in_reader = source,
-            };
-        }
-
-        pub fn deinit(self: *Self) void {
-            self.block_decoder.deinit();
-        }
-
-        pub fn reader(self: *Self) Reader {
-            return .{ .context = self };
-        }
-
-        pub fn read(self: *Self, buffer: []u8) Error!usize {
-            if (buffer.len == 0)
-                return 0;
-
-            const r = try self.block_decoder.read(buffer);
-            if (r != 0)
-                return r;
-
-            const index_size = blk: {
-                var hasher = hashedReader(self.in_reader, Crc32.init());
-                hasher.hasher.update(&[1]u8{0x00});
-
-                var counter = std.io.countingReader(hasher.reader());
-                counter.bytes_read += 1;
-
-                const counting_reader = counter.reader();
-
-                const record_count = try std.leb.readUleb128(u64, counting_reader);
-                if (record_count != self.block_decoder.block_count)
-                    return error.CorruptInput;
-
-                var i: usize = 0;
-                while (i < record_count) : (i += 1) {
-                    // TODO: validate records
-                    _ = try std.leb.readUleb128(u64, counting_reader);
-                    _ = try std.leb.readUleb128(u64, counting_reader);
-                }
-
-                while (counter.bytes_read % 4 != 0) {
-                    if (try counting_reader.readByte() != 0)
-                        return error.CorruptInput;
-                }
-
-                const hash_a = hasher.hasher.final();
-                const hash_b = try counting_reader.readInt(u32, .little);
-                if (hash_a != hash_b)
-                    return error.WrongChecksum;
-
-                break :blk counter.bytes_read;
-            };
-
-            const hash_a = try self.in_reader.readInt(u32, .little);
-
-            const hash_b = blk: {
-                var hasher = hashedReader(self.in_reader, Crc32.init());
-                const hashed_reader = hasher.reader();
-
-                const backward_size = (@as(u64, try hashed_reader.readInt(u32, .little)) + 1) * 4;
-                if (backward_size != index_size)
-                    return error.CorruptInput;
-
-                var check: Check = undefined;
-                try readStreamFlags(hashed_reader, &check);
-
-                break :blk hasher.hasher.final();
-            };
-
-            if (hash_a != hash_b)
-                return error.WrongChecksum;
-
-            const magic = try self.in_reader.readBytesNoEof(2);
-            if (!std.mem.eql(u8, &magic, &.{ 'Y', 'Z' }))
-                return error.CorruptInput;
-
-            return 0;
-        }
-    };
-}
-
-pub fn HashedReader(ReaderType: type, HasherType: type) type {
-    return struct {
-        child_reader: ReaderType,
-        hasher: HasherType,
-
-        pub const Error = ReaderType.Error;
-        pub const Reader = std.io.GenericReader(*@This(), Error, read);
-
-        pub fn read(self: *@This(), buf: []u8) Error!usize {
-            const amt = try self.child_reader.read(buf);
-            self.hasher.update(buf[0..amt]);
-            return amt;
-        }
-
-        pub fn reader(self: *@This()) Reader {
-            return .{ .context = self };
-        }
-    };
-}
-
-pub fn hashedReader(
-    reader: anytype,
-    hasher: anytype,
-) HashedReader(@TypeOf(reader), @TypeOf(hasher)) {
-    return .{ .child_reader = reader, .hasher = hasher };
-}
-
-const DecodeError = error{
-    CorruptInput,
-    EndOfStream,
-    EndOfStreamWithNoError,
-    WrongChecksum,
-    Unsupported,
-    Overflow,
-};
-
-pub fn decoder(allocator: Allocator, reader: anytype, check: Check) !Decoder(@TypeOf(reader)) {
-    return Decoder(@TypeOf(reader)).init(allocator, reader, check);
-}
-
-pub fn Decoder(comptime ReaderType: type) type {
-    return struct {
-        const Self = @This();
-        pub const Error =
-            ReaderType.Error ||
-            DecodeError ||
-            Allocator.Error;
-        pub const Reader = std.io.GenericReader(*Self, Error, read);
-
-        allocator: Allocator,
-        inner_reader: ReaderType,
-        check: Check,
-        err: ?Error,
-        to_read: ArrayList(u8),
-        read_pos: usize,
-        block_count: usize,
-
-        fn init(allocator: Allocator, in_reader: ReaderType, check: Check) !Self {
-            return Self{
-                .allocator = allocator,
-                .inner_reader = in_reader,
-                .check = check,
-                .err = null,
-                .to_read = .{},
-                .read_pos = 0,
-                .block_count = 0,
-            };
-        }
-
-        pub fn deinit(self: *Self) void {
-            self.to_read.deinit(self.allocator);
-        }
-
-        pub fn reader(self: *Self) Reader {
-            return .{ .context = self };
-        }
-
-        pub fn read(self: *Self, output: []u8) Error!usize {
-            while (true) {
-                const unread_len = self.to_read.items.len - self.read_pos;
-                if (unread_len > 0) {
-                    const n = @min(unread_len, output.len);
-                    @memcpy(output[0..n], self.to_read.items[self.read_pos..][0..n]);
-                    self.read_pos += n;
-                    return n;
-                }
-                if (self.err) |e| {
-                    if (e == DecodeError.EndOfStreamWithNoError) {
-                        return 0;
-                    }
-                    return e;
-                }
-                if (self.read_pos > 0) {
-                    self.to_read.shrinkRetainingCapacity(0);
-                    self.read_pos = 0;
-                }
-                self.readBlock() catch |e| {
-                    self.err = e;
-                };
-            }
-        }
-
-        fn readBlock(self: *Self) Error!void {
-            var block_counter = std.io.countingReader(self.inner_reader);
-            const block_reader = block_counter.reader();
-
-            var packed_size: ?u64 = null;
-            var unpacked_size: ?u64 = null;
-
-            // Block Header
-            {
-                var header_hasher = hashedReader(block_reader, Crc32.init());
-                const header_reader = header_hasher.reader();
-
-                const header_size = @as(u64, try header_reader.readByte()) * 4;
-                if (header_size == 0)
-                    return error.EndOfStreamWithNoError;
-
-                const Flags = packed struct(u8) {
-                    last_filter_index: u2,
-                    reserved: u4,
-                    has_packed_size: bool,
-                    has_unpacked_size: bool,
-                };
-
-                const flags = @as(Flags, @bitCast(try header_reader.readByte()));
-                const filter_count = @as(u3, flags.last_filter_index) + 1;
-                if (filter_count > 1)
-                    return error.Unsupported;
-
-                if (flags.has_packed_size)
-                    packed_size = try std.leb.readUleb128(u64, header_reader);
-
-                if (flags.has_unpacked_size)
-                    unpacked_size = try std.leb.readUleb128(u64, header_reader);
-
-                const FilterId = enum(u64) {
-                    lzma2 = 0x21,
-                    _,
-                };
-
-                const filter_id = @as(
-                    FilterId,
-                    @enumFromInt(try std.leb.readUleb128(u64, header_reader)),
-                );
-
-                if (@intFromEnum(filter_id) >= 0x4000_0000_0000_0000)
-                    return error.CorruptInput;
-
-                if (filter_id != .lzma2)
-                    return error.Unsupported;
-
-                const properties_size = try std.leb.readUleb128(u64, header_reader);
-                if (properties_size != 1)
-                    return error.CorruptInput;
-
-                // TODO: use filter properties
-                _ = try header_reader.readByte();
-
-                while (block_counter.bytes_read != header_size) {
-                    if (try header_reader.readByte() != 0)
-                        return error.CorruptInput;
-                }
-
-                const hash_a = header_hasher.hasher.final();
-                const hash_b = try header_reader.readInt(u32, .little);
-                if (hash_a != hash_b)
-                    return error.WrongChecksum;
-            }
-
-            // Compressed Data
-            var packed_counter = std.io.countingReader(block_reader);
-            try lzma2.decompress(
-                self.allocator,
-                packed_counter.reader(),
-                self.to_read.writer(self.allocator),
-            );
-
-            if (packed_size) |s| {
-                if (s != packed_counter.bytes_read)
-                    return error.CorruptInput;
-            }
-
-            const unpacked_bytes = self.to_read.items;
-            if (unpacked_size) |s| {
-                if (s != unpacked_bytes.len)
-                    return error.CorruptInput;
-            }
-
-            // Block Padding
-            while (block_counter.bytes_read % 4 != 0) {
-                if (try block_reader.readByte() != 0)
-                    return error.CorruptInput;
-            }
-
-            switch (self.check) {
-                .none => {},
-                .crc32 => {
-                    const hash_a = Crc32.hash(unpacked_bytes);
-                    const hash_b = try self.inner_reader.readInt(u32, .little);
-                    if (hash_a != hash_b)
-                        return error.WrongChecksum;
-                },
-                .crc64 => {
-                    const hash_a = Crc64.hash(unpacked_bytes);
-                    const hash_b = try self.inner_reader.readInt(u64, .little);
-                    if (hash_a != hash_b)
-                        return error.WrongChecksum;
-                },
-                .sha256 => {
-                    var hash_a: [Sha256.digest_length]u8 = undefined;
-                    Sha256.hash(unpacked_bytes, &hash_a, .{});
-
-                    var hash_b: [Sha256.digest_length]u8 = undefined;
-                    try self.inner_reader.readNoEof(&hash_b);
-
-                    if (!std.mem.eql(u8, &hash_a, &hash_b))
-                        return error.WrongChecksum;
-                },
-                else => return error.Unsupported,
-            }
-
-            self.block_count += 1;
-        }
-    };
-}
+pub const Decompress = @import("xz/Decompress.zig");
 
 test {
     _ = @import("xz/test.zig");
diff --git a/lib/std/compress/xz/Decompress.zig b/lib/std/compress/xz/Decompress.zig
new file mode 100644
index 0000000000..6b925020d6
--- /dev/null
+++ b/lib/std/compress/xz/Decompress.zig
@@ -0,0 +1,288 @@
+const Decompress = @This();
+const std = @import("../../std.zig");
+const Allocator = std.mem.Allocator;
+const ArrayList = std.ArrayList;
+const Crc32 = std.hash.Crc32;
+const Crc64 = std.hash.crc.Crc64Xz;
+const Sha256 = std.crypto.hash.sha2.Sha256;
+const lzma2 = std.compress.lzma2;
+const Writer = std.Io.Writer;
+const Reader = std.Io.Reader;
+
+/// Underlying compressed data stream to pull bytes from.
+input: *Reader,
+/// Uncompressed bytes output by this stream implementation.
+reader: Reader,
+gpa: Allocator,
+check: Check,
+block_count: usize,
+err: ?Error,
+
+pub const Error = error{
+    ReadFailed,
+    OutOfMemory,
+    CorruptInput,
+    EndOfStream,
+    WrongChecksum,
+    Unsupported,
+    Overflow,
+};
+
+pub const Check = enum(u4) {
+    none = 0x00,
+    crc32 = 0x01,
+    crc64 = 0x04,
+    sha256 = 0x0A,
+    _,
+};
+
+pub const StreamFlags = packed struct(u16) {
+    null: u8 = 0,
+    check: Check,
+    reserved: u4 = 0,
+};
+
+pub const InitError = error{
+    NotXzStream,
+    WrongChecksum,
+};
+
+/// Validates the XZ stream header read from `input` and returns a decompressor.
+/// XZ uses a series of LZMA2 blocks which each specify a dictionary size
+/// anywhere from 4K to 4G. Thus, this API dynamically allocates the dictionary
+/// as-needed.
+/// Fails with `error.NotXzStream` on a bad magic, `error.WrongChecksum` on a bad
+/// stream-flags CRC32, or with the error from reading `input`.
+pub fn init(
+    input: *Reader,
+    gpa: Allocator,
+    /// Decompress takes ownership of this buffer and resizes it with `gpa`.
+    buffer: []u8,
+) !Decompress {
+    const magic = try input.takeArray(6);
+    if (!std.mem.eql(u8, magic, &.{ 0xFD, '7', 'z', 'X', 'Z', 0x00 })) return error.NotXzStream;
+
+    // The flag bytes are peeked for hashing first, so taking them cannot fail.
+    const actual_hash = Crc32.hash(try input.peek(@sizeOf(StreamFlags)));
+    const stream_flags = input.takeStruct(StreamFlags, .little) catch unreachable;
+    const stored_hash = try input.takeInt(u32, .little);
+    if (actual_hash != stored_hash) return error.WrongChecksum;
+
+    return .{
+        .input = input,
+        .reader = .{
+            .vtable = &.{ .stream = stream, .readVec = readVec },
+            .buffer = buffer,
+            .seek = 0,
+            .end = 0,
+        },
+        .gpa = gpa,
+        .check = stream_flags.check,
+        .block_count = 0,
+        .err = null,
+    };
+}
+
+/// Unimplemented `stream` hook of the `reader` vtable; always panics with "TODO".
+/// `r` is treated as the `reader` field of a `Decompress`, which is recovered from it.
+fn stream(r: *Reader, w: *Writer, limit: std.Io.Limit) Reader.StreamError!usize {
+    const d: *Decompress = @alignCast(@fieldParentPtr("reader", r));
+    _ = .{ d, w, limit };
+    @panic("TODO");
+}
+
+/// Unimplemented `readVec` hook of the `reader` vtable; always panics with "TODO".
+fn readVec(r: *Reader, data: [][]u8) Reader.Error!usize {
+    const d: *Decompress = @alignCast(@fieldParentPtr("reader", r));
+    _ = .{ d, data };
+    @panic("TODO");
+}
+
+//    if (buffer.len == 0)
+//        return 0;
+//
+//    const r = try self.block_decode.read(buffer);
+//    if (r != 0)
+//        return r;
+//
+//    const index_size = blk: {
+//        var hasher = hashedReader(self.in_reader, Crc32.init());
+//        hasher.hasher.update(&[1]u8{0x00});
+//
+//        var counter = std.io.countingReader(hasher.reader());
+//        counter.bytes_read += 1;
+//
+//        const counting_reader = counter.reader();
+//
+//        const record_count = try std.leb.readUleb128(u64, counting_reader);
+//        if (record_count != self.block_decode.block_count)
+//            return error.CorruptInput;
+//
+//        var i: usize = 0;
+//        while (i < record_count) : (i += 1) {
+//            // TODO: validate records
+//            _ = try std.leb.readUleb128(u64, counting_reader);
+//            _ = try std.leb.readUleb128(u64, counting_reader);
+//        }
+//
+//        while (counter.bytes_read % 4 != 0) {
+//            if (try counting_reader.readByte() != 0)
+//                return error.CorruptInput;
+//        }
+//
+//        const hash_a = hasher.hasher.final();
+//        const hash_b = try counting_reader.readInt(u32, .little);
+//        if (hash_a != hash_b)
+//            return error.WrongChecksum;
+//
+//        break :blk counter.bytes_read;
+//    };
+//
+//    const hash_a = try self.in_reader.readInt(u32, .little);
+//
+//    const hash_b = blk: {
+//        var hasher = hashedReader(self.in_reader, Crc32.init());
+//        const hashed_reader = hasher.reader();
+//
+//        const backward_size = (@as(u64, try hashed_reader.readInt(u32, .little)) + 1) * 4;
+//        if (backward_size != index_size)
+//            return error.CorruptInput;
+//
+//        var check: Check = undefined;
+//        try readStreamFlags(hashed_reader, &check);
+//
+//        break :blk hasher.hasher.final();
+//    };
+//
+//    if (hash_a != hash_b)
+//        return error.WrongChecksum;
+//
+//    const magic = try self.in_reader.readBytesNoEof(2);
+//    if (!std.mem.eql(u8, &magic, &.{ 'Y', 'Z' }))
+//        return error.CorruptInput;
+//
+//    return 0;
+//}
+
+//fn readBlock(self: *BlockDecode) Error!void {
+//    var block_counter = std.io.countingReader(self.inner_reader);
+//    const block_reader = block_counter.reader();
+//
+//    var packed_size: ?u64 = null;
+//    var unpacked_size: ?u64 = null;
+//
+//    // Block Header
+//    {
+//        var header_hasher = hashedReader(block_reader, Crc32.init());
+//        const header_reader = header_hasher.reader();
+//
+//        const header_size = @as(u64, try header_reader.readByte()) * 4;
+//        if (header_size == 0)
+//            return error.EndOfStreamWithNoError;
+//
+//        const Flags = packed struct(u8) {
+//            last_filter_index: u2,
+//            reserved: u4,
+//            has_packed_size: bool,
+//            has_unpacked_size: bool,
+//        };
+//
+//        const flags = @as(Flags, @bitCast(try header_reader.readByte()));
+//        const filter_count = @as(u3, flags.last_filter_index) + 1;
+//        if (filter_count > 1)
+//            return error.Unsupported;
+//
+//        if (flags.has_packed_size)
+//            packed_size = try std.leb.readUleb128(u64, header_reader);
+//
+//        if (flags.has_unpacked_size)
+//            unpacked_size = try std.leb.readUleb128(u64, header_reader);
+//
+//        const FilterId = enum(u64) {
+//            lzma2 = 0x21,
+//            _,
+//        };
+//
+//        const filter_id = @as(
+//            FilterId,
+//            @enumFromInt(try std.leb.readUleb128(u64, header_reader)),
+//        );
+//
+//        if (@intFromEnum(filter_id) >= 0x4000_0000_0000_0000)
+//            return error.CorruptInput;
+//
+//        if (filter_id != .lzma2)
+//            return error.Unsupported;
+//
+//        const properties_size = try std.leb.readUleb128(u64, header_reader);
+//        if (properties_size != 1)
+//            return error.CorruptInput;
+//
+//        // TODO: use filter properties
+//        _ = try header_reader.readByte();
+//
+//        while (block_counter.bytes_read != header_size) {
+//            if (try header_reader.readByte() != 0)
+//                return error.CorruptInput;
+//        }
+//
+//        const hash_a = header_hasher.hasher.final();
+//        const hash_b = try header_reader.readInt(u32, .little);
+//        if (hash_a != hash_b)
+//            return error.WrongChecksum;
+//    }
+//
+//    // Compressed Data
+//    var packed_counter = std.io.countingReader(block_reader);
+//    try lzma2.decompress(
+//        self.allocator,
+//        packed_counter.reader(),
+//        self.to_read.writer(self.allocator),
+//    );
+//
+//    if (packed_size) |s| {
+//        if (s != packed_counter.bytes_read)
+//            return error.CorruptInput;
+//    }
+//
+//    const unpacked_bytes = self.to_read.items;
+//    if (unpacked_size) |s| {
+//        if (s != unpacked_bytes.len)
+//            return error.CorruptInput;
+//    }
+//
+//    // Block Padding
+//    while (block_counter.bytes_read % 4 != 0) {
+//        if (try block_reader.readByte() != 0)
+//            return error.CorruptInput;
+//    }
+//
+//    switch (self.check) {
+//        .none => {},
+//        .crc32 => {
+//            const hash_a = Crc32.hash(unpacked_bytes);
+//            const hash_b = try self.inner_reader.readInt(u32, .little);
+//            if (hash_a != hash_b)
+//                return error.WrongChecksum;
+//        },
+//        .crc64 => {
+//            const hash_a = Crc64.hash(unpacked_bytes);
+//            const hash_b = try self.inner_reader.readInt(u64, .little);
+//            if (hash_a != hash_b)
+//                return error.WrongChecksum;
+//        },
+//        .sha256 => {
+//            var hash_a: [Sha256.digest_length]u8 = undefined;
+//            Sha256.hash(unpacked_bytes, &hash_a, .{});
+//
+//            var hash_b: [Sha256.digest_length]u8 = undefined;
+//            try self.inner_reader.readNoEof(&hash_b);
+//
+//            if (!std.mem.eql(u8, &hash_a, &hash_b))
+//                return error.WrongChecksum;
+//        },
+//        else => return error.Unsupported,
+//    }
+//
+//    self.block_count += 1;
+//}
author	Andrew Kelley <andrew@ziglang.org>	2025-08-25 18:03:48 -0700
committer	Andrew Kelley <andrew@ziglang.org>	2025-08-26 21:00:58 -0700
commit	58e60697e2930f4311ae9e744ae1c2877e0b69ed (patch)
tree	fd84142e826d2252f23eaae9b002ae0d3f43e341 /lib/std
parent	6464e0d4fc9937e154c34567891bae84c63732b9 (diff)
download	zig-58e60697e2930f4311ae9e744ae1c2877e0b69ed.tar.gz zig-58e60697e2930f4311ae9e744ae1c2877e0b69ed.zip