diff options
| author | Andrew Kelley <andrew@ziglang.org> | 2025-08-22 14:03:44 -0700 |
|---|---|---|
| committer | Andrew Kelley <andrew@ziglang.org> | 2025-08-26 21:00:58 -0700 |
| commit | ea0ce7afb59d7c7ed33f707f3fea3e0babd785b6 (patch) | |
| tree | b77bdcd8b4ad1b484d15e84ed4123a0382b4a024 | |
| parent | 9399fcddce0bcd8e987b053f3946aa1b0ff2ef0a (diff) | |
| download | zig-ea0ce7afb59d7c7ed33f707f3fea3e0babd785b6.tar.gz zig-ea0ce7afb59d7c7ed33f707f3fea3e0babd785b6.zip | |
std.compress: flatten lzma and lzma2 namespaces
| -rw-r--r-- | lib/std/compress/lzma.zig | 726 | ||||
| -rw-r--r-- | lib/std/compress/lzma/decode.zig | 379 | ||||
| -rw-r--r-- | lib/std/compress/lzma/decode/lzbuffer.zig | 228 | ||||
| -rw-r--r-- | lib/std/compress/lzma/decode/rangecoder.zig | 181 | ||||
| -rw-r--r-- | lib/std/compress/lzma/vec2d.zig | 128 | ||||
| -rw-r--r-- | lib/std/compress/lzma2.zig | 268 | ||||
| -rw-r--r-- | lib/std/compress/lzma2/decode.zig | 169 |
7 files changed, 981 insertions, 1098 deletions
diff --git a/lib/std/compress/lzma.zig b/lib/std/compress/lzma.zig index 5d072f5381..c40eeeb56e 100644 --- a/lib/std/compress/lzma.zig +++ b/lib/std/compress/lzma.zig @@ -2,8 +2,713 @@ const std = @import("../std.zig"); const math = std.math; const mem = std.mem; const Allocator = std.mem.Allocator; +const assert = std.debug.assert; +const ArrayList = std.ArrayList; -pub const decode = @import("lzma/decode.zig"); +pub const RangeDecoder = struct { + range: u32, + code: u32, + + pub fn init(reader: anytype) !RangeDecoder { + const reserved = try reader.readByte(); + if (reserved != 0) { + return error.CorruptInput; + } + return RangeDecoder{ + .range = 0xFFFF_FFFF, + .code = try reader.readInt(u32, .big), + }; + } + + pub fn fromParts( + range: u32, + code: u32, + ) RangeDecoder { + return .{ + .range = range, + .code = code, + }; + } + + pub fn set(self: *RangeDecoder, range: u32, code: u32) void { + self.range = range; + self.code = code; + } + + pub inline fn isFinished(self: RangeDecoder) bool { + return self.code == 0; + } + + inline fn normalize(self: *RangeDecoder, reader: anytype) !void { + if (self.range < 0x0100_0000) { + self.range <<= 8; + self.code = (self.code << 8) ^ @as(u32, try reader.readByte()); + } + } + + inline fn getBit(self: *RangeDecoder, reader: anytype) !bool { + self.range >>= 1; + + const bit = self.code >= self.range; + if (bit) + self.code -= self.range; + + try self.normalize(reader); + return bit; + } + + pub fn get(self: *RangeDecoder, reader: anytype, count: usize) !u32 { + var result: u32 = 0; + var i: usize = 0; + while (i < count) : (i += 1) + result = (result << 1) ^ @intFromBool(try self.getBit(reader)); + return result; + } + + pub inline fn decodeBit(self: *RangeDecoder, reader: anytype, prob: *u16, update: bool) !bool { + const bound = (self.range >> 11) * prob.*; + + if (self.code < bound) { + if (update) + prob.* += (0x800 - prob.*) >> 5; + self.range = bound; + + try self.normalize(reader); + return false; + } else { + if (update) + prob.* -= prob.* >> 5; + self.code -= bound; + self.range -= bound; + + try self.normalize(reader); + return true; + } + } + + fn parseBitTree( + self: *RangeDecoder, + reader: anytype, + num_bits: u5, + probs: []u16, + update: bool, + ) !u32 { + var tmp: u32 = 1; + var i: @TypeOf(num_bits) = 0; + while (i < num_bits) : (i += 1) { + const bit = try self.decodeBit(reader, &probs[tmp], update); + tmp = (tmp << 1) ^ @intFromBool(bit); + } + return tmp - (@as(u32, 1) << num_bits); + } + + pub fn parseReverseBitTree( + self: *RangeDecoder, + reader: anytype, + num_bits: u5, + probs: []u16, + offset: usize, + update: bool, + ) !u32 { + var result: u32 = 0; + var tmp: usize = 1; + var i: @TypeOf(num_bits) = 0; + while (i < num_bits) : (i += 1) { + const bit = @intFromBool(try self.decodeBit(reader, &probs[offset + tmp], update)); + tmp = (tmp << 1) ^ bit; + result ^= @as(u32, bit) << i; + } + return result; + } +}; + +pub const Decode = struct { + lzma_props: Properties, + unpacked_size: ?u64, + literal_probs: Vec2d, + pos_slot_decoder: [4]BitTree(6), + align_decoder: BitTree(4), + pos_decoders: [115]u16, + is_match: [192]u16, + is_rep: [12]u16, + is_rep_g0: [12]u16, + is_rep_g1: [12]u16, + is_rep_g2: [12]u16, + is_rep_0long: [192]u16, + state: usize, + rep: [4]usize, + len_decoder: LenDecoder, + rep_len_decoder: LenDecoder, + + pub fn init( + allocator: Allocator, + lzma_props: Properties, + unpacked_size: ?u64, + ) !Decode { + return .{ + .lzma_props = lzma_props, + .unpacked_size = unpacked_size, + .literal_probs = try Vec2d.init(allocator, 0x400, .{ @as(usize, 1) << (lzma_props.lc + lzma_props.lp), 0x300 }), + .pos_slot_decoder = @splat(.{}), + .align_decoder = .{}, + .pos_decoders = @splat(0x400), + .is_match = @splat(0x400), + .is_rep = @splat(0x400), + .is_rep_g0 = @splat(0x400), + .is_rep_g1 = @splat(0x400), + .is_rep_g2 = @splat(0x400), + .is_rep_0long = @splat(0x400), + .state = 0, + .rep = @splat(0), + .len_decoder = .{}, + .rep_len_decoder = .{}, + }; + } + + pub fn deinit(self: *Decode, allocator: Allocator) void { + self.literal_probs.deinit(allocator); + self.* = undefined; + } + + pub fn resetState(self: *Decode, allocator: Allocator, new_props: Properties) !void { + new_props.validate(); + if (self.lzma_props.lc + self.lzma_props.lp == new_props.lc + new_props.lp) { + self.literal_probs.fill(0x400); + } else { + self.literal_probs.deinit(allocator); + self.literal_probs = try Vec2d.init(allocator, 0x400, .{ @as(usize, 1) << (new_props.lc + new_props.lp), 0x300 }); + } + + self.lzma_props = new_props; + for (&self.pos_slot_decoder) |*t| t.reset(); + self.align_decoder.reset(); + self.pos_decoders = @splat(0x400); + self.is_match = @splat(0x400); + self.is_rep = @splat(0x400); + self.is_rep_g0 = @splat(0x400); + self.is_rep_g1 = @splat(0x400); + self.is_rep_g2 = @splat(0x400); + self.is_rep_0long = @splat(0x400); + self.state = 0; + self.rep = @splat(0); + self.len_decoder.reset(); + self.rep_len_decoder.reset(); + } + + fn processNextInner( + self: *Decode, + allocator: Allocator, + reader: anytype, + writer: anytype, + buffer: anytype, + decoder: *RangeDecoder, + update: bool, + ) !ProcessingStatus { + const pos_state = buffer.len & ((@as(usize, 1) << self.lzma_props.pb) - 1); + + if (!try decoder.decodeBit( + reader, + &self.is_match[(self.state << 4) + pos_state], + update, + )) { + const byte: u8 = try self.decodeLiteral(reader, buffer, decoder, update); + + if (update) { + try buffer.appendLiteral(allocator, byte, writer); + + self.state = if (self.state < 4) + 0 + else if (self.state < 10) + self.state - 3 + else + self.state - 6; + } + return .continue_; + } + + var len: usize = undefined; + if (try decoder.decodeBit(reader, &self.is_rep[self.state], update)) { + if (!try decoder.decodeBit(reader, &self.is_rep_g0[self.state], update)) { + if (!try decoder.decodeBit( + reader, + &self.is_rep_0long[(self.state << 4) + pos_state], + update, + )) { + if (update) { + self.state = if (self.state < 7) 9 else 11; + const dist = self.rep[0] + 1; + try buffer.appendLz(allocator, 1, dist, writer); + } + return .continue_; + } + } else { + const idx: usize = if (!try decoder.decodeBit(reader, &self.is_rep_g1[self.state], update)) + 1 + else if (!try decoder.decodeBit(reader, &self.is_rep_g2[self.state], update)) + 2 + else + 3; + if (update) { + const dist = self.rep[idx]; + var i = idx; + while (i > 0) : (i -= 1) { + self.rep[i] = self.rep[i - 1]; + } + self.rep[0] = dist; + } + } + + len = try self.rep_len_decoder.decode(reader, decoder, pos_state, update); + + if (update) { + self.state = if (self.state < 7) 8 else 11; + } + } else { + if (update) { + self.rep[3] = self.rep[2]; + self.rep[2] = self.rep[1]; + self.rep[1] = self.rep[0]; + } + + len = try self.len_decoder.decode(reader, decoder, pos_state, update); + + if (update) { + self.state = if (self.state < 7) 7 else 10; + } + + const rep_0 = try self.decodeDistance(reader, decoder, len, update); + + if (update) { + self.rep[0] = rep_0; + if (self.rep[0] == 0xFFFF_FFFF) { + if (decoder.isFinished()) { + return .finished; + } + return error.CorruptInput; + } + } + } + + if (update) { + len += 2; + + const dist = self.rep[0] + 1; + try buffer.appendLz(allocator, len, dist, writer); + } + + return .continue_; + } + + fn processNext( + self: *Decode, + allocator: Allocator, + reader: anytype, + writer: anytype, + buffer: anytype, + decoder: *RangeDecoder, + ) !ProcessingStatus { + return self.processNextInner(allocator, reader, writer, buffer, decoder, true); + } + + pub fn process( + self: *Decode, + allocator: Allocator, + reader: anytype, + writer: anytype, + buffer: anytype, + decoder: *RangeDecoder, + ) !ProcessingStatus { + process_next: { + if (self.unpacked_size) |unpacked_size| { + if (buffer.len >= unpacked_size) { + break :process_next; + } + } else if (decoder.isFinished()) { + break :process_next; + } + + switch (try self.processNext(allocator, reader, writer, buffer, decoder)) { + .continue_ => return .continue_, + .finished => break :process_next, + } + } + + if (self.unpacked_size) |unpacked_size| { + if (buffer.len != unpacked_size) { + return error.CorruptInput; + } + } + + return .finished; + } + + fn decodeLiteral( + self: *Decode, + reader: anytype, + buffer: anytype, + decoder: *RangeDecoder, + update: bool, + ) !u8 { + const def_prev_byte = 0; + const prev_byte = @as(usize, buffer.lastOr(def_prev_byte)); + + var result: usize = 1; + const lit_state = ((buffer.len & ((@as(usize, 1) << self.lzma_props.lp) - 1)) << self.lzma_props.lc) + + (prev_byte >> (8 - self.lzma_props.lc)); + const probs = try self.literal_probs.getMut(lit_state); + + if (self.state >= 7) { + var match_byte = @as(usize, try buffer.lastN(self.rep[0] + 1)); + + while (result < 0x100) { + const match_bit = (match_byte >> 7) & 1; + match_byte <<= 1; + const bit = @intFromBool(try decoder.decodeBit( + reader, + &probs[((@as(usize, 1) + match_bit) << 8) + result], + update, + )); + result = (result << 1) ^ bit; + if (match_bit != bit) { + break; + } + } + } + + while (result < 0x100) { + result = (result << 1) ^ @intFromBool(try decoder.decodeBit(reader, &probs[result], update)); + } + + return @as(u8, @truncate(result - 0x100)); + } + + fn decodeDistance( + self: *Decode, + reader: anytype, + decoder: *RangeDecoder, + length: usize, + update: bool, + ) !usize { + const len_state = if (length > 3) 3 else length; + + const pos_slot = @as(usize, try self.pos_slot_decoder[len_state].parse(reader, decoder, update)); + if (pos_slot < 4) + return pos_slot; + + const num_direct_bits = @as(u5, @intCast((pos_slot >> 1) - 1)); + var result = (2 ^ (pos_slot & 1)) << num_direct_bits; + + if (pos_slot < 14) { + result += try decoder.parseReverseBitTree( + reader, + num_direct_bits, + &self.pos_decoders, + result - pos_slot, + update, + ); + } else { + result += @as(usize, try decoder.get(reader, num_direct_bits - 4)) << 4; + result += try self.align_decoder.parseReverse(reader, decoder, update); + } + + return result; + } + + /// A circular buffer for LZ sequences + pub const LzCircularBuffer = struct { + /// Circular buffer + buf: ArrayList(u8), + + /// Length of the buffer + dict_size: usize, + + /// Buffer memory limit + memlimit: usize, + + /// Current position + cursor: usize, + + /// Total number of bytes sent through the buffer + len: usize, + + const Self = @This(); + + pub fn init(dict_size: usize, memlimit: usize) Self { + return Self{ + .buf = .{}, + .dict_size = dict_size, + .memlimit = memlimit, + .cursor = 0, + .len = 0, + }; + } + + pub fn get(self: Self, index: usize) u8 { + return if (0 <= index and index < self.buf.items.len) + self.buf.items[index] + else + 0; + } + + pub fn set(self: *Self, allocator: Allocator, index: usize, value: u8) !void { + if (index >= self.memlimit) { + return error.CorruptInput; + } + try self.buf.ensureTotalCapacity(allocator, index + 1); + while (self.buf.items.len < index) { + self.buf.appendAssumeCapacity(0); + } + self.buf.appendAssumeCapacity(value); + } + + /// Retrieve the last byte or return a default + pub fn lastOr(self: Self, lit: u8) u8 { + return if (self.len == 0) + lit + else + self.get((self.dict_size + self.cursor - 1) % self.dict_size); + } + + /// Retrieve the n-th last byte + pub fn lastN(self: Self, dist: usize) !u8 { + if (dist > self.dict_size or dist > self.len) { + return error.CorruptInput; + } + + const offset = (self.dict_size + self.cursor - dist) % self.dict_size; + return self.get(offset); + } + + /// Append a literal + pub fn appendLiteral( + self: *Self, + allocator: Allocator, + lit: u8, + writer: anytype, + ) !void { + try self.set(allocator, self.cursor, lit); + self.cursor += 1; + self.len += 1; + + // Flush the circular buffer to the output + if (self.cursor == self.dict_size) { + try writer.writeAll(self.buf.items); + self.cursor = 0; + } + } + + /// Fetch an LZ sequence (length, distance) from inside the buffer + pub fn appendLz( + self: *Self, + allocator: Allocator, + len: usize, + dist: usize, + writer: anytype, + ) !void { + if (dist > self.dict_size or dist > self.len) { + return error.CorruptInput; + } + + var offset = (self.dict_size + self.cursor - dist) % self.dict_size; + var i: usize = 0; + while (i < len) : (i += 1) { + const x = self.get(offset); + try self.appendLiteral(allocator, x, writer); + offset += 1; + if (offset == self.dict_size) { + offset = 0; + } + } + } + + pub fn finish(self: *Self, writer: anytype) !void { + if (self.cursor > 0) { + try writer.writeAll(self.buf.items[0..self.cursor]); + self.cursor = 0; + } + } + + pub fn deinit(self: *Self, allocator: Allocator) void { + self.buf.deinit(allocator); + self.* = undefined; + } + }; + + pub fn BitTree(comptime num_bits: usize) type { + return struct { + probs: [1 << num_bits]u16 = @splat(0x400), + + const Self = @This(); + + pub fn parse( + self: *Self, + reader: anytype, + decoder: *RangeDecoder, + update: bool, + ) !u32 { + return decoder.parseBitTree(reader, num_bits, &self.probs, update); + } + + pub fn parseReverse( + self: *Self, + reader: anytype, + decoder: *RangeDecoder, + update: bool, + ) !u32 { + return decoder.parseReverseBitTree(reader, num_bits, &self.probs, 0, update); + } + + pub fn reset(self: *Self) void { + @memset(&self.probs, 0x400); + } + }; + } + + pub const LenDecoder = struct { + choice: u16 = 0x400, + choice2: u16 = 0x400, + low_coder: [16]BitTree(3) = @splat(.{}), + mid_coder: [16]BitTree(3) = @splat(.{}), + high_coder: BitTree(8) = .{}, + + pub fn decode( + self: *LenDecoder, + reader: anytype, + decoder: *RangeDecoder, + pos_state: usize, + update: bool, + ) !usize { + if (!try decoder.decodeBit(reader, &self.choice, update)) { + return @as(usize, try self.low_coder[pos_state].parse(reader, decoder, update)); + } else if (!try decoder.decodeBit(reader, &self.choice2, update)) { + return @as(usize, try self.mid_coder[pos_state].parse(reader, decoder, update)) + 8; + } else { + return @as(usize, try self.high_coder.parse(reader, decoder, update)) + 16; + } + } + + pub fn reset(self: *LenDecoder) void { + self.choice = 0x400; + self.choice2 = 0x400; + for (&self.low_coder) |*t| t.reset(); + for (&self.mid_coder) |*t| t.reset(); + self.high_coder.reset(); + } + }; + + pub const Vec2d = struct { + data: []u16, + cols: usize, + + const Self = @This(); + + pub fn init(allocator: Allocator, value: u16, size: struct { usize, usize }) !Self { + const len = try math.mul(usize, size[0], size[1]); + const data = try allocator.alloc(u16, len); + @memset(data, value); + return Self{ + .data = data, + .cols = size[1], + }; + } + + pub fn deinit(self: *Self, allocator: Allocator) void { + allocator.free(self.data); + self.* = undefined; + } + + pub fn fill(self: *Self, value: u16) void { + @memset(self.data, value); + } + + inline fn _get(self: Self, row: usize) ![]u16 { + const start_row = try math.mul(usize, row, self.cols); + const end_row = try math.add(usize, start_row, self.cols); + return self.data[start_row..end_row]; + } + + pub fn get(self: Self, row: usize) ![]const u16 { + return self._get(row); + } + + pub fn getMut(self: *Self, row: usize) ![]u16 { + return self._get(row); + } + }; + + pub const Options = struct { + unpacked_size: UnpackedSize = .read_from_header, + memlimit: ?usize = null, + allow_incomplete: bool = false, + }; + + pub const UnpackedSize = union(enum) { + read_from_header, + read_header_but_use_provided: ?u64, + use_provided: ?u64, + }; + + const ProcessingStatus = enum { + continue_, + finished, + }; + + pub const Properties = struct { + lc: u4, + lp: u3, + pb: u3, + + fn validate(self: Properties) void { + assert(self.lc <= 8); + assert(self.lp <= 4); + assert(self.pb <= 4); + } + }; + + pub const Params = struct { + properties: Properties, + dict_size: u32, + unpacked_size: ?u64, + + pub fn readHeader(reader: anytype, options: Options) !Params { + var props = try reader.readByte(); + if (props >= 225) { + return error.CorruptInput; + } + + const lc = @as(u4, @intCast(props % 9)); + props /= 9; + const lp = @as(u3, @intCast(props % 5)); + props /= 5; + const pb = @as(u3, @intCast(props)); + + const dict_size_provided = try reader.readInt(u32, .little); + const dict_size = @max(0x1000, dict_size_provided); + + const unpacked_size = switch (options.unpacked_size) { + .read_from_header => blk: { + const unpacked_size_provided = try reader.readInt(u64, .little); + const marker_mandatory = unpacked_size_provided == 0xFFFF_FFFF_FFFF_FFFF; + break :blk if (marker_mandatory) + null + else + unpacked_size_provided; + }, + .read_header_but_use_provided => |x| blk: { + _ = try reader.readInt(u64, .little); + break :blk x; + }, + .use_provided => |x| x, + }; + + return Params{ + .properties = Properties{ .lc = lc, .lp = lp, .pb = pb }, + .dict_size = dict_size, + .unpacked_size = unpacked_size, + }; + } + }; +}; pub fn decompress( allocator: Allocator, @@ -15,9 +720,9 @@ pub fn decompress( pub fn decompressWithOptions( allocator: Allocator, reader: anytype, - options: decode.Options, + options: Decode.Options, ) !Decompress(@TypeOf(reader)) { - const params = try decode.Params.readHeader(reader, options); + const params = try Decode.Params.readHeader(reader, options); return Decompress(@TypeOf(reader)).init(allocator, reader, params, options.memlimit); } @@ -36,19 +741,19 @@ pub fn Decompress(comptime ReaderType: type) type { in_reader: ReaderType, to_read: std.ArrayListUnmanaged(u8), - buffer: decode.lzbuffer.LzCircularBuffer, - decoder: decode.rangecoder.RangeDecoder, - state: decode.DecoderState, + buffer: Decode.LzCircularBuffer, + decoder: RangeDecoder, + state: Decode, - pub fn init(allocator: Allocator, source: ReaderType, params: decode.Params, memlimit: ?usize) !Self { + pub fn init(allocator: Allocator, source: ReaderType, params: Decode.Params, memlimit: ?usize) !Self { return Self{ .allocator = allocator, .in_reader = source, .to_read = .{}, - .buffer = decode.lzbuffer.LzCircularBuffer.init(params.dict_size, memlimit orelse math.maxInt(usize)), - .decoder = try decode.rangecoder.RangeDecoder.init(source), - .state = try decode.DecoderState.init(allocator, params.properties, params.unpacked_size), + .buffer = Decode.LzCircularBuffer.init(params.dict_size, memlimit orelse math.maxInt(usize)), + .decoder = try RangeDecoder.init(source), + .state = try Decode.init(allocator, params.properties, params.unpacked_size), }; } @@ -86,5 +791,4 @@ pub fn Decompress(comptime ReaderType: type) type { test { _ = @import("lzma/test.zig"); - _ = @import("lzma/vec2d.zig"); } diff --git a/lib/std/compress/lzma/decode.zig b/lib/std/compress/lzma/decode.zig deleted file mode 100644 index ac45eb52b3..0000000000 --- a/lib/std/compress/lzma/decode.zig +++ /dev/null @@ -1,379 +0,0 @@ -const std = @import("../../std.zig"); -const assert = std.debug.assert; -const math = std.math; -const Allocator = std.mem.Allocator; - -pub const lzbuffer = @import("decode/lzbuffer.zig"); -pub const rangecoder = @import("decode/rangecoder.zig"); - -const LzCircularBuffer = lzbuffer.LzCircularBuffer; -const BitTree = rangecoder.BitTree; -const LenDecoder = rangecoder.LenDecoder; -const RangeDecoder = rangecoder.RangeDecoder; -const Vec2D = @import("vec2d.zig").Vec2D; - -pub const Options = struct { - unpacked_size: UnpackedSize = .read_from_header, - memlimit: ?usize = null, - allow_incomplete: bool = false, -}; - -pub const UnpackedSize = union(enum) { - read_from_header, - read_header_but_use_provided: ?u64, - use_provided: ?u64, -}; - -const ProcessingStatus = enum { - continue_, - finished, -}; - -pub const Properties = struct { - lc: u4, - lp: u3, - pb: u3, - - fn validate(self: Properties) void { - assert(self.lc <= 8); - assert(self.lp <= 4); - assert(self.pb <= 4); - } -}; - -pub const Params = struct { - properties: Properties, - dict_size: u32, - unpacked_size: ?u64, - - pub fn readHeader(reader: anytype, options: Options) !Params { - var props = try reader.readByte(); - if (props >= 225) { - return error.CorruptInput; - } - - const lc = @as(u4, @intCast(props % 9)); - props /= 9; - const lp = @as(u3, @intCast(props % 5)); - props /= 5; - const pb = @as(u3, @intCast(props)); - - const dict_size_provided = try reader.readInt(u32, .little); - const dict_size = @max(0x1000, dict_size_provided); - - const unpacked_size = switch (options.unpacked_size) { - .read_from_header => blk: { - const unpacked_size_provided = try reader.readInt(u64, .little); - const marker_mandatory = unpacked_size_provided == 0xFFFF_FFFF_FFFF_FFFF; - break :blk if (marker_mandatory) - null - else - unpacked_size_provided; - }, - .read_header_but_use_provided => |x| blk: { - _ = try reader.readInt(u64, .little); - break :blk x; - }, - .use_provided => |x| x, - }; - - return Params{ - .properties = Properties{ .lc = lc, .lp = lp, .pb = pb }, - .dict_size = dict_size, - .unpacked_size = unpacked_size, - }; - } -}; - -pub const DecoderState = struct { - lzma_props: Properties, - unpacked_size: ?u64, - literal_probs: Vec2D(u16), - pos_slot_decoder: [4]BitTree(6), - align_decoder: BitTree(4), - pos_decoders: [115]u16, - is_match: [192]u16, - is_rep: [12]u16, - is_rep_g0: [12]u16, - is_rep_g1: [12]u16, - is_rep_g2: [12]u16, - is_rep_0long: [192]u16, - state: usize, - rep: [4]usize, - len_decoder: LenDecoder, - rep_len_decoder: LenDecoder, - - pub fn init( - allocator: Allocator, - lzma_props: Properties, - unpacked_size: ?u64, - ) !DecoderState { - return .{ - .lzma_props = lzma_props, - .unpacked_size = unpacked_size, - .literal_probs = try Vec2D(u16).init(allocator, 0x400, .{ @as(usize, 1) << (lzma_props.lc + lzma_props.lp), 0x300 }), - .pos_slot_decoder = @splat(.{}), - .align_decoder = .{}, - .pos_decoders = @splat(0x400), - .is_match = @splat(0x400), - .is_rep = @splat(0x400), - .is_rep_g0 = @splat(0x400), - .is_rep_g1 = @splat(0x400), - .is_rep_g2 = @splat(0x400), - .is_rep_0long = @splat(0x400), - .state = 0, - .rep = @splat(0), - .len_decoder = .{}, - .rep_len_decoder = .{}, - }; - } - - pub fn deinit(self: *DecoderState, allocator: Allocator) void { - self.literal_probs.deinit(allocator); - self.* = undefined; - } - - pub fn resetState(self: *DecoderState, allocator: Allocator, new_props: Properties) !void { - new_props.validate(); - if (self.lzma_props.lc + self.lzma_props.lp == new_props.lc + new_props.lp) { - self.literal_probs.fill(0x400); - } else { - self.literal_probs.deinit(allocator); - self.literal_probs = try Vec2D(u16).init(allocator, 0x400, .{ @as(usize, 1) << (new_props.lc + new_props.lp), 0x300 }); - } - - self.lzma_props = new_props; - for (&self.pos_slot_decoder) |*t| t.reset(); - self.align_decoder.reset(); - self.pos_decoders = @splat(0x400); - self.is_match = @splat(0x400); - self.is_rep = @splat(0x400); - self.is_rep_g0 = @splat(0x400); - self.is_rep_g1 = @splat(0x400); - self.is_rep_g2 = @splat(0x400); - self.is_rep_0long = @splat(0x400); - self.state = 0; - self.rep = @splat(0); - self.len_decoder.reset(); - self.rep_len_decoder.reset(); - } - - fn processNextInner( - self: *DecoderState, - allocator: Allocator, - reader: anytype, - writer: anytype, - buffer: anytype, - decoder: *RangeDecoder, - update: bool, - ) !ProcessingStatus { - const pos_state = buffer.len & ((@as(usize, 1) << self.lzma_props.pb) - 1); - - if (!try decoder.decodeBit( - reader, - &self.is_match[(self.state << 4) + pos_state], - update, - )) { - const byte: u8 = try self.decodeLiteral(reader, buffer, decoder, update); - - if (update) { - try buffer.appendLiteral(allocator, byte, writer); - - self.state = if (self.state < 4) - 0 - else if (self.state < 10) - self.state - 3 - else - self.state - 6; - } - return .continue_; - } - - var len: usize = undefined; - if (try decoder.decodeBit(reader, &self.is_rep[self.state], update)) { - if (!try decoder.decodeBit(reader, &self.is_rep_g0[self.state], update)) { - if (!try decoder.decodeBit( - reader, - &self.is_rep_0long[(self.state << 4) + pos_state], - update, - )) { - if (update) { - self.state = if (self.state < 7) 9 else 11; - const dist = self.rep[0] + 1; - try buffer.appendLz(allocator, 1, dist, writer); - } - return .continue_; - } - } else { - const idx: usize = if (!try decoder.decodeBit(reader, &self.is_rep_g1[self.state], update)) - 1 - else if (!try decoder.decodeBit(reader, &self.is_rep_g2[self.state], update)) - 2 - else - 3; - if (update) { - const dist = self.rep[idx]; - var i = idx; - while (i > 0) : (i -= 1) { - self.rep[i] = self.rep[i - 1]; - } - self.rep[0] = dist; - } - } - - len = try self.rep_len_decoder.decode(reader, decoder, pos_state, update); - - if (update) { - self.state = if (self.state < 7) 8 else 11; - } - } else { - if (update) { - self.rep[3] = self.rep[2]; - self.rep[2] = self.rep[1]; - self.rep[1] = self.rep[0]; - } - - len = try self.len_decoder.decode(reader, decoder, pos_state, update); - - if (update) { - self.state = if (self.state < 7) 7 else 10; - } - - const rep_0 = try self.decodeDistance(reader, decoder, len, update); - - if (update) { - self.rep[0] = rep_0; - if (self.rep[0] == 0xFFFF_FFFF) { - if (decoder.isFinished()) { - return .finished; - } - return error.CorruptInput; - } - } - } - - if (update) { - len += 2; - - const dist = self.rep[0] + 1; - try buffer.appendLz(allocator, len, dist, writer); - } - - return .continue_; - } - - fn processNext( - self: *DecoderState, - allocator: Allocator, - reader: anytype, - writer: anytype, - buffer: anytype, - decoder: *RangeDecoder, - ) !ProcessingStatus { - return self.processNextInner(allocator, reader, writer, buffer, decoder, true); - } - - pub fn process( - self: *DecoderState, - allocator: Allocator, - reader: anytype, - writer: anytype, - buffer: anytype, - decoder: *RangeDecoder, - ) !ProcessingStatus { - process_next: { - if (self.unpacked_size) |unpacked_size| { - if (buffer.len >= unpacked_size) { - break :process_next; - } - } else if (decoder.isFinished()) { - break :process_next; - } - - switch (try self.processNext(allocator, reader, writer, buffer, decoder)) { - .continue_ => return .continue_, - .finished => break :process_next, - } - } - - if (self.unpacked_size) |unpacked_size| { - if (buffer.len != unpacked_size) { - return error.CorruptInput; - } - } - - return .finished; - } - - fn decodeLiteral( - self: *DecoderState, - reader: anytype, - buffer: anytype, - decoder: *RangeDecoder, - update: bool, - ) !u8 { - const def_prev_byte = 0; - const prev_byte = @as(usize, buffer.lastOr(def_prev_byte)); - - var result: usize = 1; - const lit_state = ((buffer.len & ((@as(usize, 1) << self.lzma_props.lp) - 1)) << self.lzma_props.lc) + - (prev_byte >> (8 - self.lzma_props.lc)); - const probs = try self.literal_probs.getMut(lit_state); - - if (self.state >= 7) { - var match_byte = @as(usize, try buffer.lastN(self.rep[0] + 1)); - - while (result < 0x100) { - const match_bit = (match_byte >> 7) & 1; - match_byte <<= 1; - const bit = @intFromBool(try decoder.decodeBit( - reader, - &probs[((@as(usize, 1) + match_bit) << 8) + result], - update, - )); - result = (result << 1) ^ bit; - if (match_bit != bit) { - break; - } - } - } - - while (result < 0x100) { - result = (result << 1) ^ @intFromBool(try decoder.decodeBit(reader, &probs[result], update)); - } - - return @as(u8, @truncate(result - 0x100)); - } - - fn decodeDistance( - self: *DecoderState, - reader: anytype, - decoder: *RangeDecoder, - length: usize, - update: bool, - ) !usize { - const len_state = if (length > 3) 3 else length; - - const pos_slot = @as(usize, try self.pos_slot_decoder[len_state].parse(reader, decoder, update)); - if (pos_slot < 4) - return pos_slot; - - const num_direct_bits = @as(u5, @intCast((pos_slot >> 1) - 1)); - var result = (2 ^ (pos_slot & 1)) << num_direct_bits; - - if (pos_slot < 14) { - result += try decoder.parseReverseBitTree( - reader, - num_direct_bits, - &self.pos_decoders, - result - pos_slot, - update, - ); - } else { - result += @as(usize, try decoder.get(reader, num_direct_bits - 4)) << 4; - result += try self.align_decoder.parseReverse(reader, decoder, update); - } - - return result; - } -}; diff --git a/lib/std/compress/lzma/decode/lzbuffer.zig b/lib/std/compress/lzma/decode/lzbuffer.zig deleted file mode 100644 index 80c470c5f9..0000000000 --- a/lib/std/compress/lzma/decode/lzbuffer.zig +++ /dev/null @@ -1,228 +0,0 @@ -const std = @import("../../../std.zig"); -const math = std.math; -const mem = std.mem; -const Allocator = std.mem.Allocator; -const ArrayListUnmanaged = std.ArrayListUnmanaged; - -/// An accumulating buffer for LZ sequences -pub const LzAccumBuffer = struct { - /// Buffer - buf: ArrayListUnmanaged(u8), - - /// Buffer memory limit - memlimit: usize, - - /// Total number of bytes sent through the buffer - len: usize, - - const Self = @This(); - - pub fn init(memlimit: usize) Self { - return Self{ - .buf = .{}, - .memlimit = memlimit, - .len = 0, - }; - } - - pub fn appendByte(self: *Self, allocator: Allocator, byte: u8) !void { - try self.buf.append(allocator, byte); - self.len += 1; - } - - /// Reset the internal dictionary - pub fn reset(self: *Self, writer: anytype) !void { - try writer.writeAll(self.buf.items); - self.buf.clearRetainingCapacity(); - self.len = 0; - } - - /// Retrieve the last byte or return a default - pub fn lastOr(self: Self, lit: u8) u8 { - const buf_len = self.buf.items.len; - return if (buf_len == 0) - lit - else - self.buf.items[buf_len - 1]; - } - - /// Retrieve the n-th last byte - pub fn lastN(self: Self, dist: usize) !u8 { - const buf_len = self.buf.items.len; - if (dist > buf_len) { - return error.CorruptInput; - } - - return self.buf.items[buf_len - dist]; - } - - /// Append a literal - pub fn appendLiteral( - self: *Self, - allocator: Allocator, - lit: u8, - writer: anytype, - ) !void { - _ = writer; - if (self.len >= self.memlimit) { - return error.CorruptInput; - } - try self.buf.append(allocator, lit); - self.len += 1; - } - - /// Fetch an LZ sequence (length, distance) from inside the buffer - pub fn appendLz( - self: *Self, - allocator: Allocator, - len: usize, - dist: usize, - writer: anytype, - ) !void { - _ = writer; - - const buf_len = self.buf.items.len; - if (dist > buf_len) { - return error.CorruptInput; - } - - var offset = buf_len - dist; - var i: usize = 0; - while (i < len) : (i += 1) { - const x = self.buf.items[offset]; - try self.buf.append(allocator, x); - offset += 1; - } - self.len += len; - } - - pub fn finish(self: *Self, writer: anytype) !void { - try writer.writeAll(self.buf.items); - self.buf.clearRetainingCapacity(); - } - - pub fn deinit(self: *Self, allocator: Allocator) void { - self.buf.deinit(allocator); - self.* = undefined; - } -}; - -/// A circular buffer for LZ sequences -pub const LzCircularBuffer = struct { - /// Circular buffer - buf: ArrayListUnmanaged(u8), - - /// Length of the buffer - dict_size: usize, - - /// Buffer memory limit - memlimit: usize, - - /// Current position - cursor: usize, - - /// Total number of bytes sent through the buffer - len: usize, - - const Self = @This(); - - pub fn init(dict_size: usize, memlimit: usize) Self { - return Self{ - .buf = .{}, - .dict_size = dict_size, - .memlimit = memlimit, - .cursor = 0, - .len = 0, - }; - } - - pub fn get(self: Self, index: usize) u8 { - return if (0 <= index and index < self.buf.items.len) - self.buf.items[index] - else - 0; - } - - pub fn set(self: *Self, allocator: Allocator, index: usize, value: u8) !void { - if (index >= self.memlimit) { - return error.CorruptInput; - } - try self.buf.ensureTotalCapacity(allocator, index + 1); - while (self.buf.items.len < index) { - self.buf.appendAssumeCapacity(0); - } - self.buf.appendAssumeCapacity(value); - } - - /// Retrieve the last byte or return a default - pub fn lastOr(self: Self, lit: u8) u8 { - return if (self.len == 0) - lit - else - self.get((self.dict_size + self.cursor - 1) % self.dict_size); - } - - /// Retrieve the n-th last byte - pub fn lastN(self: Self, dist: usize) !u8 { - if (dist > self.dict_size or dist > self.len) { - return error.CorruptInput; - } - - const offset = (self.dict_size + self.cursor - dist) % self.dict_size; - return self.get(offset); - } - - /// Append a literal - pub fn appendLiteral( - self: *Self, - allocator: Allocator, - lit: u8, - writer: anytype, - ) !void { - try self.set(allocator, self.cursor, lit); - self.cursor += 1; - self.len += 1; - - // Flush the circular buffer to the output - if (self.cursor == self.dict_size) { - try writer.writeAll(self.buf.items); - self.cursor = 0; - } - } - - /// Fetch an LZ sequence (length, distance) from inside the buffer - pub fn appendLz( - self: *Self, - allocator: Allocator, - len: usize, - dist: usize, - writer: anytype, - ) !void { - if (dist > self.dict_size or dist > self.len) { - return error.CorruptInput; - } - - var offset = (self.dict_size + self.cursor - dist) % self.dict_size; - var i: usize = 0; - while (i < len) : (i += 1) { - const x = self.get(offset); - try self.appendLiteral(allocator, x, writer); - offset += 1; - if (offset == self.dict_size) { - offset = 0; - } - } - } - - pub fn finish(self: *Self, writer: anytype) !void { - if (self.cursor > 0) { - try writer.writeAll(self.buf.items[0..self.cursor]); - self.cursor = 0; - } - } - - pub fn deinit(self: *Self, allocator: Allocator) void { - self.buf.deinit(allocator); - self.* = undefined; - } -}; diff --git a/lib/std/compress/lzma/decode/rangecoder.zig b/lib/std/compress/lzma/decode/rangecoder.zig deleted file mode 100644 index 01930884d7..0000000000 --- a/lib/std/compress/lzma/decode/rangecoder.zig +++ /dev/null @@ -1,181 +0,0 @@ -const std = @import("../../../std.zig"); -const mem = std.mem; - -pub const RangeDecoder = struct { - range: u32, - code: u32, - - pub fn init(reader: anytype) !RangeDecoder { - const reserved = try reader.readByte(); - if (reserved != 0) { - return error.CorruptInput; - } - return RangeDecoder{ - .range = 0xFFFF_FFFF, - .code = try reader.readInt(u32, .big), - }; - } - - pub fn fromParts( - range: u32, - code: u32, - ) RangeDecoder { - return .{ - .range = range, - .code = code, - }; - } - - pub fn set(self: *RangeDecoder, range: u32, code: u32) void { - self.range = range; - self.code = code; - } - - pub inline fn isFinished(self: RangeDecoder) bool { - return self.code == 0; - } - - inline fn normalize(self: *RangeDecoder, reader: anytype) !void { - if (self.range < 0x0100_0000) { - self.range <<= 8; - self.code = (self.code << 8) ^ @as(u32, try reader.readByte()); - } - } - - inline fn getBit(self: *RangeDecoder, reader: anytype) !bool { - self.range >>= 1; - - const bit = self.code >= self.range; - if (bit) - self.code -= self.range; - - try self.normalize(reader); - return bit; - } - - pub fn get(self: *RangeDecoder, reader: anytype, count: usize) !u32 { - var result: u32 = 0; - var i: usize = 0; - while (i < count) : (i += 1) - result = (result << 1) ^ @intFromBool(try self.getBit(reader)); - return result; - } - - pub inline fn decodeBit(self: *RangeDecoder, reader: anytype, prob: *u16, update: bool) !bool { - const bound = (self.range >> 11) * prob.*; - - if (self.code < bound) { - if (update) - prob.* += (0x800 - prob.*) >> 5; - self.range = bound; - - try self.normalize(reader); - return false; - } else { - if (update) - prob.* -= prob.* >> 5; - self.code -= bound; - self.range -= bound; - - try self.normalize(reader); - return true; - } - } - - fn parseBitTree( - self: *RangeDecoder, - reader: anytype, - num_bits: u5, - probs: []u16, - update: bool, - ) !u32 { - var tmp: u32 = 1; - var i: @TypeOf(num_bits) = 0; - while (i < num_bits) : (i += 1) { - const bit = try self.decodeBit(reader, &probs[tmp], update); - tmp = (tmp << 1) ^ @intFromBool(bit); - } - return tmp - (@as(u32, 1) << num_bits); - } - - pub fn parseReverseBitTree( - self: *RangeDecoder, - reader: anytype, - num_bits: u5, - probs: []u16, - offset: usize, - update: bool, - ) !u32 { - var result: u32 = 0; - var tmp: usize = 1; - var i: @TypeOf(num_bits) = 0; - while (i < num_bits) : (i += 1) { - const bit = @intFromBool(try self.decodeBit(reader, &probs[offset + tmp], update)); - tmp = (tmp << 1) ^ bit; - result ^= @as(u32, bit) << i; - } - return result; - } -}; - -pub fn BitTree(comptime num_bits: usize) type { - return struct { - probs: [1 << num_bits]u16 = @splat(0x400), - - const Self = @This(); - - pub fn parse( - self: *Self, - reader: anytype, - decoder: *RangeDecoder, - update: bool, - ) !u32 { - return decoder.parseBitTree(reader, num_bits, &self.probs, update); - } - - pub fn parseReverse( - self: *Self, - reader: anytype, - decoder: *RangeDecoder, - update: bool, - ) !u32 { - return decoder.parseReverseBitTree(reader, num_bits, &self.probs, 0, update); - } - - pub fn reset(self: *Self) void { - @memset(&self.probs, 0x400); - } - }; -} - -pub const LenDecoder = struct { - choice: u16 = 0x400, - choice2: u16 = 0x400, - low_coder: [16]BitTree(3) = @splat(.{}), - mid_coder: [16]BitTree(3) = @splat(.{}), - high_coder: BitTree(8) = .{}, - - pub fn decode( - self: *LenDecoder, - reader: anytype, - decoder: *RangeDecoder, - pos_state: usize, - update: bool, - ) !usize { - if (!try decoder.decodeBit(reader, &self.choice, update)) { - return @as(usize, try self.low_coder[pos_state].parse(reader, decoder, update)); - } else if (!try decoder.decodeBit(reader, &self.choice2, update)) { - return @as(usize, try self.mid_coder[pos_state].parse(reader, decoder, update)) + 8; - } else { - return @as(usize, try self.high_coder.parse(reader, decoder, update)) + 16; - } - } - - pub fn reset(self: *LenDecoder) void { - self.choice = 0x400; - self.choice2 = 0x400; - for (&self.low_coder) |*t| t.reset(); - for (&self.mid_coder) |*t| t.reset(); - self.high_coder.reset(); - } -}; diff --git a/lib/std/compress/lzma/vec2d.zig b/lib/std/compress/lzma/vec2d.zig deleted file mode 100644 index df61093b85..0000000000 --- a/lib/std/compress/lzma/vec2d.zig +++ /dev/null @@ -1,128 +0,0 @@ -const std = @import("../../std.zig"); -const math = std.math; -const mem = std.mem; -const Allocator = std.mem.Allocator; - -pub fn Vec2D(comptime T: type) type { - return struct { - data: []T, - cols: usize, - - const Self = @This(); - - pub fn init(allocator: Allocator, value: T, size: struct { usize, usize }) !Self { - const len = try math.mul(usize, size[0], size[1]); - const data = try allocator.alloc(T, len); - @memset(data, value); - return Self{ - .data = data, - .cols = size[1], - }; - } - - pub fn deinit(self: *Self, allocator: Allocator) void { - allocator.free(self.data); - self.* = undefined; - } - - pub fn fill(self: *Self, value: T) void { - @memset(self.data, value); - } - - inline fn _get(self: Self, row: usize) ![]T { - const start_row = try math.mul(usize, row, self.cols); - const end_row = try math.add(usize, start_row, self.cols); - return self.data[start_row..end_row]; - } - - pub fn get(self: Self, row: usize) ![]const T { - return self._get(row); - } - - pub fn getMut(self: *Self, row: usize) ![]T { - return self._get(row); - } - }; -} - -const testing = std.testing; -const expectEqualSlices = std.testing.expectEqualSlices; -const expectError = std.testing.expectError; - -test "init" { - const allocator = testing.allocator; - var vec2d = try Vec2D(i32).init(allocator, 1, .{ 2, 3 }); - defer vec2d.deinit(allocator); - - try expectEqualSlices(i32, &.{ 1, 1, 1 }, try vec2d.get(0)); - try expectEqualSlices(i32, &.{ 1, 1, 1 }, try vec2d.get(1)); -} - -test "init overflow" { - const allocator = testing.allocator; - try expectError( - error.Overflow, - Vec2D(i32).init(allocator, 1, .{ math.maxInt(usize), math.maxInt(usize) }), - ); -} - -test "fill" { - const allocator = testing.allocator; - var vec2d = try Vec2D(i32).init(allocator, 0, .{ 2, 3 }); - defer vec2d.deinit(allocator); - - vec2d.fill(7); - - try expectEqualSlices(i32, &.{ 7, 7, 7 }, try vec2d.get(0)); - try expectEqualSlices(i32, &.{ 7, 7, 7 }, try vec2d.get(1)); -} - -test "get" { - var data = [_]i32{ 0, 1, 2, 3, 4, 5, 6, 7 }; - const vec2d = Vec2D(i32){ - .data = &data, - .cols = 2, - }; - - try expectEqualSlices(i32, &.{ 0, 1 }, try vec2d.get(0)); - try expectEqualSlices(i32, &.{ 2, 3 }, try vec2d.get(1)); - try expectEqualSlices(i32, &.{ 4, 5 }, try vec2d.get(2)); - try expectEqualSlices(i32, &.{ 6, 7 }, try vec2d.get(3)); -} - -test "getMut" { - var data = [_]i32{ 0, 1, 2, 3, 4, 5, 6, 7 }; - var vec2d = Vec2D(i32){ - .data = &data, - .cols = 2, - }; - - const row = try vec2d.getMut(1); - row[1] = 9; - - try expectEqualSlices(i32, &.{ 0, 1 }, try vec2d.get(0)); - // (1, 1) should be 9. - try expectEqualSlices(i32, &.{ 2, 9 }, try vec2d.get(1)); - try expectEqualSlices(i32, &.{ 4, 5 }, try vec2d.get(2)); - try expectEqualSlices(i32, &.{ 6, 7 }, try vec2d.get(3)); -} - -test "get multiplication overflow" { - const allocator = testing.allocator; - var matrix = try Vec2D(i32).init(allocator, 0, .{ 3, 4 }); - defer matrix.deinit(allocator); - - const row = (math.maxInt(usize) / 4) + 1; - try expectError(error.Overflow, matrix.get(row)); - try expectError(error.Overflow, matrix.getMut(row)); -} - -test "get addition overflow" { - const allocator = testing.allocator; - var matrix = try Vec2D(i32).init(allocator, 0, .{ 3, 5 }); - defer matrix.deinit(allocator); - - const row = math.maxInt(usize) / 5; - try expectError(error.Overflow, matrix.get(row)); - try expectError(error.Overflow, matrix.getMut(row)); -} diff --git a/lib/std/compress/lzma2.zig b/lib/std/compress/lzma2.zig index 1ccfe36584..e454e6382e 100644 --- a/lib/std/compress/lzma2.zig +++ b/lib/std/compress/lzma2.zig @@ -1,14 +1,278 @@ const std = @import("../std.zig"); const Allocator = std.mem.Allocator; +const ArrayList = std.ArrayList; +const lzma = std.compress.lzma; -pub const decode = @import("lzma2/decode.zig"); +/// An accumulating buffer for LZ sequences +pub const LzAccumBuffer = struct { + /// Buffer + buf: ArrayList(u8), + + /// Buffer memory limit + memlimit: usize, + + /// Total number of bytes sent through the buffer + len: usize, + + const Self = @This(); + + pub fn init(memlimit: usize) Self { + return Self{ + .buf = .{}, + .memlimit = memlimit, + .len = 0, + }; + } + + pub fn appendByte(self: *Self, allocator: Allocator, byte: u8) !void { + try self.buf.append(allocator, byte); + self.len += 1; + } + + /// Reset the internal dictionary + pub fn reset(self: *Self, writer: anytype) !void { + try writer.writeAll(self.buf.items); + self.buf.clearRetainingCapacity(); + self.len = 0; + } + + /// Retrieve the last byte or return a default + pub fn lastOr(self: Self, lit: u8) u8 { + const buf_len = self.buf.items.len; + return if (buf_len == 0) + lit + else + self.buf.items[buf_len - 1]; + } + + /// Retrieve the n-th last byte + pub fn lastN(self: Self, dist: usize) !u8 { + const buf_len = self.buf.items.len; + if (dist > buf_len) { + return error.CorruptInput; + } + + return self.buf.items[buf_len - dist]; + } + + /// Append a literal + pub fn appendLiteral( + self: *Self, + allocator: Allocator, + lit: u8, + writer: anytype, + ) !void { + _ = writer; + if (self.len >= self.memlimit) { + return error.CorruptInput; + } + try self.buf.append(allocator, lit); + self.len += 1; + } + + /// Fetch an LZ sequence (length, distance) from inside the buffer + pub fn appendLz( + self: *Self, + allocator: Allocator, + len: usize, + dist: usize, + writer: anytype, + ) !void { + _ = writer; + + const buf_len = self.buf.items.len; + if (dist > buf_len) { + return error.CorruptInput; + } + + var offset = buf_len - dist; + var i: usize = 0; + while (i < len) : (i += 1) { + const x = self.buf.items[offset]; + try self.buf.append(allocator, x); + offset += 1; + } + self.len += len; + } + + pub fn finish(self: *Self, writer: anytype) !void { + try writer.writeAll(self.buf.items); + self.buf.clearRetainingCapacity(); + } + + pub fn deinit(self: *Self, allocator: Allocator) void { + self.buf.deinit(allocator); + self.* = undefined; + } +}; + +pub const Decode = struct { + lzma_state: lzma.Decode, + + pub fn init(allocator: Allocator) !Decode { + return Decode{ + .lzma_state = try lzma.Decode.init( + allocator, + .{ + .lc = 0, + .lp = 0, + .pb = 0, + }, + null, + ), + }; + } + + pub fn deinit(self: *Decode, allocator: Allocator) void { + self.lzma_state.deinit(allocator); + self.* = undefined; + } + + pub fn decompress( + self: *Decode, + allocator: Allocator, + reader: anytype, + writer: anytype, + ) !void { + var accum = LzAccumBuffer.init(std.math.maxInt(usize)); + defer accum.deinit(allocator); + + while (true) { + const status = try reader.readByte(); + + switch (status) { + 0 => break, + 1 => try parseUncompressed(allocator, reader, writer, &accum, true), + 2 => try parseUncompressed(allocator, reader, writer, &accum, false), + else => try self.parseLzma(allocator, reader, writer, &accum, status), + } + } + + try accum.finish(writer); + } + + fn parseLzma( + self: *Decode, + allocator: Allocator, + reader: anytype, + writer: anytype, + accum: *LzAccumBuffer, + status: u8, + ) !void { + if (status & 0x80 == 0) { + return error.CorruptInput; + } + + const Reset = struct { + dict: bool, + state: bool, + props: bool, + }; + + const reset = switch ((status >> 5) & 0x3) { + 0 => Reset{ + .dict = false, + .state = false, + .props = false, + }, + 1 => Reset{ + .dict = false, + .state = true, + .props = false, + }, + 2 => Reset{ + .dict = false, + .state = true, + .props = true, + }, + 3 => Reset{ + .dict = true, + .state = true, + .props = true, + }, + else => unreachable, + }; + + const unpacked_size = blk: { + var tmp: u64 = status & 0x1F; + tmp <<= 16; + tmp |= try reader.readInt(u16, .big); + break :blk tmp + 1; + }; + + const packed_size = blk: { + const tmp: u17 = try reader.readInt(u16, .big); + break :blk tmp + 1; + }; + + if (reset.dict) { + try accum.reset(writer); + } + + if (reset.state) { + var new_props = self.lzma_state.lzma_props; + + if (reset.props) { + var props = try reader.readByte(); + if (props >= 225) { + return error.CorruptInput; + } + + const lc = @as(u4, @intCast(props % 9)); + props /= 9; + const lp = @as(u3, @intCast(props % 5)); + props /= 5; + const pb = @as(u3, @intCast(props)); + + if (lc + lp > 4) { + return error.CorruptInput; + } + + new_props = .{ .lc = lc, .lp = lp, .pb = pb }; + } + + try self.lzma_state.resetState(allocator, new_props); + } + + self.lzma_state.unpacked_size = unpacked_size + accum.len; + + var counter = std.io.countingReader(reader); + const counter_reader = counter.reader(); + + var rangecoder = try lzma.RangeDecoder.init(counter_reader); + while (try self.lzma_state.process(allocator, counter_reader, writer, accum, &rangecoder) == .continue_) {} + + if (counter.bytes_read != packed_size) { + return error.CorruptInput; + } + } + + fn parseUncompressed( + allocator: Allocator, + reader: anytype, + writer: anytype, + accum: *LzAccumBuffer, + reset_dict: bool, + ) !void { + const unpacked_size = @as(u17, try reader.readInt(u16, .big)) + 1; + + if (reset_dict) { + try accum.reset(writer); + } + + var i: @TypeOf(unpacked_size) = 0; + while (i < unpacked_size) : (i += 1) { + try accum.appendByte(allocator, try reader.readByte()); + } + } +}; pub fn decompress( allocator: Allocator, reader: anytype, writer: anytype, ) !void { - var decoder = try decode.Decoder.init(allocator); + var decoder = try Decode.init(allocator); defer decoder.deinit(allocator); return decoder.decompress(allocator, reader, writer); } diff --git a/lib/std/compress/lzma2/decode.zig b/lib/std/compress/lzma2/decode.zig deleted file mode 100644 index 938c2d437b..0000000000 --- a/lib/std/compress/lzma2/decode.zig +++ /dev/null @@ -1,169 +0,0 @@ -const std = @import("../../std.zig"); -const Allocator = std.mem.Allocator; - -const lzma = @import("../lzma.zig"); -const DecoderState = lzma.decode.DecoderState; -const LzAccumBuffer = lzma.decode.lzbuffer.LzAccumBuffer; -const Properties = lzma.decode.Properties; -const RangeDecoder = lzma.decode.rangecoder.RangeDecoder; - -pub const Decoder = struct { - lzma_state: DecoderState, - - pub fn init(allocator: Allocator) !Decoder { - return Decoder{ - .lzma_state = try DecoderState.init( - allocator, - Properties{ - .lc = 0, - .lp = 0, - .pb = 0, - }, - null, - ), - }; - } - - pub fn deinit(self: *Decoder, allocator: Allocator) void { - self.lzma_state.deinit(allocator); - self.* = undefined; - } - - pub fn decompress( - self: *Decoder, - allocator: Allocator, - reader: anytype, - writer: anytype, - ) !void { - var accum = LzAccumBuffer.init(std.math.maxInt(usize)); - defer accum.deinit(allocator); - - while (true) { - const status = try reader.readByte(); - - switch (status) { - 0 => break, - 1 => try parseUncompressed(allocator, reader, writer, &accum, true), - 2 => try parseUncompressed(allocator, reader, writer, &accum, false), - else => try self.parseLzma(allocator, reader, writer, &accum, status), - } - } - - try accum.finish(writer); - } - - fn parseLzma( - self: *Decoder, - allocator: Allocator, - reader: anytype, - writer: anytype, - accum: *LzAccumBuffer, - status: u8, - ) !void { - if (status & 0x80 == 0) { - return error.CorruptInput; - } - - const Reset = struct { - dict: bool, - state: bool, - props: bool, - }; - - const reset = switch ((status >> 5) & 0x3) { - 0 => Reset{ - .dict = false, - .state = false, - .props = false, - }, - 1 => Reset{ - .dict = false, - .state = true, - .props = false, - }, - 2 => Reset{ - .dict = false, - .state = true, - .props = true, - }, - 3 => Reset{ - .dict = true, - .state = true, - .props = true, - }, - else => unreachable, - }; - - const unpacked_size = blk: { - var tmp: u64 = status & 0x1F; - tmp <<= 16; - tmp |= try reader.readInt(u16, .big); - break :blk tmp + 1; - }; - - const packed_size = blk: { - const tmp: u17 = try reader.readInt(u16, .big); - break :blk tmp + 1; - }; - - if (reset.dict) { - try accum.reset(writer); - } - - if (reset.state) { - var new_props = self.lzma_state.lzma_props; - - if (reset.props) { - var props = try reader.readByte(); - if (props >= 225) { - return error.CorruptInput; - } - - const lc = @as(u4, @intCast(props % 9)); - props /= 9; - const lp = @as(u3, @intCast(props % 5)); - props /= 5; - const pb = @as(u3, @intCast(props)); - - if (lc + lp > 4) { - return error.CorruptInput; - } - - new_props = Properties{ .lc = lc, .lp = lp, .pb = pb }; - } - - try self.lzma_state.resetState(allocator, new_props); - } - - self.lzma_state.unpacked_size = unpacked_size + accum.len; - - var counter = std.io.countingReader(reader); - const counter_reader = counter.reader(); - - var rangecoder = try RangeDecoder.init(counter_reader); - while (try self.lzma_state.process(allocator, counter_reader, writer, accum, &rangecoder) == .continue_) {} - - if (counter.bytes_read != packed_size) { - return error.CorruptInput; - } - } - - fn parseUncompressed( - allocator: Allocator, - reader: anytype, - writer: anytype, - accum: *LzAccumBuffer, - reset_dict: bool, - ) !void { - const unpacked_size = @as(u17, try reader.readInt(u16, .big)) + 1; - - if (reset_dict) { - try accum.reset(writer); - } - - var i: @TypeOf(unpacked_size) = 0; - while (i < unpacked_size) : (i += 1) { - try accum.appendByte(allocator, try reader.readByte()); - } - } -}; |
