aboutsummaryrefslogtreecommitdiff
path: root/lib/std/compress/zstandard/decode/block.zig
diff options
context:
space:
mode:
Diffstat (limited to 'lib/std/compress/zstandard/decode/block.zig')
-rw-r--r--lib/std/compress/zstandard/decode/block.zig1149
1 files changed, 0 insertions, 1149 deletions
diff --git a/lib/std/compress/zstandard/decode/block.zig b/lib/std/compress/zstandard/decode/block.zig
deleted file mode 100644
index 49c6e7dc36..0000000000
--- a/lib/std/compress/zstandard/decode/block.zig
+++ /dev/null
@@ -1,1149 +0,0 @@
-const std = @import("std");
-const assert = std.debug.assert;
-const RingBuffer = std.RingBuffer;
-
-const types = @import("../types.zig");
-const frame = types.frame;
-const Table = types.compressed_block.Table;
-const LiteralsSection = types.compressed_block.LiteralsSection;
-const SequencesSection = types.compressed_block.SequencesSection;
-
-const huffman = @import("huffman.zig");
-const readers = @import("../readers.zig");
-
-const decodeFseTable = @import("fse.zig").decodeFseTable;
-
-pub const Error = error{
- BlockSizeOverMaximum,
- MalformedBlockSize,
- ReservedBlock,
- MalformedRleBlock,
- MalformedCompressedBlock,
-};
-
-pub const DecodeState = struct {
- repeat_offsets: [3]u32,
-
- offset: StateData(8),
- match: StateData(9),
- literal: StateData(9),
-
- offset_fse_buffer: []Table.Fse,
- match_fse_buffer: []Table.Fse,
- literal_fse_buffer: []Table.Fse,
-
- fse_tables_undefined: bool,
-
- literal_stream_reader: readers.ReverseBitReader,
- literal_stream_index: usize,
- literal_streams: LiteralsSection.Streams,
- literal_header: LiteralsSection.Header,
- huffman_tree: ?LiteralsSection.HuffmanTree,
-
- literal_written_count: usize,
- written_count: usize = 0,
-
- fn StateData(comptime max_accuracy_log: comptime_int) type {
- return struct {
- state: State,
- table: Table,
- accuracy_log: u8,
-
- const State = std.meta.Int(.unsigned, max_accuracy_log);
- };
- }
-
- pub fn init(
- literal_fse_buffer: []Table.Fse,
- match_fse_buffer: []Table.Fse,
- offset_fse_buffer: []Table.Fse,
- ) DecodeState {
- return DecodeState{
- .repeat_offsets = .{
- types.compressed_block.start_repeated_offset_1,
- types.compressed_block.start_repeated_offset_2,
- types.compressed_block.start_repeated_offset_3,
- },
-
- .offset = undefined,
- .match = undefined,
- .literal = undefined,
-
- .literal_fse_buffer = literal_fse_buffer,
- .match_fse_buffer = match_fse_buffer,
- .offset_fse_buffer = offset_fse_buffer,
-
- .fse_tables_undefined = true,
-
- .literal_written_count = 0,
- .literal_header = undefined,
- .literal_streams = undefined,
- .literal_stream_reader = undefined,
- .literal_stream_index = undefined,
- .huffman_tree = null,
-
- .written_count = 0,
- };
- }
-
- /// Prepare the decoder to decode a compressed block. Loads the literals
- /// stream and Huffman tree from `literals` and reads the FSE tables from
- /// `source`.
- ///
- /// Errors returned:
- /// - `error.BitStreamHasNoStartBit` if the (reversed) literal bitstream's
- /// first byte does not have any bits set
- /// - `error.TreelessLiteralsFirst` `literals` is a treeless literals
- /// section and the decode state does not have a Huffman tree from a
- /// previous block
- /// - `error.RepeatModeFirst` on the first call if one of the sequence FSE
- /// tables is set to repeat mode
- /// - `error.MalformedAccuracyLog` if an FSE table has an invalid accuracy
- /// - `error.MalformedFseTable` if there are errors decoding an FSE table
- /// - `error.EndOfStream` if `source` ends before all FSE tables are read
- pub fn prepare(
- self: *DecodeState,
- source: anytype,
- literals: LiteralsSection,
- sequences_header: SequencesSection.Header,
- ) !void {
- self.literal_written_count = 0;
- self.literal_header = literals.header;
- self.literal_streams = literals.streams;
-
- if (literals.huffman_tree) |tree| {
- self.huffman_tree = tree;
- } else if (literals.header.block_type == .treeless and self.huffman_tree == null) {
- return error.TreelessLiteralsFirst;
- }
-
- switch (literals.header.block_type) {
- .raw, .rle => {},
- .compressed, .treeless => {
- self.literal_stream_index = 0;
- switch (literals.streams) {
- .one => |slice| try self.initLiteralStream(slice),
- .four => |streams| try self.initLiteralStream(streams[0]),
- }
- },
- }
-
- if (sequences_header.sequence_count > 0) {
- try self.updateFseTable(source, .literal, sequences_header.literal_lengths);
- try self.updateFseTable(source, .offset, sequences_header.offsets);
- try self.updateFseTable(source, .match, sequences_header.match_lengths);
- self.fse_tables_undefined = false;
- }
- }
-
- /// Read initial FSE states for sequence decoding.
- ///
- /// Errors returned:
- /// - `error.EndOfStream` if `bit_reader` does not contain enough bits.
- pub fn readInitialFseState(self: *DecodeState, bit_reader: *readers.ReverseBitReader) error{EndOfStream}!void {
- self.literal.state = try bit_reader.readBitsNoEof(u9, self.literal.accuracy_log);
- self.offset.state = try bit_reader.readBitsNoEof(u8, self.offset.accuracy_log);
- self.match.state = try bit_reader.readBitsNoEof(u9, self.match.accuracy_log);
- }
-
- fn updateRepeatOffset(self: *DecodeState, offset: u32) void {
- self.repeat_offsets[2] = self.repeat_offsets[1];
- self.repeat_offsets[1] = self.repeat_offsets[0];
- self.repeat_offsets[0] = offset;
- }
-
- fn useRepeatOffset(self: *DecodeState, index: usize) u32 {
- if (index == 1)
- std.mem.swap(u32, &self.repeat_offsets[0], &self.repeat_offsets[1])
- else if (index == 2) {
- std.mem.swap(u32, &self.repeat_offsets[0], &self.repeat_offsets[2]);
- std.mem.swap(u32, &self.repeat_offsets[1], &self.repeat_offsets[2]);
- }
- return self.repeat_offsets[0];
- }
-
- const DataType = enum { offset, match, literal };
-
- fn updateState(
- self: *DecodeState,
- comptime choice: DataType,
- bit_reader: *readers.ReverseBitReader,
- ) error{ MalformedFseBits, EndOfStream }!void {
- switch (@field(self, @tagName(choice)).table) {
- .rle => {},
- .fse => |table| {
- const data = table[@field(self, @tagName(choice)).state];
- const T = @TypeOf(@field(self, @tagName(choice))).State;
- const bits_summand = try bit_reader.readBitsNoEof(T, data.bits);
- const next_state = std.math.cast(
- @TypeOf(@field(self, @tagName(choice))).State,
- data.baseline + bits_summand,
- ) orelse return error.MalformedFseBits;
- @field(self, @tagName(choice)).state = next_state;
- },
- }
- }
-
- const FseTableError = error{
- MalformedFseTable,
- MalformedAccuracyLog,
- RepeatModeFirst,
- EndOfStream,
- };
-
- fn updateFseTable(
- self: *DecodeState,
- source: anytype,
- comptime choice: DataType,
- mode: SequencesSection.Header.Mode,
- ) !void {
- const field_name = @tagName(choice);
- switch (mode) {
- .predefined => {
- @field(self, field_name).accuracy_log =
- @field(types.compressed_block.default_accuracy_log, field_name);
-
- @field(self, field_name).table =
- @field(types.compressed_block, "predefined_" ++ field_name ++ "_fse_table");
- },
- .rle => {
- @field(self, field_name).accuracy_log = 0;
- @field(self, field_name).table = .{ .rle = try source.readByte() };
- },
- .fse => {
- var bit_reader = readers.bitReader(source);
-
- const table_size = try decodeFseTable(
- &bit_reader,
- @field(types.compressed_block.table_symbol_count_max, field_name),
- @field(types.compressed_block.table_accuracy_log_max, field_name),
- @field(self, field_name ++ "_fse_buffer"),
- );
- @field(self, field_name).table = .{
- .fse = @field(self, field_name ++ "_fse_buffer")[0..table_size],
- };
- @field(self, field_name).accuracy_log = std.math.log2_int_ceil(usize, table_size);
- },
- .repeat => if (self.fse_tables_undefined) return error.RepeatModeFirst,
- }
- }
-
- const Sequence = struct {
- literal_length: u32,
- match_length: u32,
- offset: u32,
- };
-
- fn nextSequence(
- self: *DecodeState,
- bit_reader: *readers.ReverseBitReader,
- ) error{ InvalidBitStream, EndOfStream }!Sequence {
- const raw_code = self.getCode(.offset);
- const offset_code = std.math.cast(u5, raw_code) orelse {
- return error.InvalidBitStream;
- };
- const offset_value = (@as(u32, 1) << offset_code) + try bit_reader.readBitsNoEof(u32, offset_code);
-
- const match_code = self.getCode(.match);
- if (match_code >= types.compressed_block.match_length_code_table.len)
- return error.InvalidBitStream;
- const match = types.compressed_block.match_length_code_table[match_code];
- const match_length = match[0] + try bit_reader.readBitsNoEof(u32, match[1]);
-
- const literal_code = self.getCode(.literal);
- if (literal_code >= types.compressed_block.literals_length_code_table.len)
- return error.InvalidBitStream;
- const literal = types.compressed_block.literals_length_code_table[literal_code];
- const literal_length = literal[0] + try bit_reader.readBitsNoEof(u32, literal[1]);
-
- const offset = if (offset_value > 3) offset: {
- const offset = offset_value - 3;
- self.updateRepeatOffset(offset);
- break :offset offset;
- } else offset: {
- if (literal_length == 0) {
- if (offset_value == 3) {
- const offset = self.repeat_offsets[0] - 1;
- self.updateRepeatOffset(offset);
- break :offset offset;
- }
- break :offset self.useRepeatOffset(offset_value);
- }
- break :offset self.useRepeatOffset(offset_value - 1);
- };
-
- if (offset == 0) return error.InvalidBitStream;
-
- return .{
- .literal_length = literal_length,
- .match_length = match_length,
- .offset = offset,
- };
- }
-
- fn executeSequenceSlice(
- self: *DecodeState,
- dest: []u8,
- write_pos: usize,
- sequence: Sequence,
- ) (error{MalformedSequence} || DecodeLiteralsError)!void {
- if (sequence.offset > write_pos + sequence.literal_length) return error.MalformedSequence;
-
- try self.decodeLiteralsSlice(dest[write_pos..], sequence.literal_length);
- const copy_start = write_pos + sequence.literal_length - sequence.offset;
- for (
- dest[write_pos + sequence.literal_length ..][0..sequence.match_length],
- dest[copy_start..][0..sequence.match_length],
- ) |*d, s| d.* = s;
- self.written_count += sequence.match_length;
- }
-
- fn executeSequenceRingBuffer(
- self: *DecodeState,
- dest: *RingBuffer,
- sequence: Sequence,
- ) (error{MalformedSequence} || DecodeLiteralsError)!void {
- if (sequence.offset > @min(dest.data.len, self.written_count + sequence.literal_length))
- return error.MalformedSequence;
-
- try self.decodeLiteralsRingBuffer(dest, sequence.literal_length);
- const copy_start = dest.write_index + dest.data.len - sequence.offset;
- const copy_slice = dest.sliceAt(copy_start, sequence.match_length);
- dest.writeSliceForwardsAssumeCapacity(copy_slice.first);
- dest.writeSliceForwardsAssumeCapacity(copy_slice.second);
- self.written_count += sequence.match_length;
- }
-
- const DecodeSequenceError = error{
- InvalidBitStream,
- EndOfStream,
- MalformedSequence,
- MalformedFseBits,
- } || DecodeLiteralsError;
-
- /// Decode one sequence from `bit_reader` into `dest`, written starting at
- /// `write_pos` and update FSE states if `last_sequence` is `false`.
- /// `prepare()` must be called for the block before attempting to decode
- /// sequences.
- ///
- /// Errors returned:
- /// - `error.MalformedSequence` if the decompressed sequence would be
- /// longer than `sequence_size_limit` or the sequence's offset is too
- /// large
- /// - `error.UnexpectedEndOfLiteralStream` if the decoder state's literal
- /// streams do not contain enough literals for the sequence (this may
- /// mean the literal stream or the sequence is malformed).
- /// - `error.InvalidBitStream` if the FSE sequence bitstream is malformed
- /// - `error.EndOfStream` if `bit_reader` does not contain enough bits
- /// - `error.DestTooSmall` if `dest` is not large enough to holde the
- /// decompressed sequence
- pub fn decodeSequenceSlice(
- self: *DecodeState,
- dest: []u8,
- write_pos: usize,
- bit_reader: *readers.ReverseBitReader,
- sequence_size_limit: usize,
- last_sequence: bool,
- ) (error{DestTooSmall} || DecodeSequenceError)!usize {
- const sequence = try self.nextSequence(bit_reader);
- const sequence_length = @as(usize, sequence.literal_length) + sequence.match_length;
- if (sequence_length > sequence_size_limit) return error.MalformedSequence;
- if (sequence_length > dest[write_pos..].len) return error.DestTooSmall;
-
- try self.executeSequenceSlice(dest, write_pos, sequence);
- if (!last_sequence) {
- try self.updateState(.literal, bit_reader);
- try self.updateState(.match, bit_reader);
- try self.updateState(.offset, bit_reader);
- }
- return sequence_length;
- }
-
- /// Decode one sequence from `bit_reader` into `dest`; see
- /// `decodeSequenceSlice`.
- pub fn decodeSequenceRingBuffer(
- self: *DecodeState,
- dest: *RingBuffer,
- bit_reader: anytype,
- sequence_size_limit: usize,
- last_sequence: bool,
- ) DecodeSequenceError!usize {
- const sequence = try self.nextSequence(bit_reader);
- const sequence_length = @as(usize, sequence.literal_length) + sequence.match_length;
- if (sequence_length > sequence_size_limit) return error.MalformedSequence;
-
- try self.executeSequenceRingBuffer(dest, sequence);
- if (!last_sequence) {
- try self.updateState(.literal, bit_reader);
- try self.updateState(.match, bit_reader);
- try self.updateState(.offset, bit_reader);
- }
- return sequence_length;
- }
-
- fn nextLiteralMultiStream(
- self: *DecodeState,
- ) error{BitStreamHasNoStartBit}!void {
- self.literal_stream_index += 1;
- try self.initLiteralStream(self.literal_streams.four[self.literal_stream_index]);
- }
-
- fn initLiteralStream(self: *DecodeState, bytes: []const u8) error{BitStreamHasNoStartBit}!void {
- try self.literal_stream_reader.init(bytes);
- }
-
- fn isLiteralStreamEmpty(self: *DecodeState) bool {
- switch (self.literal_streams) {
- .one => return self.literal_stream_reader.isEmpty(),
- .four => return self.literal_stream_index == 3 and self.literal_stream_reader.isEmpty(),
- }
- }
-
- const LiteralBitsError = error{
- BitStreamHasNoStartBit,
- UnexpectedEndOfLiteralStream,
- };
- fn readLiteralsBits(
- self: *DecodeState,
- bit_count_to_read: u16,
- ) LiteralBitsError!u16 {
- return self.literal_stream_reader.readBitsNoEof(u16, bit_count_to_read) catch bits: {
- if (self.literal_streams == .four and self.literal_stream_index < 3) {
- try self.nextLiteralMultiStream();
- break :bits self.literal_stream_reader.readBitsNoEof(u16, bit_count_to_read) catch
- return error.UnexpectedEndOfLiteralStream;
- } else {
- return error.UnexpectedEndOfLiteralStream;
- }
- };
- }
-
- const DecodeLiteralsError = error{
- MalformedLiteralsLength,
- NotFound,
- } || LiteralBitsError;
-
- /// Decode `len` bytes of literals into `dest`.
- ///
- /// Errors returned:
- /// - `error.MalformedLiteralsLength` if the number of literal bytes
- /// decoded by `self` plus `len` is greater than the regenerated size of
- /// `literals`
- /// - `error.UnexpectedEndOfLiteralStream` and `error.NotFound` if there
- /// are problems decoding Huffman compressed literals
- pub fn decodeLiteralsSlice(
- self: *DecodeState,
- dest: []u8,
- len: usize,
- ) DecodeLiteralsError!void {
- if (self.literal_written_count + len > self.literal_header.regenerated_size)
- return error.MalformedLiteralsLength;
-
- switch (self.literal_header.block_type) {
- .raw => {
- const literal_data = self.literal_streams.one[self.literal_written_count..][0..len];
- @memcpy(dest[0..len], literal_data);
- self.literal_written_count += len;
- self.written_count += len;
- },
- .rle => {
- for (0..len) |i| {
- dest[i] = self.literal_streams.one[0];
- }
- self.literal_written_count += len;
- self.written_count += len;
- },
- .compressed, .treeless => {
- // const written_bytes_per_stream = (literals.header.regenerated_size + 3) / 4;
- const huffman_tree = self.huffman_tree orelse unreachable;
- const max_bit_count = huffman_tree.max_bit_count;
- const starting_bit_count = LiteralsSection.HuffmanTree.weightToBitCount(
- huffman_tree.nodes[huffman_tree.symbol_count_minus_one].weight,
- max_bit_count,
- );
- var bits_read: u4 = 0;
- var huffman_tree_index: usize = huffman_tree.symbol_count_minus_one;
- var bit_count_to_read: u4 = starting_bit_count;
- for (0..len) |i| {
- var prefix: u16 = 0;
- while (true) {
- const new_bits = self.readLiteralsBits(bit_count_to_read) catch |err| {
- return err;
- };
- prefix <<= bit_count_to_read;
- prefix |= new_bits;
- bits_read += bit_count_to_read;
- const result = huffman_tree.query(huffman_tree_index, prefix) catch |err| {
- return err;
- };
-
- switch (result) {
- .symbol => |sym| {
- dest[i] = sym;
- bit_count_to_read = starting_bit_count;
- bits_read = 0;
- huffman_tree_index = huffman_tree.symbol_count_minus_one;
- break;
- },
- .index => |index| {
- huffman_tree_index = index;
- const bit_count = LiteralsSection.HuffmanTree.weightToBitCount(
- huffman_tree.nodes[index].weight,
- max_bit_count,
- );
- bit_count_to_read = bit_count - bits_read;
- },
- }
- }
- }
- self.literal_written_count += len;
- self.written_count += len;
- },
- }
- }
-
- /// Decode literals into `dest`; see `decodeLiteralsSlice()`.
- pub fn decodeLiteralsRingBuffer(
- self: *DecodeState,
- dest: *RingBuffer,
- len: usize,
- ) DecodeLiteralsError!void {
- if (self.literal_written_count + len > self.literal_header.regenerated_size)
- return error.MalformedLiteralsLength;
-
- switch (self.literal_header.block_type) {
- .raw => {
- const literals_end = self.literal_written_count + len;
- const literal_data = self.literal_streams.one[self.literal_written_count..literals_end];
- dest.writeSliceAssumeCapacity(literal_data);
- self.literal_written_count += len;
- self.written_count += len;
- },
- .rle => {
- for (0..len) |_| {
- dest.writeAssumeCapacity(self.literal_streams.one[0]);
- }
- self.literal_written_count += len;
- self.written_count += len;
- },
- .compressed, .treeless => {
- // const written_bytes_per_stream = (literals.header.regenerated_size + 3) / 4;
- const huffman_tree = self.huffman_tree orelse unreachable;
- const max_bit_count = huffman_tree.max_bit_count;
- const starting_bit_count = LiteralsSection.HuffmanTree.weightToBitCount(
- huffman_tree.nodes[huffman_tree.symbol_count_minus_one].weight,
- max_bit_count,
- );
- var bits_read: u4 = 0;
- var huffman_tree_index: usize = huffman_tree.symbol_count_minus_one;
- var bit_count_to_read: u4 = starting_bit_count;
- for (0..len) |_| {
- var prefix: u16 = 0;
- while (true) {
- const new_bits = try self.readLiteralsBits(bit_count_to_read);
- prefix <<= bit_count_to_read;
- prefix |= new_bits;
- bits_read += bit_count_to_read;
- const result = try huffman_tree.query(huffman_tree_index, prefix);
-
- switch (result) {
- .symbol => |sym| {
- dest.writeAssumeCapacity(sym);
- bit_count_to_read = starting_bit_count;
- bits_read = 0;
- huffman_tree_index = huffman_tree.symbol_count_minus_one;
- break;
- },
- .index => |index| {
- huffman_tree_index = index;
- const bit_count = LiteralsSection.HuffmanTree.weightToBitCount(
- huffman_tree.nodes[index].weight,
- max_bit_count,
- );
- bit_count_to_read = bit_count - bits_read;
- },
- }
- }
- }
- self.literal_written_count += len;
- self.written_count += len;
- },
- }
- }
-
- fn getCode(self: *DecodeState, comptime choice: DataType) u32 {
- return switch (@field(self, @tagName(choice)).table) {
- .rle => |value| value,
- .fse => |table| table[@field(self, @tagName(choice)).state].symbol,
- };
- }
-};
-
-/// Decode a single block from `src` into `dest`. The beginning of `src` must be
-/// the start of the block content (i.e. directly after the block header).
-/// Increments `consumed_count` by the number of bytes read from `src` to decode
-/// the block and returns the decompressed size of the block.
-///
-/// Errors returned:
-///
-/// - `error.BlockSizeOverMaximum` if block's size is larger than 1 << 17 or
-/// `dest[written_count..].len`
-/// - `error.MalformedBlockSize` if `src.len` is smaller than the block size
-/// and the block is a raw or compressed block
-/// - `error.ReservedBlock` if the block is a reserved block
-/// - `error.MalformedRleBlock` if the block is an RLE block and `src.len < 1`
-/// - `error.MalformedCompressedBlock` if there are errors decoding a
-/// compressed block
-/// - `error.DestTooSmall` is `dest` is not large enough to hold the
-/// decompressed block
-pub fn decodeBlock(
- dest: []u8,
- src: []const u8,
- block_header: frame.Zstandard.Block.Header,
- decode_state: *DecodeState,
- consumed_count: *usize,
- block_size_max: usize,
- written_count: usize,
-) (error{DestTooSmall} || Error)!usize {
- const block_size = block_header.block_size;
- if (block_size_max < block_size) return error.BlockSizeOverMaximum;
- switch (block_header.block_type) {
- .raw => {
- if (src.len < block_size) return error.MalformedBlockSize;
- if (dest[written_count..].len < block_size) return error.DestTooSmall;
- @memcpy(dest[written_count..][0..block_size], src[0..block_size]);
- consumed_count.* += block_size;
- decode_state.written_count += block_size;
- return block_size;
- },
- .rle => {
- if (src.len < 1) return error.MalformedRleBlock;
- if (dest[written_count..].len < block_size) return error.DestTooSmall;
- for (written_count..block_size + written_count) |write_pos| {
- dest[write_pos] = src[0];
- }
- consumed_count.* += 1;
- decode_state.written_count += block_size;
- return block_size;
- },
- .compressed => {
- if (src.len < block_size) return error.MalformedBlockSize;
- var bytes_read: usize = 0;
- const literals = decodeLiteralsSectionSlice(src[0..block_size], &bytes_read) catch
- return error.MalformedCompressedBlock;
- var fbs = std.io.fixedBufferStream(src[bytes_read..block_size]);
- const fbs_reader = fbs.reader();
- const sequences_header = decodeSequencesHeader(fbs_reader) catch
- return error.MalformedCompressedBlock;
-
- decode_state.prepare(fbs_reader, literals, sequences_header) catch
- return error.MalformedCompressedBlock;
-
- bytes_read += fbs.pos;
-
- var bytes_written: usize = 0;
- {
- const bit_stream_bytes = src[bytes_read..block_size];
- var bit_stream: readers.ReverseBitReader = undefined;
- bit_stream.init(bit_stream_bytes) catch return error.MalformedCompressedBlock;
-
- if (sequences_header.sequence_count > 0) {
- decode_state.readInitialFseState(&bit_stream) catch
- return error.MalformedCompressedBlock;
-
- var sequence_size_limit = block_size_max;
- for (0..sequences_header.sequence_count) |i| {
- const write_pos = written_count + bytes_written;
- const decompressed_size = decode_state.decodeSequenceSlice(
- dest,
- write_pos,
- &bit_stream,
- sequence_size_limit,
- i == sequences_header.sequence_count - 1,
- ) catch |err| switch (err) {
- error.DestTooSmall => return error.DestTooSmall,
- else => return error.MalformedCompressedBlock,
- };
- bytes_written += decompressed_size;
- sequence_size_limit -= decompressed_size;
- }
- }
-
- if (!bit_stream.isEmpty()) {
- return error.MalformedCompressedBlock;
- }
- }
-
- if (decode_state.literal_written_count < literals.header.regenerated_size) {
- const len = literals.header.regenerated_size - decode_state.literal_written_count;
- if (len > dest[written_count + bytes_written ..].len) return error.DestTooSmall;
- decode_state.decodeLiteralsSlice(dest[written_count + bytes_written ..], len) catch
- return error.MalformedCompressedBlock;
- bytes_written += len;
- }
-
- switch (decode_state.literal_header.block_type) {
- .treeless, .compressed => {
- if (!decode_state.isLiteralStreamEmpty()) return error.MalformedCompressedBlock;
- },
- .raw, .rle => {},
- }
-
- consumed_count.* += block_size;
- return bytes_written;
- },
- .reserved => return error.ReservedBlock,
- }
-}
-
-/// Decode a single block from `src` into `dest`; see `decodeBlock()`. Returns
-/// the size of the decompressed block, which can be used with `dest.sliceLast()`
-/// to get the decompressed bytes. `error.BlockSizeOverMaximum` is returned if
-/// the block's compressed or decompressed size is larger than `block_size_max`.
-pub fn decodeBlockRingBuffer(
- dest: *RingBuffer,
- src: []const u8,
- block_header: frame.Zstandard.Block.Header,
- decode_state: *DecodeState,
- consumed_count: *usize,
- block_size_max: usize,
-) Error!usize {
- const block_size = block_header.block_size;
- if (block_size_max < block_size) return error.BlockSizeOverMaximum;
- switch (block_header.block_type) {
- .raw => {
- if (src.len < block_size) return error.MalformedBlockSize;
- // dest may have length zero if block_size == 0, causing division by zero in
- // writeSliceAssumeCapacity()
- if (block_size > 0) {
- const data = src[0..block_size];
- dest.writeSliceAssumeCapacity(data);
- consumed_count.* += block_size;
- decode_state.written_count += block_size;
- }
- return block_size;
- },
- .rle => {
- if (src.len < 1) return error.MalformedRleBlock;
- for (0..block_size) |_| {
- dest.writeAssumeCapacity(src[0]);
- }
- consumed_count.* += 1;
- decode_state.written_count += block_size;
- return block_size;
- },
- .compressed => {
- if (src.len < block_size) return error.MalformedBlockSize;
- var bytes_read: usize = 0;
- const literals = decodeLiteralsSectionSlice(src[0..block_size], &bytes_read) catch
- return error.MalformedCompressedBlock;
- var fbs = std.io.fixedBufferStream(src[bytes_read..block_size]);
- const fbs_reader = fbs.reader();
- const sequences_header = decodeSequencesHeader(fbs_reader) catch
- return error.MalformedCompressedBlock;
-
- decode_state.prepare(fbs_reader, literals, sequences_header) catch
- return error.MalformedCompressedBlock;
-
- bytes_read += fbs.pos;
-
- var bytes_written: usize = 0;
- {
- const bit_stream_bytes = src[bytes_read..block_size];
- var bit_stream: readers.ReverseBitReader = undefined;
- bit_stream.init(bit_stream_bytes) catch return error.MalformedCompressedBlock;
-
- if (sequences_header.sequence_count > 0) {
- decode_state.readInitialFseState(&bit_stream) catch
- return error.MalformedCompressedBlock;
-
- var sequence_size_limit = block_size_max;
- for (0..sequences_header.sequence_count) |i| {
- const decompressed_size = decode_state.decodeSequenceRingBuffer(
- dest,
- &bit_stream,
- sequence_size_limit,
- i == sequences_header.sequence_count - 1,
- ) catch return error.MalformedCompressedBlock;
- bytes_written += decompressed_size;
- sequence_size_limit -= decompressed_size;
- }
- }
-
- if (!bit_stream.isEmpty()) {
- return error.MalformedCompressedBlock;
- }
- }
-
- if (decode_state.literal_written_count < literals.header.regenerated_size) {
- const len = literals.header.regenerated_size - decode_state.literal_written_count;
- decode_state.decodeLiteralsRingBuffer(dest, len) catch
- return error.MalformedCompressedBlock;
- bytes_written += len;
- }
-
- switch (decode_state.literal_header.block_type) {
- .treeless, .compressed => {
- if (!decode_state.isLiteralStreamEmpty()) return error.MalformedCompressedBlock;
- },
- .raw, .rle => {},
- }
-
- consumed_count.* += block_size;
- if (bytes_written > block_size_max) return error.BlockSizeOverMaximum;
- return bytes_written;
- },
- .reserved => return error.ReservedBlock,
- }
-}
-
-/// Decode a single block from `source` into `dest`. Literal and sequence data
-/// from the block is copied into `literals_buffer` and `sequence_buffer`, which
-/// must be large enough or `error.LiteralsBufferTooSmall` and
-/// `error.SequenceBufferTooSmall` are returned (the maximum block size is an
-/// upper bound for the size of both buffers). See `decodeBlock`
-/// and `decodeBlockRingBuffer` for function that can decode a block without
-/// these extra copies. `error.EndOfStream` is returned if `source` does not
-/// contain enough bytes.
-pub fn decodeBlockReader(
- dest: *RingBuffer,
- source: anytype,
- block_header: frame.Zstandard.Block.Header,
- decode_state: *DecodeState,
- block_size_max: usize,
- literals_buffer: []u8,
- sequence_buffer: []u8,
-) !void {
- const block_size = block_header.block_size;
- var block_reader_limited = std.io.limitedReader(source, block_size);
- const block_reader = block_reader_limited.reader();
- if (block_size_max < block_size) return error.BlockSizeOverMaximum;
- switch (block_header.block_type) {
- .raw => {
- if (block_size == 0) return;
- const slice = dest.sliceAt(dest.write_index, block_size);
- try source.readNoEof(slice.first);
- try source.readNoEof(slice.second);
- dest.write_index = dest.mask2(dest.write_index + block_size);
- decode_state.written_count += block_size;
- },
- .rle => {
- const byte = try source.readByte();
- for (0..block_size) |_| {
- dest.writeAssumeCapacity(byte);
- }
- decode_state.written_count += block_size;
- },
- .compressed => {
- const literals = try decodeLiteralsSection(block_reader, literals_buffer);
- const sequences_header = try decodeSequencesHeader(block_reader);
-
- try decode_state.prepare(block_reader, literals, sequences_header);
-
- var bytes_written: usize = 0;
- {
- const size = try block_reader.readAll(sequence_buffer);
- var bit_stream: readers.ReverseBitReader = undefined;
- try bit_stream.init(sequence_buffer[0..size]);
-
- if (sequences_header.sequence_count > 0) {
- if (sequence_buffer.len < block_reader_limited.bytes_left)
- return error.SequenceBufferTooSmall;
-
- decode_state.readInitialFseState(&bit_stream) catch
- return error.MalformedCompressedBlock;
-
- var sequence_size_limit = block_size_max;
- for (0..sequences_header.sequence_count) |i| {
- const decompressed_size = decode_state.decodeSequenceRingBuffer(
- dest,
- &bit_stream,
- sequence_size_limit,
- i == sequences_header.sequence_count - 1,
- ) catch return error.MalformedCompressedBlock;
- sequence_size_limit -= decompressed_size;
- bytes_written += decompressed_size;
- }
- }
-
- if (!bit_stream.isEmpty()) {
- return error.MalformedCompressedBlock;
- }
- }
-
- if (decode_state.literal_written_count < literals.header.regenerated_size) {
- const len = literals.header.regenerated_size - decode_state.literal_written_count;
- decode_state.decodeLiteralsRingBuffer(dest, len) catch
- return error.MalformedCompressedBlock;
- bytes_written += len;
- }
-
- switch (decode_state.literal_header.block_type) {
- .treeless, .compressed => {
- if (!decode_state.isLiteralStreamEmpty()) return error.MalformedCompressedBlock;
- },
- .raw, .rle => {},
- }
-
- if (bytes_written > block_size_max) return error.BlockSizeOverMaximum;
- if (block_reader_limited.bytes_left != 0) return error.MalformedCompressedBlock;
- decode_state.literal_written_count = 0;
- },
- .reserved => return error.ReservedBlock,
- }
-}
-
-/// Decode the header of a block.
-pub fn decodeBlockHeader(src: *const [3]u8) frame.Zstandard.Block.Header {
- const last_block = src[0] & 1 == 1;
- const block_type = @as(frame.Zstandard.Block.Type, @enumFromInt((src[0] & 0b110) >> 1));
- const block_size = ((src[0] & 0b11111000) >> 3) + (@as(u21, src[1]) << 5) + (@as(u21, src[2]) << 13);
- return .{
- .last_block = last_block,
- .block_type = block_type,
- .block_size = block_size,
- };
-}
-
-/// Decode the header of a block.
-///
-/// Errors returned:
-/// - `error.EndOfStream` if `src.len < 3`
-pub fn decodeBlockHeaderSlice(src: []const u8) error{EndOfStream}!frame.Zstandard.Block.Header {
- if (src.len < 3) return error.EndOfStream;
- return decodeBlockHeader(src[0..3]);
-}
-
-/// Decode a `LiteralsSection` from `src`, incrementing `consumed_count` by the
-/// number of bytes the section uses.
-///
-/// Errors returned:
-/// - `error.MalformedLiteralsHeader` if the header is invalid
-/// - `error.MalformedLiteralsSection` if there are decoding errors
-/// - `error.MalformedAccuracyLog` if compressed literals have invalid
-/// accuracy
-/// - `error.MalformedFseTable` if compressed literals have invalid FSE table
-/// - `error.MalformedHuffmanTree` if there are errors decoding a Huffamn tree
-/// - `error.EndOfStream` if there are not enough bytes in `src`
-pub fn decodeLiteralsSectionSlice(
- src: []const u8,
- consumed_count: *usize,
-) (error{ MalformedLiteralsHeader, MalformedLiteralsSection, EndOfStream } || huffman.Error)!LiteralsSection {
- var bytes_read: usize = 0;
- const header = header: {
- var fbs = std.io.fixedBufferStream(src);
- defer bytes_read = fbs.pos;
- break :header decodeLiteralsHeader(fbs.reader()) catch return error.MalformedLiteralsHeader;
- };
- switch (header.block_type) {
- .raw => {
- if (src.len < bytes_read + header.regenerated_size) return error.MalformedLiteralsSection;
- const stream = src[bytes_read..][0..header.regenerated_size];
- consumed_count.* += header.regenerated_size + bytes_read;
- return LiteralsSection{
- .header = header,
- .huffman_tree = null,
- .streams = .{ .one = stream },
- };
- },
- .rle => {
- if (src.len < bytes_read + 1) return error.MalformedLiteralsSection;
- const stream = src[bytes_read..][0..1];
- consumed_count.* += 1 + bytes_read;
- return LiteralsSection{
- .header = header,
- .huffman_tree = null,
- .streams = .{ .one = stream },
- };
- },
- .compressed, .treeless => {
- const huffman_tree_start = bytes_read;
- const huffman_tree = if (header.block_type == .compressed)
- try huffman.decodeHuffmanTreeSlice(src[bytes_read..], &bytes_read)
- else
- null;
- const huffman_tree_size = bytes_read - huffman_tree_start;
- const total_streams_size = std.math.sub(usize, header.compressed_size.?, huffman_tree_size) catch
- return error.MalformedLiteralsSection;
-
- if (src.len < bytes_read + total_streams_size) return error.MalformedLiteralsSection;
- const stream_data = src[bytes_read .. bytes_read + total_streams_size];
-
- const streams = try decodeStreams(header.size_format, stream_data);
- consumed_count.* += bytes_read + total_streams_size;
- return LiteralsSection{
- .header = header,
- .huffman_tree = huffman_tree,
- .streams = streams,
- };
- },
- }
-}
-
-/// Decode a `LiteralsSection` from `src`, incrementing `consumed_count` by the
-/// number of bytes the section uses. See `decodeLiterasSectionSlice()`.
-pub fn decodeLiteralsSection(
- source: anytype,
- buffer: []u8,
-) !LiteralsSection {
- const header = try decodeLiteralsHeader(source);
- switch (header.block_type) {
- .raw => {
- if (buffer.len < header.regenerated_size) return error.LiteralsBufferTooSmall;
- try source.readNoEof(buffer[0..header.regenerated_size]);
- return LiteralsSection{
- .header = header,
- .huffman_tree = null,
- .streams = .{ .one = buffer },
- };
- },
- .rle => {
- buffer[0] = try source.readByte();
- return LiteralsSection{
- .header = header,
- .huffman_tree = null,
- .streams = .{ .one = buffer[0..1] },
- };
- },
- .compressed, .treeless => {
- var counting_reader = std.io.countingReader(source);
- const huffman_tree = if (header.block_type == .compressed)
- try huffman.decodeHuffmanTree(counting_reader.reader(), buffer)
- else
- null;
- const huffman_tree_size = @as(usize, @intCast(counting_reader.bytes_read));
- const total_streams_size = std.math.sub(usize, header.compressed_size.?, huffman_tree_size) catch
- return error.MalformedLiteralsSection;
-
- if (total_streams_size > buffer.len) return error.LiteralsBufferTooSmall;
- try source.readNoEof(buffer[0..total_streams_size]);
- const stream_data = buffer[0..total_streams_size];
-
- const streams = try decodeStreams(header.size_format, stream_data);
- return LiteralsSection{
- .header = header,
- .huffman_tree = huffman_tree,
- .streams = streams,
- };
- },
- }
-}
-
-fn decodeStreams(size_format: u2, stream_data: []const u8) !LiteralsSection.Streams {
- if (size_format == 0) {
- return .{ .one = stream_data };
- }
-
- if (stream_data.len < 6) return error.MalformedLiteralsSection;
-
- const stream_1_length: usize = std.mem.readInt(u16, stream_data[0..2], .little);
- const stream_2_length: usize = std.mem.readInt(u16, stream_data[2..4], .little);
- const stream_3_length: usize = std.mem.readInt(u16, stream_data[4..6], .little);
-
- const stream_1_start = 6;
- const stream_2_start = stream_1_start + stream_1_length;
- const stream_3_start = stream_2_start + stream_2_length;
- const stream_4_start = stream_3_start + stream_3_length;
-
- if (stream_data.len < stream_4_start) return error.MalformedLiteralsSection;
-
- return .{ .four = .{
- stream_data[stream_1_start .. stream_1_start + stream_1_length],
- stream_data[stream_2_start .. stream_2_start + stream_2_length],
- stream_data[stream_3_start .. stream_3_start + stream_3_length],
- stream_data[stream_4_start..],
- } };
-}
-
-/// Decode a literals section header.
-///
-/// Errors returned:
-/// - `error.EndOfStream` if there are not enough bytes in `source`
-pub fn decodeLiteralsHeader(source: anytype) !LiteralsSection.Header {
- const byte0 = try source.readByte();
- const block_type = @as(LiteralsSection.BlockType, @enumFromInt(byte0 & 0b11));
- const size_format = @as(u2, @intCast((byte0 & 0b1100) >> 2));
- var regenerated_size: u20 = undefined;
- var compressed_size: ?u18 = null;
- switch (block_type) {
- .raw, .rle => {
- switch (size_format) {
- 0, 2 => {
- regenerated_size = byte0 >> 3;
- },
- 1 => regenerated_size = (byte0 >> 4) + (@as(u20, try source.readByte()) << 4),
- 3 => regenerated_size = (byte0 >> 4) +
- (@as(u20, try source.readByte()) << 4) +
- (@as(u20, try source.readByte()) << 12),
- }
- },
- .compressed, .treeless => {
- const byte1 = try source.readByte();
- const byte2 = try source.readByte();
- switch (size_format) {
- 0, 1 => {
- regenerated_size = (byte0 >> 4) + ((@as(u20, byte1) & 0b00111111) << 4);
- compressed_size = ((byte1 & 0b11000000) >> 6) + (@as(u18, byte2) << 2);
- },
- 2 => {
- const byte3 = try source.readByte();
- regenerated_size = (byte0 >> 4) + (@as(u20, byte1) << 4) + ((@as(u20, byte2) & 0b00000011) << 12);
- compressed_size = ((byte2 & 0b11111100) >> 2) + (@as(u18, byte3) << 6);
- },
- 3 => {
- const byte3 = try source.readByte();
- const byte4 = try source.readByte();
- regenerated_size = (byte0 >> 4) + (@as(u20, byte1) << 4) + ((@as(u20, byte2) & 0b00111111) << 12);
- compressed_size = ((byte2 & 0b11000000) >> 6) + (@as(u18, byte3) << 2) + (@as(u18, byte4) << 10);
- },
- }
- },
- }
- return LiteralsSection.Header{
- .block_type = block_type,
- .size_format = size_format,
- .regenerated_size = regenerated_size,
- .compressed_size = compressed_size,
- };
-}
-
-/// Decode a sequences section header.
-///
-/// Errors returned:
-/// - `error.ReservedBitSet` if the reserved bit is set
-/// - `error.EndOfStream` if there are not enough bytes in `source`
-pub fn decodeSequencesHeader(
- source: anytype,
-) !SequencesSection.Header {
- var sequence_count: u24 = undefined;
-
- const byte0 = try source.readByte();
- if (byte0 == 0) {
- return SequencesSection.Header{
- .sequence_count = 0,
- .offsets = undefined,
- .match_lengths = undefined,
- .literal_lengths = undefined,
- };
- } else if (byte0 < 128) {
- sequence_count = byte0;
- } else if (byte0 < 255) {
- sequence_count = (@as(u24, (byte0 - 128)) << 8) + try source.readByte();
- } else {
- sequence_count = (try source.readByte()) + (@as(u24, try source.readByte()) << 8) + 0x7F00;
- }
-
- const compression_modes = try source.readByte();
-
- const matches_mode = @as(SequencesSection.Header.Mode, @enumFromInt((compression_modes & 0b00001100) >> 2));
- const offsets_mode = @as(SequencesSection.Header.Mode, @enumFromInt((compression_modes & 0b00110000) >> 4));
- const literal_mode = @as(SequencesSection.Header.Mode, @enumFromInt((compression_modes & 0b11000000) >> 6));
- if (compression_modes & 0b11 != 0) return error.ReservedBitSet;
-
- return SequencesSection.Header{
- .sequence_count = sequence_count,
- .offsets = offsets_mode,
- .match_lengths = matches_mode,
- .literal_lengths = literal_mode,
- };
-}