Diffstat (limited to 'lib')
81 files changed, 4561 insertions, 6842 deletions
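The build_runner.zig hunks that open the diff route the overall build result into std.Progress so the terminal can reflect success or failure while steps run. A minimal sketch of that flow, using only the setStatus calls the diff itself makes (.success, .failure, and .failure_working for a step that fails while others are still in flight):

const std = @import("std");

// Sketch: summary-time reporting, mirroring runStepNames in the diff below.
fn reportFinalStatus(failure_count: usize) void {
    if (failure_count == 0) {
        std.Progress.setStatus(.success);
    } else {
        std.Progress.setStatus(.failure);
    }
}

// Sketch: a worker marks failure eagerly while other steps keep running,
// mirroring workerMakeOneStep in the diff below.
fn noteStepFailure() void {
    std.Progress.setStatus(.failure_working);
}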
diff --git a/lib/compiler/build_runner.zig b/lib/compiler/build_runner.zig index 693e9b4c70..6b7266ee71 100644 --- a/lib/compiler/build_runner.zig +++ b/lib/compiler/build_runner.zig @@ -696,8 +696,11 @@ fn runStepNames( .failures, .none => true, else => false, }; - if (failure_count == 0 and failures_only) { - return run.cleanExit(); + if (failure_count == 0) { + std.Progress.setStatus(.success); + if (failures_only) return run.cleanExit(); + } else { + std.Progress.setStatus(.failure); } const ttyconf = run.ttyconf; @@ -708,7 +711,7 @@ fn runStepNames( const total_count = success_count + failure_count + pending_count + skipped_count; ttyconf.setColor(w, .cyan) catch {}; - w.writeAll("Build Summary:") catch {}; + w.writeAll("\nBuild Summary:") catch {}; ttyconf.setColor(w, .reset) catch {}; w.print(" {d}/{d} steps succeeded", .{ success_count, total_count }) catch {}; if (skipped_count > 0) w.print("; {d} skipped", .{skipped_count}) catch {}; @@ -1149,6 +1152,7 @@ fn workerMakeOneStep( } else |err| switch (err) { error.MakeFailed => { @atomicStore(Step.State, &s.state, .failure, .seq_cst); + std.Progress.setStatus(.failure_working); break :handle_result; }, error.MakeSkipped => @atomicStore(Step.State, &s.state, .skipped, .seq_cst), diff --git a/lib/compiler/objcopy.zig b/lib/compiler/objcopy.zig index 52ffe208f6..5908f8b73d 100644 --- a/lib/compiler/objcopy.zig +++ b/lib/compiler/objcopy.zig @@ -13,6 +13,9 @@ const Server = std.zig.Server; var stdin_buffer: [1024]u8 = undefined; var stdout_buffer: [1024]u8 = undefined; +var input_buffer: [1024]u8 = undefined; +var output_buffer: [1024]u8 = undefined; + pub fn main() !void { var arena_instance = std.heap.ArenaAllocator.init(std.heap.page_allocator); defer arena_instance.deinit(); @@ -145,13 +148,16 @@ fn cmdObjCopy(gpa: Allocator, arena: Allocator, args: []const []const u8) !void const input = opt_input orelse fatal("expected input parameter", .{}); const output = opt_output orelse fatal("expected output parameter", .{}); - var in_file = fs.cwd().openFile(input, .{}) catch |err| - fatal("unable to open '{s}': {s}", .{ input, @errorName(err) }); - defer in_file.close(); + const input_file = fs.cwd().openFile(input, .{}) catch |err| fatal("failed to open {s}: {t}", .{ input, err }); + defer input_file.close(); + + const stat = input_file.stat() catch |err| fatal("failed to stat {s}: {t}", .{ input, err }); - const elf_hdr = std.elf.Header.read(in_file) catch |err| switch (err) { - error.InvalidElfMagic => fatal("not an ELF file: '{s}'", .{input}), - else => fatal("unable to read '{s}': {s}", .{ input, @errorName(err) }), + var in: File.Reader = .initSize(input_file, &input_buffer, stat.size); + + const elf_hdr = std.elf.Header.read(&in.interface) catch |err| switch (err) { + error.ReadFailed => fatal("unable to read {s}: {t}", .{ input, in.err.? 
}), + else => |e| fatal("invalid elf file: {t}", .{e}), }; const in_ofmt = .elf; @@ -168,16 +174,12 @@ fn cmdObjCopy(gpa: Allocator, arena: Allocator, args: []const []const u8) !void } }; - const mode = mode: { - if (out_fmt != .elf or only_keep_debug) - break :mode fs.File.default_mode; - if (in_file.stat()) |stat| - break :mode stat.mode - else |_| - break :mode fs.File.default_mode; - }; - var out_file = try fs.cwd().createFile(output, .{ .mode = mode }); - defer out_file.close(); + const mode = if (out_fmt != .elf or only_keep_debug) fs.File.default_mode else stat.mode; + + var output_file = try fs.cwd().createFile(output, .{ .mode = mode }); + defer output_file.close(); + + var out = output_file.writer(&output_buffer); switch (out_fmt) { .hex, .raw => { @@ -192,7 +194,7 @@ fn cmdObjCopy(gpa: Allocator, arena: Allocator, args: []const []const u8) !void if (set_section_flags != null) fatal("zig objcopy: ELF to RAW or HEX copying does not support --set_section_flags", .{}); - try emitElf(arena, in_file, out_file, elf_hdr, .{ + try emitElf(arena, &in, &out, elf_hdr, .{ .ofmt = out_fmt, .only_section = only_section, .pad_to = pad_to, @@ -208,22 +210,13 @@ fn cmdObjCopy(gpa: Allocator, arena: Allocator, args: []const []const u8) !void if (pad_to) |_| fatal("zig objcopy: ELF to ELF copying does not support --pad-to", .{}); - try stripElf(arena, in_file, out_file, elf_hdr, .{ - .strip_debug = strip_debug, - .strip_all = strip_all, - .only_keep_debug = only_keep_debug, - .add_debuglink = opt_add_debuglink, - .extract_to = opt_extract, - .compress_debug = compress_debug_sections, - .add_section = add_section, - .set_section_alignment = set_section_alignment, - .set_section_flags = set_section_flags, - }); - return std.process.cleanExit(); + fatal("unimplemented", .{}); }, else => fatal("unsupported output object format: {s}", .{@tagName(out_fmt)}), } + try out.end(); + if (listen) { var stdin_reader = fs.File.stdin().reader(&stdin_buffer); var stdout_writer = fs.File.stdout().writer(&stdout_buffer); @@ -304,12 +297,12 @@ const SetSectionFlags = struct { fn emitElf( arena: Allocator, - in_file: File, - out_file: File, + in: *File.Reader, + out: *File.Writer, elf_hdr: elf.Header, options: EmitRawElfOptions, ) !void { - var binary_elf_output = try BinaryElfOutput.parse(arena, in_file, elf_hdr); + var binary_elf_output = try BinaryElfOutput.parse(arena, in, elf_hdr); defer binary_elf_output.deinit(); if (options.ofmt == .elf) { @@ -328,8 +321,8 @@ fn emitElf( continue; } - try writeBinaryElfSection(in_file, out_file, section); - try padFile(out_file, options.pad_to); + try writeBinaryElfSection(in, out, section); + try padFile(out, options.pad_to); return; } }, @@ -342,10 +335,10 @@ fn emitElf( switch (options.ofmt) { .raw => { for (binary_elf_output.sections.items) |section| { - try out_file.seekTo(section.binaryOffset); - try writeBinaryElfSection(in_file, out_file, section); + try out.seekTo(section.binaryOffset); + try writeBinaryElfSection(in, out, section); } - try padFile(out_file, options.pad_to); + try padFile(out, options.pad_to); }, .hex => { if (binary_elf_output.segments.items.len == 0) return; @@ -353,15 +346,15 @@ fn emitElf( return error.InvalidHexfileAddressRange; } - var hex_writer = HexWriter{ .out_file = out_file }; + var hex_writer = HexWriter{ .out = out }; for (binary_elf_output.segments.items) |segment| { - try hex_writer.writeSegment(segment, in_file); + try hex_writer.writeSegment(segment, in); } if (options.pad_to) |_| { // Padding to a size in hex files isn't applicable 
return error.InvalidArgument; } - try hex_writer.writeEOF(); + try hex_writer.writeEof(); }, else => unreachable, } @@ -399,7 +392,7 @@ const BinaryElfOutput = struct { self.segments.deinit(self.allocator); } - pub fn parse(allocator: Allocator, elf_file: File, elf_hdr: elf.Header) !Self { + pub fn parse(allocator: Allocator, in: *File.Reader, elf_hdr: elf.Header) !Self { var self: Self = .{ .segments = .{}, .sections = .{}, @@ -412,7 +405,7 @@ const BinaryElfOutput = struct { self.shstrtab = blk: { if (elf_hdr.shstrndx >= elf_hdr.shnum) break :blk null; - var section_headers = elf_hdr.section_header_iterator(&elf_file); + var section_headers = elf_hdr.iterateSectionHeaders(in); var section_counter: usize = 0; while (section_counter < elf_hdr.shstrndx) : (section_counter += 1) { @@ -421,18 +414,13 @@ const BinaryElfOutput = struct { const shstrtab_shdr = (try section_headers.next()).?; - const buffer = try allocator.alloc(u8, @intCast(shstrtab_shdr.sh_size)); - errdefer allocator.free(buffer); - - const num_read = try elf_file.preadAll(buffer, shstrtab_shdr.sh_offset); - if (num_read != buffer.len) return error.EndOfStream; - - break :blk buffer; + try in.seekTo(shstrtab_shdr.sh_offset); + break :blk try in.interface.readAlloc(allocator, shstrtab_shdr.sh_size); }; errdefer if (self.shstrtab) |shstrtab| allocator.free(shstrtab); - var section_headers = elf_hdr.section_header_iterator(&elf_file); + var section_headers = elf_hdr.iterateSectionHeaders(in); while (try section_headers.next()) |section| { if (sectionValidForOutput(section)) { const newSection = try allocator.create(BinaryElfSection); @@ -451,7 +439,7 @@ const BinaryElfOutput = struct { } } - var program_headers = elf_hdr.program_header_iterator(&elf_file); + var program_headers = elf_hdr.iterateProgramHeaders(in); while (try program_headers.next()) |phdr| { if (phdr.p_type == elf.PT_LOAD) { const newSegment = try allocator.create(BinaryElfSegment); @@ -539,19 +527,17 @@ const BinaryElfOutput = struct { } }; -fn writeBinaryElfSection(elf_file: File, out_file: File, section: *BinaryElfSection) !void { - try out_file.writeFileAll(elf_file, .{ - .in_offset = section.elfOffset, - .in_len = section.fileSize, - }); +fn writeBinaryElfSection(in: *File.Reader, out: *File.Writer, section: *BinaryElfSection) !void { + try in.seekTo(section.elfOffset); + _ = try out.interface.sendFileAll(in, .limited(section.fileSize)); } const HexWriter = struct { prev_addr: ?u32 = null, - out_file: File, + out: *File.Writer, /// Max data bytes per line of output - const MAX_PAYLOAD_LEN: u8 = 16; + const max_payload_len: u8 = 16; fn addressParts(address: u16) [2]u8 { const msb: u8 = @truncate(address >> 8); @@ -627,13 +613,13 @@ const HexWriter = struct { return (sum ^ 0xFF) +% 1; } - fn write(self: Record, file: File) File.WriteError!void { + fn write(self: Record, out: *File.Writer) !void { const linesep = "\r\n"; // colon, (length, address, type, payload, checksum) as hex, CRLF - const BUFSIZE = 1 + (1 + 2 + 1 + MAX_PAYLOAD_LEN + 1) * 2 + linesep.len; + const BUFSIZE = 1 + (1 + 2 + 1 + max_payload_len + 1) * 2 + linesep.len; var outbuf: [BUFSIZE]u8 = undefined; const payload_bytes = self.getPayloadBytes(); - assert(payload_bytes.len <= MAX_PAYLOAD_LEN); + assert(payload_bytes.len <= max_payload_len); const line = try std.fmt.bufPrint(&outbuf, ":{0X:0>2}{1X:0>4}{2X:0>2}{3X}{4X:0>2}" ++ linesep, .{ @as(u8, @intCast(payload_bytes.len)), @@ -642,38 +628,37 @@ const HexWriter = struct { payload_bytes, self.checksum(), }); - try file.writeAll(line); + try 
out.interface.writeAll(line); } }; - pub fn writeSegment(self: *HexWriter, segment: *const BinaryElfSegment, elf_file: File) !void { - var buf: [MAX_PAYLOAD_LEN]u8 = undefined; + pub fn writeSegment(self: *HexWriter, segment: *const BinaryElfSegment, in: *File.Reader) !void { + var buf: [max_payload_len]u8 = undefined; var bytes_read: usize = 0; while (bytes_read < segment.fileSize) { const row_address: u32 = @intCast(segment.physicalAddress + bytes_read); const remaining = segment.fileSize - bytes_read; - const to_read: usize = @intCast(@min(remaining, MAX_PAYLOAD_LEN)); - const did_read = try elf_file.preadAll(buf[0..to_read], segment.elfOffset + bytes_read); - if (did_read < to_read) return error.UnexpectedEOF; + const dest = buf[0..@min(remaining, max_payload_len)]; + try in.seekTo(segment.elfOffset + bytes_read); + try in.interface.readSliceAll(dest); + try self.writeDataRow(row_address, dest); - try self.writeDataRow(row_address, buf[0..did_read]); - - bytes_read += did_read; + bytes_read += dest.len; } } - fn writeDataRow(self: *HexWriter, address: u32, data: []const u8) File.WriteError!void { + fn writeDataRow(self: *HexWriter, address: u32, data: []const u8) !void { const record = Record.Data(address, data); if (address > 0xFFFF and (self.prev_addr == null or record.address != self.prev_addr.?)) { - try Record.Address(address).write(self.out_file); + try Record.Address(address).write(self.out); } - try record.write(self.out_file); + try record.write(self.out); self.prev_addr = @intCast(record.address + data.len); } - fn writeEOF(self: HexWriter) File.WriteError!void { - try Record.EOF().write(self.out_file); + fn writeEof(self: HexWriter) !void { + try Record.EOF().write(self.out); } }; @@ -686,9 +671,9 @@ fn containsValidAddressRange(segments: []*BinaryElfSegment) bool { return true; } -fn padFile(f: File, opt_size: ?u64) !void { +fn padFile(out: *File.Writer, opt_size: ?u64) !void { const size = opt_size orelse return; - try f.setEndPos(size); + try out.file.setEndPos(size); } test "HexWriter.Record.Address has correct payload and checksum" { @@ -732,836 +717,6 @@ test "containsValidAddressRange" { try std.testing.expect(containsValidAddressRange(&buf)); } -// ------------- -// ELF to ELF stripping - -const StripElfOptions = struct { - extract_to: ?[]const u8 = null, - add_debuglink: ?[]const u8 = null, - strip_all: bool = false, - strip_debug: bool = false, - only_keep_debug: bool = false, - compress_debug: bool = false, - add_section: ?AddSection, - set_section_alignment: ?SetSectionAlignment, - set_section_flags: ?SetSectionFlags, -}; - -fn stripElf( - allocator: Allocator, - in_file: File, - out_file: File, - elf_hdr: elf.Header, - options: StripElfOptions, -) !void { - const Filter = ElfFileHelper.Filter; - const DebugLink = ElfFileHelper.DebugLink; - - const filter: Filter = filter: { - if (options.only_keep_debug) break :filter .debug; - if (options.strip_all) break :filter .program; - if (options.strip_debug) break :filter .program_and_symbols; - break :filter .all; - }; - - const filter_complement: ?Filter = blk: { - if (options.extract_to) |_| { - break :blk switch (filter) { - .program => .debug_and_symbols, - .debug => .program_and_symbols, - .program_and_symbols => .debug, - .debug_and_symbols => .program, - .all => fatal("zig objcopy: nothing to extract", .{}), - }; - } else { - break :blk null; - } - }; - const debuglink_path = path: { - if (options.add_debuglink) |path| break :path path; - if (options.extract_to) |path| break :path path; - break :path null; - }; 
- - switch (elf_hdr.is_64) { - inline else => |is_64| { - var elf_file = try ElfFile(is_64).parse(allocator, in_file, elf_hdr); - defer elf_file.deinit(); - - if (options.add_section) |user_section| { - for (elf_file.sections) |section| { - if (std.mem.eql(u8, section.name, user_section.section_name)) { - fatal("zig objcopy: unable to add section '{s}'. Section already exists in input", .{user_section.section_name}); - } - } - } - - if (filter_complement) |flt| { - // write the .dbg file and close it, so it can be read back to compute the debuglink checksum. - const path = options.extract_to.?; - const dbg_file = std.fs.cwd().createFile(path, .{}) catch |err| { - fatal("zig objcopy: unable to create '{s}': {s}", .{ path, @errorName(err) }); - }; - defer dbg_file.close(); - - try elf_file.emit(allocator, dbg_file, in_file, .{ .section_filter = flt, .compress_debug = options.compress_debug }); - } - - const debuglink: ?DebugLink = if (debuglink_path) |path| ElfFileHelper.createDebugLink(path) else null; - try elf_file.emit(allocator, out_file, in_file, .{ - .section_filter = filter, - .debuglink = debuglink, - .compress_debug = options.compress_debug, - .add_section = options.add_section, - .set_section_alignment = options.set_section_alignment, - .set_section_flags = options.set_section_flags, - }); - }, - } -} - -// note: this is "a minimal effort implementation" -// It doesn't support all possibile elf files: some sections type may need fixups, the program header may need fix up, ... -// It was written for a specific use case (strip debug info to a sperate file, for linux 64-bits executables built with `zig` or `zig c++` ) -// It moves and reoders the sections as little as possible to avoid having to do fixups. -// TODO: support non-native endianess - -fn ElfFile(comptime is_64: bool) type { - const Elf_Ehdr = if (is_64) elf.Elf64_Ehdr else elf.Elf32_Ehdr; - const Elf_Phdr = if (is_64) elf.Elf64_Phdr else elf.Elf32_Phdr; - const Elf_Shdr = if (is_64) elf.Elf64_Shdr else elf.Elf32_Shdr; - const Elf_Chdr = if (is_64) elf.Elf64_Chdr else elf.Elf32_Chdr; - const Elf_Sym = if (is_64) elf.Elf64_Sym else elf.Elf32_Sym; - const Elf_OffSize = if (is_64) elf.Elf64_Off else elf.Elf32_Off; - - return struct { - raw_elf_header: Elf_Ehdr, - program_segments: []const Elf_Phdr, - sections: []const Section, - arena: std.heap.ArenaAllocator, - - const SectionCategory = ElfFileHelper.SectionCategory; - const section_memory_align: std.mem.Alignment = .of(Elf_Sym); // most restrictive of what we may load in memory - const Section = struct { - section: Elf_Shdr, - name: []const u8 = "", - segment: ?*const Elf_Phdr = null, // if the section is used by a program segment (there can be more than one) - payload: ?[]align(section_memory_align.toByteUnits()) const u8 = null, // if we need the data in memory - category: SectionCategory = .none, // should the section be kept in the exe or stripped to the debug database, or both. 
- }; - - const Self = @This(); - - pub fn parse(gpa: Allocator, in_file: File, header: elf.Header) !Self { - var arena = std.heap.ArenaAllocator.init(gpa); - errdefer arena.deinit(); - const allocator = arena.allocator(); - - var raw_header: Elf_Ehdr = undefined; - { - const bytes_read = try in_file.preadAll(std.mem.asBytes(&raw_header), 0); - if (bytes_read < @sizeOf(Elf_Ehdr)) - return error.TRUNCATED_ELF; - } - - // program header: list of segments - const program_segments = blk: { - if (@sizeOf(Elf_Phdr) != header.phentsize) - fatal("zig objcopy: unsupported ELF file, unexpected phentsize ({d})", .{header.phentsize}); - - const program_header = try allocator.alloc(Elf_Phdr, header.phnum); - const bytes_read = try in_file.preadAll(std.mem.sliceAsBytes(program_header), header.phoff); - if (bytes_read < @sizeOf(Elf_Phdr) * header.phnum) - return error.TRUNCATED_ELF; - break :blk program_header; - }; - - // section header - const sections = blk: { - if (@sizeOf(Elf_Shdr) != header.shentsize) - fatal("zig objcopy: unsupported ELF file, unexpected shentsize ({d})", .{header.shentsize}); - - const section_header = try allocator.alloc(Section, header.shnum); - - const raw_section_header = try allocator.alloc(Elf_Shdr, header.shnum); - defer allocator.free(raw_section_header); - const bytes_read = try in_file.preadAll(std.mem.sliceAsBytes(raw_section_header), header.shoff); - if (bytes_read < @sizeOf(Elf_Phdr) * header.shnum) - return error.TRUNCATED_ELF; - - for (section_header, raw_section_header) |*section, hdr| { - section.* = .{ .section = hdr }; - } - break :blk section_header; - }; - - // load data to memory for some sections: - // string tables for access - // sections than need modifications when other sections move. - for (sections, 0..) |*section, idx| { - const need_data = switch (section.section.sh_type) { - elf.DT_VERSYM => true, - elf.SHT_SYMTAB, elf.SHT_DYNSYM => true, - else => false, - }; - const need_strings = (idx == header.shstrndx); - - if (need_data or need_strings) { - const buffer = try allocator.alignedAlloc(u8, section_memory_align, @intCast(section.section.sh_size)); - const bytes_read = try in_file.preadAll(buffer, section.section.sh_offset); - if (bytes_read != section.section.sh_size) return error.TRUNCATED_ELF; - section.payload = buffer; - } - } - - // fill-in sections info: - // resolve the name - // find if a program segment uses the section - // categorize sections usage (used by program segments, debug datadase, common metadata, symbol table) - for (sections) |*section| { - section.segment = for (program_segments) |*seg| { - if (sectionWithinSegment(section.section, seg.*)) break seg; - } else null; - - if (section.section.sh_name != 0 and header.shstrndx != elf.SHN_UNDEF) - section.name = std.mem.span(@as([*:0]const u8, @ptrCast(§ions[header.shstrndx].payload.?[section.section.sh_name]))); - - const category_from_program: SectionCategory = if (section.segment != null) .exe else .debug; - section.category = switch (section.section.sh_type) { - elf.SHT_NOTE => .common, - elf.SHT_SYMTAB => .symbols, // "strip all" vs "strip only debug" - elf.SHT_DYNSYM => .exe, - elf.SHT_PROGBITS => cat: { - if (std.mem.eql(u8, section.name, ".comment")) break :cat .exe; - if (std.mem.eql(u8, section.name, ".gnu_debuglink")) break :cat .none; - break :cat category_from_program; - }, - elf.SHT_LOPROC...elf.SHT_HIPROC => .common, // don't strip unknown sections - elf.SHT_LOUSER...elf.SHT_HIUSER => .common, // don't strip unknown sections - else => category_from_program, - }; - } 
- - sections[0].category = .common; // mandatory null section - if (header.shstrndx != elf.SHN_UNDEF) - sections[header.shstrndx].category = .common; // string table for the headers - - // recursively propagate section categories to their linked sections, so that they are kept together - var dirty: u1 = 1; - while (dirty != 0) { - dirty = 0; - - for (sections) |*section| { - if (section.section.sh_link != elf.SHN_UNDEF) - dirty |= ElfFileHelper.propagateCategory(§ions[section.section.sh_link].category, section.category); - if ((section.section.sh_flags & elf.SHF_INFO_LINK) != 0 and section.section.sh_info != elf.SHN_UNDEF) - dirty |= ElfFileHelper.propagateCategory(§ions[section.section.sh_info].category, section.category); - } - } - - return Self{ - .arena = arena, - .raw_elf_header = raw_header, - .program_segments = program_segments, - .sections = sections, - }; - } - - pub fn deinit(self: *Self) void { - self.arena.deinit(); - } - - const Filter = ElfFileHelper.Filter; - const DebugLink = ElfFileHelper.DebugLink; - const EmitElfOptions = struct { - section_filter: Filter = .all, - debuglink: ?DebugLink = null, - compress_debug: bool = false, - add_section: ?AddSection = null, - set_section_alignment: ?SetSectionAlignment = null, - set_section_flags: ?SetSectionFlags = null, - }; - fn emit(self: *const Self, gpa: Allocator, out_file: File, in_file: File, options: EmitElfOptions) !void { - var arena = std.heap.ArenaAllocator.init(gpa); - defer arena.deinit(); - const allocator = arena.allocator(); - - // when emitting the stripped exe: - // - unused sections are removed - // when emitting the debug file: - // - all sections are kept, but some are emptied and their types is changed to SHT_NOBITS - // the program header is kept unchanged. (`strip` does update it, but `eu-strip` does not, and it still works) - - const Update = struct { - action: ElfFileHelper.Action, - - // remap the indexs after omitting the filtered sections - remap_idx: u16, - - // optionally overrides the payload from the source file - payload: ?[]align(section_memory_align.toByteUnits()) const u8 = null, - section: ?Elf_Shdr = null, - }; - const sections_update = try allocator.alloc(Update, self.sections.len); - const new_shnum = blk: { - var next_idx: u16 = 0; - for (self.sections, sections_update) |section, *update| { - const action = ElfFileHelper.selectAction(section.category, options.section_filter); - const remap_idx = idx: { - if (action == .strip) break :idx elf.SHN_UNDEF; - next_idx += 1; - break :idx next_idx - 1; - }; - update.* = Update{ .action = action, .remap_idx = remap_idx }; - } - - if (options.debuglink != null) - next_idx += 1; - - if (options.add_section != null) { - next_idx += 1; - } - - break :blk next_idx; - }; - - // add a ".gnu_debuglink" to the string table if needed - const debuglink_name: u32 = blk: { - if (options.debuglink == null) break :blk elf.SHN_UNDEF; - if (self.raw_elf_header.e_shstrndx == elf.SHN_UNDEF) - fatal("zig objcopy: no strtab, cannot add the debuglink section", .{}); // TODO add the section if needed? 
- - const strtab = &self.sections[self.raw_elf_header.e_shstrndx]; - const update = §ions_update[self.raw_elf_header.e_shstrndx]; - - const name: []const u8 = ".gnu_debuglink"; - const new_offset: u32 = @intCast(strtab.payload.?.len); - const buf = try allocator.alignedAlloc(u8, section_memory_align, new_offset + name.len + 1); - @memcpy(buf[0..new_offset], strtab.payload.?); - @memcpy(buf[new_offset..][0..name.len], name); - buf[new_offset + name.len] = 0; - - assert(update.action == .keep); - update.payload = buf; - - break :blk new_offset; - }; - - // add user section to the string table if needed - const user_section_name: u32 = blk: { - if (options.add_section == null) break :blk elf.SHN_UNDEF; - if (self.raw_elf_header.e_shstrndx == elf.SHN_UNDEF) - fatal("zig objcopy: no strtab, cannot add the user section", .{}); // TODO add the section if needed? - - const strtab = &self.sections[self.raw_elf_header.e_shstrndx]; - const update = §ions_update[self.raw_elf_header.e_shstrndx]; - - const name = options.add_section.?.section_name; - const new_offset: u32 = @intCast(strtab.payload.?.len); - const buf = try allocator.alignedAlloc(u8, section_memory_align, new_offset + name.len + 1); - @memcpy(buf[0..new_offset], strtab.payload.?); - @memcpy(buf[new_offset..][0..name.len], name); - buf[new_offset + name.len] = 0; - - assert(update.action == .keep); - update.payload = buf; - - break :blk new_offset; - }; - - // maybe compress .debug sections - if (options.compress_debug) { - for (self.sections[1..], sections_update[1..]) |section, *update| { - if (update.action != .keep) continue; - if (!std.mem.startsWith(u8, section.name, ".debug_")) continue; - if ((section.section.sh_flags & elf.SHF_COMPRESSED) != 0) continue; // already compressed - - const chdr = Elf_Chdr{ - .ch_type = elf.COMPRESS.ZLIB, - .ch_size = section.section.sh_size, - .ch_addralign = section.section.sh_addralign, - }; - - const compressed_payload = try ElfFileHelper.tryCompressSection(allocator, in_file, section.section.sh_offset, section.section.sh_size, std.mem.asBytes(&chdr)); - if (compressed_payload) |payload| { - update.payload = payload; - update.section = section.section; - update.section.?.sh_addralign = @alignOf(Elf_Chdr); - update.section.?.sh_size = @intCast(payload.len); - update.section.?.sh_flags |= elf.SHF_COMPRESSED; - } - } - } - - var cmdbuf = std.ArrayList(ElfFileHelper.WriteCmd).init(allocator); - defer cmdbuf.deinit(); - try cmdbuf.ensureUnusedCapacity(3 + new_shnum); - var eof_offset: Elf_OffSize = 0; // track the end of the data written so far. - - // build the updated headers - // nb: updated_elf_header will be updated before the actual write - var updated_elf_header = self.raw_elf_header; - if (updated_elf_header.e_shstrndx != elf.SHN_UNDEF) - updated_elf_header.e_shstrndx = sections_update[updated_elf_header.e_shstrndx].remap_idx; - cmdbuf.appendAssumeCapacity(.{ .write_data = .{ .data = std.mem.asBytes(&updated_elf_header), .out_offset = 0 } }); - eof_offset = @sizeOf(Elf_Ehdr); - - // program header as-is. - // nb: for only-debug files, removing it appears to work, but is invalid by ELF specifcation. 
- { - assert(updated_elf_header.e_phoff == @sizeOf(Elf_Ehdr)); - const data = std.mem.sliceAsBytes(self.program_segments); - assert(data.len == @as(usize, updated_elf_header.e_phentsize) * updated_elf_header.e_phnum); - cmdbuf.appendAssumeCapacity(.{ .write_data = .{ .data = data, .out_offset = updated_elf_header.e_phoff } }); - eof_offset = updated_elf_header.e_phoff + @as(Elf_OffSize, @intCast(data.len)); - } - - // update sections and queue payload writes - const updated_section_header = blk: { - const dest_sections = try allocator.alloc(Elf_Shdr, new_shnum); - - { - // the ELF format doesn't specify the order for all sections. - // this code only supports when they are in increasing file order. - var offset: u64 = eof_offset; - for (self.sections[1..]) |section| { - if (section.section.sh_type == elf.SHT_NOBITS) - continue; - if (section.section.sh_offset < offset) { - fatal("zig objcopy: unsupported ELF file", .{}); - } - offset = section.section.sh_offset; - } - } - - dest_sections[0] = self.sections[0].section; - - var dest_section_idx: u32 = 1; - for (self.sections[1..], sections_update[1..]) |section, update| { - if (update.action == .strip) continue; - assert(update.remap_idx == dest_section_idx); - - const src = if (update.section) |*s| s else §ion.section; - const dest = &dest_sections[dest_section_idx]; - const payload = if (update.payload) |data| data else section.payload; - dest_section_idx += 1; - - dest.* = src.*; - - if (src.sh_link != elf.SHN_UNDEF) - dest.sh_link = sections_update[src.sh_link].remap_idx; - if ((src.sh_flags & elf.SHF_INFO_LINK) != 0 and src.sh_info != elf.SHN_UNDEF) - dest.sh_info = sections_update[src.sh_info].remap_idx; - - if (payload) |data| - dest.sh_size = @intCast(data.len); - - const addralign = if (src.sh_addralign == 0 or dest.sh_type == elf.SHT_NOBITS) 1 else src.sh_addralign; - dest.sh_offset = std.mem.alignForward(Elf_OffSize, eof_offset, addralign); - if (src.sh_offset != dest.sh_offset and section.segment != null and update.action != .empty and dest.sh_type != elf.SHT_NOTE and dest.sh_type != elf.SHT_NOBITS) { - if (src.sh_offset > dest.sh_offset) { - dest.sh_offset = src.sh_offset; // add padding to avoid modifing the program segments - } else { - fatal("zig objcopy: cannot adjust program segments", .{}); - } - } - assert(dest.sh_addr % addralign == dest.sh_offset % addralign); - - if (update.action == .empty) - dest.sh_type = elf.SHT_NOBITS; - - if (dest.sh_type != elf.SHT_NOBITS) { - if (payload) |src_data| { - // update sections payload and write - const dest_data = switch (src.sh_type) { - elf.DT_VERSYM => dst_data: { - const data = try allocator.alignedAlloc(u8, section_memory_align, src_data.len); - @memcpy(data, src_data); - - const defs = @as([*]elf.Verdef, @ptrCast(data))[0 .. @as(usize, @intCast(src.sh_size)) / @sizeOf(elf.Verdef)]; - for (defs) |*def| switch (def.ndx) { - .LOCAL, .GLOBAL => {}, - else => def.ndx = @enumFromInt(sections_update[src.sh_info].remap_idx), - }; - - break :dst_data data; - }, - elf.SHT_SYMTAB, elf.SHT_DYNSYM => dst_data: { - const data = try allocator.alignedAlloc(u8, section_memory_align, src_data.len); - @memcpy(data, src_data); - - const syms = @as([*]Elf_Sym, @ptrCast(data))[0 .. 
@as(usize, @intCast(src.sh_size)) / @sizeOf(Elf_Sym)]; - for (syms) |*sym| { - if (sym.st_shndx != elf.SHN_UNDEF and sym.st_shndx < elf.SHN_LORESERVE) - sym.st_shndx = sections_update[sym.st_shndx].remap_idx; - } - - break :dst_data data; - }, - else => src_data, - }; - - assert(dest_data.len == dest.sh_size); - cmdbuf.appendAssumeCapacity(.{ .write_data = .{ .data = dest_data, .out_offset = dest.sh_offset } }); - eof_offset = dest.sh_offset + dest.sh_size; - } else { - // direct contents copy - cmdbuf.appendAssumeCapacity(.{ .copy_range = .{ .in_offset = src.sh_offset, .len = dest.sh_size, .out_offset = dest.sh_offset } }); - eof_offset = dest.sh_offset + dest.sh_size; - } - } else { - // account for alignment padding even in empty sections to keep logical section order - eof_offset = dest.sh_offset; - } - } - - // add a ".gnu_debuglink" section - if (options.debuglink) |link| { - const payload = payload: { - const crc_offset = std.mem.alignForward(usize, link.name.len + 1, 4); - const buf = try allocator.alignedAlloc(u8, .@"4", crc_offset + 4); - @memcpy(buf[0..link.name.len], link.name); - @memset(buf[link.name.len..crc_offset], 0); - @memcpy(buf[crc_offset..], std.mem.asBytes(&link.crc32)); - break :payload buf; - }; - - dest_sections[dest_section_idx] = Elf_Shdr{ - .sh_name = debuglink_name, - .sh_type = elf.SHT_PROGBITS, - .sh_flags = 0, - .sh_addr = 0, - .sh_offset = eof_offset, - .sh_size = @intCast(payload.len), - .sh_link = elf.SHN_UNDEF, - .sh_info = elf.SHN_UNDEF, - .sh_addralign = 4, - .sh_entsize = 0, - }; - dest_section_idx += 1; - - cmdbuf.appendAssumeCapacity(.{ .write_data = .{ .data = payload, .out_offset = eof_offset } }); - eof_offset += @as(Elf_OffSize, @intCast(payload.len)); - } - - // --add-section - if (options.add_section) |add_section| { - var section_file = fs.cwd().openFile(add_section.file_path, .{}) catch |err| - fatal("unable to open '{s}': {s}", .{ add_section.file_path, @errorName(err) }); - defer section_file.close(); - - const payload = try section_file.readToEndAlloc(arena.allocator(), std.math.maxInt(usize)); - - dest_sections[dest_section_idx] = Elf_Shdr{ - .sh_name = user_section_name, - .sh_type = elf.SHT_PROGBITS, - .sh_flags = 0, - .sh_addr = 0, - .sh_offset = eof_offset, - .sh_size = @intCast(payload.len), - .sh_link = elf.SHN_UNDEF, - .sh_info = elf.SHN_UNDEF, - .sh_addralign = 4, - .sh_entsize = 0, - }; - dest_section_idx += 1; - - cmdbuf.appendAssumeCapacity(.{ .write_data = .{ .data = payload, .out_offset = eof_offset } }); - eof_offset += @as(Elf_OffSize, @intCast(payload.len)); - } - - assert(dest_section_idx == new_shnum); - break :blk dest_sections; - }; - - // --set-section-alignment: overwrite alignment - if (options.set_section_alignment) |set_align| { - if (self.raw_elf_header.e_shstrndx == elf.SHN_UNDEF) - fatal("zig objcopy: no strtab, cannot add the user section", .{}); // TODO add the section if needed? - - const strtab = §ions_update[self.raw_elf_header.e_shstrndx]; - for (updated_section_header) |*section| { - const section_name = std.mem.span(@as([*:0]const u8, @ptrCast(&strtab.payload.?[section.sh_name]))); - if (std.mem.eql(u8, section_name, set_align.section_name)) { - section.sh_addralign = set_align.alignment; - break; - } - } else std.log.warn("Skipping --set-section-alignment. 
Section '{s}' not found", .{set_align.section_name}); - } - - // --set-section-flags: overwrite flags - if (options.set_section_flags) |set_flags| { - if (self.raw_elf_header.e_shstrndx == elf.SHN_UNDEF) - fatal("zig objcopy: no strtab, cannot add the user section", .{}); // TODO add the section if needed? - - const strtab = §ions_update[self.raw_elf_header.e_shstrndx]; - for (updated_section_header) |*section| { - const section_name = std.mem.span(@as([*:0]const u8, @ptrCast(&strtab.payload.?[section.sh_name]))); - if (std.mem.eql(u8, section_name, set_flags.section_name)) { - section.sh_flags = std.elf.SHF_WRITE; // default is writable cleared by "readonly" - const f = set_flags.flags; - - // Supporting a subset of GNU and LLVM objcopy for ELF only - // GNU: - // alloc: add SHF_ALLOC - // contents: if section is SHT_NOBITS, set SHT_PROGBITS, otherwise do nothing - // load: if section is SHT_NOBITS, set SHT_PROGBITS, otherwise do nothing (same as contents) - // noload: not ELF relevant - // readonly: clear default SHF_WRITE flag - // code: add SHF_EXECINSTR - // data: not ELF relevant - // rom: ignored - // exclude: add SHF_EXCLUDE - // share: not ELF relevant - // debug: not ELF relevant - // large: add SHF_X86_64_LARGE. Fatal error if target is not x86_64 - if (f.alloc) section.sh_flags |= std.elf.SHF_ALLOC; - if (f.contents or f.load) { - if (section.sh_type == std.elf.SHT_NOBITS) section.sh_type = std.elf.SHT_PROGBITS; - } - if (f.readonly) section.sh_flags &= ~@as(@TypeOf(section.sh_type), std.elf.SHF_WRITE); - if (f.code) section.sh_flags |= std.elf.SHF_EXECINSTR; - if (f.exclude) section.sh_flags |= std.elf.SHF_EXCLUDE; - if (f.large) { - if (updated_elf_header.e_machine != std.elf.EM.X86_64) - fatal("zig objcopy: 'large' section flag is only supported on x86_64 targets", .{}); - section.sh_flags |= std.elf.SHF_X86_64_LARGE; - } - - // LLVM: - // merge: add SHF_MERGE - // strings: add SHF_STRINGS - if (f.merge) section.sh_flags |= std.elf.SHF_MERGE; - if (f.strings) section.sh_flags |= std.elf.SHF_STRINGS; - break; - } - } else std.log.warn("Skipping --set-section-flags. 
Section '{s}' not found", .{set_flags.section_name}); - } - - // write the section header at the tail - { - const offset = std.mem.alignForward(Elf_OffSize, eof_offset, @alignOf(Elf_Shdr)); - - const data = std.mem.sliceAsBytes(updated_section_header); - assert(data.len == @as(usize, updated_elf_header.e_shentsize) * new_shnum); - updated_elf_header.e_shoff = offset; - updated_elf_header.e_shnum = new_shnum; - - cmdbuf.appendAssumeCapacity(.{ .write_data = .{ .data = data, .out_offset = updated_elf_header.e_shoff } }); - } - - try ElfFileHelper.write(allocator, out_file, in_file, cmdbuf.items); - } - - fn sectionWithinSegment(section: Elf_Shdr, segment: Elf_Phdr) bool { - const file_size = if (section.sh_type == elf.SHT_NOBITS) 0 else section.sh_size; - return segment.p_offset <= section.sh_offset and (segment.p_offset + segment.p_filesz) >= (section.sh_offset + file_size); - } - }; -} - -const ElfFileHelper = struct { - const DebugLink = struct { name: []const u8, crc32: u32 }; - const Filter = enum { all, program, debug, program_and_symbols, debug_and_symbols }; - - const SectionCategory = enum { common, exe, debug, symbols, none }; - fn propagateCategory(cur: *SectionCategory, new: SectionCategory) u1 { - const cat: SectionCategory = switch (cur.*) { - .none => new, - .common => .common, - .debug => switch (new) { - .none, .debug => .debug, - else => new, - }, - .exe => switch (new) { - .common => .common, - .none, .debug, .exe => .exe, - .symbols => .exe, - }, - .symbols => switch (new) { - .none, .common, .debug, .exe => unreachable, - .symbols => .symbols, - }, - }; - - if (cur.* != cat) { - cur.* = cat; - return 1; - } else { - return 0; - } - } - - const Action = enum { keep, strip, empty }; - fn selectAction(category: SectionCategory, filter: Filter) Action { - if (category == .none) return .strip; - return switch (filter) { - .all => switch (category) { - .none => .strip, - else => .keep, - }, - .program => switch (category) { - .common, .exe => .keep, - else => .strip, - }, - .program_and_symbols => switch (category) { - .common, .exe, .symbols => .keep, - else => .strip, - }, - .debug => switch (category) { - .exe, .symbols => .empty, - .none => .strip, - else => .keep, - }, - .debug_and_symbols => switch (category) { - .exe => .empty, - .none => .strip, - else => .keep, - }, - }; - } - - const WriteCmd = union(enum) { - copy_range: struct { in_offset: u64, len: u64, out_offset: u64 }, - write_data: struct { data: []const u8, out_offset: u64 }, - }; - fn write(allocator: Allocator, out_file: File, in_file: File, cmds: []const WriteCmd) !void { - // consolidate holes between writes: - // by coping original padding data from in_file (by fusing contiguous ranges) - // by writing zeroes otherwise - const zeroes = [1]u8{0} ** 4096; - var consolidated = std.ArrayList(WriteCmd).init(allocator); - defer consolidated.deinit(); - try consolidated.ensureUnusedCapacity(cmds.len * 2); - var offset: u64 = 0; - var fused_cmd: ?WriteCmd = null; - for (cmds) |cmd| { - switch (cmd) { - .write_data => |data| { - assert(data.out_offset >= offset); - if (fused_cmd) |prev| { - consolidated.appendAssumeCapacity(prev); - fused_cmd = null; - } - if (data.out_offset > offset) { - consolidated.appendAssumeCapacity(.{ .write_data = .{ .data = zeroes[0..@intCast(data.out_offset - offset)], .out_offset = offset } }); - } - consolidated.appendAssumeCapacity(cmd); - offset = data.out_offset + data.data.len; - }, - .copy_range => |range| { - assert(range.out_offset >= offset); - if (fused_cmd) |prev| { - if 
(range.in_offset >= prev.copy_range.in_offset + prev.copy_range.len and (range.out_offset - prev.copy_range.out_offset == range.in_offset - prev.copy_range.in_offset)) { - fused_cmd = .{ .copy_range = .{ - .in_offset = prev.copy_range.in_offset, - .out_offset = prev.copy_range.out_offset, - .len = (range.out_offset + range.len) - prev.copy_range.out_offset, - } }; - } else { - consolidated.appendAssumeCapacity(prev); - if (range.out_offset > offset) { - consolidated.appendAssumeCapacity(.{ .write_data = .{ .data = zeroes[0..@intCast(range.out_offset - offset)], .out_offset = offset } }); - } - fused_cmd = cmd; - } - } else { - fused_cmd = cmd; - } - offset = range.out_offset + range.len; - }, - } - } - if (fused_cmd) |cmd| { - consolidated.appendAssumeCapacity(cmd); - } - - // write the output file - for (consolidated.items) |cmd| { - switch (cmd) { - .write_data => |data| { - var iovec = [_]std.posix.iovec_const{.{ .base = data.data.ptr, .len = data.data.len }}; - try out_file.pwritevAll(&iovec, data.out_offset); - }, - .copy_range => |range| { - const copied_bytes = try in_file.copyRangeAll(range.in_offset, out_file, range.out_offset, range.len); - if (copied_bytes < range.len) return error.TRUNCATED_ELF; - }, - } - } - } - - fn tryCompressSection(allocator: Allocator, in_file: File, offset: u64, size: u64, prefix: []const u8) !?[]align(8) const u8 { - if (size < prefix.len) return null; - - try in_file.seekTo(offset); - var section_reader = std.io.limitedReader(in_file.deprecatedReader(), size); - - // allocate as large as decompressed data. if the compression doesn't fit, keep the data uncompressed. - const compressed_data = try allocator.alignedAlloc(u8, .@"8", @intCast(size)); - var compressed_stream = std.io.fixedBufferStream(compressed_data); - - try compressed_stream.writer().writeAll(prefix); - - { - var compressor = try std.compress.zlib.compressor(compressed_stream.writer(), .{}); - - var buf: [8000]u8 = undefined; - while (true) { - const bytes_read = try section_reader.read(&buf); - if (bytes_read == 0) break; - const bytes_written = compressor.write(buf[0..bytes_read]) catch |err| switch (err) { - error.NoSpaceLeft => { - allocator.free(compressed_data); - return null; - }, - else => return err, - }; - std.debug.assert(bytes_written == bytes_read); - } - compressor.finish() catch |err| switch (err) { - error.NoSpaceLeft => { - allocator.free(compressed_data); - return null; - }, - else => return err, - }; - } - - const compressed_len: usize = @intCast(compressed_stream.getPos() catch unreachable); - const data = allocator.realloc(compressed_data, compressed_len) catch compressed_data; - return data[0..compressed_len]; - } - - fn createDebugLink(path: []const u8) DebugLink { - const file = std.fs.cwd().openFile(path, .{}) catch |err| { - fatal("zig objcopy: could not open `{s}`: {s}\n", .{ path, @errorName(err) }); - }; - defer file.close(); - - const crc = ElfFileHelper.computeFileCrc(file) catch |err| { - fatal("zig objcopy: could not read `{s}`: {s}\n", .{ path, @errorName(err) }); - }; - return .{ - .name = std.fs.path.basename(path), - .crc32 = crc, - }; - } - - fn computeFileCrc(file: File) !u32 { - var buf: [8000]u8 = undefined; - - try file.seekTo(0); - var hasher = std.hash.Crc32.init(); - while (true) { - const bytes_read = try file.read(&buf); - if (bytes_read == 0) break; - hasher.update(buf[0..bytes_read]); - } - return hasher.final(); - } -}; - const SectionFlags = packed struct { alloc: bool = false, contents: bool = false, diff --git 
a/lib/compiler/std-docs.zig b/lib/compiler/std-docs.zig index b5bc742717..74a9c65849 100644 --- a/lib/compiler/std-docs.zig +++ b/lib/compiler/std-docs.zig @@ -60,7 +60,9 @@ pub fn main() !void { const should_open_browser = force_open_browser orelse (listen_port == 0); const address = std.net.Address.parseIp("127.0.0.1", listen_port) catch unreachable; - var http_server = try address.listen(.{}); + var http_server = try address.listen(.{ + .reuse_address = true, + }); const port = http_server.listen_address.in.getPort(); const url_with_newline = try std.fmt.allocPrint(arena, "http://127.0.0.1:{d}/\n", .{port}); std.fs.File.stdout().writeAll(url_with_newline) catch {}; @@ -189,7 +191,11 @@ fn serveSourcesTar(request: *std.http.Server.Request, context: *Context) !void { var walker = try std_dir.walk(gpa); defer walker.deinit(); - var archiver = std.tar.writer(response.writer()); + var adapter_buffer: [500]u8 = undefined; + var response_writer = response.writer().adaptToNewApi(); + response_writer.new_interface.buffer = &adapter_buffer; + + var archiver: std.tar.Writer = .{ .underlying_writer = &response_writer.new_interface }; archiver.prefix = "std"; while (try walker.next()) |entry| { @@ -204,7 +210,13 @@ fn serveSourcesTar(request: *std.http.Server.Request, context: *Context) !void { } var file = try entry.dir.openFile(entry.basename, .{}); defer file.close(); - try archiver.writeFile(entry.path, file); + const stat = try file.stat(); + var file_reader: std.fs.File.Reader = .{ + .file = file, + .interface = std.fs.File.Reader.initInterface(&.{}), + .size = stat.size, + }; + try archiver.writeFile(entry.path, &file_reader, stat.mtime); } { @@ -217,6 +229,7 @@ fn serveSourcesTar(request: *std.http.Server.Request, context: *Context) !void { // intentionally omitting the pointless trailer //try archiver.finish(); + try response_writer.new_interface.flush(); try response.end(); } @@ -307,21 +320,17 @@ fn buildWasmBinary( try sendMessage(child.stdin.?, .update); try sendMessage(child.stdin.?, .exit); - const Header = std.zig.Server.Message.Header; var result: ?Cache.Path = null; var result_error_bundle = std.zig.ErrorBundle.empty; - const stdout = poller.fifo(.stdout); + const stdout = poller.reader(.stdout); poll: while (true) { - while (stdout.readableLength() < @sizeOf(Header)) { - if (!(try poller.poll())) break :poll; - } - const header = stdout.reader().readStruct(Header) catch unreachable; - while (stdout.readableLength() < header.bytes_len) { - if (!(try poller.poll())) break :poll; - } - const body = stdout.readableSliceOfLen(header.bytes_len); + const Header = std.zig.Server.Message.Header; + while (stdout.buffered().len < @sizeOf(Header)) if (!try poller.poll()) break :poll; + const header = stdout.takeStruct(Header, .little) catch unreachable; + while (stdout.buffered().len < header.bytes_len) if (!try poller.poll()) break :poll; + const body = stdout.take(header.bytes_len) catch unreachable; switch (header.tag) { .zig_version => { @@ -361,15 +370,11 @@ fn buildWasmBinary( }, else => {}, // ignore other messages } - - stdout.discard(body.len); } - const stderr = poller.fifo(.stderr); - if (stderr.readableLength() > 0) { - const owned_stderr = try stderr.toOwnedSlice(); - defer gpa.free(owned_stderr); - std.debug.print("{s}", .{owned_stderr}); + const stderr = poller.reader(.stderr); + if (stderr.bufferedLen() > 0) { + std.debug.print("{s}", .{stderr.buffered()}); } // Send EOF to stdin. 
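The std-docs.zig hunk just above replaces the fifo-based poller loop with buffered readers. A condensed sketch of the message pump it adopts, using the calls exactly as they appear in the diff (poller.reader, buffered, takeStruct, take, poll); the surrounding std.Io poller API is in flux, so treat this as a sketch rather than a stable interface:

const std = @import("std");

// Read length-prefixed compiler-server messages from a child's stdout;
// Header is std.zig.Server.Message.Header, as in the diff.
fn pumpMessages(poller: anytype) !void {
    const Header = std.zig.Server.Message.Header;
    const stdout = poller.reader(.stdout);
    poll: while (true) {
        // Wait until a whole header is buffered, then take it.
        while (stdout.buffered().len < @sizeOf(Header)) {
            if (!try poller.poll()) break :poll; // pipe closed
        }
        const header = stdout.takeStruct(Header, .little) catch unreachable;
        // Likewise for the body the header announces.
        while (stdout.buffered().len < header.bytes_len) {
            if (!try poller.poll()) break :poll;
        }
        const body = stdout.take(header.bytes_len) catch unreachable;
        _ = body; // dispatch on header.tag here
    }
}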
diff --git a/lib/compiler/test_runner.zig b/lib/compiler/test_runner.zig index 8b60a75399..e618f72d2f 100644 --- a/lib/compiler/test_runner.zig +++ b/lib/compiler/test_runner.zig @@ -16,6 +16,7 @@ var stdin_buffer: [4096]u8 = undefined; var stdout_buffer: [4096]u8 = undefined; const crippled = switch (builtin.zig_backend) { + .stage2_aarch64, .stage2_powerpc, .stage2_riscv64, => true, @@ -287,13 +288,14 @@ pub fn log( /// work-in-progress backends can handle it. pub fn mainSimple() anyerror!void { @disableInstrumentation(); - // is the backend capable of printing to stderr? - const enable_print = switch (builtin.zig_backend) { + // is the backend capable of calling `std.fs.File.writeAll`? + const enable_write = switch (builtin.zig_backend) { + .stage2_aarch64, .stage2_riscv64 => true, else => false, }; - // is the backend capable of using std.fmt.format to print a summary at the end? - const print_summary = switch (builtin.zig_backend) { - .stage2_riscv64 => true, + // is the backend capable of calling `std.Io.Writer.print`? + const enable_print = switch (builtin.zig_backend) { + .stage2_aarch64, .stage2_riscv64 => true, else => false, }; @@ -302,34 +304,31 @@ pub fn mainSimple() anyerror!void { var failed: u64 = 0; // we don't want to bring in File and Writer if the backend doesn't support it - const stderr = if (comptime enable_print) std.fs.File.stderr() else {}; + const stdout = if (enable_write) std.fs.File.stdout() else {}; for (builtin.test_functions) |test_fn| { + if (enable_write) { + stdout.writeAll(test_fn.name) catch {}; + stdout.writeAll("... ") catch {}; + } if (test_fn.func()) |_| { - if (enable_print) { - stderr.writeAll(test_fn.name) catch {}; - stderr.writeAll("... ") catch {}; - stderr.writeAll("PASS\n") catch {}; - } + if (enable_write) stdout.writeAll("PASS\n") catch {}; } else |err| { - if (enable_print) { - stderr.writeAll(test_fn.name) catch {}; - stderr.writeAll("... ") catch {}; - } if (err != error.SkipZigTest) { - if (enable_print) stderr.writeAll("FAIL\n") catch {}; + if (enable_write) stdout.writeAll("FAIL\n") catch {}; failed += 1; - if (!enable_print) return err; + if (!enable_write) return err; continue; } - if (enable_print) stderr.writeAll("SKIP\n") catch {}; + if (enable_write) stdout.writeAll("SKIP\n") catch {}; skipped += 1; continue; } passed += 1; } - if (enable_print and print_summary) { - stderr.deprecatedWriter().print("{} passed, {} skipped, {} failed\n", .{ passed, skipped, failed }) catch {}; + if (enable_print) { + var stdout_writer = stdout.writer(&.{}); + stdout_writer.interface.print("{} passed, {} skipped, {} failed\n", .{ passed, skipped, failed }) catch {}; } if (failed != 0) std.process.exit(1); } diff --git a/lib/compiler_rt.zig b/lib/compiler_rt.zig index 46db464fd9..b8723c56ee 100644 --- a/lib/compiler_rt.zig +++ b/lib/compiler_rt.zig @@ -249,12 +249,12 @@ comptime { _ = @import("compiler_rt/hexagon.zig"); if (@import("builtin").object_format != .c) { - _ = @import("compiler_rt/atomics.zig"); + if (builtin.zig_backend != .stage2_aarch64) _ = @import("compiler_rt/atomics.zig"); _ = @import("compiler_rt/stack_probe.zig"); // macOS has these functions inside libSystem. 
if (builtin.cpu.arch.isAARCH64() and !builtin.os.tag.isDarwin()) { - _ = @import("compiler_rt/aarch64_outline_atomics.zig"); + if (builtin.zig_backend != .stage2_aarch64) _ = @import("compiler_rt/aarch64_outline_atomics.zig"); } _ = @import("compiler_rt/memcpy.zig"); diff --git a/lib/compiler_rt/addo.zig b/lib/compiler_rt/addo.zig index beb6249223..610d620690 100644 --- a/lib/compiler_rt/addo.zig +++ b/lib/compiler_rt/addo.zig @@ -1,6 +1,4 @@ const std = @import("std"); -const builtin = @import("builtin"); -const is_test = builtin.is_test; const common = @import("./common.zig"); pub const panic = @import("common.zig").panic; @@ -16,7 +14,7 @@ comptime { // - addoXi4_generic as default inline fn addoXi4_generic(comptime ST: type, a: ST, b: ST, overflow: *c_int) ST { - @setRuntimeSafety(builtin.is_test); + @setRuntimeSafety(common.test_safety); overflow.* = 0; const sum: ST = a +% b; // Hackers Delight: section Overflow Detection, subsection Signed Add/Subtract diff --git a/lib/compiler_rt/addoti4_test.zig b/lib/compiler_rt/addoti4_test.zig index dc85830df9..d031d1d428 100644 --- a/lib/compiler_rt/addoti4_test.zig +++ b/lib/compiler_rt/addoti4_test.zig @@ -1,4 +1,5 @@ const addv = @import("addo.zig"); +const builtin = @import("builtin"); const std = @import("std"); const testing = std.testing; const math = std.math; @@ -23,6 +24,8 @@ fn simple_addoti4(a: i128, b: i128, overflow: *c_int) i128 { } test "addoti4" { + if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; + const min: i128 = math.minInt(i128); const max: i128 = math.maxInt(i128); var i: i128 = 1; diff --git a/lib/compiler_rt/clear_cache.zig b/lib/compiler_rt/clear_cache.zig index e4a0a9d00d..c43d35602c 100644 --- a/lib/compiler_rt/clear_cache.zig +++ b/lib/compiler_rt/clear_cache.zig @@ -97,8 +97,7 @@ fn clear_cache(start: usize, end: usize) callconv(.c) void { .nbytes = end - start, .whichcache = 3, // ICACHE | DCACHE }; - asm volatile ( - \\ syscall + asm volatile ("syscall" : : [_] "{$2}" (165), // nr = SYS_sysarch [_] "{$4}" (0), // op = MIPS_CACHEFLUSH @@ -116,11 +115,8 @@ fn clear_cache(start: usize, end: usize) callconv(.c) void { } else if (arm64 and !apple) { // Get Cache Type Info. // TODO memoize this? - var ctr_el0: u64 = 0; - asm volatile ( - \\mrs %[x], ctr_el0 - \\ - : [x] "=r" (ctr_el0), + const ctr_el0 = asm volatile ("mrs %[ctr_el0], ctr_el0" + : [ctr_el0] "=r" (-> u64), ); // The DC and IC instructions must use 64-bit registers so we don't use // uintptr_t in case this runs in an IPL32 environment. 
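The clear_cache.zig hunks collapse single-instruction asm blocks onto one line and, for the ctr_el0 read, switch from writing into a pre-declared variable to returning the value from the asm expression. The pattern in isolation (aarch64-only; this only compiles when targeting such a CPU):

// Result-location form: the asm expression itself yields the u64,
// instead of an output operand bound to a mutable local.
fn readCacheTypeRegister() u64 {
    return asm volatile ("mrs %[ctr_el0], ctr_el0"
        : [ctr_el0] "=r" (-> u64),
    );
}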
@@ -187,9 +183,7 @@ fn clear_cache(start: usize, end: usize) callconv(.c) void { exportIt(); } else if (os == .linux and loongarch) { // See: https://github.com/llvm/llvm-project/blob/cf54cae26b65fc3201eff7200ffb9b0c9e8f9a13/compiler-rt/lib/builtins/clear_cache.c#L94-L95 - asm volatile ( - \\ ibar 0 - ); + asm volatile ("ibar 0"); exportIt(); } diff --git a/lib/compiler_rt/cmp.zig b/lib/compiler_rt/cmp.zig index e1273aa622..67cb5b0938 100644 --- a/lib/compiler_rt/cmp.zig +++ b/lib/compiler_rt/cmp.zig @@ -1,6 +1,5 @@ const std = @import("std"); const builtin = @import("builtin"); -const is_test = builtin.is_test; const common = @import("common.zig"); pub const panic = common.panic; diff --git a/lib/compiler_rt/common.zig b/lib/compiler_rt/common.zig index f5423019f1..1160b1c718 100644 --- a/lib/compiler_rt/common.zig +++ b/lib/compiler_rt/common.zig @@ -102,9 +102,14 @@ pub const gnu_f16_abi = switch (builtin.cpu.arch) { pub const want_sparc_abi = builtin.cpu.arch.isSPARC(); +pub const test_safety = switch (builtin.zig_backend) { + .stage2_aarch64 => false, + else => builtin.is_test, +}; + // Avoid dragging in the runtime safety mechanisms into this .o file, unless // we're trying to test compiler-rt. -pub const panic = if (builtin.is_test) std.debug.FullPanic(std.debug.defaultPanic) else std.debug.no_panic; +pub const panic = if (test_safety) std.debug.FullPanic(std.debug.defaultPanic) else std.debug.no_panic; /// This seems to mostly correspond to `clang::TargetInfo::HasFloat16`. pub fn F16T(comptime OtherType: type) type { diff --git a/lib/compiler_rt/comparedf2_test.zig b/lib/compiler_rt/comparedf2_test.zig index 9444c6adf7..dbae6bbeec 100644 --- a/lib/compiler_rt/comparedf2_test.zig +++ b/lib/compiler_rt/comparedf2_test.zig @@ -4,7 +4,6 @@ const std = @import("std"); const builtin = @import("builtin"); -const is_test = builtin.is_test; const __eqdf2 = @import("./cmpdf2.zig").__eqdf2; const __ledf2 = @import("./cmpdf2.zig").__ledf2; diff --git a/lib/compiler_rt/comparesf2_test.zig b/lib/compiler_rt/comparesf2_test.zig index 40b1324cfa..65e78da99e 100644 --- a/lib/compiler_rt/comparesf2_test.zig +++ b/lib/compiler_rt/comparesf2_test.zig @@ -4,7 +4,6 @@ const std = @import("std"); const builtin = @import("builtin"); -const is_test = builtin.is_test; const __eqsf2 = @import("./cmpsf2.zig").__eqsf2; const __lesf2 = @import("./cmpsf2.zig").__lesf2; diff --git a/lib/compiler_rt/count0bits.zig b/lib/compiler_rt/count0bits.zig index c9bdfb7c23..874604eb2c 100644 --- a/lib/compiler_rt/count0bits.zig +++ b/lib/compiler_rt/count0bits.zig @@ -1,6 +1,5 @@ const std = @import("std"); const builtin = @import("builtin"); -const is_test = builtin.is_test; const common = @import("common.zig"); pub const panic = common.panic; diff --git a/lib/compiler_rt/divdf3.zig b/lib/compiler_rt/divdf3.zig index 0340404a69..7b47cd3a70 100644 --- a/lib/compiler_rt/divdf3.zig +++ b/lib/compiler_rt/divdf3.zig @@ -5,7 +5,6 @@ const std = @import("std"); const builtin = @import("builtin"); const arch = builtin.cpu.arch; -const is_test = builtin.is_test; const common = @import("common.zig"); const normalize = common.normalize; diff --git a/lib/compiler_rt/divmodei4.zig b/lib/compiler_rt/divmodei4.zig index 3f12e8697d..ab11452206 100644 --- a/lib/compiler_rt/divmodei4.zig +++ b/lib/compiler_rt/divmodei4.zig @@ -34,7 +34,7 @@ fn divmod(q: ?[]u32, r: ?[]u32, u: []u32, v: []u32) !void { } pub fn __divei4(q_p: [*]u8, u_p: [*]u8, v_p: [*]u8, bits: usize) callconv(.c) void { - @setRuntimeSafety(builtin.is_test); + 
@setRuntimeSafety(common.test_safety); const byte_size = std.zig.target.intByteSize(&builtin.target, @intCast(bits)); const q: []u32 = @ptrCast(@alignCast(q_p[0..byte_size])); const u: []u32 = @ptrCast(@alignCast(u_p[0..byte_size])); @@ -43,7 +43,7 @@ pub fn __divei4(q_p: [*]u8, u_p: [*]u8, v_p: [*]u8, bits: usize) callconv(.c) vo } pub fn __modei4(r_p: [*]u8, u_p: [*]u8, v_p: [*]u8, bits: usize) callconv(.c) void { - @setRuntimeSafety(builtin.is_test); + @setRuntimeSafety(common.test_safety); const byte_size = std.zig.target.intByteSize(&builtin.target, @intCast(bits)); const r: []u32 = @ptrCast(@alignCast(r_p[0..byte_size])); const u: []u32 = @ptrCast(@alignCast(u_p[0..byte_size])); diff --git a/lib/compiler_rt/fixint_test.zig b/lib/compiler_rt/fixint_test.zig index 57b4093809..198167ab86 100644 --- a/lib/compiler_rt/fixint_test.zig +++ b/lib/compiler_rt/fixint_test.zig @@ -1,4 +1,3 @@ -const is_test = @import("builtin").is_test; const std = @import("std"); const math = std.math; const testing = std.testing; diff --git a/lib/compiler_rt/int.zig b/lib/compiler_rt/int.zig index 4a89d0799d..16c504ee66 100644 --- a/lib/compiler_rt/int.zig +++ b/lib/compiler_rt/int.zig @@ -6,7 +6,6 @@ const testing = std.testing; const maxInt = std.math.maxInt; const minInt = std.math.minInt; const arch = builtin.cpu.arch; -const is_test = builtin.is_test; const common = @import("common.zig"); const udivmod = @import("udivmod.zig").udivmod; const __divti3 = @import("divti3.zig").__divti3; diff --git a/lib/compiler_rt/memcpy.zig b/lib/compiler_rt/memcpy.zig index 30971677ab..424e92954d 100644 --- a/lib/compiler_rt/memcpy.zig +++ b/lib/compiler_rt/memcpy.zig @@ -11,7 +11,7 @@ comptime { .visibility = common.visibility, }; - if (builtin.mode == .ReleaseSmall) + if (builtin.mode == .ReleaseSmall or builtin.zig_backend == .stage2_aarch64) @export(&memcpySmall, export_options) else @export(&memcpyFast, export_options); @@ -195,6 +195,8 @@ inline fn copyRange4( } test "memcpy" { + if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; + const S = struct { fn testFunc(comptime copy_func: anytype) !void { const max_len = 1024; diff --git a/lib/compiler_rt/memmove.zig b/lib/compiler_rt/memmove.zig index 71289a50ae..46c5a631cb 100644 --- a/lib/compiler_rt/memmove.zig +++ b/lib/compiler_rt/memmove.zig @@ -14,7 +14,7 @@ comptime { .visibility = common.visibility, }; - if (builtin.mode == .ReleaseSmall) + if (builtin.mode == .ReleaseSmall or builtin.zig_backend == .stage2_aarch64) @export(&memmoveSmall, export_options) else @export(&memmoveFast, export_options); @@ -39,7 +39,7 @@ fn memmoveSmall(opt_dest: ?[*]u8, opt_src: ?[*]const u8, len: usize) callconv(.c } fn memmoveFast(dest: ?[*]u8, src: ?[*]u8, len: usize) callconv(.c) ?[*]u8 { - @setRuntimeSafety(builtin.is_test); + @setRuntimeSafety(common.test_safety); const small_limit = @max(2 * @sizeOf(Element), @sizeOf(Element)); if (copySmallLength(small_limit, dest.?, src.?, len)) return dest; @@ -79,7 +79,7 @@ inline fn copyLessThan16( src: [*]const u8, len: usize, ) void { - @setRuntimeSafety(builtin.is_test); + @setRuntimeSafety(common.test_safety); if (len < 4) { if (len == 0) return; const b = len / 2; @@ -100,7 +100,7 @@ inline fn copy16ToSmallLimit( src: [*]const u8, len: usize, ) bool { - @setRuntimeSafety(builtin.is_test); + @setRuntimeSafety(common.test_safety); inline for (2..(std.math.log2(small_limit) + 1) / 2 + 1) |p| { const limit = 1 << (2 * p); if (len < limit) { @@ -119,7 +119,7 @@ inline fn copyRange4( src: [*]const u8, len: usize, ) void 
{ - @setRuntimeSafety(builtin.is_test); + @setRuntimeSafety(common.test_safety); comptime assert(std.math.isPowerOfTwo(copy_len)); assert(len >= copy_len); assert(len < 4 * copy_len); @@ -147,7 +147,7 @@ inline fn copyForwards( src: [*]const u8, len: usize, ) void { - @setRuntimeSafety(builtin.is_test); + @setRuntimeSafety(common.test_safety); assert(len >= 2 * @sizeOf(Element)); const head = src[0..@sizeOf(Element)].*; @@ -181,7 +181,7 @@ inline fn copyBlocks( src: anytype, max_bytes: usize, ) void { - @setRuntimeSafety(builtin.is_test); + @setRuntimeSafety(common.test_safety); const T = @typeInfo(@TypeOf(dest)).pointer.child; comptime assert(T == @typeInfo(@TypeOf(src)).pointer.child); @@ -217,6 +217,8 @@ inline fn copyBackwards( } test memmoveFast { + if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; + const max_len = 1024; var buffer: [max_len + @alignOf(Element) - 1]u8 = undefined; for (&buffer, 0..) |*b, i| { diff --git a/lib/compiler_rt/mulf3.zig b/lib/compiler_rt/mulf3.zig index ad60ec41a5..34d39fb9b7 100644 --- a/lib/compiler_rt/mulf3.zig +++ b/lib/compiler_rt/mulf3.zig @@ -6,7 +6,7 @@ const common = @import("./common.zig"); /// Ported from: /// https://github.com/llvm/llvm-project/blob/2ffb1b0413efa9a24eb3c49e710e36f92e2cb50b/compiler-rt/lib/builtins/fp_mul_impl.inc pub inline fn mulf3(comptime T: type, a: T, b: T) T { - @setRuntimeSafety(builtin.is_test); + @setRuntimeSafety(common.test_safety); const typeWidth = @typeInfo(T).float.bits; const significandBits = math.floatMantissaBits(T); const fractionalBits = math.floatFractionalBits(T); @@ -163,7 +163,7 @@ pub inline fn mulf3(comptime T: type, a: T, b: T) T { /// /// This is analogous to an shr version of `@shlWithOverflow` fn wideShrWithTruncation(comptime Z: type, hi: *Z, lo: *Z, count: u32) bool { - @setRuntimeSafety(builtin.is_test); + @setRuntimeSafety(common.test_safety); const typeWidth = @typeInfo(Z).int.bits; var inexact = false; if (count < typeWidth) { diff --git a/lib/compiler_rt/rem_pio2_large.zig b/lib/compiler_rt/rem_pio2_large.zig index b107a0fabb..f15e0d71f6 100644 --- a/lib/compiler_rt/rem_pio2_large.zig +++ b/lib/compiler_rt/rem_pio2_large.zig @@ -251,7 +251,7 @@ const PIo2 = [_]f64{ /// compiler will convert from decimal to binary accurately enough /// to produce the hexadecimal values shown. 
/// -pub fn rem_pio2_large(x: []f64, y: []f64, e0: i32, nx: i32, prec: usize) i32 { +pub fn rem_pio2_large(x: []const f64, y: []f64, e0: i32, nx: i32, prec: usize) i32 { var jz: i32 = undefined; var jx: i32 = undefined; var jv: i32 = undefined; diff --git a/lib/compiler_rt/stack_probe.zig b/lib/compiler_rt/stack_probe.zig index 94212b7a23..21259ec435 100644 --- a/lib/compiler_rt/stack_probe.zig +++ b/lib/compiler_rt/stack_probe.zig @@ -4,7 +4,6 @@ const common = @import("common.zig"); const os_tag = builtin.os.tag; const arch = builtin.cpu.arch; const abi = builtin.abi; -const is_test = builtin.is_test; pub const panic = common.panic; diff --git a/lib/compiler_rt/suboti4_test.zig b/lib/compiler_rt/suboti4_test.zig index 68ad0ff72f..65018bc966 100644 --- a/lib/compiler_rt/suboti4_test.zig +++ b/lib/compiler_rt/suboti4_test.zig @@ -1,4 +1,5 @@ const subo = @import("subo.zig"); +const builtin = @import("builtin"); const std = @import("std"); const testing = std.testing; const math = std.math; @@ -27,6 +28,8 @@ pub fn simple_suboti4(a: i128, b: i128, overflow: *c_int) i128 { } test "suboti3" { + if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; + const min: i128 = math.minInt(i128); const max: i128 = math.maxInt(i128); var i: i128 = 1; diff --git a/lib/compiler_rt/udivmod.zig b/lib/compiler_rt/udivmod.zig index a9705f317d..bf6aaadeae 100644 --- a/lib/compiler_rt/udivmod.zig +++ b/lib/compiler_rt/udivmod.zig @@ -1,8 +1,8 @@ const std = @import("std"); const builtin = @import("builtin"); -const is_test = builtin.is_test; const Log2Int = std.math.Log2Int; -const HalveInt = @import("common.zig").HalveInt; +const common = @import("common.zig"); +const HalveInt = common.HalveInt; const lo = switch (builtin.cpu.arch.endian()) { .big => 1, @@ -14,7 +14,7 @@ const hi = 1 - lo; // Returns U / v_ and sets r = U % v_. fn divwide_generic(comptime T: type, _u1: T, _u0: T, v_: T, r: *T) T { const HalfT = HalveInt(T, false).HalfT; - @setRuntimeSafety(is_test); + @setRuntimeSafety(common.test_safety); var v = v_; const b = @as(T, 1) << (@bitSizeOf(T) / 2); @@ -70,7 +70,7 @@ fn divwide_generic(comptime T: type, _u1: T, _u0: T, v_: T, r: *T) T { } fn divwide(comptime T: type, _u1: T, _u0: T, v: T, r: *T) T { - @setRuntimeSafety(is_test); + @setRuntimeSafety(common.test_safety); if (T == u64 and builtin.target.cpu.arch == .x86_64 and builtin.target.os.tag != .windows) { var rem: T = undefined; const quo = asm ( @@ -90,7 +90,7 @@ fn divwide(comptime T: type, _u1: T, _u0: T, v: T, r: *T) T { // Returns a_ / b_ and sets maybe_rem = a_ % b. 
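Several hunks above and below replace `@setRuntimeSafety(builtin.is_test)` (or a cached `is_test` constant) with a shared `common.test_safety` flag. A minimal sketch of the pattern, assuming `test_safety` is essentially a re-export of the test flag; the real definition in lib/compiler_rt/common.zig may add backend-specific conditions:

const builtin = @import("builtin");

// Hypothetical stand-in for the flag in lib/compiler_rt/common.zig.
pub const test_safety = builtin.is_test;

// Each intrinsic opts in at function scope: release builds of compiler-rt
// stay free of safety checks, while `zig test` runs fully checked.
fn divExample(a: u64, b: u64) u64 {
    @setRuntimeSafety(test_safety);
    return a / b;
}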
pub fn udivmod(comptime T: type, a_: T, b_: T, maybe_rem: ?*T) T { - @setRuntimeSafety(is_test); + @setRuntimeSafety(common.test_safety); const HalfT = HalveInt(T, false).HalfT; const SignedT = std.meta.Int(.signed, @bitSizeOf(T)); diff --git a/lib/compiler_rt/udivmodei4.zig b/lib/compiler_rt/udivmodei4.zig index 6d6f6c1b65..0923f3f222 100644 --- a/lib/compiler_rt/udivmodei4.zig +++ b/lib/compiler_rt/udivmodei4.zig @@ -113,7 +113,7 @@ pub fn divmod(q: ?[]u32, r: ?[]u32, u: []const u32, v: []const u32) !void { } pub fn __udivei4(q_p: [*]u8, u_p: [*]const u8, v_p: [*]const u8, bits: usize) callconv(.c) void { - @setRuntimeSafety(builtin.is_test); + @setRuntimeSafety(common.test_safety); const byte_size = std.zig.target.intByteSize(&builtin.target, @intCast(bits)); const q: []u32 = @ptrCast(@alignCast(q_p[0..byte_size])); const u: []const u32 = @ptrCast(@alignCast(u_p[0..byte_size])); @@ -122,7 +122,7 @@ pub fn __udivei4(q_p: [*]u8, u_p: [*]const u8, v_p: [*]const u8, bits: usize) ca } pub fn __umodei4(r_p: [*]u8, u_p: [*]const u8, v_p: [*]const u8, bits: usize) callconv(.c) void { - @setRuntimeSafety(builtin.is_test); + @setRuntimeSafety(common.test_safety); const byte_size = std.zig.target.intByteSize(&builtin.target, @intCast(bits)); const r: []u32 = @ptrCast(@alignCast(r_p[0..byte_size])); const u: []const u32 = @ptrCast(@alignCast(u_p[0..byte_size])); @@ -131,6 +131,7 @@ pub fn __umodei4(r_p: [*]u8, u_p: [*]const u8, v_p: [*]const u8, bits: usize) ca } test "__udivei4/__umodei4" { + if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; if (builtin.zig_backend == .stage2_c) return error.SkipZigTest; if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; diff --git a/lib/docs/wasm/main.zig b/lib/docs/wasm/main.zig index 7e9ffa5e4c..d3043cd917 100644 --- a/lib/docs/wasm/main.zig +++ b/lib/docs/wasm/main.zig @@ -772,10 +772,10 @@ export fn decl_type_html(decl_index: Decl.Index) String { const Oom = error{OutOfMemory}; fn unpackInner(tar_bytes: []u8) !void { - var fbs = std.io.fixedBufferStream(tar_bytes); + var reader: std.Io.Reader = .fixed(tar_bytes); var file_name_buffer: [1024]u8 = undefined; var link_name_buffer: [1024]u8 = undefined; - var it = std.tar.iterator(fbs.reader(), .{ + var it: std.tar.Iterator = .init(&reader, .{ .file_name_buffer = &file_name_buffer, .link_name_buffer = &link_name_buffer, }); @@ -796,7 +796,7 @@ fn unpackInner(tar_bytes: []u8) !void { { gop.value_ptr.* = file; } - const file_bytes = tar_bytes[fbs.pos..][0..@intCast(tar_file.size)]; + const file_bytes = tar_bytes[reader.seek..][0..@intCast(tar_file.size)]; assert(file == try Walk.add_file(file_name, file_bytes)); } } else { diff --git a/lib/init/build.zig b/lib/init/build.zig index 8a1c03819b..481b586a44 100644 --- a/lib/init/build.zig +++ b/lib/init/build.zig @@ -1,4 +1,3 @@ -//! Use `zig init --strip` next time to generate a project without comments. 
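For the lib/docs/wasm/main.zig hunk above: the deprecated `fixedBufferStream` plus `std.tar.iterator` pairing gives way to a fixed-buffer `std.Io.Reader` and `std.tar.Iterator.init`, with `reader.seek` taking over the role of `fbs.pos`. A condensed sketch of the new pattern, with illustrative buffer sizes:

const std = @import("std");

fn listTarEntries(tar_bytes: []u8) !void {
    var reader: std.Io.Reader = .fixed(tar_bytes);
    var file_name_buffer: [1024]u8 = undefined;
    var link_name_buffer: [1024]u8 = undefined;
    var it: std.tar.Iterator = .init(&reader, .{
        .file_name_buffer = &file_name_buffer,
        .link_name_buffer = &link_name_buffer,
    });
    while (try it.next()) |tar_file| {
        // After `next` returns, `reader.seek` is the offset of the entry's
        // contents within `tar_bytes`, as the hunk above relies on.
        std.debug.print("{s}: {d} bytes\n", .{ tar_file.name, tar_file.size });
    }
}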
const std = @import("std"); // Although this function looks imperative, it does not perform the build diff --git a/lib/std/Build.zig b/lib/std/Build.zig index e65a71e12b..d6b0e68f5d 100644 --- a/lib/std/Build.zig +++ b/lib/std/Build.zig @@ -408,104 +408,179 @@ fn createChildOnly( return child; } -fn userInputOptionsFromArgs(allocator: Allocator, args: anytype) UserInputOptionsMap { - var user_input_options = UserInputOptionsMap.init(allocator); +fn userInputOptionsFromArgs(arena: Allocator, args: anytype) UserInputOptionsMap { + var map = UserInputOptionsMap.init(arena); inline for (@typeInfo(@TypeOf(args)).@"struct".fields) |field| { - const v = @field(args, field.name); - const T = @TypeOf(v); - switch (T) { - Target.Query => { - user_input_options.put(field.name, .{ - .name = field.name, - .value = .{ .scalar = v.zigTriple(allocator) catch @panic("OOM") }, - .used = false, - }) catch @panic("OOM"); - user_input_options.put("cpu", .{ - .name = "cpu", - .value = .{ .scalar = v.serializeCpuAlloc(allocator) catch @panic("OOM") }, - .used = false, - }) catch @panic("OOM"); - }, - ResolvedTarget => { - user_input_options.put(field.name, .{ - .name = field.name, - .value = .{ .scalar = v.query.zigTriple(allocator) catch @panic("OOM") }, - .used = false, - }) catch @panic("OOM"); - user_input_options.put("cpu", .{ - .name = "cpu", - .value = .{ .scalar = v.query.serializeCpuAlloc(allocator) catch @panic("OOM") }, - .used = false, - }) catch @panic("OOM"); - }, - LazyPath => { - user_input_options.put(field.name, .{ + if (field.type == @Type(.null)) continue; + addUserInputOptionFromArg(arena, &map, field, field.type, @field(args, field.name)); + } + return map; +} + +fn addUserInputOptionFromArg( + arena: Allocator, + map: *UserInputOptionsMap, + field: std.builtin.Type.StructField, + comptime T: type, + /// If null, the value won't be added, but `T` will still be type-checked. 
+ maybe_value: ?T, +) void { + switch (T) { + Target.Query => return if (maybe_value) |v| { + map.put(field.name, .{ + .name = field.name, + .value = .{ .scalar = v.zigTriple(arena) catch @panic("OOM") }, + .used = false, + }) catch @panic("OOM"); + map.put("cpu", .{ + .name = "cpu", + .value = .{ .scalar = v.serializeCpuAlloc(arena) catch @panic("OOM") }, + .used = false, + }) catch @panic("OOM"); + }, + ResolvedTarget => return if (maybe_value) |v| { + map.put(field.name, .{ + .name = field.name, + .value = .{ .scalar = v.query.zigTriple(arena) catch @panic("OOM") }, + .used = false, + }) catch @panic("OOM"); + map.put("cpu", .{ + .name = "cpu", + .value = .{ .scalar = v.query.serializeCpuAlloc(arena) catch @panic("OOM") }, + .used = false, + }) catch @panic("OOM"); + }, + std.zig.BuildId => return if (maybe_value) |v| { + map.put(field.name, .{ + .name = field.name, + .value = .{ .scalar = std.fmt.allocPrint(arena, "{f}", .{v}) catch @panic("OOM") }, + .used = false, + }) catch @panic("OOM"); + }, + LazyPath => return if (maybe_value) |v| { + map.put(field.name, .{ + .name = field.name, + .value = .{ .lazy_path = v.dupeInner(arena) }, + .used = false, + }) catch @panic("OOM"); + }, + []const LazyPath => return if (maybe_value) |v| { + var list = ArrayList(LazyPath).initCapacity(arena, v.len) catch @panic("OOM"); + for (v) |lp| list.appendAssumeCapacity(lp.dupeInner(arena)); + map.put(field.name, .{ + .name = field.name, + .value = .{ .lazy_path_list = list }, + .used = false, + }) catch @panic("OOM"); + }, + []const u8 => return if (maybe_value) |v| { + map.put(field.name, .{ + .name = field.name, + .value = .{ .scalar = arena.dupe(u8, v) catch @panic("OOM") }, + .used = false, + }) catch @panic("OOM"); + }, + []const []const u8 => return if (maybe_value) |v| { + var list = ArrayList([]const u8).initCapacity(arena, v.len) catch @panic("OOM"); + for (v) |s| list.appendAssumeCapacity(arena.dupe(u8, s) catch @panic("OOM")); + map.put(field.name, .{ + .name = field.name, + .value = .{ .list = list }, + .used = false, + }) catch @panic("OOM"); + }, + else => switch (@typeInfo(T)) { + .bool => return if (maybe_value) |v| { + map.put(field.name, .{ .name = field.name, - .value = .{ .lazy_path = v.dupeInner(allocator) }, + .value = .{ .scalar = if (v) "true" else "false" }, .used = false, }) catch @panic("OOM"); }, - []const LazyPath => { - var list = ArrayList(LazyPath).initCapacity(allocator, v.len) catch @panic("OOM"); - for (v) |lp| list.appendAssumeCapacity(lp.dupeInner(allocator)); - user_input_options.put(field.name, .{ + .@"enum", .enum_literal => return if (maybe_value) |v| { + map.put(field.name, .{ .name = field.name, - .value = .{ .lazy_path_list = list }, + .value = .{ .scalar = @tagName(v) }, .used = false, }) catch @panic("OOM"); }, - []const u8 => { - user_input_options.put(field.name, .{ + .comptime_int, .int => return if (maybe_value) |v| { + map.put(field.name, .{ .name = field.name, - .value = .{ .scalar = v }, + .value = .{ .scalar = std.fmt.allocPrint(arena, "{d}", .{v}) catch @panic("OOM") }, .used = false, }) catch @panic("OOM"); }, - []const []const u8 => { - var list = ArrayList([]const u8).initCapacity(allocator, v.len) catch @panic("OOM"); - list.appendSliceAssumeCapacity(v); - - user_input_options.put(field.name, .{ + .comptime_float, .float => return if (maybe_value) |v| { + map.put(field.name, .{ .name = field.name, - .value = .{ .list = list }, + .value = .{ .scalar = std.fmt.allocPrint(arena, "{x}", .{v}) catch @panic("OOM") }, .used = false, }) catch 
@panic("OOM"); }, - else => switch (@typeInfo(T)) { - .bool => { - user_input_options.put(field.name, .{ - .name = field.name, - .value = .{ .scalar = if (v) "true" else "false" }, - .used = false, - }) catch @panic("OOM"); - }, - .@"enum", .enum_literal => { - user_input_options.put(field.name, .{ - .name = field.name, - .value = .{ .scalar = @tagName(v) }, - .used = false, - }) catch @panic("OOM"); + .pointer => |ptr_info| switch (ptr_info.size) { + .one => switch (@typeInfo(ptr_info.child)) { + .array => |array_info| { + comptime var slice_info = ptr_info; + slice_info.size = .slice; + slice_info.is_const = true; + slice_info.child = array_info.child; + slice_info.sentinel_ptr = null; + addUserInputOptionFromArg( + arena, + map, + field, + @Type(.{ .pointer = slice_info }), + maybe_value orelse null, + ); + return; + }, + else => {}, }, - .comptime_int, .int => { - user_input_options.put(field.name, .{ - .name = field.name, - .value = .{ .scalar = std.fmt.allocPrint(allocator, "{d}", .{v}) catch @panic("OOM") }, - .used = false, - }) catch @panic("OOM"); + .slice => switch (@typeInfo(ptr_info.child)) { + .@"enum" => return if (maybe_value) |v| { + var list = ArrayList([]const u8).initCapacity(arena, v.len) catch @panic("OOM"); + for (v) |tag| list.appendAssumeCapacity(@tagName(tag)); + map.put(field.name, .{ + .name = field.name, + .value = .{ .list = list }, + .used = false, + }) catch @panic("OOM"); + }, + else => { + comptime var slice_info = ptr_info; + slice_info.is_const = true; + slice_info.sentinel_ptr = null; + addUserInputOptionFromArg( + arena, + map, + field, + @Type(.{ .pointer = slice_info }), + maybe_value orelse null, + ); + return; + }, }, - .comptime_float, .float => { - user_input_options.put(field.name, .{ - .name = field.name, - .value = .{ .scalar = std.fmt.allocPrint(allocator, "{e}", .{v}) catch @panic("OOM") }, - .used = false, - }) catch @panic("OOM"); + else => {}, + }, + .null => unreachable, + .optional => |info| switch (@typeInfo(info.child)) { + .optional => {}, + else => { + addUserInputOptionFromArg( + arena, + map, + field, + info.child, + maybe_value orelse null, + ); + return; }, - else => @compileError("option '" ++ field.name ++ "' has unsupported type: " ++ @typeName(T)), }, - } + else => {}, + }, } - - return user_input_options; + @compileError("option '" ++ field.name ++ "' has unsupported type: " ++ @typeName(field.type)); } const OrderedUserValue = union(enum) { diff --git a/lib/std/Build/Fuzz/WebServer.zig b/lib/std/Build/Fuzz/WebServer.zig index b28a6e185c..18582a60ef 100644 --- a/lib/std/Build/Fuzz/WebServer.zig +++ b/lib/std/Build/Fuzz/WebServer.zig @@ -273,21 +273,17 @@ fn buildWasmBinary( try sendMessage(child.stdin.?, .update); try sendMessage(child.stdin.?, .exit); - const Header = std.zig.Server.Message.Header; var result: ?Path = null; var result_error_bundle = std.zig.ErrorBundle.empty; - const stdout = poller.fifo(.stdout); + const stdout = poller.reader(.stdout); poll: while (true) { - while (stdout.readableLength() < @sizeOf(Header)) { - if (!(try poller.poll())) break :poll; - } - const header = stdout.reader().readStruct(Header) catch unreachable; - while (stdout.readableLength() < header.bytes_len) { - if (!(try poller.poll())) break :poll; - } - const body = stdout.readableSliceOfLen(header.bytes_len); + const Header = std.zig.Server.Message.Header; + while (stdout.buffered().len < @sizeOf(Header)) if (!try poller.poll()) break :poll; + const header = stdout.takeStruct(Header, .little) catch unreachable; + while 
(stdout.buffered().len < header.bytes_len) if (!try poller.poll()) break :poll; + const body = stdout.take(header.bytes_len) catch unreachable; switch (header.tag) { .zig_version => { @@ -325,15 +321,11 @@ fn buildWasmBinary( }, else => {}, // ignore other messages } - - stdout.discard(body.len); } - const stderr = poller.fifo(.stderr); - if (stderr.readableLength() > 0) { - const owned_stderr = try stderr.toOwnedSlice(); - defer gpa.free(owned_stderr); - std.debug.print("{s}", .{owned_stderr}); + const stderr_contents = try poller.toOwnedSlice(.stderr); + if (stderr_contents.len > 0) { + std.debug.print("{s}", .{stderr_contents}); } // Send EOF to stdin. @@ -522,7 +514,9 @@ fn serveSourcesTar(ws: *WebServer, request: *std.http.Server.Request) !void { var cwd_cache: ?[]const u8 = null; - var archiver = std.tar.writer(response.writer()); + var adapter = response.writer().adaptToNewApi(); + var archiver: std.tar.Writer = .{ .underlying_writer = &adapter.new_interface }; + var read_buffer: [1024]u8 = undefined; for (deduped_paths) |joined_path| { var file = joined_path.root_dir.handle.openFile(joined_path.sub_path, .{}) catch |err| { @@ -530,13 +524,14 @@ fn serveSourcesTar(ws: *WebServer, request: *std.http.Server.Request) !void { continue; }; defer file.close(); - + const stat = try file.stat(); + var file_reader: std.fs.File.Reader = .initSize(file, &read_buffer, stat.size); archiver.prefix = joined_path.root_dir.path orelse try memoizedCwd(arena, &cwd_cache); - try archiver.writeFile(joined_path.sub_path, file); + try archiver.writeFile(joined_path.sub_path, &file_reader, stat.mtime); } - // intentionally omitting the pointless trailer - //try archiver.finish(); + // intentionally not calling `archiver.finishPedantically` + try adapter.new_interface.flush(); try response.end(); } diff --git a/lib/std/Build/Step.zig b/lib/std/Build/Step.zig index 5192249f12..8583427aad 100644 --- a/lib/std/Build/Step.zig +++ b/lib/std/Build/Step.zig @@ -286,7 +286,7 @@ pub fn cast(step: *Step, comptime T: type) ?*T { } /// For debugging purposes, prints identifying information about this Step. 
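The same poller rewrite recurs in Step.zig below: `fifo(...)` plus explicit `discard` becomes `reader(...)` with consuming `takeStruct`/`take` calls. A minimal sketch of the loop shape, assuming a child process speaking the std.zig.Server wire protocol on stdout:

const std = @import("std");

const StreamEnum = enum { stdout, stderr };

fn pumpMessages(poller: *std.Io.Poller(StreamEnum)) !void {
    const Header = std.zig.Server.Message.Header;
    const stdout = poller.reader(.stdout);
    poll: while (true) {
        // Block until a full header, then a full body, has been buffered.
        while (stdout.buffered().len < @sizeOf(Header)) if (!try poller.poll()) break :poll;
        const header = stdout.takeStruct(Header, .little) catch unreachable;
        while (stdout.buffered().len < header.bytes_len) if (!try poller.poll()) break :poll;
        // `take` consumes the bytes it returns, so the old trailing
        // `stdout.discard(body.len)` of the fifo API disappears.
        const body = stdout.take(header.bytes_len) catch unreachable;
        _ = body; // dispatch on header.tag here
    }
}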
-pub fn dump(step: *Step, w: *std.io.Writer, tty_config: std.io.tty.Config) void { +pub fn dump(step: *Step, w: *std.Io.Writer, tty_config: std.Io.tty.Config) void { const debug_info = std.debug.getSelfDebugInfo() catch |err| { w.print("Unable to dump stack trace: Unable to open debug info: {s}\n", .{ @errorName(err), @@ -359,7 +359,7 @@ pub fn addError(step: *Step, comptime fmt: []const u8, args: anytype) error{OutO pub const ZigProcess = struct { child: std.process.Child, - poller: std.io.Poller(StreamEnum), + poller: std.Io.Poller(StreamEnum), progress_ipc_fd: if (std.Progress.have_ipc) ?std.posix.fd_t else void, pub const StreamEnum = enum { stdout, stderr }; @@ -428,7 +428,7 @@ pub fn evalZigProcess( const zp = try gpa.create(ZigProcess); zp.* = .{ .child = child, - .poller = std.io.poll(gpa, ZigProcess.StreamEnum, .{ + .poller = std.Io.poll(gpa, ZigProcess.StreamEnum, .{ .stdout = child.stdout.?, .stderr = child.stderr.?, }), @@ -508,20 +508,16 @@ fn zigProcessUpdate(s: *Step, zp: *ZigProcess, watch: bool) !?Path { try sendMessage(zp.child.stdin.?, .update); if (!watch) try sendMessage(zp.child.stdin.?, .exit); - const Header = std.zig.Server.Message.Header; var result: ?Path = null; - const stdout = zp.poller.fifo(.stdout); + const stdout = zp.poller.reader(.stdout); poll: while (true) { - while (stdout.readableLength() < @sizeOf(Header)) { - if (!(try zp.poller.poll())) break :poll; - } - const header = stdout.reader().readStruct(Header) catch unreachable; - while (stdout.readableLength() < header.bytes_len) { - if (!(try zp.poller.poll())) break :poll; - } - const body = stdout.readableSliceOfLen(header.bytes_len); + const Header = std.zig.Server.Message.Header; + while (stdout.buffered().len < @sizeOf(Header)) if (!try zp.poller.poll()) break :poll; + const header = stdout.takeStruct(Header, .little) catch unreachable; + while (stdout.buffered().len < header.bytes_len) if (!try zp.poller.poll()) break :poll; + const body = stdout.take(header.bytes_len) catch unreachable; switch (header.tag) { .zig_version => { @@ -547,11 +543,8 @@ fn zigProcessUpdate(s: *Step, zp: *ZigProcess, watch: bool) !?Path { .string_bytes = try arena.dupe(u8, string_bytes), .extra = extra_array, }; - if (watch) { - // This message indicates the end of the update. - stdout.discard(body.len); - break; - } + // This message indicates the end of the update. 
+ if (watch) break :poll; }, .emit_digest => { const EmitDigest = std.zig.Server.Message.EmitDigest; @@ -611,15 +604,13 @@ fn zigProcessUpdate(s: *Step, zp: *ZigProcess, watch: bool) !?Path { }, else => {}, // ignore other messages } - - stdout.discard(body.len); } s.result_duration_ns = timer.read(); - const stderr = zp.poller.fifo(.stderr); - if (stderr.readableLength() > 0) { - try s.result_error_msgs.append(arena, try stderr.toOwnedSlice()); + const stderr_contents = try zp.poller.toOwnedSlice(.stderr); + if (stderr_contents.len > 0) { + try s.result_error_msgs.append(arena, try arena.dupe(u8, stderr_contents)); } return result; @@ -736,7 +727,7 @@ pub fn allocPrintCmd2( argv: []const []const u8, ) Allocator.Error![]u8 { const shell = struct { - fn escape(writer: anytype, string: []const u8, is_argv0: bool) !void { + fn escape(writer: *std.Io.Writer, string: []const u8, is_argv0: bool) !void { for (string) |c| { if (switch (c) { else => true, @@ -770,9 +761,9 @@ pub fn allocPrintCmd2( } }; - var buf: std.ArrayListUnmanaged(u8) = .empty; - const writer = buf.writer(arena); - if (opt_cwd) |cwd| try writer.print("cd {s} && ", .{cwd}); + var aw: std.Io.Writer.Allocating = .init(arena); + const writer = &aw.writer; + if (opt_cwd) |cwd| writer.print("cd {s} && ", .{cwd}) catch return error.OutOfMemory; if (opt_env) |env| { const process_env_map = std.process.getEnvMap(arena) catch std.process.EnvMap.init(arena); var it = env.iterator(); @@ -782,17 +773,17 @@ pub fn allocPrintCmd2( if (process_env_map.get(key)) |process_value| { if (std.mem.eql(u8, value, process_value)) continue; } - try writer.print("{s}=", .{key}); - try shell.escape(writer, value, false); - try writer.writeByte(' '); + writer.print("{s}=", .{key}) catch return error.OutOfMemory; + shell.escape(writer, value, false) catch return error.OutOfMemory; + writer.writeByte(' ') catch return error.OutOfMemory; } } - try shell.escape(writer, argv[0], true); + shell.escape(writer, argv[0], true) catch return error.OutOfMemory; for (argv[1..]) |arg| { - try writer.writeByte(' '); - try shell.escape(writer, arg, false); + writer.writeByte(' ') catch return error.OutOfMemory; + shell.escape(writer, arg, false) catch return error.OutOfMemory; } - return buf.toOwnedSlice(arena); + return aw.toOwnedSlice(); } /// Prefer `cacheHitAndWatch` unless you already added watch inputs diff --git a/lib/std/Build/Step/Compile.zig b/lib/std/Build/Step/Compile.zig index 356ea4e34e..141d18a7bf 100644 --- a/lib/std/Build/Step/Compile.zig +++ b/lib/std/Build/Step/Compile.zig @@ -681,10 +681,14 @@ pub fn producesImplib(compile: *Compile) bool { return compile.isDll(); } +/// Deprecated; use `compile.root_module.link_libc = true` instead. +/// To be removed after 0.15.0 is tagged. pub fn linkLibC(compile: *Compile) void { compile.root_module.link_libc = true; } +/// Deprecated; use `compile.root_module.link_libcpp = true` instead. +/// To be removed after 0.15.0 is tagged. pub fn linkLibCpp(compile: *Compile) void { compile.root_module.link_libcpp = true; } @@ -802,10 +806,14 @@ fn runPkgConfig(compile: *Compile, lib_name: []const u8) !PkgConfigResult { }; } +/// Deprecated; use `compile.root_module.linkSystemLibrary(name, .{})` instead. +/// To be removed after 0.15.0 is tagged. pub fn linkSystemLibrary(compile: *Compile, name: []const u8) void { return compile.root_module.linkSystemLibrary(name, .{}); } +/// Deprecated; use `compile.root_module.linkSystemLibrary(name, options)` instead. +/// To be removed after 0.15.0 is tagged. 
pub fn linkSystemLibrary2( compile: *Compile, name: []const u8, @@ -814,22 +822,26 @@ pub fn linkSystemLibrary2( return compile.root_module.linkSystemLibrary(name, options); } +/// Deprecated; use `c.root_module.linkFramework(name, .{})` instead. +/// To be removed after 0.15.0 is tagged. pub fn linkFramework(c: *Compile, name: []const u8) void { c.root_module.linkFramework(name, .{}); } -/// Handy when you have many C/C++ source files and want them all to have the same flags. +/// Deprecated; use `compile.root_module.addCSourceFiles(options)` instead. +/// To be removed after 0.15.0 is tagged. pub fn addCSourceFiles(compile: *Compile, options: Module.AddCSourceFilesOptions) void { compile.root_module.addCSourceFiles(options); } +/// Deprecated; use `compile.root_module.addCSourceFile(source)` instead. +/// To be removed after 0.15.0 is tagged. pub fn addCSourceFile(compile: *Compile, source: Module.CSourceFile) void { compile.root_module.addCSourceFile(source); } -/// Resource files must have the extension `.rc`. -/// Can be called regardless of target. The .rc file will be ignored -/// if the target object format does not support embedded resources. +/// Deprecated; use `compile.root_module.addWin32ResourceFile(source)` instead. +/// To be removed after 0.15.0 is tagged. pub fn addWin32ResourceFile(compile: *Compile, source: Module.RcSourceFile) void { compile.root_module.addWin32ResourceFile(source); } @@ -915,54 +927,80 @@ pub fn getEmittedLlvmBc(compile: *Compile) LazyPath { return compile.getEmittedFileGeneric(&compile.generated_llvm_bc); } +/// Deprecated; use `compile.root_module.addAssemblyFile(source)` instead. +/// To be removed after 0.15.0 is tagged. pub fn addAssemblyFile(compile: *Compile, source: LazyPath) void { compile.root_module.addAssemblyFile(source); } +/// Deprecated; use `compile.root_module.addObjectFile(source)` instead. +/// To be removed after 0.15.0 is tagged. pub fn addObjectFile(compile: *Compile, source: LazyPath) void { compile.root_module.addObjectFile(source); } +/// Deprecated; use `compile.root_module.addObject(object)` instead. +/// To be removed after 0.15.0 is tagged. pub fn addObject(compile: *Compile, object: *Compile) void { compile.root_module.addObject(object); } +/// Deprecated; use `compile.root_module.linkLibrary(library)` instead. +/// To be removed after 0.15.0 is tagged. pub fn linkLibrary(compile: *Compile, library: *Compile) void { compile.root_module.linkLibrary(library); } +/// Deprecated; use `compile.root_module.addAfterIncludePath(lazy_path)` instead. +/// To be removed after 0.15.0 is tagged. pub fn addAfterIncludePath(compile: *Compile, lazy_path: LazyPath) void { compile.root_module.addAfterIncludePath(lazy_path); } +/// Deprecated; use `compile.root_module.addSystemIncludePath(lazy_path)` instead. +/// To be removed after 0.15.0 is tagged. pub fn addSystemIncludePath(compile: *Compile, lazy_path: LazyPath) void { compile.root_module.addSystemIncludePath(lazy_path); } +/// Deprecated; use `compile.root_module.addIncludePath(lazy_path)` instead. +/// To be removed after 0.15.0 is tagged. pub fn addIncludePath(compile: *Compile, lazy_path: LazyPath) void { compile.root_module.addIncludePath(lazy_path); } +/// Deprecated; use `compile.root_module.addConfigHeader(config_header)` instead. +/// To be removed after 0.15.0 is tagged. 
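All of these wrappers carry the same migration: call the equivalent API on the compilation's root module. A before/after sketch for a typical build script, where `exe` is any *std.Build.Step.Compile:

const std = @import("std");

fn configure(b: *std.Build, exe: *std.Build.Step.Compile) void {
    // Before (deprecated, to be removed after 0.15.0 is tagged):
    //   exe.linkLibC();
    //   exe.linkSystemLibrary("z");
    //   exe.addIncludePath(b.path("include"));
    // After: operate on the root module directly.
    exe.root_module.link_libc = true;
    exe.root_module.linkSystemLibrary("z", .{});
    exe.root_module.addIncludePath(b.path("include"));
}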
pub fn addConfigHeader(compile: *Compile, config_header: *Step.ConfigHeader) void { compile.root_module.addConfigHeader(config_header); } +/// Deprecated; use `compile.root_module.addEmbedPath(lazy_path)` instead. +/// To be removed after 0.15.0 is tagged. pub fn addEmbedPath(compile: *Compile, lazy_path: LazyPath) void { compile.root_module.addEmbedPath(lazy_path); } +/// Deprecated; use `compile.root_module.addLibraryPath(directory_path)` instead. +/// To be removed after 0.15.0 is tagged. pub fn addLibraryPath(compile: *Compile, directory_path: LazyPath) void { compile.root_module.addLibraryPath(directory_path); } +/// Deprecated; use `compile.root_module.addRPath(directory_path)` instead. +/// To be removed after 0.15.0 is tagged. pub fn addRPath(compile: *Compile, directory_path: LazyPath) void { compile.root_module.addRPath(directory_path); } +/// Deprecated; use `compile.root_module.addSystemFrameworkPath(directory_path)` instead. +/// To be removed after 0.15.0 is tagged. pub fn addSystemFrameworkPath(compile: *Compile, directory_path: LazyPath) void { compile.root_module.addSystemFrameworkPath(directory_path); } +/// Deprecated; use `compile.root_module.addFrameworkPath(directory_path)` instead. +/// To be removed after 0.15.0 is tagged. pub fn addFrameworkPath(compile: *Compile, directory_path: LazyPath) void { compile.root_module.addFrameworkPath(directory_path); } diff --git a/lib/std/Build/Step/Run.zig b/lib/std/Build/Step/Run.zig index e35b602e06..57f5d73f0c 100644 --- a/lib/std/Build/Step/Run.zig +++ b/lib/std/Build/Step/Run.zig @@ -73,9 +73,12 @@ skip_foreign_checks: bool, /// external executor (such as qemu) but not fail if the executor is unavailable. failing_to_execute_foreign_is_an_error: bool, +/// Deprecated in favor of `stdio_limit`. +max_stdio_size: usize, + /// If stderr or stdout exceeds this amount, the child process is killed and /// the step fails. 
-max_stdio_size: usize, +stdio_limit: std.Io.Limit, captured_stdout: ?*Output, captured_stderr: ?*Output, @@ -169,7 +172,7 @@ pub const Output = struct { pub fn create(owner: *std.Build, name: []const u8) *Run { const run = owner.allocator.create(Run) catch @panic("OOM"); run.* = .{ - .step = Step.init(.{ + .step = .init(.{ .id = base_id, .name = name, .owner = owner, @@ -186,6 +189,7 @@ pub fn create(owner: *std.Build, name: []const u8) *Run { .skip_foreign_checks = false, .failing_to_execute_foreign_is_an_error = true, .max_stdio_size = 10 * 1024 * 1024, + .stdio_limit = .unlimited, .captured_stdout = null, .captured_stderr = null, .dep_output_file = null, @@ -1011,7 +1015,7 @@ fn populateGeneratedPaths( } } -fn formatTerm(term: ?std.process.Child.Term, w: *std.io.Writer) std.io.Writer.Error!void { +fn formatTerm(term: ?std.process.Child.Term, w: *std.Io.Writer) std.Io.Writer.Error!void { if (term) |t| switch (t) { .Exited => |code| try w.print("exited with code {d}", .{code}), .Signal => |sig| try w.print("terminated with signal {d}", .{sig}), @@ -1500,7 +1504,7 @@ fn evalZigTest( const gpa = run.step.owner.allocator; const arena = run.step.owner.allocator; - var poller = std.io.poll(gpa, enum { stdout, stderr }, .{ + var poller = std.Io.poll(gpa, enum { stdout, stderr }, .{ .stdout = child.stdout.?, .stderr = child.stderr.?, }); @@ -1524,11 +1528,6 @@ fn evalZigTest( break :failed false; }; - const Header = std.zig.Server.Message.Header; - - const stdout = poller.fifo(.stdout); - const stderr = poller.fifo(.stderr); - var fail_count: u32 = 0; var skip_count: u32 = 0; var leak_count: u32 = 0; @@ -1541,16 +1540,14 @@ fn evalZigTest( var sub_prog_node: ?std.Progress.Node = null; defer if (sub_prog_node) |n| n.end(); + const stdout = poller.reader(.stdout); + const stderr = poller.reader(.stderr); const any_write_failed = first_write_failed or poll: while (true) { - while (stdout.readableLength() < @sizeOf(Header)) { - if (!(try poller.poll())) break :poll false; - } - const header = stdout.reader().readStruct(Header) catch unreachable; - while (stdout.readableLength() < header.bytes_len) { - if (!(try poller.poll())) break :poll false; - } - const body = stdout.readableSliceOfLen(header.bytes_len); - + const Header = std.zig.Server.Message.Header; + while (stdout.buffered().len < @sizeOf(Header)) if (!try poller.poll()) break :poll false; + const header = stdout.takeStruct(Header, .little) catch unreachable; + while (stdout.buffered().len < header.bytes_len) if (!try poller.poll()) break :poll false; + const body = stdout.take(header.bytes_len) catch unreachable; switch (header.tag) { .zig_version => { if (!std.mem.eql(u8, builtin.zig_version_string, body)) { @@ -1607,9 +1604,9 @@ fn evalZigTest( if (tr_hdr.flags.fail or tr_hdr.flags.leak or tr_hdr.flags.log_err_count > 0) { const name = std.mem.sliceTo(md.string_bytes[md.names[tr_hdr.index]..], 0); - const orig_msg = stderr.readableSlice(0); - defer stderr.discard(orig_msg.len); - const msg = std.mem.trim(u8, orig_msg, "\n"); + const stderr_contents = stderr.buffered(); + stderr.toss(stderr_contents.len); + const msg = std.mem.trim(u8, stderr_contents, "\n"); const label = if (tr_hdr.flags.fail) "failed" else if (tr_hdr.flags.leak) @@ -1660,8 +1657,6 @@ fn evalZigTest( }, else => {}, // ignore other messages } - - stdout.discard(body.len); }; if (any_write_failed) { @@ -1670,9 +1665,9 @@ fn evalZigTest( while (try poller.poll()) {} } - if (stderr.readableLength() > 0) { - const msg = std.mem.trim(u8, try stderr.toOwnedSlice(), "\n"); - 
if (msg.len > 0) run.step.result_stderr = msg;
+    const stderr_contents = std.mem.trim(u8, stderr.buffered(), "\n");
+    if (stderr_contents.len > 0) {
+        run.step.result_stderr = try arena.dupe(u8, stderr_contents);
     }
     // Send EOF to stdin.
@@ -1769,13 +1764,22 @@ fn evalGeneric(run: *Run, child: *std.process.Child) !StdIoResult {
             child.stdin = null;
         },
         .lazy_path => |lazy_path| {
-            const path = lazy_path.getPath2(b, &run.step);
-            const file = b.build_root.handle.openFile(path, .{}) catch |err| {
+            const path = lazy_path.getPath3(b, &run.step);
+            const file = path.root_dir.handle.openFile(path.subPathOrDot(), .{}) catch |err| {
                 return run.step.fail("unable to open stdin file: {s}", .{@errorName(err)});
             };
             defer file.close();
-            child.stdin.?.writeFileAll(file, .{}) catch |err| {
-                return run.step.fail("unable to write file to stdin: {s}", .{@errorName(err)});
+            // TODO https://github.com/ziglang/zig/issues/23955
+            var buffer: [1024]u8 = undefined;
+            var file_reader = file.reader(&buffer);
+            var stdin_writer = child.stdin.?.writer(&.{});
+            _ = stdin_writer.interface.sendFileAll(&file_reader, .unlimited) catch |err| switch (err) {
+                error.ReadFailed => return run.step.fail("failed to read from {f}: {t}", .{
+                    path, file_reader.err.?,
+                }),
+                error.WriteFailed => return run.step.fail("failed to write to stdin: {t}", .{
+                    stdin_writer.err.?,
+                }),
             };
             child.stdin.?.close();
             child.stdin = null;
@@ -1786,28 +1790,43 @@ fn evalGeneric(run: *Run, child: *std.process.Child) !StdIoResult {
     var stdout_bytes: ?[]const u8 = null;
     var stderr_bytes: ?[]const u8 = null;
+    run.stdio_limit = run.stdio_limit.min(.limited(run.max_stdio_size));
     if (child.stdout) |stdout| {
         if (child.stderr) |stderr| {
-            var poller = std.io.poll(arena, enum { stdout, stderr }, .{
+            var poller = std.Io.poll(arena, enum { stdout, stderr }, .{
                 .stdout = stdout,
                 .stderr = stderr,
            });
             defer poller.deinit();
             while (try poller.poll()) {
-                if (poller.fifo(.stdout).count > run.max_stdio_size)
-                    return error.StdoutStreamTooLong;
-                if (poller.fifo(.stderr).count > run.max_stdio_size)
-                    return error.StderrStreamTooLong;
+                if (run.stdio_limit.toInt()) |limit| {
+                    if (poller.reader(.stdout).buffered().len > limit)
+                        return error.StdoutStreamTooLong;
+                    if (poller.reader(.stderr).buffered().len > limit)
+                        return error.StderrStreamTooLong;
+                }
             }
-            stdout_bytes = try poller.fifo(.stdout).toOwnedSlice();
-            stderr_bytes = try poller.fifo(.stderr).toOwnedSlice();
+            stdout_bytes = try poller.toOwnedSlice(.stdout);
+            stderr_bytes = try poller.toOwnedSlice(.stderr);
         } else {
-            stdout_bytes = try stdout.deprecatedReader().readAllAlloc(arena, run.max_stdio_size);
+            var small_buffer: [1]u8 = undefined;
+            var stdout_reader = stdout.readerStreaming(&small_buffer);
+            stdout_bytes = stdout_reader.interface.allocRemaining(arena, run.stdio_limit) catch |err| switch (err) {
+                error.OutOfMemory => return error.OutOfMemory,
+                error.ReadFailed => return stdout_reader.err.?,
+                error.StreamTooLong => return error.StdoutStreamTooLong,
+            };
         }
     } else if (child.stderr) |stderr| {
-        stderr_bytes = try stderr.deprecatedReader().readAllAlloc(arena, run.max_stdio_size);
+        var small_buffer: [1]u8 = undefined;
+        var stderr_reader = stderr.readerStreaming(&small_buffer);
+        stderr_bytes = stderr_reader.interface.allocRemaining(arena, run.stdio_limit) catch |err| switch (err) {
+            error.OutOfMemory => return error.OutOfMemory,
+            error.ReadFailed => return stderr_reader.err.?,
+            error.StreamTooLong => return error.StderrStreamTooLong,
+        };
    }
     if (stderr_bytes) |bytes| if (bytes.len > 0)
{ diff --git a/lib/std/Io.zig b/lib/std/Io.zig index ff6966d7f7..1511f0dcad 100644 --- a/lib/std/Io.zig +++ b/lib/std/Io.zig @@ -1,16 +1,11 @@ -const std = @import("std.zig"); const builtin = @import("builtin"); -const root = @import("root"); -const c = std.c; const is_windows = builtin.os.tag == .windows; + +const std = @import("std.zig"); const windows = std.os.windows; const posix = std.posix; const math = std.math; const assert = std.debug.assert; -const fs = std.fs; -const mem = std.mem; -const meta = std.meta; -const File = std.fs.File; const Allocator = std.mem.Allocator; const Alignment = std.mem.Alignment; @@ -314,11 +309,11 @@ pub fn GenericReader( } /// Helper for bridging to the new `Reader` API while upgrading. - pub fn adaptToNewApi(self: *const Self) Adapter { + pub fn adaptToNewApi(self: *const Self, buffer: []u8) Adapter { return .{ .derp_reader = self.*, .new_interface = .{ - .buffer = &.{}, + .buffer = buffer, .vtable = &.{ .stream = Adapter.stream }, .seek = 0, .end = 0, @@ -334,10 +329,12 @@ pub fn GenericReader( fn stream(r: *Reader, w: *Writer, limit: Limit) Reader.StreamError!usize { const a: *@This() = @alignCast(@fieldParentPtr("new_interface", r)); const buf = limit.slice(try w.writableSliceGreedy(1)); - return a.derp_reader.read(buf) catch |err| { + const n = a.derp_reader.read(buf) catch |err| { a.err = err; return error.ReadFailed; }; + w.advance(n); + return n; } }; }; @@ -419,9 +416,14 @@ pub fn GenericWriter( new_interface: Writer, err: ?Error = null, - fn drain(w: *Writer, data: []const []const u8, splat: usize) Writer.Error!usize { + fn drain(w: *std.io.Writer, data: []const []const u8, splat: usize) std.io.Writer.Error!usize { _ = splat; const a: *@This() = @alignCast(@fieldParentPtr("new_interface", w)); + const buffered = w.buffered(); + if (buffered.len != 0) return w.consume(a.derp_writer.write(buffered) catch |err| { + a.err = err; + return error.WriteFailed; + }); return a.derp_writer.write(data[0]) catch |err| { a.err = err; return error.WriteFailed; @@ -435,54 +437,46 @@ pub fn GenericWriter( pub const AnyReader = @import("Io/DeprecatedReader.zig"); /// Deprecated in favor of `Writer`. pub const AnyWriter = @import("Io/DeprecatedWriter.zig"); - +/// Deprecated in favor of `File.Reader` and `File.Writer`. pub const SeekableStream = @import("Io/seekable_stream.zig").SeekableStream; - +/// Deprecated in favor of `Writer`. pub const BufferedWriter = @import("Io/buffered_writer.zig").BufferedWriter; +/// Deprecated in favor of `Writer`. pub const bufferedWriter = @import("Io/buffered_writer.zig").bufferedWriter; - +/// Deprecated in favor of `Reader`. pub const BufferedReader = @import("Io/buffered_reader.zig").BufferedReader; +/// Deprecated in favor of `Reader`. pub const bufferedReader = @import("Io/buffered_reader.zig").bufferedReader; +/// Deprecated in favor of `Reader`. pub const bufferedReaderSize = @import("Io/buffered_reader.zig").bufferedReaderSize; - +/// Deprecated in favor of `Reader`. pub const FixedBufferStream = @import("Io/fixed_buffer_stream.zig").FixedBufferStream; +/// Deprecated in favor of `Reader`. pub const fixedBufferStream = @import("Io/fixed_buffer_stream.zig").fixedBufferStream; - -pub const CWriter = @import("Io/c_writer.zig").CWriter; -pub const cWriter = @import("Io/c_writer.zig").cWriter; - +/// Deprecated in favor of `Reader.Limited`. pub const LimitedReader = @import("Io/limited_reader.zig").LimitedReader; +/// Deprecated in favor of `Reader.Limited`. 
pub const limitedReader = @import("Io/limited_reader.zig").limitedReader; - +/// Deprecated with no replacement; inefficient pattern pub const CountingWriter = @import("Io/counting_writer.zig").CountingWriter; +/// Deprecated with no replacement; inefficient pattern pub const countingWriter = @import("Io/counting_writer.zig").countingWriter; +/// Deprecated with no replacement; inefficient pattern pub const CountingReader = @import("Io/counting_reader.zig").CountingReader; +/// Deprecated with no replacement; inefficient pattern pub const countingReader = @import("Io/counting_reader.zig").countingReader; -pub const MultiWriter = @import("Io/multi_writer.zig").MultiWriter; -pub const multiWriter = @import("Io/multi_writer.zig").multiWriter; - pub const BitReader = @import("Io/bit_reader.zig").BitReader; pub const bitReader = @import("Io/bit_reader.zig").bitReader; pub const BitWriter = @import("Io/bit_writer.zig").BitWriter; pub const bitWriter = @import("Io/bit_writer.zig").bitWriter; -pub const ChangeDetectionStream = @import("Io/change_detection_stream.zig").ChangeDetectionStream; -pub const changeDetectionStream = @import("Io/change_detection_stream.zig").changeDetectionStream; - -pub const FindByteWriter = @import("Io/find_byte_writer.zig").FindByteWriter; -pub const findByteWriter = @import("Io/find_byte_writer.zig").findByteWriter; - -pub const BufferedAtomicFile = @import("Io/buffered_atomic_file.zig").BufferedAtomicFile; - -pub const StreamSource = @import("Io/stream_source.zig").StreamSource; - pub const tty = @import("Io/tty.zig"); -/// A Writer that doesn't write to anything. +/// Deprecated in favor of `Writer.Discarding`. pub const null_writer: NullWriter = .{ .context = {} }; - +/// Deprecated in favor of `Writer.Discarding`. pub const NullWriter = GenericWriter(void, error{}, dummyWrite); fn dummyWrite(context: void, data: []const u8) error{}!usize { _ = context; @@ -494,54 +488,51 @@ test null_writer { } pub fn poll( - allocator: Allocator, + gpa: Allocator, comptime StreamEnum: type, files: PollFiles(StreamEnum), ) Poller(StreamEnum) { const enum_fields = @typeInfo(StreamEnum).@"enum".fields; - var result: Poller(StreamEnum) = undefined; - - if (is_windows) result.windows = .{ - .first_read_done = false, - .overlapped = [1]windows.OVERLAPPED{ - mem.zeroes(windows.OVERLAPPED), - } ** enum_fields.len, - .small_bufs = undefined, - .active = .{ - .count = 0, - .handles_buf = undefined, - .stream_map = undefined, - }, + var result: Poller(StreamEnum) = .{ + .gpa = gpa, + .readers = @splat(.failing), + .poll_fds = undefined, + .windows = if (is_windows) .{ + .first_read_done = false, + .overlapped = [1]windows.OVERLAPPED{ + std.mem.zeroes(windows.OVERLAPPED), + } ** enum_fields.len, + .small_bufs = undefined, + .active = .{ + .count = 0, + .handles_buf = undefined, + .stream_map = undefined, + }, + } else {}, }; - inline for (0..enum_fields.len) |i| { - result.fifos[i] = .{ - .allocator = allocator, - .buf = &.{}, - .head = 0, - .count = 0, - }; + inline for (enum_fields, 0..) 
|field, i| { if (is_windows) { - result.windows.active.handles_buf[i] = @field(files, enum_fields[i].name).handle; + result.windows.active.handles_buf[i] = @field(files, field.name).handle; } else { result.poll_fds[i] = .{ - .fd = @field(files, enum_fields[i].name).handle, + .fd = @field(files, field.name).handle, .events = posix.POLL.IN, .revents = undefined, }; } } + return result; } -pub const PollFifo = std.fifo.LinearFifo(u8, .Dynamic); - pub fn Poller(comptime StreamEnum: type) type { return struct { const enum_fields = @typeInfo(StreamEnum).@"enum".fields; const PollFd = if (is_windows) void else posix.pollfd; - fifos: [enum_fields.len]PollFifo, + gpa: Allocator, + readers: [enum_fields.len]Reader, poll_fds: [enum_fields.len]PollFd, windows: if (is_windows) struct { first_read_done: bool, @@ -553,7 +544,7 @@ pub fn Poller(comptime StreamEnum: type) type { stream_map: [enum_fields.len]StreamEnum, pub fn removeAt(self: *@This(), index: u32) void { - std.debug.assert(index < self.count); + assert(index < self.count); for (index + 1..self.count) |i| { self.handles_buf[i - 1] = self.handles_buf[i]; self.stream_map[i - 1] = self.stream_map[i]; @@ -566,13 +557,14 @@ pub fn Poller(comptime StreamEnum: type) type { const Self = @This(); pub fn deinit(self: *Self) void { + const gpa = self.gpa; if (is_windows) { // cancel any pending IO to prevent clobbering OVERLAPPED value for (self.windows.active.handles_buf[0..self.windows.active.count]) |h| { _ = windows.kernel32.CancelIo(h); } } - inline for (&self.fifos) |*q| q.deinit(); + inline for (&self.readers) |*r| gpa.free(r.buffer); self.* = undefined; } @@ -592,21 +584,40 @@ pub fn Poller(comptime StreamEnum: type) type { } } - pub inline fn fifo(self: *Self, comptime which: StreamEnum) *PollFifo { - return &self.fifos[@intFromEnum(which)]; + pub fn reader(self: *Self, which: StreamEnum) *Reader { + return &self.readers[@intFromEnum(which)]; + } + + pub fn toOwnedSlice(self: *Self, which: StreamEnum) error{OutOfMemory}![]u8 { + const gpa = self.gpa; + const r = reader(self, which); + if (r.seek == 0) { + const new = try gpa.realloc(r.buffer, r.end); + r.buffer = &.{}; + r.end = 0; + return new; + } + const new = try gpa.dupe(u8, r.buffered()); + gpa.free(r.buffer); + r.buffer = &.{}; + r.seek = 0; + r.end = 0; + return new; } fn pollWindows(self: *Self, nanoseconds: ?u64) !bool { const bump_amt = 512; + const gpa = self.gpa; if (!self.windows.first_read_done) { var already_read_data = false; for (0..enum_fields.len) |i| { const handle = self.windows.active.handles_buf[i]; switch (try windowsAsyncReadToFifoAndQueueSmallRead( + gpa, handle, &self.windows.overlapped[i], - &self.fifos[i], + &self.readers[i], &self.windows.small_bufs[i], bump_amt, )) { @@ -653,7 +664,7 @@ pub fn Poller(comptime StreamEnum: type) type { const handle = self.windows.active.handles_buf[active_idx]; const overlapped = &self.windows.overlapped[stream_idx]; - const stream_fifo = &self.fifos[stream_idx]; + const stream_reader = &self.readers[stream_idx]; const small_buf = &self.windows.small_bufs[stream_idx]; const num_bytes_read = switch (try windowsGetReadResult(handle, overlapped, false)) { @@ -664,12 +675,16 @@ pub fn Poller(comptime StreamEnum: type) type { }, .aborted => unreachable, }; - try stream_fifo.write(small_buf[0..num_bytes_read]); + const buf = small_buf[0..num_bytes_read]; + const dest = try writableSliceGreedyAlloc(stream_reader, gpa, buf.len); + @memcpy(dest[0..buf.len], buf); + advanceBufferEnd(stream_reader, buf.len); switch (try 
windowsAsyncReadToFifoAndQueueSmallRead( + gpa, handle, overlapped, - stream_fifo, + stream_reader, small_buf, bump_amt, )) { @@ -684,6 +699,7 @@ pub fn Poller(comptime StreamEnum: type) type { } fn pollPosix(self: *Self, nanoseconds: ?u64) !bool { + const gpa = self.gpa; // We ask for ensureUnusedCapacity with this much extra space. This // has more of an effect on small reads because once the reads // start to get larger the amount of space an ArrayList will @@ -703,18 +719,18 @@ pub fn Poller(comptime StreamEnum: type) type { } var keep_polling = false; - inline for (&self.poll_fds, &self.fifos) |*poll_fd, *q| { + for (&self.poll_fds, &self.readers) |*poll_fd, *r| { // Try reading whatever is available before checking the error // conditions. // It's still possible to read after a POLL.HUP is received, // always check if there's some data waiting to be read first. if (poll_fd.revents & posix.POLL.IN != 0) { - const buf = try q.writableWithSize(bump_amt); + const buf = try writableSliceGreedyAlloc(r, gpa, bump_amt); const amt = posix.read(poll_fd.fd, buf) catch |err| switch (err) { error.BrokenPipe => 0, // Handle the same as EOF. else => |e| return e, }; - q.update(amt); + advanceBufferEnd(r, amt); if (amt == 0) { // Remove the fd when the EOF condition is met. poll_fd.fd = -1; @@ -730,146 +746,181 @@ pub fn Poller(comptime StreamEnum: type) type { } return keep_polling; } - }; -} -/// The `ReadFile` docuementation states that `lpNumberOfBytesRead` does not have a meaningful -/// result when using overlapped I/O, but also that it cannot be `null` on Windows 7. For -/// compatibility, we point it to this dummy variables, which we never otherwise access. -/// See: https://learn.microsoft.com/en-us/windows/win32/api/fileapi/nf-fileapi-readfile -var win_dummy_bytes_read: u32 = undefined; - -/// Read as much data as possible from `handle` with `overlapped`, and write it to the FIFO. Before -/// returning, queue a read into `small_buf` so that `WaitForMultipleObjects` returns when more data -/// is available. `handle` must have no pending asynchronous operation. -fn windowsAsyncReadToFifoAndQueueSmallRead( - handle: windows.HANDLE, - overlapped: *windows.OVERLAPPED, - fifo: *PollFifo, - small_buf: *[128]u8, - bump_amt: usize, -) !enum { empty, populated, closed_populated, closed } { - var read_any_data = false; - while (true) { - const fifo_read_pending = while (true) { - const buf = try fifo.writableWithSize(bump_amt); - const buf_len = math.cast(u32, buf.len) orelse math.maxInt(u32); - - if (0 == windows.kernel32.ReadFile( - handle, - buf.ptr, - buf_len, - &win_dummy_bytes_read, - overlapped, - )) switch (windows.GetLastError()) { - .IO_PENDING => break true, - .BROKEN_PIPE => return if (read_any_data) .closed_populated else .closed, - else => |err| return windows.unexpectedError(err), - }; + /// Returns a slice into the unused capacity of `buffer` with at least + /// `min_len` bytes, extending `buffer` by resizing it with `gpa` as necessary. + /// + /// After calling this function, typically the caller will follow up with a + /// call to `advanceBufferEnd` to report the actual number of bytes buffered. 
+        fn writableSliceGreedyAlloc(r: *Reader, allocator: Allocator, min_len: usize) Allocator.Error![]u8 {
+            {
+                const unused = r.buffer[r.end..];
+                if (unused.len >= min_len) return unused;
+            }
+            if (r.seek > 0) r.rebase(r.buffer.len) catch unreachable;
+            {
+                var list: std.ArrayListUnmanaged(u8) = .{
+                    .items = r.buffer[0..r.end],
+                    .capacity = r.buffer.len,
+                };
+                defer r.buffer = list.allocatedSlice();
+                try list.ensureUnusedCapacity(allocator, min_len);
+            }
+            const unused = r.buffer[r.end..];
+            assert(unused.len >= min_len);
+            return unused;
+        }
+
+        /// After writing directly into the unused capacity of `buffer`, this function
+        /// updates `end` so that users of `Reader` can receive the data.
+        fn advanceBufferEnd(r: *Reader, n: usize) void {
+            assert(n <= r.buffer.len - r.end);
+            r.end += n;
+        }
+
+        /// The `ReadFile` documentation states that `lpNumberOfBytesRead` does not have a meaningful
+        /// result when using overlapped I/O, but also that it cannot be `null` on Windows 7. For
+        /// compatibility, we point it to this dummy variable, which we never otherwise access.
+        /// See: https://learn.microsoft.com/en-us/windows/win32/api/fileapi/nf-fileapi-readfile
+        var win_dummy_bytes_read: u32 = undefined;
+
+        /// Read as much data as possible from `handle` with `overlapped`, and write it to the FIFO. Before
+        /// returning, queue a read into `small_buf` so that `WaitForMultipleObjects` returns when more data
+        /// is available. `handle` must have no pending asynchronous operation.
+        fn windowsAsyncReadToFifoAndQueueSmallRead(
+            gpa: Allocator,
+            handle: windows.HANDLE,
+            overlapped: *windows.OVERLAPPED,
+            r: *Reader,
+            small_buf: *[128]u8,
+            bump_amt: usize,
+        ) !enum { empty, populated, closed_populated, closed } {
+            var read_any_data = false;
+            while (true) {
+                const fifo_read_pending = while (true) {
+                    const buf = try writableSliceGreedyAlloc(r, gpa, bump_amt);
+                    const buf_len = math.cast(u32, buf.len) orelse math.maxInt(u32);
-            if (0 == windows.kernel32.ReadFile(
-                handle,
-                buf.ptr,
-                buf_len,
-                &win_dummy_bytes_read,
-                overlapped,
-            )) switch (windows.GetLastError()) {
-                .IO_PENDING => break true,
-                .BROKEN_PIPE => return if (read_any_data) .closed_populated else .closed,
-                else => |err| return windows.unexpectedError(err),
-            };
+                    if (0 == windows.kernel32.ReadFile(
+                        handle,
+                        buf.ptr,
+                        buf_len,
+                        &win_dummy_bytes_read,
+                        overlapped,
+                    )) switch (windows.GetLastError()) {
+                        .IO_PENDING => break true,
+                        .BROKEN_PIPE => return if (read_any_data) .closed_populated else .closed,
+                        else => |err| return windows.unexpectedError(err),
+                    };
-            const num_bytes_read = switch (try windowsGetReadResult(handle, overlapped, false)) {
-                .success => |n| n,
-                .closed => return if (read_any_data) .closed_populated else .closed,
-                .aborted => unreachable,
-            };
+                    const num_bytes_read = switch (try windowsGetReadResult(handle, overlapped, false)) {
+                        .success => |n| n,
+                        .closed => return if (read_any_data) .closed_populated else .closed,
+                        .aborted => unreachable,
+                    };
-            read_any_data = true;
-            fifo.update(num_bytes_read);
+                    read_any_data = true;
+                    advanceBufferEnd(r, num_bytes_read);
-            if (num_bytes_read == buf_len) {
-                // We filled the buffer, so there's probably more data available.
-                continue;
-            } else {
-                // We didn't fill the buffer, so assume we're out of data.
-                // There is no pending read.
-                break false;
-            }
-        };
+                    if (num_bytes_read == buf_len) {
+                        // We filled the buffer, so there's probably more data available.
+                        continue;
+                    } else {
+                        // We didn't fill the buffer, so assume we're out of data.
+                        // There is no pending read.
+                        break false;
+                    }
+                };
-        if (fifo_read_pending) cancel_read: {
-            // Cancel the pending read into the FIFO.
-            _ = windows.kernel32.CancelIo(handle);
+                if (fifo_read_pending) cancel_read: {
+                    // Cancel the pending read into the FIFO.
+                    _ = windows.kernel32.CancelIo(handle);
-            // We have to wait for the handle to be signalled, i.e. for the cancellation to complete.
- switch (windows.kernel32.WaitForSingleObject(handle, windows.INFINITE)) { - windows.WAIT_OBJECT_0 => {}, - windows.WAIT_FAILED => return windows.unexpectedError(windows.GetLastError()), - else => unreachable, - } + if (fifo_read_pending) cancel_read: { + // Cancel the pending read into the FIFO. + _ = windows.kernel32.CancelIo(handle); - // If it completed before we canceled, make sure to tell the FIFO! - const num_bytes_read = switch (try windowsGetReadResult(handle, overlapped, true)) { - .success => |n| n, - .closed => return if (read_any_data) .closed_populated else .closed, - .aborted => break :cancel_read, - }; - read_any_data = true; - fifo.update(num_bytes_read); - } - - // Try to queue the 1-byte read. - if (0 == windows.kernel32.ReadFile( - handle, - small_buf, - small_buf.len, - &win_dummy_bytes_read, - overlapped, - )) switch (windows.GetLastError()) { - .IO_PENDING => { - // 1-byte read pending as intended - return if (read_any_data) .populated else .empty; - }, - .BROKEN_PIPE => return if (read_any_data) .closed_populated else .closed, - else => |err| return windows.unexpectedError(err), - }; + // We have to wait for the handle to be signalled, i.e. for the cancellation to complete. + switch (windows.kernel32.WaitForSingleObject(handle, windows.INFINITE)) { + windows.WAIT_OBJECT_0 => {}, + windows.WAIT_FAILED => return windows.unexpectedError(windows.GetLastError()), + else => unreachable, + } - // We got data back this time. Write it to the FIFO and run the main loop again. - const num_bytes_read = switch (try windowsGetReadResult(handle, overlapped, false)) { - .success => |n| n, - .closed => return if (read_any_data) .closed_populated else .closed, - .aborted => unreachable, - }; - try fifo.write(small_buf[0..num_bytes_read]); - read_any_data = true; - } -} + // If it completed before we canceled, make sure to tell the FIFO! + const num_bytes_read = switch (try windowsGetReadResult(handle, overlapped, true)) { + .success => |n| n, + .closed => return if (read_any_data) .closed_populated else .closed, + .aborted => break :cancel_read, + }; + read_any_data = true; + advanceBufferEnd(r, num_bytes_read); + } -/// Simple wrapper around `GetOverlappedResult` to determine the result of a `ReadFile` operation. -/// If `!allow_aborted`, then `aborted` is never returned (`OPERATION_ABORTED` is considered unexpected). -/// -/// The `ReadFile` documentation states that the number of bytes read by an overlapped `ReadFile` must be determined using `GetOverlappedResult`, even if the -/// operation immediately returns data: -/// "Use NULL for [lpNumberOfBytesRead] if this is an asynchronous operation to avoid potentially -/// erroneous results." -/// "If `hFile` was opened with `FILE_FLAG_OVERLAPPED`, the following conditions are in effect: [...] -/// The lpNumberOfBytesRead parameter should be set to NULL. Use the GetOverlappedResult function to -/// get the actual number of bytes read." 
-/// See: https://learn.microsoft.com/en-us/windows/win32/api/fileapi/nf-fileapi-readfile -fn windowsGetReadResult( - handle: windows.HANDLE, - overlapped: *windows.OVERLAPPED, - allow_aborted: bool, -) !union(enum) { - success: u32, - closed, - aborted, -} { - var num_bytes_read: u32 = undefined; - if (0 == windows.kernel32.GetOverlappedResult( - handle, - overlapped, - &num_bytes_read, - 0, - )) switch (windows.GetLastError()) { - .BROKEN_PIPE => return .closed, - .OPERATION_ABORTED => |err| if (allow_aborted) { - return .aborted; - } else { - return windows.unexpectedError(err); - }, - else => |err| return windows.unexpectedError(err), + // Try to queue the 1-byte read. + if (0 == windows.kernel32.ReadFile( + handle, + small_buf, + small_buf.len, + &win_dummy_bytes_read, + overlapped, + )) switch (windows.GetLastError()) { + .IO_PENDING => { + // 1-byte read pending as intended + return if (read_any_data) .populated else .empty; + }, + .BROKEN_PIPE => return if (read_any_data) .closed_populated else .closed, + else => |err| return windows.unexpectedError(err), + }; + + // We got data back this time. Write it to the FIFO and run the main loop again. + const num_bytes_read = switch (try windowsGetReadResult(handle, overlapped, false)) { + .success => |n| n, + .closed => return if (read_any_data) .closed_populated else .closed, + .aborted => unreachable, + }; + const buf = small_buf[0..num_bytes_read]; + const dest = try writableSliceGreedyAlloc(r, gpa, buf.len); + @memcpy(dest[0..buf.len], buf); + advanceBufferEnd(r, buf.len); + read_any_data = true; + } + } + + /// Simple wrapper around `GetOverlappedResult` to determine the result of a `ReadFile` operation. + /// If `!allow_aborted`, then `aborted` is never returned (`OPERATION_ABORTED` is considered unexpected). + /// + /// The `ReadFile` documentation states that the number of bytes read by an overlapped `ReadFile` must be determined using `GetOverlappedResult`, even if the + /// operation immediately returns data: + /// "Use NULL for [lpNumberOfBytesRead] if this is an asynchronous operation to avoid potentially + /// erroneous results." + /// "If `hFile` was opened with `FILE_FLAG_OVERLAPPED`, the following conditions are in effect: [...] + /// The lpNumberOfBytesRead parameter should be set to NULL. Use the GetOverlappedResult function to + /// get the actual number of bytes read." 
+ /// See: https://learn.microsoft.com/en-us/windows/win32/api/fileapi/nf-fileapi-readfile + fn windowsGetReadResult( + handle: windows.HANDLE, + overlapped: *windows.OVERLAPPED, + allow_aborted: bool, + ) !union(enum) { + success: u32, + closed, + aborted, + } { + var num_bytes_read: u32 = undefined; + if (0 == windows.kernel32.GetOverlappedResult( + handle, + overlapped, + &num_bytes_read, + 0, + )) switch (windows.GetLastError()) { + .BROKEN_PIPE => return .closed, + .OPERATION_ABORTED => |err| if (allow_aborted) { + return .aborted; + } else { + return windows.unexpectedError(err); + }, + else => |err| return windows.unexpectedError(err), + }; + return .{ .success = num_bytes_read }; + } }; - return .{ .success = num_bytes_read }; } /// Given an enum, returns a struct with fields of that enum, each field @@ -880,10 +931,10 @@ pub fn PollFiles(comptime StreamEnum: type) type { for (&struct_fields, enum_fields) |*struct_field, enum_field| { struct_field.* = .{ .name = enum_field.name, - .type = fs.File, + .type = std.fs.File, .default_value_ptr = null, .is_comptime = false, - .alignment = @alignOf(fs.File), + .alignment = @alignOf(std.fs.File), }; } return @Type(.{ .@"struct" = .{ @@ -898,16 +949,14 @@ test { _ = Reader; _ = Reader.Limited; _ = Writer; - _ = @import("Io/bit_reader.zig"); - _ = @import("Io/bit_writer.zig"); - _ = @import("Io/buffered_atomic_file.zig"); - _ = @import("Io/buffered_reader.zig"); - _ = @import("Io/buffered_writer.zig"); - _ = @import("Io/c_writer.zig"); - _ = @import("Io/counting_writer.zig"); - _ = @import("Io/counting_reader.zig"); - _ = @import("Io/fixed_buffer_stream.zig"); - _ = @import("Io/seekable_stream.zig"); - _ = @import("Io/stream_source.zig"); + _ = BitReader; + _ = BitWriter; + _ = BufferedReader; + _ = BufferedWriter; + _ = CountingWriter; + _ = CountingReader; + _ = FixedBufferStream; + _ = SeekableStream; + _ = tty; _ = @import("Io/test.zig"); } diff --git a/lib/std/Io/DeprecatedReader.zig b/lib/std/Io/DeprecatedReader.zig index f6cb9f61d5..af1eda8415 100644 --- a/lib/std/Io/DeprecatedReader.zig +++ b/lib/std/Io/DeprecatedReader.zig @@ -373,11 +373,11 @@ pub fn discard(self: Self) anyerror!u64 { } /// Helper for bridging to the new `Reader` API while upgrading. 
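The `PollFiles` hunk above builds a struct type at comptime with one `std.fs.File` field per enum tag. Here is a standalone sketch of that reflection pattern, using the `std.builtin.Type` field names shown in the hunk; `FilesFor` and the test's anonymous enum are illustrative names, not part of the change:

const std = @import("std");

/// Mirrors the `PollFiles` reflection above: one `std.fs.File` field per
/// tag of `StreamEnum`.
fn FilesFor(comptime StreamEnum: type) type {
    const enum_fields = @typeInfo(StreamEnum).@"enum".fields;
    var struct_fields: [enum_fields.len]std.builtin.Type.StructField = undefined;
    for (&struct_fields, enum_fields) |*struct_field, enum_field| {
        struct_field.* = .{
            .name = enum_field.name,
            .type = std.fs.File,
            .default_value_ptr = null,
            .is_comptime = false,
            .alignment = @alignOf(std.fs.File),
        };
    }
    return @Type(.{ .@"struct" = .{
        .layout = .auto,
        .fields = &struct_fields,
        .decls = &.{},
        .is_tuple = false,
    } });
}

test FilesFor {
    const Files = FilesFor(enum { stdout, stderr });
    try std.testing.expect(@hasField(Files, "stdout"));
    try std.testing.expect(@hasField(Files, "stderr"));
}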
-pub fn adaptToNewApi(self: *const Self) Adapter { +pub fn adaptToNewApi(self: *const Self, buffer: []u8) Adapter { return .{ .derp_reader = self.*, .new_interface = .{ - .buffer = &.{}, + .buffer = buffer, .vtable = &.{ .stream = Adapter.stream }, .seek = 0, .end = 0, @@ -393,10 +393,12 @@ pub const Adapter = struct { fn stream(r: *std.io.Reader, w: *std.io.Writer, limit: std.io.Limit) std.io.Reader.StreamError!usize { const a: *@This() = @alignCast(@fieldParentPtr("new_interface", r)); const buf = limit.slice(try w.writableSliceGreedy(1)); - return a.derp_reader.read(buf) catch |err| { + const n = a.derp_reader.read(buf) catch |err| { a.err = err; return error.ReadFailed; }; + w.advance(n); + return n; } }; diff --git a/lib/std/Io/DeprecatedWriter.zig b/lib/std/Io/DeprecatedWriter.zig index 391b985357..81774b357c 100644 --- a/lib/std/Io/DeprecatedWriter.zig +++ b/lib/std/Io/DeprecatedWriter.zig @@ -100,7 +100,12 @@ pub const Adapter = struct { fn drain(w: *std.io.Writer, data: []const []const u8, splat: usize) std.io.Writer.Error!usize { _ = splat; - const a: *@This() = @fieldParentPtr("new_interface", w); + const a: *@This() = @alignCast(@fieldParentPtr("new_interface", w)); + const buffered = w.buffered(); + if (buffered.len != 0) return w.consume(a.derp_writer.write(buffered) catch |err| { + a.err = err; + return error.WriteFailed; + }); return a.derp_writer.write(data[0]) catch |err| { a.err = err; return error.WriteFailed; diff --git a/lib/std/Io/Reader.zig b/lib/std/Io/Reader.zig index f25e113522..da9e01dd2c 100644 --- a/lib/std/Io/Reader.zig +++ b/lib/std/Io/Reader.zig @@ -67,6 +67,18 @@ pub const VTable = struct { /// /// This function is only called when `buffer` is empty. discard: *const fn (r: *Reader, limit: Limit) Error!usize = defaultDiscard, + + /// Ensures `capacity` more data can be buffered without rebasing. + /// + /// Asserts `capacity` is within buffer capacity, or that the stream ends + /// within `capacity` bytes. + /// + /// Only called when `capacity` cannot fit into the unused capacity of + /// `buffer`. + /// + /// The default implementation moves buffered data to the start of + /// `buffer`, setting `seek` to zero, and cannot fail. + rebase: *const fn (r: *Reader, capacity: usize) RebaseError!void = defaultRebase, }; pub const StreamError = error{ @@ -97,6 +109,10 @@ pub const ShortError = error{ ReadFailed, }; +pub const RebaseError = error{ + EndOfStream, +}; + pub const failing: Reader = .{ .vtable = &.{ .stream = failingStream, @@ -122,6 +138,7 @@ pub fn fixed(buffer: []const u8) Reader { .vtable = &.{ .stream = endingStream, .discard = endingDiscard, + .rebase = endingRebase, }, // This cast is safe because all potential writes to it will instead // return `error.EndOfStream`. @@ -179,6 +196,38 @@ pub fn streamExact(r: *Reader, w: *Writer, n: usize) StreamError!void { while (remaining != 0) remaining -= try r.stream(w, .limited(remaining)); } +/// "Pump" exactly `n` bytes from the reader to the writer. +pub fn streamExact64(r: *Reader, w: *Writer, n: u64) StreamError!void { + var remaining = n; + while (remaining != 0) remaining -= try r.stream(w, .limited64(remaining)); +} + +/// "Pump" exactly `n` bytes from the reader to the writer. +/// +/// When draining `w`, ensures that at least `preserve_len` bytes remain +/// buffered. +/// +/// Asserts `Writer.buffer` capacity exceeds `preserve_len`. 
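The `w.advance(n)` fix in `Adapter.stream` above is the core contract of the new vtable: a stream implementation fills the writer's buffer and must both advance it and return the byte count. A minimal sketch of a custom source against the interface as it appears in this diff; `SliceSource` is a hypothetical type, and the exact `std.io.Reader` field set is assumed from the hunks above:

const std = @import("std");

const SliceSource = struct {
    remaining: []const u8,
    interface: std.io.Reader,

    fn init(bytes: []const u8, buffer: []u8) SliceSource {
        return .{
            .remaining = bytes,
            .interface = .{
                .buffer = buffer,
                .vtable = &.{ .stream = stream },
                .seek = 0,
                .end = 0,
            },
        };
    }

    fn stream(r: *std.io.Reader, w: *std.io.Writer, limit: std.io.Limit) std.io.Reader.StreamError!usize {
        const source: *SliceSource = @alignCast(@fieldParentPtr("interface", r));
        if (source.remaining.len == 0) return error.EndOfStream;
        const dest = limit.slice(try w.writableSliceGreedy(1));
        const n = @min(dest.len, source.remaining.len);
        @memcpy(dest[0..n], source.remaining[0..n]);
        source.remaining = source.remaining[n..];
        // Omitting this `advance` is exactly the bug fixed in the hunk above:
        // the bytes would be copied but never reported to the writer.
        w.advance(n);
        return n;
    }
};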
+pub fn streamExactPreserve(r: *Reader, w: *Writer, preserve_len: usize, n: usize) StreamError!void {
+    if (w.end + n <= w.buffer.len) {
+        @branchHint(.likely);
+        return streamExact(r, w, n);
+    }
+    // If `n` is large, we can ignore `preserve_len` up to a point.
+    var remaining = n;
+    while (remaining > preserve_len) {
+        assert(remaining != 0);
+        remaining -= try r.stream(w, .limited(remaining - preserve_len));
+        if (w.end + remaining <= w.buffer.len) return streamExact(r, w, remaining);
+    }
+    // All the next bytes received must be preserved.
+    if (preserve_len < w.end) {
+        @memmove(w.buffer[0..preserve_len], w.buffer[w.end - preserve_len ..][0..preserve_len]);
+        w.end = preserve_len;
+    }
+    return streamExact(r, w, remaining);
+}
+
 /// "Pump" data from the reader to the writer, handling `error.EndOfStream` as
 /// a success case.
 ///
@@ -234,7 +283,7 @@ pub fn allocRemaining(r: *Reader, gpa: Allocator, limit: Limit) LimitedAllocErro
 /// such case, the next byte that would be read will be the first one to exceed
 /// `limit`, and all preceding bytes have been appended to `list`.
 ///
-/// Asserts `buffer` has nonzero capacity.
+/// If `limit` is not `Limit.unlimited`, asserts `buffer` has nonzero capacity.
 ///
 /// See also:
 /// * `allocRemaining`
@@ -245,7 +294,7 @@ pub fn appendRemaining(
     list: *std.ArrayListAlignedUnmanaged(u8, alignment),
     limit: Limit,
 ) LimitedAllocError!void {
-    assert(r.buffer.len != 0); // Needed to detect limit exceeded without losing data.
+    if (limit != .unlimited) assert(r.buffer.len != 0); // Needed to detect limit exceeded without losing data.
     const buffer_contents = r.buffer[r.seek..r.end];
     const copy_len = limit.minInt(buffer_contents.len);
     try list.appendSlice(gpa, r.buffer[0..copy_len]);
@@ -748,11 +797,8 @@ pub fn peekDelimiterInclusive(r: *Reader, delimiter: u8) DelimiterError![]u8 {
         @branchHint(.likely);
         return buffer[seek .. end + 1];
     }
-    if (r.vtable.stream == &endingStream) {
-        // Protect the `@constCast` of `fixed`.
-        return error.EndOfStream;
-    }
-    r.rebase();
+    // TODO take a parameter for max search length rather than relying on buffer capacity
+    try rebase(r, r.buffer.len);
     while (r.buffer.len - r.end != 0) {
         const end_cap = r.buffer[r.end..];
         var writer: Writer = .fixed(end_cap);
@@ -1018,11 +1064,7 @@ fn fillUnbuffered(r: *Reader, n: usize) Error!void {
         };
         if (r.seek + n <= r.end) return;
     };
-    if (r.vtable.stream == &endingStream) {
-        // Protect the `@constCast` of `fixed`.
-        return error.EndOfStream;
-    }
-    rebaseCapacity(r, n);
+    try rebase(r, n);
     var writer: Writer = .{
         .buffer = r.buffer,
         .vtable = &.{ .drain = Writer.fixedDrain },
@@ -1042,7 +1084,7 @@ fn fillUnbuffered(r: *Reader, n: usize) Error!void {
 ///
 /// Asserts buffer capacity is at least 1.
 pub fn fillMore(r: *Reader) Error!void {
-    rebaseCapacity(r, 1);
+    try rebase(r, 1);
     var writer: Writer = .{
         .buffer = r.buffer,
         .end = r.end,
@@ -1219,7 +1261,7 @@ pub fn takeLeb128(r: *Reader, comptime Result: type) TakeLeb128Error!Result {
 pub fn expandTotalCapacity(r: *Reader, allocator: Allocator, n: usize) Allocator.Error!void {
     if (n <= r.buffer.len) return;
-    if (r.seek > 0) rebase(r);
+    if (r.seek > 0) rebase(r, r.buffer.len) catch unreachable;
     var list: ArrayList(u8) = .{
         .items = r.buffer[0..r.end],
         .capacity = r.buffer.len,
@@ -1235,37 +1277,6 @@ pub fn fillAlloc(r: *Reader, allocator: Allocator, n: usize) FillAllocError!void
     return fill(r, n);
 }
-/// Returns a slice into the unused capacity of `buffer` with at least
-/// `min_len` bytes, extending `buffer` by resizing it with `gpa` as necessary.
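The `@memmove` step in `streamExactPreserve` above is the whole trick: once every incoming byte must be preserved, the newest `preserve_len` buffered bytes slide to the front so the buffer can keep filling. The shift in isolation, as a plain test (values are illustrative):

const std = @import("std");

test "preserve window shift" {
    var buffer: [8]u8 = "abcdefgh".*;
    var end: usize = buffer.len;
    const preserve_len: usize = 3;
    if (preserve_len < end) {
        // Keep only the newest `preserve_len` bytes, moved to the front.
        @memmove(buffer[0..preserve_len], buffer[end - preserve_len ..][0..preserve_len]);
        end = preserve_len;
    }
    try std.testing.expectEqualStrings("fgh", buffer[0..end]);
}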
-/// -/// After calling this function, typically the caller will follow up with a -/// call to `advanceBufferEnd` to report the actual number of bytes buffered. -pub fn writableSliceGreedyAlloc(r: *Reader, allocator: Allocator, min_len: usize) Allocator.Error![]u8 { - { - const unused = r.buffer[r.end..]; - if (unused.len >= min_len) return unused; - } - if (r.seek > 0) rebase(r); - { - var list: ArrayList(u8) = .{ - .items = r.buffer[0..r.end], - .capacity = r.buffer.len, - }; - defer r.buffer = list.allocatedSlice(); - try list.ensureUnusedCapacity(allocator, min_len); - } - const unused = r.buffer[r.end..]; - assert(unused.len >= min_len); - return unused; -} - -/// After writing directly into the unused capacity of `buffer`, this function -/// updates `end` so that users of `Reader` can receive the data. -pub fn advanceBufferEnd(r: *Reader, n: usize) void { - assert(n <= r.buffer.len - r.end); - r.end += n; -} - fn takeMultipleOf7Leb128(r: *Reader, comptime Result: type) TakeLeb128Error!Result { const result_info = @typeInfo(Result).int; comptime assert(result_info.bits % 7 == 0); @@ -1296,37 +1307,20 @@ fn takeMultipleOf7Leb128(r: *Reader, comptime Result: type) TakeLeb128Error!Resu } } -/// Left-aligns data such that `r.seek` becomes zero. -/// -/// If `r.seek` is not already zero then `buffer` is mutated, making it illegal -/// to call this function with a const-casted `buffer`, such as in the case of -/// `fixed`. This issue can be avoided: -/// * in implementations, by attempting a read before a rebase, in which -/// case the read will return `error.EndOfStream`, preventing the rebase. -/// * in usage, by copying into a mutable buffer before initializing `fixed`. -pub fn rebase(r: *Reader) void { - if (r.seek == 0) return; +/// Ensures `capacity` more data can be buffered without rebasing. +pub fn rebase(r: *Reader, capacity: usize) RebaseError!void { + if (r.end + capacity <= r.buffer.len) return; + return r.vtable.rebase(r, capacity); +} + +pub fn defaultRebase(r: *Reader, capacity: usize) RebaseError!void { + if (r.end <= r.buffer.len - capacity) return; const data = r.buffer[r.seek..r.end]; @memmove(r.buffer[0..data.len], data); r.seek = 0; r.end = data.len; } -/// Ensures `capacity` more data can be buffered without rebasing, by rebasing -/// if necessary. -/// -/// Asserts `capacity` is within the buffer capacity. -/// -/// If the rebase occurs then `buffer` is mutated, making it illegal to call -/// this function with a const-casted `buffer`, such as in the case of `fixed`. -/// This issue can be avoided: -/// * in implementations, by attempting a read before a rebase, in which -/// case the read will return `error.EndOfStream`, preventing the rebase. -/// * in usage, by copying into a mutable buffer before initializing `fixed`. -pub fn rebaseCapacity(r: *Reader, capacity: usize) void { - if (r.end > r.buffer.len - capacity) rebase(r); -} - /// Advances the stream and decreases the size of the storage buffer by `n`, /// returning the range of bytes no longer accessible by `r`. 
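`defaultRebase` above is a plain left-alignment of the unread window `buffer[seek..end]`. Its effect, reproduced as a self-contained test:

const std = @import("std");

test "rebase left-aligns buffered data" {
    var buffer: [8]u8 = "xxxhello".*;
    var seek: usize = 3;
    var end: usize = 8;
    // Same move as `defaultRebase`: shift the unread bytes to the front.
    const data = buffer[seek..end];
    @memmove(buffer[0..data.len], data);
    seek = 0;
    end = data.len;
    try std.testing.expectEqualStrings("hello", buffer[seek..end]);
}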
/// @@ -1682,6 +1676,12 @@ fn endingDiscard(r: *Reader, limit: Limit) Error!usize { return error.EndOfStream; } +fn endingRebase(r: *Reader, capacity: usize) RebaseError!void { + _ = r; + _ = capacity; + return error.EndOfStream; +} + fn failingStream(r: *Reader, w: *Writer, limit: Limit) StreamError!usize { _ = r; _ = w; diff --git a/lib/std/Io/Writer.zig b/lib/std/Io/Writer.zig index 0723073592..06a6534071 100644 --- a/lib/std/Io/Writer.zig +++ b/lib/std/Io/Writer.zig @@ -256,10 +256,10 @@ test "fixed buffer flush" { try testing.expectEqual(10, buffer[0]); } -/// Calls `VTable.drain` but hides the last `preserve_length` bytes from the +/// Calls `VTable.drain` but hides the last `preserve_len` bytes from the /// implementation, keeping them buffered. -pub fn drainPreserve(w: *Writer, preserve_length: usize) Error!void { - const temp_end = w.end -| preserve_length; +pub fn drainPreserve(w: *Writer, preserve_len: usize) Error!void { + const temp_end = w.end -| preserve_len; const preserved = w.buffer[temp_end..w.end]; w.end = temp_end; defer w.end += preserved.len; @@ -310,24 +310,38 @@ pub fn writableSliceGreedy(w: *Writer, minimum_length: usize) Error![]u8 { } /// Asserts the provided buffer has total capacity enough for `minimum_length` -/// and `preserve_length` combined. +/// and `preserve_len` combined. /// /// Does not `advance` the buffer end position. /// -/// When draining the buffer, ensures that at least `preserve_length` bytes +/// When draining the buffer, ensures that at least `preserve_len` bytes /// remain buffered. /// -/// If `preserve_length` is zero, this is equivalent to `writableSliceGreedy`. -pub fn writableSliceGreedyPreserve(w: *Writer, preserve_length: usize, minimum_length: usize) Error![]u8 { - assert(w.buffer.len >= preserve_length + minimum_length); +/// If `preserve_len` is zero, this is equivalent to `writableSliceGreedy`. +pub fn writableSliceGreedyPreserve(w: *Writer, preserve_len: usize, minimum_length: usize) Error![]u8 { + assert(w.buffer.len >= preserve_len + minimum_length); while (w.buffer.len - w.end < minimum_length) { - try drainPreserve(w, preserve_length); + try drainPreserve(w, preserve_len); } else { @branchHint(.likely); return w.buffer[w.end..]; } } +/// Asserts the provided buffer has total capacity enough for `len`. +/// +/// Advances the buffer end position by `len`. +/// +/// When draining the buffer, ensures that at least `preserve_len` bytes +/// remain buffered. +/// +/// If `preserve_len` is zero, this is equivalent to `writableSlice`. +pub fn writableSlicePreserve(w: *Writer, preserve_len: usize, len: usize) Error![]u8 { + const big_slice = try w.writableSliceGreedyPreserve(preserve_len, len); + advance(w, len); + return big_slice[0..len]; +} + pub const WritableVectorIterator = struct { first: []u8, middle: []const []u8 = &.{}, @@ -523,16 +537,16 @@ pub fn write(w: *Writer, bytes: []const u8) Error!usize { return w.vtable.drain(w, &.{bytes}, 1); } -/// Asserts `buffer` capacity exceeds `preserve_length`. -pub fn writePreserve(w: *Writer, preserve_length: usize, bytes: []const u8) Error!usize { - assert(preserve_length <= w.buffer.len); +/// Asserts `buffer` capacity exceeds `preserve_len`. 
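The renamed `drainPreserve` relies on saturating subtraction: hiding `preserve_len` bytes from a buffer that holds fewer than that simply drains nothing. A two-line check of that edge case:

const std = @import("std");

test "drainPreserve end computation saturates" {
    const end: usize = 10;
    try std.testing.expectEqual(@as(usize, 6), end -| 4); // normal case: drain 6, keep 4
    try std.testing.expectEqual(@as(usize, 0), end -| 16); // short buffer: keep everything
}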
+pub fn writePreserve(w: *Writer, preserve_len: usize, bytes: []const u8) Error!usize { + assert(preserve_len <= w.buffer.len); if (w.end + bytes.len <= w.buffer.len) { @branchHint(.likely); @memcpy(w.buffer[w.end..][0..bytes.len], bytes); w.end += bytes.len; return bytes.len; } - const temp_end = w.end -| preserve_length; + const temp_end = w.end -| preserve_len; const preserved = w.buffer[temp_end..w.end]; w.end = temp_end; defer w.end += preserved.len; @@ -552,13 +566,13 @@ pub fn writeAll(w: *Writer, bytes: []const u8) Error!void { /// Calls `drain` as many times as necessary such that all of `bytes` are /// transferred. /// -/// When draining the buffer, ensures that at least `preserve_length` bytes +/// When draining the buffer, ensures that at least `preserve_len` bytes /// remain buffered. /// -/// Asserts `buffer` capacity exceeds `preserve_length`. -pub fn writeAllPreserve(w: *Writer, preserve_length: usize, bytes: []const u8) Error!void { +/// Asserts `buffer` capacity exceeds `preserve_len`. +pub fn writeAllPreserve(w: *Writer, preserve_len: usize, bytes: []const u8) Error!void { var index: usize = 0; - while (index < bytes.len) index += try w.writePreserve(preserve_length, bytes[index..]); + while (index < bytes.len) index += try w.writePreserve(preserve_len, bytes[index..]); } /// Renders fmt string with args, calling `writer` with slices of bytes. @@ -761,11 +775,11 @@ pub fn writeByte(w: *Writer, byte: u8) Error!void { } } -/// When draining the buffer, ensures that at least `preserve_length` bytes +/// When draining the buffer, ensures that at least `preserve_len` bytes /// remain buffered. -pub fn writeBytePreserve(w: *Writer, preserve_length: usize, byte: u8) Error!void { +pub fn writeBytePreserve(w: *Writer, preserve_len: usize, byte: u8) Error!void { while (w.buffer.len - w.end == 0) { - try drainPreserve(w, preserve_length); + try drainPreserve(w, preserve_len); } else { @branchHint(.likely); w.buffer[w.end] = byte; @@ -788,10 +802,42 @@ test splatByteAll { try testing.expectEqualStrings("7" ** 45, aw.writer.buffered()); } +pub fn splatBytePreserve(w: *Writer, preserve_len: usize, byte: u8, n: usize) Error!void { + const new_end = w.end + n; + if (new_end <= w.buffer.len) { + @memset(w.buffer[w.end..][0..n], byte); + w.end = new_end; + return; + } + // If `n` is large, we can ignore `preserve_len` up to a point. + var remaining = n; + while (remaining > preserve_len) { + assert(remaining != 0); + remaining -= try splatByte(w, byte, remaining - preserve_len); + if (w.end + remaining <= w.buffer.len) { + @memset(w.buffer[w.end..][0..remaining], byte); + w.end += remaining; + return; + } + } + // All the next bytes received must be preserved. + if (preserve_len < w.end) { + @memmove(w.buffer[0..preserve_len], w.buffer[w.end - preserve_len ..][0..preserve_len]); + w.end = preserve_len; + } + while (remaining > 0) remaining -= try w.splatByte(byte, remaining); +} + /// Writes the same byte many times, allowing short writes. /// /// Does maximum of one underlying `VTable.drain`. 
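Both `splatBytePreserve` above and the `splatBytesAll` fix just below resume a splat that a short write left mid-pattern. The leftover decomposes into whole repetitions plus a partial tail; a worked example of the arithmetic (the concrete values are illustrative):

const std = @import("std");

test "splat leftover decomposition" {
    const pattern = "abc";
    const splat: usize = 4; // want "abcabcabcabc", 12 bytes
    var remaining: usize = pattern.len * splat;
    remaining -= 5; // a short write consumed "abcab"
    const leftover_splat = remaining / pattern.len; // 2 whole "abc"s still owed
    const leftover_bytes = remaining % pattern.len; // plus "c" to finish the cut pattern
    try std.testing.expectEqual(@as(usize, 2), leftover_splat);
    try std.testing.expectEqual(@as(usize, 1), leftover_bytes);
}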
pub fn splatByte(w: *Writer, byte: u8, n: usize) Error!usize { + if (w.end + n <= w.buffer.len) { + @branchHint(.likely); + @memset(w.buffer[w.end..][0..n], byte); + w.end += n; + return n; + } return writeSplat(w, &.{&.{byte}}, n); } @@ -801,9 +847,10 @@ pub fn splatBytesAll(w: *Writer, bytes: []const u8, splat: usize) Error!void { var remaining_bytes: usize = bytes.len * splat; remaining_bytes -= try w.splatBytes(bytes, splat); while (remaining_bytes > 0) { - const leftover = remaining_bytes % bytes.len; - const buffers: [2][]const u8 = .{ bytes[bytes.len - leftover ..], bytes }; - remaining_bytes -= try w.writeSplat(&buffers, splat); + const leftover_splat = remaining_bytes / bytes.len; + const leftover_bytes = remaining_bytes % bytes.len; + const buffers: [2][]const u8 = .{ bytes[bytes.len - leftover_bytes ..], bytes }; + remaining_bytes -= try w.writeSplat(&buffers, leftover_splat); } } @@ -1564,17 +1611,23 @@ pub fn printFloatHexOptions(w: *Writer, value: anytype, options: std.fmt.Number) } pub fn printFloatHex(w: *Writer, value: anytype, case: std.fmt.Case, opt_precision: ?usize) Error!void { - if (std.math.signbit(value)) try w.writeByte('-'); - if (std.math.isNan(value)) return w.writeAll(switch (case) { + const v = switch (@TypeOf(value)) { + // comptime_float internally is a f128; this preserves precision. + comptime_float => @as(f128, value), + else => value, + }; + + if (std.math.signbit(v)) try w.writeByte('-'); + if (std.math.isNan(v)) return w.writeAll(switch (case) { .lower => "nan", .upper => "NAN", }); - if (std.math.isInf(value)) return w.writeAll(switch (case) { + if (std.math.isInf(v)) return w.writeAll(switch (case) { .lower => "inf", .upper => "INF", }); - const T = @TypeOf(value); + const T = @TypeOf(v); const TU = std.meta.Int(.unsigned, @bitSizeOf(T)); const mantissa_bits = std.math.floatMantissaBits(T); @@ -1584,7 +1637,7 @@ pub fn printFloatHex(w: *Writer, value: anytype, case: std.fmt.Case, opt_precisi const exponent_mask = (1 << exponent_bits) - 1; const exponent_bias = (1 << (exponent_bits - 1)) - 1; - const as_bits: TU = @bitCast(value); + const as_bits: TU = @bitCast(v); var mantissa = as_bits & mantissa_mask; var exponent: i32 = @as(u16, @truncate((as_bits >> mantissa_bits) & exponent_mask)); @@ -2239,6 +2292,10 @@ pub const Discarding = struct { pub fn sendFile(w: *Writer, file_reader: *File.Reader, limit: Limit) FileError!usize { if (File.Handle == void) return error.Unimplemented; + switch (builtin.zig_backend) { + else => {}, + .stage2_aarch64 => return error.Unimplemented, + } const d: *Discarding = @alignCast(@fieldParentPtr("writer", w)); d.count += w.end; w.end = 0; diff --git a/lib/std/Io/buffered_atomic_file.zig b/lib/std/Io/buffered_atomic_file.zig deleted file mode 100644 index 48510bde52..0000000000 --- a/lib/std/Io/buffered_atomic_file.zig +++ /dev/null @@ -1,55 +0,0 @@ -const std = @import("../std.zig"); -const mem = std.mem; -const fs = std.fs; -const File = std.fs.File; - -pub const BufferedAtomicFile = struct { - atomic_file: fs.AtomicFile, - file_writer: File.Writer, - buffered_writer: BufferedWriter, - allocator: mem.Allocator, - - pub const buffer_size = 4096; - pub const BufferedWriter = std.io.BufferedWriter(buffer_size, File.Writer); - pub const Writer = std.io.GenericWriter(*BufferedWriter, BufferedWriter.Error, BufferedWriter.write); - - /// TODO when https://github.com/ziglang/zig/issues/2761 is solved - /// this API will not need an allocator - pub fn create( - allocator: mem.Allocator, - dir: fs.Dir, - dest_path: []const u8, - 
atomic_file_options: fs.Dir.AtomicFileOptions, - ) !*BufferedAtomicFile { - var self = try allocator.create(BufferedAtomicFile); - self.* = BufferedAtomicFile{ - .atomic_file = undefined, - .file_writer = undefined, - .buffered_writer = undefined, - .allocator = allocator, - }; - errdefer allocator.destroy(self); - - self.atomic_file = try dir.atomicFile(dest_path, atomic_file_options); - errdefer self.atomic_file.deinit(); - - self.file_writer = self.atomic_file.file.deprecatedWriter(); - self.buffered_writer = .{ .unbuffered_writer = self.file_writer }; - return self; - } - - /// always call destroy, even after successful finish() - pub fn destroy(self: *BufferedAtomicFile) void { - self.atomic_file.deinit(); - self.allocator.destroy(self); - } - - pub fn finish(self: *BufferedAtomicFile) !void { - try self.buffered_writer.flush(); - try self.atomic_file.finish(); - } - - pub fn writer(self: *BufferedAtomicFile) Writer { - return .{ .context = &self.buffered_writer }; - } -}; diff --git a/lib/std/Io/c_writer.zig b/lib/std/Io/c_writer.zig deleted file mode 100644 index 30d0cabcf5..0000000000 --- a/lib/std/Io/c_writer.zig +++ /dev/null @@ -1,44 +0,0 @@ -const std = @import("../std.zig"); -const builtin = @import("builtin"); -const io = std.io; -const testing = std.testing; - -pub const CWriter = io.GenericWriter(*std.c.FILE, std.fs.File.WriteError, cWriterWrite); - -pub fn cWriter(c_file: *std.c.FILE) CWriter { - return .{ .context = c_file }; -} - -fn cWriterWrite(c_file: *std.c.FILE, bytes: []const u8) std.fs.File.WriteError!usize { - const amt_written = std.c.fwrite(bytes.ptr, 1, bytes.len, c_file); - if (amt_written >= 0) return amt_written; - switch (@as(std.c.E, @enumFromInt(std.c._errno().*))) { - .SUCCESS => unreachable, - .INVAL => unreachable, - .FAULT => unreachable, - .AGAIN => unreachable, // this is a blocking API - .BADF => unreachable, // always a race condition - .DESTADDRREQ => unreachable, // connect was never called - .DQUOT => return error.DiskQuota, - .FBIG => return error.FileTooBig, - .IO => return error.InputOutput, - .NOSPC => return error.NoSpaceLeft, - .PERM => return error.PermissionDenied, - .PIPE => return error.BrokenPipe, - else => |err| return std.posix.unexpectedErrno(err), - } -} - -test cWriter { - if (!builtin.link_libc or builtin.os.tag == .wasi) return error.SkipZigTest; - - const filename = "tmp_io_test_file.txt"; - const out_file = std.c.fopen(filename, "w") orelse return error.UnableToOpenTestFile; - defer { - _ = std.c.fclose(out_file); - std.fs.cwd().deleteFileZ(filename) catch {}; - } - - const writer = cWriter(out_file); - try writer.print("hi: {}\n", .{@as(i32, 123)}); -} diff --git a/lib/std/Io/change_detection_stream.zig b/lib/std/Io/change_detection_stream.zig deleted file mode 100644 index d9da1c4a0e..0000000000 --- a/lib/std/Io/change_detection_stream.zig +++ /dev/null @@ -1,55 +0,0 @@ -const std = @import("../std.zig"); -const io = std.io; -const mem = std.mem; -const assert = std.debug.assert; - -/// Used to detect if the data written to a stream differs from a source buffer -pub fn ChangeDetectionStream(comptime WriterType: type) type { - return struct { - const Self = @This(); - pub const Error = WriterType.Error; - pub const Writer = io.GenericWriter(*Self, Error, write); - - anything_changed: bool, - underlying_writer: WriterType, - source_index: usize, - source: []const u8, - - pub fn writer(self: *Self) Writer { - return .{ .context = self }; - } - - fn write(self: *Self, bytes: []const u8) Error!usize { - if 
(!self.anything_changed) { - const end = self.source_index + bytes.len; - if (end > self.source.len) { - self.anything_changed = true; - } else { - const src_slice = self.source[self.source_index..end]; - self.source_index += bytes.len; - if (!mem.eql(u8, bytes, src_slice)) { - self.anything_changed = true; - } - } - } - - return self.underlying_writer.write(bytes); - } - - pub fn changeDetected(self: *Self) bool { - return self.anything_changed or (self.source_index != self.source.len); - } - }; -} - -pub fn changeDetectionStream( - source: []const u8, - underlying_writer: anytype, -) ChangeDetectionStream(@TypeOf(underlying_writer)) { - return ChangeDetectionStream(@TypeOf(underlying_writer)){ - .anything_changed = false, - .underlying_writer = underlying_writer, - .source_index = 0, - .source = source, - }; -} diff --git a/lib/std/Io/find_byte_writer.zig b/lib/std/Io/find_byte_writer.zig deleted file mode 100644 index fe6836f603..0000000000 --- a/lib/std/Io/find_byte_writer.zig +++ /dev/null @@ -1,40 +0,0 @@ -const std = @import("../std.zig"); -const io = std.io; -const assert = std.debug.assert; - -/// A Writer that returns whether the given character has been written to it. -/// The contents are not written to anything. -pub fn FindByteWriter(comptime UnderlyingWriter: type) type { - return struct { - const Self = @This(); - pub const Error = UnderlyingWriter.Error; - pub const Writer = io.GenericWriter(*Self, Error, write); - - underlying_writer: UnderlyingWriter, - byte_found: bool, - byte: u8, - - pub fn writer(self: *Self) Writer { - return .{ .context = self }; - } - - fn write(self: *Self, bytes: []const u8) Error!usize { - if (!self.byte_found) { - self.byte_found = blk: { - for (bytes) |b| - if (b == self.byte) break :blk true; - break :blk false; - }; - } - return self.underlying_writer.write(bytes); - } - }; -} - -pub fn findByteWriter(byte: u8, underlying_writer: anytype) FindByteWriter(@TypeOf(underlying_writer)) { - return FindByteWriter(@TypeOf(underlying_writer)){ - .underlying_writer = underlying_writer, - .byte = byte, - .byte_found = false, - }; -} diff --git a/lib/std/Io/multi_writer.zig b/lib/std/Io/multi_writer.zig deleted file mode 100644 index 20e9e782de..0000000000 --- a/lib/std/Io/multi_writer.zig +++ /dev/null @@ -1,53 +0,0 @@ -const std = @import("../std.zig"); -const io = std.io; - -/// Takes a tuple of streams, and constructs a new stream that writes to all of them -pub fn MultiWriter(comptime Writers: type) type { - comptime var ErrSet = error{}; - inline for (@typeInfo(Writers).@"struct".fields) |field| { - const StreamType = field.type; - ErrSet = ErrSet || StreamType.Error; - } - - return struct { - const Self = @This(); - - streams: Writers, - - pub const Error = ErrSet; - pub const Writer = io.GenericWriter(*Self, Error, write); - - pub fn writer(self: *Self) Writer { - return .{ .context = self }; - } - - pub fn write(self: *Self, bytes: []const u8) Error!usize { - inline for (self.streams) |stream| - try stream.writeAll(bytes); - return bytes.len; - } - }; -} - -pub fn multiWriter(streams: anytype) MultiWriter(@TypeOf(streams)) { - return .{ .streams = streams }; -} - -const testing = std.testing; - -test "MultiWriter" { - var tmp = testing.tmpDir(.{}); - defer tmp.cleanup(); - var f = try tmp.dir.createFile("t.txt", .{}); - - var buf1: [255]u8 = undefined; - var fbs1 = io.fixedBufferStream(&buf1); - var buf2: [255]u8 = undefined; - var stream = multiWriter(.{ fbs1.writer(), f.writer() }); - - try stream.writer().print("HI", .{}); - f.close(); - - 
try testing.expectEqualSlices(u8, "HI", fbs1.getWritten()); - try testing.expectEqualSlices(u8, "HI", try tmp.dir.readFile("t.txt", &buf2)); -} diff --git a/lib/std/Io/stream_source.zig b/lib/std/Io/stream_source.zig deleted file mode 100644 index 2a3527e479..0000000000 --- a/lib/std/Io/stream_source.zig +++ /dev/null @@ -1,127 +0,0 @@ -const std = @import("../std.zig"); -const builtin = @import("builtin"); -const io = std.io; - -/// Provides `io.GenericReader`, `io.GenericWriter`, and `io.SeekableStream` for in-memory buffers as -/// well as files. -/// For memory sources, if the supplied byte buffer is const, then `io.GenericWriter` is not available. -/// The error set of the stream functions is the error set of the corresponding file functions. -pub const StreamSource = union(enum) { - // TODO: expose UEFI files to std.os in a way that allows this to be true - const has_file = (builtin.os.tag != .freestanding and builtin.os.tag != .uefi); - - /// The stream access is redirected to this buffer. - buffer: io.FixedBufferStream([]u8), - - /// The stream access is redirected to this buffer. - /// Writing to the source will always yield `error.AccessDenied`. - const_buffer: io.FixedBufferStream([]const u8), - - /// The stream access is redirected to this file. - /// On freestanding, this must never be initialized! - file: if (has_file) std.fs.File else void, - - pub const ReadError = io.FixedBufferStream([]u8).ReadError || (if (has_file) std.fs.File.ReadError else error{}); - pub const WriteError = error{AccessDenied} || io.FixedBufferStream([]u8).WriteError || (if (has_file) std.fs.File.WriteError else error{}); - pub const SeekError = io.FixedBufferStream([]u8).SeekError || (if (has_file) std.fs.File.SeekError else error{}); - pub const GetSeekPosError = io.FixedBufferStream([]u8).GetSeekPosError || (if (has_file) std.fs.File.GetSeekPosError else error{}); - - pub const Reader = io.GenericReader(*StreamSource, ReadError, read); - pub const Writer = io.GenericWriter(*StreamSource, WriteError, write); - pub const SeekableStream = io.SeekableStream( - *StreamSource, - SeekError, - GetSeekPosError, - seekTo, - seekBy, - getPos, - getEndPos, - ); - - pub fn read(self: *StreamSource, dest: []u8) ReadError!usize { - switch (self.*) { - .buffer => |*x| return x.read(dest), - .const_buffer => |*x| return x.read(dest), - .file => |x| if (!has_file) unreachable else return x.read(dest), - } - } - - pub fn write(self: *StreamSource, bytes: []const u8) WriteError!usize { - switch (self.*) { - .buffer => |*x| return x.write(bytes), - .const_buffer => return error.AccessDenied, - .file => |x| if (!has_file) unreachable else return x.write(bytes), - } - } - - pub fn seekTo(self: *StreamSource, pos: u64) SeekError!void { - switch (self.*) { - .buffer => |*x| return x.seekTo(pos), - .const_buffer => |*x| return x.seekTo(pos), - .file => |x| if (!has_file) unreachable else return x.seekTo(pos), - } - } - - pub fn seekBy(self: *StreamSource, amt: i64) SeekError!void { - switch (self.*) { - .buffer => |*x| return x.seekBy(amt), - .const_buffer => |*x| return x.seekBy(amt), - .file => |x| if (!has_file) unreachable else return x.seekBy(amt), - } - } - - pub fn getEndPos(self: *StreamSource) GetSeekPosError!u64 { - switch (self.*) { - .buffer => |*x| return x.getEndPos(), - .const_buffer => |*x| return x.getEndPos(), - .file => |x| if (!has_file) unreachable else return x.getEndPos(), - } - } - - pub fn getPos(self: *StreamSource) GetSeekPosError!u64 { - switch (self.*) { - .buffer => |*x| return x.getPos(), - 
.const_buffer => |*x| return x.getPos(), - .file => |x| if (!has_file) unreachable else return x.getPos(), - } - } - - pub fn reader(self: *StreamSource) Reader { - return .{ .context = self }; - } - - pub fn writer(self: *StreamSource) Writer { - return .{ .context = self }; - } - - pub fn seekableStream(self: *StreamSource) SeekableStream { - return .{ .context = self }; - } -}; - -test "refs" { - std.testing.refAllDecls(StreamSource); -} - -test "mutable buffer" { - var buffer: [64]u8 = undefined; - var source = StreamSource{ .buffer = std.io.fixedBufferStream(&buffer) }; - - var writer = source.writer(); - - try writer.writeAll("Hello, World!"); - - try std.testing.expectEqualStrings("Hello, World!", source.buffer.getWritten()); -} - -test "const buffer" { - const buffer: [64]u8 = "Hello, World!".* ++ ([1]u8{0xAA} ** 51); - var source = StreamSource{ .const_buffer = std.io.fixedBufferStream(&buffer) }; - - var reader = source.reader(); - - var dst_buffer: [13]u8 = undefined; - try reader.readNoEof(&dst_buffer); - - try std.testing.expectEqualStrings("Hello, World!", &dst_buffer); -} diff --git a/lib/std/Progress.zig b/lib/std/Progress.zig index 2634553d25..2806c1a09c 100644 --- a/lib/std/Progress.zig +++ b/lib/std/Progress.zig @@ -25,6 +25,7 @@ redraw_event: std.Thread.ResetEvent, /// Accessed atomically. done: bool, need_clear: bool, +status: Status, refresh_rate_ns: u64, initial_delay_ns: u64, @@ -47,6 +48,22 @@ node_freelist: Freelist, /// value may at times temporarily exceed the node count. node_end_index: u32, +pub const Status = enum { + /// Indicates the application is progressing towards completion of a task. + /// Unless the application is interactive, this is the only status the + /// program will ever have! + working, + /// The application has completed an operation, and is now waiting for user + /// input rather than calling exit(0). + success, + /// The application encountered an error, and is now waiting for user input + /// rather than calling exit(1). + failure, + /// The application encountered at least one error, but is still working on + /// more tasks. + failure_working, +}; + const Freelist = packed struct(u32) { head: Node.OptionalIndex, /// Whenever `node_freelist` is added to, this generation is incremented @@ -383,6 +400,7 @@ var global_progress: Progress = .{ .draw_buffer = undefined, .done = false, .need_clear = false, + .status = .working, .node_parents = &node_parents_buffer, .node_storage = &node_storage_buffer, @@ -408,6 +426,9 @@ pub const have_ipc = switch (builtin.os.tag) { const noop_impl = builtin.single_threaded or switch (builtin.os.tag) { .wasi, .freestanding => true, else => false, +} or switch (builtin.zig_backend) { + .stage2_aarch64 => true, + else => false, }; /// Initializes a global Progress instance. @@ -495,6 +516,11 @@ pub fn start(options: Options) Node { return root_node; } +pub fn setStatus(new_status: Status) void { + if (noop_impl) return; + @atomicStore(Status, &global_progress.status, new_status, .monotonic); +} + /// Returns whether a resize is needed to learn the terminal size. 
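A sketch of how the `Status` values documented above are meant to be driven: report `failure_working` as soon as anything fails, and settle on `success` or `failure` once the program stops working and waits on the user. `Task` and `runAll` are hypothetical; only `std.Progress.setStatus` comes from this change:

const std = @import("std");

const Task = struct {
    ok: bool,
    fn run(task: Task) !void {
        if (!task.ok) return error.TaskFailed;
    }
};

fn runAll(tasks: []const Task) void {
    var any_failed = false;
    for (tasks) |task| {
        task.run() catch {
            any_failed = true;
            // Errors happened, but work continues.
            std.Progress.setStatus(.failure_working);
        };
    }
    // Settle on a final state once the program is done working.
    std.Progress.setStatus(if (any_failed) .failure else .success);
}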
fn wait(timeout_ns: u64) bool { const resize_flag = if (global_progress.redraw_event.timedWait(timeout_ns)) |_| @@ -675,6 +701,14 @@ const save = "\x1b7"; const restore = "\x1b8"; const finish_sync = "\x1b[?2026l"; +const progress_remove = "\x1b]9;4;0\x07"; +const @"progress_normal {d}" = "\x1b]9;4;1;{d}\x07"; +const @"progress_error {d}" = "\x1b]9;4;2;{d}\x07"; +const progress_pulsing = "\x1b]9;4;3\x07"; +const progress_pulsing_error = "\x1b]9;4;2\x07"; +const progress_normal_100 = "\x1b]9;4;1;100\x07"; +const progress_error_100 = "\x1b]9;4;2;100\x07"; + const TreeSymbol = enum { /// ├─ tee, @@ -754,10 +788,10 @@ fn appendTreeSymbol(symbol: TreeSymbol, buf: []u8, start_i: usize) usize { } fn clearWrittenWithEscapeCodes() anyerror!void { - if (!global_progress.need_clear) return; + if (noop_impl or !global_progress.need_clear) return; global_progress.need_clear = false; - try write(clear); + try write(clear ++ progress_remove); } /// U+25BA or â–º @@ -1200,6 +1234,43 @@ fn computeRedraw(serialized_buffer: *Serialized.Buffer) struct { []u8, usize } { i, const nl_n = computeNode(buf, i, 0, serialized, children, root_node_index); if (global_progress.terminal_mode == .ansi_escape_codes) { + { + // Set progress state https://conemu.github.io/en/AnsiEscapeCodes.html#ConEmu_specific_OSC + const root_storage = &serialized.storage[0]; + const storage = if (root_storage.name[0] != 0 or children[0].child == .none) root_storage else &serialized.storage[@intFromEnum(children[0].child)]; + const estimated_total = storage.estimated_total_count; + const completed_items = storage.completed_count; + const status = @atomicLoad(Status, &global_progress.status, .monotonic); + switch (status) { + .working => { + if (estimated_total == 0) { + buf[i..][0..progress_pulsing.len].* = progress_pulsing.*; + i += progress_pulsing.len; + } else { + const percent = completed_items * 100 / estimated_total; + i += (std.fmt.bufPrint(buf[i..], @"progress_normal {d}", .{percent}) catch &.{}).len; + } + }, + .success => { + buf[i..][0..progress_remove.len].* = progress_remove.*; + i += progress_remove.len; + }, + .failure => { + buf[i..][0..progress_error_100.len].* = progress_error_100.*; + i += progress_error_100.len; + }, + .failure_working => { + if (estimated_total == 0) { + buf[i..][0..progress_pulsing_error.len].* = progress_pulsing_error.*; + i += progress_pulsing_error.len; + } else { + const percent = completed_items * 100 / estimated_total; + i += (std.fmt.bufPrint(buf[i..], @"progress_error {d}", .{percent}) catch &.{}).len; + } + }, + } + } + if (nl_n > 0) { buf[i] = '\r'; i += 1; diff --git a/lib/std/builtin.zig b/lib/std/builtin.zig index 8f4aefc713..54376426e2 100644 --- a/lib/std/builtin.zig +++ b/lib/std/builtin.zig @@ -772,7 +772,7 @@ pub const Endian = enum { /// This data structure is used by the Zig language code generation and /// therefore must be kept in sync with the compiler implementation. 
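The escape constants above follow the ConEmu OSC 9;4 scheme: ESC ] 9 ; 4 ; state ; percent BEL, where state 1 reports normal progress and state 2 an error. A quick check of the formatted form emitted by `computeRedraw`:

const std = @import("std");

test "OSC 9;4 progress sequence" {
    var buf: [32]u8 = undefined;
    // State 1 (normal) at 42 percent.
    const seq = try std.fmt.bufPrint(&buf, "\x1b]9;4;1;{d}\x07", .{42});
    try std.testing.expectEqualStrings("\x1b]9;4;1;42\x07", seq);
}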
-pub const Signedness = enum { +pub const Signedness = enum(u1) { signed, unsigned, }; @@ -894,7 +894,10 @@ pub const VaList = switch (builtin.cpu.arch) { .aarch64, .aarch64_be => switch (builtin.os.tag) { .windows => *u8, .ios, .macos, .tvos, .watchos, .visionos => *u8, - else => @compileError("disabled due to miscompilations"), // VaListAarch64, + else => switch (builtin.zig_backend) { + .stage2_aarch64 => VaListAarch64, + else => @compileError("disabled due to miscompilations"), + }, }, .arm, .armeb, .thumb, .thumbeb => switch (builtin.os.tag) { .ios, .macos, .tvos, .watchos, .visionos => *u8, diff --git a/lib/std/c.zig b/lib/std/c.zig index 2880e3850a..e2f55dd6fb 100644 --- a/lib/std/c.zig +++ b/lib/std/c.zig @@ -7147,7 +7147,7 @@ pub const dirent = switch (native_os) { off: off_t, reclen: c_ushort, type: u8, - name: [256:0]u8, + name: [255:0]u8, }, else => void, }; @@ -10497,9 +10497,9 @@ pub const sysconf = switch (native_os) { pub const sf_hdtr = switch (native_os) { .freebsd, .macos, .ios, .tvos, .watchos, .visionos => extern struct { - headers: [*]const iovec_const, + headers: ?[*]const iovec_const, hdr_cnt: c_int, - trailers: [*]const iovec_const, + trailers: ?[*]const iovec_const, trl_cnt: c_int, }, else => void, diff --git a/lib/std/compress.zig b/lib/std/compress.zig index e07c3a4126..018de51001 100644 --- a/lib/std/compress.zig +++ b/lib/std/compress.zig @@ -1,75 +1,19 @@ //! Compression algorithms. -const std = @import("std.zig"); - pub const flate = @import("compress/flate.zig"); pub const gzip = @import("compress/gzip.zig"); pub const zlib = @import("compress/zlib.zig"); pub const lzma = @import("compress/lzma.zig"); pub const lzma2 = @import("compress/lzma2.zig"); pub const xz = @import("compress/xz.zig"); -pub const zstd = @import("compress/zstandard.zig"); - -pub fn HashedReader(ReaderType: type, HasherType: type) type { - return struct { - child_reader: ReaderType, - hasher: HasherType, - - pub const Error = ReaderType.Error; - pub const Reader = std.io.GenericReader(*@This(), Error, read); - - pub fn read(self: *@This(), buf: []u8) Error!usize { - const amt = try self.child_reader.read(buf); - self.hasher.update(buf[0..amt]); - return amt; - } - - pub fn reader(self: *@This()) Reader { - return .{ .context = self }; - } - }; -} - -pub fn hashedReader( - reader: anytype, - hasher: anytype, -) HashedReader(@TypeOf(reader), @TypeOf(hasher)) { - return .{ .child_reader = reader, .hasher = hasher }; -} - -pub fn HashedWriter(WriterType: type, HasherType: type) type { - return struct { - child_writer: WriterType, - hasher: HasherType, - - pub const Error = WriterType.Error; - pub const Writer = std.io.GenericWriter(*@This(), Error, write); - - pub fn write(self: *@This(), buf: []const u8) Error!usize { - const amt = try self.child_writer.write(buf); - self.hasher.update(buf[0..amt]); - return amt; - } - - pub fn writer(self: *@This()) Writer { - return .{ .context = self }; - } - }; -} - -pub fn hashedWriter( - writer: anytype, - hasher: anytype, -) HashedWriter(@TypeOf(writer), @TypeOf(hasher)) { - return .{ .child_writer = writer, .hasher = hasher }; -} +pub const zstd = @import("compress/zstd.zig"); test { + _ = flate; _ = lzma; _ = lzma2; _ = xz; _ = zstd; - _ = flate; _ = gzip; _ = zlib; } diff --git a/lib/std/compress/xz.zig b/lib/std/compress/xz.zig index 445d103098..6c99e9f427 100644 --- a/lib/std/compress/xz.zig +++ b/lib/std/compress/xz.zig @@ -12,17 +12,11 @@ pub const Check = enum(u4) { }; fn readStreamFlags(reader: anytype, check: *Check) !void { - var 
bit_reader = std.io.bitReader(.little, reader); - - const reserved1 = try bit_reader.readBitsNoEof(u8, 8); - if (reserved1 != 0) - return error.CorruptInput; - - check.* = @as(Check, @enumFromInt(try bit_reader.readBitsNoEof(u4, 4))); - - const reserved2 = try bit_reader.readBitsNoEof(u4, 4); - if (reserved2 != 0) - return error.CorruptInput; + const reserved1 = try reader.readByte(); + if (reserved1 != 0) return error.CorruptInput; + const byte = try reader.readByte(); + if ((byte >> 4) != 0) return error.CorruptInput; + check.* = @enumFromInt(@as(u4, @truncate(byte))); } pub fn decompress(allocator: Allocator, reader: anytype) !Decompress(@TypeOf(reader)) { @@ -47,7 +41,7 @@ pub fn Decompress(comptime ReaderType: type) type { var check: Check = undefined; const hash_a = blk: { - var hasher = std.compress.hashedReader(source, Crc32.init()); + var hasher = hashedReader(source, Crc32.init()); try readStreamFlags(hasher.reader(), &check); break :blk hasher.hasher.final(); }; @@ -80,7 +74,7 @@ pub fn Decompress(comptime ReaderType: type) type { return r; const index_size = blk: { - var hasher = std.compress.hashedReader(self.in_reader, Crc32.init()); + var hasher = hashedReader(self.in_reader, Crc32.init()); hasher.hasher.update(&[1]u8{0x00}); var counter = std.io.countingReader(hasher.reader()); @@ -115,7 +109,7 @@ pub fn Decompress(comptime ReaderType: type) type { const hash_a = try self.in_reader.readInt(u32, .little); const hash_b = blk: { - var hasher = std.compress.hashedReader(self.in_reader, Crc32.init()); + var hasher = hashedReader(self.in_reader, Crc32.init()); const hashed_reader = hasher.reader(); const backward_size = (@as(u64, try hashed_reader.readInt(u32, .little)) + 1) * 4; @@ -140,6 +134,33 @@ pub fn Decompress(comptime ReaderType: type) type { }; } +pub fn HashedReader(ReaderType: type, HasherType: type) type { + return struct { + child_reader: ReaderType, + hasher: HasherType, + + pub const Error = ReaderType.Error; + pub const Reader = std.io.GenericReader(*@This(), Error, read); + + pub fn read(self: *@This(), buf: []u8) Error!usize { + const amt = try self.child_reader.read(buf); + self.hasher.update(buf[0..amt]); + return amt; + } + + pub fn reader(self: *@This()) Reader { + return .{ .context = self }; + } + }; +} + +pub fn hashedReader( + reader: anytype, + hasher: anytype, +) HashedReader(@TypeOf(reader), @TypeOf(hasher)) { + return .{ .child_reader = reader, .hasher = hasher }; +} + test { _ = @import("xz/test.zig"); } diff --git a/lib/std/compress/xz/block.zig b/lib/std/compress/xz/block.zig index 6253341f36..505dc543a8 100644 --- a/lib/std/compress/xz/block.zig +++ b/lib/std/compress/xz/block.zig @@ -91,7 +91,7 @@ pub fn Decoder(comptime ReaderType: type) type { // Block Header { - var header_hasher = std.compress.hashedReader(block_reader, Crc32.init()); + var header_hasher = xz.hashedReader(block_reader, Crc32.init()); const header_reader = header_hasher.reader(); const header_size = @as(u64, try header_reader.readByte()) * 4; diff --git a/lib/std/compress/zstandard.zig b/lib/std/compress/zstandard.zig deleted file mode 100644 index df45e9686d..0000000000 --- a/lib/std/compress/zstandard.zig +++ /dev/null @@ -1,310 +0,0 @@ -const std = @import("std"); -const RingBuffer = std.RingBuffer; - -const types = @import("zstandard/types.zig"); -pub const frame = types.frame; -pub const compressed_block = types.compressed_block; - -pub const decompress = @import("zstandard/decompress.zig"); - -pub const DecompressorOptions = struct { - verify_checksum: bool = true, - 
window_buffer: []u8, - - /// Recommended amount by the standard. Lower than this may result - /// in inability to decompress common streams. - pub const default_window_buffer_len = 8 * 1024 * 1024; -}; - -pub fn Decompressor(comptime ReaderType: type) type { - return struct { - const Self = @This(); - - const table_size_max = types.compressed_block.table_size_max; - - source: std.io.CountingReader(ReaderType), - state: enum { NewFrame, InFrame, LastBlock }, - decode_state: decompress.block.DecodeState, - frame_context: decompress.FrameContext, - buffer: WindowBuffer, - literal_fse_buffer: [table_size_max.literal]types.compressed_block.Table.Fse, - match_fse_buffer: [table_size_max.match]types.compressed_block.Table.Fse, - offset_fse_buffer: [table_size_max.offset]types.compressed_block.Table.Fse, - literals_buffer: [types.block_size_max]u8, - sequence_buffer: [types.block_size_max]u8, - verify_checksum: bool, - checksum: ?u32, - current_frame_decompressed_size: usize, - - const WindowBuffer = struct { - data: []u8 = undefined, - read_index: usize = 0, - write_index: usize = 0, - }; - - pub const Error = ReaderType.Error || error{ - ChecksumFailure, - DictionaryIdFlagUnsupported, - MalformedBlock, - MalformedFrame, - OutOfMemory, - }; - - pub const Reader = std.io.GenericReader(*Self, Error, read); - - pub fn init(source: ReaderType, options: DecompressorOptions) Self { - return .{ - .source = std.io.countingReader(source), - .state = .NewFrame, - .decode_state = undefined, - .frame_context = undefined, - .buffer = .{ .data = options.window_buffer }, - .literal_fse_buffer = undefined, - .match_fse_buffer = undefined, - .offset_fse_buffer = undefined, - .literals_buffer = undefined, - .sequence_buffer = undefined, - .verify_checksum = options.verify_checksum, - .checksum = undefined, - .current_frame_decompressed_size = undefined, - }; - } - - fn frameInit(self: *Self) !void { - const source_reader = self.source.reader(); - switch (try decompress.decodeFrameHeader(source_reader)) { - .skippable => |header| { - try source_reader.skipBytes(header.frame_size, .{}); - self.state = .NewFrame; - }, - .zstandard => |header| { - const frame_context = try decompress.FrameContext.init( - header, - self.buffer.data.len, - self.verify_checksum, - ); - - const decode_state = decompress.block.DecodeState.init( - &self.literal_fse_buffer, - &self.match_fse_buffer, - &self.offset_fse_buffer, - ); - - self.decode_state = decode_state; - self.frame_context = frame_context; - - self.checksum = null; - self.current_frame_decompressed_size = 0; - - self.state = .InFrame; - }, - } - } - - pub fn reader(self: *Self) Reader { - return .{ .context = self }; - } - - pub fn read(self: *Self, buffer: []u8) Error!usize { - if (buffer.len == 0) return 0; - - var size: usize = 0; - while (size == 0) { - while (self.state == .NewFrame) { - const initial_count = self.source.bytes_read; - self.frameInit() catch |err| switch (err) { - error.DictionaryIdFlagUnsupported => return error.DictionaryIdFlagUnsupported, - error.EndOfStream => return if (self.source.bytes_read == initial_count) - 0 - else - error.MalformedFrame, - else => return error.MalformedFrame, - }; - } - size = try self.readInner(buffer); - } - return size; - } - - fn readInner(self: *Self, buffer: []u8) Error!usize { - std.debug.assert(self.state != .NewFrame); - - var ring_buffer = RingBuffer{ - .data = self.buffer.data, - .read_index = self.buffer.read_index, - .write_index = self.buffer.write_index, - }; - defer { - self.buffer.read_index = 
ring_buffer.read_index; - self.buffer.write_index = ring_buffer.write_index; - } - - const source_reader = self.source.reader(); - while (ring_buffer.isEmpty() and self.state != .LastBlock) { - const header_bytes = source_reader.readBytesNoEof(3) catch - return error.MalformedFrame; - const block_header = decompress.block.decodeBlockHeader(&header_bytes); - - decompress.block.decodeBlockReader( - &ring_buffer, - source_reader, - block_header, - &self.decode_state, - self.frame_context.block_size_max, - &self.literals_buffer, - &self.sequence_buffer, - ) catch - return error.MalformedBlock; - - if (self.frame_context.content_size) |size| { - if (self.current_frame_decompressed_size > size) return error.MalformedFrame; - } - - const size = ring_buffer.len(); - self.current_frame_decompressed_size += size; - - if (self.frame_context.hasher_opt) |*hasher| { - if (size > 0) { - const written_slice = ring_buffer.sliceLast(size); - hasher.update(written_slice.first); - hasher.update(written_slice.second); - } - } - if (block_header.last_block) { - self.state = .LastBlock; - if (self.frame_context.has_checksum) { - const checksum = source_reader.readInt(u32, .little) catch - return error.MalformedFrame; - if (self.verify_checksum) { - if (self.frame_context.hasher_opt) |*hasher| { - if (checksum != decompress.computeChecksum(hasher)) - return error.ChecksumFailure; - } - } - } - if (self.frame_context.content_size) |content_size| { - if (content_size != self.current_frame_decompressed_size) { - return error.MalformedFrame; - } - } - } - } - - const size = @min(ring_buffer.len(), buffer.len); - if (size > 0) { - ring_buffer.readFirstAssumeLength(buffer, size); - } - if (self.state == .LastBlock and ring_buffer.len() == 0) { - self.state = .NewFrame; - } - return size; - } - }; -} - -pub fn decompressor(reader: anytype, options: DecompressorOptions) Decompressor(@TypeOf(reader)) { - return Decompressor(@TypeOf(reader)).init(reader, options); -} - -fn testDecompress(data: []const u8) ![]u8 { - const window_buffer = try std.testing.allocator.alloc(u8, 1 << 23); - defer std.testing.allocator.free(window_buffer); - - var in_stream = std.io.fixedBufferStream(data); - var zstd_stream = decompressor(in_stream.reader(), .{ .window_buffer = window_buffer }); - const result = zstd_stream.reader().readAllAlloc(std.testing.allocator, std.math.maxInt(usize)); - return result; -} - -fn testReader(data: []const u8, comptime expected: []const u8) !void { - const buf = try testDecompress(data); - defer std.testing.allocator.free(buf); - try std.testing.expectEqualSlices(u8, expected, buf); -} - -test "decompression" { - const uncompressed = @embedFile("testdata/rfc8478.txt"); - const compressed3 = @embedFile("testdata/rfc8478.txt.zst.3"); - const compressed19 = @embedFile("testdata/rfc8478.txt.zst.19"); - - const buffer = try std.testing.allocator.alloc(u8, uncompressed.len); - defer std.testing.allocator.free(buffer); - - const res3 = try decompress.decode(buffer, compressed3, true); - try std.testing.expectEqual(uncompressed.len, res3); - try std.testing.expectEqualSlices(u8, uncompressed, buffer); - - @memset(buffer, undefined); - const res19 = try decompress.decode(buffer, compressed19, true); - try std.testing.expectEqual(uncompressed.len, res19); - try std.testing.expectEqualSlices(u8, uncompressed, buffer); - - try testReader(compressed3, uncompressed); - try testReader(compressed19, uncompressed); -} - -fn expectEqualDecoded(expected: []const u8, input: []const u8) !void { - { - const result = try 
decompress.decodeAlloc(std.testing.allocator, input, false, 1 << 23); - defer std.testing.allocator.free(result); - try std.testing.expectEqualStrings(expected, result); - } - - { - var buffer = try std.testing.allocator.alloc(u8, 2 * expected.len); - defer std.testing.allocator.free(buffer); - - const size = try decompress.decode(buffer, input, false); - try std.testing.expectEqualStrings(expected, buffer[0..size]); - } -} - -fn expectEqualDecodedStreaming(expected: []const u8, input: []const u8) !void { - const window_buffer = try std.testing.allocator.alloc(u8, 1 << 23); - defer std.testing.allocator.free(window_buffer); - - var in_stream = std.io.fixedBufferStream(input); - var stream = decompressor(in_stream.reader(), .{ .window_buffer = window_buffer }); - - const result = try stream.reader().readAllAlloc(std.testing.allocator, std.math.maxInt(usize)); - defer std.testing.allocator.free(result); - - try std.testing.expectEqualStrings(expected, result); -} - -test "zero sized block" { - const input_raw = - "\x28\xb5\x2f\xfd" ++ // zstandard frame magic number - "\x20\x00" ++ // frame header: only single_segment_flag set, frame_content_size zero - "\x01\x00\x00"; // block header with: last_block set, block_type raw, block_size zero - - const input_rle = - "\x28\xb5\x2f\xfd" ++ // zstandard frame magic number - "\x20\x00" ++ // frame header: only single_segment_flag set, frame_content_size zero - "\x03\x00\x00" ++ // block header with: last_block set, block_type rle, block_size zero - "\xaa"; // block_content - - try expectEqualDecoded("", input_raw); - try expectEqualDecoded("", input_rle); - try expectEqualDecodedStreaming("", input_raw); - try expectEqualDecodedStreaming("", input_rle); -} - -test "declared raw literals size too large" { - const input_raw = - "\x28\xb5\x2f\xfd" ++ // zstandard frame magic number - "\x00\x00" ++ // frame header: everything unset, window descriptor zero - "\x95\x00\x00" ++ // block header with: last_block set, block_type compressed, block_size 18 - "\xbc\xf3\xae" ++ // literals section header with: type raw, size_format 3, regenerated_size 716603 - "\xa5\x9f\xe3"; // some bytes of literal content - the content is shorter than regenerated_size - - // Note that the regenerated_size in the above input is larger than block maximum size, so the - // block can't be valid as it is a raw literals block. 
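The test vectors above all begin with the zstandard frame magic number, 0xFD2FB528 stored little-endian. Verifying that against the first input:

const std = @import("std");

test "zstandard frame magic number" {
    const input = "\x28\xb5\x2f\xfd" ++ // magic, little-endian 0xFD2FB528
        "\x20\x00" ++ // frame header
        "\x01\x00\x00"; // block header
    try std.testing.expectEqual(
        @as(u32, 0xFD2FB528),
        std.mem.readInt(u32, input[0..4], .little),
    );
}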
- - var fbs = std.io.fixedBufferStream(input_raw); - var window: [1024]u8 = undefined; - var stream = decompressor(fbs.reader(), .{ .window_buffer = &window }); - - var buf: [1024]u8 = undefined; - try std.testing.expectError(error.MalformedBlock, stream.read(&buf)); -} diff --git a/lib/std/compress/zstandard/decode/block.zig b/lib/std/compress/zstandard/decode/block.zig deleted file mode 100644 index 49c6e7dc36..0000000000 --- a/lib/std/compress/zstandard/decode/block.zig +++ /dev/null @@ -1,1149 +0,0 @@ -const std = @import("std"); -const assert = std.debug.assert; -const RingBuffer = std.RingBuffer; - -const types = @import("../types.zig"); -const frame = types.frame; -const Table = types.compressed_block.Table; -const LiteralsSection = types.compressed_block.LiteralsSection; -const SequencesSection = types.compressed_block.SequencesSection; - -const huffman = @import("huffman.zig"); -const readers = @import("../readers.zig"); - -const decodeFseTable = @import("fse.zig").decodeFseTable; - -pub const Error = error{ - BlockSizeOverMaximum, - MalformedBlockSize, - ReservedBlock, - MalformedRleBlock, - MalformedCompressedBlock, -}; - -pub const DecodeState = struct { - repeat_offsets: [3]u32, - - offset: StateData(8), - match: StateData(9), - literal: StateData(9), - - offset_fse_buffer: []Table.Fse, - match_fse_buffer: []Table.Fse, - literal_fse_buffer: []Table.Fse, - - fse_tables_undefined: bool, - - literal_stream_reader: readers.ReverseBitReader, - literal_stream_index: usize, - literal_streams: LiteralsSection.Streams, - literal_header: LiteralsSection.Header, - huffman_tree: ?LiteralsSection.HuffmanTree, - - literal_written_count: usize, - written_count: usize = 0, - - fn StateData(comptime max_accuracy_log: comptime_int) type { - return struct { - state: State, - table: Table, - accuracy_log: u8, - - const State = std.meta.Int(.unsigned, max_accuracy_log); - }; - } - - pub fn init( - literal_fse_buffer: []Table.Fse, - match_fse_buffer: []Table.Fse, - offset_fse_buffer: []Table.Fse, - ) DecodeState { - return DecodeState{ - .repeat_offsets = .{ - types.compressed_block.start_repeated_offset_1, - types.compressed_block.start_repeated_offset_2, - types.compressed_block.start_repeated_offset_3, - }, - - .offset = undefined, - .match = undefined, - .literal = undefined, - - .literal_fse_buffer = literal_fse_buffer, - .match_fse_buffer = match_fse_buffer, - .offset_fse_buffer = offset_fse_buffer, - - .fse_tables_undefined = true, - - .literal_written_count = 0, - .literal_header = undefined, - .literal_streams = undefined, - .literal_stream_reader = undefined, - .literal_stream_index = undefined, - .huffman_tree = null, - - .written_count = 0, - }; - } - - /// Prepare the decoder to decode a compressed block. Loads the literals - /// stream and Huffman tree from `literals` and reads the FSE tables from - /// `source`. 
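
Editorial note: the intended call sequence around DecodeState is easiest to see in one place. A rough sketch (hypothetical, using the declarations above; `source`, `literals`, and `sequences_header` stand for values produced by the section decoders further down, and the buffer sizes follow the 9/9/8-bit accuracy limits of the three StateData fields):

    // Scratch tables sized for the maximum accuracy logs (literal 9, match 9, offset 8).
    var literal_fse: [1 << 9]Table.Fse = undefined;
    var match_fse: [1 << 9]Table.Fse = undefined;
    var offset_fse: [1 << 8]Table.Fse = undefined;

    var decode_state = DecodeState.init(&literal_fse, &match_fse, &offset_fse);
    // For each compressed block: parse the literals section and sequences header,
    // then load the Huffman tree and FSE tables before decoding any sequences.
    try decode_state.prepare(source, literals, sequences_header);
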
-    ///
-    /// Errors returned:
-    /// - `error.BitStreamHasNoStartBit` if the (reversed) literal bitstream's
-    ///   first byte does not have any bits set
-    /// - `error.TreelessLiteralsFirst` if `literals` is a treeless literals
-    ///   section and the decode state does not have a Huffman tree from a
-    ///   previous block
-    /// - `error.RepeatModeFirst` on the first call if one of the sequence FSE
-    ///   tables is set to repeat mode
-    /// - `error.MalformedAccuracyLog` if an FSE table has an invalid accuracy log
-    /// - `error.MalformedFseTable` if there are errors decoding an FSE table
-    /// - `error.EndOfStream` if `source` ends before all FSE tables are read
-    pub fn prepare(
-        self: *DecodeState,
-        source: anytype,
-        literals: LiteralsSection,
-        sequences_header: SequencesSection.Header,
-    ) !void {
-        self.literal_written_count = 0;
-        self.literal_header = literals.header;
-        self.literal_streams = literals.streams;
-
-        if (literals.huffman_tree) |tree| {
-            self.huffman_tree = tree;
-        } else if (literals.header.block_type == .treeless and self.huffman_tree == null) {
-            return error.TreelessLiteralsFirst;
-        }
-
-        switch (literals.header.block_type) {
-            .raw, .rle => {},
-            .compressed, .treeless => {
-                self.literal_stream_index = 0;
-                switch (literals.streams) {
-                    .one => |slice| try self.initLiteralStream(slice),
-                    .four => |streams| try self.initLiteralStream(streams[0]),
-                }
-            },
-        }
-
-        if (sequences_header.sequence_count > 0) {
-            try self.updateFseTable(source, .literal, sequences_header.literal_lengths);
-            try self.updateFseTable(source, .offset, sequences_header.offsets);
-            try self.updateFseTable(source, .match, sequences_header.match_lengths);
-            self.fse_tables_undefined = false;
-        }
-    }
-
-    /// Read initial FSE states for sequence decoding.
-    ///
-    /// Errors returned:
-    /// - `error.EndOfStream` if `bit_reader` does not contain enough bits.
- pub fn readInitialFseState(self: *DecodeState, bit_reader: *readers.ReverseBitReader) error{EndOfStream}!void { - self.literal.state = try bit_reader.readBitsNoEof(u9, self.literal.accuracy_log); - self.offset.state = try bit_reader.readBitsNoEof(u8, self.offset.accuracy_log); - self.match.state = try bit_reader.readBitsNoEof(u9, self.match.accuracy_log); - } - - fn updateRepeatOffset(self: *DecodeState, offset: u32) void { - self.repeat_offsets[2] = self.repeat_offsets[1]; - self.repeat_offsets[1] = self.repeat_offsets[0]; - self.repeat_offsets[0] = offset; - } - - fn useRepeatOffset(self: *DecodeState, index: usize) u32 { - if (index == 1) - std.mem.swap(u32, &self.repeat_offsets[0], &self.repeat_offsets[1]) - else if (index == 2) { - std.mem.swap(u32, &self.repeat_offsets[0], &self.repeat_offsets[2]); - std.mem.swap(u32, &self.repeat_offsets[1], &self.repeat_offsets[2]); - } - return self.repeat_offsets[0]; - } - - const DataType = enum { offset, match, literal }; - - fn updateState( - self: *DecodeState, - comptime choice: DataType, - bit_reader: *readers.ReverseBitReader, - ) error{ MalformedFseBits, EndOfStream }!void { - switch (@field(self, @tagName(choice)).table) { - .rle => {}, - .fse => |table| { - const data = table[@field(self, @tagName(choice)).state]; - const T = @TypeOf(@field(self, @tagName(choice))).State; - const bits_summand = try bit_reader.readBitsNoEof(T, data.bits); - const next_state = std.math.cast( - @TypeOf(@field(self, @tagName(choice))).State, - data.baseline + bits_summand, - ) orelse return error.MalformedFseBits; - @field(self, @tagName(choice)).state = next_state; - }, - } - } - - const FseTableError = error{ - MalformedFseTable, - MalformedAccuracyLog, - RepeatModeFirst, - EndOfStream, - }; - - fn updateFseTable( - self: *DecodeState, - source: anytype, - comptime choice: DataType, - mode: SequencesSection.Header.Mode, - ) !void { - const field_name = @tagName(choice); - switch (mode) { - .predefined => { - @field(self, field_name).accuracy_log = - @field(types.compressed_block.default_accuracy_log, field_name); - - @field(self, field_name).table = - @field(types.compressed_block, "predefined_" ++ field_name ++ "_fse_table"); - }, - .rle => { - @field(self, field_name).accuracy_log = 0; - @field(self, field_name).table = .{ .rle = try source.readByte() }; - }, - .fse => { - var bit_reader = readers.bitReader(source); - - const table_size = try decodeFseTable( - &bit_reader, - @field(types.compressed_block.table_symbol_count_max, field_name), - @field(types.compressed_block.table_accuracy_log_max, field_name), - @field(self, field_name ++ "_fse_buffer"), - ); - @field(self, field_name).table = .{ - .fse = @field(self, field_name ++ "_fse_buffer")[0..table_size], - }; - @field(self, field_name).accuracy_log = std.math.log2_int_ceil(usize, table_size); - }, - .repeat => if (self.fse_tables_undefined) return error.RepeatModeFirst, - } - } - - const Sequence = struct { - literal_length: u32, - match_length: u32, - offset: u32, - }; - - fn nextSequence( - self: *DecodeState, - bit_reader: *readers.ReverseBitReader, - ) error{ InvalidBitStream, EndOfStream }!Sequence { - const raw_code = self.getCode(.offset); - const offset_code = std.math.cast(u5, raw_code) orelse { - return error.InvalidBitStream; - }; - const offset_value = (@as(u32, 1) << offset_code) + try bit_reader.readBitsNoEof(u32, offset_code); - - const match_code = self.getCode(.match); - if (match_code >= types.compressed_block.match_length_code_table.len) - return error.InvalidBitStream; - 
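// (Editorial worked example, not from the deleted file: if the offset code read
// above is 5, five extra bits follow, so offset_value lands in 32..63. Any
// offset_value > 3 becomes a fresh offset of offset_value - 3 in the branch
// below, while values 1..3 select from repeat_offsets, with the
// literal_length == 0 quirk shifting which entry is meant.)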
const match = types.compressed_block.match_length_code_table[match_code]; - const match_length = match[0] + try bit_reader.readBitsNoEof(u32, match[1]); - - const literal_code = self.getCode(.literal); - if (literal_code >= types.compressed_block.literals_length_code_table.len) - return error.InvalidBitStream; - const literal = types.compressed_block.literals_length_code_table[literal_code]; - const literal_length = literal[0] + try bit_reader.readBitsNoEof(u32, literal[1]); - - const offset = if (offset_value > 3) offset: { - const offset = offset_value - 3; - self.updateRepeatOffset(offset); - break :offset offset; - } else offset: { - if (literal_length == 0) { - if (offset_value == 3) { - const offset = self.repeat_offsets[0] - 1; - self.updateRepeatOffset(offset); - break :offset offset; - } - break :offset self.useRepeatOffset(offset_value); - } - break :offset self.useRepeatOffset(offset_value - 1); - }; - - if (offset == 0) return error.InvalidBitStream; - - return .{ - .literal_length = literal_length, - .match_length = match_length, - .offset = offset, - }; - } - - fn executeSequenceSlice( - self: *DecodeState, - dest: []u8, - write_pos: usize, - sequence: Sequence, - ) (error{MalformedSequence} || DecodeLiteralsError)!void { - if (sequence.offset > write_pos + sequence.literal_length) return error.MalformedSequence; - - try self.decodeLiteralsSlice(dest[write_pos..], sequence.literal_length); - const copy_start = write_pos + sequence.literal_length - sequence.offset; - for ( - dest[write_pos + sequence.literal_length ..][0..sequence.match_length], - dest[copy_start..][0..sequence.match_length], - ) |*d, s| d.* = s; - self.written_count += sequence.match_length; - } - - fn executeSequenceRingBuffer( - self: *DecodeState, - dest: *RingBuffer, - sequence: Sequence, - ) (error{MalformedSequence} || DecodeLiteralsError)!void { - if (sequence.offset > @min(dest.data.len, self.written_count + sequence.literal_length)) - return error.MalformedSequence; - - try self.decodeLiteralsRingBuffer(dest, sequence.literal_length); - const copy_start = dest.write_index + dest.data.len - sequence.offset; - const copy_slice = dest.sliceAt(copy_start, sequence.match_length); - dest.writeSliceForwardsAssumeCapacity(copy_slice.first); - dest.writeSliceForwardsAssumeCapacity(copy_slice.second); - self.written_count += sequence.match_length; - } - - const DecodeSequenceError = error{ - InvalidBitStream, - EndOfStream, - MalformedSequence, - MalformedFseBits, - } || DecodeLiteralsError; - - /// Decode one sequence from `bit_reader` into `dest`, written starting at - /// `write_pos` and update FSE states if `last_sequence` is `false`. - /// `prepare()` must be called for the block before attempting to decode - /// sequences. - /// - /// Errors returned: - /// - `error.MalformedSequence` if the decompressed sequence would be - /// longer than `sequence_size_limit` or the sequence's offset is too - /// large - /// - `error.UnexpectedEndOfLiteralStream` if the decoder state's literal - /// streams do not contain enough literals for the sequence (this may - /// mean the literal stream or the sequence is malformed). 
-    /// - `error.InvalidBitStream` if the FSE sequence bitstream is malformed
-    /// - `error.EndOfStream` if `bit_reader` does not contain enough bits
-    /// - `error.DestTooSmall` if `dest` is not large enough to hold the
-    ///   decompressed sequence
-    pub fn decodeSequenceSlice(
-        self: *DecodeState,
-        dest: []u8,
-        write_pos: usize,
-        bit_reader: *readers.ReverseBitReader,
-        sequence_size_limit: usize,
-        last_sequence: bool,
-    ) (error{DestTooSmall} || DecodeSequenceError)!usize {
-        const sequence = try self.nextSequence(bit_reader);
-        const sequence_length = @as(usize, sequence.literal_length) + sequence.match_length;
-        if (sequence_length > sequence_size_limit) return error.MalformedSequence;
-        if (sequence_length > dest[write_pos..].len) return error.DestTooSmall;
-
-        try self.executeSequenceSlice(dest, write_pos, sequence);
-        if (!last_sequence) {
-            try self.updateState(.literal, bit_reader);
-            try self.updateState(.match, bit_reader);
-            try self.updateState(.offset, bit_reader);
-        }
-        return sequence_length;
-    }
-
-    /// Decode one sequence from `bit_reader` into `dest`; see
-    /// `decodeSequenceSlice`.
-    pub fn decodeSequenceRingBuffer(
-        self: *DecodeState,
-        dest: *RingBuffer,
-        bit_reader: anytype,
-        sequence_size_limit: usize,
-        last_sequence: bool,
-    ) DecodeSequenceError!usize {
-        const sequence = try self.nextSequence(bit_reader);
-        const sequence_length = @as(usize, sequence.literal_length) + sequence.match_length;
-        if (sequence_length > sequence_size_limit) return error.MalformedSequence;
-
-        try self.executeSequenceRingBuffer(dest, sequence);
-        if (!last_sequence) {
-            try self.updateState(.literal, bit_reader);
-            try self.updateState(.match, bit_reader);
-            try self.updateState(.offset, bit_reader);
-        }
-        return sequence_length;
-    }
-
-    fn nextLiteralMultiStream(
-        self: *DecodeState,
-    ) error{BitStreamHasNoStartBit}!void {
-        self.literal_stream_index += 1;
-        try self.initLiteralStream(self.literal_streams.four[self.literal_stream_index]);
-    }
-
-    fn initLiteralStream(self: *DecodeState, bytes: []const u8) error{BitStreamHasNoStartBit}!void {
-        try self.literal_stream_reader.init(bytes);
-    }
-
-    fn isLiteralStreamEmpty(self: *DecodeState) bool {
-        switch (self.literal_streams) {
-            .one => return self.literal_stream_reader.isEmpty(),
-            .four => return self.literal_stream_index == 3 and self.literal_stream_reader.isEmpty(),
-        }
-    }
-
-    const LiteralBitsError = error{
-        BitStreamHasNoStartBit,
-        UnexpectedEndOfLiteralStream,
-    };
-    fn readLiteralsBits(
-        self: *DecodeState,
-        bit_count_to_read: u16,
-    ) LiteralBitsError!u16 {
-        return self.literal_stream_reader.readBitsNoEof(u16, bit_count_to_read) catch bits: {
-            if (self.literal_streams == .four and self.literal_stream_index < 3) {
-                try self.nextLiteralMultiStream();
-                break :bits self.literal_stream_reader.readBitsNoEof(u16, bit_count_to_read) catch
-                    return error.UnexpectedEndOfLiteralStream;
-            } else {
-                return error.UnexpectedEndOfLiteralStream;
-            }
-        };
-    }
-
-    const DecodeLiteralsError = error{
-        MalformedLiteralsLength,
-        NotFound,
-    } || LiteralBitsError;
-
-    /// Decode `len` bytes of literals into `dest`.
- /// - /// Errors returned: - /// - `error.MalformedLiteralsLength` if the number of literal bytes - /// decoded by `self` plus `len` is greater than the regenerated size of - /// `literals` - /// - `error.UnexpectedEndOfLiteralStream` and `error.NotFound` if there - /// are problems decoding Huffman compressed literals - pub fn decodeLiteralsSlice( - self: *DecodeState, - dest: []u8, - len: usize, - ) DecodeLiteralsError!void { - if (self.literal_written_count + len > self.literal_header.regenerated_size) - return error.MalformedLiteralsLength; - - switch (self.literal_header.block_type) { - .raw => { - const literal_data = self.literal_streams.one[self.literal_written_count..][0..len]; - @memcpy(dest[0..len], literal_data); - self.literal_written_count += len; - self.written_count += len; - }, - .rle => { - for (0..len) |i| { - dest[i] = self.literal_streams.one[0]; - } - self.literal_written_count += len; - self.written_count += len; - }, - .compressed, .treeless => { - // const written_bytes_per_stream = (literals.header.regenerated_size + 3) / 4; - const huffman_tree = self.huffman_tree orelse unreachable; - const max_bit_count = huffman_tree.max_bit_count; - const starting_bit_count = LiteralsSection.HuffmanTree.weightToBitCount( - huffman_tree.nodes[huffman_tree.symbol_count_minus_one].weight, - max_bit_count, - ); - var bits_read: u4 = 0; - var huffman_tree_index: usize = huffman_tree.symbol_count_minus_one; - var bit_count_to_read: u4 = starting_bit_count; - for (0..len) |i| { - var prefix: u16 = 0; - while (true) { - const new_bits = self.readLiteralsBits(bit_count_to_read) catch |err| { - return err; - }; - prefix <<= bit_count_to_read; - prefix |= new_bits; - bits_read += bit_count_to_read; - const result = huffman_tree.query(huffman_tree_index, prefix) catch |err| { - return err; - }; - - switch (result) { - .symbol => |sym| { - dest[i] = sym; - bit_count_to_read = starting_bit_count; - bits_read = 0; - huffman_tree_index = huffman_tree.symbol_count_minus_one; - break; - }, - .index => |index| { - huffman_tree_index = index; - const bit_count = LiteralsSection.HuffmanTree.weightToBitCount( - huffman_tree.nodes[index].weight, - max_bit_count, - ); - bit_count_to_read = bit_count - bits_read; - }, - } - } - } - self.literal_written_count += len; - self.written_count += len; - }, - } - } - - /// Decode literals into `dest`; see `decodeLiteralsSlice()`. 
- pub fn decodeLiteralsRingBuffer( - self: *DecodeState, - dest: *RingBuffer, - len: usize, - ) DecodeLiteralsError!void { - if (self.literal_written_count + len > self.literal_header.regenerated_size) - return error.MalformedLiteralsLength; - - switch (self.literal_header.block_type) { - .raw => { - const literals_end = self.literal_written_count + len; - const literal_data = self.literal_streams.one[self.literal_written_count..literals_end]; - dest.writeSliceAssumeCapacity(literal_data); - self.literal_written_count += len; - self.written_count += len; - }, - .rle => { - for (0..len) |_| { - dest.writeAssumeCapacity(self.literal_streams.one[0]); - } - self.literal_written_count += len; - self.written_count += len; - }, - .compressed, .treeless => { - // const written_bytes_per_stream = (literals.header.regenerated_size + 3) / 4; - const huffman_tree = self.huffman_tree orelse unreachable; - const max_bit_count = huffman_tree.max_bit_count; - const starting_bit_count = LiteralsSection.HuffmanTree.weightToBitCount( - huffman_tree.nodes[huffman_tree.symbol_count_minus_one].weight, - max_bit_count, - ); - var bits_read: u4 = 0; - var huffman_tree_index: usize = huffman_tree.symbol_count_minus_one; - var bit_count_to_read: u4 = starting_bit_count; - for (0..len) |_| { - var prefix: u16 = 0; - while (true) { - const new_bits = try self.readLiteralsBits(bit_count_to_read); - prefix <<= bit_count_to_read; - prefix |= new_bits; - bits_read += bit_count_to_read; - const result = try huffman_tree.query(huffman_tree_index, prefix); - - switch (result) { - .symbol => |sym| { - dest.writeAssumeCapacity(sym); - bit_count_to_read = starting_bit_count; - bits_read = 0; - huffman_tree_index = huffman_tree.symbol_count_minus_one; - break; - }, - .index => |index| { - huffman_tree_index = index; - const bit_count = LiteralsSection.HuffmanTree.weightToBitCount( - huffman_tree.nodes[index].weight, - max_bit_count, - ); - bit_count_to_read = bit_count - bits_read; - }, - } - } - } - self.literal_written_count += len; - self.written_count += len; - }, - } - } - - fn getCode(self: *DecodeState, comptime choice: DataType) u32 { - return switch (@field(self, @tagName(choice)).table) { - .rle => |value| value, - .fse => |table| table[@field(self, @tagName(choice)).state].symbol, - }; - } -}; - -/// Decode a single block from `src` into `dest`. The beginning of `src` must be -/// the start of the block content (i.e. directly after the block header). -/// Increments `consumed_count` by the number of bytes read from `src` to decode -/// the block and returns the decompressed size of the block. 
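
Editorial note: a caller-side sketch may make the contract of this function clearer (hypothetical, error handling elided; assumes `dest`, `src`, `decode_state`, and `block_size_max` are in scope, and uses `decodeBlockHeaderSlice` from further down in this file):

    var consumed: usize = 0;
    var write_count: usize = 0;
    while (true) {
        const header = try decodeBlockHeaderSlice(src[consumed..]);
        consumed += 3; // the 3-byte block header
        write_count += try decodeBlock(
            dest,
            src[consumed..],
            header,
            &decode_state,
            &consumed,
            block_size_max,
            write_count,
        );
        if (header.last_block) break;
    }
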
-///
-/// Errors returned:
-///
-/// - `error.BlockSizeOverMaximum` if the block's size is larger than 1 << 17 or
-///   `dest[written_count..].len`
-/// - `error.MalformedBlockSize` if `src.len` is smaller than the block size
-///   and the block is a raw or compressed block
-/// - `error.ReservedBlock` if the block is a reserved block
-/// - `error.MalformedRleBlock` if the block is an RLE block and `src.len < 1`
-/// - `error.MalformedCompressedBlock` if there are errors decoding a
-///   compressed block
-/// - `error.DestTooSmall` if `dest` is not large enough to hold the
-///   decompressed block
-pub fn decodeBlock(
-    dest: []u8,
-    src: []const u8,
-    block_header: frame.Zstandard.Block.Header,
-    decode_state: *DecodeState,
-    consumed_count: *usize,
-    block_size_max: usize,
-    written_count: usize,
-) (error{DestTooSmall} || Error)!usize {
-    const block_size = block_header.block_size;
-    if (block_size_max < block_size) return error.BlockSizeOverMaximum;
-    switch (block_header.block_type) {
-        .raw => {
-            if (src.len < block_size) return error.MalformedBlockSize;
-            if (dest[written_count..].len < block_size) return error.DestTooSmall;
-            @memcpy(dest[written_count..][0..block_size], src[0..block_size]);
-            consumed_count.* += block_size;
-            decode_state.written_count += block_size;
-            return block_size;
-        },
-        .rle => {
-            if (src.len < 1) return error.MalformedRleBlock;
-            if (dest[written_count..].len < block_size) return error.DestTooSmall;
-            for (written_count..block_size + written_count) |write_pos| {
-                dest[write_pos] = src[0];
-            }
-            consumed_count.* += 1;
-            decode_state.written_count += block_size;
-            return block_size;
-        },
-        .compressed => {
-            if (src.len < block_size) return error.MalformedBlockSize;
-            var bytes_read: usize = 0;
-            const literals = decodeLiteralsSectionSlice(src[0..block_size], &bytes_read) catch
-                return error.MalformedCompressedBlock;
-            var fbs = std.io.fixedBufferStream(src[bytes_read..block_size]);
-            const fbs_reader = fbs.reader();
-            const sequences_header = decodeSequencesHeader(fbs_reader) catch
-                return error.MalformedCompressedBlock;
-
-            decode_state.prepare(fbs_reader, literals, sequences_header) catch
-                return error.MalformedCompressedBlock;
-
-            bytes_read += fbs.pos;
-
-            var bytes_written: usize = 0;
-            {
-                const bit_stream_bytes = src[bytes_read..block_size];
-                var bit_stream: readers.ReverseBitReader = undefined;
-                bit_stream.init(bit_stream_bytes) catch return error.MalformedCompressedBlock;
-
-                if (sequences_header.sequence_count > 0) {
-                    decode_state.readInitialFseState(&bit_stream) catch
-                        return error.MalformedCompressedBlock;
-
-                    var sequence_size_limit = block_size_max;
-                    for (0..sequences_header.sequence_count) |i| {
-                        const write_pos = written_count + bytes_written;
-                        const decompressed_size = decode_state.decodeSequenceSlice(
-                            dest,
-                            write_pos,
-                            &bit_stream,
-                            sequence_size_limit,
-                            i == sequences_header.sequence_count - 1,
-                        ) catch |err| switch (err) {
-                            error.DestTooSmall => return error.DestTooSmall,
-                            else => return error.MalformedCompressedBlock,
-                        };
-                        bytes_written += decompressed_size;
-                        sequence_size_limit -= decompressed_size;
-                    }
-                }
-
-                if (!bit_stream.isEmpty()) {
-                    return error.MalformedCompressedBlock;
-                }
-            }
-
-            if (decode_state.literal_written_count < literals.header.regenerated_size) {
-                const len = literals.header.regenerated_size - decode_state.literal_written_count;
-                if (len > dest[written_count + bytes_written ..].len) return error.DestTooSmall;
-                decode_state.decodeLiteralsSlice(dest[written_count + bytes_written ..], len) catch
- return error.MalformedCompressedBlock; - bytes_written += len; - } - - switch (decode_state.literal_header.block_type) { - .treeless, .compressed => { - if (!decode_state.isLiteralStreamEmpty()) return error.MalformedCompressedBlock; - }, - .raw, .rle => {}, - } - - consumed_count.* += block_size; - return bytes_written; - }, - .reserved => return error.ReservedBlock, - } -} - -/// Decode a single block from `src` into `dest`; see `decodeBlock()`. Returns -/// the size of the decompressed block, which can be used with `dest.sliceLast()` -/// to get the decompressed bytes. `error.BlockSizeOverMaximum` is returned if -/// the block's compressed or decompressed size is larger than `block_size_max`. -pub fn decodeBlockRingBuffer( - dest: *RingBuffer, - src: []const u8, - block_header: frame.Zstandard.Block.Header, - decode_state: *DecodeState, - consumed_count: *usize, - block_size_max: usize, -) Error!usize { - const block_size = block_header.block_size; - if (block_size_max < block_size) return error.BlockSizeOverMaximum; - switch (block_header.block_type) { - .raw => { - if (src.len < block_size) return error.MalformedBlockSize; - // dest may have length zero if block_size == 0, causing division by zero in - // writeSliceAssumeCapacity() - if (block_size > 0) { - const data = src[0..block_size]; - dest.writeSliceAssumeCapacity(data); - consumed_count.* += block_size; - decode_state.written_count += block_size; - } - return block_size; - }, - .rle => { - if (src.len < 1) return error.MalformedRleBlock; - for (0..block_size) |_| { - dest.writeAssumeCapacity(src[0]); - } - consumed_count.* += 1; - decode_state.written_count += block_size; - return block_size; - }, - .compressed => { - if (src.len < block_size) return error.MalformedBlockSize; - var bytes_read: usize = 0; - const literals = decodeLiteralsSectionSlice(src[0..block_size], &bytes_read) catch - return error.MalformedCompressedBlock; - var fbs = std.io.fixedBufferStream(src[bytes_read..block_size]); - const fbs_reader = fbs.reader(); - const sequences_header = decodeSequencesHeader(fbs_reader) catch - return error.MalformedCompressedBlock; - - decode_state.prepare(fbs_reader, literals, sequences_header) catch - return error.MalformedCompressedBlock; - - bytes_read += fbs.pos; - - var bytes_written: usize = 0; - { - const bit_stream_bytes = src[bytes_read..block_size]; - var bit_stream: readers.ReverseBitReader = undefined; - bit_stream.init(bit_stream_bytes) catch return error.MalformedCompressedBlock; - - if (sequences_header.sequence_count > 0) { - decode_state.readInitialFseState(&bit_stream) catch - return error.MalformedCompressedBlock; - - var sequence_size_limit = block_size_max; - for (0..sequences_header.sequence_count) |i| { - const decompressed_size = decode_state.decodeSequenceRingBuffer( - dest, - &bit_stream, - sequence_size_limit, - i == sequences_header.sequence_count - 1, - ) catch return error.MalformedCompressedBlock; - bytes_written += decompressed_size; - sequence_size_limit -= decompressed_size; - } - } - - if (!bit_stream.isEmpty()) { - return error.MalformedCompressedBlock; - } - } - - if (decode_state.literal_written_count < literals.header.regenerated_size) { - const len = literals.header.regenerated_size - decode_state.literal_written_count; - decode_state.decodeLiteralsRingBuffer(dest, len) catch - return error.MalformedCompressedBlock; - bytes_written += len; - } - - switch (decode_state.literal_header.block_type) { - .treeless, .compressed => { - if (!decode_state.isLiteralStreamEmpty()) return 
error.MalformedCompressedBlock;
-                },
-                .raw, .rle => {},
-            }
-
-            consumed_count.* += block_size;
-            if (bytes_written > block_size_max) return error.BlockSizeOverMaximum;
-            return bytes_written;
-        },
-        .reserved => return error.ReservedBlock,
-    }
-}
-
-/// Decode a single block from `source` into `dest`. Literal and sequence data
-/// from the block is copied into `literals_buffer` and `sequence_buffer`, which
-/// must be large enough or `error.LiteralsBufferTooSmall` and
-/// `error.SequenceBufferTooSmall` are returned (the maximum block size is an
-/// upper bound for the size of both buffers). See `decodeBlock`
-/// and `decodeBlockRingBuffer` for functions that can decode a block without
-/// these extra copies. `error.EndOfStream` is returned if `source` does not
-/// contain enough bytes.
-pub fn decodeBlockReader(
-    dest: *RingBuffer,
-    source: anytype,
-    block_header: frame.Zstandard.Block.Header,
-    decode_state: *DecodeState,
-    block_size_max: usize,
-    literals_buffer: []u8,
-    sequence_buffer: []u8,
-) !void {
-    const block_size = block_header.block_size;
-    var block_reader_limited = std.io.limitedReader(source, block_size);
-    const block_reader = block_reader_limited.reader();
-    if (block_size_max < block_size) return error.BlockSizeOverMaximum;
-    switch (block_header.block_type) {
-        .raw => {
-            if (block_size == 0) return;
-            const slice = dest.sliceAt(dest.write_index, block_size);
-            try source.readNoEof(slice.first);
-            try source.readNoEof(slice.second);
-            dest.write_index = dest.mask2(dest.write_index + block_size);
-            decode_state.written_count += block_size;
-        },
-        .rle => {
-            const byte = try source.readByte();
-            for (0..block_size) |_| {
-                dest.writeAssumeCapacity(byte);
-            }
-            decode_state.written_count += block_size;
-        },
-        .compressed => {
-            const literals = try decodeLiteralsSection(block_reader, literals_buffer);
-            const sequences_header = try decodeSequencesHeader(block_reader);
-
-            try decode_state.prepare(block_reader, literals, sequences_header);
-
-            var bytes_written: usize = 0;
-            {
-                const size = try block_reader.readAll(sequence_buffer);
-                var bit_stream: readers.ReverseBitReader = undefined;
-                try bit_stream.init(sequence_buffer[0..size]);
-
-                if (sequences_header.sequence_count > 0) {
-                    if (sequence_buffer.len < block_reader_limited.bytes_left)
-                        return error.SequenceBufferTooSmall;
-
-                    decode_state.readInitialFseState(&bit_stream) catch
-                        return error.MalformedCompressedBlock;
-
-                    var sequence_size_limit = block_size_max;
-                    for (0..sequences_header.sequence_count) |i| {
-                        const decompressed_size = decode_state.decodeSequenceRingBuffer(
-                            dest,
-                            &bit_stream,
-                            sequence_size_limit,
-                            i == sequences_header.sequence_count - 1,
-                        ) catch return error.MalformedCompressedBlock;
-                        sequence_size_limit -= decompressed_size;
-                        bytes_written += decompressed_size;
-                    }
-                }
-
-                if (!bit_stream.isEmpty()) {
-                    return error.MalformedCompressedBlock;
-                }
-            }
-
-            if (decode_state.literal_written_count < literals.header.regenerated_size) {
-                const len = literals.header.regenerated_size - decode_state.literal_written_count;
-                decode_state.decodeLiteralsRingBuffer(dest, len) catch
-                    return error.MalformedCompressedBlock;
-                bytes_written += len;
-            }
-
-            switch (decode_state.literal_header.block_type) {
-                .treeless, .compressed => {
-                    if (!decode_state.isLiteralStreamEmpty()) return error.MalformedCompressedBlock;
-                },
-                .raw, .rle => {},
-            }
-
-            if (bytes_written > block_size_max) return error.BlockSizeOverMaximum;
-            if (block_reader_limited.bytes_left != 0) return
error.MalformedCompressedBlock;
-            decode_state.literal_written_count = 0;
-        },
-        .reserved => return error.ReservedBlock,
-    }
-}
-
-/// Decode the header of a block.
-pub fn decodeBlockHeader(src: *const [3]u8) frame.Zstandard.Block.Header {
-    const last_block = src[0] & 1 == 1;
-    const block_type = @as(frame.Zstandard.Block.Type, @enumFromInt((src[0] & 0b110) >> 1));
-    const block_size = ((src[0] & 0b11111000) >> 3) + (@as(u21, src[1]) << 5) + (@as(u21, src[2]) << 13);
-    return .{
-        .last_block = last_block,
-        .block_type = block_type,
-        .block_size = block_size,
-    };
-}
-
-/// Decode the header of a block.
-///
-/// Errors returned:
-/// - `error.EndOfStream` if `src.len < 3`
-pub fn decodeBlockHeaderSlice(src: []const u8) error{EndOfStream}!frame.Zstandard.Block.Header {
-    if (src.len < 3) return error.EndOfStream;
-    return decodeBlockHeader(src[0..3]);
-}
-
-/// Decode a `LiteralsSection` from `src`, incrementing `consumed_count` by the
-/// number of bytes the section uses.
-///
-/// Errors returned:
-/// - `error.MalformedLiteralsHeader` if the header is invalid
-/// - `error.MalformedLiteralsSection` if there are decoding errors
-/// - `error.MalformedAccuracyLog` if compressed literals have an invalid
-///   accuracy log
-/// - `error.MalformedFseTable` if compressed literals have an invalid FSE table
-/// - `error.MalformedHuffmanTree` if there are errors decoding a Huffman tree
-/// - `error.EndOfStream` if there are not enough bytes in `src`
-pub fn decodeLiteralsSectionSlice(
-    src: []const u8,
-    consumed_count: *usize,
-) (error{ MalformedLiteralsHeader, MalformedLiteralsSection, EndOfStream } || huffman.Error)!LiteralsSection {
-    var bytes_read: usize = 0;
-    const header = header: {
-        var fbs = std.io.fixedBufferStream(src);
-        defer bytes_read = fbs.pos;
-        break :header decodeLiteralsHeader(fbs.reader()) catch return error.MalformedLiteralsHeader;
-    };
-    switch (header.block_type) {
-        .raw => {
-            if (src.len < bytes_read + header.regenerated_size) return error.MalformedLiteralsSection;
-            const stream = src[bytes_read..][0..header.regenerated_size];
-            consumed_count.* += header.regenerated_size + bytes_read;
-            return LiteralsSection{
-                .header = header,
-                .huffman_tree = null,
-                .streams = .{ .one = stream },
-            };
-        },
-        .rle => {
-            if (src.len < bytes_read + 1) return error.MalformedLiteralsSection;
-            const stream = src[bytes_read..][0..1];
-            consumed_count.* += 1 + bytes_read;
-            return LiteralsSection{
-                .header = header,
-                .huffman_tree = null,
-                .streams = .{ .one = stream },
-            };
-        },
-        .compressed, .treeless => {
-            const huffman_tree_start = bytes_read;
-            const huffman_tree = if (header.block_type == .compressed)
-                try huffman.decodeHuffmanTreeSlice(src[bytes_read..], &bytes_read)
-            else
-                null;
-            const huffman_tree_size = bytes_read - huffman_tree_start;
-            const total_streams_size = std.math.sub(usize, header.compressed_size.?, huffman_tree_size) catch
-                return error.MalformedLiteralsSection;
-
-            if (src.len < bytes_read + total_streams_size) return error.MalformedLiteralsSection;
-            const stream_data = src[bytes_read .. bytes_read + total_streams_size];
-
-            const streams = try decodeStreams(header.size_format, stream_data);
-            consumed_count.* += bytes_read + total_streams_size;
-            return LiteralsSection{
-                .header = header,
-                .huffman_tree = huffman_tree,
-                .streams = streams,
-            };
-        },
-    }
-}
-
-/// Decode a `LiteralsSection` from `src`, incrementing `consumed_count` by the
-/// number of bytes the section uses. See `decodeLiteralsSectionSlice()`.
-pub fn decodeLiteralsSection( - source: anytype, - buffer: []u8, -) !LiteralsSection { - const header = try decodeLiteralsHeader(source); - switch (header.block_type) { - .raw => { - if (buffer.len < header.regenerated_size) return error.LiteralsBufferTooSmall; - try source.readNoEof(buffer[0..header.regenerated_size]); - return LiteralsSection{ - .header = header, - .huffman_tree = null, - .streams = .{ .one = buffer }, - }; - }, - .rle => { - buffer[0] = try source.readByte(); - return LiteralsSection{ - .header = header, - .huffman_tree = null, - .streams = .{ .one = buffer[0..1] }, - }; - }, - .compressed, .treeless => { - var counting_reader = std.io.countingReader(source); - const huffman_tree = if (header.block_type == .compressed) - try huffman.decodeHuffmanTree(counting_reader.reader(), buffer) - else - null; - const huffman_tree_size = @as(usize, @intCast(counting_reader.bytes_read)); - const total_streams_size = std.math.sub(usize, header.compressed_size.?, huffman_tree_size) catch - return error.MalformedLiteralsSection; - - if (total_streams_size > buffer.len) return error.LiteralsBufferTooSmall; - try source.readNoEof(buffer[0..total_streams_size]); - const stream_data = buffer[0..total_streams_size]; - - const streams = try decodeStreams(header.size_format, stream_data); - return LiteralsSection{ - .header = header, - .huffman_tree = huffman_tree, - .streams = streams, - }; - }, - } -} - -fn decodeStreams(size_format: u2, stream_data: []const u8) !LiteralsSection.Streams { - if (size_format == 0) { - return .{ .one = stream_data }; - } - - if (stream_data.len < 6) return error.MalformedLiteralsSection; - - const stream_1_length: usize = std.mem.readInt(u16, stream_data[0..2], .little); - const stream_2_length: usize = std.mem.readInt(u16, stream_data[2..4], .little); - const stream_3_length: usize = std.mem.readInt(u16, stream_data[4..6], .little); - - const stream_1_start = 6; - const stream_2_start = stream_1_start + stream_1_length; - const stream_3_start = stream_2_start + stream_2_length; - const stream_4_start = stream_3_start + stream_3_length; - - if (stream_data.len < stream_4_start) return error.MalformedLiteralsSection; - - return .{ .four = .{ - stream_data[stream_1_start .. stream_1_start + stream_1_length], - stream_data[stream_2_start .. stream_2_start + stream_2_length], - stream_data[stream_3_start .. stream_3_start + stream_3_length], - stream_data[stream_4_start..], - } }; -} - -/// Decode a literals section header. 
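
Editorial note: the 6-byte jump table in decodeStreams above is easy to misread. A small self-contained check with made-up sizes (hypothetical test, not part of the deleted file):

    const std = @import("std");

    test "four-stream literals jump table" {
        // Hypothetical stream_data: jump table declaring lengths 10, 12, and 11,
        // followed by 39 bytes of stream content.
        const stream_data = [_]u8{ 10, 0, 12, 0, 11, 0 } ++ [_]u8{0xaa} ** 39;
        const len1: usize = std.mem.readInt(u16, stream_data[0..2], .little);
        const len2: usize = std.mem.readInt(u16, stream_data[2..4], .little);
        const len3: usize = std.mem.readInt(u16, stream_data[4..6], .little);
        // Streams 1..3 start at byte 6; stream 4 is whatever remains.
        try std.testing.expectEqual(@as(usize, 6 + 10 + 12 + 11), 6 + len1 + len2 + len3);
        try std.testing.expectEqual(@as(usize, 6), stream_data.len - (6 + len1 + len2 + len3));
    }
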
-/// -/// Errors returned: -/// - `error.EndOfStream` if there are not enough bytes in `source` -pub fn decodeLiteralsHeader(source: anytype) !LiteralsSection.Header { - const byte0 = try source.readByte(); - const block_type = @as(LiteralsSection.BlockType, @enumFromInt(byte0 & 0b11)); - const size_format = @as(u2, @intCast((byte0 & 0b1100) >> 2)); - var regenerated_size: u20 = undefined; - var compressed_size: ?u18 = null; - switch (block_type) { - .raw, .rle => { - switch (size_format) { - 0, 2 => { - regenerated_size = byte0 >> 3; - }, - 1 => regenerated_size = (byte0 >> 4) + (@as(u20, try source.readByte()) << 4), - 3 => regenerated_size = (byte0 >> 4) + - (@as(u20, try source.readByte()) << 4) + - (@as(u20, try source.readByte()) << 12), - } - }, - .compressed, .treeless => { - const byte1 = try source.readByte(); - const byte2 = try source.readByte(); - switch (size_format) { - 0, 1 => { - regenerated_size = (byte0 >> 4) + ((@as(u20, byte1) & 0b00111111) << 4); - compressed_size = ((byte1 & 0b11000000) >> 6) + (@as(u18, byte2) << 2); - }, - 2 => { - const byte3 = try source.readByte(); - regenerated_size = (byte0 >> 4) + (@as(u20, byte1) << 4) + ((@as(u20, byte2) & 0b00000011) << 12); - compressed_size = ((byte2 & 0b11111100) >> 2) + (@as(u18, byte3) << 6); - }, - 3 => { - const byte3 = try source.readByte(); - const byte4 = try source.readByte(); - regenerated_size = (byte0 >> 4) + (@as(u20, byte1) << 4) + ((@as(u20, byte2) & 0b00111111) << 12); - compressed_size = ((byte2 & 0b11000000) >> 6) + (@as(u18, byte3) << 2) + (@as(u18, byte4) << 10); - }, - } - }, - } - return LiteralsSection.Header{ - .block_type = block_type, - .size_format = size_format, - .regenerated_size = regenerated_size, - .compressed_size = compressed_size, - }; -} - -/// Decode a sequences section header. 
-/// -/// Errors returned: -/// - `error.ReservedBitSet` if the reserved bit is set -/// - `error.EndOfStream` if there are not enough bytes in `source` -pub fn decodeSequencesHeader( - source: anytype, -) !SequencesSection.Header { - var sequence_count: u24 = undefined; - - const byte0 = try source.readByte(); - if (byte0 == 0) { - return SequencesSection.Header{ - .sequence_count = 0, - .offsets = undefined, - .match_lengths = undefined, - .literal_lengths = undefined, - }; - } else if (byte0 < 128) { - sequence_count = byte0; - } else if (byte0 < 255) { - sequence_count = (@as(u24, (byte0 - 128)) << 8) + try source.readByte(); - } else { - sequence_count = (try source.readByte()) + (@as(u24, try source.readByte()) << 8) + 0x7F00; - } - - const compression_modes = try source.readByte(); - - const matches_mode = @as(SequencesSection.Header.Mode, @enumFromInt((compression_modes & 0b00001100) >> 2)); - const offsets_mode = @as(SequencesSection.Header.Mode, @enumFromInt((compression_modes & 0b00110000) >> 4)); - const literal_mode = @as(SequencesSection.Header.Mode, @enumFromInt((compression_modes & 0b11000000) >> 6)); - if (compression_modes & 0b11 != 0) return error.ReservedBitSet; - - return SequencesSection.Header{ - .sequence_count = sequence_count, - .offsets = offsets_mode, - .match_lengths = matches_mode, - .literal_lengths = literal_mode, - }; -} diff --git a/lib/std/compress/zstandard/decode/fse.zig b/lib/std/compress/zstandard/decode/fse.zig deleted file mode 100644 index 6e987f9c6f..0000000000 --- a/lib/std/compress/zstandard/decode/fse.zig +++ /dev/null @@ -1,153 +0,0 @@ -const std = @import("std"); -const assert = std.debug.assert; - -const types = @import("../types.zig"); -const Table = types.compressed_block.Table; - -pub fn decodeFseTable( - bit_reader: anytype, - expected_symbol_count: usize, - max_accuracy_log: u4, - entries: []Table.Fse, -) !usize { - const accuracy_log_biased = try bit_reader.readBitsNoEof(u4, 4); - if (accuracy_log_biased > max_accuracy_log -| 5) return error.MalformedAccuracyLog; - const accuracy_log = accuracy_log_biased + 5; - - var values: [256]u16 = undefined; - var value_count: usize = 0; - - const total_probability = @as(u16, 1) << accuracy_log; - var accumulated_probability: u16 = 0; - - while (accumulated_probability < total_probability) { - // WARNING: The RFC is poorly worded, and would suggest std.math.log2_int_ceil is correct here, - // but power of two (remaining probabilities + 1) need max bits set to 1 more. 
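// (Editorial worked example for the warning above, with hypothetical numbers:
// suppose total_probability = 64 and accumulated_probability = 33, so
// remaining + 1 = 32 is exactly a power of two. log2_int_ceil(32) = 5, but the
// field must distinguish 33 values (probabilities -1 through 31), so max_bits
// has to be log2_int(32) + 1 = 6; the shorter 5-bit encodings are then
// selected through the `cutoff` computed below.)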
- const max_bits = std.math.log2_int(u16, total_probability - accumulated_probability + 1) + 1; - const small = try bit_reader.readBitsNoEof(u16, max_bits - 1); - - const cutoff = (@as(u16, 1) << max_bits) - 1 - (total_probability - accumulated_probability + 1); - - const value = if (small < cutoff) - small - else value: { - const value_read = small + (try bit_reader.readBitsNoEof(u16, 1) << (max_bits - 1)); - break :value if (value_read < @as(u16, 1) << (max_bits - 1)) - value_read - else - value_read - cutoff; - }; - - accumulated_probability += if (value != 0) value - 1 else 1; - - values[value_count] = value; - value_count += 1; - - if (value == 1) { - while (true) { - const repeat_flag = try bit_reader.readBitsNoEof(u2, 2); - if (repeat_flag + value_count > 256) return error.MalformedFseTable; - for (0..repeat_flag) |_| { - values[value_count] = 1; - value_count += 1; - } - if (repeat_flag < 3) break; - } - } - if (value_count == 256) break; - } - bit_reader.alignToByte(); - - if (value_count < 2) return error.MalformedFseTable; - if (accumulated_probability != total_probability) return error.MalformedFseTable; - if (value_count > expected_symbol_count) return error.MalformedFseTable; - - const table_size = total_probability; - - try buildFseTable(values[0..value_count], entries[0..table_size]); - return table_size; -} - -fn buildFseTable(values: []const u16, entries: []Table.Fse) !void { - const total_probability = @as(u16, @intCast(entries.len)); - const accuracy_log = std.math.log2_int(u16, total_probability); - assert(total_probability <= 1 << 9); - - var less_than_one_count: usize = 0; - for (values, 0..) |value, i| { - if (value == 0) { - entries[entries.len - 1 - less_than_one_count] = Table.Fse{ - .symbol = @as(u8, @intCast(i)), - .baseline = 0, - .bits = accuracy_log, - }; - less_than_one_count += 1; - } - } - - var position: usize = 0; - var temp_states: [1 << 9]u16 = undefined; - for (values, 0..) 
|value, symbol| { - if (value == 0 or value == 1) continue; - const probability = value - 1; - - const state_share_dividend = std.math.ceilPowerOfTwo(u16, probability) catch - return error.MalformedFseTable; - const share_size = @divExact(total_probability, state_share_dividend); - const double_state_count = state_share_dividend - probability; - const single_state_count = probability - double_state_count; - const share_size_log = std.math.log2_int(u16, share_size); - - for (0..probability) |i| { - temp_states[i] = @as(u16, @intCast(position)); - position += (entries.len >> 1) + (entries.len >> 3) + 3; - position &= entries.len - 1; - while (position >= entries.len - less_than_one_count) { - position += (entries.len >> 1) + (entries.len >> 3) + 3; - position &= entries.len - 1; - } - } - std.mem.sort(u16, temp_states[0..probability], {}, std.sort.asc(u16)); - for (0..probability) |i| { - entries[temp_states[i]] = if (i < double_state_count) Table.Fse{ - .symbol = @as(u8, @intCast(symbol)), - .bits = share_size_log + 1, - .baseline = single_state_count * share_size + @as(u16, @intCast(i)) * 2 * share_size, - } else Table.Fse{ - .symbol = @as(u8, @intCast(symbol)), - .bits = share_size_log, - .baseline = (@as(u16, @intCast(i)) - double_state_count) * share_size, - }; - } - } -} - -test buildFseTable { - const literals_length_default_values = [36]u16{ - 5, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, - 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 3, 2, 2, 2, 2, 2, - 0, 0, 0, 0, - }; - - const match_lengths_default_values = [53]u16{ - 2, 5, 4, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, - 0, 0, 0, 0, 0, - }; - - const offset_codes_default_values = [29]u16{ - 2, 2, 2, 2, 2, 2, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, - }; - - var entries: [64]Table.Fse = undefined; - try buildFseTable(&literals_length_default_values, &entries); - try std.testing.expectEqualSlices(Table.Fse, types.compressed_block.predefined_literal_fse_table.fse, &entries); - - try buildFseTable(&match_lengths_default_values, &entries); - try std.testing.expectEqualSlices(Table.Fse, types.compressed_block.predefined_match_fse_table.fse, &entries); - - try buildFseTable(&offset_codes_default_values, entries[0..32]); - try std.testing.expectEqualSlices(Table.Fse, types.compressed_block.predefined_offset_fse_table.fse, entries[0..32]); -} diff --git a/lib/std/compress/zstandard/decode/huffman.zig b/lib/std/compress/zstandard/decode/huffman.zig deleted file mode 100644 index 4728ccd027..0000000000 --- a/lib/std/compress/zstandard/decode/huffman.zig +++ /dev/null @@ -1,234 +0,0 @@ -const std = @import("std"); - -const types = @import("../types.zig"); -const LiteralsSection = types.compressed_block.LiteralsSection; -const Table = types.compressed_block.Table; - -const readers = @import("../readers.zig"); - -const decodeFseTable = @import("fse.zig").decodeFseTable; - -pub const Error = error{ - MalformedHuffmanTree, - MalformedFseTable, - MalformedAccuracyLog, - EndOfStream, -}; - -fn decodeFseHuffmanTree( - source: anytype, - compressed_size: usize, - buffer: []u8, - weights: *[256]u4, -) !usize { - var stream = std.io.limitedReader(source, compressed_size); - var bit_reader = readers.bitReader(stream.reader()); - - var entries: [1 << 6]Table.Fse = undefined; - const table_size = decodeFseTable(&bit_reader, 256, 6, &entries) catch |err| switch (err) { - error.MalformedAccuracyLog, error.MalformedFseTable => |e| return e, - 
error.EndOfStream => return error.MalformedFseTable, - else => |e| return e, - }; - const accuracy_log = std.math.log2_int_ceil(usize, table_size); - - const amount = try stream.reader().readAll(buffer); - var huff_bits: readers.ReverseBitReader = undefined; - huff_bits.init(buffer[0..amount]) catch return error.MalformedHuffmanTree; - - return assignWeights(&huff_bits, accuracy_log, &entries, weights); -} - -fn decodeFseHuffmanTreeSlice(src: []const u8, compressed_size: usize, weights: *[256]u4) !usize { - if (src.len < compressed_size) return error.MalformedHuffmanTree; - var stream = std.io.fixedBufferStream(src[0..compressed_size]); - var counting_reader = std.io.countingReader(stream.reader()); - var bit_reader = readers.bitReader(counting_reader.reader()); - - var entries: [1 << 6]Table.Fse = undefined; - const table_size = decodeFseTable(&bit_reader, 256, 6, &entries) catch |err| switch (err) { - error.MalformedAccuracyLog, error.MalformedFseTable => |e| return e, - error.EndOfStream => return error.MalformedFseTable, - }; - const accuracy_log = std.math.log2_int_ceil(usize, table_size); - - const start_index = std.math.cast(usize, counting_reader.bytes_read) orelse - return error.MalformedHuffmanTree; - const huff_data = src[start_index..compressed_size]; - var huff_bits: readers.ReverseBitReader = undefined; - huff_bits.init(huff_data) catch return error.MalformedHuffmanTree; - - return assignWeights(&huff_bits, accuracy_log, &entries, weights); -} - -fn assignWeights( - huff_bits: *readers.ReverseBitReader, - accuracy_log: u16, - entries: *[1 << 6]Table.Fse, - weights: *[256]u4, -) !usize { - var i: usize = 0; - var even_state: u32 = huff_bits.readBitsNoEof(u32, accuracy_log) catch return error.MalformedHuffmanTree; - var odd_state: u32 = huff_bits.readBitsNoEof(u32, accuracy_log) catch return error.MalformedHuffmanTree; - - while (i < 254) { - const even_data = entries[even_state]; - var read_bits: u16 = 0; - const even_bits = huff_bits.readBits(u32, even_data.bits, &read_bits) catch unreachable; - weights[i] = std.math.cast(u4, even_data.symbol) orelse return error.MalformedHuffmanTree; - i += 1; - if (read_bits < even_data.bits) { - weights[i] = std.math.cast(u4, entries[odd_state].symbol) orelse return error.MalformedHuffmanTree; - i += 1; - break; - } - even_state = even_data.baseline + even_bits; - - read_bits = 0; - const odd_data = entries[odd_state]; - const odd_bits = huff_bits.readBits(u32, odd_data.bits, &read_bits) catch unreachable; - weights[i] = std.math.cast(u4, odd_data.symbol) orelse return error.MalformedHuffmanTree; - i += 1; - if (read_bits < odd_data.bits) { - if (i == 255) return error.MalformedHuffmanTree; - weights[i] = std.math.cast(u4, entries[even_state].symbol) orelse return error.MalformedHuffmanTree; - i += 1; - break; - } - odd_state = odd_data.baseline + odd_bits; - } else return error.MalformedHuffmanTree; - - if (!huff_bits.isEmpty()) { - return error.MalformedHuffmanTree; - } - - return i + 1; // stream contains all but the last symbol -} - -fn decodeDirectHuffmanTree(source: anytype, encoded_symbol_count: usize, weights: *[256]u4) !usize { - const weights_byte_count = (encoded_symbol_count + 1) / 2; - for (0..weights_byte_count) |i| { - const byte = try source.readByte(); - weights[2 * i] = @as(u4, @intCast(byte >> 4)); - weights[2 * i + 1] = @as(u4, @intCast(byte & 0xF)); - } - return encoded_symbol_count + 1; -} - -fn assignSymbols(weight_sorted_prefixed_symbols: []LiteralsSection.HuffmanTree.PrefixedSymbol, weights: [256]u4) usize { - for 
(0..weight_sorted_prefixed_symbols.len) |i| { - weight_sorted_prefixed_symbols[i] = .{ - .symbol = @as(u8, @intCast(i)), - .weight = undefined, - .prefix = undefined, - }; - } - - std.mem.sort( - LiteralsSection.HuffmanTree.PrefixedSymbol, - weight_sorted_prefixed_symbols, - weights, - lessThanByWeight, - ); - - var prefix: u16 = 0; - var prefixed_symbol_count: usize = 0; - var sorted_index: usize = 0; - const symbol_count = weight_sorted_prefixed_symbols.len; - while (sorted_index < symbol_count) { - var symbol = weight_sorted_prefixed_symbols[sorted_index].symbol; - const weight = weights[symbol]; - if (weight == 0) { - sorted_index += 1; - continue; - } - - while (sorted_index < symbol_count) : ({ - sorted_index += 1; - prefixed_symbol_count += 1; - prefix += 1; - }) { - symbol = weight_sorted_prefixed_symbols[sorted_index].symbol; - if (weights[symbol] != weight) { - prefix = ((prefix - 1) >> (weights[symbol] - weight)) + 1; - break; - } - weight_sorted_prefixed_symbols[prefixed_symbol_count].symbol = symbol; - weight_sorted_prefixed_symbols[prefixed_symbol_count].prefix = prefix; - weight_sorted_prefixed_symbols[prefixed_symbol_count].weight = weight; - } - } - return prefixed_symbol_count; -} - -fn buildHuffmanTree(weights: *[256]u4, symbol_count: usize) error{MalformedHuffmanTree}!LiteralsSection.HuffmanTree { - var weight_power_sum_big: u32 = 0; - for (weights[0 .. symbol_count - 1]) |value| { - weight_power_sum_big += (@as(u16, 1) << value) >> 1; - } - if (weight_power_sum_big >= 1 << 11) return error.MalformedHuffmanTree; - const weight_power_sum = @as(u16, @intCast(weight_power_sum_big)); - - // advance to next power of two (even if weight_power_sum is a power of 2) - // TODO: is it valid to have weight_power_sum == 0? - const max_number_of_bits = if (weight_power_sum == 0) 1 else std.math.log2_int(u16, weight_power_sum) + 1; - const next_power_of_two = @as(u16, 1) << max_number_of_bits; - weights[symbol_count - 1] = std.math.log2_int(u16, next_power_of_two - weight_power_sum) + 1; - - var weight_sorted_prefixed_symbols: [256]LiteralsSection.HuffmanTree.PrefixedSymbol = undefined; - const prefixed_symbol_count = assignSymbols(weight_sorted_prefixed_symbols[0..symbol_count], weights.*); - const tree = LiteralsSection.HuffmanTree{ - .max_bit_count = max_number_of_bits, - .symbol_count_minus_one = @as(u8, @intCast(prefixed_symbol_count - 1)), - .nodes = weight_sorted_prefixed_symbols, - }; - return tree; -} - -pub fn decodeHuffmanTree( - source: anytype, - buffer: []u8, -) (@TypeOf(source).Error || Error)!LiteralsSection.HuffmanTree { - const header = try source.readByte(); - var weights: [256]u4 = undefined; - const symbol_count = if (header < 128) - // FSE compressed weights - try decodeFseHuffmanTree(source, header, buffer, &weights) - else - try decodeDirectHuffmanTree(source, header - 127, &weights); - - return buildHuffmanTree(&weights, symbol_count); -} - -pub fn decodeHuffmanTreeSlice( - src: []const u8, - consumed_count: *usize, -) Error!LiteralsSection.HuffmanTree { - if (src.len == 0) return error.MalformedHuffmanTree; - const header = src[0]; - var bytes_read: usize = 1; - var weights: [256]u4 = undefined; - const symbol_count = if (header < 128) count: { - // FSE compressed weights - bytes_read += header; - break :count try decodeFseHuffmanTreeSlice(src[1..], header, &weights); - } else count: { - var fbs = std.io.fixedBufferStream(src[1..]); - defer bytes_read += fbs.pos; - break :count try decodeDirectHuffmanTree(fbs.reader(), header - 127, &weights); - }; - - 
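// (Editorial worked example for buildHuffmanTree above, with hypothetical
// weights: if the explicit weights are {4, 3, 2}, weight_power_sum is
// 8 + 4 + 2 = 14, so max_number_of_bits = log2(14) + 1 = 4 and the implied
// final weight is log2(16 - 14) + 1 = 2, topping the sum up to the power of
// two 16.)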
consumed_count.* += bytes_read;
-    return buildHuffmanTree(&weights, symbol_count);
-}
-
-fn lessThanByWeight(
-    weights: [256]u4,
-    lhs: LiteralsSection.HuffmanTree.PrefixedSymbol,
-    rhs: LiteralsSection.HuffmanTree.PrefixedSymbol,
-) bool {
-    // NOTE: this function relies on the use of a stable sorting algorithm,
-    // otherwise a special case of if (weights[lhs] == weights[rhs]) return lhs < rhs;
-    // should be added
-    return weights[lhs.symbol] < weights[rhs.symbol];
-}
diff --git a/lib/std/compress/zstandard/decompress.zig b/lib/std/compress/zstandard/decompress.zig
deleted file mode 100644
index adc7b89749..0000000000
--- a/lib/std/compress/zstandard/decompress.zig
+++ /dev/null
@@ -1,633 +0,0 @@
-const std = @import("std");
-const assert = std.debug.assert;
-const Allocator = std.mem.Allocator;
-const RingBuffer = std.RingBuffer;
-
-const types = @import("types.zig");
-const frame = types.frame;
-const LiteralsSection = types.compressed_block.LiteralsSection;
-const SequencesSection = types.compressed_block.SequencesSection;
-const SkippableHeader = types.frame.Skippable.Header;
-const ZstandardHeader = types.frame.Zstandard.Header;
-const Table = types.compressed_block.Table;
-
-pub const block = @import("decode/block.zig");
-
-const readers = @import("readers.zig");
-
-/// Returns `true` if `magic` is a valid magic number for a skippable frame
-pub fn isSkippableMagic(magic: u32) bool {
-    return frame.Skippable.magic_number_min <= magic and magic <= frame.Skippable.magic_number_max;
-}
-
-/// Returns the kind of frame at the beginning of `source`.
-///
-/// Errors returned:
-/// - `error.BadMagic` if `source` begins with bytes not equal to the
-///   Zstandard frame magic number, or outside the range of magic numbers for
-///   skippable frames.
-/// - `error.EndOfStream` if `source` contains fewer than 4 bytes
-pub fn decodeFrameType(source: anytype) error{ BadMagic, EndOfStream }!frame.Kind {
-    const magic = try source.readInt(u32, .little);
-    return frameType(magic);
-}
-
-/// Returns the kind of frame associated with `magic`.
-///
-/// Errors returned:
-/// - `error.BadMagic` if `magic` is not a valid magic number.
-pub fn frameType(magic: u32) error{BadMagic}!frame.Kind {
-    return if (magic == frame.Zstandard.magic_number)
-        .zstandard
-    else if (isSkippableMagic(magic))
-        .skippable
-    else
-        error.BadMagic;
-}
-
-pub const FrameHeader = union(enum) {
-    zstandard: ZstandardHeader,
-    skippable: SkippableHeader,
-};
-
-pub const HeaderError = error{ BadMagic, EndOfStream, ReservedBitSet };
-
-/// Returns the header of the frame at the beginning of `source`.
-///
-/// Errors returned:
-/// - `error.BadMagic` if `source` begins with bytes not equal to the
-///   Zstandard frame magic number, or outside the range of magic numbers for
-///   skippable frames.
-/// - `error.EndOfStream` if `source` contains fewer than 4 bytes
-/// - `error.ReservedBitSet` if the frame is a Zstandard frame and any of the
-///   reserved bits are set
-pub fn decodeFrameHeader(source: anytype) (@TypeOf(source).Error || HeaderError)!FrameHeader {
-    const magic = try source.readInt(u32, .little);
-    const frame_type = try frameType(magic);
-    switch (frame_type) {
-        .zstandard => return FrameHeader{ .zstandard = try decodeZstandardHeader(source) },
-        .skippable => return FrameHeader{
-            .skippable = .{
-                .magic_number = magic,
-                .frame_size = try source.readInt(u32, .little),
-            },
-        },
-    }
-}
-
-pub const ReadWriteCount = struct {
-    read_count: usize,
-    write_count: usize,
-};
-
-/// Decodes frames from `src` into `dest`; returns the length of the result.
-/// The stream should not have extra trailing bytes - either all bytes in `src`
-/// will be decoded, or an error will be returned. An error will be returned if
-/// a Zstandard frame in `src` does not declare its content size.
-///
-/// Errors returned:
-/// - `error.DictionaryIdFlagUnsupported` if `src` contains a frame that
-///   uses a dictionary
-/// - `error.MalformedFrame` if a frame in `src` is invalid
-/// - `error.UnknownContentSizeUnsupported` if a frame in `src` does not
-///   declare its content size
-pub fn decode(dest: []u8, src: []const u8, verify_checksum: bool) error{
-    MalformedFrame,
-    UnknownContentSizeUnsupported,
-    DictionaryIdFlagUnsupported,
-}!usize {
-    var write_count: usize = 0;
-    var read_count: usize = 0;
-    while (read_count < src.len) {
-        const counts = decodeFrame(dest, src[read_count..], verify_checksum) catch |err| {
-            switch (err) {
-                error.UnknownContentSizeUnsupported => return error.UnknownContentSizeUnsupported,
-                error.DictionaryIdFlagUnsupported => return error.DictionaryIdFlagUnsupported,
-                else => return error.MalformedFrame,
-            }
-        };
-        read_count += counts.read_count;
-        write_count += counts.write_count;
-    }
-    return write_count;
-}
-
-/// Decodes a stream of frames from `src`; returns the decoded bytes. The stream
-/// should not have extra trailing bytes - either all bytes in `src` will be
-/// decoded, or an error will be returned.
-///
-/// Errors returned:
-/// - `error.DictionaryIdFlagUnsupported` if `src` contains a frame that
-///   uses a dictionary
-/// - `error.MalformedFrame` if a frame in `src` is invalid
-/// - `error.OutOfMemory` if `allocator` cannot allocate enough memory
-pub fn decodeAlloc(
-    allocator: Allocator,
-    src: []const u8,
-    verify_checksum: bool,
-    window_size_max: usize,
-) error{ DictionaryIdFlagUnsupported, MalformedFrame, OutOfMemory }![]u8 {
-    var result = std.ArrayList(u8).init(allocator);
-    errdefer result.deinit();
-
-    var read_count: usize = 0;
-    while (read_count < src.len) {
-        read_count += decodeFrameArrayList(
-            allocator,
-            &result,
-            src[read_count..],
-            verify_checksum,
-            window_size_max,
-        ) catch |err| switch (err) {
-            error.OutOfMemory => return error.OutOfMemory,
-            error.DictionaryIdFlagUnsupported => return error.DictionaryIdFlagUnsupported,
-            else => return error.MalformedFrame,
-        };
-    }
-    return result.toOwnedSlice();
-}
-
-/// Decodes the frame at the start of `src` into `dest`. Returns the number of
-/// bytes read from `src` and written to `dest`. This function can only decode
-/// frames that declare the decompressed content size.
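
Editorial note: for a sense of how the one-shot helpers above were used, a minimal sketch against the pre-removal layout (assuming this file was reachable as `std.compress.zstandard.decompress`; the function name is hypothetical):

    const std = @import("std");
    const decompress = std.compress.zstandard.decompress;

    fn readAllZstd(gpa: std.mem.Allocator, compressed: []const u8) ![]u8 {
        // Verify checksums and refuse windows larger than 8 MiB.
        return decompress.decodeAlloc(gpa, compressed, true, 1 << 23);
    }
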
-///
-/// Errors returned:
-/// - `error.BadMagic` if the first 4 bytes of `src` are not a valid magic
-/// number for a Zstandard or skippable frame
-/// - `error.UnknownContentSizeUnsupported` if the frame does not declare the
-/// uncompressed content size
-/// - `error.WindowSizeUnknown` if the frame does not have a valid window size
-/// - `error.ContentTooLarge` if `dest` is smaller than the uncompressed data
-/// size declared by the frame header
-/// - `error.ContentSizeTooLarge` if the frame header indicates a content size
-/// that is larger than `std.math.maxInt(usize)`
-/// - `error.DictionaryIdFlagUnsupported` if the frame uses a dictionary
-/// - `error.ChecksumFailure` if `verify_checksum` is true and the frame
-/// contains a checksum that does not match the checksum of the decompressed
-/// data
-/// - `error.ReservedBitSet` if any of the reserved bits of the frame header
-/// are set
-/// - `error.EndOfStream` if `src` does not contain a complete frame
-/// - `error.BadContentSize` if the content size declared by the frame does
-/// not equal the actual size of decompressed data
-/// - an error in `block.Error` if there are errors decoding a block
-/// - `error.SkippableSizeTooLarge` if the frame is skippable and reports a
-/// size greater than `src.len`
-pub fn decodeFrame(
-    dest: []u8,
-    src: []const u8,
-    verify_checksum: bool,
-) (error{
-    BadMagic,
-    UnknownContentSizeUnsupported,
-    ContentTooLarge,
-    ContentSizeTooLarge,
-    WindowSizeUnknown,
-    DictionaryIdFlagUnsupported,
-    SkippableSizeTooLarge,
-} || FrameError)!ReadWriteCount {
-    var fbs = std.io.fixedBufferStream(src);
-    switch (try decodeFrameType(fbs.reader())) {
-        .zstandard => return decodeZstandardFrame(dest, src, verify_checksum),
-        .skippable => {
-            const content_size = try fbs.reader().readInt(u32, .little);
-            if (content_size > std.math.maxInt(usize) - 8) return error.SkippableSizeTooLarge;
-            const read_count = @as(usize, content_size) + 8;
-            if (read_count > src.len) return error.SkippableSizeTooLarge;
-            return ReadWriteCount{
-                .read_count = read_count,
-                .write_count = 0,
-            };
-        },
-    }
-}
-
-/// Decodes the frame at the start of `src` into `dest`. Returns the number of
-/// bytes read from `src`.
-///
-/// Errors returned:
-/// - `error.BadMagic` if the first 4 bytes of `src` are not a valid magic
-/// number for a Zstandard or skippable frame
-/// - `error.WindowSizeUnknown` if the frame does not have a valid window size
-/// - `error.WindowTooLarge` if the window size is larger than
-/// `window_size_max`
-/// - `error.ContentSizeTooLarge` if the frame header indicates a content size
-/// that is larger than `std.math.maxInt(usize)`
-/// - `error.DictionaryIdFlagUnsupported` if the frame uses a dictionary
-/// - `error.ChecksumFailure` if `verify_checksum` is true and the frame
-/// contains a checksum that does not match the checksum of the decompressed
-/// data
-/// - `error.ReservedBitSet` if any of the reserved bits of the frame header
-/// are set
-/// - `error.EndOfStream` if `src` does not contain a complete frame
-/// - `error.BadContentSize` if the content size declared by the frame does
-/// not equal the actual size of decompressed data
-/// - `error.OutOfMemory` if `allocator` cannot allocate enough memory
-/// - an error in `block.Error` if there are errors decoding a block
-/// - `error.SkippableSizeTooLarge` if the frame is skippable and reports a
-/// size greater than `src.len`
-pub fn decodeFrameArrayList(
-    allocator: Allocator,
-    dest: *std.ArrayList(u8),
-    src: []const u8,
-    verify_checksum: bool,
-    window_size_max: usize,
-) (error{ BadMagic, OutOfMemory, SkippableSizeTooLarge } || FrameContext.Error || FrameError)!usize {
-    var fbs = std.io.fixedBufferStream(src);
-    const reader = fbs.reader();
-    const magic = try reader.readInt(u32, .little);
-    switch (try frameType(magic)) {
-        .zstandard => return decodeZstandardFrameArrayList(
-            allocator,
-            dest,
-            src,
-            verify_checksum,
-            window_size_max,
-        ),
-        .skippable => {
-            const content_size = try fbs.reader().readInt(u32, .little);
-            if (content_size > std.math.maxInt(usize) - 8) return error.SkippableSizeTooLarge;
-            const read_count = @as(usize, content_size) + 8;
-            if (read_count > src.len) return error.SkippableSizeTooLarge;
-            return read_count;
-        },
-    }
-}
-
-/// Returns the frame checksum corresponding to the data fed into `hasher`
-pub fn computeChecksum(hasher: *std.hash.XxHash64) u32 {
-    const hash = hasher.final();
-    return @as(u32, @intCast(hash & 0xFFFFFFFF));
-}
-
-const FrameError = error{
-    ChecksumFailure,
-    BadContentSize,
-    EndOfStream,
-    ReservedBitSet,
-} || block.Error;
-
-/// Decode a Zstandard frame from `src` into `dest`, returning the number of
-/// bytes read from `src` and written to `dest`. The first four bytes of `src`
-/// must be the magic number for a Zstandard frame.
-///
-/// Errors returned:
-/// - `error.UnknownContentSizeUnsupported` if the frame does not declare the
-/// uncompressed content size
-/// - `error.ContentTooLarge` if `dest` is smaller than the uncompressed data
-/// size declared by the frame header
-/// - `error.WindowSizeUnknown` if the frame does not have a valid window size
-/// - `error.DictionaryIdFlagUnsupported` if the frame uses a dictionary
-/// - `error.ContentSizeTooLarge` if the frame header indicates a content size
-/// that is larger than `std.math.maxInt(usize)`
-/// - `error.ChecksumFailure` if `verify_checksum` is true and the frame
-/// contains a checksum that does not match the checksum of the decompressed
-/// data
-/// - `error.ReservedBitSet` if the reserved bit of the frame header is set
-/// - `error.EndOfStream` if `src` does not contain a complete frame
-/// - an error in `block.Error` if there are errors decoding a block
-/// - `error.BadContentSize` if the content size declared by the frame does
-/// not equal the actual size of decompressed data
-pub fn decodeZstandardFrame(
-    dest: []u8,
-    src: []const u8,
-    verify_checksum: bool,
-) (error{
-    UnknownContentSizeUnsupported,
-    ContentTooLarge,
-    ContentSizeTooLarge,
-    WindowSizeUnknown,
-    DictionaryIdFlagUnsupported,
-} || FrameError)!ReadWriteCount {
-    assert(std.mem.readInt(u32, src[0..4], .little) == frame.Zstandard.magic_number);
-    var consumed_count: usize = 4;
-
-    var frame_context = context: {
-        var fbs = std.io.fixedBufferStream(src[consumed_count..]);
-        const source = fbs.reader();
-        const frame_header = try decodeZstandardHeader(source);
-        consumed_count += fbs.pos;
-        break :context FrameContext.init(
-            frame_header,
-            std.math.maxInt(usize),
-            verify_checksum,
-        ) catch |err| switch (err) {
-            error.WindowTooLarge => unreachable,
-            inline else => |e| return e,
-        };
-    };
-    const counts = try decodeZStandardFrameBlocks(
-        dest,
-        src[consumed_count..],
-        &frame_context,
-    );
-    return ReadWriteCount{
-        .read_count = counts.read_count + consumed_count,
-        .write_count = counts.write_count,
-    };
-}
-
-pub fn decodeZStandardFrameBlocks(
-    dest: []u8,
-    src: []const u8,
-    frame_context: *FrameContext,
-) (error{ ContentTooLarge, UnknownContentSizeUnsupported } || FrameError)!ReadWriteCount {
-    const content_size = frame_context.content_size orelse
-        return error.UnknownContentSizeUnsupported;
-    if (dest.len < content_size) return error.ContentTooLarge;
-
-    var consumed_count: usize = 0;
-    const written_count = decodeFrameBlocksInner(
-        dest[0..content_size],
-        src[consumed_count..],
-        &consumed_count,
-        if (frame_context.hasher_opt) |*hasher| hasher else null,
-        frame_context.block_size_max,
-    ) catch |err| switch (err) {
-        error.DestTooSmall => return error.BadContentSize,
-        inline else => |e| return e,
-    };
-
-    if (written_count != content_size) return error.BadContentSize;
-    if (frame_context.has_checksum) {
-        if (src.len < consumed_count + 4) return error.EndOfStream;
-        const checksum = std.mem.readInt(u32, src[consumed_count..][0..4], .little);
-        consumed_count += 4;
-        if (frame_context.hasher_opt) |*hasher| {
-            if (checksum != computeChecksum(hasher)) return error.ChecksumFailure;
-        }
-    }
-    return ReadWriteCount{ .read_count = consumed_count, .write_count = written_count };
-}
-
-pub const FrameContext = struct {
-    hasher_opt: ?std.hash.XxHash64,
-    window_size: usize,
-    has_checksum: bool,
-    block_size_max: usize,
-    content_size: ?usize,
-
-    const Error = error{
-        DictionaryIdFlagUnsupported,
-        WindowSizeUnknown,
-        WindowTooLarge,
-        ContentSizeTooLarge,
-    };
-    /// Validates `frame_header` and returns the associated `FrameContext`.
-    ///
-    /// Errors returned:
-    /// - `error.DictionaryIdFlagUnsupported` if the frame uses a dictionary
-    /// - `error.WindowSizeUnknown` if the frame does not have a valid window
-    /// size
-    /// - `error.WindowTooLarge` if the window size is larger than
-    /// `window_size_max` or `std.math.maxInt(usize)`
-    /// - `error.ContentSizeTooLarge` if the frame header indicates a content
-    /// size larger than `std.math.maxInt(usize)`
-    pub fn init(
-        frame_header: ZstandardHeader,
-        window_size_max: usize,
-        verify_checksum: bool,
-    ) Error!FrameContext {
-        if (frame_header.descriptor.dictionary_id_flag != 0)
-            return error.DictionaryIdFlagUnsupported;
-
-        const window_size_raw = frameWindowSize(frame_header) orelse return error.WindowSizeUnknown;
-        const window_size = if (window_size_raw > window_size_max)
-            return error.WindowTooLarge
-        else
-            std.math.cast(usize, window_size_raw) orelse return error.WindowTooLarge;
-
-        const should_compute_checksum =
-            frame_header.descriptor.content_checksum_flag and verify_checksum;
-
-        const content_size = if (frame_header.content_size) |size|
-            std.math.cast(usize, size) orelse return error.ContentSizeTooLarge
-        else
-            null;
-
-        return .{
-            .hasher_opt = if (should_compute_checksum) std.hash.XxHash64.init(0) else null,
-            .window_size = window_size,
-            .has_checksum = frame_header.descriptor.content_checksum_flag,
-            .block_size_max = @min(types.block_size_max, window_size),
-            .content_size = content_size,
-        };
-    }
-};
-
-/// Decode a Zstandard frame from `src` and return number of bytes read; see
-/// `decodeZstandardFrame()`. The first four bytes of `src` must be the magic
-/// number for a Zstandard frame.
-///
-/// Errors returned:
-/// - `error.WindowSizeUnknown` if the frame does not have a valid window size
-/// - `error.WindowTooLarge` if the window size is larger than
-/// `window_size_max`
-/// - `error.DictionaryIdFlagUnsupported` if the frame uses a dictionary
-/// - `error.ContentSizeTooLarge` if the frame header indicates a content size
-/// that is larger than `std.math.maxInt(usize)`
-/// - `error.ChecksumFailure` if `verify_checksum` is true and the frame
-/// contains a checksum that does not match the checksum of the decompressed
-/// data
-/// - `error.ReservedBitSet` if the reserved bit of the frame header is set
-/// - `error.EndOfStream` if `src` does not contain a complete frame
-/// - `error.OutOfMemory` if `allocator` cannot allocate enough memory
-/// - an error in `block.Error` if there are errors decoding a block
-/// - `error.BadContentSize` if the content size declared by the frame does
-/// not equal the size of decompressed data
-pub fn decodeZstandardFrameArrayList(
-    allocator: Allocator,
-    dest: *std.ArrayList(u8),
-    src: []const u8,
-    verify_checksum: bool,
-    window_size_max: usize,
-) (error{OutOfMemory} || FrameContext.Error || FrameError)!usize {
-    assert(std.mem.readInt(u32, src[0..4], .little) == frame.Zstandard.magic_number);
-    var consumed_count: usize = 4;
-
-    var frame_context = context: {
-        var fbs = std.io.fixedBufferStream(src[consumed_count..]);
-        const source = fbs.reader();
-        const frame_header = try decodeZstandardHeader(source);
-        consumed_count += fbs.pos;
-        break :context try FrameContext.init(frame_header, window_size_max, verify_checksum);
-    };
-
-    consumed_count += try decodeZstandardFrameBlocksArrayList(
-        allocator,
-        dest,
-        src[consumed_count..],
-        &frame_context,
-    );
-    return consumed_count;
-}
-
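-/// Decode the data blocks of a frame from `src`, appending the decompressed
-/// bytes to `dest` using an already-initialized `frame_context`; returns the
-/// number of bytes of `src` consumed.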
-pub fn decodeZstandardFrameBlocksArrayList( - allocator: Allocator, - dest: *std.ArrayList(u8), - src: []const u8, - frame_context: *FrameContext, -) (error{OutOfMemory} || FrameError)!usize { - const initial_len = dest.items.len; - - var ring_buffer = try RingBuffer.init(allocator, frame_context.window_size); - defer ring_buffer.deinit(allocator); - - // These tables take 7680 bytes - var literal_fse_data: [types.compressed_block.table_size_max.literal]Table.Fse = undefined; - var match_fse_data: [types.compressed_block.table_size_max.match]Table.Fse = undefined; - var offset_fse_data: [types.compressed_block.table_size_max.offset]Table.Fse = undefined; - - var block_header = try block.decodeBlockHeaderSlice(src); - var consumed_count: usize = 3; - var decode_state = block.DecodeState.init(&literal_fse_data, &match_fse_data, &offset_fse_data); - while (true) : ({ - block_header = try block.decodeBlockHeaderSlice(src[consumed_count..]); - consumed_count += 3; - }) { - const written_size = try block.decodeBlockRingBuffer( - &ring_buffer, - src[consumed_count..], - block_header, - &decode_state, - &consumed_count, - frame_context.block_size_max, - ); - if (frame_context.content_size) |size| { - if (dest.items.len - initial_len > size) { - return error.BadContentSize; - } - } - if (written_size > 0) { - const written_slice = ring_buffer.sliceLast(written_size); - try dest.appendSlice(written_slice.first); - try dest.appendSlice(written_slice.second); - if (frame_context.hasher_opt) |*hasher| { - hasher.update(written_slice.first); - hasher.update(written_slice.second); - } - } - if (block_header.last_block) break; - } - if (frame_context.content_size) |size| { - if (dest.items.len - initial_len != size) { - return error.BadContentSize; - } - } - - if (frame_context.has_checksum) { - if (src.len < consumed_count + 4) return error.EndOfStream; - const checksum = std.mem.readInt(u32, src[consumed_count..][0..4], .little); - consumed_count += 4; - if (frame_context.hasher_opt) |*hasher| { - if (checksum != computeChecksum(hasher)) return error.ChecksumFailure; - } - } - return consumed_count; -} - -fn decodeFrameBlocksInner( - dest: []u8, - src: []const u8, - consumed_count: *usize, - hash: ?*std.hash.XxHash64, - block_size_max: usize, -) (error{ EndOfStream, DestTooSmall } || block.Error)!usize { - // These tables take 7680 bytes - var literal_fse_data: [types.compressed_block.table_size_max.literal]Table.Fse = undefined; - var match_fse_data: [types.compressed_block.table_size_max.match]Table.Fse = undefined; - var offset_fse_data: [types.compressed_block.table_size_max.offset]Table.Fse = undefined; - - var block_header = try block.decodeBlockHeaderSlice(src); - var bytes_read: usize = 3; - defer consumed_count.* += bytes_read; - var decode_state = block.DecodeState.init(&literal_fse_data, &match_fse_data, &offset_fse_data); - var count: usize = 0; - while (true) : ({ - block_header = try block.decodeBlockHeaderSlice(src[bytes_read..]); - bytes_read += 3; - }) { - const written_size = try block.decodeBlock( - dest, - src[bytes_read..], - block_header, - &decode_state, - &bytes_read, - block_size_max, - count, - ); - if (hash) |hash_state| hash_state.update(dest[count .. count + written_size]); - count += written_size; - if (block_header.last_block) break; - } - return count; -} - -/// Decode the header of a skippable frame. The first four bytes of `src` must -/// be a valid magic number for a skippable frame. 
-pub fn decodeSkippableHeader(src: *const [8]u8) SkippableHeader { - const magic = std.mem.readInt(u32, src[0..4], .little); - assert(isSkippableMagic(magic)); - const frame_size = std.mem.readInt(u32, src[4..8], .little); - return .{ - .magic_number = magic, - .frame_size = frame_size, - }; -} - -/// Returns the window size required to decompress a frame, or `null` if it -/// cannot be determined (which indicates a malformed frame header). -pub fn frameWindowSize(header: ZstandardHeader) ?u64 { - if (header.window_descriptor) |descriptor| { - const exponent = (descriptor & 0b11111000) >> 3; - const mantissa = descriptor & 0b00000111; - const window_log = 10 + exponent; - const window_base = @as(u64, 1) << @as(u6, @intCast(window_log)); - const window_add = (window_base / 8) * mantissa; - return window_base + window_add; - } else return header.content_size; -} - -/// Decode the header of a Zstandard frame. -/// -/// Errors returned: -/// - `error.ReservedBitSet` if any of the reserved bits of the header are set -/// - `error.EndOfStream` if `source` does not contain a complete header -pub fn decodeZstandardHeader( - source: anytype, -) (@TypeOf(source).Error || error{ EndOfStream, ReservedBitSet })!ZstandardHeader { - const descriptor = @as(ZstandardHeader.Descriptor, @bitCast(try source.readByte())); - - if (descriptor.reserved) return error.ReservedBitSet; - - var window_descriptor: ?u8 = null; - if (!descriptor.single_segment_flag) { - window_descriptor = try source.readByte(); - } - - var dictionary_id: ?u32 = null; - if (descriptor.dictionary_id_flag > 0) { - // if flag is 3 then field_size = 4, else field_size = flag - const field_size = (@as(u4, 1) << descriptor.dictionary_id_flag) >> 1; - dictionary_id = try source.readVarInt(u32, .little, field_size); - } - - var content_size: ?u64 = null; - if (descriptor.single_segment_flag or descriptor.content_size_flag > 0) { - const field_size = @as(u4, 1) << descriptor.content_size_flag; - content_size = try source.readVarInt(u64, .little, field_size); - if (field_size == 2) content_size.? += 256; - } - - const header = ZstandardHeader{ - .descriptor = descriptor, - .window_descriptor = window_descriptor, - .dictionary_id = dictionary_id, - .content_size = content_size, - }; - return header; -} - -test { - std.testing.refAllDecls(@This()); -} diff --git a/lib/std/compress/zstandard/readers.zig b/lib/std/compress/zstandard/readers.zig deleted file mode 100644 index 7b15784187..0000000000 --- a/lib/std/compress/zstandard/readers.zig +++ /dev/null @@ -1,82 +0,0 @@ -const std = @import("std"); - -pub const ReversedByteReader = struct { - remaining_bytes: usize, - bytes: []const u8, - - const Reader = std.io.GenericReader(*ReversedByteReader, error{}, readFn); - - pub fn init(bytes: []const u8) ReversedByteReader { - return .{ - .bytes = bytes, - .remaining_bytes = bytes.len, - }; - } - - pub fn reader(self: *ReversedByteReader) Reader { - return .{ .context = self }; - } - - fn readFn(ctx: *ReversedByteReader, buffer: []u8) !usize { - if (ctx.remaining_bytes == 0) return 0; - const byte_index = ctx.remaining_bytes - 1; - buffer[0] = ctx.bytes[byte_index]; - // buffer[0] = @bitReverse(ctx.bytes[byte_index]); - ctx.remaining_bytes = byte_index; - return 1; - } -}; - -/// A bit reader for reading the reversed bit streams used to encode -/// FSE compressed data. 
-pub const ReverseBitReader = struct { - byte_reader: ReversedByteReader, - bit_reader: std.io.BitReader(.big, ReversedByteReader.Reader), - - pub fn init(self: *ReverseBitReader, bytes: []const u8) error{BitStreamHasNoStartBit}!void { - self.byte_reader = ReversedByteReader.init(bytes); - self.bit_reader = std.io.bitReader(.big, self.byte_reader.reader()); - if (bytes.len == 0) return; - var i: usize = 0; - while (i < 8 and 0 == self.readBitsNoEof(u1, 1) catch unreachable) : (i += 1) {} - if (i == 8) return error.BitStreamHasNoStartBit; - } - - pub fn readBitsNoEof(self: *@This(), comptime U: type, num_bits: u16) error{EndOfStream}!U { - return self.bit_reader.readBitsNoEof(U, num_bits); - } - - pub fn readBits(self: *@This(), comptime U: type, num_bits: u16, out_bits: *u16) error{}!U { - return try self.bit_reader.readBits(U, num_bits, out_bits); - } - - pub fn alignToByte(self: *@This()) void { - self.bit_reader.alignToByte(); - } - - pub fn isEmpty(self: ReverseBitReader) bool { - return self.byte_reader.remaining_bytes == 0 and self.bit_reader.count == 0; - } -}; - -pub fn BitReader(comptime Reader: type) type { - return struct { - underlying: std.io.BitReader(.little, Reader), - - pub fn readBitsNoEof(self: *@This(), comptime U: type, num_bits: u16) !U { - return self.underlying.readBitsNoEof(U, num_bits); - } - - pub fn readBits(self: *@This(), comptime U: type, num_bits: u16, out_bits: *u16) !U { - return self.underlying.readBits(U, num_bits, out_bits); - } - - pub fn alignToByte(self: *@This()) void { - self.underlying.alignToByte(); - } - }; -} - -pub fn bitReader(reader: anytype) BitReader(@TypeOf(reader)) { - return .{ .underlying = std.io.bitReader(.little, reader) }; -} diff --git a/lib/std/compress/zstandard/types.zig b/lib/std/compress/zstandard/types.zig deleted file mode 100644 index 41c3797d16..0000000000 --- a/lib/std/compress/zstandard/types.zig +++ /dev/null @@ -1,403 +0,0 @@ -pub const block_size_max = 1 << 17; - -pub const frame = struct { - pub const Kind = enum { zstandard, skippable }; - - pub const Zstandard = struct { - pub const magic_number = 0xFD2FB528; - - header: Header, - data_blocks: []Block, - checksum: ?u32, - - pub const Header = struct { - descriptor: Descriptor, - window_descriptor: ?u8, - dictionary_id: ?u32, - content_size: ?u64, - - pub const Descriptor = packed struct { - dictionary_id_flag: u2, - content_checksum_flag: bool, - reserved: bool, - unused: bool, - single_segment_flag: bool, - content_size_flag: u2, - }; - }; - - pub const Block = struct { - pub const Header = struct { - last_block: bool, - block_type: Block.Type, - block_size: u21, - }; - - pub const Type = enum(u2) { - raw, - rle, - compressed, - reserved, - }; - }; - }; - - pub const Skippable = struct { - pub const magic_number_min = 0x184D2A50; - pub const magic_number_max = 0x184D2A5F; - - pub const Header = struct { - magic_number: u32, - frame_size: u32, - }; - }; -}; - -pub const compressed_block = struct { - pub const LiteralsSection = struct { - header: Header, - huffman_tree: ?HuffmanTree, - streams: Streams, - - pub const Streams = union(enum) { - one: []const u8, - four: [4][]const u8, - }; - - pub const Header = struct { - block_type: BlockType, - size_format: u2, - regenerated_size: u20, - compressed_size: ?u18, - }; - - pub const BlockType = enum(u2) { - raw, - rle, - compressed, - treeless, - }; - - pub const HuffmanTree = struct { - max_bit_count: u4, - symbol_count_minus_one: u8, - nodes: [256]PrefixedSymbol, - - pub const PrefixedSymbol = struct { - symbol: u8, 
- prefix: u16, - weight: u4, - }; - - pub const Result = union(enum) { - symbol: u8, - index: usize, - }; - - pub fn query(self: HuffmanTree, index: usize, prefix: u16) error{NotFound}!Result { - var node = self.nodes[index]; - const weight = node.weight; - var i: usize = index; - while (node.weight == weight) { - if (node.prefix == prefix) return Result{ .symbol = node.symbol }; - if (i == 0) return error.NotFound; - i -= 1; - node = self.nodes[i]; - } - return Result{ .index = i }; - } - - pub fn weightToBitCount(weight: u4, max_bit_count: u4) u4 { - return if (weight == 0) 0 else ((max_bit_count + 1) - weight); - } - }; - - pub const StreamCount = enum { one, four }; - pub fn streamCount(size_format: u2, block_type: BlockType) StreamCount { - return switch (block_type) { - .raw, .rle => .one, - .compressed, .treeless => if (size_format == 0) .one else .four, - }; - } - }; - - pub const SequencesSection = struct { - header: SequencesSection.Header, - literals_length_table: Table, - offset_table: Table, - match_length_table: Table, - - pub const Header = struct { - sequence_count: u24, - match_lengths: Mode, - offsets: Mode, - literal_lengths: Mode, - - pub const Mode = enum(u2) { - predefined, - rle, - fse, - repeat, - }; - }; - }; - - pub const Table = union(enum) { - fse: []const Fse, - rle: u8, - - pub const Fse = struct { - symbol: u8, - baseline: u16, - bits: u8, - }; - }; - - pub const literals_length_code_table = [36]struct { u32, u5 }{ - .{ 0, 0 }, .{ 1, 0 }, .{ 2, 0 }, .{ 3, 0 }, - .{ 4, 0 }, .{ 5, 0 }, .{ 6, 0 }, .{ 7, 0 }, - .{ 8, 0 }, .{ 9, 0 }, .{ 10, 0 }, .{ 11, 0 }, - .{ 12, 0 }, .{ 13, 0 }, .{ 14, 0 }, .{ 15, 0 }, - .{ 16, 1 }, .{ 18, 1 }, .{ 20, 1 }, .{ 22, 1 }, - .{ 24, 2 }, .{ 28, 2 }, .{ 32, 3 }, .{ 40, 3 }, - .{ 48, 4 }, .{ 64, 6 }, .{ 128, 7 }, .{ 256, 8 }, - .{ 512, 9 }, .{ 1024, 10 }, .{ 2048, 11 }, .{ 4096, 12 }, - .{ 8192, 13 }, .{ 16384, 14 }, .{ 32768, 15 }, .{ 65536, 16 }, - }; - - pub const match_length_code_table = [53]struct { u32, u5 }{ - .{ 3, 0 }, .{ 4, 0 }, .{ 5, 0 }, .{ 6, 0 }, .{ 7, 0 }, .{ 8, 0 }, - .{ 9, 0 }, .{ 10, 0 }, .{ 11, 0 }, .{ 12, 0 }, .{ 13, 0 }, .{ 14, 0 }, - .{ 15, 0 }, .{ 16, 0 }, .{ 17, 0 }, .{ 18, 0 }, .{ 19, 0 }, .{ 20, 0 }, - .{ 21, 0 }, .{ 22, 0 }, .{ 23, 0 }, .{ 24, 0 }, .{ 25, 0 }, .{ 26, 0 }, - .{ 27, 0 }, .{ 28, 0 }, .{ 29, 0 }, .{ 30, 0 }, .{ 31, 0 }, .{ 32, 0 }, - .{ 33, 0 }, .{ 34, 0 }, .{ 35, 1 }, .{ 37, 1 }, .{ 39, 1 }, .{ 41, 1 }, - .{ 43, 2 }, .{ 47, 2 }, .{ 51, 3 }, .{ 59, 3 }, .{ 67, 4 }, .{ 83, 4 }, - .{ 99, 5 }, .{ 131, 7 }, .{ 259, 8 }, .{ 515, 9 }, .{ 1027, 10 }, .{ 2051, 11 }, - .{ 4099, 12 }, .{ 8195, 13 }, .{ 16387, 14 }, .{ 32771, 15 }, .{ 65539, 16 }, - }; - - pub const literals_length_default_distribution = [36]i16{ - 4, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 2, 1, 1, 1, 1, 1, - -1, -1, -1, -1, - }; - - pub const match_lengths_default_distribution = [53]i16{ - 1, 4, 3, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, -1, - -1, -1, -1, -1, -1, - }; - - pub const offset_codes_default_distribution = [29]i16{ - 1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, - }; - - pub const predefined_literal_fse_table = Table{ - .fse = &[64]Table.Fse{ - .{ .symbol = 0, .bits = 4, .baseline = 0 }, - .{ .symbol = 0, .bits = 4, .baseline = 16 }, - .{ .symbol = 1, .bits = 5, .baseline = 32 }, - .{ .symbol = 3, .bits = 5, .baseline = 0 }, - .{ .symbol = 4, 
.bits = 5, .baseline = 0 }, - .{ .symbol = 6, .bits = 5, .baseline = 0 }, - .{ .symbol = 7, .bits = 5, .baseline = 0 }, - .{ .symbol = 9, .bits = 5, .baseline = 0 }, - .{ .symbol = 10, .bits = 5, .baseline = 0 }, - .{ .symbol = 12, .bits = 5, .baseline = 0 }, - .{ .symbol = 14, .bits = 6, .baseline = 0 }, - .{ .symbol = 16, .bits = 5, .baseline = 0 }, - .{ .symbol = 18, .bits = 5, .baseline = 0 }, - .{ .symbol = 19, .bits = 5, .baseline = 0 }, - .{ .symbol = 21, .bits = 5, .baseline = 0 }, - .{ .symbol = 22, .bits = 5, .baseline = 0 }, - .{ .symbol = 24, .bits = 5, .baseline = 0 }, - .{ .symbol = 25, .bits = 5, .baseline = 32 }, - .{ .symbol = 26, .bits = 5, .baseline = 0 }, - .{ .symbol = 27, .bits = 6, .baseline = 0 }, - .{ .symbol = 29, .bits = 6, .baseline = 0 }, - .{ .symbol = 31, .bits = 6, .baseline = 0 }, - .{ .symbol = 0, .bits = 4, .baseline = 32 }, - .{ .symbol = 1, .bits = 4, .baseline = 0 }, - .{ .symbol = 2, .bits = 5, .baseline = 0 }, - .{ .symbol = 4, .bits = 5, .baseline = 32 }, - .{ .symbol = 5, .bits = 5, .baseline = 0 }, - .{ .symbol = 7, .bits = 5, .baseline = 32 }, - .{ .symbol = 8, .bits = 5, .baseline = 0 }, - .{ .symbol = 10, .bits = 5, .baseline = 32 }, - .{ .symbol = 11, .bits = 5, .baseline = 0 }, - .{ .symbol = 13, .bits = 6, .baseline = 0 }, - .{ .symbol = 16, .bits = 5, .baseline = 32 }, - .{ .symbol = 17, .bits = 5, .baseline = 0 }, - .{ .symbol = 19, .bits = 5, .baseline = 32 }, - .{ .symbol = 20, .bits = 5, .baseline = 0 }, - .{ .symbol = 22, .bits = 5, .baseline = 32 }, - .{ .symbol = 23, .bits = 5, .baseline = 0 }, - .{ .symbol = 25, .bits = 4, .baseline = 0 }, - .{ .symbol = 25, .bits = 4, .baseline = 16 }, - .{ .symbol = 26, .bits = 5, .baseline = 32 }, - .{ .symbol = 28, .bits = 6, .baseline = 0 }, - .{ .symbol = 30, .bits = 6, .baseline = 0 }, - .{ .symbol = 0, .bits = 4, .baseline = 48 }, - .{ .symbol = 1, .bits = 4, .baseline = 16 }, - .{ .symbol = 2, .bits = 5, .baseline = 32 }, - .{ .symbol = 3, .bits = 5, .baseline = 32 }, - .{ .symbol = 5, .bits = 5, .baseline = 32 }, - .{ .symbol = 6, .bits = 5, .baseline = 32 }, - .{ .symbol = 8, .bits = 5, .baseline = 32 }, - .{ .symbol = 9, .bits = 5, .baseline = 32 }, - .{ .symbol = 11, .bits = 5, .baseline = 32 }, - .{ .symbol = 12, .bits = 5, .baseline = 32 }, - .{ .symbol = 15, .bits = 6, .baseline = 0 }, - .{ .symbol = 17, .bits = 5, .baseline = 32 }, - .{ .symbol = 18, .bits = 5, .baseline = 32 }, - .{ .symbol = 20, .bits = 5, .baseline = 32 }, - .{ .symbol = 21, .bits = 5, .baseline = 32 }, - .{ .symbol = 23, .bits = 5, .baseline = 32 }, - .{ .symbol = 24, .bits = 5, .baseline = 32 }, - .{ .symbol = 35, .bits = 6, .baseline = 0 }, - .{ .symbol = 34, .bits = 6, .baseline = 0 }, - .{ .symbol = 33, .bits = 6, .baseline = 0 }, - .{ .symbol = 32, .bits = 6, .baseline = 0 }, - }, - }; - - pub const predefined_match_fse_table = Table{ - .fse = &[64]Table.Fse{ - .{ .symbol = 0, .bits = 6, .baseline = 0 }, - .{ .symbol = 1, .bits = 4, .baseline = 0 }, - .{ .symbol = 2, .bits = 5, .baseline = 32 }, - .{ .symbol = 3, .bits = 5, .baseline = 0 }, - .{ .symbol = 5, .bits = 5, .baseline = 0 }, - .{ .symbol = 6, .bits = 5, .baseline = 0 }, - .{ .symbol = 8, .bits = 5, .baseline = 0 }, - .{ .symbol = 10, .bits = 6, .baseline = 0 }, - .{ .symbol = 13, .bits = 6, .baseline = 0 }, - .{ .symbol = 16, .bits = 6, .baseline = 0 }, - .{ .symbol = 19, .bits = 6, .baseline = 0 }, - .{ .symbol = 22, .bits = 6, .baseline = 0 }, - .{ .symbol = 25, .bits = 6, .baseline = 0 }, - .{ .symbol = 28, .bits = 6, .baseline = 0 }, - .{ 
.symbol = 31, .bits = 6, .baseline = 0 }, - .{ .symbol = 33, .bits = 6, .baseline = 0 }, - .{ .symbol = 35, .bits = 6, .baseline = 0 }, - .{ .symbol = 37, .bits = 6, .baseline = 0 }, - .{ .symbol = 39, .bits = 6, .baseline = 0 }, - .{ .symbol = 41, .bits = 6, .baseline = 0 }, - .{ .symbol = 43, .bits = 6, .baseline = 0 }, - .{ .symbol = 45, .bits = 6, .baseline = 0 }, - .{ .symbol = 1, .bits = 4, .baseline = 16 }, - .{ .symbol = 2, .bits = 4, .baseline = 0 }, - .{ .symbol = 3, .bits = 5, .baseline = 32 }, - .{ .symbol = 4, .bits = 5, .baseline = 0 }, - .{ .symbol = 6, .bits = 5, .baseline = 32 }, - .{ .symbol = 7, .bits = 5, .baseline = 0 }, - .{ .symbol = 9, .bits = 6, .baseline = 0 }, - .{ .symbol = 12, .bits = 6, .baseline = 0 }, - .{ .symbol = 15, .bits = 6, .baseline = 0 }, - .{ .symbol = 18, .bits = 6, .baseline = 0 }, - .{ .symbol = 21, .bits = 6, .baseline = 0 }, - .{ .symbol = 24, .bits = 6, .baseline = 0 }, - .{ .symbol = 27, .bits = 6, .baseline = 0 }, - .{ .symbol = 30, .bits = 6, .baseline = 0 }, - .{ .symbol = 32, .bits = 6, .baseline = 0 }, - .{ .symbol = 34, .bits = 6, .baseline = 0 }, - .{ .symbol = 36, .bits = 6, .baseline = 0 }, - .{ .symbol = 38, .bits = 6, .baseline = 0 }, - .{ .symbol = 40, .bits = 6, .baseline = 0 }, - .{ .symbol = 42, .bits = 6, .baseline = 0 }, - .{ .symbol = 44, .bits = 6, .baseline = 0 }, - .{ .symbol = 1, .bits = 4, .baseline = 32 }, - .{ .symbol = 1, .bits = 4, .baseline = 48 }, - .{ .symbol = 2, .bits = 4, .baseline = 16 }, - .{ .symbol = 4, .bits = 5, .baseline = 32 }, - .{ .symbol = 5, .bits = 5, .baseline = 32 }, - .{ .symbol = 7, .bits = 5, .baseline = 32 }, - .{ .symbol = 8, .bits = 5, .baseline = 32 }, - .{ .symbol = 11, .bits = 6, .baseline = 0 }, - .{ .symbol = 14, .bits = 6, .baseline = 0 }, - .{ .symbol = 17, .bits = 6, .baseline = 0 }, - .{ .symbol = 20, .bits = 6, .baseline = 0 }, - .{ .symbol = 23, .bits = 6, .baseline = 0 }, - .{ .symbol = 26, .bits = 6, .baseline = 0 }, - .{ .symbol = 29, .bits = 6, .baseline = 0 }, - .{ .symbol = 52, .bits = 6, .baseline = 0 }, - .{ .symbol = 51, .bits = 6, .baseline = 0 }, - .{ .symbol = 50, .bits = 6, .baseline = 0 }, - .{ .symbol = 49, .bits = 6, .baseline = 0 }, - .{ .symbol = 48, .bits = 6, .baseline = 0 }, - .{ .symbol = 47, .bits = 6, .baseline = 0 }, - .{ .symbol = 46, .bits = 6, .baseline = 0 }, - }, - }; - - pub const predefined_offset_fse_table = Table{ - .fse = &[32]Table.Fse{ - .{ .symbol = 0, .bits = 5, .baseline = 0 }, - .{ .symbol = 6, .bits = 4, .baseline = 0 }, - .{ .symbol = 9, .bits = 5, .baseline = 0 }, - .{ .symbol = 15, .bits = 5, .baseline = 0 }, - .{ .symbol = 21, .bits = 5, .baseline = 0 }, - .{ .symbol = 3, .bits = 5, .baseline = 0 }, - .{ .symbol = 7, .bits = 4, .baseline = 0 }, - .{ .symbol = 12, .bits = 5, .baseline = 0 }, - .{ .symbol = 18, .bits = 5, .baseline = 0 }, - .{ .symbol = 23, .bits = 5, .baseline = 0 }, - .{ .symbol = 5, .bits = 5, .baseline = 0 }, - .{ .symbol = 8, .bits = 4, .baseline = 0 }, - .{ .symbol = 14, .bits = 5, .baseline = 0 }, - .{ .symbol = 20, .bits = 5, .baseline = 0 }, - .{ .symbol = 2, .bits = 5, .baseline = 0 }, - .{ .symbol = 7, .bits = 4, .baseline = 16 }, - .{ .symbol = 11, .bits = 5, .baseline = 0 }, - .{ .symbol = 17, .bits = 5, .baseline = 0 }, - .{ .symbol = 22, .bits = 5, .baseline = 0 }, - .{ .symbol = 4, .bits = 5, .baseline = 0 }, - .{ .symbol = 8, .bits = 4, .baseline = 16 }, - .{ .symbol = 13, .bits = 5, .baseline = 0 }, - .{ .symbol = 19, .bits = 5, .baseline = 0 }, - .{ .symbol = 1, .bits = 5, .baseline = 0 }, - .{ 
.symbol = 6, .bits = 4, .baseline = 16 }, - .{ .symbol = 10, .bits = 5, .baseline = 0 }, - .{ .symbol = 16, .bits = 5, .baseline = 0 }, - .{ .symbol = 28, .bits = 5, .baseline = 0 }, - .{ .symbol = 27, .bits = 5, .baseline = 0 }, - .{ .symbol = 26, .bits = 5, .baseline = 0 }, - .{ .symbol = 25, .bits = 5, .baseline = 0 }, - .{ .symbol = 24, .bits = 5, .baseline = 0 }, - }, - }; - pub const start_repeated_offset_1 = 1; - pub const start_repeated_offset_2 = 4; - pub const start_repeated_offset_3 = 8; - - pub const table_accuracy_log_max = struct { - pub const literal = 9; - pub const match = 9; - pub const offset = 8; - }; - - pub const table_symbol_count_max = struct { - pub const literal = 36; - pub const match = 53; - pub const offset = 32; - }; - - pub const default_accuracy_log = struct { - pub const literal = 6; - pub const match = 6; - pub const offset = 5; - }; - pub const table_size_max = struct { - pub const literal = 1 << table_accuracy_log_max.literal; - pub const match = 1 << table_accuracy_log_max.match; - pub const offset = 1 << table_accuracy_log_max.offset; - }; -}; - -test { - const testing = @import("std").testing; - testing.refAllDeclsRecursive(@This()); -} diff --git a/lib/std/compress/zstd.zig b/lib/std/compress/zstd.zig new file mode 100644 index 0000000000..0352a0e1f4 --- /dev/null +++ b/lib/std/compress/zstd.zig @@ -0,0 +1,152 @@ +const std = @import("../std.zig"); +const assert = std.debug.assert; + +pub const Decompress = @import("zstd/Decompress.zig"); + +/// Recommended amount by the standard. Lower than this may result in inability +/// to decompress common streams. +pub const default_window_len = 8 * 1024 * 1024; +pub const block_size_max = 1 << 17; + +pub const literals_length_default_distribution = [36]i16{ + 4, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 2, 1, 1, 1, 1, 1, + -1, -1, -1, -1, +}; + +pub const match_lengths_default_distribution = [53]i16{ + 1, 4, 3, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, -1, + -1, -1, -1, -1, -1, +}; + +pub const offset_codes_default_distribution = [29]i16{ + 1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, +}; + +pub const start_repeated_offset_1 = 1; +pub const start_repeated_offset_2 = 4; +pub const start_repeated_offset_3 = 8; + +pub const literals_length_code_table = [36]struct { u32, u5 }{ + .{ 0, 0 }, .{ 1, 0 }, .{ 2, 0 }, .{ 3, 0 }, + .{ 4, 0 }, .{ 5, 0 }, .{ 6, 0 }, .{ 7, 0 }, + .{ 8, 0 }, .{ 9, 0 }, .{ 10, 0 }, .{ 11, 0 }, + .{ 12, 0 }, .{ 13, 0 }, .{ 14, 0 }, .{ 15, 0 }, + .{ 16, 1 }, .{ 18, 1 }, .{ 20, 1 }, .{ 22, 1 }, + .{ 24, 2 }, .{ 28, 2 }, .{ 32, 3 }, .{ 40, 3 }, + .{ 48, 4 }, .{ 64, 6 }, .{ 128, 7 }, .{ 256, 8 }, + .{ 512, 9 }, .{ 1024, 10 }, .{ 2048, 11 }, .{ 4096, 12 }, + .{ 8192, 13 }, .{ 16384, 14 }, .{ 32768, 15 }, .{ 65536, 16 }, +}; + +pub const match_length_code_table = [53]struct { u32, u5 }{ + .{ 3, 0 }, .{ 4, 0 }, .{ 5, 0 }, .{ 6, 0 }, .{ 7, 0 }, .{ 8, 0 }, + .{ 9, 0 }, .{ 10, 0 }, .{ 11, 0 }, .{ 12, 0 }, .{ 13, 0 }, .{ 14, 0 }, + .{ 15, 0 }, .{ 16, 0 }, .{ 17, 0 }, .{ 18, 0 }, .{ 19, 0 }, .{ 20, 0 }, + .{ 21, 0 }, .{ 22, 0 }, .{ 23, 0 }, .{ 24, 0 }, .{ 25, 0 }, .{ 26, 0 }, + .{ 27, 0 }, .{ 28, 0 }, .{ 29, 0 }, .{ 30, 0 }, .{ 31, 0 }, .{ 32, 0 }, + .{ 33, 0 }, .{ 34, 0 }, .{ 35, 1 }, .{ 37, 1 }, .{ 39, 1 }, .{ 41, 1 }, + .{ 43, 2 }, .{ 47, 2 }, .{ 51, 3 }, .{ 59, 3 }, .{ 67, 4 }, .{ 83, 4 }, + .{ 99, 5 }, .{ 131, 7 }, .{ 259, 8 
}, .{ 515, 9 }, .{ 1027, 10 }, .{ 2051, 11 }, + .{ 4099, 12 }, .{ 8195, 13 }, .{ 16387, 14 }, .{ 32771, 15 }, .{ 65539, 16 }, +}; + +pub const table_accuracy_log_max = struct { + pub const literal = 9; + pub const match = 9; + pub const offset = 8; +}; + +pub const table_symbol_count_max = struct { + pub const literal = 36; + pub const match = 53; + pub const offset = 32; +}; + +pub const default_accuracy_log = struct { + pub const literal = 6; + pub const match = 6; + pub const offset = 5; +}; +pub const table_size_max = struct { + pub const literal = 1 << table_accuracy_log_max.literal; + pub const match = 1 << table_accuracy_log_max.match; + pub const offset = 1 << table_accuracy_log_max.offset; +}; + +fn testDecompress(gpa: std.mem.Allocator, compressed: []const u8) ![]u8 { + var out: std.ArrayListUnmanaged(u8) = .empty; + defer out.deinit(gpa); + try out.ensureUnusedCapacity(gpa, default_window_len); + + var in: std.io.Reader = .fixed(compressed); + var zstd_stream: Decompress = .init(&in, &.{}, .{}); + try zstd_stream.reader.appendRemaining(gpa, null, &out, .unlimited); + + return out.toOwnedSlice(gpa); +} + +fn testExpectDecompress(uncompressed: []const u8, compressed: []const u8) !void { + const gpa = std.testing.allocator; + const result = try testDecompress(gpa, compressed); + defer gpa.free(result); + try std.testing.expectEqualSlices(u8, uncompressed, result); +} + +fn testExpectDecompressError(err: anyerror, compressed: []const u8) !void { + const gpa = std.testing.allocator; + + var out: std.ArrayListUnmanaged(u8) = .empty; + defer out.deinit(gpa); + try out.ensureUnusedCapacity(gpa, default_window_len); + + var in: std.io.Reader = .fixed(compressed); + var zstd_stream: Decompress = .init(&in, &.{}, .{}); + try std.testing.expectError( + error.ReadFailed, + zstd_stream.reader.appendRemaining(gpa, null, &out, .unlimited), + ); + try std.testing.expectError(err, zstd_stream.err orelse {}); +} + +test Decompress { + const uncompressed = @embedFile("testdata/rfc8478.txt"); + const compressed3 = @embedFile("testdata/rfc8478.txt.zst.3"); + const compressed19 = @embedFile("testdata/rfc8478.txt.zst.19"); + + try testExpectDecompress(uncompressed, compressed3); + try testExpectDecompress(uncompressed, compressed19); +} + +test "zero sized raw block" { + const input_raw = + "\x28\xb5\x2f\xfd" ++ // zstandard frame magic number + "\x20\x00" ++ // frame header: only single_segment_flag set, frame_content_size zero + "\x01\x00\x00"; // block header with: last_block set, block_type raw, block_size zero + try testExpectDecompress("", input_raw); +} + +test "zero sized rle block" { + const input_rle = + "\x28\xb5\x2f\xfd" ++ // zstandard frame magic number + "\x20\x00" ++ // frame header: only single_segment_flag set, frame_content_size zero + "\x03\x00\x00" ++ // block header with: last_block set, block_type rle, block_size zero + "\xaa"; // block_content + try testExpectDecompress("", input_rle); +} + +test "declared raw literals size too large" { + const input_raw = + "\x28\xb5\x2f\xfd" ++ // zstandard frame magic number + "\x00\x00" ++ // frame header: everything unset, window descriptor zero + "\x95\x00\x00" ++ // block header with: last_block set, block_type compressed, block_size 18 + "\xbc\xf3\xae" ++ // literals section header with: type raw, size_format 3, regenerated_size 716603 + "\xa5\x9f\xe3"; // some bytes of literal content - the content is shorter than regenerated_size + + // Note that the regenerated_size in the above input is larger than block maximum size, so the + // 
block can't be valid as it is a raw literals block. + try testExpectDecompressError(error.MalformedLiteralsSection, input_raw); +} diff --git a/lib/std/compress/zstd/Decompress.zig b/lib/std/compress/zstd/Decompress.zig new file mode 100644 index 0000000000..b13a2dcf7a --- /dev/null +++ b/lib/std/compress/zstd/Decompress.zig @@ -0,0 +1,1840 @@ +const Decompress = @This(); +const std = @import("std"); +const assert = std.debug.assert; +const Reader = std.io.Reader; +const Limit = std.io.Limit; +const zstd = @import("../zstd.zig"); +const Writer = std.io.Writer; + +input: *Reader, +reader: Reader, +state: State, +verify_checksum: bool, +window_len: u32, +err: ?Error = null, + +const State = union(enum) { + new_frame, + in_frame: InFrame, + skipping_frame: usize, + end, + + const InFrame = struct { + frame: Frame, + checksum: ?u32, + decompressed_size: usize, + decode: Frame.Zstandard.Decode, + }; +}; + +pub const Options = struct { + /// Verifying checksums is not implemented yet and will cause a panic if + /// you set this to true. + verify_checksum: bool = false, + + /// The output buffer is asserted to have capacity for `window_len` plus + /// `zstd.block_size_max`. + /// + /// If `window_len` is too small, then some streams will fail to decompress + /// with `error.OutputBufferUndersize`. + window_len: u32 = zstd.default_window_len, +}; + +pub const Error = error{ + BadMagic, + BlockOversize, + ChecksumFailure, + ContentOversize, + DictionaryIdFlagUnsupported, + EndOfStream, + HuffmanTreeIncomplete, + InvalidBitStream, + MalformedAccuracyLog, + MalformedBlock, + MalformedCompressedBlock, + MalformedFrame, + MalformedFseBits, + MalformedFseTable, + MalformedHuffmanTree, + MalformedLiteralsHeader, + MalformedLiteralsLength, + MalformedLiteralsSection, + MalformedSequence, + MissingStartBit, + OutputBufferUndersize, + InputBufferUndersize, + ReadFailed, + RepeatModeFirst, + ReservedBitSet, + ReservedBlock, + SequenceBufferUndersize, + TreelessLiteralsFirst, + UnexpectedEndOfLiteralStream, + WindowOversize, + WindowSizeUnknown, +}; + +/// When connecting `reader` to a `Writer`, `buffer` should be empty, and +/// `Writer.buffer` capacity has requirements based on `Options.window_len`. +/// +/// Otherwise, `buffer` has those requirements. +pub fn init(input: *Reader, buffer: []u8, options: Options) Decompress { + return .{ + .input = input, + .state = .new_frame, + .verify_checksum = options.verify_checksum, + .window_len = options.window_len, + .reader = .{ + .vtable = &.{ + .stream = stream, + .rebase = rebase, + }, + .buffer = buffer, + .seek = 0, + .end = 0, + }, + }; +} + +fn rebase(r: *Reader, capacity: usize) Reader.RebaseError!void { + const d: *Decompress = @alignCast(@fieldParentPtr("reader", r)); + assert(capacity <= r.buffer.len - d.window_len); + assert(r.end + capacity > r.buffer.len); + const buffered = r.buffer[0..r.end]; + const discard = buffered.len - d.window_len; + const keep = buffered[discard..]; + @memmove(r.buffer[0..keep.len], keep); + r.end = keep.len; + r.seek -= discard; +} + +fn stream(r: *Reader, w: *Writer, limit: Limit) Reader.StreamError!usize { + const d: *Decompress = @alignCast(@fieldParentPtr("reader", r)); + const in = d.input; + + switch (d.state) { + .new_frame => { + // Allow error.EndOfStream only on the frame magic. 
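+            // A clean end of input here simply means there are no more
+            // frames; once the magic has been read, any truncation is
+            // reported through `d.err` as a failed read instead.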
+ const magic = try in.takeEnumNonexhaustive(Frame.Magic, .little); + initFrame(d, w.buffer.len, magic) catch |err| { + d.err = err; + return error.ReadFailed; + }; + return readInFrame(d, w, limit, &d.state.in_frame) catch |err| switch (err) { + error.ReadFailed => return error.ReadFailed, + error.WriteFailed => return error.WriteFailed, + else => |e| { + d.err = e; + return error.ReadFailed; + }, + }; + }, + .in_frame => |*in_frame| { + return readInFrame(d, w, limit, in_frame) catch |err| switch (err) { + error.ReadFailed => return error.ReadFailed, + error.WriteFailed => return error.WriteFailed, + else => |e| { + d.err = e; + return error.ReadFailed; + }, + }; + }, + .skipping_frame => |*remaining| { + const n = in.discard(.limited(remaining.*)) catch |err| { + d.err = err; + return error.ReadFailed; + }; + remaining.* -= n; + if (remaining.* == 0) d.state = .new_frame; + return 0; + }, + .end => return error.EndOfStream, + } +} + +fn initFrame(d: *Decompress, window_size_max: usize, magic: Frame.Magic) !void { + const in = d.input; + switch (magic.kind() orelse return error.BadMagic) { + .zstandard => { + const header = try Frame.Zstandard.Header.decode(in); + d.state = .{ .in_frame = .{ + .frame = try Frame.init(header, window_size_max, d.verify_checksum), + .checksum = null, + .decompressed_size = 0, + .decode = .init, + } }; + }, + .skippable => { + const frame_size = try in.takeInt(u32, .little); + d.state = .{ .skipping_frame = frame_size }; + }, + } +} + +fn readInFrame(d: *Decompress, w: *Writer, limit: Limit, state: *State.InFrame) !usize { + const in = d.input; + const window_len = d.window_len; + + const block_header = try in.takeStruct(Frame.Zstandard.Block.Header, .little); + const block_size = block_header.size; + const frame_block_size_max = state.frame.block_size_max; + if (frame_block_size_max < block_size) return error.BlockOversize; + if (@intFromEnum(limit) < block_size) return error.OutputBufferUndersize; + var bytes_written: usize = 0; + switch (block_header.type) { + .raw => { + try in.streamExactPreserve(w, window_len, block_size); + bytes_written = block_size; + }, + .rle => { + const byte = try in.takeByte(); + try w.splatBytePreserve(window_len, byte, block_size); + bytes_written = block_size; + }, + .compressed => { + var literals_buffer: [zstd.block_size_max]u8 = undefined; + var sequence_buffer: [zstd.block_size_max]u8 = undefined; + var remaining: Limit = .limited(block_size); + const literals = try LiteralsSection.decode(in, &remaining, &literals_buffer); + const sequences_header = try SequencesSection.Header.decode(in, &remaining); + + const decode = &state.decode; + try decode.prepare(in, &remaining, literals, sequences_header); + + { + if (sequence_buffer.len < @intFromEnum(remaining)) + return error.SequenceBufferUndersize; + const seq_slice = remaining.slice(&sequence_buffer); + try in.readSliceAll(seq_slice); + var bit_stream = try ReverseBitReader.init(seq_slice); + + if (sequences_header.sequence_count > 0) { + try decode.readInitialFseState(&bit_stream); + + // Ensures the following calls to `decodeSequence` will not flush. 
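+                    // The full block is reserved up front so that matches can
+                    // reference bytes produced earlier in this same block as
+                    // well as the preserved window.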
+ if (window_len + frame_block_size_max > w.buffer.len) return error.OutputBufferUndersize; + const dest = (try w.writableSliceGreedyPreserve(window_len, frame_block_size_max))[0..frame_block_size_max]; + const write_pos = dest.ptr - w.buffer.ptr; + for (0..sequences_header.sequence_count - 1) |_| { + bytes_written += try decode.decodeSequence(w.buffer, write_pos + bytes_written, &bit_stream); + try decode.updateState(.literal, &bit_stream); + try decode.updateState(.match, &bit_stream); + try decode.updateState(.offset, &bit_stream); + } + bytes_written += try decode.decodeSequence(w.buffer, write_pos + bytes_written, &bit_stream); + if (bytes_written > dest.len) return error.MalformedSequence; + w.advance(bytes_written); + } + + if (!bit_stream.isEmpty()) { + return error.MalformedCompressedBlock; + } + } + + if (decode.literal_written_count < literals.header.regenerated_size) { + const len = literals.header.regenerated_size - decode.literal_written_count; + try decode.decodeLiterals(w, len); + decode.literal_written_count += len; + bytes_written += len; + } + + switch (decode.literal_header.block_type) { + .treeless, .compressed => { + if (!decode.isLiteralStreamEmpty()) return error.MalformedCompressedBlock; + }, + .raw, .rle => {}, + } + + if (bytes_written > frame_block_size_max) return error.BlockOversize; + }, + .reserved => return error.ReservedBlock, + } + + if (state.frame.hasher_opt) |*hasher| { + if (bytes_written > 0) { + _ = hasher; + @panic("TODO all those bytes written needed to go through the hasher too"); + } + } + + state.decompressed_size += bytes_written; + + if (block_header.last) { + if (state.frame.has_checksum) { + const expected_checksum = try in.takeInt(u32, .little); + if (state.frame.hasher_opt) |*hasher| { + const actual_checksum: u32 = @truncate(hasher.final()); + if (expected_checksum != actual_checksum) return error.ChecksumFailure; + } + } + if (state.frame.content_size) |content_size| { + if (content_size != state.decompressed_size) { + return error.MalformedFrame; + } + } + d.state = .new_frame; + } else if (state.frame.content_size) |content_size| { + if (state.decompressed_size > content_size) return error.MalformedFrame; + } + + return bytes_written; +} + +pub const Frame = struct { + hasher_opt: ?std.hash.XxHash64, + window_size: usize, + has_checksum: bool, + block_size_max: usize, + content_size: ?usize, + + pub const Magic = enum(u32) { + zstandard = 0xFD2FB528, + _, + + pub fn kind(m: Magic) ?Kind { + return switch (@intFromEnum(m)) { + @intFromEnum(Magic.zstandard) => .zstandard, + @intFromEnum(Skippable.magic_min)...@intFromEnum(Skippable.magic_max) => .skippable, + else => null, + }; + } + + pub fn isSkippable(m: Magic) bool { + return switch (@intFromEnum(m)) { + @intFromEnum(Skippable.magic_min)...@intFromEnum(Skippable.magic_max) => true, + else => false, + }; + } + }; + + pub const Kind = enum { zstandard, skippable }; + + pub const Zstandard = struct { + pub const magic: Magic = .zstandard; + + header: Header, + data_blocks: []Block, + checksum: ?u32, + + pub const Header = struct { + descriptor: Descriptor, + window_descriptor: ?u8, + dictionary_id: ?u32, + content_size: ?u64, + + pub const Descriptor = packed struct { + dictionary_id_flag: u2, + content_checksum_flag: bool, + reserved: bool, + unused: bool, + single_segment_flag: bool, + content_size_flag: u2, + }; + + pub const DecodeError = Reader.Error || error{ReservedBitSet}; + + pub fn decode(in: *Reader) DecodeError!Header { + const descriptor: Descriptor = @bitCast(try 
in.takeByte()); + + if (descriptor.reserved) return error.ReservedBitSet; + + const window_descriptor: ?u8 = if (descriptor.single_segment_flag) null else try in.takeByte(); + + const dictionary_id: ?u32 = if (descriptor.dictionary_id_flag > 0) d: { + // if flag is 3 then field_size = 4, else field_size = flag + const field_size = (@as(u4, 1) << descriptor.dictionary_id_flag) >> 1; + break :d try in.takeVarInt(u32, .little, field_size); + } else null; + + const content_size: ?u64 = if (descriptor.single_segment_flag or descriptor.content_size_flag > 0) c: { + const field_size = @as(u4, 1) << descriptor.content_size_flag; + const content_size = try in.takeVarInt(u64, .little, field_size); + break :c if (field_size == 2) content_size + 256 else content_size; + } else null; + + return .{ + .descriptor = descriptor, + .window_descriptor = window_descriptor, + .dictionary_id = dictionary_id, + .content_size = content_size, + }; + } + + /// Returns the window size required to decompress a frame, or `null` if it + /// cannot be determined (which indicates a malformed frame header). + pub fn windowSize(header: Header) ?u64 { + if (header.window_descriptor) |descriptor| { + const exponent = (descriptor & 0b11111000) >> 3; + const mantissa = descriptor & 0b00000111; + const window_log = 10 + exponent; + const window_base = @as(u64, 1) << @as(u6, @intCast(window_log)); + const window_add = (window_base / 8) * mantissa; + return window_base + window_add; + } else return header.content_size; + } + }; + + pub const Block = struct { + pub const Header = packed struct(u24) { + last: bool, + type: Type, + size: u21, + }; + + pub const Type = enum(u2) { + raw, + rle, + compressed, + reserved, + }; + }; + + pub const Decode = struct { + repeat_offsets: [3]u32, + + offset: StateData(8), + match: StateData(9), + literal: StateData(9), + + literal_fse_buffer: [zstd.table_size_max.literal]Table.Fse, + match_fse_buffer: [zstd.table_size_max.match]Table.Fse, + offset_fse_buffer: [zstd.table_size_max.offset]Table.Fse, + + fse_tables_undefined: bool, + + literal_stream_reader: ReverseBitReader, + literal_stream_index: usize, + literal_streams: LiteralsSection.Streams, + literal_header: LiteralsSection.Header, + huffman_tree: ?LiteralsSection.HuffmanTree, + + literal_written_count: usize, + + fn StateData(comptime max_accuracy_log: comptime_int) type { + return struct { + state: @This().State, + table: Table, + accuracy_log: u8, + + const State = std.meta.Int(.unsigned, max_accuracy_log); + }; + } + + const init: Decode = .{ + .repeat_offsets = .{ + zstd.start_repeated_offset_1, + zstd.start_repeated_offset_2, + zstd.start_repeated_offset_3, + }, + + .offset = undefined, + .match = undefined, + .literal = undefined, + + .literal_fse_buffer = undefined, + .match_fse_buffer = undefined, + .offset_fse_buffer = undefined, + + .fse_tables_undefined = true, + + .literal_written_count = 0, + .literal_header = undefined, + .literal_streams = undefined, + .literal_stream_reader = undefined, + .literal_stream_index = undefined, + .huffman_tree = null, + }; + + pub const PrepareError = error{ + /// the (reversed) literal bitstream's first byte does not have any bits set + MissingStartBit, + /// `literals` is a treeless literals section and the decode state does not + /// have a Huffman tree from a previous block + TreelessLiteralsFirst, + /// on the first call if one of the sequence FSE tables is set to repeat mode + RepeatModeFirst, + /// an FSE table has an invalid accuracy + MalformedAccuracyLog, + /// failed decoding an FSE 
table + MalformedFseTable, + /// input stream ends before all FSE tables are read + EndOfStream, + ReadFailed, + InputBufferUndersize, + }; + + /// Prepare the decoder to decode a compressed block. Loads the + /// literals stream and Huffman tree from `literals` and reads the + /// FSE tables from `in`. + pub fn prepare( + self: *Decode, + in: *Reader, + remaining: *Limit, + literals: LiteralsSection, + sequences_header: SequencesSection.Header, + ) PrepareError!void { + self.literal_written_count = 0; + self.literal_header = literals.header; + self.literal_streams = literals.streams; + + if (literals.huffman_tree) |tree| { + self.huffman_tree = tree; + } else if (literals.header.block_type == .treeless and self.huffman_tree == null) { + return error.TreelessLiteralsFirst; + } + + switch (literals.header.block_type) { + .raw, .rle => {}, + .compressed, .treeless => { + self.literal_stream_index = 0; + switch (literals.streams) { + .one => |slice| try self.initLiteralStream(slice), + .four => |streams| try self.initLiteralStream(streams[0]), + } + }, + } + + if (sequences_header.sequence_count > 0) { + try self.updateFseTable(in, remaining, .literal, sequences_header.literal_lengths); + try self.updateFseTable(in, remaining, .offset, sequences_header.offsets); + try self.updateFseTable(in, remaining, .match, sequences_header.match_lengths); + self.fse_tables_undefined = false; + } + } + + /// Read initial FSE states for sequence decoding. + pub fn readInitialFseState(self: *Decode, bit_reader: *ReverseBitReader) error{EndOfStream}!void { + self.literal.state = try bit_reader.readBitsNoEof(u9, self.literal.accuracy_log); + self.offset.state = try bit_reader.readBitsNoEof(u8, self.offset.accuracy_log); + self.match.state = try bit_reader.readBitsNoEof(u9, self.match.accuracy_log); + } + + fn updateRepeatOffset(self: *Decode, offset: u32) void { + self.repeat_offsets[2] = self.repeat_offsets[1]; + self.repeat_offsets[1] = self.repeat_offsets[0]; + self.repeat_offsets[0] = offset; + } + + fn useRepeatOffset(self: *Decode, index: usize) u32 { + if (index == 1) + std.mem.swap(u32, &self.repeat_offsets[0], &self.repeat_offsets[1]) + else if (index == 2) { + std.mem.swap(u32, &self.repeat_offsets[0], &self.repeat_offsets[2]); + std.mem.swap(u32, &self.repeat_offsets[1], &self.repeat_offsets[2]); + } + return self.repeat_offsets[0]; + } + + const WhichFse = enum { offset, match, literal }; + + /// TODO: don't use `@field` + fn updateState( + self: *Decode, + comptime choice: WhichFse, + bit_reader: *ReverseBitReader, + ) error{ MalformedFseBits, EndOfStream }!void { + switch (@field(self, @tagName(choice)).table) { + .rle => {}, + .fse => |table| { + const data = table[@field(self, @tagName(choice)).state]; + const T = @TypeOf(@field(self, @tagName(choice))).State; + const bits_summand = try bit_reader.readBitsNoEof(T, data.bits); + const next_state = std.math.cast( + @TypeOf(@field(self, @tagName(choice))).State, + data.baseline + bits_summand, + ) orelse return error.MalformedFseBits; + @field(self, @tagName(choice)).state = next_state; + }, + } + } + + const FseTableError = error{ + MalformedFseTable, + MalformedAccuracyLog, + RepeatModeFirst, + EndOfStream, + }; + + /// TODO: don't use `@field` + fn updateFseTable( + self: *Decode, + in: *Reader, + remaining: *Limit, + comptime choice: WhichFse, + mode: SequencesSection.Header.Mode, + ) !void { + const field_name = @tagName(choice); + switch (mode) { + .predefined => { + @field(self, field_name).accuracy_log = + @field(zstd.default_accuracy_log, 
field_name); + + @field(self, field_name).table = + @field(Table, "predefined_" ++ field_name); + }, + .rle => { + @field(self, field_name).accuracy_log = 0; + remaining.* = remaining.subtract(1) orelse return error.EndOfStream; + @field(self, field_name).table = .{ .rle = try in.takeByte() }; + }, + .fse => { + const max_table_size = 2048; + const peek_len: usize = remaining.minInt(max_table_size); + if (in.buffer.len < peek_len) return error.InputBufferUndersize; + const limited_buffer = try in.peek(peek_len); + var bit_reader: BitReader = .{ .bytes = limited_buffer }; + const table_size = try Table.decode( + &bit_reader, + @field(zstd.table_symbol_count_max, field_name), + @field(zstd.table_accuracy_log_max, field_name), + &@field(self, field_name ++ "_fse_buffer"), + ); + @field(self, field_name).table = .{ + .fse = (&@field(self, field_name ++ "_fse_buffer"))[0..table_size], + }; + @field(self, field_name).accuracy_log = std.math.log2_int_ceil(usize, table_size); + in.toss(bit_reader.index); + remaining.* = remaining.subtract(bit_reader.index).?; + }, + .repeat => if (self.fse_tables_undefined) return error.RepeatModeFirst, + } + } + + const Sequence = struct { + literal_length: u32, + match_length: u32, + offset: u32, + }; + + fn nextSequence( + self: *Decode, + bit_reader: *ReverseBitReader, + ) error{ InvalidBitStream, EndOfStream }!Sequence { + const raw_code = self.getCode(.offset); + const offset_code = std.math.cast(u5, raw_code) orelse { + return error.InvalidBitStream; + }; + const offset_value = (@as(u32, 1) << offset_code) + try bit_reader.readBitsNoEof(u32, offset_code); + + const match_code = self.getCode(.match); + if (match_code >= zstd.match_length_code_table.len) + return error.InvalidBitStream; + const match = zstd.match_length_code_table[match_code]; + const match_length = match[0] + try bit_reader.readBitsNoEof(u32, match[1]); + + const literal_code = self.getCode(.literal); + if (literal_code >= zstd.literals_length_code_table.len) + return error.InvalidBitStream; + const literal = zstd.literals_length_code_table[literal_code]; + const literal_length = literal[0] + try bit_reader.readBitsNoEof(u32, literal[1]); + + const offset = if (offset_value > 3) offset: { + const offset = offset_value - 3; + self.updateRepeatOffset(offset); + break :offset offset; + } else offset: { + if (literal_length == 0) { + if (offset_value == 3) { + const offset = self.repeat_offsets[0] - 1; + self.updateRepeatOffset(offset); + break :offset offset; + } + break :offset self.useRepeatOffset(offset_value); + } + break :offset self.useRepeatOffset(offset_value - 1); + }; + + if (offset == 0) return error.InvalidBitStream; + + return .{ + .literal_length = literal_length, + .match_length = match_length, + .offset = offset, + }; + } + + /// Decode one sequence from `bit_reader` into `dest`. Assumes `prepare` was + /// called for the block before attempting to decode sequences. 
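The repeat-offset handling in `nextSequence` above follows zstd's three-slot offset history: small offset values reference recently used offsets (with a shifted meaning when the literal length is zero), and `useRepeatOffset` rotates the chosen entry to the front. A minimal standalone sketch of that rotation, seeded with the spec's initial history values (the `RepeatOffsets` name and the test are illustrative, not from this patch):

    const std = @import("std");

    const RepeatOffsets = struct {
        // Initial values mandated by RFC 8878.
        history: [3]u32 = .{ 1, 4, 8 },

        fn push(self: *RepeatOffsets, offset: u32) void {
            self.history[2] = self.history[1];
            self.history[1] = self.history[0];
            self.history[0] = offset;
        }

        // Mirrors `useRepeatOffset`: move the selected entry to the front.
        fn use(self: *RepeatOffsets, index: usize) u32 {
            if (index == 1) {
                std.mem.swap(u32, &self.history[0], &self.history[1]);
            } else if (index == 2) {
                std.mem.swap(u32, &self.history[0], &self.history[2]);
                std.mem.swap(u32, &self.history[1], &self.history[2]);
            }
            return self.history[0];
        }
    };

    test "repeat offset rotation" {
        var r: RepeatOffsets = .{};
        try std.testing.expectEqual(@as(u32, 4), r.use(1)); // history: { 4, 1, 8 }
        r.push(20); // history: { 20, 4, 1 }
        try std.testing.expectEqual(@as(u32, 1), r.use(2)); // history: { 1, 20, 4 }
    }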
+ fn decodeSequence( + decode: *Decode, + dest: []u8, + write_pos: usize, + bit_reader: *ReverseBitReader, + ) !usize { + const sequence = try decode.nextSequence(bit_reader); + const literal_length: usize = sequence.literal_length; + const match_length: usize = sequence.match_length; + const sequence_length = literal_length + match_length; + + const copy_start = std.math.sub(usize, write_pos + sequence.literal_length, sequence.offset) catch + return error.MalformedSequence; + + if (decode.literal_written_count + literal_length > decode.literal_header.regenerated_size) + return error.MalformedLiteralsLength; + var sub_bw: Writer = .fixed(dest[write_pos..]); + try decodeLiterals(decode, &sub_bw, literal_length); + decode.literal_written_count += literal_length; + // This is not a @memmove; it intentionally repeats patterns + // caused by iterating one byte at a time. + for ( + dest[write_pos + literal_length ..][0..match_length], + dest[copy_start..][0..match_length], + ) |*d, s| d.* = s; + return sequence_length; + } + + fn nextLiteralMultiStream(self: *Decode) error{MissingStartBit}!void { + self.literal_stream_index += 1; + try self.initLiteralStream(self.literal_streams.four[self.literal_stream_index]); + } + + fn initLiteralStream(self: *Decode, bytes: []const u8) error{MissingStartBit}!void { + self.literal_stream_reader = try ReverseBitReader.init(bytes); + } + + fn isLiteralStreamEmpty(self: *Decode) bool { + switch (self.literal_streams) { + .one => return self.literal_stream_reader.isEmpty(), + .four => return self.literal_stream_index == 3 and self.literal_stream_reader.isEmpty(), + } + } + + const LiteralBitsError = error{ + MissingStartBit, + UnexpectedEndOfLiteralStream, + }; + fn readLiteralsBits( + self: *Decode, + bit_count_to_read: u16, + ) LiteralBitsError!u16 { + return self.literal_stream_reader.readBitsNoEof(u16, bit_count_to_read) catch bits: { + if (self.literal_streams == .four and self.literal_stream_index < 3) { + try self.nextLiteralMultiStream(); + break :bits self.literal_stream_reader.readBitsNoEof(u16, bit_count_to_read) catch + return error.UnexpectedEndOfLiteralStream; + } else { + return error.UnexpectedEndOfLiteralStream; + } + }; + } + + /// Decode `len` bytes of literals into `w`. 
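The byte-at-a-time match copy in `decodeSequence` above is deliberate: when the offset is smaller than the match length, each iteration reads a byte written by an earlier iteration, which is how zstd (like LZ77 generally) encodes a run of a short repeating pattern. A self-contained illustration (buffer contents invented for the example):

    const std = @import("std");

    test "overlapping match copy repeats the pattern" {
        var dest: [8]u8 = undefined;
        @memcpy(dest[0..2], "ab"); // two literals already decoded
        const match_length = 6;
        const copy_start = 0; // write_pos (2) minus offset (2)
        for (
            dest[2..][0..match_length],
            dest[copy_start..][0..match_length],
        ) |*d, s| d.* = s; // a @memmove here would read stale bytes instead
        try std.testing.expectEqualStrings("abababab", &dest);
    }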
+ fn decodeLiterals(d: *Decode, w: *Writer, len: usize) !void { + switch (d.literal_header.block_type) { + .raw => { + try w.writeAll(d.literal_streams.one[d.literal_written_count..][0..len]); + }, + .rle => { + try w.splatByteAll(d.literal_streams.one[0], len); + }, + .compressed, .treeless => { + if (len > w.buffer.len) return error.OutputBufferUndersize; + const buf = try w.writableSlice(len); + const huffman_tree = d.huffman_tree.?; + const max_bit_count = huffman_tree.max_bit_count; + const starting_bit_count = LiteralsSection.HuffmanTree.weightToBitCount( + huffman_tree.nodes[huffman_tree.symbol_count_minus_one].weight, + max_bit_count, + ); + var bits_read: u4 = 0; + var huffman_tree_index: usize = huffman_tree.symbol_count_minus_one; + var bit_count_to_read: u4 = starting_bit_count; + for (buf) |*out| { + var prefix: u16 = 0; + while (true) { + const new_bits = try d.readLiteralsBits(bit_count_to_read); + prefix <<= bit_count_to_read; + prefix |= new_bits; + bits_read += bit_count_to_read; + const result = try huffman_tree.query(huffman_tree_index, prefix); + + switch (result) { + .symbol => |sym| { + out.* = sym; + bit_count_to_read = starting_bit_count; + bits_read = 0; + huffman_tree_index = huffman_tree.symbol_count_minus_one; + break; + }, + .index => |index| { + huffman_tree_index = index; + const bit_count = LiteralsSection.HuffmanTree.weightToBitCount( + huffman_tree.nodes[index].weight, + max_bit_count, + ); + bit_count_to_read = bit_count - bits_read; + }, + } + } + } + }, + } + } + + /// TODO: don't use `@field` + fn getCode(self: *Decode, comptime choice: WhichFse) u32 { + return switch (@field(self, @tagName(choice)).table) { + .rle => |value| value, + .fse => |table| table[@field(self, @tagName(choice)).state].symbol, + }; + } + }; + }; + + pub const Skippable = struct { + pub const magic_min: Magic = @enumFromInt(0x184D2A50); + pub const magic_max: Magic = @enumFromInt(0x184D2A5F); + + pub const Header = struct { + magic_number: u32, + frame_size: u32, + }; + }; + + const InitError = error{ + /// Frame uses a dictionary. + DictionaryIdFlagUnsupported, + /// Frame does not have a valid window size. + WindowSizeUnknown, + /// Window size exceeds `window_size_max` or max `usize` value. + WindowOversize, + /// Frame header indicates a content size exceeding max `usize` value. + ContentOversize, + }; + + /// Validates `frame_header` and returns the associated `Frame`. 
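The Huffman walk in `decodeLiterals` above derives each code length from symbol weights via `weightToBitCount` (defined further down in `HuffmanTree`): weight 0 marks an absent symbol, otherwise the code is `max_bit_count + 1 - weight` bits, so heavier (more frequent) symbols get shorter codes. A small sketch of just that mapping (the test values are illustrative):

    const std = @import("std");

    fn weightToBitCount(weight: u4, max_bit_count: u4) u4 {
        return if (weight == 0) 0 else ((max_bit_count + 1) - weight);
    }

    test weightToBitCount {
        // In an 11-bit tree, the heaviest weight gets a 1-bit code and
        // weight 1 gets the full 11 bits; weight 0 is an unused symbol.
        try std.testing.expectEqual(@as(u4, 1), weightToBitCount(11, 11));
        try std.testing.expectEqual(@as(u4, 11), weightToBitCount(1, 11));
        try std.testing.expectEqual(@as(u4, 0), weightToBitCount(0, 11));
    }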
+ pub fn init( + frame_header: Frame.Zstandard.Header, + window_size_max: usize, + verify_checksum: bool, + ) InitError!Frame { + if (frame_header.descriptor.dictionary_id_flag != 0) + return error.DictionaryIdFlagUnsupported; + + const window_size_raw = frame_header.windowSize() orelse return error.WindowSizeUnknown; + const window_size = if (window_size_raw > window_size_max) + return error.WindowOversize + else + std.math.cast(usize, window_size_raw) orelse return error.WindowOversize; + + const should_compute_checksum = + frame_header.descriptor.content_checksum_flag and verify_checksum; + + const content_size = if (frame_header.content_size) |size| + std.math.cast(usize, size) orelse return error.ContentOversize + else + null; + + return .{ + .hasher_opt = if (should_compute_checksum) std.hash.XxHash64.init(0) else null, + .window_size = window_size, + .has_checksum = frame_header.descriptor.content_checksum_flag, + .block_size_max = @min(zstd.block_size_max, window_size), + .content_size = content_size, + }; + } +}; + +pub const LiteralsSection = struct { + header: Header, + huffman_tree: ?HuffmanTree, + streams: Streams, + + pub const Streams = union(enum) { + one: []const u8, + four: [4][]const u8, + + fn decode(size_format: u2, stream_data: []const u8) !Streams { + if (size_format == 0) { + return .{ .one = stream_data }; + } + + if (stream_data.len < 6) return error.MalformedLiteralsSection; + + const stream_1_length: usize = std.mem.readInt(u16, stream_data[0..2], .little); + const stream_2_length: usize = std.mem.readInt(u16, stream_data[2..4], .little); + const stream_3_length: usize = std.mem.readInt(u16, stream_data[4..6], .little); + + const stream_1_start = 6; + const stream_2_start = stream_1_start + stream_1_length; + const stream_3_start = stream_2_start + stream_2_length; + const stream_4_start = stream_3_start + stream_3_length; + + if (stream_data.len < stream_4_start) return error.MalformedLiteralsSection; + + return .{ .four = .{ + stream_data[stream_1_start .. stream_1_start + stream_1_length], + stream_data[stream_2_start .. stream_2_start + stream_2_length], + stream_data[stream_3_start .. stream_3_start + stream_3_length], + stream_data[stream_4_start..], + } }; + } + }; + + pub const Header = struct { + block_type: BlockType, + size_format: u2, + regenerated_size: u20, + compressed_size: ?u18, + + /// Decode a literals section header. 
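`Streams.decode` above splits four-stream literals using a 6-byte jump table: three little-endian u16 sizes, with the fourth stream taking whatever remains. A standalone sketch of the same layout arithmetic (the byte values are invented for the example):

    const std = @import("std");

    test "four-stream literals jump table" {
        // Jump table declares sizes 2, 3, 1; the payload follows immediately.
        const data = [_]u8{ 2, 0, 3, 0, 1, 0 } ++ "aabbbcdd".*;
        const len1 = std.mem.readInt(u16, data[0..2], .little);
        const len2 = std.mem.readInt(u16, data[2..4], .little);
        const len3 = std.mem.readInt(u16, data[4..6], .little);
        try std.testing.expectEqualStrings("aa", data[6..][0..len1]);
        try std.testing.expectEqualStrings("bbb", data[6 + len1 ..][0..len2]);
        try std.testing.expectEqualStrings("c", data[6 + len1 + len2 ..][0..len3]);
        try std.testing.expectEqualStrings("dd", data[6 + len1 + len2 + len3 ..]);
    }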
+ pub fn decode(in: *Reader, remaining: *Limit) !Header { + remaining.* = remaining.subtract(1) orelse return error.EndOfStream; + const byte0 = try in.takeByte(); + const block_type: BlockType = @enumFromInt(byte0 & 0b11); + const size_format: u2 = @intCast((byte0 & 0b1100) >> 2); + var regenerated_size: u20 = undefined; + var compressed_size: ?u18 = null; + switch (block_type) { + .raw, .rle => { + switch (size_format) { + 0, 2 => { + regenerated_size = byte0 >> 3; + }, + 1 => { + remaining.* = remaining.subtract(1) orelse return error.EndOfStream; + regenerated_size = (byte0 >> 4) + (@as(u20, try in.takeByte()) << 4); + }, + 3 => { + remaining.* = remaining.subtract(2) orelse return error.EndOfStream; + regenerated_size = (byte0 >> 4) + + (@as(u20, try in.takeByte()) << 4) + + (@as(u20, try in.takeByte()) << 12); + }, + } + }, + .compressed, .treeless => { + remaining.* = remaining.subtract(2) orelse return error.EndOfStream; + const byte1 = try in.takeByte(); + const byte2 = try in.takeByte(); + switch (size_format) { + 0, 1 => { + regenerated_size = (byte0 >> 4) + ((@as(u20, byte1) & 0b00111111) << 4); + compressed_size = ((byte1 & 0b11000000) >> 6) + (@as(u18, byte2) << 2); + }, + 2 => { + remaining.* = remaining.subtract(1) orelse return error.EndOfStream; + const byte3 = try in.takeByte(); + regenerated_size = (byte0 >> 4) + (@as(u20, byte1) << 4) + ((@as(u20, byte2) & 0b00000011) << 12); + compressed_size = ((byte2 & 0b11111100) >> 2) + (@as(u18, byte3) << 6); + }, + 3 => { + remaining.* = remaining.subtract(2) orelse return error.EndOfStream; + const byte3 = try in.takeByte(); + const byte4 = try in.takeByte(); + regenerated_size = (byte0 >> 4) + (@as(u20, byte1) << 4) + ((@as(u20, byte2) & 0b00111111) << 12); + compressed_size = ((byte2 & 0b11000000) >> 6) + (@as(u18, byte3) << 2) + (@as(u18, byte4) << 10); + }, + } + }, + } + return .{ + .block_type = block_type, + .size_format = size_format, + .regenerated_size = regenerated_size, + .compressed_size = compressed_size, + }; + } + }; + + pub const BlockType = enum(u2) { + raw, + rle, + compressed, + treeless, + }; + + pub const HuffmanTree = struct { + max_bit_count: u4, + symbol_count_minus_one: u8, + nodes: [256]PrefixedSymbol, + + pub const PrefixedSymbol = struct { + symbol: u8, + prefix: u16, + weight: u4, + }; + + pub const Result = union(enum) { + symbol: u8, + index: usize, + }; + + pub fn query(self: HuffmanTree, index: usize, prefix: u16) error{HuffmanTreeIncomplete}!Result { + var node = self.nodes[index]; + const weight = node.weight; + var i: usize = index; + while (node.weight == weight) { + if (node.prefix == prefix) return .{ .symbol = node.symbol }; + if (i == 0) return error.HuffmanTreeIncomplete; + i -= 1; + node = self.nodes[i]; + } + return .{ .index = i }; + } + + pub fn weightToBitCount(weight: u4, max_bit_count: u4) u4 { + return if (weight == 0) 0 else ((max_bit_count + 1) - weight); + } + + pub const DecodeError = Reader.Error || error{ + MalformedHuffmanTree, + MalformedFseTable, + MalformedAccuracyLog, + EndOfStream, + MissingStartBit, + }; + + pub fn decode(in: *Reader, remaining: *Limit) HuffmanTree.DecodeError!HuffmanTree { + remaining.* = remaining.subtract(1) orelse return error.EndOfStream; + const header = try in.takeByte(); + if (header < 128) { + return decodeFse(in, remaining, header); + } else { + return decodeDirect(in, remaining, header - 127); + } + } + + fn decodeDirect( + in: *Reader, + remaining: *Limit, + encoded_symbol_count: usize, + ) HuffmanTree.DecodeError!HuffmanTree { + var 
weights: [256]u4 = undefined; + const weights_byte_count = (encoded_symbol_count + 1) / 2; + remaining.* = remaining.subtract(weights_byte_count) orelse return error.EndOfStream; + for (0..weights_byte_count) |i| { + const byte = try in.takeByte(); + weights[2 * i] = @as(u4, @intCast(byte >> 4)); + weights[2 * i + 1] = @as(u4, @intCast(byte & 0xF)); + } + const symbol_count = encoded_symbol_count + 1; + return build(&weights, symbol_count); + } + + fn decodeFse( + in: *Reader, + remaining: *Limit, + compressed_size: usize, + ) HuffmanTree.DecodeError!HuffmanTree { + var weights: [256]u4 = undefined; + remaining.* = remaining.subtract(compressed_size) orelse return error.EndOfStream; + const compressed_buffer = try in.take(compressed_size); + var bit_reader: BitReader = .{ .bytes = compressed_buffer }; + var entries: [1 << 6]Table.Fse = undefined; + const table_size = try Table.decode(&bit_reader, 256, 6, &entries); + const accuracy_log = std.math.log2_int_ceil(usize, table_size); + const remaining_buffer = bit_reader.bytes[bit_reader.index..]; + const symbol_count = try assignWeights(remaining_buffer, accuracy_log, &entries, &weights); + return build(&weights, symbol_count); + } + + fn assignWeights( + huff_bits_buffer: []const u8, + accuracy_log: u16, + entries: *[1 << 6]Table.Fse, + weights: *[256]u4, + ) !usize { + var huff_bits = try ReverseBitReader.init(huff_bits_buffer); + + var i: usize = 0; + var even_state: u32 = try huff_bits.readBitsNoEof(u32, accuracy_log); + var odd_state: u32 = try huff_bits.readBitsNoEof(u32, accuracy_log); + + while (i < 254) { + const even_data = entries[even_state]; + var read_bits: u16 = 0; + const even_bits = huff_bits.readBits(u32, even_data.bits, &read_bits) catch unreachable; + weights[i] = std.math.cast(u4, even_data.symbol) orelse return error.MalformedHuffmanTree; + i += 1; + if (read_bits < even_data.bits) { + weights[i] = std.math.cast(u4, entries[odd_state].symbol) orelse return error.MalformedHuffmanTree; + i += 1; + break; + } + even_state = even_data.baseline + even_bits; + + read_bits = 0; + const odd_data = entries[odd_state]; + const odd_bits = huff_bits.readBits(u32, odd_data.bits, &read_bits) catch unreachable; + weights[i] = std.math.cast(u4, odd_data.symbol) orelse return error.MalformedHuffmanTree; + i += 1; + if (read_bits < odd_data.bits) { + if (i == 255) return error.MalformedHuffmanTree; + weights[i] = std.math.cast(u4, entries[even_state].symbol) orelse return error.MalformedHuffmanTree; + i += 1; + break; + } + odd_state = odd_data.baseline + odd_bits; + } else return error.MalformedHuffmanTree; + + if (!huff_bits.isEmpty()) { + return error.MalformedHuffmanTree; + } + + return i + 1; // stream contains all but the last symbol + } + + fn assignSymbols(weight_sorted_prefixed_symbols: []PrefixedSymbol, weights: [256]u4) usize { + for (0..weight_sorted_prefixed_symbols.len) |i| { + weight_sorted_prefixed_symbols[i] = .{ + .symbol = @as(u8, @intCast(i)), + .weight = undefined, + .prefix = undefined, + }; + } + + std.mem.sort( + PrefixedSymbol, + weight_sorted_prefixed_symbols, + weights, + lessThanByWeight, + ); + + var prefix: u16 = 0; + var prefixed_symbol_count: usize = 0; + var sorted_index: usize = 0; + const symbol_count = weight_sorted_prefixed_symbols.len; + while (sorted_index < symbol_count) { + var symbol = weight_sorted_prefixed_symbols[sorted_index].symbol; + const weight = weights[symbol]; + if (weight == 0) { + sorted_index += 1; + continue; + } + + while (sorted_index < symbol_count) : ({ + sorted_index += 1; + 
prefixed_symbol_count += 1; + prefix += 1; + }) { + symbol = weight_sorted_prefixed_symbols[sorted_index].symbol; + if (weights[symbol] != weight) { + prefix = ((prefix - 1) >> (weights[symbol] - weight)) + 1; + break; + } + weight_sorted_prefixed_symbols[prefixed_symbol_count].symbol = symbol; + weight_sorted_prefixed_symbols[prefixed_symbol_count].prefix = prefix; + weight_sorted_prefixed_symbols[prefixed_symbol_count].weight = weight; + } + } + return prefixed_symbol_count; + } + + fn build(weights: *[256]u4, symbol_count: usize) error{MalformedHuffmanTree}!HuffmanTree { + var weight_power_sum_big: u32 = 0; + for (weights[0 .. symbol_count - 1]) |value| { + weight_power_sum_big += (@as(u16, 1) << value) >> 1; + } + if (weight_power_sum_big >= 1 << 11) return error.MalformedHuffmanTree; + const weight_power_sum = @as(u16, @intCast(weight_power_sum_big)); + + // advance to next power of two (even if weight_power_sum is a power of 2) + // TODO: is it valid to have weight_power_sum == 0? + const max_number_of_bits = if (weight_power_sum == 0) 1 else std.math.log2_int(u16, weight_power_sum) + 1; + const next_power_of_two = @as(u16, 1) << max_number_of_bits; + weights[symbol_count - 1] = std.math.log2_int(u16, next_power_of_two - weight_power_sum) + 1; + + var weight_sorted_prefixed_symbols: [256]PrefixedSymbol = undefined; + const prefixed_symbol_count = assignSymbols(weight_sorted_prefixed_symbols[0..symbol_count], weights.*); + const tree: HuffmanTree = .{ + .max_bit_count = max_number_of_bits, + .symbol_count_minus_one = @as(u8, @intCast(prefixed_symbol_count - 1)), + .nodes = weight_sorted_prefixed_symbols, + }; + return tree; + } + + fn lessThanByWeight( + weights: [256]u4, + lhs: PrefixedSymbol, + rhs: PrefixedSymbol, + ) bool { + // NOTE: this function relies on the use of a stable sorting algorithm, + // otherwise a special case of if (weights[lhs] == weights[rhs]) return lhs < rhs; + // should be added + return weights[lhs.symbol] < weights[rhs.symbol]; + } + }; + + pub const StreamCount = enum { one, four }; + pub fn streamCount(size_format: u2, block_type: BlockType) StreamCount { + return switch (block_type) { + .raw, .rle => .one, + .compressed, .treeless => if (size_format == 0) .one else .four, + }; + } + + pub const DecodeError = error{ + /// Invalid header. + MalformedLiteralsHeader, + /// Malformed literals section. + MalformedLiteralsSection, + /// Compressed literals have invalid accuracy. + MalformedAccuracyLog, + /// Compressed literals have invalid FSE table. + MalformedFseTable, + /// Failed decoding a Huffman tree. + MalformedHuffmanTree, + /// Not enough bytes to complete the section. 
+ EndOfStream, + ReadFailed, + MissingStartBit, + }; + + pub fn decode(in: *Reader, remaining: *Limit, buffer: []u8) DecodeError!LiteralsSection { + const header = try Header.decode(in, remaining); + switch (header.block_type) { + .raw => { + if (buffer.len < header.regenerated_size) return error.MalformedLiteralsSection; + remaining.* = remaining.subtract(header.regenerated_size) orelse return error.EndOfStream; + try in.readSliceAll(buffer[0..header.regenerated_size]); + return .{ + .header = header, + .huffman_tree = null, + .streams = .{ .one = buffer }, + }; + }, + .rle => { + remaining.* = remaining.subtract(1) orelse return error.EndOfStream; + buffer[0] = try in.takeByte(); + return .{ + .header = header, + .huffman_tree = null, + .streams = .{ .one = buffer[0..1] }, + }; + }, + .compressed, .treeless => { + const before_remaining = remaining.*; + const huffman_tree = if (header.block_type == .compressed) + try HuffmanTree.decode(in, remaining) + else + null; + const huffman_tree_size = @intFromEnum(before_remaining) - @intFromEnum(remaining.*); + const total_streams_size = std.math.sub(usize, header.compressed_size.?, huffman_tree_size) catch + return error.MalformedLiteralsSection; + if (total_streams_size > buffer.len) return error.MalformedLiteralsSection; + remaining.* = remaining.subtract(total_streams_size) orelse return error.EndOfStream; + try in.readSliceAll(buffer[0..total_streams_size]); + const stream_data = buffer[0..total_streams_size]; + const streams = try Streams.decode(header.size_format, stream_data); + return .{ + .header = header, + .huffman_tree = huffman_tree, + .streams = streams, + }; + }, + } + } +}; + +pub const SequencesSection = struct { + header: Header, + literals_length_table: Table, + offset_table: Table, + match_length_table: Table, + + pub const Header = struct { + sequence_count: u24, + match_lengths: Mode, + offsets: Mode, + literal_lengths: Mode, + + pub const Mode = enum(u2) { + predefined, + rle, + fse, + repeat, + }; + + pub const DecodeError = error{ + ReservedBitSet, + EndOfStream, + ReadFailed, + }; + + pub fn decode(in: *Reader, remaining: *Limit) DecodeError!Header { + var sequence_count: u24 = undefined; + + remaining.* = remaining.subtract(1) orelse return error.EndOfStream; + const byte0 = try in.takeByte(); + if (byte0 == 0) { + return .{ + .sequence_count = 0, + .offsets = undefined, + .match_lengths = undefined, + .literal_lengths = undefined, + }; + } else if (byte0 < 128) { + remaining.* = remaining.subtract(1) orelse return error.EndOfStream; + sequence_count = byte0; + } else if (byte0 < 255) { + remaining.* = remaining.subtract(2) orelse return error.EndOfStream; + sequence_count = (@as(u24, (byte0 - 128)) << 8) + try in.takeByte(); + } else { + remaining.* = remaining.subtract(3) orelse return error.EndOfStream; + sequence_count = (try in.takeByte()) + (@as(u24, try in.takeByte()) << 8) + 0x7F00; + } + + const compression_modes = try in.takeByte(); + + const matches_mode: Header.Mode = @enumFromInt((compression_modes & 0b00001100) >> 2); + const offsets_mode: Header.Mode = @enumFromInt((compression_modes & 0b00110000) >> 4); + const literal_mode: Header.Mode = @enumFromInt((compression_modes & 0b11000000) >> 6); + if (compression_modes & 0b11 != 0) return error.ReservedBitSet; + + return .{ + .sequence_count = sequence_count, + .offsets = offsets_mode, + .match_lengths = matches_mode, + .literal_lengths = literal_mode, + }; + } + }; +}; + +pub const Table = union(enum) { + fse: []const Fse, + rle: u8, + + pub const Fse = 
struct { + symbol: u8, + baseline: u16, + bits: u8, + }; + + pub fn decode( + bit_reader: *BitReader, + expected_symbol_count: usize, + max_accuracy_log: u4, + entries: []Table.Fse, + ) !usize { + const accuracy_log_biased = try bit_reader.readBitsNoEof(u4, 4); + if (accuracy_log_biased > max_accuracy_log -| 5) return error.MalformedAccuracyLog; + const accuracy_log = accuracy_log_biased + 5; + + var values: [256]u16 = undefined; + var value_count: usize = 0; + + const total_probability = @as(u16, 1) << accuracy_log; + var accumulated_probability: u16 = 0; + + while (accumulated_probability < total_probability) { + // WARNING: The RFC is poorly worded, and would suggest std.math.log2_int_ceil is correct here, + // but power of two (remaining probabilities + 1) need max bits set to 1 more. + const max_bits = std.math.log2_int(u16, total_probability - accumulated_probability + 1) + 1; + const small = try bit_reader.readBitsNoEof(u16, max_bits - 1); + + const cutoff = (@as(u16, 1) << max_bits) - 1 - (total_probability - accumulated_probability + 1); + + const value = if (small < cutoff) + small + else value: { + const value_read = small + (try bit_reader.readBitsNoEof(u16, 1) << (max_bits - 1)); + break :value if (value_read < @as(u16, 1) << (max_bits - 1)) + value_read + else + value_read - cutoff; + }; + + accumulated_probability += if (value != 0) value - 1 else 1; + + values[value_count] = value; + value_count += 1; + + if (value == 1) { + while (true) { + const repeat_flag = try bit_reader.readBitsNoEof(u2, 2); + if (repeat_flag + value_count > 256) return error.MalformedFseTable; + for (0..repeat_flag) |_| { + values[value_count] = 1; + value_count += 1; + } + if (repeat_flag < 3) break; + } + } + if (value_count == 256) break; + } + bit_reader.alignToByte(); + + if (value_count < 2) return error.MalformedFseTable; + if (accumulated_probability != total_probability) return error.MalformedFseTable; + if (value_count > expected_symbol_count) return error.MalformedFseTable; + + const table_size = total_probability; + + try build(values[0..value_count], entries[0..table_size]); + return table_size; + } + + pub fn build(values: []const u16, entries: []Table.Fse) !void { + const total_probability = @as(u16, @intCast(entries.len)); + const accuracy_log = std.math.log2_int(u16, total_probability); + assert(total_probability <= 1 << 9); + + var less_than_one_count: usize = 0; + for (values, 0..) |value, i| { + if (value == 0) { + entries[entries.len - 1 - less_than_one_count] = Table.Fse{ + .symbol = @as(u8, @intCast(i)), + .baseline = 0, + .bits = accuracy_log, + }; + less_than_one_count += 1; + } + } + + var position: usize = 0; + var temp_states: [1 << 9]u16 = undefined; + for (values, 0..) 
|value, symbol| { + if (value == 0 or value == 1) continue; + const probability = value - 1; + + const state_share_dividend = std.math.ceilPowerOfTwo(u16, probability) catch + return error.MalformedFseTable; + const share_size = @divExact(total_probability, state_share_dividend); + const double_state_count = state_share_dividend - probability; + const single_state_count = probability - double_state_count; + const share_size_log = std.math.log2_int(u16, share_size); + + for (0..probability) |i| { + temp_states[i] = @as(u16, @intCast(position)); + position += (entries.len >> 1) + (entries.len >> 3) + 3; + position &= entries.len - 1; + while (position >= entries.len - less_than_one_count) { + position += (entries.len >> 1) + (entries.len >> 3) + 3; + position &= entries.len - 1; + } + } + std.mem.sort(u16, temp_states[0..probability], {}, std.sort.asc(u16)); + for (0..probability) |i| { + entries[temp_states[i]] = if (i < double_state_count) Table.Fse{ + .symbol = @as(u8, @intCast(symbol)), + .bits = share_size_log + 1, + .baseline = single_state_count * share_size + @as(u16, @intCast(i)) * 2 * share_size, + } else Table.Fse{ + .symbol = @as(u8, @intCast(symbol)), + .bits = share_size_log, + .baseline = (@as(u16, @intCast(i)) - double_state_count) * share_size, + }; + } + } + } + + test build { + const literals_length_default_values = [36]u16{ + 5, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 3, 2, 2, 2, 2, 2, + 0, 0, 0, 0, + }; + + const match_lengths_default_values = [53]u16{ + 2, 5, 4, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, + 0, 0, 0, 0, 0, + }; + + const offset_codes_default_values = [29]u16{ + 2, 2, 2, 2, 2, 2, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, + }; + + var entries: [64]Table.Fse = undefined; + try build(&literals_length_default_values, &entries); + try std.testing.expectEqualSlices(Table.Fse, Table.predefined_literal.fse, &entries); + + try build(&match_lengths_default_values, &entries); + try std.testing.expectEqualSlices(Table.Fse, Table.predefined_match.fse, &entries); + + try build(&offset_codes_default_values, entries[0..32]); + try std.testing.expectEqualSlices(Table.Fse, Table.predefined_offset.fse, entries[0..32]); + } + + pub const predefined_literal: Table = .{ + .fse = &[64]Table.Fse{ + .{ .symbol = 0, .bits = 4, .baseline = 0 }, + .{ .symbol = 0, .bits = 4, .baseline = 16 }, + .{ .symbol = 1, .bits = 5, .baseline = 32 }, + .{ .symbol = 3, .bits = 5, .baseline = 0 }, + .{ .symbol = 4, .bits = 5, .baseline = 0 }, + .{ .symbol = 6, .bits = 5, .baseline = 0 }, + .{ .symbol = 7, .bits = 5, .baseline = 0 }, + .{ .symbol = 9, .bits = 5, .baseline = 0 }, + .{ .symbol = 10, .bits = 5, .baseline = 0 }, + .{ .symbol = 12, .bits = 5, .baseline = 0 }, + .{ .symbol = 14, .bits = 6, .baseline = 0 }, + .{ .symbol = 16, .bits = 5, .baseline = 0 }, + .{ .symbol = 18, .bits = 5, .baseline = 0 }, + .{ .symbol = 19, .bits = 5, .baseline = 0 }, + .{ .symbol = 21, .bits = 5, .baseline = 0 }, + .{ .symbol = 22, .bits = 5, .baseline = 0 }, + .{ .symbol = 24, .bits = 5, .baseline = 0 }, + .{ .symbol = 25, .bits = 5, .baseline = 32 }, + .{ .symbol = 26, .bits = 5, .baseline = 0 }, + .{ .symbol = 27, .bits = 6, .baseline = 0 }, + .{ .symbol = 29, .bits = 6, .baseline = 0 }, + .{ .symbol = 31, .bits = 6, .baseline = 0 }, + .{ .symbol = 0, .bits = 4, .baseline = 32 }, + .{ .symbol = 1, .bits = 4, .baseline = 0 }, + .{ .symbol = 2, .bits 
= 5, .baseline = 0 }, + .{ .symbol = 4, .bits = 5, .baseline = 32 }, + .{ .symbol = 5, .bits = 5, .baseline = 0 }, + .{ .symbol = 7, .bits = 5, .baseline = 32 }, + .{ .symbol = 8, .bits = 5, .baseline = 0 }, + .{ .symbol = 10, .bits = 5, .baseline = 32 }, + .{ .symbol = 11, .bits = 5, .baseline = 0 }, + .{ .symbol = 13, .bits = 6, .baseline = 0 }, + .{ .symbol = 16, .bits = 5, .baseline = 32 }, + .{ .symbol = 17, .bits = 5, .baseline = 0 }, + .{ .symbol = 19, .bits = 5, .baseline = 32 }, + .{ .symbol = 20, .bits = 5, .baseline = 0 }, + .{ .symbol = 22, .bits = 5, .baseline = 32 }, + .{ .symbol = 23, .bits = 5, .baseline = 0 }, + .{ .symbol = 25, .bits = 4, .baseline = 0 }, + .{ .symbol = 25, .bits = 4, .baseline = 16 }, + .{ .symbol = 26, .bits = 5, .baseline = 32 }, + .{ .symbol = 28, .bits = 6, .baseline = 0 }, + .{ .symbol = 30, .bits = 6, .baseline = 0 }, + .{ .symbol = 0, .bits = 4, .baseline = 48 }, + .{ .symbol = 1, .bits = 4, .baseline = 16 }, + .{ .symbol = 2, .bits = 5, .baseline = 32 }, + .{ .symbol = 3, .bits = 5, .baseline = 32 }, + .{ .symbol = 5, .bits = 5, .baseline = 32 }, + .{ .symbol = 6, .bits = 5, .baseline = 32 }, + .{ .symbol = 8, .bits = 5, .baseline = 32 }, + .{ .symbol = 9, .bits = 5, .baseline = 32 }, + .{ .symbol = 11, .bits = 5, .baseline = 32 }, + .{ .symbol = 12, .bits = 5, .baseline = 32 }, + .{ .symbol = 15, .bits = 6, .baseline = 0 }, + .{ .symbol = 17, .bits = 5, .baseline = 32 }, + .{ .symbol = 18, .bits = 5, .baseline = 32 }, + .{ .symbol = 20, .bits = 5, .baseline = 32 }, + .{ .symbol = 21, .bits = 5, .baseline = 32 }, + .{ .symbol = 23, .bits = 5, .baseline = 32 }, + .{ .symbol = 24, .bits = 5, .baseline = 32 }, + .{ .symbol = 35, .bits = 6, .baseline = 0 }, + .{ .symbol = 34, .bits = 6, .baseline = 0 }, + .{ .symbol = 33, .bits = 6, .baseline = 0 }, + .{ .symbol = 32, .bits = 6, .baseline = 0 }, + }, + }; + + pub const predefined_match: Table = .{ + .fse = &[64]Table.Fse{ + .{ .symbol = 0, .bits = 6, .baseline = 0 }, + .{ .symbol = 1, .bits = 4, .baseline = 0 }, + .{ .symbol = 2, .bits = 5, .baseline = 32 }, + .{ .symbol = 3, .bits = 5, .baseline = 0 }, + .{ .symbol = 5, .bits = 5, .baseline = 0 }, + .{ .symbol = 6, .bits = 5, .baseline = 0 }, + .{ .symbol = 8, .bits = 5, .baseline = 0 }, + .{ .symbol = 10, .bits = 6, .baseline = 0 }, + .{ .symbol = 13, .bits = 6, .baseline = 0 }, + .{ .symbol = 16, .bits = 6, .baseline = 0 }, + .{ .symbol = 19, .bits = 6, .baseline = 0 }, + .{ .symbol = 22, .bits = 6, .baseline = 0 }, + .{ .symbol = 25, .bits = 6, .baseline = 0 }, + .{ .symbol = 28, .bits = 6, .baseline = 0 }, + .{ .symbol = 31, .bits = 6, .baseline = 0 }, + .{ .symbol = 33, .bits = 6, .baseline = 0 }, + .{ .symbol = 35, .bits = 6, .baseline = 0 }, + .{ .symbol = 37, .bits = 6, .baseline = 0 }, + .{ .symbol = 39, .bits = 6, .baseline = 0 }, + .{ .symbol = 41, .bits = 6, .baseline = 0 }, + .{ .symbol = 43, .bits = 6, .baseline = 0 }, + .{ .symbol = 45, .bits = 6, .baseline = 0 }, + .{ .symbol = 1, .bits = 4, .baseline = 16 }, + .{ .symbol = 2, .bits = 4, .baseline = 0 }, + .{ .symbol = 3, .bits = 5, .baseline = 32 }, + .{ .symbol = 4, .bits = 5, .baseline = 0 }, + .{ .symbol = 6, .bits = 5, .baseline = 32 }, + .{ .symbol = 7, .bits = 5, .baseline = 0 }, + .{ .symbol = 9, .bits = 6, .baseline = 0 }, + .{ .symbol = 12, .bits = 6, .baseline = 0 }, + .{ .symbol = 15, .bits = 6, .baseline = 0 }, + .{ .symbol = 18, .bits = 6, .baseline = 0 }, + .{ .symbol = 21, .bits = 6, .baseline = 0 }, + .{ .symbol = 24, .bits = 6, .baseline = 0 }, + .{ .symbol = 27, 
.bits = 6, .baseline = 0 }, + .{ .symbol = 30, .bits = 6, .baseline = 0 }, + .{ .symbol = 32, .bits = 6, .baseline = 0 }, + .{ .symbol = 34, .bits = 6, .baseline = 0 }, + .{ .symbol = 36, .bits = 6, .baseline = 0 }, + .{ .symbol = 38, .bits = 6, .baseline = 0 }, + .{ .symbol = 40, .bits = 6, .baseline = 0 }, + .{ .symbol = 42, .bits = 6, .baseline = 0 }, + .{ .symbol = 44, .bits = 6, .baseline = 0 }, + .{ .symbol = 1, .bits = 4, .baseline = 32 }, + .{ .symbol = 1, .bits = 4, .baseline = 48 }, + .{ .symbol = 2, .bits = 4, .baseline = 16 }, + .{ .symbol = 4, .bits = 5, .baseline = 32 }, + .{ .symbol = 5, .bits = 5, .baseline = 32 }, + .{ .symbol = 7, .bits = 5, .baseline = 32 }, + .{ .symbol = 8, .bits = 5, .baseline = 32 }, + .{ .symbol = 11, .bits = 6, .baseline = 0 }, + .{ .symbol = 14, .bits = 6, .baseline = 0 }, + .{ .symbol = 17, .bits = 6, .baseline = 0 }, + .{ .symbol = 20, .bits = 6, .baseline = 0 }, + .{ .symbol = 23, .bits = 6, .baseline = 0 }, + .{ .symbol = 26, .bits = 6, .baseline = 0 }, + .{ .symbol = 29, .bits = 6, .baseline = 0 }, + .{ .symbol = 52, .bits = 6, .baseline = 0 }, + .{ .symbol = 51, .bits = 6, .baseline = 0 }, + .{ .symbol = 50, .bits = 6, .baseline = 0 }, + .{ .symbol = 49, .bits = 6, .baseline = 0 }, + .{ .symbol = 48, .bits = 6, .baseline = 0 }, + .{ .symbol = 47, .bits = 6, .baseline = 0 }, + .{ .symbol = 46, .bits = 6, .baseline = 0 }, + }, + }; + + pub const predefined_offset: Table = .{ + .fse = &[32]Table.Fse{ + .{ .symbol = 0, .bits = 5, .baseline = 0 }, + .{ .symbol = 6, .bits = 4, .baseline = 0 }, + .{ .symbol = 9, .bits = 5, .baseline = 0 }, + .{ .symbol = 15, .bits = 5, .baseline = 0 }, + .{ .symbol = 21, .bits = 5, .baseline = 0 }, + .{ .symbol = 3, .bits = 5, .baseline = 0 }, + .{ .symbol = 7, .bits = 4, .baseline = 0 }, + .{ .symbol = 12, .bits = 5, .baseline = 0 }, + .{ .symbol = 18, .bits = 5, .baseline = 0 }, + .{ .symbol = 23, .bits = 5, .baseline = 0 }, + .{ .symbol = 5, .bits = 5, .baseline = 0 }, + .{ .symbol = 8, .bits = 4, .baseline = 0 }, + .{ .symbol = 14, .bits = 5, .baseline = 0 }, + .{ .symbol = 20, .bits = 5, .baseline = 0 }, + .{ .symbol = 2, .bits = 5, .baseline = 0 }, + .{ .symbol = 7, .bits = 4, .baseline = 16 }, + .{ .symbol = 11, .bits = 5, .baseline = 0 }, + .{ .symbol = 17, .bits = 5, .baseline = 0 }, + .{ .symbol = 22, .bits = 5, .baseline = 0 }, + .{ .symbol = 4, .bits = 5, .baseline = 0 }, + .{ .symbol = 8, .bits = 4, .baseline = 16 }, + .{ .symbol = 13, .bits = 5, .baseline = 0 }, + .{ .symbol = 19, .bits = 5, .baseline = 0 }, + .{ .symbol = 1, .bits = 5, .baseline = 0 }, + .{ .symbol = 6, .bits = 4, .baseline = 16 }, + .{ .symbol = 10, .bits = 5, .baseline = 0 }, + .{ .symbol = 16, .bits = 5, .baseline = 0 }, + .{ .symbol = 28, .bits = 5, .baseline = 0 }, + .{ .symbol = 27, .bits = 5, .baseline = 0 }, + .{ .symbol = 26, .bits = 5, .baseline = 0 }, + .{ .symbol = 25, .bits = 5, .baseline = 0 }, + .{ .symbol = 24, .bits = 5, .baseline = 0 }, + }, + }; +}; + +const low_bit_mask = [9]u8{ + 0b00000000, + 0b00000001, + 0b00000011, + 0b00000111, + 0b00001111, + 0b00011111, + 0b00111111, + 0b01111111, + 0b11111111, +}; + +fn Bits(comptime T: type) type { + return struct { T, u16 }; +} + +/// For reading the reversed bit streams used to encode FSE compressed data. 
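Zstd bitstreams are written forward but consumed backward, so `ReverseBitReader.init` below starts at the last byte and discards zero padding up to and including a mandatory 1-bit sentinel; a stream whose final byte is zero has no sentinel and is malformed. Finding the sentinel amounts to counting the final byte's leading zeros, a minimal sketch (the sample bytes are invented):

    const std = @import("std");

    test "reverse bitstream start-bit sentinel" {
        const stream = [_]u8{ 0xAA, 0x05 }; // final byte: 0b0000_0101
        const last = stream[stream.len - 1];
        if (last == 0) return error.MissingStartBit;
        const padding = @clz(last); // 5 zero bits of padding
        // Discard the padding and the sentinel bit itself; the remaining
        // 2 bits of the final byte plus all earlier bytes are payload.
        const payload_bits = 8 * (stream.len - 1) + (7 - padding);
        try std.testing.expectEqual(@as(usize, 10), payload_bits);
    }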
+const ReverseBitReader = struct { + bytes: []const u8, + remaining: usize, + bits: u8, + count: u4, + + fn init(bytes: []const u8) error{MissingStartBit}!ReverseBitReader { + var result: ReverseBitReader = .{ + .bytes = bytes, + .remaining = bytes.len, + .bits = 0, + .count = 0, + }; + if (bytes.len == 0) return result; + for (0..8) |_| if (0 != (result.readBitsNoEof(u1, 1) catch unreachable)) return result; + return error.MissingStartBit; + } + + fn initBits(comptime T: type, out: anytype, num: u16) Bits(T) { + const UT = std.meta.Int(.unsigned, @bitSizeOf(T)); + return .{ + @bitCast(@as(UT, @intCast(out))), + num, + }; + } + + fn readBitsNoEof(self: *ReverseBitReader, comptime T: type, num: u16) error{EndOfStream}!T { + const b, const c = try self.readBitsTuple(T, num); + if (c < num) return error.EndOfStream; + return b; + } + + fn readBits(self: *ReverseBitReader, comptime T: type, num: u16, out_bits: *u16) !T { + const b, const c = try self.readBitsTuple(T, num); + out_bits.* = c; + return b; + } + + fn readBitsTuple(self: *ReverseBitReader, comptime T: type, num: u16) !Bits(T) { + const UT = std.meta.Int(.unsigned, @bitSizeOf(T)); + const U = if (@bitSizeOf(T) < 8) u8 else UT; + + if (num <= self.count) return initBits(T, self.removeBits(@intCast(num)), num); + + var out_count: u16 = self.count; + var out: U = self.removeBits(self.count); + + const full_bytes_left = (num - out_count) / 8; + + for (0..full_bytes_left) |_| { + const byte = takeByte(self) catch |err| switch (err) { + error.EndOfStream => return initBits(T, out, out_count), + }; + if (U == u8) out = 0 else out <<= 8; + out |= byte; + out_count += 8; + } + + const bits_left = num - out_count; + const keep = 8 - bits_left; + + if (bits_left == 0) return initBits(T, out, out_count); + + const final_byte = takeByte(self) catch |err| switch (err) { + error.EndOfStream => return initBits(T, out, out_count), + }; + + out <<= @intCast(bits_left); + out |= final_byte >> @intCast(keep); + self.bits = final_byte & low_bit_mask[keep]; + + self.count = @intCast(keep); + return initBits(T, out, num); + } + + fn takeByte(rbr: *ReverseBitReader) error{EndOfStream}!u8 { + if (rbr.remaining == 0) return error.EndOfStream; + rbr.remaining -= 1; + return rbr.bytes[rbr.remaining]; + } + + fn isEmpty(self: *const ReverseBitReader) bool { + return self.remaining == 0 and self.count == 0; + } + + fn removeBits(self: *ReverseBitReader, num: u4) u8 { + if (num == 8) { + self.count = 0; + return self.bits; + } + + const keep = self.count - num; + const bits = self.bits >> @intCast(keep); + self.bits &= low_bit_mask[keep]; + + self.count = keep; + return bits; + } +}; + +const BitReader = struct { + bytes: []const u8, + index: usize = 0, + bits: u8 = 0, + count: u4 = 0, + + fn initBits(comptime T: type, out: anytype, num: u16) Bits(T) { + const UT = std.meta.Int(.unsigned, @bitSizeOf(T)); + return .{ + @bitCast(@as(UT, @intCast(out))), + num, + }; + } + + fn readBitsNoEof(self: *@This(), comptime T: type, num: u16) !T { + const b, const c = try self.readBitsTuple(T, num); + if (c < num) return error.EndOfStream; + return b; + } + + fn readBits(self: *@This(), comptime T: type, num: u16, out_bits: *u16) !T { + const b, const c = try self.readBitsTuple(T, num); + out_bits.* = c; + return b; + } + + fn readBitsTuple(self: *@This(), comptime T: type, num: u16) !Bits(T) { + const UT = std.meta.Int(.unsigned, @bitSizeOf(T)); + const U = if (@bitSizeOf(T) < 8) u8 else UT; + + if (num <= self.count) return initBits(T, self.removeBits(@intCast(num)), num); 
+ + var out_count: u16 = self.count; + var out: U = self.removeBits(self.count); + + const full_bytes_left = (num - out_count) / 8; + + for (0..full_bytes_left) |_| { + const byte = takeByte(self) catch |err| switch (err) { + error.EndOfStream => return initBits(T, out, out_count), + }; + + const pos = @as(U, byte) << @intCast(out_count); + out |= pos; + out_count += 8; + } + + const bits_left = num - out_count; + const keep = 8 - bits_left; + + if (bits_left == 0) return initBits(T, out, out_count); + + const final_byte = takeByte(self) catch |err| switch (err) { + error.EndOfStream => return initBits(T, out, out_count), + }; + + const pos = @as(U, final_byte & low_bit_mask[bits_left]) << @intCast(out_count); + out |= pos; + self.bits = final_byte >> @intCast(bits_left); + + self.count = @intCast(keep); + return initBits(T, out, num); + } + + fn takeByte(br: *BitReader) error{EndOfStream}!u8 { + if (br.bytes.len - br.index == 0) return error.EndOfStream; + const result = br.bytes[br.index]; + br.index += 1; + return result; + } + + fn removeBits(self: *@This(), num: u4) u8 { + if (num == 8) { + self.count = 0; + return self.bits; + } + + const keep = self.count - num; + const bits = self.bits & low_bit_mask[num]; + self.bits >>= @intCast(num); + self.count = keep; + return bits; + } + + fn alignToByte(self: *@This()) void { + self.bits = 0; + self.count = 0; + } +}; + +test { + _ = Table; +} diff --git a/lib/std/crypto/md5.zig b/lib/std/crypto/md5.zig index 92c8dac796..a580f826f3 100644 --- a/lib/std/crypto/md5.zig +++ b/lib/std/crypto/md5.zig @@ -54,12 +54,20 @@ pub const Md5 = struct { }; } - pub fn hash(b: []const u8, out: *[digest_length]u8, options: Options) void { + pub fn hash(data: []const u8, out: *[digest_length]u8, options: Options) void { var d = Md5.init(options); - d.update(b); + d.update(data); d.final(out); } + pub fn hashResult(data: []const u8) [digest_length]u8 { + var out: [digest_length]u8 = undefined; + var d = Md5.init(.{}); + d.update(data); + d.final(&out); + return out; + } + pub fn update(d: *Self, b: []const u8) void { var off: usize = 0; diff --git a/lib/std/elf.zig b/lib/std/elf.zig index 4e15cd3a09..2583e83d19 100644 --- a/lib/std/elf.zig +++ b/lib/std/elf.zig @@ -482,6 +482,7 @@ pub const Header = struct { is_64: bool, endian: std.builtin.Endian, os_abi: OSABI, + /// The meaning of this value depends on `os_abi`. 
abi_version: u8, type: ET, machine: EM, @@ -494,205 +495,135 @@ pub const Header = struct { shnum: u16, shstrndx: u16, - pub fn program_header_iterator(self: Header, parse_source: anytype) ProgramHeaderIterator(@TypeOf(parse_source)) { - return ProgramHeaderIterator(@TypeOf(parse_source)){ - .elf_header = self, - .parse_source = parse_source, + pub fn iterateProgramHeaders(h: Header, file_reader: *std.fs.File.Reader) ProgramHeaderIterator { + return .{ + .elf_header = h, + .file_reader = file_reader, }; } - pub fn section_header_iterator(self: Header, parse_source: anytype) SectionHeaderIterator(@TypeOf(parse_source)) { - return SectionHeaderIterator(@TypeOf(parse_source)){ - .elf_header = self, - .parse_source = parse_source, + pub fn iterateSectionHeaders(h: Header, file_reader: *std.fs.File.Reader) SectionHeaderIterator { + return .{ + .elf_header = h, + .file_reader = file_reader, }; } - pub fn read(parse_source: anytype) !Header { - var hdr_buf: [@sizeOf(Elf64_Ehdr)]u8 align(@alignOf(Elf64_Ehdr)) = undefined; - try parse_source.seekableStream().seekTo(0); - try parse_source.deprecatedReader().readNoEof(&hdr_buf); - return Header.parse(&hdr_buf); - } + pub const ReadError = std.Io.Reader.Error || error{ + InvalidElfMagic, + InvalidElfVersion, + InvalidElfClass, + InvalidElfEndian, + }; - pub fn parse(hdr_buf: *align(@alignOf(Elf64_Ehdr)) const [@sizeOf(Elf64_Ehdr)]u8) !Header { - const hdr32 = @as(*const Elf32_Ehdr, @ptrCast(hdr_buf)); - const hdr64 = @as(*const Elf64_Ehdr, @ptrCast(hdr_buf)); - if (!mem.eql(u8, hdr32.e_ident[0..4], MAGIC)) return error.InvalidElfMagic; - if (hdr32.e_ident[EI_VERSION] != 1) return error.InvalidElfVersion; + pub fn read(r: *std.Io.Reader) ReadError!Header { + const buf = try r.peek(@sizeOf(Elf64_Ehdr)); - const is_64 = switch (hdr32.e_ident[EI_CLASS]) { - ELFCLASS32 => false, - ELFCLASS64 => true, - else => return error.InvalidElfClass, - }; + if (!mem.eql(u8, buf[0..4], MAGIC)) return error.InvalidElfMagic; + if (buf[EI_VERSION] != 1) return error.InvalidElfVersion; - const endian: std.builtin.Endian = switch (hdr32.e_ident[EI_DATA]) { + const endian: std.builtin.Endian = switch (buf[EI_DATA]) { ELFDATA2LSB => .little, ELFDATA2MSB => .big, else => return error.InvalidElfEndian, }; - const need_bswap = endian != native_endian; + return switch (buf[EI_CLASS]) { + ELFCLASS32 => .init(try r.takeStruct(Elf32_Ehdr, endian), endian), + ELFCLASS64 => .init(try r.takeStruct(Elf64_Ehdr, endian), endian), + else => return error.InvalidElfClass, + }; + } + + pub fn init(hdr: anytype, endian: std.builtin.Endian) Header { // Converting integers to exhaustive enums using `@enumFromInt` could cause a panic. comptime assert(!@typeInfo(OSABI).@"enum".is_exhaustive); - const os_abi: OSABI = @enumFromInt(hdr32.e_ident[EI_OSABI]); + return .{ + .is_64 = switch (@TypeOf(hdr)) { + Elf32_Ehdr => false, + Elf64_Ehdr => true, + else => @compileError("bad type"), + }, + .endian = endian, + .os_abi = @enumFromInt(hdr.e_ident[EI_OSABI]), + .abi_version = hdr.e_ident[EI_ABIVERSION], + .type = hdr.e_type, + .machine = hdr.e_machine, + .entry = hdr.e_entry, + .phoff = hdr.e_phoff, + .shoff = hdr.e_shoff, + .phentsize = hdr.e_phentsize, + .phnum = hdr.e_phnum, + .shentsize = hdr.e_shentsize, + .shnum = hdr.e_shnum, + .shstrndx = hdr.e_shstrndx, + }; + } +}; - // The meaning of this value depends on `os_abi` so just make it available as `u8`. 
- const abi_version = hdr32.e_ident[EI_ABIVERSION]; +pub const ProgramHeaderIterator = struct { + elf_header: Header, + file_reader: *std.fs.File.Reader, + index: usize = 0, - const @"type" = if (need_bswap) blk: { - comptime assert(!@typeInfo(ET).@"enum".is_exhaustive); - const value = @intFromEnum(hdr32.e_type); - break :blk @as(ET, @enumFromInt(@byteSwap(value))); - } else hdr32.e_type; + pub fn next(it: *ProgramHeaderIterator) !?Elf64_Phdr { + if (it.index >= it.elf_header.phnum) return null; + defer it.index += 1; - const machine = if (need_bswap) blk: { - comptime assert(!@typeInfo(EM).@"enum".is_exhaustive); - const value = @intFromEnum(hdr32.e_machine); - break :blk @as(EM, @enumFromInt(@byteSwap(value))); - } else hdr32.e_machine; + if (it.elf_header.is_64) { + const offset = it.elf_header.phoff + @sizeOf(Elf64_Phdr) * it.index; + try it.file_reader.seekTo(offset); + const phdr = try it.file_reader.interface.takeStruct(Elf64_Phdr, it.elf_header.endian); + return phdr; + } - return @as(Header, .{ - .is_64 = is_64, - .endian = endian, - .os_abi = os_abi, - .abi_version = abi_version, - .type = @"type", - .machine = machine, - .entry = int(is_64, need_bswap, hdr32.e_entry, hdr64.e_entry), - .phoff = int(is_64, need_bswap, hdr32.e_phoff, hdr64.e_phoff), - .shoff = int(is_64, need_bswap, hdr32.e_shoff, hdr64.e_shoff), - .phentsize = int(is_64, need_bswap, hdr32.e_phentsize, hdr64.e_phentsize), - .phnum = int(is_64, need_bswap, hdr32.e_phnum, hdr64.e_phnum), - .shentsize = int(is_64, need_bswap, hdr32.e_shentsize, hdr64.e_shentsize), - .shnum = int(is_64, need_bswap, hdr32.e_shnum, hdr64.e_shnum), - .shstrndx = int(is_64, need_bswap, hdr32.e_shstrndx, hdr64.e_shstrndx), - }); + const offset = it.elf_header.phoff + @sizeOf(Elf32_Phdr) * it.index; + try it.file_reader.seekTo(offset); + const phdr = try it.file_reader.interface.takeStruct(Elf32_Phdr, it.elf_header.endian); + return .{ + .p_type = phdr.p_type, + .p_offset = phdr.p_offset, + .p_vaddr = phdr.p_vaddr, + .p_paddr = phdr.p_paddr, + .p_filesz = phdr.p_filesz, + .p_memsz = phdr.p_memsz, + .p_flags = phdr.p_flags, + .p_align = phdr.p_align, + }; } }; -pub fn ProgramHeaderIterator(comptime ParseSource: anytype) type { - return struct { - elf_header: Header, - parse_source: ParseSource, - index: usize = 0, - - pub fn next(self: *@This()) !?Elf64_Phdr { - if (self.index >= self.elf_header.phnum) return null; - defer self.index += 1; - - if (self.elf_header.is_64) { - var phdr: Elf64_Phdr = undefined; - const offset = self.elf_header.phoff + @sizeOf(@TypeOf(phdr)) * self.index; - try self.parse_source.seekableStream().seekTo(offset); - try self.parse_source.deprecatedReader().readNoEof(mem.asBytes(&phdr)); - - // ELF endianness matches native endianness. - if (self.elf_header.endian == native_endian) return phdr; - - // Convert fields to native endianness. - mem.byteSwapAllFields(Elf64_Phdr, &phdr); - return phdr; - } - - var phdr: Elf32_Phdr = undefined; - const offset = self.elf_header.phoff + @sizeOf(@TypeOf(phdr)) * self.index; - try self.parse_source.seekableStream().seekTo(offset); - try self.parse_source.deprecatedReader().readNoEof(mem.asBytes(&phdr)); - - // ELF endianness does NOT match native endianness. - if (self.elf_header.endian != native_endian) { - // Convert fields to native endianness. - mem.byteSwapAllFields(Elf32_Phdr, &phdr); - } - - // Convert 32-bit header to 64-bit. 
- return Elf64_Phdr{ - .p_type = phdr.p_type, - .p_offset = phdr.p_offset, - .p_vaddr = phdr.p_vaddr, - .p_paddr = phdr.p_paddr, - .p_filesz = phdr.p_filesz, - .p_memsz = phdr.p_memsz, - .p_flags = phdr.p_flags, - .p_align = phdr.p_align, - }; - } - }; -} +pub const SectionHeaderIterator = struct { + elf_header: Header, + file_reader: *std.fs.File.Reader, + index: usize = 0, -pub fn SectionHeaderIterator(comptime ParseSource: anytype) type { - return struct { - elf_header: Header, - parse_source: ParseSource, - index: usize = 0, - - pub fn next(self: *@This()) !?Elf64_Shdr { - if (self.index >= self.elf_header.shnum) return null; - defer self.index += 1; - - if (self.elf_header.is_64) { - var shdr: Elf64_Shdr = undefined; - const offset = self.elf_header.shoff + @sizeOf(@TypeOf(shdr)) * self.index; - try self.parse_source.seekableStream().seekTo(offset); - try self.parse_source.deprecatedReader().readNoEof(mem.asBytes(&shdr)); - - // ELF endianness matches native endianness. - if (self.elf_header.endian == native_endian) return shdr; - - // Convert fields to native endianness. - mem.byteSwapAllFields(Elf64_Shdr, &shdr); - return shdr; - } - - var shdr: Elf32_Shdr = undefined; - const offset = self.elf_header.shoff + @sizeOf(@TypeOf(shdr)) * self.index; - try self.parse_source.seekableStream().seekTo(offset); - try self.parse_source.deprecatedReader().readNoEof(mem.asBytes(&shdr)); - - // ELF endianness does NOT match native endianness. - if (self.elf_header.endian != native_endian) { - // Convert fields to native endianness. - mem.byteSwapAllFields(Elf32_Shdr, &shdr); - } - - // Convert 32-bit header to 64-bit. - return Elf64_Shdr{ - .sh_name = shdr.sh_name, - .sh_type = shdr.sh_type, - .sh_flags = shdr.sh_flags, - .sh_addr = shdr.sh_addr, - .sh_offset = shdr.sh_offset, - .sh_size = shdr.sh_size, - .sh_link = shdr.sh_link, - .sh_info = shdr.sh_info, - .sh_addralign = shdr.sh_addralign, - .sh_entsize = shdr.sh_entsize, - }; - } - }; -} + pub fn next(it: *SectionHeaderIterator) !?Elf64_Shdr { + if (it.index >= it.elf_header.shnum) return null; + defer it.index += 1; -fn int(is_64: bool, need_bswap: bool, int_32: anytype, int_64: anytype) @TypeOf(int_64) { - if (is_64) { - if (need_bswap) { - return @byteSwap(int_64); - } else { - return int_64; + if (it.elf_header.is_64) { + try it.file_reader.seekTo(it.elf_header.shoff + @sizeOf(Elf64_Shdr) * it.index); + const shdr = try it.file_reader.interface.takeStruct(Elf64_Shdr, it.elf_header.endian); + return shdr; } - } else { - return int32(need_bswap, int_32, @TypeOf(int_64)); - } -} -fn int32(need_bswap: bool, int_32: anytype, comptime Int64: anytype) Int64 { - if (need_bswap) { - return @byteSwap(int_32); - } else { - return int_32; + try it.file_reader.seekTo(it.elf_header.shoff + @sizeOf(Elf32_Shdr) * it.index); + const shdr = try it.file_reader.interface.takeStruct(Elf32_Shdr, it.elf_header.endian); + return .{ + .sh_name = shdr.sh_name, + .sh_type = shdr.sh_type, + .sh_flags = shdr.sh_flags, + .sh_addr = shdr.sh_addr, + .sh_offset = shdr.sh_offset, + .sh_size = shdr.sh_size, + .sh_link = shdr.sh_link, + .sh_info = shdr.sh_info, + .sh_addralign = shdr.sh_addralign, + .sh_entsize = shdr.sh_entsize, + }; } -} +}; pub const ELFCLASSNONE = 0; pub const ELFCLASS32 = 1; @@ -2070,7 +2001,7 @@ pub const R_AARCH64 = enum(u32) { TLSLE_LDST64_TPREL_LO12 = 558, /// Likewise; no check. TLSLE_LDST64_TPREL_LO12_NC = 559, - /// PC-rel. load immediate 20:2. + /// PC-rel. load immediate 20:2. TLSDESC_LD_PREL19 = 560, /// PC-rel. ADR immediate 20:0. 
TLSDESC_ADR_PREL21 = 561, diff --git a/lib/std/fs/AtomicFile.zig b/lib/std/fs/AtomicFile.zig index 17a17f8993..96793aec72 100644 --- a/lib/std/fs/AtomicFile.zig +++ b/lib/std/fs/AtomicFile.zig @@ -1,6 +1,13 @@ -file: File, -// TODO either replace this with rand_buf or use []u16 on Windows -tmp_path_buf: [tmp_path_len:0]u8, +const AtomicFile = @This(); +const std = @import("../std.zig"); +const File = std.fs.File; +const Dir = std.fs.Dir; +const fs = std.fs; +const assert = std.debug.assert; +const posix = std.posix; + +file_writer: File.Writer, +random_integer: u64, dest_basename: []const u8, file_open: bool, file_exists: bool, @@ -9,35 +16,24 @@ dir: Dir, pub const InitError = File.OpenError; -pub const random_bytes_len = 12; -const tmp_path_len = fs.base64_encoder.calcSize(random_bytes_len); - /// Note that the `Dir.atomicFile` API may be more handy than this lower-level function. pub fn init( dest_basename: []const u8, mode: File.Mode, dir: Dir, close_dir_on_deinit: bool, + write_buffer: []u8, ) InitError!AtomicFile { - var rand_buf: [random_bytes_len]u8 = undefined; - var tmp_path_buf: [tmp_path_len:0]u8 = undefined; while (true) { - std.crypto.random.bytes(rand_buf[0..]); - const tmp_path = fs.base64_encoder.encode(&tmp_path_buf, &rand_buf); - tmp_path_buf[tmp_path.len] = 0; - - const file = dir.createFile( - tmp_path, - .{ .mode = mode, .exclusive = true }, - ) catch |err| switch (err) { + const random_integer = std.crypto.random.int(u64); + const tmp_sub_path = std.fmt.hex(random_integer); + const file = dir.createFile(&tmp_sub_path, .{ .mode = mode, .exclusive = true }) catch |err| switch (err) { error.PathAlreadyExists => continue, else => |e| return e, }; - - return AtomicFile{ - .file = file, - .tmp_path_buf = tmp_path_buf, + return .{ + .file_writer = file.writer(write_buffer), + .random_integer = random_integer, + .dest_basename = dest_basename, + .file_open = true, + .file_exists = true, @@ -48,41 +44,51 @@ } /// Always call deinit, even after a successful finish(). -pub fn deinit(self: *AtomicFile) void { - if (self.file_open) { - self.file.close(); - self.file_open = false; +pub fn deinit(af: *AtomicFile) void { + if (af.file_open) { + af.file_writer.file.close(); + af.file_open = false; } - if (self.file_exists) { - self.dir.deleteFile(&self.tmp_path_buf) catch {}; - self.file_exists = false; + if (af.file_exists) { + const tmp_sub_path = std.fmt.hex(af.random_integer); + af.dir.deleteFile(&tmp_sub_path) catch {}; + af.file_exists = false; } - if (self.close_dir_on_deinit) { - self.dir.close(); + if (af.close_dir_on_deinit) { + af.dir.close(); } - self.* = undefined; + af.* = undefined; } -pub const FinishError = posix.RenameError; +pub const FlushError = File.WriteError; + +pub fn flush(af: *AtomicFile) FlushError!void { + af.file_writer.interface.flush() catch |err| switch (err) { + error.WriteFailed => return af.file_writer.err.?, + }; +} + +pub const RenameIntoPlaceError = posix.RenameError; /// On Windows, this function introduces a period of time where some file /// system operations on the destination file will result in /// `error.AccessDenied`, including rename operations (such as the one used in /// this function). 
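With this change an `AtomicFile` wraps a buffered `File.Writer`, and the old `finish` is split so callers can separate flushing buffered bytes from the destructive rename. A minimal usage sketch against the new API (`writeConfigAtomically` and the file names are illustrative; `dir` is assumed to be an open `std.fs.Dir`):

    const std = @import("std");

    fn writeConfigAtomically(dir: std.fs.Dir) !void {
        var buffer: [1024]u8 = undefined;
        var af = try dir.atomicFile("config.txt", .{ .write_buffer = &buffer });
        defer af.deinit(); // safe to call even after a successful finish()
        try af.file_writer.interface.writeAll("key = value\n");
        // finish() = flush() + renameIntoPlace(); nothing becomes visible
        // at "config.txt" until the rename succeeds.
        try af.finish();
    }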
-pub fn finish(self: *AtomicFile) FinishError!void { - assert(self.file_exists); - if (self.file_open) { - self.file.close(); - self.file_open = false; +pub fn renameIntoPlace(af: *AtomicFile) RenameIntoPlaceError!void { + assert(af.file_exists); + if (af.file_open) { + af.file_writer.file.close(); + af.file_open = false; } - try posix.renameat(self.dir.fd, self.tmp_path_buf[0..], self.dir.fd, self.dest_basename); - self.file_exists = false; + const tmp_sub_path = std.fmt.hex(af.random_integer); + try posix.renameat(af.dir.fd, &tmp_sub_path, af.dir.fd, af.dest_basename); + af.file_exists = false; } -const AtomicFile = @This(); -const std = @import("../std.zig"); -const File = std.fs.File; -const Dir = std.fs.Dir; -const fs = std.fs; -const assert = std.debug.assert; -const posix = std.posix; +pub const FinishError = FlushError || RenameIntoPlaceError; + +/// Combination of `flush` followed by `renameIntoPlace`. +pub fn finish(af: *AtomicFile) FinishError!void { + try af.flush(); + try af.renameIntoPlace(); +} diff --git a/lib/std/fs/Dir.zig b/lib/std/fs/Dir.zig index 27d97a00cb..16418d216f 100644 --- a/lib/std/fs/Dir.zig +++ b/lib/std/fs/Dir.zig @@ -1,3 +1,20 @@ +const Dir = @This(); +const builtin = @import("builtin"); +const std = @import("../std.zig"); +const File = std.fs.File; +const AtomicFile = std.fs.AtomicFile; +const base64_encoder = fs.base64_encoder; +const posix = std.posix; +const mem = std.mem; +const path = fs.path; +const fs = std.fs; +const Allocator = std.mem.Allocator; +const assert = std.debug.assert; +const linux = std.os.linux; +const windows = std.os.windows; +const native_os = builtin.os.tag; +const have_flock = @TypeOf(posix.system.flock) != void; + fd: Handle, pub const Handle = posix.fd_t; @@ -1862,9 +1879,10 @@ pub fn symLinkW( /// Same as `symLink`, except tries to create the symbolic link until it /// succeeds or encounters an error other than `error.PathAlreadyExists`. -/// On Windows, both paths should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/). -/// On WASI, both paths should be encoded as valid UTF-8. -/// On other platforms, both paths are an opaque sequence of bytes with no particular encoding. +/// +/// * On Windows, both paths should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/). +/// * On WASI, both paths should be encoded as valid UTF-8. +/// * On other platforms, both paths are an opaque sequence of bytes with no particular encoding. 
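The `atomicSymLink` loop below now derives its temporary name with `std.fmt.hex`, which for a `u64` returns a fixed `[16]u8` of lowercase hex digits, hence `rand_len = @sizeOf(u64) * 2`. A quick check of that shape (the value is arbitrary):

    const std = @import("std");

    test "hex temp-name shape" {
        const name = std.fmt.hex(@as(u64, 0xDEADBEEF)); // [16]u8, no allocation
        try std.testing.expectEqualStrings("00000000deadbeef", &name);
    }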
pub fn atomicSymLink( dir: Dir, target_path: []const u8, @@ -1880,9 +1898,8 @@ pub fn atomicSymLink( const dirname = path.dirname(sym_link_path) orelse "."; - var rand_buf: [AtomicFile.random_bytes_len]u8 = undefined; - - const temp_path_len = dirname.len + 1 + base64_encoder.calcSize(rand_buf.len); + const rand_len = @sizeOf(u64) * 2; + const temp_path_len = dirname.len + 1 + rand_len; var temp_path_buf: [fs.max_path_bytes]u8 = undefined; if (temp_path_len > temp_path_buf.len) return error.NameTooLong; @@ -1892,8 +1909,8 @@ pub fn atomicSymLink( const temp_path = temp_path_buf[0..temp_path_len]; while (true) { - crypto.random.bytes(rand_buf[0..]); - _ = base64_encoder.encode(temp_path[dirname.len + 1 ..], rand_buf[0..]); + const random_integer = std.crypto.random.int(u64); + temp_path[dirname.len + 1 ..][0..rand_len].* = std.fmt.hex(random_integer); if (dir.symLink(target_path, temp_path, flags)) { return dir.rename(temp_path, sym_link_path); @@ -2552,25 +2569,42 @@ pub fn updateFile( try dest_dir.makePath(dirname); } - var atomic_file = try dest_dir.atomicFile(dest_path, .{ .mode = actual_mode }); + var buffer: [1000]u8 = undefined; // Used only when direct fd-to-fd is not available. + var atomic_file = try dest_dir.atomicFile(dest_path, .{ + .mode = actual_mode, + .write_buffer = &buffer, + }); defer atomic_file.deinit(); - try atomic_file.file.writeFileAll(src_file, .{ .in_len = src_stat.size }); - try atomic_file.file.updateTimes(src_stat.atime, src_stat.mtime); + var src_reader: File.Reader = .initSize(src_file, &.{}, src_stat.size); + const dest_writer = &atomic_file.file_writer.interface; + + _ = dest_writer.sendFileAll(&src_reader, .unlimited) catch |err| switch (err) { + error.ReadFailed => return src_reader.err.?, + error.WriteFailed => return atomic_file.file_writer.err.?, + }; + try atomic_file.file_writer.file.updateTimes(src_stat.atime, src_stat.mtime); try atomic_file.finish(); - return PrevStatus.stale; + return .stale; } pub const CopyFileError = File.OpenError || File.StatError || - AtomicFile.InitError || CopyFileRawError || AtomicFile.FinishError; + AtomicFile.InitError || AtomicFile.FinishError || + File.ReadError || File.WriteError; -/// Guaranteed to be atomic. -/// On Linux, until https://patchwork.kernel.org/patch/9636735/ is merged and readily available, -/// there is a possibility of power loss or application termination leaving temporary files present -/// in the same directory as dest_path. -/// On Windows, both paths should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/). -/// On WASI, both paths should be encoded as valid UTF-8. -/// On other platforms, both paths are an opaque sequence of bytes with no particular encoding. +/// Atomically creates a new file at `dest_path` within `dest_dir` with the +/// same contents as `source_path` within `source_dir`, overwriting any already +/// existing file. +/// +/// On Linux, until https://patchwork.kernel.org/patch/9636735/ is merged and +/// readily available, there is a possibility of power loss or application +/// termination leaving temporary files present in the same directory as +/// dest_path. +/// +/// On Windows, both paths should be encoded as +/// [WTF-8](https://simonsapin.github.io/wtf-8/). On WASI, both paths should be +/// encoded as valid UTF-8. On other platforms, both paths are an opaque +/// sequence of bytes with no particular encoding. 
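// A standalone sketch of the File.Reader -> File.Writer streaming pattern that
// updateFile (above) and copyFile (below) now build on. Assumes two already
// open files; the sendFileAll error set is mapped back to the concrete errors
// recorded on the reader/writer, exactly as the surrounding diff does.
const std = @import("std");

fn streamCopy(src: std.fs.File, dest: std.fs.File) !void {
    var src_reader: std.fs.File.Reader = .init(src, &.{});
    var write_buffer: [1024]u8 = undefined; // Used only when direct fd-to-fd copying is unavailable.
    var dest_writer = dest.writer(&write_buffer);

    _ = dest_writer.interface.sendFileAll(&src_reader, .unlimited) catch |err| switch (err) {
        error.ReadFailed => return src_reader.err.?,
        error.WriteFailed => return dest_writer.err.?,
    };
    try dest_writer.interface.flush();
}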
pub fn copyFile( source_dir: Dir, source_path: []const u8, @@ -2578,79 +2612,34 @@ pub fn copyFile( dest_path: []const u8, options: CopyFileOptions, ) CopyFileError!void { - var in_file = try source_dir.openFile(source_path, .{}); - defer in_file.close(); + var file_reader: File.Reader = .init(try source_dir.openFile(source_path, .{}), &.{}); + defer file_reader.file.close(); - var size: ?u64 = null; const mode = options.override_mode orelse blk: { - const st = try in_file.stat(); - size = st.size; + const st = try file_reader.file.stat(); + file_reader.size = st.size; break :blk st.mode; }; - var atomic_file = try dest_dir.atomicFile(dest_path, .{ .mode = mode }); + var buffer: [1024]u8 = undefined; // Used only when direct fd-to-fd is not available. + var atomic_file = try dest_dir.atomicFile(dest_path, .{ + .mode = mode, + .write_buffer = &buffer, + }); defer atomic_file.deinit(); - try copy_file(in_file.handle, atomic_file.file.handle, size); - try atomic_file.finish(); -} - -const CopyFileRawError = error{SystemResources} || posix.CopyFileRangeError || posix.SendFileError; - -// Transfer all the data between two file descriptors in the most efficient way. -// The copy starts at offset 0, the initial offsets are preserved. -// No metadata is transferred over. -fn copy_file(fd_in: posix.fd_t, fd_out: posix.fd_t, maybe_size: ?u64) CopyFileRawError!void { - if (builtin.target.os.tag.isDarwin()) { - const rc = posix.system.fcopyfile(fd_in, fd_out, null, .{ .DATA = true }); - switch (posix.errno(rc)) { - .SUCCESS => return, - .INVAL => unreachable, - .NOMEM => return error.SystemResources, - // The source file is not a directory, symbolic link, or regular file. - // Try with the fallback path before giving up. - .OPNOTSUPP => {}, - else => |err| return posix.unexpectedErrno(err), - } - } - - if (native_os == .linux) { - // Try copy_file_range first as that works at the FS level and is the - // most efficient method (if available). - var offset: u64 = 0; - cfr_loop: while (true) { - // The kernel checks the u64 value `offset+count` for overflow, use - // a 32 bit value so that the syscall won't return EINVAL except for - // impossibly large files (> 2^64-1 - 2^32-1). - const amt = try posix.copy_file_range(fd_in, offset, fd_out, offset, std.math.maxInt(u32), 0); - // Terminate as soon as we have copied size bytes or no bytes - if (maybe_size) |s| { - if (s == amt) break :cfr_loop; - } - if (amt == 0) break :cfr_loop; - offset += amt; - } - return; - } + _ = atomic_file.file_writer.interface.sendFileAll(&file_reader, .unlimited) catch |err| switch (err) { + error.ReadFailed => return file_reader.err.?, + error.WriteFailed => return atomic_file.file_writer.err.?, + }; - // Sendfile is a zero-copy mechanism iff the OS supports it, otherwise the - // fallback code will copy the contents chunk by chunk. 
-    const empty_iovec = [0]posix.iovec_const{};
-    var offset: u64 = 0;
-    sendfile_loop: while (true) {
-        const amt = try posix.sendfile(fd_out, fd_in, offset, 0, &empty_iovec, &empty_iovec, 0);
-        // Terminate as soon as we have copied size bytes or no bytes
-        if (maybe_size) |s| {
-            if (s == amt) break :sendfile_loop;
-        }
-        if (amt == 0) break :sendfile_loop;
-        offset += amt;
-    }
+    try atomic_file.finish();
 }
 
 pub const AtomicFileOptions = struct {
     mode: File.Mode = File.default_mode,
     make_path: bool = false,
+    write_buffer: []u8,
 };
 
-/// Directly access the `.file` field, and then call `AtomicFile.finish` to
+/// Directly access the `.file_writer` field, and then call `AtomicFile.finish` to
@@ -2668,9 +2657,9 @@ pub fn atomicFile(self: Dir, dest_path: []const u8, options: AtomicFileOptions)
         else
             try self.openDir(dirname, .{});
 
-        return AtomicFile.init(fs.path.basename(dest_path), options.mode, dir, true);
+        return .init(fs.path.basename(dest_path), options.mode, dir, true, options.write_buffer);
     } else {
-        return AtomicFile.init(dest_path, options.mode, self, false);
+        return .init(dest_path, options.mode, self, false, options.write_buffer);
     }
 }
 
@@ -2768,30 +2757,3 @@ pub fn setPermissions(self: Dir, permissions: Permissions) SetPermissionsError!v
     const file: File = .{ .handle = self.fd };
     try file.setPermissions(permissions);
 }
-
-const Metadata = File.Metadata;
-pub const MetadataError = File.MetadataError;
-
-/// Returns a `Metadata` struct, representing the permissions on the directory
-pub fn metadata(self: Dir) MetadataError!Metadata {
-    const file: File = .{ .handle = self.fd };
-    return try file.metadata();
-}
-
-const Dir = @This();
-const builtin = @import("builtin");
-const std = @import("../std.zig");
-const File = std.fs.File;
-const AtomicFile = std.fs.AtomicFile;
-const base64_encoder = fs.base64_encoder;
-const crypto = std.crypto;
-const posix = std.posix;
-const mem = std.mem;
-const path = fs.path;
-const fs = std.fs;
-const Allocator = std.mem.Allocator;
-const assert = std.debug.assert;
-const linux = std.os.linux;
-const windows = std.os.windows;
-const native_os = builtin.os.tag;
-const have_flock = @TypeOf(posix.system.flock) != void;
diff --git a/lib/std/fs/File.zig b/lib/std/fs/File.zig
index 5b7e0aa570..fd965babfc 100644
--- a/lib/std/fs/File.zig
+++ b/lib/std/fs/File.zig
@@ -1089,113 +1089,6 @@ pub fn copyRangeAll(in: File, in_offset: u64, out: File, out_offset: u64, len: u
     return total_bytes_copied;
 }
 
-/// Deprecated in favor of `Writer`.
-pub const WriteFileOptions = struct {
-    in_offset: u64 = 0,
-    in_len: ?u64 = null,
-    headers_and_trailers: []posix.iovec_const = &[0]posix.iovec_const{},
-    header_count: usize = 0,
-};
-
-/// Deprecated in favor of `Writer`.
-pub const WriteFileError = ReadError || error{EndOfStream} || WriteError;
-
-/// Deprecated in favor of `Writer`.
-pub fn writeFileAll(self: File, in_file: File, args: WriteFileOptions) WriteFileError!void {
-    return self.writeFileAllSendfile(in_file, args) catch |err| switch (err) {
-        error.Unseekable,
-        error.FastOpenAlreadyInProgress,
-        error.MessageTooBig,
-        error.FileDescriptorNotASocket,
-        error.NetworkUnreachable,
-        error.NetworkSubsystemFailed,
-        error.ConnectionRefused,
-        => return self.writeFileAllUnseekable(in_file, args),
-        else => |e| return e,
-    };
-}
-
-/// Deprecated in favor of `Writer`.
-pub fn writeFileAllUnseekable(self: File, in_file: File, args: WriteFileOptions) WriteFileError!void { - const headers = args.headers_and_trailers[0..args.header_count]; - const trailers = args.headers_and_trailers[args.header_count..]; - try self.writevAll(headers); - try in_file.deprecatedReader().skipBytes(args.in_offset, .{ .buf_size = 4096 }); - var fifo = std.fifo.LinearFifo(u8, .{ .Static = 4096 }).init(); - if (args.in_len) |len| { - var stream = std.io.limitedReader(in_file.deprecatedReader(), len); - try fifo.pump(stream.reader(), self.deprecatedWriter()); - } else { - try fifo.pump(in_file.deprecatedReader(), self.deprecatedWriter()); - } - try self.writevAll(trailers); -} - -/// Deprecated in favor of `Writer`. -fn writeFileAllSendfile(self: File, in_file: File, args: WriteFileOptions) posix.SendFileError!void { - const count = blk: { - if (args.in_len) |l| { - if (l == 0) { - return self.writevAll(args.headers_and_trailers); - } else { - break :blk l; - } - } else { - break :blk 0; - } - }; - const headers = args.headers_and_trailers[0..args.header_count]; - const trailers = args.headers_and_trailers[args.header_count..]; - const zero_iovec = &[0]posix.iovec_const{}; - // When reading the whole file, we cannot put the trailers in the sendfile() syscall, - // because we have no way to determine whether a partial write is past the end of the file or not. - const trls = if (count == 0) zero_iovec else trailers; - const offset = args.in_offset; - const out_fd = self.handle; - const in_fd = in_file.handle; - const flags = 0; - var amt: usize = 0; - hdrs: { - var i: usize = 0; - while (i < headers.len) { - amt = try posix.sendfile(out_fd, in_fd, offset, count, headers[i..], trls, flags); - while (amt >= headers[i].len) { - amt -= headers[i].len; - i += 1; - if (i >= headers.len) break :hdrs; - } - headers[i].base += amt; - headers[i].len -= amt; - } - } - if (count == 0) { - var off: u64 = amt; - while (true) { - amt = try posix.sendfile(out_fd, in_fd, offset + off, 0, zero_iovec, zero_iovec, flags); - if (amt == 0) break; - off += amt; - } - } else { - var off: u64 = amt; - while (off < count) { - amt = try posix.sendfile(out_fd, in_fd, offset + off, count - off, zero_iovec, trailers, flags); - off += amt; - } - amt = @as(usize, @intCast(off - count)); - } - var i: usize = 0; - while (i < trailers.len) { - while (amt >= trailers[i].len) { - amt -= trailers[i].len; - i += 1; - if (i >= trailers.len) return; - } - trailers[i].base += amt; - trailers[i].len -= amt; - amt = try posix.writev(self.handle, trailers[i..]); - } -} - /// Deprecated in favor of `Reader`. pub const DeprecatedReader = io.GenericReader(File, ReadError, read); @@ -1242,7 +1135,7 @@ pub const Reader = struct { err: ?ReadError = null, mode: Reader.Mode = .positional, /// Tracks the true seek position in the file. To obtain the logical - /// position, subtract the buffer size from this value. + /// position, use `logicalPos`. 
pos: u64 = 0, size: ?u64 = null, size_err: ?GetEndPosError = null, @@ -1335,14 +1228,12 @@ pub const Reader = struct { pub fn seekBy(r: *Reader, offset: i64) Reader.SeekError!void { switch (r.mode) { .positional, .positional_reading => { - // TODO: make += operator allow any integer types - r.pos = @intCast(@as(i64, @intCast(r.pos)) + offset); + setPosAdjustingBuffer(r, @intCast(@as(i64, @intCast(r.pos)) + offset)); }, .streaming, .streaming_reading => { const seek_err = r.seek_err orelse e: { if (posix.lseek_CUR(r.file.handle, offset)) |_| { - // TODO: make += operator allow any integer types - r.pos = @intCast(@as(i64, @intCast(r.pos)) + offset); + setPosAdjustingBuffer(r, @intCast(@as(i64, @intCast(r.pos)) + offset)); return; } else |err| { r.seek_err = err; @@ -1358,6 +1249,8 @@ pub const Reader = struct { r.pos += n; remaining -= n; } + r.interface.seek = 0; + r.interface.end = 0; }, .failure => return r.seek_err.?, } @@ -1366,7 +1259,7 @@ pub const Reader = struct { pub fn seekTo(r: *Reader, offset: u64) Reader.SeekError!void { switch (r.mode) { .positional, .positional_reading => { - r.pos = offset; + setPosAdjustingBuffer(r, offset); }, .streaming, .streaming_reading => { if (offset >= r.pos) return Reader.seekBy(r, @intCast(offset - r.pos)); @@ -1375,12 +1268,28 @@ pub const Reader = struct { r.seek_err = err; return err; }; - r.pos = offset; + setPosAdjustingBuffer(r, offset); }, .failure => return r.seek_err.?, } } + pub fn logicalPos(r: *const Reader) u64 { + return r.pos - r.interface.bufferedLen(); + } + + fn setPosAdjustingBuffer(r: *Reader, offset: u64) void { + const logical_pos = logicalPos(r); + if (offset < logical_pos or offset >= r.pos) { + r.interface.seek = 0; + r.interface.end = 0; + r.pos = offset; + } else { + const logical_delta: usize = @intCast(offset - logical_pos); + r.interface.seek += logical_delta; + } + } + /// Number of slices to store on the stack, when trying to send as many byte /// vectors through the underlying read calls as possible. 
const max_buffers_len = 16; @@ -1526,7 +1435,7 @@ pub const Reader = struct { } return 0; }; - const n = @min(size - pos, std.math.maxInt(i64), @intFromEnum(limit)); + const n = @min(size - pos, maxInt(i64), @intFromEnum(limit)); file.seekBy(n) catch |err| { r.seek_err = err; return 0; @@ -1645,7 +1554,10 @@ pub const Writer = struct { return .{ .vtable = &.{ .drain = drain, - .sendFile = sendFile, + .sendFile = switch (builtin.zig_backend) { + else => sendFile, + .stage2_aarch64 => std.io.Writer.unimplementedSendFile, + }, }, .buffer = buffer, }; @@ -1715,7 +1627,6 @@ pub const Writer = struct { const pattern = data[data.len - 1]; if (pattern.len == 0 or splat == 0) return 0; const n = windows.WriteFile(handle, pattern, null) catch |err| { - std.debug.print("windows write file failed3: {t}\n", .{err}); w.err = err; return error.WriteFailed; }; @@ -1817,18 +1728,141 @@ pub const Writer = struct { file_reader: *Reader, limit: std.io.Limit, ) std.io.Writer.FileError!usize { + const reader_buffered = file_reader.interface.buffered(); + if (reader_buffered.len >= @intFromEnum(limit)) + return sendFileBuffered(io_w, file_reader, reader_buffered); + const writer_buffered = io_w.buffered(); + const file_limit = @intFromEnum(limit) - reader_buffered.len; const w: *Writer = @alignCast(@fieldParentPtr("interface", io_w)); const out_fd = w.file.handle; const in_fd = file_reader.file.handle; - // TODO try using copy_file_range on FreeBSD - // TODO try using sendfile on macOS - // TODO try using sendfile on FreeBSD + + if (file_reader.size) |size| { + if (size - file_reader.pos == 0) { + if (reader_buffered.len != 0) { + return sendFileBuffered(io_w, file_reader, reader_buffered); + } else { + return error.EndOfStream; + } + } + } + + if (native_os == .freebsd and w.mode == .streaming) sf: { + // Try using sendfile on FreeBSD. 
+ if (w.sendfile_err != null) break :sf; + const offset = std.math.cast(std.c.off_t, file_reader.pos) orelse break :sf; + var hdtr_data: std.c.sf_hdtr = undefined; + var headers: [2]posix.iovec_const = undefined; + var headers_i: u8 = 0; + if (writer_buffered.len != 0) { + headers[headers_i] = .{ .base = writer_buffered.ptr, .len = writer_buffered.len }; + headers_i += 1; + } + if (reader_buffered.len != 0) { + headers[headers_i] = .{ .base = reader_buffered.ptr, .len = reader_buffered.len }; + headers_i += 1; + } + const hdtr: ?*std.c.sf_hdtr = if (headers_i == 0) null else b: { + hdtr_data = .{ + .headers = &headers, + .hdr_cnt = headers_i, + .trailers = null, + .trl_cnt = 0, + }; + break :b &hdtr_data; + }; + var sbytes: std.c.off_t = undefined; + const nbytes: usize = @min(file_limit, maxInt(usize)); + const flags = 0; + switch (posix.errno(std.c.sendfile(in_fd, out_fd, offset, nbytes, hdtr, &sbytes, flags))) { + .SUCCESS, .INTR => {}, + .INVAL, .OPNOTSUPP, .NOTSOCK, .NOSYS => w.sendfile_err = error.UnsupportedOperation, + .BADF => if (builtin.mode == .Debug) @panic("race condition") else { + w.sendfile_err = error.Unexpected; + }, + .FAULT => if (builtin.mode == .Debug) @panic("segmentation fault") else { + w.sendfile_err = error.Unexpected; + }, + .NOTCONN => w.sendfile_err = error.BrokenPipe, + .AGAIN, .BUSY => if (sbytes == 0) { + w.sendfile_err = error.WouldBlock; + }, + .IO => w.sendfile_err = error.InputOutput, + .PIPE => w.sendfile_err = error.BrokenPipe, + .NOBUFS => w.sendfile_err = error.SystemResources, + else => |err| w.sendfile_err = posix.unexpectedErrno(err), + } + if (sbytes == 0) { + file_reader.size = file_reader.pos; + return error.EndOfStream; + } + const consumed = io_w.consume(@intCast(sbytes)); + file_reader.seekTo(file_reader.pos + consumed) catch return error.ReadFailed; + return consumed; + } + + if (native_os.isDarwin() and w.mode == .streaming) sf: { + // Try using sendfile on macOS. + if (w.sendfile_err != null) break :sf; + const offset = std.math.cast(std.c.off_t, file_reader.pos) orelse break :sf; + var hdtr_data: std.c.sf_hdtr = undefined; + var headers: [2]posix.iovec_const = undefined; + var headers_i: u8 = 0; + if (writer_buffered.len != 0) { + headers[headers_i] = .{ .base = writer_buffered.ptr, .len = writer_buffered.len }; + headers_i += 1; + } + if (reader_buffered.len != 0) { + headers[headers_i] = .{ .base = reader_buffered.ptr, .len = reader_buffered.len }; + headers_i += 1; + } + const hdtr: ?*std.c.sf_hdtr = if (headers_i == 0) null else b: { + hdtr_data = .{ + .headers = &headers, + .hdr_cnt = headers_i, + .trailers = null, + .trl_cnt = 0, + }; + break :b &hdtr_data; + }; + const max_count = maxInt(i32); // Avoid EINVAL. 
+ var len: std.c.off_t = @min(file_limit, max_count); + const flags = 0; + switch (posix.errno(std.c.sendfile(in_fd, out_fd, offset, &len, hdtr, flags))) { + .SUCCESS, .INTR => {}, + .OPNOTSUPP, .NOTSOCK, .NOSYS => w.sendfile_err = error.UnsupportedOperation, + .BADF => if (builtin.mode == .Debug) @panic("race condition") else { + w.sendfile_err = error.Unexpected; + }, + .FAULT => if (builtin.mode == .Debug) @panic("segmentation fault") else { + w.sendfile_err = error.Unexpected; + }, + .INVAL => if (builtin.mode == .Debug) @panic("invalid API usage") else { + w.sendfile_err = error.Unexpected; + }, + .NOTCONN => w.sendfile_err = error.BrokenPipe, + .AGAIN => if (len == 0) { + w.sendfile_err = error.WouldBlock; + }, + .IO => w.sendfile_err = error.InputOutput, + .PIPE => w.sendfile_err = error.BrokenPipe, + else => |err| w.sendfile_err = posix.unexpectedErrno(err), + } + if (len == 0) { + file_reader.size = file_reader.pos; + return error.EndOfStream; + } + const consumed = io_w.consume(@bitCast(len)); + file_reader.seekTo(file_reader.pos + consumed) catch return error.ReadFailed; + return consumed; + } + if (native_os == .linux and w.mode == .streaming) sf: { // Try using sendfile on Linux. if (w.sendfile_err != null) break :sf; // Linux sendfile does not support headers. - const buffered = limit.slice(file_reader.interface.buffer); - if (io_w.end != 0 or buffered.len != 0) return drain(io_w, &.{buffered}, 1); + if (writer_buffered.len != 0 or reader_buffered.len != 0) + return sendFileBuffered(io_w, file_reader, reader_buffered); const max_count = 0x7ffff000; // Avoid EINVAL. var off: std.os.linux.off_t = undefined; const off_ptr: ?*std.os.linux.off_t, const count: usize = switch (file_reader.mode) { @@ -1875,6 +1909,7 @@ pub const Writer = struct { w.pos += n; return n; } + const copy_file_range = switch (native_os) { .freebsd => std.os.freebsd.copy_file_range, .linux => if (std.c.versionCheck(.{ .major = 2, .minor = 27, .patch = 0 })) std.os.linux.wrapped.copy_file_range else {}, @@ -1882,8 +1917,8 @@ pub const Writer = struct { }; if (@TypeOf(copy_file_range) != void) cfr: { if (w.copy_file_range_err != null) break :cfr; - const buffered = limit.slice(file_reader.interface.buffer); - if (io_w.end != 0 or buffered.len != 0) return drain(io_w, &.{buffered}, 1); + if (writer_buffered.len != 0 or reader_buffered.len != 0) + return sendFileBuffered(io_w, file_reader, reader_buffered); var off_in: i64 = undefined; var off_out: i64 = undefined; const off_in_ptr: ?*i64 = switch (file_reader.mode) { @@ -1922,6 +1957,9 @@ pub const Writer = struct { if (file_reader.pos != 0) break :fcf; if (w.pos != 0) break :fcf; if (limit != .unlimited) break :fcf; + const size = file_reader.getSize() catch break :fcf; + if (writer_buffered.len != 0 or reader_buffered.len != 0) + return sendFileBuffered(io_w, file_reader, reader_buffered); const rc = std.c.fcopyfile(in_fd, out_fd, null, .{ .DATA = true }); switch (posix.errno(rc)) { .SUCCESS => {}, @@ -1942,15 +1980,24 @@ pub const Writer = struct { return 0; }, } - const n = if (file_reader.size) |size| size else @panic("TODO figure out how much copied"); - file_reader.pos = n; - w.pos = n; - return n; + file_reader.pos = size; + w.pos = size; + return size; } return error.Unimplemented; } + fn sendFileBuffered( + io_w: *std.io.Writer, + file_reader: *Reader, + reader_buffered: []const u8, + ) std.io.Writer.FileError!usize { + const n = try drain(io_w, &.{reader_buffered}, 1); + file_reader.seekTo(file_reader.pos + n) catch return error.ReadFailed; + return 
n; + } + pub fn seekTo(w: *Writer, offset: u64) SeekError!void { switch (w.mode) { .positional, .positional_reading => { @@ -1979,7 +2026,19 @@ pub const Writer = struct { /// along with other write failures. pub fn end(w: *Writer) EndError!void { try w.interface.flush(); - return w.file.setEndPos(w.pos); + switch (w.mode) { + .positional, + .positional_reading, + => w.file.setEndPos(w.pos) catch |err| switch (err) { + error.NonResizable => return, + else => |e| return e, + }, + + .streaming, + .streaming_reading, + .failure, + => {}, + } } }; diff --git a/lib/std/fs/test.zig b/lib/std/fs/test.zig index 50cbccf270..4b63873af5 100644 --- a/lib/std/fs/test.zig +++ b/lib/std/fs/test.zig @@ -1499,32 +1499,18 @@ test "sendfile" { const header2 = "second header\n"; const trailer1 = "trailer1\n"; const trailer2 = "second trailer\n"; - var hdtr = [_]posix.iovec_const{ - .{ - .base = header1, - .len = header1.len, - }, - .{ - .base = header2, - .len = header2.len, - }, - .{ - .base = trailer1, - .len = trailer1.len, - }, - .{ - .base = trailer2, - .len = trailer2.len, - }, - }; + var headers: [2][]const u8 = .{ header1, header2 }; + var trailers: [2][]const u8 = .{ trailer1, trailer2 }; var written_buf: [100]u8 = undefined; - try dest_file.writeFileAll(src_file, .{ - .in_offset = 1, - .in_len = 10, - .headers_and_trailers = &hdtr, - .header_count = 2, - }); + var file_reader = src_file.reader(&.{}); + var fallback_buffer: [50]u8 = undefined; + var file_writer = dest_file.writer(&fallback_buffer); + try file_writer.interface.writeVecAll(&headers); + try file_reader.seekTo(1); + try testing.expectEqual(10, try file_writer.interface.sendFileAll(&file_reader, .limited(10))); + try file_writer.interface.writeVecAll(&trailers); + try file_writer.interface.flush(); const amt = try dest_file.preadAll(&written_buf, 0); try testing.expectEqualStrings("header1\nsecond header\nine1\nsecontrailer1\nsecond trailer\n", written_buf[0..amt]); } @@ -1595,9 +1581,10 @@ test "AtomicFile" { ; { - var af = try ctx.dir.atomicFile(test_out_file, .{}); + var buffer: [100]u8 = undefined; + var af = try ctx.dir.atomicFile(test_out_file, .{ .write_buffer = &buffer }); defer af.deinit(); - try af.file.writeAll(test_content); + try af.file_writer.interface.writeAll(test_content); try af.finish(); } const content = try ctx.dir.readFileAlloc(allocator, test_out_file, 9999); @@ -2073,7 +2060,7 @@ test "invalid UTF-8/WTF-8 paths" { } test "read file non vectored" { - var tmp_dir = std.testing.tmpDir(.{}); + var tmp_dir = testing.tmpDir(.{}); defer tmp_dir.cleanup(); const contents = "hello, world!\n"; @@ -2098,6 +2085,47 @@ test "read file non vectored" { else => |e| return e, }; } - try std.testing.expectEqualStrings(contents, w.buffered()); - try std.testing.expectEqual(contents.len, i); + try testing.expectEqualStrings(contents, w.buffered()); + try testing.expectEqual(contents.len, i); +} + +test "seek keeping partial buffer" { + var tmp_dir = testing.tmpDir(.{}); + defer tmp_dir.cleanup(); + + const contents = "0123456789"; + + const file = try tmp_dir.dir.createFile("input.txt", .{ .read = true }); + defer file.close(); + { + var file_writer: std.fs.File.Writer = .init(file, &.{}); + try file_writer.interface.writeAll(contents); + try file_writer.interface.flush(); + } + + var read_buffer: [3]u8 = undefined; + var file_reader: std.fs.File.Reader = .init(file, &read_buffer); + + try testing.expectEqual(0, file_reader.logicalPos()); + + var buf: [4]u8 = undefined; + try file_reader.interface.readSliceAll(&buf); + + if 
(file_reader.interface.bufferedLen() != 3) { + // Pass the test if the OS doesn't give us vectored reads. + return; + } + + try testing.expectEqual(4, file_reader.logicalPos()); + try testing.expectEqual(7, file_reader.pos); + try file_reader.seekTo(6); + try testing.expectEqual(6, file_reader.logicalPos()); + try testing.expectEqual(7, file_reader.pos); + + try testing.expectEqualStrings("0123", &buf); + + const n = try file_reader.interface.readSliceShort(&buf); + try testing.expectEqual(4, n); + + try testing.expectEqualStrings("6789", &buf); } diff --git a/lib/std/http/Server.zig b/lib/std/http/Server.zig index 25d3e44253..886aed72dc 100644 --- a/lib/std/http/Server.zig +++ b/lib/std/http/Server.zig @@ -129,11 +129,10 @@ pub const Request = struct { pub const Compression = union(enum) { pub const DeflateDecompressor = std.compress.zlib.Decompressor(std.io.AnyReader); pub const GzipDecompressor = std.compress.gzip.Decompressor(std.io.AnyReader); - pub const ZstdDecompressor = std.compress.zstd.Decompressor(std.io.AnyReader); deflate: DeflateDecompressor, gzip: GzipDecompressor, - zstd: ZstdDecompressor, + zstd: std.compress.zstd.Decompress, none: void, }; diff --git a/lib/std/json.zig b/lib/std/json.zig index f81ac1cd65..c7b7dcf19f 100644 --- a/lib/std/json.zig +++ b/lib/std/json.zig @@ -69,7 +69,6 @@ pub const ArrayHashMap = @import("json/hashmap.zig").ArrayHashMap; pub const Scanner = @import("json/Scanner.zig"); pub const validate = Scanner.validate; pub const Error = Scanner.Error; -pub const reader = Scanner.reader; pub const default_buffer_size = Scanner.default_buffer_size; pub const Token = Scanner.Token; pub const TokenType = Scanner.TokenType; diff --git a/lib/std/math.zig b/lib/std/math.zig index 1cd9a83a14..9f2d12a65e 100644 --- a/lib/std/math.zig +++ b/lib/std/math.zig @@ -45,6 +45,7 @@ pub const rad_per_deg = 0.017453292519943295769236907684886127134428718885417254 /// 180.0/pi pub const deg_per_rad = 57.295779513082320876798154814105170332405472466564321549160243861; +pub const Sign = enum(u1) { positive, negative }; pub const FloatRepr = float.FloatRepr; pub const floatExponentBits = float.floatExponentBits; pub const floatMantissaBits = float.floatMantissaBits; @@ -594,27 +595,30 @@ pub fn shlExact(comptime T: type, a: T, shift_amt: Log2Int(T)) !T { /// Shifts left. Overflowed bits are truncated. /// A negative shift amount results in a right shift. 
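// Worked examples for the shl rewrite that follows, taken from the tests this
// diff adds: an out-of-range amount on a signed operand now resolves to an
// arithmetic fill instead of unconditionally zero.
const std = @import("std");

test "shl with out-of-range amounts" {
    try std.testing.expect(std.math.shl(u8, 0xff, 100) == 0); // unsigned: all bits shifted out
    try std.testing.expect(std.math.shl(i8, -1, 100) == 0); // left shift still zeroes out
    try std.testing.expect(std.math.shl(i8, -1, -100) == -1); // negative amount = right shift, sign-filled
}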
pub fn shl(comptime T: type, a: T, shift_amt: anytype) T { + const is_shl = shift_amt >= 0; const abs_shift_amt = @abs(shift_amt); - - const casted_shift_amt = blk: { - if (@typeInfo(T) == .vector) { - const C = @typeInfo(T).vector.child; - const len = @typeInfo(T).vector.len; - if (abs_shift_amt >= @typeInfo(C).int.bits) return @splat(0); - break :blk @as(@Vector(len, Log2Int(C)), @splat(@as(Log2Int(C), @intCast(abs_shift_amt)))); - } else { - if (abs_shift_amt >= @typeInfo(T).int.bits) return 0; - break :blk @as(Log2Int(T), @intCast(abs_shift_amt)); - } + const casted_shift_amt = casted_shift_amt: switch (@typeInfo(T)) { + .int => |info| { + if (abs_shift_amt < info.bits) break :casted_shift_amt @as( + Log2Int(T), + @intCast(abs_shift_amt), + ); + if (info.signedness == .unsigned or is_shl) return 0; + return a >> (info.bits - 1); + }, + .vector => |info| { + const Child = info.child; + const child_info = @typeInfo(Child).int; + if (abs_shift_amt < child_info.bits) break :casted_shift_amt @as( + @Vector(info.len, Log2Int(Child)), + @splat(@as(Log2Int(Child), @intCast(abs_shift_amt))), + ); + if (child_info.signedness == .unsigned or is_shl) return @splat(0); + return a >> @splat(child_info.bits - 1); + }, + else => comptime unreachable, }; - - if (@TypeOf(shift_amt) == comptime_int or @typeInfo(@TypeOf(shift_amt)).int.signedness == .signed) { - if (shift_amt < 0) { - return a >> casted_shift_amt; - } - } - - return a << casted_shift_amt; + return if (is_shl) a << casted_shift_amt else a >> casted_shift_amt; } test shl { @@ -629,32 +633,40 @@ test shl { try testing.expect(shl(@Vector(1, u32), @Vector(1, u32){42}, @as(usize, 1))[0] == @as(u32, 42) << 1); try testing.expect(shl(@Vector(1, u32), @Vector(1, u32){42}, @as(isize, -1))[0] == @as(u32, 42) >> 1); try testing.expect(shl(@Vector(1, u32), @Vector(1, u32){42}, 33)[0] == 0); + + try testing.expect(shl(i8, -1, -100) == -1); + try testing.expect(shl(i8, -1, 100) == 0); + try testing.expect(@reduce(.And, shl(@Vector(2, i8), .{ -1, 1 }, -100) == @Vector(2, i8){ -1, 0 })); + try testing.expect(@reduce(.And, shl(@Vector(2, i8), .{ -1, 1 }, 100) == @Vector(2, i8){ 0, 0 })); } /// Shifts right. Overflowed bits are truncated. /// A negative shift amount results in a left shift. 
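// The mirror cases for shr, again lifted from the new tests: an oversized
// right shift of a signed value now keeps the sign bit rather than yielding zero.
const std = @import("std");

test "shr with out-of-range amounts" {
    try std.testing.expect(std.math.shr(u8, 0xff, 100) == 0);
    try std.testing.expect(std.math.shr(i8, -1, 100) == -1); // arithmetic shift preserves the sign
    try std.testing.expect(std.math.shr(i8, -1, -100) == 0); // negative amount = left shift
}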
pub fn shr(comptime T: type, a: T, shift_amt: anytype) T { + const is_shl = shift_amt < 0; const abs_shift_amt = @abs(shift_amt); - - const casted_shift_amt = blk: { - if (@typeInfo(T) == .vector) { - const C = @typeInfo(T).vector.child; - const len = @typeInfo(T).vector.len; - if (abs_shift_amt >= @typeInfo(C).int.bits) return @splat(0); - break :blk @as(@Vector(len, Log2Int(C)), @splat(@as(Log2Int(C), @intCast(abs_shift_amt)))); - } else { - if (abs_shift_amt >= @typeInfo(T).int.bits) return 0; - break :blk @as(Log2Int(T), @intCast(abs_shift_amt)); - } + const casted_shift_amt = casted_shift_amt: switch (@typeInfo(T)) { + .int => |info| { + if (abs_shift_amt < info.bits) break :casted_shift_amt @as( + Log2Int(T), + @intCast(abs_shift_amt), + ); + if (info.signedness == .unsigned or is_shl) return 0; + return a >> (info.bits - 1); + }, + .vector => |info| { + const Child = info.child; + const child_info = @typeInfo(Child).int; + if (abs_shift_amt < child_info.bits) break :casted_shift_amt @as( + @Vector(info.len, Log2Int(Child)), + @splat(@as(Log2Int(Child), @intCast(abs_shift_amt))), + ); + if (child_info.signedness == .unsigned or is_shl) return @splat(0); + return a >> @splat(child_info.bits - 1); + }, + else => comptime unreachable, }; - - if (@TypeOf(shift_amt) == comptime_int or @typeInfo(@TypeOf(shift_amt)).int.signedness == .signed) { - if (shift_amt < 0) { - return a << casted_shift_amt; - } - } - - return a >> casted_shift_amt; + return if (is_shl) a << casted_shift_amt else a >> casted_shift_amt; } test shr { @@ -669,6 +681,11 @@ test shr { try testing.expect(shr(@Vector(1, u32), @Vector(1, u32){42}, @as(usize, 1))[0] == @as(u32, 42) >> 1); try testing.expect(shr(@Vector(1, u32), @Vector(1, u32){42}, @as(isize, -1))[0] == @as(u32, 42) << 1); try testing.expect(shr(@Vector(1, u32), @Vector(1, u32){42}, 33)[0] == 0); + + try testing.expect(shr(i8, -1, -100) == 0); + try testing.expect(shr(i8, -1, 100) == -1); + try testing.expect(@reduce(.And, shr(@Vector(2, i8), .{ -1, 1 }, -100) == @Vector(2, i8){ 0, 0 })); + try testing.expect(@reduce(.And, shr(@Vector(2, i8), .{ -1, 1 }, 100) == @Vector(2, i8){ -1, 0 })); } /// Rotates right. Only unsigned values can be rotated. Negative shift diff --git a/lib/std/math/big/int_test.zig b/lib/std/math/big/int_test.zig index f44b254cf1..bb6deeb778 100644 --- a/lib/std/math/big/int_test.zig +++ b/lib/std/math/big/int_test.zig @@ -2774,7 +2774,6 @@ test "bitNotWrap more than two limbs" { // This test requires int sizes greater than 128 bits. 
if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_c) return error.SkipZigTest; // TODO - if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO // LLVM: unexpected runtime library name: __umodei4 if (builtin.zig_backend == .stage2_llvm and comptime builtin.target.cpu.arch.isWasm()) return error.SkipZigTest; // TODO diff --git a/lib/std/math/float.zig b/lib/std/math/float.zig index df7d7fe1ab..6ffbd85bd2 100644 --- a/lib/std/math/float.zig +++ b/lib/std/math/float.zig @@ -4,8 +4,6 @@ const assert = std.debug.assert; const expect = std.testing.expect; const expectEqual = std.testing.expectEqual; -pub const Sign = enum(u1) { positive, negative }; - pub fn FloatRepr(comptime Float: type) type { const fractional_bits = floatFractionalBits(Float); const exponent_bits = floatExponentBits(Float); @@ -14,7 +12,7 @@ pub fn FloatRepr(comptime Float: type) type { mantissa: StoredMantissa, exponent: BiasedExponent, - sign: Sign, + sign: std.math.Sign, pub const StoredMantissa = @Type(.{ .int = .{ .signedness = .unsigned, @@ -69,7 +67,7 @@ pub fn FloatRepr(comptime Float: type) type { /// This currently truncates denormal values, which needs to be fixed before this can be used to /// produce a rounded value. - pub fn reconstruct(normalized: Normalized, sign: Sign) Float { + pub fn reconstruct(normalized: Normalized, sign: std.math.Sign) Float { if (normalized.exponent > BiasedExponent.max_normal.unbias()) return @bitCast(Repr{ .mantissa = 0, .exponent = .infinite, diff --git a/lib/std/math/log10.zig b/lib/std/math/log10.zig index 655a42215e..9ac5c6da24 100644 --- a/lib/std/math/log10.zig +++ b/lib/std/math/log10.zig @@ -132,7 +132,6 @@ inline fn less_than_5(x: u32) u32 { test log10_int { if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_c) return error.SkipZigTest; // TODO - if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_llvm and comptime builtin.target.cpu.arch.isWasm()) return error.SkipZigTest; // TODO diff --git a/lib/std/mem.zig b/lib/std/mem.zig index 1a61076f32..3b72a2b579 100644 --- a/lib/std/mem.zig +++ b/lib/std/mem.zig @@ -676,6 +676,7 @@ test lessThan { const eqlBytes_allowed = switch (builtin.zig_backend) { // These backends don't support vectors yet. 
+ .stage2_aarch64, .stage2_powerpc, .stage2_riscv64, => false, @@ -4482,7 +4483,7 @@ pub fn doNotOptimizeAway(val: anytype) void { ); asm volatile ("" : - : [val2] "r" (val2), + : [_] "r" (val2), ); } else doNotOptimizeAway(&val); }, @@ -4490,7 +4491,7 @@ pub fn doNotOptimizeAway(val: anytype) void { if ((t.float.bits == 32 or t.float.bits == 64) and builtin.zig_backend != .stage2_c) { asm volatile ("" : - : [val] "rm" (val), + : [_] "rm" (val), ); } else doNotOptimizeAway(&val); }, @@ -4500,7 +4501,7 @@ pub fn doNotOptimizeAway(val: anytype) void { } else { asm volatile ("" : - : [val] "m" (val), + : [_] "m" (val), : .{ .memory = true }); } }, diff --git a/lib/std/os/linux.zig b/lib/std/os/linux.zig index 75494145b9..a02451c0fd 100644 --- a/lib/std/os/linux.zig +++ b/lib/std/os/linux.zig @@ -503,7 +503,6 @@ pub var elf_aux_maybe: ?[*]std.elf.Auxv = null; /// Whether an external or internal getauxval implementation is used. const extern_getauxval = switch (builtin.zig_backend) { // Calling extern functions is not yet supported with these backends - .stage2_aarch64, .stage2_arm, .stage2_powerpc, .stage2_riscv64, diff --git a/lib/std/posix.zig b/lib/std/posix.zig index e3e1657705..54c6470d2c 100644 --- a/lib/std/posix.zig +++ b/lib/std/posix.zig @@ -192,10 +192,27 @@ pub const iovec_const = extern struct { len: usize, }; -pub const ACCMODE = enum(u2) { - RDONLY = 0, - WRONLY = 1, - RDWR = 2, +pub const ACCMODE = switch (native_os) { + // POSIX has a note about the access mode values: + // + // In historical implementations the value of O_RDONLY is zero. Because of + // that, it is not possible to detect the presence of O_RDONLY and another + // option. Future implementations should encode O_RDONLY and O_WRONLY as + // bit flags so that: O_RDONLY | O_WRONLY == O_RDWR + // + // In practice SerenityOS is the only system supported by Zig that + // implements this suggestion. + // https://github.com/SerenityOS/serenity/blob/4adc51fdf6af7d50679c48b39362e062f5a3b2cb/Kernel/API/POSIX/fcntl.h#L28-L30 + .serenity => enum(u2) { + RDONLY = 1, + WRONLY = 2, + RDWR = 3, + }, + else => enum(u2) { + RDONLY = 0, + WRONLY = 1, + RDWR = 2, + }, }; pub const TCSA = enum(c_uint) { @@ -1035,6 +1052,7 @@ pub const TruncateError = error{ FileBusy, AccessDenied, PermissionDenied, + NonResizable, } || UnexpectedError; /// Length must be positive when treated as an i64. @@ -1074,7 +1092,7 @@ pub fn ftruncate(fd: fd_t, length: u64) TruncateError!void { .PERM => return error.PermissionDenied, .TXTBSY => return error.FileBusy, .BADF => unreachable, // Handle not open for writing - .INVAL => unreachable, // Handle not open for writing, negative length, or non-resizable handle + .INVAL => return error.NonResizable, .NOTCAPABLE => return error.AccessDenied, else => |err| return unexpectedErrno(err), } @@ -1090,7 +1108,7 @@ pub fn ftruncate(fd: fd_t, length: u64) TruncateError!void { .PERM => return error.PermissionDenied, .TXTBSY => return error.FileBusy, .BADF => unreachable, // Handle not open for writing - .INVAL => unreachable, // Handle not open for writing, negative length, or non-resizable handle + .INVAL => return error.NonResizable, // This is returned for /dev/null for example. else => |err| return unexpectedErrno(err), } } @@ -6326,295 +6344,6 @@ pub fn send( }; } -pub const SendFileError = PReadError || WriteError || SendError; - -/// Transfer data between file descriptors, with optional headers and trailers. -/// -/// Returns the number of bytes written, which can be zero. 
-/// -/// The `sendfile` call copies `in_len` bytes from one file descriptor to another. When possible, -/// this is done within the operating system kernel, which can provide better performance -/// characteristics than transferring data from kernel to user space and back, such as with -/// `read` and `write` calls. When `in_len` is `0`, it means to copy until the end of the input file has been -/// reached. Note, however, that partial writes are still possible in this case. -/// -/// `in_fd` must be a file descriptor opened for reading, and `out_fd` must be a file descriptor -/// opened for writing. They may be any kind of file descriptor; however, if `in_fd` is not a regular -/// file system file, it may cause this function to fall back to calling `read` and `write`, in which case -/// atomicity guarantees no longer apply. -/// -/// Copying begins reading at `in_offset`. The input file descriptor seek position is ignored and not updated. -/// If the output file descriptor has a seek position, it is updated as bytes are written. When -/// `in_offset` is past the end of the input file, it successfully reads 0 bytes. -/// -/// `flags` has different meanings per operating system; refer to the respective man pages. -/// -/// These systems support atomically sending everything, including headers and trailers: -/// * macOS -/// * FreeBSD -/// -/// These systems support in-kernel data copying, but headers and trailers are not sent atomically: -/// * Linux -/// -/// Other systems fall back to calling `read` / `write`. -/// -/// Linux has a limit on how many bytes may be transferred in one `sendfile` call, which is `0x7ffff000` -/// on both 64-bit and 32-bit systems. This is due to using a signed C int as the return value, as -/// well as stuffing the errno codes into the last `4096` values. This is noted on the `sendfile` man page. -/// The limit on Darwin is `0x7fffffff`, trying to write more than that returns EINVAL. -/// The corresponding POSIX limit on this is `maxInt(isize)`. -pub fn sendfile( - out_fd: fd_t, - in_fd: fd_t, - in_offset: u64, - in_len: u64, - headers: []const iovec_const, - trailers: []const iovec_const, - flags: u32, -) SendFileError!usize { - var header_done = false; - var total_written: usize = 0; - - // Prevents EOVERFLOW. - const size_t = std.meta.Int(.unsigned, @typeInfo(usize).int.bits - 1); - const max_count = switch (native_os) { - .linux => 0x7ffff000, - .macos, .ios, .watchos, .tvos, .visionos => maxInt(i32), - else => maxInt(size_t), - }; - - switch (native_os) { - .linux => sf: { - if (headers.len != 0) { - const amt = try writev(out_fd, headers); - total_written += amt; - if (amt < count_iovec_bytes(headers)) return total_written; - header_done = true; - } - - // Here we match BSD behavior, making a zero count value send as many bytes as possible. - const adjusted_count = if (in_len == 0) max_count else @min(in_len, max_count); - - const sendfile_sym = if (lfs64_abi) system.sendfile64 else system.sendfile; - while (true) { - var offset: off_t = @bitCast(in_offset); - const rc = sendfile_sym(out_fd, in_fd, &offset, adjusted_count); - switch (errno(rc)) { - .SUCCESS => { - const amt: usize = @bitCast(rc); - total_written += amt; - if (in_len == 0 and amt == 0) { - // We have detected EOF from `in_fd`. - break; - } else if (amt < in_len) { - return total_written; - } else { - break; - } - }, - - .BADF => unreachable, // Always a race condition. - .FAULT => unreachable, // Segmentation fault. - .OVERFLOW => unreachable, // We avoid passing too large of a `count`. 
- .NOTCONN => return error.BrokenPipe, // `out_fd` is an unconnected socket - - .INVAL => { - // EINVAL could be any of the following situations: - // * Descriptor is not valid or locked - // * an mmap(2)-like operation is not available for in_fd - // * count is negative - // * out_fd has the APPEND flag set - // Because of the "mmap(2)-like operation" possibility, we fall back to doing read/write - // manually. - break :sf; - }, - .AGAIN => return error.WouldBlock, - .IO => return error.InputOutput, - .PIPE => return error.BrokenPipe, - .NOMEM => return error.SystemResources, - .NXIO => return error.Unseekable, - .SPIPE => return error.Unseekable, - else => |err| { - unexpectedErrno(err) catch {}; - break :sf; - }, - } - } - - if (trailers.len != 0) { - total_written += try writev(out_fd, trailers); - } - - return total_written; - }, - .freebsd => sf: { - var hdtr_data: std.c.sf_hdtr = undefined; - var hdtr: ?*std.c.sf_hdtr = null; - if (headers.len != 0 or trailers.len != 0) { - // Here we carefully avoid `@intCast` by returning partial writes when - // too many io vectors are provided. - const hdr_cnt = cast(u31, headers.len) orelse maxInt(u31); - if (headers.len > hdr_cnt) return writev(out_fd, headers); - - const trl_cnt = cast(u31, trailers.len) orelse maxInt(u31); - - hdtr_data = std.c.sf_hdtr{ - .headers = headers.ptr, - .hdr_cnt = hdr_cnt, - .trailers = trailers.ptr, - .trl_cnt = trl_cnt, - }; - hdtr = &hdtr_data; - } - - while (true) { - var sbytes: off_t = undefined; - const err = errno(system.sendfile(in_fd, out_fd, @bitCast(in_offset), @min(in_len, max_count), hdtr, &sbytes, flags)); - const amt: usize = @bitCast(sbytes); - switch (err) { - .SUCCESS => return amt, - - .BADF => unreachable, // Always a race condition. - .FAULT => unreachable, // Segmentation fault. - .NOTCONN => return error.BrokenPipe, // `out_fd` is an unconnected socket - - .INVAL, .OPNOTSUPP, .NOTSOCK, .NOSYS => { - // EINVAL could be any of the following situations: - // * The fd argument is not a regular file. - // * The s argument is not a SOCK.STREAM type socket. - // * The offset argument is negative. - // Because of some of these possibilities, we fall back to doing read/write - // manually, the same as ENOSYS. - break :sf; - }, - - .INTR => if (amt != 0) return amt else continue, - - .AGAIN => if (amt != 0) { - return amt; - } else { - return error.WouldBlock; - }, - - .BUSY => if (amt != 0) { - return amt; - } else { - return error.WouldBlock; - }, - - .IO => return error.InputOutput, - .NOBUFS => return error.SystemResources, - .PIPE => return error.BrokenPipe, - - else => { - unexpectedErrno(err) catch {}; - if (amt != 0) { - return amt; - } else { - break :sf; - } - }, - } - } - }, - .macos, .ios, .tvos, .watchos, .visionos => sf: { - var hdtr_data: std.c.sf_hdtr = undefined; - var hdtr: ?*std.c.sf_hdtr = null; - if (headers.len != 0 or trailers.len != 0) { - // Here we carefully avoid `@intCast` by returning partial writes when - // too many io vectors are provided. 
- const hdr_cnt = cast(u31, headers.len) orelse maxInt(u31); - if (headers.len > hdr_cnt) return writev(out_fd, headers); - - const trl_cnt = cast(u31, trailers.len) orelse maxInt(u31); - - hdtr_data = std.c.sf_hdtr{ - .headers = headers.ptr, - .hdr_cnt = hdr_cnt, - .trailers = trailers.ptr, - .trl_cnt = trl_cnt, - }; - hdtr = &hdtr_data; - } - - while (true) { - var sbytes: off_t = @min(in_len, max_count); - const err = errno(system.sendfile(in_fd, out_fd, @bitCast(in_offset), &sbytes, hdtr, flags)); - const amt: usize = @bitCast(sbytes); - switch (err) { - .SUCCESS => return amt, - - .BADF => unreachable, // Always a race condition. - .FAULT => unreachable, // Segmentation fault. - .INVAL => unreachable, - .NOTCONN => return error.BrokenPipe, // `out_fd` is an unconnected socket - - .OPNOTSUPP, .NOTSOCK, .NOSYS => break :sf, - - .INTR => if (amt != 0) return amt else continue, - - .AGAIN => if (amt != 0) { - return amt; - } else { - return error.WouldBlock; - }, - - .IO => return error.InputOutput, - .PIPE => return error.BrokenPipe, - - else => { - unexpectedErrno(err) catch {}; - if (amt != 0) { - return amt; - } else { - break :sf; - } - }, - } - } - }, - else => {}, // fall back to read/write - } - - if (headers.len != 0 and !header_done) { - const amt = try writev(out_fd, headers); - total_written += amt; - if (amt < count_iovec_bytes(headers)) return total_written; - } - - rw: { - var buf: [8 * 4096]u8 = undefined; - // Here we match BSD behavior, making a zero count value send as many bytes as possible. - const adjusted_count = if (in_len == 0) buf.len else @min(buf.len, in_len); - const amt_read = try pread(in_fd, buf[0..adjusted_count], in_offset); - if (amt_read == 0) { - if (in_len == 0) { - // We have detected EOF from `in_fd`. - break :rw; - } else { - return total_written; - } - } - const amt_written = try write(out_fd, buf[0..amt_read]); - total_written += amt_written; - if (amt_written < in_len or in_len == 0) return total_written; - } - - if (trailers.len != 0) { - total_written += try writev(out_fd, trailers); - } - - return total_written; -} - -fn count_iovec_bytes(iovs: []const iovec_const) usize { - var count: usize = 0; - for (iovs) |iov| { - count += iov.len; - } - return count; -} - pub const CopyFileRangeError = error{ FileTooBig, InputOutput, diff --git a/lib/std/process/Child.zig b/lib/std/process/Child.zig index c2effb523a..21cc545f12 100644 --- a/lib/std/process/Child.zig +++ b/lib/std/process/Child.zig @@ -14,6 +14,7 @@ const assert = std.debug.assert; const native_os = builtin.os.tag; const Allocator = std.mem.Allocator; const ChildProcess = @This(); +const ArrayList = std.ArrayListUnmanaged; pub const Id = switch (native_os) { .windows => windows.HANDLE, @@ -348,19 +349,6 @@ pub const RunResult = struct { stderr: []u8, }; -fn writeFifoDataToArrayList(allocator: Allocator, list: *std.ArrayListUnmanaged(u8), fifo: *std.io.PollFifo) !void { - if (fifo.head != 0) fifo.realign(); - if (list.capacity == 0) { - list.* = .{ - .items = fifo.buf[0..fifo.count], - .capacity = fifo.buf.len, - }; - fifo.* = std.io.PollFifo.init(fifo.allocator); - } else { - try list.appendSlice(allocator, fifo.buf[0..fifo.count]); - } -} - /// Collect the output from the process's stdout and stderr. Will return once all output /// has been collected. This does not mean that the process has ended. `wait` should still /// be called to wait for and clean up the process. @@ -370,28 +358,48 @@ pub fn collectOutput( child: ChildProcess, /// Used for `stdout` and `stderr`. 
allocator: Allocator, - stdout: *std.ArrayListUnmanaged(u8), - stderr: *std.ArrayListUnmanaged(u8), + stdout: *ArrayList(u8), + stderr: *ArrayList(u8), max_output_bytes: usize, ) !void { assert(child.stdout_behavior == .Pipe); assert(child.stderr_behavior == .Pipe); - var poller = std.io.poll(allocator, enum { stdout, stderr }, .{ + var poller = std.Io.poll(allocator, enum { stdout, stderr }, .{ .stdout = child.stdout.?, .stderr = child.stderr.?, }); defer poller.deinit(); + const stdout_r = poller.reader(.stdout); + stdout_r.buffer = stdout.allocatedSlice(); + stdout_r.seek = 0; + stdout_r.end = stdout.items.len; + + const stderr_r = poller.reader(.stderr); + stderr_r.buffer = stderr.allocatedSlice(); + stderr_r.seek = 0; + stderr_r.end = stderr.items.len; + + defer { + stdout.* = .{ + .items = stdout_r.buffer[0..stdout_r.end], + .capacity = stdout_r.buffer.len, + }; + stderr.* = .{ + .items = stderr_r.buffer[0..stderr_r.end], + .capacity = stderr_r.buffer.len, + }; + stdout_r.buffer = &.{}; + stderr_r.buffer = &.{}; + } + while (try poller.poll()) { - if (poller.fifo(.stdout).count > max_output_bytes) + if (stdout_r.bufferedLen() > max_output_bytes) return error.StdoutStreamTooLong; - if (poller.fifo(.stderr).count > max_output_bytes) + if (stderr_r.bufferedLen() > max_output_bytes) return error.StderrStreamTooLong; } - - try writeFifoDataToArrayList(allocator, stdout, poller.fifo(.stdout)); - try writeFifoDataToArrayList(allocator, stderr, poller.fifo(.stderr)); } pub const RunError = posix.GetCwdError || posix.ReadError || SpawnError || posix.PollError || error{ @@ -421,10 +429,10 @@ pub fn run(args: struct { child.expand_arg0 = args.expand_arg0; child.progress_node = args.progress_node; - var stdout: std.ArrayListUnmanaged(u8) = .empty; - errdefer stdout.deinit(args.allocator); - var stderr: std.ArrayListUnmanaged(u8) = .empty; - errdefer stderr.deinit(args.allocator); + var stdout: ArrayList(u8) = .empty; + defer stdout.deinit(args.allocator); + var stderr: ArrayList(u8) = .empty; + defer stderr.deinit(args.allocator); try child.spawn(); errdefer { @@ -432,7 +440,7 @@ pub fn run(args: struct { } try child.collectOutput(args.allocator, &stdout, &stderr, args.max_output_bytes); - return RunResult{ + return .{ .stdout = try stdout.toOwnedSlice(args.allocator), .stderr = try stderr.toOwnedSlice(args.allocator), .term = try child.wait(), @@ -878,12 +886,12 @@ fn spawnWindows(self: *ChildProcess) SpawnError!void { var cmd_line_cache = WindowsCommandLineCache.init(self.allocator, self.argv); defer cmd_line_cache.deinit(); - var app_buf: std.ArrayListUnmanaged(u16) = .empty; + var app_buf: ArrayList(u16) = .empty; defer app_buf.deinit(self.allocator); try app_buf.appendSlice(self.allocator, app_name_w); - var dir_buf: std.ArrayListUnmanaged(u16) = .empty; + var dir_buf: ArrayList(u16) = .empty; defer dir_buf.deinit(self.allocator); if (cwd_path_w.len > 0) { @@ -1003,13 +1011,16 @@ fn forkChildErrReport(fd: i32, err: ChildProcess.SpawnError) noreturn { } fn writeIntFd(fd: i32, value: ErrInt) !void { - const file: File = .{ .handle = fd }; - file.deprecatedWriter().writeInt(u64, @intCast(value), .little) catch return error.SystemResources; + var buffer: [8]u8 = undefined; + var fw: std.fs.File.Writer = .initMode(.{ .handle = fd }, &buffer, .streaming); + fw.interface.writeInt(u64, value, .little) catch unreachable; + fw.interface.flush() catch return error.SystemResources; } fn readIntFd(fd: i32) !ErrInt { - const file: File = .{ .handle = fd }; - return 
@intCast(file.deprecatedReader().readInt(u64, .little) catch return error.SystemResources); + var buffer: [8]u8 = undefined; + var fr: std.fs.File.Reader = .initMode(.{ .handle = fd }, &buffer, .streaming); + return @intCast(fr.interface.takeInt(u64, .little) catch return error.SystemResources); } const ErrInt = std.meta.Int(.unsigned, @sizeOf(anyerror) * 8); @@ -1020,8 +1031,8 @@ const ErrInt = std.meta.Int(.unsigned, @sizeOf(anyerror) * 8); /// Note: If the dir is the cwd, dir_buf should be empty (len = 0). fn windowsCreateProcessPathExt( allocator: mem.Allocator, - dir_buf: *std.ArrayListUnmanaged(u16), - app_buf: *std.ArrayListUnmanaged(u16), + dir_buf: *ArrayList(u16), + app_buf: *ArrayList(u16), pathext: [:0]const u16, cmd_line_cache: *WindowsCommandLineCache, envp_ptr: ?[*]u16, @@ -1504,7 +1515,7 @@ const WindowsCommandLineCache = struct { /// Returns the absolute path of `cmd.exe` within the Windows system directory. /// The caller owns the returned slice. fn windowsCmdExePath(allocator: mem.Allocator) error{ OutOfMemory, Unexpected }![:0]u16 { - var buf = try std.ArrayListUnmanaged(u16).initCapacity(allocator, 128); + var buf = try ArrayList(u16).initCapacity(allocator, 128); errdefer buf.deinit(allocator); while (true) { const unused_slice = buf.unusedCapacitySlice(); diff --git a/lib/std/start.zig b/lib/std/start.zig index 22ccda1e40..43355d34f4 100644 --- a/lib/std/start.zig +++ b/lib/std/start.zig @@ -101,17 +101,11 @@ comptime { // Simplified start code for stage2 until it supports more language features /// fn main2() callconv(.c) c_int { - root.main(); - return 0; + return callMain(); } fn _start2() callconv(.withStackAlign(.c, 1)) noreturn { - callMain2(); -} - -fn callMain2() noreturn { - root.main(); - exit2(0); + std.posix.exit(callMain()); } fn spirvMain2() callconv(.kernel) void { @@ -119,51 +113,7 @@ fn spirvMain2() callconv(.kernel) void { } fn wWinMainCRTStartup2() callconv(.c) noreturn { - root.main(); - exit2(0); -} - -fn exit2(code: usize) noreturn { - switch (native_os) { - .linux => switch (builtin.cpu.arch) { - .x86_64 => { - asm volatile ("syscall" - : - : [number] "{rax}" (231), - [arg1] "{rdi}" (code), - : .{ .rcx = true, .r11 = true, .memory = true }); - }, - .arm => { - asm volatile ("svc #0" - : - : [number] "{r7}" (1), - [arg1] "{r0}" (code), - : .{ .memory = true }); - }, - .aarch64 => { - asm volatile ("svc #0" - : - : [number] "{x8}" (93), - [arg1] "{x0}" (code), - : .{ .memory = true }); - }, - .sparc64 => { - asm volatile ("ta 0x6d" - : - : [number] "{g1}" (1), - [arg1] "{o0}" (code), - : .{ .o0 = true, .o1 = true, .o2 = true, .o3 = true, .o4 = true, .o5 = true, .o6 = true, .o7 = true, .memory = true }); - }, - else => @compileError("TODO"), - }, - // exits(0) - .plan9 => std.os.plan9.exits(null), - .windows => { - std.os.windows.ntdll.RtlExitUserProcess(@truncate(code)); - }, - else => @compileError("TODO"), - } - unreachable; + std.posix.exit(callMain()); } //////////////////////////////////////////////////////////////////////////////// @@ -676,10 +626,11 @@ pub inline fn callMain() u8 { const result = root.main() catch |err| { switch (builtin.zig_backend) { + .stage2_aarch64, .stage2_powerpc, .stage2_riscv64, => { - std.debug.print("error: failed with error\n", .{}); + _ = std.posix.write(std.posix.STDERR_FILENO, "error: failed with error\n") catch {}; return 1; }, else => {}, diff --git a/lib/std/tar.zig b/lib/std/tar.zig index 729a07db0a..e397677cf3 100644 --- a/lib/std/tar.zig +++ b/lib/std/tar.zig @@ -19,7 +19,7 @@ const std = 
@import("std"); const assert = std.debug.assert; const testing = std.testing; -pub const writer = @import("tar/writer.zig").writer; +pub const Writer = @import("tar/Writer.zig"); /// Provide this to receive detailed error messages. /// When this is provided, some errors which would otherwise be returned @@ -293,28 +293,6 @@ fn nullStr(str: []const u8) []const u8 { return str; } -/// Options for iterator. -/// Buffers should be provided by the caller. -pub const IteratorOptions = struct { - /// Use a buffer with length `std.fs.max_path_bytes` to match file system capabilities. - file_name_buffer: []u8, - /// Use a buffer with length `std.fs.max_path_bytes` to match file system capabilities. - link_name_buffer: []u8, - /// Collects error messages during unpacking - diagnostics: ?*Diagnostics = null, -}; - -/// Iterates over files in tar archive. -/// `next` returns each file in tar archive. -pub fn iterator(reader: anytype, options: IteratorOptions) Iterator(@TypeOf(reader)) { - return .{ - .reader = reader, - .diagnostics = options.diagnostics, - .file_name_buffer = options.file_name_buffer, - .link_name_buffer = options.link_name_buffer, - }; -} - /// Type of the file returned by iterator `next` method. pub const FileKind = enum { directory, @@ -323,206 +301,192 @@ pub const FileKind = enum { }; /// Iterator over entries in the tar file represented by reader. -pub fn Iterator(comptime ReaderType: type) type { - return struct { - reader: ReaderType, - diagnostics: ?*Diagnostics = null, - - // buffers for heeader and file attributes - header_buffer: [Header.SIZE]u8 = undefined, - file_name_buffer: []u8, - link_name_buffer: []u8, - - // bytes of padding to the end of the block - padding: usize = 0, - // not consumed bytes of file from last next iteration - unread_file_bytes: u64 = 0, - - pub const File = struct { - name: []const u8, // name of file, symlink or directory - link_name: []const u8, // target name of symlink - size: u64 = 0, // size of the file in bytes - mode: u32 = 0, - kind: FileKind = .file, - - unread_bytes: *u64, - parent_reader: ReaderType, - - pub const Reader = std.io.GenericReader(File, ReaderType.Error, File.read); +pub const Iterator = struct { + reader: *std.Io.Reader, + diagnostics: ?*Diagnostics = null, - pub fn reader(self: File) Reader { - return .{ .context = self }; - } + // buffers for heeader and file attributes + header_buffer: [Header.SIZE]u8 = undefined, + file_name_buffer: []u8, + link_name_buffer: []u8, - pub fn read(self: File, dest: []u8) ReaderType.Error!usize { - const buf = dest[0..@min(dest.len, self.unread_bytes.*)]; - const n = try self.parent_reader.read(buf); - self.unread_bytes.* -= n; - return n; - } + // bytes of padding to the end of the block + padding: usize = 0, + // not consumed bytes of file from last next iteration + unread_file_bytes: u64 = 0, - // Writes file content to writer. - pub fn writeAll(self: File, out_writer: anytype) !void { - var buffer: [4096]u8 = undefined; + /// Options for iterator. + /// Buffers should be provided by the caller. + pub const Options = struct { + /// Use a buffer with length `std.fs.max_path_bytes` to match file system capabilities. + file_name_buffer: []u8, + /// Use a buffer with length `std.fs.max_path_bytes` to match file system capabilities. 
+ link_name_buffer: []u8, + /// Collects error messages during unpacking + diagnostics: ?*Diagnostics = null, + }; - while (self.unread_bytes.* > 0) { - const buf = buffer[0..@min(buffer.len, self.unread_bytes.*)]; - try self.parent_reader.readNoEof(buf); - try out_writer.writeAll(buf); - self.unread_bytes.* -= buf.len; - } - } + /// Iterates over files in tar archive. + /// `next` returns each file in tar archive. + pub fn init(reader: *std.Io.Reader, options: Options) Iterator { + return .{ + .reader = reader, + .diagnostics = options.diagnostics, + .file_name_buffer = options.file_name_buffer, + .link_name_buffer = options.link_name_buffer, }; + } - const Self = @This(); - - fn readHeader(self: *Self) !?Header { - if (self.padding > 0) { - try self.reader.skipBytes(self.padding, .{}); - } - const n = try self.reader.readAll(&self.header_buffer); - if (n == 0) return null; - if (n < Header.SIZE) return error.UnexpectedEndOfStream; - const header = Header{ .bytes = self.header_buffer[0..Header.SIZE] }; - if (try header.checkChksum() == 0) return null; - return header; - } + pub const File = struct { + name: []const u8, // name of file, symlink or directory + link_name: []const u8, // target name of symlink + size: u64 = 0, // size of the file in bytes + mode: u32 = 0, + kind: FileKind = .file, + }; - fn readString(self: *Self, size: usize, buffer: []u8) ![]const u8 { - if (size > buffer.len) return error.TarInsufficientBuffer; - const buf = buffer[0..size]; - try self.reader.readNoEof(buf); - return nullStr(buf); + fn readHeader(self: *Iterator) !?Header { + if (self.padding > 0) { + try self.reader.discardAll(self.padding); } + const n = try self.reader.readSliceShort(&self.header_buffer); + if (n == 0) return null; + if (n < Header.SIZE) return error.UnexpectedEndOfStream; + const header = Header{ .bytes = self.header_buffer[0..Header.SIZE] }; + if (try header.checkChksum() == 0) return null; + return header; + } - fn newFile(self: *Self) File { - return .{ - .name = self.file_name_buffer[0..0], - .link_name = self.link_name_buffer[0..0], - .parent_reader = self.reader, - .unread_bytes = &self.unread_file_bytes, - }; - } + fn readString(self: *Iterator, size: usize, buffer: []u8) ![]const u8 { + if (size > buffer.len) return error.TarInsufficientBuffer; + const buf = buffer[0..size]; + try self.reader.readSliceAll(buf); + return nullStr(buf); + } - // Number of padding bytes in the last file block. - fn blockPadding(size: u64) usize { - const block_rounded = std.mem.alignForward(u64, size, Header.SIZE); // size rounded to te block boundary - return @intCast(block_rounded - size); - } + fn newFile(self: *Iterator) File { + return .{ + .name = self.file_name_buffer[0..0], + .link_name = self.link_name_buffer[0..0], + }; + } - /// Iterates through the tar archive as if it is a series of files. - /// Internally, the tar format often uses entries (header with optional - /// content) to add meta data that describes the next file. These - /// entries should not normally be visible to the outside. As such, this - /// loop iterates through one or more entries until it collects a all - /// file attributes. 
- pub fn next(self: *Self) !?File { - if (self.unread_file_bytes > 0) { - // If file content was not consumed by caller - try self.reader.skipBytes(self.unread_file_bytes, .{}); - self.unread_file_bytes = 0; - } - var file: File = self.newFile(); - - while (try self.readHeader()) |header| { - const kind = header.kind(); - const size: u64 = try header.size(); - self.padding = blockPadding(size); - - switch (kind) { - // File types to return upstream - .directory, .normal, .symbolic_link => { - file.kind = switch (kind) { - .directory => .directory, - .normal => .file, - .symbolic_link => .sym_link, - else => unreachable, - }; - file.mode = try header.mode(); - - // set file attributes if not already set by prefix/extended headers - if (file.size == 0) { - file.size = size; - } - if (file.link_name.len == 0) { - file.link_name = try header.linkName(self.link_name_buffer); - } - if (file.name.len == 0) { - file.name = try header.fullName(self.file_name_buffer); - } + // Number of padding bytes in the last file block. + fn blockPadding(size: u64) usize { + const block_rounded = std.mem.alignForward(u64, size, Header.SIZE); // size rounded to the block boundary + return @intCast(block_rounded - size); + } - self.padding = blockPadding(file.size); - self.unread_file_bytes = file.size; - return file; - }, - // Prefix header types - .gnu_long_name => { - file.name = try self.readString(@intCast(size), self.file_name_buffer); - }, - .gnu_long_link => { - file.link_name = try self.readString(@intCast(size), self.link_name_buffer); - }, - .extended_header => { - // Use just attributes from last extended header. - file = self.newFile(); - - var rdr = paxIterator(self.reader, @intCast(size)); - while (try rdr.next()) |attr| { - switch (attr.kind) { - .path => { - file.name = try attr.value(self.file_name_buffer); - }, - .linkpath => { - file.link_name = try attr.value(self.link_name_buffer); - }, - .size => { - var buf: [pax_max_size_attr_len]u8 = undefined; - file.size = try std.fmt.parseInt(u64, try attr.value(&buf), 10); - }, - } - } - }, - // Ignored header type - .global_extended_header => { - self.reader.skipBytes(size, .{}) catch return error.TarHeadersTooBig; - }, - // All other are unsupported header types - else => { - const d = self.diagnostics orelse return error.TarUnsupportedHeader; - try d.errors.append(d.allocator, .{ .unsupported_file_type = .{ - .file_name = try d.allocator.dupe(u8, header.name()), - .file_type = kind, - } }); - if (kind == .gnu_sparse) { - try self.skipGnuSparseExtendedHeaders(header); + /// Iterates through the tar archive as if it is a series of files. + /// Internally, the tar format often uses entries (header with optional + /// content) to add metadata that describes the next file. These + /// entries should not normally be visible to the outside. As such, this + /// loop iterates through one or more entries until it collects all + /// file attributes. 
+ pub fn next(self: *Iterator) !?File { + if (self.unread_file_bytes > 0) { + // If file content was not consumed by caller + try self.reader.discardAll64(self.unread_file_bytes); + self.unread_file_bytes = 0; + } + var file: File = self.newFile(); + + while (try self.readHeader()) |header| { + const kind = header.kind(); + const size: u64 = try header.size(); + self.padding = blockPadding(size); + + switch (kind) { + // File types to return upstream + .directory, .normal, .symbolic_link => { + file.kind = switch (kind) { + .directory => .directory, + .normal => .file, + .symbolic_link => .sym_link, + else => unreachable, + }; + file.mode = try header.mode(); + + // set file attributes if not already set by prefix/extended headers + if (file.size == 0) { + file.size = size; + } + if (file.link_name.len == 0) { + file.link_name = try header.linkName(self.link_name_buffer); + } + if (file.name.len == 0) { + file.name = try header.fullName(self.file_name_buffer); + } + + self.padding = blockPadding(file.size); + self.unread_file_bytes = file.size; + return file; + }, + // Prefix header types + .gnu_long_name => { + file.name = try self.readString(@intCast(size), self.file_name_buffer); + }, + .gnu_long_link => { + file.link_name = try self.readString(@intCast(size), self.link_name_buffer); + }, + .extended_header => { + // Use only the attributes from the last extended header. + file = self.newFile(); + + var rdr: PaxIterator = .{ + .reader = self.reader, + .size = @intCast(size), + }; + while (try rdr.next()) |attr| { + switch (attr.kind) { + .path => { + file.name = try attr.value(self.file_name_buffer); + }, + .linkpath => { + file.link_name = try attr.value(self.link_name_buffer); + }, + .size => { + var buf: [pax_max_size_attr_len]u8 = undefined; + file.size = try std.fmt.parseInt(u64, try attr.value(&buf), 10); + }, } + } + }, + // Ignored header type + .global_extended_header => { + self.reader.discardAll64(size) catch return error.TarHeadersTooBig; + }, + // All others are unsupported header types + else => { + const d = self.diagnostics orelse return error.TarUnsupportedHeader; + try d.errors.append(d.allocator, .{ .unsupported_file_type = .{ + .file_name = try d.allocator.dupe(u8, header.name()), + .file_type = kind, + } }); + if (kind == .gnu_sparse) { + try self.skipGnuSparseExtendedHeaders(header); + } + self.reader.discardAll64(size) catch return error.TarHeadersTooBig; + }, } } + return null; + } + pub fn streamRemaining(it: *Iterator, file: File, w: *std.Io.Writer) std.Io.Reader.StreamError!void { + try it.reader.streamExact64(w, file.size); + it.unread_file_bytes = 0; + } -/// Pax attributes iterator. -/// Size is length of pax extended header in reader. 
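// A minimal sketch of driving the concrete Iterator together with
// streamRemaining() above; illustrative only, not part of this change
// (`archive_bytes` and the buffer sizes are hypothetical):
//
//     var reader: std.Io.Reader = .fixed(archive_bytes);
//     var name_buf: [std.fs.max_path_bytes]u8 = undefined;
//     var link_buf: [std.fs.max_path_bytes]u8 = undefined;
//     var it: std.tar.Iterator = .init(&reader, .{
//         .file_name_buffer = &name_buf,
//         .link_name_buffer = &link_buf,
//     });
//     while (try it.next()) |file| {
//         if (file.kind != .file) continue;
//         var out_buf: [4096]u8 = undefined;
//         var out: std.Io.Writer = .fixed(&out_buf);
//         try it.streamRemaining(file, &out);
//         // Leaving a file unread is also fine: next() discards unread bytes.
//     }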
-fn paxIterator(reader: anytype, size: usize) PaxIterator(@TypeOf(reader)) { - return PaxIterator(@TypeOf(reader)){ - .reader = reader, - .size = size, - }; -} + fn skipGnuSparseExtendedHeaders(self: *Iterator, header: Header) !void { + var is_extended = header.bytes[482] > 0; + while (is_extended) { + var buf: [Header.SIZE]u8 = undefined; + try self.reader.readSliceAll(&buf); + is_extended = buf[504] > 0; + } + } +}; const PaxAttributeKind = enum { path, @@ -533,108 +497,99 @@ const PaxAttributeKind = enum { // maxInt(u64) has 20 chars, base 10 in practice we got 24 chars const pax_max_size_attr_len = 64; -fn PaxIterator(comptime ReaderType: type) type { - return struct { - size: usize, // cumulative size of all pax attributes - reader: ReaderType, - // scratch buffer used for reading attribute length and keyword - scratch: [128]u8 = undefined, - - const Self = @This(); - - const Attribute = struct { - kind: PaxAttributeKind, - len: usize, // length of the attribute value - reader: ReaderType, // reader positioned at value start - - // Copies pax attribute value into destination buffer. - // Must be called with destination buffer of size at least Attribute.len. - pub fn value(self: Attribute, dst: []u8) ![]const u8 { - if (self.len > dst.len) return error.TarInsufficientBuffer; - // assert(self.len <= dst.len); - const buf = dst[0..self.len]; - const n = try self.reader.readAll(buf); - if (n < self.len) return error.UnexpectedEndOfStream; - try validateAttributeEnding(self.reader); - if (hasNull(buf)) return error.PaxNullInValue; - return buf; - } - }; +pub const PaxIterator = struct { + size: usize, // cumulative size of all pax attributes + reader: *std.Io.Reader, - // Iterates over pax attributes. Returns known only known attributes. - // Caller has to call value in Attribute, to advance reader across value. - pub fn next(self: *Self) !?Attribute { - // Pax extended header consists of one or more attributes, each constructed as follows: - // "%d %s=%s\n", <length>, <keyword>, <value> - while (self.size > 0) { - const length_buf = try self.readUntil(' '); - const length = try std.fmt.parseInt(usize, length_buf, 10); // record length in bytes - - const keyword = try self.readUntil('='); - if (hasNull(keyword)) return error.PaxNullInKeyword; - - // calculate value_len - const value_start = length_buf.len + keyword.len + 2; // 2 separators - if (length < value_start + 1 or self.size < length) return error.UnexpectedEndOfStream; - const value_len = length - value_start - 1; // \n separator at end - self.size -= length; - - const kind: PaxAttributeKind = if (eql(keyword, "path")) - .path - else if (eql(keyword, "linkpath")) - .linkpath - else if (eql(keyword, "size")) - .size - else { - try self.reader.skipBytes(value_len, .{}); - try validateAttributeEnding(self.reader); - continue; - }; - if (kind == .size and value_len > pax_max_size_attr_len) { - return error.PaxSizeAttrOverflow; - } - return Attribute{ - .kind = kind, - .len = value_len, - .reader = self.reader, - }; - } + const Self = @This(); - return null; + const Attribute = struct { + kind: PaxAttributeKind, + len: usize, // length of the attribute value + reader: *std.Io.Reader, // reader positioned at value start + + // Copies pax attribute value into destination buffer. + // Must be called with destination buffer of size at least Attribute.len. 
+ pub fn value(self: Attribute, dst: []u8) ![]const u8 { + if (self.len > dst.len) return error.TarInsufficientBuffer; + // assert(self.len <= dst.len); + const buf = dst[0..self.len]; + const n = try self.reader.readSliceShort(buf); + if (n < self.len) return error.UnexpectedEndOfStream; + try validateAttributeEnding(self.reader); + if (hasNull(buf)) return error.PaxNullInValue; + return buf; } + }; - // Iterates over pax attributes. Returns known only known attributes. - // Caller has to call value in Attribute, to advance reader across value. - pub fn next(self: *Self) !?Attribute { - // Pax extended header consists of one or more attributes, each constructed as follows: - // "%d %s=%s\n", <length>, <keyword>, <value> - while (self.size > 0) { - const length_buf = try self.readUntil(' '); - const length = try std.fmt.parseInt(usize, length_buf, 10); // record length in bytes - - const keyword = try self.readUntil('='); - if (hasNull(keyword)) return error.PaxNullInKeyword; - - // calculate value_len - const value_start = length_buf.len + keyword.len + 2; // 2 separators - if (length < value_start + 1 or self.size < length) return error.UnexpectedEndOfStream; - const value_len = length - value_start - 1; // \n separator at end - self.size -= length; - - const kind: PaxAttributeKind = if (eql(keyword, "path")) - .path - else if (eql(keyword, "linkpath")) - .linkpath - else if (eql(keyword, "size")) - .size - else { - try self.reader.skipBytes(value_len, .{}); - try validateAttributeEnding(self.reader); - continue; - }; - if (kind == .size and value_len > pax_max_size_attr_len) { - return error.PaxSizeAttrOverflow; - } - return Attribute{ - .kind = kind, - .len = value_len, - .reader = self.reader, - }; - } + // Iterates over pax attributes. Returns only known attributes. + // Caller has to call value on the Attribute to advance the reader across the value. + pub fn next(self: *Self) !?Attribute { + // Pax extended header consists of one or more attributes, each constructed as follows: + // "%d %s=%s\n", <length>, <keyword>, <value> + while (self.size > 0) { + const length_buf = try self.reader.takeSentinel(' '); + const length = try std.fmt.parseInt(usize, length_buf, 10); // record length in bytes + + const keyword = try self.reader.takeSentinel('='); + if (hasNull(keyword)) return error.PaxNullInKeyword; + + // calculate value_len + const value_start = length_buf.len + keyword.len + 2; // 2 separators + if (length < value_start + 1 or self.size < length) return error.UnexpectedEndOfStream; + const value_len = length - value_start - 1; // \n separator at end + self.size -= length; + + const kind: PaxAttributeKind = if (eql(keyword, "path")) + .path + else if (eql(keyword, "linkpath")) + .linkpath + else if (eql(keyword, "size")) + .size + else { + try self.reader.discardAll(value_len); + try validateAttributeEnding(self.reader); + continue; + }; + if (kind == .size and value_len > pax_max_size_attr_len) { + return error.PaxSizeAttrOverflow; + } + return .{ + .kind = kind, + .len = value_len, + .reader = self.reader, + }; } - return null; + return null; } - fn readUntil(self: *Self, delimiter: u8) ![]const u8 { - var fbs = std.io.fixedBufferStream(&self.scratch); - try self.reader.streamUntilDelimiter(fbs.writer(), delimiter, null); - return fbs.getWritten(); + fn eql(a: []const u8, b: []const u8) bool { + return std.mem.eql(u8, a, b); } - fn eql(a: []const u8, b: []const u8) bool { - return std.mem.eql(u8, a, b); - } + fn hasNull(str: []const u8) bool { + return (std.mem.indexOfScalar(u8, str, 0)) != null; + } - fn hasNull(str: []const u8) bool { - return (std.mem.indexOfScalar(u8, str, 0)) != null; - } + // Checks that each record ends with a newline. + fn validateAttributeEnding(reader: *std.Io.Reader) !void { + if (try reader.takeByte() != '\n') return error.PaxInvalidAttributeEnd; + } +}; - // Checks that each record ends with new line. - fn validateAttributeEnding(reader: ReaderType) !void { - if (try reader.readByte() != '\n') return error.PaxInvalidAttributeEnd; - } - }; -}
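// A worked example of the "%d %s=%s\n" record format parsed by
// PaxIterator.next() above; an illustrative sketch, not part of this change
// (the 9-byte record is hypothetical). The leading decimal is the length of
// the whole record in bytes, counting the length digits themselves, the
// space, the '=' and the trailing '\n':
//
//     var r: std.Io.Reader = .fixed("9 path=a\n");
//     var it: PaxIterator = .{ .reader = &r, .size = 9 };
//     const attr = (try it.next()).?; // attr.kind == .path, attr.len == 1
//     var buf: [1]u8 = undefined;
//     try std.testing.expectEqualStrings("a", try attr.value(&buf));

/// Saves tar file content to the file system. 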
-pub fn pipeToFileSystem(dir: std.fs.Dir, reader: anytype, options: PipeOptions) !void { +pub fn pipeToFileSystem(dir: std.fs.Dir, reader: *std.Io.Reader, options: PipeOptions) !void { var file_name_buffer: [std.fs.max_path_bytes]u8 = undefined; var link_name_buffer: [std.fs.max_path_bytes]u8 = undefined; - var iter = iterator(reader, .{ + var file_contents_buffer: [1024]u8 = undefined; + var it: Iterator = .init(reader, .{ .file_name_buffer = &file_name_buffer, .link_name_buffer = &link_name_buffer, .diagnostics = options.diagnostics, }); - while (try iter.next()) |file| { + while (try it.next()) |file| { const file_name = stripComponents(file.name, options.strip_components); if (file_name.len == 0 and file.kind != .directory) { const d = options.diagnostics orelse return error.TarComponentsOutsideStrippedPrefix; @@ -656,7 +611,9 @@ pub fn pipeToFileSystem(dir: std.fs.Dir, reader: anytype, options: PipeOptions) .file => { if (createDirAndFile(dir, file_name, fileMode(file.mode, options))) |fs_file| { defer fs_file.close(); - try file.writeAll(fs_file); + var file_writer = fs_file.writer(&file_contents_buffer); + try it.streamRemaining(file, &file_writer.interface); + try file_writer.interface.flush(); } else |err| { const d = options.diagnostics orelse return err; try d.errors.append(d.allocator, .{ .unable_to_create_file = .{ @@ -826,11 +783,14 @@ test PaxIterator { var buffer: [1024]u8 = undefined; outer: for (cases) |case| { - var stream = std.io.fixedBufferStream(case.data); - var iter = paxIterator(stream.reader(), case.data.len); + var reader: std.Io.Reader = .fixed(case.data); + var it: PaxIterator = .{ + .size = case.data.len, + .reader = &reader, + }; var i: usize = 0; - while (iter.next() catch |err| { + while (it.next() catch |err| { if (case.err) |e| { try testing.expectEqual(e, err); continue; @@ -853,12 +813,6 @@ test PaxIterator { } } -test { - _ = @import("tar/test.zig"); - _ = @import("tar/writer.zig"); - _ = Diagnostics; -} - test "header parse size" { const cases = [_]struct { in: []const u8, @@ -941,7 +895,7 @@ test "create file and symlink" { file.close(); } -test iterator { +test Iterator { // Example tar file is created from this tree structure: // $ tree example // example @@ -962,19 +916,19 @@ test iterator { // example/empty/ const data = @embedFile("tar/testdata/example.tar"); - var fbs = std.io.fixedBufferStream(data); + var reader: std.Io.Reader = .fixed(data); // User provided buffers to the iterator var file_name_buffer: [std.fs.max_path_bytes]u8 = undefined; var link_name_buffer: [std.fs.max_path_bytes]u8 = undefined; // Create iterator - var iter = iterator(fbs.reader(), .{ + var it: Iterator = .init(&reader, .{ .file_name_buffer = &file_name_buffer, .link_name_buffer = &link_name_buffer, }); // Iterate over files in example.tar var file_no: usize = 0; - while (try iter.next()) |file| : (file_no += 1) { + while (try it.next()) |file| : (file_no += 1) { switch (file.kind) { .directory => { switch (file_no) { @@ -987,10 +941,10 @@ test iterator { }, .file => { try testing.expectEqualStrings("example/a/file", file.name); - // Read file content var buf: [16]u8 = undefined; - const n = try file.reader().readAll(&buf); - try testing.expectEqualStrings("content\n", buf[0..n]); + var w: std.Io.Writer = .fixed(&buf); + try it.streamRemaining(file, &w); + try testing.expectEqualStrings("content\n", w.buffered()); }, .sym_link => { try testing.expectEqualStrings("example/b/symlink", file.name); @@ -1021,15 +975,14 @@ test pipeToFileSystem { // example/empty/ const 
data = @embedFile("tar/testdata/example.tar"); - var fbs = std.io.fixedBufferStream(data); - const reader = fbs.reader(); + var reader: std.Io.Reader = .fixed(data); var tmp = testing.tmpDir(.{ .no_follow = true }); defer tmp.cleanup(); const dir = tmp.dir; - // Save tar from `reader` to the file system `dir` - pipeToFileSystem(dir, reader, .{ + // Save tar from reader to the file system `dir` + pipeToFileSystem(dir, &reader, .{ .mode_mode = .ignore, .strip_components = 1, .exclude_empty_directories = true, @@ -1053,8 +1006,7 @@ test pipeToFileSystem { test "pipeToFileSystem root_dir" { const data = @embedFile("tar/testdata/example.tar"); - var fbs = std.io.fixedBufferStream(data); - const reader = fbs.reader(); + var reader: std.Io.Reader = .fixed(data); // with strip_components = 1 { @@ -1063,7 +1015,7 @@ test "pipeToFileSystem root_dir" { var diagnostics: Diagnostics = .{ .allocator = testing.allocator }; defer diagnostics.deinit(); - pipeToFileSystem(tmp.dir, reader, .{ + pipeToFileSystem(tmp.dir, &reader, .{ .strip_components = 1, .diagnostics = &diagnostics, }) catch |err| { @@ -1079,13 +1031,13 @@ test "pipeToFileSystem root_dir" { // with strip_components = 0 { - fbs.reset(); + reader = .fixed(data); var tmp = testing.tmpDir(.{ .no_follow = true }); defer tmp.cleanup(); var diagnostics: Diagnostics = .{ .allocator = testing.allocator }; defer diagnostics.deinit(); - pipeToFileSystem(tmp.dir, reader, .{ + pipeToFileSystem(tmp.dir, &reader, .{ .strip_components = 0, .diagnostics = &diagnostics, }) catch |err| { @@ -1102,45 +1054,42 @@ test "pipeToFileSystem root_dir" { test "findRoot with single file archive" { const data = @embedFile("tar/testdata/22752.tar"); - var fbs = std.io.fixedBufferStream(data); - const reader = fbs.reader(); + var reader: std.Io.Reader = .fixed(data); var tmp = testing.tmpDir(.{}); defer tmp.cleanup(); var diagnostics: Diagnostics = .{ .allocator = testing.allocator }; defer diagnostics.deinit(); - try pipeToFileSystem(tmp.dir, reader, .{ .diagnostics = &diagnostics }); + try pipeToFileSystem(tmp.dir, &reader, .{ .diagnostics = &diagnostics }); try testing.expectEqualStrings("", diagnostics.root_dir); } test "findRoot without explicit root dir" { const data = @embedFile("tar/testdata/19820.tar"); - var fbs = std.io.fixedBufferStream(data); - const reader = fbs.reader(); + var reader: std.Io.Reader = .fixed(data); var tmp = testing.tmpDir(.{}); defer tmp.cleanup(); var diagnostics: Diagnostics = .{ .allocator = testing.allocator }; defer diagnostics.deinit(); - try pipeToFileSystem(tmp.dir, reader, .{ .diagnostics = &diagnostics }); + try pipeToFileSystem(tmp.dir, &reader, .{ .diagnostics = &diagnostics }); try testing.expectEqualStrings("root", diagnostics.root_dir); } test "pipeToFileSystem strip_components" { const data = @embedFile("tar/testdata/example.tar"); - var fbs = std.io.fixedBufferStream(data); - const reader = fbs.reader(); + var reader: std.Io.Reader = .fixed(data); var tmp = testing.tmpDir(.{ .no_follow = true }); defer tmp.cleanup(); var diagnostics: Diagnostics = .{ .allocator = testing.allocator }; defer diagnostics.deinit(); - pipeToFileSystem(tmp.dir, reader, .{ + pipeToFileSystem(tmp.dir, &reader, .{ .strip_components = 3, .diagnostics = &diagnostics, }) catch |err| { @@ -1194,13 +1143,12 @@ test "executable bit" { const data = @embedFile("tar/testdata/example.tar"); for ([_]PipeOptions.ModeMode{ .ignore, .executable_bit_only }) |opt| { - var fbs = std.io.fixedBufferStream(data); - const reader = fbs.reader(); + var reader: 
std.Io.Reader = .fixed(data); var tmp = testing.tmpDir(.{ .no_follow = true }); //defer tmp.cleanup(); - pipeToFileSystem(tmp.dir, reader, .{ + pipeToFileSystem(tmp.dir, &reader, .{ .strip_components = 1, .exclude_empty_directories = true, .mode_mode = opt, @@ -1226,3 +1174,9 @@ test "executable bit" { } } } + +test { + _ = @import("tar/test.zig"); + _ = Writer; + _ = Diagnostics; +} diff --git a/lib/std/tar/Writer.zig b/lib/std/tar/Writer.zig new file mode 100644 index 0000000000..61ae00b24e --- /dev/null +++ b/lib/std/tar/Writer.zig @@ -0,0 +1,462 @@ +const std = @import("std"); +const assert = std.debug.assert; +const testing = std.testing; +const Writer = @This(); + +const block_size = @sizeOf(Header); + +/// Options for writing file/dir/link. If left empty 0o664 is used for +/// file mode and current time for mtime. +pub const Options = struct { + /// File system permission mode. + mode: u32 = 0, + /// File system modification time. + mtime: u64 = 0, +}; + +underlying_writer: *std.Io.Writer, +prefix: []const u8 = "", +mtime_now: u64 = 0, + +const Error = error{ + WriteFailed, + OctalOverflow, + NameTooLong, +}; + +/// Sets prefix for all other write* method paths. +pub fn setRoot(w: *Writer, root: []const u8) Error!void { + if (root.len > 0) + try w.writeDir(root, .{}); + + w.prefix = root; +} + +pub fn writeDir(w: *Writer, sub_path: []const u8, options: Options) Error!void { + try w.writeHeader(.directory, sub_path, "", 0, options); +} + +pub const WriteFileError = std.Io.Writer.FileError || Error || std.fs.File.GetEndPosError; + +pub fn writeFile( + w: *Writer, + sub_path: []const u8, + file_reader: *std.fs.File.Reader, + stat_mtime: i128, +) WriteFileError!void { + const size = try file_reader.getSize(); + const mtime: u64 = @intCast(@divFloor(stat_mtime, std.time.ns_per_s)); + + var header: Header = .{}; + try w.setPath(&header, sub_path); + try header.setSize(size); + try header.setMtime(mtime); + try header.updateChecksum(); + + try w.underlying_writer.writeAll(@ptrCast((&header)[0..1])); + _ = try w.underlying_writer.sendFileAll(file_reader, .unlimited); + try w.writePadding64(size); +} + +pub const WriteFileStreamError = Error || std.Io.Reader.StreamError; + +/// Writes file reading file content from `reader`. Reads exactly `size` bytes +/// from `reader`, or returns `error.EndOfStream`. +pub fn writeFileStream( + w: *Writer, + sub_path: []const u8, + size: u64, + reader: *std.Io.Reader, + options: Options, +) WriteFileStreamError!void { + try w.writeHeader(.regular, sub_path, "", size, options); + try reader.streamExact64(w.underlying_writer, size); + try w.writePadding64(size); +} + +/// Writes file using bytes buffer `content` for size and file content. 
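// A minimal end-to-end sketch of the new Writer API, exercising the
// writeFileBytes function defined just below; illustrative only (`gpa` is a
// hypothetical allocator):
//
//     var aw: std.Io.Writer.Allocating = .init(gpa);
//     defer aw.deinit();
//     var w: std.tar.Writer = .{ .underlying_writer = &aw.writer };
//     try w.setRoot("pkg");
//     try w.writeFileBytes("hello.txt", "hello\n", .{ .mode = 0o644 });
//     const archive = aw.getWritten(); // finished tar bytes; the trailing
//     // zero blocks are optional (see finishPedantically below)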
+pub fn writeFileBytes(w: *Writer, sub_path: []const u8, content: []const u8, options: Options) Error!void { + try w.writeHeader(.regular, sub_path, "", content.len, options); + try w.underlying_writer.writeAll(content); + try w.writePadding(content.len); +} + +pub fn writeLink(w: *Writer, sub_path: []const u8, link_name: []const u8, options: Options) Error!void { + try w.writeHeader(.symbolic_link, sub_path, link_name, 0, options); +} + +fn writeHeader( + w: *Writer, + typeflag: Header.FileType, + sub_path: []const u8, + link_name: []const u8, + size: u64, + options: Options, +) Error!void { + var header = Header.init(typeflag); + try w.setPath(&header, sub_path); + try header.setSize(size); + try header.setMtime(options.mtime); + if (options.mode != 0) + try header.setMode(options.mode); + if (typeflag == .symbolic_link) + header.setLinkname(link_name) catch |err| switch (err) { + error.NameTooLong => try w.writeExtendedHeader(.gnu_long_link, &.{link_name}), + else => return err, + }; + try header.write(w.underlying_writer); +} + +/// Writes the path in the posix header; if it doesn't fit (in name+prefix: +/// 100+155 bytes), it is written in a gnu extended header. +fn setPath(w: *Writer, header: *Header, sub_path: []const u8) Error!void { + header.setPath(w.prefix, sub_path) catch |err| switch (err) { + error.NameTooLong => { + // write extended header + const buffers: []const []const u8 = if (w.prefix.len == 0) + &.{sub_path} + else + &.{ w.prefix, "/", sub_path }; + try w.writeExtendedHeader(.gnu_long_name, buffers); + }, + else => return err, + }; +} + +/// Writes gnu extended header: gnu_long_name or gnu_long_link. +fn writeExtendedHeader(w: *Writer, typeflag: Header.FileType, buffers: []const []const u8) Error!void { + var len: usize = 0; + for (buffers) |buf| len += buf.len; + + var header: Header = .init(typeflag); + try header.setSize(len); + try header.write(w.underlying_writer); + for (buffers) |buf| + try w.underlying_writer.writeAll(buf); + try w.writePadding(len); +} + +fn writePadding(w: *Writer, bytes: usize) std.Io.Writer.Error!void { + return writePaddingPos(w, bytes % block_size); +} + +fn writePadding64(w: *Writer, bytes: u64) std.Io.Writer.Error!void { + return writePaddingPos(w, @intCast(bytes % block_size)); +} + +fn writePaddingPos(w: *Writer, pos: usize) std.Io.Writer.Error!void { + if (pos == 0) return; + try w.underlying_writer.splatByteAll(0, block_size - pos); +} + +/// According to the specification, tar should finish with two zero blocks, but +/// "reasonable system must not assume that such a block exists when reading an +/// archive". Therefore, the Zig standard library recommends not calling this +/// function. +pub fn finishPedantically(w: *Writer) std.Io.Writer.Error!void { + try w.underlying_writer.splatByteAll(0, block_size * 2); +} + +/// A struct that is exactly 512 bytes and matches tar file format. This is +/// intended to be used for outputting tar files; for parsing there is +/// `std.tar.Header`. +pub const Header = extern struct { + // This struct was originally copied from + // https://github.com/mattnite/tar/blob/main/src/main.zig which is MIT + // licensed. + // + // The name, linkname, magic, uname, and gname are null-terminated character + // strings. All other fields are zero-filled octal numbers in ASCII. Each + // numeric field of width w contains w minus 1 digits, and a null. 
+ // Reference: https://www.gnu.org/software/tar/manual/html_node/Standard.html + // POSIX header: byte offset + name: [100]u8 = [_]u8{0} ** 100, // 0 + mode: [7:0]u8 = default_mode.file, // 100 + uid: [7:0]u8 = [_:0]u8{0} ** 7, // unused 108 + gid: [7:0]u8 = [_:0]u8{0} ** 7, // unused 116 + size: [11:0]u8 = [_:0]u8{'0'} ** 11, // 124 + mtime: [11:0]u8 = [_:0]u8{'0'} ** 11, // 136 + checksum: [7:0]u8 = [_:0]u8{' '} ** 7, // 148 + typeflag: FileType = .regular, // 156 + linkname: [100]u8 = [_]u8{0} ** 100, // 157 + magic: [6]u8 = [_]u8{ 'u', 's', 't', 'a', 'r', 0 }, // 257 + version: [2]u8 = [_]u8{ '0', '0' }, // 263 + uname: [32]u8 = [_]u8{0} ** 32, // unused 265 + gname: [32]u8 = [_]u8{0} ** 32, // unused 297 + devmajor: [7:0]u8 = [_:0]u8{0} ** 7, // unused 329 + devminor: [7:0]u8 = [_:0]u8{0} ** 7, // unused 337 + prefix: [155]u8 = [_]u8{0} ** 155, // 345 + pad: [12]u8 = [_]u8{0} ** 12, // unused 500 + + pub const FileType = enum(u8) { + regular = '0', + symbolic_link = '2', + directory = '5', + gnu_long_name = 'L', + gnu_long_link = 'K', + }; + + const default_mode = struct { + const file = [_:0]u8{ '0', '0', '0', '0', '6', '6', '4' }; // 0o664 + const dir = [_:0]u8{ '0', '0', '0', '0', '7', '7', '5' }; // 0o775 + const sym_link = [_:0]u8{ '0', '0', '0', '0', '7', '7', '7' }; // 0o777 + const other = [_:0]u8{ '0', '0', '0', '0', '0', '0', '0' }; // 0o000 + }; + + pub fn init(typeflag: FileType) Header { + return .{ + .typeflag = typeflag, + .mode = switch (typeflag) { + .directory => default_mode.dir, + .symbolic_link => default_mode.sym_link, + .regular => default_mode.file, + else => default_mode.other, + }, + }; + } + + pub fn setSize(w: *Header, size: u64) error{OctalOverflow}!void { + try octal(&w.size, size); + } + + fn octal(buf: []u8, value: u64) error{OctalOverflow}!void { + var remainder: u64 = value; + var pos: usize = buf.len; + while (remainder > 0 and pos > 0) { + pos -= 1; + const c: u8 = @as(u8, @intCast(remainder % 8)) + '0'; + buf[pos] = c; + remainder /= 8; + if (pos == 0 and remainder > 0) return error.OctalOverflow; + } + } + + pub fn setMode(w: *Header, mode: u32) error{OctalOverflow}!void { + try octal(&w.mode, mode); + } + + // Integer number of seconds since January 1, 1970, 00:00 Coordinated Universal Time. 
+ // mtime == 0 will use current time + pub fn setMtime(w: *Header, mtime: u64) error{OctalOverflow}!void { + try octal(&w.mtime, mtime); + } + + pub fn updateChecksum(w: *Header) !void { + var checksum: usize = ' '; // other 7 w.checksum bytes are initialized to ' ' + for (std.mem.asBytes(w)) |val| + checksum += val; + try octal(&w.checksum, checksum); + } + + pub fn write(h: *Header, bw: *std.Io.Writer) error{ OctalOverflow, WriteFailed }!void { + try h.updateChecksum(); + try bw.writeAll(std.mem.asBytes(h)); + } + + pub fn setLinkname(w: *Header, link: []const u8) !void { + if (link.len > w.linkname.len) return error.NameTooLong; + @memcpy(w.linkname[0..link.len], link); + } + + pub fn setPath(w: *Header, prefix: []const u8, sub_path: []const u8) !void { + const max_prefix = w.prefix.len; + const max_name = w.name.len; + const sep = std.fs.path.sep_posix; + + if (prefix.len + sub_path.len > max_name + max_prefix or prefix.len > max_prefix) + return error.NameTooLong; + + // both fit into name + if (prefix.len > 0 and prefix.len + sub_path.len < max_name) { + @memcpy(w.name[0..prefix.len], prefix); + w.name[prefix.len] = sep; + @memcpy(w.name[prefix.len + 1 ..][0..sub_path.len], sub_path); + return; + } + + // sub_path fits into name + // there is no prefix or prefix fits into prefix + if (sub_path.len <= max_name) { + @memcpy(w.name[0..sub_path.len], sub_path); + @memcpy(w.prefix[0..prefix.len], prefix); + return; + } + + if (prefix.len > 0) { + @memcpy(w.prefix[0..prefix.len], prefix); + w.prefix[prefix.len] = sep; + } + const prefix_pos = if (prefix.len > 0) prefix.len + 1 else 0; + + // add as much to prefix as you can, must split at / + const prefix_remaining = max_prefix - prefix_pos; + if (std.mem.lastIndexOf(u8, sub_path[0..@min(prefix_remaining, sub_path.len)], &.{'/'})) |sep_pos| { + @memcpy(w.prefix[prefix_pos..][0..sep_pos], sub_path[0..sep_pos]); + if ((sub_path.len - sep_pos - 1) > max_name) return error.NameTooLong; + @memcpy(w.name[0..][0 .. 
sub_path.len - sep_pos - 1], sub_path[sep_pos + 1 ..]); + return; + } + + return error.NameTooLong; + } + + comptime { + assert(@sizeOf(Header) == 512); + } + + test "setPath" { + const cases = [_]struct { + in: []const []const u8, + out: []const []const u8, + }{ + .{ + .in = &.{ "", "123456789" }, + .out = &.{ "", "123456789" }, + }, + // can fit into name + .{ + .in = &.{ "prefix", "sub_path" }, + .out = &.{ "", "prefix/sub_path" }, + }, + // no more both fits into name + .{ + .in = &.{ "prefix", "0123456789/" ** 8 ++ "basename" }, + .out = &.{ "prefix", "0123456789/" ** 8 ++ "basename" }, + }, + // put as much as you can into prefix the rest goes into name + .{ + .in = &.{ "prefix", "0123456789/" ** 10 ++ "basename" }, + .out = &.{ "prefix/" ++ "0123456789/" ** 9 ++ "0123456789", "basename" }, + }, + + .{ + .in = &.{ "prefix", "0123456789/" ** 15 ++ "basename" }, + .out = &.{ "prefix/" ++ "0123456789/" ** 12 ++ "0123456789", "0123456789/0123456789/basename" }, + }, + .{ + .in = &.{ "prefix", "0123456789/" ** 21 ++ "basename" }, + .out = &.{ "prefix/" ++ "0123456789/" ** 12 ++ "0123456789", "0123456789/" ** 8 ++ "basename" }, + }, + .{ + .in = &.{ "", "012345678/" ** 10 ++ "foo" }, + .out = &.{ "012345678/" ** 9 ++ "012345678", "foo" }, + }, + }; + + for (cases) |case| { + var header = Header.init(.regular); + try header.setPath(case.in[0], case.in[1]); + try testing.expectEqualStrings(case.out[0], std.mem.sliceTo(&header.prefix, 0)); + try testing.expectEqualStrings(case.out[1], std.mem.sliceTo(&header.name, 0)); + } + + const error_cases = [_]struct { + in: []const []const u8, + }{ + // basename can't fit into name (106 characters) + .{ .in = &.{ "zig", "test/cases/compile_errors/regression_test_2980_base_type_u32_is_not_type_checked_properly_when_assigning_a_value_within_a_struct.zig" } }, + // cant fit into 255 + sep + .{ .in = &.{ "prefix", "0123456789/" ** 22 ++ "basename" } }, + // can fit but sub_path can't be split (there is no separator) + .{ .in = &.{ "prefix", "0123456789" ** 10 ++ "a" } }, + .{ .in = &.{ "prefix", "0123456789" ** 14 ++ "basename" } }, + }; + + for (error_cases) |case| { + var header = Header.init(.regular); + try testing.expectError( + error.NameTooLong, + header.setPath(case.in[0], case.in[1]), + ); + } + } +}; + +test { + _ = Header; +} + +test "write files" { + const files = [_]struct { + path: []const u8, + content: []const u8, + }{ + .{ .path = "foo", .content = "bar" }, + .{ .path = "a12345678/" ** 10 ++ "foo", .content = "a" ** 511 }, + .{ .path = "b12345678/" ** 24 ++ "foo", .content = "b" ** 512 }, + .{ .path = "c12345678/" ** 25 ++ "foo", .content = "c" ** 513 }, + .{ .path = "d12345678/" ** 51 ++ "foo", .content = "d" ** 1025 }, + .{ .path = "e123456789" ** 11, .content = "e" }, + }; + + var file_name_buffer: [std.fs.max_path_bytes]u8 = undefined; + var link_name_buffer: [std.fs.max_path_bytes]u8 = undefined; + + // with root + { + const root = "root"; + + var output: std.Io.Writer.Allocating = .init(testing.allocator); + var w: Writer = .{ .underlying_writer = &output.writer }; + defer output.deinit(); + try w.setRoot(root); + for (files) |file| + try w.writeFileBytes(file.path, file.content, .{}); + + var input: std.Io.Reader = .fixed(output.getWritten()); + var it: std.tar.Iterator = .init(&input, .{ + .file_name_buffer = &file_name_buffer, + .link_name_buffer = &link_name_buffer, + }); + + // first entry is directory with prefix + { + const actual = (try it.next()).?; + try testing.expectEqualStrings(root, actual.name); + try 
testing.expectEqual(std.tar.FileKind.directory, actual.kind); + } + + var i: usize = 0; + while (try it.next()) |actual| { + defer i += 1; + const expected = files[i]; + try testing.expectEqualStrings(root, actual.name[0..root.len]); + try testing.expectEqual('/', actual.name[root.len..][0]); + try testing.expectEqualStrings(expected.path, actual.name[root.len + 1 ..]); + + var content: std.Io.Writer.Allocating = .init(testing.allocator); + defer content.deinit(); + try it.streamRemaining(actual, &content.writer); + try testing.expectEqualSlices(u8, expected.content, content.getWritten()); + } + } + // without root + { + var output: std.Io.Writer.Allocating = .init(testing.allocator); + var w: Writer = .{ .underlying_writer = &output.writer }; + defer output.deinit(); + for (files) |file| { + var content: std.Io.Reader = .fixed(file.content); + try w.writeFileStream(file.path, file.content.len, &content, .{}); + } + + var input: std.Io.Reader = .fixed(output.getWritten()); + var it: std.tar.Iterator = .init(&input, .{ + .file_name_buffer = &file_name_buffer, + .link_name_buffer = &link_name_buffer, + }); + + var i: usize = 0; + while (try it.next()) |actual| { + defer i += 1; + const expected = files[i]; + try testing.expectEqualStrings(expected.path, actual.name); + + var content: std.Io.Writer.Allocating = .init(testing.allocator); + defer content.deinit(); + try it.streamRemaining(actual, &content.writer); + try testing.expectEqualSlices(u8, expected.content, content.getWritten()); + } + try w.finishPedantically(); + } +} diff --git a/lib/std/tar/test.zig b/lib/std/tar/test.zig index 3bcb5af90c..3356baacb5 100644 --- a/lib/std/tar/test.zig +++ b/lib/std/tar/test.zig @@ -18,31 +18,72 @@ const Case = struct { err: ?anyerror = null, // parsing should fail with this error }; -const cases = [_]Case{ - .{ - .data = @embedFile("testdata/gnu.tar"), - .files = &[_]Case.File{ - .{ - .name = "small.txt", - .size = 5, - .mode = 0o640, - }, - .{ - .name = "small2.txt", - .size = 11, - .mode = 0o640, - }, +const gnu_case: Case = .{ + .data = @embedFile("testdata/gnu.tar"), + .files = &[_]Case.File{ + .{ + .name = "small.txt", + .size = 5, + .mode = 0o640, }, - .chksums = &[_][]const u8{ - "e38b27eaccb4391bdec553a7f3ae6b2f", - "c65bd2e50a56a2138bf1716f2fd56fe9", + .{ + .name = "small2.txt", + .size = 11, + .mode = 0o640, + }, + }, + .chksums = &[_][]const u8{ + "e38b27eaccb4391bdec553a7f3ae6b2f", + "c65bd2e50a56a2138bf1716f2fd56fe9", + }, +}; + +const gnu_multi_headers_case: Case = .{ + .data = @embedFile("testdata/gnu-multi-hdrs.tar"), + .files = &[_]Case.File{ + .{ + .name = "GNU2/GNU2/long-path-name", + .link_name = "GNU4/GNU4/long-linkpath-name", + .kind = .sym_link, }, }, - .{ +}; + +const trailing_slash_case: Case = .{ + .data = @embedFile("testdata/trailing-slash.tar"), + .files = &[_]Case.File{ + .{ + .name = "123456789/" ** 30, + .kind = .directory, + }, + }, +}; + +const writer_big_long_case: Case = .{ + // Size in gnu extended format, and name in pax attribute. 
+ .data = @embedFile("testdata/writer-big-long.tar"), + .files = &[_]Case.File{ + .{ + .name = "longname/" ** 15 ++ "16gig.txt", + .size = 16 * 1024 * 1024 * 1024, + .mode = 0o644, + .truncated = true, + }, + }, +}; + +const fuzz1_case: Case = .{ + .data = @embedFile("testdata/fuzz1.tar"), + .err = error.TarInsufficientBuffer, +}; + +test "run test cases" { + try testCase(gnu_case); + try testCase(.{ .data = @embedFile("testdata/sparse-formats.tar"), .err = error.TarUnsupportedHeader, - }, - .{ + }); + try testCase(.{ .data = @embedFile("testdata/star.tar"), .files = &[_]Case.File{ .{ @@ -60,8 +101,8 @@ const cases = [_]Case{ "e38b27eaccb4391bdec553a7f3ae6b2f", "c65bd2e50a56a2138bf1716f2fd56fe9", }, - }, - .{ + }); + try testCase(.{ .data = @embedFile("testdata/v7.tar"), .files = &[_]Case.File{ .{ @@ -79,8 +120,8 @@ const cases = [_]Case{ "e38b27eaccb4391bdec553a7f3ae6b2f", "c65bd2e50a56a2138bf1716f2fd56fe9", }, - }, - .{ + }); + try testCase(.{ .data = @embedFile("testdata/pax.tar"), .files = &[_]Case.File{ .{ @@ -99,13 +140,13 @@ const cases = [_]Case{ .chksums = &[_][]const u8{ "3c382e8f5b6631aa2db52643912ffd4a", }, - }, - .{ + }); + try testCase(.{ // pax attribute don't end with \n .data = @embedFile("testdata/pax-bad-hdr-file.tar"), .err = error.PaxInvalidAttributeEnd, - }, - .{ + }); + try testCase(.{ // size is in pax attribute .data = @embedFile("testdata/pax-pos-size-file.tar"), .files = &[_]Case.File{ @@ -119,8 +160,8 @@ const cases = [_]Case{ .chksums = &[_][]const u8{ "0afb597b283fe61b5d4879669a350556", }, - }, - .{ + }); + try testCase(.{ // has pax records which we are not interested in .data = @embedFile("testdata/pax-records.tar"), .files = &[_]Case.File{ @@ -128,8 +169,8 @@ const cases = [_]Case{ .name = "file", }, }, - }, - .{ + }); + try testCase(.{ // has global records which we are ignoring .data = @embedFile("testdata/pax-global-records.tar"), .files = &[_]Case.File{ @@ -146,8 +187,8 @@ const cases = [_]Case{ .name = "file4", }, }, - }, - .{ + }); + try testCase(.{ .data = @embedFile("testdata/nil-uid.tar"), .files = &[_]Case.File{ .{ @@ -160,8 +201,8 @@ const cases = [_]Case{ .chksums = &[_][]const u8{ "08d504674115e77a67244beac19668f5", }, - }, - .{ + }); + try testCase(.{ // has xattrs and pax records which we are ignoring .data = @embedFile("testdata/xattrs.tar"), .files = &[_]Case.File{ @@ -182,23 +223,14 @@ const cases = [_]Case{ "e38b27eaccb4391bdec553a7f3ae6b2f", "c65bd2e50a56a2138bf1716f2fd56fe9", }, - }, - .{ - .data = @embedFile("testdata/gnu-multi-hdrs.tar"), - .files = &[_]Case.File{ - .{ - .name = "GNU2/GNU2/long-path-name", - .link_name = "GNU4/GNU4/long-linkpath-name", - .kind = .sym_link, - }, - }, - }, - .{ + }); + try testCase(gnu_multi_headers_case); + try testCase(.{ // has gnu type D (directory) and S (sparse) blocks .data = @embedFile("testdata/gnu-incremental.tar"), .err = error.TarUnsupportedHeader, - }, - .{ + }); + try testCase(.{ // should use values only from last pax header .data = @embedFile("testdata/pax-multi-hdrs.tar"), .files = &[_]Case.File{ @@ -208,8 +240,8 @@ const cases = [_]Case{ .kind = .sym_link, }, }, - }, - .{ + }); + try testCase(.{ .data = @embedFile("testdata/gnu-long-nul.tar"), .files = &[_]Case.File{ .{ @@ -217,8 +249,8 @@ const cases = [_]Case{ .mode = 0o644, }, }, - }, - .{ + }); + try testCase(.{ .data = @embedFile("testdata/gnu-utf8.tar"), .files = &[_]Case.File{ .{ @@ -226,8 +258,8 @@ const cases = [_]Case{ .mode = 0o644, }, }, - }, - .{ + }); + try testCase(.{ .data = @embedFile("testdata/gnu-not-utf8.tar"), 
.files = &[_]Case.File{ .{ @@ -235,33 +267,33 @@ const cases = [_]Case{ .mode = 0o644, }, }, - }, - .{ + }); + try testCase(.{ // null in pax key .data = @embedFile("testdata/pax-nul-xattrs.tar"), .err = error.PaxNullInKeyword, - }, - .{ + }); + try testCase(.{ .data = @embedFile("testdata/pax-nul-path.tar"), .err = error.PaxNullInValue, - }, - .{ + }); + try testCase(.{ .data = @embedFile("testdata/neg-size.tar"), .err = error.TarHeader, - }, - .{ + }); + try testCase(.{ .data = @embedFile("testdata/issue10968.tar"), .err = error.TarHeader, - }, - .{ + }); + try testCase(.{ .data = @embedFile("testdata/issue11169.tar"), .err = error.TarHeader, - }, - .{ + }); + try testCase(.{ .data = @embedFile("testdata/issue12435.tar"), .err = error.TarHeaderChksum, - }, - .{ + }); + try testCase(.{ // has magic with space at end instead of null .data = @embedFile("testdata/invalid-go17.tar"), .files = &[_]Case.File{ @@ -269,8 +301,8 @@ const cases = [_]Case{ .name = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa/foo", }, }, - }, - .{ + }); + try testCase(.{ .data = @embedFile("testdata/ustar-file-devs.tar"), .files = &[_]Case.File{ .{ @@ -278,17 +310,9 @@ const cases = [_]Case{ .mode = 0o644, }, }, - }, - .{ - .data = @embedFile("testdata/trailing-slash.tar"), - .files = &[_]Case.File{ - .{ - .name = "123456789/" ** 30, - .kind = .directory, - }, - }, - }, - .{ + }); + try testCase(trailing_slash_case); + try testCase(.{ // Has size in gnu extended format. To represent size bigger than 8 GB. .data = @embedFile("testdata/writer-big.tar"), .files = &[_]Case.File{ @@ -299,120 +323,92 @@ const cases = [_]Case{ .mode = 0o640, }, }, - }, - .{ - // Size in gnu extended format, and name in pax attribute. - .data = @embedFile("testdata/writer-big-long.tar"), - .files = &[_]Case.File{ - .{ - .name = "longname/" ** 15 ++ "16gig.txt", - .size = 16 * 1024 * 1024 * 1024, - .mode = 0o644, - .truncated = true, - }, - }, - }, - .{ - .data = @embedFile("testdata/fuzz1.tar"), - .err = error.TarInsufficientBuffer, - }, - .{ + }); + try testCase(writer_big_long_case); + try testCase(fuzz1_case); + try testCase(.{ .data = @embedFile("testdata/fuzz2.tar"), .err = error.PaxSizeAttrOverflow, - }, -}; - -// used in test to calculate file chksum -const Md5Writer = struct { - h: std.crypto.hash.Md5 = std.crypto.hash.Md5.init(.{}), - - pub fn writeAll(self: *Md5Writer, buf: []const u8) !void { - self.h.update(buf); - } - - pub fn writeByte(self: *Md5Writer, byte: u8) !void { - self.h.update(&[_]u8{byte}); - } - - pub fn chksum(self: *Md5Writer) [32]u8 { - var s = [_]u8{0} ** 16; - self.h.final(&s); - return std.fmt.bytesToHex(s, .lower); - } -}; + }); +} -test "run test cases" { +fn testCase(case: Case) !void { var file_name_buffer: [std.fs.max_path_bytes]u8 = undefined; var link_name_buffer: [std.fs.max_path_bytes]u8 = undefined; - for (cases) |case| { - var fsb = std.io.fixedBufferStream(case.data); - var iter = tar.iterator(fsb.reader(), .{ - .file_name_buffer = &file_name_buffer, - .link_name_buffer = &link_name_buffer, - }); - var i: usize = 0; - while (iter.next() catch |err| { - if (case.err) |e| { - try testing.expectEqual(e, err); - continue; - } else { - return err; - } - }) |actual| : (i += 1) { - const expected = case.files[i]; - try testing.expectEqualStrings(expected.name, actual.name); - try testing.expectEqual(expected.size, actual.size); - try testing.expectEqual(expected.kind, actual.kind); - try testing.expectEqual(expected.mode, actual.mode); - try 
testing.expectEqualStrings(expected.link_name, actual.link_name); + var br: std.io.Reader = .fixed(case.data); + var it: tar.Iterator = .init(&br, .{ + .file_name_buffer = &file_name_buffer, + .link_name_buffer = &link_name_buffer, + }); + var i: usize = 0; + while (it.next() catch |err| { + if (case.err) |e| { + try testing.expectEqual(e, err); + return; + } else { + return err; + } + }) |actual| : (i += 1) { + const expected = case.files[i]; + try testing.expectEqualStrings(expected.name, actual.name); + try testing.expectEqual(expected.size, actual.size); + try testing.expectEqual(expected.kind, actual.kind); + try testing.expectEqual(expected.mode, actual.mode); + try testing.expectEqualStrings(expected.link_name, actual.link_name); - if (case.chksums.len > i) { - var md5writer = Md5Writer{}; - try actual.writeAll(&md5writer); - const chksum = md5writer.chksum(); - try testing.expectEqualStrings(case.chksums[i], &chksum); - } else { - if (expected.truncated) { - iter.unread_file_bytes = 0; - } + if (case.chksums.len > i) { + var aw: std.Io.Writer.Allocating = .init(std.testing.allocator); + defer aw.deinit(); + try it.streamRemaining(actual, &aw.writer); + const chksum = std.fmt.bytesToHex(std.crypto.hash.Md5.hashResult(aw.getWritten()), .lower); + try testing.expectEqualStrings(case.chksums[i], &chksum); + } else { + if (expected.truncated) { + it.unread_file_bytes = 0; } } - try testing.expectEqual(case.files.len, i); } + try testing.expectEqual(case.files.len, i); } test "pax/gnu long names with small buffer" { + try testLongNameCase(gnu_multi_headers_case); + try testLongNameCase(trailing_slash_case); + try testLongNameCase(.{ + .data = @embedFile("testdata/fuzz1.tar"), + .err = error.TarInsufficientBuffer, + }); +} + +fn testLongNameCase(case: Case) !void { // should fail with insufficient buffer error var min_file_name_buffer: [256]u8 = undefined; var min_link_name_buffer: [100]u8 = undefined; - const long_name_cases = [_]Case{ cases[11], cases[25], cases[28] }; - for (long_name_cases) |case| { - var fsb = std.io.fixedBufferStream(case.data); - var iter = tar.iterator(fsb.reader(), .{ - .file_name_buffer = &min_file_name_buffer, - .link_name_buffer = &min_link_name_buffer, - }); + var br: std.io.Reader = .fixed(case.data); + var iter: tar.Iterator = .init(&br, .{ + .file_name_buffer = &min_file_name_buffer, + .link_name_buffer = &min_link_name_buffer, + }); - var iter_err: ?anyerror = null; - while (iter.next() catch |err| brk: { - iter_err = err; - break :brk null; - }) |_| {} + var iter_err: ?anyerror = null; + while (iter.next() catch |err| brk: { + iter_err = err; + break :brk null; + }) |_| {} - try testing.expect(iter_err != null); - try testing.expectEqual(error.TarInsufficientBuffer, iter_err.?); - } + try testing.expect(iter_err != null); + try testing.expectEqual(error.TarInsufficientBuffer, iter_err.?); } test "insufficient buffer in Header name field" { var min_file_name_buffer: [9]u8 = undefined; var min_link_name_buffer: [100]u8 = undefined; - var fsb = std.io.fixedBufferStream(cases[0].data); - var iter = tar.iterator(fsb.reader(), .{ + var br: std.io.Reader = .fixed(gnu_case.data); + var iter: tar.Iterator = .init(&br, .{ .file_name_buffer = &min_file_name_buffer, .link_name_buffer = &min_link_name_buffer, }); @@ -466,21 +462,21 @@ test "should not overwrite existing file" { // This ensures that file is not overwritten. 
// const data = @embedFile("testdata/overwrite_file.tar"); - var fsb = std.io.fixedBufferStream(data); + var r: std.io.Reader = .fixed(data); // Unpack with strip_components = 1 should fail var root = std.testing.tmpDir(.{}); defer root.cleanup(); try testing.expectError( error.PathAlreadyExists, - tar.pipeToFileSystem(root.dir, fsb.reader(), .{ .mode_mode = .ignore, .strip_components = 1 }), + tar.pipeToFileSystem(root.dir, &r, .{ .mode_mode = .ignore, .strip_components = 1 }), ); // Unpack with strip_components = 0 should pass - fsb.reset(); + r = .fixed(data); var root2 = std.testing.tmpDir(.{}); defer root2.cleanup(); - try tar.pipeToFileSystem(root2.dir, fsb.reader(), .{ .mode_mode = .ignore, .strip_components = 0 }); + try tar.pipeToFileSystem(root2.dir, &r, .{ .mode_mode = .ignore, .strip_components = 0 }); } test "case sensitivity" { @@ -494,12 +490,12 @@ test "case sensitivity" { // 18089/alacritty/Darkermatrix.yml // const data = @embedFile("testdata/18089.tar"); - var fsb = std.io.fixedBufferStream(data); + var r: std.io.Reader = .fixed(data); var root = std.testing.tmpDir(.{}); defer root.cleanup(); - tar.pipeToFileSystem(root.dir, fsb.reader(), .{ .mode_mode = .ignore, .strip_components = 1 }) catch |err| { + tar.pipeToFileSystem(root.dir, &r, .{ .mode_mode = .ignore, .strip_components = 1 }) catch |err| { // on case insensitive fs we fail on overwrite existing file try testing.expectEqual(error.PathAlreadyExists, err); return; diff --git a/lib/std/tar/writer.zig b/lib/std/tar/writer.zig deleted file mode 100644 index 4ced287eec..0000000000 --- a/lib/std/tar/writer.zig +++ /dev/null @@ -1,497 +0,0 @@ -const std = @import("std"); -const assert = std.debug.assert; -const testing = std.testing; - -/// Creates tar Writer which will write tar content to the `underlying_writer`. -/// Use setRoot to nest all following entries under single root. If file don't -/// fit into posix header (name+prefix: 100+155 bytes) gnu extented header will -/// be used for long names. Options enables setting file premission mode and -/// mtime. Default is to use current time for mtime and 0o664 for file mode. -pub fn writer(underlying_writer: anytype) Writer(@TypeOf(underlying_writer)) { - return .{ .underlying_writer = underlying_writer }; -} - -pub fn Writer(comptime WriterType: type) type { - return struct { - const block_size = @sizeOf(Header); - const empty_block: [block_size]u8 = [_]u8{0} ** block_size; - - /// Options for writing file/dir/link. If left empty 0o664 is used for - /// file mode and current time for mtime. - pub const Options = struct { - /// File system permission mode. - mode: u32 = 0, - /// File system modification time. - mtime: u64 = 0, - }; - const Self = @This(); - - underlying_writer: WriterType, - prefix: []const u8 = "", - mtime_now: u64 = 0, - - /// Sets prefix for all other write* method paths. - pub fn setRoot(self: *Self, root: []const u8) !void { - if (root.len > 0) - try self.writeDir(root, .{}); - - self.prefix = root; - } - - /// Writes directory. - pub fn writeDir(self: *Self, sub_path: []const u8, opt: Options) !void { - try self.writeHeader(.directory, sub_path, "", 0, opt); - } - - /// Writes file system file. 
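// For contrast with the deleted generic implementation below, calling the
// new reader-based writeFile from tar/Writer.zig looks roughly like this
// (a sketch; `dir`, the path, and `w: *std.tar.Writer` are hypothetical):
//
//     var f = try dir.openFile("data.bin", .{});
//     defer f.close();
//     var rbuf: [4096]u8 = undefined;
//     var fr = f.reader(&rbuf);
//     const st = try f.stat();
//     try w.writeFile("data.bin", &fr, st.mtime);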
- pub fn writeFile(self: *Self, sub_path: []const u8, file: std.fs.File) !void { - const stat = try file.stat(); - const mtime: u64 = @intCast(@divFloor(stat.mtime, std.time.ns_per_s)); - - var header = Header{}; - try self.setPath(&header, sub_path); - try header.setSize(stat.size); - try header.setMtime(mtime); - try header.write(self.underlying_writer); - - try self.underlying_writer.writeFile(file); - try self.writePadding(stat.size); - } - - /// Writes file reading file content from `reader`. Number of bytes in - /// reader must be equal to `size`. - pub fn writeFileStream(self: *Self, sub_path: []const u8, size: usize, reader: anytype, opt: Options) !void { - try self.writeHeader(.regular, sub_path, "", @intCast(size), opt); - - var counting_reader = std.io.countingReader(reader); - var fifo = std.fifo.LinearFifo(u8, .{ .Static = 4096 }).init(); - try fifo.pump(counting_reader.reader(), self.underlying_writer); - if (counting_reader.bytes_read != size) return error.WrongReaderSize; - try self.writePadding(size); - } - - /// Writes file using bytes buffer `content` for size and file content. - pub fn writeFileBytes(self: *Self, sub_path: []const u8, content: []const u8, opt: Options) !void { - try self.writeHeader(.regular, sub_path, "", @intCast(content.len), opt); - try self.underlying_writer.writeAll(content); - try self.writePadding(content.len); - } - - /// Writes symlink. - pub fn writeLink(self: *Self, sub_path: []const u8, link_name: []const u8, opt: Options) !void { - try self.writeHeader(.symbolic_link, sub_path, link_name, 0, opt); - } - - /// Writes fs.Dir.WalkerEntry. Uses `mtime` from file system entry and - /// default for entry mode . - pub fn writeEntry(self: *Self, entry: std.fs.Dir.Walker.Entry) !void { - switch (entry.kind) { - .directory => { - try self.writeDir(entry.path, .{ .mtime = try entryMtime(entry) }); - }, - .file => { - var file = try entry.dir.openFile(entry.basename, .{}); - defer file.close(); - try self.writeFile(entry.path, file); - }, - .sym_link => { - var link_name_buffer: [std.fs.max_path_bytes]u8 = undefined; - const link_name = try entry.dir.readLink(entry.basename, &link_name_buffer); - try self.writeLink(entry.path, link_name, .{ .mtime = try entryMtime(entry) }); - }, - else => { - return error.UnsupportedWalkerEntryKind; - }, - } - } - - fn writeHeader( - self: *Self, - typeflag: Header.FileType, - sub_path: []const u8, - link_name: []const u8, - size: u64, - opt: Options, - ) !void { - var header = Header.init(typeflag); - try self.setPath(&header, sub_path); - try header.setSize(size); - try header.setMtime(if (opt.mtime != 0) opt.mtime else self.mtimeNow()); - if (opt.mode != 0) - try header.setMode(opt.mode); - if (typeflag == .symbolic_link) - header.setLinkname(link_name) catch |err| switch (err) { - error.NameTooLong => try self.writeExtendedHeader(.gnu_long_link, &.{link_name}), - else => return err, - }; - try header.write(self.underlying_writer); - } - - fn mtimeNow(self: *Self) u64 { - if (self.mtime_now == 0) - self.mtime_now = @intCast(std.time.timestamp()); - return self.mtime_now; - } - - fn entryMtime(entry: std.fs.Dir.Walker.Entry) !u64 { - const stat = try entry.dir.statFile(entry.basename); - return @intCast(@divFloor(stat.mtime, std.time.ns_per_s)); - } - - /// Writes path in posix header, if don't fit (in name+prefix; 100+155 - /// bytes) writes it in gnu extended header. 
- fn setPath(self: *Self, header: *Header, sub_path: []const u8) !void { - header.setPath(self.prefix, sub_path) catch |err| switch (err) { - error.NameTooLong => { - // write extended header - const buffers: []const []const u8 = if (self.prefix.len == 0) - &.{sub_path} - else - &.{ self.prefix, "/", sub_path }; - try self.writeExtendedHeader(.gnu_long_name, buffers); - }, - else => return err, - }; - } - - /// Writes gnu extended header: gnu_long_name or gnu_long_link. - fn writeExtendedHeader(self: *Self, typeflag: Header.FileType, buffers: []const []const u8) !void { - var len: usize = 0; - for (buffers) |buf| - len += buf.len; - - var header = Header.init(typeflag); - try header.setSize(len); - try header.write(self.underlying_writer); - for (buffers) |buf| - try self.underlying_writer.writeAll(buf); - try self.writePadding(len); - } - - fn writePadding(self: *Self, bytes: u64) !void { - const pos: usize = @intCast(bytes % block_size); - if (pos == 0) return; - try self.underlying_writer.writeAll(empty_block[pos..]); - } - - /// Tar should finish with two zero blocks, but 'reasonable system must - /// not assume that such a block exists when reading an archive' (from - /// reference). In practice it is safe to skip this finish. - pub fn finish(self: *Self) !void { - try self.underlying_writer.writeAll(&empty_block); - try self.underlying_writer.writeAll(&empty_block); - } - }; -} - -/// A struct that is exactly 512 bytes and matches tar file format. This is -/// intended to be used for outputting tar files; for parsing there is -/// `std.tar.Header`. -const Header = extern struct { - // This struct was originally copied from - // https://github.com/mattnite/tar/blob/main/src/main.zig which is MIT - // licensed. - // - // The name, linkname, magic, uname, and gname are null-terminated character - // strings. All other fields are zero-filled octal numbers in ASCII. Each - // numeric field of width w contains w minus 1 digits, and a null. 
-/// A struct that is exactly 512 bytes and matches the tar file format.
-/// This is intended to be used for outputting tar files; for parsing there
-/// is `std.tar.Header`.
-const Header = extern struct {
-    // This struct was originally copied from
-    // https://github.com/mattnite/tar/blob/main/src/main.zig which is MIT
-    // licensed.
-    //
-    // The name, linkname, magic, uname, and gname are null-terminated character
-    // strings. All other fields are zero-filled octal numbers in ASCII. Each
-    // numeric field of width w contains w minus 1 digits, and a null.
-    // Reference: https://www.gnu.org/software/tar/manual/html_node/Standard.html
-    // POSIX header:                                        byte offset
-    name: [100]u8 = [_]u8{0} ** 100, //                     0
-    mode: [7:0]u8 = default_mode.file, //                   100
-    uid: [7:0]u8 = [_:0]u8{0} ** 7, //             unused   108
-    gid: [7:0]u8 = [_:0]u8{0} ** 7, //             unused   116
-    size: [11:0]u8 = [_:0]u8{'0'} ** 11, //                 124
-    mtime: [11:0]u8 = [_:0]u8{'0'} ** 11, //                136
-    checksum: [7:0]u8 = [_:0]u8{' '} ** 7, //               148
-    typeflag: FileType = .regular, //                       156
-    linkname: [100]u8 = [_]u8{0} ** 100, //                 157
-    magic: [6]u8 = [_]u8{ 'u', 's', 't', 'a', 'r', 0 }, //  257
-    version: [2]u8 = [_]u8{ '0', '0' }, //                  263
-    uname: [32]u8 = [_]u8{0} ** 32, //             unused   265
-    gname: [32]u8 = [_]u8{0} ** 32, //             unused   297
-    devmajor: [7:0]u8 = [_:0]u8{0} ** 7, //        unused   329
-    devminor: [7:0]u8 = [_:0]u8{0} ** 7, //        unused   337
-    prefix: [155]u8 = [_]u8{0} ** 155, //                   345
-    pad: [12]u8 = [_]u8{0} ** 12, //               unused   500
-
-    pub const FileType = enum(u8) {
-        regular = '0',
-        symbolic_link = '2',
-        directory = '5',
-        gnu_long_name = 'L',
-        gnu_long_link = 'K',
-    };
-
-    const default_mode = struct {
-        const file = [_:0]u8{ '0', '0', '0', '0', '6', '6', '4' }; // 0o664
-        const dir = [_:0]u8{ '0', '0', '0', '0', '7', '7', '5' }; // 0o775
-        const sym_link = [_:0]u8{ '0', '0', '0', '0', '7', '7', '7' }; // 0o777
-        const other = [_:0]u8{ '0', '0', '0', '0', '0', '0', '0' }; // 0o000
-    };
-
-    pub fn init(typeflag: FileType) Header {
-        return .{
-            .typeflag = typeflag,
-            .mode = switch (typeflag) {
-                .directory => default_mode.dir,
-                .symbolic_link => default_mode.sym_link,
-                .regular => default_mode.file,
-                else => default_mode.other,
-            },
-        };
-    }
-
-    pub fn setSize(self: *Header, size: u64) !void {
-        try octal(&self.size, size);
-    }
-
-    fn octal(buf: []u8, value: u64) !void {
-        var remainder: u64 = value;
-        var pos: usize = buf.len;
-        while (remainder > 0 and pos > 0) {
-            pos -= 1;
-            const c: u8 = @as(u8, @intCast(remainder % 8)) + '0';
-            buf[pos] = c;
-            remainder /= 8;
-            if (pos == 0 and remainder > 0) return error.OctalOverflow;
-        }
-    }
-
-    pub fn setMode(self: *Header, mode: u32) !void {
-        try octal(&self.mode, mode);
-    }
-
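A worked example of the octal encoding above; the test body simply re-states what `octal` does to a field pre-filled with ASCII '0' bytes:

const std = @import("std");

test "size field encodes 1025 as zero-padded octal" {
    // 1025 == 0o2001; digits are written right-aligned into a field that
    // starts out as eleven ASCII '0' bytes, giving "00000002001".
    var size = [_:0]u8{'0'} ** 11;
    var remainder: u64 = 1025;
    var pos: usize = size.len;
    while (remainder > 0 and pos > 0) {
        pos -= 1;
        size[pos] = @as(u8, @intCast(remainder % 8)) + '0';
        remainder /= 8;
    }
    try std.testing.expectEqualStrings("00000002001", &size);
}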
-    // Integer number of seconds since January 1, 1970, 00:00 Coordinated
-    // Universal Time. mtime == 0 will use the current time.
-    pub fn setMtime(self: *Header, mtime: u64) !void {
-        try octal(&self.mtime, mtime);
-    }
-
-    pub fn updateChecksum(self: *Header) !void {
-        // Start at ' ' because the checksum is computed with the whole
-        // 8-byte checksum field treated as spaces; the other 7 bytes of
-        // the field are already initialized to ' '.
-        var checksum: usize = ' ';
-        for (std.mem.asBytes(self)) |val|
-            checksum += val;
-        try octal(&self.checksum, checksum);
-    }
-
-    pub fn write(self: *Header, output_writer: anytype) !void {
-        try self.updateChecksum();
-        try output_writer.writeAll(std.mem.asBytes(self));
-    }
-
-    pub fn setLinkname(self: *Header, link: []const u8) !void {
-        if (link.len > self.linkname.len) return error.NameTooLong;
-        @memcpy(self.linkname[0..link.len], link);
-    }
-
-    pub fn setPath(self: *Header, prefix: []const u8, sub_path: []const u8) !void {
-        const max_prefix = self.prefix.len;
-        const max_name = self.name.len;
-        const sep = std.fs.path.sep_posix;
-
-        if (prefix.len + sub_path.len > max_name + max_prefix or prefix.len > max_prefix)
-            return error.NameTooLong;
-
-        // both fit into name
-        if (prefix.len > 0 and prefix.len + sub_path.len < max_name) {
-            @memcpy(self.name[0..prefix.len], prefix);
-            self.name[prefix.len] = sep;
-            @memcpy(self.name[prefix.len + 1 ..][0..sub_path.len], sub_path);
-            return;
-        }
-
-        // sub_path fits into name;
-        // there is no prefix, or the prefix fits into prefix
-        if (sub_path.len <= max_name) {
-            @memcpy(self.name[0..sub_path.len], sub_path);
-            @memcpy(self.prefix[0..prefix.len], prefix);
-            return;
-        }
-
-        if (prefix.len > 0) {
-            @memcpy(self.prefix[0..prefix.len], prefix);
-            self.prefix[prefix.len] = sep;
-        }
-        const prefix_pos = if (prefix.len > 0) prefix.len + 1 else 0;
-
-        // add as much of sub_path to prefix as possible; it must split at a '/'
-        const prefix_remaining = max_prefix - prefix_pos;
-        if (std.mem.lastIndexOf(u8, sub_path[0..@min(prefix_remaining, sub_path.len)], &.{'/'})) |sep_pos| {
-            @memcpy(self.prefix[prefix_pos..][0..sep_pos], sub_path[0..sep_pos]);
-            if ((sub_path.len - sep_pos - 1) > max_name) return error.NameTooLong;
-            @memcpy(self.name[0..][0 .. sub_path.len - sep_pos - 1], sub_path[sep_pos + 1 ..]);
-            return;
-        }
-
-        return error.NameTooLong;
-    }
-
-    comptime {
-        assert(@sizeOf(Header) == 512);
-    }
-
-    test setPath {
-        const cases = [_]struct {
-            in: []const []const u8,
-            out: []const []const u8,
-        }{
-            .{
-                .in = &.{ "", "123456789" },
-                .out = &.{ "", "123456789" },
-            },
-            // both can fit into name
-            .{
-                .in = &.{ "prefix", "sub_path" },
-                .out = &.{ "", "prefix/sub_path" },
-            },
-            // both no longer fit into name
-            .{
-                .in = &.{ "prefix", "0123456789/" ** 8 ++ "basename" },
-                .out = &.{ "prefix", "0123456789/" ** 8 ++ "basename" },
-            },
-            // put as much as possible into prefix; the rest goes into name
-            .{
-                .in = &.{ "prefix", "0123456789/" ** 10 ++ "basename" },
-                .out = &.{ "prefix/" ++ "0123456789/" ** 9 ++ "0123456789", "basename" },
-            },
-
-            .{
-                .in = &.{ "prefix", "0123456789/" ** 15 ++ "basename" },
-                .out = &.{ "prefix/" ++ "0123456789/" ** 12 ++ "0123456789", "0123456789/0123456789/basename" },
-            },
-            .{
-                .in = &.{ "prefix", "0123456789/" ** 21 ++ "basename" },
-                .out = &.{ "prefix/" ++ "0123456789/" ** 12 ++ "0123456789", "0123456789/" ** 8 ++ "basename" },
-            },
-            .{
-                .in = &.{ "", "012345678/" ** 10 ++ "foo" },
-                .out = &.{ "012345678/" ** 9 ++ "012345678", "foo" },
-            },
-        };
-
-        for (cases) |case| {
-            var header = Header.init(.regular);
-            try header.setPath(case.in[0], case.in[1]);
-            try testing.expectEqualStrings(case.out[0], str(&header.prefix));
-            try testing.expectEqualStrings(case.out[1], str(&header.name));
-        }
-
-        const error_cases = [_]struct {
-            in: []const []const u8,
-        }{
-            // basename can't fit into name (106 characters)
-            .{ .in = &.{ "zig", "test/cases/compile_errors/regression_test_2980_base_type_u32_is_not_type_checked_properly_when_assigning_a_value_within_a_struct.zig" } },
-            // can't fit into 255 + sep
-            .{ .in = &.{ "prefix", "0123456789/" ** 22 ++ "basename" } },
-            // could fit, but sub_path can't be split (there is no separator)
-            .{ .in = &.{ "prefix", "0123456789" ** 10 ++ "a" } },
-            .{ .in = &.{ "prefix", "0123456789" ** 14 ++ "basename" } },
-        };
-
-        for (error_cases) |case| {
-            var header = Header.init(.regular);
-            try testing.expectError(
-                error.NameTooLong,
-                header.setPath(case.in[0], case.in[1]),
-            );
-        }
-    }
-
-    // Returns the string up to the first null character.
-    fn str(s: []const u8) []const u8 {
-        for (s, 0..) |c, i| {
-            if (c == 0) return s[0..i];
-        }
-        return s;
-    }
-};
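For reference, a reader reconstructs a split path as prefix ++ "/" ++ name whenever the prefix field is non-empty (per the ustar standard). A sketch with a hypothetical helper that is not part of this file:

const std = @import("std");

/// Hypothetical helper: reassemble the path a reader would see from a
/// header's prefix and name fields (already stripped of trailing nulls).
fn fullPath(buf: []u8, prefix: []const u8, name: []const u8) ![]const u8 {
    if (prefix.len == 0) {
        if (name.len > buf.len) return error.NameTooLong;
        @memcpy(buf[0..name.len], name);
        return buf[0..name.len];
    }
    if (prefix.len + 1 + name.len > buf.len) return error.NameTooLong;
    @memcpy(buf[0..prefix.len], prefix);
    buf[prefix.len] = '/';
    @memcpy(buf[prefix.len + 1 ..][0..name.len], name);
    return buf[0 .. prefix.len + 1 + name.len];
}

test fullPath {
    var buf: [512]u8 = undefined;
    try std.testing.expectEqualStrings(
        "prefix/basename",
        try fullPath(&buf, "prefix", "basename"),
    );
}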
-
-test {
-    _ = Header;
-}
-
-test "write files" {
-    const files = [_]struct {
-        path: []const u8,
-        content: []const u8,
-    }{
-        .{ .path = "foo", .content = "bar" },
-        .{ .path = "a12345678/" ** 10 ++ "foo", .content = "a" ** 511 },
-        .{ .path = "b12345678/" ** 24 ++ "foo", .content = "b" ** 512 },
-        .{ .path = "c12345678/" ** 25 ++ "foo", .content = "c" ** 513 },
-        .{ .path = "d12345678/" ** 51 ++ "foo", .content = "d" ** 1025 },
-        .{ .path = "e123456789" ** 11, .content = "e" },
-    };
-
-    var file_name_buffer: [std.fs.max_path_bytes]u8 = undefined;
-    var link_name_buffer: [std.fs.max_path_bytes]u8 = undefined;
-
-    // with root
-    {
-        const root = "root";
-
-        var output = std.ArrayList(u8).init(testing.allocator);
-        defer output.deinit();
-        var wrt = writer(output.writer());
-        try wrt.setRoot(root);
-        for (files) |file|
-            try wrt.writeFileBytes(file.path, file.content, .{});
-
-        var input = std.io.fixedBufferStream(output.items);
-        var iter = std.tar.iterator(
-            input.reader(),
-            .{ .file_name_buffer = &file_name_buffer, .link_name_buffer = &link_name_buffer },
-        );
-
-        // the first entry is the root directory
-        {
-            const actual = (try iter.next()).?;
-            try testing.expectEqualStrings(root, actual.name);
-            try testing.expectEqual(std.tar.FileKind.directory, actual.kind);
-        }
-
-        var i: usize = 0;
-        while (try iter.next()) |actual| {
-            defer i += 1;
-            const expected = files[i];
-            try testing.expectEqualStrings(root, actual.name[0..root.len]);
-            try testing.expectEqual('/', actual.name[root.len..][0]);
-            try testing.expectEqualStrings(expected.path, actual.name[root.len + 1 ..]);
-
-            var content = std.ArrayList(u8).init(testing.allocator);
-            defer content.deinit();
-            try actual.writeAll(content.writer());
-            try testing.expectEqualSlices(u8, expected.content, content.items);
-        }
-    }
-    // without root
-    {
-        var output = std.ArrayList(u8).init(testing.allocator);
-        defer output.deinit();
-        var wrt = writer(output.writer());
-        for (files) |file| {
-            var content = std.io.fixedBufferStream(file.content);
-            try wrt.writeFileStream(file.path, file.content.len, content.reader(), .{});
-        }
-
-        var input = std.io.fixedBufferStream(output.items);
-        var iter = std.tar.iterator(
-            input.reader(),
-            .{ .file_name_buffer = &file_name_buffer, .link_name_buffer = &link_name_buffer },
-        );
-
-        var i: usize = 0;
-        while (try iter.next()) |actual| {
-            defer i += 1;
-            const expected = files[i];
-            try testing.expectEqualStrings(expected.path, actual.name);
-
-            var content = std.ArrayList(u8).init(testing.allocator);
-            defer content.deinit();
-            try actual.writeAll(content.writer());
-            try testing.expectEqualSlices(u8, expected.content, content.items);
-        }
-        try wrt.finish();
-    }
-}
diff --git a/lib/std/testing.zig b/lib/std/testing.zig
index f9027a4f47..e80e961b13 100644
--- a/lib/std/testing.zig
+++ b/lib/std/testing.zig
@@ -33,6 +33,7 @@ pub var log_level = std.log.Level.warn;
 // Disable printing in tests for simple backends.
pub const backend_can_print = switch (builtin.zig_backend) { + .stage2_aarch64, .stage2_powerpc, .stage2_riscv64, .stage2_spirv, diff --git a/lib/std/zig.zig b/lib/std/zig.zig index 486947768d..a692a63795 100644 --- a/lib/std/zig.zig +++ b/lib/std/zig.zig @@ -321,6 +321,27 @@ pub const BuildId = union(enum) { try std.testing.expectError(error.InvalidCharacter, parse("0xfoobbb")); try std.testing.expectError(error.InvalidBuildIdStyle, parse("yaddaxxx")); } + + pub fn format(id: BuildId, writer: *std.io.Writer) std.io.Writer.Error!void { + switch (id) { + .none, .fast, .uuid, .sha1, .md5 => { + try writer.writeAll(@tagName(id)); + }, + .hexstring => |hs| { + try writer.print("0x{x}", .{hs.toSlice()}); + }, + } + } + + test format { + try std.testing.expectFmt("none", "{f}", .{@as(BuildId, .none)}); + try std.testing.expectFmt("fast", "{f}", .{@as(BuildId, .fast)}); + try std.testing.expectFmt("uuid", "{f}", .{@as(BuildId, .uuid)}); + try std.testing.expectFmt("sha1", "{f}", .{@as(BuildId, .sha1)}); + try std.testing.expectFmt("md5", "{f}", .{@as(BuildId, .md5)}); + try std.testing.expectFmt("0x", "{f}", .{BuildId.initHexString("")}); + try std.testing.expectFmt("0x1234cdef", "{f}", .{BuildId.initHexString("\x12\x34\xcd\xef")}); + } }; pub const LtoMode = enum { none, full, thin }; @@ -364,23 +385,23 @@ pub fn serializeCpuAlloc(ally: Allocator, cpu: std.Target.Cpu) Allocator.Error![ /// Return a Formatter for a Zig identifier, escaping it with `@""` syntax if needed. /// /// See also `fmtIdFlags`. -pub fn fmtId(bytes: []const u8) std.fmt.Formatter(FormatId, FormatId.render) { - return .{ .data = .{ .bytes = bytes, .flags = .{} } }; +pub fn fmtId(bytes: []const u8) FormatId { + return .{ .bytes = bytes, .flags = .{} }; } /// Return a Formatter for a Zig identifier, escaping it with `@""` syntax if needed. /// /// See also `fmtId`. -pub fn fmtIdFlags(bytes: []const u8, flags: FormatId.Flags) std.fmt.Formatter(FormatId, FormatId.render) { - return .{ .data = .{ .bytes = bytes, .flags = flags } }; +pub fn fmtIdFlags(bytes: []const u8, flags: FormatId.Flags) FormatId { + return .{ .bytes = bytes, .flags = flags }; } -pub fn fmtIdPU(bytes: []const u8) std.fmt.Formatter(FormatId, FormatId.render) { - return .{ .data = .{ .bytes = bytes, .flags = .{ .allow_primitive = true, .allow_underscore = true } } }; +pub fn fmtIdPU(bytes: []const u8) FormatId { + return .{ .bytes = bytes, .flags = .{ .allow_primitive = true, .allow_underscore = true } }; } -pub fn fmtIdP(bytes: []const u8) std.fmt.Formatter(FormatId, FormatId.render) { - return .{ .data = .{ .bytes = bytes, .flags = .{ .allow_primitive = true } } }; +pub fn fmtIdP(bytes: []const u8) FormatId { + return .{ .bytes = bytes, .flags = .{ .allow_primitive = true } }; } test fmtId { @@ -426,7 +447,7 @@ pub const FormatId = struct { }; /// Print the string as a Zig identifier, escaping it with `@""` syntax if needed. - fn render(ctx: FormatId, writer: *Writer) Writer.Error!void { + pub fn format(ctx: FormatId, writer: *Writer) Writer.Error!void { const bytes = ctx.bytes; if (isValidId(bytes) and (ctx.flags.allow_primitive or !std.zig.isPrimitive(bytes)) and |
