diff options
Diffstat (limited to 'lib/std')
48 files changed, 4732 insertions, 1141 deletions
diff --git a/lib/std/Build.zig b/lib/std/Build.zig index 15c1647957..a375b45454 100644 --- a/lib/std/Build.zig +++ b/lib/std/Build.zig @@ -19,6 +19,8 @@ const NativeTargetInfo = std.zig.system.NativeTargetInfo; const Sha256 = std.crypto.hash.sha2.Sha256; const Build = @This(); +pub const Cache = @import("Build/Cache.zig"); + /// deprecated: use `CompileStep`. pub const LibExeObjStep = CompileStep; /// deprecated: use `Build`. @@ -77,11 +79,12 @@ search_prefixes: ArrayList([]const u8), libc_file: ?[]const u8 = null, installed_files: ArrayList(InstalledFile), /// Path to the directory containing build.zig. -build_root: []const u8, -cache_root: []const u8, -global_cache_root: []const u8, -/// zig lib dir -override_lib_dir: ?[]const u8, +build_root: Cache.Directory, +cache_root: Cache.Directory, +global_cache_root: Cache.Directory, +cache: *Cache, +/// If non-null, overrides the default zig lib dir. +zig_lib_dir: ?[]const u8, vcpkg_root: VcpkgRoot = .unattempted, pkg_config_pkg_list: ?(PkgConfigError![]const PkgConfigPkg) = null, args: ?[][]const u8 = null, @@ -185,10 +188,11 @@ pub const DirList = struct { pub fn create( allocator: Allocator, zig_exe: []const u8, - build_root: []const u8, - cache_root: []const u8, - global_cache_root: []const u8, + build_root: Cache.Directory, + cache_root: Cache.Directory, + global_cache_root: Cache.Directory, host: NativeTargetInfo, + cache: *Cache, ) !*Build { const env_map = try allocator.create(EnvMap); env_map.* = try process.getEnvMap(allocator); @@ -197,8 +201,9 @@ pub fn create( self.* = Build{ .zig_exe = zig_exe, .build_root = build_root, - .cache_root = try fs.path.relative(allocator, build_root, cache_root), + .cache_root = cache_root, .global_cache_root = global_cache_root, + .cache = cache, .verbose = false, .verbose_link = false, .verbose_cc = false, @@ -230,7 +235,7 @@ pub fn create( .step = Step.init(.top_level, "uninstall", allocator, makeUninstall), .description = "Remove build artifacts from prefix path", }, - 
.override_lib_dir = null, + .zig_lib_dir = null, .install_path = undefined, .args = null, .host = host, @@ -245,7 +250,7 @@ pub fn create( fn createChild( parent: *Build, dep_name: []const u8, - build_root: []const u8, + build_root: Cache.Directory, args: anytype, ) !*Build { const child = try createChildOnly(parent, dep_name, build_root); @@ -253,7 +258,7 @@ fn createChild( return child; } -fn createChildOnly(parent: *Build, dep_name: []const u8, build_root: []const u8) !*Build { +fn createChildOnly(parent: *Build, dep_name: []const u8, build_root: Cache.Directory) !*Build { const allocator = parent.allocator; const child = try allocator.create(Build); child.* = .{ @@ -297,7 +302,8 @@ fn createChildOnly(parent: *Build, dep_name: []const u8, build_root: []const u8) .build_root = build_root, .cache_root = parent.cache_root, .global_cache_root = parent.global_cache_root, - .override_lib_dir = parent.override_lib_dir, + .cache = parent.cache, + .zig_lib_dir = parent.zig_lib_dir, .debug_log_scopes = parent.debug_log_scopes, .debug_compile_errors = parent.debug_compile_errors, .enable_darling = parent.enable_darling, @@ -348,7 +354,7 @@ fn applyArgs(b: *Build, args: anytype) !void { .used = false, }); }, - .Enum => { + .Enum, .EnumLiteral => { try b.user_input_options.put(field.name, .{ .name = field.name, .value = .{ .scalar = @tagName(v) }, @@ -379,7 +385,7 @@ fn applyArgs(b: *Build, args: anytype) !void { _ = std.fmt.bufPrint(&hash_basename, "{s}", .{std.fmt.fmtSliceHexLower(&digest)}) catch unreachable; - const install_prefix = b.pathJoin(&.{ b.cache_root, "i", &hash_basename }); + const install_prefix = try b.cache_root.join(b.allocator, &.{ "i", &hash_basename }); b.resolveInstallPrefix(install_prefix, .{}); } @@ -396,7 +402,7 @@ pub fn resolveInstallPrefix(self: *Build, install_prefix: ?[]const u8, dir_list: self.install_path = self.pathJoin(&.{ dest_dir, self.install_prefix }); } else { self.install_prefix = install_prefix orelse - (self.pathJoin(&.{ 
self.build_root, "zig-out" })); + (self.build_root.join(self.allocator, &.{"zig-out"}) catch @panic("unhandled error")); self.install_path = self.install_prefix; } @@ -535,6 +541,7 @@ pub const AssemblyOptions = struct { pub fn addAssembly(b: *Build, options: AssemblyOptions) *CompileStep { const obj_step = CompileStep.create(b, .{ .name = options.name, + .kind = .obj, .root_source_file = null, .target = options.target, .optimize = options.optimize, @@ -598,13 +605,39 @@ pub fn addSystemCommand(self: *Build, argv: []const []const u8) *RunStep { return run_step; } +/// Creates a `RunStep` with an executable built with `addExecutable`. +/// Add command line arguments with methods of `RunStep`. +pub fn addRunArtifact(b: *Build, exe: *CompileStep) *RunStep { + assert(exe.kind == .exe or exe.kind == .test_exe); + + // It doesn't have to be native. We catch that if you actually try to run it. + // Consider that this is declarative; the run step may not be run unless a user + // option is supplied. + const run_step = RunStep.create(b, b.fmt("run {s}", .{exe.step.name})); + run_step.addArtifactArg(exe); + + if (exe.kind == .test_exe) { + run_step.addArg(b.zig_exe); + } + + if (exe.vcpkg_bin_path) |path| { + run_step.addPathDir(path); + } + + return run_step; +} + +/// Using the `values` provided, produces a C header file, possibly based on a +/// template input file (e.g. config.h.in). +/// When an input template file is provided, this function will fail the build +/// when an option not found in the input file is provided in `values`, and +/// when an option found in the input file is missing from `values`. 
pub fn addConfigHeader( b: *Build, - source: FileSource, - style: ConfigHeaderStep.Style, + options: ConfigHeaderStep.Options, values: anytype, ) *ConfigHeaderStep { - const config_header_step = ConfigHeaderStep.create(b, source, style); + const config_header_step = ConfigHeaderStep.create(b, options); config_header_step.addValues(values); return config_header_step; } @@ -669,8 +702,6 @@ pub fn addTranslateC(self: *Build, options: TranslateCStep.Options) *TranslateCS } pub fn make(self: *Build, step_names: []const []const u8) !void { - try self.makePath(self.cache_root); - var wanted_steps = ArrayList(*Step).init(self.allocator); defer wanted_steps.deinit(); @@ -901,7 +932,7 @@ pub fn standardOptimizeOption(self: *Build, options: StandardOptimizeOptionOptio return self.option( std.builtin.Mode, "optimize", - "prioritize performance, safety, or binary size (-O flag)", + "Prioritize performance, safety, or binary size (-O flag)", ) orelse .Debug; } } @@ -1196,13 +1227,6 @@ pub fn spawnChildEnvMap(self: *Build, cwd: ?[]const u8, env_map: *const EnvMap, } } -pub fn makePath(self: *Build, path: []const u8) !void { - fs.cwd().makePath(self.pathFromRoot(path)) catch |err| { - log.err("Unable to create path {s}: {s}", .{ path, @errorName(err) }); - return err; - }; -} - pub fn installArtifact(self: *Build, artifact: *CompileStep) void { self.getInstallStep().dependOn(&self.addInstallArtifact(artifact).step); } @@ -1317,8 +1341,8 @@ pub fn truncateFile(self: *Build, dest_path: []const u8) !void { src_file.close(); } -pub fn pathFromRoot(self: *Build, rel_path: []const u8) []u8 { - return fs.path.resolve(self.allocator, &[_][]const u8{ self.build_root, rel_path }) catch @panic("OOM"); +pub fn pathFromRoot(b: *Build, p: []const u8) []u8 { + return fs.path.resolve(b.allocator, &.{ b.build_root.path orelse ".", p }) catch @panic("OOM"); } pub fn pathJoin(self: *Build, paths: []const []const u8) []u8 { @@ -1539,10 +1563,19 @@ pub fn dependency(b: *Build, name: []const u8, args: 
anytype) *Dependency { fn dependencyInner( b: *Build, name: []const u8, - build_root: []const u8, + build_root_string: []const u8, comptime build_zig: type, args: anytype, ) *Dependency { + const build_root: std.Build.Cache.Directory = .{ + .path = build_root_string, + .handle = std.fs.cwd().openDir(build_root_string, .{}) catch |err| { + std.debug.print("unable to open '{s}': {s}\n", .{ + build_root_string, @errorName(err), + }); + std.process.exit(1); + }, + }; const sub_builder = b.createChild(name, build_root, args) catch @panic("unhandled error"); sub_builder.runBuild(build_zig) catch @panic("unhandled error"); @@ -1563,26 +1596,6 @@ pub fn runBuild(b: *Build, build_zig: anytype) anyerror!void { } } -test "builder.findProgram compiles" { - if (builtin.os.tag == .wasi) return error.SkipZigTest; - - var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator); - defer arena.deinit(); - - const host = try NativeTargetInfo.detect(.{}); - - const builder = try Build.create( - arena.allocator(), - "zig", - "zig-cache", - "zig-cache", - "zig-cache", - host, - ); - defer builder.destroy(); - _ = builder.findProgram(&[_][]const u8{}, &[_][]const u8{}) catch null; -} - pub const Module = struct { builder: *Build, /// This could either be a generated file, in which case the module @@ -1611,7 +1624,6 @@ pub const GeneratedFile = struct { }; /// A file source is a reference to an existing or future file. -/// pub const FileSource = union(enum) { /// A plain file path, relative to build root or absolute. path: []const u8, diff --git a/lib/std/Build/Cache.zig b/lib/std/Build/Cache.zig new file mode 100644 index 0000000000..c459fca633 --- /dev/null +++ b/lib/std/Build/Cache.zig @@ -0,0 +1,1253 @@ +//! Manages `zig-cache` directories. +//! This is not a general-purpose cache. It is designed to be fast and simple, +//! not to withstand attacks using specially-crafted input. 
+ +pub const Directory = struct { + /// This field is redundant for operations that can act on the open directory handle + /// directly, but it is needed when passing the directory to a child process. + /// `null` means cwd. + path: ?[]const u8, + handle: std.fs.Dir, + + pub fn join(self: Directory, allocator: Allocator, paths: []const []const u8) ![]u8 { + if (self.path) |p| { + // TODO clean way to do this with only 1 allocation + const part2 = try std.fs.path.join(allocator, paths); + defer allocator.free(part2); + return std.fs.path.join(allocator, &[_][]const u8{ p, part2 }); + } else { + return std.fs.path.join(allocator, paths); + } + } + + pub fn joinZ(self: Directory, allocator: Allocator, paths: []const []const u8) ![:0]u8 { + if (self.path) |p| { + // TODO clean way to do this with only 1 allocation + const part2 = try std.fs.path.join(allocator, paths); + defer allocator.free(part2); + return std.fs.path.joinZ(allocator, &[_][]const u8{ p, part2 }); + } else { + return std.fs.path.joinZ(allocator, paths); + } + } + + /// Whether or not the handle should be closed, or the path should be freed + /// is determined by usage, however this function is provided for convenience + /// if it happens to be what the caller needs. + pub fn closeAndFree(self: *Directory, gpa: Allocator) void { + self.handle.close(); + if (self.path) |p| gpa.free(p); + self.* = undefined; + } +}; + +gpa: Allocator, +manifest_dir: fs.Dir, +hash: HashHelper = .{}, +/// This value is accessed from multiple threads, protected by mutex. +recent_problematic_timestamp: i128 = 0, +mutex: std.Thread.Mutex = .{}, + +/// A set of strings such as the zig library directory or project source root, which +/// are stripped from the file paths before putting into the cache. They +/// are replaced with single-character indicators. This is not to save +/// space but to eliminate absolute file paths. This improves portability +/// and usefulness of the cache for advanced use cases. 
+prefixes_buffer: [4]Directory = undefined, +prefixes_len: usize = 0, + +pub const DepTokenizer = @import("Cache/DepTokenizer.zig"); + +const Cache = @This(); +const std = @import("std"); +const builtin = @import("builtin"); +const crypto = std.crypto; +const fs = std.fs; +const assert = std.debug.assert; +const testing = std.testing; +const mem = std.mem; +const fmt = std.fmt; +const Allocator = std.mem.Allocator; +const log = std.log.scoped(.cache); + +pub fn addPrefix(cache: *Cache, directory: Directory) void { + cache.prefixes_buffer[cache.prefixes_len] = directory; + cache.prefixes_len += 1; +} + +/// Be sure to call `Manifest.deinit` after successful initialization. +pub fn obtain(cache: *Cache) Manifest { + return Manifest{ + .cache = cache, + .hash = cache.hash, + .manifest_file = null, + .manifest_dirty = false, + .hex_digest = undefined, + }; +} + +pub fn prefixes(cache: *const Cache) []const Directory { + return cache.prefixes_buffer[0..cache.prefixes_len]; +} + +const PrefixedPath = struct { + prefix: u8, + sub_path: []u8, +}; + +fn findPrefix(cache: *const Cache, file_path: []const u8) !PrefixedPath { + const gpa = cache.gpa; + const resolved_path = try fs.path.resolve(gpa, &[_][]const u8{file_path}); + errdefer gpa.free(resolved_path); + return findPrefixResolved(cache, resolved_path); +} + +/// Takes ownership of `resolved_path` on success. A prefix matches only when it is non-empty and is followed by a path separator in `resolved_path`; otherwise prefix 0 is returned with the full path. +fn findPrefixResolved(cache: *const Cache, resolved_path: []u8) !PrefixedPath { + const gpa = cache.gpa; + const prefixes_slice = cache.prefixes(); + var i: u8 = 1; // Start at 1 to skip over checking the null prefix. 
+ while (i < prefixes_slice.len) : (i += 1) { + const p = prefixes_slice[i].path.?; + if (p.len > 0 and resolved_path.len > p.len and mem.startsWith(u8, resolved_path, p) and fs.path.isSep(resolved_path[p.len])) { + // +1 to skip over the path separator verified by the condition above + const sub_path = try gpa.dupe(u8, resolved_path[p.len + 1 ..]); + gpa.free(resolved_path); + return PrefixedPath{ + .prefix = @intCast(u8, i), + .sub_path = sub_path, + }; + } + } + + return PrefixedPath{ + .prefix = 0, + .sub_path = resolved_path, + }; +} + +/// This is 128 bits - Even with 2^54 cache entries, the probability of a collision would be under 10^-6 +pub const bin_digest_len = 16; +pub const hex_digest_len = bin_digest_len * 2; +pub const BinDigest = [bin_digest_len]u8; + +const manifest_file_size_max = 50 * 1024 * 1024; + +/// The type used for hashing file contents. Currently, this is SipHash128(1, 3), because it +/// provides enough collision resistance for the Manifest use cases, while being one of our +/// fastest options right now. +pub const Hasher = crypto.auth.siphash.SipHash128(1, 3); + +/// Initial state, that can be copied. 
+pub const hasher_init: Hasher = Hasher.init(&[_]u8{0} ** Hasher.key_length); + +pub const File = struct { + prefixed_path: ?PrefixedPath, + max_file_size: ?usize, + stat: Stat, + bin_digest: BinDigest, + contents: ?[]const u8, + + pub const Stat = struct { + inode: fs.File.INode, + size: u64, + mtime: i128, + }; + + pub fn deinit(self: *File, gpa: Allocator) void { + if (self.prefixed_path) |pp| { + gpa.free(pp.sub_path); + self.prefixed_path = null; + } + if (self.contents) |contents| { + gpa.free(contents); + self.contents = null; + } + self.* = undefined; + } +}; + +pub const HashHelper = struct { + hasher: Hasher = hasher_init, + + /// Record a slice of bytes as a dependency of the process being cached. + pub fn addBytes(hh: *HashHelper, bytes: []const u8) void { + hh.hasher.update(mem.asBytes(&bytes.len)); + hh.hasher.update(bytes); + } + + pub fn addOptionalBytes(hh: *HashHelper, optional_bytes: ?[]const u8) void { + hh.add(optional_bytes != null); + hh.addBytes(optional_bytes orelse return); + } + + pub fn addListOfBytes(hh: *HashHelper, list_of_bytes: []const []const u8) void { + hh.add(list_of_bytes.len); + for (list_of_bytes) |bytes| hh.addBytes(bytes); + } + + /// Convert the input value into bytes and record it as a dependency of the process being cached. 
+ pub fn add(hh: *HashHelper, x: anytype) void { + switch (@TypeOf(x)) { + std.builtin.Version => { + hh.add(x.major); + hh.add(x.minor); + hh.add(x.patch); + }, + std.Target.Os.TaggedVersionRange => { + switch (x) { + .linux => |linux| { + hh.add(linux.range.min); + hh.add(linux.range.max); + hh.add(linux.glibc); + }, + .windows => |windows| { + hh.add(windows.min); + hh.add(windows.max); + }, + .semver => |semver| { + hh.add(semver.min); + hh.add(semver.max); + }, + .none => {}, + } + }, + else => switch (@typeInfo(@TypeOf(x))) { + .Bool, .Int, .Enum, .Array => hh.addBytes(mem.asBytes(&x)), + else => @compileError("unable to hash type " ++ @typeName(@TypeOf(x))), + }, + } + } + + pub fn addOptional(hh: *HashHelper, optional: anytype) void { + hh.add(optional != null); + hh.add(optional orelse return); + } + + /// Returns a hex encoded hash of the inputs, without modifying state. + pub fn peek(hh: HashHelper) [hex_digest_len]u8 { + var copy = hh; + return copy.final(); + } + + pub fn peekBin(hh: HashHelper) BinDigest { + var copy = hh; + var bin_digest: BinDigest = undefined; + copy.hasher.final(&bin_digest); + return bin_digest; + } + + /// Returns a hex encoded hash of the inputs, mutating the state of the hasher. + pub fn final(hh: *HashHelper) [hex_digest_len]u8 { + var bin_digest: BinDigest = undefined; + hh.hasher.final(&bin_digest); + + var out_digest: [hex_digest_len]u8 = undefined; + _ = std.fmt.bufPrint( + &out_digest, + "{s}", + .{std.fmt.fmtSliceHexLower(&bin_digest)}, + ) catch unreachable; + return out_digest; + } +}; + +pub const Lock = struct { + manifest_file: fs.File, + + pub fn release(lock: *Lock) void { + if (builtin.os.tag == .windows) { + // Windows does not guarantee that locks are immediately unlocked when + // the file handle is closed. See LockFileEx documentation. 
+ lock.manifest_file.unlock(); + } + + lock.manifest_file.close(); + lock.* = undefined; + } +}; + +pub const Manifest = struct { + cache: *Cache, + /// Current state for incremental hashing. + hash: HashHelper, + manifest_file: ?fs.File, + manifest_dirty: bool, + /// Set this flag to true before calling hit() in order to indicate that + /// upon a cache hit, the code using the cache will not modify the files + /// within the cache directory. This allows multiple processes to utilize + /// the same cache directory at the same time. + want_shared_lock: bool = true, + have_exclusive_lock: bool = false, + // Indicate that we want isProblematicTimestamp to perform a filesystem write in + // order to obtain a problematic timestamp for the next call. Calls after that + // will then use the same timestamp, to avoid unnecessary filesystem writes. + want_refresh_timestamp: bool = true, + files: std.ArrayListUnmanaged(File) = .{}, + hex_digest: [hex_digest_len]u8, + /// Populated when hit() returns an error because of one + /// of the files listed in the manifest. + failed_file_index: ?usize = null, + /// Keeps track of the last time we performed a file system write to observe + /// what time the file system thinks it is, according to its own granularity. + recent_problematic_timestamp: i128 = 0, + + /// Add a file as a dependency of process being cached. When `hit` is + /// called, the file's contents will be checked to ensure that it matches + /// the contents from previous times. + /// + /// Max file size will be used to determine the amount of space the file contents + /// are allowed to take up in memory. If max_file_size is null, then the contents + /// will not be loaded into memory. + /// + /// Returns the index of the entry in the `files` array list. 
You can use it + /// to access the contents of the file after calling `hit()` like so: + /// + /// ``` + /// var file_contents = cache_hash.files.items[file_index].contents.?; + /// ``` + pub fn addFile(self: *Manifest, file_path: []const u8, max_file_size: ?usize) !usize { + assert(self.manifest_file == null); + + const gpa = self.cache.gpa; + try self.files.ensureUnusedCapacity(gpa, 1); + const prefixed_path = try self.cache.findPrefix(file_path); + errdefer gpa.free(prefixed_path.sub_path); + + self.files.addOneAssumeCapacity().* = .{ + .prefixed_path = prefixed_path, + .contents = null, + .max_file_size = max_file_size, + .stat = undefined, + .bin_digest = undefined, + }; + + self.hash.add(prefixed_path.prefix); + self.hash.addBytes(prefixed_path.sub_path); + + return self.files.items.len - 1; + } + + pub fn addOptionalFile(self: *Manifest, optional_file_path: ?[]const u8) !void { + self.hash.add(optional_file_path != null); + const file_path = optional_file_path orelse return; + _ = try self.addFile(file_path, null); + } + + pub fn addListOfFiles(self: *Manifest, list_of_files: []const []const u8) !void { + self.hash.add(list_of_files.len); + for (list_of_files) |file_path| { + _ = try self.addFile(file_path, null); + } + } + + /// Check the cache to see if the input exists in it. If it exists, returns `true`. + /// A hex encoding of its hash is available by calling `final`. + /// + /// This function will also acquire an exclusive lock to the manifest file. This means + /// that a process holding a Manifest will block any other process attempting to + /// acquire the lock. If `want_shared_lock` is `true`, a cache hit guarantees the + /// manifest file to be locked in shared mode, and a cache miss guarantees the manifest + /// file to be locked in exclusive mode. + /// + /// The lock on the manifest file is released when `deinit` is called. As another + /// option, one may call `toOwnedLock` to obtain a smaller object which can represent + /// the lock. 
`deinit` is safe to call whether or not `toOwnedLock` has been called. + pub fn hit(self: *Manifest) !bool { + const gpa = self.cache.gpa; + assert(self.manifest_file == null); + + self.failed_file_index = null; + + const ext = ".txt"; + var manifest_file_path: [self.hex_digest.len + ext.len]u8 = undefined; + + var bin_digest: BinDigest = undefined; + self.hash.hasher.final(&bin_digest); + + _ = std.fmt.bufPrint( + &self.hex_digest, + "{s}", + .{std.fmt.fmtSliceHexLower(&bin_digest)}, + ) catch unreachable; + + self.hash.hasher = hasher_init; + self.hash.hasher.update(&bin_digest); + + mem.copy(u8, &manifest_file_path, &self.hex_digest); + manifest_file_path[self.hex_digest.len..][0..ext.len].* = ext.*; + + if (self.files.items.len == 0) { + // If there are no file inputs, we check if the manifest file exists instead of + // comparing the hashes on the files used for the cached item + while (true) { + if (self.cache.manifest_dir.openFile(&manifest_file_path, .{ + .mode = .read_write, + .lock = .Exclusive, + .lock_nonblocking = self.want_shared_lock, + })) |manifest_file| { + self.manifest_file = manifest_file; + self.have_exclusive_lock = true; + break; + } else |open_err| switch (open_err) { + error.WouldBlock => { + self.manifest_file = try self.cache.manifest_dir.openFile(&manifest_file_path, .{ + .lock = .Shared, + }); + break; + }, + error.FileNotFound => { + if (self.cache.manifest_dir.createFile(&manifest_file_path, .{ + .read = true, + .truncate = false, + .lock = .Exclusive, + .lock_nonblocking = self.want_shared_lock, + })) |manifest_file| { + self.manifest_file = manifest_file; + self.manifest_dirty = true; + self.have_exclusive_lock = true; + return false; // cache miss; exclusive lock already held + } else |err| switch (err) { + error.WouldBlock => continue, + else => |e| return e, + } + }, + else => |e| return e, + } + } + } else { + if (self.cache.manifest_dir.createFile(&manifest_file_path, .{ + .read = true, + .truncate = false, + .lock = 
.Exclusive, + .lock_nonblocking = self.want_shared_lock, + })) |manifest_file| { + self.manifest_file = manifest_file; + self.have_exclusive_lock = true; + } else |err| switch (err) { + error.WouldBlock => { + self.manifest_file = try self.cache.manifest_dir.openFile(&manifest_file_path, .{ + .lock = .Shared, + }); + }, + else => |e| return e, + } + } + + self.want_refresh_timestamp = true; + + const file_contents = try self.manifest_file.?.reader().readAllAlloc(gpa, manifest_file_size_max); + defer gpa.free(file_contents); + + const input_file_count = self.files.items.len; + var any_file_changed = false; + var line_iter = mem.tokenize(u8, file_contents, "\n"); + var idx: usize = 0; + while (line_iter.next()) |line| { + defer idx += 1; + + const cache_hash_file = if (idx < input_file_count) &self.files.items[idx] else blk: { + const new = try self.files.addOne(gpa); + new.* = .{ + .prefixed_path = null, + .contents = null, + .max_file_size = null, + .stat = undefined, + .bin_digest = undefined, + }; + break :blk new; + }; + + var iter = mem.tokenize(u8, line, " "); + const size = iter.next() orelse return error.InvalidFormat; + const inode = iter.next() orelse return error.InvalidFormat; + const mtime_nsec_str = iter.next() orelse return error.InvalidFormat; + const digest_str = iter.next() orelse return error.InvalidFormat; + const prefix_str = iter.next() orelse return error.InvalidFormat; + const file_path = iter.rest(); + + cache_hash_file.stat.size = fmt.parseInt(u64, size, 10) catch return error.InvalidFormat; + cache_hash_file.stat.inode = fmt.parseInt(fs.File.INode, inode, 10) catch return error.InvalidFormat; + cache_hash_file.stat.mtime = fmt.parseInt(i64, mtime_nsec_str, 10) catch return error.InvalidFormat; + _ = std.fmt.hexToBytes(&cache_hash_file.bin_digest, digest_str) catch return error.InvalidFormat; + const prefix = fmt.parseInt(u8, prefix_str, 10) catch return error.InvalidFormat; + if (prefix >= self.cache.prefixes_len) return 
error.InvalidFormat; + + if (file_path.len == 0) { + return error.InvalidFormat; + } + if (cache_hash_file.prefixed_path) |pp| { + if (pp.prefix != prefix or !mem.eql(u8, file_path, pp.sub_path)) { + return error.InvalidFormat; + } + } + + if (cache_hash_file.prefixed_path == null) { + cache_hash_file.prefixed_path = .{ + .prefix = prefix, + .sub_path = try gpa.dupe(u8, file_path), + }; + } + + const pp = cache_hash_file.prefixed_path.?; + const dir = self.cache.prefixes()[pp.prefix].handle; + const this_file = dir.openFile(pp.sub_path, .{ .mode = .read_only }) catch |err| switch (err) { + error.FileNotFound => { + try self.upgradeToExclusiveLock(); + return false; + }, + else => return error.CacheUnavailable, + }; + defer this_file.close(); + + const actual_stat = this_file.stat() catch |err| { + self.failed_file_index = idx; + return err; + }; + const size_match = actual_stat.size == cache_hash_file.stat.size; + const mtime_match = actual_stat.mtime == cache_hash_file.stat.mtime; + const inode_match = actual_stat.inode == cache_hash_file.stat.inode; + + if (!size_match or !mtime_match or !inode_match) { + self.manifest_dirty = true; + + cache_hash_file.stat = .{ + .size = actual_stat.size, + .mtime = actual_stat.mtime, + .inode = actual_stat.inode, + }; + + if (self.isProblematicTimestamp(cache_hash_file.stat.mtime)) { + // The actual file has an unreliable timestamp, force it to be hashed + cache_hash_file.stat.mtime = 0; + cache_hash_file.stat.inode = 0; + } + + var actual_digest: BinDigest = undefined; + hashFile(this_file, &actual_digest) catch |err| { + self.failed_file_index = idx; + return err; + }; + + if (!mem.eql(u8, &cache_hash_file.bin_digest, &actual_digest)) { + cache_hash_file.bin_digest = actual_digest; + // keep going until we have the input file digests + any_file_changed = true; + } + } + + if (!any_file_changed) { + self.hash.hasher.update(&cache_hash_file.bin_digest); + } + } + + if (any_file_changed) { + // cache miss + // keep the manifest 
file open + self.unhit(bin_digest, input_file_count); + try self.upgradeToExclusiveLock(); + return false; + } + + if (idx < input_file_count) { + self.manifest_dirty = true; + while (idx < input_file_count) : (idx += 1) { + const ch_file = &self.files.items[idx]; + self.populateFileHash(ch_file) catch |err| { + self.failed_file_index = idx; + return err; + }; + } + try self.upgradeToExclusiveLock(); + return false; + } + + if (self.want_shared_lock) { + try self.downgradeToSharedLock(); + } + + return true; + } + + pub fn unhit(self: *Manifest, bin_digest: BinDigest, input_file_count: usize) void { + // Reset the hash. + self.hash.hasher = hasher_init; + self.hash.hasher.update(&bin_digest); + + // Remove files not in the initial hash. + for (self.files.items[input_file_count..]) |*file| { + file.deinit(self.cache.gpa); + } + self.files.shrinkRetainingCapacity(input_file_count); + + for (self.files.items) |file| { + self.hash.hasher.update(&file.bin_digest); + } + } + + fn isProblematicTimestamp(man: *Manifest, file_time: i128) bool { + // If the file_time is prior to the most recent problematic timestamp + // then we don't need to access the filesystem. + if (file_time < man.recent_problematic_timestamp) + return false; + + // Next we will check the globally shared Cache timestamp, which is accessed + // from multiple threads. + man.cache.mutex.lock(); + defer man.cache.mutex.unlock(); + + // Save the global one to our local one to avoid locking next time. + man.recent_problematic_timestamp = man.cache.recent_problematic_timestamp; + if (file_time < man.recent_problematic_timestamp) + return false; + + // This flag prevents multiple filesystem writes for the same hit() call. 
+ if (man.want_refresh_timestamp) { + man.want_refresh_timestamp = false; + + var file = man.cache.manifest_dir.createFile("timestamp", .{ + .read = true, + .truncate = true, + }) catch return true; + defer file.close(); + + // Save locally and also save globally (we still hold the global lock). + man.recent_problematic_timestamp = (file.stat() catch return true).mtime; + man.cache.recent_problematic_timestamp = man.recent_problematic_timestamp; + } + + return file_time >= man.recent_problematic_timestamp; + } + + fn populateFileHash(self: *Manifest, ch_file: *File) !void { + const pp = ch_file.prefixed_path.?; + const dir = self.cache.prefixes()[pp.prefix].handle; + const file = try dir.openFile(pp.sub_path, .{}); + defer file.close(); + + const actual_stat = try file.stat(); + ch_file.stat = .{ + .size = actual_stat.size, + .mtime = actual_stat.mtime, + .inode = actual_stat.inode, + }; + + if (self.isProblematicTimestamp(ch_file.stat.mtime)) { + // The actual file has an unreliable timestamp, force it to be hashed + ch_file.stat.mtime = 0; + ch_file.stat.inode = 0; + } + + if (ch_file.max_file_size) |max_file_size| { + if (ch_file.stat.size > max_file_size) { + return error.FileTooBig; + } + + const contents = try self.cache.gpa.alloc(u8, @intCast(usize, ch_file.stat.size)); + errdefer self.cache.gpa.free(contents); + + // Hash while reading from disk, to keep the contents in the cpu cache while + // doing hashing. 
+ var hasher = hasher_init; + var off: usize = 0; + while (true) { + // give me everything you've got, captain + const bytes_read = try file.read(contents[off..]); + if (bytes_read == 0) break; + hasher.update(contents[off..][0..bytes_read]); + off += bytes_read; + } + hasher.final(&ch_file.bin_digest); + + ch_file.contents = contents; + } else { + try hashFile(file, &ch_file.bin_digest); + } + + self.hash.hasher.update(&ch_file.bin_digest); + } + + /// Add a file as a dependency of the process being cached, after the initial hash has been + /// calculated. This is useful for processes that don't know all the files that + /// are depended on ahead of time. For example, a source file that can import other files + /// will need to be recompiled if the imported file is changed. + pub fn addFilePostFetch(self: *Manifest, file_path: []const u8, max_file_size: usize) ![]const u8 { + assert(self.manifest_file != null); + + const gpa = self.cache.gpa; + const prefixed_path = try self.cache.findPrefix(file_path); + errdefer gpa.free(prefixed_path.sub_path); + + const new_ch_file = try self.files.addOne(gpa); + new_ch_file.* = .{ + .prefixed_path = prefixed_path, + .max_file_size = max_file_size, + .stat = undefined, + .bin_digest = undefined, + .contents = null, + }; + errdefer self.files.shrinkRetainingCapacity(self.files.items.len - 1); + + try self.populateFileHash(new_ch_file); + + return new_ch_file.contents.?; + } + + /// Add a file as a dependency of the process being cached, after the initial hash has been + /// calculated. This is useful for processes that don't know all the files that + /// are depended on ahead of time. For example, a source file that can import other files + /// will need to be recompiled if the imported file is changed. 
pub fn addFilePost(self: *Manifest, file_path: []const u8) !void {
    assert(self.manifest_file != null);

    const allocator = self.cache.gpa;

    // Map the path onto one of the cache's prefixes. The sub_path is released
    // again if any later step in this function fails.
    const located = try self.cache.findPrefix(file_path);
    errdefer allocator.free(located.sub_path);

    // Reserve the new entry; it is popped off again if hashing fails.
    const entry = try self.files.addOne(allocator);
    errdefer self.files.shrinkRetainingCapacity(self.files.items.len - 1);

    // No size limit is given, so `contents` stays null for this entry.
    entry.* = .{
        .prefixed_path = located,
        .max_file_size = null,
        .stat = undefined,
        .bin_digest = undefined,
        .contents = null,
    };

    try self.populateFileHash(entry);
}

/// Variant of `addFilePost` for callers that already hold the file's bytes and
/// stat information, so nothing is read from disk here.
/// On success, cache takes ownership of `resolved_path`.
pub fn addFilePostContents(
    self: *Manifest,
    resolved_path: []u8,
    bytes: []const u8,
    stat: File.Stat,
) error{OutOfMemory}!void {
    assert(self.manifest_file != null);
    const allocator = self.cache.gpa;

    // Reserve the entry first; undo the reservation on failure.
    const entry = try self.files.addOne(allocator);
    errdefer self.files.shrinkRetainingCapacity(self.files.items.len - 1);

    const located = try self.cache.findPrefixResolved(resolved_path);
    errdefer allocator.free(located.sub_path);

    entry.* = .{
        .prefixed_path = located,
        .max_file_size = null,
        .stat = stat,
        .bin_digest = undefined,
        .contents = null,
    };

    const unreliable_timestamp = self.isProblematicTimestamp(entry.stat.mtime);
    if (unreliable_timestamp) {
        // The actual file has an unreliable timestamp, force it to be hashed
        entry.stat.mtime = 0;
        entry.stat.inode = 0;
    }

    // Digest the caller-provided bytes, then fold that digest into the
    // manifest's running hash.
    var digest_hasher = hasher_init;
    digest_hasher.update(bytes);
    digest_hasher.final(&entry.bin_digest);

    self.hash.hasher.update(&entry.bin_digest);
}

/// Reads the dependency file `dep_file_basename` from `dir` and registers every
/// prerequisite it lists via `addFilePost`.
/// An empty dep file is accepted. A tokenizer error is logged and reported as
/// `error.InvalidDepFile`.
pub fn addDepFilePost(self: *Manifest, dir: fs.Dir, dep_file_basename: []const u8) !void {
    assert(self.manifest_file != null);

    const allocator = self.cache.gpa;

    const dep_bytes = try dir.readFileAlloc(allocator, dep_file_basename, manifest_file_size_max);
    defer allocator.free(dep_bytes);

    var message = std.ArrayList(u8).init(allocator);
    defer message.deinit();

    var tokens: DepTokenizer = .{ .bytes = dep_bytes };

    // The first token is the target and is skipped whatever its kind.
    // After that we process 0+ prereqs. Clang is invoked in single-source
    // mode so we never get more targets; a second target ends processing.
    var is_first = true;
    while (tokens.next()) |token| {
        switch (token) {
            .target, .target_must_resolve => {
                if (!is_first) return;
            },
            .prereq => |file_path| {
                if (!is_first) try self.addFilePost(file_path);
            },
            else => |err| {
                try err.printError(message.writer());
                log.err("failed parsing {s}: {s}", .{ dep_file_basename, message.items });
                return error.InvalidDepFile;
            },
        }
        is_first = false;
    }
}

/// Returns a hex encoded hash of the inputs.
pub fn final(self: *Manifest) [hex_digest_len]u8 {
    assert(self.manifest_file != null);

    // The manifest file is deliberately left open here so that it stays
    // locked until the API user is done with it. The manifest is not written
    // out here either, because the artifacts to cache may still be in the
    // process of being created.

    var raw_digest: BinDigest = undefined;
    self.hash.hasher.final(&raw_digest);

    var hex_digest: [hex_digest_len]u8 = undefined;
    _ = std.fmt.bufPrint(
        &hex_digest,
        "{s}",
        .{std.fmt.fmtSliceHexLower(&raw_digest)},
    ) catch unreachable;

    return hex_digest;
}

/// If `want_shared_lock` is true, this function automatically downgrades the
/// lock from exclusive to shared.
pub fn writeManifest(self: *Manifest) !void {
    assert(self.have_exclusive_lock);

    const manifest_file = self.manifest_file.?;
    if (self.manifest_dirty) {
        // Serialize into memory first so the file is touched only once the
        // whole manifest has been produced.
        var contents = std.ArrayList(u8).init(self.cache.gpa);
        defer contents.deinit();

        const writer = contents.writer();
        var encoded_digest: [hex_digest_len]u8 = undefined;

        // One line per input file:
        // size, inode, mtime, hex digest, prefix index, sub path.
        for (self.files.items) |file| {
            _ = std.fmt.bufPrint(
                &encoded_digest,
                "{s}",
                .{std.fmt.fmtSliceHexLower(&file.bin_digest)},
            ) catch unreachable;
            try writer.print("{d} {d} {d} {s} {d} {s}\n", .{
                file.stat.size,
                file.stat.inode,
                file.stat.mtime,
                &encoded_digest,
                file.prefixed_path.?.prefix,
                file.prefixed_path.?.sub_path,
            });
        }

        try manifest_file.setEndPos(contents.items.len);
        try manifest_file.pwriteAll(contents.items, 0);

        // Mark the manifest clean only after the write has succeeded. If any
        // step above fails, the flag stays set so a later call retries the
        // write instead of silently skipping it.
        self.manifest_dirty = false;
    }

    if (self.want_shared_lock) {
        try self.downgradeToSharedLock();
    }
}

/// Downgrades the manifest file lock from exclusive to shared.
/// Does nothing when the exclusive lock is not currently held.
fn downgradeToSharedLock(self: *Manifest) !void {
    if (!self.have_exclusive_lock) return;

    // WASI does not currently support flock, so we bypass it here.
    // TODO: If/when flock is supported on WASI, this check should be removed.
    //       See https://github.com/WebAssembly/wasi-filesystem/issues/2
    if (builtin.os.tag != .wasi or std.process.can_spawn or !builtin.single_threaded) {
        const manifest_file = self.manifest_file.?;
        try manifest_file.downgradeLock();
    }

    self.have_exclusive_lock = false;
}

/// Upgrades the manifest file lock to exclusive.
/// Does nothing when the exclusive lock is already held.
/// The lock is released and re-acquired rather than upgraded in place, so the
/// file is briefly unlocked during the call.
fn upgradeToExclusiveLock(self: *Manifest) !void {
    if (self.have_exclusive_lock) return;
    assert(self.manifest_file != null);

    // WASI does not currently support flock, so we bypass it here.
    // TODO: If/when flock is supported on WASI, this check should be removed.
    //       See https://github.com/WebAssembly/wasi-filesystem/issues/2
    if (builtin.os.tag != .wasi or std.process.can_spawn or !builtin.single_threaded) {
        const manifest_file = self.manifest_file.?;
        // Here we intentionally have a period where the lock is released, in case there are
        // other processes holding a shared lock.
        manifest_file.unlock();
        try manifest_file.lock(.Exclusive);
    }
    self.have_exclusive_lock = true;
}

/// Obtain only the data needed to maintain a lock on the manifest file.
/// The `Manifest` remains safe to deinit: its `manifest_file` is set to null,
/// so `deinit` will not unlock or close the file handed over here.
/// Don't forget to call `writeManifest` before this!
pub fn toOwnedLock(self: *Manifest) Lock {
    const lock: Lock = .{
        .manifest_file = self.manifest_file.?,
    };

    self.manifest_file = null;
    return lock;
}

/// Releases the manifest file and frees any memory the Manifest was using.
/// `Manifest.hit` must be called first.
/// Don't forget to call `writeManifest` before this!
pub fn deinit(self: *Manifest) void {
    if (self.manifest_file) |file| {
        if (builtin.os.tag == .windows) {
            // See Lock.release for why this is required on Windows
            file.unlock();
        }

        file.close();
    }
    for (self.files.items) |*file| {
        file.deinit(self.cache.gpa);
    }
    self.files.deinit(self.cache.gpa);
}
};

/// On operating systems that support symlinks, does a readlink. On other operating systems,
/// uses the file contents. Windows supports symlinks but only with elevated privileges, so
/// it is treated as not supporting symlinks.
/// The result is written into `buffer`; the returned slice is whatever the
/// underlying `readFile`/`readLink` call returns.
pub fn readSmallFile(dir: fs.Dir, sub_path: []const u8, buffer: []u8) ![]u8 {
    if (builtin.os.tag == .windows) {
        return dir.readFile(sub_path, buffer);
    } else {
        return dir.readLink(sub_path, buffer);
    }
}

/// On operating systems that support symlinks, does a symlink. On other operating systems,
/// uses the file contents. Windows supports symlinks but only with elevated privileges, so
/// it is treated as not supporting symlinks.
+/// `data` must be a valid UTF-8 encoded file path and 255 bytes or fewer. +pub fn writeSmallFile(dir: fs.Dir, sub_path: []const u8, data: []const u8) !void { + assert(data.len <= 255); + if (builtin.os.tag == .windows) { + return dir.writeFile(sub_path, data); + } else { + return dir.symLink(data, sub_path, .{}); + } +} + +fn hashFile(file: fs.File, bin_digest: *[Hasher.mac_length]u8) !void { + var buf: [1024]u8 = undefined; + + var hasher = hasher_init; + while (true) { + const bytes_read = try file.read(&buf); + if (bytes_read == 0) break; + hasher.update(buf[0..bytes_read]); + } + + hasher.final(bin_digest); +} + +// Create/Write a file, close it, then grab its stat.mtime timestamp. +fn testGetCurrentFileTimestamp() !i128 { + var file = try fs.cwd().createFile("test-filetimestamp.tmp", .{ + .read = true, + .truncate = true, + }); + defer file.close(); + + return (try file.stat()).mtime; +} + +test "cache file and then recall it" { + if (builtin.os.tag == .wasi) { + // https://github.com/ziglang/zig/issues/5437 + return error.SkipZigTest; + } + + const cwd = fs.cwd(); + + const temp_file = "test.txt"; + const temp_manifest_dir = "temp_manifest_dir"; + + try cwd.writeFile(temp_file, "Hello, world!\n"); + + // Wait for file timestamps to tick + const initial_time = try testGetCurrentFileTimestamp(); + while ((try testGetCurrentFileTimestamp()) == initial_time) { + std.time.sleep(1); + } + + var digest1: [hex_digest_len]u8 = undefined; + var digest2: [hex_digest_len]u8 = undefined; + + { + var cache = Cache{ + .gpa = testing.allocator, + .manifest_dir = try cwd.makeOpenPath(temp_manifest_dir, .{}), + }; + cache.addPrefix(.{ .path = null, .handle = fs.cwd() }); + defer cache.manifest_dir.close(); + + { + var ch = cache.obtain(); + defer ch.deinit(); + + ch.hash.add(true); + ch.hash.add(@as(u16, 1234)); + ch.hash.addBytes("1234"); + _ = try ch.addFile(temp_file, null); + + // There should be nothing in the cache + try testing.expectEqual(false, try ch.hit()); + + 
digest1 = ch.final(); + try ch.writeManifest(); + } + { + var ch = cache.obtain(); + defer ch.deinit(); + + ch.hash.add(true); + ch.hash.add(@as(u16, 1234)); + ch.hash.addBytes("1234"); + _ = try ch.addFile(temp_file, null); + + // Cache hit! We just "built" the same file + try testing.expect(try ch.hit()); + digest2 = ch.final(); + + try testing.expectEqual(false, ch.have_exclusive_lock); + } + + try testing.expectEqual(digest1, digest2); + } + + try cwd.deleteTree(temp_manifest_dir); + try cwd.deleteFile(temp_file); +} + +test "check that changing a file makes cache fail" { + if (builtin.os.tag == .wasi) { + // https://github.com/ziglang/zig/issues/5437 + return error.SkipZigTest; + } + const cwd = fs.cwd(); + + const temp_file = "cache_hash_change_file_test.txt"; + const temp_manifest_dir = "cache_hash_change_file_manifest_dir"; + const original_temp_file_contents = "Hello, world!\n"; + const updated_temp_file_contents = "Hello, world; but updated!\n"; + + try cwd.deleteTree(temp_manifest_dir); + try cwd.deleteTree(temp_file); + + try cwd.writeFile(temp_file, original_temp_file_contents); + + // Wait for file timestamps to tick + const initial_time = try testGetCurrentFileTimestamp(); + while ((try testGetCurrentFileTimestamp()) == initial_time) { + std.time.sleep(1); + } + + var digest1: [hex_digest_len]u8 = undefined; + var digest2: [hex_digest_len]u8 = undefined; + + { + var cache = Cache{ + .gpa = testing.allocator, + .manifest_dir = try cwd.makeOpenPath(temp_manifest_dir, .{}), + }; + cache.addPrefix(.{ .path = null, .handle = fs.cwd() }); + defer cache.manifest_dir.close(); + + { + var ch = cache.obtain(); + defer ch.deinit(); + + ch.hash.addBytes("1234"); + const temp_file_idx = try ch.addFile(temp_file, 100); + + // There should be nothing in the cache + try testing.expectEqual(false, try ch.hit()); + + try testing.expect(mem.eql(u8, original_temp_file_contents, ch.files.items[temp_file_idx].contents.?)); + + digest1 = ch.final(); + + try 
ch.writeManifest(); + } + + try cwd.writeFile(temp_file, updated_temp_file_contents); + + { + var ch = cache.obtain(); + defer ch.deinit(); + + ch.hash.addBytes("1234"); + const temp_file_idx = try ch.addFile(temp_file, 100); + + // A file that we depend on has been updated, so the cache should not contain an entry for it + try testing.expectEqual(false, try ch.hit()); + + // The cache system does not keep the contents of re-hashed input files. + try testing.expect(ch.files.items[temp_file_idx].contents == null); + + digest2 = ch.final(); + + try ch.writeManifest(); + } + + try testing.expect(!mem.eql(u8, digest1[0..], digest2[0..])); + } + + try cwd.deleteTree(temp_manifest_dir); + try cwd.deleteTree(temp_file); +} + +test "no file inputs" { + if (builtin.os.tag == .wasi) { + // https://github.com/ziglang/zig/issues/5437 + return error.SkipZigTest; + } + const cwd = fs.cwd(); + const temp_manifest_dir = "no_file_inputs_manifest_dir"; + defer cwd.deleteTree(temp_manifest_dir) catch {}; + + var digest1: [hex_digest_len]u8 = undefined; + var digest2: [hex_digest_len]u8 = undefined; + + var cache = Cache{ + .gpa = testing.allocator, + .manifest_dir = try cwd.makeOpenPath(temp_manifest_dir, .{}), + }; + cache.addPrefix(.{ .path = null, .handle = fs.cwd() }); + defer cache.manifest_dir.close(); + + { + var man = cache.obtain(); + defer man.deinit(); + + man.hash.addBytes("1234"); + + // There should be nothing in the cache + try testing.expectEqual(false, try man.hit()); + + digest1 = man.final(); + + try man.writeManifest(); + } + { + var man = cache.obtain(); + defer man.deinit(); + + man.hash.addBytes("1234"); + + try testing.expect(try man.hit()); + digest2 = man.final(); + try testing.expectEqual(false, man.have_exclusive_lock); + } + + try testing.expectEqual(digest1, digest2); +} + +test "Manifest with files added after initial hash work" { + if (builtin.os.tag == .wasi) { + // https://github.com/ziglang/zig/issues/5437 + return error.SkipZigTest; + } + const cwd 
= fs.cwd(); + + const temp_file1 = "cache_hash_post_file_test1.txt"; + const temp_file2 = "cache_hash_post_file_test2.txt"; + const temp_manifest_dir = "cache_hash_post_file_manifest_dir"; + + try cwd.writeFile(temp_file1, "Hello, world!\n"); + try cwd.writeFile(temp_file2, "Hello world the second!\n"); + + // Wait for file timestamps to tick + const initial_time = try testGetCurrentFileTimestamp(); + while ((try testGetCurrentFileTimestamp()) == initial_time) { + std.time.sleep(1); + } + + var digest1: [hex_digest_len]u8 = undefined; + var digest2: [hex_digest_len]u8 = undefined; + var digest3: [hex_digest_len]u8 = undefined; + + { + var cache = Cache{ + .gpa = testing.allocator, + .manifest_dir = try cwd.makeOpenPath(temp_manifest_dir, .{}), + }; + cache.addPrefix(.{ .path = null, .handle = fs.cwd() }); + defer cache.manifest_dir.close(); + + { + var ch = cache.obtain(); + defer ch.deinit(); + + ch.hash.addBytes("1234"); + _ = try ch.addFile(temp_file1, null); + + // There should be nothing in the cache + try testing.expectEqual(false, try ch.hit()); + + _ = try ch.addFilePost(temp_file2); + + digest1 = ch.final(); + try ch.writeManifest(); + } + { + var ch = cache.obtain(); + defer ch.deinit(); + + ch.hash.addBytes("1234"); + _ = try ch.addFile(temp_file1, null); + + try testing.expect(try ch.hit()); + digest2 = ch.final(); + + try testing.expectEqual(false, ch.have_exclusive_lock); + } + try testing.expect(mem.eql(u8, &digest1, &digest2)); + + // Modify the file added after initial hash + try cwd.writeFile(temp_file2, "Hello world the second, updated\n"); + + // Wait for file timestamps to tick + const initial_time2 = try testGetCurrentFileTimestamp(); + while ((try testGetCurrentFileTimestamp()) == initial_time2) { + std.time.sleep(1); + } + + { + var ch = cache.obtain(); + defer ch.deinit(); + + ch.hash.addBytes("1234"); + _ = try ch.addFile(temp_file1, null); + + // A file that we depend on has been updated, so the cache should not contain an entry for it + 
try testing.expectEqual(false, try ch.hit()); + + _ = try ch.addFilePost(temp_file2); + + digest3 = ch.final(); + + try ch.writeManifest(); + } + + try testing.expect(!mem.eql(u8, &digest1, &digest3)); + } + + try cwd.deleteTree(temp_manifest_dir); + try cwd.deleteFile(temp_file1); + try cwd.deleteFile(temp_file2); +} diff --git a/lib/std/Build/Cache/DepTokenizer.zig b/lib/std/Build/Cache/DepTokenizer.zig new file mode 100644 index 0000000000..8f9f2f81cd --- /dev/null +++ b/lib/std/Build/Cache/DepTokenizer.zig @@ -0,0 +1,1069 @@ +const Tokenizer = @This(); + +index: usize = 0, +bytes: []const u8, +state: State = .lhs, + +const std = @import("std"); +const testing = std.testing; +const assert = std.debug.assert; + +pub fn next(self: *Tokenizer) ?Token { + var start = self.index; + var must_resolve = false; + while (self.index < self.bytes.len) { + const char = self.bytes[self.index]; + switch (self.state) { + .lhs => switch (char) { + '\t', '\n', '\r', ' ' => { + // silently ignore whitespace + self.index += 1; + }, + else => { + start = self.index; + self.state = .target; + }, + }, + .target => switch (char) { + '\t', '\n', '\r', ' ' => { + return errorIllegalChar(.invalid_target, self.index, char); + }, + '$' => { + self.state = .target_dollar_sign; + self.index += 1; + }, + '\\' => { + self.state = .target_reverse_solidus; + self.index += 1; + }, + ':' => { + self.state = .target_colon; + self.index += 1; + }, + else => { + self.index += 1; + }, + }, + .target_reverse_solidus => switch (char) { + '\t', '\n', '\r' => { + return errorIllegalChar(.bad_target_escape, self.index, char); + }, + ' ', '#', '\\' => { + must_resolve = true; + self.state = .target; + self.index += 1; + }, + '$' => { + self.state = .target_dollar_sign; + self.index += 1; + }, + else => { + self.state = .target; + self.index += 1; + }, + }, + .target_dollar_sign => switch (char) { + '$' => { + must_resolve = true; + self.state = .target; + self.index += 1; + }, + else => { + return 
errorIllegalChar(.expected_dollar_sign, self.index, char); + }, + }, + .target_colon => switch (char) { + '\n', '\r' => { + const bytes = self.bytes[start .. self.index - 1]; + if (bytes.len != 0) { + self.state = .lhs; + return finishTarget(must_resolve, bytes); + } + // silently ignore null target + self.state = .lhs; + }, + '/', '\\' => { + self.state = .target_colon_reverse_solidus; + self.index += 1; + }, + else => { + const bytes = self.bytes[start .. self.index - 1]; + if (bytes.len != 0) { + self.state = .rhs; + return finishTarget(must_resolve, bytes); + } + // silently ignore null target + self.state = .lhs; + }, + }, + .target_colon_reverse_solidus => switch (char) { + '\n', '\r' => { + const bytes = self.bytes[start .. self.index - 2]; + if (bytes.len != 0) { + self.state = .lhs; + return finishTarget(must_resolve, bytes); + } + // silently ignore null target + self.state = .lhs; + }, + else => { + self.state = .target; + }, + }, + .rhs => switch (char) { + '\t', ' ' => { + // silently ignore horizontal whitespace + self.index += 1; + }, + '\n', '\r' => { + self.state = .lhs; + }, + '\\' => { + self.state = .rhs_continuation; + self.index += 1; + }, + '"' => { + self.state = .prereq_quote; + self.index += 1; + start = self.index; + }, + else => { + start = self.index; + self.state = .prereq; + }, + }, + .rhs_continuation => switch (char) { + '\n' => { + self.state = .rhs; + self.index += 1; + }, + '\r' => { + self.state = .rhs_continuation_linefeed; + self.index += 1; + }, + else => { + return errorIllegalChar(.continuation_eol, self.index, char); + }, + }, + .rhs_continuation_linefeed => switch (char) { + '\n' => { + self.state = .rhs; + self.index += 1; + }, + else => { + return errorIllegalChar(.continuation_eol, self.index, char); + }, + }, + .prereq_quote => switch (char) { + '"' => { + self.index += 1; + self.state = .rhs; + return Token{ .prereq = self.bytes[start .. 
self.index - 1] }; + }, + else => { + self.index += 1; + }, + }, + .prereq => switch (char) { + '\t', ' ' => { + self.state = .rhs; + return Token{ .prereq = self.bytes[start..self.index] }; + }, + '\n', '\r' => { + self.state = .lhs; + return Token{ .prereq = self.bytes[start..self.index] }; + }, + '\\' => { + self.state = .prereq_continuation; + self.index += 1; + }, + else => { + self.index += 1; + }, + }, + .prereq_continuation => switch (char) { + '\n' => { + self.index += 1; + self.state = .rhs; + return Token{ .prereq = self.bytes[start .. self.index - 2] }; + }, + '\r' => { + self.state = .prereq_continuation_linefeed; + self.index += 1; + }, + else => { + // not continuation + self.state = .prereq; + self.index += 1; + }, + }, + .prereq_continuation_linefeed => switch (char) { + '\n' => { + self.index += 1; + self.state = .rhs; + return Token{ .prereq = self.bytes[start .. self.index - 1] }; + }, + else => { + return errorIllegalChar(.continuation_eol, self.index, char); + }, + }, + } + } else { + switch (self.state) { + .lhs, + .rhs, + .rhs_continuation, + .rhs_continuation_linefeed, + => return null, + .target => { + return errorPosition(.incomplete_target, start, self.bytes[start..]); + }, + .target_reverse_solidus, + .target_dollar_sign, + => { + const idx = self.index - 1; + return errorIllegalChar(.incomplete_escape, idx, self.bytes[idx]); + }, + .target_colon => { + const bytes = self.bytes[start .. self.index - 1]; + if (bytes.len != 0) { + self.index += 1; + self.state = .rhs; + return finishTarget(must_resolve, bytes); + } + // silently ignore null target + self.state = .lhs; + return null; + }, + .target_colon_reverse_solidus => { + const bytes = self.bytes[start .. 
self.index - 2]; + if (bytes.len != 0) { + self.index += 1; + self.state = .rhs; + return finishTarget(must_resolve, bytes); + } + // silently ignore null target + self.state = .lhs; + return null; + }, + .prereq_quote => { + return errorPosition(.incomplete_quoted_prerequisite, start, self.bytes[start..]); + }, + .prereq => { + self.state = .lhs; + return Token{ .prereq = self.bytes[start..] }; + }, + .prereq_continuation => { + self.state = .lhs; + return Token{ .prereq = self.bytes[start .. self.index - 1] }; + }, + .prereq_continuation_linefeed => { + self.state = .lhs; + return Token{ .prereq = self.bytes[start .. self.index - 2] }; + }, + } + } + unreachable; +} + +fn errorPosition(comptime id: std.meta.Tag(Token), index: usize, bytes: []const u8) Token { + return @unionInit(Token, @tagName(id), .{ .index = index, .bytes = bytes }); +} + +fn errorIllegalChar(comptime id: std.meta.Tag(Token), index: usize, char: u8) Token { + return @unionInit(Token, @tagName(id), .{ .index = index, .char = char }); +} + +fn finishTarget(must_resolve: bool, bytes: []const u8) Token { + return if (must_resolve) .{ .target_must_resolve = bytes } else .{ .target = bytes }; +} + +const State = enum { + lhs, + target, + target_reverse_solidus, + target_dollar_sign, + target_colon, + target_colon_reverse_solidus, + rhs, + rhs_continuation, + rhs_continuation_linefeed, + prereq_quote, + prereq, + prereq_continuation, + prereq_continuation_linefeed, +}; + +pub const Token = union(enum) { + target: []const u8, + target_must_resolve: []const u8, + prereq: []const u8, + + incomplete_quoted_prerequisite: IndexAndBytes, + incomplete_target: IndexAndBytes, + + invalid_target: IndexAndChar, + bad_target_escape: IndexAndChar, + expected_dollar_sign: IndexAndChar, + continuation_eol: IndexAndChar, + incomplete_escape: IndexAndChar, + + pub const IndexAndChar = struct { + index: usize, + char: u8, + }; + + pub const IndexAndBytes = struct { + index: usize, + bytes: []const u8, + }; + + /// 
Resolve escapes in target. Only valid with .target_must_resolve. + pub fn resolve(self: Token, writer: anytype) @TypeOf(writer).Error!void { + const bytes = self.target_must_resolve; // resolve called on incorrect token + + var state: enum { start, escape, dollar } = .start; + for (bytes) |c| { + switch (state) { + .start => { + switch (c) { + '\\' => state = .escape, + '$' => state = .dollar, + else => try writer.writeByte(c), + } + }, + .escape => { + switch (c) { + ' ', '#', '\\' => {}, + '$' => { + try writer.writeByte('\\'); + state = .dollar; + continue; + }, + else => try writer.writeByte('\\'), + } + try writer.writeByte(c); + state = .start; + }, + .dollar => { + try writer.writeByte('$'); + switch (c) { + '$' => {}, + else => try writer.writeByte(c), + } + state = .start; + }, + } + } + } + + pub fn printError(self: Token, writer: anytype) @TypeOf(writer).Error!void { + switch (self) { + .target, .target_must_resolve, .prereq => unreachable, // not an error + .incomplete_quoted_prerequisite, + .incomplete_target, + => |index_and_bytes| { + try writer.print("{s} '", .{self.errStr()}); + if (self == .incomplete_target) { + const tmp = Token{ .target_must_resolve = index_and_bytes.bytes }; + try tmp.resolve(writer); + } else { + try printCharValues(writer, index_and_bytes.bytes); + } + try writer.print("' at position {d}", .{index_and_bytes.index}); + }, + .invalid_target, + .bad_target_escape, + .expected_dollar_sign, + .continuation_eol, + .incomplete_escape, + => |index_and_char| { + try writer.writeAll("illegal char "); + try printUnderstandableChar(writer, index_and_char.char); + try writer.print(" at position {d}: {s}", .{ index_and_char.index, self.errStr() }); + }, + } + } + + fn errStr(self: Token) []const u8 { + return switch (self) { + .target, .target_must_resolve, .prereq => unreachable, // not an error + .incomplete_quoted_prerequisite => "incomplete quoted prerequisite", + .incomplete_target => "incomplete target", + .invalid_target => 
"invalid target", + .bad_target_escape => "bad target escape", + .expected_dollar_sign => "expecting '$'", + .continuation_eol => "continuation expecting end-of-line", + .incomplete_escape => "incomplete escape", + }; + } +}; + +test "empty file" { + try depTokenizer("", ""); +} + +test "empty whitespace" { + try depTokenizer("\n", ""); + try depTokenizer("\r", ""); + try depTokenizer("\r\n", ""); + try depTokenizer(" ", ""); +} + +test "empty colon" { + try depTokenizer(":", ""); + try depTokenizer("\n:", ""); + try depTokenizer("\r:", ""); + try depTokenizer("\r\n:", ""); + try depTokenizer(" :", ""); +} + +test "empty target" { + try depTokenizer("foo.o:", "target = {foo.o}"); + try depTokenizer( + \\foo.o: + \\bar.o: + \\abcd.o: + , + \\target = {foo.o} + \\target = {bar.o} + \\target = {abcd.o} + ); +} + +test "whitespace empty target" { + try depTokenizer("\nfoo.o:", "target = {foo.o}"); + try depTokenizer("\rfoo.o:", "target = {foo.o}"); + try depTokenizer("\r\nfoo.o:", "target = {foo.o}"); + try depTokenizer(" foo.o:", "target = {foo.o}"); +} + +test "escape empty target" { + try depTokenizer("\\ foo.o:", "target = { foo.o}"); + try depTokenizer("\\#foo.o:", "target = {#foo.o}"); + try depTokenizer("\\\\foo.o:", "target = {\\foo.o}"); + try depTokenizer("$$foo.o:", "target = {$foo.o}"); +} + +test "empty target linefeeds" { + try depTokenizer("\n", ""); + try depTokenizer("\r\n", ""); + + const expect = "target = {foo.o}"; + try depTokenizer( + \\foo.o: + , expect); + try depTokenizer( + \\foo.o: + \\ + , expect); + try depTokenizer( + \\foo.o: + , expect); + try depTokenizer( + \\foo.o: + \\ + , expect); +} + +test "empty target linefeeds + continuations" { + const expect = "target = {foo.o}"; + try depTokenizer( + \\foo.o:\ + , expect); + try depTokenizer( + \\foo.o:\ + \\ + , expect); + try depTokenizer( + \\foo.o:\ + , expect); + try depTokenizer( + \\foo.o:\ + \\ + , expect); +} + +test "empty target linefeeds + hspace + continuations" { + const expect 
= "target = {foo.o}"; + try depTokenizer( + \\foo.o: \ + , expect); + try depTokenizer( + \\foo.o: \ + \\ + , expect); + try depTokenizer( + \\foo.o: \ + , expect); + try depTokenizer( + \\foo.o: \ + \\ + , expect); +} + +test "prereq" { + const expect = + \\target = {foo.o} + \\prereq = {foo.c} + ; + try depTokenizer("foo.o: foo.c", expect); + try depTokenizer( + \\foo.o: \ + \\foo.c + , expect); + try depTokenizer( + \\foo.o: \ + \\ foo.c + , expect); + try depTokenizer( + \\foo.o: \ + \\ foo.c + , expect); +} + +test "prereq continuation" { + const expect = + \\target = {foo.o} + \\prereq = {foo.h} + \\prereq = {bar.h} + ; + try depTokenizer( + \\foo.o: foo.h\ + \\bar.h + , expect); + try depTokenizer( + \\foo.o: foo.h\ + \\bar.h + , expect); +} + +test "multiple prereqs" { + const expect = + \\target = {foo.o} + \\prereq = {foo.c} + \\prereq = {foo.h} + \\prereq = {bar.h} + ; + try depTokenizer("foo.o: foo.c foo.h bar.h", expect); + try depTokenizer( + \\foo.o: \ + \\foo.c foo.h bar.h + , expect); + try depTokenizer( + \\foo.o: foo.c foo.h bar.h\ + , expect); + try depTokenizer( + \\foo.o: foo.c foo.h bar.h\ + \\ + , expect); + try depTokenizer( + \\foo.o: \ + \\foo.c \ + \\ foo.h\ + \\bar.h + \\ + , expect); + try depTokenizer( + \\foo.o: \ + \\foo.c \ + \\ foo.h\ + \\bar.h\ + \\ + , expect); + try depTokenizer( + \\foo.o: \ + \\foo.c \ + \\ foo.h\ + \\bar.h\ + , expect); +} + +test "multiple targets and prereqs" { + try depTokenizer( + \\foo.o: foo.c + \\bar.o: bar.c a.h b.h c.h + \\abc.o: abc.c \ + \\ one.h two.h \ + \\ three.h four.h + , + \\target = {foo.o} + \\prereq = {foo.c} + \\target = {bar.o} + \\prereq = {bar.c} + \\prereq = {a.h} + \\prereq = {b.h} + \\prereq = {c.h} + \\target = {abc.o} + \\prereq = {abc.c} + \\prereq = {one.h} + \\prereq = {two.h} + \\prereq = {three.h} + \\prereq = {four.h} + ); + try depTokenizer( + \\ascii.o: ascii.c + \\base64.o: base64.c stdio.h + \\elf.o: elf.c a.h b.h c.h + \\macho.o: \ + \\ macho.c\ + \\ a.h b.h c.h + , + 
\\target = {ascii.o} + \\prereq = {ascii.c} + \\target = {base64.o} + \\prereq = {base64.c} + \\prereq = {stdio.h} + \\target = {elf.o} + \\prereq = {elf.c} + \\prereq = {a.h} + \\prereq = {b.h} + \\prereq = {c.h} + \\target = {macho.o} + \\prereq = {macho.c} + \\prereq = {a.h} + \\prereq = {b.h} + \\prereq = {c.h} + ); + try depTokenizer( + \\a$$scii.o: ascii.c + \\\\base64.o: "\base64.c" "s t#dio.h" + \\e\\lf.o: "e\lf.c" "a.h$$" "$$b.h c.h$$" + \\macho.o: \ + \\ "macho!.c" \ + \\ a.h b.h c.h + , + \\target = {a$scii.o} + \\prereq = {ascii.c} + \\target = {\base64.o} + \\prereq = {\base64.c} + \\prereq = {s t#dio.h} + \\target = {e\lf.o} + \\prereq = {e\lf.c} + \\prereq = {a.h$$} + \\prereq = {$$b.h c.h$$} + \\target = {macho.o} + \\prereq = {macho!.c} + \\prereq = {a.h} + \\prereq = {b.h} + \\prereq = {c.h} + ); +} + +test "windows quoted prereqs" { + try depTokenizer( + \\c:\foo.o: "C:\Program Files (x86)\Microsoft Visual Studio\foo.c" + \\c:\foo2.o: "C:\Program Files (x86)\Microsoft Visual Studio\foo2.c" \ + \\ "C:\Program Files (x86)\Microsoft Visual Studio\foo1.h" \ + \\ "C:\Program Files (x86)\Microsoft Visual Studio\foo2.h" + , + \\target = {c:\foo.o} + \\prereq = {C:\Program Files (x86)\Microsoft Visual Studio\foo.c} + \\target = {c:\foo2.o} + \\prereq = {C:\Program Files (x86)\Microsoft Visual Studio\foo2.c} + \\prereq = {C:\Program Files (x86)\Microsoft Visual Studio\foo1.h} + \\prereq = {C:\Program Files (x86)\Microsoft Visual Studio\foo2.h} + ); +} + +test "windows mixed prereqs" { + try depTokenizer( + \\cimport.o: \ + \\ C:\msys64\home\anon\project\zig\master\zig-cache\o\qhvhbUo7GU5iKyQ5mpA8TcQpncCYaQu0wwvr3ybiSTj_Dtqi1Nmcb70kfODJ2Qlg\cimport.h \ + \\ "C:\Program Files (x86)\Windows Kits\10\\Include\10.0.17763.0\ucrt\stdio.h" \ + \\ "C:\Program Files (x86)\Windows Kits\10\\Include\10.0.17763.0\ucrt\corecrt.h" \ + \\ "C:\Program Files (x86)\Microsoft Visual Studio\2019\BuildTools\VC\Tools\MSVC\14.21.27702\lib\x64\\..\..\include\vcruntime.h" \ + \\ 
"C:\Program Files (x86)\Microsoft Visual Studio\2019\BuildTools\VC\Tools\MSVC\14.21.27702\lib\x64\\..\..\include\sal.h" \ + \\ "C:\Program Files (x86)\Microsoft Visual Studio\2019\BuildTools\VC\Tools\MSVC\14.21.27702\lib\x64\\..\..\include\concurrencysal.h" \ + \\ C:\msys64\opt\zig\lib\zig\include\vadefs.h \ + \\ "C:\Program Files (x86)\Microsoft Visual Studio\2019\BuildTools\VC\Tools\MSVC\14.21.27702\lib\x64\\..\..\include\vadefs.h" \ + \\ "C:\Program Files (x86)\Windows Kits\10\\Include\10.0.17763.0\ucrt\corecrt_wstdio.h" \ + \\ "C:\Program Files (x86)\Windows Kits\10\\Include\10.0.17763.0\ucrt\corecrt_stdio_config.h" \ + \\ "C:\Program Files (x86)\Windows Kits\10\\Include\10.0.17763.0\ucrt\string.h" \ + \\ "C:\Program Files (x86)\Windows Kits\10\\Include\10.0.17763.0\ucrt\corecrt_memory.h" \ + \\ "C:\Program Files (x86)\Windows Kits\10\\Include\10.0.17763.0\ucrt\corecrt_memcpy_s.h" \ + \\ "C:\Program Files (x86)\Windows Kits\10\\Include\10.0.17763.0\ucrt\errno.h" \ + \\ "C:\Program Files (x86)\Microsoft Visual Studio\2019\BuildTools\VC\Tools\MSVC\14.21.27702\lib\x64\\..\..\include\vcruntime_string.h" \ + \\ "C:\Program Files (x86)\Windows Kits\10\\Include\10.0.17763.0\ucrt\corecrt_wstring.h" + , + \\target = {cimport.o} + \\prereq = {C:\msys64\home\anon\project\zig\master\zig-cache\o\qhvhbUo7GU5iKyQ5mpA8TcQpncCYaQu0wwvr3ybiSTj_Dtqi1Nmcb70kfODJ2Qlg\cimport.h} + \\prereq = {C:\Program Files (x86)\Windows Kits\10\\Include\10.0.17763.0\ucrt\stdio.h} + \\prereq = {C:\Program Files (x86)\Windows Kits\10\\Include\10.0.17763.0\ucrt\corecrt.h} + \\prereq = {C:\Program Files (x86)\Microsoft Visual Studio\2019\BuildTools\VC\Tools\MSVC\14.21.27702\lib\x64\\..\..\include\vcruntime.h} + \\prereq = {C:\Program Files (x86)\Microsoft Visual Studio\2019\BuildTools\VC\Tools\MSVC\14.21.27702\lib\x64\\..\..\include\sal.h} + \\prereq = {C:\Program Files (x86)\Microsoft Visual Studio\2019\BuildTools\VC\Tools\MSVC\14.21.27702\lib\x64\\..\..\include\concurrencysal.h} + \\prereq = 
{C:\msys64\opt\zig\lib\zig\include\vadefs.h} + \\prereq = {C:\Program Files (x86)\Microsoft Visual Studio\2019\BuildTools\VC\Tools\MSVC\14.21.27702\lib\x64\\..\..\include\vadefs.h} + \\prereq = {C:\Program Files (x86)\Windows Kits\10\\Include\10.0.17763.0\ucrt\corecrt_wstdio.h} + \\prereq = {C:\Program Files (x86)\Windows Kits\10\\Include\10.0.17763.0\ucrt\corecrt_stdio_config.h} + \\prereq = {C:\Program Files (x86)\Windows Kits\10\\Include\10.0.17763.0\ucrt\string.h} + \\prereq = {C:\Program Files (x86)\Windows Kits\10\\Include\10.0.17763.0\ucrt\corecrt_memory.h} + \\prereq = {C:\Program Files (x86)\Windows Kits\10\\Include\10.0.17763.0\ucrt\corecrt_memcpy_s.h} + \\prereq = {C:\Program Files (x86)\Windows Kits\10\\Include\10.0.17763.0\ucrt\errno.h} + \\prereq = {C:\Program Files (x86)\Microsoft Visual Studio\2019\BuildTools\VC\Tools\MSVC\14.21.27702\lib\x64\\..\..\include\vcruntime_string.h} + \\prereq = {C:\Program Files (x86)\Windows Kits\10\\Include\10.0.17763.0\ucrt\corecrt_wstring.h} + ); +} + +test "windows funky targets" { + try depTokenizer( + \\C:\Users\anon\foo.o: + \\C:\Users\anon\foo\ .o: + \\C:\Users\anon\foo\#.o: + \\C:\Users\anon\foo$$.o: + \\C:\Users\anon\\\ foo.o: + \\C:\Users\anon\\#foo.o: + \\C:\Users\anon\$$foo.o: + \\C:\Users\anon\\\ \ \ \ \ foo.o: + , + \\target = {C:\Users\anon\foo.o} + \\target = {C:\Users\anon\foo .o} + \\target = {C:\Users\anon\foo#.o} + \\target = {C:\Users\anon\foo$.o} + \\target = {C:\Users\anon\ foo.o} + \\target = {C:\Users\anon\#foo.o} + \\target = {C:\Users\anon\$foo.o} + \\target = {C:\Users\anon\ foo.o} + ); +} + +test "windows drive and forward slashes" { + try depTokenizer( + \\C:/msys64/what/zig-cache\tmp\48ac4d78dd531abd-cxa_thread_atexit.obj: \ + \\ C:/msys64/opt/zig3/lib/zig/libc/mingw/crt/cxa_thread_atexit.c + , + \\target = {C:/msys64/what/zig-cache\tmp\48ac4d78dd531abd-cxa_thread_atexit.obj} + \\prereq = {C:/msys64/opt/zig3/lib/zig/libc/mingw/crt/cxa_thread_atexit.c} + ); +} + +test "error incomplete 
escape - reverse_solidus" { + try depTokenizer("\\", + \\ERROR: illegal char '\' at position 0: incomplete escape + ); + try depTokenizer("\t\\", + \\ERROR: illegal char '\' at position 1: incomplete escape + ); + try depTokenizer("\n\\", + \\ERROR: illegal char '\' at position 1: incomplete escape + ); + try depTokenizer("\r\\", + \\ERROR: illegal char '\' at position 1: incomplete escape + ); + try depTokenizer("\r\n\\", + \\ERROR: illegal char '\' at position 2: incomplete escape + ); + try depTokenizer(" \\", + \\ERROR: illegal char '\' at position 1: incomplete escape + ); +} + +test "error incomplete escape - dollar_sign" { + try depTokenizer("$", + \\ERROR: illegal char '$' at position 0: incomplete escape + ); + try depTokenizer("\t$", + \\ERROR: illegal char '$' at position 1: incomplete escape + ); + try depTokenizer("\n$", + \\ERROR: illegal char '$' at position 1: incomplete escape + ); + try depTokenizer("\r$", + \\ERROR: illegal char '$' at position 1: incomplete escape + ); + try depTokenizer("\r\n$", + \\ERROR: illegal char '$' at position 2: incomplete escape + ); + try depTokenizer(" $", + \\ERROR: illegal char '$' at position 1: incomplete escape + ); +} + +test "error incomplete target" { + try depTokenizer("foo.o", + \\ERROR: incomplete target 'foo.o' at position 0 + ); + try depTokenizer("\tfoo.o", + \\ERROR: incomplete target 'foo.o' at position 1 + ); + try depTokenizer("\nfoo.o", + \\ERROR: incomplete target 'foo.o' at position 1 + ); + try depTokenizer("\rfoo.o", + \\ERROR: incomplete target 'foo.o' at position 1 + ); + try depTokenizer("\r\nfoo.o", + \\ERROR: incomplete target 'foo.o' at position 2 + ); + try depTokenizer(" foo.o", + \\ERROR: incomplete target 'foo.o' at position 1 + ); + + try depTokenizer("\\ foo.o", + \\ERROR: incomplete target ' foo.o' at position 0 + ); + try depTokenizer("\\#foo.o", + \\ERROR: incomplete target '#foo.o' at position 0 + ); + try depTokenizer("\\\\foo.o", + \\ERROR: incomplete target '\foo.o' at 
position 0 + ); + try depTokenizer("$$foo.o", + \\ERROR: incomplete target '$foo.o' at position 0 + ); +} + +test "error illegal char at position - bad target escape" { + try depTokenizer("\\\t", + \\ERROR: illegal char \x09 at position 1: bad target escape + ); + try depTokenizer("\\\n", + \\ERROR: illegal char \x0A at position 1: bad target escape + ); + try depTokenizer("\\\r", + \\ERROR: illegal char \x0D at position 1: bad target escape + ); + try depTokenizer("\\\r\n", + \\ERROR: illegal char \x0D at position 1: bad target escape + ); +} + +test "error illegal char at position - execting dollar_sign" { + try depTokenizer("$\t", + \\ERROR: illegal char \x09 at position 1: expecting '$' + ); + try depTokenizer("$\n", + \\ERROR: illegal char \x0A at position 1: expecting '$' + ); + try depTokenizer("$\r", + \\ERROR: illegal char \x0D at position 1: expecting '$' + ); + try depTokenizer("$\r\n", + \\ERROR: illegal char \x0D at position 1: expecting '$' + ); +} + +test "error illegal char at position - invalid target" { + try depTokenizer("foo\t.o", + \\ERROR: illegal char \x09 at position 3: invalid target + ); + try depTokenizer("foo\n.o", + \\ERROR: illegal char \x0A at position 3: invalid target + ); + try depTokenizer("foo\r.o", + \\ERROR: illegal char \x0D at position 3: invalid target + ); + try depTokenizer("foo\r\n.o", + \\ERROR: illegal char \x0D at position 3: invalid target + ); +} + +test "error target - continuation expecting end-of-line" { + try depTokenizer("foo.o: \\\t", + \\target = {foo.o} + \\ERROR: illegal char \x09 at position 8: continuation expecting end-of-line + ); + try depTokenizer("foo.o: \\ ", + \\target = {foo.o} + \\ERROR: illegal char ' ' at position 8: continuation expecting end-of-line + ); + try depTokenizer("foo.o: \\x", + \\target = {foo.o} + \\ERROR: illegal char 'x' at position 8: continuation expecting end-of-line + ); + try depTokenizer("foo.o: \\\x0dx", + \\target = {foo.o} + \\ERROR: illegal char 'x' at position 9: 
continuation expecting end-of-line + ); +} + +test "error prereq - continuation expecting end-of-line" { + try depTokenizer("foo.o: foo.h\\\x0dx", + \\target = {foo.o} + \\ERROR: illegal char 'x' at position 14: continuation expecting end-of-line + ); +} + +// - tokenize input, emit textual representation, and compare to expect +fn depTokenizer(input: []const u8, expect: []const u8) !void { + var arena_allocator = std.heap.ArenaAllocator.init(std.testing.allocator); + const arena = arena_allocator.allocator(); + defer arena_allocator.deinit(); + + var it: Tokenizer = .{ .bytes = input }; + var buffer = std.ArrayList(u8).init(arena); + var resolve_buf = std.ArrayList(u8).init(arena); + var i: usize = 0; + while (it.next()) |token| { + if (i != 0) try buffer.appendSlice("\n"); + switch (token) { + .target, .prereq => |bytes| { + try buffer.appendSlice(@tagName(token)); + try buffer.appendSlice(" = {"); + for (bytes) |b| { + try buffer.append(printable_char_tab[b]); + } + try buffer.appendSlice("}"); + }, + .target_must_resolve => { + try buffer.appendSlice("target = {"); + try token.resolve(resolve_buf.writer()); + for (resolve_buf.items) |b| { + try buffer.append(printable_char_tab[b]); + } + resolve_buf.items.len = 0; + try buffer.appendSlice("}"); + }, + else => { + try buffer.appendSlice("ERROR: "); + try token.printError(buffer.writer()); + break; + }, + } + i += 1; + } + + if (std.mem.eql(u8, expect, buffer.items)) { + try testing.expect(true); + return; + } + + const out = std.io.getStdErr().writer(); + + try out.writeAll("\n"); + try printSection(out, "<<<< input", input); + try printSection(out, "==== expect", expect); + try printSection(out, ">>>> got", buffer.items); + try printRuler(out); + + try testing.expect(false); +} + +fn printSection(out: anytype, label: []const u8, bytes: []const u8) !void { + try printLabel(out, label, bytes); + try hexDump(out, bytes); + try printRuler(out); + try out.writeAll(bytes); + try out.writeAll("\n"); +} + +fn 
printLabel(out: anytype, label: []const u8, bytes: []const u8) !void { + var buf: [80]u8 = undefined; + var text = try std.fmt.bufPrint(buf[0..], "{s} {d} bytes ", .{ label, bytes.len }); + try out.writeAll(text); + var i: usize = text.len; + const end = 79; + while (i < end) : (i += 1) { + try out.writeAll(&[_]u8{label[0]}); + } + try out.writeAll("\n"); +} + +fn printRuler(out: anytype) !void { + var i: usize = 0; + const end = 79; + while (i < end) : (i += 1) { + try out.writeAll("-"); + } + try out.writeAll("\n"); +} + +fn hexDump(out: anytype, bytes: []const u8) !void { + const n16 = bytes.len >> 4; + var line: usize = 0; + var offset: usize = 0; + while (line < n16) : (line += 1) { + try hexDump16(out, offset, bytes[offset .. offset + 16]); + offset += 16; + } + + const n = bytes.len & 0x0f; + if (n > 0) { + try printDecValue(out, offset, 8); + try out.writeAll(":"); + try out.writeAll(" "); + var end1 = std.math.min(offset + n, offset + 8); + for (bytes[offset..end1]) |b| { + try out.writeAll(" "); + try printHexValue(out, b, 2); + } + var end2 = offset + n; + if (end2 > end1) { + try out.writeAll(" "); + for (bytes[end1..end2]) |b| { + try out.writeAll(" "); + try printHexValue(out, b, 2); + } + } + const short = 16 - n; + var i: usize = 0; + while (i < short) : (i += 1) { + try out.writeAll(" "); + } + if (end2 > end1) { + try out.writeAll(" |"); + } else { + try out.writeAll(" |"); + } + try printCharValues(out, bytes[offset..end2]); + try out.writeAll("|\n"); + offset += n; + } + + try printDecValue(out, offset, 8); + try out.writeAll(":"); + try out.writeAll("\n"); +} + +fn hexDump16(out: anytype, offset: usize, bytes: []const u8) !void { + try printDecValue(out, offset, 8); + try out.writeAll(":"); + try out.writeAll(" "); + for (bytes[0..8]) |b| { + try out.writeAll(" "); + try printHexValue(out, b, 2); + } + try out.writeAll(" "); + for (bytes[8..16]) |b| { + try out.writeAll(" "); + try printHexValue(out, b, 2); + } + try out.writeAll(" |"); + try 
printCharValues(out, bytes); + try out.writeAll("|\n"); +} + +fn printDecValue(out: anytype, value: u64, width: u8) !void { + var buffer: [20]u8 = undefined; + const len = std.fmt.formatIntBuf(buffer[0..], value, 10, .lower, .{ .width = width, .fill = '0' }); + try out.writeAll(buffer[0..len]); +} + +fn printHexValue(out: anytype, value: u64, width: u8) !void { + var buffer: [16]u8 = undefined; + const len = std.fmt.formatIntBuf(buffer[0..], value, 16, .lower, .{ .width = width, .fill = '0' }); + try out.writeAll(buffer[0..len]); +} + +fn printCharValues(out: anytype, bytes: []const u8) !void { + for (bytes) |b| { + try out.writeAll(&[_]u8{printable_char_tab[b]}); + } +} + +fn printUnderstandableChar(out: anytype, char: u8) !void { + if (std.ascii.isPrint(char)) { + try out.print("'{c}'", .{char}); + } else { + try out.print("\\x{X:0>2}", .{char}); + } +} + +// zig fmt: off +const printable_char_tab: [256]u8 = ( + "................................ !\"#$%&'()*+,-./0123456789:;<=>?" ++ + "@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~." ++ + "................................................................" ++ + "................................................................" 
+).*; diff --git a/lib/std/Build/CompileStep.zig b/lib/std/Build/CompileStep.zig index 879793f781..1f145f8171 100644 --- a/lib/std/Build/CompileStep.zig +++ b/lib/std/Build/CompileStep.zig @@ -83,7 +83,7 @@ max_memory: ?u64 = null, shared_memory: bool = false, global_base: ?u64 = null, c_std: std.Build.CStd, -override_lib_dir: ?[]const u8, +zig_lib_dir: ?[]const u8, main_pkg_path: ?[]const u8, exec_cmd_args: ?[]const ?[]const u8, name_prefix: []const u8, @@ -344,7 +344,7 @@ pub fn create(builder: *std.Build, options: Options) *CompileStep { .installed_headers = ArrayList(*Step).init(builder.allocator), .object_src = undefined, .c_std = std.Build.CStd.C99, - .override_lib_dir = null, + .zig_lib_dir = null, .main_pkg_path = null, .exec_cmd_args = null, .name_prefix = "", @@ -442,6 +442,26 @@ pub fn installHeader(a: *CompileStep, src_path: []const u8, dest_rel_path: []con a.installed_headers.append(&install_file.step) catch @panic("OOM"); } +pub const InstallConfigHeaderOptions = struct { + install_dir: InstallDir = .header, + dest_rel_path: ?[]const u8 = null, +}; + +pub fn installConfigHeader( + cs: *CompileStep, + config_header: *ConfigHeaderStep, + options: InstallConfigHeaderOptions, +) void { + const dest_rel_path = options.dest_rel_path orelse config_header.include_path; + const install_file = cs.builder.addInstallFileWithDir( + .{ .generated = &config_header.output_file }, + options.install_dir, + dest_rel_path, + ); + cs.builder.getInstallStep().dependOn(&install_file.step); + cs.installed_headers.append(&install_file.step) catch @panic("OOM"); +} + pub fn installHeadersDirectory( a: *CompileStep, src_dir_path: []const u8, @@ -486,26 +506,11 @@ pub fn installLibraryHeaders(a: *CompileStep, l: *CompileStep) void { a.installed_headers.appendSlice(l.installed_headers.items) catch @panic("OOM"); } -/// Creates a `RunStep` with an executable built with `addExecutable`. -/// Add command line arguments with `addArg`. 
+/// Deprecated: use `std.Build.addRunArtifact` +/// This function will run in the context of the package that created the executable, +/// which is undesirable when running an executable provided by a dependency package. pub fn run(exe: *CompileStep) *RunStep { - assert(exe.kind == .exe or exe.kind == .test_exe); - - // It doesn't have to be native. We catch that if you actually try to run it. - // Consider that this is declarative; the run step may not be run unless a user - // option is supplied. - const run_step = RunStep.create(exe.builder, exe.builder.fmt("run {s}", .{exe.step.name})); - run_step.addArtifactArg(exe); - - if (exe.kind == .test_exe) { - run_step.addArg(exe.builder.zig_exe); - } - - if (exe.vcpkg_bin_path) |path| { - run_step.addPathDir(path); - } - - return run_step; + return exe.builder.addRunArtifact(exe); } /// Creates an `EmulatableRunStep` with an executable built with `addExecutable`. @@ -852,7 +857,7 @@ pub fn setVerboseCC(self: *CompileStep, value: bool) void { } pub fn overrideZigLibDir(self: *CompileStep, dir_path: []const u8) void { - self.override_lib_dir = self.builder.dupePath(dir_path); + self.zig_lib_dir = self.builder.dupePath(dir_path); } pub fn setMainPkgPath(self: *CompileStep, dir_path: []const u8) void { @@ -1345,10 +1350,10 @@ fn make(step: *Step) !void { } try zig_args.append("--cache-dir"); - try zig_args.append(builder.pathFromRoot(builder.cache_root)); + try zig_args.append(builder.cache_root.path orelse "."); try zig_args.append("--global-cache-dir"); - try zig_args.append(builder.pathFromRoot(builder.global_cache_root)); + try zig_args.append(builder.global_cache_root.path orelse "."); try zig_args.append("--name"); try zig_args.append(self.name); @@ -1622,8 +1627,9 @@ fn make(step: *Step) !void { } }, .config_header_step => |config_header| { - try zig_args.append("-I"); - try zig_args.append(config_header.output_dir); + const full_file_path = config_header.output_file.path.?; + const header_dir_path = 
full_file_path[0 .. full_file_path.len - config_header.include_path.len]; + try zig_args.appendSlice(&.{ "-I", header_dir_path }); }, } } @@ -1697,12 +1703,12 @@ fn make(step: *Step) !void { try addFlag(&zig_args, "each-lib-rpath", self.each_lib_rpath); try addFlag(&zig_args, "build-id", self.build_id); - if (self.override_lib_dir) |dir| { + if (self.zig_lib_dir) |dir| { try zig_args.append("--zig-lib-dir"); try zig_args.append(builder.pathFromRoot(dir)); - } else if (builder.override_lib_dir) |dir| { + } else if (builder.zig_lib_dir) |dir| { try zig_args.append("--zig-lib-dir"); - try zig_args.append(builder.pathFromRoot(dir)); + try zig_args.append(dir); } if (self.main_pkg_path) |dir| { @@ -1739,23 +1745,15 @@ fn make(step: *Step) !void { args_length += arg.len + 1; // +1 to account for null terminator } if (args_length >= 30 * 1024) { - const args_dir = try fs.path.join( - builder.allocator, - &[_][]const u8{ builder.pathFromRoot("zig-cache"), "args" }, - ); - try std.fs.cwd().makePath(args_dir); - - var args_arena = std.heap.ArenaAllocator.init(builder.allocator); - defer args_arena.deinit(); + try builder.cache_root.handle.makePath("args"); const args_to_escape = zig_args.items[2..]; - var escaped_args = try ArrayList([]const u8).initCapacity(args_arena.allocator(), args_to_escape.len); - + var escaped_args = try ArrayList([]const u8).initCapacity(builder.allocator, args_to_escape.len); arg_blk: for (args_to_escape) |arg| { for (arg) |c, arg_idx| { if (c == '\\' or c == '"') { // Slow path for arguments that need to be escaped. 
We'll need to allocate and copy - var escaped = try ArrayList(u8).initCapacity(args_arena.allocator(), arg.len + 1); + var escaped = try ArrayList(u8).initCapacity(builder.allocator, arg.len + 1); const writer = escaped.writer(); try writer.writeAll(arg[0..arg_idx]); for (arg[arg_idx..]) |to_escape| { @@ -1783,11 +1781,16 @@ fn make(step: *Step) !void { .{std.fmt.fmtSliceHexLower(&args_hash)}, ); - const args_file = try fs.path.join(builder.allocator, &[_][]const u8{ args_dir, args_hex_hash[0..] }); - try std.fs.cwd().writeFile(args_file, args); + const args_file = "args" ++ fs.path.sep_str ++ args_hex_hash; + try builder.cache_root.handle.writeFile(args_file, args); + + const resolved_args_file = try mem.concat(builder.allocator, u8, &.{ + "@", + try builder.cache_root.join(builder.allocator, &.{args_file}), + }); zig_args.shrinkRetainingCapacity(2); - try zig_args.append(try std.mem.concat(builder.allocator, u8, &[_][]const u8{ "@", args_file })); + try zig_args.append(resolved_args_file); } const output_dir_nl = try builder.execFromStep(zig_args.items, &self.step); diff --git a/lib/std/Build/ConfigHeaderStep.zig b/lib/std/Build/ConfigHeaderStep.zig index 58a78b939d..f8d6f7bd57 100644 --- a/lib/std/Build/ConfigHeaderStep.zig +++ b/lib/std/Build/ConfigHeaderStep.zig @@ -4,13 +4,24 @@ const Step = std.Build.Step; pub const base_id: Step.Id = .config_header; -pub const Style = enum { +pub const Style = union(enum) { /// The configure format supported by autotools. It uses `#undef foo` to /// mark lines that can be substituted with different values. - autoconf, + autoconf: std.Build.FileSource, /// The configure format supported by CMake. It uses `@@FOO@@` and /// `#cmakedefine` for template substitution. - cmake, + cmake: std.Build.FileSource, + /// Instead of starting with an input file, start with nothing. + blank, + /// Start with nothing, like blank, and output a nasm .asm file. 
+ nasm, + + pub fn getFileSource(style: Style) ?std.Build.FileSource { + switch (style) { + .autoconf, .cmake => |s| return s, + .blank, .nasm => return null, + } + } }; pub const Value = union(enum) { @@ -24,34 +35,50 @@ pub const Value = union(enum) { step: Step, builder: *std.Build, -source: std.Build.FileSource, +values: std.StringArrayHashMap(Value), +output_file: std.Build.GeneratedFile, + style: Style, -values: std.StringHashMap(Value), -max_bytes: usize = 2 * 1024 * 1024, -output_dir: []const u8, -output_basename: []const u8, +max_bytes: usize, +include_path: []const u8, + +pub const Options = struct { + style: Style = .blank, + max_bytes: usize = 2 * 1024 * 1024, + include_path: ?[]const u8 = null, +}; -pub fn create(builder: *std.Build, source: std.Build.FileSource, style: Style) *ConfigHeaderStep { +pub fn create(builder: *std.Build, options: Options) *ConfigHeaderStep { const self = builder.allocator.create(ConfigHeaderStep) catch @panic("OOM"); - const name = builder.fmt("configure header {s}", .{source.getDisplayName()}); + const name = if (options.style.getFileSource()) |s| + builder.fmt("configure {s} header {s}", .{ @tagName(options.style), s.getDisplayName() }) + else + builder.fmt("configure {s} header", .{@tagName(options.style)}); self.* = .{ .builder = builder, .step = Step.init(base_id, name, builder.allocator, make), - .source = source, - .style = style, - .values = std.StringHashMap(Value).init(builder.allocator), - .output_dir = undefined, - .output_basename = "config.h", + .style = options.style, + .values = std.StringArrayHashMap(Value).init(builder.allocator), + + .max_bytes = options.max_bytes, + .include_path = "config.h", + .output_file = .{ .step = &self.step }, }; - switch (source) { + + if (options.style.getFileSource()) |s| switch (s) { .path => |p| { const basename = std.fs.path.basename(p); if (std.mem.endsWith(u8, basename, ".h.in")) { - self.output_basename = basename[0 .. 
basename.len - 3]; + self.include_path = basename[0 .. basename.len - 3]; } }, else => {}, + }; + + if (options.include_path) |include_path| { + self.include_path = include_path; } + return self; } @@ -59,6 +86,10 @@ pub fn addValues(self: *ConfigHeaderStep, values: anytype) void { return addValuesInner(self, values) catch @panic("OOM"); } +pub fn getFileSource(self: *ConfigHeaderStep) std.Build.FileSource { + return .{ .generated = &self.output_file }; +} + fn addValuesInner(self: *ConfigHeaderStep, values: anytype) !void { inline for (@typeInfo(@TypeOf(values)).Struct.fields) |field| { try putValue(self, field.name, field.type, @field(values, field.name)); @@ -100,6 +131,12 @@ fn putValue(self: *ConfigHeaderStep, field_name: []const u8, comptime T: type, v return; } }, + .Int => { + if (ptr.size == .Slice and ptr.child == u8) { + try self.values.put(field_name, .{ .string = v }); + return; + } + }, else => {}, } @@ -112,8 +149,6 @@ fn putValue(self: *ConfigHeaderStep, field_name: []const u8, comptime T: type, v fn make(step: *Step) !void { const self = @fieldParentPtr(ConfigHeaderStep, "step", step); const gpa = self.builder.allocator; - const src_path = self.source.getPath(self.builder); - const contents = try std.fs.cwd().readFileAlloc(gpa, src_path, self.max_bytes); // The cache is used here not really as a way to speed things up - because writing // the data to a file would probably be very fast - but as a way to find a canonical @@ -130,9 +165,39 @@ fn make(step: *Step) !void { // Random bytes to make ConfigHeaderStep unique. Refresh this with new // random bytes when ConfigHeaderStep implementation is modified in a // non-backwards-compatible way. 
- var hash = Hasher.init("X1pQzdDt91Zlh7Eh"); - hash.update(self.source.getDisplayName()); - hash.update(contents); + var hash = Hasher.init("PGuDTpidxyMqnkGM"); + + var output = std.ArrayList(u8).init(gpa); + defer output.deinit(); + + const header_text = "This file was generated by ConfigHeaderStep using the Zig Build System."; + const c_generated_line = "/* " ++ header_text ++ " */\n"; + const asm_generated_line = "; " ++ header_text ++ "\n"; + + switch (self.style) { + .autoconf => |file_source| { + try output.appendSlice(c_generated_line); + const src_path = file_source.getPath(self.builder); + const contents = try std.fs.cwd().readFileAlloc(gpa, src_path, self.max_bytes); + try render_autoconf(contents, &output, self.values, src_path); + }, + .cmake => |file_source| { + try output.appendSlice(c_generated_line); + const src_path = file_source.getPath(self.builder); + const contents = try std.fs.cwd().readFileAlloc(gpa, src_path, self.max_bytes); + try render_cmake(contents, &output, self.values, src_path); + }, + .blank => { + try output.appendSlice(c_generated_line); + try render_blank(&output, self.values, self.include_path); + }, + .nasm => { + try output.appendSlice(asm_generated_line); + try render_nasm(&output, self.values); + }, + } + + hash.update(output.items); var digest: [16]u8 = undefined; hash.final(&digest); @@ -143,38 +208,40 @@ fn make(step: *Step) !void { .{std.fmt.fmtSliceHexLower(&digest)}, ) catch unreachable; - self.output_dir = try std.fs.path.join(gpa, &[_][]const u8{ - self.builder.cache_root, "o", &hash_basename, - }); - var dir = std.fs.cwd().makeOpenPath(self.output_dir, .{}) catch |err| { - std.debug.print("unable to make path {s}: {s}\n", .{ self.output_dir, @errorName(err) }); + const output_dir = try self.builder.cache_root.join(gpa, &.{ "o", &hash_basename }); + + // If output_path has directory parts, deal with them. 
Example: + // output_dir is zig-cache/o/HASH + // output_path is libavutil/avconfig.h + // We want to open directory zig-cache/o/HASH/libavutil/ + // but keep output_dir as zig-cache/o/HASH for -I include + const sub_dir_path = if (std.fs.path.dirname(self.include_path)) |d| + try std.fs.path.join(gpa, &.{ output_dir, d }) + else + output_dir; + + var dir = std.fs.cwd().makeOpenPath(sub_dir_path, .{}) catch |err| { + std.debug.print("unable to make path {s}: {s}\n", .{ output_dir, @errorName(err) }); return err; }; defer dir.close(); - var values_copy = try self.values.clone(); - defer values_copy.deinit(); - - var output = std.ArrayList(u8).init(gpa); - defer output.deinit(); - try output.ensureTotalCapacity(contents.len); - - try output.appendSlice("/* This file was generated by ConfigHeaderStep using the Zig Build System. */\n"); + try dir.writeFile(std.fs.path.basename(self.include_path), output.items); - switch (self.style) { - .autoconf => try render_autoconf(contents, &output, &values_copy, src_path), - .cmake => try render_cmake(contents, &output, &values_copy, src_path), - } - - try dir.writeFile(self.output_basename, output.items); + self.output_file.path = try std.fs.path.join(self.builder.allocator, &.{ + output_dir, self.include_path, + }); } fn render_autoconf( contents: []const u8, output: *std.ArrayList(u8), - values_copy: *std.StringHashMap(Value), + values: std.StringArrayHashMap(Value), src_path: []const u8, ) !void { + var values_copy = try values.clone(); + defer values_copy.deinit(); + var any_errors = false; var line_index: u32 = 0; var line_it = std.mem.split(u8, contents, "\n"); @@ -192,22 +259,18 @@ fn render_autoconf( continue; } const name = it.rest(); - const kv = values_copy.fetchRemove(name) orelse { + const kv = values_copy.fetchSwapRemove(name) orelse { std.debug.print("{s}:{d}: error: unspecified config header value: '{s}'\n", .{ src_path, line_index + 1, name, }); any_errors = true; continue; }; - try renderValue(output, name, 
kv.value); + try renderValueC(output, name, kv.value); } - { - var it = values_copy.iterator(); - while (it.next()) |entry| { - const name = entry.key_ptr.*; - std.debug.print("{s}: error: config header value unused: '{s}'\n", .{ src_path, name }); - } + for (values_copy.keys()) |name| { + std.debug.print("{s}: error: config header value unused: '{s}'\n", .{ src_path, name }); } if (any_errors) { @@ -218,9 +281,12 @@ fn render_autoconf( fn render_cmake( contents: []const u8, output: *std.ArrayList(u8), - values_copy: *std.StringHashMap(Value), + values: std.StringArrayHashMap(Value), src_path: []const u8, ) !void { + var values_copy = try values.clone(); + defer values_copy.deinit(); + var any_errors = false; var line_index: u32 = 0; var line_it = std.mem.split(u8, contents, "\n"); @@ -244,22 +310,18 @@ fn render_cmake( any_errors = true; continue; }; - const kv = values_copy.fetchRemove(name) orelse { + const kv = values_copy.fetchSwapRemove(name) orelse { std.debug.print("{s}:{d}: error: unspecified config header value: '{s}'\n", .{ src_path, line_index + 1, name, }); any_errors = true; continue; }; - try renderValue(output, name, kv.value); + try renderValueC(output, name, kv.value); } - { - var it = values_copy.iterator(); - while (it.next()) |entry| { - const name = entry.key_ptr.*; - std.debug.print("{s}: error: config header value unused: '{s}'\n", .{ src_path, name }); - } + for (values_copy.keys()) |name| { + std.debug.print("{s}: error: config header value unused: '{s}'\n", .{ src_path, name }); } if (any_errors) { @@ -267,7 +329,44 @@ fn render_cmake( } } -fn renderValue(output: *std.ArrayList(u8), name: []const u8, value: Value) !void { +fn render_blank( + output: *std.ArrayList(u8), + defines: std.StringArrayHashMap(Value), + include_path: []const u8, +) !void { + const include_guard_name = try output.allocator.dupe(u8, include_path); + for (include_guard_name) |*byte| { + switch (byte.*) { + 'a'...'z' => byte.* = byte.* - 'a' + 'A', + 'A'...'Z', 
'0'...'9' => continue, + else => byte.* = '_', + } + } + + try output.appendSlice("#ifndef "); + try output.appendSlice(include_guard_name); + try output.appendSlice("\n#define "); + try output.appendSlice(include_guard_name); + try output.appendSlice("\n"); + + const values = defines.values(); + for (defines.keys()) |name, i| { + try renderValueC(output, name, values[i]); + } + + try output.appendSlice("#endif /* "); + try output.appendSlice(include_guard_name); + try output.appendSlice(" */\n"); +} + +fn render_nasm(output: *std.ArrayList(u8), defines: std.StringArrayHashMap(Value)) !void { + const values = defines.values(); + for (defines.keys()) |name, i| { + try renderValueNasm(output, name, values[i]); + } +} + +fn renderValueC(output: *std.ArrayList(u8), name: []const u8, value: Value) !void { switch (value) { .undef => { try output.appendSlice("/* #undef "); @@ -297,3 +396,33 @@ fn renderValue(output: *std.ArrayList(u8), name: []const u8, value: Value) !void }, } } + +fn renderValueNasm(output: *std.ArrayList(u8), name: []const u8, value: Value) !void { + switch (value) { + .undef => { + try output.appendSlice("; %undef "); + try output.appendSlice(name); + try output.appendSlice("\n"); + }, + .defined => { + try output.appendSlice("%define "); + try output.appendSlice(name); + try output.appendSlice("\n"); + }, + .boolean => |b| { + try output.appendSlice("%define "); + try output.appendSlice(name); + try output.appendSlice(if (b) " 1\n" else " 0\n"); + }, + .int => |i| { + try output.writer().print("%define {s} {d}\n", .{ name, i }); + }, + .ident => |ident| { + try output.writer().print("%define {s} {s}\n", .{ name, ident }); + }, + .string => |string| { + // TODO: use nasm-specific escaping instead of zig string literals + try output.writer().print("%define {s} \"{}\"\n", .{ name, std.zig.fmtEscapes(string) }); + }, + } +} diff --git a/lib/std/Build/OptionsStep.zig b/lib/std/Build/OptionsStep.zig index 8a50456539..e5c3e23821 100644 --- 
a/lib/std/Build/OptionsStep.zig +++ b/lib/std/Build/OptionsStep.zig @@ -234,26 +234,20 @@ fn make(step: *Step) !void { ); } - const options_directory = self.builder.pathFromRoot( - try fs.path.join( - self.builder.allocator, - &[_][]const u8{ self.builder.cache_root, "options" }, - ), - ); - - try fs.cwd().makePath(options_directory); + var options_dir = try self.builder.cache_root.handle.makeOpenPath("options", .{}); + defer options_dir.close(); - const options_file = try fs.path.join( - self.builder.allocator, - &[_][]const u8{ options_directory, &self.hashContentsToFileName() }, - ); + const basename = self.hashContentsToFileName(); - try fs.cwd().writeFile(options_file, self.contents.items); + try options_dir.writeFile(&basename, self.contents.items); - self.generated_file.path = options_file; + self.generated_file.path = try self.builder.cache_root.join(self.builder.allocator, &.{ + "options", &basename, + }); } fn hashContentsToFileName(self: *OptionsStep) [64]u8 { + // TODO update to use the cache system instead of this // This implementation is copied from `WriteFileStep.make` var hash = std.crypto.hash.blake2.Blake2b384.init(.{}); @@ -289,13 +283,19 @@ test "OptionsStep" { const host = try std.zig.system.NativeTargetInfo.detect(.{}); + var cache: std.Build.Cache = .{ + .gpa = arena.allocator(), + .manifest_dir = std.fs.cwd(), + }; + var builder = try std.Build.create( arena.allocator(), "test", - "test", - "test", - "test", + .{ .path = "test", .handle = std.fs.cwd() }, + .{ .path = "test", .handle = std.fs.cwd() }, + .{ .path = "test", .handle = std.fs.cwd() }, host, + &cache, ); defer builder.destroy(); diff --git a/lib/std/Build/RunStep.zig b/lib/std/Build/RunStep.zig index 07f2363623..5bc271409a 100644 --- a/lib/std/Build/RunStep.zig +++ b/lib/std/Build/RunStep.zig @@ -39,6 +39,14 @@ expected_exit_code: ?u8 = 0, /// Print the command before running it print: bool, +/// Controls whether execution is skipped if the output file is up-to-date. 
+/// The default is to always run if there is no output file, and to skip +/// running if all output files are up-to-date. +condition: enum { output_outdated, always } = .output_outdated, + +/// Additional file paths relative to build.zig that, when modified, indicate +/// that the RunStep should be re-executed. +extra_file_dependencies: []const []const u8 = &.{}, pub const StdIoAction = union(enum) { inherit, @@ -51,6 +59,12 @@ pub const Arg = union(enum) { artifact: *CompileStep, file_source: std.Build.FileSource, bytes: []u8, + output: Output, + + pub const Output = struct { + generated_file: *std.Build.GeneratedFile, + basename: []const u8, + }; }; pub fn create(builder: *std.Build, name: []const u8) *RunStep { @@ -71,6 +85,20 @@ pub fn addArtifactArg(self: *RunStep, artifact: *CompileStep) void { self.step.dependOn(&artifact.step); } +/// This provides file path as a command line argument to the command being +/// run, and returns a FileSource which can be used as inputs to other APIs +/// throughout the build system. 
+pub fn addOutputFileArg(rs: *RunStep, basename: []const u8) std.Build.FileSource { + const generated_file = rs.builder.allocator.create(std.Build.GeneratedFile) catch @panic("OOM"); + generated_file.* = .{ .step = &rs.step }; + rs.argv.append(.{ .output = .{ + .generated_file = generated_file, + .basename = rs.builder.dupe(basename), + } }) catch @panic("OOM"); + + return .{ .generated = generated_file }; +} + pub fn addFileSourceArg(self: *RunStep, file_source: std.Build.FileSource) void { self.argv.append(Arg{ .file_source = file_source.dupe(self.builder), @@ -159,25 +187,105 @@ fn stdIoActionToBehavior(action: StdIoAction) std.ChildProcess.StdIo { }; } +fn needOutputCheck(self: RunStep) bool { + if (self.extra_file_dependencies.len > 0) return true; + + for (self.argv.items) |arg| switch (arg) { + .output => return true, + else => continue, + }; + + return switch (self.condition) { + .always => false, + .output_outdated => true, + }; +} + fn make(step: *Step) !void { const self = @fieldParentPtr(RunStep, "step", step); + const need_output_check = self.needOutputCheck(); var argv_list = ArrayList([]const u8).init(self.builder.allocator); + var output_placeholders = ArrayList(struct { + index: usize, + output: Arg.Output, + }).init(self.builder.allocator); + + var man = self.builder.cache.obtain(); + defer man.deinit(); + for (self.argv.items) |arg| { switch (arg) { - .bytes => |bytes| try argv_list.append(bytes), - .file_source => |file| try argv_list.append(file.getPath(self.builder)), + .bytes => |bytes| { + try argv_list.append(bytes); + man.hash.addBytes(bytes); + }, + .file_source => |file| { + const file_path = file.getPath(self.builder); + try argv_list.append(file_path); + _ = try man.addFile(file_path, null); + }, .artifact => |artifact| { if (artifact.target.isWindows()) { // On Windows we don't have rpaths so we have to add .dll search paths to PATH self.addPathForDynLibs(artifact); } - const executable_path = artifact.installed_path orelse 
artifact.getOutputSource().getPath(self.builder); - try argv_list.append(executable_path); + const file_path = artifact.installed_path orelse + artifact.getOutputSource().getPath(self.builder); + + try argv_list.append(file_path); + + _ = try man.addFile(file_path, null); + }, + .output => |output| { + man.hash.addBytes(output.basename); + // Add a placeholder into the argument list because we need the + // manifest hash to be updated with all arguments before the + // object directory is computed. + try argv_list.append(""); + try output_placeholders.append(.{ + .index = argv_list.items.len - 1, + .output = output, + }); }, } } + if (need_output_check) { + for (self.extra_file_dependencies) |file_path| { + _ = try man.addFile(self.builder.pathFromRoot(file_path), null); + } + + if (man.hit() catch |err| failWithCacheError(man, err)) { + // cache hit, skip running command + const digest = man.final(); + for (output_placeholders.items) |placeholder| { + placeholder.output.generated_file.path = try self.builder.cache_root.join( + self.builder.allocator, + &.{ "o", &digest, placeholder.output.basename }, + ); + } + return; + } + + const digest = man.final(); + + for (output_placeholders.items) |placeholder| { + const output_path = try self.builder.cache_root.join( + self.builder.allocator, + &.{ "o", &digest, placeholder.output.basename }, + ); + const output_dir = fs.path.dirname(output_path).?; + fs.cwd().makePath(output_dir) catch |err| { + std.debug.print("unable to make path {s}: {s}\n", .{ output_dir, @errorName(err) }); + return err; + }; + + placeholder.output.generated_file.path = output_path; + argv_list.items[placeholder.index] = output_path; + } + } + try runCommand( argv_list.items, self.builder, @@ -189,6 +297,10 @@ fn make(step: *Step) !void { self.cwd, self.print, ); + + if (need_output_check) { + try man.writeManifest(); + } } pub fn runCommand( @@ -202,11 +314,13 @@ pub fn runCommand( maybe_cwd: ?[]const u8, print: bool, ) !void { - const cwd = if 
(maybe_cwd) |cwd| builder.pathFromRoot(cwd) else builder.build_root; + const cwd = if (maybe_cwd) |cwd| builder.pathFromRoot(cwd) else builder.build_root.path; if (!std.process.can_spawn) { const cmd = try std.mem.join(builder.allocator, " ", argv); - std.debug.print("the following command cannot be executed ({s} does not support spawning a child process):\n{s}", .{ @tagName(builtin.os.tag), cmd }); + std.debug.print("the following command cannot be executed ({s} does not support spawning a child process):\n{s}", .{ + @tagName(builtin.os.tag), cmd, + }); builder.allocator.free(cmd); return ExecError.ExecNotSupported; } @@ -347,6 +461,19 @@ pub fn runCommand( } } +fn failWithCacheError(man: std.Build.Cache.Manifest, err: anyerror) noreturn { + const i = man.failed_file_index orelse failWithSimpleError(err); + const pp = man.files.items[i].prefixed_path orelse failWithSimpleError(err); + const prefix = man.cache.prefixes()[pp.prefix].path orelse ""; + std.debug.print("{s}: {s}/{s}\n", .{ @errorName(err), prefix, pp.sub_path }); + std.process.exit(1); +} + +fn failWithSimpleError(err: anyerror) noreturn { + std.debug.print("{s}\n", .{@errorName(err)}); + std.process.exit(1); +} + fn printCmd(cwd: ?[]const u8, argv: []const []const u8) void { if (cwd) |yes_cwd| std.debug.print("cd {s} && ", .{yes_cwd}); for (argv) |arg| { diff --git a/lib/std/Build/TranslateCStep.zig b/lib/std/Build/TranslateCStep.zig index d9874142d8..fb0adfd0ae 100644 --- a/lib/std/Build/TranslateCStep.zig +++ b/lib/std/Build/TranslateCStep.zig @@ -15,7 +15,6 @@ builder: *std.Build, source: std.Build.FileSource, include_dirs: std.ArrayList([]const u8), c_macros: std.ArrayList([]const u8), -output_dir: ?[]const u8, out_basename: []const u8, target: CrossTarget, optimize: std.builtin.OptimizeMode, @@ -36,7 +35,6 @@ pub fn create(builder: *std.Build, options: Options) *TranslateCStep { .source = source, .include_dirs = std.ArrayList([]const u8).init(builder.allocator), .c_macros = std.ArrayList([]const 
u8).init(builder.allocator), - .output_dir = null, .out_basename = undefined, .target = options.target, .optimize = options.optimize, @@ -122,15 +120,10 @@ fn make(step: *Step) !void { const output_path = mem.trimRight(u8, output_path_nl, "\r\n"); self.out_basename = fs.path.basename(output_path); - if (self.output_dir) |output_dir| { - const full_dest = try fs.path.join(self.builder.allocator, &[_][]const u8{ output_dir, self.out_basename }); - try self.builder.updateFile(output_path, full_dest); - } else { - self.output_dir = fs.path.dirname(output_path).?; - } + const output_dir = fs.path.dirname(output_path).?; self.output_file.path = try fs.path.join( self.builder.allocator, - &[_][]const u8{ self.output_dir.?, self.out_basename }, + &[_][]const u8{ output_dir, self.out_basename }, ); } diff --git a/lib/std/Build/WriteFileStep.zig b/lib/std/Build/WriteFileStep.zig index 9e8fcdc203..1621295ad8 100644 --- a/lib/std/Build/WriteFileStep.zig +++ b/lib/std/Build/WriteFileStep.zig @@ -9,7 +9,6 @@ pub const base_id = .write_file; step: Step, builder: *std.Build, -output_dir: []const u8, files: std.TailQueue(File), pub const File = struct { @@ -23,7 +22,6 @@ pub fn init(builder: *std.Build) WriteFileStep { .builder = builder, .step = Step.init(.write_file, "writefile", builder.allocator, make), .files = .{}, - .output_dir = undefined, }; } @@ -87,11 +85,11 @@ fn make(step: *Step) !void { .{std.fmt.fmtSliceHexLower(&digest)}, ) catch unreachable; - self.output_dir = try fs.path.join(self.builder.allocator, &[_][]const u8{ - self.builder.cache_root, "o", &hash_basename, + const output_dir = try self.builder.cache_root.join(self.builder.allocator, &.{ + "o", &hash_basename, }); - var dir = fs.cwd().makeOpenPath(self.output_dir, .{}) catch |err| { - std.debug.print("unable to make path {s}: {s}\n", .{ self.output_dir, @errorName(err) }); + var dir = fs.cwd().makeOpenPath(output_dir, .{}) catch |err| { + std.debug.print("unable to make path {s}: {s}\n", .{ output_dir, 
@errorName(err) }); return err; }; defer dir.close(); @@ -101,14 +99,14 @@ fn make(step: *Step) !void { dir.writeFile(node.data.basename, node.data.bytes) catch |err| { std.debug.print("unable to write {s} into {s}: {s}\n", .{ node.data.basename, - self.output_dir, + output_dir, @errorName(err), }); return err; }; node.data.source.path = try fs.path.join( self.builder.allocator, - &[_][]const u8{ self.output_dir, node.data.basename }, + &[_][]const u8{ output_dir, node.data.basename }, ); } } diff --git a/lib/std/c.zig b/lib/std/c.zig index 57d5beae56..1334b2f2c1 100644 --- a/lib/std/c.zig +++ b/lib/std/c.zig @@ -173,6 +173,7 @@ pub extern "c" fn readlink(noalias path: [*:0]const u8, noalias buf: [*]u8, bufs pub extern "c" fn readlinkat(dirfd: c.fd_t, noalias path: [*:0]const u8, noalias buf: [*]u8, bufsize: usize) isize; pub extern "c" fn fchmod(fd: c.fd_t, mode: c.mode_t) c_int; pub extern "c" fn fchown(fd: c.fd_t, owner: c.uid_t, group: c.gid_t) c_int; +pub extern "c" fn umask(mode: c.mode_t) c.mode_t; pub extern "c" fn rmdir(path: [*:0]const u8) c_int; pub extern "c" fn getenv(name: [*:0]const u8) ?[*:0]u8; diff --git a/lib/std/child_process.zig b/lib/std/child_process.zig index 21d7b4fe3e..003e37d76b 100644 --- a/lib/std/child_process.zig +++ b/lib/std/child_process.zig @@ -1164,7 +1164,7 @@ fn windowsCreateProcessPathExt( var app_name_unicode_string = windows.UNICODE_STRING{ .Length = app_name_len_bytes, .MaximumLength = app_name_len_bytes, - .Buffer = @qualCast([*:0]u16, app_name_wildcard.ptr), + .Buffer = @constCast(app_name_wildcard.ptr), }; const rc = windows.ntdll.NtQueryDirectoryFile( dir.fd, @@ -1261,7 +1261,7 @@ fn windowsCreateProcessPathExt( var app_name_unicode_string = windows.UNICODE_STRING{ .Length = app_name_len_bytes, .MaximumLength = app_name_len_bytes, - .Buffer = @qualCast([*:0]u16, app_name_appended.ptr), + .Buffer = @constCast(app_name_appended.ptr), }; // Re-use the directory handle but this time we call with the appended app name diff 
--git a/lib/std/compress.zig b/lib/std/compress.zig index 334d7bfcb8..9af1b30259 100644 --- a/lib/std/compress.zig +++ b/lib/std/compress.zig @@ -2,8 +2,10 @@ const std = @import("std.zig"); pub const deflate = @import("compress/deflate.zig"); pub const gzip = @import("compress/gzip.zig"); -pub const zlib = @import("compress/zlib.zig"); +pub const lzma = @import("compress/lzma.zig"); +pub const lzma2 = @import("compress/lzma2.zig"); pub const xz = @import("compress/xz.zig"); +pub const zlib = @import("compress/zlib.zig"); pub fn HashedReader( comptime ReaderType: anytype, @@ -38,6 +40,8 @@ pub fn hashedReader( test { _ = deflate; _ = gzip; - _ = zlib; + _ = lzma; + _ = lzma2; _ = xz; + _ = zlib; } diff --git a/lib/std/compress/lzma.zig b/lib/std/compress/lzma.zig new file mode 100644 index 0000000000..8bb8c19da1 --- /dev/null +++ b/lib/std/compress/lzma.zig @@ -0,0 +1,90 @@ +const std = @import("../std.zig"); +const math = std.math; +const mem = std.mem; +const Allocator = std.mem.Allocator; + +pub const decode = @import("lzma/decode.zig"); + +pub fn decompress( + allocator: Allocator, + reader: anytype, +) !Decompress(@TypeOf(reader)) { + return decompressWithOptions(allocator, reader, .{}); +} + +pub fn decompressWithOptions( + allocator: Allocator, + reader: anytype, + options: decode.Options, +) !Decompress(@TypeOf(reader)) { + const params = try decode.Params.readHeader(reader, options); + return Decompress(@TypeOf(reader)).init(allocator, reader, params, options.memlimit); +} + +pub fn Decompress(comptime ReaderType: type) type { + return struct { + const Self = @This(); + + pub const Error = + ReaderType.Error || + Allocator.Error || + error{ CorruptInput, EndOfStream, Overflow }; + + pub const Reader = std.io.Reader(*Self, Error, read); + + allocator: Allocator, + in_reader: ReaderType, + to_read: std.ArrayListUnmanaged(u8), + + buffer: decode.lzbuffer.LzCircularBuffer, + decoder: decode.rangecoder.RangeDecoder, + state: decode.DecoderState, + + pub fn 
init(allocator: Allocator, source: ReaderType, params: decode.Params, memlimit: ?usize) !Self { + return Self{ + .allocator = allocator, + .in_reader = source, + .to_read = .{}, + + .buffer = decode.lzbuffer.LzCircularBuffer.init(params.dict_size, memlimit orelse math.maxInt(usize)), + .decoder = try decode.rangecoder.RangeDecoder.init(source), + .state = try decode.DecoderState.init(allocator, params.properties, params.unpacked_size), + }; + } + + pub fn reader(self: *Self) Reader { + return .{ .context = self }; + } + + pub fn deinit(self: *Self) void { + self.to_read.deinit(self.allocator); + self.buffer.deinit(self.allocator); + self.state.deinit(self.allocator); + self.* = undefined; + } + + pub fn read(self: *Self, output: []u8) Error!usize { + const writer = self.to_read.writer(self.allocator); + while (self.to_read.items.len < output.len) { + switch (try self.state.process(self.allocator, self.in_reader, writer, &self.buffer, &self.decoder)) { + .continue_ => {}, + .finished => { + try self.buffer.finish(writer); + break; + }, + } + } + const input = self.to_read.items; + const n = math.min(input.len, output.len); + mem.copy(u8, output[0..n], input[0..n]); + mem.copy(u8, input, input[n..]); + self.to_read.shrinkRetainingCapacity(input.len - n); + return n; + } + }; +} + +test { + _ = @import("lzma/test.zig"); + _ = @import("lzma/vec2d.zig"); +} diff --git a/lib/std/compress/lzma/decode.zig b/lib/std/compress/lzma/decode.zig new file mode 100644 index 0000000000..6c9a3ae862 --- /dev/null +++ b/lib/std/compress/lzma/decode.zig @@ -0,0 +1,379 @@ +const std = @import("../../std.zig"); +const assert = std.debug.assert; +const math = std.math; +const Allocator = std.mem.Allocator; + +pub const lzbuffer = @import("decode/lzbuffer.zig"); +pub const rangecoder = @import("decode/rangecoder.zig"); + +const LzCircularBuffer = lzbuffer.LzCircularBuffer; +const BitTree = rangecoder.BitTree; +const LenDecoder = rangecoder.LenDecoder; +const RangeDecoder = 
rangecoder.RangeDecoder; +const Vec2D = @import("vec2d.zig").Vec2D; + +pub const Options = struct { + unpacked_size: UnpackedSize = .read_from_header, + memlimit: ?usize = null, + allow_incomplete: bool = false, +}; + +pub const UnpackedSize = union(enum) { + read_from_header, + read_header_but_use_provided: ?u64, + use_provided: ?u64, +}; + +const ProcessingStatus = enum { + continue_, + finished, +}; + +pub const Properties = struct { + lc: u4, + lp: u3, + pb: u3, + + fn validate(self: Properties) void { + assert(self.lc <= 8); + assert(self.lp <= 4); + assert(self.pb <= 4); + } +}; + +pub const Params = struct { + properties: Properties, + dict_size: u32, + unpacked_size: ?u64, + + pub fn readHeader(reader: anytype, options: Options) !Params { + var props = try reader.readByte(); + if (props >= 225) { + return error.CorruptInput; + } + + const lc = @intCast(u4, props % 9); + props /= 9; + const lp = @intCast(u3, props % 5); + props /= 5; + const pb = @intCast(u3, props); + + const dict_size_provided = try reader.readIntLittle(u32); + const dict_size = math.max(0x1000, dict_size_provided); + + const unpacked_size = switch (options.unpacked_size) { + .read_from_header => blk: { + const unpacked_size_provided = try reader.readIntLittle(u64); + const marker_mandatory = unpacked_size_provided == 0xFFFF_FFFF_FFFF_FFFF; + break :blk if (marker_mandatory) + null + else + unpacked_size_provided; + }, + .read_header_but_use_provided => |x| blk: { + _ = try reader.readIntLittle(u64); + break :blk x; + }, + .use_provided => |x| x, + }; + + return Params{ + .properties = Properties{ .lc = lc, .lp = lp, .pb = pb }, + .dict_size = dict_size, + .unpacked_size = unpacked_size, + }; + } +}; + +pub const DecoderState = struct { + lzma_props: Properties, + unpacked_size: ?u64, + literal_probs: Vec2D(u16), + pos_slot_decoder: [4]BitTree(6), + align_decoder: BitTree(4), + pos_decoders: [115]u16, + is_match: [192]u16, + is_rep: [12]u16, + is_rep_g0: [12]u16, + is_rep_g1: [12]u16, + 
is_rep_g2: [12]u16, + is_rep_0long: [192]u16, + state: usize, + rep: [4]usize, + len_decoder: LenDecoder, + rep_len_decoder: LenDecoder, + + pub fn init( + allocator: Allocator, + lzma_props: Properties, + unpacked_size: ?u64, + ) !DecoderState { + return .{ + .lzma_props = lzma_props, + .unpacked_size = unpacked_size, + .literal_probs = try Vec2D(u16).init(allocator, 0x400, .{ @as(usize, 1) << (lzma_props.lc + lzma_props.lp), 0x300 }), + .pos_slot_decoder = .{.{}} ** 4, + .align_decoder = .{}, + .pos_decoders = .{0x400} ** 115, + .is_match = .{0x400} ** 192, + .is_rep = .{0x400} ** 12, + .is_rep_g0 = .{0x400} ** 12, + .is_rep_g1 = .{0x400} ** 12, + .is_rep_g2 = .{0x400} ** 12, + .is_rep_0long = .{0x400} ** 192, + .state = 0, + .rep = .{0} ** 4, + .len_decoder = .{}, + .rep_len_decoder = .{}, + }; + } + + pub fn deinit(self: *DecoderState, allocator: Allocator) void { + self.literal_probs.deinit(allocator); + self.* = undefined; + } + + pub fn resetState(self: *DecoderState, allocator: Allocator, new_props: Properties) !void { + new_props.validate(); + if (self.lzma_props.lc + self.lzma_props.lp == new_props.lc + new_props.lp) { + self.literal_probs.fill(0x400); + } else { + self.literal_probs.deinit(allocator); + self.literal_probs = try Vec2D(u16).init(allocator, 0x400, .{ @as(usize, 1) << (new_props.lc + new_props.lp), 0x300 }); + } + + self.lzma_props = new_props; + for (self.pos_slot_decoder) |*t| t.reset(); + self.align_decoder.reset(); + self.pos_decoders = .{0x400} ** 115; + self.is_match = .{0x400} ** 192; + self.is_rep = .{0x400} ** 12; + self.is_rep_g0 = .{0x400} ** 12; + self.is_rep_g1 = .{0x400} ** 12; + self.is_rep_g2 = .{0x400} ** 12; + self.is_rep_0long = .{0x400} ** 192; + self.state = 0; + self.rep = .{0} ** 4; + self.len_decoder.reset(); + self.rep_len_decoder.reset(); + } + + fn processNextInner( + self: *DecoderState, + allocator: Allocator, + reader: anytype, + writer: anytype, + buffer: anytype, + decoder: *RangeDecoder, + update: bool, + ) 
!ProcessingStatus { + const pos_state = buffer.len & ((@as(usize, 1) << self.lzma_props.pb) - 1); + + if (!try decoder.decodeBit( + reader, + &self.is_match[(self.state << 4) + pos_state], + update, + )) { + const byte: u8 = try self.decodeLiteral(reader, buffer, decoder, update); + + if (update) { + try buffer.appendLiteral(allocator, byte, writer); + + self.state = if (self.state < 4) + 0 + else if (self.state < 10) + self.state - 3 + else + self.state - 6; + } + return .continue_; + } + + var len: usize = undefined; + if (try decoder.decodeBit(reader, &self.is_rep[self.state], update)) { + if (!try decoder.decodeBit(reader, &self.is_rep_g0[self.state], update)) { + if (!try decoder.decodeBit( + reader, + &self.is_rep_0long[(self.state << 4) + pos_state], + update, + )) { + if (update) { + self.state = if (self.state < 7) 9 else 11; + const dist = self.rep[0] + 1; + try buffer.appendLz(allocator, 1, dist, writer); + } + return .continue_; + } + } else { + const idx: usize = if (!try decoder.decodeBit(reader, &self.is_rep_g1[self.state], update)) + 1 + else if (!try decoder.decodeBit(reader, &self.is_rep_g2[self.state], update)) + 2 + else + 3; + if (update) { + const dist = self.rep[idx]; + var i = idx; + while (i > 0) : (i -= 1) { + self.rep[i] = self.rep[i - 1]; + } + self.rep[0] = dist; + } + } + + len = try self.rep_len_decoder.decode(reader, decoder, pos_state, update); + + if (update) { + self.state = if (self.state < 7) 8 else 11; + } + } else { + if (update) { + self.rep[3] = self.rep[2]; + self.rep[2] = self.rep[1]; + self.rep[1] = self.rep[0]; + } + + len = try self.len_decoder.decode(reader, decoder, pos_state, update); + + if (update) { + self.state = if (self.state < 7) 7 else 10; + } + + const rep_0 = try self.decodeDistance(reader, decoder, len, update); + + if (update) { + self.rep[0] = rep_0; + if (self.rep[0] == 0xFFFF_FFFF) { + if (decoder.isFinished()) { + return .finished; + } + return error.CorruptInput; + } + } + } + + if (update) { + len 
+= 2; + + const dist = self.rep[0] + 1; + try buffer.appendLz(allocator, len, dist, writer); + } + + return .continue_; + } + + fn processNext( + self: *DecoderState, + allocator: Allocator, + reader: anytype, + writer: anytype, + buffer: anytype, + decoder: *RangeDecoder, + ) !ProcessingStatus { + return self.processNextInner(allocator, reader, writer, buffer, decoder, true); + } + + pub fn process( + self: *DecoderState, + allocator: Allocator, + reader: anytype, + writer: anytype, + buffer: anytype, + decoder: *RangeDecoder, + ) !ProcessingStatus { + process_next: { + if (self.unpacked_size) |unpacked_size| { + if (buffer.len >= unpacked_size) { + break :process_next; + } + } else if (decoder.isFinished()) { + break :process_next; + } + + switch (try self.processNext(allocator, reader, writer, buffer, decoder)) { + .continue_ => return .continue_, + .finished => break :process_next, + } + } + + if (self.unpacked_size) |unpacked_size| { + if (buffer.len != unpacked_size) { + return error.CorruptInput; + } + } + + return .finished; + } + + fn decodeLiteral( + self: *DecoderState, + reader: anytype, + buffer: anytype, + decoder: *RangeDecoder, + update: bool, + ) !u8 { + const def_prev_byte = 0; + const prev_byte = @as(usize, buffer.lastOr(def_prev_byte)); + + var result: usize = 1; + const lit_state = ((buffer.len & ((@as(usize, 1) << self.lzma_props.lp) - 1)) << self.lzma_props.lc) + + (prev_byte >> (8 - self.lzma_props.lc)); + const probs = try self.literal_probs.getMut(lit_state); + + if (self.state >= 7) { + var match_byte = @as(usize, try buffer.lastN(self.rep[0] + 1)); + + while (result < 0x100) { + const match_bit = (match_byte >> 7) & 1; + match_byte <<= 1; + const bit = @boolToInt(try decoder.decodeBit( + reader, + &probs[((@as(usize, 1) + match_bit) << 8) + result], + update, + )); + result = (result << 1) ^ bit; + if (match_bit != bit) { + break; + } + } + } + + while (result < 0x100) { + result = (result << 1) ^ @boolToInt(try decoder.decodeBit(reader, 
&probs[result], update)); + } + + return @truncate(u8, result - 0x100); + } + + fn decodeDistance( + self: *DecoderState, + reader: anytype, + decoder: *RangeDecoder, + length: usize, + update: bool, + ) !usize { + const len_state = if (length > 3) 3 else length; + + const pos_slot = @as(usize, try self.pos_slot_decoder[len_state].parse(reader, decoder, update)); + if (pos_slot < 4) + return pos_slot; + + const num_direct_bits = @intCast(u5, (pos_slot >> 1) - 1); + var result = (2 ^ (pos_slot & 1)) << num_direct_bits; + + if (pos_slot < 14) { + result += try decoder.parseReverseBitTree( + reader, + num_direct_bits, + &self.pos_decoders, + result - pos_slot, + update, + ); + } else { + result += @as(usize, try decoder.get(reader, num_direct_bits - 4)) << 4; + result += try self.align_decoder.parseReverse(reader, decoder, update); + } + + return result; + } +}; diff --git a/lib/std/compress/lzma/decode/lzbuffer.zig b/lib/std/compress/lzma/decode/lzbuffer.zig new file mode 100644 index 0000000000..80c470c5f9 --- /dev/null +++ b/lib/std/compress/lzma/decode/lzbuffer.zig @@ -0,0 +1,228 @@ +const std = @import("../../../std.zig"); +const math = std.math; +const mem = std.mem; +const Allocator = std.mem.Allocator; +const ArrayListUnmanaged = std.ArrayListUnmanaged; + +/// An accumulating buffer for LZ sequences +pub const LzAccumBuffer = struct { + /// Buffer + buf: ArrayListUnmanaged(u8), + + /// Buffer memory limit + memlimit: usize, + + /// Total number of bytes sent through the buffer + len: usize, + + const Self = @This(); + + pub fn init(memlimit: usize) Self { + return Self{ + .buf = .{}, + .memlimit = memlimit, + .len = 0, + }; + } + + pub fn appendByte(self: *Self, allocator: Allocator, byte: u8) !void { + try self.buf.append(allocator, byte); + self.len += 1; + } + + /// Reset the internal dictionary + pub fn reset(self: *Self, writer: anytype) !void { + try writer.writeAll(self.buf.items); + self.buf.clearRetainingCapacity(); + self.len = 0; + } + + /// 
Retrieve the last byte or return a default + pub fn lastOr(self: Self, lit: u8) u8 { + const buf_len = self.buf.items.len; + return if (buf_len == 0) + lit + else + self.buf.items[buf_len - 1]; + } + + /// Retrieve the n-th last byte + pub fn lastN(self: Self, dist: usize) !u8 { + const buf_len = self.buf.items.len; + if (dist > buf_len) { + return error.CorruptInput; + } + + return self.buf.items[buf_len - dist]; + } + + /// Append a literal + pub fn appendLiteral( + self: *Self, + allocator: Allocator, + lit: u8, + writer: anytype, + ) !void { + _ = writer; + if (self.len >= self.memlimit) { + return error.CorruptInput; + } + try self.buf.append(allocator, lit); + self.len += 1; + } + + /// Fetch an LZ sequence (length, distance) from inside the buffer + pub fn appendLz( + self: *Self, + allocator: Allocator, + len: usize, + dist: usize, + writer: anytype, + ) !void { + _ = writer; + + const buf_len = self.buf.items.len; + if (dist > buf_len) { + return error.CorruptInput; + } + + var offset = buf_len - dist; + var i: usize = 0; + while (i < len) : (i += 1) { + const x = self.buf.items[offset]; + try self.buf.append(allocator, x); + offset += 1; + } + self.len += len; + } + + pub fn finish(self: *Self, writer: anytype) !void { + try writer.writeAll(self.buf.items); + self.buf.clearRetainingCapacity(); + } + + pub fn deinit(self: *Self, allocator: Allocator) void { + self.buf.deinit(allocator); + self.* = undefined; + } +}; + +/// A circular buffer for LZ sequences +pub const LzCircularBuffer = struct { + /// Circular buffer + buf: ArrayListUnmanaged(u8), + + /// Length of the buffer + dict_size: usize, + + /// Buffer memory limit + memlimit: usize, + + /// Current position + cursor: usize, + + /// Total number of bytes sent through the buffer + len: usize, + + const Self = @This(); + + pub fn init(dict_size: usize, memlimit: usize) Self { + return Self{ + .buf = .{}, + .dict_size = dict_size, + .memlimit = memlimit, + .cursor = 0, + .len = 0, + }; + } + + pub fn 
get(self: Self, index: usize) u8 { + return if (0 <= index and index < self.buf.items.len) + self.buf.items[index] + else + 0; + } + + pub fn set(self: *Self, allocator: Allocator, index: usize, value: u8) !void { + if (index >= self.memlimit) { + return error.CorruptInput; + } + try self.buf.ensureTotalCapacity(allocator, index + 1); + while (self.buf.items.len < index) { + self.buf.appendAssumeCapacity(0); + } + self.buf.appendAssumeCapacity(value); + } + + /// Retrieve the last byte or return a default + pub fn lastOr(self: Self, lit: u8) u8 { + return if (self.len == 0) + lit + else + self.get((self.dict_size + self.cursor - 1) % self.dict_size); + } + + /// Retrieve the n-th last byte + pub fn lastN(self: Self, dist: usize) !u8 { + if (dist > self.dict_size or dist > self.len) { + return error.CorruptInput; + } + + const offset = (self.dict_size + self.cursor - dist) % self.dict_size; + return self.get(offset); + } + + /// Append a literal + pub fn appendLiteral( + self: *Self, + allocator: Allocator, + lit: u8, + writer: anytype, + ) !void { + try self.set(allocator, self.cursor, lit); + self.cursor += 1; + self.len += 1; + + // Flush the circular buffer to the output + if (self.cursor == self.dict_size) { + try writer.writeAll(self.buf.items); + self.cursor = 0; + } + } + + /// Fetch an LZ sequence (length, distance) from inside the buffer + pub fn appendLz( + self: *Self, + allocator: Allocator, + len: usize, + dist: usize, + writer: anytype, + ) !void { + if (dist > self.dict_size or dist > self.len) { + return error.CorruptInput; + } + + var offset = (self.dict_size + self.cursor - dist) % self.dict_size; + var i: usize = 0; + while (i < len) : (i += 1) { + const x = self.get(offset); + try self.appendLiteral(allocator, x, writer); + offset += 1; + if (offset == self.dict_size) { + offset = 0; + } + } + } + + pub fn finish(self: *Self, writer: anytype) !void { + if (self.cursor > 0) { + try writer.writeAll(self.buf.items[0..self.cursor]); + self.cursor = 
0; + } + } + + pub fn deinit(self: *Self, allocator: Allocator) void { + self.buf.deinit(allocator); + self.* = undefined; + } +}; diff --git a/lib/std/compress/lzma/decode/rangecoder.zig b/lib/std/compress/lzma/decode/rangecoder.zig new file mode 100644 index 0000000000..6b6ca15997 --- /dev/null +++ b/lib/std/compress/lzma/decode/rangecoder.zig @@ -0,0 +1,181 @@ +const std = @import("../../../std.zig"); +const mem = std.mem; + +pub const RangeDecoder = struct { + range: u32, + code: u32, + + pub fn init(reader: anytype) !RangeDecoder { + const reserved = try reader.readByte(); + if (reserved != 0) { + return error.CorruptInput; + } + return RangeDecoder{ + .range = 0xFFFF_FFFF, + .code = try reader.readIntBig(u32), + }; + } + + pub fn fromParts( + range: u32, + code: u32, + ) RangeDecoder { + return .{ + .range = range, + .code = code, + }; + } + + pub fn set(self: *RangeDecoder, range: u32, code: u32) void { + self.range = range; + self.code = code; + } + + pub inline fn isFinished(self: RangeDecoder) bool { + return self.code == 0; + } + + inline fn normalize(self: *RangeDecoder, reader: anytype) !void { + if (self.range < 0x0100_0000) { + self.range <<= 8; + self.code = (self.code << 8) ^ @as(u32, try reader.readByte()); + } + } + + inline fn getBit(self: *RangeDecoder, reader: anytype) !bool { + self.range >>= 1; + + const bit = self.code >= self.range; + if (bit) + self.code -= self.range; + + try self.normalize(reader); + return bit; + } + + pub fn get(self: *RangeDecoder, reader: anytype, count: usize) !u32 { + var result: u32 = 0; + var i: usize = 0; + while (i < count) : (i += 1) + result = (result << 1) ^ @boolToInt(try self.getBit(reader)); + return result; + } + + pub inline fn decodeBit(self: *RangeDecoder, reader: anytype, prob: *u16, update: bool) !bool { + const bound = (self.range >> 11) * prob.*; + + if (self.code < bound) { + if (update) + prob.* += (0x800 - prob.*) >> 5; + self.range = bound; + + try self.normalize(reader); + return false; + } 
else { + if (update) + prob.* -= prob.* >> 5; + self.code -= bound; + self.range -= bound; + + try self.normalize(reader); + return true; + } + } + + fn parseBitTree( + self: *RangeDecoder, + reader: anytype, + num_bits: u5, + probs: []u16, + update: bool, + ) !u32 { + var tmp: u32 = 1; + var i: @TypeOf(num_bits) = 0; + while (i < num_bits) : (i += 1) { + const bit = try self.decodeBit(reader, &probs[tmp], update); + tmp = (tmp << 1) ^ @boolToInt(bit); + } + return tmp - (@as(u32, 1) << num_bits); + } + + pub fn parseReverseBitTree( + self: *RangeDecoder, + reader: anytype, + num_bits: u5, + probs: []u16, + offset: usize, + update: bool, + ) !u32 { + var result: u32 = 0; + var tmp: usize = 1; + var i: @TypeOf(num_bits) = 0; + while (i < num_bits) : (i += 1) { + const bit = @boolToInt(try self.decodeBit(reader, &probs[offset + tmp], update)); + tmp = (tmp << 1) ^ bit; + result ^= @as(u32, bit) << i; + } + return result; + } +}; + +pub fn BitTree(comptime num_bits: usize) type { + return struct { + probs: [1 << num_bits]u16 = .{0x400} ** (1 << num_bits), + + const Self = @This(); + + pub fn parse( + self: *Self, + reader: anytype, + decoder: *RangeDecoder, + update: bool, + ) !u32 { + return decoder.parseBitTree(reader, num_bits, &self.probs, update); + } + + pub fn parseReverse( + self: *Self, + reader: anytype, + decoder: *RangeDecoder, + update: bool, + ) !u32 { + return decoder.parseReverseBitTree(reader, num_bits, &self.probs, 0, update); + } + + pub fn reset(self: *Self) void { + mem.set(u16, &self.probs, 0x400); + } + }; +} + +pub const LenDecoder = struct { + choice: u16 = 0x400, + choice2: u16 = 0x400, + low_coder: [16]BitTree(3) = .{.{}} ** 16, + mid_coder: [16]BitTree(3) = .{.{}} ** 16, + high_coder: BitTree(8) = .{}, + + pub fn decode( + self: *LenDecoder, + reader: anytype, + decoder: *RangeDecoder, + pos_state: usize, + update: bool, + ) !usize { + if (!try decoder.decodeBit(reader, &self.choice, update)) { + return @as(usize, try 
self.low_coder[pos_state].parse(reader, decoder, update)); + } else if (!try decoder.decodeBit(reader, &self.choice2, update)) { + return @as(usize, try self.mid_coder[pos_state].parse(reader, decoder, update)) + 8; + } else { + return @as(usize, try self.high_coder.parse(reader, decoder, update)) + 16; + } + } + + pub fn reset(self: *LenDecoder) void { + self.choice = 0x400; + self.choice2 = 0x400; + for (self.low_coder) |*t| t.reset(); + for (self.mid_coder) |*t| t.reset(); + self.high_coder.reset(); + } +}; diff --git a/lib/std/compress/lzma/test.zig b/lib/std/compress/lzma/test.zig new file mode 100644 index 0000000000..bdfe2909d8 --- /dev/null +++ b/lib/std/compress/lzma/test.zig @@ -0,0 +1,89 @@ +const std = @import("../../std.zig"); +const lzma = @import("../lzma.zig"); + +fn testDecompress(compressed: []const u8) ![]u8 { + const allocator = std.testing.allocator; + var stream = std.io.fixedBufferStream(compressed); + var decompressor = try lzma.decompress(allocator, stream.reader()); + defer decompressor.deinit(); + const reader = decompressor.reader(); + return reader.readAllAlloc(allocator, std.math.maxInt(usize)); +} + +fn testDecompressEqual(expected: []const u8, compressed: []const u8) !void { + const allocator = std.testing.allocator; + const decomp = try testDecompress(compressed); + defer allocator.free(decomp); + try std.testing.expectEqualSlices(u8, expected, decomp); +} + +fn testDecompressError(expected: anyerror, compressed: []const u8) !void { + return std.testing.expectError(expected, testDecompress(compressed)); +} + +test "LZMA: decompress empty world" { + try testDecompressEqual( + "", + &[_]u8{ + 0x5d, 0x00, 0x00, 0x80, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x83, 0xff, + 0xfb, 0xff, 0xff, 0xc0, 0x00, 0x00, 0x00, + }, + ); +} + +test "LZMA: decompress hello world" { + try testDecompressEqual( + "Hello world\n", + &[_]u8{ + 0x5d, 0x00, 0x00, 0x80, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x24, 0x19, 
+ 0x49, 0x98, 0x6f, 0x10, 0x19, 0xc6, 0xd7, 0x31, 0xeb, 0x36, 0x50, 0xb2, 0x98, 0x48, 0xff, 0xfe, + 0xa5, 0xb0, 0x00, + }, + ); +} + +test "LZMA: decompress huge dict" { + try testDecompressEqual( + "Hello world\n", + &[_]u8{ + 0x5d, 0x7f, 0x7f, 0x7f, 0x7f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x24, 0x19, + 0x49, 0x98, 0x6f, 0x10, 0x19, 0xc6, 0xd7, 0x31, 0xeb, 0x36, 0x50, 0xb2, 0x98, 0x48, 0xff, 0xfe, + 0xa5, 0xb0, 0x00, + }, + ); +} + +test "LZMA: unknown size with end of payload marker" { + try testDecompressEqual( + "Hello\nWorld!\n", + @embedFile("testdata/good-unknown_size-with_eopm.lzma"), + ); +} + +test "LZMA: known size without end of payload marker" { + try testDecompressEqual( + "Hello\nWorld!\n", + @embedFile("testdata/good-known_size-without_eopm.lzma"), + ); +} + +test "LZMA: known size with end of payload marker" { + try testDecompressEqual( + "Hello\nWorld!\n", + @embedFile("testdata/good-known_size-with_eopm.lzma"), + ); +} + +test "LZMA: too big uncompressed size in header" { + try testDecompressError( + error.CorruptInput, + @embedFile("testdata/bad-too_big_size-with_eopm.lzma"), + ); +} + +test "LZMA: too small uncompressed size in header" { + try testDecompressError( + error.CorruptInput, + @embedFile("testdata/bad-too_small_size-without_eopm-3.lzma"), + ); +} diff --git a/lib/std/compress/lzma/testdata/bad-too_big_size-with_eopm.lzma b/lib/std/compress/lzma/testdata/bad-too_big_size-with_eopm.lzma Binary files differnew file mode 100644 index 0000000000..b7cd3b05fc --- /dev/null +++ b/lib/std/compress/lzma/testdata/bad-too_big_size-with_eopm.lzma diff --git a/lib/std/compress/lzma/testdata/bad-too_small_size-without_eopm-3.lzma b/lib/std/compress/lzma/testdata/bad-too_small_size-without_eopm-3.lzma Binary files differnew file mode 100644 index 0000000000..67a1af3457 --- /dev/null +++ b/lib/std/compress/lzma/testdata/bad-too_small_size-without_eopm-3.lzma diff --git 
a/lib/std/compress/lzma/testdata/good-known_size-with_eopm.lzma b/lib/std/compress/lzma/testdata/good-known_size-with_eopm.lzma Binary files differnew file mode 100644 index 0000000000..1b45307930 --- /dev/null +++ b/lib/std/compress/lzma/testdata/good-known_size-with_eopm.lzma diff --git a/lib/std/compress/lzma/testdata/good-known_size-without_eopm.lzma b/lib/std/compress/lzma/testdata/good-known_size-without_eopm.lzma Binary files differnew file mode 100644 index 0000000000..83623fde97 --- /dev/null +++ b/lib/std/compress/lzma/testdata/good-known_size-without_eopm.lzma diff --git a/lib/std/compress/lzma/testdata/good-unknown_size-with_eopm.lzma b/lib/std/compress/lzma/testdata/good-unknown_size-with_eopm.lzma Binary files differnew file mode 100644 index 0000000000..0f4ff822e9 --- /dev/null +++ b/lib/std/compress/lzma/testdata/good-unknown_size-with_eopm.lzma diff --git a/lib/std/compress/lzma/vec2d.zig b/lib/std/compress/lzma/vec2d.zig new file mode 100644 index 0000000000..1372d3592c --- /dev/null +++ b/lib/std/compress/lzma/vec2d.zig @@ -0,0 +1,128 @@ +const std = @import("../../std.zig"); +const math = std.math; +const mem = std.mem; +const Allocator = std.mem.Allocator; + +pub fn Vec2D(comptime T: type) type { + return struct { + data: []T, + cols: usize, + + const Self = @This(); + + pub fn init(allocator: Allocator, value: T, size: struct { usize, usize }) !Self { + const len = try math.mul(usize, size[0], size[1]); + const data = try allocator.alloc(T, len); + mem.set(T, data, value); + return Self{ + .data = data, + .cols = size[1], + }; + } + + pub fn deinit(self: *Self, allocator: Allocator) void { + allocator.free(self.data); + self.* = undefined; + } + + pub fn fill(self: *Self, value: T) void { + mem.set(T, self.data, value); + } + + inline fn _get(self: Self, row: usize) ![]T { + const start_row = try math.mul(usize, row, self.cols); + const end_row = try math.add(usize, start_row, self.cols); + return self.data[start_row..end_row]; + } + + pub fn 
get(self: Self, row: usize) ![]const T { + return self._get(row); + } + + pub fn getMut(self: *Self, row: usize) ![]T { + return self._get(row); + } + }; +} + +const testing = std.testing; +const expectEqualSlices = std.testing.expectEqualSlices; +const expectError = std.testing.expectError; + +test "Vec2D.init" { + const allocator = testing.allocator; + var vec2d = try Vec2D(i32).init(allocator, 1, .{ 2, 3 }); + defer vec2d.deinit(allocator); + + try expectEqualSlices(i32, &.{ 1, 1, 1 }, try vec2d.get(0)); + try expectEqualSlices(i32, &.{ 1, 1, 1 }, try vec2d.get(1)); +} + +test "Vec2D.init overflow" { + const allocator = testing.allocator; + try expectError( + error.Overflow, + Vec2D(i32).init(allocator, 1, .{ math.maxInt(usize), math.maxInt(usize) }), + ); +} + +test "Vec2D.fill" { + const allocator = testing.allocator; + var vec2d = try Vec2D(i32).init(allocator, 0, .{ 2, 3 }); + defer vec2d.deinit(allocator); + + vec2d.fill(7); + + try expectEqualSlices(i32, &.{ 7, 7, 7 }, try vec2d.get(0)); + try expectEqualSlices(i32, &.{ 7, 7, 7 }, try vec2d.get(1)); +} + +test "Vec2D.get" { + var data = [_]i32{ 0, 1, 2, 3, 4, 5, 6, 7 }; + const vec2d = Vec2D(i32){ + .data = &data, + .cols = 2, + }; + + try expectEqualSlices(i32, &.{ 0, 1 }, try vec2d.get(0)); + try expectEqualSlices(i32, &.{ 2, 3 }, try vec2d.get(1)); + try expectEqualSlices(i32, &.{ 4, 5 }, try vec2d.get(2)); + try expectEqualSlices(i32, &.{ 6, 7 }, try vec2d.get(3)); +} + +test "Vec2D.getMut" { + var data = [_]i32{ 0, 1, 2, 3, 4, 5, 6, 7 }; + var vec2d = Vec2D(i32){ + .data = &data, + .cols = 2, + }; + + const row = try vec2d.getMut(1); + row[1] = 9; + + try expectEqualSlices(i32, &.{ 0, 1 }, try vec2d.get(0)); + // (1, 1) should be 9. 
+ try expectEqualSlices(i32, &.{ 2, 9 }, try vec2d.get(1)); + try expectEqualSlices(i32, &.{ 4, 5 }, try vec2d.get(2)); + try expectEqualSlices(i32, &.{ 6, 7 }, try vec2d.get(3)); +} + +test "Vec2D.get multiplication overflow" { + const allocator = testing.allocator; + var matrix = try Vec2D(i32).init(allocator, 0, .{ 3, 4 }); + defer matrix.deinit(allocator); + + const row = (math.maxInt(usize) / 4) + 1; + try expectError(error.Overflow, matrix.get(row)); + try expectError(error.Overflow, matrix.getMut(row)); +} + +test "Vec2D.get addition overflow" { + const allocator = testing.allocator; + var matrix = try Vec2D(i32).init(allocator, 0, .{ 3, 5 }); + defer matrix.deinit(allocator); + + const row = math.maxInt(usize) / 5; + try expectError(error.Overflow, matrix.get(row)); + try expectError(error.Overflow, matrix.getMut(row)); +} diff --git a/lib/std/compress/lzma2.zig b/lib/std/compress/lzma2.zig new file mode 100644 index 0000000000..2797990f9c --- /dev/null +++ b/lib/std/compress/lzma2.zig @@ -0,0 +1,26 @@ +const std = @import("../std.zig"); +const Allocator = std.mem.Allocator; + +pub const decode = @import("lzma2/decode.zig"); + +pub fn decompress( + allocator: Allocator, + reader: anytype, + writer: anytype, +) !void { + var decoder = try decode.Decoder.init(allocator); + defer decoder.deinit(allocator); + return decoder.decompress(allocator, reader, writer); +} + +test { + const expected = "Hello\nWorld!\n"; + const compressed = &[_]u8{ 0x01, 0x00, 0x05, 0x48, 0x65, 0x6C, 0x6C, 0x6F, 0x0A, 0x02, 0x00, 0x06, 0x57, 0x6F, 0x72, 0x6C, 0x64, 0x21, 0x0A, 0x00 }; + + const allocator = std.testing.allocator; + var decomp = std.ArrayList(u8).init(allocator); + defer decomp.deinit(); + var stream = std.io.fixedBufferStream(compressed); + try decompress(allocator, stream.reader(), decomp.writer()); + try std.testing.expectEqualSlices(u8, expected, decomp.items); +} diff --git a/lib/std/compress/lzma2/decode.zig b/lib/std/compress/lzma2/decode.zig new file mode 100644 
index 0000000000..7297a1a51b --- /dev/null +++ b/lib/std/compress/lzma2/decode.zig @@ -0,0 +1,169 @@ +const std = @import("../../std.zig"); +const Allocator = std.mem.Allocator; + +const lzma = @import("../lzma.zig"); +const DecoderState = lzma.decode.DecoderState; +const LzAccumBuffer = lzma.decode.lzbuffer.LzAccumBuffer; +const Properties = lzma.decode.Properties; +const RangeDecoder = lzma.decode.rangecoder.RangeDecoder; + +pub const Decoder = struct { + lzma_state: DecoderState, + + pub fn init(allocator: Allocator) !Decoder { + return Decoder{ + .lzma_state = try DecoderState.init( + allocator, + Properties{ + .lc = 0, + .lp = 0, + .pb = 0, + }, + null, + ), + }; + } + + pub fn deinit(self: *Decoder, allocator: Allocator) void { + self.lzma_state.deinit(allocator); + self.* = undefined; + } + + pub fn decompress( + self: *Decoder, + allocator: Allocator, + reader: anytype, + writer: anytype, + ) !void { + var accum = LzAccumBuffer.init(std.math.maxInt(usize)); + defer accum.deinit(allocator); + + while (true) { + const status = try reader.readByte(); + + switch (status) { + 0 => break, + 1 => try parseUncompressed(allocator, reader, writer, &accum, true), + 2 => try parseUncompressed(allocator, reader, writer, &accum, false), + else => try self.parseLzma(allocator, reader, writer, &accum, status), + } + } + + try accum.finish(writer); + } + + fn parseLzma( + self: *Decoder, + allocator: Allocator, + reader: anytype, + writer: anytype, + accum: *LzAccumBuffer, + status: u8, + ) !void { + if (status & 0x80 == 0) { + return error.CorruptInput; + } + + const Reset = struct { + dict: bool, + state: bool, + props: bool, + }; + + const reset = switch ((status >> 5) & 0x3) { + 0 => Reset{ + .dict = false, + .state = false, + .props = false, + }, + 1 => Reset{ + .dict = false, + .state = true, + .props = false, + }, + 2 => Reset{ + .dict = false, + .state = true, + .props = true, + }, + 3 => Reset{ + .dict = true, + .state = true, + .props = true, + }, + else => 
unreachable, + }; + + const unpacked_size = blk: { + var tmp: u64 = status & 0x1F; + tmp <<= 16; + tmp |= try reader.readIntBig(u16); + break :blk tmp + 1; + }; + + const packed_size = blk: { + const tmp: u17 = try reader.readIntBig(u16); + break :blk tmp + 1; + }; + + if (reset.dict) { + try accum.reset(writer); + } + + if (reset.state) { + var new_props = self.lzma_state.lzma_props; + + if (reset.props) { + var props = try reader.readByte(); + if (props >= 225) { + return error.CorruptInput; + } + + const lc = @intCast(u4, props % 9); + props /= 9; + const lp = @intCast(u3, props % 5); + props /= 5; + const pb = @intCast(u3, props); + + if (lc + lp > 4) { + return error.CorruptInput; + } + + new_props = Properties{ .lc = lc, .lp = lp, .pb = pb }; + } + + try self.lzma_state.resetState(allocator, new_props); + } + + self.lzma_state.unpacked_size = unpacked_size + accum.len; + + var counter = std.io.countingReader(reader); + const counter_reader = counter.reader(); + + var rangecoder = try RangeDecoder.init(counter_reader); + while (try self.lzma_state.process(allocator, counter_reader, writer, accum, &rangecoder) == .continue_) {} + + if (counter.bytes_read != packed_size) { + return error.CorruptInput; + } + } + + fn parseUncompressed( + allocator: Allocator, + reader: anytype, + writer: anytype, + accum: *LzAccumBuffer, + reset_dict: bool, + ) !void { + const unpacked_size = @as(u17, try reader.readIntBig(u16)) + 1; + + if (reset_dict) { + try accum.reset(writer); + } + + var i: @TypeOf(unpacked_size) = 0; + while (i < unpacked_size) : (i += 1) { + try accum.appendByte(allocator, try reader.readByte()); + } + } +}; diff --git a/lib/std/compress/xz.zig b/lib/std/compress/xz.zig index 1b2a543ad1..40735ca6b6 100644 --- a/lib/std/compress/xz.zig +++ b/lib/std/compress/xz.zig @@ -118,7 +118,7 @@ pub fn Decompress(comptime ReaderType: type) type { var hasher = std.compress.hashedReader(self.in_reader, Crc32.init()); const hashed_reader = hasher.reader(); - const 
backward_size = (try hashed_reader.readIntLittle(u32) + 1) * 4; + const backward_size = (@as(u64, try hashed_reader.readIntLittle(u32)) + 1) * 4; if (backward_size != index_size) return error.CorruptInput; diff --git a/lib/std/compress/xz/block.zig b/lib/std/compress/xz/block.zig index 1b909beaf4..8d3d8f0353 100644 --- a/lib/std/compress/xz/block.zig +++ b/lib/std/compress/xz/block.zig @@ -1,6 +1,7 @@ const std = @import("../../std.zig"); -const lzma = @import("lzma.zig"); +const lzma2 = std.compress.lzma2; const Allocator = std.mem.Allocator; +const ArrayListUnmanaged = std.ArrayListUnmanaged; const Crc32 = std.hash.Crc32; const Crc64 = std.hash.crc.Crc64Xz; const Sha256 = std.crypto.hash.sha2.Sha256; @@ -32,8 +33,7 @@ pub fn Decoder(comptime ReaderType: type) type { inner_reader: ReaderType, check: xz.Check, err: ?Error, - accum: lzma.LzAccumBuffer, - lzma_state: lzma.DecoderState, + to_read: ArrayListUnmanaged(u8), block_count: usize, fn init(allocator: Allocator, in_reader: ReaderType, check: xz.Check) !Self { @@ -42,15 +42,13 @@ pub fn Decoder(comptime ReaderType: type) type { .inner_reader = in_reader, .check = check, .err = null, - .accum = .{}, - .lzma_state = try lzma.DecoderState.init(allocator), + .to_read = .{}, .block_count = 0, }; } pub fn deinit(self: *Self) void { - self.accum.deinit(self.allocator); - self.lzma_state.deinit(self.allocator); + self.to_read.deinit(self.allocator); } pub fn reader(self: *Self) Reader { @@ -59,9 +57,13 @@ pub fn Decoder(comptime ReaderType: type) type { pub fn read(self: *Self, output: []u8) Error!usize { while (true) { - if (self.accum.to_read.items.len > 0) { - const n = self.accum.read(output); - if (self.accum.to_read.items.len == 0 and self.err != null) { + if (self.to_read.items.len > 0) { + const input = self.to_read.items; + const n = std.math.min(input.len, output.len); + std.mem.copy(u8, output[0..n], input[0..n]); + std.mem.copy(u8, input, input[n..]); + self.to_read.shrinkRetainingCapacity(input.len - n); + 
if (self.to_read.items.len == 0 and self.err != null) { if (self.err.? == DecodeError.EndOfStreamWithNoError) { return n; } @@ -77,15 +79,12 @@ pub fn Decoder(comptime ReaderType: type) type { } self.readBlock() catch |e| { self.err = e; - if (self.accum.to_read.items.len == 0) { - try self.accum.reset(self.allocator); - } }; } } fn readBlock(self: *Self) Error!void { - const unpacked_pos = self.accum.to_read.items.len; + const unpacked_pos = self.to_read.items.len; var block_counter = std.io.countingReader(self.inner_reader); const block_reader = block_counter.reader(); @@ -98,7 +97,7 @@ pub fn Decoder(comptime ReaderType: type) type { var header_hasher = std.compress.hashedReader(block_reader, Crc32.init()); const header_reader = header_hasher.reader(); - const header_size = try header_reader.readByte() * 4; + const header_size = @as(u64, try header_reader.readByte()) * 4; if (header_size == 0) return error.EndOfStreamWithNoError; @@ -156,15 +155,18 @@ pub fn Decoder(comptime ReaderType: type) type { // Compressed Data var packed_counter = std.io.countingReader(block_reader); - const packed_reader = packed_counter.reader(); - while (try self.readLzma2Chunk(packed_reader)) {} + try lzma2.decompress( + self.allocator, + packed_counter.reader(), + self.to_read.writer(self.allocator), + ); if (packed_size) |s| { if (s != packed_counter.bytes_read) return error.CorruptInput; } - const unpacked_bytes = self.accum.to_read.items[unpacked_pos..]; + const unpacked_bytes = self.to_read.items[unpacked_pos..]; if (unpacked_size) |s| { if (s != unpacked_bytes.len) return error.CorruptInput; @@ -205,113 +207,5 @@ pub fn Decoder(comptime ReaderType: type) type { self.block_count += 1; } - - fn readLzma2Chunk(self: *Self, packed_reader: anytype) Error!bool { - const status = try packed_reader.readByte(); - switch (status) { - 0 => { - try self.accum.reset(self.allocator); - return false; - }, - 1, 2 => { - if (status == 1) - try self.accum.reset(self.allocator); - - const size = 
try packed_reader.readIntBig(u16) + 1; - try self.accum.ensureUnusedCapacity(self.allocator, size); - - var i: usize = 0; - while (i < size) : (i += 1) - self.accum.appendAssumeCapacity(try packed_reader.readByte()); - - return true; - }, - else => { - if (status & 0x80 == 0) - return error.CorruptInput; - - const Reset = struct { - dict: bool, - state: bool, - props: bool, - }; - - const reset = switch ((status >> 5) & 0x3) { - 0 => Reset{ - .dict = false, - .state = false, - .props = false, - }, - 1 => Reset{ - .dict = false, - .state = true, - .props = false, - }, - 2 => Reset{ - .dict = false, - .state = true, - .props = true, - }, - 3 => Reset{ - .dict = true, - .state = true, - .props = true, - }, - else => unreachable, - }; - - const unpacked_size = blk: { - var tmp: u64 = status & 0x1F; - tmp <<= 16; - tmp |= try packed_reader.readIntBig(u16); - break :blk tmp + 1; - }; - - const packed_size = blk: { - const tmp: u17 = try packed_reader.readIntBig(u16); - break :blk tmp + 1; - }; - - if (reset.dict) - try self.accum.reset(self.allocator); - - if (reset.state) { - var new_props = self.lzma_state.lzma_props; - - if (reset.props) { - var props = try packed_reader.readByte(); - if (props >= 225) - return error.CorruptInput; - - const lc = @intCast(u4, props % 9); - props /= 9; - const lp = @intCast(u3, props % 5); - props /= 5; - const pb = @intCast(u3, props); - - if (lc + lp > 4) - return error.CorruptInput; - - new_props = .{ .lc = lc, .lp = lp, .pb = pb }; - } - - try self.lzma_state.reset_state(self.allocator, new_props); - } - - self.lzma_state.unpacked_size = unpacked_size + self.accum.len(); - - const buffer = try self.allocator.alloc(u8, packed_size); - defer self.allocator.free(buffer); - - for (buffer) |*b| - b.* = try packed_reader.readByte(); - - var rangecoder = try lzma.RangeDecoder.init(buffer); - try self.lzma_state.process(self.allocator, &self.accum, &rangecoder); - - return true; - }, - } - } }; } diff --git a/lib/std/compress/xz/lzma.zig 
b/lib/std/compress/xz/lzma.zig deleted file mode 100644 index 9fe941e2b1..0000000000 --- a/lib/std/compress/xz/lzma.zig +++ /dev/null @@ -1,658 +0,0 @@ -// Ported from https://github.com/gendx/lzma-rs - -const std = @import("../../std.zig"); -const assert = std.debug.assert; -const Allocator = std.mem.Allocator; -const ArrayListUnmanaged = std.ArrayListUnmanaged; - -const LzmaProperties = struct { - lc: u4, - lp: u3, - pb: u3, - - fn validate(self: LzmaProperties) void { - assert(self.lc <= 8); - assert(self.lp <= 4); - assert(self.pb <= 4); - } -}; - -pub const DecoderState = struct { - lzma_props: LzmaProperties, - unpacked_size: ?u64, - literal_probs: Vec2D(u16), - pos_slot_decoder: [4]BitTree, - align_decoder: BitTree, - pos_decoders: [115]u16, - is_match: [192]u16, - is_rep: [12]u16, - is_rep_g0: [12]u16, - is_rep_g1: [12]u16, - is_rep_g2: [12]u16, - is_rep_0long: [192]u16, - state: usize, - rep: [4]usize, - len_decoder: LenDecoder, - rep_len_decoder: LenDecoder, - - pub fn init(allocator: Allocator) !DecoderState { - return .{ - .lzma_props = LzmaProperties{ .lc = 0, .lp = 0, .pb = 0 }, - .unpacked_size = null, - .literal_probs = try Vec2D(u16).init(allocator, 0x400, 1, 0x300), - .pos_slot_decoder = .{ - try BitTree.init(allocator, 6), - try BitTree.init(allocator, 6), - try BitTree.init(allocator, 6), - try BitTree.init(allocator, 6), - }, - .align_decoder = try BitTree.init(allocator, 4), - .pos_decoders = .{0x400} ** 115, - .is_match = .{0x400} ** 192, - .is_rep = .{0x400} ** 12, - .is_rep_g0 = .{0x400} ** 12, - .is_rep_g1 = .{0x400} ** 12, - .is_rep_g2 = .{0x400} ** 12, - .is_rep_0long = .{0x400} ** 192, - .state = 0, - .rep = .{0} ** 4, - .len_decoder = try LenDecoder.init(allocator), - .rep_len_decoder = try LenDecoder.init(allocator), - }; - } - - pub fn deinit(self: *DecoderState, allocator: Allocator) void { - self.literal_probs.deinit(allocator); - for (self.pos_slot_decoder) |*t| t.deinit(allocator); - self.align_decoder.deinit(allocator); - 
self.len_decoder.deinit(allocator); - self.rep_len_decoder.deinit(allocator); - } - - pub fn reset_state(self: *DecoderState, allocator: Allocator, new_props: LzmaProperties) !void { - new_props.validate(); - if (self.lzma_props.lc + self.lzma_props.lp == new_props.lc + new_props.lp) { - self.literal_probs.fill(0x400); - } else { - self.literal_probs.deinit(allocator); - self.literal_probs = try Vec2D(u16).init(allocator, 0x400, @as(usize, 1) << (new_props.lc + new_props.lp), 0x300); - } - - self.lzma_props = new_props; - for (self.pos_slot_decoder) |*t| t.reset(); - self.align_decoder.reset(); - self.pos_decoders = .{0x400} ** 115; - self.is_match = .{0x400} ** 192; - self.is_rep = .{0x400} ** 12; - self.is_rep_g0 = .{0x400} ** 12; - self.is_rep_g1 = .{0x400} ** 12; - self.is_rep_g2 = .{0x400} ** 12; - self.is_rep_0long = .{0x400} ** 192; - self.state = 0; - self.rep = .{0} ** 4; - self.len_decoder.reset(); - self.rep_len_decoder.reset(); - } - - fn processNextInner( - self: *DecoderState, - allocator: Allocator, - output: *LzAccumBuffer, - rangecoder: *RangeDecoder, - update: bool, - ) !ProcessingStatus { - const pos_state = output.len() & ((@as(usize, 1) << self.lzma_props.pb) - 1); - - if (!try rangecoder.decodeBit( - &self.is_match[(self.state << 4) + pos_state], - update, - )) { - const byte: u8 = try self.decodeLiteral(output, rangecoder, update); - - if (update) { - try output.appendLiteral(allocator, byte); - - self.state = if (self.state < 4) - 0 - else if (self.state < 10) - self.state - 3 - else - self.state - 6; - } - return .continue_; - } - - var len: usize = undefined; - if (try rangecoder.decodeBit(&self.is_rep[self.state], update)) { - if (!try rangecoder.decodeBit(&self.is_rep_g0[self.state], update)) { - if (!try rangecoder.decodeBit( - &self.is_rep_0long[(self.state << 4) + pos_state], - update, - )) { - if (update) { - self.state = if (self.state < 7) 9 else 11; - const dist = self.rep[0] + 1; - try output.appendLz(allocator, 1, dist); - } - 
return .continue_; - } - } else { - const idx: usize = if (!try rangecoder.decodeBit(&self.is_rep_g1[self.state], update)) - 1 - else if (!try rangecoder.decodeBit(&self.is_rep_g2[self.state], update)) - 2 - else - 3; - if (update) { - const dist = self.rep[idx]; - var i = idx; - while (i > 0) : (i -= 1) { - self.rep[i] = self.rep[i - 1]; - } - self.rep[0] = dist; - } - } - - len = try self.rep_len_decoder.decode(rangecoder, pos_state, update); - - if (update) { - self.state = if (self.state < 7) 8 else 11; - } - } else { - if (update) { - self.rep[3] = self.rep[2]; - self.rep[2] = self.rep[1]; - self.rep[1] = self.rep[0]; - } - - len = try self.len_decoder.decode(rangecoder, pos_state, update); - - if (update) { - self.state = if (self.state < 7) 7 else 10; - } - - const rep_0 = try self.decodeDistance(rangecoder, len, update); - - if (update) { - self.rep[0] = rep_0; - if (self.rep[0] == 0xFFFF_FFFF) { - if (rangecoder.isFinished()) { - return .finished; - } - return error.CorruptInput; - } - } - } - - if (update) { - len += 2; - - const dist = self.rep[0] + 1; - try output.appendLz(allocator, len, dist); - } - - return .continue_; - } - - fn processNext( - self: *DecoderState, - allocator: Allocator, - output: *LzAccumBuffer, - rangecoder: *RangeDecoder, - ) !ProcessingStatus { - return self.processNextInner(allocator, output, rangecoder, true); - } - - pub fn process( - self: *DecoderState, - allocator: Allocator, - output: *LzAccumBuffer, - rangecoder: *RangeDecoder, - ) !void { - while (true) { - if (self.unpacked_size) |unpacked_size| { - if (output.len() >= unpacked_size) { - break; - } - } else if (rangecoder.isFinished()) { - break; - } - - if (try self.processNext(allocator, output, rangecoder) == .finished) { - break; - } - } - - if (self.unpacked_size) |len| { - if (len != output.len()) { - return error.CorruptInput; - } - } - } - - fn decodeLiteral( - self: *DecoderState, - output: *LzAccumBuffer, - rangecoder: *RangeDecoder, - update: bool, - ) !u8 { 
- const def_prev_byte = 0; - const prev_byte = @as(usize, output.lastOr(def_prev_byte)); - - var result: usize = 1; - const lit_state = ((output.len() & ((@as(usize, 1) << self.lzma_props.lp) - 1)) << self.lzma_props.lc) + - (prev_byte >> (8 - self.lzma_props.lc)); - const probs = try self.literal_probs.get(lit_state); - - if (self.state >= 7) { - var match_byte = @as(usize, try output.lastN(self.rep[0] + 1)); - - while (result < 0x100) { - const match_bit = (match_byte >> 7) & 1; - match_byte <<= 1; - const bit = @boolToInt(try rangecoder.decodeBit( - &probs[((@as(usize, 1) + match_bit) << 8) + result], - update, - )); - result = (result << 1) ^ bit; - if (match_bit != bit) { - break; - } - } - } - - while (result < 0x100) { - result = (result << 1) ^ @boolToInt(try rangecoder.decodeBit(&probs[result], update)); - } - - return @truncate(u8, result - 0x100); - } - - fn decodeDistance( - self: *DecoderState, - rangecoder: *RangeDecoder, - length: usize, - update: bool, - ) !usize { - const len_state = if (length > 3) 3 else length; - - const pos_slot = @as(usize, try self.pos_slot_decoder[len_state].parse(rangecoder, update)); - if (pos_slot < 4) - return pos_slot; - - const num_direct_bits = @intCast(u5, (pos_slot >> 1) - 1); - var result = (2 ^ (pos_slot & 1)) << num_direct_bits; - - if (pos_slot < 14) { - result += try rangecoder.parseReverseBitTree( - num_direct_bits, - &self.pos_decoders, - result - pos_slot, - update, - ); - } else { - result += @as(usize, try rangecoder.get(num_direct_bits - 4)) << 4; - result += try self.align_decoder.parseReverse(rangecoder, update); - } - - return result; - } -}; - -const ProcessingStatus = enum { - continue_, - finished, -}; - -pub const LzAccumBuffer = struct { - to_read: ArrayListUnmanaged(u8) = .{}, - buf: ArrayListUnmanaged(u8) = .{}, - - pub fn deinit(self: *LzAccumBuffer, allocator: Allocator) void { - self.to_read.deinit(allocator); - self.buf.deinit(allocator); - } - - pub fn read(self: *LzAccumBuffer, output: 
[]u8) usize { - const input = self.to_read.items; - const n = std.math.min(input.len, output.len); - std.mem.copy(u8, output[0..n], input[0..n]); - std.mem.copy(u8, input, input[n..]); - self.to_read.shrinkRetainingCapacity(input.len - n); - return n; - } - - pub fn ensureUnusedCapacity( - self: *LzAccumBuffer, - allocator: Allocator, - additional_count: usize, - ) !void { - try self.buf.ensureUnusedCapacity(allocator, additional_count); - } - - pub fn appendAssumeCapacity(self: *LzAccumBuffer, byte: u8) void { - self.buf.appendAssumeCapacity(byte); - } - - pub fn reset(self: *LzAccumBuffer, allocator: Allocator) !void { - try self.to_read.appendSlice(allocator, self.buf.items); - self.buf.clearRetainingCapacity(); - } - - pub fn len(self: *const LzAccumBuffer) usize { - return self.buf.items.len; - } - - pub fn lastOr(self: *const LzAccumBuffer, lit: u8) u8 { - const buf_len = self.buf.items.len; - return if (buf_len == 0) - lit - else - self.buf.items[buf_len - 1]; - } - - pub fn lastN(self: *const LzAccumBuffer, dist: usize) !u8 { - const buf_len = self.buf.items.len; - if (dist > buf_len) { - return error.CorruptInput; - } - - return self.buf.items[buf_len - dist]; - } - - pub fn appendLiteral(self: *LzAccumBuffer, allocator: Allocator, lit: u8) !void { - try self.buf.append(allocator, lit); - } - - pub fn appendLz(self: *LzAccumBuffer, allocator: Allocator, length: usize, dist: usize) !void { - const buf_len = self.buf.items.len; - if (dist > buf_len) { - return error.CorruptInput; - } - - var offset = buf_len - dist; - var i: usize = 0; - while (i < length) : (i += 1) { - const x = self.buf.items[offset]; - try self.buf.append(allocator, x); - offset += 1; - } - } -}; - -pub const RangeDecoder = struct { - stream: std.io.FixedBufferStream([]const u8), - range: u32, - code: u32, - - pub fn init(buffer: []const u8) !RangeDecoder { - var dec = RangeDecoder{ - .stream = std.io.fixedBufferStream(buffer), - .range = 0xFFFF_FFFF, - .code = 0, - }; - const reader = 
dec.stream.reader(); - _ = try reader.readByte(); - dec.code = try reader.readIntBig(u32); - return dec; - } - - pub fn fromParts( - buffer: []const u8, - range: u32, - code: u32, - ) RangeDecoder { - return .{ - .stream = std.io.fixedBufferStream(buffer), - .range = range, - .code = code, - }; - } - - pub fn set(self: *RangeDecoder, range: u32, code: u32) void { - self.range = range; - self.code = code; - } - - pub fn readInto(self: *RangeDecoder, dest: []u8) !usize { - return self.stream.read(dest); - } - - pub inline fn isFinished(self: *const RangeDecoder) bool { - return self.code == 0 and self.isEof(); - } - - pub inline fn isEof(self: *const RangeDecoder) bool { - return self.stream.pos == self.stream.buffer.len; - } - - inline fn normalize(self: *RangeDecoder) !void { - if (self.range < 0x0100_0000) { - self.range <<= 8; - self.code = (self.code << 8) ^ @as(u32, try self.stream.reader().readByte()); - } - } - - inline fn getBit(self: *RangeDecoder) !bool { - self.range >>= 1; - - const bit = self.code >= self.range; - if (bit) - self.code -= self.range; - - try self.normalize(); - return bit; - } - - fn get(self: *RangeDecoder, count: usize) !u32 { - var result: u32 = 0; - var i: usize = 0; - while (i < count) : (i += 1) - result = (result << 1) ^ @boolToInt(try self.getBit()); - return result; - } - - pub inline fn decodeBit(self: *RangeDecoder, prob: *u16, update: bool) !bool { - const bound = (self.range >> 11) * prob.*; - - if (self.code < bound) { - if (update) - prob.* += (0x800 - prob.*) >> 5; - self.range = bound; - - try self.normalize(); - return false; - } else { - if (update) - prob.* -= prob.* >> 5; - self.code -= bound; - self.range -= bound; - - try self.normalize(); - return true; - } - } - - fn parseBitTree( - self: *RangeDecoder, - num_bits: u5, - probs: []u16, - update: bool, - ) !u32 { - var tmp: u32 = 1; - var i: u5 = 0; - while (i < num_bits) : (i += 1) { - const bit = try self.decodeBit(&probs[tmp], update); - tmp = (tmp << 1) ^ 
@boolToInt(bit); - } - return tmp - (@as(u32, 1) << num_bits); - } - - pub fn parseReverseBitTree( - self: *RangeDecoder, - num_bits: u5, - probs: []u16, - offset: usize, - update: bool, - ) !u32 { - var result: u32 = 0; - var tmp: usize = 1; - var i: u5 = 0; - while (i < num_bits) : (i += 1) { - const bit = @boolToInt(try self.decodeBit(&probs[offset + tmp], update)); - tmp = (tmp << 1) ^ bit; - result ^= @as(u32, bit) << i; - } - return result; - } -}; - -fn Vec2D(comptime T: type) type { - return struct { - data: []T, - cols: usize, - - const Self = @This(); - - pub fn init(allocator: Allocator, data: T, rows: usize, cols: usize) !Self { - const len = try std.math.mul(usize, rows, cols); - var vec2d = Self{ - .data = try allocator.alloc(T, len), - .cols = cols, - }; - vec2d.fill(data); - return vec2d; - } - - pub fn deinit(self: *Self, allocator: Allocator) void { - allocator.free(self.data); - } - - pub fn fill(self: *Self, value: T) void { - std.mem.set(T, self.data, value); - } - - pub fn get(self: *Self, row: usize) ![]T { - const start_row = try std.math.mul(usize, row, self.cols); - return self.data[start_row .. 
start_row + self.cols]; - } - }; -} - -const BitTree = struct { - num_bits: u5, - probs: ArrayListUnmanaged(u16), - - pub fn init(allocator: Allocator, num_bits: u5) !BitTree { - var probs_len = @as(usize, 1) << num_bits; - var probs = try ArrayListUnmanaged(u16).initCapacity(allocator, probs_len); - while (probs_len > 0) : (probs_len -= 1) - probs.appendAssumeCapacity(0x400); - return .{ .num_bits = num_bits, .probs = probs }; - } - - pub fn deinit(self: *BitTree, allocator: Allocator) void { - self.probs.deinit(allocator); - } - - pub fn parse( - self: *BitTree, - rangecoder: *RangeDecoder, - update: bool, - ) !u32 { - return rangecoder.parseBitTree(self.num_bits, self.probs.items, update); - } - - pub fn parseReverse( - self: *BitTree, - rangecoder: *RangeDecoder, - update: bool, - ) !u32 { - return rangecoder.parseReverseBitTree(self.num_bits, self.probs.items, 0, update); - } - - pub fn reset(self: *BitTree) void { - std.mem.set(u16, self.probs.items, 0x400); - } -}; - -const LenDecoder = struct { - choice: u16, - choice2: u16, - low_coder: [16]BitTree, - mid_coder: [16]BitTree, - high_coder: BitTree, - - pub fn init(allocator: Allocator) !LenDecoder { - return .{ - .choice = 0x400, - .choice2 = 0x400, - .low_coder = .{ - try BitTree.init(allocator, 3), - try BitTree.init(allocator, 3), - try BitTree.init(allocator, 3), - try BitTree.init(allocator, 3), - try BitTree.init(allocator, 3), - try BitTree.init(allocator, 3), - try BitTree.init(allocator, 3), - try BitTree.init(allocator, 3), - try BitTree.init(allocator, 3), - try BitTree.init(allocator, 3), - try BitTree.init(allocator, 3), - try BitTree.init(allocator, 3), - try BitTree.init(allocator, 3), - try BitTree.init(allocator, 3), - try BitTree.init(allocator, 3), - try BitTree.init(allocator, 3), - }, - .mid_coder = .{ - try BitTree.init(allocator, 3), - try BitTree.init(allocator, 3), - try BitTree.init(allocator, 3), - try BitTree.init(allocator, 3), - try BitTree.init(allocator, 3), - try 
BitTree.init(allocator, 3), - try BitTree.init(allocator, 3), - try BitTree.init(allocator, 3), - try BitTree.init(allocator, 3), - try BitTree.init(allocator, 3), - try BitTree.init(allocator, 3), - try BitTree.init(allocator, 3), - try BitTree.init(allocator, 3), - try BitTree.init(allocator, 3), - try BitTree.init(allocator, 3), - try BitTree.init(allocator, 3), - }, - .high_coder = try BitTree.init(allocator, 8), - }; - } - - pub fn deinit(self: *LenDecoder, allocator: Allocator) void { - for (self.low_coder) |*t| t.deinit(allocator); - for (self.mid_coder) |*t| t.deinit(allocator); - self.high_coder.deinit(allocator); - } - - pub fn decode( - self: *LenDecoder, - rangecoder: *RangeDecoder, - pos_state: usize, - update: bool, - ) !usize { - if (!try rangecoder.decodeBit(&self.choice, update)) { - return @as(usize, try self.low_coder[pos_state].parse(rangecoder, update)); - } else if (!try rangecoder.decodeBit(&self.choice2, update)) { - return @as(usize, try self.mid_coder[pos_state].parse(rangecoder, update)) + 8; - } else { - return @as(usize, try self.high_coder.parse(rangecoder, update)) + 16; - } - } - - pub fn reset(self: *LenDecoder) void { - self.choice = 0x400; - self.choice2 = 0x400; - for (self.low_coder) |*t| t.reset(); - for (self.mid_coder) |*t| t.reset(); - self.high_coder.reset(); - } -}; diff --git a/lib/std/compress/xz/test.zig b/lib/std/compress/xz/test.zig index 848f518c78..08180e45c0 100644 --- a/lib/std/compress/xz/test.zig +++ b/lib/std/compress/xz/test.zig @@ -78,3 +78,23 @@ test "unsupported" { ); } } + +fn testDontPanic(data: []const u8) !void { + const buf = decompress(data) catch |err| switch (err) { + error.OutOfMemory => |e| return e, + else => return, + }; + defer testing.allocator.free(buf); +} + +test "size fields: integer overflow avoidance" { + // These cases were found via fuzz testing and each previously caused + // an integer overflow when decoding. 
We just want to ensure they no longer + // cause a panic + const header_size_overflow = "\xfd7zXZ\x00\x00\x01i\"\xde6z"; + try testDontPanic(header_size_overflow); + const lzma2_chunk_size_overflow = "\xfd7zXZ\x00\x00\x01i\"\xde6\x02\x00!\x01\x08\x00\x00\x00\xd8\x0f#\x13\x01\xff\xff"; + try testDontPanic(lzma2_chunk_size_overflow); + const backward_size_overflow = "\xfd7zXZ\x00\x00\x01i\"\xde6\x00\x00\x00\x00\x1c\xdfD!\x90B\x99\r\x01\x00\x00\xff\xff\x10\x00\x00\x00\x01DD\xff\xff\xff\x01"; + try testDontPanic(backward_size_overflow); +} diff --git a/lib/std/crypto.zig b/lib/std/crypto.zig index 20522c175d..1602fb926c 100644 --- a/lib/std/crypto.zig +++ b/lib/std/crypto.zig @@ -41,11 +41,13 @@ pub const auth = struct { pub const Aegis128LMac = @import("crypto/aegis.zig").Aegis128LMac; pub const Aegis256Mac = @import("crypto/aegis.zig").Aegis256Mac; }; + pub const cmac = @import("crypto/cmac.zig"); }; /// Core functions, that should rarely be used directly by applications. pub const core = struct { pub const aes = @import("crypto/aes.zig"); + pub const Ascon = @import("crypto/ascon.zig").State; pub const Gimli = @import("crypto/gimli.zig").State; pub const Xoodoo = @import("crypto/xoodoo.zig").State; @@ -202,10 +204,13 @@ test { _ = aead.salsa_poly.XSalsa20Poly1305; _ = auth.hmac; + _ = auth.cmac; _ = auth.siphash; _ = core.aes; + _ = core.Ascon; _ = core.Gimli; + _ = core.Xoodoo; _ = core.modes; _ = dh.X25519; diff --git a/lib/std/crypto/ascon.zig b/lib/std/crypto/ascon.zig new file mode 100644 index 0000000000..f692bdbe71 --- /dev/null +++ b/lib/std/crypto/ascon.zig @@ -0,0 +1,227 @@ +//! Ascon is a 320-bit permutation, selected as new standard for lightweight cryptography +//! in the NIST Lightweight Cryptography competition (2019–2023). +//! https://csrc.nist.gov/News/2023/lightweight-cryptography-nist-selects-ascon +//! +//! The permutation is compact, and optimized for timing and side channel resistance, +//! making it a good choice for embedded applications. 
+//! +//! It is not meant to be used directly, but as a building block for symmetric cryptography. + +const std = @import("std"); +const builtin = std.builtin; +const debug = std.debug; +const mem = std.mem; +const testing = std.testing; +const rotr = std.math.rotr; + +/// An Ascon state. +/// +/// The state is represented as 5 64-bit words. +/// +/// The NIST submission (v1.2) serializes these words as big-endian, +/// but software implementations are free to use native endianness. +pub fn State(comptime endian: builtin.Endian) type { + return struct { + const Self = @This(); + + /// Number of bytes in the state. + pub const block_bytes = 40; + + const Block = [5]u64; + + st: Block, + + /// Initialize the state from a slice of bytes. + pub fn init(initial_state: [block_bytes]u8) Self { + var state = Self{ .st = undefined }; + mem.copy(u8, state.asBytes(), &initial_state); + state.endianSwap(); + return state; + } + + /// Initialize the state from u64 words in native endianness. + pub fn initFromWords(initial_state: [5]u64) Self { + var state = Self{ .st = initial_state }; + return state; + } + + /// Initialize the state for Ascon XOF + pub fn initXof() Self { + return Self{ .st = Block{ + 0xb57e273b814cd416, + 0x2b51042562ae2420, + 0x66a3a7768ddf2218, + 0x5aad0a7a8153650c, + 0x4f3e0e32539493b6, + } }; + } + + /// Initialize the state for Ascon XOFa + pub fn initXofA() Self { + return Self{ .st = Block{ + 0x44906568b77b9832, + 0xcd8d6cae53455532, + 0xf7b5212756422129, + 0x246885e1de0d225b, + 0xa8cb5ce33449973f, + } }; + } + + /// A representation of the state as bytes. The byte order is architecture-dependent. + pub fn asBytes(self: *Self) *[block_bytes]u8 { + return mem.asBytes(&self.st); + } + + /// Byte-swap the entire state if the architecture doesn't match the required endianness. + pub fn endianSwap(self: *Self) void { + for (self.st) |*w| { + w.* = mem.toNative(u64, w.*, endian); + } + } + + /// Set bytes starting at the beginning of the state. 
+ pub fn setBytes(self: *Self, bytes: []const u8) void { + var i: usize = 0; + while (i + 8 <= bytes.len) : (i += 8) { + self.st[i / 8] = mem.readInt(u64, bytes[i..][0..8], endian); + } + if (i < bytes.len) { + var padded = [_]u8{0} ** 8; + mem.copy(u8, padded[0 .. bytes.len - i], bytes[i..]); + self.st[i / 8] = mem.readInt(u64, padded[0..], endian); + } + } + + /// XOR a byte into the state at a given offset. + pub fn addByte(self: *Self, byte: u8, offset: usize) void { + const z = switch (endian) { + .Big => 64 - 8 - 8 * @truncate(u6, offset % 8), + .Little => 8 * @truncate(u6, offset % 8), + }; + self.st[offset / 8] ^= @as(u64, byte) << z; + } + + /// XOR bytes into the beginning of the state. + pub fn addBytes(self: *Self, bytes: []const u8) void { + var i: usize = 0; + while (i + 8 <= bytes.len) : (i += 8) { + self.st[i / 8] ^= mem.readInt(u64, bytes[i..][0..8], endian); + } + if (i < bytes.len) { + var padded = [_]u8{0} ** 8; + mem.copy(u8, padded[0 .. bytes.len - i], bytes[i..]); + self.st[i / 8] ^= mem.readInt(u64, padded[0..], endian); + } + } + + /// Extract the first bytes of the state. + pub fn extractBytes(self: *Self, out: []u8) void { + var i: usize = 0; + while (i + 8 <= out.len) : (i += 8) { + mem.writeInt(u64, out[i..][0..8], self.st[i / 8], endian); + } + if (i < out.len) { + var padded = [_]u8{0} ** 8; + mem.writeInt(u64, padded[0..], self.st[i / 8], endian); + mem.copy(u8, out[i..], padded[0 .. out.len - i]); + } + } + + /// XOR the first bytes of the state into a slice of bytes. + pub fn xorBytes(self: *Self, out: []u8, in: []const u8) void { + debug.assert(out.len == in.len); + + var i: usize = 0; + while (i + 8 <= in.len) : (i += 8) { + const x = mem.readIntNative(u64, in[i..][0..8]) ^ mem.nativeTo(u64, self.st[i / 8], endian); + mem.writeIntNative(u64, out[i..][0..8], x); + } + if (i < in.len) { + var padded = [_]u8{0} ** 8; + mem.copy(u8, padded[0 .. 
in.len - i], in[i..]); + const x = mem.readIntNative(u64, &padded) ^ mem.nativeTo(u64, self.st[i / 8], endian); + mem.writeIntNative(u64, &padded, x); + mem.copy(u8, out[i..], padded[0 .. in.len - i]); + } + } + + /// Set the words storing the bytes of a given range to zero. + pub fn clear(self: *Self, from: usize, to: usize) void { + mem.set(u64, self.st[from / 8 .. (to + 7) / 8], 0); + } + + /// Clear the entire state, disabling compiler optimizations. + pub fn secureZero(self: *Self) void { + std.crypto.utils.secureZero(u64, &self.st); + } + + /// Apply a reduced-round permutation to the state. + pub inline fn permuteR(state: *Self, comptime rounds: u4) void { + const rks = [12]u64{ 0xf0, 0xe1, 0xd2, 0xc3, 0xb4, 0xa5, 0x96, 0x87, 0x78, 0x69, 0x5a, 0x4b }; + inline for (rks[rks.len - rounds ..]) |rk| { + state.round(rk); + } + } + + /// Apply a full-round permutation to the state. + pub inline fn permute(state: *Self) void { + state.permuteR(12); + } + + // Core Ascon permutation. + inline fn round(state: *Self, rk: u64) void { + const x = &state.st; + x[2] ^= rk; + + x[0] ^= x[4]; + x[4] ^= x[3]; + x[2] ^= x[1]; + var t: Block = .{ + x[0] ^ (~x[1] & x[2]), + x[1] ^ (~x[2] & x[3]), + x[2] ^ (~x[3] & x[4]), + x[3] ^ (~x[4] & x[0]), + x[4] ^ (~x[0] & x[1]), + }; + t[1] ^= t[0]; + t[3] ^= t[2]; + t[0] ^= t[4]; + + x[2] = t[2] ^ rotr(u64, t[2], 6 - 1); + x[3] = t[3] ^ rotr(u64, t[3], 17 - 10); + x[4] = t[4] ^ rotr(u64, t[4], 41 - 7); + x[0] = t[0] ^ rotr(u64, t[0], 28 - 19); + x[1] = t[1] ^ rotr(u64, t[1], 61 - 39); + x[2] = t[2] ^ rotr(u64, x[2], 1); + x[3] = t[3] ^ rotr(u64, x[3], 10); + x[4] = t[4] ^ rotr(u64, x[4], 7); + x[0] = t[0] ^ rotr(u64, x[0], 19); + x[1] = t[1] ^ rotr(u64, x[1], 39); + x[2] = ~x[2]; + } + }; +} + +test "ascon" { + const Ascon = State(.Big); + const bytes = [_]u8{0x01} ** Ascon.block_bytes; + var st = Ascon.init(bytes); + var out: [Ascon.block_bytes]u8 = undefined; + st.permute(); + st.extractBytes(&out); + const expected1 = [_]u8{ 148, 
147, 49, 226, 218, 221, 208, 113, 186, 94, 96, 10, 183, 219, 119, 150, 169, 206, 65, 18, 215, 97, 78, 106, 118, 81, 211, 150, 52, 17, 117, 64, 216, 45, 148, 240, 65, 181, 90, 180 }; + try testing.expectEqualSlices(u8, &expected1, &out); + st.clear(0, 10); + st.extractBytes(&out); + const expected2 = [_]u8{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 169, 206, 65, 18, 215, 97, 78, 106, 118, 81, 211, 150, 52, 17, 117, 64, 216, 45, 148, 240, 65, 181, 90, 180 }; + try testing.expectEqualSlices(u8, &expected2, &out); + st.addByte(1, 5); + st.addByte(2, 5); + st.extractBytes(&out); + const expected3 = [_]u8{ 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 169, 206, 65, 18, 215, 97, 78, 106, 118, 81, 211, 150, 52, 17, 117, 64, 216, 45, 148, 240, 65, 181, 90, 180 }; + try testing.expectEqualSlices(u8, &expected3, &out); + st.addBytes(&bytes); + st.extractBytes(&out); + const expected4 = [_]u8{ 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 168, 207, 64, 19, 214, 96, 79, 107, 119, 80, 210, 151, 53, 16, 116, 65, 217, 44, 149, 241, 64, 180, 91, 181 }; + try testing.expectEqualSlices(u8, &expected4, &out); +} diff --git a/lib/std/crypto/benchmark.zig b/lib/std/crypto/benchmark.zig index 58affcef93..c52758b181 100644 --- a/lib/std/crypto/benchmark.zig +++ b/lib/std/crypto/benchmark.zig @@ -66,6 +66,7 @@ const macs = [_]Crypto{ Crypto{ .ty = crypto.auth.siphash.SipHash128(1, 3), .name = "siphash128-1-3" }, Crypto{ .ty = crypto.auth.aegis.Aegis128LMac, .name = "aegis-128l mac" }, Crypto{ .ty = crypto.auth.aegis.Aegis256Mac, .name = "aegis-256 mac" }, + Crypto{ .ty = crypto.auth.cmac.CmacAes128, .name = "aes-cmac" }, }; pub fn benchmarkMac(comptime Mac: anytype, comptime bytes: comptime_int) !u64 { diff --git a/lib/std/crypto/cmac.zig b/lib/std/crypto/cmac.zig new file mode 100644 index 0000000000..911eac7902 --- /dev/null +++ b/lib/std/crypto/cmac.zig @@ -0,0 +1,156 @@ +const std = @import("std"); +const crypto = std.crypto; +const mem = std.mem; + +/// CMAC with AES-128 - 
RFC 4493 https://www.rfc-editor.org/rfc/rfc4493 +pub const CmacAes128 = Cmac(crypto.core.aes.Aes128); + +/// NIST Special Publication 800-38B - The CMAC Mode for Authentication +/// https://nvlpubs.nist.gov/nistpubs/SpecialPublications/NIST.SP.800-38b.pdf +pub fn Cmac(comptime BlockCipher: type) type { + const BlockCipherCtx = @typeInfo(@TypeOf(BlockCipher.initEnc)).Fn.return_type.?; + const Block = [BlockCipher.block.block_length]u8; + + return struct { + const Self = @This(); + pub const key_length = BlockCipher.key_bits / 8; + pub const block_length = BlockCipher.block.block_length; + pub const mac_length = block_length; + + cipher_ctx: BlockCipherCtx, + k1: Block, + k2: Block, + buf: Block = [_]u8{0} ** block_length, + pos: usize = 0, + + pub fn create(out: *[mac_length]u8, msg: []const u8, key: *const [key_length]u8) void { + var ctx = Self.init(key); + ctx.update(msg); + ctx.final(out); + } + + pub fn init(key: *const [key_length]u8) Self { + const cipher_ctx = BlockCipher.initEnc(key.*); + const zeros = [_]u8{0} ** block_length; + var k1: Block = undefined; + cipher_ctx.encrypt(&k1, &zeros); + k1 = double(k1); + return Self{ + .cipher_ctx = cipher_ctx, + .k1 = k1, + .k2 = double(k1), + }; + } + + pub fn update(self: *Self, msg: []const u8) void { + const left = block_length - self.pos; + var m = msg; + if (m.len > left) { + for (self.buf[self.pos..]) |*b, i| b.* ^= m[i]; + m = m[left..]; + self.cipher_ctx.encrypt(&self.buf, &self.buf); + self.pos = 0; + } + while (m.len > block_length) { + for (self.buf[0..block_length]) |*b, i| b.* ^= m[i]; + m = m[block_length..]; + self.cipher_ctx.encrypt(&self.buf, &self.buf); + self.pos = 0; + } + if (m.len > 0) { + for (self.buf[self.pos..][0..m.len]) |*b, i| b.* ^= m[i]; + self.pos += m.len; + } + } + + pub fn final(self: *Self, out: *[mac_length]u8) void { + var mac = self.k1; + if (self.pos < block_length) { + mac = self.k2; + mac[self.pos] ^= 0x80; + } + for (mac) |*b, i| b.* ^= self.buf[i]; + 
self.cipher_ctx.encrypt(out, &mac); + } + + fn double(l: Block) Block { + const Int = std.meta.Int(.unsigned, block_length * 8); + const l_ = mem.readIntBig(Int, &l); + const l_2 = switch (block_length) { + 8 => (l_ << 1) ^ (0x1b & -%(l_ >> 63)), // mod x^64 + x^4 + x^3 + x + 1 + 16 => (l_ << 1) ^ (0x87 & -%(l_ >> 127)), // mod x^128 + x^7 + x^2 + x + 1 + 32 => (l_ << 1) ^ (0x0425 & -%(l_ >> 255)), // mod x^256 + x^10 + x^5 + x^2 + 1 + 64 => (l_ << 1) ^ (0x0125 & -%(l_ >> 511)), // mod x^512 + x^8 + x^5 + x^2 + 1 + else => @compileError("unsupported block length"), + }; + var l2: Block = undefined; + mem.writeIntBig(Int, &l2, l_2); + return l2; + } + }; +} + +const testing = std.testing; + +test "CmacAes128 - Example 1: len = 0" { + const key = [_]u8{ + 0x2b, 0x7e, 0x15, 0x16, 0x28, 0xae, 0xd2, 0xa6, 0xab, 0xf7, 0x15, 0x88, 0x09, 0xcf, 0x4f, 0x3c, + }; + var msg: [0]u8 = undefined; + const exp = [_]u8{ + 0xbb, 0x1d, 0x69, 0x29, 0xe9, 0x59, 0x37, 0x28, 0x7f, 0xa3, 0x7d, 0x12, 0x9b, 0x75, 0x67, 0x46, + }; + var out: [CmacAes128.mac_length]u8 = undefined; + CmacAes128.create(&out, &msg, &key); + try testing.expectEqualSlices(u8, &out, &exp); +} + +test "CmacAes128 - Example 2: len = 16" { + const key = [_]u8{ + 0x2b, 0x7e, 0x15, 0x16, 0x28, 0xae, 0xd2, 0xa6, 0xab, 0xf7, 0x15, 0x88, 0x09, 0xcf, 0x4f, 0x3c, + }; + const msg = [_]u8{ + 0x6b, 0xc1, 0xbe, 0xe2, 0x2e, 0x40, 0x9f, 0x96, 0xe9, 0x3d, 0x7e, 0x11, 0x73, 0x93, 0x17, 0x2a, + }; + const exp = [_]u8{ + 0x07, 0x0a, 0x16, 0xb4, 0x6b, 0x4d, 0x41, 0x44, 0xf7, 0x9b, 0xdd, 0x9d, 0xd0, 0x4a, 0x28, 0x7c, + }; + var out: [CmacAes128.mac_length]u8 = undefined; + CmacAes128.create(&out, &msg, &key); + try testing.expectEqualSlices(u8, &out, &exp); +} + +test "CmacAes128 - Example 3: len = 40" { + const key = [_]u8{ + 0x2b, 0x7e, 0x15, 0x16, 0x28, 0xae, 0xd2, 0xa6, 0xab, 0xf7, 0x15, 0x88, 0x09, 0xcf, 0x4f, 0x3c, + }; + const msg = [_]u8{ + 0x6b, 0xc1, 0xbe, 0xe2, 0x2e, 0x40, 0x9f, 0x96, 0xe9, 0x3d, 0x7e, 0x11, 0x73, 0x93, 0x17, 
0x2a, + 0xae, 0x2d, 0x8a, 0x57, 0x1e, 0x03, 0xac, 0x9c, 0x9e, 0xb7, 0x6f, 0xac, 0x45, 0xaf, 0x8e, 0x51, + 0x30, 0xc8, 0x1c, 0x46, 0xa3, 0x5c, 0xe4, 0x11, + }; + const exp = [_]u8{ + 0xdf, 0xa6, 0x67, 0x47, 0xde, 0x9a, 0xe6, 0x30, 0x30, 0xca, 0x32, 0x61, 0x14, 0x97, 0xc8, 0x27, + }; + var out: [CmacAes128.mac_length]u8 = undefined; + CmacAes128.create(&out, &msg, &key); + try testing.expectEqualSlices(u8, &out, &exp); +} + +test "CmacAes128 - Example 4: len = 64" { + const key = [_]u8{ + 0x2b, 0x7e, 0x15, 0x16, 0x28, 0xae, 0xd2, 0xa6, 0xab, 0xf7, 0x15, 0x88, 0x09, 0xcf, 0x4f, 0x3c, + }; + const msg = [_]u8{ + 0x6b, 0xc1, 0xbe, 0xe2, 0x2e, 0x40, 0x9f, 0x96, 0xe9, 0x3d, 0x7e, 0x11, 0x73, 0x93, 0x17, 0x2a, + 0xae, 0x2d, 0x8a, 0x57, 0x1e, 0x03, 0xac, 0x9c, 0x9e, 0xb7, 0x6f, 0xac, 0x45, 0xaf, 0x8e, 0x51, + 0x30, 0xc8, 0x1c, 0x46, 0xa3, 0x5c, 0xe4, 0x11, 0xe5, 0xfb, 0xc1, 0x19, 0x1a, 0x0a, 0x52, 0xef, + 0xf6, 0x9f, 0x24, 0x45, 0xdf, 0x4f, 0x9b, 0x17, 0xad, 0x2b, 0x41, 0x7b, 0xe6, 0x6c, 0x37, 0x10, + }; + const exp = [_]u8{ + 0x51, 0xf0, 0xbe, 0xbf, 0x7e, 0x3b, 0x9d, 0x92, 0xfc, 0x49, 0x74, 0x17, 0x79, 0x36, 0x3c, 0xfe, + }; + var out: [CmacAes128.mac_length]u8 = undefined; + CmacAes128.create(&out, &msg, &key); + try testing.expectEqualSlices(u8, &out, &exp); +} diff --git a/lib/std/crypto/isap.zig b/lib/std/crypto/isap.zig index c7706df1df..0888cfa4dd 100644 --- a/lib/std/crypto/isap.zig +++ b/lib/std/crypto/isap.zig @@ -1,9 +1,11 @@ const std = @import("std"); +const crypto = std.crypto; const debug = std.debug; const mem = std.mem; const math = std.math; const testing = std.testing; -const AuthenticationError = std.crypto.errors.AuthenticationError; +const Ascon = crypto.core.Ascon(.Big); +const AuthenticationError = crypto.errors.AuthenticationError; /// ISAPv2 is an authenticated encryption system hardened against side channels and fault attacks. 
/// https://csrc.nist.gov/CSRC/media/Projects/lightweight-cryptography/documents/round-2/spec-doc-rnd2/isap-spec-round2.pdf @@ -25,90 +27,26 @@ pub const IsapA128A = struct { const iv2 = [_]u8{ 0x02, 0x80, 0x40, 0x01, 0x0c, 0x01, 0x06, 0x0c }; const iv3 = [_]u8{ 0x03, 0x80, 0x40, 0x01, 0x0c, 0x01, 0x06, 0x0c }; - const Block = [5]u64; - - block: Block, - - fn round(isap: *IsapA128A, rk: u64) void { - var x = &isap.block; - x[2] ^= rk; - x[0] ^= x[4]; - x[4] ^= x[3]; - x[2] ^= x[1]; - var t = x.*; - x[0] = t[0] ^ ((~t[1]) & t[2]); - x[2] = t[2] ^ ((~t[3]) & t[4]); - x[4] = t[4] ^ ((~t[0]) & t[1]); - x[1] = t[1] ^ ((~t[2]) & t[3]); - x[3] = t[3] ^ ((~t[4]) & t[0]); - x[1] ^= x[0]; - t[1] = x[1]; - x[1] = math.rotr(u64, x[1], 39); - x[3] ^= x[2]; - t[2] = x[2]; - x[2] = math.rotr(u64, x[2], 1); - t[4] = x[4]; - t[2] ^= x[2]; - x[2] = math.rotr(u64, x[2], 5); - t[3] = x[3]; - t[1] ^= x[1]; - x[3] = math.rotr(u64, x[3], 10); - x[0] ^= x[4]; - x[4] = math.rotr(u64, x[4], 7); - t[3] ^= x[3]; - x[2] ^= t[2]; - x[1] = math.rotr(u64, x[1], 22); - t[0] = x[0]; - x[2] = ~x[2]; - x[3] = math.rotr(u64, x[3], 7); - t[4] ^= x[4]; - x[4] = math.rotr(u64, x[4], 34); - x[3] ^= t[3]; - x[1] ^= t[1]; - x[0] = math.rotr(u64, x[0], 19); - x[4] ^= t[4]; - t[0] ^= x[0]; - x[0] = math.rotr(u64, x[0], 9); - x[0] ^= t[0]; - } - - fn p12(isap: *IsapA128A) void { - const rks = [12]u64{ 0xf0, 0xe1, 0xd2, 0xc3, 0xb4, 0xa5, 0x96, 0x87, 0x78, 0x69, 0x5a, 0x4b }; - inline for (rks) |rk| { - isap.round(rk); - } - } - - fn p6(isap: *IsapA128A) void { - const rks = [6]u64{ 0x96, 0x87, 0x78, 0x69, 0x5a, 0x4b }; - inline for (rks) |rk| { - isap.round(rk); - } - } - - fn p1(isap: *IsapA128A) void { - isap.round(0x4b); - } + st: Ascon, fn absorb(isap: *IsapA128A, m: []const u8) void { - var block = &isap.block; var i: usize = 0; while (true) : (i += 8) { const left = m.len - i; if (left >= 8) { - block[0] ^= mem.readIntBig(u64, m[i..][0..8]); - isap.p12(); + isap.st.addBytes(m[i..][0..8]); + 
isap.st.permute(); if (left == 8) { - block[0] ^= 0x8000000000000000; - isap.p12(); + isap.st.addByte(0x80, 0); + isap.st.permute(); break; } } else { var padded = [_]u8{0} ** 8; mem.copy(u8, padded[0..left], m[i..]); padded[left] = 0x80; - block[0] ^= mem.readIntBig(u64, padded[0..]); - isap.p12(); + isap.st.addBytes(&padded); + isap.st.permute(); break; } } @@ -116,65 +54,59 @@ pub const IsapA128A = struct { fn trickle(k: [16]u8, iv: [8]u8, y: []const u8, comptime out_len: usize) [out_len]u8 { var isap = IsapA128A{ - .block = Block{ + .st = Ascon.initFromWords(.{ mem.readIntBig(u64, k[0..8]), mem.readIntBig(u64, k[8..16]), mem.readIntBig(u64, iv[0..8]), 0, 0, - }, + }), }; - isap.p12(); + isap.st.permute(); var i: usize = 0; while (i < y.len * 8 - 1) : (i += 1) { const cur_byte_pos = i / 8; const cur_bit_pos = @truncate(u3, 7 - (i % 8)); - const cur_bit = @as(u64, ((y[cur_byte_pos] >> cur_bit_pos) & 1) << 7); - isap.block[0] ^= cur_bit << 56; - isap.p1(); + const cur_bit = ((y[cur_byte_pos] >> cur_bit_pos) & 1) << 7; + isap.st.addByte(cur_bit, 0); + isap.st.permuteR(1); } - const cur_bit = @as(u64, (y[y.len - 1] & 1) << 7); - isap.block[0] ^= cur_bit << 56; - isap.p12(); + const cur_bit = (y[y.len - 1] & 1) << 7; + isap.st.addByte(cur_bit, 0); + isap.st.permute(); var out: [out_len]u8 = undefined; - var j: usize = 0; - while (j < out_len) : (j += 8) { - mem.writeIntBig(u64, out[j..][0..8], isap.block[j / 8]); - } - std.crypto.utils.secureZero(u64, &isap.block); + isap.st.extractBytes(&out); + isap.st.secureZero(); return out; } fn mac(c: []const u8, ad: []const u8, npub: [16]u8, key: [16]u8) [16]u8 { var isap = IsapA128A{ - .block = Block{ + .st = Ascon.initFromWords(.{ mem.readIntBig(u64, npub[0..8]), mem.readIntBig(u64, npub[8..16]), mem.readIntBig(u64, iv1[0..]), 0, 0, - }, + }), }; - isap.p12(); + isap.st.permute(); isap.absorb(ad); - isap.block[4] ^= 1; + isap.st.addByte(1, Ascon.block_bytes - 1); isap.absorb(c); var y: [16]u8 = undefined; - 
mem.writeIntBig(u64, y[0..8], isap.block[0]); - mem.writeIntBig(u64, y[8..16], isap.block[1]); + isap.st.extractBytes(&y); const nb = trickle(key, iv2, y[0..], 16); - isap.block[0] = mem.readIntBig(u64, nb[0..8]); - isap.block[1] = mem.readIntBig(u64, nb[8..16]); - isap.p12(); + isap.st.setBytes(&nb); + isap.st.permute(); var tag: [16]u8 = undefined; - mem.writeIntBig(u64, tag[0..8], isap.block[0]); - mem.writeIntBig(u64, tag[8..16], isap.block[1]); - std.crypto.utils.secureZero(u64, &isap.block); + isap.st.extractBytes(&tag); + isap.st.secureZero(); return tag; } @@ -183,34 +115,31 @@ pub const IsapA128A = struct { const nb = trickle(key, iv3, npub[0..], 24); var isap = IsapA128A{ - .block = Block{ + .st = Ascon.initFromWords(.{ mem.readIntBig(u64, nb[0..8]), mem.readIntBig(u64, nb[8..16]), mem.readIntBig(u64, nb[16..24]), mem.readIntBig(u64, npub[0..8]), mem.readIntBig(u64, npub[8..16]), - }, + }), }; - isap.p6(); + isap.st.permuteR(6); var i: usize = 0; while (true) : (i += 8) { const left = in.len - i; if (left >= 8) { - mem.writeIntNative(u64, out[i..][0..8], mem.bigToNative(u64, isap.block[0]) ^ mem.readIntNative(u64, in[i..][0..8])); + isap.st.xorBytes(out[i..][0..8], in[i..][0..8]); if (left == 8) { break; } - isap.p6(); + isap.st.permuteR(6); } else { - var pad = [_]u8{0} ** 8; - mem.copy(u8, pad[0..left], in[i..][0..left]); - mem.writeIntNative(u64, pad[i..][0..8], mem.bigToNative(u64, isap.block[0]) ^ mem.readIntNative(u64, pad[i..][0..8])); - mem.copy(u8, out[i..][0..left], pad[0..left]); + isap.st.xorBytes(out[i..], in[i..]); break; } } - std.crypto.utils.secureZero(u64, &isap.block); + isap.st.secureZero(); } pub fn encrypt(c: []u8, tag: *[tag_length]u8, m: []const u8, ad: []const u8, npub: [nonce_length]u8, key: [key_length]u8) void { @@ -220,12 +149,9 @@ pub const IsapA128A = struct { pub fn decrypt(m: []u8, c: []const u8, tag: [tag_length]u8, ad: []const u8, npub: [nonce_length]u8, key: [key_length]u8) AuthenticationError!void { var computed_tag = 
mac(c, ad, npub, key); - var acc: u8 = 0; - for (computed_tag) |_, j| { - acc |= (computed_tag[j] ^ tag[j]); - } - std.crypto.utils.secureZero(u8, &computed_tag); - if (acc != 0) { + const res = crypto.utils.timingSafeEql([tag_length]u8, computed_tag, tag); + crypto.utils.secureZero(u8, &computed_tag); + if (!res) { return error.AuthenticationFailed; } xor(m, c, npub, key); diff --git a/lib/std/fs.zig b/lib/std/fs.zig index 2300ad044a..52a93a498f 100644 --- a/lib/std/fs.zig +++ b/lib/std/fs.zig @@ -1763,7 +1763,7 @@ pub const Dir = struct { var nt_name = w.UNICODE_STRING{ .Length = path_len_bytes, .MaximumLength = path_len_bytes, - .Buffer = @qualCast([*:0]u16, sub_path_w), + .Buffer = @constCast(sub_path_w), }; var attr = w.OBJECT_ATTRIBUTES{ .Length = @sizeOf(w.OBJECT_ATTRIBUTES), diff --git a/lib/std/json.zig b/lib/std/json.zig index 92afeead90..96e41e93c2 100644 --- a/lib/std/json.zig +++ b/lib/std/json.zig @@ -1163,11 +1163,12 @@ const ArrayList = std.ArrayList; const StringArrayHashMap = std.StringArrayHashMap; pub const ValueTree = struct { - arena: ArenaAllocator, + arena: *ArenaAllocator, root: Value, pub fn deinit(self: *ValueTree) void { self.arena.deinit(); + self.arena.child_allocator.destroy(self.arena); } }; @@ -1639,7 +1640,7 @@ fn parseInternal( const allocator = options.allocator orelse return error.AllocatorRequired; switch (ptrInfo.size) { .One => { - const r: T = try allocator.create(ptrInfo.child); + const r: *ptrInfo.child = try allocator.create(ptrInfo.child); errdefer allocator.destroy(r); r.* = try parseInternal(ptrInfo.child, token, tokens, options); return r; @@ -1678,19 +1679,16 @@ fn parseInternal( if (ptrInfo.child != u8) return error.UnexpectedToken; const source_slice = stringToken.slice(tokens.slice, tokens.i - 1); const len = stringToken.decodedLength(); - const output = try allocator.alloc(u8, len + @boolToInt(ptrInfo.sentinel != null)); + const output = if (ptrInfo.sentinel) |sentinel_ptr| + try allocator.allocSentinel(u8, len, 
@ptrCast(*const u8, sentinel_ptr).*) + else + try allocator.alloc(u8, len); errdefer allocator.free(output); switch (stringToken.escapes) { .None => mem.copy(u8, output, source_slice), .Some => try unescapeValidString(output, source_slice), } - if (ptrInfo.sentinel) |some| { - const char = @ptrCast(*const u8, some).*; - output[len] = char; - return output[0..len :char]; - } - return output; }, else => return error.UnexpectedToken, @@ -1744,7 +1742,37 @@ pub fn parseFree(comptime T: type, value: T, options: ParseOptions) void { .Struct => |structInfo| { inline for (structInfo.fields) |field| { if (!field.is_comptime) { - parseFree(field.type, @field(value, field.name), options); + var should_free = true; + if (field.default_value) |default| { + switch (@typeInfo(field.type)) { + // We must not attempt to free pointers to struct default values + .Pointer => |fieldPtrInfo| { + const field_value = @field(value, field.name); + const field_ptr = switch (fieldPtrInfo.size) { + .One => field_value, + .Slice => field_value.ptr, + else => unreachable, // Other pointer types are not parseable + }; + const field_addr = @ptrToInt(field_ptr); + + const casted_default = @ptrCast(*const field.type, @alignCast(@alignOf(field.type), default)).*; + const default_ptr = switch (fieldPtrInfo.size) { + .One => casted_default, + .Slice => casted_default.ptr, + else => unreachable, // Other pointer types are not parseable + }; + const default_addr = @ptrToInt(default_ptr); + + if (field_addr == default_addr) { + should_free = false; + } + }, + else => {}, + } + } + if (should_free) { + parseFree(field.type, @field(value, field.name), options); + } } } }, @@ -1809,8 +1837,12 @@ pub const Parser = struct { pub fn parse(p: *Parser, input: []const u8) !ValueTree { var s = TokenStream.init(input); - var arena = ArenaAllocator.init(p.allocator); + var arena = try p.allocator.create(ArenaAllocator); + errdefer p.allocator.destroy(arena); + + arena.* = ArenaAllocator.init(p.allocator); errdefer 
arena.deinit(); + const allocator = arena.allocator(); while (try s.next()) |token| { @@ -2684,3 +2716,16 @@ test "encodesTo" { try testing.expectEqual(true, encodesTo("😂", "\\ud83d\\ude02")); try testing.expectEqual(true, encodesTo("withąunicode😂", "with\\u0105unicode\\ud83d\\ude02")); } + +test "issue 14600" { + const json = "\"\\n\""; + var token_stream = std.json.TokenStream.init(json); + const options = ParseOptions{ .allocator = std.testing.allocator }; + + // Pre-fix, this line would panic: + const result = try std.json.parse([:0]const u8, &token_stream, options); + defer std.json.parseFree([:0]const u8, result, options); + + // Double-check that we're getting the right result + try testing.expect(mem.eql(u8, result, "\n")); +} diff --git a/lib/std/json/test.zig b/lib/std/json/test.zig index 0bf0797587..3c9414a59c 100644 --- a/lib/std/json/test.zig +++ b/lib/std/json/test.zig @@ -2238,6 +2238,39 @@ test "parse into struct with no fields" { try testing.expectEqual(T{}, try parse(T, &ts, ParseOptions{})); } +const test_const_value: usize = 123; + +test "parse into struct with default const pointer field" { + const T = struct { a: *const usize = &test_const_value }; + var ts = TokenStream.init("{}"); + try testing.expectEqual(T{}, try parse(T, &ts, .{})); +} + +const test_default_usize: usize = 123; +const test_default_usize_ptr: *align(1) const usize = &test_default_usize; +const test_default_str: []const u8 = "test str"; +const test_default_str_slice: [2][]const u8 = [_][]const u8{ + "test1", + "test2", +}; + +test "freeing parsed structs with pointers to default values" { + const T = struct { + int: *const usize = &test_default_usize, + int_ptr: *allowzero align(1) const usize = test_default_usize_ptr, + str: []const u8 = test_default_str, + str_slice: []const []const u8 = &test_default_str_slice, + }; + + var ts = json.TokenStream.init("{}"); + const options = .{ .allocator = std.heap.page_allocator }; + const parsed = try json.parse(T, &ts, options); + + 
try testing.expectEqual(T{}, parsed); + + json.parseFree(T, parsed, options); +} + test "parse into struct where destination and source lengths mismatch" { const T = struct { a: [2]u8 }; var ts = TokenStream.init("{\"a\": \"bbb\"}"); @@ -2581,6 +2614,24 @@ test "parsing empty string gives appropriate error" { try testing.expectError(error.UnexpectedEndOfJson, testParse(arena_allocator.allocator(), "")); } +test "parse tree should not contain dangling pointers" { + var arena_allocator = std.heap.ArenaAllocator.init(std.testing.allocator); + defer arena_allocator.deinit(); + + var p = json.Parser.init(arena_allocator.allocator(), false); + defer p.deinit(); + + var tree = try p.parse("[]"); + defer tree.deinit(); + + // Allocation should succeed + var i: usize = 0; + while (i < 100) : (i += 1) { + try tree.root.Array.append(std.json.Value{ .Integer = 100 }); + } + try testing.expectEqual(tree.root.Array.items.len, 100); +} + test "integer after float has proper type" { var arena_allocator = std.heap.ArenaAllocator.init(std.testing.allocator); defer arena_allocator.deinit(); diff --git a/lib/std/mem/Allocator.zig b/lib/std/mem/Allocator.zig index 6e56607865..63e5e9a1d5 100644 --- a/lib/std/mem/Allocator.zig +++ b/lib/std/mem/Allocator.zig @@ -112,7 +112,7 @@ pub fn destroy(self: Allocator, ptr: anytype) void { const info = @typeInfo(@TypeOf(ptr)).Pointer; const T = info.child; if (@sizeOf(T) == 0) return; - const non_const_ptr = @intToPtr([*]u8, @ptrToInt(ptr)); + const non_const_ptr = @ptrCast([*]u8, @constCast(ptr)); self.rawFree(non_const_ptr[0..@sizeOf(T)], math.log2(info.alignment), @returnAddress()); } @@ -297,7 +297,7 @@ pub fn free(self: Allocator, memory: anytype) void { const bytes = mem.sliceAsBytes(memory); const bytes_len = bytes.len + if (Slice.sentinel != null) @sizeOf(Slice.child) else 0; if (bytes_len == 0) return; - const non_const_ptr = @intToPtr([*]u8, @ptrToInt(bytes.ptr)); + const non_const_ptr = @constCast(bytes.ptr); // TODO: 
https://github.com/ziglang/zig/issues/4298 @memset(non_const_ptr, undefined, bytes_len); self.rawFree(non_const_ptr[0..bytes_len], log2a(Slice.alignment), @returnAddress()); diff --git a/lib/std/meta.zig b/lib/std/meta.zig index c7ec4b1702..7ab4c9f25c 100644 --- a/lib/std/meta.zig +++ b/lib/std/meta.zig @@ -332,7 +332,7 @@ pub fn Sentinel(comptime T: type, comptime sentinel_val: Elem(T)) type { @compileError("Unable to derive a sentinel pointer type from " ++ @typeName(T)); } -const assumeSentinel = @compileError("This function has been removed, consider using std.mem.sliceTo() or if needed a @ptrCast()"); +pub const assumeSentinel = @compileError("This function has been removed, consider using std.mem.sliceTo() or if needed a @ptrCast()"); pub fn containerLayout(comptime T: type) Type.ContainerLayout { return switch (@typeInfo(T)) { diff --git a/lib/std/os.zig b/lib/std/os.zig index 3cee30c32d..c5eeb34b1c 100644 --- a/lib/std/os.zig +++ b/lib/std/os.zig @@ -4513,7 +4513,7 @@ pub fn faccessatW(dirfd: fd_t, sub_path_w: [*:0]const u16, mode: u32, flags: u32 var nt_name = windows.UNICODE_STRING{ .Length = path_len_bytes, .MaximumLength = path_len_bytes, - .Buffer = @qualCast([*:0]u16, sub_path_w), + .Buffer = @constCast(sub_path_w), }; var attr = windows.OBJECT_ATTRIBUTES{ .Length = @sizeOf(windows.OBJECT_ATTRIBUTES), diff --git a/lib/std/os/linux/syscalls.zig b/lib/std/os/linux/syscalls.zig index 36a8bae04c..f176a434b4 100644 --- a/lib/std/os/linux/syscalls.zig +++ b/lib/std/os/linux/syscalls.zig @@ -2057,7 +2057,7 @@ pub const Mips64 = enum(usize) { writev = Linux + 19, access = Linux + 20, pipe = Linux + 21, - select = Linux + 22, + _newselect = Linux + 22, sched_yield = Linux + 23, mremap = Linux + 24, msync = Linux + 25, @@ -2071,8 +2071,8 @@ pub const Mips64 = enum(usize) { pause = Linux + 33, nanosleep = Linux + 34, getitimer = Linux + 35, - alarm = Linux + 36, - setitimer = Linux + 37, + setitimer = Linux + 36, + alarm = Linux + 37, getpid = Linux + 38, 
sendfile = Linux + 39, socket = Linux + 40, @@ -2286,7 +2286,7 @@ pub const Mips64 = enum(usize) { mknodat = Linux + 249, fchownat = Linux + 250, futimesat = Linux + 251, - newfstatat = Linux + 252, + fstatat64 = Linux + 252, unlinkat = Linux + 253, renameat = Linux + 254, linkat = Linux + 255, @@ -2315,8 +2315,8 @@ pub const Mips64 = enum(usize) { eventfd = Linux + 278, fallocate = Linux + 279, timerfd_create = Linux + 280, - timerfd_settime = Linux + 281, - timerfd_gettime = Linux + 282, + timerfd_gettime = Linux + 281, + timerfd_settime = Linux + 282, signalfd4 = Linux + 283, eventfd2 = Linux + 284, epoll_create1 = Linux + 285, @@ -2382,9 +2382,13 @@ pub const Mips64 = enum(usize) { process_madvise = Linux + 440, epoll_pwait2 = Linux + 441, mount_setattr = Linux + 442, + quotactl_fd = Linux + 443, landlock_create_ruleset = Linux + 444, landlock_add_rule = Linux + 445, landlock_restrict_self = Linux + 446, + process_mrelease = Linux + 448, + futex_waitv = Linux + 449, + set_mempolicy_home_node = Linux + 450, }; pub const PowerPC = enum(usize) { diff --git a/lib/std/os/windows.zig b/lib/std/os/windows.zig index 93e762827b..711bc9f349 100644 --- a/lib/std/os/windows.zig +++ b/lib/std/os/windows.zig @@ -85,7 +85,7 @@ pub fn OpenFile(sub_path_w: []const u16, options: OpenFileOptions) OpenError!HAN var nt_name = UNICODE_STRING{ .Length = path_len_bytes, .MaximumLength = path_len_bytes, - .Buffer = @qualCast([*]u16, sub_path_w.ptr), + .Buffer = @constCast(sub_path_w.ptr), }; var attr = OBJECT_ATTRIBUTES{ .Length = @sizeOf(OBJECT_ATTRIBUTES), @@ -634,7 +634,7 @@ pub fn SetCurrentDirectory(path_name: []const u16) SetCurrentDirectoryError!void var nt_name = UNICODE_STRING{ .Length = path_len_bytes, .MaximumLength = path_len_bytes, - .Buffer = @qualCast([*]u16, path_name.ptr), + .Buffer = @constCast(path_name.ptr), }; const rc = ntdll.RtlSetCurrentDirectory_U(&nt_name); @@ -766,7 +766,7 @@ pub fn ReadLink(dir: ?HANDLE, sub_path_w: []const u16, out_buffer: []u8) ReadLin var 
nt_name = UNICODE_STRING{ .Length = path_len_bytes, .MaximumLength = path_len_bytes, - .Buffer = @qualCast([*]u16, sub_path_w.ptr), + .Buffer = @constCast(sub_path_w.ptr), }; var attr = OBJECT_ATTRIBUTES{ .Length = @sizeOf(OBJECT_ATTRIBUTES), @@ -876,7 +876,7 @@ pub fn DeleteFile(sub_path_w: []const u16, options: DeleteFileOptions) DeleteFil .Length = path_len_bytes, .MaximumLength = path_len_bytes, // The Windows API makes this mutable, but it will not mutate here. - .Buffer = @qualCast([*]u16, sub_path_w.ptr), + .Buffer = @constCast(sub_path_w.ptr), }; if (sub_path_w[0] == '.' and sub_path_w[1] == 0) { @@ -1414,7 +1414,7 @@ pub fn sendmsg( } pub fn sendto(s: ws2_32.SOCKET, buf: [*]const u8, len: usize, flags: u32, to: ?*const ws2_32.sockaddr, to_len: ws2_32.socklen_t) i32 { - var buffer = ws2_32.WSABUF{ .len = @truncate(u31, len), .buf = @qualCast([*]u8, buf) }; + var buffer = ws2_32.WSABUF{ .len = @truncate(u31, len), .buf = @constCast(buf) }; var bytes_send: DWORD = undefined; if (ws2_32.WSASendTo(s, @ptrCast([*]ws2_32.WSABUF, &buffer), 1, &bytes_send, flags, to, @intCast(i32, to_len), null, null) == ws2_32.SOCKET_ERROR) { return ws2_32.SOCKET_ERROR; @@ -1876,13 +1876,13 @@ pub fn eqlIgnoreCaseWTF16(a: []const u16, b: []const u16) bool { const a_string = UNICODE_STRING{ .Length = a_bytes, .MaximumLength = a_bytes, - .Buffer = @qualCast([*]u16, a.ptr), + .Buffer = @constCast(a.ptr), }; const b_bytes = @intCast(u16, b.len * 2); const b_string = UNICODE_STRING{ .Length = b_bytes, .MaximumLength = b_bytes, - .Buffer = @qualCast([*]u16, b.ptr), + .Buffer = @constCast(b.ptr), }; return ntdll.RtlEqualUnicodeString(&a_string, &b_string, TRUE) == TRUE; } diff --git a/lib/std/rand.zig b/lib/std/rand.zig index 914419b863..f5a1ffe57e 100644 --- a/lib/std/rand.zig +++ b/lib/std/rand.zig @@ -18,8 +18,9 @@ const maxInt = std.math.maxInt; pub const DefaultPrng = Xoshiro256; /// Cryptographically secure random numbers. 
-pub const DefaultCsprng = Xoodoo; +pub const DefaultCsprng = Ascon; +pub const Ascon = @import("rand/Ascon.zig"); pub const Isaac64 = @import("rand/Isaac64.zig"); pub const Xoodoo = @import("rand/Xoodoo.zig"); pub const Pcg = @import("rand/Pcg.zig"); diff --git a/lib/std/rand/Ascon.zig b/lib/std/rand/Ascon.zig new file mode 100644 index 0000000000..b6e8ce4899 --- /dev/null +++ b/lib/std/rand/Ascon.zig @@ -0,0 +1,45 @@ +//! CSPRNG based on the Ascon XOFa construction + +const std = @import("std"); +const min = std.math.min; +const mem = std.mem; +const Random = std.rand.Random; +const Self = @This(); + +state: std.crypto.core.Ascon(.Little), + +const rate = 8; +pub const secret_seed_length = 32; + +/// The seed must be uniform, secret and `secret_seed_length` bytes long. +pub fn init(secret_seed: [secret_seed_length]u8) Self { + var state = std.crypto.core.Ascon(.Little).initXofA(); + var i: usize = 0; + while (i + rate <= secret_seed.len) : (i += rate) { + state.addBytes(secret_seed[i..][0..rate]); + state.permuteR(8); + } + const left = secret_seed.len - i; + if (left > 0) state.addBytes(secret_seed[i..]); + state.addByte(0x80, left); + state.permute(); + return Self{ .state = state }; +} + +pub fn random(self: *Self) Random { + return Random.init(self, fill); +} + +pub fn fill(self: *Self, buf: []u8) void { + var i: usize = 0; + while (true) { + const left = buf.len - i; + const n = min(left, rate); + self.state.extractBytes(buf[i..][0..n]); + if (left == 0) break; + self.state.permuteR(8); + i += n; + } + self.state.clear(0, rate); + self.state.permuteR(8); +} diff --git a/lib/std/zig/Ast.zig b/lib/std/zig/Ast.zig index 80dda052ab..94cdcff4e7 100644 --- a/lib/std/zig/Ast.zig +++ b/lib/std/zig/Ast.zig @@ -207,7 +207,7 @@ pub fn renderError(tree: Ast, parse_error: Error, stream: anytype) !void { return stream.writeAll("declarations are not allowed between container fields"); }, .expected_block => { - return stream.print("expected block or field, found '{s}'", .{ 
+ return stream.print("expected block, found '{s}'", .{ token_tags[parse_error.token + @boolToInt(parse_error.token_is_prev)].symbol(), }); }, diff --git a/lib/std/zig/c_translation.zig b/lib/std/zig/c_translation.zig index d33c74d777..6e95ab53ab 100644 --- a/lib/std/zig/c_translation.zig +++ b/lib/std/zig/c_translation.zig @@ -74,8 +74,10 @@ fn castPtr(comptime DestType: type, target: anytype) DestType { const dest = ptrInfo(DestType); const source = ptrInfo(@TypeOf(target)); - if (source.is_const and !dest.is_const or source.is_volatile and !dest.is_volatile) - return @qualCast(DestType, target) + if (source.is_const and !dest.is_const) + return @constCast(target) + else if (source.is_volatile and !dest.is_volatile) + return @volatileCast(target) else if (@typeInfo(dest.child) == .Opaque) // dest.alignment would error out return @ptrCast(DestType, target) diff --git a/lib/std/zig/render.zig b/lib/std/zig/render.zig index 1145007ff3..97bc85efac 100644 --- a/lib/std/zig/render.zig +++ b/lib/std/zig/render.zig @@ -2807,14 +2807,6 @@ fn nodeIsBlock(tag: Ast.Node.Tag) bool { .block_semicolon, .block_two, .block_two_semicolon, - .struct_init_dot, - .struct_init_dot_comma, - .struct_init_dot_two, - .struct_init_dot_two_comma, - .array_init_dot, - .array_init_dot_comma, - .array_init_dot_two, - .array_init_dot_two_comma, => true, else => false, }; diff --git a/lib/std/zig/tokenizer.zig b/lib/std/zig/tokenizer.zig index ba99fe3d2c..0e0742eab3 100644 --- a/lib/std/zig/tokenizer.zig +++ b/lib/std/zig/tokenizer.zig @@ -739,6 +739,7 @@ pub const Tokenizer = struct { }, 0 => { if (self.index == self.buffer.len) { + result.tag = .invalid; break; } else { self.checkLiteralCharacter(); @@ -1326,7 +1327,7 @@ test "newline in string literal" { try testTokenize( \\" \\" - , &.{ .invalid, .string_literal }); + , &.{ .invalid, .invalid }); } test "code point literal with unicode escapes" { |
