diff options
| author | Andrew Kelley <andrew@ziglang.org> | 2025-02-24 20:24:52 -0800 |
|---|---|---|
| committer | Andrew Kelley <andrew@ziglang.org> | 2025-02-26 11:42:03 -0800 |
| commit | d6a88ed74db270c14c669ab334f3ab715cfd2b76 (patch) | |
| tree | 6a82af3d767e00c4682f45ba49e8c8ef48be6a32 /src | |
| parent | 9763dd2901069f80dbdaae7c6b8004fbe1cf1b26 (diff) | |
| download | zig-d6a88ed74db270c14c669ab334f3ab715cfd2b76.tar.gz zig-d6a88ed74db270c14c669ab334f3ab715cfd2b76.zip | |
introduce package id and redo hash format again
Introduces the `id` field to `build.zig.zon`.
Together with name, this represents a globally unique package
identifier. This field should be initialized with a 16-bit random number
when the package is first created, and then *never change*. This allows
Zig to unambiguously detect when one package is an updated version of
another.
When forking a Zig project, this id should be regenerated with a new
random number if the upstream project is still maintained. Otherwise,
the fork is *hostile*, attempting to take control over the original
project's identity.
`0x0000` is invalid because it obviously means a random number wasn't
used.
`0xffff` is reserved to represent "naked" packages.
Tracking issue #14288
Additionally:
* Fix bad path in error messages regarding build.zig.zon file.
* Manifest validates that the `name` and `version` fields of build.zig.zon
are at most 32 bytes.
* Introduce an error when the root package has not switched to an enum
literal for `name`.
* Introduce an error when the root package omits `id`.
* Update init template to generate `id`
* Update init template to populate `minimum_zig_version`.
* New package hash format changes:
- name and version limited to 32 bytes via error rather than truncation
- truncate sha256 to 192 bits rather than 40 bits
- include the package id
This means that, given only the package hashes for a complete dependency
tree, it is possible to perform version selection and know the final
size on disk, without doing any fetching whatsoever. This prevents
wasted bandwidth since package versions not selected do not need to be
fetched.
Diffstat (limited to 'src')
| -rw-r--r-- | src/Package.zig | 58 | ||||
| -rw-r--r-- | src/Package/Fetch.zig | 10 | ||||
| -rw-r--r-- | src/Package/Manifest.zig | 49 | ||||
| -rw-r--r-- | src/main.zig | 38 |
4 files changed, 109 insertions, 46 deletions
diff --git a/src/Package.zig b/src/Package.zig index b585644d9e..6d370e9855 100644 --- a/src/Package.zig +++ b/src/Package.zig @@ -10,9 +10,17 @@ pub const multihash_len = 1 + 1 + Hash.Algo.digest_length; pub const multihash_hex_digest_len = 2 * multihash_len; pub const MultiHashHexDigest = [multihash_hex_digest_len]u8; +pub fn randomId() u16 { + return std.crypto.random.intRangeLessThan(u16, 0x0001, 0xffff); +} + /// A user-readable, file system safe hash that identifies an exact package /// snapshot, including file contents. /// +/// The hash is not only to prevent collisions but must resist attacks where +/// the adversary fully controls the contents being hashed. Thus, it contains +/// a full SHA-256 digest. +/// /// This data structure can be used to store the legacy hash format too. Legacy /// hash format is scheduled to be removed after 0.14.0 is tagged. /// @@ -26,7 +34,8 @@ pub const Hash = struct { pub const Algo = std.crypto.hash.sha2.Sha256; pub const Digest = [Algo.digest_length]u8; - pub const max_len = 32 + 1 + 32 + 1 + 12; + /// Example: "nnnn-vvvv-hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh" + pub const max_len = 32 + 1 + 32 + 1 + (16 + 32 + 192) / 6; pub fn fromSlice(s: []const u8) Hash { assert(s.len <= max_len); @@ -62,48 +71,35 @@ pub const Hash = struct { try std.testing.expect(h.isOld()); } - /// Produces "$name-$semver-$sizedhash". + /// Produces "$name-$semver-$hashplus". 
/// * name is the name field from build.zig.zon, truncated at 32 bytes and must /// be a valid zig identifier /// * semver is the version field from build.zig.zon, truncated at 32 bytes - /// * sizedhash is the following 9-byte array, base64 encoded using -_ to make + /// * hashplus is the following 39-byte array, base64 encoded using -_ to make /// it filesystem safe: - /// - (4 bytes) LE u32 total decompressed size in bytes - /// - (5 bytes) truncated SHA-256 of hashed files of the package + /// - (2 bytes) LE u16 Package ID + /// - (4 bytes) LE u32 total decompressed size in bytes, overflow saturated + /// - (24 bytes) truncated SHA-256 digest of hashed files of the package /// - /// example: "nasm-2.16.1-2-BWdcABvF_jM1" - pub fn init(digest: Digest, name: []const u8, ver: []const u8, size: u32) Hash { + /// example: "nasm-2.16.1-3-AAD_ZlwACpGU-c3QXp_yNyn07Q5U9Rq-Cb1ur2G1" + pub fn init(digest: Digest, name: []const u8, ver: []const u8, id: u16, size: u32) Hash { + assert(name.len <= 32); + assert(ver.len <= 32); var result: Hash = undefined; var buf: std.ArrayListUnmanaged(u8) = .initBuffer(&result.bytes); - buf.appendSliceAssumeCapacity(name[0..@min(name.len, 32)]); + buf.appendSliceAssumeCapacity(name); buf.appendAssumeCapacity('-'); - buf.appendSliceAssumeCapacity(ver[0..@min(ver.len, 32)]); + buf.appendSliceAssumeCapacity(ver); buf.appendAssumeCapacity('-'); - var sizedhash: [9]u8 = undefined; - std.mem.writeInt(u32, sizedhash[0..4], size, .little); - sizedhash[4..].* = digest[0..5].*; - _ = std.base64.url_safe_no_pad.Encoder.encode(buf.addManyAsArrayAssumeCapacity(12), &sizedhash); + var hashplus: [30]u8 = undefined; + std.mem.writeInt(u16, hashplus[0..2], id, .little); + std.mem.writeInt(u32, hashplus[2..6], size, .little); + hashplus[6..].* = digest[0..24].*; + _ = std.base64.url_safe_no_pad.Encoder.encode(buf.addManyAsArrayAssumeCapacity(40), &hashplus); @memset(buf.unusedCapacitySlice(), 0); return result; } - /// Produces "$hashiname-N-$sizedhash". 
For packages that lack "build.zig.zon" metadata. - /// * hashiname is [5..][0..24] bytes of the SHA-256, urlsafe-base64-encoded, for a total of 32 bytes encoded - /// * the semver section is replaced with a hardcoded N which stands for - /// "naked". It acts as a version number so that any future updates to the - /// hash format can tell this hash format apart. Note that "N" is an - /// invalid semver. - /// * sizedhash is the same as in `init`. - /// - /// The hash is broken up this way so that "sizedhash" can be calculated - /// exactly the same way in both cases, and so that "name" and "hashiname" can - /// be used interchangeably in both cases. - pub fn initNaked(digest: Digest, size: u32) Hash { - var name: [32]u8 = undefined; - _ = std.base64.url_safe_no_pad.Encoder.encode(&name, digest[5..][0..24]); - return init(digest, &name, "N", size); - } - /// Produces a unique hash based on the path provided. The result should /// not be user-visible. pub fn initPath(sub_path: []const u8, is_global: bool) Hash { @@ -144,7 +140,7 @@ pub const MultihashFunction = enum(u16) { pub const multihash_function: MultihashFunction = switch (Hash.Algo) { std.crypto.hash.sha2.Sha256 => .@"sha2-256", - else => @compileError("unreachable"), + else => unreachable, }; pub fn multiHashHexDigest(digest: Hash.Digest) MultiHashHexDigest { diff --git a/src/Package/Fetch.zig b/src/Package/Fetch.zig index 326b8917a5..bb9fbd9664 100644 --- a/src/Package/Fetch.zig +++ b/src/Package/Fetch.zig @@ -586,9 +586,11 @@ pub fn computedPackageHash(f: *const Fetch) Package.Hash { if (f.manifest) |man| { var version_buffer: [32]u8 = undefined; const version: []const u8 = std.fmt.bufPrint(&version_buffer, "{}", .{man.version}) catch &version_buffer; - return .init(f.computed_hash.digest, man.name, version, saturated_size); + return .init(f.computed_hash.digest, man.name, version, man.id, saturated_size); } - return .initNaked(f.computed_hash.digest, saturated_size); + // In the future build.zig.zon fields 
will be added to allow overriding these values + // for naked tarballs. + return .init(f.computed_hash.digest, "N", "V", 0xffff, saturated_size); } /// `computeHash` gets a free check for the existence of `build.zig`, but when @@ -645,11 +647,13 @@ fn loadManifest(f: *Fetch, pkg_root: Cache.Path) RunError!void { f.manifest = try Manifest.parse(arena, ast.*, .{ .allow_missing_paths_field = f.allow_missing_paths_field, + .allow_missing_id = f.allow_missing_paths_field, + .allow_name_string = f.allow_missing_paths_field, }); const manifest = &f.manifest.?; if (manifest.errors.len > 0) { - const src_path = try eb.printString("{}{s}", .{ pkg_root, Manifest.basename }); + const src_path = try eb.printString("{}" ++ fs.path.sep_str ++ "{s}", .{ pkg_root, Manifest.basename }); try manifest.copyErrorsIntoBundle(ast.*, src_path, eb); return error.FetchFailed; } diff --git a/src/Package/Manifest.zig b/src/Package/Manifest.zig index 82c850d705..083b56264d 100644 --- a/src/Package/Manifest.zig +++ b/src/Package/Manifest.zig @@ -36,6 +36,7 @@ pub const ErrorMessage = struct { }; name: []const u8, +id: u16, version: std.SemanticVersion, version_node: Ast.Node.Index, dependencies: std.StringArrayHashMapUnmanaged(Dependency), @@ -50,6 +51,8 @@ pub const ParseOptions = struct { allow_missing_paths_field: bool = false, /// Deprecated, to be removed after 0.14.0 is tagged. allow_name_string: bool = true, + /// Deprecated, to be removed after 0.14.0 is tagged. 
+ allow_missing_id: bool = true, }; pub const Error = Allocator.Error; @@ -70,6 +73,7 @@ pub fn parse(gpa: Allocator, ast: Ast, options: ParseOptions) Error!Manifest { .errors = .{}, .name = undefined, + .id = 0, .version = undefined, .version_node = 0, .dependencies = .{}, @@ -77,6 +81,7 @@ pub fn parse(gpa: Allocator, ast: Ast, options: ParseOptions) Error!Manifest { .paths = .{}, .allow_missing_paths_field = options.allow_missing_paths_field, .allow_name_string = options.allow_name_string, + .allow_missing_id = options.allow_missing_id, .minimum_zig_version = null, .buf = .{}, }; @@ -92,6 +97,7 @@ pub fn parse(gpa: Allocator, ast: Ast, options: ParseOptions) Error!Manifest { return .{ .name = p.name, + .id = p.id, .version = p.version, .version_node = p.version_node, .dependencies = try p.dependencies.clone(p.arena), @@ -143,6 +149,7 @@ const Parse = struct { errors: std.ArrayListUnmanaged(ErrorMessage), name: []const u8, + id: u16, version: std.SemanticVersion, version_node: Ast.Node.Index, dependencies: std.StringArrayHashMapUnmanaged(Dependency), @@ -150,6 +157,7 @@ const Parse = struct { paths: std.StringArrayHashMapUnmanaged(void), allow_missing_paths_field: bool, allow_name_string: bool, + allow_missing_id: bool, minimum_zig_version: ?std.SemanticVersion, const InnerError = error{ ParseFailure, OutOfMemory }; @@ -167,6 +175,7 @@ const Parse = struct { var have_name = false; var have_version = false; var have_included_paths = false; + var have_id = false; for (struct_init.ast.fields) |field_init| { const name_token = ast.firstToken(field_init) - 2; @@ -183,6 +192,9 @@ const Parse = struct { } else if (mem.eql(u8, field_name, "name")) { p.name = try parseName(p, field_init); have_name = true; + } else if (mem.eql(u8, field_name, "id")) { + p.id = try parseId(p, field_init); + have_id = true; } else if (mem.eql(u8, field_name, "version")) { p.version_node = field_init; const version_text = try parseString(p, field_init); @@ -206,6 +218,12 @@ const Parse = 
struct { } } + if (!have_id and !p.allow_missing_id) { + try appendError(p, main_token, "missing top-level 'id' field; suggested value: 0x{x}", .{ + Package.randomId(), + }); + } + if (!have_name) { try appendError(p, main_token, "missing top-level 'name' field", .{}); } @@ -359,6 +377,33 @@ const Parse = struct { } } + fn parseId(p: *Parse, node: Ast.Node.Index) !u16 { + const ast = p.ast; + const node_tags = ast.nodes.items(.tag); + const main_tokens = ast.nodes.items(.main_token); + const main_token = main_tokens[node]; + if (node_tags[node] != .number_literal) { + return fail(p, main_token, "expected integer literal", .{}); + } + const token_bytes = ast.tokenSlice(main_token); + const parsed = std.zig.parseNumberLiteral(token_bytes); + const n = switch (parsed) { + .int => |n| n, + .big_int, .float => return fail(p, main_token, "expected u16 integer literal, found {s}", .{ + @tagName(parsed), + }), + .failure => |err| return fail(p, main_token, "bad integer literal: {s}", .{@tagName(err)}), + }; + const casted = std.math.cast(u16, n) orelse + return fail(p, main_token, "integer value {d} does not fit into u16", .{n}); + switch (casted) { + 0x0000, 0xffff => return fail(p, main_token, "id value 0x{x} reserved; use 0x{x} instead", .{ + casted, Package.randomId(), + }), + else => return casted, + } + } + fn parseName(p: *Parse, node: Ast.Node.Index) ![]const u8 { const ast = p.ast; const node_tags = ast.nodes.items(.tag); @@ -371,7 +416,7 @@ const Parse = struct { return fail(p, main_token, "name must be a valid bare zig identifier (hint: switch from string to enum literal)", .{}); if (name.len > max_name_len) - return fail(p, main_token, "name '{s}' exceeds max length of {d}", .{ + return fail(p, main_token, "name '{}' exceeds max length of {d}", .{ std.zig.fmtId(name), max_name_len, }); @@ -386,7 +431,7 @@ const Parse = struct { return fail(p, main_token, "name must be a valid bare zig identifier", .{}); if (ident_name.len > max_name_len) - return fail(p, 
main_token, "name '{s}' exceeds max length of {d}", .{ + return fail(p, main_token, "name '{}' exceeds max length of {d}", .{ std.zig.fmtId(ident_name), max_name_len, }); diff --git a/src/main.zig b/src/main.zig index d6b20f94f9..b1680dbf8e 100644 --- a/src/main.zig +++ b/src/main.zig @@ -4751,8 +4751,10 @@ fn cmdInit(gpa: Allocator, arena: Allocator, args: []const []const u8) !void { }; var ok_count: usize = 0; + const id = Package.randomId(); + for (template_paths) |template_path| { - if (templates.write(arena, fs.cwd(), cwd_basename, template_path)) |_| { + if (templates.write(arena, fs.cwd(), cwd_basename, template_path, id)) |_| { std.log.info("created {s}", .{template_path}); ok_count += 1; } else |err| switch (err) { @@ -7430,10 +7432,10 @@ fn loadManifest( 0, ) catch |err| switch (err) { error.FileNotFound => { + const id = Package.randomId(); var templates = findTemplates(gpa, arena); defer templates.deinit(); - - templates.write(arena, options.dir, options.root_name, Package.Manifest.basename) catch |e| { + templates.write(arena, options.dir, options.root_name, Package.Manifest.basename, id) catch |e| { fatal("unable to write {s}: {s}", .{ Package.Manifest.basename, @errorName(e), }); @@ -7491,6 +7493,7 @@ const Templates = struct { out_dir: fs.Dir, root_name: []const u8, template_path: []const u8, + id: u16, ) !void { if (fs.path.dirname(template_path)) |dirname| { out_dir.makePath(dirname) catch |err| { @@ -7504,13 +7507,28 @@ const Templates = struct { }; templates.buffer.clearRetainingCapacity(); try templates.buffer.ensureUnusedCapacity(contents.len); - for (contents) |c| { - if (c == '$') { - try templates.buffer.appendSlice(root_name); - } else { - try templates.buffer.append(c); - } - } + var state: enum { start, dollar } = .start; + for (contents) |c| switch (state) { + .start => switch (c) { + '$' => state = .dollar, + else => try templates.buffer.append(c), + }, + .dollar => switch (c) { + 'n' => { + try templates.buffer.appendSlice(root_name); 
+ state = .start; + }, + 'i' => { + try templates.buffer.writer().print("0x{x}", .{id}); + state = .start; + }, + 'v' => { + try templates.buffer.appendSlice(build_options.version); + state = .start; + }, + else => fatal("unknown substitution: ${c}", .{c}), + }, + }; return out_dir.writeFile(.{ .sub_path = template_path, |
