| author | Ian Johnson <ian@ianjohnson.dev> | 2024-11-20 00:47:00 -0500 |
|---|---|---|
| committer | Ian Johnson <ian@ianjohnson.dev> | 2024-12-13 08:49:45 -0500 |
| commit | 5217da57fc95920be93dda02bc905baffe4ee89d | |
| tree | 744bdf278eb81111d036cef39d500086a3f378ad /src/Package/Fetch/git.zig | |
| parent | 62cc81a63afcc1c4bb1863c22d247169a2ab8136 | |
zig fetch: support SHA-256 Git repositories
Closes #21888
Diffstat (limited to 'src/Package/Fetch/git.zig')
| -rw-r--r-- | src/Package/Fetch/git.zig | 321 |
1 file changed, 236 insertions(+), 85 deletions(-)
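The core of the change is visible in the first hunk below: the fixed-size SHA-1 `Oid` array becomes a tagged union over a new `Oid.Format` enum, and every consumer now threads a format value through. A minimal sketch of how the resulting API is meant to be called (the import path and test name are illustrative, not part of the patch; the functions used are the ones added below):

```zig
const std = @import("std");
// Illustrative import; in the Zig tree this module is src/Package/Fetch/git.zig.
const git = @import("git.zig");

test "Oid usage sketch" {
    // parseAny infers the format from the string length:
    // 40 hex digits -> SHA-1, 64 hex digits -> SHA-256.
    const sha1_oid = try git.Oid.parseAny("dd582c0720819ab7130b103635bd7271b9fd4feb");
    const sha256_oid = try git.Oid.parseAny("7f444a92bd4572ee4a28b2c63059924a9ca1829138553ef3e7c41ee159afae7a");
    try std.testing.expectEqual(.sha1, std.meta.activeTag(sha1_oid));
    try std.testing.expectEqual(.sha256, std.meta.activeTag(sha256_oid));

    // Oid provides a std.fmt format method, so "{}" prints lowercase hex;
    // max_formatted_length bounds the output for either format.
    var buf: [git.Oid.max_formatted_length]u8 = undefined;
    const printed = try std.fmt.bufPrint(&buf, "{}", .{sha1_oid});
    try std.testing.expectEqualStrings("dd582c0720819ab7130b103635bd7271b9fd4feb", printed);
}
```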
diff --git a/src/Package/Fetch/git.zig b/src/Package/Fetch/git.zig
index 9f1dcc4cef..f6e3dc1615 100644
--- a/src/Package/Fetch/git.zig
+++ b/src/Package/Fetch/git.zig
@@ -9,32 +9,133 @@
 const mem = std.mem;
 const testing = std.testing;
 const Allocator = mem.Allocator;
 const Sha1 = std.crypto.hash.Sha1;
+const Sha256 = std.crypto.hash.sha2.Sha256;
 const assert = std.debug.assert;
 
-pub const oid_length = Sha1.digest_length;
-pub const fmt_oid_length = 2 * oid_length;
-/// The ID of a Git object (an SHA-1 hash).
-pub const Oid = [oid_length]u8;
+/// The ID of a Git object.
+pub const Oid = union(Format) {
+    sha1: [Sha1.digest_length]u8,
+    sha256: [Sha256.digest_length]u8,
 
-pub fn parseOid(s: []const u8) !Oid {
-    if (s.len != fmt_oid_length) return error.InvalidOid;
-    var oid: Oid = undefined;
-    for (&oid, 0..) |*b, i| {
-        b.* = std.fmt.parseUnsigned(u8, s[2 * i ..][0..2], 16) catch return error.InvalidOid;
+    pub const max_formatted_length = len: {
+        var max: usize = 0;
+        for (std.enums.values(Format)) |f| {
+            max = @max(max, f.formattedLength());
+        }
+        break :len max;
+    };
+
+    pub const Format = enum {
+        sha1,
+        sha256,
+
+        pub fn byteLength(f: Format) usize {
+            return switch (f) {
+                .sha1 => Sha1.digest_length,
+                .sha256 => Sha256.digest_length,
+            };
+        }
+
+        pub fn formattedLength(f: Format) usize {
+            return 2 * f.byteLength();
+        }
+    };
+
+    const Hasher = union(Format) {
+        sha1: Sha1,
+        sha256: Sha256,
+
+        fn init(oid_format: Format) Hasher {
+            return switch (oid_format) {
+                .sha1 => .{ .sha1 = Sha1.init(.{}) },
+                .sha256 => .{ .sha256 = Sha256.init(.{}) },
+            };
+        }
+
+        // Must be public for use from HashedReader and HashedWriter.
+        pub fn update(hasher: *Hasher, b: []const u8) void {
+            switch (hasher.*) {
+                inline else => |*inner| inner.update(b),
+            }
+        }
+
+        fn finalResult(hasher: *Hasher) Oid {
+            return switch (hasher.*) {
+                inline else => |*inner, tag| @unionInit(Oid, @tagName(tag), inner.finalResult()),
+            };
+        }
+    };
+
+    pub fn fromBytes(oid_format: Format, bytes: []const u8) Oid {
+        assert(bytes.len == oid_format.byteLength());
+        return switch (oid_format) {
+            inline else => |tag| @unionInit(Oid, @tagName(tag), bytes[0..comptime tag.byteLength()].*),
+        };
     }
-    return oid;
-}
 
-test parseOid {
-    try testing.expectEqualSlices(
-        u8,
-        &.{ 0xCE, 0x91, 0x9C, 0xCF, 0x45, 0x95, 0x18, 0x56, 0xA7, 0x62, 0xFF, 0xDB, 0x8E, 0xF8, 0x50, 0x30, 0x1C, 0xD8, 0xC5, 0x88 },
-        &try parseOid("ce919ccf45951856a762ffdb8ef850301cd8c588"),
-    );
-    try testing.expectError(error.InvalidOid, parseOid("ce919ccf"));
-    try testing.expectError(error.InvalidOid, parseOid("master"));
-    try testing.expectError(error.InvalidOid, parseOid("HEAD"));
-}
+    pub fn readBytes(oid_format: Format, reader: anytype) @TypeOf(reader).NoEofError!Oid {
+        return switch (oid_format) {
+            inline else => |tag| @unionInit(Oid, @tagName(tag), try reader.readBytesNoEof(tag.byteLength())),
+        };
+    }
+
+    pub fn parse(oid_format: Format, s: []const u8) error{InvalidOid}!Oid {
+        switch (oid_format) {
+            inline else => |tag| {
+                if (s.len != tag.formattedLength()) return error.InvalidOid;
+                var bytes: [tag.byteLength()]u8 = undefined;
+                for (&bytes, 0..) |*b, i| {
+                    b.* = std.fmt.parseUnsigned(u8, s[2 * i ..][0..2], 16) catch return error.InvalidOid;
+                }
+                return @unionInit(Oid, @tagName(tag), bytes);
+            },
+        }
+    }
+
+    test parse {
+        try testing.expectEqualSlices(
+            u8,
+            &.{ 0xCE, 0x91, 0x9C, 0xCF, 0x45, 0x95, 0x18, 0x56, 0xA7, 0x62, 0xFF, 0xDB, 0x8E, 0xF8, 0x50, 0x30, 0x1C, 0xD8, 0xC5, 0x88 },
+            &(try parse(.sha1, "ce919ccf45951856a762ffdb8ef850301cd8c588")).sha1,
+        );
+        try testing.expectError(error.InvalidOid, parse(.sha256, "ce919ccf45951856a762ffdb8ef850301cd8c588"));
+        try testing.expectError(error.InvalidOid, parse(.sha1, "7f444a92bd4572ee4a28b2c63059924a9ca1829138553ef3e7c41ee159afae7a"));
+        try testing.expectEqualSlices(
+            u8,
+            &.{ 0x7F, 0x44, 0x4A, 0x92, 0xBD, 0x45, 0x72, 0xEE, 0x4A, 0x28, 0xB2, 0xC6, 0x30, 0x59, 0x92, 0x4A, 0x9C, 0xA1, 0x82, 0x91, 0x38, 0x55, 0x3E, 0xF3, 0xE7, 0xC4, 0x1E, 0xE1, 0x59, 0xAF, 0xAE, 0x7A },
+            &(try parse(.sha256, "7f444a92bd4572ee4a28b2c63059924a9ca1829138553ef3e7c41ee159afae7a")).sha256,
+        );
+        try testing.expectError(error.InvalidOid, parse(.sha1, "ce919ccf"));
+        try testing.expectError(error.InvalidOid, parse(.sha256, "ce919ccf"));
+        try testing.expectError(error.InvalidOid, parse(.sha1, "master"));
+        try testing.expectError(error.InvalidOid, parse(.sha256, "master"));
+        try testing.expectError(error.InvalidOid, parse(.sha1, "HEAD"));
+        try testing.expectError(error.InvalidOid, parse(.sha256, "HEAD"));
+    }
+
+    pub fn parseAny(s: []const u8) error{InvalidOid}!Oid {
+        return for (std.enums.values(Format)) |f| {
+            if (s.len == f.formattedLength()) break parse(f, s);
+        } else error.InvalidOid;
+    }
+
+    pub fn format(
+        oid: Oid,
+        comptime fmt: []const u8,
+        options: std.fmt.FormatOptions,
+        writer: anytype,
+    ) @TypeOf(writer).Error!void {
+        _ = fmt;
+        _ = options;
+        try writer.print("{}", .{std.fmt.fmtSliceHexLower(oid.slice())});
+    }
+
+    pub fn slice(oid: *const Oid) []const u8 {
+        return switch (oid.*) {
+            inline else => |*bytes| bytes,
+        };
+    }
+};
 
 pub const Diagnostics = struct {
     allocator: Allocator,
@@ -72,8 +173,8 @@ pub const Diagnostics = struct {
 pub const Repository = struct {
     odb: Odb,
 
-    pub fn init(allocator: Allocator, pack_file: std.fs.File, index_file: std.fs.File) !Repository {
-        return .{ .odb = try Odb.init(allocator, pack_file, index_file) };
+    pub fn init(allocator: Allocator, format: Oid.Format, pack_file: std.fs.File, index_file: std.fs.File) !Repository {
+        return .{ .odb = try Odb.init(allocator, format, pack_file, index_file) };
     }
 
     pub fn deinit(repository: *Repository) void {
@@ -92,7 +193,7 @@ pub const Repository = struct {
         const tree_oid = tree_oid: {
            const commit_object = try repository.odb.readObject();
            if (commit_object.type != .commit) return error.NotACommit;
-            break :tree_oid try getCommitTree(commit_object.data);
+            break :tree_oid try getCommitTree(repository.odb.format, commit_object.data);
         };
         try repository.checkoutTree(worktree, tree_oid, "", diagnostics);
     }
@@ -114,7 +215,11 @@ pub const Repository = struct {
         const tree_data = try repository.odb.allocator.dupe(u8, tree_object.data);
         defer repository.odb.allocator.free(tree_data);
 
-        var tree_iter: TreeIterator = .{ .data = tree_data };
+        var tree_iter: TreeIterator = .{
+            .format = repository.odb.format,
+            .data = tree_data,
+            .pos = 0,
+        };
         while (try tree_iter.next()) |entry| {
             switch (entry.type) {
                 .directory => {
@@ -170,19 +275,20 @@ pub const Repository = struct {
     /// Returns the ID of the tree associated with the given commit (provided as
     /// raw object data).
-    fn getCommitTree(commit_data: []const u8) !Oid {
+    fn getCommitTree(format: Oid.Format, commit_data: []const u8) !Oid {
         if (!mem.startsWith(u8, commit_data, "tree ") or
-            commit_data.len < "tree ".len + fmt_oid_length + "\n".len or
-            commit_data["tree ".len + fmt_oid_length] != '\n')
+            commit_data.len < "tree ".len + format.formattedLength() + "\n".len or
+            commit_data["tree ".len + format.formattedLength()] != '\n')
         {
             return error.InvalidCommit;
         }
-        return try parseOid(commit_data["tree ".len..][0..fmt_oid_length]);
+        return try .parse(format, commit_data["tree ".len..][0..format.formattedLength()]);
     }
 
     const TreeIterator = struct {
+        format: Oid.Format,
         data: []const u8,
-        pos: usize = 0,
+        pos: usize,
 
         const Entry = struct {
             type: Type,
@@ -220,8 +326,9 @@ pub const Repository = struct {
             const name = iterator.data[iterator.pos..name_end :0];
             iterator.pos = name_end + 1;
 
+            const oid_length = iterator.format.byteLength();
             if (iterator.pos + oid_length > iterator.data.len) return error.InvalidTree;
-            const oid = iterator.data[iterator.pos..][0..oid_length].*;
+            const oid: Oid = .fromBytes(iterator.format, iterator.data[iterator.pos..][0..oid_length]);
             iterator.pos += oid_length;
 
             return .{ .type = @"type", .executable = executable, .name = name, .oid = oid };
@@ -235,6 +342,7 @@ pub const Repository = struct {
 /// The format of the packfile and its associated index are documented in
 /// [pack-format](https://git-scm.com/docs/pack-format).
 const Odb = struct {
+    format: Oid.Format,
     pack_file: std.fs.File,
     index_header: IndexHeader,
     index_file: std.fs.File,
@@ -242,11 +350,12 @@ const Odb = struct {
     allocator: Allocator,
 
     /// Initializes the database from open pack and index files.
-    fn init(allocator: Allocator, pack_file: std.fs.File, index_file: std.fs.File) !Odb {
+    fn init(allocator: Allocator, format: Oid.Format, pack_file: std.fs.File, index_file: std.fs.File) !Odb {
         try pack_file.seekTo(0);
         try index_file.seekTo(0);
         const index_header = try IndexHeader.read(index_file.reader());
         return .{
+            .format = format,
             .pack_file = pack_file,
             .index_header = index_header,
             .index_file = index_file,
@@ -268,7 +377,7 @@ const Odb = struct {
         const base_object = while (true) {
             if (odb.cache.get(base_offset)) |base_object| break base_object;
 
-            base_header = try EntryHeader.read(odb.pack_file.reader());
+            base_header = try EntryHeader.read(odb.format, odb.pack_file.reader());
             switch (base_header) {
                 .ofs_delta => |ofs_delta| {
                     try delta_offsets.append(odb.allocator, base_offset);
@@ -292,6 +401,7 @@ const Odb = struct {
 
         const base_data = try resolveDeltaChain(
             odb.allocator,
+            odb.format,
             odb.pack_file,
             base_object,
             delta_offsets.items,
@@ -303,14 +413,15 @@ const Odb = struct {
     /// Seeks to the beginning of the object with the given ID.
     fn seekOid(odb: *Odb, oid: Oid) !void {
-        const key = oid[0];
+        const oid_length = odb.format.byteLength();
+        const key = oid.slice()[0];
         var start_index = if (key > 0) odb.index_header.fan_out_table[key - 1] else 0;
         var end_index = odb.index_header.fan_out_table[key];
         const found_index = while (start_index < end_index) {
             const mid_index = start_index + (end_index - start_index) / 2;
             try odb.index_file.seekTo(IndexHeader.size + mid_index * oid_length);
-            const mid_oid = try odb.index_file.reader().readBytesNoEof(oid_length);
-            switch (mem.order(u8, &mid_oid, &oid)) {
+            const mid_oid = try Oid.readBytes(odb.format, odb.index_file.reader());
+            switch (mem.order(u8, mid_oid.slice(), oid.slice())) {
                 .lt => start_index = mid_index + 1,
                 .gt => end_index = mid_index,
                 .eq => break mid_index,
@@ -495,6 +606,7 @@ pub const Session = struct {
     location: Location,
     supports_agent: bool,
     supports_shallow: bool,
+    object_format: Oid.Format,
     allocator: Allocator,
 
     const agent = "zig/" ++ @import("builtin").zig_version_string;
@@ -513,6 +625,7 @@ pub const Session = struct {
            .location = try .init(allocator, uri),
            .supports_agent = false,
            .supports_shallow = false,
+           .object_format = .sha1,
            .allocator = allocator,
        };
        errdefer session.deinit();
@@ -528,6 +641,10 @@ pub const Session = struct {
                        session.supports_shallow = true;
                    }
                }
+           } else if (mem.eql(u8, capability.key, "object-format")) {
+               if (std.meta.stringToEnum(Oid.Format, capability.value orelse continue)) |format| {
+                   session.object_format = format;
+               }
            }
        }
        return session;
@@ -708,6 +825,11 @@ pub const Session = struct {
        if (session.supports_agent) {
            try Packet.write(.{ .data = agent_capability }, body_writer);
        }
+       {
+           const object_format_packet = try std.fmt.allocPrint(session.allocator, "object-format={s}\n", .{@tagName(session.object_format)});
+           defer session.allocator.free(object_format_packet);
+           try Packet.write(.{ .data = object_format_packet }, body_writer);
+       }
        try Packet.write(.delimiter, body_writer);
        for (options.ref_prefixes) |ref_prefix| {
            const ref_prefix_packet = try std.fmt.allocPrint(session.allocator, "ref-prefix {s}\n", .{ref_prefix});
@@ -739,10 +861,14 @@ pub const Session = struct {
        try request.wait();
        if (request.response.status != .ok) return error.ProtocolError;
 
-       return .{ .request = request };
+       return .{
+           .format = session.object_format,
+           .request = request,
+       };
     }
 
     pub const RefIterator = struct {
+        format: Oid.Format,
         request: std.http.Client.Request,
         buf: [Packet.max_data_length]u8 = undefined,
@@ -764,7 +890,7 @@ pub const Session = struct {
                .data => |data| {
                    const ref_data = Packet.normalizeText(data);
                    const oid_sep_pos = mem.indexOfScalar(u8, ref_data, ' ') orelse return error.InvalidRefPacket;
-                   const oid = parseOid(data[0..oid_sep_pos]) catch return error.InvalidRefPacket;
+                   const oid = Oid.parse(iterator.format, data[0..oid_sep_pos]) catch return error.InvalidRefPacket;
                    const name_sep_pos = mem.indexOfScalarPos(u8, ref_data, oid_sep_pos + 1, ' ') orelse ref_data.len;
                    const name = ref_data[oid_sep_pos + 1 .. name_sep_pos];
@@ -778,7 +904,7 @@ pub const Session = struct {
                        if (mem.startsWith(u8, attribute, "symref-target:")) {
                            symref_target = attribute["symref-target:".len..];
                        } else if (mem.startsWith(u8, attribute, "peeled:")) {
-                           peeled = parseOid(attribute["peeled:".len..]) catch return error.InvalidRefPacket;
+                           peeled = Oid.parse(iterator.format, attribute["peeled:".len..]) catch return error.InvalidRefPacket;
                        }
                        last_sep_pos = next_sep_pos;
                    }
@@ -814,6 +940,11 @@ pub const Session = struct {
        if (session.supports_agent) {
            try Packet.write(.{ .data = agent_capability }, body_writer);
        }
+       {
+           const object_format_packet = try std.fmt.allocPrint(session.allocator, "object-format={s}\n", .{@tagName(session.object_format)});
+           defer session.allocator.free(object_format_packet);
+           try Packet.write(.{ .data = object_format_packet }, body_writer);
+       }
        try Packet.write(.delimiter, body_writer);
        // Our packfile parser supports the OFS_DELTA object type
        try Packet.write(.{ .data = "ofs-delta\n" }, body_writer);
@@ -997,7 +1128,7 @@ const EntryHeader = union(Type) {
        };
    }
 
-   fn read(reader: anytype) !EntryHeader {
+   fn read(format: Oid.Format, reader: anytype) !EntryHeader {
        const InitialByte = packed struct { len: u4, type: u3, has_next: bool };
        const initial: InitialByte = @bitCast(reader.readByte() catch |e| switch (e) {
            error.EndOfStream => return error.InvalidFormat,
@@ -1016,7 +1147,7 @@ const EntryHeader = union(Type) {
                .uncompressed_length = uncompressed_length,
            } },
            .ref_delta => .{ .ref_delta = .{
-               .base_object = reader.readBytesNoEof(oid_length) catch |e| switch (e) {
+               .base_object = Oid.readBytes(format, reader) catch |e| switch (e) {
                    error.EndOfStream => return error.InvalidFormat,
                    else => |other| return other,
                },
@@ -1081,7 +1212,7 @@ const IndexEntry = struct {
 /// Writes out a version 2 index for the given packfile, as documented in
 /// [pack-format](https://git-scm.com/docs/pack-format).
-pub fn indexPack(allocator: Allocator, pack: std.fs.File, index_writer: anytype) !void {
+pub fn indexPack(allocator: Allocator, format: Oid.Format, pack: std.fs.File, index_writer: anytype) !void {
     try pack.seekTo(0);
 
     var index_entries: std.AutoHashMapUnmanaged(Oid, IndexEntry) = .empty;
@@ -1089,7 +1220,7 @@ pub fn indexPack(allocator: Allocator, pack: std.fs.File, index_writer: anytype)
     var pending_deltas: std.ArrayListUnmanaged(IndexEntry) = .empty;
     defer pending_deltas.deinit(allocator);
 
-    const pack_checksum = try indexPackFirstPass(allocator, pack, &index_entries, &pending_deltas);
+    const pack_checksum = try indexPackFirstPass(allocator, format, pack, &index_entries, &pending_deltas);
 
     var cache: ObjectCache = .{};
     defer cache.deinit(allocator);
@@ -1099,7 +1230,7 @@ pub fn indexPack(allocator: Allocator, pack: std.fs.File, index_writer: anytype)
         while (i > 0) {
             i -= 1;
             const delta = pending_deltas.items[i];
-            if (try indexPackHashDelta(allocator, pack, delta, index_entries, &cache)) |oid| {
+            if (try indexPackHashDelta(allocator, format, pack, delta, index_entries, &cache)) |oid| {
                 try index_entries.put(allocator, oid, delta);
                 _ = pending_deltas.swapRemove(i);
             }
@@ -1117,7 +1248,7 @@ pub fn indexPack(allocator: Allocator, pack: std.fs.File, index_writer: anytype)
     }
     mem.sortUnstable(Oid, oids.items, {}, struct {
         fn lessThan(_: void, o1: Oid, o2: Oid) bool {
-            return mem.lessThan(u8, &o1, &o2);
+            return mem.lessThan(u8, o1.slice(), o2.slice());
         }
     }.lessThan);
@@ -1125,15 +1256,16 @@ pub fn indexPack(allocator: Allocator, pack: std.fs.File, index_writer: anytype)
     var count: u32 = 0;
     var fan_out_index: u8 = 0;
     for (oids.items) |oid| {
-        if (oid[0] > fan_out_index) {
-            @memset(fan_out_table[fan_out_index..oid[0]], count);
-            fan_out_index = oid[0];
+        const key = oid.slice()[0];
+        if (key > fan_out_index) {
+            @memset(fan_out_table[fan_out_index..key], count);
+            fan_out_index = key;
         }
         count += 1;
     }
     @memset(fan_out_table[fan_out_index..], count);
 
-    var index_hashed_writer = std.compress.hashedWriter(index_writer, Sha1.init(.{}));
+    var index_hashed_writer = std.compress.hashedWriter(index_writer, Oid.Hasher.init(format));
     const writer = index_hashed_writer.writer();
     try writer.writeAll(IndexHeader.signature);
     try writer.writeInt(u32, IndexHeader.supported_version, .big);
@@ -1142,7 +1274,7 @@ pub fn indexPack(allocator: Allocator, pack: std.fs.File, index_writer: anytype)
     }
 
     for (oids.items) |oid| {
-        try writer.writeAll(&oid);
+        try writer.writeAll(oid.slice());
     }
 
     for (oids.items) |oid| {
@@ -1165,9 +1297,9 @@ pub fn indexPack(allocator: Allocator, pack: std.fs.File, index_writer: anytype)
         try writer.writeInt(u64, offset, .big);
     }
 
-    try writer.writeAll(&pack_checksum);
+    try writer.writeAll(pack_checksum.slice());
     const index_checksum = index_hashed_writer.hasher.finalResult();
-    try index_writer.writeAll(&index_checksum);
+    try index_writer.writeAll(index_checksum.slice());
 }
 
 /// Performs the first pass over the packfile data for index construction.
@@ -1176,13 +1308,14 @@ pub fn indexPack(allocator: Allocator, pack: std.fs.File, index_writer: anytype)
 /// format).
 fn indexPackFirstPass(
     allocator: Allocator,
+    format: Oid.Format,
     pack: std.fs.File,
     index_entries: *std.AutoHashMapUnmanaged(Oid, IndexEntry),
     pending_deltas: *std.ArrayListUnmanaged(IndexEntry),
-) ![Sha1.digest_length]u8 {
+) !Oid {
     var pack_buffered_reader = std.io.bufferedReader(pack.reader());
     var pack_counting_reader = std.io.countingReader(pack_buffered_reader.reader());
-    var pack_hashed_reader = std.compress.hashedReader(pack_counting_reader.reader(), Sha1.init(.{}));
+    var pack_hashed_reader = std.compress.hashedReader(pack_counting_reader.reader(), Oid.Hasher.init(format));
     const pack_reader = pack_hashed_reader.reader();
 
     const pack_header = try PackHeader.read(pack_reader);
@@ -1191,12 +1324,12 @@ fn indexPackFirstPass(
     while (current_entry < pack_header.total_objects) : (current_entry += 1) {
         const entry_offset = pack_counting_reader.bytes_read;
         var entry_crc32_reader = std.compress.hashedReader(pack_reader, std.hash.Crc32.init());
-        const entry_header = try EntryHeader.read(entry_crc32_reader.reader());
+        const entry_header = try EntryHeader.read(format, entry_crc32_reader.reader());
         switch (entry_header) {
             .commit, .tree, .blob, .tag => |object| {
                 var entry_decompress_stream = std.compress.zlib.decompressor(entry_crc32_reader.reader());
                 var entry_counting_reader = std.io.countingReader(entry_decompress_stream.reader());
-                var entry_hashed_writer = std.compress.hashedWriter(std.io.null_writer, Sha1.init(.{}));
+                var entry_hashed_writer = std.compress.hashedWriter(std.io.null_writer, Oid.Hasher.init(format));
                 const entry_writer = entry_hashed_writer.writer();
                 // The object header is not included in the pack data but is
                 // part of the object's ID
@@ -1229,8 +1362,8 @@ fn indexPackFirstPass(
     }
 
     const pack_checksum = pack_hashed_reader.hasher.finalResult();
-    const recorded_checksum = try pack_buffered_reader.reader().readBytesNoEof(Sha1.digest_length);
-    if (!mem.eql(u8, &pack_checksum, &recorded_checksum)) {
+    const recorded_checksum = try Oid.readBytes(format, pack_buffered_reader.reader());
+    if (!mem.eql(u8, pack_checksum.slice(), recorded_checksum.slice())) {
         return error.CorruptedPack;
     }
     _ = pack_reader.readByte() catch |e| switch (e) {
@@ -1245,6 +1378,7 @@ fn indexPackFirstPass(
 /// delta and we do not yet know the offset of the base object).
 fn indexPackHashDelta(
     allocator: Allocator,
+    format: Oid.Format,
     pack: std.fs.File,
     delta: IndexEntry,
     index_entries: std.AutoHashMapUnmanaged(Oid, IndexEntry),
@@ -1259,7 +1393,7 @@ fn indexPackHashDelta(
         if (cache.get(base_offset)) |base_object| break base_object;
 
         try pack.seekTo(base_offset);
-        base_header = try EntryHeader.read(pack.reader());
+        base_header = try EntryHeader.read(format, pack.reader());
         switch (base_header) {
            .ofs_delta => |ofs_delta| {
                try delta_offsets.append(allocator, base_offset);
@@ -1279,9 +1413,9 @@ fn indexPackHashDelta(
        }
    };
 
-    const base_data = try resolveDeltaChain(allocator, pack, base_object, delta_offsets.items, cache);
+    const base_data = try resolveDeltaChain(allocator, format, pack, base_object, delta_offsets.items, cache);
 
-    var entry_hasher = Sha1.init(.{});
+    var entry_hasher: Oid.Hasher = .init(format);
     var entry_hashed_writer = std.compress.hashedWriter(std.io.null_writer, &entry_hasher);
     try entry_hashed_writer.writer().print("{s} {}\x00", .{ @tagName(base_object.type), base_data.len });
     entry_hasher.update(base_data);
@@ -1294,6 +1428,7 @@ fn indexPackHashDelta(
 /// to obtain the final object.
 fn resolveDeltaChain(
     allocator: Allocator,
+    format: Oid.Format,
     pack: std.fs.File,
     base_object: Object,
     delta_offsets: []const u64,
@@ -1306,7 +1441,7 @@ fn resolveDeltaChain(
         const delta_offset = delta_offsets[i];
 
         try pack.seekTo(delta_offset);
-        const delta_header = try EntryHeader.read(pack.reader());
+        const delta_header = try EntryHeader.read(format, pack.reader());
         const delta_data = try readObjectRaw(allocator, pack.reader(), delta_header.uncompressedLength());
         defer allocator.free(delta_data);
         var delta_stream = std.io.fixedBufferStream(delta_data);
@@ -1394,16 +1529,22 @@ fn expandDelta(base_object: anytype, delta_reader: anytype, writer: anytype) !vo
     }
 }
 
-test "packfile indexing and checkout" {
-    // To verify the contents of this packfile without using the code in this
-    // file:
-    //
-    // 1. Create a new empty Git repository (`git init`)
-    // 2. `git unpack-objects <path/to/testdata.pack`
-    // 3. `git fsck` -> note the "dangling commit" ID (which matches the commit
-    //    checked out below)
-    // 4. `git checkout dd582c0720819ab7130b103635bd7271b9fd4feb`
-    const testrepo_pack = @embedFile("git/testdata/testrepo.pack");
+/// Runs the packfile indexing and checkout test.
+///
+/// The two testrepo repositories under testdata contain identical commit
+/// histories and contents.
+///
+/// To verify the contents of the packfiles using Git alone, run the
+/// following commands in an empty directory:
+///
+/// 1. `git init --object-format=(sha1|sha256)`
+/// 2. `git unpack-objects <path/to/testrepo.pack`
+/// 3. `git fsck` - will print one "dangling commit":
+///    - SHA-1: `dd582c0720819ab7130b103635bd7271b9fd4feb`
+///    - SHA-256: `7f444a92bd4572ee4a28b2c63059924a9ca1829138553ef3e7c41ee159afae7a`
+/// 4. `git checkout $commit`
+fn runRepositoryTest(comptime format: Oid.Format, head_commit: []const u8) !void {
+    const testrepo_pack = @embedFile("git/testdata/testrepo-" ++ @tagName(format) ++ ".pack");
 
     var git_dir = testing.tmpDir(.{});
     defer git_dir.cleanup();
@@ -1413,27 +1554,27 @@
     var index_file = try git_dir.dir.createFile("testrepo.idx", .{ .read = true });
     defer index_file.close();
-    try indexPack(testing.allocator, pack_file, index_file.writer());
+    try indexPack(testing.allocator, format, pack_file, index_file.writer());
 
     // Arbitrary size limit on files read while checking the repository contents
-    // (all files in the test repo are known to be much smaller than this)
-    const max_file_size = 4096;
+    // (all files in the test repo are known to be smaller than this)
+    const max_file_size = 8192;
     const index_file_data = try git_dir.dir.readFileAlloc(testing.allocator, "testrepo.idx", max_file_size);
     defer testing.allocator.free(index_file_data);
     // testrepo.idx is generated by Git. The index created by this file should
     // match it exactly. Running `git verify-pack -v testrepo.pack` can verify
     // this.
-    const testrepo_idx = @embedFile("git/testdata/testrepo.idx");
+    const testrepo_idx = @embedFile("git/testdata/testrepo-" ++ @tagName(format) ++ ".idx");
     try testing.expectEqualSlices(u8, testrepo_idx, index_file_data);
 
-    var repository = try Repository.init(testing.allocator, pack_file, index_file);
+    var repository = try Repository.init(testing.allocator, format, pack_file, index_file);
     defer repository.deinit();
 
     var worktree = testing.tmpDir(.{ .iterate = true });
     defer worktree.cleanup();
 
-    const commit_id = try parseOid("dd582c0720819ab7130b103635bd7271b9fd4feb");
+    const commit_id = try Oid.parse(format, head_commit);
 
     var diagnostics: Diagnostics = .{ .allocator = testing.allocator };
     defer diagnostics.deinit();
@@ -1497,6 +1638,14 @@
     try testing.expectEqualStrings(expected_file_contents, actual_file_contents);
 }
 
+test "SHA-1 packfile indexing and checkout" {
+    try runRepositoryTest(.sha1, "dd582c0720819ab7130b103635bd7271b9fd4feb");
+}
+
+test "SHA-256 packfile indexing and checkout" {
+    try runRepositoryTest(.sha256, "7f444a92bd4572ee4a28b2c63059924a9ca1829138553ef3e7c41ee159afae7a");
+}
+
 /// Checks out a commit of a packfile. Intended for experimenting with and
 /// benchmarking possible optimizations to the indexing and checkout behavior.
 pub fn main() !void {
@@ -1504,14 +1653,16 @@ pub fn main() !void {
     const args = try std.process.argsAlloc(allocator);
     defer std.process.argsFree(allocator, args);
 
-    if (args.len != 4) {
-        return error.InvalidArguments; // Arguments: packfile commit worktree
+    if (args.len != 5) {
+        return error.InvalidArguments; // Arguments: format packfile commit worktree
     }
 
-    var pack_file = try std.fs.cwd().openFile(args[1], .{});
+    const format = std.meta.stringToEnum(Oid.Format, args[1]) orelse return error.InvalidFormat;
+
+    var pack_file = try std.fs.cwd().openFile(args[2], .{});
     defer pack_file.close();
-    const commit = try parseOid(args[2]);
-    var worktree = try std.fs.cwd().makeOpenPath(args[3], .{});
+    const commit = try Oid.parse(format, args[3]);
+    var worktree = try std.fs.cwd().makeOpenPath(args[4], .{});
     defer worktree.close();
 
     var git_dir = try worktree.makeOpenPath(".git", .{});
@@ -1521,12 +1672,12 @@ pub fn main() !void {
     var index_file = try git_dir.createFile("idx", .{ .read = true });
     defer index_file.close();
     var index_buffered_writer = std.io.bufferedWriter(index_file.writer());
-    try indexPack(allocator, pack_file, index_buffered_writer.writer());
+    try indexPack(allocator, format, pack_file, index_buffered_writer.writer());
     try index_buffered_writer.flush();
     try index_file.sync();
 
     std.debug.print("Starting checkout...\n", .{});
-    var repository = try Repository.init(allocator, pack_file, index_file);
+    var repository = try Repository.init(allocator, format, pack_file, index_file);
     defer repository.deinit();
     var diagnostics: Diagnostics = .{ .allocator = allocator };
     defer diagnostics.deinit();
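Protocol note: the `object-format` capability negotiated in the `Session` hunks above travels in pkt-line framing, where each packet is prefixed with a four-digit lowercase-hex length that counts the four header bytes plus the payload. A standalone sketch of that framing, using a hypothetical `writePktLine` helper (in the patch itself this job is done by `Packet.write`):

```zig
const std = @import("std");

// Sketch of pkt-line framing: the 4-digit lowercase-hex length prefix
// includes the 4 header bytes themselves. writePktLine is a hypothetical
// helper name, not an identifier from the patch.
fn writePktLine(writer: anytype, data: []const u8) !void {
    try writer.print("{x:0>4}", .{data.len + 4});
    try writer.writeAll(data);
}

test writePktLine {
    var buf: std.ArrayListUnmanaged(u8) = .empty;
    defer buf.deinit(std.testing.allocator);
    try writePktLine(buf.writer(std.testing.allocator), "object-format=sha256\n");
    // 21 payload bytes + 4 header bytes = 25 = 0x19.
    try std.testing.expectEqualStrings("0019object-format=sha256\n", buf.items);
}
```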
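Likewise, the fan-out table that `seekOid` and `indexPack` touch is easier to see in isolation: entry `b` of the 256-entry table counts the object IDs whose first byte is at most `b`, so all candidates for a given leading byte form one contiguous index range, regardless of whether entries are 20-byte SHA-1 or 32-byte SHA-256 IDs. A sketch under that assumption (the helper and test data below are illustrative, not from the patch):

```zig
const std = @import("std");

// fan_out[b] = number of object IDs whose first byte is <= b, so the IDs
// beginning with `key` occupy the index range [start, end).
fn fanOutRange(fan_out: *const [256]u32, key: u8) struct { start: u32, end: u32 } {
    return .{
        .start = if (key > 0) fan_out[key - 1] else 0,
        .end = fan_out[key],
    };
}

test fanOutRange {
    // Hypothetical index of three objects whose first bytes are 0x00, 0x7f, 0x7f.
    var fan_out: [256]u32 = undefined;
    @memset(fan_out[0..0x7f], 1); // one ID has a first byte <= 0x7e
    @memset(fan_out[0x7f..], 3); // all three IDs have a first byte <= 0x7f
    const range = fanOutRange(&fan_out, 0x7f);
    try std.testing.expectEqual(@as(u32, 1), range.start);
    try std.testing.expectEqual(@as(u32, 3), range.end);
}
```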
