src/link/Wasm/Archive.zig


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186

/// A list of long file names, delimited by a LF character (0x0a).
/// This is stored as a single slice of bytes, as the header-names
/// point to the character index of a file name, rather than the index
/// in the list.
/// Stored as an offset/length pair relative to the `file_contents` buffer
/// given to `parse`; `parseObject` re-slices `file_contents` with it.
long_file_names: RelativeSlice,

/// Parsed table of contents.
/// Each symbol name maps to the list of positions that the archive symbol
/// table records for it, i.e. all definition sites within the current
/// static archive.
toc: Toc,

/// Key points into `LazyArchive` `file_contents`.
/// Value is allocated with gpa.
const Toc = std.StringArrayHashMapUnmanaged(std.ArrayListUnmanaged(u32));

/// Magic bytes that `parse` requires at the very start of an archive.
const ARMAG = std.elf.ARMAG;
/// Delimiter that `parse` and `parseObject` require in a header's `fmag` field.
const ARFMAG = std.elf.ARFMAG;

/// A byte range described by offset and length rather than by pointer, so it
/// stays meaningful for any buffer holding the same bytes.
const RelativeSlice = struct {
    /// Offset of the first byte, counted from the start of the buffer.
    off: u32,
    /// Number of bytes in the range.
    len: u32,
};

/// Header that precedes every member of the archive. All fields are ASCII
/// text; `getValue` strips the space (0x20) padding on the right.
const Header = extern struct {
    /// Member file name, sometimes / terminated.
    name: [16]u8,
    /// File date, decimal seconds since Epoch.
    date: [12]u8,
    /// User ID, in ASCII format.
    uid: [6]u8,
    /// Group ID, in ASCII format.
    gid: [6]u8,
    /// File mode, in ASCII octal.
    mode: [8]u8,
    /// File size, in ASCII decimal.
    size: [10]u8,
    /// Always contains ARFMAG.
    fmag: [2]u8,

    /// The two ways a header can identify its member.
    const NameOrIndex = union(enum) {
        /// Name stored inline in the header, still carrying its `/` terminator.
        /// Points into the header's `name` field.
        name: []const u8,
        /// Decimal number that followed the first `/` of the name field.
        index: u32,
    };

    /// Decodes the `name` field.
    ///
    /// Returns `.name` when the first `/` is the last non-padding character,
    /// otherwise parses everything after that `/` as a decimal `.index`.
    /// Fails with `error.MalformedArchive` when the field has no `/`, and
    /// with an integer parse error when the index is not a decimal number.
    ///
    /// The header is taken by pointer because the `.name` result aliases the
    /// `name` field: a by-value parameter may be a temporary copy, which would
    /// leave the returned slice dangling.
    fn nameOrIndex(archive: *const Header) !NameOrIndex {
        const value = getValue(&archive.name);
        const slash_index = mem.indexOfScalar(u8, value, '/') orelse return error.MalformedArchive;
        // `value` contains at least the '/' found above, so `len - 1` cannot underflow.
        const len = value.len;
        if (slash_index == len - 1) {
            // Name stored directly
            return .{ .name = value };
        } else {
            // The name is stored elsewhere; the digits after the slash tell
            // the caller where to find it.
            const index = try std.fmt.parseInt(u32, value[slash_index + 1 ..], 10);
            return .{ .index = index };
        }
    }

    /// Decodes the `size` field as a decimal number of bytes.
    /// Space padding is accepted on both sides of the digits.
    fn parsedSize(archive: Header) !u32 {
        const value = mem.trim(u8, &archive.size, " ");
        return std.fmt.parseInt(u32, value, 10);
    }

    /// Returns `raw` without its trailing space (0x20) padding.
    fn getValue(raw: []const u8) []const u8 {
        return mem.trimEnd(u8, raw, &[_]u8{@as(u8, 0x20)});
    }
};

/// Frees everything the archive allocated with `gpa` and invalidates
/// `archive`; it must not be used afterwards.
pub fn deinit(archive: *Archive, gpa: Allocator) void {
    // Poison the struct last, once the table of contents has been released.
    defer archive.* = undefined;
    deinitToc(gpa, &archive.toc);
}

/// Frees every per-symbol list held in `toc`, then the map itself.
fn deinitToc(gpa: Allocator, toc: *Toc) void {
    const lists = toc.values();
    var i: usize = 0;
    while (i < lists.len) : (i += 1) {
        lists[i].deinit(gpa);
    }
    toc.deinit(gpa);
}

/// Parses the leading part of the static archive in `file_contents`: the
/// archive magic, the symbol table member, and the header of the long file
/// name table ("//") that follows it.
///
/// The lists stored in the returned `toc` are allocated with `gpa`; release
/// them with `deinit`. The `toc` keys are slices of `file_contents` and
/// `long_file_names` is an offset into it, so `file_contents` must outlive
/// the result.
///
/// Errors:
/// * `error.BadArchiveMagic`: the input does not start with `ARMAG`.
/// * `error.BadHeaderDelimiter`: a header does not end with `ARFMAG`.
/// * `error.MissingTableName`: the symbol table is not followed by a "//" member.
/// * `error.MalformedArchive`: a size or count reaches outside of the input.
/// * integer parse errors for non-decimal size fields, and `error.OutOfMemory`.
pub fn parse(gpa: Allocator, file_contents: []const u8) !Archive {
    var pos: usize = 0;

    if (file_contents.len < ARMAG.len or !mem.eql(u8, file_contents[0..ARMAG.len], ARMAG))
        return error.BadArchiveMagic;
    pos += ARMAG.len;

    if (file_contents.len - pos < @sizeOf(Header)) return error.MalformedArchive;
    const header = mem.bytesAsValue(Header, file_contents[pos..][0..@sizeOf(Header)]);
    if (!mem.eql(u8, &header.fmag, ARFMAG)) return error.BadHeaderDelimiter;
    pos += @sizeOf(Header);

    // The size field can have extra spaces padded in front as well as
    // the end, so we trim those first before parsing the ASCII value.
    const size_trimmed = mem.trim(u8, &header.size, " ");
    const sym_tab_size = try std.fmt.parseInt(u32, size_trimmed, 10);
    // The member has to hold at least the 4-byte symbol count and must not
    // extend past the end of the input.
    if (sym_tab_size < 4 or sym_tab_size > file_contents.len - pos) return error.MalformedArchive;

    const num_symbols = mem.readInt(u32, file_contents[pos..][0..4], .big);
    pos += 4;

    // Checking the count against the member size first guarantees that the
    // multiplication and the subtraction below cannot overflow.
    if (num_symbols > (sym_tab_size - 4) / @sizeOf(u32)) return error.MalformedArchive;
    const symbol_positions_size = @sizeOf(u32) * num_symbols;
    const symbol_positions_be = mem.bytesAsSlice(u32, file_contents[pos..][0..symbol_positions_size]);
    pos += symbol_positions_size;

    // What remains of the member is the list of NUL-terminated symbol names.
    const sym_tab = file_contents[pos..][0 .. sym_tab_size - 4 - symbol_positions_size];
    pos += sym_tab.len;

    var toc: Toc = .empty;
    errdefer deinitToc(gpa, &toc);

    var sym_tab_pos: usize = 0;
    for (0..num_symbols) |i| {
        // Fewer names than announced symbols.
        if (sym_tab_pos > sym_tab.len) return error.MalformedArchive;

        const name = mem.sliceTo(sym_tab[sym_tab_pos..], 0);
        sym_tab_pos += name.len + 1;
        if (name.len == 0) continue;

        const gop = try toc.getOrPut(gpa, name);
        if (!gop.found_existing) gop.value_ptr.* = .empty;
        // Positions are stored big-endian in the archive.
        try gop.value_ptr.append(gpa, switch (native_endian) {
            .big => symbol_positions_be[i],
            .little => @byteSwap(symbol_positions_be[i]),
        });
    }

    const long_file_names: RelativeSlice = s: {
        if (file_contents.len - pos < @sizeOf(Header)) return error.MissingTableName;
        const sub_header = mem.bytesAsValue(Header, file_contents[pos..][0..@sizeOf(Header)]);
        pos += @sizeOf(Header);

        // Validate the delimiter of the table's own header, not the symbol
        // table header that was already checked above.
        if (!mem.eql(u8, &sub_header.fmag, ARFMAG)) return error.BadHeaderDelimiter;
        if (!mem.eql(u8, sub_header.name[0..2], "//")) return error.MissingTableName;
        const table_size = try sub_header.parsedSize();
        if (table_size > file_contents.len - pos) return error.MalformedArchive;

        break :s .{
            .off = std.math.cast(u32, pos) orelse return error.MalformedArchive,
            .len = table_size,
        };
    };

    return .{
        .toc = toc,
        .long_file_names = long_file_names,
    };
}

/// From a given file offset, starts reading for a file header.
/// When found, parses the object file into an `Object` and returns it.
///
/// `object_offset` is the position of the member header inside
/// `file_contents`. The member name is taken from the header itself, or from
/// `archive.long_file_names` when the header only stores an index into that
/// table. The remaining parameters are forwarded unchanged to `Object.parse`.
///
/// Errors:
/// * `error.BadHeaderDelimiter`: the header does not end with `ARFMAG`.
/// * `error.MalformedArchive`: the header, the name index, the long file
///   name table or the member contents reach outside of `file_contents`, or
///   the name field has no `/`.
/// * integer parse errors for non-decimal header fields, and whatever
///   `Object.parse` returns.
pub fn parseObject(
    archive: Archive,
    wasm: *Wasm,
    file_contents: []const u8,
    object_offset: u32,
    path: Path,
    host_name: Wasm.OptionalString,
    scratch_space: *Object.ScratchSpace,
    must_link: bool,
    gc_sections: bool,
) !Object {
    if (object_offset > file_contents.len) return error.MalformedArchive;
    const member = file_contents[object_offset..];
    if (member.len < @sizeOf(Header)) return error.MalformedArchive;

    const header = mem.bytesAsValue(Header, member[0..@sizeOf(Header)]);
    if (!mem.eql(u8, &header.fmag, ARFMAG)) return error.BadHeaderDelimiter;

    const name_or_index = try header.nameOrIndex();
    const object_name = switch (name_or_index) {
        .name => |name| name,
        .index => |index| n: {
            const table = archive.long_file_names;
            if (table.off > file_contents.len or table.len > file_contents.len - table.off)
                return error.MalformedArchive;
            const long_file_names = file_contents[table.off..][0..table.len];
            if (index > long_file_names.len) return error.MalformedArchive;
            // Entries of the table are terminated by LF and may end in '/'.
            const name = mem.sliceTo(long_file_names[index..], 0x0a);
            break :n mem.trimEnd(u8, name, "/");
        },
    };

    const object_file_size = try header.parsedSize();
    // The member contents start right after the header.
    const payload = member[@sizeOf(Header)..];
    if (object_file_size > payload.len) return error.MalformedArchive;
    const contents = payload[0..object_file_size];

    return Object.parse(wasm, contents, path, object_name, host_name, scratch_space, must_link, gc_sections);
}

const Archive = @This();

const builtin = @import("builtin");
const native_endian = builtin.cpu.arch.endian();

const std = @import("std");
const mem = std.mem;
const Allocator = std.mem.Allocator;
const Path = std.Build.Cache.Path;

const Wasm = @import("../Wasm.zig");
const Object = @import("Object.zig");