aboutsummaryrefslogtreecommitdiff
path: root/src/link/Wasm/Archive.zig
blob: 028ef95726eec61321feb4c03f4d4478b9ff0a32 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
/// Handle to the archive on disk. `parse` and `parseObject` read from it and
/// `deinit` closes it.
file: fs.File,
/// Name of the archive. `parseObject` hands it to `realpath` and `openFile`,
/// so it is used as a file system path.
name: []const u8,

/// Header of the first archive member, filled in by `parse`. Its size field
/// is what `parseTableOfContents` uses as the size of the symbol table.
header: ar_hdr = undefined,

/// A list of long file names, delimited by a LF character (0x0a).
/// This is stored as a single slice of bytes, as the header-names
/// point to the character index of a file name, rather than the index
/// in the list.
///
/// Defaults to an empty slice rather than `undefined`: `deinit` frees this
/// field unconditionally, so it must hold a valid slice even when `parse`
/// failed before `parseNameTable` had a chance to assign it.
long_file_names: []const u8 = &.{},

/// Parsed table of contents.
/// Each symbol name points to a list of all definition
/// sites within the current static archive.
/// Keys are allocated copies that `deinit` frees.
toc: std.StringArrayHashMapUnmanaged(std.ArrayListUnmanaged(u32)) = .{},

// An archive file opens with the ARMAG identifying string. After it comes a
// sequence of members, each made of a `struct ar_hdr` followed by as many
// bytes of member file data as that header's `ar_size` field indicates.

/// Length in bytes of the `ARMAG` string.
const SARMAG: u4 = 8;
/// Identifying string found at the very start of an archive file.
const ARMAG: *const [SARMAG:0]u8 = "!<arch>\n";

/// Terminator stored in `ar_fmag` at the end of every member header.
const ARFMAG: *const [2:0]u8 = "`\n";

/// Header that precedes every member of an archive. Every field is a
/// fixed-width run of ASCII text; unused bytes are filled with spaces (0x20).
const ar_hdr = extern struct {
    /// Member file name, sometimes / terminated.
    ar_name: [16]u8,

    /// File date, decimal seconds since Epoch.
    ar_date: [12]u8,

    /// User ID, in ASCII format.
    ar_uid: [6]u8,

    /// Group ID, in ASCII format.
    ar_gid: [6]u8,

    /// File mode, in ASCII octal.
    ar_mode: [8]u8,

    /// File size, in ASCII decimal.
    ar_size: [10]u8,

    /// Always contains ARFMAG.
    ar_fmag: [2]u8,

    /// Result of decoding `ar_name`: either the name itself, or a number
    /// that refers to a name stored outside of the header.
    const NameOrIndex = union(enum) {
        name: []const u8,
        index: u32,
    };

    /// Decodes the `ar_name` field.
    ///
    /// Returns `error.MalformedArchive` when the field contains no '/', and
    /// a `parseInt` error when the text after the first '/' is not a decimal
    /// number.
    ///
    /// NOTE(review): in the `.name` case the returned slice (which still
    /// includes the terminating '/') is borrowed from the `archive`
    /// parameter itself, not from caller-owned storage. Confirm that callers
    /// only use it while the header they passed in is alive.
    fn nameOrIndex(archive: ar_hdr) !NameOrIndex {
        const value = getValue(&archive.ar_name);
        const slash_index = mem.indexOfScalar(u8, value, '/') orelse return error.MalformedArchive;
        // A '/' was found, so `value` is non-empty and `len - 1` cannot wrap.
        if (slash_index == value.len - 1) {
            // The only '/' is the terminator: the name is stored directly.
            return NameOrIndex{ .name = value };
        }
        // Otherwise the digits after the '/' are a number. `parseName` treats
        // it as a character offset into the archive's long file name table.
        const index = try std.fmt.parseInt(u32, value[slash_index + 1 ..], 10);
        return NameOrIndex{ .index = index };
    }

    /// Parses the `ar_date` field as a decimal number.
    fn date(archive: ar_hdr) !u64 {
        return parseDecimal(u64, &archive.ar_date);
    }

    /// Parses the `ar_size` field as a decimal number.
    fn size(archive: ar_hdr) !u32 {
        return parseDecimal(u32, &archive.ar_size);
    }

    /// Strips the trailing space padding from a raw header field.
    fn getValue(raw: []const u8) []const u8 {
        return mem.trimRight(u8, raw, &[_]u8{@as(u8, 0x20)});
    }

    /// Parses a numeric header field. Numeric fields can be padded with
    /// spaces in front as well as at the end, so both sides are trimmed
    /// before the ASCII digits are parsed.
    fn parseDecimal(comptime T: type, raw: []const u8) !T {
        return std.fmt.parseInt(T, mem.trim(u8, raw, " "), 10);
    }
};

/// Closes the archive file and releases all memory held by the archive:
/// every symbol name key of the table of contents, every list of positions
/// stored under those keys, the table itself, and the long file name table.
/// `allocator` must be the allocator that was passed to `parse`.
pub fn deinit(archive: *Archive, allocator: Allocator) void {
    archive.file.close();

    const symbol_names = archive.toc.keys();
    for (symbol_names) |symbol_name| allocator.free(symbol_name);

    const position_lists = archive.toc.values();
    for (position_lists) |*positions| positions.deinit(allocator);

    archive.toc.deinit(allocator);
    allocator.free(archive.long_file_names);
}

/// Reads the archive from the current position of `archive.file`.
///
/// Checks the magic string, reads the first member header into
/// `archive.header`, then parses the table of contents followed by the long
/// file name table. Returns `error.NotArchive` when the magic string or the
/// delimiter of the first header does not match.
pub fn parse(archive: *Archive, allocator: Allocator) !void {
    const reader = archive.file.reader();

    var magic: [SARMAG]u8 = undefined;
    try reader.readNoEof(&magic);
    const magic_matches = mem.eql(u8, &magic, ARMAG);
    if (!magic_matches) {
        log.debug("invalid magic: expected '{s}', found '{s}'", .{ ARMAG, magic });
        return error.NotArchive;
    }

    // The header is stored on the archive before it is validated.
    const first_header = try reader.readStruct(ar_hdr);
    archive.header = first_header;
    const delimiter_matches = mem.eql(u8, &first_header.ar_fmag, ARFMAG);
    if (!delimiter_matches) {
        log.debug("invalid header delimiter: expected '{s}', found '{s}'", .{ ARFMAG, first_header.ar_fmag });
        return error.NotArchive;
    }

    try archive.parseTableOfContents(allocator, reader);
    try archive.parseNameTable(allocator, reader);
}

/// Resolves the name of the member described by `header`.
///
/// A name stored directly in the header is returned as decoded by
/// `ar_hdr.nameOrIndex`. Otherwise the header holds a character offset into
/// `archive.long_file_names`; the name runs from that offset up to the next
/// LF (0x0a) and is returned with trailing '/' characters removed.
///
/// Returns `error.MalformedArchive` when the offset does not point inside
/// the long file name table.
///
/// NOTE(review): a directly stored name is borrowed from the header value
/// handed to `nameOrIndex` and keeps its terminating '/', unlike a name
/// taken from the table. Confirm both are what callers expect.
fn parseName(archive: *const Archive, header: ar_hdr) ![]const u8 {
    const name_or_index = try header.nameOrIndex();
    switch (name_or_index) {
        .name => |name| return name,
        .index => |index| {
            // The offset comes straight from the file; reject it instead of
            // slicing out of bounds.
            if (index >= archive.long_file_names.len) return error.MalformedArchive;
            const name = mem.sliceTo(archive.long_file_names[index..], 0x0a);
            return mem.trimRight(u8, name, "/");
        },
    }
}

/// Parses the symbol table member whose header is stored in
/// `archive.header` and records its contents in `archive.toc`.
///
/// As read here, the member consists of a big-endian u32 symbol count, that
/// many big-endian u32 positions, and then the symbol names as consecutive
/// zero-terminated strings. The i-th position is appended to the list stored
/// under the i-th name; empty names are skipped. Names are copied, so `toc`
/// owns its keys.
///
/// Returns `error.IncompleteSymbolTable` when the member is too small for
/// the count it declares, when the string data cannot be read in full, or
/// when the strings run out before every symbol has a name.
fn parseTableOfContents(archive: *Archive, allocator: Allocator, reader: anytype) !void {
    // size field can have extra spaces padded in front as well as the end,
    // so we trim those first before parsing the ASCII value.
    const size_trimmed = mem.trim(u8, &archive.header.ar_size, " ");
    const sym_tab_size = try std.fmt.parseInt(u32, size_trimmed, 10);

    const num_symbols = try reader.readInt(u32, .big);

    // The count and the positions take up 4 + 4 * num_symbols bytes of the
    // member. Both values come from the file, so compute in u64 and verify
    // the declared size can hold them before subtracting.
    const index_size = 4 + 4 * @as(u64, num_symbols);
    if (index_size > sym_tab_size) return error.IncompleteSymbolTable;
    const strings_size: usize = @intCast(sym_tab_size - index_size);

    const symbol_positions = try allocator.alloc(u32, num_symbols);
    defer allocator.free(symbol_positions);
    for (symbol_positions) |*index| {
        index.* = try reader.readInt(u32, .big);
    }

    const sym_tab = try allocator.alloc(u8, strings_size);
    defer allocator.free(sym_tab);

    reader.readNoEof(sym_tab) catch return error.IncompleteSymbolTable;

    var pos: usize = 0;
    for (symbol_positions) |symbol_position| {
        // Fewer strings than symbols: stop before slicing out of bounds.
        if (pos > sym_tab.len) return error.IncompleteSymbolTable;
        const string = mem.sliceTo(sym_tab[pos..], 0);
        pos += string.len + 1;
        if (string.len == 0) continue;

        // Look the name up using the temporary bytes and only allocate an
        // owned copy for names that are new. This way the map never holds a
        // key that has already been freed, and no key is freed twice when a
        // later allocation fails.
        const gop = try archive.toc.getOrPut(allocator, string);
        if (!gop.found_existing) {
            gop.key_ptr.* = allocator.dupe(u8, string) catch |err| {
                // Drop the entry again: its key still points into `sym_tab`,
                // which is freed when this function returns.
                _ = archive.toc.swapRemove(string);
                return err;
            };
            gop.value_ptr.* = .{};
        }
        try gop.value_ptr.append(allocator, symbol_position);
    }
}

/// Reads the member that holds the long file names and stores its raw bytes
/// in `archive.long_file_names`.
///
/// The member header is read from `reader` first. Returns
/// `error.InvalidHeaderDelimiter` when that header does not end in ARFMAG and
/// `error.MissingTableName` when its name does not begin with "//". The
/// stored bytes are allocated with `allocator`.
fn parseNameTable(archive: *Archive, allocator: Allocator, reader: anytype) !void {
    const table_header = try reader.readStruct(ar_hdr);

    const delimiter_ok = mem.eql(u8, &table_header.ar_fmag, ARFMAG);
    if (!delimiter_ok) return error.InvalidHeaderDelimiter;

    const is_name_table = mem.startsWith(u8, &table_header.ar_name, "//");
    if (!is_name_table) return error.MissingTableName;

    const names = try allocator.alloc(u8, try table_header.size());
    errdefer allocator.free(names);
    try reader.readNoEof(names);

    archive.long_file_names = names;
}

/// Reads the member header located at `file_offset` and creates an `Object`
/// for the member that follows it.
///
/// The archive's own file handle is only used to read the header and is
/// rewound to offset 0 afterwards. The object gets a separate handle, opened
/// from `archive.name` and positioned right after the header. The object is
/// named "<real path of the archive>(<member name>)"; that string is freed
/// again before this function returns.
///
/// Returns `error.InvalidHeaderDelimiter` when the header does not end in
/// ARFMAG.
pub fn parseObject(archive: Archive, wasm_file: *const Wasm, file_offset: u32) !Object {
    const gpa = wasm_file.base.comp.gpa;

    try archive.file.seekTo(file_offset);
    const member_header = try archive.file.reader().readStruct(ar_hdr);
    // Position of the first byte of member data, right behind the header.
    const data_offset = try archive.file.getPos();
    try archive.file.seekTo(0);

    const delimiter_ok = mem.eql(u8, &member_header.ar_fmag, ARFMAG);
    if (!delimiter_ok) return error.InvalidHeaderDelimiter;

    const member_name = try archive.parseName(member_header);

    var path_buffer: [std.fs.MAX_PATH_BYTES]u8 = undefined;
    const archive_path = try std.os.realpath(archive.name, &path_buffer);
    const display_name = try std.fmt.allocPrint(gpa, "{s}({s})", .{ archive_path, member_name });
    defer gpa.free(display_name);

    const member_file = try std.fs.cwd().openFile(archive.name, .{});
    errdefer member_file.close();

    const member_size = try member_header.size();
    try member_file.seekTo(data_offset);
    return Object.create(wasm_file, member_file, display_name, member_size);
}

const std = @import("std");
const assert = std.debug.assert;
const fs = std.fs;
const log = std.log.scoped(.archive);
const mem = std.mem;

const Allocator = mem.Allocator;
const Object = @import("Object.zig");
const Wasm = @import("../Wasm.zig");

const Archive = @This();