aboutsummaryrefslogtreecommitdiff
path: root/src/link
diff options
context:
space:
mode:
authorLuuk de Gram <luuk@degram.dev>2022-02-13 16:34:51 +0100
committerLuuk de Gram <luuk@degram.dev>2022-02-17 18:11:48 +0100
commite7be0bef43e5fc7d19bbe184b9dc5209f52f745c (patch)
tree985053858e9995a6c7e64cd0d01a077b5ac144c8 /src/link
parentd1c74ac42dd4f1306a0dcb7acbbf1f95fe515627 (diff)
downloadzig-e7be0bef43e5fc7d19bbe184b9dc5209f52f745c.tar.gz
zig-e7be0bef43e5fc7d19bbe184b9dc5209f52f745c.zip
wasm-linker: Add Object file parsing
This upstreams the object file parsing from zwld, bringing us closer to being able to link stage2 code with object files/C-code as well as replacing lld with the self-hosted linker once feature complete.
Diffstat (limited to 'src/link')
-rw-r--r--src/link/Wasm.zig80
-rw-r--r--src/link/Wasm/Atom.zig2
-rw-r--r--src/link/Wasm/Object.zig847
3 files changed, 920 insertions, 9 deletions
diff --git a/src/link/Wasm.zig b/src/link/Wasm.zig
index 8f6dfacf46..dbb9ab801e 100644
--- a/src/link/Wasm.zig
+++ b/src/link/Wasm.zig
@@ -25,6 +25,7 @@ const LlvmObject = @import("../codegen/llvm.zig").Object;
const Air = @import("../Air.zig");
const Liveness = @import("../Liveness.zig");
const Symbol = @import("Wasm/Symbol.zig");
+const Object = @import("Wasm/Object.zig");
const types = @import("Wasm/types.zig");
pub const base_tag = link.File.Tag.wasm;
@@ -73,7 +74,7 @@ func_types: std.ArrayListUnmanaged(wasm.Type) = .{},
/// Output function section
functions: std.ArrayListUnmanaged(wasm.Func) = .{},
/// Output global section
-globals: std.ArrayListUnmanaged(wasm.Global) = .{},
+wasm_globals: std.ArrayListUnmanaged(wasm.Global) = .{},
/// Memory section
memories: wasm.Memory = .{ .limits = .{ .min = 0, .max = null } },
@@ -84,6 +85,17 @@ memories: wasm.Memory = .{ .limits = .{ .min = 0, .max = null } },
/// Note: Key is symbol index, value represents the index into the table
function_table: std.AutoHashMapUnmanaged(u32, u32) = .{},
+/// All object files and their data which are linked into the final binary
+objects: std.ArrayListUnmanaged(Object) = .{},
+/// Maps discarded symbols and their positions to the location of the symbol
+/// it was resolved to
+discarded: std.AutoHashMapUnmanaged(SymbolLoc, SymbolLoc) = .{},
+/// Mapping between symbol names and their respective location.
+/// This map contains all symbols that will be written into the final binary
+/// and were either defined, or resolved.
+/// TODO: Use string interning and make the key an index, rather than a unique string.
+symbol_resolver: std.StringArrayHashMapUnmanaged(SymbolLoc) = .{},
+
pub const Segment = struct {
alignment: u32,
size: u32,
@@ -98,6 +110,14 @@ pub const FnData = struct {
};
};
+pub const SymbolLoc = struct {
+ /// The index of the symbol within the specified file
+ index: u32,
+ /// The index of the object file where the symbol resides.
+ /// When this is `null` the symbol comes from a non-object file.
+ file: ?u16,
+};
+
pub fn openPath(allocator: Allocator, sub_path: []const u8, options: link.Options) !*Wasm {
assert(options.object_format == .wasm);
@@ -115,7 +135,7 @@ pub fn openPath(allocator: Allocator, sub_path: []const u8, options: link.Option
try file.writeAll(&(wasm.magic ++ wasm.version));
// As sym_index '0' is reserved, we use it for our stack pointer symbol
- const global = try wasm_bin.globals.addOne(allocator);
+ const global = try wasm_bin.wasm_globals.addOne(allocator);
global.* = .{
.global_type = .{
.valtype = .i32,
@@ -152,6 +172,31 @@ pub fn createEmpty(gpa: Allocator, options: link.Options) !*Wasm {
return self;
}
+fn parseInputFiles(self: *Wasm, files: []const []const u8) !void {
+ for (files) |path| {
+ if (try self.parseObjectFile(path)) continue;
+ log.warn("Unexpected file format at path: '{s}'", .{path});
+ }
+}
+
+/// Parses the object file from given path. Returns true when the given file was an object
+/// file and parsed successfully. Returns false when file is not an object file.
+/// May return an error instead when parsing failed.
+fn parseObjectFile(self: *Wasm, path: []const u8) !bool {
+ const file = try fs.cwd().openFile(path, .{});
+ errdefer file.close();
+
+ var object = Object.init(self.base.allocator, file, path) catch |err| {
+ if (err == error.InvalidMagicByte) {
+ log.warn("Self hosted linker does not support non-object file parsing", .{});
+ return false;
+ } else return err;
+ };
+ errdefer object.deinit(self.base.allocator);
+ try self.objects.append(self.base.allocator, object);
+ return true;
+}
+
pub fn deinit(self: *Wasm) void {
if (build_options.have_llvm) {
if (self.llvm_object) |llvm_object| llvm_object.destroy(self.base.allocator);
@@ -182,7 +227,7 @@ pub fn deinit(self: *Wasm) void {
self.imports.deinit(self.base.allocator);
self.func_types.deinit(self.base.allocator);
self.functions.deinit(self.base.allocator);
- self.globals.deinit(self.base.allocator);
+ self.wasm_globals.deinit(self.base.allocator);
self.function_table.deinit(self.base.allocator);
}
@@ -587,7 +632,7 @@ fn setupMemory(self: *Wasm) !void {
memory_ptr = std.mem.alignForwardGeneric(u64, memory_ptr, stack_alignment);
memory_ptr += stack_size;
// We always put the stack pointer global at index 0
- self.globals.items[0].init.i32_const = @bitCast(i32, @intCast(u32, memory_ptr));
+ self.wasm_globals.items[0].init.i32_const = @bitCast(i32, @intCast(u32, memory_ptr));
}
var offset: u32 = @intCast(u32, memory_ptr);
@@ -605,7 +650,7 @@ fn setupMemory(self: *Wasm) !void {
if (!place_stack_first) {
memory_ptr = std.mem.alignForwardGeneric(u64, memory_ptr, stack_alignment);
memory_ptr += stack_size;
- self.globals.items[0].init.i32_const = @bitCast(i32, @intCast(u32, memory_ptr));
+ self.wasm_globals.items[0].init.i32_const = @bitCast(i32, @intCast(u32, memory_ptr));
}
// Setup the max amount of pages
@@ -690,6 +735,25 @@ pub fn flushModule(self: *Wasm, comp: *Compilation) !void {
const tracy = trace(@src());
defer tracy.end();
+ // Used for all temporary memory allocated during flushing
+ var arena_instance = std.heap.ArenaAllocator.init(self.base.allocator);
+ defer arena_instance.deinit();
+ const arena = arena_instance.allocator();
+
+ // Positional arguments to the linker such as object files and static archives.
+ var positionals = std.ArrayList([]const u8).init(arena);
+ try positionals.ensureUnusedCapacity(self.base.options.objects.len);
+
+ for (self.base.options.objects) |object| {
+ positionals.appendAssumeCapacity(object.path);
+ }
+
+ for (comp.c_object_table.keys()) |c_object| {
+ try positionals.append(c_object.status.success.object_path);
+ }
+ // TODO: Also link with other objects such as compiler-rt
+ try self.parseInputFiles(positionals.items);
+
// When we finish/error we reset the state of the linker
// So we can rebuild the binary file on each incremental update
defer self.resetState();
@@ -852,7 +916,7 @@ pub fn flushModule(self: *Wasm, comp: *Compilation) !void {
const header_offset = try reserveVecSectionHeader(file);
const writer = file.writer();
- for (self.globals.items) |global| {
+ for (self.wasm_globals.items) |global| {
try writer.writeByte(wasm.valtype(global.global_type.valtype));
try writer.writeByte(@boolToInt(global.global_type.mutable));
try emitInit(writer, global.init);
@@ -863,7 +927,7 @@ pub fn flushModule(self: *Wasm, comp: *Compilation) !void {
header_offset,
.global,
@intCast(u32, (try file.getPos()) - header_offset - header_size),
- @intCast(u32, self.globals.items.len),
+ @intCast(u32, self.wasm_globals.items.len),
);
}
@@ -1039,7 +1103,7 @@ pub fn flushModule(self: *Wasm, comp: *Compilation) !void {
var funcs = try std.ArrayList(Name).initCapacity(self.base.allocator, self.functions.items.len + self.imported_functions_count);
defer funcs.deinit();
- var globals = try std.ArrayList(Name).initCapacity(self.base.allocator, self.globals.items.len);
+ var globals = try std.ArrayList(Name).initCapacity(self.base.allocator, self.wasm_globals.items.len);
defer globals.deinit();
var segments = try std.ArrayList(Name).initCapacity(self.base.allocator, self.data_segments.count());
defer segments.deinit();
diff --git a/src/link/Wasm/Atom.zig b/src/link/Wasm/Atom.zig
index 09fb6735f7..fc4effd714 100644
--- a/src/link/Wasm/Atom.zig
+++ b/src/link/Wasm/Atom.zig
@@ -50,7 +50,7 @@ pub fn deinit(self: *Atom, gpa: Allocator) void {
self.relocs.deinit(gpa);
self.code.deinit(gpa);
- while (self.locals.popOrNull()) |*local| {
+ for (self.locals.items) |*local| {
local.deinit(gpa);
}
self.locals.deinit(gpa);
diff --git a/src/link/Wasm/Object.zig b/src/link/Wasm/Object.zig
new file mode 100644
index 0000000000..df98ea4d37
--- /dev/null
+++ b/src/link/Wasm/Object.zig
@@ -0,0 +1,847 @@
+//! Object represents a wasm object file. When initializing a new
+//! `Object`, it will parse the contents of a given file handler, and verify
+//! the data on correctness. The result can then be used by the linker.
+const Object = @This();
+
+const Atom = @import("Atom.zig");
+const types = @import("types.zig");
+const std = @import("std");
+const Wasm = @import("Wasm.zig");
+const Symbol = @import("Symbol.zig");
+
+const Allocator = std.mem.Allocator;
+const leb = std.leb;
+const meta = std.meta;
+
+const log = std.log.scoped(.zwld);
+
+/// Wasm spec version used for this `Object`
+version: u32 = 0,
+/// The entire object file is read and parsed in a single pass.
+/// For this reason it's a lot simpler to use an arena and store the entire
+/// state after parsing. This also allows to free all memory at once.
+arena: std.heap.ArenaAllocator.State = .{},
+/// The file descriptor that represents the wasm object file.
+file: ?std.fs.File = null,
+/// Name (read path) of the object file.
+name: []const u8,
+/// Parsed type section
+types: []const std.wasm.Type = &.{},
+/// A list of all imports for this module
+imports: []std.wasm.Import = &.{},
+/// Parsed function section
+functions: []std.wasm.Func = &.{},
+/// Parsed table section
+tables: []std.wasm.Table = &.{},
+/// Parsed memory section
+memories: []const std.wasm.Memory = &.{},
+/// Parsed global section
+globals: []std.wasm.Global = &.{},
+/// Parsed export section
+exports: []const std.wasm.Export = &.{},
+/// Parsed element section
+elements: []const std.wasm.Element = &.{},
+/// Represents the function ID that must be called on startup.
+/// This is `null` by default as runtimes may determine the startup
+/// function themselves. This is essentially legacy.
+start: ?u32 = null,
+/// A slice of features that tell the linker what features are mandatory,
+/// used (or therefore missing) and must generate an error when another
+/// object uses features that are not supported by the other.
+features: []const types.Feature = &.{},
+/// A table that maps the relocations we must perform where the key represents
+/// the section that the list of relocations applies to.
+relocations: std.AutoArrayHashMapUnmanaged(u32, []types.Relocation) = .{},
+/// Table of symbols belonging to this Object file
+symtable: []Symbol = &.{},
+/// Extra metadata about the linking section, such as alignment of segments and their name
+segment_info: []const types.Segment = &.{},
+/// A sequence of function initializers that must be called on startup
+init_funcs: []const types.InitFunc = &.{},
+/// Comdat information
+comdat_info: []const types.Comdat = &.{},
+/// Represents non-synthetic sections that can essentially be mem-cpy'd into place
+/// after performing relocations.
+relocatable_data: []RelocatableData = &.{},
+
+/// Represents a single item within a section (depending on its `type`)
+const RelocatableData = struct {
+ /// The type of the relocatable data
+ type: enum { data, code, custom },
+ /// Pointer to the data of the segment, where its length is written to `size`
+ data: [*]u8,
+ /// The size in bytes of the data representing the segment within the section
+ size: u32,
+ /// The index within the section itself
+ index: u32,
+ /// The offset within the section where the data starts
+ offset: u32,
+ /// Represents the index of the section it belongs to
+ section_index: u32,
+
+ /// Returns the alignment of the segment, by retrieving it from the segment
+ /// meta data of the given object file.
+ /// NOTE: Alignment is encoded as a power of 2, so we shift the symbol's
+ /// alignment to retrieve the natural alignment.
+ pub fn getAlignment(self: RelocatableData, object: *const Object) u32 {
+ if (self.type != .data) return 1;
+ const data_alignment = object.segment_info[self.index].alignment;
+ if (data_alignment == 0) return 1;
+ // Decode from power of 2 to natural alignment
+ return @as(u32, 1) << @intCast(u5, data_alignment);
+ }
+
+ /// Returns the symbol kind that corresponds to the relocatable section
+ pub fn getSymbolKind(self: RelocatableData) Symbol.Tag {
+ return switch (self.type) {
+ .data => .data,
+ .code => .function,
+ .custom => .section,
+ };
+ }
+};
+
+pub const InitError = error{NotObjectFile} || ParseError || std.fs.File.ReadError;
+
+/// Initializes a new `Object` from a wasm object file.
+pub fn init(gpa: Allocator, file: std.fs.File, path: []const u8) InitError!Object {
+ var object: Object = .{
+ .file = file,
+ .name = path,
+ };
+
+ var arena = std.heap.ArenaAllocator.init(gpa);
+ errdefer arena.deinit();
+
+ var is_object_file: bool = false;
+ try object.parse(arena.allocator(), file.reader(), &is_object_file);
+ object.arena = arena.state;
+ if (!is_object_file) return error.NotObjectFile;
+
+ return object;
+}
+
+/// Frees all memory of `Object` at once. The given `Allocator` must be
+/// the same allocator that was used when `init` was called.
+pub fn deinit(self: *Object, gpa: Allocator) void {
+ self.arena.promote(gpa).deinit();
+ self.* = undefined;
+}
+
+/// Finds the import within the list of imports from a given kind and index of that kind.
+/// Asserts the import exists
+pub fn findImport(self: *const Object, import_kind: std.wasm.ExternalKind, index: u32) *std.wasm.Import {
+ var i: u32 = 0;
+ return for (self.imports) |*import| {
+ if (std.meta.activeTag(import.kind) == import_kind) {
+ if (i == index) return import;
+ i += 1;
+ }
+ } else unreachable; // Only existing imports are allowed to be found
+}
+
+/// Counts the entries of imported `kind` and returns the result
+pub fn importedCountByKind(self: *const Object, kind: std.wasm.ExternalKind) u32 {
+ var i: u32 = 0;
+ return for (self.imports) |imp| {
+ if (@as(std.wasm.ExternalKind, imp.kind) == kind) i += 1;
+ } else i;
+}
+
+/// Returns a table by a given id, rather than by its index within the list.
+pub fn getTable(self: *const Object, id: u32) *std.wasm.Table {
+ return for (self.tables) |*table| {
+ if (table.table_idx == id) break table;
+ } else unreachable;
+}
+
+/// Checks if the object file is an MVP version.
+/// When that's the case, we check if there's an import table definition with its name
+/// set to "__indirect_function_table". When that's also the case,
+/// we initialize a new table symbol that corresponds to that import and return that symbol.
+///
+/// When the object file is *NOT* MVP, we return `null`.
+fn checkLegacyIndirectFunctionTable(self: *Object) !?Symbol {
+ var table_count: usize = 0;
+ for (self.symtable) |sym| {
+ if (sym.tag == .table) table_count += 1;
+ }
+
+ const import_table_count = self.importedCountByKind(.table);
+
+ // For each import table, we also have a symbol so this is not a legacy object file
+ if (import_table_count == table_count) return null;
+
+ if (table_count != 0) {
+ log.err("Expected a table entry symbol for each of the {d} table(s), but instead got {d} symbols.", .{
+ import_table_count,
+ table_count,
+ });
+ return error.MissingTableSymbols;
+ }
+
+ // MVP object files cannot have any table definitions, only imports (for the indirect function table).
+ if (self.tables.len > 0) {
+ log.err("Unexpected table definition without representing table symbols.", .{});
+ return error.UnexpectedTable;
+ }
+
+ if (import_table_count != 1) {
+ log.err("Found more than one table import, but no representing table symbols", .{});
+ return error.MissingTableSymbols;
+ }
+
+ var table_import: std.wasm.Import = for (self.imports) |imp| {
+ if (imp.kind == .table) {
+ break imp;
+ }
+ } else unreachable;
+
+ if (!std.mem.eql(u8, table_import.name, "__indirect_function_table")) {
+ log.err("Non-indirect function table import '{s}' is missing a corresponding symbol", .{table_import.name});
+ return error.MissingTableSymbols;
+ }
+
+ var table_symbol: Symbol = .{
+ .flags = 0,
+ .name = table_import.name,
+ .tag = .table,
+ .index = 0,
+ };
+ table_symbol.setFlag(.WASM_SYM_UNDEFINED);
+ table_symbol.setFlag(.WASM_SYM_NO_STRIP);
+ return table_symbol;
+}
+
+/// Error set containing parsing errors.
+/// Merged with reader's errorset by `Parser`
+pub const ParseError = error{
+ /// The magic byte is either missing or does not contain \0Asm
+ InvalidMagicByte,
+ /// The wasm version is either missing or does not match the supported version.
+ InvalidWasmVersion,
+ /// Expected the functype byte while parsing the Type section but did not find it.
+ ExpectedFuncType,
+ /// Missing an 'end' opcode when defining a constant expression.
+ MissingEndForExpression,
+ /// Missing an 'end' opcode at the end of a body expression.
+ MissingEndForBody,
+ /// The size defined in the section code mismatches with the actual payload size.
+ MalformedSection,
+ /// Stream has reached the end. Unreachable for caller and must be handled internally
+ /// by the parser.
+ EndOfStream,
+ /// Ran out of memory when allocating.
+ OutOfMemory,
+ /// A non-zero flag was provided for comdat info
+ UnexpectedValue,
+ /// An import symbol contains an index to an import that does
+ /// not exist, or no imports were defined.
+ InvalidIndex,
+ /// The section "linking" contains a version that is not supported.
+ UnsupportedVersion,
+ /// When reading the data in leb128 compressed format, its value was overflown.
+ Overflow,
+ /// Found table definitions but no corresponding table symbols
+ MissingTableSymbols,
+ /// Did not expect a table definition, but did find one
+ UnexpectedTable,
+ /// Object file contains a feature that is unknown to the linker
+ UnknownFeature,
+};
+
+fn parse(self: *Object, gpa: Allocator, reader: anytype, is_object_file: *bool) Parser(@TypeOf(reader)).Error!void {
+ var parser = Parser(@TypeOf(reader)).init(self, reader);
+ return parser.parseObject(gpa, is_object_file);
+}
+
+fn Parser(comptime ReaderType: type) type {
+ return struct {
+ const Self = @This();
+ const Error = ReaderType.Error || ParseError;
+
+ reader: std.io.CountingReader(ReaderType),
+ /// Object file we're building
+ object: *Object,
+
+ fn init(object: *Object, reader: ReaderType) Self {
+ return .{ .object = object, .reader = std.io.countingReader(reader) };
+ }
+
+ /// Verifies that the first 4 bytes contains \0Asm
+ fn verifyMagicBytes(self: *Self) Error!void {
+ var magic_bytes: [4]u8 = undefined;
+
+ try self.reader.reader().readNoEof(&magic_bytes);
+ if (!std.mem.eql(u8, &magic_bytes, &std.wasm.magic)) {
+ log.debug("Invalid magic bytes '{s}'", .{&magic_bytes});
+ return error.InvalidMagicByte;
+ }
+ }
+
+ fn parseObject(self: *Self, gpa: Allocator, is_object_file: *bool) Error!void {
+ try self.verifyMagicBytes();
+ const version = try self.reader.reader().readIntLittle(u32);
+
+ self.object.version = version;
+ var relocatable_data = std.ArrayList(RelocatableData).init(gpa);
+ defer relocatable_data.deinit();
+
+ var section_index: u32 = 0;
+ while (self.reader.reader().readByte()) |byte| : (section_index += 1) {
+ const len = try readLeb(u32, self.reader.reader());
+ const reader = std.io.limitedReader(self.reader.reader(), len).reader();
+ switch (@intToEnum(std.wasm.Section, byte)) {
+ .custom => {
+ const name_len = try readLeb(u32, reader);
+ const name = try gpa.alloc(u8, name_len);
+ defer gpa.free(name);
+ try reader.readNoEof(name);
+
+ if (std.mem.eql(u8, name, "linking")) {
+ is_object_file.* = true;
+ try self.parseMetadata(gpa, reader.context.bytes_left);
+ } else if (std.mem.startsWith(u8, name, "reloc")) {
+ try self.parseRelocations(gpa);
+ } else if (std.mem.eql(u8, name, "target_features")) {
+ try self.parseFeatures(gpa);
+ } else {
+ try reader.skipBytes(reader.context.bytes_left, .{});
+ }
+ },
+ .type => {
+ for (try readVec(&self.object.types, reader, gpa)) |*type_val| {
+ if ((try reader.readByte()) != std.wasm.function_type) return error.ExpectedFuncType;
+
+ for (try readVec(&type_val.params, reader, gpa)) |*param| {
+ param.* = try readEnum(std.wasm.Valtype, reader);
+ }
+
+ for (try readVec(&type_val.returns, reader, gpa)) |*result| {
+ result.* = try readEnum(std.wasm.Valtype, reader);
+ }
+ }
+ try assertEnd(reader);
+ },
+ .import => {
+ for (try readVec(&self.object.imports, reader, gpa)) |*import| {
+ const module_len = try readLeb(u32, reader);
+ const module_name = try gpa.alloc(u8, module_len);
+ try reader.readNoEof(module_name);
+
+ const name_len = try readLeb(u32, reader);
+ const name = try gpa.alloc(u8, name_len);
+ try reader.readNoEof(name);
+
+ const kind = try readEnum(std.wasm.ExternalKind, reader);
+ const kind_value: std.wasm.Import.Kind = switch (kind) {
+ .function => .{ .function = try readLeb(u32, reader) },
+ .memory => .{ .memory = try readLimits(reader) },
+ .global => .{ .global = .{
+ .valtype = try readEnum(std.wasm.Valtype, reader),
+ .mutable = (try reader.readByte()) == 0x01,
+ } },
+ .table => .{ .table = .{
+ .reftype = try readEnum(std.wasm.RefType, reader),
+ .limits = try readLimits(reader),
+ } },
+ };
+
+ import.* = .{
+ .module_name = module_name,
+ .name = name,
+ .kind = kind_value,
+ };
+ }
+ try assertEnd(reader);
+ },
+ .function => {
+ for (try readVec(&self.object.functions, reader, gpa)) |*func| {
+ func.* = .{ .type_index = try readLeb(u32, reader) };
+ }
+ try assertEnd(reader);
+ },
+ .table => {
+ for (try readVec(&self.object.tables, reader, gpa)) |*table| {
+ table.* = .{
+ .reftype = try readEnum(std.wasm.RefType, reader),
+ .limits = try readLimits(reader),
+ };
+ }
+ try assertEnd(reader);
+ },
+ .memory => {
+ for (try readVec(&self.object.memories, reader, gpa)) |*memory| {
+ memory.* = .{ .limits = try readLimits(reader) };
+ }
+ try assertEnd(reader);
+ },
+ .global => {
+ for (try readVec(&self.object.globals, reader, gpa)) |*global| {
+ global.* = .{
+ .global_type = .{
+ .valtype = try readEnum(std.wasm.Valtype, reader),
+ .mutable = (try reader.readByte()) == 0x01,
+ },
+ .init = try readInit(reader),
+ };
+ }
+ try assertEnd(reader);
+ },
+ .@"export" => {
+ for (try readVec(&self.object.exports, reader, gpa)) |*exp| {
+ const name_len = try readLeb(u32, reader);
+ const name = try gpa.alloc(u8, name_len);
+ try reader.readNoEof(name);
+ exp.* = .{
+ .name = name,
+ .kind = try readEnum(std.wasm.ExternalKind, reader),
+ .index = try readLeb(u32, reader),
+ };
+ }
+ try assertEnd(reader);
+ },
+ .start => {
+ self.object.start = try readLeb(u32, reader);
+ try assertEnd(reader);
+ },
+ .element => {
+ for (try readVec(&self.object.elements, reader, gpa)) |*elem| {
+ elem.table_index = try readLeb(u32, reader);
+ elem.offset = try readInit(reader);
+
+ for (try readVec(&elem.func_indexes, reader, gpa)) |*idx| {
+ idx.* = try readLeb(u32, reader);
+ }
+ }
+ try assertEnd(reader);
+ },
+ .code => {
+ var start = reader.context.bytes_left;
+ var index: u32 = 0;
+ const count = try readLeb(u32, reader);
+ while (index < count) : (index += 1) {
+ const code_len = try readLeb(u32, reader);
+ const offset = @intCast(u32, start - reader.context.bytes_left);
+ const data = try gpa.alloc(u8, code_len);
+ try reader.readNoEof(data);
+ try relocatable_data.append(.{
+ .type = .code,
+ .data = data.ptr,
+ .size = code_len,
+ .index = self.object.importedCountByKind(.function) + index,
+ .offset = offset,
+ .section_index = section_index,
+ });
+ }
+ },
+ .data => {
+ var start = reader.context.bytes_left;
+ var index: u32 = 0;
+ const count = try readLeb(u32, reader);
+ while (index < count) : (index += 1) {
+ const flags = try readLeb(u32, reader);
+ const data_offset = try readInit(reader);
+ _ = flags; // TODO: Do we need to check flags to detect passive/active memory?
+ _ = data_offset;
+ const data_len = try readLeb(u32, reader);
+ const offset = @intCast(u32, start - reader.context.bytes_left);
+ const data = try gpa.alloc(u8, data_len);
+ try reader.readNoEof(data);
+ try relocatable_data.append(.{
+ .type = .data,
+ .data = data.ptr,
+ .size = data_len,
+ .index = index,
+ .offset = offset,
+ .section_index = section_index,
+ });
+ }
+ },
+ else => try self.reader.reader().skipBytes(len, .{}),
+ }
+ } else |err| switch (err) {
+ error.EndOfStream => {}, // finished parsing the file
+ else => |e| return e,
+ }
+ self.object.relocatable_data = relocatable_data.toOwnedSlice();
+ }
+
+ /// Based on the "features" custom section, parses it into a list of
+ /// features that tell the linker what features were enabled and may be mandatory
+ /// to be able to link.
+ /// Logs an info message when an undefined feature is detected.
+ fn parseFeatures(self: *Self, gpa: Allocator) !void {
+ const reader = self.reader.reader();
+ for (try readVec(&self.object.features, reader, gpa)) |*feature| {
+ const prefix = try readEnum(types.Feature.Prefix, reader);
+ const name_len = try leb.readULEB128(u32, reader);
+ const name = try gpa.alloc(u8, name_len);
+ try reader.readNoEof(name);
+
+ const tag = types.known_features.get(name) orelse {
+ log.err("Object file contains unknown feature: {s}", .{name});
+ return error.UnknownFeature;
+ };
+ feature.* = .{
+ .prefix = prefix,
+ .tag = tag,
+ };
+ }
+ }
+
+ /// Parses a "reloc" custom section into a list of relocations.
+ /// The relocations are mapped into `Object` where the key is the section
+ /// they apply to.
+ fn parseRelocations(self: *Self, gpa: Allocator) !void {
+ const reader = self.reader.reader();
+ const section = try leb.readULEB128(u32, reader);
+ const count = try leb.readULEB128(u32, reader);
+ const relocations = try gpa.alloc(types.Relocation, count);
+
+ log.debug("Found {d} relocations for section ({d})", .{
+ count,
+ section,
+ });
+
+ for (relocations) |*relocation| {
+ const rel_type = try leb.readULEB128(u8, reader);
+ const rel_type_enum = @intToEnum(types.Relocation.RelocationType, rel_type);
+ relocation.* = .{
+ .relocation_type = rel_type_enum,
+ .offset = try leb.readULEB128(u32, reader),
+ .index = try leb.readULEB128(u32, reader),
+ .addend = if (rel_type_enum.addendIsPresent()) try leb.readULEB128(u32, reader) else null,
+ };
+ log.debug("Found relocation: type({s}) offset({d}) index({d}) addend({d})", .{
+ @tagName(relocation.relocation_type),
+ relocation.offset,
+ relocation.index,
+ relocation.addend,
+ });
+ }
+
+ try self.object.relocations.putNoClobber(gpa, section, relocations);
+ }
+
+ /// Parses the "linking" custom section. Versions that are not
+ /// supported will be an error. `payload_size` is required to be able
+ /// to calculate the subsections we need to parse, as that data is not
+ /// available within the section itself.
+ fn parseMetadata(self: *Self, gpa: Allocator, payload_size: usize) !void {
+ var limited = std.io.limitedReader(self.reader.reader(), payload_size);
+ const limited_reader = limited.reader();
+
+ const version = try leb.readULEB128(u32, limited_reader);
+ log.debug("Link meta data version: {d}", .{version});
+ if (version != 2) return error.UnsupportedVersion;
+
+ while (limited.bytes_left > 0) {
+ try self.parseSubsection(gpa, limited_reader);
+ }
+ }
+
+ /// Parses a `spec.Subsection`.
+ /// The `reader` param for this is to provide a `LimitedReader`, which allows
+ /// us to only read until a max length.
+ ///
+ /// `self` is used to provide access to other sections that may be needed,
+ /// such as access to the `import` section to find the name of a symbol.
+ fn parseSubsection(self: *Self, gpa: Allocator, reader: anytype) !void {
+ const sub_type = try leb.readULEB128(u8, reader);
+ log.debug("Found subsection: {s}", .{@tagName(@intToEnum(types.SubsectionType, sub_type))});
+ const payload_len = try leb.readULEB128(u32, reader);
+ if (payload_len == 0) return;
+
+ var limited = std.io.limitedReader(reader, payload_len);
+ const limited_reader = limited.reader();
+
+ // every subsection contains a 'count' field
+ const count = try leb.readULEB128(u32, limited_reader);
+
+ switch (@intToEnum(types.SubsectionType, sub_type)) {
+ .WASM_SEGMENT_INFO => {
+ const segments = try gpa.alloc(types.Segment, count);
+ for (segments) |*segment| {
+ const name_len = try leb.readULEB128(u32, reader);
+ const name = try gpa.alloc(u8, name_len);
+ try reader.readNoEof(name);
+ segment.* = .{
+ .name = name,
+ .alignment = try leb.readULEB128(u32, reader),
+ .flags = try leb.readULEB128(u32, reader),
+ };
+ log.debug("Found segment: {s} align({d}) flags({b})", .{
+ segment.name,
+ segment.alignment,
+ segment.flags,
+ });
+ }
+ self.object.segment_info = segments;
+ },
+ .WASM_INIT_FUNCS => {
+ const funcs = try gpa.alloc(types.InitFunc, count);
+ for (funcs) |*func| {
+ func.* = .{
+ .priority = try leb.readULEB128(u32, reader),
+ .symbol_index = try leb.readULEB128(u32, reader),
+ };
+ log.debug("Found function - prio: {d}, index: {d}", .{ func.priority, func.symbol_index });
+ }
+ self.object.init_funcs = funcs;
+ },
+ .WASM_COMDAT_INFO => {
+ const comdats = try gpa.alloc(types.Comdat, count);
+ for (comdats) |*comdat| {
+ const name_len = try leb.readULEB128(u32, reader);
+ const name = try gpa.alloc(u8, name_len);
+ try reader.readNoEof(name);
+
+ const flags = try leb.readULEB128(u32, reader);
+ if (flags != 0) {
+ return error.UnexpectedValue;
+ }
+
+ const symbol_count = try leb.readULEB128(u32, reader);
+ const symbols = try gpa.alloc(types.ComdatSym, symbol_count);
+ for (symbols) |*symbol| {
+ symbol.* = .{
+ .kind = @intToEnum(types.ComdatSym.Type, try leb.readULEB128(u8, reader)),
+ .index = try leb.readULEB128(u32, reader),
+ };
+ }
+
+ comdat.* = .{
+ .name = name,
+ .flags = flags,
+ .symbols = symbols,
+ };
+ }
+
+ self.object.comdat_info = comdats;
+ },
+ .WASM_SYMBOL_TABLE => {
+ var symbols = try std.ArrayList(Symbol).initCapacity(gpa, count);
+
+ var i: usize = 0;
+ while (i < count) : (i += 1) {
+ const symbol = symbols.addOneAssumeCapacity();
+ symbol.* = try self.parseSymbol(gpa, reader);
+ log.debug("Found symbol: type({s}) name({s}) flags(0b{b:0>8})", .{
+ @tagName(symbol.tag),
+ symbol.name,
+ symbol.flags,
+ });
+ }
+
+ // we found all symbols, check for indirect function table
+ // in case of an MVP object file
+ if (try self.object.checkLegacyIndirectFunctionTable()) |symbol| {
+ try symbols.append(symbol);
+ log.debug("Found legacy indirect function table. Created symbol", .{});
+ }
+
+ self.object.symtable = symbols.toOwnedSlice();
+ },
+ }
+ }
+
+ /// Parses the symbol information based on its kind,
+ /// requires access to `Object` to find the name of a symbol when it's
+ /// an import and flag `WASM_SYM_EXPLICIT_NAME` is not set.
+ /// Allocates the symbol's name with `gpa` (except for `.section` symbols,
+ /// which reuse the static tag name); caller owns that memory.
+ fn parseSymbol(self: *Self, gpa: Allocator, reader: anytype) !Symbol {
+ // Every symbol entry starts with a tag byte followed by LEB128-encoded flags.
+ const tag = @intToEnum(Symbol.Tag, try leb.readULEB128(u8, reader));
+ const flags = try leb.readULEB128(u32, reader);
+ var symbol: Symbol = .{
+ .flags = flags,
+ .tag = tag,
+ // `name` and `index` are filled in below depending on the symbol kind.
+ .name = undefined,
+ .index = undefined,
+ };
+
+ switch (tag) {
+ .data => {
+ // Data symbols always carry an inline, length-prefixed name.
+ const name_len = try leb.readULEB128(u32, reader);
+ const name = try gpa.alloc(u8, name_len);
+ try reader.readNoEof(name);
+ symbol.name = name;
+
+ // Data symbols only have the following fields if the symbol is defined
+ if (symbol.isDefined()) {
+ symbol.index = try leb.readULEB128(u32, reader);
+ // @TODO: We should verify those values
+ // (the two discarded values are read here only to advance the reader)
+ _ = try leb.readULEB128(u32, reader);
+ _ = try leb.readULEB128(u32, reader);
+ }
+ },
+ .section => {
+ // Section symbols reference a section index and encode no name;
+ // reuse the tag's name ("section") instead.
+ symbol.index = try leb.readULEB128(u32, reader);
+ symbol.name = @tagName(symbol.tag);
+ },
+ else => {
+ // Remaining kinds (function/global/table/etc.): `index` points into
+ // the corresponding index space of the module.
+ symbol.index = try leb.readULEB128(u32, reader);
+ var maybe_import: ?*std.wasm.Import = null;
+
+ const is_undefined = symbol.isUndefined();
+ if (is_undefined) {
+ // Undefined symbols resolve to an import of the matching kind.
+ maybe_import = self.object.findImport(symbol.externalType(), symbol.index);
+ }
+ const explicit_name = symbol.hasFlag(.WASM_SYM_EXPLICIT_NAME);
+ if (!(is_undefined and !explicit_name)) {
+ // A name is encoded inline for defined symbols, and for undefined
+ // symbols that carry the WASM_SYM_EXPLICIT_NAME flag.
+ const name_len = try leb.readULEB128(u32, reader);
+ const name = try gpa.alloc(u8, name_len);
+ try reader.readNoEof(name);
+ symbol.name = name;
+ } else {
+ // Undefined without an explicit name: borrow the import's name.
+ // `maybe_import` is non-null here because `is_undefined` holds.
+ symbol.name = maybe_import.?.name;
+ }
+ },
+ }
+ return symbol;
+ }
+ };
+}
+
+/// Reads a LEB128-encoded element count from `reader`, then allocates a
+/// slice of the pointee's element type with exactly that many entries.
+/// The new slice is stored through `ptr` and also returned; caller owns
+/// the memory (allocated via `gpa`). Element contents are uninitialized.
+fn readVec(ptr: anytype, reader: anytype, gpa: Allocator) ![]ElementType(@TypeOf(ptr)) {
+    const Elem = ElementType(@TypeOf(ptr));
+    const count = try readLeb(u32, reader);
+    const result = try gpa.alloc(Elem, count);
+    ptr.* = result;
+    return result;
+}
+
+/// Given a pointer-to-slice type, resolves the type of the slice's elements.
+fn ElementType(comptime ptr: type) type {
+    const Pointee = meta.Child(ptr);
+    return meta.Elem(Pointee);
+}
+
+/// Decodes a single LEB128 value of integer type `T` from `reader`,
+/// dispatching at compile time to the signed (`readILEB128`) or
+/// unsigned (`readULEB128`) decoder based on `T`'s signedness.
+/// Asserts `T` is an integer.
+fn readLeb(comptime T: type, reader: anytype) !T {
+    const signed = comptime std.meta.trait.isSignedInt(T);
+    return if (signed)
+        try leb.readILEB128(T, reader)
+    else
+        try leb.readULEB128(T, reader);
+}
+
+/// Reads a LEB128-encoded integer from `reader` and converts it into the
+/// enum type `T` via its tag type. Compile error when `T` is not an enum.
+fn readEnum(comptime T: type, reader: anytype) !T {
+    return switch (@typeInfo(T)) {
+        .Enum => |enum_info| @intToEnum(T, try readLeb(enum_info.tag_type, reader)),
+        else => @compileError("T must be an enum. Instead was given type " ++ @typeName(T)),
+    };
+}
+
+fn readLimits(reader: anytype) !std.wasm.Limits {
+ const flags = try readLeb(u1, reader);
+ const min = try readLeb(u32, reader);
+ return std.wasm.Limits{
+ .min = min,
+ .max = if (flags == 0) null else try readLeb(u32, reader),
+ };
+}
+
+/// Reads a constant initializer expression terminated by an `end` opcode.
+/// Supports all MVP constant opcodes: `i32.const`/`i64.const` (signed LEB128)
+/// and `f32.const`/`f64.const` (raw little-endian IEEE-754 bits), as well as
+/// `global.get`; previously only `i32.const` and `global.get` were handled
+/// and the other valid constant opcodes hit the panic fallback.
+/// Returns `error.MissingEndForExpression` when the terminator is absent.
+fn readInit(reader: anytype) !std.wasm.InitExpression {
+    const opcode = try reader.readByte();
+    const init_expr: std.wasm.InitExpression = switch (@intToEnum(std.wasm.Opcode, opcode)) {
+        .i32_const => .{ .i32_const = try readLeb(i32, reader) },
+        .i64_const => .{ .i64_const = try readLeb(i64, reader) },
+        // Float constants are stored as raw IEEE-754 bit patterns, not LEB128.
+        .f32_const => .{ .f32_const = @bitCast(f32, try reader.readIntLittle(u32)) },
+        .f64_const => .{ .f64_const = @bitCast(f64, try reader.readIntLittle(u64)) },
+        .global_get => .{ .global_get = try readLeb(u32, reader) },
+        else => @panic("TODO: initexpression for other opcodes"),
+    };
+
+    if ((try readEnum(std.wasm.Opcode, reader)) != .end) return error.MissingEndForExpression;
+    return init_expr;
+}
+
+/// Ensures the (limited) reader has been fully consumed: a further read must
+/// yield zero bytes and the underlying limiter must report no bytes left.
+/// Returns `error.MalformedSection` when trailing data remains.
+fn assertEnd(reader: anytype) !void {
+    var probe: [1]u8 = undefined;
+    const read_len = try reader.read(&probe);
+    const exhausted = read_len == 0 and reader.context.bytes_left == 0;
+    if (!exhausted) return error.MalformedSection;
+}
+
+/// Parses an object file into atoms, for code and data sections
+/// Appends the created atoms to `wasm_bin.managed_atoms` and links them into
+/// the per-segment atom lists in `wasm_bin.atoms`; also grows the matching
+/// output segments' size/alignment accordingly.
+pub fn parseIntoAtoms(self: *Object, gpa: Allocator, object_index: u16, wasm_bin: *Wasm) !void {
+ log.debug("Parsing data section into atoms", .{});
+ // Key into `symbol_for_segment`: a (symbol kind, item index) pair, since
+ // function and data indices live in separate index spaces.
+ const Key = struct {
+ kind: Symbol.Tag,
+ index: u32,
+ };
+ var symbol_for_segment = std.AutoArrayHashMap(Key, u32).init(gpa);
+ defer symbol_for_segment.deinit();
+
+ // Map each defined function/data symbol to its symbol-table index so the
+ // relocatable data below can find its owning symbol.
+ for (self.symtable) |symbol, symbol_index| {
+ switch (symbol.tag) {
+ .function, .data => if (!symbol.isUndefined()) {
+ try symbol_for_segment.putNoClobber(
+ .{ .kind = symbol.tag, .index = symbol.index },
+ @intCast(u32, symbol_index),
+ );
+ },
+ else => continue,
+ }
+ }
+
+ for (self.relocatable_data) |relocatable_data, index| {
+ const sym_index = symbol_for_segment.get(.{
+ .kind = relocatable_data.getSymbolKind(),
+ .index = @intCast(u32, relocatable_data.index),
+ }) orelse continue; // encountered a segment we do not create an atom for
+ // Resolve which output segment of the final binary this data belongs to.
+ const final_index = try wasm_bin.getMatchingSegment(gpa, object_index, @intCast(u32, index));
+
+ const atom = try Atom.create(gpa);
+ errdefer atom.deinit(gpa);
+
+ // `managed_atoms` owns the atom; from here on `wasm_bin` is responsible
+ // for its lifetime.
+ try wasm_bin.managed_atoms.append(gpa, atom);
+ atom.file = object_index;
+ atom.size = relocatable_data.size;
+ atom.alignment = relocatable_data.getAlignment(self);
+ atom.sym_index = sym_index;
+
+ // Copy over the relocations that land inside this piece of data.
+ const relocations: []types.Relocation = self.relocations.get(relocatable_data.section_index) orelse &.{};
+ for (relocations) |*relocation| {
+ if (isInbetween(relocatable_data.offset, atom.size, relocation.offset)) {
+ // set the offset relative to the offset of the segment itself,
+ // rather than within the entire section.
+ // NOTE: this mutates the relocation stored in `self.relocations`
+ // in place, so the stored offsets are segment-relative afterwards.
+ relocation.offset -= relocatable_data.offset;
+ try atom.relocs.append(gpa, relocation.*);
+
+ // Table-index relocations require the target function to be
+ // present in the element section (indirect function table).
+ if (relocation.isTableIndex()) {
+ try wasm_bin.elements.appendSymbol(gpa, .{
+ .file = object_index,
+ .sym_index = relocation.index,
+ });
+ }
+ }
+ }
+
+ // TODO: Replace `atom.code` from an existing slice to a pointer to the data
+ try atom.code.appendSlice(gpa, relocatable_data.data[0..relocatable_data.size]);
+
+ // Grow the output segment: align its running size up to the atom's
+ // alignment, add the atom, then round up to the segment's alignment.
+ const segment: *Wasm.Segment = &wasm_bin.segments.items[final_index];
+ segment.alignment = std.math.max(segment.alignment, atom.alignment);
+ segment.size = std.mem.alignForwardGeneric(
+ u32,
+ std.mem.alignForwardGeneric(u32, segment.size, atom.alignment) + atom.size,
+ segment.alignment,
+ );
+
+ // Append the atom to the segment's doubly-linked atom list;
+ // `wasm_bin.atoms` tracks the most recently added atom per segment.
+ if (wasm_bin.atoms.getPtr(final_index)) |last| {
+ last.*.next = atom;
+ atom.prev = last.*;
+ last.* = atom;
+ } else {
+ try wasm_bin.atoms.putNoClobber(gpa, final_index, atom);
+ }
+ log.debug("Parsed into atom: '{s}'", .{self.symtable[atom.sym_index].name});
+ }
+}
+
+/// Verifies whether `value` lies in the range that starts at `min` and spans
+/// `length`; both the start and the end (`min + length`) are inclusive.
+/// NOTE(review): the inclusive upper bound accepts `value == min + length`,
+/// i.e. one position past the last byte — confirm this is intended for
+/// relocations sitting exactly on a segment boundary.
+inline fn isInbetween(min: u32, length: u32, value: u32) bool {
+    if (value < min) return false;
+    return value <= min + length;
+}