| author | Jakub Konka <kubkon@jakubkonka.com> | 2021-06-24 18:41:33 +0200 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2021-06-24 18:41:33 +0200 |
| commit | 8216ce67895f5605d1720df4e5e6636395f2fc92 | |
| tree | b37a5a6f78901337e6683378ad6becf5dffae6f7 /src | |
| parent | 31c49ad64ded02b9dde57f5d3ef102a771fa5cf7 | |
| parent | cea8a2f5008bdda8bfd85af58537d148edde75ba | |
Merge pull request #9171 from ziglang/zld-tapi
zld: add TAPI parser for linkers, ship libSystem.B.tbd with Zig, and parse tbd stubs on macOS
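For context, the text-based API (TAPI) stubs this change teaches the linker to read are YAML documents. A minimal `.tbd` stub in the v4 format, adapted from the tokenizer test added in this diff, looks roughly like the following; the `exports` entry is illustrative (it is not part of that test), showing the per-target symbol lists the new `Stub.parse` walks:

```yaml
--- !tapi-tbd
tbd-version: 4
targets: [ x86_64-macos ]

uuids:
  - target: x86_64-macos
    value: F86CC732-D5E4-30B5-AA7D-167DF5EC2708

install-name: '/usr/lib/libSystem.B.dylib'

# Illustrative only: a v4 stub lists exported symbols per target.
exports:
  - targets: [ x86_64-macos ]
    symbols: [ dyld_stub_binder ]
...
```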
Diffstat (limited to 'src')
| -rw-r--r-- | src/link/MachO.zig | 8 |
| -rw-r--r-- | src/link/MachO/Archive.zig | 5 |
| -rw-r--r-- | src/link/MachO/Dylib.zig | 47 |
| -rw-r--r-- | src/link/MachO/Object.zig | 5 |
| -rw-r--r-- | src/link/MachO/Stub.zig | 130 |
| -rw-r--r-- | src/link/MachO/Symbol.zig | 17 |
| -rw-r--r-- | src/link/MachO/Zld.zig | 356 |
| -rw-r--r-- | src/link/tapi.zig | 79 |
| -rw-r--r-- | src/link/tapi/Tokenizer.zig | 439 |
| -rw-r--r-- | src/link/tapi/parse.zig | 713 |
| -rw-r--r-- | src/link/tapi/parse/test.zig | 556 |
| -rw-r--r-- | src/link/tapi/yaml.zig | 651 |
12 files changed, 2849 insertions, 157 deletions
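At link time the flow introduced by this diff (see `Zld.parseLibSystem` below) is: open the shipped `libc/darwin/libSystem.B.tbd`, deserialize its YAML into a `LibStub`, and let `Stub.parse` collect the symbol names exported for the current target. A minimal sketch using the APIs added in this diff; the helper name, import path, and hard-coded arch are assumptions for illustration:

```zig
const std = @import("std");
const Allocator = std.mem.Allocator;
const Stub = @import("link/MachO/Stub.zig"); // path assumed for illustration

/// Hypothetical helper mirroring what Zld.parseLibSystem does in this diff.
fn loadStub(allocator: *Allocator, path: []const u8) !Stub {
    const file = try std.fs.cwd().openFile(path, .{});
    defer file.close();

    var stub = Stub.init(allocator);
    stub.arch = .x86_64; // assumed; Zld copies this from self.arch
    stub.name = try allocator.dupe(u8, path);
    // Deserializes the tbd YAML into typed LibStub.Tbd values.
    stub.lib_stub = try Stub.LibStub.loadFromFile(allocator, file);
    // Fills stub.symbols with names exported (or re-exported) for the
    // matching target string, e.g. "x86_64-macos". Proxy symbols are
    // created lazily later via stub.createProxy(name) when an object
    // file actually references one.
    try stub.parse();
    return stub;
}
```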
diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 6e1996f9ff..b7696f6a7c 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -760,10 +760,9 @@ fn linkWithLLD(self: *MachO, comp: *Compilation) !void { } // Assume ld64 default: -search_paths_first - // Look in each directory for a dylib (tbd), and then for archive + // Look in each directory for a dylib (next, tbd), and then for archive // TODO implement alternative: -search_dylibs_first - // TODO text-based API, or .tbd files. - const exts = &[_][]const u8{ "dylib", "a" }; + const exts = &[_][]const u8{ "dylib", "tbd", "a" }; for (search_lib_names.items) |l_name| { var found = false; @@ -849,6 +848,9 @@ fn linkWithLLD(self: *MachO, comp: *Compilation) !void { try zld.link(positionals.items, full_out_path, .{ .libs = libs.items, .rpaths = rpaths.items, + .libc_stub_path = try comp.zig_lib_directory.join(arena, &[_][]const u8{ + "libc", "darwin", "libSystem.B.tbd", + }), }); break :outer; diff --git a/src/link/MachO/Archive.zig b/src/link/MachO/Archive.zig index f47228077c..48bc33f0fa 100644 --- a/src/link/MachO/Archive.zig +++ b/src/link/MachO/Archive.zig @@ -235,7 +235,10 @@ pub fn parseObject(self: Archive, offset: u32) !*Object { } pub fn isArchive(file: fs.File) !bool { - const magic = try file.reader().readBytesNoEof(Archive.SARMAG); + const magic = file.reader().readBytesNoEof(Archive.SARMAG) catch |err| switch (err) { + error.EndOfStream => return false, + else => |e| return e, + }; try file.seekTo(0); return mem.eql(u8, &magic, Archive.ARMAG); } diff --git a/src/link/MachO/Dylib.zig b/src/link/MachO/Dylib.zig index a2a703dc08..bcf2433dd7 100644 --- a/src/link/MachO/Dylib.zig +++ b/src/link/MachO/Dylib.zig @@ -1,6 +1,7 @@ const Dylib = @This(); const std = @import("std"); +const assert = std.debug.assert; const fs = std.fs; const log = std.log.scoped(.dylib); const macho = std.macho; @@ -27,7 +28,10 @@ id_cmd_index: ?u16 = null, id: ?Id = null, -symbols: std.StringArrayHashMapUnmanaged(*Symbol) = .{}, +/// Parsed symbol table represented as hash map of symbols' +/// names. We can and should defer creating *Symbols until +/// a symbol is referenced by an object file. 
+symbols: std.StringArrayHashMapUnmanaged(void) = .{}, pub const Id = struct { name: []const u8, @@ -50,9 +54,8 @@ pub fn deinit(self: *Dylib) void { } self.load_commands.deinit(self.allocator); - for (self.symbols.values()) |value| { - value.deinit(self.allocator); - self.allocator.destroy(value); + for (self.symbols.keys()) |key| { + self.allocator.free(key); } self.symbols.deinit(self.allocator); @@ -169,23 +172,33 @@ pub fn parseSymbols(self: *Dylib) !void { if (!(Symbol.isSect(sym) and Symbol.isExt(sym))) continue; const name = try self.allocator.dupe(u8, sym_name); - const proxy = try self.allocator.create(Symbol.Proxy); - errdefer self.allocator.destroy(proxy); - - proxy.* = .{ - .base = .{ - .@"type" = .proxy, - .name = name, - }, - .dylib = self, - }; - - try self.symbols.putNoClobber(self.allocator, name, &proxy.base); + try self.symbols.putNoClobber(self.allocator, name, {}); } } pub fn isDylib(file: fs.File) !bool { - const header = try file.reader().readStruct(macho.mach_header_64); + const header = file.reader().readStruct(macho.mach_header_64) catch |err| switch (err) { + error.EndOfStream => return false, + else => |e| return e, + }; try file.seekTo(0); return header.filetype == macho.MH_DYLIB; } + +pub fn createProxy(self: *Dylib, sym_name: []const u8) !?*Symbol { + if (!self.symbols.contains(sym_name)) return null; + + const name = try self.allocator.dupe(u8, sym_name); + const proxy = try self.allocator.create(Symbol.Proxy); + errdefer self.allocator.destroy(proxy); + + proxy.* = .{ + .base = .{ + .@"type" = .proxy, + .name = name, + }, + .file = .{ .dylib = self }, + }; + + return &proxy.base; +} diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig index f9c33bfa5c..64db2fe091 100644 --- a/src/link/MachO/Object.zig +++ b/src/link/MachO/Object.zig @@ -535,7 +535,10 @@ pub fn parseDataInCode(self: *Object) !void { } pub fn isObject(file: fs.File) !bool { - const header = try file.reader().readStruct(macho.mach_header_64); + const header = file.reader().readStruct(macho.mach_header_64) catch |err| switch (err) { + error.EndOfStream => return false, + else => |e| return e, + }; try file.seekTo(0); return header.filetype == macho.MH_OBJECT; } diff --git a/src/link/MachO/Stub.zig b/src/link/MachO/Stub.zig new file mode 100644 index 0000000000..3e1474539d --- /dev/null +++ b/src/link/MachO/Stub.zig @@ -0,0 +1,130 @@ +const Stub = @This(); + +const std = @import("std"); +const assert = std.debug.assert; +const fs = std.fs; +const log = std.log.scoped(.stub); +const macho = std.macho; +const mem = std.mem; + +const Allocator = mem.Allocator; +const Symbol = @import("Symbol.zig"); +pub const LibStub = @import("../tapi.zig").LibStub; + +allocator: *Allocator, +arch: ?std.Target.Cpu.Arch = null, +lib_stub: ?LibStub = null, +name: ?[]const u8 = null, + +ordinal: ?u16 = null, + +id: ?Id = null, + +/// Parsed symbol table represented as hash map of symbols' +/// names. We can and should defer creating *Symbols until +/// a symbol is referenced by an object file. 
+symbols: std.StringArrayHashMapUnmanaged(void) = .{}, + +pub const Id = struct { + name: []const u8, + timestamp: u32, + current_version: u32, + compatibility_version: u32, + + pub fn deinit(id: *Id, allocator: *Allocator) void { + allocator.free(id.name); + } +}; + +pub fn init(allocator: *Allocator) Stub { + return .{ .allocator = allocator }; +} + +pub fn deinit(self: *Stub) void { + self.symbols.deinit(self.allocator); + + if (self.lib_stub) |*lib_stub| { + lib_stub.deinit(); + } + + if (self.name) |name| { + self.allocator.free(name); + } + + if (self.id) |*id| { + id.deinit(self.allocator); + } +} + +pub fn parse(self: *Stub) !void { + const lib_stub = self.lib_stub orelse return error.EmptyStubFile; + if (lib_stub.inner.len == 0) return error.EmptyStubFile; + + log.debug("parsing shared library from stub '{s}'", .{self.name.?}); + + const umbrella_lib = lib_stub.inner[0]; + self.id = .{ + .name = try self.allocator.dupe(u8, umbrella_lib.install_name), + // TODO parse from the stub + .timestamp = 2, + .current_version = 0, + .compatibility_version = 0, + }; + + const target_string: []const u8 = switch (self.arch.?) { + .aarch64 => "arm64-macos", + .x86_64 => "x86_64-macos", + else => unreachable, + }; + + for (lib_stub.inner) |stub| { + if (!hasTarget(stub.targets, target_string)) continue; + + if (stub.exports) |exports| { + for (exports) |exp| { + if (!hasTarget(exp.targets, target_string)) continue; + + for (exp.symbols) |sym_name| { + if (self.symbols.contains(sym_name)) continue; + try self.symbols.putNoClobber(self.allocator, sym_name, {}); + } + } + } + + if (stub.reexports) |reexports| { + for (reexports) |reexp| { + if (!hasTarget(reexp.targets, target_string)) continue; + + for (reexp.symbols) |sym_name| { + if (self.symbols.contains(sym_name)) continue; + try self.symbols.putNoClobber(self.allocator, sym_name, {}); + } + } + } + } +} + +fn hasTarget(targets: []const []const u8, target: []const u8) bool { + for (targets) |t| { + if (mem.eql(u8, t, target)) return true; + } + return false; +} + +pub fn createProxy(self: *Stub, sym_name: []const u8) !?*Symbol { + if (!self.symbols.contains(sym_name)) return null; + + const name = try self.allocator.dupe(u8, sym_name); + const proxy = try self.allocator.create(Symbol.Proxy); + errdefer self.allocator.destroy(proxy); + + proxy.* = .{ + .base = .{ + .@"type" = .proxy, + .name = name, + }, + .file = .{ .stub = self }, + }; + + return &proxy.base; +} diff --git a/src/link/MachO/Symbol.zig b/src/link/MachO/Symbol.zig index bb97acdf9f..2286b1ea93 100644 --- a/src/link/MachO/Symbol.zig +++ b/src/link/MachO/Symbol.zig @@ -7,6 +7,7 @@ const mem = std.mem; const Allocator = mem.Allocator; const Dylib = @import("Dylib.zig"); const Object = @import("Object.zig"); +const Stub = @import("Stub.zig"); pub const Type = enum { regular, @@ -84,10 +85,22 @@ pub const Regular = struct { pub const Proxy = struct { base: Symbol, - /// Dylib where to locate this symbol. - dylib: ?*Dylib = null, + /// Dylib or stub where to locate this symbol. + /// null means self-reference. 
+ file: ?union(enum) { + dylib: *Dylib, + stub: *Stub, + } = null, pub const base_type: Symbol.Type = .proxy; + + pub fn dylibOrdinal(proxy: *Proxy) u16 { + const file = proxy.file orelse return 0; + return switch (file) { + .dylib => |dylib| dylib.ordinal.?, + .stub => |stub| stub.ordinal.?, + }; + } }; pub const Unresolved = struct { diff --git a/src/link/MachO/Zld.zig b/src/link/MachO/Zld.zig index 9d9a1315bb..0a9d209c61 100644 --- a/src/link/MachO/Zld.zig +++ b/src/link/MachO/Zld.zig @@ -17,6 +17,7 @@ const Archive = @import("Archive.zig"); const CodeSignature = @import("CodeSignature.zig"); const Dylib = @import("Dylib.zig"); const Object = @import("Object.zig"); +const Stub = @import("Stub.zig"); const Symbol = @import("Symbol.zig"); const Trie = @import("Trie.zig"); @@ -37,6 +38,10 @@ stack_size: u64 = 0, objects: std.ArrayListUnmanaged(*Object) = .{}, archives: std.ArrayListUnmanaged(*Archive) = .{}, dylibs: std.ArrayListUnmanaged(*Dylib) = .{}, +lib_stubs: std.ArrayListUnmanaged(*Stub) = .{}, + +libsystem_stub_index: ?u16 = null, +next_dylib_ordinal: u16 = 1, load_commands: std.ArrayListUnmanaged(LoadCommand) = .{}, @@ -49,7 +54,6 @@ dyld_info_cmd_index: ?u16 = null, symtab_cmd_index: ?u16 = null, dysymtab_cmd_index: ?u16 = null, dylinker_cmd_index: ?u16 = null, -libsystem_cmd_index: ?u16 = null, data_in_code_cmd_index: ?u16 = null, function_starts_cmd_index: ?u16 = null, main_cmd_index: ?u16 = null, @@ -153,9 +157,20 @@ pub fn deinit(self: *Zld) void { } self.dylibs.deinit(self.allocator); + for (self.lib_stubs.items) |stub| { + stub.deinit(); + self.allocator.destroy(stub); + } + self.lib_stubs.deinit(self.allocator); + + for (self.imports.values()) |proxy| { + proxy.deinit(self.allocator); + self.allocator.destroy(proxy); + } + self.imports.deinit(self.allocator); + self.tentatives.deinit(self.allocator); self.globals.deinit(self.allocator); - self.imports.deinit(self.allocator); self.unresolved.deinit(self.allocator); self.strtab.deinit(self.allocator); @@ -181,6 +196,7 @@ pub fn closeFiles(self: Zld) void { const LinkArgs = struct { libs: []const []const u8, rpaths: []const []const u8, + libc_stub_path: []const u8, }; pub fn link(self: *Zld, files: []const []const u8, out_path: []const u8, args: LinkArgs) !void { @@ -222,6 +238,7 @@ pub fn link(self: *Zld, files: []const []const u8, out_path: []const u8, args: L try self.addRpaths(args.rpaths); try self.parseInputFiles(files); try self.parseLibs(args.libs); + try self.parseLibSystem(args.libc_stub_path); try self.resolveSymbols(); try self.resolveStubsAndGotEntries(); try self.updateMetadata(); @@ -241,14 +258,18 @@ fn parseInputFiles(self: *Zld, files: []const []const u8) !void { object, archive, dylib, + stub, + }, + origin: union { + file: fs.File, + stub: Stub.LibStub, }, - file: fs.File, name: []const u8, }; var classified = std.ArrayList(Input).init(self.allocator); defer classified.deinit(); - // First, classify input files: object, archive or dylib. + // First, classify input files: object, archive, dylib or stub (tbd). 
for (files) |file_name| { const file = try fs.cwd().openFile(file_name, .{}); const full_path = full_path: { @@ -261,7 +282,7 @@ fn parseInputFiles(self: *Zld, files: []const []const u8) !void { if (!(try Object.isObject(file))) break :try_object; try classified.append(.{ .kind = .object, - .file = file, + .origin = .{ .file = file }, .name = full_path, }); continue; @@ -271,7 +292,7 @@ fn parseInputFiles(self: *Zld, files: []const []const u8) !void { if (!(try Archive.isArchive(file))) break :try_archive; try classified.append(.{ .kind = .archive, - .file = file, + .origin = .{ .file = file }, .name = full_path, }); continue; @@ -281,9 +302,22 @@ fn parseInputFiles(self: *Zld, files: []const []const u8) !void { if (!(try Dylib.isDylib(file))) break :try_dylib; try classified.append(.{ .kind = .dylib, - .file = file, + .origin = .{ .file = file }, + .name = full_path, + }); + continue; + } + + try_stub: { + var lib_stub = Stub.LibStub.loadFromFile(self.allocator, file) catch { + break :try_stub; + }; + try classified.append(.{ + .kind = .stub, + .origin = .{ .stub = lib_stub }, .name = full_path, }); + file.close(); continue; } @@ -301,7 +335,8 @@ fn parseInputFiles(self: *Zld, files: []const []const u8) !void { object.* = Object.init(self.allocator); object.arch = self.arch.?; object.name = input.name; - object.file = input.file; + object.file = input.origin.file; + try object.parse(); try self.objects.append(self.allocator, object); }, @@ -312,7 +347,8 @@ fn parseInputFiles(self: *Zld, files: []const []const u8) !void { archive.* = Archive.init(self.allocator); archive.arch = self.arch.?; archive.name = input.name; - archive.file = input.file; + archive.file = input.origin.file; + try archive.parse(); try self.archives.append(self.allocator, archive); }, @@ -323,27 +359,22 @@ fn parseInputFiles(self: *Zld, files: []const []const u8) !void { dylib.* = Dylib.init(self.allocator); dylib.arch = self.arch.?; dylib.name = input.name; - dylib.file = input.file; + dylib.file = input.origin.file; - const ordinal = @intCast(u16, self.dylibs.items.len); - dylib.ordinal = ordinal + 2; // TODO +2 since 1 is reserved for libSystem - - // TODO Defer parsing of the dylibs until they are actually needed try dylib.parse(); try self.dylibs.append(self.allocator, dylib); + }, + .stub => { + const stub = try self.allocator.create(Stub); + errdefer self.allocator.destroy(stub); - // Add LC_LOAD_DYLIB command - const dylib_id = dylib.id orelse unreachable; - var dylib_cmd = try createLoadDylibCommand( - self.allocator, - dylib_id.name, - dylib_id.timestamp, - dylib_id.current_version, - dylib_id.compatibility_version, - ); - errdefer dylib_cmd.deinit(self.allocator); - - try self.load_commands.append(self.allocator, .{ .Dylib = dylib_cmd }); + stub.* = Stub.init(self.allocator); + stub.arch = self.arch.?; + stub.name = input.name; + stub.lib_stub = input.origin.stub; + + try stub.parse(); + try self.lib_stubs.append(self.allocator, stub); }, } } @@ -362,42 +393,79 @@ fn parseLibs(self: *Zld, libs: []const []const u8) !void { dylib.name = try self.allocator.dupe(u8, lib); dylib.file = file; - const ordinal = @intCast(u16, self.dylibs.items.len); - dylib.ordinal = ordinal + 2; // TODO +2 since 1 is reserved for libSystem - - // TODO Defer parsing of the dylibs until they are actually needed try dylib.parse(); try self.dylibs.append(self.allocator, dylib); - - // Add LC_LOAD_DYLIB command - const dylib_id = dylib.id orelse unreachable; - var dylib_cmd = try createLoadDylibCommand( - self.allocator, - 
dylib_id.name, - dylib_id.timestamp, - dylib_id.current_version, - dylib_id.compatibility_version, - ); - errdefer dylib_cmd.deinit(self.allocator); - - try self.load_commands.append(self.allocator, .{ .Dylib = dylib_cmd }); - } else if (try Archive.isArchive(file)) { - const archive = try self.allocator.create(Archive); - errdefer self.allocator.destroy(archive); - - archive.* = Archive.init(self.allocator); - archive.arch = self.arch.?; - archive.name = try self.allocator.dupe(u8, lib); - archive.file = file; - try archive.parse(); - try self.archives.append(self.allocator, archive); } else { - file.close(); - log.warn("unknown filetype for a library: '{s}'", .{lib}); + // Try tbd stub file next. + if (Stub.LibStub.loadFromFile(self.allocator, file)) |lib_stub| { + const stub = try self.allocator.create(Stub); + errdefer self.allocator.destroy(stub); + + stub.* = Stub.init(self.allocator); + stub.arch = self.arch.?; + stub.name = try self.allocator.dupe(u8, lib); + stub.lib_stub = lib_stub; + + try stub.parse(); + try self.lib_stubs.append(self.allocator, stub); + } else |_| { + // TODO this entire logic has to be cleaned up. + try file.seekTo(0); + + if (try Archive.isArchive(file)) { + const archive = try self.allocator.create(Archive); + errdefer self.allocator.destroy(archive); + + archive.* = Archive.init(self.allocator); + archive.arch = self.arch.?; + archive.name = try self.allocator.dupe(u8, lib); + archive.file = file; + + try archive.parse(); + try self.archives.append(self.allocator, archive); + } else { + file.close(); + log.warn("unknown filetype for a library: '{s}'", .{lib}); + } + } } } } +fn parseLibSystem(self: *Zld, libc_stub_path: []const u8) !void { + const file = try fs.cwd().openFile(libc_stub_path, .{}); + defer file.close(); + + var lib_stub = try Stub.LibStub.loadFromFile(self.allocator, file); + + const stub = try self.allocator.create(Stub); + errdefer self.allocator.destroy(stub); + + stub.* = Stub.init(self.allocator); + stub.arch = self.arch.?; + stub.name = try self.allocator.dupe(u8, libc_stub_path); + stub.lib_stub = lib_stub; + + try stub.parse(); + + self.libsystem_stub_index = @intCast(u16, self.lib_stubs.items.len); + try self.lib_stubs.append(self.allocator, stub); + + // Add LC_LOAD_DYLIB load command. + stub.ordinal = self.next_dylib_ordinal; + const dylib_id = stub.id orelse unreachable; + var dylib_cmd = try createLoadDylibCommand( + self.allocator, + dylib_id.name, + dylib_id.timestamp, + dylib_id.current_version, + dylib_id.compatibility_version, + ); + errdefer dylib_cmd.deinit(self.allocator); + try self.load_commands.append(self.allocator, .{ .Dylib = dylib_cmd }); + self.next_dylib_ordinal += 1; +} + fn mapAndUpdateSections( self: *Zld, object: *Object, @@ -1814,9 +1882,8 @@ fn resolveSymbols(self: *Zld) !void { next_sym += 1; } } + // Third pass, resolve symbols in dynamic libraries. - // TODO Implement libSystem as a hard-coded library, or ship with - // a libSystem.B.tbd definition file? 
var unresolved = std.ArrayList(*Symbol).init(self.allocator); defer unresolved.deinit(); @@ -1824,61 +1891,109 @@ fn resolveSymbols(self: *Zld) !void { for (self.unresolved.values()) |value| { unresolved.appendAssumeCapacity(value); } - self.unresolved.clearAndFree(self.allocator); - - var has_undefined = false; - while (unresolved.popOrNull()) |undef| { - var found = false; - for (self.dylibs.items) |dylib| { - const proxy = dylib.symbols.get(undef.name) orelse continue; - try self.imports.putNoClobber(self.allocator, proxy.name, proxy); - undef.alias = proxy; - found = true; - } - - if (!found) { - // TODO we currently hardcode all unresolved symbols to libSystem - const proxy = try self.allocator.create(Symbol.Proxy); - errdefer self.allocator.destroy(proxy); + self.unresolved.clearRetainingCapacity(); + + var referenced = std.AutoHashMap(union(enum) { + dylib: *Dylib, + stub: *Stub, + }, void).init(self.allocator); + defer referenced.deinit(); + + loop: while (unresolved.popOrNull()) |undef| { + const proxy = self.imports.get(undef.name) orelse outer: { + const proxy = inner: { + for (self.dylibs.items) |dylib| { + const proxy = (try dylib.createProxy(undef.name)) orelse continue; + try referenced.put(.{ .dylib = dylib }, {}); + break :inner proxy; + } + for (self.lib_stubs.items) |stub, i| { + const proxy = (try stub.createProxy(undef.name)) orelse continue; + if (self.libsystem_stub_index.? != @intCast(u16, i)) { + // LibSystem gets its load command separately. + try referenced.put(.{ .stub = stub }, {}); + } + break :inner proxy; + } + if (mem.eql(u8, undef.name, "___dso_handle")) { + // TODO this is just a temp patch until I work out what to actually + // do with ___dso_handle and __mh_execute_header symbols which are + // synthetically created by the linker on macOS. + const name = try self.allocator.dupe(u8, undef.name); + const proxy = try self.allocator.create(Symbol.Proxy); + errdefer self.allocator.destroy(proxy); + proxy.* = .{ + .base = .{ + .@"type" = .proxy, + .name = name, + }, + .file = null, + }; + break :inner &proxy.base; + } - proxy.* = .{ - .base = .{ - .@"type" = .proxy, - .name = try self.allocator.dupe(u8, undef.name), - }, - .dylib = null, // TODO null means libSystem + self.unresolved.putAssumeCapacityNoClobber(undef.name, undef); + continue :loop; }; - try self.imports.putNoClobber(self.allocator, proxy.base.name, &proxy.base); - undef.alias = &proxy.base; + try self.imports.putNoClobber(self.allocator, proxy.name, proxy); + break :outer proxy; + }; + undef.alias = proxy; + } + + // Add LC_LOAD_DYLIB load command for each referenced dylib/stub. 
+ var it = referenced.iterator(); + while (it.next()) |key| { + var dylib_cmd = blk: { + switch (key.key_ptr.*) { + .dylib => |dylib| { + dylib.ordinal = self.next_dylib_ordinal; + const dylib_id = dylib.id orelse unreachable; + break :blk try createLoadDylibCommand( + self.allocator, + dylib_id.name, + dylib_id.timestamp, + dylib_id.current_version, + dylib_id.compatibility_version, + ); + }, + .stub => |stub| { + stub.ordinal = self.next_dylib_ordinal; + const dylib_id = stub.id orelse unreachable; + break :blk try createLoadDylibCommand( + self.allocator, + dylib_id.name, + dylib_id.timestamp, + dylib_id.current_version, + dylib_id.compatibility_version, + ); + }, + } + }; + errdefer dylib_cmd.deinit(self.allocator); + try self.load_commands.append(self.allocator, .{ .Dylib = dylib_cmd }); + self.next_dylib_ordinal += 1; + } - // log.err("undefined reference to symbol '{s}'", .{undef.name}); - // log.err(" | referenced in {s}", .{ - // undef.cast(Symbol.Unresolved).?.file.name.?, - // }); - // has_undefined = true; + if (self.unresolved.count() > 0) { + for (self.unresolved.values()) |undef| { + log.err("undefined reference to symbol '{s}'", .{undef.name}); + log.err(" | referenced in {s}", .{ + undef.cast(Symbol.Unresolved).?.file.name.?, + }); } - } - if (has_undefined) return error.UndefinedSymbolReference; + return error.UndefinedSymbolReference; + } // Finally put dyld_stub_binder as an Import - const dyld_stub_binder = try self.allocator.create(Symbol.Proxy); - errdefer self.allocator.destroy(dyld_stub_binder); - - dyld_stub_binder.* = .{ - .base = .{ - .@"type" = .proxy, - .name = try self.allocator.dupe(u8, "dyld_stub_binder"), - }, - .dylib = null, // TODO null means libSystem + const libsystem_stub = self.lib_stubs.items[self.libsystem_stub_index.?]; + const proxy = (try libsystem_stub.createProxy("dyld_stub_binder")) orelse { + log.err("undefined reference to symbol 'dyld_stub_binder'", .{}); + return error.UndefinedSymbolReference; }; - - try self.imports.putNoClobber( - self.allocator, - dyld_stub_binder.base.name, - &dyld_stub_binder.base, - ); + try self.imports.putNoClobber(self.allocator, proxy.name, proxy); } fn resolveStubsAndGotEntries(self: *Zld) !void { @@ -2437,15 +2552,6 @@ fn populateMetadata(self: *Zld) !void { try self.load_commands.append(self.allocator, .{ .Dylinker = dylinker_cmd }); } - if (self.libsystem_cmd_index == null) { - self.libsystem_cmd_index = @intCast(u16, self.load_commands.items.len); - - var dylib_cmd = try createLoadDylibCommand(self.allocator, mem.spanZ(LIB_SYSTEM_PATH), 2, 0, 0); - errdefer dylib_cmd.deinit(self.allocator); - - try self.load_commands.append(self.allocator, .{ .Dylib = dylib_cmd }); - } - if (self.main_cmd_index == null) { self.main_cmd_index = @intCast(u16, self.load_commands.items.len); try self.load_commands.append(self.allocator, .{ @@ -2746,14 +2852,10 @@ fn writeBindInfoTable(self: *Zld) !void { for (self.got_entries.items) |sym| { if (sym.cast(Symbol.Proxy)) |proxy| { - const dylib_ordinal = ordinal: { - const dylib = proxy.dylib orelse break :ordinal 1; // TODO embedded libSystem - break :ordinal dylib.ordinal.?; - }; try pointers.append(.{ .offset = base_offset + proxy.base.got_index.? 
* @sizeOf(u64), .segment_id = segment_id, - .dylib_ordinal = dylib_ordinal, + .dylib_ordinal = proxy.dylibOrdinal(), .name = proxy.base.name, }); } @@ -2768,15 +2870,11 @@ fn writeBindInfoTable(self: *Zld) !void { const sym = self.imports.get("__tlv_bootstrap") orelse unreachable; const proxy = sym.cast(Symbol.Proxy) orelse unreachable; - const dylib_ordinal = ordinal: { - const dylib = proxy.dylib orelse break :ordinal 1; // TODO embedded libSystem - break :ordinal dylib.ordinal.?; - }; try pointers.append(.{ .offset = base_offset, .segment_id = segment_id, - .dylib_ordinal = dylib_ordinal, + .dylib_ordinal = proxy.dylibOrdinal(), .name = proxy.base.name, }); } @@ -2813,15 +2911,10 @@ fn writeLazyBindInfoTable(self: *Zld) !void { for (self.stubs.items) |sym| { const proxy = sym.cast(Symbol.Proxy) orelse unreachable; - const dylib_ordinal = ordinal: { - const dylib = proxy.dylib orelse break :ordinal 1; // TODO embedded libSystem - break :ordinal dylib.ordinal.?; - }; - pointers.appendAssumeCapacity(.{ .offset = base_offset + sym.stubs_index.? * @sizeOf(u64), .segment_id = segment_id, - .dylib_ordinal = dylib_ordinal, + .dylib_ordinal = proxy.dylibOrdinal(), .name = sym.name, }); } @@ -3128,15 +3221,12 @@ fn writeSymbolTable(self: *Zld) !void { defer undefs.deinit(); for (self.imports.values()) |sym| { - const ordinal = ordinal: { - const dylib = sym.cast(Symbol.Proxy).?.dylib orelse break :ordinal 1; // TODO handle libSystem - break :ordinal dylib.ordinal.?; - }; + const proxy = sym.cast(Symbol.Proxy) orelse unreachable; try undefs.append(.{ .n_strx = try self.makeString(sym.name), .n_type = macho.N_UNDF | macho.N_EXT, .n_sect = 0, - .n_desc = (ordinal * macho.N_SYMBOL_RESOLVER) | macho.REFERENCE_FLAG_UNDEFINED_NON_LAZY, + .n_desc = (proxy.dylibOrdinal() * macho.N_SYMBOL_RESOLVER) | macho.REFERENCE_FLAG_UNDEFINED_NON_LAZY, .n_value = 0, }); } diff --git a/src/link/tapi.zig b/src/link/tapi.zig new file mode 100644 index 0000000000..efa7227def --- /dev/null +++ b/src/link/tapi.zig @@ -0,0 +1,79 @@ +const std = @import("std"); +const fs = std.fs; +const mem = std.mem; +const log = std.log.scoped(.tapi); + +const Allocator = mem.Allocator; +const Yaml = @import("tapi/yaml.zig").Yaml; + +pub const LibStub = struct { + /// Underlying memory for stub's contents. + yaml: Yaml, + + /// Typed contents of the tbd file. 
+ inner: []Tbd, + + const Tbd = struct { + tbd_version: u3, + targets: []const []const u8, + uuids: []const struct { + target: []const u8, + value: []const u8, + }, + install_name: []const u8, + current_version: ?union(enum) { + string: []const u8, + float: f64, + int: u64, + }, + reexported_libraries: ?[]const struct { + targets: []const []const u8, + libraries: []const []const u8, + }, + parent_umbrella: ?[]const struct { + targets: []const []const u8, + umbrella: []const u8, + }, + exports: ?[]const struct { + targets: []const []const u8, + symbols: []const []const u8, + }, + reexports: ?[]const struct { + targets: []const []const u8, + symbols: []const []const u8, + }, + allowable_clients: ?[]const struct { + targets: []const []const u8, + clients: []const []const u8, + }, + objc_classes: ?[]const []const u8, + }; + + pub fn loadFromFile(allocator: *Allocator, file: fs.File) !LibStub { + const source = try file.readToEndAlloc(allocator, std.math.maxInt(u32)); + defer allocator.free(source); + + var lib_stub = LibStub{ + .yaml = try Yaml.load(allocator, source), + .inner = undefined, + }; + + lib_stub.inner = lib_stub.yaml.parse([]Tbd) catch |err| blk: { + switch (err) { + error.TypeMismatch => { + // TODO clean this up. + var out = try lib_stub.yaml.arena.allocator.alloc(Tbd, 1); + out[0] = try lib_stub.yaml.parse(Tbd); + break :blk out; + }, + else => |e| return e, + } + }; + + return lib_stub; + } + + pub fn deinit(self: *LibStub) void { + self.yaml.deinit(); + } +}; diff --git a/src/link/tapi/Tokenizer.zig b/src/link/tapi/Tokenizer.zig new file mode 100644 index 0000000000..37fcedbfce --- /dev/null +++ b/src/link/tapi/Tokenizer.zig @@ -0,0 +1,439 @@ +const Tokenizer = @This(); + +const std = @import("std"); +const log = std.log.scoped(.tapi); +const testing = std.testing; + +buffer: []const u8, +index: usize = 0, + +pub const Token = struct { + id: Id, + start: usize, + end: usize, + // Count of spaces/tabs. + // Only active for .Space and .Tab tokens. + count: ?usize = null, + + pub const Id = enum { + Eof, + + NewLine, + DocStart, // --- + DocEnd, // ... + SeqItemInd, // - + MapValueInd, // : + FlowMapStart, // { + FlowMapEnd, // } + FlowSeqStart, // [ + FlowSeqEnd, // ] + + Comma, + Space, + Tab, + Comment, // # + Alias, // * + Anchor, // & + Tag, // ! 
+ SingleQuote, // ' + DoubleQuote, // " + + Literal, + }; +}; + +pub const TokenIndex = usize; + +pub const TokenIterator = struct { + buffer: []const Token, + pos: TokenIndex = 0, + + pub fn next(self: *TokenIterator) Token { + const token = self.buffer[self.pos]; + self.pos += 1; + return token; + } + + pub fn peek(self: TokenIterator) ?Token { + if (self.pos >= self.buffer.len) return null; + return self.buffer[self.pos]; + } + + pub fn reset(self: *TokenIterator) void { + self.pos = 0; + } + + pub fn seekTo(self: *TokenIterator, pos: TokenIndex) void { + self.pos = pos; + } + + pub fn seekBy(self: *TokenIterator, offset: isize) void { + const new_pos = @bitCast(isize, self.pos) + offset; + if (new_pos < 0) { + self.pos = 0; + } else { + self.pos = @intCast(usize, new_pos); + } + } +}; + +pub fn next(self: *Tokenizer) Token { + var result = Token{ + .id = .Eof, + .start = self.index, + .end = undefined, + }; + + var state: union(enum) { + Start, + NewLine, + Space: usize, + Tab: usize, + Hyphen: usize, + Dot: usize, + Literal, + } = .Start; + + while (self.index < self.buffer.len) : (self.index += 1) { + const c = self.buffer[self.index]; + switch (state) { + .Start => switch (c) { + ' ' => { + state = .{ .Space = 1 }; + }, + '\t' => { + state = .{ .Tab = 1 }; + }, + '\n' => { + result.id = .NewLine; + self.index += 1; + break; + }, + '\r' => { + state = .NewLine; + }, + '-' => { + state = .{ .Hyphen = 1 }; + }, + '.' => { + state = .{ .Dot = 1 }; + }, + ',' => { + result.id = .Comma; + self.index += 1; + break; + }, + '#' => { + result.id = .Comment; + self.index += 1; + break; + }, + '*' => { + result.id = .Alias; + self.index += 1; + break; + }, + '&' => { + result.id = .Anchor; + self.index += 1; + break; + }, + '!' => { + result.id = .Tag; + self.index += 1; + break; + }, + '\'' => { + result.id = .SingleQuote; + self.index += 1; + break; + }, + '"' => { + result.id = .DoubleQuote; + self.index += 1; + break; + }, + '[' => { + result.id = .FlowSeqStart; + self.index += 1; + break; + }, + ']' => { + result.id = .FlowSeqEnd; + self.index += 1; + break; + }, + ':' => { + result.id = .MapValueInd; + self.index += 1; + break; + }, + '{' => { + result.id = .FlowMapStart; + self.index += 1; + break; + }, + '}' => { + result.id = .FlowMapEnd; + self.index += 1; + break; + }, + else => { + state = .Literal; + }, + }, + .Space => |*count| switch (c) { + ' ' => { + count.* += 1; + }, + else => { + result.id = .Space; + result.count = count.*; + break; + }, + }, + .Tab => |*count| switch (c) { + ' ' => { + count.* += 1; + }, + else => { + result.id = .Tab; + result.count = count.*; + break; + }, + }, + .NewLine => switch (c) { + '\n' => { + result.id = .NewLine; + self.index += 1; + break; + }, + else => {}, // TODO this should be an error condition + }, + .Hyphen => |*count| switch (c) { + ' ' => { + result.id = .SeqItemInd; + self.index += 1; + break; + }, + '-' => { + count.* += 1; + + if (count.* == 3) { + result.id = .DocStart; + self.index += 1; + break; + } + }, + else => { + state = .Literal; + }, + }, + .Dot => |*count| switch (c) { + '.' 
=> { + count.* += 1; + + if (count.* == 3) { + result.id = .DocEnd; + self.index += 1; + break; + } + }, + else => { + state = .Literal; + }, + }, + .Literal => switch (c) { + '\r', '\n', ' ', '\'', '"', ',', ':', ']', '}' => { + result.id = .Literal; + break; + }, + else => { + result.id = .Literal; + }, + }, + } + } + + if (state == .Literal and result.id == .Eof) { + result.id = .Literal; + } + + result.end = self.index; + + log.debug("{any}", .{result}); + log.debug(" | {s}", .{self.buffer[result.start..result.end]}); + + return result; +} + +fn testExpected(source: []const u8, expected: []const Token.Id) !void { + var tokenizer = Tokenizer{ + .buffer = source, + }; + + for (expected) |exp| { + const token = tokenizer.next(); + try testing.expectEqual(exp, token.id); + } +} + +test "empty doc" { + try testExpected("", &[_]Token.Id{.Eof}); +} + +test "empty doc with explicit markers" { + try testExpected( + \\--- + \\... + , &[_]Token.Id{ + .DocStart, .NewLine, .DocEnd, .Eof, + }); +} + +test "sequence of values" { + try testExpected( + \\- 0 + \\- 1 + \\- 2 + , &[_]Token.Id{ + .SeqItemInd, + .Literal, + .NewLine, + .SeqItemInd, + .Literal, + .NewLine, + .SeqItemInd, + .Literal, + .Eof, + }); +} + +test "sequence of sequences" { + try testExpected( + \\- [ val1, val2] + \\- [val3, val4 ] + , &[_]Token.Id{ + .SeqItemInd, + .FlowSeqStart, + .Space, + .Literal, + .Comma, + .Space, + .Literal, + .FlowSeqEnd, + .NewLine, + .SeqItemInd, + .FlowSeqStart, + .Literal, + .Comma, + .Space, + .Literal, + .Space, + .FlowSeqEnd, + .Eof, + }); +} + +test "mappings" { + try testExpected( + \\key1: value1 + \\key2: value2 + , &[_]Token.Id{ + .Literal, + .MapValueInd, + .Space, + .Literal, + .NewLine, + .Literal, + .MapValueInd, + .Space, + .Literal, + .Eof, + }); +} + +test "inline mapped sequence of values" { + try testExpected( + \\key : [ val1, + \\ val2 ] + , &[_]Token.Id{ + .Literal, + .Space, + .MapValueInd, + .Space, + .FlowSeqStart, + .Space, + .Literal, + .Comma, + .Space, + .NewLine, + .Space, + .Literal, + .Space, + .FlowSeqEnd, + .Eof, + }); +} + +test "part of tdb" { + try testExpected( + \\--- !tapi-tbd + \\tbd-version: 4 + \\targets: [ x86_64-macos ] + \\ + \\uuids: + \\ - target: x86_64-macos + \\ value: F86CC732-D5E4-30B5-AA7D-167DF5EC2708 + \\ + \\install-name: '/usr/lib/libSystem.B.dylib' + \\... 
+ , &[_]Token.Id{ + .DocStart, + .Space, + .Tag, + .Literal, + .NewLine, + .Literal, + .MapValueInd, + .Space, + .Literal, + .NewLine, + .Literal, + .MapValueInd, + .Space, + .FlowSeqStart, + .Space, + .Literal, + .Space, + .FlowSeqEnd, + .NewLine, + .NewLine, + .Literal, + .MapValueInd, + .NewLine, + .Space, + .SeqItemInd, + .Literal, + .MapValueInd, + .Space, + .Literal, + .NewLine, + .Space, + .Literal, + .MapValueInd, + .Space, + .Literal, + .NewLine, + .NewLine, + .Literal, + .MapValueInd, + .Space, + .SingleQuote, + .Literal, + .SingleQuote, + .NewLine, + .DocEnd, + .Eof, + }); +} diff --git a/src/link/tapi/parse.zig b/src/link/tapi/parse.zig new file mode 100644 index 0000000000..1e40ac63dc --- /dev/null +++ b/src/link/tapi/parse.zig @@ -0,0 +1,713 @@ +const std = @import("std"); +const assert = std.debug.assert; +const log = std.log.scoped(.tapi); +const mem = std.mem; +const testing = std.testing; + +const Allocator = mem.Allocator; +const Tokenizer = @import("Tokenizer.zig"); +const Token = Tokenizer.Token; +const TokenIndex = Tokenizer.TokenIndex; +const TokenIterator = Tokenizer.TokenIterator; + +pub const ParseError = error{ + MalformedYaml, + NestedDocuments, + UnexpectedTag, + UnexpectedEof, + UnexpectedToken, + Unhandled, +} || Allocator.Error; + +pub const Node = struct { + tag: Tag, + tree: *const Tree, + + pub const Tag = enum { + doc, + map, + list, + value, + }; + + pub fn cast(self: *const Node, comptime T: type) ?*const T { + if (self.tag != T.base_tag) { + return null; + } + return @fieldParentPtr(T, "base", self); + } + + pub fn deinit(self: *Node, allocator: *Allocator) void { + switch (self.tag) { + .doc => @fieldParentPtr(Node.Doc, "base", self).deinit(allocator), + .map => @fieldParentPtr(Node.Map, "base", self).deinit(allocator), + .list => @fieldParentPtr(Node.List, "base", self).deinit(allocator), + .value => @fieldParentPtr(Node.Value, "base", self).deinit(allocator), + } + } + + pub fn format( + self: *const Node, + comptime fmt: []const u8, + options: std.fmt.FormatOptions, + writer: anytype, + ) !void { + return switch (self.tag) { + .doc => @fieldParentPtr(Node.Doc, "base", self).format(fmt, options, writer), + .map => @fieldParentPtr(Node.Map, "base", self).format(fmt, options, writer), + .list => @fieldParentPtr(Node.List, "base", self).format(fmt, options, writer), + .value => @fieldParentPtr(Node.Value, "base", self).format(fmt, options, writer), + }; + } + + pub const Doc = struct { + base: Node = Node{ .tag = Tag.doc, .tree = undefined }, + start: ?TokenIndex = null, + end: ?TokenIndex = null, + directive: ?TokenIndex = null, + value: ?*Node = null, + + pub const base_tag: Node.Tag = .doc; + + pub fn deinit(self: *Doc, allocator: *Allocator) void { + if (self.value) |node| { + node.deinit(allocator); + allocator.destroy(node); + } + } + + pub fn format( + self: *const Doc, + comptime fmt: []const u8, + options: std.fmt.FormatOptions, + writer: anytype, + ) !void { + _ = fmt; + _ = options; + if (self.directive) |id| { + try std.fmt.format(writer, "{{ ", .{}); + const directive = self.base.tree.tokens[id]; + try std.fmt.format(writer, ".directive = {s}, ", .{ + self.base.tree.source[directive.start..directive.end], + }); + } + if (self.value) |node| { + try std.fmt.format(writer, "{}", .{node}); + } + if (self.directive != null) { + try std.fmt.format(writer, " }}", .{}); + } + } + }; + + pub const Map = struct { + base: Node = Node{ .tag = Tag.map, .tree = undefined }, + start: ?TokenIndex = null, + end: ?TokenIndex = null, + values: 
std.ArrayListUnmanaged(Entry) = .{}, + + pub const base_tag: Node.Tag = .map; + + pub const Entry = struct { + key: TokenIndex, + value: *Node, + }; + + pub fn deinit(self: *Map, allocator: *Allocator) void { + for (self.values.items) |entry| { + entry.value.deinit(allocator); + allocator.destroy(entry.value); + } + self.values.deinit(allocator); + } + + pub fn format( + self: *const Map, + comptime fmt: []const u8, + options: std.fmt.FormatOptions, + writer: anytype, + ) !void { + _ = fmt; + _ = options; + try std.fmt.format(writer, "{{ ", .{}); + for (self.values.items) |entry| { + const key = self.base.tree.tokens[entry.key]; + try std.fmt.format(writer, "{s} => {}, ", .{ + self.base.tree.source[key.start..key.end], + entry.value, + }); + } + return std.fmt.format(writer, " }}", .{}); + } + }; + + pub const List = struct { + base: Node = Node{ .tag = Tag.list, .tree = undefined }, + start: ?TokenIndex = null, + end: ?TokenIndex = null, + values: std.ArrayListUnmanaged(*Node) = .{}, + + pub const base_tag: Node.Tag = .list; + + pub fn deinit(self: *List, allocator: *Allocator) void { + for (self.values.items) |node| { + node.deinit(allocator); + allocator.destroy(node); + } + self.values.deinit(allocator); + } + + pub fn format( + self: *const List, + comptime fmt: []const u8, + options: std.fmt.FormatOptions, + writer: anytype, + ) !void { + _ = fmt; + _ = options; + try std.fmt.format(writer, "[ ", .{}); + for (self.values.items) |node| { + try std.fmt.format(writer, "{}, ", .{node}); + } + return std.fmt.format(writer, " ]", .{}); + } + }; + + pub const Value = struct { + base: Node = Node{ .tag = Tag.value, .tree = undefined }, + start: ?TokenIndex = null, + end: ?TokenIndex = null, + + pub const base_tag: Node.Tag = .value; + + pub fn deinit(self: *Value, allocator: *Allocator) void { + _ = self; + _ = allocator; + } + + pub fn format( + self: *const Value, + comptime fmt: []const u8, + options: std.fmt.FormatOptions, + writer: anytype, + ) !void { + _ = fmt; + _ = options; + const start = self.base.tree.tokens[self.start.?]; + const end = self.base.tree.tokens[self.end.?]; + return std.fmt.format(writer, "{s}", .{ + self.base.tree.source[start.start..end.end], + }); + } + }; +}; + +pub const Tree = struct { + allocator: *Allocator, + source: []const u8, + tokens: []Token, + docs: std.ArrayListUnmanaged(*Node) = .{}, + + pub fn init(allocator: *Allocator) Tree { + return .{ + .allocator = allocator, + .source = undefined, + .tokens = undefined, + }; + } + + pub fn deinit(self: *Tree) void { + self.allocator.free(self.tokens); + for (self.docs.items) |doc| { + doc.deinit(self.allocator); + self.allocator.destroy(doc); + } + self.docs.deinit(self.allocator); + } + + pub fn parse(self: *Tree, source: []const u8) !void { + var tokenizer = Tokenizer{ .buffer = source }; + var tokens = std.ArrayList(Token).init(self.allocator); + errdefer tokens.deinit(); + + while (true) { + const token = tokenizer.next(); + try tokens.append(token); + if (token.id == .Eof) break; + } + + self.source = source; + self.tokens = tokens.toOwnedSlice(); + + var it = TokenIterator{ .buffer = self.tokens }; + var parser = Parser{ + .allocator = self.allocator, + .tree = self, + .token_it = &it, + }; + defer parser.deinit(); + + try parser.scopes.append(self.allocator, .{ + .indent = 0, + }); + + while (true) { + if (parser.token_it.peek() == null) return; + const pos = parser.token_it.pos; + const token = parser.token_it.next(); + + log.debug("Next token: {}, {}", .{ pos, token }); + + switch (token.id) { + 
.Space, .Comment, .NewLine => {}, + .Eof => break, + else => { + const doc = try parser.doc(pos); + try self.docs.append(self.allocator, &doc.base); + }, + } + } + } +}; + +const Parser = struct { + allocator: *Allocator, + tree: *Tree, + token_it: *TokenIterator, + scopes: std.ArrayListUnmanaged(Scope) = .{}, + + const Scope = struct { + indent: usize, + }; + + fn deinit(self: *Parser) void { + self.scopes.deinit(self.allocator); + } + + fn doc(self: *Parser, start: TokenIndex) ParseError!*Node.Doc { + const node = try self.allocator.create(Node.Doc); + errdefer self.allocator.destroy(node); + node.* = .{ + .start = start, + }; + node.base.tree = self.tree; + + self.token_it.seekTo(start); + + log.debug("Doc start: {}, {}", .{ start, self.tree.tokens[start] }); + + const explicit_doc: bool = if (self.eatToken(.DocStart)) |_| explicit_doc: { + if (self.eatToken(.Tag)) |_| { + node.directive = try self.expectToken(.Literal); + } + _ = try self.expectToken(.NewLine); + break :explicit_doc true; + } else false; + + while (true) { + const pos = self.token_it.pos; + const token = self.token_it.next(); + + log.debug("Next token: {}, {}", .{ pos, token }); + + switch (token.id) { + .Tag => { + return error.UnexpectedTag; + }, + .Literal, .SingleQuote, .DoubleQuote => { + _ = try self.expectToken(.MapValueInd); + const map_node = try self.map(pos); + node.value = &map_node.base; + }, + .SeqItemInd => { + const list_node = try self.list(pos); + node.value = &list_node.base; + }, + .FlowSeqStart => { + const list_node = try self.list_bracketed(pos); + node.value = &list_node.base; + }, + .DocEnd => { + if (explicit_doc) break; + return error.UnexpectedToken; + }, + .DocStart, .Eof => { + self.token_it.seekBy(-1); + break; + }, + else => { + return error.UnexpectedToken; + }, + } + } + + node.end = self.token_it.pos - 1; + + log.debug("Doc end: {}, {}", .{ node.end.?, self.tree.tokens[node.end.?] }); + + return node; + } + + fn map(self: *Parser, start: TokenIndex) ParseError!*Node.Map { + const node = try self.allocator.create(Node.Map); + errdefer self.allocator.destroy(node); + node.* = .{ + .start = start, + }; + node.base.tree = self.tree; + + self.token_it.seekTo(start); + + log.debug("Map start: {}, {}", .{ start, self.tree.tokens[start] }); + log.debug("Current scope: {}", .{self.scopes.items[self.scopes.items.len - 1]}); + + while (true) { + // Parse key. + const key_pos = self.token_it.pos; + const key = self.token_it.next(); + switch (key.id) { + .Literal => {}, + else => { + self.token_it.seekBy(-1); + break; + }, + } + + log.debug("Map key: {}, '{s}'", .{ key, self.tree.source[key.start..key.end] }); + + // Separator + _ = try self.expectToken(.MapValueInd); + self.eatCommentsAndSpace(); + + // Parse value. + const value: *Node = value: { + if (self.eatToken(.NewLine)) |_| { + // Explicit, complex value such as list or map. + try self.openScope(); + const value_pos = self.token_it.pos; + const value = self.token_it.next(); + switch (value.id) { + .Literal, .SingleQuote, .DoubleQuote => { + // Assume nested map. + const map_node = try self.map(value_pos); + break :value &map_node.base; + }, + .SeqItemInd => { + // Assume list of values. + const list_node = try self.list(value_pos); + break :value &list_node.base; + }, + else => { + log.err("{}", .{key}); + return error.Unhandled; + }, + } + } else { + const value_pos = self.token_it.pos; + const value = self.token_it.next(); + switch (value.id) { + .Literal, .SingleQuote, .DoubleQuote => { + // Assume leaf value. 
+ const leaf_node = try self.leaf_value(value_pos); + break :value &leaf_node.base; + }, + .FlowSeqStart => { + const list_node = try self.list_bracketed(value_pos); + break :value &list_node.base; + }, + else => { + log.err("{}", .{key}); + return error.Unhandled; + }, + } + } + }; + log.debug("Map value: {}", .{value}); + + try node.values.append(self.allocator, .{ + .key = key_pos, + .value = value, + }); + + if (self.eatToken(.NewLine)) |_| { + if (try self.closeScope()) { + break; + } + } + } + + node.end = self.token_it.pos - 1; + + log.debug("Map end: {}, {}", .{ node.end.?, self.tree.tokens[node.end.?] }); + + return node; + } + + fn list(self: *Parser, start: TokenIndex) ParseError!*Node.List { + const node = try self.allocator.create(Node.List); + errdefer self.allocator.destroy(node); + node.* = .{ + .start = start, + }; + node.base.tree = self.tree; + + self.token_it.seekTo(start); + + log.debug("List start: {}, {}", .{ start, self.tree.tokens[start] }); + log.debug("Current scope: {}", .{self.scopes.items[self.scopes.items.len - 1]}); + + while (true) { + _ = self.eatToken(.SeqItemInd) orelse { + _ = try self.closeScope(); + break; + }; + self.eatCommentsAndSpace(); + + const pos = self.token_it.pos; + const token = self.token_it.next(); + const value: *Node = value: { + switch (token.id) { + .Literal, .SingleQuote, .DoubleQuote => { + if (self.eatToken(.MapValueInd)) |_| { + if (self.eatToken(.NewLine)) |_| { + try self.openScope(); + } + // nested map + const map_node = try self.map(pos); + break :value &map_node.base; + } else { + // standalone (leaf) value + const leaf_node = try self.leaf_value(pos); + break :value &leaf_node.base; + } + }, + .FlowSeqStart => { + const list_node = try self.list_bracketed(pos); + break :value &list_node.base; + }, + else => { + log.err("{}", .{token}); + return error.Unhandled; + }, + } + }; + try node.values.append(self.allocator, value); + + _ = self.eatToken(.NewLine); + } + + node.end = self.token_it.pos - 1; + + log.debug("List end: {}, {}", .{ node.end.?, self.tree.tokens[node.end.?] }); + + return node; + } + + fn list_bracketed(self: *Parser, start: TokenIndex) ParseError!*Node.List { + const node = try self.allocator.create(Node.List); + errdefer self.allocator.destroy(node); + node.* = .{ + .start = start, + }; + node.base.tree = self.tree; + + self.token_it.seekTo(start); + + log.debug("List start: {}, {}", .{ start, self.tree.tokens[start] }); + log.debug("Current scope: {}", .{self.scopes.items[self.scopes.items.len - 1]}); + + _ = try self.expectToken(.FlowSeqStart); + + while (true) { + _ = self.eatToken(.NewLine); + self.eatCommentsAndSpace(); + + const pos = self.token_it.pos; + const token = self.token_it.next(); + + log.debug("Next token: {}, {}", .{ pos, token }); + + const value: *Node = value: { + switch (token.id) { + .FlowSeqStart => { + const list_node = try self.list_bracketed(pos); + break :value &list_node.base; + }, + .FlowSeqEnd => { + break; + }, + .Literal, .SingleQuote, .DoubleQuote => { + const leaf_node = try self.leaf_value(pos); + _ = self.eatToken(.Comma); + // TODO newline + break :value &leaf_node.base; + }, + else => { + log.err("{}", .{token}); + return error.Unhandled; + }, + } + }; + try node.values.append(self.allocator, value); + } + + node.end = self.token_it.pos - 1; + + log.debug("List end: {}, {}", .{ node.end.?, self.tree.tokens[node.end.?] 
}); + + return node; + } + + fn leaf_value(self: *Parser, start: TokenIndex) ParseError!*Node.Value { + const node = try self.allocator.create(Node.Value); + errdefer self.allocator.destroy(node); + node.* = .{ + .start = start, + }; + node.base.tree = self.tree; + + self.token_it.seekTo(start); + + log.debug("Leaf start: {}, {}", .{ node.start.?, self.tree.tokens[node.start.?] }); + + parse: { + if (self.eatToken(.SingleQuote)) |_| { + node.start = node.start.? + 1; + while (true) { + const tok = self.token_it.next(); + switch (tok.id) { + .SingleQuote => { + node.end = self.token_it.pos - 2; + break :parse; + }, + .NewLine => return error.UnexpectedToken, + else => {}, + } + } + } + + if (self.eatToken(.DoubleQuote)) |_| { + node.start = node.start.? + 1; + while (true) { + const tok = self.token_it.next(); + switch (tok.id) { + .DoubleQuote => { + node.end = self.token_it.pos - 2; + break :parse; + }, + .NewLine => return error.UnexpectedToken, + else => {}, + } + } + } + + // TODO handle multiline strings in new block scope + while (true) { + const tok = self.token_it.next(); + switch (tok.id) { + .Literal => {}, + .Space => { + const trailing = self.token_it.pos - 2; + self.eatCommentsAndSpace(); + if (self.token_it.peek()) |peek| { + if (peek.id != .Literal) { + node.end = trailing; + break; + } + } + }, + else => { + self.token_it.seekBy(-1); + node.end = self.token_it.pos - 1; + break; + }, + } + } + } + + log.debug("Leaf end: {}, {}", .{ node.end.?, self.tree.tokens[node.end.?] }); + + return node; + } + + fn openScope(self: *Parser) !void { + const peek = self.token_it.peek() orelse return error.UnexpectedEof; + if (peek.id != .Space and peek.id != .Tab) { + // No need to open scope. + return; + } + const indent = self.token_it.next().count.?; + const prev_scope = self.scopes.items[self.scopes.items.len - 1]; + if (indent < prev_scope.indent) { + return error.MalformedYaml; + } + + log.debug("Opening scope...", .{}); + + try self.scopes.append(self.allocator, .{ + .indent = indent, + }); + } + + fn closeScope(self: *Parser) !bool { + const indent = indent: { + const peek = self.token_it.peek() orelse return error.UnexpectedEof; + switch (peek.id) { + .Space, .Tab => { + break :indent self.token_it.next().count.?; + }, + else => { + break :indent 0; + }, + } + }; + + const scope = self.scopes.items[self.scopes.items.len - 1]; + if (indent < scope.indent) { + log.debug("Closing scope...", .{}); + _ = self.scopes.pop(); + return true; + } + + return false; + } + + fn eatCommentsAndSpace(self: *Parser) void { + while (true) { + _ = self.token_it.peek() orelse return; + const token = self.token_it.next(); + switch (token.id) { + .Comment, .Space => {}, + else => { + self.token_it.seekBy(-1); + break; + }, + } + } + } + + fn eatToken(self: *Parser, id: Token.Id) ?TokenIndex { + while (true) { + const pos = self.token_it.pos; + _ = self.token_it.peek() orelse return null; + const token = self.token_it.next(); + switch (token.id) { + .Comment, .Space => continue, + else => |next_id| if (next_id == id) { + return pos; + } else { + self.token_it.seekTo(pos); + return null; + }, + } + } + } + + fn expectToken(self: *Parser, id: Token.Id) ParseError!TokenIndex { + return self.eatToken(id) orelse error.UnexpectedToken; + } +}; + +test { + _ = @import("parse/test.zig"); +} diff --git a/src/link/tapi/parse/test.zig b/src/link/tapi/parse/test.zig new file mode 100644 index 0000000000..b96a71fe97 --- /dev/null +++ b/src/link/tapi/parse/test.zig @@ -0,0 +1,556 @@ +const std = @import("std"); 
+const mem = std.mem; +const testing = std.testing; + +usingnamespace @import("../parse.zig"); + +test "explicit doc" { + const source = + \\--- !tapi-tbd + \\tbd-version: 4 + \\abc-version: 5 + \\... + ; + + var tree = Tree.init(testing.allocator); + defer tree.deinit(); + try tree.parse(source); + + try testing.expectEqual(tree.docs.items.len, 1); + + const doc = tree.docs.items[0].cast(Node.Doc).?; + try testing.expectEqual(doc.start.?, 0); + try testing.expectEqual(doc.end.?, tree.tokens.len - 2); + + const directive = tree.tokens[doc.directive.?]; + try testing.expectEqual(directive.id, .Literal); + try testing.expect(mem.eql(u8, "tapi-tbd", tree.source[directive.start..directive.end])); + + try testing.expect(doc.value != null); + try testing.expectEqual(doc.value.?.tag, .map); + + const map = doc.value.?.cast(Node.Map).?; + try testing.expectEqual(map.start.?, 5); + try testing.expectEqual(map.end.?, 14); + try testing.expectEqual(map.values.items.len, 2); + + { + const entry = map.values.items[0]; + + const key = tree.tokens[entry.key]; + try testing.expectEqual(key.id, .Literal); + try testing.expect(mem.eql(u8, "tbd-version", tree.source[key.start..key.end])); + + const value = entry.value.cast(Node.Value).?; + const value_tok = tree.tokens[value.start.?]; + try testing.expectEqual(value_tok.id, .Literal); + try testing.expect(mem.eql(u8, "4", tree.source[value_tok.start..value_tok.end])); + } + + { + const entry = map.values.items[1]; + + const key = tree.tokens[entry.key]; + try testing.expectEqual(key.id, .Literal); + try testing.expect(mem.eql(u8, "abc-version", tree.source[key.start..key.end])); + + const value = entry.value.cast(Node.Value).?; + const value_tok = tree.tokens[value.start.?]; + try testing.expectEqual(value_tok.id, .Literal); + try testing.expect(mem.eql(u8, "5", tree.source[value_tok.start..value_tok.end])); + } +} + +test "leaf in quotes" { + const source = + \\key1: no quotes + \\key2: 'single quoted' + \\key3: "double quoted" + ; + + var tree = Tree.init(testing.allocator); + defer tree.deinit(); + try tree.parse(source); + + try testing.expectEqual(tree.docs.items.len, 1); + + const doc = tree.docs.items[0].cast(Node.Doc).?; + try testing.expectEqual(doc.start.?, 0); + try testing.expectEqual(doc.end.?, tree.tokens.len - 2); + try testing.expect(doc.directive == null); + + try testing.expect(doc.value != null); + try testing.expectEqual(doc.value.?.tag, .map); + + const map = doc.value.?.cast(Node.Map).?; + try testing.expectEqual(map.start.?, 0); + try testing.expectEqual(map.end.?, tree.tokens.len - 2); + try testing.expectEqual(map.values.items.len, 3); + + { + const entry = map.values.items[0]; + + const key = tree.tokens[entry.key]; + try testing.expectEqual(key.id, .Literal); + try testing.expect(mem.eql( + u8, + "key1", + tree.source[key.start..key.end], + )); + + const value = entry.value.cast(Node.Value).?; + const start = tree.tokens[value.start.?]; + const end = tree.tokens[value.end.?]; + try testing.expectEqual(start.id, .Literal); + try testing.expectEqual(end.id, .Literal); + try testing.expect(mem.eql( + u8, + "no quotes", + tree.source[start.start..end.end], + )); + } +} + +test "nested maps" { + const source = + \\key1: + \\ key1_1 : value1_1 + \\ key1_2 : value1_2 + \\key2 : value2 + ; + + var tree = Tree.init(testing.allocator); + defer tree.deinit(); + try tree.parse(source); + + try testing.expectEqual(tree.docs.items.len, 1); + + const doc = tree.docs.items[0].cast(Node.Doc).?; + try testing.expectEqual(doc.start.?, 0); + try 
testing.expectEqual(doc.end.?, tree.tokens.len - 2); + try testing.expect(doc.directive == null); + + try testing.expect(doc.value != null); + try testing.expectEqual(doc.value.?.tag, .map); + + const map = doc.value.?.cast(Node.Map).?; + try testing.expectEqual(map.start.?, 0); + try testing.expectEqual(map.end.?, tree.tokens.len - 2); + try testing.expectEqual(map.values.items.len, 2); + + { + const entry = map.values.items[0]; + + const key = tree.tokens[entry.key]; + try testing.expectEqual(key.id, .Literal); + try testing.expect(mem.eql(u8, "key1", tree.source[key.start..key.end])); + + const nested_map = entry.value.cast(Node.Map).?; + try testing.expectEqual(nested_map.start.?, 4); + try testing.expectEqual(nested_map.end.?, 16); + try testing.expectEqual(nested_map.values.items.len, 2); + + { + const nested_entry = nested_map.values.items[0]; + + const nested_key = tree.tokens[nested_entry.key]; + try testing.expectEqual(nested_key.id, .Literal); + try testing.expect(mem.eql( + u8, + "key1_1", + tree.source[nested_key.start..nested_key.end], + )); + + const nested_value = nested_entry.value.cast(Node.Value).?; + const nested_value_tok = tree.tokens[nested_value.start.?]; + try testing.expectEqual(nested_value_tok.id, .Literal); + try testing.expect(mem.eql( + u8, + "value1_1", + tree.source[nested_value_tok.start..nested_value_tok.end], + )); + } + + { + const nested_entry = nested_map.values.items[1]; + + const nested_key = tree.tokens[nested_entry.key]; + try testing.expectEqual(nested_key.id, .Literal); + try testing.expect(mem.eql( + u8, + "key1_2", + tree.source[nested_key.start..nested_key.end], + )); + + const nested_value = nested_entry.value.cast(Node.Value).?; + const nested_value_tok = tree.tokens[nested_value.start.?]; + try testing.expectEqual(nested_value_tok.id, .Literal); + try testing.expect(mem.eql( + u8, + "value1_2", + tree.source[nested_value_tok.start..nested_value_tok.end], + )); + } + } + + { + const entry = map.values.items[1]; + + const key = tree.tokens[entry.key]; + try testing.expectEqual(key.id, .Literal); + try testing.expect(mem.eql(u8, "key2", tree.source[key.start..key.end])); + + const value = entry.value.cast(Node.Value).?; + const value_tok = tree.tokens[value.start.?]; + try testing.expectEqual(value_tok.id, .Literal); + try testing.expect(mem.eql( + u8, + "value2", + tree.source[value_tok.start..value_tok.end], + )); + } +} + +test "map of list of values" { + const source = + \\ints: + \\ - 0 + \\ - 1 + \\ - 2 + ; + var tree = Tree.init(testing.allocator); + defer tree.deinit(); + try tree.parse(source); + + try testing.expectEqual(tree.docs.items.len, 1); + + const doc = tree.docs.items[0].cast(Node.Doc).?; + try testing.expectEqual(doc.start.?, 0); + try testing.expectEqual(doc.end.?, tree.tokens.len - 2); + + try testing.expect(doc.value != null); + try testing.expectEqual(doc.value.?.tag, .map); + + const map = doc.value.?.cast(Node.Map).?; + try testing.expectEqual(map.start.?, 0); + try testing.expectEqual(map.end.?, tree.tokens.len - 2); + try testing.expectEqual(map.values.items.len, 1); + + const entry = map.values.items[0]; + const key = tree.tokens[entry.key]; + try testing.expectEqual(key.id, .Literal); + try testing.expect(mem.eql(u8, "ints", tree.source[key.start..key.end])); + + const value = entry.value.cast(Node.List).?; + try testing.expectEqual(value.start.?, 4); + try testing.expectEqual(value.end.?, tree.tokens.len - 2); + try testing.expectEqual(value.values.items.len, 3); + + { + const elem = 
value.values.items[0].cast(Node.Value).?; + const leaf = tree.tokens[elem.start.?]; + try testing.expectEqual(leaf.id, .Literal); + try testing.expect(mem.eql(u8, "0", tree.source[leaf.start..leaf.end])); + } + + { + const elem = value.values.items[1].cast(Node.Value).?; + const leaf = tree.tokens[elem.start.?]; + try testing.expectEqual(leaf.id, .Literal); + try testing.expect(mem.eql(u8, "1", tree.source[leaf.start..leaf.end])); + } + + { + const elem = value.values.items[2].cast(Node.Value).?; + const leaf = tree.tokens[elem.start.?]; + try testing.expectEqual(leaf.id, .Literal); + try testing.expect(mem.eql(u8, "2", tree.source[leaf.start..leaf.end])); + } +} + +test "map of list of maps" { + const source = + \\key1: + \\- key2 : value2 + \\- key3 : value3 + \\- key4 : value4 + ; + + var tree = Tree.init(testing.allocator); + defer tree.deinit(); + try tree.parse(source); + + try testing.expectEqual(tree.docs.items.len, 1); + + const doc = tree.docs.items[0].cast(Node.Doc).?; + try testing.expectEqual(doc.start.?, 0); + try testing.expectEqual(doc.end.?, tree.tokens.len - 2); + + try testing.expect(doc.value != null); + try testing.expectEqual(doc.value.?.tag, .map); + + const map = doc.value.?.cast(Node.Map).?; + try testing.expectEqual(map.start.?, 0); + try testing.expectEqual(map.end.?, tree.tokens.len - 2); + try testing.expectEqual(map.values.items.len, 1); + + const entry = map.values.items[0]; + const key = tree.tokens[entry.key]; + try testing.expectEqual(key.id, .Literal); + try testing.expect(mem.eql(u8, "key1", tree.source[key.start..key.end])); + + const value = entry.value.cast(Node.List).?; + try testing.expectEqual(value.start.?, 3); + try testing.expectEqual(value.end.?, tree.tokens.len - 2); + try testing.expectEqual(value.values.items.len, 3); + + { + const elem = value.values.items[0].cast(Node.Map).?; + const nested = elem.values.items[0]; + const nested_key = tree.tokens[nested.key]; + try testing.expectEqual(nested_key.id, .Literal); + try testing.expect(mem.eql(u8, "key2", tree.source[nested_key.start..nested_key.end])); + + const nested_v = nested.value.cast(Node.Value).?; + const leaf = tree.tokens[nested_v.start.?]; + try testing.expectEqual(leaf.id, .Literal); + try testing.expect(mem.eql(u8, "value2", tree.source[leaf.start..leaf.end])); + } + + { + const elem = value.values.items[1].cast(Node.Map).?; + const nested = elem.values.items[0]; + const nested_key = tree.tokens[nested.key]; + try testing.expectEqual(nested_key.id, .Literal); + try testing.expect(mem.eql(u8, "key3", tree.source[nested_key.start..nested_key.end])); + + const nested_v = nested.value.cast(Node.Value).?; + const leaf = tree.tokens[nested_v.start.?]; + try testing.expectEqual(leaf.id, .Literal); + try testing.expect(mem.eql(u8, "value3", tree.source[leaf.start..leaf.end])); + } + + { + const elem = value.values.items[2].cast(Node.Map).?; + const nested = elem.values.items[0]; + const nested_key = tree.tokens[nested.key]; + try testing.expectEqual(nested_key.id, .Literal); + try testing.expect(mem.eql(u8, "key4", tree.source[nested_key.start..nested_key.end])); + + const nested_v = nested.value.cast(Node.Value).?; + const leaf = tree.tokens[nested_v.start.?]; + try testing.expectEqual(leaf.id, .Literal); + try testing.expect(mem.eql(u8, "value4", tree.source[leaf.start..leaf.end])); + } +} + +test "list of lists" { + const source = + \\- [name , hr, avg ] + \\- [Mark McGwire , 65, 0.278] + \\- [Sammy Sosa , 63, 0.288] + ; + + var tree = Tree.init(testing.allocator); + defer 
tree.deinit(); + try tree.parse(source); + + try testing.expectEqual(tree.docs.items.len, 1); + + const doc = tree.docs.items[0].cast(Node.Doc).?; + try testing.expectEqual(doc.start.?, 0); + try testing.expectEqual(doc.end.?, tree.tokens.len - 2); + + try testing.expect(doc.value != null); + try testing.expectEqual(doc.value.?.tag, .list); + + const list = doc.value.?.cast(Node.List).?; + try testing.expectEqual(list.start.?, 0); + try testing.expectEqual(list.end.?, tree.tokens.len - 2); + try testing.expectEqual(list.values.items.len, 3); + + { + try testing.expectEqual(list.values.items[0].tag, .list); + const nested = list.values.items[0].cast(Node.List).?; + try testing.expectEqual(nested.values.items.len, 3); + + { + try testing.expectEqual(nested.values.items[0].tag, .value); + const value = nested.values.items[0].cast(Node.Value).?; + const leaf = tree.tokens[value.start.?]; + try testing.expect(mem.eql(u8, "name", tree.source[leaf.start..leaf.end])); + } + + { + try testing.expectEqual(nested.values.items[1].tag, .value); + const value = nested.values.items[1].cast(Node.Value).?; + const leaf = tree.tokens[value.start.?]; + try testing.expect(mem.eql(u8, "hr", tree.source[leaf.start..leaf.end])); + } + + { + try testing.expectEqual(nested.values.items[2].tag, .value); + const value = nested.values.items[2].cast(Node.Value).?; + const leaf = tree.tokens[value.start.?]; + try testing.expect(mem.eql(u8, "avg", tree.source[leaf.start..leaf.end])); + } + } + + { + try testing.expectEqual(list.values.items[1].tag, .list); + const nested = list.values.items[1].cast(Node.List).?; + try testing.expectEqual(nested.values.items.len, 3); + + { + try testing.expectEqual(nested.values.items[0].tag, .value); + const value = nested.values.items[0].cast(Node.Value).?; + const start = tree.tokens[value.start.?]; + const end = tree.tokens[value.end.?]; + try testing.expect(mem.eql(u8, "Mark McGwire", tree.source[start.start..end.end])); + } + + { + try testing.expectEqual(nested.values.items[1].tag, .value); + const value = nested.values.items[1].cast(Node.Value).?; + const leaf = tree.tokens[value.start.?]; + try testing.expect(mem.eql(u8, "65", tree.source[leaf.start..leaf.end])); + } + + { + try testing.expectEqual(nested.values.items[2].tag, .value); + const value = nested.values.items[2].cast(Node.Value).?; + const leaf = tree.tokens[value.start.?]; + try testing.expect(mem.eql(u8, "0.278", tree.source[leaf.start..leaf.end])); + } + } + + { + try testing.expectEqual(list.values.items[2].tag, .list); + const nested = list.values.items[2].cast(Node.List).?; + try testing.expectEqual(nested.values.items.len, 3); + + { + try testing.expectEqual(nested.values.items[0].tag, .value); + const value = nested.values.items[0].cast(Node.Value).?; + const start = tree.tokens[value.start.?]; + const end = tree.tokens[value.end.?]; + try testing.expect(mem.eql(u8, "Sammy Sosa", tree.source[start.start..end.end])); + } + + { + try testing.expectEqual(nested.values.items[1].tag, .value); + const value = nested.values.items[1].cast(Node.Value).?; + const leaf = tree.tokens[value.start.?]; + try testing.expect(mem.eql(u8, "63", tree.source[leaf.start..leaf.end])); + } + + { + try testing.expectEqual(nested.values.items[2].tag, .value); + const value = nested.values.items[2].cast(Node.Value).?; + const leaf = tree.tokens[value.start.?]; + try testing.expect(mem.eql(u8, "0.288", tree.source[leaf.start..leaf.end])); + } + } +} + +test "inline list" { + const source = + \\[name , hr, avg ] + ; + + var tree = 
Tree.init(testing.allocator); + defer tree.deinit(); + try tree.parse(source); + + try testing.expectEqual(tree.docs.items.len, 1); + + const doc = tree.docs.items[0].cast(Node.Doc).?; + try testing.expectEqual(doc.start.?, 0); + try testing.expectEqual(doc.end.?, tree.tokens.len - 2); + + try testing.expect(doc.value != null); + try testing.expectEqual(doc.value.?.tag, .list); + + const list = doc.value.?.cast(Node.List).?; + try testing.expectEqual(list.start.?, 0); + try testing.expectEqual(list.end.?, tree.tokens.len - 2); + try testing.expectEqual(list.values.items.len, 3); + + { + try testing.expectEqual(list.values.items[0].tag, .value); + const value = list.values.items[0].cast(Node.Value).?; + const leaf = tree.tokens[value.start.?]; + try testing.expect(mem.eql(u8, "name", tree.source[leaf.start..leaf.end])); + } + + { + try testing.expectEqual(list.values.items[1].tag, .value); + const value = list.values.items[1].cast(Node.Value).?; + const leaf = tree.tokens[value.start.?]; + try testing.expect(mem.eql(u8, "hr", tree.source[leaf.start..leaf.end])); + } + + { + try testing.expectEqual(list.values.items[2].tag, .value); + const value = list.values.items[2].cast(Node.Value).?; + const leaf = tree.tokens[value.start.?]; + try testing.expect(mem.eql(u8, "avg", tree.source[leaf.start..leaf.end])); + } +} + +test "inline list as mapping value" { + const source = + \\key : [ + \\ name , + \\ hr, avg ] + ; + + var tree = Tree.init(testing.allocator); + defer tree.deinit(); + try tree.parse(source); + + try testing.expectEqual(tree.docs.items.len, 1); + + const doc = tree.docs.items[0].cast(Node.Doc).?; + try testing.expectEqual(doc.start.?, 0); + try testing.expectEqual(doc.end.?, tree.tokens.len - 2); + + try testing.expect(doc.value != null); + try testing.expectEqual(doc.value.?.tag, .map); + + const map = doc.value.?.cast(Node.Map).?; + try testing.expectEqual(map.start.?, 0); + try testing.expectEqual(map.end.?, tree.tokens.len - 2); + try testing.expectEqual(map.values.items.len, 1); + + const entry = map.values.items[0]; + const key = tree.tokens[entry.key]; + try testing.expectEqual(key.id, .Literal); + try testing.expect(mem.eql(u8, "key", tree.source[key.start..key.end])); + + const list = entry.value.cast(Node.List).?; + try testing.expectEqual(list.start.?, 4); + try testing.expectEqual(list.end.?, tree.tokens.len - 2); + try testing.expectEqual(list.values.items.len, 3); + + { + try testing.expectEqual(list.values.items[0].tag, .value); + const value = list.values.items[0].cast(Node.Value).?; + const leaf = tree.tokens[value.start.?]; + try testing.expect(mem.eql(u8, "name", tree.source[leaf.start..leaf.end])); + } + + { + try testing.expectEqual(list.values.items[1].tag, .value); + const value = list.values.items[1].cast(Node.Value).?; + const leaf = tree.tokens[value.start.?]; + try testing.expect(mem.eql(u8, "hr", tree.source[leaf.start..leaf.end])); + } + + { + try testing.expectEqual(list.values.items[2].tag, .value); + const value = list.values.items[2].cast(Node.Value).?; + const leaf = tree.tokens[value.start.?]; + try testing.expect(mem.eql(u8, "avg", tree.source[leaf.start..leaf.end])); + } +} diff --git a/src/link/tapi/yaml.zig b/src/link/tapi/yaml.zig new file mode 100644 index 0000000000..b58df7609f --- /dev/null +++ b/src/link/tapi/yaml.zig @@ -0,0 +1,651 @@ +const std = @import("std"); +const assert = std.debug.assert; +const math = std.math; +const mem = std.mem; +const testing = std.testing; +const log = std.log.scoped(.tapi); + +const Allocator = 
mem.Allocator; +const ArenaAllocator = std.heap.ArenaAllocator; + +pub const Tokenizer = @import("Tokenizer.zig"); +pub const parse = @import("parse.zig"); + +const Node = parse.Node; +const Tree = parse.Tree; +const ParseError = parse.ParseError; + +pub const YamlError = error{ + UnexpectedNodeType, + OutOfMemory, +} || ParseError || std.fmt.ParseIntError; + +pub const ValueType = enum { + empty, + int, + float, + string, + list, + map, +}; + +pub const List = []Value; +pub const Map = std.StringArrayHashMap(Value); + +pub const Value = union(ValueType) { + empty, + int: i64, + float: f64, + string: []const u8, + list: List, + map: Map, + + pub fn asInt(self: Value) !i64 { + if (self != .int) return error.TypeMismatch; + return self.int; + } + + pub fn asFloat(self: Value) !f64 { + if (self != .float) return error.TypeMismatch; + return self.float; + } + + pub fn asString(self: Value) ![]const u8 { + if (self != .string) return error.TypeMismatch; + return self.string; + } + + pub fn asList(self: Value) !List { + if (self != .list) return error.TypeMismatch; + return self.list; + } + + pub fn asMap(self: Value) !Map { + if (self != .map) return error.TypeMismatch; + return self.map; + } + + const StringifyArgs = struct { + indentation: usize = 0, + should_inline_first_key: bool = false, + }; + + pub const StringifyError = std.os.WriteError; + + pub fn stringify(self: Value, writer: anytype, args: StringifyArgs) StringifyError!void { + switch (self) { + .empty => return, + .int => |int| return writer.print("{}", .{int}), + .float => |float| return writer.print("{d}", .{float}), + .string => |string| return writer.print("{s}", .{string}), + .list => |list| { + const len = list.len; + if (len == 0) return; + + const first = list[0]; + if (first.is_compound()) { + for (list) |elem, i| { + try writer.writeByteNTimes(' ', args.indentation); + try writer.writeAll("- "); + try elem.stringify(writer, .{ + .indentation = args.indentation + 2, + .should_inline_first_key = true, + }); + if (i < len - 1) { + try writer.writeByte('\n'); + } + } + return; + } + + try writer.writeAll("[ "); + for (list) |elem, i| { + try elem.stringify(writer, args); + if (i < len - 1) { + try writer.writeAll(", "); + } + } + try writer.writeAll(" ]"); + }, + .map => |map| { + const keys = map.keys(); + const len = keys.len; + if (len == 0) return; + + for (keys) |key, i| { + if (!args.should_inline_first_key or i != 0) { + try writer.writeByteNTimes(' ', args.indentation); + } + try writer.print("{s}: ", .{key}); + + const value = map.get(key) orelse unreachable; + const should_inline = blk: { + if (!value.is_compound()) break :blk true; + if (value == .list and value.list.len > 0 and !value.list[0].is_compound()) break :blk true; + break :blk false; + }; + + if (should_inline) { + try value.stringify(writer, args); + } else { + try writer.writeByte('\n'); + try value.stringify(writer, .{ + .indentation = args.indentation + 4, + }); + } + + if (i < len - 1) { + try writer.writeByte('\n'); + } + } + }, + } + } + + fn is_compound(self: Value) bool { + return switch (self) { + .list, .map => true, + else => false, + }; + } + + fn fromNode(arena: *Allocator, tree: *const Tree, node: *const Node, type_hint: ?ValueType) YamlError!Value { + if (node.cast(Node.Doc)) |doc| { + const inner = doc.value orelse { + // empty doc + return Value{ .empty = .{} }; + }; + return Value.fromNode(arena, tree, inner, null); + } else if (node.cast(Node.Map)) |map| { + var out_map = std.StringArrayHashMap(Value).init(arena); + try 
out_map.ensureUnusedCapacity(map.values.items.len); + + for (map.values.items) |entry| { + const key_tok = tree.tokens[entry.key]; + const key = try arena.dupe(u8, tree.source[key_tok.start..key_tok.end]); + const value = try Value.fromNode(arena, tree, entry.value, null); + + out_map.putAssumeCapacityNoClobber(key, value); + } + + return Value{ .map = out_map }; + } else if (node.cast(Node.List)) |list| { + var out_list = std.ArrayList(Value).init(arena); + try out_list.ensureUnusedCapacity(list.values.items.len); + + if (list.values.items.len > 0) { + const hint = if (list.values.items[0].cast(Node.Value)) |value| hint: { + const start = tree.tokens[value.start.?]; + const end = tree.tokens[value.end.?]; + const raw = tree.source[start.start..end.end]; + _ = std.fmt.parseInt(i64, raw, 10) catch { + _ = std.fmt.parseFloat(f64, raw) catch { + break :hint ValueType.string; + }; + break :hint ValueType.float; + }; + break :hint ValueType.int; + } else null; + + for (list.values.items) |elem| { + const value = try Value.fromNode(arena, tree, elem, hint); + out_list.appendAssumeCapacity(value); + } + } + + return Value{ .list = out_list.toOwnedSlice() }; + } else if (node.cast(Node.Value)) |value| { + const start = tree.tokens[value.start.?]; + const end = tree.tokens[value.end.?]; + const raw = tree.source[start.start..end.end]; + + if (type_hint) |hint| { + return switch (hint) { + .int => Value{ .int = try std.fmt.parseInt(i64, raw, 10) }, + .float => Value{ .float = try std.fmt.parseFloat(f64, raw) }, + .string => Value{ .string = try arena.dupe(u8, raw) }, + else => unreachable, + }; + } + + try_int: { + // TODO infer base for int + const int = std.fmt.parseInt(i64, raw, 10) catch break :try_int; + return Value{ .int = int }; + } + try_float: { + const float = std.fmt.parseFloat(f64, raw) catch break :try_float; + return Value{ .float = float }; + } + return Value{ .string = try arena.dupe(u8, raw) }; + } else { + log.err("Unexpected node type: {}", .{node.tag}); + return error.UnexpectedNodeType; + } + } +}; + +pub const Yaml = struct { + arena: ArenaAllocator, + tree: ?Tree = null, + docs: std.ArrayList(Value), + + pub fn deinit(self: *Yaml) void { + self.arena.deinit(); + } + + pub fn stringify(self: Yaml, writer: anytype) !void { + for (self.docs.items) |doc| { + // if (doc.directive) |directive| { + // try writer.print("--- !{s}\n", .{directive}); + // } + try doc.stringify(writer, .{}); + // if (doc.directive != null) { + // try writer.writeAll("...\n"); + // } + } + } + + pub fn load(allocator: *Allocator, source: []const u8) !Yaml { + var arena = ArenaAllocator.init(allocator); + + var tree = Tree.init(&arena.allocator); + try tree.parse(source); + + var docs = std.ArrayList(Value).init(&arena.allocator); + try docs.ensureUnusedCapacity(tree.docs.items.len); + + for (tree.docs.items) |node| { + const value = try Value.fromNode(&arena.allocator, &tree, node, null); + docs.appendAssumeCapacity(value); + } + + return Yaml{ + .arena = arena, + .tree = tree, + .docs = docs, + }; + } + + pub const Error = error{ + Unimplemented, + TypeMismatch, + StructFieldMissing, + ArraySizeMismatch, + UntaggedUnion, + UnionTagMissing, + Overflow, + OutOfMemory, + }; + + pub fn parse(self: *Yaml, comptime T: type) Error!T { + if (self.docs.items.len == 0) { + if (@typeInfo(T) == .Void) return {}; + return error.TypeMismatch; + } + + if (self.docs.items.len == 1) { + return self.parseValue(T, self.docs.items[0]); + } + + switch (@typeInfo(T)) { + .Array => |info| { + var parsed: T = undefined; + for 
(self.docs.items) |doc, i| { + parsed[i] = try self.parseValue(info.child, doc); + } + return parsed; + }, + .Pointer => |info| { + switch (info.size) { + .Slice => { + var parsed = try self.arena.allocator.alloc(info.child, self.docs.items.len); + for (self.docs.items) |doc, i| { + parsed[i] = try self.parseValue(info.child, doc); + } + return parsed; + }, + else => return error.TypeMismatch, + } + }, + .Union => return error.Unimplemented, + else => return error.TypeMismatch, + } + } + + fn parseValue(self: *Yaml, comptime T: type, value: Value) Error!T { + return switch (@typeInfo(T)) { + .Int => math.cast(T, try value.asInt()), + .Float => math.lossyCast(T, try value.asFloat()), + .Struct => self.parseStruct(T, try value.asMap()), + .Union => self.parseUnion(T, value), + .Array => self.parseArray(T, try value.asList()), + .Pointer => { + if (value.asList()) |list| { + return self.parsePointer(T, .{ .list = list }); + } else |_| { + return self.parsePointer(T, .{ .string = try value.asString() }); + } + }, + .Void => error.TypeMismatch, + .Optional => unreachable, + else => error.Unimplemented, + }; + } + + fn parseUnion(self: *Yaml, comptime T: type, value: Value) Error!T { + const union_info = @typeInfo(T).Union; + + if (union_info.tag_type) |_| { + inline for (union_info.fields) |field| { + if (self.parseValue(field.field_type, value)) |u_value| { + return @unionInit(T, field.name, u_value); + } else |err| { + if (@as(@TypeOf(err) || error{TypeMismatch}, err) != error.TypeMismatch) return err; + } + } + } else return error.UntaggedUnion; + + return error.UnionTagMissing; + } + + fn parseOptional(self: *Yaml, comptime T: type, value: ?Value) Error!T { + const unwrapped = value orelse return null; + const opt_info = @typeInfo(T).Optional; + return @as(T, try self.parseValue(opt_info.child, unwrapped)); + } + + fn parseStruct(self: *Yaml, comptime T: type, map: Map) Error!T { + const struct_info = @typeInfo(T).Struct; + var parsed: T = undefined; + + inline for (struct_info.fields) |field| { + const value: ?Value = map.get(field.name) orelse blk: { + const field_name = try mem.replaceOwned(u8, &self.arena.allocator, field.name, "_", "-"); + break :blk map.get(field_name); + }; + + if (@typeInfo(field.field_type) == .Optional) { + @field(parsed, field.name) = try self.parseOptional(field.field_type, value); + continue; + } + + const unwrapped = value orelse { + log.err("missing struct field: {s}: {s}", .{ field.name, @typeName(field.field_type) }); + return error.StructFieldMissing; + }; + @field(parsed, field.name) = try self.parseValue(field.field_type, unwrapped); + } + + return parsed; + } + + fn parsePointer(self: *Yaml, comptime T: type, value: Value) Error!T { + const ptr_info = @typeInfo(T).Pointer; + const arena = &self.arena.allocator; + + switch (ptr_info.size) { + .Slice => { + const child_info = @typeInfo(ptr_info.child); + if (child_info == .Int and child_info.Int.bits == 8) { + return value.asString(); + } + + var parsed = try arena.alloc(ptr_info.child, value.list.len); + for (value.list) |elem, i| { + parsed[i] = try self.parseValue(ptr_info.child, elem); + } + return parsed; + }, + else => return error.Unimplemented, + } + } + + fn parseArray(self: *Yaml, comptime T: type, list: List) Error!T { + const array_info = @typeInfo(T).Array; + if (array_info.len != list.len) return error.ArraySizeMismatch; + + var parsed: T = undefined; + for (list) |elem, i| { + parsed[i] = try self.parseValue(array_info.child, elem); + } + + return parsed; + } +}; + +test { + 
testing.refAllDecls(@This()); +} + +test "simple list" { + const source = + \\- a + \\- b + \\- c + ; + + var yaml = try Yaml.load(testing.allocator, source); + defer yaml.deinit(); + + try testing.expectEqual(yaml.docs.items.len, 1); + + const list = yaml.docs.items[0].list; + try testing.expectEqual(list.len, 3); + + try testing.expect(mem.eql(u8, list[0].string, "a")); + try testing.expect(mem.eql(u8, list[1].string, "b")); + try testing.expect(mem.eql(u8, list[2].string, "c")); +} + +test "simple list typed as array of strings" { + const source = + \\- a + \\- b + \\- c + ; + + var yaml = try Yaml.load(testing.allocator, source); + defer yaml.deinit(); + + try testing.expectEqual(yaml.docs.items.len, 1); + + const arr = try yaml.parse([3][]const u8); + try testing.expectEqual(arr.len, 3); + try testing.expect(mem.eql(u8, arr[0], "a")); + try testing.expect(mem.eql(u8, arr[1], "b")); + try testing.expect(mem.eql(u8, arr[2], "c")); +} + +test "simple list typed as array of ints" { + const source = + \\- 0 + \\- 1 + \\- 2 + ; + + var yaml = try Yaml.load(testing.allocator, source); + defer yaml.deinit(); + + try testing.expectEqual(yaml.docs.items.len, 1); + + const arr = try yaml.parse([3]u8); + try testing.expectEqual(arr.len, 3); + try testing.expectEqual(arr[0], 0); + try testing.expectEqual(arr[1], 1); + try testing.expectEqual(arr[2], 2); +} + +test "list of mixed sign integer" { + const source = + \\- 0 + \\- -1 + \\- 2 + ; + + var yaml = try Yaml.load(testing.allocator, source); + defer yaml.deinit(); + + try testing.expectEqual(yaml.docs.items.len, 1); + + const arr = try yaml.parse([3]i8); + try testing.expectEqual(arr.len, 3); + try testing.expectEqual(arr[0], 0); + try testing.expectEqual(arr[1], -1); + try testing.expectEqual(arr[2], 2); +} + +test "simple map untyped" { + const source = + \\a: 0 + ; + + var yaml = try Yaml.load(testing.allocator, source); + defer yaml.deinit(); + + try testing.expectEqual(yaml.docs.items.len, 1); + + const map = yaml.docs.items[0].map; + try testing.expect(map.contains("a")); + try testing.expectEqual(map.get("a").?.int, 0); +} + +test "simple map typed" { + const source = + \\a: 0 + \\b: hello there + \\c: 'wait, what?' + ; + + var yaml = try Yaml.load(testing.allocator, source); + defer yaml.deinit(); + + const simple = try yaml.parse(struct { a: usize, b: []const u8, c: []const u8 }); + try testing.expectEqual(simple.a, 0); + try testing.expect(mem.eql(u8, simple.b, "hello there")); + try testing.expect(mem.eql(u8, simple.c, "wait, what?")); +} + +test "typed nested structs" { + const source = + \\a: + \\ b: hello there + \\ c: 'wait, what?' + ; + + var yaml = try Yaml.load(testing.allocator, source); + defer yaml.deinit(); + + const simple = try yaml.parse(struct { + a: struct { + b: []const u8, + c: []const u8, + }, + }); + try testing.expect(mem.eql(u8, simple.a.b, "hello there")); + try testing.expect(mem.eql(u8, simple.a.c, "wait, what?")); +} + +test "multidoc typed as a slice of structs" { + const source = + \\--- + \\a: 0 + \\--- + \\a: 1 + \\... 
+ ;
+
+ var yaml = try Yaml.load(testing.allocator, source);
+ defer yaml.deinit();
+
+ {
+ const result = try yaml.parse([2]struct { a: usize });
+ try testing.expectEqual(result.len, 2);
+ try testing.expectEqual(result[0].a, 0);
+ try testing.expectEqual(result[1].a, 1);
+ }
+
+ {
+ const result = try yaml.parse([]struct { a: usize });
+ try testing.expectEqual(result.len, 2);
+ try testing.expectEqual(result[0].a, 0);
+ try testing.expectEqual(result[1].a, 1);
+ }
+}
+
+test "multidoc typed as a struct is an error" {
+ const source =
+ \\---
+ \\a: 0
+ \\---
+ \\b: 1
+ \\...
+ ;
+
+ var yaml = try Yaml.load(testing.allocator, source);
+ defer yaml.deinit();
+
+ try testing.expectError(Yaml.Error.TypeMismatch, yaml.parse(struct { a: usize }));
+ try testing.expectError(Yaml.Error.TypeMismatch, yaml.parse(struct { b: usize }));
+ try testing.expectError(Yaml.Error.TypeMismatch, yaml.parse(struct { a: usize, b: usize }));
+}
+
+test "multidoc typed as a slice of structs with optionals" {
+ const source =
+ \\---
+ \\a: 0
+ \\c: 1.0
+ \\---
+ \\a: 1
+ \\b: different field
+ \\...
+ ;
+
+ var yaml = try Yaml.load(testing.allocator, source);
+ defer yaml.deinit();
+
+ const result = try yaml.parse([]struct { a: usize, b: ?[]const u8, c: ?f16 });
+ try testing.expectEqual(result.len, 2);
+
+ try testing.expectEqual(result[0].a, 0);
+ try testing.expect(result[0].b == null);
+ try testing.expect(result[0].c != null);
+ try testing.expectEqual(result[0].c.?, 1.0);
+
+ try testing.expectEqual(result[1].a, 1);
+ try testing.expect(result[1].b != null);
+ try testing.expect(mem.eql(u8, result[1].b.?, "different field"));
+ try testing.expect(result[1].c == null);
+}
+
+test "empty yaml can be represented as void" {
+ const source = "";
+ var yaml = try Yaml.load(testing.allocator, source);
+ defer yaml.deinit();
+ const result = try yaml.parse(void);
+ try testing.expect(@TypeOf(result) == void);
+}
+
+test "nonempty yaml cannot be represented as void" {
+ const source =
+ \\a: b
+ ;
+
+ var yaml = try Yaml.load(testing.allocator, source);
+ defer yaml.deinit();
+
+ try testing.expectError(Yaml.Error.TypeMismatch, yaml.parse(void));
+}
+
+test "typed array size mismatch" {
+ const source =
+ \\- 0
+ \\- 0
+ ;
+
+ var yaml = try Yaml.load(testing.allocator, source);
+ defer yaml.deinit();
+
+ try testing.expectError(Yaml.Error.ArraySizeMismatch, yaml.parse([1]usize));
+ try testing.expectError(Yaml.Error.ArraySizeMismatch, yaml.parse([5]usize));
+}
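For orientation, here is a minimal sketch of how the new typed deserialization path composes end to end. It is a hypothetical companion test, not part of this commit: it assumes a file living next to src/link/tapi/yaml.zig (hence the relative import) and the 0.8-era std APIs used throughout the diff. Note the fallback in parseStruct that retries a missing field with '_' replaced by '-'; that is presumably what lets Zig field names such as tbd_version bind to the dashed keys of .tbd stubs such as tbd-version without custom glue.

const std = @import("std");
const testing = std.testing;
const Yaml = @import("yaml.zig").Yaml;

test "dashed keys bind to underscored struct fields (illustrative)" {
    // Same document shape as the "explicit doc" parser test above.
    const source =
        \\--- !tapi-tbd
        \\tbd-version: 4
        \\abc-version: 5
        \\...
    ;

    var yaml = try Yaml.load(testing.allocator, source);
    defer yaml.deinit();

    // parseStruct first looks up "tbd_version", then retries with
    // '_' replaced by '-', matching the "tbd-version" key above.
    const stub = try yaml.parse(struct { tbd_version: usize, abc_version: usize });
    try testing.expectEqual(stub.tbd_version, 4);
    try testing.expectEqual(stub.abc_version, 5);
}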

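When the shape of a document is not known at comptime, the untyped Value tree can be walked directly instead. The sketch below is likewise hypothetical and relies only on accessors defined in yaml.zig above (asMap, asInt, asFloat, asString) and on the scalar inference in Value.fromNode, which tries int first, then float, then falls back to string.

const std = @import("std");
const mem = std.mem;
const testing = std.testing;
const Yaml = @import("yaml.zig").Yaml;

test "untyped access through Value (illustrative)" {
    const source =
        \\name: Mark McGwire
        \\hr: 65
        \\avg: 0.5
    ;

    var yaml = try Yaml.load(testing.allocator, source);
    defer yaml.deinit();

    // Each parsed document is exposed as a Value union; scalars were
    // inferred as int, float, or string when the tree was loaded.
    const map = try yaml.docs.items[0].asMap();
    try testing.expectEqual(try map.get("hr").?.asInt(), 65);
    try testing.expectEqual(try map.get("avg").?.asFloat(), 0.5);
    try testing.expect(mem.eql(u8, try map.get("name").?.asString(), "Mark McGwire"));
}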