1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
|
const Wasm = @This();
const std = @import("std");
const mem = std.mem;
const Allocator = std.mem.Allocator;
const assert = std.debug.assert;
const fs = std.fs;
const leb = std.leb;
const log = std.log.scoped(.link);
const Module = @import("../Module.zig");
const Compilation = @import("../Compilation.zig");
const codegen = @import("../codegen/wasm.zig");
const link = @import("../link.zig");
const trace = @import("../tracy.zig").trace;
const build_options = @import("build_options");
const Cache = @import("../Cache.zig");
/// Various magic numbers defined by the wasm spec
const spec = struct {
const magic = [_]u8{ 0x00, 0x61, 0x73, 0x6D }; // \0asm
const version = [_]u8{ 0x01, 0x00, 0x00, 0x00 }; // version 1
const custom_id = 0;
const types_id = 1;
const imports_id = 2;
const funcs_id = 3;
const tables_id = 4;
const memories_id = 5;
const globals_id = 6;
const exports_id = 7;
const start_id = 8;
const elements_id = 9;
const code_id = 10;
const data_id = 11;
};
pub const base_tag = link.File.Tag.wasm;
pub const FnData = struct {
/// Generated code for the type of the function
functype: std.ArrayListUnmanaged(u8) = .{},
/// Generated code for the body of the function
code: std.ArrayListUnmanaged(u8) = .{},
/// Locations in the generated code where function indexes must be filled in.
/// This must be kept ordered by offset.
idx_refs: std.ArrayListUnmanaged(struct { offset: u32, decl: *Module.Decl }) = .{},
};
base: link.File,
/// List of all function Decls to be written to the output file. The index of
/// each Decl in this list at the time of writing the binary is used as the
/// function index.
/// TODO: can/should we access some data structure in Module directly?
funcs: std.ArrayListUnmanaged(*Module.Decl) = .{},
pub fn openPath(allocator: *Allocator, sub_path: []const u8, options: link.Options) !*Wasm {
assert(options.object_format == .wasm);
if (options.use_llvm) return error.LLVM_BackendIsTODO_ForWasm; // TODO
if (options.use_lld) return error.LLD_LinkingIsTODO_ForWasm; // TODO
// TODO: read the file and keep vaild parts instead of truncating
const file = try options.emit.?.directory.handle.createFile(sub_path, .{ .truncate = true, .read = true });
errdefer file.close();
const wasm = try createEmpty(allocator, options);
errdefer wasm.base.destroy();
wasm.base.file = file;
try file.writeAll(&(spec.magic ++ spec.version));
return wasm;
}
pub fn createEmpty(gpa: *Allocator, options: link.Options) !*Wasm {
const wasm = try gpa.create(Wasm);
wasm.* = .{
.base = .{
.tag = .wasm,
.options = options,
.file = null,
.allocator = gpa,
},
};
return wasm;
}
pub fn deinit(self: *Wasm) void {
for (self.funcs.items) |decl| {
decl.fn_link.wasm.?.functype.deinit(self.base.allocator);
decl.fn_link.wasm.?.code.deinit(self.base.allocator);
decl.fn_link.wasm.?.idx_refs.deinit(self.base.allocator);
}
self.funcs.deinit(self.base.allocator);
}
// Generate code for the Decl, storing it in memory to be later written to
// the file on flush().
pub fn updateDecl(self: *Wasm, module: *Module, decl: *Module.Decl) !void {
if (decl.typed_value.most_recent.typed_value.ty.zigTypeTag() != .Fn)
return error.TODOImplementNonFnDeclsForWasm;
if (decl.fn_link.wasm) |*fn_data| {
fn_data.functype.items.len = 0;
fn_data.code.items.len = 0;
fn_data.idx_refs.items.len = 0;
} else {
decl.fn_link.wasm = .{};
try self.funcs.append(self.base.allocator, decl);
}
const fn_data = &decl.fn_link.wasm.?;
var managed_functype = fn_data.functype.toManaged(self.base.allocator);
var managed_code = fn_data.code.toManaged(self.base.allocator);
try codegen.genFunctype(&managed_functype, decl);
try codegen.genCode(&managed_code, decl);
fn_data.functype = managed_functype.toUnmanaged();
fn_data.code = managed_code.toUnmanaged();
}
pub fn updateDeclExports(
self: *Wasm,
module: *Module,
decl: *const Module.Decl,
exports: []const *Module.Export,
) !void {}
pub fn freeDecl(self: *Wasm, decl: *Module.Decl) void {
// TODO: remove this assert when non-function Decls are implemented
assert(decl.typed_value.most_recent.typed_value.ty.zigTypeTag() == .Fn);
_ = self.funcs.swapRemove(self.getFuncidx(decl).?);
decl.fn_link.wasm.?.functype.deinit(self.base.allocator);
decl.fn_link.wasm.?.code.deinit(self.base.allocator);
decl.fn_link.wasm.?.idx_refs.deinit(self.base.allocator);
decl.fn_link.wasm = null;
}
pub fn flush(self: *Wasm, comp: *Compilation) !void {
if (build_options.have_llvm and self.base.options.use_lld) {
return self.linkWithLLD(comp);
} else {
return self.flushModule(comp);
}
}
pub fn flushModule(self: *Wasm, comp: *Compilation) !void {
const tracy = trace(@src());
defer tracy.end();
const file = self.base.file.?;
const header_size = 5 + 1;
// No need to rewrite the magic/version header
try file.setEndPos(@sizeOf(@TypeOf(spec.magic ++ spec.version)));
try file.seekTo(@sizeOf(@TypeOf(spec.magic ++ spec.version)));
// Type section
{
const header_offset = try reserveVecSectionHeader(file);
for (self.funcs.items) |decl| {
try file.writeAll(decl.fn_link.wasm.?.functype.items);
}
try writeVecSectionHeader(
file,
header_offset,
spec.types_id,
@intCast(u32, (try file.getPos()) - header_offset - header_size),
@intCast(u32, self.funcs.items.len),
);
}
// Function section
{
const header_offset = try reserveVecSectionHeader(file);
const writer = file.writer();
for (self.funcs.items) |_, typeidx| try leb.writeULEB128(writer, @intCast(u32, typeidx));
try writeVecSectionHeader(
file,
header_offset,
spec.funcs_id,
@intCast(u32, (try file.getPos()) - header_offset - header_size),
@intCast(u32, self.funcs.items.len),
);
}
// Export section
if (self.base.options.module) |module| {
const header_offset = try reserveVecSectionHeader(file);
const writer = file.writer();
var count: u32 = 0;
for (module.decl_exports.entries.items) |entry| {
for (entry.value) |exprt| {
// Export name length + name
try leb.writeULEB128(writer, @intCast(u32, exprt.options.name.len));
try writer.writeAll(exprt.options.name);
switch (exprt.exported_decl.typed_value.most_recent.typed_value.ty.zigTypeTag()) {
.Fn => {
// Type of the export
try writer.writeByte(0x00);
// Exported function index
try leb.writeULEB128(writer, self.getFuncidx(exprt.exported_decl).?);
},
else => return error.TODOImplementNonFnDeclsForWasm,
}
count += 1;
}
}
try writeVecSectionHeader(
file,
header_offset,
spec.exports_id,
@intCast(u32, (try file.getPos()) - header_offset - header_size),
count,
);
}
// Code section
{
const header_offset = try reserveVecSectionHeader(file);
const writer = file.writer();
for (self.funcs.items) |decl| {
const fn_data = &decl.fn_link.wasm.?;
// Write the already generated code to the file, inserting
// function indexes where required.
var current: u32 = 0;
for (fn_data.idx_refs.items) |idx_ref| {
try writer.writeAll(fn_data.code.items[current..idx_ref.offset]);
current = idx_ref.offset;
// Use a fixed width here to make calculating the code size
// in codegen.wasm.genCode() simpler.
var buf: [5]u8 = undefined;
leb.writeUnsignedFixed(5, &buf, self.getFuncidx(idx_ref.decl).?);
try writer.writeAll(&buf);
}
try writer.writeAll(fn_data.code.items[current..]);
}
try writeVecSectionHeader(
file,
header_offset,
spec.code_id,
@intCast(u32, (try file.getPos()) - header_offset - header_size),
@intCast(u32, self.funcs.items.len),
);
}
}
fn linkWithLLD(self: *Wasm, comp: *Compilation) !void {
const tracy = trace(@src());
defer tracy.end();
var arena_allocator = std.heap.ArenaAllocator.init(self.base.allocator);
defer arena_allocator.deinit();
const arena = &arena_allocator.allocator;
const directory = self.base.options.emit.?.directory; // Just an alias to make it shorter to type.
// If there is no Zig code to compile, then we should skip flushing the output file because it
// will not be part of the linker line anyway.
const module_obj_path: ?[]const u8 = if (self.base.options.module) |module| blk: {
const use_stage1 = build_options.is_stage1 and self.base.options.use_llvm;
if (use_stage1) {
const obj_basename = try std.zig.binNameAlloc(arena, .{
.root_name = self.base.options.root_name,
.target = self.base.options.target,
.output_mode = .Obj,
});
const o_directory = self.base.options.module.?.zig_cache_artifact_directory;
const full_obj_path = try o_directory.join(arena, &[_][]const u8{obj_basename});
break :blk full_obj_path;
}
try self.flushModule(comp);
const obj_basename = self.base.intermediary_basename.?;
const full_obj_path = try directory.join(arena, &[_][]const u8{obj_basename});
break :blk full_obj_path;
} else null;
const compiler_rt_path: ?[]const u8 = if (self.base.options.include_compiler_rt)
comp.compiler_rt_static_lib.?.full_object_path
else
null;
const target = self.base.options.target;
const id_symlink_basename = "lld.id";
var man: Cache.Manifest = undefined;
defer if (!self.base.options.disable_lld_caching) man.deinit();
var digest: [Cache.hex_digest_len]u8 = undefined;
if (!self.base.options.disable_lld_caching) {
man = comp.cache_parent.obtain();
// We are about to obtain this lock, so here we give other processes a chance first.
self.base.releaseLock();
try man.addListOfFiles(self.base.options.objects);
for (comp.c_object_table.items()) |entry| {
_ = try man.addFile(entry.key.status.success.object_path, null);
}
try man.addOptionalFile(module_obj_path);
try man.addOptionalFile(compiler_rt_path);
man.hash.addOptional(self.base.options.stack_size_override);
man.hash.addListOfBytes(self.base.options.extra_lld_args);
// We don't actually care whether it's a cache hit or miss; we just need the digest and the lock.
_ = try man.hit();
digest = man.final();
var prev_digest_buf: [digest.len]u8 = undefined;
const prev_digest: []u8 = Cache.readSmallFile(
directory.handle,
id_symlink_basename,
&prev_digest_buf,
) catch |err| blk: {
log.debug("WASM LLD new_digest={} error: {}", .{ digest, @errorName(err) });
// Handle this as a cache miss.
break :blk prev_digest_buf[0..0];
};
if (mem.eql(u8, prev_digest, &digest)) {
log.debug("WASM LLD digest={} match - skipping invocation", .{digest});
// Hot diggity dog! The output binary is already there.
self.base.lock = man.toOwnedLock();
return;
}
log.debug("WASM LLD prev_digest={} new_digest={}", .{ prev_digest, digest });
// We are about to change the output file to be different, so we invalidate the build hash now.
directory.handle.deleteFile(id_symlink_basename) catch |err| switch (err) {
error.FileNotFound => {},
else => |e| return e,
};
}
const is_obj = self.base.options.output_mode == .Obj;
// Create an LLD command line and invoke it.
var argv = std.ArrayList([]const u8).init(self.base.allocator);
defer argv.deinit();
// We will invoke ourselves as a child process to gain access to LLD.
// This is necessary because LLD does not behave properly as a library -
// it calls exit() and does not reset all global data between invocations.
try argv.appendSlice(&[_][]const u8{ comp.self_exe_path.?, "wasm-ld" });
if (is_obj) {
try argv.append("-r");
}
try argv.append("-error-limit=0");
if (self.base.options.output_mode == .Exe) {
// Increase the default stack size to a more reasonable value of 1MB instead of
// the default of 1 Wasm page being 64KB, unless overriden by the user.
try argv.append("-z");
const stack_size = self.base.options.stack_size_override orelse 1048576;
const arg = try std.fmt.allocPrint(arena, "stack-size={d}", .{stack_size});
try argv.append(arg);
// Put stack before globals so that stack overflow results in segfault immediately
// before corrupting globals. See https://github.com/ziglang/zig/issues/4496
try argv.append("--stack-first");
} else {
try argv.append("--no-entry"); // So lld doesn't look for _start.
try argv.append("--export-all");
}
try argv.appendSlice(&[_][]const u8{
"--allow-undefined",
"-o",
try directory.join(arena, &[_][]const u8{self.base.options.emit.?.sub_path}),
});
// Positional arguments to the linker such as object files.
try argv.appendSlice(self.base.options.objects);
for (comp.c_object_table.items()) |entry| {
try argv.append(entry.key.status.success.object_path);
}
if (module_obj_path) |p| {
try argv.append(p);
}
if (self.base.options.output_mode != .Obj and
!self.base.options.skip_linker_dependencies and
!self.base.options.link_libc)
{
try argv.append(comp.libc_static_lib.?.full_object_path);
}
if (compiler_rt_path) |p| {
try argv.append(p);
}
if (self.base.options.verbose_link) {
// Skip over our own name so that the LLD linker name is the first argv item.
Compilation.dump_argv(argv.items[1..]);
}
// Sadly, we must run LLD as a child process because it does not behave
// properly as a library.
const child = try std.ChildProcess.init(argv.items, arena);
defer child.deinit();
if (comp.clang_passthrough_mode) {
child.stdin_behavior = .Inherit;
child.stdout_behavior = .Inherit;
child.stderr_behavior = .Inherit;
const term = child.spawnAndWait() catch |err| {
log.err("unable to spawn {s}: {s}", .{ argv.items[0], @errorName(err) });
return error.UnableToSpawnSelf;
};
switch (term) {
.Exited => |code| {
if (code != 0) {
// TODO https://github.com/ziglang/zig/issues/6342
std.process.exit(1);
}
},
else => std.process.abort(),
}
} else {
child.stdin_behavior = .Ignore;
child.stdout_behavior = .Ignore;
child.stderr_behavior = .Pipe;
try child.spawn();
const stderr = try child.stderr.?.reader().readAllAlloc(arena, 10 * 1024 * 1024);
const term = child.wait() catch |err| {
log.err("unable to spawn {s}: {s}", .{ argv.items[0], @errorName(err) });
return error.UnableToSpawnSelf;
};
switch (term) {
.Exited => |code| {
if (code != 0) {
// TODO parse this output and surface with the Compilation API rather than
// directly outputting to stderr here.
std.debug.print("{s}", .{stderr});
return error.LLDReportedFailure;
}
},
else => {
log.err("{s} terminated with stderr:\n{s}", .{ argv.items[0], stderr });
return error.LLDCrashed;
},
}
if (stderr.len != 0) {
log.warn("unexpected LLD stderr:\n{s}", .{stderr});
}
}
if (!self.base.options.disable_lld_caching) {
// Update the file with the digest. If it fails we can continue; it only
// means that the next invocation will have an unnecessary cache miss.
Cache.writeSmallFile(directory.handle, id_symlink_basename, &digest) catch |err| {
log.warn("failed to save linking hash digest symlink: {}", .{@errorName(err)});
};
// Again failure here only means an unnecessary cache miss.
man.writeManifest() catch |err| {
log.warn("failed to write cache manifest when linking: {}", .{@errorName(err)});
};
// We hang on to this lock so that the output file path can be used without
// other processes clobbering it.
self.base.lock = man.toOwnedLock();
}
}
/// Get the current index of a given Decl in the function list
/// TODO: we could maintain a hash map to potentially make this
fn getFuncidx(self: Wasm, decl: *Module.Decl) ?u32 {
return for (self.funcs.items) |func, idx| {
if (func == decl) break @intCast(u32, idx);
} else null;
}
fn reserveVecSectionHeader(file: fs.File) !u64 {
// section id + fixed leb contents size + fixed leb vector length
const header_size = 1 + 5 + 5;
// TODO: this should be a single lseek(2) call, but fs.File does not
// currently provide a way to do this.
try file.seekBy(header_size);
return (try file.getPos()) - header_size;
}
fn writeVecSectionHeader(file: fs.File, offset: u64, section: u8, size: u32, items: u32) !void {
var buf: [1 + 5 + 5]u8 = undefined;
buf[0] = section;
leb.writeUnsignedFixed(5, buf[1..6], size);
leb.writeUnsignedFixed(5, buf[6..], items);
try file.pwriteAll(&buf, offset);
}
|