diff options
Diffstat (limited to 'lib/std/process')
| -rw-r--r-- | lib/std/process/Child.zig | 1810 |
1 files changed, 1810 insertions, 0 deletions
diff --git a/lib/std/process/Child.zig b/lib/std/process/Child.zig new file mode 100644 index 0000000000..527c31a3b3 --- /dev/null +++ b/lib/std/process/Child.zig @@ -0,0 +1,1810 @@ +const std = @import("../std.zig"); +const builtin = @import("builtin"); +const unicode = std.unicode; +const fs = std.fs; +const process = std.process; +const File = std.fs.File; +const windows = std.os.windows; +const linux = std.os.linux; +const posix = std.posix; +const mem = std.mem; +const EnvMap = std.process.EnvMap; +const maxInt = std.math.maxInt; +const assert = std.debug.assert; +const native_os = builtin.os.tag; +const ChildProcess = @This(); + +pub const Id = switch (native_os) { + .windows => windows.HANDLE, + .wasi => void, + else => posix.pid_t, +}; + +/// Available after calling `spawn()`. This becomes `undefined` after calling `wait()`. +/// On Windows this is the hProcess. +/// On POSIX this is the pid. +id: Id, +thread_handle: if (native_os == .windows) windows.HANDLE else void, + +allocator: mem.Allocator, + +/// The writing end of the child process's standard input pipe. +/// Usage requires `stdin_behavior == StdIo.Pipe`. +/// Available after calling `spawn()`. +stdin: ?File, + +/// The reading end of the child process's standard output pipe. +/// Usage requires `stdout_behavior == StdIo.Pipe`. +/// Available after calling `spawn()`. +stdout: ?File, + +/// The reading end of the child process's standard error pipe. +/// Usage requires `stderr_behavior == StdIo.Pipe`. +/// Available after calling `spawn()`. +stderr: ?File, + +/// Terminated state of the child process. +/// Available after calling `wait()`. +term: ?(SpawnError!Term), + +argv: []const []const u8, + +/// Leave as null to use the current env map using the supplied allocator. +env_map: ?*const EnvMap, + +stdin_behavior: StdIo, +stdout_behavior: StdIo, +stderr_behavior: StdIo, + +/// Set to change the user id when spawning the child process. +uid: if (native_os == .windows or native_os == .wasi) void else ?posix.uid_t, + +/// Set to change the group id when spawning the child process. +gid: if (native_os == .windows or native_os == .wasi) void else ?posix.gid_t, + +/// Set to change the current working directory when spawning the child process. +cwd: ?[]const u8, +/// Set to change the current working directory when spawning the child process. +/// This is not yet implemented for Windows. See https://github.com/ziglang/zig/issues/5190 +/// Once that is done, `cwd` will be deprecated in favor of this field. +cwd_dir: ?fs.Dir = null, + +err_pipe: ?if (native_os == .windows) void else [2]posix.fd_t, + +expand_arg0: Arg0Expand, + +/// Darwin-only. Disable ASLR for the child process. +disable_aslr: bool = false, + +/// Darwin-only. Start child process in suspended state as if SIGSTOP was sent. +start_suspended: bool = false, + +/// Set to true to obtain rusage information for the child process. +/// Depending on the target platform and implementation status, the +/// requested statistics may or may not be available. If they are +/// available, then the `resource_usage_statistics` field will be populated +/// after calling `wait`. +/// On Linux and Darwin, this obtains rusage statistics from wait4(). +request_resource_usage_statistics: bool = false, + +/// This is available after calling wait if +/// `request_resource_usage_statistics` was set to `true` before calling +/// `spawn`. +resource_usage_statistics: ResourceUsageStatistics = .{}, + +pub const ResourceUsageStatistics = struct { + rusage: @TypeOf(rusage_init) = rusage_init, + + /// Returns the peak resident set size of the child process, in bytes, + /// if available. + pub inline fn getMaxRss(rus: ResourceUsageStatistics) ?usize { + switch (native_os) { + .linux => { + if (rus.rusage) |ru| { + return @as(usize, @intCast(ru.maxrss)) * 1024; + } else { + return null; + } + }, + .windows => { + if (rus.rusage) |ru| { + return ru.PeakWorkingSetSize; + } else { + return null; + } + }, + .macos, .ios => { + if (rus.rusage) |ru| { + // Darwin oddly reports in bytes instead of kilobytes. + return @as(usize, @intCast(ru.maxrss)); + } else { + return null; + } + }, + else => return null, + } + } + + const rusage_init = switch (native_os) { + .linux, .macos, .ios => @as(?posix.rusage, null), + .windows => @as(?windows.VM_COUNTERS, null), + else => {}, + }; +}; + +pub const Arg0Expand = posix.Arg0Expand; + +pub const SpawnError = error{ + OutOfMemory, + + /// POSIX-only. `StdIo.Ignore` was selected and opening `/dev/null` returned ENODEV. + NoDevice, + + /// Windows-only. `cwd` or `argv` was provided and it was invalid WTF-8. + /// https://simonsapin.github.io/wtf-8/ + InvalidWtf8, + + /// Windows-only. `cwd` was provided, but the path did not exist when spawning the child process. + CurrentWorkingDirectoryUnlinked, + + /// Windows-only. NUL (U+0000), LF (U+000A), CR (U+000D) are not allowed + /// within arguments when executing a `.bat`/`.cmd` script. + /// - NUL/LF signifiies end of arguments, so anything afterwards + /// would be lost after execution. + /// - CR is stripped by `cmd.exe`, so any CR codepoints + /// would be lost after execution. + InvalidBatchScriptArg, +} || + posix.ExecveError || + posix.SetIdError || + posix.ChangeCurDirError || + windows.CreateProcessError || + windows.GetProcessMemoryInfoError || + windows.WaitForSingleObjectError; + +pub const Term = union(enum) { + Exited: u8, + Signal: u32, + Stopped: u32, + Unknown: u32, +}; + +/// Behavior of the child process's standard input, output, and error +/// streams. +pub const StdIo = enum { + /// Inherit the stream from the parent process. + Inherit, + + /// Pass a null stream to the child process. + /// This is /dev/null on POSIX and NUL on Windows. + Ignore, + + /// Create a pipe for the stream. + /// The corresponding field (`stdout`, `stderr`, or `stdin`) + /// will be assigned a `File` object that can be used + /// to read from or write to the pipe. + Pipe, + + /// Close the stream after the child process spawns. + Close, +}; + +/// First argument in argv is the executable. +pub fn init(argv: []const []const u8, allocator: mem.Allocator) ChildProcess { + return .{ + .allocator = allocator, + .argv = argv, + .id = undefined, + .thread_handle = undefined, + .err_pipe = null, + .term = null, + .env_map = null, + .cwd = null, + .uid = if (native_os == .windows or native_os == .wasi) {} else null, + .gid = if (native_os == .windows or native_os == .wasi) {} else null, + .stdin = null, + .stdout = null, + .stderr = null, + .stdin_behavior = StdIo.Inherit, + .stdout_behavior = StdIo.Inherit, + .stderr_behavior = StdIo.Inherit, + .expand_arg0 = .no_expand, + }; +} + +pub fn setUserName(self: *ChildProcess, name: []const u8) !void { + const user_info = try process.getUserInfo(name); + self.uid = user_info.uid; + self.gid = user_info.gid; +} + +/// On success must call `kill` or `wait`. +/// After spawning the `id` is available. +pub fn spawn(self: *ChildProcess) SpawnError!void { + if (!process.can_spawn) { + @compileError("the target operating system cannot spawn processes"); + } + + if (native_os == .windows) { + return self.spawnWindows(); + } else { + return self.spawnPosix(); + } +} + +pub fn spawnAndWait(self: *ChildProcess) SpawnError!Term { + try self.spawn(); + return self.wait(); +} + +/// Forcibly terminates child process and then cleans up all resources. +pub fn kill(self: *ChildProcess) !Term { + if (native_os == .windows) { + return self.killWindows(1); + } else { + return self.killPosix(); + } +} + +pub fn killWindows(self: *ChildProcess, exit_code: windows.UINT) !Term { + if (self.term) |term| { + self.cleanupStreams(); + return term; + } + + windows.TerminateProcess(self.id, exit_code) catch |err| switch (err) { + error.PermissionDenied => { + // Usually when TerminateProcess triggers a ACCESS_DENIED error, it + // indicates that the process has already exited, but there may be + // some rare edge cases where our process handle no longer has the + // PROCESS_TERMINATE access right, so let's do another check to make + // sure the process is really no longer running: + windows.WaitForSingleObjectEx(self.id, 0, false) catch return err; + return error.AlreadyTerminated; + }, + else => return err, + }; + try self.waitUnwrappedWindows(); + return self.term.?; +} + +pub fn killPosix(self: *ChildProcess) !Term { + if (self.term) |term| { + self.cleanupStreams(); + return term; + } + posix.kill(self.id, posix.SIG.TERM) catch |err| switch (err) { + error.ProcessNotFound => return error.AlreadyTerminated, + else => return err, + }; + try self.waitUnwrapped(); + return self.term.?; +} + +/// Blocks until child process terminates and then cleans up all resources. +pub fn wait(self: *ChildProcess) !Term { + const term = if (native_os == .windows) + try self.waitWindows() + else + try self.waitPosix(); + + self.id = undefined; + + return term; +} + +pub const RunResult = struct { + term: Term, + stdout: []u8, + stderr: []u8, +}; + +fn fifoToOwnedArrayList(fifo: *std.io.PollFifo) std.ArrayList(u8) { + if (fifo.head > 0) { + @memcpy(fifo.buf[0..fifo.count], fifo.buf[fifo.head..][0..fifo.count]); + } + const result = std.ArrayList(u8){ + .items = fifo.buf[0..fifo.count], + .capacity = fifo.buf.len, + .allocator = fifo.allocator, + }; + fifo.* = std.io.PollFifo.init(fifo.allocator); + return result; +} + +/// Collect the output from the process's stdout and stderr. Will return once all output +/// has been collected. This does not mean that the process has ended. `wait` should still +/// be called to wait for and clean up the process. +/// +/// The process must be started with stdout_behavior and stderr_behavior == .Pipe +pub fn collectOutput( + child: ChildProcess, + stdout: *std.ArrayList(u8), + stderr: *std.ArrayList(u8), + max_output_bytes: usize, +) !void { + assert(child.stdout_behavior == .Pipe); + assert(child.stderr_behavior == .Pipe); + + // we could make this work with multiple allocators but YAGNI + if (stdout.allocator.ptr != stderr.allocator.ptr or + stdout.allocator.vtable != stderr.allocator.vtable) + { + unreachable; // ChildProcess.collectOutput only supports 1 allocator + } + + var poller = std.io.poll(stdout.allocator, enum { stdout, stderr }, .{ + .stdout = child.stdout.?, + .stderr = child.stderr.?, + }); + defer poller.deinit(); + + while (try poller.poll()) { + if (poller.fifo(.stdout).count > max_output_bytes) + return error.StdoutStreamTooLong; + if (poller.fifo(.stderr).count > max_output_bytes) + return error.StderrStreamTooLong; + } + + stdout.* = fifoToOwnedArrayList(poller.fifo(.stdout)); + stderr.* = fifoToOwnedArrayList(poller.fifo(.stderr)); +} + +pub const RunError = posix.GetCwdError || posix.ReadError || SpawnError || posix.PollError || error{ + StdoutStreamTooLong, + StderrStreamTooLong, +}; + +/// Spawns a child process, waits for it, collecting stdout and stderr, and then returns. +/// If it succeeds, the caller owns result.stdout and result.stderr memory. +pub fn run(args: struct { + allocator: mem.Allocator, + argv: []const []const u8, + cwd: ?[]const u8 = null, + cwd_dir: ?fs.Dir = null, + env_map: ?*const EnvMap = null, + max_output_bytes: usize = 50 * 1024, + expand_arg0: Arg0Expand = .no_expand, +}) RunError!RunResult { + var child = ChildProcess.init(args.argv, args.allocator); + child.stdin_behavior = .Ignore; + child.stdout_behavior = .Pipe; + child.stderr_behavior = .Pipe; + child.cwd = args.cwd; + child.cwd_dir = args.cwd_dir; + child.env_map = args.env_map; + child.expand_arg0 = args.expand_arg0; + + var stdout = std.ArrayList(u8).init(args.allocator); + var stderr = std.ArrayList(u8).init(args.allocator); + errdefer { + stdout.deinit(); + stderr.deinit(); + } + + try child.spawn(); + try child.collectOutput(&stdout, &stderr, args.max_output_bytes); + + return RunResult{ + .term = try child.wait(), + .stdout = try stdout.toOwnedSlice(), + .stderr = try stderr.toOwnedSlice(), + }; +} + +fn waitWindows(self: *ChildProcess) !Term { + if (self.term) |term| { + self.cleanupStreams(); + return term; + } + + try self.waitUnwrappedWindows(); + return self.term.?; +} + +fn waitPosix(self: *ChildProcess) !Term { + if (self.term) |term| { + self.cleanupStreams(); + return term; + } + + try self.waitUnwrapped(); + return self.term.?; +} + +fn waitUnwrappedWindows(self: *ChildProcess) !void { + const result = windows.WaitForSingleObjectEx(self.id, windows.INFINITE, false); + + self.term = @as(SpawnError!Term, x: { + var exit_code: windows.DWORD = undefined; + if (windows.kernel32.GetExitCodeProcess(self.id, &exit_code) == 0) { + break :x Term{ .Unknown = 0 }; + } else { + break :x Term{ .Exited = @as(u8, @truncate(exit_code)) }; + } + }); + + if (self.request_resource_usage_statistics) { + self.resource_usage_statistics.rusage = try windows.GetProcessMemoryInfo(self.id); + } + + posix.close(self.id); + posix.close(self.thread_handle); + self.cleanupStreams(); + return result; +} + +fn waitUnwrapped(self: *ChildProcess) !void { + const res: posix.WaitPidResult = res: { + if (self.request_resource_usage_statistics) { + switch (native_os) { + .linux, .macos, .ios => { + var ru: posix.rusage = undefined; + const res = posix.wait4(self.id, 0, &ru); + self.resource_usage_statistics.rusage = ru; + break :res res; + }, + else => {}, + } + } + + break :res posix.waitpid(self.id, 0); + }; + const status = res.status; + self.cleanupStreams(); + self.handleWaitResult(status); +} + +fn handleWaitResult(self: *ChildProcess, status: u32) void { + self.term = self.cleanupAfterWait(status); +} + +fn cleanupStreams(self: *ChildProcess) void { + if (self.stdin) |*stdin| { + stdin.close(); + self.stdin = null; + } + if (self.stdout) |*stdout| { + stdout.close(); + self.stdout = null; + } + if (self.stderr) |*stderr| { + stderr.close(); + self.stderr = null; + } +} + +fn cleanupAfterWait(self: *ChildProcess, status: u32) !Term { + if (self.err_pipe) |err_pipe| { + defer destroyPipe(err_pipe); + + if (native_os == .linux) { + var fd = [1]posix.pollfd{posix.pollfd{ + .fd = err_pipe[0], + .events = posix.POLL.IN, + .revents = undefined, + }}; + + // Check if the eventfd buffer stores a non-zero value by polling + // it, that's the error code returned by the child process. + _ = posix.poll(&fd, 0) catch unreachable; + + // According to eventfd(2) the descriptor is readable if the counter + // has a value greater than 0 + if ((fd[0].revents & posix.POLL.IN) != 0) { + const err_int = try readIntFd(err_pipe[0]); + return @as(SpawnError, @errorCast(@errorFromInt(err_int))); + } + } else { + // Write maxInt(ErrInt) to the write end of the err_pipe. This is after + // waitpid, so this write is guaranteed to be after the child + // pid potentially wrote an error. This way we can do a blocking + // read on the error pipe and either get maxInt(ErrInt) (no error) or + // an error code. + try writeIntFd(err_pipe[1], maxInt(ErrInt)); + const err_int = try readIntFd(err_pipe[0]); + // Here we potentially return the fork child's error from the parent + // pid. + if (err_int != maxInt(ErrInt)) { + return @as(SpawnError, @errorCast(@errorFromInt(err_int))); + } + } + } + + return statusToTerm(status); +} + +fn statusToTerm(status: u32) Term { + return if (posix.W.IFEXITED(status)) + Term{ .Exited = posix.W.EXITSTATUS(status) } + else if (posix.W.IFSIGNALED(status)) + Term{ .Signal = posix.W.TERMSIG(status) } + else if (posix.W.IFSTOPPED(status)) + Term{ .Stopped = posix.W.STOPSIG(status) } + else + Term{ .Unknown = status }; +} + +fn spawnPosix(self: *ChildProcess) SpawnError!void { + const pipe_flags: posix.O = .{}; + const stdin_pipe = if (self.stdin_behavior == StdIo.Pipe) try posix.pipe2(pipe_flags) else undefined; + errdefer if (self.stdin_behavior == StdIo.Pipe) { + destroyPipe(stdin_pipe); + }; + + const stdout_pipe = if (self.stdout_behavior == StdIo.Pipe) try posix.pipe2(pipe_flags) else undefined; + errdefer if (self.stdout_behavior == StdIo.Pipe) { + destroyPipe(stdout_pipe); + }; + + const stderr_pipe = if (self.stderr_behavior == StdIo.Pipe) try posix.pipe2(pipe_flags) else undefined; + errdefer if (self.stderr_behavior == StdIo.Pipe) { + destroyPipe(stderr_pipe); + }; + + const any_ignore = (self.stdin_behavior == StdIo.Ignore or self.stdout_behavior == StdIo.Ignore or self.stderr_behavior == StdIo.Ignore); + const dev_null_fd = if (any_ignore) + posix.openZ("/dev/null", .{ .ACCMODE = .RDWR }, 0) catch |err| switch (err) { + error.PathAlreadyExists => unreachable, + error.NoSpaceLeft => unreachable, + error.FileTooBig => unreachable, + error.DeviceBusy => unreachable, + error.FileLocksNotSupported => unreachable, + error.BadPathName => unreachable, // Windows-only + error.WouldBlock => unreachable, + error.NetworkNotFound => unreachable, // Windows-only + else => |e| return e, + } + else + undefined; + defer { + if (any_ignore) posix.close(dev_null_fd); + } + + var arena_allocator = std.heap.ArenaAllocator.init(self.allocator); + defer arena_allocator.deinit(); + const arena = arena_allocator.allocator(); + + // The POSIX standard does not allow malloc() between fork() and execve(), + // and `self.allocator` may be a libc allocator. + // I have personally observed the child process deadlocking when it tries + // to call malloc() due to a heap allocation between fork() and execve(), + // in musl v1.1.24. + // Additionally, we want to reduce the number of possible ways things + // can fail between fork() and execve(). + // Therefore, we do all the allocation for the execve() before the fork(). + // This means we must do the null-termination of argv and env vars here. + const argv_buf = try arena.allocSentinel(?[*:0]const u8, self.argv.len, null); + for (self.argv, 0..) |arg, i| argv_buf[i] = (try arena.dupeZ(u8, arg)).ptr; + + const envp = m: { + if (self.env_map) |env_map| { + const envp_buf = try process.createNullDelimitedEnvMap(arena, env_map); + break :m envp_buf.ptr; + } else if (builtin.link_libc) { + break :m std.c.environ; + } else if (builtin.output_mode == .Exe) { + // Then we have Zig start code and this works. + // TODO type-safety for null-termination of `os.environ`. + break :m @as([*:null]const ?[*:0]const u8, @ptrCast(std.os.environ.ptr)); + } else { + // TODO come up with a solution for this. + @compileError("missing std lib enhancement: ChildProcess implementation has no way to collect the environment variables to forward to the child process"); + } + }; + + // This pipe is used to communicate errors between the time of fork + // and execve from the child process to the parent process. + const err_pipe = blk: { + if (native_os == .linux) { + const fd = try posix.eventfd(0, linux.EFD.CLOEXEC); + // There's no distinction between the readable and the writeable + // end with eventfd + break :blk [2]posix.fd_t{ fd, fd }; + } else { + break :blk try posix.pipe2(.{ .CLOEXEC = true }); + } + }; + errdefer destroyPipe(err_pipe); + + const pid_result = try posix.fork(); + if (pid_result == 0) { + // we are the child + setUpChildIo(self.stdin_behavior, stdin_pipe[0], posix.STDIN_FILENO, dev_null_fd) catch |err| forkChildErrReport(err_pipe[1], err); + setUpChildIo(self.stdout_behavior, stdout_pipe[1], posix.STDOUT_FILENO, dev_null_fd) catch |err| forkChildErrReport(err_pipe[1], err); + setUpChildIo(self.stderr_behavior, stderr_pipe[1], posix.STDERR_FILENO, dev_null_fd) catch |err| forkChildErrReport(err_pipe[1], err); + + if (self.stdin_behavior == .Pipe) { + posix.close(stdin_pipe[0]); + posix.close(stdin_pipe[1]); + } + if (self.stdout_behavior == .Pipe) { + posix.close(stdout_pipe[0]); + posix.close(stdout_pipe[1]); + } + if (self.stderr_behavior == .Pipe) { + posix.close(stderr_pipe[0]); + posix.close(stderr_pipe[1]); + } + + if (self.cwd_dir) |cwd| { + posix.fchdir(cwd.fd) catch |err| forkChildErrReport(err_pipe[1], err); + } else if (self.cwd) |cwd| { + posix.chdir(cwd) catch |err| forkChildErrReport(err_pipe[1], err); + } + + if (self.gid) |gid| { + posix.setregid(gid, gid) catch |err| forkChildErrReport(err_pipe[1], err); + } + + if (self.uid) |uid| { + posix.setreuid(uid, uid) catch |err| forkChildErrReport(err_pipe[1], err); + } + + const err = switch (self.expand_arg0) { + .expand => posix.execvpeZ_expandArg0(.expand, argv_buf.ptr[0].?, argv_buf.ptr, envp), + .no_expand => posix.execvpeZ_expandArg0(.no_expand, argv_buf.ptr[0].?, argv_buf.ptr, envp), + }; + forkChildErrReport(err_pipe[1], err); + } + + // we are the parent + const pid: i32 = @intCast(pid_result); + if (self.stdin_behavior == StdIo.Pipe) { + self.stdin = File{ .handle = stdin_pipe[1] }; + } else { + self.stdin = null; + } + if (self.stdout_behavior == StdIo.Pipe) { + self.stdout = File{ .handle = stdout_pipe[0] }; + } else { + self.stdout = null; + } + if (self.stderr_behavior == StdIo.Pipe) { + self.stderr = File{ .handle = stderr_pipe[0] }; + } else { + self.stderr = null; + } + + self.id = pid; + self.err_pipe = err_pipe; + self.term = null; + + if (self.stdin_behavior == StdIo.Pipe) { + posix.close(stdin_pipe[0]); + } + if (self.stdout_behavior == StdIo.Pipe) { + posix.close(stdout_pipe[1]); + } + if (self.stderr_behavior == StdIo.Pipe) { + posix.close(stderr_pipe[1]); + } +} + +fn spawnWindows(self: *ChildProcess) SpawnError!void { + var saAttr = windows.SECURITY_ATTRIBUTES{ + .nLength = @sizeOf(windows.SECURITY_ATTRIBUTES), + .bInheritHandle = windows.TRUE, + .lpSecurityDescriptor = null, + }; + + const any_ignore = (self.stdin_behavior == StdIo.Ignore or self.stdout_behavior == StdIo.Ignore or self.stderr_behavior == StdIo.Ignore); + + const nul_handle = if (any_ignore) + // "\Device\Null" or "\??\NUL" + windows.OpenFile(&[_]u16{ '\\', 'D', 'e', 'v', 'i', 'c', 'e', '\\', 'N', 'u', 'l', 'l' }, .{ + .access_mask = windows.GENERIC_READ | windows.GENERIC_WRITE | windows.SYNCHRONIZE, + .share_access = windows.FILE_SHARE_READ | windows.FILE_SHARE_WRITE | windows.FILE_SHARE_DELETE, + .sa = &saAttr, + .creation = windows.OPEN_EXISTING, + }) catch |err| switch (err) { + error.PathAlreadyExists => return error.Unexpected, // not possible for "NUL" + error.PipeBusy => return error.Unexpected, // not possible for "NUL" + error.FileNotFound => return error.Unexpected, // not possible for "NUL" + error.AccessDenied => return error.Unexpected, // not possible for "NUL" + error.NameTooLong => return error.Unexpected, // not possible for "NUL" + error.WouldBlock => return error.Unexpected, // not possible for "NUL" + error.NetworkNotFound => return error.Unexpected, // not possible for "NUL" + error.AntivirusInterference => return error.Unexpected, // not possible for "NUL" + else => |e| return e, + } + else + undefined; + defer { + if (any_ignore) posix.close(nul_handle); + } + + var g_hChildStd_IN_Rd: ?windows.HANDLE = null; + var g_hChildStd_IN_Wr: ?windows.HANDLE = null; + switch (self.stdin_behavior) { + StdIo.Pipe => { + try windowsMakePipeIn(&g_hChildStd_IN_Rd, &g_hChildStd_IN_Wr, &saAttr); + }, + StdIo.Ignore => { + g_hChildStd_IN_Rd = nul_handle; + }, + StdIo.Inherit => { + g_hChildStd_IN_Rd = windows.GetStdHandle(windows.STD_INPUT_HANDLE) catch null; + }, + StdIo.Close => { + g_hChildStd_IN_Rd = null; + }, + } + errdefer if (self.stdin_behavior == StdIo.Pipe) { + windowsDestroyPipe(g_hChildStd_IN_Rd, g_hChildStd_IN_Wr); + }; + + var g_hChildStd_OUT_Rd: ?windows.HANDLE = null; + var g_hChildStd_OUT_Wr: ?windows.HANDLE = null; + switch (self.stdout_behavior) { + StdIo.Pipe => { + try windowsMakeAsyncPipe(&g_hChildStd_OUT_Rd, &g_hChildStd_OUT_Wr, &saAttr); + }, + StdIo.Ignore => { + g_hChildStd_OUT_Wr = nul_handle; + }, + StdIo.Inherit => { + g_hChildStd_OUT_Wr = windows.GetStdHandle(windows.STD_OUTPUT_HANDLE) catch null; + }, + StdIo.Close => { + g_hChildStd_OUT_Wr = null; + }, + } + errdefer if (self.stdout_behavior == StdIo.Pipe) { + windowsDestroyPipe(g_hChildStd_OUT_Rd, g_hChildStd_OUT_Wr); + }; + + var g_hChildStd_ERR_Rd: ?windows.HANDLE = null; + var g_hChildStd_ERR_Wr: ?windows.HANDLE = null; + switch (self.stderr_behavior) { + StdIo.Pipe => { + try windowsMakeAsyncPipe(&g_hChildStd_ERR_Rd, &g_hChildStd_ERR_Wr, &saAttr); + }, + StdIo.Ignore => { + g_hChildStd_ERR_Wr = nul_handle; + }, + StdIo.Inherit => { + g_hChildStd_ERR_Wr = windows.GetStdHandle(windows.STD_ERROR_HANDLE) catch null; + }, + StdIo.Close => { + g_hChildStd_ERR_Wr = null; + }, + } + errdefer if (self.stderr_behavior == StdIo.Pipe) { + windowsDestroyPipe(g_hChildStd_ERR_Rd, g_hChildStd_ERR_Wr); + }; + + var siStartInfo = windows.STARTUPINFOW{ + .cb = @sizeOf(windows.STARTUPINFOW), + .hStdError = g_hChildStd_ERR_Wr, + .hStdOutput = g_hChildStd_OUT_Wr, + .hStdInput = g_hChildStd_IN_Rd, + .dwFlags = windows.STARTF_USESTDHANDLES, + + .lpReserved = null, + .lpDesktop = null, + .lpTitle = null, + .dwX = 0, + .dwY = 0, + .dwXSize = 0, + .dwYSize = 0, + .dwXCountChars = 0, + .dwYCountChars = 0, + .dwFillAttribute = 0, + .wShowWindow = 0, + .cbReserved2 = 0, + .lpReserved2 = null, + }; + var piProcInfo: windows.PROCESS_INFORMATION = undefined; + + const cwd_w = if (self.cwd) |cwd| try unicode.wtf8ToWtf16LeAllocZ(self.allocator, cwd) else null; + defer if (cwd_w) |cwd| self.allocator.free(cwd); + const cwd_w_ptr = if (cwd_w) |cwd| cwd.ptr else null; + + const maybe_envp_buf = if (self.env_map) |env_map| try process.createWindowsEnvBlock(self.allocator, env_map) else null; + defer if (maybe_envp_buf) |envp_buf| self.allocator.free(envp_buf); + const envp_ptr = if (maybe_envp_buf) |envp_buf| envp_buf.ptr else null; + + const app_name_wtf8 = self.argv[0]; + const app_name_is_absolute = fs.path.isAbsolute(app_name_wtf8); + + // the cwd set in ChildProcess is in effect when choosing the executable path + // to match posix semantics + var cwd_path_w_needs_free = false; + const cwd_path_w = x: { + // If the app name is absolute, then we need to use its dirname as the cwd + if (app_name_is_absolute) { + cwd_path_w_needs_free = true; + const dir = fs.path.dirname(app_name_wtf8).?; + break :x try unicode.wtf8ToWtf16LeAllocZ(self.allocator, dir); + } else if (self.cwd) |cwd| { + cwd_path_w_needs_free = true; + break :x try unicode.wtf8ToWtf16LeAllocZ(self.allocator, cwd); + } else { + break :x &[_:0]u16{}; // empty for cwd + } + }; + defer if (cwd_path_w_needs_free) self.allocator.free(cwd_path_w); + + // If the app name has more than just a filename, then we need to separate that + // into the basename and dirname and use the dirname as an addition to the cwd + // path. This is because NtQueryDirectoryFile cannot accept FileName params with + // path separators. + const app_basename_wtf8 = fs.path.basename(app_name_wtf8); + // If the app name is absolute, then the cwd will already have the app's dirname in it, + // so only populate app_dirname if app name is a relative path with > 0 path separators. + const maybe_app_dirname_wtf8 = if (!app_name_is_absolute) fs.path.dirname(app_name_wtf8) else null; + const app_dirname_w: ?[:0]u16 = x: { + if (maybe_app_dirname_wtf8) |app_dirname_wtf8| { + break :x try unicode.wtf8ToWtf16LeAllocZ(self.allocator, app_dirname_wtf8); + } + break :x null; + }; + defer if (app_dirname_w != null) self.allocator.free(app_dirname_w.?); + + const app_name_w = try unicode.wtf8ToWtf16LeAllocZ(self.allocator, app_basename_wtf8); + defer self.allocator.free(app_name_w); + + run: { + const PATH: [:0]const u16 = process.getenvW(unicode.utf8ToUtf16LeStringLiteral("PATH")) orelse &[_:0]u16{}; + const PATHEXT: [:0]const u16 = process.getenvW(unicode.utf8ToUtf16LeStringLiteral("PATHEXT")) orelse &[_:0]u16{}; + + // In case the command ends up being a .bat/.cmd script, we need to escape things using the cmd.exe rules + // and invoke cmd.exe ourselves in order to mitigate arbitrary command execution from maliciously + // constructed arguments. + // + // We'll need to wait until we're actually trying to run the command to know for sure + // if the resolved command has the `.bat` or `.cmd` extension, so we defer actually + // serializing the command line until we determine how it should be serialized. + var cmd_line_cache = WindowsCommandLineCache.init(self.allocator, self.argv); + defer cmd_line_cache.deinit(); + + var app_buf = std.ArrayListUnmanaged(u16){}; + defer app_buf.deinit(self.allocator); + + try app_buf.appendSlice(self.allocator, app_name_w); + + var dir_buf = std.ArrayListUnmanaged(u16){}; + defer dir_buf.deinit(self.allocator); + + if (cwd_path_w.len > 0) { + try dir_buf.appendSlice(self.allocator, cwd_path_w); + } + if (app_dirname_w) |app_dir| { + if (dir_buf.items.len > 0) try dir_buf.append(self.allocator, fs.path.sep); + try dir_buf.appendSlice(self.allocator, app_dir); + } + if (dir_buf.items.len > 0) { + // Need to normalize the path, openDirW can't handle things like double backslashes + const normalized_len = windows.normalizePath(u16, dir_buf.items) catch return error.BadPathName; + dir_buf.shrinkRetainingCapacity(normalized_len); + } + + windowsCreateProcessPathExt(self.allocator, &dir_buf, &app_buf, PATHEXT, &cmd_line_cache, envp_ptr, cwd_w_ptr, &siStartInfo, &piProcInfo) catch |no_path_err| { + const original_err = switch (no_path_err) { + // argv[0] contains unsupported characters that will never resolve to a valid exe. + error.InvalidArg0 => return error.FileNotFound, + error.FileNotFound, error.InvalidExe, error.AccessDenied => |e| e, + error.UnrecoverableInvalidExe => return error.InvalidExe, + else => |e| return e, + }; + + // If the app name had path separators, that disallows PATH searching, + // and there's no need to search the PATH if the app name is absolute. + // We still search the path if the cwd is absolute because of the + // "cwd set in ChildProcess is in effect when choosing the executable path + // to match posix semantics" behavior--we don't want to skip searching + // the PATH just because we were trying to set the cwd of the child process. + if (app_dirname_w != null or app_name_is_absolute) { + return original_err; + } + + var it = mem.tokenizeScalar(u16, PATH, ';'); + while (it.next()) |search_path| { + dir_buf.clearRetainingCapacity(); + try dir_buf.appendSlice(self.allocator, search_path); + // Need to normalize the path, some PATH values can contain things like double + // backslashes which openDirW can't handle + const normalized_len = windows.normalizePath(u16, dir_buf.items) catch continue; + dir_buf.shrinkRetainingCapacity(normalized_len); + + if (windowsCreateProcessPathExt(self.allocator, &dir_buf, &app_buf, PATHEXT, &cmd_line_cache, envp_ptr, cwd_w_ptr, &siStartInfo, &piProcInfo)) { + break :run; + } else |err| switch (err) { + // argv[0] contains unsupported characters that will never resolve to a valid exe. + error.InvalidArg0 => return error.FileNotFound, + error.FileNotFound, error.AccessDenied, error.InvalidExe => continue, + error.UnrecoverableInvalidExe => return error.InvalidExe, + else => |e| return e, + } + } else { + return original_err; + } + }; + } + + if (g_hChildStd_IN_Wr) |h| { + self.stdin = File{ .handle = h }; + } else { + self.stdin = null; + } + if (g_hChildStd_OUT_Rd) |h| { + self.stdout = File{ .handle = h }; + } else { + self.stdout = null; + } + if (g_hChildStd_ERR_Rd) |h| { + self.stderr = File{ .handle = h }; + } else { + self.stderr = null; + } + + self.id = piProcInfo.hProcess; + self.thread_handle = piProcInfo.hThread; + self.term = null; + + if (self.stdin_behavior == StdIo.Pipe) { + posix.close(g_hChildStd_IN_Rd.?); + } + if (self.stderr_behavior == StdIo.Pipe) { + posix.close(g_hChildStd_ERR_Wr.?); + } + if (self.stdout_behavior == StdIo.Pipe) { + posix.close(g_hChildStd_OUT_Wr.?); + } +} + +fn setUpChildIo(stdio: StdIo, pipe_fd: i32, std_fileno: i32, dev_null_fd: i32) !void { + switch (stdio) { + .Pipe => try posix.dup2(pipe_fd, std_fileno), + .Close => posix.close(std_fileno), + .Inherit => {}, + .Ignore => try posix.dup2(dev_null_fd, std_fileno), + } +} + +fn destroyPipe(pipe: [2]posix.fd_t) void { + posix.close(pipe[0]); + if (pipe[0] != pipe[1]) posix.close(pipe[1]); +} + +// Child of fork calls this to report an error to the fork parent. +// Then the child exits. +fn forkChildErrReport(fd: i32, err: ChildProcess.SpawnError) noreturn { + writeIntFd(fd, @as(ErrInt, @intFromError(err))) catch {}; + // If we're linking libc, some naughty applications may have registered atexit handlers + // which we really do not want to run in the fork child. I caught LLVM doing this and + // it caused a deadlock instead of doing an exit syscall. In the words of Avril Lavigne, + // "Why'd you have to go and make things so complicated?" + if (builtin.link_libc) { + // The _exit(2) function does nothing but make the exit syscall, unlike exit(3) + std.c._exit(1); + } + posix.exit(1); +} + +fn writeIntFd(fd: i32, value: ErrInt) !void { + const file: File = .{ .handle = fd }; + file.writer().writeInt(u64, @intCast(value), .little) catch return error.SystemResources; +} + +fn readIntFd(fd: i32) !ErrInt { + const file: File = .{ .handle = fd }; + return @intCast(file.reader().readInt(u64, .little) catch return error.SystemResources); +} + +const ErrInt = std.meta.Int(.unsigned, @sizeOf(anyerror) * 8); + +/// Expects `app_buf` to contain exactly the app name, and `dir_buf` to contain exactly the dir path. +/// After return, `app_buf` will always contain exactly the app name and `dir_buf` will always contain exactly the dir path. +/// Note: `app_buf` should not contain any leading path separators. +/// Note: If the dir is the cwd, dir_buf should be empty (len = 0). +fn windowsCreateProcessPathExt( + allocator: mem.Allocator, + dir_buf: *std.ArrayListUnmanaged(u16), + app_buf: *std.ArrayListUnmanaged(u16), + pathext: [:0]const u16, + cmd_line_cache: *WindowsCommandLineCache, + envp_ptr: ?[*]u16, + cwd_ptr: ?[*:0]u16, + lpStartupInfo: *windows.STARTUPINFOW, + lpProcessInformation: *windows.PROCESS_INFORMATION, +) !void { + const app_name_len = app_buf.items.len; + const dir_path_len = dir_buf.items.len; + + if (app_name_len == 0) return error.FileNotFound; + + defer app_buf.shrinkRetainingCapacity(app_name_len); + defer dir_buf.shrinkRetainingCapacity(dir_path_len); + + // The name of the game here is to avoid CreateProcessW calls at all costs, + // and only ever try calling it when we have a real candidate for execution. + // Secondarily, we want to minimize the number of syscalls used when checking + // for each PATHEXT-appended version of the app name. + // + // An overview of the technique used: + // - Open the search directory for iteration (either cwd or a path from PATH) + // - Use NtQueryDirectoryFile with a wildcard filename of `<app name>*` to + // check if anything that could possibly match either the unappended version + // of the app name or any of the versions with a PATHEXT value appended exists. + // - If the wildcard NtQueryDirectoryFile call found nothing, we can exit early + // without needing to use PATHEXT at all. + // + // This allows us to use a <open dir, NtQueryDirectoryFile, close dir> sequence + // for any directory that doesn't contain any possible matches, instead of having + // to use a separate look up for each individual filename combination (unappended + + // each PATHEXT appended). For directories where the wildcard *does* match something, + // we iterate the matches and take note of any that are either the unappended version, + // or a version with a supported PATHEXT appended. We then try calling CreateProcessW + // with the found versions in the appropriate order. + + var dir = dir: { + // needs to be null-terminated + try dir_buf.append(allocator, 0); + defer dir_buf.shrinkRetainingCapacity(dir_path_len); + const dir_path_z = dir_buf.items[0 .. dir_buf.items.len - 1 :0]; + const prefixed_path = try windows.wToPrefixedFileW(null, dir_path_z); + break :dir fs.cwd().openDirW(prefixed_path.span().ptr, .{ .iterate = true }) catch + return error.FileNotFound; + }; + defer dir.close(); + + // Add wildcard and null-terminator + try app_buf.append(allocator, '*'); + try app_buf.append(allocator, 0); + const app_name_wildcard = app_buf.items[0 .. app_buf.items.len - 1 :0]; + + // This 2048 is arbitrary, we just want it to be large enough to get multiple FILE_DIRECTORY_INFORMATION entries + // returned per NtQueryDirectoryFile call. + var file_information_buf: [2048]u8 align(@alignOf(windows.FILE_DIRECTORY_INFORMATION)) = undefined; + const file_info_maximum_single_entry_size = @sizeOf(windows.FILE_DIRECTORY_INFORMATION) + (windows.NAME_MAX * 2); + if (file_information_buf.len < file_info_maximum_single_entry_size) { + @compileError("file_information_buf must be large enough to contain at least one maximum size FILE_DIRECTORY_INFORMATION entry"); + } + var io_status: windows.IO_STATUS_BLOCK = undefined; + + const num_supported_pathext = @typeInfo(CreateProcessSupportedExtension).Enum.fields.len; + var pathext_seen = [_]bool{false} ** num_supported_pathext; + var any_pathext_seen = false; + var unappended_exists = false; + + // Fully iterate the wildcard matches via NtQueryDirectoryFile and take note of all versions + // of the app_name we should try to spawn. + // Note: This is necessary because the order of the files returned is filesystem-dependent: + // On NTFS, `blah.exe*` will always return `blah.exe` first if it exists. + // On FAT32, it's possible for something like `blah.exe.obj` to be returned first. + while (true) { + const app_name_len_bytes = std.math.cast(u16, app_name_wildcard.len * 2) orelse return error.NameTooLong; + var app_name_unicode_string = windows.UNICODE_STRING{ + .Length = app_name_len_bytes, + .MaximumLength = app_name_len_bytes, + .Buffer = @constCast(app_name_wildcard.ptr), + }; + const rc = windows.ntdll.NtQueryDirectoryFile( + dir.fd, + null, + null, + null, + &io_status, + &file_information_buf, + file_information_buf.len, + .FileDirectoryInformation, + windows.FALSE, // single result + &app_name_unicode_string, + windows.FALSE, // restart iteration + ); + + // If we get nothing with the wildcard, then we can just bail out + // as we know appending PATHEXT will not yield anything. + switch (rc) { + .SUCCESS => {}, + .NO_SUCH_FILE => return error.FileNotFound, + .NO_MORE_FILES => break, + .ACCESS_DENIED => return error.AccessDenied, + else => return windows.unexpectedStatus(rc), + } + + // According to the docs, this can only happen if there is not enough room in the + // buffer to write at least one complete FILE_DIRECTORY_INFORMATION entry. + // Therefore, this condition should not be possible to hit with the buffer size we use. + std.debug.assert(io_status.Information != 0); + + var it = windows.FileInformationIterator(windows.FILE_DIRECTORY_INFORMATION){ .buf = &file_information_buf }; + while (it.next()) |info| { + // Skip directories + if (info.FileAttributes & windows.FILE_ATTRIBUTE_DIRECTORY != 0) continue; + const filename = @as([*]u16, @ptrCast(&info.FileName))[0 .. info.FileNameLength / 2]; + // Because all results start with the app_name since we're using the wildcard `app_name*`, + // if the length is equal to app_name then this is an exact match + if (filename.len == app_name_len) { + // Note: We can't break early here because it's possible that the unappended version + // fails to spawn, in which case we still want to try the PATHEXT appended versions. + unappended_exists = true; + } else if (windowsCreateProcessSupportsExtension(filename[app_name_len..])) |pathext_ext| { + pathext_seen[@intFromEnum(pathext_ext)] = true; + any_pathext_seen = true; + } + } + } + + const unappended_err = unappended: { + if (unappended_exists) { + if (dir_path_len != 0) switch (dir_buf.items[dir_buf.items.len - 1]) { + '/', '\\' => {}, + else => try dir_buf.append(allocator, fs.path.sep), + }; + try dir_buf.appendSlice(allocator, app_buf.items[0..app_name_len]); + try dir_buf.append(allocator, 0); + const full_app_name = dir_buf.items[0 .. dir_buf.items.len - 1 :0]; + + const is_bat_or_cmd = bat_or_cmd: { + const app_name = app_buf.items[0..app_name_len]; + const ext_start = std.mem.lastIndexOfScalar(u16, app_name, '.') orelse break :bat_or_cmd false; + const ext = app_name[ext_start..]; + const ext_enum = windowsCreateProcessSupportsExtension(ext) orelse break :bat_or_cmd false; + switch (ext_enum) { + .cmd, .bat => break :bat_or_cmd true, + else => break :bat_or_cmd false, + } + }; + const cmd_line_w = if (is_bat_or_cmd) + try cmd_line_cache.scriptCommandLine(full_app_name) + else + try cmd_line_cache.commandLine(); + const app_name_w = if (is_bat_or_cmd) + try cmd_line_cache.cmdExePath() + else + full_app_name; + + if (windowsCreateProcess(app_name_w.ptr, cmd_line_w.ptr, envp_ptr, cwd_ptr, lpStartupInfo, lpProcessInformation)) |_| { + return; + } else |err| switch (err) { + error.FileNotFound, + error.AccessDenied, + => break :unappended err, + error.InvalidExe => { + // On InvalidExe, if the extension of the app name is .exe then + // it's treated as an unrecoverable error. Otherwise, it'll be + // skipped as normal. + const app_name = app_buf.items[0..app_name_len]; + const ext_start = std.mem.lastIndexOfScalar(u16, app_name, '.') orelse break :unappended err; + const ext = app_name[ext_start..]; + if (windows.eqlIgnoreCaseWTF16(ext, unicode.utf8ToUtf16LeStringLiteral(".EXE"))) { + return error.UnrecoverableInvalidExe; + } + break :unappended err; + }, + else => return err, + } + } + break :unappended error.FileNotFound; + }; + + if (!any_pathext_seen) return unappended_err; + + // Now try any PATHEXT appended versions that we've seen + var ext_it = mem.tokenizeScalar(u16, pathext, ';'); + while (ext_it.next()) |ext| { + const ext_enum = windowsCreateProcessSupportsExtension(ext) orelse continue; + if (!pathext_seen[@intFromEnum(ext_enum)]) continue; + + dir_buf.shrinkRetainingCapacity(dir_path_len); + if (dir_path_len != 0) switch (dir_buf.items[dir_buf.items.len - 1]) { + '/', '\\' => {}, + else => try dir_buf.append(allocator, fs.path.sep), + }; + try dir_buf.appendSlice(allocator, app_buf.items[0..app_name_len]); + try dir_buf.appendSlice(allocator, ext); + try dir_buf.append(allocator, 0); + const full_app_name = dir_buf.items[0 .. dir_buf.items.len - 1 :0]; + + const is_bat_or_cmd = switch (ext_enum) { + .cmd, .bat => true, + else => false, + }; + const cmd_line_w = if (is_bat_or_cmd) + try cmd_line_cache.scriptCommandLine(full_app_name) + else + try cmd_line_cache.commandLine(); + const app_name_w = if (is_bat_or_cmd) + try cmd_line_cache.cmdExePath() + else + full_app_name; + + if (windowsCreateProcess(app_name_w.ptr, cmd_line_w.ptr, envp_ptr, cwd_ptr, lpStartupInfo, lpProcessInformation)) |_| { + return; + } else |err| switch (err) { + error.FileNotFound => continue, + error.AccessDenied => continue, + error.InvalidExe => { + // On InvalidExe, if the extension of the app name is .exe then + // it's treated as an unrecoverable error. Otherwise, it'll be + // skipped as normal. + if (windows.eqlIgnoreCaseWTF16(ext, unicode.utf8ToUtf16LeStringLiteral(".EXE"))) { + return error.UnrecoverableInvalidExe; + } + continue; + }, + else => return err, + } + } + + return unappended_err; +} + +fn windowsCreateProcess( + app_name: [*:0]u16, + cmd_line: [*:0]u16, + envp_ptr: ?[*]u16, + cwd_ptr: ?[*:0]u16, + lpStartupInfo: *windows.STARTUPINFOW, + lpProcessInformation: *windows.PROCESS_INFORMATION, +) !void { + // TODO the docs for environment pointer say: + // > A pointer to the environment block for the new process. If this parameter + // > is NULL, the new process uses the environment of the calling process. + // > ... + // > An environment block can contain either Unicode or ANSI characters. If + // > the environment block pointed to by lpEnvironment contains Unicode + // > characters, be sure that dwCreationFlags includes CREATE_UNICODE_ENVIRONMENT. + // > If this parameter is NULL and the environment block of the parent process + // > contains Unicode characters, you must also ensure that dwCreationFlags + // > includes CREATE_UNICODE_ENVIRONMENT. + // This seems to imply that we have to somehow know whether our process parent passed + // CREATE_UNICODE_ENVIRONMENT if we want to pass NULL for the environment parameter. + // Since we do not know this information that would imply that we must not pass NULL + // for the parameter. + // However this would imply that programs compiled with -DUNICODE could not pass + // environment variables to programs that were not, which seems unlikely. + // More investigation is needed. + return windows.CreateProcessW( + app_name, + cmd_line, + null, + null, + windows.TRUE, + windows.CREATE_UNICODE_ENVIRONMENT, + @as(?*anyopaque, @ptrCast(envp_ptr)), + cwd_ptr, + lpStartupInfo, + lpProcessInformation, + ); +} + +fn windowsMakePipeIn(rd: *?windows.HANDLE, wr: *?windows.HANDLE, sattr: *const windows.SECURITY_ATTRIBUTES) !void { + var rd_h: windows.HANDLE = undefined; + var wr_h: windows.HANDLE = undefined; + try windows.CreatePipe(&rd_h, &wr_h, sattr); + errdefer windowsDestroyPipe(rd_h, wr_h); + try windows.SetHandleInformation(wr_h, windows.HANDLE_FLAG_INHERIT, 0); + rd.* = rd_h; + wr.* = wr_h; +} + +fn windowsDestroyPipe(rd: ?windows.HANDLE, wr: ?windows.HANDLE) void { + if (rd) |h| posix.close(h); + if (wr) |h| posix.close(h); +} + +fn windowsMakeAsyncPipe(rd: *?windows.HANDLE, wr: *?windows.HANDLE, sattr: *const windows.SECURITY_ATTRIBUTES) !void { + var tmp_bufw: [128]u16 = undefined; + + // Anonymous pipes are built upon Named pipes. + // https://docs.microsoft.com/en-us/windows/win32/api/namedpipeapi/nf-namedpipeapi-createpipe + // Asynchronous (overlapped) read and write operations are not supported by anonymous pipes. + // https://docs.microsoft.com/en-us/windows/win32/ipc/anonymous-pipe-operations + const pipe_path = blk: { + var tmp_buf: [128]u8 = undefined; + // Forge a random path for the pipe. + const pipe_path = std.fmt.bufPrintZ( + &tmp_buf, + "\\\\.\\pipe\\zig-childprocess-{d}-{d}", + .{ windows.GetCurrentProcessId(), pipe_name_counter.fetchAdd(1, .monotonic) }, + ) catch unreachable; + const len = std.unicode.wtf8ToWtf16Le(&tmp_bufw, pipe_path) catch unreachable; + tmp_bufw[len] = 0; + break :blk tmp_bufw[0..len :0]; + }; + + // Create the read handle that can be used with overlapped IO ops. + const read_handle = windows.kernel32.CreateNamedPipeW( + pipe_path.ptr, + windows.PIPE_ACCESS_INBOUND | windows.FILE_FLAG_OVERLAPPED, + windows.PIPE_TYPE_BYTE, + 1, + 4096, + 4096, + 0, + sattr, + ); + if (read_handle == windows.INVALID_HANDLE_VALUE) { + switch (windows.kernel32.GetLastError()) { + else => |err| return windows.unexpectedError(err), + } + } + errdefer posix.close(read_handle); + + var sattr_copy = sattr.*; + const write_handle = windows.kernel32.CreateFileW( + pipe_path.ptr, + windows.GENERIC_WRITE, + 0, + &sattr_copy, + windows.OPEN_EXISTING, + windows.FILE_ATTRIBUTE_NORMAL, + null, + ); + if (write_handle == windows.INVALID_HANDLE_VALUE) { + switch (windows.kernel32.GetLastError()) { + else => |err| return windows.unexpectedError(err), + } + } + errdefer posix.close(write_handle); + + try windows.SetHandleInformation(read_handle, windows.HANDLE_FLAG_INHERIT, 0); + + rd.* = read_handle; + wr.* = write_handle; +} + +var pipe_name_counter = std.atomic.Value(u32).init(1); + +// Should be kept in sync with `windowsCreateProcessSupportsExtension` +const CreateProcessSupportedExtension = enum { + bat, + cmd, + com, + exe, +}; + +/// Case-insensitive WTF-16 lookup +fn windowsCreateProcessSupportsExtension(ext: []const u16) ?CreateProcessSupportedExtension { + if (ext.len != 4) return null; + const State = enum { + start, + dot, + b, + ba, + c, + cm, + co, + e, + ex, + }; + var state: State = .start; + for (ext) |c| switch (state) { + .start => switch (c) { + '.' => state = .dot, + else => return null, + }, + .dot => switch (c) { + 'b', 'B' => state = .b, + 'c', 'C' => state = .c, + 'e', 'E' => state = .e, + else => return null, + }, + .b => switch (c) { + 'a', 'A' => state = .ba, + else => return null, + }, + .c => switch (c) { + 'm', 'M' => state = .cm, + 'o', 'O' => state = .co, + else => return null, + }, + .e => switch (c) { + 'x', 'X' => state = .ex, + else => return null, + }, + .ba => switch (c) { + 't', 'T' => return .bat, + else => return null, + }, + .cm => switch (c) { + 'd', 'D' => return .cmd, + else => return null, + }, + .co => switch (c) { + 'm', 'M' => return .com, + else => return null, + }, + .ex => switch (c) { + 'e', 'E' => return .exe, + else => return null, + }, + }; + return null; +} + +test windowsCreateProcessSupportsExtension { + try std.testing.expectEqual(CreateProcessSupportedExtension.exe, windowsCreateProcessSupportsExtension(&[_]u16{ '.', 'e', 'X', 'e' }).?); + try std.testing.expect(windowsCreateProcessSupportsExtension(&[_]u16{ '.', 'e', 'X', 'e', 'c' }) == null); +} + +/// Serializes argv into a WTF-16 encoded command-line string for use with CreateProcessW. +/// +/// Serialization is done on-demand and the result is cached in order to allow for: +/// - Only serializing the particular type of command line needed (`.bat`/`.cmd` +/// command line serialization is different from `.exe`/etc) +/// - Reusing the serialized command lines if necessary (i.e. if the execution +/// of a command fails and the PATH is going to be continued to be searched +/// for more candidates) +const WindowsCommandLineCache = struct { + cmd_line: ?[:0]u16 = null, + script_cmd_line: ?[:0]u16 = null, + cmd_exe_path: ?[:0]u16 = null, + argv: []const []const u8, + allocator: mem.Allocator, + + fn init(allocator: mem.Allocator, argv: []const []const u8) WindowsCommandLineCache { + return .{ + .allocator = allocator, + .argv = argv, + }; + } + + fn deinit(self: *WindowsCommandLineCache) void { + if (self.cmd_line) |cmd_line| self.allocator.free(cmd_line); + if (self.script_cmd_line) |script_cmd_line| self.allocator.free(script_cmd_line); + if (self.cmd_exe_path) |cmd_exe_path| self.allocator.free(cmd_exe_path); + } + + fn commandLine(self: *WindowsCommandLineCache) ![:0]u16 { + if (self.cmd_line == null) { + self.cmd_line = try argvToCommandLineWindows(self.allocator, self.argv); + } + return self.cmd_line.?; + } + + /// Not cached, since the path to the batch script will change during PATH searching. + /// `script_path` should be as qualified as possible, e.g. if the PATH is being searched, + /// then script_path should include both the search path and the script filename + /// (this allows avoiding cmd.exe having to search the PATH again). + fn scriptCommandLine(self: *WindowsCommandLineCache, script_path: []const u16) ![:0]u16 { + if (self.script_cmd_line) |v| self.allocator.free(v); + self.script_cmd_line = try argvToScriptCommandLineWindows( + self.allocator, + script_path, + self.argv[1..], + ); + return self.script_cmd_line.?; + } + + fn cmdExePath(self: *WindowsCommandLineCache) ![:0]u16 { + if (self.cmd_exe_path == null) { + self.cmd_exe_path = try windowsCmdExePath(self.allocator); + } + return self.cmd_exe_path.?; + } +}; + +/// Returns the absolute path of `cmd.exe` within the Windows system directory. +/// The caller owns the returned slice. +fn windowsCmdExePath(allocator: mem.Allocator) error{ OutOfMemory, Unexpected }![:0]u16 { + var buf = try std.ArrayListUnmanaged(u16).initCapacity(allocator, 128); + errdefer buf.deinit(allocator); + while (true) { + const unused_slice = buf.unusedCapacitySlice(); + // TODO: Get the system directory from PEB.ReadOnlyStaticServerData + const len = windows.kernel32.GetSystemDirectoryW(@ptrCast(unused_slice), @intCast(unused_slice.len)); + if (len == 0) { + switch (windows.kernel32.GetLastError()) { + else => |err| return windows.unexpectedError(err), + } + } + if (len > unused_slice.len) { + try buf.ensureUnusedCapacity(allocator, len); + } else { + buf.items.len = len; + break; + } + } + switch (buf.items[buf.items.len - 1]) { + '/', '\\' => {}, + else => try buf.append(allocator, fs.path.sep), + } + try buf.appendSlice(allocator, unicode.utf8ToUtf16LeStringLiteral("cmd.exe")); + return try buf.toOwnedSliceSentinel(allocator, 0); +} + +const ArgvToCommandLineError = error{ OutOfMemory, InvalidWtf8, InvalidArg0 }; + +/// Serializes `argv` to a Windows command-line string suitable for passing to a child process and +/// parsing by the `CommandLineToArgvW` algorithm. The caller owns the returned slice. +/// +/// To avoid arbitrary command execution, this function should not be used when spawning `.bat`/`.cmd` scripts. +/// https://flatt.tech/research/posts/batbadbut-you-cant-securely-execute-commands-on-windows/ +/// +/// When executing `.bat`/`.cmd` scripts, use `argvToScriptCommandLineWindows` instead. +fn argvToCommandLineWindows( + allocator: mem.Allocator, + argv: []const []const u8, +) ArgvToCommandLineError![:0]u16 { + var buf = std.ArrayList(u8).init(allocator); + defer buf.deinit(); + + if (argv.len != 0) { + const arg0 = argv[0]; + + // The first argument must be quoted if it contains spaces or ASCII control characters + // (excluding DEL). It also follows special quoting rules where backslashes have no special + // interpretation, which makes it impossible to pass certain first arguments containing + // double quotes to a child process without characters from the first argument leaking into + // subsequent ones (which could have security implications). + // + // Empty arguments technically don't need quotes, but we quote them anyway for maximum + // compatibility with different implementations of the 'CommandLineToArgvW' algorithm. + // + // Double quotes are illegal in paths on Windows, so for the sake of simplicity we reject + // all first arguments containing double quotes, even ones that we could theoretically + // serialize in unquoted form. + var needs_quotes = arg0.len == 0; + for (arg0) |c| { + if (c <= ' ') { + needs_quotes = true; + } else if (c == '"') { + return error.InvalidArg0; + } + } + if (needs_quotes) { + try buf.append('"'); + try buf.appendSlice(arg0); + try buf.append('"'); + } else { + try buf.appendSlice(arg0); + } + + for (argv[1..]) |arg| { + try buf.append(' '); + + // Subsequent arguments must be quoted if they contain spaces, tabs or double quotes, + // or if they are empty. For simplicity and for maximum compatibility with different + // implementations of the 'CommandLineToArgvW' algorithm, we also quote all ASCII + // control characters (again, excluding DEL). + needs_quotes = for (arg) |c| { + if (c <= ' ' or c == '"') { + break true; + } + } else arg.len == 0; + if (!needs_quotes) { + try buf.appendSlice(arg); + continue; + } + + try buf.append('"'); + var backslash_count: usize = 0; + for (arg) |byte| { + switch (byte) { + '\\' => { + backslash_count += 1; + }, + '"' => { + try buf.appendNTimes('\\', backslash_count * 2 + 1); + try buf.append('"'); + backslash_count = 0; + }, + else => { + try buf.appendNTimes('\\', backslash_count); + try buf.append(byte); + backslash_count = 0; + }, + } + } + try buf.appendNTimes('\\', backslash_count * 2); + try buf.append('"'); + } + } + + return try unicode.wtf8ToWtf16LeAllocZ(allocator, buf.items); +} + +test argvToCommandLineWindows { + const t = testArgvToCommandLineWindows; + + try t(&.{ + \\C:\Program Files\zig\zig.exe + , + \\run + , + \\.\src\main.zig + , + \\-target + , + \\x86_64-windows-gnu + , + \\-O + , + \\ReleaseSafe + , + \\-- + , + \\--emoji=๐ฟ + , + \\--eval=new Regex("Dwayne \"The Rock\" Johnson") + , + }, + \\"C:\Program Files\zig\zig.exe" run .\src\main.zig -target x86_64-windows-gnu -O ReleaseSafe -- --emoji=๐ฟ "--eval=new Regex(\"Dwayne \\\"The Rock\\\" Johnson\")" + ); + + try t(&.{}, ""); + try t(&.{""}, "\"\""); + try t(&.{" "}, "\" \""); + try t(&.{"\t"}, "\"\t\""); + try t(&.{"\x07"}, "\"\x07\""); + try t(&.{"๐ฆ"}, "๐ฆ"); + + try t( + &.{ "zig", "aa aa", "bb\tbb", "cc\ncc", "dd\r\ndd", "ee\x7Fee" }, + "zig \"aa aa\" \"bb\tbb\" \"cc\ncc\" \"dd\r\ndd\" ee\x7Fee", + ); + + try t( + &.{ "\\\\foo bar\\foo bar\\", "\\\\zig zag\\zig zag\\" }, + "\"\\\\foo bar\\foo bar\\\" \"\\\\zig zag\\zig zag\\\\\"", + ); + + try std.testing.expectError( + error.InvalidArg0, + argvToCommandLineWindows(std.testing.allocator, &.{"\"quotes\"quotes\""}), + ); + try std.testing.expectError( + error.InvalidArg0, + argvToCommandLineWindows(std.testing.allocator, &.{"quotes\"quotes"}), + ); + try std.testing.expectError( + error.InvalidArg0, + argvToCommandLineWindows(std.testing.allocator, &.{"q u o t e s \" q u o t e s"}), + ); +} + +fn testArgvToCommandLineWindows(argv: []const []const u8, expected_cmd_line: []const u8) !void { + const cmd_line_w = try argvToCommandLineWindows(std.testing.allocator, argv); + defer std.testing.allocator.free(cmd_line_w); + + const cmd_line = try unicode.wtf16LeToWtf8Alloc(std.testing.allocator, cmd_line_w); + defer std.testing.allocator.free(cmd_line); + + try std.testing.expectEqualStrings(expected_cmd_line, cmd_line); +} + +const ArgvToScriptCommandLineError = error{ + OutOfMemory, + InvalidWtf8, + /// NUL (U+0000), LF (U+000A), CR (U+000D) are not allowed + /// within arguments when executing a `.bat`/`.cmd` script. + /// - NUL/LF signifiies end of arguments, so anything afterwards + /// would be lost after execution. + /// - CR is stripped by `cmd.exe`, so any CR codepoints + /// would be lost after execution. + InvalidBatchScriptArg, +}; + +/// Serializes `argv` to a Windows command-line string that uses `cmd.exe /c` and `cmd.exe`-specific +/// escaping rules. The caller owns the returned slice. +/// +/// Escapes `argv` using the suggested mitigation against arbitrary command execution from: +/// https://flatt.tech/research/posts/batbadbut-you-cant-securely-execute-commands-on-windows/ +/// +/// The return of this function will look like +/// `cmd.exe /d /e:ON /v:OFF /c "<escaped command line>"` +/// and should be used as the `lpCommandLine` of `CreateProcessW`, while the +/// return of `windowsCmdExePath` should be used as `lpApplicationName`. +/// +/// Should only be used when spawning `.bat`/`.cmd` scripts, see `argvToCommandLineWindows` otherwise. +/// The `.bat`/`.cmd` file must be known to both have the `.bat`/`.cmd` extension and exist on the filesystem. +fn argvToScriptCommandLineWindows( + allocator: mem.Allocator, + /// Path to the `.bat`/`.cmd` script. If this path is relative, it is assumed to be relative to the CWD. + /// The script must have been verified to exist at this path before calling this function. + script_path: []const u16, + /// Arguments, not including the script name itself. Expected to be encoded as WTF-8. + script_args: []const []const u8, +) ArgvToScriptCommandLineError![:0]u16 { + var buf = try std.ArrayList(u8).initCapacity(allocator, 64); + defer buf.deinit(); + + // `/d` disables execution of AutoRun commands. + // `/e:ON` and `/v:OFF` are needed for BatBadBut mitigation: + // > If delayed expansion is enabled via the registry value DelayedExpansion, + // > it must be disabled by explicitly calling cmd.exe with the /V:OFF option. + // > Escaping for % requires the command extension to be enabled. + // > If itโs disabled via the registry value EnableExtensions, it must be enabled with the /E:ON option. + // https://flatt.tech/research/posts/batbadbut-you-cant-securely-execute-commands-on-windows/ + buf.appendSliceAssumeCapacity("cmd.exe /d /e:ON /v:OFF /c \""); + + // Always quote the path to the script arg + buf.appendAssumeCapacity('"'); + // We always want the path to the batch script to include a path separator in order to + // avoid cmd.exe searching the PATH for the script. This is not part of the arbitrary + // command execution mitigation, we just know exactly what script we want to execute + // at this point, and potentially making cmd.exe re-find it is unnecessary. + // + // If the script path does not have a path separator, then we know its relative to CWD and + // we can just put `.\` in the front. + if (mem.indexOfAny(u16, script_path, &[_]u16{ mem.nativeToLittle(u16, '\\'), mem.nativeToLittle(u16, '/') }) == null) { + try buf.appendSlice(".\\"); + } + // Note that we don't do any escaping/mitigations for this argument, since the relevant + // characters (", %, etc) are illegal in file paths and this function should only be called + // with script paths that have been verified to exist. + try unicode.wtf16LeToWtf8ArrayList(&buf, script_path); + buf.appendAssumeCapacity('"'); + + for (script_args) |arg| { + // Literal carriage returns get stripped when run through cmd.exe + // and NUL/newlines act as 'end of command.' Because of this, it's basically + // always a mistake to include these characters in argv, so it's + // an error condition in order to ensure that the return of this + // function can always roundtrip through cmd.exe. + if (std.mem.indexOfAny(u8, arg, "\x00\r\n") != null) { + return error.InvalidBatchScriptArg; + } + + // Separate args with a space. + try buf.append(' '); + + // Need to quote if the argument is empty (otherwise the arg would just be lost) + // or if the last character is a `\`, since then something like "%~2" in a .bat + // script would cause the closing " to be escaped which we don't want. + var needs_quotes = arg.len == 0 or arg[arg.len - 1] == '\\'; + if (!needs_quotes) { + for (arg) |c| { + switch (c) { + // Known good characters that don't need to be quoted + 'A'...'Z', 'a'...'z', '0'...'9', '#', '$', '*', '+', '-', '.', '/', ':', '?', '@', '\\', '_' => {}, + // When in doubt, quote + else => { + needs_quotes = true; + break; + }, + } + } + } + if (needs_quotes) { + try buf.append('"'); + } + var backslashes: usize = 0; + for (arg) |c| { + switch (c) { + '\\' => { + backslashes += 1; + }, + '"' => { + try buf.appendNTimes('\\', backslashes); + try buf.append('"'); + backslashes = 0; + }, + // Replace `%` with `%%cd:~,%`. + // + // cmd.exe allows extracting a substring from an environment + // variable with the syntax: `%foo:~<start_index>,<end_index>%`. + // Therefore, `%cd:~,%` will always expand to an empty string + // since both the start and end index are blank, and it is assumed + // that `%cd%` is always available since it is a built-in variable + // that corresponds to the current directory. + // + // This means that replacing `%foo%` with `%%cd:~,%foo%%cd:~,%` + // will stop `%foo%` from being expanded and *after* expansion + // we'll still be left with `%foo%` (the literal string). + '%' => { + // the trailing `%` is appended outside the switch + try buf.appendSlice("%%cd:~,"); + backslashes = 0; + }, + else => { + backslashes = 0; + }, + } + try buf.append(c); + } + if (needs_quotes) { + try buf.appendNTimes('\\', backslashes); + try buf.append('"'); + } + } + + try buf.append('"'); + + return try unicode.wtf8ToWtf16LeAllocZ(allocator, buf.items); +} |
