diff options
| author | Andrew Kelley <andrew@ziglang.org> | 2024-02-25 01:00:25 -0800 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2024-02-25 01:00:25 -0800 |
| commit | 6c2eb0f131588be111652a755a4492ff72d16440 (patch) | |
| tree | 0d317950da0694df32c4eb088278662f159e8736 /lib/std/process.zig | |
| parent | 63ea3e172e2788856cfb69b2f6085930a1c69d5b (diff) | |
| parent | 9fec608b3bbe3c00528e01bd09aa29f9b9f97415 (diff) | |
| download | zig-6c2eb0f131588be111652a755a4492ff72d16440.tar.gz zig-6c2eb0f131588be111652a755a4492ff72d16440.zip | |
Merge pull request #19005 from squeek502/wtf
Fix handling of Windows (WTF-16) and WASI (UTF-8) paths, etc
Diffstat (limited to 'lib/std/process.zig')
| -rw-r--r-- | lib/std/process.zig | 156 |
1 files changed, 92 insertions, 64 deletions
diff --git a/lib/std/process.zig b/lib/std/process.zig index 397e6971e6..5360a96521 100644 --- a/lib/std/process.zig +++ b/lib/std/process.zig @@ -16,11 +16,15 @@ pub const changeCurDir = os.chdir; pub const changeCurDirC = os.chdirC; /// The result is a slice of `out_buffer`, from index `0`. +/// On Windows, the result is encoded as [WTF-8](https://simonsapin.github.io/wtf-8/). +/// On other platforms, the result is an opaque sequence of bytes with no particular encoding. pub fn getCwd(out_buffer: []u8) ![]u8 { return os.getcwd(out_buffer); } /// Caller must free the returned memory. +/// On Windows, the result is encoded as [WTF-8](https://simonsapin.github.io/wtf-8/). +/// On other platforms, the result is an opaque sequence of bytes with no particular encoding. pub fn getCwdAlloc(allocator: Allocator) ![]u8 { // The use of MAX_PATH_BYTES here is just a heuristic: most paths will fit // in stack_buf, avoiding an extra allocation in the common case. @@ -76,7 +80,7 @@ pub const EnvMap = struct { _ = self; if (builtin.os.tag == .windows) { var h = std.hash.Wyhash.init(0); - var it = std.unicode.Utf8View.initUnchecked(s).iterator(); + var it = std.unicode.Wtf8View.initUnchecked(s).iterator(); while (it.nextCodepoint()) |cp| { const cp_upper = upcase(cp); h.update(&[_]u8{ @@ -93,8 +97,8 @@ pub const EnvMap = struct { pub fn eql(self: @This(), a: []const u8, b: []const u8) bool { _ = self; if (builtin.os.tag == .windows) { - var it_a = std.unicode.Utf8View.initUnchecked(a).iterator(); - var it_b = std.unicode.Utf8View.initUnchecked(b).iterator(); + var it_a = std.unicode.Wtf8View.initUnchecked(a).iterator(); + var it_b = std.unicode.Wtf8View.initUnchecked(b).iterator(); while (true) { const c_a = it_a.nextCodepoint() orelse break; const c_b = it_b.nextCodepoint() orelse return false; @@ -129,8 +133,9 @@ pub const EnvMap = struct { /// Same as `put` but the key and value become owned by the EnvMap rather /// than being copied. /// If `putMove` fails, the ownership of key and value does not transfer. - /// On Windows `key` must be a valid UTF-8 string. + /// On Windows `key` must be a valid [WTF-8](https://simonsapin.github.io/wtf-8/) string. pub fn putMove(self: *EnvMap, key: []u8, value: []u8) !void { + assert(std.unicode.wtf8ValidateSlice(key)); const get_or_put = try self.hash_map.getOrPut(key); if (get_or_put.found_existing) { self.free(get_or_put.key_ptr.*); @@ -141,8 +146,9 @@ pub const EnvMap = struct { } /// `key` and `value` are copied into the EnvMap. - /// On Windows `key` must be a valid UTF-8 string. + /// On Windows `key` must be a valid [WTF-8](https://simonsapin.github.io/wtf-8/) string. pub fn put(self: *EnvMap, key: []const u8, value: []const u8) !void { + assert(std.unicode.wtf8ValidateSlice(key)); const value_copy = try self.copy(value); errdefer self.free(value_copy); const get_or_put = try self.hash_map.getOrPut(key); @@ -159,23 +165,26 @@ pub const EnvMap = struct { /// Find the address of the value associated with a key. /// The returned pointer is invalidated if the map resizes. - /// On Windows `key` must be a valid UTF-8 string. + /// On Windows `key` must be a valid [WTF-8](https://simonsapin.github.io/wtf-8/) string. pub fn getPtr(self: EnvMap, key: []const u8) ?*[]const u8 { + assert(std.unicode.wtf8ValidateSlice(key)); return self.hash_map.getPtr(key); } /// Return the map's copy of the value associated with /// a key. The returned string is invalidated if this /// key is removed from the map. - /// On Windows `key` must be a valid UTF-8 string. + /// On Windows `key` must be a valid [WTF-8](https://simonsapin.github.io/wtf-8/) string. pub fn get(self: EnvMap, key: []const u8) ?[]const u8 { + assert(std.unicode.wtf8ValidateSlice(key)); return self.hash_map.get(key); } /// Removes the item from the map and frees its value. /// This invalidates the value returned by get() for this key. - /// On Windows `key` must be a valid UTF-8 string. + /// On Windows `key` must be a valid [WTF-8](https://simonsapin.github.io/wtf-8/) string. pub fn remove(self: *EnvMap, key: []const u8) void { + assert(std.unicode.wtf8ValidateSlice(key)); const kv = self.hash_map.fetchRemove(key) orelse return; self.free(kv.key); self.free(kv.value); @@ -239,18 +248,34 @@ test "EnvMap" { try testing.expectEqual(@as(EnvMap.Size, 1), env.count()); - // test Unicode case-insensitivity on Windows if (builtin.os.tag == .windows) { + // test Unicode case-insensitivity on Windows try env.put("КИРиллИЦА", "something else"); try testing.expectEqualStrings("something else", env.get("кириллица").?); + + // and WTF-8 that's not valid UTF-8 + const wtf8_with_surrogate_pair = try std.unicode.wtf16LeToWtf8Alloc(testing.allocator, &[_]u16{ + std.mem.nativeToLittle(u16, 0xD83D), // unpaired high surrogate + }); + defer testing.allocator.free(wtf8_with_surrogate_pair); + + try env.put(wtf8_with_surrogate_pair, wtf8_with_surrogate_pair); + try testing.expectEqualSlices(u8, wtf8_with_surrogate_pair, env.get(wtf8_with_surrogate_pair).?); } } +pub const GetEnvMapError = error{ + OutOfMemory, + /// WASI-only. `environ_sizes_get` or `environ_get` + /// failed for an unexpected reason. + Unexpected, +}; + /// Returns a snapshot of the environment variables of the current process. /// Any modifications to the resulting EnvMap will not be reflected in the environment, and /// likewise, any future modifications to the environment will not be reflected in the EnvMap. /// Caller owns resulting `EnvMap` and should call its `deinit` fn when done. -pub fn getEnvMap(allocator: Allocator) !EnvMap { +pub fn getEnvMap(allocator: Allocator) GetEnvMapError!EnvMap { var result = EnvMap.init(allocator); errdefer result.deinit(); @@ -269,7 +294,7 @@ pub fn getEnvMap(allocator: Allocator) !EnvMap { while (ptr[i] != 0 and ptr[i] != '=') : (i += 1) {} const key_w = ptr[key_start..i]; - const key = try std.unicode.utf16leToUtf8Alloc(allocator, key_w); + const key = try std.unicode.wtf16LeToWtf8Alloc(allocator, key_w); errdefer allocator.free(key); if (ptr[i] == '=') i += 1; @@ -277,7 +302,7 @@ pub fn getEnvMap(allocator: Allocator) !EnvMap { const value_start = i; while (ptr[i] != 0) : (i += 1) {} const value_w = ptr[value_start..i]; - const value = try std.unicode.utf16leToUtf8Alloc(allocator, value_w); + const value = try std.unicode.wtf16LeToWtf8Alloc(allocator, value_w); errdefer allocator.free(value); i += 1; // skip over null byte @@ -355,25 +380,28 @@ pub const GetEnvVarOwnedError = error{ OutOfMemory, EnvironmentVariableNotFound, - /// See https://github.com/ziglang/zig/issues/1774 - InvalidUtf8, + /// On Windows, environment variable keys provided by the user must be valid WTF-8. + /// https://simonsapin.github.io/wtf-8/ + InvalidWtf8, }; /// Caller must free returned memory. +/// On Windows, if `key` is not valid [WTF-8](https://simonsapin.github.io/wtf-8/), +/// then `error.InvalidWtf8` is returned. +/// On Windows, the value is encoded as [WTF-8](https://simonsapin.github.io/wtf-8/). +/// On other platforms, the value is an opaque sequence of bytes with no particular encoding. pub fn getEnvVarOwned(allocator: Allocator, key: []const u8) GetEnvVarOwnedError![]u8 { if (builtin.os.tag == .windows) { const result_w = blk: { - const key_w = try std.unicode.utf8ToUtf16LeWithNull(allocator, key); - defer allocator.free(key_w); + var stack_alloc = std.heap.stackFallback(256 * @sizeOf(u16), allocator); + const stack_allocator = stack_alloc.get(); + const key_w = try std.unicode.wtf8ToWtf16LeAllocZ(stack_allocator, key); + defer stack_allocator.free(key_w); break :blk std.os.getenvW(key_w) orelse return error.EnvironmentVariableNotFound; }; - return std.unicode.utf16leToUtf8Alloc(allocator, result_w) catch |err| switch (err) { - error.DanglingSurrogateHalf => return error.InvalidUtf8, - error.ExpectedSecondSurrogateHalf => return error.InvalidUtf8, - error.UnexpectedSecondSurrogateHalf => return error.InvalidUtf8, - else => |e| return e, - }; + // wtf16LeToWtf8Alloc can only fail with OutOfMemory + return std.unicode.wtf16LeToWtf8Alloc(allocator, result_w); } else if (builtin.os.tag == .wasi and !builtin.link_libc) { var envmap = getEnvMap(allocator) catch return error.OutOfMemory; defer envmap.deinit(); @@ -385,6 +413,7 @@ pub fn getEnvVarOwned(allocator: Allocator, key: []const u8) GetEnvVarOwnedError } } +/// On Windows, `key` must be valid UTF-8. pub fn hasEnvVarConstant(comptime key: []const u8) bool { if (builtin.os.tag == .windows) { const key_w = comptime std.unicode.utf8ToUtf16LeStringLiteral(key); @@ -396,11 +425,22 @@ pub fn hasEnvVarConstant(comptime key: []const u8) bool { } } -pub fn hasEnvVar(allocator: Allocator, key: []const u8) error{OutOfMemory}!bool { +pub const HasEnvVarError = error{ + OutOfMemory, + + /// On Windows, environment variable keys provided by the user must be valid WTF-8. + /// https://simonsapin.github.io/wtf-8/ + InvalidWtf8, +}; + +/// On Windows, if `key` is not valid [WTF-8](https://simonsapin.github.io/wtf-8/), +/// then `error.InvalidWtf8` is returned. +pub fn hasEnvVar(allocator: Allocator, key: []const u8) HasEnvVarError!bool { if (builtin.os.tag == .windows) { var stack_alloc = std.heap.stackFallback(256 * @sizeOf(u16), allocator); - const key_w = try std.unicode.utf8ToUtf16LeWithNull(stack_alloc.get(), key); - defer stack_alloc.allocator.free(key_w); + const stack_allocator = stack_alloc.get(); + const key_w = try std.unicode.wtf8ToWtf16LeAllocZ(stack_allocator, key); + defer stack_allocator.free(key_w); return std.os.getenvW(key_w) != null; } else if (builtin.os.tag == .wasi and !builtin.link_libc) { var envmap = getEnvMap(allocator) catch return error.OutOfMemory; @@ -411,9 +451,22 @@ pub fn hasEnvVar(allocator: Allocator, key: []const u8) error{OutOfMemory}!bool } } -test "os.getEnvVarOwned" { - const ga = std.testing.allocator; - try testing.expectError(error.EnvironmentVariableNotFound, getEnvVarOwned(ga, "BADENV")); +test getEnvVarOwned { + try testing.expectError( + error.EnvironmentVariableNotFound, + getEnvVarOwned(std.testing.allocator, "BADENV"), + ); +} + +test hasEnvVarConstant { + if (builtin.os.tag == .wasi and !builtin.link_libc) return error.SkipZigTest; + + try testing.expect(!hasEnvVarConstant("BADENV")); +} + +test hasEnvVar { + const has_env = try hasEnvVar(std.testing.allocator, "BADENV"); + try testing.expect(!has_env); } pub const ArgIteratorPosix = struct { @@ -531,6 +584,7 @@ pub const ArgIteratorWasi = struct { pub const ArgIteratorWindows = struct { allocator: Allocator, /// Owned by the iterator. + /// Encoded as WTF-8. cmd_line: []const u8, index: usize = 0, /// Owned by the iterator. Long enough to hold the entire `cmd_line` plus a null terminator. @@ -538,20 +592,14 @@ pub const ArgIteratorWindows = struct { start: usize = 0, end: usize = 0, - pub const InitError = error{ OutOfMemory, InvalidCmdLine }; + pub const InitError = error{OutOfMemory}; - /// `cmd_line_w` *must* be an UTF16-LE-encoded string. + /// `cmd_line_w` *must* be a WTF16-LE-encoded string. /// - /// The iterator makes a copy of `cmd_line_w` converted UTF-8 and keeps it; it does *not* take + /// The iterator makes a copy of `cmd_line_w` converted WTF-8 and keeps it; it does *not* take /// ownership of `cmd_line_w`. pub fn init(allocator: Allocator, cmd_line_w: [*:0]const u16) InitError!ArgIteratorWindows { - const cmd_line = std.unicode.utf16leToUtf8Alloc(allocator, mem.sliceTo(cmd_line_w, 0)) catch |err| switch (err) { - error.DanglingSurrogateHalf, - error.ExpectedSecondSurrogateHalf, - error.UnexpectedSecondSurrogateHalf, - => return error.InvalidCmdLine, - error.OutOfMemory => return error.OutOfMemory, - }; + const cmd_line = try std.unicode.wtf16LeToWtf8Alloc(allocator, mem.sliceTo(cmd_line_w, 0)); errdefer allocator.free(cmd_line); const buffer = try allocator.alloc(u8, cmd_line.len + 1); @@ -566,6 +614,7 @@ pub const ArgIteratorWindows = struct { /// Returns the next argument and advances the iterator. Returns `null` if at the end of the /// command-line string. The iterator owns the returned slice. + /// The result is encoded as [WTF-8](https://simonsapin.github.io/wtf-8/). pub fn next(self: *ArgIteratorWindows) ?[:0]const u8 { return self.nextWithStrategy(next_strategy); } @@ -777,7 +826,6 @@ pub fn ArgIteratorGeneral(comptime options: ArgIteratorGeneralOptions) type { pub const Self = @This(); pub const InitError = error{OutOfMemory}; - pub const InitUtf16leError = error{ OutOfMemory, InvalidCmdLine }; /// cmd_line_utf8 MUST remain valid and constant while using this instance pub fn init(allocator: Allocator, cmd_line_utf8: []const u8) InitError!Self { @@ -805,30 +853,6 @@ pub fn ArgIteratorGeneral(comptime options: ArgIteratorGeneralOptions) type { }; } - /// cmd_line_utf16le MUST be encoded UTF16-LE, and is converted to UTF-8 in an internal buffer - pub fn initUtf16le(allocator: Allocator, cmd_line_utf16le: [*:0]const u16) InitUtf16leError!Self { - const utf16le_slice = mem.sliceTo(cmd_line_utf16le, 0); - const cmd_line = std.unicode.utf16leToUtf8Alloc(allocator, utf16le_slice) catch |err| switch (err) { - error.ExpectedSecondSurrogateHalf, - error.DanglingSurrogateHalf, - error.UnexpectedSecondSurrogateHalf, - => return error.InvalidCmdLine, - - error.OutOfMemory => return error.OutOfMemory, - }; - errdefer allocator.free(cmd_line); - - const buffer = try allocator.alloc(u8, cmd_line.len + 1); - errdefer allocator.free(buffer); - - return Self{ - .allocator = allocator, - .cmd_line = cmd_line, - .free_cmd_line_on_deinit = true, - .buffer = buffer, - }; - } - // Skips over whitespace in the cmd_line. // Returns false if the terminating sentinel is reached, true otherwise. // Also skips over comments (if supported). @@ -1021,6 +1045,8 @@ pub const ArgIterator = struct { /// Get the next argument. Returns 'null' if we are at the end. /// Returned slice is pointing to the iterator's internal buffer. + /// On Windows, the result is encoded as [WTF-8](https://simonsapin.github.io/wtf-8/). + /// On other platforms, the result is an opaque sequence of bytes with no particular encoding. pub fn next(self: *ArgIterator) ?([:0]const u8) { return self.inner.next(); } @@ -1057,6 +1083,8 @@ pub fn argsWithAllocator(allocator: Allocator) ArgIterator.InitError!ArgIterator } /// Caller must call argsFree on result. +/// On Windows, the result is encoded as [WTF-8](https://simonsapin.github.io/wtf-8/). +/// On other platforms, the result is an opaque sequence of bytes with no particular encoding. pub fn argsAlloc(allocator: Allocator) ![][:0]u8 { // TODO refactor to only make 1 allocation. var it = try argsWithAllocator(allocator); @@ -1201,7 +1229,7 @@ test "ArgIteratorWindows" { } fn testArgIteratorWindows(cmd_line: []const u8, expected_args: []const []const u8) !void { - const cmd_line_w = try std.unicode.utf8ToUtf16LeWithNull(testing.allocator, cmd_line); + const cmd_line_w = try std.unicode.wtf8ToWtf16LeAllocZ(testing.allocator, cmd_line); defer testing.allocator.free(cmd_line_w); // next |
