aboutsummaryrefslogtreecommitdiff
path: root/lib/std/process.zig
diff options
context:
space:
mode:
author: Andrew Kelley <andrew@ziglang.org> 2024-02-25 01:00:25 -0800
committer: GitHub <noreply@github.com> 2024-02-25 01:00:25 -0800
commit: 6c2eb0f131588be111652a755a4492ff72d16440 (patch)
tree: 0d317950da0694df32c4eb088278662f159e8736 /lib/std/process.zig
parent: 63ea3e172e2788856cfb69b2f6085930a1c69d5b (diff)
parent: 9fec608b3bbe3c00528e01bd09aa29f9b9f97415 (diff)
download: zig-6c2eb0f131588be111652a755a4492ff72d16440.tar.gz
zig-6c2eb0f131588be111652a755a4492ff72d16440.zip
Merge pull request #19005 from squeek502/wtf
Fix handling of Windows (WTF-16) and WASI (UTF-8) paths, etc
Diffstat (limited to 'lib/std/process.zig')
-rw-r--r-- lib/std/process.zig 156
1 files changed, 92 insertions, 64 deletions
diff --git a/lib/std/process.zig b/lib/std/process.zig
index 397e6971e6..5360a96521 100644
--- a/lib/std/process.zig
+++ b/lib/std/process.zig
@@ -16,11 +16,15 @@ pub const changeCurDir = os.chdir;
pub const changeCurDirC = os.chdirC;
/// The result is a slice of `out_buffer`, from index `0`.
+/// On Windows, the result is encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
+/// On other platforms, the result is an opaque sequence of bytes with no particular encoding.
pub fn getCwd(out_buffer: []u8) ![]u8 {
return os.getcwd(out_buffer);
}
/// Caller must free the returned memory.
+/// On Windows, the result is encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
+/// On other platforms, the result is an opaque sequence of bytes with no particular encoding.
pub fn getCwdAlloc(allocator: Allocator) ![]u8 {
// The use of MAX_PATH_BYTES here is just a heuristic: most paths will fit
// in stack_buf, avoiding an extra allocation in the common case.
@@ -76,7 +80,7 @@ pub const EnvMap = struct {
_ = self;
if (builtin.os.tag == .windows) {
var h = std.hash.Wyhash.init(0);
- var it = std.unicode.Utf8View.initUnchecked(s).iterator();
+ var it = std.unicode.Wtf8View.initUnchecked(s).iterator();
while (it.nextCodepoint()) |cp| {
const cp_upper = upcase(cp);
h.update(&[_]u8{
@@ -93,8 +97,8 @@ pub const EnvMap = struct {
pub fn eql(self: @This(), a: []const u8, b: []const u8) bool {
_ = self;
if (builtin.os.tag == .windows) {
- var it_a = std.unicode.Utf8View.initUnchecked(a).iterator();
- var it_b = std.unicode.Utf8View.initUnchecked(b).iterator();
+ var it_a = std.unicode.Wtf8View.initUnchecked(a).iterator();
+ var it_b = std.unicode.Wtf8View.initUnchecked(b).iterator();
while (true) {
const c_a = it_a.nextCodepoint() orelse break;
const c_b = it_b.nextCodepoint() orelse return false;
@@ -129,8 +133,9 @@ pub const EnvMap = struct {
/// Same as `put` but the key and value become owned by the EnvMap rather
/// than being copied.
/// If `putMove` fails, the ownership of key and value does not transfer.
- /// On Windows `key` must be a valid UTF-8 string.
+ /// On Windows `key` must be a valid [WTF-8](https://simonsapin.github.io/wtf-8/) string.
pub fn putMove(self: *EnvMap, key: []u8, value: []u8) !void {
+ assert(std.unicode.wtf8ValidateSlice(key));
const get_or_put = try self.hash_map.getOrPut(key);
if (get_or_put.found_existing) {
self.free(get_or_put.key_ptr.*);
@@ -141,8 +146,9 @@ pub const EnvMap = struct {
}
/// `key` and `value` are copied into the EnvMap.
- /// On Windows `key` must be a valid UTF-8 string.
+ /// On Windows `key` must be a valid [WTF-8](https://simonsapin.github.io/wtf-8/) string.
pub fn put(self: *EnvMap, key: []const u8, value: []const u8) !void {
+ assert(std.unicode.wtf8ValidateSlice(key));
const value_copy = try self.copy(value);
errdefer self.free(value_copy);
const get_or_put = try self.hash_map.getOrPut(key);
@@ -159,23 +165,26 @@ pub const EnvMap = struct {
/// Find the address of the value associated with a key.
/// The returned pointer is invalidated if the map resizes.
- /// On Windows `key` must be a valid UTF-8 string.
+ /// On Windows `key` must be a valid [WTF-8](https://simonsapin.github.io/wtf-8/) string.
pub fn getPtr(self: EnvMap, key: []const u8) ?*[]const u8 {
+ assert(std.unicode.wtf8ValidateSlice(key));
return self.hash_map.getPtr(key);
}
/// Return the map's copy of the value associated with
/// a key. The returned string is invalidated if this
/// key is removed from the map.
- /// On Windows `key` must be a valid UTF-8 string.
+ /// On Windows `key` must be a valid [WTF-8](https://simonsapin.github.io/wtf-8/) string.
pub fn get(self: EnvMap, key: []const u8) ?[]const u8 {
+ assert(std.unicode.wtf8ValidateSlice(key));
return self.hash_map.get(key);
}
/// Removes the item from the map and frees its value.
/// This invalidates the value returned by get() for this key.
- /// On Windows `key` must be a valid UTF-8 string.
+ /// On Windows `key` must be a valid [WTF-8](https://simonsapin.github.io/wtf-8/) string.
pub fn remove(self: *EnvMap, key: []const u8) void {
+ assert(std.unicode.wtf8ValidateSlice(key));
const kv = self.hash_map.fetchRemove(key) orelse return;
self.free(kv.key);
self.free(kv.value);
@@ -239,18 +248,34 @@ test "EnvMap" {
try testing.expectEqual(@as(EnvMap.Size, 1), env.count());
- // test Unicode case-insensitivity on Windows
if (builtin.os.tag == .windows) {
+ // test Unicode case-insensitivity on Windows
try env.put("КИРиллИЦА", "something else");
try testing.expectEqualStrings("something else", env.get("кириллица").?);
+
+ // and WTF-8 that's not valid UTF-8
+ const wtf8_with_surrogate_pair = try std.unicode.wtf16LeToWtf8Alloc(testing.allocator, &[_]u16{
+ std.mem.nativeToLittle(u16, 0xD83D), // unpaired high surrogate
+ });
+ defer testing.allocator.free(wtf8_with_surrogate_pair);
+
+ try env.put(wtf8_with_surrogate_pair, wtf8_with_surrogate_pair);
+ try testing.expectEqualSlices(u8, wtf8_with_surrogate_pair, env.get(wtf8_with_surrogate_pair).?);
}
}
+pub const GetEnvMapError = error{
+ OutOfMemory,
+ /// WASI-only. `environ_sizes_get` or `environ_get`
+ /// failed for an unexpected reason.
+ Unexpected,
+};
+
/// Returns a snapshot of the environment variables of the current process.
/// Any modifications to the resulting EnvMap will not be reflected in the environment, and
/// likewise, any future modifications to the environment will not be reflected in the EnvMap.
/// Caller owns resulting `EnvMap` and should call its `deinit` fn when done.
-pub fn getEnvMap(allocator: Allocator) !EnvMap {
+pub fn getEnvMap(allocator: Allocator) GetEnvMapError!EnvMap {
var result = EnvMap.init(allocator);
errdefer result.deinit();
@@ -269,7 +294,7 @@ pub fn getEnvMap(allocator: Allocator) !EnvMap {
while (ptr[i] != 0 and ptr[i] != '=') : (i += 1) {}
const key_w = ptr[key_start..i];
- const key = try std.unicode.utf16leToUtf8Alloc(allocator, key_w);
+ const key = try std.unicode.wtf16LeToWtf8Alloc(allocator, key_w);
errdefer allocator.free(key);
if (ptr[i] == '=') i += 1;
@@ -277,7 +302,7 @@ pub fn getEnvMap(allocator: Allocator) !EnvMap {
const value_start = i;
while (ptr[i] != 0) : (i += 1) {}
const value_w = ptr[value_start..i];
- const value = try std.unicode.utf16leToUtf8Alloc(allocator, value_w);
+ const value = try std.unicode.wtf16LeToWtf8Alloc(allocator, value_w);
errdefer allocator.free(value);
i += 1; // skip over null byte
@@ -355,25 +380,28 @@ pub const GetEnvVarOwnedError = error{
OutOfMemory,
EnvironmentVariableNotFound,
- /// See https://github.com/ziglang/zig/issues/1774
- InvalidUtf8,
+ /// On Windows, environment variable keys provided by the user must be valid WTF-8.
+ /// https://simonsapin.github.io/wtf-8/
+ InvalidWtf8,
};
/// Caller must free returned memory.
+/// On Windows, if `key` is not valid [WTF-8](https://simonsapin.github.io/wtf-8/),
+/// then `error.InvalidWtf8` is returned.
+/// On Windows, the value is encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
+/// On other platforms, the value is an opaque sequence of bytes with no particular encoding.
pub fn getEnvVarOwned(allocator: Allocator, key: []const u8) GetEnvVarOwnedError![]u8 {
if (builtin.os.tag == .windows) {
const result_w = blk: {
- const key_w = try std.unicode.utf8ToUtf16LeWithNull(allocator, key);
- defer allocator.free(key_w);
+ var stack_alloc = std.heap.stackFallback(256 * @sizeOf(u16), allocator);
+ const stack_allocator = stack_alloc.get();
+ const key_w = try std.unicode.wtf8ToWtf16LeAllocZ(stack_allocator, key);
+ defer stack_allocator.free(key_w);
break :blk std.os.getenvW(key_w) orelse return error.EnvironmentVariableNotFound;
};
- return std.unicode.utf16leToUtf8Alloc(allocator, result_w) catch |err| switch (err) {
- error.DanglingSurrogateHalf => return error.InvalidUtf8,
- error.ExpectedSecondSurrogateHalf => return error.InvalidUtf8,
- error.UnexpectedSecondSurrogateHalf => return error.InvalidUtf8,
- else => |e| return e,
- };
+ // wtf16LeToWtf8Alloc can only fail with OutOfMemory
+ return std.unicode.wtf16LeToWtf8Alloc(allocator, result_w);
} else if (builtin.os.tag == .wasi and !builtin.link_libc) {
var envmap = getEnvMap(allocator) catch return error.OutOfMemory;
defer envmap.deinit();
@@ -385,6 +413,7 @@ pub fn getEnvVarOwned(allocator: Allocator, key: []const u8) GetEnvVarOwnedError
}
}
+/// On Windows, `key` must be valid UTF-8.
pub fn hasEnvVarConstant(comptime key: []const u8) bool {
if (builtin.os.tag == .windows) {
const key_w = comptime std.unicode.utf8ToUtf16LeStringLiteral(key);
@@ -396,11 +425,22 @@ pub fn hasEnvVarConstant(comptime key: []const u8) bool {
}
}
-pub fn hasEnvVar(allocator: Allocator, key: []const u8) error{OutOfMemory}!bool {
+pub const HasEnvVarError = error{
+ OutOfMemory,
+
+ /// On Windows, environment variable keys provided by the user must be valid WTF-8.
+ /// https://simonsapin.github.io/wtf-8/
+ InvalidWtf8,
+};
+
+/// On Windows, if `key` is not valid [WTF-8](https://simonsapin.github.io/wtf-8/),
+/// then `error.InvalidWtf8` is returned.
+pub fn hasEnvVar(allocator: Allocator, key: []const u8) HasEnvVarError!bool {
if (builtin.os.tag == .windows) {
var stack_alloc = std.heap.stackFallback(256 * @sizeOf(u16), allocator);
- const key_w = try std.unicode.utf8ToUtf16LeWithNull(stack_alloc.get(), key);
- defer stack_alloc.allocator.free(key_w);
+ const stack_allocator = stack_alloc.get();
+ const key_w = try std.unicode.wtf8ToWtf16LeAllocZ(stack_allocator, key);
+ defer stack_allocator.free(key_w);
return std.os.getenvW(key_w) != null;
} else if (builtin.os.tag == .wasi and !builtin.link_libc) {
var envmap = getEnvMap(allocator) catch return error.OutOfMemory;
@@ -411,9 +451,22 @@ pub fn hasEnvVar(allocator: Allocator, key: []const u8) error{OutOfMemory}!bool
}
}
-test "os.getEnvVarOwned" {
- const ga = std.testing.allocator;
- try testing.expectError(error.EnvironmentVariableNotFound, getEnvVarOwned(ga, "BADENV"));
+test getEnvVarOwned {
+ try testing.expectError(
+ error.EnvironmentVariableNotFound,
+ getEnvVarOwned(std.testing.allocator, "BADENV"),
+ );
+}
+
+test hasEnvVarConstant {
+ if (builtin.os.tag == .wasi and !builtin.link_libc) return error.SkipZigTest;
+
+ try testing.expect(!hasEnvVarConstant("BADENV"));
+}
+
+test hasEnvVar {
+ const has_env = try hasEnvVar(std.testing.allocator, "BADENV");
+ try testing.expect(!has_env);
}
pub const ArgIteratorPosix = struct {
@@ -531,6 +584,7 @@ pub const ArgIteratorWasi = struct {
pub const ArgIteratorWindows = struct {
allocator: Allocator,
/// Owned by the iterator.
+ /// Encoded as WTF-8.
cmd_line: []const u8,
index: usize = 0,
/// Owned by the iterator. Long enough to hold the entire `cmd_line` plus a null terminator.
@@ -538,20 +592,14 @@ pub const ArgIteratorWindows = struct {
start: usize = 0,
end: usize = 0,
- pub const InitError = error{ OutOfMemory, InvalidCmdLine };
+ pub const InitError = error{OutOfMemory};
- /// `cmd_line_w` *must* be an UTF16-LE-encoded string.
+ /// `cmd_line_w` *must* be a WTF-16 LE encoded string.
///
- /// The iterator makes a copy of `cmd_line_w` converted UTF-8 and keeps it; it does *not* take
+ /// The iterator makes a copy of `cmd_line_w` converted to WTF-8 and keeps it; it does *not* take
/// ownership of `cmd_line_w`.
pub fn init(allocator: Allocator, cmd_line_w: [*:0]const u16) InitError!ArgIteratorWindows {
- const cmd_line = std.unicode.utf16leToUtf8Alloc(allocator, mem.sliceTo(cmd_line_w, 0)) catch |err| switch (err) {
- error.DanglingSurrogateHalf,
- error.ExpectedSecondSurrogateHalf,
- error.UnexpectedSecondSurrogateHalf,
- => return error.InvalidCmdLine,
- error.OutOfMemory => return error.OutOfMemory,
- };
+ const cmd_line = try std.unicode.wtf16LeToWtf8Alloc(allocator, mem.sliceTo(cmd_line_w, 0));
errdefer allocator.free(cmd_line);
const buffer = try allocator.alloc(u8, cmd_line.len + 1);
@@ -566,6 +614,7 @@ pub const ArgIteratorWindows = struct {
/// Returns the next argument and advances the iterator. Returns `null` if at the end of the
/// command-line string. The iterator owns the returned slice.
+ /// The result is encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
pub fn next(self: *ArgIteratorWindows) ?[:0]const u8 {
return self.nextWithStrategy(next_strategy);
}
@@ -777,7 +826,6 @@ pub fn ArgIteratorGeneral(comptime options: ArgIteratorGeneralOptions) type {
pub const Self = @This();
pub const InitError = error{OutOfMemory};
- pub const InitUtf16leError = error{ OutOfMemory, InvalidCmdLine };
/// cmd_line_utf8 MUST remain valid and constant while using this instance
pub fn init(allocator: Allocator, cmd_line_utf8: []const u8) InitError!Self {
@@ -805,30 +853,6 @@ pub fn ArgIteratorGeneral(comptime options: ArgIteratorGeneralOptions) type {
};
}
- /// cmd_line_utf16le MUST be encoded UTF16-LE, and is converted to UTF-8 in an internal buffer
- pub fn initUtf16le(allocator: Allocator, cmd_line_utf16le: [*:0]const u16) InitUtf16leError!Self {
- const utf16le_slice = mem.sliceTo(cmd_line_utf16le, 0);
- const cmd_line = std.unicode.utf16leToUtf8Alloc(allocator, utf16le_slice) catch |err| switch (err) {
- error.ExpectedSecondSurrogateHalf,
- error.DanglingSurrogateHalf,
- error.UnexpectedSecondSurrogateHalf,
- => return error.InvalidCmdLine,
-
- error.OutOfMemory => return error.OutOfMemory,
- };
- errdefer allocator.free(cmd_line);
-
- const buffer = try allocator.alloc(u8, cmd_line.len + 1);
- errdefer allocator.free(buffer);
-
- return Self{
- .allocator = allocator,
- .cmd_line = cmd_line,
- .free_cmd_line_on_deinit = true,
- .buffer = buffer,
- };
- }
-
// Skips over whitespace in the cmd_line.
// Returns false if the terminating sentinel is reached, true otherwise.
// Also skips over comments (if supported).
@@ -1021,6 +1045,8 @@ pub const ArgIterator = struct {
/// Get the next argument. Returns 'null' if we are at the end.
/// Returned slice is pointing to the iterator's internal buffer.
+ /// On Windows, the result is encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
+ /// On other platforms, the result is an opaque sequence of bytes with no particular encoding.
pub fn next(self: *ArgIterator) ?([:0]const u8) {
return self.inner.next();
}
@@ -1057,6 +1083,8 @@ pub fn argsWithAllocator(allocator: Allocator) ArgIterator.InitError!ArgIterator
}
/// Caller must call argsFree on result.
+/// On Windows, the result is encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
+/// On other platforms, the result is an opaque sequence of bytes with no particular encoding.
pub fn argsAlloc(allocator: Allocator) ![][:0]u8 {
// TODO refactor to only make 1 allocation.
var it = try argsWithAllocator(allocator);
@@ -1201,7 +1229,7 @@ test "ArgIteratorWindows" {
}
fn testArgIteratorWindows(cmd_line: []const u8, expected_args: []const []const u8) !void {
- const cmd_line_w = try std.unicode.utf8ToUtf16LeWithNull(testing.allocator, cmd_line);
+ const cmd_line_w = try std.unicode.wtf8ToWtf16LeAllocZ(testing.allocator, cmd_line);
defer testing.allocator.free(cmd_line_w);
// next