diff options
| author | Ryan Liptak <squeek502@hotmail.com> | 2024-02-13 16:56:50 -0800 |
|---|---|---|
| committer | Ryan Liptak <squeek502@hotmail.com> | 2024-02-24 14:05:24 -0800 |
| commit | 68b87918df9ad82cf3161f323c55f2e238319922 (patch) | |
| tree | c802aea2b636236f767e6c1cdd864f01c2b47d15 /lib | |
| parent | f6b6b8a4ae4780f21c45824d34d8e14b3c3b5037 (diff) | |
| download | zig-68b87918df9ad82cf3161f323c55f2e238319922.tar.gz zig-68b87918df9ad82cf3161f323c55f2e238319922.zip | |
Fix handling of Windows (WTF-16) and WASI (UTF-8) paths
Windows paths now use WTF-16 <-> WTF-8 conversion everywhere, which is lossless. Previously, conversion of ill-formed UTF-16 paths (i.e. paths containing unpaired surrogate code units) would either fail or invoke illegal behavior.
WASI paths must be valid UTF-8, and the relevant function calls have been updated to handle the possibility of failure due to paths not being encoded/encodable as valid UTF-8.
Closes #18694
Closes #1774
Closes #2565
Diffstat (limited to 'lib')
| -rw-r--r-- | lib/std/Build/Cache.zig | 2 | ||||
| -rw-r--r-- | lib/std/Thread.zig | 13 | ||||
| -rw-r--r-- | lib/std/child_process.zig | 43 | ||||
| -rw-r--r-- | lib/std/fs.zig | 177 | ||||
| -rw-r--r-- | lib/std/fs/Dir.zig | 164 | ||||
| -rw-r--r-- | lib/std/fs/File.zig | 5 | ||||
| -rw-r--r-- | lib/std/fs/path.zig | 28 | ||||
| -rw-r--r-- | lib/std/fs/test.zig | 134 | ||||
| -rw-r--r-- | lib/std/fs/watch.zig | 719 | ||||
| -rw-r--r-- | lib/std/os.zig | 297 | ||||
| -rw-r--r-- | lib/std/os/windows.zig | 88 | ||||
| -rw-r--r-- | lib/std/process.zig | 152 | ||||
| -rw-r--r-- | lib/std/unicode.zig | 78 | ||||
| -rw-r--r-- | lib/std/zig/system.zig | 12 | ||||
| -rw-r--r-- | lib/std/zig/system/NativePaths.zig | 4 |
15 files changed, 901 insertions, 1015 deletions
diff --git a/lib/std/Build/Cache.zig b/lib/std/Build/Cache.zig index 119fcc7be3..0bfaf283db 100644 --- a/lib/std/Build/Cache.zig +++ b/lib/std/Build/Cache.zig @@ -162,7 +162,7 @@ fn findPrefixResolved(cache: *const Cache, resolved_path: []u8) !PrefixedPath { fn getPrefixSubpath(allocator: Allocator, prefix: []const u8, path: []u8) ![]u8 { const relative = try std.fs.path.relative(allocator, prefix, path); errdefer allocator.free(relative); - var component_iterator = std.fs.path.NativeUtf8ComponentIterator.init(relative) catch { + var component_iterator = std.fs.path.NativeComponentIterator.init(relative) catch { return error.NotASubPath; }; if (component_iterator.root() != null) { diff --git a/lib/std/Thread.zig b/lib/std/Thread.zig index daeecc9914..ae794f44af 100644 --- a/lib/std/Thread.zig +++ b/lib/std/Thread.zig @@ -91,7 +91,7 @@ pub fn setName(self: Thread, name: []const u8) SetNameError!void { }, .windows => { var buf: [max_name_len]u16 = undefined; - const len = try std.unicode.utf8ToUtf16Le(&buf, name); + const len = try std.unicode.wtf8ToWtf16Le(&buf, name); const byte_len = math.cast(c_ushort, len * 2) orelse return error.NameTooLong; // Note: NT allocates its own copy, no use-after-free here. @@ -157,17 +157,12 @@ pub fn setName(self: Thread, name: []const u8) SetNameError!void { } pub const GetNameError = error{ - // For Windows, the name is converted from UTF16 to UTF8 - CodepointTooLarge, - Utf8CannotEncodeSurrogateHalf, - DanglingSurrogateHalf, - ExpectedSecondSurrogateHalf, - UnexpectedSecondSurrogateHalf, - Unsupported, Unexpected, } || os.PrctlError || os.ReadError || std.fs.File.OpenError || std.fmt.BufPrintError; +/// On Windows, the result is encoded as [WTF-8](https://simonsapin.github.io/wtf-8/). +/// On other platforms, the result is an opaque sequence of bytes with no particular encoding. 
pub fn getName(self: Thread, buffer_ptr: *[max_name_len:0]u8) GetNameError!?[]const u8 { buffer_ptr[max_name_len] = 0; var buffer: [:0]u8 = buffer_ptr; @@ -213,7 +208,7 @@ pub fn getName(self: Thread, buffer_ptr: *[max_name_len:0]u8) GetNameError!?[]co )) { .SUCCESS => { const string = @as(*const os.windows.UNICODE_STRING, @ptrCast(&buf)); - const len = try std.unicode.utf16LeToUtf8(buffer, string.Buffer[0 .. string.Length / 2]); + const len = std.unicode.wtf16LeToWtf8(buffer, string.Buffer[0 .. string.Length / 2]); return if (len > 0) buffer[0..len] else null; }, .NOT_IMPLEMENTED => return error.Unsupported, diff --git a/lib/std/child_process.zig b/lib/std/child_process.zig index dcc5d50a09..eb0c8c13b8 100644 --- a/lib/std/child_process.zig +++ b/lib/std/child_process.zig @@ -129,10 +129,9 @@ pub const ChildProcess = struct { /// POSIX-only. `StdIo.Ignore` was selected and opening `/dev/null` returned ENODEV. NoDevice, - /// Windows-only. One of: - /// * `cwd` was provided and it could not be re-encoded into UTF16LE, or - /// * The `PATH` or `PATHEXT` environment variable contained invalid UTF-8. - InvalidUtf8, + /// Windows-only. `cwd` or `argv` was provided and it was invalid WTF-8. + /// https://simonsapin.github.io/wtf-8/ + InvalidWtf8, /// Windows-only. `cwd` was provided, but the path did not exist when spawning the child process. 
CurrentWorkingDirectoryUnlinked, @@ -767,7 +766,7 @@ pub const ChildProcess = struct { }; var piProcInfo: windows.PROCESS_INFORMATION = undefined; - const cwd_w = if (self.cwd) |cwd| try unicode.utf8ToUtf16LeAllocZ(self.allocator, cwd) else null; + const cwd_w = if (self.cwd) |cwd| try unicode.wtf8ToWtf16LeAllocZ(self.allocator, cwd) else null; defer if (cwd_w) |cwd| self.allocator.free(cwd); const cwd_w_ptr = if (cwd_w) |cwd| cwd.ptr else null; @@ -775,8 +774,8 @@ pub const ChildProcess = struct { defer if (maybe_envp_buf) |envp_buf| self.allocator.free(envp_buf); const envp_ptr = if (maybe_envp_buf) |envp_buf| envp_buf.ptr else null; - const app_name_utf8 = self.argv[0]; - const app_name_is_absolute = fs.path.isAbsolute(app_name_utf8); + const app_name_wtf8 = self.argv[0]; + const app_name_is_absolute = fs.path.isAbsolute(app_name_wtf8); // the cwd set in ChildProcess is in effect when choosing the executable path // to match posix semantics @@ -785,11 +784,11 @@ pub const ChildProcess = struct { // If the app name is absolute, then we need to use its dirname as the cwd if (app_name_is_absolute) { cwd_path_w_needs_free = true; - const dir = fs.path.dirname(app_name_utf8).?; - break :x try unicode.utf8ToUtf16LeAllocZ(self.allocator, dir); + const dir = fs.path.dirname(app_name_wtf8).?; + break :x try unicode.wtf8ToWtf16LeAllocZ(self.allocator, dir); } else if (self.cwd) |cwd| { cwd_path_w_needs_free = true; - break :x try unicode.utf8ToUtf16LeAllocZ(self.allocator, cwd); + break :x try unicode.wtf8ToWtf16LeAllocZ(self.allocator, cwd); } else { break :x &[_:0]u16{}; // empty for cwd } @@ -800,19 +799,19 @@ pub const ChildProcess = struct { // into the basename and dirname and use the dirname as an addition to the cwd // path. This is because NtQueryDirectoryFile cannot accept FileName params with // path separators. 
- const app_basename_utf8 = fs.path.basename(app_name_utf8); + const app_basename_wtf8 = fs.path.basename(app_name_wtf8); // If the app name is absolute, then the cwd will already have the app's dirname in it, // so only populate app_dirname if app name is a relative path with > 0 path separators. - const maybe_app_dirname_utf8 = if (!app_name_is_absolute) fs.path.dirname(app_name_utf8) else null; + const maybe_app_dirname_wtf8 = if (!app_name_is_absolute) fs.path.dirname(app_name_wtf8) else null; const app_dirname_w: ?[:0]u16 = x: { - if (maybe_app_dirname_utf8) |app_dirname_utf8| { - break :x try unicode.utf8ToUtf16LeAllocZ(self.allocator, app_dirname_utf8); + if (maybe_app_dirname_wtf8) |app_dirname_wtf8| { + break :x try unicode.wtf8ToWtf16LeAllocZ(self.allocator, app_dirname_wtf8); } break :x null; }; defer if (app_dirname_w != null) self.allocator.free(app_dirname_w.?); - const app_name_w = try unicode.utf8ToUtf16LeAllocZ(self.allocator, app_basename_utf8); + const app_name_w = try unicode.wtf8ToWtf16LeAllocZ(self.allocator, app_basename_wtf8); defer self.allocator.free(app_name_w); const cmd_line_w = argvToCommandLineWindows(self.allocator, self.argv) catch |err| switch (err) { @@ -1173,7 +1172,7 @@ const CreateProcessSupportedExtension = enum { exe, }; -/// Case-insensitive UTF-16 lookup +/// Case-insensitive WTF-16 lookup fn windowsCreateProcessSupportsExtension(ext: []const u16) ?CreateProcessSupportedExtension { if (ext.len != 4) return null; const State = enum { @@ -1237,7 +1236,7 @@ test "windowsCreateProcessSupportsExtension" { try std.testing.expect(windowsCreateProcessSupportsExtension(&[_]u16{ '.', 'e', 'X', 'e', 'c' }) == null); } -pub const ArgvToCommandLineError = error{ OutOfMemory, InvalidUtf8, InvalidArg0 }; +pub const ArgvToCommandLineError = error{ OutOfMemory, InvalidWtf8, InvalidArg0 }; /// Serializes `argv` to a Windows command-line string suitable for passing to a child process and /// parsing by the `CommandLineToArgvW` algorithm. 
The caller owns the returned slice. @@ -1320,7 +1319,7 @@ pub fn argvToCommandLineWindows( } } - return try unicode.utf8ToUtf16LeAllocZ(allocator, buf.items); + return try unicode.wtf8ToWtf16LeAllocZ(allocator, buf.items); } test "argvToCommandLineWindows" { @@ -1386,7 +1385,7 @@ fn testArgvToCommandLineWindows(argv: []const []const u8, expected_cmd_line: []c const cmd_line_w = try argvToCommandLineWindows(std.testing.allocator, argv); defer std.testing.allocator.free(cmd_line_w); - const cmd_line = try unicode.utf16LeToUtf8Alloc(std.testing.allocator, cmd_line_w); + const cmd_line = try unicode.wtf16LeToWtf8Alloc(std.testing.allocator, cmd_line_w); defer std.testing.allocator.free(cmd_line); try std.testing.expectEqualStrings(expected_cmd_line, cmd_line); @@ -1424,7 +1423,7 @@ fn windowsMakeAsyncPipe(rd: *?windows.HANDLE, wr: *?windows.HANDLE, sattr: *cons "\\\\.\\pipe\\zig-childprocess-{d}-{d}", .{ windows.kernel32.GetCurrentProcessId(), pipe_name_counter.fetchAdd(1, .Monotonic) }, ) catch unreachable; - const len = std.unicode.utf8ToUtf16Le(&tmp_bufw, pipe_path) catch unreachable; + const len = std.unicode.wtf8ToWtf16Le(&tmp_bufw, pipe_path) catch unreachable; tmp_bufw[len] = 0; break :blk tmp_bufw[0..len :0]; }; @@ -1521,10 +1520,10 @@ pub fn createWindowsEnvBlock(allocator: mem.Allocator, env_map: *const EnvMap) ! 
var it = env_map.iterator(); var i: usize = 0; while (it.next()) |pair| { - i += try unicode.utf8ToUtf16Le(result[i..], pair.key_ptr.*); + i += try unicode.wtf8ToWtf16Le(result[i..], pair.key_ptr.*); result[i] = '='; i += 1; - i += try unicode.utf8ToUtf16Le(result[i..], pair.value_ptr.*); + i += try unicode.wtf8ToWtf16Le(result[i..], pair.value_ptr.*); result[i] = 0; i += 1; } diff --git a/lib/std/fs.zig b/lib/std/fs.zig index 9b88f4dd52..cfb4d7958b 100644 --- a/lib/std/fs.zig +++ b/lib/std/fs.zig @@ -31,18 +31,21 @@ pub const realpathW = os.realpathW; pub const getAppDataDir = @import("fs/get_app_data_dir.zig").getAppDataDir; pub const GetAppDataDirError = @import("fs/get_app_data_dir.zig").GetAppDataDirError; -/// This represents the maximum size of a UTF-8 encoded file path that the +/// This represents the maximum size of a `[]u8` file path that the /// operating system will accept. Paths, including those returned from file /// system operations, may be longer than this length, but such paths cannot /// be successfully passed back in other file system operations. However, /// all path components returned by file system operations are assumed to -/// fit into a UTF-8 encoded array of this length. +/// fit into a `u8` array of this length. /// The byte count includes room for a null sentinel byte. +/// On Windows, `[]u8` file paths are encoded as [WTF-8](https://simonsapin.github.io/wtf-8/). +/// On WASI, `[]u8` file paths are encoded as valid UTF-8. +/// On other platforms, `[]u8` file paths are opaque sequences of bytes with no particular encoding. pub const MAX_PATH_BYTES = switch (builtin.os.tag) { .linux, .macos, .ios, .freebsd, .openbsd, .netbsd, .dragonfly, .haiku, .solaris, .illumos, .plan9, .emscripten => os.PATH_MAX, - // Each UTF-16LE character may be expanded to 3 UTF-8 bytes. - // If it would require 4 UTF-8 bytes, then there would be a surrogate - // pair in the UTF-16LE, and we (over)account 3 bytes for it that way. 
+ // Each WTF-16LE code unit may be expanded to 3 WTF-8 bytes. + // If it would require 4 WTF-8 bytes, then there would be a surrogate + // pair in the WTF-16LE, and we (over)account 3 bytes for it that way. // +1 for the null byte at the end, which can be encoded in 1 byte. .windows => os.windows.PATH_MAX_WIDE * 3 + 1, // TODO work out what a reasonable value we should use here @@ -53,18 +56,21 @@ pub const MAX_PATH_BYTES = switch (builtin.os.tag) { @compileError("PATH_MAX not implemented for " ++ @tagName(builtin.os.tag)), }; -/// This represents the maximum size of a UTF-8 encoded file name component that +/// This represents the maximum size of a `[]u8` file name component that /// the platform's common file systems support. File name components returned by file system -/// operations are likely to fit into a UTF-8 encoded array of this length, but +/// operations are likely to fit into a `u8` array of this length, but /// (depending on the platform) this assumption may not hold for every configuration. /// The byte count does not include a null sentinel byte. +/// On Windows, `[]u8` file name components are encoded as [WTF-8](https://simonsapin.github.io/wtf-8/). +/// On WASI, file name components are encoded as valid UTF-8. +/// On other platforms, `[]u8` components are an opaque sequence of bytes with no particular encoding. pub const MAX_NAME_BYTES = switch (builtin.os.tag) { .linux, .macos, .ios, .freebsd, .openbsd, .netbsd, .dragonfly, .solaris, .illumos => os.NAME_MAX, // Haiku's NAME_MAX includes the null terminator, so subtract one. .haiku => os.NAME_MAX - 1, - // Each UTF-16LE character may be expanded to 3 UTF-8 bytes. - // If it would require 4 UTF-8 bytes, then there would be a surrogate - // pair in the UTF-16LE, and we (over)account 3 bytes for it that way. + // Each WTF-16LE character may be expanded to 3 WTF-8 bytes. 
+ // If it would require 4 WTF-8 bytes, then there would be a surrogate + // pair in the WTF-16LE, and we (over)account 3 bytes for it that way. .windows => os.windows.NAME_MAX * 3, // For WASI, the MAX_NAME will depend on the host OS, so it needs to be // as large as the largest MAX_NAME_BYTES (Windows) in order to work on any host OS. @@ -86,6 +92,9 @@ pub const base64_decoder = base64.Base64Decoder.init(base64_alphabet, null); /// TODO remove the allocator requirement from this API /// TODO move to Dir +/// On Windows, both paths should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/). +/// On WASI, both paths should be encoded as valid UTF-8. +/// On other platforms, both paths are an opaque sequence of bytes with no particular encoding. pub fn atomicSymLink(allocator: Allocator, existing_path: []const u8, new_path: []const u8) !void { if (cwd().symLink(existing_path, new_path, .{})) { return; @@ -117,6 +126,9 @@ pub fn atomicSymLink(allocator: Allocator, existing_path: []const u8, new_path: /// Same as `Dir.updateFile`, except asserts that both `source_path` and `dest_path` /// are absolute. See `Dir.updateFile` for a function that operates on both /// absolute and relative paths. +/// On Windows, both paths should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/). +/// On WASI, both paths should be encoded as valid UTF-8. +/// On other platforms, both paths are an opaque sequence of bytes with no particular encoding. pub fn updateFileAbsolute( source_path: []const u8, dest_path: []const u8, @@ -131,6 +143,9 @@ pub fn updateFileAbsolute( /// Same as `Dir.copyFile`, except asserts that both `source_path` and `dest_path` /// are absolute. See `Dir.copyFile` for a function that operates on both /// absolute and relative paths. +/// On Windows, both paths should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/). +/// On WASI, both paths should be encoded as valid UTF-8. 
+/// On other platforms, both paths are an opaque sequence of bytes with no particular encoding. pub fn copyFileAbsolute( source_path: []const u8, dest_path: []const u8, @@ -145,24 +160,30 @@ pub fn copyFileAbsolute( /// Create a new directory, based on an absolute path. /// Asserts that the path is absolute. See `Dir.makeDir` for a function that operates /// on both absolute and relative paths. +/// On Windows, `absolute_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/). +/// On WASI, `absolute_path` should be encoded as valid UTF-8. +/// On other platforms, `absolute_path` is an opaque sequence of bytes with no particular encoding. pub fn makeDirAbsolute(absolute_path: []const u8) !void { assert(path.isAbsolute(absolute_path)); return os.mkdir(absolute_path, Dir.default_mode); } -/// Same as `makeDirAbsolute` except the parameter is a null-terminated UTF-8-encoded string. +/// Same as `makeDirAbsolute` except the parameter is null-terminated. pub fn makeDirAbsoluteZ(absolute_path_z: [*:0]const u8) !void { assert(path.isAbsoluteZ(absolute_path_z)); return os.mkdirZ(absolute_path_z, Dir.default_mode); } -/// Same as `makeDirAbsolute` except the parameter is a null-terminated WTF-16-encoded string. +/// Same as `makeDirAbsolute` except the parameter is a null-terminated WTF-16 LE-encoded string. pub fn makeDirAbsoluteW(absolute_path_w: [*:0]const u16) !void { assert(path.isAbsoluteWindowsW(absolute_path_w)); return os.mkdirW(absolute_path_w, Dir.default_mode); } /// Same as `Dir.deleteDir` except the path is absolute. +/// On Windows, `dir_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/). +/// On WASI, `dir_path` should be encoded as valid UTF-8. +/// On other platforms, `dir_path` is an opaque sequence of bytes with no particular encoding. 
pub fn deleteDirAbsolute(dir_path: []const u8) !void { assert(path.isAbsolute(dir_path)); return os.rmdir(dir_path); @@ -181,6 +202,9 @@ pub fn deleteDirAbsoluteW(dir_path: [*:0]const u16) !void { } /// Same as `Dir.rename` except the paths are absolute. +/// On Windows, both paths should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/). +/// On WASI, both paths should be encoded as valid UTF-8. +/// On other platforms, both paths are an opaque sequence of bytes with no particular encoding. pub fn renameAbsolute(old_path: []const u8, new_path: []const u8) !void { assert(path.isAbsolute(old_path)); assert(path.isAbsolute(new_path)); @@ -211,7 +235,7 @@ pub fn renameZ(old_dir: Dir, old_sub_path_z: [*:0]const u8, new_dir: Dir, new_su return os.renameatZ(old_dir.fd, old_sub_path_z, new_dir.fd, new_sub_path_z); } -/// Same as `rename` except the parameters are UTF16LE, NT prefixed. +/// Same as `rename` except the parameters are WTF16LE, NT prefixed. /// This function is Windows-only. pub fn renameW(old_dir: Dir, old_sub_path_w: []const u16, new_dir: Dir, new_sub_path_w: []const u16) !void { return os.renameatW(old_dir.fd, old_sub_path_w, new_dir.fd, new_sub_path_w); @@ -240,6 +264,9 @@ pub fn defaultWasiCwd() std.os.wasi.fd_t { /// See `openDirAbsoluteZ` for a function that accepts a null-terminated path. /// /// Asserts that the path parameter has no null bytes. +/// On Windows, `absolute_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/). +/// On WASI, `absolute_path` should be encoded as valid UTF-8. +/// On other platforms, `absolute_path` is an opaque sequence of bytes with no particular encoding. pub fn openDirAbsolute(absolute_path: []const u8, flags: Dir.OpenDirOptions) File.OpenError!Dir { assert(path.isAbsolute(absolute_path)); return cwd().openDir(absolute_path, flags); @@ -262,6 +289,9 @@ pub fn openDirAbsoluteW(absolute_path_c: [*:0]const u16, flags: Dir.OpenDirOptio /// operates on both absolute and relative paths. 
/// Asserts that the path parameter has no null bytes. See `openFileAbsoluteZ` for a function /// that accepts a null-terminated path. +/// On Windows, `absolute_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/). +/// On WASI, `absolute_path` should be encoded as valid UTF-8. +/// On other platforms, `absolute_path` is an opaque sequence of bytes with no particular encoding. pub fn openFileAbsolute(absolute_path: []const u8, flags: File.OpenFlags) File.OpenError!File { assert(path.isAbsolute(absolute_path)); return cwd().openFile(absolute_path, flags); @@ -280,11 +310,13 @@ pub fn openFileAbsoluteW(absolute_path_w: []const u16, flags: File.OpenFlags) Fi } /// Test accessing `path`. -/// `path` is UTF-8-encoded. /// Be careful of Time-Of-Check-Time-Of-Use race conditions when using this function. /// For example, instead of testing if a file exists and then opening it, just /// open it and handle the error for file not found. /// See `accessAbsoluteZ` for a function that accepts a null-terminated path. +/// On Windows, `absolute_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/). +/// On WASI, `absolute_path` should be encoded as valid UTF-8. +/// On other platforms, `absolute_path` is an opaque sequence of bytes with no particular encoding. pub fn accessAbsolute(absolute_path: []const u8, flags: File.OpenFlags) Dir.AccessError!void { assert(path.isAbsolute(absolute_path)); try cwd().access(absolute_path, flags); @@ -306,6 +338,9 @@ pub fn accessAbsoluteW(absolute_path: [*:0]const u16, flags: File.OpenFlags) Dir /// operates on both absolute and relative paths. /// Asserts that the path parameter has no null bytes. See `createFileAbsoluteC` for a function /// that accepts a null-terminated path. +/// On Windows, `absolute_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/). +/// On WASI, `absolute_path` should be encoded as valid UTF-8. 
+/// On other platforms, `absolute_path` is an opaque sequence of bytes with no particular encoding. pub fn createFileAbsolute(absolute_path: []const u8, flags: File.CreateFlags) File.OpenError!File { assert(path.isAbsolute(absolute_path)); return cwd().createFile(absolute_path, flags); @@ -327,6 +362,9 @@ pub fn createFileAbsoluteW(absolute_path_w: [*:0]const u16, flags: File.CreateFl /// Asserts that the path is absolute. See `Dir.deleteFile` for a function that /// operates on both absolute and relative paths. /// Asserts that the path parameter has no null bytes. +/// On Windows, `absolute_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/). +/// On WASI, `absolute_path` should be encoded as valid UTF-8. +/// On other platforms, `absolute_path` is an opaque sequence of bytes with no particular encoding. pub fn deleteFileAbsolute(absolute_path: []const u8) Dir.DeleteFileError!void { assert(path.isAbsolute(absolute_path)); return cwd().deleteFile(absolute_path); @@ -349,6 +387,9 @@ pub fn deleteFileAbsoluteW(absolute_path_w: [*:0]const u16) Dir.DeleteFileError! /// Asserts that the path is absolute. See `Dir.deleteTree` for a function that /// operates on both absolute and relative paths. /// Asserts that the path parameter has no null bytes. +/// On Windows, `absolute_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/). +/// On WASI, `absolute_path` should be encoded as valid UTF-8. +/// On other platforms, `absolute_path` is an opaque sequence of bytes with no particular encoding. pub fn deleteTreeAbsolute(absolute_path: []const u8) !void { assert(path.isAbsolute(absolute_path)); const dirname = path.dirname(absolute_path) orelse return error{ @@ -364,6 +405,9 @@ pub fn deleteTreeAbsolute(absolute_path: []const u8) !void { } /// Same as `Dir.readLink`, except it asserts the path is absolute. +/// On Windows, `pathname` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/). 
+/// On WASI, `pathname` should be encoded as valid UTF-8. +/// On other platforms, `pathname` is an opaque sequence of bytes with no particular encoding. pub fn readLinkAbsolute(pathname: []const u8, buffer: *[MAX_PATH_BYTES]u8) ![]u8 { assert(path.isAbsolute(pathname)); return os.readlink(pathname, buffer); @@ -387,6 +431,9 @@ pub fn readLinkAbsoluteZ(pathname_c: [*:0]const u8, buffer: *[MAX_PATH_BYTES]u8) /// one; the latter case is known as a dangling link. /// If `sym_link_path` exists, it will not be overwritten. /// See also `symLinkAbsoluteZ` and `symLinkAbsoluteW`. +/// On Windows, both paths should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/). +/// On WASI, both paths should be encoded as valid UTF-8. +/// On other platforms, both paths are an opaque sequence of bytes with no particular encoding. pub fn symLinkAbsolute( target_path: []const u8, sym_link_path: []const u8, @@ -402,7 +449,7 @@ pub fn symLinkAbsolute( return os.symlink(target_path, sym_link_path); } -/// Windows-only. Same as `symLinkAbsolute` except the parameters are null-terminated, WTF16 encoded. +/// Windows-only. Same as `symLinkAbsolute` except the parameters are null-terminated, WTF16 LE encoded. /// Note that this function will by default try creating a symbolic link to a file. If you would /// like to create a symbolic link to a directory, specify this with `SymLinkFlags{ .is_directory = true }`. /// See also `symLinkAbsolute`, `symLinkAbsoluteZ`. 
@@ -426,27 +473,14 @@ pub fn symLinkAbsoluteZ( assert(path.isAbsoluteZ(target_path_c)); assert(path.isAbsoluteZ(sym_link_path_c)); if (builtin.os.tag == .windows) { - const target_path_w = try os.windows.cStrToWin32PrefixedFileW(target_path_c); - const sym_link_path_w = try os.windows.cStrToWin32PrefixedFileW(sym_link_path_c); - return os.windows.CreateSymbolicLink(sym_link_path_w.span(), target_path_w.span(), flags.is_directory); + const target_path_w = try os.windows.cStrToPrefixedFileW(null, target_path_c); + const sym_link_path_w = try os.windows.cStrToPrefixedFileW(null, sym_link_path_c); + return os.windows.CreateSymbolicLink(null, sym_link_path_w.span(), target_path_w.span(), flags.is_directory); } return os.symlinkZ(target_path_c, sym_link_path_c); } -pub const OpenSelfExeError = error{ - SharingViolation, - PathAlreadyExists, - FileNotFound, - AccessDenied, - PipeBusy, - NameTooLong, - /// On Windows, file paths must be valid Unicode. - InvalidUtf8, - /// On Windows, file paths cannot contain these characters: - /// '/', '*', '?', '"', '<', '>', '|' - BadPathName, - Unexpected, -} || os.OpenError || SelfExePathError || os.FlockError; +pub const OpenSelfExeError = os.OpenError || SelfExePathError || os.FlockError; pub fn openSelfExe(flags: File.OpenFlags) OpenSelfExeError!File { if (builtin.os.tag == .linux) { @@ -469,7 +503,45 @@ pub fn openSelfExe(flags: File.OpenFlags) OpenSelfExeError!File { return openFileAbsoluteZ(buf[0..self_exe_path.len :0].ptr, flags); } -pub const SelfExePathError = os.ReadLinkError || os.SysCtlError || os.RealPathError; +// This is os.ReadLinkError || os.RealPathError with impossible errors excluded +pub const SelfExePathError = error{ + FileNotFound, + AccessDenied, + NameTooLong, + NotSupported, + NotDir, + SymLinkLoop, + InputOutput, + FileTooBig, + IsDir, + ProcessFdQuotaExceeded, + SystemFdQuotaExceeded, + NoDevice, + SystemResources, + NoSpaceLeft, + FileSystem, + BadPathName, + DeviceBusy, + SharingViolation, + PipeBusy, + 
NotLink, + PathAlreadyExists, + InvalidHandle, + + /// On Windows, `\\server` or `\\server\share` was not found. + NetworkNotFound, + + /// On Windows, antivirus software is enabled by default. It can be + /// disabled, but Windows Update sometimes ignores the user's preference + /// and re-enables it. When enabled, antivirus software on Windows + /// intercepts file system operations and makes them significantly slower + /// in addition to possibly failing with this error code. + AntivirusInterference, + + /// On Windows, the volume does not contain a recognized file system. File + /// system drivers might not be loaded, or the volume may be corrupt. + UnrecognizedVolume, +} || os.SysCtlError; /// `selfExePath` except allocates the result on the heap. /// Caller owns returned memory. @@ -491,6 +563,8 @@ pub fn selfExePathAlloc(allocator: Allocator) ![]u8 { /// This function may return an error if the current executable /// was deleted after spawning. /// Returned value is a slice of out_buffer. +/// On Windows, the result is encoded as [WTF-8](https://simonsapin.github.io/wtf-8/). +/// On other platforms, the result is an opaque sequence of bytes with no particular encoding. /// /// On Linux, depends on procfs being mounted. If the currently executing binary has /// been deleted, the file path looks something like `/a/b/c/exe (deleted)`. 
@@ -505,15 +579,31 @@ pub fn selfExePath(out_buffer: []u8) SelfExePathError![]u8 { if (rc != 0) return error.NameTooLong; var real_path_buf: [MAX_PATH_BYTES]u8 = undefined; - const real_path = try std.os.realpathZ(&symlink_path_buf, &real_path_buf); + const real_path = std.os.realpathZ(&symlink_path_buf, &real_path_buf) catch |err| switch (err) { + error.InvalidWtf8 => unreachable, // Windows-only + error.NetworkNotFound => unreachable, // Windows-only + else => |e| return e, + }; if (real_path.len > out_buffer.len) return error.NameTooLong; const result = out_buffer[0..real_path.len]; @memcpy(result, real_path); return result; } switch (builtin.os.tag) { - .linux => return os.readlinkZ("/proc/self/exe", out_buffer), - .solaris, .illumos => return os.readlinkZ("/proc/self/path/a.out", out_buffer), + .linux => return os.readlinkZ("/proc/self/exe", out_buffer) catch |err| switch (err) { + error.InvalidUtf8 => unreachable, // WASI-only + error.InvalidWtf8 => unreachable, // Windows-only + error.UnsupportedReparsePointType => unreachable, // Windows-only + error.NetworkNotFound => unreachable, // Windows-only + else => |e| return e, + }, + .solaris, .illumos => return os.readlinkZ("/proc/self/path/a.out", out_buffer) catch |err| switch (err) { + error.InvalidUtf8 => unreachable, // WASI-only + error.InvalidWtf8 => unreachable, // Windows-only + error.UnsupportedReparsePointType => unreachable, // Windows-only + error.NetworkNotFound => unreachable, // Windows-only + else => |e| return e, + }, .freebsd, .dragonfly => { var mib = [4]c_int{ os.CTL.KERN, os.KERN.PROC, os.KERN.PROC_PATHNAME, -1 }; var out_len: usize = out_buffer.len; @@ -537,7 +627,11 @@ pub fn selfExePath(out_buffer: []u8) SelfExePathError![]u8 { if (mem.indexOf(u8, argv0, "/") != null) { // argv[0] is a path (relative or absolute): use realpath(3) directly var real_path_buf: [MAX_PATH_BYTES]u8 = undefined; - const real_path = try os.realpathZ(os.argv[0], &real_path_buf); + const real_path = 
os.realpathZ(os.argv[0], &real_path_buf) catch |err| switch (err) { + error.InvalidWtf8 => unreachable, // Windows-only + error.NetworkNotFound => unreachable, // Windows-only + else => |e| return e, + }; if (real_path.len > out_buffer.len) return error.NameTooLong; const result = out_buffer[0..real_path.len]; @@ -575,7 +669,10 @@ pub fn selfExePath(out_buffer: []u8) SelfExePathError![]u8 { // symlink, not the path that the symlink points to. We want the path // that the symlink points to, though, so we need to get the realpath. const pathname_w = try os.windows.wToPrefixedFileW(null, image_path_name); - return std.fs.cwd().realpathW(pathname_w.span(), out_buffer); + return std.fs.cwd().realpathW(pathname_w.span(), out_buffer) catch |err| switch (err) { + error.InvalidWtf8 => unreachable, + else => |e| return e, + }; }, else => @compileError("std.fs.selfExePath not supported for this target"), } @@ -599,6 +696,8 @@ pub fn selfExeDirPathAlloc(allocator: Allocator) ![]u8 { /// Get the directory path that contains the current executable. /// Returned value is a slice of out_buffer. +/// On Windows, the result is encoded as [WTF-8](https://simonsapin.github.io/wtf-8/). +/// On other platforms, the result is an opaque sequence of bytes with no particular encoding. pub fn selfExeDirPath(out_buffer: []u8) SelfExePathError![]const u8 { const self_exe_path = try selfExePath(out_buffer); // Assume that the OS APIs return absolute paths, and therefore dirname @@ -607,6 +706,8 @@ pub fn selfExeDirPath(out_buffer: []u8) SelfExePathError![]const u8 { } /// `realpath`, except caller must free the returned memory. +/// On Windows, the result is encoded as [WTF-8](https://simonsapin.github.io/wtf-8/). +/// On other platforms, the result is an opaque sequence of bytes with no particular encoding. /// See also `Dir.realpath`. 
pub fn realpathAlloc(allocator: Allocator, pathname: []const u8) ![]u8 { // Use of MAX_PATH_BYTES here is valid as the realpath function does not diff --git a/lib/std/fs/Dir.zig b/lib/std/fs/Dir.zig index f12af67aa2..ad9b467266 100644 --- a/lib/std/fs/Dir.zig +++ b/lib/std/fs/Dir.zig @@ -9,7 +9,14 @@ pub const Entry = struct { pub const Kind = File.Kind; }; -const IteratorError = error{ AccessDenied, SystemResources } || posix.UnexpectedError; +const IteratorError = error{ + AccessDenied, + SystemResources, + /// WASI-only. The path of an entry could not be encoded as valid UTF-8. + /// WASI is unable to handle paths that cannot be encoded as well-formed UTF-8. + /// https://github.com/WebAssembly/wasi-filesystem/issues/17#issuecomment-1430639353 + InvalidUtf8, +} || posix.UnexpectedError; pub const Iterator = switch (builtin.os.tag) { .macos, .ios, .freebsd, .netbsd, .dragonfly, .openbsd, .solaris, .illumos => struct { @@ -445,13 +452,12 @@ pub const Iterator = switch (builtin.os.tag) { self.index = self.buf.len; } - const name_utf16le = @as([*]u16, @ptrCast(&dir_info.FileName))[0 .. dir_info.FileNameLength / 2]; + const name_wtf16le = @as([*]u16, @ptrCast(&dir_info.FileName))[0 .. dir_info.FileNameLength / 2]; - if (mem.eql(u16, name_utf16le, &[_]u16{'.'}) or mem.eql(u16, name_utf16le, &[_]u16{ '.', '.' })) + if (mem.eql(u16, name_wtf16le, &[_]u16{'.'}) or mem.eql(u16, name_wtf16le, &[_]u16{ '.', '.' 
})) continue; - // Trust that Windows gives us valid UTF-16LE - const name_utf8_len = std.unicode.utf16LeToUtf8(self.name_data[0..], name_utf16le) catch unreachable; - const name_utf8 = self.name_data[0..name_utf8_len]; + const name_wtf8_len = std.unicode.wtf16LeToWtf8(self.name_data[0..], name_wtf16le); + const name_wtf8 = self.name_data[0..name_wtf8_len]; const kind: Entry.Kind = blk: { const attrs = dir_info.FileAttributes; if (attrs & w.FILE_ATTRIBUTE_DIRECTORY != 0) break :blk .directory; @@ -459,7 +465,7 @@ pub const Iterator = switch (builtin.os.tag) { break :blk .file; }; return Entry{ - .name = name_utf8, + .name = name_wtf8, .kind = kind, }; } @@ -516,6 +522,7 @@ pub const Iterator = switch (builtin.os.tag) { .INVAL => unreachable, .NOENT => return error.DirNotFound, // The directory being iterated was deleted during iteration. .NOTCAPABLE => return error.AccessDenied, + .ILSEQ => return error.InvalidUtf8, // An entry's name cannot be encoded as UTF-8. else => |err| return posix.unexpectedErrno(err), } if (bufused == 0) return null; @@ -743,7 +750,11 @@ pub const OpenError = error{ SystemFdQuotaExceeded, NoDevice, SystemResources, + /// WASI-only; file paths must be valid UTF-8. InvalidUtf8, + /// Windows-only; file paths provided by the user must be valid WTF-8. + /// https://simonsapin.github.io/wtf-8/ + InvalidWtf8, BadPathName, DeviceBusy, /// On Windows, `\\server` or `\\server\share` was not found. @@ -759,6 +770,9 @@ pub fn close(self: *Dir) void { /// To create a new file, see `createFile`. /// Call `File.close` to release the resource. /// Asserts that the path parameter has no null bytes. +/// On Windows, `sub_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/). +/// On WASI, `sub_path` should be encoded as valid UTF-8. +/// On other platforms, `sub_path` is an opaque sequence of bytes with no particular encoding. 
pub fn openFile(self: Dir, sub_path: []const u8, flags: File.OpenFlags) File.OpenError!File { if (builtin.os.tag == .windows) { const path_w = try std.os.windows.sliceToPrefixedFileW(self.fd, sub_path); @@ -911,6 +925,9 @@ pub fn openFileW(self: Dir, sub_path_w: []const u16, flags: File.OpenFlags) File /// Creates, opens, or overwrites a file with write access. /// Call `File.close` on the result when done. /// Asserts that the path parameter has no null bytes. +/// On Windows, `sub_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/). +/// On WASI, `sub_path` should be encoded as valid UTF-8. +/// On other platforms, `sub_path` is an opaque sequence of bytes with no particular encoding. pub fn createFile(self: Dir, sub_path: []const u8, flags: File.CreateFlags) File.OpenError!File { if (builtin.os.tag == .windows) { const path_w = try std.os.windows.sliceToPrefixedFileW(self.fd, sub_path); @@ -1060,18 +1077,21 @@ pub fn createFileW(self: Dir, sub_path_w: []const u16, flags: File.CreateFlags) /// Creates a single directory with a relative or absolute path. /// To create multiple directories to make an entire path, see `makePath`. /// To operate on only absolute paths, see `makeDirAbsolute`. +/// On Windows, `sub_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/). +/// On WASI, `sub_path` should be encoded as valid UTF-8. +/// On other platforms, `sub_path` is an opaque sequence of bytes with no particular encoding. pub fn makeDir(self: Dir, sub_path: []const u8) !void { try posix.mkdirat(self.fd, sub_path, default_mode); } -/// Creates a single directory with a relative or absolute null-terminated UTF-8-encoded path. +/// Same as `makeDir`, but `sub_path` is null-terminated. /// To create multiple directories to make an entire path, see `makePath`. /// To operate on only absolute paths, see `makeDirAbsoluteZ`. 
pub fn makeDirZ(self: Dir, sub_path: [*:0]const u8) !void { try posix.mkdiratZ(self.fd, sub_path, default_mode); } -/// Creates a single directory with a relative or absolute null-terminated WTF-16-encoded path. +/// Creates a single directory with a relative or absolute null-terminated WTF-16 LE-encoded path. /// To create multiple directories to make an entire path, see `makePath`. /// To operate on only absolute paths, see `makeDirAbsoluteW`. pub fn makeDirW(self: Dir, sub_path: [*:0]const u16) !void { @@ -1083,6 +1103,9 @@ pub fn makeDirW(self: Dir, sub_path: [*:0]const u16) !void { /// Returns success if the path already exists and is a directory. /// This function is not atomic, and if it returns an error, the file system may /// have been modified regardless. +/// On Windows, `sub_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/). +/// On WASI, `sub_path` should be encoded as valid UTF-8. +/// On other platforms, `sub_path` is an opaque sequence of bytes with no particular encoding. /// /// Paths containing `..` components are handled differently depending on the platform: /// - On Windows, `..` are resolved before the path is passed to NtCreateFile, meaning @@ -1119,16 +1142,17 @@ pub fn makePath(self: Dir, sub_path: []const u8) !void { } } -/// Calls makeOpenDirAccessMaskW iteratively to make an entire path +/// Windows only. Calls makeOpenDirAccessMaskW iteratively to make an entire path /// (i.e. creating any parent directories that do not exist). /// Opens the dir if the path already exists and is a directory. /// This function is not atomic, and if it returns an error, the file system may /// have been modified regardless. +/// `sub_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/). 
fn makeOpenPathAccessMaskW(self: Dir, sub_path: []const u8, access_mask: u32, no_follow: bool) OpenError!Dir { const w = std.os.windows; var it = try fs.path.componentIterator(sub_path); // If there are no components in the path, then create a dummy component with the full path. - var component = it.last() orelse fs.path.NativeUtf8ComponentIterator.Component{ + var component = it.last() orelse fs.path.NativeComponentIterator.Component{ .name = "", .path = sub_path, }; @@ -1156,7 +1180,9 @@ fn makeOpenPathAccessMaskW(self: Dir, sub_path: []const u8, access_mask: u32, no /// This function performs `makePath`, followed by `openDir`. /// If supported by the OS, this operation is atomic. It is not atomic on /// all operating systems. -/// On Windows, this function performs `makeOpenPathAccessMaskW`. +/// On Windows, `sub_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/). +/// On WASI, `sub_path` should be encoded as valid UTF-8. +/// On other platforms, `sub_path` is an opaque sequence of bytes with no particular encoding. pub fn makeOpenPath(self: Dir, sub_path: []const u8, open_dir_options: OpenDirOptions) !Dir { return switch (builtin.os.tag) { .windows => { @@ -1185,6 +1211,10 @@ pub const RealPathError = posix.RealPathError; /// `pathname` relative to this `Dir`. If `pathname` is absolute, ignores this /// `Dir` handle and returns the canonicalized absolute pathname of `pathname` /// argument. +/// On Windows, `sub_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/). +/// On other platforms, `sub_path` is an opaque sequence of bytes with no particular encoding. +/// On Windows, the result is encoded as [WTF-8](https://simonsapin.github.io/wtf-8/). +/// On other platforms, the result is an opaque sequence of bytes with no particular encoding. /// This function is not universally supported by all platforms. /// Currently supported hosts are: Linux, macOS, and Windows. 
/// See also `Dir.realpathZ`, `Dir.realpathW`, and `Dir.realpathAlloc`. @@ -1224,6 +1254,7 @@ pub fn realpathZ(self: Dir, pathname: [*:0]const u8, out_buffer: []u8) RealPathE error.FileLocksNotSupported => return error.Unexpected, error.FileBusy => return error.Unexpected, error.WouldBlock => return error.Unexpected, + error.InvalidUtf8 => unreachable, // WASI-only else => |e| return e, }; defer posix.close(fd); @@ -1246,7 +1277,8 @@ pub fn realpathZ(self: Dir, pathname: [*:0]const u8, out_buffer: []u8) RealPathE return result; } -/// Windows-only. Same as `Dir.realpath` except `pathname` is WTF16 encoded. +/// Windows-only. Same as `Dir.realpath` except `pathname` is WTF16 LE encoded. +/// The result is encoded as [WTF-8](https://simonsapin.github.io/wtf-8/). /// See also `Dir.realpath`, `realpathW`. pub fn realpathW(self: Dir, pathname: []const u16, out_buffer: []u8) RealPathError![]u8 { const w = std.os.windows; @@ -1272,16 +1304,7 @@ pub fn realpathW(self: Dir, pathname: []const u16, out_buffer: []u8) RealPathErr var wide_buf: [w.PATH_MAX_WIDE]u16 = undefined; const wide_slice = try w.GetFinalPathNameByHandle(h_file, .{}, &wide_buf); var big_out_buf: [fs.MAX_PATH_BYTES]u8 = undefined; - const end_index = std.unicode.utf16leToUtf8(&big_out_buf, wide_slice) catch |e| switch (e) { - // TODO: Windows file paths can be arbitrary arrays of u16 values and - // must not fail with InvalidUtf8. - error.DanglingSurrogateHalf, - error.ExpectedSecondSurrogateHalf, - error.UnexpectedSecondSurrogateHalf, - error.CodepointTooLarge, - error.Utf8CannotEncodeSurrogateHalf, - => return error.InvalidUtf8, - }; + const end_index = std.unicode.wtf16LeToWtf8(&big_out_buf, wide_slice); if (end_index > out_buffer.len) return error.NameTooLong; const result = out_buffer[0..end_index]; @@ -1344,6 +1367,9 @@ pub const OpenDirOptions = struct { /// open until `close` is called on the result. /// The directory cannot be iterated unless the `iterate` option is set to `true`. 
/// +/// On Windows, `sub_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/). +/// On WASI, `sub_path` should be encoded as valid UTF-8. +/// On other platforms, `sub_path` is an opaque sequence of bytes with no particular encoding. /// Asserts that the path parameter has no null bytes. pub fn openDir(self: Dir, sub_path: []const u8, args: OpenDirOptions) OpenError!Dir { switch (builtin.os.tag) { @@ -1428,7 +1454,7 @@ pub fn openDirZ(self: Dir, sub_path_c: [*:0]const u8, args: OpenDirOptions) Open } } -/// Same as `openDir` except the path parameter is WTF-16 encoded, NT-prefixed. +/// Same as `openDir` except the path parameter is WTF-16 LE encoded, NT-prefixed. /// This function asserts the target OS is Windows. pub fn openDirW(self: Dir, sub_path_w: [*:0]const u16, args: OpenDirOptions) OpenError!Dir { const w = std.os.windows; @@ -1518,6 +1544,9 @@ fn makeOpenDirAccessMaskW(self: Dir, sub_path_w: [*:0]const u16, access_mask: u3 pub const DeleteFileError = posix.UnlinkError; /// Delete a file name and possibly the file it refers to, based on an open directory handle. +/// On Windows, `sub_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/). +/// On WASI, `sub_path` should be encoded as valid UTF-8. +/// On other platforms, `sub_path` is an opaque sequence of bytes with no particular encoding. /// Asserts that the path parameter has no null bytes. pub fn deleteFile(self: Dir, sub_path: []const u8) DeleteFileError!void { if (builtin.os.tag == .windows) { @@ -1553,7 +1582,7 @@ pub fn deleteFileZ(self: Dir, sub_path_c: [*:0]const u8) DeleteFileError!void { }; } -/// Same as `deleteFile` except the parameter is WTF-16 encoded. +/// Same as `deleteFile` except the parameter is WTF-16 LE encoded. 
pub fn deleteFileW(self: Dir, sub_path_w: []const u16) DeleteFileError!void { posix.unlinkatW(self.fd, sub_path_w, 0) catch |err| switch (err) { error.DirNotEmpty => unreachable, // not passing AT.REMOVEDIR @@ -1572,7 +1601,11 @@ pub const DeleteDirError = error{ NotDir, SystemResources, ReadOnlyFileSystem, + /// WASI-only; file paths must be valid UTF-8. InvalidUtf8, + /// Windows-only; file paths provided by the user must be valid WTF-8. + /// https://simonsapin.github.io/wtf-8/ + InvalidWtf8, BadPathName, /// On Windows, `\\server` or `\\server\share` was not found. NetworkNotFound, @@ -1581,6 +1614,9 @@ pub const DeleteDirError = error{ /// Returns `error.DirNotEmpty` if the directory is not empty. /// To delete a directory recursively, see `deleteTree`. +/// On Windows, `sub_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/). +/// On WASI, `sub_path` should be encoded as valid UTF-8. +/// On other platforms, `sub_path` is an opaque sequence of bytes with no particular encoding. /// Asserts that the path parameter has no null bytes. pub fn deleteDir(self: Dir, sub_path: []const u8) DeleteDirError!void { if (builtin.os.tag == .windows) { @@ -1605,7 +1641,7 @@ pub fn deleteDirZ(self: Dir, sub_path_c: [*:0]const u8) DeleteDirError!void { }; } -/// Same as `deleteDir` except the parameter is UTF16LE, NT prefixed. +/// Same as `deleteDir` except the parameter is WTF16LE, NT prefixed. /// This function is Windows-only. pub fn deleteDirW(self: Dir, sub_path_w: []const u16) DeleteDirError!void { posix.unlinkatW(self.fd, sub_path_w, posix.AT.REMOVEDIR) catch |err| switch (err) { @@ -1620,6 +1656,9 @@ pub const RenameError = posix.RenameError; /// If new_sub_path already exists, it will be replaced. /// Renaming a file over an existing directory or a directory /// over an existing file will fail with `error.IsDir` or `error.NotDir` +/// On Windows, both paths should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/). 
+/// On WASI, both paths should be encoded as valid UTF-8. +/// On other platforms, both paths are an opaque sequence of bytes with no particular encoding. pub fn rename(self: Dir, old_sub_path: []const u8, new_sub_path: []const u8) RenameError!void { return posix.renameat(self.fd, old_sub_path, self.fd, new_sub_path); } @@ -1629,7 +1668,7 @@ pub fn renameZ(self: Dir, old_sub_path_z: [*:0]const u8, new_sub_path_z: [*:0]co return posix.renameatZ(self.fd, old_sub_path_z, self.fd, new_sub_path_z); } -/// Same as `rename` except the parameters are UTF16LE, NT prefixed. +/// Same as `rename` except the parameters are WTF16LE, NT prefixed. /// This function is Windows-only. pub fn renameW(self: Dir, old_sub_path_w: []const u16, new_sub_path_w: []const u16) RenameError!void { return posix.renameatW(self.fd, old_sub_path_w, self.fd, new_sub_path_w); @@ -1647,6 +1686,9 @@ pub const SymLinkFlags = struct { /// A symbolic link (also known as a soft link) may point to an existing file or to a nonexistent /// one; the latter case is known as a dangling link. /// If `sym_link_path` exists, it will not be overwritten. +/// On Windows, both paths should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/). +/// On WASI, both paths should be encoded as valid UTF-8. +/// On other platforms, both paths are an opaque sequence of bytes with no particular encoding. pub fn symLink( self: Dir, target_path: []const u8, @@ -1662,7 +1704,7 @@ pub fn symLink( // when converting to an NT namespaced path. CreateSymbolicLink in // symLinkW will handle the necessary conversion. 
var target_path_w: std.os.windows.PathSpace = undefined; - target_path_w.len = try std.unicode.utf8ToUtf16Le(&target_path_w.data, target_path); + target_path_w.len = try std.unicode.wtf8ToWtf16Le(&target_path_w.data, target_path); target_path_w.data[target_path_w.len] = 0; const sym_link_path_w = try std.os.windows.sliceToPrefixedFileW(self.fd, sym_link_path); return self.symLinkW(target_path_w.span(), sym_link_path_w.span(), flags); @@ -1698,7 +1740,7 @@ pub fn symLinkZ( } /// Windows-only. Same as `symLink` except the pathname parameters -/// are null-terminated, WTF16 encoded. +/// are WTF16 LE encoded. pub fn symLinkW( self: Dir, /// WTF-16, does not need to be NT-prefixed. The NT-prefixing @@ -1716,6 +1758,9 @@ pub const ReadLinkError = posix.ReadLinkError; /// Read value of a symbolic link. /// The return value is a slice of `buffer`, from index `0`. /// Asserts that the path parameter has no null bytes. +/// On Windows, `sub_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/). +/// On WASI, `sub_path` should be encoded as valid UTF-8. +/// On other platforms, `sub_path` is an opaque sequence of bytes with no particular encoding. pub fn readLink(self: Dir, sub_path: []const u8, buffer: []u8) ReadLinkError![]u8 { if (builtin.os.tag == .wasi and !builtin.link_libc) { return self.readLinkWasi(sub_path, buffer); @@ -1733,7 +1778,7 @@ pub fn readLinkWasi(self: Dir, sub_path: []const u8, buffer: []u8) ![]u8 { return posix.readlinkat(self.fd, sub_path, buffer); } -/// Same as `readLink`, except the `pathname` parameter is null-terminated. +/// Same as `readLink`, except the `sub_path_c` parameter is null-terminated. pub fn readLinkZ(self: Dir, sub_path_c: [*:0]const u8, buffer: []u8) ![]u8 { if (builtin.os.tag == .windows) { const sub_path_w = try std.os.windows.cStrToPrefixedFileW(self.fd, sub_path_c); @@ -1743,7 +1788,7 @@ pub fn readLinkZ(self: Dir, sub_path_c: [*:0]const u8, buffer: []u8) ![]u8 { } /// Windows-only. 
Same as `readLink` except the pathname parameter -/// is null-terminated, WTF16 encoded. +/// is WTF16 LE encoded. pub fn readLinkW(self: Dir, sub_path_w: []const u16, buffer: []u8) ![]u8 { return std.os.windows.ReadLink(self.fd, sub_path_w, buffer); } @@ -1753,6 +1798,9 @@ pub fn readLinkW(self: Dir, sub_path_w: []const u16, buffer: []u8) ![]u8 { /// the situation is ambiguous. It could either mean that the entire file was read, and /// it exactly fits the buffer, or it could mean the buffer was not big enough for the /// entire file. +/// On Windows, `file_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/). +/// On WASI, `file_path` should be encoded as valid UTF-8. +/// On other platforms, `file_path` is an opaque sequence of bytes with no particular encoding. pub fn readFile(self: Dir, file_path: []const u8, buffer: []u8) ![]u8 { var file = try self.openFile(file_path, .{}); defer file.close(); @@ -1763,6 +1811,9 @@ pub fn readFile(self: Dir, file_path: []const u8, buffer: []u8) ![]u8 { /// On success, caller owns returned buffer. /// If the file is larger than `max_bytes`, returns `error.FileTooBig`. +/// On Windows, `file_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/). +/// On WASI, `file_path` should be encoded as valid UTF-8. +/// On other platforms, `file_path` is an opaque sequence of bytes with no particular encoding. pub fn readFileAlloc(self: Dir, allocator: mem.Allocator, file_path: []const u8, max_bytes: usize) ![]u8 { return self.readFileAllocOptions(allocator, file_path, max_bytes, null, @alignOf(u8), null); } @@ -1772,6 +1823,9 @@ pub fn readFileAlloc(self: Dir, allocator: mem.Allocator, file_path: []const u8, /// If `size_hint` is specified the initial buffer size is calculated using /// that value, otherwise the effective file size is used instead. /// Allows specifying alignment and a sentinel value. +/// On Windows, `file_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/). 
+/// On WASI, `file_path` should be encoded as valid UTF-8. +/// On other platforms, `file_path` is an opaque sequence of bytes with no particular encoding. pub fn readFileAllocOptions( self: Dir, allocator: mem.Allocator, @@ -1811,9 +1865,13 @@ pub const DeleteTreeError = error{ /// This error is unreachable if `sub_path` does not contain a path separator. NotDir, - /// On Windows, file paths must be valid Unicode. + /// WASI-only; file paths must be valid UTF-8. InvalidUtf8, + /// Windows-only; file paths provided by the user must be valid WTF-8. + /// https://simonsapin.github.io/wtf-8/ + InvalidWtf8, + /// On Windows, file paths cannot contain these characters: /// '/', '*', '?', '"', '<', '>', '|' BadPathName, @@ -1826,6 +1884,9 @@ pub const DeleteTreeError = error{ /// removes it. If it cannot be removed because it is a non-empty directory, /// this function recursively removes its entries and then tries again. /// This operation is not atomic on most file systems. +/// On Windows, `sub_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/). +/// On WASI, `sub_path` should be encoded as valid UTF-8. +/// On other platforms, `sub_path` is an opaque sequence of bytes with no particular encoding. 
pub fn deleteTree(self: Dir, sub_path: []const u8) DeleteTreeError!void { var initial_iterable_dir = (try self.deleteTreeOpenInitialSubpath(sub_path, .file)) orelse return; @@ -1879,6 +1940,7 @@ pub fn deleteTree(self: Dir, sub_path: []const u8) DeleteTreeError!void { error.SystemResources, error.Unexpected, error.InvalidUtf8, + error.InvalidWtf8, error.BadPathName, error.NetworkNotFound, error.DeviceBusy, @@ -1910,6 +1972,7 @@ pub fn deleteTree(self: Dir, sub_path: []const u8) DeleteTreeError!void { error.AccessDenied, error.InvalidUtf8, + error.InvalidWtf8, error.SymLinkLoop, error.NameTooLong, error.SystemResources, @@ -1973,6 +2036,7 @@ pub fn deleteTree(self: Dir, sub_path: []const u8) DeleteTreeError!void { error.SystemResources, error.Unexpected, error.InvalidUtf8, + error.InvalidWtf8, error.BadPathName, error.NetworkNotFound, error.DeviceBusy, @@ -1994,6 +2058,7 @@ pub fn deleteTree(self: Dir, sub_path: []const u8) DeleteTreeError!void { error.AccessDenied, error.InvalidUtf8, + error.InvalidWtf8, error.SymLinkLoop, error.NameTooLong, error.SystemResources, @@ -2022,6 +2087,9 @@ pub fn deleteTree(self: Dir, sub_path: []const u8) DeleteTreeError!void { /// Like `deleteTree`, but only keeps one `Iterator` active at a time to minimize the function's stack size. /// This is slower than `deleteTree` but uses less stack space. +/// On Windows, `sub_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/). +/// On WASI, `sub_path` should be encoded as valid UTF-8. +/// On other platforms, `sub_path` is an opaque sequence of bytes with no particular encoding. 
pub fn deleteTreeMinStackSize(self: Dir, sub_path: []const u8) DeleteTreeError!void { return self.deleteTreeMinStackSizeWithKindHint(sub_path, .file); } @@ -2074,6 +2142,7 @@ fn deleteTreeMinStackSizeWithKindHint(self: Dir, sub_path: []const u8, kind_hint error.SystemResources, error.Unexpected, error.InvalidUtf8, + error.InvalidWtf8, error.BadPathName, error.NetworkNotFound, error.DeviceBusy, @@ -2102,6 +2171,7 @@ fn deleteTreeMinStackSizeWithKindHint(self: Dir, sub_path: []const u8, kind_hint error.AccessDenied, error.InvalidUtf8, + error.InvalidWtf8, error.SymLinkLoop, error.NameTooLong, error.SystemResources, @@ -2171,6 +2241,7 @@ fn deleteTreeOpenInitialSubpath(self: Dir, sub_path: []const u8, kind_hint: File error.SystemResources, error.Unexpected, error.InvalidUtf8, + error.InvalidWtf8, error.BadPathName, error.DeviceBusy, error.NetworkNotFound, @@ -2189,6 +2260,7 @@ fn deleteTreeOpenInitialSubpath(self: Dir, sub_path: []const u8, kind_hint: File error.AccessDenied, error.InvalidUtf8, + error.InvalidWtf8, error.SymLinkLoop, error.NameTooLong, error.SystemResources, @@ -2209,6 +2281,9 @@ fn deleteTreeOpenInitialSubpath(self: Dir, sub_path: []const u8, kind_hint: File pub const WriteFileError = File.WriteError || File.OpenError; /// Deprecated: use `writeFile2`. +/// On Windows, `sub_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/). +/// On WASI, `sub_path` should be encoded as valid UTF-8. +/// On other platforms, `sub_path` is an opaque sequence of bytes with no particular encoding. pub fn writeFile(self: Dir, sub_path: []const u8, data: []const u8) WriteFileError!void { return writeFile2(self, .{ .sub_path = sub_path, @@ -2218,6 +2293,9 @@ pub fn writeFile(self: Dir, sub_path: []const u8, data: []const u8) WriteFileErr } pub const WriteFileOptions = struct { + /// On Windows, `sub_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/). + /// On WASI, `sub_path` should be encoded as valid UTF-8. 
+ /// On other platforms, `sub_path` is an opaque sequence of bytes with no particular encoding. sub_path: []const u8, data: []const u8, flags: File.CreateFlags = .{}, @@ -2232,8 +2310,10 @@ pub fn writeFile2(self: Dir, options: WriteFileOptions) WriteFileError!void { pub const AccessError = posix.AccessError; -/// Test accessing `path`. -/// `path` is UTF-8-encoded. +/// Test accessing `sub_path`. +/// On Windows, `sub_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/). +/// On WASI, `sub_path` should be encoded as valid UTF-8. +/// On other platforms, `sub_path` is an opaque sequence of bytes with no particular encoding. /// Be careful of Time-Of-Check-Time-Of-Use race conditions when using this function. /// For example, instead of testing if a file exists and then opening it, just /// open it and handle the error for file not found. @@ -2268,9 +2348,9 @@ pub fn accessZ(self: Dir, sub_path: [*:0]const u8, flags: File.OpenFlags) Access } /// Same as `access` except asserts the target OS is Windows and the path parameter is -/// * WTF-16 encoded +/// * WTF-16 LE encoded /// * null-terminated -/// * NtDll prefixed +/// * relative or has the NT namespace prefix /// TODO currently this ignores `flags`. pub fn accessW(self: Dir, sub_path_w: [*:0]const u16, flags: File.OpenFlags) AccessError!void { _ = flags; @@ -2292,6 +2372,9 @@ pub const PrevStatus = enum { /// atime, and mode of the source file so that the next call to `updateFile` will not need a copy. /// Returns the previous status of the file before updating. /// If any of the directories do not exist for dest_path, they are created. +/// On Windows, both paths should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/). +/// On WASI, both paths should be encoded as valid UTF-8. +/// On other platforms, both paths are an opaque sequence of bytes with no particular encoding. 
pub fn updateFile( source_dir: Dir, source_path: []const u8, @@ -2343,6 +2426,9 @@ pub const CopyFileError = File.OpenError || File.StatError || /// On Linux, until https://patchwork.kernel.org/patch/9636735/ is merged and readily available, /// there is a possibility of power loss or application termination leaving temporary files present /// in the same directory as dest_path. +/// On Windows, both paths should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/). +/// On WASI, both paths should be encoded as valid UTF-8. +/// On other platforms, both paths are an opaque sequence of bytes with no particular encoding. pub fn copyFile( source_dir: Dir, source_path: []const u8, @@ -2430,6 +2516,9 @@ pub const AtomicFileOptions = struct { /// Always call `AtomicFile.deinit` to clean up, regardless of whether /// `AtomicFile.finish` succeeded. `dest_path` must remain valid until /// `AtomicFile.deinit` is called. +/// On Windows, `dest_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/). +/// On WASI, `dest_path` should be encoded as valid UTF-8. +/// On other platforms, `dest_path` is an opaque sequence of bytes with no particular encoding. pub fn atomicFile(self: Dir, dest_path: []const u8, options: AtomicFileOptions) !AtomicFile { if (fs.path.dirname(dest_path)) |dirname| { const dir = if (options.make_path) @@ -2461,6 +2550,9 @@ pub const StatFileError = File.OpenError || File.StatError || posix.FStatAtError /// Symlinks are followed. /// /// `sub_path` may be absolute, in which case `self` is ignored. +/// On Windows, `sub_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/). +/// On WASI, `sub_path` should be encoded as valid UTF-8. +/// On other platforms, `sub_path` is an opaque sequence of bytes with no particular encoding. 
pub fn statFile(self: Dir, sub_path: []const u8) StatFileError!Stat { if (builtin.os.tag == .windows) { var file = try self.openFile(sub_path, .{}); diff --git a/lib/std/fs/File.zig b/lib/std/fs/File.zig index 20b3976ea6..669f1b72e3 100644 --- a/lib/std/fs/File.zig +++ b/lib/std/fs/File.zig @@ -40,8 +40,11 @@ pub const OpenError = error{ AccessDenied, PipeBusy, NameTooLong, - /// On Windows, file paths must be valid Unicode. + /// WASI-only; file paths must be valid UTF-8. InvalidUtf8, + /// Windows-only; file paths provided by the user must be valid WTF-8. + /// https://simonsapin.github.io/wtf-8/ + InvalidWtf8, /// On Windows, file paths cannot contain these characters: /// '/', '*', '?', '"', '<', '>', '|' BadPathName, diff --git a/lib/std/fs/path.zig b/lib/std/fs/path.zig index 55d4490053..a0cba08673 100644 --- a/lib/std/fs/path.zig +++ b/lib/std/fs/path.zig @@ -1,3 +1,17 @@ +//! POSIX paths are arbitrary sequences of `u8` with no particular encoding. +//! +//! Windows paths are arbitrary sequences of `u16` (WTF-16). +//! For cross-platform APIs that deal with sequences of `u8`, Windows +//! paths are encoded by Zig as [WTF-8](https://simonsapin.github.io/wtf-8/). +//! WTF-8 is a superset of UTF-8 that allows encoding surrogate codepoints, +//! which enables lossless roundtripping when converting to/from WTF-16 +//! (as long as the WTF-8 encoded surrogate codepoints do not form a pair). +//! +//! WASI paths are sequences of valid Unicode scalar values, +//! which means that WASI is unable to handle paths that cannot be +//! encoded as well-formed UTF-8/UTF-16. +//! 
https://github.com/WebAssembly/wasi-filesystem/issues/17#issuecomment-1430639353 + const builtin = @import("builtin"); const std = @import("../std.zig"); const debug = std.debug; @@ -438,7 +452,7 @@ fn networkShareServersEql(ns1: []const u8, ns2: []const u8) bool { var it1 = mem.tokenizeScalar(u8, ns1, sep1); var it2 = mem.tokenizeScalar(u8, ns2, sep2); - return windows.eqlIgnoreCaseUtf8(it1.next().?, it2.next().?); + return windows.eqlIgnoreCaseWtf8(it1.next().?, it2.next().?); } fn compareDiskDesignators(kind: WindowsPath.Kind, p1: []const u8, p2: []const u8) bool { @@ -458,7 +472,7 @@ fn compareDiskDesignators(kind: WindowsPath.Kind, p1: []const u8, p2: []const u8 var it1 = mem.tokenizeScalar(u8, p1, sep1); var it2 = mem.tokenizeScalar(u8, p2, sep2); - return windows.eqlIgnoreCaseUtf8(it1.next().?, it2.next().?) and windows.eqlIgnoreCaseUtf8(it1.next().?, it2.next().?); + return windows.eqlIgnoreCaseWtf8(it1.next().?, it2.next().?) and windows.eqlIgnoreCaseWtf8(it1.next().?, it2.next().?); }, } } @@ -1099,7 +1113,7 @@ pub fn relativeWindows(allocator: Allocator, from: []const u8, to: []const u8) ! 
const from_component = from_it.next() orelse return allocator.dupe(u8, to_it.rest()); const to_rest = to_it.rest(); if (to_it.next()) |to_component| { - if (windows.eqlIgnoreCaseUtf8(from_component, to_component)) + if (windows.eqlIgnoreCaseWtf8(from_component, to_component)) continue; } var up_index_end = "..".len; @@ -1564,14 +1578,14 @@ pub fn ComponentIterator(comptime path_type: PathType, comptime T: type) type { }; } -pub const NativeUtf8ComponentIterator = ComponentIterator(switch (native_os) { +pub const NativeComponentIterator = ComponentIterator(switch (native_os) { .windows => .windows, .uefi => .uefi, else => .posix, }, u8); -pub fn componentIterator(path: []const u8) !NativeUtf8ComponentIterator { - return NativeUtf8ComponentIterator.init(path); +pub fn componentIterator(path: []const u8) !NativeComponentIterator { + return NativeComponentIterator.init(path); } test "ComponentIterator posix" { @@ -1826,7 +1840,7 @@ test "ComponentIterator windows" { } } -test "ComponentIterator windows UTF-16" { +test "ComponentIterator windows WTF-16" { // TODO: Fix on big endian architectures if (builtin.cpu.arch.endian() != .little) { return error.SkipZigTest; diff --git a/lib/std/fs/test.zig b/lib/std/fs/test.zig index 312e0c398f..33bea3c322 100644 --- a/lib/std/fs/test.zig +++ b/lib/std/fs/test.zig @@ -26,39 +26,39 @@ const PathType = enum { } pub const TransformError = std.os.RealPathError || error{OutOfMemory}; - pub const TransformFn = fn (allocator: mem.Allocator, dir: Dir, relative_path: []const u8) TransformError![]const u8; + pub const TransformFn = fn (allocator: mem.Allocator, dir: Dir, relative_path: [:0]const u8) TransformError![:0]const u8; pub fn getTransformFn(comptime path_type: PathType) TransformFn { switch (path_type) { .relative => return struct { - fn transform(allocator: mem.Allocator, dir: Dir, relative_path: []const u8) TransformError![]const u8 { + fn transform(allocator: mem.Allocator, dir: Dir, relative_path: [:0]const u8) 
TransformError![:0]const u8 { _ = allocator; _ = dir; return relative_path; } }.transform, .absolute => return struct { - fn transform(allocator: mem.Allocator, dir: Dir, relative_path: []const u8) TransformError![]const u8 { + fn transform(allocator: mem.Allocator, dir: Dir, relative_path: [:0]const u8) TransformError![:0]const u8 { // The final path may not actually exist which would cause realpath to fail. // So instead, we get the path of the dir and join it with the relative path. var fd_path_buf: [fs.MAX_PATH_BYTES]u8 = undefined; const dir_path = try os.getFdPath(dir.fd, &fd_path_buf); - return fs.path.join(allocator, &.{ dir_path, relative_path }); + return fs.path.joinZ(allocator, &.{ dir_path, relative_path }); } }.transform, .unc => return struct { - fn transform(allocator: mem.Allocator, dir: Dir, relative_path: []const u8) TransformError![]const u8 { + fn transform(allocator: mem.Allocator, dir: Dir, relative_path: [:0]const u8) TransformError![:0]const u8 { // Any drive absolute path (C:\foo) can be converted into a UNC path by // using '127.0.0.1' as the server name and '<drive letter>$' as the share name. var fd_path_buf: [fs.MAX_PATH_BYTES]u8 = undefined; const dir_path = try os.getFdPath(dir.fd, &fd_path_buf); const windows_path_type = std.os.windows.getUnprefixedPathType(u8, dir_path); switch (windows_path_type) { - .unc_absolute => return fs.path.join(allocator, &.{ dir_path, relative_path }), + .unc_absolute => return fs.path.joinZ(allocator, &.{ dir_path, relative_path }), .drive_absolute => { // `C:\<...>` -> `\\127.0.0.1\C$\<...>` const prepended = "\\\\127.0.0.1\\"; - var path = try fs.path.join(allocator, &.{ prepended, dir_path, relative_path }); + var path = try fs.path.joinZ(allocator, &.{ prepended, dir_path, relative_path }); path[prepended.len + 1] = '$'; return path; }, @@ -96,7 +96,7 @@ const TestContext = struct { /// Returns the `relative_path` transformed into the TestContext's `path_type`. 
/// The result is allocated by the TestContext's arena and will be free'd during /// `TestContext.deinit`. - pub fn transformPath(self: *TestContext, relative_path: []const u8) ![]const u8 { + pub fn transformPath(self: *TestContext, relative_path: [:0]const u8) ![:0]const u8 { return self.transform_fn(self.arena.allocator(), self.dir, relative_path); } }; @@ -1001,6 +1001,16 @@ test "openSelfExe" { self_exe_file.close(); } +test "selfExePath" { + if (builtin.os.tag == .wasi) return error.SkipZigTest; + + var buf: [fs.MAX_PATH_BYTES]u8 = undefined; + const buf_self_exe_path = try std.fs.selfExePath(&buf); + const alloc_self_exe_path = try std.fs.selfExePathAlloc(testing.allocator); + defer testing.allocator.free(alloc_self_exe_path); + try testing.expectEqualSlices(u8, buf_self_exe_path, alloc_self_exe_path); +} + test "deleteTree does not follow symlinks" { var tmp = tmpDir(.{}); defer tmp.cleanup(); @@ -1907,3 +1917,111 @@ test "delete a setAsCwd directory on Windows" { // Close the parent "tmp" so we don't leak the HANDLE. 
tmp.parent_dir.close(); } + +test "invalid UTF-8/WTF-8 paths" { + const expected_err = switch (builtin.os.tag) { + .wasi => error.InvalidUtf8, + .windows => error.InvalidWtf8, + else => return error.SkipZigTest, + }; + + try testWithAllSupportedPathTypes(struct { + fn impl(ctx: *TestContext) !void { + // This is both invalid UTF-8 and WTF-8, since \xFF is an invalid start byte + const invalid_path = try ctx.transformPath("\xFF"); + + try testing.expectError(expected_err, ctx.dir.openFile(invalid_path, .{})); + try testing.expectError(expected_err, ctx.dir.openFileZ(invalid_path, .{})); + + try testing.expectError(expected_err, ctx.dir.createFile(invalid_path, .{})); + try testing.expectError(expected_err, ctx.dir.createFileZ(invalid_path, .{})); + + try testing.expectError(expected_err, ctx.dir.makeDir(invalid_path)); + try testing.expectError(expected_err, ctx.dir.makeDirZ(invalid_path)); + + try testing.expectError(expected_err, ctx.dir.makePath(invalid_path)); + try testing.expectError(expected_err, ctx.dir.makeOpenPath(invalid_path, .{})); + + try testing.expectError(expected_err, ctx.dir.openDir(invalid_path, .{})); + try testing.expectError(expected_err, ctx.dir.openDirZ(invalid_path, .{})); + + try testing.expectError(expected_err, ctx.dir.deleteFile(invalid_path)); + try testing.expectError(expected_err, ctx.dir.deleteFileZ(invalid_path)); + + try testing.expectError(expected_err, ctx.dir.deleteDir(invalid_path)); + try testing.expectError(expected_err, ctx.dir.deleteDirZ(invalid_path)); + + try testing.expectError(expected_err, ctx.dir.rename(invalid_path, invalid_path)); + try testing.expectError(expected_err, ctx.dir.renameZ(invalid_path, invalid_path)); + + try testing.expectError(expected_err, ctx.dir.symLink(invalid_path, invalid_path, .{})); + try testing.expectError(expected_err, ctx.dir.symLinkZ(invalid_path, invalid_path, .{})); + if (builtin.os.tag == .wasi) { + try testing.expectError(expected_err, ctx.dir.symLinkWasi(invalid_path, invalid_path, 
.{})); + } + + try testing.expectError(expected_err, ctx.dir.readLink(invalid_path, &[_]u8{})); + try testing.expectError(expected_err, ctx.dir.readLinkZ(invalid_path, &[_]u8{})); + if (builtin.os.tag == .wasi) { + try testing.expectError(expected_err, ctx.dir.readLinkWasi(invalid_path, &[_]u8{})); + } + + try testing.expectError(expected_err, ctx.dir.readFile(invalid_path, &[_]u8{})); + try testing.expectError(expected_err, ctx.dir.readFileAlloc(testing.allocator, invalid_path, 0)); + + try testing.expectError(expected_err, ctx.dir.deleteTree(invalid_path)); + try testing.expectError(expected_err, ctx.dir.deleteTreeMinStackSize(invalid_path)); + + try testing.expectError(expected_err, ctx.dir.writeFile(invalid_path, "")); + try testing.expectError(expected_err, ctx.dir.writeFile2(.{ + .sub_path = invalid_path, + .data = "", + })); + + try testing.expectError(expected_err, ctx.dir.access(invalid_path, .{})); + try testing.expectError(expected_err, ctx.dir.accessZ(invalid_path, .{})); + + try testing.expectError(expected_err, ctx.dir.updateFile(invalid_path, ctx.dir, invalid_path, .{})); + try testing.expectError(expected_err, ctx.dir.copyFile(invalid_path, ctx.dir, invalid_path, .{})); + + try testing.expectError(expected_err, ctx.dir.statFile(invalid_path)); + + if (builtin.os.tag != .wasi) { + try testing.expectError(expected_err, ctx.dir.realpath(invalid_path, &[_]u8{})); + try testing.expectError(expected_err, ctx.dir.realpathZ(invalid_path, &[_]u8{})); + try testing.expectError(expected_err, ctx.dir.realpathAlloc(testing.allocator, invalid_path)); + } + + try testing.expectError(expected_err, fs.rename(ctx.dir, invalid_path, ctx.dir, invalid_path)); + try testing.expectError(expected_err, fs.renameZ(ctx.dir, invalid_path, ctx.dir, invalid_path)); + + if (builtin.os.tag != .wasi and ctx.path_type != .relative) { + try testing.expectError(expected_err, fs.updateFileAbsolute(invalid_path, invalid_path, .{})); + try testing.expectError(expected_err, 
fs.copyFileAbsolute(invalid_path, invalid_path, .{})); + try testing.expectError(expected_err, fs.makeDirAbsolute(invalid_path)); + try testing.expectError(expected_err, fs.makeDirAbsoluteZ(invalid_path)); + try testing.expectError(expected_err, fs.deleteDirAbsolute(invalid_path)); + try testing.expectError(expected_err, fs.deleteDirAbsoluteZ(invalid_path)); + try testing.expectError(expected_err, fs.renameAbsolute(invalid_path, invalid_path)); + try testing.expectError(expected_err, fs.renameAbsoluteZ(invalid_path, invalid_path)); + try testing.expectError(expected_err, fs.openDirAbsolute(invalid_path, .{})); + try testing.expectError(expected_err, fs.openDirAbsoluteZ(invalid_path, .{})); + try testing.expectError(expected_err, fs.openFileAbsolute(invalid_path, .{})); + try testing.expectError(expected_err, fs.openFileAbsoluteZ(invalid_path, .{})); + try testing.expectError(expected_err, fs.accessAbsolute(invalid_path, .{})); + try testing.expectError(expected_err, fs.accessAbsoluteZ(invalid_path, .{})); + try testing.expectError(expected_err, fs.createFileAbsolute(invalid_path, .{})); + try testing.expectError(expected_err, fs.createFileAbsoluteZ(invalid_path, .{})); + try testing.expectError(expected_err, fs.deleteFileAbsolute(invalid_path)); + try testing.expectError(expected_err, fs.deleteFileAbsoluteZ(invalid_path)); + try testing.expectError(expected_err, fs.deleteTreeAbsolute(invalid_path)); + var readlink_buf: [fs.MAX_PATH_BYTES]u8 = undefined; + try testing.expectError(expected_err, fs.readLinkAbsolute(invalid_path, &readlink_buf)); + try testing.expectError(expected_err, fs.readLinkAbsoluteZ(invalid_path, &readlink_buf)); + try testing.expectError(expected_err, fs.symLinkAbsolute(invalid_path, invalid_path, .{})); + try testing.expectError(expected_err, fs.symLinkAbsoluteZ(invalid_path, invalid_path, .{})); + try testing.expectError(expected_err, fs.realpathAlloc(testing.allocator, invalid_path)); + } + } + }.impl); +} diff --git a/lib/std/fs/watch.zig 
b/lib/std/fs/watch.zig deleted file mode 100644 index 3dd7e41b8d..0000000000 --- a/lib/std/fs/watch.zig +++ /dev/null @@ -1,719 +0,0 @@ -const std = @import("std"); -const builtin = @import("builtin"); -const event = std.event; -const assert = std.debug.assert; -const testing = std.testing; -const os = std.os; -const mem = std.mem; -const windows = os.windows; -const Loop = event.Loop; -const fd_t = os.fd_t; -const File = std.fs.File; -const Allocator = mem.Allocator; - -const global_event_loop = Loop.instance orelse - @compileError("std.fs.Watch currently only works with event-based I/O"); - -const WatchEventId = enum { - CloseWrite, - Delete, -}; - -const WatchEventError = error{ - UserResourceLimitReached, - SystemResources, - AccessDenied, - Unexpected, // TODO remove this possibility -}; - -pub fn Watch(comptime V: type) type { - return struct { - channel: event.Channel(Event.Error!Event), - os_data: OsData, - allocator: Allocator, - - const OsData = switch (builtin.os.tag) { - // TODO https://github.com/ziglang/zig/issues/3778 - .macos, .freebsd, .netbsd, .dragonfly, .openbsd => KqOsData, - .linux => LinuxOsData, - .windows => WindowsOsData, - - else => @compileError("Unsupported OS"), - }; - - const KqOsData = struct { - table_lock: event.Lock, - file_table: FileTable, - - const FileTable = std.StringHashMapUnmanaged(*Put); - const Put = struct { - putter_frame: @Frame(kqPutEvents), - cancelled: bool = false, - value: V, - }; - }; - - const WindowsOsData = struct { - table_lock: event.Lock, - dir_table: DirTable, - cancelled: bool = false, - - const DirTable = std.StringHashMapUnmanaged(*Dir); - const FileTable = std.StringHashMapUnmanaged(V); - - const Dir = struct { - putter_frame: @Frame(windowsDirReader), - file_table: FileTable, - dir_handle: os.windows.HANDLE, - }; - }; - - const LinuxOsData = struct { - putter_frame: @Frame(linuxEventPutter), - inotify_fd: i32, - wd_table: WdTable, - table_lock: event.Lock, - cancelled: bool = false, - - const WdTable 
= std.AutoHashMapUnmanaged(i32, Dir); - const FileTable = std.StringHashMapUnmanaged(V); - - const Dir = struct { - dirname: []const u8, - file_table: FileTable, - }; - }; - - const Self = @This(); - - pub const Event = struct { - id: Id, - data: V, - dirname: []const u8, - basename: []const u8, - - pub const Id = WatchEventId; - pub const Error = WatchEventError; - }; - - pub fn init(allocator: Allocator, event_buf_count: usize) !*Self { - const self = try allocator.create(Self); - errdefer allocator.destroy(self); - - switch (builtin.os.tag) { - .linux => { - const inotify_fd = try os.inotify_init1(os.linux.IN_NONBLOCK | os.linux.IN_CLOEXEC); - errdefer os.close(inotify_fd); - - self.* = Self{ - .allocator = allocator, - .channel = undefined, - .os_data = OsData{ - .putter_frame = undefined, - .inotify_fd = inotify_fd, - .wd_table = OsData.WdTable.init(allocator), - .table_lock = event.Lock{}, - }, - }; - - const buf = try allocator.alloc(Event.Error!Event, event_buf_count); - self.channel.init(buf); - self.os_data.putter_frame = async self.linuxEventPutter(); - return self; - }, - - .windows => { - self.* = Self{ - .allocator = allocator, - .channel = undefined, - .os_data = OsData{ - .table_lock = event.Lock{}, - .dir_table = OsData.DirTable.init(allocator), - }, - }; - - const buf = try allocator.alloc(Event.Error!Event, event_buf_count); - self.channel.init(buf); - return self; - }, - - .macos, .freebsd, .netbsd, .dragonfly, .openbsd => { - self.* = Self{ - .allocator = allocator, - .channel = undefined, - .os_data = OsData{ - .table_lock = event.Lock{}, - .file_table = OsData.FileTable.init(allocator), - }, - }; - - const buf = try allocator.alloc(Event.Error!Event, event_buf_count); - self.channel.init(buf); - return self; - }, - else => @compileError("Unsupported OS"), - } - } - - pub fn deinit(self: *Self) void { - switch (builtin.os.tag) { - .macos, .freebsd, .netbsd, .dragonfly, .openbsd => { - var it = self.os_data.file_table.iterator(); - while 
(it.next()) |entry| { - const key = entry.key_ptr.*; - const value = entry.value_ptr.*; - value.cancelled = true; - // @TODO Close the fd here? - await value.putter_frame; - self.allocator.free(key); - self.allocator.destroy(value); - } - }, - .linux => { - self.os_data.cancelled = true; - { - // Remove all directory watches linuxEventPutter will take care of - // cleaning up the memory and closing the inotify fd. - var dir_it = self.os_data.wd_table.keyIterator(); - while (dir_it.next()) |wd_key| { - const rc = os.linux.inotify_rm_watch(self.os_data.inotify_fd, wd_key.*); - // Errno can only be EBADF, EINVAL if either the inotify fs or the wd are invalid - std.debug.assert(rc == 0); - } - } - await self.os_data.putter_frame; - }, - .windows => { - self.os_data.cancelled = true; - var dir_it = self.os_data.dir_table.iterator(); - while (dir_it.next()) |dir_entry| { - if (windows.kernel32.CancelIoEx(dir_entry.value.dir_handle, null) != 0) { - // We canceled the pending ReadDirectoryChangesW operation, but our - // frame is still suspending, now waiting indefinitely. 
- // Thus, it is safe to resume it ourslves - resume dir_entry.value.putter_frame; - } else { - std.debug.assert(windows.kernel32.GetLastError() == .NOT_FOUND); - // We are at another suspend point, we can await safely for the - // function to exit the loop - await dir_entry.value.putter_frame; - } - - self.allocator.free(dir_entry.key_ptr.*); - var file_it = dir_entry.value.file_table.keyIterator(); - while (file_it.next()) |file_entry| { - self.allocator.free(file_entry.*); - } - dir_entry.value.file_table.deinit(self.allocator); - self.allocator.destroy(dir_entry.value_ptr.*); - } - self.os_data.dir_table.deinit(self.allocator); - }, - else => @compileError("Unsupported OS"), - } - self.allocator.free(self.channel.buffer_nodes); - self.channel.deinit(); - self.allocator.destroy(self); - } - - pub fn addFile(self: *Self, file_path: []const u8, value: V) !?V { - switch (builtin.os.tag) { - .macos, .freebsd, .netbsd, .dragonfly, .openbsd => return addFileKEvent(self, file_path, value), - .linux => return addFileLinux(self, file_path, value), - .windows => return addFileWindows(self, file_path, value), - else => @compileError("Unsupported OS"), - } - } - - fn addFileKEvent(self: *Self, file_path: []const u8, value: V) !?V { - var realpath_buf: [std.fs.MAX_PATH_BYTES]u8 = undefined; - const realpath = try os.realpath(file_path, &realpath_buf); - - const held = self.os_data.table_lock.acquire(); - defer held.release(); - - const gop = try self.os_data.file_table.getOrPut(self.allocator, realpath); - errdefer assert(self.os_data.file_table.remove(realpath)); - if (gop.found_existing) { - const prev_value = gop.value_ptr.value; - gop.value_ptr.value = value; - return prev_value; - } - - gop.key_ptr.* = try self.allocator.dupe(u8, realpath); - errdefer self.allocator.free(gop.key_ptr.*); - gop.value_ptr.* = try self.allocator.create(OsData.Put); - errdefer self.allocator.destroy(gop.value_ptr.*); - gop.value_ptr.* = .{ - .putter_frame = undefined, - .value = value, - }; 
- - // @TODO Can I close this fd and get an error from bsdWaitKev? - const flags = if (comptime builtin.target.isDarwin()) os.O.SYMLINK | os.O.EVTONLY else 0; - const fd = try os.open(realpath, flags, 0); - gop.value_ptr.putter_frame = async self.kqPutEvents(fd, gop.key_ptr.*, gop.value_ptr.*); - return null; - } - - fn kqPutEvents(self: *Self, fd: os.fd_t, file_path: []const u8, put: *OsData.Put) void { - global_event_loop.beginOneEvent(); - defer { - global_event_loop.finishOneEvent(); - // @TODO: Remove this if we force close otherwise - os.close(fd); - } - - // We need to manually do a bsdWaitKev to access the fflags. - var resume_node = event.Loop.ResumeNode.Basic{ - .base = .{ - .id = .Basic, - .handle = @frame(), - .overlapped = event.Loop.ResumeNode.overlapped_init, - }, - .kev = undefined, - }; - - var kevs = [1]os.Kevent{undefined}; - const kev = &kevs[0]; - - while (!put.cancelled) { - kev.* = os.Kevent{ - .ident = @as(usize, @intCast(fd)), - .filter = os.EVFILT_VNODE, - .flags = os.EV_ADD | os.EV_ENABLE | os.EV_CLEAR | os.EV_ONESHOT | - os.NOTE_WRITE | os.NOTE_DELETE | os.NOTE_REVOKE, - .fflags = 0, - .data = 0, - .udata = @intFromPtr(&resume_node.base), - }; - suspend { - global_event_loop.beginOneEvent(); - errdefer global_event_loop.finishOneEvent(); - - const empty_kevs = &[0]os.Kevent{}; - _ = os.kevent(global_event_loop.os_data.kqfd, &kevs, empty_kevs, null) catch |err| switch (err) { - error.EventNotFound, - error.ProcessNotFound, - error.Overflow, - => unreachable, - error.AccessDenied, error.SystemResources => |e| { - self.channel.put(e); - continue; - }, - }; - } - - if (kev.flags & os.EV_ERROR != 0) { - self.channel.put(os.unexpectedErrno(os.errno(kev.data))); - continue; - } - - if (kev.fflags & os.NOTE_DELETE != 0 or kev.fflags & os.NOTE_REVOKE != 0) { - self.channel.put(Self.Event{ - .id = .Delete, - .data = put.value, - .dirname = std.fs.path.dirname(file_path) orelse "/", - .basename = std.fs.path.basename(file_path), - }); - } else if 
(kev.fflags & os.NOTE_WRITE != 0) { - self.channel.put(Self.Event{ - .id = .CloseWrite, - .data = put.value, - .dirname = std.fs.path.dirname(file_path) orelse "/", - .basename = std.fs.path.basename(file_path), - }); - } - } - } - - fn addFileLinux(self: *Self, file_path: []const u8, value: V) !?V { - const dirname = std.fs.path.dirname(file_path) orelse if (file_path[0] == '/') "/" else "."; - const basename = std.fs.path.basename(file_path); - - const wd = try os.inotify_add_watch( - self.os_data.inotify_fd, - dirname, - os.linux.IN_CLOSE_WRITE | os.linux.IN_ONLYDIR | os.linux.IN_DELETE | os.linux.IN_EXCL_UNLINK, - ); - // wd is either a newly created watch or an existing one. - - const held = self.os_data.table_lock.acquire(); - defer held.release(); - - const gop = try self.os_data.wd_table.getOrPut(self.allocator, wd); - errdefer assert(self.os_data.wd_table.remove(wd)); - if (!gop.found_existing) { - gop.value_ptr.* = OsData.Dir{ - .dirname = try self.allocator.dupe(u8, dirname), - .file_table = OsData.FileTable.init(self.allocator), - }; - } - - const dir = gop.value_ptr; - const file_table_gop = try dir.file_table.getOrPut(self.allocator, basename); - errdefer assert(dir.file_table.remove(basename)); - if (file_table_gop.found_existing) { - const prev_value = file_table_gop.value_ptr.*; - file_table_gop.value_ptr.* = value; - return prev_value; - } else { - file_table_gop.key_ptr.* = try self.allocator.dupe(u8, basename); - file_table_gop.value_ptr.* = value; - return null; - } - } - - fn addFileWindows(self: *Self, file_path: []const u8, value: V) !?V { - // TODO we might need to convert dirname and basename to canonical file paths ("short"?) 
- const dirname = std.fs.path.dirname(file_path) orelse if (file_path[0] == '/') "/" else "."; - var dirname_path_space: windows.PathSpace = undefined; - dirname_path_space.len = try std.unicode.utf8ToUtf16Le(&dirname_path_space.data, dirname); - dirname_path_space.data[dirname_path_space.len] = 0; - - const basename = std.fs.path.basename(file_path); - var basename_path_space: windows.PathSpace = undefined; - basename_path_space.len = try std.unicode.utf8ToUtf16Le(&basename_path_space.data, basename); - basename_path_space.data[basename_path_space.len] = 0; - - const held = self.os_data.table_lock.acquire(); - defer held.release(); - - const gop = try self.os_data.dir_table.getOrPut(self.allocator, dirname); - errdefer assert(self.os_data.dir_table.remove(dirname)); - if (gop.found_existing) { - const dir = gop.value_ptr.*; - - const file_gop = try dir.file_table.getOrPut(self.allocator, basename); - errdefer assert(dir.file_table.remove(basename)); - if (file_gop.found_existing) { - const prev_value = file_gop.value_ptr.*; - file_gop.value_ptr.* = value; - return prev_value; - } else { - file_gop.value_ptr.* = value; - file_gop.key_ptr.* = try self.allocator.dupe(u8, basename); - return null; - } - } else { - const dir_handle = try windows.OpenFile(dirname_path_space.span(), .{ - .dir = std.fs.cwd().fd, - .access_mask = windows.FILE_LIST_DIRECTORY, - .creation = windows.FILE_OPEN, - .io_mode = .evented, - .filter = .dir_only, - }); - errdefer windows.CloseHandle(dir_handle); - - const dir = try self.allocator.create(OsData.Dir); - errdefer self.allocator.destroy(dir); - - gop.key_ptr.* = try self.allocator.dupe(u8, dirname); - errdefer self.allocator.free(gop.key_ptr.*); - - dir.* = OsData.Dir{ - .file_table = OsData.FileTable.init(self.allocator), - .putter_frame = undefined, - .dir_handle = dir_handle, - }; - gop.value_ptr.* = dir; - try dir.file_table.put(self.allocator, try self.allocator.dupe(u8, basename), value); - dir.putter_frame = async 
self.windowsDirReader(dir, gop.key_ptr.*); - return null; - } - } - - fn windowsDirReader(self: *Self, dir: *OsData.Dir, dirname: []const u8) void { - defer os.close(dir.dir_handle); - var resume_node = Loop.ResumeNode.Basic{ - .base = Loop.ResumeNode{ - .id = .Basic, - .handle = @frame(), - .overlapped = windows.OVERLAPPED{ - .Internal = 0, - .InternalHigh = 0, - .DUMMYUNIONNAME = .{ - .DUMMYSTRUCTNAME = .{ - .Offset = 0, - .OffsetHigh = 0, - }, - }, - .hEvent = null, - }, - }, - }; - - var event_buf: [4096]u8 align(@alignOf(windows.FILE_NOTIFY_INFORMATION)) = undefined; - - global_event_loop.beginOneEvent(); - defer global_event_loop.finishOneEvent(); - - while (!self.os_data.cancelled) main_loop: { - suspend { - _ = windows.kernel32.ReadDirectoryChangesW( - dir.dir_handle, - &event_buf, - event_buf.len, - windows.FALSE, // watch subtree - windows.FILE_NOTIFY_CHANGE_FILE_NAME | windows.FILE_NOTIFY_CHANGE_DIR_NAME | - windows.FILE_NOTIFY_CHANGE_ATTRIBUTES | windows.FILE_NOTIFY_CHANGE_SIZE | - windows.FILE_NOTIFY_CHANGE_LAST_WRITE | windows.FILE_NOTIFY_CHANGE_LAST_ACCESS | - windows.FILE_NOTIFY_CHANGE_CREATION | windows.FILE_NOTIFY_CHANGE_SECURITY, - null, // number of bytes transferred (unused for async) - &resume_node.base.overlapped, - null, // completion routine - unused because we use IOCP - ); - } - - var bytes_transferred: windows.DWORD = undefined; - if (windows.kernel32.GetOverlappedResult( - dir.dir_handle, - &resume_node.base.overlapped, - &bytes_transferred, - windows.FALSE, - ) == 0) { - const potential_error = windows.kernel32.GetLastError(); - const err = switch (potential_error) { - .OPERATION_ABORTED, .IO_INCOMPLETE => err_blk: { - if (self.os_data.cancelled) - break :main_loop - else - break :err_blk windows.unexpectedError(potential_error); - }, - else => |err| windows.unexpectedError(err), - }; - self.channel.put(err); - } else { - var ptr: [*]u8 = &event_buf; - const end_ptr = ptr + bytes_transferred; - while (@intFromPtr(ptr) < 
@intFromPtr(end_ptr)) { - const ev = @as(*const windows.FILE_NOTIFY_INFORMATION, @ptrCast(ptr)); - const emit = switch (ev.Action) { - windows.FILE_ACTION_REMOVED => WatchEventId.Delete, - windows.FILE_ACTION_MODIFIED => .CloseWrite, - else => null, - }; - if (emit) |id| { - const basename_ptr = @as([*]u16, @ptrCast(ptr + @sizeOf(windows.FILE_NOTIFY_INFORMATION))); - const basename_utf16le = basename_ptr[0 .. ev.FileNameLength / 2]; - var basename_data: [std.fs.MAX_PATH_BYTES]u8 = undefined; - const basename = basename_data[0 .. std.unicode.utf16LeToUtf8(&basename_data, basename_utf16le) catch unreachable]; - - if (dir.file_table.getEntry(basename)) |entry| { - self.channel.put(Event{ - .id = id, - .data = entry.value_ptr.*, - .dirname = dirname, - .basename = entry.key_ptr.*, - }); - } - } - - if (ev.NextEntryOffset == 0) break; - ptr = @alignCast(ptr + ev.NextEntryOffset); - } - } - } - } - - pub fn removeFile(self: *Self, file_path: []const u8) !?V { - switch (builtin.os.tag) { - .linux => { - const dirname = std.fs.path.dirname(file_path) orelse if (file_path[0] == '/') "/" else "."; - const basename = std.fs.path.basename(file_path); - - const held = self.os_data.table_lock.acquire(); - defer held.release(); - - const dir = self.os_data.wd_table.get(dirname) orelse return null; - if (dir.file_table.fetchRemove(basename)) |file_entry| { - self.allocator.free(file_entry.key); - return file_entry.value; - } - return null; - }, - .windows => { - const dirname = std.fs.path.dirname(file_path) orelse if (file_path[0] == '/') "/" else "."; - const basename = std.fs.path.basename(file_path); - - const held = self.os_data.table_lock.acquire(); - defer held.release(); - - const dir = self.os_data.dir_table.get(dirname) orelse return null; - if (dir.file_table.fetchRemove(basename)) |file_entry| { - self.allocator.free(file_entry.key); - return file_entry.value; - } - return null; - }, - .macos, .freebsd, .netbsd, .dragonfly, .openbsd => { - var realpath_buf: 
[std.fs.MAX_PATH_BYTES]u8 = undefined; - const realpath = try os.realpath(file_path, &realpath_buf); - - const held = self.os_data.table_lock.acquire(); - defer held.release(); - - const entry = self.os_data.file_table.getEntry(realpath) orelse return null; - entry.value_ptr.cancelled = true; - // @TODO Close the fd here? - await entry.value_ptr.putter_frame; - self.allocator.free(entry.key_ptr.*); - self.allocator.destroy(entry.value_ptr.*); - - assert(self.os_data.file_table.remove(realpath)); - }, - else => @compileError("Unsupported OS"), - } - } - - fn linuxEventPutter(self: *Self) void { - global_event_loop.beginOneEvent(); - - defer { - std.debug.assert(self.os_data.wd_table.count() == 0); - self.os_data.wd_table.deinit(self.allocator); - os.close(self.os_data.inotify_fd); - self.allocator.free(self.channel.buffer_nodes); - self.channel.deinit(); - global_event_loop.finishOneEvent(); - } - - var event_buf: [4096]u8 align(@alignOf(os.linux.inotify_event)) = undefined; - - while (!self.os_data.cancelled) { - const bytes_read = global_event_loop.read(self.os_data.inotify_fd, &event_buf, false) catch unreachable; - - var ptr: [*]u8 = &event_buf; - const end_ptr = ptr + bytes_read; - while (@intFromPtr(ptr) < @intFromPtr(end_ptr)) { - const ev = @as(*const os.linux.inotify_event, @ptrCast(ptr)); - if (ev.mask & os.linux.IN_CLOSE_WRITE == os.linux.IN_CLOSE_WRITE) { - const basename_ptr = ptr + @sizeOf(os.linux.inotify_event); - const basename = std.mem.span(@as([*:0]u8, @ptrCast(basename_ptr))); - - const dir = &self.os_data.wd_table.get(ev.wd).?; - if (dir.file_table.getEntry(basename)) |file_value| { - self.channel.put(Event{ - .id = .CloseWrite, - .data = file_value.value_ptr.*, - .dirname = dir.dirname, - .basename = file_value.key_ptr.*, - }); - } - } else if (ev.mask & os.linux.IN_IGNORED == os.linux.IN_IGNORED) { - // Directory watch was removed - const held = self.os_data.table_lock.acquire(); - defer held.release(); - if 
(self.os_data.wd_table.fetchRemove(ev.wd)) |wd_entry| { - var file_it = wd_entry.value.file_table.keyIterator(); - while (file_it.next()) |file_entry| { - self.allocator.free(file_entry.*); - } - self.allocator.free(wd_entry.value.dirname); - wd_entry.value.file_table.deinit(self.allocator); - } - } else if (ev.mask & os.linux.IN_DELETE == os.linux.IN_DELETE) { - // File or directory was removed or deleted - const basename_ptr = ptr + @sizeOf(os.linux.inotify_event); - const basename = std.mem.span(@as([*:0]u8, @ptrCast(basename_ptr))); - - const dir = &self.os_data.wd_table.get(ev.wd).?; - if (dir.file_table.getEntry(basename)) |file_value| { - self.channel.put(Event{ - .id = .Delete, - .data = file_value.value_ptr.*, - .dirname = dir.dirname, - .basename = file_value.key_ptr.*, - }); - } - } - - ptr = @alignCast(ptr + @sizeOf(os.linux.inotify_event) + ev.len); - } - } - } - }; -} - -const test_tmp_dir = "std_event_fs_test"; - -test "write a file, watch it, write it again, delete it" { - if (!std.io.is_async) return error.SkipZigTest; - // TODO https://github.com/ziglang/zig/issues/1908 - if (builtin.single_threaded) return error.SkipZigTest; - - try std.fs.cwd().makePath(test_tmp_dir); - defer std.fs.cwd().deleteTree(test_tmp_dir) catch {}; - - return testWriteWatchWriteDelete(std.testing.allocator); -} - -fn testWriteWatchWriteDelete(allocator: Allocator) !void { - const file_path = try std.fs.path.join(allocator, &[_][]const u8{ test_tmp_dir, "file.txt" }); - defer allocator.free(file_path); - - const contents = - \\line 1 - \\line 2 - ; - const line2_offset = 7; - - // first just write then read the file - try std.fs.cwd().writeFile(file_path, contents); - - const read_contents = try std.fs.cwd().readFileAlloc(allocator, file_path, 1024 * 1024); - defer allocator.free(read_contents); - try testing.expectEqualSlices(u8, contents, read_contents); - - // now watch the file - var watch = try Watch(void).init(allocator, 0); - defer watch.deinit(); - - try 
testing.expect((try watch.addFile(file_path, {})) == null); - - var ev = async watch.channel.get(); - var ev_consumed = false; - defer if (!ev_consumed) { - _ = await ev; - }; - - // overwrite line 2 - const file = try std.fs.cwd().openFile(file_path, .{ .mode = .read_write }); - { - defer file.close(); - const write_contents = "lorem ipsum"; - var iovec = [_]os.iovec_const{.{ - .iov_base = write_contents, - .iov_len = write_contents.len, - }}; - _ = try file.pwritevAll(&iovec, line2_offset); - } - - switch ((try await ev).id) { - .CloseWrite => { - ev_consumed = true; - }, - .Delete => @panic("wrong event"), - } - - const contents_updated = try std.fs.cwd().readFileAlloc(allocator, file_path, 1024 * 1024); - defer allocator.free(contents_updated); - - try testing.expectEqualSlices(u8, - \\line 1 - \\lorem ipsum - , contents_updated); - - ev = async watch.channel.get(); - ev_consumed = false; - - try std.fs.cwd().deleteFile(file_path); - switch ((try await ev).id) { - .Delete => { - ev_consumed = true; - }, - .CloseWrite => @panic("wrong event"), - } -} - -// TODO Test: Add another file watch, remove the old file watch, get an event in the new diff --git a/lib/std/os.zig b/lib/std/os.zig index a31a2d943c..00496602fd 100644 --- a/lib/std/os.zig +++ b/lib/std/os.zig @@ -3,7 +3,7 @@ //! * Convert "errno"-style error codes into Zig errors. //! * When null-terminated byte buffers are required, provide APIs which accept //! slices as well as APIs which accept null-terminated byte buffers. Same goes -//! for UTF-16LE encoding. +//! for WTF-16LE encoding. //! * Where operating systems share APIs, e.g. POSIX, these thin wrappers provide //! cross platform abstracting. //! 
* When there exists a corresponding libc function and linking libc, the libc @@ -498,6 +498,7 @@ fn fchmodat2(dirfd: fd_t, path: []const u8, mode: mode_t, flags: u32) FChmodAtEr const stat = fstatatZ(pathfd, "", AT.EMPTY_PATH) catch |err| switch (err) { error.NameTooLong => unreachable, error.FileNotFound => unreachable, + error.InvalidUtf8 => unreachable, else => |e| return e, }; if ((stat.mode & S.IFMT) == S.IFLNK) @@ -1614,9 +1615,16 @@ pub const OpenError = error{ /// The underlying filesystem does not support file locks FileLocksNotSupported, + /// Path contains characters that are disallowed by the underlying filesystem. BadPathName, + + /// WASI-only; file paths must be valid UTF-8. InvalidUtf8, + /// Windows-only; file paths provided by the user must be valid WTF-8. + /// https://simonsapin.github.io/wtf-8/ + InvalidWtf8, + /// On Windows, `\\server` or `\\server\share` was not found. NetworkNotFound, @@ -1634,6 +1642,9 @@ pub const OpenError = error{ } || UnexpectedError; /// Open and possibly create a file. Keeps trying if it gets interrupted. +/// On Windows, `file_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/). +/// On WASI, `file_path` should be encoded as valid UTF-8. +/// On other platforms, `file_path` is an opaque sequence of bytes with no particular encoding. /// See also `openZ`. pub fn open(file_path: []const u8, flags: O, perm: mode_t) OpenError!fd_t { if (builtin.os.tag == .windows) { @@ -1646,6 +1657,9 @@ pub fn open(file_path: []const u8, flags: O, perm: mode_t) OpenError!fd_t { } /// Open and possibly create a file. Keeps trying if it gets interrupted. +/// On Windows, `file_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/). +/// On WASI, `file_path` should be encoded as valid UTF-8. +/// On other platforms, `file_path` is an opaque sequence of bytes with no particular encoding. /// See also `open`. 
pub fn openZ(file_path: [*:0]const u8, flags: O, perm: mode_t) OpenError!fd_t { if (builtin.os.tag == .windows) { @@ -1687,6 +1701,9 @@ pub fn openZ(file_path: [*:0]const u8, flags: O, perm: mode_t) OpenError!fd_t { /// Open and possibly create a file. Keeps trying if it gets interrupted. /// `file_path` is relative to the open directory handle `dir_fd`. +/// On Windows, `file_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/). +/// On WASI, `file_path` should be encoded as valid UTF-8. +/// On other platforms, `file_path` is an opaque sequence of bytes with no particular encoding. /// See also `openatZ`. pub fn openat(dir_fd: fd_t, file_path: []const u8, flags: O, mode: mode_t) OpenError!fd_t { if (builtin.os.tag == .windows) { @@ -1829,6 +1846,7 @@ pub fn openatWasi( .EXIST => return error.PathAlreadyExists, .BUSY => return error.DeviceBusy, .NOTCAPABLE => return error.AccessDenied, + .ILSEQ => return error.InvalidUtf8, else => |err| return unexpectedErrno(err), } } @@ -1836,6 +1854,9 @@ pub fn openatWasi( /// Open and possibly create a file. Keeps trying if it gets interrupted. /// `file_path` is relative to the open directory handle `dir_fd`. +/// On Windows, `file_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/). +/// On WASI, `file_path` should be encoded as valid UTF-8. +/// On other platforms, `file_path` is an opaque sequence of bytes with no particular encoding. /// See also `openat`. pub fn openatZ(dir_fd: fd_t, file_path: [*:0]const u8, flags: O, mode: mode_t) OpenError!fd_t { if (builtin.os.tag == .windows) { @@ -2156,13 +2177,23 @@ pub const SymLinkError = error{ ReadOnlyFileSystem, NotDir, NameTooLong, + + /// WASI-only; file paths must be valid UTF-8. InvalidUtf8, + + /// Windows-only; file paths provided by the user must be valid WTF-8. 
+ /// https://simonsapin.github.io/wtf-8/ + InvalidWtf8, + BadPathName, } || UnexpectedError; /// Creates a symbolic link named `sym_link_path` which contains the string `target_path`. /// A symbolic link (also known as a soft link) may point to an existing file or to a nonexistent /// one; the latter case is known as a dangling link. +/// On Windows, both paths should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/). +/// On WASI, both paths should be encoded as valid UTF-8. +/// On other platforms, both paths are an opaque sequence of bytes with no particular encoding. /// If `sym_link_path` exists, it will not be overwritten. /// See also `symlinkZ. pub fn symlink(target_path: []const u8, sym_link_path: []const u8) SymLinkError!void { @@ -2200,6 +2231,10 @@ pub fn symlinkZ(target_path: [*:0]const u8, sym_link_path: [*:0]const u8) SymLin .NOMEM => return error.SystemResources, .NOSPC => return error.NoSpaceLeft, .ROFS => return error.ReadOnlyFileSystem, + .ILSEQ => |err| if (builtin.os.tag == .wasi) + return error.InvalidUtf8 + else + return unexpectedErrno(err), else => |err| return unexpectedErrno(err), } } @@ -2208,6 +2243,9 @@ pub fn symlinkZ(target_path: [*:0]const u8, sym_link_path: [*:0]const u8) SymLin /// `target_path` **relative** to `newdirfd` directory handle. /// A symbolic link (also known as a soft link) may point to an existing file or to a nonexistent /// one; the latter case is known as a dangling link. +/// On Windows, both paths should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/). +/// On WASI, both paths should be encoded as valid UTF-8. +/// On other platforms, both paths are an opaque sequence of bytes with no particular encoding. /// If `sym_link_path` exists, it will not be overwritten. /// See also `symlinkatWasi`, `symlinkatZ` and `symlinkatW`. 
pub fn symlinkat(target_path: []const u8, newdirfd: fd_t, sym_link_path: []const u8) SymLinkError!void { @@ -2242,6 +2280,7 @@ pub fn symlinkatWasi(target_path: []const u8, newdirfd: fd_t, sym_link_path: []c .NOSPC => return error.NoSpaceLeft, .ROFS => return error.ReadOnlyFileSystem, .NOTCAPABLE => return error.AccessDenied, + .ILSEQ => return error.InvalidUtf8, else => |err| return unexpectedErrno(err), } } @@ -2270,6 +2309,10 @@ pub fn symlinkatZ(target_path: [*:0]const u8, newdirfd: fd_t, sym_link_path: [*: .NOMEM => return error.SystemResources, .NOSPC => return error.NoSpaceLeft, .ROFS => return error.ReadOnlyFileSystem, + .ILSEQ => |err| if (builtin.os.tag == .wasi) + return error.InvalidUtf8 + else + return unexpectedErrno(err), else => |err| return unexpectedErrno(err), } } @@ -2287,8 +2330,13 @@ pub const LinkError = UnexpectedError || error{ NoSpaceLeft, ReadOnlyFileSystem, NotSameFileSystem, + + /// WASI-only; file paths must be valid UTF-8. + InvalidUtf8, }; +/// On WASI, both paths should be encoded as valid UTF-8. +/// On other platforms, both paths are an opaque sequence of bytes with no particular encoding. pub fn linkZ(oldpath: [*:0]const u8, newpath: [*:0]const u8, flags: i32) LinkError!void { if (builtin.os.tag == .wasi and !builtin.link_libc) { return link(mem.sliceTo(oldpath, 0), mem.sliceTo(newpath, 0), flags); @@ -2310,10 +2358,16 @@ pub fn linkZ(oldpath: [*:0]const u8, newpath: [*:0]const u8, flags: i32) LinkErr .ROFS => return error.ReadOnlyFileSystem, .XDEV => return error.NotSameFileSystem, .INVAL => unreachable, + .ILSEQ => |err| if (builtin.os.tag == .wasi) + return error.InvalidUtf8 + else + return unexpectedErrno(err), else => |err| return unexpectedErrno(err), } } +/// On WASI, both paths should be encoded as valid UTF-8. +/// On other platforms, both paths are an opaque sequence of bytes with no particular encoding. 
pub fn link(oldpath: []const u8, newpath: []const u8, flags: i32) LinkError!void { if (builtin.os.tag == .wasi and !builtin.link_libc) { return linkat(wasi.AT.FDCWD, oldpath, wasi.AT.FDCWD, newpath, flags) catch |err| switch (err) { @@ -2328,6 +2382,8 @@ pub fn link(oldpath: []const u8, newpath: []const u8, flags: i32) LinkError!void pub const LinkatError = LinkError || error{NotDir}; +/// On WASI, both paths should be encoded as valid UTF-8. +/// On other platforms, both paths are an opaque sequence of bytes with no particular encoding. pub fn linkatZ( olddir: fd_t, oldpath: [*:0]const u8, @@ -2356,10 +2412,16 @@ pub fn linkatZ( .ROFS => return error.ReadOnlyFileSystem, .XDEV => return error.NotSameFileSystem, .INVAL => unreachable, + .ILSEQ => |err| if (builtin.os.tag == .wasi) + return error.InvalidUtf8 + else + return unexpectedErrno(err), else => |err| return unexpectedErrno(err), } } +/// On WASI, both paths should be encoded as valid UTF-8. +/// On other platforms, both paths are an opaque sequence of bytes with no particular encoding. pub fn linkat( olddir: fd_t, oldpath: []const u8, @@ -2399,6 +2461,7 @@ pub fn linkat( .ROFS => return error.ReadOnlyFileSystem, .XDEV => return error.NotSameFileSystem, .INVAL => unreachable, + .ILSEQ => return error.InvalidUtf8, else => |err| return unexpectedErrno(err), } } @@ -2422,9 +2485,13 @@ pub const UnlinkError = error{ SystemResources, ReadOnlyFileSystem, - /// On Windows, file paths must be valid Unicode. + /// WASI-only; file paths must be valid UTF-8. InvalidUtf8, + /// Windows-only; file paths provided by the user must be valid WTF-8. + /// https://simonsapin.github.io/wtf-8/ + InvalidWtf8, + /// On Windows, file paths cannot contain these characters: /// '/', '*', '?', '"', '<', '>', '|' BadPathName, @@ -2434,6 +2501,9 @@ pub const UnlinkError = error{ } || UnexpectedError; /// Delete a name and possibly the file it refers to. 
+/// On Windows, `file_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/). +/// On WASI, `file_path` should be encoded as valid UTF-8. +/// On other platforms, `file_path` is an opaque sequence of bytes with no particular encoding. /// See also `unlinkZ`. pub fn unlink(file_path: []const u8) UnlinkError!void { if (builtin.os.tag == .wasi and !builtin.link_libc) { @@ -2450,7 +2520,7 @@ pub fn unlink(file_path: []const u8) UnlinkError!void { } } -/// Same as `unlink` except the parameter is a null terminated UTF8-encoded string. +/// Same as `unlink` except the parameter is null terminated. pub fn unlinkZ(file_path: [*:0]const u8) UnlinkError!void { if (builtin.os.tag == .windows) { const file_path_w = try windows.cStrToPrefixedFileW(null, file_path); @@ -2473,11 +2543,15 @@ pub fn unlinkZ(file_path: [*:0]const u8) UnlinkError!void { .NOTDIR => return error.NotDir, .NOMEM => return error.SystemResources, .ROFS => return error.ReadOnlyFileSystem, + .ILSEQ => |err| if (builtin.os.tag == .wasi) + return error.InvalidUtf8 + else + return unexpectedErrno(err), else => |err| return unexpectedErrno(err), } } -/// Windows-only. Same as `unlink` except the parameter is null-terminated, WTF16 encoded. +/// Windows-only. Same as `unlink` except the parameter is null-terminated, WTF16 LE encoded. pub fn unlinkW(file_path_w: []const u16) UnlinkError!void { windows.DeleteFile(file_path_w, .{ .dir = std.fs.cwd().fd }) catch |err| switch (err) { error.DirNotEmpty => unreachable, // we're not passing .remove_dir = true @@ -2491,6 +2565,9 @@ pub const UnlinkatError = UnlinkError || error{ }; /// Delete a file name and possibly the file it refers to, based on an open directory handle. +/// On Windows, `file_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/). +/// On WASI, `file_path` should be encoded as valid UTF-8. +/// On other platforms, `file_path` is an opaque sequence of bytes with no particular encoding. 
/// Asserts that the path parameter has no null bytes. pub fn unlinkat(dirfd: fd_t, file_path: []const u8, flags: u32) UnlinkatError!void { if (builtin.os.tag == .windows) { @@ -2528,6 +2605,7 @@ pub fn unlinkatWasi(dirfd: fd_t, file_path: []const u8, flags: u32) UnlinkatErro .ROFS => return error.ReadOnlyFileSystem, .NOTEMPTY => return error.DirNotEmpty, .NOTCAPABLE => return error.AccessDenied, + .ILSEQ => return error.InvalidUtf8, .INVAL => unreachable, // invalid flags, or pathname has . as last component .BADF => unreachable, // always a race condition @@ -2560,6 +2638,10 @@ pub fn unlinkatZ(dirfd: fd_t, file_path_c: [*:0]const u8, flags: u32) UnlinkatEr .ROFS => return error.ReadOnlyFileSystem, .EXIST => return error.DirNotEmpty, .NOTEMPTY => return error.DirNotEmpty, + .ILSEQ => |err| if (builtin.os.tag == .wasi) + return error.InvalidUtf8 + else + return unexpectedErrno(err), .INVAL => unreachable, // invalid flags, or pathname has . as last component .BADF => unreachable, // always a race condition @@ -2568,7 +2650,7 @@ pub fn unlinkatZ(dirfd: fd_t, file_path_c: [*:0]const u8, flags: u32) UnlinkatEr } } -/// Same as `unlinkat` but `sub_path_w` is UTF16LE, NT prefixed. Windows only. +/// Same as `unlinkat` but `sub_path_w` is WTF16LE, NT prefixed. Windows only. pub fn unlinkatW(dirfd: fd_t, sub_path_w: []const u16, flags: u32) UnlinkatError!void { const remove_dir = (flags & AT.REMOVEDIR) != 0; return windows.DeleteFile(sub_path_w, .{ .dir = dirfd, .remove_dir = remove_dir }); @@ -2594,7 +2676,11 @@ pub const RenameError = error{ PathAlreadyExists, ReadOnlyFileSystem, RenameAcrossMountPoints, + /// WASI-only; file paths must be valid UTF-8. InvalidUtf8, + /// Windows-only; file paths provided by the user must be valid WTF-8. + /// https://simonsapin.github.io/wtf-8/ + InvalidWtf8, BadPathName, NoDevice, SharingViolation, @@ -2610,6 +2696,9 @@ pub const RenameError = error{ } || UnexpectedError; /// Change the name or location of a file. 
+/// On Windows, both paths should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/). +/// On WASI, both paths should be encoded as valid UTF-8. +/// On other platforms, both paths are an opaque sequence of bytes with no particular encoding. pub fn rename(old_path: []const u8, new_path: []const u8) RenameError!void { if (builtin.os.tag == .wasi and !builtin.link_libc) { return renameat(wasi.AT.FDCWD, old_path, wasi.AT.FDCWD, new_path); @@ -2624,7 +2713,7 @@ pub fn rename(old_path: []const u8, new_path: []const u8) RenameError!void { } } -/// Same as `rename` except the parameters are null-terminated byte arrays. +/// Same as `rename` except the parameters are null-terminated. pub fn renameZ(old_path: [*:0]const u8, new_path: [*:0]const u8) RenameError!void { if (builtin.os.tag == .windows) { const old_path_w = try windows.cStrToPrefixedFileW(null, old_path); @@ -2653,11 +2742,15 @@ pub fn renameZ(old_path: [*:0]const u8, new_path: [*:0]const u8) RenameError!voi .NOTEMPTY => return error.PathAlreadyExists, .ROFS => return error.ReadOnlyFileSystem, .XDEV => return error.RenameAcrossMountPoints, + .ILSEQ => |err| if (builtin.os.tag == .wasi) + return error.InvalidUtf8 + else + return unexpectedErrno(err), else => |err| return unexpectedErrno(err), } } -/// Same as `rename` except the parameters are null-terminated UTF16LE encoded byte arrays. +/// Same as `rename` except the parameters are null-terminated and WTF16LE encoded. /// Assumes target is Windows. pub fn renameW(old_path: [*:0]const u16, new_path: [*:0]const u16) RenameError!void { const flags = windows.MOVEFILE_REPLACE_EXISTING | windows.MOVEFILE_WRITE_THROUGH; @@ -2665,6 +2758,9 @@ pub fn renameW(old_path: [*:0]const u16, new_path: [*:0]const u16) RenameError!v } /// Change the name or location of a file based on an open directory handle. +/// On Windows, both paths should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/). +/// On WASI, both paths should be encoded as valid UTF-8. 
+/// On other platforms, both paths are an opaque sequence of bytes with no particular encoding. pub fn renameat( old_dir_fd: fd_t, old_path: []const u8, @@ -2710,11 +2806,12 @@ pub fn renameatWasi(old: RelativePathWasi, new: RelativePathWasi) RenameError!vo .ROFS => return error.ReadOnlyFileSystem, .XDEV => return error.RenameAcrossMountPoints, .NOTCAPABLE => return error.AccessDenied, + .ILSEQ => return error.InvalidUtf8, else => |err| return unexpectedErrno(err), } } -/// Same as `renameat` except the parameters are null-terminated byte arrays. +/// Same as `renameat` except the parameters are null-terminated. pub fn renameatZ( old_dir_fd: fd_t, old_path: [*:0]const u8, @@ -2749,6 +2846,10 @@ pub fn renameatZ( .NOTEMPTY => return error.PathAlreadyExists, .ROFS => return error.ReadOnlyFileSystem, .XDEV => return error.RenameAcrossMountPoints, + .ILSEQ => |err| if (builtin.os.tag == .wasi) + return error.InvalidUtf8 + else + return unexpectedErrno(err), else => |err| return unexpectedErrno(err), } } @@ -2860,6 +2961,9 @@ pub fn renameatW( } } +/// On Windows, `sub_dir_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/). +/// On WASI, `sub_dir_path` should be encoded as valid UTF-8. +/// On other platforms, `sub_dir_path` is an opaque sequence of bytes with no particular encoding. pub fn mkdirat(dir_fd: fd_t, sub_dir_path: []const u8, mode: u32) MakeDirError!void { if (builtin.os.tag == .windows) { const sub_dir_path_w = try windows.sliceToPrefixedFileW(dir_fd, sub_dir_path); @@ -2891,14 +2995,16 @@ pub fn mkdiratWasi(dir_fd: fd_t, sub_dir_path: []const u8, mode: u32) MakeDirErr .NOTDIR => return error.NotDir, .ROFS => return error.ReadOnlyFileSystem, .NOTCAPABLE => return error.AccessDenied, + .ILSEQ => return error.InvalidUtf8, else => |err| return unexpectedErrno(err), } } +/// Same as `mkdirat` except the parameters are null-terminated. 
pub fn mkdiratZ(dir_fd: fd_t, sub_dir_path: [*:0]const u8, mode: u32) MakeDirError!void { if (builtin.os.tag == .windows) { const sub_dir_path_w = try windows.cStrToPrefixedFileW(dir_fd, sub_dir_path); - return mkdiratW(dir_fd, sub_dir_path_w.span().ptr, mode); + return mkdiratW(dir_fd, sub_dir_path_w.span(), mode); } else if (builtin.os.tag == .wasi and !builtin.link_libc) { return mkdirat(dir_fd, mem.sliceTo(sub_dir_path, 0), mode); } @@ -2920,10 +3026,15 @@ pub fn mkdiratZ(dir_fd: fd_t, sub_dir_path: [*:0]const u8, mode: u32) MakeDirErr .ROFS => return error.ReadOnlyFileSystem, // dragonfly: when dir_fd is unlinked from filesystem .NOTCONN => return error.FileNotFound, + .ILSEQ => |err| if (builtin.os.tag == .wasi) + return error.InvalidUtf8 + else + return unexpectedErrno(err), else => |err| return unexpectedErrno(err), } } +/// Windows-only. Same as `mkdirat` except the parameter WTF16 LE encoded. pub fn mkdiratW(dir_fd: fd_t, sub_path_w: []const u16, mode: u32) MakeDirError!void { _ = mode; const sub_dir_handle = windows.OpenFile(sub_path_w, .{ @@ -2955,7 +3066,11 @@ pub const MakeDirError = error{ NoSpaceLeft, NotDir, ReadOnlyFileSystem, + /// WASI-only; file paths must be valid UTF-8. InvalidUtf8, + /// Windows-only; file paths provided by the user must be valid WTF-8. + /// https://simonsapin.github.io/wtf-8/ + InvalidWtf8, BadPathName, NoDevice, /// On Windows, `\\server` or `\\server\share` was not found. @@ -2964,6 +3079,9 @@ pub const MakeDirError = error{ /// Create a directory. /// `mode` is ignored on Windows and WASI. +/// On Windows, `dir_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/). +/// On WASI, `dir_path` should be encoded as valid UTF-8. +/// On other platforms, `dir_path` is an opaque sequence of bytes with no particular encoding. 
pub fn mkdir(dir_path: []const u8, mode: u32) MakeDirError!void { if (builtin.os.tag == .wasi and !builtin.link_libc) { return mkdirat(wasi.AT.FDCWD, dir_path, mode); @@ -2976,7 +3094,10 @@ pub fn mkdir(dir_path: []const u8, mode: u32) MakeDirError!void { } } -/// Same as `mkdir` but the parameter is a null-terminated UTF8-encoded string. +/// Same as `mkdir` but the parameter is null-terminated. +/// On Windows, `dir_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/). +/// On WASI, `dir_path` should be encoded as valid UTF-8. +/// On other platforms, `dir_path` is an opaque sequence of bytes with no particular encoding. pub fn mkdirZ(dir_path: [*:0]const u8, mode: u32) MakeDirError!void { if (builtin.os.tag == .windows) { const dir_path_w = try windows.cStrToPrefixedFileW(null, dir_path); @@ -2999,11 +3120,15 @@ pub fn mkdirZ(dir_path: [*:0]const u8, mode: u32) MakeDirError!void { .NOSPC => return error.NoSpaceLeft, .NOTDIR => return error.NotDir, .ROFS => return error.ReadOnlyFileSystem, + .ILSEQ => |err| if (builtin.os.tag == .wasi) + return error.InvalidUtf8 + else + return unexpectedErrno(err), else => |err| return unexpectedErrno(err), } } -/// Windows-only. Same as `mkdir` but the parameters is WTF16 encoded. +/// Windows-only. Same as `mkdir` but the parameters is WTF16LE encoded. pub fn mkdirW(dir_path_w: []const u16, mode: u32) MakeDirError!void { _ = mode; const sub_dir_handle = windows.OpenFile(dir_path_w, .{ @@ -3031,13 +3156,20 @@ pub const DeleteDirError = error{ NotDir, DirNotEmpty, ReadOnlyFileSystem, + /// WASI-only; file paths must be valid UTF-8. InvalidUtf8, + /// Windows-only; file paths provided by the user must be valid WTF-8. + /// https://simonsapin.github.io/wtf-8/ + InvalidWtf8, BadPathName, /// On Windows, `\\server` or `\\server\share` was not found. NetworkNotFound, } || UnexpectedError; /// Deletes an empty directory. +/// On Windows, `dir_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/). 
+/// On WASI, `dir_path` should be encoded as valid UTF-8. +/// On other platforms, `dir_path` is an opaque sequence of bytes with no particular encoding. pub fn rmdir(dir_path: []const u8) DeleteDirError!void { if (builtin.os.tag == .wasi and !builtin.link_libc) { return unlinkat(wasi.AT.FDCWD, dir_path, AT.REMOVEDIR) catch |err| switch (err) { @@ -3055,6 +3187,9 @@ pub fn rmdir(dir_path: []const u8) DeleteDirError!void { } /// Same as `rmdir` except the parameter is null-terminated. +/// On Windows, `dir_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/). +/// On WASI, `dir_path` should be encoded as valid UTF-8. +/// On other platforms, `dir_path` is an opaque sequence of bytes with no particular encoding. pub fn rmdirZ(dir_path: [*:0]const u8) DeleteDirError!void { if (builtin.os.tag == .windows) { const dir_path_w = try windows.cStrToPrefixedFileW(null, dir_path); @@ -3077,11 +3212,15 @@ pub fn rmdirZ(dir_path: [*:0]const u8) DeleteDirError!void { .EXIST => return error.DirNotEmpty, .NOTEMPTY => return error.DirNotEmpty, .ROFS => return error.ReadOnlyFileSystem, + .ILSEQ => |err| if (builtin.os.tag == .wasi) + return error.InvalidUtf8 + else + return unexpectedErrno(err), else => |err| return unexpectedErrno(err), } } -/// Windows-only. Same as `rmdir` except the parameter is WTF16 encoded. +/// Windows-only. Same as `rmdir` except the parameter is WTF-16 LE encoded. pub fn rmdirW(dir_path_w: []const u16) DeleteDirError!void { return windows.DeleteFile(dir_path_w, .{ .dir = std.fs.cwd().fd, .remove_dir = true }) catch |err| switch (err) { error.IsDir => unreachable, @@ -3098,21 +3237,25 @@ pub const ChangeCurDirError = error{ SystemResources, NotDir, BadPathName, - - /// On Windows, file paths must be valid Unicode. + /// WASI-only; file paths must be valid UTF-8. InvalidUtf8, + /// Windows-only; file paths provided by the user must be valid WTF-8. 
+ /// https://simonsapin.github.io/wtf-8/ + InvalidWtf8, } || UnexpectedError; /// Changes the current working directory of the calling process. -/// `dir_path` is recommended to be a UTF-8 encoded string. +/// On Windows, `dir_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/). +/// On WASI, `dir_path` should be encoded as valid UTF-8. +/// On other platforms, `dir_path` is an opaque sequence of bytes with no particular encoding. pub fn chdir(dir_path: []const u8) ChangeCurDirError!void { if (builtin.os.tag == .wasi and !builtin.link_libc) { @compileError("WASI does not support os.chdir"); } else if (builtin.os.tag == .windows) { - var utf16_dir_path: [windows.PATH_MAX_WIDE]u16 = undefined; - const len = try std.unicode.utf8ToUtf16Le(utf16_dir_path[0..], dir_path); - if (len > utf16_dir_path.len) return error.NameTooLong; - return chdirW(utf16_dir_path[0..len]); + var wtf16_dir_path: [windows.PATH_MAX_WIDE]u16 = undefined; + const len = try std.unicode.wtf8ToWtf16Le(wtf16_dir_path[0..], dir_path); + if (len > wtf16_dir_path.len) return error.NameTooLong; + return chdirW(wtf16_dir_path[0..len]); } else { const dir_path_c = try toPosixPath(dir_path); return chdirZ(&dir_path_c); @@ -3120,12 +3263,15 @@ pub fn chdir(dir_path: []const u8) ChangeCurDirError!void { } /// Same as `chdir` except the parameter is null-terminated. +/// On Windows, `dir_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/). +/// On WASI, `dir_path` should be encoded as valid UTF-8. +/// On other platforms, `dir_path` is an opaque sequence of bytes with no particular encoding. 
pub fn chdirZ(dir_path: [*:0]const u8) ChangeCurDirError!void { if (builtin.os.tag == .windows) { - var utf16_dir_path: [windows.PATH_MAX_WIDE]u16 = undefined; - const len = try std.unicode.utf8ToUtf16Le(utf16_dir_path[0..], mem.span(dir_path)); - if (len > utf16_dir_path.len) return error.NameTooLong; - return chdirW(utf16_dir_path[0..len]); + var wtf16_dir_path: [windows.PATH_MAX_WIDE]u16 = undefined; + const len = try std.unicode.wtf8ToWtf16Le(wtf16_dir_path[0..], mem.span(dir_path)); + if (len > wtf16_dir_path.len) return error.NameTooLong; + return chdirW(wtf16_dir_path[0..len]); } else if (builtin.os.tag == .wasi and !builtin.link_libc) { return chdir(mem.span(dir_path)); } @@ -3139,11 +3285,15 @@ pub fn chdirZ(dir_path: [*:0]const u8) ChangeCurDirError!void { .NOENT => return error.FileNotFound, .NOMEM => return error.SystemResources, .NOTDIR => return error.NotDir, + .ILSEQ => |err| if (builtin.os.tag == .wasi) + return error.InvalidUtf8 + else + return unexpectedErrno(err), else => |err| return unexpectedErrno(err), } } -/// Windows-only. Same as `chdir` except the parameter is WTF16 encoded. +/// Windows-only. Same as `chdir` except the parameter is WTF16 LE encoded. pub fn chdirW(dir_path: []const u16) ChangeCurDirError!void { windows.SetCurrentDirectory(dir_path) catch |err| switch (err) { error.NoDevice => return error.FileSystem, @@ -3183,7 +3333,11 @@ pub const ReadLinkError = error{ SystemResources, NotLink, NotDir, + /// WASI-only; file paths must be valid UTF-8. InvalidUtf8, + /// Windows-only; file paths provided by the user must be valid WTF-8. + /// https://simonsapin.github.io/wtf-8/ + InvalidWtf8, BadPathName, /// Windows-only. This error may occur if the opened reparse point is /// of unsupported type. @@ -3193,7 +3347,13 @@ pub const ReadLinkError = error{ } || UnexpectedError; /// Read value of a symbolic link. +/// On Windows, `file_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/). 
+/// On WASI, `file_path` should be encoded as valid UTF-8. +/// On other platforms, `file_path` is an opaque sequence of bytes with no particular encoding. /// The return value is a slice of `out_buffer` from index 0. +/// On Windows, the result is encoded as [WTF-8](https://simonsapin.github.io/wtf-8/). +/// On WASI, the result is encoded as UTF-8. +/// On other platforms, the result is an opaque sequence of bytes with no particular encoding. pub fn readlink(file_path: []const u8, out_buffer: []u8) ReadLinkError![]u8 { if (builtin.os.tag == .wasi and !builtin.link_libc) { return readlinkat(wasi.AT.FDCWD, file_path, out_buffer); @@ -3206,7 +3366,8 @@ pub fn readlink(file_path: []const u8, out_buffer: []u8) ReadLinkError![]u8 { } } -/// Windows-only. Same as `readlink` except `file_path` is WTF16 encoded. +/// Windows-only. Same as `readlink` except `file_path` is WTF16 LE encoded. +/// The result is encoded as [WTF-8](https://simonsapin.github.io/wtf-8/). /// See also `readlinkZ`. pub fn readlinkW(file_path: []const u16, out_buffer: []u8) ReadLinkError![]u8 { return windows.ReadLink(std.fs.cwd().fd, file_path, out_buffer); @@ -3215,7 +3376,7 @@ pub fn readlinkW(file_path: []const u16, out_buffer: []u8) ReadLinkError![]u8 { /// Same as `readlink` except `file_path` is null-terminated. 
pub fn readlinkZ(file_path: [*:0]const u8, out_buffer: []u8) ReadLinkError![]u8 { if (builtin.os.tag == .windows) { - const file_path_w = try windows.cStrToWin32PrefixedFileW(file_path); + const file_path_w = try windows.cStrToPrefixedFileW(null, file_path); return readlinkW(file_path_w.span(), out_buffer); } else if (builtin.os.tag == .wasi and !builtin.link_libc) { return readlink(mem.sliceTo(file_path, 0), out_buffer); @@ -3232,12 +3393,22 @@ pub fn readlinkZ(file_path: [*:0]const u8, out_buffer: []u8) ReadLinkError![]u8 .NOENT => return error.FileNotFound, .NOMEM => return error.SystemResources, .NOTDIR => return error.NotDir, + .ILSEQ => |err| if (builtin.os.tag == .wasi) + return error.InvalidUtf8 + else + return unexpectedErrno(err), else => |err| return unexpectedErrno(err), } } /// Similar to `readlink` except reads value of a symbolink link **relative** to `dirfd` directory handle. +/// On Windows, `file_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/). +/// On WASI, `file_path` should be encoded as valid UTF-8. +/// On other platforms, `file_path` is an opaque sequence of bytes with no particular encoding. /// The return value is a slice of `out_buffer` from index 0. +/// On Windows, the result is encoded as [WTF-8](https://simonsapin.github.io/wtf-8/). +/// On WASI, the result is encoded as UTF-8. +/// On other platforms, the result is an opaque sequence of bytes with no particular encoding. /// See also `readlinkatWasi`, `realinkatZ` and `realinkatW`. pub fn readlinkat(dirfd: fd_t, file_path: []const u8, out_buffer: []u8) ReadLinkError![]u8 { if (builtin.os.tag == .wasi and !builtin.link_libc) { @@ -3267,11 +3438,13 @@ pub fn readlinkatWasi(dirfd: fd_t, file_path: []const u8, out_buffer: []u8) Read .NOMEM => return error.SystemResources, .NOTDIR => return error.NotDir, .NOTCAPABLE => return error.AccessDenied, + .ILSEQ => return error.InvalidUtf8, else => |err| return unexpectedErrno(err), } } -/// Windows-only. 
Same as `readlinkat` except `file_path` is null-terminated, WTF16 encoded. +/// Windows-only. Same as `readlinkat` except `file_path` is null-terminated, WTF16 LE encoded. +/// The result is encoded as [WTF-8](https://simonsapin.github.io/wtf-8/). /// See also `readlinkat`. pub fn readlinkatW(dirfd: fd_t, file_path: []const u16, out_buffer: []u8) ReadLinkError![]u8 { return windows.ReadLink(dirfd, file_path, out_buffer); @@ -3298,6 +3471,10 @@ pub fn readlinkatZ(dirfd: fd_t, file_path: [*:0]const u8, out_buffer: []u8) Read .NOENT => return error.FileNotFound, .NOMEM => return error.SystemResources, .NOTDIR => return error.NotDir, + .ILSEQ => |err| if (builtin.os.tag == .wasi) + return error.InvalidUtf8 + else + return unexpectedErrno(err), else => |err| return unexpectedErrno(err), } } @@ -4274,10 +4451,18 @@ pub fn fstat_wasi(fd: fd_t) FStatError!wasi.filestat_t { } } -pub const FStatAtError = FStatError || error{ NameTooLong, FileNotFound, SymLinkLoop }; +pub const FStatAtError = FStatError || error{ + NameTooLong, + FileNotFound, + SymLinkLoop, + /// WASI-only; file paths must be valid UTF-8. + InvalidUtf8, +}; /// Similar to `fstat`, but returns stat of a resource pointed to by `pathname` /// which is relative to `dirfd` handle. +/// On WASI, `pathname` should be encoded as valid UTF-8. +/// On other platforms, `pathname` is an opaque sequence of bytes with no particular encoding. /// See also `fstatatZ` and `fstatat_wasi`. pub fn fstatat(dirfd: fd_t, pathname: []const u8, flags: u32) FStatAtError!Stat { if (builtin.os.tag == .wasi and !builtin.link_libc) { @@ -4294,6 +4479,7 @@ pub fn fstatat(dirfd: fd_t, pathname: []const u8, flags: u32) FStatAtError!Stat } /// WASI-only. Same as `fstatat` but targeting WASI. +/// `pathname` should be encoded as valid UTF-8. /// See also `fstatat`. 
pub fn fstatat_wasi(dirfd: fd_t, pathname: []const u8, flags: wasi.lookupflags_t) FStatAtError!wasi.filestat_t { var stat: wasi.filestat_t = undefined; @@ -4308,6 +4494,7 @@ pub fn fstatat_wasi(dirfd: fd_t, pathname: []const u8, flags: wasi.lookupflags_t .NOENT => return error.FileNotFound, .NOTDIR => return error.FileNotFound, .NOTCAPABLE => return error.AccessDenied, + .ILSEQ => return error.InvalidUtf8, else => |err| return unexpectedErrno(err), } } @@ -4337,6 +4524,10 @@ pub fn fstatatZ(dirfd: fd_t, pathname: [*:0]const u8, flags: u32) FStatAtError!S .LOOP => return error.SymLinkLoop, .NOENT => return error.FileNotFound, .NOTDIR => return error.FileNotFound, + .ILSEQ => |err| if (builtin.os.tag == .wasi) + return error.InvalidUtf8 + else + return unexpectedErrno(err), else => |err| return unexpectedErrno(err), } } @@ -4693,12 +4884,17 @@ pub const AccessError = error{ FileBusy, SymLinkLoop, ReadOnlyFileSystem, - - /// On Windows, file paths must be valid Unicode. + /// WASI-only; file paths must be valid UTF-8. InvalidUtf8, + /// Windows-only; file paths provided by the user must be valid WTF-8. + /// https://simonsapin.github.io/wtf-8/ + InvalidWtf8, } || UnexpectedError; /// check user's permissions for a file +/// On Windows, `path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/). +/// On WASI, `path` should be encoded as valid UTF-8. +/// On other platforms, `path` is an opaque sequence of bytes with no particular encoding. /// TODO currently this assumes `mode` is `F.OK` on Windows. 
pub fn access(path: []const u8, mode: u32) AccessError!void { if (builtin.os.tag == .windows) { @@ -4740,12 +4936,16 @@ pub fn accessZ(path: [*:0]const u8, mode: u32) AccessError!void { .FAULT => unreachable, .IO => return error.InputOutput, .NOMEM => return error.SystemResources, + .ILSEQ => |err| if (builtin.os.tag == .wasi) + return error.InvalidUtf8 + else + return unexpectedErrno(err), else => |err| return unexpectedErrno(err), } } -/// Call from Windows-specific code if you already have a UTF-16LE encoded, null terminated string. -/// Otherwise use `access` or `accessC`. +/// Call from Windows-specific code if you already have a WTF-16LE encoded, null terminated string. +/// Otherwise use `access` or `accessZ`. /// TODO currently this ignores `mode`. pub fn accessW(path: [*:0]const u16, mode: u32) windows.GetFileAttributesError!void { _ = mode; @@ -4762,6 +4962,9 @@ pub fn accessW(path: [*:0]const u16, mode: u32) windows.GetFileAttributesError!v } /// Check user's permissions for a file, based on an open directory handle. +/// On Windows, `path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/). +/// On WASI, `path` should be encoded as valid UTF-8. +/// On other platforms, `path` is an opaque sequence of bytes with no particular encoding. /// TODO currently this ignores `mode` and `flags` on Windows. pub fn faccessat(dirfd: fd_t, path: []const u8, mode: u32, flags: u32) AccessError!void { if (builtin.os.tag == .windows) { @@ -4832,6 +5035,10 @@ pub fn faccessatZ(dirfd: fd_t, path: [*:0]const u8, mode: u32, flags: u32) Acces .FAULT => unreachable, .IO => return error.InputOutput, .NOMEM => return error.SystemResources, + .ILSEQ => |err| if (builtin.os.tag == .wasi) + return error.InvalidUtf8 + else + return unexpectedErrno(err), else => |err| return unexpectedErrno(err), } } @@ -5339,8 +5546,9 @@ pub const RealPathError = error{ /// On WASI, the current CWD may not be associated with an absolute path. 
InvalidHandle, - /// On Windows, file paths must be valid Unicode. - InvalidUtf8, + /// Windows-only; file paths provided by the user must be valid WTF-8. + /// https://simonsapin.github.io/wtf-8/ + InvalidWtf8, /// On Windows, `\\server` or `\\server\share` was not found. NetworkNotFound, @@ -5362,8 +5570,12 @@ pub const RealPathError = error{ /// Return the canonicalized absolute pathname. /// Expands all symbolic links and resolves references to `.`, `..`, and /// extra `/` characters in `pathname`. +/// On Windows, `pathname` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/). +/// On other platforms, `pathname` is an opaque sequence of bytes with no particular encoding. /// The return value is a slice of `out_buffer`, but not necessarily from the beginning. /// See also `realpathZ` and `realpathW`. +/// On Windows, the result is encoded as [WTF-8](https://simonsapin.github.io/wtf-8/). +/// On other platforms, the result is an opaque sequence of bytes with no particular encoding. /// Calling this function is usually a bug. pub fn realpath(pathname: []const u8, out_buffer: *[MAX_PATH_BYTES]u8) RealPathError![]u8 { if (builtin.os.tag == .windows) { @@ -5402,6 +5614,7 @@ pub fn realpathZ(pathname: [*:0]const u8, out_buffer: *[MAX_PATH_BYTES]u8) RealP error.WouldBlock => unreachable, error.FileBusy => unreachable, // not asking for write permissions error.InvalidHandle => unreachable, // WASI-only + error.InvalidUtf8 => unreachable, // WASI-only else => |e| return e, }; defer close(fd); @@ -5425,7 +5638,8 @@ pub fn realpathZ(pathname: [*:0]const u8, out_buffer: *[MAX_PATH_BYTES]u8) RealP return mem.sliceTo(result_path, 0); } -/// Same as `realpath` except `pathname` is UTF16LE-encoded. +/// Same as `realpath` except `pathname` is WTF16LE-encoded. +/// The result is encoded as [WTF-8](https://simonsapin.github.io/wtf-8/). /// Calling this function is usually a bug. 
pub fn realpathW(pathname: []const u16, out_buffer: *[MAX_PATH_BYTES]u8) RealPathError![]u8 { const w = windows; @@ -5475,6 +5689,8 @@ pub fn isGetFdPathSupportedOnTarget(os: std.Target.Os) bool { /// This function is very host-specific and is not universally supported by all hosts. /// For example, while it generally works on Linux, macOS, FreeBSD or Windows, it is /// unsupported on WASI. +/// On Windows, the result is encoded as [WTF-8](https://simonsapin.github.io/wtf-8/). +/// On other platforms, the result is an opaque sequence of bytes with no particular encoding. /// Calling this function is usually a bug. pub fn getFdPath(fd: fd_t, out_buffer: *[MAX_PATH_BYTES]u8) RealPathError![]u8 { if (!comptime isGetFdPathSupportedOnTarget(builtin.os)) { @@ -5485,10 +5701,7 @@ pub fn getFdPath(fd: fd_t, out_buffer: *[MAX_PATH_BYTES]u8) RealPathError![]u8 { var wide_buf: [windows.PATH_MAX_WIDE]u16 = undefined; const wide_slice = try windows.GetFinalPathNameByHandle(fd, .{}, wide_buf[0..]); - // TODO: Windows file paths can be arbitrary arrays of u16 values - // and must not fail with InvalidUtf8. 
- const end_index = std.unicode.utf16leToUtf8(out_buffer, wide_slice) catch - return error.InvalidUtf8; + const end_index = std.unicode.wtf16LeToWtf8(out_buffer, wide_slice); return out_buffer[0..end_index]; }, .macos, .ios, .watchos, .tvos => { @@ -5512,8 +5725,12 @@ pub fn getFdPath(fd: fd_t, out_buffer: *[MAX_PATH_BYTES]u8) RealPathError![]u8 { const target = readlinkZ(proc_path, out_buffer) catch |err| { switch (err) { - error.UnsupportedReparsePointType => unreachable, // Windows only, error.NotLink => unreachable, + error.BadPathName => unreachable, + error.InvalidUtf8 => unreachable, // WASI-only + error.InvalidWtf8 => unreachable, // Windows-only + error.UnsupportedReparsePointType => unreachable, // Windows-only + error.NetworkNotFound => unreachable, // Windows-only else => |e| return e, } }; diff --git a/lib/std/os/windows.zig b/lib/std/os/windows.zig index 4ae2045ae6..deb903b283 100644 --- a/lib/std/os/windows.zig +++ b/lib/std/os/windows.zig @@ -1,8 +1,8 @@ //! This file contains thin wrappers around Windows-specific APIs, with these //! specific goals in mind: //! * Convert "errno"-style error codes into Zig errors. -//! * When null-terminated or UTF16LE byte buffers are required, provide APIs which accept -//! slices as well as APIs which accept null-terminated UTF16LE byte buffers. +//! * When null-terminated or WTF16LE byte buffers are required, provide APIs which accept +//! slices as well as APIs which accept null-terminated WTF16LE byte buffers. const builtin = @import("builtin"); const std = @import("../std.zig"); @@ -548,7 +548,6 @@ pub fn WriteFile( pub const SetCurrentDirectoryError = error{ NameTooLong, - InvalidUtf8, FileNotFound, NotDir, AccessDenied, @@ -587,24 +586,24 @@ pub const GetCurrentDirectoryError = error{ }; /// The result is a slice of `buffer`, indexed from 0. +/// The result is encoded as [WTF-8](https://simonsapin.github.io/wtf-8/). 
pub fn GetCurrentDirectory(buffer: []u8) GetCurrentDirectoryError![]u8 { - var utf16le_buf: [PATH_MAX_WIDE]u16 = undefined; - const result = kernel32.GetCurrentDirectoryW(utf16le_buf.len, &utf16le_buf); + var wtf16le_buf: [PATH_MAX_WIDE]u16 = undefined; + const result = kernel32.GetCurrentDirectoryW(wtf16le_buf.len, &wtf16le_buf); if (result == 0) { switch (kernel32.GetLastError()) { else => |err| return unexpectedError(err), } } - assert(result <= utf16le_buf.len); - const utf16le_slice = utf16le_buf[0..result]; - // Trust that Windows gives us valid UTF-16LE. + assert(result <= wtf16le_buf.len); + const wtf16le_slice = wtf16le_buf[0..result]; var end_index: usize = 0; - var it = std.unicode.Utf16LeIterator.init(utf16le_slice); - while (it.nextCodepoint() catch unreachable) |codepoint| { + var it = std.unicode.Wtf16LeIterator.init(wtf16le_slice); + while (it.nextCodepoint()) |codepoint| { const seq_len = std.unicode.utf8CodepointSequenceLength(codepoint) catch unreachable; if (end_index + seq_len >= buffer.len) return error.NameTooLong; - end_index += std.unicode.utf8Encode(codepoint, buffer[end_index..]) catch unreachable; + end_index += std.unicode.wtf8Encode(codepoint, buffer[end_index..]) catch unreachable; } return buffer[0..end_index]; } @@ -812,6 +811,8 @@ pub fn ReadLink(dir: ?HANDLE, sub_path_w: []const u16, out_buffer: []u8) ReadLin } } +/// Asserts that there is enough space is `out_buffer`. +/// The result is encoded as [WTF-8](https://simonsapin.github.io/wtf-8/). 
fn parseReadlinkPath(path: []const u16, is_relative: bool, out_buffer: []u8) []u8 { const win32_namespace_path = path: { if (is_relative) break :path path; @@ -821,7 +822,7 @@ fn parseReadlinkPath(path: []const u16, is_relative: bool, out_buffer: []u8) []u }; break :path win32_path.span(); }; - const out_len = std.unicode.utf16LeToUtf8(out_buffer, win32_namespace_path) catch unreachable; + const out_len = std.unicode.wtf16LeToWtf8(out_buffer, win32_namespace_path); return out_buffer[0..out_len]; } @@ -1942,13 +1943,13 @@ pub fn eqlIgnoreCaseWTF16(a: []const u16, b: []const u16) bool { if (@inComptime() or builtin.os.tag != .windows) { // This function compares the strings code unit by code unit (aka u16-to-u16), // so any length difference implies inequality. In other words, there's no possible - // conversion that changes the number of UTF-16 code units needed for the uppercase/lowercase + // conversion that changes the number of WTF-16 code units needed for the uppercase/lowercase // version in the conversion table since only codepoints <= max(u16) are eligible // for conversion at all. if (a.len != b.len) return false; for (a, b) |a_c, b_c| { - // The slices are always UTF-16 LE, so need to convert the elements to native + // The slices are always WTF-16 LE, so need to convert the elements to native // endianness for the uppercasing const a_c_native = std.mem.littleToNative(u16, a_c); const b_c_native = std.mem.littleToNative(u16, b_c); @@ -1975,18 +1976,18 @@ pub fn eqlIgnoreCaseWTF16(a: []const u16, b: []const u16) bool { return ntdll.RtlEqualUnicodeString(&a_string, &b_string, TRUE) == TRUE; } -/// Compares two UTF-8 strings using the equivalent functionality of +/// Compares two WTF-8 strings using the equivalent functionality of /// `RtlEqualUnicodeString` (with case insensitive comparison enabled). /// This function can be called on any target. -/// Assumes `a` and `b` are valid UTF-8. 
-pub fn eqlIgnoreCaseUtf8(a: []const u8, b: []const u8) bool { +/// Assumes `a` and `b` are valid WTF-8. +pub fn eqlIgnoreCaseWtf8(a: []const u8, b: []const u8) bool { // A length equality check is not possible here because there are // some codepoints that have a different length uppercase UTF-8 representations // than their lowercase counterparts, e.g. U+0250 (2 bytes) <-> U+2C6F (3 bytes). // There are 7 such codepoints in the uppercase data used by Windows. - var a_utf8_it = std.unicode.Utf8View.initUnchecked(a).iterator(); - var b_utf8_it = std.unicode.Utf8View.initUnchecked(b).iterator(); + var a_wtf8_it = std.unicode.Wtf8View.initUnchecked(a).iterator(); + var b_wtf8_it = std.unicode.Wtf8View.initUnchecked(b).iterator(); // Use RtlUpcaseUnicodeChar on Windows when not in comptime to avoid including a // redundant copy of the uppercase data. @@ -1996,8 +1997,8 @@ pub fn eqlIgnoreCaseUtf8(a: []const u8, b: []const u8) bool { }; while (true) { - const a_cp = a_utf8_it.nextCodepoint() orelse break; - const b_cp = b_utf8_it.nextCodepoint() orelse return false; + const a_cp = a_wtf8_it.nextCodepoint() orelse break; + const b_cp = b_wtf8_it.nextCodepoint() orelse return false; if (a_cp <= std.math.maxInt(u16) and b_cp <= std.math.maxInt(u16)) { if (a_cp != b_cp and upcaseImpl(@intCast(a_cp)) != upcaseImpl(@intCast(b_cp))) { @@ -2008,26 +2009,26 @@ pub fn eqlIgnoreCaseUtf8(a: []const u8, b: []const u8) bool { } } // Make sure there are no leftover codepoints in b - if (b_utf8_it.nextCodepoint() != null) return false; + if (b_wtf8_it.nextCodepoint() != null) return false; return true; } fn testEqlIgnoreCase(comptime expect_eql: bool, comptime a: []const u8, comptime b: []const u8) !void { - try std.testing.expectEqual(expect_eql, eqlIgnoreCaseUtf8(a, b)); + try std.testing.expectEqual(expect_eql, eqlIgnoreCaseWtf8(a, b)); try std.testing.expectEqual(expect_eql, eqlIgnoreCaseWTF16( std.unicode.utf8ToUtf16LeStringLiteral(a), std.unicode.utf8ToUtf16LeStringLiteral(b), 
)); - try comptime std.testing.expect(expect_eql == eqlIgnoreCaseUtf8(a, b)); + try comptime std.testing.expect(expect_eql == eqlIgnoreCaseWtf8(a, b)); try comptime std.testing.expect(expect_eql == eqlIgnoreCaseWTF16( std.unicode.utf8ToUtf16LeStringLiteral(a), std.unicode.utf8ToUtf16LeStringLiteral(b), )); } -test "eqlIgnoreCaseWTF16/Utf8" { +test "eqlIgnoreCaseWTF16/Wtf8" { try testEqlIgnoreCase(true, "\x01 a B Λ ɐ", "\x01 A b λ Ɐ"); // does not do case-insensitive comparison for codepoints >= U+10000 try testEqlIgnoreCase(false, "𐓏", "𐓷"); @@ -2117,20 +2118,32 @@ pub fn normalizePath(comptime T: type, path: []T) RemoveDotDirsError!usize { return prefix_len + try removeDotDirsSanitized(T, path[prefix_len..new_len]); } +pub const Wtf8ToPrefixedFileWError = error{InvalidWtf8} || Wtf16ToPrefixedFileWError; + /// Same as `sliceToPrefixedFileW` but accepts a pointer -/// to a null-terminated path. -pub fn cStrToPrefixedFileW(dir: ?HANDLE, s: [*:0]const u8) !PathSpace { +/// to a null-terminated WTF-8 encoded path. +/// https://simonsapin.github.io/wtf-8/ +pub fn cStrToPrefixedFileW(dir: ?HANDLE, s: [*:0]const u8) Wtf8ToPrefixedFileWError!PathSpace { return sliceToPrefixedFileW(dir, mem.sliceTo(s, 0)); } -/// Same as `wToPrefixedFileW` but accepts a UTF-8 encoded path. -pub fn sliceToPrefixedFileW(dir: ?HANDLE, path: []const u8) !PathSpace { +/// Same as `wToPrefixedFileW` but accepts a WTF-8 encoded path. 
+/// https://simonsapin.github.io/wtf-8/ +pub fn sliceToPrefixedFileW(dir: ?HANDLE, path: []const u8) Wtf8ToPrefixedFileWError!PathSpace { var temp_path: PathSpace = undefined; - temp_path.len = try std.unicode.utf8ToUtf16Le(&temp_path.data, path); + temp_path.len = try std.unicode.wtf8ToWtf16Le(&temp_path.data, path); temp_path.data[temp_path.len] = 0; return wToPrefixedFileW(dir, temp_path.span()); } +pub const Wtf16ToPrefixedFileWError = error{ + AccessDenied, + BadPathName, + FileNotFound, + NameTooLong, + Unexpected, +}; + /// Converts the `path` to WTF16, null-terminated. If the path contains any /// namespace prefix, or is anything but a relative path (rooted, drive relative, /// etc) the result will have the NT-style prefix `\??\`. @@ -2142,7 +2155,7 @@ pub fn sliceToPrefixedFileW(dir: ?HANDLE, path: []const u8) !PathSpace { /// is non-null, or the CWD if it is null. /// - Special case device names like COM1, NUL, etc are not handled specially (TODO) /// - . and space are not stripped from the end of relative paths (potential TODO) -pub fn wToPrefixedFileW(dir: ?HANDLE, path: [:0]const u16) !PathSpace { +pub fn wToPrefixedFileW(dir: ?HANDLE, path: [:0]const u16) Wtf16ToPrefixedFileWError!PathSpace { const nt_prefix = [_]u16{ '\\', '?', '?', '\\' }; switch (getNamespacePrefix(u16, path)) { // TODO: Figure out a way to design an API that can avoid the copy for .nt, @@ -2312,7 +2325,7 @@ pub const NamespacePrefix = enum { nt, }; -/// If `T` is `u16`, then `path` should be encoded as UTF-16LE. +/// If `T` is `u16`, then `path` should be encoded as WTF-16LE. pub fn getNamespacePrefix(comptime T: type, path: []const T) NamespacePrefix { if (path.len < 4) return .none; var all_backslash = switch (mem.littleToNative(T, path[0])) { @@ -2366,7 +2379,7 @@ pub const UnprefixedPathType = enum { /// Get the path type of a path that is known to not have any namespace prefixes /// (`\\?\`, `\\.\`, `\??\`). -/// If `T` is `u16`, then `path` should be encoded as UTF-16LE. 
+/// If `T` is `u16`, then `path` should be encoded as WTF-16LE. pub fn getUnprefixedPathType(comptime T: type, path: []const T) UnprefixedPathType { if (path.len < 1) return .relative; @@ -2420,7 +2433,7 @@ test getUnprefixedPathType { /// Functionality is based on the ReactOS test cases found here: /// https://github.com/reactos/reactos/blob/master/modules/rostests/apitests/ntdll/RtlNtPathNameToDosPathName.c /// -/// `path` should be encoded as UTF-16LE. +/// `path` should be encoded as WTF-16LE. pub fn ntToWin32Namespace(path: []const u16) !PathSpace { if (path.len > PATH_MAX_WIDE) return error.NameTooLong; @@ -2530,7 +2543,6 @@ pub fn unexpectedError(err: Win32Error) std.os.UnexpectedError { if (std.os.unexpected_error_tracing) { // 614 is the length of the longest windows error description var buf_wstr: [614]WCHAR = undefined; - var buf_utf8: [614]u8 = undefined; const len = kernel32.FormatMessageW( FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS, null, @@ -2540,8 +2552,10 @@ pub fn unexpectedError(err: Win32Error) std.os.UnexpectedError { buf_wstr.len, null, ); - _ = std.unicode.utf16LeToUtf8(&buf_utf8, buf_wstr[0..len]) catch unreachable; - std.debug.print("error.Unexpected: GetLastError({}): {s}\n", .{ @intFromEnum(err), buf_utf8[0..len] }); + std.debug.print("error.Unexpected: GetLastError({}): {}\n", .{ + @intFromEnum(err), + std.unicode.fmtUtf16Le(buf_wstr[0..len]), + }); std.debug.dumpCurrentStackTrace(@returnAddress()); } return error.Unexpected; diff --git a/lib/std/process.zig b/lib/std/process.zig index 23db567057..98a564a250 100644 --- a/lib/std/process.zig +++ b/lib/std/process.zig @@ -16,11 +16,15 @@ pub const changeCurDir = os.chdir; pub const changeCurDirC = os.chdirC; /// The result is a slice of `out_buffer`, from index `0`. +/// On Windows, the result is encoded as [WTF-8](https://simonsapin.github.io/wtf-8/). +/// On other platforms, the result is an opaque sequence of bytes with no particular encoding. 
pub fn getCwd(out_buffer: []u8) ![]u8 { return os.getcwd(out_buffer); } /// Caller must free the returned memory. +/// On Windows, the result is encoded as [WTF-8](https://simonsapin.github.io/wtf-8/). +/// On other platforms, the result is an opaque sequence of bytes with no particular encoding. pub fn getCwdAlloc(allocator: Allocator) ![]u8 { // The use of MAX_PATH_BYTES here is just a heuristic: most paths will fit // in stack_buf, avoiding an extra allocation in the common case. @@ -76,7 +80,7 @@ pub const EnvMap = struct { _ = self; if (builtin.os.tag == .windows) { var h = std.hash.Wyhash.init(0); - var it = std.unicode.Utf8View.initUnchecked(s).iterator(); + var it = std.unicode.Wtf8View.initUnchecked(s).iterator(); while (it.nextCodepoint()) |cp| { const cp_upper = upcase(cp); h.update(&[_]u8{ @@ -93,8 +97,8 @@ pub const EnvMap = struct { pub fn eql(self: @This(), a: []const u8, b: []const u8) bool { _ = self; if (builtin.os.tag == .windows) { - var it_a = std.unicode.Utf8View.initUnchecked(a).iterator(); - var it_b = std.unicode.Utf8View.initUnchecked(b).iterator(); + var it_a = std.unicode.Wtf8View.initUnchecked(a).iterator(); + var it_b = std.unicode.Wtf8View.initUnchecked(b).iterator(); while (true) { const c_a = it_a.nextCodepoint() orelse break; const c_b = it_b.nextCodepoint() orelse return false; @@ -129,8 +133,9 @@ pub const EnvMap = struct { /// Same as `put` but the key and value become owned by the EnvMap rather /// than being copied. /// If `putMove` fails, the ownership of key and value does not transfer. - /// On Windows `key` must be a valid UTF-8 string. + /// On Windows `key` must be a valid [WTF-8](https://simonsapin.github.io/wtf-8/) string. 
pub fn putMove(self: *EnvMap, key: []u8, value: []u8) !void { + assert(std.unicode.wtf8ValidateSlice(key)); const get_or_put = try self.hash_map.getOrPut(key); if (get_or_put.found_existing) { self.free(get_or_put.key_ptr.*); @@ -141,8 +146,9 @@ pub const EnvMap = struct { } /// `key` and `value` are copied into the EnvMap. - /// On Windows `key` must be a valid UTF-8 string. + /// On Windows `key` must be a valid [WTF-8](https://simonsapin.github.io/wtf-8/) string. pub fn put(self: *EnvMap, key: []const u8, value: []const u8) !void { + assert(std.unicode.wtf8ValidateSlice(key)); const value_copy = try self.copy(value); errdefer self.free(value_copy); const get_or_put = try self.hash_map.getOrPut(key); @@ -159,23 +165,26 @@ pub const EnvMap = struct { /// Find the address of the value associated with a key. /// The returned pointer is invalidated if the map resizes. - /// On Windows `key` must be a valid UTF-8 string. + /// On Windows `key` must be a valid [WTF-8](https://simonsapin.github.io/wtf-8/) string. pub fn getPtr(self: EnvMap, key: []const u8) ?*[]const u8 { + assert(std.unicode.wtf8ValidateSlice(key)); return self.hash_map.getPtr(key); } /// Return the map's copy of the value associated with /// a key. The returned string is invalidated if this /// key is removed from the map. - /// On Windows `key` must be a valid UTF-8 string. + /// On Windows `key` must be a valid [WTF-8](https://simonsapin.github.io/wtf-8/) string. pub fn get(self: EnvMap, key: []const u8) ?[]const u8 { + assert(std.unicode.wtf8ValidateSlice(key)); return self.hash_map.get(key); } /// Removes the item from the map and frees its value. /// This invalidates the value returned by get() for this key. - /// On Windows `key` must be a valid UTF-8 string. + /// On Windows `key` must be a valid [WTF-8](https://simonsapin.github.io/wtf-8/) string. 
pub fn remove(self: *EnvMap, key: []const u8) void { + assert(std.unicode.wtf8ValidateSlice(key)); const kv = self.hash_map.fetchRemove(key) orelse return; self.free(kv.key); self.free(kv.value); @@ -239,18 +248,34 @@ test "EnvMap" { try testing.expectEqual(@as(EnvMap.Size, 1), env.count()); - // test Unicode case-insensitivity on Windows if (builtin.os.tag == .windows) { + // test Unicode case-insensitivity on Windows try env.put("КИРиллИЦА", "something else"); try testing.expectEqualStrings("something else", env.get("кириллица").?); + + // and WTF-8 that's not valid UTF-8 + const wtf8_with_surrogate_pair = try std.unicode.wtf16LeToWtf8Alloc(testing.allocator, &[_]u16{ + std.mem.nativeToLittle(u16, 0xD83D), // unpaired high surrogate + }); + defer testing.allocator.free(wtf8_with_surrogate_pair); + + try env.put(wtf8_with_surrogate_pair, wtf8_with_surrogate_pair); + try testing.expectEqualSlices(u8, wtf8_with_surrogate_pair, env.get(wtf8_with_surrogate_pair).?); } } +pub const GetEnvMapError = error{ + OutOfMemory, + /// WASI-only. `environ_sizes_get` or `environ_get` + /// failed for an unexpected reason. + Unexpected, +}; + /// Returns a snapshot of the environment variables of the current process. /// Any modifications to the resulting EnvMap will not be reflected in the environment, and /// likewise, any future modifications to the environment will not be reflected in the EnvMap. /// Caller owns resulting `EnvMap` and should call its `deinit` fn when done. 
-pub fn getEnvMap(allocator: Allocator) !EnvMap { +pub fn getEnvMap(allocator: Allocator) GetEnvMapError!EnvMap { var result = EnvMap.init(allocator); errdefer result.deinit(); @@ -269,7 +294,7 @@ pub fn getEnvMap(allocator: Allocator) !EnvMap { while (ptr[i] != 0 and ptr[i] != '=') : (i += 1) {} const key_w = ptr[key_start..i]; - const key = try std.unicode.utf16LeToUtf8Alloc(allocator, key_w); + const key = try std.unicode.wtf16LeToWtf8Alloc(allocator, key_w); errdefer allocator.free(key); if (ptr[i] == '=') i += 1; @@ -277,7 +302,7 @@ pub fn getEnvMap(allocator: Allocator) !EnvMap { const value_start = i; while (ptr[i] != 0) : (i += 1) {} const value_w = ptr[value_start..i]; - const value = try std.unicode.utf16LeToUtf8Alloc(allocator, value_w); + const value = try std.unicode.wtf16LeToWtf8Alloc(allocator, value_w); errdefer allocator.free(value); i += 1; // skip over null byte @@ -355,25 +380,26 @@ pub const GetEnvVarOwnedError = error{ OutOfMemory, EnvironmentVariableNotFound, - /// See https://github.com/ziglang/zig/issues/1774 - InvalidUtf8, + /// On Windows, environment variable keys provided by the user must be valid WTF-8. + /// https://simonsapin.github.io/wtf-8/ + InvalidWtf8, }; /// Caller must free returned memory. +/// On Windows, if `key` is not valid [WTF-8](https://simonsapin.github.io/wtf-8/), +/// then `error.InvalidWtf8` is returned. +/// On Windows, the value is encoded as [WTF-8](https://simonsapin.github.io/wtf-8/). +/// On other platforms, the value is an opaque sequence of bytes with no particular encoding. 
pub fn getEnvVarOwned(allocator: Allocator, key: []const u8) GetEnvVarOwnedError![]u8 { if (builtin.os.tag == .windows) { const result_w = blk: { - const key_w = try std.unicode.utf8ToUtf16LeAllocZ(allocator, key); + const key_w = try std.unicode.wtf8ToWtf16LeAllocZ(allocator, key); defer allocator.free(key_w); break :blk std.os.getenvW(key_w) orelse return error.EnvironmentVariableNotFound; }; - return std.unicode.utf16LeToUtf8Alloc(allocator, result_w) catch |err| switch (err) { - error.DanglingSurrogateHalf => return error.InvalidUtf8, - error.ExpectedSecondSurrogateHalf => return error.InvalidUtf8, - error.UnexpectedSecondSurrogateHalf => return error.InvalidUtf8, - else => |e| return e, - }; + // wtf16LeToWtf8Alloc can only fail with OutOfMemory + return std.unicode.wtf16LeToWtf8Alloc(allocator, result_w); } else if (builtin.os.tag == .wasi and !builtin.link_libc) { var envmap = getEnvMap(allocator) catch return error.OutOfMemory; defer envmap.deinit(); @@ -385,6 +411,7 @@ pub fn getEnvVarOwned(allocator: Allocator, key: []const u8) GetEnvVarOwnedError } } +/// On Windows, `key` must be valid UTF-8. pub fn hasEnvVarConstant(comptime key: []const u8) bool { if (builtin.os.tag == .windows) { const key_w = comptime std.unicode.utf8ToUtf16LeStringLiteral(key); @@ -396,11 +423,22 @@ pub fn hasEnvVarConstant(comptime key: []const u8) bool { } } -pub fn hasEnvVar(allocator: Allocator, key: []const u8) error{OutOfMemory}!bool { +pub const HasEnvVarError = error{ + OutOfMemory, + + /// On Windows, environment variable keys provided by the user must be valid WTF-8. + /// https://simonsapin.github.io/wtf-8/ + InvalidWtf8, +}; + +/// On Windows, if `key` is not valid [WTF-8](https://simonsapin.github.io/wtf-8/), +/// then `error.InvalidWtf8` is returned. 
+pub fn hasEnvVar(allocator: Allocator, key: []const u8) HasEnvVarError!bool { if (builtin.os.tag == .windows) { var stack_alloc = std.heap.stackFallback(256 * @sizeOf(u16), allocator); - const key_w = try std.unicode.utf8ToUtf16LeAllocZ(stack_alloc.get(), key); - defer stack_alloc.allocator.free(key_w); + const stack_allocator = stack_alloc.get(); + const key_w = try std.unicode.wtf8ToWtf16LeAllocZ(stack_allocator, key); + defer stack_allocator.free(key_w); return std.os.getenvW(key_w) != null; } else if (builtin.os.tag == .wasi and !builtin.link_libc) { var envmap = getEnvMap(allocator) catch return error.OutOfMemory; @@ -411,9 +449,22 @@ pub fn hasEnvVar(allocator: Allocator, key: []const u8) error{OutOfMemory}!bool } } -test "os.getEnvVarOwned" { - const ga = std.testing.allocator; - try testing.expectError(error.EnvironmentVariableNotFound, getEnvVarOwned(ga, "BADENV")); +test getEnvVarOwned { + try testing.expectError( + error.EnvironmentVariableNotFound, + getEnvVarOwned(std.testing.allocator, "BADENV"), + ); +} + +test hasEnvVarConstant { + if (builtin.os.tag == .wasi and !builtin.link_libc) return error.SkipZigTest; + + try testing.expect(!hasEnvVarConstant("BADENV")); +} + +test hasEnvVar { + const has_env = try hasEnvVar(std.testing.allocator, "BADENV"); + try testing.expect(!has_env); } pub const ArgIteratorPosix = struct { @@ -531,6 +582,7 @@ pub const ArgIteratorWasi = struct { pub const ArgIteratorWindows = struct { allocator: Allocator, /// Owned by the iterator. + /// Encoded as WTF-8. cmd_line: []const u8, index: usize = 0, /// Owned by the iterator. Long enough to hold the entire `cmd_line` plus a null terminator. @@ -538,20 +590,14 @@ pub const ArgIteratorWindows = struct { start: usize = 0, end: usize = 0, - pub const InitError = error{ OutOfMemory, InvalidCmdLine }; + pub const InitError = error{OutOfMemory}; - /// `cmd_line_w` *must* be an UTF16-LE-encoded string. + /// `cmd_line_w` *must* be a WTF16-LE-encoded string. 
/// - /// The iterator makes a copy of `cmd_line_w` converted UTF-8 and keeps it; it does *not* take + /// The iterator makes a copy of `cmd_line_w` converted WTF-8 and keeps it; it does *not* take /// ownership of `cmd_line_w`. pub fn init(allocator: Allocator, cmd_line_w: [*:0]const u16) InitError!ArgIteratorWindows { - const cmd_line = std.unicode.utf16LeToUtf8Alloc(allocator, mem.sliceTo(cmd_line_w, 0)) catch |err| switch (err) { - error.DanglingSurrogateHalf, - error.ExpectedSecondSurrogateHalf, - error.UnexpectedSecondSurrogateHalf, - => return error.InvalidCmdLine, - error.OutOfMemory => return error.OutOfMemory, - }; + const cmd_line = try std.unicode.wtf16LeToWtf8Alloc(allocator, mem.sliceTo(cmd_line_w, 0)); errdefer allocator.free(cmd_line); const buffer = try allocator.alloc(u8, cmd_line.len + 1); @@ -566,6 +612,7 @@ pub const ArgIteratorWindows = struct { /// Returns the next argument and advances the iterator. Returns `null` if at the end of the /// command-line string. The iterator owns the returned slice. + /// The result is encoded as [WTF-8](https://simonsapin.github.io/wtf-8/). 
pub fn next(self: *ArgIteratorWindows) ?[:0]const u8 { return self.nextWithStrategy(next_strategy); } @@ -777,7 +824,6 @@ pub fn ArgIteratorGeneral(comptime options: ArgIteratorGeneralOptions) type { pub const Self = @This(); pub const InitError = error{OutOfMemory}; - pub const InitUtf16leError = error{ OutOfMemory, InvalidCmdLine }; /// cmd_line_utf8 MUST remain valid and constant while using this instance pub fn init(allocator: Allocator, cmd_line_utf8: []const u8) InitError!Self { @@ -805,30 +851,6 @@ pub fn ArgIteratorGeneral(comptime options: ArgIteratorGeneralOptions) type { }; } - /// cmd_line_utf16le MUST be encoded UTF16-LE, and is converted to UTF-8 in an internal buffer - pub fn initUtf16le(allocator: Allocator, cmd_line_utf16le: [*:0]const u16) InitUtf16leError!Self { - const utf16le_slice = mem.sliceTo(cmd_line_utf16le, 0); - const cmd_line = std.unicode.utf16LeToUtf8Alloc(allocator, utf16le_slice) catch |err| switch (err) { - error.ExpectedSecondSurrogateHalf, - error.DanglingSurrogateHalf, - error.UnexpectedSecondSurrogateHalf, - => return error.InvalidCmdLine, - - error.OutOfMemory => return error.OutOfMemory, - }; - errdefer allocator.free(cmd_line); - - const buffer = try allocator.alloc(u8, cmd_line.len + 1); - errdefer allocator.free(buffer); - - return Self{ - .allocator = allocator, - .cmd_line = cmd_line, - .free_cmd_line_on_deinit = true, - .buffer = buffer, - }; - } - // Skips over whitespace in the cmd_line. // Returns false if the terminating sentinel is reached, true otherwise. // Also skips over comments (if supported). @@ -1021,6 +1043,8 @@ pub const ArgIterator = struct { /// Get the next argument. Returns 'null' if we are at the end. /// Returned slice is pointing to the iterator's internal buffer. + /// On Windows, the result is encoded as [WTF-8](https://simonsapin.github.io/wtf-8/). + /// On other platforms, the result is an opaque sequence of bytes with no particular encoding. 
pub fn next(self: *ArgIterator) ?([:0]const u8) { return self.inner.next(); } @@ -1057,6 +1081,8 @@ pub fn argsWithAllocator(allocator: Allocator) ArgIterator.InitError!ArgIterator } /// Caller must call argsFree on result. +/// On Windows, the result is encoded as [WTF-8](https://simonsapin.github.io/wtf-8/). +/// On other platforms, the result is an opaque sequence of bytes with no particular encoding. pub fn argsAlloc(allocator: Allocator) ![][:0]u8 { // TODO refactor to only make 1 allocation. var it = try argsWithAllocator(allocator); @@ -1201,7 +1227,7 @@ test "ArgIteratorWindows" { } fn testArgIteratorWindows(cmd_line: []const u8, expected_args: []const []const u8) !void { - const cmd_line_w = try std.unicode.utf8ToUtf16LeAllocZ(testing.allocator, cmd_line); + const cmd_line_w = try std.unicode.wtf8ToWtf16LeAllocZ(testing.allocator, cmd_line); defer testing.allocator.free(cmd_line_w); // next diff --git a/lib/std/unicode.zig b/lib/std/unicode.zig index 143dbdc15d..224b0b3801 100644 --- a/lib/std/unicode.zig +++ b/lib/std/unicode.zig @@ -488,7 +488,9 @@ pub const Utf16LeIterator = struct { }; } - pub fn nextCodepoint(it: *Utf16LeIterator) !?u21 { + pub const NextCodepointError = error{ DanglingSurrogateHalf, ExpectedSecondSurrogateHalf, UnexpectedSecondSurrogateHalf }; + + pub fn nextCodepoint(it: *Utf16LeIterator) NextCodepointError!?u21 { assert(it.i <= it.bytes.len); if (it.i == it.bytes.len) return null; var code_units: [2]u16 = undefined; @@ -923,7 +925,14 @@ test "fmtUtf8" { try expectFmt("����A", "{}", .{fmtUtf8("\xE1\x80\xE2\xF0\x91\x92\xF1\xBFA")}); } -fn utf16LeToUtf8ArrayListImpl(array_list: *std.ArrayList(u8), utf16le: []const u16, comptime surrogates: Surrogates) !void { +fn utf16LeToUtf8ArrayListImpl( + array_list: *std.ArrayList(u8), + utf16le: []const u16, + comptime surrogates: Surrogates, +) (switch (surrogates) { + .cannot_encode_surrogate_half => Utf16LeToUtf8AllocError, + .can_encode_surrogate_half => mem.Allocator.Error, +})!void { // 
optimistically guess that it will all be ascii. try array_list.ensureTotalCapacityPrecise(utf16le.len); @@ -975,7 +984,9 @@ fn utf16LeToUtf8ArrayListImpl(array_list: *std.ArrayList(u8), utf16le: []const u } } -pub fn utf16LeToUtf8ArrayList(array_list: *std.ArrayList(u8), utf16le: []const u16) !void { +pub const Utf16LeToUtf8AllocError = mem.Allocator.Error || Utf16LeToUtf8Error; + +pub fn utf16LeToUtf8ArrayList(array_list: *std.ArrayList(u8), utf16le: []const u16) Utf16LeToUtf8AllocError!void { return utf16LeToUtf8ArrayListImpl(array_list, utf16le, .cannot_encode_surrogate_half); } @@ -983,7 +994,7 @@ pub fn utf16LeToUtf8ArrayList(array_list: *std.ArrayList(u8), utf16le: []const u pub const utf16leToUtf8Alloc = utf16LeToUtf8Alloc; /// Caller must free returned memory. -pub fn utf16LeToUtf8Alloc(allocator: mem.Allocator, utf16le: []const u16) ![]u8 { +pub fn utf16LeToUtf8Alloc(allocator: mem.Allocator, utf16le: []const u16) Utf16LeToUtf8AllocError![]u8 { // optimistically guess that it will all be ascii. var result = try std.ArrayList(u8).initCapacity(allocator, utf16le.len); errdefer result.deinit(); @@ -997,7 +1008,7 @@ pub fn utf16LeToUtf8Alloc(allocator: mem.Allocator, utf16le: []const u16) ![]u8 pub const utf16leToUtf8AllocZ = utf16LeToUtf8AllocZ; /// Caller must free returned memory. -pub fn utf16LeToUtf8AllocZ(allocator: mem.Allocator, utf16le: []const u16) ![:0]u8 { +pub fn utf16LeToUtf8AllocZ(allocator: mem.Allocator, utf16le: []const u16) Utf16LeToUtf8AllocError![:0]u8 { // optimistically guess that it will all be ascii (and allocate space for the null terminator) var result = try std.ArrayList(u8).initCapacity(allocator, utf16le.len + 1); errdefer result.deinit(); @@ -1007,9 +1018,14 @@ pub fn utf16LeToUtf8AllocZ(allocator: mem.Allocator, utf16le: []const u16) ![:0] return result.toOwnedSliceSentinel(0); } +pub const Utf16LeToUtf8Error = Utf16LeIterator.NextCodepointError; + /// Asserts that the output buffer is big enough. 
/// Returns end byte index into utf8. -fn utf16LeToUtf8Impl(utf8: []u8, utf16le: []const u16, comptime surrogates: Surrogates) !usize { +fn utf16LeToUtf8Impl(utf8: []u8, utf16le: []const u16, comptime surrogates: Surrogates) (switch (surrogates) { + .cannot_encode_surrogate_half => Utf16LeToUtf8Error, + .can_encode_surrogate_half => error{}, +})!usize { var end_index: usize = 0; var remaining = utf16le; @@ -1043,7 +1059,9 @@ fn utf16LeToUtf8Impl(utf8: []u8, utf16le: []const u16, comptime surrogates: Surr // The maximum possible codepoint encoded by UTF-16 is U+10FFFF, // which is within the valid codepoint range. error.CodepointTooLarge => unreachable, - else => |e| return e, + // We know the codepoint was valid in UTF-16, meaning it is not + // an unpaired surrogate codepoint. + error.Utf8CannotEncodeSurrogateHalf => unreachable, }; } }, @@ -1064,7 +1082,7 @@ fn utf16LeToUtf8Impl(utf8: []u8, utf16le: []const u16, comptime surrogates: Surr /// Deprecated; renamed to utf16LeToUtf8 pub const utf16leToUtf8 = utf16LeToUtf8; -pub fn utf16LeToUtf8(utf8: []u8, utf16le: []const u16) !usize { +pub fn utf16LeToUtf8(utf8: []u8, utf16le: []const u16) Utf16LeToUtf8Error!usize { return utf16LeToUtf8Impl(utf8, utf16le, .cannot_encode_surrogate_half); } @@ -1176,11 +1194,11 @@ fn utf8ToUtf16LeArrayListImpl(array_list: *std.ArrayList(u16), utf8: []const u8, } } -pub fn utf8ToUtf16LeArrayList(array_list: *std.ArrayList(u16), utf8: []const u8) !void { +pub fn utf8ToUtf16LeArrayList(array_list: *std.ArrayList(u16), utf8: []const u8) error{ InvalidUtf8, OutOfMemory }!void { return utf8ToUtf16LeArrayListImpl(array_list, utf8, .cannot_encode_surrogate_half); } -pub fn utf8ToUtf16LeAlloc(allocator: mem.Allocator, utf8: []const u8) ![]u16 { +pub fn utf8ToUtf16LeAlloc(allocator: mem.Allocator, utf8: []const u8) error{ InvalidUtf8, OutOfMemory }![]u16 { // optimistically guess that it will not require surrogate pairs var result = try std.ArrayList(u16).initCapacity(allocator, utf8.len); 
errdefer result.deinit(); @@ -1193,7 +1211,7 @@ pub fn utf8ToUtf16LeAlloc(allocator: mem.Allocator, utf8: []const u8) ![]u16 { /// Deprecated; renamed to utf8ToUtf16LeAllocZ pub const utf8ToUtf16LeWithNull = utf8ToUtf16LeAllocZ; -pub fn utf8ToUtf16LeAllocZ(allocator: mem.Allocator, utf8: []const u8) ![:0]u16 { +pub fn utf8ToUtf16LeAllocZ(allocator: mem.Allocator, utf8: []const u8) error{ InvalidUtf8, OutOfMemory }![:0]u16 { // optimistically guess that it will not require surrogate pairs var result = try std.ArrayList(u16).initCapacity(allocator, utf8.len + 1); errdefer result.deinit(); @@ -1205,7 +1223,7 @@ pub fn utf8ToUtf16LeAllocZ(allocator: mem.Allocator, utf8: []const u8) ![:0]u16 /// Returns index of next character. If exact fit, returned index equals output slice length. /// Assumes there is enough space for the output. -pub fn utf8ToUtf16Le(utf16le: []u16, utf8: []const u8) !usize { +pub fn utf8ToUtf16Le(utf16le: []u16, utf8: []const u8) error{InvalidUtf8}!usize { return utf8ToUtf16LeImpl(utf16le, utf8, .cannot_encode_surrogate_half); } @@ -1236,11 +1254,14 @@ pub fn utf8ToUtf16LeImpl(utf16le: []u16, utf8: []const u8, comptime surrogates: var src_i: usize = 0; while (src_i < remaining.len) { - const n = utf8ByteSequenceLength(remaining[src_i]) catch return error.InvalidUtf8; + const n = utf8ByteSequenceLength(remaining[src_i]) catch return switch (surrogates) { + .cannot_encode_surrogate_half => error.InvalidUtf8, + .can_encode_surrogate_half => error.InvalidWtf8, + }; const next_src_i = src_i + n; const codepoint = switch (surrogates) { .cannot_encode_surrogate_half => utf8Decode(remaining[src_i..next_src_i]) catch return error.InvalidUtf8, - .can_encode_surrogate_half => wtf8Decode(remaining[src_i..next_src_i]) catch return error.InvalidUtf8, + .can_encode_surrogate_half => wtf8Decode(remaining[src_i..next_src_i]) catch return error.InvalidWtf8, }; if (codepoint < 0x10000) { const short = @as(u16, @intCast(codepoint)); @@ -1600,9 +1621,9 @@ fn 
testValidateWtf8Slice() !void { pub const Wtf8View = struct { bytes: []const u8, - pub fn init(s: []const u8) !Wtf8View { + pub fn init(s: []const u8) error{InvalidWtf8}!Wtf8View { if (!wtf8ValidateSlice(s)) { - return error.InvalidUtf8; + return error.InvalidWtf8; } return initUnchecked(s); @@ -1614,8 +1635,8 @@ pub const Wtf8View = struct { pub inline fn initComptime(comptime s: []const u8) Wtf8View { return comptime if (init(s)) |r| r else |err| switch (err) { - error.InvalidUtf8 => { - @compileError("invalid utf8 detected in wtf8 string"); + error.InvalidWtf8 => { + @compileError("invalid wtf8"); }, }; } @@ -1665,12 +1686,12 @@ pub const Wtf8Iterator = struct { } }; -pub fn wtf16LeToWtf8ArrayList(array_list: *std.ArrayList(u8), utf16le: []const u16) !void { +pub fn wtf16LeToWtf8ArrayList(array_list: *std.ArrayList(u8), utf16le: []const u16) mem.Allocator.Error!void { return utf16LeToUtf8ArrayListImpl(array_list, utf16le, .can_encode_surrogate_half); } /// Caller must free returned memory. -pub fn wtf16LeToWtf8Alloc(allocator: mem.Allocator, wtf16le: []const u16) ![]u8 { +pub fn wtf16LeToWtf8Alloc(allocator: mem.Allocator, wtf16le: []const u16) mem.Allocator.Error![]u8 { // optimistically guess that it will all be ascii. var result = try std.ArrayList(u8).initCapacity(allocator, wtf16le.len); errdefer result.deinit(); @@ -1681,7 +1702,7 @@ pub fn wtf16LeToWtf8Alloc(allocator: mem.Allocator, wtf16le: []const u16) ![]u8 } /// Caller must free returned memory. 
-pub fn wtf16LeToWtf8AllocZ(allocator: mem.Allocator, wtf16le: []const u16) ![:0]u8 { +pub fn wtf16LeToWtf8AllocZ(allocator: mem.Allocator, wtf16le: []const u16) mem.Allocator.Error![:0]u8 { // optimistically guess that it will all be ascii (and allocate space for the null terminator) var result = try std.ArrayList(u8).initCapacity(allocator, wtf16le.len + 1); errdefer result.deinit(); @@ -1695,11 +1716,11 @@ pub fn wtf16LeToWtf8(wtf8: []u8, wtf16le: []const u16) usize { return utf16LeToUtf8Impl(wtf8, wtf16le, .can_encode_surrogate_half) catch |err| switch (err) {}; } -pub fn wtf8ToWtf16LeArrayList(array_list: *std.ArrayList(u16), wtf8: []const u8) !void { +pub fn wtf8ToWtf16LeArrayList(array_list: *std.ArrayList(u16), wtf8: []const u8) error{ InvalidWtf8, OutOfMemory }!void { return utf8ToUtf16LeArrayListImpl(array_list, wtf8, .can_encode_surrogate_half); } -pub fn wtf8ToWtf16LeAlloc(allocator: mem.Allocator, wtf8: []const u8) ![]u16 { +pub fn wtf8ToWtf16LeAlloc(allocator: mem.Allocator, wtf8: []const u8) error{ InvalidWtf8, OutOfMemory }![]u16 { // optimistically guess that it will not require surrogate pairs var result = try std.ArrayList(u16).initCapacity(allocator, wtf8.len); errdefer result.deinit(); @@ -1709,7 +1730,7 @@ pub fn wtf8ToWtf16LeAlloc(allocator: mem.Allocator, wtf8: []const u8) ![]u16 { return result.toOwnedSlice(); } -pub fn wtf8ToWtf16LeAllocZ(allocator: mem.Allocator, wtf8: []const u8) ![:0]u16 { +pub fn wtf8ToWtf16LeAllocZ(allocator: mem.Allocator, wtf8: []const u8) error{ InvalidWtf8, OutOfMemory }![:0]u16 { // optimistically guess that it will not require surrogate pairs var result = try std.ArrayList(u16).initCapacity(allocator, wtf8.len + 1); errdefer result.deinit(); @@ -1721,7 +1742,7 @@ pub fn wtf8ToWtf16LeAllocZ(allocator: mem.Allocator, wtf8: []const u8) ![:0]u16 /// Returns index of next character. If exact fit, returned index equals output slice length. /// Assumes there is enough space for the output. 
-pub fn wtf8ToWtf16Le(wtf16le: []u16, wtf8: []const u8) !usize { +pub fn wtf8ToWtf16Le(wtf16le: []u16, wtf8: []const u8) error{InvalidWtf8}!usize { return utf8ToUtf16LeImpl(wtf16le, wtf8, .can_encode_surrogate_half); } @@ -1732,7 +1753,8 @@ pub fn wtf8ToWtf16Le(wtf16le: []u16, wtf8: []const u8) !usize { /// In-place conversion is supported when `utf8` and `wtf8` refer to the same slice. /// Note: If `wtf8` is entirely composed of well-formed UTF-8, then no conversion is necessary. /// `utf8ValidateSlice` can be used to check if lossy conversion is worthwhile. -pub fn wtf8ToUtf8Lossy(utf8: []u8, wtf8: []const u8) !void { +/// If `wtf8` is not valid WTF-8, then `error.InvalidWtf8` is returned. +pub fn wtf8ToUtf8Lossy(utf8: []u8, wtf8: []const u8) error{InvalidWtf8}!void { assert(utf8.len >= wtf8.len); const in_place = utf8.ptr == wtf8.ptr; @@ -1762,7 +1784,7 @@ pub fn wtf8ToUtf8Lossy(utf8: []u8, wtf8: []const u8) !void { } } -pub fn wtf8ToUtf8LossyAlloc(allocator: mem.Allocator, wtf8: []const u8) ![]u8 { +pub fn wtf8ToUtf8LossyAlloc(allocator: mem.Allocator, wtf8: []const u8) error{ InvalidWtf8, OutOfMemory }![]u8 { const utf8 = try allocator.alloc(u8, wtf8.len); errdefer allocator.free(utf8); @@ -1771,7 +1793,7 @@ pub fn wtf8ToUtf8LossyAlloc(allocator: mem.Allocator, wtf8: []const u8) ![]u8 { return utf8; } -pub fn wtf8ToUtf8LossyAllocZ(allocator: mem.Allocator, wtf8: []const u8) ![:0]u8 { +pub fn wtf8ToUtf8LossyAllocZ(allocator: mem.Allocator, wtf8: []const u8) error{ InvalidWtf8, OutOfMemory }![:0]u8 { const utf8 = try allocator.allocSentinel(u8, wtf8.len, 0); errdefer allocator.free(utf8); diff --git a/lib/std/zig/system.zig b/lib/std/zig/system.zig index b29cf939d0..d30706e5e2 100644 --- a/lib/std/zig/system.zig +++ b/lib/std/zig/system.zig @@ -639,7 +639,8 @@ pub fn abiAndDynamicLinkerFromFile( var link_buf: [std.os.PATH_MAX]u8 = undefined; const link_name = std.os.readlink(dl_path, &link_buf) catch |err| switch (err) { error.NameTooLong => unreachable, - 
error.InvalidUtf8 => unreachable, // Windows only + error.InvalidUtf8 => unreachable, // WASI only + error.InvalidWtf8 => unreachable, // Windows only error.BadPathName => unreachable, // Windows only error.UnsupportedReparsePointType => unreachable, // Windows only error.NetworkNotFound => unreachable, // Windows only @@ -730,7 +731,8 @@ test glibcVerFromLinkName { fn glibcVerFromRPath(rpath: []const u8) !std.SemanticVersion { var dir = fs.cwd().openDir(rpath, .{}) catch |err| switch (err) { error.NameTooLong => unreachable, - error.InvalidUtf8 => unreachable, + error.InvalidUtf8 => unreachable, // WASI only + error.InvalidWtf8 => unreachable, // Windows-only error.BadPathName => unreachable, error.DeviceBusy => unreachable, error.NetworkNotFound => unreachable, // Windows-only @@ -761,7 +763,8 @@ fn glibcVerFromRPath(rpath: []const u8) !std.SemanticVersion { const glibc_so_basename = "libc.so.6"; var f = dir.openFile(glibc_so_basename, .{}) catch |err| switch (err) { error.NameTooLong => unreachable, - error.InvalidUtf8 => unreachable, // Windows only + error.InvalidUtf8 => unreachable, // WASI only + error.InvalidWtf8 => unreachable, // Windows only error.BadPathName => unreachable, // Windows only error.PipeBusy => unreachable, // Windows-only error.SharingViolation => unreachable, // Windows-only @@ -998,7 +1001,8 @@ fn detectAbiAndDynamicLinker( error.NameTooLong => unreachable, error.PathAlreadyExists => unreachable, error.SharingViolation => unreachable, - error.InvalidUtf8 => unreachable, + error.InvalidUtf8 => unreachable, // WASI only + error.InvalidWtf8 => unreachable, // Windows only error.BadPathName => unreachable, error.PipeBusy => unreachable, error.FileLocksNotSupported => unreachable, diff --git a/lib/std/zig/system/NativePaths.zig b/lib/std/zig/system/NativePaths.zig index de800f866b..1d3ce10d9b 100644 --- a/lib/std/zig/system/NativePaths.zig +++ b/lib/std/zig/system/NativePaths.zig @@ -41,7 +41,7 @@ pub fn detect(arena: Allocator, 
native_target: std.Target) !NativePaths { } } } else |err| switch (err) { - error.InvalidUtf8 => {}, + error.InvalidWtf8 => unreachable, error.EnvironmentVariableNotFound => {}, error.OutOfMemory => |e| return e, } @@ -73,7 +73,7 @@ pub fn detect(arena: Allocator, native_target: std.Target) !NativePaths { } } } else |err| switch (err) { - error.InvalidUtf8 => {}, + error.InvalidWtf8 => unreachable, error.EnvironmentVariableNotFound => {}, error.OutOfMemory => |e| return e, } |
