diff options
| author | Ryan Liptak <squeek502@hotmail.com> | 2025-12-18 02:00:34 -0800 |
|---|---|---|
| committer | Andrew Kelley <andrew@ziglang.org> | 2025-12-23 22:15:10 -0800 |
| commit | a2416c685a83788780fec1c379008a2d795f7bd2 (patch) | |
| tree | 82daf738420120d11f36608336ca91d034894922 /lib/std | |
| parent | d72983da44b7ebe967868ba5236a2c4b05e84510 (diff) | |
| download | zig-a2416c685a83788780fec1c379008a2d795f7bd2.tar.gz zig-a2416c685a83788780fec1c379008a2d795f7bd2.zip | |
Implement dirReadWindows
Diffstat (limited to 'lib/std')
| -rw-r--r-- | lib/std/Io/Dir.zig | 5 | ||||
| -rw-r--r-- | lib/std/Io/Threaded.zig | 145 |
2 files changed, 145 insertions, 5 deletions
diff --git a/lib/std/Io/Dir.zig b/lib/std/Io/Dir.zig index 12fcea6576..970cddc82e 100644 --- a/lib/std/Io/Dir.zig +++ b/lib/std/Io/Dir.zig @@ -101,7 +101,10 @@ pub const Reader = struct { /// Fill position of `buffer`. end: usize, - pub const min_buffer_len = 32; + pub const min_buffer_len = switch (native_os) { + .windows => std.mem.alignForward(usize, max_name_bytes, @alignOf(usize)), + else => 32, // TODO: what is this based on? + }; pub const State = enum { /// Indicates the next call to `read` should rewind and start over the diff --git a/lib/std/Io/Threaded.zig b/lib/std/Io/Threaded.zig index e38c56ac1f..fc66345785 100644 --- a/lib/std/Io/Threaded.zig +++ b/lib/std/Io/Threaded.zig @@ -3674,10 +3674,147 @@ fn dirReadHaiku(userdata: ?*anyopaque, dr: *Dir.Reader, buffer: []Dir.Entry) Dir } fn dirReadWindows(userdata: ?*anyopaque, dr: *Dir.Reader, buffer: []Dir.Entry) Dir.Reader.Error!usize { - _ = userdata; - _ = dr; - _ = buffer; - @panic("TODO"); + const t: *Threaded = @ptrCast(@alignCast(userdata)); + const current_thread = Thread.getCurrent(t); + const w = windows; + + // We want to be able to use the `dr.buffer` for both the NtQueryDirectoryFile call (which + // returns WTF-16 names) *and* as a buffer for storing those WTF-16 names as WTF-8 to be able + // to return them in `Dir.Entry.name`. However, the problem that needs to be overcome in order to do + // that is that each WTF-16 code unit can be encoded as a maximum of 3 WTF-8 bytes, which means + // that it's not guaranteed that the memory used for the WTF-16 name will be sufficient + // for the WTF-8 encoding of the same name (for example, € is encoded as one WTF-16 code unit, + // [2 bytes] but encoded in WTF-8 as 3 bytes). + // + // The approach taken here is to "reserve" enough space in the `dr.buffer` to ensure that + // at least one entry with the maximum possible WTF-8 name length can be stored without clobbering + // any entries that follow it. That is, we determine how much space is needed to allow that, + // and then only provide the remaining portion of `dr.buffer` to the NtQueryDirectoryFile + // call. The WTF-16 names can then be safely converted using the full `dr.buffer` slice, making + // sure that each name can only potentially overwrite the data of its own entry. + // + // The worst case, where an entry's name is both the maximum length of a component and + // made up entirely of code points that are encoded as one WTF-16 code unit/three WTF-8 bytes, + // would therefore look like the diagram below, and only one entry would be able to be returned: + // + // | reserved | remaining unreserved buffer | + // | entry 1 | entry 2 | ... | + // | wtf-8 name of entry 1 | + // + // However, in the average case we will be able to store more than one WTF-8 name at a time in the + // available buffer and therefore we will be able to populate more than one `Dir.Entry` at a time. + // That might look something like this (where name 1, name 2, etc are the converted WTF-8 names): + // + // | reserved | remaining unreserved buffer | + // | entry 1 | entry 2 | ... | + // | name 1 | name 2 | name 3 | name 4 | ... | + // + // Note: More than the minimum amount of space could be reserved to make the "worst case" + // less likely, but since the worst-case also requires a maximum length component to matter, + // it's unlikely for it to become a problem in normal scenarios even if all names on the filesystem + // are made up of non-ASCII characters that have the "one WTF-16 code unit <-> three WTF-8 bytes" + // property (e.g. code points >= U+0800 and <= U+FFFF), as it's unlikely for a significant + // number of components to be maximum length. + + // We need `3 * NAME_MAX` bytes to store a max-length component as WTF-8 safely. + // Because needing to store a max-length component depends on a `FileName` *with* the maximum + // component length, we know that the corresponding populated `FILE_BOTH_DIR_INFORMATION` will + // be of size `@sizeOf(w.FILE_BOTH_DIR_INFORMATION) + 2 * NAME_MAX` bytes, so we only need to + // reserve enough to get us to up to having `3 * NAME_MAX` bytes available when taking into account + // that we have the ability to write over top of the reserved memory + the full footprint of that + // particular `FILE_BOTH_DIR_INFORMATION`. + const reserve_needed = w.NAME_MAX - @sizeOf(w.FILE_BOTH_DIR_INFORMATION); + const unreserved_start = std.mem.alignForward(usize, reserve_needed, @alignOf(usize)); + const unreserved_buffer = dr.buffer[unreserved_start..]; + // This is enforced by `Dir.Reader` + assert(unreserved_buffer.len >= @sizeOf(w.FILE_BOTH_DIR_INFORMATION) + w.NAME_MAX * 2); + + var name_index: usize = 0; + var buffer_index: usize = 0; + while (buffer.len - buffer_index != 0) { + if (dr.end - dr.index == 0) { + // Refill the buffer, unless we've already created references to + // buffered data. + if (buffer_index != 0) break; + + try current_thread.checkCancel(); + var io_status_block: w.IO_STATUS_BLOCK = undefined; + const rc = w.ntdll.NtQueryDirectoryFile( + dr.dir.handle, + null, + null, + null, + &io_status_block, + unreserved_buffer.ptr, + unreserved_buffer.len, + .BothDirectory, + w.FALSE, + null, + @intFromBool(dr.state == .reset), + ); + dr.state = .reading; + if (io_status_block.Information == 0) { + dr.state = .finished; + return 0; + } + dr.index = 0; + dr.end = io_status_block.Information; + switch (rc) { + .SUCCESS => {}, + .ACCESS_DENIED => return error.AccessDenied, // Double-check that the Dir was opened with iteration ability + else => return w.unexpectedStatus(rc), + } + } + + // While the official API docs guarantee FILE_BOTH_DIR_INFORMATION to be aligned properly + // this may not always be the case (e.g. due to faulty VM/sandboxing tools) + const dir_info: *align(2) w.FILE_BOTH_DIR_INFORMATION = @ptrCast(@alignCast(&unreserved_buffer[dr.index])); + const backtrack_index = dr.index; + if (dir_info.NextEntryOffset != 0) { + dr.index += dir_info.NextEntryOffset; + } else { + dr.index = dr.end; + } + + const name_wtf16le = @as([*]u16, @ptrCast(&dir_info.FileName))[0 .. dir_info.FileNameLength / 2]; + + if (std.mem.eql(u16, name_wtf16le, &[_]u16{'.'}) or std.mem.eql(u16, name_wtf16le, &[_]u16{ '.', '.' })) { + continue; + } + + // Read any relevant information from the `dir_info` now since it's possible the WTF-8 + // name will overwrite it. + const kind: File.Kind = blk: { + const attrs = dir_info.FileAttributes; + if (attrs.REPARSE_POINT) break :blk .sym_link; + if (attrs.DIRECTORY) break :blk .directory; + break :blk .file; + }; + const inode: File.INode = dir_info.FileIndex; + + // If there's no more space for WTF-8 names without bleeding over into + // the remaining unprocessed entries, then backtrack and return what we have so far. + if (name_index + std.unicode.calcWtf8Len(name_wtf16le) > unreserved_start + dr.index) { + // We should always be able to fit at least one entry into the buffer no matter what + std.debug.assert(buffer_index != 0); + dr.index = backtrack_index; + break; + } + + const name_buf = dr.buffer[name_index..]; + const name_wtf8_len = std.unicode.wtf16LeToWtf8(name_buf, name_wtf16le); + const name_wtf8 = name_buf[0..name_wtf8_len]; + name_index += name_wtf8_len; + + buffer[buffer_index] = .{ + .name = name_wtf8, + .kind = kind, + .inode = inode, + }; + buffer_index += 1; + } + + return buffer_index; } fn dirReadWasi(userdata: ?*anyopaque, dr: *Dir.Reader, buffer: []Dir.Entry) Dir.Reader.Error!usize { |
