aboutsummaryrefslogtreecommitdiff
path: root/lib/std
diff options
context:
space:
mode:
Diffstat (limited to 'lib/std')
-rw-r--r--lib/std/Build.zig20
-rw-r--r--lib/std/Build/Cache.zig2
-rw-r--r--lib/std/Build/Module.zig12
-rw-r--r--lib/std/Build/Step/Compile.zig25
-rw-r--r--lib/std/Build/Step/Run.zig12
-rw-r--r--lib/std/Random.zig438
-rw-r--r--lib/std/Random/Ascon.zig (renamed from lib/std/rand/Ascon.zig)7
-rw-r--r--lib/std/Random/ChaCha.zig (renamed from lib/std/rand/ChaCha.zig)7
-rw-r--r--lib/std/Random/Isaac64.zig (renamed from lib/std/rand/Isaac64.zig)5
-rw-r--r--lib/std/Random/Pcg.zig (renamed from lib/std/rand/Pcg.zig)7
-rw-r--r--lib/std/Random/RomuTrio.zig (renamed from lib/std/rand/RomuTrio.zig)7
-rw-r--r--lib/std/Random/Sfc64.zig (renamed from lib/std/rand/Sfc64.zig)5
-rw-r--r--lib/std/Random/SplitMix64.zig21
-rw-r--r--lib/std/Random/Xoroshiro128.zig (renamed from lib/std/rand/Xoroshiro128.zig)7
-rw-r--r--lib/std/Random/Xoshiro256.zig (renamed from lib/std/rand/Xoshiro256.zig)8
-rw-r--r--lib/std/Random/benchmark.zig (renamed from lib/std/rand/benchmark.zig)18
-rw-r--r--lib/std/Random/test.zig (renamed from lib/std/rand/test.zig)8
-rw-r--r--lib/std/Random/ziggurat.zig (renamed from lib/std/rand/ziggurat.zig)6
-rw-r--r--lib/std/Thread.zig13
-rw-r--r--lib/std/Thread/RwLock.zig2
-rw-r--r--lib/std/Thread/Semaphore.zig50
-rw-r--r--lib/std/Uri.zig231
-rw-r--r--lib/std/array_list.zig23
-rw-r--r--lib/std/builtin.zig3
-rw-r--r--lib/std/c.zig1427
-rw-r--r--lib/std/c/darwin.zig273
-rw-r--r--lib/std/c/dragonfly.zig71
-rw-r--r--lib/std/c/emscripten.zig29
-rw-r--r--lib/std/c/freebsd.zig88
-rw-r--r--lib/std/c/fuchsia.zig11
-rw-r--r--lib/std/c/haiku.zig83
-rw-r--r--lib/std/c/hermit.zig12
-rw-r--r--lib/std/c/linux.zig64
-rw-r--r--lib/std/c/minix.zig18
-rw-r--r--lib/std/c/netbsd.zig252
-rw-r--r--lib/std/c/openbsd.zig208
-rw-r--r--lib/std/c/solaris.zig94
-rw-r--r--lib/std/c/wasi.zig135
-rw-r--r--lib/std/c/windows.zig33
-rw-r--r--lib/std/child_process.zig81
-rw-r--r--lib/std/compress.zig10
-rw-r--r--lib/std/compress/deflate.zig44
-rw-r--r--lib/std/compress/deflate/bits_utils.zig33
-rw-r--r--lib/std/compress/deflate/compressor.zig1110
-rw-r--r--lib/std/compress/deflate/compressor_test.zig531
-rw-r--r--lib/std/compress/deflate/decompressor.zig1119
-rw-r--r--lib/std/compress/deflate/deflate_const.zig28
-rw-r--r--lib/std/compress/deflate/deflate_fast.zig720
-rw-r--r--lib/std/compress/deflate/deflate_fast_test.zig160
-rw-r--r--lib/std/compress/deflate/dict_decoder.zig423
-rw-r--r--lib/std/compress/deflate/huffman_bit_writer.zig1686
-rw-r--r--lib/std/compress/deflate/huffman_code.zig432
-rw-r--r--lib/std/compress/deflate/testdata/compress-e.txt1
-rw-r--r--lib/std/compress/deflate/testdata/compress-gettysburg.txt29
-rw-r--r--lib/std/compress/deflate/testdata/compress-pi.txt1
-rw-r--r--lib/std/compress/deflate/token.zig103
-rw-r--r--lib/std/compress/flate.zig481
-rw-r--r--lib/std/compress/flate/CircularBuffer.zig234
-rw-r--r--lib/std/compress/flate/Lookup.zig125
-rw-r--r--lib/std/compress/flate/SlidingWindow.zig160
-rw-r--r--lib/std/compress/flate/Token.zig327
-rw-r--r--lib/std/compress/flate/bit_reader.zig333
-rw-r--r--lib/std/compress/flate/bit_writer.zig99
-rw-r--r--lib/std/compress/flate/block_writer.zig706
-rw-r--r--lib/std/compress/flate/consts.zig49
-rw-r--r--lib/std/compress/flate/container.zig207
-rw-r--r--lib/std/compress/flate/deflate.zig744
-rw-r--r--lib/std/compress/flate/huffman_decoder.zig308
-rw-r--r--lib/std/compress/flate/huffman_encoder.zig536
-rw-r--r--lib/std/compress/flate/inflate.zig542
-rw-r--r--lib/std/compress/flate/testdata/block_writer.zig606
-rw-r--r--lib/std/compress/flate/testdata/block_writer/huffman-null-max.dyn.expect (renamed from lib/std/compress/deflate/testdata/huffman-null-max.dyn.expect)bin78 -> 78 bytes
-rw-r--r--lib/std/compress/flate/testdata/block_writer/huffman-null-max.dyn.expect-noinput (renamed from lib/std/compress/deflate/testdata/huffman-null-max.dyn.expect-noinput)bin78 -> 78 bytes
-rw-r--r--lib/std/compress/flate/testdata/block_writer/huffman-null-max.huff.expect (renamed from lib/std/compress/deflate/testdata/huffman-null-max.golden)bin8204 -> 8204 bytes
-rw-r--r--lib/std/compress/flate/testdata/block_writer/huffman-null-max.input (renamed from lib/std/compress/deflate/testdata/huffman-null-max.input)bin65535 -> 65535 bytes
-rw-r--r--lib/std/compress/flate/testdata/block_writer/huffman-null-max.wb.expect (renamed from lib/std/compress/deflate/testdata/huffman-null-max.wb.expect)bin78 -> 78 bytes
-rw-r--r--lib/std/compress/flate/testdata/block_writer/huffman-null-max.wb.expect-noinput (renamed from lib/std/compress/deflate/testdata/huffman-null-max.wb.expect-noinput)bin78 -> 78 bytes
-rw-r--r--lib/std/compress/flate/testdata/block_writer/huffman-pi.dyn.expect (renamed from lib/std/compress/deflate/testdata/huffman-pi.dyn.expect)bin1696 -> 1696 bytes
-rw-r--r--lib/std/compress/flate/testdata/block_writer/huffman-pi.dyn.expect-noinput (renamed from lib/std/compress/deflate/testdata/huffman-pi.dyn.expect-noinput)bin1696 -> 1696 bytes
-rw-r--r--lib/std/compress/flate/testdata/block_writer/huffman-pi.huff.expect (renamed from lib/std/compress/deflate/testdata/huffman-pi.golden)bin1606 -> 1606 bytes
-rw-r--r--lib/std/compress/flate/testdata/block_writer/huffman-pi.input (renamed from lib/std/compress/deflate/testdata/huffman-pi.input)0
-rw-r--r--lib/std/compress/flate/testdata/block_writer/huffman-pi.wb.expect (renamed from lib/std/compress/deflate/testdata/huffman-pi.wb.expect)bin1696 -> 1696 bytes
-rw-r--r--lib/std/compress/flate/testdata/block_writer/huffman-pi.wb.expect-noinput (renamed from lib/std/compress/deflate/testdata/huffman-pi.wb.expect-noinput)bin1696 -> 1696 bytes
-rw-r--r--lib/std/compress/flate/testdata/block_writer/huffman-rand-1k.dyn.expect (renamed from lib/std/compress/deflate/testdata/huffman-rand-1k.dyn.expect)bin1005 -> 1005 bytes
-rw-r--r--lib/std/compress/flate/testdata/block_writer/huffman-rand-1k.dyn.expect-noinput (renamed from lib/std/compress/deflate/testdata/huffman-rand-1k.dyn.expect-noinput)bin1054 -> 1054 bytes
-rw-r--r--lib/std/compress/flate/testdata/block_writer/huffman-rand-1k.huff.expect (renamed from lib/std/compress/deflate/testdata/huffman-rand-1k.golden)bin1005 -> 1005 bytes
-rw-r--r--lib/std/compress/flate/testdata/block_writer/huffman-rand-1k.input (renamed from lib/std/compress/deflate/testdata/huffman-rand-1k.input)bin1000 -> 1000 bytes
-rw-r--r--lib/std/compress/flate/testdata/block_writer/huffman-rand-1k.wb.expect (renamed from lib/std/compress/deflate/testdata/huffman-rand-1k.wb.expect)bin1005 -> 1005 bytes
-rw-r--r--lib/std/compress/flate/testdata/block_writer/huffman-rand-1k.wb.expect-noinput (renamed from lib/std/compress/deflate/testdata/huffman-rand-1k.wb.expect-noinput)bin1054 -> 1054 bytes
-rw-r--r--lib/std/compress/flate/testdata/block_writer/huffman-rand-limit.dyn.expect (renamed from lib/std/compress/deflate/testdata/huffman-rand-limit.dyn.expect)bin229 -> 229 bytes
-rw-r--r--lib/std/compress/flate/testdata/block_writer/huffman-rand-limit.dyn.expect-noinput (renamed from lib/std/compress/deflate/testdata/huffman-rand-limit.dyn.expect-noinput)bin229 -> 229 bytes
-rw-r--r--lib/std/compress/flate/testdata/block_writer/huffman-rand-limit.huff.expect (renamed from lib/std/compress/deflate/testdata/huffman-rand-limit.golden)bin252 -> 252 bytes
-rw-r--r--lib/std/compress/flate/testdata/block_writer/huffman-rand-limit.input (renamed from lib/std/compress/deflate/testdata/huffman-rand-limit.input)0
-rw-r--r--lib/std/compress/flate/testdata/block_writer/huffman-rand-limit.wb.expect (renamed from lib/std/compress/deflate/testdata/huffman-rand-limit.wb.expect)bin186 -> 186 bytes
-rw-r--r--lib/std/compress/flate/testdata/block_writer/huffman-rand-limit.wb.expect-noinput (renamed from lib/std/compress/deflate/testdata/huffman-rand-limit.wb.expect-noinput)bin186 -> 186 bytes
-rw-r--r--lib/std/compress/flate/testdata/block_writer/huffman-rand-max.huff.expect (renamed from lib/std/compress/deflate/testdata/huffman-rand-max.golden)bin65540 -> 65540 bytes
-rw-r--r--lib/std/compress/flate/testdata/block_writer/huffman-rand-max.input (renamed from lib/std/compress/deflate/testdata/huffman-rand-max.input)bin65535 -> 65535 bytes
-rw-r--r--lib/std/compress/flate/testdata/block_writer/huffman-shifts.dyn.expect (renamed from lib/std/compress/deflate/testdata/huffman-shifts.dyn.expect)bin32 -> 32 bytes
-rw-r--r--lib/std/compress/flate/testdata/block_writer/huffman-shifts.dyn.expect-noinput (renamed from lib/std/compress/deflate/testdata/huffman-shifts.dyn.expect-noinput)bin32 -> 32 bytes
-rw-r--r--lib/std/compress/flate/testdata/block_writer/huffman-shifts.huff.expect (renamed from lib/std/compress/deflate/testdata/huffman-shifts.golden)bin1812 -> 1812 bytes
-rw-r--r--lib/std/compress/flate/testdata/block_writer/huffman-shifts.input (renamed from lib/std/compress/deflate/testdata/huffman-shifts.input)0
-rw-r--r--lib/std/compress/flate/testdata/block_writer/huffman-shifts.wb.expect (renamed from lib/std/compress/deflate/testdata/huffman-shifts.wb.expect)bin32 -> 32 bytes
-rw-r--r--lib/std/compress/flate/testdata/block_writer/huffman-shifts.wb.expect-noinput (renamed from lib/std/compress/deflate/testdata/huffman-shifts.wb.expect-noinput)bin32 -> 32 bytes
-rw-r--r--lib/std/compress/flate/testdata/block_writer/huffman-text-shift.dyn.expect (renamed from lib/std/compress/deflate/testdata/huffman-text-shift.dyn.expect)bin231 -> 231 bytes
-rw-r--r--lib/std/compress/flate/testdata/block_writer/huffman-text-shift.dyn.expect-noinput (renamed from lib/std/compress/deflate/testdata/huffman-text-shift.dyn.expect-noinput)bin231 -> 231 bytes
-rw-r--r--lib/std/compress/flate/testdata/block_writer/huffman-text-shift.huff.expect (renamed from lib/std/compress/deflate/testdata/huffman-text-shift.golden)bin231 -> 231 bytes
-rw-r--r--lib/std/compress/flate/testdata/block_writer/huffman-text-shift.input (renamed from lib/std/compress/deflate/testdata/huffman-text-shift.input)0
-rw-r--r--lib/std/compress/flate/testdata/block_writer/huffman-text-shift.wb.expect (renamed from lib/std/compress/deflate/testdata/huffman-text-shift.wb.expect)bin231 -> 231 bytes
-rw-r--r--lib/std/compress/flate/testdata/block_writer/huffman-text-shift.wb.expect-noinput (renamed from lib/std/compress/deflate/testdata/huffman-text-shift.wb.expect-noinput)bin231 -> 231 bytes
-rw-r--r--lib/std/compress/flate/testdata/block_writer/huffman-text.dyn.expect (renamed from lib/std/compress/deflate/testdata/huffman-text.dyn.expect)bin217 -> 217 bytes
-rw-r--r--lib/std/compress/flate/testdata/block_writer/huffman-text.dyn.expect-noinput (renamed from lib/std/compress/deflate/testdata/huffman-text.dyn.expect-noinput)bin217 -> 217 bytes
-rw-r--r--lib/std/compress/flate/testdata/block_writer/huffman-text.huff.expect (renamed from lib/std/compress/deflate/testdata/huffman-text.golden)bin219 -> 219 bytes
-rw-r--r--lib/std/compress/flate/testdata/block_writer/huffman-text.input (renamed from lib/std/compress/deflate/testdata/huffman-text.input)0
-rw-r--r--lib/std/compress/flate/testdata/block_writer/huffman-text.wb.expect (renamed from lib/std/compress/deflate/testdata/huffman-text.wb.expect)bin217 -> 217 bytes
-rw-r--r--lib/std/compress/flate/testdata/block_writer/huffman-text.wb.expect-noinput (renamed from lib/std/compress/deflate/testdata/huffman-text.wb.expect-noinput)bin217 -> 217 bytes
-rw-r--r--lib/std/compress/flate/testdata/block_writer/huffman-zero.dyn.expect (renamed from lib/std/compress/deflate/testdata/huffman-zero.dyn.expect)bin17 -> 17 bytes
-rw-r--r--lib/std/compress/flate/testdata/block_writer/huffman-zero.dyn.expect-noinput (renamed from lib/std/compress/deflate/testdata/huffman-zero.dyn.expect-noinput)bin17 -> 17 bytes
-rw-r--r--lib/std/compress/flate/testdata/block_writer/huffman-zero.huff.expect (renamed from lib/std/compress/deflate/testdata/huffman-zero.golden)bin51 -> 51 bytes
-rw-r--r--lib/std/compress/flate/testdata/block_writer/huffman-zero.input (renamed from lib/std/compress/deflate/testdata/huffman-zero.input)0
-rw-r--r--lib/std/compress/flate/testdata/block_writer/huffman-zero.wb.expect (renamed from lib/std/compress/deflate/testdata/huffman-zero.wb.expect)bin6 -> 6 bytes
-rw-r--r--lib/std/compress/flate/testdata/block_writer/huffman-zero.wb.expect-noinput (renamed from lib/std/compress/deflate/testdata/huffman-zero.wb.expect-noinput)bin6 -> 6 bytes
-rw-r--r--lib/std/compress/flate/testdata/block_writer/null-long-match.dyn.expect-noinput (renamed from lib/std/compress/deflate/testdata/null-long-match.dyn.expect-noinput)bin206 -> 206 bytes
-rw-r--r--lib/std/compress/flate/testdata/block_writer/null-long-match.wb.expect-noinput (renamed from lib/std/compress/deflate/testdata/null-long-match.wb.expect-noinput)bin206 -> 206 bytes
-rw-r--r--lib/std/compress/flate/testdata/fuzz/bug_18966.expect17
-rw-r--r--lib/std/compress/flate/testdata/fuzz/bug_18966.inputbin0 -> 340 bytes
-rw-r--r--lib/std/compress/flate/testdata/fuzz/deflate-stream.expect22
-rw-r--r--lib/std/compress/flate/testdata/fuzz/deflate-stream.input3
-rw-r--r--lib/std/compress/flate/testdata/fuzz/empty-distance-alphabet01.inputbin0 -> 12 bytes
-rw-r--r--lib/std/compress/flate/testdata/fuzz/empty-distance-alphabet02.inputbin0 -> 13 bytes
-rw-r--r--lib/std/compress/flate/testdata/fuzz/end-of-stream.input1
-rw-r--r--lib/std/compress/flate/testdata/fuzz/fuzz1.inputbin0 -> 60 bytes
-rw-r--r--lib/std/compress/flate/testdata/fuzz/fuzz2.inputbin0 -> 56 bytes
-rw-r--r--lib/std/compress/flate/testdata/fuzz/fuzz3.inputbin0 -> 8 bytes
-rw-r--r--lib/std/compress/flate/testdata/fuzz/fuzz4.inputbin0 -> 20 bytes
-rw-r--r--lib/std/compress/flate/testdata/fuzz/invalid-distance.inputbin0 -> 6 bytes
-rw-r--r--lib/std/compress/flate/testdata/fuzz/invalid-tree01.input1
-rw-r--r--lib/std/compress/flate/testdata/fuzz/invalid-tree02.inputbin0 -> 14 bytes
-rw-r--r--lib/std/compress/flate/testdata/fuzz/invalid-tree03.inputbin0 -> 12 bytes
-rw-r--r--lib/std/compress/flate/testdata/fuzz/lengths-overflow.input1
-rw-r--r--lib/std/compress/flate/testdata/fuzz/out-of-codes.inputbin0 -> 9 bytes
-rw-r--r--lib/std/compress/flate/testdata/fuzz/puff01.inputbin0 -> 5 bytes
-rw-r--r--lib/std/compress/flate/testdata/fuzz/puff02.inputbin0 -> 5 bytes
-rw-r--r--lib/std/compress/flate/testdata/fuzz/puff03.inputbin0 -> 6 bytes
-rw-r--r--lib/std/compress/flate/testdata/fuzz/puff04.input1
-rw-r--r--lib/std/compress/flate/testdata/fuzz/puff05.input1
-rw-r--r--lib/std/compress/flate/testdata/fuzz/puff06.input1
-rw-r--r--lib/std/compress/flate/testdata/fuzz/puff07.inputbin0 -> 14 bytes
-rw-r--r--lib/std/compress/flate/testdata/fuzz/puff08.inputbin0 -> 14 bytes
-rw-r--r--lib/std/compress/flate/testdata/fuzz/puff09.inputbin0 -> 3 bytes
-rw-r--r--lib/std/compress/flate/testdata/fuzz/puff10.input1
-rw-r--r--lib/std/compress/flate/testdata/fuzz/puff11.inputbin0 -> 12 bytes
-rw-r--r--lib/std/compress/flate/testdata/fuzz/puff12.inputbin0 -> 3 bytes
-rw-r--r--lib/std/compress/flate/testdata/fuzz/puff13.inputbin0 -> 4 bytes
-rw-r--r--lib/std/compress/flate/testdata/fuzz/puff14.inputbin0 -> 4 bytes
-rw-r--r--lib/std/compress/flate/testdata/fuzz/puff15.input1
-rw-r--r--lib/std/compress/flate/testdata/fuzz/puff16.inputbin0 -> 6 bytes
-rw-r--r--lib/std/compress/flate/testdata/fuzz/puff17.inputbin0 -> 6 bytes
-rw-r--r--lib/std/compress/flate/testdata/fuzz/puff18.inputbin0 -> 52 bytes
-rw-r--r--lib/std/compress/flate/testdata/fuzz/puff19.inputbin0 -> 65 bytes
-rw-r--r--lib/std/compress/flate/testdata/fuzz/puff20.inputbin0 -> 12 bytes
-rw-r--r--lib/std/compress/flate/testdata/fuzz/puff21.inputbin0 -> 64 bytes
-rw-r--r--lib/std/compress/flate/testdata/fuzz/puff22.inputbin0 -> 142 bytes
-rw-r--r--lib/std/compress/flate/testdata/fuzz/puff23.inputbin0 -> 404 bytes
-rw-r--r--lib/std/compress/flate/testdata/fuzz/puff24.inputbin0 -> 68 bytes
-rw-r--r--lib/std/compress/flate/testdata/fuzz/puff25.inputbin0 -> 64 bytes
-rw-r--r--lib/std/compress/flate/testdata/fuzz/puff26.inputbin0 -> 346 bytes
-rw-r--r--lib/std/compress/flate/testdata/fuzz/puff27.inputbin0 -> 49 bytes
-rw-r--r--lib/std/compress/flate/testdata/fuzz/roundtrip1.inputbin0 -> 370 bytes
-rw-r--r--lib/std/compress/flate/testdata/fuzz/roundtrip2.inputbin0 -> 371 bytes
-rw-r--r--lib/std/compress/flate/testdata/rfc1951.txt (renamed from lib/std/compress/deflate/testdata/rfc1951.txt)0
-rw-r--r--lib/std/compress/gzip.zig412
-rw-r--r--lib/std/compress/testdata/rfc1951.txt955
-rw-r--r--lib/std/compress/testdata/rfc1951.txt.fixed.z.9bin12836 -> 0 bytes
-rw-r--r--lib/std/compress/testdata/rfc1951.txt.z.0bin36960 -> 0 bytes
-rw-r--r--lib/std/compress/testdata/rfc1951.txt.z.9bin11111 -> 0 bytes
-rw-r--r--lib/std/compress/testdata/rfc1952.txt675
-rw-r--r--lib/std/compress/testdata/rfc1952.txt.gzbin8056 -> 0 bytes
-rw-r--r--lib/std/compress/zlib.zig308
-rw-r--r--lib/std/compress/zstandard.zig217
-rw-r--r--lib/std/compress/zstandard/decompress.zig2
-rw-r--r--lib/std/compress/zstandard/types.zig4
-rw-r--r--lib/std/crypto/aes.zig2
-rw-r--r--lib/std/crypto/aes_ocb.zig4
-rw-r--r--lib/std/crypto/benchmark.zig2
-rw-r--r--lib/std/crypto/blake3.zig2
-rw-r--r--lib/std/crypto/ecdsa.zig1
-rw-r--r--lib/std/crypto/ff.zig3
-rw-r--r--lib/std/crypto/kyber_d00.zig2
-rw-r--r--lib/std/crypto/pcurves/p384.zig1
-rw-r--r--lib/std/crypto/pcurves/secp256k1.zig1
-rw-r--r--lib/std/crypto/salsa20.zig5
-rw-r--r--lib/std/crypto/sha2.zig2
-rw-r--r--lib/std/crypto/tlcsprng.zig4
-rw-r--r--lib/std/debug.zig20
-rw-r--r--lib/std/dwarf.zig1386
-rw-r--r--lib/std/dwarf/TAG.zig3
-rw-r--r--lib/std/dwarf/expressions.zig18
-rw-r--r--lib/std/dynamic_library.zig2
-rw-r--r--lib/std/elf.zig509
-rw-r--r--lib/std/event.zig23
-rw-r--r--lib/std/event/batch.zig141
-rw-r--r--lib/std/event/channel.zig334
-rw-r--r--lib/std/event/future.zig115
-rw-r--r--lib/std/event/group.zig160
-rw-r--r--lib/std/event/lock.zig162
-rw-r--r--lib/std/event/locked.zig42
-rw-r--r--lib/std/event/loop.zig1791
-rw-r--r--lib/std/event/rwlock.zig292
-rw-r--r--lib/std/event/rwlocked.zig57
-rw-r--r--lib/std/event/wait_group.zig115
-rw-r--r--lib/std/fmt.zig2
-rw-r--r--lib/std/fmt/parse_float.zig7
-rw-r--r--lib/std/fmt/parse_float/decimal.zig2
-rw-r--r--lib/std/fmt/parse_float/parse_float.zig32
-rw-r--r--lib/std/fs.zig199
-rw-r--r--lib/std/fs/Dir.zig720
-rw-r--r--lib/std/fs/File.zig249
-rw-r--r--lib/std/fs/path.zig43
-rw-r--r--lib/std/fs/test.zig146
-rw-r--r--lib/std/fs/watch.zig719
-rw-r--r--lib/std/hash/benchmark.zig2
-rw-r--r--lib/std/hash_map.zig32
-rw-r--r--lib/std/heap.zig11
-rw-r--r--lib/std/heap/arena_allocator.zig2
-rw-r--r--lib/std/http.zig28
-rw-r--r--lib/std/http/ChunkParser.zig131
-rw-r--r--lib/std/http/Client.zig1115
-rw-r--r--lib/std/http/HeadParser.zig371
-rw-r--r--lib/std/http/HeaderIterator.zig68
-rw-r--r--lib/std/http/Headers.zig527
-rw-r--r--lib/std/http/Server.zig1679
-rw-r--r--lib/std/http/protocol.zig632
-rw-r--r--lib/std/http/test.zig1202
-rw-r--r--lib/std/io.zig72
-rw-r--r--lib/std/io/Reader.zig12
-rw-r--r--lib/std/io/buffered_tee.zig379
-rw-r--r--lib/std/io/fixed_buffer_stream.zig8
-rw-r--r--lib/std/io/test.zig2
-rw-r--r--lib/std/json/stringify.zig2
-rw-r--r--lib/std/log.zig14
-rw-r--r--lib/std/math/big/int_test.zig5
-rw-r--r--lib/std/math/big/rational.zig2
-rw-r--r--lib/std/math/log10.zig84
-rw-r--r--lib/std/math/nextafter.zig2
-rw-r--r--lib/std/mem.zig27
-rw-r--r--lib/std/meta.zig12
-rw-r--r--lib/std/net.zig323
-rw-r--r--lib/std/net/test.zig74
-rw-r--r--lib/std/os.zig837
-rw-r--r--lib/std/os/emscripten.zig212
-rw-r--r--lib/std/os/linux.zig744
-rw-r--r--lib/std/os/linux/arm-eabi.zig23
-rw-r--r--lib/std/os/linux/arm64.zig23
-rw-r--r--lib/std/os/linux/io_uring.zig144
-rw-r--r--lib/std/os/linux/mips.zig23
-rw-r--r--lib/std/os/linux/mips64.zig23
-rw-r--r--lib/std/os/linux/powerpc.zig23
-rw-r--r--lib/std/os/linux/powerpc64.zig23
-rw-r--r--lib/std/os/linux/riscv64.zig23
-rw-r--r--lib/std/os/linux/sparc64.zig23
-rw-r--r--lib/std/os/linux/test.zig4
-rw-r--r--lib/std/os/linux/x86.zig23
-rw-r--r--lib/std/os/linux/x86_64.zig23
-rw-r--r--lib/std/os/plan9.zig28
-rw-r--r--lib/std/os/test.zig90
-rw-r--r--lib/std/os/wasi.zig389
-rw-r--r--lib/std/os/windows.zig377
-rw-r--r--lib/std/os/windows/test.zig4
-rw-r--r--lib/std/pdb.zig2
-rw-r--r--lib/std/priority_dequeue.zig14
-rw-r--r--lib/std/process.zig156
-rw-r--r--lib/std/rand.zig460
-rw-r--r--lib/std/sort.zig2
-rw-r--r--lib/std/start.zig90
-rw-r--r--lib/std/std.zig112
-rw-r--r--lib/std/tar.zig192
-rw-r--r--lib/std/tar/test.zig123
-rw-r--r--lib/std/tar/testdata/18089.tarbin0 -> 10240 bytes
-rw-r--r--lib/std/tar/testdata/fuzz1.tarbin0 -> 2052 bytes
-rw-r--r--lib/std/tar/testdata/fuzz2.tarbin0 -> 2140 bytes
-rw-r--r--lib/std/tar/testdata/overwrite_file.tarbin0 -> 10240 bytes
-rw-r--r--lib/std/tar/testdata/pipe_to_file_system_test.tarbin0 -> 10240 bytes
-rw-r--r--lib/std/time.zig132
-rw-r--r--lib/std/treap.zig8
-rw-r--r--lib/std/unicode.zig1179
-rw-r--r--lib/std/zig/Server.zig9
-rw-r--r--lib/std/zig/c_translation.zig8
-rw-r--r--lib/std/zig/parser_test.zig11
-rw-r--r--lib/std/zig/render.zig3
-rw-r--r--lib/std/zig/system.zig15
-rw-r--r--lib/std/zig/system/NativePaths.zig4
-rw-r--r--lib/std/zig/system/linux.zig52
-rw-r--r--lib/std/zig/system/windows.zig2
293 files changed, 17174 insertions, 20921 deletions
diff --git a/lib/std/Build.zig b/lib/std/Build.zig
index d962a82d89..3892b9ca73 100644
--- a/lib/std/Build.zig
+++ b/lib/std/Build.zig
@@ -855,7 +855,9 @@ pub const TestOptions = struct {
optimize: std.builtin.OptimizeMode = .Debug,
version: ?std.SemanticVersion = null,
max_rss: usize = 0,
+ /// deprecated: use `.filters = &.{filter}` instead of `.filter = filter`.
filter: ?[]const u8 = null,
+ filters: []const []const u8 = &.{},
test_runner: ?[]const u8 = null,
link_libc: ?bool = null,
single_threaded: ?bool = null,
@@ -888,7 +890,12 @@ pub fn addTest(b: *Build, options: TestOptions) *Step.Compile {
.error_tracing = options.error_tracing,
},
.max_rss = options.max_rss,
- .filter = options.filter,
+ .filters = if (options.filter != null and options.filters.len > 0) filters: {
+ const filters = b.allocator.alloc([]const u8, 1 + options.filters.len) catch @panic("OOM");
+ filters[0] = b.dupe(options.filter.?);
+ for (filters[1..], options.filters) |*dest, source| dest.* = b.dupe(source);
+ break :filters filters;
+ } else b.dupeStrings(if (options.filter) |filter| &.{filter} else options.filters),
.test_runner = options.test_runner,
.use_llvm = options.use_llvm,
.use_lld = options.use_lld,
@@ -993,9 +1000,7 @@ pub fn dupe(self: *Build, bytes: []const u8) []u8 {
/// Duplicates an array of strings without the need to handle out of memory.
pub fn dupeStrings(self: *Build, strings: []const []const u8) [][]u8 {
const array = self.allocator.alloc([]u8, strings.len) catch @panic("OOM");
- for (strings, 0..) |s, i| {
- array[i] = self.dupe(s);
- }
+ for (array, strings) |*dest, source| dest.* = self.dupe(source);
return array;
}
@@ -1284,11 +1289,16 @@ pub fn standardTargetOptions(b: *Build, args: StandardTargetOptionsArgs) Resolve
return b.resolveTargetQuery(query);
}
+/// Obtain a target query from a string, reporting diagnostics to stderr if the
+/// parsing failed.
+/// Asserts that the `diagnostics` field of `options` is `null`. This use case
+/// is handled instead by calling `std.Target.Query.parse` directly.
pub fn parseTargetQuery(options: std.Target.Query.ParseOptions) error{ParseFailed}!std.Target.Query {
+ assert(options.diagnostics == null);
var diags: Target.Query.ParseOptions.Diagnostics = .{};
var opts_copy = options;
opts_copy.diagnostics = &diags;
- return std.Target.Query.parse(options) catch |err| switch (err) {
+ return std.Target.Query.parse(opts_copy) catch |err| switch (err) {
error.UnknownCpuModel => {
std.debug.print("unknown CPU: '{s}'\navailable CPUs for architecture '{s}':\n", .{
diags.cpu_name.?, @tagName(diags.arch.?),
diff --git a/lib/std/Build/Cache.zig b/lib/std/Build/Cache.zig
index 119fcc7be3..0bfaf283db 100644
--- a/lib/std/Build/Cache.zig
+++ b/lib/std/Build/Cache.zig
@@ -162,7 +162,7 @@ fn findPrefixResolved(cache: *const Cache, resolved_path: []u8) !PrefixedPath {
fn getPrefixSubpath(allocator: Allocator, prefix: []const u8, path: []u8) ![]u8 {
const relative = try std.fs.path.relative(allocator, prefix, path);
errdefer allocator.free(relative);
- var component_iterator = std.fs.path.NativeUtf8ComponentIterator.init(relative) catch {
+ var component_iterator = std.fs.path.NativeComponentIterator.init(relative) catch {
return error.NotASubPath;
};
if (component_iterator.root() != null) {
diff --git a/lib/std/Build/Module.zig b/lib/std/Build/Module.zig
index b481f21916..c6d908158c 100644
--- a/lib/std/Build/Module.zig
+++ b/lib/std/Build/Module.zig
@@ -79,9 +79,9 @@ pub const SystemLib = struct {
};
pub const CSourceFiles = struct {
- dependency: ?*std.Build.Dependency,
- /// If `dependency` is not null relative to it,
- /// else relative to the build root.
+ root: LazyPath,
+ /// `files` is relative to `root`, which is
+ /// the build root by default
files: []const []const u8,
flags: []const []const u8,
};
@@ -453,9 +453,9 @@ pub fn linkFramework(m: *Module, name: []const u8, options: LinkFrameworkOptions
}
pub const AddCSourceFilesOptions = struct {
- /// When provided, `files` are relative to `dependency` rather than the
+ /// When provided, `files` are relative to `root` rather than the
/// package that owns the `Compile` step.
- dependency: ?*std.Build.Dependency = null,
+ root: LazyPath = .{ .path = "" },
files: []const []const u8,
flags: []const []const u8 = &.{},
};
@@ -466,7 +466,7 @@ pub fn addCSourceFiles(m: *Module, options: AddCSourceFilesOptions) void {
const allocator = b.allocator;
const c_source_files = allocator.create(CSourceFiles) catch @panic("OOM");
c_source_files.* = .{
- .dependency = options.dependency,
+ .root = options.root,
.files = b.dupeStrings(options.files),
.flags = b.dupeStrings(options.flags),
};
diff --git a/lib/std/Build/Step/Compile.zig b/lib/std/Build/Step/Compile.zig
index 0c438069f8..5ee92ffc22 100644
--- a/lib/std/Build/Step/Compile.zig
+++ b/lib/std/Build/Step/Compile.zig
@@ -54,8 +54,7 @@ global_base: ?u64 = null,
/// Set via options; intended to be read-only after that.
zig_lib_dir: ?LazyPath,
exec_cmd_args: ?[]const ?[]const u8,
-filter: ?[]const u8,
-test_evented_io: bool = false,
+filters: []const []const u8,
test_runner: ?[]const u8,
test_server_mode: bool,
wasi_exec_model: ?std.builtin.WasiExecModel = null,
@@ -224,7 +223,7 @@ pub const Options = struct {
linkage: ?Linkage = null,
version: ?std.SemanticVersion = null,
max_rss: usize = 0,
- filter: ?[]const u8 = null,
+ filters: []const []const u8 = &.{},
test_runner: ?[]const u8 = null,
use_llvm: ?bool = null,
use_lld: ?bool = null,
@@ -311,7 +310,7 @@ pub fn create(owner: *std.Build, options: Options) *Compile {
.installed_headers = ArrayList(*Step).init(owner.allocator),
.zig_lib_dir = null,
.exec_cmd_args = null,
- .filter = options.filter,
+ .filters = options.filters,
.test_runner = options.test_runner,
.test_server_mode = options.test_runner == null,
.rdynamic = false,
@@ -1198,15 +1197,11 @@ fn make(step: *Step, prog_node: *std.Progress.Node) !void {
prev_has_cflags = true;
}
- if (c_source_files.dependency) |dep| {
- for (c_source_files.files) |file| {
- try zig_args.append(dep.builder.pathFromRoot(file));
- }
- } else {
- for (c_source_files.files) |file| {
- try zig_args.append(b.pathFromRoot(file));
- }
+ const root_path = c_source_files.root.getPath2(module.owner, step);
+ for (c_source_files.files) |file| {
+ try zig_args.append(b.pathJoin(&.{ root_path, file }));
}
+
total_linker_objects += c_source_files.files.len;
},
@@ -1302,15 +1297,11 @@ fn make(step: *Step, prog_node: *std.Progress.Node) !void {
try zig_args.append(b.fmt("0x{x}", .{image_base}));
}
- if (self.filter) |filter| {
+ for (self.filters) |filter| {
try zig_args.append("--test-filter");
try zig_args.append(filter);
}
- if (self.test_evented_io) {
- try zig_args.append("--test-evented-io");
- }
-
if (self.test_runner) |test_runner| {
try zig_args.append("--test-runner");
try zig_args.append(b.pathFromRoot(test_runner));
diff --git a/lib/std/Build/Step/Run.zig b/lib/std/Build/Step/Run.zig
index 3df3d9ee53..6364ec0ecf 100644
--- a/lib/std/Build/Step/Run.zig
+++ b/lib/std/Build/Step/Run.zig
@@ -1147,19 +1147,14 @@ fn evalZigTest(
test_count = tm_hdr.tests_len;
const names_bytes = body[@sizeOf(TmHdr)..][0 .. test_count * @sizeOf(u32)];
- const async_frame_lens_bytes = body[@sizeOf(TmHdr) + names_bytes.len ..][0 .. test_count * @sizeOf(u32)];
- const expected_panic_msgs_bytes = body[@sizeOf(TmHdr) + names_bytes.len + async_frame_lens_bytes.len ..][0 .. test_count * @sizeOf(u32)];
- const string_bytes = body[@sizeOf(TmHdr) + names_bytes.len + async_frame_lens_bytes.len + expected_panic_msgs_bytes.len ..][0..tm_hdr.string_bytes_len];
+ const expected_panic_msgs_bytes = body[@sizeOf(TmHdr) + names_bytes.len ..][0 .. test_count * @sizeOf(u32)];
+ const string_bytes = body[@sizeOf(TmHdr) + names_bytes.len + expected_panic_msgs_bytes.len ..][0..tm_hdr.string_bytes_len];
const names = std.mem.bytesAsSlice(u32, names_bytes);
- const async_frame_lens = std.mem.bytesAsSlice(u32, async_frame_lens_bytes);
const expected_panic_msgs = std.mem.bytesAsSlice(u32, expected_panic_msgs_bytes);
const names_aligned = try arena.alloc(u32, names.len);
for (names_aligned, names) |*dest, src| dest.* = src;
- const async_frame_lens_aligned = try arena.alloc(u32, async_frame_lens.len);
- for (async_frame_lens_aligned, async_frame_lens) |*dest, src| dest.* = src;
-
const expected_panic_msgs_aligned = try arena.alloc(u32, expected_panic_msgs.len);
for (expected_panic_msgs_aligned, expected_panic_msgs) |*dest, src| dest.* = src;
@@ -1167,7 +1162,6 @@ fn evalZigTest(
metadata = .{
.string_bytes = try arena.dupe(u8, string_bytes),
.names = names_aligned,
- .async_frame_lens = async_frame_lens_aligned,
.expected_panic_msgs = expected_panic_msgs_aligned,
.next_index = 0,
.prog_node = prog_node,
@@ -1237,7 +1231,6 @@ fn evalZigTest(
const TestMetadata = struct {
names: []const u32,
- async_frame_lens: []const u32,
expected_panic_msgs: []const u32,
string_bytes: []const u8,
next_index: u32,
@@ -1253,7 +1246,6 @@ fn requestNextTest(in: fs.File, metadata: *TestMetadata, sub_prog_node: *?std.Pr
const i = metadata.next_index;
metadata.next_index += 1;
- if (metadata.async_frame_lens[i] != 0) continue;
if (metadata.expected_panic_msgs[i] != 0) continue;
const name = metadata.testName(i);
diff --git a/lib/std/Random.zig b/lib/std/Random.zig
new file mode 100644
index 0000000000..681f70bc98
--- /dev/null
+++ b/lib/std/Random.zig
@@ -0,0 +1,438 @@
+//! The engines provided here should be initialized from an external source.
+//! For a thread-local cryptographically secure pseudo random number generator,
+//! use `std.crypto.random`.
+//! Be sure to use a CSPRNG when required, otherwise using a normal PRNG will
+//! be faster and use substantially less stack space.
+
+const std = @import("std.zig");
+const math = std.math;
+const mem = std.mem;
+const assert = std.debug.assert;
+const maxInt = std.math.maxInt;
+pub const Random = @This(); // Remove pub when `std.rand` namespace is removed.
+
+/// Fast unbiased random numbers.
+pub const DefaultPrng = Xoshiro256;
+
+/// Cryptographically secure random numbers.
+pub const DefaultCsprng = ChaCha;
+
+pub const Ascon = @import("Random/Ascon.zig");
+pub const ChaCha = @import("Random/ChaCha.zig");
+
+pub const Isaac64 = @import("Random/Isaac64.zig");
+pub const Pcg = @import("Random/Pcg.zig");
+pub const Xoroshiro128 = @import("Random/Xoroshiro128.zig");
+pub const Xoshiro256 = @import("Random/Xoshiro256.zig");
+pub const Sfc64 = @import("Random/Sfc64.zig");
+pub const RomuTrio = @import("Random/RomuTrio.zig");
+pub const SplitMix64 = @import("Random/SplitMix64.zig");
+pub const ziggurat = @import("Random/ziggurat.zig");
+
+ptr: *anyopaque,
+fillFn: *const fn (ptr: *anyopaque, buf: []u8) void,
+
+/// Builds a `Random` interface from `pointer` and `fillFn`.
+///
+/// `pointer` must be a single-item pointer to a struct; this is asserted.
+/// The pointer is stored type-erased in `ptr`, and `fillFn` is wrapped so that
+/// it is called with the pointer cast back to its original type.
+pub fn init(pointer: anytype, comptime fillFn: fn (ptr: @TypeOf(pointer), buf: []u8) void) Random {
+ const Ptr = @TypeOf(pointer);
+ assert(@typeInfo(Ptr) == .Pointer); // Must be a pointer
+ assert(@typeInfo(Ptr).Pointer.size == .One); // Must be a single-item pointer
+ assert(@typeInfo(@typeInfo(Ptr).Pointer.child) == .Struct); // Must point to a struct
+ // Wrapper with the `*anyopaque` signature required by the `fillFn` field.
+ const gen = struct {
+ fn fill(ptr: *anyopaque, buf: []u8) void {
+ // Recover the typed pointer before forwarding to the caller's `fillFn`.
+ const self: Ptr = @ptrCast(@alignCast(ptr));
+ fillFn(self, buf);
+ }
+ };
+
+ return .{
+ .ptr = pointer,
+ .fillFn = gen.fill,
+ };
+}
+
+/// Read random bytes into the specified buffer until full.
+pub fn bytes(r: Random, buf: []u8) void {
+ r.fillFn(r.ptr, buf);
+}
+
+/// Returns a random `bool`: `true` when a random `u1` obtained from `int` is nonzero.
+pub fn boolean(r: Random) bool {
+ return r.int(u1) != 0;
+}
+
+/// Returns a random value from an enum, evenly distributed.
+///
+/// Note that this will not yield consistent results across all targets
+/// due to dependence on the representation of `usize` as an index.
+/// See `enumValueWithIndex` for further commentary.
+pub inline fn enumValue(r: Random, comptime EnumType: type) EnumType {
+ return r.enumValueWithIndex(EnumType, usize);
+}
+
+/// Returns a random value from an enum, evenly distributed.
+///
+/// An index into an array of all named values is generated using the
+/// specified `Index` type to determine the return value.
+/// This allows for results to be independent of `usize` representation.
+///
+/// Prefer `enumValue` if this isn't important.
+///
+/// See `uintLessThan`, which this function uses in most cases,
+/// for commentary on the runtime of this function.
+pub fn enumValueWithIndex(r: Random, comptime EnumType: type, comptime Index: type) EnumType {
+ comptime assert(@typeInfo(EnumType) == .Enum);
+
+ // We won't use int -> enum casting because enum elements can have
+ // arbitrary values. Instead we'll randomly pick one of the type's values.
+ const values = comptime std.enums.values(EnumType);
+ comptime assert(values.len > 0); // can't return anything
+ comptime assert(maxInt(Index) >= values.len - 1); // can't access all values
+ comptime if (values.len == 1) return values[0];
+
+ const index = if (comptime values.len - 1 == maxInt(Index))
+ r.int(Index)
+ else
+ r.uintLessThan(Index, values.len);
+
+ const MinInt = MinArrayIndex(Index);
+ return values[@as(MinInt, @intCast(index))];
+}
+
+/// Returns a random int `i` such that `minInt(T) <= i <= maxInt(T)`.
+/// `i` is evenly distributed.
+pub fn int(r: Random, comptime T: type) T {
+ const bits = @typeInfo(T).Int.bits;
+ const UnsignedT = std.meta.Int(.unsigned, bits);
+ // Number of whole bytes needed to hold `bits` bits.
+ const ceil_bytes = comptime std.math.divCeil(u16, bits, 8) catch unreachable;
+ const ByteAlignedT = std.meta.Int(.unsigned, ceil_bytes * 8);
+
+ // Request exactly that many random bytes.
+ var rand_bytes: [ceil_bytes]u8 = undefined;
+ r.bytes(&rand_bytes);
+
+ // use LE instead of native endian for better portability maybe?
+ // TODO: endian portability is pointless if the underlying prng isn't endian portable.
+ // TODO: document the endian portability of this library.
+ const byte_aligned_result = mem.readInt(ByteAlignedT, &rand_bytes, .little);
+ // Drop the excess high bits, then reinterpret the bit pattern as `T`
+ // (which may be signed).
+ const unsigned_result: UnsignedT = @truncate(byte_aligned_result);
+ return @bitCast(unsigned_result);
+}
+
+/// Constant-time implementation of `uintLessThan`.
+/// The results of this function may be biased.
+pub fn uintLessThanBiased(r: Random, comptime T: type, less_than: T) T {
+ comptime assert(@typeInfo(T).Int.signedness == .unsigned);
+ assert(0 < less_than);
+ return limitRangeBiased(T, r.int(T), less_than);
+}
+
+/// Returns an evenly distributed random unsigned integer `0 <= i < less_than`.
+/// Asserts that `less_than` is greater than zero.
+/// This function assumes that the underlying `fillFn` produces evenly distributed values.
+/// Within this assumption, the runtime of this function is exponentially distributed.
+/// If `fillFn` were backed by a true random generator,
+/// the runtime of this function would technically be unbounded.
+/// However, if `fillFn` is backed by any evenly distributed pseudo random number generator,
+/// this function is guaranteed to return.
+/// If you need deterministic runtime bounds, use `uintLessThanBiased`.
+pub fn uintLessThan(r: Random, comptime T: type, less_than: T) T {
+ comptime assert(@typeInfo(T).Int.signedness == .unsigned);
+ const bits = @typeInfo(T).Int.bits;
+ assert(0 < less_than);
+
+ // adapted from:
+ // http://www.pcg-random.org/posts/bounded-rands.html
+ // "Lemire's (with an extra tweak from me)"
+ // `m` is the double-width product; `l` is its low half and the result is
+ // its high half.
+ var x = r.int(T);
+ var m = math.mulWide(T, x, less_than);
+ var l: T = @truncate(m);
+ if (l < less_than) {
+ // Rejection threshold: `t` ends up as `(-%less_than) % less_than`.
+ // A single subtraction is tried first; `%` is only used when that is
+ // not enough to bring `t` below `less_than`.
+ var t = -%less_than;
+
+ if (t >= less_than) {
+ t -= less_than;
+ if (t >= less_than) {
+ t %= less_than;
+ }
+ }
+ // Draw again while the low half of the product is below the threshold.
+ while (l < t) {
+ x = r.int(T);
+ m = math.mulWide(T, x, less_than);
+ l = @truncate(m);
+ }
+ }
+ return @intCast(m >> bits);
+}
+
+/// Constant-time implementation of `uintAtMost`.
+/// Returns a random unsigned integer `0 <= i <= at_most`.
+/// The results of this function may be biased.
+/// `T` must be an unsigned integer type; this is checked at compile time.
+pub fn uintAtMostBiased(r: Random, comptime T: type, at_most: T) T {
+ comptime assert(@typeInfo(T).Int.signedness == .unsigned);
+ if (at_most == maxInt(T)) {
+ // have the full range; `at_most + 1` below would overflow `T`
+ return r.int(T);
+ }
+ return r.uintLessThanBiased(T, at_most + 1);
+}
+
+/// Returns an evenly distributed random unsigned integer `0 <= i <= at_most`.
+/// `T` must be an unsigned integer type; this is checked at compile time.
+/// See `uintLessThan`, which this function uses in most cases,
+/// for commentary on the runtime of this function.
+pub fn uintAtMost(r: Random, comptime T: type, at_most: T) T {
+ comptime assert(@typeInfo(T).Int.signedness == .unsigned);
+ if (at_most == maxInt(T)) {
+ // have the full range; `at_most + 1` below would overflow `T`
+ return r.int(T);
+ }
+ return r.uintLessThan(T, at_most + 1);
+}
+
+/// Constant-time implementation of `intRangeLessThan`.
+/// The results of this function may be biased.
+pub fn intRangeLessThanBiased(r: Random, comptime T: type, at_least: T, less_than: T) T {
+ assert(at_least < less_than);
+ const info = @typeInfo(T).Int;
+ if (info.signedness == .signed) {
+ // Two's complement makes this math pretty easy.
+ const UnsignedT = std.meta.Int(.unsigned, info.bits);
+ const lo: UnsignedT = @bitCast(at_least);
+ const hi: UnsignedT = @bitCast(less_than);
+ const result = lo +% r.uintLessThanBiased(UnsignedT, hi -% lo);
+ return @bitCast(result);
+ } else {
+ // The signed implementation would work fine, but we can use stricter arithmetic operators here.
+ return at_least + r.uintLessThanBiased(T, less_than - at_least);
+ }
+}
+
+/// Returns an evenly distributed random integer `at_least <= i < less_than`.
+/// See `uintLessThan`, which this function uses in most cases,
+/// for commentary on the runtime of this function.
+pub fn intRangeLessThan(r: Random, comptime T: type, at_least: T, less_than: T) T {
+ assert(at_least < less_than);
+ const info = @typeInfo(T).Int;
+ if (info.signedness == .signed) {
+ // Two's complement makes this math pretty easy.
+ const UnsignedT = std.meta.Int(.unsigned, info.bits);
+ const lo: UnsignedT = @bitCast(at_least);
+ const hi: UnsignedT = @bitCast(less_than);
+ const result = lo +% r.uintLessThan(UnsignedT, hi -% lo);
+ return @bitCast(result);
+ } else {
+ // The signed implementation would work fine, but we can use stricter arithmetic operators here.
+ return at_least + r.uintLessThan(T, less_than - at_least);
+ }
+}
+
+/// Constant-time implementation of `intRangeAtMost`.
+/// The results of this function may be biased.
+pub fn intRangeAtMostBiased(r: Random, comptime T: type, at_least: T, at_most: T) T {
+ assert(at_least <= at_most);
+ const info = @typeInfo(T).Int;
+ if (info.signedness == .signed) {
+ // Two's complement makes this math pretty easy.
+ const UnsignedT = std.meta.Int(.unsigned, info.bits);
+ const lo: UnsignedT = @bitCast(at_least);
+ const hi: UnsignedT = @bitCast(at_most);
+ const result = lo +% r.uintAtMostBiased(UnsignedT, hi -% lo);
+ return @bitCast(result);
+ } else {
+ // The signed implementation would work fine, but we can use stricter arithmetic operators here.
+ return at_least + r.uintAtMostBiased(T, at_most - at_least);
+ }
+}
+
+/// Returns an evenly distributed random integer `at_least <= i <= at_most`.
+/// See `uintLessThan`, which this function uses in most cases,
+/// for commentary on the runtime of this function.
+pub fn intRangeAtMost(r: Random, comptime T: type, at_least: T, at_most: T) T {
+ assert(at_least <= at_most);
+ const info = @typeInfo(T).Int;
+ if (info.signedness == .signed) {
+ // Two's complement makes this math pretty easy.
+ const UnsignedT = std.meta.Int(.unsigned, info.bits);
+ const lo: UnsignedT = @bitCast(at_least);
+ const hi: UnsignedT = @bitCast(at_most);
+ const result = lo +% r.uintAtMost(UnsignedT, hi -% lo);
+ return @bitCast(result);
+ } else {
+ // The signed implementation would work fine, but we can use stricter arithmetic operators here.
+ return at_least + r.uintAtMost(T, at_most - at_least);
+ }
+}
+
+/// Return a floating point value evenly distributed in the range [0, 1).
+pub fn float(r: Random, comptime T: type) T {
+ // Generate a uniformly random value for the mantissa.
+ // Then generate an exponentially biased random value for the exponent.
+ // This covers every possible value in the range.
+ switch (T) {
+ f32 => {
+ // Use 23 random bits for the mantissa, and the rest for the exponent.
+ // If all 41 bits are zero, generate additional random bits, until a
+ // set bit is found, or 126 bits have been generated.
+ const rand = r.int(u64);
+ var rand_lz = @clz(rand);
+ if (rand_lz >= 41) {
+ // TODO: when #5177 or #489 is implemented,
+ // tell the compiler it is unlikely (1/2^41) to reach this point.
+ // (Same for the if branch and the f64 calculations below.)
+ rand_lz = 41 + @clz(r.int(u64));
+ if (rand_lz == 41 + 64) {
+ // It is astronomically unlikely to reach this point.
+ rand_lz += @clz(r.int(u32) | 0x7FF);
+ }
+ }
+ const mantissa: u23 = @truncate(rand);
+ const exponent = @as(u32, 126 - rand_lz) << 23;
+ return @bitCast(exponent | mantissa);
+ },
+ f64 => {
+ // Use 52 random bits for the mantissa, and the rest for the exponent.
+ // If all 12 bits are zero, generate additional random bits, until a
+ // set bit is found, or 1022 bits have been generated.
+ const rand = r.int(u64);
+ var rand_lz: u64 = @clz(rand);
+ if (rand_lz >= 12) {
+ rand_lz = 12;
+ while (true) {
+ // It is astronomically unlikely for this loop to execute more than once.
+ const addl_rand_lz = @clz(r.int(u64));
+ rand_lz += addl_rand_lz;
+ if (addl_rand_lz != 64) {
+ break;
+ }
+ if (rand_lz >= 1022) {
+ rand_lz = 1022;
+ break;
+ }
+ }
+ }
+ const mantissa = rand & 0xFFFFFFFFFFFFF;
+ const exponent = (1022 - rand_lz) << 52;
+ return @bitCast(exponent | mantissa);
+ },
+ else => @compileError("unknown floating point type"),
+ }
+}
+
+/// Return a floating point value normally distributed with mean = 0, stddev = 1.
+///
+/// To use different parameters, use: floatNorm(...) * desiredStddev + desiredMean.
+pub fn floatNorm(r: Random, comptime T: type) T {
+ const value = ziggurat.next_f64(r, ziggurat.NormDist);
+ switch (T) {
+ f32 => return @floatCast(value),
+ f64 => return value,
+ else => @compileError("unknown floating point type"),
+ }
+}
+
+/// Return an exponentially distributed float with a rate parameter of 1.
+///
+/// To use a different rate parameter, use: floatExp(...) / desiredRate.
+pub fn floatExp(r: Random, comptime T: type) T {
+ const value = ziggurat.next_f64(r, ziggurat.ExpDist);
+ switch (T) {
+ f32 => return @floatCast(value),
+ f64 => return value,
+ else => @compileError("unknown floating point type"),
+ }
+}
+
+/// Shuffle a slice into a random order.
+///
+/// Note that this will not yield consistent results across all targets
+/// due to dependence on the representation of `usize` as an index.
+/// See `shuffleWithIndex` for further commentary.
+pub inline fn shuffle(r: Random, comptime T: type, buf: []T) void {
+ r.shuffleWithIndex(T, buf, usize);
+}
+
+/// Shuffle a slice into a random order, using an index of a
+/// specified type to maintain distribution across targets.
+/// Asserts the index type can represent `buf.len`.
+///
+/// Indexes into the slice are generated using the specified `Index`
+/// type, which determines distribution properties. This allows for
+/// results to be independent of `usize` representation.
+///
+/// Prefer `shuffle` if this isn't important.
+///
+/// See `intRangeLessThan`, which this function uses,
+/// for commentary on the runtime of this function.
+pub fn shuffleWithIndex(r: Random, comptime T: type, buf: []T, comptime Index: type) void {
+ const MinInt = MinArrayIndex(Index);
+ // Fewer than two elements: there is nothing to reorder.
+ if (buf.len < 2) {
+ return;
+ }
+
+ // `i <= j < max <= maxInt(MinInt)`
+ // The `@intCast` is where `buf.len` is required to fit the index type.
+ const max: MinInt = @intCast(buf.len);
+ var i: MinInt = 0;
+ // For each position except the last, swap it with a randomly chosen
+ // position `j` in `[i, max)`.
+ while (i < max - 1) : (i += 1) {
+ const j: MinInt = @intCast(r.intRangeLessThan(Index, i, max));
+ mem.swap(T, &buf[i], &buf[j]);
+ }
+}
+
+/// Randomly selects an index into `proportions`, where the likelihood of each
+/// index is weighted by that proportion.
+/// An index with a larger proportion is more likely to be returned than an
+/// index with a smaller proportion, regardless of its position in the slice.
+///
+/// This is useful for selecting an item from a slice where weights are not equal.
+/// `T` must be a numeric type capable of holding the sum of `proportions`.
+pub fn weightedIndex(r: Random, comptime T: type, proportions: []const T) usize {
+ // This implementation works by summing the proportions and picking a
+ // random point in [0, sum). We then loop over the proportions,
+ // accumulating until our accumulator is greater than the random point.
+ //
+ // For integer `T`, `uintLessThan` and `intRangeLessThan` assert that the
+ // upper bound is greater than zero, so the sum of `proportions` must be
+ // greater than zero.
+
+ const sum = s: {
+ var sum: T = 0;
+ for (proportions) |v| sum += v;
+ break :s sum;
+ };
+
+ const point = switch (@typeInfo(T)) {
+ .Int => |int_info| switch (int_info.signedness) {
+ .signed => r.intRangeLessThan(T, 0, sum),
+ .unsigned => r.uintLessThan(T, sum),
+ },
+ // take care that imprecision doesn't lead to a value slightly greater than sum
+ .Float => @min(r.float(T) * sum, sum - std.math.floatEps(T)),
+ else => @compileError("weightedIndex does not support proportions of type " ++
+ @typeName(T)),
+ };
+
+ assert(point < sum);
+
+ // The accumulator repeats the same additions, in the same order, that
+ // produced `sum`, so it reaches `sum` on the last element; together with
+ // `point < sum` this means the loop returns before falling through.
+ var accumulator: T = 0;
+ for (proportions, 0..) |p, index| {
+ accumulator += p;
+ if (point < accumulator) return index;
+ } else unreachable;
+}
+
+/// Convert a random integer 0 <= random_int <= maxInt(T),
+/// into an integer 0 <= result < less_than.
+/// This function introduces a minor bias.
+pub fn limitRangeBiased(comptime T: type, random_int: T, less_than: T) T {
+ comptime assert(@typeInfo(T).Int.signedness == .unsigned);
+ const bits = @typeInfo(T).Int.bits;
+
+ // adapted from:
+ // http://www.pcg-random.org/posts/bounded-rands.html
+ // "Integer Multiplication (Biased)"
+ const m = math.mulWide(T, random_int, less_than);
+ return @intCast(m >> bits);
+}
+
+/// Returns the smaller of `Index` and `usize`.
+fn MinArrayIndex(comptime Index: type) type {
+ const index_info = @typeInfo(Index).Int;
+ assert(index_info.signedness == .unsigned);
+ return if (index_info.bits >= @typeInfo(usize).Int.bits) usize else Index;
+}
+
+test {
+ std.testing.refAllDecls(@This());
+ _ = @import("Random/test.zig");
+}
diff --git a/lib/std/rand/Ascon.zig b/lib/std/Random/Ascon.zig
index 6a3cb13165..6464fbe2d1 100644
--- a/lib/std/rand/Ascon.zig
+++ b/lib/std/Random/Ascon.zig
@@ -10,7 +10,6 @@
const std = @import("std");
const mem = std.mem;
-const Random = std.rand.Random;
const Self = @This();
const Ascon = std.crypto.core.Ascon(.little);
@@ -39,9 +38,9 @@ pub fn addEntropy(self: *Self, bytes: []const u8) void {
self.state.permute();
}
-/// Returns a `std.rand.Random` structure backed by the current RNG.
-pub fn random(self: *Self) Random {
- return Random.init(self, fill);
+/// Returns a `std.Random` structure backed by the current RNG.
+pub fn random(self: *Self) std.Random {
+ return std.Random.init(self, fill);
}
/// Fills the buffer with random bytes.
diff --git a/lib/std/rand/ChaCha.zig b/lib/std/Random/ChaCha.zig
index 75f62c9a47..5783ee4152 100644
--- a/lib/std/rand/ChaCha.zig
+++ b/lib/std/Random/ChaCha.zig
@@ -5,7 +5,6 @@
const std = @import("std");
const mem = std.mem;
-const Random = std.rand.Random;
const Self = @This();
const Cipher = std.crypto.stream.chacha.ChaCha8IETF;
@@ -53,9 +52,9 @@ pub fn addEntropy(self: *Self, bytes: []const u8) void {
self.refill();
}
-/// Returns a `std.rand.Random` structure backed by the current RNG.
-pub fn random(self: *Self) Random {
- return Random.init(self, fill);
+/// Returns a `std.Random` structure backed by the current RNG.
+pub fn random(self: *Self) std.Random {
+ return std.Random.init(self, fill);
}
// Refills the buffer with random bytes, overwriting the previous key.
diff --git a/lib/std/rand/Isaac64.zig b/lib/std/Random/Isaac64.zig
index 8684ba8e22..a3dceabd5c 100644
--- a/lib/std/rand/Isaac64.zig
+++ b/lib/std/Random/Isaac64.zig
@@ -4,7 +4,6 @@
//! https://doc.rust-lang.org/rand/src/rand/prng/isaac64.rs.html
const std = @import("std");
-const Random = std.rand.Random;
const mem = std.mem;
const Isaac64 = @This();
@@ -30,8 +29,8 @@ pub fn init(init_s: u64) Isaac64 {
return isaac;
}
-pub fn random(self: *Isaac64) Random {
- return Random.init(self, fill);
+pub fn random(self: *Isaac64) std.Random {
+ return std.Random.init(self, fill);
}
fn step(self: *Isaac64, mix: u64, base: usize, comptime m1: usize, comptime m2: usize) void {
diff --git a/lib/std/rand/Pcg.zig b/lib/std/Random/Pcg.zig
index d7d233659f..1ff65f0f44 100644
--- a/lib/std/rand/Pcg.zig
+++ b/lib/std/Random/Pcg.zig
@@ -3,7 +3,6 @@
//! PRNG
const std = @import("std");
-const Random = std.rand.Random;
const Pcg = @This();
const default_multiplier = 6364136223846793005;
@@ -21,8 +20,8 @@ pub fn init(init_s: u64) Pcg {
return pcg;
}
-pub fn random(self: *Pcg) Random {
- return Random.init(self, fill);
+pub fn random(self: *Pcg) std.Random {
+ return std.Random.init(self, fill);
}
fn next(self: *Pcg) u32 {
@@ -37,7 +36,7 @@ fn next(self: *Pcg) u32 {
fn seed(self: *Pcg, init_s: u64) void {
// Pcg requires 128-bits of seed.
- var gen = std.rand.SplitMix64.init(init_s);
+ var gen = std.Random.SplitMix64.init(init_s);
self.seedTwo(gen.next(), gen.next());
}
diff --git a/lib/std/rand/RomuTrio.zig b/lib/std/Random/RomuTrio.zig
index 4caf5f0ce8..9cc8cf13e1 100644
--- a/lib/std/rand/RomuTrio.zig
+++ b/lib/std/Random/RomuTrio.zig
@@ -3,7 +3,6 @@
// Beware: this PRNG is trivially predictable. While fast, it should *never* be used for cryptographic purposes.
const std = @import("std");
-const Random = std.rand.Random;
const math = std.math;
const RomuTrio = @This();
@@ -17,8 +16,8 @@ pub fn init(init_s: u64) RomuTrio {
return x;
}
-pub fn random(self: *RomuTrio) Random {
- return Random.init(self, fill);
+pub fn random(self: *RomuTrio) std.Random {
+ return std.Random.init(self, fill);
}
fn next(self: *RomuTrio) u64 {
@@ -42,7 +41,7 @@ pub fn seedWithBuf(self: *RomuTrio, buf: [24]u8) void {
pub fn seed(self: *RomuTrio, init_s: u64) void {
// RomuTrio requires 192-bits of seed.
- var gen = std.rand.SplitMix64.init(init_s);
+ var gen = std.Random.SplitMix64.init(init_s);
self.x_state = gen.next();
self.y_state = gen.next();
diff --git a/lib/std/rand/Sfc64.zig b/lib/std/Random/Sfc64.zig
index b4a8988e4d..004167952e 100644
--- a/lib/std/rand/Sfc64.zig
+++ b/lib/std/Random/Sfc64.zig
@@ -3,7 +3,6 @@
//! See http://pracrand.sourceforge.net/
const std = @import("std");
-const Random = std.rand.Random;
const math = std.math;
const Sfc64 = @This();
@@ -23,8 +22,8 @@ pub fn init(init_s: u64) Sfc64 {
return x;
}
-pub fn random(self: *Sfc64) Random {
- return Random.init(self, fill);
+pub fn random(self: *Sfc64) std.Random {
+ return std.Random.init(self, fill);
}
fn next(self: *Sfc64) u64 {
diff --git a/lib/std/Random/SplitMix64.zig b/lib/std/Random/SplitMix64.zig
new file mode 100644
index 0000000000..9f085162e3
--- /dev/null
+++ b/lib/std/Random/SplitMix64.zig
@@ -0,0 +1,21 @@
+//! Generator to extend 64-bit seed values into longer sequences.
+//!
+//! The number of cycles is thus limited to 64-bits regardless of the engine, but this
+//! is still plenty for practical purposes.
+
+const SplitMix64 = @This();
+
+s: u64,
+
+/// Creates a generator whose initial state is `seed`.
+pub fn init(seed: u64) SplitMix64 {
+ return SplitMix64{ .s = seed };
+}
+
+/// Advances the state and returns the next 64-bit output.
+///
+/// The state is incremented by a fixed constant with wrapping arithmetic, and
+/// the output is derived from the new state by xor-shift and wrapping-multiply steps.
+pub fn next(self: *SplitMix64) u64 {
+ self.s +%= 0x9e3779b97f4a7c15;
+
+ // Mix a copy of the state; `self.s` itself is only changed by the increment above.
+ var z = self.s;
+ z = (z ^ (z >> 30)) *% 0xbf58476d1ce4e5b9;
+ z = (z ^ (z >> 27)) *% 0x94d049bb133111eb;
+ return z ^ (z >> 31);
+}
diff --git a/lib/std/rand/Xoroshiro128.zig b/lib/std/Random/Xoroshiro128.zig
index 0272419ba2..6966a4fc5a 100644
--- a/lib/std/rand/Xoroshiro128.zig
+++ b/lib/std/Random/Xoroshiro128.zig
@@ -3,7 +3,6 @@
//! PRNG
const std = @import("std");
-const Random = std.rand.Random;
const math = std.math;
const Xoroshiro128 = @This();
@@ -16,8 +15,8 @@ pub fn init(init_s: u64) Xoroshiro128 {
return x;
}
-pub fn random(self: *Xoroshiro128) Random {
- return Random.init(self, fill);
+pub fn random(self: *Xoroshiro128) std.Random {
+ return std.Random.init(self, fill);
}
pub fn next(self: *Xoroshiro128) u64 {
@@ -59,7 +58,7 @@ pub fn jump(self: *Xoroshiro128) void {
pub fn seed(self: *Xoroshiro128, init_s: u64) void {
// Xoroshiro requires 128-bits of seed.
- var gen = std.rand.SplitMix64.init(init_s);
+ var gen = std.Random.SplitMix64.init(init_s);
self.s[0] = gen.next();
self.s[1] = gen.next();
diff --git a/lib/std/rand/Xoshiro256.zig b/lib/std/Random/Xoshiro256.zig
index 85ae669ecd..75e860b89f 100644
--- a/lib/std/rand/Xoshiro256.zig
+++ b/lib/std/Random/Xoshiro256.zig
@@ -3,7 +3,6 @@
//! PRNG
const std = @import("std");
-const Random = std.rand.Random;
const math = std.math;
const Xoshiro256 = @This();
@@ -18,8 +17,8 @@ pub fn init(init_s: u64) Xoshiro256 {
return x;
}
-pub fn random(self: *Xoshiro256) Random {
- return Random.init(self, fill);
+pub fn random(self: *Xoshiro256) std.Random {
+ return std.Random.init(self, fill);
}
pub fn next(self: *Xoshiro256) u64 {
@@ -57,7 +56,7 @@ pub fn jump(self: *Xoshiro256) void {
pub fn seed(self: *Xoshiro256, init_s: u64) void {
// Xoshiro requires 256-bits of seed.
- var gen = std.rand.SplitMix64.init(init_s);
+ var gen = std.Random.SplitMix64.init(init_s);
self.s[0] = gen.next();
self.s[1] = gen.next();
@@ -91,7 +90,6 @@ pub fn fill(self: *Xoshiro256, buf: []u8) void {
test "xoroshiro sequence" {
if (@import("builtin").zig_backend == .stage2_c) return error.SkipZigTest;
- if (@import("builtin").zig_backend == .stage2_x86_64) return error.SkipZigTest;
var r = Xoshiro256.init(0);
diff --git a/lib/std/rand/benchmark.zig b/lib/std/Random/benchmark.zig
index 530556517c..f3ea468181 100644
--- a/lib/std/rand/benchmark.zig
+++ b/lib/std/Random/benchmark.zig
@@ -4,7 +4,7 @@ const std = @import("std");
const builtin = @import("builtin");
const time = std.time;
const Timer = time.Timer;
-const rand = std.rand;
+const Random = std.Random;
const KiB = 1024;
const MiB = 1024 * KiB;
@@ -19,32 +19,32 @@ const Rng = struct {
const prngs = [_]Rng{
Rng{
- .ty = rand.Isaac64,
+ .ty = Random.Isaac64,
.name = "isaac64",
.init_u64 = 0,
},
Rng{
- .ty = rand.Pcg,
+ .ty = Random.Pcg,
.name = "pcg",
.init_u64 = 0,
},
Rng{
- .ty = rand.RomuTrio,
+ .ty = Random.RomuTrio,
.name = "romutrio",
.init_u64 = 0,
},
Rng{
- .ty = std.rand.Sfc64,
+ .ty = Random.Sfc64,
.name = "sfc64",
.init_u64 = 0,
},
Rng{
- .ty = std.rand.Xoroshiro128,
+ .ty = Random.Xoroshiro128,
.name = "xoroshiro128",
.init_u64 = 0,
},
Rng{
- .ty = std.rand.Xoshiro256,
+ .ty = Random.Xoshiro256,
.name = "xoshiro256",
.init_u64 = 0,
},
@@ -52,12 +52,12 @@ const prngs = [_]Rng{
const csprngs = [_]Rng{
Rng{
- .ty = rand.Ascon,
+ .ty = Random.Ascon,
.name = "ascon",
.init_u8s = &[_]u8{0} ** 32,
},
Rng{
- .ty = rand.ChaCha,
+ .ty = Random.ChaCha,
.name = "chacha",
.init_u8s = &[_]u8{0} ** 32,
},
diff --git a/lib/std/rand/test.zig b/lib/std/Random/test.zig
index d498985097..8ceacbc934 100644
--- a/lib/std/rand/test.zig
+++ b/lib/std/Random/test.zig
@@ -1,9 +1,9 @@
const std = @import("../std.zig");
const math = std.math;
-const DefaultPrng = std.rand.DefaultPrng;
-const Random = std.rand.Random;
-const SplitMix64 = std.rand.SplitMix64;
-const DefaultCsprng = std.rand.DefaultCsprng;
+const Random = std.Random;
+const DefaultPrng = Random.DefaultPrng;
+const SplitMix64 = Random.SplitMix64;
+const DefaultCsprng = Random.DefaultCsprng;
const expect = std.testing.expect;
const expectEqual = std.testing.expectEqual;
diff --git a/lib/std/rand/ziggurat.zig b/lib/std/Random/ziggurat.zig
index 87045e4077..2bed6a065b 100644
--- a/lib/std/rand/ziggurat.zig
+++ b/lib/std/Random/ziggurat.zig
@@ -10,7 +10,7 @@
const std = @import("../std.zig");
const builtin = @import("builtin");
const math = std.math;
-const Random = std.rand.Random;
+const Random = std.Random;
pub fn next_f64(random: Random, comptime tables: ZigTable) f64 {
while (true) {
@@ -127,7 +127,7 @@ pub fn norm_zero_case(random: Random, u: f64) f64 {
}
test "normal dist sanity" {
- var prng = std.rand.DefaultPrng.init(0);
+ var prng = Random.DefaultPrng.init(0);
const random = prng.random();
var i: usize = 0;
@@ -156,7 +156,7 @@ pub fn exp_zero_case(random: Random, _: f64) f64 {
}
test "exp dist smoke test" {
- var prng = std.rand.DefaultPrng.init(0);
+ var prng = Random.DefaultPrng.init(0);
const random = prng.random();
var i: usize = 0;
diff --git a/lib/std/Thread.zig b/lib/std/Thread.zig
index c3f628da79..ae794f44af 100644
--- a/lib/std/Thread.zig
+++ b/lib/std/Thread.zig
@@ -91,7 +91,7 @@ pub fn setName(self: Thread, name: []const u8) SetNameError!void {
},
.windows => {
var buf: [max_name_len]u16 = undefined;
- const len = try std.unicode.utf8ToUtf16Le(&buf, name);
+ const len = try std.unicode.wtf8ToWtf16Le(&buf, name);
const byte_len = math.cast(c_ushort, len * 2) orelse return error.NameTooLong;
// Note: NT allocates its own copy, no use-after-free here.
@@ -157,17 +157,12 @@ pub fn setName(self: Thread, name: []const u8) SetNameError!void {
}
pub const GetNameError = error{
- // For Windows, the name is converted from UTF16 to UTF8
- CodepointTooLarge,
- Utf8CannotEncodeSurrogateHalf,
- DanglingSurrogateHalf,
- ExpectedSecondSurrogateHalf,
- UnexpectedSecondSurrogateHalf,
-
Unsupported,
Unexpected,
} || os.PrctlError || os.ReadError || std.fs.File.OpenError || std.fmt.BufPrintError;
+/// On Windows, the result is encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
+/// On other platforms, the result is an opaque sequence of bytes with no particular encoding.
pub fn getName(self: Thread, buffer_ptr: *[max_name_len:0]u8) GetNameError!?[]const u8 {
buffer_ptr[max_name_len] = 0;
var buffer: [:0]u8 = buffer_ptr;
@@ -213,7 +208,7 @@ pub fn getName(self: Thread, buffer_ptr: *[max_name_len:0]u8) GetNameError!?[]co
)) {
.SUCCESS => {
const string = @as(*const os.windows.UNICODE_STRING, @ptrCast(&buf));
- const len = try std.unicode.utf16leToUtf8(buffer, string.Buffer[0 .. string.Length / 2]);
+ const len = std.unicode.wtf16LeToWtf8(buffer, string.Buffer[0 .. string.Length / 2]);
return if (len > 0) buffer[0..len] else null;
},
.NOT_IMPLEMENTED => return error.Unsupported,
diff --git a/lib/std/Thread/RwLock.zig b/lib/std/Thread/RwLock.zig
index a05d68df88..e0923c40ad 100644
--- a/lib/std/Thread/RwLock.zig
+++ b/lib/std/Thread/RwLock.zig
@@ -328,7 +328,7 @@ test "RwLock - concurrent access" {
}
fn writer(self: *Self, thread_idx: usize) !void {
- var prng = std.rand.DefaultPrng.init(thread_idx);
+ var prng = std.Random.DefaultPrng.init(thread_idx);
var rnd = prng.random();
while (true) {
diff --git a/lib/std/Thread/Semaphore.zig b/lib/std/Thread/Semaphore.zig
index 1b182d4c2a..3253d17a8e 100644
--- a/lib/std/Thread/Semaphore.zig
+++ b/lib/std/Thread/Semaphore.zig
@@ -1,6 +1,23 @@
//! A semaphore is an unsigned integer that blocks the kernel thread if
//! the number would become negative.
//! This API supports static initialization and does not require deinitialization.
+//!
+//! Example:
+//! ```
+//! var s = Semaphore{};
+//!
+//! fn consumer() void {
+//! s.wait();
+//! }
+//!
+//! fn producer() void {
+//! s.post();
+//! }
+//!
+//! const thread = try std.Thread.spawn(.{}, producer, .{});
+//! consumer();
+//! thread.join();
+//! ```
mutex: Mutex = .{},
cond: Condition = .{},
@@ -26,6 +43,26 @@ pub fn wait(sem: *Semaphore) void {
sem.cond.signal();
}
+/// Blocks until a permit is available and consumes it, or returns
+/// `error.Timeout` once more than `timeout_ns` nanoseconds have elapsed
+/// without a permit becoming available.
+///
+/// On success, if permits remain after taking one, the condition is signaled.
+pub fn timedWait(sem: *Semaphore, timeout_ns: u64) error{Timeout}!void {
+ // NOTE(review): assumes a timer is always available here; if `Timer.start`
+ // can fail on a supported target, `catch unreachable` is not safe — confirm.
+ var timeout_timer = std.time.Timer.start() catch unreachable;
+
+ sem.mutex.lock();
+ defer sem.mutex.unlock();
+
+ while (sem.permits == 0) {
+ const elapsed = timeout_timer.read();
+ if (elapsed > timeout_ns)
+ return error.Timeout;
+
+ // Wait only for the time remaining, so that repeated wake-ups do not
+ // extend the total wait beyond `timeout_ns`.
+ const local_timeout_ns = timeout_ns - elapsed;
+ try sem.cond.timedWait(&sem.mutex, local_timeout_ns);
+ }
+
+ sem.permits -= 1;
+ if (sem.permits > 0)
+ sem.cond.signal();
+}
+
pub fn post(sem: *Semaphore) void {
sem.mutex.lock();
defer sem.mutex.unlock();
@@ -59,3 +96,16 @@ test "Thread.Semaphore" {
sem.wait();
try testing.expect(n == num_threads);
}
+
+test "Thread.Semaphore - timedWait" {
+ var sem = Semaphore{};
+ try testing.expectEqual(0, sem.permits);
+
+ try testing.expectError(error.Timeout, sem.timedWait(1));
+
+ sem.post();
+ try testing.expectEqual(1, sem.permits);
+
+ try sem.timedWait(1);
+ try testing.expectEqual(0, sem.permits);
+}
diff --git a/lib/std/Uri.zig b/lib/std/Uri.zig
index 8b455c6c71..0903c129c4 100644
--- a/lib/std/Uri.zig
+++ b/lib/std/Uri.zig
@@ -4,6 +4,7 @@
const Uri = @This();
const std = @import("std.zig");
const testing = std.testing;
+const Allocator = std.mem.Allocator;
scheme: []const u8,
user: ?[]const u8 = null,
@@ -15,15 +16,15 @@ query: ?[]const u8 = null,
fragment: ?[]const u8 = null,
/// Applies URI encoding and replaces all reserved characters with their respective %XX code.
-pub fn escapeString(allocator: std.mem.Allocator, input: []const u8) error{OutOfMemory}![]u8 {
+pub fn escapeString(allocator: Allocator, input: []const u8) error{OutOfMemory}![]u8 {
return escapeStringWithFn(allocator, input, isUnreserved);
}
-pub fn escapePath(allocator: std.mem.Allocator, input: []const u8) error{OutOfMemory}![]u8 {
+pub fn escapePath(allocator: Allocator, input: []const u8) error{OutOfMemory}![]u8 {
return escapeStringWithFn(allocator, input, isPathChar);
}
-pub fn escapeQuery(allocator: std.mem.Allocator, input: []const u8) error{OutOfMemory}![]u8 {
+pub fn escapeQuery(allocator: Allocator, input: []const u8) error{OutOfMemory}![]u8 {
return escapeStringWithFn(allocator, input, isQueryChar);
}
@@ -39,7 +40,7 @@ pub fn writeEscapedQuery(writer: anytype, input: []const u8) !void {
return writeEscapedStringWithFn(writer, input, isQueryChar);
}
-pub fn escapeStringWithFn(allocator: std.mem.Allocator, input: []const u8, comptime keepUnescaped: fn (c: u8) bool) std.mem.Allocator.Error![]u8 {
+pub fn escapeStringWithFn(allocator: Allocator, input: []const u8, comptime keepUnescaped: fn (c: u8) bool) Allocator.Error![]u8 {
var outsize: usize = 0;
for (input) |c| {
outsize += if (keepUnescaped(c)) @as(usize, 1) else 3;
@@ -76,7 +77,7 @@ pub fn writeEscapedStringWithFn(writer: anytype, input: []const u8, comptime kee
/// Parses a URI string and unescapes all %XX where XX is a valid hex number. Otherwise, verbatim copies
/// them to the output.
-pub fn unescapeString(allocator: std.mem.Allocator, input: []const u8) error{OutOfMemory}![]u8 {
+pub fn unescapeString(allocator: Allocator, input: []const u8) error{OutOfMemory}![]u8 {
var outsize: usize = 0;
var inptr: usize = 0;
while (inptr < input.len) {
@@ -341,7 +342,7 @@ pub fn format(
/// The return value will contain unescaped strings pointing into the
/// original `text`. Each component that is provided, will be non-`null`.
pub fn parse(text: []const u8) ParseError!Uri {
- var reader = SliceReader{ .slice = text };
+ var reader: SliceReader = .{ .slice = text };
const scheme = reader.readWhile(isSchemeChar);
// after the scheme, a ':' must appear
@@ -358,111 +359,145 @@ pub fn parse(text: []const u8) ParseError!Uri {
return uri;
}
-/// Implementation of RFC 3986, Section 5.2.4. Removes dot segments from a URI path.
-///
-/// `std.fs.path.resolvePosix` is not sufficient here because it may return relative paths and does not preserve trailing slashes.
-fn removeDotSegments(allocator: std.mem.Allocator, paths: []const []const u8) std.mem.Allocator.Error![]const u8 {
- var result = std.ArrayList(u8).init(allocator);
- defer result.deinit();
-
- for (paths) |p| {
- var it = std.mem.tokenizeScalar(u8, p, '/');
- while (it.next()) |component| {
- if (std.mem.eql(u8, component, ".")) {
- continue;
- } else if (std.mem.eql(u8, component, "..")) {
- if (result.items.len == 0)
- continue;
+pub const ResolveInplaceError = ParseError || error{OutOfMemory}; // error set of `resolve_inplace`: any parse failure, or `aux_buf` being too small
- while (true) {
- const ends_with_slash = result.items[result.items.len - 1] == '/';
- result.items.len -= 1;
- if (ends_with_slash or result.items.len == 0) break;
- }
- } else {
- try result.ensureUnusedCapacity(1 + component.len);
- result.appendAssumeCapacity('/');
- result.appendSliceAssumeCapacity(component);
- }
- }
- }
+/// Resolves `new` against `base`, conforming to RFC 3986, Section 5.
+///
+/// `new` is first copied to the beginning of `aux_buf` (the two slices may
+/// overlap), parsed there, and its path is then normalized in place. If a
+/// merge with the base path is needed, the merged path is built in `aux_buf`
+/// just after the copied `new`.
+///
+/// The returned `Uri` borrows from `base` and from `aux_buf`. After the call,
+/// `new` must be treated as invalid whenever it overlapped `aux_buf`.
+///
+/// Errors:
+/// * `error.OutOfMemory` if `aux_buf` cannot hold `new`, or cannot hold the
+///   merged path behind it.
+/// * a `ParseError` if `new` parses neither with nor without a scheme; the
+///   error from the with-scheme attempt is the one reported.
+pub fn resolve_inplace(base: Uri, new: []const u8, aux_buf: []u8) ResolveInplaceError!Uri {
+    // The copy helpers assert `dest.len >= source.len`; report a short buffer
+    // as an error instead of tripping that assertion.
+    if (aux_buf.len < new.len) return error.OutOfMemory;
+
+    // `new` may alias `aux_buf`. Choose the copy direction that never reads a
+    // byte after it has been overwritten: forwards when the destination
+    // starts at or before the source, backwards otherwise.
+    if (@intFromPtr(aux_buf.ptr) <= @intFromPtr(new.ptr)) {
+        std.mem.copyForwards(u8, aux_buf, new);
+    } else {
+        std.mem.copyBackwards(u8, aux_buf, new);
+    }
+    // At this point, new is an invalid pointer.
+    const new_mut = aux_buf[0..new.len];
+
+    // Try `new` as a full URI first; if that fails, retry it as a scheme-less
+    // reference. When both attempts fail, the first error is returned.
+    const new_parsed, const has_scheme = p: {
+        break :p .{
+            parse(new_mut) catch |first_err| {
+                break :p .{
+                    parseWithoutScheme(new_mut) catch return first_err,
+                    false,
+                };
+            },
+            true,
+        };
+    };
- // ensure a trailing slash is kept
- const last_path = paths[paths.len - 1];
- if (last_path.len > 0 and last_path[last_path.len - 1] == '/') {
- try result.append('/');
- }
+    // As you can see above, `new_mut` is not a const pointer.
+    const new_path: []u8 = @constCast(new_parsed.path);
+
+    // The reference has its own scheme: take every component from it and
+    // only normalize its path.
+    if (has_scheme) return .{
+        .scheme = new_parsed.scheme,
+        .user = new_parsed.user,
+        .password = new_parsed.password,
+        .host = new_parsed.host,
+        .port = new_parsed.port,
+        .path = remove_dot_segments(new_path),
+        .query = new_parsed.query,
+        .fragment = new_parsed.fragment,
+    };
- return result.toOwnedSlice();
-}
+    // The reference has an authority but no scheme: only the scheme is
+    // inherited from `base`.
+    if (new_parsed.host) |host| return .{
+        .scheme = base.scheme,
+        .user = new_parsed.user,
+        .password = new_parsed.password,
+        .host = host,
+        .port = new_parsed.port,
+        .path = remove_dot_segments(new_path),
+        .query = new_parsed.query,
+        .fragment = new_parsed.fragment,
+    };
-/// Resolves a URI against a base URI, conforming to RFC 3986, Section 5.
-///
-/// Assumes `arena` owns all memory in `base` and `ref`. `arena` will own all memory in the returned URI.
-pub fn resolve(base: Uri, ref: Uri, strict: bool, arena: std.mem.Allocator) std.mem.Allocator.Error!Uri {
- var target: Uri = Uri{
- .scheme = "",
- .user = null,
- .password = null,
- .host = null,
- .port = null,
- .path = "",
- .query = null,
- .fragment = null,
+    // No scheme and no authority: the path decides what is inherited.
+    const path, const query = b: {
+        // Empty path: keep the base path, and the base query unless the
+        // reference supplies its own.
+        if (new_path.len == 0)
+            break :b .{
+                base.path,
+                new_parsed.query orelse base.query,
+            };
+
+        // Absolute path: it replaces the base path entirely.
+        if (new_path[0] == '/')
+            break :b .{
+                remove_dot_segments(new_path),
+                new_parsed.query,
+            };
+
+        // Relative path: merge with the base path, using the part of
+        // `aux_buf` that follows the copied `new` as scratch space.
+        break :b .{
+            try merge_paths(base.path, new_path, aux_buf[new_mut.len..]),
+            new_parsed.query,
+        };
};
- if (ref.scheme.len > 0 and (strict or !std.mem.eql(u8, ref.scheme, base.scheme))) {
- target.scheme = ref.scheme;
- target.user = ref.user;
- target.host = ref.host;
- target.port = ref.port;
- target.path = try removeDotSegments(arena, &.{ref.path});
- target.query = ref.query;
- } else {
- target.scheme = base.scheme;
- if (ref.host) |host| {
- target.user = ref.user;
- target.host = host;
- target.port = ref.port;
- target.path = ref.path;
- target.path = try removeDotSegments(arena, &.{ref.path});
- target.query = ref.query;
+    return .{
+        .scheme = base.scheme,
+        .user = base.user,
+        .password = base.password,
+        .host = base.host,
+        .port = base.port,
+        .path = path,
+        .query = query,
+        .fragment = new_parsed.fragment,
+    };
+}
+
+/// In-place implementation of RFC 3986, Section 5.2.4.
+fn remove_dot_segments(path: []u8) []u8 { // rewrites `path` in place and returns the leading sub-slice that holds the result
+ var in_i: usize = 0; // read cursor: path[in_i..] is the input not yet processed
+ var out_i: usize = 0; // write cursor: path[0..out_i] is the output built so far
+ while (in_i < path.len) {
+ if (std.mem.startsWith(u8, path[in_i..], "./")) { // input starts with "./": drop it
+ in_i += 2;
+ } else if (std.mem.startsWith(u8, path[in_i..], "../")) { // input starts with "../": drop it
+ in_i += 3;
+ } else if (std.mem.startsWith(u8, path[in_i..], "/./")) { // "/./" becomes "/": skip two bytes, keeping the second '/'
+ in_i += 2;
+ } else if (std.mem.eql(u8, path[in_i..], "/.")) { // remaining input is exactly "/.": it becomes "/"
+ in_i += 1;
+ path[in_i] = '/'; // overwrite the '.' so the remaining input reads "/"
+ } else if (std.mem.startsWith(u8, path[in_i..], "/../")) { // "/../" becomes "/" and the last output segment is removed
+ in_i += 3;
+ while (out_i > 0) { // rewind the output to just before its last '/'
+ out_i -= 1;
+ if (path[out_i] == '/') break;
+ }
+ } else if (std.mem.eql(u8, path[in_i..], "/..")) { // remaining input is exactly "/..": becomes "/" and removes the last output segment
+ in_i += 2;
+ path[in_i] = '/'; // overwrite the final '.' so the remaining input reads "/"
+ while (out_i > 0) { // same rewind as in the "/../" case
+ out_i -= 1;
+ if (path[out_i] == '/') break;
+ }
+ } else if (std.mem.eql(u8, path[in_i..], ".")) { // remaining input is exactly ".": drop it
+ in_i += 1;
+ } else if (std.mem.eql(u8, path[in_i..], "..")) { // remaining input is exactly "..": drop it
+ in_i += 2;
} else { // ordinary segment: move it to the output
- if (ref.path.len == 0) {
- target.path = base.path;
- target.query = ref.query orelse base.query;
- } else {
- if (ref.path[0] == '/') {
- target.path = try removeDotSegments(arena, &.{ref.path});
- } else {
- target.path = try removeDotSegments(arena, &.{ std.fs.path.dirnamePosix(base.path) orelse "", ref.path });
- }
- target.query = ref.query;
+ while (true) { // the first byte is copied unconditionally, then bytes up to (not including) the next '/'
+ path[out_i] = path[in_i];
+ out_i += 1;
+ in_i += 1;
+ if (in_i >= path.len or path[in_i] == '/') break;
}
-
- target.user = base.user;
- target.host = base.host;
- target.port = base.port;
}
}
-
- target.fragment = ref.fragment;
-
- return target;
+ return path[0..out_i];
}
-test resolve {
- const base = try parse("http://a/b/c/d;p?q");
-
- var arena = std.heap.ArenaAllocator.init(std.testing.allocator);
- defer arena.deinit();
+test remove_dot_segments {
+    // Every case lives in its own mutable buffer because the function
+    // rewrites its argument in place.
+    {
+        // Dot segments in the middle of an absolute path.
+        var buffer = "/a/b/c/./../../g".*;
+        try std.testing.expectEqualStrings("/a/g", remove_dot_segments(&buffer));
+    }
+    {
+        // "/../" inside a path that does not start with '/'.
+        var buffer = "mid/content=5/../6".*;
+        try std.testing.expectEqualStrings("mid/6", remove_dot_segments(&buffer));
+    }
+    {
+        // A terminal "/." is replaced by "/".
+        var buffer = "/a/b/.".*;
+        try std.testing.expectEqualStrings("/a/b/", remove_dot_segments(&buffer));
+    }
+    {
+        // A terminal "/.." removes the last segment and leaves a trailing '/'.
+        var buffer = "/a/b/..".*;
+        try std.testing.expectEqualStrings("/a/", remove_dot_segments(&buffer));
+    }
+    {
+        // Leading "../" prefixes are dropped.
+        var buffer = "../../g".*;
+        try std.testing.expectEqualStrings("g", remove_dot_segments(&buffer));
+    }
+    {
+        // A path consisting only of ".." collapses to nothing.
+        var buffer = "..".*;
+        try std.testing.expectEqualStrings("", remove_dot_segments(&buffer));
+    }
+}
- try std.testing.expectEqualDeep(try parse("http://a/b/c/blog/"), try base.resolve(try parseWithoutScheme("blog/"), true, arena.allocator()));
- try std.testing.expectEqualDeep(try parse("http://a/b/c/blog/?k"), try base.resolve(try parseWithoutScheme("blog/?k"), true, arena.allocator()));
- try std.testing.expectEqualDeep(try parse("http://a/b/blog/"), try base.resolve(try parseWithoutScheme("../blog/"), true, arena.allocator()));
- try std.testing.expectEqualDeep(try parse("http://a/b/blog"), try base.resolve(try parseWithoutScheme("../blog"), true, arena.allocator()));
- try std.testing.expectEqualDeep(try parse("http://e"), try base.resolve(try parseWithoutScheme("//e"), true, arena.allocator()));
- try std.testing.expectEqualDeep(try parse("https://a:1/"), try base.resolve(try parse("https://a:1/"), true, arena.allocator()));
+/// 5.2.3. Merge Paths
+///
+/// Joins the directory part of `base` (everything up to and including its
+/// last '/') with `new` inside `aux`, then removes dot segments from the
+/// result. An empty `base` is treated as "/". If `base` contains no '/',
+/// `new` alone is normalized in place and returned.
+/// Returns `error.OutOfMemory` when `aux` is shorter than
+/// `base.len + 1 + new.len`.
+fn merge_paths(base: []const u8, new: []u8, aux: []u8) error{OutOfMemory}![]u8 {
+    const worst_case_len = base.len + 1 + new.len;
+    if (worst_case_len > aux.len) return error.OutOfMemory;
+
+    if (base.len == 0) {
+        const merged = aux[0 .. new.len + 1];
+        merged[0] = '/';
+        @memcpy(merged[1..], new);
+        return remove_dot_segments(merged);
+    }
+
+    if (std.mem.lastIndexOfScalar(u8, base, '/')) |last_slash| {
+        const dir = base[0 .. last_slash + 1];
+        const merged = aux[0 .. dir.len + new.len];
+        @memcpy(merged[0..dir.len], dir);
+        @memcpy(merged[dir.len..], new);
+        return remove_dot_segments(merged);
+    }
+
+    return remove_dot_segments(new);
}
const SliceReader = struct {
diff --git a/lib/std/array_list.zig b/lib/std/array_list.zig
index 79ed5c192c..1926f627f3 100644
--- a/lib/std/array_list.zig
+++ b/lib/std/array_list.zig
@@ -937,14 +937,33 @@ pub fn ArrayListAlignedUnmanaged(comptime T: type, comptime alignment: ?u29) typ
return .{ .context = .{ .self = self, .allocator = allocator } };
}
- /// Same as `append` except it returns the number of bytes written, which is always the same
- /// as `m.len`. The purpose of this function existing is to match `std.io.Writer` API.
+ /// Same as `appendSlice` except it returns the number of bytes written,
+ /// which is always the same as `m.len`. The purpose of this function
+ /// existing is to match `std.io.Writer` API.
/// Invalidates element pointers if additional memory is needed.
fn appendWrite(context: WriterContext, m: []const u8) Allocator.Error!usize {
try context.self.appendSlice(context.allocator, m); // list and allocator both come from the writer context
return m.len;
}
+        /// Writer type returned by `fixedWriter`: its context is a pointer to
+        /// the list and its write function is `appendWriteFixed`.
+        pub const FixedWriter = std.io.Writer(*Self, Allocator.Error, appendWriteFixed);
+
+        /// Returns a writer that appends to the list without ever growing it;
+        /// a write that does not fit into the remaining capacity fails with
+        /// `error.OutOfMemory`.
+        pub fn fixedWriter(self: *Self) FixedWriter {
+            const writer: FixedWriter = .{ .context = self };
+            return writer;
+        }
+
+        /// Write callback behind `FixedWriter`; it exists to match the
+        /// `std.io.Writer` API. Appends all of `m` and returns `m.len`, or
+        /// returns `error.OutOfMemory` without appending anything when `m`
+        /// is longer than the unused capacity.
+        fn appendWriteFixed(self: *Self, m: []const u8) error{OutOfMemory}!usize {
+            const room = self.capacity - self.items.len;
+            if (room < m.len) return error.OutOfMemory;
+
+            self.appendSliceAssumeCapacity(m);
+            return m.len;
+        }
+
/// Append a value to the list `n` times.
/// Allocates more memory as necessary.
/// Invalidates element pointers if additional memory is needed.
diff --git a/lib/std/builtin.zig b/lib/std/builtin.zig
index a0fbaea7de..fc85d32d52 100644
--- a/lib/std/builtin.zig
+++ b/lib/std/builtin.zig
@@ -738,7 +738,6 @@ pub const CompilerBackend = enum(u64) {
pub const TestFn = struct { // pairs a name with a pointer to a no-argument function that may fail with any error
name: []const u8,
func: *const fn () anyerror!void,
- async_frame_size: ?usize,
};
/// This function type is used by the Zig language code generation and
@@ -765,7 +764,7 @@ pub fn default_panic(msg: []const u8, error_return_trace: ?*StackTrace, ret_addr
builtin.zig_backend == .stage2_arm or
builtin.zig_backend == .stage2_aarch64 or
builtin.zig_backend == .stage2_x86 or
- (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt != .elf) or
+ (builtin.zig_backend == .stage2_x86_64 and (builtin.target.ofmt != .elf and builtin.target.ofmt != .macho)) or
builtin.zig_backend == .stage2_riscv64 or
builtin.zig_backend == .stage2_sparc64 or
builtin.zig_backend == .stage2_spirv64)
diff --git a/lib/std/c.zig b/lib/std/c.zig
index e03a16314f..cc5483cce9 100644
--- a/lib/std/c.zig
+++ b/lib/std/c.zig
@@ -4,6 +4,10 @@ const c = @This();
const page_size = std.mem.page_size;
const iovec = std.os.iovec;
const iovec_const = std.os.iovec_const;
+const wasi = @import("c/wasi.zig");
+const native_abi = builtin.abi;
+const native_arch = builtin.cpu.arch;
+const native_os = builtin.os.tag;
/// If not linking libc, returns false.
/// If linking musl libc, returns true.
@@ -13,7 +17,7 @@ const iovec_const = std.os.iovec_const;
pub inline fn versionCheck(comptime glibc_version: std.SemanticVersion) bool {
return comptime blk: {
if (!builtin.link_libc) break :blk false;
- if (builtin.abi.isMusl()) break :blk true;
+ if (native_abi.isMusl()) break :blk true;
if (builtin.target.isGnuLibC()) {
const ver = builtin.os.version_range.linux.glibc;
const order = ver.order(glibc_version);
@@ -27,7 +31,7 @@ pub inline fn versionCheck(comptime glibc_version: std.SemanticVersion) bool {
};
}
-pub usingnamespace switch (builtin.os.tag) {
+pub usingnamespace switch (native_os) {
.linux => @import("c/linux.zig"),
.windows => @import("c/windows.zig"),
.macos, .ios, .tvos, .watchos => @import("c/darwin.zig"),
@@ -36,16 +40,504 @@ pub usingnamespace switch (builtin.os.tag) {
.dragonfly => @import("c/dragonfly.zig"),
.openbsd => @import("c/openbsd.zig"),
.haiku => @import("c/haiku.zig"),
- .hermit => @import("c/hermit.zig"),
.solaris, .illumos => @import("c/solaris.zig"),
- .fuchsia => @import("c/fuchsia.zig"),
- .minix => @import("c/minix.zig"),
.emscripten => @import("c/emscripten.zig"),
- .wasi => @import("c/wasi.zig"),
+ .wasi => wasi,
else => struct {},
};
-pub const MAP = switch (builtin.os.tag) {
+pub const pthread_mutex_t = switch (native_os) { // per-OS definition chosen at comptime; unlisted targets hit the @compileError at the end
+ .linux, .minix => extern struct { // a byte array whose length depends on ABI and CPU architecture
+ data: [data_len]u8 align(@alignOf(usize)) = [_]u8{0} ** data_len,
+
+ const data_len = switch (native_abi) { // array length in bytes, picked by ABI first and by architecture for the gnu ABIs
+ .musl, .musleabi, .musleabihf => if (@sizeOf(usize) == 8) 40 else 24,
+ .gnu, .gnuabin32, .gnuabi64, .gnueabi, .gnueabihf, .gnux32 => switch (native_arch) {
+ .aarch64 => 48,
+ .x86_64 => if (native_abi == .gnux32) 40 else 32,
+ .mips64, .powerpc64, .powerpc64le, .sparc64 => 40,
+ else => if (@sizeOf(usize) == 8) 40 else 24,
+ },
+ .android => if (@sizeOf(usize) == 8) 40 else 4,
+ else => @compileError("unsupported ABI"),
+ };
+ },
+ .macos, .ios, .tvos, .watchos => extern struct {
+ sig: c_long = 0x32AAABA7, // NOTE(review): presumably the platform's static-initializer signature — confirm against the system headers
+ data: [data_len]u8 = [_]u8{0} ** data_len,
+
+ const data_len = if (@sizeOf(usize) == 8) 56 else 40;
+ },
+ .freebsd, .kfreebsd, .dragonfly, .openbsd => extern struct { // a single nullable pointer, null by default
+ inner: ?*anyopaque = null,
+ },
+ .hermit => extern struct {
+ ptr: usize = std.math.maxInt(usize),
+ },
+ .netbsd => extern struct {
+ magic: u32 = 0x33330003,
+ errorcheck: c.padded_pthread_spin_t = 0,
+ ceiling: c.padded_pthread_spin_t = 0,
+ owner: usize = 0,
+ waiters: ?*u8 = null,
+ recursed: u32 = 0,
+ spare2: ?*anyopaque = null,
+ },
+ .haiku => extern struct {
+ flags: u32 = 0,
+ lock: i32 = 0,
+ unused: i32 = -42,
+ owner: i32 = -1,
+ owner_count: i32 = 0,
+ },
+ .solaris, .illumos => extern struct {
+ flag1: u16 = 0,
+ flag2: u8 = 0,
+ ceiling: u8 = 0,
+ type: u16 = 0,
+ magic: u16 = 0x4d58,
+ lock: u64 = 0,
+ data: u64 = 0,
+ },
+ .fuchsia => extern struct {
+ data: [40]u8 align(@alignOf(usize)) = [_]u8{0} ** 40,
+ },
+ .emscripten => extern struct {
+ data: [24]u8 align(4) = [_]u8{0} ** 24,
+ },
+ else => @compileError("target libc does not have pthread_mutex_t"),
+};
+
+pub const pthread_cond_t = switch (native_os) { // per-OS definition chosen at comptime; unlisted targets hit the @compileError at the end
+ .linux => extern struct { // 48 zeroed bytes aligned like usize
+ data: [48]u8 align(@alignOf(usize)) = [_]u8{0} ** 48,
+ },
+ .macos, .ios, .tvos, .watchos => extern struct {
+ sig: c_long = 0x3CB0B1BB, // NOTE(review): presumably the platform's static-initializer signature — confirm against the system headers
+ data: [data_len]u8 = [_]u8{0} ** data_len,
+ const data_len = if (@sizeOf(usize) == 8) 40 else 24;
+ },
+ .freebsd, .kfreebsd, .dragonfly, .openbsd => extern struct { // a single nullable pointer, null by default
+ inner: ?*anyopaque = null,
+ },
+ .hermit => extern struct {
+ ptr: usize = std.math.maxInt(usize),
+ },
+ .netbsd => extern struct {
+ magic: u32 = 0x55550005,
+ lock: c.pthread_spin_t = 0,
+ waiters_first: ?*u8 = null,
+ waiters_last: ?*u8 = null,
+ mutex: ?*pthread_mutex_t = null,
+ private: ?*anyopaque = null,
+ },
+ .haiku => extern struct {
+ flags: u32 = 0,
+ unused: i32 = -42,
+ mutex: ?*anyopaque = null,
+ waiter_count: i32 = 0,
+ lock: i32 = 0,
+ },
+ .solaris, .illumos => extern struct {
+ flag: [4]u8 = [_]u8{0} ** 4,
+ type: u16 = 0,
+ magic: u16 = 0x4356,
+ data: u64 = 0,
+ },
+ .fuchsia, .minix, .emscripten => extern struct { // same shape as the Linux variant
+ data: [48]u8 align(@alignOf(usize)) = [_]u8{0} ** 48,
+ },
+ else => @compileError("target libc does not have pthread_cond_t"),
+};
+
+/// Per-OS definition of `pthread_rwlock_t`, selected at comptime.
+///
+/// Most targets use a zero-initialized byte array of a fixed size; the
+/// remaining ones spell out their fields with default values. Targets
+/// without a definition hit the `@compileError` at the end.
+pub const pthread_rwlock_t = switch (native_os) {
+    .linux => switch (native_abi) {
+        // Android's size depends on the pointer width.
+        .android => switch (@sizeOf(usize)) {
+            4 => extern struct {
+                data: [40]u8 align(@alignOf(usize)) = [_]u8{0} ** 40,
+            },
+            8 => extern struct {
+                data: [56]u8 align(@alignOf(usize)) = [_]u8{0} ** 56,
+            },
+            else => @compileError("impossible pointer size"),
+        },
+        else => extern struct {
+            data: [56]u8 align(@alignOf(usize)) = [_]u8{0} ** 56,
+        },
+    },
+    .macos, .ios, .tvos, .watchos => extern struct {
+        sig: c_long = 0x2DA8B3B4,
+        data: [192]u8 = [_]u8{0} ** 192,
+    },
+    .freebsd, .kfreebsd, .dragonfly, .openbsd => extern struct {
+        ptr: ?*anyopaque = null,
+    },
+    .hermit => extern struct {
+        ptr: usize = std.math.maxInt(usize),
+    },
+    .netbsd => extern struct {
+        magic: c_uint = 0x99990009,
+        // The type of `interlock` depends on the CPU architecture. An
+        // architecture that is not listed is reported with a readable
+        // compile error, like the other unsupported cases in this file.
+        interlock: switch (native_arch) {
+            .aarch64, .sparc, .x86_64, .x86 => u8,
+            .arm, .powerpc => c_int,
+            else => @compileError("unsupported CPU architecture for NetBSD pthread_rwlock_t"),
+        } = 0,
+        rblocked_first: ?*u8 = null,
+        rblocked_last: ?*u8 = null,
+        wblocked_first: ?*u8 = null,
+        wblocked_last: ?*u8 = null,
+        nreaders: c_uint = 0,
+        owner: ?pthread_t = null,
+        private: ?*anyopaque = null,
+    },
+    .solaris, .illumos => extern struct {
+        readers: i32 = 0,
+        type: u16 = 0,
+        magic: u16 = 0x5257,
+        // Built from the mutex and condition variable types of this file,
+        // each default-initialized.
+        mutex: pthread_mutex_t = .{},
+        readercv: pthread_cond_t = .{},
+        writercv: pthread_cond_t = .{},
+    },
+    .fuchsia => extern struct {
+        size: [56]u8 align(@alignOf(usize)) = [_]u8{0} ** 56,
+    },
+    .emscripten => extern struct {
+        size: [32]u8 align(4) = [_]u8{0} ** 32,
+    },
+    else => @compileError("target libc does not have pthread_rwlock_t"),
+};
+
+pub const AT = switch (native_os) { // per-OS namespace of `AT` constants; unlisted targets hit the @compileError at the end
+ .linux => std.os.linux.AT, // Linux reuses the definition from std.os.linux
+ .windows => struct { // only REMOVEDIR is declared for Windows
+ /// Remove directory instead of unlinking file
+ pub const REMOVEDIR = 0x200;
+ },
+ .macos, .ios, .tvos, .watchos => struct {
+ pub const FDCWD = -2;
+ /// Use effective ids in access check
+ pub const EACCESS = 0x0010;
+ /// Act on the symlink itself not the target
+ pub const SYMLINK_NOFOLLOW = 0x0020;
+ /// Act on target of symlink
+ pub const SYMLINK_FOLLOW = 0x0040;
+ /// Path refers to directory
+ pub const REMOVEDIR = 0x0080;
+ },
+ .freebsd, .kfreebsd => struct {
+ /// Magic value that specify the use of the current working directory
+ /// to determine the target of relative file paths in the openat() and
+ /// similar syscalls.
+ pub const FDCWD = -100;
+ /// Check access using effective user and group ID
+ pub const EACCESS = 0x0100;
+ /// Do not follow symbolic links
+ pub const SYMLINK_NOFOLLOW = 0x0200;
+ /// Follow symbolic link
+ pub const SYMLINK_FOLLOW = 0x0400;
+ /// Remove directory instead of file
+ pub const REMOVEDIR = 0x0800;
+ /// Fail if not under dirfd
+ pub const BENEATH = 0x1000;
+ },
+ .netbsd => struct { // same values as the FreeBSD variant, minus BENEATH
+ /// Magic value that specify the use of the current working directory
+ /// to determine the target of relative file paths in the openat() and
+ /// similar syscalls.
+ pub const FDCWD = -100;
+ /// Check access using effective user and group ID
+ pub const EACCESS = 0x0100;
+ /// Do not follow symbolic links
+ pub const SYMLINK_NOFOLLOW = 0x0200;
+ /// Follow symbolic link
+ pub const SYMLINK_FOLLOW = 0x0400;
+ /// Remove directory instead of file
+ pub const REMOVEDIR = 0x0800;
+ },
+ .dragonfly => struct {
+ pub const FDCWD = -328243;
+ pub const SYMLINK_NOFOLLOW = 1;
+ pub const REMOVEDIR = 2;
+ pub const EACCESS = 4;
+ pub const SYMLINK_FOLLOW = 8;
+ },
+ .openbsd => struct {
+ /// Magic value that specify the use of the current working directory
+ /// to determine the target of relative file paths in the openat() and
+ /// similar syscalls.
+ pub const FDCWD = -100;
+ /// Check access using effective user and group ID
+ pub const EACCESS = 0x01;
+ /// Do not follow symbolic links
+ pub const SYMLINK_NOFOLLOW = 0x02;
+ /// Follow symbolic link
+ pub const SYMLINK_FOLLOW = 0x04;
+ /// Remove directory instead of file
+ pub const REMOVEDIR = 0x08;
+ },
+ .haiku => struct {
+ pub const FDCWD = -1;
+ pub const SYMLINK_NOFOLLOW = 0x01;
+ pub const SYMLINK_FOLLOW = 0x02;
+ pub const REMOVEDIR = 0x04;
+ pub const EACCESS = 0x08;
+ },
+ .solaris, .illumos => struct {
+ /// Magic value that specify the use of the current working directory
+ /// to determine the target of relative file paths in the openat() and
+ /// similar syscalls.
+ pub const FDCWD: c.fd_t = @bitCast(@as(u32, 0xffd19553)); // the u32 bit pattern reinterpreted as a c.fd_t
+ /// Do not follow symbolic links
+ pub const SYMLINK_NOFOLLOW = 0x1000;
+ /// Follow symbolic link
+ pub const SYMLINK_FOLLOW = 0x2000;
+ /// Remove directory instead of file
+ pub const REMOVEDIR = 0x1;
+ pub const TRIGGER = 0x2;
+ /// Check access using effective user and group ID
+ pub const EACCESS = 0x4;
+ },
+ .emscripten => struct {
+ pub const FDCWD = -100;
+ pub const SYMLINK_NOFOLLOW = 0x100;
+ pub const REMOVEDIR = 0x200;
+ pub const SYMLINK_FOLLOW = 0x400;
+ pub const NO_AUTOMOUNT = 0x800;
+ pub const EMPTY_PATH = 0x1000;
+ pub const STATX_SYNC_TYPE = 0x6000;
+ pub const STATX_SYNC_AS_STAT = 0x0000;
+ pub const STATX_FORCE_SYNC = 0x2000;
+ pub const STATX_DONT_SYNC = 0x4000;
+ pub const RECURSIVE = 0x8000;
+ },
+ .wasi => struct {
+ pub const SYMLINK_NOFOLLOW = 0x100;
+ pub const SYMLINK_FOLLOW = 0x400;
+ pub const REMOVEDIR: u32 = 0x4;
+ /// When linking libc, we follow their convention and use -2 for current working directory.
+ /// However, without libc, Zig does a different convention: it assumes the
+ /// current working directory is the first preopen. This behavior can be
+ /// overridden with a public function called `wasi_cwd` in the root source
+ /// file.
+ pub const FDCWD: c.fd_t = if (builtin.link_libc) -2 else 3;
+ },
+
+ else => @compileError("target libc does not have AT"),
+};
+
+pub const O = switch (native_os) { // per-OS flag set; every non-Linux variant below is a 32-bit packed struct of named bits
+ .linux => std.os.linux.O, // Linux reuses the definition from std.os.linux
+ .emscripten => packed struct(u32) {
+ ACCMODE: std.os.ACCMODE = .RDONLY,
+ _2: u4 = 0, // NOTE(review): the `_`-prefixed integer fields look like unnamed bits that keep the following flags at their bit positions — confirm
+ CREAT: bool = false,
+ EXCL: bool = false,
+ NOCTTY: bool = false,
+ TRUNC: bool = false,
+ APPEND: bool = false,
+ NONBLOCK: bool = false,
+ DSYNC: bool = false,
+ ASYNC: bool = false,
+ DIRECT: bool = false,
+ LARGEFILE: bool = false,
+ DIRECTORY: bool = false,
+ NOFOLLOW: bool = false,
+ NOATIME: bool = false,
+ CLOEXEC: bool = false,
+ SYNC: bool = false,
+ PATH: bool = false,
+ TMPFILE: bool = false,
+ _: u9 = 0,
+ },
+ .wasi => packed struct(u32) { // the only variant without an ACCMODE field; it has `read` and `write` bits instead
+ APPEND: bool = false,
+ DSYNC: bool = false,
+ NONBLOCK: bool = false,
+ RSYNC: bool = false,
+ SYNC: bool = false,
+ _5: u7 = 0,
+ CREAT: bool = false,
+ DIRECTORY: bool = false,
+ EXCL: bool = false,
+ TRUNC: bool = false,
+ _16: u8 = 0,
+ NOFOLLOW: bool = false,
+ EXEC: bool = false,
+ read: bool = false,
+ SEARCH: bool = false,
+ write: bool = false,
+ _: u3 = 0,
+ },
+ .solaris, .illumos => packed struct(u32) {
+ ACCMODE: std.os.ACCMODE = .RDONLY,
+ NDELAY: bool = false,
+ APPEND: bool = false,
+ SYNC: bool = false,
+ _5: u1 = 0,
+ DSYNC: bool = false,
+ NONBLOCK: bool = false,
+ CREAT: bool = false,
+ TRUNC: bool = false,
+ EXCL: bool = false,
+ NOCTTY: bool = false,
+ _12: u1 = 0,
+ LARGEFILE: bool = false,
+ XATTR: bool = false,
+ RSYNC: bool = false,
+ _16: u1 = 0,
+ NOFOLLOW: bool = false,
+ NOLINKS: bool = false,
+ _19: u2 = 0,
+ SEARCH: bool = false,
+ EXEC: bool = false,
+ CLOEXEC: bool = false,
+ DIRECTORY: bool = false,
+ DIRECT: bool = false,
+ _: u6 = 0,
+ },
+ .netbsd => packed struct(u32) {
+ ACCMODE: std.os.ACCMODE = .RDONLY,
+ NONBLOCK: bool = false,
+ APPEND: bool = false,
+ SHLOCK: bool = false,
+ EXLOCK: bool = false,
+ ASYNC: bool = false,
+ SYNC: bool = false,
+ NOFOLLOW: bool = false,
+ CREAT: bool = false,
+ TRUNC: bool = false,
+ EXCL: bool = false,
+ _12: u3 = 0,
+ NOCTTY: bool = false,
+ DSYNC: bool = false,
+ RSYNC: bool = false,
+ ALT_IO: bool = false,
+ DIRECT: bool = false,
+ _20: u1 = 0,
+ DIRECTORY: bool = false,
+ CLOEXEC: bool = false,
+ SEARCH: bool = false,
+ _: u8 = 0,
+ },
+ .openbsd => packed struct(u32) {
+ ACCMODE: std.os.ACCMODE = .RDONLY,
+ NONBLOCK: bool = false,
+ APPEND: bool = false,
+ SHLOCK: bool = false,
+ EXLOCK: bool = false,
+ ASYNC: bool = false,
+ SYNC: bool = false,
+ NOFOLLOW: bool = false,
+ CREAT: bool = false,
+ TRUNC: bool = false,
+ EXCL: bool = false,
+ _12: u3 = 0,
+ NOCTTY: bool = false,
+ CLOEXEC: bool = false,
+ DIRECTORY: bool = false,
+ _: u14 = 0,
+ },
+ .haiku => packed struct(u32) {
+ ACCMODE: std.os.ACCMODE = .RDONLY,
+ _2: u4 = 0,
+ CLOEXEC: bool = false,
+ NONBLOCK: bool = false,
+ EXCL: bool = false,
+ CREAT: bool = false,
+ TRUNC: bool = false,
+ APPEND: bool = false,
+ NOCTTY: bool = false,
+ NOTRAVERSE: bool = false,
+ _14: u2 = 0,
+ SYNC: bool = false,
+ RSYNC: bool = false,
+ DSYNC: bool = false,
+ NOFOLLOW: bool = false,
+ DIRECT: bool = false,
+ DIRECTORY: bool = false,
+ _: u10 = 0,
+ },
+ .macos, .ios, .tvos, .watchos => packed struct(u32) {
+ ACCMODE: std.os.ACCMODE = .RDONLY,
+ NONBLOCK: bool = false,
+ APPEND: bool = false,
+ SHLOCK: bool = false,
+ EXLOCK: bool = false,
+ ASYNC: bool = false,
+ SYNC: bool = false,
+ NOFOLLOW: bool = false,
+ CREAT: bool = false,
+ TRUNC: bool = false,
+ EXCL: bool = false,
+ _12: u3 = 0,
+ EVTONLY: bool = false,
+ _16: u1 = 0,
+ NOCTTY: bool = false,
+ _18: u2 = 0,
+ DIRECTORY: bool = false,
+ SYMLINK: bool = false,
+ DSYNC: bool = false,
+ _23: u1 = 0,
+ CLOEXEC: bool = false,
+ _25: u4 = 0,
+ ALERT: bool = false,
+ _30: u1 = 0,
+ POPUP: bool = false,
+ },
+ .dragonfly => packed struct(u32) {
+ ACCMODE: std.os.ACCMODE = .RDONLY,
+ NONBLOCK: bool = false,
+ APPEND: bool = false,
+ SHLOCK: bool = false,
+ EXLOCK: bool = false,
+ ASYNC: bool = false,
+ SYNC: bool = false,
+ NOFOLLOW: bool = false,
+ CREAT: bool = false,
+ TRUNC: bool = false,
+ EXCL: bool = false,
+ _12: u3 = 0,
+ NOCTTY: bool = false,
+ DIRECT: bool = false,
+ CLOEXEC: bool = false,
+ FBLOCKING: bool = false,
+ FNONBLOCKING: bool = false,
+ FAPPEND: bool = false,
+ FOFFSET: bool = false,
+ FSYNCWRITE: bool = false,
+ FASYNCWRITE: bool = false,
+ _24: u3 = 0,
+ DIRECTORY: bool = false,
+ _: u4 = 0,
+ },
+ .freebsd => packed struct(u32) { // NOTE(review): `AT` in this file groups .kfreebsd with .freebsd, but here .kfreebsd falls through to the @compileError — confirm this is intended
+ ACCMODE: std.os.ACCMODE = .RDONLY,
+ NONBLOCK: bool = false,
+ APPEND: bool = false,
+ SHLOCK: bool = false,
+ EXLOCK: bool = false,
+ ASYNC: bool = false,
+ SYNC: bool = false,
+ NOFOLLOW: bool = false,
+ CREAT: bool = false,
+ TRUNC: bool = false,
+ EXCL: bool = false,
+ DSYNC: bool = false,
+ _13: u2 = 0,
+ NOCTTY: bool = false,
+ DIRECT: bool = false,
+ DIRECTORY: bool = false,
+ NOATIME: bool = false,
+ _19: u1 = 0,
+ CLOEXEC: bool = false,
+ PATH: bool = false,
+ TMPFILE: bool = false,
+ _: u9 = 0,
+ },
+ else => @compileError("target libc does not have O"),
+};
+
+pub const MAP = switch (native_os) {
.linux => std.os.linux.MAP,
.emscripten => packed struct(u32) {
TYPE: enum(u4) {
@@ -187,10 +679,822 @@ pub const MAP = switch (builtin.os.tag) {
/// Used by libc to communicate failure. Not actually part of the underlying syscall.
pub const MAP_FAILED: *anyopaque = @ptrFromInt(std.math.maxInt(usize));
-pub const whence_t = if (builtin.os.tag == .wasi) std.os.wasi.whence_t else c_int;
+pub const cc_t = u8; // element type of the `cc` arrays in the `termios` structs of this file
+
+/// Indices into the `cc` array in the `termios` struct.
+///
+/// Linux reuses `std.os.linux.V`. Every other listed target gets a plain
+/// enum, so the declaration order of the members determines their values.
+/// Targets without a definition hit the `@compileError` at the end.
+pub const V = switch (native_os) {
+    .linux => std.os.linux.V,
+    .macos, .ios, .tvos, .watchos, .netbsd, .openbsd => enum {
+        EOF,
+        EOL,
+        EOL2,
+        ERASE,
+        WERASE,
+        KILL,
+        REPRINT,
+        // Placeholder member: the FreeBSD variant has `ERASE2` in this
+        // position.
+        reserved,
+        INTR,
+        QUIT,
+        SUSP,
+        DSUSP,
+        START,
+        STOP,
+        LNEXT,
+        DISCARD,
+        MIN,
+        TIME,
+        STATUS,
+    },
+    .freebsd, .kfreebsd => enum {
+        EOF,
+        EOL,
+        EOL2,
+        ERASE,
+        WERASE,
+        KILL,
+        REPRINT,
+        ERASE2,
+        INTR,
+        QUIT,
+        SUSP,
+        DSUSP,
+        START,
+        STOP,
+        LNEXT,
+        DISCARD,
+        MIN,
+        TIME,
+        STATUS,
+    },
+    .haiku => enum {
+        INTR,
+        QUIT,
+        ERASE,
+        KILL,
+        EOF,
+        EOL,
+        EOL2,
+        SWTCH,
+        START,
+        STOP,
+        SUSP,
+    },
+    .solaris, .illumos => enum {
+        INTR,
+        QUIT,
+        ERASE,
+        KILL,
+        EOF,
+        EOL,
+        EOL2,
+        SWTCH,
+        START,
+        STOP,
+        SUSP,
+        DSUSP,
+        REPRINT,
+        DISCARD,
+        WERASE,
+        LNEXT,
+        STATUS,
+        ERASE2,
+    },
+    .emscripten, .wasi => enum {
+        INTR,
+        QUIT,
+        ERASE,
+        KILL,
+        EOF,
+        TIME,
+        MIN,
+        SWTC,
+        START,
+        STOP,
+        SUSP,
+        EOL,
+        REPRINT,
+        DISCARD,
+        WERASE,
+        LNEXT,
+        EOL2,
+    },
+    // Name the declaration that is actually missing, matching the other
+    // per-OS switches in this file.
+    else => @compileError("target libc does not have V"),
+};
+
+pub const NCCS = switch (native_os) { // length of the `cc` array in this file's `termios` structs, per target OS
+ .linux => std.os.linux.NCCS, // Linux reuses the value from std.os.linux
+ .macos, .ios, .tvos, .watchos, .freebsd, .kfreebsd, .netbsd, .openbsd, .dragonfly => 20,
+ .haiku => 11,
+ .solaris, .illumos => 19,
+ .emscripten, .wasi => 32,
+ else => @compileError("target libc does not have NCCS"),
+};
+
+/// Per-OS layout of the `termios` struct, selected at comptime.
+///
+/// Linux reuses `std.os.linux.termios`. The other variants are built from
+/// the flag types (`tc_iflag_t`, `tc_oflag_t`, `tc_cflag_t`, `tc_lflag_t`),
+/// `cc_t`, `NCCS` and `speed_t` and differ in which fields they have and in
+/// what order. Targets without a definition hit the `@compileError` at the
+/// end.
+pub const termios = switch (native_os) {
+    .linux => std.os.linux.termios,
+    .macos, .ios, .tvos, .watchos => extern struct {
+        iflag: tc_iflag_t,
+        oflag: tc_oflag_t,
+        cflag: tc_cflag_t,
+        lflag: tc_lflag_t,
+        cc: [NCCS]cc_t,
+        // Explicitly 8-byte aligned; the BSD variant below has the same
+        // fields without this alignment.
+        ispeed: speed_t align(8),
+        ospeed: speed_t,
+    },
+    .freebsd, .kfreebsd, .netbsd, .dragonfly, .openbsd => extern struct {
+        iflag: tc_iflag_t,
+        oflag: tc_oflag_t,
+        cflag: tc_cflag_t,
+        lflag: tc_lflag_t,
+        cc: [NCCS]cc_t,
+        ispeed: speed_t,
+        ospeed: speed_t,
+    },
+    .haiku => extern struct {
+        iflag: tc_iflag_t,
+        oflag: tc_oflag_t,
+        cflag: tc_cflag_t,
+        lflag: tc_lflag_t,
+        line: cc_t,
+        // Unlike the emscripten/wasi variant, the speeds come before `cc`.
+        ispeed: speed_t,
+        ospeed: speed_t,
+        cc: [NCCS]cc_t,
+    },
+    .solaris, .illumos => extern struct {
+        // No speed fields in this variant.
+        iflag: tc_iflag_t,
+        oflag: tc_oflag_t,
+        cflag: tc_cflag_t,
+        lflag: tc_lflag_t,
+        cc: [NCCS]cc_t,
+    },
+    .emscripten, .wasi => extern struct {
+        iflag: tc_iflag_t,
+        oflag: tc_oflag_t,
+        cflag: tc_cflag_t,
+        lflag: tc_lflag_t,
+        // Refers to the file-local `cc_t`, like every other variant.
+        line: cc_t,
+        cc: [NCCS]cc_t,
+        ispeed: speed_t,
+        ospeed: speed_t,
+    },
+    else => @compileError("target libc does not have termios"),
+};
+
+pub const tc_iflag_t = switch (native_os) { // per-OS bit layout of the `iflag` field of `termios`; unlisted targets hit the @compileError at the end
+ .linux => std.os.linux.tc_iflag_t, // Linux reuses the definition from std.os.linux
+ .macos, .ios, .tvos, .watchos => packed struct(u64) { // the only 64-bit variant; all others below are 32 bits wide
+ IGNBRK: bool = false,
+ BRKINT: bool = false,
+ IGNPAR: bool = false,
+ PARMRK: bool = false,
+ INPCK: bool = false,
+ ISTRIP: bool = false,
+ INLCR: bool = false,
+ IGNCR: bool = false,
+ ICRNL: bool = false,
+ IXON: bool = false,
+ IXOFF: bool = false,
+ IXANY: bool = false,
+ _12: u1 = 0,
+ IMAXBEL: bool = false,
+ IUTF8: bool = false,
+ _: u49 = 0,
+ },
+ .netbsd, .freebsd, .kfreebsd, .dragonfly => packed struct(u32) {
+ IGNBRK: bool = false,
+ BRKINT: bool = false,
+ IGNPAR: bool = false,
+ PARMRK: bool = false,
+ INPCK: bool = false,
+ ISTRIP: bool = false,
+ INLCR: bool = false,
+ IGNCR: bool = false,
+ ICRNL: bool = false,
+ IXON: bool = false,
+ IXOFF: bool = false,
+ IXANY: bool = false,
+ _12: u1 = 0,
+ IMAXBEL: bool = false,
+ _: u18 = 0,
+ },
+ .openbsd => packed struct(u32) { // same as the variant above except that bit 12 is named IUCLC
+ IGNBRK: bool = false,
+ BRKINT: bool = false,
+ IGNPAR: bool = false,
+ PARMRK: bool = false,
+ INPCK: bool = false,
+ ISTRIP: bool = false,
+ INLCR: bool = false,
+ IGNCR: bool = false,
+ ICRNL: bool = false,
+ IXON: bool = false,
+ IXOFF: bool = false,
+ IXANY: bool = false,
+ IUCLC: bool = false,
+ IMAXBEL: bool = false,
+ _: u18 = 0,
+ },
+ .haiku => packed struct(u32) {
+ IGNBRK: bool = false,
+ BRKINT: bool = false,
+ IGNPAR: bool = false,
+ PARMRK: bool = false,
+ INPCK: bool = false,
+ ISTRIP: bool = false,
+ INLCR: bool = false,
+ IGNCR: bool = false,
+ ICRNL: bool = false,
+ IUCLC: bool = false,
+ IXON: bool = false,
+ IXANY: bool = false,
+ IXOFF: bool = false,
+ _: u19 = 0,
+ },
+ .solaris, .illumos => packed struct(u32) {
+ IGNBRK: bool = false,
+ BRKINT: bool = false,
+ IGNPAR: bool = false,
+ PARMRK: bool = false,
+ INPCK: bool = false,
+ ISTRIP: bool = false,
+ INLCR: bool = false,
+ IGNCR: bool = false,
+ ICRNL: bool = false,
+ IUCLC: bool = false,
+ IXON: bool = false,
+ IXANY: bool = false,
+ _12: u1 = 0,
+ IMAXBEL: bool = false,
+ _14: u1 = 0,
+ DOSMODE: bool = false,
+ _: u16 = 0,
+ },
+ .emscripten, .wasi => packed struct(u32) {
+ IGNBRK: bool = false,
+ BRKINT: bool = false,
+ IGNPAR: bool = false,
+ PARMRK: bool = false,
+ INPCK: bool = false,
+ ISTRIP: bool = false,
+ INLCR: bool = false,
+ IGNCR: bool = false,
+ ICRNL: bool = false,
+ IUCLC: bool = false,
+ IXON: bool = false,
+ IXANY: bool = false,
+ IXOFF: bool = false,
+ IMAXBEL: bool = false,
+ IUTF8: bool = false,
+ _: u17 = 0,
+ },
+ else => @compileError("target libc does not have tc_iflag_t"),
+};
+
+pub const tc_oflag_t = switch (native_os) { // per-OS bit layout of the `oflag` field of `termios`; unlisted targets hit the @compileError at the end
+ .linux => std.os.linux.tc_oflag_t, // Linux reuses the definition from std.os.linux
+ .macos, .ios, .tvos, .watchos => packed struct(u64) { // the only 64-bit variant; all others below are 32 bits wide
+ OPOST: bool = false,
+ ONLCR: bool = false,
+ OXTABS: bool = false,
+ ONOEOT: bool = false,
+ OCRNL: bool = false,
+ ONOCR: bool = false,
+ ONLRET: bool = false,
+ OFILL: bool = false,
+ NLDLY: u2 = 0, // the *DLY members are small integers rather than single-bit flags
+ TABDLY: u2 = 0,
+ CRDLY: u2 = 0,
+ FFDLY: u1 = 0,
+ BSDLY: u1 = 0,
+ VTDLY: u1 = 0,
+ OFDEL: bool = false,
+ _: u46 = 0,
+ },
+ .netbsd => packed struct(u32) {
+ OPOST: bool = false,
+ ONLCR: bool = false,
+ OXTABS: bool = false,
+ ONOEOT: bool = false,
+ OCRNL: bool = false,
+ _5: u1 = 0,
+ ONOCR: bool = false,
+ ONLRET: bool = false,
+ _: u24 = 0,
+ },
+ .openbsd => packed struct(u32) { // same as the NetBSD variant except that bit 5 is named OLCUC
+ OPOST: bool = false,
+ ONLCR: bool = false,
+ OXTABS: bool = false,
+ ONOEOT: bool = false,
+ OCRNL: bool = false,
+ OLCUC: bool = false,
+ ONOCR: bool = false,
+ ONLRET: bool = false,
+ _: u24 = 0,
+ },
+ .freebsd, .kfreebsd, .dragonfly => packed struct(u32) {
+ OPOST: bool = false,
+ ONLCR: bool = false,
+ _2: u1 = 0,
+ ONOEOT: bool = false,
+ OCRNL: bool = false,
+ ONOCR: bool = false,
+ ONLRET: bool = false,
+ _: u25 = 0,
+ },
+ .solaris, .illumos => packed struct(u32) {
+ OPOST: bool = false,
+ OLCUC: bool = false,
+ ONLCR: bool = false,
+ OCRNL: bool = false,
+ ONOCR: bool = false,
+ ONLRET: bool = false,
+ OFILL: bool = false,
+ OFDEL: bool = false,
+ NLDLY: u1 = 0,
+ CRDLY: u2 = 0,
+ TABDLY: u2 = 0,
+ BSDLY: u1 = 0,
+ VTDLY: u1 = 0,
+ FFDLY: u1 = 0,
+ PAGEOUT: bool = false,
+ WRAP: bool = false,
+ _: u14 = 0,
+ },
+ .haiku, .wasi, .emscripten => packed struct(u32) { // the Solaris layout without PAGEOUT and WRAP
+ OPOST: bool = false,
+ OLCUC: bool = false,
+ ONLCR: bool = false,
+ OCRNL: bool = false,
+ ONOCR: bool = false,
+ ONLRET: bool = false,
+ OFILL: bool = false,
+ OFDEL: bool = false,
+ NLDLY: u1 = 0,
+ CRDLY: u2 = 0,
+ TABDLY: u2 = 0,
+ BSDLY: u1 = 0,
+ VTDLY: u1 = 0,
+ FFDLY: u1 = 0,
+ _: u16 = 0,
+ },
+ else => @compileError("target libc does not have tc_oflag_t"),
+};
+
+/// Character-size selector stored in the `CSIZE` field of `tc_cflag_t`.
+/// Linux takes its definition from `std.os.linux`. Haiku only has a one-bit
+/// field and therefore only the 7- and 8-bit choices; every other target gets
+/// the two-bit, four-value form.
+pub const CSIZE = switch (native_os) {
+ .linux => std.os.linux.CSIZE,
+ .haiku => enum(u1) {
+ CS7 = 0,
+ CS8 = 1,
+ },
+ else => enum(u2) {
+ CS5 = 0,
+ CS6 = 1,
+ CS7 = 2,
+ CS8 = 3,
+ },
+};
+
+/// Terminal control-mode flag word, expressed as a packed bitfield whose layout
+/// is selected per target OS. Fields are declared from the least significant
+/// bit upward; fields named `_` or `_N` are zero-defaulted filler bits that
+/// carry no flag name. Linux reuses the definition from `std.os.linux`, and any
+/// OS without a prong is rejected at compile time.
+pub const tc_cflag_t = switch (native_os) {
+ .linux => std.os.linux.tc_cflag_t,
+ // Darwin family: 64-bit flag word with the same low-bit ordering as the
+ // other BSD-derived prongs below.
+ .macos, .ios, .tvos, .watchos => packed struct(u64) {
+ CIGNORE: bool = false,
+ _1: u7 = 0,
+ // CSIZE is the 0x300 mask (bits 8-9).
+ CSIZE: CSIZE = .CS5,
+ // CSTOPB is 0x400 (bit 10), directly after CSIZE. It was previously
+ // placed at bit 6, which followed a mistyped 0x0000040 constant.
+ CSTOPB: bool = false,
+ CREAD: bool = false,
+ PARENB: bool = false,
+ PARODD: bool = false,
+ HUPCL: bool = false,
+ CLOCAL: bool = false,
+ CCTS_OFLOW: bool = false,
+ CRTS_IFLOW: bool = false,
+ CDTR_IFLOW: bool = false,
+ CDSR_OFLOW: bool = false,
+ CCAR_OFLOW: bool = false,
+ _: u43 = 0,
+ },
+ .freebsd, .kfreebsd => packed struct(u32) {
+ CIGNORE: bool = false,
+ _1: u7 = 0,
+ CSIZE: CSIZE = .CS5,
+ CSTOPB: bool = false,
+ CREAD: bool = false,
+ PARENB: bool = false,
+ PARODD: bool = false,
+ HUPCL: bool = false,
+ CLOCAL: bool = false,
+ CCTS_OFLOW: bool = false,
+ CRTS_IFLOW: bool = false,
+ CDTR_IFLOW: bool = false,
+ CDSR_OFLOW: bool = false,
+ CCAR_OFLOW: bool = false,
+ CNO_RTSDTR: bool = false,
+ _: u10 = 0,
+ },
+ .netbsd => packed struct(u32) {
+ CIGNORE: bool = false,
+ _1: u7 = 0,
+ CSIZE: CSIZE = .CS5,
+ CSTOPB: bool = false,
+ CREAD: bool = false,
+ PARENB: bool = false,
+ PARODD: bool = false,
+ HUPCL: bool = false,
+ CLOCAL: bool = false,
+ CRTSCTS: bool = false,
+ CDTRCTS: bool = false,
+ _18: u2 = 0,
+ MDMBUF: bool = false,
+ _: u11 = 0,
+ },
+ // Same as the FreeBSD prong without CNO_RTSDTR.
+ .dragonfly => packed struct(u32) {
+ CIGNORE: bool = false,
+ _1: u7 = 0,
+ CSIZE: CSIZE = .CS5,
+ CSTOPB: bool = false,
+ CREAD: bool = false,
+ PARENB: bool = false,
+ PARODD: bool = false,
+ HUPCL: bool = false,
+ CLOCAL: bool = false,
+ CCTS_OFLOW: bool = false,
+ CRTS_IFLOW: bool = false,
+ CDTR_IFLOW: bool = false,
+ CDSR_OFLOW: bool = false,
+ CCAR_OFLOW: bool = false,
+ _: u11 = 0,
+ },
+ .openbsd => packed struct(u32) {
+ CIGNORE: bool = false,
+ _1: u7 = 0,
+ CSIZE: CSIZE = .CS5,
+ CSTOPB: bool = false,
+ CREAD: bool = false,
+ PARENB: bool = false,
+ PARODD: bool = false,
+ HUPCL: bool = false,
+ CLOCAL: bool = false,
+ CRTSCTS: bool = false,
+ _17: u3 = 0,
+ MDMBUF: bool = false,
+ _: u11 = 0,
+ },
+ // Haiku's CSIZE is the one-bit enum (CS7/CS8), so the default is .CS7.
+ .haiku => packed struct(u32) {
+ _0: u5 = 0,
+ CSIZE: CSIZE = .CS7,
+ CSTOPB: bool = false,
+ CREAD: bool = false,
+ PARENB: bool = false,
+ PARODD: bool = false,
+ HUPCL: bool = false,
+ CLOCAL: bool = false,
+ XLOBLK: bool = false,
+ CTSFLOW: bool = false,
+ RTSFLOW: bool = false,
+ _: u17 = 0,
+ },
+ // The fields of this prong add up to exactly 32 bits, so there is no
+ // trailing `_` filler; CRTSXOFF and CRTSCTS are the two highest bits.
+ .solaris, .illumos => packed struct(u32) {
+ _0: u4 = 0,
+ CSIZE: CSIZE = .CS5,
+ CSTOPB: bool = false,
+ CREAD: bool = false,
+ PARENB: bool = false,
+ PARODD: bool = false,
+ HUPCL: bool = false,
+ CLOCAL: bool = false,
+ RCV1EN: bool = false,
+ XMT1EN: bool = false,
+ LOBLK: bool = false,
+ XCLUDE: bool = false,
+ _16: u4 = 0,
+ PAREXT: bool = false,
+ CBAUDEXT: bool = false,
+ CIBAUDEXT: bool = false,
+ _23: u7 = 0,
+ CRTSXOFF: bool = false,
+ CRTSCTS: bool = false,
+ },
+ .wasi, .emscripten => packed struct(u32) {
+ _0: u4 = 0,
+ CSIZE: CSIZE = .CS5,
+ CSTOPB: bool = false,
+ CREAD: bool = false,
+ PARENB: bool = false,
+ PARODD: bool = false,
+ HUPCL: bool = false,
+ CLOCAL: bool = false,
+ _: u20 = 0,
+ },
+ else => @compileError("target libc does not have tc_cflag_t"),
+};
+
+/// Terminal local-mode flag word, expressed as a packed bitfield whose layout
+/// is selected per target OS. Fields are declared from the least significant
+/// bit upward; fields named `_` or `_N` are zero-defaulted filler bits that
+/// carry no flag name. Linux reuses the definition from `std.os.linux`, and any
+/// OS without a prong is rejected at compile time.
+pub const tc_lflag_t = switch (native_os) {
+ .linux => std.os.linux.tc_lflag_t,
+ // Darwin family: the low 32 bits match the BSD prong below; the upper
+ // 32 bits of the 64-bit word are filler.
+ .macos, .ios, .tvos, .watchos => packed struct(u64) {
+ ECHOKE: bool = false,
+ ECHOE: bool = false,
+ ECHOK: bool = false,
+ ECHO: bool = false,
+ ECHONL: bool = false,
+ ECHOPRT: bool = false,
+ ECHOCTL: bool = false,
+ ISIG: bool = false,
+ ICANON: bool = false,
+ ALTWERASE: bool = false,
+ IEXTEN: bool = false,
+ EXTPROC: bool = false,
+ _12: u10 = 0,
+ TOSTOP: bool = false,
+ FLUSHO: bool = false,
+ _24: u1 = 0,
+ NOKERNINFO: bool = false,
+ _26: u3 = 0,
+ PENDIN: bool = false,
+ _30: u1 = 0,
+ NOFLSH: bool = false,
+ _: u32 = 0,
+ },
+ // The named fields end at bit 31 (NOFLSH), so no trailing filler is needed.
+ .netbsd, .freebsd, .kfreebsd, .dragonfly => packed struct(u32) {
+ ECHOKE: bool = false,
+ ECHOE: bool = false,
+ ECHOK: bool = false,
+ ECHO: bool = false,
+ ECHONL: bool = false,
+ ECHOPRT: bool = false,
+ ECHOCTL: bool = false,
+ ISIG: bool = false,
+ ICANON: bool = false,
+ ALTWERASE: bool = false,
+ IEXTEN: bool = false,
+ EXTPROC: bool = false,
+ _12: u10 = 0,
+ TOSTOP: bool = false,
+ FLUSHO: bool = false,
+ _24: u1 = 0,
+ NOKERNINFO: bool = false,
+ _26: u3 = 0,
+ PENDIN: bool = false,
+ _30: u1 = 0,
+ NOFLSH: bool = false,
+ },
+ // Identical to the BSD prong above except that bit 24 is named XCASE.
+ .openbsd => packed struct(u32) {
+ ECHOKE: bool = false,
+ ECHOE: bool = false,
+ ECHOK: bool = false,
+ ECHO: bool = false,
+ ECHONL: bool = false,
+ ECHOPRT: bool = false,
+ ECHOCTL: bool = false,
+ ISIG: bool = false,
+ ICANON: bool = false,
+ ALTWERASE: bool = false,
+ IEXTEN: bool = false,
+ EXTPROC: bool = false,
+ _12: u10 = 0,
+ TOSTOP: bool = false,
+ FLUSHO: bool = false,
+ XCASE: bool = false,
+ NOKERNINFO: bool = false,
+ _26: u3 = 0,
+ PENDIN: bool = false,
+ _30: u1 = 0,
+ NOFLSH: bool = false,
+ },
+ // The remaining prongs start with ISIG/ICANON instead of the ECHO* group.
+ .haiku => packed struct(u32) {
+ ISIG: bool = false,
+ ICANON: bool = false,
+ XCASE: bool = false,
+ ECHO: bool = false,
+ ECHOE: bool = false,
+ ECHOK: bool = false,
+ ECHONL: bool = false,
+ NOFLSH: bool = false,
+ TOSTOP: bool = false,
+ IEXTEN: bool = false,
+ ECHOCTL: bool = false,
+ ECHOPRT: bool = false,
+ ECHOKE: bool = false,
+ FLUSHO: bool = false,
+ PENDIN: bool = false,
+ _: u17 = 0,
+ },
+ .solaris, .illumos => packed struct(u32) {
+ ISIG: bool = false,
+ ICANON: bool = false,
+ XCASE: bool = false,
+ ECHO: bool = false,
+ ECHOE: bool = false,
+ ECHOK: bool = false,
+ ECHONL: bool = false,
+ NOFLSH: bool = false,
+ TOSTOP: bool = false,
+ ECHOCTL: bool = false,
+ ECHOPRT: bool = false,
+ ECHOKE: bool = false,
+ DEFECHO: bool = false,
+ FLUSHO: bool = false,
+ PENDIN: bool = false,
+ IEXTEN: bool = false,
+ _: u16 = 0,
+ },
+ .wasi, .emscripten => packed struct(u32) {
+ ISIG: bool = false,
+ ICANON: bool = false,
+ _2: u1 = 0,
+ ECHO: bool = false,
+ ECHOE: bool = false,
+ ECHOK: bool = false,
+ ECHONL: bool = false,
+ NOFLSH: bool = false,
+ TOSTOP: bool = false,
+ // Bits 9-14 are unnamed here; IEXTEN follows at bit 15.
+ _9: u6 = 0,
+ IEXTEN: bool = false,
+ _: u16 = 0,
+ },
+ else => @compileError("target libc does not have tc_lflag_t"),
+};
+
+/// Terminal line-speed (baud rate) values as a per-OS enum. Depending on the
+/// target, a tag's integer value is either the baud rate itself or a small
+/// code standing in for it; the tag name always carries the nominal rate.
+/// Linux reuses the definition from `std.os.linux`, and any OS without a prong
+/// is rejected at compile time.
+pub const speed_t = switch (native_os) {
+ .linux => std.os.linux.speed_t,
+ // Values are the baud rate itself, stored in a 64-bit integer.
+ // NOTE(review): OpenBSD shares the 64-bit backing type with the Darwin
+ // family here, while its flag words above are 32-bit; confirm the width
+ // against OpenBSD's own `speed_t` definition.
+ .macos, .ios, .tvos, .watchos, .openbsd => enum(u64) {
+ B0 = 0,
+ B50 = 50,
+ B75 = 75,
+ B110 = 110,
+ B134 = 134,
+ B150 = 150,
+ B200 = 200,
+ B300 = 300,
+ B600 = 600,
+ B1200 = 1200,
+ B1800 = 1800,
+ B2400 = 2400,
+ B4800 = 4800,
+ B9600 = 9600,
+ B19200 = 19200,
+ B38400 = 38400,
+ B7200 = 7200,
+ B14400 = 14400,
+ B28800 = 28800,
+ B57600 = 57600,
+ B76800 = 76800,
+ B115200 = 115200,
+ B230400 = 230400,
+ },
+ // Values are the baud rate itself, stored in a C unsigned int.
+ .freebsd, .kfreebsd, .netbsd => enum(c_uint) {
+ B0 = 0,
+ B50 = 50,
+ B75 = 75,
+ B110 = 110,
+ B134 = 134,
+ B150 = 150,
+ B200 = 200,
+ B300 = 300,
+ B600 = 600,
+ B1200 = 1200,
+ B1800 = 1800,
+ B2400 = 2400,
+ B4800 = 4800,
+ B9600 = 9600,
+ B19200 = 19200,
+ B38400 = 38400,
+ B7200 = 7200,
+ B14400 = 14400,
+ B28800 = 28800,
+ B57600 = 57600,
+ B76800 = 76800,
+ B115200 = 115200,
+ B230400 = 230400,
+ B460800 = 460800,
+ B500000 = 500000,
+ B921600 = 921600,
+ B1000000 = 1000000,
+ B1500000 = 1500000,
+ B2000000 = 2000000,
+ B2500000 = 2500000,
+ B3000000 = 3000000,
+ B3500000 = 3500000,
+ B4000000 = 4000000,
+ },
+ // Like the FreeBSD prong, but the list stops at B921600 and has no B500000.
+ .dragonfly => enum(c_uint) {
+ B0 = 0,
+ B50 = 50,
+ B75 = 75,
+ B110 = 110,
+ B134 = 134,
+ B150 = 150,
+ B200 = 200,
+ B300 = 300,
+ B600 = 600,
+ B1200 = 1200,
+ B1800 = 1800,
+ B2400 = 2400,
+ B4800 = 4800,
+ B9600 = 9600,
+ B19200 = 19200,
+ B38400 = 38400,
+ B7200 = 7200,
+ B14400 = 14400,
+ B28800 = 28800,
+ B57600 = 57600,
+ B76800 = 76800,
+ B115200 = 115200,
+ B230400 = 230400,
+ B460800 = 460800,
+ B921600 = 921600,
+ },
+ // Values are consecutive one-byte codes, not baud rates.
+ .haiku => enum(u8) {
+ B0 = 0x00,
+ B50 = 0x01,
+ B75 = 0x02,
+ B110 = 0x03,
+ B134 = 0x04,
+ B150 = 0x05,
+ B200 = 0x06,
+ B300 = 0x07,
+ B600 = 0x08,
+ B1200 = 0x09,
+ B1800 = 0x0A,
+ B2400 = 0x0B,
+ B4800 = 0x0C,
+ B9600 = 0x0D,
+ B19200 = 0x0E,
+ B38400 = 0x0F,
+ B57600 = 0x10,
+ B115200 = 0x11,
+ B230400 = 0x12,
+ B31250 = 0x13,
+ },
+ // Values are consecutive codes 0..31, not baud rates.
+ .solaris, .illumos => enum(c_uint) {
+ B0 = 0,
+ B50 = 1,
+ B75 = 2,
+ B110 = 3,
+ B134 = 4,
+ B150 = 5,
+ B200 = 6,
+ B300 = 7,
+ B600 = 8,
+ B1200 = 9,
+ B1800 = 10,
+ B2400 = 11,
+ B4800 = 12,
+ B9600 = 13,
+ B19200 = 14,
+ B38400 = 15,
+ B57600 = 16,
+ B76800 = 17,
+ B115200 = 18,
+ B153600 = 19,
+ B230400 = 20,
+ B307200 = 21,
+ B460800 = 22,
+ B921600 = 23,
+ B1000000 = 24,
+ B1152000 = 25,
+ B1500000 = 26,
+ B2000000 = 27,
+ B2500000 = 28,
+ B3000000 = 29,
+ B3500000 = 30,
+ B4000000 = 31,
+ },
+ // Values are octal codes: 0o00..0o17 for rates up to 38400, then a second
+ // range starting at 0o10001 for the faster rates.
+ .emscripten, .wasi => enum(u32) {
+ B0 = 0o0000000,
+ B50 = 0o0000001,
+ B75 = 0o0000002,
+ B110 = 0o0000003,
+ B134 = 0o0000004,
+ B150 = 0o0000005,
+ B200 = 0o0000006,
+ B300 = 0o0000007,
+ B600 = 0o0000010,
+ B1200 = 0o0000011,
+ B1800 = 0o0000012,
+ B2400 = 0o0000013,
+ B4800 = 0o0000014,
+ B9600 = 0o0000015,
+ B19200 = 0o0000016,
+ B38400 = 0o0000017,
+
+ B57600 = 0o0010001,
+ B115200 = 0o0010002,
+ B230400 = 0o0010003,
+ B460800 = 0o0010004,
+ B500000 = 0o0010005,
+ B576000 = 0o0010006,
+ B921600 = 0o0010007,
+ B1000000 = 0o0010010,
+ B1152000 = 0o0010011,
+ B1500000 = 0o0010012,
+ B2000000 = 0o0010013,
+ B2500000 = 0o0010014,
+ B3000000 = 0o0010015,
+ B3500000 = 0o0010016,
+ B4000000 = 0o0010017,
+ },
+ else => @compileError("target libc does not have speed_t"),
+};
+
+/// Type of the `whence` argument accepted by `lseek`: the dedicated WASI type
+/// on WASI targets, a plain C `int` everywhere else.
+pub const whence_t = switch (native_os) {
+ .wasi => std.os.wasi.whence_t,
+ else => c_int,
+};
// Unix-like systems
-pub usingnamespace switch (builtin.os.tag) {
+pub usingnamespace switch (native_os) {
.netbsd, .windows => struct {},
else => struct {
pub const DIR = opaque {};
@@ -217,33 +1521,54 @@ pub usingnamespace switch (builtin.os.tag) {
pub extern "c" fn socket(domain: c_uint, sock_type: c_uint, protocol: c_uint) c_int;
- pub extern "c" fn stat(noalias path: [*:0]const u8, noalias buf: *c.Stat) c_int;
-
pub extern "c" fn alarm(seconds: c_uint) c_uint;
pub extern "c" fn msync(addr: *align(page_size) const anyopaque, len: usize, flags: c_int) c_int;
},
};
-pub usingnamespace switch (builtin.os.tag) {
- .netbsd, .macos, .ios, .watchos, .tvos, .windows => struct {},
- else => struct {
- pub extern "c" fn fstat(fd: c.fd_t, buf: *c.Stat) c_int;
- pub extern "c" fn readdir(dp: *c.DIR) ?*c.dirent;
+/// `fstat` bound to the libc symbol appropriate for the target: the
+/// `$INODE64`-suffixed symbol on x86_64 macOS, `__fstat50` on NetBSD, and the
+/// plain `fstat` symbol otherwise (including macOS on other architectures).
+pub const fstat = switch (native_os) {
+ .macos => switch (native_arch) {
+ .x86_64 => private.@"fstat$INODE64",
+ else => private.fstat,
},
+ .netbsd => private.__fstat50,
+ else => private.fstat,
};
-pub usingnamespace switch (builtin.os.tag) {
- .macos, .ios, .watchos, .tvos => struct {},
- else => struct {
- pub extern "c" fn realpath(noalias file_name: [*:0]const u8, noalias resolved_name: [*]u8) ?[*:0]u8;
- pub extern "c" fn fstatat(dirfd: c.fd_t, path: [*:0]const u8, stat_buf: *c.Stat, flags: u32) c_int;
+/// `fstatat` bound to the libc symbol appropriate for the target: the
+/// `$INODE64`-suffixed symbol on x86_64 macOS and the plain `fstatat` symbol
+/// otherwise (including macOS on other architectures).
+pub const fstatat = switch (native_os) {
+ .macos => switch (native_arch) {
+ .x86_64 => private.@"fstatat$INODE64",
+ else => private.fstatat,
},
+ else => private.fstatat,
+};
+
+/// `readdir` bound to the libc symbol appropriate for the target. Referencing
+/// it on Windows is a compile error; x86_64 macOS uses the `$INODE64`-suffixed
+/// symbol; every other target uses the plain `readdir` symbol.
+pub const readdir = switch (native_os) {
+ .windows => @compileError("not available"),
+ .macos => if (native_arch == .x86_64)
+ private.@"readdir$INODE64"
+ else
+ private.readdir,
+ else => private.readdir,
+};
+
+/// `realpath` bound to the libc symbol appropriate for the target: the
+/// `$DARWIN_EXTSN`-suffixed symbol on macOS, iOS, watchOS and tvOS, and the
+/// plain `realpath` symbol otherwise.
+pub const realpath = switch (native_os) {
+ .macos, .ios, .watchos, .tvos => private.@"realpath$DARWIN_EXTSN",
+ else => private.realpath,
+};
+
+/// `stat` bound to the libc symbol appropriate for the target: the
+/// `$INODE64`-suffixed symbol on x86_64 macOS and the plain `stat` symbol
+/// otherwise (including macOS on other architectures).
+pub const stat = switch (native_os) {
+ .macos => switch (native_arch) {
+ .x86_64 => private.@"stat$INODE64",
+ else => private.stat,
+ },
+ else => private.stat,
};
pub fn getErrno(rc: anytype) c.E {
if (rc == -1) {
- return @as(c.E, @enumFromInt(c._errno().*));
+ return @enumFromInt(c._errno().*);
} else {
return .SUCCESS;
}
@@ -263,8 +1588,8 @@ pub extern "c" fn _exit(code: c_int) noreturn;
pub extern "c" fn isatty(fd: c.fd_t) c_int;
pub extern "c" fn close(fd: c.fd_t) c_int;
pub extern "c" fn lseek(fd: c.fd_t, offset: c.off_t, whence: whence_t) c.off_t;
-pub extern "c" fn open(path: [*:0]const u8, oflag: c_uint, ...) c_int;
-pub extern "c" fn openat(fd: c_int, path: [*:0]const u8, oflag: c_uint, ...) c_int;
+pub extern "c" fn open(path: [*:0]const u8, oflag: O, ...) c_int;
+pub extern "c" fn openat(fd: c_int, path: [*:0]const u8, oflag: O, ...) c_int;
pub extern "c" fn ftruncate(fd: c_int, length: c.off_t) c_int;
pub extern "c" fn raise(sig: c_int) c_int;
pub extern "c" fn read(fd: c.fd_t, buf: [*]u8, nbyte: usize) isize;
@@ -275,7 +1600,7 @@ pub extern "c" fn writev(fd: c_int, iov: [*]const iovec_const, iovcnt: c_uint) i
pub extern "c" fn pwritev(fd: c_int, iov: [*]const iovec_const, iovcnt: c_uint, offset: c.off_t) isize;
pub extern "c" fn write(fd: c.fd_t, buf: [*]const u8, nbyte: usize) isize;
pub extern "c" fn pwrite(fd: c.fd_t, buf: [*]const u8, nbyte: usize, offset: c.off_t) isize;
-pub extern "c" fn mmap(addr: ?*align(page_size) anyopaque, len: usize, prot: c_uint, flags: c_uint, fd: c.fd_t, offset: c.off_t) *anyopaque;
+pub extern "c" fn mmap(addr: ?*align(page_size) anyopaque, len: usize, prot: c_uint, flags: MAP, fd: c.fd_t, offset: c.off_t) *anyopaque;
pub extern "c" fn munmap(addr: *align(page_size) const anyopaque, len: usize) c_int;
pub extern "c" fn mprotect(addr: *align(page_size) anyopaque, len: usize, prot: c_uint) c_int;
pub extern "c" fn link(oldpath: [*:0]const u8, newpath: [*:0]const u8, flags: c_int) c_int;
@@ -348,7 +1673,7 @@ pub extern "c" fn recv(
arg1: ?*anyopaque,
arg2: usize,
arg3: c_int,
-) if (builtin.os.tag == .windows) c_int else isize;
+) if (native_os == .windows) c_int else isize;
pub extern "c" fn recvfrom(
sockfd: c.fd_t,
noalias buf: *anyopaque,
@@ -356,7 +1681,7 @@ pub extern "c" fn recvfrom(
flags: u32,
noalias src_addr: ?*c.sockaddr,
noalias addrlen: ?*c.socklen_t,
-) if (builtin.os.tag == .windows) c_int else isize;
+) if (native_os == .windows) c_int else isize;
pub extern "c" fn recvmsg(sockfd: c.fd_t, msg: *c.msghdr, flags: u32) isize;
pub extern "c" fn kill(pid: c.pid_t, sig: c_int) c_int;
@@ -492,18 +1817,18 @@ pub extern "c" fn dn_expand(
length: c_int,
) c_int;
-pub const PTHREAD_MUTEX_INITIALIZER = c.pthread_mutex_t{};
-pub extern "c" fn pthread_mutex_lock(mutex: *c.pthread_mutex_t) c.E;
-pub extern "c" fn pthread_mutex_unlock(mutex: *c.pthread_mutex_t) c.E;
-pub extern "c" fn pthread_mutex_trylock(mutex: *c.pthread_mutex_t) c.E;
-pub extern "c" fn pthread_mutex_destroy(mutex: *c.pthread_mutex_t) c.E;
+pub const PTHREAD_MUTEX_INITIALIZER = pthread_mutex_t{};
+pub extern "c" fn pthread_mutex_lock(mutex: *pthread_mutex_t) c.E;
+pub extern "c" fn pthread_mutex_unlock(mutex: *pthread_mutex_t) c.E;
+pub extern "c" fn pthread_mutex_trylock(mutex: *pthread_mutex_t) c.E;
+pub extern "c" fn pthread_mutex_destroy(mutex: *pthread_mutex_t) c.E;
-pub const PTHREAD_COND_INITIALIZER = c.pthread_cond_t{};
-pub extern "c" fn pthread_cond_wait(noalias cond: *c.pthread_cond_t, noalias mutex: *c.pthread_mutex_t) c.E;
-pub extern "c" fn pthread_cond_timedwait(noalias cond: *c.pthread_cond_t, noalias mutex: *c.pthread_mutex_t, noalias abstime: *const c.timespec) c.E;
-pub extern "c" fn pthread_cond_signal(cond: *c.pthread_cond_t) c.E;
-pub extern "c" fn pthread_cond_broadcast(cond: *c.pthread_cond_t) c.E;
-pub extern "c" fn pthread_cond_destroy(cond: *c.pthread_cond_t) c.E;
+pub const PTHREAD_COND_INITIALIZER = pthread_cond_t{};
+pub extern "c" fn pthread_cond_wait(noalias cond: *pthread_cond_t, noalias mutex: *pthread_mutex_t) c.E;
+pub extern "c" fn pthread_cond_timedwait(noalias cond: *pthread_cond_t, noalias mutex: *pthread_mutex_t, noalias abstime: *const c.timespec) c.E;
+pub extern "c" fn pthread_cond_signal(cond: *pthread_cond_t) c.E;
+pub extern "c" fn pthread_cond_broadcast(cond: *pthread_cond_t) c.E;
+pub extern "c" fn pthread_cond_destroy(cond: *pthread_cond_t) c.E;
pub extern "c" fn pthread_rwlock_destroy(rwl: *c.pthread_rwlock_t) callconv(.C) c.E;
pub extern "c" fn pthread_rwlock_rdlock(rwl: *c.pthread_rwlock_t) callconv(.C) c.E;
@@ -542,14 +1867,14 @@ pub usingnamespace if (builtin.target.isAndroid()) struct {
// android bionic libc does not implement getcontext,
// and std.os.linux.getcontext also cannot be built for
// bionic libc currently.
-} else if (builtin.os.tag == .linux and builtin.target.isMusl()) struct {
+} else if (native_os == .linux and builtin.target.isMusl()) struct {
// musl does not implement getcontext
pub const getcontext = std.os.linux.getcontext;
} else struct {
pub extern "c" fn getcontext(ucp: *std.os.ucontext_t) c_int;
};
-pub const max_align_t = if (builtin.abi == .msvc)
+pub const max_align_t = if (native_abi == .msvc)
f64
else if (builtin.target.isDarwin())
c_longdouble
@@ -558,3 +1883,25 @@ else
a: c_longlong,
b: c_longdouble,
};
+
+/// File-private extern declarations. The public `fstat`, `fstatat`, `readdir`,
+/// `realpath` and `stat` constants in this file pick one of these symbols per
+/// target, so the declarations are grouped by the call they implement.
+const private = struct {
+ // fstat
+ extern "c" fn fstat(fd: c.fd_t, buf: *c.Stat) c_int;
+ /// macOS symbol carrying the $INODE64 suffix; selected on x86_64 only,
+ /// it is not needed on aarch64.
+ extern "c" fn @"fstat$INODE64"(fd: c.fd_t, buf: *c.Stat) c_int;
+ /// NetBSD versioned symbol.
+ extern "c" fn __fstat50(fd: c.fd_t, buf: *c.Stat) c_int;
+
+ // fstatat
+ extern "c" fn fstatat(dirfd: c.fd_t, path: [*:0]const u8, buf: *c.Stat, flag: u32) c_int;
+ /// macOS symbol carrying the $INODE64 suffix; selected on x86_64 only,
+ /// it is not needed on aarch64.
+ extern "c" fn @"fstatat$INODE64"(dirfd: c.fd_t, path: [*:0]const u8, buf: *c.Stat, flag: u32) c_int;
+
+ // readdir
+ extern "c" fn readdir(dir: *c.DIR) ?*c.dirent;
+ /// macOS symbol carrying the $INODE64 suffix; selected on x86_64 only,
+ /// it is not needed on aarch64.
+ extern "c" fn @"readdir$INODE64"(dir: *c.DIR) ?*c.dirent;
+
+ // realpath
+ extern "c" fn realpath(noalias file_name: [*:0]const u8, noalias resolved_name: [*]u8) ?[*:0]u8;
+ /// Darwin symbol carrying the $DARWIN_EXTSN suffix.
+ extern "c" fn @"realpath$DARWIN_EXTSN"(noalias file_name: [*:0]const u8, noalias resolved_name: [*]u8) ?[*:0]u8;
+
+ // stat
+ extern "c" fn stat(noalias path: [*:0]const u8, noalias buf: *c.Stat) c_int;
+ /// macOS symbol carrying the $INODE64 suffix; selected on x86_64 only,
+ /// it is not needed on aarch64.
+ extern "c" fn @"stat$INODE64"(noalias path: [*:0]const u8, noalias buf: *c.Stat) c_int;
+};
diff --git a/lib/std/c/darwin.zig b/lib/std/c/darwin.zig
index a80c65f76c..947abe58c9 100644
--- a/lib/std/c/darwin.zig
+++ b/lib/std/c/darwin.zig
@@ -169,31 +169,8 @@ pub const COPYFILE_DATA = 1 << 3;
pub const copyfile_state_t = *opaque {};
pub extern "c" fn fcopyfile(from: fd_t, to: fd_t, state: ?copyfile_state_t, flags: u32) c_int;
-pub extern "c" fn @"realpath$DARWIN_EXTSN"(noalias file_name: [*:0]const u8, noalias resolved_name: [*]u8) ?[*:0]u8;
-pub const realpath = @"realpath$DARWIN_EXTSN";
-
pub extern "c" fn __getdirentries64(fd: c_int, buf_ptr: [*]u8, buf_len: usize, basep: *i64) isize;
-const private = struct {
- extern "c" fn fstat(fd: fd_t, buf: *Stat) c_int;
- /// On x86_64 Darwin, fstat has to be manually linked with $INODE64 suffix to
- /// force 64bit version.
- /// Note that this is fixed on aarch64 and no longer necessary.
- extern "c" fn @"fstat$INODE64"(fd: fd_t, buf: *Stat) c_int;
-
- extern "c" fn fstatat(dirfd: fd_t, path: [*:0]const u8, stat_buf: *Stat, flags: u32) c_int;
- /// On x86_64 Darwin, fstatat has to be manually linked with $INODE64 suffix to
- /// force 64bit version.
- /// Note that this is fixed on aarch64 and no longer necessary.
- extern "c" fn @"fstatat$INODE64"(dirfd: fd_t, path_name: [*:0]const u8, buf: *Stat, flags: u32) c_int;
-
- extern "c" fn readdir(dir: *std.c.DIR) ?*dirent;
- extern "c" fn @"readdir$INODE64"(dir: *std.c.DIR) ?*dirent;
-};
-pub const fstat = if (native_arch == .aarch64) private.fstat else private.@"fstat$INODE64";
-pub const fstatat = if (native_arch == .aarch64) private.fstatat else private.@"fstatat$INODE64";
-pub const readdir = if (native_arch == .aarch64) private.readdir else private.@"readdir$INODE64";
-
pub extern "c" fn mach_absolute_time() u64;
pub extern "c" fn mach_continuous_time() u64;
pub extern "c" fn mach_timebase_info(tinfo: ?*mach_timebase_info_data) kern_return_t;
@@ -866,21 +843,7 @@ pub const qos_class_t = enum(c_uint) {
QOS_CLASS_UNSPECIFIED = 0x00,
};
-pub const pthread_mutex_t = extern struct {
- __sig: c_long = 0x32AAABA7,
- __opaque: [__PTHREAD_MUTEX_SIZE__]u8 = [_]u8{0} ** __PTHREAD_MUTEX_SIZE__,
-};
-pub const pthread_cond_t = extern struct {
- __sig: c_long = 0x3CB0B1BB,
- __opaque: [__PTHREAD_COND_SIZE__]u8 = [_]u8{0} ** __PTHREAD_COND_SIZE__,
-};
-pub const pthread_rwlock_t = extern struct {
- __sig: c_long = 0x2DA8B3B4,
- __opaque: [192]u8 = [_]u8{0} ** 192,
-};
pub const sem_t = c_int;
-const __PTHREAD_MUTEX_SIZE__ = if (@sizeOf(usize) == 8) 56 else 40;
-const __PTHREAD_COND_SIZE__ = if (@sizeOf(usize) == 8) 40 else 24;
pub const pthread_attr_t = extern struct {
__sig: c_long,
@@ -1202,16 +1165,12 @@ pub const Sigaction = extern struct {
};
pub const dirent = extern struct {
- d_ino: u64,
- d_seekoff: u64,
- d_reclen: u16,
- d_namlen: u16,
- d_type: u8,
- d_name: [1024]u8,
-
- pub fn reclen(self: dirent) u16 {
- return self.d_reclen;
- }
+ ino: u64,
+ seekoff: u64,
+ reclen: u16,
+ namlen: u16,
+ type: u8,
+ name: [1024]u8,
};
/// Renamed from `kevent` to `Kevent` to avoid conflict with function name.
@@ -1346,49 +1305,6 @@ pub const X_OK = 1;
pub const W_OK = 2;
pub const R_OK = 4;
-pub const O = struct {
- pub const PATH = 0x0000;
- /// open for reading only
- pub const RDONLY = 0x0000;
- /// open for writing only
- pub const WRONLY = 0x0001;
- /// open for reading and writing
- pub const RDWR = 0x0002;
- /// do not block on open or for data to become available
- pub const NONBLOCK = 0x0004;
- /// append on each write
- pub const APPEND = 0x0008;
- /// create file if it does not exist
- pub const CREAT = 0x0200;
- /// truncate size to 0
- pub const TRUNC = 0x0400;
- /// error if CREAT and the file exists
- pub const EXCL = 0x0800;
- /// atomically obtain a shared lock
- pub const SHLOCK = 0x0010;
- /// atomically obtain an exclusive lock
- pub const EXLOCK = 0x0020;
- /// do not follow symlinks
- pub const NOFOLLOW = 0x0100;
- /// allow open of symlinks
- pub const SYMLINK = 0x200000;
- /// descriptor requested for event notifications only
- pub const EVTONLY = 0x8000;
- /// mark as close-on-exec
- pub const CLOEXEC = 0x1000000;
- pub const ACCMODE = 3;
- pub const ALERT = 536870912;
- pub const ASYNC = 64;
- pub const DIRECTORY = 1048576;
- pub const DP_GETRAWENCRYPTED = 1;
- pub const DP_GETRAWUNENCRYPTED = 2;
- pub const DSYNC = 4194304;
- pub const FSYNC = SYNC;
- pub const NOCTTY = 131072;
- pub const POPUP = 2147483648;
- pub const SYNC = 128;
-};
-
pub const SEEK = struct {
pub const SET = 0x0;
pub const CUR = 0x1;
@@ -2529,18 +2445,6 @@ pub const S = struct {
pub const HOST_NAME_MAX = 72;
-pub const AT = struct {
- pub const FDCWD = -2;
- /// Use effective ids in access check
- pub const EACCESS = 0x0010;
- /// Act on the symlink itself not the target
- pub const SYMLINK_NOFOLLOW = 0x0020;
- /// Act on target of symlink
- pub const SYMLINK_FOLLOW = 0x0040;
- /// Path refers to directory
- pub const REMOVEDIR = 0x0080;
-};
-
pub const addrinfo = extern struct {
flags: i32,
family: i32,
@@ -2788,127 +2692,6 @@ pub const SHUT = struct {
pub const RDWR = 2;
};
-// Term
-pub const V = struct {
- pub const EOF = 0;
- pub const EOL = 1;
- pub const EOL2 = 2;
- pub const ERASE = 3;
- pub const WERASE = 4;
- pub const KILL = 5;
- pub const REPRINT = 6;
- pub const INTR = 8;
- pub const QUIT = 9;
- pub const SUSP = 10;
- pub const DSUSP = 11;
- pub const START = 12;
- pub const STOP = 13;
- pub const LNEXT = 14;
- pub const DISCARD = 15;
- pub const MIN = 16;
- pub const TIME = 17;
- pub const STATUS = 18;
-};
-
-pub const NCCS = 20; // 2 spares (7, 19)
-
-pub const cc_t = u8;
-pub const speed_t = u64;
-pub const tcflag_t = u64;
-
-pub const IGNBRK: tcflag_t = 0x00000001; // ignore BREAK condition
-pub const BRKINT: tcflag_t = 0x00000002; // map BREAK to SIGINTR
-pub const IGNPAR: tcflag_t = 0x00000004; // ignore (discard) parity errors
-pub const PARMRK: tcflag_t = 0x00000008; // mark parity and framing errors
-pub const INPCK: tcflag_t = 0x00000010; // enable checking of parity errors
-pub const ISTRIP: tcflag_t = 0x00000020; // strip 8th bit off chars
-pub const INLCR: tcflag_t = 0x00000040; // map NL into CR
-pub const IGNCR: tcflag_t = 0x00000080; // ignore CR
-pub const ICRNL: tcflag_t = 0x00000100; // map CR to NL (ala CRMOD)
-pub const IXON: tcflag_t = 0x00000200; // enable output flow control
-pub const IXOFF: tcflag_t = 0x00000400; // enable input flow control
-pub const IXANY: tcflag_t = 0x00000800; // any char will restart after stop
-pub const IMAXBEL: tcflag_t = 0x00002000; // ring bell on input queue full
-pub const IUTF8: tcflag_t = 0x00004000; // maintain state for UTF-8 VERASE
-
-pub const OPOST: tcflag_t = 0x00000001; //enable following output processing
-pub const ONLCR: tcflag_t = 0x00000002; // map NL to CR-NL (ala CRMOD)
-pub const OXTABS: tcflag_t = 0x00000004; // expand tabs to spaces
-pub const ONOEOT: tcflag_t = 0x00000008; // discard EOT's (^D) on output)
-
-pub const OCRNL: tcflag_t = 0x00000010; // map CR to NL on output
-pub const ONOCR: tcflag_t = 0x00000020; // no CR output at column 0
-pub const ONLRET: tcflag_t = 0x00000040; // NL performs CR function
-pub const OFILL: tcflag_t = 0x00000080; // use fill characters for delay
-pub const NLDLY: tcflag_t = 0x00000300; // \n delay
-pub const TABDLY: tcflag_t = 0x00000c04; // horizontal tab delay
-pub const CRDLY: tcflag_t = 0x00003000; // \r delay
-pub const FFDLY: tcflag_t = 0x00004000; // form feed delay
-pub const BSDLY: tcflag_t = 0x00008000; // \b delay
-pub const VTDLY: tcflag_t = 0x00010000; // vertical tab delay
-pub const OFDEL: tcflag_t = 0x00020000; // fill is DEL, else NUL
-
-pub const NL0: tcflag_t = 0x00000000;
-pub const NL1: tcflag_t = 0x00000100;
-pub const NL2: tcflag_t = 0x00000200;
-pub const NL3: tcflag_t = 0x00000300;
-pub const TAB0: tcflag_t = 0x00000000;
-pub const TAB1: tcflag_t = 0x00000400;
-pub const TAB2: tcflag_t = 0x00000800;
-pub const TAB3: tcflag_t = 0x00000004;
-pub const CR0: tcflag_t = 0x00000000;
-pub const CR1: tcflag_t = 0x00001000;
-pub const CR2: tcflag_t = 0x00002000;
-pub const CR3: tcflag_t = 0x00003000;
-pub const FF0: tcflag_t = 0x00000000;
-pub const FF1: tcflag_t = 0x00004000;
-pub const BS0: tcflag_t = 0x00000000;
-pub const BS1: tcflag_t = 0x00008000;
-pub const VT0: tcflag_t = 0x00000000;
-pub const VT1: tcflag_t = 0x00010000;
-
-pub const CIGNORE: tcflag_t = 0x00000001; // ignore control flags
-pub const CSIZE: tcflag_t = 0x00000300; // character size mask
-pub const CS5: tcflag_t = 0x00000000; // 5 bits (pseudo)
-pub const CS6: tcflag_t = 0x00000100; // 6 bits
-pub const CS7: tcflag_t = 0x00000200; // 7 bits
-pub const CS8: tcflag_t = 0x00000300; // 8 bits
-pub const CSTOPB: tcflag_t = 0x0000040; // send 2 stop bits
-pub const CREAD: tcflag_t = 0x00000800; // enable receiver
-pub const PARENB: tcflag_t = 0x00001000; // parity enable
-pub const PARODD: tcflag_t = 0x00002000; // odd parity, else even
-pub const HUPCL: tcflag_t = 0x00004000; // hang up on last close
-pub const CLOCAL: tcflag_t = 0x00008000; // ignore modem status lines
-pub const CCTS_OFLOW: tcflag_t = 0x00010000; // CTS flow control of output
-pub const CRTSCTS: tcflag_t = (CCTS_OFLOW | CRTS_IFLOW);
-pub const CRTS_IFLOW: tcflag_t = 0x00020000; // RTS flow control of input
-pub const CDTR_IFLOW: tcflag_t = 0x00040000; // DTR flow control of input
-pub const CDSR_OFLOW: tcflag_t = 0x00080000; // DSR flow control of output
-pub const CCAR_OFLOW: tcflag_t = 0x00100000; // DCD flow control of output
-pub const MDMBUF: tcflag_t = 0x00100000; // old name for CCAR_OFLOW
-
-pub const ECHOKE: tcflag_t = 0x00000001; // visual erase for line kill
-pub const ECHOE: tcflag_t = 0x00000002; // visually erase chars
-pub const ECHOK: tcflag_t = 0x00000004; // echo NL after line kill
-pub const ECHO: tcflag_t = 0x00000008; // enable echoing
-pub const ECHONL: tcflag_t = 0x00000010; // echo NL even if ECHO is off
-pub const ECHOPRT: tcflag_t = 0x00000020; // visual erase mode for hardcopy
-pub const ECHOCTL: tcflag_t = 0x00000040; // echo control chars as ^(Char)
-pub const ISIG: tcflag_t = 0x00000080; // enable signals INTR, QUIT, [D]SUSP
-pub const ICANON: tcflag_t = 0x00000100; // canonicalize input lines
-pub const ALTWERASE: tcflag_t = 0x00000200; // use alternate WERASE algorithm
-pub const IEXTEN: tcflag_t = 0x00000400; // enable DISCARD and LNEXT
-pub const EXTPROC: tcflag_t = 0x00000800; // external processing
-pub const TOSTOP: tcflag_t = 0x00400000; // stop background jobs from output
-pub const FLUSHO: tcflag_t = 0x00800000; // output being flushed (state)
-pub const NOKERNINFO: tcflag_t = 0x02000000; // no kernel output from VSTATUS
-pub const PENDIN: tcflag_t = 0x20000000; // XXX retype pending input (state)
-pub const NOFLSH: tcflag_t = 0x80000000; // don't flush after interrupt
-
-pub const TCSANOW: tcflag_t = 0; // make change immediate
-pub const TCSADRAIN: tcflag_t = 1; // drain output, then change
-pub const TCSAFLUSH: tcflag_t = 2; // drain output, flush input
-pub const TCSASOFT: tcflag_t = 0x10; // flag - don't alter h.w. state
pub const TCSA = enum(c_uint) {
NOW,
DRAIN,
@@ -2916,50 +2699,6 @@ pub const TCSA = enum(c_uint) {
_,
};
-pub const B0: tcflag_t = 0;
-pub const B50: tcflag_t = 50;
-pub const B75: tcflag_t = 75;
-pub const B110: tcflag_t = 110;
-pub const B134: tcflag_t = 134;
-pub const B150: tcflag_t = 150;
-pub const B200: tcflag_t = 200;
-pub const B300: tcflag_t = 300;
-pub const B600: tcflag_t = 600;
-pub const B1200: tcflag_t = 1200;
-pub const B1800: tcflag_t = 1800;
-pub const B2400: tcflag_t = 2400;
-pub const B4800: tcflag_t = 4800;
-pub const B9600: tcflag_t = 9600;
-pub const B19200: tcflag_t = 19200;
-pub const B38400: tcflag_t = 38400;
-pub const B7200: tcflag_t = 7200;
-pub const B14400: tcflag_t = 14400;
-pub const B28800: tcflag_t = 28800;
-pub const B57600: tcflag_t = 57600;
-pub const B76800: tcflag_t = 76800;
-pub const B115200: tcflag_t = 115200;
-pub const B230400: tcflag_t = 230400;
-pub const EXTA: tcflag_t = 19200;
-pub const EXTB: tcflag_t = 38400;
-
-pub const TCIFLUSH: tcflag_t = 1;
-pub const TCOFLUSH: tcflag_t = 2;
-pub const TCIOFLUSH: tcflag_t = 3;
-pub const TCOOFF: tcflag_t = 1;
-pub const TCOON: tcflag_t = 2;
-pub const TCIOFF: tcflag_t = 3;
-pub const TCION: tcflag_t = 4;
-
-pub const termios = extern struct {
- iflag: tcflag_t, // input flags
- oflag: tcflag_t, // output flags
- cflag: tcflag_t, // control flags
- lflag: tcflag_t, // local flags
- cc: [NCCS]cc_t, // control chars
- ispeed: speed_t align(8), // input speed
- ospeed: speed_t, // output speed
-};
-
pub const winsize = extern struct {
ws_row: u16,
ws_col: u16,
diff --git a/lib/std/c/dragonfly.zig b/lib/std/c/dragonfly.zig
index 5474d79d38..9e4f27bf04 100644
--- a/lib/std/c/dragonfly.zig
+++ b/lib/std/c/dragonfly.zig
@@ -12,7 +12,7 @@ pub fn _errno() *c_int {
pub extern "c" fn getdents(fd: c_int, buf_ptr: [*]u8, nbytes: usize) c_int;
pub extern "c" fn sigaltstack(ss: ?*stack_t, old_ss: ?*stack_t) c_int;
pub extern "c" fn getrandom(buf_ptr: [*]u8, buf_len: usize, flags: c_uint) isize;
-pub extern "c" fn pipe2(fds: *[2]fd_t, flags: u32) c_int;
+pub extern "c" fn pipe2(fds: *[2]fd_t, flags: std.c.O) c_int;
pub extern "c" fn arc4random_buf(buf: [*]u8, len: usize) void;
pub const dl_iterate_phdr_callback = *const fn (info: *dl_phdr_info, size: usize, data: ?*anyopaque) callconv(.C) c_int;
@@ -22,22 +22,11 @@ pub extern "c" fn lwp_gettid() c_int;
pub extern "c" fn posix_memalign(memptr: *?*anyopaque, alignment: usize, size: usize) c_int;
-pub const pthread_mutex_t = extern struct {
- inner: ?*anyopaque = null,
-};
-pub const pthread_cond_t = extern struct {
- inner: ?*anyopaque = null,
-};
-
pub const pthread_attr_t = extern struct { // copied from freebsd
__size: [56]u8,
__align: c_long,
};
-pub const pthread_rwlock_t = extern struct {
- ptr: ?*anyopaque = null,
-};
-
pub const sem_t = ?*opaque {};
pub extern "c" fn pthread_setname_np(thread: std.c.pthread_t, name: [*:0]const u8) E;
@@ -394,35 +383,6 @@ pub const X_OK = 1; // test for execute or search permission
pub const W_OK = 2; // test for write permission
pub const R_OK = 4; // test for read permission
-pub const O = struct {
- pub const RDONLY = 0;
- pub const NDELAY = NONBLOCK;
- pub const WRONLY = 1;
- pub const RDWR = 2;
- pub const ACCMODE = 3;
- pub const NONBLOCK = 4;
- pub const APPEND = 8;
- pub const SHLOCK = 16;
- pub const EXLOCK = 32;
- pub const ASYNC = 64;
- pub const FSYNC = 128;
- pub const SYNC = 128;
- pub const NOFOLLOW = 256;
- pub const CREAT = 512;
- pub const TRUNC = 1024;
- pub const EXCL = 2048;
- pub const NOCTTY = 32768;
- pub const DIRECT = 65536;
- pub const CLOEXEC = 131072;
- pub const FBLOCKING = 262144;
- pub const FNONBLOCKING = 524288;
- pub const FAPPEND = 1048576;
- pub const FOFFSET = 2097152;
- pub const FSYNCWRITE = 4194304;
- pub const FASYNCWRITE = 8388608;
- pub const DIRECTORY = 134217728;
-};
-
pub const SEEK = struct {
pub const SET = 0;
pub const CUR = 1;
@@ -458,24 +418,16 @@ pub const F = struct {
pub const FD_CLOEXEC = 1;
-pub const AT = struct {
- pub const FDCWD = -328243;
- pub const SYMLINK_NOFOLLOW = 1;
- pub const REMOVEDIR = 2;
- pub const EACCESS = 4;
- pub const SYMLINK_FOLLOW = 8;
-};
-
pub const dirent = extern struct {
- d_fileno: c_ulong,
- d_namlen: u16,
- d_type: u8,
- d_unused1: u8,
- d_unused2: u32,
- d_name: [256]u8,
+ fileno: c_ulong,
+ namlen: u16,
+ type: u8,
+ unused1: u8,
+ unused2: u32,
+ name: [256]u8,
pub fn reclen(self: dirent) u16 {
- return (@offsetOf(dirent, "d_name") + self.d_namlen + 1 + 7) & ~@as(u16, 7);
+ return (@offsetOf(dirent, "name") + self.namlen + 1 + 7) & ~@as(u16, 7);
}
};
@@ -605,6 +557,13 @@ pub const NOTE_FFCTRLMASK = 3221225472;
pub const NOTE_FFCOPY = 3221225472;
pub const NOTE_PCTRLMASK = 4026531840;
+pub const TCSA = enum(c_uint) {
+ NOW,
+ DRAIN,
+ FLUSH,
+ _,
+};
+
pub const stack_t = extern struct {
sp: [*]u8,
size: isize,
diff --git a/lib/std/c/emscripten.zig b/lib/std/c/emscripten.zig
index ca64473016..501fcb4386 100644
--- a/lib/std/c/emscripten.zig
+++ b/lib/std/c/emscripten.zig
@@ -3,7 +3,6 @@ const maxInt = std.math.maxInt;
const emscripten = std.os.emscripten;
pub const AF = emscripten.AF;
-pub const AT = emscripten.AT;
pub const CLOCK = emscripten.CLOCK;
pub const CPU_COUNT = emscripten.CPU_COUNT;
pub const E = emscripten.E;
@@ -19,7 +18,6 @@ pub const MADV = emscripten.MADV;
pub const MSF = emscripten.MSF;
pub const MSG = emscripten.MSG;
pub const NAME_MAX = emscripten.NAME_MAX;
-pub const O = emscripten.O;
pub const PATH_MAX = emscripten.PATH_MAX;
pub const POLL = emscripten.POLL;
pub const PROT = emscripten.PROT;
@@ -74,8 +72,6 @@ pub const sigset_t = emscripten.sigset_t;
pub const sockaddr = emscripten.sockaddr;
pub const socklen_t = emscripten.socklen_t;
pub const stack_t = emscripten.stack_t;
-pub const tcflag_t = emscripten.tcflag_t;
-pub const termios = emscripten.termios;
pub const time_t = emscripten.time_t;
pub const timespec = emscripten.timespec;
pub const timeval = emscripten.timeval;
@@ -151,7 +147,7 @@ pub const pwritev64 = std.c.pwritev;
pub const setrlimit64 = std.c.setrlimit;
pub extern "c" fn sigaltstack(ss: ?*stack_t, old_ss: ?*stack_t) c_int;
-pub extern "c" fn pipe2(fds: *[2]fd_t, flags: u32) c_int;
+pub extern "c" fn pipe2(fds: *[2]fd_t, flags: std.c.O) c_int;
pub extern "c" fn getentropy(buffer: [*]u8, size: usize) c_int;
pub const pthread_attr_t = extern struct {
@@ -159,19 +155,6 @@ pub const pthread_attr_t = extern struct {
__align: c_long,
};
-pub const pthread_mutex_t = extern struct {
- size: [__SIZEOF_PTHREAD_MUTEX_T]u8 align(4) = [_]u8{0} ** __SIZEOF_PTHREAD_MUTEX_T,
-};
-pub const pthread_cond_t = extern struct {
- size: [__SIZEOF_PTHREAD_COND_T]u8 align(@alignOf(usize)) = [_]u8{0} ** __SIZEOF_PTHREAD_COND_T,
-};
-pub const pthread_rwlock_t = extern struct {
- size: [32]u8 align(4) = [_]u8{0} ** 32,
-};
-
-const __SIZEOF_PTHREAD_COND_T = 48;
-const __SIZEOF_PTHREAD_MUTEX_T = 24;
-
pub const pthread_key_t = c_uint;
pub const sem_t = extern struct {
__size: [__SIZEOF_SEM_T]u8 align(@alignOf(usize)),
@@ -189,9 +172,9 @@ pub const RTLD = struct {
};
pub const dirent = struct {
- d_ino: c_uint,
- d_off: c_uint,
- d_reclen: c_ushort,
- d_type: u8,
- d_name: [256]u8,
+ ino: c_uint,
+ off: c_uint,
+ reclen: c_ushort,
+ type: u8,
+ name: [256]u8,
};
diff --git a/lib/std/c/freebsd.zig b/lib/std/c/freebsd.zig
index a1b64893b8..94854cf090 100644
--- a/lib/std/c/freebsd.zig
+++ b/lib/std/c/freebsd.zig
@@ -15,7 +15,7 @@ pub extern "c" fn getrandom(buf_ptr: [*]u8, buf_len: usize, flags: c_uint) isize
pub extern "c" fn pthread_getthreadid_np() c_int;
pub extern "c" fn pthread_set_name_np(thread: std.c.pthread_t, name: [*:0]const u8) void;
pub extern "c" fn pthread_get_name_np(thread: std.c.pthread_t, name: [*:0]u8, len: usize) void;
-pub extern "c" fn pipe2(fds: *[2]fd_t, flags: u32) c_int;
+pub extern "c" fn pipe2(fds: *[2]fd_t, flags: std.c.O) c_int;
pub extern "c" fn arc4random_buf(buf: [*]u8, len: usize) void;
pub extern "c" fn posix_memalign(memptr: *?*anyopaque, alignment: usize, size: usize) c_int;
@@ -44,16 +44,6 @@ pub extern "c" fn sendfile(
pub const dl_iterate_phdr_callback = *const fn (info: *dl_phdr_info, size: usize, data: ?*anyopaque) callconv(.C) c_int;
pub extern "c" fn dl_iterate_phdr(callback: dl_iterate_phdr_callback, data: ?*anyopaque) c_int;
-pub const pthread_mutex_t = extern struct {
- inner: ?*anyopaque = null,
-};
-pub const pthread_cond_t = extern struct {
- inner: ?*anyopaque = null,
-};
-pub const pthread_rwlock_t = extern struct {
- ptr: ?*anyopaque = null,
-};
-
pub const pthread_attr_t = extern struct {
inner: ?*anyopaque = null,
};
@@ -376,23 +366,19 @@ pub const timeval = extern struct {
pub const dirent = extern struct {
/// File number of entry.
- d_fileno: ino_t,
+ fileno: ino_t,
/// Directory offset of entry.
- d_off: off_t,
+ off: off_t,
/// Length of this record.
- d_reclen: u16,
+ reclen: u16,
/// File type, one of DT_.
- d_type: u8,
- _d_pad0: u8,
- /// Length of the d_name member.
- d_namlen: u16,
- _d_pad1: u16,
+ type: u8,
+ pad0: u8 = 0,
+ /// Length of the name member.
+ namlen: u16,
+ pad1: u16 = 0,
/// Name of entry.
- d_name: [255:0]u8,
-
- pub fn reclen(self: dirent) u16 {
- return self.d_reclen;
- }
+ name: [255:0]u8,
};
pub const in_port_t = u16;
@@ -746,36 +732,6 @@ pub const X_OK = 1; // test for execute or search permission
pub const W_OK = 2; // test for write permission
pub const R_OK = 4; // test for read permission
-pub const O = struct {
- pub const RDONLY = 0x0000;
- pub const WRONLY = 0x0001;
- pub const RDWR = 0x0002;
- pub const ACCMODE = 0x0003;
-
- pub const SHLOCK = 0x0010;
- pub const EXLOCK = 0x0020;
-
- pub const CREAT = 0x0200;
- pub const EXCL = 0x0800;
- pub const NOCTTY = 0x8000;
- pub const TRUNC = 0x0400;
- pub const APPEND = 0x0008;
- pub const NONBLOCK = 0x0004;
- pub const DSYNC = 0o10000;
- pub const SYNC = 0x0080;
- pub const RSYNC = 0o4010000;
- pub const DIRECTORY = 0x20000;
- pub const NOFOLLOW = 0x0100;
- pub const CLOEXEC = 0x00100000;
-
- pub const ASYNC = 0x0040;
- pub const DIRECT = 0x00010000;
- pub const NOATIME = 0o1000000;
- pub const PATH = 0o10000000;
- pub const TMPFILE = 0o20200000;
- pub const NDELAY = NONBLOCK;
-};
-
/// Command flags for fcntl(2).
pub const F = struct {
/// Duplicate file descriptor.
@@ -1197,6 +1153,13 @@ pub const T = struct {
pub const IOCSIG = 0x2004745f;
};
+pub const TCSA = enum(c_uint) {
+ NOW,
+ DRAIN,
+ FLUSH,
+ _,
+};
+
pub const winsize = extern struct {
ws_row: u16,
ws_col: u16,
@@ -1573,23 +1536,6 @@ pub const S = struct {
pub const HOST_NAME_MAX = 255;
-pub const AT = struct {
- /// Magic value that specify the use of the current working directory
- /// to determine the target of relative file paths in the openat() and
- /// similar syscalls.
- pub const FDCWD = -100;
- /// Check access using effective user and group ID
- pub const EACCESS = 0x0100;
- /// Do not follow symbolic links
- pub const SYMLINK_NOFOLLOW = 0x0200;
- /// Follow symbolic link
- pub const SYMLINK_FOLLOW = 0x0400;
- /// Remove directory instead of file
- pub const REMOVEDIR = 0x0800;
- /// Fail if not under dirfd
- pub const BENEATH = 0x1000;
-};
-
pub const addrinfo = extern struct {
flags: i32,
family: i32,
diff --git a/lib/std/c/fuchsia.zig b/lib/std/c/fuchsia.zig
deleted file mode 100644
index af6c4756b9..0000000000
--- a/lib/std/c/fuchsia.zig
+++ /dev/null
@@ -1,11 +0,0 @@
-pub const pthread_mutex_t = extern struct {
- size: [__SIZEOF_PTHREAD_MUTEX_T]u8 align(@alignOf(usize)) = [_]u8{0} ** __SIZEOF_PTHREAD_MUTEX_T,
-};
-pub const pthread_cond_t = extern struct {
- size: [__SIZEOF_PTHREAD_COND_T]u8 align(@alignOf(usize)) = [_]u8{0} ** __SIZEOF_PTHREAD_COND_T,
-};
-pub const pthread_rwlock_t = extern struct {
- size: [56]u8 align(@alignOf(usize)) = [_]u8{0} ** 56,
-};
-const __SIZEOF_PTHREAD_COND_T = 48;
-const __SIZEOF_PTHREAD_MUTEX_T = 40;
diff --git a/lib/std/c/haiku.zig b/lib/std/c/haiku.zig
index c8cc16563a..723d953d2d 100644
--- a/lib/std/c/haiku.zig
+++ b/lib/std/c/haiku.zig
@@ -45,22 +45,6 @@ pub const pthread_attr_t = extern struct {
__stack_address: ?*anyopaque,
};
-pub const pthread_mutex_t = extern struct {
- flags: u32 = 0,
- lock: i32 = 0,
- unused: i32 = -42,
- owner: i32 = -1,
- owner_count: i32 = 0,
-};
-
-pub const pthread_cond_t = extern struct {
- flags: u32 = 0,
- unused: i32 = -42,
- mutex: ?*anyopaque = null,
- waiter_count: i32 = 0,
- lock: i32 = 0,
-};
-
pub const EAI = enum(c_int) {
/// address family for hostname not supported
ADDRFAMILY = 1,
@@ -238,16 +222,12 @@ pub const timespec = extern struct {
};
pub const dirent = extern struct {
- d_dev: i32,
- d_pdev: i32,
- d_ino: i64,
- d_pino: i64,
- d_reclen: u16,
- d_name: [256]u8,
-
- pub fn reclen(self: dirent) u16 {
- return self.d_reclen;
- }
+ dev: i32,
+ pdev: i32,
+ ino: i64,
+ pino: i64,
+ reclen: u16,
+ name: [256]u8,
};
pub const B_OS_NAME_LENGTH = 32; // OS.h
@@ -510,32 +490,6 @@ pub const X_OK = 1; // test for execute or search permission
pub const W_OK = 2; // test for write permission
pub const R_OK = 4; // test for read permission
-pub const O = struct {
- pub const RDONLY = 0x0000;
- pub const WRONLY = 0x0001;
- pub const RDWR = 0x0002;
- pub const ACCMODE = 0x0003;
- pub const RWMASK = ACCMODE;
-
- pub const EXCL = 0x0100;
- pub const CREAT = 0x0200;
- pub const TRUNC = 0x0400;
- pub const NOCTTY = 0x1000;
- pub const NOTRAVERSE = 0x2000;
-
- pub const CLOEXEC = 0x00000040;
- pub const NONBLOCK = 0x00000080;
- pub const NDELAY = NONBLOCK;
- pub const APPEND = 0x00000800;
- pub const SYNC = 0x00010000;
- pub const RSYNC = 0x00020000;
- pub const DSYNC = 0x00040000;
- pub const NOFOLLOW = 0x00080000;
- pub const DIRECT = 0x00100000;
- pub const NOCACHE = DIRECT;
- pub const DIRECTORY = 0x00200000;
-};
-
pub const F = struct {
pub const DUPFD = 0x0001;
pub const GETFD = 0x0002;
@@ -923,14 +877,6 @@ pub const S = struct {
pub const HOST_NAME_MAX = 255;
-pub const AT = struct {
- pub const FDCWD = -1;
- pub const SYMLINK_NOFOLLOW = 0x01;
- pub const SYMLINK_FOLLOW = 0x02;
- pub const REMOVEDIR = 0x04;
- pub const EACCESS = 0x08;
-};
-
pub const addrinfo = extern struct {
flags: i32,
family: i32,
@@ -1004,21 +950,4 @@ pub const directory_which = enum(c_int) {
_,
};
-pub const cc_t = u8;
-pub const speed_t = u8;
-pub const tcflag_t = u32;
-
-pub const NCCS = 11;
-
-pub const termios = extern struct {
- c_iflag: tcflag_t,
- c_oflag: tcflag_t,
- c_cflag: tcflag_t,
- c_lflag: tcflag_t,
- c_line: cc_t,
- c_ispeed: speed_t,
- c_ospeed: speed_t,
- cc_t: [NCCS]cc_t,
-};
-
pub const MSG_NOSIGNAL = 0x0800;
diff --git a/lib/std/c/hermit.zig b/lib/std/c/hermit.zig
deleted file mode 100644
index 879346ba13..0000000000
--- a/lib/std/c/hermit.zig
+++ /dev/null
@@ -1,12 +0,0 @@
-const std = @import("std");
-const maxInt = std.math.maxInt;
-
-pub const pthread_mutex_t = extern struct {
- inner: usize = ~@as(usize, 0),
-};
-pub const pthread_cond_t = extern struct {
- inner: usize = ~@as(usize, 0),
-};
-pub const pthread_rwlock_t = extern struct {
- ptr: usize = maxInt(usize),
-};
diff --git a/lib/std/c/linux.zig b/lib/std/c/linux.zig
index c847d58bef..03c90e5760 100644
--- a/lib/std/c/linux.zig
+++ b/lib/std/c/linux.zig
@@ -9,7 +9,6 @@ const FILE = std.c.FILE;
pub const AF = linux.AF;
pub const ARCH = linux.ARCH;
-pub const AT = linux.AT;
pub const CLOCK = linux.CLOCK;
pub const CPU_COUNT = linux.CPU_COUNT;
pub const E = linux.E;
@@ -28,7 +27,6 @@ pub const MSF = linux.MSF;
pub const MMAP2_UNIT = linux.MMAP2_UNIT;
pub const MSG = linux.MSG;
pub const NAME_MAX = linux.NAME_MAX;
-pub const O = linux.O;
pub const PATH_MAX = linux.PATH_MAX;
pub const POLL = linux.POLL;
pub const PROT = linux.PROT;
@@ -50,6 +48,7 @@ pub const STDIN_FILENO = linux.STDIN_FILENO;
pub const STDOUT_FILENO = linux.STDOUT_FILENO;
pub const SYS = linux.SYS;
pub const Sigaction = linux.Sigaction;
+pub const T = linux.T;
pub const TCP = linux.TCP;
pub const TCSA = linux.TCSA;
pub const VDSO = linux.VDSO;
@@ -87,8 +86,6 @@ pub const sigset_t = linux.sigset_t;
pub const sockaddr = linux.sockaddr;
pub const socklen_t = linux.socklen_t;
pub const stack_t = linux.stack_t;
-pub const tcflag_t = linux.tcflag_t;
-pub const termios = linux.termios;
pub const time_t = linux.time_t;
pub const timespec = linux.timespec;
pub const timeval = linux.timeval;
@@ -97,6 +94,7 @@ pub const ucontext_t = linux.ucontext_t;
pub const uid_t = linux.uid_t;
pub const user_desc = linux.user_desc;
pub const utsname = linux.utsname;
+pub const winsize = linux.winsize;
pub const PR = linux.PR;
pub const _errno = switch (native_abi) {
@@ -241,8 +239,8 @@ pub extern "c" fn ftruncate64(fd: c_int, length: off_t) c_int;
pub extern "c" fn getrlimit64(resource: rlimit_resource, rlim: *rlimit) c_int;
pub extern "c" fn lseek64(fd: fd_t, offset: i64, whence: c_int) i64;
pub extern "c" fn mmap64(addr: ?*align(std.mem.page_size) anyopaque, len: usize, prot: c_uint, flags: c_uint, fd: fd_t, offset: i64) *anyopaque;
-pub extern "c" fn open64(path: [*:0]const u8, oflag: c_uint, ...) c_int;
-pub extern "c" fn openat64(fd: c_int, path: [*:0]const u8, oflag: c_uint, ...) c_int;
+pub extern "c" fn open64(path: [*:0]const u8, oflag: linux.O, ...) c_int;
+pub extern "c" fn openat64(fd: c_int, path: [*:0]const u8, oflag: linux.O, ...) c_int;
pub extern "c" fn pread64(fd: fd_t, buf: [*]u8, nbyte: usize, offset: i64) isize;
pub extern "c" fn preadv64(fd: c_int, iov: [*]const iovec, iovcnt: c_uint, offset: i64) isize;
pub extern "c" fn pwrite64(fd: fd_t, buf: [*]const u8, nbyte: usize, offset: i64) isize;
@@ -277,7 +275,7 @@ pub extern "c" fn dl_iterate_phdr(callback: dl_iterate_phdr_callback, data: ?*an
pub extern "c" fn sigaltstack(ss: ?*stack_t, old_ss: ?*stack_t) c_int;
pub extern "c" fn memfd_create(name: [*:0]const u8, flags: c_uint) c_int;
-pub extern "c" fn pipe2(fds: *[2]fd_t, flags: u32) c_int;
+pub extern "c" fn pipe2(fds: *[2]fd_t, flags: linux.O) c_int;
pub extern "c" fn fallocate(fd: fd_t, mode: c_int, offset: off_t, len: off_t) c_int;
@@ -313,43 +311,11 @@ pub const pthread_attr_t = extern struct {
__align: c_long,
};
-pub const pthread_mutex_t = extern struct {
- size: [__SIZEOF_PTHREAD_MUTEX_T]u8 align(@alignOf(usize)) = [_]u8{0} ** __SIZEOF_PTHREAD_MUTEX_T,
-};
-pub const pthread_cond_t = extern struct {
- size: [__SIZEOF_PTHREAD_COND_T]u8 align(@alignOf(usize)) = [_]u8{0} ** __SIZEOF_PTHREAD_COND_T,
-};
-pub const pthread_rwlock_t = switch (native_abi) {
- .android => switch (@sizeOf(usize)) {
- 4 => extern struct {
- size: [40]u8 align(@alignOf(usize)) = [_]u8{0} ** 40,
- },
- 8 => extern struct {
- size: [56]u8 align(@alignOf(usize)) = [_]u8{0} ** 56,
- },
- else => @compileError("impossible pointer size"),
- },
- else => extern struct {
- size: [56]u8 align(@alignOf(usize)) = [_]u8{0} ** 56,
- },
-};
pub const pthread_key_t = c_uint;
pub const sem_t = extern struct {
__size: [__SIZEOF_SEM_T]u8 align(@alignOf(usize)),
};
-const __SIZEOF_PTHREAD_COND_T = 48;
-const __SIZEOF_PTHREAD_MUTEX_T = switch (native_abi) {
- .musl, .musleabi, .musleabihf => if (@sizeOf(usize) == 8) 40 else 24,
- .gnu, .gnuabin32, .gnuabi64, .gnueabi, .gnueabihf, .gnux32 => switch (native_arch) {
- .aarch64 => 48,
- .x86_64 => if (native_abi == .gnux32) 40 else 32,
- .mips64, .powerpc64, .powerpc64le, .sparc64 => 40,
- else => if (@sizeOf(usize) == 8) 40 else 24,
- },
- .android => if (@sizeOf(usize) == 8) 40 else 4,
- else => @compileError("unsupported ABI"),
-};
const __SIZEOF_SEM_T = 4 * @sizeOf(usize);
pub extern "c" fn pthread_setname_np(thread: std.c.pthread_t, name: [*:0]const u8) E;
@@ -365,16 +331,16 @@ pub const RTLD = struct {
};
pub const dirent = struct {
- d_ino: c_uint,
- d_off: c_uint,
- d_reclen: c_ushort,
- d_type: u8,
- d_name: [256]u8,
+ ino: c_uint,
+ off: c_uint,
+ reclen: c_ushort,
+ type: u8,
+ name: [256]u8,
};
pub const dirent64 = struct {
- d_ino: c_ulong,
- d_off: c_ulong,
- d_reclen: c_ushort,
- d_type: u8,
- d_name: [256]u8,
+ ino: c_ulong,
+ off: c_ulong,
+ reclen: c_ushort,
+ type: u8,
+ name: [256]u8,
};
diff --git a/lib/std/c/minix.zig b/lib/std/c/minix.zig
deleted file mode 100644
index 62cefc14fb..0000000000
--- a/lib/std/c/minix.zig
+++ /dev/null
@@ -1,18 +0,0 @@
-const builtin = @import("builtin");
-pub const pthread_mutex_t = extern struct {
- size: [__SIZEOF_PTHREAD_MUTEX_T]u8 align(@alignOf(usize)) = [_]u8{0} ** __SIZEOF_PTHREAD_MUTEX_T,
-};
-pub const pthread_cond_t = extern struct {
- size: [__SIZEOF_PTHREAD_COND_T]u8 align(@alignOf(usize)) = [_]u8{0} ** __SIZEOF_PTHREAD_COND_T,
-};
-const __SIZEOF_PTHREAD_COND_T = 48;
-const __SIZEOF_PTHREAD_MUTEX_T = switch (builtin.abi) {
- .musl, .musleabi, .musleabihf => if (@sizeOf(usize) == 8) 40 else 24,
- .gnu, .gnuabin32, .gnuabi64, .gnueabi, .gnueabihf, .gnux32 => switch (builtin.cpu.arch) {
- .aarch64 => 48,
- .x86_64 => if (builtin.abi == .gnux32) 40 else 32,
- .mips64, .powerpc64, .powerpc64le, .sparc64 => 40,
- else => if (@sizeOf(usize) == 8) 40 else 24,
- },
- else => unreachable,
-};
diff --git a/lib/std/c/netbsd.zig b/lib/std/c/netbsd.zig
index 2c100e2c37..5961b3f490 100644
--- a/lib/std/c/netbsd.zig
+++ b/lib/std/c/netbsd.zig
@@ -15,12 +15,9 @@ pub extern "c" fn dl_iterate_phdr(callback: dl_iterate_phdr_callback, data: ?*an
pub extern "c" fn _lwp_self() lwpid_t;
-pub extern "c" fn pipe2(fds: *[2]fd_t, flags: u32) c_int;
+pub extern "c" fn pipe2(fds: *[2]fd_t, flags: std.c.O) c_int;
pub extern "c" fn arc4random_buf(buf: [*]u8, len: usize) void;
-pub extern "c" fn __fstat50(fd: fd_t, buf: *Stat) c_int;
-pub const fstat = __fstat50;
-
pub extern "c" fn __stat50(path: [*:0]const u8, buf: *Stat) c_int;
pub const stat = __stat50;
@@ -62,42 +59,7 @@ pub extern "c" fn posix_memalign(memptr: *?*anyopaque, alignment: usize, size: u
pub extern "c" fn __msync13(addr: *align(std.mem.page_size) const anyopaque, len: usize, flags: c_int) c_int;
pub const msync = __msync13;
-pub const pthread_mutex_t = extern struct {
- magic: u32 = 0x33330003,
- errorcheck: padded_pthread_spin_t = 0,
- ceiling: padded_pthread_spin_t = 0,
- owner: usize = 0,
- waiters: ?*u8 = null,
- recursed: u32 = 0,
- spare2: ?*anyopaque = null,
-};
-
-pub const pthread_cond_t = extern struct {
- magic: u32 = 0x55550005,
- lock: pthread_spin_t = 0,
- waiters_first: ?*u8 = null,
- waiters_last: ?*u8 = null,
- mutex: ?*pthread_mutex_t = null,
- private: ?*anyopaque = null,
-};
-
-pub const pthread_rwlock_t = extern struct {
- magic: c_uint = 0x99990009,
- interlock: switch (builtin.cpu.arch) {
- .aarch64, .sparc, .x86_64, .x86 => u8,
- .arm, .powerpc => c_int,
- else => unreachable,
- } = 0,
- rblocked_first: ?*u8 = null,
- rblocked_last: ?*u8 = null,
- wblocked_first: ?*u8 = null,
- wblocked_last: ?*u8 = null,
- nreaders: c_uint = 0,
- owner: ?std.c.pthread_t = null,
- private: ?*anyopaque = null,
-};
-
-const pthread_spin_t = switch (builtin.cpu.arch) {
+pub const pthread_spin_t = switch (builtin.cpu.arch) {
.aarch64, .aarch64_be, .aarch64_32 => u8,
.mips, .mipsel, .mips64, .mips64el => u32,
.powerpc, .powerpc64, .powerpc64le => i32,
@@ -108,7 +70,7 @@ const pthread_spin_t = switch (builtin.cpu.arch) {
else => @compileError("undefined pthread_spin_t for this arch"),
};
-const padded_pthread_spin_t = switch (builtin.cpu.arch) {
+pub const padded_pthread_spin_t = switch (builtin.cpu.arch) {
.x86, .x86_64 => u32,
.sparc, .sparcel, .sparc64 => u32,
else => pthread_spin_t,
@@ -337,15 +299,11 @@ pub const timeval = extern struct {
pub const MAXNAMLEN = 511;
pub const dirent = extern struct {
- d_fileno: ino_t,
- d_reclen: u16,
- d_namlen: u16,
- d_type: u8,
- d_name: [MAXNAMLEN + 1]u8,
-
- pub fn reclen(self: dirent) u16 {
- return self.d_reclen;
- }
+ fileno: ino_t,
+ reclen: u16,
+ namlen: u16,
+ type: u8,
+ name: [MAXNAMLEN + 1]u8,
};
pub const SOCK = struct {
@@ -630,53 +588,6 @@ pub const X_OK = 1; // test for execute or search permission
pub const W_OK = 2; // test for write permission
pub const R_OK = 4; // test for read permission
-pub const O = struct {
- /// open for reading only
- pub const RDONLY = 0x00000000;
- /// open for writing only
- pub const WRONLY = 0x00000001;
- /// open for reading and writing
- pub const RDWR = 0x00000002;
- /// mask for above modes
- pub const ACCMODE = 0x00000003;
- /// no delay
- pub const NONBLOCK = 0x00000004;
- /// set append mode
- pub const APPEND = 0x00000008;
- /// open with shared file lock
- pub const SHLOCK = 0x00000010;
- /// open with exclusive file lock
- pub const EXLOCK = 0x00000020;
- /// signal pgrp when data ready
- pub const ASYNC = 0x00000040;
- /// synchronous writes
- pub const SYNC = 0x00000080;
- /// don't follow symlinks on the last
- pub const NOFOLLOW = 0x00000100;
- /// create if nonexistent
- pub const CREAT = 0x00000200;
- /// truncate to zero length
- pub const TRUNC = 0x00000400;
- /// error if already exists
- pub const EXCL = 0x00000800;
- /// don't assign controlling terminal
- pub const NOCTTY = 0x00008000;
- /// write: I/O data completion
- pub const DSYNC = 0x00010000;
- /// read: I/O completion as for write
- pub const RSYNC = 0x00020000;
- /// use alternate i/o semantics
- pub const ALT_IO = 0x00040000;
- /// direct I/O hint
- pub const DIRECT = 0x00080000;
- /// fail if not a directory
- pub const DIRECTORY = 0x00200000;
- /// set close on exec
- pub const CLOEXEC = 0x00400000;
- /// skip search permission checks
- pub const SEARCH = 0x00800000;
-};
-
pub const F = struct {
pub const DUPFD = 0;
pub const GETFD = 1;
@@ -895,135 +806,13 @@ pub const T = struct {
pub const IOCXMTFRAME = 0x80087444;
};
-// Term
-const V = struct {
- pub const EOF = 0; // ICANON
- pub const EOL = 1; // ICANON
- pub const EOL2 = 2; // ICANON
- pub const ERASE = 3; // ICANON
- pub const WERASE = 4; // ICANON
- pub const KILL = 5; // ICANON
- pub const REPRINT = 6; // ICANON
- // 7 spare 1
- pub const INTR = 8; // ISIG
- pub const QUIT = 9; // ISIG
- pub const SUSP = 10; // ISIG
- pub const DSUSP = 11; // ISIG
- pub const START = 12; // IXON, IXOFF
- pub const STOP = 13; // IXON, IXOFF
- pub const LNEXT = 14; // IEXTEN
- pub const DISCARD = 15; // IEXTEN
- pub const MIN = 16; // !ICANON
- pub const TIME = 17; // !ICANON
- pub const STATUS = 18; // ICANON
- // 19 spare 2
-};
-
-// Input flags - software input processing
-pub const IGNBRK: tcflag_t = 0x00000001; // ignore BREAK condition
-pub const BRKINT: tcflag_t = 0x00000002; // map BREAK to SIGINT
-pub const IGNPAR: tcflag_t = 0x00000004; // ignore (discard) parity errors
-pub const PARMRK: tcflag_t = 0x00000008; // mark parity and framing errors
-pub const INPCK: tcflag_t = 0x00000010; // enable checking of parity errors
-pub const ISTRIP: tcflag_t = 0x00000020; // strip 8th bit off chars
-pub const INLCR: tcflag_t = 0x00000040; // map NL into CR
-pub const IGNCR: tcflag_t = 0x00000080; // ignore CR
-pub const ICRNL: tcflag_t = 0x00000100; // map CR to NL (ala CRMOD)
-pub const IXON: tcflag_t = 0x00000200; // enable output flow control
-pub const IXOFF: tcflag_t = 0x00000400; // enable input flow control
-pub const IXANY: tcflag_t = 0x00000800; // any char will restart after stop
-pub const IMAXBEL: tcflag_t = 0x00002000; // ring bell on input queue full
-
-// Output flags - software output processing
-pub const OPOST: tcflag_t = 0x00000001; // enable following output processing
-pub const ONLCR: tcflag_t = 0x00000002; // map NL to CR-NL (ala CRMOD)
-pub const OXTABS: tcflag_t = 0x00000004; // expand tabs to spaces
-pub const ONOEOT: tcflag_t = 0x00000008; // discard EOT's (^D) on output
-pub const OCRNL: tcflag_t = 0x00000010; // map CR to NL
-pub const ONOCR: tcflag_t = 0x00000040; // discard CR's when on column 0
-pub const ONLRET: tcflag_t = 0x00000080; // move to column 0 on CR
-
-// Control flags - hardware control of terminal
-pub const CIGNORE: tcflag_t = 0x00000001; // ignore control flags
-pub const CSIZE: tcflag_t = 0x00000300; // character size mask
-pub const CS5: tcflag_t = 0x00000000; // 5 bits (pseudo)
-pub const CS6: tcflag_t = 0x00000100; // 6 bits
-pub const CS7: tcflag_t = 0x00000200; // 7 bits
-pub const CS8: tcflag_t = 0x00000300; // 8 bits
-pub const CSTOPB: tcflag_t = 0x00000400; // send 2 stop bits
-pub const CREAD: tcflag_t = 0x00000800; // enable receiver
-pub const PARENB: tcflag_t = 0x00001000; // parity enable
-pub const PARODD: tcflag_t = 0x00002000; // odd parity, else even
-pub const HUPCL: tcflag_t = 0x00004000; // hang up on last close
-pub const CLOCAL: tcflag_t = 0x00008000; // ignore modem status lines
-pub const CRTSCTS: tcflag_t = 0x00010000; // RTS/CTS full-duplex flow control
-pub const CRTS_IFLOW: tcflag_t = CRTSCTS; // XXX compat
-pub const CCTS_OFLOW: tcflag_t = CRTSCTS; // XXX compat
-pub const CDTRCTS: tcflag_t = 0x00020000; // DTR/CTS full-duplex flow control
-pub const MDMBUF: tcflag_t = 0x00100000; // DTR/DCD hardware flow control
-pub const CHWFLOW: tcflag_t = (MDMBUF | CRTSCTS | CDTRCTS); // all types of hw flow control
-
-pub const tcflag_t = c_uint;
-pub const speed_t = c_uint;
-pub const cc_t = u8;
-
-pub const NCCS = 20;
-
-pub const termios = extern struct {
- iflag: tcflag_t, // input flags
- oflag: tcflag_t, // output flags
- cflag: tcflag_t, // control flags
- lflag: tcflag_t, // local flags
- cc: [NCCS]cc_t, // control chars
- ispeed: c_int, // input speed
- ospeed: c_int, // output speed
-};
-
-// Commands passed to tcsetattr() for setting the termios structure.
-pub const TCSA = struct {
- pub const NOW = 0; // make change immediate
- pub const DRAIN = 1; // drain output, then chage
- pub const FLUSH = 2; // drain output, flush input
- pub const SOFT = 0x10; // flag - don't alter h.w. state
+pub const TCSA = enum(c_uint) {
+ NOW,
+ DRAIN,
+ FLUSH,
+ _,
};
-// Standard speeds
-pub const B0: c_uint = 0;
-pub const B50: c_uint = 50;
-pub const B75: c_uint = 75;
-pub const B110: c_uint = 110;
-pub const B134: c_uint = 134;
-pub const B150: c_uint = 150;
-pub const B200: c_uint = 200;
-pub const B300: c_uint = 300;
-pub const B600: c_uint = 600;
-pub const B1200: c_uint = 1200;
-pub const B1800: c_uint = 1800;
-pub const B2400: c_uint = 2400;
-pub const B4800: c_uint = 4800;
-pub const B9600: c_uint = 9600;
-pub const B19200: c_uint = 19200;
-pub const B38400: c_uint = 38400;
-pub const B7200: c_uint = 7200;
-pub const B14400: c_uint = 14400;
-pub const B28800: c_uint = 28800;
-pub const B57600: c_uint = 57600;
-pub const B76800: c_uint = 76800;
-pub const B115200: c_uint = 115200;
-pub const B230400: c_uint = 230400;
-pub const B460800: c_uint = 460800;
-pub const B500000: c_uint = 500000;
-pub const B921600: c_uint = 921600;
-pub const B1000000: c_uint = 1000000;
-pub const B1500000: c_uint = 1500000;
-pub const B2000000: c_uint = 2000000;
-pub const B2500000: c_uint = 2500000;
-pub const B3000000: c_uint = 3000000;
-pub const B3500000: c_uint = 3500000;
-pub const B4000000: c_uint = 4000000;
-pub const EXTA: c_uint = 19200;
-pub const EXTB: c_uint = 38400;
-
pub const TCIFLUSH = 1;
pub const TCOFLUSH = 2;
pub const TCIOFLUSH = 3;
@@ -1466,21 +1255,6 @@ pub const S = struct {
}
};
-pub const AT = struct {
- /// Magic value that specify the use of the current working directory
- /// to determine the target of relative file paths in the openat() and
- /// similar syscalls.
- pub const FDCWD = -100;
- /// Check access using effective user and group ID
- pub const EACCESS = 0x0100;
- /// Do not follow symbolic links
- pub const SYMLINK_NOFOLLOW = 0x0200;
- /// Follow symbolic link
- pub const SYMLINK_FOLLOW = 0x0400;
- /// Remove directory instead of file
- pub const REMOVEDIR = 0x0800;
-};
-
pub const HOST_NAME_MAX = 255;
pub const IPPROTO = struct {
diff --git a/lib/std/c/openbsd.zig b/lib/std/c/openbsd.zig
index a5e275913b..4b02517bef 100644
--- a/lib/std/c/openbsd.zig
+++ b/lib/std/c/openbsd.zig
@@ -14,20 +14,11 @@ pub extern "c" fn dl_iterate_phdr(callback: dl_iterate_phdr_callback, data: ?*an
pub extern "c" fn arc4random_buf(buf: [*]u8, len: usize) void;
pub extern "c" fn getthrid() pid_t;
-pub extern "c" fn pipe2(fds: *[2]fd_t, flags: u32) c_int;
+pub extern "c" fn pipe2(fds: *[2]fd_t, flags: std.c.O) c_int;
pub extern "c" fn getdents(fd: c_int, buf_ptr: [*]u8, nbytes: usize) c_int;
pub extern "c" fn sigaltstack(ss: ?*stack_t, old_ss: ?*stack_t) c_int;
-pub const pthread_mutex_t = extern struct {
- inner: ?*anyopaque = null,
-};
-pub const pthread_cond_t = extern struct {
- inner: ?*anyopaque = null,
-};
-pub const pthread_rwlock_t = extern struct {
- ptr: ?*anyopaque = null,
-};
pub const pthread_spinlock_t = extern struct {
inner: ?*anyopaque = null,
};
@@ -336,17 +327,13 @@ pub const timezone = extern struct {
pub const MAXNAMLEN = 255;
pub const dirent = extern struct {
- d_fileno: ino_t,
- d_off: off_t,
- d_reclen: u16,
- d_type: u8,
- d_namlen: u8,
- __d_padding: [4]u8,
- d_name: [MAXNAMLEN + 1]u8,
-
- pub fn reclen(self: dirent) u16 {
- return self.d_reclen;
- }
+ fileno: ino_t,
+ off: off_t,
+ reclen: u16,
+ type: u8,
+ namlen: u8,
+ _: u32 align(1) = 0,
+ name: [MAXNAMLEN + 1]u8,
};
pub const in_port_t = u16;
@@ -489,47 +476,6 @@ pub const X_OK = 1; // test for execute or search permission
pub const W_OK = 2; // test for write permission
pub const R_OK = 4; // test for read permission
-pub const O = struct {
- /// open for reading only
- pub const RDONLY = 0x00000000;
- /// open for writing only
- pub const WRONLY = 0x00000001;
- /// open for reading and writing
- pub const RDWR = 0x00000002;
- /// mask for above modes
- pub const ACCMODE = 0x00000003;
- /// no delay
- pub const NONBLOCK = 0x00000004;
- /// set append mode
- pub const APPEND = 0x00000008;
- /// open with shared file lock
- pub const SHLOCK = 0x00000010;
- /// open with exclusive file lock
- pub const EXLOCK = 0x00000020;
- /// signal pgrp when data ready
- pub const ASYNC = 0x00000040;
- /// synchronous writes
- pub const SYNC = 0x00000080;
- /// don't follow symlinks on the last
- pub const NOFOLLOW = 0x00000100;
- /// create if nonexistent
- pub const CREAT = 0x00000200;
- /// truncate to zero length
- pub const TRUNC = 0x00000400;
- /// error if already exists
- pub const EXCL = 0x00000800;
- /// don't assign controlling terminal
- pub const NOCTTY = 0x00008000;
- /// write: I/O data completion
- pub const DSYNC = SYNC;
- /// read: I/O completion as for write
- pub const RSYNC = SYNC;
- /// fail if not a directory
- pub const DIRECTORY = 0x20000;
- /// set close on exec
- pub const CLOEXEC = 0x10000;
-};
-
pub const F = struct {
pub const DUPFD = 0;
pub const GETFD = 1;
@@ -822,126 +768,13 @@ pub const AUTH = struct {
pub const ALLOW: c_int = (OKAY | ROOTOKAY | SECURE);
};
-// Term
-pub const V = struct {
- pub const EOF = 0; // ICANON
- pub const EOL = 1; // ICANON
- pub const EOL2 = 2; // ICANON
- pub const ERASE = 3; // ICANON
- pub const WERASE = 4; // ICANON
- pub const KILL = 5; // ICANON
- pub const REPRINT = 6; // ICANON
- // 7 spare 1
- pub const INTR = 8; // ISIG
- pub const QUIT = 9; // ISIG
- pub const SUSP = 10; // ISIG
- pub const DSUSP = 11; // ISIG
- pub const START = 12; // IXON, IXOFF
- pub const STOP = 13; // IXON, IXOFF
- pub const LNEXT = 14; // IEXTEN
- pub const DISCARD = 15; // IEXTEN
- pub const MIN = 16; // !ICANON
- pub const TIME = 17; // !ICANON
- pub const STATUS = 18; // ICANON
- // 19 spare 2
-};
-
-pub const tcflag_t = c_uint;
-pub const speed_t = c_uint;
-pub const cc_t = u8;
-
-pub const NCCS = 20;
-
-// Input flags - software input processing
-pub const IGNBRK: tcflag_t = 0x00000001; // ignore BREAK condition
-pub const BRKINT: tcflag_t = 0x00000002; // map BREAK to SIGINT
-pub const IGNPAR: tcflag_t = 0x00000004; // ignore (discard) parity errors
-pub const PARMRK: tcflag_t = 0x00000008; // mark parity and framing errors
-pub const INPCK: tcflag_t = 0x00000010; // enable checking of parity errors
-pub const ISTRIP: tcflag_t = 0x00000020; // strip 8th bit off chars
-pub const INLCR: tcflag_t = 0x00000040; // map NL into CR
-pub const IGNCR: tcflag_t = 0x00000080; // ignore CR
-pub const ICRNL: tcflag_t = 0x00000100; // map CR to NL (ala CRMOD)
-pub const IXON: tcflag_t = 0x00000200; // enable output flow control
-pub const IXOFF: tcflag_t = 0x00000400; // enable input flow control
-pub const IXANY: tcflag_t = 0x00000800; // any char will restart after stop
-pub const IUCLC: tcflag_t = 0x00001000; // translate upper to lower case
-pub const IMAXBEL: tcflag_t = 0x00002000; // ring bell on input queue full
-
-// Output flags - software output processing
-pub const OPOST: tcflag_t = 0x00000001; // enable following output processing
-pub const ONLCR: tcflag_t = 0x00000002; // map NL to CR-NL (ala CRMOD)
-pub const OXTABS: tcflag_t = 0x00000004; // expand tabs to spaces
-pub const ONOEOT: tcflag_t = 0x00000008; // discard EOT's (^D) on output
-pub const OCRNL: tcflag_t = 0x00000010; // map CR to NL
-pub const OLCUC: tcflag_t = 0x00000020; // translate lower case to upper case
-pub const ONOCR: tcflag_t = 0x00000040; // No CR output at column 0
-pub const ONLRET: tcflag_t = 0x00000080; // NL performs the CR function
-
-// Control flags - hardware control of terminal
-pub const CIGNORE: tcflag_t = 0x00000001; // ignore control flags
-pub const CSIZE: tcflag_t = 0x00000300; // character size mask
-pub const CS5: tcflag_t = 0x00000000; // 5 bits (pseudo)
-pub const CS6: tcflag_t = 0x00000100; // 6 bits
-pub const CS7: tcflag_t = 0x00000200; // 7 bits
-pub const CS8: tcflag_t = 0x00000300; // 8 bits
-pub const CSTOPB: tcflag_t = 0x00000400; // send 2 stop bits
-pub const CREAD: tcflag_t = 0x00000800; // enable receiver
-pub const PARENB: tcflag_t = 0x00001000; // parity enable
-pub const PARODD: tcflag_t = 0x00002000; // odd parity, else even
-pub const HUPCL: tcflag_t = 0x00004000; // hang up on last close
-pub const CLOCAL: tcflag_t = 0x00008000; // ignore modem status lines
-pub const CRTSCTS: tcflag_t = 0x00010000; // RTS/CTS full-duplex flow control
-pub const CRTS_IFLOW: tcflag_t = CRTSCTS; // XXX compat
-pub const CCTS_OFLOW: tcflag_t = CRTSCTS; // XXX compat
-pub const MDMBUF: tcflag_t = 0x00100000; // DTR/DCD hardware flow control
-pub const CHWFLOW: tcflag_t = (MDMBUF | CRTSCTS); // all types of hw flow control
-
-pub const termios = extern struct {
- iflag: tcflag_t, // input flags
- oflag: tcflag_t, // output flags
- cflag: tcflag_t, // control flags
- lflag: tcflag_t, // local flags
- cc: [NCCS]cc_t, // control chars
- ispeed: c_int, // input speed
- ospeed: c_int, // output speed
-};
-
-// Commands passed to tcsetattr() for setting the termios structure.
-pub const TCSA = struct {
- pub const NOW = 0; // make change immediate
- pub const DRAIN = 1; // drain output, then change
- pub const FLUSH = 2; // drain output, flush input
- pub const SOFT = 0x10; // flag - don't alter h.w. state
+pub const TCSA = enum(c_uint) {
+ NOW,
+ DRAIN,
+ FLUSH,
+ _,
};
-// Standard speeds
-pub const B0 = 0;
-pub const B50 = 50;
-pub const B75 = 75;
-pub const B110 = 110;
-pub const B134 = 134;
-pub const B150 = 150;
-pub const B200 = 200;
-pub const B300 = 300;
-pub const B600 = 600;
-pub const B1200 = 1200;
-pub const B1800 = 1800;
-pub const B2400 = 2400;
-pub const B4800 = 4800;
-pub const B9600 = 9600;
-pub const B19200 = 19200;
-pub const B38400 = 38400;
-pub const B7200 = 7200;
-pub const B14400 = 14400;
-pub const B28800 = 28800;
-pub const B57600 = 57600;
-pub const B76800 = 76800;
-pub const B115200 = 115200;
-pub const B230400 = 230400;
-pub const EXTA = 19200;
-pub const EXTB = 38400;
-
pub const TCIFLUSH = 1;
pub const TCOFLUSH = 2;
pub const TCIOFLUSH = 3;
@@ -1312,21 +1145,6 @@ pub const S = struct {
}
};
-pub const AT = struct {
- /// Magic value that specify the use of the current working directory
- /// to determine the target of relative file paths in the openat() and
- /// similar syscalls.
- pub const FDCWD = -100;
- /// Check access using effective user and group ID
- pub const EACCESS = 0x01;
- /// Do not follow symbolic links
- pub const SYMLINK_NOFOLLOW = 0x02;
- /// Follow symbolic link
- pub const SYMLINK_FOLLOW = 0x04;
- /// Remove directory instead of file
- pub const REMOVEDIR = 0x08;
-};
-
pub const HOST_NAME_MAX = 255;
pub const IPPROTO = struct {
diff --git a/lib/std/c/solaris.zig b/lib/std/c/solaris.zig
index c533ef2ca5..ef64acd43b 100644
--- a/lib/std/c/solaris.zig
+++ b/lib/std/c/solaris.zig
@@ -14,36 +14,13 @@ pub extern "c" fn dl_iterate_phdr(callback: dl_iterate_phdr_callback, data: ?*an
pub extern "c" fn getdents(fd: c_int, buf_ptr: [*]u8, nbytes: usize) usize;
pub extern "c" fn sigaltstack(ss: ?*stack_t, old_ss: ?*stack_t) c_int;
-pub extern "c" fn pipe2(fds: *[2]fd_t, flags: u32) c_int;
+pub extern "c" fn pipe2(fds: *[2]fd_t, flags: std.c.O) c_int;
pub extern "c" fn arc4random_buf(buf: [*]u8, len: usize) void;
pub extern "c" fn posix_memalign(memptr: *?*anyopaque, alignment: usize, size: usize) c_int;
pub extern "c" fn sysconf(sc: c_int) i64;
pub extern "c" fn signalfd(fd: fd_t, mask: *const sigset_t, flags: u32) c_int;
pub extern "c" fn madvise(address: [*]u8, len: usize, advise: u32) c_int;
-pub const pthread_mutex_t = extern struct {
- flag1: u16 = 0,
- flag2: u8 = 0,
- ceiling: u8 = 0,
- type: u16 = 0,
- magic: u16 = 0x4d58,
- lock: u64 = 0,
- data: u64 = 0,
-};
-pub const pthread_cond_t = extern struct {
- flag: [4]u8 = [_]u8{0} ** 4,
- type: u16 = 0,
- magic: u16 = 0x4356,
- data: u64 = 0,
-};
-pub const pthread_rwlock_t = extern struct {
- readers: i32 = 0,
- type: u16 = 0,
- magic: u16 = 0x5257,
- mutex: pthread_mutex_t = .{},
- readercv: pthread_cond_t = .{},
- writercv: pthread_cond_t = .{},
-};
pub const pthread_attr_t = extern struct {
mutexattr: ?*anyopaque = null,
};
@@ -266,17 +243,13 @@ pub const MAXNAMLEN = 511;
pub const dirent = extern struct {
/// Inode number of entry.
- d_ino: ino_t,
+ ino: ino_t,
/// Offset of this entry on disk.
- d_off: off_t,
+ off: off_t,
/// Length of this record.
- d_reclen: u16,
+ reclen: u16,
/// File name.
- d_name: [MAXNAMLEN:0]u8,
-
- pub fn reclen(self: dirent) u16 {
- return self.d_reclen;
- }
+ name: [MAXNAMLEN:0]u8,
};
pub const SOCK = struct {
@@ -708,32 +681,6 @@ pub const F = struct {
pub const RMDNY = 0x4;
};
-pub const O = struct {
- pub const RDONLY = 0;
- pub const WRONLY = 1;
- pub const RDWR = 2;
- pub const SEARCH = 0x200000;
- pub const EXEC = 0x400000;
- pub const NDELAY = 0x04;
- pub const APPEND = 0x08;
- pub const SYNC = 0x10;
- pub const DSYNC = 0x40;
- pub const RSYNC = 0x8000;
- pub const NONBLOCK = 0x80;
- pub const LARGEFILE = 0x2000;
-
- pub const CREAT = 0x100;
- pub const TRUNC = 0x200;
- pub const EXCL = 0x400;
- pub const NOCTTY = 0x800;
- pub const XATTR = 0x4000;
- pub const NOFOLLOW = 0x20000;
- pub const NOLINKS = 0x40000;
- pub const CLOEXEC = 0x800000;
- pub const DIRECTORY = 0x1000000;
- pub const DIRECT = 0x2000000;
-};
-
pub const LOCK = struct {
pub const SH = 1;
pub const EX = 2;
@@ -751,20 +698,6 @@ pub const SEEK = struct {
pub const HOLE = 4;
};
-pub const tcflag_t = c_uint;
-pub const cc_t = u8;
-pub const speed_t = c_uint;
-
-pub const NCCS = 19;
-
-pub const termios = extern struct {
- c_iflag: tcflag_t,
- c_oflag: tcflag_t,
- c_cflag: tcflag_t,
- c_lflag: tcflag_t,
- c_cc: [NCCS]cc_t,
-};
-
fn tioc(t: u16, num: u8) u16 {
return (t << 8) | num;
}
@@ -1430,23 +1363,6 @@ pub const S = struct {
}
};
-pub const AT = struct {
- /// Magic value that specify the use of the current working directory
- /// to determine the target of relative file paths in the openat() and
- /// similar syscalls.
- pub const FDCWD = @as(fd_t, @bitCast(@as(u32, 0xffd19553)));
-
- /// Do not follow symbolic links
- pub const SYMLINK_NOFOLLOW = 0x1000;
- /// Follow symbolic link
- pub const SYMLINK_FOLLOW = 0x2000;
- /// Remove directory instead of file
- pub const REMOVEDIR = 0x1;
- pub const TRIGGER = 0x2;
- /// Check access using effective user and group ID
- pub const EACCESS = 0x4;
-};
-
pub const POSIX_FADV = struct {
pub const NORMAL = 0;
pub const RANDOM = 1;
diff --git a/lib/std/c/wasi.zig b/lib/std/c/wasi.zig
index e1940054b6..95558787ae 100644
--- a/lib/std/c/wasi.zig
+++ b/lib/std/c/wasi.zig
@@ -1,6 +1,6 @@
+const builtin = @import("builtin");
const std = @import("../std.zig");
const wasi = std.os.wasi;
-const FDFLAG = wasi.FDFLAG;
extern threadlocal var errno: c_int;
@@ -8,42 +8,82 @@ pub fn _errno() *c_int {
return &errno;
}
-pub const AT = wasi.AT;
-pub const CLOCK = wasi.CLOCK;
-pub const E = wasi.E;
-pub const IOV_MAX = wasi.IOV_MAX;
-pub const LOCK = wasi.LOCK;
-pub const S = wasi.S;
-pub const STDERR_FILENO = wasi.STDERR_FILENO;
-pub const STDIN_FILENO = wasi.STDIN_FILENO;
-pub const STDOUT_FILENO = wasi.STDOUT_FILENO;
+pub const mode_t = u32;
+pub const time_t = i64;
+
+pub const timespec = extern struct {
+ tv_sec: time_t,
+ tv_nsec: isize,
+
+ pub fn fromTimestamp(tm: wasi.timestamp_t) timespec {
+ const tv_sec: wasi.timestamp_t = tm / 1_000_000_000;
+ const tv_nsec = tm - tv_sec * 1_000_000_000;
+ return .{
+ .tv_sec = @as(time_t, @intCast(tv_sec)),
+ .tv_nsec = @as(isize, @intCast(tv_nsec)),
+ };
+ }
+
+ pub fn toTimestamp(ts: timespec) wasi.timestamp_t {
+ return @as(wasi.timestamp_t, @intCast(ts.tv_sec * 1_000_000_000)) +
+ @as(wasi.timestamp_t, @intCast(ts.tv_nsec));
+ }
+};
+
+pub const STDIN_FILENO = 0;
+pub const STDOUT_FILENO = 1;
+pub const STDERR_FILENO = 2;
+
+pub const E = wasi.errno_t;
+
+pub const CLOCK = wasi.clockid_t;
+pub const IOV_MAX = 1024;
+pub const LOCK = struct {
+ pub const SH = 0x1;
+ pub const EX = 0x2;
+ pub const NB = 0x4;
+ pub const UN = 0x8;
+};
+pub const S = struct {
+ pub const IEXEC = @compileError("TODO audit this");
+ pub const IFBLK = 0x6000;
+ pub const IFCHR = 0x2000;
+ pub const IFDIR = 0x4000;
+ pub const IFIFO = 0xc000;
+ pub const IFLNK = 0xa000;
+ pub const IFMT = IFBLK | IFCHR | IFDIR | IFIFO | IFLNK | IFREG | IFSOCK;
+ pub const IFREG = 0x8000;
+ /// There's no concept of UNIX domain socket but we define this value here
+ /// in order to line with other OSes.
+ pub const IFSOCK = 0x1;
+};
pub const fd_t = wasi.fd_t;
pub const pid_t = c_int;
pub const uid_t = u32;
pub const gid_t = u32;
pub const off_t = i64;
-pub const ino_t = wasi.ino_t;
-pub const mode_t = wasi.mode_t;
-pub const time_t = wasi.time_t;
-pub const timespec = wasi.timespec;
+pub const ino_t = wasi.inode_t;
+pub const dev_t = wasi.device_t;
+pub const nlink_t = c_ulonglong;
+pub const blksize_t = c_long;
+pub const blkcnt_t = c_longlong;
pub const Stat = extern struct {
- dev: i32,
+ dev: dev_t,
ino: ino_t,
- nlink: u64,
-
+ nlink: nlink_t,
mode: mode_t,
uid: uid_t,
gid: gid_t,
- __pad0: isize,
- rdev: i32,
+ __pad0: c_uint = 0,
+ rdev: dev_t,
size: off_t,
- blksize: i32,
- blocks: i64,
-
+ blksize: blksize_t,
+ blocks: blkcnt_t,
atim: timespec,
mtim: timespec,
ctim: timespec,
+ __reserved: [3]c_longlong = [3]c_longlong{ 0, 0, 0 },
pub fn atime(self: @This()) timespec {
return self.atim;
@@ -56,30 +96,35 @@ pub const Stat = extern struct {
pub fn ctime(self: @This()) timespec {
return self.ctim;
}
-};
-/// Derived from
-/// https://github.com/WebAssembly/wasi-libc/blob/main/expected/wasm32-wasi/predefined-macros.txt
-pub const O = struct {
- pub const ACCMODE = (EXEC | RDWR | SEARCH);
- pub const APPEND = @as(u32, FDFLAG.APPEND);
- pub const CLOEXEC = (0);
- pub const CREAT = ((1 << 0) << 12); // = __WASI_OFLAGS_CREAT << 12
- pub const DIRECTORY = ((1 << 1) << 12); // = __WASI_OFLAGS_DIRECTORY << 12
- pub const DSYNC = @as(u32, FDFLAG.DSYNC);
- pub const EXCL = ((1 << 2) << 12); // = __WASI_OFLAGS_EXCL << 12
- pub const EXEC = (0x02000000);
- pub const NOCTTY = (0);
- pub const NOFOLLOW = (0x01000000);
- pub const NONBLOCK = @as(u32, FDFLAG.NONBLOCK);
- pub const RDONLY = (0x04000000);
- pub const RDWR = (RDONLY | WRONLY);
- pub const RSYNC = @as(u32, FDFLAG.RSYNC);
- pub const SEARCH = (0x08000000);
- pub const SYNC = @as(u32, FDFLAG.SYNC);
- pub const TRUNC = ((1 << 3) << 12); // = __WASI_OFLAGS_TRUNC << 12
- pub const TTY_INIT = (0);
- pub const WRONLY = (0x10000000);
+ pub fn fromFilestat(stat: wasi.filestat_t) Stat {
+ return .{
+ .dev = stat.dev,
+ .ino = stat.ino,
+ .mode = switch (stat.filetype) {
+ .UNKNOWN => 0,
+ .BLOCK_DEVICE => S.IFBLK,
+ .CHARACTER_DEVICE => S.IFCHR,
+ .DIRECTORY => S.IFDIR,
+ .REGULAR_FILE => S.IFREG,
+ .SOCKET_DGRAM => S.IFSOCK,
+ .SOCKET_STREAM => S.IFIFO,
+ .SYMBOLIC_LINK => S.IFLNK,
+ _ => 0,
+ },
+ .nlink = stat.nlink,
+ .size = @intCast(stat.size),
+ .atim = timespec.fromTimestamp(stat.atim),
+ .mtim = timespec.fromTimestamp(stat.mtim),
+ .ctim = timespec.fromTimestamp(stat.ctim),
+
+ .uid = 0,
+ .gid = 0,
+ .rdev = 0,
+ .blksize = 0,
+ .blocks = 0,
+ };
+ }
};
pub const F = struct {
diff --git a/lib/std/c/windows.zig b/lib/std/c/windows.zig
index b6ce03d21e..758f3dbadc 100644
--- a/lib/std/c/windows.zig
+++ b/lib/std/c/windows.zig
@@ -11,7 +11,6 @@ pub extern "c" fn _msize(memblock: ?*anyopaque) usize;
// need to verify which of these is actually supported on windows
pub extern "c" fn clock_getres(clk_id: c_int, tp: *timespec) c_int;
pub extern "c" fn clock_gettime(clk_id: c_int, tp: *timespec) c_int;
-pub extern "c" fn fstat(fd: fd_t, buf: *Stat) c_int;
pub extern "c" fn getrusage(who: c_int, usage: *rusage) c_int;
pub extern "c" fn gettimeofday(noalias tv: ?*timeval, noalias tz: ?*timezone) c_int;
pub extern "c" fn nanosleep(rqtp: *const timespec, rmtp: ?*timespec) c_int;
@@ -200,11 +199,6 @@ pub const STRUNCATE = 80;
pub const F_OK = 0;
-/// Remove directory instead of unlinking file
-pub const AT = struct {
- pub const REMOVEDIR = 0x200;
-};
-
pub const in_port_t = u16;
pub const sa_family_t = ws2_32.ADDRESS_FAMILY;
pub const socklen_t = ws2_32.socklen_t;
@@ -229,31 +223,4 @@ pub const SOL = ws2_32.SOL;
pub const SO = ws2_32.SO;
pub const PVD_CONFIG = ws2_32.PVD_CONFIG;
-pub const O = struct {
- pub const RDONLY = 0o0;
- pub const WRONLY = 0o1;
- pub const RDWR = 0o2;
-
- pub const CREAT = 0o100;
- pub const EXCL = 0o200;
- pub const NOCTTY = 0o400;
- pub const TRUNC = 0o1000;
- pub const APPEND = 0o2000;
- pub const NONBLOCK = 0o4000;
- pub const DSYNC = 0o10000;
- pub const SYNC = 0o4010000;
- pub const RSYNC = 0o4010000;
- pub const DIRECTORY = 0o200000;
- pub const NOFOLLOW = 0o400000;
- pub const CLOEXEC = 0o2000000;
-
- pub const ASYNC = 0o20000;
- pub const DIRECT = 0o40000;
- pub const LARGEFILE = 0;
- pub const NOATIME = 0o1000000;
- pub const PATH = 0o10000000;
- pub const TMPFILE = 0o20200000;
- pub const NDELAY = NONBLOCK;
-};
-
pub const IFNAMESIZE = 30;
diff --git a/lib/std/child_process.zig b/lib/std/child_process.zig
index 8b15b7d63b..eb0c8c13b8 100644
--- a/lib/std/child_process.zig
+++ b/lib/std/child_process.zig
@@ -129,10 +129,9 @@ pub const ChildProcess = struct {
/// POSIX-only. `StdIo.Ignore` was selected and opening `/dev/null` returned ENODEV.
NoDevice,
- /// Windows-only. One of:
- /// * `cwd` was provided and it could not be re-encoded into UTF16LE, or
- /// * The `PATH` or `PATHEXT` environment variable contained invalid UTF-8.
- InvalidUtf8,
+ /// Windows-only. `cwd` or `argv` was provided and it was invalid WTF-8.
+ /// https://simonsapin.github.io/wtf-8/
+ InvalidWtf8,
/// Windows-only. `cwd` was provided, but the path did not exist when spawning the child process.
CurrentWorkingDirectoryUnlinked,
@@ -495,7 +494,7 @@ pub const ChildProcess = struct {
}
fn spawnPosix(self: *ChildProcess) SpawnError!void {
- const pipe_flags = if (io.is_async) os.O.NONBLOCK else 0;
+ const pipe_flags: os.O = .{};
const stdin_pipe = if (self.stdin_behavior == StdIo.Pipe) try os.pipe2(pipe_flags) else undefined;
errdefer if (self.stdin_behavior == StdIo.Pipe) {
destroyPipe(stdin_pipe);
@@ -513,7 +512,7 @@ pub const ChildProcess = struct {
const any_ignore = (self.stdin_behavior == StdIo.Ignore or self.stdout_behavior == StdIo.Ignore or self.stderr_behavior == StdIo.Ignore);
const dev_null_fd = if (any_ignore)
- os.openZ("/dev/null", os.O.RDWR, 0) catch |err| switch (err) {
+ os.openZ("/dev/null", .{ .ACCMODE = .RDWR }, 0) catch |err| switch (err) {
error.PathAlreadyExists => unreachable,
error.NoSpaceLeft => unreachable,
error.FileTooBig => unreachable,
@@ -572,7 +571,7 @@ pub const ChildProcess = struct {
// end with eventfd
break :blk [2]os.fd_t{ fd, fd };
} else {
- break :blk try os.pipe2(os.O.CLOEXEC);
+ break :blk try os.pipe2(.{ .CLOEXEC = true });
}
};
errdefer destroyPipe(err_pipe);
@@ -667,15 +666,15 @@ pub const ChildProcess = struct {
.share_access = windows.FILE_SHARE_READ | windows.FILE_SHARE_WRITE,
.sa = &saAttr,
.creation = windows.OPEN_EXISTING,
- .io_mode = .blocking,
}) catch |err| switch (err) {
- error.PathAlreadyExists => unreachable, // not possible for "NUL"
- error.PipeBusy => unreachable, // not possible for "NUL"
- error.FileNotFound => unreachable, // not possible for "NUL"
- error.AccessDenied => unreachable, // not possible for "NUL"
- error.NameTooLong => unreachable, // not possible for "NUL"
- error.WouldBlock => unreachable, // not possible for "NUL"
- error.NetworkNotFound => unreachable, // not possible for "NUL"
+ error.PathAlreadyExists => return error.Unexpected, // not possible for "NUL"
+ error.PipeBusy => return error.Unexpected, // not possible for "NUL"
+ error.FileNotFound => return error.Unexpected, // not possible for "NUL"
+ error.AccessDenied => return error.Unexpected, // not possible for "NUL"
+ error.NameTooLong => return error.Unexpected, // not possible for "NUL"
+ error.WouldBlock => return error.Unexpected, // not possible for "NUL"
+ error.NetworkNotFound => return error.Unexpected, // not possible for "NUL"
+ error.AntivirusInterference => return error.Unexpected, // not possible for "NUL"
else => |e| return e,
}
else
@@ -720,7 +719,7 @@ pub const ChildProcess = struct {
g_hChildStd_OUT_Wr = null;
},
}
- errdefer if (self.stdin_behavior == StdIo.Pipe) {
+ errdefer if (self.stdout_behavior == StdIo.Pipe) {
windowsDestroyPipe(g_hChildStd_OUT_Rd, g_hChildStd_OUT_Wr);
};
@@ -740,7 +739,7 @@ pub const ChildProcess = struct {
g_hChildStd_ERR_Wr = null;
},
}
- errdefer if (self.stdin_behavior == StdIo.Pipe) {
+ errdefer if (self.stderr_behavior == StdIo.Pipe) {
windowsDestroyPipe(g_hChildStd_ERR_Rd, g_hChildStd_ERR_Wr);
};
@@ -767,7 +766,7 @@ pub const ChildProcess = struct {
};
var piProcInfo: windows.PROCESS_INFORMATION = undefined;
- const cwd_w = if (self.cwd) |cwd| try unicode.utf8ToUtf16LeWithNull(self.allocator, cwd) else null;
+ const cwd_w = if (self.cwd) |cwd| try unicode.wtf8ToWtf16LeAllocZ(self.allocator, cwd) else null;
defer if (cwd_w) |cwd| self.allocator.free(cwd);
const cwd_w_ptr = if (cwd_w) |cwd| cwd.ptr else null;
@@ -775,8 +774,8 @@ pub const ChildProcess = struct {
defer if (maybe_envp_buf) |envp_buf| self.allocator.free(envp_buf);
const envp_ptr = if (maybe_envp_buf) |envp_buf| envp_buf.ptr else null;
- const app_name_utf8 = self.argv[0];
- const app_name_is_absolute = fs.path.isAbsolute(app_name_utf8);
+ const app_name_wtf8 = self.argv[0];
+ const app_name_is_absolute = fs.path.isAbsolute(app_name_wtf8);
// the cwd set in ChildProcess is in effect when choosing the executable path
// to match posix semantics
@@ -785,11 +784,11 @@ pub const ChildProcess = struct {
// If the app name is absolute, then we need to use its dirname as the cwd
if (app_name_is_absolute) {
cwd_path_w_needs_free = true;
- const dir = fs.path.dirname(app_name_utf8).?;
- break :x try unicode.utf8ToUtf16LeWithNull(self.allocator, dir);
+ const dir = fs.path.dirname(app_name_wtf8).?;
+ break :x try unicode.wtf8ToWtf16LeAllocZ(self.allocator, dir);
} else if (self.cwd) |cwd| {
cwd_path_w_needs_free = true;
- break :x try unicode.utf8ToUtf16LeWithNull(self.allocator, cwd);
+ break :x try unicode.wtf8ToWtf16LeAllocZ(self.allocator, cwd);
} else {
break :x &[_:0]u16{}; // empty for cwd
}
@@ -800,19 +799,19 @@ pub const ChildProcess = struct {
// into the basename and dirname and use the dirname as an addition to the cwd
// path. This is because NtQueryDirectoryFile cannot accept FileName params with
// path separators.
- const app_basename_utf8 = fs.path.basename(app_name_utf8);
+ const app_basename_wtf8 = fs.path.basename(app_name_wtf8);
// If the app name is absolute, then the cwd will already have the app's dirname in it,
// so only populate app_dirname if app name is a relative path with > 0 path separators.
- const maybe_app_dirname_utf8 = if (!app_name_is_absolute) fs.path.dirname(app_name_utf8) else null;
+ const maybe_app_dirname_wtf8 = if (!app_name_is_absolute) fs.path.dirname(app_name_wtf8) else null;
const app_dirname_w: ?[:0]u16 = x: {
- if (maybe_app_dirname_utf8) |app_dirname_utf8| {
- break :x try unicode.utf8ToUtf16LeWithNull(self.allocator, app_dirname_utf8);
+ if (maybe_app_dirname_wtf8) |app_dirname_wtf8| {
+ break :x try unicode.wtf8ToWtf16LeAllocZ(self.allocator, app_dirname_wtf8);
}
break :x null;
};
defer if (app_dirname_w != null) self.allocator.free(app_dirname_w.?);
- const app_name_w = try unicode.utf8ToUtf16LeWithNull(self.allocator, app_basename_utf8);
+ const app_name_w = try unicode.wtf8ToWtf16LeAllocZ(self.allocator, app_basename_wtf8);
defer self.allocator.free(app_name_w);
const cmd_line_w = argvToCommandLineWindows(self.allocator, self.argv) catch |err| switch (err) {
@@ -1173,7 +1172,7 @@ const CreateProcessSupportedExtension = enum {
exe,
};
-/// Case-insensitive UTF-16 lookup
+/// Case-insensitive WTF-16 lookup
fn windowsCreateProcessSupportsExtension(ext: []const u16) ?CreateProcessSupportedExtension {
if (ext.len != 4) return null;
const State = enum {
@@ -1237,7 +1236,7 @@ test "windowsCreateProcessSupportsExtension" {
try std.testing.expect(windowsCreateProcessSupportsExtension(&[_]u16{ '.', 'e', 'X', 'e', 'c' }) == null);
}
-pub const ArgvToCommandLineError = error{ OutOfMemory, InvalidUtf8, InvalidArg0 };
+pub const ArgvToCommandLineError = error{ OutOfMemory, InvalidWtf8, InvalidArg0 };
/// Serializes `argv` to a Windows command-line string suitable for passing to a child process and
/// parsing by the `CommandLineToArgvW` algorithm. The caller owns the returned slice.
@@ -1320,7 +1319,7 @@ pub fn argvToCommandLineWindows(
}
}
- return try unicode.utf8ToUtf16LeWithNull(allocator, buf.items);
+ return try unicode.wtf8ToWtf16LeAllocZ(allocator, buf.items);
}
test "argvToCommandLineWindows" {
@@ -1386,7 +1385,7 @@ fn testArgvToCommandLineWindows(argv: []const []const u8, expected_cmd_line: []c
const cmd_line_w = try argvToCommandLineWindows(std.testing.allocator, argv);
defer std.testing.allocator.free(cmd_line_w);
- const cmd_line = try unicode.utf16leToUtf8Alloc(std.testing.allocator, cmd_line_w);
+ const cmd_line = try unicode.wtf16LeToWtf8Alloc(std.testing.allocator, cmd_line_w);
defer std.testing.allocator.free(cmd_line);
try std.testing.expectEqualStrings(expected_cmd_line, cmd_line);
@@ -1424,7 +1423,7 @@ fn windowsMakeAsyncPipe(rd: *?windows.HANDLE, wr: *?windows.HANDLE, sattr: *cons
"\\\\.\\pipe\\zig-childprocess-{d}-{d}",
.{ windows.kernel32.GetCurrentProcessId(), pipe_name_counter.fetchAdd(1, .Monotonic) },
) catch unreachable;
- const len = std.unicode.utf8ToUtf16Le(&tmp_bufw, pipe_path) catch unreachable;
+ const len = std.unicode.wtf8ToWtf16Le(&tmp_bufw, pipe_path) catch unreachable;
tmp_bufw[len] = 0;
break :blk tmp_bufw[0..len :0];
};
@@ -1493,20 +1492,12 @@ fn forkChildErrReport(fd: i32, err: ChildProcess.SpawnError) noreturn {
const ErrInt = std.meta.Int(.unsigned, @sizeOf(anyerror) * 8);
fn writeIntFd(fd: i32, value: ErrInt) !void {
- const file = File{
- .handle = fd,
- .capable_io_mode = .blocking,
- .intended_io_mode = .blocking,
- };
+ const file = File{ .handle = fd };
file.writer().writeInt(u64, @intCast(value), .little) catch return error.SystemResources;
}
fn readIntFd(fd: i32) !ErrInt {
- const file = File{
- .handle = fd,
- .capable_io_mode = .blocking,
- .intended_io_mode = .blocking,
- };
+ const file = File{ .handle = fd };
return @as(ErrInt, @intCast(file.reader().readInt(u64, .little) catch return error.SystemResources));
}
@@ -1529,10 +1520,10 @@ pub fn createWindowsEnvBlock(allocator: mem.Allocator, env_map: *const EnvMap) !
var it = env_map.iterator();
var i: usize = 0;
while (it.next()) |pair| {
- i += try unicode.utf8ToUtf16Le(result[i..], pair.key_ptr.*);
+ i += try unicode.wtf8ToWtf16Le(result[i..], pair.key_ptr.*);
result[i] = '=';
i += 1;
- i += try unicode.utf8ToUtf16Le(result[i..], pair.value_ptr.*);
+ i += try unicode.wtf8ToWtf16Le(result[i..], pair.value_ptr.*);
result[i] = 0;
i += 1;
}
diff --git a/lib/std/compress.zig b/lib/std/compress.zig
index e56008cefe..a6d0a40b26 100644
--- a/lib/std/compress.zig
+++ b/lib/std/compress.zig
@@ -1,11 +1,11 @@
const std = @import("std.zig");
-pub const deflate = @import("compress/deflate.zig");
+pub const flate = @import("compress/flate.zig");
pub const gzip = @import("compress/gzip.zig");
+pub const zlib = @import("compress/zlib.zig");
pub const lzma = @import("compress/lzma.zig");
pub const lzma2 = @import("compress/lzma2.zig");
pub const xz = @import("compress/xz.zig");
-pub const zlib = @import("compress/zlib.zig");
pub const zstd = @import("compress/zstandard.zig");
pub fn HashedReader(
@@ -69,11 +69,11 @@ pub fn hashedWriter(
}
test {
- _ = deflate;
- _ = gzip;
_ = lzma;
_ = lzma2;
_ = xz;
- _ = zlib;
_ = zstd;
+ _ = flate;
+ _ = gzip;
+ _ = zlib;
}
diff --git a/lib/std/compress/deflate.zig b/lib/std/compress/deflate.zig
deleted file mode 100644
index 2fe5969067..0000000000
--- a/lib/std/compress/deflate.zig
+++ /dev/null
@@ -1,44 +0,0 @@
-//! The deflate package is a translation of the Go code of the compress/flate package from
-//! https://go.googlesource.com/go/+/refs/tags/go1.17/src/compress/flate/
-
-const deflate = @import("deflate/compressor.zig");
-const inflate = @import("deflate/decompressor.zig");
-
-pub const Compression = deflate.Compression;
-pub const CompressorOptions = deflate.CompressorOptions;
-pub const Compressor = deflate.Compressor;
-pub const Decompressor = inflate.Decompressor;
-
-pub const compressor = deflate.compressor;
-pub const decompressor = inflate.decompressor;
-
-/// Copies elements from a source `src` slice into a destination `dst` slice.
-/// The copy never returns an error but might not be complete if the destination is too small.
-/// Returns the number of elements copied, which will be the minimum of `src.len` and `dst.len`.
-/// TODO: remove this smelly function
-pub fn copy(dst: []u8, src: []const u8) usize {
- if (dst.len <= src.len) {
- @memcpy(dst, src[0..dst.len]);
- return dst.len;
- } else {
- @memcpy(dst[0..src.len], src);
- return src.len;
- }
-}
-
-test {
- _ = @import("deflate/token.zig");
- _ = @import("deflate/bits_utils.zig");
- _ = @import("deflate/dict_decoder.zig");
-
- _ = @import("deflate/huffman_code.zig");
- _ = @import("deflate/huffman_bit_writer.zig");
-
- _ = @import("deflate/compressor.zig");
- _ = @import("deflate/compressor_test.zig");
-
- _ = @import("deflate/deflate_fast.zig");
- _ = @import("deflate/deflate_fast_test.zig");
-
- _ = @import("deflate/decompressor.zig");
-}
diff --git a/lib/std/compress/deflate/bits_utils.zig b/lib/std/compress/deflate/bits_utils.zig
deleted file mode 100644
index 97a557d0da..0000000000
--- a/lib/std/compress/deflate/bits_utils.zig
+++ /dev/null
@@ -1,33 +0,0 @@
-const math = @import("std").math;
-
-// Reverse bit-by-bit a N-bit code.
-pub fn bitReverse(comptime T: type, value: T, N: usize) T {
- const r = @bitReverse(value);
- return r >> @as(math.Log2Int(T), @intCast(@typeInfo(T).Int.bits - N));
-}
-
-test "bitReverse" {
- const std = @import("std");
-
- const ReverseBitsTest = struct {
- in: u16,
- bit_count: u5,
- out: u16,
- };
-
- const reverse_bits_tests = [_]ReverseBitsTest{
- .{ .in = 1, .bit_count = 1, .out = 1 },
- .{ .in = 1, .bit_count = 2, .out = 2 },
- .{ .in = 1, .bit_count = 3, .out = 4 },
- .{ .in = 1, .bit_count = 4, .out = 8 },
- .{ .in = 1, .bit_count = 5, .out = 16 },
- .{ .in = 17, .bit_count = 5, .out = 17 },
- .{ .in = 257, .bit_count = 9, .out = 257 },
- .{ .in = 29, .bit_count = 5, .out = 23 },
- };
-
- for (reverse_bits_tests) |h| {
- const v = bitReverse(u16, h.in, h.bit_count);
- try std.testing.expectEqual(h.out, v);
- }
-}
diff --git a/lib/std/compress/deflate/compressor.zig b/lib/std/compress/deflate/compressor.zig
deleted file mode 100644
index 0326668793..0000000000
--- a/lib/std/compress/deflate/compressor.zig
+++ /dev/null
@@ -1,1110 +0,0 @@
-const std = @import("std");
-const assert = std.debug.assert;
-const fmt = std.fmt;
-const io = std.io;
-const math = std.math;
-const mem = std.mem;
-
-const Allocator = std.mem.Allocator;
-
-const deflate_const = @import("deflate_const.zig");
-const fast = @import("deflate_fast.zig");
-const hm_bw = @import("huffman_bit_writer.zig");
-const token = @import("token.zig");
-
-pub const Compression = enum(i5) {
- /// huffman_only disables Lempel-Ziv match searching and only performs Huffman
- /// entropy encoding. This mode is useful in compressing data that has
- /// already been compressed with an LZ style algorithm (e.g. Snappy or LZ4)
- /// that lacks an entropy encoder. Compression gains are achieved when
- /// certain bytes in the input stream occur more frequently than others.
- ///
- /// Note that huffman_only produces a compressed output that is
- /// RFC 1951 compliant. That is, any valid DEFLATE decompressor will
- /// continue to be able to decompress this output.
- huffman_only = -2,
- /// Same as level_6
- default_compression = -1,
- /// Does not attempt any compression; only adds the necessary DEFLATE framing.
- no_compression = 0,
- /// Prioritizes speed over output size, based on Snappy's LZ77-style encoder
- best_speed = 1,
- level_2 = 2,
- level_3 = 3,
- level_4 = 4,
- level_5 = 5,
- level_6 = 6,
- level_7 = 7,
- level_8 = 8,
- /// Prioritizes smaller output size over speed
- best_compression = 9,
-};
-
-const log_window_size = 15;
-const window_size = 1 << log_window_size;
-const window_mask = window_size - 1;
-
-// The LZ77 step produces a sequence of literal tokens and <length, offset>
-// pair tokens. The offset is also known as distance. The underlying wire
-// format limits the range of lengths and offsets. For example, there are
-// 256 legitimate lengths: those in the range [3, 258]. This package's
-// compressor uses a higher minimum match length, enabling optimizations
-// such as finding matches via 32-bit loads and compares.
-const base_match_length = deflate_const.base_match_length; // The smallest match length per the RFC section 3.2.5
-const min_match_length = 4; // The smallest match length that the compressor actually emits
-const max_match_length = deflate_const.max_match_length;
-const base_match_offset = deflate_const.base_match_offset; // The smallest match offset
-const max_match_offset = deflate_const.max_match_offset; // The largest match offset
-
-// The maximum number of tokens we put into a single flate block, just to
-// stop things from getting too large.
-const max_flate_block_tokens = 1 << 14;
-const max_store_block_size = deflate_const.max_store_block_size;
-const hash_bits = 17; // After 17 performance degrades
-const hash_size = 1 << hash_bits;
-const hash_mask = (1 << hash_bits) - 1;
-const max_hash_offset = 1 << 24;
-
-const skip_never = math.maxInt(u32);
-
-const CompressionLevel = struct {
- good: u16,
- lazy: u16,
- nice: u16,
- chain: u16,
- fast_skip_hashshing: u32,
-};
-
-fn levels(compression: Compression) CompressionLevel {
- switch (compression) {
- .no_compression,
- .best_speed, // best_speed uses a custom algorithm; see deflate_fast.zig
- .huffman_only,
- => return .{
- .good = 0,
- .lazy = 0,
- .nice = 0,
- .chain = 0,
- .fast_skip_hashshing = 0,
- },
- // For levels 2-3 we don't bother trying with lazy matches.
- .level_2 => return .{
- .good = 4,
- .lazy = 0,
- .nice = 16,
- .chain = 8,
- .fast_skip_hashshing = 5,
- },
- .level_3 => return .{
- .good = 4,
- .lazy = 0,
- .nice = 32,
- .chain = 32,
- .fast_skip_hashshing = 6,
- },
-
- // Levels 4-9 use increasingly more lazy matching and increasingly stringent conditions for
- // "good enough".
- .level_4 => return .{
- .good = 4,
- .lazy = 4,
- .nice = 16,
- .chain = 16,
- .fast_skip_hashshing = skip_never,
- },
- .level_5 => return .{
- .good = 8,
- .lazy = 16,
- .nice = 32,
- .chain = 32,
- .fast_skip_hashshing = skip_never,
- },
- .default_compression,
- .level_6,
- => return .{
- .good = 8,
- .lazy = 16,
- .nice = 128,
- .chain = 128,
- .fast_skip_hashshing = skip_never,
- },
- .level_7 => return .{
- .good = 8,
- .lazy = 32,
- .nice = 128,
- .chain = 256,
- .fast_skip_hashshing = skip_never,
- },
- .level_8 => return .{
- .good = 32,
- .lazy = 128,
- .nice = 258,
- .chain = 1024,
- .fast_skip_hashshing = skip_never,
- },
- .best_compression => return .{
- .good = 32,
- .lazy = 258,
- .nice = 258,
- .chain = 4096,
- .fast_skip_hashshing = skip_never,
- },
- }
-}
-
-// matchLen returns the number of matching bytes in a and b
-// up to length 'max'. Both slices must be at least 'max'
-// bytes in size.
-fn matchLen(a: []u8, b: []u8, max: u32) u32 {
- const bounded_a = a[0..max];
- const bounded_b = b[0..max];
- for (bounded_a, 0..) |av, i| {
- if (bounded_b[i] != av) {
- return @as(u32, @intCast(i));
- }
- }
- return max;
-}
-
-const hash_mul = 0x1e35a7bd;
-
-// hash4 returns a hash representation of the first 4 bytes
-// of the supplied slice.
-// The caller must ensure that b.len >= 4.
-fn hash4(b: []u8) u32 {
- return ((@as(u32, b[3]) |
- @as(u32, b[2]) << 8 |
- @as(u32, b[1]) << 16 |
- @as(u32, b[0]) << 24) *% hash_mul) >> (32 - hash_bits);
-}
-
-// bulkHash4 will compute hashes using the same
-// algorithm as hash4
-fn bulkHash4(b: []u8, dst: []u32) u32 {
- if (b.len < min_match_length) {
- return 0;
- }
- var hb =
- @as(u32, b[3]) |
- @as(u32, b[2]) << 8 |
- @as(u32, b[1]) << 16 |
- @as(u32, b[0]) << 24;
-
- dst[0] = (hb *% hash_mul) >> (32 - hash_bits);
- const end = b.len - min_match_length + 1;
- var i: u32 = 1;
- while (i < end) : (i += 1) {
- hb = (hb << 8) | @as(u32, b[i + 3]);
- dst[i] = (hb *% hash_mul) >> (32 - hash_bits);
- }
-
- return hb;
-}
-
-pub const CompressorOptions = struct {
- level: Compression = .default_compression,
- dictionary: ?[]const u8 = null,
-};
-
-/// Returns a new Compressor compressing data at the given level.
-/// Following zlib, levels range from 1 (best_speed) to 9 (best_compression);
-/// higher levels typically run slower but compress more. Level 0
-/// (no_compression) does not attempt any compression; it only adds the
-/// necessary DEFLATE framing.
-/// Level -1 (default_compression) uses the default compression level.
-/// Level -2 (huffman_only) will use Huffman compression only, giving
-/// a very fast compression for all types of input, but sacrificing considerable
-/// compression efficiency.
-///
-/// `dictionary` is optional and initializes the new `Compressor` with a preset dictionary.
-/// The returned Compressor behaves as if the dictionary had been written to it without producing
-/// any compressed output. The compressed data written to hm_bw can only be decompressed by a
-/// Decompressor initialized with the same dictionary.
-///
-/// The compressed data will be passed to the provided `writer`, see `writer()` and `write()`.
-pub fn compressor(
- allocator: Allocator,
- writer: anytype,
- options: CompressorOptions,
-) !Compressor(@TypeOf(writer)) {
- return Compressor(@TypeOf(writer)).init(allocator, writer, options);
-}
-
-pub fn Compressor(comptime WriterType: anytype) type {
- return struct {
- const Self = @This();
-
- /// A Writer takes data written to it and writes the compressed
- /// form of that data to an underlying writer.
- pub const Writer = io.Writer(*Self, Error, write);
-
- /// Returns a Writer that takes data written to it and writes the compressed
- /// form of that data to an underlying writer.
- pub fn writer(self: *Self) Writer {
- return .{ .context = self };
- }
-
- pub const Error = WriterType.Error;
-
- allocator: Allocator,
-
- compression: Compression,
- compression_level: CompressionLevel,
-
- // Inner writer wrapped in a HuffmanBitWriter
- hm_bw: hm_bw.HuffmanBitWriter(WriterType) = undefined,
- bulk_hasher: *const fn ([]u8, []u32) u32,
-
- sync: bool, // requesting flush
- best_speed_enc: *fast.DeflateFast, // Encoder for best_speed
-
- // Input hash chains
- // hash_head[hashValue] contains the largest inputIndex with the specified hash value
- // If hash_head[hashValue] is within the current window, then
- // hash_prev[hash_head[hashValue] & window_mask] contains the previous index
- // with the same hash value.
- chain_head: u32,
- hash_head: []u32, // [hash_size]u32,
- hash_prev: []u32, // [window_size]u32,
- hash_offset: u32,
-
- // input window: unprocessed data is window[index..window_end]
- index: u32,
- window: []u8,
- window_end: usize,
- block_start: usize, // window index where current tokens start
- byte_available: bool, // if true, still need to process window[index-1].
-
- // queued output tokens
- tokens: []token.Token,
- tokens_count: u16,
-
- // deflate state
- length: u32,
- offset: u32,
- hash: u32,
- max_insert_index: usize,
- err: bool,
-
- // hash_match must be able to contain hashes for the maximum match length.
- hash_match: []u32, // [max_match_length - 1]u32,
-
- // dictionary
- dictionary: ?[]const u8,
-
- fn fillDeflate(self: *Self, b: []const u8) u32 {
- if (self.index >= 2 * window_size - (min_match_length + max_match_length)) {
- // shift the window by window_size
- mem.copyForwards(u8, self.window, self.window[window_size .. 2 * window_size]);
- self.index -= window_size;
- self.window_end -= window_size;
- if (self.block_start >= window_size) {
- self.block_start -= window_size;
- } else {
- self.block_start = math.maxInt(u32);
- }
- self.hash_offset += window_size;
- if (self.hash_offset > max_hash_offset) {
- const delta = self.hash_offset - 1;
- self.hash_offset -= delta;
- self.chain_head -|= delta;
-
- // Iterate over slices instead of arrays to avoid copying
- // the entire table onto the stack (https://golang.org/issue/18625).
- for (self.hash_prev, 0..) |v, i| {
- if (v > delta) {
- self.hash_prev[i] = @as(u32, @intCast(v - delta));
- } else {
- self.hash_prev[i] = 0;
- }
- }
- for (self.hash_head, 0..) |v, i| {
- if (v > delta) {
- self.hash_head[i] = @as(u32, @intCast(v - delta));
- } else {
- self.hash_head[i] = 0;
- }
- }
- }
- }
- const n = std.compress.deflate.copy(self.window[self.window_end..], b);
- self.window_end += n;
- return @as(u32, @intCast(n));
- }
-
- fn writeBlock(self: *Self, tokens: []token.Token, index: usize) !void {
- if (index > 0) {
- var window: ?[]u8 = null;
- if (self.block_start <= index) {
- window = self.window[self.block_start..index];
- }
- self.block_start = index;
- try self.hm_bw.writeBlock(tokens, false, window);
- return;
- }
- return;
- }
-
- // fillWindow will fill the current window with the supplied
- // dictionary and calculate all hashes.
- // This is much faster than doing a full encode.
- // Should only be used after a reset.
- fn fillWindow(self: *Self, in_b: []const u8) void {
- var b = in_b;
- // Do not fill window if we are in store-only mode (see the fill() function for the
- // Compression levels that use fillStore() instead of fillDeflate()).
- if (self.compression == .no_compression or
- self.compression == .huffman_only or
- self.compression == .best_speed)
- {
- return;
- }
-
- // fillWindow() must not be called with stale data
- assert(self.index == 0 and self.window_end == 0);
-
- // If we are given too much, cut it.
- if (b.len > window_size) {
- b = b[b.len - window_size ..];
- }
- // Add all to window.
- @memcpy(self.window[0..b.len], b);
- const n = b.len;
-
- // Calculate 256 hashes at a time (more L1 cache hits)
- const loops = (n + 256 - min_match_length) / 256;
- var j: usize = 0;
- while (j < loops) : (j += 1) {
- const index = j * 256;
- var end = index + 256 + min_match_length - 1;
- if (end > n) {
- end = n;
- }
- const to_check = self.window[index..end];
- const dst_size = to_check.len - min_match_length + 1;
-
- if (dst_size <= 0) {
- continue;
- }
-
- const dst = self.hash_match[0..dst_size];
- _ = self.bulk_hasher(to_check, dst);
- var new_h: u32 = 0;
- for (dst, 0..) |val, i| {
- const di = i + index;
- new_h = val;
- const hh = &self.hash_head[new_h & hash_mask];
- // Get previous value with the same hash.
- // Our chain should point to the previous value.
- self.hash_prev[di & window_mask] = hh.*;
- // Set the head of the hash chain to us.
- hh.* = @as(u32, @intCast(di + self.hash_offset));
- }
- self.hash = new_h;
- }
- // Update window information.
- self.window_end = n;
- self.index = @as(u32, @intCast(n));
- }
-
- const Match = struct {
- length: u32,
- offset: u32,
- ok: bool,
- };
-
- // Try to find a match starting at pos whose length is greater than prev_length.
- // We only look at self.compression_level.chain possibilities before giving up.
- fn findMatch(
- self: *Self,
- pos: u32,
- prev_head: u32,
- prev_length: u32,
- lookahead: u32,
- ) Match {
- var length: u32 = 0;
- var offset: u32 = 0;
- var ok: bool = false;
-
- var min_match_look: u32 = max_match_length;
- if (lookahead < min_match_look) {
- min_match_look = lookahead;
- }
-
- var win = self.window[0 .. pos + min_match_look];
-
- // We quit when we get a match that's at least `nice` bytes long
- var nice = win.len - pos;
- if (self.compression_level.nice < nice) {
- nice = self.compression_level.nice;
- }
-
- // If we've got a match that's good enough, only look in 1/4 the chain.
- var tries = self.compression_level.chain;
- length = prev_length;
- if (length >= self.compression_level.good) {
- tries >>= 2;
- }
-
- var w_end = win[pos + length];
- const w_pos = win[pos..];
- const min_index = pos -| window_size;
-
- var i = prev_head;
- while (tries > 0) : (tries -= 1) {
- if (w_end == win[i + length]) {
- const n = matchLen(win[i..], w_pos, min_match_look);
-
- if (n > length and (n > min_match_length or pos - i <= 4096)) {
- length = n;
- offset = pos - i;
- ok = true;
- if (n >= nice) {
- // The match is good enough that we don't try to find a better one.
- break;
- }
- w_end = win[pos + n];
- }
- }
- if (i == min_index) {
- // hash_prev[i & window_mask] has already been overwritten, so stop now.
- break;
- }
-
- if (@as(u32, @intCast(self.hash_prev[i & window_mask])) < self.hash_offset) {
- break;
- }
-
- i = @as(u32, @intCast(self.hash_prev[i & window_mask])) - self.hash_offset;
- if (i < min_index) {
- break;
- }
- }
-
- return Match{ .length = length, .offset = offset, .ok = ok };
- }
-
- fn writeStoredBlock(self: *Self, buf: []u8) !void {
- try self.hm_bw.writeStoredHeader(buf.len, false);
- try self.hm_bw.writeBytes(buf);
- }
-
- // encSpeed will compress and store the currently added data,
- // if enough has been accumulated or we are at the end of the stream.
- fn encSpeed(self: *Self) !void {
- // We only compress if we have max_store_block_size.
- if (self.window_end < max_store_block_size) {
- if (!self.sync) {
- return;
- }
-
- // Handle small sizes.
- if (self.window_end < 128) {
- switch (self.window_end) {
- 0 => return,
- 1...16 => {
- try self.writeStoredBlock(self.window[0..self.window_end]);
- },
- else => {
- try self.hm_bw.writeBlockHuff(false, self.window[0..self.window_end]);
- self.err = self.hm_bw.err;
- },
- }
- self.window_end = 0;
- self.best_speed_enc.reset();
- return;
- }
- }
- // Encode the block.
- self.tokens_count = 0;
- self.best_speed_enc.encode(
- self.tokens,
- &self.tokens_count,
- self.window[0..self.window_end],
- );
-
- // If we removed less than 1/16th, Huffman compress the block.
- if (self.tokens_count > self.window_end - (self.window_end >> 4)) {
- try self.hm_bw.writeBlockHuff(false, self.window[0..self.window_end]);
- } else {
- try self.hm_bw.writeBlockDynamic(
- self.tokens[0..self.tokens_count],
- false,
- self.window[0..self.window_end],
- );
- }
- self.err = self.hm_bw.err;
- self.window_end = 0;
- }
-
- fn initDeflate(self: *Self) !void {
- self.window = try self.allocator.alloc(u8, 2 * window_size);
- self.hash_offset = 1;
- self.tokens = try self.allocator.alloc(token.Token, max_flate_block_tokens);
- self.tokens_count = 0;
- @memset(self.tokens, 0);
- self.length = min_match_length - 1;
- self.offset = 0;
- self.byte_available = false;
- self.index = 0;
- self.hash = 0;
- self.chain_head = 0;
- self.bulk_hasher = bulkHash4;
- }
-
- fn deflate(self: *Self) !void {
- if (self.window_end - self.index < min_match_length + max_match_length and !self.sync) {
- return;
- }
-
- self.max_insert_index = self.window_end -| (min_match_length - 1);
- if (self.index < self.max_insert_index) {
- self.hash = hash4(self.window[self.index .. self.index + min_match_length]);
- }
-
- while (true) {
- assert(self.index <= self.window_end);
-
- const lookahead = self.window_end -| self.index;
- if (lookahead < min_match_length + max_match_length) {
- if (!self.sync) {
- break;
- }
- assert(self.index <= self.window_end);
-
- if (lookahead == 0) {
- // Flush current output block if any.
- if (self.byte_available) {
- // There is still one pending token that needs to be flushed
- self.tokens[self.tokens_count] = token.literalToken(@as(u32, @intCast(self.window[self.index - 1])));
- self.tokens_count += 1;
- self.byte_available = false;
- }
- if (self.tokens.len > 0) {
- try self.writeBlock(self.tokens[0..self.tokens_count], self.index);
- self.tokens_count = 0;
- }
- break;
- }
- }
- if (self.index < self.max_insert_index) {
- // Update the hash
- self.hash = hash4(self.window[self.index .. self.index + min_match_length]);
- const hh = &self.hash_head[self.hash & hash_mask];
- self.chain_head = @as(u32, @intCast(hh.*));
- self.hash_prev[self.index & window_mask] = @as(u32, @intCast(self.chain_head));
- hh.* = @as(u32, @intCast(self.index + self.hash_offset));
- }
- const prev_length = self.length;
- const prev_offset = self.offset;
- self.length = min_match_length - 1;
- self.offset = 0;
- const min_index = self.index -| window_size;
-
- if (self.hash_offset <= self.chain_head and
- self.chain_head - self.hash_offset >= min_index and
- (self.compression_level.fast_skip_hashshing != skip_never and
- lookahead > min_match_length - 1 or
- self.compression_level.fast_skip_hashshing == skip_never and
- lookahead > prev_length and
- prev_length < self.compression_level.lazy))
- {
- {
- const fmatch = self.findMatch(
- self.index,
- self.chain_head -| self.hash_offset,
- min_match_length - 1,
- @as(u32, @intCast(lookahead)),
- );
- if (fmatch.ok) {
- self.length = fmatch.length;
- self.offset = fmatch.offset;
- }
- }
- }
- if (self.compression_level.fast_skip_hashshing != skip_never and
- self.length >= min_match_length or
- self.compression_level.fast_skip_hashshing == skip_never and
- prev_length >= min_match_length and
- self.length <= prev_length)
- {
- // There was a match at the previous step, and the current match is
- // not better. Output the previous match.
- if (self.compression_level.fast_skip_hashshing != skip_never) {
- self.tokens[self.tokens_count] = token.matchToken(@as(u32, @intCast(self.length - base_match_length)), @as(u32, @intCast(self.offset - base_match_offset)));
- self.tokens_count += 1;
- } else {
- self.tokens[self.tokens_count] = token.matchToken(
- @as(u32, @intCast(prev_length - base_match_length)),
- @as(u32, @intCast(prev_offset -| base_match_offset)),
- );
- self.tokens_count += 1;
- }
- // Insert in the hash table all strings up to the end of the match.
- // index and index-1 are already inserted. If there is not enough
- // lookahead, the last two strings are not inserted into the hash
- // table.
- if (self.length <= self.compression_level.fast_skip_hashshing) {
- var newIndex: u32 = 0;
- if (self.compression_level.fast_skip_hashshing != skip_never) {
- newIndex = self.index + self.length;
- } else {
- newIndex = self.index + prev_length - 1;
- }
- var index = self.index;
- index += 1;
- while (index < newIndex) : (index += 1) {
- if (index < self.max_insert_index) {
- self.hash = hash4(self.window[index .. index + min_match_length]);
- // Get previous value with the same hash.
- // Our chain should point to the previous value.
- const hh = &self.hash_head[self.hash & hash_mask];
- self.hash_prev[index & window_mask] = hh.*;
- // Set the head of the hash chain to us.
- hh.* = @as(u32, @intCast(index + self.hash_offset));
- }
- }
- self.index = index;
-
- if (self.compression_level.fast_skip_hashshing == skip_never) {
- self.byte_available = false;
- self.length = min_match_length - 1;
- }
- } else {
- // For matches this long, we don't bother inserting each individual
- // item into the table.
- self.index += self.length;
- if (self.index < self.max_insert_index) {
- self.hash = hash4(self.window[self.index .. self.index + min_match_length]);
- }
- }
- if (self.tokens_count == max_flate_block_tokens) {
- // The block includes the current character
- try self.writeBlock(self.tokens[0..self.tokens_count], self.index);
- self.tokens_count = 0;
- }
- } else {
- if (self.compression_level.fast_skip_hashshing != skip_never or self.byte_available) {
- var i = self.index -| 1;
- if (self.compression_level.fast_skip_hashshing != skip_never) {
- i = self.index;
- }
- self.tokens[self.tokens_count] = token.literalToken(@as(u32, @intCast(self.window[i])));
- self.tokens_count += 1;
- if (self.tokens_count == max_flate_block_tokens) {
- try self.writeBlock(self.tokens[0..self.tokens_count], i + 1);
- self.tokens_count = 0;
- }
- }
- self.index += 1;
- if (self.compression_level.fast_skip_hashshing == skip_never) {
- self.byte_available = true;
- }
- }
- }
- }
-
- fn fillStore(self: *Self, b: []const u8) u32 {
- const n = std.compress.deflate.copy(self.window[self.window_end..], b);
- self.window_end += n;
- return @as(u32, @intCast(n));
- }
-
- fn store(self: *Self) !void {
- if (self.window_end > 0 and (self.window_end == max_store_block_size or self.sync)) {
- try self.writeStoredBlock(self.window[0..self.window_end]);
- self.window_end = 0;
- }
- }
-
- // storeHuff compresses and stores the currently added data
- // when the self.window is full or we are at the end of the stream.
- fn storeHuff(self: *Self) !void {
- if (self.window_end < self.window.len and !self.sync or self.window_end == 0) {
- return;
- }
- try self.hm_bw.writeBlockHuff(false, self.window[0..self.window_end]);
- self.err = self.hm_bw.err;
- self.window_end = 0;
- }
-
- pub fn bytesWritten(self: *Self) usize {
- return self.hm_bw.bytes_written;
- }
-
- /// Writes the compressed form of `input` to the underlying writer.
- pub fn write(self: *Self, input: []const u8) Error!usize {
- var buf = input;
-
- // writes data to hm_bw, which will eventually write the
- // compressed form of data to its underlying writer.
- while (buf.len > 0) {
- try self.step();
- const filled = self.fill(buf);
- buf = buf[filled..];
- }
-
- return input.len;
- }
-
- /// Flushes any pending data to the underlying writer.
- /// It is useful mainly in compressed network protocols, to ensure that
- /// a remote reader has enough data to reconstruct a packet.
- /// Flush does not return until the data has been written.
- /// Calling `flush()` when there is no pending data still causes the Writer
- /// to emit a sync marker of at least 4 bytes.
- /// If the underlying writer returns an error, `flush()` returns that error.
- ///
- /// In the terminology of the zlib library, Flush is equivalent to Z_SYNC_FLUSH.
- pub fn flush(self: *Self) Error!void {
- self.sync = true;
- try self.step();
- try self.hm_bw.writeStoredHeader(0, false);
- try self.hm_bw.flush();
- self.sync = false;
- return;
- }
-
- fn step(self: *Self) !void {
- switch (self.compression) {
- .no_compression => return self.store(),
- .huffman_only => return self.storeHuff(),
- .best_speed => return self.encSpeed(),
- .default_compression,
- .level_2,
- .level_3,
- .level_4,
- .level_5,
- .level_6,
- .level_7,
- .level_8,
- .best_compression,
- => return self.deflate(),
- }
- }
-
- fn fill(self: *Self, b: []const u8) u32 {
- switch (self.compression) {
- .no_compression => return self.fillStore(b),
- .huffman_only => return self.fillStore(b),
- .best_speed => return self.fillStore(b),
- .default_compression,
- .level_2,
- .level_3,
- .level_4,
- .level_5,
- .level_6,
- .level_7,
- .level_8,
- .best_compression,
- => return self.fillDeflate(b),
- }
- }
-
- fn init(
- allocator: Allocator,
- in_writer: WriterType,
- options: CompressorOptions,
- ) !Self {
- var s = Self{
- .allocator = undefined,
- .compression = undefined,
- .compression_level = undefined,
- .hm_bw = undefined, // HuffmanBitWriter
- .bulk_hasher = undefined,
- .sync = false,
- .best_speed_enc = undefined, // Best speed encoder
- .chain_head = 0,
- .hash_head = undefined,
- .hash_prev = undefined, // previous hash
- .hash_offset = 0,
- .index = 0,
- .window = undefined,
- .window_end = 0,
- .block_start = 0,
- .byte_available = false,
- .tokens = undefined,
- .tokens_count = 0,
- .length = 0,
- .offset = 0,
- .hash = 0,
- .max_insert_index = 0,
- .err = false, // Error
- .hash_match = undefined,
- .dictionary = options.dictionary,
- };
-
- s.hm_bw = try hm_bw.huffmanBitWriter(allocator, in_writer);
- s.allocator = allocator;
-
- s.hash_head = try allocator.alloc(u32, hash_size);
- s.hash_prev = try allocator.alloc(u32, window_size);
- s.hash_match = try allocator.alloc(u32, max_match_length - 1);
- @memset(s.hash_head, 0);
- @memset(s.hash_prev, 0);
- @memset(s.hash_match, 0);
-
- switch (options.level) {
- .no_compression => {
- s.compression = options.level;
- s.compression_level = levels(options.level);
- s.window = try allocator.alloc(u8, max_store_block_size);
- s.tokens = try allocator.alloc(token.Token, 0);
- },
- .huffman_only => {
- s.compression = options.level;
- s.compression_level = levels(options.level);
- s.window = try allocator.alloc(u8, max_store_block_size);
- s.tokens = try allocator.alloc(token.Token, 0);
- },
- .best_speed => {
- s.compression = options.level;
- s.compression_level = levels(options.level);
- s.window = try allocator.alloc(u8, max_store_block_size);
- s.tokens = try allocator.alloc(token.Token, max_store_block_size);
- s.best_speed_enc = try allocator.create(fast.DeflateFast);
- s.best_speed_enc.* = fast.deflateFast();
- try s.best_speed_enc.init(allocator);
- },
- .default_compression => {
- s.compression = .level_6;
- s.compression_level = levels(.level_6);
- try s.initDeflate();
- if (options.dictionary != null) {
- s.fillWindow(options.dictionary.?);
- }
- },
- .level_2,
- .level_3,
- .level_4,
- .level_5,
- .level_6,
- .level_7,
- .level_8,
- .best_compression,
- => {
- s.compression = options.level;
- s.compression_level = levels(options.level);
- try s.initDeflate();
- if (options.dictionary != null) {
- s.fillWindow(options.dictionary.?);
- }
- },
- }
- return s;
- }
-
- /// Release all allocated memory.
- pub fn deinit(self: *Self) void {
- self.hm_bw.deinit();
- self.allocator.free(self.window);
- self.allocator.free(self.tokens);
- self.allocator.free(self.hash_head);
- self.allocator.free(self.hash_prev);
- self.allocator.free(self.hash_match);
- if (self.compression == .best_speed) {
- self.best_speed_enc.deinit();
- self.allocator.destroy(self.best_speed_enc);
- }
- }
-
- /// Reset discards the inner writer's state and replaces the inner writer with new_writer.
- /// new_writer must be of the same type as the previous writer.
- pub fn reset(self: *Self, new_writer: WriterType) void {
- self.hm_bw.reset(new_writer);
- self.sync = false;
- switch (self.compression) {
- // Reset window
- .no_compression => self.window_end = 0,
- // Reset window, tokens, and encoder
- .best_speed => {
- self.window_end = 0;
- self.tokens_count = 0;
- self.best_speed_enc.reset();
- },
- // Reset everything and reinclude the dictionary if there is one
- .huffman_only,
- .default_compression,
- .level_2,
- .level_3,
- .level_4,
- .level_5,
- .level_6,
- .level_7,
- .level_8,
- .best_compression,
- => {
- self.chain_head = 0;
- @memset(self.hash_head, 0);
- @memset(self.hash_prev, 0);
- self.hash_offset = 1;
- self.index = 0;
- self.window_end = 0;
- self.block_start = 0;
- self.byte_available = false;
- self.tokens_count = 0;
- self.length = min_match_length - 1;
- self.offset = 0;
- self.hash = 0;
- self.max_insert_index = 0;
-
- if (self.dictionary != null) {
- self.fillWindow(self.dictionary.?);
- }
- },
- }
- }
-
- /// Writes any pending data to the underlying writer.
- pub fn close(self: *Self) Error!void {
- self.sync = true;
- try self.step();
- try self.hm_bw.writeStoredHeader(0, true);
- try self.hm_bw.flush();
- return;
- }
- };
-}
-
-// tests
-
-const expect = std.testing.expect;
-const testing = std.testing;
-
-const ArrayList = std.ArrayList;
-
-const DeflateTest = struct {
- in: []const u8,
- level: Compression,
- out: []const u8,
-};
-
-var deflate_tests = [_]DeflateTest{
- // Level 0
- .{
- .in = &[_]u8{},
- .level = .no_compression,
- .out = &[_]u8{ 1, 0, 0, 255, 255 },
- },
-
- // Level -1
- .{
- .in = &[_]u8{0x11},
- .level = .default_compression,
- .out = &[_]u8{ 18, 4, 4, 0, 0, 255, 255 },
- },
- .{
- .in = &[_]u8{0x11},
- .level = .level_6,
- .out = &[_]u8{ 18, 4, 4, 0, 0, 255, 255 },
- },
-
- // Level 4
- .{
- .in = &[_]u8{0x11},
- .level = .level_4,
- .out = &[_]u8{ 18, 4, 4, 0, 0, 255, 255 },
- },
-
- // Level 0
- .{
- .in = &[_]u8{0x11},
- .level = .no_compression,
- .out = &[_]u8{ 0, 1, 0, 254, 255, 17, 1, 0, 0, 255, 255 },
- },
- .{
- .in = &[_]u8{ 0x11, 0x12 },
- .level = .no_compression,
- .out = &[_]u8{ 0, 2, 0, 253, 255, 17, 18, 1, 0, 0, 255, 255 },
- },
- .{
- .in = &[_]u8{ 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11 },
- .level = .no_compression,
- .out = &[_]u8{ 0, 8, 0, 247, 255, 17, 17, 17, 17, 17, 17, 17, 17, 1, 0, 0, 255, 255 },
- },
-
- // Level 2
- .{
- .in = &[_]u8{},
- .level = .level_2,
- .out = &[_]u8{ 1, 0, 0, 255, 255 },
- },
- .{
- .in = &[_]u8{0x11},
- .level = .level_2,
- .out = &[_]u8{ 18, 4, 4, 0, 0, 255, 255 },
- },
- .{
- .in = &[_]u8{ 0x11, 0x12 },
- .level = .level_2,
- .out = &[_]u8{ 18, 20, 2, 4, 0, 0, 255, 255 },
- },
- .{
- .in = &[_]u8{ 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11 },
- .level = .level_2,
- .out = &[_]u8{ 18, 132, 2, 64, 0, 0, 0, 255, 255 },
- },
-
- // Level 9
- .{
- .in = &[_]u8{},
- .level = .best_compression,
- .out = &[_]u8{ 1, 0, 0, 255, 255 },
- },
- .{
- .in = &[_]u8{0x11},
- .level = .best_compression,
- .out = &[_]u8{ 18, 4, 4, 0, 0, 255, 255 },
- },
- .{
- .in = &[_]u8{ 0x11, 0x12 },
- .level = .best_compression,
- .out = &[_]u8{ 18, 20, 2, 4, 0, 0, 255, 255 },
- },
- .{
- .in = &[_]u8{ 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11 },
- .level = .best_compression,
- .out = &[_]u8{ 18, 132, 2, 64, 0, 0, 0, 255, 255 },
- },
-};
-
-test "deflate" {
- for (deflate_tests) |dt| {
- var output = ArrayList(u8).init(testing.allocator);
- defer output.deinit();
-
- var comp = try compressor(testing.allocator, output.writer(), .{ .level = dt.level });
- _ = try comp.write(dt.in);
- try comp.close();
- comp.deinit();
-
- try testing.expectEqualSlices(u8, dt.out, output.items);
- }
-}
-
-test "bulkHash4" {
- for (deflate_tests) |x| {
- if (x.out.len < min_match_length) {
- continue;
- }
- // double the test data
- var out = try testing.allocator.alloc(u8, x.out.len * 2);
- defer testing.allocator.free(out);
- @memcpy(out[0..x.out.len], x.out);
- @memcpy(out[x.out.len..], x.out);
-
- var j: usize = 4;
- while (j < out.len) : (j += 1) {
- var y = out[0..j];
-
- const dst = try testing.allocator.alloc(u32, y.len - min_match_length + 1);
- defer testing.allocator.free(dst);
-
- _ = bulkHash4(y, dst);
- for (dst, 0..) |got, i| {
- const want = hash4(y[i..]);
- try testing.expectEqual(want, got);
- }
- }
- }
-}
diff --git a/lib/std/compress/deflate/compressor_test.zig b/lib/std/compress/deflate/compressor_test.zig
deleted file mode 100644
index f7f5b34a9a..0000000000
--- a/lib/std/compress/deflate/compressor_test.zig
+++ /dev/null
@@ -1,531 +0,0 @@
-const std = @import("std");
-const expect = std.testing.expect;
-const fifo = std.fifo;
-const io = std.io;
-const math = std.math;
-const mem = std.mem;
-const testing = std.testing;
-
-const ArrayList = std.ArrayList;
-
-const deflate = @import("compressor.zig");
-const inflate = @import("decompressor.zig");
-
-const compressor = deflate.compressor;
-const decompressor = inflate.decompressor;
-const huffman_only = deflate.huffman_only;
-
-fn testSync(level: deflate.Compression, input: []const u8) !void {
- if (input.len == 0) {
- return;
- }
-
- var divided_buf = fifo
- .LinearFifo(u8, fifo.LinearFifoBufferType.Dynamic)
- .init(testing.allocator);
- defer divided_buf.deinit();
- var whole_buf = std.ArrayList(u8).init(testing.allocator);
- defer whole_buf.deinit();
-
- const multi_writer = io.multiWriter(.{
- divided_buf.writer(),
- whole_buf.writer(),
- }).writer();
-
- var comp = try compressor(
- testing.allocator,
- multi_writer,
- .{ .level = level },
- );
- defer comp.deinit();
-
- {
- var decomp = try decompressor(
- testing.allocator,
- divided_buf.reader(),
- null,
- );
- defer decomp.deinit();
-
- // Write first half of the input and flush()
- const half: usize = (input.len + 1) / 2;
- var half_len: usize = half - 0;
- {
- _ = try comp.writer().writeAll(input[0..half]);
-
- // Flush
- try comp.flush();
-
- // Read back
- const decompressed = try testing.allocator.alloc(u8, half_len);
- defer testing.allocator.free(decompressed);
-
- const read = try decomp.reader().readAll(decompressed); // read at least half
- try testing.expectEqual(half_len, read);
- try testing.expectEqualSlices(u8, input[0..half], decompressed);
- }
-
- // Write last half of the input and close()
- half_len = input.len - half;
- {
- _ = try comp.writer().writeAll(input[half..]);
-
- // Close
- try comp.close();
-
- // Read back
- const decompressed = try testing.allocator.alloc(u8, half_len);
- defer testing.allocator.free(decompressed);
-
- var read = try decomp.reader().readAll(decompressed);
- try testing.expectEqual(half_len, read);
- try testing.expectEqualSlices(u8, input[half..], decompressed);
-
- // Extra read
- var final: [10]u8 = undefined;
- read = try decomp.reader().readAll(&final);
- try testing.expectEqual(@as(usize, 0), read); // expect ended stream to return 0 bytes
-
- try decomp.close();
- }
- }
-
- _ = try comp.writer().writeAll(input);
- try comp.close();
-
- // stream should work for ordinary reader too (reading whole_buf in one go)
- const whole_buf_reader = io.fixedBufferStream(whole_buf.items).reader();
- var decomp = try decompressor(testing.allocator, whole_buf_reader, null);
- defer decomp.deinit();
-
- const decompressed = try testing.allocator.alloc(u8, input.len);
- defer testing.allocator.free(decompressed);
-
- _ = try decomp.reader().readAll(decompressed);
- try decomp.close();
-
- try testing.expectEqualSlices(u8, input, decompressed);
-}
-
-fn testToFromWithLevelAndLimit(level: deflate.Compression, input: []const u8, limit: u32) !void {
- var compressed = std.ArrayList(u8).init(testing.allocator);
- defer compressed.deinit();
-
- var comp = try compressor(testing.allocator, compressed.writer(), .{ .level = level });
- defer comp.deinit();
-
- try comp.writer().writeAll(input);
- try comp.close();
-
- if (limit > 0) {
- try expect(compressed.items.len <= limit);
- }
-
- var fib = io.fixedBufferStream(compressed.items);
- var decomp = try decompressor(testing.allocator, fib.reader(), null);
- defer decomp.deinit();
-
- const decompressed = try testing.allocator.alloc(u8, input.len);
- defer testing.allocator.free(decompressed);
-
- const read: usize = try decomp.reader().readAll(decompressed);
- try testing.expectEqual(input.len, read);
- try testing.expectEqualSlices(u8, input, decompressed);
-
- if (false) {
- // TODO: this test has regressed
- try testSync(level, input);
- }
-}
-
-fn testToFromWithLimit(input: []const u8, limit: [11]u32) !void {
- try testToFromWithLevelAndLimit(.no_compression, input, limit[0]);
- try testToFromWithLevelAndLimit(.best_speed, input, limit[1]);
- try testToFromWithLevelAndLimit(.level_2, input, limit[2]);
- try testToFromWithLevelAndLimit(.level_3, input, limit[3]);
- try testToFromWithLevelAndLimit(.level_4, input, limit[4]);
- try testToFromWithLevelAndLimit(.level_5, input, limit[5]);
- try testToFromWithLevelAndLimit(.level_6, input, limit[6]);
- try testToFromWithLevelAndLimit(.level_7, input, limit[7]);
- try testToFromWithLevelAndLimit(.level_8, input, limit[8]);
- try testToFromWithLevelAndLimit(.best_compression, input, limit[9]);
- try testToFromWithLevelAndLimit(.huffman_only, input, limit[10]);
-}
-
-test "deflate/inflate" {
- const limits = [_]u32{0} ** 11;
-
- var test0 = [_]u8{};
- var test1 = [_]u8{0x11};
- var test2 = [_]u8{ 0x11, 0x12 };
- var test3 = [_]u8{ 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11 };
- var test4 = [_]u8{ 0x11, 0x10, 0x13, 0x41, 0x21, 0x21, 0x41, 0x13, 0x87, 0x78, 0x13 };
-
- try testToFromWithLimit(&test0, limits);
- try testToFromWithLimit(&test1, limits);
- try testToFromWithLimit(&test2, limits);
- try testToFromWithLimit(&test3, limits);
- try testToFromWithLimit(&test4, limits);
-
- var large_data_chunk = try testing.allocator.alloc(u8, 100_000);
- defer testing.allocator.free(large_data_chunk);
- // fill with random data
- for (large_data_chunk, 0..) |_, i| {
- large_data_chunk[i] = @as(u8, @truncate(i)) *% @as(u8, @truncate(i));
- }
- try testToFromWithLimit(large_data_chunk, limits);
-}
-
-test "very long sparse chunk" {
- // A SparseReader returns a stream consisting of 0s ending with 65,536 (1<<16) 1s.
- // This tests missing hash references in a very large input.
- const SparseReader = struct {
- l: usize, // length
- cur: usize, // current position
-
- const Self = @This();
- const Error = error{};
-
- pub const Reader = io.Reader(*Self, Error, read);
-
- pub fn reader(self: *Self) Reader {
- return .{ .context = self };
- }
-
- fn read(s: *Self, b: []u8) Error!usize {
- var n: usize = 0; // amount read
-
- if (s.cur >= s.l) {
- return 0;
- }
- n = b.len;
- var cur = s.cur + n;
- if (cur > s.l) {
- n -= cur - s.l;
- cur = s.l;
- }
- for (b[0..n], 0..) |_, i| {
- if (s.cur + i >= s.l -| (1 << 16)) {
- b[i] = 1;
- } else {
- b[i] = 0;
- }
- }
- s.cur = cur;
- return n;
- }
- };
-
- var comp = try compressor(
- testing.allocator,
- io.null_writer,
- .{ .level = .best_speed },
- );
- defer comp.deinit();
- var writer = comp.writer();
-
- var sparse = SparseReader{ .l = 0x23e8, .cur = 0 };
- var reader = sparse.reader();
-
- var read: usize = 1;
- var written: usize = 0;
- while (read > 0) {
- var buf: [1 << 15]u8 = undefined; // 32,768 bytes buffer
- read = try reader.read(&buf);
- written += try writer.write(buf[0..read]);
- }
- try testing.expectEqual(@as(usize, 0x23e8), written);
-}
-
-test "compressor reset" {
- for (std.enums.values(deflate.Compression)) |c| {
- try testWriterReset(c, null);
- try testWriterReset(c, "dict");
- try testWriterReset(c, "hello");
- }
-}
-
-fn testWriterReset(level: deflate.Compression, dict: ?[]const u8) !void {
- const filler = struct {
- fn writeData(c: anytype) !void {
- const msg = "all your base are belong to us";
- try c.writer().writeAll(msg);
- try c.flush();
-
- const hello = "hello world";
- var i: usize = 0;
- while (i < 1024) : (i += 1) {
- try c.writer().writeAll(hello);
- }
-
- i = 0;
- while (i < 65000) : (i += 1) {
- try c.writer().writeAll("x");
- }
- }
- };
-
- var buf1 = ArrayList(u8).init(testing.allocator);
- defer buf1.deinit();
- var buf2 = ArrayList(u8).init(testing.allocator);
- defer buf2.deinit();
-
- var comp = try compressor(
- testing.allocator,
- buf1.writer(),
- .{ .level = level, .dictionary = dict },
- );
- defer comp.deinit();
-
- try filler.writeData(&comp);
- try comp.close();
-
- comp.reset(buf2.writer());
- try filler.writeData(&comp);
- try comp.close();
-
- try testing.expectEqualSlices(u8, buf1.items, buf2.items);
-}
-
-test "decompressor dictionary" {
- const dict = "hello world"; // dictionary
- const text = "hello again world";
-
- var compressed = fifo
- .LinearFifo(u8, fifo.LinearFifoBufferType.Dynamic)
- .init(testing.allocator);
- defer compressed.deinit();
-
- var comp = try compressor(
- testing.allocator,
- compressed.writer(),
- .{
- .level = .level_5,
- .dictionary = null, // no dictionary
- },
- );
- defer comp.deinit();
-
- // imitate a compressor with a dictionary
- try comp.writer().writeAll(dict);
- try comp.flush();
- compressed.discard(compressed.readableLength()); // empty the output
- try comp.writer().writeAll(text);
- try comp.close();
-
- const decompressed = try testing.allocator.alloc(u8, text.len);
- defer testing.allocator.free(decompressed);
-
- var decomp = try decompressor(
- testing.allocator,
- compressed.reader(),
- dict,
- );
- defer decomp.deinit();
-
- _ = try decomp.reader().readAll(decompressed);
- try testing.expectEqualSlices(u8, "hello again world", decompressed);
-}
-
-test "compressor dictionary" {
- const dict = "hello world";
- const text = "hello again world";
-
- var compressed_nd = fifo
- .LinearFifo(u8, fifo.LinearFifoBufferType.Dynamic)
- .init(testing.allocator); // compressed with no dictionary
- defer compressed_nd.deinit();
-
- var compressed_d = ArrayList(u8).init(testing.allocator); // compressed with a dictionary
- defer compressed_d.deinit();
-
- // imitate a compressor with a dictionary
- var comp_nd = try compressor(
- testing.allocator,
- compressed_nd.writer(),
- .{
- .level = .level_5,
- .dictionary = null, // no dictionary
- },
- );
- defer comp_nd.deinit();
- try comp_nd.writer().writeAll(dict);
- try comp_nd.flush();
- compressed_nd.discard(compressed_nd.readableLength()); // empty the output
- try comp_nd.writer().writeAll(text);
- try comp_nd.close();
-
- // use a compressor with a dictionary
- var comp_d = try compressor(
- testing.allocator,
- compressed_d.writer(),
- .{
- .level = .level_5,
- .dictionary = dict, // with a dictionary
- },
- );
- defer comp_d.deinit();
- try comp_d.writer().writeAll(text);
- try comp_d.close();
-
- try testing.expectEqualSlices(u8, compressed_d.items, compressed_nd.readableSlice(0));
-}
-
-// Update the hash for best_speed only if d.index < d.maxInsertIndex
-// See https://golang.org/issue/2508
-test "Go non-regression test for 2508" {
- var comp = try compressor(
- testing.allocator,
- io.null_writer,
- .{ .level = .best_speed },
- );
- defer comp.deinit();
-
- var buf = [_]u8{0} ** 1024;
-
- var i: usize = 0;
- while (i < 131_072) : (i += 1) {
- try comp.writer().writeAll(&buf);
- try comp.close();
- }
-}
-
-test "deflate/inflate string" {
- const StringTest = struct {
- filename: []const u8,
- limit: [11]u32,
- };
-
- const deflate_inflate_string_tests = [_]StringTest{
- .{
- .filename = "compress-e.txt",
- .limit = [11]u32{
- 100_018, // no_compression
- 50_650, // best_speed
- 50_960, // 2
- 51_150, // 3
- 50_930, // 4
- 50_790, // 5
- 50_790, // 6
- 50_790, // 7
- 50_790, // 8
- 50_790, // best_compression
- 43_683, // huffman_only
- },
- },
- .{
- .filename = "rfc1951.txt",
- .limit = [11]u32{
- 36_954, // no_compression
- 12_952, // best_speed
- 12_228, // 2
- 12_016, // 3
- 11_466, // 4
- 11_191, // 5
- 11_129, // 6
- 11_120, // 7
- 11_112, // 8
- 11_109, // best_compression
- 20_273, // huffman_only
- },
- },
- };
-
- inline for (deflate_inflate_string_tests) |t| {
- const golden = @embedFile("testdata/" ++ t.filename);
- try testToFromWithLimit(golden, t.limit);
- }
-}
-
-test "inflate reset" {
- const strings = [_][]const u8{
- "lorem ipsum izzle fo rizzle",
- "the quick brown fox jumped over",
- };
-
- var compressed_strings = [_]ArrayList(u8){
- ArrayList(u8).init(testing.allocator),
- ArrayList(u8).init(testing.allocator),
- };
- defer compressed_strings[0].deinit();
- defer compressed_strings[1].deinit();
-
- for (strings, 0..) |s, i| {
- var comp = try compressor(
- testing.allocator,
- compressed_strings[i].writer(),
- .{ .level = .level_6 },
- );
- defer comp.deinit();
-
- try comp.writer().writeAll(s);
- try comp.close();
- }
-
- var fib = io.fixedBufferStream(compressed_strings[0].items);
- var decomp = try decompressor(testing.allocator, fib.reader(), null);
- defer decomp.deinit();
-
- const decompressed_0: []u8 = try decomp.reader()
- .readAllAlloc(testing.allocator, math.maxInt(usize));
- defer testing.allocator.free(decompressed_0);
-
- fib = io.fixedBufferStream(compressed_strings[1].items);
- try decomp.reset(fib.reader(), null);
-
- const decompressed_1: []u8 = try decomp.reader()
- .readAllAlloc(testing.allocator, math.maxInt(usize));
- defer testing.allocator.free(decompressed_1);
-
- try decomp.close();
-
- try testing.expectEqualSlices(u8, strings[0], decompressed_0);
- try testing.expectEqualSlices(u8, strings[1], decompressed_1);
-}
-
-test "inflate reset dictionary" {
- const dict = "the lorem fox";
- const strings = [_][]const u8{
- "lorem ipsum izzle fo rizzle",
- "the quick brown fox jumped over",
- };
-
- var compressed_strings = [_]ArrayList(u8){
- ArrayList(u8).init(testing.allocator),
- ArrayList(u8).init(testing.allocator),
- };
- defer compressed_strings[0].deinit();
- defer compressed_strings[1].deinit();
-
- for (strings, 0..) |s, i| {
- var comp = try compressor(
- testing.allocator,
- compressed_strings[i].writer(),
- .{ .level = .level_6 },
- );
- defer comp.deinit();
-
- try comp.writer().writeAll(s);
- try comp.close();
- }
-
- var fib = io.fixedBufferStream(compressed_strings[0].items);
- var decomp = try decompressor(testing.allocator, fib.reader(), dict);
- defer decomp.deinit();
-
- const decompressed_0: []u8 = try decomp.reader()
- .readAllAlloc(testing.allocator, math.maxInt(usize));
- defer testing.allocator.free(decompressed_0);
-
- fib = io.fixedBufferStream(compressed_strings[1].items);
- try decomp.reset(fib.reader(), dict);
-
- const decompressed_1: []u8 = try decomp.reader()
- .readAllAlloc(testing.allocator, math.maxInt(usize));
- defer testing.allocator.free(decompressed_1);
-
- try decomp.close();
-
- try testing.expectEqualSlices(u8, strings[0], decompressed_0);
- try testing.expectEqualSlices(u8, strings[1], decompressed_1);
-}
diff --git a/lib/std/compress/deflate/decompressor.zig b/lib/std/compress/deflate/decompressor.zig
deleted file mode 100644
index 896f931a66..0000000000
--- a/lib/std/compress/deflate/decompressor.zig
+++ /dev/null
@@ -1,1119 +0,0 @@
-const std = @import("std");
-const assert = std.debug.assert;
-const math = std.math;
-const mem = std.mem;
-
-const Allocator = std.mem.Allocator;
-const ArrayList = std.ArrayList;
-
-const bu = @import("bits_utils.zig");
-const ddec = @import("dict_decoder.zig");
-const deflate_const = @import("deflate_const.zig");
-
-const max_match_offset = deflate_const.max_match_offset;
-const end_block_marker = deflate_const.end_block_marker;
-
-const max_code_len = 16; // max length of Huffman code
-// The next three numbers come from the RFC section 3.2.7, with the
-// additional proviso in section 3.2.5 which implies that distance codes
-// 30 and 31 should never occur in compressed data.
-const max_num_lit = 286;
-const max_num_dist = 30;
-const num_codes = 19; // number of codes in Huffman meta-code
-
-var corrupt_input_error_offset: u64 = undefined;
-
-const InflateError = error{
- CorruptInput, // A CorruptInput error reports the presence of corrupt input at a given offset.
- BadInternalState, // An BadInternalState reports an error in the flate code itself.
- BadReaderState, // An error was encountered while accessing the inner reader
- UnexpectedEndOfStream,
- EndOfStreamWithNoError,
-};
-
-// The data structure for decoding Huffman tables is based on that of
-// zlib. There is a lookup table of a fixed bit width (huffman_chunk_bits),
-// For codes smaller than the table width, there are multiple entries
-// (each combination of trailing bits has the same value). For codes
-// larger than the table width, the table contains a link to an overflow
-// table. The width of each entry in the link table is the maximum code
-// size minus the chunk width.
-//
-// Note that you can do a lookup in the table even without all bits
-// filled. Since the extra bits are zero, and the DEFLATE Huffman codes
-// have the property that shorter codes come before longer ones, the
-// bit length estimate in the result is a lower bound on the actual
-// number of bits.
-//
-// See the following:
-// https://github.com/madler/zlib/raw/master/doc/algorithm.txt
-
-// chunk & 15 is number of bits
-// chunk >> 4 is value, including table link
-
-const huffman_chunk_bits = 9;
-const huffman_num_chunks = 1 << huffman_chunk_bits; // 512
-const huffman_count_mask = 15; // 0b1111
-const huffman_value_shift = 4;
-
-const HuffmanDecoder = struct {
- const Self = @This();
-
- allocator: Allocator = undefined,
-
- min: u32 = 0, // the minimum code length
- chunks: [huffman_num_chunks]u16 = [1]u16{0} ** huffman_num_chunks, // chunks as described above
- links: [][]u16 = undefined, // overflow links
- link_mask: u32 = 0, // mask the width of the link table
- initialized: bool = false,
- sub_chunks: ArrayList(u32) = undefined,
-
- // Initialize Huffman decoding tables from array of code lengths.
- // Following this function, self is guaranteed to be initialized into a complete
- // tree (i.e., neither over-subscribed nor under-subscribed). The exception is a
- // degenerate case where the tree has only a single symbol with length 1. Empty
- // trees are permitted.
- fn init(self: *Self, allocator: Allocator, lengths: []u32) !bool {
-
- // Sanity enables additional runtime tests during Huffman
- // table construction. It's intended to be used during
- // development and debugging
- const sanity = false;
-
- if (self.min != 0) {
- self.* = HuffmanDecoder{};
- }
-
- self.allocator = allocator;
-
- // Count number of codes of each length,
- // compute min and max length.
- var count: [max_code_len]u32 = [1]u32{0} ** max_code_len;
- var min: u32 = 0;
- var max: u32 = 0;
- for (lengths) |n| {
- if (n == 0) {
- continue;
- }
- if (min == 0) {
- min = n;
- }
- min = @min(n, min);
- max = @max(n, max);
- count[n] += 1;
- }
-
- // Empty tree. The decompressor.huffSym function will fail later if the tree
- // is used. Technically, an empty tree is only valid for the HDIST tree and
- // not the HCLEN and HLIT tree. However, a stream with an empty HCLEN tree
- // is guaranteed to fail since it will attempt to use the tree to decode the
- // codes for the HLIT and HDIST trees. Similarly, an empty HLIT tree is
- // guaranteed to fail later since the compressed data section must be
- // composed of at least one symbol (the end-of-block marker).
- if (max == 0) {
- return true;
- }
-
- var next_code: [max_code_len]u32 = [1]u32{0} ** max_code_len;
- var code: u32 = 0;
- {
- var i = min;
- while (i <= max) : (i += 1) {
- code <<= 1;
- next_code[i] = code;
- code += count[i];
- }
- }
-
- // Check that the coding is complete (i.e., that we've
- // assigned all 2-to-the-max possible bit sequences).
- // Exception: To be compatible with zlib, we also need to
- // accept degenerate single-code codings. See also
- // TestDegenerateHuffmanCoding.
- if (code != @as(u32, 1) << @as(u5, @intCast(max)) and !(code == 1 and max == 1)) {
- return false;
- }
-
- self.min = min;
- if (max > huffman_chunk_bits) {
- const num_links = @as(u32, 1) << @as(u5, @intCast(max - huffman_chunk_bits));
- self.link_mask = @as(u32, @intCast(num_links - 1));
-
- // create link tables
- const link = next_code[huffman_chunk_bits + 1] >> 1;
- self.links = try self.allocator.alloc([]u16, huffman_num_chunks - link);
- self.sub_chunks = ArrayList(u32).init(self.allocator);
- self.initialized = true;
- var j = @as(u32, @intCast(link));
- while (j < huffman_num_chunks) : (j += 1) {
- var reverse = @as(u32, @intCast(bu.bitReverse(u16, @as(u16, @intCast(j)), 16)));
- reverse >>= @as(u32, @intCast(16 - huffman_chunk_bits));
- const off = j - @as(u32, @intCast(link));
- if (sanity) {
- // check we are not overwriting an existing chunk
- assert(self.chunks[reverse] == 0);
- }
- self.chunks[reverse] = @as(u16, @intCast(off << huffman_value_shift | (huffman_chunk_bits + 1)));
- self.links[off] = try self.allocator.alloc(u16, num_links);
- if (sanity) {
- // initialize to a known invalid chunk code (0) to see if we overwrite
- // this value later on
- @memset(self.links[off], 0);
- }
- try self.sub_chunks.append(off);
- }
- }
-
- for (lengths, 0..) |n, li| {
- if (n == 0) {
- continue;
- }
- const ncode = next_code[n];
- next_code[n] += 1;
- const chunk = @as(u16, @intCast((li << huffman_value_shift) | n));
- var reverse = @as(u16, @intCast(bu.bitReverse(u16, @as(u16, @intCast(ncode)), 16)));
- reverse >>= @as(u4, @intCast(16 - n));
- if (n <= huffman_chunk_bits) {
- var off = reverse;
- while (off < self.chunks.len) : (off += @as(u16, 1) << @as(u4, @intCast(n))) {
- // We should never need to overwrite
- // an existing chunk. Also, 0 is
- // never a valid chunk, because the
- // lower 4 "count" bits should be
- // between 1 and 15.
- if (sanity) {
- assert(self.chunks[off] == 0);
- }
- self.chunks[off] = chunk;
- }
- } else {
- const j = reverse & (huffman_num_chunks - 1);
- if (sanity) {
- // Expect an indirect chunk
- assert(self.chunks[j] & huffman_count_mask == huffman_chunk_bits + 1);
- // Longer codes should have been
- // associated with a link table above.
- }
- const value = self.chunks[j] >> huffman_value_shift;
- var link_tab = self.links[value];
- reverse >>= huffman_chunk_bits;
- var off = reverse;
- while (off < link_tab.len) : (off += @as(u16, 1) << @as(u4, @intCast(n - huffman_chunk_bits))) {
- if (sanity) {
- // check we are not overwriting an existing chunk
- assert(link_tab[off] == 0);
- }
- link_tab[off] = @as(u16, @intCast(chunk));
- }
- }
- }
-
- if (sanity) {
- // Above we've sanity checked that we never overwrote
- // an existing entry. Here we additionally check that
- // we filled the tables completely.
- for (self.chunks, 0..) |chunk, i| {
- // As an exception, in the degenerate
- // single-code case, we allow odd
- // chunks to be missing.
- if (code == 1 and i % 2 == 1) {
- continue;
- }
-
- // Assert we are not missing a chunk.
- // All chunks should have been written once
- // thus losing their initial value of 0
- assert(chunk != 0);
- }
-
- if (self.initialized) {
- for (self.links) |link_tab| {
- for (link_tab) |chunk| {
- // Assert we are not missing a chunk.
- assert(chunk != 0);
- }
- }
- }
- }
-
- return true;
- }
-
- /// Release all allocated memory.
- pub fn deinit(self: *Self) void {
- if (self.initialized and self.links.len > 0) {
- for (self.sub_chunks.items) |off| {
- self.allocator.free(self.links[off]);
- }
- self.allocator.free(self.links);
- self.sub_chunks.deinit();
- self.initialized = false;
- }
- }
-};
-
-var fixed_huffman_decoder: ?HuffmanDecoder = null;
-
-fn fixedHuffmanDecoderInit(allocator: Allocator) !HuffmanDecoder {
- if (fixed_huffman_decoder != null) {
- return fixed_huffman_decoder.?;
- }
-
- // These come from the RFC section 3.2.6.
- var bits: [288]u32 = undefined;
- var i: u32 = 0;
- while (i < 144) : (i += 1) {
- bits[i] = 8;
- }
- while (i < 256) : (i += 1) {
- bits[i] = 9;
- }
- while (i < 280) : (i += 1) {
- bits[i] = 7;
- }
- while (i < 288) : (i += 1) {
- bits[i] = 8;
- }
-
- fixed_huffman_decoder = HuffmanDecoder{};
- _ = try fixed_huffman_decoder.?.init(allocator, &bits);
- return fixed_huffman_decoder.?;
-}
-
-const DecompressorState = enum {
- init,
- dict,
-};
-
-/// Returns a new Decompressor that can be used to read the uncompressed version of `reader`.
-/// `dictionary` is optional and initializes the Decompressor with a preset dictionary.
-/// The returned Decompressor behaves as if the uncompressed data stream started with the given
-/// dictionary, which has already been read. Use the same `dictionary` as the compressor used to
-/// compress the data.
-/// This decompressor may use at most 300 KiB of heap memory from the provided allocator.
-/// The uncompressed data will be written into the provided buffer, see `reader()` and `read()`.
-pub fn decompressor(allocator: Allocator, reader: anytype, dictionary: ?[]const u8) !Decompressor(@TypeOf(reader)) {
- return Decompressor(@TypeOf(reader)).init(allocator, reader, dictionary);
-}
-
-pub fn Decompressor(comptime ReaderType: type) type {
- return struct {
- const Self = @This();
-
- pub const Error =
- ReaderType.Error ||
- error{EndOfStream} ||
- InflateError ||
- Allocator.Error;
- pub const Reader = io.Reader(*Self, Error, read);
-
- allocator: Allocator,
-
- // Input source.
- inner_reader: ReaderType,
- roffset: u64,
-
- // Input bits, in top of b.
- b: u32,
- nb: u32,
-
- // Huffman decoders for literal/length, distance.
- hd1: HuffmanDecoder,
- hd2: HuffmanDecoder,
-
- // Length arrays used to define Huffman codes.
- bits: *[max_num_lit + max_num_dist]u32,
- codebits: *[num_codes]u32,
-
- // Output history, buffer.
- dict: ddec.DictDecoder,
-
- // Temporary buffer (avoids repeated allocation).
- buf: [4]u8,
-
- // Next step in the decompression,
- // and decompression state.
- step: *const fn (*Self) Error!void,
- step_state: DecompressorState,
- final: bool,
- err: ?Error,
- to_read: []u8,
- // Huffman states for the lit/length values
- hl: ?*HuffmanDecoder,
- // Huffman states for the distance values.
- hd: ?*HuffmanDecoder,
- copy_len: u32,
- copy_dist: u32,
-
- /// Returns a Reader that reads compressed data from an underlying reader and outputs
- /// uncompressed data.
- pub fn reader(self: *Self) Reader {
- return .{ .context = self };
- }
-
- fn init(allocator: Allocator, in_reader: ReaderType, dict: ?[]const u8) !Self {
- fixed_huffman_decoder = try fixedHuffmanDecoderInit(allocator);
-
- const bits = try allocator.create([max_num_lit + max_num_dist]u32);
- const codebits = try allocator.create([num_codes]u32);
-
- var dd = ddec.DictDecoder{};
- try dd.init(allocator, max_match_offset, dict);
-
- return Self{
- .allocator = allocator,
-
- // Input source.
- .inner_reader = in_reader,
- .roffset = 0,
-
- // Input bits, in top of b.
- .b = 0,
- .nb = 0,
-
- // Huffman decoders for literal/length, distance.
- .hd1 = HuffmanDecoder{},
- .hd2 = HuffmanDecoder{},
-
- // Length arrays used to define Huffman codes.
- .bits = bits,
- .codebits = codebits,
-
- // Output history, buffer.
- .dict = dd,
-
- // Temporary buffer (avoids repeated allocation).
- .buf = [_]u8{0} ** 4,
-
- // Next step in the decompression and decompression state.
- .step = nextBlock,
- .step_state = .init,
- .final = false,
- .err = null,
- .to_read = &[0]u8{},
- .hl = null,
- .hd = null,
- .copy_len = 0,
- .copy_dist = 0,
- };
- }
-
- /// Release all allocated memory.
- pub fn deinit(self: *Self) void {
- self.hd2.deinit();
- self.hd1.deinit();
- self.dict.deinit();
- self.allocator.destroy(self.codebits);
- self.allocator.destroy(self.bits);
- }
-
- fn nextBlock(self: *Self) Error!void {
- while (self.nb < 1 + 2) {
- self.moreBits() catch |e| {
- self.err = e;
- return e;
- };
- }
- self.final = self.b & 1 == 1;
- self.b >>= 1;
- const typ = self.b & 3;
- self.b >>= 2;
- self.nb -= 1 + 2;
- switch (typ) {
- 0 => try self.dataBlock(),
- 1 => {
- // compressed, fixed Huffman tables
- self.hl = &fixed_huffman_decoder.?;
- self.hd = null;
- try self.huffmanBlock();
- },
- 2 => {
- // compressed, dynamic Huffman tables
- self.hd2.deinit();
- self.hd1.deinit();
- try self.readHuffman();
- self.hl = &self.hd1;
- self.hd = &self.hd2;
- try self.huffmanBlock();
- },
- else => {
- // 3 is reserved.
- corrupt_input_error_offset = self.roffset;
- self.err = InflateError.CorruptInput;
- return InflateError.CorruptInput;
- },
- }
- }
-
- /// Reads compressed data from the underlying reader and outputs uncompressed data into
- /// `output`.
- pub fn read(self: *Self, output: []u8) Error!usize {
- while (true) {
- if (self.to_read.len > 0) {
- const n = std.compress.deflate.copy(output, self.to_read);
- self.to_read = self.to_read[n..];
- if (self.to_read.len == 0 and
- self.err != null)
- {
- if (self.err.? == InflateError.EndOfStreamWithNoError) {
- return n;
- }
- return self.err.?;
- }
- return n;
- }
- if (self.err != null) {
- if (self.err.? == InflateError.EndOfStreamWithNoError) {
- return 0;
- }
- return self.err.?;
- }
- self.step(self) catch |e| {
- self.err = e;
- if (self.to_read.len == 0) {
- self.to_read = self.dict.readFlush(); // Flush what's left in case of error
- }
- };
- }
- }
-
- pub fn close(self: *Self) Error!void {
- if (self.err) |err| {
- if (err != error.EndOfStreamWithNoError) return err;
- }
- }
-
- // RFC 1951 section 3.2.7.
- // Compression with dynamic Huffman codes
-
- const code_order = [_]u32{ 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15 };
-
- fn readHuffman(self: *Self) Error!void {
- // HLIT[5], HDIST[5], HCLEN[4].
- while (self.nb < 5 + 5 + 4) {
- try self.moreBits();
- }
- const nlit = @as(u32, @intCast(self.b & 0x1F)) + 257;
- if (nlit > max_num_lit) {
- corrupt_input_error_offset = self.roffset;
- self.err = InflateError.CorruptInput;
- return InflateError.CorruptInput;
- }
- self.b >>= 5;
- const ndist = @as(u32, @intCast(self.b & 0x1F)) + 1;
- if (ndist > max_num_dist) {
- corrupt_input_error_offset = self.roffset;
- self.err = InflateError.CorruptInput;
- return InflateError.CorruptInput;
- }
- self.b >>= 5;
- const nclen = @as(u32, @intCast(self.b & 0xF)) + 4;
- // num_codes is 19, so nclen is always valid.
- self.b >>= 4;
- self.nb -= 5 + 5 + 4;
-
- // (HCLEN+4)*3 bits: code lengths in the magic code_order order.
- var i: u32 = 0;
- while (i < nclen) : (i += 1) {
- while (self.nb < 3) {
- try self.moreBits();
- }
- self.codebits[code_order[i]] = @as(u32, @intCast(self.b & 0x7));
- self.b >>= 3;
- self.nb -= 3;
- }
- i = nclen;
- while (i < code_order.len) : (i += 1) {
- self.codebits[code_order[i]] = 0;
- }
- if (!try self.hd1.init(self.allocator, self.codebits[0..])) {
- corrupt_input_error_offset = self.roffset;
- self.err = InflateError.CorruptInput;
- return InflateError.CorruptInput;
- }
-
- // HLIT + 257 code lengths, HDIST + 1 code lengths,
- // using the code length Huffman code.
- i = 0;
- const n = nlit + ndist;
- while (i < n) {
- const x = try self.huffSym(&self.hd1);
- if (x < 16) {
- // Actual length.
- self.bits[i] = x;
- i += 1;
- continue;
- }
- // Repeat previous length or zero.
- var rep: u32 = 0;
- var nb: u32 = 0;
- var b: u32 = 0;
- switch (x) {
- 16 => {
- rep = 3;
- nb = 2;
- if (i == 0) {
- corrupt_input_error_offset = self.roffset;
- self.err = InflateError.CorruptInput;
- return InflateError.CorruptInput;
- }
- b = self.bits[i - 1];
- },
- 17 => {
- rep = 3;
- nb = 3;
- b = 0;
- },
- 18 => {
- rep = 11;
- nb = 7;
- b = 0;
- },
- else => return error.BadInternalState, // unexpected length code
- }
- while (self.nb < nb) {
- try self.moreBits();
- }
- rep += @as(u32, @intCast(self.b & (@as(u32, 1) << @as(u5, @intCast(nb))) - 1));
- self.b >>= @as(u5, @intCast(nb));
- self.nb -= nb;
- if (i + rep > n) {
- corrupt_input_error_offset = self.roffset;
- self.err = InflateError.CorruptInput;
- return InflateError.CorruptInput;
- }
- var j: u32 = 0;
- while (j < rep) : (j += 1) {
- self.bits[i] = b;
- i += 1;
- }
- }
-
- if (!try self.hd1.init(self.allocator, self.bits[0..nlit]) or
- !try self.hd2.init(self.allocator, self.bits[nlit..][0..ndist]))
- {
- corrupt_input_error_offset = self.roffset;
- self.err = InflateError.CorruptInput;
- return InflateError.CorruptInput;
- }
-
- // As an optimization, we can initialize the min bits to read at a time
- // for the HLIT tree to the length of the EOB marker since we know that
- // every block must terminate with one. This preserves the property that
- // we never read any extra bytes after the end of the DEFLATE stream.
- if (self.hd1.min < self.bits[end_block_marker]) {
- self.hd1.min = self.bits[end_block_marker];
- }
-
- return;
- }
-
- // Decode a single Huffman block.
- // hl and hd are the Huffman states for the lit/length values
- // and the distance values, respectively. If hd == null, using the
- // fixed distance encoding associated with fixed Huffman blocks.
- fn huffmanBlock(self: *Self) Error!void {
- while (true) {
- switch (self.step_state) {
- .init => {
- // Read literal and/or (length, distance) according to RFC section 3.2.3.
- const v = try self.huffSym(self.hl.?);
- var n: u32 = 0; // number of bits extra
- var length: u32 = 0;
- switch (v) {
- 0...255 => {
- self.dict.writeByte(@as(u8, @intCast(v)));
- if (self.dict.availWrite() == 0) {
- self.to_read = self.dict.readFlush();
- self.step = huffmanBlock;
- self.step_state = .init;
- return;
- }
- self.step_state = .init;
- continue;
- },
- 256 => {
- self.finishBlock();
- return;
- },
- // otherwise, reference to older data
- 257...264 => {
- length = v - (257 - 3);
- n = 0;
- },
- 265...268 => {
- length = v * 2 - (265 * 2 - 11);
- n = 1;
- },
- 269...272 => {
- length = v * 4 - (269 * 4 - 19);
- n = 2;
- },
- 273...276 => {
- length = v * 8 - (273 * 8 - 35);
- n = 3;
- },
- 277...280 => {
- length = v * 16 - (277 * 16 - 67);
- n = 4;
- },
- 281...284 => {
- length = v * 32 - (281 * 32 - 131);
- n = 5;
- },
- max_num_lit - 1 => { // 285
- length = 258;
- n = 0;
- },
- else => {
- corrupt_input_error_offset = self.roffset;
- self.err = InflateError.CorruptInput;
- return InflateError.CorruptInput;
- },
- }
- if (n > 0) {
- while (self.nb < n) {
- try self.moreBits();
- }
- length += @as(u32, @intCast(self.b)) & ((@as(u32, 1) << @as(u5, @intCast(n))) - 1);
- self.b >>= @as(u5, @intCast(n));
- self.nb -= n;
- }
-
- var dist: u32 = 0;
- if (self.hd == null) {
- while (self.nb < 5) {
- try self.moreBits();
- }
- dist = @as(
- u32,
- @intCast(bu.bitReverse(u8, @as(u8, @intCast((self.b & 0x1F) << 3)), 8)),
- );
- self.b >>= 5;
- self.nb -= 5;
- } else {
- dist = try self.huffSym(self.hd.?);
- }
-
- switch (dist) {
- 0...3 => dist += 1,
- 4...max_num_dist - 1 => { // 4...29
- const nb = @as(u32, @intCast(dist - 2)) >> 1;
- // have 1 bit in bottom of dist, need nb more.
- var extra = (dist & 1) << @as(u5, @intCast(nb));
- while (self.nb < nb) {
- try self.moreBits();
- }
- extra |= @as(u32, @intCast(self.b & (@as(u32, 1) << @as(u5, @intCast(nb))) - 1));
- self.b >>= @as(u5, @intCast(nb));
- self.nb -= nb;
- dist = (@as(u32, 1) << @as(u5, @intCast(nb + 1))) + 1 + extra;
- },
- else => {
- corrupt_input_error_offset = self.roffset;
- self.err = InflateError.CorruptInput;
- return InflateError.CorruptInput;
- },
- }
-
- // No check on length; encoding can be prescient.
- if (dist > self.dict.histSize()) {
- corrupt_input_error_offset = self.roffset;
- self.err = InflateError.CorruptInput;
- return InflateError.CorruptInput;
- }
-
- self.copy_len = length;
- self.copy_dist = dist;
- self.step_state = .dict;
- },
-
- .dict => {
- // Perform a backwards copy according to RFC section 3.2.3.
- var cnt = self.dict.tryWriteCopy(self.copy_dist, self.copy_len);
- if (cnt == 0) {
- cnt = self.dict.writeCopy(self.copy_dist, self.copy_len);
- }
- self.copy_len -= cnt;
-
- if (self.dict.availWrite() == 0 or self.copy_len > 0) {
- self.to_read = self.dict.readFlush();
- self.step = huffmanBlock; // We need to continue this work
- self.step_state = .dict;
- return;
- }
- self.step_state = .init;
- },
- }
- }
- }
-
- // Copy a single uncompressed data block from input to output.
- fn dataBlock(self: *Self) Error!void {
- // Uncompressed.
- // Discard current half-byte.
- self.nb = 0;
- self.b = 0;
-
- // Length then ones-complement of length.
- const nr: u32 = 4;
- self.inner_reader.readNoEof(self.buf[0..nr]) catch {
- self.err = InflateError.UnexpectedEndOfStream;
- return InflateError.UnexpectedEndOfStream;
- };
- self.roffset += @as(u64, @intCast(nr));
- const n = @as(u32, @intCast(self.buf[0])) | @as(u32, @intCast(self.buf[1])) << 8;
- const nn = @as(u32, @intCast(self.buf[2])) | @as(u32, @intCast(self.buf[3])) << 8;
- if (@as(u16, @intCast(nn)) != @as(u16, @truncate(~n))) {
- corrupt_input_error_offset = self.roffset;
- self.err = InflateError.CorruptInput;
- return InflateError.CorruptInput;
- }
-
- if (n == 0) {
- self.to_read = self.dict.readFlush();
- self.finishBlock();
- return;
- }
-
- self.copy_len = n;
- try self.copyData();
- }
-
- // copyData copies self.copy_len bytes from the underlying reader into self.hist.
- // It pauses for reads when self.hist is full.
- fn copyData(self: *Self) Error!void {
- var buf = self.dict.writeSlice();
- if (buf.len > self.copy_len) {
- buf = buf[0..self.copy_len];
- }
-
- const cnt = try self.inner_reader.read(buf);
- if (cnt < buf.len) {
- self.err = InflateError.UnexpectedEndOfStream;
- }
- self.roffset += @as(u64, @intCast(cnt));
- self.copy_len -= @as(u32, @intCast(cnt));
- self.dict.writeMark(@as(u32, @intCast(cnt)));
- if (self.err != null) {
- return InflateError.UnexpectedEndOfStream;
- }
-
- if (self.dict.availWrite() == 0 or self.copy_len > 0) {
- self.to_read = self.dict.readFlush();
- self.step = copyData;
- return;
- }
- self.finishBlock();
- }
-
- fn finishBlock(self: *Self) void {
- if (self.final) {
- if (self.dict.availRead() > 0) {
- self.to_read = self.dict.readFlush();
- }
- self.err = InflateError.EndOfStreamWithNoError;
- }
- self.step = nextBlock;
- }
-
- fn moreBits(self: *Self) InflateError!void {
- const c = self.inner_reader.readByte() catch |e| {
- if (e == error.EndOfStream) {
- return InflateError.UnexpectedEndOfStream;
- }
- return InflateError.BadReaderState;
- };
- self.roffset += 1;
- self.b |= @as(u32, c) << @as(u5, @intCast(self.nb));
- self.nb += 8;
- return;
- }
-
- // Read the next Huffman-encoded symbol according to h.
- fn huffSym(self: *Self, h: *HuffmanDecoder) InflateError!u32 {
- // Since a HuffmanDecoder can be empty or be composed of a degenerate tree
- // with single element, huffSym must error on these two edge cases. In both
- // cases, the chunks slice will be 0 for the invalid sequence, leading it
- // satisfy the n == 0 check below.
- var n: u32 = h.min;
- // Optimization. Go compiler isn't smart enough to keep self.b, self.nb in registers,
- // but is smart enough to keep local variables in registers, so use nb and b,
- // inline call to moreBits and reassign b, nb back to self on return.
- var nb = self.nb;
- var b = self.b;
- while (true) {
- while (nb < n) {
- const c = self.inner_reader.readByte() catch |e| {
- self.b = b;
- self.nb = nb;
- if (e == error.EndOfStream) {
- return error.UnexpectedEndOfStream;
- }
- return InflateError.BadReaderState;
- };
- self.roffset += 1;
- b |= @as(u32, @intCast(c)) << @as(u5, @intCast(nb & 31));
- nb += 8;
- }
- var chunk = h.chunks[b & (huffman_num_chunks - 1)];
- n = @as(u32, @intCast(chunk & huffman_count_mask));
- if (n > huffman_chunk_bits) {
- chunk = h.links[chunk >> huffman_value_shift][(b >> huffman_chunk_bits) & h.link_mask];
- n = @as(u32, @intCast(chunk & huffman_count_mask));
- }
- if (n <= nb) {
- if (n == 0) {
- self.b = b;
- self.nb = nb;
- corrupt_input_error_offset = self.roffset;
- self.err = InflateError.CorruptInput;
- return InflateError.CorruptInput;
- }
- self.b = b >> @as(u5, @intCast(n & 31));
- self.nb = nb - n;
- return @as(u32, @intCast(chunk >> huffman_value_shift));
- }
- }
- }
-
- /// Replaces the inner reader and dictionary with new_reader and new_dict.
- /// new_reader must be of the same type as the reader being replaced.
- pub fn reset(s: *Self, new_reader: ReaderType, new_dict: ?[]const u8) Error!void {
- s.inner_reader = new_reader;
- s.step = nextBlock;
- s.err = null;
- s.nb = 0;
-
- s.dict.deinit();
- try s.dict.init(s.allocator, max_match_offset, new_dict);
-
- return;
- }
- };
-}
-
-// tests
-const expectError = std.testing.expectError;
-const io = std.io;
-const testing = std.testing;
-
-test "confirm decompressor resets" {
- var compressed = std.ArrayList(u8).init(std.testing.allocator);
- defer compressed.deinit();
-
- inline for (.{
- &[_]u8{ 0x5d, 0xc0, 0x21, 0x01, 0x00, 0x00, 0x00, 0x80, 0x20, 0xff, 0xaf, 0xa6, 0x4b, 0x03 },
- &[_]u8{ 0x55, 0xc1, 0x41, 0x0d, 0x00, 0x00, 0x00, 0x02, 0xa1, 0x94, 0x96, 0x34, 0x25, 0xef, 0x1b, 0x5f, 0x01 },
- }) |data| {
- try compressed.writer().writeAll(data);
- }
-
- var stream = std.io.fixedBufferStream(compressed.items);
- var decomp = try decompressor(std.testing.allocator, stream.reader(), null);
- defer decomp.deinit();
-
- while (true) {
- if (try stream.getPos() == try stream.getEndPos()) break;
-
- const buf = try decomp.reader().readAllAlloc(std.testing.allocator, 1024 * 100);
- defer std.testing.allocator.free(buf);
-
- try decomp.close();
-
- try decomp.reset(stream.reader(), null);
- }
-}
-
-test "truncated input" {
- const TruncatedTest = struct {
- input: []const u8,
- output: []const u8,
- };
-
- const tests = [_]TruncatedTest{
- .{ .input = "\x00", .output = "" },
- .{ .input = "\x00\x0c", .output = "" },
- .{ .input = "\x00\x0c\x00", .output = "" },
- .{ .input = "\x00\x0c\x00\xf3\xff", .output = "" },
- .{ .input = "\x00\x0c\x00\xf3\xffhello", .output = "hello" },
- .{ .input = "\x00\x0c\x00\xf3\xffhello, world", .output = "hello, world" },
- .{ .input = "\x02", .output = "" },
- .{ .input = "\xf2H\xcd", .output = "He" },
- .{ .input = "\xf2H͙0a\u{0084}\t", .output = "Hel\x90\x90\x90\x90\x90" },
- .{ .input = "\xf2H͙0a\u{0084}\t\x00", .output = "Hel\x90\x90\x90\x90\x90" },
- };
-
- for (tests) |t| {
- var fib = io.fixedBufferStream(t.input);
- const r = fib.reader();
- var z = try decompressor(testing.allocator, r, null);
- defer z.deinit();
- var zr = z.reader();
-
- var output = [1]u8{0} ** 12;
- try expectError(error.UnexpectedEndOfStream, zr.readAll(&output));
- try testing.expectEqualSlices(u8, t.output, output[0..t.output.len]);
- }
-}
-
-test "Go non-regression test for 9842" {
- // See https://golang.org/issue/9842
-
- const Test = struct {
- err: ?anyerror,
- input: []const u8,
- };
-
- const tests = [_]Test{
- .{ .err = error.UnexpectedEndOfStream, .input = ("\x95\x90=o\xc20\x10\x86\xf30") },
- .{ .err = error.CorruptInput, .input = ("\x950\x00\x0000000") },
-
- // Huffman.construct errors
-
- // lencode
- .{ .err = error.CorruptInput, .input = ("\x950000") },
- .{ .err = error.CorruptInput, .input = ("\x05000") },
- // hlen
- .{ .err = error.CorruptInput, .input = ("\x05\xea\x01\t\x00\x00\x00\x01\x00\\\xbf.\t\x00") },
- // hdist
- .{ .err = error.CorruptInput, .input = ("\x05\xe0\x01A\x00\x00\x00\x00\x10\\\xbf.") },
-
- // like the "empty distance alphabet" test but for ndist instead of nlen
- .{ .err = error.CorruptInput, .input = ("\x05\xe0\x01\t\x00\x00\x00\x00\x10\\\xbf\xce") },
- .{ .err = null, .input = "\x15\xe0\x01\t\x00\x00\x00\x00\x10\\\xbf.0" },
- };
-
- for (tests) |t| {
- var fib = std.io.fixedBufferStream(t.input);
- const reader = fib.reader();
- var decomp = try decompressor(testing.allocator, reader, null);
- defer decomp.deinit();
-
- var output: [10]u8 = undefined;
- if (t.err != null) {
- try expectError(t.err.?, decomp.reader().read(&output));
- } else {
- _ = try decomp.reader().read(&output);
- }
- }
-}
-
-test "inflate A Tale of Two Cities (1859) intro" {
- const compressed = [_]u8{
- 0x74, 0xeb, 0xcd, 0x0d, 0x80, 0x20, 0x0c, 0x47, 0x71, 0xdc, 0x9d, 0xa2, 0x03, 0xb8, 0x88,
- 0x63, 0xf0, 0xf1, 0x47, 0x9a, 0x00, 0x35, 0xb4, 0x86, 0xf5, 0x0d, 0x27, 0x63, 0x82, 0xe7,
- 0xdf, 0x7b, 0x87, 0xd1, 0x70, 0x4a, 0x96, 0x41, 0x1e, 0x6a, 0x24, 0x89, 0x8c, 0x2b, 0x74,
- 0xdf, 0xf8, 0x95, 0x21, 0xfd, 0x8f, 0xdc, 0x89, 0x09, 0x83, 0x35, 0x4a, 0x5d, 0x49, 0x12,
- 0x29, 0xac, 0xb9, 0x41, 0xbf, 0x23, 0x2e, 0x09, 0x79, 0x06, 0x1e, 0x85, 0x91, 0xd6, 0xc6,
- 0x2d, 0x74, 0xc4, 0xfb, 0xa1, 0x7b, 0x0f, 0x52, 0x20, 0x84, 0x61, 0x28, 0x0c, 0x63, 0xdf,
- 0x53, 0xf4, 0x00, 0x1e, 0xc3, 0xa5, 0x97, 0x88, 0xf4, 0xd9, 0x04, 0xa5, 0x2d, 0x49, 0x54,
- 0xbc, 0xfd, 0x90, 0xa5, 0x0c, 0xae, 0xbf, 0x3f, 0x84, 0x77, 0x88, 0x3f, 0xaf, 0xc0, 0x40,
- 0xd6, 0x5b, 0x14, 0x8b, 0x54, 0xf6, 0x0f, 0x9b, 0x49, 0xf7, 0xbf, 0xbf, 0x36, 0x54, 0x5a,
- 0x0d, 0xe6, 0x3e, 0xf0, 0x9e, 0x29, 0xcd, 0xa1, 0x41, 0x05, 0x36, 0x48, 0x74, 0x4a, 0xe9,
- 0x46, 0x66, 0x2a, 0x19, 0x17, 0xf4, 0x71, 0x8e, 0xcb, 0x15, 0x5b, 0x57, 0xe4, 0xf3, 0xc7,
- 0xe7, 0x1e, 0x9d, 0x50, 0x08, 0xc3, 0x50, 0x18, 0xc6, 0x2a, 0x19, 0xa0, 0xdd, 0xc3, 0x35,
- 0x82, 0x3d, 0x6a, 0xb0, 0x34, 0x92, 0x16, 0x8b, 0xdb, 0x1b, 0xeb, 0x7d, 0xbc, 0xf8, 0x16,
- 0xf8, 0xc2, 0xe1, 0xaf, 0x81, 0x7e, 0x58, 0xf4, 0x9f, 0x74, 0xf8, 0xcd, 0x39, 0xd3, 0xaa,
- 0x0f, 0x26, 0x31, 0xcc, 0x8d, 0x9a, 0xd2, 0x04, 0x3e, 0x51, 0xbe, 0x7e, 0xbc, 0xc5, 0x27,
- 0x3d, 0xa5, 0xf3, 0x15, 0x63, 0x94, 0x42, 0x75, 0x53, 0x6b, 0x61, 0xc8, 0x01, 0x13, 0x4d,
- 0x23, 0xba, 0x2a, 0x2d, 0x6c, 0x94, 0x65, 0xc7, 0x4b, 0x86, 0x9b, 0x25, 0x3e, 0xba, 0x01,
- 0x10, 0x84, 0x81, 0x28, 0x80, 0x55, 0x1c, 0xc0, 0xa5, 0xaa, 0x36, 0xa6, 0x09, 0xa8, 0xa1,
- 0x85, 0xf9, 0x7d, 0x45, 0xbf, 0x80, 0xe4, 0xd1, 0xbb, 0xde, 0xb9, 0x5e, 0xf1, 0x23, 0x89,
- 0x4b, 0x00, 0xd5, 0x59, 0x84, 0x85, 0xe3, 0xd4, 0xdc, 0xb2, 0x66, 0xe9, 0xc1, 0x44, 0x0b,
- 0x1e, 0x84, 0xec, 0xe6, 0xa1, 0xc7, 0x42, 0x6a, 0x09, 0x6d, 0x9a, 0x5e, 0x70, 0xa2, 0x36,
- 0x94, 0x29, 0x2c, 0x85, 0x3f, 0x24, 0x39, 0xf3, 0xae, 0xc3, 0xca, 0xca, 0xaf, 0x2f, 0xce,
- 0x8e, 0x58, 0x91, 0x00, 0x25, 0xb5, 0xb3, 0xe9, 0xd4, 0xda, 0xef, 0xfa, 0x48, 0x7b, 0x3b,
- 0xe2, 0x63, 0x12, 0x00, 0x00, 0x20, 0x04, 0x80, 0x70, 0x36, 0x8c, 0xbd, 0x04, 0x71, 0xff,
- 0xf6, 0x0f, 0x66, 0x38, 0xcf, 0xa1, 0x39, 0x11, 0x0f,
- };
-
- const expected =
- \\It was the best of times,
- \\it was the worst of times,
- \\it was the age of wisdom,
- \\it was the age of foolishness,
- \\it was the epoch of belief,
- \\it was the epoch of incredulity,
- \\it was the season of Light,
- \\it was the season of Darkness,
- \\it was the spring of hope,
- \\it was the winter of despair,
- \\
- \\we had everything before us, we had nothing before us, we were all going direct to Heaven, we were all going direct the other way---in short, the period was so far like the present period, that some of its noisiest authorities insisted on its being received, for good or for evil, in the superlative degree of comparison only.
- \\
- ;
-
- var fib = std.io.fixedBufferStream(&compressed);
- const reader = fib.reader();
- var decomp = try decompressor(testing.allocator, reader, null);
- defer decomp.deinit();
-
- var got: [700]u8 = undefined;
- const got_len = try decomp.reader().read(&got);
- try testing.expectEqual(@as(usize, 616), got_len);
- try testing.expectEqualSlices(u8, expected, got[0..expected.len]);
-}
-
-test "lengths overflow" {
- // malformed final dynamic block, tries to write 321 code lengths (MAXCODES is 316)
- // f dy hlit hdist hclen 16 17 18 0 (18) x138 (18) x138 (18) x39 (16) x6
- // 1 10 11101 11101 0000 010 010 010 010 (11) 1111111 (11) 1111111 (11) 0011100 (01) 11
- const stream = [_]u8{
- 0b11101101, 0b00011101, 0b00100100, 0b11101001, 0b11111111, 0b11111111, 0b00111001,
- 0b00001110,
- };
- try expectError(error.CorruptInput, decompress(stream[0..]));
-}
-
-test "empty distance alphabet" {
- // dynamic block with empty distance alphabet is valid if only literals and end of data symbol are used
- // f dy hlit hdist hclen 16 17 18 0 8 7 9 6 10 5 11 4 12 3 13 2 14 1 15 (18) x128 (18) x128 (1) ( 0) (256)
- // 1 10 00000 00000 1111 000 000 010 010 000 000 000 000 000 000 000 000 000 000 000 000 000 001 000 (11) 1110101 (11) 1110101 (0) (10) (0)
- const stream = [_]u8{
- 0b00000101, 0b11100000, 0b00000001, 0b00001001, 0b00000000, 0b00000000,
- 0b00000000, 0b00000000, 0b00010000, 0b01011100, 0b10111111, 0b00101110,
- };
- try decompress(stream[0..]);
-}
-
-test "distance past beginning of output stream" {
- // f fx ('A') ('B') ('C') <len=4, dist=4> (end)
- // 1 01 (01110001) (01110010) (01110011) (0000010) (00011) (0000000)
- const stream = [_]u8{ 0b01110011, 0b01110100, 0b01110010, 0b00000110, 0b01100001, 0b00000000 };
- try std.testing.expectError(error.CorruptInput, decompress(stream[0..]));
-}
-
-test "fuzzing" {
- const compressed = [_]u8{
- 0x0a, 0x08, 0x50, 0xeb, 0x25, 0x05, 0xfc, 0x30, 0x0b, 0x0a, 0x08, 0x50, 0xeb, 0x25, 0x05,
- } ++ [_]u8{0xe1} ** 15 ++ [_]u8{0x30} ++ [_]u8{0xe1} ** 1481;
- try expectError(error.UnexpectedEndOfStream, decompress(&compressed));
-
- // see https://github.com/ziglang/zig/issues/9842
- try expectError(error.UnexpectedEndOfStream, decompress("\x95\x90=o\xc20\x10\x86\xf30"));
- try expectError(error.CorruptInput, decompress("\x950\x00\x0000000"));
-
- // Huffman errors
- // lencode
- try expectError(error.CorruptInput, decompress("\x950000"));
- try expectError(error.CorruptInput, decompress("\x05000"));
- // hlen
- try expectError(error.CorruptInput, decompress("\x05\xea\x01\t\x00\x00\x00\x01\x00\\\xbf.\t\x00"));
- // hdist
- try expectError(error.CorruptInput, decompress("\x05\xe0\x01A\x00\x00\x00\x00\x10\\\xbf."));
-
- // like the "empty distance alphabet" test but for ndist instead of nlen
- try expectError(error.CorruptInput, decompress("\x05\xe0\x01\t\x00\x00\x00\x00\x10\\\xbf\xce"));
- try decompress("\x15\xe0\x01\t\x00\x00\x00\x00\x10\\\xbf.0");
-}
-
-fn decompress(input: []const u8) !void {
- const allocator = testing.allocator;
- var fib = std.io.fixedBufferStream(input);
- const reader = fib.reader();
- var decomp = try decompressor(allocator, reader, null);
- defer decomp.deinit();
- const output = try decomp.reader().readAllAlloc(allocator, math.maxInt(usize));
- defer std.testing.allocator.free(output);
-}
diff --git a/lib/std/compress/deflate/deflate_const.zig b/lib/std/compress/deflate/deflate_const.zig
deleted file mode 100644
index bf328b562c..0000000000
--- a/lib/std/compress/deflate/deflate_const.zig
+++ /dev/null
@@ -1,28 +0,0 @@
-// Deflate
-
-// Biggest block size for uncompressed block.
-pub const max_store_block_size = 65535;
-// The special code used to mark the end of a block.
-pub const end_block_marker = 256;
-
-// LZ77
-
-// The smallest match length per the RFC section 3.2.5
-pub const base_match_length = 3;
-// The smallest match offset.
-pub const base_match_offset = 1;
-// The largest match length.
-pub const max_match_length = 258;
-// The largest match offset.
-pub const max_match_offset = 1 << 15;
-
-// Huffman Codes
-
-// The largest offset code.
-pub const offset_code_count = 30;
-// Max number of frequencies used for a Huffman Code
-// Possible lengths are codegenCodeCount (19), offset_code_count (30) and max_num_lit (286).
-// The largest of these is max_num_lit.
-pub const max_num_frequencies = max_num_lit;
-// Maximum number of literals.
-pub const max_num_lit = 286;
diff --git a/lib/std/compress/deflate/deflate_fast.zig b/lib/std/compress/deflate/deflate_fast.zig
deleted file mode 100644
index 3a2668762e..0000000000
--- a/lib/std/compress/deflate/deflate_fast.zig
+++ /dev/null
@@ -1,720 +0,0 @@
-// This encoding algorithm, which prioritizes speed over output size, is
-// based on Snappy's LZ77-style encoder: github.com/golang/snappy
-
-const std = @import("std");
-const math = std.math;
-const mem = std.mem;
-
-const Allocator = std.mem.Allocator;
-
-const deflate_const = @import("deflate_const.zig");
-const deflate = @import("compressor.zig");
-const token = @import("token.zig");
-
-const base_match_length = deflate_const.base_match_length;
-const base_match_offset = deflate_const.base_match_offset;
-const max_match_length = deflate_const.max_match_length;
-const max_match_offset = deflate_const.max_match_offset;
-const max_store_block_size = deflate_const.max_store_block_size;
-
-const table_bits = 14; // Bits used in the table.
-const table_mask = table_size - 1; // Mask for table indices. Redundant, but can eliminate bounds checks.
-const table_shift = 32 - table_bits; // Right-shift to get the table_bits most significant bits of a uint32.
-const table_size = 1 << table_bits; // Size of the table.
-
-// Reset the buffer offset when reaching this.
-// Offsets are stored between blocks as i32 values.
-// Since the offset we are checking against is at the beginning
-// of the buffer, we need to subtract the current and input
-// buffer to not risk overflowing the i32.
-const buffer_reset = math.maxInt(i32) - max_store_block_size * 2;
-
-fn load32(b: []u8, i: i32) u32 {
- const s = b[@as(usize, @intCast(i)) .. @as(usize, @intCast(i)) + 4];
- return @as(u32, @intCast(s[0])) |
- @as(u32, @intCast(s[1])) << 8 |
- @as(u32, @intCast(s[2])) << 16 |
- @as(u32, @intCast(s[3])) << 24;
-}
-
-fn load64(b: []u8, i: i32) u64 {
- const s = b[@as(usize, @intCast(i))..@as(usize, @intCast(i + 8))];
- return @as(u64, @intCast(s[0])) |
- @as(u64, @intCast(s[1])) << 8 |
- @as(u64, @intCast(s[2])) << 16 |
- @as(u64, @intCast(s[3])) << 24 |
- @as(u64, @intCast(s[4])) << 32 |
- @as(u64, @intCast(s[5])) << 40 |
- @as(u64, @intCast(s[6])) << 48 |
- @as(u64, @intCast(s[7])) << 56;
-}
-
-fn hash(u: u32) u32 {
- return (u *% 0x1e35a7bd) >> table_shift;
-}
-
-// These constants are defined by the Snappy implementation so that its
-// assembly implementation can fast-path some 16-bytes-at-a-time copies.
-// They aren't necessary in the pure Go implementation, and may not be
-// necessary in Zig, but using the same thresholds doesn't really hurt.
-const input_margin = 16 - 1;
-const min_non_literal_block_size = 1 + 1 + input_margin;
-
-const TableEntry = struct {
- val: u32, // Value at destination
- offset: i32,
-};
-
-pub fn deflateFast() DeflateFast {
- return DeflateFast{
- .table = [_]TableEntry{.{ .val = 0, .offset = 0 }} ** table_size,
- .prev = undefined,
- .prev_len = 0,
- .cur = max_store_block_size,
- .allocator = undefined,
- };
-}
-
-// DeflateFast maintains the table for matches,
-// and the previous byte block for cross block matching.
-pub const DeflateFast = struct {
- table: [table_size]TableEntry,
- prev: []u8, // Previous block, zero length if unknown.
- prev_len: u32, // Previous block length
- cur: i32, // Current match offset.
- allocator: Allocator,
-
- const Self = @This();
-
- pub fn init(self: *Self, allocator: Allocator) !void {
- self.allocator = allocator;
- self.prev = try allocator.alloc(u8, max_store_block_size);
- self.prev_len = 0;
- }
-
- pub fn deinit(self: *Self) void {
- self.allocator.free(self.prev);
- self.prev_len = 0;
- }
-
- // Encodes a block given in `src` and appends tokens to `dst` and returns the result.
- pub fn encode(self: *Self, dst: []token.Token, tokens_count: *u16, src: []u8) void {
-
- // Ensure that self.cur doesn't wrap.
- if (self.cur >= buffer_reset) {
- self.shiftOffsets();
- }
-
- // This check isn't in the Snappy implementation, but there, the caller
- // instead of the callee handles this case.
- if (src.len < min_non_literal_block_size) {
- self.cur += max_store_block_size;
- self.prev_len = 0;
- emitLiteral(dst, tokens_count, src);
- return;
- }
-
- // s_limit is when to stop looking for offset/length copies. The input_margin
- // lets us use a fast path for emitLiteral in the main loop, while we are
- // looking for copies.
- const s_limit = @as(i32, @intCast(src.len - input_margin));
-
- // next_emit is where in src the next emitLiteral should start from.
- var next_emit: i32 = 0;
- var s: i32 = 0;
- var cv: u32 = load32(src, s);
- var next_hash: u32 = hash(cv);
-
- outer: while (true) {
- // Copied from the C++ snappy implementation:
- //
- // Heuristic match skipping: If 32 bytes are scanned with no matches
- // found, start looking only at every other byte. If 32 more bytes are
- // scanned (or skipped), look at every third byte, etc.. When a match
- // is found, immediately go back to looking at every byte. This is a
- // small loss (~5% performance, ~0.1% density) for compressible data
- // due to more bookkeeping, but for non-compressible data (such as
- // JPEG) it's a huge win since the compressor quickly "realizes" the
- // data is incompressible and doesn't bother looking for matches
- // everywhere.
- //
- // The "skip" variable keeps track of how many bytes there are since
- // the last match; dividing it by 32 (ie. right-shifting by five) gives
- // the number of bytes to move ahead for each iteration.
- var skip: i32 = 32;
-
- var next_s: i32 = s;
- var candidate: TableEntry = undefined;
- while (true) {
- s = next_s;
- const bytes_between_hash_lookups = skip >> 5;
- next_s = s + bytes_between_hash_lookups;
- skip += bytes_between_hash_lookups;
- if (next_s > s_limit) {
- break :outer;
- }
- candidate = self.table[next_hash & table_mask];
- const now = load32(src, next_s);
- self.table[next_hash & table_mask] = .{ .offset = s + self.cur, .val = cv };
- next_hash = hash(now);
-
- const offset = s - (candidate.offset - self.cur);
- if (offset > max_match_offset or cv != candidate.val) {
- // Out of range or not matched.
- cv = now;
- continue;
- }
- break;
- }
-
- // A 4-byte match has been found. We'll later see if more than 4 bytes
- // match. But, prior to the match, src[next_emit..s] are unmatched. Emit
- // them as literal bytes.
- emitLiteral(dst, tokens_count, src[@as(usize, @intCast(next_emit))..@as(usize, @intCast(s))]);
-
- // Call emitCopy, and then see if another emitCopy could be our next
- // move. Repeat until we find no match for the input immediately after
- // what was consumed by the last emitCopy call.
- //
- // If we exit this loop normally then we need to call emitLiteral next,
- // though we don't yet know how big the literal will be. We handle that
- // by proceeding to the next iteration of the main loop. We also can
- // exit this loop via goto if we get close to exhausting the input.
- while (true) {
- // Invariant: we have a 4-byte match at s, and no need to emit any
- // literal bytes prior to s.
-
- // Extend the 4-byte match as long as possible.
- //
- s += 4;
- const t = candidate.offset - self.cur + 4;
- const l = self.matchLen(s, t, src);
-
- // matchToken is flate's equivalent of Snappy's emitCopy. (length,offset)
- dst[tokens_count.*] = token.matchToken(
- @as(u32, @intCast(l + 4 - base_match_length)),
- @as(u32, @intCast(s - t - base_match_offset)),
- );
- tokens_count.* += 1;
- s += l;
- next_emit = s;
- if (s >= s_limit) {
- break :outer;
- }
-
- // We could immediately start working at s now, but to improve
- // compression we first update the hash table at s-1 and at s. If
- // another emitCopy is not our next move, also calculate next_hash
- // at s+1. At least on amd64 architecture, these three hash calculations
- // are faster as one load64 call (with some shifts) instead of
- // three load32 calls.
- var x = load64(src, s - 1);
- const prev_hash = hash(@as(u32, @truncate(x)));
- self.table[prev_hash & table_mask] = TableEntry{
- .offset = self.cur + s - 1,
- .val = @as(u32, @truncate(x)),
- };
- x >>= 8;
- const curr_hash = hash(@as(u32, @truncate(x)));
- candidate = self.table[curr_hash & table_mask];
- self.table[curr_hash & table_mask] = TableEntry{
- .offset = self.cur + s,
- .val = @as(u32, @truncate(x)),
- };
-
- const offset = s - (candidate.offset - self.cur);
- if (offset > max_match_offset or @as(u32, @truncate(x)) != candidate.val) {
- cv = @as(u32, @truncate(x >> 8));
- next_hash = hash(cv);
- s += 1;
- break;
- }
- }
- }
-
- if (@as(u32, @intCast(next_emit)) < src.len) {
- emitLiteral(dst, tokens_count, src[@as(usize, @intCast(next_emit))..]);
- }
- self.cur += @as(i32, @intCast(src.len));
- self.prev_len = @as(u32, @intCast(src.len));
- @memcpy(self.prev[0..self.prev_len], src);
- return;
- }
-
- fn emitLiteral(dst: []token.Token, tokens_count: *u16, lit: []u8) void {
- for (lit) |v| {
- dst[tokens_count.*] = token.literalToken(@as(u32, @intCast(v)));
- tokens_count.* += 1;
- }
- return;
- }
-
- // matchLen returns the match length between src[s..] and src[t..].
- // t can be negative to indicate the match is starting in self.prev.
- // We assume that src[s-4 .. s] and src[t-4 .. t] already match.
- fn matchLen(self: *Self, s: i32, t: i32, src: []u8) i32 {
- var s1 = @as(u32, @intCast(s)) + max_match_length - 4;
- if (s1 > src.len) {
- s1 = @as(u32, @intCast(src.len));
- }
-
- // If we are inside the current block
- if (t >= 0) {
- var b = src[@as(usize, @intCast(t))..];
- const a = src[@as(usize, @intCast(s))..@as(usize, @intCast(s1))];
- b = b[0..a.len];
- // Extend the match to be as long as possible.
- for (a, 0..) |_, i| {
- if (a[i] != b[i]) {
- return @as(i32, @intCast(i));
- }
- }
- return @as(i32, @intCast(a.len));
- }
-
- // We found a match in the previous block.
- const tp = @as(i32, @intCast(self.prev_len)) + t;
- if (tp < 0) {
- return 0;
- }
-
- // Extend the match to be as long as possible.
- var a = src[@as(usize, @intCast(s))..@as(usize, @intCast(s1))];
- var b = self.prev[@as(usize, @intCast(tp))..@as(usize, @intCast(self.prev_len))];
- if (b.len > a.len) {
- b = b[0..a.len];
- }
- a = a[0..b.len];
- for (b, 0..) |_, i| {
- if (a[i] != b[i]) {
- return @as(i32, @intCast(i));
- }
- }
-
- // If we reached our limit, we matched everything we are
- // allowed to in the previous block and we return.
- const n = @as(i32, @intCast(b.len));
- if (@as(u32, @intCast(s + n)) == s1) {
- return n;
- }
-
- // Continue looking for more matches in the current block.
- a = src[@as(usize, @intCast(s + n))..@as(usize, @intCast(s1))];
- b = src[0..a.len];
- for (a, 0..) |_, i| {
- if (a[i] != b[i]) {
- return @as(i32, @intCast(i)) + n;
- }
- }
- return @as(i32, @intCast(a.len)) + n;
- }
-
- // Reset resets the encoding history.
- // This ensures that no matches are made to the previous block.
- pub fn reset(self: *Self) void {
- self.prev_len = 0;
- // Bump the offset, so all matches will fail distance check.
- // Nothing should be >= self.cur in the table.
- self.cur += max_match_offset;
-
- // Protect against self.cur wraparound.
- if (self.cur >= buffer_reset) {
- self.shiftOffsets();
- }
- }
-
- // shiftOffsets will shift down all match offset.
- // This is only called in rare situations to prevent integer overflow.
- //
- // See https://golang.org/issue/18636 and https://golang.org/issues/34121.
- fn shiftOffsets(self: *Self) void {
- if (self.prev_len == 0) {
- // We have no history; just clear the table.
- for (self.table, 0..) |_, i| {
- self.table[i] = TableEntry{ .val = 0, .offset = 0 };
- }
- self.cur = max_match_offset + 1;
- return;
- }
-
- // Shift down everything in the table that isn't already too far away.
- for (self.table, 0..) |_, i| {
- var v = self.table[i].offset - self.cur + max_match_offset + 1;
- if (v < 0) {
- // We want to reset self.cur to max_match_offset + 1, so we need to shift
- // all table entries down by (self.cur - (max_match_offset + 1)).
- // Because we ignore matches > max_match_offset, we can cap
- // any negative offsets at 0.
- v = 0;
- }
- self.table[i].offset = v;
- }
- self.cur = max_match_offset + 1;
- }
-};
-
-test "best speed match 1/3" {
- const expectEqual = std.testing.expectEqual;
-
- {
- var previous = [_]u8{ 0, 0, 0, 1, 2 };
- var e = DeflateFast{
- .prev = &previous,
- .prev_len = previous.len,
- .table = undefined,
- .allocator = undefined,
- .cur = 0,
- };
- var current = [_]u8{ 3, 4, 5, 0, 1, 2, 3, 4, 5 };
- const got: i32 = e.matchLen(3, -3, &current);
- try expectEqual(@as(i32, 6), got);
- }
- {
- var previous = [_]u8{ 0, 0, 0, 1, 2 };
- var e = DeflateFast{
- .prev = &previous,
- .prev_len = previous.len,
- .table = undefined,
- .allocator = undefined,
- .cur = 0,
- };
- var current = [_]u8{ 2, 4, 5, 0, 1, 2, 3, 4, 5 };
- const got: i32 = e.matchLen(3, -3, &current);
- try expectEqual(@as(i32, 3), got);
- }
- {
- var previous = [_]u8{ 0, 0, 0, 1, 1 };
- var e = DeflateFast{
- .prev = &previous,
- .prev_len = previous.len,
- .table = undefined,
- .allocator = undefined,
- .cur = 0,
- };
- var current = [_]u8{ 3, 4, 5, 0, 1, 2, 3, 4, 5 };
- const got: i32 = e.matchLen(3, -3, &current);
- try expectEqual(@as(i32, 2), got);
- }
- {
- var previous = [_]u8{ 0, 0, 0, 1, 2 };
- var e = DeflateFast{
- .prev = &previous,
- .prev_len = previous.len,
- .table = undefined,
- .allocator = undefined,
- .cur = 0,
- };
- var current = [_]u8{ 2, 2, 2, 2, 1, 2, 3, 4, 5 };
- const got: i32 = e.matchLen(0, -1, &current);
- try expectEqual(@as(i32, 4), got);
- }
- {
- var previous = [_]u8{ 0, 0, 0, 1, 2, 3, 4, 5, 2, 2 };
- var e = DeflateFast{
- .prev = &previous,
- .prev_len = previous.len,
- .table = undefined,
- .allocator = undefined,
- .cur = 0,
- };
- var current = [_]u8{ 2, 2, 2, 2, 1, 2, 3, 4, 5 };
- const got: i32 = e.matchLen(4, -7, &current);
- try expectEqual(@as(i32, 5), got);
- }
- {
- var previous = [_]u8{ 9, 9, 9, 9, 9 };
- var e = DeflateFast{
- .prev = &previous,
- .prev_len = previous.len,
- .table = undefined,
- .allocator = undefined,
- .cur = 0,
- };
- var current = [_]u8{ 2, 2, 2, 2, 1, 2, 3, 4, 5 };
- const got: i32 = e.matchLen(0, -1, &current);
- try expectEqual(@as(i32, 0), got);
- }
- {
- var previous = [_]u8{ 9, 9, 9, 9, 9 };
- var e = DeflateFast{
- .prev = &previous,
- .prev_len = previous.len,
- .table = undefined,
- .allocator = undefined,
- .cur = 0,
- };
- var current = [_]u8{ 9, 2, 2, 2, 1, 2, 3, 4, 5 };
- const got: i32 = e.matchLen(1, 0, &current);
- try expectEqual(@as(i32, 0), got);
- }
-}
-
-test "best speed match 2/3" {
- const expectEqual = std.testing.expectEqual;
-
- {
- var previous = [_]u8{};
- var e = DeflateFast{
- .prev = &previous,
- .prev_len = previous.len,
- .table = undefined,
- .allocator = undefined,
- .cur = 0,
- };
- var current = [_]u8{ 9, 2, 2, 2, 1, 2, 3, 4, 5 };
- const got: i32 = e.matchLen(1, -5, &current);
- try expectEqual(@as(i32, 0), got);
- }
- {
- var previous = [_]u8{};
- var e = DeflateFast{
- .prev = &previous,
- .prev_len = previous.len,
- .table = undefined,
- .allocator = undefined,
- .cur = 0,
- };
- var current = [_]u8{ 9, 2, 2, 2, 1, 2, 3, 4, 5 };
- const got: i32 = e.matchLen(1, -1, &current);
- try expectEqual(@as(i32, 0), got);
- }
- {
- var previous = [_]u8{};
- var e = DeflateFast{
- .prev = &previous,
- .prev_len = previous.len,
- .table = undefined,
- .allocator = undefined,
- .cur = 0,
- };
- var current = [_]u8{ 2, 2, 2, 2, 1, 2, 3, 4, 5 };
- const got: i32 = e.matchLen(1, 0, &current);
- try expectEqual(@as(i32, 3), got);
- }
- {
- var previous = [_]u8{ 3, 4, 5 };
- var e = DeflateFast{
- .prev = &previous,
- .prev_len = previous.len,
- .table = undefined,
- .allocator = undefined,
- .cur = 0,
- };
- var current = [_]u8{ 3, 4, 5 };
- const got: i32 = e.matchLen(0, -3, &current);
- try expectEqual(@as(i32, 3), got);
- }
-}
-
-test "best speed match 2/2" {
- const testing = std.testing;
- const expectEqual = testing.expectEqual;
-
- const Case = struct {
- previous: u32,
- current: u32,
- s: i32,
- t: i32,
- expected: i32,
- };
-
- const cases = [_]Case{
- .{
- .previous = 1000,
- .current = 1000,
- .s = 0,
- .t = -1000,
- .expected = max_match_length - 4,
- },
- .{
- .previous = 200,
- .s = 0,
- .t = -200,
- .current = 500,
- .expected = max_match_length - 4,
- },
- .{
- .previous = 200,
- .s = 1,
- .t = 0,
- .current = 500,
- .expected = max_match_length - 4,
- },
- .{
- .previous = max_match_length - 4,
- .s = 0,
- .t = -(max_match_length - 4),
- .current = 500,
- .expected = max_match_length - 4,
- },
- .{
- .previous = 200,
- .s = 400,
- .t = -200,
- .current = 500,
- .expected = 100,
- },
- .{
- .previous = 10,
- .s = 400,
- .t = 200,
- .current = 500,
- .expected = 100,
- },
- };
-
- for (cases) |c| {
- const previous = try testing.allocator.alloc(u8, c.previous);
- defer testing.allocator.free(previous);
- @memset(previous, 0);
-
- const current = try testing.allocator.alloc(u8, c.current);
- defer testing.allocator.free(current);
- @memset(current, 0);
-
- var e = DeflateFast{
- .prev = previous,
- .prev_len = @as(u32, @intCast(previous.len)),
- .table = undefined,
- .allocator = undefined,
- .cur = 0,
- };
- const got: i32 = e.matchLen(c.s, c.t, current);
- try expectEqual(@as(i32, c.expected), got);
- }
-}
-
-test "best speed shift offsets" {
- const testing = std.testing;
- const expect = std.testing.expect;
-
- // Test if shiftOffsets properly preserves matches and resets out-of-range matches
- // seen in https://github.com/golang/go/issues/4142
- var enc = deflateFast();
- try enc.init(testing.allocator);
- defer enc.deinit();
-
- // test_data may not generate internal matches.
- var test_data = [32]u8{
- 0xf5, 0x25, 0xf2, 0x55, 0xf6, 0xc1, 0x1f, 0x0b, 0x10, 0xa1,
- 0xd0, 0x77, 0x56, 0x38, 0xf1, 0x9c, 0x7f, 0x85, 0xc5, 0xbd,
- 0x16, 0x28, 0xd4, 0xf9, 0x03, 0xd4, 0xc0, 0xa1, 0x1e, 0x58,
- 0x5b, 0xc9,
- };
-
- var tokens = [_]token.Token{0} ** 32;
- var tokens_count: u16 = 0;
-
- // Encode the testdata with clean state.
- // Second part should pick up matches from the first block.
- tokens_count = 0;
- enc.encode(&tokens, &tokens_count, &test_data);
- const want_first_tokens = tokens_count;
- tokens_count = 0;
- enc.encode(&tokens, &tokens_count, &test_data);
- const want_second_tokens = tokens_count;
-
- try expect(want_first_tokens > want_second_tokens);
-
- // Forward the current indicator to before wraparound.
- enc.cur = buffer_reset - @as(i32, @intCast(test_data.len));
-
- // Part 1 before wrap, should match clean state.
- tokens_count = 0;
- enc.encode(&tokens, &tokens_count, &test_data);
- var got = tokens_count;
- try testing.expectEqual(want_first_tokens, got);
-
- // Verify we are about to wrap.
- try testing.expectEqual(@as(i32, buffer_reset), enc.cur);
-
- // Part 2 should match clean state as well even if wrapped.
- tokens_count = 0;
- enc.encode(&tokens, &tokens_count, &test_data);
- got = tokens_count;
- try testing.expectEqual(want_second_tokens, got);
-
- // Verify that we wrapped.
- try expect(enc.cur < buffer_reset);
-
- // Forward the current buffer, leaving the matches at the bottom.
- enc.cur = buffer_reset;
- enc.shiftOffsets();
-
- // Ensure that no matches were picked up.
- tokens_count = 0;
- enc.encode(&tokens, &tokens_count, &test_data);
- got = tokens_count;
- try testing.expectEqual(want_first_tokens, got);
-}
-
-test "best speed reset" {
- // test that encoding is consistent across a wraparound of the table offset.
- // See https://github.com/golang/go/issues/34121
- const fmt = std.fmt;
- const testing = std.testing;
-
- const ArrayList = std.ArrayList;
-
- const input_size = 65536;
- const input = try testing.allocator.alloc(u8, input_size);
- defer testing.allocator.free(input);
-
- var i: usize = 0;
- while (i < input_size) : (i += 1) {
- _ = try fmt.bufPrint(input, "asdfasdfasdfasdf{d}{d}fghfgujyut{d}yutyu\n", .{ i, i, i });
- }
- // This is specific to level 1 (best_speed).
- const level = .best_speed;
- const offset: usize = 1;
-
- // We do an encode with a clean buffer to compare.
- var want = ArrayList(u8).init(testing.allocator);
- defer want.deinit();
- var clean_comp = try deflate.compressor(
- testing.allocator,
- want.writer(),
- .{ .level = level },
- );
- defer clean_comp.deinit();
-
- // Write 3 times, close.
- try clean_comp.writer().writeAll(input);
- try clean_comp.writer().writeAll(input);
- try clean_comp.writer().writeAll(input);
- try clean_comp.close();
-
- var o = offset;
- while (o <= 256) : (o *= 2) {
- var discard = ArrayList(u8).init(testing.allocator);
- defer discard.deinit();
-
- var comp = try deflate.compressor(
- testing.allocator,
- discard.writer(),
- .{ .level = level },
- );
- defer comp.deinit();
-
- // Reset until we are right before the wraparound.
- // Each reset adds max_match_offset to the offset.
- i = 0;
- const limit = (buffer_reset - input.len - o - max_match_offset) / max_match_offset;
- while (i < limit) : (i += 1) {
- // skip ahead to where we are close to wrap around...
- comp.reset(discard.writer());
- }
- var got = ArrayList(u8).init(testing.allocator);
- defer got.deinit();
- comp.reset(got.writer());
-
- // Write 3 times, close.
- try comp.writer().writeAll(input);
- try comp.writer().writeAll(input);
- try comp.writer().writeAll(input);
- try comp.close();
-
- // output must match at wraparound
- try testing.expectEqualSlices(u8, want.items, got.items);
- }
-}
diff --git a/lib/std/compress/deflate/deflate_fast_test.zig b/lib/std/compress/deflate/deflate_fast_test.zig
deleted file mode 100644
index fdb8e3fd6a..0000000000
--- a/lib/std/compress/deflate/deflate_fast_test.zig
+++ /dev/null
@@ -1,160 +0,0 @@
-const std = @import("std");
-const expect = std.testing.expect;
-const io = std.io;
-const mem = std.mem;
-const testing = std.testing;
-
-const ArrayList = std.ArrayList;
-
-const deflate = @import("compressor.zig");
-const inflate = @import("decompressor.zig");
-const deflate_const = @import("deflate_const.zig");
-
-test "best speed" {
- // Tests that round-tripping through deflate and then inflate recovers the original input.
- // The Write sizes are near the thresholds in the compressor.encSpeed method (0, 16, 128), as well
- // as near `deflate_const.max_store_block_size` (65535).
-
- var abcabc = try testing.allocator.alloc(u8, 131_072);
- defer testing.allocator.free(abcabc);
-
- for (abcabc, 0..) |_, i| {
- abcabc[i] = @as(u8, @intCast(i % 128));
- }
-
- var tc_01 = [_]u32{ 65536, 0 };
- var tc_02 = [_]u32{ 65536, 1 };
- var tc_03 = [_]u32{ 65536, 1, 256 };
- var tc_04 = [_]u32{ 65536, 1, 65536 };
- var tc_05 = [_]u32{ 65536, 14 };
- var tc_06 = [_]u32{ 65536, 15 };
- var tc_07 = [_]u32{ 65536, 16 };
- var tc_08 = [_]u32{ 65536, 16, 256 };
- var tc_09 = [_]u32{ 65536, 16, 65536 };
- var tc_10 = [_]u32{ 65536, 127 };
- var tc_11 = [_]u32{ 65536, 127 };
- var tc_12 = [_]u32{ 65536, 128 };
- var tc_13 = [_]u32{ 65536, 128, 256 };
- var tc_14 = [_]u32{ 65536, 128, 65536 };
- var tc_15 = [_]u32{ 65536, 129 };
- var tc_16 = [_]u32{ 65536, 65536, 256 };
- var tc_17 = [_]u32{ 65536, 65536, 65536 };
- const test_cases = [_][]u32{
- &tc_01, &tc_02, &tc_03, &tc_04, &tc_05, &tc_06, &tc_07, &tc_08, &tc_09, &tc_10,
- &tc_11, &tc_12, &tc_13, &tc_14, &tc_15, &tc_16, &tc_17,
- };
-
- for (test_cases) |tc| {
- const firsts = [_]u32{ 1, 65534, 65535, 65536, 65537, 131072 };
-
- for (firsts) |first_n| {
- tc[0] = first_n;
-
- const to_flush = [_]bool{ false, true };
- for (to_flush) |flush| {
- var compressed = ArrayList(u8).init(testing.allocator);
- defer compressed.deinit();
-
- var want = ArrayList(u8).init(testing.allocator);
- defer want.deinit();
-
- var comp = try deflate.compressor(
- testing.allocator,
- compressed.writer(),
- .{ .level = .best_speed },
- );
- defer comp.deinit();
-
- for (tc) |n| {
- try want.appendSlice(abcabc[0..n]);
- try comp.writer().writeAll(abcabc[0..n]);
- if (flush) {
- try comp.flush();
- }
- }
-
- try comp.close();
-
- const decompressed = try testing.allocator.alloc(u8, want.items.len);
- defer testing.allocator.free(decompressed);
-
- var fib = io.fixedBufferStream(compressed.items);
- var decomp = try inflate.decompressor(testing.allocator, fib.reader(), null);
- defer decomp.deinit();
-
- const read = try decomp.reader().readAll(decompressed);
- try decomp.close();
-
- try testing.expectEqual(want.items.len, read);
- try testing.expectEqualSlices(u8, want.items, decompressed);
- }
- }
- }
-}
-
-test "best speed max match offset" {
- const abc = "abcdefgh";
- const xyz = "stuvwxyz";
- const input_margin = 16 - 1;
-
- const match_before = [_]bool{ false, true };
- for (match_before) |do_match_before| {
- const extras = [_]u32{
- 0,
- input_margin - 1,
- input_margin,
- input_margin + 1,
- 2 * input_margin,
- };
- for (extras) |extra| {
- var offset_adj: i32 = -5;
- while (offset_adj <= 5) : (offset_adj += 1) {
- const offset = deflate_const.max_match_offset + offset_adj;
-
- // Make src to be a []u8 of the form
- // fmt("{s}{s}{s}{s}{s}", .{abc, zeros0, xyzMaybe, abc, zeros1})
- // where:
- // zeros0 is approximately max_match_offset zeros.
- // xyzMaybe is either xyz or the empty string.
- // zeros1 is between 0 and 30 zeros.
- // The difference between the two abc's will be offset, which
- // is max_match_offset plus or minus a small adjustment.
- const src_len: usize = @as(usize, @intCast(offset + @as(i32, abc.len) + @as(i32, @intCast(extra))));
- var src = try testing.allocator.alloc(u8, src_len);
- defer testing.allocator.free(src);
-
- @memcpy(src[0..abc.len], abc);
- if (!do_match_before) {
- const src_offset: usize = @as(usize, @intCast(offset - @as(i32, xyz.len)));
- @memcpy(src[src_offset..][0..xyz.len], xyz);
- }
- const src_offset: usize = @as(usize, @intCast(offset));
- @memcpy(src[src_offset..][0..abc.len], abc);
-
- var compressed = ArrayList(u8).init(testing.allocator);
- defer compressed.deinit();
-
- var comp = try deflate.compressor(
- testing.allocator,
- compressed.writer(),
- .{ .level = .best_speed },
- );
- defer comp.deinit();
- try comp.writer().writeAll(src);
- _ = try comp.close();
-
- const decompressed = try testing.allocator.alloc(u8, src.len);
- defer testing.allocator.free(decompressed);
-
- var fib = io.fixedBufferStream(compressed.items);
- var decomp = try inflate.decompressor(testing.allocator, fib.reader(), null);
- defer decomp.deinit();
- const read = try decomp.reader().readAll(decompressed);
- try decomp.close();
-
- try testing.expectEqual(src.len, read);
- try testing.expectEqualSlices(u8, src, decompressed);
- }
- }
- }
-}
diff --git a/lib/std/compress/deflate/dict_decoder.zig b/lib/std/compress/deflate/dict_decoder.zig
deleted file mode 100644
index 72a3f6310b..0000000000
--- a/lib/std/compress/deflate/dict_decoder.zig
+++ /dev/null
@@ -1,423 +0,0 @@
-const std = @import("std");
-const assert = std.debug.assert;
-const mem = std.mem;
-
-const Allocator = std.mem.Allocator;
-
-// Implements the LZ77 sliding dictionary as used in decompression.
-// LZ77 decompresses data through sequences of two forms of commands:
-//
-// * Literal insertions: Runs of one or more symbols are inserted into the data
-// stream as is. This is accomplished through the writeByte method for a
-// single symbol, or combinations of writeSlice/writeMark for multiple symbols.
-// Any valid stream must start with a literal insertion if no preset dictionary
-// is used.
-//
-// * Backward copies: Runs of one or more symbols are copied from previously
-// emitted data. Backward copies come as the tuple (dist, length) where dist
-// determines how far back in the stream to copy from and length determines how
-// many bytes to copy. Note that it is valid for the length to be greater than
-// the distance. Since LZ77 uses forward copies, that situation is used to
-// perform a form of run-length encoding on repeated runs of symbols.
-// The writeCopy and tryWriteCopy are used to implement this command.
-//
-// For performance reasons, this implementation performs little to no sanity
-// checks about the arguments. As such, the invariants documented for each
-// method call must be respected.
-pub const DictDecoder = struct {
- const Self = @This();
-
- allocator: Allocator = undefined,
-
- hist: []u8 = undefined, // Sliding window history
-
- // Invariant: 0 <= rd_pos <= wr_pos <= hist.len
- wr_pos: u32 = 0, // Current output position in buffer
- rd_pos: u32 = 0, // Have emitted hist[0..rd_pos] already
- full: bool = false, // Has a full window length been written yet?
-
- // init initializes DictDecoder to have a sliding window dictionary of the given
- // size. If a preset dict is provided, it will initialize the dictionary with
- // the contents of dict.
- pub fn init(self: *Self, allocator: Allocator, size: u32, dict: ?[]const u8) !void {
- self.allocator = allocator;
-
- self.hist = try allocator.alloc(u8, size);
-
- self.wr_pos = 0;
-
- if (dict != null) {
- const src = dict.?[dict.?.len -| self.hist.len..];
- @memcpy(self.hist[0..src.len], src);
- self.wr_pos = @as(u32, @intCast(dict.?.len));
- }
-
- if (self.wr_pos == self.hist.len) {
- self.wr_pos = 0;
- self.full = true;
- }
- self.rd_pos = self.wr_pos;
- }
-
- pub fn deinit(self: *Self) void {
- self.allocator.free(self.hist);
- }
-
- // Reports the total amount of historical data in the dictionary.
- pub fn histSize(self: *Self) u32 {
- if (self.full) {
- return @as(u32, @intCast(self.hist.len));
- }
- return self.wr_pos;
- }
-
- // Reports the number of bytes that can be flushed by readFlush.
- pub fn availRead(self: *Self) u32 {
- return self.wr_pos - self.rd_pos;
- }
-
- // Reports the available amount of output buffer space.
- pub fn availWrite(self: *Self) u32 {
- return @as(u32, @intCast(self.hist.len - self.wr_pos));
- }
-
- // Returns a slice of the available buffer to write data to.
- //
- // This invariant will be kept: s.len <= availWrite()
- pub fn writeSlice(self: *Self) []u8 {
- return self.hist[self.wr_pos..];
- }
-
- // Advances the writer pointer by `count`.
- //
- // This invariant must be kept: 0 <= count <= availWrite()
- pub fn writeMark(self: *Self, count: u32) void {
- assert(0 <= count and count <= self.availWrite());
- self.wr_pos += count;
- }
-
- // Writes a single byte to the dictionary.
- //
- // This invariant must be kept: 0 < availWrite()
- pub fn writeByte(self: *Self, byte: u8) void {
- self.hist[self.wr_pos] = byte;
- self.wr_pos += 1;
- }
-
- /// TODO: eliminate this function because the callsites should care about whether
- /// or not their arguments alias and then they should directly call `@memcpy` or
- /// `mem.copyForwards`.
- fn copy(dst: []u8, src: []const u8) u32 {
- if (src.len > dst.len) {
- mem.copyForwards(u8, dst, src[0..dst.len]);
- return @as(u32, @intCast(dst.len));
- }
- mem.copyForwards(u8, dst[0..src.len], src);
- return @as(u32, @intCast(src.len));
- }
-
- // Copies a string at a given (dist, length) to the output.
- // This returns the number of bytes copied and may be less than the requested
- // length if the available space in the output buffer is too small.
- //
- // This invariant must be kept: 0 < dist <= histSize()
- pub fn writeCopy(self: *Self, dist: u32, length: u32) u32 {
- assert(0 < dist and dist <= self.histSize());
- const dst_base = self.wr_pos;
- var dst_pos = dst_base;
- var src_pos: i32 = @as(i32, @intCast(dst_pos)) - @as(i32, @intCast(dist));
- var end_pos = dst_pos + length;
- if (end_pos > self.hist.len) {
- end_pos = @as(u32, @intCast(self.hist.len));
- }
-
- // Copy non-overlapping section after destination position.
- //
- // This section is non-overlapping in that the copy length for this section
- // is always less than or equal to the backwards distance. This can occur
- // if a distance refers to data that wraps-around in the buffer.
- // Thus, a backwards copy is performed here; that is, the exact bytes in
- // the source prior to the copy are placed in the destination.
- if (src_pos < 0) {
- src_pos += @as(i32, @intCast(self.hist.len));
- dst_pos += copy(self.hist[dst_pos..end_pos], self.hist[@as(usize, @intCast(src_pos))..]);
- src_pos = 0;
- }
-
- // Copy possibly overlapping section before destination position.
- //
- // This section can overlap if the copy length for this section is larger
- // than the backwards distance. This is allowed by LZ77 so that repeated
- // strings can be succinctly represented using (dist, length) pairs.
- // Thus, a forwards copy is performed here; that is, the bytes copied are
- // possibly dependent on the resulting bytes in the destination as the copy
- // progresses along. This is functionally equivalent to the following:
- //
- // var i = 0;
- // while(i < end_pos - dst_pos) : (i+=1) {
- // self.hist[dst_pos+i] = self.hist[src_pos+i];
- // }
- // dst_pos = end_pos;
- //
- while (dst_pos < end_pos) {
- dst_pos += copy(self.hist[dst_pos..end_pos], self.hist[@as(usize, @intCast(src_pos))..dst_pos]);
- }
-
- self.wr_pos = dst_pos;
- return dst_pos - dst_base;
- }
-
- // Tries to copy a string at a given (distance, length) to the
- // output. This specialized version is optimized for short distances.
- //
- // This method is designed to be inlined for performance reasons.
- //
- // This invariant must be kept: 0 < dist <= histSize()
- pub fn tryWriteCopy(self: *Self, dist: u32, length: u32) u32 {
- var dst_pos = self.wr_pos;
- const end_pos = dst_pos + length;
- if (dst_pos < dist or end_pos > self.hist.len) {
- return 0;
- }
- const dst_base = dst_pos;
- const src_pos = dst_pos - dist;
-
- // Copy possibly overlapping section before destination position.
- while (dst_pos < end_pos) {
- dst_pos += copy(self.hist[dst_pos..end_pos], self.hist[src_pos..dst_pos]);
- }
-
- self.wr_pos = dst_pos;
- return dst_pos - dst_base;
- }
-
- // Returns a slice of the historical buffer that is ready to be
- // emitted to the user. The data returned by readFlush must be fully consumed
- // before calling any other DictDecoder methods.
- pub fn readFlush(self: *Self) []u8 {
- const to_read = self.hist[self.rd_pos..self.wr_pos];
- self.rd_pos = self.wr_pos;
- if (self.wr_pos == self.hist.len) {
- self.wr_pos = 0;
- self.rd_pos = 0;
- self.full = true;
- }
- return to_read;
- }
-};
-
-// tests
-
-test "dictionary decoder" {
- const ArrayList = std.ArrayList;
- const testing = std.testing;
-
- const abc = "ABC\n";
- const fox = "The quick brown fox jumped over the lazy dog!\n";
- const poem: []const u8 =
- \\The Road Not Taken
- \\Robert Frost
- \\
- \\Two roads diverged in a yellow wood,
- \\And sorry I could not travel both
- \\And be one traveler, long I stood
- \\And looked down one as far as I could
- \\To where it bent in the undergrowth;
- \\
- \\Then took the other, as just as fair,
- \\And having perhaps the better claim,
- \\Because it was grassy and wanted wear;
- \\Though as for that the passing there
- \\Had worn them really about the same,
- \\
- \\And both that morning equally lay
- \\In leaves no step had trodden black.
- \\Oh, I kept the first for another day!
- \\Yet knowing how way leads on to way,
- \\I doubted if I should ever come back.
- \\
- \\I shall be telling this with a sigh
- \\Somewhere ages and ages hence:
- \\Two roads diverged in a wood, and I-
- \\I took the one less traveled by,
- \\And that has made all the difference.
- \\
- ;
-
- const uppercase: []const u8 =
- \\THE ROAD NOT TAKEN
- \\ROBERT FROST
- \\
- \\TWO ROADS DIVERGED IN A YELLOW WOOD,
- \\AND SORRY I COULD NOT TRAVEL BOTH
- \\AND BE ONE TRAVELER, LONG I STOOD
- \\AND LOOKED DOWN ONE AS FAR AS I COULD
- \\TO WHERE IT BENT IN THE UNDERGROWTH;
- \\
- \\THEN TOOK THE OTHER, AS JUST AS FAIR,
- \\AND HAVING PERHAPS THE BETTER CLAIM,
- \\BECAUSE IT WAS GRASSY AND WANTED WEAR;
- \\THOUGH AS FOR THAT THE PASSING THERE
- \\HAD WORN THEM REALLY ABOUT THE SAME,
- \\
- \\AND BOTH THAT MORNING EQUALLY LAY
- \\IN LEAVES NO STEP HAD TRODDEN BLACK.
- \\OH, I KEPT THE FIRST FOR ANOTHER DAY!
- \\YET KNOWING HOW WAY LEADS ON TO WAY,
- \\I DOUBTED IF I SHOULD EVER COME BACK.
- \\
- \\I SHALL BE TELLING THIS WITH A SIGH
- \\SOMEWHERE AGES AND AGES HENCE:
- \\TWO ROADS DIVERGED IN A WOOD, AND I-
- \\I TOOK THE ONE LESS TRAVELED BY,
- \\AND THAT HAS MADE ALL THE DIFFERENCE.
- \\
- ;
-
- const PoemRefs = struct {
- dist: u32, // Backward distance (0 if this is an insertion)
- length: u32, // Length of copy or insertion
- };
-
- const poem_refs = [_]PoemRefs{
- .{ .dist = 0, .length = 38 }, .{ .dist = 33, .length = 3 }, .{ .dist = 0, .length = 48 },
- .{ .dist = 79, .length = 3 }, .{ .dist = 0, .length = 11 }, .{ .dist = 34, .length = 5 },
- .{ .dist = 0, .length = 6 }, .{ .dist = 23, .length = 7 }, .{ .dist = 0, .length = 8 },
- .{ .dist = 50, .length = 3 }, .{ .dist = 0, .length = 2 }, .{ .dist = 69, .length = 3 },
- .{ .dist = 34, .length = 5 }, .{ .dist = 0, .length = 4 }, .{ .dist = 97, .length = 3 },
- .{ .dist = 0, .length = 4 }, .{ .dist = 43, .length = 5 }, .{ .dist = 0, .length = 6 },
- .{ .dist = 7, .length = 4 }, .{ .dist = 88, .length = 7 }, .{ .dist = 0, .length = 12 },
- .{ .dist = 80, .length = 3 }, .{ .dist = 0, .length = 2 }, .{ .dist = 141, .length = 4 },
- .{ .dist = 0, .length = 1 }, .{ .dist = 196, .length = 3 }, .{ .dist = 0, .length = 3 },
- .{ .dist = 157, .length = 3 }, .{ .dist = 0, .length = 6 }, .{ .dist = 181, .length = 3 },
- .{ .dist = 0, .length = 2 }, .{ .dist = 23, .length = 3 }, .{ .dist = 77, .length = 3 },
- .{ .dist = 28, .length = 5 }, .{ .dist = 128, .length = 3 }, .{ .dist = 110, .length = 4 },
- .{ .dist = 70, .length = 3 }, .{ .dist = 0, .length = 4 }, .{ .dist = 85, .length = 6 },
- .{ .dist = 0, .length = 2 }, .{ .dist = 182, .length = 6 }, .{ .dist = 0, .length = 4 },
- .{ .dist = 133, .length = 3 }, .{ .dist = 0, .length = 7 }, .{ .dist = 47, .length = 5 },
- .{ .dist = 0, .length = 20 }, .{ .dist = 112, .length = 5 }, .{ .dist = 0, .length = 1 },
- .{ .dist = 58, .length = 3 }, .{ .dist = 0, .length = 8 }, .{ .dist = 59, .length = 3 },
- .{ .dist = 0, .length = 4 }, .{ .dist = 173, .length = 3 }, .{ .dist = 0, .length = 5 },
- .{ .dist = 114, .length = 3 }, .{ .dist = 0, .length = 4 }, .{ .dist = 92, .length = 5 },
- .{ .dist = 0, .length = 2 }, .{ .dist = 71, .length = 3 }, .{ .dist = 0, .length = 2 },
- .{ .dist = 76, .length = 5 }, .{ .dist = 0, .length = 1 }, .{ .dist = 46, .length = 3 },
- .{ .dist = 96, .length = 4 }, .{ .dist = 130, .length = 4 }, .{ .dist = 0, .length = 3 },
- .{ .dist = 360, .length = 3 }, .{ .dist = 0, .length = 3 }, .{ .dist = 178, .length = 5 },
- .{ .dist = 0, .length = 7 }, .{ .dist = 75, .length = 3 }, .{ .dist = 0, .length = 3 },
- .{ .dist = 45, .length = 6 }, .{ .dist = 0, .length = 6 }, .{ .dist = 299, .length = 6 },
- .{ .dist = 180, .length = 3 }, .{ .dist = 70, .length = 6 }, .{ .dist = 0, .length = 1 },
- .{ .dist = 48, .length = 3 }, .{ .dist = 66, .length = 4 }, .{ .dist = 0, .length = 3 },
- .{ .dist = 47, .length = 5 }, .{ .dist = 0, .length = 9 }, .{ .dist = 325, .length = 3 },
- .{ .dist = 0, .length = 1 }, .{ .dist = 359, .length = 3 }, .{ .dist = 318, .length = 3 },
- .{ .dist = 0, .length = 2 }, .{ .dist = 199, .length = 3 }, .{ .dist = 0, .length = 1 },
- .{ .dist = 344, .length = 3 }, .{ .dist = 0, .length = 3 }, .{ .dist = 248, .length = 3 },
- .{ .dist = 0, .length = 10 }, .{ .dist = 310, .length = 3 }, .{ .dist = 0, .length = 3 },
- .{ .dist = 93, .length = 6 }, .{ .dist = 0, .length = 3 }, .{ .dist = 252, .length = 3 },
- .{ .dist = 157, .length = 4 }, .{ .dist = 0, .length = 2 }, .{ .dist = 273, .length = 5 },
- .{ .dist = 0, .length = 14 }, .{ .dist = 99, .length = 4 }, .{ .dist = 0, .length = 1 },
- .{ .dist = 464, .length = 4 }, .{ .dist = 0, .length = 2 }, .{ .dist = 92, .length = 4 },
- .{ .dist = 495, .length = 3 }, .{ .dist = 0, .length = 1 }, .{ .dist = 322, .length = 4 },
- .{ .dist = 16, .length = 4 }, .{ .dist = 0, .length = 3 }, .{ .dist = 402, .length = 3 },
- .{ .dist = 0, .length = 2 }, .{ .dist = 237, .length = 4 }, .{ .dist = 0, .length = 2 },
- .{ .dist = 432, .length = 4 }, .{ .dist = 0, .length = 1 }, .{ .dist = 483, .length = 5 },
- .{ .dist = 0, .length = 2 }, .{ .dist = 294, .length = 4 }, .{ .dist = 0, .length = 2 },
- .{ .dist = 306, .length = 3 }, .{ .dist = 113, .length = 5 }, .{ .dist = 0, .length = 1 },
- .{ .dist = 26, .length = 4 }, .{ .dist = 164, .length = 3 }, .{ .dist = 488, .length = 4 },
- .{ .dist = 0, .length = 1 }, .{ .dist = 542, .length = 3 }, .{ .dist = 248, .length = 6 },
- .{ .dist = 0, .length = 5 }, .{ .dist = 205, .length = 3 }, .{ .dist = 0, .length = 8 },
- .{ .dist = 48, .length = 3 }, .{ .dist = 449, .length = 6 }, .{ .dist = 0, .length = 2 },
- .{ .dist = 192, .length = 3 }, .{ .dist = 328, .length = 4 }, .{ .dist = 9, .length = 5 },
- .{ .dist = 433, .length = 3 }, .{ .dist = 0, .length = 3 }, .{ .dist = 622, .length = 25 },
- .{ .dist = 615, .length = 5 }, .{ .dist = 46, .length = 5 }, .{ .dist = 0, .length = 2 },
- .{ .dist = 104, .length = 3 }, .{ .dist = 475, .length = 10 }, .{ .dist = 549, .length = 3 },
- .{ .dist = 0, .length = 4 }, .{ .dist = 597, .length = 8 }, .{ .dist = 314, .length = 3 },
- .{ .dist = 0, .length = 1 }, .{ .dist = 473, .length = 6 }, .{ .dist = 317, .length = 5 },
- .{ .dist = 0, .length = 1 }, .{ .dist = 400, .length = 3 }, .{ .dist = 0, .length = 3 },
- .{ .dist = 109, .length = 3 }, .{ .dist = 151, .length = 3 }, .{ .dist = 48, .length = 4 },
- .{ .dist = 0, .length = 4 }, .{ .dist = 125, .length = 3 }, .{ .dist = 108, .length = 3 },
- .{ .dist = 0, .length = 2 },
- };
-
- var got_list = ArrayList(u8).init(testing.allocator);
- defer got_list.deinit();
- var got = got_list.writer();
-
- var want_list = ArrayList(u8).init(testing.allocator);
- defer want_list.deinit();
- var want = want_list.writer();
-
- var dd = DictDecoder{};
- try dd.init(testing.allocator, 1 << 11, null);
- defer dd.deinit();
-
- const util = struct {
- fn writeCopy(dst_dd: *DictDecoder, dst: anytype, dist: u32, length: u32) !void {
- var len = length;
- while (len > 0) {
- var n = dst_dd.tryWriteCopy(dist, len);
- if (n == 0) {
- n = dst_dd.writeCopy(dist, len);
- }
-
- len -= n;
- if (dst_dd.availWrite() == 0) {
- _ = try dst.write(dst_dd.readFlush());
- }
- }
- }
- fn writeString(dst_dd: *DictDecoder, dst: anytype, str: []const u8) !void {
- var string = str;
- while (string.len > 0) {
- const cnt = DictDecoder.copy(dst_dd.writeSlice(), string);
- dst_dd.writeMark(cnt);
- string = string[cnt..];
- if (dst_dd.availWrite() == 0) {
- _ = try dst.write(dst_dd.readFlush());
- }
- }
- }
- };
-
- try util.writeString(&dd, got, ".");
- _ = try want.write(".");
-
- var str = poem;
- for (poem_refs, 0..) |ref, i| {
- _ = i;
- if (ref.dist == 0) {
- try util.writeString(&dd, got, str[0..ref.length]);
- } else {
- try util.writeCopy(&dd, got, ref.dist, ref.length);
- }
- str = str[ref.length..];
- }
- _ = try want.write(poem);
-
- try util.writeCopy(&dd, got, dd.histSize(), 33);
- _ = try want.write(want_list.items[0..33]);
-
- try util.writeString(&dd, got, abc);
- try util.writeCopy(&dd, got, abc.len, 59 * abc.len);
- _ = try want.write(abc ** 60);
-
- try util.writeString(&dd, got, fox);
- try util.writeCopy(&dd, got, fox.len, 9 * fox.len);
- _ = try want.write(fox ** 10);
-
- try util.writeString(&dd, got, ".");
- try util.writeCopy(&dd, got, 1, 9);
- _ = try want.write("." ** 10);
-
- try util.writeString(&dd, got, uppercase);
- try util.writeCopy(&dd, got, uppercase.len, 7 * uppercase.len);
- var i: u8 = 0;
- while (i < 8) : (i += 1) {
- _ = try want.write(uppercase);
- }
-
- try util.writeCopy(&dd, got, dd.histSize(), 10);
- _ = try want.write(want_list.items[want_list.items.len - dd.histSize() ..][0..10]);
-
- _ = try got.write(dd.readFlush());
- try testing.expectEqualSlices(u8, want_list.items, got_list.items);
-}
diff --git a/lib/std/compress/deflate/huffman_bit_writer.zig b/lib/std/compress/deflate/huffman_bit_writer.zig
deleted file mode 100644
index a79dc91aa8..0000000000
--- a/lib/std/compress/deflate/huffman_bit_writer.zig
+++ /dev/null
@@ -1,1686 +0,0 @@
-const std = @import("std");
-const io = std.io;
-
-const Allocator = std.mem.Allocator;
-
-const deflate_const = @import("deflate_const.zig");
-const hm_code = @import("huffman_code.zig");
-const token = @import("token.zig");
-
-// The first length code.
-const length_codes_start = 257;
-
-// The number of codegen codes.
-const codegen_code_count = 19;
-const bad_code = 255;
-
-// buffer_flush_size indicates the buffer size
-// after which bytes are flushed to the writer.
-// Should preferably be a multiple of 6, since
-// we accumulate 6 bytes between writes to the buffer.
-const buffer_flush_size = 240;
-
-// buffer_size is the actual output byte buffer size.
-// It must have additional headroom for a flush
-// which can contain up to 8 bytes.
-const buffer_size = buffer_flush_size + 8;
-
-// The number of extra bits needed by length code X - LENGTH_CODES_START.
-var length_extra_bits = [_]u8{
- 0, 0, 0, // 257
- 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, // 260
- 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, // 270
- 4, 5, 5, 5, 5, 0, // 280
-};
-
-// The length indicated by length code X - LENGTH_CODES_START.
-var length_base = [_]u32{
- 0, 1, 2, 3, 4, 5, 6, 7, 8, 10,
- 12, 14, 16, 20, 24, 28, 32, 40, 48, 56,
- 64, 80, 96, 112, 128, 160, 192, 224, 255,
-};
-
-// offset code word extra bits.
-var offset_extra_bits = [_]i8{
- 0, 0, 0, 0, 1, 1, 2, 2, 3, 3,
- 4, 4, 5, 5, 6, 6, 7, 7, 8, 8,
- 9, 9, 10, 10, 11, 11, 12, 12, 13, 13,
-};
-
-var offset_base = [_]u32{
- 0x000000, 0x000001, 0x000002, 0x000003, 0x000004,
- 0x000006, 0x000008, 0x00000c, 0x000010, 0x000018,
- 0x000020, 0x000030, 0x000040, 0x000060, 0x000080,
- 0x0000c0, 0x000100, 0x000180, 0x000200, 0x000300,
- 0x000400, 0x000600, 0x000800, 0x000c00, 0x001000,
- 0x001800, 0x002000, 0x003000, 0x004000, 0x006000,
-};
-
-// The odd order in which the codegen code sizes are written.
-var codegen_order = [_]u32{ 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15 };
-
-pub fn HuffmanBitWriter(comptime WriterType: type) type {
- return struct {
- const Self = @This();
- pub const Error = WriterType.Error;
-
- // writer is the underlying writer.
- // Do not use it directly; use the write method, which ensures
- // that Write errors are sticky.
- inner_writer: WriterType,
- bytes_written: usize,
-
- // Data waiting to be written is bytes[0 .. nbytes]
- // and then the low nbits of bits. Data is always written
- // sequentially into the bytes array.
- bits: u64,
- nbits: u32, // number of bits
- bytes: [buffer_size]u8,
- codegen_freq: [codegen_code_count]u16,
- nbytes: u32, // number of bytes
- literal_freq: []u16,
- offset_freq: []u16,
- codegen: []u8,
- literal_encoding: hm_code.HuffmanEncoder,
- offset_encoding: hm_code.HuffmanEncoder,
- codegen_encoding: hm_code.HuffmanEncoder,
- err: bool = false,
- fixed_literal_encoding: hm_code.HuffmanEncoder,
- fixed_offset_encoding: hm_code.HuffmanEncoder,
- allocator: Allocator,
- huff_offset: hm_code.HuffmanEncoder,
-
- pub fn reset(self: *Self, new_writer: WriterType) void {
- self.inner_writer = new_writer;
- self.bytes_written = 0;
- self.bits = 0;
- self.nbits = 0;
- self.nbytes = 0;
- self.err = false;
- }
-
- pub fn flush(self: *Self) Error!void {
- if (self.err) {
- self.nbits = 0;
- return;
- }
- var n = self.nbytes;
- while (self.nbits != 0) {
- self.bytes[n] = @as(u8, @truncate(self.bits));
- self.bits >>= 8;
- if (self.nbits > 8) { // Avoid underflow
- self.nbits -= 8;
- } else {
- self.nbits = 0;
- }
- n += 1;
- }
- self.bits = 0;
- try self.write(self.bytes[0..n]);
- self.nbytes = 0;
- }
-
- fn write(self: *Self, b: []const u8) Error!void {
- if (self.err) {
- return;
- }
- try self.inner_writer.writeAll(b);
- self.bytes_written += b.len;
- }
-
- fn writeBits(self: *Self, b: u32, nb: u32) Error!void {
- if (self.err) {
- return;
- }
- self.bits |= @as(u64, @intCast(b)) << @as(u6, @intCast(self.nbits));
- self.nbits += nb;
- if (self.nbits >= 48) {
- const bits = self.bits;
- self.bits >>= 48;
- self.nbits -= 48;
- var n = self.nbytes;
- var bytes = self.bytes[n..][0..6];
- bytes[0] = @as(u8, @truncate(bits));
- bytes[1] = @as(u8, @truncate(bits >> 8));
- bytes[2] = @as(u8, @truncate(bits >> 16));
- bytes[3] = @as(u8, @truncate(bits >> 24));
- bytes[4] = @as(u8, @truncate(bits >> 32));
- bytes[5] = @as(u8, @truncate(bits >> 40));
- n += 6;
- if (n >= buffer_flush_size) {
- try self.write(self.bytes[0..n]);
- n = 0;
- }
- self.nbytes = n;
- }
- }
-
- pub fn writeBytes(self: *Self, bytes: []const u8) Error!void {
- if (self.err) {
- return;
- }
- var n = self.nbytes;
- if (self.nbits & 7 != 0) {
- self.err = true; // unfinished bits
- return;
- }
- while (self.nbits != 0) {
- self.bytes[n] = @as(u8, @truncate(self.bits));
- self.bits >>= 8;
- self.nbits -= 8;
- n += 1;
- }
- if (n != 0) {
- try self.write(self.bytes[0..n]);
- }
- self.nbytes = 0;
- try self.write(bytes);
- }
-
- // RFC 1951 3.2.7 specifies a special run-length encoding for specifying
- // the literal and offset lengths arrays (which are concatenated into a single
- // array). This method generates that run-length encoding.
- //
- // The result is written into the codegen array, and the frequencies
- // of each code is written into the codegen_freq array.
- // Codes 0-15 are single byte codes. Codes 16-18 are followed by additional
- // information. Code bad_code is an end marker
- //
- // num_literals: The number of literals in literal_encoding
- // num_offsets: The number of offsets in offset_encoding
- // lit_enc: The literal encoder to use
- // off_enc: The offset encoder to use
- fn generateCodegen(
- self: *Self,
- num_literals: u32,
- num_offsets: u32,
- lit_enc: *hm_code.HuffmanEncoder,
- off_enc: *hm_code.HuffmanEncoder,
- ) void {
- for (self.codegen_freq, 0..) |_, i| {
- self.codegen_freq[i] = 0;
- }
-
- // Note that we are using codegen both as a temporary variable for holding
- // a copy of the frequencies, and as the place where we put the result.
- // This is fine because the output is always shorter than the input used
- // so far.
- var codegen = self.codegen; // cache
- // Copy the concatenated code sizes to codegen. Put a marker at the end.
- var cgnl = codegen[0..num_literals];
- for (cgnl, 0..) |_, i| {
- cgnl[i] = @as(u8, @intCast(lit_enc.codes[i].len));
- }
-
- cgnl = codegen[num_literals .. num_literals + num_offsets];
- for (cgnl, 0..) |_, i| {
- cgnl[i] = @as(u8, @intCast(off_enc.codes[i].len));
- }
- codegen[num_literals + num_offsets] = bad_code;
-
- var size = codegen[0];
- var count: i32 = 1;
- var out_index: u32 = 0;
- var in_index: u32 = 1;
- while (size != bad_code) : (in_index += 1) {
- // INVARIANT: We have seen "count" copies of size that have not yet
- // had output generated for them.
- const next_size = codegen[in_index];
- if (next_size == size) {
- count += 1;
- continue;
- }
- // We need to generate codegen indicating "count" of size.
- if (size != 0) {
- codegen[out_index] = size;
- out_index += 1;
- self.codegen_freq[size] += 1;
- count -= 1;
- while (count >= 3) {
- var n: i32 = 6;
- if (n > count) {
- n = count;
- }
- codegen[out_index] = 16;
- out_index += 1;
- codegen[out_index] = @as(u8, @intCast(n - 3));
- out_index += 1;
- self.codegen_freq[16] += 1;
- count -= n;
- }
- } else {
- while (count >= 11) {
- var n: i32 = 138;
- if (n > count) {
- n = count;
- }
- codegen[out_index] = 18;
- out_index += 1;
- codegen[out_index] = @as(u8, @intCast(n - 11));
- out_index += 1;
- self.codegen_freq[18] += 1;
- count -= n;
- }
- if (count >= 3) {
- // 3 <= count <= 10
- codegen[out_index] = 17;
- out_index += 1;
- codegen[out_index] = @as(u8, @intCast(count - 3));
- out_index += 1;
- self.codegen_freq[17] += 1;
- count = 0;
- }
- }
- count -= 1;
- while (count >= 0) : (count -= 1) {
- codegen[out_index] = size;
- out_index += 1;
- self.codegen_freq[size] += 1;
- }
- // Set up invariant for next time through the loop.
- size = next_size;
- count = 1;
- }
- // Marker indicating the end of the codegen.
- codegen[out_index] = bad_code;
- }
-
- // dynamicSize returns the size of dynamically encoded data in bits.
- fn dynamicSize(
- self: *Self,
- lit_enc: *hm_code.HuffmanEncoder, // literal encoder
- off_enc: *hm_code.HuffmanEncoder, // offset encoder
- extra_bits: u32,
- ) DynamicSize {
- var num_codegens = self.codegen_freq.len;
- while (num_codegens > 4 and self.codegen_freq[codegen_order[num_codegens - 1]] == 0) {
- num_codegens -= 1;
- }
- const header = 3 + 5 + 5 + 4 + (3 * num_codegens) +
- self.codegen_encoding.bitLength(self.codegen_freq[0..]) +
- self.codegen_freq[16] * 2 +
- self.codegen_freq[17] * 3 +
- self.codegen_freq[18] * 7;
- const size = header +
- lit_enc.bitLength(self.literal_freq) +
- off_enc.bitLength(self.offset_freq) +
- extra_bits;
-
- return DynamicSize{
- .size = @as(u32, @intCast(size)),
- .num_codegens = @as(u32, @intCast(num_codegens)),
- };
- }
-
- // fixedSize returns the size of dynamically encoded data in bits.
- fn fixedSize(self: *Self, extra_bits: u32) u32 {
- return 3 +
- self.fixed_literal_encoding.bitLength(self.literal_freq) +
- self.fixed_offset_encoding.bitLength(self.offset_freq) +
- extra_bits;
- }
-
- // storedSizeFits calculates the stored size, including header.
- // The function returns the size in bits and whether the block
- // fits inside a single block.
- fn storedSizeFits(in: ?[]const u8) StoredSize {
- if (in == null) {
- return .{ .size = 0, .storable = false };
- }
- if (in.?.len <= deflate_const.max_store_block_size) {
- return .{ .size = @as(u32, @intCast((in.?.len + 5) * 8)), .storable = true };
- }
- return .{ .size = 0, .storable = false };
- }
-
- fn writeCode(self: *Self, c: hm_code.HuffCode) Error!void {
- if (self.err) {
- return;
- }
- self.bits |= @as(u64, @intCast(c.code)) << @as(u6, @intCast(self.nbits));
- self.nbits += @as(u32, @intCast(c.len));
- if (self.nbits >= 48) {
- const bits = self.bits;
- self.bits >>= 48;
- self.nbits -= 48;
- var n = self.nbytes;
- var bytes = self.bytes[n..][0..6];
- bytes[0] = @as(u8, @truncate(bits));
- bytes[1] = @as(u8, @truncate(bits >> 8));
- bytes[2] = @as(u8, @truncate(bits >> 16));
- bytes[3] = @as(u8, @truncate(bits >> 24));
- bytes[4] = @as(u8, @truncate(bits >> 32));
- bytes[5] = @as(u8, @truncate(bits >> 40));
- n += 6;
- if (n >= buffer_flush_size) {
- try self.write(self.bytes[0..n]);
- n = 0;
- }
- self.nbytes = n;
- }
- }
-
- // Write the header of a dynamic Huffman block to the output stream.
- //
- // num_literals: The number of literals specified in codegen
- // num_offsets: The number of offsets specified in codegen
- // num_codegens: The number of codegens used in codegen
- // is_eof: Is it the end-of-file? (end of stream)
- fn writeDynamicHeader(
- self: *Self,
- num_literals: u32,
- num_offsets: u32,
- num_codegens: u32,
- is_eof: bool,
- ) Error!void {
- if (self.err) {
- return;
- }
- var first_bits: u32 = 4;
- if (is_eof) {
- first_bits = 5;
- }
- try self.writeBits(first_bits, 3);
- try self.writeBits(@as(u32, @intCast(num_literals - 257)), 5);
- try self.writeBits(@as(u32, @intCast(num_offsets - 1)), 5);
- try self.writeBits(@as(u32, @intCast(num_codegens - 4)), 4);
-
- var i: u32 = 0;
- while (i < num_codegens) : (i += 1) {
- const value = @as(u32, @intCast(self.codegen_encoding.codes[codegen_order[i]].len));
- try self.writeBits(@as(u32, @intCast(value)), 3);
- }
-
- i = 0;
- while (true) {
- const code_word: u32 = @as(u32, @intCast(self.codegen[i]));
- i += 1;
- if (code_word == bad_code) {
- break;
- }
- try self.writeCode(self.codegen_encoding.codes[@as(u32, @intCast(code_word))]);
-
- switch (code_word) {
- 16 => {
- try self.writeBits(@as(u32, @intCast(self.codegen[i])), 2);
- i += 1;
- },
- 17 => {
- try self.writeBits(@as(u32, @intCast(self.codegen[i])), 3);
- i += 1;
- },
- 18 => {
- try self.writeBits(@as(u32, @intCast(self.codegen[i])), 7);
- i += 1;
- },
- else => {},
- }
- }
- }
-
- pub fn writeStoredHeader(self: *Self, length: usize, is_eof: bool) Error!void {
- if (self.err) {
- return;
- }
- var flag: u32 = 0;
- if (is_eof) {
- flag = 1;
- }
- try self.writeBits(flag, 3);
- try self.flush();
- try self.writeBits(@as(u32, @intCast(length)), 16);
- try self.writeBits(@as(u32, @intCast(~@as(u16, @intCast(length)))), 16);
- }
-
- fn writeFixedHeader(self: *Self, is_eof: bool) Error!void {
- if (self.err) {
- return;
- }
- // Indicate that we are a fixed Huffman block
- var value: u32 = 2;
- if (is_eof) {
- value = 3;
- }
- try self.writeBits(value, 3);
- }
-
- // Write a block of tokens with the smallest encoding.
- // The original input can be supplied, and if the huffman encoded data
- // is larger than the original bytes, the data will be written as a
- // stored block.
- // If the input is null, the tokens will always be Huffman encoded.
- pub fn writeBlock(
- self: *Self,
- tokens: []const token.Token,
- eof: bool,
- input: ?[]const u8,
- ) Error!void {
- if (self.err) {
- return;
- }
-
- const lit_and_off = self.indexTokens(tokens);
- const num_literals = lit_and_off.num_literals;
- const num_offsets = lit_and_off.num_offsets;
-
- var extra_bits: u32 = 0;
- const ret = storedSizeFits(input);
- const stored_size = ret.size;
- const storable = ret.storable;
-
- if (storable) {
- // We only bother calculating the costs of the extra bits required by
- // the length of offset fields (which will be the same for both fixed
- // and dynamic encoding), if we need to compare those two encodings
- // against stored encoding.
- var length_code: u32 = length_codes_start + 8;
- while (length_code < num_literals) : (length_code += 1) {
- // First eight length codes have extra size = 0.
- extra_bits += @as(u32, @intCast(self.literal_freq[length_code])) *
- @as(u32, @intCast(length_extra_bits[length_code - length_codes_start]));
- }
- var offset_code: u32 = 4;
- while (offset_code < num_offsets) : (offset_code += 1) {
- // First four offset codes have extra size = 0.
- extra_bits += @as(u32, @intCast(self.offset_freq[offset_code])) *
- @as(u32, @intCast(offset_extra_bits[offset_code]));
- }
- }
-
- // Figure out smallest code.
- // Fixed Huffman baseline.
- var literal_encoding = &self.fixed_literal_encoding;
- var offset_encoding = &self.fixed_offset_encoding;
- var size = self.fixedSize(extra_bits);
-
- // Dynamic Huffman?
- var num_codegens: u32 = 0;
-
- // Generate codegen and codegenFrequencies, which indicates how to encode
- // the literal_encoding and the offset_encoding.
- self.generateCodegen(
- num_literals,
- num_offsets,
- &self.literal_encoding,
- &self.offset_encoding,
- );
- self.codegen_encoding.generate(self.codegen_freq[0..], 7);
- const dynamic_size = self.dynamicSize(
- &self.literal_encoding,
- &self.offset_encoding,
- extra_bits,
- );
- const dyn_size = dynamic_size.size;
- num_codegens = dynamic_size.num_codegens;
-
- if (dyn_size < size) {
- size = dyn_size;
- literal_encoding = &self.literal_encoding;
- offset_encoding = &self.offset_encoding;
- }
-
- // Stored bytes?
- if (storable and stored_size < size) {
- try self.writeStoredHeader(input.?.len, eof);
- try self.writeBytes(input.?);
- return;
- }
-
- // Huffman.
- if (@intFromPtr(literal_encoding) == @intFromPtr(&self.fixed_literal_encoding)) {
- try self.writeFixedHeader(eof);
- } else {
- try self.writeDynamicHeader(num_literals, num_offsets, num_codegens, eof);
- }
-
- // Write the tokens.
- try self.writeTokens(tokens, literal_encoding.codes, offset_encoding.codes);
- }
-
- // writeBlockDynamic encodes a block using a dynamic Huffman table.
- // This should be used if the symbols used have a disproportionate
- // histogram distribution.
- // If input is supplied and the compression savings are below 1/16th of the
- // input size the block is stored.
- pub fn writeBlockDynamic(
- self: *Self,
- tokens: []const token.Token,
- eof: bool,
- input: ?[]const u8,
- ) Error!void {
- if (self.err) {
- return;
- }
-
- const total_tokens = self.indexTokens(tokens);
- const num_literals = total_tokens.num_literals;
- const num_offsets = total_tokens.num_offsets;
-
- // Generate codegen and codegenFrequencies, which indicates how to encode
- // the literal_encoding and the offset_encoding.
- self.generateCodegen(
- num_literals,
- num_offsets,
- &self.literal_encoding,
- &self.offset_encoding,
- );
- self.codegen_encoding.generate(self.codegen_freq[0..], 7);
- const dynamic_size = self.dynamicSize(&self.literal_encoding, &self.offset_encoding, 0);
- const size = dynamic_size.size;
- const num_codegens = dynamic_size.num_codegens;
-
- // Store bytes, if we don't get a reasonable improvement.
-
- const stored_size = storedSizeFits(input);
- const ssize = stored_size.size;
- const storable = stored_size.storable;
- if (storable and ssize < (size + (size >> 4))) {
- try self.writeStoredHeader(input.?.len, eof);
- try self.writeBytes(input.?);
- return;
- }
-
- // Write Huffman table.
- try self.writeDynamicHeader(num_literals, num_offsets, num_codegens, eof);
-
- // Write the tokens.
- try self.writeTokens(tokens, self.literal_encoding.codes, self.offset_encoding.codes);
- }
-
- const TotalIndexedTokens = struct {
- num_literals: u32,
- num_offsets: u32,
- };
-
- // Indexes a slice of tokens followed by an end_block_marker, and updates
- // literal_freq and offset_freq, and generates literal_encoding
- // and offset_encoding.
- // The number of literal and offset tokens is returned.
- fn indexTokens(self: *Self, tokens: []const token.Token) TotalIndexedTokens {
- var num_literals: u32 = 0;
- var num_offsets: u32 = 0;
-
- for (self.literal_freq, 0..) |_, i| {
- self.literal_freq[i] = 0;
- }
- for (self.offset_freq, 0..) |_, i| {
- self.offset_freq[i] = 0;
- }
-
- for (tokens) |t| {
- if (t < token.match_type) {
- self.literal_freq[token.literal(t)] += 1;
- continue;
- }
- const length = token.length(t);
- const offset = token.offset(t);
- self.literal_freq[length_codes_start + token.lengthCode(length)] += 1;
- self.offset_freq[token.offsetCode(offset)] += 1;
- }
- // add end_block_marker token at the end
- self.literal_freq[token.literal(deflate_const.end_block_marker)] += 1;
-
- // get the number of literals
- num_literals = @as(u32, @intCast(self.literal_freq.len));
- while (self.literal_freq[num_literals - 1] == 0) {
- num_literals -= 1;
- }
- // get the number of offsets
- num_offsets = @as(u32, @intCast(self.offset_freq.len));
- while (num_offsets > 0 and self.offset_freq[num_offsets - 1] == 0) {
- num_offsets -= 1;
- }
- if (num_offsets == 0) {
- // We haven't found a single match. If we want to go with the dynamic encoding,
- // we should count at least one offset to be sure that the offset huffman tree could be encoded.
- self.offset_freq[0] = 1;
- num_offsets = 1;
- }
- self.literal_encoding.generate(self.literal_freq, 15);
- self.offset_encoding.generate(self.offset_freq, 15);
- return TotalIndexedTokens{
- .num_literals = num_literals,
- .num_offsets = num_offsets,
- };
- }
-
- // Writes a slice of tokens to the output followed by and end_block_marker.
- // codes for literal and offset encoding must be supplied.
- fn writeTokens(
- self: *Self,
- tokens: []const token.Token,
- le_codes: []hm_code.HuffCode,
- oe_codes: []hm_code.HuffCode,
- ) Error!void {
- if (self.err) {
- return;
- }
- for (tokens) |t| {
- if (t < token.match_type) {
- try self.writeCode(le_codes[token.literal(t)]);
- continue;
- }
- // Write the length
- const length = token.length(t);
- const length_code = token.lengthCode(length);
- try self.writeCode(le_codes[length_code + length_codes_start]);
- const extra_length_bits = @as(u32, @intCast(length_extra_bits[length_code]));
- if (extra_length_bits > 0) {
- const extra_length = @as(u32, @intCast(length - length_base[length_code]));
- try self.writeBits(extra_length, extra_length_bits);
- }
- // Write the offset
- const offset = token.offset(t);
- const offset_code = token.offsetCode(offset);
- try self.writeCode(oe_codes[offset_code]);
- const extra_offset_bits = @as(u32, @intCast(offset_extra_bits[offset_code]));
- if (extra_offset_bits > 0) {
- const extra_offset = @as(u32, @intCast(offset - offset_base[offset_code]));
- try self.writeBits(extra_offset, extra_offset_bits);
- }
- }
- // add end_block_marker at the end
- try self.writeCode(le_codes[token.literal(deflate_const.end_block_marker)]);
- }
-
- // Encodes a block of bytes as either Huffman encoded literals or uncompressed bytes
- // if the results only gains very little from compression.
- pub fn writeBlockHuff(self: *Self, eof: bool, input: []const u8) Error!void {
- if (self.err) {
- return;
- }
-
- // Clear histogram
- for (self.literal_freq, 0..) |_, i| {
- self.literal_freq[i] = 0;
- }
-
- // Add everything as literals
- histogram(input, &self.literal_freq);
-
- self.literal_freq[deflate_const.end_block_marker] = 1;
-
- const num_literals = deflate_const.end_block_marker + 1;
- self.offset_freq[0] = 1;
- const num_offsets = 1;
-
- self.literal_encoding.generate(self.literal_freq, 15);
-
- // Figure out smallest code.
- // Always use dynamic Huffman or Store
- var num_codegens: u32 = 0;
-
- // Generate codegen and codegenFrequencies, which indicates how to encode
- // the literal_encoding and the offset_encoding.
- self.generateCodegen(
- num_literals,
- num_offsets,
- &self.literal_encoding,
- &self.huff_offset,
- );
- self.codegen_encoding.generate(self.codegen_freq[0..], 7);
- const dynamic_size = self.dynamicSize(&self.literal_encoding, &self.huff_offset, 0);
- const size = dynamic_size.size;
- num_codegens = dynamic_size.num_codegens;
-
- // Store bytes, if we don't get a reasonable improvement.
-
- const stored_size_ret = storedSizeFits(input);
- const ssize = stored_size_ret.size;
- const storable = stored_size_ret.storable;
-
- if (storable and ssize < (size + (size >> 4))) {
- try self.writeStoredHeader(input.len, eof);
- try self.writeBytes(input);
- return;
- }
-
- // Huffman.
- try self.writeDynamicHeader(num_literals, num_offsets, num_codegens, eof);
- const encoding = self.literal_encoding.codes[0..257];
- var n = self.nbytes;
- for (input) |t| {
- // Bitwriting inlined, ~30% speedup
- const c = encoding[t];
- self.bits |= @as(u64, @intCast(c.code)) << @as(u6, @intCast(self.nbits));
- self.nbits += @as(u32, @intCast(c.len));
- if (self.nbits < 48) {
- continue;
- }
- // Store 6 bytes
- const bits = self.bits;
- self.bits >>= 48;
- self.nbits -= 48;
- var bytes = self.bytes[n..][0..6];
- bytes[0] = @as(u8, @truncate(bits));
- bytes[1] = @as(u8, @truncate(bits >> 8));
- bytes[2] = @as(u8, @truncate(bits >> 16));
- bytes[3] = @as(u8, @truncate(bits >> 24));
- bytes[4] = @as(u8, @truncate(bits >> 32));
- bytes[5] = @as(u8, @truncate(bits >> 40));
- n += 6;
- if (n < buffer_flush_size) {
- continue;
- }
- try self.write(self.bytes[0..n]);
- if (self.err) {
- return; // Return early in the event of write failures
- }
- n = 0;
- }
- self.nbytes = n;
- try self.writeCode(encoding[deflate_const.end_block_marker]);
- }
-
- pub fn deinit(self: *Self) void {
- self.allocator.free(self.literal_freq);
- self.allocator.free(self.offset_freq);
- self.allocator.free(self.codegen);
- self.literal_encoding.deinit();
- self.codegen_encoding.deinit();
- self.offset_encoding.deinit();
- self.fixed_literal_encoding.deinit();
- self.fixed_offset_encoding.deinit();
- self.huff_offset.deinit();
- }
- };
-}
-
-const DynamicSize = struct {
- size: u32,
- num_codegens: u32,
-};
-
-const StoredSize = struct {
- size: u32,
- storable: bool,
-};
-
-pub fn huffmanBitWriter(allocator: Allocator, writer: anytype) !HuffmanBitWriter(@TypeOf(writer)) {
- var offset_freq = [1]u16{0} ** deflate_const.offset_code_count;
- offset_freq[0] = 1;
- // huff_offset is a static offset encoder used for huffman only encoding.
- // It can be reused since we will not be encoding offset values.
- var huff_offset = try hm_code.newHuffmanEncoder(allocator, deflate_const.offset_code_count);
- huff_offset.generate(offset_freq[0..], 15);
-
- return HuffmanBitWriter(@TypeOf(writer)){
- .inner_writer = writer,
- .bytes_written = 0,
- .bits = 0,
- .nbits = 0,
- .nbytes = 0,
- .bytes = [1]u8{0} ** buffer_size,
- .codegen_freq = [1]u16{0} ** codegen_code_count,
- .literal_freq = try allocator.alloc(u16, deflate_const.max_num_lit),
- .offset_freq = try allocator.alloc(u16, deflate_const.offset_code_count),
- .codegen = try allocator.alloc(u8, deflate_const.max_num_lit + deflate_const.offset_code_count + 1),
- .literal_encoding = try hm_code.newHuffmanEncoder(allocator, deflate_const.max_num_lit),
- .codegen_encoding = try hm_code.newHuffmanEncoder(allocator, codegen_code_count),
- .offset_encoding = try hm_code.newHuffmanEncoder(allocator, deflate_const.offset_code_count),
- .allocator = allocator,
- .fixed_literal_encoding = try hm_code.generateFixedLiteralEncoding(allocator),
- .fixed_offset_encoding = try hm_code.generateFixedOffsetEncoding(allocator),
- .huff_offset = huff_offset,
- };
-}
-
-// histogram accumulates a histogram of b in h.
-//
-// h.len must be >= 256, and h's elements must be all zeroes.
-fn histogram(b: []const u8, h: *[]u16) void {
- var lh = h.*[0..256];
- for (b) |t| {
- lh[t] += 1;
- }
-}
-
-// tests
-const expect = std.testing.expect;
-const fmt = std.fmt;
-const math = std.math;
-const mem = std.mem;
-const testing = std.testing;
-
-const ArrayList = std.ArrayList;
-
-test "writeBlockHuff" {
- // Tests huffman encoding against reference files to detect possible regressions.
- // If encoding/bit allocation changes you can regenerate these files
-
- try testBlockHuff(
- "huffman-null-max.input",
- "huffman-null-max.golden",
- );
- try testBlockHuff(
- "huffman-pi.input",
- "huffman-pi.golden",
- );
- try testBlockHuff(
- "huffman-rand-1k.input",
- "huffman-rand-1k.golden",
- );
- try testBlockHuff(
- "huffman-rand-limit.input",
- "huffman-rand-limit.golden",
- );
- try testBlockHuff(
- "huffman-rand-max.input",
- "huffman-rand-max.golden",
- );
- try testBlockHuff(
- "huffman-shifts.input",
- "huffman-shifts.golden",
- );
- try testBlockHuff(
- "huffman-text.input",
- "huffman-text.golden",
- );
- try testBlockHuff(
- "huffman-text-shift.input",
- "huffman-text-shift.golden",
- );
- try testBlockHuff(
- "huffman-zero.input",
- "huffman-zero.golden",
- );
-}
-
-fn testBlockHuff(comptime in_name: []const u8, comptime want_name: []const u8) !void {
- const in: []const u8 = @embedFile("testdata/" ++ in_name);
- const want: []const u8 = @embedFile("testdata/" ++ want_name);
-
- var buf = ArrayList(u8).init(testing.allocator);
- defer buf.deinit();
- var bw = try huffmanBitWriter(testing.allocator, buf.writer());
- defer bw.deinit();
- try bw.writeBlockHuff(false, in);
- try bw.flush();
-
- try std.testing.expectEqualSlices(u8, want, buf.items);
-
- // Test if the writer produces the same output after reset.
- var buf_after_reset = ArrayList(u8).init(testing.allocator);
- defer buf_after_reset.deinit();
-
- bw.reset(buf_after_reset.writer());
-
- try bw.writeBlockHuff(false, in);
- try bw.flush();
-
- try std.testing.expectEqualSlices(u8, buf.items, buf_after_reset.items);
- try std.testing.expectEqualSlices(u8, want, buf_after_reset.items);
-
- try testWriterEOF(.write_huffman_block, &[0]token.Token{}, in);
-}
-
-const HuffTest = struct {
- tokens: []const token.Token,
- input: []const u8 = "", // File name of input data matching the tokens.
- want: []const u8 = "", // File name of data with the expected output with input available.
- want_no_input: []const u8 = "", // File name of the expected output when no input is available.
-};
-
-const ml = 0x7fc00000; // Maximum length token. Used to reduce the size of writeBlockTests
-
-const writeBlockTests = &[_]HuffTest{
- HuffTest{
- .input = "huffman-null-max.input",
- .want = "huffman-null-max.{s}.expect",
- .want_no_input = "huffman-null-max.{s}.expect-noinput",
- .tokens = &[_]token.Token{
- 0x0, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
- ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
- ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
- ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
- ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
- ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
- ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
- ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
- ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
- ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
- ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
- ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
- ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, 0x0, 0x0,
- },
- },
- HuffTest{
- .input = "huffman-pi.input",
- .want = "huffman-pi.{s}.expect",
- .want_no_input = "huffman-pi.{s}.expect-noinput",
- .tokens = &[_]token.Token{
- 0x33, 0x2e, 0x31, 0x34, 0x31, 0x35, 0x39,
- 0x32, 0x36, 0x35, 0x33, 0x35, 0x38, 0x39,
- 0x37, 0x39, 0x33, 0x32, 0x33, 0x38, 0x34,
- 0x36, 0x32, 0x36, 0x34, 0x33, 0x33, 0x38,
- 0x33, 0x32, 0x37, 0x39, 0x35, 0x30, 0x32,
- 0x38, 0x38, 0x34, 0x31, 0x39, 0x37, 0x31,
- 0x36, 0x39, 0x33, 0x39, 0x39, 0x33, 0x37,
- 0x35, 0x31, 0x30, 0x35, 0x38, 0x32, 0x30,
- 0x39, 0x37, 0x34, 0x39, 0x34, 0x34, 0x35,
- 0x39, 0x32, 0x33, 0x30, 0x37, 0x38, 0x31,
- 0x36, 0x34, 0x30, 0x36, 0x32, 0x38, 0x36,
- 0x32, 0x30, 0x38, 0x39, 0x39, 0x38, 0x36,
- 0x32, 0x38, 0x30, 0x33, 0x34, 0x38, 0x32,
- 0x35, 0x33, 0x34, 0x32, 0x31, 0x31, 0x37,
- 0x30, 0x36, 0x37, 0x39, 0x38, 0x32, 0x31,
- 0x34, 0x38, 0x30, 0x38, 0x36, 0x35, 0x31,
- 0x33, 0x32, 0x38, 0x32, 0x33, 0x30, 0x36,
- 0x36, 0x34, 0x37, 0x30, 0x39, 0x33, 0x38,
- 0x34, 0x34, 0x36, 0x30, 0x39, 0x35, 0x35,
- 0x30, 0x35, 0x38, 0x32, 0x32, 0x33, 0x31,
- 0x37, 0x32, 0x35, 0x33, 0x35, 0x39, 0x34,
- 0x30, 0x38, 0x31, 0x32, 0x38, 0x34, 0x38,
- 0x31, 0x31, 0x31, 0x37, 0x34, 0x4040007e, 0x34,
- 0x31, 0x30, 0x32, 0x37, 0x30, 0x31, 0x39,
- 0x33, 0x38, 0x35, 0x32, 0x31, 0x31, 0x30,
- 0x35, 0x35, 0x35, 0x39, 0x36, 0x34, 0x34,
- 0x36, 0x32, 0x32, 0x39, 0x34, 0x38, 0x39,
- 0x35, 0x34, 0x39, 0x33, 0x30, 0x33, 0x38,
- 0x31, 0x40400012, 0x32, 0x38, 0x38, 0x31, 0x30,
- 0x39, 0x37, 0x35, 0x36, 0x36, 0x35, 0x39,
- 0x33, 0x33, 0x34, 0x34, 0x36, 0x40400047, 0x37,
- 0x35, 0x36, 0x34, 0x38, 0x32, 0x33, 0x33,
- 0x37, 0x38, 0x36, 0x37, 0x38, 0x33, 0x31,
- 0x36, 0x35, 0x32, 0x37, 0x31, 0x32, 0x30,
- 0x31, 0x39, 0x30, 0x39, 0x31, 0x34, 0x4040001a,
- 0x35, 0x36, 0x36, 0x39, 0x32, 0x33, 0x34,
- 0x36, 0x404000b2, 0x36, 0x31, 0x30, 0x34, 0x35,
- 0x34, 0x33, 0x32, 0x36, 0x40400032, 0x31, 0x33,
- 0x33, 0x39, 0x33, 0x36, 0x30, 0x37, 0x32,
- 0x36, 0x30, 0x32, 0x34, 0x39, 0x31, 0x34,
- 0x31, 0x32, 0x37, 0x33, 0x37, 0x32, 0x34,
- 0x35, 0x38, 0x37, 0x30, 0x30, 0x36, 0x36,
- 0x30, 0x36, 0x33, 0x31, 0x35, 0x35, 0x38,
- 0x38, 0x31, 0x37, 0x34, 0x38, 0x38, 0x31,
- 0x35, 0x32, 0x30, 0x39, 0x32, 0x30, 0x39,
- 0x36, 0x32, 0x38, 0x32, 0x39, 0x32, 0x35,
- 0x34, 0x30, 0x39, 0x31, 0x37, 0x31, 0x35,
- 0x33, 0x36, 0x34, 0x33, 0x36, 0x37, 0x38,
- 0x39, 0x32, 0x35, 0x39, 0x30, 0x33, 0x36,
- 0x30, 0x30, 0x31, 0x31, 0x33, 0x33, 0x30,
- 0x35, 0x33, 0x30, 0x35, 0x34, 0x38, 0x38,
- 0x32, 0x30, 0x34, 0x36, 0x36, 0x35, 0x32,
- 0x31, 0x33, 0x38, 0x34, 0x31, 0x34, 0x36,
- 0x39, 0x35, 0x31, 0x39, 0x34, 0x31, 0x35,
- 0x31, 0x31, 0x36, 0x30, 0x39, 0x34, 0x33,
- 0x33, 0x30, 0x35, 0x37, 0x32, 0x37, 0x30,
- 0x33, 0x36, 0x35, 0x37, 0x35, 0x39, 0x35,
- 0x39, 0x31, 0x39, 0x35, 0x33, 0x30, 0x39,
- 0x32, 0x31, 0x38, 0x36, 0x31, 0x31, 0x37,
- 0x404000e9, 0x33, 0x32, 0x40400009, 0x39, 0x33, 0x31,
- 0x30, 0x35, 0x31, 0x31, 0x38, 0x35, 0x34,
- 0x38, 0x30, 0x37, 0x4040010e, 0x33, 0x37, 0x39,
- 0x39, 0x36, 0x32, 0x37, 0x34, 0x39, 0x35,
- 0x36, 0x37, 0x33, 0x35, 0x31, 0x38, 0x38,
- 0x35, 0x37, 0x35, 0x32, 0x37, 0x32, 0x34,
- 0x38, 0x39, 0x31, 0x32, 0x32, 0x37, 0x39,
- 0x33, 0x38, 0x31, 0x38, 0x33, 0x30, 0x31,
- 0x31, 0x39, 0x34, 0x39, 0x31, 0x32, 0x39,
- 0x38, 0x33, 0x33, 0x36, 0x37, 0x33, 0x33,
- 0x36, 0x32, 0x34, 0x34, 0x30, 0x36, 0x35,
- 0x36, 0x36, 0x34, 0x33, 0x30, 0x38, 0x36,
- 0x30, 0x32, 0x31, 0x33, 0x39, 0x34, 0x39,
- 0x34, 0x36, 0x33, 0x39, 0x35, 0x32, 0x32,
- 0x34, 0x37, 0x33, 0x37, 0x31, 0x39, 0x30,
- 0x37, 0x30, 0x32, 0x31, 0x37, 0x39, 0x38,
- 0x40800099, 0x37, 0x30, 0x32, 0x37, 0x37, 0x30,
- 0x35, 0x33, 0x39, 0x32, 0x31, 0x37, 0x31,
- 0x37, 0x36, 0x32, 0x39, 0x33, 0x31, 0x37,
- 0x36, 0x37, 0x35, 0x40800232, 0x37, 0x34, 0x38,
- 0x31, 0x40400006, 0x36, 0x36, 0x39, 0x34, 0x30,
- 0x404001e7, 0x30, 0x30, 0x30, 0x35, 0x36, 0x38,
- 0x31, 0x32, 0x37, 0x31, 0x34, 0x35, 0x32,
- 0x36, 0x33, 0x35, 0x36, 0x30, 0x38, 0x32,
- 0x37, 0x37, 0x38, 0x35, 0x37, 0x37, 0x31,
- 0x33, 0x34, 0x32, 0x37, 0x35, 0x37, 0x37,
- 0x38, 0x39, 0x36, 0x40400129, 0x33, 0x36, 0x33,
- 0x37, 0x31, 0x37, 0x38, 0x37, 0x32, 0x31,
- 0x34, 0x36, 0x38, 0x34, 0x34, 0x30, 0x39,
- 0x30, 0x31, 0x32, 0x32, 0x34, 0x39, 0x35,
- 0x33, 0x34, 0x33, 0x30, 0x31, 0x34, 0x36,
- 0x35, 0x34, 0x39, 0x35, 0x38, 0x35, 0x33,
- 0x37, 0x31, 0x30, 0x35, 0x30, 0x37, 0x39,
- 0x404000ca, 0x36, 0x40400153, 0x38, 0x39, 0x32, 0x33,
- 0x35, 0x34, 0x404001c9, 0x39, 0x35, 0x36, 0x31,
- 0x31, 0x32, 0x31, 0x32, 0x39, 0x30, 0x32,
- 0x31, 0x39, 0x36, 0x30, 0x38, 0x36, 0x34,
- 0x30, 0x33, 0x34, 0x34, 0x31, 0x38, 0x31,
- 0x35, 0x39, 0x38, 0x31, 0x33, 0x36, 0x32,
- 0x39, 0x37, 0x37, 0x34, 0x40400074, 0x30, 0x39,
- 0x39, 0x36, 0x30, 0x35, 0x31, 0x38, 0x37,
- 0x30, 0x37, 0x32, 0x31, 0x31, 0x33, 0x34,
- 0x39, 0x40800000, 0x38, 0x33, 0x37, 0x32, 0x39,
- 0x37, 0x38, 0x30, 0x34, 0x39, 0x39, 0x404002da,
- 0x39, 0x37, 0x33, 0x31, 0x37, 0x33, 0x32,
- 0x38, 0x4040018a, 0x36, 0x33, 0x31, 0x38, 0x35,
- 0x40400301, 0x404002e8, 0x34, 0x35, 0x35, 0x33, 0x34,
- 0x36, 0x39, 0x30, 0x38, 0x33, 0x30, 0x32,
- 0x36, 0x34, 0x32, 0x35, 0x32, 0x32, 0x33,
- 0x30, 0x404002e3, 0x40400267, 0x38, 0x35, 0x30, 0x33,
- 0x35, 0x32, 0x36, 0x31, 0x39, 0x33, 0x31,
- 0x31, 0x40400212, 0x31, 0x30, 0x31, 0x30, 0x30,
- 0x30, 0x33, 0x31, 0x33, 0x37, 0x38, 0x33,
- 0x38, 0x37, 0x35, 0x32, 0x38, 0x38, 0x36,
- 0x35, 0x38, 0x37, 0x35, 0x33, 0x33, 0x32,
- 0x30, 0x38, 0x33, 0x38, 0x31, 0x34, 0x32,
- 0x30, 0x36, 0x40400140, 0x4040012b, 0x31, 0x34, 0x37,
- 0x33, 0x30, 0x33, 0x35, 0x39, 0x4080032e, 0x39,
- 0x30, 0x34, 0x32, 0x38, 0x37, 0x35, 0x35,
- 0x34, 0x36, 0x38, 0x37, 0x33, 0x31, 0x31,
- 0x35, 0x39, 0x35, 0x40400355, 0x33, 0x38, 0x38,
- 0x32, 0x33, 0x35, 0x33, 0x37, 0x38, 0x37,
- 0x35, 0x4080037f, 0x39, 0x4040013a, 0x31, 0x40400148, 0x38,
- 0x30, 0x35, 0x33, 0x4040018a, 0x32, 0x32, 0x36,
- 0x38, 0x30, 0x36, 0x36, 0x31, 0x33, 0x30,
- 0x30, 0x31, 0x39, 0x32, 0x37, 0x38, 0x37,
- 0x36, 0x36, 0x31, 0x31, 0x31, 0x39, 0x35,
- 0x39, 0x40400237, 0x36, 0x40800124, 0x38, 0x39, 0x33,
- 0x38, 0x30, 0x39, 0x35, 0x32, 0x35, 0x37,
- 0x32, 0x30, 0x31, 0x30, 0x36, 0x35, 0x34,
- 0x38, 0x35, 0x38, 0x36, 0x33, 0x32, 0x37,
- 0x4040009a, 0x39, 0x33, 0x36, 0x31, 0x35, 0x33,
- 0x40400220, 0x4080015c, 0x32, 0x33, 0x30, 0x33, 0x30,
- 0x31, 0x39, 0x35, 0x32, 0x30, 0x33, 0x35,
- 0x33, 0x30, 0x31, 0x38, 0x35, 0x32, 0x40400171,
- 0x40400075, 0x33, 0x36, 0x32, 0x32, 0x35, 0x39,
- 0x39, 0x34, 0x31, 0x33, 0x40400254, 0x34, 0x39,
- 0x37, 0x32, 0x31, 0x37, 0x404000de, 0x33, 0x34,
- 0x37, 0x39, 0x31, 0x33, 0x31, 0x35, 0x31,
- 0x35, 0x35, 0x37, 0x34, 0x38, 0x35, 0x37,
- 0x32, 0x34, 0x32, 0x34, 0x35, 0x34, 0x31,
- 0x35, 0x30, 0x36, 0x39, 0x4040013f, 0x38, 0x32,
- 0x39, 0x35, 0x33, 0x33, 0x31, 0x31, 0x36,
- 0x38, 0x36, 0x31, 0x37, 0x32, 0x37, 0x38,
- 0x40400337, 0x39, 0x30, 0x37, 0x35, 0x30, 0x39,
- 0x4040010d, 0x37, 0x35, 0x34, 0x36, 0x33, 0x37,
- 0x34, 0x36, 0x34, 0x39, 0x33, 0x39, 0x33,
- 0x31, 0x39, 0x32, 0x35, 0x35, 0x30, 0x36,
- 0x30, 0x34, 0x30, 0x30, 0x39, 0x4040026b, 0x31,
- 0x36, 0x37, 0x31, 0x31, 0x33, 0x39, 0x30,
- 0x30, 0x39, 0x38, 0x40400335, 0x34, 0x30, 0x31,
- 0x32, 0x38, 0x35, 0x38, 0x33, 0x36, 0x31,
- 0x36, 0x30, 0x33, 0x35, 0x36, 0x33, 0x37,
- 0x30, 0x37, 0x36, 0x36, 0x30, 0x31, 0x30,
- 0x34, 0x40400172, 0x38, 0x31, 0x39, 0x34, 0x32,
- 0x39, 0x4080041e, 0x404000ef, 0x4040028b, 0x37, 0x38, 0x33,
- 0x37, 0x34, 0x404004a8, 0x38, 0x32, 0x35, 0x35,
- 0x33, 0x37, 0x40800209, 0x32, 0x36, 0x38, 0x4040002e,
- 0x34, 0x30, 0x34, 0x37, 0x404001d1, 0x34, 0x404004b5,
- 0x4040038d, 0x38, 0x34, 0x404003a8, 0x36, 0x40c0031f, 0x33,
- 0x33, 0x31, 0x33, 0x36, 0x37, 0x37, 0x30,
- 0x32, 0x38, 0x39, 0x38, 0x39, 0x31, 0x35,
- 0x32, 0x40400062, 0x35, 0x32, 0x31, 0x36, 0x32,
- 0x30, 0x35, 0x36, 0x39, 0x36, 0x40400411, 0x30,
- 0x35, 0x38, 0x40400477, 0x35, 0x40400498, 0x35, 0x31,
- 0x31, 0x40400209, 0x38, 0x32, 0x34, 0x33, 0x30,
- 0x30, 0x33, 0x35, 0x35, 0x38, 0x37, 0x36,
- 0x34, 0x30, 0x32, 0x34, 0x37, 0x34, 0x39,
- 0x36, 0x34, 0x37, 0x33, 0x32, 0x36, 0x33,
- 0x4040043e, 0x39, 0x39, 0x32, 0x4040044b, 0x34, 0x32,
- 0x36, 0x39, 0x40c002c5, 0x37, 0x404001d6, 0x34, 0x4040053d,
- 0x4040041d, 0x39, 0x33, 0x34, 0x31, 0x37, 0x404001ad,
- 0x31, 0x32, 0x4040002a, 0x34, 0x4040019e, 0x31, 0x35,
- 0x30, 0x33, 0x30, 0x32, 0x38, 0x36, 0x31,
- 0x38, 0x32, 0x39, 0x37, 0x34, 0x35, 0x35,
- 0x35, 0x37, 0x30, 0x36, 0x37, 0x34, 0x40400135,
- 0x35, 0x30, 0x35, 0x34, 0x39, 0x34, 0x35,
- 0x38, 0x404001c5, 0x39, 0x40400051, 0x35, 0x36, 0x404001ec,
- 0x37, 0x32, 0x31, 0x30, 0x37, 0x39, 0x40400159,
- 0x33, 0x30, 0x4040010a, 0x33, 0x32, 0x31, 0x31,
- 0x36, 0x35, 0x33, 0x34, 0x34, 0x39, 0x38,
- 0x37, 0x32, 0x30, 0x32, 0x37, 0x4040011b, 0x30,
- 0x32, 0x33, 0x36, 0x34, 0x4040022e, 0x35, 0x34,
- 0x39, 0x39, 0x31, 0x31, 0x39, 0x38, 0x40400418,
- 0x34, 0x4040011b, 0x35, 0x33, 0x35, 0x36, 0x36,
- 0x33, 0x36, 0x39, 0x40400450, 0x32, 0x36, 0x35,
- 0x404002e4, 0x37, 0x38, 0x36, 0x32, 0x35, 0x35,
- 0x31, 0x404003da, 0x31, 0x37, 0x35, 0x37, 0x34,
- 0x36, 0x37, 0x32, 0x38, 0x39, 0x30, 0x39,
- 0x37, 0x37, 0x37, 0x37, 0x40800453, 0x30, 0x30,
- 0x30, 0x404005fd, 0x37, 0x30, 0x404004df, 0x36, 0x404003e9,
- 0x34, 0x39, 0x31, 0x4040041e, 0x40400297, 0x32, 0x31,
- 0x34, 0x37, 0x37, 0x32, 0x33, 0x35, 0x30,
- 0x31, 0x34, 0x31, 0x34, 0x40400643, 0x33, 0x35,
- 0x36, 0x404004af, 0x31, 0x36, 0x31, 0x33, 0x36,
- 0x31, 0x31, 0x35, 0x37, 0x33, 0x35, 0x32,
- 0x35, 0x40400504, 0x33, 0x34, 0x4040005b, 0x31, 0x38,
- 0x4040047b, 0x38, 0x34, 0x404005e7, 0x33, 0x33, 0x32,
- 0x33, 0x39, 0x30, 0x37, 0x33, 0x39, 0x34,
- 0x31, 0x34, 0x33, 0x33, 0x33, 0x34, 0x35,
- 0x34, 0x37, 0x37, 0x36, 0x32, 0x34, 0x40400242,
- 0x32, 0x35, 0x31, 0x38, 0x39, 0x38, 0x33,
- 0x35, 0x36, 0x39, 0x34, 0x38, 0x35, 0x35,
- 0x36, 0x32, 0x30, 0x39, 0x39, 0x32, 0x31,
- 0x39, 0x32, 0x32, 0x32, 0x31, 0x38, 0x34,
- 0x32, 0x37, 0x4040023e, 0x32, 0x404000ba, 0x36, 0x38,
- 0x38, 0x37, 0x36, 0x37, 0x31, 0x37, 0x39,
- 0x30, 0x40400055, 0x30, 0x40800106, 0x36, 0x36, 0x404003e7,
- 0x38, 0x38, 0x36, 0x32, 0x37, 0x32, 0x404006dc,
- 0x31, 0x37, 0x38, 0x36, 0x30, 0x38, 0x35,
- 0x37, 0x40400073, 0x33, 0x408002fc, 0x37, 0x39, 0x37,
- 0x36, 0x36, 0x38, 0x31, 0x404002bd, 0x30, 0x30,
- 0x39, 0x35, 0x33, 0x38, 0x38, 0x40400638, 0x33,
- 0x404006a5, 0x30, 0x36, 0x38, 0x30, 0x30, 0x36,
- 0x34, 0x32, 0x32, 0x35, 0x31, 0x32, 0x35,
- 0x32, 0x4040057b, 0x37, 0x33, 0x39, 0x32, 0x40400297,
- 0x40400474, 0x34, 0x408006b3, 0x38, 0x36, 0x32, 0x36,
- 0x39, 0x34, 0x35, 0x404001e5, 0x34, 0x31, 0x39,
- 0x36, 0x35, 0x32, 0x38, 0x35, 0x30, 0x40400099,
- 0x4040039c, 0x31, 0x38, 0x36, 0x33, 0x404001be, 0x34,
- 0x40800154, 0x32, 0x30, 0x33, 0x39, 0x4040058b, 0x34,
- 0x35, 0x404002bc, 0x32, 0x33, 0x37, 0x4040042c, 0x36,
- 0x40400510, 0x35, 0x36, 0x40400638, 0x37, 0x31, 0x39,
- 0x31, 0x37, 0x32, 0x38, 0x40400171, 0x37, 0x36,
- 0x34, 0x36, 0x35, 0x37, 0x35, 0x37, 0x33,
- 0x39, 0x40400101, 0x33, 0x38, 0x39, 0x40400748, 0x38,
- 0x33, 0x32, 0x36, 0x34, 0x35, 0x39, 0x39,
- 0x35, 0x38, 0x404006a7, 0x30, 0x34, 0x37, 0x38,
- 0x404001de, 0x40400328, 0x39, 0x4040002d, 0x36, 0x34, 0x30,
- 0x37, 0x38, 0x39, 0x35, 0x31, 0x4040008e, 0x36,
- 0x38, 0x33, 0x4040012f, 0x32, 0x35, 0x39, 0x35,
- 0x37, 0x30, 0x40400468, 0x38, 0x32, 0x32, 0x404002c8,
- 0x32, 0x4040061b, 0x34, 0x30, 0x37, 0x37, 0x32,
- 0x36, 0x37, 0x31, 0x39, 0x34, 0x37, 0x38,
- 0x40400319, 0x38, 0x32, 0x36, 0x30, 0x31, 0x34,
- 0x37, 0x36, 0x39, 0x39, 0x30, 0x39, 0x404004e8,
- 0x30, 0x31, 0x33, 0x36, 0x33, 0x39, 0x34,
- 0x34, 0x33, 0x4040027f, 0x33, 0x30, 0x40400105, 0x32,
- 0x30, 0x33, 0x34, 0x39, 0x36, 0x32, 0x35,
- 0x32, 0x34, 0x35, 0x31, 0x37, 0x404003b5, 0x39,
- 0x36, 0x35, 0x31, 0x34, 0x33, 0x31, 0x34,
- 0x32, 0x39, 0x38, 0x30, 0x39, 0x31, 0x39,
- 0x30, 0x36, 0x35, 0x39, 0x32, 0x40400282, 0x37,
- 0x32, 0x32, 0x31, 0x36, 0x39, 0x36, 0x34,
- 0x36, 0x40400419, 0x4040007a, 0x35, 0x4040050e, 0x34, 0x40800565,
- 0x38, 0x40400559, 0x39, 0x37, 0x4040057b, 0x35, 0x34,
- 0x4040049d, 0x4040023e, 0x37, 0x4040065a, 0x38, 0x34, 0x36,
- 0x38, 0x31, 0x33, 0x4040008c, 0x36, 0x38, 0x33,
- 0x38, 0x36, 0x38, 0x39, 0x34, 0x32, 0x37,
- 0x37, 0x34, 0x31, 0x35, 0x35, 0x39, 0x39,
- 0x31, 0x38, 0x35, 0x4040005a, 0x32, 0x34, 0x35,
- 0x39, 0x35, 0x33, 0x39, 0x35, 0x39, 0x34,
- 0x33, 0x31, 0x404005b7, 0x37, 0x40400012, 0x36, 0x38,
- 0x30, 0x38, 0x34, 0x35, 0x404002e7, 0x37, 0x33,
- 0x4040081e, 0x39, 0x35, 0x38, 0x34, 0x38, 0x36,
- 0x35, 0x33, 0x38, 0x404006e8, 0x36, 0x32, 0x404000f2,
- 0x36, 0x30, 0x39, 0x404004b6, 0x36, 0x30, 0x38,
- 0x30, 0x35, 0x31, 0x32, 0x34, 0x33, 0x38,
- 0x38, 0x34, 0x4040013a, 0x4040000b, 0x34, 0x31, 0x33,
- 0x4040030f, 0x37, 0x36, 0x32, 0x37, 0x38, 0x40400341,
- 0x37, 0x31, 0x35, 0x4040059b, 0x33, 0x35, 0x39,
- 0x39, 0x37, 0x37, 0x30, 0x30, 0x31, 0x32,
- 0x39, 0x40400472, 0x38, 0x39, 0x34, 0x34, 0x31,
- 0x40400277, 0x36, 0x38, 0x35, 0x35, 0x4040005f, 0x34,
- 0x30, 0x36, 0x33, 0x404008e6, 0x32, 0x30, 0x37,
- 0x32, 0x32, 0x40400158, 0x40800203, 0x34, 0x38, 0x31,
- 0x35, 0x38, 0x40400205, 0x404001fe, 0x4040027a, 0x40400298, 0x33,
- 0x39, 0x34, 0x35, 0x32, 0x32, 0x36, 0x37,
- 0x40c00496, 0x38, 0x4040058a, 0x32, 0x31, 0x404002ea, 0x32,
- 0x40400387, 0x35, 0x34, 0x36, 0x36, 0x36, 0x4040051b,
- 0x32, 0x33, 0x39, 0x38, 0x36, 0x34, 0x35,
- 0x36, 0x404004c4, 0x31, 0x36, 0x33, 0x35, 0x40800253,
- 0x40400811, 0x37, 0x404008ad, 0x39, 0x38, 0x4040045e, 0x39,
- 0x33, 0x36, 0x33, 0x34, 0x4040075b, 0x37, 0x34,
- 0x33, 0x32, 0x34, 0x4040047b, 0x31, 0x35, 0x30,
- 0x37, 0x36, 0x404004bb, 0x37, 0x39, 0x34, 0x35,
- 0x31, 0x30, 0x39, 0x4040003e, 0x30, 0x39, 0x34,
- 0x30, 0x404006a6, 0x38, 0x38, 0x37, 0x39, 0x37,
- 0x31, 0x30, 0x38, 0x39, 0x33, 0x404008f0, 0x36,
- 0x39, 0x31, 0x33, 0x36, 0x38, 0x36, 0x37,
- 0x32, 0x4040025b, 0x404001fe, 0x35, 0x4040053f, 0x40400468, 0x40400801,
- 0x31, 0x37, 0x39, 0x32, 0x38, 0x36, 0x38,
- 0x404008cc, 0x38, 0x37, 0x34, 0x37, 0x4080079e, 0x38,
- 0x32, 0x34, 0x4040097a, 0x38, 0x4040025b, 0x37, 0x31,
- 0x34, 0x39, 0x30, 0x39, 0x36, 0x37, 0x35,
- 0x39, 0x38, 0x404006ef, 0x33, 0x36, 0x35, 0x40400134,
- 0x38, 0x31, 0x4040005c, 0x40400745, 0x40400936, 0x36, 0x38,
- 0x32, 0x39, 0x4040057e, 0x38, 0x37, 0x32, 0x32,
- 0x36, 0x35, 0x38, 0x38, 0x30, 0x40400611, 0x35,
- 0x40400249, 0x34, 0x32, 0x37, 0x30, 0x34, 0x37,
- 0x37, 0x35, 0x35, 0x4040081e, 0x33, 0x37, 0x39,
- 0x36, 0x34, 0x31, 0x34, 0x35, 0x31, 0x35,
- 0x32, 0x404005fd, 0x32, 0x33, 0x34, 0x33, 0x36,
- 0x34, 0x35, 0x34, 0x404005de, 0x34, 0x34, 0x34,
- 0x37, 0x39, 0x35, 0x4040003c, 0x40400523, 0x408008e6, 0x34,
- 0x31, 0x4040052a, 0x33, 0x40400304, 0x35, 0x32, 0x33,
- 0x31, 0x40800841, 0x31, 0x36, 0x36, 0x31, 0x404008b2,
- 0x35, 0x39, 0x36, 0x39, 0x35, 0x33, 0x36,
- 0x32, 0x33, 0x31, 0x34, 0x404005ff, 0x32, 0x34,
- 0x38, 0x34, 0x39, 0x33, 0x37, 0x31, 0x38,
- 0x37, 0x31, 0x31, 0x30, 0x31, 0x34, 0x35,
- 0x37, 0x36, 0x35, 0x34, 0x40400761, 0x30, 0x32,
- 0x37, 0x39, 0x39, 0x33, 0x34, 0x34, 0x30,
- 0x33, 0x37, 0x34, 0x32, 0x30, 0x30, 0x37,
- 0x4040093f, 0x37, 0x38, 0x35, 0x33, 0x39, 0x30,
- 0x36, 0x32, 0x31, 0x39, 0x40800299, 0x40400345, 0x38,
- 0x34, 0x37, 0x408003d2, 0x38, 0x33, 0x33, 0x32,
- 0x31, 0x34, 0x34, 0x35, 0x37, 0x31, 0x40400284,
- 0x40400776, 0x34, 0x33, 0x35, 0x30, 0x40400928, 0x40400468,
- 0x35, 0x33, 0x31, 0x39, 0x31, 0x30, 0x34,
- 0x38, 0x34, 0x38, 0x31, 0x30, 0x30, 0x35,
- 0x33, 0x37, 0x30, 0x36, 0x404008bc, 0x4080059d, 0x40800781,
- 0x31, 0x40400559, 0x37, 0x4040031b, 0x35, 0x404007ec, 0x4040040c,
- 0x36, 0x33, 0x408007dc, 0x34, 0x40400971, 0x4080034e, 0x408003f5,
- 0x38, 0x4080052d, 0x40800887, 0x39, 0x40400187, 0x39, 0x31,
- 0x404008ce, 0x38, 0x31, 0x34, 0x36, 0x37, 0x35,
- 0x31, 0x4040062b, 0x31, 0x32, 0x33, 0x39, 0x40c001a9,
- 0x39, 0x30, 0x37, 0x31, 0x38, 0x36, 0x34,
- 0x39, 0x34, 0x32, 0x33, 0x31, 0x39, 0x36,
- 0x31, 0x35, 0x36, 0x404001ec, 0x404006bc, 0x39, 0x35,
- 0x40400926, 0x40400469, 0x4040011b, 0x36, 0x30, 0x33, 0x38,
- 0x40400a25, 0x4040016f, 0x40400384, 0x36, 0x32, 0x4040045a, 0x35,
- 0x4040084c, 0x36, 0x33, 0x38, 0x39, 0x33, 0x37,
- 0x37, 0x38, 0x37, 0x404008c5, 0x404000f8, 0x39, 0x37,
- 0x39, 0x32, 0x30, 0x37, 0x37, 0x33, 0x404005d7,
- 0x32, 0x31, 0x38, 0x32, 0x35, 0x36, 0x404007df,
- 0x36, 0x36, 0x404006d6, 0x34, 0x32, 0x4080067e, 0x36,
- 0x404006e6, 0x34, 0x34, 0x40400024, 0x35, 0x34, 0x39,
- 0x32, 0x30, 0x32, 0x36, 0x30, 0x35, 0x40400ab3,
- 0x408003e4, 0x32, 0x30, 0x31, 0x34, 0x39, 0x404004d2,
- 0x38, 0x35, 0x30, 0x37, 0x33, 0x40400599, 0x36,
- 0x36, 0x36, 0x30, 0x40400194, 0x32, 0x34, 0x33,
- 0x34, 0x30, 0x40400087, 0x30, 0x4040076b, 0x38, 0x36,
- 0x33, 0x40400956, 0x404007e4, 0x4040042b, 0x40400174, 0x35, 0x37,
- 0x39, 0x36, 0x32, 0x36, 0x38, 0x35, 0x36,
- 0x40400140, 0x35, 0x30, 0x38, 0x40400523, 0x35, 0x38,
- 0x37, 0x39, 0x36, 0x39, 0x39, 0x40400711, 0x35,
- 0x37, 0x34, 0x40400a18, 0x38, 0x34, 0x30, 0x404008b3,
- 0x31, 0x34, 0x35, 0x39, 0x31, 0x4040078c, 0x37,
- 0x30, 0x40400234, 0x30, 0x31, 0x40400be7, 0x31, 0x32,
- 0x40400c74, 0x30, 0x404003c3, 0x33, 0x39, 0x40400b2a, 0x40400112,
- 0x37, 0x31, 0x35, 0x404003b0, 0x34, 0x32, 0x30,
- 0x40800bf2, 0x39, 0x40400bc2, 0x30, 0x37, 0x40400341, 0x40400795,
- 0x40400aaf, 0x40400c62, 0x32, 0x31, 0x40400960, 0x32, 0x35,
- 0x31, 0x4040057b, 0x40400944, 0x39, 0x32, 0x404001b2, 0x38,
- 0x32, 0x36, 0x40400b66, 0x32, 0x40400278, 0x33, 0x32,
- 0x31, 0x35, 0x37, 0x39, 0x31, 0x39, 0x38,
- 0x34, 0x31, 0x34, 0x4080087b, 0x39, 0x31, 0x36,
- 0x34, 0x408006e8, 0x39, 0x40800b58, 0x404008db, 0x37, 0x32,
- 0x32, 0x40400321, 0x35, 0x404008a4, 0x40400141, 0x39, 0x31,
- 0x30, 0x404000bc, 0x40400c5b, 0x35, 0x32, 0x38, 0x30,
- 0x31, 0x37, 0x40400231, 0x37, 0x31, 0x32, 0x40400914,
- 0x38, 0x33, 0x32, 0x40400373, 0x31, 0x40400589, 0x30,
- 0x39, 0x33, 0x35, 0x33, 0x39, 0x36, 0x35,
- 0x37, 0x4040064b, 0x31, 0x30, 0x38, 0x33, 0x40400069,
- 0x35, 0x31, 0x4040077a, 0x40400d5a, 0x31, 0x34, 0x34,
- 0x34, 0x32, 0x31, 0x30, 0x30, 0x40400202, 0x30,
- 0x33, 0x4040019c, 0x31, 0x31, 0x30, 0x33, 0x40400c81,
- 0x40400009, 0x40400026, 0x40c00602, 0x35, 0x31, 0x36, 0x404005d9,
- 0x40800883, 0x4040092a, 0x35, 0x40800c42, 0x38, 0x35, 0x31,
- 0x37, 0x31, 0x34, 0x33, 0x37, 0x40400605, 0x4040006d,
- 0x31, 0x35, 0x35, 0x36, 0x35, 0x30, 0x38,
- 0x38, 0x404003b9, 0x39, 0x38, 0x39, 0x38, 0x35,
- 0x39, 0x39, 0x38, 0x32, 0x33, 0x38, 0x404001cf,
- 0x404009ba, 0x33, 0x4040016c, 0x4040043e, 0x404009c3, 0x38, 0x40800e05,
- 0x33, 0x32, 0x40400107, 0x35, 0x40400305, 0x33, 0x404001ca,
- 0x39, 0x4040041b, 0x39, 0x38, 0x4040087d, 0x34, 0x40400cb8,
- 0x37, 0x4040064b, 0x30, 0x37, 0x404000e5, 0x34, 0x38,
- 0x31, 0x34, 0x31, 0x40400539, 0x38, 0x35, 0x39,
- 0x34, 0x36, 0x31, 0x40400bc9, 0x38, 0x30,
- },
- },
- HuffTest{
- .input = "huffman-rand-1k.input",
- .want = "huffman-rand-1k.{s}.expect",
- .want_no_input = "huffman-rand-1k.{s}.expect-noinput",
- .tokens = &[_]token.Token{
- 0xf8, 0x8b, 0x96, 0x76, 0x48, 0xd, 0x85, 0x94, 0x25, 0x80, 0xaf, 0xc2, 0xfe, 0x8d,
- 0xe8, 0x20, 0xeb, 0x17, 0x86, 0xc9, 0xb7, 0xc5, 0xde, 0x6, 0xea, 0x7d, 0x18, 0x8b,
- 0xe7, 0x3e, 0x7, 0xda, 0xdf, 0xff, 0x6c, 0x73, 0xde, 0xcc, 0xe7, 0x6d, 0x8d, 0x4,
- 0x19, 0x49, 0x7f, 0x47, 0x1f, 0x48, 0x15, 0xb0, 0xe8, 0x9e, 0xf2, 0x31, 0x59, 0xde,
- 0x34, 0xb4, 0x5b, 0xe5, 0xe0, 0x9, 0x11, 0x30, 0xc2, 0x88, 0x5b, 0x7c, 0x5d, 0x14,
- 0x13, 0x6f, 0x23, 0xa9, 0xd, 0xbc, 0x2d, 0x23, 0xbe, 0xd9, 0xed, 0x75, 0x4, 0x6c,
- 0x99, 0xdf, 0xfd, 0x70, 0x66, 0xe6, 0xee, 0xd9, 0xb1, 0x9e, 0x6e, 0x83, 0x59, 0xd5,
- 0xd4, 0x80, 0x59, 0x98, 0x77, 0x89, 0x43, 0x38, 0xc9, 0xaf, 0x30, 0x32, 0x9a, 0x20,
- 0x1b, 0x46, 0x3d, 0x67, 0x6e, 0xd7, 0x72, 0x9e, 0x4e, 0x21, 0x4f, 0xc6, 0xe0, 0xd4,
- 0x7b, 0x4, 0x8d, 0xa5, 0x3, 0xf6, 0x5, 0x9b, 0x6b, 0xdc, 0x2a, 0x93, 0x77, 0x28,
- 0xfd, 0xb4, 0x62, 0xda, 0x20, 0xe7, 0x1f, 0xab, 0x6b, 0x51, 0x43, 0x39, 0x2f, 0xa0,
- 0x92, 0x1, 0x6c, 0x75, 0x3e, 0xf4, 0x35, 0xfd, 0x43, 0x2e, 0xf7, 0xa4, 0x75, 0xda,
- 0xea, 0x9b, 0xa, 0x64, 0xb, 0xe0, 0x23, 0x29, 0xbd, 0xf7, 0xe7, 0x83, 0x3c, 0xfb,
- 0xdf, 0xb3, 0xae, 0x4f, 0xa4, 0x47, 0x55, 0x99, 0xde, 0x2f, 0x96, 0x6e, 0x1c, 0x43,
- 0x4c, 0x87, 0xe2, 0x7c, 0xd9, 0x5f, 0x4c, 0x7c, 0xe8, 0x90, 0x3, 0xdb, 0x30, 0x95,
- 0xd6, 0x22, 0xc, 0x47, 0xb8, 0x4d, 0x6b, 0xbd, 0x24, 0x11, 0xab, 0x2c, 0xd7, 0xbe,
- 0x6e, 0x7a, 0xd6, 0x8, 0xa3, 0x98, 0xd8, 0xdd, 0x15, 0x6a, 0xfa, 0x93, 0x30, 0x1,
- 0x25, 0x1d, 0xa2, 0x74, 0x86, 0x4b, 0x6a, 0x95, 0xe8, 0xe1, 0x4e, 0xe, 0x76, 0xb9,
- 0x49, 0xa9, 0x5f, 0xa0, 0xa6, 0x63, 0x3c, 0x7e, 0x7e, 0x20, 0x13, 0x4f, 0xbb, 0x66,
- 0x92, 0xb8, 0x2e, 0xa4, 0xfa, 0x48, 0xcb, 0xae, 0xb9, 0x3c, 0xaf, 0xd3, 0x1f, 0xe1,
- 0xd5, 0x8d, 0x42, 0x6d, 0xf0, 0xfc, 0x8c, 0xc, 0x0, 0xde, 0x40, 0xab, 0x8b, 0x47,
- 0x97, 0x4e, 0xa8, 0xcf, 0x8e, 0xdb, 0xa6, 0x8b, 0x20, 0x9, 0x84, 0x7a, 0x66, 0xe5,
- 0x98, 0x29, 0x2, 0x95, 0xe6, 0x38, 0x32, 0x60, 0x3, 0xe3, 0x9a, 0x1e, 0x54, 0xe8,
- 0x63, 0x80, 0x48, 0x9c, 0xe7, 0x63, 0x33, 0x6e, 0xa0, 0x65, 0x83, 0xfa, 0xc6, 0xba,
- 0x7a, 0x43, 0x71, 0x5, 0xf5, 0x68, 0x69, 0x85, 0x9c, 0xba, 0x45, 0xcd, 0x6b, 0xb,
- 0x19, 0xd1, 0xbb, 0x7f, 0x70, 0x85, 0x92, 0xd1, 0xb4, 0x64, 0x82, 0xb1, 0xe4, 0x62,
- 0xc5, 0x3c, 0x46, 0x1f, 0x92, 0x31, 0x1c, 0x4e, 0x41, 0x77, 0xf7, 0xe7, 0x87, 0xa2,
- 0xf, 0x6e, 0xe8, 0x92, 0x3, 0x6b, 0xa, 0xe7, 0xa9, 0x3b, 0x11, 0xda, 0x66, 0x8a,
- 0x29, 0xda, 0x79, 0xe1, 0x64, 0x8d, 0xe3, 0x54, 0xd4, 0xf5, 0xef, 0x64, 0x87, 0x3b,
- 0xf4, 0xc2, 0xf4, 0x71, 0x13, 0xa9, 0xe9, 0xe0, 0xa2, 0x6, 0x14, 0xab, 0x5d, 0xa7,
- 0x96, 0x0, 0xd6, 0xc3, 0xcc, 0x57, 0xed, 0x39, 0x6a, 0x25, 0xcd, 0x76, 0xea, 0xba,
- 0x3a, 0xf2, 0xa1, 0x95, 0x5d, 0xe5, 0x71, 0xcf, 0x9c, 0x62, 0x9e, 0x6a, 0xfa, 0xd5,
- 0x31, 0xd1, 0xa8, 0x66, 0x30, 0x33, 0xaa, 0x51, 0x17, 0x13, 0x82, 0x99, 0xc8, 0x14,
- 0x60, 0x9f, 0x4d, 0x32, 0x6d, 0xda, 0x19, 0x26, 0x21, 0xdc, 0x7e, 0x2e, 0x25, 0x67,
- 0x72, 0xca, 0xf, 0x92, 0xcd, 0xf6, 0xd6, 0xcb, 0x97, 0x8a, 0x33, 0x58, 0x73, 0x70,
- 0x91, 0x1d, 0xbf, 0x28, 0x23, 0xa3, 0xc, 0xf1, 0x83, 0xc3, 0xc8, 0x56, 0x77, 0x68,
- 0xe3, 0x82, 0xba, 0xb9, 0x57, 0x56, 0x57, 0x9c, 0xc3, 0xd6, 0x14, 0x5, 0x3c, 0xb1,
- 0xaf, 0x93, 0xc8, 0x8a, 0x57, 0x7f, 0x53, 0xfa, 0x2f, 0xaa, 0x6e, 0x66, 0x83, 0xfa,
- 0x33, 0xd1, 0x21, 0xab, 0x1b, 0x71, 0xb4, 0x7c, 0xda, 0xfd, 0xfb, 0x7f, 0x20, 0xab,
- 0x5e, 0xd5, 0xca, 0xfd, 0xdd, 0xe0, 0xee, 0xda, 0xba, 0xa8, 0x27, 0x99, 0x97, 0x69,
- 0xc1, 0x3c, 0x82, 0x8c, 0xa, 0x5c, 0x2d, 0x5b, 0x88, 0x3e, 0x34, 0x35, 0x86, 0x37,
- 0x46, 0x79, 0xe1, 0xaa, 0x19, 0xfb, 0xaa, 0xde, 0x15, 0x9, 0xd, 0x1a, 0x57, 0xff,
- 0xb5, 0xf, 0xf3, 0x2b, 0x5a, 0x6a, 0x4d, 0x19, 0x77, 0x71, 0x45, 0xdf, 0x4f, 0xb3,
- 0xec, 0xf1, 0xeb, 0x18, 0x53, 0x3e, 0x3b, 0x47, 0x8, 0x9a, 0x73, 0xa0, 0x5c, 0x8c,
- 0x5f, 0xeb, 0xf, 0x3a, 0xc2, 0x43, 0x67, 0xb4, 0x66, 0x67, 0x80, 0x58, 0xe, 0xc1,
- 0xec, 0x40, 0xd4, 0x22, 0x94, 0xca, 0xf9, 0xe8, 0x92, 0xe4, 0x69, 0x38, 0xbe, 0x67,
- 0x64, 0xca, 0x50, 0xc7, 0x6, 0x67, 0x42, 0x6e, 0xa3, 0xf0, 0xb7, 0x6c, 0xf2, 0xe8,
- 0x5f, 0xb1, 0xaf, 0xe7, 0xdb, 0xbb, 0x77, 0xb5, 0xf8, 0xcb, 0x8, 0xc4, 0x75, 0x7e,
- 0xc0, 0xf9, 0x1c, 0x7f, 0x3c, 0x89, 0x2f, 0xd2, 0x58, 0x3a, 0xe2, 0xf8, 0x91, 0xb6,
- 0x7b, 0x24, 0x27, 0xe9, 0xae, 0x84, 0x8b, 0xde, 0x74, 0xac, 0xfd, 0xd9, 0xb7, 0x69,
- 0x2a, 0xec, 0x32, 0x6f, 0xf0, 0x92, 0x84, 0xf1, 0x40, 0xc, 0x8a, 0xbc, 0x39, 0x6e,
- 0x2e, 0x73, 0xd4, 0x6e, 0x8a, 0x74, 0x2a, 0xdc, 0x60, 0x1f, 0xa3, 0x7, 0xde, 0x75,
- 0x8b, 0x74, 0xc8, 0xfe, 0x63, 0x75, 0xf6, 0x3d, 0x63, 0xac, 0x33, 0x89, 0xc3, 0xf0,
- 0xf8, 0x2d, 0x6b, 0xb4, 0x9e, 0x74, 0x8b, 0x5c, 0x33, 0xb4, 0xca, 0xa8, 0xe4, 0x99,
- 0xb6, 0x90, 0xa1, 0xef, 0xf, 0xd3, 0x61, 0xb2, 0xc6, 0x1a, 0x94, 0x7c, 0x44, 0x55,
- 0xf4, 0x45, 0xff, 0x9e, 0xa5, 0x5a, 0xc6, 0xa0, 0xe8, 0x2a, 0xc1, 0x8d, 0x6f, 0x34,
- 0x11, 0xb9, 0xbe, 0x4e, 0xd9, 0x87, 0x97, 0x73, 0xcf, 0x3d, 0x23, 0xae, 0xd5, 0x1a,
- 0x5e, 0xae, 0x5d, 0x6a, 0x3, 0xf9, 0x22, 0xd, 0x10, 0xd9, 0x47, 0x69, 0x15, 0x3f,
- 0xee, 0x52, 0xa3, 0x8, 0xd2, 0x3c, 0x51, 0xf4, 0xf8, 0x9d, 0xe4, 0x98, 0x89, 0xc8,
- 0x67, 0x39, 0xd5, 0x5e, 0x35, 0x78, 0x27, 0xe8, 0x3c, 0x80, 0xae, 0x79, 0x71, 0xd2,
- 0x93, 0xf4, 0xaa, 0x51, 0x12, 0x1c, 0x4b, 0x1b, 0xe5, 0x6e, 0x15, 0x6f, 0xe4, 0xbb,
- 0x51, 0x9b, 0x45, 0x9f, 0xf9, 0xc4, 0x8c, 0x2a, 0xfb, 0x1a, 0xdf, 0x55, 0xd3, 0x48,
- 0x93, 0x27, 0x1, 0x26, 0xc2, 0x6b, 0x55, 0x6d, 0xa2, 0xfb, 0x84, 0x8b, 0xc9, 0x9e,
- 0x28, 0xc2, 0xef, 0x1a, 0x24, 0xec, 0x9b, 0xae, 0xbd, 0x60, 0xe9, 0x15, 0x35, 0xee,
- 0x42, 0xa4, 0x33, 0x5b, 0xfa, 0xf, 0xb6, 0xf7, 0x1, 0xa6, 0x2, 0x4c, 0xca, 0x90,
- 0x58, 0x3a, 0x96, 0x41, 0xe7, 0xcb, 0x9, 0x8c, 0xdb, 0x85, 0x4d, 0xa8, 0x89, 0xf3,
- 0xb5, 0x8e, 0xfd, 0x75, 0x5b, 0x4f, 0xed, 0xde, 0x3f, 0xeb, 0x38, 0xa3, 0xbe, 0xb0,
- 0x73, 0xfc, 0xb8, 0x54, 0xf7, 0x4c, 0x30, 0x67, 0x2e, 0x38, 0xa2, 0x54, 0x18, 0xba,
- 0x8, 0xbf, 0xf2, 0x39, 0xd5, 0xfe, 0xa5, 0x41, 0xc6, 0x66, 0x66, 0xba, 0x81, 0xef,
- 0x67, 0xe4, 0xe6, 0x3c, 0xc, 0xca, 0xa4, 0xa, 0x79, 0xb3, 0x57, 0x8b, 0x8a, 0x75,
- 0x98, 0x18, 0x42, 0x2f, 0x29, 0xa3, 0x82, 0xef, 0x9f, 0x86, 0x6, 0x23, 0xe1, 0x75,
- 0xfa, 0x8, 0xb1, 0xde, 0x17, 0x4a,
- },
- },
- HuffTest{
- .input = "huffman-rand-limit.input",
- .want = "huffman-rand-limit.{s}.expect",
- .want_no_input = "huffman-rand-limit.{s}.expect-noinput",
- .tokens = &[_]token.Token{
- 0x61, 0x51c00000, 0xa, 0xf8, 0x8b, 0x96, 0x76, 0x48, 0xa, 0x85, 0x94, 0x25, 0x80,
- 0xaf, 0xc2, 0xfe, 0x8d, 0xe8, 0x20, 0xeb, 0x17, 0x86, 0xc9, 0xb7, 0xc5, 0xde,
- 0x6, 0xea, 0x7d, 0x18, 0x8b, 0xe7, 0x3e, 0x7, 0xda, 0xdf, 0xff, 0x6c, 0x73,
- 0xde, 0xcc, 0xe7, 0x6d, 0x8d, 0x4, 0x19, 0x49, 0x7f, 0x47, 0x1f, 0x48, 0x15,
- 0xb0, 0xe8, 0x9e, 0xf2, 0x31, 0x59, 0xde, 0x34, 0xb4, 0x5b, 0xe5, 0xe0, 0x9,
- 0x11, 0x30, 0xc2, 0x88, 0x5b, 0x7c, 0x5d, 0x14, 0x13, 0x6f, 0x23, 0xa9, 0xa,
- 0xbc, 0x2d, 0x23, 0xbe, 0xd9, 0xed, 0x75, 0x4, 0x6c, 0x99, 0xdf, 0xfd, 0x70,
- 0x66, 0xe6, 0xee, 0xd9, 0xb1, 0x9e, 0x6e, 0x83, 0x59, 0xd5, 0xd4, 0x80, 0x59,
- 0x98, 0x77, 0x89, 0x43, 0x38, 0xc9, 0xaf, 0x30, 0x32, 0x9a, 0x20, 0x1b, 0x46,
- 0x3d, 0x67, 0x6e, 0xd7, 0x72, 0x9e, 0x4e, 0x21, 0x4f, 0xc6, 0xe0, 0xd4, 0x7b,
- 0x4, 0x8d, 0xa5, 0x3, 0xf6, 0x5, 0x9b, 0x6b, 0xdc, 0x2a, 0x93, 0x77, 0x28,
- 0xfd, 0xb4, 0x62, 0xda, 0x20, 0xe7, 0x1f, 0xab, 0x6b, 0x51, 0x43, 0x39, 0x2f,
- 0xa0, 0x92, 0x1, 0x6c, 0x75, 0x3e, 0xf4, 0x35, 0xfd, 0x43, 0x2e, 0xf7, 0xa4,
- 0x75, 0xda, 0xea, 0x9b, 0xa,
- },
- },
- HuffTest{
- .input = "huffman-shifts.input",
- .want = "huffman-shifts.{s}.expect",
- .want_no_input = "huffman-shifts.{s}.expect-noinput",
- .tokens = &[_]token.Token{
- 0x31, 0x30, 0x7fc00001, 0x7fc00001, 0x7fc00001, 0x7fc00001, 0x7fc00001,
- 0x7fc00001, 0x7fc00001, 0x7fc00001, 0x7fc00001, 0x7fc00001, 0x7fc00001, 0x7fc00001,
- 0x7fc00001, 0x7fc00001, 0x7fc00001, 0x52400001, 0xd, 0xa, 0x32,
- 0x33, 0x7fc00001, 0x7fc00001, 0x7fc00001, 0x7fc00001, 0x7fc00001, 0x7fc00001,
- 0x7fc00001, 0x7fc00001, 0x7fc00001, 0x7f400001,
- },
- },
- HuffTest{
- .input = "huffman-text-shift.input",
- .want = "huffman-text-shift.{s}.expect",
- .want_no_input = "huffman-text-shift.{s}.expect-noinput",
- .tokens = &[_]token.Token{
- 0x2f, 0x2f, 0x43, 0x6f, 0x70, 0x79, 0x72, 0x69, 0x67, 0x68,
- 0x74, 0x32, 0x30, 0x30, 0x39, 0x54, 0x68, 0x47, 0x6f, 0x41,
- 0x75, 0x74, 0x68, 0x6f, 0x72, 0x2e, 0x41, 0x6c, 0x6c, 0x40800016,
- 0x72, 0x72, 0x76, 0x64, 0x2e, 0xd, 0xa, 0x2f, 0x2f, 0x55,
- 0x6f, 0x66, 0x74, 0x68, 0x69, 0x6f, 0x75, 0x72, 0x63, 0x63,
- 0x6f, 0x64, 0x69, 0x67, 0x6f, 0x76, 0x72, 0x6e, 0x64, 0x62,
- 0x79, 0x42, 0x53, 0x44, 0x2d, 0x74, 0x79, 0x6c, 0x40400020, 0x6c,
- 0x69, 0x63, 0x6e, 0x74, 0x68, 0x74, 0x63, 0x6e, 0x62, 0x66,
- 0x6f, 0x75, 0x6e, 0x64, 0x69, 0x6e, 0x74, 0x68, 0x4c, 0x49,
- 0x43, 0x45, 0x4e, 0x53, 0x45, 0x66, 0x69, 0x6c, 0x2e, 0xd,
- 0xa, 0xd, 0xa, 0x70, 0x63, 0x6b, 0x67, 0x6d, 0x69, 0x6e,
- 0x4040000a, 0x69, 0x6d, 0x70, 0x6f, 0x72, 0x74, 0x22, 0x6f, 0x22,
- 0x4040000c, 0x66, 0x75, 0x6e, 0x63, 0x6d, 0x69, 0x6e, 0x28, 0x29,
- 0x7b, 0xd, 0xa, 0x9, 0x76, 0x72, 0x62, 0x3d, 0x6d, 0x6b,
- 0x28, 0x5b, 0x5d, 0x62, 0x79, 0x74, 0x2c, 0x36, 0x35, 0x35,
- 0x33, 0x35, 0x29, 0xd, 0xa, 0x9, 0x66, 0x2c, 0x5f, 0x3a,
- 0x3d, 0x6f, 0x2e, 0x43, 0x72, 0x74, 0x28, 0x22, 0x68, 0x75,
- 0x66, 0x66, 0x6d, 0x6e, 0x2d, 0x6e, 0x75, 0x6c, 0x6c, 0x2d,
- 0x6d, 0x78, 0x2e, 0x69, 0x6e, 0x22, 0x40800021, 0x2e, 0x57, 0x72,
- 0x69, 0x74, 0x28, 0x62, 0x29, 0xd, 0xa, 0x7d, 0xd, 0xa,
- 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a,
- 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54,
- 0x55, 0x56, 0x58, 0x78, 0x79, 0x7a, 0x21, 0x22, 0x23, 0xc2,
- 0xa4, 0x25, 0x26, 0x2f, 0x3f, 0x22,
- },
- },
- HuffTest{
- .input = "huffman-text.input",
- .want = "huffman-text.{s}.expect",
- .want_no_input = "huffman-text.{s}.expect-noinput",
- .tokens = &[_]token.Token{
- 0x2f, 0x2f, 0x20, 0x7a, 0x69, 0x67, 0x20, 0x76,
- 0x30, 0x2e, 0x31, 0x30, 0x2e, 0x30, 0x0a, 0x2f,
- 0x2f, 0x20, 0x63, 0x72, 0x65, 0x61, 0x74, 0x65,
- 0x20, 0x61, 0x20, 0x66, 0x69, 0x6c, 0x65, 0x40400004,
- 0x6c, 0x65, 0x64, 0x20, 0x77, 0x69, 0x74, 0x68,
- 0x20, 0x30, 0x78, 0x30, 0x30, 0x0a, 0x63, 0x6f,
- 0x6e, 0x73, 0x74, 0x20, 0x73, 0x74, 0x64, 0x20,
- 0x3d, 0x20, 0x40, 0x69, 0x6d, 0x70, 0x6f, 0x72,
- 0x74, 0x28, 0x22, 0x73, 0x74, 0x64, 0x22, 0x29,
- 0x3b, 0x0a, 0x0a, 0x70, 0x75, 0x62, 0x20, 0x66,
- 0x6e, 0x20, 0x6d, 0x61, 0x69, 0x6e, 0x28, 0x29,
- 0x20, 0x21, 0x76, 0x6f, 0x69, 0x64, 0x20, 0x7b,
- 0x0a, 0x20, 0x20, 0x20, 0x20, 0x76, 0x61, 0x72,
- 0x20, 0x62, 0x20, 0x3d, 0x20, 0x5b, 0x31, 0x5d,
- 0x75, 0x38, 0x7b, 0x30, 0x7d, 0x20, 0x2a, 0x2a,
- 0x20, 0x36, 0x35, 0x35, 0x33, 0x35, 0x3b, 0x4080001e,
- 0x40c00055, 0x66, 0x20, 0x3d, 0x20, 0x74, 0x72, 0x79,
- 0x4040005d, 0x2e, 0x66, 0x73, 0x2e, 0x63, 0x77, 0x64,
- 0x28, 0x29, 0x2e, 0x40c0008f, 0x46, 0x69, 0x6c, 0x65,
- 0x28, 0x4080002a, 0x40400000, 0x22, 0x68, 0x75, 0x66, 0x66,
- 0x6d, 0x61, 0x6e, 0x2d, 0x6e, 0x75, 0x6c, 0x6c,
- 0x2d, 0x6d, 0x61, 0x78, 0x2e, 0x69, 0x6e, 0x22,
- 0x2c, 0x4180001e, 0x2e, 0x7b, 0x20, 0x2e, 0x72, 0x65,
- 0x61, 0x64, 0x4080004e, 0x75, 0x65, 0x20, 0x7d, 0x40c0001a,
- 0x29, 0x40c0006b, 0x64, 0x65, 0x66, 0x65, 0x72, 0x20,
- 0x66, 0x2e, 0x63, 0x6c, 0x6f, 0x73, 0x65, 0x28,
- 0x404000b6, 0x40400015, 0x5f, 0x4100007b, 0x66, 0x2e, 0x77, 0x72,
- 0x69, 0x74, 0x65, 0x41, 0x6c, 0x6c, 0x28, 0x62,
- 0x5b, 0x30, 0x2e, 0x2e, 0x5d, 0x29, 0x3b, 0x0a,
- 0x7d, 0x0a,
- },
- },
- HuffTest{
- .input = "huffman-zero.input",
- .want = "huffman-zero.{s}.expect",
- .want_no_input = "huffman-zero.{s}.expect-noinput",
- .tokens = &[_]token.Token{ 0x30, ml, 0x4b800000 },
- },
- HuffTest{
- .input = "",
- .want = "",
- .want_no_input = "null-long-match.{s}.expect-noinput",
- .tokens = &[_]token.Token{
- 0x0, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
- ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
- ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
- ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
- ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
- ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
- ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
- ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
- ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
- ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
- ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
- ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
- ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
- ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
- ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
- ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
- ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
- ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
- ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
- ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
- ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
- ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
- ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
- ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
- ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
- ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
- ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
- ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
- ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
- ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
- ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
- ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
- ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
- ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
- ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
- ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
- ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
- ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
- ml, ml, ml, 0x41400000,
- },
- },
-};
-
-const TestType = enum {
- write_block,
- write_dyn_block, // write dynamic block
- write_huffman_block,
-
- fn to_s(self: TestType) []const u8 {
- return switch (self) {
- .write_block => "wb",
- .write_dyn_block => "dyn",
- .write_huffman_block => "huff",
- };
- }
-};
-
-test "writeBlock" {
- // tests if the writeBlock encoding has changed.
-
- const ttype: TestType = .write_block;
- try testBlock(writeBlockTests[0], ttype);
- try testBlock(writeBlockTests[1], ttype);
- try testBlock(writeBlockTests[2], ttype);
- try testBlock(writeBlockTests[3], ttype);
- try testBlock(writeBlockTests[4], ttype);
- try testBlock(writeBlockTests[5], ttype);
- try testBlock(writeBlockTests[6], ttype);
- try testBlock(writeBlockTests[7], ttype);
- try testBlock(writeBlockTests[8], ttype);
-}
-
-test "writeBlockDynamic" {
- // tests if the writeBlockDynamic encoding has changed.
-
- const ttype: TestType = .write_dyn_block;
- try testBlock(writeBlockTests[0], ttype);
- try testBlock(writeBlockTests[1], ttype);
- try testBlock(writeBlockTests[2], ttype);
- try testBlock(writeBlockTests[3], ttype);
- try testBlock(writeBlockTests[4], ttype);
- try testBlock(writeBlockTests[5], ttype);
- try testBlock(writeBlockTests[6], ttype);
- try testBlock(writeBlockTests[7], ttype);
- try testBlock(writeBlockTests[8], ttype);
-}
-
-// testBlock tests a block against its references,
-// or regenerate the references, if "-update" flag is set.
-fn testBlock(comptime ht: HuffTest, comptime ttype: TestType) !void {
- if (ht.input.len != 0 and ht.want.len != 0) {
- const want_name = comptime fmt.comptimePrint(ht.want, .{ttype.to_s()});
- const input = @embedFile("testdata/" ++ ht.input);
- const want = @embedFile("testdata/" ++ want_name);
-
- var buf = ArrayList(u8).init(testing.allocator);
- var bw = try huffmanBitWriter(testing.allocator, buf.writer());
- try writeToType(ttype, &bw, ht.tokens, input);
-
- var got = buf.items;
- try testing.expectEqualSlices(u8, want, got); // expect writeBlock to yield expected result
-
- // Test if the writer produces the same output after reset.
- buf.deinit();
- buf = ArrayList(u8).init(testing.allocator);
- defer buf.deinit();
-
- bw.reset(buf.writer());
- defer bw.deinit();
-
- try writeToType(ttype, &bw, ht.tokens, input);
- try bw.flush();
- got = buf.items;
- try testing.expectEqualSlices(u8, want, got); // expect writeBlock to yield expected result
- try testWriterEOF(.write_block, ht.tokens, input);
- }
-
- const want_name_no_input = comptime fmt.comptimePrint(ht.want_no_input, .{ttype.to_s()});
- const want_ni = @embedFile("testdata/" ++ want_name_no_input);
-
- var buf = ArrayList(u8).init(testing.allocator);
- var bw = try huffmanBitWriter(testing.allocator, buf.writer());
-
- try writeToType(ttype, &bw, ht.tokens, null);
-
- var got = buf.items;
- try testing.expectEqualSlices(u8, want_ni, got); // expect writeBlock to yield expected result
- try expect(got[0] & 1 != 1); // expect no EOF
-
- // Test if the writer produces the same output after reset.
- buf.deinit();
- buf = ArrayList(u8).init(testing.allocator);
- defer buf.deinit();
-
- bw.reset(buf.writer());
- defer bw.deinit();
-
- try writeToType(ttype, &bw, ht.tokens, null);
- try bw.flush();
- got = buf.items;
-
- try testing.expectEqualSlices(u8, want_ni, got); // expect writeBlock to yield expected result
- try testWriterEOF(.write_block, ht.tokens, &[0]u8{});
-}
-
-fn writeToType(ttype: TestType, bw: anytype, tok: []const token.Token, input: ?[]const u8) !void {
- switch (ttype) {
- .write_block => try bw.writeBlock(tok, false, input),
- .write_dyn_block => try bw.writeBlockDynamic(tok, false, input),
- else => unreachable,
- }
- try bw.flush();
-}
-
-// Tests if the written block contains an EOF marker.
-fn testWriterEOF(ttype: TestType, ht_tokens: []const token.Token, input: []const u8) !void {
- var buf = ArrayList(u8).init(testing.allocator);
- defer buf.deinit();
- var bw = try huffmanBitWriter(testing.allocator, buf.writer());
- defer bw.deinit();
-
- switch (ttype) {
- .write_block => try bw.writeBlock(ht_tokens, true, input),
- .write_dyn_block => try bw.writeBlockDynamic(ht_tokens, true, input),
- .write_huffman_block => try bw.writeBlockHuff(true, input),
- }
-
- try bw.flush();
-
- const b = buf.items;
- try expect(b.len > 0);
- try expect(b[0] & 1 == 1);
-}
diff --git a/lib/std/compress/deflate/huffman_code.zig b/lib/std/compress/deflate/huffman_code.zig
deleted file mode 100644
index c484d71fad..0000000000
--- a/lib/std/compress/deflate/huffman_code.zig
+++ /dev/null
@@ -1,432 +0,0 @@
-const std = @import("std");
-const assert = std.debug.assert;
-const math = std.math;
-const mem = std.mem;
-const sort = std.sort;
-const testing = std.testing;
-
-const Allocator = std.mem.Allocator;
-
-const bu = @import("bits_utils.zig");
-const deflate_const = @import("deflate_const.zig");
-
-const max_bits_limit = 16;
-
-const LiteralNode = struct {
- literal: u16,
- freq: u16,
-};
-
-// Describes the state of the constructed tree for a given depth.
-const LevelInfo = struct {
- // Our level. for better printing
- level: u32,
-
- // The frequency of the last node at this level
- last_freq: u32,
-
- // The frequency of the next character to add to this level
- next_char_freq: u32,
-
- // The frequency of the next pair (from level below) to add to this level.
- // Only valid if the "needed" value of the next lower level is 0.
- next_pair_freq: u32,
-
- // The number of chains remaining to generate for this level before moving
- // up to the next level
- needed: u32,
-};
-
-// hcode is a huffman code with a bit code and bit length.
-pub const HuffCode = struct {
- code: u16 = 0,
- len: u16 = 0,
-
- // set sets the code and length of an hcode.
- fn set(self: *HuffCode, code: u16, length: u16) void {
- self.len = length;
- self.code = code;
- }
-};
-
-pub const HuffmanEncoder = struct {
- codes: []HuffCode,
- freq_cache: []LiteralNode = undefined,
- bit_count: [17]u32 = undefined,
- lns: []LiteralNode = undefined, // sorted by literal, stored to avoid repeated allocation in generate
- lfs: []LiteralNode = undefined, // sorted by frequency, stored to avoid repeated allocation in generate
- allocator: Allocator,
-
- pub fn deinit(self: *HuffmanEncoder) void {
- self.allocator.free(self.codes);
- self.allocator.free(self.freq_cache);
- }
-
- // Update this Huffman Code object to be the minimum code for the specified frequency count.
- //
- // freq An array of frequencies, in which frequency[i] gives the frequency of literal i.
- // max_bits The maximum number of bits to use for any literal.
- pub fn generate(self: *HuffmanEncoder, freq: []u16, max_bits: u32) void {
- var list = self.freq_cache[0 .. freq.len + 1];
- // Number of non-zero literals
- var count: u32 = 0;
- // Set list to be the set of all non-zero literals and their frequencies
- for (freq, 0..) |f, i| {
- if (f != 0) {
- list[count] = LiteralNode{ .literal = @as(u16, @intCast(i)), .freq = f };
- count += 1;
- } else {
- list[count] = LiteralNode{ .literal = 0x00, .freq = 0 };
- self.codes[i].len = 0;
- }
- }
- list[freq.len] = LiteralNode{ .literal = 0x00, .freq = 0 };
-
- list = list[0..count];
- if (count <= 2) {
- // Handle the small cases here, because they are awkward for the general case code. With
- // two or fewer literals, everything has bit length 1.
- for (list, 0..) |node, i| {
- // "list" is in order of increasing literal value.
- self.codes[node.literal].set(@as(u16, @intCast(i)), 1);
- }
- return;
- }
- self.lfs = list;
- mem.sort(LiteralNode, self.lfs, {}, byFreq);
-
- // Get the number of literals for each bit count
- const bit_count = self.bitCounts(list, max_bits);
- // And do the assignment
- self.assignEncodingAndSize(bit_count, list);
- }
-
- pub fn bitLength(self: *HuffmanEncoder, freq: []u16) u32 {
- var total: u32 = 0;
- for (freq, 0..) |f, i| {
- if (f != 0) {
- total += @as(u32, @intCast(f)) * @as(u32, @intCast(self.codes[i].len));
- }
- }
- return total;
- }
-
- // Return the number of literals assigned to each bit size in the Huffman encoding
- //
- // This method is only called when list.len >= 3
- // The cases of 0, 1, and 2 literals are handled by special case code.
- //
- // list: An array of the literals with non-zero frequencies
- // and their associated frequencies. The array is in order of increasing
- // frequency, and has as its last element a special element with frequency
- // std.math.maxInt(i32)
- //
- // max_bits: The maximum number of bits that should be used to encode any literal.
- // Must be less than 16.
- //
- // Returns an integer array in which array[i] indicates the number of literals
- // that should be encoded in i bits.
- fn bitCounts(self: *HuffmanEncoder, list: []LiteralNode, max_bits_to_use: usize) []u32 {
- var max_bits = max_bits_to_use;
- const n = list.len;
-
- assert(max_bits < max_bits_limit);
-
- // The tree can't have greater depth than n - 1, no matter what. This
- // saves a little bit of work in some small cases
- max_bits = @min(max_bits, n - 1);
-
- // Create information about each of the levels.
- // A bogus "Level 0" whose sole purpose is so that
- // level1.prev.needed == 0. This makes level1.next_pair_freq
- // be a legitimate value that never gets chosen.
- var levels: [max_bits_limit]LevelInfo = mem.zeroes([max_bits_limit]LevelInfo);
- // leaf_counts[i] counts the number of literals at the left
- // of ancestors of the rightmost node at level i.
- // leaf_counts[i][j] is the number of literals at the left
- // of the level j ancestor.
- var leaf_counts: [max_bits_limit][max_bits_limit]u32 = mem.zeroes([max_bits_limit][max_bits_limit]u32);
-
- {
- var level = @as(u32, 1);
- while (level <= max_bits) : (level += 1) {
- // For every level, the first two items are the first two characters.
- // We initialize the levels as if we had already figured this out.
- levels[level] = LevelInfo{
- .level = level,
- .last_freq = list[1].freq,
- .next_char_freq = list[2].freq,
- .next_pair_freq = list[0].freq + list[1].freq,
- .needed = 0,
- };
- leaf_counts[level][level] = 2;
- if (level == 1) {
- levels[level].next_pair_freq = math.maxInt(i32);
- }
- }
- }
-
- // We need a total of 2*n - 2 items at top level and have already generated 2.
- levels[max_bits].needed = 2 * @as(u32, @intCast(n)) - 4;
-
- {
- var level = max_bits;
- while (true) {
- var l = &levels[level];
- if (l.next_pair_freq == math.maxInt(i32) and l.next_char_freq == math.maxInt(i32)) {
- // We've run out of both leafs and pairs.
- // End all calculations for this level.
- // To make sure we never come back to this level or any lower level,
- // set next_pair_freq impossibly large.
- l.needed = 0;
- levels[level + 1].next_pair_freq = math.maxInt(i32);
- level += 1;
- continue;
- }
-
- const prev_freq = l.last_freq;
- if (l.next_char_freq < l.next_pair_freq) {
- // The next item on this row is a leaf node.
- const next = leaf_counts[level][level] + 1;
- l.last_freq = l.next_char_freq;
- // Lower leaf_counts are the same of the previous node.
- leaf_counts[level][level] = next;
- if (next >= list.len) {
- l.next_char_freq = maxNode().freq;
- } else {
- l.next_char_freq = list[next].freq;
- }
- } else {
- // The next item on this row is a pair from the previous row.
- // next_pair_freq isn't valid until we generate two
- // more values in the level below
- l.last_freq = l.next_pair_freq;
- // Take leaf counts from the lower level, except counts[level] remains the same.
- @memcpy(leaf_counts[level][0..level], leaf_counts[level - 1][0..level]);
- levels[l.level - 1].needed = 2;
- }
-
- l.needed -= 1;
- if (l.needed == 0) {
- // We've done everything we need to do for this level.
- // Continue calculating one level up. Fill in next_pair_freq
- // of that level with the sum of the two nodes we've just calculated on
- // this level.
- if (l.level == max_bits) {
- // All done!
- break;
- }
- levels[l.level + 1].next_pair_freq = prev_freq + l.last_freq;
- level += 1;
- } else {
- // If we stole from below, move down temporarily to replenish it.
- while (levels[level - 1].needed > 0) {
- level -= 1;
- if (level == 0) {
- break;
- }
- }
- }
- }
- }
-
- // Somethings is wrong if at the end, the top level is null or hasn't used
- // all of the leaves.
- assert(leaf_counts[max_bits][max_bits] == n);
-
- var bit_count = self.bit_count[0 .. max_bits + 1];
- var bits: u32 = 1;
- const counts = &leaf_counts[max_bits];
- {
- var level = max_bits;
- while (level > 0) : (level -= 1) {
- // counts[level] gives the number of literals requiring at least "bits"
- // bits to encode.
- bit_count[bits] = counts[level] - counts[level - 1];
- bits += 1;
- if (level == 0) {
- break;
- }
- }
- }
- return bit_count;
- }
-
- // Look at the leaves and assign them a bit count and an encoding as specified
- // in RFC 1951 3.2.2
- fn assignEncodingAndSize(self: *HuffmanEncoder, bit_count: []u32, list_arg: []LiteralNode) void {
- var code = @as(u16, 0);
- var list = list_arg;
-
- for (bit_count, 0..) |bits, n| {
- code <<= 1;
- if (n == 0 or bits == 0) {
- continue;
- }
- // The literals list[list.len-bits] .. list[list.len-bits]
- // are encoded using "bits" bits, and get the values
- // code, code + 1, .... The code values are
- // assigned in literal order (not frequency order).
- const chunk = list[list.len - @as(u32, @intCast(bits)) ..];
-
- self.lns = chunk;
- mem.sort(LiteralNode, self.lns, {}, byLiteral);
-
- for (chunk) |node| {
- self.codes[node.literal] = HuffCode{
- .code = bu.bitReverse(u16, code, @as(u5, @intCast(n))),
- .len = @as(u16, @intCast(n)),
- };
- code += 1;
- }
- list = list[0 .. list.len - @as(u32, @intCast(bits))];
- }
- }
-};
-
-fn maxNode() LiteralNode {
- return LiteralNode{
- .literal = math.maxInt(u16),
- .freq = math.maxInt(u16),
- };
-}
-
-pub fn newHuffmanEncoder(allocator: Allocator, size: u32) !HuffmanEncoder {
- return HuffmanEncoder{
- .codes = try allocator.alloc(HuffCode, size),
- // Allocate a reusable buffer with the longest possible frequency table.
- // (deflate_const.max_num_frequencies).
- .freq_cache = try allocator.alloc(LiteralNode, deflate_const.max_num_frequencies + 1),
- .allocator = allocator,
- };
-}
-
-// Generates a HuffmanCode corresponding to the fixed literal table
-pub fn generateFixedLiteralEncoding(allocator: Allocator) !HuffmanEncoder {
- const h = try newHuffmanEncoder(allocator, deflate_const.max_num_frequencies);
- var codes = h.codes;
- var ch: u16 = 0;
-
- while (ch < deflate_const.max_num_frequencies) : (ch += 1) {
- var bits: u16 = undefined;
- var size: u16 = undefined;
- switch (ch) {
- 0...143 => {
- // size 8, 000110000 .. 10111111
- bits = ch + 48;
- size = 8;
- },
- 144...255 => {
- // size 9, 110010000 .. 111111111
- bits = ch + 400 - 144;
- size = 9;
- },
- 256...279 => {
- // size 7, 0000000 .. 0010111
- bits = ch - 256;
- size = 7;
- },
- else => {
- // size 8, 11000000 .. 11000111
- bits = ch + 192 - 280;
- size = 8;
- },
- }
- codes[ch] = HuffCode{ .code = bu.bitReverse(u16, bits, @as(u5, @intCast(size))), .len = size };
- }
- return h;
-}
-
-pub fn generateFixedOffsetEncoding(allocator: Allocator) !HuffmanEncoder {
- const h = try newHuffmanEncoder(allocator, 30);
- var codes = h.codes;
- for (codes, 0..) |_, ch| {
- codes[ch] = HuffCode{ .code = bu.bitReverse(u16, @as(u16, @intCast(ch)), 5), .len = 5 };
- }
- return h;
-}
-
-fn byLiteral(context: void, a: LiteralNode, b: LiteralNode) bool {
- _ = context;
- return a.literal < b.literal;
-}
-
-fn byFreq(context: void, a: LiteralNode, b: LiteralNode) bool {
- _ = context;
- if (a.freq == b.freq) {
- return a.literal < b.literal;
- }
- return a.freq < b.freq;
-}
-
-test "generate a Huffman code from an array of frequencies" {
- var freqs: [19]u16 = [_]u16{
- 8, // 0
- 1, // 1
- 1, // 2
- 2, // 3
- 5, // 4
- 10, // 5
- 9, // 6
- 1, // 7
- 0, // 8
- 0, // 9
- 0, // 10
- 0, // 11
- 0, // 12
- 0, // 13
- 0, // 14
- 0, // 15
- 1, // 16
- 3, // 17
- 5, // 18
- };
-
- var enc = try newHuffmanEncoder(testing.allocator, freqs.len);
- defer enc.deinit();
- enc.generate(freqs[0..], 7);
-
- try testing.expectEqual(@as(u32, 141), enc.bitLength(freqs[0..]));
-
- try testing.expectEqual(@as(usize, 3), enc.codes[0].len);
- try testing.expectEqual(@as(usize, 6), enc.codes[1].len);
- try testing.expectEqual(@as(usize, 6), enc.codes[2].len);
- try testing.expectEqual(@as(usize, 5), enc.codes[3].len);
- try testing.expectEqual(@as(usize, 3), enc.codes[4].len);
- try testing.expectEqual(@as(usize, 2), enc.codes[5].len);
- try testing.expectEqual(@as(usize, 2), enc.codes[6].len);
- try testing.expectEqual(@as(usize, 6), enc.codes[7].len);
- try testing.expectEqual(@as(usize, 0), enc.codes[8].len);
- try testing.expectEqual(@as(usize, 0), enc.codes[9].len);
- try testing.expectEqual(@as(usize, 0), enc.codes[10].len);
- try testing.expectEqual(@as(usize, 0), enc.codes[11].len);
- try testing.expectEqual(@as(usize, 0), enc.codes[12].len);
- try testing.expectEqual(@as(usize, 0), enc.codes[13].len);
- try testing.expectEqual(@as(usize, 0), enc.codes[14].len);
- try testing.expectEqual(@as(usize, 0), enc.codes[15].len);
- try testing.expectEqual(@as(usize, 6), enc.codes[16].len);
- try testing.expectEqual(@as(usize, 5), enc.codes[17].len);
- try testing.expectEqual(@as(usize, 3), enc.codes[18].len);
-
- try testing.expectEqual(@as(u16, 0x0), enc.codes[5].code);
- try testing.expectEqual(@as(u16, 0x2), enc.codes[6].code);
- try testing.expectEqual(@as(u16, 0x1), enc.codes[0].code);
- try testing.expectEqual(@as(u16, 0x5), enc.codes[4].code);
- try testing.expectEqual(@as(u16, 0x3), enc.codes[18].code);
- try testing.expectEqual(@as(u16, 0x7), enc.codes[3].code);
- try testing.expectEqual(@as(u16, 0x17), enc.codes[17].code);
- try testing.expectEqual(@as(u16, 0x0f), enc.codes[1].code);
- try testing.expectEqual(@as(u16, 0x2f), enc.codes[2].code);
- try testing.expectEqual(@as(u16, 0x1f), enc.codes[7].code);
- try testing.expectEqual(@as(u16, 0x3f), enc.codes[16].code);
-}
-
-test "generate a Huffman code for the fixed literal table specific to Deflate" {
- var enc = try generateFixedLiteralEncoding(testing.allocator);
- defer enc.deinit();
-}
-
-test "generate a Huffman code for the 30 possible relative offsets (LZ77 distances) of Deflate" {
- var enc = try generateFixedOffsetEncoding(testing.allocator);
- defer enc.deinit();
-}
diff --git a/lib/std/compress/deflate/testdata/compress-e.txt b/lib/std/compress/deflate/testdata/compress-e.txt
deleted file mode 100644
index 5ca186f14c..0000000000
--- a/lib/std/compress/deflate/testdata/compress-e.txt
+++ /dev/null
@@ -1 +0,0 @@
-2.
diff --git a/lib/std/compress/deflate/testdata/compress-gettysburg.txt b/lib/std/compress/deflate/testdata/compress-gettysburg.txt
deleted file mode 100644
index 2c9bcde360..0000000000
--- a/lib/std/compress/deflate/testdata/compress-gettysburg.txt
+++ /dev/null
@@ -1,29 +0,0 @@
- Four score and seven years ago our fathers brought forth on
-this continent, a new nation, conceived in Liberty, and dedicated
-to the proposition that all men are created equal.
- Now we are engaged in a great Civil War, testing whether that
-nation, or any nation so conceived and so dedicated, can long
-endure.
- We are met on a great battle-field of that war.
- We have come to dedicate a portion of that field, as a final
-resting place for those who here gave their lives that that
-nation might live. It is altogether fitting and proper that
-we should do this.
- But, in a larger sense, we can not dedicate - we can not
-consecrate - we can not hallow - this ground.
- The brave men, living and dead, who struggled here, have
-consecrated it, far above our poor power to add or detract.
-The world will little note, nor long remember what we say here,
-but it can never forget what they did here.
- It is for us the living, rather, to be dedicated here to the
-unfinished work which they who fought here have thus far so
-nobly advanced. It is rather for us to be here dedicated to
-the great task remaining before us - that from these honored
-dead we take increased devotion to that cause for which they
-gave the last full measure of devotion -
- that we here highly resolve that these dead shall not have
-died in vain - that this nation, under God, shall have a new
-birth of freedom - and that government of the people, by the
-people, for the people, shall not perish from this earth.
-
-Abraham Lincoln, November 19, 1863, Gettysburg, Pennsylvania
diff --git a/lib/std/compress/deflate/testdata/compress-pi.txt b/lib/std/compress/deflate/testdata/compress-pi.txt
deleted file mode 100644
index ca99bbc2a2..0000000000
--- a/lib/std/compress/deflate/testdata/compress-pi.txt
+++ /dev/null
@@ -1 +0,0 @@
-3.
diff --git a/lib/std/compress/deflate/token.zig b/lib/std/compress/deflate/token.zig
deleted file mode 100644
index 744fcdeb12..0000000000
--- a/lib/std/compress/deflate/token.zig
+++ /dev/null
@@ -1,103 +0,0 @@
-// 2 bits: type, can be 0 (literal), 1 (EOF), 2 (Match) or 3 (Unused).
-// 8 bits: xlength (length - MIN_MATCH_LENGTH).
-// 22 bits: xoffset (offset - MIN_OFFSET_SIZE), or literal.
-const length_shift = 22;
-const offset_mask = (1 << length_shift) - 1; // 4_194_303
-const literal_type = 0 << 30; // 0
-pub const match_type = 1 << 30; // 1_073_741_824
-
-// The length code for length X (MIN_MATCH_LENGTH <= X <= MAX_MATCH_LENGTH)
-// is length_codes[length - MIN_MATCH_LENGTH]
-var length_codes = [_]u32{
- 0, 1, 2, 3, 4, 5, 6, 7, 8, 8,
- 9, 9, 10, 10, 11, 11, 12, 12, 12, 12,
- 13, 13, 13, 13, 14, 14, 14, 14, 15, 15,
- 15, 15, 16, 16, 16, 16, 16, 16, 16, 16,
- 17, 17, 17, 17, 17, 17, 17, 17, 18, 18,
- 18, 18, 18, 18, 18, 18, 19, 19, 19, 19,
- 19, 19, 19, 19, 20, 20, 20, 20, 20, 20,
- 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
- 21, 21, 21, 21, 21, 21, 22, 22, 22, 22,
- 22, 22, 22, 22, 22, 22, 22, 22, 22, 22,
- 22, 22, 23, 23, 23, 23, 23, 23, 23, 23,
- 23, 23, 23, 23, 23, 23, 23, 23, 24, 24,
- 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
- 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
- 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
- 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
- 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
- 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
- 25, 25, 26, 26, 26, 26, 26, 26, 26, 26,
- 26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
- 26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
- 26, 26, 26, 26, 27, 27, 27, 27, 27, 27,
- 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
- 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
- 27, 27, 27, 27, 27, 28,
-};
-
-var offset_codes = [_]u32{
- 0, 1, 2, 3, 4, 4, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7,
- 8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9,
- 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
- 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
- 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
- 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
- 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
- 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
- 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
- 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
- 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
- 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
- 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
- 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
- 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
- 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
-};
-
-pub const Token = u32;
-
-// Convert a literal into a literal token.
-pub fn literalToken(lit: u32) Token {
- return literal_type + lit;
-}
-
-// Convert a < xlength, xoffset > pair into a match token.
-pub fn matchToken(xlength: u32, xoffset: u32) Token {
- return match_type + (xlength << length_shift) + xoffset;
-}
-
-// Returns the literal of a literal token
-pub fn literal(t: Token) u32 {
- return @as(u32, @intCast(t - literal_type));
-}
-
-// Returns the extra offset of a match token
-pub fn offset(t: Token) u32 {
- return @as(u32, @intCast(t)) & offset_mask;
-}
-
-pub fn length(t: Token) u32 {
- return @as(u32, @intCast((t - match_type) >> length_shift));
-}
-
-pub fn lengthCode(len: u32) u32 {
- return length_codes[len];
-}
-
-// Returns the offset code corresponding to a specific offset
-pub fn offsetCode(off: u32) u32 {
- if (off < @as(u32, @intCast(offset_codes.len))) {
- return offset_codes[off];
- }
- if (off >> 7 < @as(u32, @intCast(offset_codes.len))) {
- return offset_codes[off >> 7] + 14;
- }
- return offset_codes[off >> 14] + 28;
-}
-
-test {
- const std = @import("std");
- try std.testing.expectEqual(@as(Token, 3_401_581_099), matchToken(555, 555));
-}
diff --git a/lib/std/compress/flate.zig b/lib/std/compress/flate.zig
new file mode 100644
index 0000000000..5338328a21
--- /dev/null
+++ b/lib/std/compress/flate.zig
@@ -0,0 +1,481 @@
+/// Deflate is a lossless data compression file format that uses a combination
+/// of LZ77 and Huffman coding.
+pub const deflate = @import("flate/deflate.zig");
+
+/// Inflate is the decoding process that takes a Deflate bitstream for
+/// decompression and correctly produces the original full-size data or file.
+pub const inflate = @import("flate/inflate.zig");
+
+/// Decompress compressed data from reader and write plain data to the writer.
+pub fn decompress(reader: anytype, writer: anytype) !void {
+ try inflate.decompress(.raw, reader, writer);
+}
+
+/// Decompressor type
+pub fn Decompressor(comptime ReaderType: type) type {
+ return inflate.Inflate(.raw, ReaderType);
+}
+
+/// Create Decompressor which will read compressed data from reader.
+pub fn decompressor(reader: anytype) Decompressor(@TypeOf(reader)) {
+ return inflate.decompressor(.raw, reader);
+}
+
+/// Compression level, trades between speed and compression size.
+pub const Options = deflate.Options;
+
+/// Compress plain data from reader and write compressed data to the writer.
+pub fn compress(reader: anytype, writer: anytype, options: Options) !void {
+ try deflate.compress(.raw, reader, writer, options);
+}
+
+/// Compressor type
+pub fn Compressor(comptime WriterType: type) type {
+ return deflate.Compressor(.raw, WriterType);
+}
+
+/// Create Compressor which outputs compressed data to the writer.
+pub fn compressor(writer: anytype, options: Options) !Compressor(@TypeOf(writer)) {
+ return try deflate.compressor(.raw, writer, options);
+}
+
+/// Huffman only compression. Without Lempel-Ziv match searching. Faster
+/// compression, less memory requirements but bigger compressed sizes.
+pub const huffman = struct {
+ pub fn compress(reader: anytype, writer: anytype) !void {
+ try deflate.huffman.compress(.raw, reader, writer);
+ }
+
+ pub fn Compressor(comptime WriterType: type) type {
+ return deflate.huffman.Compressor(.raw, WriterType);
+ }
+
+ pub fn compressor(writer: anytype) !huffman.Compressor(@TypeOf(writer)) {
+ return deflate.huffman.compressor(.raw, writer);
+ }
+};
+
+/// No compression, store only. Compressed size is slightly bigger than plain.
+pub const store = struct {
+ pub fn compress(reader: anytype, writer: anytype) !void {
+ try deflate.store.compress(.raw, reader, writer);
+ }
+
+ pub fn Compressor(comptime WriterType: type) type {
+ return deflate.store.Compressor(.raw, WriterType);
+ }
+
+ pub fn compressor(writer: anytype) !store.Compressor(@TypeOf(writer)) {
+ return deflate.store.compressor(.raw, writer);
+ }
+};
+
+/// Container defines header/footer around deflate bit stream. Gzip and zlib
+/// compression algorithms are containers around deflate bit stream body.
+const Container = @import("flate/container.zig").Container;
+const std = @import("std");
+const testing = std.testing;
+const fixedBufferStream = std.io.fixedBufferStream;
+const print = std.debug.print;
+const builtin = @import("builtin");
+
+test {
+ _ = deflate;
+ _ = inflate;
+}
+
+test "flate compress/decompress" {
+ if (builtin.target.cpu.arch == .wasm32) return error.SkipZigTest;
+
+ var cmp_buf: [64 * 1024]u8 = undefined; // compressed data buffer
+ var dcm_buf: [64 * 1024]u8 = undefined; // decompressed data buffer
+
+ const levels = [_]deflate.Level{ .level_4, .level_5, .level_6, .level_7, .level_8, .level_9 };
+ const cases = [_]struct {
+ data: []const u8, // uncompressed content
+ // compressed data sizes per level 4-9
+ gzip_sizes: [levels.len]usize = [_]usize{0} ** levels.len,
+ huffman_only_size: usize = 0,
+ store_size: usize = 0,
+ }{
+ .{
+ .data = @embedFile("flate/testdata/rfc1951.txt"),
+ .gzip_sizes = [_]usize{ 11513, 11217, 11139, 11126, 11122, 11119 },
+ .huffman_only_size = 20287,
+ .store_size = 36967,
+ },
+ .{
+ .data = @embedFile("flate/testdata/fuzz/roundtrip1.input"),
+ .gzip_sizes = [_]usize{ 373, 370, 370, 370, 370, 370 },
+ .huffman_only_size = 393,
+ .store_size = 393,
+ },
+ .{
+ .data = @embedFile("flate/testdata/fuzz/roundtrip2.input"),
+ .gzip_sizes = [_]usize{ 373, 373, 373, 373, 373, 373 },
+ .huffman_only_size = 394,
+ .store_size = 394,
+ },
+ .{
+ .data = @embedFile("flate/testdata/fuzz/deflate-stream.expect"),
+ .gzip_sizes = [_]usize{ 351, 347, 347, 347, 347, 347 },
+ .huffman_only_size = 498,
+ .store_size = 747,
+ },
+ };
+
+ for (cases, 0..) |case, case_no| { // for each case
+ const data = case.data;
+
+ for (levels, 0..) |level, i| { // for each compression level
+
+ inline for (Container.list) |container| { // for each wrapping
+ var compressed_size: usize = if (case.gzip_sizes[i] > 0)
+ case.gzip_sizes[i] - Container.gzip.size() + container.size()
+ else
+ 0;
+
+ // compress original stream to compressed stream
+ {
+ var original = fixedBufferStream(data);
+ var compressed = fixedBufferStream(&cmp_buf);
+ try deflate.compress(container, original.reader(), compressed.writer(), .{ .level = level });
+ if (compressed_size == 0) {
+ if (container == .gzip)
+ print("case {d} gzip level {} compressed size: {d}\n", .{ case_no, level, compressed.pos });
+ compressed_size = compressed.pos;
+ }
+ try testing.expectEqual(compressed_size, compressed.pos);
+ }
+ // decompress compressed stream to decompressed stream
+ {
+ var compressed = fixedBufferStream(cmp_buf[0..compressed_size]);
+ var decompressed = fixedBufferStream(&dcm_buf);
+ try inflate.decompress(container, compressed.reader(), decompressed.writer());
+ try testing.expectEqualSlices(u8, data, decompressed.getWritten());
+ }
+
+ // compressor writer interface
+ {
+ var compressed = fixedBufferStream(&cmp_buf);
+ var cmp = try deflate.compressor(container, compressed.writer(), .{ .level = level });
+ var cmp_wrt = cmp.writer();
+ try cmp_wrt.writeAll(data);
+ try cmp.finish();
+
+ try testing.expectEqual(compressed_size, compressed.pos);
+ }
+ // decompressor reader interface
+ {
+ var compressed = fixedBufferStream(cmp_buf[0..compressed_size]);
+ var dcm = inflate.decompressor(container, compressed.reader());
+ var dcm_rdr = dcm.reader();
+ const n = try dcm_rdr.readAll(&dcm_buf);
+ try testing.expectEqual(data.len, n);
+ try testing.expectEqualSlices(u8, data, dcm_buf[0..n]);
+ }
+ }
+ }
+ // huffman only compression
+ {
+ inline for (Container.list) |container| { // for each wrapping
+ var compressed_size: usize = if (case.huffman_only_size > 0)
+ case.huffman_only_size - Container.gzip.size() + container.size()
+ else
+ 0;
+
+ // compress original stream to compressed stream
+ {
+ var original = fixedBufferStream(data);
+ var compressed = fixedBufferStream(&cmp_buf);
+ var cmp = try deflate.huffman.compressor(container, compressed.writer());
+ try cmp.compress(original.reader());
+ try cmp.finish();
+ if (compressed_size == 0) {
+ if (container == .gzip)
+ print("case {d} huffman only compressed size: {d}\n", .{ case_no, compressed.pos });
+ compressed_size = compressed.pos;
+ }
+ try testing.expectEqual(compressed_size, compressed.pos);
+ }
+ // decompress compressed stream to decompressed stream
+ {
+ var compressed = fixedBufferStream(cmp_buf[0..compressed_size]);
+ var decompressed = fixedBufferStream(&dcm_buf);
+ try inflate.decompress(container, compressed.reader(), decompressed.writer());
+ try testing.expectEqualSlices(u8, data, decompressed.getWritten());
+ }
+ }
+ }
+
+ // store only
+ {
+ inline for (Container.list) |container| { // for each wrapping
+ var compressed_size: usize = if (case.store_size > 0)
+ case.store_size - Container.gzip.size() + container.size()
+ else
+ 0;
+
+ // compress original stream to compressed stream
+ {
+ var original = fixedBufferStream(data);
+ var compressed = fixedBufferStream(&cmp_buf);
+ var cmp = try deflate.store.compressor(container, compressed.writer());
+ try cmp.compress(original.reader());
+ try cmp.finish();
+ if (compressed_size == 0) {
+ if (container == .gzip)
+ print("case {d} store only compressed size: {d}\n", .{ case_no, compressed.pos });
+ compressed_size = compressed.pos;
+ }
+
+ try testing.expectEqual(compressed_size, compressed.pos);
+ }
+ // decompress compressed stream to decompressed stream
+ {
+ var compressed = fixedBufferStream(cmp_buf[0..compressed_size]);
+ var decompressed = fixedBufferStream(&dcm_buf);
+ try inflate.decompress(container, compressed.reader(), decompressed.writer());
+ try testing.expectEqualSlices(u8, data, decompressed.getWritten());
+ }
+ }
+ }
+ }
+}
+
+fn testDecompress(comptime container: Container, compressed: []const u8, expected_plain: []const u8) !void {
+ var in = fixedBufferStream(compressed);
+ var out = std.ArrayList(u8).init(testing.allocator);
+ defer out.deinit();
+
+ try inflate.decompress(container, in.reader(), out.writer());
+ try testing.expectEqualSlices(u8, expected_plain, out.items);
+}
+
+test "flate don't read past deflate stream's end" {
+ try testDecompress(.zlib, &[_]u8{
+ 0x08, 0xd7, 0x63, 0xf8, 0xcf, 0xc0, 0xc0, 0x00, 0xc1, 0xff,
+ 0xff, 0x43, 0x30, 0x03, 0x03, 0xc3, 0xff, 0xff, 0xff, 0x01,
+ 0x83, 0x95, 0x0b, 0xf5,
+ }, &[_]u8{
+ 0x00, 0xff, 0x00, 0x00, 0x00, 0xff, 0x00, 0x00, 0x00, 0xff,
+ 0x00, 0xff, 0xff, 0xff, 0x00, 0xff, 0xff, 0xff, 0x00, 0x00,
+ 0x00, 0x00, 0xff, 0xff, 0xff,
+ });
+}
+
+test "flate zlib header" {
+ // Truncated header
+ try testing.expectError(
+ error.EndOfStream,
+ testDecompress(.zlib, &[_]u8{0x78}, ""),
+ );
+ // Wrong CM
+ try testing.expectError(
+ error.BadZlibHeader,
+ testDecompress(.zlib, &[_]u8{ 0x79, 0x94 }, ""),
+ );
+ // Wrong CINFO
+ try testing.expectError(
+ error.BadZlibHeader,
+ testDecompress(.zlib, &[_]u8{ 0x88, 0x98 }, ""),
+ );
+ // Wrong checksum
+ try testing.expectError(
+ error.WrongZlibChecksum,
+ testDecompress(.zlib, &[_]u8{ 0x78, 0xda, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00 }, ""),
+ );
+ // Truncated checksum
+ try testing.expectError(
+ error.EndOfStream,
+ testDecompress(.zlib, &[_]u8{ 0x78, 0xda, 0x03, 0x00, 0x00 }, ""),
+ );
+}
+
+test "flate gzip header" {
+ // Truncated header
+ try testing.expectError(
+ error.EndOfStream,
+ testDecompress(.gzip, &[_]u8{ 0x1f, 0x8B }, undefined),
+ );
+ // Wrong CM
+ try testing.expectError(
+ error.BadGzipHeader,
+ testDecompress(.gzip, &[_]u8{
+ 0x1f, 0x8b, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x03,
+ }, undefined),
+ );
+
+ // Wrong checksum
+ try testing.expectError(
+ error.WrongGzipChecksum,
+ testDecompress(.gzip, &[_]u8{
+ 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x03, 0x03, 0x00, 0x00, 0x00, 0x00, 0x01,
+ 0x00, 0x00, 0x00, 0x00,
+ }, undefined),
+ );
+ // Truncated checksum
+ try testing.expectError(
+ error.EndOfStream,
+ testDecompress(.gzip, &[_]u8{
+ 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x03, 0x03, 0x00, 0x00, 0x00, 0x00,
+ }, undefined),
+ );
+ // Wrong initial size
+ try testing.expectError(
+ error.WrongGzipSize,
+ testDecompress(.gzip, &[_]u8{
+ 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x03, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x01,
+ }, undefined),
+ );
+ // Truncated initial size field
+ try testing.expectError(
+ error.EndOfStream,
+ testDecompress(.gzip, &[_]u8{
+ 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x03, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00,
+ }, undefined),
+ );
+
+ try testDecompress(.gzip, &[_]u8{
+ // GZIP header
+ 0x1f, 0x8b, 0x08, 0x12, 0x00, 0x09, 0x6e, 0x88, 0x00, 0xff, 0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x00,
+ // header.FHCRC (should cover entire header)
+ 0x99, 0xd6,
+ // GZIP data
+ 0x01, 0x00, 0x00, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ }, "");
+}
+
+test "flate public interface" {
+ if (builtin.target.cpu.arch == .wasm32) return error.SkipZigTest;
+
+ const plain_data = [_]u8{ 'H', 'e', 'l', 'l', 'o', ' ', 'w', 'o', 'r', 'l', 'd', 0x0a };
+
+ // deflate final stored block, header + plain (stored) data
+ const deflate_block = [_]u8{
+ 0b0000_0001, 0b0000_1100, 0x00, 0b1111_0011, 0xff, // deflate fixed buffer header len, nlen
+ } ++ plain_data;
+
+ // gzip header/footer + deflate block
+ const gzip_data =
+ [_]u8{ 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03 } ++ // gzip header (10 bytes)
+ deflate_block ++
+ [_]u8{ 0xd5, 0xe0, 0x39, 0xb7, 0x0c, 0x00, 0x00, 0x00 }; // gzip footer checksum (4 byte), size (4 bytes)
+
+ // zlib header/footer + deflate block
+ const zlib_data = [_]u8{ 0x78, 0b10_0_11100 } ++ // zlib header (2 bytes)
+ deflate_block ++
+ [_]u8{ 0x1c, 0xf2, 0x04, 0x47 }; // zlib footer: checksum
+
+ const gzip = @import("gzip.zig");
+ const zlib = @import("zlib.zig");
+ const flate = @This();
+
+ try testInterface(gzip, &gzip_data, &plain_data);
+ try testInterface(zlib, &zlib_data, &plain_data);
+ try testInterface(flate, &deflate_block, &plain_data);
+}
+
+fn testInterface(comptime pkg: type, gzip_data: []const u8, plain_data: []const u8) !void {
+ var buffer1: [64]u8 = undefined;
+ var buffer2: [64]u8 = undefined;
+
+ var compressed = fixedBufferStream(&buffer1);
+ var plain = fixedBufferStream(&buffer2);
+
+ // decompress
+ {
+ var in = fixedBufferStream(gzip_data);
+ try pkg.decompress(in.reader(), plain.writer());
+ try testing.expectEqualSlices(u8, plain_data, plain.getWritten());
+ }
+ plain.reset();
+ compressed.reset();
+
+ // compress/decompress
+ {
+ var in = fixedBufferStream(plain_data);
+ try pkg.compress(in.reader(), compressed.writer(), .{});
+ compressed.reset();
+ try pkg.decompress(compressed.reader(), plain.writer());
+ try testing.expectEqualSlices(u8, plain_data, plain.getWritten());
+ }
+ plain.reset();
+ compressed.reset();
+
+ // compressor/decompressor
+ {
+ var in = fixedBufferStream(plain_data);
+ var cmp = try pkg.compressor(compressed.writer(), .{});
+ try cmp.compress(in.reader());
+ try cmp.finish();
+
+ compressed.reset();
+ var dcp = pkg.decompressor(compressed.reader());
+ try dcp.decompress(plain.writer());
+ try testing.expectEqualSlices(u8, plain_data, plain.getWritten());
+ }
+ plain.reset();
+ compressed.reset();
+
+ // huffman
+ {
+ // huffman compress/decompress
+ {
+ var in = fixedBufferStream(plain_data);
+ try pkg.huffman.compress(in.reader(), compressed.writer());
+ compressed.reset();
+ try pkg.decompress(compressed.reader(), plain.writer());
+ try testing.expectEqualSlices(u8, plain_data, plain.getWritten());
+ }
+ plain.reset();
+ compressed.reset();
+
+ // huffman compressor/decompressor
+ {
+ var in = fixedBufferStream(plain_data);
+ var cmp = try pkg.huffman.compressor(compressed.writer());
+ try cmp.compress(in.reader());
+ try cmp.finish();
+
+ compressed.reset();
+ try pkg.decompress(compressed.reader(), plain.writer());
+ try testing.expectEqualSlices(u8, plain_data, plain.getWritten());
+ }
+ }
+ plain.reset();
+ compressed.reset();
+
+ // store
+ {
+ // store compress/decompress
+ {
+ var in = fixedBufferStream(plain_data);
+ try pkg.store.compress(in.reader(), compressed.writer());
+ compressed.reset();
+ try pkg.decompress(compressed.reader(), plain.writer());
+ try testing.expectEqualSlices(u8, plain_data, plain.getWritten());
+ }
+ plain.reset();
+ compressed.reset();
+
+ // store compressor/decompressor
+ {
+ var in = fixedBufferStream(plain_data);
+ var cmp = try pkg.store.compressor(compressed.writer());
+ try cmp.compress(in.reader());
+ try cmp.finish();
+
+ compressed.reset();
+ try pkg.decompress(compressed.reader(), plain.writer());
+ try testing.expectEqualSlices(u8, plain_data, plain.getWritten());
+ }
+ }
+}
diff --git a/lib/std/compress/flate/CircularBuffer.zig b/lib/std/compress/flate/CircularBuffer.zig
new file mode 100644
index 0000000000..f25a231ee4
--- /dev/null
+++ b/lib/std/compress/flate/CircularBuffer.zig
@@ -0,0 +1,234 @@
+//! 64K buffer of uncompressed data created in inflate (decompression). Has enough
+//! history to support writing match<length, distance>; copying length of bytes
+//! from the position distance backward from current.
+//!
+//! Reads can return less than available bytes if they are spread across
+//! different circles. So reads should repeat until get required number of bytes
+//! or until returned slice is zero length.
+//!
+//! Note on deflate limits:
+//! * non-compressible block is limited to 65,535 bytes.
+//! * backward pointer is limited in distance to 32K bytes and in length to 258 bytes.
+//!
+//! Whole non-compressed block can be written without overlap. We always have
+ //! history of up to 64K, more than the 32K needed.
+//!
+const std = @import("std");
+const assert = std.debug.assert;
+const testing = std.testing;
+
+const consts = @import("consts.zig").match;
+
+const mask = 0xffff; // 64K - 1
+const buffer_len = mask + 1; // 64K buffer
+
+const Self = @This();
+
+buffer: [buffer_len]u8 = undefined,
+wp: usize = 0, // write position
+rp: usize = 0, // read position
+
+ /// Writes every byte of `buf` as a literal, in order, via `write`.
+ fn writeAll(self: *Self, buf: []const u8) void {
+     for (buf) |byte| {
+         self.write(byte);
+     }
+ }
+
+ /// Write literal: stores `b` at the write position and advances the write
+ /// position by one. Asserts that fewer than `mask` bytes are pending between
+ /// the read and the write position.
+ pub fn write(self: *Self, b: u8) void {
+     assert(self.wp - self.rp < mask);
+     const slot = self.wp & mask;
+     self.wp += 1;
+     self.buffer[slot] = b;
+ }
+
+ /// Write match (back-reference to the same data slice) starting at `distance`
+ /// back from current write position, and `length` of bytes.
+ ///
+ /// Returns `error.InvalidMatch` when `distance` reaches before the first
+ /// written byte (`wp < distance`) or when `length` / `distance` fall outside
+ /// the bounds given by `consts`. Asserts that the buffer is not full.
+ /// Advances the write position by `length`.
+ pub fn writeMatch(self: *Self, length: u16, distance: u16) !void {
+ if (self.wp < distance or
+ length < consts.base_length or length > consts.max_length or
+ distance < consts.min_distance or distance > consts.max_distance)
+ {
+ return error.InvalidMatch;
+ }
+ assert(self.wp - self.rp < mask);
+
+ var from: usize = self.wp - distance;
+ const from_end: usize = from + length;
+ var to: usize = self.wp;
+ const to_end: usize = to + length;
+
+ self.wp += length;
+
+ // Fast path using memcpy
+ if (length <= distance and // no overlapping buffers
+ (from >> 16 == from_end >> 16) and // start and end in the same circle (position >> 16 counts 64K circles)
+ (to >> 16 == to_end >> 16))
+ {
+ @memcpy(self.buffer[to & mask .. to_end & mask], self.buffer[from & mask .. from_end & mask]);
+ return;
+ }
+
+ // Slow byte by byte; used when source and destination overlap or when either
+ // range crosses the end of the buffer (indices are wrapped with `mask`).
+ while (to < to_end) {
+ self.buffer[to & mask] = self.buffer[from & mask];
+ to += 1;
+ from += 1;
+ }
+ }
+
+ /// Returns a writable slice of the internal buffer that starts at the write
+ /// position and holds at most `n` bytes; it is cut short at the end of the
+ /// buffer rather than wrapping. The write position is advanced by the slice
+ /// length, on the assumption that the caller fills the whole slice.
+ pub fn getWritable(self: *Self, n: usize) []u8 {
+     const start = self.wp & mask;
+     const count = @min(n, buffer_len - start);
+     self.wp += count;
+     return self.buffer[start..][0..count];
+ }
+
+ /// Read available data. Can return part of the available data if it is
+ /// spread across two circles. So read until this returns zero length.
+ /// Same as `readAtMost(buffer_len)`: the read position is advanced past the
+ /// returned slice.
+ pub fn read(self: *Self) []const u8 {
+ return self.readAtMost(buffer_len);
+ }
+
+ /// Read part of available data: at most `limit` bytes, or as much as a
+ /// single contiguous block allows when `limit` is 0. Can return less than
+ /// `limit` even if more decoded data is available. Advances the read
+ /// position past the returned slice.
+ pub fn readAtMost(self: *Self, limit: usize) []const u8 {
+     const max = if (limit == 0) buffer_len else limit;
+     const block = self.readBlock(max);
+     self.rp += block.len;
+     return self.buffer[block.head..block.tail];
+ }
+
+ /// Location of one contiguous readable region inside `buffer`, as produced
+ /// by `readBlock`.
+ const ReadBlock = struct {
+ head: usize, // index of the first readable byte in `buffer`
+ tail: usize, // index one past the last readable byte (head + len)
+ len: usize, // number of readable bytes
+ };
+
+ /// Returns position of the contiguous block of readable data that starts at
+ /// the read position, capped at `max` bytes. The block never extends past
+ /// the end of the buffer. Does not move the read position.
+ fn readBlock(self: *Self, max: usize) ReadBlock {
+     const head = self.rp & mask;
+     const write_idx = self.wp & mask;
+     // When the write index is behind the read index, only the bytes up to
+     // the end of the buffer are contiguous.
+     const contiguous = if (write_idx >= head) write_idx - head else buffer_len - head;
+     const len = @min(max, contiguous);
+     return .{ .head = head, .tail = head + len, .len = len };
+ }
+
+ /// Number of free bytes for write: the buffer size minus the bytes that are
+ /// written but not yet read.
+ pub fn free(self: *Self) usize {
+     const pending = self.wp - self.rp;
+     return buffer_len - pending;
+ }
+
+ /// Full if largest match can't fit. 258 is largest match length. That much
+ /// bytes can be produced in single decode step.
+ pub fn full(self: *Self) bool {
+     const required = 258 + 1;
+     return required > self.free();
+ }
+
+// example from: https://youtu.be/SJPvNi4HrWQ?t=3558
+test "flate.CircularBuffer writeMatch" {
+ var cb: Self = .{};
+
+ cb.writeAll("a salad; ");
+ try cb.writeMatch(5, 9);
+ try cb.writeMatch(3, 3);
+
+ try testing.expectEqualStrings("a salad; a salsal", cb.read());
+}
+
+test "flate.CircularBuffer writeMatch overlap" {
+ var cb: Self = .{};
+
+ cb.writeAll("a b c ");
+ try cb.writeMatch(8, 4);
+ cb.write('d');
+
+ try testing.expectEqualStrings("a b c b c b c d", cb.read());
+}
+
+test "flate.CircularBuffer readAtMost" {
+ var cb: Self = .{};
+
+ cb.writeAll("0123456789");
+ try cb.writeMatch(50, 10);
+
+ try testing.expectEqualStrings("0123456789" ** 6, cb.buffer[cb.rp..cb.wp]);
+ for (0..6) |i| {
+ try testing.expectEqual(i * 10, cb.rp);
+ try testing.expectEqualStrings("0123456789", cb.readAtMost(10));
+ }
+ try testing.expectEqualStrings("", cb.readAtMost(10));
+ try testing.expectEqualStrings("", cb.read());
+}
+
+test "flate.CircularBuffer" {
+ var cb: Self = .{};
+
+ const data = "0123456789abcdef" ** (1024 / 16);
+ cb.writeAll(data);
+ try testing.expectEqual(@as(usize, 0), cb.rp);
+ try testing.expectEqual(@as(usize, 1024), cb.wp);
+ try testing.expectEqual(@as(usize, 1024 * 63), cb.free());
+
+ for (0..62 * 4) |_|
+ try cb.writeMatch(256, 1024); // write 62K
+
+ try testing.expectEqual(@as(usize, 0), cb.rp);
+ try testing.expectEqual(@as(usize, 63 * 1024), cb.wp);
+ try testing.expectEqual(@as(usize, 1024), cb.free());
+
+ cb.writeAll(data[0..200]);
+ _ = cb.readAtMost(1024); // make some space
+ cb.writeAll(data); // overflows write position
+ try testing.expectEqual(@as(usize, 200 + 65536), cb.wp);
+ try testing.expectEqual(@as(usize, 1024), cb.rp);
+ try testing.expectEqual(@as(usize, 1024 - 200), cb.free());
+
+ const rb = cb.readBlock(Self.buffer_len);
+ try testing.expectEqual(@as(usize, 65536 - 1024), rb.len);
+ try testing.expectEqual(@as(usize, 1024), rb.head);
+ try testing.expectEqual(@as(usize, 65536), rb.tail);
+
+ try testing.expectEqual(@as(usize, 65536 - 1024), cb.read().len); // read to the end of the buffer
+ try testing.expectEqual(@as(usize, 200 + 65536), cb.wp);
+ try testing.expectEqual(@as(usize, 65536), cb.rp);
+ try testing.expectEqual(@as(usize, 65536 - 200), cb.free());
+
+ try testing.expectEqual(@as(usize, 200), cb.read().len); // read the rest
+}
+
+test "flate.CircularBuffer write overlap" {
+ var cb: Self = .{};
+ cb.wp = cb.buffer.len - 15;
+ cb.rp = cb.wp;
+
+ cb.writeAll("0123456789");
+ cb.writeAll("abcdefghij");
+
+ try testing.expectEqual(cb.buffer.len + 5, cb.wp);
+ try testing.expectEqual(cb.buffer.len - 15, cb.rp);
+
+ try testing.expectEqualStrings("0123456789abcde", cb.read());
+ try testing.expectEqualStrings("fghij", cb.read());
+
+ try testing.expect(cb.wp == cb.rp);
+}
+
+test "flate.CircularBuffer writeMatch/read overlap" {
+ var cb: Self = .{};
+ cb.wp = cb.buffer.len - 15;
+ cb.rp = cb.wp;
+
+ cb.writeAll("0123456789");
+ try cb.writeMatch(15, 5);
+
+ try testing.expectEqualStrings("012345678956789", cb.read());
+ try testing.expectEqualStrings("5678956789", cb.read());
+
+ try cb.writeMatch(20, 25);
+ try testing.expectEqualStrings("01234567895678956789", cb.read());
+}
diff --git a/lib/std/compress/flate/Lookup.zig b/lib/std/compress/flate/Lookup.zig
new file mode 100644
index 0000000000..b5d1fd3c97
--- /dev/null
+++ b/lib/std/compress/flate/Lookup.zig
@@ -0,0 +1,125 @@
+ //! Lookup of the previous locations for the same 4 byte data. Works on hash of
+ //! 4 bytes data. Head contains position of the first match for each hash. Chain
+ //! points to the previous position of the same hash given the current location.
+ //!
+const std = @import("std");
+const testing = std.testing;
+const expect = testing.expect;
+const consts = @import("consts.zig");
+
+const Self = @This();
+
+const prime4 = 0x9E3779B1; // 4 bytes prime number 2654435761
+const chain_len = 2 * consts.history.len;
+
+// Maps hash => first position
+head: [consts.lookup.len]u16 = [_]u16{0} ** consts.lookup.len,
+// Maps position => previous positions for the same hash value
+chain: [chain_len]u16 = [_]u16{0} ** (chain_len),
+
+ /// Hashes the first 4 bytes of `data` and records `pos` as the most recent
+ /// position for that hash. Returns the position previously stored for the
+ /// hash. When `data` holds fewer than 4 bytes nothing is inserted and 0 is
+ /// returned.
+ pub fn add(self: *Self, data: []const u8, pos: u16) u16 {
+     return if (data.len >= 4) self.set(hash(data[0..4]), pos) else 0;
+ }
+
+ // Returns previous location with the same hash value given the current
+ // position, i.e. the `chain` entry stored for `pos`.
+ pub fn prev(self: *Self, pos: u16) u16 {
+ return self.chain[pos];
+ }
+
+ /// Makes `pos` the head position for hash `h` and links it in `chain` to the
+ /// position that was head before. Returns that former head position.
+ fn set(self: *Self, h: u32, pos: u16) u16 {
+     const previous = self.head[h];
+     self.chain[pos] = previous;
+     self.head[h] = pos;
+     return previous;
+ }
+
+ /// Slide all positions in head and chain for `n`: every head entry is
+ /// lowered by `n` (saturating at 0) and each of the first `n` chain entries
+ /// is replaced by the entry `n` places further on, lowered the same way.
+ pub fn slide(self: *Self, n: u16) void {
+     for (&self.head) |*entry| entry.* -|= n;
+
+     const count: usize = n;
+     for (0..count) |i| {
+         self.chain[i] = self.chain[i + count] -| n;
+     }
+ }
+
+ /// Adds `len` 4-byte hashes from `data` into the lookup; the hash of the
+ /// bytes starting at `data[k]` is recorded at position `pos + k`.
+ /// Does nothing when `len` is 0 or `data` is shorter than
+ /// `consts.match.min_length`. Fewer than `len` hashes are added when `data`
+ /// does not hold enough bytes for all of them.
+ pub fn bulkAdd(self: *Self, data: []const u8, len: u16, pos: u16) void {
+     if (len == 0 or data.len < consts.match.min_length) {
+         return;
+     }
+     // Big-endian word of the first 4 bytes, the same layout `hash` builds.
+     var hb =
+         @as(u32, data[3]) |
+         @as(u32, data[2]) << 8 |
+         @as(u32, data[1]) << 16 |
+         @as(u32, data[0]) << 24;
+     _ = self.set(hashu(hb), pos);
+
+     // Widen before adding: `len + 3` evaluated in u16 overflows for
+     // len > 65532.
+     const end = @min(@as(usize, len) + 3, data.len);
+     var i = pos;
+     for (4..end) |j| {
+         // Roll the window: drop the oldest byte, append the next one.
+         hb = (hb << 8) | @as(u32, data[j]);
+         i += 1;
+         _ = self.set(hashu(hb), i);
+     }
+ }
+
+ /// Calculates hash of the 4 bytes in `b`, read as one big-endian 32-bit word.
+ fn hash(b: *const [4]u8) u32 {
+     const word =
+         @as(u32, b[0]) << 24 |
+         @as(u32, b[1]) << 16 |
+         @as(u32, b[2]) << 8 |
+         @as(u32, b[3]);
+     return hashu(word);
+ }
+
+ /// Multiplicative hash: wrapping product of `v` and `prime4`, keeping the
+ /// bits above `consts.lookup.shift`.
+ fn hashu(v: u32) u32 {
+     const product = v *% prime4;
+     return @intCast(product >> consts.lookup.shift);
+ }
+
+test "flate.Lookup add/prev" {
+ const data = [_]u8{
+ 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
+ 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
+ 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
+ 0x01, 0x02, 0x03,
+ };
+
+ var h: Self = .{};
+ for (data, 0..) |_, i| {
+ const p = h.add(data[i..], @intCast(i));
+ if (i >= 8 and i < 24) {
+ try expect(p == i - 8);
+ } else {
+ try expect(p == 0);
+ }
+ }
+
+ const v = Self.hash(data[2 .. 2 + 4]);
+ try expect(h.head[v] == 2 + 16);
+ try expect(h.chain[2 + 16] == 2 + 8);
+ try expect(h.chain[2 + 8] == 2);
+}
+
+test "flate.Lookup bulkAdd" {
+ const data = "Lorem ipsum dolor sit amet, consectetur adipiscing elit.";
+
+ // one by one
+ var h: Self = .{};
+ for (data, 0..) |_, i| {
+ _ = h.add(data[i..], @intCast(i));
+ }
+
+ // in bulk
+ var bh: Self = .{};
+ bh.bulkAdd(data, data.len, 0);
+
+ try testing.expectEqualSlices(u16, &h.head, &bh.head);
+ try testing.expectEqualSlices(u16, &h.chain, &bh.chain);
+}
diff --git a/lib/std/compress/flate/SlidingWindow.zig b/lib/std/compress/flate/SlidingWindow.zig
new file mode 100644
index 0000000000..2e935fa52b
--- /dev/null
+++ b/lib/std/compress/flate/SlidingWindow.zig
@@ -0,0 +1,160 @@
+ //! Used in deflate (compression), holds uncompressed data from which Tokens are
+ //! produced. In combination with Lookup it is used to find matches in history data.
+//!
+const std = @import("std");
+const consts = @import("consts.zig");
+
+const expect = testing.expect;
+const assert = std.debug.assert;
+const testing = std.testing;
+
+const hist_len = consts.history.len;
+const buffer_len = 2 * hist_len;
+const min_lookahead = consts.match.min_length + consts.match.max_length;
+const max_rp = buffer_len - min_lookahead;
+
+const Self = @This();
+
+buffer: [buffer_len]u8 = undefined,
+wp: usize = 0, // write position
+rp: usize = 0, // read position
+fp: isize = 0, // last flush position, tokens are build from fp..rp
+
+ /// Copies as much of `buf` as fits behind the write position and returns the
+ /// number of bytes copied. Returns 0 without copying when the read position
+ /// has reached `max_rp`, which means the buffer has to be slid first.
+ pub fn write(self: *Self, buf: []const u8) usize {
+     if (self.rp >= max_rp) return 0; // need to slide
+
+     const room = buffer_len - self.wp;
+     const n = @min(buf.len, room);
+     @memcpy(self.buffer[self.wp..][0..n], buf[0..n]);
+     self.wp += n;
+     return n;
+ }
+
+ /// Slide buffer for hist_len: copies `buffer[hist_len..wp]` to the start of
+ /// the buffer and moves `rp`, `wp` and `fp` back by hist_len (`fp` can become
+ /// negative).
+ /// Drops old history, preserves between hist_len and hist_len - min_lookahead.
+ /// Asserts that the read position has reached `max_rp`.
+ /// Returns the number of bytes copied to the front of the buffer, that is
+ /// `wp - hist_len` measured before the slide.
+ /// NOTE(review): positions move back by hist_len while the returned value is
+ /// `wp - hist_len`; the two are equal only when `wp == buffer_len`. Confirm
+ /// that callers expect this value.
+ pub fn slide(self: *Self) u16 {
+ assert(self.rp >= max_rp and self.wp >= self.rp);
+ const n = self.wp - hist_len;
+ @memcpy(self.buffer[0..n], self.buffer[hist_len..self.wp]);
+ self.rp -= hist_len;
+ self.wp -= hist_len;
+ self.fp -= hist_len;
+ return @intCast(n);
+ }
+
+ /// Data from the current position (read position) up to the write position.
+ /// That part of the buffer is not converted to tokens yet.
+ fn lookahead(self: *Self) []const u8 {
+ assert(self.wp >= self.rp);
+ return self.buffer[self.rp..self.wp];
+ }
+
+ /// Returns the lookahead buffer when it holds enough data, otherwise null.
+ /// Without `should_flush` the lookahead must be longer than `min_lookahead`
+ /// so that enough data for the longest match is preserved; with
+ /// `should_flush` any non-empty lookahead is returned.
+ pub fn activeLookahead(self: *Self, should_flush: bool) ?[]const u8 {
+     const lh = self.lookahead();
+     const threshold: usize = if (should_flush) 0 else min_lookahead;
+     if (lh.len <= threshold) return null;
+     return lh;
+ }
+
+ /// Advances read position by `n` bytes, shrinks lookahead. Asserts that the
+ /// new read position does not pass the write position.
+ pub fn advance(self: *Self, n: u16) void {
+ assert(self.wp >= self.rp + n);
+ self.rp += n;
+ }
+
+ /// Returns writable part of the buffer, where new uncompressed data can be
+ /// written: everything from the write position to the end of the buffer.
+ /// Does not move the write position; report filled bytes with `written`.
+ pub fn writable(self: *Self) []u8 {
+ return self.buffer[self.wp..];
+ }
+
+ /// Notification of what part of writable buffer is filled with data.
+ /// Advances the write position by `n`; no bounds check is done here.
+ pub fn written(self: *Self, n: usize) void {
+ self.wp += n;
+ }
+
+ /// Finds match length between previous and current position.
+ /// Used in hot path!
+ ///
+ /// `min_len` is the length of a match that is already known; only a longer
+ /// match is of interest. Returns 0 when the data at the two positions does
+ /// not agree on more than `min_len` bytes, or when the match found is
+ /// shorter than `consts.match.min_length`. The result is capped by the
+ /// bytes available after `curr_pos` and by `consts.match.max_length`.
+ pub fn match(self: *Self, prev_pos: u16, curr_pos: u16, min_len: u16) u16 {
+ const max_len: usize = @min(self.wp - curr_pos, consts.match.max_length);
+ // lookahead buffers from previous and current positions
+ const prev_lh = self.buffer[prev_pos..][0..max_len];
+ const curr_lh = self.buffer[curr_pos..][0..max_len];
+
+ // If we already have match (min_len > 0),
+ // test the first byte above previous len a[min_len] != b[min_len]
+ // and then all the bytes from that position to zero.
+ // Those are more likely positions to find a difference than looping from the first bytes.
+ var i: usize = min_len;
+ if (i > 0) {
+ if (max_len <= i) return 0;
+ while (true) {
+ if (prev_lh[i] != curr_lh[i]) return 0;
+ if (i == 0) break;
+ i -= 1;
+ }
+ i = min_len;
+ }
+ while (i < max_len) : (i += 1)
+ if (prev_lh[i] != curr_lh[i]) break;
+ return if (i >= consts.match.min_length) @intCast(i) else 0;
+ }
+
+ /// Current position of non-compressed data. Data before rp are already converted
+ /// to tokens. The read position is narrowed to u16 with `@intCast`, so it
+ /// has to fit into that type.
+ pub fn pos(self: *Self) u16 {
+ return @intCast(self.rp);
+ }
+
+ /// Notification that token list is cleared. Records the current read
+ /// position as the last flush position.
+ pub fn flush(self: *Self) void {
+ self.fp = @intCast(self.rp);
+ }
+
+ /// Part of the buffer between the last flush position and the read position.
+ /// Returns null when the flush position is negative, which is the case after
+ /// a slide moved it below the start of the buffer.
+ pub fn tokensBuffer(self: *Self) ?[]const u8 {
+     assert(self.fp <= self.rp);
+     return if (self.fp >= 0) self.buffer[@intCast(self.fp)..self.rp] else null;
+ }
+
+test "flate.SlidingWindow match" {
+ const data = "Blah blah blah blah blah!";
+ var win: Self = .{};
+ try expect(win.write(data) == data.len);
+ try expect(win.wp == data.len);
+ try expect(win.rp == 0);
+
+ // length between l symbols
+ try expect(win.match(1, 6, 0) == 18);
+ try expect(win.match(1, 11, 0) == 13);
+ try expect(win.match(1, 16, 0) == 8);
+ try expect(win.match(1, 21, 0) == 0);
+
+ // position 15 = "blah blah!"
+ // position 20 = "blah!"
+ try expect(win.match(15, 20, 0) == 4);
+ try expect(win.match(15, 20, 3) == 4);
+ try expect(win.match(15, 20, 4) == 0);
+}
+
+test "flate.SlidingWindow slide" {
+ var win: Self = .{};
+ win.wp = Self.buffer_len - 11;
+ win.rp = Self.buffer_len - 111;
+ win.buffer[win.rp] = 0xab;
+ try expect(win.lookahead().len == 100);
+ try expect(win.tokensBuffer().?.len == win.rp);
+
+ const n = win.slide();
+ try expect(n == 32757);
+ try expect(win.buffer[win.rp] == 0xab);
+ try expect(win.rp == Self.hist_len - 111);
+ try expect(win.wp == Self.hist_len - 11);
+ try expect(win.lookahead().len == 100);
+ try expect(win.tokensBuffer() == null);
+}
diff --git a/lib/std/compress/flate/Token.zig b/lib/std/compress/flate/Token.zig
new file mode 100644
index 0000000000..7e067daf83
--- /dev/null
+++ b/lib/std/compress/flate/Token.zig
@@ -0,0 +1,327 @@
+ //! Token can be literal: single byte of data or match; reference to the slice of
+//! data in the same stream represented with <length, distance>. Where length
+//! can be 3 - 258 bytes, and distance 1 - 32768 bytes.
+//!
+const std = @import("std");
+const assert = std.debug.assert;
+const print = std.debug.print;
+const expect = std.testing.expect;
+const consts = @import("consts.zig").match;
+
+const Token = @This();
+
+ /// Discriminates the two token forms; stored in a single bit.
+ pub const Kind = enum(u1) {
+ literal, // `len_lit` holds the literal byte
+ match, // `len_lit` and `dist` hold the scaled length and distance
+ };
+
+// Distance range 1 - 32768, stored in dist as 0 - 32767 (fits u15)
+dist: u15 = 0,
+// Length range 3 - 258, stored in len_lit as 0 - 255 (fits u8)
+len_lit: u8 = 0,
+kind: Kind = .literal,
+
+ /// Literal byte of the token: the raw `len_lit` field. `kind` is not
+ /// checked here.
+ pub fn literal(t: Token) u8 {
+ return t.len_lit;
+ }
+
+ /// Match distance: the stored `dist` value offset by `consts.min_distance`.
+ pub fn distance(t: Token) u16 {
+     const stored: u16 = t.dist;
+     return stored + consts.min_distance;
+ }
+
+ /// Match length: the stored `len_lit` value offset by `consts.base_length`.
+ pub fn length(t: Token) u16 {
+     const stored: u16 = t.len_lit;
+     return stored + consts.base_length;
+ }
+
+ /// Creates a literal token for the byte `lit`; `dist` keeps its default 0.
+ pub fn initLiteral(lit: u8) Token {
+ return .{ .kind = .literal, .len_lit = lit };
+ }
+
+ // Creates a match token from an unscaled distance and length.
+ // distance range 1 - 32768, stored in dist as 0 - 32767 (u15)
+ // length range 3 - 258, stored in len_lit as 0 - 255 (u8)
+ // Asserts that `len` is within consts.min_length..consts.max_length and
+ // `dist` within consts.min_distance..consts.max_distance.
+ // NOTE(review): the lower bound asserted for `len` is consts.min_length while
+ // the stored value is offset by consts.base_length; the values of these
+ // constants are not visible here - confirm they are meant to differ.
+ pub fn initMatch(dist: u16, len: u16) Token {
+ assert(len >= consts.min_length and len <= consts.max_length);
+ assert(dist >= consts.min_distance and dist <= consts.max_distance);
+ return .{
+ .kind = .match,
+ .dist = @intCast(dist - consts.min_distance),
+ .len_lit = @intCast(len - consts.base_length),
+ };
+ }
+
+ /// True when both tokens have the same kind, distance and length/literal.
+ pub fn eql(t: Token, o: Token) bool {
+     if (t.kind != o.kind) return false;
+     if (t.dist != o.dist) return false;
+     return t.len_lit == o.len_lit;
+ }
+
+ /// Length code of the `match_lengths` entry that covers the stored length.
+ pub fn lengthCode(t: Token) u16 {
+     const idx = match_lengths_index[t.len_lit];
+     return match_lengths[idx].code;
+ }
+
+ /// Copy of the `match_lengths` entry that covers the stored length, with
+ /// `extra_length` set to the offset of the stored length from the entry's
+ /// scaled base.
+ pub fn lengthEncoding(t: Token) MatchLength {
+     const idx = match_lengths_index[t.len_lit];
+     var encoding = match_lengths[idx];
+     encoding.extra_length = t.len_lit - encoding.base_scaled;
+     return encoding;
+ }
+
+ /// Returns the distance code corresponding to the stored distance.
+ /// Distance code is in range: 0 - 29.
+ ///
+ /// Stored distances below the table length are looked up directly. Larger
+ /// ones are looked up by their value shifted right by 7 bits, which selects
+ /// the code 14 places further on.
+ pub fn distanceCode(t: Token) u8 {
+     // `dist` is a u15, so after one shift by 7 it always indexes the table.
+     // A second shift step is therefore never needed.
+     comptime assert(std.math.maxInt(u15) >> 7 < match_distances_index.len);
+
+     const dist: u16 = t.dist;
+     if (dist < match_distances_index.len) {
+         return match_distances_index[dist];
+     }
+     return match_distances_index[dist >> 7] + 14;
+ }
+
+ /// Copy of the `match_distances` entry for this token's distance code, with
+ /// `extra_distance` set to the offset of the stored distance from the
+ /// entry's scaled base.
+ pub fn distanceEncoding(t: Token) MatchDistance {
+     const code = t.distanceCode();
+     var encoding = match_distances[code];
+     encoding.extra_distance = t.dist - encoding.base_scaled;
+     return encoding;
+ }
+
+ /// Number of extra bits for a length code. `code` is the code value itself
+ /// (starting at `length_codes_start`), not a table index.
+ pub fn lengthExtraBits(code: u32) u8 {
+ return match_lengths[code - length_codes_start].extra_bits;
+ }
+
+ /// `match_lengths` entry at index `code`. Here `code` is a table index
+ /// (0 based), unlike the argument of `lengthExtraBits`.
+ pub fn matchLength(code: u8) MatchLength {
+ return match_lengths[code];
+ }
+
+ /// `match_distances` entry for distance code `code`.
+ pub fn matchDistance(code: u8) MatchDistance {
+ return match_distances[code];
+ }
+
+ /// Number of extra bits for distance code `code`.
+ pub fn distanceExtraBits(code: u32) u8 {
+ return match_distances[code].extra_bits;
+ }
+
+ /// Debug helper: prints the token with `std.debug.print`, as `L('c'), ` for
+ /// a literal or `M(distance, length), ` for a match.
+ pub fn show(t: Token) void {
+     switch (t.kind) {
+         .literal => print("L('{c}'), ", .{t.literal()}),
+         .match => print("M({d}, {d}), ", .{ t.distance(), t.length() }),
+     }
+ }
+
+ // Returns index in match_lengths table for each length in range 0-255.
+const match_lengths_index = [_]u8{
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 8,
+ 9, 9, 10, 10, 11, 11, 12, 12, 12, 12,
+ 13, 13, 13, 13, 14, 14, 14, 14, 15, 15,
+ 15, 15, 16, 16, 16, 16, 16, 16, 16, 16,
+ 17, 17, 17, 17, 17, 17, 17, 17, 18, 18,
+ 18, 18, 18, 18, 18, 18, 19, 19, 19, 19,
+ 19, 19, 19, 19, 20, 20, 20, 20, 20, 20,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
+ 21, 21, 21, 21, 21, 21, 22, 22, 22, 22,
+ 22, 22, 22, 22, 22, 22, 22, 22, 22, 22,
+ 22, 22, 23, 23, 23, 23, 23, 23, 23, 23,
+ 23, 23, 23, 23, 23, 23, 23, 23, 24, 24,
+ 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+ 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+ 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+ 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
+ 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
+ 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
+ 25, 25, 26, 26, 26, 26, 26, 26, 26, 26,
+ 26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
+ 26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
+ 26, 26, 26, 26, 27, 27, 27, 27, 27, 27,
+ 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
+ 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
+ 27, 27, 27, 27, 27, 28,
+};
+
+ /// One row of the length code table, plus the extra length of a concrete
+ /// match when filled in by `lengthEncoding`.
+ const MatchLength = struct {
+ code: u16, // length code, 257-285
+ base_scaled: u8, // base - 3, scaled to fit into u8 (0-255), same as len_lit field in Token.
+ base: u16, // 3-258
+ extra_length: u8 = 0, // offset of the actual length from base; set by lengthEncoding
+ extra_bits: u4, // number of extra bits that follow the code
+ };
+
+// match_lengths represents table from rfc (https://datatracker.ietf.org/doc/html/rfc1951#page-12)
+//
+// Extra Extra Extra
+// Code Bits Length(s) Code Bits Lengths Code Bits Length(s)
+// ---- ---- ------ ---- ---- ------- ---- ---- -------
+// 257 0 3 267 1 15,16 277 4 67-82
+// 258 0 4 268 1 17,18 278 4 83-98
+// 259 0 5 269 2 19-22 279 4 99-114
+// 260 0 6 270 2 23-26 280 4 115-130
+// 261 0 7 271 2 27-30 281 5 131-162
+// 262 0 8 272 2 31-34 282 5 163-194
+// 263 0 9 273 3 35-42 283 5 195-226
+// 264 0 10 274 3 43-50 284 5 227-257
+// 265 1 11,12 275 3 51-58 285 0 258
+// 266 1 13,14 276 3 59-66
+//
+pub const length_codes_start = 257;
+
+const match_lengths = [_]MatchLength{
+ .{ .extra_bits = 0, .base_scaled = 0, .base = 3, .code = 257 },
+ .{ .extra_bits = 0, .base_scaled = 1, .base = 4, .code = 258 },
+ .{ .extra_bits = 0, .base_scaled = 2, .base = 5, .code = 259 },
+ .{ .extra_bits = 0, .base_scaled = 3, .base = 6, .code = 260 },
+ .{ .extra_bits = 0, .base_scaled = 4, .base = 7, .code = 261 },
+ .{ .extra_bits = 0, .base_scaled = 5, .base = 8, .code = 262 },
+ .{ .extra_bits = 0, .base_scaled = 6, .base = 9, .code = 263 },
+ .{ .extra_bits = 0, .base_scaled = 7, .base = 10, .code = 264 },
+ .{ .extra_bits = 1, .base_scaled = 8, .base = 11, .code = 265 },
+ .{ .extra_bits = 1, .base_scaled = 10, .base = 13, .code = 266 },
+ .{ .extra_bits = 1, .base_scaled = 12, .base = 15, .code = 267 },
+ .{ .extra_bits = 1, .base_scaled = 14, .base = 17, .code = 268 },
+ .{ .extra_bits = 2, .base_scaled = 16, .base = 19, .code = 269 },
+ .{ .extra_bits = 2, .base_scaled = 20, .base = 23, .code = 270 },
+ .{ .extra_bits = 2, .base_scaled = 24, .base = 27, .code = 271 },
+ .{ .extra_bits = 2, .base_scaled = 28, .base = 31, .code = 272 },
+ .{ .extra_bits = 3, .base_scaled = 32, .base = 35, .code = 273 },
+ .{ .extra_bits = 3, .base_scaled = 40, .base = 43, .code = 274 },
+ .{ .extra_bits = 3, .base_scaled = 48, .base = 51, .code = 275 },
+ .{ .extra_bits = 3, .base_scaled = 56, .base = 59, .code = 276 },
+ .{ .extra_bits = 4, .base_scaled = 64, .base = 67, .code = 277 },
+ .{ .extra_bits = 4, .base_scaled = 80, .base = 83, .code = 278 },
+ .{ .extra_bits = 4, .base_scaled = 96, .base = 99, .code = 279 },
+ .{ .extra_bits = 4, .base_scaled = 112, .base = 115, .code = 280 },
+ .{ .extra_bits = 5, .base_scaled = 128, .base = 131, .code = 281 },
+ .{ .extra_bits = 5, .base_scaled = 160, .base = 163, .code = 282 },
+ .{ .extra_bits = 5, .base_scaled = 192, .base = 195, .code = 283 },
+ .{ .extra_bits = 5, .base_scaled = 224, .base = 227, .code = 284 },
+ .{ .extra_bits = 0, .base_scaled = 255, .base = 258, .code = 285 },
+};
+
+// Used in distanceCode fn to get index in match_distance table for each distance in range 0-32767.
+const match_distances_index = [_]u8{
+ 0, 1, 2, 3, 4, 4, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7,
+ 8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9,
+ 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+ 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+ 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+ 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+ 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+ 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+ 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+ 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+ 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+ 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+ 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+ 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+ 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+ 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+};
+
+ /// One row of the distance code table, plus the extra distance of a concrete
+ /// match when filled in by `distanceEncoding`.
+ const MatchDistance = struct {
+ base_scaled: u16, // base - 1, same as Token dist field
+ base: u16, // smallest distance covered by the code
+ extra_distance: u16 = 0, // offset of the actual distance from base; set by distanceEncoding
+ code: u8, // distance code, 0-29
+ extra_bits: u4, // number of extra bits that follow the code
+ };
+
+// match_distances represents table from rfc (https://datatracker.ietf.org/doc/html/rfc1951#page-12)
+//
+// Extra Extra Extra
+// Code Bits Dist Code Bits Dist Code Bits Distance
+// ---- ---- ---- ---- ---- ------ ---- ---- --------
+// 0 0 1 10 4 33-48 20 9 1025-1536
+// 1 0 2 11 4 49-64 21 9 1537-2048
+// 2 0 3 12 5 65-96 22 10 2049-3072
+// 3 0 4 13 5 97-128 23 10 3073-4096
+// 4 1 5,6 14 6 129-192 24 11 4097-6144
+// 5 1 7,8 15 6 193-256 25 11 6145-8192
+// 6 2 9-12 16 7 257-384 26 12 8193-12288
+// 7 2 13-16 17 7 385-512 27 12 12289-16384
+// 8 3 17-24 18 8 513-768 28 13 16385-24576
+// 9 3 25-32 19 8 769-1024 29 13 24577-32768
+//
+const match_distances = [_]MatchDistance{
+ .{ .extra_bits = 0, .base_scaled = 0x0000, .code = 0, .base = 1 },
+ .{ .extra_bits = 0, .base_scaled = 0x0001, .code = 1, .base = 2 },
+ .{ .extra_bits = 0, .base_scaled = 0x0002, .code = 2, .base = 3 },
+ .{ .extra_bits = 0, .base_scaled = 0x0003, .code = 3, .base = 4 },
+ .{ .extra_bits = 1, .base_scaled = 0x0004, .code = 4, .base = 5 },
+ .{ .extra_bits = 1, .base_scaled = 0x0006, .code = 5, .base = 7 },
+ .{ .extra_bits = 2, .base_scaled = 0x0008, .code = 6, .base = 9 },
+ .{ .extra_bits = 2, .base_scaled = 0x000c, .code = 7, .base = 13 },
+ .{ .extra_bits = 3, .base_scaled = 0x0010, .code = 8, .base = 17 },
+ .{ .extra_bits = 3, .base_scaled = 0x0018, .code = 9, .base = 25 },
+ .{ .extra_bits = 4, .base_scaled = 0x0020, .code = 10, .base = 33 },
+ .{ .extra_bits = 4, .base_scaled = 0x0030, .code = 11, .base = 49 },
+ .{ .extra_bits = 5, .base_scaled = 0x0040, .code = 12, .base = 65 },
+ .{ .extra_bits = 5, .base_scaled = 0x0060, .code = 13, .base = 97 },
+ .{ .extra_bits = 6, .base_scaled = 0x0080, .code = 14, .base = 129 },
+ .{ .extra_bits = 6, .base_scaled = 0x00c0, .code = 15, .base = 193 },
+ .{ .extra_bits = 7, .base_scaled = 0x0100, .code = 16, .base = 257 },
+ .{ .extra_bits = 7, .base_scaled = 0x0180, .code = 17, .base = 385 },
+ .{ .extra_bits = 8, .base_scaled = 0x0200, .code = 18, .base = 513 },
+ .{ .extra_bits = 8, .base_scaled = 0x0300, .code = 19, .base = 769 },
+ .{ .extra_bits = 9, .base_scaled = 0x0400, .code = 20, .base = 1025 },
+ .{ .extra_bits = 9, .base_scaled = 0x0600, .code = 21, .base = 1537 },
+ .{ .extra_bits = 10, .base_scaled = 0x0800, .code = 22, .base = 2049 },
+ .{ .extra_bits = 10, .base_scaled = 0x0c00, .code = 23, .base = 3073 },
+ .{ .extra_bits = 11, .base_scaled = 0x1000, .code = 24, .base = 4097 },
+ .{ .extra_bits = 11, .base_scaled = 0x1800, .code = 25, .base = 6145 },
+ .{ .extra_bits = 12, .base_scaled = 0x2000, .code = 26, .base = 8193 },
+ .{ .extra_bits = 12, .base_scaled = 0x3000, .code = 27, .base = 12289 },
+ .{ .extra_bits = 13, .base_scaled = 0x4000, .code = 28, .base = 16385 },
+ .{ .extra_bits = 13, .base_scaled = 0x6000, .code = 29, .base = 24577 },
+};
+
+test "flate.Token size" {
+ try expect(@sizeOf(Token) == 4);
+}
+
+// testing table https://datatracker.ietf.org/doc/html/rfc1951#page-12
+test "flate.Token MatchLength" {
+ var c = Token.initMatch(1, 4).lengthEncoding();
+ try expect(c.code == 258);
+ try expect(c.extra_bits == 0);
+ try expect(c.extra_length == 0);
+
+ c = Token.initMatch(1, 11).lengthEncoding();
+ try expect(c.code == 265);
+ try expect(c.extra_bits == 1);
+ try expect(c.extra_length == 0);
+
+ c = Token.initMatch(1, 12).lengthEncoding();
+ try expect(c.code == 265);
+ try expect(c.extra_bits == 1);
+ try expect(c.extra_length == 1);
+
+ c = Token.initMatch(1, 130).lengthEncoding();
+ try expect(c.code == 280);
+ try expect(c.extra_bits == 4);
+ try expect(c.extra_length == 130 - 115);
+}
+
+test "flate.Token MatchDistance" {
+ var c = Token.initMatch(1, 4).distanceEncoding();
+ try expect(c.code == 0);
+ try expect(c.extra_bits == 0);
+ try expect(c.extra_distance == 0);
+
+ c = Token.initMatch(192, 4).distanceEncoding();
+ try expect(c.code == 14);
+ try expect(c.extra_bits == 6);
+ try expect(c.extra_distance == 192 - 129);
+}
+
+test "flate.Token match_lengths" {
+ for (match_lengths, 0..) |ml, i| {
+ try expect(@as(u16, ml.base_scaled) + 3 == ml.base);
+ try expect(i + 257 == ml.code);
+ }
+
+ for (match_distances, 0..) |mo, i| {
+ try expect(mo.base_scaled + 1 == mo.base);
+ try expect(i == mo.code);
+ }
+}
diff --git a/lib/std/compress/flate/bit_reader.zig b/lib/std/compress/flate/bit_reader.zig
new file mode 100644
index 0000000000..40e9b76fb8
--- /dev/null
+++ b/lib/std/compress/flate/bit_reader.zig
@@ -0,0 +1,333 @@
+const std = @import("std");
+const assert = std.debug.assert;
+const testing = std.testing;
+
+/// Wraps `reader` in a `BitReader` instantiated for the reader's type.
+pub fn bitReader(reader: anytype) BitReader(@TypeOf(reader)) {
+ const Reader = BitReader(@TypeOf(reader));
+ return Reader.init(reader);
+}
+
+/// Bit reader used during inflate (decompression). Has internal buffer of 64
+/// bits which shifts right after bits are consumed. Uses forward_reader to fill
+/// that internal buffer when needed.
+///
+/// readF is the core function. Supports few different ways of getting bits
+/// controlled by flags. In hot path we try to avoid checking whether we need to
+/// fill buffer from forward_reader by calling fill in advance and readF with
+/// buffered flag set.
+///
+pub fn BitReader(comptime ReaderType: type) type {
+ return struct {
+ // Underlying reader used for filling internal bits buffer
+ forward_reader: ReaderType = undefined,
+ // Internal buffer of 64 bits
+ bits: u64 = 0,
+ // Number of bits in the buffer
+ nbits: u32 = 0,
+
+ const Self = @This();
+
+ pub const Error = ReaderType.Error || error{EndOfStream};
+
+ /// Create a bit reader on top of `rdr` and try to pre-load the internal
+ /// buffer. An error from that first fill is discarded here.
+ pub fn init(rdr: ReaderType) Self {
+ var self = Self{ .forward_reader = rdr };
+ self.fill(1) catch {};
+ return self;
+ }
+
+ /// Try to have `nice` bits available in the buffer. Reads from the
+ /// forward reader if there are fewer than `nice` bits in the buffer.
+ /// Returns error.EndOfStream only if nothing could be read and the
+ /// internal buffer is empty. It will not error if fewer than `nice` bits
+ /// end up in the buffer, only when all bits are exhausted. During inflate
+ /// we usually know the maximum number of bits for the next step, but
+ /// usually that step will need fewer bits to decode. So `nice` is not a
+ /// hard limit, it will just try to have that number of bits available.
+ /// If the end of the forward stream is reached there may be some extra
+ /// zero bits in the buffer.
+ pub inline fn fill(self: *Self, nice: u6) !void {
+ if (self.nbits >= nice) {
+ return; // We have enough bits
+ }
+ // Read more bits from forward reader
+
+ // Number of empty bytes in bits, round nbits to whole bytes.
+ const empty_bytes =
+ @as(u8, if (self.nbits & 0x7 == 0) 8 else 7) - // 8 for 8, 16, 24..., 7 otherwise
+ (self.nbits >> 3); // 0 for 0-7, 1 for 8-16, ... same as / 8
+
+ // Zero-initialised so that bytes not filled by a short read stay zero.
+ var buf: [8]u8 = [_]u8{0} ** 8;
+ // An error from the forward reader is treated like a read of zero bytes.
+ const bytes_read = self.forward_reader.readAll(buf[0..empty_bytes]) catch 0;
+ if (bytes_read > 0) {
+ // Place the new bytes above the nbits bits that are already buffered.
+ const u: u64 = std.mem.readInt(u64, buf[0..8], .little);
+ self.bits |= u << @as(u6, @intCast(self.nbits));
+ self.nbits += 8 * @as(u8, @intCast(bytes_read));
+ return;
+ }
+
+ if (self.nbits == 0)
+ return error.EndOfStream;
+ }
+
+ /// Read exactly buf.len bytes into buf.
+ /// The bit buffer must be at a byte boundary; this is asserted.
+ /// Bytes still held in the bit buffer are delivered first, the rest is
+ /// read directly from the forward reader with readNoEof.
+ pub fn readAll(self: *Self, buf: []u8) !void {
+ assert(self.alignBits() == 0); // internal bits must be at byte boundary
+
+ // First read from internal bits buffer.
+ var n: usize = 0;
+ while (self.nbits > 0 and n < buf.len) {
+ buf[n] = try self.readF(u8, flag.buffered);
+ n += 1;
+ }
+ // Then use forward reader for all other bytes.
+ try self.forward_reader.readNoEof(buf[n..]);
+ }
+
+ /// Flags accepted by readF/peekF in the `how` argument; they can be
+ /// combined with bitwise OR.
+ pub const flag = struct {
+ pub const peek: u3 = 0b001; // don't advance internal buffer, just get bits, leave them in buffer
+ pub const buffered: u3 = 0b010; // assume that there is no need to fill, fill should be called before
+ pub const reverse: u3 = 0b100; // bit reverse the bits that were read
+ };
+
+ /// Alias for readF(U, 0): fill the buffer as needed, return the next
+ /// @bitSizeOf(U) bits and advance the buffer.
+ pub fn read(self: *Self, comptime U: type) !U {
+ return self.readF(U, 0);
+ }
+
+ /// Alias for readF with flag.peek set in addition to the flags in `how`.
+ pub inline fn peekF(self: *Self, comptime U: type, comptime how: u3) !U {
+ return self.readF(U, how | flag.peek);
+ }
+
+ /// Read @bitSizeOf(U) bits as a `U`. `how` is a comptime bitwise OR of
+ /// `flag` values (0 for a plain read):
+ /// - without flag.buffered the buffer is filled first,
+ /// - without flag.peek the buffer is advanced past the bits,
+ /// - with flag.reverse the returned bits are bit reversed.
+ pub fn readF(self: *Self, comptime U: type, comptime how: u3) !U {
+ const width: u6 = @bitSizeOf(U);
+ const want_fill = (how & flag.buffered) == 0;
+ const want_shift = (how & flag.peek) == 0;
+ const want_reverse = (how & flag.reverse) != 0;
+
+ if (want_fill) try self.fill(width); // ensure that the bits are in the buffer
+ const value: U = @truncate(self.bits); // low `width` bits
+ if (want_shift) try self.shift(width); // consume them
+ return if (want_reverse) @bitReverse(value) else value;
+ }
+
+ /// Read n number of bits.
+ /// Only buffered flag can be used in how.
+ /// With how == 0 the buffer is filled first; with flag.buffered the fill
+ /// is skipped. Any other value of `how` is unreachable.
+ pub fn readN(self: *Self, n: u4, comptime how: u3) !u16 {
+ switch (how) {
+ 0 => {
+ try self.fill(n);
+ },
+ flag.buffered => {},
+ else => unreachable,
+ }
+ // Keep only the low n bits of the buffer.
+ const mask: u16 = (@as(u16, 1) << n) - 1;
+ const u: u16 = @as(u16, @truncate(self.bits)) & mask;
+ try self.shift(n);
+ return u;
+ }
+
+ /// Advance buffer for n bits.
+ /// Returns error.EndOfStream if fewer than n bits are buffered; the
+ /// buffer is left untouched in that case.
+ pub fn shift(self: *Self, n: u6) !void {
+ if (n > self.nbits) return error.EndOfStream;
+ self.bits >>= n;
+ self.nbits -= n;
+ }
+
+ /// Skip n bytes, one byte at a time: each step fills the buffer to at
+ /// least 8 bits and then drops 8 bits.
+ pub fn skipBytes(self: *Self, n: u16) !void {
+ var remaining = n;
+ while (remaining > 0) : (remaining -= 1) {
+ try self.fill(8);
+ try self.shift(8);
+ }
+ }
+
+ // Number of bits to align stream to the byte boundary.
+ // Computed as the low three bits of nbits.
+ fn alignBits(self: *Self) u3 {
+ return @intCast(self.nbits & 0x7);
+ }
+
+ /// Align stream to the byte boundary.
+ /// Drops alignBits() bits. The shift cannot fail because alignBits() is
+ /// never larger than nbits.
+ pub fn alignToByte(self: *Self) void {
+ const ab = self.alignBits();
+ if (ab > 0) self.shift(ab) catch unreachable;
+ }
+
+ /// Skip zero terminated string: consume bytes up to and including the
+ /// first zero byte.
+ pub fn skipStringZ(self: *Self) !void {
+ while (try self.readF(u8, 0) != 0) {}
+ }
+
+ /// Read deflate fixed code.
+ /// Reads first 7 bits, and then maybe 1 or 2 more to get full 7, 8 or 9 bit code.
+ /// Returns the decoded symbol (0 - 287).
+ /// ref: https://datatracker.ietf.org/doc/html/rfc1951#page-12
+ /// Lit Value Bits Codes
+ /// --------- ---- -----
+ /// 0 - 143 8 00110000 through
+ /// 10111111
+ /// 144 - 255 9 110010000 through
+ /// 111111111
+ /// 256 - 279 7 0000000 through
+ /// 0010111
+ /// 280 - 287 8 11000000 through
+ /// 11000111
+ pub fn readFixedCode(self: *Self) !u16 {
+ // Ask for the longest possible code up front so the reads below can
+ // use flag.buffered.
+ try self.fill(7 + 2);
+ const code7 = try self.readF(u7, flag.buffered | flag.reverse);
+ if (code7 <= 0b0010_111) { // 7 bits, 256-279, codes 0000_000 - 0010_111
+ return @as(u16, code7) + 256;
+ } else if (code7 <= 0b1011_111) { // 8 bits, 0-143, codes 0011_0000 through 1011_1111
+ return (@as(u16, code7) << 1) + @as(u16, try self.readF(u1, flag.buffered)) - 0b0011_0000;
+ } else if (code7 <= 0b1100_011) { // 8 bit, 280-287, codes 1100_0000 - 1100_0111
+ return (@as(u16, code7 - 0b1100000) << 1) + try self.readF(u1, flag.buffered) + 280;
+ } else { // 9 bit, 144-255, codes 1_1001_0000 - 1_1111_1111
+ return (@as(u16, code7 - 0b1100_100) << 2) + @as(u16, try self.readF(u2, flag.buffered | flag.reverse)) + 144;
+ }
+ }
+ };
+}
+
+// Exercises readF (plain and peek), shift, alignBits, alignToByte and readN
+// on a 6 byte input and checks bits/nbits after each step.
+test "flate.BitReader" {
+ var fbs = std.io.fixedBufferStream(&[_]u8{ 0xf3, 0x48, 0xcd, 0xc9, 0x00, 0x00 });
+ var br = bitReader(fbs.reader());
+ const F = BitReader(@TypeOf(fbs.reader())).flag;
+
+ // init already loaded all 6 input bytes
+ try testing.expectEqual(@as(u8, 48), br.nbits);
+ try testing.expectEqual(@as(u64, 0xc9cd48f3), br.bits);
+
+ try testing.expect(try br.readF(u1, 0) == 0b0000_0001);
+ try testing.expect(try br.readF(u2, 0) == 0b0000_0001);
+ try testing.expectEqual(@as(u8, 48 - 3), br.nbits);
+ try testing.expectEqual(@as(u3, 5), br.alignBits());
+
+ // peek does not consume bits
+ try testing.expect(try br.readF(u8, F.peek) == 0b0001_1110);
+ try testing.expect(try br.readF(u9, F.peek) == 0b1_0001_1110);
+ try br.shift(9);
+ try testing.expectEqual(@as(u8, 36), br.nbits);
+ try testing.expectEqual(@as(u3, 4), br.alignBits());
+
+ try testing.expect(try br.readF(u4, 0) == 0b0100);
+ try testing.expectEqual(@as(u8, 32), br.nbits);
+ try testing.expectEqual(@as(u3, 0), br.alignBits());
+
+ try br.shift(1);
+ try testing.expectEqual(@as(u3, 7), br.alignBits());
+ try br.shift(1);
+ try testing.expectEqual(@as(u3, 6), br.alignBits());
+ br.alignToByte();
+ try testing.expectEqual(@as(u3, 0), br.alignBits());
+
+ try testing.expectEqual(@as(u64, 0xc9), br.bits);
+ try testing.expectEqual(@as(u16, 0x9), try br.readN(4, 0));
+ try testing.expectEqual(@as(u16, 0xc), try br.readN(4, 0));
+}
+
+// Decodes a fixed Huffman (type 1) block by hand with readF, then reads the
+// byte aligned data that follows it.
+test "flate.BitReader read block type 1 data" {
+ const data = [_]u8{
+ 0xf3, 0x48, 0xcd, 0xc9, 0xc9, 0x57, 0x28, 0xcf, // deflate data block type 1
+ 0x2f, 0xca, 0x49, 0xe1, 0x02, 0x00,
+ 0x0c, 0x01, 0x02, 0x03, //
+ 0xaa, 0xbb, 0xcc, 0xdd,
+ };
+ var fbs = std.io.fixedBufferStream(&data);
+ var br = bitReader(fbs.reader());
+ const F = BitReader(@TypeOf(fbs.reader())).flag;
+
+ try testing.expectEqual(@as(u1, 1), try br.readF(u1, 0)); // bfinal
+ try testing.expectEqual(@as(u2, 1), try br.readF(u2, 0)); // block_type
+
+ // each literal is an 8 bit reversed code; subtracting 0x30 gives the byte
+ for ("Hello world\n") |c| {
+ try testing.expectEqual(@as(u8, c), try br.readF(u8, F.reverse) - 0x30);
+ }
+ try testing.expectEqual(@as(u7, 0), try br.readF(u7, 0)); // end of block
+ br.alignToByte();
+ try testing.expectEqual(@as(u32, 0x0302010c), try br.readF(u32, 0));
+ try testing.expectEqual(@as(u16, 0xbbaa), try br.readF(u16, 0));
+ try testing.expectEqual(@as(u16, 0xddcc), try br.readF(u16, 0));
+}
+
+// Checks the buffer contents after init and after fill is called with
+// different amounts of free space in the buffer.
+test "flate.BitReader init" {
+ const data = [_]u8{
+ 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
+ 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
+ };
+ var fbs = std.io.fixedBufferStream(&data);
+ var br = bitReader(fbs.reader());
+
+ // init loaded the first 8 bytes, little endian
+ try testing.expectEqual(@as(u64, 0x08_07_06_05_04_03_02_01), br.bits);
+ try br.shift(8);
+ try testing.expectEqual(@as(u64, 0x00_08_07_06_05_04_03_02), br.bits);
+ try br.fill(60); // fill with 1 byte
+ try testing.expectEqual(@as(u64, 0x01_08_07_06_05_04_03_02), br.bits);
+ try br.shift(8 * 4 + 4);
+ try testing.expectEqual(@as(u64, 0x00_00_00_00_00_10_80_70), br.bits);
+
+ try br.fill(60); // fill with 4 bytes (shift by 4)
+ try testing.expectEqual(@as(u64, 0x00_50_40_30_20_10_80_70), br.bits);
+ try testing.expectEqual(@as(u8, 8 * 7 + 4), br.nbits);
+
+ try br.shift(@intCast(br.nbits)); // clear buffer
+ try br.fill(8); // refill with the rest of the bytes
+ try testing.expectEqual(@as(u64, 0x00_00_00_00_00_08_07_06), br.bits);
+}
+
+// readAll must return the buffered bytes followed by the bytes that are
+// still in the forward reader, leaving the bit buffer empty.
+test "flate.BitReader readAll" {
+ const data = [_]u8{
+ 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
+ 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
+ };
+ var fbs = std.io.fixedBufferStream(&data);
+ var br = bitReader(fbs.reader());
+
+ try testing.expectEqual(@as(u64, 0x08_07_06_05_04_03_02_01), br.bits);
+
+ var out: [16]u8 = undefined;
+ try br.readAll(out[0..]);
+ try testing.expect(br.nbits == 0);
+ try testing.expect(br.bits == 0);
+
+ try testing.expectEqualSlices(u8, data[0..16], &out);
+}
+
+// Reads fixed_codes from huffman_encoder.zig back with readFixedCode and
+// expects the symbols 0..285 in order, with no bits left over.
+test "flate.BitReader readFixedCode" {
+ const fixed_codes = @import("huffman_encoder.zig").fixed_codes;
+
+ var fbs = std.io.fixedBufferStream(&fixed_codes);
+ var rdr = bitReader(fbs.reader());
+
+ for (0..286) |c| {
+ try testing.expectEqual(c, try rdr.readFixedCode());
+ }
+ try testing.expect(rdr.nbits == 0);
+}
diff --git a/lib/std/compress/flate/bit_writer.zig b/lib/std/compress/flate/bit_writer.zig
new file mode 100644
index 0000000000..b5d84c7e2a
--- /dev/null
+++ b/lib/std/compress/flate/bit_writer.zig
@@ -0,0 +1,99 @@
+const std = @import("std");
+const assert = std.debug.assert;
+
+/// Bit writer for use in deflate (compression).
+///
+/// Has internal bits buffer of 64 bits and internal bytes buffer of 248 bytes.
+/// When we accumulate 48 bits 6 bytes are moved to the bytes buffer. When we
+/// accumulate 240 bytes they are flushed to the underlying inner_writer.
+///
+/// All output goes through `inner_writer.writeAll`, so a writer that accepts
+/// only part of a buffer per `write` call does not lose data.
+pub fn BitWriter(comptime WriterType: type) type {
+ // buffer_flush_size indicates the buffer size
+ // after which bytes are flushed to the writer.
+ // Should preferably be a multiple of 6, since
+ // we accumulate 6 bytes between writes to the buffer.
+ const buffer_flush_size = 240;
+
+ // buffer_size is the actual output byte buffer size.
+ // It must have additional headroom for a flush
+ // which can contain up to 8 bytes.
+ const buffer_size = buffer_flush_size + 8;
+
+ return struct {
+ inner_writer: WriterType,
+
+ // Data waiting to be written is bytes[0 .. nbytes]
+ // and then the low nbits of bits. Data is always written
+ // sequentially into the bytes array.
+ bits: u64 = 0,
+ nbits: u32 = 0, // number of bits
+ bytes: [buffer_size]u8 = undefined,
+ nbytes: u32 = 0, // number of bytes
+
+ const Self = @This();
+
+ pub const Error = WriterType.Error || error{UnfinishedBits};
+
+ /// Create a bit writer with empty buffers on top of `writer`.
+ pub fn init(writer: WriterType) Self {
+ return .{ .inner_writer = writer };
+ }
+
+ /// Replace the underlying writer. Buffered bits and bytes are kept.
+ pub fn setWriter(self: *Self, new_writer: WriterType) void {
+ //assert(self.bits == 0 and self.nbits == 0 and self.nbytes == 0);
+ self.inner_writer = new_writer;
+ }
+
+ /// Move all pending bits to the bytes buffer, padding the last partial
+ /// byte with zero bits, and write the whole bytes buffer to the
+ /// underlying writer.
+ pub fn flush(self: *Self) Error!void {
+ var n = self.nbytes;
+ while (self.nbits != 0) {
+ self.bytes[n] = @as(u8, @truncate(self.bits));
+ self.bits >>= 8;
+ if (self.nbits > 8) { // Avoid underflow
+ self.nbits -= 8;
+ } else {
+ self.nbits = 0;
+ }
+ n += 1;
+ }
+ self.bits = 0;
+ // writeAll: a short write must not drop buffered output.
+ try self.inner_writer.writeAll(self.bytes[0..n]);
+ self.nbytes = 0;
+ }
+
+ /// Append the low `nb` bits of `b` to the bit buffer. Once 48 or more
+ /// bits are pending, 6 bytes are moved to the bytes buffer, which is
+ /// written out when it reaches buffer_flush_size.
+ pub fn writeBits(self: *Self, b: u32, nb: u32) Error!void {
+ self.bits |= @as(u64, @intCast(b)) << @as(u6, @intCast(self.nbits));
+ self.nbits += nb;
+ if (self.nbits < 48)
+ return;
+
+ var n = self.nbytes;
+ // Stores 8 bytes but only the first 6 are counted; the other two are
+ // overwritten by the next store (buffer_size has headroom for this).
+ std.mem.writeInt(u64, self.bytes[n..][0..8], self.bits, .little);
+ n += 6;
+ if (n >= buffer_flush_size) {
+ try self.inner_writer.writeAll(self.bytes[0..n]);
+ n = 0;
+ }
+ self.nbytes = n;
+ self.bits >>= 48;
+ self.nbits -= 48;
+ }
+
+ /// Write `bytes` directly to the underlying writer, after first writing
+ /// out everything that is buffered. Returns error.UnfinishedBits if
+ /// the bit buffer does not hold a whole number of bytes.
+ pub fn writeBytes(self: *Self, bytes: []const u8) Error!void {
+ var n = self.nbytes;
+ if (self.nbits & 7 != 0) {
+ return error.UnfinishedBits;
+ }
+ while (self.nbits != 0) {
+ self.bytes[n] = @as(u8, @truncate(self.bits));
+ self.bits >>= 8;
+ self.nbits -= 8;
+ n += 1;
+ }
+ if (n != 0) {
+ try self.inner_writer.writeAll(self.bytes[0..n]);
+ }
+ self.nbytes = 0;
+ try self.inner_writer.writeAll(bytes);
+ }
+ };
+}
diff --git a/lib/std/compress/flate/block_writer.zig b/lib/std/compress/flate/block_writer.zig
new file mode 100644
index 0000000000..394f1cbec5
--- /dev/null
+++ b/lib/std/compress/flate/block_writer.zig
@@ -0,0 +1,706 @@
+const std = @import("std");
+const io = std.io;
+const assert = std.debug.assert;
+
+const hc = @import("huffman_encoder.zig");
+const consts = @import("consts.zig").huffman;
+const Token = @import("Token.zig");
+const BitWriter = @import("bit_writer.zig").BitWriter;
+
+/// Creates a `BlockWriter` instantiated for the type of `writer`.
+pub fn blockWriter(writer: anytype) BlockWriter(@TypeOf(writer)) {
+ const Writer = BlockWriter(@TypeOf(writer));
+ return Writer.init(writer);
+}
+
+/// Accepts list of tokens, decides what is best block type to write. What block
+/// type will provide best compression. Writes header and body of the block.
+///
+pub fn BlockWriter(comptime WriterType: type) type {
+ const BitWriterType = BitWriter(WriterType);
+ return struct {
+ const codegen_order = consts.codegen_order;
+ const end_code_mark = 255;
+ const Self = @This();
+
+ pub const Error = BitWriterType.Error;
+ bit_writer: BitWriterType,
+
+ codegen_freq: [consts.codegen_code_count]u16 = undefined,
+ literal_freq: [consts.max_num_lit]u16 = undefined,
+ distance_freq: [consts.distance_code_count]u16 = undefined,
+ codegen: [consts.max_num_lit + consts.distance_code_count + 1]u8 = undefined,
+ literal_encoding: hc.LiteralEncoder = .{},
+ distance_encoding: hc.DistanceEncoder = .{},
+ codegen_encoding: hc.CodegenEncoder = .{},
+ fixed_literal_encoding: hc.LiteralEncoder,
+ fixed_distance_encoding: hc.DistanceEncoder,
+ huff_distance: hc.DistanceEncoder,
+
+ /// Create a block writer on top of `writer`. Sets up the bit writer and
+ /// the fixed literal, fixed distance and huffman-only distance
+ /// encoders; all other fields keep their defaults.
+ pub fn init(writer: WriterType) Self {
+ return .{
+ .bit_writer = BitWriterType.init(writer),
+ .fixed_literal_encoding = hc.fixedLiteralEncoder(),
+ .fixed_distance_encoding = hc.fixedDistanceEncoder(),
+ .huff_distance = hc.huffmanDistanceEncoder(),
+ };
+ }
+
+ /// Flush internal bit buffer to the writer.
+ /// Should be called only when bit stream is at byte boundary.
+ ///
+ /// That is after final block; when last byte could be incomplete or
+ /// after stored block; which is aligned to the byte boundary (it has x
+ /// padding bits after first 3 bits).
+ pub fn flush(self: *Self) Error!void {
+ try self.bit_writer.flush();
+ }
+
+ /// Replace the writer used by the underlying bit writer.
+ pub fn setWriter(self: *Self, new_writer: WriterType) void {
+ self.bit_writer.setWriter(new_writer);
+ }
+
+ // Write one Huffman code: c.len bits taken from c.code.
+ fn writeCode(self: *Self, c: hc.HuffCode) Error!void {
+ try self.bit_writer.writeBits(c.code, c.len);
+ }
+
+ // RFC 1951 3.2.7 specifies a special run-length encoding for specifying
+ // the literal and distance lengths arrays (which are concatenated into a single
+ // array). This method generates that run-length encoding.
+ //
+ // The result is written into the codegen array, and the frequencies
+ // of each code is written into the codegen_freq array.
+ // Codes 0-15 are single byte codes. Codes 16-18 are followed by additional
+ // information. Code end_code_mark (255) is an end marker
+ //
+ // num_literals: The number of literals in literal_encoding
+ // num_distances: The number of distances in distance_encoding
+ // lit_enc: The literal encoder to use
+ // dist_enc: The distance encoder to use
+ fn generateCodegen(
+ self: *Self,
+ num_literals: u32,
+ num_distances: u32,
+ lit_enc: *hc.LiteralEncoder,
+ dist_enc: *hc.DistanceEncoder,
+ ) void {
+ // Reset the code frequencies from the previous block.
+ for (self.codegen_freq, 0..) |_, i| {
+ self.codegen_freq[i] = 0;
+ }
+
+ // Note that we are using codegen both as a temporary variable for holding
+ // a copy of the frequencies, and as the place where we put the result.
+ // This is fine because the output is always shorter than the input used
+ // so far.
+ var codegen = &self.codegen; // cache
+ // Copy the concatenated code sizes to codegen. Put a marker at the end.
+ var cgnl = codegen[0..num_literals];
+ for (cgnl, 0..) |_, i| {
+ cgnl[i] = @as(u8, @intCast(lit_enc.codes[i].len));
+ }
+
+ cgnl = codegen[num_literals .. num_literals + num_distances];
+ for (cgnl, 0..) |_, i| {
+ cgnl[i] = @as(u8, @intCast(dist_enc.codes[i].len));
+ }
+ codegen[num_literals + num_distances] = end_code_mark;
+
+ var size = codegen[0];
+ var count: i32 = 1;
+ var out_index: u32 = 0;
+ var in_index: u32 = 1;
+ while (size != end_code_mark) : (in_index += 1) {
+ // INVARIANT: We have seen "count" copies of size that have not yet
+ // had output generated for them.
+ const next_size = codegen[in_index];
+ if (next_size == size) {
+ count += 1;
+ continue;
+ }
+ // We need to generate codegen indicating "count" of size.
+ if (size != 0) {
+ // Non-zero size: emit it once, then code 16 for each further
+ // run of 3 to 6 repeats.
+ codegen[out_index] = size;
+ out_index += 1;
+ self.codegen_freq[size] += 1;
+ count -= 1;
+ while (count >= 3) {
+ var n: i32 = 6;
+ if (n > count) {
+ n = count;
+ }
+ codegen[out_index] = 16;
+ out_index += 1;
+ codegen[out_index] = @as(u8, @intCast(n - 3));
+ out_index += 1;
+ self.codegen_freq[16] += 1;
+ count -= n;
+ }
+ } else {
+ // Zero size: code 18 for runs of 11 to 138 zeros, then code 17
+ // for a remaining run of 3 to 10 zeros.
+ while (count >= 11) {
+ var n: i32 = 138;
+ if (n > count) {
+ n = count;
+ }
+ codegen[out_index] = 18;
+ out_index += 1;
+ codegen[out_index] = @as(u8, @intCast(n - 11));
+ out_index += 1;
+ self.codegen_freq[18] += 1;
+ count -= n;
+ }
+ if (count >= 3) {
+ // 3 <= count <= 10
+ codegen[out_index] = 17;
+ out_index += 1;
+ codegen[out_index] = @as(u8, @intCast(count - 3));
+ out_index += 1;
+ self.codegen_freq[17] += 1;
+ count = 0;
+ }
+ }
+ // Whatever is left (fewer than 3 copies) is written out literally.
+ count -= 1;
+ while (count >= 0) : (count -= 1) {
+ codegen[out_index] = size;
+ out_index += 1;
+ self.codegen_freq[size] += 1;
+ }
+ // Set up invariant for next time through the loop.
+ size = next_size;
+ count = 1;
+ }
+ // Marker indicating the end of the codegen.
+ codegen[out_index] = end_code_mark;
+ }
+
+ // Result of dynamicSize: total size in bits and the number of codegen
+ // code lengths that have to be written in the block header.
+ const DynamicSize = struct {
+ size: u32,
+ num_codegens: u32,
+ };
+
+ // dynamicSize returns the size of dynamically encoded data in bits.
+ // Uses literal_freq, distance_freq and codegen_freq as they currently
+ // stand; extra_bits is added to the total unchanged.
+ fn dynamicSize(
+ self: *Self,
+ lit_enc: *hc.LiteralEncoder, // literal encoder
+ dist_enc: *hc.DistanceEncoder, // distance encoder
+ extra_bits: u32,
+ ) DynamicSize {
+ // Trailing codegen entries (in codegen_order) with zero frequency are
+ // not counted, but at least 4 entries always are.
+ var num_codegens = self.codegen_freq.len;
+ while (num_codegens > 4 and self.codegen_freq[codegen_order[num_codegens - 1]] == 0) {
+ num_codegens -= 1;
+ }
+ // Codes 16, 17 and 18 carry 2, 3 and 7 extra bits respectively.
+ const header = 3 + 5 + 5 + 4 + (3 * num_codegens) +
+ self.codegen_encoding.bitLength(self.codegen_freq[0..]) +
+ self.codegen_freq[16] * 2 +
+ self.codegen_freq[17] * 3 +
+ self.codegen_freq[18] * 7;
+ const size = header +
+ lit_enc.bitLength(&self.literal_freq) +
+ dist_enc.bitLength(&self.distance_freq) +
+ extra_bits;
+
+ return DynamicSize{
+ .size = @as(u32, @intCast(size)),
+ .num_codegens = @as(u32, @intCast(num_codegens)),
+ };
+ }
+
+ // fixedSize returns the size in bits of the data encoded with the fixed
+ // Huffman encodings: 3 header bits, the literal and distance code bits for
+ // the current literal_freq and distance_freq, plus extra_bits.
+ fn fixedSize(self: *Self, extra_bits: u32) u32 {
+ return 3 +
+ self.fixed_literal_encoding.bitLength(&self.literal_freq) +
+ self.fixed_distance_encoding.bitLength(&self.distance_freq) +
+ extra_bits;
+ }
+
+ // Result of storedSizeFits: size in bits and whether the input can be
+ // written as a single stored block.
+ const StoredSize = struct {
+ size: u32,
+ storable: bool,
+ };
+
+ // Computes the size in bits of `in` written as a stored block, header
+ // included: (len + 5) * 8. Reports storable = false with size 0 when
+ // there is no input or when the input is longer than
+ // consts.max_store_block_size.
+ fn storedSizeFits(in: ?[]const u8) StoredSize {
+ const not_storable: StoredSize = .{ .size = 0, .storable = false };
+ const bytes = in orelse return not_storable;
+ if (bytes.len > consts.max_store_block_size) return not_storable;
+ return .{ .size = @as(u32, @intCast((bytes.len + 5) * 8)), .storable = true };
+ }
+
+ // Write the header of a dynamic Huffman block to the output stream.
+ // Expects codegen and codegen_encoding to be already generated.
+ //
+ // num_literals: The number of literals specified in codegen
+ // num_distances: The number of distances specified in codegen
+ // num_codegens: The number of codegens used in codegen
+ // eof: Is it the end-of-file? (end of stream)
+ fn dynamicHeader(
+ self: *Self,
+ num_literals: u32,
+ num_distances: u32,
+ num_codegens: u32,
+ eof: bool,
+ ) Error!void {
+ // 3 header bits: 5 for the last block, 4 otherwise.
+ const first_bits: u32 = if (eof) 5 else 4;
+ try self.bit_writer.writeBits(first_bits, 3);
+ // The three counts are stored relative to their minimum values.
+ try self.bit_writer.writeBits(num_literals - 257, 5);
+ try self.bit_writer.writeBits(num_distances - 1, 5);
+ try self.bit_writer.writeBits(num_codegens - 4, 4);
+
+ // Code lengths of the codegen alphabet, 3 bits each, in codegen_order.
+ var i: u32 = 0;
+ while (i < num_codegens) : (i += 1) {
+ const value = self.codegen_encoding.codes[codegen_order[i]].len;
+ try self.bit_writer.writeBits(value, 3);
+ }
+
+ // The run-length encoded code lengths, up to the end marker.
+ i = 0;
+ while (true) {
+ const code_word: u32 = @as(u32, @intCast(self.codegen[i]));
+ i += 1;
+ if (code_word == end_code_mark) {
+ break;
+ }
+ try self.writeCode(self.codegen_encoding.codes[@as(u32, @intCast(code_word))]);
+
+ // Codes 16, 17 and 18 are followed by a repeat count stored in
+ // the next codegen entry, written with 2, 3 and 7 bits.
+ switch (code_word) {
+ 16 => {
+ try self.bit_writer.writeBits(self.codegen[i], 2);
+ i += 1;
+ },
+ 17 => {
+ try self.bit_writer.writeBits(self.codegen[i], 3);
+ i += 1;
+ },
+ 18 => {
+ try self.bit_writer.writeBits(self.codegen[i], 7);
+ i += 1;
+ },
+ else => {},
+ }
+ }
+ }
+
+ // Write the header of a stored block: 3 header bits (1 for the last
+ // block, 0 otherwise), a flush of the bit writer, then `length` and its
+ // bitwise complement as 16 bits each. `length` must be at most 65535.
+ fn storedHeader(self: *Self, length: usize, eof: bool) Error!void {
+ assert(length <= 65535);
+ const flag: u32 = if (eof) 1 else 0;
+ try self.bit_writer.writeBits(flag, 3);
+ try self.flush();
+ const l: u16 = @intCast(length);
+ try self.bit_writer.writeBits(l, 16);
+ try self.bit_writer.writeBits(~l, 16);
+ }
+
+ // Emit the 3 header bits of a fixed Huffman block: 3 when this is the
+ // last block (eof), 2 otherwise.
+ fn fixedHeader(self: *Self, eof: bool) Error!void {
+ const header_bits: u32 = if (eof) 3 else 2;
+ try self.bit_writer.writeBits(header_bits, 3);
+ }
+
+ // Write a block of tokens with the smallest encoding. Will choose block type.
+ // The original input can be supplied, and if the huffman encoded data
+ // is larger than the original bytes, the data will be written as a
+ // stored block.
+ // If the input is null, the tokens will always be Huffman encoded.
+ //
+ // tokens: tokens of the block, without the end_block_marker
+ // eof: true if this is the last block of the stream
+ // input: the bytes the tokens were produced from, or null
+ pub fn write(self: *Self, tokens: []const Token, eof: bool, input: ?[]const u8) Error!void {
+ const lit_and_dist = self.indexTokens(tokens);
+ const num_literals = lit_and_dist.num_literals;
+ const num_distances = lit_and_dist.num_distances;
+
+ var extra_bits: u32 = 0;
+ const ret = storedSizeFits(input);
+ const stored_size = ret.size;
+ const storable = ret.storable;
+
+ if (storable) {
+ // We only bother calculating the costs of the extra bits required by
+ // the length of distance fields (which will be the same for both fixed
+ // and dynamic encoding), if we need to compare those two encodings
+ // against stored encoding.
+ var length_code: u16 = Token.length_codes_start + 8;
+ while (length_code < num_literals) : (length_code += 1) {
+ // First eight length codes have extra size = 0.
+ extra_bits += @as(u32, @intCast(self.literal_freq[length_code])) *
+ @as(u32, @intCast(Token.lengthExtraBits(length_code)));
+ }
+ var distance_code: u16 = 4;
+ while (distance_code < num_distances) : (distance_code += 1) {
+ // First four distance codes have extra size = 0.
+ extra_bits += @as(u32, @intCast(self.distance_freq[distance_code])) *
+ @as(u32, @intCast(Token.distanceExtraBits(distance_code)));
+ }
+ }
+
+ // Figure out smallest code.
+ // Fixed Huffman baseline.
+ var literal_encoding = &self.fixed_literal_encoding;
+ var distance_encoding = &self.fixed_distance_encoding;
+ var size = self.fixedSize(extra_bits);
+
+ // Dynamic Huffman?
+ var num_codegens: u32 = 0;
+
+ // Generate codegen and codegenFrequencies, which indicates how to encode
+ // the literal_encoding and the distance_encoding.
+ self.generateCodegen(
+ num_literals,
+ num_distances,
+ &self.literal_encoding,
+ &self.distance_encoding,
+ );
+ self.codegen_encoding.generate(self.codegen_freq[0..], 7);
+ const dynamic_size = self.dynamicSize(
+ &self.literal_encoding,
+ &self.distance_encoding,
+ extra_bits,
+ );
+ const dyn_size = dynamic_size.size;
+ num_codegens = dynamic_size.num_codegens;
+
+ // Dynamic encoding is used only when it is strictly smaller than fixed.
+ if (dyn_size < size) {
+ size = dyn_size;
+ literal_encoding = &self.literal_encoding;
+ distance_encoding = &self.distance_encoding;
+ }
+
+ // Stored bytes?
+ if (storable and stored_size < size) {
+ try self.storedBlock(input.?, eof);
+ return;
+ }
+
+ // Huffman.
+ // The chosen encoder pointer tells which header has to be written.
+ if (@intFromPtr(literal_encoding) == @intFromPtr(&self.fixed_literal_encoding)) {
+ try self.fixedHeader(eof);
+ } else {
+ try self.dynamicHeader(num_literals, num_distances, num_codegens, eof);
+ }
+
+ // Write the tokens.
+ try self.writeTokens(tokens, &literal_encoding.codes, &distance_encoding.codes);
+ }
+
+ /// Write `input` as a stored (uncompressed) block: stored block header
+ /// followed by the input bytes. `eof` marks the last block.
+ /// The header asserts that input.len is at most 65535.
+ pub fn storedBlock(self: *Self, input: []const u8, eof: bool) Error!void {
+ try self.storedHeader(input.len, eof);
+ try self.bit_writer.writeBytes(input);
+ }
+
+ // dynamicBlock encodes a block using a dynamic Huffman table.
+ // This should be used if the symbols used have a disproportionate
+ // histogram distribution.
+ // If input is supplied and the compression savings are below 1/16th of the
+ // input size the block is stored.
+ fn dynamicBlock(
+ self: *Self,
+ tokens: []const Token,
+ eof: bool,
+ input: ?[]const u8,
+ ) Error!void {
+ const total_tokens = self.indexTokens(tokens);
+ const num_literals = total_tokens.num_literals;
+ const num_distances = total_tokens.num_distances;
+
+ // Generate codegen and codegenFrequencies, which indicates how to encode
+ // the literal_encoding and the distance_encoding.
+ self.generateCodegen(
+ num_literals,
+ num_distances,
+ &self.literal_encoding,
+ &self.distance_encoding,
+ );
+ self.codegen_encoding.generate(self.codegen_freq[0..], 7);
+ const dynamic_size = self.dynamicSize(&self.literal_encoding, &self.distance_encoding, 0);
+ const size = dynamic_size.size;
+ const num_codegens = dynamic_size.num_codegens;
+
+ // Store bytes, if we don't get a reasonable improvement.
+
+ const stored_size = storedSizeFits(input);
+ const ssize = stored_size.size;
+ const storable = stored_size.storable;
+ // Stored wins when it is smaller than the dynamic size plus 1/16th.
+ if (storable and ssize < (size + (size >> 4))) {
+ try self.storedBlock(input.?, eof);
+ return;
+ }
+
+ // Write Huffman table.
+ try self.dynamicHeader(num_literals, num_distances, num_codegens, eof);
+
+ // Write the tokens.
+ try self.writeTokens(tokens, &self.literal_encoding.codes, &self.distance_encoding.codes);
+ }
+
+ // Result of indexTokens: number of literal/length codes and number of
+ // distance codes in use.
+ const TotalIndexedTokens = struct {
+ num_literals: u32,
+ num_distances: u32,
+ };
+
+ // Indexes a slice of tokens followed by an end_block_marker, and updates
+ // literal_freq and distance_freq, and generates literal_encoding
+ // and distance_encoding.
+ // The number of literal and distance tokens is returned.
+ fn indexTokens(self: *Self, tokens: []const Token) TotalIndexedTokens {
+ var num_literals: u32 = 0;
+ var num_distances: u32 = 0;
+
+ // Start from empty frequency tables.
+ for (self.literal_freq, 0..) |_, i| {
+ self.literal_freq[i] = 0;
+ }
+ for (self.distance_freq, 0..) |_, i| {
+ self.distance_freq[i] = 0;
+ }
+
+ // A literal counts once in literal_freq; any other token counts its
+ // length code in literal_freq and its distance code in distance_freq.
+ for (tokens) |t| {
+ if (t.kind == Token.Kind.literal) {
+ self.literal_freq[t.literal()] += 1;
+ continue;
+ }
+ self.literal_freq[t.lengthCode()] += 1;
+ self.distance_freq[t.distanceCode()] += 1;
+ }
+ // add end_block_marker token at the end
+ self.literal_freq[consts.end_block_marker] += 1;
+
+ // get the number of literals
+ // (index of the last used entry + 1; the end_block_marker entry is
+ // non-zero, so the loop stops)
+ num_literals = @as(u32, @intCast(self.literal_freq.len));
+ while (self.literal_freq[num_literals - 1] == 0) {
+ num_literals -= 1;
+ }
+ // get the number of distances
+ num_distances = @as(u32, @intCast(self.distance_freq.len));
+ while (num_distances > 0 and self.distance_freq[num_distances - 1] == 0) {
+ num_distances -= 1;
+ }
+ if (num_distances == 0) {
+ // We haven't found a single match. If we want to go with the dynamic encoding,
+ // we should count at least one distance to be sure that the distance huffman tree could be encoded.
+ self.distance_freq[0] = 1;
+ num_distances = 1;
+ }
+ self.literal_encoding.generate(&self.literal_freq, 15);
+ self.distance_encoding.generate(&self.distance_freq, 15);
+ return TotalIndexedTokens{
+ .num_literals = num_literals,
+ .num_distances = num_distances,
+ };
+ }
+
+ // Writes a slice of tokens to the output followed by an end_block_marker.
+ // codes for literal and distance encoding must be supplied.
+ //
+ // le_codes: codes for literals and lengths
+ // oe_codes: codes for distances
+ fn writeTokens(
+ self: *Self,
+ tokens: []const Token,
+ le_codes: []hc.HuffCode,
+ oe_codes: []hc.HuffCode,
+ ) Error!void {
+ for (tokens) |t| {
+ if (t.kind == Token.Kind.literal) {
+ try self.writeCode(le_codes[t.literal()]);
+ continue;
+ }
+
+ // Write the length
+ // (length code, then its extra bits if it has any)
+ const le = t.lengthEncoding();
+ try self.writeCode(le_codes[le.code]);
+ if (le.extra_bits > 0) {
+ try self.bit_writer.writeBits(le.extra_length, le.extra_bits);
+ }
+
+ // Write the distance
+ // (distance code, then its extra bits if it has any)
+ const oe = t.distanceEncoding();
+ try self.writeCode(oe_codes[oe.code]);
+ if (oe.extra_bits > 0) {
+ try self.bit_writer.writeBits(oe.extra_distance, oe.extra_bits);
+ }
+ }
+ // add end_block_marker at the end
+ try self.writeCode(le_codes[consts.end_block_marker]);
+ }
+
+ // Encodes a block of bytes as either Huffman encoded literals or uncompressed bytes
+ // if the results only gains very little from compression.
+ //
+ // input: the bytes of the block; no matches are searched for
+ // eof: true if this is the last block of the stream
+ pub fn huffmanBlock(self: *Self, input: []const u8, eof: bool) Error!void {
+ // Add everything as literals
+ histogram(input, &self.literal_freq);
+
+ self.literal_freq[consts.end_block_marker] = 1;
+
+ // All codes up to and including the end_block_marker are described
+ // in the header, and a single distance code.
+ const num_literals = consts.end_block_marker + 1;
+ self.distance_freq[0] = 1;
+ const num_distances = 1;
+
+ self.literal_encoding.generate(&self.literal_freq, 15);
+
+ // Figure out smallest code.
+ // Always use dynamic Huffman or Store
+ var num_codegens: u32 = 0;
+
+ // Generate codegen and codegenFrequencies, which indicates how to encode
+ // the literal_encoding and the distance_encoding.
+ self.generateCodegen(
+ num_literals,
+ num_distances,
+ &self.literal_encoding,
+ &self.huff_distance,
+ );
+ self.codegen_encoding.generate(self.codegen_freq[0..], 7);
+ const dynamic_size = self.dynamicSize(&self.literal_encoding, &self.huff_distance, 0);
+ const size = dynamic_size.size;
+ num_codegens = dynamic_size.num_codegens;
+
+ // Store bytes, if we don't get a reasonable improvement.
+ const stored_size_ret = storedSizeFits(input);
+ const ssize = stored_size_ret.size;
+ const storable = stored_size_ret.storable;
+
+ // Stored wins when it is smaller than the Huffman size plus 1/16th.
+ if (storable and ssize < (size + (size >> 4))) {
+ try self.storedBlock(input, eof);
+ return;
+ }
+
+ // Huffman.
+ try self.dynamicHeader(num_literals, num_distances, num_codegens, eof);
+ // Only the 256 byte values and the end_block_marker are needed.
+ const encoding = self.literal_encoding.codes[0..257];
+
+ for (input) |t| {
+ const c = encoding[t];
+ try self.bit_writer.writeBits(c.code, c.len);
+ }
+ try self.writeCode(encoding[consts.end_block_marker]);
+ }
+
+ // Counts how often each byte value occurs in `b`, storing the counts in `h`.
+ // All of `h` is zeroed first; only the first 256 entries (one per byte
+ // value) can be incremented afterwards.
+ fn histogram(b: []const u8, h: *[286]u16) void {
+     @memset(h, 0);
+     for (b) |byte| {
+         h[byte] += 1;
+     }
+ }
+ };
+}
+
+// tests
+const expect = std.testing.expect;
+const fmt = std.fmt;
+const testing = std.testing;
+const ArrayList = std.ArrayList;
+
+const TestCase = @import("testdata/block_writer.zig").TestCase;
+const testCases = @import("testdata/block_writer.zig").testCases;
+
+// Guards against changes in the output of `write`: every test case is
+// compared against its stored reference output.
+test "flate.BlockWriter write" {
+    inline for (testCases) |tc| {
+        try testBlock(tc, .write_block);
+    }
+}
+
+// Guards against changes in the output of `dynamicBlock`: every test case is
+// compared against its stored reference output.
+test "flate.BlockWriter dynamicBlock" {
+    inline for (testCases) |tc| {
+        try testBlock(tc, .write_dyn_block);
+    }
+}
+
+// Guards against changes in the output of `huffmanBlock`. Besides the shared
+// test cases, one extra input that has no token list is checked.
+test "flate.BlockWriter huffmanBlock" {
+    inline for (testCases) |tc| {
+        try testBlock(tc, .write_huffman_block);
+    }
+    const rand_max_case = .{
+        .tokens = &[_]Token{},
+        .input = "huffman-rand-max.input",
+        .want = "huffman-rand-max.{s}.expect",
+    };
+    try testBlock(rand_max_case, .write_huffman_block);
+}
+
+// Selects which BlockWriter entry point a test exercises.
+const TestFn = enum {
+    write_block,
+    write_dyn_block, // write dynamic block
+    write_huffman_block,
+
+    // Short tag that is substituted into the expected-output file name.
+    fn to_s(self: TestFn) []const u8 {
+        switch (self) {
+            .write_block => return "wb",
+            .write_dyn_block => return "dyn",
+            .write_huffman_block => return "huff",
+        }
+    }
+
+    // Calls the selected BlockWriter method on `bw` and flushes afterwards.
+    // The Huffman variant needs `input` to be non-null.
+    fn write(
+        comptime self: TestFn,
+        bw: anytype,
+        tok: []const Token,
+        input: ?[]const u8,
+        final: bool,
+    ) !void {
+        switch (self) {
+            .write_huffman_block => try bw.huffmanBlock(input.?, final),
+            .write_dyn_block => try bw.dynamicBlock(tok, final, input),
+            .write_block => try bw.write(tok, final, input),
+        }
+        try bw.flush();
+    }
+};
+
+// testBlock tests a block against its reference files, which are embedded
+// from `testdata/block_writer/` at compile time.
+//
+// size
+// 64K [file-name].input - input non compressed file
+// 8.1K [file-name].golden -
+// 78 [file-name].dyn.expect - output of dynamicBlock
+// 78 [file-name].wb.expect - output of write
+// 8.1K [file-name].huff.expect - output of huffmanBlock
+// 78 [file-name].dyn.expect-noinput - output of dynamicBlock when input is null
+// 78 [file-name].wb.expect-noinput - output of write when input is null
+//
+// The `{s}` placeholder in the test case file names is replaced by `tfn.to_s()`:
+// wb - write
+// dyn - dynamicBlock
+// huff - huffmanBlock
+//
+fn testBlock(comptime tc: TestCase, comptime tfn: TestFn) !void {
+ // Run with input only when the test case names both an input and an expected file.
+ if (tc.input.len != 0 and tc.want.len != 0) {
+ const want_name = comptime fmt.comptimePrint(tc.want, .{tfn.to_s()});
+ const input = @embedFile("testdata/block_writer/" ++ tc.input);
+ const want = @embedFile("testdata/block_writer/" ++ want_name);
+ try testWriteBlock(tfn, input, want, tc.tokens);
+ }
+
+ // huffmanBlock always needs input, so it has no "no input" variant.
+ if (tfn == .write_huffman_block) {
+ return;
+ }
+
+ // Same tokens again, this time without the original input bytes.
+ const want_name_no_input = comptime fmt.comptimePrint(tc.want_no_input, .{tfn.to_s()});
+ const want = @embedFile("testdata/block_writer/" ++ want_name_no_input);
+ try testWriteBlock(tfn, null, want, tc.tokens);
+}
+
+// Uses writer function `tfn` to write `tokens`, tests that we got `want` as output.
+//
+// The block is written twice through the same block writer: first as a
+// non-final block, then, after the output buffer is cleared and the writer is
+// set again, as a final block. Apart from the bfinal bit both outputs must
+// equal `want`.
+fn testWriteBlock(comptime tfn: TestFn, input: ?[]const u8, want: []const u8, tokens: []const Token) !void {
+    var buf = ArrayList(u8).init(testing.allocator);
+    // A single deferred deinit covers every exit path, so a failed
+    // expectation no longer leaks the buffer.
+    defer buf.deinit();
+
+    var bw = blockWriter(buf.writer());
+    try tfn.write(&bw, tokens, input, false);
+    var got = buf.items;
+    try testing.expectEqualSlices(u8, want, got); // expect writeBlock to yield expected result
+    try expect(got[0] & 0b0000_0001 == 0); // bfinal is not set
+
+    // Test if the writer produces the same output after reset.
+    buf.clearRetainingCapacity();
+    bw.setWriter(buf.writer());
+
+    try tfn.write(&bw, tokens, input, true);
+    try bw.flush();
+    got = buf.items;
+
+    try expect(got[0] & 1 == 1); // bfinal is set
+    buf.items[0] &= 0b1111_1110; // remove bfinal bit, so the slices can be compared
+    try testing.expectEqualSlices(u8, want, got); // expect writeBlock to yield expected result
+}
diff --git a/lib/std/compress/flate/consts.zig b/lib/std/compress/flate/consts.zig
new file mode 100644
index 0000000000..c28b40f68e
--- /dev/null
+++ b/lib/std/compress/flate/consts.zig
@@ -0,0 +1,49 @@
+pub const deflate = struct {
+ // Number of tokens to accumulate in deflate before starting block encoding.
+ //
+ // In zlib this depends on memlevel: 6 + memlevel, where default memlevel is
+ // 8 and max 9 that gives 14 or 15 bits.
+ pub const tokens = 1 << 15;
+};
+
+// Limits for match (back-reference) lengths and distances.
+pub const match = struct {
+ pub const base_length = 3; // smallest match length per the RFC section 3.2.5
+ pub const min_length = 4; // min length used in this algorithm
+ pub const max_length = 258; // largest match length per the RFC section 3.2.5
+
+ pub const min_distance = 1;
+ pub const max_distance = 32768; // largest match distance per the RFC section 3.2.5
+};
+
+pub const history = struct {
+ // History length equals the largest distance a match can refer back to.
+ pub const len = match.max_distance;
+};
+
+pub const lookup = struct {
+ pub const bits = 15;
+ // Number of distinct values representable in `bits` bits.
+ pub const len = 1 << bits;
+ // NOTE(review): presumably the shift that reduces a 32-bit hash to `bits`
+ // bits - confirm against the lookup implementation.
+ pub const shift = 32 - bits;
+};
+
+pub const huffman = struct {
+ // The odd order in which the codegen code sizes are written.
+ pub const codegen_order = [_]u32{ 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15 };
+ // The number of codegen codes.
+ pub const codegen_code_count = 19;
+
+ // The number of distance codes.
+ pub const distance_code_count = 30;
+
+ // Maximum number of literals.
+ pub const max_num_lit = 286;
+
+ // Max number of frequencies used for a Huffman Code
+ // Possible lengths are codegen_code_count (19), distance_code_count (30) and max_num_lit (286).
+ // The largest of these is max_num_lit.
+ pub const max_num_frequencies = max_num_lit;
+
+ // Biggest block size for uncompressed block.
+ pub const max_store_block_size = 65535;
+ // The special code used to mark the end of a block.
+ pub const end_block_marker = 256;
+};
diff --git a/lib/std/compress/flate/container.zig b/lib/std/compress/flate/container.zig
new file mode 100644
index 0000000000..23eec920de
--- /dev/null
+++ b/lib/std/compress/flate/container.zig
@@ -0,0 +1,207 @@
+//! Container of the deflate bit stream body. Container adds header before
+//! deflate bit stream and footer after. It can be gzip, zlib or raw (no header,
+//! no footer, raw bit stream).
+//!
+//! Zlib format is defined in rfc 1950. Header has 2 bytes and footer 4 bytes
+//! Adler-32 checksum.
+//!
+//! Gzip format is defined in rfc 1952. Header has 10+ bytes and footer 4 bytes
+//! crc32 checksum and 4 bytes of uncompressed data length.
+//!
+//!
+//! rfc 1950: https://datatracker.ietf.org/doc/html/rfc1950#page-4
+//! rfc 1952: https://datatracker.ietf.org/doc/html/rfc1952#page-5
+//!
+
+const std = @import("std");
+
+pub const Container = enum {
+ raw, // no header or footer
+ gzip, // gzip header and footer
+ zlib, // zlib header and footer
+
+ /// Combined size in bytes of the container's header and footer.
+ pub fn size(w: Container) usize {
+     return w.headerSize() + w.footerSize();
+ }
+
+ /// Size in bytes of the header that `writeHeader` emits for this container.
+ pub fn headerSize(w: Container) usize {
+     return switch (w) {
+         .raw => 0,
+         .zlib => 2,
+         .gzip => 10,
+     };
+ }
+
+ /// Size in bytes of the footer that `writeFooter` emits for this container.
+ pub fn footerSize(w: Container) usize {
+     return switch (w) {
+         .raw => 0,
+         .zlib => 4,
+         .gzip => 8,
+     };
+ }
+
+ pub const list = [_]Container{ .raw, .gzip, .zlib };
+
+ pub const Error = error{
+ BadGzipHeader,
+ BadZlibHeader,
+ WrongGzipChecksum,
+ WrongGzipSize,
+ WrongZlibChecksum,
+ };
+
+ /// Writes the fixed header of container `wrap` to `writer`.
+ /// Nothing is written for `.raw`.
+ pub fn writeHeader(comptime wrap: Container, writer: anytype) !void {
+     switch (wrap) {
+         .raw => {},
+         .zlib => {
+             // ZLIB two-byte header (https://datatracker.ietf.org/doc/html/rfc1950#page-4):
+             // 1st byte (0x78):
+             //  - CINFO (compression info) = 7, the default deflate window size
+             //  - CM (compression method) = 8, deflate
+             // 2nd byte:
+             //  - FLEVEL (compression level) = 0b10; values are 0=fastest, 1=fast, 2=default, 3=best
+             //  - FDICT = 0, no preset dictionary
+             //  - FCHECK = 0b11100, the mod-31 checksum bits
+             const zlib_header = [_]u8{ 0x78, 0b10_0_11100 };
+             try writer.writeAll(&zlib_header);
+         },
+         .gzip => {
+             // GZIP 10 byte header (https://datatracker.ietf.org/doc/html/rfc1952#page-5):
+             //  - ID1 (IDentification 1), always 0x1f
+             //  - ID2 (IDentification 2), always 0x8b
+             //  - CM (Compression Method), always 8 = deflate
+             //  - FLG (Flags), all set to 0
+             //  - 4 bytes, MTIME (Modification time), not used, all set to zero
+             //  - XFL (eXtra FLags), all set to zero
+             //  - OS (Operating System), 03 = Unix
+             const gzip_header = [_]u8{ 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03 };
+             try writer.writeAll(&gzip_header);
+         },
+     }
+ }
+
+ /// Writes the footer of container `wrap` to `writer`, taking the checksum
+ /// (and for gzip the byte count) from `hasher`. Nothing is written for `.raw`.
+ pub fn writeFooter(comptime wrap: Container, hasher: *Hasher(wrap), writer: anytype) !void {
+     switch (wrap) {
+         .raw => {},
+         .zlib => {
+             // ZLIB (RFC 1950) is big-endian, unlike GZIP (RFC 1952).
+             // The footer is the 4 byte ADLER32 checksum of the
+             // uncompressed data (excluding any dictionary data).
+             var word: [4]u8 = undefined;
+             std.mem.writeInt(u32, &word, hasher.chksum(), .big);
+             try writer.writeAll(&word);
+         },
+         .gzip => {
+             // GZIP 8 byte footer, both fields little-endian:
+             //  - 4 bytes, CRC32 (CRC-32)
+             //  - 4 bytes, ISIZE (Input SIZE) - size of the original
+             //    (uncompressed) input data modulo 2^32
+             var word: [4]u8 = undefined;
+             std.mem.writeInt(u32, &word, hasher.chksum(), .little);
+             try writer.writeAll(&word);
+
+             std.mem.writeInt(u32, &word, hasher.bytesRead(), .little);
+             try writer.writeAll(&word);
+         },
+     }
+ }
+
+ /// Reads and validates the header of container `wrap` from `reader`.
+ /// Nothing is read for `.raw`.
+ pub fn parseHeader(comptime wrap: Container, reader: anytype) !void {
+     switch (wrap) {
+         .raw => {},
+         .zlib => try parseZlibHeader(reader),
+         .gzip => try parseGzipHeader(reader),
+     }
+ }
+
+ // Reads a gzip header from `reader`: the fixed 10 bytes followed by the
+ // optional fields announced in the flags byte, which are skipped.
+ // Returns error.BadGzipHeader when the magic bytes or the compression
+ // method are not the expected ones.
+ fn parseGzipHeader(reader: anytype) !void {
+     // Flags description: https://www.rfc-editor.org/rfc/rfc1952.html#page-5
+     const fhcrc = 0b0000_0010;
+     const fextra = 0b0000_0100;
+     const fname = 0b0000_1000;
+     const fcomment = 0b0001_0000;
+
+     const id1 = try reader.read(u8);
+     const id2 = try reader.read(u8);
+     const cm = try reader.read(u8);
+     const flg = try reader.read(u8);
+     try reader.skipBytes(6); // mtime(4), xflags, os
+     if (id1 != 0x1f or id2 != 0x8b or cm != 0x08)
+         return error.BadGzipHeader;
+
+     // Optional fields appear in this order; each is present only when its flag bit is set.
+     if (flg & fextra != 0) {
+         const extra_len = try reader.read(u16);
+         try reader.skipBytes(extra_len);
+     }
+     if (flg & fname != 0) {
+         try reader.skipStringZ();
+     }
+     if (flg & fcomment != 0) {
+         try reader.skipStringZ();
+     }
+     if (flg & fhcrc != 0) {
+         try reader.skipBytes(2);
+     }
+ }
+
+ // Reads the two-byte zlib header from `reader`. The compression method
+ // must be 8 and the compression info at most 7; the second byte is read
+ // and ignored. Anything else yields error.BadZlibHeader.
+ fn parseZlibHeader(reader: anytype) !void {
+     const method = try reader.read(u4);
+     const info = try reader.read(u4);
+     _ = try reader.read(u8);
+     if (method == 8 and info <= 7) return;
+     return error.BadZlibHeader;
+ }
+
+ /// Reads the footer of container `wrap` from `reader` and compares it
+ /// with the values accumulated in `hasher`. Nothing is read for `.raw`.
+ pub fn parseFooter(comptime wrap: Container, hasher: *Hasher(wrap), reader: anytype) !void {
+     switch (wrap) {
+         .raw => {},
+         .zlib => {
+             // The checksum is byte-swapped before the comparison because
+             // the zlib footer is stored big-endian.
+             const expected: u32 = @byteSwap(hasher.chksum());
+             const stored = try reader.read(u32);
+             if (stored != expected) return error.WrongZlibChecksum;
+         },
+         .gzip => {
+             const stored_crc = try reader.read(u32);
+             if (stored_crc != hasher.chksum()) return error.WrongGzipChecksum;
+             const stored_size = try reader.read(u32);
+             if (stored_size != hasher.bytesRead()) return error.WrongGzipSize;
+         },
+     }
+ }
+
+ /// Returns the checksum accumulator type for container `wrap`:
+ /// CRC-32 for gzip, Adler-32 for zlib and a no-op for raw. Besides the
+ /// checksum it counts the bytes passed to `update`.
+ pub fn Hasher(comptime wrap: Container) type {
+     const HasherType = switch (wrap) {
+         .gzip => std.hash.Crc32,
+         .zlib => std.hash.Adler32,
+         // Raw streams have no checksum; an empty stand-in keeps the field valid.
+         .raw => struct {
+             pub fn init() @This() {
+                 return .{};
+             }
+         },
+     };
+
+     return struct {
+         hasher: HasherType = HasherType.init(),
+         bytes: usize = 0,
+
+         const Self = @This();
+
+         /// Feeds `buf` into the checksum and adds its length to the byte
+         /// count. Does nothing for raw containers.
+         pub fn update(self: *Self, buf: []const u8) void {
+             switch (wrap) {
+                 .raw => {},
+                 else => {
+                     self.hasher.update(buf);
+                     // Wrapping add: only the low 32 bits are ever reported
+                     // (gzip ISIZE is the size modulo 2^32), and a checked add
+                     // would overflow a 32-bit `usize` after 4 GiB of input.
+                     self.bytes +%= buf.len;
+                 },
+             }
+         }
+
+         /// Checksum of all data seen so far; always 0 for raw containers.
+         pub fn chksum(self: *Self) u32 {
+             switch (wrap) {
+                 .raw => return 0,
+                 else => return self.hasher.final(),
+             }
+         }
+
+         /// Number of bytes passed to `update`, truncated to 32 bits
+         /// (i.e. modulo 2^32).
+         pub fn bytesRead(self: *Self) u32 {
+             return @truncate(self.bytes);
+         }
+     };
+ }
+};
diff --git a/lib/std/compress/flate/deflate.zig b/lib/std/compress/flate/deflate.zig
new file mode 100644
index 0000000000..794ab02247
--- /dev/null
+++ b/lib/std/compress/flate/deflate.zig
@@ -0,0 +1,744 @@
+const std = @import("std");
+const io = std.io;
+const assert = std.debug.assert;
+const testing = std.testing;
+const expect = testing.expect;
+const print = std.debug.print;
+
+const Token = @import("Token.zig");
+const consts = @import("consts.zig");
+const BlockWriter = @import("block_writer.zig").BlockWriter;
+const Container = @import("container.zig").Container;
+const SlidingWindow = @import("SlidingWindow.zig");
+const Lookup = @import("Lookup.zig");
+
+/// Compressor options. `level` selects the speed/size trade-off and
+/// defaults to `.default`.
+pub const Options = struct {
+ level: Level = .default,
+};
+
+/// Trades between speed and compression size.
+/// Starts with level 4: in [zlib](https://github.com/madler/zlib/blob/abd3d1a28930f89375d4b41408b39f6c1be157b2/deflate.c#L115C1-L117C43)
+/// levels 1-3 are using different algorithm to perform faster but with less
+/// compression. That is not implemented here.
+///
+/// `fast`, `default` and `best` are separate tags with their own values;
+/// `LevelArgs.get` gives them the same settings as `level_4`, `level_6`
+/// and `level_9` respectively.
+pub const Level = enum(u4) {
+ // zig fmt: off
+ fast = 0xb, level_4 = 4,
+ level_5 = 5,
+ default = 0xc, level_6 = 6,
+ level_7 = 7,
+ level_8 = 8,
+ best = 0xd, level_9 = 9,
+ // zig fmt: on
+};
+
+/// Algorithm knobs for each level.
+const LevelArgs = struct {
+ good: u16, // Do less lookups if we already have match of this length.
+ nice: u16, // Stop looking for better match if we found match with at least this length.
+ lazy: u16, // Don't do lazy match find if got match with at least this length.
+ chain: u16, // How many lookups for previous match to perform.
+
+ /// Returns the knob values for `level`. The named levels `fast`,
+ /// `default` and `best` share the rows of levels 4, 6 and 9.
+ pub fn get(level: Level) LevelArgs {
+ // zig fmt: off
+ return switch (level) {
+ .fast, .level_4 => .{ .good = 4, .lazy = 4, .nice = 16, .chain = 16 },
+ .level_5 => .{ .good = 8, .lazy = 16, .nice = 32, .chain = 32 },
+ .default, .level_6 => .{ .good = 8, .lazy = 16, .nice = 128, .chain = 128 },
+ .level_7 => .{ .good = 8, .lazy = 32, .nice = 128, .chain = 256 },
+ .level_8 => .{ .good = 32, .lazy = 128, .nice = 258, .chain = 1024 },
+ .best, .level_9 => .{ .good = 32, .lazy = 258, .nice = 258, .chain = 4096 },
+ };
+ // zig fmt: on
+ }
+};
+
+/// One-shot compression: reads plain data from `reader` until it is
+/// exhausted and writes the complete compressed stream (container header,
+/// deflate body, container footer) to `writer`.
+pub fn compress(comptime container: Container, reader: anytype, writer: anytype, options: Options) !void {
+    var deflater = try compressor(container, writer, options);
+    try deflater.compress(reader);
+    try deflater.finish();
+}
+
+/// Creates a compressor that writes to `writer`. The writer's type is
+/// deduced from the argument.
+pub fn compressor(comptime container: Container, writer: anytype, options: Options) !Compressor(
+    container,
+    @TypeOf(writer),
+) {
+    const CompressorType = Compressor(container, @TypeOf(writer));
+    return try CompressorType.init(writer, options);
+}
+
+/// Compressor type for the given container and writer type: `Deflate`
+/// combined with the regular `BlockWriter`.
+pub fn Compressor(comptime container: Container, comptime WriterType: type) type {
+    return Deflate(container, WriterType, BlockWriter(WriterType));
+}
+
+/// Default compression algorithm. Has two steps: tokenization and token
+/// encoding.
+///
+/// Tokenization takes uncompressed input stream and produces list of tokens.
+/// Each token can be literal (byte of data) or match (backreference to previous
+/// data with length and distance). Tokenization accumulates 32K tokens, when
+/// full or `flush` is called tokens are passed to the `block_writer`. Level
+/// defines how hard (how slow) it tries to find match.
+///
+/// Block writer will decide which type of deflate block to write (stored, fixed,
+/// dynamic) and encode tokens to the output byte stream. Client has to call
+/// `finish` to write block with the final bit set.
+///
+/// Container defines type of header and footer which can be gzip, zlib or raw.
+/// They all share same deflate body. Raw has no header or footer just deflate
+/// body.
+///
+/// Compression algorithm explained in rfc-1951 (slightly edited for this case):
+///
+/// The compressor uses a chained hash table `lookup` to find duplicated
+/// strings, using a hash function that operates on 4-byte sequences. At any
+/// given point during compression, let XYZW be the next 4 input bytes
+/// (lookahead) to be examined (not necessarily all different, of course).
+/// First, the compressor examines the hash chain for XYZW. If the chain is
+/// empty, the compressor simply writes out X as a literal byte and advances
+/// one byte in the input. If the hash chain is not empty, indicating that the
+/// sequence XYZW (or, if we are unlucky, some other 4 bytes with the same
+/// hash function value) has occurred recently, the compressor compares all
+/// strings on the XYZW hash chain with the actual input data sequence
+/// starting at the current point, and selects the longest match.
+///
+/// To improve overall compression, the compressor defers the selection of
+/// matches ("lazy matching"): after a match of length N has been found, the
+/// compressor searches for a longer match starting at the next input byte. If
+/// it finds a longer match, it truncates the previous match to a length of
+/// one (thus producing a single literal byte) and then emits the longer
+/// match. Otherwise, it emits the original match, and, as described above,
+/// advances N bytes before continuing.
+///
+///
+/// Allocates statically ~400K (192K lookup, 128K tokens, 64K window).
+///
+/// Deflate function accepts BlockWriterType so we can change that in test to test
+/// just tokenization part.
+///
+fn Deflate(comptime container: Container, comptime WriterType: type, comptime BlockWriterType: type) type {
+ return struct {
+ lookup: Lookup = .{},
+ win: SlidingWindow = .{},
+ tokens: Tokens = .{},
+ wrt: WriterType,
+ block_writer: BlockWriterType,
+ level: LevelArgs,
+ hasher: container.Hasher() = .{},
+
+ // Match and literal at the previous position.
+ // Used for lazy match finding in processWindow.
+ prev_match: ?Token = null,
+ prev_literal: ?u8 = null,
+
+ const Self = @This();
+
+ /// Creates the compressor for `wrt` with the knobs of `options.level`
+ /// and writes the container header to `wrt` right away.
+ pub fn init(wrt: WriterType, options: Options) !Self {
+     const deflate = Self{
+         .wrt = wrt,
+         .block_writer = BlockWriterType.init(wrt),
+         .level = LevelArgs.get(options.level),
+     };
+     try container.writeHeader(wrt);
+     return deflate;
+ }
+
+ const FlushOption = enum { none, flush, final };
+
+ // Process data in window and create tokens. If token buffer is full
+ // flush tokens to the token writer. In the case of `flush` or `final`
+ // option it will process all data from the window. In the `none` case
+ // it will preserve some data for the next match.
+ //
+ // Matching is lazy: a found match is kept in `prev_match` (together with
+ // `prev_literal`) and only written once the next position fails to
+ // produce a longer one.
+ fn tokenize(self: *Self, flush_opt: FlushOption) !void {
+ // flush - process all data from window
+ const should_flush = (flush_opt != .none);
+
+ // While there is data in active lookahead buffer.
+ while (self.win.activeLookahead(should_flush)) |lh| {
+ var step: u16 = 1; // 1 in the case of literal, match length otherwise
+ const pos: u16 = self.win.pos();
+ const literal = lh[0]; // literal at current position
+ // A new match is only interesting if it beats the pending one.
+ const min_len: u16 = if (self.prev_match) |m| m.length() else 0;
+
+ // Try to find a match longer than min_len.
+ if (self.findMatch(pos, lh, min_len)) |match| {
+ // Found better match than previous.
+ try self.addPrevLiteral();
+
+ // Is found match length good enough?
+ if (match.length() >= self.level.lazy) {
+ // Don't try to lazy find better match, use this.
+ step = try self.addMatch(match);
+ } else {
+ // Store this match.
+ self.prev_literal = literal;
+ self.prev_match = match;
+ }
+ } else {
+ // There is no better match at the current position than the previous one.
+ // Write previous match or literal.
+ if (self.prev_match) |m| {
+ // Write match from previous position.
+ step = try self.addMatch(m) - 1; // we already advanced 1 from previous position
+ } else {
+ // No match at previous position.
+ // Write previous literal if any, and remember this literal.
+ try self.addPrevLiteral();
+ self.prev_literal = literal;
+ }
+ }
+ // Advance window and add hashes.
+ self.windowAdvance(step, lh, pos);
+ }
+
+ if (should_flush) {
+ // In the case of flushing, last few lookahead buffers were smaller than min match len.
+ // So only last literal can be unwritten.
+ assert(self.prev_match == null);
+ try self.addPrevLiteral();
+ self.prev_literal = null;
+
+ try self.flushTokens(flush_opt);
+ }
+ }
+
+ // Moves the window forward by `step` bytes and registers hashes for the
+ // positions that are skipped over.
+ fn windowAdvance(self: *Self, step: u16, lh: []const u8, pos: u16) void {
+     // The hash for `pos` itself was already added in findMatch, so only
+     // the `step - 1` positions after it are added here.
+     const rest = lh[1..];
+     const skipped = step - 1;
+     const next_pos = pos + 1;
+     self.lookup.bulkAdd(rest, skipped, next_pos);
+     self.win.advance(step);
+ }
+
+ // Appends the pending literal to the token list; no-op when none is pending.
+ // `prev_literal` itself is left untouched.
+ fn addPrevLiteral(self: *Self) !void {
+     const pending = self.prev_literal orelse return;
+     try self.addToken(Token.initLiteral(pending));
+ }
+
+ // Appends match `m` to the token list and clears the pending
+ // match/literal state. Returns the length of the added match.
+ fn addMatch(self: *Self, m: Token) !u16 {
+     try self.addToken(m);
+     self.prev_match = null;
+     self.prev_literal = null;
+     return m.length();
+ }
+
+ // Stores `token`; once the token buffer is full it is handed to the
+ // block writer as a regular (non-final) block.
+ fn addToken(self: *Self, token: Token) !void {
+     self.tokens.add(token);
+     if (!self.tokens.full()) return;
+     try self.flushTokens(.none);
+ }
+
+ // Finds largest match in the history window with the data at current pos.
+ //
+ // Registers `pos` in the lookup table and then walks the chain of earlier
+ // positions with the same hash. Returns a match token only when a match
+ // strictly longer than `min_len` is found, otherwise null.
+ fn findMatch(self: *Self, pos: u16, lh: []const u8, min_len: u16) ?Token {
+ var len: u16 = min_len;
+ // Previous location with the same hash (same 4 bytes).
+ var prev_pos = self.lookup.add(lh, pos);
+ // Last found match.
+ var match: ?Token = null;
+
+ // How much back-references to try, performance knob.
+ var chain: usize = self.level.chain;
+ if (len >= self.level.good) {
+ // If we've got a match that's good enough, only look in 1/4 the chain.
+ chain >>= 2;
+ }
+
+ // Hot path loop!
+ // A `prev_pos` of 0 ends the walk, as does an exhausted chain budget.
+ while (prev_pos > 0 and chain > 0) : (chain -= 1) {
+ const distance = pos - prev_pos;
+ // Candidates further back than the maximum distance cannot be referenced.
+ if (distance > consts.match.max_distance)
+ break;
+
+ const new_len = self.win.match(prev_pos, pos, len);
+ if (new_len > len) {
+ match = Token.initMatch(@intCast(distance), new_len);
+ if (new_len >= self.level.nice) {
+ // The match is good enough that we don't try to find a better one.
+ return match;
+ }
+ len = new_len;
+ }
+ prev_pos = self.lookup.prev(prev_pos);
+ }
+
+ return match;
+ }
+
+ // Hands the accumulated tokens to the block writer and resets the token
+ // store. With `.final` the block is written with the final flag set; with
+ // `.flush` an empty stored block is appended; for both the block writer
+ // itself is flushed.
+ fn flushTokens(self: *Self, flush_opt: FlushOption) !void {
+ // Pass tokens to the token writer
+ try self.block_writer.write(self.tokens.tokens(), flush_opt == .final, self.win.tokensBuffer());
+ // Stored block ensures byte alignment.
+ // It has 3 bits (final, block_type) and then padding until byte boundary.
+ // After that everything is aligned to the boundary in the stored block.
+ // Empty stored block is 0b000 + (0-7) bits of padding + 0x00 0x00 0xFF 0xFF.
+ // Last 4 bytes are byte aligned.
+ if (flush_opt == .flush) {
+ try self.block_writer.storedBlock("", false);
+ }
+ if (flush_opt != .none) {
+ // Safe to call only when byte aligned or it is OK to add
+ // padding bits (on last byte of the final block).
+ try self.block_writer.flush();
+ }
+ // Reset internal tokens store.
+ self.tokens.reset();
+ // Notify win that tokens are flushed.
+ self.win.flush();
+ }
+
+ // Slides the window and passes the value it returns on to the lookup
+ // table, so that the lookup can slide accordingly.
+ fn slide(self: *Self) void {
+     self.lookup.slide(self.win.slide());
+ }
+
+ /// Compresses as much data as possible, stops when the reader becomes
+ /// empty. It will introduce some output latency (reading input without
+ /// producing all output) because some data are still in internal
+ /// buffers.
+ ///
+ /// It is up to the caller to call flush (if needed) or finish (required)
+ /// when there is a need to output any pending data or complete the stream.
+ ///
+ pub fn compress(self: *Self, reader: anytype) !void {
+ while (true) {
+ // Fill window from reader
+ const buf = self.win.writable();
+ if (buf.len == 0) {
+ // No free space in the window: process what is there, then slide to make room.
+ try self.tokenize(.none);
+ self.slide();
+ continue;
+ }
+ const n = try reader.readAll(buf);
+ // The container checksum covers the uncompressed input.
+ self.hasher.update(buf[0..n]);
+ self.win.written(n);
+ // Process window
+ try self.tokenize(.none);
+ // Exit when no more data in reader
+ if (n < buf.len) break;
+ }
+ }
+
+ /// Flushes internal buffers to the output writer.
+ ///
+ /// The current deflate block is completed and followed by an empty
+ /// stored block: three zero bits plus filler bits up to the next byte
+ /// boundary, followed by four bytes (00 00 ff ff). This syncs the bit
+ /// stream to a byte boundary, so the decompressor can get all input
+ /// data available so far.
+ ///
+ /// Mainly useful in compressed network protocols, where the deflate bit
+ /// stream has to be usable as a byte stream. May degrade compression,
+ /// so it should be used only when necessary.
+ ///
+ pub fn flush(self: *Self) !void {
+     return self.tokenize(.flush);
+ }
+
+ /// Completes the deflate bit stream by writing any pending data as the
+ /// final deflate block, then writes the container footer. HAS to be
+ /// called once all data are written to the compressor, as a signal that
+ /// the next block has to have the final bit set.
+ ///
+ pub fn finish(self: *Self) !void {
+ try self.tokenize(.final);
+ try container.writeFooter(&self.hasher, self.wrt);
+ }
+
+ /// Switches output to `new_writer` while preserving history. Most
+ /// probably flush should be called on the old writer before setting a
+ /// new one.
+ pub fn setWriter(self: *Self, new_writer: WriterType) void {
+     self.wrt = new_writer;
+     self.block_writer.setWriter(new_writer);
+ }
+
+ // Writer interface
+
+ pub const Writer = io.Writer(*Self, Error, write);
+ pub const Error = BlockWriterType.Error;
+
+ /// Compresses the uncompressed bytes in `input`; always consumes all of
+ /// it and returns `input.len`.
+ /// See compress.
+ pub fn write(self: *Self, input: []const u8) !usize {
+     var stream = io.fixedBufferStream(input);
+     try self.compress(stream.reader());
+     return input.len;
+ }
+
+ /// Returns an `io.Writer` whose writes are fed to this compressor via `write`.
+ pub fn writer(self: *Self) Writer {
+     return Writer{ .context = self };
+ }
+ };
+}
+
+// Fixed-capacity store for the tokens accumulated before block encoding.
+const Tokens = struct {
+    list: [consts.deflate.tokens]Token = undefined,
+    pos: usize = 0,
+
+    // Appends `t`. The caller has to flush and reset once `full` reports true.
+    fn add(self: *Tokens, t: Token) void {
+        const slot = self.pos;
+        self.list[slot] = t;
+        self.pos = slot + 1;
+    }
+
+    // True when every slot of the store is in use.
+    fn full(self: *Tokens) bool {
+        return self.list.len == self.pos;
+    }
+
+    // Forgets all stored tokens.
+    fn reset(self: *Tokens) void {
+        self.pos = 0;
+    }
+
+    // The tokens added since the last reset.
+    fn tokens(self: *Tokens) []const Token {
+        const used = self.pos;
+        return self.list[0..used];
+    }
+};
+
+/// Creates huffman only deflate blocks. Disables Lempel-Ziv match searching and
+/// only performs Huffman entropy encoding. Results in faster compression, much
+/// less memory requirements during compression but bigger compressed sizes.
+pub const huffman = struct {
+    /// Compresses everything from `reader` into `writer` and finishes the stream.
+    pub fn compress(comptime container: Container, reader: anytype, writer: anytype) !void {
+        var huffman_compressor = try huffman.compressor(container, writer);
+        try huffman_compressor.compress(reader);
+        try huffman_compressor.finish();
+    }
+
+    /// Huffman-only compressor type for the given container and writer type.
+    pub fn Compressor(comptime container: Container, comptime WriterType: type) type {
+        return SimpleCompressor(.huffman, container, WriterType);
+    }
+
+    /// Creates a huffman-only compressor that writes to `writer`.
+    pub fn compressor(comptime container: Container, writer: anytype) !huffman.Compressor(container, @TypeOf(writer)) {
+        const CompressorType = huffman.Compressor(container, @TypeOf(writer));
+        return try CompressorType.init(writer);
+    }
+};
+
+/// Creates store blocks only. Data are not compressed only packed into deflate
+/// store blocks. That adds 9 bytes of header for each block. Max stored block
+/// size is 64K. Block is emitted when flush is called or on finish.
+pub const store = struct {
+    /// Packs everything from `reader` into `writer` and finishes the stream.
+    pub fn compress(comptime container: Container, reader: anytype, writer: anytype) !void {
+        var store_compressor = try store.compressor(container, writer);
+        try store_compressor.compress(reader);
+        try store_compressor.finish();
+    }
+
+    /// Store-only compressor type for the given container and writer type.
+    pub fn Compressor(comptime container: Container, comptime WriterType: type) type {
+        return SimpleCompressor(.store, container, WriterType);
+    }
+
+    /// Creates a store-only compressor that writes to `writer`.
+    pub fn compressor(comptime container: Container, writer: anytype) !store.Compressor(container, @TypeOf(writer)) {
+        const CompressorType = store.Compressor(container, @TypeOf(writer));
+        return try CompressorType.init(writer);
+    }
+};
+
+// Which kind of block a SimpleCompressor emits when its buffer is flushed.
+const SimpleCompressorKind = enum {
+ huffman, // Huffman-only blocks, written with `huffmanBlock`
+ store, // stored (uncompressed) blocks, written with `storedBlock`
+};
+
+// Creates a SimpleCompressor of the given kind that writes to `writer`.
+// The writer's type is deduced from the argument.
+fn simpleCompressor(
+    comptime kind: SimpleCompressorKind,
+    comptime container: Container,
+    writer: anytype,
+) !SimpleCompressor(kind, container, @TypeOf(writer)) {
+    const CompressorType = SimpleCompressor(kind, container, @TypeOf(writer));
+    return try CompressorType.init(writer);
+}
+
+// Compressor without match searching: input is collected in a fixed buffer
+// and written out as one block per buffer, either Huffman-only or stored,
+// depending on `kind`.
+fn SimpleCompressor(
+    comptime kind: SimpleCompressorKind,
+    comptime container: Container,
+    comptime WriterType: type,
+) type {
+    const BlockWriterType = BlockWriter(WriterType);
+    return struct {
+        buffer: [65535]u8 = undefined, // because store blocks are limited to 65535 bytes
+        wp: usize = 0, // number of buffered bytes, i.e. the next write position
+
+        wrt: WriterType,
+        block_writer: BlockWriterType,
+        hasher: container.Hasher() = .{},
+
+        const Self = @This();
+
+        /// Creates the compressor and writes the container header to `wrt`.
+        pub fn init(wrt: WriterType) !Self {
+            const simple = Self{
+                .wrt = wrt,
+                .block_writer = BlockWriterType.init(wrt),
+            };
+            try container.writeHeader(wrt);
+            return simple;
+        }
+
+        /// Writes the buffered data as a non-final block, followed by an
+        /// empty stored block, and flushes the block writer.
+        pub fn flush(self: *Self) !void {
+            try self.flushBuffer(false);
+            try self.block_writer.storedBlock("", false);
+            try self.block_writer.flush();
+        }
+
+        /// Writes the buffered data as the final block, flushes the block
+        /// writer and writes the container footer.
+        pub fn finish(self: *Self) !void {
+            try self.flushBuffer(true);
+            try self.block_writer.flush();
+            try container.writeFooter(&self.hasher, self.wrt);
+        }
+
+        // Emits the buffered bytes as one block of the configured kind and
+        // empties the buffer.
+        fn flushBuffer(self: *Self, final: bool) !void {
+            const pending = self.buffer[0..self.wp];
+            switch (kind) {
+                .store => try self.block_writer.storedBlock(pending, final),
+                .huffman => try self.block_writer.huffmanBlock(pending, final),
+            }
+            self.wp = 0;
+        }
+
+        // Writes all data from the input reader of uncompressed data.
+        // It is up to the caller to call flush or finish if there is need to
+        // output compressed blocks.
+        pub fn compress(self: *Self, reader: anytype) !void {
+            while (true) {
+                // A full buffer is written out as a block before reading more.
+                if (self.wp == self.buffer.len) {
+                    try self.flushBuffer(false);
+                }
+                const free = self.buffer[self.wp..];
+                const n = try reader.readAll(free);
+                self.hasher.update(free[0..n]);
+                self.wp += n;
+                if (n < free.len) break; // no more data in reader
+            }
+        }
+
+        // Writer interface
+
+        pub const Writer = io.Writer(*Self, Error, write);
+        pub const Error = BlockWriterType.Error;
+
+        // Write `input` of uncompressed data.
+        pub fn write(self: *Self, input: []const u8) !usize {
+            var stream = io.fixedBufferStream(input);
+            try self.compress(stream.reader());
+            return input.len;
+        }
+
+        pub fn writer(self: *Self) Writer {
+            return Writer{ .context = self };
+        }
+    };
+}
+
+const builtin = @import("builtin");
+
+// Checks the token list produced by Deflate for small inputs, and that only
+// the container header/footer bytes reach the underlying writer (the token
+// writer used here produces no output of its own).
+test "tokenization" {
+    const L = Token.initLiteral;
+    const M = Token.initMatch;
+
+    const cases = [_]struct {
+        data: []const u8,
+        tokens: []const Token,
+    }{
+        .{
+            .data = "Blah blah blah blah blah!",
+            .tokens = &[_]Token{ L('B'), L('l'), L('a'), L('h'), L(' '), L('b'), M(5, 18), L('!') },
+        },
+        .{
+            .data = "ABCDEABCD ABCDEABCD",
+            .tokens = &[_]Token{
+                L('A'), L('B'), L('C'), L('D'), L('E'), L('A'), L('B'), L('C'), L('D'), L(' '),
+                L('A'), M(10, 8),
+            },
+        },
+    };
+
+    for (cases) |case| {
+        inline for (Container.list) |container| { // for each wrapping
+            var counter = io.countingWriter(io.null_writer);
+            const out = counter.writer();
+            var df = try Deflate(container, @TypeOf(out), TestTokenWriter).init(out, .{});
+
+            _ = try df.write(case.data);
+            try df.flush();
+
+            try expect(df.block_writer.pos == case.tokens.len); // number of tokens written
+            try testing.expectEqualSlices(Token, df.block_writer.get(), case.tokens); // tokens match
+
+            // Only the header has been written so far; finish adds the footer.
+            try testing.expectEqual(container.headerSize(), counter.bytes_written);
+            try df.finish();
+            try testing.expectEqual(container.size(), counter.bytes_written);
+        }
+    }
+}
+
+// Block writer stand-in for tests: records the tokens it is given (up to
+// 128) instead of encoding them, so they can be compared with an expected
+// token list.
+const TestTokenWriter = struct {
+    const Self = @This();
+
+    pos: usize = 0,
+    actual: [128]Token = undefined,
+
+    pub fn init(_: anytype) Self {
+        return .{};
+    }
+
+    // Appends `tokens` to the recorded list; the other arguments are ignored.
+    pub fn write(self: *Self, tokens: []const Token, _: bool, _: ?[]const u8) !void {
+        const end = self.pos + tokens.len;
+        @memcpy(self.actual[self.pos..end], tokens);
+        self.pos = end;
+    }
+
+    pub fn storedBlock(_: *Self, _: []const u8, _: bool) !void {}
+
+    // All tokens recorded so far.
+    pub fn get(self: *Self) []Token {
+        return self.actual[0..self.pos];
+    }
+
+    // Debug helper: prints the recorded tokens.
+    pub fn show(self: *Self) void {
+        print("\n", .{});
+        for (self.get()) |token| token.show();
+    }
+
+    pub fn flush(_: *Self) !void {}
+};
+
+test "file tokenization" {
+ const levels = [_]Level{ .level_4, .level_5, .level_6, .level_7, .level_8, .level_9 };
+ const cases = [_]struct {
+ data: []const u8, // uncompressed content
+ // expected number of tokens produced by deflate tokenization
+ tokens_count: [levels.len]usize = .{0} ** levels.len,
+ }{
+ .{
+ .data = @embedFile("testdata/rfc1951.txt"),
+ .tokens_count = .{ 7675, 7672, 7599, 7594, 7598, 7599 },
+ },
+
+ .{
+ .data = @embedFile("testdata/block_writer/huffman-null-max.input"),
+ .tokens_count = .{ 257, 257, 257, 257, 257, 257 },
+ },
+ .{
+ .data = @embedFile("testdata/block_writer/huffman-pi.input"),
+ .tokens_count = .{ 2570, 2564, 2564, 2564, 2564, 2564 },
+ },
+ .{
+ .data = @embedFile("testdata/block_writer/huffman-text.input"),
+ .tokens_count = .{ 235, 234, 234, 234, 234, 234 },
+ },
+ .{
+ .data = @embedFile("testdata/fuzz/roundtrip1.input"),
+ .tokens_count = .{ 333, 331, 331, 331, 331, 331 },
+ },
+ .{
+ .data = @embedFile("testdata/fuzz/roundtrip2.input"),
+ .tokens_count = .{ 334, 334, 334, 334, 334, 334 },
+ },
+ };
+
+ for (cases) |case| { // for each case
+ const data = case.data;
+
+ for (levels, 0..) |level, i| { // for each compression level
+ var original = io.fixedBufferStream(data);
+
+ // buffer for decompressed data
+ var al = std.ArrayList(u8).init(testing.allocator);
+ defer al.deinit();
+ const writer = al.writer();
+
+ // create compressor
+ const WriterType = @TypeOf(writer);
+ const TokenWriter = TokenDecoder(@TypeOf(writer));
+ var cmp = try Deflate(.raw, WriterType, TokenWriter).init(writer, .{ .level = level });
+
+ // Stream uncompressed `original` data to the compressor. It will
+ // produce tokens list and pass that list to the TokenDecoder. This
+ // TokenDecoder uses CircularBuffer from inflate to convert list of
+ // tokens back to the uncompressed stream.
+ try cmp.compress(original.reader());
+ try cmp.flush();
+ const expected_count = case.tokens_count[i];
+ const actual = cmp.block_writer.tokens_count;
+ if (expected_count == 0) {
+ print("actual token count {d}\n", .{actual});
+ } else {
+ try testing.expectEqual(expected_count, actual);
+ }
+
+ try testing.expectEqual(data.len, al.items.len);
+ try testing.expectEqualSlices(u8, data, al.items);
+ }
+ }
+}
+
+ // Token writer used by the "file tokenization" test: replays every token into
+ // a `CircularBuffer`, forwards the bytes read back from it to `wrt`, and
+ // counts how many tokens it has received.
+ fn TokenDecoder(comptime WriterType: type) type {
+     return struct {
+         const CircularBuffer = @import("CircularBuffer.zig");
+         const Self = @This();
+
+         hist: CircularBuffer = .{},
+         wrt: WriterType,
+         tokens_count: usize = 0,
+
+         pub fn init(wrt: WriterType) Self {
+             return .{ .wrt = wrt };
+         }
+
+         // Applies each token to the history buffer. The buffer is drained to
+         // the writer whenever fewer than 285 free bytes remain, and once more
+         // after the last token.
+         pub fn write(self: *Self, tokens: []const Token, _: bool, _: ?[]const u8) !void {
+             self.tokens_count += tokens.len;
+             for (tokens) |tok| {
+                 switch (tok.kind) {
+                     .literal => self.hist.write(tok.literal()),
+                     .match => try self.hist.writeMatch(tok.length(), tok.distance()),
+                 }
+                 if (self.hist.free() < 285) try self.flushWin();
+             }
+             try self.flushWin();
+         }
+
+         pub fn storedBlock(_: *Self, _: []const u8, _: bool) !void {}
+
+         // Writes buffered bytes to `wrt` until `hist.read()` returns an empty slice.
+         fn flushWin(self: *Self) !void {
+             var chunk = self.hist.read();
+             while (chunk.len != 0) : (chunk = self.hist.read()) {
+                 try self.wrt.writeAll(chunk);
+             }
+         }
+
+         pub fn flush(_: *Self) !void {}
+     };
+ }
+
+test "store simple compressor" {
+ const data = "Hello world!";
+ const expected = [_]u8{
+ 0x1, // block type 0, final bit set
+ 0xc, 0x0, // len = 12
+ 0xf3, 0xff, // ~len
+ 'H', 'e', 'l', 'l', 'o', ' ', 'w', 'o', 'r', 'l', 'd', '!', //
+ //0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x20, 0x77, 0x6f, 0x72, 0x6c, 0x64, 0x21,
+ };
+
+ var fbs = std.io.fixedBufferStream(data);
+ var al = std.ArrayList(u8).init(testing.allocator);
+ defer al.deinit();
+
+ var cmp = try store.compressor(.raw, al.writer());
+ try cmp.compress(fbs.reader());
+ try cmp.finish();
+ try testing.expectEqualSlices(u8, &expected, al.items);
+
+ fbs.reset();
+ try al.resize(0);
+
+ // huffman only compressor will also emit store block for this small sample
+ var hc = try huffman.compressor(.raw, al.writer());
+ try hc.compress(fbs.reader());
+ try hc.finish();
+ try testing.expectEqualSlices(u8, &expected, al.items);
+}
diff --git a/lib/std/compress/flate/huffman_decoder.zig b/lib/std/compress/flate/huffman_decoder.zig
new file mode 100644
index 0000000000..a7c14f426b
--- /dev/null
+++ b/lib/std/compress/flate/huffman_decoder.zig
@@ -0,0 +1,308 @@
+const std = @import("std");
+const testing = std.testing;
+
+ // One alphabet entry of a Huffman decoder: the decoded symbol and its kind,
+ // the code and code length, and a link used to chain entries together.
+ pub const Symbol = packed struct {
+     pub const Kind = enum(u2) {
+         literal,
+         end_of_block,
+         match,
+     };
+
+     symbol: u8 = 0, // symbol from alphabet
+     code_bits: u4 = 0, // number of bits in code 0-15
+     kind: Kind = .literal,
+
+     code: u16 = 0, // huffman code of the symbol
+     next: u16 = 0, // index of the next symbol in linked list
+     // it is safe to use 0 as null pointer, when sorted 0 has shortest code and fits into lookup
+
+     // Sort predicate: orders by code length, then by kind, then by symbol value.
+     pub fn asc(_: void, a: Symbol, b: Symbol) bool {
+         if (a.code_bits != b.code_bits) return a.code_bits < b.code_bits;
+         if (a.kind != b.kind) return @intFromEnum(a.kind) < @intFromEnum(b.kind);
+         return a.symbol < b.symbol;
+     }
+ };
+
+ // Decoder instantiations; the arguments are
+ // (alphabet_size, max_code_bits, lookup_bits).
+ pub const LiteralDecoder = HuffmanDecoder(286, 15, 9);
+ pub const DistanceDecoder = HuffmanDecoder(30, 15, 9);
+ pub const CodegenDecoder = HuffmanDecoder(19, 7, 7);
+
+ // Errors produced by the decoder: `InvalidCode` when a code cannot be resolved
+ // to a symbol, the others when a list of code lengths fails validation.
+ pub const Error = error{
+ InvalidCode,
+ OversubscribedHuffmanTree,
+ IncompleteHuffmanTree,
+ MissingEndOfBlockCode,
+ };
+
+ /// Creates huffman tree codes from list of code lengths (in `generate`).
+///
+/// `find` then finds symbol for code bits. Code can be any length between 1 and
+/// 15 bits. When calling `find` we don't know how many bits will be used to
+/// find symbol. When symbol is returned it has code_bits field which defines
+/// how much we should advance in bit stream.
+///
+/// Lookup table is used to map 15 bit int to symbol. Same symbol is written
+/// many times in this table; 32K places for 286 (at most) symbols.
+/// Small lookup table is optimization for faster search.
+/// It is variation of the algorithm explained in [zlib](https://github.com/madler/zlib/blob/643e17b7498d12ab8d15565662880579692f769d/doc/algorithm.txt#L92)
+/// with difference that we here use statically allocated arrays.
+///
+fn HuffmanDecoder(
+ comptime alphabet_size: u16,
+ comptime max_code_bits: u4,
+ comptime lookup_bits: u4,
+) type {
+ const lookup_shift = max_code_bits - lookup_bits;
+
+ return struct {
+ // all symbols in alphabet, sorted by code_bits, kind, symbol
+ symbols: [alphabet_size]Symbol = undefined,
+ // lookup table code -> symbol
+ lookup: [1 << lookup_bits]Symbol = undefined,
+
+ const Self = @This();
+
+ /// Generates symbols and lookup tables from list of code lens for each symbol.
+ ///
+ /// `lens[i]` is the code length of alphabet entry `i`; entries beyond
+ /// `lens.len` are treated as unused (length 0). Returns an error when
+ /// `checkCompleteness` rejects the lengths.
+ pub fn generate(self: *Self, lens: []const u4) !void {
+ try checkCompleteness(lens);
+
+ // init alphabet with code_bits
+ // Index 0..255 are literals, 256 is end of block, 257 and above are
+ // matches whose `symbol` is the index minus 257.
+ for (self.symbols, 0..) |_, i| {
+ const cb: u4 = if (i < lens.len) lens[i] else 0;
+ self.symbols[i] = if (i < 256)
+ .{ .kind = .literal, .symbol = @intCast(i), .code_bits = cb }
+ else if (i == 256)
+ .{ .kind = .end_of_block, .symbol = 0xff, .code_bits = cb }
+ else
+ .{ .kind = .match, .symbol = @intCast(i - 257), .code_bits = cb };
+ }
+ // Sort by code length first, so unused symbols (length 0) end up at the front.
+ std.sort.heap(Symbol, &self.symbols, {}, Symbol.asc);
+
+ // reset lookup table
+ for (0..self.lookup.len) |i| {
+ self.lookup[i] = .{};
+ }
+
+ // assign code to symbols
+ // reference: https://youtu.be/9_YEGLe33NA?list=PLU4IQLU9e_OrY8oASHx0u3IXAL9TOdidm&t=2639
+ // `code` is kept left-aligned in `max_code_bits` bits; `idx` is the
+ // lookup slot that corresponds to the current `code`.
+ var code: u16 = 0;
+ var idx: u16 = 0;
+ for (&self.symbols, 0..) |*sym, pos| {
+ //print("sym: {}\n", .{sym});
+ if (sym.code_bits == 0) continue; // skip unused
+ sym.code = code;
+
+ // A code of length n covers 2^(max_code_bits - n) left-aligned values.
+ const next_code = code + (@as(u16, 1) << (max_code_bits - sym.code_bits));
+ const next_idx = next_code >> lookup_shift;
+
+ // Stop once the lookup table range would be exceeded.
+ if (next_idx > self.lookup.len or idx >= self.lookup.len) break;
+ if (sym.code_bits <= lookup_bits) {
+ // fill small lookup table
+ for (idx..next_idx) |j|
+ self.lookup[j] = sym.*;
+ } else {
+ // insert into linked table starting at root
+ // The new symbol becomes the head of the chain rooted at
+ // lookup[idx]; the previous head is linked behind it.
+ const root = &self.lookup[idx];
+ const root_next = root.next;
+ root.next = @intCast(pos);
+ sym.next = root_next;
+ }
+
+ idx = next_idx;
+ code = next_code;
+ }
+ //print("decoder generate, code: {d}, idx: {d}\n", .{ code, idx });
+ }
+
+ /// Given the list of code lengths check that it represents a canonical
+ /// Huffman code for n symbols.
+ ///
+ /// Returns `error.MissingEndOfBlockCode` when the 286-symbol alphabet has
+ /// no code for symbol 256, `error.OversubscribedHuffmanTree` when more
+ /// codes of some length are requested than remain available, and
+ /// `error.IncompleteHuffmanTree` when codes are left unused (except for
+ /// the single-code case allowed below). A list with no used symbols is
+ /// accepted.
+ ///
+ /// Reference: https://github.com/madler/zlib/blob/5c42a230b7b468dff011f444161c0145b5efae59/contrib/puff/puff.c#L340
+ fn checkCompleteness(lens: []const u4) !void {
+ // NOTE(review): `lens[256]` is indexed without checking `lens.len`;
+ // presumably callers of the 286-symbol decoder always pass at least
+ // 257 lengths - confirm.
+ if (alphabet_size == 286)
+ if (lens[256] == 0) return error.MissingEndOfBlockCode;
+
+ // count[n] is the number of symbols with code length n.
+ // NOTE(review): `count` has max_code_bits + 1 entries while a u4 length
+ // can be as large as 15; presumably lengths never exceed max_code_bits - confirm.
+ var count = [_]u16{0} ** (@as(usize, max_code_bits) + 1);
+ var max: usize = 0;
+ for (lens) |n| {
+ if (n == 0) continue;
+ if (n > max) max = n;
+ count[n] += 1;
+ }
+ if (max == 0) // empty tree
+ return;
+
+ // check for an over-subscribed or incomplete set of lengths
+ var left: usize = 1; // one possible code of zero length
+ for (1..count.len) |len| {
+ left <<= 1; // one more bit, double codes left
+ if (count[len] > left)
+ return error.OversubscribedHuffmanTree;
+ left -= count[len]; // deduct count from possible codes
+ }
+ if (left > 0) { // left > 0 means incomplete
+ // incomplete code ok only for single length 1 code
+ // count[0] is never incremented (zero lengths are skipped above), so
+ // this holds only when there is exactly one code and it has length 1.
+ // The `max_code_bits > 7` guard excludes the 7-bit decoder.
+ if (max_code_bits > 7 and max == count[0] + count[1]) return;
+ return error.IncompleteHuffmanTree;
+ }
+ }
+
+ /// Finds symbol for lookup table code.
+ ///
+ /// The leading bits of `code` select a lookup slot. A slot with a non-zero
+ /// `code_bits` is the answer; otherwise the chain starting at the slot's
+ /// `next` index is searched.
+ pub fn find(self: *Self, code: u16) !Symbol {
+     const entry = self.lookup[code >> lookup_shift];
+     if (entry.code_bits == 0) {
+         // no short code in this slot, search symbols sharing the prefix
+         return self.findLinked(code, entry.next);
+     }
+     return entry;
+ }
+
+ // Walks the chain of symbols starting at index `start` and returns the
+ // first one whose code matches the leading `code_bits` bits of `code`.
+ // Index 0 terminates the chain; `error.InvalidCode` means nothing matched.
+ inline fn findLinked(self: *Self, code: u16, start: u16) !Symbol {
+     var cursor = start;
+     while (cursor != 0) : (cursor = self.symbols[cursor].next) {
+         const candidate = self.symbols[cursor];
+         // drop the low bits that are not part of the candidate's code
+         const unused_bits = max_code_bits - candidate.code_bits;
+         if ((code >> unused_bits) == (candidate.code >> unused_bits)) return candidate;
+     }
+     return error.InvalidCode;
+ }
+ };
+}
+
+test "flate.HuffmanDecoder init/find" {
+ // example data from: https://youtu.be/SJPvNi4HrWQ?t=8423
+ const code_lens = [_]u4{ 4, 3, 0, 2, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 3, 2 };
+ var h: CodegenDecoder = .{};
+ try h.generate(&code_lens);
+
+ const expected = [_]struct {
+ sym: Symbol,
+ code: u16,
+ }{
+ .{
+ .code = 0b00_00000,
+ .sym = .{ .symbol = 3, .code_bits = 2 },
+ },
+ .{
+ .code = 0b01_00000,
+ .sym = .{ .symbol = 18, .code_bits = 2 },
+ },
+ .{
+ .code = 0b100_0000,
+ .sym = .{ .symbol = 1, .code_bits = 3 },
+ },
+ .{
+ .code = 0b101_0000,
+ .sym = .{ .symbol = 4, .code_bits = 3 },
+ },
+ .{
+ .code = 0b110_0000,
+ .sym = .{ .symbol = 17, .code_bits = 3 },
+ },
+ .{
+ .code = 0b1110_000,
+ .sym = .{ .symbol = 0, .code_bits = 4 },
+ },
+ .{
+ .code = 0b1111_000,
+ .sym = .{ .symbol = 16, .code_bits = 4 },
+ },
+ };
+
+ // unused symbols
+ for (0..12) |i| {
+ try testing.expectEqual(0, h.symbols[i].code_bits);
+ }
+ // used, from index 12
+ for (expected, 12..) |e, i| {
+ try testing.expectEqual(e.sym.symbol, h.symbols[i].symbol);
+ try testing.expectEqual(e.sym.code_bits, h.symbols[i].code_bits);
+ const sym_from_code = try h.find(e.code);
+ try testing.expectEqual(e.sym.symbol, sym_from_code.symbol);
+ }
+
+ // All possible codes for each symbol.
+ // Lookup table has 128 elements, to cover all possible 7 bit codes.
+ for (0b0000_000..0b0100_000) |c| // 0..32 (32)
+ try testing.expectEqual(3, (try h.find(@intCast(c))).symbol);
+
+ for (0b0100_000..0b1000_000) |c| // 32..64 (32)
+ try testing.expectEqual(18, (try h.find(@intCast(c))).symbol);
+
+ for (0b1000_000..0b1010_000) |c| // 64..80 (16)
+ try testing.expectEqual(1, (try h.find(@intCast(c))).symbol);
+
+ for (0b1010_000..0b1100_000) |c| // 80..96 (16)
+ try testing.expectEqual(4, (try h.find(@intCast(c))).symbol);
+
+ for (0b1100_000..0b1110_000) |c| // 96..112 (16)
+ try testing.expectEqual(17, (try h.find(@intCast(c))).symbol);
+
+ for (0b1110_000..0b1111_000) |c| // 112..120 (8)
+ try testing.expectEqual(0, (try h.find(@intCast(c))).symbol);
+
+ for (0b1111_000..0b1_0000_000) |c| // 120...128 (8)
+ try testing.expectEqual(16, (try h.find(@intCast(c))).symbol);
+}
+
+const print = std.debug.print;
+const assert = std.debug.assert;
+const expect = std.testing.expect;
+
+test "flate.HuffmanDecoder encode/decode literals" {
+ const LiteralEncoder = @import("huffman_encoder.zig").LiteralEncoder;
+
+ for (1..286) |j| { // for all different number of codes
+ var enc: LiteralEncoder = .{};
+ // create frequencies
+ var freq = [_]u16{0} ** 286;
+ freq[256] = 1; // ensure we have end of block code
+ for (&freq, 1..) |*f, i| {
+ if (i % j == 0)
+ f.* = @intCast(i);
+ }
+
+ // encoder from frequencies
+ enc.generate(&freq, 15);
+
+ // get code_lens from encoder
+ var code_lens = [_]u4{0} ** 286;
+ for (code_lens, 0..) |_, i| {
+ code_lens[i] = @intCast(enc.codes[i].len);
+ }
+ // generate decoder from code lens
+ var dec: LiteralDecoder = .{};
+ try dec.generate(&code_lens);
+
+ // expect decoder code to match original encoder code
+ for (dec.symbols) |s| {
+ if (s.code_bits == 0) continue;
+ const c_code: u16 = @bitReverse(@as(u15, @intCast(s.code)));
+ const symbol: u16 = switch (s.kind) {
+ .literal => s.symbol,
+ .end_of_block => 256,
+ .match => @as(u16, s.symbol) + 257,
+ };
+
+ const c = enc.codes[symbol];
+ try expect(c.code == c_code);
+ }
+
+ // find each symbol by code
+ for (enc.codes) |c| {
+ if (c.len == 0) continue;
+
+ const s_code: u15 = @bitReverse(@as(u15, @intCast(c.code)));
+ const s = try dec.find(s_code);
+ try expect(s.code == s_code);
+ try expect(s.code_bits == c.len);
+ }
+ }
+}
diff --git a/lib/std/compress/flate/huffman_encoder.zig b/lib/std/compress/flate/huffman_encoder.zig
new file mode 100644
index 0000000000..a8553ebb5e
--- /dev/null
+++ b/lib/std/compress/flate/huffman_encoder.zig
@@ -0,0 +1,536 @@
+const std = @import("std");
+const assert = std.debug.assert;
+const math = std.math;
+const mem = std.mem;
+const sort = std.sort;
+const testing = std.testing;
+
+const consts = @import("consts.zig").huffman;
+
+ // A literal (index into the frequency table) paired with its frequency.
+ const LiteralNode = struct {
+ literal: u16,
+ freq: u16,
+ };
+
+// Describes the state of the constructed tree for a given depth.
+const LevelInfo = struct {
+ // Our level. for better printing
+ level: u32,
+
+ // The frequency of the last node at this level
+ last_freq: u32,
+
+ // The frequency of the next character to add to this level
+ next_char_freq: u32,
+
+ // The frequency of the next pair (from level below) to add to this level.
+ // Only valid if the "needed" value of the next lower level is 0.
+ next_pair_freq: u32,
+
+ // The number of chains remaining to generate for this level before moving
+ // up to the next level
+ needed: u32,
+};
+
+ // HuffCode is a huffman code: the code bits together with the bit length.
+ pub const HuffCode = struct {
+     code: u16 = 0,
+     len: u16 = 0,
+
+     // Overwrites both the code and the length of this HuffCode.
+     fn set(self: *HuffCode, code: u16, length: u16) void {
+         self.* = .{ .code = code, .len = length };
+     }
+ };
+
+pub fn HuffmanEncoder(comptime size: usize) type {
+ return struct {
+ codes: [size]HuffCode = undefined,
+ // Reusable buffer with the longest possible frequency table.
+ freq_cache: [consts.max_num_frequencies + 1]LiteralNode = undefined,
+ bit_count: [17]u32 = undefined,
+ lns: []LiteralNode = undefined, // sorted by literal, stored to avoid repeated allocation in generate
+ lfs: []LiteralNode = undefined, // sorted by frequency, stored to avoid repeated allocation in generate
+
+ const Self = @This();
+
+ // Update this Huffman Code object to be the minimum code for the specified frequency count.
+ //
+ // freq An array of frequencies, in which frequency[i] gives the frequency of literal i.
+ // max_bits The maximum number of bits to use for any literal.
+ //
+ // Literals with zero frequency get code length 0. With two or fewer used
+ // literals each one gets a 1-bit code; otherwise the lengths come from
+ // `bitCounts` and the codes from `assignEncodingAndSize`.
+ pub fn generate(self: *Self, freq: []u16, max_bits: u32) void {
+ var list = self.freq_cache[0 .. freq.len + 1];
+ // Number of non-zero literals
+ var count: u32 = 0;
+ // Set list to be the set of all non-zero literals and their frequencies
+ for (freq, 0..) |f, i| {
+ if (f != 0) {
+ list[count] = LiteralNode{ .literal = @as(u16, @intCast(i)), .freq = f };
+ count += 1;
+ } else {
+ list[count] = LiteralNode{ .literal = 0x00, .freq = 0 };
+ self.codes[i].len = 0;
+ }
+ }
+ list[freq.len] = LiteralNode{ .literal = 0x00, .freq = 0 };
+
+ // Keep only the entries for literals that actually occur.
+ list = list[0..count];
+ if (count <= 2) {
+ // Handle the small cases here, because they are awkward for the general case code. With
+ // two or fewer literals, everything has bit length 1.
+ for (list, 0..) |node, i| {
+ // "list" is in order of increasing literal value.
+ self.codes[node.literal].set(@as(u16, @intCast(i)), 1);
+ }
+ return;
+ }
+ // Sort by frequency; ties are broken by literal value (see `byFreq`).
+ self.lfs = list;
+ mem.sort(LiteralNode, self.lfs, {}, byFreq);
+
+ // Get the number of literals for each bit count
+ const bit_count = self.bitCounts(list, max_bits);
+ // And do the assignment
+ self.assignEncodingAndSize(bit_count, list);
+ }
+
+ // Returns the number of bits needed to encode the literals counted in
+ // `freq` with the current codes: the sum of frequency times code length
+ // over all literals with non-zero frequency.
+ pub fn bitLength(self: *Self, freq: []u16) u32 {
+     var bits: u32 = 0;
+     for (freq, 0..) |count, literal| {
+         if (count == 0) continue;
+         bits += @as(u32, count) * @as(u32, self.codes[literal].len);
+     }
+     return bits;
+ }
+
+ // Return the number of literals assigned to each bit size in the Huffman encoding
+ //
+ // This method is only called when list.len >= 3
+ // The cases of 0, 1, and 2 literals are handled by special case code.
+ //
+ // list: An array of the literals with non-zero frequencies
+ // and their associated frequencies. The array is in order of increasing
+ // frequency, and has as its last element a special element with frequency
+ // std.math.maxInt(i32)
+ // NOTE(review): `generate` passes only the non-zero literals, without such
+ // a trailing element; running out of leaves is handled below through
+ // `maxNode()` instead - confirm the description above is still intended.
+ //
+ // max_bits: The maximum number of bits that should be used to encode any literal.
+ // Must be less than 16.
+ //
+ // Returns an integer array in which array[i] indicates the number of literals
+ // that should be encoded in i bits.
+ // The returned slice points into `self.bit_count`.
+ fn bitCounts(self: *Self, list: []LiteralNode, max_bits_to_use: usize) []u32 {
+ var max_bits = max_bits_to_use;
+ const n = list.len;
+ const max_bits_limit = 16;
+
+ assert(max_bits < max_bits_limit);
+
+ // The tree can't have greater depth than n - 1, no matter what. This
+ // saves a little bit of work in some small cases
+ max_bits = @min(max_bits, n - 1);
+
+ // Create information about each of the levels.
+ // A bogus "Level 0" whose sole purpose is so that
+ // level1.prev.needed == 0. This makes level1.next_pair_freq
+ // be a legitimate value that never gets chosen.
+ var levels: [max_bits_limit]LevelInfo = mem.zeroes([max_bits_limit]LevelInfo);
+ // leaf_counts[i] counts the number of literals at the left
+ // of ancestors of the rightmost node at level i.
+ // leaf_counts[i][j] is the number of literals at the left
+ // of the level j ancestor.
+ var leaf_counts: [max_bits_limit][max_bits_limit]u32 = mem.zeroes([max_bits_limit][max_bits_limit]u32);
+
+ {
+ var level = @as(u32, 1);
+ while (level <= max_bits) : (level += 1) {
+ // For every level, the first two items are the first two characters.
+ // We initialize the levels as if we had already figured this out.
+ // NOTE(review): `list[0].freq + list[1].freq` is a u16 addition
+ // (LiteralNode.freq is u16); presumably the two smallest
+ // frequencies never sum past 65535 - confirm.
+ levels[level] = LevelInfo{
+ .level = level,
+ .last_freq = list[1].freq,
+ .next_char_freq = list[2].freq,
+ .next_pair_freq = list[0].freq + list[1].freq,
+ .needed = 0,
+ };
+ leaf_counts[level][level] = 2;
+ if (level == 1) {
+ levels[level].next_pair_freq = math.maxInt(i32);
+ }
+ }
+ }
+
+ // We need a total of 2*n - 2 items at top level and have already generated 2.
+ levels[max_bits].needed = 2 * @as(u32, @intCast(n)) - 4;
+
+ {
+ var level = max_bits;
+ while (true) {
+ var l = &levels[level];
+ if (l.next_pair_freq == math.maxInt(i32) and l.next_char_freq == math.maxInt(i32)) {
+ // We've run out of both leafs and pairs.
+ // End all calculations for this level.
+ // To make sure we never come back to this level or any lower level,
+ // set next_pair_freq impossibly large.
+ l.needed = 0;
+ levels[level + 1].next_pair_freq = math.maxInt(i32);
+ level += 1;
+ continue;
+ }
+
+ const prev_freq = l.last_freq;
+ if (l.next_char_freq < l.next_pair_freq) {
+ // The next item on this row is a leaf node.
+ const next = leaf_counts[level][level] + 1;
+ l.last_freq = l.next_char_freq;
+ // Lower leaf_counts are the same of the previous node.
+ leaf_counts[level][level] = next;
+ if (next >= list.len) {
+ // NOTE(review): maxNode().freq is math.maxInt(u16), not
+ // math.maxInt(i32), so this sentinel never satisfies the
+ // `== math.maxInt(i32)` test at the top of the loop -
+ // confirm this is intended.
+ l.next_char_freq = maxNode().freq;
+ } else {
+ l.next_char_freq = list[next].freq;
+ }
+ } else {
+ // The next item on this row is a pair from the previous row.
+ // next_pair_freq isn't valid until we generate two
+ // more values in the level below
+ l.last_freq = l.next_pair_freq;
+ // Take leaf counts from the lower level, except counts[level] remains the same.
+ @memcpy(leaf_counts[level][0..level], leaf_counts[level - 1][0..level]);
+ levels[l.level - 1].needed = 2;
+ }
+
+ l.needed -= 1;
+ if (l.needed == 0) {
+ // We've done everything we need to do for this level.
+ // Continue calculating one level up. Fill in next_pair_freq
+ // of that level with the sum of the two nodes we've just calculated on
+ // this level.
+ if (l.level == max_bits) {
+ // All done!
+ break;
+ }
+ levels[l.level + 1].next_pair_freq = prev_freq + l.last_freq;
+ level += 1;
+ } else {
+ // If we stole from below, move down temporarily to replenish it.
+ while (levels[level - 1].needed > 0) {
+ level -= 1;
+ if (level == 0) {
+ break;
+ }
+ }
+ }
+ }
+ }
+
+ // Something is wrong if, at the end, the top level hasn't used
+ // all of the leaves.
+ assert(leaf_counts[max_bits][max_bits] == n);
+
+ var bit_count = self.bit_count[0 .. max_bits + 1];
+ var bits: u32 = 1;
+ const counts = &leaf_counts[max_bits];
+ {
+ var level = max_bits;
+ while (level > 0) : (level -= 1) {
+ // counts[level] gives the number of literals requiring at least "bits"
+ // bits to encode.
+ bit_count[bits] = counts[level] - counts[level - 1];
+ bits += 1;
+ // The loop condition already guarantees level > 0 here, so this
+ // break is never taken.
+ if (level == 0) {
+ break;
+ }
+ }
+ }
+ return bit_count;
+ }
+
+ // Look at the leaves and assign them a bit count and an encoding as specified
+ // in RFC 1951 3.2.2
+ //
+ // bit_count: bit_count[n] is the number of literals that get an n-bit code.
+ // list_arg: the literals, consumed from the end in chunks of bit_count[n] entries.
+ fn assignEncodingAndSize(self: *Self, bit_count: []u32, list_arg: []LiteralNode) void {
+ var code = @as(u16, 0);
+ var list = list_arg;
+
+ for (bit_count, 0..) |bits, n| {
+ // Moving to the next code length appends one bit to the running code.
+ code <<= 1;
+ if (n == 0 or bits == 0) {
+ continue;
+ }
+ // The literals list[list.len-bits] .. list[list.len-1]
+ // are encoded using "n" bits, and get the values
+ // code, code + 1, .... The code values are
+ // assigned in literal order (not frequency order).
+ const chunk = list[list.len - @as(u32, @intCast(bits)) ..];
+
+ self.lns = chunk;
+ mem.sort(LiteralNode, self.lns, {}, byLiteral);
+
+ for (chunk) |node| {
+ // Codes are stored bit-reversed.
+ self.codes[node.literal] = HuffCode{
+ .code = bitReverse(u16, code, @as(u5, @intCast(n))),
+ .len = @as(u16, @intCast(n)),
+ };
+ code += 1;
+ }
+ // Drop the literals that were just assigned.
+ list = list[0 .. list.len - @as(u32, @intCast(bits))];
+ }
+ }
+ };
+}
+
+ // Returns a node whose literal and frequency are both the largest u16 value.
+ fn maxNode() LiteralNode {
+     const largest = math.maxInt(u16);
+     return .{ .literal = largest, .freq = largest };
+ }
+
+ // Returns a default-initialized encoder for an alphabet of `size` symbols.
+ pub fn huffmanEncoder(comptime size: u32) HuffmanEncoder(size) {
+ return .{};
+ }
+
+pub const LiteralEncoder = HuffmanEncoder(consts.max_num_frequencies);
+pub const DistanceEncoder = HuffmanEncoder(consts.distance_code_count);
+pub const CodegenEncoder = HuffmanEncoder(19);
+
+ // Builds the encoder for the fixed literal table. Only `codes` is filled in;
+ // each code is stored bit-reversed together with its length.
+ pub fn fixedLiteralEncoder() LiteralEncoder {
+     var h: LiteralEncoder = undefined;
+     var ch: u16 = 0;
+
+     while (ch < consts.max_num_frequencies) : (ch += 1) {
+         // code length per symbol range
+         const size: u16 = switch (ch) {
+             0...143 => 8,
+             144...255 => 9,
+             256...279 => 7,
+             else => 8,
+         };
+         // code value per symbol range, before bit reversal
+         const bits: u16 = switch (ch) {
+             0...143 => ch + 48, // 00110000 .. 10111111
+             144...255 => ch + 400 - 144, // 110010000 .. 111111111
+             256...279 => ch - 256, // 0000000 .. 0010111
+             else => ch + 192 - 280, // 11000000 .. 11000111
+         };
+         h.codes[ch] = HuffCode{ .code = bitReverse(u16, bits, @as(u5, @intCast(size))), .len = size };
+     }
+     return h;
+ }
+
+ // Builds the fixed distance encoder: every symbol gets a 5-bit code equal to
+ // its index, stored bit-reversed. Only `codes` is filled in.
+ pub fn fixedDistanceEncoder() DistanceEncoder {
+     var h: DistanceEncoder = undefined;
+     for (&h.codes, 0..) |*slot, ch| {
+         slot.* = HuffCode{ .code = bitReverse(u16, @as(u16, @intCast(ch)), 5), .len = 5 };
+     }
+     return h;
+ }
+
+ // Builds the distance encoder used for huffman only encoding. It is generated
+ // from a frequency table in which only distance code 0 occurs, and can be
+ // reused since no distance values are encoded in that mode.
+ pub fn huffmanDistanceEncoder() DistanceEncoder {
+     var distance_freq = mem.zeroes([consts.distance_code_count]u16);
+     distance_freq[0] = 1;
+
+     var encoder: DistanceEncoder = .{};
+     encoder.generate(&distance_freq, 15);
+     return encoder;
+ }
+
+ // Sort predicate: ascending literal value.
+ fn byLiteral(_: void, a: LiteralNode, b: LiteralNode) bool {
+     return a.literal < b.literal;
+ }
+
+ // Sort predicate: ascending frequency, ties broken by ascending literal value.
+ fn byFreq(_: void, a: LiteralNode, b: LiteralNode) bool {
+     return if (a.freq != b.freq) a.freq < b.freq else a.literal < b.literal;
+ }
+
+ // Generates codes for a 19-symbol frequency table with a 7-bit limit and
+ // checks the total encoded size, every code length and every assigned code.
+ test "flate.HuffmanEncoder generate a Huffman code from an array of frequencies" {
+     var freqs = [19]u16{
+         8, 1, 1, 2, 5, 10, 9, 1, // symbols 0..7
+         0, 0, 0, 0, 0, 0, 0, 0, // symbols 8..15
+         1, 3, 5, // symbols 16..18
+     };
+
+     var enc = huffmanEncoder(19);
+     enc.generate(freqs[0..], 7);
+
+     try testing.expectEqual(@as(u32, 141), enc.bitLength(freqs[0..]));
+
+     // expected code length, indexed by symbol
+     const expected_lens = [19]usize{
+         3, 6, 6, 5, 3, 2, 2, 6, // symbols 0..7
+         0, 0, 0, 0, 0, 0, 0, 0, // symbols 8..15
+         6, 5, 3, // symbols 16..18
+     };
+     for (expected_lens, 0..) |len, symbol| {
+         try testing.expectEqual(len, enc.codes[symbol].len);
+     }
+
+     // expected code for every used symbol
+     const expected_codes = [_]struct { symbol: usize, code: u16 }{
+         .{ .symbol = 5, .code = 0x0 },
+         .{ .symbol = 6, .code = 0x2 },
+         .{ .symbol = 0, .code = 0x1 },
+         .{ .symbol = 4, .code = 0x5 },
+         .{ .symbol = 18, .code = 0x3 },
+         .{ .symbol = 3, .code = 0x7 },
+         .{ .symbol = 17, .code = 0x17 },
+         .{ .symbol = 1, .code = 0x0f },
+         .{ .symbol = 2, .code = 0x2f },
+         .{ .symbol = 7, .code = 0x1f },
+         .{ .symbol = 16, .code = 0x3f },
+     };
+     for (expected_codes) |e| {
+         try testing.expectEqual(e.code, enc.codes[e.symbol].code);
+     }
+ }
+
+ // Checks that every code produced by `fixedLiteralEncoder` has length 7, 8 or
+ // 9 and that, once the stored bit order is reversed back, its value lies in
+ // the range belonging to that length.
+ test "flate.HuffmanEncoder generate a Huffman code for the fixed literal table specific to Deflate" {
+     const enc = fixedLiteralEncoder();
+     for (enc.codes) |c| {
+         switch (c.len) {
+             7 => {
+                 const v = @bitReverse(@as(u7, @intCast(c.code)));
+                 try testing.expect(v <= 0b0010111);
+             },
+             8 => {
+                 const v = @bitReverse(@as(u8, @intCast(c.code)));
+                 // Every bound must carry the `0b` prefix: a decimal 11000111
+                 // is larger than any u8, so the upper-bound check would
+                 // always pass.
+                 try testing.expect((v >= 0b00110000 and v <= 0b10111111) or
+                     (v >= 0b11000000 and v <= 0b11000111));
+             },
+             9 => {
+                 const v = @bitReverse(@as(u9, @intCast(c.code)));
+                 try testing.expect(v >= 0b110010000 and v <= 0b111111111);
+             },
+             else => unreachable,
+         }
+     }
+ }
+
+ // Checks that every fixed distance code is 5 bits long and, once the stored
+ // bit order is reversed back, does not exceed 29.
+ test "flate.HuffmanEncoder generate a Huffman code for the 30 possible relative distances (LZ77 distances) of Deflate" {
+     const enc = fixedDistanceEncoder();
+     for (enc.codes) |entry| {
+         const distance_code = @bitReverse(@as(u5, @intCast(entry.code)));
+         try testing.expect(distance_code <= 29);
+         try testing.expect(entry.len == 5);
+     }
+ }
+
+ // Reverses the lowest `n` bits of `value`: the whole integer is bit-reversed
+ // and then shifted right so that only those `n` bits remain.
+ fn bitReverse(comptime T: type, value: T, n: usize) T {
+     const total_bits = @typeInfo(T).Int.bits;
+     const drop: math.Log2Int(T) = @intCast(total_bits - n);
+     return @bitReverse(value) >> drop;
+ }
+
+ // Table-driven check of `bitReverse` on u16 values: reversing the lowest
+ // `bit_count` bits of `in` must give `out`.
+ test "flate bitReverse" {
+ const ReverseBitsTest = struct {
+ in: u16,
+ bit_count: u5,
+ out: u16,
+ };
+
+ const reverse_bits_tests = [_]ReverseBitsTest{
+ .{ .in = 1, .bit_count = 1, .out = 1 },
+ .{ .in = 1, .bit_count = 2, .out = 2 },
+ .{ .in = 1, .bit_count = 3, .out = 4 },
+ .{ .in = 1, .bit_count = 4, .out = 8 },
+ .{ .in = 1, .bit_count = 5, .out = 16 },
+ .{ .in = 17, .bit_count = 5, .out = 17 },
+ .{ .in = 257, .bit_count = 9, .out = 257 },
+ .{ .in = 29, .bit_count = 5, .out = 23 },
+ };
+
+ for (reverse_bits_tests) |h| {
+ const v = bitReverse(u16, h.in, h.bit_count);
+ try std.testing.expectEqual(h.out, v);
+ }
+ }
+
+ // Writes every fixed literal code through a little-endian bit writer, in
+ // symbol order, and compares the resulting bytes with `fixed_codes`.
+ test "flate.HuffmanEncoder fixedLiteralEncoder codes" {
+ var al = std.ArrayList(u8).init(testing.allocator);
+ defer al.deinit();
+ var bw = std.io.bitWriter(.little, al.writer());
+
+ const f = fixedLiteralEncoder();
+ for (f.codes) |c| {
+ try bw.writeBits(c.code, c.len);
+ }
+ try testing.expectEqualSlices(u8, &fixed_codes, al.items);
+ }
+
+pub const fixed_codes = [_]u8{
+ 0b00001100, 0b10001100, 0b01001100, 0b11001100, 0b00101100, 0b10101100, 0b01101100, 0b11101100,
+ 0b00011100, 0b10011100, 0b01011100, 0b11011100, 0b00111100, 0b10111100, 0b01111100, 0b11111100,
+ 0b00000010, 0b10000010, 0b01000010, 0b11000010, 0b00100010, 0b10100010, 0b01100010, 0b11100010,
+ 0b00010010, 0b10010010, 0b01010010, 0b11010010, 0b00110010, 0b10110010, 0b01110010, 0b11110010,
+ 0b00001010, 0b10001010, 0b01001010, 0b11001010, 0b00101010, 0b10101010, 0b01101010, 0b11101010,
+ 0b00011010, 0b10011010, 0b01011010, 0b11011010, 0b00111010, 0b10111010, 0b01111010, 0b11111010,
+ 0b00000110, 0b10000110, 0b01000110, 0b11000110, 0b00100110, 0b10100110, 0b01100110, 0b11100110,
+ 0b00010110, 0b10010110, 0b01010110, 0b11010110, 0b00110110, 0b10110110, 0b01110110, 0b11110110,
+ 0b00001110, 0b10001110, 0b01001110, 0b11001110, 0b00101110, 0b10101110, 0b01101110, 0b11101110,
+ 0b00011110, 0b10011110, 0b01011110, 0b11011110, 0b00111110, 0b10111110, 0b01111110, 0b11111110,
+ 0b00000001, 0b10000001, 0b01000001, 0b11000001, 0b00100001, 0b10100001, 0b01100001, 0b11100001,
+ 0b00010001, 0b10010001, 0b01010001, 0b11010001, 0b00110001, 0b10110001, 0b01110001, 0b11110001,
+ 0b00001001, 0b10001001, 0b01001001, 0b11001001, 0b00101001, 0b10101001, 0b01101001, 0b11101001,
+ 0b00011001, 0b10011001, 0b01011001, 0b11011001, 0b00111001, 0b10111001, 0b01111001, 0b11111001,
+ 0b00000101, 0b10000101, 0b01000101, 0b11000101, 0b00100101, 0b10100101, 0b01100101, 0b11100101,
+ 0b00010101, 0b10010101, 0b01010101, 0b11010101, 0b00110101, 0b10110101, 0b01110101, 0b11110101,
+ 0b00001101, 0b10001101, 0b01001101, 0b11001101, 0b00101101, 0b10101101, 0b01101101, 0b11101101,
+ 0b00011101, 0b10011101, 0b01011101, 0b11011101, 0b00111101, 0b10111101, 0b01111101, 0b11111101,
+ 0b00010011, 0b00100110, 0b01001110, 0b10011010, 0b00111100, 0b01100101, 0b11101010, 0b10110100,
+ 0b11101001, 0b00110011, 0b01100110, 0b11001110, 0b10011010, 0b00111101, 0b01100111, 0b11101110,
+ 0b10111100, 0b11111001, 0b00001011, 0b00010110, 0b00101110, 0b01011010, 0b10111100, 0b01100100,
+ 0b11101001, 0b10110010, 0b11100101, 0b00101011, 0b01010110, 0b10101110, 0b01011010, 0b10111101,
+ 0b01100110, 0b11101101, 0b10111010, 0b11110101, 0b00011011, 0b00110110, 0b01101110, 0b11011010,
+ 0b10111100, 0b01100101, 0b11101011, 0b10110110, 0b11101101, 0b00111011, 0b01110110, 0b11101110,
+ 0b11011010, 0b10111101, 0b01100111, 0b11101111, 0b10111110, 0b11111101, 0b00000111, 0b00001110,
+ 0b00011110, 0b00111010, 0b01111100, 0b11100100, 0b11101000, 0b10110001, 0b11100011, 0b00100111,
+ 0b01001110, 0b10011110, 0b00111010, 0b01111101, 0b11100110, 0b11101100, 0b10111001, 0b11110011,
+ 0b00010111, 0b00101110, 0b01011110, 0b10111010, 0b01111100, 0b11100101, 0b11101010, 0b10110101,
+ 0b11101011, 0b00110111, 0b01101110, 0b11011110, 0b10111010, 0b01111101, 0b11100111, 0b11101110,
+ 0b10111101, 0b11111011, 0b00001111, 0b00011110, 0b00111110, 0b01111010, 0b11111100, 0b11100100,
+ 0b11101001, 0b10110011, 0b11100111, 0b00101111, 0b01011110, 0b10111110, 0b01111010, 0b11111101,
+ 0b11100110, 0b11101101, 0b10111011, 0b11110111, 0b00011111, 0b00111110, 0b01111110, 0b11111010,
+ 0b11111100, 0b11100101, 0b11101011, 0b10110111, 0b11101111, 0b00111111, 0b01111110, 0b11111110,
+ 0b11111010, 0b11111101, 0b11100111, 0b11101111, 0b10111111, 0b11111111, 0b00000000, 0b00100000,
+ 0b00001000, 0b00001100, 0b10000001, 0b11000010, 0b11100000, 0b00001000, 0b00100100, 0b00001010,
+ 0b10001101, 0b11000001, 0b11100010, 0b11110000, 0b00000100, 0b00100010, 0b10001001, 0b01001100,
+ 0b10100001, 0b11010010, 0b11101000, 0b00000011, 0b10000011, 0b01000011, 0b11000011, 0b00100011,
+ 0b10100011,
+};
diff --git a/lib/std/compress/flate/inflate.zig b/lib/std/compress/flate/inflate.zig
new file mode 100644
index 0000000000..5e286d2af3
--- /dev/null
+++ b/lib/std/compress/flate/inflate.zig
@@ -0,0 +1,542 @@
+const std = @import("std");
+const assert = std.debug.assert;
+const testing = std.testing;
+
+const hfd = @import("huffman_decoder.zig");
+const BitReader = @import("bit_reader.zig").BitReader;
+const CircularBuffer = @import("CircularBuffer.zig");
+const Container = @import("container.zig").Container;
+const Token = @import("Token.zig");
+const codegen_order = @import("consts.zig").huffman.codegen_order;
+
+/// Decompresses deflate bit stream `reader` and writes uncompressed data to the
+/// `writer` stream.
+pub fn decompress(comptime container: Container, reader: anytype, writer: anytype) !void {
+ var d = decompressor(container, reader);
+ try d.decompress(writer);
+}
+
+/// Inflate decompressor for the reader type.
+pub fn decompressor(comptime container: Container, reader: anytype) Inflate(container, @TypeOf(reader)) {
+ return Inflate(container, @TypeOf(reader)).init(reader);
+}
+
+/// Inflate decompresses a deflate bit stream. Reads compressed data from the
+/// reader provided in init. Decompressed data is stored in an internal hist
+/// buffer and can be accessed through iterator-style `next` or a reader interface.
+///
+/// Container defines the header/footer wrapper around the deflate bit stream.
+/// It can be raw (no wrapper), gzip or zlib.
+///
+/// Deflate bit stream consists of multiple blocks. Block can be one of three types:
+/// * stored, non compressed, max 64k in size
+/// * fixed, huffman codes are predefined
+/// * dynamic, huffman code tables are encoded at the block start
+///
+/// `step` function runs decoder until internal `hist` buffer is full. Client
+/// then needs to read that data in order to proceed with decoding.
+///
+/// Allocates 74.5K of internal buffers, most important are:
+/// * 64K for history (CircularBuffer)
+/// * ~10K huffman decoders (Literal and DistanceDecoder)
+///
+pub fn Inflate(comptime container: Container, comptime ReaderType: type) type {
+ return struct {
+ const BitReaderType = BitReader(ReaderType);
+ const F = BitReaderType.flag;
+
+ bits: BitReaderType = .{},
+ hist: CircularBuffer = .{},
+ // Hashes the uncompressed data to produce the checksum for the gzip/zlib footer.
+ hasher: container.Hasher() = .{},
+
+ // dynamic block huffman code decoders
+ lit_dec: hfd.LiteralDecoder = .{}, // literals
+ dst_dec: hfd.DistanceDecoder = .{}, // distances
+
+ // current read state
+ bfinal: u1 = 0,
+ block_type: u2 = 0b11,
+ state: ReadState = .protocol_header,
+
+ const ReadState = enum {
+ protocol_header,
+ block_header,
+ block,
+ protocol_footer,
+ end,
+ };
+
+ const Self = @This();
+
+ pub const Error = BitReaderType.Error || Container.Error || hfd.Error || error{
+ InvalidCode,
+ InvalidMatch,
+ InvalidBlockType,
+ WrongStoredBlockNlen,
+ InvalidDynamicBlockHeader,
+ };
+
+ pub fn init(rt: ReaderType) Self {
+ return .{ .bits = BitReaderType.init(rt) };
+ }
+
+ fn blockHeader(self: *Self) !void {
+ self.bfinal = try self.bits.read(u1);
+ self.block_type = try self.bits.read(u2);
+ }
+
+ fn storedBlock(self: *Self) !bool {
+ self.bits.alignToByte(); // skip padding bits up to the next byte boundary
+ // everything after this point is byte aligned in a stored block
+ var len = try self.bits.read(u16);
+ const nlen = try self.bits.read(u16);
+ if (len != ~nlen) return error.WrongStoredBlockNlen;
+
+ while (len > 0) {
+ const buf = self.hist.getWritable(len);
+ try self.bits.readAll(buf);
+ len -= @intCast(buf.len);
+ }
+ return true;
+ }
+
+ fn fixedBlock(self: *Self) !bool {
+ while (!self.hist.full()) {
+ const code = try self.bits.readFixedCode();
+ switch (code) {
+ 0...255 => self.hist.write(@intCast(code)),
+ 256 => return true, // end of block
+ 257...285 => try self.fixedDistanceCode(@intCast(code - 257)),
+ else => return error.InvalidCode,
+ }
+ }
+ return false;
+ }
+
+ // Handles fixed block non literal (length) code.
+ // Length code is followed by 5 bits of distance code.
+ fn fixedDistanceCode(self: *Self, code: u8) !void {
+ try self.bits.fill(5 + 5 + 13);
+ const length = try self.decodeLength(code);
+ const distance = try self.decodeDistance(try self.bits.readF(u5, F.buffered | F.reverse));
+ try self.hist.writeMatch(length, distance);
+ }
+
+ inline fn decodeLength(self: *Self, code: u8) !u16 {
+ if (code > 28) return error.InvalidCode;
+ const ml = Token.matchLength(code);
+ return if (ml.extra_bits == 0) // 0 - 5 extra bits
+ ml.base
+ else
+ ml.base + try self.bits.readN(ml.extra_bits, F.buffered);
+ }
+
+ fn decodeDistance(self: *Self, code: u8) !u16 {
+ if (code > 29) return error.InvalidCode;
+ const md = Token.matchDistance(code);
+ return if (md.extra_bits == 0) // 0 - 13 extra bits
+ md.base
+ else
+ md.base + try self.bits.readN(md.extra_bits, F.buffered);
+ }
+
+ fn dynamicBlockHeader(self: *Self) !void {
+ const hlit: u16 = @as(u16, try self.bits.read(u5)) + 257; // number of literal/length codes (encoded as count - 257)
+ const hdist: u16 = @as(u16, try self.bits.read(u5)) + 1; // number of distance codes (encoded as count - 1)
+ const hclen: u8 = @as(u8, try self.bits.read(u4)) + 4; // number of code length codes (encoded as count - 4)
+
+ if (hlit > 286 or hdist > 30)
+ return error.InvalidDynamicBlockHeader;
+
+ // code lengths of the code length alphabet, stored in codegen_order
+ var cl_lens = [_]u4{0} ** 19;
+ for (0..hclen) |i| {
+ cl_lens[codegen_order[i]] = try self.bits.read(u3);
+ }
+ var cl_dec: hfd.CodegenDecoder = .{};
+ try cl_dec.generate(&cl_lens);
+
+ // decoded code lengths: literal/length codes first, then distance codes
+ var dec_lens = [_]u4{0} ** (286 + 30);
+ var pos: usize = 0;
+ while (pos < hlit + hdist) {
+ const sym = try cl_dec.find(try self.bits.peekF(u7, F.reverse));
+ try self.bits.shift(sym.code_bits);
+ pos += try self.dynamicCodeLength(sym.symbol, &dec_lens, pos);
+ }
+ if (pos > hlit + hdist) {
+ return error.InvalidDynamicBlockHeader;
+ }
+
+ // literal code lengths to literal decoder
+ try self.lit_dec.generate(dec_lens[0..hlit]);
+
+ // distance code lengths to distance decoder
+ try self.dst_dec.generate(dec_lens[hlit .. hlit + hdist]);
+ }
+
+ // Decode code length symbol to code length. Writes decoded length into
+ // lens slice starting at position pos. Returns number of positions
+ // advanced.
+ fn dynamicCodeLength(self: *Self, code: u16, lens: []u4, pos: usize) !usize {
+ if (pos >= lens.len)
+ return error.InvalidDynamicBlockHeader;
+
+ switch (code) {
+ 0...15 => {
+ // Represent code lengths of 0 - 15
+ lens[pos] = @intCast(code);
+ return 1;
+ },
+ 16 => {
+ // Copy the previous code length 3 - 6 times.
+ // The next 2 bits indicate repeat length
+ const n: u8 = @as(u8, try self.bits.read(u2)) + 3;
+ if (pos == 0 or pos + n > lens.len)
+ return error.InvalidDynamicBlockHeader;
+ for (0..n) |i| {
+ lens[pos + i] = lens[pos + i - 1];
+ }
+ return n;
+ },
+ // Repeat a code length of 0 for 3 - 10 times. (3 bits of length)
+ 17 => return @as(u8, try self.bits.read(u3)) + 3,
+ // Repeat a code length of 0 for 11 - 138 times (7 bits of length)
+ 18 => return @as(u8, try self.bits.read(u7)) + 11,
+ else => return error.InvalidDynamicBlockHeader,
+ }
+ }
+
+ // In larger archives most blocks are usually dynamic, so decompression
+ // performance depends on this function.
+ fn dynamicBlock(self: *Self) !bool {
+ // Hot path loop!
+ while (!self.hist.full()) {
+ try self.bits.fill(15); // optimization so other bit reads can be buffered (avoiding one `if` in hot path)
+ const sym = try self.decodeSymbol(&self.lit_dec);
+
+ switch (sym.kind) {
+ .literal => self.hist.write(sym.symbol),
+ .match => { // Decode match backreference <length, distance>
+ try self.bits.fill(5 + 15 + 13); // so we can use buffered reads
+ const length = try self.decodeLength(sym.symbol);
+ const dsm = try self.decodeSymbol(&self.dst_dec);
+ const distance = try self.decodeDistance(dsm.symbol);
+ try self.hist.writeMatch(length, distance);
+ },
+ .end_of_block => return true,
+ }
+ }
+ return false;
+ }
+
+ // Peek 15 bits from the bit reader (maximum code length is 15 bits). Use
+ // the decoder to find the symbol for that code, which also tells us how
+ // many bits the code occupies. Shift the bit reader by that many bits to
+ // consume them, then return the symbol.
+ fn decodeSymbol(self: *Self, decoder: anytype) !hfd.Symbol {
+ const sym = try decoder.find(try self.bits.peekF(u15, F.buffered | F.reverse));
+ try self.bits.shift(sym.code_bits);
+ return sym;
+ }
+
+ fn step(self: *Self) !void {
+ switch (self.state) {
+ .protocol_header => {
+ try container.parseHeader(&self.bits);
+ self.state = .block_header;
+ },
+ .block_header => {
+ try self.blockHeader();
+ self.state = .block;
+ if (self.block_type == 2) try self.dynamicBlockHeader();
+ },
+ .block => {
+ const done = switch (self.block_type) {
+ 0 => try self.storedBlock(),
+ 1 => try self.fixedBlock(),
+ 2 => try self.dynamicBlock(),
+ else => return error.InvalidBlockType,
+ };
+ if (done) {
+ self.state = if (self.bfinal == 1) .protocol_footer else .block_header;
+ }
+ },
+ .protocol_footer => {
+ self.bits.alignToByte();
+ try container.parseFooter(&self.hasher, &self.bits);
+ self.state = .end;
+ },
+ .end => {},
+ }
+ }
+
+ /// Replaces the inner reader with new reader.
+ pub fn setReader(self: *Self, new_reader: ReaderType) void {
+ self.bits.forward_reader = new_reader;
+ if (self.state == .end or self.state == .protocol_footer) {
+ self.state = .protocol_header;
+ }
+ }
+
+ // Reads all compressed data from the internal reader and outputs plain
+ // (uncompressed) data to the provided writer.
+ pub fn decompress(self: *Self, writer: anytype) !void {
+ while (try self.next()) |buf| {
+ try writer.writeAll(buf);
+ }
+ }
+
+ /// Returns the number of bytes that have been read from the internal
+ /// reader but not yet consumed by the decompressor.
+ pub fn unreadBytes(self: Self) usize {
+ // There can be no error here: the denominator is not zero, and
+ // overflow is not possible since the type is unsigned.
+ return std.math.divCeil(usize, self.bits.nbits, 8) catch unreachable;
+ }
+
+ // Iterator interface
+
+ /// Can be used in iterator like loop without memcpy to another buffer:
+ /// while (try inflate.next()) |buf| { ... }
+ pub fn next(self: *Self) Error!?[]const u8 {
+ const out = try self.get(0);
+ if (out.len == 0) return null;
+ return out;
+ }
+
+ /// Returns decompressed data from internal sliding window buffer.
+ /// Returned buffer can be any length between 0 and `limit` bytes. 0
+ /// returned bytes means end of stream reached. With limit=0 returns as
+ /// much data as it can. It will never be more than 65536 bytes, which is
+ /// the size of the internal buffer.
+ pub fn get(self: *Self, limit: usize) Error![]const u8 {
+ while (true) {
+ const out = self.hist.readAtMost(limit);
+ if (out.len > 0) {
+ self.hasher.update(out);
+ return out;
+ }
+ if (self.state == .end) return out;
+ try self.step();
+ }
+ }
+
+ // Reader interface
+
+ pub const Reader = std.io.Reader(*Self, Error, read);
+
+ /// Returns the number of bytes read. It may be less than buffer.len.
+ /// If the number of bytes read is 0, it means end of stream.
+ /// End of stream is not an error condition.
+ pub fn read(self: *Self, buffer: []u8) Error!usize {
+ const out = try self.get(buffer.len);
+ @memcpy(buffer[0..out.len], out);
+ return out.len;
+ }
+
+ pub fn reader(self: *Self) Reader {
+ return .{ .context = self };
+ }
+ };
+}
+
+test "flate.Inflate decompress" {
+ const cases = [_]struct {
+ in: []const u8,
+ out: []const u8,
+ }{
+ // non compressed block (type 0)
+ .{
+ .in = &[_]u8{
+ 0b0000_0001, 0b0000_1100, 0x00, 0b1111_0011, 0xff, // deflate stored block header (bfinal=1, type 0), len, nlen
+ 'H', 'e', 'l', 'l', 'o', ' ', 'w', 'o', 'r', 'l', 'd', 0x0a, // non compressed data
+ },
+ .out = "Hello world\n",
+ },
+ // fixed code block (type 1)
+ .{
+ .in = &[_]u8{
+ 0xf3, 0x48, 0xcd, 0xc9, 0xc9, 0x57, 0x28, 0xcf, // deflate data block type 1
+ 0x2f, 0xca, 0x49, 0xe1, 0x02, 0x00,
+ },
+ .out = "Hello world\n",
+ },
+ // dynamic block (type 2)
+ .{
+ .in = &[_]u8{
+ 0x3d, 0xc6, 0x39, 0x11, 0x00, 0x00, 0x0c, 0x02, // deflate data block type 2
+ 0x30, 0x2b, 0xb5, 0x52, 0x1e, 0xff, 0x96, 0x38,
+ 0x16, 0x96, 0x5c, 0x1e, 0x94, 0xcb, 0x6d, 0x01,
+ },
+ .out = "ABCDEABCD ABCDEABCD",
+ },
+ };
+ for (cases) |c| {
+ var fb = std.io.fixedBufferStream(c.in);
+ var al = std.ArrayList(u8).init(testing.allocator);
+ defer al.deinit();
+
+ try decompress(.raw, fb.reader(), al.writer());
+ try testing.expectEqualStrings(c.out, al.items);
+ }
+}
+
+test "flate.Inflate gzip decompress" {
+ const cases = [_]struct {
+ in: []const u8,
+ out: []const u8,
+ }{
+ // non compressed block (type 0)
+ .{
+ .in = &[_]u8{
+ 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, // gzip header (10 bytes)
+ 0b0000_0001, 0b0000_1100, 0x00, 0b1111_0011, 0xff, // deflate stored block header (bfinal=1, type 0), len, nlen
+ 'H', 'e', 'l', 'l', 'o', ' ', 'w', 'o', 'r', 'l', 'd', 0x0a, // non compressed data
+ 0xd5, 0xe0, 0x39, 0xb7, // gzip footer: checksum
+ 0x0c, 0x00, 0x00, 0x00, // gzip footer: size
+ },
+ .out = "Hello world\n",
+ },
+ // fixed code block (type 1)
+ .{
+ .in = &[_]u8{
+ 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x03, // gzip header (10 bytes)
+ 0xf3, 0x48, 0xcd, 0xc9, 0xc9, 0x57, 0x28, 0xcf, // deflate data block type 1
+ 0x2f, 0xca, 0x49, 0xe1, 0x02, 0x00,
+ 0xd5, 0xe0, 0x39, 0xb7, 0x0c, 0x00, 0x00, 0x00, // gzip footer (chksum, len)
+ },
+ .out = "Hello world\n",
+ },
+ // dynamic block (type 2)
+ .{
+ .in = &[_]u8{
+ 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, // gzip header (10 bytes)
+ 0x3d, 0xc6, 0x39, 0x11, 0x00, 0x00, 0x0c, 0x02, // deflate data block type 2
+ 0x30, 0x2b, 0xb5, 0x52, 0x1e, 0xff, 0x96, 0x38,
+ 0x16, 0x96, 0x5c, 0x1e, 0x94, 0xcb, 0x6d, 0x01,
+ 0x17, 0x1c, 0x39, 0xb4, 0x13, 0x00, 0x00, 0x00, // gzip footer (chksum, len)
+ },
+ .out = "ABCDEABCD ABCDEABCD",
+ },
+ // gzip header with name
+ .{
+ .in = &[_]u8{
+ 0x1f, 0x8b, 0x08, 0x08, 0xe5, 0x70, 0xb1, 0x65, 0x00, 0x03, 0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x2e,
+ 0x74, 0x78, 0x74, 0x00, 0xf3, 0x48, 0xcd, 0xc9, 0xc9, 0x57, 0x28, 0xcf, 0x2f, 0xca, 0x49, 0xe1,
+ 0x02, 0x00, 0xd5, 0xe0, 0x39, 0xb7, 0x0c, 0x00, 0x00, 0x00,
+ },
+ .out = "Hello world\n",
+ },
+ };
+ for (cases) |c| {
+ var fb = std.io.fixedBufferStream(c.in);
+ var al = std.ArrayList(u8).init(testing.allocator);
+ defer al.deinit();
+
+ try decompress(.gzip, fb.reader(), al.writer());
+ try testing.expectEqualStrings(c.out, al.items);
+ }
+}
+
+test "flate.Inflate zlib decompress" {
+ const cases = [_]struct {
+ in: []const u8,
+ out: []const u8,
+ }{
+ // non compressed block (type 0)
+ .{
+ .in = &[_]u8{
+ 0x78, 0b10_0_11100, // zlib header (2 bytes)
+ 0b0000_0001, 0b0000_1100, 0x00, 0b1111_0011, 0xff, // deflate stored block header (bfinal=1, type 0), len, nlen
+ 'H', 'e', 'l', 'l', 'o', ' ', 'w', 'o', 'r', 'l', 'd', 0x0a, // non compressed data
+ 0x1c, 0xf2, 0x04, 0x47, // zlib footer: checksum
+ },
+ .out = "Hello world\n",
+ },
+ };
+ for (cases) |c| {
+ var fb = std.io.fixedBufferStream(c.in);
+ var al = std.ArrayList(u8).init(testing.allocator);
+ defer al.deinit();
+
+ try decompress(.zlib, fb.reader(), al.writer());
+ try testing.expectEqualStrings(c.out, al.items);
+ }
+}
+
+test "flate.Inflate fuzzing tests" {
+ const cases = [_]struct {
+ input: []const u8,
+ out: []const u8 = "",
+ err: ?anyerror = null,
+ }{
+ .{ .input = "deflate-stream", .out = @embedFile("testdata/fuzz/deflate-stream.expect") }, // 0
+ .{ .input = "empty-distance-alphabet01" },
+ .{ .input = "empty-distance-alphabet02" },
+ .{ .input = "end-of-stream", .err = error.EndOfStream },
+ .{ .input = "invalid-distance", .err = error.InvalidMatch },
+ .{ .input = "invalid-tree01", .err = error.IncompleteHuffmanTree }, // 5
+ .{ .input = "invalid-tree02", .err = error.IncompleteHuffmanTree },
+ .{ .input = "invalid-tree03", .err = error.IncompleteHuffmanTree },
+ .{ .input = "lengths-overflow", .err = error.InvalidDynamicBlockHeader },
+ .{ .input = "out-of-codes", .err = error.InvalidCode },
+ .{ .input = "puff01", .err = error.WrongStoredBlockNlen }, // 10
+ .{ .input = "puff02", .err = error.EndOfStream },
+ .{ .input = "puff03", .out = &[_]u8{0xa} },
+ .{ .input = "puff04", .err = error.InvalidCode },
+ .{ .input = "puff05", .err = error.EndOfStream },
+ .{ .input = "puff06", .err = error.EndOfStream },
+ .{ .input = "puff08", .err = error.InvalidCode },
+ .{ .input = "puff09", .out = "P" },
+ .{ .input = "puff10", .err = error.InvalidCode },
+ .{ .input = "puff11", .err = error.InvalidMatch },
+ .{ .input = "puff12", .err = error.InvalidDynamicBlockHeader }, // 20
+ .{ .input = "puff13", .err = error.IncompleteHuffmanTree },
+ .{ .input = "puff14", .err = error.EndOfStream },
+ .{ .input = "puff15", .err = error.IncompleteHuffmanTree },
+ .{ .input = "puff16", .err = error.InvalidDynamicBlockHeader },
+ .{ .input = "puff17", .err = error.MissingEndOfBlockCode }, // 25
+ .{ .input = "fuzz1", .err = error.InvalidDynamicBlockHeader },
+ .{ .input = "fuzz2", .err = error.InvalidDynamicBlockHeader },
+ .{ .input = "fuzz3", .err = error.InvalidMatch },
+ .{ .input = "fuzz4", .err = error.OversubscribedHuffmanTree },
+ .{ .input = "puff18", .err = error.OversubscribedHuffmanTree }, // 30
+ .{ .input = "puff19", .err = error.OversubscribedHuffmanTree },
+ .{ .input = "puff20", .err = error.OversubscribedHuffmanTree },
+ .{ .input = "puff21", .err = error.OversubscribedHuffmanTree },
+ .{ .input = "puff22", .err = error.OversubscribedHuffmanTree },
+ .{ .input = "puff23", .err = error.OversubscribedHuffmanTree }, // 35
+ .{ .input = "puff24", .err = error.IncompleteHuffmanTree },
+ .{ .input = "puff25", .err = error.OversubscribedHuffmanTree },
+ .{ .input = "puff26", .err = error.InvalidDynamicBlockHeader },
+ .{ .input = "puff27", .err = error.InvalidDynamicBlockHeader },
+ };
+
+ inline for (cases, 0..) |c, case_no| {
+ var in = std.io.fixedBufferStream(@embedFile("testdata/fuzz/" ++ c.input ++ ".input"));
+ var out = std.ArrayList(u8).init(testing.allocator);
+ defer out.deinit();
+ errdefer std.debug.print("test case failed {}\n", .{case_no});
+
+ if (c.err) |expected_err| {
+ try testing.expectError(expected_err, decompress(.raw, in.reader(), out.writer()));
+ } else {
+ try decompress(.raw, in.reader(), out.writer());
+ try testing.expectEqualStrings(c.out, out.items);
+ }
+ }
+}
+
+test "flate bug 18966" {
+ const input = @embedFile("testdata/fuzz/bug_18966.input");
+ const expect = @embedFile("testdata/fuzz/bug_18966.expect");
+
+ var in = std.io.fixedBufferStream(input);
+ var out = std.ArrayList(u8).init(testing.allocator);
+ defer out.deinit();
+
+ try decompress(.gzip, in.reader(), out.writer());
+ try testing.expectEqualStrings(expect, out.items);
+}
diff --git a/lib/std/compress/flate/testdata/block_writer.zig b/lib/std/compress/flate/testdata/block_writer.zig
new file mode 100644
index 0000000000..cb8f3028d1
--- /dev/null
+++ b/lib/std/compress/flate/testdata/block_writer.zig
@@ -0,0 +1,606 @@
+const Token = @import("../Token.zig");
+
+pub const TestCase = struct {
+ tokens: []const Token,
+ input: []const u8 = "", // File name of input data matching the tokens.
+ want: []const u8 = "", // File name of data with the expected output with input available.
+ want_no_input: []const u8 = "", // File name of the expected output when no input is available.
+};
+
+pub const testCases = blk: {
+ @setEvalBranchQuota(4096 * 2);
+
+ const L = Token.initLiteral;
+ const M = Token.initMatch;
+ const ml = M(1, 258); // Maximum length token. Used to reduce the size of writeBlockTests
+
+ break :blk &[_]TestCase{
+ TestCase{
+ .input = "huffman-null-max.input",
+ .want = "huffman-null-max.{s}.expect",
+ .want_no_input = "huffman-null-max.{s}.expect-noinput",
+ .tokens = &[_]Token{
+ L(0x0), ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
+ ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
+ ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
+ ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
+ ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
+ ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
+ ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
+ ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
+ ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
+ ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
+ ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
+ ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
+ ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, L(0x0), L(0x0),
+ },
+ },
+ TestCase{
+ .input = "huffman-pi.input",
+ .want = "huffman-pi.{s}.expect",
+ .want_no_input = "huffman-pi.{s}.expect-noinput",
+ .tokens = &[_]Token{
+ L('3'), L('.'), L('1'), L('4'), L('1'), L('5'), L('9'), L('2'),
+ L('6'), L('5'), L('3'), L('5'), L('8'), L('9'), L('7'), L('9'),
+ L('3'), L('2'), L('3'), L('8'), L('4'), L('6'), L('2'), L('6'),
+ L('4'), L('3'), L('3'), L('8'), L('3'), L('2'), L('7'), L('9'),
+ L('5'), L('0'), L('2'), L('8'), L('8'), L('4'), L('1'), L('9'),
+ L('7'), L('1'), L('6'), L('9'), L('3'), L('9'), L('9'), L('3'),
+ L('7'), L('5'), L('1'), L('0'), L('5'), L('8'), L('2'), L('0'),
+ L('9'), L('7'), L('4'), L('9'), L('4'), L('4'), L('5'), L('9'),
+ L('2'), L('3'), L('0'), L('7'), L('8'), L('1'), L('6'), L('4'),
+ L('0'), L('6'), L('2'), L('8'), L('6'), L('2'), L('0'), L('8'),
+ L('9'), L('9'), L('8'), L('6'), L('2'), L('8'), L('0'), L('3'),
+ L('4'), L('8'), L('2'), L('5'), L('3'), L('4'), L('2'), L('1'),
+ L('1'), L('7'), L('0'), L('6'), L('7'), L('9'), L('8'), L('2'),
+ L('1'), L('4'), L('8'), L('0'), L('8'), L('6'), L('5'), L('1'),
+ L('3'), L('2'), L('8'), L('2'), L('3'), L('0'), L('6'), L('6'),
+ L('4'), L('7'), L('0'), L('9'), L('3'), L('8'), L('4'), L('4'),
+ L('6'), L('0'), L('9'), L('5'), L('5'), L('0'), L('5'), L('8'),
+ L('2'), L('2'), L('3'), L('1'), L('7'), L('2'), L('5'), L('3'),
+ L('5'), L('9'), L('4'), L('0'), L('8'), L('1'), L('2'), L('8'),
+ L('4'), L('8'), L('1'), L('1'), L('1'), L('7'), L('4'), M(127, 4),
+ L('4'), L('1'), L('0'), L('2'), L('7'), L('0'), L('1'), L('9'),
+ L('3'), L('8'), L('5'), L('2'), L('1'), L('1'), L('0'), L('5'),
+ L('5'), L('5'), L('9'), L('6'), L('4'), L('4'), L('6'), L('2'),
+ L('2'), L('9'), L('4'), L('8'), L('9'), L('5'), L('4'), L('9'),
+ L('3'), L('0'), L('3'), L('8'), L('1'), M(19, 4), L('2'), L('8'),
+ L('8'), L('1'), L('0'), L('9'), L('7'), L('5'), L('6'), L('6'),
+ L('5'), L('9'), L('3'), L('3'), L('4'), L('4'), L('6'), M(72, 4),
+ L('7'), L('5'), L('6'), L('4'), L('8'), L('2'), L('3'), L('3'),
+ L('7'), L('8'), L('6'), L('7'), L('8'), L('3'), L('1'), L('6'),
+ L('5'), L('2'), L('7'), L('1'), L('2'), L('0'), L('1'), L('9'),
+ L('0'), L('9'), L('1'), L('4'), M(27, 4), L('5'), L('6'), L('6'),
+ L('9'), L('2'), L('3'), L('4'), L('6'), M(179, 4), L('6'), L('1'),
+ L('0'), L('4'), L('5'), L('4'), L('3'), L('2'), L('6'), M(51, 4),
+ L('1'), L('3'), L('3'), L('9'), L('3'), L('6'), L('0'), L('7'),
+ L('2'), L('6'), L('0'), L('2'), L('4'), L('9'), L('1'), L('4'),
+ L('1'), L('2'), L('7'), L('3'), L('7'), L('2'), L('4'), L('5'),
+ L('8'), L('7'), L('0'), L('0'), L('6'), L('6'), L('0'), L('6'),
+ L('3'), L('1'), L('5'), L('5'), L('8'), L('8'), L('1'), L('7'),
+ L('4'), L('8'), L('8'), L('1'), L('5'), L('2'), L('0'), L('9'),
+ L('2'), L('0'), L('9'), L('6'), L('2'), L('8'), L('2'), L('9'),
+ L('2'), L('5'), L('4'), L('0'), L('9'), L('1'), L('7'), L('1'),
+ L('5'), L('3'), L('6'), L('4'), L('3'), L('6'), L('7'), L('8'),
+ L('9'), L('2'), L('5'), L('9'), L('0'), L('3'), L('6'), L('0'),
+ L('0'), L('1'), L('1'), L('3'), L('3'), L('0'), L('5'), L('3'),
+ L('0'), L('5'), L('4'), L('8'), L('8'), L('2'), L('0'), L('4'),
+ L('6'), L('6'), L('5'), L('2'), L('1'), L('3'), L('8'), L('4'),
+ L('1'), L('4'), L('6'), L('9'), L('5'), L('1'), L('9'), L('4'),
+ L('1'), L('5'), L('1'), L('1'), L('6'), L('0'), L('9'), L('4'),
+ L('3'), L('3'), L('0'), L('5'), L('7'), L('2'), L('7'), L('0'),
+ L('3'), L('6'), L('5'), L('7'), L('5'), L('9'), L('5'), L('9'),
+ L('1'), L('9'), L('5'), L('3'), L('0'), L('9'), L('2'), L('1'),
+ L('8'), L('6'), L('1'), L('1'), L('7'), M(234, 4), L('3'), L('2'),
+ M(10, 4), L('9'), L('3'), L('1'), L('0'), L('5'), L('1'), L('1'),
+ L('8'), L('5'), L('4'), L('8'), L('0'), L('7'), M(271, 4), L('3'),
+ L('7'), L('9'), L('9'), L('6'), L('2'), L('7'), L('4'), L('9'),
+ L('5'), L('6'), L('7'), L('3'), L('5'), L('1'), L('8'), L('8'),
+ L('5'), L('7'), L('5'), L('2'), L('7'), L('2'), L('4'), L('8'),
+ L('9'), L('1'), L('2'), L('2'), L('7'), L('9'), L('3'), L('8'),
+ L('1'), L('8'), L('3'), L('0'), L('1'), L('1'), L('9'), L('4'),
+ L('9'), L('1'), L('2'), L('9'), L('8'), L('3'), L('3'), L('6'),
+ L('7'), L('3'), L('3'), L('6'), L('2'), L('4'), L('4'), L('0'),
+ L('6'), L('5'), L('6'), L('6'), L('4'), L('3'), L('0'), L('8'),
+ L('6'), L('0'), L('2'), L('1'), L('3'), L('9'), L('4'), L('9'),
+ L('4'), L('6'), L('3'), L('9'), L('5'), L('2'), L('2'), L('4'),
+ L('7'), L('3'), L('7'), L('1'), L('9'), L('0'), L('7'), L('0'),
+ L('2'), L('1'), L('7'), L('9'), L('8'), M(154, 5), L('7'), L('0'),
+ L('2'), L('7'), L('7'), L('0'), L('5'), L('3'), L('9'), L('2'),
+ L('1'), L('7'), L('1'), L('7'), L('6'), L('2'), L('9'), L('3'),
+ L('1'), L('7'), L('6'), L('7'), L('5'), M(563, 5), L('7'), L('4'),
+ L('8'), L('1'), M(7, 4), L('6'), L('6'), L('9'), L('4'), L('0'),
+ M(488, 4), L('0'), L('0'), L('0'), L('5'), L('6'), L('8'), L('1'),
+ L('2'), L('7'), L('1'), L('4'), L('5'), L('2'), L('6'), L('3'),
+ L('5'), L('6'), L('0'), L('8'), L('2'), L('7'), L('7'), L('8'),
+ L('5'), L('7'), L('7'), L('1'), L('3'), L('4'), L('2'), L('7'),
+ L('5'), L('7'), L('7'), L('8'), L('9'), L('6'), M(298, 4), L('3'),
+ L('6'), L('3'), L('7'), L('1'), L('7'), L('8'), L('7'), L('2'),
+ L('1'), L('4'), L('6'), L('8'), L('4'), L('4'), L('0'), L('9'),
+ L('0'), L('1'), L('2'), L('2'), L('4'), L('9'), L('5'), L('3'),
+ L('4'), L('3'), L('0'), L('1'), L('4'), L('6'), L('5'), L('4'),
+ L('9'), L('5'), L('8'), L('5'), L('3'), L('7'), L('1'), L('0'),
+ L('5'), L('0'), L('7'), L('9'), M(203, 4), L('6'), M(340, 4), L('8'),
+ L('9'), L('2'), L('3'), L('5'), L('4'), M(458, 4), L('9'), L('5'),
+ L('6'), L('1'), L('1'), L('2'), L('1'), L('2'), L('9'), L('0'),
+ L('2'), L('1'), L('9'), L('6'), L('0'), L('8'), L('6'), L('4'),
+ L('0'), L('3'), L('4'), L('4'), L('1'), L('8'), L('1'), L('5'),
+ L('9'), L('8'), L('1'), L('3'), L('6'), L('2'), L('9'), L('7'),
+ L('7'), L('4'), M(117, 4), L('0'), L('9'), L('9'), L('6'), L('0'),
+ L('5'), L('1'), L('8'), L('7'), L('0'), L('7'), L('2'), L('1'),
+ L('1'), L('3'), L('4'), L('9'), M(1, 5), L('8'), L('3'), L('7'),
+ L('2'), L('9'), L('7'), L('8'), L('0'), L('4'), L('9'), L('9'),
+ M(731, 4), L('9'), L('7'), L('3'), L('1'), L('7'), L('3'), L('2'),
+ L('8'), M(395, 4), L('6'), L('3'), L('1'), L('8'), L('5'), M(770, 4),
+ M(745, 4), L('4'), L('5'), L('5'), L('3'), L('4'), L('6'), L('9'),
+ L('0'), L('8'), L('3'), L('0'), L('2'), L('6'), L('4'), L('2'),
+ L('5'), L('2'), L('2'), L('3'), L('0'), M(740, 4), M(616, 4), L('8'),
+ L('5'), L('0'), L('3'), L('5'), L('2'), L('6'), L('1'), L('9'),
+ L('3'), L('1'), L('1'), M(531, 4), L('1'), L('0'), L('1'), L('0'),
+ L('0'), L('0'), L('3'), L('1'), L('3'), L('7'), L('8'), L('3'),
+ L('8'), L('7'), L('5'), L('2'), L('8'), L('8'), L('6'), L('5'),
+ L('8'), L('7'), L('5'), L('3'), L('3'), L('2'), L('0'), L('8'),
+ L('3'), L('8'), L('1'), L('4'), L('2'), L('0'), L('6'), M(321, 4),
+ M(300, 4), L('1'), L('4'), L('7'), L('3'), L('0'), L('3'), L('5'),
+ L('9'), M(815, 5), L('9'), L('0'), L('4'), L('2'), L('8'), L('7'),
+ L('5'), L('5'), L('4'), L('6'), L('8'), L('7'), L('3'), L('1'),
+ L('1'), L('5'), L('9'), L('5'), M(854, 4), L('3'), L('8'), L('8'),
+ L('2'), L('3'), L('5'), L('3'), L('7'), L('8'), L('7'), L('5'),
+ M(896, 5), L('9'), M(315, 4), L('1'), M(329, 4), L('8'), L('0'), L('5'),
+ L('3'), M(395, 4), L('2'), L('2'), L('6'), L('8'), L('0'), L('6'),
+ L('6'), L('1'), L('3'), L('0'), L('0'), L('1'), L('9'), L('2'),
+ L('7'), L('8'), L('7'), L('6'), L('6'), L('1'), L('1'), L('1'),
+ L('9'), L('5'), L('9'), M(568, 4), L('6'), M(293, 5), L('8'), L('9'),
+ L('3'), L('8'), L('0'), L('9'), L('5'), L('2'), L('5'), L('7'),
+ L('2'), L('0'), L('1'), L('0'), L('6'), L('5'), L('4'), L('8'),
+ L('5'), L('8'), L('6'), L('3'), L('2'), L('7'), M(155, 4), L('9'),
+ L('3'), L('6'), L('1'), L('5'), L('3'), M(545, 4), M(349, 5), L('2'),
+ L('3'), L('0'), L('3'), L('0'), L('1'), L('9'), L('5'), L('2'),
+ L('0'), L('3'), L('5'), L('3'), L('0'), L('1'), L('8'), L('5'),
+ L('2'), M(370, 4), M(118, 4), L('3'), L('6'), L('2'), L('2'), L('5'),
+ L('9'), L('9'), L('4'), L('1'), L('3'), M(597, 4), L('4'), L('9'),
+ L('7'), L('2'), L('1'), L('7'), M(223, 4), L('3'), L('4'), L('7'),
+ L('9'), L('1'), L('3'), L('1'), L('5'), L('1'), L('5'), L('5'),
+ L('7'), L('4'), L('8'), L('5'), L('7'), L('2'), L('4'), L('2'),
+ L('4'), L('5'), L('4'), L('1'), L('5'), L('0'), L('6'), L('9'),
+ M(320, 4), L('8'), L('2'), L('9'), L('5'), L('3'), L('3'), L('1'),
+ L('1'), L('6'), L('8'), L('6'), L('1'), L('7'), L('2'), L('7'),
+ L('8'), M(824, 4), L('9'), L('0'), L('7'), L('5'), L('0'), L('9'),
+ M(270, 4), L('7'), L('5'), L('4'), L('6'), L('3'), L('7'), L('4'),
+ L('6'), L('4'), L('9'), L('3'), L('9'), L('3'), L('1'), L('9'),
+ L('2'), L('5'), L('5'), L('0'), L('6'), L('0'), L('4'), L('0'),
+ L('0'), L('9'), M(620, 4), L('1'), L('6'), L('7'), L('1'), L('1'),
+ L('3'), L('9'), L('0'), L('0'), L('9'), L('8'), M(822, 4), L('4'),
+ L('0'), L('1'), L('2'), L('8'), L('5'), L('8'), L('3'), L('6'),
+ L('1'), L('6'), L('0'), L('3'), L('5'), L('6'), L('3'), L('7'),
+ L('0'), L('7'), L('6'), L('6'), L('0'), L('1'), L('0'), L('4'),
+ M(371, 4), L('8'), L('1'), L('9'), L('4'), L('2'), L('9'), M(1055, 5),
+ M(240, 4), M(652, 4), L('7'), L('8'), L('3'), L('7'), L('4'), M(1193, 4),
+ L('8'), L('2'), L('5'), L('5'), L('3'), L('7'), M(522, 5), L('2'),
+ L('6'), L('8'), M(47, 4), L('4'), L('0'), L('4'), L('7'), M(466, 4),
+ L('4'), M(1206, 4), M(910, 4), L('8'), L('4'), M(937, 4), L('6'), M(800, 6),
+ L('3'), L('3'), L('1'), L('3'), L('6'), L('7'), L('7'), L('0'),
+ L('2'), L('8'), L('9'), L('8'), L('9'), L('1'), L('5'), L('2'),
+ M(99, 4), L('5'), L('2'), L('1'), L('6'), L('2'), L('0'), L('5'),
+ L('6'), L('9'), L('6'), M(1042, 4), L('0'), L('5'), L('8'), M(1144, 4),
+ L('5'), M(1177, 4), L('5'), L('1'), L('1'), M(522, 4), L('8'), L('2'),
+ L('4'), L('3'), L('0'), L('0'), L('3'), L('5'), L('5'), L('8'),
+ L('7'), L('6'), L('4'), L('0'), L('2'), L('4'), L('7'), L('4'),
+ L('9'), L('6'), L('4'), L('7'), L('3'), L('2'), L('6'), L('3'),
+ M(1087, 4), L('9'), L('9'), L('2'), M(1100, 4), L('4'), L('2'), L('6'),
+ L('9'), M(710, 6), L('7'), M(471, 4), L('4'), M(1342, 4), M(1054, 4), L('9'),
+ L('3'), L('4'), L('1'), L('7'), M(430, 4), L('1'), L('2'), M(43, 4),
+ L('4'), M(415, 4), L('1'), L('5'), L('0'), L('3'), L('0'), L('2'),
+ L('8'), L('6'), L('1'), L('8'), L('2'), L('9'), L('7'), L('4'),
+ L('5'), L('5'), L('5'), L('7'), L('0'), L('6'), L('7'), L('4'),
+ M(310, 4), L('5'), L('0'), L('5'), L('4'), L('9'), L('4'), L('5'),
+ L('8'), M(454, 4), L('9'), M(82, 4), L('5'), L('6'), M(493, 4), L('7'),
+ L('2'), L('1'), L('0'), L('7'), L('9'), M(346, 4), L('3'), L('0'),
+ M(267, 4), L('3'), L('2'), L('1'), L('1'), L('6'), L('5'), L('3'),
+ L('4'), L('4'), L('9'), L('8'), L('7'), L('2'), L('0'), L('2'),
+ L('7'), M(284, 4), L('0'), L('2'), L('3'), L('6'), L('4'), M(559, 4),
+ L('5'), L('4'), L('9'), L('9'), L('1'), L('1'), L('9'), L('8'),
+ M(1049, 4), L('4'), M(284, 4), L('5'), L('3'), L('5'), L('6'), L('6'),
+ L('3'), L('6'), L('9'), M(1105, 4), L('2'), L('6'), L('5'), M(741, 4),
+ L('7'), L('8'), L('6'), L('2'), L('5'), L('5'), L('1'), M(987, 4),
+ L('1'), L('7'), L('5'), L('7'), L('4'), L('6'), L('7'), L('2'),
+ L('8'), L('9'), L('0'), L('9'), L('7'), L('7'), L('7'), L('7'),
+ M(1108, 5), L('0'), L('0'), L('0'), M(1534, 4), L('7'), L('0'), M(1248, 4),
+ L('6'), M(1002, 4), L('4'), L('9'), L('1'), M(1055, 4), M(664, 4), L('2'),
+ L('1'), L('4'), L('7'), L('7'), L('2'), L('3'), L('5'), L('0'),
+ L('1'), L('4'), L('1'), L('4'), M(1604, 4), L('3'), L('5'), L('6'),
+ M(1200, 4), L('1'), L('6'), L('1'), L('3'), L('6'), L('1'), L('1'),
+ L('5'), L('7'), L('3'), L('5'), L('2'), L('5'), M(1285, 4), L('3'),
+ L('4'), M(92, 4), L('1'), L('8'), M(1148, 4), L('8'), L('4'), M(1512, 4),
+ L('3'), L('3'), L('2'), L('3'), L('9'), L('0'), L('7'), L('3'),
+ L('9'), L('4'), L('1'), L('4'), L('3'), L('3'), L('3'), L('4'),
+ L('5'), L('4'), L('7'), L('7'), L('6'), L('2'), L('4'), M(579, 4),
+ L('2'), L('5'), L('1'), L('8'), L('9'), L('8'), L('3'), L('5'),
+ L('6'), L('9'), L('4'), L('8'), L('5'), L('5'), L('6'), L('2'),
+ L('0'), L('9'), L('9'), L('2'), L('1'), L('9'), L('2'), L('2'),
+ L('2'), L('1'), L('8'), L('4'), L('2'), L('7'), M(575, 4), L('2'),
+ M(187, 4), L('6'), L('8'), L('8'), L('7'), L('6'), L('7'), L('1'),
+ L('7'), L('9'), L('0'), M(86, 4), L('0'), M(263, 5), L('6'), L('6'),
+ M(1000, 4), L('8'), L('8'), L('6'), L('2'), L('7'), L('2'), M(1757, 4),
+ L('1'), L('7'), L('8'), L('6'), L('0'), L('8'), L('5'), L('7'),
+ M(116, 4), L('3'), M(765, 5), L('7'), L('9'), L('7'), L('6'), L('6'),
+ L('8'), L('1'), M(702, 4), L('0'), L('0'), L('9'), L('5'), L('3'),
+ L('8'), L('8'), M(1593, 4), L('3'), M(1702, 4), L('0'), L('6'), L('8'),
+ L('0'), L('0'), L('6'), L('4'), L('2'), L('2'), L('5'), L('1'),
+ L('2'), L('5'), L('2'), M(1404, 4), L('7'), L('3'), L('9'), L('2'),
+ M(664, 4), M(1141, 4), L('4'), M(1716, 5), L('8'), L('6'), L('2'), L('6'),
+ L('9'), L('4'), L('5'), M(486, 4), L('4'), L('1'), L('9'), L('6'),
+ L('5'), L('2'), L('8'), L('5'), L('0'), M(154, 4), M(925, 4), L('1'),
+ L('8'), L('6'), L('3'), M(447, 4), L('4'), M(341, 5), L('2'), L('0'),
+ L('3'), L('9'), M(1420, 4), L('4'), L('5'), M(701, 4), L('2'), L('3'),
+ L('7'), M(1069, 4), L('6'), M(1297, 4), L('5'), L('6'), M(1593, 4), L('7'),
+ L('1'), L('9'), L('1'), L('7'), L('2'), L('8'), M(370, 4), L('7'),
+ L('6'), L('4'), L('6'), L('5'), L('7'), L('5'), L('7'), L('3'),
+ L('9'), M(258, 4), L('3'), L('8'), L('9'), M(1865, 4), L('8'), L('3'),
+ L('2'), L('6'), L('4'), L('5'), L('9'), L('9'), L('5'), L('8'),
+ M(1704, 4), L('0'), L('4'), L('7'), L('8'), M(479, 4), M(809, 4), L('9'),
+ M(46, 4), L('6'), L('4'), L('0'), L('7'), L('8'), L('9'), L('5'),
+ L('1'), M(143, 4), L('6'), L('8'), L('3'), M(304, 4), L('2'), L('5'),
+ L('9'), L('5'), L('7'), L('0'), M(1129, 4), L('8'), L('2'), L('2'),
+ M(713, 4), L('2'), M(1564, 4), L('4'), L('0'), L('7'), L('7'), L('2'),
+ L('6'), L('7'), L('1'), L('9'), L('4'), L('7'), L('8'), M(794, 4),
+ L('8'), L('2'), L('6'), L('0'), L('1'), L('4'), L('7'), L('6'),
+ L('9'), L('9'), L('0'), L('9'), M(1257, 4), L('0'), L('1'), L('3'),
+ L('6'), L('3'), L('9'), L('4'), L('4'), L('3'), M(640, 4), L('3'),
+ L('0'), M(262, 4), L('2'), L('0'), L('3'), L('4'), L('9'), L('6'),
+ L('2'), L('5'), L('2'), L('4'), L('5'), L('1'), L('7'), M(950, 4),
+ L('9'), L('6'), L('5'), L('1'), L('4'), L('3'), L('1'), L('4'),
+ L('2'), L('9'), L('8'), L('0'), L('9'), L('1'), L('9'), L('0'),
+ L('6'), L('5'), L('9'), L('2'), M(643, 4), L('7'), L('2'), L('2'),
+ L('1'), L('6'), L('9'), L('6'), L('4'), L('6'), M(1050, 4), M(123, 4),
+ L('5'), M(1295, 4), L('4'), M(1382, 5), L('8'), M(1370, 4), L('9'), L('7'),
+ M(1404, 4), L('5'), L('4'), M(1182, 4), M(575, 4), L('7'), M(1627, 4), L('8'),
+ L('4'), L('6'), L('8'), L('1'), L('3'), M(141, 4), L('6'), L('8'),
+ L('3'), L('8'), L('6'), L('8'), L('9'), L('4'), L('2'), L('7'),
+ L('7'), L('4'), L('1'), L('5'), L('5'), L('9'), L('9'), L('1'),
+ L('8'), L('5'), M(91, 4), L('2'), L('4'), L('5'), L('9'), L('5'),
+ L('3'), L('9'), L('5'), L('9'), L('4'), L('3'), L('1'), M(1464, 4),
+ L('7'), M(19, 4), L('6'), L('8'), L('0'), L('8'), L('4'), L('5'),
+ M(744, 4), L('7'), L('3'), M(2079, 4), L('9'), L('5'), L('8'), L('4'),
+ L('8'), L('6'), L('5'), L('3'), L('8'), M(1769, 4), L('6'), L('2'),
+ M(243, 4), L('6'), L('0'), L('9'), M(1207, 4), L('6'), L('0'), L('8'),
+ L('0'), L('5'), L('1'), L('2'), L('4'), L('3'), L('8'), L('8'),
+ L('4'), M(315, 4), M(12, 4), L('4'), L('1'), L('3'), M(784, 4), L('7'),
+ L('6'), L('2'), L('7'), L('8'), M(834, 4), L('7'), L('1'), L('5'),
+ M(1436, 4), L('3'), L('5'), L('9'), L('9'), L('7'), L('7'), L('0'),
+ L('0'), L('1'), L('2'), L('9'), M(1139, 4), L('8'), L('9'), L('4'),
+ L('4'), L('1'), M(632, 4), L('6'), L('8'), L('5'), L('5'), M(96, 4),
+ L('4'), L('0'), L('6'), L('3'), M(2279, 4), L('2'), L('0'), L('7'),
+ L('2'), L('2'), M(345, 4), M(516, 5), L('4'), L('8'), L('1'), L('5'),
+ L('8'), M(518, 4), M(511, 4), M(635, 4), M(665, 4), L('3'), L('9'), L('4'),
+ L('5'), L('2'), L('2'), L('6'), L('7'), M(1175, 6), L('8'), M(1419, 4),
+ L('2'), L('1'), M(747, 4), L('2'), M(904, 4), L('5'), L('4'), L('6'),
+ L('6'), L('6'), M(1308, 4), L('2'), L('3'), L('9'), L('8'), L('6'),
+ L('4'), L('5'), L('6'), M(1221, 4), L('1'), L('6'), L('3'), L('5'),
+ M(596, 5), M(2066, 4), L('7'), M(2222, 4), L('9'), L('8'), M(1119, 4), L('9'),
+ L('3'), L('6'), L('3'), L('4'), M(1884, 4), L('7'), L('4'), L('3'),
+ L('2'), L('4'), M(1148, 4), L('1'), L('5'), L('0'), L('7'), L('6'),
+ M(1212, 4), L('7'), L('9'), L('4'), L('5'), L('1'), L('0'), L('9'),
+ M(63, 4), L('0'), L('9'), L('4'), L('0'), M(1703, 4), L('8'), L('8'),
+ L('7'), L('9'), L('7'), L('1'), L('0'), L('8'), L('9'), L('3'),
+ M(2289, 4), L('6'), L('9'), L('1'), L('3'), L('6'), L('8'), L('6'),
+ L('7'), L('2'), M(604, 4), M(511, 4), L('5'), M(1344, 4), M(1129, 4), M(2050, 4),
+ L('1'), L('7'), L('9'), L('2'), L('8'), L('6'), L('8'), M(2253, 4),
+ L('8'), L('7'), L('4'), L('7'), M(1951, 5), L('8'), L('2'), L('4'),
+ M(2427, 4), L('8'), M(604, 4), L('7'), L('1'), L('4'), L('9'), L('0'),
+ L('9'), L('6'), L('7'), L('5'), L('9'), L('8'), M(1776, 4), L('3'),
+ L('6'), L('5'), M(309, 4), L('8'), L('1'), M(93, 4), M(1862, 4), M(2359, 4),
+ L('6'), L('8'), L('2'), L('9'), M(1407, 4), L('8'), L('7'), L('2'),
+ L('2'), L('6'), L('5'), L('8'), L('8'), L('0'), M(1554, 4), L('5'),
+ M(586, 4), L('4'), L('2'), L('7'), L('0'), L('4'), L('7'), L('7'),
+ L('5'), L('5'), M(2079, 4), L('3'), L('7'), L('9'), L('6'), L('4'),
+ L('1'), L('4'), L('5'), L('1'), L('5'), L('2'), M(1534, 4), L('2'),
+ L('3'), L('4'), L('3'), L('6'), L('4'), L('5'), L('4'), M(1503, 4),
+ L('4'), L('4'), L('4'), L('7'), L('9'), L('5'), M(61, 4), M(1316, 4),
+ M(2279, 5), L('4'), L('1'), M(1323, 4), L('3'), M(773, 4), L('5'), L('2'),
+ L('3'), L('1'), M(2114, 5), L('1'), L('6'), L('6'), L('1'), M(2227, 4),
+ L('5'), L('9'), L('6'), L('9'), L('5'), L('3'), L('6'), L('2'),
+ L('3'), L('1'), L('4'), M(1536, 4), L('2'), L('4'), L('8'), L('4'),
+ L('9'), L('3'), L('7'), L('1'), L('8'), L('7'), L('1'), L('1'),
+ L('0'), L('1'), L('4'), L('5'), L('7'), L('6'), L('5'), L('4'),
+ M(1890, 4), L('0'), L('2'), L('7'), L('9'), L('9'), L('3'), L('4'),
+ L('4'), L('0'), L('3'), L('7'), L('4'), L('2'), L('0'), L('0'),
+ L('7'), M(2368, 4), L('7'), L('8'), L('5'), L('3'), L('9'), L('0'),
+ L('6'), L('2'), L('1'), L('9'), M(666, 5), M(838, 4), L('8'), L('4'),
+ L('7'), M(979, 5), L('8'), L('3'), L('3'), L('2'), L('1'), L('4'),
+ L('4'), L('5'), L('7'), L('1'), M(645, 4), M(1911, 4), L('4'), L('3'),
+ L('5'), L('0'), M(2345, 4), M(1129, 4), L('5'), L('3'), L('1'), L('9'),
+ L('1'), L('0'), L('4'), L('8'), L('4'), L('8'), L('1'), L('0'),
+ L('0'), L('5'), L('3'), L('7'), L('0'), L('6'), M(2237, 4), M(1438, 5),
+ M(1922, 5), L('1'), M(1370, 4), L('7'), M(796, 4), L('5'), M(2029, 4), M(1037, 4),
+ L('6'), L('3'), M(2013, 5), L('4'), M(2418, 4), M(847, 5), M(1014, 5), L('8'),
+ M(1326, 5), M(2184, 5), L('9'), M(392, 4), L('9'), L('1'), M(2255, 4), L('8'),
+ L('1'), L('4'), L('6'), L('7'), L('5'), L('1'), M(1580, 4), L('1'),
+ L('2'), L('3'), L('9'), M(426, 6), L('9'), L('0'), L('7'), L('1'),
+ L('8'), L('6'), L('4'), L('9'), L('4'), L('2'), L('3'), L('1'),
+ L('9'), L('6'), L('1'), L('5'), L('6'), M(493, 4), M(1725, 4), L('9'),
+ L('5'), M(2343, 4), M(1130, 4), M(284, 4), L('6'), L('0'), L('3'), L('8'),
+ M(2598, 4), M(368, 4), M(901, 4), L('6'), L('2'), M(1115, 4), L('5'), M(2125, 4),
+ L('6'), L('3'), L('8'), L('9'), L('3'), L('7'), L('7'), L('8'),
+ L('7'), M(2246, 4), M(249, 4), L('9'), L('7'), L('9'), L('2'), L('0'),
+ L('7'), L('7'), L('3'), M(1496, 4), L('2'), L('1'), L('8'), L('2'),
+ L('5'), L('6'), M(2016, 4), L('6'), L('6'), M(1751, 4), L('4'), L('2'),
+ M(1663, 5), L('6'), M(1767, 4), L('4'), L('4'), M(37, 4), L('5'), L('4'),
+ L('9'), L('2'), L('0'), L('2'), L('6'), L('0'), L('5'), M(2740, 4),
+ M(997, 5), L('2'), L('0'), L('1'), L('4'), L('9'), M(1235, 4), L('8'),
+ L('5'), L('0'), L('7'), L('3'), M(1434, 4), L('6'), L('6'), L('6'),
+ L('0'), M(405, 4), L('2'), L('4'), L('3'), L('4'), L('0'), M(136, 4),
+ L('0'), M(1900, 4), L('8'), L('6'), L('3'), M(2391, 4), M(2021, 4), M(1068, 4),
+ M(373, 4), L('5'), L('7'), L('9'), L('6'), L('2'), L('6'), L('8'),
+ L('5'), L('6'), M(321, 4), L('5'), L('0'), L('8'), M(1316, 4), L('5'),
+ L('8'), L('7'), L('9'), L('6'), L('9'), L('9'), M(1810, 4), L('5'),
+ L('7'), L('4'), M(2585, 4), L('8'), L('4'), L('0'), M(2228, 4), L('1'),
+ L('4'), L('5'), L('9'), L('1'), M(1933, 4), L('7'), L('0'), M(565, 4),
+ L('0'), L('1'), M(3048, 4), L('1'), L('2'), M(3189, 4), L('0'), M(964, 4),
+ L('3'), L('9'), M(2859, 4), M(275, 4), L('7'), L('1'), L('5'), M(945, 4),
+ L('4'), L('2'), L('0'), M(3059, 5), L('9'), M(3011, 4), L('0'), L('7'),
+ M(834, 4), M(1942, 4), M(2736, 4), M(3171, 4), L('2'), L('1'), M(2401, 4), L('2'),
+ L('5'), L('1'), M(1404, 4), M(2373, 4), L('9'), L('2'), M(435, 4), L('8'),
+ L('2'), L('6'), M(2919, 4), L('2'), M(633, 4), L('3'), L('2'), L('1'),
+ L('5'), L('7'), L('9'), L('1'), L('9'), L('8'), L('4'), L('1'),
+ L('4'), M(2172, 5), L('9'), L('1'), L('6'), L('4'), M(1769, 5), L('9'),
+ M(2905, 5), M(2268, 4), L('7'), L('2'), L('2'), M(802, 4), L('5'), M(2213, 4),
+ M(322, 4), L('9'), L('1'), L('0'), M(189, 4), M(3164, 4), L('5'), L('2'),
+ L('8'), L('0'), L('1'), L('7'), M(562, 4), L('7'), L('1'), L('2'),
+ M(2325, 4), L('8'), L('3'), L('2'), M(884, 4), L('1'), M(1418, 4), L('0'),
+ L('9'), L('3'), L('5'), L('3'), L('9'), L('6'), L('5'), L('7'),
+ M(1612, 4), L('1'), L('0'), L('8'), L('3'), M(106, 4), L('5'), L('1'),
+ M(1915, 4), M(3419, 4), L('1'), L('4'), L('4'), L('4'), L('2'), L('1'),
+ L('0'), L('0'), M(515, 4), L('0'), L('3'), M(413, 4), L('1'), L('1'),
+ L('0'), L('3'), M(3202, 4), M(10, 4), M(39, 4), M(1539, 6), L('5'), L('1'),
+ L('6'), M(1498, 4), M(2180, 5), M(2347, 4), L('5'), M(3139, 5), L('8'), L('5'),
+ L('1'), L('7'), L('1'), L('4'), L('3'), L('7'), M(1542, 4), M(110, 4),
+ L('1'), L('5'), L('5'), L('6'), L('5'), L('0'), L('8'), L('8'),
+ M(954, 4), L('9'), L('8'), L('9'), L('8'), L('5'), L('9'), L('9'),
+ L('8'), L('2'), L('3'), L('8'), M(464, 4), M(2491, 4), L('3'), M(365, 4),
+ M(1087, 4), M(2500, 4), L('8'), M(3590, 5), L('3'), L('2'), M(264, 4), L('5'),
+ M(774, 4), L('3'), M(459, 4), L('9'), M(1052, 4), L('9'), L('8'), M(2174, 4),
+ L('4'), M(3257, 4), L('7'), M(1612, 4), L('0'), L('7'), M(230, 4), L('4'),
+ L('8'), L('1'), L('4'), L('1'), M(1338, 4), L('8'), L('5'), L('9'),
+ L('4'), L('6'), L('1'), M(3018, 4), L('8'), L('0'),
+ },
+ },
+ TestCase{
+ .input = "huffman-rand-1k.input",
+ .want = "huffman-rand-1k.{s}.expect",
+ .want_no_input = "huffman-rand-1k.{s}.expect-noinput",
+ .tokens = &[_]Token{
+ L(0xf8), L(0x8b), L(0x96), L(0x76), L(0x48), L(0xd), L(0x85), L(0x94), L(0x25), L(0x80), L(0xaf), L(0xc2), L(0xfe), L(0x8d),
+ L(0xe8), L(0x20), L(0xeb), L(0x17), L(0x86), L(0xc9), L(0xb7), L(0xc5), L(0xde), L(0x6), L(0xea), L(0x7d), L(0x18), L(0x8b),
+ L(0xe7), L(0x3e), L(0x7), L(0xda), L(0xdf), L(0xff), L(0x6c), L(0x73), L(0xde), L(0xcc), L(0xe7), L(0x6d), L(0x8d), L(0x4),
+ L(0x19), L(0x49), L(0x7f), L(0x47), L(0x1f), L(0x48), L(0x15), L(0xb0), L(0xe8), L(0x9e), L(0xf2), L(0x31), L(0x59), L(0xde),
+ L(0x34), L(0xb4), L(0x5b), L(0xe5), L(0xe0), L(0x9), L(0x11), L(0x30), L(0xc2), L(0x88), L(0x5b), L(0x7c), L(0x5d), L(0x14),
+ L(0x13), L(0x6f), L(0x23), L(0xa9), L(0xd), L(0xbc), L(0x2d), L(0x23), L(0xbe), L(0xd9), L(0xed), L(0x75), L(0x4), L(0x6c),
+ L(0x99), L(0xdf), L(0xfd), L(0x70), L(0x66), L(0xe6), L(0xee), L(0xd9), L(0xb1), L(0x9e), L(0x6e), L(0x83), L(0x59), L(0xd5),
+ L(0xd4), L(0x80), L(0x59), L(0x98), L(0x77), L(0x89), L(0x43), L(0x38), L(0xc9), L(0xaf), L(0x30), L(0x32), L(0x9a), L(0x20),
+ L(0x1b), L(0x46), L(0x3d), L(0x67), L(0x6e), L(0xd7), L(0x72), L(0x9e), L(0x4e), L(0x21), L(0x4f), L(0xc6), L(0xe0), L(0xd4),
+ L(0x7b), L(0x4), L(0x8d), L(0xa5), L(0x3), L(0xf6), L(0x5), L(0x9b), L(0x6b), L(0xdc), L(0x2a), L(0x93), L(0x77), L(0x28),
+ L(0xfd), L(0xb4), L(0x62), L(0xda), L(0x20), L(0xe7), L(0x1f), L(0xab), L(0x6b), L(0x51), L(0x43), L(0x39), L(0x2f), L(0xa0),
+ L(0x92), L(0x1), L(0x6c), L(0x75), L(0x3e), L(0xf4), L(0x35), L(0xfd), L(0x43), L(0x2e), L(0xf7), L(0xa4), L(0x75), L(0xda),
+ L(0xea), L(0x9b), L(0xa), L(0x64), L(0xb), L(0xe0), L(0x23), L(0x29), L(0xbd), L(0xf7), L(0xe7), L(0x83), L(0x3c), L(0xfb),
+ L(0xdf), L(0xb3), L(0xae), L(0x4f), L(0xa4), L(0x47), L(0x55), L(0x99), L(0xde), L(0x2f), L(0x96), L(0x6e), L(0x1c), L(0x43),
+ L(0x4c), L(0x87), L(0xe2), L(0x7c), L(0xd9), L(0x5f), L(0x4c), L(0x7c), L(0xe8), L(0x90), L(0x3), L(0xdb), L(0x30), L(0x95),
+ L(0xd6), L(0x22), L(0xc), L(0x47), L(0xb8), L(0x4d), L(0x6b), L(0xbd), L(0x24), L(0x11), L(0xab), L(0x2c), L(0xd7), L(0xbe),
+ L(0x6e), L(0x7a), L(0xd6), L(0x8), L(0xa3), L(0x98), L(0xd8), L(0xdd), L(0x15), L(0x6a), L(0xfa), L(0x93), L(0x30), L(0x1),
+ L(0x25), L(0x1d), L(0xa2), L(0x74), L(0x86), L(0x4b), L(0x6a), L(0x95), L(0xe8), L(0xe1), L(0x4e), L(0xe), L(0x76), L(0xb9),
+ L(0x49), L(0xa9), L(0x5f), L(0xa0), L(0xa6), L(0x63), L(0x3c), L(0x7e), L(0x7e), L(0x20), L(0x13), L(0x4f), L(0xbb), L(0x66),
+ L(0x92), L(0xb8), L(0x2e), L(0xa4), L(0xfa), L(0x48), L(0xcb), L(0xae), L(0xb9), L(0x3c), L(0xaf), L(0xd3), L(0x1f), L(0xe1),
+ L(0xd5), L(0x8d), L(0x42), L(0x6d), L(0xf0), L(0xfc), L(0x8c), L(0xc), L(0x0), L(0xde), L(0x40), L(0xab), L(0x8b), L(0x47),
+ L(0x97), L(0x4e), L(0xa8), L(0xcf), L(0x8e), L(0xdb), L(0xa6), L(0x8b), L(0x20), L(0x9), L(0x84), L(0x7a), L(0x66), L(0xe5),
+ L(0x98), L(0x29), L(0x2), L(0x95), L(0xe6), L(0x38), L(0x32), L(0x60), L(0x3), L(0xe3), L(0x9a), L(0x1e), L(0x54), L(0xe8),
+ L(0x63), L(0x80), L(0x48), L(0x9c), L(0xe7), L(0x63), L(0x33), L(0x6e), L(0xa0), L(0x65), L(0x83), L(0xfa), L(0xc6), L(0xba),
+ L(0x7a), L(0x43), L(0x71), L(0x5), L(0xf5), L(0x68), L(0x69), L(0x85), L(0x9c), L(0xba), L(0x45), L(0xcd), L(0x6b), L(0xb),
+ L(0x19), L(0xd1), L(0xbb), L(0x7f), L(0x70), L(0x85), L(0x92), L(0xd1), L(0xb4), L(0x64), L(0x82), L(0xb1), L(0xe4), L(0x62),
+ L(0xc5), L(0x3c), L(0x46), L(0x1f), L(0x92), L(0x31), L(0x1c), L(0x4e), L(0x41), L(0x77), L(0xf7), L(0xe7), L(0x87), L(0xa2),
+ L(0xf), L(0x6e), L(0xe8), L(0x92), L(0x3), L(0x6b), L(0xa), L(0xe7), L(0xa9), L(0x3b), L(0x11), L(0xda), L(0x66), L(0x8a),
+ L(0x29), L(0xda), L(0x79), L(0xe1), L(0x64), L(0x8d), L(0xe3), L(0x54), L(0xd4), L(0xf5), L(0xef), L(0x64), L(0x87), L(0x3b),
+ L(0xf4), L(0xc2), L(0xf4), L(0x71), L(0x13), L(0xa9), L(0xe9), L(0xe0), L(0xa2), L(0x6), L(0x14), L(0xab), L(0x5d), L(0xa7),
+ L(0x96), L(0x0), L(0xd6), L(0xc3), L(0xcc), L(0x57), L(0xed), L(0x39), L(0x6a), L(0x25), L(0xcd), L(0x76), L(0xea), L(0xba),
+ L(0x3a), L(0xf2), L(0xa1), L(0x95), L(0x5d), L(0xe5), L(0x71), L(0xcf), L(0x9c), L(0x62), L(0x9e), L(0x6a), L(0xfa), L(0xd5),
+ L(0x31), L(0xd1), L(0xa8), L(0x66), L(0x30), L(0x33), L(0xaa), L(0x51), L(0x17), L(0x13), L(0x82), L(0x99), L(0xc8), L(0x14),
+ L(0x60), L(0x9f), L(0x4d), L(0x32), L(0x6d), L(0xda), L(0x19), L(0x26), L(0x21), L(0xdc), L(0x7e), L(0x2e), L(0x25), L(0x67),
+ L(0x72), L(0xca), L(0xf), L(0x92), L(0xcd), L(0xf6), L(0xd6), L(0xcb), L(0x97), L(0x8a), L(0x33), L(0x58), L(0x73), L(0x70),
+ L(0x91), L(0x1d), L(0xbf), L(0x28), L(0x23), L(0xa3), L(0xc), L(0xf1), L(0x83), L(0xc3), L(0xc8), L(0x56), L(0x77), L(0x68),
+ L(0xe3), L(0x82), L(0xba), L(0xb9), L(0x57), L(0x56), L(0x57), L(0x9c), L(0xc3), L(0xd6), L(0x14), L(0x5), L(0x3c), L(0xb1),
+ L(0xaf), L(0x93), L(0xc8), L(0x8a), L(0x57), L(0x7f), L(0x53), L(0xfa), L(0x2f), L(0xaa), L(0x6e), L(0x66), L(0x83), L(0xfa),
+ L(0x33), L(0xd1), L(0x21), L(0xab), L(0x1b), L(0x71), L(0xb4), L(0x7c), L(0xda), L(0xfd), L(0xfb), L(0x7f), L(0x20), L(0xab),
+ L(0x5e), L(0xd5), L(0xca), L(0xfd), L(0xdd), L(0xe0), L(0xee), L(0xda), L(0xba), L(0xa8), L(0x27), L(0x99), L(0x97), L(0x69),
+ L(0xc1), L(0x3c), L(0x82), L(0x8c), L(0xa), L(0x5c), L(0x2d), L(0x5b), L(0x88), L(0x3e), L(0x34), L(0x35), L(0x86), L(0x37),
+ L(0x46), L(0x79), L(0xe1), L(0xaa), L(0x19), L(0xfb), L(0xaa), L(0xde), L(0x15), L(0x9), L(0xd), L(0x1a), L(0x57), L(0xff),
+ L(0xb5), L(0xf), L(0xf3), L(0x2b), L(0x5a), L(0x6a), L(0x4d), L(0x19), L(0x77), L(0x71), L(0x45), L(0xdf), L(0x4f), L(0xb3),
+ L(0xec), L(0xf1), L(0xeb), L(0x18), L(0x53), L(0x3e), L(0x3b), L(0x47), L(0x8), L(0x9a), L(0x73), L(0xa0), L(0x5c), L(0x8c),
+ L(0x5f), L(0xeb), L(0xf), L(0x3a), L(0xc2), L(0x43), L(0x67), L(0xb4), L(0x66), L(0x67), L(0x80), L(0x58), L(0xe), L(0xc1),
+ L(0xec), L(0x40), L(0xd4), L(0x22), L(0x94), L(0xca), L(0xf9), L(0xe8), L(0x92), L(0xe4), L(0x69), L(0x38), L(0xbe), L(0x67),
+ L(0x64), L(0xca), L(0x50), L(0xc7), L(0x6), L(0x67), L(0x42), L(0x6e), L(0xa3), L(0xf0), L(0xb7), L(0x6c), L(0xf2), L(0xe8),
+ L(0x5f), L(0xb1), L(0xaf), L(0xe7), L(0xdb), L(0xbb), L(0x77), L(0xb5), L(0xf8), L(0xcb), L(0x8), L(0xc4), L(0x75), L(0x7e),
+ L(0xc0), L(0xf9), L(0x1c), L(0x7f), L(0x3c), L(0x89), L(0x2f), L(0xd2), L(0x58), L(0x3a), L(0xe2), L(0xf8), L(0x91), L(0xb6),
+ L(0x7b), L(0x24), L(0x27), L(0xe9), L(0xae), L(0x84), L(0x8b), L(0xde), L(0x74), L(0xac), L(0xfd), L(0xd9), L(0xb7), L(0x69),
+ L(0x2a), L(0xec), L(0x32), L(0x6f), L(0xf0), L(0x92), L(0x84), L(0xf1), L(0x40), L(0xc), L(0x8a), L(0xbc), L(0x39), L(0x6e),
+ L(0x2e), L(0x73), L(0xd4), L(0x6e), L(0x8a), L(0x74), L(0x2a), L(0xdc), L(0x60), L(0x1f), L(0xa3), L(0x7), L(0xde), L(0x75),
+ L(0x8b), L(0x74), L(0xc8), L(0xfe), L(0x63), L(0x75), L(0xf6), L(0x3d), L(0x63), L(0xac), L(0x33), L(0x89), L(0xc3), L(0xf0),
+ L(0xf8), L(0x2d), L(0x6b), L(0xb4), L(0x9e), L(0x74), L(0x8b), L(0x5c), L(0x33), L(0xb4), L(0xca), L(0xa8), L(0xe4), L(0x99),
+ L(0xb6), L(0x90), L(0xa1), L(0xef), L(0xf), L(0xd3), L(0x61), L(0xb2), L(0xc6), L(0x1a), L(0x94), L(0x7c), L(0x44), L(0x55),
+ L(0xf4), L(0x45), L(0xff), L(0x9e), L(0xa5), L(0x5a), L(0xc6), L(0xa0), L(0xe8), L(0x2a), L(0xc1), L(0x8d), L(0x6f), L(0x34),
+ L(0x11), L(0xb9), L(0xbe), L(0x4e), L(0xd9), L(0x87), L(0x97), L(0x73), L(0xcf), L(0x3d), L(0x23), L(0xae), L(0xd5), L(0x1a),
+ L(0x5e), L(0xae), L(0x5d), L(0x6a), L(0x3), L(0xf9), L(0x22), L(0xd), L(0x10), L(0xd9), L(0x47), L(0x69), L(0x15), L(0x3f),
+ L(0xee), L(0x52), L(0xa3), L(0x8), L(0xd2), L(0x3c), L(0x51), L(0xf4), L(0xf8), L(0x9d), L(0xe4), L(0x98), L(0x89), L(0xc8),
+ L(0x67), L(0x39), L(0xd5), L(0x5e), L(0x35), L(0x78), L(0x27), L(0xe8), L(0x3c), L(0x80), L(0xae), L(0x79), L(0x71), L(0xd2),
+ L(0x93), L(0xf4), L(0xaa), L(0x51), L(0x12), L(0x1c), L(0x4b), L(0x1b), L(0xe5), L(0x6e), L(0x15), L(0x6f), L(0xe4), L(0xbb),
+ L(0x51), L(0x9b), L(0x45), L(0x9f), L(0xf9), L(0xc4), L(0x8c), L(0x2a), L(0xfb), L(0x1a), L(0xdf), L(0x55), L(0xd3), L(0x48),
+ L(0x93), L(0x27), L(0x1), L(0x26), L(0xc2), L(0x6b), L(0x55), L(0x6d), L(0xa2), L(0xfb), L(0x84), L(0x8b), L(0xc9), L(0x9e),
+ L(0x28), L(0xc2), L(0xef), L(0x1a), L(0x24), L(0xec), L(0x9b), L(0xae), L(0xbd), L(0x60), L(0xe9), L(0x15), L(0x35), L(0xee),
+ L(0x42), L(0xa4), L(0x33), L(0x5b), L(0xfa), L(0xf), L(0xb6), L(0xf7), L(0x1), L(0xa6), L(0x2), L(0x4c), L(0xca), L(0x90),
+ L(0x58), L(0x3a), L(0x96), L(0x41), L(0xe7), L(0xcb), L(0x9), L(0x8c), L(0xdb), L(0x85), L(0x4d), L(0xa8), L(0x89), L(0xf3),
+ L(0xb5), L(0x8e), L(0xfd), L(0x75), L(0x5b), L(0x4f), L(0xed), L(0xde), L(0x3f), L(0xeb), L(0x38), L(0xa3), L(0xbe), L(0xb0),
+ L(0x73), L(0xfc), L(0xb8), L(0x54), L(0xf7), L(0x4c), L(0x30), L(0x67), L(0x2e), L(0x38), L(0xa2), L(0x54), L(0x18), L(0xba),
+ L(0x8), L(0xbf), L(0xf2), L(0x39), L(0xd5), L(0xfe), L(0xa5), L(0x41), L(0xc6), L(0x66), L(0x66), L(0xba), L(0x81), L(0xef),
+ L(0x67), L(0xe4), L(0xe6), L(0x3c), L(0xc), L(0xca), L(0xa4), L(0xa), L(0x79), L(0xb3), L(0x57), L(0x8b), L(0x8a), L(0x75),
+ L(0x98), L(0x18), L(0x42), L(0x2f), L(0x29), L(0xa3), L(0x82), L(0xef), L(0x9f), L(0x86), L(0x6), L(0x23), L(0xe1), L(0x75),
+ L(0xfa), L(0x8), L(0xb1), L(0xde), L(0x17), L(0x4a),
+ },
+ },
+ TestCase{
+ .input = "huffman-rand-limit.input",
+ .want = "huffman-rand-limit.{s}.expect",
+ .want_no_input = "huffman-rand-limit.{s}.expect-noinput",
+ .tokens = &[_]Token{
+ L(0x61), M(1, 74), L(0xa), L(0xf8), L(0x8b), L(0x96), L(0x76), L(0x48), L(0xa), L(0x85), L(0x94), L(0x25), L(0x80),
+ L(0xaf), L(0xc2), L(0xfe), L(0x8d), L(0xe8), L(0x20), L(0xeb), L(0x17), L(0x86), L(0xc9), L(0xb7), L(0xc5), L(0xde),
+ L(0x6), L(0xea), L(0x7d), L(0x18), L(0x8b), L(0xe7), L(0x3e), L(0x7), L(0xda), L(0xdf), L(0xff), L(0x6c), L(0x73),
+ L(0xde), L(0xcc), L(0xe7), L(0x6d), L(0x8d), L(0x4), L(0x19), L(0x49), L(0x7f), L(0x47), L(0x1f), L(0x48), L(0x15),
+ L(0xb0), L(0xe8), L(0x9e), L(0xf2), L(0x31), L(0x59), L(0xde), L(0x34), L(0xb4), L(0x5b), L(0xe5), L(0xe0), L(0x9),
+ L(0x11), L(0x30), L(0xc2), L(0x88), L(0x5b), L(0x7c), L(0x5d), L(0x14), L(0x13), L(0x6f), L(0x23), L(0xa9), L(0xa),
+ L(0xbc), L(0x2d), L(0x23), L(0xbe), L(0xd9), L(0xed), L(0x75), L(0x4), L(0x6c), L(0x99), L(0xdf), L(0xfd), L(0x70),
+ L(0x66), L(0xe6), L(0xee), L(0xd9), L(0xb1), L(0x9e), L(0x6e), L(0x83), L(0x59), L(0xd5), L(0xd4), L(0x80), L(0x59),
+ L(0x98), L(0x77), L(0x89), L(0x43), L(0x38), L(0xc9), L(0xaf), L(0x30), L(0x32), L(0x9a), L(0x20), L(0x1b), L(0x46),
+ L(0x3d), L(0x67), L(0x6e), L(0xd7), L(0x72), L(0x9e), L(0x4e), L(0x21), L(0x4f), L(0xc6), L(0xe0), L(0xd4), L(0x7b),
+ L(0x4), L(0x8d), L(0xa5), L(0x3), L(0xf6), L(0x5), L(0x9b), L(0x6b), L(0xdc), L(0x2a), L(0x93), L(0x77), L(0x28),
+ L(0xfd), L(0xb4), L(0x62), L(0xda), L(0x20), L(0xe7), L(0x1f), L(0xab), L(0x6b), L(0x51), L(0x43), L(0x39), L(0x2f),
+ L(0xa0), L(0x92), L(0x1), L(0x6c), L(0x75), L(0x3e), L(0xf4), L(0x35), L(0xfd), L(0x43), L(0x2e), L(0xf7), L(0xa4),
+ L(0x75), L(0xda), L(0xea), L(0x9b), L(0xa),
+ },
+ },
+ TestCase{
+ .input = "huffman-shifts.input",
+ .want = "huffman-shifts.{s}.expect",
+ .want_no_input = "huffman-shifts.{s}.expect-noinput",
+ .tokens = &[_]Token{
+ L('1'), L('0'), M(2, 258), M(2, 258), M(2, 258), M(2, 258), M(2, 258), M(2, 258),
+ M(2, 258), M(2, 258), M(2, 258), M(2, 258), M(2, 258), M(2, 258), M(2, 258), M(2, 258),
+ M(2, 258), M(2, 76), L(0xd), L(0xa), L('2'), L('3'), M(2, 258), M(2, 258),
+ M(2, 258), M(2, 258), M(2, 258), M(2, 258), M(2, 258), M(2, 258), M(2, 258), M(2, 256),
+ },
+ },
+ TestCase{
+ .input = "huffman-text-shift.input",
+ .want = "huffman-text-shift.{s}.expect",
+ .want_no_input = "huffman-text-shift.{s}.expect-noinput",
+ .tokens = &[_]Token{
+ L('/'), L('/'), L('C'), L('o'), L('p'), L('y'), L('r'), L('i'),
+ L('g'), L('h'), L('t'), L('2'), L('0'), L('0'), L('9'), L('T'),
+ L('h'), L('G'), L('o'), L('A'), L('u'), L('t'), L('h'), L('o'),
+ L('r'), L('.'), L('A'), L('l'), L('l'), M(23, 5), L('r'), L('r'),
+ L('v'), L('d'), L('.'), L(0xd), L(0xa), L('/'), L('/'), L('U'),
+ L('o'), L('f'), L('t'), L('h'), L('i'), L('o'), L('u'), L('r'),
+ L('c'), L('c'), L('o'), L('d'), L('i'), L('g'), L('o'), L('v'),
+ L('r'), L('n'), L('d'), L('b'), L('y'), L('B'), L('S'), L('D'),
+ L('-'), L('t'), L('y'), L('l'), M(33, 4), L('l'), L('i'), L('c'),
+ L('n'), L('t'), L('h'), L('t'), L('c'), L('n'), L('b'), L('f'),
+ L('o'), L('u'), L('n'), L('d'), L('i'), L('n'), L('t'), L('h'),
+ L('L'), L('I'), L('C'), L('E'), L('N'), L('S'), L('E'), L('f'),
+ L('i'), L('l'), L('.'), L(0xd), L(0xa), L(0xd), L(0xa), L('p'),
+ L('c'), L('k'), L('g'), L('m'), L('i'), L('n'), M(11, 4), L('i'),
+ L('m'), L('p'), L('o'), L('r'), L('t'), L('"'), L('o'), L('"'),
+ M(13, 4), L('f'), L('u'), L('n'), L('c'), L('m'), L('i'), L('n'),
+ L('('), L(')'), L('{'), L(0xd), L(0xa), L(0x9), L('v'), L('r'),
+ L('b'), L('='), L('m'), L('k'), L('('), L('['), L(']'), L('b'),
+ L('y'), L('t'), L(','), L('6'), L('5'), L('5'), L('3'), L('5'),
+ L(')'), L(0xd), L(0xa), L(0x9), L('f'), L(','), L('_'), L(':'),
+ L('='), L('o'), L('.'), L('C'), L('r'), L('t'), L('('), L('"'),
+ L('h'), L('u'), L('f'), L('f'), L('m'), L('n'), L('-'), L('n'),
+ L('u'), L('l'), L('l'), L('-'), L('m'), L('x'), L('.'), L('i'),
+ L('n'), L('"'), M(34, 5), L('.'), L('W'), L('r'), L('i'), L('t'),
+ L('('), L('b'), L(')'), L(0xd), L(0xa), L('}'), L(0xd), L(0xa),
+ L('A'), L('B'), L('C'), L('D'), L('E'), L('F'), L('G'), L('H'),
+ L('I'), L('J'), L('K'), L('L'), L('M'), L('N'), L('O'), L('P'),
+ L('Q'), L('R'), L('S'), L('T'), L('U'), L('V'), L('X'), L('x'),
+ L('y'), L('z'), L('!'), L('"'), L('#'), L(0xc2), L(0xa4), L('%'),
+ L('&'), L('/'), L('?'), L('"'),
+ },
+ },
+ TestCase{
+ .input = "huffman-text.input",
+ .want = "huffman-text.{s}.expect",
+ .want_no_input = "huffman-text.{s}.expect-noinput",
+ .tokens = &[_]Token{
+ L('/'), L('/'), L(' '), L('z'), L('i'), L('g'), L(' '), L('v'),
+ L('0'), L('.'), L('1'), L('0'), L('.'), L('0'), L(0xa), L('/'),
+ L('/'), L(' '), L('c'), L('r'), L('e'), L('a'), L('t'), L('e'),
+ L(' '), L('a'), L(' '), L('f'), L('i'), L('l'), L('e'), M(5, 4),
+ L('l'), L('e'), L('d'), L(' '), L('w'), L('i'), L('t'), L('h'),
+ L(' '), L('0'), L('x'), L('0'), L('0'), L(0xa), L('c'), L('o'),
+ L('n'), L('s'), L('t'), L(' '), L('s'), L('t'), L('d'), L(' '),
+ L('='), L(' '), L('@'), L('i'), L('m'), L('p'), L('o'), L('r'),
+ L('t'), L('('), L('"'), L('s'), L('t'), L('d'), L('"'), L(')'),
+ L(';'), L(0xa), L(0xa), L('p'), L('u'), L('b'), L(' '), L('f'),
+ L('n'), L(' '), L('m'), L('a'), L('i'), L('n'), L('('), L(')'),
+ L(' '), L('!'), L('v'), L('o'), L('i'), L('d'), L(' '), L('{'),
+ L(0xa), L(' '), L(' '), L(' '), L(' '), L('v'), L('a'), L('r'),
+ L(' '), L('b'), L(' '), L('='), L(' '), L('['), L('1'), L(']'),
+ L('u'), L('8'), L('{'), L('0'), L('}'), L(' '), L('*'), L('*'),
+ L(' '), L('6'), L('5'), L('5'), L('3'), L('5'), L(';'), M(31, 5),
+ M(86, 6), L('f'), L(' '), L('='), L(' '), L('t'), L('r'), L('y'),
+ M(94, 4), L('.'), L('f'), L('s'), L('.'), L('c'), L('w'), L('d'),
+ L('('), L(')'), L('.'), M(144, 6), L('F'), L('i'), L('l'), L('e'),
+ L('('), M(43, 5), M(1, 4), L('"'), L('h'), L('u'), L('f'), L('f'),
+ L('m'), L('a'), L('n'), L('-'), L('n'), L('u'), L('l'), L('l'),
+ L('-'), L('m'), L('a'), L('x'), L('.'), L('i'), L('n'), L('"'),
+ L(','), M(31, 9), L('.'), L('{'), L(' '), L('.'), L('r'), L('e'),
+ L('a'), L('d'), M(79, 5), L('u'), L('e'), L(' '), L('}'), M(27, 6),
+ L(')'), M(108, 6), L('d'), L('e'), L('f'), L('e'), L('r'), L(' '),
+ L('f'), L('.'), L('c'), L('l'), L('o'), L('s'), L('e'), L('('),
+ M(183, 4), M(22, 4), L('_'), M(124, 7), L('f'), L('.'), L('w'), L('r'),
+ L('i'), L('t'), L('e'), L('A'), L('l'), L('l'), L('('), L('b'),
+ L('['), L('0'), L('.'), L('.'), L(']'), L(')'), L(';'), L(0xa),
+ L('}'), L(0xa),
+ },
+ },
+ TestCase{
+ .input = "huffman-zero.input",
+ .want = "huffman-zero.{s}.expect",
+ .want_no_input = "huffman-zero.{s}.expect-noinput",
+ .tokens = &[_]Token{ L(0x30), ml, M(1, 49) },
+ },
+ TestCase{
+ .input = "",
+ .want = "",
+ .want_no_input = "null-long-match.{s}.expect-noinput",
+ .tokens = &[_]Token{
+ L(0x0), ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
+ ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
+ ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
+ ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
+ ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
+ ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
+ ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
+ ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
+ ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
+ ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
+ ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
+ ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
+ ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
+ ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
+ ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
+ ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
+ ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
+ ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
+ ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
+ ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
+ ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
+ ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
+ ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
+ ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
+ ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
+ ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
+ ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
+ ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
+ ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
+ ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
+ ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
+ ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
+ ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
+ ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
+ ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
+ ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
+ ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
+ ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml, ml,
+ ml, ml, ml, M(1, 8),
+ },
+ },
+ };
+};
diff --git a/lib/std/compress/deflate/testdata/huffman-null-max.dyn.expect b/lib/std/compress/flate/testdata/block_writer/huffman-null-max.dyn.expect
index c08165143f..c08165143f 100644
--- a/lib/std/compress/deflate/testdata/huffman-null-max.dyn.expect
+++ b/lib/std/compress/flate/testdata/block_writer/huffman-null-max.dyn.expect
Binary files differ
diff --git a/lib/std/compress/deflate/testdata/huffman-null-max.dyn.expect-noinput b/lib/std/compress/flate/testdata/block_writer/huffman-null-max.dyn.expect-noinput
index c08165143f..c08165143f 100644
--- a/lib/std/compress/deflate/testdata/huffman-null-max.dyn.expect-noinput
+++ b/lib/std/compress/flate/testdata/block_writer/huffman-null-max.dyn.expect-noinput
Binary files differ
diff --git a/lib/std/compress/deflate/testdata/huffman-null-max.golden b/lib/std/compress/flate/testdata/block_writer/huffman-null-max.huff.expect
index db422ca398..db422ca398 100644
--- a/lib/std/compress/deflate/testdata/huffman-null-max.golden
+++ b/lib/std/compress/flate/testdata/block_writer/huffman-null-max.huff.expect
Binary files differ
diff --git a/lib/std/compress/deflate/testdata/huffman-null-max.input b/lib/std/compress/flate/testdata/block_writer/huffman-null-max.input
index 5dfddf075b..5dfddf075b 100644
--- a/lib/std/compress/deflate/testdata/huffman-null-max.input
+++ b/lib/std/compress/flate/testdata/block_writer/huffman-null-max.input
Binary files differ
diff --git a/lib/std/compress/deflate/testdata/huffman-null-max.wb.expect b/lib/std/compress/flate/testdata/block_writer/huffman-null-max.wb.expect
index c08165143f..c08165143f 100644
--- a/lib/std/compress/deflate/testdata/huffman-null-max.wb.expect
+++ b/lib/std/compress/flate/testdata/block_writer/huffman-null-max.wb.expect
Binary files differ
diff --git a/lib/std/compress/deflate/testdata/huffman-null-max.wb.expect-noinput b/lib/std/compress/flate/testdata/block_writer/huffman-null-max.wb.expect-noinput
index c08165143f..c08165143f 100644
--- a/lib/std/compress/deflate/testdata/huffman-null-max.wb.expect-noinput
+++ b/lib/std/compress/flate/testdata/block_writer/huffman-null-max.wb.expect-noinput
Binary files differ
diff --git a/lib/std/compress/deflate/testdata/huffman-pi.dyn.expect b/lib/std/compress/flate/testdata/block_writer/huffman-pi.dyn.expect
index e4396ac6fe..e4396ac6fe 100644
--- a/lib/std/compress/deflate/testdata/huffman-pi.dyn.expect
+++ b/lib/std/compress/flate/testdata/block_writer/huffman-pi.dyn.expect
Binary files differ
diff --git a/lib/std/compress/deflate/testdata/huffman-pi.dyn.expect-noinput b/lib/std/compress/flate/testdata/block_writer/huffman-pi.dyn.expect-noinput
index e4396ac6fe..e4396ac6fe 100644
--- a/lib/std/compress/deflate/testdata/huffman-pi.dyn.expect-noinput
+++ b/lib/std/compress/flate/testdata/block_writer/huffman-pi.dyn.expect-noinput
Binary files differ
diff --git a/lib/std/compress/deflate/testdata/huffman-pi.golden b/lib/std/compress/flate/testdata/block_writer/huffman-pi.huff.expect
index 23d8f7f98b..23d8f7f98b 100644
--- a/lib/std/compress/deflate/testdata/huffman-pi.golden
+++ b/lib/std/compress/flate/testdata/block_writer/huffman-pi.huff.expect
Binary files differ
diff --git a/lib/std/compress/deflate/testdata/huffman-pi.input b/lib/std/compress/flate/testdata/block_writer/huffman-pi.input
index efaed43431..efaed43431 100644
--- a/lib/std/compress/deflate/testdata/huffman-pi.input
+++ b/lib/std/compress/flate/testdata/block_writer/huffman-pi.input
diff --git a/lib/std/compress/deflate/testdata/huffman-pi.wb.expect b/lib/std/compress/flate/testdata/block_writer/huffman-pi.wb.expect
index e4396ac6fe..e4396ac6fe 100644
--- a/lib/std/compress/deflate/testdata/huffman-pi.wb.expect
+++ b/lib/std/compress/flate/testdata/block_writer/huffman-pi.wb.expect
Binary files differ
diff --git a/lib/std/compress/deflate/testdata/huffman-pi.wb.expect-noinput b/lib/std/compress/flate/testdata/block_writer/huffman-pi.wb.expect-noinput
index e4396ac6fe..e4396ac6fe 100644
--- a/lib/std/compress/deflate/testdata/huffman-pi.wb.expect-noinput
+++ b/lib/std/compress/flate/testdata/block_writer/huffman-pi.wb.expect-noinput
Binary files differ
diff --git a/lib/std/compress/deflate/testdata/huffman-rand-1k.dyn.expect b/lib/std/compress/flate/testdata/block_writer/huffman-rand-1k.dyn.expect
index 09dc798ee3..09dc798ee3 100644
--- a/lib/std/compress/deflate/testdata/huffman-rand-1k.dyn.expect
+++ b/lib/std/compress/flate/testdata/block_writer/huffman-rand-1k.dyn.expect
Binary files differ
diff --git a/lib/std/compress/deflate/testdata/huffman-rand-1k.dyn.expect-noinput b/lib/std/compress/flate/testdata/block_writer/huffman-rand-1k.dyn.expect-noinput
index 0c24742fde..0c24742fde 100644
--- a/lib/std/compress/deflate/testdata/huffman-rand-1k.dyn.expect-noinput
+++ b/lib/std/compress/flate/testdata/block_writer/huffman-rand-1k.dyn.expect-noinput
Binary files differ
diff --git a/lib/std/compress/deflate/testdata/huffman-rand-1k.golden b/lib/std/compress/flate/testdata/block_writer/huffman-rand-1k.huff.expect
index 09dc798ee3..09dc798ee3 100644
--- a/lib/std/compress/deflate/testdata/huffman-rand-1k.golden
+++ b/lib/std/compress/flate/testdata/block_writer/huffman-rand-1k.huff.expect
Binary files differ
diff --git a/lib/std/compress/deflate/testdata/huffman-rand-1k.input b/lib/std/compress/flate/testdata/block_writer/huffman-rand-1k.input
index ce038ebb5b..ce038ebb5b 100644
--- a/lib/std/compress/deflate/testdata/huffman-rand-1k.input
+++ b/lib/std/compress/flate/testdata/block_writer/huffman-rand-1k.input
Binary files differ
diff --git a/lib/std/compress/deflate/testdata/huffman-rand-1k.wb.expect b/lib/std/compress/flate/testdata/block_writer/huffman-rand-1k.wb.expect
index 09dc798ee3..09dc798ee3 100644
--- a/lib/std/compress/deflate/testdata/huffman-rand-1k.wb.expect
+++ b/lib/std/compress/flate/testdata/block_writer/huffman-rand-1k.wb.expect
Binary files differ
diff --git a/lib/std/compress/deflate/testdata/huffman-rand-1k.wb.expect-noinput b/lib/std/compress/flate/testdata/block_writer/huffman-rand-1k.wb.expect-noinput
index 0c24742fde..0c24742fde 100644
--- a/lib/std/compress/deflate/testdata/huffman-rand-1k.wb.expect-noinput
+++ b/lib/std/compress/flate/testdata/block_writer/huffman-rand-1k.wb.expect-noinput
Binary files differ
diff --git a/lib/std/compress/deflate/testdata/huffman-rand-limit.dyn.expect b/lib/std/compress/flate/testdata/block_writer/huffman-rand-limit.dyn.expect
index 2d6527934e..2d6527934e 100644
--- a/lib/std/compress/deflate/testdata/huffman-rand-limit.dyn.expect
+++ b/lib/std/compress/flate/testdata/block_writer/huffman-rand-limit.dyn.expect
Binary files differ
diff --git a/lib/std/compress/deflate/testdata/huffman-rand-limit.dyn.expect-noinput b/lib/std/compress/flate/testdata/block_writer/huffman-rand-limit.dyn.expect-noinput
index 2d6527934e..2d6527934e 100644
--- a/lib/std/compress/deflate/testdata/huffman-rand-limit.dyn.expect-noinput
+++ b/lib/std/compress/flate/testdata/block_writer/huffman-rand-limit.dyn.expect-noinput
Binary files differ
diff --git a/lib/std/compress/deflate/testdata/huffman-rand-limit.golden b/lib/std/compress/flate/testdata/block_writer/huffman-rand-limit.huff.expect
index 57e59322e9..57e59322e9 100644
--- a/lib/std/compress/deflate/testdata/huffman-rand-limit.golden
+++ b/lib/std/compress/flate/testdata/block_writer/huffman-rand-limit.huff.expect
Binary files differ
diff --git a/lib/std/compress/deflate/testdata/huffman-rand-limit.input b/lib/std/compress/flate/testdata/block_writer/huffman-rand-limit.input
index fb5b1be619..fb5b1be619 100644
--- a/lib/std/compress/deflate/testdata/huffman-rand-limit.input
+++ b/lib/std/compress/flate/testdata/block_writer/huffman-rand-limit.input
diff --git a/lib/std/compress/deflate/testdata/huffman-rand-limit.wb.expect b/lib/std/compress/flate/testdata/block_writer/huffman-rand-limit.wb.expect
index 881e59c9ab..881e59c9ab 100644
--- a/lib/std/compress/deflate/testdata/huffman-rand-limit.wb.expect
+++ b/lib/std/compress/flate/testdata/block_writer/huffman-rand-limit.wb.expect
Binary files differ
diff --git a/lib/std/compress/deflate/testdata/huffman-rand-limit.wb.expect-noinput b/lib/std/compress/flate/testdata/block_writer/huffman-rand-limit.wb.expect-noinput
index 881e59c9ab..881e59c9ab 100644
--- a/lib/std/compress/deflate/testdata/huffman-rand-limit.wb.expect-noinput
+++ b/lib/std/compress/flate/testdata/block_writer/huffman-rand-limit.wb.expect-noinput
Binary files differ
diff --git a/lib/std/compress/deflate/testdata/huffman-rand-max.golden b/lib/std/compress/flate/testdata/block_writer/huffman-rand-max.huff.expect
index 47d53c89c0..47d53c89c0 100644
--- a/lib/std/compress/deflate/testdata/huffman-rand-max.golden
+++ b/lib/std/compress/flate/testdata/block_writer/huffman-rand-max.huff.expect
Binary files differ
diff --git a/lib/std/compress/deflate/testdata/huffman-rand-max.input b/lib/std/compress/flate/testdata/block_writer/huffman-rand-max.input
index 8418633d2a..8418633d2a 100644
--- a/lib/std/compress/deflate/testdata/huffman-rand-max.input
+++ b/lib/std/compress/flate/testdata/block_writer/huffman-rand-max.input
Binary files differ
diff --git a/lib/std/compress/deflate/testdata/huffman-shifts.dyn.expect b/lib/std/compress/flate/testdata/block_writer/huffman-shifts.dyn.expect
index 7812c1c62d..7812c1c62d 100644
--- a/lib/std/compress/deflate/testdata/huffman-shifts.dyn.expect
+++ b/lib/std/compress/flate/testdata/block_writer/huffman-shifts.dyn.expect
Binary files differ
diff --git a/lib/std/compress/deflate/testdata/huffman-shifts.dyn.expect-noinput b/lib/std/compress/flate/testdata/block_writer/huffman-shifts.dyn.expect-noinput
index 7812c1c62d..7812c1c62d 100644
--- a/lib/std/compress/deflate/testdata/huffman-shifts.dyn.expect-noinput
+++ b/lib/std/compress/flate/testdata/block_writer/huffman-shifts.dyn.expect-noinput
Binary files differ
diff --git a/lib/std/compress/deflate/testdata/huffman-shifts.golden b/lib/std/compress/flate/testdata/block_writer/huffman-shifts.huff.expect
index f5133778e1..f5133778e1 100644
--- a/lib/std/compress/deflate/testdata/huffman-shifts.golden
+++ b/lib/std/compress/flate/testdata/block_writer/huffman-shifts.huff.expect
Binary files differ
diff --git a/lib/std/compress/deflate/testdata/huffman-shifts.input b/lib/std/compress/flate/testdata/block_writer/huffman-shifts.input
index 7c7a50d158..7c7a50d158 100644
--- a/lib/std/compress/deflate/testdata/huffman-shifts.input
+++ b/lib/std/compress/flate/testdata/block_writer/huffman-shifts.input
diff --git a/lib/std/compress/deflate/testdata/huffman-shifts.wb.expect b/lib/std/compress/flate/testdata/block_writer/huffman-shifts.wb.expect
index 7812c1c62d..7812c1c62d 100644
--- a/lib/std/compress/deflate/testdata/huffman-shifts.wb.expect
+++ b/lib/std/compress/flate/testdata/block_writer/huffman-shifts.wb.expect
Binary files differ
diff --git a/lib/std/compress/deflate/testdata/huffman-shifts.wb.expect-noinput b/lib/std/compress/flate/testdata/block_writer/huffman-shifts.wb.expect-noinput
index 7812c1c62d..7812c1c62d 100644
--- a/lib/std/compress/deflate/testdata/huffman-shifts.wb.expect-noinput
+++ b/lib/std/compress/flate/testdata/block_writer/huffman-shifts.wb.expect-noinput
Binary files differ
diff --git a/lib/std/compress/deflate/testdata/huffman-text-shift.dyn.expect b/lib/std/compress/flate/testdata/block_writer/huffman-text-shift.dyn.expect
index 71ce3aeb75..71ce3aeb75 100644
--- a/lib/std/compress/deflate/testdata/huffman-text-shift.dyn.expect
+++ b/lib/std/compress/flate/testdata/block_writer/huffman-text-shift.dyn.expect
Binary files differ
diff --git a/lib/std/compress/deflate/testdata/huffman-text-shift.dyn.expect-noinput b/lib/std/compress/flate/testdata/block_writer/huffman-text-shift.dyn.expect-noinput
index 71ce3aeb75..71ce3aeb75 100644
--- a/lib/std/compress/deflate/testdata/huffman-text-shift.dyn.expect-noinput
+++ b/lib/std/compress/flate/testdata/block_writer/huffman-text-shift.dyn.expect-noinput
Binary files differ
diff --git a/lib/std/compress/deflate/testdata/huffman-text-shift.golden b/lib/std/compress/flate/testdata/block_writer/huffman-text-shift.huff.expect
index ff023114bb..ff023114bb 100644
--- a/lib/std/compress/deflate/testdata/huffman-text-shift.golden
+++ b/lib/std/compress/flate/testdata/block_writer/huffman-text-shift.huff.expect
Binary files differ
diff --git a/lib/std/compress/deflate/testdata/huffman-text-shift.input b/lib/std/compress/flate/testdata/block_writer/huffman-text-shift.input
index cc5c3ad69d..cc5c3ad69d 100644
--- a/lib/std/compress/deflate/testdata/huffman-text-shift.input
+++ b/lib/std/compress/flate/testdata/block_writer/huffman-text-shift.input
diff --git a/lib/std/compress/deflate/testdata/huffman-text-shift.wb.expect b/lib/std/compress/flate/testdata/block_writer/huffman-text-shift.wb.expect
index 71ce3aeb75..71ce3aeb75 100644
--- a/lib/std/compress/deflate/testdata/huffman-text-shift.wb.expect
+++ b/lib/std/compress/flate/testdata/block_writer/huffman-text-shift.wb.expect
Binary files differ
diff --git a/lib/std/compress/deflate/testdata/huffman-text-shift.wb.expect-noinput b/lib/std/compress/flate/testdata/block_writer/huffman-text-shift.wb.expect-noinput
index 71ce3aeb75..71ce3aeb75 100644
--- a/lib/std/compress/deflate/testdata/huffman-text-shift.wb.expect-noinput
+++ b/lib/std/compress/flate/testdata/block_writer/huffman-text-shift.wb.expect-noinput
Binary files differ
diff --git a/lib/std/compress/deflate/testdata/huffman-text.dyn.expect b/lib/std/compress/flate/testdata/block_writer/huffman-text.dyn.expect
index fbffc3f36b..fbffc3f36b 100644
--- a/lib/std/compress/deflate/testdata/huffman-text.dyn.expect
+++ b/lib/std/compress/flate/testdata/block_writer/huffman-text.dyn.expect
Binary files differ
diff --git a/lib/std/compress/deflate/testdata/huffman-text.dyn.expect-noinput b/lib/std/compress/flate/testdata/block_writer/huffman-text.dyn.expect-noinput
index fbffc3f36b..fbffc3f36b 100644
--- a/lib/std/compress/deflate/testdata/huffman-text.dyn.expect-noinput
+++ b/lib/std/compress/flate/testdata/block_writer/huffman-text.dyn.expect-noinput
Binary files differ
diff --git a/lib/std/compress/deflate/testdata/huffman-text.golden b/lib/std/compress/flate/testdata/block_writer/huffman-text.huff.expect
index 46fa51fdad..46fa51fdad 100644
--- a/lib/std/compress/deflate/testdata/huffman-text.golden
+++ b/lib/std/compress/flate/testdata/block_writer/huffman-text.huff.expect
Binary files differ
diff --git a/lib/std/compress/deflate/testdata/huffman-text.input b/lib/std/compress/flate/testdata/block_writer/huffman-text.input
index df97174253..df97174253 100644
--- a/lib/std/compress/deflate/testdata/huffman-text.input
+++ b/lib/std/compress/flate/testdata/block_writer/huffman-text.input
diff --git a/lib/std/compress/deflate/testdata/huffman-text.wb.expect b/lib/std/compress/flate/testdata/block_writer/huffman-text.wb.expect
index fbffc3f36b..fbffc3f36b 100644
--- a/lib/std/compress/deflate/testdata/huffman-text.wb.expect
+++ b/lib/std/compress/flate/testdata/block_writer/huffman-text.wb.expect
Binary files differ
diff --git a/lib/std/compress/deflate/testdata/huffman-text.wb.expect-noinput b/lib/std/compress/flate/testdata/block_writer/huffman-text.wb.expect-noinput
index fbffc3f36b..fbffc3f36b 100644
--- a/lib/std/compress/deflate/testdata/huffman-text.wb.expect-noinput
+++ b/lib/std/compress/flate/testdata/block_writer/huffman-text.wb.expect-noinput
Binary files differ
diff --git a/lib/std/compress/deflate/testdata/huffman-zero.dyn.expect b/lib/std/compress/flate/testdata/block_writer/huffman-zero.dyn.expect
index 830348a79a..830348a79a 100644
--- a/lib/std/compress/deflate/testdata/huffman-zero.dyn.expect
+++ b/lib/std/compress/flate/testdata/block_writer/huffman-zero.dyn.expect
Binary files differ
diff --git a/lib/std/compress/deflate/testdata/huffman-zero.dyn.expect-noinput b/lib/std/compress/flate/testdata/block_writer/huffman-zero.dyn.expect-noinput
index 830348a79a..830348a79a 100644
--- a/lib/std/compress/deflate/testdata/huffman-zero.dyn.expect-noinput
+++ b/lib/std/compress/flate/testdata/block_writer/huffman-zero.dyn.expect-noinput
Binary files differ
diff --git a/lib/std/compress/deflate/testdata/huffman-zero.golden b/lib/std/compress/flate/testdata/block_writer/huffman-zero.huff.expect
index 5abdbaff9a..5abdbaff9a 100644
--- a/lib/std/compress/deflate/testdata/huffman-zero.golden
+++ b/lib/std/compress/flate/testdata/block_writer/huffman-zero.huff.expect
Binary files differ
diff --git a/lib/std/compress/deflate/testdata/huffman-zero.input b/lib/std/compress/flate/testdata/block_writer/huffman-zero.input
index 349be0e6ec..349be0e6ec 100644
--- a/lib/std/compress/deflate/testdata/huffman-zero.input
+++ b/lib/std/compress/flate/testdata/block_writer/huffman-zero.input
diff --git a/lib/std/compress/deflate/testdata/huffman-zero.wb.expect b/lib/std/compress/flate/testdata/block_writer/huffman-zero.wb.expect
index dbe401c54c..dbe401c54c 100644
--- a/lib/std/compress/deflate/testdata/huffman-zero.wb.expect
+++ b/lib/std/compress/flate/testdata/block_writer/huffman-zero.wb.expect
Binary files differ
diff --git a/lib/std/compress/deflate/testdata/huffman-zero.wb.expect-noinput b/lib/std/compress/flate/testdata/block_writer/huffman-zero.wb.expect-noinput
index dbe401c54c..dbe401c54c 100644
--- a/lib/std/compress/deflate/testdata/huffman-zero.wb.expect-noinput
+++ b/lib/std/compress/flate/testdata/block_writer/huffman-zero.wb.expect-noinput
Binary files differ
diff --git a/lib/std/compress/deflate/testdata/null-long-match.dyn.expect-noinput b/lib/std/compress/flate/testdata/block_writer/null-long-match.dyn.expect-noinput
index 8b92d9fc20..8b92d9fc20 100644
--- a/lib/std/compress/deflate/testdata/null-long-match.dyn.expect-noinput
+++ b/lib/std/compress/flate/testdata/block_writer/null-long-match.dyn.expect-noinput
Binary files differ
diff --git a/lib/std/compress/deflate/testdata/null-long-match.wb.expect-noinput b/lib/std/compress/flate/testdata/block_writer/null-long-match.wb.expect-noinput
index 8b92d9fc20..8b92d9fc20 100644
--- a/lib/std/compress/deflate/testdata/null-long-match.wb.expect-noinput
+++ b/lib/std/compress/flate/testdata/block_writer/null-long-match.wb.expect-noinput
Binary files differ
diff --git a/lib/std/compress/flate/testdata/fuzz/bug_18966.expect b/lib/std/compress/flate/testdata/fuzz/bug_18966.expect
new file mode 100644
index 0000000000..b8741a644e
--- /dev/null
+++ b/lib/std/compress/flate/testdata/fuzz/bug_18966.expect
@@ -0,0 +1,17 @@
+conditions,correction,exchange,id,participant_timestamp,price,sequence_number,sip_timestamp,size,tape,trf_id,trf_timestamp
+,,1,,0,9.92,1929969624145677,1063224024145000000,200,1,,0
+,,1,,0,9.86,1929967211195252,1063221611195000000,100,1,,0
+,,1,,0,9.86,1929967094672373,1063221494672000000,200,1,,0
+,,1,,0,9.85,1929967050174259,1063221450174000000,100,1,,0
+,,1,,0,9.85,1929967013672246,1063221413672000000,200,1,,0
+,,1,,0,9.85,1929966984671861,1063221384671000000,200,1,,0
+,,1,,0,9.9,1929966976177115,1063221376177000000,200,1,,0
+,,1,,0,9.85,1929966961186410,1063221361186000000,1000,1,,0
+,,1,,0,9.85,1929957130426170,1063211530426000000,100,1,,0
+,,1,,0,9.85,1929956938455804,1063211338455000000,100,1,,0
+,,1,,0,9.85,1929956786452149,1063211186452000000,300,1,,0
+,,1,,0,9.85,1929956658456522,1063211058456000000,500,1,,0
+,,1,,0,9.85,1929956650952287,1063211050952000000,300,1,,0
+,,1,,0,9.85,1929956528452578,1063210928452000000,1500,1,,0
+,,1,,0,9.85,1929950990948195,1063205390948000000,100,1,,0
+,,1,,0,9.85,1929947489432428,1063201889432000000,100,1,,0
diff --git a/lib/std/compress/flate/testdata/fuzz/bug_18966.input b/lib/std/compress/flate/testdata/fuzz/bug_18966.input
new file mode 100644
index 0000000000..25b47e8963
--- /dev/null
+++ b/lib/std/compress/flate/testdata/fuzz/bug_18966.input
Binary files differ
diff --git a/lib/std/compress/flate/testdata/fuzz/deflate-stream.expect b/lib/std/compress/flate/testdata/fuzz/deflate-stream.expect
new file mode 100644
index 0000000000..ab73c04108
--- /dev/null
+++ b/lib/std/compress/flate/testdata/fuzz/deflate-stream.expect
@@ -0,0 +1,22 @@
+[
+ { id: "brieflz",
+ name: "BriefLZ",
+ libraryUrl: "https://github.com/jibsen/brieflz",
+ license: "MIT",
+ revision: "bcaa6a1ee7ccf005512b5c23aa92b40cf75f9ed1",
+ codecs: [ { name: "brieflz" } ], },
+ { id: "brotli",
+ name: "Brotli",
+ libraryUrl: "https://github.com/google/brotli",
+ license: "Apache 2.0",
+ revision: "1dd66ef114fd244778d9dcb5da09c28b49a0df33",
+ codecs: [ { name: "brotli",
+ levels: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11],
+ streaming: true } ], },
+ { id: "bsc",
+ name: "bsc",
+ libraryUrl: "http://libbsc.com/",
+ license: "Apache 2.0",
+ revision: "b2b07421381b19b2fada8b291f3cdead10578abc",
+ codecs: [ { name: "bsc" } ] }
+]
diff --git a/lib/std/compress/flate/testdata/fuzz/deflate-stream.input b/lib/std/compress/flate/testdata/fuzz/deflate-stream.input
new file mode 100644
index 0000000000..a0ed06f872
--- /dev/null
+++ b/lib/std/compress/flate/testdata/fuzz/deflate-stream.input
@@ -0,0 +1,3 @@
+=o0+NڭRK}>W!AI@j+{
+|<Fx[ش\9f;P%0৷#iuVUWDQLYFtTGU_|SQD<M4)Xk%MeSdűK0 ]Ca s[v;ɕMSVJ@N5EatJNY$Y[eѤVs27ܺ8}wӆ.H1A` c3P W%uY@߮jN]$
+,<L4<Ksa2is#p1ZV4˵؎o \ No newline at end of file
diff --git a/lib/std/compress/flate/testdata/fuzz/empty-distance-alphabet01.input b/lib/std/compress/flate/testdata/fuzz/empty-distance-alphabet01.input
new file mode 100644
index 0000000000..9cd29e157f
--- /dev/null
+++ b/lib/std/compress/flate/testdata/fuzz/empty-distance-alphabet01.input
Binary files differ
diff --git a/lib/std/compress/flate/testdata/fuzz/empty-distance-alphabet02.input b/lib/std/compress/flate/testdata/fuzz/empty-distance-alphabet02.input
new file mode 100644
index 0000000000..32a51d3180
--- /dev/null
+++ b/lib/std/compress/flate/testdata/fuzz/empty-distance-alphabet02.input
Binary files differ
diff --git a/lib/std/compress/flate/testdata/fuzz/end-of-stream.input b/lib/std/compress/flate/testdata/fuzz/end-of-stream.input
new file mode 100644
index 0000000000..05e05dc014
--- /dev/null
+++ b/lib/std/compress/flate/testdata/fuzz/end-of-stream.input
@@ -0,0 +1 @@
+=o00 \ No newline at end of file
diff --git a/lib/std/compress/flate/testdata/fuzz/fuzz1.input b/lib/std/compress/flate/testdata/fuzz/fuzz1.input
new file mode 100644
index 0000000000..6854c5c0a2
--- /dev/null
+++ b/lib/std/compress/flate/testdata/fuzz/fuzz1.input
Binary files differ
diff --git a/lib/std/compress/flate/testdata/fuzz/fuzz2.input b/lib/std/compress/flate/testdata/fuzz/fuzz2.input
new file mode 100644
index 0000000000..e54aafb161
--- /dev/null
+++ b/lib/std/compress/flate/testdata/fuzz/fuzz2.input
Binary files differ
diff --git a/lib/std/compress/flate/testdata/fuzz/fuzz3.input b/lib/std/compress/flate/testdata/fuzz/fuzz3.input
new file mode 100644
index 0000000000..5b7c08ddf2
--- /dev/null
+++ b/lib/std/compress/flate/testdata/fuzz/fuzz3.input
Binary files differ
diff --git a/lib/std/compress/flate/testdata/fuzz/fuzz4.input b/lib/std/compress/flate/testdata/fuzz/fuzz4.input
new file mode 100644
index 0000000000..520c9543fc
--- /dev/null
+++ b/lib/std/compress/flate/testdata/fuzz/fuzz4.input
Binary files differ
diff --git a/lib/std/compress/flate/testdata/fuzz/invalid-distance.input b/lib/std/compress/flate/testdata/fuzz/invalid-distance.input
new file mode 100644
index 0000000000..fd05e41653
--- /dev/null
+++ b/lib/std/compress/flate/testdata/fuzz/invalid-distance.input
Binary files differ
diff --git a/lib/std/compress/flate/testdata/fuzz/invalid-tree01.input b/lib/std/compress/flate/testdata/fuzz/invalid-tree01.input
new file mode 100644
index 0000000000..2a6b1952bf
--- /dev/null
+++ b/lib/std/compress/flate/testdata/fuzz/invalid-tree01.input
@@ -0,0 +1 @@
+000 \ No newline at end of file
diff --git a/lib/std/compress/flate/testdata/fuzz/invalid-tree02.input b/lib/std/compress/flate/testdata/fuzz/invalid-tree02.input
new file mode 100644
index 0000000000..a4725951a2
--- /dev/null
+++ b/lib/std/compress/flate/testdata/fuzz/invalid-tree02.input
Binary files differ
diff --git a/lib/std/compress/flate/testdata/fuzz/invalid-tree03.input b/lib/std/compress/flate/testdata/fuzz/invalid-tree03.input
new file mode 100644
index 0000000000..a6b335a105
--- /dev/null
+++ b/lib/std/compress/flate/testdata/fuzz/invalid-tree03.input
Binary files differ
diff --git a/lib/std/compress/flate/testdata/fuzz/lengths-overflow.input b/lib/std/compress/flate/testdata/fuzz/lengths-overflow.input
new file mode 100644
index 0000000000..af0620e4b3
--- /dev/null
+++ b/lib/std/compress/flate/testdata/fuzz/lengths-overflow.input
@@ -0,0 +1 @@
+$9 \ No newline at end of file
diff --git a/lib/std/compress/flate/testdata/fuzz/out-of-codes.input b/lib/std/compress/flate/testdata/fuzz/out-of-codes.input
new file mode 100644
index 0000000000..86c8c7be02
--- /dev/null
+++ b/lib/std/compress/flate/testdata/fuzz/out-of-codes.input
Binary files differ
diff --git a/lib/std/compress/flate/testdata/fuzz/puff01.input b/lib/std/compress/flate/testdata/fuzz/puff01.input
new file mode 100644
index 0000000000..40b450dd9d
--- /dev/null
+++ b/lib/std/compress/flate/testdata/fuzz/puff01.input
Binary files differ
diff --git a/lib/std/compress/flate/testdata/fuzz/puff02.input b/lib/std/compress/flate/testdata/fuzz/puff02.input
new file mode 100644
index 0000000000..8aed0c90bd
--- /dev/null
+++ b/lib/std/compress/flate/testdata/fuzz/puff02.input
Binary files differ
diff --git a/lib/std/compress/flate/testdata/fuzz/puff03.input b/lib/std/compress/flate/testdata/fuzz/puff03.input
new file mode 100644
index 0000000000..3e5998658d
--- /dev/null
+++ b/lib/std/compress/flate/testdata/fuzz/puff03.input
Binary files differ
diff --git a/lib/std/compress/flate/testdata/fuzz/puff04.input b/lib/std/compress/flate/testdata/fuzz/puff04.input
new file mode 100644
index 0000000000..843dc9c1d1
--- /dev/null
+++ b/lib/std/compress/flate/testdata/fuzz/puff04.input
@@ -0,0 +1 @@
+~ \ No newline at end of file
diff --git a/lib/std/compress/flate/testdata/fuzz/puff05.input b/lib/std/compress/flate/testdata/fuzz/puff05.input
new file mode 100644
index 0000000000..25cb955ba2
--- /dev/null
+++ b/lib/std/compress/flate/testdata/fuzz/puff05.input
@@ -0,0 +1 @@
+ \ No newline at end of file
diff --git a/lib/std/compress/flate/testdata/fuzz/puff06.input b/lib/std/compress/flate/testdata/fuzz/puff06.input
new file mode 100644
index 0000000000..5750c4837b
--- /dev/null
+++ b/lib/std/compress/flate/testdata/fuzz/puff06.input
@@ -0,0 +1 @@
+I$I$ \ No newline at end of file
diff --git a/lib/std/compress/flate/testdata/fuzz/puff07.input b/lib/std/compress/flate/testdata/fuzz/puff07.input
new file mode 100644
index 0000000000..de6786f0d2
--- /dev/null
+++ b/lib/std/compress/flate/testdata/fuzz/puff07.input
Binary files differ
diff --git a/lib/std/compress/flate/testdata/fuzz/puff08.input b/lib/std/compress/flate/testdata/fuzz/puff08.input
new file mode 100644
index 0000000000..19402ac49f
--- /dev/null
+++ b/lib/std/compress/flate/testdata/fuzz/puff08.input
Binary files differ
diff --git a/lib/std/compress/flate/testdata/fuzz/puff09.input b/lib/std/compress/flate/testdata/fuzz/puff09.input
new file mode 100644
index 0000000000..ef731eb5e7
--- /dev/null
+++ b/lib/std/compress/flate/testdata/fuzz/puff09.input
Binary files differ
diff --git a/lib/std/compress/flate/testdata/fuzz/puff10.input b/lib/std/compress/flate/testdata/fuzz/puff10.input
new file mode 100644
index 0000000000..bb26068639
--- /dev/null
+++ b/lib/std/compress/flate/testdata/fuzz/puff10.input
@@ -0,0 +1 @@
+ \ No newline at end of file
diff --git a/lib/std/compress/flate/testdata/fuzz/puff11.input b/lib/std/compress/flate/testdata/fuzz/puff11.input
new file mode 100644
index 0000000000..138da04b61
--- /dev/null
+++ b/lib/std/compress/flate/testdata/fuzz/puff11.input
Binary files differ
diff --git a/lib/std/compress/flate/testdata/fuzz/puff12.input b/lib/std/compress/flate/testdata/fuzz/puff12.input
new file mode 100644
index 0000000000..cb896978fb
--- /dev/null
+++ b/lib/std/compress/flate/testdata/fuzz/puff12.input
Binary files differ
diff --git a/lib/std/compress/flate/testdata/fuzz/puff13.input b/lib/std/compress/flate/testdata/fuzz/puff13.input
new file mode 100644
index 0000000000..644f6437a1
--- /dev/null
+++ b/lib/std/compress/flate/testdata/fuzz/puff13.input
Binary files differ
diff --git a/lib/std/compress/flate/testdata/fuzz/puff14.input b/lib/std/compress/flate/testdata/fuzz/puff14.input
new file mode 100644
index 0000000000..e532a74ff7
--- /dev/null
+++ b/lib/std/compress/flate/testdata/fuzz/puff14.input
Binary files differ
diff --git a/lib/std/compress/flate/testdata/fuzz/puff15.input b/lib/std/compress/flate/testdata/fuzz/puff15.input
new file mode 100644
index 0000000000..f2d7e2529c
--- /dev/null
+++ b/lib/std/compress/flate/testdata/fuzz/puff15.input
@@ -0,0 +1 @@
+I$I$Ä \ No newline at end of file
diff --git a/lib/std/compress/flate/testdata/fuzz/puff16.input b/lib/std/compress/flate/testdata/fuzz/puff16.input
new file mode 100644
index 0000000000..12d281499e
--- /dev/null
+++ b/lib/std/compress/flate/testdata/fuzz/puff16.input
Binary files differ
diff --git a/lib/std/compress/flate/testdata/fuzz/puff17.input b/lib/std/compress/flate/testdata/fuzz/puff17.input
new file mode 100644
index 0000000000..3f801b25c5
--- /dev/null
+++ b/lib/std/compress/flate/testdata/fuzz/puff17.input
Binary files differ
diff --git a/lib/std/compress/flate/testdata/fuzz/puff18.input b/lib/std/compress/flate/testdata/fuzz/puff18.input
new file mode 100644
index 0000000000..0621183f94
--- /dev/null
+++ b/lib/std/compress/flate/testdata/fuzz/puff18.input
Binary files differ
diff --git a/lib/std/compress/flate/testdata/fuzz/puff19.input b/lib/std/compress/flate/testdata/fuzz/puff19.input
new file mode 100644
index 0000000000..131352affc
--- /dev/null
+++ b/lib/std/compress/flate/testdata/fuzz/puff19.input
Binary files differ
diff --git a/lib/std/compress/flate/testdata/fuzz/puff20.input b/lib/std/compress/flate/testdata/fuzz/puff20.input
new file mode 100644
index 0000000000..9589f19c57
--- /dev/null
+++ b/lib/std/compress/flate/testdata/fuzz/puff20.input
Binary files differ
diff --git a/lib/std/compress/flate/testdata/fuzz/puff21.input b/lib/std/compress/flate/testdata/fuzz/puff21.input
new file mode 100644
index 0000000000..1d115a3bbf
--- /dev/null
+++ b/lib/std/compress/flate/testdata/fuzz/puff21.input
Binary files differ
diff --git a/lib/std/compress/flate/testdata/fuzz/puff22.input b/lib/std/compress/flate/testdata/fuzz/puff22.input
new file mode 100644
index 0000000000..71f0e31c3c
--- /dev/null
+++ b/lib/std/compress/flate/testdata/fuzz/puff22.input
Binary files differ
diff --git a/lib/std/compress/flate/testdata/fuzz/puff23.input b/lib/std/compress/flate/testdata/fuzz/puff23.input
new file mode 100644
index 0000000000..ff48a74c38
--- /dev/null
+++ b/lib/std/compress/flate/testdata/fuzz/puff23.input
Binary files differ
diff --git a/lib/std/compress/flate/testdata/fuzz/puff24.input b/lib/std/compress/flate/testdata/fuzz/puff24.input
new file mode 100644
index 0000000000..c0373b24a8
--- /dev/null
+++ b/lib/std/compress/flate/testdata/fuzz/puff24.input
Binary files differ
diff --git a/lib/std/compress/flate/testdata/fuzz/puff25.input b/lib/std/compress/flate/testdata/fuzz/puff25.input
new file mode 100644
index 0000000000..4422bcad42
--- /dev/null
+++ b/lib/std/compress/flate/testdata/fuzz/puff25.input
Binary files differ
diff --git a/lib/std/compress/flate/testdata/fuzz/puff26.input b/lib/std/compress/flate/testdata/fuzz/puff26.input
new file mode 100644
index 0000000000..23ddec41b5
--- /dev/null
+++ b/lib/std/compress/flate/testdata/fuzz/puff26.input
Binary files differ
diff --git a/lib/std/compress/flate/testdata/fuzz/puff27.input b/lib/std/compress/flate/testdata/fuzz/puff27.input
new file mode 100644
index 0000000000..f323679da3
--- /dev/null
+++ b/lib/std/compress/flate/testdata/fuzz/puff27.input
Binary files differ
diff --git a/lib/std/compress/flate/testdata/fuzz/roundtrip1.input b/lib/std/compress/flate/testdata/fuzz/roundtrip1.input
new file mode 100644
index 0000000000..4e3353d0fa
--- /dev/null
+++ b/lib/std/compress/flate/testdata/fuzz/roundtrip1.input
Binary files differ
diff --git a/lib/std/compress/flate/testdata/fuzz/roundtrip2.input b/lib/std/compress/flate/testdata/fuzz/roundtrip2.input
new file mode 100644
index 0000000000..26216d59c0
--- /dev/null
+++ b/lib/std/compress/flate/testdata/fuzz/roundtrip2.input
Binary files differ
diff --git a/lib/std/compress/deflate/testdata/rfc1951.txt b/lib/std/compress/flate/testdata/rfc1951.txt
index 403c8c722f..403c8c722f 100644
--- a/lib/std/compress/deflate/testdata/rfc1951.txt
+++ b/lib/std/compress/flate/testdata/rfc1951.txt
diff --git a/lib/std/compress/gzip.zig b/lib/std/compress/gzip.zig
index 0576812a09..8bb09c612a 100644
--- a/lib/std/compress/gzip.zig
+++ b/lib/std/compress/gzip.zig
@@ -1,382 +1,66 @@
-//
-// Compressor/Decompressor for GZIP data streams (RFC1952)
+const deflate = @import("flate/deflate.zig");
+const inflate = @import("flate/inflate.zig");
-const std = @import("../std.zig");
-const io = std.io;
-const fs = std.fs;
-const testing = std.testing;
-const mem = std.mem;
-const deflate = std.compress.deflate;
-
-const magic = &[2]u8{ 0x1f, 0x8b };
-
-// Flags for the FLG field in the header
-const FTEXT = 1 << 0;
-const FHCRC = 1 << 1;
-const FEXTRA = 1 << 2;
-const FNAME = 1 << 3;
-const FCOMMENT = 1 << 4;
-
-const max_string_len = 1024;
-
-pub const Header = struct {
- extra: ?[]const u8 = null,
- filename: ?[]const u8 = null,
- comment: ?[]const u8 = null,
- modification_time: u32 = 0,
- operating_system: u8 = 255,
-};
-
-pub fn Decompress(comptime ReaderType: type) type {
- return struct {
- const Self = @This();
-
- pub const Error = ReaderType.Error ||
- deflate.Decompressor(ReaderType).Error ||
- error{ CorruptedData, WrongChecksum };
- pub const Reader = io.Reader(*Self, Error, read);
-
- allocator: mem.Allocator,
- inflater: deflate.Decompressor(ReaderType),
- in_reader: ReaderType,
- hasher: std.hash.Crc32,
- read_amt: u32,
-
- info: Header,
-
- fn init(allocator: mem.Allocator, in_reader: ReaderType) !Self {
- var hasher = std.compress.hashedReader(in_reader, std.hash.Crc32.init());
- const hashed_reader = hasher.reader();
-
- // gzip header format is specified in RFC1952
- const header = try hashed_reader.readBytesNoEof(10);
-
- // Check the ID1/ID2 fields
- if (!std.mem.eql(u8, header[0..2], magic))
- return error.BadHeader;
-
- const CM = header[2];
- // The CM field must be 8 to indicate the use of DEFLATE
- if (CM != 8) return error.InvalidCompression;
- // Flags
- const FLG = header[3];
- // Modification time, as a Unix timestamp.
- // If zero there's no timestamp available.
- const MTIME = mem.readInt(u32, header[4..8], .little);
- // Extra flags
- const XFL = header[8];
- // Operating system where the compression took place
- const OS = header[9];
- _ = XFL;
-
- const extra = if (FLG & FEXTRA != 0) blk: {
- const len = try hashed_reader.readInt(u16, .little);
- const tmp_buf = try allocator.alloc(u8, len);
- errdefer allocator.free(tmp_buf);
-
- try hashed_reader.readNoEof(tmp_buf);
- break :blk tmp_buf;
- } else null;
- errdefer if (extra) |p| allocator.free(p);
-
- const filename = if (FLG & FNAME != 0)
- try hashed_reader.readUntilDelimiterAlloc(allocator, 0, max_string_len)
- else
- null;
- errdefer if (filename) |p| allocator.free(p);
-
- const comment = if (FLG & FCOMMENT != 0)
- try hashed_reader.readUntilDelimiterAlloc(allocator, 0, max_string_len)
- else
- null;
- errdefer if (comment) |p| allocator.free(p);
-
- if (FLG & FHCRC != 0) {
- const hash = try in_reader.readInt(u16, .little);
- if (hash != @as(u16, @truncate(hasher.hasher.final())))
- return error.WrongChecksum;
- }
-
- return .{
- .allocator = allocator,
- .inflater = try deflate.decompressor(allocator, in_reader, null),
- .in_reader = in_reader,
- .hasher = std.hash.Crc32.init(),
- .info = .{
- .filename = filename,
- .comment = comment,
- .extra = extra,
- .modification_time = MTIME,
- .operating_system = OS,
- },
- .read_amt = 0,
- };
- }
-
- pub fn deinit(self: *Self) void {
- self.inflater.deinit();
- if (self.info.extra) |extra|
- self.allocator.free(extra);
- if (self.info.filename) |filename|
- self.allocator.free(filename);
- if (self.info.comment) |comment|
- self.allocator.free(comment);
- }
-
- /// Implements the io.Reader interface
- pub fn read(self: *Self, buffer: []u8) Error!usize {
- if (buffer.len == 0)
- return 0;
-
- // Read from the compressed stream and update the computed checksum
- const r = try self.inflater.read(buffer);
- if (r != 0) {
- self.hasher.update(buffer[0..r]);
- self.read_amt +%= @truncate(r);
- return r;
- }
-
- try self.inflater.close();
-
- // We've reached the end of stream, check if the checksum matches
- const hash = try self.in_reader.readInt(u32, .little);
- if (hash != self.hasher.final())
- return error.WrongChecksum;
-
- // The ISIZE field is the size of the uncompressed input modulo 2^32
- const input_size = try self.in_reader.readInt(u32, .little);
- if (self.read_amt != input_size)
- return error.CorruptedData;
-
- return 0;
- }
-
- pub fn reader(self: *Self) Reader {
- return .{ .context = self };
- }
- };
+/// Decompress compressed data from reader and write plain data to the writer.
+pub fn decompress(reader: anytype, writer: anytype) !void {
+ try inflate.decompress(.gzip, reader, writer);
}
-pub fn decompress(allocator: mem.Allocator, reader: anytype) !Decompress(@TypeOf(reader)) {
- return Decompress(@TypeOf(reader)).init(allocator, reader);
+/// Decompressor type
+pub fn Decompressor(comptime ReaderType: type) type {
+ return inflate.Inflate(.gzip, ReaderType);
}
-pub const CompressOptions = struct {
- header: Header = .{},
- hash_header: bool = true,
- level: deflate.Compression = .default_compression,
-};
-
-pub fn Compress(comptime WriterType: type) type {
- return struct {
- const Self = @This();
-
- pub const Error = WriterType.Error ||
- deflate.Compressor(WriterType).Error;
- pub const Writer = io.Writer(*Self, Error, write);
-
- allocator: mem.Allocator,
- deflater: deflate.Compressor(WriterType),
- out_writer: WriterType,
- hasher: std.hash.Crc32,
- write_amt: u32,
-
- fn init(allocator: mem.Allocator, out_writer: WriterType, options: CompressOptions) !Self {
- var hasher = std.compress.hashedWriter(out_writer, std.hash.Crc32.init());
- const hashed_writer = hasher.writer();
-
- // ID1/ID2
- try hashed_writer.writeAll(magic);
- // CM
- try hashed_writer.writeByte(8);
- // Flags
- try hashed_writer.writeByte(
- @as(u8, if (options.hash_header) FHCRC else 0) |
- @as(u8, if (options.header.extra) |_| FEXTRA else 0) |
- @as(u8, if (options.header.filename) |_| FNAME else 0) |
- @as(u8, if (options.header.comment) |_| FCOMMENT else 0),
- );
- // Modification time
- try hashed_writer.writeInt(u32, options.header.modification_time, .little);
- // Extra flags
- try hashed_writer.writeByte(0);
- // Operating system
- try hashed_writer.writeByte(options.header.operating_system);
-
- if (options.header.extra) |extra| {
- try hashed_writer.writeInt(u16, @intCast(extra.len), .little);
- try hashed_writer.writeAll(extra);
- }
-
- if (options.header.filename) |filename| {
- try hashed_writer.writeAll(filename);
- try hashed_writer.writeByte(0);
- }
-
- if (options.header.comment) |comment| {
- try hashed_writer.writeAll(comment);
- try hashed_writer.writeByte(0);
- }
-
- if (options.hash_header) {
- try out_writer.writeInt(
- u16,
- @truncate(hasher.hasher.final()),
- .little,
- );
- }
-
- return .{
- .allocator = allocator,
- .deflater = try deflate.compressor(allocator, out_writer, .{ .level = options.level }),
- .out_writer = out_writer,
- .hasher = std.hash.Crc32.init(),
- .write_amt = 0,
- };
- }
-
- pub fn deinit(self: *Self) void {
- self.deflater.deinit();
- }
-
- /// Implements the io.Writer interface
- pub fn write(self: *Self, buffer: []const u8) Error!usize {
- if (buffer.len == 0)
- return 0;
-
- // Write to the compressed stream and update the computed checksum
- const r = try self.deflater.write(buffer);
- self.hasher.update(buffer[0..r]);
- self.write_amt +%= @truncate(r);
- return r;
- }
-
- pub fn writer(self: *Self) Writer {
- return .{ .context = self };
- }
+/// Create Decompressor which will read compressed data from reader.
+pub fn decompressor(reader: anytype) Decompressor(@TypeOf(reader)) {
+ return inflate.decompressor(.gzip, reader);
+}
- pub fn flush(self: *Self) Error!void {
- try self.deflater.flush();
- }
+/// Compression level, trades between speed and compression size.
+pub const Options = deflate.Options;
- pub fn close(self: *Self) Error!void {
- try self.deflater.close();
- try self.out_writer.writeInt(u32, self.hasher.final(), .little);
- try self.out_writer.writeInt(u32, self.write_amt, .little);
- }
- };
+/// Compress plain data from reader and write compressed data to the writer.
+pub fn compress(reader: anytype, writer: anytype, options: Options) !void {
+ try deflate.compress(.gzip, reader, writer, options);
}
-pub fn compress(allocator: mem.Allocator, writer: anytype, options: CompressOptions) !Compress(@TypeOf(writer)) {
- return Compress(@TypeOf(writer)).init(allocator, writer, options);
+/// Compressor type
+pub fn Compressor(comptime WriterType: type) type {
+ return deflate.Compressor(.gzip, WriterType);
}
-fn testReader(expected: []const u8, data: []const u8) !void {
- var in_stream = io.fixedBufferStream(data);
-
- var gzip_stream = try decompress(testing.allocator, in_stream.reader());
- defer gzip_stream.deinit();
-
- // Read and decompress the whole file
- const buf = try gzip_stream.reader().readAllAlloc(testing.allocator, std.math.maxInt(usize));
- defer testing.allocator.free(buf);
-
- // Check against the reference
- try testing.expectEqualSlices(u8, expected, buf);
+/// Create Compressor which outputs compressed data to the writer.
+pub fn compressor(writer: anytype, options: Options) !Compressor(@TypeOf(writer)) {
+ return try deflate.compressor(.gzip, writer, options);
}
-fn testWriter(expected: []const u8, data: []const u8, options: CompressOptions) !void {
- var actual = std.ArrayList(u8).init(testing.allocator);
- defer actual.deinit();
+/// Huffman only compression. Without Lempel-Ziv match searching. Faster
+/// compression, less memory requirements but bigger compressed sizes.
+pub const huffman = struct {
+ pub fn compress(reader: anytype, writer: anytype) !void {
+ try deflate.huffman.compress(.gzip, reader, writer);
+ }
- var gzip_stream = try compress(testing.allocator, actual.writer(), options);
- defer gzip_stream.deinit();
-
- // Write and compress the whole file
- try gzip_stream.writer().writeAll(data);
- try gzip_stream.close();
-
- // Check against the reference
- try testing.expectEqualSlices(u8, expected, actual.items);
-}
+ pub fn Compressor(comptime WriterType: type) type {
+ return deflate.huffman.Compressor(.gzip, WriterType);
+ }
-// All the test cases are obtained by compressing the RFC1952 text
-//
-// https://tools.ietf.org/rfc/rfc1952.txt length=25037 bytes
-// SHA256=164ef0897b4cbec63abf1b57f069f3599bd0fb7c72c2a4dee21bd7e03ec9af67
-test "compressed data" {
- const plain = @embedFile("testdata/rfc1952.txt");
- const compressed = @embedFile("testdata/rfc1952.txt.gz");
- try testReader(plain, compressed);
- try testWriter(compressed, plain, .{
- .header = .{
- .filename = "rfc1952.txt",
- .modification_time = 1706533053,
- .operating_system = 3,
- },
- });
-}
-
-test "sanity checks" {
- // Truncated header
- try testing.expectError(
- error.EndOfStream,
- testReader(undefined, &[_]u8{ 0x1f, 0x8B }),
- );
- // Wrong CM
- try testing.expectError(
- error.InvalidCompression,
- testReader(undefined, &[_]u8{
- 0x1f, 0x8b, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x03,
- }),
- );
- // Wrong checksum
- try testing.expectError(
- error.WrongChecksum,
- testReader(undefined, &[_]u8{
- 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x03, 0x03, 0x00, 0x00, 0x00, 0x00, 0x01,
- 0x00, 0x00, 0x00, 0x00,
- }),
- );
- // Truncated checksum
- try testing.expectError(
- error.EndOfStream,
- testReader(undefined, &[_]u8{
- 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x03, 0x03, 0x00, 0x00, 0x00, 0x00,
- }),
- );
- // Wrong initial size
- try testing.expectError(
- error.CorruptedData,
- testReader(undefined, &[_]u8{
- 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x03, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x01,
- }),
- );
- // Truncated initial size field
- try testing.expectError(
- error.EndOfStream,
- testReader(undefined, &[_]u8{
- 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x03, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00,
- }),
- );
-}
+ pub fn compressor(writer: anytype) !huffman.Compressor(@TypeOf(writer)) {
+ return deflate.huffman.compressor(.gzip, writer);
+ }
+};
-test "header checksum" {
- try testReader("", &[_]u8{
- // GZIP header
- 0x1f, 0x8b, 0x08, 0x12, 0x00, 0x09, 0x6e, 0x88, 0x00, 0xff, 0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x00,
+/// No compression, store only. Compressed size is slightly bigger than plain.
+pub const store = struct {
+ pub fn compress(reader: anytype, writer: anytype) !void {
+ try deflate.store.compress(.gzip, reader, writer);
+ }
- // header.FHCRC (should cover entire header)
- 0x99, 0xd6,
+ pub fn Compressor(comptime WriterType: type) type {
+ return deflate.store.Compressor(.gzip, WriterType);
+ }
- // GZIP data
- 0x01, 0x00, 0x00, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- });
-}
+ pub fn compressor(writer: anytype) !store.Compressor(@TypeOf(writer)) {
+ return deflate.store.compressor(.gzip, writer);
+ }
+};
diff --git a/lib/std/compress/testdata/rfc1951.txt b/lib/std/compress/testdata/rfc1951.txt
deleted file mode 100644
index 403c8c722f..0000000000
--- a/lib/std/compress/testdata/rfc1951.txt
+++ /dev/null
@@ -1,955 +0,0 @@
-
-
-
-
-
-
-Network Working Group P. Deutsch
-Request for Comments: 1951 Aladdin Enterprises
-Category: Informational May 1996
-
-
- DEFLATE Compressed Data Format Specification version 1.3
-
-Status of This Memo
-
- This memo provides information for the Internet community. This memo
- does not specify an Internet standard of any kind. Distribution of
- this memo is unlimited.
-
-IESG Note:
-
- The IESG takes no position on the validity of any Intellectual
- Property Rights statements contained in this document.
-
-Notices
-
- Copyright (c) 1996 L. Peter Deutsch
-
- Permission is granted to copy and distribute this document for any
- purpose and without charge, including translations into other
- languages and incorporation into compilations, provided that the
- copyright notice and this notice are preserved, and that any
- substantive changes or deletions from the original are clearly
- marked.
-
- A pointer to the latest version of this and related documentation in
- HTML format can be found at the URL
- <ftp://ftp.uu.net/graphics/png/documents/zlib/zdoc-index.html>.
-
-Abstract
-
- This specification defines a lossless compressed data format that
- compresses data using a combination of the LZ77 algorithm and Huffman
- coding, with efficiency comparable to the best currently available
- general-purpose compression methods. The data can be produced or
- consumed, even for an arbitrarily long sequentially presented input
- data stream, using only an a priori bounded amount of intermediate
- storage. The format can be implemented readily in a manner not
- covered by patents.
-
-
-
-
-
-
-
-
-Deutsch Informational [Page 1]
-
-RFC 1951 DEFLATE Compressed Data Format Specification May 1996
-
-
-Table of Contents
-
- 1. Introduction ................................................... 2
- 1.1. Purpose ................................................... 2
- 1.2. Intended audience ......................................... 3
- 1.3. Scope ..................................................... 3
- 1.4. Compliance ................................................ 3
- 1.5. Definitions of terms and conventions used ................ 3
- 1.6. Changes from previous versions ............................ 4
- 2. Compressed representation overview ............................. 4
- 3. Detailed specification ......................................... 5
- 3.1. Overall conventions ....................................... 5
- 3.1.1. Packing into bytes .................................. 5
- 3.2. Compressed block format ................................... 6
- 3.2.1. Synopsis of prefix and Huffman coding ............... 6
- 3.2.2. Use of Huffman coding in the "deflate" format ....... 7
- 3.2.3. Details of block format ............................. 9
- 3.2.4. Non-compressed blocks (BTYPE=00) ................... 11
- 3.2.5. Compressed blocks (length and distance codes) ...... 11
- 3.2.6. Compression with fixed Huffman codes (BTYPE=01) .... 12
- 3.2.7. Compression with dynamic Huffman codes (BTYPE=10) .. 13
- 3.3. Compliance ............................................... 14
- 4. Compression algorithm details ................................. 14
- 5. References .................................................... 16
- 6. Security Considerations ....................................... 16
- 7. Source code ................................................... 16
- 8. Acknowledgements .............................................. 16
- 9. Author's Address .............................................. 17
-
-1. Introduction
-
- 1.1. Purpose
-
- The purpose of this specification is to define a lossless
- compressed data format that:
- * Is independent of CPU type, operating system, file system,
- and character set, and hence can be used for interchange;
- * Can be produced or consumed, even for an arbitrarily long
- sequentially presented input data stream, using only an a
- priori bounded amount of intermediate storage, and hence
- can be used in data communications or similar structures
- such as Unix filters;
- * Compresses data with efficiency comparable to the best
- currently available general-purpose compression methods,
- and in particular considerably better than the "compress"
- program;
- * Can be implemented readily in a manner not covered by
- patents, and hence can be practiced freely;
-
-
-
-Deutsch Informational [Page 2]
-
-RFC 1951 DEFLATE Compressed Data Format Specification May 1996
-
-
- * Is compatible with the file format produced by the current
- widely used gzip utility, in that conforming decompressors
- will be able to read data produced by the existing gzip
- compressor.
-
- The data format defined by this specification does not attempt to:
-
- * Allow random access to compressed data;
- * Compress specialized data (e.g., raster graphics) as well
- as the best currently available specialized algorithms.
-
- A simple counting argument shows that no lossless compression
- algorithm can compress every possible input data set. For the
- format defined here, the worst case expansion is 5 bytes per 32K-
- byte block, i.e., a size increase of 0.015% for large data sets.
- English text usually compresses by a factor of 2.5 to 3;
- executable files usually compress somewhat less; graphical data
- such as raster images may compress much more.
-
- 1.2. Intended audience
-
- This specification is intended for use by implementors of software
- to compress data into "deflate" format and/or decompress data from
- "deflate" format.
-
- The text of the specification assumes a basic background in
- programming at the level of bits and other primitive data
- representations. Familiarity with the technique of Huffman coding
- is helpful but not required.
-
- 1.3. Scope
-
- The specification specifies a method for representing a sequence
- of bytes as a (usually shorter) sequence of bits, and a method for
- packing the latter bit sequence into bytes.
-
- 1.4. Compliance
-
- Unless otherwise indicated below, a compliant decompressor must be
- able to accept and decompress any data set that conforms to all
- the specifications presented here; a compliant compressor must
- produce data sets that conform to all the specifications presented
- here.
-
- 1.5. Definitions of terms and conventions used
-
- Byte: 8 bits stored or transmitted as a unit (same as an octet).
- For this specification, a byte is exactly 8 bits, even on machines
-
-
-
-Deutsch Informational [Page 3]
-
-RFC 1951 DEFLATE Compressed Data Format Specification May 1996
-
-
- which store a character on a number of bits different from eight.
- See below, for the numbering of bits within a byte.
-
- String: a sequence of arbitrary bytes.
-
- 1.6. Changes from previous versions
-
- There have been no technical changes to the deflate format since
- version 1.1 of this specification. In version 1.2, some
- terminology was changed. Version 1.3 is a conversion of the
- specification to RFC style.
-
-2. Compressed representation overview
-
- A compressed data set consists of a series of blocks, corresponding
- to successive blocks of input data. The block sizes are arbitrary,
- except that non-compressible blocks are limited to 65,535 bytes.
-
- Each block is compressed using a combination of the LZ77 algorithm
- and Huffman coding. The Huffman trees for each block are independent
- of those for previous or subsequent blocks; the LZ77 algorithm may
- use a reference to a duplicated string occurring in a previous block,
- up to 32K input bytes before.
-
- Each block consists of two parts: a pair of Huffman code trees that
- describe the representation of the compressed data part, and a
- compressed data part. (The Huffman trees themselves are compressed
- using Huffman encoding.) The compressed data consists of a series of
- elements of two types: literal bytes (of strings that have not been
- detected as duplicated within the previous 32K input bytes), and
- pointers to duplicated strings, where a pointer is represented as a
- pair <length, backward distance>. The representation used in the
- "deflate" format limits distances to 32K bytes and lengths to 258
- bytes, but does not limit the size of a block, except for
- uncompressible blocks, which are limited as noted above.
-
- Each type of value (literals, distances, and lengths) in the
- compressed data is represented using a Huffman code, using one code
- tree for literals and lengths and a separate code tree for distances.
- The code trees for each block appear in a compact form just before
- the compressed data for that block.
-
-
-
-
-
-
-
-
-
-
-Deutsch Informational [Page 4]
-
-RFC 1951 DEFLATE Compressed Data Format Specification May 1996
-
-
-3. Detailed specification
-
- 3.1. Overall conventions In the diagrams below, a box like this:
-
- +---+
- | | <-- the vertical bars might be missing
- +---+
-
- represents one byte; a box like this:
-
- +==============+
- | |
- +==============+
-
- represents a variable number of bytes.
-
- Bytes stored within a computer do not have a "bit order", since
- they are always treated as a unit. However, a byte considered as
- an integer between 0 and 255 does have a most- and least-
- significant bit, and since we write numbers with the most-
- significant digit on the left, we also write bytes with the most-
- significant bit on the left. In the diagrams below, we number the
- bits of a byte so that bit 0 is the least-significant bit, i.e.,
- the bits are numbered:
-
- +--------+
- |76543210|
- +--------+
-
- Within a computer, a number may occupy multiple bytes. All
- multi-byte numbers in the format described here are stored with
- the least-significant byte first (at the lower memory address).
- For example, the decimal number 520 is stored as:
-
- 0 1
- +--------+--------+
- |00001000|00000010|
- +--------+--------+
- ^ ^
- | |
- | + more significant byte = 2 x 256
- + less significant byte = 8
-
- 3.1.1. Packing into bytes
-
- This document does not address the issue of the order in which
- bits of a byte are transmitted on a bit-sequential medium,
- since the final data format described here is byte- rather than
-
-
-
-Deutsch Informational [Page 5]
-
-RFC 1951 DEFLATE Compressed Data Format Specification May 1996
-
-
- bit-oriented. However, we describe the compressed block format
- in below, as a sequence of data elements of various bit
- lengths, not a sequence of bytes. We must therefore specify
- how to pack these data elements into bytes to form the final
- compressed byte sequence:
-
- * Data elements are packed into bytes in order of
- increasing bit number within the byte, i.e., starting
- with the least-significant bit of the byte.
- * Data elements other than Huffman codes are packed
- starting with the least-significant bit of the data
- element.
- * Huffman codes are packed starting with the most-
- significant bit of the code.
-
- In other words, if one were to print out the compressed data as
- a sequence of bytes, starting with the first byte at the
- *right* margin and proceeding to the *left*, with the most-
- significant bit of each byte on the left as usual, one would be
- able to parse the result from right to left, with fixed-width
- elements in the correct MSB-to-LSB order and Huffman codes in
- bit-reversed order (i.e., with the first bit of the code in the
- relative LSB position).
-
- 3.2. Compressed block format
-
- 3.2.1. Synopsis of prefix and Huffman coding
-
- Prefix coding represents symbols from an a priori known
- alphabet by bit sequences (codes), one code for each symbol, in
- a manner such that different symbols may be represented by bit
- sequences of different lengths, but a parser can always parse
- an encoded string unambiguously symbol-by-symbol.
-
- We define a prefix code in terms of a binary tree in which the
- two edges descending from each non-leaf node are labeled 0 and
- 1 and in which the leaf nodes correspond one-for-one with (are
- labeled with) the symbols of the alphabet; then the code for a
- symbol is the sequence of 0's and 1's on the edges leading from
- the root to the leaf labeled with that symbol. For example:
-
-
-
-
-
-
-
-
-
-
-
-Deutsch Informational [Page 6]
-
-RFC 1951 DEFLATE Compressed Data Format Specification May 1996
-
-
- /\ Symbol Code
- 0 1 ------ ----
- / \ A 00
- /\ B B 1
- 0 1 C 011
- / \ D 010
- A /\
- 0 1
- / \
- D C
-
- A parser can decode the next symbol from an encoded input
- stream by walking down the tree from the root, at each step
- choosing the edge corresponding to the next input bit.
-
- Given an alphabet with known symbol frequencies, the Huffman
- algorithm allows the construction of an optimal prefix code
- (one which represents strings with those symbol frequencies
- using the fewest bits of any possible prefix codes for that
- alphabet). Such a code is called a Huffman code. (See
- reference [1] in Chapter 5, references for additional
- information on Huffman codes.)
-
- Note that in the "deflate" format, the Huffman codes for the
- various alphabets must not exceed certain maximum code lengths.
- This constraint complicates the algorithm for computing code
- lengths from symbol frequencies. Again, see Chapter 5,
- references for details.
-
- 3.2.2. Use of Huffman coding in the "deflate" format
-
- The Huffman codes used for each alphabet in the "deflate"
- format have two additional rules:
-
- * All codes of a given bit length have lexicographically
- consecutive values, in the same order as the symbols
- they represent;
-
- * Shorter codes lexicographically precede longer codes.
-
-
-
-
-
-
-
-
-
-
-
-
-Deutsch Informational [Page 7]
-
-RFC 1951 DEFLATE Compressed Data Format Specification May 1996
-
-
- We could recode the example above to follow this rule as
- follows, assuming that the order of the alphabet is ABCD:
-
- Symbol Code
- ------ ----
- A 10
- B 0
- C 110
- D 111
-
- I.e., 0 precedes 10 which precedes 11x, and 110 and 111 are
- lexicographically consecutive.
-
- Given this rule, we can define the Huffman code for an alphabet
- just by giving the bit lengths of the codes for each symbol of
- the alphabet in order; this is sufficient to determine the
- actual codes. In our example, the code is completely defined
- by the sequence of bit lengths (2, 1, 3, 3). The following
- algorithm generates the codes as integers, intended to be read
- from most- to least-significant bit. The code lengths are
- initially in tree[I].Len; the codes are produced in
- tree[I].Code.
-
- 1) Count the number of codes for each code length. Let
- bl_count[N] be the number of codes of length N, N >= 1.
-
- 2) Find the numerical value of the smallest code for each
- code length:
-
- code = 0;
- bl_count[0] = 0;
- for (bits = 1; bits <= MAX_BITS; bits++) {
- code = (code + bl_count[bits-1]) << 1;
- next_code[bits] = code;
- }
-
- 3) Assign numerical values to all codes, using consecutive
- values for all codes of the same length with the base
- values determined at step 2. Codes that are never used
- (which have a bit length of zero) must not be assigned a
- value.
-
- for (n = 0; n <= max_code; n++) {
- len = tree[n].Len;
- if (len != 0) {
- tree[n].Code = next_code[len];
- next_code[len]++;
- }
-
-
-
-Deutsch Informational [Page 8]
-
-RFC 1951 DEFLATE Compressed Data Format Specification May 1996
-
-
- }
-
- Example:
-
- Consider the alphabet ABCDEFGH, with bit lengths (3, 3, 3, 3,
- 3, 2, 4, 4). After step 1, we have:
-
- N bl_count[N]
- - -----------
- 2 1
- 3 5
- 4 2
-
- Step 2 computes the following next_code values:
-
- N next_code[N]
- - ------------
- 1 0
- 2 0
- 3 2
- 4 14
-
- Step 3 produces the following code values:
-
- Symbol Length Code
- ------ ------ ----
- A 3 010
- B 3 011
- C 3 100
- D 3 101
- E 3 110
- F 2 00
- G 4 1110
- H 4 1111
-
- 3.2.3. Details of block format
-
- Each block of compressed data begins with 3 header bits
- containing the following data:
-
- first bit BFINAL
- next 2 bits BTYPE
-
- Note that the header bits do not necessarily begin on a byte
- boundary, since a block does not necessarily occupy an integral
- number of bytes.
-
-
-
-
-
-Deutsch Informational [Page 9]
-
-RFC 1951 DEFLATE Compressed Data Format Specification May 1996
-
-
- BFINAL is set if and only if this is the last block of the data
- set.
-
- BTYPE specifies how the data are compressed, as follows:
-
- 00 - no compression
- 01 - compressed with fixed Huffman codes
- 10 - compressed with dynamic Huffman codes
- 11 - reserved (error)
-
- The only difference between the two compressed cases is how the
- Huffman codes for the literal/length and distance alphabets are
- defined.
-
- In all cases, the decoding algorithm for the actual data is as
- follows:
-
- do
- read block header from input stream.
- if stored with no compression
- skip any remaining bits in current partially
- processed byte
- read LEN and NLEN (see next section)
- copy LEN bytes of data to output
- otherwise
- if compressed with dynamic Huffman codes
- read representation of code trees (see
- subsection below)
- loop (until end of block code recognized)
- decode literal/length value from input stream
- if value < 256
- copy value (literal byte) to output stream
- otherwise
- if value = end of block (256)
- break from loop
- otherwise (value = 257..285)
- decode distance from input stream
-
- move backwards distance bytes in the output
- stream, and copy length bytes from this
- position to the output stream.
- end loop
- while not last block
-
- Note that a duplicated string reference may refer to a string
- in a previous block; i.e., the backward distance may cross one
- or more block boundaries. However a distance cannot refer past
- the beginning of the output stream. (An application using a
-
-
-
-Deutsch Informational [Page 10]
-
-RFC 1951 DEFLATE Compressed Data Format Specification May 1996
-
-
- preset dictionary might discard part of the output stream; a
- distance can refer to that part of the output stream anyway)
- Note also that the referenced string may overlap the current
- position; for example, if the last 2 bytes decoded have values
- X and Y, a string reference with <length = 5, distance = 2>
- adds X,Y,X,Y,X to the output stream.
-
- We now specify each compression method in turn.
-
- 3.2.4. Non-compressed blocks (BTYPE=00)
-
- Any bits of input up to the next byte boundary are ignored.
- The rest of the block consists of the following information:
-
- 0 1 2 3 4...
- +---+---+---+---+================================+
- | LEN | NLEN |... LEN bytes of literal data...|
- +---+---+---+---+================================+
-
- LEN is the number of data bytes in the block. NLEN is the
- one's complement of LEN.
-
- 3.2.5. Compressed blocks (length and distance codes)
-
- As noted above, encoded data blocks in the "deflate" format
- consist of sequences of symbols drawn from three conceptually
- distinct alphabets: either literal bytes, from the alphabet of
- byte values (0..255), or <length, backward distance> pairs,
- where the length is drawn from (3..258) and the distance is
- drawn from (1..32,768). In fact, the literal and length
- alphabets are merged into a single alphabet (0..285), where
- values 0..255 represent literal bytes, the value 256 indicates
- end-of-block, and values 257..285 represent length codes
- (possibly in conjunction with extra bits following the symbol
- code) as follows:
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-Deutsch Informational [Page 11]
-
-RFC 1951 DEFLATE Compressed Data Format Specification May 1996
-
-
- Extra Extra Extra
- Code Bits Length(s) Code Bits Lengths Code Bits Length(s)
- ---- ---- ------ ---- ---- ------- ---- ---- -------
- 257 0 3 267 1 15,16 277 4 67-82
- 258 0 4 268 1 17,18 278 4 83-98
- 259 0 5 269 2 19-22 279 4 99-114
- 260 0 6 270 2 23-26 280 4 115-130
- 261 0 7 271 2 27-30 281 5 131-162
- 262 0 8 272 2 31-34 282 5 163-194
- 263 0 9 273 3 35-42 283 5 195-226
- 264 0 10 274 3 43-50 284 5 227-257
- 265 1 11,12 275 3 51-58 285 0 258
- 266 1 13,14 276 3 59-66
-
- The extra bits should be interpreted as a machine integer
- stored with the most-significant bit first, e.g., bits 1110
- represent the value 14.
-
- Extra Extra Extra
- Code Bits Dist Code Bits Dist Code Bits Distance
- ---- ---- ---- ---- ---- ------ ---- ---- --------
- 0 0 1 10 4 33-48 20 9 1025-1536
- 1 0 2 11 4 49-64 21 9 1537-2048
- 2 0 3 12 5 65-96 22 10 2049-3072
- 3 0 4 13 5 97-128 23 10 3073-4096
- 4 1 5,6 14 6 129-192 24 11 4097-6144
- 5 1 7,8 15 6 193-256 25 11 6145-8192
- 6 2 9-12 16 7 257-384 26 12 8193-12288
- 7 2 13-16 17 7 385-512 27 12 12289-16384
- 8 3 17-24 18 8 513-768 28 13 16385-24576
- 9 3 25-32 19 8 769-1024 29 13 24577-32768
-
- 3.2.6. Compression with fixed Huffman codes (BTYPE=01)
-
- The Huffman codes for the two alphabets are fixed, and are not
- represented explicitly in the data. The Huffman code lengths
- for the literal/length alphabet are:
-
- Lit Value Bits Codes
- --------- ---- -----
- 0 - 143 8 00110000 through
- 10111111
- 144 - 255 9 110010000 through
- 111111111
- 256 - 279 7 0000000 through
- 0010111
- 280 - 287 8 11000000 through
- 11000111
-
-
-
-Deutsch Informational [Page 12]
-
-RFC 1951 DEFLATE Compressed Data Format Specification May 1996
-
-
- The code lengths are sufficient to generate the actual codes,
- as described above; we show the codes in the table for added
- clarity. Literal/length values 286-287 will never actually
- occur in the compressed data, but participate in the code
- construction.
-
- Distance codes 0-31 are represented by (fixed-length) 5-bit
- codes, with possible additional bits as shown in the table
- shown in Paragraph 3.2.5, above. Note that distance codes 30-
- 31 will never actually occur in the compressed data.
-
- 3.2.7. Compression with dynamic Huffman codes (BTYPE=10)
-
- The Huffman codes for the two alphabets appear in the block
- immediately after the header bits and before the actual
- compressed data, first the literal/length code and then the
- distance code. Each code is defined by a sequence of code
- lengths, as discussed in Paragraph 3.2.2, above. For even
- greater compactness, the code length sequences themselves are
- compressed using a Huffman code. The alphabet for code lengths
- is as follows:
-
- 0 - 15: Represent code lengths of 0 - 15
- 16: Copy the previous code length 3 - 6 times.
- The next 2 bits indicate repeat length
- (0 = 3, ... , 3 = 6)
- Example: Codes 8, 16 (+2 bits 11),
- 16 (+2 bits 10) will expand to
- 12 code lengths of 8 (1 + 6 + 5)
- 17: Repeat a code length of 0 for 3 - 10 times.
- (3 bits of length)
- 18: Repeat a code length of 0 for 11 - 138 times
- (7 bits of length)
-
- A code length of 0 indicates that the corresponding symbol in
- the literal/length or distance alphabet will not occur in the
- block, and should not participate in the Huffman code
- construction algorithm given earlier. If only one distance
- code is used, it is encoded using one bit, not zero bits; in
- this case there is a single code length of one, with one unused
- code. One distance code of zero bits means that there are no
- distance codes used at all (the data is all literals).
-
- We can now define the format of the block:
-
- 5 Bits: HLIT, # of Literal/Length codes - 257 (257 - 286)
- 5 Bits: HDIST, # of Distance codes - 1 (1 - 32)
- 4 Bits: HCLEN, # of Code Length codes - 4 (4 - 19)
-
-
-
-Deutsch Informational [Page 13]
-
-RFC 1951 DEFLATE Compressed Data Format Specification May 1996
-
-
- (HCLEN + 4) x 3 bits: code lengths for the code length
- alphabet given just above, in the order: 16, 17, 18,
- 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15
-
- These code lengths are interpreted as 3-bit integers
- (0-7); as above, a code length of 0 means the
- corresponding symbol (literal/length or distance code
- length) is not used.
-
- HLIT + 257 code lengths for the literal/length alphabet,
- encoded using the code length Huffman code
-
- HDIST + 1 code lengths for the distance alphabet,
- encoded using the code length Huffman code
-
- The actual compressed data of the block,
- encoded using the literal/length and distance Huffman
- codes
-
- The literal/length symbol 256 (end of data),
- encoded using the literal/length Huffman code
-
- The code length repeat codes can cross from HLIT + 257 to the
- HDIST + 1 code lengths. In other words, all code lengths form
- a single sequence of HLIT + HDIST + 258 values.
-
- 3.3. Compliance
-
- A compressor may limit further the ranges of values specified in
- the previous section and still be compliant; for example, it may
- limit the range of backward pointers to some value smaller than
- 32K. Similarly, a compressor may limit the size of blocks so that
- a compressible block fits in memory.
-
- A compliant decompressor must accept the full range of possible
- values defined in the previous section, and must accept blocks of
- arbitrary size.
-
-4. Compression algorithm details
-
- While it is the intent of this document to define the "deflate"
- compressed data format without reference to any particular
- compression algorithm, the format is related to the compressed
- formats produced by LZ77 (Lempel-Ziv 1977, see reference [2] below);
- since many variations of LZ77 are patented, it is strongly
- recommended that the implementor of a compressor follow the general
- algorithm presented here, which is known not to be patented per se.
- The material in this section is not part of the definition of the
-
-
-
-Deutsch Informational [Page 14]
-
-RFC 1951 DEFLATE Compressed Data Format Specification May 1996
-
-
- specification per se, and a compressor need not follow it in order to
- be compliant.
-
- The compressor terminates a block when it determines that starting a
- new block with fresh trees would be useful, or when the block size
- fills up the compressor's block buffer.
-
- The compressor uses a chained hash table to find duplicated strings,
- using a hash function that operates on 3-byte sequences. At any
- given point during compression, let XYZ be the next 3 input bytes to
- be examined (not necessarily all different, of course). First, the
- compressor examines the hash chain for XYZ. If the chain is empty,
- the compressor simply writes out X as a literal byte and advances one
- byte in the input. If the hash chain is not empty, indicating that
- the sequence XYZ (or, if we are unlucky, some other 3 bytes with the
- same hash function value) has occurred recently, the compressor
- compares all strings on the XYZ hash chain with the actual input data
- sequence starting at the current point, and selects the longest
- match.
-
- The compressor searches the hash chains starting with the most recent
- strings, to favor small distances and thus take advantage of the
- Huffman encoding. The hash chains are singly linked. There are no
- deletions from the hash chains; the algorithm simply discards matches
- that are too old. To avoid a worst-case situation, very long hash
- chains are arbitrarily truncated at a certain length, determined by a
- run-time parameter.
-
- To improve overall compression, the compressor optionally defers the
- selection of matches ("lazy matching"): after a match of length N has
- been found, the compressor searches for a longer match starting at
- the next input byte. If it finds a longer match, it truncates the
- previous match to a length of one (thus producing a single literal
- byte) and then emits the longer match. Otherwise, it emits the
- original match, and, as described above, advances N bytes before
- continuing.
-
- Run-time parameters also control this "lazy match" procedure. If
- compression ratio is most important, the compressor attempts a
- complete second search regardless of the length of the first match.
- In the normal case, if the current match is "long enough", the
- compressor reduces the search for a longer match, thus speeding up
- the process. If speed is most important, the compressor inserts new
- strings in the hash table only when no match was found, or when the
- match is not "too long". This degrades the compression ratio but
- saves time since there are both fewer insertions and fewer searches.
-
-
-
-
-
-Deutsch Informational [Page 15]
-
-RFC 1951 DEFLATE Compressed Data Format Specification May 1996
-
-
-5. References
-
- [1] Huffman, D. A., "A Method for the Construction of Minimum
- Redundancy Codes", Proceedings of the Institute of Radio
- Engineers, September 1952, Volume 40, Number 9, pp. 1098-1101.
-
- [2] Ziv J., Lempel A., "A Universal Algorithm for Sequential Data
- Compression", IEEE Transactions on Information Theory, Vol. 23,
- No. 3, pp. 337-343.
-
- [3] Gailly, J.-L., and Adler, M., ZLIB documentation and sources,
- available in ftp://ftp.uu.net/pub/archiving/zip/doc/
-
- [4] Gailly, J.-L., and Adler, M., GZIP documentation and sources,
- available as gzip-*.tar in ftp://prep.ai.mit.edu/pub/gnu/
-
- [5] Schwartz, E. S., and Kallick, B. "Generating a canonical prefix
- encoding." Comm. ACM, 7,3 (Mar. 1964), pp. 166-169.
-
- [6] Hirschberg and Lelewer, "Efficient decoding of prefix codes,"
- Comm. ACM, 33,4, April 1990, pp. 449-459.
-
-6. Security Considerations
-
- Any data compression method involves the reduction of redundancy in
- the data. Consequently, any corruption of the data is likely to have
- severe effects and be difficult to correct. Uncompressed text, on
- the other hand, will probably still be readable despite the presence
- of some corrupted bytes.
-
- It is recommended that systems using this data format provide some
- means of validating the integrity of the compressed data. See
- reference [3], for example.
-
-7. Source code
-
- Source code for a C language implementation of a "deflate" compliant
- compressor and decompressor is available within the zlib package at
- ftp://ftp.uu.net/pub/archiving/zip/zlib/.
-
-8. Acknowledgements
-
- Trademarks cited in this document are the property of their
- respective owners.
-
- Phil Katz designed the deflate format. Jean-Loup Gailly and Mark
- Adler wrote the related software described in this specification.
- Glenn Randers-Pehrson converted this document to RFC and HTML format.
-
-
-
-Deutsch Informational [Page 16]
-
-RFC 1951 DEFLATE Compressed Data Format Specification May 1996
-
-
-9. Author's Address
-
- L. Peter Deutsch
- Aladdin Enterprises
- 203 Santa Margarita Ave.
- Menlo Park, CA 94025
-
- Phone: (415) 322-0103 (AM only)
- FAX: (415) 322-1734
- EMail: <ghost@aladdin.com>
-
- Questions about the technical content of this specification can be
- sent by email to:
-
- Jean-Loup Gailly <gzip@prep.ai.mit.edu> and
- Mark Adler <madler@alumni.caltech.edu>
-
- Editorial comments on this specification can be sent by email to:
-
- L. Peter Deutsch <ghost@aladdin.com> and
- Glenn Randers-Pehrson <randeg@alumni.rpi.edu>
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-Deutsch Informational [Page 17]
-
diff --git a/lib/std/compress/testdata/rfc1951.txt.fixed.z.9 b/lib/std/compress/testdata/rfc1951.txt.fixed.z.9
deleted file mode 100644
index 8ea5904770..0000000000
--- a/lib/std/compress/testdata/rfc1951.txt.fixed.z.9
+++ /dev/null
Binary files differ
diff --git a/lib/std/compress/testdata/rfc1951.txt.z.0 b/lib/std/compress/testdata/rfc1951.txt.z.0
deleted file mode 100644
index 3f50fb68f8..0000000000
--- a/lib/std/compress/testdata/rfc1951.txt.z.0
+++ /dev/null
Binary files differ
diff --git a/lib/std/compress/testdata/rfc1951.txt.z.9 b/lib/std/compress/testdata/rfc1951.txt.z.9
deleted file mode 100644
index 84e7cbe5b7..0000000000
--- a/lib/std/compress/testdata/rfc1951.txt.z.9
+++ /dev/null
Binary files differ
diff --git a/lib/std/compress/testdata/rfc1952.txt b/lib/std/compress/testdata/rfc1952.txt
deleted file mode 100644
index a8e51b4567..0000000000
--- a/lib/std/compress/testdata/rfc1952.txt
+++ /dev/null
@@ -1,675 +0,0 @@
-
-
-
-
-
-
-Network Working Group P. Deutsch
-Request for Comments: 1952 Aladdin Enterprises
-Category: Informational May 1996
-
-
- GZIP file format specification version 4.3
-
-Status of This Memo
-
- This memo provides information for the Internet community. This memo
- does not specify an Internet standard of any kind. Distribution of
- this memo is unlimited.
-
-IESG Note:
-
- The IESG takes no position on the validity of any Intellectual
- Property Rights statements contained in this document.
-
-Notices
-
- Copyright (c) 1996 L. Peter Deutsch
-
- Permission is granted to copy and distribute this document for any
- purpose and without charge, including translations into other
- languages and incorporation into compilations, provided that the
- copyright notice and this notice are preserved, and that any
- substantive changes or deletions from the original are clearly
- marked.
-
- A pointer to the latest version of this and related documentation in
- HTML format can be found at the URL
- <ftp://ftp.uu.net/graphics/png/documents/zlib/zdoc-index.html>.
-
-Abstract
-
- This specification defines a lossless compressed data format that is
- compatible with the widely used GZIP utility. The format includes a
- cyclic redundancy check value for detecting data corruption. The
- format presently uses the DEFLATE method of compression but can be
- easily extended to use other compression methods. The format can be
- implemented readily in a manner not covered by patents.
-
-
-
-
-
-
-
-
-
-
-Deutsch Informational [Page 1]
-
-RFC 1952 GZIP File Format Specification May 1996
-
-
-Table of Contents
-
- 1. Introduction ................................................... 2
- 1.1. Purpose ................................................... 2
- 1.2. Intended audience ......................................... 3
- 1.3. Scope ..................................................... 3
- 1.4. Compliance ................................................ 3
- 1.5. Definitions of terms and conventions used ................. 3
- 1.6. Changes from previous versions ............................ 3
- 2. Detailed specification ......................................... 4
- 2.1. Overall conventions ....................................... 4
- 2.2. File format ............................................... 5
- 2.3. Member format ............................................. 5
- 2.3.1. Member header and trailer ........................... 6
- 2.3.1.1. Extra field ................................... 8
- 2.3.1.2. Compliance .................................... 9
- 3. References .................................................. 9
- 4. Security Considerations .................................... 10
- 5. Acknowledgements ........................................... 10
- 6. Author's Address ........................................... 10
- 7. Appendix: Jean-Loup Gailly's gzip utility .................. 11
- 8. Appendix: Sample CRC Code .................................. 11
-
-1. Introduction
-
- 1.1. Purpose
-
- The purpose of this specification is to define a lossless
- compressed data format that:
-
- * Is independent of CPU type, operating system, file system,
- and character set, and hence can be used for interchange;
- * Can compress or decompress a data stream (as opposed to a
- randomly accessible file) to produce another data stream,
- using only an a priori bounded amount of intermediate
- storage, and hence can be used in data communications or
- similar structures such as Unix filters;
- * Compresses data with efficiency comparable to the best
- currently available general-purpose compression methods,
- and in particular considerably better than the "compress"
- program;
- * Can be implemented readily in a manner not covered by
- patents, and hence can be practiced freely;
- * Is compatible with the file format produced by the current
- widely used gzip utility, in that conforming decompressors
- will be able to read data produced by the existing gzip
- compressor.
-
-
-
-
-Deutsch Informational [Page 2]
-
-RFC 1952 GZIP File Format Specification May 1996
-
-
- The data format defined by this specification does not attempt to:
-
- * Provide random access to compressed data;
- * Compress specialized data (e.g., raster graphics) as well as
- the best currently available specialized algorithms.
-
- 1.2. Intended audience
-
- This specification is intended for use by implementors of software
- to compress data into gzip format and/or decompress data from gzip
- format.
-
- The text of the specification assumes a basic background in
- programming at the level of bits and other primitive data
- representations.
-
- 1.3. Scope
-
- The specification specifies a compression method and a file format
- (the latter assuming only that a file can store a sequence of
- arbitrary bytes). It does not specify any particular interface to
- a file system or anything about character sets or encodings
- (except for file names and comments, which are optional).
-
- 1.4. Compliance
-
- Unless otherwise indicated below, a compliant decompressor must be
- able to accept and decompress any file that conforms to all the
- specifications presented here; a compliant compressor must produce
- files that conform to all the specifications presented here. The
- material in the appendices is not part of the specification per se
- and is not relevant to compliance.
-
- 1.5. Definitions of terms and conventions used
-
- byte: 8 bits stored or transmitted as a unit (same as an octet).
- (For this specification, a byte is exactly 8 bits, even on
- machines which store a character on a number of bits different
- from 8.) See below for the numbering of bits within a byte.
-
- 1.6. Changes from previous versions
-
- There have been no technical changes to the gzip format since
- version 4.1 of this specification. In version 4.2, some
- terminology was changed, and the sample CRC code was rewritten for
- clarity and to eliminate the requirement for the caller to do pre-
- and post-conditioning. Version 4.3 is a conversion of the
- specification to RFC style.
-
-
-
-Deutsch Informational [Page 3]
-
-RFC 1952 GZIP File Format Specification May 1996
-
-
-2. Detailed specification
-
- 2.1. Overall conventions
-
- In the diagrams below, a box like this:
-
- +---+
- |   | <-- the vertical bars might be missing
- +---+
-
- represents one byte; a box like this:
-
- +==============+
- |              |
- +==============+
-
- represents a variable number of bytes.
-
- Bytes stored within a computer do not have a "bit order", since
- they are always treated as a unit. However, a byte considered as
- an integer between 0 and 255 does have a most- and least-
- significant bit, and since we write numbers with the most-
- significant digit on the left, we also write bytes with the most-
- significant bit on the left. In the diagrams below, we number the
- bits of a byte so that bit 0 is the least-significant bit, i.e.,
- the bits are numbered:
-
- +--------+
- |76543210|
- +--------+
-
- This document does not address the issue of the order in which
- bits of a byte are transmitted on a bit-sequential medium, since
- the data format described here is byte- rather than bit-oriented.
-
- Within a computer, a number may occupy multiple bytes. All
- multi-byte numbers in the format described here are stored with
- the least-significant byte first (at the lower memory address).
- For example, the decimal number 520 is stored as:
-
-     0        1
- +--------+--------+
- |00001000|00000010|
- +--------+--------+
-  ^        ^
-  |        |
-  |        + more significant byte = 2 x 256
-  + less significant byte = 8
-
-
-
-Deutsch Informational [Page 4]
-
-RFC 1952 GZIP File Format Specification May 1996
-
-
- 2.2. File format
-
- A gzip file consists of a series of "members" (compressed data
- sets). The format of each member is specified in the following
- section. The members simply appear one after another in the file,
- with no additional information before, between, or after them.
-
- 2.3. Member format
-
- Each member has the following structure:
-
- +---+---+---+---+---+---+---+---+---+---+
- |ID1|ID2|CM |FLG|     MTIME     |XFL|OS | (more-->)
- +---+---+---+---+---+---+---+---+---+---+
-
- (if FLG.FEXTRA set)
-
- +---+---+=================================+
- | XLEN  |...XLEN bytes of "extra field"...| (more-->)
- +---+---+=================================+
-
- (if FLG.FNAME set)
-
- +=========================================+
- |...original file name, zero-terminated...| (more-->)
- +=========================================+
-
- (if FLG.FCOMMENT set)
-
- +===================================+
- |...file comment, zero-terminated...| (more-->)
- +===================================+
-
- (if FLG.FHCRC set)
-
- +---+---+
- | CRC16 |
- +---+---+
-
- +=======================+
- |...compressed blocks...| (more-->)
- +=======================+
-
-   0   1   2   3   4   5   6   7
- +---+---+---+---+---+---+---+---+
- |     CRC32     |     ISIZE     |
- +---+---+---+---+---+---+---+---+
-
-
-
-
-Deutsch Informational [Page 5]
-
-RFC 1952 GZIP File Format Specification May 1996
-
-
- 2.3.1. Member header and trailer
-
- ID1 (IDentification 1)
- ID2 (IDentification 2)
- These have the fixed values ID1 = 31 (0x1f, \037), ID2 = 139
- (0x8b, \213), to identify the file as being in gzip format.
-
- CM (Compression Method)
- This identifies the compression method used in the file. CM
- = 0-7 are reserved. CM = 8 denotes the "deflate"
- compression method, which is the one customarily used by
- gzip and which is documented elsewhere.
-
- FLG (FLaGs)
- This flag byte is divided into individual bits as follows:
-
- bit 0 FTEXT
- bit 1 FHCRC
- bit 2 FEXTRA
- bit 3 FNAME
- bit 4 FCOMMENT
- bit 5 reserved
- bit 6 reserved
- bit 7 reserved
-
- If FTEXT is set, the file is probably ASCII text. This is
- an optional indication, which the compressor may set by
- checking a small amount of the input data to see whether any
- non-ASCII characters are present. In case of doubt, FTEXT
- is cleared, indicating binary data. For systems which have
- different file formats for ascii text and binary data, the
- decompressor can use FTEXT to choose the appropriate format.
- We deliberately do not specify the algorithm used to set
- this bit, since a compressor always has the option of
- leaving it cleared and a decompressor always has the option
- of ignoring it and letting some other program handle issues
- of data conversion.
-
- If FHCRC is set, a CRC16 for the gzip header is present,
- immediately before the compressed data. The CRC16 consists
- of the two least significant bytes of the CRC32 for all
- bytes of the gzip header up to and not including the CRC16.
- [The FHCRC bit was never set by versions of gzip up to
- 1.2.4, even though it was documented with a different
- meaning in gzip 1.2.4.]
-
- If FEXTRA is set, optional extra fields are present, as
- described in a following section.
-
-
-
-Deutsch Informational [Page 6]
-
-RFC 1952 GZIP File Format Specification May 1996
-
-
- If FNAME is set, an original file name is present,
- terminated by a zero byte. The name must consist of ISO
- 8859-1 (LATIN-1) characters; on operating systems using
- EBCDIC or any other character set for file names, the name
- must be translated to the ISO LATIN-1 character set. This
- is the original name of the file being compressed, with any
- directory components removed, and, if the file being
- compressed is on a file system with case insensitive names,
- forced to lower case. There is no original file name if the
- data was compressed from a source other than a named file;
- for example, if the source was stdin on a Unix system, there
- is no file name.
-
- If FCOMMENT is set, a zero-terminated file comment is
- present. This comment is not interpreted; it is only
- intended for human consumption. The comment must consist of
- ISO 8859-1 (LATIN-1) characters. Line breaks should be
- denoted by a single line feed character (10 decimal).
-
- Reserved FLG bits must be zero.
-
- MTIME (Modification TIME)
- This gives the most recent modification time of the original
- file being compressed. The time is in Unix format, i.e.,
- seconds since 00:00:00 GMT, Jan. 1, 1970. (Note that this
- may cause problems for MS-DOS and other systems that use
- local rather than Universal time.) If the compressed data
- did not come from a file, MTIME is set to the time at which
- compression started. MTIME = 0 means no time stamp is
- available.
-
- XFL (eXtra FLags)
- These flags are available for use by specific compression
- methods. The "deflate" method (CM = 8) sets these flags as
- follows:
-
- XFL = 2 - compressor used maximum compression,
- slowest algorithm
- XFL = 4 - compressor used fastest algorithm
-
- OS (Operating System)
- This identifies the type of file system on which compression
- took place. This may be useful in determining end-of-line
- convention for text files. The currently defined values are
- as follows:
-
-
-
-
-
-
-Deutsch Informational [Page 7]
-
-RFC 1952 GZIP File Format Specification May 1996
-
-
- 0 - FAT filesystem (MS-DOS, OS/2, NT/Win32)
- 1 - Amiga
- 2 - VMS (or OpenVMS)
- 3 - Unix
- 4 - VM/CMS
- 5 - Atari TOS
- 6 - HPFS filesystem (OS/2, NT)
- 7 - Macintosh
- 8 - Z-System
- 9 - CP/M
- 10 - TOPS-20
- 11 - NTFS filesystem (NT)
- 12 - QDOS
- 13 - Acorn RISCOS
- 255 - unknown
-
- XLEN (eXtra LENgth)
- If FLG.FEXTRA is set, this gives the length of the optional
- extra field. See below for details.
-
- CRC32 (CRC-32)
- This contains a Cyclic Redundancy Check value of the
- uncompressed data computed according to CRC-32 algorithm
- used in the ISO 3309 standard and in section 8.1.1.6.2 of
- ITU-T recommendation V.42. (See http://www.iso.ch for
- ordering ISO documents. See gopher://info.itu.ch for an
- online version of ITU-T V.42.)
-
- ISIZE (Input SIZE)
- This contains the size of the original (uncompressed) input
- data modulo 2^32.
-
- 2.3.1.1. Extra field
-
- If the FLG.FEXTRA bit is set, an "extra field" is present in
- the header, with total length XLEN bytes. It consists of a
- series of subfields, each of the form:
-
- +---+---+---+---+==================================+
- |SI1|SI2|  LEN  |... LEN bytes of subfield data ...|
- +---+---+---+---+==================================+
-
- SI1 and SI2 provide a subfield ID, typically two ASCII letters
- with some mnemonic value. Jean-Loup Gailly
- <gzip@prep.ai.mit.edu> is maintaining a registry of subfield
- IDs; please send him any subfield ID you wish to use. Subfield
- IDs with SI2 = 0 are reserved for future use. The following
- IDs are currently defined:
-
-
-
-Deutsch Informational [Page 8]
-
-RFC 1952 GZIP File Format Specification May 1996
-
-
- SI1 SI2 Data
- ---------- ---------- ----
- 0x41 ('A') 0x70 ('P') Apollo file type information
-
- LEN gives the length of the subfield data, excluding the 4
- initial bytes.
-
- 2.3.1.2. Compliance
-
- A compliant compressor must produce files with correct ID1,
- ID2, CM, CRC32, and ISIZE, but may set all the other fields in
- the fixed-length part of the header to default values (255 for
- OS, 0 for all others). The compressor must set all reserved
- bits to zero.
-
- A compliant decompressor must check ID1, ID2, and CM, and
- provide an error indication if any of these have incorrect
- values. It must examine FEXTRA/XLEN, FNAME, FCOMMENT and FHCRC
- at least so it can skip over the optional fields if they are
- present. It need not examine any other part of the header or
- trailer; in particular, a decompressor may ignore FTEXT and OS
- and always produce binary output, and still be compliant. A
- compliant decompressor must give an error indication if any
- reserved bit is non-zero, since such a bit could indicate the
- presence of a new field that would cause subsequent data to be
- interpreted incorrectly.
-
-3. References
-
- [1] "Information Processing - 8-bit single-byte coded graphic
- character sets - Part 1: Latin alphabet No.1" (ISO 8859-1:1987).
- The ISO 8859-1 (Latin-1) character set is a superset of 7-bit
- ASCII. Files defining this character set are available as
- iso_8859-1.* in ftp://ftp.uu.net/graphics/png/documents/
-
- [2] ISO 3309
-
- [3] ITU-T recommendation V.42
-
- [4] Deutsch, L.P.,"DEFLATE Compressed Data Format Specification",
- available in ftp://ftp.uu.net/pub/archiving/zip/doc/
-
- [5] Gailly, J.-L., GZIP documentation, available as gzip-*.tar in
- ftp://prep.ai.mit.edu/pub/gnu/
-
- [6] Sarwate, D.V., "Computation of Cyclic Redundancy Checks via Table
- Look-Up", Communications of the ACM, 31(8), pp.1008-1013.
-
-
-
-
-Deutsch Informational [Page 9]
-
-RFC 1952 GZIP File Format Specification May 1996
-
-
- [7] Schwaderer, W.D., "CRC Calculation", April 85 PC Tech Journal,
- pp.118-133.
-
- [8] ftp://ftp.adelaide.edu.au/pub/rocksoft/papers/crc_v3.txt,
- describing the CRC concept.
-
-4. Security Considerations
-
- Any data compression method involves the reduction of redundancy in
- the data. Consequently, any corruption of the data is likely to have
- severe effects and be difficult to correct. Uncompressed text, on
- the other hand, will probably still be readable despite the presence
- of some corrupted bytes.
-
- It is recommended that systems using this data format provide some
- means of validating the integrity of the compressed data, such as by
- setting and checking the CRC-32 check value.
-
-5. Acknowledgements
-
- Trademarks cited in this document are the property of their
- respective owners.
-
- Jean-Loup Gailly designed the gzip format and wrote, with Mark Adler,
- the related software described in this specification. Glenn
- Randers-Pehrson converted this document to RFC and HTML format.
-
-6. Author's Address
-
- L. Peter Deutsch
- Aladdin Enterprises
- 203 Santa Margarita Ave.
- Menlo Park, CA 94025
-
- Phone: (415) 322-0103 (AM only)
- FAX: (415) 322-1734
- EMail: <ghost@aladdin.com>
-
- Questions about the technical content of this specification can be
- sent by email to:
-
- Jean-Loup Gailly <gzip@prep.ai.mit.edu> and
- Mark Adler <madler@alumni.caltech.edu>
-
- Editorial comments on this specification can be sent by email to:
-
- L. Peter Deutsch <ghost@aladdin.com> and
- Glenn Randers-Pehrson <randeg@alumni.rpi.edu>
-
-
-
-Deutsch Informational [Page 10]
-
-RFC 1952 GZIP File Format Specification May 1996
-
-
-7. Appendix: Jean-Loup Gailly's gzip utility
-
- The most widely used implementation of gzip compression, and the
- original documentation on which this specification is based, were
- created by Jean-Loup Gailly <gzip@prep.ai.mit.edu>. Since this
- implementation is a de facto standard, we mention some more of its
- features here. Again, the material in this section is not part of
- the specification per se, and implementations need not follow it to
- be compliant.
-
- When compressing or decompressing a file, gzip preserves the
- protection, ownership, and modification time attributes on the local
- file system, since there is no provision for representing protection
- attributes in the gzip file format itself. Since the file format
- includes a modification time, the gzip decompressor provides a
- command line switch that assigns the modification time from the file,
- rather than the local modification time of the compressed input, to
- the decompressed output.
-
-8. Appendix: Sample CRC Code
-
- The following sample code represents a practical implementation of
- the CRC (Cyclic Redundancy Check). (See also ISO 3309 and ITU-T V.42
- for a formal specification.)
-
- The sample code is in the ANSI C programming language. Non C users
- may find it easier to read with these hints:
-
- & Bitwise AND operator.
- ^ Bitwise exclusive-OR operator.
- >> Bitwise right shift operator. When applied to an
- unsigned quantity, as here, right shift inserts zero
- bit(s) at the left.
- ! Logical NOT operator.
- ++ "n++" increments the variable n.
- 0xNNN 0x introduces a hexadecimal (base 16) constant.
- Suffix L indicates a long value (at least 32 bits).
-
- /* Table of CRCs of all 8-bit messages. */
- unsigned long crc_table[256];
-
- /* Flag: has the table been computed? Initially false. */
- int crc_table_computed = 0;
-
- /* Make the table for a fast CRC. */
- void make_crc_table(void)
- {
- unsigned long c;
-
-
-
-Deutsch Informational [Page 11]
-
-RFC 1952 GZIP File Format Specification May 1996
-
-
- int n, k;
- for (n = 0; n < 256; n++) {
- c = (unsigned long) n;
- for (k = 0; k < 8; k++) {
- if (c & 1) {
- c = 0xedb88320L ^ (c >> 1);
- } else {
- c = c >> 1;
- }
- }
- crc_table[n] = c;
- }
- crc_table_computed = 1;
- }
-
- /*
- Update a running crc with the bytes buf[0..len-1] and return
- the updated crc. The crc should be initialized to zero. Pre- and
- post-conditioning (one's complement) is performed within this
- function so it shouldn't be done by the caller. Usage example:
-
- unsigned long crc = 0L;
-
- while (read_buffer(buffer, length) != EOF) {
- crc = update_crc(crc, buffer, length);
- }
- if (crc != original_crc) error();
- */
- unsigned long update_crc(unsigned long crc,
- unsigned char *buf, int len)
- {
- unsigned long c = crc ^ 0xffffffffL;
- int n;
-
- if (!crc_table_computed)
- make_crc_table();
- for (n = 0; n < len; n++) {
- c = crc_table[(c ^ buf[n]) & 0xff] ^ (c >> 8);
- }
- return c ^ 0xffffffffL;
- }
-
- /* Return the CRC of the bytes buf[0..len-1]. */
- unsigned long crc(unsigned char *buf, int len)
- {
- return update_crc(0L, buf, len);
- }
-
-
-
-
-Deutsch Informational [Page 12]
-
diff --git a/lib/std/compress/testdata/rfc1952.txt.gz b/lib/std/compress/testdata/rfc1952.txt.gz
deleted file mode 100644
index 17958d64f3..0000000000
--- a/lib/std/compress/testdata/rfc1952.txt.gz
+++ /dev/null
Binary files differ
diff --git a/lib/std/compress/zlib.zig b/lib/std/compress/zlib.zig
index 6708875930..33401ce845 100644
--- a/lib/std/compress/zlib.zig
+++ b/lib/std/compress/zlib.zig
@@ -1,282 +1,66 @@
-//
-// Compressor/Decompressor for ZLIB data streams (RFC1950)
+const deflate = @import("flate/deflate.zig");
+const inflate = @import("flate/inflate.zig");
-const std = @import("std");
-const io = std.io;
-const fs = std.fs;
-const testing = std.testing;
-const mem = std.mem;
-const deflate = std.compress.deflate;
-
-// Zlib header format as specified in RFC1950
-const ZLibHeader = packed struct {
- checksum: u5,
- preset_dict: u1,
- compression_level: u2,
- compression_method: u4,
- compression_info: u4,
-
- const DEFLATE = 8;
- const WINDOW_32K = 7;
-};
-
-pub fn DecompressStream(comptime ReaderType: type) type {
- return struct {
- const Self = @This();
-
- pub const Error = ReaderType.Error ||
- deflate.Decompressor(ReaderType).Error ||
- error{ WrongChecksum, Unsupported };
- pub const Reader = io.Reader(*Self, Error, read);
-
- allocator: mem.Allocator,
- inflater: deflate.Decompressor(ReaderType),
- in_reader: ReaderType,
- hasher: std.hash.Adler32,
-
- fn init(allocator: mem.Allocator, source: ReaderType) !Self {
- // Zlib header format is specified in RFC1950
- const header_u16 = try source.readInt(u16, .big);
-
- // verify the header checksum
- if (header_u16 % 31 != 0)
- return error.BadHeader;
- const header = @as(ZLibHeader, @bitCast(header_u16));
-
- // The CM field must be 8 to indicate the use of DEFLATE
- if (header.compression_method != ZLibHeader.DEFLATE)
- return error.InvalidCompression;
- // CINFO is the base-2 logarithm of the LZ77 window size, minus 8.
- // Values above 7 are unspecified and therefore rejected.
- if (header.compression_info > ZLibHeader.WINDOW_32K)
- return error.InvalidWindowSize;
-
- const dictionary = null;
- // TODO: Support this case
- if (header.preset_dict != 0)
- return error.Unsupported;
-
- return Self{
- .allocator = allocator,
- .inflater = try deflate.decompressor(allocator, source, dictionary),
- .in_reader = source,
- .hasher = std.hash.Adler32.init(),
- };
- }
-
- pub fn deinit(self: *Self) void {
- self.inflater.deinit();
- }
-
- // Implements the io.Reader interface
- pub fn read(self: *Self, buffer: []u8) Error!usize {
- if (buffer.len == 0)
- return 0;
-
- // Read from the compressed stream and update the computed checksum
- const r = try self.inflater.read(buffer);
- if (r != 0) {
- self.hasher.update(buffer[0..r]);
- return r;
- }
-
- // We've reached the end of stream, check if the checksum matches
- const hash = try self.in_reader.readInt(u32, .big);
- if (hash != self.hasher.final())
- return error.WrongChecksum;
-
- return 0;
- }
-
- pub fn reader(self: *Self) Reader {
- return .{ .context = self };
- }
- };
+/// Decompress compressed data from reader and write plain data to the writer.
+pub fn decompress(reader: anytype, writer: anytype) !void {
+ try inflate.decompress(.zlib, reader, writer);
}
-pub fn decompressStream(allocator: mem.Allocator, reader: anytype) !DecompressStream(@TypeOf(reader)) {
- return DecompressStream(@TypeOf(reader)).init(allocator, reader);
+/// Decompressor type
+pub fn Decompressor(comptime ReaderType: type) type {
+ return inflate.Inflate(.zlib, ReaderType);
}
-pub const CompressionLevel = enum(u2) {
- no_compression = 0,
- fastest = 1,
- default = 2,
- maximum = 3,
-};
-
-pub const CompressStreamOptions = struct {
- level: CompressionLevel = .default,
-};
-
-pub fn CompressStream(comptime WriterType: type) type {
- return struct {
- const Self = @This();
-
- const Error = WriterType.Error ||
- deflate.Compressor(WriterType).Error;
- pub const Writer = io.Writer(*Self, Error, write);
-
- allocator: mem.Allocator,
- deflator: deflate.Compressor(WriterType),
- in_writer: WriterType,
- hasher: std.hash.Adler32,
-
- fn init(allocator: mem.Allocator, dest: WriterType, options: CompressStreamOptions) !Self {
- var header = ZLibHeader{
- .compression_info = ZLibHeader.WINDOW_32K,
- .compression_method = ZLibHeader.DEFLATE,
- .compression_level = @intFromEnum(options.level),
- .preset_dict = 0,
- .checksum = 0,
- };
- header.checksum = @as(u5, @truncate(31 - @as(u16, @bitCast(header)) % 31));
-
- try dest.writeInt(u16, @as(u16, @bitCast(header)), .big);
-
- const compression_level: deflate.Compression = switch (options.level) {
- .no_compression => .no_compression,
- .fastest => .best_speed,
- .default => .default_compression,
- .maximum => .best_compression,
- };
-
- return Self{
- .allocator = allocator,
- .deflator = try deflate.compressor(allocator, dest, .{ .level = compression_level }),
- .in_writer = dest,
- .hasher = std.hash.Adler32.init(),
- };
- }
-
- pub fn write(self: *Self, bytes: []const u8) Error!usize {
- if (bytes.len == 0) {
- return 0;
- }
-
- const w = try self.deflator.write(bytes);
-
- self.hasher.update(bytes[0..w]);
- return w;
- }
-
- pub fn writer(self: *Self) Writer {
- return .{ .context = self };
- }
-
- pub fn deinit(self: *Self) void {
- self.deflator.deinit();
- }
-
- pub fn finish(self: *Self) !void {
- const hash = self.hasher.final();
- try self.deflator.close();
- try self.in_writer.writeInt(u32, hash, .big);
- }
- };
+/// Create Decompressor which will read compressed data from reader.
+pub fn decompressor(reader: anytype) Decompressor(@TypeOf(reader)) {
+ return inflate.decompressor(.zlib, reader);
}
-pub fn compressStream(allocator: mem.Allocator, writer: anytype, options: CompressStreamOptions) !CompressStream(@TypeOf(writer)) {
- return CompressStream(@TypeOf(writer)).init(allocator, writer, options);
-}
-
-fn testDecompress(data: []const u8, expected: []const u8) !void {
- var in_stream = io.fixedBufferStream(data);
-
- var zlib_stream = try decompressStream(testing.allocator, in_stream.reader());
- defer zlib_stream.deinit();
+/// Compression options; the level trades compression speed against compressed size.
+pub const Options = deflate.Options;
- // Read and decompress the whole file
- const buf = try zlib_stream.reader().readAllAlloc(testing.allocator, std.math.maxInt(usize));
- defer testing.allocator.free(buf);
-
- // Check against the reference
- try testing.expectEqualSlices(u8, expected, buf);
+/// Compress plain data from reader and write compressed data to the writer.
+pub fn compress(reader: anytype, writer: anytype, options: Options) !void {
+ try deflate.compress(.zlib, reader, writer, options);
}
-// All the test cases are obtained by compressing the RFC1951 text
-//
-// https://tools.ietf.org/rfc/rfc1951.txt length=36944 bytes
-// SHA256=5ebf4b5b7fe1c3a0c0ab9aa3ac8c0f3853a7dc484905e76e03b0b0f301350009
-test "compressed data" {
- const rfc1951_txt = @embedFile("testdata/rfc1951.txt");
-
- // Compressed with compression level = 0
- try testDecompress(
- @embedFile("testdata/rfc1951.txt.z.0"),
- rfc1951_txt,
- );
- // Compressed with compression level = 9
- try testDecompress(
- @embedFile("testdata/rfc1951.txt.z.9"),
- rfc1951_txt,
- );
- // Compressed with compression level = 9 and fixed Huffman codes
- try testDecompress(
- @embedFile("testdata/rfc1951.txt.fixed.z.9"),
- rfc1951_txt,
- );
+/// Compressor type
+pub fn Compressor(comptime WriterType: type) type {
+ return deflate.Compressor(.zlib, WriterType);
}
-test "don't read past deflate stream's end" {
- try testDecompress(&[_]u8{
- 0x08, 0xd7, 0x63, 0xf8, 0xcf, 0xc0, 0xc0, 0x00, 0xc1, 0xff,
- 0xff, 0x43, 0x30, 0x03, 0x03, 0xc3, 0xff, 0xff, 0xff, 0x01,
- 0x83, 0x95, 0x0b, 0xf5,
- }, &[_]u8{
- 0x00, 0xff, 0x00, 0x00, 0x00, 0xff, 0x00, 0x00, 0x00, 0xff,
- 0x00, 0xff, 0xff, 0xff, 0x00, 0xff, 0xff, 0xff, 0x00, 0x00,
- 0x00, 0x00, 0xff, 0xff, 0xff,
- });
+/// Create Compressor which outputs compressed data to the writer.
+pub fn compressor(writer: anytype, options: Options) !Compressor(@TypeOf(writer)) {
+ return try deflate.compressor(.zlib, writer, options);
}
-test "sanity checks" {
- // Truncated header
- try testing.expectError(
- error.EndOfStream,
- testDecompress(&[_]u8{0x78}, ""),
- );
- // Failed FCHECK check
- try testing.expectError(
- error.BadHeader,
- testDecompress(&[_]u8{ 0x78, 0x9D }, ""),
- );
- // Wrong CM
- try testing.expectError(
- error.InvalidCompression,
- testDecompress(&[_]u8{ 0x79, 0x94 }, ""),
- );
- // Wrong CINFO
- try testing.expectError(
- error.InvalidWindowSize,
- testDecompress(&[_]u8{ 0x88, 0x98 }, ""),
- );
- // Wrong checksum
- try testing.expectError(
- error.WrongChecksum,
- testDecompress(&[_]u8{ 0x78, 0xda, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00 }, ""),
- );
- // Truncated checksum
- try testing.expectError(
- error.EndOfStream,
- testDecompress(&[_]u8{ 0x78, 0xda, 0x03, 0x00, 0x00 }, ""),
- );
-}
+/// Huffman-only compression, without Lempel-Ziv match searching. Faster
+/// compression and lower memory requirements, but bigger compressed sizes.
+pub const huffman = struct {
+ pub fn compress(reader: anytype, writer: anytype) !void {
+ try deflate.huffman.compress(.zlib, reader, writer);
+ }
-test "compress data" {
- const allocator = testing.allocator;
- const rfc1951_txt = @embedFile("testdata/rfc1951.txt");
+ pub fn Compressor(comptime WriterType: type) type {
+ return deflate.huffman.Compressor(.zlib, WriterType);
+ }
- for (std.meta.tags(CompressionLevel)) |level| {
- var compressed_data = std.ArrayList(u8).init(allocator);
- defer compressed_data.deinit();
+ pub fn compressor(writer: anytype) !huffman.Compressor(@TypeOf(writer)) {
+ return deflate.huffman.compressor(.zlib, writer);
+ }
+};
- var compressor = try compressStream(allocator, compressed_data.writer(), .{ .level = level });
- defer compressor.deinit();
+/// No compression, store only. Compressed size is slightly bigger than plain.
+pub const store = struct {
+ pub fn compress(reader: anytype, writer: anytype) !void {
+ try deflate.store.compress(.zlib, reader, writer);
+ }
- try compressor.writer().writeAll(rfc1951_txt);
- try compressor.finish();
+ pub fn Compressor(comptime WriterType: type) type {
+ return deflate.store.Compressor(.zlib, WriterType);
+ }
- try testDecompress(compressed_data.items, rfc1951_txt);
+ pub fn compressor(writer: anytype) !store.Compressor(@TypeOf(writer)) {
+ return deflate.store.compressor(.zlib, writer);
}
-}
+};
diff --git a/lib/std/compress/zstandard.zig b/lib/std/compress/zstandard.zig
index 4d9421acac..cfe5618bde 100644
--- a/lib/std/compress/zstandard.zig
+++ b/lib/std/compress/zstandard.zig
@@ -1,5 +1,4 @@
const std = @import("std");
-const Allocator = std.mem.Allocator;
const RingBuffer = std.RingBuffer;
const types = @import("zstandard/types.zig");
@@ -8,32 +7,41 @@ pub const compressed_block = types.compressed_block;
pub const decompress = @import("zstandard/decompress.zig");
-pub const DecompressStreamOptions = struct {
+pub const DecompressorOptions = struct {
verify_checksum: bool = true,
- window_size_max: usize = 1 << 23, // 8MiB default maximum window size
+ window_buffer: []u8,
+
+ /// Window buffer length recommended by the standard. A smaller buffer may
+ /// be unable to decompress common streams.
+ pub const default_window_buffer_len = 8 * 1024 * 1024;
};
-pub fn DecompressStream(
- comptime ReaderType: type,
- comptime options: DecompressStreamOptions,
-) type {
+pub fn Decompressor(comptime ReaderType: type) type {
return struct {
const Self = @This();
- allocator: Allocator,
+ const table_size_max = types.compressed_block.table_size_max;
+
source: std.io.CountingReader(ReaderType),
state: enum { NewFrame, InFrame, LastBlock },
decode_state: decompress.block.DecodeState,
frame_context: decompress.FrameContext,
- buffer: RingBuffer,
- literal_fse_buffer: []types.compressed_block.Table.Fse,
- match_fse_buffer: []types.compressed_block.Table.Fse,
- offset_fse_buffer: []types.compressed_block.Table.Fse,
- literals_buffer: []u8,
- sequence_buffer: []u8,
- checksum: if (options.verify_checksum) ?u32 else void,
+ buffer: WindowBuffer,
+ literal_fse_buffer: [table_size_max.literal]types.compressed_block.Table.Fse,
+ match_fse_buffer: [table_size_max.match]types.compressed_block.Table.Fse,
+ offset_fse_buffer: [table_size_max.offset]types.compressed_block.Table.Fse,
+ literals_buffer: [types.block_size_max]u8,
+ sequence_buffer: [types.block_size_max]u8,
+ verify_checksum: bool,
+ checksum: ?u32,
current_frame_decompressed_size: usize,
+ const WindowBuffer = struct {
+ data: []u8 = undefined,
+ read_index: usize = 0,
+ write_index: usize = 0,
+ };
+
pub const Error = ReaderType.Error || error{
ChecksumFailure,
DictionaryIdFlagUnsupported,
@@ -44,19 +52,19 @@ pub fn DecompressStream(
pub const Reader = std.io.Reader(*Self, Error, read);
- pub fn init(allocator: Allocator, source: ReaderType) Self {
- return Self{
- .allocator = allocator,
+ pub fn init(source: ReaderType, options: DecompressorOptions) Self {
+ return .{
.source = std.io.countingReader(source),
.state = .NewFrame,
.decode_state = undefined,
.frame_context = undefined,
- .buffer = undefined,
+ .buffer = .{ .data = options.window_buffer },
.literal_fse_buffer = undefined,
.match_fse_buffer = undefined,
.offset_fse_buffer = undefined,
.literals_buffer = undefined,
.sequence_buffer = undefined,
+ .verify_checksum = options.verify_checksum,
.checksum = undefined,
.current_frame_decompressed_size = undefined,
};
@@ -72,53 +80,20 @@ pub fn DecompressStream(
.zstandard => |header| {
const frame_context = try decompress.FrameContext.init(
header,
- options.window_size_max,
- options.verify_checksum,
- );
-
- const literal_fse_buffer = try self.allocator.alloc(
- types.compressed_block.Table.Fse,
- types.compressed_block.table_size_max.literal,
+ self.buffer.data.len,
+ self.verify_checksum,
);
- errdefer self.allocator.free(literal_fse_buffer);
-
- const match_fse_buffer = try self.allocator.alloc(
- types.compressed_block.Table.Fse,
- types.compressed_block.table_size_max.match,
- );
- errdefer self.allocator.free(match_fse_buffer);
-
- const offset_fse_buffer = try self.allocator.alloc(
- types.compressed_block.Table.Fse,
- types.compressed_block.table_size_max.offset,
- );
- errdefer self.allocator.free(offset_fse_buffer);
const decode_state = decompress.block.DecodeState.init(
- literal_fse_buffer,
- match_fse_buffer,
- offset_fse_buffer,
+ &self.literal_fse_buffer,
+ &self.match_fse_buffer,
+ &self.offset_fse_buffer,
);
- const buffer = try RingBuffer.init(self.allocator, frame_context.window_size);
-
- const literals_data = try self.allocator.alloc(u8, options.window_size_max);
- errdefer self.allocator.free(literals_data);
-
- const sequence_data = try self.allocator.alloc(u8, options.window_size_max);
- errdefer self.allocator.free(sequence_data);
-
- self.literal_fse_buffer = literal_fse_buffer;
- self.match_fse_buffer = match_fse_buffer;
- self.offset_fse_buffer = offset_fse_buffer;
- self.literals_buffer = literals_data;
- self.sequence_buffer = sequence_data;
-
- self.buffer = buffer;
self.decode_state = decode_state;
self.frame_context = frame_context;
- self.checksum = if (options.verify_checksum) null else {};
+ self.checksum = null;
self.current_frame_decompressed_size = 0;
self.state = .InFrame;
@@ -126,16 +101,6 @@ pub fn DecompressStream(
}
}
- pub fn deinit(self: *Self) void {
- if (self.state == .NewFrame) return;
- self.allocator.free(self.decode_state.literal_fse_buffer);
- self.allocator.free(self.decode_state.match_fse_buffer);
- self.allocator.free(self.decode_state.offset_fse_buffer);
- self.allocator.free(self.literals_buffer);
- self.allocator.free(self.sequence_buffer);
- self.buffer.deinit(self.allocator);
- }
-
pub fn reader(self: *Self) Reader {
return .{ .context = self };
}
@@ -153,7 +118,6 @@ pub fn DecompressStream(
0
else
error.MalformedFrame,
- error.OutOfMemory => return error.OutOfMemory,
else => return error.MalformedFrame,
};
}
@@ -165,20 +129,30 @@ pub fn DecompressStream(
fn readInner(self: *Self, buffer: []u8) Error!usize {
std.debug.assert(self.state != .NewFrame);
+ var ring_buffer = RingBuffer{
+ .data = self.buffer.data,
+ .read_index = self.buffer.read_index,
+ .write_index = self.buffer.write_index,
+ };
+ defer {
+ self.buffer.read_index = ring_buffer.read_index;
+ self.buffer.write_index = ring_buffer.write_index;
+ }
+
const source_reader = self.source.reader();
- while (self.buffer.isEmpty() and self.state != .LastBlock) {
+ while (ring_buffer.isEmpty() and self.state != .LastBlock) {
const header_bytes = source_reader.readBytesNoEof(3) catch
return error.MalformedFrame;
const block_header = decompress.block.decodeBlockHeader(&header_bytes);
decompress.block.decodeBlockReader(
- &self.buffer,
+ &ring_buffer,
source_reader,
block_header,
&self.decode_state,
self.frame_context.block_size_max,
- self.literals_buffer,
- self.sequence_buffer,
+ &self.literals_buffer,
+ &self.sequence_buffer,
) catch
return error.MalformedBlock;
@@ -186,12 +160,12 @@ pub fn DecompressStream(
if (self.current_frame_decompressed_size > size) return error.MalformedFrame;
}
- const size = self.buffer.len();
+ const size = ring_buffer.len();
self.current_frame_decompressed_size += size;
if (self.frame_context.hasher_opt) |*hasher| {
if (size > 0) {
- const written_slice = self.buffer.sliceLast(size);
+ const written_slice = ring_buffer.sliceLast(size);
hasher.update(written_slice.first);
hasher.update(written_slice.second);
}
@@ -201,7 +175,7 @@ pub fn DecompressStream(
if (self.frame_context.has_checksum) {
const checksum = source_reader.readInt(u32, .little) catch
return error.MalformedFrame;
- if (comptime options.verify_checksum) {
+ if (self.verify_checksum) {
if (self.frame_context.hasher_opt) |*hasher| {
if (checksum != decompress.computeChecksum(hasher))
return error.ChecksumFailure;
@@ -216,43 +190,28 @@ pub fn DecompressStream(
}
}
- const size = @min(self.buffer.len(), buffer.len);
+ const size = @min(ring_buffer.len(), buffer.len);
if (size > 0) {
- self.buffer.readFirstAssumeLength(buffer, size);
+ ring_buffer.readFirstAssumeLength(buffer, size);
}
- if (self.state == .LastBlock and self.buffer.len() == 0) {
+ if (self.state == .LastBlock and ring_buffer.len() == 0) {
self.state = .NewFrame;
- self.allocator.free(self.literal_fse_buffer);
- self.allocator.free(self.match_fse_buffer);
- self.allocator.free(self.offset_fse_buffer);
- self.allocator.free(self.literals_buffer);
- self.allocator.free(self.sequence_buffer);
- self.buffer.deinit(self.allocator);
}
return size;
}
};
}
-pub fn decompressStreamOptions(
- allocator: Allocator,
- reader: anytype,
- comptime options: DecompressStreamOptions,
-) DecompressStream(@TypeOf(reader, options)) {
- return DecompressStream(@TypeOf(reader), options).init(allocator, reader);
-}
-
-pub fn decompressStream(
- allocator: Allocator,
- reader: anytype,
-) DecompressStream(@TypeOf(reader), .{}) {
- return DecompressStream(@TypeOf(reader), .{}).init(allocator, reader);
+pub fn decompressor(reader: anytype, options: DecompressorOptions) Decompressor(@TypeOf(reader)) {
+ return Decompressor(@TypeOf(reader)).init(reader, options);
}
fn testDecompress(data: []const u8) ![]u8 {
+ const window_buffer = try std.testing.allocator.alloc(u8, 1 << 23);
+ defer std.testing.allocator.free(window_buffer);
+
var in_stream = std.io.fixedBufferStream(data);
- var zstd_stream = decompressStream(std.testing.allocator, in_stream.reader());
- defer zstd_stream.deinit();
+ var zstd_stream = decompressor(in_stream.reader(), .{ .window_buffer = window_buffer });
const result = zstd_stream.reader().readAllAlloc(std.testing.allocator, std.math.maxInt(usize));
return result;
}
@@ -278,38 +237,48 @@ test "zstandard decompression" {
const res19 = try decompress.decode(buffer, compressed19, true);
try std.testing.expectEqual(uncompressed.len, res19);
try std.testing.expectEqualSlices(u8, uncompressed, buffer);
+}
+
+test "zstandard streaming decompression" {
+ // The default stack size for wasm32 is too small for Decompressor: slightly
+ // over 1MiB of stack space is needed, which requires the --stack CLI flag.
+ if (@import("builtin").target.cpu.arch == .wasm32) return error.SkipZigTest;
+
+ const uncompressed = @embedFile("testdata/rfc8478.txt");
+ const compressed3 = @embedFile("testdata/rfc8478.txt.zst.3");
+ const compressed19 = @embedFile("testdata/rfc8478.txt.zst.19");
try testReader(compressed3, uncompressed);
try testReader(compressed19, uncompressed);
}
fn expectEqualDecoded(expected: []const u8, input: []const u8) !void {
- const allocator = std.testing.allocator;
-
{
- const result = try decompress.decodeAlloc(allocator, input, false, 1 << 23);
- defer allocator.free(result);
+ const result = try decompress.decodeAlloc(std.testing.allocator, input, false, 1 << 23);
+ defer std.testing.allocator.free(result);
try std.testing.expectEqualStrings(expected, result);
}
{
- var buffer = try allocator.alloc(u8, 2 * expected.len);
- defer allocator.free(buffer);
+ var buffer = try std.testing.allocator.alloc(u8, 2 * expected.len);
+ defer std.testing.allocator.free(buffer);
const size = try decompress.decode(buffer, input, false);
try std.testing.expectEqualStrings(expected, buffer[0..size]);
}
+}
- {
- var in_stream = std.io.fixedBufferStream(input);
- var stream = decompressStream(allocator, in_stream.reader());
- defer stream.deinit();
+fn expectEqualDecodedStreaming(expected: []const u8, input: []const u8) !void {
+ const window_buffer = try std.testing.allocator.alloc(u8, 1 << 23);
+ defer std.testing.allocator.free(window_buffer);
- const result = try stream.reader().readAllAlloc(allocator, std.math.maxInt(usize));
- defer allocator.free(result);
+ var in_stream = std.io.fixedBufferStream(input);
+ var stream = decompressor(in_stream.reader(), .{ .window_buffer = window_buffer });
- try std.testing.expectEqualStrings(expected, result);
- }
+ const result = try stream.reader().readAllAlloc(std.testing.allocator, std.math.maxInt(usize));
+ defer std.testing.allocator.free(result);
+
+ try std.testing.expectEqualStrings(expected, result);
}
test "zero sized block" {
@@ -327,3 +296,23 @@ test "zero sized block" {
try expectEqualDecoded("", input_raw);
try expectEqualDecoded("", input_rle);
}
+
+test "zero sized block streaming" {
+ // The default stack size for wasm32 is too small for Decompressor: slightly
+ // over 1MiB of stack space is needed, which requires the --stack CLI flag.
+ if (@import("builtin").target.cpu.arch == .wasm32) return error.SkipZigTest;
+
+ const input_raw =
+ "\x28\xb5\x2f\xfd" ++ // zstandard frame magic number
+ "\x20\x00" ++ // frame header: only single_segment_flag set, frame_content_size zero
+ "\x01\x00\x00"; // block header with: last_block set, block_type raw, block_size zero
+
+ const input_rle =
+ "\x28\xb5\x2f\xfd" ++ // zstandard frame magic number
+ "\x20\x00" ++ // frame header: only single_segment_flag set, frame_content_size zero
+ "\x03\x00\x00" ++ // block header with: last_block set, block_type rle, block_size zero
+ "\xaa"; // block_content
+
+ try expectEqualDecodedStreaming("", input_raw);
+ try expectEqualDecodedStreaming("", input_rle);
+}
diff --git a/lib/std/compress/zstandard/decompress.zig b/lib/std/compress/zstandard/decompress.zig
index a012312ab1..86be16268f 100644
--- a/lib/std/compress/zstandard/decompress.zig
+++ b/lib/std/compress/zstandard/decompress.zig
@@ -409,7 +409,7 @@ pub const FrameContext = struct {
.hasher_opt = if (should_compute_checksum) std.hash.XxHash64.init(0) else null,
.window_size = window_size,
.has_checksum = frame_header.descriptor.content_checksum_flag,
- .block_size_max = @min(1 << 17, window_size),
+ .block_size_max = @min(types.block_size_max, window_size),
.content_size = content_size,
};
}
diff --git a/lib/std/compress/zstandard/types.zig b/lib/std/compress/zstandard/types.zig
index db4fbdee2d..41c3797d16 100644
--- a/lib/std/compress/zstandard/types.zig
+++ b/lib/std/compress/zstandard/types.zig
@@ -1,3 +1,5 @@
+pub const block_size_max = 1 << 17;
+
pub const frame = struct {
pub const Kind = enum { zstandard, skippable };
@@ -391,7 +393,7 @@ pub const compressed_block = struct {
pub const table_size_max = struct {
pub const literal = 1 << table_accuracy_log_max.literal;
pub const match = 1 << table_accuracy_log_max.match;
- pub const offset = 1 << table_accuracy_log_max.match;
+ pub const offset = 1 << table_accuracy_log_max.offset;
};
};
diff --git a/lib/std/crypto/aes.zig b/lib/std/crypto/aes.zig
index f5752888fc..5e5ae04b58 100644
--- a/lib/std/crypto/aes.zig
+++ b/lib/std/crypto/aes.zig
@@ -6,7 +6,7 @@ const has_aesni = std.Target.x86.featureSetHas(builtin.cpu.features, .aes);
const has_avx = std.Target.x86.featureSetHas(builtin.cpu.features, .avx);
const has_armaes = std.Target.aarch64.featureSetHas(builtin.cpu.features, .aes);
// C backend doesn't currently support passing vectors to inline asm.
-const impl = if (builtin.cpu.arch == .x86_64 and builtin.zig_backend != .stage2_c and builtin.zig_backend != .stage2_x86_64 and has_aesni and has_avx) impl: {
+const impl = if (builtin.cpu.arch == .x86_64 and builtin.zig_backend != .stage2_c and has_aesni and has_avx) impl: {
break :impl @import("aes/aesni.zig");
} else if (builtin.cpu.arch == .aarch64 and builtin.zig_backend != .stage2_c and has_armaes)
impl: {
diff --git a/lib/std/crypto/aes_ocb.zig b/lib/std/crypto/aes_ocb.zig
index 879710d848..6cbb2e0867 100644
--- a/lib/std/crypto/aes_ocb.zig
+++ b/lib/std/crypto/aes_ocb.zig
@@ -262,7 +262,6 @@ const hexToBytes = std.fmt.hexToBytes;
test "AesOcb test vector 1" {
if (builtin.zig_backend == .stage2_c) return error.SkipZigTest;
- if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
var k: [Aes128Ocb.key_length]u8 = undefined;
var nonce: [Aes128Ocb.nonce_length]u8 = undefined;
@@ -282,7 +281,6 @@ test "AesOcb test vector 1" {
test "AesOcb test vector 2" {
if (builtin.zig_backend == .stage2_c) return error.SkipZigTest;
- if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
var k: [Aes128Ocb.key_length]u8 = undefined;
var nonce: [Aes128Ocb.nonce_length]u8 = undefined;
@@ -304,7 +302,6 @@ test "AesOcb test vector 2" {
test "AesOcb test vector 3" {
if (builtin.zig_backend == .stage2_c) return error.SkipZigTest;
- if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
var k: [Aes128Ocb.key_length]u8 = undefined;
var nonce: [Aes128Ocb.nonce_length]u8 = undefined;
@@ -329,7 +326,6 @@ test "AesOcb test vector 3" {
test "AesOcb test vector 4" {
if (builtin.zig_backend == .stage2_c) return error.SkipZigTest;
- if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
var k: [Aes128Ocb.key_length]u8 = undefined;
var nonce: [Aes128Ocb.nonce_length]u8 = undefined;
diff --git a/lib/std/crypto/benchmark.zig b/lib/std/crypto/benchmark.zig
index 17f11382ca..ee08cdf8ab 100644
--- a/lib/std/crypto/benchmark.zig
+++ b/lib/std/crypto/benchmark.zig
@@ -10,7 +10,7 @@ const crypto = std.crypto;
const KiB = 1024;
const MiB = 1024 * KiB;
-var prng = std.rand.DefaultPrng.init(0);
+var prng = std.Random.DefaultPrng.init(0);
const random = prng.random();
const Crypto = struct {
diff --git a/lib/std/crypto/blake3.zig b/lib/std/crypto/blake3.zig
index d87211fb1e..585c338417 100644
--- a/lib/std/crypto/blake3.zig
+++ b/lib/std/crypto/blake3.zig
@@ -200,7 +200,7 @@ const CompressGeneric = struct {
}
};
-const compress = if (builtin.cpu.arch == .x86_64 and builtin.zig_backend != .stage2_x86_64)
+const compress = if (builtin.cpu.arch == .x86_64)
CompressVectorized.compress
else
CompressGeneric.compress;
diff --git a/lib/std/crypto/ecdsa.zig b/lib/std/crypto/ecdsa.zig
index 321923525b..7c4df6e35d 100644
--- a/lib/std/crypto/ecdsa.zig
+++ b/lib/std/crypto/ecdsa.zig
@@ -389,7 +389,6 @@ test "ECDSA - Basic operations over EcdsaP384Sha384" {
test "ECDSA - Basic operations over Secp256k1" {
if (builtin.zig_backend == .stage2_c) return error.SkipZigTest;
- if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
const Scheme = EcdsaSecp256k1Sha256oSha256;
const kp = try Scheme.KeyPair.create(null);
diff --git a/lib/std/crypto/ff.zig b/lib/std/crypto/ff.zig
index 90b1b9f441..b917ce7a43 100644
--- a/lib/std/crypto/ff.zig
+++ b/lib/std/crypto/ff.zig
@@ -907,9 +907,8 @@ const ct_unprotected = struct {
}
};
-test {
+test "finite field arithmetic" {
if (builtin.zig_backend == .stage2_c) return error.SkipZigTest;
- if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
const M = Modulus(256);
const m = try M.fromPrimitive(u256, 3429938563481314093726330772853735541133072814650493833233);
diff --git a/lib/std/crypto/kyber_d00.zig b/lib/std/crypto/kyber_d00.zig
index ad8e060765..00246bbf40 100644
--- a/lib/std/crypto/kyber_d00.zig
+++ b/lib/std/crypto/kyber_d00.zig
@@ -110,7 +110,7 @@ const assert = std.debug.assert;
const crypto = std.crypto;
const math = std.math;
const mem = std.mem;
-const RndGen = std.rand.DefaultPrng;
+const RndGen = std.Random.DefaultPrng;
const sha3 = crypto.hash.sha3;
// Q is the parameter q ≡ 3329 = 2¹¹ + 2¹⁰ + 2⁸ + 1.
diff --git a/lib/std/crypto/pcurves/p384.zig b/lib/std/crypto/pcurves/p384.zig
index 6585a0adfd..f8c5713209 100644
--- a/lib/std/crypto/pcurves/p384.zig
+++ b/lib/std/crypto/pcurves/p384.zig
@@ -479,7 +479,6 @@ pub const AffineCoordinates = struct {
test {
if (@import("builtin").zig_backend == .stage2_c) return error.SkipZigTest;
- if (@import("builtin").zig_backend == .stage2_x86_64) return error.SkipZigTest;
_ = @import("tests/p384.zig");
}
diff --git a/lib/std/crypto/pcurves/secp256k1.zig b/lib/std/crypto/pcurves/secp256k1.zig
index a623e25de4..c2d9e37dfe 100644
--- a/lib/std/crypto/pcurves/secp256k1.zig
+++ b/lib/std/crypto/pcurves/secp256k1.zig
@@ -557,7 +557,6 @@ pub const AffineCoordinates = struct {
test {
if (@import("builtin").zig_backend == .stage2_c) return error.SkipZigTest;
- if (@import("builtin").zig_backend == .stage2_x86_64) return error.SkipZigTest;
_ = @import("tests/secp256k1.zig");
}
diff --git a/lib/std/crypto/salsa20.zig b/lib/std/crypto/salsa20.zig
index 7f4c1b0157..c791c6b773 100644
--- a/lib/std/crypto/salsa20.zig
+++ b/lib/std/crypto/salsa20.zig
@@ -302,7 +302,10 @@ fn SalsaNonVecImpl(comptime rounds: comptime_int) type {
};
}
-const SalsaImpl = if (builtin.cpu.arch == .x86_64 and builtin.zig_backend != .stage2_x86_64) SalsaVecImpl else SalsaNonVecImpl;
+const SalsaImpl = if (builtin.cpu.arch == .x86_64)
+ SalsaVecImpl
+else
+ SalsaNonVecImpl;
fn keyToWords(key: [32]u8) [8]u32 {
var k: [8]u32 = undefined;
diff --git a/lib/std/crypto/sha2.zig b/lib/std/crypto/sha2.zig
index 10909cfaec..31884c7381 100644
--- a/lib/std/crypto/sha2.zig
+++ b/lib/std/crypto/sha2.zig
@@ -238,7 +238,7 @@ fn Sha2x32(comptime params: Sha2Params32) type {
return;
},
// C backend doesn't currently support passing vectors to inline asm.
- .x86_64 => if (builtin.zig_backend != .stage2_c and builtin.zig_backend != .stage2_x86_64 and comptime std.Target.x86.featureSetHasAll(builtin.cpu.features, .{ .sha, .avx2 })) {
+ .x86_64 => if (builtin.zig_backend != .stage2_c and comptime std.Target.x86.featureSetHasAll(builtin.cpu.features, .{ .sha, .avx2 })) {
var x: v4u32 = [_]u32{ d.s[5], d.s[4], d.s[1], d.s[0] };
var y: v4u32 = [_]u32{ d.s[7], d.s[6], d.s[3], d.s[2] };
const s_v = @as(*[16]v4u32, @ptrCast(&s));
diff --git a/lib/std/crypto/tlcsprng.zig b/lib/std/crypto/tlcsprng.zig
index bbca32024e..973a0f8bb4 100644
--- a/lib/std/crypto/tlcsprng.zig
+++ b/lib/std/crypto/tlcsprng.zig
@@ -10,7 +10,7 @@ const os = std.os;
/// We use this as a layer of indirection because global const pointers cannot
/// point to thread-local variables.
-pub const interface = std.rand.Random{
+pub const interface = std.Random{
.ptr = undefined,
.fillFn = tlsCsprngFill,
};
@@ -43,7 +43,7 @@ const maybe_have_wipe_on_fork = builtin.os.isAtLeast(.linux, .{
}) orelse true;
const is_haiku = builtin.os.tag == .haiku;
-const Rng = std.rand.DefaultCsprng;
+const Rng = std.Random.DefaultCsprng;
const Context = struct {
init_state: enum(u8) { uninitialized = 0, initialized, failed },
diff --git a/lib/std/debug.zig b/lib/std/debug.zig
index 7f44cfa770..97a0c6912b 100644
--- a/lib/std/debug.zig
+++ b/lib/std/debug.zig
@@ -688,7 +688,7 @@ pub const StackIterator = struct {
}
return true;
- } else if (@hasDecl(os.system, "msync") and native_os != .wasi) {
+ } else if (@hasDecl(os.system, "msync") and native_os != .wasi and native_os != .emscripten) {
os.msync(aligned_memory, os.MSF.ASYNC) catch |err| {
switch (err) {
os.MSyncError.UnmappedMemory => {
@@ -1141,8 +1141,8 @@ pub fn readElfDebugInfo(
) !ModuleDebugInfo {
nosuspend {
const elf_file = (if (elf_filename) |filename| blk: {
- break :blk fs.cwd().openFile(filename, .{ .intended_io_mode = .blocking });
- } else fs.openSelfExe(.{ .intended_io_mode = .blocking })) catch |err| switch (err) {
+ break :blk fs.cwd().openFile(filename, .{});
+ } else fs.openSelfExe(.{})) catch |err| switch (err) {
error.FileNotFound => return error.MissingDebugInfo,
else => return err,
};
@@ -1212,8 +1212,7 @@ pub fn readElfDebugInfo(
const chdr = section_reader.readStruct(elf.Chdr) catch continue;
if (chdr.ch_type != .ZLIB) continue;
- var zlib_stream = std.compress.zlib.decompressStream(allocator, section_stream.reader()) catch continue;
- defer zlib_stream.deinit();
+ var zlib_stream = std.compress.zlib.decompressor(section_stream.reader());
const decompressed_section = try allocator.alloc(u8, chdr.ch_size);
errdefer allocator.free(decompressed_section);
@@ -1452,7 +1451,7 @@ fn readMachODebugInfo(allocator: mem.Allocator, macho_file: File) !ModuleDebugIn
fn printLineFromFileAnyOs(out_stream: anytype, line_info: LineInfo) !void {
// Need this to always block even in async I/O mode, because this could potentially
// be called from e.g. the event loop code crashing.
- var f = try fs.cwd().openFile(line_info.file_name, .{ .intended_io_mode = .blocking });
+ var f = try fs.cwd().openFile(line_info.file_name, .{});
defer f.close();
// TODO fstat and make sure that the file has the correct size
@@ -1640,7 +1639,6 @@ const MachoSymbol = struct {
}
};
-/// `file` is expected to have been opened with .intended_io_mode == .blocking.
/// Takes ownership of file, even on error.
/// TODO it's weird to take ownership even on error, rework this code.
fn mapWholeFile(file: File) ![]align(mem.page_size) const u8 {
@@ -1824,9 +1822,7 @@ pub const DebugInfo = struct {
errdefer self.allocator.destroy(obj_di);
const macho_path = mem.sliceTo(std.c._dyld_get_image_name(i), 0);
- const macho_file = fs.cwd().openFile(macho_path, .{
- .intended_io_mode = .blocking,
- }) catch |err| switch (err) {
+ const macho_file = fs.cwd().openFile(macho_path, .{}) catch |err| switch (err) {
error.FileNotFound => return error.MissingDebugInfo,
else => return err,
};
@@ -2162,7 +2158,7 @@ pub const ModuleDebugInfo = switch (native_os) {
}
fn loadOFile(self: *@This(), allocator: mem.Allocator, o_file_path: []const u8) !*OFileInfo {
- const o_file = try fs.cwd().openFile(o_file_path, .{ .intended_io_mode = .blocking });
+ const o_file = try fs.cwd().openFile(o_file_path, .{});
const mapped_mem = try mapWholeFile(o_file);
const hdr: *const macho.mach_header_64 = @ptrCast(@alignCast(mapped_mem.ptr));
@@ -2448,7 +2444,7 @@ pub const ModuleDebugInfo = switch (native_os) {
return &self.dwarf;
}
},
- .wasi => struct {
+ .wasi, .emscripten => struct {
pub fn deinit(self: *@This(), allocator: mem.Allocator) void {
_ = self;
_ = allocator;
diff --git a/lib/std/dwarf.zig b/lib/std/dwarf.zig
index 97864c03ff..2544f35c42 100644
--- a/lib/std/dwarf.zig
+++ b/lib/std/dwarf.zig
@@ -1,12 +1,9 @@
const builtin = @import("builtin");
const std = @import("std.zig");
const debug = std.debug;
-const fs = std.fs;
-const io = std.io;
const mem = std.mem;
const math = std.math;
-const leb = @import("leb128.zig");
-const assert = std.debug.assert;
+const assert = debug.assert;
const native_endian = builtin.cpu.arch.endian();
pub const TAG = @import("dwarf/TAG.zig");
@@ -167,8 +164,8 @@ const Func = struct {
pub const CompileUnit = struct {
version: u16,
- is_64: bool,
- die: *Die,
+ format: Format,
+ die: Die,
pc_range: ?PcRange,
str_offsets_base: usize,
@@ -178,101 +175,88 @@ pub const CompileUnit = struct {
frame_base: ?*const FormValue,
};
-const AbbrevTable = std.ArrayList(AbbrevTableEntry);
-
-const AbbrevTableHeader = struct {
- // offset from .debug_abbrev
- offset: u64,
- table: AbbrevTable,
-
- fn deinit(header: *AbbrevTableHeader) void {
- for (header.table.items) |*entry| {
- entry.deinit();
- }
- header.table.deinit();
- }
-};
-
-const AbbrevTableEntry = struct {
- has_children: bool,
- abbrev_code: u64,
+const Abbrev = struct {
+ code: u64,
tag_id: u64,
- attrs: std.ArrayList(AbbrevAttr),
+ has_children: bool,
+ attrs: []Attr,
- fn deinit(entry: *AbbrevTableEntry) void {
- entry.attrs.deinit();
+ fn deinit(abbrev: *Abbrev, allocator: mem.Allocator) void {
+ allocator.free(abbrev.attrs);
+ abbrev.* = undefined;
}
-};
-const AbbrevAttr = struct {
- attr_id: u64,
- form_id: u64,
- /// Only valid if form_id is .implicit_const
- payload: i64,
-};
+ const Attr = struct {
+ id: u64,
+ form_id: u64,
+ /// Only valid if form_id is .implicit_const
+ payload: i64,
+ };
-pub const FormValue = union(enum) {
- Address: u64,
- AddrOffset: usize,
- Block: []u8,
- Const: Constant,
- ExprLoc: []u8,
- Flag: bool,
- SecOffset: u64,
- Ref: u64,
- RefAddr: u64,
- String: []const u8,
- StrPtr: u64,
- StrOffset: usize,
- LineStrPtr: u64,
- LocListOffset: u64,
- RangeListOffset: u64,
- data16: [16]u8,
-
- fn getString(fv: FormValue, di: DwarfInfo) ![]const u8 {
- switch (fv) {
- .String => |s| return s,
- .StrPtr => |off| return di.getString(off),
- .LineStrPtr => |off| return di.getLineString(off),
- else => return badDwarf(),
+ const Table = struct {
+ // offset from .debug_abbrev
+ offset: u64,
+ abbrevs: []Abbrev,
+
+ fn deinit(table: *Table, allocator: mem.Allocator) void {
+ for (table.abbrevs) |*abbrev| {
+ abbrev.deinit(allocator);
+ }
+ allocator.free(table.abbrevs);
+ table.* = undefined;
}
- }
- fn getUInt(fv: FormValue, comptime U: type) !U {
- switch (fv) {
- .Const => |c| {
- const int = try c.asUnsignedLe();
- return math.cast(U, int) orelse return badDwarf();
- },
- .SecOffset => |x| return math.cast(U, x) orelse return badDwarf(),
- else => return badDwarf(),
+ fn get(table: *const Table, abbrev_code: u64) ?*const Abbrev {
+ return for (table.abbrevs) |*abbrev| {
+ if (abbrev.code == abbrev_code) break abbrev;
+ } else null;
}
- }
+ };
+};
- fn getData16(fv: FormValue) ![16]u8 {
+pub const FormValue = union(enum) {
+ addr: u64,
+ addrx: usize,
+ block: []const u8,
+ udata: u64,
+ data16: *const [16]u8,
+ sdata: i64,
+ exprloc: []const u8,
+ flag: bool,
+ sec_offset: u64,
+ ref: u64,
+ ref_addr: u64,
+ string: [:0]const u8,
+ strp: u64,
+ strx: usize,
+ line_strp: u64,
+ loclistx: u64,
+ rnglistx: u64,
+
+ fn getString(fv: FormValue, di: DwarfInfo) ![:0]const u8 {
switch (fv) {
- .data16 => |d| return d,
+ .string => |s| return s,
+ .strp => |off| return di.getString(off),
+ .line_strp => |off| return di.getLineString(off),
else => return badDwarf(),
}
}
-};
-
-const Constant = struct {
- payload: u64,
- signed: bool,
- fn asUnsignedLe(self: Constant) !u64 {
- if (self.signed) return badDwarf();
- return self.payload;
+ fn getUInt(fv: FormValue, comptime U: type) !U {
+ return switch (fv) {
+ inline .udata,
+ .sdata,
+ .sec_offset,
+ => |c| math.cast(U, c) orelse badDwarf(),
+ else => badDwarf(),
+ };
}
};
const Die = struct {
- // Arena for Die's Attr's and FormValue's.
- arena: std.heap.ArenaAllocator,
tag_id: u64,
has_children: bool,
- attrs: std.ArrayListUnmanaged(Attr) = .{},
+ attrs: []Attr,
const Attr = struct {
id: u64,
@@ -280,12 +264,12 @@ const Die = struct {
};
fn deinit(self: *Die, allocator: mem.Allocator) void {
- self.arena.deinit();
- self.attrs.deinit(allocator);
+ allocator.free(self.attrs);
+ self.* = undefined;
}
fn getAttr(self: *const Die, id: u64) ?*const FormValue {
- for (self.attrs.items) |*attr| {
+ for (self.attrs) |*attr| {
if (attr.id == id) return &attr.value;
}
return null;
@@ -299,8 +283,8 @@ const Die = struct {
) error{ InvalidDebugInfo, MissingDebugInfo }!u64 {
const form_value = self.getAttr(id) orelse return error.MissingDebugInfo;
return switch (form_value.*) {
- FormValue.Address => |value| value,
- FormValue.AddrOffset => |index| di.readDebugAddr(compile_unit, index),
+ .addr => |value| value,
+ .addrx => |index| di.readDebugAddr(compile_unit, index),
else => error.InvalidDebugInfo,
};
}
@@ -313,7 +297,7 @@ const Die = struct {
fn getAttrUnsignedLe(self: *const Die, id: u64) !u64 {
const form_value = self.getAttr(id) orelse return error.MissingDebugInfo;
return switch (form_value.*) {
- FormValue.Const => |value| value.asUnsignedLe(),
+ .Const => |value| value.asUnsignedLe(),
else => error.InvalidDebugInfo,
};
}
@@ -321,7 +305,7 @@ const Die = struct {
fn getAttrRef(self: *const Die, id: u64) !u64 {
const form_value = self.getAttr(id) orelse return error.MissingDebugInfo;
return switch (form_value.*) {
- FormValue.Ref => |value| value,
+ .ref => |value| value,
else => error.InvalidDebugInfo,
};
}
@@ -335,24 +319,27 @@ const Die = struct {
) error{ InvalidDebugInfo, MissingDebugInfo }![]const u8 {
const form_value = self.getAttr(id) orelse return error.MissingDebugInfo;
switch (form_value.*) {
- FormValue.String => |value| return value,
- FormValue.StrPtr => |offset| return di.getString(offset),
- FormValue.StrOffset => |index| {
+ .string => |value| return value,
+ .strp => |offset| return di.getString(offset),
+ .strx => |index| {
const debug_str_offsets = di.section(.debug_str_offsets) orelse return badDwarf();
if (compile_unit.str_offsets_base == 0) return badDwarf();
- if (compile_unit.is_64) {
- const byte_offset = compile_unit.str_offsets_base + 8 * index;
- if (byte_offset + 8 > debug_str_offsets.len) return badDwarf();
- const offset = mem.readInt(u64, debug_str_offsets[byte_offset..][0..8], di.endian);
- return getStringGeneric(opt_str, offset);
- } else {
- const byte_offset = compile_unit.str_offsets_base + 4 * index;
- if (byte_offset + 4 > debug_str_offsets.len) return badDwarf();
- const offset = mem.readInt(u32, debug_str_offsets[byte_offset..][0..4], di.endian);
- return getStringGeneric(opt_str, offset);
+ switch (compile_unit.format) {
+ .@"32" => {
+ const byte_offset = compile_unit.str_offsets_base + 4 * index;
+ if (byte_offset + 4 > debug_str_offsets.len) return badDwarf();
+ const offset = mem.readInt(u32, debug_str_offsets[byte_offset..][0..4], di.endian);
+ return getStringGeneric(opt_str, offset);
+ },
+ .@"64" => {
+ const byte_offset = compile_unit.str_offsets_base + 8 * index;
+ if (byte_offset + 8 > debug_str_offsets.len) return badDwarf();
+ const offset = mem.readInt(u64, debug_str_offsets[byte_offset..][0..8], di.endian);
+ return getStringGeneric(opt_str, offset);
+ },
}
},
- FormValue.LineStrPtr => |offset| return di.getLineString(offset),
+ .line_strp => |offset| return di.getLineString(offset),
else => return badDwarf(),
}
}
@@ -458,7 +445,7 @@ const LineNumberProgram = struct {
if (file_entry.dir_index >= self.include_dirs.len) return badDwarf();
const dir_name = self.include_dirs[file_entry.dir_index].path;
- const file_name = try fs.path.join(allocator, &[_][]const u8{
+ const file_name = try std.fs.path.join(allocator, &[_][]const u8{
dir_name, file_entry.path,
});
@@ -481,168 +468,97 @@ const LineNumberProgram = struct {
}
};
-fn readUnitLength(in_stream: anytype, endian: std.builtin.Endian, is_64: *bool) !u64 {
- const first_32_bits = try in_stream.readInt(u32, endian);
- is_64.* = (first_32_bits == 0xffffffff);
- if (is_64.*) {
- return in_stream.readInt(u64, endian);
- } else {
- if (first_32_bits >= 0xfffffff0) return badDwarf();
- // TODO this cast should not be needed
- return @as(u64, first_32_bits);
- }
-}
-
-// TODO the nosuspends here are workarounds
-fn readAllocBytes(allocator: mem.Allocator, in_stream: anytype, size: usize) ![]u8 {
- const buf = try allocator.alloc(u8, size);
- errdefer allocator.free(buf);
- if ((try nosuspend in_stream.read(buf)) < size) return error.EndOfFile;
- return buf;
-}
-
-// TODO the nosuspends here are workarounds
-fn readAddress(in_stream: anytype, endian: std.builtin.Endian, is_64: bool) !u64 {
- return nosuspend if (is_64)
- try in_stream.readInt(u64, endian)
- else
- @as(u64, try in_stream.readInt(u32, endian));
-}
-
-fn parseFormValueBlockLen(allocator: mem.Allocator, in_stream: anytype, size: usize) !FormValue {
- const buf = try readAllocBytes(allocator, in_stream, size);
- return FormValue{ .Block = buf };
-}
-
-// TODO the nosuspends here are workarounds
-fn parseFormValueBlock(allocator: mem.Allocator, in_stream: anytype, endian: std.builtin.Endian, size: usize) !FormValue {
- const block_len = try nosuspend in_stream.readVarInt(usize, endian, size);
- return parseFormValueBlockLen(allocator, in_stream, block_len);
-}
-
-fn parseFormValueConstant(in_stream: anytype, signed: bool, endian: std.builtin.Endian, comptime size: i32) !FormValue {
- // TODO: Please forgive me, I've worked around zig not properly spilling some intermediate values here.
- // `nosuspend` should be removed from all the function calls once it is fixed.
- return FormValue{
- .Const = Constant{
- .signed = signed,
- .payload = switch (size) {
- 1 => try nosuspend in_stream.readInt(u8, endian),
- 2 => try nosuspend in_stream.readInt(u16, endian),
- 4 => try nosuspend in_stream.readInt(u32, endian),
- 8 => try nosuspend in_stream.readInt(u64, endian),
- -1 => blk: {
- if (signed) {
- const x = try nosuspend leb.readILEB128(i64, in_stream);
- break :blk @as(u64, @bitCast(x));
- } else {
- const x = try nosuspend leb.readULEB128(u64, in_stream);
- break :blk x;
- }
- },
- else => @compileError("Invalid size"),
- },
+const UnitHeader = struct {
+ format: Format,
+ header_length: u4,
+ unit_length: u64,
+};
+fn readUnitHeader(fbr: *FixedBufferReader) !UnitHeader {
+ return switch (try fbr.readInt(u32)) {
+ 0...0xfffffff0 - 1 => |unit_length| .{
+ .format = .@"32",
+ .header_length = 4,
+ .unit_length = unit_length,
},
- };
-}
-
-// TODO the nosuspends here are workarounds
-fn parseFormValueRef(in_stream: anytype, endian: std.builtin.Endian, size: i32) !FormValue {
- return FormValue{
- .Ref = switch (size) {
- 1 => try nosuspend in_stream.readInt(u8, endian),
- 2 => try nosuspend in_stream.readInt(u16, endian),
- 4 => try nosuspend in_stream.readInt(u32, endian),
- 8 => try nosuspend in_stream.readInt(u64, endian),
- -1 => try nosuspend leb.readULEB128(u64, in_stream),
- else => unreachable,
+ 0xfffffff0...0xffffffff - 1 => badDwarf(),
+ 0xffffffff => .{
+ .format = .@"64",
+ .header_length = 12,
+ .unit_length = try fbr.readInt(u64),
},
};
}
-// TODO the nosuspends here are workarounds
-fn parseFormValue(allocator: mem.Allocator, in_stream: anytype, form_id: u64, endian: std.builtin.Endian, is_64: bool) anyerror!FormValue {
+fn parseFormValue(
+ fbr: *FixedBufferReader,
+ form_id: u64,
+ format: Format,
+ implicit_const: ?i64,
+) anyerror!FormValue {
return switch (form_id) {
- FORM.addr => FormValue{ .Address = try readAddress(in_stream, endian, @sizeOf(usize) == 8) },
- FORM.addrx1 => return FormValue{ .AddrOffset = try in_stream.readInt(u8, endian) },
- FORM.addrx2 => return FormValue{ .AddrOffset = try in_stream.readInt(u16, endian) },
- FORM.addrx3 => return FormValue{ .AddrOffset = try in_stream.readInt(u24, endian) },
- FORM.addrx4 => return FormValue{ .AddrOffset = try in_stream.readInt(u32, endian) },
- FORM.addrx => return FormValue{ .AddrOffset = try nosuspend leb.readULEB128(usize, in_stream) },
-
- FORM.block1 => parseFormValueBlock(allocator, in_stream, endian, 1),
- FORM.block2 => parseFormValueBlock(allocator, in_stream, endian, 2),
- FORM.block4 => parseFormValueBlock(allocator, in_stream, endian, 4),
- FORM.block => {
- const block_len = try nosuspend leb.readULEB128(usize, in_stream);
- return parseFormValueBlockLen(allocator, in_stream, block_len);
- },
- FORM.data1 => parseFormValueConstant(in_stream, false, endian, 1),
- FORM.data2 => parseFormValueConstant(in_stream, false, endian, 2),
- FORM.data4 => parseFormValueConstant(in_stream, false, endian, 4),
- FORM.data8 => parseFormValueConstant(in_stream, false, endian, 8),
- FORM.data16 => {
- var buf: [16]u8 = undefined;
- if ((try nosuspend in_stream.readAll(&buf)) < 16) return error.EndOfFile;
- return FormValue{ .data16 = buf };
- },
- FORM.udata, FORM.sdata => {
- const signed = form_id == FORM.sdata;
- return parseFormValueConstant(in_stream, signed, endian, -1);
- },
- FORM.exprloc => {
- const size = try nosuspend leb.readULEB128(usize, in_stream);
- const buf = try readAllocBytes(allocator, in_stream, size);
- return FormValue{ .ExprLoc = buf };
- },
- FORM.flag => FormValue{ .Flag = (try nosuspend in_stream.readByte()) != 0 },
- FORM.flag_present => FormValue{ .Flag = true },
- FORM.sec_offset => FormValue{ .SecOffset = try readAddress(in_stream, endian, is_64) },
-
- FORM.ref1 => parseFormValueRef(in_stream, endian, 1),
- FORM.ref2 => parseFormValueRef(in_stream, endian, 2),
- FORM.ref4 => parseFormValueRef(in_stream, endian, 4),
- FORM.ref8 => parseFormValueRef(in_stream, endian, 8),
- FORM.ref_udata => parseFormValueRef(in_stream, endian, -1),
-
- FORM.ref_addr => FormValue{ .RefAddr = try readAddress(in_stream, endian, is_64) },
- FORM.ref_sig8 => FormValue{ .Ref = try nosuspend in_stream.readInt(u64, endian) },
-
- FORM.string => FormValue{ .String = try in_stream.readUntilDelimiterAlloc(allocator, 0, math.maxInt(usize)) },
- FORM.strp => FormValue{ .StrPtr = try readAddress(in_stream, endian, is_64) },
- FORM.strx1 => return FormValue{ .StrOffset = try in_stream.readInt(u8, endian) },
- FORM.strx2 => return FormValue{ .StrOffset = try in_stream.readInt(u16, endian) },
- FORM.strx3 => return FormValue{ .StrOffset = try in_stream.readInt(u24, endian) },
- FORM.strx4 => return FormValue{ .StrOffset = try in_stream.readInt(u32, endian) },
- FORM.strx => return FormValue{ .StrOffset = try nosuspend leb.readULEB128(usize, in_stream) },
- FORM.line_strp => FormValue{ .LineStrPtr = try readAddress(in_stream, endian, is_64) },
- FORM.indirect => {
- const child_form_id = try nosuspend leb.readULEB128(u64, in_stream);
- if (true) {
- return parseFormValue(allocator, in_stream, child_form_id, endian, is_64);
- }
- const F = @TypeOf(async parseFormValue(allocator, in_stream, child_form_id, endian, is_64));
- const frame = try allocator.create(F);
- defer allocator.destroy(frame);
- return await @asyncCall(frame, {}, parseFormValue, .{ allocator, in_stream, child_form_id, endian, is_64 });
- },
- FORM.implicit_const => FormValue{ .Const = Constant{ .signed = true, .payload = undefined } },
- FORM.loclistx => return FormValue{ .LocListOffset = try nosuspend leb.readULEB128(u64, in_stream) },
- FORM.rnglistx => return FormValue{ .RangeListOffset = try nosuspend leb.readULEB128(u64, in_stream) },
+ FORM.addr => .{ .addr = try fbr.readAddress(switch (@bitSizeOf(usize)) {
+ 32 => .@"32",
+ 64 => .@"64",
+ else => @compileError("unsupported @sizeOf(usize)"),
+ }) },
+ FORM.addrx1 => .{ .addrx = try fbr.readInt(u8) },
+ FORM.addrx2 => .{ .addrx = try fbr.readInt(u16) },
+ FORM.addrx3 => .{ .addrx = try fbr.readInt(u24) },
+ FORM.addrx4 => .{ .addrx = try fbr.readInt(u32) },
+ FORM.addrx => .{ .addrx = try fbr.readUleb128(usize) },
+
+ FORM.block1,
+ FORM.block2,
+ FORM.block4,
+ FORM.block,
+ => .{ .block = try fbr.readBytes(switch (form_id) {
+ FORM.block1 => try fbr.readInt(u8),
+ FORM.block2 => try fbr.readInt(u16),
+ FORM.block4 => try fbr.readInt(u32),
+ FORM.block => try fbr.readUleb128(usize),
+ else => unreachable,
+ }) },
+
+ FORM.data1 => .{ .udata = try fbr.readInt(u8) },
+ FORM.data2 => .{ .udata = try fbr.readInt(u16) },
+ FORM.data4 => .{ .udata = try fbr.readInt(u32) },
+ FORM.data8 => .{ .udata = try fbr.readInt(u64) },
+ FORM.data16 => .{ .data16 = (try fbr.readBytes(16))[0..16] },
+ FORM.udata => .{ .udata = try fbr.readUleb128(u64) },
+ FORM.sdata => .{ .sdata = try fbr.readIleb128(i64) },
+ FORM.exprloc => .{ .exprloc = try fbr.readBytes(try fbr.readUleb128(usize)) },
+ FORM.flag => .{ .flag = (try fbr.readByte()) != 0 },
+ FORM.flag_present => .{ .flag = true },
+ FORM.sec_offset => .{ .sec_offset = try fbr.readAddress(format) },
+
+ FORM.ref1 => .{ .ref = try fbr.readInt(u8) },
+ FORM.ref2 => .{ .ref = try fbr.readInt(u16) },
+ FORM.ref4 => .{ .ref = try fbr.readInt(u32) },
+ FORM.ref8 => .{ .ref = try fbr.readInt(u64) },
+ FORM.ref_udata => .{ .ref = try fbr.readUleb128(u64) },
+
+ FORM.ref_addr => .{ .ref_addr = try fbr.readAddress(format) },
+ FORM.ref_sig8 => .{ .ref = try fbr.readInt(u64) },
+
+ FORM.string => .{ .string = try fbr.readBytesTo(0) },
+ FORM.strp => .{ .strp = try fbr.readAddress(format) },
+ FORM.strx1 => .{ .strx = try fbr.readInt(u8) },
+ FORM.strx2 => .{ .strx = try fbr.readInt(u16) },
+ FORM.strx3 => .{ .strx = try fbr.readInt(u24) },
+ FORM.strx4 => .{ .strx = try fbr.readInt(u32) },
+ FORM.strx => .{ .strx = try fbr.readUleb128(usize) },
+ FORM.line_strp => .{ .line_strp = try fbr.readAddress(format) },
+ FORM.indirect => parseFormValue(fbr, try fbr.readUleb128(u64), format, implicit_const),
+ FORM.implicit_const => .{ .sdata = implicit_const orelse return badDwarf() },
+ FORM.loclistx => .{ .loclistx = try fbr.readUleb128(u64) },
+ FORM.rnglistx => .{ .rnglistx = try fbr.readUleb128(u64) },
else => {
- //std.debug.print("unrecognized form id: {x}\n", .{form_id});
+ //debug.print("unrecognized form id: {x}\n", .{form_id});
return badDwarf();
},
};
}
-fn getAbbrevTableEntry(abbrev_table: *const AbbrevTable, abbrev_code: u64) ?*const AbbrevTableEntry {
- for (abbrev_table.items) |*table_entry| {
- if (table_entry.abbrev_code == abbrev_code) return table_entry;
- }
- return null;
-}
-
pub const DwarfSection = enum {
debug_info,
debug_abbrev,
@@ -690,7 +606,7 @@ pub const DwarfInfo = struct {
is_macho: bool,
// Filled later by the initializer
- abbrev_table_list: std.ArrayListUnmanaged(AbbrevTableHeader) = .{},
+ abbrev_table_list: std.ArrayListUnmanaged(Abbrev.Table) = .{},
compile_unit_list: std.ArrayListUnmanaged(CompileUnit) = .{},
func_list: std.ArrayListUnmanaged(Func) = .{},
@@ -713,17 +629,17 @@ pub const DwarfInfo = struct {
if (opt_section) |s| if (s.owned) allocator.free(s.data);
}
for (di.abbrev_table_list.items) |*abbrev| {
- abbrev.deinit();
+ abbrev.deinit(allocator);
}
di.abbrev_table_list.deinit(allocator);
for (di.compile_unit_list.items) |*cu| {
cu.die.deinit(allocator);
- allocator.destroy(cu.die);
}
di.compile_unit_list.deinit(allocator);
di.func_list.deinit(allocator);
di.cie_map.deinit(allocator);
di.fde_list.deinit(allocator);
+ di.* = undefined;
}
pub fn getSymbolName(di: *DwarfInfo, address: u64) ?[]const u8 {
@@ -739,102 +655,125 @@ pub const DwarfInfo = struct {
}
fn scanAllFunctions(di: *DwarfInfo, allocator: mem.Allocator) !void {
- var stream = io.fixedBufferStream(di.section(.debug_info).?);
- const in = stream.reader();
- const seekable = stream.seekableStream();
+ var fbr: FixedBufferReader = .{ .buf = di.section(.debug_info).?, .endian = di.endian };
var this_unit_offset: u64 = 0;
- var tmp_arena = std.heap.ArenaAllocator.init(allocator);
- defer tmp_arena.deinit();
- const arena = tmp_arena.allocator();
+ while (this_unit_offset < fbr.buf.len) {
+ try fbr.seekTo(this_unit_offset);
- while (this_unit_offset < try seekable.getEndPos()) {
- try seekable.seekTo(this_unit_offset);
+ const unit_header = try readUnitHeader(&fbr);
+ if (unit_header.unit_length == 0) return;
+ const next_offset = unit_header.header_length + unit_header.unit_length;
- var is_64: bool = undefined;
- const unit_length = try readUnitLength(in, di.endian, &is_64);
- if (unit_length == 0) return;
- const next_offset = unit_length + (if (is_64) @as(usize, 12) else @as(usize, 4));
-
- const version = try in.readInt(u16, di.endian);
+ const version = try fbr.readInt(u16);
if (version < 2 or version > 5) return badDwarf();
var address_size: u8 = undefined;
var debug_abbrev_offset: u64 = undefined;
if (version >= 5) {
- const unit_type = try in.readInt(u8, di.endian);
+ const unit_type = try fbr.readInt(u8);
if (unit_type != UT.compile) return badDwarf();
- address_size = try in.readByte();
- debug_abbrev_offset = if (is_64)
- try in.readInt(u64, di.endian)
- else
- try in.readInt(u32, di.endian);
+ address_size = try fbr.readByte();
+ debug_abbrev_offset = try fbr.readAddress(unit_header.format);
} else {
- debug_abbrev_offset = if (is_64)
- try in.readInt(u64, di.endian)
- else
- try in.readInt(u32, di.endian);
- address_size = try in.readByte();
+ debug_abbrev_offset = try fbr.readAddress(unit_header.format);
+ address_size = try fbr.readByte();
}
if (address_size != @sizeOf(usize)) return badDwarf();
- const compile_unit_pos = try seekable.getPos();
const abbrev_table = try di.getAbbrevTable(allocator, debug_abbrev_offset);
- try seekable.seekTo(compile_unit_pos);
+ var max_attrs: usize = 0;
+ var zig_padding_abbrev_code: u7 = 0;
+ for (abbrev_table.abbrevs) |abbrev| {
+ max_attrs = @max(max_attrs, abbrev.attrs.len);
+ if (math.cast(u7, abbrev.code)) |code| {
+ if (abbrev.tag_id == TAG.ZIG_padding and
+ !abbrev.has_children and
+ abbrev.attrs.len == 0)
+ {
+ zig_padding_abbrev_code = code;
+ }
+ }
+ }
+ const attrs_buf = try allocator.alloc(Die.Attr, max_attrs * 3);
+ defer allocator.free(attrs_buf);
+ var attrs_bufs: [3][]Die.Attr = undefined;
+ for (&attrs_bufs, 0..) |*buf, index| buf.* = attrs_buf[index * max_attrs ..][0..max_attrs];
const next_unit_pos = this_unit_offset + next_offset;
- var compile_unit: CompileUnit = undefined;
+ var compile_unit: CompileUnit = .{
+ .version = version,
+ .format = unit_header.format,
+ .die = undefined,
+ .pc_range = null,
- while ((try seekable.getPos()) < next_unit_pos) {
- var die_obj = (try di.parseDie(arena, in, abbrev_table, is_64)) orelse continue;
- const after_die_offset = try seekable.getPos();
+ .str_offsets_base = 0,
+ .addr_base = 0,
+ .rnglists_base = 0,
+ .loclists_base = 0,
+ .frame_base = null,
+ };
+
+ while (true) {
+ fbr.pos = mem.indexOfNonePos(u8, fbr.buf, fbr.pos, &.{
+ zig_padding_abbrev_code, 0,
+ }) orelse fbr.buf.len;
+ if (fbr.pos >= next_unit_pos) break;
+ var die_obj = (try parseDie(
+ &fbr,
+ attrs_bufs[0],
+ abbrev_table,
+ unit_header.format,
+ )) orelse continue;
switch (die_obj.tag_id) {
TAG.compile_unit => {
- compile_unit = .{
- .version = version,
- .is_64 = is_64,
- .die = &die_obj,
- .pc_range = null,
-
- .str_offsets_base = if (die_obj.getAttr(AT.str_offsets_base)) |fv| try fv.getUInt(usize) else 0,
- .addr_base = if (die_obj.getAttr(AT.addr_base)) |fv| try fv.getUInt(usize) else 0,
- .rnglists_base = if (die_obj.getAttr(AT.rnglists_base)) |fv| try fv.getUInt(usize) else 0,
- .loclists_base = if (die_obj.getAttr(AT.loclists_base)) |fv| try fv.getUInt(usize) else 0,
- .frame_base = die_obj.getAttr(AT.frame_base),
- };
+ compile_unit.die = die_obj;
+ compile_unit.die.attrs = attrs_bufs[1][0..die_obj.attrs.len];
+ @memcpy(compile_unit.die.attrs, die_obj.attrs);
+
+ compile_unit.str_offsets_base = if (die_obj.getAttr(AT.str_offsets_base)) |fv| try fv.getUInt(usize) else 0;
+ compile_unit.addr_base = if (die_obj.getAttr(AT.addr_base)) |fv| try fv.getUInt(usize) else 0;
+ compile_unit.rnglists_base = if (die_obj.getAttr(AT.rnglists_base)) |fv| try fv.getUInt(usize) else 0;
+ compile_unit.loclists_base = if (die_obj.getAttr(AT.loclists_base)) |fv| try fv.getUInt(usize) else 0;
+ compile_unit.frame_base = die_obj.getAttr(AT.frame_base);
},
TAG.subprogram, TAG.inlined_subroutine, TAG.subroutine, TAG.entry_point => {
const fn_name = x: {
- var depth: i32 = 3;
var this_die_obj = die_obj;
// Prevent endless loops
- while (depth > 0) : (depth -= 1) {
+ for (0..3) |_| {
if (this_die_obj.getAttr(AT.name)) |_| {
break :x try this_die_obj.getAttrString(di, AT.name, di.section(.debug_str), compile_unit);
} else if (this_die_obj.getAttr(AT.abstract_origin)) |_| {
+ const after_die_offset = fbr.pos;
+ defer fbr.pos = after_die_offset;
+
// Follow the DIE it points to and repeat
const ref_offset = try this_die_obj.getAttrRef(AT.abstract_origin);
if (ref_offset > next_offset) return badDwarf();
- try seekable.seekTo(this_unit_offset + ref_offset);
- this_die_obj = (try di.parseDie(
- arena,
- in,
+ try fbr.seekTo(this_unit_offset + ref_offset);
+ this_die_obj = (try parseDie(
+ &fbr,
+ attrs_bufs[2],
abbrev_table,
- is_64,
+ unit_header.format,
)) orelse return badDwarf();
} else if (this_die_obj.getAttr(AT.specification)) |_| {
+ const after_die_offset = fbr.pos;
+ defer fbr.pos = after_die_offset;
+
// Follow the DIE it points to and repeat
const ref_offset = try this_die_obj.getAttrRef(AT.specification);
if (ref_offset > next_offset) return badDwarf();
- try seekable.seekTo(this_unit_offset + ref_offset);
- this_die_obj = (try di.parseDie(
- arena,
- in,
+ try fbr.seekTo(this_unit_offset + ref_offset);
+ this_die_obj = (try parseDie(
+ &fbr,
+ attrs_bufs[2],
abbrev_table,
- is_64,
+ unit_header.format,
)) orelse return badDwarf();
} else {
break :x null;
@@ -847,15 +786,12 @@ pub const DwarfInfo = struct {
var range_added = if (die_obj.getAttrAddr(di, AT.low_pc, compile_unit)) |low_pc| blk: {
if (die_obj.getAttr(AT.high_pc)) |high_pc_value| {
const pc_end = switch (high_pc_value.*) {
- FormValue.Address => |value| value,
- FormValue.Const => |value| b: {
- const offset = try value.asUnsignedLe();
- break :b (low_pc + offset);
- },
+ .addr => |value| value,
+ .udata => |offset| low_pc + offset,
else => return badDwarf(),
};
- try di.func_list.append(allocator, Func{
+ try di.func_list.append(allocator, .{
.name = fn_name,
.pc_range = .{
.start = low_pc,
@@ -880,7 +816,7 @@ pub const DwarfInfo = struct {
while (try iter.next()) |range| {
range_added = true;
- try di.func_list.append(allocator, Func{
+ try di.func_list.append(allocator, .{
.name = fn_name,
.pc_range = .{
.start = range.start_addr,
@@ -891,7 +827,7 @@ pub const DwarfInfo = struct {
}
if (fn_name != null and !range_added) {
- try di.func_list.append(allocator, Func{
+ try di.func_list.append(allocator, .{
.name = fn_name,
.pc_range = null,
});
@@ -899,8 +835,6 @@ pub const DwarfInfo = struct {
},
else => {},
}
-
- try seekable.seekTo(after_die_offset);
}
this_unit_offset += next_offset;
@@ -908,56 +842,57 @@ pub const DwarfInfo = struct {
}
fn scanAllCompileUnits(di: *DwarfInfo, allocator: mem.Allocator) !void {
- var stream = io.fixedBufferStream(di.section(.debug_info).?);
- const in = stream.reader();
- const seekable = stream.seekableStream();
+ var fbr: FixedBufferReader = .{ .buf = di.section(.debug_info).?, .endian = di.endian };
var this_unit_offset: u64 = 0;
- while (this_unit_offset < try seekable.getEndPos()) {
- try seekable.seekTo(this_unit_offset);
+ var attrs_buf = std.ArrayList(Die.Attr).init(allocator);
+ defer attrs_buf.deinit();
+
+ while (this_unit_offset < fbr.buf.len) {
+ try fbr.seekTo(this_unit_offset);
- var is_64: bool = undefined;
- const unit_length = try readUnitLength(in, di.endian, &is_64);
- if (unit_length == 0) return;
- const next_offset = unit_length + (if (is_64) @as(usize, 12) else @as(usize, 4));
+ const unit_header = try readUnitHeader(&fbr);
+ if (unit_header.unit_length == 0) return;
+ const next_offset = unit_header.header_length + unit_header.unit_length;
- const version = try in.readInt(u16, di.endian);
+ const version = try fbr.readInt(u16);
if (version < 2 or version > 5) return badDwarf();
var address_size: u8 = undefined;
var debug_abbrev_offset: u64 = undefined;
if (version >= 5) {
- const unit_type = try in.readInt(u8, di.endian);
+ const unit_type = try fbr.readInt(u8);
if (unit_type != UT.compile) return badDwarf();
- address_size = try in.readByte();
- debug_abbrev_offset = if (is_64)
- try in.readInt(u64, di.endian)
- else
- try in.readInt(u32, di.endian);
+ address_size = try fbr.readByte();
+ debug_abbrev_offset = try fbr.readAddress(unit_header.format);
} else {
- debug_abbrev_offset = if (is_64)
- try in.readInt(u64, di.endian)
- else
- try in.readInt(u32, di.endian);
- address_size = try in.readByte();
+ debug_abbrev_offset = try fbr.readAddress(unit_header.format);
+ address_size = try fbr.readByte();
}
if (address_size != @sizeOf(usize)) return badDwarf();
- const compile_unit_pos = try seekable.getPos();
const abbrev_table = try di.getAbbrevTable(allocator, debug_abbrev_offset);
- try seekable.seekTo(compile_unit_pos);
+ var max_attrs: usize = 0;
+ for (abbrev_table.abbrevs) |abbrev| {
+ max_attrs = @max(max_attrs, abbrev.attrs.len);
+ }
+ try attrs_buf.resize(max_attrs);
- const compile_unit_die = try allocator.create(Die);
- errdefer allocator.destroy(compile_unit_die);
- compile_unit_die.* = (try di.parseDie(allocator, in, abbrev_table, is_64)) orelse
- return badDwarf();
+ var compile_unit_die = (try parseDie(
+ &fbr,
+ attrs_buf.items,
+ abbrev_table,
+ unit_header.format,
+ )) orelse return badDwarf();
if (compile_unit_die.tag_id != TAG.compile_unit) return badDwarf();
+ compile_unit_die.attrs = try allocator.dupe(Die.Attr, compile_unit_die.attrs);
+
var compile_unit: CompileUnit = .{
.version = version,
- .is_64 = is_64,
+ .format = unit_header.format,
.pc_range = null,
.die = compile_unit_die,
.str_offsets_base = if (compile_unit_die.getAttr(AT.str_offsets_base)) |fv| try fv.getUInt(usize) else 0,
@@ -971,11 +906,8 @@ pub const DwarfInfo = struct {
if (compile_unit_die.getAttrAddr(di, AT.low_pc, compile_unit)) |low_pc| {
if (compile_unit_die.getAttr(AT.high_pc)) |high_pc_value| {
const pc_end = switch (high_pc_value.*) {
- FormValue.Address => |value| value,
- FormValue.Const => |value| b: {
- const offset = try value.asUnsignedLe();
- break :b (low_pc + offset);
- },
+ .addr => |value| value,
+ .udata => |offset| low_pc + offset,
else => return badDwarf(),
};
break :x PcRange{
@@ -1002,40 +934,39 @@ pub const DwarfInfo = struct {
section_type: DwarfSection,
di: *const DwarfInfo,
compile_unit: *const CompileUnit,
- stream: io.FixedBufferStream([]const u8),
+ fbr: FixedBufferReader,
pub fn init(ranges_value: *const FormValue, di: *const DwarfInfo, compile_unit: *const CompileUnit) !@This() {
const section_type = if (compile_unit.version >= 5) DwarfSection.debug_rnglists else DwarfSection.debug_ranges;
const debug_ranges = di.section(section_type) orelse return error.MissingDebugInfo;
const ranges_offset = switch (ranges_value.*) {
- .SecOffset => |off| off,
- .Const => |c| try c.asUnsignedLe(),
- .RangeListOffset => |idx| off: {
- if (compile_unit.is_64) {
- const offset_loc = @as(usize, @intCast(compile_unit.rnglists_base + 8 * idx));
- if (offset_loc + 8 > debug_ranges.len) return badDwarf();
- const offset = mem.readInt(u64, debug_ranges[offset_loc..][0..8], di.endian);
- break :off compile_unit.rnglists_base + offset;
- } else {
- const offset_loc = @as(usize, @intCast(compile_unit.rnglists_base + 4 * idx));
- if (offset_loc + 4 > debug_ranges.len) return badDwarf();
- const offset = mem.readInt(u32, debug_ranges[offset_loc..][0..4], di.endian);
- break :off compile_unit.rnglists_base + offset;
+ .sec_offset, .udata => |off| off,
+ .rnglistx => |idx| off: {
+ switch (compile_unit.format) {
+ .@"32" => {
+ const offset_loc = @as(usize, @intCast(compile_unit.rnglists_base + 4 * idx));
+ if (offset_loc + 4 > debug_ranges.len) return badDwarf();
+ const offset = mem.readInt(u32, debug_ranges[offset_loc..][0..4], di.endian);
+ break :off compile_unit.rnglists_base + offset;
+ },
+ .@"64" => {
+ const offset_loc = @as(usize, @intCast(compile_unit.rnglists_base + 8 * idx));
+ if (offset_loc + 8 > debug_ranges.len) return badDwarf();
+ const offset = mem.readInt(u64, debug_ranges[offset_loc..][0..8], di.endian);
+ break :off compile_unit.rnglists_base + offset;
+ },
}
},
else => return badDwarf(),
};
- var stream = io.fixedBufferStream(debug_ranges);
- try stream.seekTo(ranges_offset);
-
// All the addresses in the list are relative to the value
// specified by DW_AT.low_pc or to some other value encoded
// in the list itself.
// If no starting value is specified use zero.
const base_address = compile_unit.die.getAttrAddr(di, AT.low_pc, compile_unit.*) catch |err| switch (err) {
- error.MissingDebugInfo => @as(u64, 0), // TODO https://github.com/ziglang/zig/issues/11135
+ error.MissingDebugInfo => 0,
else => return err,
};
@@ -1044,28 +975,31 @@ pub const DwarfInfo = struct {
.section_type = section_type,
.di = di,
.compile_unit = compile_unit,
- .stream = stream,
+ .fbr = .{
+ .buf = debug_ranges,
+ .pos = math.cast(usize, ranges_offset) orelse return badDwarf(),
+ .endian = di.endian,
+ },
};
}
// Returns the next range in the list, or null if the end was reached.
pub fn next(self: *@This()) !?struct { start_addr: u64, end_addr: u64 } {
- const in = self.stream.reader();
switch (self.section_type) {
.debug_rnglists => {
- const kind = try in.readByte();
+ const kind = try self.fbr.readByte();
switch (kind) {
RLE.end_of_list => return null,
RLE.base_addressx => {
- const index = try leb.readULEB128(usize, in);
+ const index = try self.fbr.readUleb128(usize);
self.base_address = try self.di.readDebugAddr(self.compile_unit.*, index);
return try self.next();
},
RLE.startx_endx => {
- const start_index = try leb.readULEB128(usize, in);
+ const start_index = try self.fbr.readUleb128(usize);
const start_addr = try self.di.readDebugAddr(self.compile_unit.*, start_index);
- const end_index = try leb.readULEB128(usize, in);
+ const end_index = try self.fbr.readUleb128(usize);
const end_addr = try self.di.readDebugAddr(self.compile_unit.*, end_index);
return .{
@@ -1074,10 +1008,10 @@ pub const DwarfInfo = struct {
};
},
RLE.startx_length => {
- const start_index = try leb.readULEB128(usize, in);
+ const start_index = try self.fbr.readUleb128(usize);
const start_addr = try self.di.readDebugAddr(self.compile_unit.*, start_index);
- const len = try leb.readULEB128(usize, in);
+ const len = try self.fbr.readUleb128(usize);
const end_addr = start_addr + len;
return .{
@@ -1086,8 +1020,8 @@ pub const DwarfInfo = struct {
};
},
RLE.offset_pair => {
- const start_addr = try leb.readULEB128(usize, in);
- const end_addr = try leb.readULEB128(usize, in);
+ const start_addr = try self.fbr.readUleb128(usize);
+ const end_addr = try self.fbr.readUleb128(usize);
// This is the only kind that uses the base address
return .{
@@ -1096,12 +1030,12 @@ pub const DwarfInfo = struct {
};
},
RLE.base_address => {
- self.base_address = try in.readInt(usize, self.di.endian);
+ self.base_address = try self.fbr.readInt(usize);
return try self.next();
},
RLE.start_end => {
- const start_addr = try in.readInt(usize, self.di.endian);
- const end_addr = try in.readInt(usize, self.di.endian);
+ const start_addr = try self.fbr.readInt(usize);
+ const end_addr = try self.fbr.readInt(usize);
return .{
.start_addr = start_addr,
@@ -1109,8 +1043,8 @@ pub const DwarfInfo = struct {
};
},
RLE.start_length => {
- const start_addr = try in.readInt(usize, self.di.endian);
- const len = try leb.readULEB128(usize, in);
+ const start_addr = try self.fbr.readInt(usize);
+ const len = try self.fbr.readUleb128(usize);
const end_addr = start_addr + len;
return .{
@@ -1122,8 +1056,8 @@ pub const DwarfInfo = struct {
}
},
.debug_ranges => {
- const start_addr = try in.readInt(usize, self.di.endian);
- const end_addr = try in.readInt(usize, self.di.endian);
+ const start_addr = try self.fbr.readInt(usize);
+ const end_addr = try self.fbr.readInt(usize);
if (start_addr == 0 and end_addr == 0) return null;
// This entry selects a new value for the base address
@@ -1160,93 +1094,96 @@ pub const DwarfInfo = struct {
/// Gets an already existing AbbrevTable given the abbrev_offset, or if not found,
/// seeks in the stream and parses it.
- fn getAbbrevTable(di: *DwarfInfo, allocator: mem.Allocator, abbrev_offset: u64) !*const AbbrevTable {
- for (di.abbrev_table_list.items) |*header| {
- if (header.offset == abbrev_offset) {
- return &header.table;
+ fn getAbbrevTable(di: *DwarfInfo, allocator: mem.Allocator, abbrev_offset: u64) !*const Abbrev.Table {
+ for (di.abbrev_table_list.items) |*table| {
+ if (table.offset == abbrev_offset) {
+ return table;
}
}
- try di.abbrev_table_list.append(allocator, AbbrevTableHeader{
- .offset = abbrev_offset,
- .table = try di.parseAbbrevTable(allocator, abbrev_offset),
- });
- return &di.abbrev_table_list.items[di.abbrev_table_list.items.len - 1].table;
+ try di.abbrev_table_list.append(
+ allocator,
+ try di.parseAbbrevTable(allocator, abbrev_offset),
+ );
+ return &di.abbrev_table_list.items[di.abbrev_table_list.items.len - 1];
}
- fn parseAbbrevTable(di: *DwarfInfo, allocator: mem.Allocator, offset: u64) !AbbrevTable {
- var stream = io.fixedBufferStream(di.section(.debug_abbrev).?);
- const in = stream.reader();
- const seekable = stream.seekableStream();
+ fn parseAbbrevTable(di: *DwarfInfo, allocator: mem.Allocator, offset: u64) !Abbrev.Table {
+ var fbr: FixedBufferReader = .{
+ .buf = di.section(.debug_abbrev).?,
+ .pos = math.cast(usize, offset) orelse return badDwarf(),
+ .endian = di.endian,
+ };
- try seekable.seekTo(offset);
- var result = AbbrevTable.init(allocator);
- errdefer {
- for (result.items) |*entry| {
- entry.attrs.deinit();
+ var abbrevs = std.ArrayList(Abbrev).init(allocator);
+ defer {
+ for (abbrevs.items) |*abbrev| {
+ abbrev.deinit(allocator);
}
- result.deinit();
+ abbrevs.deinit();
}
+ var attrs = std.ArrayList(Abbrev.Attr).init(allocator);
+ defer attrs.deinit();
+
while (true) {
- const abbrev_code = try leb.readULEB128(u64, in);
- if (abbrev_code == 0) return result;
- try result.append(AbbrevTableEntry{
- .abbrev_code = abbrev_code,
- .tag_id = try leb.readULEB128(u64, in),
- .has_children = (try in.readByte()) == CHILDREN.yes,
- .attrs = std.ArrayList(AbbrevAttr).init(allocator),
- });
- const attrs = &result.items[result.items.len - 1].attrs;
+ const code = try fbr.readUleb128(u64);
+ if (code == 0) break;
+ const tag_id = try fbr.readUleb128(u64);
+ const has_children = (try fbr.readByte()) == CHILDREN.yes;
while (true) {
- const attr_id = try leb.readULEB128(u64, in);
- const form_id = try leb.readULEB128(u64, in);
+ const attr_id = try fbr.readUleb128(u64);
+ const form_id = try fbr.readUleb128(u64);
if (attr_id == 0 and form_id == 0) break;
- // DW_FORM_implicit_const stores its value immediately after the attribute pair :(
- const payload = if (form_id == FORM.implicit_const) try leb.readILEB128(i64, in) else undefined;
- try attrs.append(AbbrevAttr{
- .attr_id = attr_id,
+ try attrs.append(.{
+ .id = attr_id,
.form_id = form_id,
- .payload = payload,
+ .payload = switch (form_id) {
+ FORM.implicit_const => try fbr.readIleb128(i64),
+ else => undefined,
+ },
});
}
+
+ try abbrevs.append(.{
+ .code = code,
+ .tag_id = tag_id,
+ .has_children = has_children,
+ .attrs = try attrs.toOwnedSlice(),
+ });
}
+
+ return .{
+ .offset = offset,
+ .abbrevs = try abbrevs.toOwnedSlice(),
+ };
}
fn parseDie(
- di: *DwarfInfo,
- allocator: mem.Allocator,
- in_stream: anytype,
- abbrev_table: *const AbbrevTable,
- is_64: bool,
+ fbr: *FixedBufferReader,
+ attrs_buf: []Die.Attr,
+ abbrev_table: *const Abbrev.Table,
+ format: Format,
) !?Die {
- const abbrev_code = try leb.readULEB128(u64, in_stream);
+ const abbrev_code = try fbr.readUleb128(u64);
if (abbrev_code == 0) return null;
- const table_entry = getAbbrevTableEntry(abbrev_table, abbrev_code) orelse return badDwarf();
-
- var result = Die{
- // Lives as long as the Die.
- .arena = std.heap.ArenaAllocator.init(allocator),
+ const table_entry = abbrev_table.get(abbrev_code) orelse return badDwarf();
+
+ const attrs = attrs_buf[0..table_entry.attrs.len];
+ for (attrs, table_entry.attrs) |*result_attr, attr| result_attr.* = Die.Attr{
+ .id = attr.id,
+ .value = try parseFormValue(
+ fbr,
+ attr.form_id,
+ format,
+ attr.payload,
+ ),
+ };
+ return .{
.tag_id = table_entry.tag_id,
.has_children = table_entry.has_children,
+ .attrs = attrs,
};
- try result.attrs.resize(allocator, table_entry.attrs.items.len);
- for (table_entry.attrs.items, 0..) |attr, i| {
- result.attrs.items[i] = Die.Attr{
- .id = attr.attr_id,
- .value = try parseFormValue(
- result.arena.allocator(),
- in_stream,
- attr.form_id,
- di.endian,
- is_64,
- ),
- };
- if (attr.form_id == FORM.implicit_const) {
- result.attrs.items[i].value.Const.payload = @as(u64, @bitCast(attr.payload));
- }
- }
- return result;
}
pub fn getLineNumberInfo(
@@ -1255,50 +1192,47 @@ pub const DwarfInfo = struct {
compile_unit: CompileUnit,
target_address: u64,
) !debug.LineInfo {
- var stream = io.fixedBufferStream(di.section(.debug_line).?);
- const in = stream.reader();
- const seekable = stream.seekableStream();
-
const compile_unit_cwd = try compile_unit.die.getAttrString(di, AT.comp_dir, di.section(.debug_line_str), compile_unit);
const line_info_offset = try compile_unit.die.getAttrSecOffset(AT.stmt_list);
- try seekable.seekTo(line_info_offset);
+ var fbr: FixedBufferReader = .{ .buf = di.section(.debug_line).?, .endian = di.endian };
+ try fbr.seekTo(line_info_offset);
- var is_64: bool = undefined;
- const unit_length = try readUnitLength(in, di.endian, &is_64);
- if (unit_length == 0) {
- return missingDwarf();
- }
- const next_offset = unit_length + (if (is_64) @as(usize, 12) else @as(usize, 4));
+ const unit_header = try readUnitHeader(&fbr);
+ if (unit_header.unit_length == 0) return missingDwarf();
+ const next_offset = unit_header.header_length + unit_header.unit_length;
- const version = try in.readInt(u16, di.endian);
+ const version = try fbr.readInt(u16);
if (version < 2) return badDwarf();
- var addr_size: u8 = if (is_64) 8 else 4;
+ var addr_size: u8 = switch (unit_header.format) {
+ .@"32" => 4,
+ .@"64" => 8,
+ };
var seg_size: u8 = 0;
if (version >= 5) {
- addr_size = try in.readByte();
- seg_size = try in.readByte();
+ addr_size = try fbr.readByte();
+ seg_size = try fbr.readByte();
}
- const prologue_length = if (is_64) try in.readInt(u64, di.endian) else try in.readInt(u32, di.endian);
- const prog_start_offset = (try seekable.getPos()) + prologue_length;
+ const prologue_length = try fbr.readAddress(unit_header.format);
+ const prog_start_offset = fbr.pos + prologue_length;
- const minimum_instruction_length = try in.readByte();
+ const minimum_instruction_length = try fbr.readByte();
if (minimum_instruction_length == 0) return badDwarf();
if (version >= 4) {
// maximum_operations_per_instruction
- _ = try in.readByte();
+ _ = try fbr.readByte();
}
- const default_is_stmt = (try in.readByte()) != 0;
- const line_base = try in.readByteSigned();
+ const default_is_stmt = (try fbr.readByte()) != 0;
+ const line_base = try fbr.readByteSigned();
- const line_range = try in.readByte();
+ const line_range = try fbr.readByte();
if (line_range == 0) return badDwarf();
- const opcode_base = try in.readByte();
+ const opcode_base = try fbr.readByte();
const standard_opcode_lengths = try allocator.alloc(u8, opcode_base - 1);
defer allocator.free(standard_opcode_lengths);
@@ -1306,33 +1240,31 @@ pub const DwarfInfo = struct {
{
var i: usize = 0;
while (i < opcode_base - 1) : (i += 1) {
- standard_opcode_lengths[i] = try in.readByte();
+ standard_opcode_lengths[i] = try fbr.readByte();
}
}
- var tmp_arena = std.heap.ArenaAllocator.init(allocator);
- defer tmp_arena.deinit();
- const arena = tmp_arena.allocator();
-
- var include_directories = std.ArrayList(FileEntry).init(arena);
- var file_entries = std.ArrayList(FileEntry).init(arena);
+ var include_directories = std.ArrayList(FileEntry).init(allocator);
+ defer include_directories.deinit();
+ var file_entries = std.ArrayList(FileEntry).init(allocator);
+ defer file_entries.deinit();
if (version < 5) {
try include_directories.append(.{ .path = compile_unit_cwd });
while (true) {
- const dir = try in.readUntilDelimiterAlloc(arena, 0, math.maxInt(usize));
+ const dir = try fbr.readBytesTo(0);
if (dir.len == 0) break;
try include_directories.append(.{ .path = dir });
}
while (true) {
- const file_name = try in.readUntilDelimiterAlloc(arena, 0, math.maxInt(usize));
+ const file_name = try fbr.readBytesTo(0);
if (file_name.len == 0) break;
- const dir_index = try leb.readULEB128(u32, in);
- const mtime = try leb.readULEB128(u64, in);
- const size = try leb.readULEB128(u64, in);
- try file_entries.append(FileEntry{
+ const dir_index = try fbr.readUleb128(u32);
+ const mtime = try fbr.readUleb128(u64);
+ const size = try fbr.readUleb128(u64);
+ try file_entries.append(.{
.path = file_name,
.dir_index = dir_index,
.mtime = mtime,
@@ -1346,16 +1278,16 @@ pub const DwarfInfo = struct {
};
{
var dir_ent_fmt_buf: [10]FileEntFmt = undefined;
- const directory_entry_format_count = try in.readByte();
+ const directory_entry_format_count = try fbr.readByte();
if (directory_entry_format_count > dir_ent_fmt_buf.len) return badDwarf();
for (dir_ent_fmt_buf[0..directory_entry_format_count]) |*ent_fmt| {
ent_fmt.* = .{
- .content_type_code = try leb.readULEB128(u8, in),
- .form_code = try leb.readULEB128(u16, in),
+ .content_type_code = try fbr.readUleb128(u8),
+ .form_code = try fbr.readUleb128(u16),
};
}
- const directories_count = try leb.readULEB128(usize, in);
+ const directories_count = try fbr.readUleb128(usize);
try include_directories.ensureUnusedCapacity(directories_count);
{
var i: usize = 0;
@@ -1363,18 +1295,20 @@ pub const DwarfInfo = struct {
var e: FileEntry = .{ .path = &.{} };
for (dir_ent_fmt_buf[0..directory_entry_format_count]) |ent_fmt| {
const form_value = try parseFormValue(
- arena,
- in,
+ &fbr,
ent_fmt.form_code,
- di.endian,
- is_64,
+ unit_header.format,
+ null,
);
switch (ent_fmt.content_type_code) {
LNCT.path => e.path = try form_value.getString(di.*),
LNCT.directory_index => e.dir_index = try form_value.getUInt(u32),
LNCT.timestamp => e.mtime = try form_value.getUInt(u64),
LNCT.size => e.size = try form_value.getUInt(u64),
- LNCT.MD5 => e.md5 = try form_value.getData16(),
+ LNCT.MD5 => e.md5 = switch (form_value) {
+ .data16 => |data16| data16.*,
+ else => return badDwarf(),
+ },
else => continue,
}
}
@@ -1384,16 +1318,16 @@ pub const DwarfInfo = struct {
}
var file_ent_fmt_buf: [10]FileEntFmt = undefined;
- const file_name_entry_format_count = try in.readByte();
+ const file_name_entry_format_count = try fbr.readByte();
if (file_name_entry_format_count > file_ent_fmt_buf.len) return badDwarf();
for (file_ent_fmt_buf[0..file_name_entry_format_count]) |*ent_fmt| {
ent_fmt.* = .{
- .content_type_code = try leb.readULEB128(u8, in),
- .form_code = try leb.readULEB128(u16, in),
+ .content_type_code = try fbr.readUleb128(u8),
+ .form_code = try fbr.readUleb128(u16),
};
}
- const file_names_count = try leb.readULEB128(usize, in);
+ const file_names_count = try fbr.readUleb128(usize);
try file_entries.ensureUnusedCapacity(file_names_count);
{
var i: usize = 0;
@@ -1401,18 +1335,20 @@ pub const DwarfInfo = struct {
var e: FileEntry = .{ .path = &.{} };
for (file_ent_fmt_buf[0..file_name_entry_format_count]) |ent_fmt| {
const form_value = try parseFormValue(
- arena,
- in,
+ &fbr,
ent_fmt.form_code,
- di.endian,
- is_64,
+ unit_header.format,
+ null,
);
switch (ent_fmt.content_type_code) {
LNCT.path => e.path = try form_value.getString(di.*),
LNCT.directory_index => e.dir_index = try form_value.getUInt(u32),
LNCT.timestamp => e.mtime = try form_value.getUInt(u64),
LNCT.size => e.size = try form_value.getUInt(u64),
- LNCT.MD5 => e.md5 = try form_value.getData16(),
+ LNCT.MD5 => e.md5 = switch (form_value) {
+ .data16 => |data16| data16.*,
+ else => return badDwarf(),
+ },
else => continue,
}
}
@@ -1428,17 +1364,17 @@ pub const DwarfInfo = struct {
version,
);
- try seekable.seekTo(prog_start_offset);
+ try fbr.seekTo(prog_start_offset);
const next_unit_pos = line_info_offset + next_offset;
- while ((try seekable.getPos()) < next_unit_pos) {
- const opcode = try in.readByte();
+ while (fbr.pos < next_unit_pos) {
+ const opcode = try fbr.readByte();
if (opcode == LNS.extended_op) {
- const op_size = try leb.readULEB128(u64, in);
+ const op_size = try fbr.readUleb128(u64);
if (op_size < 1) return badDwarf();
- const sub_op = try in.readByte();
+ const sub_op = try fbr.readByte();
switch (sub_op) {
LNE.end_sequence => {
prog.end_sequence = true;
@@ -1446,25 +1382,22 @@ pub const DwarfInfo = struct {
prog.reset();
},
LNE.set_address => {
- const addr = try in.readInt(usize, di.endian);
+ const addr = try fbr.readInt(usize);
prog.address = addr;
},
LNE.define_file => {
- const path = try in.readUntilDelimiterAlloc(arena, 0, math.maxInt(usize));
- const dir_index = try leb.readULEB128(u32, in);
- const mtime = try leb.readULEB128(u64, in);
- const size = try leb.readULEB128(u64, in);
- try file_entries.append(FileEntry{
+ const path = try fbr.readBytesTo(0);
+ const dir_index = try fbr.readUleb128(u32);
+ const mtime = try fbr.readUleb128(u64);
+ const size = try fbr.readUleb128(u64);
+ try file_entries.append(.{
.path = path,
.dir_index = dir_index,
.mtime = mtime,
.size = size,
});
},
- else => {
- const fwd_amt = math.cast(isize, op_size - 1) orelse return badDwarf();
- try seekable.seekBy(fwd_amt);
- },
+ else => try fbr.seekForward(op_size - 1),
}
} else if (opcode >= opcode_base) {
// special opcodes
@@ -1482,19 +1415,19 @@ pub const DwarfInfo = struct {
prog.basic_block = false;
},
LNS.advance_pc => {
- const arg = try leb.readULEB128(usize, in);
+ const arg = try fbr.readUleb128(usize);
prog.address += arg * minimum_instruction_length;
},
LNS.advance_line => {
- const arg = try leb.readILEB128(i64, in);
+ const arg = try fbr.readIleb128(i64);
prog.line += arg;
},
LNS.set_file => {
- const arg = try leb.readULEB128(usize, in);
+ const arg = try fbr.readUleb128(usize);
prog.file = arg;
},
LNS.set_column => {
- const arg = try leb.readULEB128(u64, in);
+ const arg = try fbr.readUleb128(u64);
prog.column = arg;
},
LNS.negate_stmt => {
@@ -1508,14 +1441,13 @@ pub const DwarfInfo = struct {
prog.address += inc_addr;
},
LNS.fixed_advance_pc => {
- const arg = try in.readInt(u16, di.endian);
+ const arg = try fbr.readInt(u16);
prog.address += arg;
},
LNS.set_prologue_end => {},
else => {
if (opcode - 1 >= standard_opcode_lengths.len) return badDwarf();
- const len_bytes = standard_opcode_lengths[opcode - 1];
- try seekable.seekBy(len_bytes);
+ try fbr.seekForward(standard_opcode_lengths[opcode - 1]);
},
}
}
@@ -1524,11 +1456,11 @@ pub const DwarfInfo = struct {
return missingDwarf();
}
- fn getString(di: DwarfInfo, offset: u64) ![]const u8 {
+ fn getString(di: DwarfInfo, offset: u64) ![:0]const u8 {
return getStringGeneric(di.section(.debug_str), offset);
}
- fn getLineString(di: DwarfInfo, offset: u64) ![]const u8 {
+ fn getLineString(di: DwarfInfo, offset: u64) ![:0]const u8 {
return getStringGeneric(di.section(.debug_line_str), offset);
}
@@ -1564,38 +1496,37 @@ pub const DwarfInfo = struct {
/// of FDEs is built for binary searching during unwinding.
pub fn scanAllUnwindInfo(di: *DwarfInfo, allocator: mem.Allocator, base_address: usize) !void {
if (di.section(.eh_frame_hdr)) |eh_frame_hdr| blk: {
- var stream = io.fixedBufferStream(eh_frame_hdr);
- const reader = stream.reader();
+ var fbr: FixedBufferReader = .{ .buf = eh_frame_hdr, .endian = native_endian };
- const version = try reader.readByte();
+ const version = try fbr.readByte();
if (version != 1) break :blk;
- const eh_frame_ptr_enc = try reader.readByte();
+ const eh_frame_ptr_enc = try fbr.readByte();
if (eh_frame_ptr_enc == EH.PE.omit) break :blk;
- const fde_count_enc = try reader.readByte();
+ const fde_count_enc = try fbr.readByte();
if (fde_count_enc == EH.PE.omit) break :blk;
- const table_enc = try reader.readByte();
+ const table_enc = try fbr.readByte();
if (table_enc == EH.PE.omit) break :blk;
- const eh_frame_ptr = std.math.cast(usize, try readEhPointer(reader, eh_frame_ptr_enc, @sizeOf(usize), .{
- .pc_rel_base = @intFromPtr(&eh_frame_hdr[stream.pos]),
+ const eh_frame_ptr = math.cast(usize, try readEhPointer(&fbr, eh_frame_ptr_enc, @sizeOf(usize), .{
+ .pc_rel_base = @intFromPtr(&eh_frame_hdr[fbr.pos]),
.follow_indirect = true,
- }, builtin.cpu.arch.endian()) orelse return badDwarf()) orelse return badDwarf();
+ }) orelse return badDwarf()) orelse return badDwarf();
- const fde_count = std.math.cast(usize, try readEhPointer(reader, fde_count_enc, @sizeOf(usize), .{
- .pc_rel_base = @intFromPtr(&eh_frame_hdr[stream.pos]),
+ const fde_count = math.cast(usize, try readEhPointer(&fbr, fde_count_enc, @sizeOf(usize), .{
+ .pc_rel_base = @intFromPtr(&eh_frame_hdr[fbr.pos]),
.follow_indirect = true,
- }, builtin.cpu.arch.endian()) orelse return badDwarf()) orelse return badDwarf();
+ }) orelse return badDwarf()) orelse return badDwarf();
const entry_size = try ExceptionFrameHeader.entrySize(table_enc);
const entries_len = fde_count * entry_size;
- if (entries_len > eh_frame_hdr.len - stream.pos) return badDwarf();
+ if (entries_len > eh_frame_hdr.len - fbr.pos) return badDwarf();
di.eh_frame_hdr = .{
.eh_frame_ptr = eh_frame_ptr,
.table_enc = table_enc,
.fde_count = fde_count,
- .entries = eh_frame_hdr[stream.pos..][0..entries_len],
+ .entries = eh_frame_hdr[fbr.pos..][0..entries_len],
};
// No need to scan .eh_frame, we have a binary search table already
@@ -1605,16 +1536,16 @@ pub const DwarfInfo = struct {
const frame_sections = [2]DwarfSection{ .eh_frame, .debug_frame };
for (frame_sections) |frame_section| {
if (di.section(frame_section)) |section_data| {
- var stream = io.fixedBufferStream(section_data);
- while (stream.pos < stream.buffer.len) {
- const entry_header = try EntryHeader.read(&stream, frame_section, di.endian);
+ var fbr: FixedBufferReader = .{ .buf = section_data, .endian = di.endian };
+ while (fbr.pos < fbr.buf.len) {
+ const entry_header = try EntryHeader.read(&fbr, frame_section);
switch (entry_header.type) {
.cie => {
const cie = try CommonInformationEntry.parse(
entry_header.entry_bytes,
di.sectionVirtualOffset(frame_section, base_address).?,
true,
- entry_header.is_64,
+ entry_header.format,
frame_section,
entry_header.length_offset,
@sizeOf(usize),
@@ -1638,7 +1569,7 @@ pub const DwarfInfo = struct {
}
}
- std.mem.sortUnstable(FrameDescriptionEntry, di.fde_list.items, {}, struct {
+ mem.sortUnstable(FrameDescriptionEntry, di.fde_list.items, {}, struct {
fn lessThan(ctx: void, a: FrameDescriptionEntry, b: FrameDescriptionEntry) bool {
_ = ctx;
return a.pc_begin < b.pc_begin;
@@ -1668,27 +1599,31 @@ pub const DwarfInfo = struct {
const frame_section = di.section(dwarf_section) orelse return error.MissingFDE;
if (fde_offset >= frame_section.len) return error.MissingFDE;
- var stream = io.fixedBufferStream(frame_section);
- try stream.seekTo(fde_offset);
+ var fbr: FixedBufferReader = .{
+ .buf = frame_section,
+ .pos = fde_offset,
+ .endian = di.endian,
+ };
- const fde_entry_header = try EntryHeader.read(&stream, dwarf_section, di.endian);
+ const fde_entry_header = try EntryHeader.read(&fbr, dwarf_section);
if (fde_entry_header.type != .fde) return error.MissingFDE;
const cie_offset = fde_entry_header.type.fde;
- try stream.seekTo(cie_offset);
+ try fbr.seekTo(cie_offset);
- const cie_entry_header = try EntryHeader.read(&stream, dwarf_section, builtin.cpu.arch.endian());
+ fbr.endian = native_endian;
+ const cie_entry_header = try EntryHeader.read(&fbr, dwarf_section);
if (cie_entry_header.type != .cie) return badDwarf();
cie = try CommonInformationEntry.parse(
cie_entry_header.entry_bytes,
0,
true,
- cie_entry_header.is_64,
+ cie_entry_header.format,
dwarf_section,
cie_entry_header.length_offset,
@sizeOf(usize),
- builtin.cpu.arch.endian(),
+ native_endian,
);
fde = try FrameDescriptionEntry.parse(
@@ -1697,7 +1632,7 @@ pub const DwarfInfo = struct {
true,
cie,
@sizeOf(usize),
- builtin.cpu.arch.endian(),
+ native_endian,
);
} else if (di.eh_frame_hdr) |header| {
const eh_frame_len = if (di.section(.eh_frame)) |eh_frame| eh_frame.len else null;
@@ -1711,7 +1646,7 @@ pub const DwarfInfo = struct {
);
} else {
const index = std.sort.binarySearch(FrameDescriptionEntry, context.pc, di.fde_list.items, {}, struct {
- pub fn compareFn(_: void, pc: usize, mid_item: FrameDescriptionEntry) std.math.Order {
+ pub fn compareFn(_: void, pc: usize, mid_item: FrameDescriptionEntry) math.Order {
if (pc < mid_item.pc_begin) return .lt;
const range_end = mid_item.pc_begin + mid_item.pc_range;
@@ -1725,8 +1660,8 @@ pub const DwarfInfo = struct {
cie = di.cie_map.get(fde.cie_length_offset) orelse return error.MissingCIE;
}
- var expression_context = .{
- .is_64 = cie.is_64,
+ var expression_context: expressions.ExpressionContext = .{
+ .format = cie.format,
.isValidMemory = context.isValidMemory,
.compile_unit = di.findCompileUnit(fde.pc_begin) catch null,
.thread_context = context.thread_context,
@@ -1973,10 +1908,10 @@ pub fn unwindFrameMachO(context: *UnwindContext, unwind_info: []const u8, eh_fra
.raw_encoding = common_encodings[entry.encodingIndex],
};
} else {
- const local_index = try std.math.sub(
+ const local_index = try math.sub(
u8,
entry.encodingIndex,
- std.math.cast(u8, header.commonEncodingsArrayCount) orelse return error.InvalidUnwindInfo,
+ math.cast(u8, header.commonEncodingsArrayCount) orelse return error.InvalidUnwindInfo,
);
const local_encodings = mem.bytesAsSlice(
macho.compact_unwind_encoding_t,
@@ -2187,7 +2122,7 @@ pub fn unwindFrameMachO(context: *UnwindContext, unwind_info: []const u8, eh_fra
fn unwindFrameMachODwarf(context: *UnwindContext, eh_frame: []const u8, fde_offset: usize) !usize {
var di = DwarfInfo{
- .endian = builtin.cpu.arch.endian(),
+ .endian = native_endian,
.is_macho = true,
};
defer di.deinit(context.allocator);
@@ -2207,8 +2142,8 @@ pub const UnwindContext = struct {
thread_context: *debug.ThreadContext,
reg_context: abi.RegisterContext,
isValidMemory: *const fn (address: usize) bool,
- vm: call_frame.VirtualMachine = .{},
- stack_machine: expressions.StackMachine(.{ .call_frame_context = true }) = .{},
+ vm: call_frame.VirtualMachine,
+ stack_machine: expressions.StackMachine(.{ .call_frame_context = true }),
pub fn init(allocator: mem.Allocator, thread_context: *const debug.ThreadContext, isValidMemory: *const fn (address: usize) bool) !UnwindContext {
const pc = abi.stripInstructionPtrAuthCode((try abi.regValueNative(usize, thread_context, abi.ipRegNum(), null)).*);
@@ -2223,6 +2158,8 @@ pub const UnwindContext = struct {
.thread_context = context_copy,
.reg_context = undefined,
.isValidMemory = isValidMemory,
+ .vm = .{},
+ .stack_machine = .{},
};
}
@@ -2230,6 +2167,7 @@ pub const UnwindContext = struct {
self.vm.deinit(self.allocator);
self.stack_machine.deinit(self.allocator);
self.allocator.destroy(self.thread_context);
+ self.* = undefined;
}
pub fn getFp(self: *const UnwindContext) !usize {
@@ -2281,8 +2219,7 @@ const EhPointerContext = struct {
text_rel_base: ?u64 = null,
function_rel_base: ?u64 = null,
};
-
-fn readEhPointer(reader: anytype, enc: u8, addr_size_bytes: u8, ctx: EhPointerContext, endian: std.builtin.Endian) !?u64 {
+fn readEhPointer(fbr: *FixedBufferReader, enc: u8, addr_size_bytes: u8, ctx: EhPointerContext) !?u64 {
if (enc == EH.PE.omit) return null;
const value: union(enum) {
@@ -2291,20 +2228,20 @@ fn readEhPointer(reader: anytype, enc: u8, addr_size_bytes: u8, ctx: EhPointerCo
} = switch (enc & EH.PE.type_mask) {
EH.PE.absptr => .{
.unsigned = switch (addr_size_bytes) {
- 2 => try reader.readInt(u16, endian),
- 4 => try reader.readInt(u32, endian),
- 8 => try reader.readInt(u64, endian),
+ 2 => try fbr.readInt(u16),
+ 4 => try fbr.readInt(u32),
+ 8 => try fbr.readInt(u64),
else => return error.InvalidAddrSize,
},
},
- EH.PE.uleb128 => .{ .unsigned = try leb.readULEB128(u64, reader) },
- EH.PE.udata2 => .{ .unsigned = try reader.readInt(u16, endian) },
- EH.PE.udata4 => .{ .unsigned = try reader.readInt(u32, endian) },
- EH.PE.udata8 => .{ .unsigned = try reader.readInt(u64, endian) },
- EH.PE.sleb128 => .{ .signed = try leb.readILEB128(i64, reader) },
- EH.PE.sdata2 => .{ .signed = try reader.readInt(i16, endian) },
- EH.PE.sdata4 => .{ .signed = try reader.readInt(i32, endian) },
- EH.PE.sdata8 => .{ .signed = try reader.readInt(i64, endian) },
+ EH.PE.uleb128 => .{ .unsigned = try fbr.readUleb128(u64) },
+ EH.PE.udata2 => .{ .unsigned = try fbr.readInt(u16) },
+ EH.PE.udata4 => .{ .unsigned = try fbr.readInt(u32) },
+ EH.PE.udata8 => .{ .unsigned = try fbr.readInt(u64) },
+ EH.PE.sleb128 => .{ .signed = try fbr.readIleb128(i64) },
+ EH.PE.sdata2 => .{ .signed = try fbr.readInt(i16) },
+ EH.PE.sdata4 => .{ .signed = try fbr.readInt(i32) },
+ EH.PE.sdata8 => .{ .signed = try fbr.readInt(i64) },
else => return badDwarf(),
};
@@ -2396,18 +2333,17 @@ pub const ExceptionFrameHeader = struct {
var left: usize = 0;
var len: usize = self.fde_count;
- var stream = io.fixedBufferStream(self.entries);
- const reader = stream.reader();
+ var fbr: FixedBufferReader = .{ .buf = self.entries, .endian = native_endian };
while (len > 1) {
const mid = left + len / 2;
- try stream.seekTo(mid * entry_size);
- const pc_begin = try readEhPointer(reader, self.table_enc, @sizeOf(usize), .{
- .pc_rel_base = @intFromPtr(&self.entries[stream.pos]),
+ fbr.pos = mid * entry_size;
+ const pc_begin = try readEhPointer(&fbr, self.table_enc, @sizeOf(usize), .{
+ .pc_rel_base = @intFromPtr(&self.entries[fbr.pos]),
.follow_indirect = true,
.data_rel_base = eh_frame_hdr_ptr,
- }, builtin.cpu.arch.endian()) orelse return badDwarf();
+ }) orelse return badDwarf();
if (pc < pc_begin) {
len /= 2;
@@ -2419,20 +2355,20 @@ pub const ExceptionFrameHeader = struct {
}
if (len == 0) return badDwarf();
- try stream.seekTo(left * entry_size);
+ fbr.pos = left * entry_size;
// Read past the pc_begin field of the entry
- _ = try readEhPointer(reader, self.table_enc, @sizeOf(usize), .{
- .pc_rel_base = @intFromPtr(&self.entries[stream.pos]),
+ _ = try readEhPointer(&fbr, self.table_enc, @sizeOf(usize), .{
+ .pc_rel_base = @intFromPtr(&self.entries[fbr.pos]),
.follow_indirect = true,
.data_rel_base = eh_frame_hdr_ptr,
- }, builtin.cpu.arch.endian()) orelse return badDwarf();
+ }) orelse return badDwarf();
- const fde_ptr = math.cast(usize, try readEhPointer(reader, self.table_enc, @sizeOf(usize), .{
- .pc_rel_base = @intFromPtr(&self.entries[stream.pos]),
+ const fde_ptr = math.cast(usize, try readEhPointer(&fbr, self.table_enc, @sizeOf(usize), .{
+ .pc_rel_base = @intFromPtr(&self.entries[fbr.pos]),
.follow_indirect = true,
.data_rel_base = eh_frame_hdr_ptr,
- }, builtin.cpu.arch.endian()) orelse return badDwarf()) orelse return badDwarf();
+ }) orelse return badDwarf()) orelse return badDwarf();
// Verify the length fields of the FDE header are readable
if (!self.isValidPtr(fde_ptr, isValidMemory, eh_frame_len) or fde_ptr < self.eh_frame_ptr) return badDwarf();
@@ -2445,17 +2381,20 @@ pub const ExceptionFrameHeader = struct {
const eh_frame = @as([*]const u8, @ptrFromInt(self.eh_frame_ptr))[0 .. eh_frame_len orelse math.maxInt(u32)];
const fde_offset = fde_ptr - self.eh_frame_ptr;
- var eh_frame_stream = io.fixedBufferStream(eh_frame);
- try eh_frame_stream.seekTo(fde_offset);
+ var eh_frame_fbr: FixedBufferReader = .{
+ .buf = eh_frame,
+ .pos = fde_offset,
+ .endian = native_endian,
+ };
- const fde_entry_header = try EntryHeader.read(&eh_frame_stream, .eh_frame, builtin.cpu.arch.endian());
+ const fde_entry_header = try EntryHeader.read(&eh_frame_fbr, .eh_frame);
if (!self.isValidPtr(@intFromPtr(&fde_entry_header.entry_bytes[fde_entry_header.entry_bytes.len - 1]), isValidMemory, eh_frame_len)) return badDwarf();
if (fde_entry_header.type != .fde) return badDwarf();
// CIEs always come before FDEs (the offset is a subtraction), so we can assume this memory is readable
const cie_offset = fde_entry_header.type.fde;
- try eh_frame_stream.seekTo(cie_offset);
- const cie_entry_header = try EntryHeader.read(&eh_frame_stream, .eh_frame, builtin.cpu.arch.endian());
+ try eh_frame_fbr.seekTo(cie_offset);
+ const cie_entry_header = try EntryHeader.read(&eh_frame_fbr, .eh_frame);
if (!self.isValidPtr(@intFromPtr(&cie_entry_header.entry_bytes[cie_entry_header.entry_bytes.len - 1]), isValidMemory, eh_frame_len)) return badDwarf();
if (cie_entry_header.type != .cie) return badDwarf();
@@ -2463,11 +2402,11 @@ pub const ExceptionFrameHeader = struct {
cie_entry_header.entry_bytes,
0,
true,
- cie_entry_header.is_64,
+ cie_entry_header.format,
.eh_frame,
cie_entry_header.length_offset,
@sizeOf(usize),
- builtin.cpu.arch.endian(),
+ native_endian,
);
fde.* = try FrameDescriptionEntry.parse(
@@ -2476,7 +2415,7 @@ pub const ExceptionFrameHeader = struct {
true,
cie.*,
@sizeOf(usize),
- builtin.cpu.arch.endian(),
+ native_endian,
);
}
};
@@ -2484,62 +2423,60 @@ pub const ExceptionFrameHeader = struct {
pub const EntryHeader = struct {
/// Offset of the length field in the backing buffer
length_offset: usize,
- is_64: bool,
+ format: Format,
type: union(enum) {
cie,
/// Value is the offset of the corresponding CIE
fde: u64,
- terminator: void,
+ terminator,
},
/// The entry's contents, not including the ID field
entry_bytes: []const u8,
- /// Reads a header for either an FDE or a CIE, then advances the stream to the position after the trailing structure.
- /// `stream` must be a stream backed by either the .eh_frame or .debug_frame sections.
- pub fn read(stream: *std.io.FixedBufferStream([]const u8), dwarf_section: DwarfSection, endian: std.builtin.Endian) !EntryHeader {
- assert(dwarf_section == .eh_frame or dwarf_section == .debug_frame);
+ /// The length of the entry including the ID field, but not the length field itself
+ pub fn entryLength(self: EntryHeader) usize {
+ return self.entry_bytes.len + @as(u8, if (self.is_64) 8 else 4);
+ }
- const reader = stream.reader();
- const length_offset = stream.pos;
+ /// Reads a header for either an FDE or a CIE, then advances the fbr to the position after the trailing structure.
+ /// `fbr` must be a FixedBufferReader backed by either the .eh_frame or .debug_frame sections.
+ pub fn read(fbr: *FixedBufferReader, dwarf_section: DwarfSection) !EntryHeader {
+ assert(dwarf_section == .eh_frame or dwarf_section == .debug_frame);
- var is_64: bool = undefined;
- const length = math.cast(usize, try readUnitLength(reader, endian, &is_64)) orelse return badDwarf();
- if (length == 0) return .{
+ const length_offset = fbr.pos;
+ const unit_header = try readUnitHeader(fbr);
+ const unit_length = math.cast(usize, unit_header.unit_length) orelse return badDwarf();
+ if (unit_length == 0) return .{
.length_offset = length_offset,
- .is_64 = is_64,
- .type = .{ .terminator = {} },
+ .format = unit_header.format,
+ .type = .terminator,
.entry_bytes = &.{},
};
+ const start_offset = fbr.pos;
+ const end_offset = start_offset + unit_length;
+ defer fbr.pos = end_offset;
- const id_len = @as(u8, if (is_64) 8 else 4);
- const id = if (is_64) try reader.readInt(u64, endian) else try reader.readInt(u32, endian);
- const entry_bytes = stream.buffer[stream.pos..][0 .. length - id_len];
+ const id = try fbr.readAddress(unit_header.format);
+ const entry_bytes = fbr.buf[fbr.pos..end_offset];
const cie_id: u64 = switch (dwarf_section) {
.eh_frame => CommonInformationEntry.eh_id,
- .debug_frame => if (is_64) CommonInformationEntry.dwarf64_id else CommonInformationEntry.dwarf32_id,
+ .debug_frame => switch (unit_header.format) {
+ .@"32" => CommonInformationEntry.dwarf32_id,
+ .@"64" => CommonInformationEntry.dwarf64_id,
+ },
else => unreachable,
};
- const result = EntryHeader{
+ return .{
.length_offset = length_offset,
- .is_64 = is_64,
- .type = if (id == cie_id) .{ .cie = {} } else .{
- .fde = switch (dwarf_section) {
- .eh_frame => try std.math.sub(u64, stream.pos - id_len, id),
- .debug_frame => id,
- else => unreachable,
- },
- },
+ .format = unit_header.format,
+ .type = if (id == cie_id) .cie else .{ .fde = switch (dwarf_section) {
+ .eh_frame => try math.sub(u64, start_offset, id),
+ .debug_frame => id,
+ else => unreachable,
+ } },
.entry_bytes = entry_bytes,
};
-
- stream.pos += entry_bytes.len;
- return result;
- }
-
- /// The length of the entry including the ID field, but not the length field itself
- pub fn entryLength(self: EntryHeader) usize {
- return self.entry_bytes.len + @as(u8, if (self.is_64) 8 else 4);
}
};
@@ -2558,7 +2495,7 @@ pub const CommonInformationEntry = struct {
length_offset: u64,
version: u8,
address_size: u8,
- is_64: bool,
+ format: Format,
// Only present in version 4
segment_selector_size: ?u8,
@@ -2602,7 +2539,7 @@ pub const CommonInformationEntry = struct {
cie_bytes: []const u8,
pc_rel_offset: i64,
is_runtime: bool,
- is_64: bool,
+ format: Format,
dwarf_section: DwarfSection,
length_offset: u64,
addr_size_bytes: u8,
@@ -2610,10 +2547,9 @@ pub const CommonInformationEntry = struct {
) !CommonInformationEntry {
if (addr_size_bytes > 8) return error.UnsupportedAddrSize;
- var stream = io.fixedBufferStream(cie_bytes);
- const reader = stream.reader();
+ var fbr: FixedBufferReader = .{ .buf = cie_bytes, .endian = endian };
- const version = try reader.readByte();
+ const version = try fbr.readByte();
switch (dwarf_section) {
.eh_frame => if (version != 1 and version != 3) return error.UnsupportedDwarfVersion,
.debug_frame => if (version != 4) return error.UnsupportedDwarfVersion,
@@ -2624,9 +2560,9 @@ pub const CommonInformationEntry = struct {
var has_aug_data = false;
var aug_str_len: usize = 0;
- const aug_str_start = stream.pos;
- var aug_byte = try reader.readByte();
- while (aug_byte != 0) : (aug_byte = try reader.readByte()) {
+ const aug_str_start = fbr.pos;
+ var aug_byte = try fbr.readByte();
+ while (aug_byte != 0) : (aug_byte = try fbr.readByte()) {
switch (aug_byte) {
'z' => {
if (aug_str_len != 0) return badDwarf();
@@ -2634,7 +2570,7 @@ pub const CommonInformationEntry = struct {
},
'e' => {
if (has_aug_data or aug_str_len != 0) return badDwarf();
- if (try reader.readByte() != 'h') return badDwarf();
+ if (try fbr.readByte() != 'h') return badDwarf();
has_eh_data = true;
},
else => if (has_eh_data) return badDwarf(),
@@ -2645,15 +2581,15 @@ pub const CommonInformationEntry = struct {
if (has_eh_data) {
// legacy data created by older versions of gcc - unsupported here
- for (0..addr_size_bytes) |_| _ = try reader.readByte();
+ for (0..addr_size_bytes) |_| _ = try fbr.readByte();
}
- const address_size = if (version == 4) try reader.readByte() else addr_size_bytes;
- const segment_selector_size = if (version == 4) try reader.readByte() else null;
+ const address_size = if (version == 4) try fbr.readByte() else addr_size_bytes;
+ const segment_selector_size = if (version == 4) try fbr.readByte() else null;
- const code_alignment_factor = try leb.readULEB128(u32, reader);
- const data_alignment_factor = try leb.readILEB128(i32, reader);
- const return_address_register = if (version == 1) try reader.readByte() else try leb.readULEB128(u8, reader);
+ const code_alignment_factor = try fbr.readUleb128(u32);
+ const data_alignment_factor = try fbr.readIleb128(i32);
+ const return_address_register = if (version == 1) try fbr.readByte() else try fbr.readUleb128(u8);
var lsda_pointer_enc: u8 = EH.PE.omit;
var personality_enc: ?u8 = null;
@@ -2662,31 +2598,25 @@ pub const CommonInformationEntry = struct {
var aug_data: []const u8 = &[_]u8{};
const aug_str = if (has_aug_data) blk: {
- const aug_data_len = try leb.readULEB128(usize, reader);
- const aug_data_start = stream.pos;
+ const aug_data_len = try fbr.readUleb128(usize);
+ const aug_data_start = fbr.pos;
aug_data = cie_bytes[aug_data_start..][0..aug_data_len];
const aug_str = cie_bytes[aug_str_start..][0..aug_str_len];
for (aug_str[1..]) |byte| {
switch (byte) {
'L' => {
- lsda_pointer_enc = try reader.readByte();
+ lsda_pointer_enc = try fbr.readByte();
},
'P' => {
- personality_enc = try reader.readByte();
- personality_routine_pointer = try readEhPointer(
- reader,
- personality_enc.?,
- addr_size_bytes,
- .{
- .pc_rel_base = try pcRelBase(@intFromPtr(&cie_bytes[stream.pos]), pc_rel_offset),
- .follow_indirect = is_runtime,
- },
- endian,
- );
+ personality_enc = try fbr.readByte();
+ personality_routine_pointer = try readEhPointer(&fbr, personality_enc.?, addr_size_bytes, .{
+ .pc_rel_base = try pcRelBase(@intFromPtr(&cie_bytes[fbr.pos]), pc_rel_offset),
+ .follow_indirect = is_runtime,
+ });
},
'R' => {
- fde_pointer_enc = try reader.readByte();
+ fde_pointer_enc = try fbr.readByte();
},
'S', 'B', 'G' => {},
else => return badDwarf(),
@@ -2694,16 +2624,16 @@ pub const CommonInformationEntry = struct {
}
// aug_data_len can include padding so the CIE ends on an address boundary
- try stream.seekTo(aug_data_start + aug_data_len);
+ fbr.pos = aug_data_start + aug_data_len;
break :blk aug_str;
} else &[_]u8{};
- const initial_instructions = cie_bytes[stream.pos..];
+ const initial_instructions = cie_bytes[fbr.pos..];
return .{
.length_offset = length_offset,
.version = version,
.address_size = address_size,
- .is_64 = is_64,
+ .format = format,
.segment_selector_size = segment_selector_size,
.code_alignment_factor = code_alignment_factor,
.data_alignment_factor = data_alignment_factor,
@@ -2751,56 +2681,37 @@ pub const FrameDescriptionEntry = struct {
) !FrameDescriptionEntry {
if (addr_size_bytes > 8) return error.InvalidAddrSize;
- var stream = io.fixedBufferStream(fde_bytes);
- const reader = stream.reader();
+ var fbr: FixedBufferReader = .{ .buf = fde_bytes, .endian = endian };
- const pc_begin = try readEhPointer(
- reader,
- cie.fde_pointer_enc,
- addr_size_bytes,
- .{
- .pc_rel_base = try pcRelBase(@intFromPtr(&fde_bytes[stream.pos]), pc_rel_offset),
- .follow_indirect = is_runtime,
- },
- endian,
- ) orelse return badDwarf();
-
- const pc_range = try readEhPointer(
- reader,
- cie.fde_pointer_enc,
- addr_size_bytes,
- .{
- .pc_rel_base = 0,
- .follow_indirect = false,
- },
- endian,
- ) orelse return badDwarf();
+ const pc_begin = try readEhPointer(&fbr, cie.fde_pointer_enc, addr_size_bytes, .{
+ .pc_rel_base = try pcRelBase(@intFromPtr(&fde_bytes[fbr.pos]), pc_rel_offset),
+ .follow_indirect = is_runtime,
+ }) orelse return badDwarf();
+
+ const pc_range = try readEhPointer(&fbr, cie.fde_pointer_enc, addr_size_bytes, .{
+ .pc_rel_base = 0,
+ .follow_indirect = false,
+ }) orelse return badDwarf();
var aug_data: []const u8 = &[_]u8{};
const lsda_pointer = if (cie.aug_str.len > 0) blk: {
- const aug_data_len = try leb.readULEB128(usize, reader);
- const aug_data_start = stream.pos;
+ const aug_data_len = try fbr.readUleb128(usize);
+ const aug_data_start = fbr.pos;
aug_data = fde_bytes[aug_data_start..][0..aug_data_len];
const lsda_pointer = if (cie.lsda_pointer_enc != EH.PE.omit)
- try readEhPointer(
- reader,
- cie.lsda_pointer_enc,
- addr_size_bytes,
- .{
- .pc_rel_base = try pcRelBase(@intFromPtr(&fde_bytes[stream.pos]), pc_rel_offset),
- .follow_indirect = is_runtime,
- },
- endian,
- )
+ try readEhPointer(&fbr, cie.lsda_pointer_enc, addr_size_bytes, .{
+ .pc_rel_base = try pcRelBase(@intFromPtr(&fde_bytes[fbr.pos]), pc_rel_offset),
+ .follow_indirect = is_runtime,
+ })
else
null;
- try stream.seekTo(aug_data_start + aug_data_len);
+ fbr.pos = aug_data_start + aug_data_len;
break :blk lsda_pointer;
} else null;
- const instructions = fde_bytes[stream.pos..];
+ const instructions = fde_bytes[fbr.pos..];
return .{
.cie_length_offset = cie.length_offset,
.pc_begin = pc_begin,
@@ -2820,6 +2731,75 @@ fn pcRelBase(field_ptr: usize, pc_rel_offset: i64) !usize {
}
}
+// Reading debug info needs to be fast, even when compiled in debug mode,
+// so avoid using a `std.io.FixedBufferStream` which is too slow.
+const FixedBufferReader = struct {
+ buf: []const u8,
+ pos: usize = 0,
+ endian: std.builtin.Endian,
+
+ pub const Error = error{ EndOfBuffer, Overflow };
+
+ fn seekTo(fbr: *FixedBufferReader, pos: u64) Error!void {
+ if (pos > fbr.buf.len) return error.EndOfBuffer;
+ fbr.pos = @intCast(pos);
+ }
+
+ fn seekForward(fbr: *FixedBufferReader, amount: u64) Error!void {
+ if (fbr.buf.len - fbr.pos < amount) return error.EndOfBuffer;
+ fbr.pos += @intCast(amount);
+ }
+
+ pub inline fn readByte(fbr: *FixedBufferReader) Error!u8 {
+ if (fbr.pos >= fbr.buf.len) return error.EndOfBuffer;
+ defer fbr.pos += 1;
+ return fbr.buf[fbr.pos];
+ }
+
+ fn readByteSigned(fbr: *FixedBufferReader) Error!i8 {
+ return @bitCast(try fbr.readByte());
+ }
+
+ fn readInt(fbr: *FixedBufferReader, comptime T: type) Error!T {
+ const size = @divExact(@typeInfo(T).Int.bits, 8);
+ if (fbr.buf.len - fbr.pos < size) return error.EndOfBuffer;
+ defer fbr.pos += size;
+ return mem.readInt(T, fbr.buf[fbr.pos..][0..size], fbr.endian);
+ }
+
+ fn readUleb128(fbr: *FixedBufferReader, comptime T: type) Error!T {
+ return std.leb.readULEB128(T, fbr);
+ }
+
+ fn readIleb128(fbr: *FixedBufferReader, comptime T: type) Error!T {
+ return std.leb.readILEB128(T, fbr);
+ }
+
+ fn readAddress(fbr: *FixedBufferReader, format: Format) Error!u64 {
+ return switch (format) {
+ .@"32" => try fbr.readInt(u32),
+ .@"64" => try fbr.readInt(u64),
+ };
+ }
+
+ fn readBytes(fbr: *FixedBufferReader, len: usize) Error![]const u8 {
+ if (fbr.buf.len - fbr.pos < len) return error.EndOfBuffer;
+ defer fbr.pos += len;
+ return fbr.buf[fbr.pos..][0..len];
+ }
+
+ fn readBytesTo(fbr: *FixedBufferReader, comptime sentinel: u8) Error![:sentinel]const u8 {
+ const end = @call(.always_inline, mem.indexOfScalarPos, .{
+ u8,
+ fbr.buf,
+ fbr.pos,
+ sentinel,
+ }) orelse return error.EndOfBuffer;
+ defer fbr.pos = end + 1;
+ return fbr.buf[fbr.pos..end :sentinel];
+ }
+};
+
test {
std.testing.refAllDecls(@This());
}
diff --git a/lib/std/dwarf/TAG.zig b/lib/std/dwarf/TAG.zig
index 50e7cb3889..3e16925ee0 100644
--- a/lib/std/dwarf/TAG.zig
+++ b/lib/std/dwarf/TAG.zig
@@ -116,3 +116,6 @@ pub const upc_relaxed_type = 0x8767;
// PGI (STMicroelectronics; extensions. No documentation available.
pub const PGI_kanji_type = 0xA000;
pub const PGI_interface_block = 0xA020;
+
+// ZIG extensions.
+pub const ZIG_padding = 0xfdb1;
diff --git a/lib/std/dwarf/expressions.zig b/lib/std/dwarf/expressions.zig
index 61acab7793..4582ec717e 100644
--- a/lib/std/dwarf/expressions.zig
+++ b/lib/std/dwarf/expressions.zig
@@ -12,8 +12,8 @@ const native_endian = builtin.cpu.arch.endian();
/// Callers should specify all the fields relevant to their context. If a field is required
/// by the expression and it isn't in the context, error.IncompleteExpressionContext is returned.
pub const ExpressionContext = struct {
- /// This expression is from a DWARF64 section
- is_64: bool = false,
+ /// The dwarf format of the section this expression is in
+ format: dwarf.Format = .@"32",
/// If specified, any addresses will pass through this function before being acccessed
isValidMemory: ?*const fn (address: usize) bool = null,
@@ -190,10 +190,10 @@ pub fn StackMachine(comptime options: ExpressionOptions) type {
const reader = stream.reader();
return switch (opcode) {
OP.addr => generic(try reader.readInt(addr_type, options.endian)),
- OP.call_ref => if (context.is_64)
- generic(try reader.readInt(u64, options.endian))
- else
- generic(try reader.readInt(u32, options.endian)),
+ OP.call_ref => switch (context.format) {
+ .@"32" => generic(try reader.readInt(u32, options.endian)),
+ .@"64" => generic(try reader.readInt(u64, options.endian)),
+ },
OP.const1u,
OP.pick,
=> generic(try reader.readByte()),
@@ -366,15 +366,15 @@ pub fn StackMachine(comptime options: ExpressionOptions) type {
_ = offset;
switch (context.compile_unit.?.frame_base.?.*) {
- .ExprLoc => {
+ .exprloc => {
// TODO: Run this expression in a nested stack machine
return error.UnimplementedOpcode;
},
- .LocListOffset => {
+ .loclistx => {
// TODO: Read value from .debug_loclists
return error.UnimplementedOpcode;
},
- .SecOffset => {
+ .sec_offset => {
// TODO: Read value from .debug_loclists
return error.UnimplementedOpcode;
},
diff --git a/lib/std/dynamic_library.zig b/lib/std/dynamic_library.zig
index ee2b905aeb..ebfe8fe0ee 100644
--- a/lib/std/dynamic_library.zig
+++ b/lib/std/dynamic_library.zig
@@ -115,7 +115,7 @@ pub const ElfDynLib = struct {
/// Trusts the file. Malicious file will be able to execute arbitrary code.
pub fn open(path: []const u8) !ElfDynLib {
- const fd = try os.open(path, 0, os.O.RDONLY | os.O.CLOEXEC);
+ const fd = try os.open(path, .{ .ACCMODE = .RDONLY, .CLOEXEC = true }, 0);
defer os.close(fd);
const stat = try os.fstat(fd);
diff --git a/lib/std/elf.zig b/lib/std/elf.zig
index a72c96c51e..e40c215e83 100644
--- a/lib/std/elf.zig
+++ b/lib/std/elf.zig
@@ -1806,91 +1806,430 @@ pub const COMPRESS = enum(u32) {
};
/// AMD x86-64 relocations.
-/// No reloc
-pub const R_X86_64_NONE = 0;
-/// Direct 64 bit
-pub const R_X86_64_64 = 1;
-/// PC relative 32 bit signed
-pub const R_X86_64_PC32 = 2;
-/// 32 bit GOT entry
-pub const R_X86_64_GOT32 = 3;
-/// 32 bit PLT address
-pub const R_X86_64_PLT32 = 4;
-/// Copy symbol at runtime
-pub const R_X86_64_COPY = 5;
-/// Create GOT entry
-pub const R_X86_64_GLOB_DAT = 6;
-/// Create PLT entry
-pub const R_X86_64_JUMP_SLOT = 7;
-/// Adjust by program base
-pub const R_X86_64_RELATIVE = 8;
-/// 32 bit signed PC relative offset to GOT
-pub const R_X86_64_GOTPCREL = 9;
-/// Direct 32 bit zero extended
-pub const R_X86_64_32 = 10;
-/// Direct 32 bit sign extended
-pub const R_X86_64_32S = 11;
-/// Direct 16 bit zero extended
-pub const R_X86_64_16 = 12;
-/// 16 bit sign extended pc relative
-pub const R_X86_64_PC16 = 13;
-/// Direct 8 bit sign extended
-pub const R_X86_64_8 = 14;
-/// 8 bit sign extended pc relative
-pub const R_X86_64_PC8 = 15;
-/// ID of module containing symbol
-pub const R_X86_64_DTPMOD64 = 16;
-/// Offset in module's TLS block
-pub const R_X86_64_DTPOFF64 = 17;
-/// Offset in initial TLS block
-pub const R_X86_64_TPOFF64 = 18;
-/// 32 bit signed PC relative offset to two GOT entries for GD symbol
-pub const R_X86_64_TLSGD = 19;
-/// 32 bit signed PC relative offset to two GOT entries for LD symbol
-pub const R_X86_64_TLSLD = 20;
-/// Offset in TLS block
-pub const R_X86_64_DTPOFF32 = 21;
-/// 32 bit signed PC relative offset to GOT entry for IE symbol
-pub const R_X86_64_GOTTPOFF = 22;
-/// Offset in initial TLS block
-pub const R_X86_64_TPOFF32 = 23;
-/// PC relative 64 bit
-pub const R_X86_64_PC64 = 24;
-/// 64 bit offset to GOT
-pub const R_X86_64_GOTOFF64 = 25;
-/// 32 bit signed pc relative offset to GOT
-pub const R_X86_64_GOTPC32 = 26;
-/// 64 bit GOT entry offset
-pub const R_X86_64_GOT64 = 27;
-/// 64 bit PC relative offset to GOT entry
-pub const R_X86_64_GOTPCREL64 = 28;
-/// 64 bit PC relative offset to GOT
-pub const R_X86_64_GOTPC64 = 29;
-/// Like GOT64, says PLT entry needed
-pub const R_X86_64_GOTPLT64 = 30;
-/// 64-bit GOT relative offset to PLT entry
-pub const R_X86_64_PLTOFF64 = 31;
-/// Size of symbol plus 32-bit addend
-pub const R_X86_64_SIZE32 = 32;
-/// Size of symbol plus 64-bit addend
-pub const R_X86_64_SIZE64 = 33;
-/// GOT offset for TLS descriptor
-pub const R_X86_64_GOTPC32_TLSDESC = 34;
-/// Marker for call through TLS descriptor
-pub const R_X86_64_TLSDESC_CALL = 35;
-/// TLS descriptor
-pub const R_X86_64_TLSDESC = 36;
-/// Adjust indirectly by program base
-pub const R_X86_64_IRELATIVE = 37;
-/// 64-bit adjust by program base
-pub const R_X86_64_RELATIVE64 = 38;
-/// 39 Reserved was R_X86_64_PC32_BND
-/// 40 Reserved was R_X86_64_PLT32_BND
-/// Load from 32 bit signed pc relative offset to GOT entry without REX prefix, relaxable
-pub const R_X86_64_GOTPCRELX = 41;
-/// Load from 32 bit signed PC relative offset to GOT entry with REX prefix, relaxable
-pub const R_X86_64_REX_GOTPCRELX = 42;
-pub const R_X86_64_NUM = 43;
+pub const R_X86_64 = enum(u32) {
+ /// No reloc
+ NONE = 0,
+ /// Direct 64 bit
+ @"64" = 1,
+ /// PC relative 32 bit signed
+ PC32 = 2,
+ /// 32 bit GOT entry
+ GOT32 = 3,
+ /// 32 bit PLT address
+ PLT32 = 4,
+ /// Copy symbol at runtime
+ COPY = 5,
+ /// Create GOT entry
+ GLOB_DAT = 6,
+ /// Create PLT entry
+ JUMP_SLOT = 7,
+ /// Adjust by program base
+ RELATIVE = 8,
+ /// 32 bit signed PC relative offset to GOT
+ GOTPCREL = 9,
+ /// Direct 32 bit zero extended
+ @"32" = 10,
+ /// Direct 32 bit sign extended
+ @"32S" = 11,
+ /// Direct 16 bit zero extended
+ @"16" = 12,
+ /// 16 bit sign extended pc relative
+ PC16 = 13,
+ /// Direct 8 bit sign extended
+ @"8" = 14,
+ /// 8 bit sign extended pc relative
+ PC8 = 15,
+ /// ID of module containing symbol
+ DTPMOD64 = 16,
+ /// Offset in module's TLS block
+ DTPOFF64 = 17,
+ /// Offset in initial TLS block
+ TPOFF64 = 18,
+ /// 32 bit signed PC relative offset to two GOT entries for GD symbol
+ TLSGD = 19,
+ /// 32 bit signed PC relative offset to two GOT entries for LD symbol
+ TLSLD = 20,
+ /// Offset in TLS block
+ DTPOFF32 = 21,
+ /// 32 bit signed PC relative offset to GOT entry for IE symbol
+ GOTTPOFF = 22,
+ /// Offset in initial TLS block
+ TPOFF32 = 23,
+ /// PC relative 64 bit
+ PC64 = 24,
+ /// 64 bit offset to GOT
+ GOTOFF64 = 25,
+ /// 32 bit signed pc relative offset to GOT
+ GOTPC32 = 26,
+ /// 64 bit GOT entry offset
+ GOT64 = 27,
+ /// 64 bit PC relative offset to GOT entry
+ GOTPCREL64 = 28,
+ /// 64 bit PC relative offset to GOT
+ GOTPC64 = 29,
+ /// Like GOT64, says PLT entry needed
+ GOTPLT64 = 30,
+ /// 64-bit GOT relative offset to PLT entry
+ PLTOFF64 = 31,
+ /// Size of symbol plus 32-bit addend
+ SIZE32 = 32,
+ /// Size of symbol plus 64-bit addend
+ SIZE64 = 33,
+ /// GOT offset for TLS descriptor
+ GOTPC32_TLSDESC = 34,
+ /// Marker for call through TLS descriptor
+ TLSDESC_CALL = 35,
+ /// TLS descriptor
+ TLSDESC = 36,
+ /// Adjust indirectly by program base
+ IRELATIVE = 37,
+ /// 64-bit adjust by program base
+ RELATIVE64 = 38,
+ /// 39 Reserved was PC32_BND
+ /// 40 Reserved was PLT32_BND
+ /// Load from 32 bit signed pc relative offset to GOT entry without REX prefix, relaxable
+ GOTPCRELX = 41,
+ /// Load from 32 bit signed PC relative offset to GOT entry with REX prefix, relaxable
+ REX_GOTPCRELX = 42,
+ _,
+};
+
+/// AArch64 relocs.
+pub const R_AARCH64 = enum(u32) {
+ /// No relocation.
+ NONE = 0,
+ /// ILP32 AArch64 relocs.
+ /// Direct 32 bit.
+ P32_ABS32 = 1,
+ /// Copy symbol at runtime.
+ P32_COPY = 180,
+ /// Create GOT entry.
+ P32_GLOB_DAT = 181,
+ /// Create PLT entry.
+ P32_JUMP_SLOT = 182,
+ /// Adjust by program base.
+ P32_RELATIVE = 183,
+ /// Module number, 32 bit.
+ P32_TLS_DTPMOD = 184,
+ /// Module-relative offset, 32 bit.
+ P32_TLS_DTPREL = 185,
+ /// TP-relative offset, 32 bit.
+ P32_TLS_TPREL = 186,
+ /// TLS Descriptor.
+ P32_TLSDESC = 187,
+ /// STT_GNU_IFUNC relocation.
+ P32_IRELATIVE = 188,
+ /// LP64 AArch64 relocs.
+ /// Direct 64 bit.
+ ABS64 = 257,
+ /// Direct 32 bit.
+ ABS32 = 258,
+ /// Direct 16-bit.
+ ABS16 = 259,
+ /// PC-relative 64-bit.
+ PREL64 = 260,
+ /// PC-relative 32-bit.
+ PREL32 = 261,
+ /// PC-relative 16-bit.
+ PREL16 = 262,
+ /// Dir. MOVZ imm. from bits 15:0.
+ MOVW_UABS_G0 = 263,
+ /// Likewise for MOVK; no check.
+ MOVW_UABS_G0_NC = 264,
+ /// Dir. MOVZ imm. from bits 31:16.
+ MOVW_UABS_G1 = 265,
+ /// Likewise for MOVK; no check.
+ MOVW_UABS_G1_NC = 266,
+ /// Dir. MOVZ imm. from bits 47:32.
+ MOVW_UABS_G2 = 267,
+ /// Likewise for MOVK; no check.
+ MOVW_UABS_G2_NC = 268,
+ /// Dir. MOV{K,Z} imm. from 63:48.
+ MOVW_UABS_G3 = 269,
+ /// Dir. MOV{N,Z} imm. from 15:0.
+ MOVW_SABS_G0 = 270,
+ /// Dir. MOV{N,Z} imm. from 31:16.
+ MOVW_SABS_G1 = 271,
+ /// Dir. MOV{N,Z} imm. from 47:32.
+ MOVW_SABS_G2 = 272,
+ /// PC-rel. LD imm. from bits 20:2.
+ LD_PREL_LO19 = 273,
+ /// PC-rel. ADR imm. from bits 20:0.
+ ADR_PREL_LO21 = 274,
+ /// Page-rel. ADRP imm. from 32:12.
+ ADR_PREL_PG_HI21 = 275,
+ /// Likewise; no overflow check.
+ ADR_PREL_PG_HI21_NC = 276,
+ /// Dir. ADD imm. from bits 11:0.
+ ADD_ABS_LO12_NC = 277,
+ /// Likewise for LD/ST; no check.
+ LDST8_ABS_LO12_NC = 278,
+ /// PC-rel. TBZ/TBNZ imm. from 15:2.
+ TSTBR14 = 279,
+ /// PC-rel. cond. br. imm. from 20:2.
+ CONDBR19 = 280,
+ /// PC-rel. B imm. from bits 27:2.
+ JUMP26 = 282,
+ /// Likewise for CALL.
+ CALL26 = 283,
+ /// Dir. ADD imm. from bits 11:1.
+ LDST16_ABS_LO12_NC = 284,
+ /// Likewise for bits 11:2.
+ LDST32_ABS_LO12_NC = 285,
+ /// Likewise for bits 11:3.
+ LDST64_ABS_LO12_NC = 286,
+ /// PC-rel. MOV{N,Z} imm. from 15:0.
+ MOVW_PREL_G0 = 287,
+ /// Likewise for MOVK; no check.
+ MOVW_PREL_G0_NC = 288,
+ /// PC-rel. MOV{N,Z} imm. from 31:16.
+ MOVW_PREL_G1 = 289,
+ /// Likewise for MOVK; no check.
+ MOVW_PREL_G1_NC = 290,
+ /// PC-rel. MOV{N,Z} imm. from 47:32.
+ MOVW_PREL_G2 = 291,
+ /// Likewise for MOVK; no check.
+ MOVW_PREL_G2_NC = 292,
+ /// PC-rel. MOV{N,Z} imm. from 63:48.
+ MOVW_PREL_G3 = 293,
+ /// Dir. ADD imm. from bits 11:4.
+ LDST128_ABS_LO12_NC = 299,
+ /// GOT-rel. off. MOV{N,Z} imm. 15:0.
+ MOVW_GOTOFF_G0 = 300,
+ /// Likewise for MOVK; no check.
+ MOVW_GOTOFF_G0_NC = 301,
+ /// GOT-rel. o. MOV{N,Z} imm. 31:16.
+ MOVW_GOTOFF_G1 = 302,
+ /// Likewise for MOVK; no check.
+ MOVW_GOTOFF_G1_NC = 303,
+ /// GOT-rel. o. MOV{N,Z} imm. 47:32.
+ MOVW_GOTOFF_G2 = 304,
+ /// Likewise for MOVK; no check.
+ MOVW_GOTOFF_G2_NC = 305,
+ /// GOT-rel. o. MOV{N,Z} imm. 63:48.
+ MOVW_GOTOFF_G3 = 306,
+ /// GOT-relative 64-bit.
+ GOTREL64 = 307,
+ /// GOT-relative 32-bit.
+ GOTREL32 = 308,
+ /// PC-rel. GOT off. load imm. 20:2.
+ GOT_LD_PREL19 = 309,
+ /// GOT-rel. off. LD/ST imm. 14:3.
+ LD64_GOTOFF_LO15 = 310,
+ /// P-page-rel. GOT off. ADRP 32:12.
+ ADR_GOT_PAGE = 311,
+ /// Dir. GOT off. LD/ST imm. 11:3.
+ LD64_GOT_LO12_NC = 312,
+ /// GOT-page-rel. GOT off. LD/ST 14:3
+ LD64_GOTPAGE_LO15 = 313,
+ /// PC-relative ADR imm. 20:0.
+ TLSGD_ADR_PREL21 = 512,
+ /// page-rel. ADRP imm. 32:12.
+ TLSGD_ADR_PAGE21 = 513,
+ /// direct ADD imm. from 11:0.
+ TLSGD_ADD_LO12_NC = 514,
+ /// GOT-rel. MOV{N,Z} 31:16.
+ TLSGD_MOVW_G1 = 515,
+ /// GOT-rel. MOVK imm. 15:0.
+ TLSGD_MOVW_G0_NC = 516,
+ /// Like 512; local dynamic model.
+ TLSLD_ADR_PREL21 = 517,
+ /// Like 513; local dynamic model.
+ TLSLD_ADR_PAGE21 = 518,
+ /// Like 514; local dynamic model.
+ TLSLD_ADD_LO12_NC = 519,
+ /// Like 515; local dynamic model.
+ TLSLD_MOVW_G1 = 520,
+ /// Like 516; local dynamic model.
+ TLSLD_MOVW_G0_NC = 521,
+ /// TLS PC-rel. load imm. 20:2.
+ TLSLD_LD_PREL19 = 522,
+ /// TLS DTP-rel. MOV{N,Z} 47:32.
+ TLSLD_MOVW_DTPREL_G2 = 523,
+ /// TLS DTP-rel. MOV{N,Z} 31:16.
+ TLSLD_MOVW_DTPREL_G1 = 524,
+ /// Likewise; MOVK; no check.
+ TLSLD_MOVW_DTPREL_G1_NC = 525,
+ /// TLS DTP-rel. MOV{N,Z} 15:0.
+ TLSLD_MOVW_DTPREL_G0 = 526,
+ /// Likewise; MOVK; no check.
+ TLSLD_MOVW_DTPREL_G0_NC = 527,
+ /// DTP-rel. ADD imm. from 23:12.
+ TLSLD_ADD_DTPREL_HI12 = 528,
+ /// DTP-rel. ADD imm. from 11:0.
+ TLSLD_ADD_DTPREL_LO12 = 529,
+ /// Likewise; no ovfl. check.
+ TLSLD_ADD_DTPREL_LO12_NC = 530,
+ /// DTP-rel. LD/ST imm. 11:0.
+ TLSLD_LDST8_DTPREL_LO12 = 531,
+ /// Likewise; no check.
+ TLSLD_LDST8_DTPREL_LO12_NC = 532,
+ /// DTP-rel. LD/ST imm. 11:1.
+ TLSLD_LDST16_DTPREL_LO12 = 533,
+ /// Likewise; no check.
+ TLSLD_LDST16_DTPREL_LO12_NC = 534,
+ /// DTP-rel. LD/ST imm. 11:2.
+ TLSLD_LDST32_DTPREL_LO12 = 535,
+ /// Likewise; no check.
+ TLSLD_LDST32_DTPREL_LO12_NC = 536,
+ /// DTP-rel. LD/ST imm. 11:3.
+ TLSLD_LDST64_DTPREL_LO12 = 537,
+ /// Likewise; no check.
+ TLSLD_LDST64_DTPREL_LO12_NC = 538,
+ /// GOT-rel. MOV{N,Z} 31:16.
+ TLSIE_MOVW_GOTTPREL_G1 = 539,
+ /// GOT-rel. MOVK 15:0.
+ TLSIE_MOVW_GOTTPREL_G0_NC = 540,
+ /// Page-rel. ADRP 32:12.
+ TLSIE_ADR_GOTTPREL_PAGE21 = 541,
+ /// Direct LD off. 11:3.
+ TLSIE_LD64_GOTTPREL_LO12_NC = 542,
+ /// PC-rel. load imm. 20:2.
+ TLSIE_LD_GOTTPREL_PREL19 = 543,
+ /// TLS TP-rel. MOV{N,Z} 47:32.
+ TLSLE_MOVW_TPREL_G2 = 544,
+ /// TLS TP-rel. MOV{N,Z} 31:16.
+ TLSLE_MOVW_TPREL_G1 = 545,
+ /// Likewise; MOVK; no check.
+ TLSLE_MOVW_TPREL_G1_NC = 546,
+ /// TLS TP-rel. MOV{N,Z} 15:0.
+ TLSLE_MOVW_TPREL_G0 = 547,
+ /// Likewise; MOVK; no check.
+ TLSLE_MOVW_TPREL_G0_NC = 548,
+ /// TP-rel. ADD imm. 23:12.
+ TLSLE_ADD_TPREL_HI12 = 549,
+ /// TP-rel. ADD imm. 11:0.
+ TLSLE_ADD_TPREL_LO12 = 550,
+ /// Likewise; no ovfl. check.
+ TLSLE_ADD_TPREL_LO12_NC = 551,
+ /// TP-rel. LD/ST off. 11:0.
+ TLSLE_LDST8_TPREL_LO12 = 552,
+ /// Likewise; no ovfl. check.
+ TLSLE_LDST8_TPREL_LO12_NC = 553,
+ /// TP-rel. LD/ST off. 11:1.
+ TLSLE_LDST16_TPREL_LO12 = 554,
+ /// Likewise; no check.
+ TLSLE_LDST16_TPREL_LO12_NC = 555,
+ /// TP-rel. LD/ST off. 11:2.
+ TLSLE_LDST32_TPREL_LO12 = 556,
+ /// Likewise; no check.
+ TLSLE_LDST32_TPREL_LO12_NC = 557,
+ /// TP-rel. LD/ST off. 11:3.
+ TLSLE_LDST64_TPREL_LO12 = 558,
+ /// Likewise; no check.
+ TLSLE_LDST64_TPREL_LO12_NC = 559,
+ /// PC-rel. load immediate 20:2.
+ TLSDESC_LD_PREL19 = 560,
+ /// PC-rel. ADR immediate 20:0.
+ TLSDESC_ADR_PREL21 = 561,
+ /// Page-rel. ADRP imm. 32:12.
+ TLSDESC_ADR_PAGE21 = 562,
+ /// Direct LD off. from 11:3.
+ TLSDESC_LD64_LO12 = 563,
+ /// Direct ADD imm. from 11:0.
+ TLSDESC_ADD_LO12 = 564,
+ /// GOT-rel. MOV{N,Z} imm. 31:16.
+ TLSDESC_OFF_G1 = 565,
+ /// GOT-rel. MOVK imm. 15:0; no ck.
+ TLSDESC_OFF_G0_NC = 566,
+ /// Relax LDR.
+ TLSDESC_LDR = 567,
+ /// Relax ADD.
+ TLSDESC_ADD = 568,
+ /// Relax BLR.
+ TLSDESC_CALL = 569,
+ /// TP-rel. LD/ST off. 11:4.
+ TLSLE_LDST128_TPREL_LO12 = 570,
+ /// Likewise; no check.
+ TLSLE_LDST128_TPREL_LO12_NC = 571,
+ /// DTP-rel. LD/ST imm. 11:4.
+ TLSLD_LDST128_DTPREL_LO12 = 572,
+ /// Likewise; no check.
+ TLSLD_LDST128_DTPREL_LO12_NC = 573,
+ /// Copy symbol at runtime.
+ COPY = 1024,
+ /// Create GOT entry.
+ GLOB_DAT = 1025,
+ /// Create PLT entry.
+ JUMP_SLOT = 1026,
+ /// Adjust by program base.
+ RELATIVE = 1027,
+ /// Module number, 64 bit.
+ TLS_DTPMOD = 1028,
+ /// Module-relative offset, 64 bit.
+ TLS_DTPREL = 1029,
+ /// TP-relative offset, 64 bit.
+ TLS_TPREL = 1030,
+ /// TLS Descriptor.
+ TLSDESC = 1031,
+ /// STT_GNU_IFUNC relocation.
+ IRELATIVE = 1032,
+ _,
+};
+
+/// RISC-V relocations.
+pub const R_RISCV = enum(u32) {
+ NONE = 0,
+ @"32" = 1,
+ @"64" = 2,
+ RELATIVE = 3,
+ COPY = 4,
+ JUMP_SLOT = 5,
+ TLS_DTPMOD32 = 6,
+ TLS_DTPMOD64 = 7,
+ TLS_DTPREL32 = 8,
+ TLS_DTPREL64 = 9,
+ TLS_TPREL32 = 10,
+ TLS_TPREL64 = 11,
+ TLSDESC = 12,
+ BRANCH = 16,
+ JAL = 17,
+ CALL = 18,
+ CALL_PLT = 19,
+ GOT_HI20 = 20,
+ TLS_GOT_HI20 = 21,
+ TLS_GD_HI20 = 22,
+ PCREL_HI20 = 23,
+ PCREL_LO12_I = 24,
+ PCREL_LO12_S = 25,
+ HI20 = 26,
+ LO12_I = 27,
+ LO12_S = 28,
+ TPREL_HI20 = 29,
+ TPREL_LO12_I = 30,
+ TPREL_LO12_S = 31,
+ TPREL_ADD = 32,
+ ADD8 = 33,
+ ADD16 = 34,
+ ADD32 = 35,
+ ADD64 = 36,
+ SUB8 = 37,
+ SUB16 = 38,
+ SUB32 = 39,
+ SUB64 = 40,
+ GNU_VTINHERIT = 41,
+ GNU_VTENTRY = 42,
+ ALIGN = 43,
+ RVC_BRANCH = 44,
+ RVC_JUMP = 45,
+ RVC_LUI = 46,
+ GPREL_I = 47,
+ GPREL_S = 48,
+ TPREL_I = 49,
+ TPREL_S = 50,
+ RELAX = 51,
+ SUB6 = 52,
+ SET6 = 53,
+ SET8 = 54,
+ SET16 = 55,
+ SET32 = 56,
+ @"32_PCREL" = 57,
+ IRELATIVE = 58,
+ PLT32 = 59,
+ SET_ULEB128 = 60,
+ SUB_ULEB128 = 61,
+ _,
+};
pub const STV = enum(u2) {
DEFAULT = 0,
diff --git a/lib/std/event.zig b/lib/std/event.zig
deleted file mode 100644
index b0d61afbd9..0000000000
--- a/lib/std/event.zig
+++ /dev/null
@@ -1,23 +0,0 @@
-pub const Channel = @import("event/channel.zig").Channel;
-pub const Future = @import("event/future.zig").Future;
-pub const Group = @import("event/group.zig").Group;
-pub const Batch = @import("event/batch.zig").Batch;
-pub const Lock = @import("event/lock.zig").Lock;
-pub const Locked = @import("event/locked.zig").Locked;
-pub const RwLock = @import("event/rwlock.zig").RwLock;
-pub const RwLocked = @import("event/rwlocked.zig").RwLocked;
-pub const Loop = @import("event/loop.zig").Loop;
-pub const WaitGroup = @import("event/wait_group.zig").WaitGroup;
-
-test {
- _ = @import("event/channel.zig");
- _ = @import("event/future.zig");
- _ = @import("event/group.zig");
- _ = @import("event/batch.zig");
- _ = @import("event/lock.zig");
- _ = @import("event/locked.zig");
- _ = @import("event/rwlock.zig");
- _ = @import("event/rwlocked.zig");
- _ = @import("event/loop.zig");
- _ = @import("event/wait_group.zig");
-}
diff --git a/lib/std/event/batch.zig b/lib/std/event/batch.zig
deleted file mode 100644
index 9703a2512e..0000000000
--- a/lib/std/event/batch.zig
+++ /dev/null
@@ -1,141 +0,0 @@
-const std = @import("../std.zig");
-const testing = std.testing;
-
-/// Performs multiple async functions in parallel, without heap allocation.
-/// Async function frames are managed externally to this abstraction, and
-/// passed in via the `add` function. Once all the jobs are added, call `wait`.
-/// This API is *not* thread-safe. The object must be accessed from one thread at
-/// a time, however, it need not be the same thread.
-pub fn Batch(
- /// The return value for each job.
- /// If a job slot was re-used due to maxed out concurrency, then its result
- /// value will be overwritten. The values can be accessed with the `results` field.
- comptime Result: type,
- /// How many jobs to run in parallel.
- comptime max_jobs: comptime_int,
- /// Controls whether the `add` and `wait` functions will be async functions.
- comptime async_behavior: enum {
- /// Observe the value of `std.io.is_async` to decide whether `add`
- /// and `wait` will be async functions. Asserts that the jobs do not suspend when
- /// `std.options.io_mode == .blocking`. This is a generally safe assumption, and the
- /// usual recommended option for this parameter.
- auto_async,
-
- /// Always uses the `nosuspend` keyword when using `await` on the jobs,
- /// making `add` and `wait` non-async functions. Asserts that the jobs do not suspend.
- never_async,
-
- /// `add` and `wait` use regular `await` keyword, making them async functions.
- always_async,
- },
-) type {
- return struct {
- jobs: [max_jobs]Job,
- next_job_index: usize,
- collected_result: CollectedResult,
-
- const Job = struct {
- frame: ?anyframe->Result,
- result: Result,
- };
-
- const Self = @This();
-
- const CollectedResult = switch (@typeInfo(Result)) {
- .ErrorUnion => Result,
- else => void,
- };
-
- const async_ok = switch (async_behavior) {
- .auto_async => std.io.is_async,
- .never_async => false,
- .always_async => true,
- };
-
- pub fn init() Self {
- return Self{
- .jobs = [1]Job{
- .{
- .frame = null,
- .result = undefined,
- },
- } ** max_jobs,
- .next_job_index = 0,
- .collected_result = {},
- };
- }
-
- /// Add a frame to the Batch. If all jobs are in-flight, then this function
- /// waits until one completes.
- /// This function is *not* thread-safe. It must be called from one thread at
- /// a time, however, it need not be the same thread.
- /// TODO: "select" language feature to use the next available slot, rather than
- /// awaiting the next index.
- pub fn add(self: *Self, frame: anyframe->Result) void {
- const job = &self.jobs[self.next_job_index];
- self.next_job_index = (self.next_job_index + 1) % max_jobs;
- if (job.frame) |existing| {
- job.result = if (async_ok) await existing else nosuspend await existing;
- if (CollectedResult != void) {
- job.result catch |err| {
- self.collected_result = err;
- };
- }
- }
- job.frame = frame;
- }
-
- /// Wait for all the jobs to complete.
- /// Safe to call any number of times.
- /// If `Result` is an error union, this function returns the last error that occurred, if any.
- /// Unlike the `results` field, the return value of `wait` will report any error that occurred;
- /// hitting max parallelism will not compromise the result.
- /// This function is *not* thread-safe. It must be called from one thread at
- /// a time, however, it need not be the same thread.
- pub fn wait(self: *Self) CollectedResult {
- for (self.jobs) |*job|
- if (job.frame) |f| {
- job.result = if (async_ok) await f else nosuspend await f;
- if (CollectedResult != void) {
- job.result catch |err| {
- self.collected_result = err;
- };
- }
- job.frame = null;
- };
- return self.collected_result;
- }
- };
-}
-
-test "std.event.Batch" {
- if (true) return error.SkipZigTest;
- var count: usize = 0;
- var batch = Batch(void, 2, .auto_async).init();
- batch.add(&async sleepALittle(&count));
- batch.add(&async increaseByTen(&count));
- batch.wait();
- try testing.expect(count == 11);
-
- var another = Batch(anyerror!void, 2, .auto_async).init();
- another.add(&async somethingElse());
- another.add(&async doSomethingThatFails());
- try testing.expectError(error.ItBroke, another.wait());
-}
-
-fn sleepALittle(count: *usize) void {
- std.time.sleep(1 * std.time.ns_per_ms);
- _ = @atomicRmw(usize, count, .Add, 1, .SeqCst);
-}
-
-fn increaseByTen(count: *usize) void {
- var i: usize = 0;
- while (i < 10) : (i += 1) {
- _ = @atomicRmw(usize, count, .Add, 1, .SeqCst);
- }
-}
-
-fn doSomethingThatFails() anyerror!void {}
-fn somethingElse() anyerror!void {
- return error.ItBroke;
-}
diff --git a/lib/std/event/channel.zig b/lib/std/event/channel.zig
deleted file mode 100644
index 3329694da7..0000000000
--- a/lib/std/event/channel.zig
+++ /dev/null
@@ -1,334 +0,0 @@
-const std = @import("../std.zig");
-const builtin = @import("builtin");
-const assert = std.debug.assert;
-const testing = std.testing;
-const Loop = std.event.Loop;
-
-/// Many producer, many consumer, thread-safe, runtime configurable buffer size.
-/// When buffer is empty, consumers suspend and are resumed by producers.
-/// When buffer is full, producers suspend and are resumed by consumers.
-pub fn Channel(comptime T: type) type {
- return struct {
- getters: std.atomic.Queue(GetNode),
- or_null_queue: std.atomic.Queue(*std.atomic.Queue(GetNode).Node),
- putters: std.atomic.Queue(PutNode),
- get_count: usize,
- put_count: usize,
- dispatch_lock: bool,
- need_dispatch: bool,
-
- // simple fixed size ring buffer
- buffer_nodes: []T,
- buffer_index: usize,
- buffer_len: usize,
-
- const SelfChannel = @This();
- const GetNode = struct {
- tick_node: *Loop.NextTickNode,
- data: Data,
-
- const Data = union(enum) {
- Normal: Normal,
- OrNull: OrNull,
- };
-
- const Normal = struct {
- ptr: *T,
- };
-
- const OrNull = struct {
- ptr: *?T,
- or_null: *std.atomic.Queue(*std.atomic.Queue(GetNode).Node).Node,
- };
- };
- const PutNode = struct {
- data: T,
- tick_node: *Loop.NextTickNode,
- };
-
- const global_event_loop = Loop.instance orelse
- @compileError("std.event.Channel currently only works with event-based I/O");
-
- /// Call `deinit` to free resources when done.
- /// `buffer` must live until `deinit` is called.
- /// For a zero length buffer, use `[0]T{}`.
- /// TODO https://github.com/ziglang/zig/issues/2765
- pub fn init(self: *SelfChannel, buffer: []T) void {
- // The ring buffer implementation only works with power of 2 buffer sizes
- // because of relying on subtracting across zero. For example (0 -% 1) % 10 == 5
- assert(buffer.len == 0 or @popCount(buffer.len) == 1);
-
- self.* = SelfChannel{
- .buffer_len = 0,
- .buffer_nodes = buffer,
- .buffer_index = 0,
- .dispatch_lock = false,
- .need_dispatch = false,
- .getters = std.atomic.Queue(GetNode).init(),
- .putters = std.atomic.Queue(PutNode).init(),
- .or_null_queue = std.atomic.Queue(*std.atomic.Queue(GetNode).Node).init(),
- .get_count = 0,
- .put_count = 0,
- };
- }
-
- /// Must be called when all calls to put and get have suspended and no more calls occur.
- /// This can be omitted if caller can guarantee that the suspended putters and getters
- /// do not need to be run to completion. Note that this may leave awaiters hanging.
- pub fn deinit(self: *SelfChannel) void {
- while (self.getters.get()) |get_node| {
- resume get_node.data.tick_node.data;
- }
- while (self.putters.get()) |put_node| {
- resume put_node.data.tick_node.data;
- }
- self.* = undefined;
- }
-
- /// puts a data item in the channel. The function returns when the value has been added to the
- /// buffer, or in the case of a zero size buffer, when the item has been retrieved by a getter.
- /// Or when the channel is destroyed.
- pub fn put(self: *SelfChannel, data: T) void {
- var my_tick_node = Loop.NextTickNode{ .data = @frame() };
- var queue_node = std.atomic.Queue(PutNode).Node{
- .data = PutNode{
- .tick_node = &my_tick_node,
- .data = data,
- },
- };
-
- suspend {
- self.putters.put(&queue_node);
- _ = @atomicRmw(usize, &self.put_count, .Add, 1, .SeqCst);
-
- self.dispatch();
- }
- }
-
- /// await this function to get an item from the channel. If the buffer is empty, the frame will
- /// complete when the next item is put in the channel.
- pub fn get(self: *SelfChannel) callconv(.Async) T {
- // TODO https://github.com/ziglang/zig/issues/2765
- var result: T = undefined;
- var my_tick_node = Loop.NextTickNode{ .data = @frame() };
- var queue_node = std.atomic.Queue(GetNode).Node{
- .data = GetNode{
- .tick_node = &my_tick_node,
- .data = GetNode.Data{
- .Normal = GetNode.Normal{ .ptr = &result },
- },
- },
- };
-
- suspend {
- self.getters.put(&queue_node);
- _ = @atomicRmw(usize, &self.get_count, .Add, 1, .SeqCst);
-
- self.dispatch();
- }
- return result;
- }
-
- //pub async fn select(comptime EnumUnion: type, channels: ...) EnumUnion {
- // assert(@memberCount(EnumUnion) == channels.len); // enum union and channels mismatch
- // assert(channels.len != 0); // enum unions cannot have 0 fields
- // if (channels.len == 1) {
- // const result = await (async channels[0].get() catch unreachable);
- // return @unionInit(EnumUnion, @memberName(EnumUnion, 0), result);
- // }
- //}
-
- /// Get an item from the channel. If the buffer is empty and there are no
- /// puts waiting, this returns `null`.
- pub fn getOrNull(self: *SelfChannel) ?T {
- // TODO integrate this function with named return values
- // so we can get rid of this extra result copy
- var result: ?T = null;
- var my_tick_node = Loop.NextTickNode{ .data = @frame() };
- var or_null_node = std.atomic.Queue(*std.atomic.Queue(GetNode).Node).Node{ .data = undefined };
- var queue_node = std.atomic.Queue(GetNode).Node{
- .data = GetNode{
- .tick_node = &my_tick_node,
- .data = GetNode.Data{
- .OrNull = GetNode.OrNull{
- .ptr = &result,
- .or_null = &or_null_node,
- },
- },
- },
- };
- or_null_node.data = &queue_node;
-
- suspend {
- self.getters.put(&queue_node);
- _ = @atomicRmw(usize, &self.get_count, .Add, 1, .SeqCst);
- self.or_null_queue.put(&or_null_node);
-
- self.dispatch();
- }
- return result;
- }
-
- fn dispatch(self: *SelfChannel) void {
- // set the "need dispatch" flag
- @atomicStore(bool, &self.need_dispatch, true, .SeqCst);
-
- lock: while (true) {
- // set the lock flag
- if (@atomicRmw(bool, &self.dispatch_lock, .Xchg, true, .SeqCst)) return;
-
- // clear the need_dispatch flag since we're about to do it
- @atomicStore(bool, &self.need_dispatch, false, .SeqCst);
-
- while (true) {
- one_dispatch: {
- // later we correct these extra subtractions
- var get_count = @atomicRmw(usize, &self.get_count, .Sub, 1, .SeqCst);
- var put_count = @atomicRmw(usize, &self.put_count, .Sub, 1, .SeqCst);
-
- // transfer self.buffer to self.getters
- while (self.buffer_len != 0) {
- if (get_count == 0) break :one_dispatch;
-
- const get_node = &self.getters.get().?.data;
- switch (get_node.data) {
- GetNode.Data.Normal => |info| {
- info.ptr.* = self.buffer_nodes[(self.buffer_index -% self.buffer_len) % self.buffer_nodes.len];
- },
- GetNode.Data.OrNull => |info| {
- _ = self.or_null_queue.remove(info.or_null);
- info.ptr.* = self.buffer_nodes[(self.buffer_index -% self.buffer_len) % self.buffer_nodes.len];
- },
- }
- global_event_loop.onNextTick(get_node.tick_node);
- self.buffer_len -= 1;
-
- get_count = @atomicRmw(usize, &self.get_count, .Sub, 1, .SeqCst);
- }
-
- // direct transfer self.putters to self.getters
- while (get_count != 0 and put_count != 0) {
- const get_node = &self.getters.get().?.data;
- const put_node = &self.putters.get().?.data;
-
- switch (get_node.data) {
- GetNode.Data.Normal => |info| {
- info.ptr.* = put_node.data;
- },
- GetNode.Data.OrNull => |info| {
- _ = self.or_null_queue.remove(info.or_null);
- info.ptr.* = put_node.data;
- },
- }
- global_event_loop.onNextTick(get_node.tick_node);
- global_event_loop.onNextTick(put_node.tick_node);
-
- get_count = @atomicRmw(usize, &self.get_count, .Sub, 1, .SeqCst);
- put_count = @atomicRmw(usize, &self.put_count, .Sub, 1, .SeqCst);
- }
-
- // transfer self.putters to self.buffer
- while (self.buffer_len != self.buffer_nodes.len and put_count != 0) {
- const put_node = &self.putters.get().?.data;
-
- self.buffer_nodes[self.buffer_index % self.buffer_nodes.len] = put_node.data;
- global_event_loop.onNextTick(put_node.tick_node);
- self.buffer_index +%= 1;
- self.buffer_len += 1;
-
- put_count = @atomicRmw(usize, &self.put_count, .Sub, 1, .SeqCst);
- }
- }
-
- // undo the extra subtractions
- _ = @atomicRmw(usize, &self.get_count, .Add, 1, .SeqCst);
- _ = @atomicRmw(usize, &self.put_count, .Add, 1, .SeqCst);
-
- // All the "get or null" functions should resume now.
- var remove_count: usize = 0;
- while (self.or_null_queue.get()) |or_null_node| {
- remove_count += @intFromBool(self.getters.remove(or_null_node.data));
- global_event_loop.onNextTick(or_null_node.data.data.tick_node);
- }
- if (remove_count != 0) {
- _ = @atomicRmw(usize, &self.get_count, .Sub, remove_count, .SeqCst);
- }
-
- // clear need-dispatch flag
- if (@atomicRmw(bool, &self.need_dispatch, .Xchg, false, .SeqCst)) continue;
-
- assert(@atomicRmw(bool, &self.dispatch_lock, .Xchg, false, .SeqCst));
-
- // we have to check again now that we unlocked
- if (@atomicLoad(bool, &self.need_dispatch, .SeqCst)) continue :lock;
-
- return;
- }
- }
- }
- };
-}
-
-test "std.event.Channel" {
- if (!std.io.is_async) return error.SkipZigTest;
-
- // https://github.com/ziglang/zig/issues/1908
- if (builtin.single_threaded) return error.SkipZigTest;
-
- // https://github.com/ziglang/zig/issues/3251
- if (builtin.os.tag == .freebsd) return error.SkipZigTest;
-
- var channel: Channel(i32) = undefined;
- channel.init(&[0]i32{});
- defer channel.deinit();
-
- var handle = async testChannelGetter(&channel);
- var putter = async testChannelPutter(&channel);
-
- await handle;
- await putter;
-}
-
-test "std.event.Channel wraparound" {
-
- // TODO provide a way to run tests in evented I/O mode
- if (!std.io.is_async) return error.SkipZigTest;
-
- const channel_size = 2;
-
- var buf: [channel_size]i32 = undefined;
- var channel: Channel(i32) = undefined;
- channel.init(&buf);
- defer channel.deinit();
-
- // add items to channel and pull them out until
- // the buffer wraps around, make sure it doesn't crash.
- channel.put(5);
- try testing.expectEqual(@as(i32, 5), channel.get());
- channel.put(6);
- try testing.expectEqual(@as(i32, 6), channel.get());
- channel.put(7);
- try testing.expectEqual(@as(i32, 7), channel.get());
-}
-fn testChannelGetter(channel: *Channel(i32)) callconv(.Async) void {
- const value1 = channel.get();
- try testing.expect(value1 == 1234);
-
- const value2 = channel.get();
- try testing.expect(value2 == 4567);
-
- const value3 = channel.getOrNull();
- try testing.expect(value3 == null);
-
- var last_put = async testPut(channel, 4444);
- const value4 = channel.getOrNull();
- try testing.expect(value4.? == 4444);
- await last_put;
-}
-fn testChannelPutter(channel: *Channel(i32)) callconv(.Async) void {
- channel.put(1234);
- channel.put(4567);
-}
-fn testPut(channel: *Channel(i32), value: i32) callconv(.Async) void {
- channel.put(value);
-}
diff --git a/lib/std/event/future.zig b/lib/std/event/future.zig
deleted file mode 100644
index e38d54537d..0000000000
--- a/lib/std/event/future.zig
+++ /dev/null
@@ -1,115 +0,0 @@
-const std = @import("../std.zig");
-const builtin = @import("builtin");
-const assert = std.debug.assert;
-const testing = std.testing;
-const Lock = std.event.Lock;
-
-/// This is a value that starts out unavailable, until resolve() is called.
-/// While it is unavailable, functions suspend when they try to get() it,
-/// and then are resumed when resolve() is called.
-/// At this point the value remains forever available, and another resolve() is not allowed.
-pub fn Future(comptime T: type) type {
- return struct {
- lock: Lock,
- data: T,
- available: Available,
-
- const Available = enum(u8) {
- NotStarted,
- Started,
- Finished,
- };
-
- const Self = @This();
- const Queue = std.atomic.Queue(anyframe);
-
- pub fn init() Self {
- return Self{
- .lock = Lock.initLocked(),
- .available = .NotStarted,
- .data = undefined,
- };
- }
-
- /// Obtain the value. If it's not available, wait until it becomes
- /// available.
- /// Thread-safe.
- pub fn get(self: *Self) callconv(.Async) *T {
- if (@atomicLoad(Available, &self.available, .SeqCst) == .Finished) {
- return &self.data;
- }
- const held = self.lock.acquire();
- held.release();
-
- return &self.data;
- }
-
- /// Gets the data without waiting for it. If it's available, a pointer is
- /// returned. Otherwise, null is returned.
- pub fn getOrNull(self: *Self) ?*T {
- if (@atomicLoad(Available, &self.available, .SeqCst) == .Finished) {
- return &self.data;
- } else {
- return null;
- }
- }
-
- /// If someone else has started working on the data, wait for them to complete
- /// and return a pointer to the data. Otherwise, return null, and the caller
- /// should start working on the data.
- /// It's not required to call start() before resolve() but it can be useful since
- /// this method is thread-safe.
- pub fn start(self: *Self) callconv(.Async) ?*T {
- const state = @cmpxchgStrong(Available, &self.available, .NotStarted, .Started, .SeqCst, .SeqCst) orelse return null;
- switch (state) {
- .Started => {
- const held = self.lock.acquire();
- held.release();
- return &self.data;
- },
- .Finished => return &self.data,
- else => unreachable,
- }
- }
-
- /// Make the data become available. May be called only once.
- /// Before calling this, modify the `data` property.
- pub fn resolve(self: *Self) void {
- const prev = @atomicRmw(Available, &self.available, .Xchg, .Finished, .SeqCst);
- assert(prev != .Finished); // resolve() called twice
- Lock.Held.release(Lock.Held{ .lock = &self.lock });
- }
- };
-}
-
-test "std.event.Future" {
- // https://github.com/ziglang/zig/issues/1908
- if (builtin.single_threaded) return error.SkipZigTest;
- // https://github.com/ziglang/zig/issues/3251
- if (builtin.os.tag == .freebsd) return error.SkipZigTest;
- // TODO provide a way to run tests in evented I/O mode
- if (!std.io.is_async) return error.SkipZigTest;
-
- testFuture();
-}
-
-fn testFuture() void {
- var future = Future(i32).init();
-
- var a = async waitOnFuture(&future);
- var b = async waitOnFuture(&future);
- resolveFuture(&future);
-
- const result = (await a) + (await b);
-
- try testing.expect(result == 12);
-}
-
-fn waitOnFuture(future: *Future(i32)) i32 {
- return future.get().*;
-}
-
-fn resolveFuture(future: *Future(i32)) void {
- future.data = 6;
- future.resolve();
-}
diff --git a/lib/std/event/group.zig b/lib/std/event/group.zig
deleted file mode 100644
index 6d513000f4..0000000000
--- a/lib/std/event/group.zig
+++ /dev/null
@@ -1,160 +0,0 @@
-const std = @import("../std.zig");
-const builtin = @import("builtin");
-const Lock = std.event.Lock;
-const testing = std.testing;
-const Allocator = std.mem.Allocator;
-
-/// ReturnType must be `void` or `E!void`
-/// TODO This API was created back with the old design of async/await, when calling any
-/// async function required an allocator. There is an ongoing experiment to transition
-/// all uses of this API to the simpler and more resource-aware `std.event.Batch` API.
-/// If the transition goes well, all usages of `Group` will be gone, and this API
-/// will be deleted.
-pub fn Group(comptime ReturnType: type) type {
- return struct {
- frame_stack: Stack,
- alloc_stack: AllocStack,
- lock: Lock,
- allocator: Allocator,
-
- const Self = @This();
-
- const Error = switch (@typeInfo(ReturnType)) {
- .ErrorUnion => |payload| payload.error_set,
- else => void,
- };
- const Stack = std.atomic.Stack(anyframe->ReturnType);
- const AllocStack = std.atomic.Stack(Node);
-
- pub const Node = struct {
- bytes: []const u8 = &[0]u8{},
- handle: anyframe->ReturnType,
- };
-
- pub fn init(allocator: Allocator) Self {
- return Self{
- .frame_stack = Stack.init(),
- .alloc_stack = AllocStack.init(),
- .lock = .{},
- .allocator = allocator,
- };
- }
-
- /// Add a frame to the group. Thread-safe.
- pub fn add(self: *Self, handle: anyframe->ReturnType) (error{OutOfMemory}!void) {
- const node = try self.allocator.create(AllocStack.Node);
- node.* = AllocStack.Node{
- .next = undefined,
- .data = Node{
- .handle = handle,
- },
- };
- self.alloc_stack.push(node);
- }
-
- /// Add a node to the group. Thread-safe. Cannot fail.
- /// `node.data` should be the frame handle to add to the group.
- /// The node's memory should be in the function frame of
- /// the handle that is in the node, or somewhere guaranteed to live
- /// at least as long.
- pub fn addNode(self: *Self, node: *Stack.Node) void {
- self.frame_stack.push(node);
- }
-
- /// This is equivalent to adding a frame to the group but the memory of its frame is
- /// allocated by the group and freed by `wait`.
- /// `func` must be async and have return type `ReturnType`.
- /// Thread-safe.
- pub fn call(self: *Self, comptime func: anytype, args: anytype) error{OutOfMemory}!void {
- const frame = try self.allocator.create(@TypeOf(@call(.{ .modifier = .async_kw }, func, args)));
- errdefer self.allocator.destroy(frame);
- const node = try self.allocator.create(AllocStack.Node);
- errdefer self.allocator.destroy(node);
- node.* = AllocStack.Node{
- .next = undefined,
- .data = Node{
- .handle = frame,
- .bytes = std.mem.asBytes(frame),
- },
- };
- frame.* = @call(.{ .modifier = .async_kw }, func, args);
- self.alloc_stack.push(node);
- }
-
- /// Wait for all the calls and promises of the group to complete.
- /// Thread-safe.
- /// Safe to call any number of times.
- pub fn wait(self: *Self) callconv(.Async) ReturnType {
- const held = self.lock.acquire();
- defer held.release();
-
- var result: ReturnType = {};
-
- while (self.frame_stack.pop()) |node| {
- if (Error == void) {
- await node.data;
- } else {
- (await node.data) catch |err| {
- result = err;
- };
- }
- }
- while (self.alloc_stack.pop()) |node| {
- const handle = node.data.handle;
- if (Error == void) {
- await handle;
- } else {
- (await handle) catch |err| {
- result = err;
- };
- }
- self.allocator.free(node.data.bytes);
- self.allocator.destroy(node);
- }
- return result;
- }
- };
-}
-
-test "std.event.Group" {
- // https://github.com/ziglang/zig/issues/1908
- if (builtin.single_threaded) return error.SkipZigTest;
-
- if (!std.io.is_async) return error.SkipZigTest;
-
- // TODO this file has bit-rotted. repair it
- if (true) return error.SkipZigTest;
-
- _ = async testGroup(std.heap.page_allocator);
-}
-fn testGroup(allocator: Allocator) callconv(.Async) void {
- var count: usize = 0;
- var group = Group(void).init(allocator);
- var sleep_a_little_frame = async sleepALittle(&count);
- group.add(&sleep_a_little_frame) catch @panic("memory");
- var increase_by_ten_frame = async increaseByTen(&count);
- group.add(&increase_by_ten_frame) catch @panic("memory");
- group.wait();
- try testing.expect(count == 11);
-
- var another = Group(anyerror!void).init(allocator);
- var something_else_frame = async somethingElse();
- another.add(&something_else_frame) catch @panic("memory");
- var something_that_fails_frame = async doSomethingThatFails();
- another.add(&something_that_fails_frame) catch @panic("memory");
- try testing.expectError(error.ItBroke, another.wait());
-}
-fn sleepALittle(count: *usize) callconv(.Async) void {
- std.time.sleep(1 * std.time.ns_per_ms);
- _ = @atomicRmw(usize, count, .Add, 1, .SeqCst);
-}
-fn increaseByTen(count: *usize) callconv(.Async) void {
- var i: usize = 0;
- while (i < 10) : (i += 1) {
- _ = @atomicRmw(usize, count, .Add, 1, .SeqCst);
- }
-}
-fn doSomethingThatFails() callconv(.Async) anyerror!void {}
-fn somethingElse() callconv(.Async) anyerror!void {
- return error.ItBroke;
-}
diff --git a/lib/std/event/lock.zig b/lib/std/event/lock.zig
deleted file mode 100644
index 8608298c29..0000000000
--- a/lib/std/event/lock.zig
+++ /dev/null
@@ -1,162 +0,0 @@
-const std = @import("../std.zig");
-const builtin = @import("builtin");
-const assert = std.debug.assert;
-const testing = std.testing;
-const mem = std.mem;
-const Loop = std.event.Loop;
-
-/// Thread-safe async/await lock.
-/// Functions which are waiting for the lock are suspended, and
-/// are resumed when the lock is released, in order.
-/// Allows only one actor to hold the lock.
-/// TODO: make this API also work in blocking I/O mode.
-pub const Lock = struct {
- mutex: std.Thread.Mutex = std.Thread.Mutex{},
- head: usize = UNLOCKED,
-
- const UNLOCKED = 0;
- const LOCKED = 1;
-
- const global_event_loop = Loop.instance orelse
- @compileError("std.event.Lock currently only works with event-based I/O");
-
- const Waiter = struct {
- // forced Waiter alignment to ensure it doesn't clash with LOCKED
- next: ?*Waiter align(2),
- tail: *Waiter,
- node: Loop.NextTickNode,
- };
-
- pub fn initLocked() Lock {
- return Lock{ .head = LOCKED };
- }
-
- pub fn acquire(self: *Lock) Held {
- self.mutex.lock();
-
- // self.head transitions from multiple stages depending on the value:
- // UNLOCKED -> LOCKED:
- // acquire Lock ownership when there are no waiters
- // LOCKED -> <Waiter head ptr>:
- // Lock is already owned, enqueue first Waiter
- // <head ptr> -> <head ptr>:
- // Lock is owned with pending waiters. Push our waiter to the queue.
-
- if (self.head == UNLOCKED) {
- self.head = LOCKED;
- self.mutex.unlock();
- return Held{ .lock = self };
- }
-
- var waiter: Waiter = undefined;
- waiter.next = null;
- waiter.tail = &waiter;
-
- const head = switch (self.head) {
- UNLOCKED => unreachable,
- LOCKED => null,
- else => @as(*Waiter, @ptrFromInt(self.head)),
- };
-
- if (head) |h| {
- h.tail.next = &waiter;
- h.tail = &waiter;
- } else {
- self.head = @intFromPtr(&waiter);
- }
-
- suspend {
- waiter.node = Loop.NextTickNode{
- .prev = undefined,
- .next = undefined,
- .data = @frame(),
- };
- self.mutex.unlock();
- }
-
- return Held{ .lock = self };
- }
-
- pub const Held = struct {
- lock: *Lock,
-
- pub fn release(self: Held) void {
- const waiter = blk: {
- self.lock.mutex.lock();
- defer self.lock.mutex.unlock();
-
- // self.head goes through the reverse transition from acquire():
- // <head ptr> -> <new head ptr>:
- // pop a waiter from the queue to give Lock ownership when there are still others pending
- // <head ptr> -> LOCKED:
- // pop the laster waiter from the queue, while also giving it lock ownership when awaken
- // LOCKED -> UNLOCKED:
- // last lock owner releases lock while no one else is waiting for it
-
- switch (self.lock.head) {
- UNLOCKED => {
- unreachable; // Lock unlocked while unlocking
- },
- LOCKED => {
- self.lock.head = UNLOCKED;
- break :blk null;
- },
- else => {
- const waiter = @as(*Waiter, @ptrFromInt(self.lock.head));
- self.lock.head = if (waiter.next == null) LOCKED else @intFromPtr(waiter.next);
- if (waiter.next) |next|
- next.tail = waiter.tail;
- break :blk waiter;
- },
- }
- };
-
- if (waiter) |w| {
- global_event_loop.onNextTick(&w.node);
- }
- }
- };
-};
-
-test "std.event.Lock" {
- if (!std.io.is_async) return error.SkipZigTest;
-
- // TODO https://github.com/ziglang/zig/issues/1908
- if (builtin.single_threaded) return error.SkipZigTest;
-
- // TODO https://github.com/ziglang/zig/issues/3251
- if (builtin.os.tag == .freebsd) return error.SkipZigTest;
-
- var lock = Lock{};
- testLock(&lock);
-
- const expected_result = [1]i32{3 * @as(i32, @intCast(shared_test_data.len))} ** shared_test_data.len;
- try testing.expectEqualSlices(i32, &expected_result, &shared_test_data);
-}
-fn testLock(lock: *Lock) void {
- var handle1 = async lockRunner(lock);
- var handle2 = async lockRunner(lock);
- var handle3 = async lockRunner(lock);
-
- await handle1;
- await handle2;
- await handle3;
-}
-
-var shared_test_data = [1]i32{0} ** 10;
-var shared_test_index: usize = 0;
-
-fn lockRunner(lock: *Lock) void {
- Lock.global_event_loop.yield();
-
- var i: usize = 0;
- while (i < shared_test_data.len) : (i += 1) {
- const handle = lock.acquire();
- defer handle.release();
-
- shared_test_index = 0;
- while (shared_test_index < shared_test_data.len) : (shared_test_index += 1) {
- shared_test_data[shared_test_index] = shared_test_data[shared_test_index] + 1;
- }
- }
-}
diff --git a/lib/std/event/locked.zig b/lib/std/event/locked.zig
deleted file mode 100644
index 66495c3772..0000000000
--- a/lib/std/event/locked.zig
+++ /dev/null
@@ -1,42 +0,0 @@
-const std = @import("../std.zig");
-const Lock = std.event.Lock;
-
-/// Thread-safe async/await lock that protects one piece of data.
-/// Functions which are waiting for the lock are suspended, and
-/// are resumed when the lock is released, in order.
-pub fn Locked(comptime T: type) type {
- return struct {
- lock: Lock,
- private_data: T,
-
- const Self = @This();
-
- pub const HeldLock = struct {
- value: *T,
- held: Lock.Held,
-
- pub fn release(self: HeldLock) void {
- self.held.release();
- }
- };
-
- pub fn init(data: T) Self {
- return Self{
- .lock = .{},
- .private_data = data,
- };
- }
-
- pub fn deinit(self: *Self) void {
- self.lock.deinit();
- }
-
- pub fn acquire(self: *Self) callconv(.Async) HeldLock {
- return HeldLock{
- // TODO guaranteed allocation elision
- .held = self.lock.acquire(),
- .value = &self.private_data,
- };
- }
- };
-}
diff --git a/lib/std/event/loop.zig b/lib/std/event/loop.zig
deleted file mode 100644
index d7b75a6672..0000000000
--- a/lib/std/event/loop.zig
+++ /dev/null
@@ -1,1791 +0,0 @@
-const std = @import("../std.zig");
-const builtin = @import("builtin");
-const assert = std.debug.assert;
-const testing = std.testing;
-const mem = std.mem;
-const os = std.os;
-const windows = os.windows;
-const maxInt = std.math.maxInt;
-const Thread = std.Thread;
-
-const is_windows = builtin.os.tag == .windows;
-
-pub const Loop = struct {
- next_tick_queue: std.atomic.Queue(anyframe),
- os_data: OsData,
- final_resume_node: ResumeNode,
- pending_event_count: usize,
- extra_threads: []Thread,
- /// TODO change this to a pool of configurable number of threads
- /// and rename it to be not file-system-specific. it will become
- /// a thread pool for turning non-CPU-bound blocking things into
- /// async things. A fallback for any missing OS-specific API.
- fs_thread: Thread,
- fs_queue: std.atomic.Queue(Request),
- fs_end_request: Request.Node,
- fs_thread_wakeup: std.Thread.ResetEvent,
-
- /// For resources that have the same lifetime as the `Loop`.
- /// This is only used by `Loop` for the thread pool and associated resources.
- arena: std.heap.ArenaAllocator,
-
- /// State which manages frames that are sleeping on timers
- delay_queue: DelayQueue,
-
- /// Pre-allocated eventfds. All permanently active.
- /// This is how `Loop` sends promises to be resumed on other threads.
- available_eventfd_resume_nodes: std.atomic.Stack(ResumeNode.EventFd),
- eventfd_resume_nodes: []std.atomic.Stack(ResumeNode.EventFd).Node,
-
- pub const NextTickNode = std.atomic.Queue(anyframe).Node;
-
- pub const ResumeNode = struct {
- id: Id,
- handle: anyframe,
- overlapped: Overlapped,
-
- pub const overlapped_init = switch (builtin.os.tag) {
- .windows => windows.OVERLAPPED{
- .Internal = 0,
- .InternalHigh = 0,
- .DUMMYUNIONNAME = .{
- .DUMMYSTRUCTNAME = .{
- .Offset = 0,
- .OffsetHigh = 0,
- },
- },
- .hEvent = null,
- },
- else => {},
- };
- pub const Overlapped = @TypeOf(overlapped_init);
-
- pub const Id = enum {
- basic,
- stop,
- event_fd,
- };
-
- pub const EventFd = switch (builtin.os.tag) {
- .macos, .ios, .tvos, .watchos, .freebsd, .netbsd, .dragonfly, .openbsd => KEventFd,
- .linux => struct {
- base: ResumeNode,
- epoll_op: u32,
- eventfd: i32,
- },
- .windows => struct {
- base: ResumeNode,
- completion_key: usize,
- },
- else => struct {},
- };
-
- const KEventFd = struct {
- base: ResumeNode,
- kevent: os.Kevent,
- };
-
- pub const Basic = switch (builtin.os.tag) {
- .macos, .ios, .tvos, .watchos, .freebsd, .netbsd, .dragonfly, .openbsd => KEventBasic,
- .linux => struct {
- base: ResumeNode,
- },
- .windows => struct {
- base: ResumeNode,
- },
- else => @compileError("unsupported OS"),
- };
-
- const KEventBasic = struct {
- base: ResumeNode,
- kev: os.Kevent,
- };
- };
-
- pub const Instance = switch (std.options.io_mode) {
- .blocking => @TypeOf(null),
- .evented => ?*Loop,
- };
- pub const instance = std.options.event_loop;
-
- var global_instance_state: Loop = undefined;
- pub const default_instance = switch (std.options.io_mode) {
- .blocking => null,
- .evented => &global_instance_state,
- };
-
- pub const Mode = enum {
- single_threaded,
- multi_threaded,
- };
- pub const default_mode = .multi_threaded;
-
- /// TODO copy elision / named return values so that the threads referencing *Loop
- /// have the correct pointer value.
- /// https://github.com/ziglang/zig/issues/2761 and https://github.com/ziglang/zig/issues/2765
- pub fn init(self: *Loop) !void {
- if (builtin.single_threaded or std.options.event_loop_mode == .single_threaded) {
- return self.initSingleThreaded();
- } else {
- return self.initMultiThreaded();
- }
- }
-
- /// After initialization, call run().
- /// TODO copy elision / named return values so that the threads referencing *Loop
- /// have the correct pointer value.
- /// https://github.com/ziglang/zig/issues/2761 and https://github.com/ziglang/zig/issues/2765
- pub fn initSingleThreaded(self: *Loop) !void {
- return self.initThreadPool(1);
- }
-
- /// After initialization, call run().
- /// This is the same as `initThreadPool` using `Thread.getCpuCount` to determine the thread
- /// pool size.
- /// TODO copy elision / named return values so that the threads referencing *Loop
- /// have the correct pointer value.
- /// https://github.com/ziglang/zig/issues/2761 and https://github.com/ziglang/zig/issues/2765
- pub fn initMultiThreaded(self: *Loop) !void {
- if (builtin.single_threaded)
- @compileError("initMultiThreaded unavailable when building in single-threaded mode");
- const core_count = try Thread.getCpuCount();
- return self.initThreadPool(core_count);
- }
-
- /// Thread count is the total thread count. The thread pool size will be
- /// max(thread_count - 1, 0)
- pub fn initThreadPool(self: *Loop, thread_count: usize) !void {
- self.* = Loop{
- .arena = std.heap.ArenaAllocator.init(std.heap.page_allocator),
- .pending_event_count = 1,
- .os_data = undefined,
- .next_tick_queue = std.atomic.Queue(anyframe).init(),
- .extra_threads = undefined,
- .available_eventfd_resume_nodes = std.atomic.Stack(ResumeNode.EventFd).init(),
- .eventfd_resume_nodes = undefined,
- .final_resume_node = ResumeNode{
- .id = .stop,
- .handle = undefined,
- .overlapped = ResumeNode.overlapped_init,
- },
- .fs_end_request = .{ .data = .{ .msg = .end, .finish = .no_action } },
- .fs_queue = std.atomic.Queue(Request).init(),
- .fs_thread = undefined,
- .fs_thread_wakeup = .{},
- .delay_queue = undefined,
- };
- errdefer self.arena.deinit();
-
- // We need at least one of these in case the fs thread wants to use onNextTick
- const extra_thread_count = thread_count - 1;
- const resume_node_count = @max(extra_thread_count, 1);
- self.eventfd_resume_nodes = try self.arena.allocator().alloc(
- std.atomic.Stack(ResumeNode.EventFd).Node,
- resume_node_count,
- );
-
- self.extra_threads = try self.arena.allocator().alloc(Thread, extra_thread_count);
-
- try self.initOsData(extra_thread_count);
- errdefer self.deinitOsData();
-
- if (!builtin.single_threaded) {
- self.fs_thread = try Thread.spawn(.{}, posixFsRun, .{self});
- }
- errdefer if (!builtin.single_threaded) {
- self.posixFsRequest(&self.fs_end_request);
- self.fs_thread.join();
- };
-
- if (!builtin.single_threaded)
- try self.delay_queue.init();
- }
-
- pub fn deinit(self: *Loop) void {
- self.deinitOsData();
- self.arena.deinit();
- self.* = undefined;
- }
-
- const InitOsDataError = os.EpollCreateError || mem.Allocator.Error || os.EventFdError ||
- Thread.SpawnError || os.EpollCtlError || os.KEventError ||
- windows.CreateIoCompletionPortError;
-
- const wakeup_bytes = [_]u8{0x1} ** 8;
-
- fn initOsData(self: *Loop, extra_thread_count: usize) InitOsDataError!void {
- nosuspend switch (builtin.os.tag) {
- .linux => {
- errdefer {
- while (self.available_eventfd_resume_nodes.pop()) |node| os.close(node.data.eventfd);
- }
- for (self.eventfd_resume_nodes) |*eventfd_node| {
- eventfd_node.* = std.atomic.Stack(ResumeNode.EventFd).Node{
- .data = ResumeNode.EventFd{
- .base = ResumeNode{
- .id = .event_fd,
- .handle = undefined,
- .overlapped = ResumeNode.overlapped_init,
- },
- .eventfd = try os.eventfd(1, os.linux.EFD.CLOEXEC | os.linux.EFD.NONBLOCK),
- .epoll_op = os.linux.EPOLL.CTL_ADD,
- },
- .next = undefined,
- };
- self.available_eventfd_resume_nodes.push(eventfd_node);
- }
-
- self.os_data.epollfd = try os.epoll_create1(os.linux.EPOLL.CLOEXEC);
- errdefer os.close(self.os_data.epollfd);
-
- self.os_data.final_eventfd = try os.eventfd(0, os.linux.EFD.CLOEXEC | os.linux.EFD.NONBLOCK);
- errdefer os.close(self.os_data.final_eventfd);
-
- self.os_data.final_eventfd_event = os.linux.epoll_event{
- .events = os.linux.EPOLL.IN,
- .data = os.linux.epoll_data{ .ptr = @intFromPtr(&self.final_resume_node) },
- };
- try os.epoll_ctl(
- self.os_data.epollfd,
- os.linux.EPOLL.CTL_ADD,
- self.os_data.final_eventfd,
- &self.os_data.final_eventfd_event,
- );
-
- if (builtin.single_threaded) {
- assert(extra_thread_count == 0);
- return;
- }
-
- var extra_thread_index: usize = 0;
- errdefer {
- // writing 8 bytes to an eventfd cannot fail
- const amt = os.write(self.os_data.final_eventfd, &wakeup_bytes) catch unreachable;
- assert(amt == wakeup_bytes.len);
- while (extra_thread_index != 0) {
- extra_thread_index -= 1;
- self.extra_threads[extra_thread_index].join();
- }
- }
- while (extra_thread_index < extra_thread_count) : (extra_thread_index += 1) {
- self.extra_threads[extra_thread_index] = try Thread.spawn(.{}, workerRun, .{self});
- }
- },
- .macos, .ios, .tvos, .watchos, .freebsd, .netbsd, .dragonfly => {
- self.os_data.kqfd = try os.kqueue();
- errdefer os.close(self.os_data.kqfd);
-
- const empty_kevs = &[0]os.Kevent{};
-
- for (self.eventfd_resume_nodes, 0..) |*eventfd_node, i| {
- eventfd_node.* = std.atomic.Stack(ResumeNode.EventFd).Node{
- .data = ResumeNode.EventFd{
- .base = ResumeNode{
- .id = .event_fd,
- .handle = undefined,
- .overlapped = ResumeNode.overlapped_init,
- },
- // this one is for sending events
- .kevent = os.Kevent{
- .ident = i,
- .filter = os.system.EVFILT_USER,
- .flags = os.system.EV_CLEAR | os.system.EV_ADD | os.system.EV_DISABLE,
- .fflags = 0,
- .data = 0,
- .udata = @intFromPtr(&eventfd_node.data.base),
- },
- },
- .next = undefined,
- };
- self.available_eventfd_resume_nodes.push(eventfd_node);
- const kevent_array = @as(*const [1]os.Kevent, &eventfd_node.data.kevent);
- _ = try os.kevent(self.os_data.kqfd, kevent_array, empty_kevs, null);
- eventfd_node.data.kevent.flags = os.system.EV_CLEAR | os.system.EV_ENABLE;
- eventfd_node.data.kevent.fflags = os.system.NOTE_TRIGGER;
- }
-
- // Pre-add so that we cannot get error.SystemResources
- // later when we try to activate it.
- self.os_data.final_kevent = os.Kevent{
- .ident = extra_thread_count,
- .filter = os.system.EVFILT_USER,
- .flags = os.system.EV_ADD | os.system.EV_DISABLE,
- .fflags = 0,
- .data = 0,
- .udata = @intFromPtr(&self.final_resume_node),
- };
- const final_kev_arr = @as(*const [1]os.Kevent, &self.os_data.final_kevent);
- _ = try os.kevent(self.os_data.kqfd, final_kev_arr, empty_kevs, null);
- self.os_data.final_kevent.flags = os.system.EV_ENABLE;
- self.os_data.final_kevent.fflags = os.system.NOTE_TRIGGER;
-
- if (builtin.single_threaded) {
- assert(extra_thread_count == 0);
- return;
- }
-
- var extra_thread_index: usize = 0;
- errdefer {
- _ = os.kevent(self.os_data.kqfd, final_kev_arr, empty_kevs, null) catch unreachable;
- while (extra_thread_index != 0) {
- extra_thread_index -= 1;
- self.extra_threads[extra_thread_index].join();
- }
- }
- while (extra_thread_index < extra_thread_count) : (extra_thread_index += 1) {
- self.extra_threads[extra_thread_index] = try Thread.spawn(.{}, workerRun, .{self});
- }
- },
- .openbsd => {
- self.os_data.kqfd = try os.kqueue();
- errdefer os.close(self.os_data.kqfd);
-
- const empty_kevs = &[0]os.Kevent{};
-
- for (self.eventfd_resume_nodes, 0..) |*eventfd_node, i| {
- eventfd_node.* = std.atomic.Stack(ResumeNode.EventFd).Node{
- .data = ResumeNode.EventFd{
- .base = ResumeNode{
- .id = .event_fd,
- .handle = undefined,
- .overlapped = ResumeNode.overlapped_init,
- },
- // this one is for sending events
- .kevent = os.Kevent{
- .ident = i,
- .filter = os.system.EVFILT_TIMER,
- .flags = os.system.EV_CLEAR | os.system.EV_ADD | os.system.EV_DISABLE | os.system.EV_ONESHOT,
- .fflags = 0,
- .data = 0,
- .udata = @intFromPtr(&eventfd_node.data.base),
- },
- },
- .next = undefined,
- };
- self.available_eventfd_resume_nodes.push(eventfd_node);
- const kevent_array = @as(*const [1]os.Kevent, &eventfd_node.data.kevent);
- _ = try os.kevent(self.os_data.kqfd, kevent_array, empty_kevs, null);
- eventfd_node.data.kevent.flags = os.system.EV_CLEAR | os.system.EV_ENABLE;
- }
-
- // Pre-add so that we cannot get error.SystemResources
- // later when we try to activate it.
- self.os_data.final_kevent = os.Kevent{
- .ident = extra_thread_count,
- .filter = os.system.EVFILT_TIMER,
- .flags = os.system.EV_ADD | os.system.EV_ONESHOT | os.system.EV_DISABLE,
- .fflags = 0,
- .data = 0,
- .udata = @intFromPtr(&self.final_resume_node),
- };
- const final_kev_arr = @as(*const [1]os.Kevent, &self.os_data.final_kevent);
- _ = try os.kevent(self.os_data.kqfd, final_kev_arr, empty_kevs, null);
- self.os_data.final_kevent.flags = os.system.EV_ENABLE;
-
- if (builtin.single_threaded) {
- assert(extra_thread_count == 0);
- return;
- }
-
- var extra_thread_index: usize = 0;
- errdefer {
- _ = os.kevent(self.os_data.kqfd, final_kev_arr, empty_kevs, null) catch unreachable;
- while (extra_thread_index != 0) {
- extra_thread_index -= 1;
- self.extra_threads[extra_thread_index].join();
- }
- }
- while (extra_thread_index < extra_thread_count) : (extra_thread_index += 1) {
- self.extra_threads[extra_thread_index] = try Thread.spawn(.{}, workerRun, .{self});
- }
- },
- .windows => {
- self.os_data.io_port = try windows.CreateIoCompletionPort(
- windows.INVALID_HANDLE_VALUE,
- null,
- undefined,
- maxInt(windows.DWORD),
- );
- errdefer windows.CloseHandle(self.os_data.io_port);
-
- for (self.eventfd_resume_nodes) |*eventfd_node| {
- eventfd_node.* = std.atomic.Stack(ResumeNode.EventFd).Node{
- .data = ResumeNode.EventFd{
- .base = ResumeNode{
- .id = .event_fd,
- .handle = undefined,
- .overlapped = ResumeNode.overlapped_init,
- },
- // this one is for sending events
- .completion_key = @intFromPtr(&eventfd_node.data.base),
- },
- .next = undefined,
- };
- self.available_eventfd_resume_nodes.push(eventfd_node);
- }
-
- if (builtin.single_threaded) {
- assert(extra_thread_count == 0);
- return;
- }
-
- var extra_thread_index: usize = 0;
- errdefer {
- var i: usize = 0;
- while (i < extra_thread_index) : (i += 1) {
- while (true) {
- const overlapped = &self.final_resume_node.overlapped;
- windows.PostQueuedCompletionStatus(self.os_data.io_port, undefined, undefined, overlapped) catch continue;
- break;
- }
- }
- while (extra_thread_index != 0) {
- extra_thread_index -= 1;
- self.extra_threads[extra_thread_index].join();
- }
- }
- while (extra_thread_index < extra_thread_count) : (extra_thread_index += 1) {
- self.extra_threads[extra_thread_index] = try Thread.spawn(.{}, workerRun, .{self});
- }
- },
- else => {},
- };
- }
-
- fn deinitOsData(self: *Loop) void {
- nosuspend switch (builtin.os.tag) {
- .linux => {
- os.close(self.os_data.final_eventfd);
- while (self.available_eventfd_resume_nodes.pop()) |node| os.close(node.data.eventfd);
- os.close(self.os_data.epollfd);
- },
- .macos, .ios, .tvos, .watchos, .freebsd, .netbsd, .dragonfly, .openbsd => {
- os.close(self.os_data.kqfd);
- },
- .windows => {
- windows.CloseHandle(self.os_data.io_port);
- },
- else => {},
- };
- }
-
- /// resume_node must live longer than the anyframe that it holds a reference to.
- /// flags must contain EPOLLET
- pub fn linuxAddFd(self: *Loop, fd: i32, resume_node: *ResumeNode, flags: u32) !void {
- assert(flags & os.linux.EPOLL.ET == os.linux.EPOLL.ET);
- self.beginOneEvent();
- errdefer self.finishOneEvent();
- try self.linuxModFd(
- fd,
- os.linux.EPOLL.CTL_ADD,
- flags,
- resume_node,
- );
- }
-
- pub fn linuxModFd(self: *Loop, fd: i32, op: u32, flags: u32, resume_node: *ResumeNode) !void {
- assert(flags & os.linux.EPOLL.ET == os.linux.EPOLL.ET);
- var ev = os.linux.epoll_event{
- .events = flags,
- .data = os.linux.epoll_data{ .ptr = @intFromPtr(resume_node) },
- };
- try os.epoll_ctl(self.os_data.epollfd, op, fd, &ev);
- }
-
- pub fn linuxRemoveFd(self: *Loop, fd: i32) void {
- os.epoll_ctl(self.os_data.epollfd, os.linux.EPOLL.CTL_DEL, fd, null) catch {};
- self.finishOneEvent();
- }
-
- pub fn linuxWaitFd(self: *Loop, fd: i32, flags: u32) void {
- assert(flags & os.linux.EPOLL.ET == os.linux.EPOLL.ET);
- assert(flags & os.linux.EPOLL.ONESHOT == os.linux.EPOLL.ONESHOT);
- var resume_node = ResumeNode.Basic{
- .base = ResumeNode{
- .id = .basic,
- .handle = @frame(),
- .overlapped = ResumeNode.overlapped_init,
- },
- };
- var need_to_delete = true;
- defer if (need_to_delete) self.linuxRemoveFd(fd);
-
- suspend {
- self.linuxAddFd(fd, &resume_node.base, flags) catch |err| switch (err) {
- error.FileDescriptorNotRegistered => unreachable,
- error.OperationCausesCircularLoop => unreachable,
- error.FileDescriptorIncompatibleWithEpoll => unreachable,
- error.FileDescriptorAlreadyPresentInSet => unreachable, // evented writes to the same fd is not thread-safe
-
- error.SystemResources,
- error.UserResourceLimitReached,
- error.Unexpected,
- => {
- need_to_delete = false;
- // Fall back to a blocking poll(). Ideally this codepath is never hit, since
- // epoll should be just fine. But this is better than incorrect behavior.
- var poll_flags: i16 = 0;
- if ((flags & os.linux.EPOLL.IN) != 0) poll_flags |= os.POLL.IN;
- if ((flags & os.linux.EPOLL.OUT) != 0) poll_flags |= os.POLL.OUT;
- var pfd = [1]os.pollfd{os.pollfd{
- .fd = fd,
- .events = poll_flags,
- .revents = undefined,
- }};
- _ = os.poll(&pfd, -1) catch |poll_err| switch (poll_err) {
- error.NetworkSubsystemFailed => unreachable, // only possible on windows
-
- error.SystemResources,
- error.Unexpected,
- => {
- // Even poll() didn't work. The best we can do now is sleep for a
- // small duration and then hope that something changed.
- std.time.sleep(1 * std.time.ns_per_ms);
- },
- };
- resume @frame();
- },
- };
- }
- }
-
- pub fn waitUntilFdReadable(self: *Loop, fd: os.fd_t) void {
- switch (builtin.os.tag) {
- .linux => {
- self.linuxWaitFd(fd, os.linux.EPOLL.ET | os.linux.EPOLL.ONESHOT | os.linux.EPOLL.IN);
- },
- .macos, .ios, .tvos, .watchos, .freebsd, .netbsd, .dragonfly, .openbsd => {
- self.bsdWaitKev(@as(usize, @intCast(fd)), os.system.EVFILT_READ, os.system.EV_ONESHOT);
- },
- else => @compileError("Unsupported OS"),
- }
- }
-
- pub fn waitUntilFdWritable(self: *Loop, fd: os.fd_t) void {
- switch (builtin.os.tag) {
- .linux => {
- self.linuxWaitFd(fd, os.linux.EPOLL.ET | os.linux.EPOLL.ONESHOT | os.linux.EPOLL.OUT);
- },
- .macos, .ios, .tvos, .watchos, .freebsd, .netbsd, .dragonfly, .openbsd => {
- self.bsdWaitKev(@as(usize, @intCast(fd)), os.system.EVFILT_WRITE, os.system.EV_ONESHOT);
- },
- else => @compileError("Unsupported OS"),
- }
- }
-
- pub fn waitUntilFdWritableOrReadable(self: *Loop, fd: os.fd_t) void {
- switch (builtin.os.tag) {
- .linux => {
- self.linuxWaitFd(fd, os.linux.EPOLL.ET | os.linux.EPOLL.ONESHOT | os.linux.EPOLL.OUT | os.linux.EPOLL.IN);
- },
- .macos, .ios, .tvos, .watchos, .freebsd, .netbsd, .dragonfly, .openbsd => {
- self.bsdWaitKev(@as(usize, @intCast(fd)), os.system.EVFILT_READ, os.system.EV_ONESHOT);
- self.bsdWaitKev(@as(usize, @intCast(fd)), os.system.EVFILT_WRITE, os.system.EV_ONESHOT);
- },
- else => @compileError("Unsupported OS"),
- }
- }
-
- pub fn bsdWaitKev(self: *Loop, ident: usize, filter: i16, flags: u16) void {
- var resume_node = ResumeNode.Basic{
- .base = ResumeNode{
- .id = .basic,
- .handle = @frame(),
- .overlapped = ResumeNode.overlapped_init,
- },
- .kev = undefined,
- };
-
- defer {
- // If the kevent was set to be ONESHOT, it doesn't need to be deleted manually.
- if (flags & os.system.EV_ONESHOT != 0) {
- self.bsdRemoveKev(ident, filter);
- }
- }
-
- suspend {
- self.bsdAddKev(&resume_node, ident, filter, flags) catch unreachable;
- }
- }
-
- /// resume_node must live longer than the anyframe that it holds a reference to.
- pub fn bsdAddKev(self: *Loop, resume_node: *ResumeNode.Basic, ident: usize, filter: i16, flags: u16) !void {
- self.beginOneEvent();
- errdefer self.finishOneEvent();
- var kev = [1]os.Kevent{os.Kevent{
- .ident = ident,
- .filter = filter,
- .flags = os.system.EV_ADD | os.system.EV_ENABLE | os.system.EV_CLEAR | flags,
- .fflags = 0,
- .data = 0,
- .udata = @intFromPtr(&resume_node.base),
- }};
- const empty_kevs = &[0]os.Kevent{};
- _ = try os.kevent(self.os_data.kqfd, &kev, empty_kevs, null);
- }
-
- pub fn bsdRemoveKev(self: *Loop, ident: usize, filter: i16) void {
- var kev = [1]os.Kevent{os.Kevent{
- .ident = ident,
- .filter = filter,
- .flags = os.system.EV_DELETE,
- .fflags = 0,
- .data = 0,
- .udata = 0,
- }};
- const empty_kevs = &[0]os.Kevent{};
- _ = os.kevent(self.os_data.kqfd, &kev, empty_kevs, null) catch undefined;
- self.finishOneEvent();
- }
-
- fn dispatch(self: *Loop) void {
- while (self.available_eventfd_resume_nodes.pop()) |resume_stack_node| {
- const next_tick_node = self.next_tick_queue.get() orelse {
- self.available_eventfd_resume_nodes.push(resume_stack_node);
- return;
- };
- const eventfd_node = &resume_stack_node.data;
- eventfd_node.base.handle = next_tick_node.data;
- switch (builtin.os.tag) {
- .macos, .ios, .tvos, .watchos, .freebsd, .netbsd, .dragonfly, .openbsd => {
- const kevent_array = @as(*const [1]os.Kevent, &eventfd_node.kevent);
- const empty_kevs = &[0]os.Kevent{};
- _ = os.kevent(self.os_data.kqfd, kevent_array, empty_kevs, null) catch {
- self.next_tick_queue.unget(next_tick_node);
- self.available_eventfd_resume_nodes.push(resume_stack_node);
- return;
- };
- },
- .linux => {
- // the pending count is already accounted for
- const epoll_events = os.linux.EPOLL.ONESHOT | os.linux.EPOLL.IN | os.linux.EPOLL.OUT |
- os.linux.EPOLL.ET;
- self.linuxModFd(
- eventfd_node.eventfd,
- eventfd_node.epoll_op,
- epoll_events,
- &eventfd_node.base,
- ) catch {
- self.next_tick_queue.unget(next_tick_node);
- self.available_eventfd_resume_nodes.push(resume_stack_node);
- return;
- };
- },
- .windows => {
- windows.PostQueuedCompletionStatus(
- self.os_data.io_port,
- undefined,
- undefined,
- &eventfd_node.base.overlapped,
- ) catch {
- self.next_tick_queue.unget(next_tick_node);
- self.available_eventfd_resume_nodes.push(resume_stack_node);
- return;
- };
- },
- else => @compileError("unsupported OS"),
- }
- }
- }
-
- /// Bring your own linked list node. This means it can't fail.
- pub fn onNextTick(self: *Loop, node: *NextTickNode) void {
- self.beginOneEvent(); // finished in dispatch()
- self.next_tick_queue.put(node);
- self.dispatch();
- }
-
- pub fn cancelOnNextTick(self: *Loop, node: *NextTickNode) void {
- if (self.next_tick_queue.remove(node)) {
- self.finishOneEvent();
- }
- }
-
- pub fn run(self: *Loop) void {
- self.finishOneEvent(); // the reference we start with
-
- self.workerRun();
-
- if (!builtin.single_threaded) {
- switch (builtin.os.tag) {
- .linux,
- .macos,
- .ios,
- .tvos,
- .watchos,
- .freebsd,
- .netbsd,
- .dragonfly,
- .openbsd,
- => self.fs_thread.join(),
- else => {},
- }
- }
-
- for (self.extra_threads) |extra_thread| {
- extra_thread.join();
- }
-
- self.delay_queue.deinit();
- }
-
- /// Runs the provided function asynchronously. The function's frame is allocated
- /// with `allocator` and freed when the function returns.
- /// `func` must return void and it can be an async function.
- /// Yields to the event loop, running the function on the next tick.
- pub fn runDetached(self: *Loop, alloc: mem.Allocator, comptime func: anytype, args: anytype) error{OutOfMemory}!void {
- if (!std.io.is_async) @compileError("Can't use runDetached in non-async mode!");
- if (@TypeOf(@call(.{}, func, args)) != void) {
- @compileError("`func` must not have a return value");
- }
-
- const Wrapper = struct {
- const Args = @TypeOf(args);
- fn run(func_args: Args, loop: *Loop, allocator: mem.Allocator) void {
- loop.beginOneEvent();
- loop.yield();
- @call(.{}, func, func_args); // compile error when called with non-void ret type
- suspend {
- loop.finishOneEvent();
- allocator.destroy(@frame());
- }
- }
- };
-
- const run_frame = try alloc.create(@Frame(Wrapper.run));
- run_frame.* = async Wrapper.run(args, self, alloc);
- }
-
- /// Yielding lets the event loop run, starting any unstarted async operations.
- /// Note that async operations automatically start when a function yields for any other reason,
- /// for example, when async I/O is performed. This function is intended to be used only when
- /// CPU bound tasks would be waiting in the event loop but never get started because no async I/O
- /// is performed.
- pub fn yield(self: *Loop) void {
- suspend {
- var my_tick_node = NextTickNode{
- .prev = undefined,
- .next = undefined,
- .data = @frame(),
- };
- self.onNextTick(&my_tick_node);
- }
- }
-
- /// If the build is multi-threaded and there is an event loop, then it calls `yield`. Otherwise,
- /// does nothing.
- pub fn startCpuBoundOperation() void {
- if (builtin.single_threaded) {
- return;
- } else if (instance) |event_loop| {
- event_loop.yield();
- }
- }
-
- /// call finishOneEvent when done
- pub fn beginOneEvent(self: *Loop) void {
- _ = @atomicRmw(usize, &self.pending_event_count, .Add, 1, .SeqCst);
- }
-
- pub fn finishOneEvent(self: *Loop) void {
- nosuspend {
- const prev = @atomicRmw(usize, &self.pending_event_count, .Sub, 1, .SeqCst);
- if (prev != 1) return;
-
- // cause all the threads to stop
- self.posixFsRequest(&self.fs_end_request);
-
- switch (builtin.os.tag) {
- .linux => {
- // writing to the eventfd will only wake up one thread, thus multiple writes
- // are needed to wakeup all the threads
- var i: usize = 0;
- while (i < self.extra_threads.len + 1) : (i += 1) {
- // writing 8 bytes to an eventfd cannot fail
- const amt = os.write(self.os_data.final_eventfd, &wakeup_bytes) catch unreachable;
- assert(amt == wakeup_bytes.len);
- }
- return;
- },
- .macos, .ios, .tvos, .watchos, .freebsd, .netbsd, .dragonfly, .openbsd => {
- const final_kevent = @as(*const [1]os.Kevent, &self.os_data.final_kevent);
- const empty_kevs = &[0]os.Kevent{};
- // cannot fail because we already added it and this just enables it
- _ = os.kevent(self.os_data.kqfd, final_kevent, empty_kevs, null) catch unreachable;
- return;
- },
- .windows => {
- var i: usize = 0;
- while (i < self.extra_threads.len + 1) : (i += 1) {
- while (true) {
- const overlapped = &self.final_resume_node.overlapped;
- windows.PostQueuedCompletionStatus(self.os_data.io_port, undefined, undefined, overlapped) catch continue;
- break;
- }
- }
- return;
- },
- else => @compileError("unsupported OS"),
- }
- }
- }
-
- pub fn sleep(self: *Loop, nanoseconds: u64) void {
- if (builtin.single_threaded)
- @compileError("TODO: integrate timers with epoll/kevent/iocp for single-threaded");
-
- suspend {
- const now = self.delay_queue.timer.read();
-
- var entry: DelayQueue.Waiters.Entry = undefined;
- entry.init(@frame(), now + nanoseconds);
- self.delay_queue.waiters.insert(&entry);
-
- // Speculatively wake up the timer thread when we add a new entry.
- // If the timer thread is sleeping on a longer entry, we need to
- // interrupt it so that our entry can be expired in time.
- self.delay_queue.event.set();
- }
- }
-
- const DelayQueue = struct {
- timer: std.time.Timer,
- waiters: Waiters,
- thread: std.Thread,
- event: std.Thread.ResetEvent,
- is_running: std.atomic.Value(bool),
-
- /// Initialize the delay queue by spawning the timer thread
- /// and starting any timer resources.
- fn init(self: *DelayQueue) !void {
- self.* = DelayQueue{
- .timer = try std.time.Timer.start(),
- .waiters = DelayQueue.Waiters{
- .entries = std.atomic.Queue(anyframe).init(),
- },
- .thread = undefined,
- .event = .{},
- .is_running = std.atomic.Value(bool).init(true),
- };
-
- // Must be after init so that it can read the other state, such as `is_running`.
- self.thread = try std.Thread.spawn(.{}, DelayQueue.run, .{self});
- }
-
- fn deinit(self: *DelayQueue) void {
- self.is_running.store(false, .SeqCst);
- self.event.set();
- self.thread.join();
- }
-
- /// Entry point for the timer thread
- /// which waits for timer entries to expire and reschedules them.
- fn run(self: *DelayQueue) void {
- const loop = @fieldParentPtr(Loop, "delay_queue", self);
-
- while (self.is_running.load(.SeqCst)) {
- self.event.reset();
- const now = self.timer.read();
-
- if (self.waiters.popExpired(now)) |entry| {
- loop.onNextTick(&entry.node);
- continue;
- }
-
- if (self.waiters.nextExpire()) |expires| {
- if (now >= expires)
- continue;
- self.event.timedWait(expires - now) catch {};
- } else {
- self.event.wait();
- }
- }
- }
-
- // TODO: use a tickless hierarchical timer wheel:
- // https://github.com/wahern/timeout/
- const Waiters = struct {
- entries: std.atomic.Queue(anyframe),
-
- const Entry = struct {
- node: NextTickNode,
- expires: u64,
-
- fn init(self: *Entry, frame: anyframe, expires: u64) void {
- self.node.data = frame;
- self.expires = expires;
- }
- };
-
- /// Registers the entry into the queue of waiting frames
- fn insert(self: *Waiters, entry: *Entry) void {
- self.entries.put(&entry.node);
- }
-
- /// Dequeues one expired event relative to `now`
- fn popExpired(self: *Waiters, now: u64) ?*Entry {
- const entry = self.peekExpiringEntry() orelse return null;
- if (entry.expires > now)
- return null;
-
- assert(self.entries.remove(&entry.node));
- return entry;
- }
-
- /// Returns an estimate for the amount of time
- /// to wait until the next waiting entry expires.
- fn nextExpire(self: *Waiters) ?u64 {
- const entry = self.peekExpiringEntry() orelse return null;
- return entry.expires;
- }
-
- fn peekExpiringEntry(self: *Waiters) ?*Entry {
- self.entries.mutex.lock();
- defer self.entries.mutex.unlock();
-
- // starting from the head
- var head = self.entries.head orelse return null;
-
- // traverse the list of waiting entries to
- // find the Node with the smallest `expires` field
- var min = head;
- while (head.next) |node| {
- const minEntry = @fieldParentPtr(Entry, "node", min);
- const nodeEntry = @fieldParentPtr(Entry, "node", node);
- if (nodeEntry.expires < minEntry.expires)
- min = node;
- head = node;
- }
-
- return @fieldParentPtr(Entry, "node", min);
- }
- };
- };
-
- /// ------- I/0 APIs -------
- pub fn accept(
- self: *Loop,
- /// This argument is a socket that has been created with `socket`, bound to a local address
- /// with `bind`, and is listening for connections after a `listen`.
- sockfd: os.socket_t,
- /// This argument is a pointer to a sockaddr structure. This structure is filled in with the
- /// address of the peer socket, as known to the communications layer. The exact format of the
- /// address returned addr is determined by the socket's address family (see `socket` and the
- /// respective protocol man pages).
- addr: *os.sockaddr,
- /// This argument is a value-result argument: the caller must initialize it to contain the
- /// size (in bytes) of the structure pointed to by addr; on return it will contain the actual size
- /// of the peer address.
- ///
- /// The returned address is truncated if the buffer provided is too small; in this case, `addr_size`
- /// will return a value greater than was supplied to the call.
- addr_size: *os.socklen_t,
- /// The following values can be bitwise ORed in flags to obtain different behavior:
- /// * `SOCK.CLOEXEC` - Set the close-on-exec (`FD_CLOEXEC`) flag on the new file descriptor. See the
- /// description of the `O.CLOEXEC` flag in `open` for reasons why this may be useful.
- flags: u32,
- ) os.AcceptError!os.socket_t {
- while (true) {
- return os.accept(sockfd, addr, addr_size, flags | os.SOCK.NONBLOCK) catch |err| switch (err) {
- error.WouldBlock => {
- self.waitUntilFdReadable(sockfd);
- continue;
- },
- else => return err,
- };
- }
- }
-
- pub fn connect(self: *Loop, sockfd: os.socket_t, sock_addr: *const os.sockaddr, len: os.socklen_t) os.ConnectError!void {
- os.connect(sockfd, sock_addr, len) catch |err| switch (err) {
- error.WouldBlock => {
- self.waitUntilFdWritable(sockfd);
- return os.getsockoptError(sockfd);
- },
- else => return err,
- };
- }
-
- /// Performs an async `os.open` using a separate thread.
- pub fn openZ(self: *Loop, file_path: [*:0]const u8, flags: u32, mode: os.mode_t) os.OpenError!os.fd_t {
- var req_node = Request.Node{
- .data = .{
- .msg = .{
- .open = .{
- .path = file_path,
- .flags = flags,
- .mode = mode,
- .result = undefined,
- },
- },
- .finish = .{ .tick_node = .{ .data = @frame() } },
- },
- };
- suspend {
- self.posixFsRequest(&req_node);
- }
- return req_node.data.msg.open.result;
- }
-
- /// Opens `file_path` relative to `fd` by queueing an `openat` request for the
- /// fs thread, which services it with `os.openatZ`.
- /// The calling frame is suspended until the request has been processed and
- /// then returns the result stored in the request.
- pub fn openatZ(self: *Loop, fd: os.fd_t, file_path: [*:0]const u8, flags: u32, mode: os.mode_t) os.OpenError!os.fd_t {
- var req_node = Request.Node{
- .data = .{
- .msg = .{
- .openat = .{
- .fd = fd,
- .path = file_path,
- .flags = flags,
- .mode = mode,
- // Filled in by the fs thread before this frame is resumed.
- .result = undefined,
- },
- },
- .finish = .{ .tick_node = .{ .data = @frame() } },
- },
- };
- suspend {
- self.posixFsRequest(&req_node);
- }
- return req_node.data.msg.openat.result;
- }
-
-    /// Closes `fd` on the fs thread (via `os.close`); the calling frame is
-    /// suspended until the request has been processed.
-    pub fn close(self: *Loop, fd: os.fd_t) void {
-        var request: Request.Node = .{ .data = .{
-            .msg = .{ .close = .{ .fd = fd } },
-            .finish = .{ .tick_node = .{ .data = @frame() } },
-        } };
-        suspend {
-            self.posixFsRequest(&request);
-        }
-    }
-
-    /// Reads from `fd` into `buf`.
-    /// When `simulate_evented` is set, the read is handed to the fs thread and
-    /// this frame suspends until the result is available; in that mode `fd`
-    /// must block and not return EAGAIN.
-    /// Otherwise `os.read` is called directly, and every `error.WouldBlock` is
-    /// handled by waiting for `fd` to become readable before retrying.
-    pub fn read(self: *Loop, fd: os.fd_t, buf: []u8, simulate_evented: bool) os.ReadError!usize {
-        if (!simulate_evented) {
-            while (true) {
-                const amt = os.read(fd, buf) catch |err| {
-                    if (err != error.WouldBlock) return err;
-                    self.waitUntilFdReadable(fd);
-                    continue;
-                };
-                return amt;
-            }
-        }
-
-        var request: Request.Node = .{ .data = .{
-            .msg = .{ .read = .{
-                .fd = fd,
-                .buf = buf,
-                .result = undefined,
-            } },
-            .finish = .{ .tick_node = .{ .data = @frame() } },
-        } };
-        suspend {
-            self.posixFsRequest(&request);
-        }
-        return request.data.msg.read.result;
-    }
-
-    /// Vectored read from `fd` into the buffers described by `iov`.
-    /// When `simulate_evented` is set, the read is handed to the fs thread and
-    /// this frame suspends until the result is available; in that mode `fd`
-    /// must block and not return EAGAIN.
-    /// Otherwise `os.readv` is called directly, and every `error.WouldBlock` is
-    /// handled by waiting for `fd` to become readable before retrying.
-    pub fn readv(self: *Loop, fd: os.fd_t, iov: []const os.iovec, simulate_evented: bool) os.ReadError!usize {
-        if (!simulate_evented) {
-            while (true) {
-                const amt = os.readv(fd, iov) catch |err| {
-                    if (err != error.WouldBlock) return err;
-                    self.waitUntilFdReadable(fd);
-                    continue;
-                };
-                return amt;
-            }
-        }
-
-        var request: Request.Node = .{ .data = .{
-            .msg = .{ .readv = .{
-                .fd = fd,
-                .iov = iov,
-                .result = undefined,
-            } },
-            .finish = .{ .tick_node = .{ .data = @frame() } },
-        } };
-        suspend {
-            self.posixFsRequest(&request);
-        }
-        return request.data.msg.readv.result;
-    }
-
-    /// Positional read from `fd` at `offset` into `buf`.
-    /// When `simulate_evented` is set, the read is handed to the fs thread and
-    /// this frame suspends until the result is available; in that mode `fd`
-    /// must block and not return EAGAIN.
-    /// Otherwise `os.pread` is called directly, and every `error.WouldBlock` is
-    /// handled by waiting for `fd` to become readable before retrying.
-    pub fn pread(self: *Loop, fd: os.fd_t, buf: []u8, offset: u64, simulate_evented: bool) os.PReadError!usize {
-        if (!simulate_evented) {
-            while (true) {
-                const amt = os.pread(fd, buf, offset) catch |err| {
-                    if (err != error.WouldBlock) return err;
-                    self.waitUntilFdReadable(fd);
-                    continue;
-                };
-                return amt;
-            }
-        }
-
-        var request: Request.Node = .{ .data = .{
-            .msg = .{ .pread = .{
-                .fd = fd,
-                .buf = buf,
-                .offset = offset,
-                .result = undefined,
-            } },
-            .finish = .{ .tick_node = .{ .data = @frame() } },
-        } };
-        suspend {
-            self.posixFsRequest(&request);
-        }
-        return request.data.msg.pread.result;
-    }
-
-    /// Positional vectored read from `fd` at `offset` into the buffers in `iov`.
-    /// When `simulate_evented` is set, the read is handed to the fs thread and
-    /// this frame suspends until the result is available; in that mode `fd`
-    /// must block and not return EAGAIN.
-    /// Otherwise `os.preadv` is called directly, and every `error.WouldBlock`
-    /// is handled by waiting for `fd` to become readable before retrying.
-    ///
-    /// The error set is `os.PReadError`, matching `pread` and the result type
-    /// stored in `Request.Msg.PReadV`.
-    pub fn preadv(self: *Loop, fd: os.fd_t, iov: []const os.iovec, offset: u64, simulate_evented: bool) os.PReadError!usize {
-        if (simulate_evented) {
-            var req_node = Request.Node{
-                .data = .{
-                    .msg = .{
-                        .preadv = .{
-                            .fd = fd,
-                            .iov = iov,
-                            .offset = offset,
-                            // Filled in by the fs thread before this frame is resumed.
-                            .result = undefined,
-                        },
-                    },
-                    .finish = .{ .tick_node = .{ .data = @frame() } },
-                },
-            };
-            suspend {
-                self.posixFsRequest(&req_node);
-            }
-            return req_node.data.msg.preadv.result;
-        } else {
-            while (true) {
-                return os.preadv(fd, iov, offset) catch |err| switch (err) {
-                    error.WouldBlock => {
-                        self.waitUntilFdReadable(fd);
-                        continue;
-                    },
-                    else => return err,
-                };
-            }
-        }
-    }
-
-    /// Writes `bytes` to `fd`.
-    /// When `simulate_evented` is set, the write is handed to the fs thread and
-    /// this frame suspends until the result is available; in that mode `fd`
-    /// must block and not return EAGAIN.
-    /// Otherwise `os.write` is called directly, and every `error.WouldBlock` is
-    /// handled by waiting for `fd` to become writable before retrying.
-    pub fn write(self: *Loop, fd: os.fd_t, bytes: []const u8, simulate_evented: bool) os.WriteError!usize {
-        if (!simulate_evented) {
-            while (true) {
-                const amt = os.write(fd, bytes) catch |err| {
-                    if (err != error.WouldBlock) return err;
-                    self.waitUntilFdWritable(fd);
-                    continue;
-                };
-                return amt;
-            }
-        }
-
-        var request: Request.Node = .{ .data = .{
-            .msg = .{ .write = .{
-                .fd = fd,
-                .bytes = bytes,
-                .result = undefined,
-            } },
-            .finish = .{ .tick_node = .{ .data = @frame() } },
-        } };
-        suspend {
-            self.posixFsRequest(&request);
-        }
-        return request.data.msg.write.result;
-    }
-
-    /// Vectored write of the buffers in `iov` to `fd`.
-    /// When `simulate_evented` is set, the write is handed to the fs thread and
-    /// this frame suspends until the result is available; in that mode `fd`
-    /// must block and not return EAGAIN.
-    /// Otherwise `os.writev` is called directly, and every `error.WouldBlock`
-    /// is handled by waiting for `fd` to become writable before retrying.
-    pub fn writev(self: *Loop, fd: os.fd_t, iov: []const os.iovec_const, simulate_evented: bool) os.WriteError!usize {
-        if (!simulate_evented) {
-            while (true) {
-                const amt = os.writev(fd, iov) catch |err| {
-                    if (err != error.WouldBlock) return err;
-                    self.waitUntilFdWritable(fd);
-                    continue;
-                };
-                return amt;
-            }
-        }
-
-        var request: Request.Node = .{ .data = .{
-            .msg = .{ .writev = .{
-                .fd = fd,
-                .iov = iov,
-                .result = undefined,
-            } },
-            .finish = .{ .tick_node = .{ .data = @frame() } },
-        } };
-        suspend {
-            self.posixFsRequest(&request);
-        }
-        return request.data.msg.writev.result;
-    }
-
-    /// Positional write of `bytes` to `fd` at `offset`.
-    /// When `simulate_evented` is set, the write is handed to the fs thread and
-    /// this frame suspends until the result is available; in that mode `fd`
-    /// must block and not return EAGAIN.
-    /// Otherwise `os.pwrite` is called directly, and every `error.WouldBlock`
-    /// is handled by waiting for `fd` to become writable before retrying.
-    ///
-    /// The error set is `os.PWriteError`, matching `pwritev` and the result
-    /// type stored in `Request.Msg.PWrite`.
-    pub fn pwrite(self: *Loop, fd: os.fd_t, bytes: []const u8, offset: u64, simulate_evented: bool) os.PWriteError!usize {
-        if (simulate_evented) {
-            var req_node = Request.Node{
-                .data = .{
-                    .msg = .{
-                        .pwrite = .{
-                            .fd = fd,
-                            .bytes = bytes,
-                            .offset = offset,
-                            // Filled in by the fs thread before this frame is resumed.
-                            .result = undefined,
-                        },
-                    },
-                    .finish = .{ .tick_node = .{ .data = @frame() } },
-                },
-            };
-            suspend {
-                self.posixFsRequest(&req_node);
-            }
-            return req_node.data.msg.pwrite.result;
-        } else {
-            while (true) {
-                return os.pwrite(fd, bytes, offset) catch |err| switch (err) {
-                    error.WouldBlock => {
-                        self.waitUntilFdWritable(fd);
-                        continue;
-                    },
-                    else => return err,
-                };
-            }
-        }
-    }
-
-    /// Positional vectored write of the buffers in `iov` to `fd` at `offset`.
-    /// When `simulate_evented` is set, the write is handed to the fs thread and
-    /// this frame suspends until the result is available; in that mode `fd`
-    /// must block and not return EAGAIN.
-    /// Otherwise `os.pwritev` is called directly, and every `error.WouldBlock`
-    /// is handled by waiting for `fd` to become writable before retrying.
-    pub fn pwritev(self: *Loop, fd: os.fd_t, iov: []const os.iovec_const, offset: u64, simulate_evented: bool) os.PWriteError!usize {
-        if (!simulate_evented) {
-            while (true) {
-                const amt = os.pwritev(fd, iov, offset) catch |err| {
-                    if (err != error.WouldBlock) return err;
-                    self.waitUntilFdWritable(fd);
-                    continue;
-                };
-                return amt;
-            }
-        }
-
-        var request: Request.Node = .{ .data = .{
-            .msg = .{ .pwritev = .{
-                .fd = fd,
-                .iov = iov,
-                .offset = offset,
-                .result = undefined,
-            } },
-            .finish = .{ .tick_node = .{ .data = @frame() } },
-        } };
-        suspend {
-            self.posixFsRequest(&request);
-        }
-        return request.data.msg.pwritev.result;
-    }
-
-    /// Sends `buf` on `sockfd` with `os.sendto`, retrying after the socket
-    /// becomes writable whenever the call reports `error.WouldBlock`.
-    pub fn sendto(
-        self: *Loop,
-        /// The file descriptor of the sending socket.
-        sockfd: os.fd_t,
-        /// Message to send.
-        buf: []const u8,
-        flags: u32,
-        dest_addr: ?*const os.sockaddr,
-        addrlen: os.socklen_t,
-    ) os.SendToError!usize {
-        while (true) {
-            const sent = os.sendto(sockfd, buf, flags, dest_addr, addrlen) catch |err| {
-                if (err != error.WouldBlock) return err;
-                self.waitUntilFdWritable(sockfd);
-                continue;
-            };
-            return sent;
-        }
-    }
-
-    /// Receives into `buf` from `sockfd` with `os.recvfrom`, retrying after the
-    /// socket becomes readable whenever the call reports `error.WouldBlock`.
-    pub fn recvfrom(
-        self: *Loop,
-        sockfd: os.fd_t,
-        buf: []u8,
-        flags: u32,
-        src_addr: ?*os.sockaddr,
-        addrlen: ?*os.socklen_t,
-    ) os.RecvFromError!usize {
-        while (true) {
-            const received = os.recvfrom(sockfd, buf, flags, src_addr, addrlen) catch |err| {
-                if (err != error.WouldBlock) return err;
-                self.waitUntilFdReadable(sockfd);
-                continue;
-            };
-            return received;
-        }
-    }
-
-    /// Checks access to `path_z` relative to `dirfd` by queueing a `faccessat`
-    /// request for the fs thread, which services it with `os.faccessatZ`.
-    /// The calling frame is suspended until the request has been processed.
-    pub fn faccessatZ(
-        self: *Loop,
-        dirfd: os.fd_t,
-        path_z: [*:0]const u8,
-        mode: u32,
-        flags: u32,
-    ) os.AccessError!void {
-        var request: Request.Node = .{ .data = .{
-            .msg = .{ .faccessat = .{
-                .dirfd = dirfd,
-                .path = path_z,
-                .mode = mode,
-                .flags = flags,
-                .result = undefined,
-            } },
-            .finish = .{ .tick_node = .{ .data = @frame() } },
-        } };
-        suspend {
-            self.posixFsRequest(&request);
-        }
-        return request.data.msg.faccessat.result;
-    }
-
- /// Worker loop. Each iteration first drains `next_tick_queue`, resuming every
- /// queued frame, and then waits on the OS-specific event source (epoll,
- /// kqueue or an I/O completion port) and resumes the frame attached to the
- /// received event. Returns when a `.stop` resume node is received or, on
- /// Windows, when `GetQueuedCompletionStatus` reports `.Aborted`.
- fn workerRun(self: *Loop) void {
- while (true) {
- // Run everything that was scheduled for the next tick.
- while (true) {
- const next_tick_node = self.next_tick_queue.get() orelse break;
- self.dispatch();
- resume next_tick_node.data;
- self.finishOneEvent();
- }
-
- switch (builtin.os.tag) {
- .linux => {
- // only process 1 event so we don't steal from other threads
- var events: [1]os.linux.epoll_event = undefined;
- const count = os.epoll_wait(self.os_data.epollfd, events[0..], -1);
- for (events[0..count]) |ev| {
- // The event's user data holds the address of a ResumeNode.
- const resume_node = @as(*ResumeNode, @ptrFromInt(ev.data.ptr));
- // Copy out `handle` and `id` before resuming the frame.
- const handle = resume_node.handle;
- const resume_node_id = resume_node.id;
- switch (resume_node_id) {
- .basic => {},
- .stop => return,
- .event_fd => {
- // Return the eventfd node to the pool of available nodes.
- const event_fd_node = @fieldParentPtr(ResumeNode.EventFd, "base", resume_node);
- event_fd_node.epoll_op = os.linux.EPOLL.CTL_MOD;
- const stack_node = @fieldParentPtr(std.atomic.Stack(ResumeNode.EventFd).Node, "data", event_fd_node);
- self.available_eventfd_resume_nodes.push(stack_node);
- },
- }
- resume handle;
- if (resume_node_id == .event_fd) {
- self.finishOneEvent();
- }
- }
- },
- .macos, .ios, .tvos, .watchos, .freebsd, .netbsd, .dragonfly, .openbsd => {
- // Receive at most one kevent; no changes are submitted.
- var eventlist: [1]os.Kevent = undefined;
- const empty_kevs = &[0]os.Kevent{};
- const count = os.kevent(self.os_data.kqfd, empty_kevs, eventlist[0..], null) catch unreachable;
- for (eventlist[0..count]) |ev| {
- // The event's user data holds the address of a ResumeNode.
- const resume_node = @as(*ResumeNode, @ptrFromInt(ev.udata));
- const handle = resume_node.handle;
- const resume_node_id = resume_node.id;
- switch (resume_node_id) {
- .basic => {
- // Hand the received kevent to the waiting frame.
- const basic_node = @fieldParentPtr(ResumeNode.Basic, "base", resume_node);
- basic_node.kev = ev;
- },
- .stop => return,
- .event_fd => {
- const event_fd_node = @fieldParentPtr(ResumeNode.EventFd, "base", resume_node);
- const stack_node = @fieldParentPtr(std.atomic.Stack(ResumeNode.EventFd).Node, "data", event_fd_node);
- self.available_eventfd_resume_nodes.push(stack_node);
- },
- }
- resume handle;
- if (resume_node_id == .event_fd) {
- self.finishOneEvent();
- }
- }
- },
- .windows => {
- var completion_key: usize = undefined;
- // Keep polling the completion port until a non-null OVERLAPPED is obtained.
- const overlapped = while (true) {
- var nbytes: windows.DWORD = undefined;
- var overlapped: ?*windows.OVERLAPPED = undefined;
- switch (windows.GetQueuedCompletionStatus(self.os_data.io_port, &nbytes, &completion_key, &overlapped, windows.INFINITE)) {
- .Aborted => return,
- .Normal => {},
- .EOF => {},
- .Cancelled => continue,
- }
- if (overlapped) |o| break o;
- };
- // The OVERLAPPED is embedded in the ResumeNode as its `overlapped` field.
- const resume_node = @fieldParentPtr(ResumeNode, "overlapped", overlapped);
- const handle = resume_node.handle;
- const resume_node_id = resume_node.id;
- switch (resume_node_id) {
- .basic => {},
- .stop => return,
- .event_fd => {
- const event_fd_node = @fieldParentPtr(ResumeNode.EventFd, "base", resume_node);
- const stack_node = @fieldParentPtr(std.atomic.Stack(ResumeNode.EventFd).Node, "data", event_fd_node);
- self.available_eventfd_resume_nodes.push(stack_node);
- },
- }
- resume handle;
- // Unlike the epoll/kqueue branches, this is called for every node kind.
- self.finishOneEvent();
- },
- else => @compileError("unsupported OS"),
- }
- }
- }
-
- /// Submits `request_node` to the fs thread: registers one pending event,
- /// puts the node on `fs_queue` and signals `fs_thread_wakeup`.
- fn posixFsRequest(self: *Loop, request_node: *Request.Node) void {
- self.beginOneEvent(); // finished in posixFsRun after processing the msg
- self.fs_queue.put(request_node);
- self.fs_thread_wakeup.set();
- }
-
-    /// Withdraws `request_node` from `fs_queue`. If the node was still queued,
-    /// one pending event is finished here.
-    fn posixFsCancel(self: *Loop, request_node: *Request.Node) void {
-        const was_queued = self.fs_queue.remove(request_node);
-        if (!was_queued) return;
-        self.finishOneEvent();
-    }
-
-    /// Body of the fs thread. Repeatedly drains `fs_queue`, performing each
-    /// request with the matching `os` call and storing the outcome in the
-    /// request itself, then waits on `fs_thread_wakeup` for more work.
-    /// Returns when an `.end` message is dequeued.
-    fn posixFsRun(self: *Loop) void {
-        nosuspend while (true) {
-            self.fs_thread_wakeup.reset();
-            while (self.fs_queue.get()) |request_node| {
-                switch (request_node.data.msg) {
-                    .end => return,
-                    .read => |*msg| msg.result = os.read(msg.fd, msg.buf),
-                    .readv => |*msg| msg.result = os.readv(msg.fd, msg.iov),
-                    .write => |*msg| msg.result = os.write(msg.fd, msg.bytes),
-                    .writev => |*msg| msg.result = os.writev(msg.fd, msg.iov),
-                    .pwrite => |*msg| msg.result = os.pwrite(msg.fd, msg.bytes, msg.offset),
-                    .pwritev => |*msg| msg.result = os.pwritev(msg.fd, msg.iov, msg.offset),
-                    .pread => |*msg| msg.result = os.pread(msg.fd, msg.buf, msg.offset),
-                    .preadv => |*msg| msg.result = os.preadv(msg.fd, msg.iov, msg.offset),
-                    .open => |*msg| {
-                        if (is_windows) unreachable; // TODO
-                        msg.result = os.openZ(msg.path, msg.flags, msg.mode);
-                    },
-                    .openat => |*msg| {
-                        if (is_windows) unreachable; // TODO
-                        msg.result = os.openatZ(msg.fd, msg.path, msg.flags, msg.mode);
-                    },
-                    .faccessat => |*msg| msg.result = os.faccessatZ(msg.dirfd, msg.path, msg.mode, msg.flags),
-                    .close => |*msg| os.close(msg.fd),
-                }
-                // Reschedule the requester, if it asked for that.
-                switch (request_node.data.finish) {
-                    .tick_node => |*tick_node| self.onNextTick(tick_node),
-                    .no_action => {},
-                }
-                // Matches the beginOneEvent() done in posixFsRequest.
-                self.finishOneEvent();
-            }
-            self.fs_thread_wakeup.wait();
-        };
-    }
-
- /// Per-OS state of the loop, selected at compile time from `builtin.os.tag`:
- /// epoll data on Linux, kqueue data on the Darwin/BSD targets listed below,
- /// an I/O completion port on Windows, and an empty struct elsewhere.
- const OsData = switch (builtin.os.tag) {
- .linux => LinuxOsData,
- .macos, .ios, .tvos, .watchos, .freebsd, .netbsd, .dragonfly, .openbsd => KEventData,
- .windows => struct {
- io_port: windows.HANDLE,
- extra_thread_count: usize,
- },
- else => struct {},
- };
-
- /// `OsData` for the kqueue-based targets.
- const KEventData = struct {
- /// kqueue descriptor passed to `os.kevent` in `workerRun`.
- kqfd: i32,
- // NOTE(review): presumably the kevent used to wake workers for shutdown; its use is not visible here.
- final_kevent: os.Kevent,
- };
-
- /// `OsData` for Linux.
- const LinuxOsData = struct {
- /// epoll descriptor passed to `os.epoll_wait` in `workerRun`.
- epollfd: i32,
- // NOTE(review): presumably the eventfd (and its epoll registration) used to wake workers for shutdown; its use is not visible here.
- final_eventfd: i32,
- final_eventfd_event: os.linux.epoll_event,
- };
-
- /// A unit of work for the fs thread: the operation to perform (`msg`) and
- /// what to do once it has been performed (`finish`). See `posixFsRun`.
- pub const Request = struct {
- msg: Msg,
- finish: Finish,
-
- /// Queue node type used for `fs_queue`.
- pub const Node = std.atomic.Queue(Request).Node;
-
- /// Action taken by the fs thread after the message has been processed.
- pub const Finish = union(enum) {
- /// Schedule this node with `onNextTick`.
- tick_node: Loop.NextTickNode,
- /// Do nothing.
- no_action,
- };
-
- /// The operation to perform. Each payload carries the arguments of the
- /// corresponding `os` call and, where the call returns a value, a `result`
- /// field that the fs thread fills in.
- pub const Msg = union(enum) {
- read: Read,
- readv: ReadV,
- write: Write,
- writev: WriteV,
- pwrite: PWrite,
- pwritev: PWriteV,
- pread: PRead,
- preadv: PReadV,
- open: Open,
- openat: OpenAt,
- close: Close,
- faccessat: FAccessAt,
-
- /// special - means the fs thread should exit
- end,
-
- /// Arguments and result of `os.read`.
- pub const Read = struct {
- fd: os.fd_t,
- buf: []u8,
- result: Error!usize,
-
- pub const Error = os.ReadError;
- };
-
- /// Arguments and result of `os.readv`.
- pub const ReadV = struct {
- fd: os.fd_t,
- iov: []const os.iovec,
- result: Error!usize,
-
- pub const Error = os.ReadError;
- };
-
- /// Arguments and result of `os.write`.
- pub const Write = struct {
- fd: os.fd_t,
- bytes: []const u8,
- result: Error!usize,
-
- pub const Error = os.WriteError;
- };
-
- /// Arguments and result of `os.writev`.
- pub const WriteV = struct {
- fd: os.fd_t,
- iov: []const os.iovec_const,
- result: Error!usize,
-
- pub const Error = os.WriteError;
- };
-
- /// Arguments and result of `os.pwrite`.
- pub const PWrite = struct {
- fd: os.fd_t,
- bytes: []const u8,
- // NOTE(review): `Loop.pwrite` takes a `u64` offset but stores it here as `usize` - confirm for 32-bit targets.
- offset: usize,
- result: Error!usize,
-
- pub const Error = os.PWriteError;
- };
-
- /// Arguments and result of `os.pwritev`.
- pub const PWriteV = struct {
- fd: os.fd_t,
- iov: []const os.iovec_const,
- // NOTE(review): `Loop.pwritev` takes a `u64` offset but stores it here as `usize` - confirm for 32-bit targets.
- offset: usize,
- result: Error!usize,
-
- pub const Error = os.PWriteError;
- };
-
- /// Arguments and result of `os.pread`.
- pub const PRead = struct {
- fd: os.fd_t,
- buf: []u8,
- // NOTE(review): `Loop.pread` takes a `u64` offset but stores it here as `usize` - confirm for 32-bit targets.
- offset: usize,
- result: Error!usize,
-
- pub const Error = os.PReadError;
- };
-
- /// Arguments and result of `os.preadv`.
- pub const PReadV = struct {
- fd: os.fd_t,
- iov: []const os.iovec,
- // NOTE(review): `Loop.preadv` takes a `u64` offset but stores it here as `usize` - confirm for 32-bit targets.
- offset: usize,
- result: Error!usize,
-
- pub const Error = os.PReadError;
- };
-
- /// Arguments and result of `os.openZ`.
- pub const Open = struct {
- path: [*:0]const u8,
- flags: u32,
- mode: os.mode_t,
- result: Error!os.fd_t,
-
- pub const Error = os.OpenError;
- };
-
- /// Arguments and result of `os.openatZ`.
- pub const OpenAt = struct {
- fd: os.fd_t,
- path: [*:0]const u8,
- flags: u32,
- mode: os.mode_t,
- result: Error!os.fd_t,
-
- pub const Error = os.OpenError;
- };
-
- /// Argument of `os.close`; there is no result to report.
- pub const Close = struct {
- fd: os.fd_t,
- };
-
- /// Arguments and result of `os.faccessatZ`.
- pub const FAccessAt = struct {
- dirfd: os.fd_t,
- path: [*:0]const u8,
- mode: u32,
- flags: u32,
- result: Error!void,
-
- pub const Error = os.AccessError;
- };
- };
- };
-};
-
-// Smoke test: initialize a multi-threaded loop, run it with no work queued and
-// deinitialize it. Always skipped at present because of the `if (true)` guard.
-test "std.event.Loop - basic" {
- // https://github.com/ziglang/zig/issues/1908
- if (builtin.single_threaded) return error.SkipZigTest;
-
- if (true) {
- // https://github.com/ziglang/zig/issues/4922
- return error.SkipZigTest;
- }
-
- var loop: Loop = undefined;
- try loop.initMultiThreaded();
- defer loop.deinit();
-
- loop.run();
-}
-
-/// Test helper that returns the constant 1234, the value `testEventLoop2` expects.
-fn testEventLoop() i32 {
- return 1234;
-}
-
-/// Test helper: awaits `h`, checks that it produced 1234 and then sets
-/// `did_it.*` to true.
-fn testEventLoop2(h: anyframe->i32, did_it: *bool) void {
-    const value = await h;
-    // This function returns `void`, so `try` cannot be used here; a failed
-    // expectation panics instead.
-    testing.expect(value == 1234) catch @panic("test failure");
-    did_it.* = true;
-}
-
-// Counter incremented by `testRunDetached`; checked after the loop has run.
-var testRunDetachedData: usize = 0;
-// Schedules `testRunDetached` with `runDetached`, runs the loop and expects the
-// counter to be 1 afterwards. Always skipped at present because of the
-// `if (true)` guard.
-test "std.event.Loop - runDetached" {
- // https://github.com/ziglang/zig/issues/1908
- if (builtin.single_threaded) return error.SkipZigTest;
- if (!std.io.is_async) return error.SkipZigTest;
- if (true) {
- // https://github.com/ziglang/zig/issues/4922
- return error.SkipZigTest;
- }
-
- var loop: Loop = undefined;
- try loop.initMultiThreaded();
- defer loop.deinit();
-
- // Schedule the execution, won't actually start until we start the
- // event loop.
- try loop.runDetached(std.testing.allocator, testRunDetached, .{});
-
- // Now we can start the event loop. The function will return only
- // after all tasks have been completed, allowing us to synchronize
- // with the previous runDetached.
- loop.run();
-
- try testing.expect(testRunDetachedData == 1);
-}
-
-/// Task used by the runDetached test: increments `testRunDetachedData` by one.
-fn testRunDetached() void {
- testRunDetachedData += 1;
-}
-
-// Starts ten `testSleep` frames that each sleep for 100 ms, awaits all of them
-// and expects every one to have incremented `sleep_count`.
-test "std.event.Loop - sleep" {
- // https://github.com/ziglang/zig/issues/1908
- if (builtin.single_threaded) return error.SkipZigTest;
- if (!std.io.is_async) return error.SkipZigTest;
-
- const frames = try testing.allocator.alloc(@Frame(testSleep), 10);
- defer testing.allocator.free(frames);
-
- const wait_time = 100 * std.time.ns_per_ms;
- var sleep_count: usize = 0;
-
- for (frames) |*frame|
- frame.* = async testSleep(wait_time, &sleep_count);
- for (frames) |*frame|
- await frame;
-
- try testing.expect(sleep_count == frames.len);
-}
-
-/// Sleeps for `wait_ns` on the global loop instance, then atomically increments
-/// `sleep_count.*`.
-fn testSleep(wait_ns: u64, sleep_count: *usize) void {
- Loop.instance.?.sleep(wait_ns);
- _ = @atomicRmw(usize, sleep_count, .Add, 1, .SeqCst);
-}
diff --git a/lib/std/event/rwlock.zig b/lib/std/event/rwlock.zig
deleted file mode 100644
index 0f017a0ca0..0000000000
--- a/lib/std/event/rwlock.zig
+++ /dev/null
@@ -1,292 +0,0 @@
-const std = @import("../std.zig");
-const builtin = @import("builtin");
-const assert = std.debug.assert;
-const testing = std.testing;
-const mem = std.mem;
-const Loop = std.event.Loop;
-const Allocator = std.mem.Allocator;
-
-/// Thread-safe async/await lock.
-/// Functions which are waiting for the lock are suspended, and
-/// are resumed when the lock is released, in order.
-/// Many readers can hold the lock at the same time; however locking for writing is exclusive.
-/// When a read lock is held, it will not be released until the reader queue is empty.
-/// When a write lock is held, it will not be released until the writer queue is empty.
-/// TODO: make this API also work in blocking I/O mode
-pub const RwLock = struct {
- /// Current lock mode; read and written with atomic operations.
- shared_state: State,
- /// Frames suspended in `acquireWrite` that have not been resumed yet.
- writer_queue: Queue,
- /// Frames suspended in `acquireRead` that have not been resumed yet.
- reader_queue: Queue,
- /// Cleared when a writer enqueues itself; set again by the releasing side
- /// once it finds no queued writer.
- writer_queue_empty: bool,
- /// Cleared when a reader enqueues itself; set again by the releasing side.
- reader_queue_empty: bool,
- /// Number of `acquireRead` calls not yet matched by `HeldRead.release`.
- reader_lock_count: usize,
-
- const State = enum(u8) {
- Unlocked,
- WriteLock,
- ReadLock,
- };
-
- const Queue = std.atomic.Queue(anyframe);
-
- const global_event_loop = Loop.instance orelse
- @compileError("std.event.RwLock currently only works with event-based I/O");
-
- /// Token returned by `acquireRead`; call `release` to give up the read lock.
- pub const HeldRead = struct {
- lock: *RwLock,
-
- /// Drops one read lock. The last reader to release switches the lock back
- /// to `.Unlocked` and then hands it on via `commonPostUnlock`.
- pub fn release(self: HeldRead) void {
- // If other readers still hold the lock, we're done.
- if (@atomicRmw(usize, &self.lock.reader_lock_count, .Sub, 1, .SeqCst) != 1) {
- return;
- }
-
- @atomicStore(bool, &self.lock.reader_queue_empty, true, .SeqCst);
- if (@cmpxchgStrong(State, &self.lock.shared_state, .ReadLock, .Unlocked, .SeqCst, .SeqCst) != null) {
- // Didn't unlock. Someone else's problem.
- return;
- }
-
- self.lock.commonPostUnlock();
- }
- };
-
- /// Token returned by `acquireWrite`; call `release` to give up the write lock.
- pub const HeldWrite = struct {
- lock: *RwLock,
-
- /// Releases the write lock. Preference order: pass the lock to the next
- /// queued writer, else switch to a read lock for waiting readers, else
- /// unlock and run `commonPostUnlock`.
- pub fn release(self: HeldWrite) void {
- // See if we can leave it locked for writing, and pass the lock to the next writer
- // in the queue to grab the lock.
- if (self.lock.writer_queue.get()) |node| {
- global_event_loop.onNextTick(node);
- return;
- }
-
- // We need to release the write lock. Check if any readers are waiting to grab the lock.
- if (!@atomicLoad(bool, &self.lock.reader_queue_empty, .SeqCst)) {
- // Switch to a read lock.
- @atomicStore(State, &self.lock.shared_state, .ReadLock, .SeqCst);
- while (self.lock.reader_queue.get()) |node| {
- global_event_loop.onNextTick(node);
- }
- return;
- }
-
- @atomicStore(bool, &self.lock.writer_queue_empty, true, .SeqCst);
- @atomicStore(State, &self.lock.shared_state, .Unlocked, .SeqCst);
-
- self.lock.commonPostUnlock();
- }
- };
-
- /// Returns an unlocked lock with empty queues and no readers.
- pub fn init() RwLock {
- return .{
- .shared_state = .Unlocked,
- .writer_queue = Queue.init(),
- .writer_queue_empty = true,
- .reader_queue = Queue.init(),
- .reader_queue_empty = true,
- .reader_lock_count = 0,
- };
- }
-
- /// Must be called when not locked. Not thread safe.
- /// All calls to acquire() and release() must complete before calling deinit().
- /// Any frame still sitting in either queue is resumed.
- pub fn deinit(self: *RwLock) void {
- assert(self.shared_state == .Unlocked);
- while (self.writer_queue.get()) |node| resume node.data;
- while (self.reader_queue.get()) |node| resume node.data;
- }
-
- /// Suspends the caller until it holds a read lock and returns the token used
- /// to release it.
- pub fn acquireRead(self: *RwLock) callconv(.Async) HeldRead {
- _ = @atomicRmw(usize, &self.reader_lock_count, .Add, 1, .SeqCst);
-
- suspend {
- var my_tick_node = Loop.NextTickNode{
- .data = @frame(),
- .prev = undefined,
- .next = undefined,
- };
-
- self.reader_queue.put(&my_tick_node);
-
- // At this point, we are in the reader_queue, so we might have already been resumed.
-
- // We set this bit so that later we can rely on the fact, that if reader_queue_empty == true,
- // some actor will attempt to grab the lock.
- @atomicStore(bool, &self.reader_queue_empty, false, .SeqCst);
-
- // Here we don't care if we are the one to do the locking or if it was already locked for reading.
- const have_read_lock = if (@cmpxchgStrong(State, &self.shared_state, .Unlocked, .ReadLock, .SeqCst, .SeqCst)) |old_state| old_state == .ReadLock else true;
- if (have_read_lock) {
- // Give out all the read locks.
- if (self.reader_queue.get()) |first_node| {
- // The remaining readers are scheduled on the loop; the first one is resumed directly.
- while (self.reader_queue.get()) |node| {
- global_event_loop.onNextTick(node);
- }
- resume first_node.data;
- }
- }
- }
- return HeldRead{ .lock = self };
- }
-
- /// Suspends the caller until it holds the write lock and returns the token
- /// used to release it.
- pub fn acquireWrite(self: *RwLock) callconv(.Async) HeldWrite {
- suspend {
- var my_tick_node = Loop.NextTickNode{
- .data = @frame(),
- .prev = undefined,
- .next = undefined,
- };
-
- self.writer_queue.put(&my_tick_node);
-
- // At this point, we are in the writer_queue, so we might have already been resumed.
-
- // We set this bit so that later we can rely on the fact, that if writer_queue_empty == true,
- // some actor will attempt to grab the lock.
- @atomicStore(bool, &self.writer_queue_empty, false, .SeqCst);
-
- // Here we must be the one to acquire the write lock. It cannot already be locked.
- if (@cmpxchgStrong(State, &self.shared_state, .Unlocked, .WriteLock, .SeqCst, .SeqCst) == null) {
- // We now have a write lock.
- if (self.writer_queue.get()) |node| {
- // Whether this node is us or someone else, we tail resume it.
- resume node.data;
- }
- }
- }
- return HeldWrite{ .lock = self };
- }
-
- /// Called after the lock has been switched to `.Unlocked`. Re-checks both
- /// queues and, if work is pending and the lock can be re-taken, hands it to
- /// the waiting writer or readers.
- fn commonPostUnlock(self: *RwLock) void {
- while (true) {
- // There might be a writer_queue item or a reader_queue item
- // If we check and both are empty, we can be done, because the other actors will try to
- // obtain the lock.
- // But if there's a writer_queue item or a reader_queue item,
- // we are the actor which must loop and attempt to grab the lock again.
- if (!@atomicLoad(bool, &self.writer_queue_empty, .SeqCst)) {
- if (@cmpxchgStrong(State, &self.shared_state, .Unlocked, .WriteLock, .SeqCst, .SeqCst) != null) {
- // We did not obtain the lock. Great, the queues are someone else's problem.
- return;
- }
- // If there's an item in the writer queue, give them the lock, and we're done.
- if (self.writer_queue.get()) |node| {
- global_event_loop.onNextTick(node);
- return;
- }
- // Release the lock again.
- @atomicStore(bool, &self.writer_queue_empty, true, .SeqCst);
- @atomicStore(State, &self.shared_state, .Unlocked, .SeqCst);
- continue;
- }
-
- if (!@atomicLoad(bool, &self.reader_queue_empty, .SeqCst)) {
- if (@cmpxchgStrong(State, &self.shared_state, .Unlocked, .ReadLock, .SeqCst, .SeqCst) != null) {
- // We did not obtain the lock. Great, the queues are someone else's problem.
- return;
- }
- // If there are any items in the reader queue, give out all the reader locks, and we're done.
- if (self.reader_queue.get()) |first_node| {
- global_event_loop.onNextTick(first_node);
- while (self.reader_queue.get()) |node| {
- global_event_loop.onNextTick(node);
- }
- return;
- }
- // Release the lock again.
- @atomicStore(bool, &self.reader_queue_empty, true, .SeqCst);
- if (@cmpxchgStrong(State, &self.shared_state, .ReadLock, .Unlocked, .SeqCst, .SeqCst) != null) {
- // Didn't unlock. Someone else's problem.
- return;
- }
- continue;
- }
- return;
- }
- }
-};
-
-// Runs `testLock` against a fresh lock and checks that every element of
-// `shared_test_data` ends up equal to `shared_it_count * shared_test_data.len`.
-// Always skipped at present because of the `if (true)` guard.
-test "std.event.RwLock" {
-    // https://github.com/ziglang/zig/issues/2377
-    if (true) return error.SkipZigTest;
-
-    // https://github.com/ziglang/zig/issues/1908
-    if (builtin.single_threaded) return error.SkipZigTest;
-
-    // TODO provide a way to run tests in evented I/O mode
-    if (!std.io.is_async) return error.SkipZigTest;
-
-    var lock = RwLock.init();
-    defer lock.deinit();
-
-    _ = testLock(std.heap.page_allocator, &lock);
-
-    const expected_result = [1]i32{shared_it_count * @as(i32, @intCast(shared_test_data.len))} ** shared_test_data.len;
-    // `expectEqualSlices` takes slices, so pass pointers to the arrays
-    // (which coerce to slices) rather than the arrays themselves.
-    try testing.expectEqualSlices(i32, &expected_result, &shared_test_data);
-}
-/// Test driver: allocates and schedules 100 `readRunner` frames and
-/// `shared_it_count` `writeRunner` frames on the global loop, then awaits and
-/// frees the writers followed by the readers. Panics if an allocation fails.
-fn testLock(allocator: Allocator, lock: *RwLock) callconv(.Async) void {
- var read_nodes: [100]Loop.NextTickNode = undefined;
- for (&read_nodes) |*read_node| {
- const frame = allocator.create(@Frame(readRunner)) catch @panic("memory");
- read_node.data = frame;
- frame.* = async readRunner(lock);
- Loop.instance.?.onNextTick(read_node);
- }
-
- var write_nodes: [shared_it_count]Loop.NextTickNode = undefined;
- for (&write_nodes) |*write_node| {
- const frame = allocator.create(@Frame(writeRunner)) catch @panic("memory");
- write_node.data = frame;
- frame.* = async writeRunner(lock);
- Loop.instance.?.onNextTick(write_node);
- }
-
- // Each node's `data` is the frame allocated above; cast it back to await and free it.
- for (&write_nodes) |*write_node| {
- const casted = @as(*const @Frame(writeRunner), @ptrCast(write_node.data));
- await casted;
- allocator.destroy(casted);
- }
- for (&read_nodes) |*read_node| {
- const casted = @as(*const @Frame(readRunner), @ptrCast(read_node.data));
- await casted;
- allocator.destroy(casted);
- }
-}
-
-// Shared state for the RwLock test.
-// Number of `writeRunner` frames started by `testLock`.
-const shared_it_count = 10;
-// Data mutated under the write lock and checked under the read lock.
-var shared_test_data = [1]i32{0} ** 10;
-// Cursor used by writers while updating `shared_test_data`; readers expect it to be 0.
-var shared_test_index: usize = 0;
-// Number of completed write-lock acquisitions.
-var shared_count: usize = 0;
-/// Writer task: once per element of `shared_test_data`, takes the write lock,
-/// bumps `shared_count` and increments every element of `shared_test_data`.
-fn writeRunner(lock: *RwLock) callconv(.Async) void {
- suspend {} // resumed by onNextTick
-
- var i: usize = 0;
- while (i < shared_test_data.len) : (i += 1) {
- // NOTE(review): other code in this file uses `std.time.ns_per_ms`; confirm that `std.time.microsecond` exists.
- std.time.sleep(100 * std.time.microsecond);
- const lock_promise = async lock.acquireWrite();
- const handle = await lock_promise;
- defer handle.release();
-
- shared_count += 1;
- while (shared_test_index < shared_test_data.len) : (shared_test_index += 1) {
- shared_test_data[shared_test_index] = shared_test_data[shared_test_index] + 1;
- }
- // Reset the cursor so readers observe 0 while the lock is not write-held.
- shared_test_index = 0;
- }
-}
-/// Reader task: once per element of `shared_test_data`, takes a read lock and
-/// checks that no writer is mid-update (`shared_test_index == 0`) and that
-/// element `i` equals the number of completed writes (`shared_count`).
-fn readRunner(lock: *RwLock) callconv(.Async) void {
-    suspend {} // resumed by onNextTick
-    std.time.sleep(1);
-
-    var i: usize = 0;
-    while (i < shared_test_data.len) : (i += 1) {
-        const lock_promise = async lock.acquireRead();
-        const handle = await lock_promise;
-        defer handle.release();
-
-        // This function returns `void`, so `try` cannot be used here; a
-        // failed expectation panics instead.
-        testing.expect(shared_test_index == 0) catch @panic("test failure");
-        testing.expect(shared_test_data[i] == @as(i32, @intCast(shared_count))) catch @panic("test failure");
-    }
-}
diff --git a/lib/std/event/rwlocked.zig b/lib/std/event/rwlocked.zig
deleted file mode 100644
index 9a569e8f1f..0000000000
--- a/lib/std/event/rwlocked.zig
+++ /dev/null
@@ -1,57 +0,0 @@
-const std = @import("../std.zig");
-const RwLock = std.event.RwLock;
-
-/// Couples a `RwLock` with the single value of type `T` that it guards.
-/// Callers waiting for the lock are suspended and resumed, in order, when the
-/// lock is released. The lock is thread-safe and async/await based.
-pub fn RwLocked(comptime T: type) type {
-    return struct {
-        lock: RwLock,
-        locked_data: T,
-
-        const Self = @This();
-
-        /// Read-only access to the protected value, valid until `release`.
-        pub const HeldReadLock = struct {
-            value: *const T,
-            held: RwLock.HeldRead,
-
-            pub fn release(self: HeldReadLock) void {
-                self.held.release();
-            }
-        };
-
-        /// Mutable access to the protected value, valid until `release`.
-        pub const HeldWriteLock = struct {
-            value: *T,
-            held: RwLock.HeldWrite,
-
-            pub fn release(self: HeldWriteLock) void {
-                self.held.release();
-            }
-        };
-
-        /// Wraps `data` together with a freshly initialized lock.
-        pub fn init(data: T) Self {
-            return .{
-                .lock = RwLock.init(),
-                .locked_data = data,
-            };
-        }
-
-        /// Deinitializes the underlying lock.
-        pub fn deinit(self: *Self) void {
-            self.lock.deinit();
-        }
-
-        /// Acquires a read lock and returns it with a const pointer to the data.
-        pub fn acquireRead(self: *Self) callconv(.Async) HeldReadLock {
-            const held = self.lock.acquireRead();
-            return .{ .held = held, .value = &self.locked_data };
-        }
-
-        /// Acquires the write lock and returns it with a mutable pointer to the data.
-        pub fn acquireWrite(self: *Self) callconv(.Async) HeldWriteLock {
-            const held = self.lock.acquireWrite();
-            return .{ .held = held, .value = &self.locked_data };
-        }
-    };
-}
diff --git a/lib/std/event/wait_group.zig b/lib/std/event/wait_group.zig
deleted file mode 100644
index c88b01c812..0000000000
--- a/lib/std/event/wait_group.zig
+++ /dev/null
@@ -1,115 +0,0 @@
-const std = @import("../std.zig");
-const builtin = @import("builtin");
-const Loop = std.event.Loop;
-
-/// A WaitGroup keeps track of a group of async tasks and waits for them to finish.
-/// Call `begin` when creating new tasks, and have tasks call `finish` when done.
-/// You can provide a count for both operations to perform them in bulk.
-/// Call `wait` to suspend until all tasks are completed.
-/// Multiple waiters are supported.
-///
-/// WaitGroup is an instance of WaitGroupGeneric, which takes the bit size
-/// of the internal counter. WaitGroup uses a `usize`-sized counter.
-/// It's also possible to define a max value for the counter (`max_counter`) so
-/// that `begin` will return error.Overflow when the limit is reached, even
-/// if the integer type has not overflowed.
-/// By default `max_counter` is set to std.math.maxInt(CounterType).
-pub const WaitGroup = WaitGroupGeneric(@bitSizeOf(usize));
-
-/// Returns a wait-group type whose counter is an unsigned integer of
-/// `counter_size` bits. Requires an event loop (`Loop.instance`); otherwise
-/// instantiating the type is a compile error.
-pub fn WaitGroupGeneric(comptime counter_size: u16) type {
-    const CounterType = std.meta.Int(.unsigned, counter_size);
-
-    const global_event_loop = Loop.instance orelse
-        @compileError("std.event.WaitGroup currently only works with event-based I/O");
-
-    return struct {
-        /// Number of tasks begun and not yet finished.
-        counter: CounterType = 0,
-        /// Upper bound enforced by `begin`.
-        max_counter: CounterType = std.math.maxInt(CounterType),
-        /// Protects `counter` and `waiters`.
-        mutex: std.Thread.Mutex = .{},
-        /// Head of the singly linked list of suspended waiters.
-        waiters: ?*Waiter = null,
-
-        const Waiter = struct {
-            next: ?*Waiter,
-            /// Only meaningful on the list head: the last waiter in the list.
-            tail: *Waiter,
-            node: Loop.NextTickNode,
-        };
-
-        const Self = @This();
-
-        /// Adds `count` to the counter. Returns `error.Overflow` if the sum
-        /// overflows `CounterType` or exceeds `max_counter`; the counter is
-        /// left unchanged in that case.
-        pub fn begin(self: *Self, count: CounterType) error{Overflow}!void {
-            self.mutex.lock();
-            defer self.mutex.unlock();
-
-            const new_counter = try std.math.add(CounterType, self.counter, count);
-            if (new_counter > self.max_counter) return error.Overflow;
-            self.counter = new_counter;
-        }
-
-        /// Subtracts `count` from the counter; `count` must not exceed the
-        /// current counter. When the counter reaches zero, every waiter is
-        /// scheduled on the event loop.
-        pub fn finish(self: *Self, count: CounterType) void {
-            var waiters = blk: {
-                self.mutex.lock();
-                defer self.mutex.unlock();
-                self.counter = std.math.sub(CounterType, self.counter, count) catch unreachable;
-                if (self.counter == 0) {
-                    const temp = self.waiters;
-                    self.waiters = null;
-                    break :blk temp;
-                }
-                break :blk null;
-            };
-
-            // We don't need to hold the lock to reschedule any potential waiter.
-            while (waiters) |w| {
-                // Read `next` before scheduling: the Waiter lives in the
-                // waiting frame and may be gone once that frame runs.
-                const temp_w = w;
-                waiters = w.next;
-                global_event_loop.onNextTick(&temp_w.node);
-            }
-        }
-
-        /// Returns immediately if the counter is zero; otherwise suspends the
-        /// caller until `finish` brings the counter down to zero.
-        pub fn wait(self: *Self) void {
-            self.mutex.lock();
-
-            if (self.counter == 0) {
-                self.mutex.unlock();
-                return;
-            }
-
-            var self_waiter: Waiter = undefined;
-            self_waiter.node.data = @frame();
-            // The new waiter is always appended last, so it must terminate the
-            // list in both branches; otherwise `finish` would follow an
-            // undefined `next` pointer.
-            self_waiter.next = null;
-            if (self.waiters) |head| {
-                head.tail.next = &self_waiter;
-                head.tail = &self_waiter;
-            } else {
-                self.waiters = &self_waiter;
-                self_waiter.tail = &self_waiter;
-            }
-            // Release the mutex only once this frame is suspended, so `finish`
-            // cannot schedule it before it is resumable.
-            suspend {
-                self.mutex.unlock();
-            }
-        }
-    };
-}
-
-// Two wait groups are chained: `task` waits on `initial_wg` and then finishes
-// `final_wg`, while the test finishes `initial_wg` and waits on `final_wg`.
-test "basic WaitGroup usage" {
- if (!std.io.is_async) return error.SkipZigTest;
-
- // TODO https://github.com/ziglang/zig/issues/1908
- if (builtin.single_threaded) return error.SkipZigTest;
-
- // TODO https://github.com/ziglang/zig/issues/3251
- if (builtin.os.tag == .freebsd) return error.SkipZigTest;
-
- var initial_wg = WaitGroup{};
- var final_wg = WaitGroup{};
-
- try initial_wg.begin(1);
- try final_wg.begin(1);
- var task_frame = async task(&initial_wg, &final_wg);
- initial_wg.finish(1);
- final_wg.wait();
- await task_frame;
-}
-
-/// Test task: waits for `wg_i` to complete, then finishes one unit of `wg_f`.
-fn task(wg_i: *WaitGroup, wg_f: *WaitGroup) void {
- wg_i.wait();
- wg_f.finish(1);
-}
diff --git a/lib/std/fmt.zig b/lib/std/fmt.zig
index e5bfca02b2..262d51bcee 100644
--- a/lib/std/fmt.zig
+++ b/lib/std/fmt.zig
@@ -2790,8 +2790,6 @@ test "positional/alignment/width/precision" {
}
test "vector" {
- if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
-
if (builtin.target.cpu.arch == .riscv64) {
// https://github.com/ziglang/zig/issues/4486
return error.SkipZigTest;
diff --git a/lib/std/fmt/parse_float.zig b/lib/std/fmt/parse_float.zig
index cdd11a6c59..d2589b8ff1 100644
--- a/lib/std/fmt/parse_float.zig
+++ b/lib/std/fmt/parse_float.zig
@@ -78,6 +78,13 @@ test "fmt.parseFloat nan and inf" {
}
}
+test "fmt.parseFloat largest normals" {
+ try expectEqual(@as(u16, @bitCast(try parseFloat(f16, "65504"))), 0x7bff);
+ try expectEqual(@as(u32, @bitCast(try parseFloat(f32, "3.4028234664E38"))), 0x7f7f_ffff);
+ try expectEqual(@as(u64, @bitCast(try parseFloat(f64, "1.7976931348623157E308"))), 0x7fef_ffff_ffff_ffff);
+ try expectEqual(@as(u128, @bitCast(try parseFloat(f128, "1.1897314953572317650857593266280070162E4932"))), 0x7ffe_ffff_ffff_ffff_ffff_ffff_ffff_ffff);
+}
+
test "fmt.parseFloat #11169" {
try expectEqual(try parseFloat(f128, "9007199254740993.0"), 9007199254740993.0);
}
diff --git a/lib/std/fmt/parse_float/decimal.zig b/lib/std/fmt/parse_float/decimal.zig
index f7612cffa3..4e8a84bcf6 100644
--- a/lib/std/fmt/parse_float/decimal.zig
+++ b/lib/std/fmt/parse_float/decimal.zig
@@ -63,7 +63,7 @@ pub fn Decimal(comptime T: type) type {
pub const max_digits_without_overflow = if (MantissaT == u64) 19 else 38;
pub const decimal_point_range = if (MantissaT == u64) 2047 else 32767;
pub const min_exponent = if (MantissaT == u64) -324 else -4966;
- pub const max_exponent = if (MantissaT == u64) 310 else 4933;
+ pub const max_exponent = if (MantissaT == u64) 310 else 4934;
pub const max_decimal_digits = if (MantissaT == u64) 18 else 37;
/// The number of significant digits in the decimal.
diff --git a/lib/std/fmt/parse_float/parse_float.zig b/lib/std/fmt/parse_float/parse_float.zig
index 08d1c55862..d7980d8937 100644
--- a/lib/std/fmt/parse_float/parse_float.zig
+++ b/lib/std/fmt/parse_float/parse_float.zig
@@ -5,8 +5,6 @@ const convertEiselLemire = @import("convert_eisel_lemire.zig").convertEiselLemir
const convertSlow = @import("convert_slow.zig").convertSlow;
const convertHex = @import("convert_hex.zig").convertHex;
-const optimize = true;
-
pub const ParseFloatError = error{
InvalidCharacter,
};
@@ -41,25 +39,23 @@ pub fn parseFloat(comptime T: type, s: []const u8) ParseFloatError!T {
return convertHex(T, n);
}
- if (optimize) {
- if (convertFast(T, n)) |f| {
- return f;
- }
+ if (convertFast(T, n)) |f| {
+ return f;
+ }
- if (T == f16 or T == f32 or T == f64) {
- // If significant digits were truncated, then we can have rounding error
- // only if `mantissa + 1` produces a different result. We also avoid
- // redundantly using the Eisel-Lemire algorithm if it was unable to
- // correctly round on the first pass.
- if (convertEiselLemire(T, n.exponent, n.mantissa)) |bf| {
- if (!n.many_digits) {
+ if (T == f16 or T == f32 or T == f64) {
+ // If significant digits were truncated, then we can have rounding error
+ // only if `mantissa + 1` produces a different result. We also avoid
+ // redundantly using the Eisel-Lemire algorithm if it was unable to
+ // correctly round on the first pass.
+ if (convertEiselLemire(T, n.exponent, n.mantissa)) |bf| {
+ if (!n.many_digits) {
+ return bf.toFloat(T, n.negative);
+ }
+ if (convertEiselLemire(T, n.exponent, n.mantissa + 1)) |bf2| {
+ if (bf.eql(bf2)) {
return bf.toFloat(T, n.negative);
}
- if (convertEiselLemire(T, n.exponent, n.mantissa + 1)) |bf2| {
- if (bf.eql(bf2)) {
- return bf.toFloat(T, n.negative);
- }
- }
}
}
}
diff --git a/lib/std/fs.zig b/lib/std/fs.zig
index 93125c2530..cfb4d7958b 100644
--- a/lib/std/fs.zig
+++ b/lib/std/fs.zig
@@ -31,20 +31,21 @@ pub const realpathW = os.realpathW;
pub const getAppDataDir = @import("fs/get_app_data_dir.zig").getAppDataDir;
pub const GetAppDataDirError = @import("fs/get_app_data_dir.zig").GetAppDataDirError;
-pub const Watch = @import("fs/watch.zig").Watch;
-
-/// This represents the maximum size of a UTF-8 encoded file path that the
+/// This represents the maximum size of a `[]u8` file path that the
/// operating system will accept. Paths, including those returned from file
/// system operations, may be longer than this length, but such paths cannot
/// be successfully passed back in other file system operations. However,
/// all path components returned by file system operations are assumed to
-/// fit into a UTF-8 encoded array of this length.
+/// fit into a `u8` array of this length.
/// The byte count includes room for a null sentinel byte.
+/// On Windows, `[]u8` file paths are encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
+/// On WASI, `[]u8` file paths are encoded as valid UTF-8.
+/// On other platforms, `[]u8` file paths are opaque sequences of bytes with no particular encoding.
pub const MAX_PATH_BYTES = switch (builtin.os.tag) {
- .linux, .macos, .ios, .freebsd, .openbsd, .netbsd, .dragonfly, .haiku, .solaris, .illumos, .plan9 => os.PATH_MAX,
- // Each UTF-16LE character may be expanded to 3 UTF-8 bytes.
- // If it would require 4 UTF-8 bytes, then there would be a surrogate
- // pair in the UTF-16LE, and we (over)account 3 bytes for it that way.
+ .linux, .macos, .ios, .freebsd, .openbsd, .netbsd, .dragonfly, .haiku, .solaris, .illumos, .plan9, .emscripten => os.PATH_MAX,
+ // Each WTF-16LE code unit may be expanded to 3 WTF-8 bytes.
+ // If it would require 4 WTF-8 bytes, then there would be a surrogate
+ // pair in the WTF-16LE, and we (over)account 3 bytes for it that way.
// +1 for the null byte at the end, which can be encoded in 1 byte.
.windows => os.windows.PATH_MAX_WIDE * 3 + 1,
// TODO work out what a reasonable value we should use here
@@ -55,18 +56,21 @@ pub const MAX_PATH_BYTES = switch (builtin.os.tag) {
@compileError("PATH_MAX not implemented for " ++ @tagName(builtin.os.tag)),
};
-/// This represents the maximum size of a UTF-8 encoded file name component that
+/// This represents the maximum size of a `[]u8` file name component that
/// the platform's common file systems support. File name components returned by file system
-/// operations are likely to fit into a UTF-8 encoded array of this length, but
+/// operations are likely to fit into a `u8` array of this length, but
/// (depending on the platform) this assumption may not hold for every configuration.
/// The byte count does not include a null sentinel byte.
+/// On Windows, `[]u8` file name components are encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
+/// On WASI, `[]u8` file name components are encoded as valid UTF-8.
+/// On other platforms, `[]u8` file name components are opaque sequences of bytes with no particular encoding.
pub const MAX_NAME_BYTES = switch (builtin.os.tag) {
.linux, .macos, .ios, .freebsd, .openbsd, .netbsd, .dragonfly, .solaris, .illumos => os.NAME_MAX,
// Haiku's NAME_MAX includes the null terminator, so subtract one.
.haiku => os.NAME_MAX - 1,
- // Each UTF-16LE character may be expanded to 3 UTF-8 bytes.
- // If it would require 4 UTF-8 bytes, then there would be a surrogate
- // pair in the UTF-16LE, and we (over)account 3 bytes for it that way.
+ // Each WTF-16LE code unit may be expanded to 3 WTF-8 bytes.
+ // If it would require 4 WTF-8 bytes, then there would be a surrogate
+ // pair in the WTF-16LE, and we (over)account 3 bytes for it that way.
.windows => os.windows.NAME_MAX * 3,
// For WASI, the MAX_NAME will depend on the host OS, so it needs to be
// as large as the largest MAX_NAME_BYTES (Windows) in order to work on any host OS.
@@ -86,15 +90,11 @@ pub const base64_encoder = base64.Base64Encoder.init(base64_alphabet, null);
/// Base64 decoder, replacing the standard `+/` with `-_` so that it can be used in a file name on any filesystem.
pub const base64_decoder = base64.Base64Decoder.init(base64_alphabet, null);
-/// Whether or not async file system syscalls need a dedicated thread because the operating
-/// system does not support non-blocking I/O on the file system.
-pub const need_async_thread = std.io.is_async and switch (builtin.os.tag) {
- .windows, .other => false,
- else => true,
-};
-
/// TODO remove the allocator requirement from this API
/// TODO move to Dir
+/// On Windows, both paths should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
+/// On WASI, both paths should be encoded as valid UTF-8.
+/// On other platforms, both paths are an opaque sequence of bytes with no particular encoding.
pub fn atomicSymLink(allocator: Allocator, existing_path: []const u8, new_path: []const u8) !void {
if (cwd().symLink(existing_path, new_path, .{})) {
return;
@@ -126,6 +126,9 @@ pub fn atomicSymLink(allocator: Allocator, existing_path: []const u8, new_path:
/// Same as `Dir.updateFile`, except asserts that both `source_path` and `dest_path`
/// are absolute. See `Dir.updateFile` for a function that operates on both
/// absolute and relative paths.
+/// On Windows, both paths should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
+/// On WASI, both paths should be encoded as valid UTF-8.
+/// On other platforms, both paths are an opaque sequence of bytes with no particular encoding.
pub fn updateFileAbsolute(
source_path: []const u8,
dest_path: []const u8,
@@ -140,6 +143,9 @@ pub fn updateFileAbsolute(
/// Same as `Dir.copyFile`, except asserts that both `source_path` and `dest_path`
/// are absolute. See `Dir.copyFile` for a function that operates on both
/// absolute and relative paths.
+/// On Windows, both paths should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
+/// On WASI, both paths should be encoded as valid UTF-8.
+/// On other platforms, both paths are an opaque sequence of bytes with no particular encoding.
pub fn copyFileAbsolute(
source_path: []const u8,
dest_path: []const u8,
@@ -154,24 +160,30 @@ pub fn copyFileAbsolute(
/// Create a new directory, based on an absolute path.
/// Asserts that the path is absolute. See `Dir.makeDir` for a function that operates
/// on both absolute and relative paths.
+/// On Windows, `absolute_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
+/// On WASI, `absolute_path` should be encoded as valid UTF-8.
+/// On other platforms, `absolute_path` is an opaque sequence of bytes with no particular encoding.
pub fn makeDirAbsolute(absolute_path: []const u8) !void {
assert(path.isAbsolute(absolute_path));
return os.mkdir(absolute_path, Dir.default_mode);
}
-/// Same as `makeDirAbsolute` except the parameter is a null-terminated UTF-8-encoded string.
+/// Same as `makeDirAbsolute` except the parameter is null-terminated.
pub fn makeDirAbsoluteZ(absolute_path_z: [*:0]const u8) !void {
assert(path.isAbsoluteZ(absolute_path_z));
return os.mkdirZ(absolute_path_z, Dir.default_mode);
}
-/// Same as `makeDirAbsolute` except the parameter is a null-terminated WTF-16-encoded string.
+/// Same as `makeDirAbsolute` except the parameter is a null-terminated WTF-16 LE-encoded string.
pub fn makeDirAbsoluteW(absolute_path_w: [*:0]const u16) !void {
assert(path.isAbsoluteWindowsW(absolute_path_w));
return os.mkdirW(absolute_path_w, Dir.default_mode);
}
/// Same as `Dir.deleteDir` except the path is absolute.
+/// On Windows, `dir_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
+/// On WASI, `dir_path` should be encoded as valid UTF-8.
+/// On other platforms, `dir_path` is an opaque sequence of bytes with no particular encoding.
pub fn deleteDirAbsolute(dir_path: []const u8) !void {
assert(path.isAbsolute(dir_path));
return os.rmdir(dir_path);
@@ -190,6 +202,9 @@ pub fn deleteDirAbsoluteW(dir_path: [*:0]const u16) !void {
}
/// Same as `Dir.rename` except the paths are absolute.
+/// On Windows, both paths should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
+/// On WASI, both paths should be encoded as valid UTF-8.
+/// On other platforms, both paths are an opaque sequence of bytes with no particular encoding.
pub fn renameAbsolute(old_path: []const u8, new_path: []const u8) !void {
assert(path.isAbsolute(old_path));
assert(path.isAbsolute(new_path));
@@ -220,7 +235,7 @@ pub fn renameZ(old_dir: Dir, old_sub_path_z: [*:0]const u8, new_dir: Dir, new_su
return os.renameatZ(old_dir.fd, old_sub_path_z, new_dir.fd, new_sub_path_z);
}
-/// Same as `rename` except the parameters are UTF16LE, NT prefixed.
+/// Same as `rename` except the parameters are WTF-16 LE-encoded and NT-prefixed.
/// This function is Windows-only.
pub fn renameW(old_dir: Dir, old_sub_path_w: []const u16, new_dir: Dir, new_sub_path_w: []const u16) !void {
return os.renameatW(old_dir.fd, old_sub_path_w, new_dir.fd, new_sub_path_w);
@@ -231,17 +246,17 @@ pub fn renameW(old_dir: Dir, old_sub_path_w: []const u16, new_dir: Dir, new_sub_
/// On POSIX targets, this function is comptime-callable.
pub fn cwd() Dir {
if (builtin.os.tag == .windows) {
- return Dir{ .fd = os.windows.peb().ProcessParameters.CurrentDirectory.Handle };
+ return .{ .fd = os.windows.peb().ProcessParameters.CurrentDirectory.Handle };
} else if (builtin.os.tag == .wasi) {
- return std.options.wasiCwd();
+ return .{ .fd = std.options.wasiCwd() };
} else {
- return Dir{ .fd = os.AT.FDCWD };
+ return .{ .fd = os.AT.FDCWD };
}
}
-pub fn defaultWasiCwd() Dir {
+pub fn defaultWasiCwd() std.os.wasi.fd_t {
// Expect the first preopen to be current working directory.
- return .{ .fd = 3 };
+ return 3;
}
/// Opens a directory at the given path. The directory is a system resource that remains
@@ -249,6 +264,9 @@ pub fn defaultWasiCwd() Dir {
/// See `openDirAbsoluteZ` for a function that accepts a null-terminated path.
///
/// Asserts that the path parameter has no null bytes.
+/// On Windows, `absolute_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
+/// On WASI, `absolute_path` should be encoded as valid UTF-8.
+/// On other platforms, `absolute_path` is an opaque sequence of bytes with no particular encoding.
pub fn openDirAbsolute(absolute_path: []const u8, flags: Dir.OpenDirOptions) File.OpenError!Dir {
assert(path.isAbsolute(absolute_path));
return cwd().openDir(absolute_path, flags);
@@ -271,6 +289,9 @@ pub fn openDirAbsoluteW(absolute_path_c: [*:0]const u16, flags: Dir.OpenDirOptio
/// operates on both absolute and relative paths.
/// Asserts that the path parameter has no null bytes. See `openFileAbsoluteZ` for a function
/// that accepts a null-terminated path.
+/// On Windows, `absolute_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
+/// On WASI, `absolute_path` should be encoded as valid UTF-8.
+/// On other platforms, `absolute_path` is an opaque sequence of bytes with no particular encoding.
pub fn openFileAbsolute(absolute_path: []const u8, flags: File.OpenFlags) File.OpenError!File {
assert(path.isAbsolute(absolute_path));
return cwd().openFile(absolute_path, flags);
@@ -289,11 +310,13 @@ pub fn openFileAbsoluteW(absolute_path_w: []const u16, flags: File.OpenFlags) Fi
}
/// Test accessing `path`.
-/// `path` is UTF-8-encoded.
/// Be careful of Time-Of-Check-Time-Of-Use race conditions when using this function.
/// For example, instead of testing if a file exists and then opening it, just
/// open it and handle the error for file not found.
/// See `accessAbsoluteZ` for a function that accepts a null-terminated path.
+/// On Windows, `absolute_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
+/// On WASI, `absolute_path` should be encoded as valid UTF-8.
+/// On other platforms, `absolute_path` is an opaque sequence of bytes with no particular encoding.
pub fn accessAbsolute(absolute_path: []const u8, flags: File.OpenFlags) Dir.AccessError!void {
assert(path.isAbsolute(absolute_path));
try cwd().access(absolute_path, flags);
@@ -315,6 +338,9 @@ pub fn accessAbsoluteW(absolute_path: [*:0]const u16, flags: File.OpenFlags) Dir
/// operates on both absolute and relative paths.
/// Asserts that the path parameter has no null bytes. See `createFileAbsoluteZ` for a function
/// that accepts a null-terminated path.
+/// On Windows, `absolute_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
+/// On WASI, `absolute_path` should be encoded as valid UTF-8.
+/// On other platforms, `absolute_path` is an opaque sequence of bytes with no particular encoding.
pub fn createFileAbsolute(absolute_path: []const u8, flags: File.CreateFlags) File.OpenError!File {
assert(path.isAbsolute(absolute_path));
return cwd().createFile(absolute_path, flags);
@@ -336,6 +362,9 @@ pub fn createFileAbsoluteW(absolute_path_w: [*:0]const u16, flags: File.CreateFl
/// Asserts that the path is absolute. See `Dir.deleteFile` for a function that
/// operates on both absolute and relative paths.
/// Asserts that the path parameter has no null bytes.
+/// On Windows, `absolute_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
+/// On WASI, `absolute_path` should be encoded as valid UTF-8.
+/// On other platforms, `absolute_path` is an opaque sequence of bytes with no particular encoding.
pub fn deleteFileAbsolute(absolute_path: []const u8) Dir.DeleteFileError!void {
assert(path.isAbsolute(absolute_path));
return cwd().deleteFile(absolute_path);
@@ -358,6 +387,9 @@ pub fn deleteFileAbsoluteW(absolute_path_w: [*:0]const u16) Dir.DeleteFileError!
/// Asserts that the path is absolute. See `Dir.deleteTree` for a function that
/// operates on both absolute and relative paths.
/// Asserts that the path parameter has no null bytes.
+/// On Windows, `absolute_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
+/// On WASI, `absolute_path` should be encoded as valid UTF-8.
+/// On other platforms, `absolute_path` is an opaque sequence of bytes with no particular encoding.
pub fn deleteTreeAbsolute(absolute_path: []const u8) !void {
assert(path.isAbsolute(absolute_path));
const dirname = path.dirname(absolute_path) orelse return error{
@@ -373,6 +405,9 @@ pub fn deleteTreeAbsolute(absolute_path: []const u8) !void {
}
/// Same as `Dir.readLink`, except it asserts the path is absolute.
+/// On Windows, `pathname` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
+/// On WASI, `pathname` should be encoded as valid UTF-8.
+/// On other platforms, `pathname` is an opaque sequence of bytes with no particular encoding.
pub fn readLinkAbsolute(pathname: []const u8, buffer: *[MAX_PATH_BYTES]u8) ![]u8 {
assert(path.isAbsolute(pathname));
return os.readlink(pathname, buffer);
@@ -396,6 +431,9 @@ pub fn readLinkAbsoluteZ(pathname_c: [*:0]const u8, buffer: *[MAX_PATH_BYTES]u8)
/// one; the latter case is known as a dangling link.
/// If `sym_link_path` exists, it will not be overwritten.
/// See also `symLinkAbsoluteZ` and `symLinkAbsoluteW`.
+/// On Windows, both paths should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
+/// On WASI, both paths should be encoded as valid UTF-8.
+/// On other platforms, both paths are an opaque sequence of bytes with no particular encoding.
pub fn symLinkAbsolute(
target_path: []const u8,
sym_link_path: []const u8,
@@ -411,7 +449,7 @@ pub fn symLinkAbsolute(
return os.symlink(target_path, sym_link_path);
}
-/// Windows-only. Same as `symLinkAbsolute` except the parameters are null-terminated, WTF16 encoded.
+/// Windows-only. Same as `symLinkAbsolute` except the parameters are null-terminated, WTF-16 LE-encoded strings.
/// Note that this function will by default try creating a symbolic link to a file. If you would
/// like to create a symbolic link to a directory, specify this with `SymLinkFlags{ .is_directory = true }`.
/// See also `symLinkAbsolute`, `symLinkAbsoluteZ`.
@@ -435,27 +473,14 @@ pub fn symLinkAbsoluteZ(
assert(path.isAbsoluteZ(target_path_c));
assert(path.isAbsoluteZ(sym_link_path_c));
if (builtin.os.tag == .windows) {
- const target_path_w = try os.windows.cStrToWin32PrefixedFileW(target_path_c);
- const sym_link_path_w = try os.windows.cStrToWin32PrefixedFileW(sym_link_path_c);
- return os.windows.CreateSymbolicLink(sym_link_path_w.span(), target_path_w.span(), flags.is_directory);
+ const target_path_w = try os.windows.cStrToPrefixedFileW(null, target_path_c);
+ const sym_link_path_w = try os.windows.cStrToPrefixedFileW(null, sym_link_path_c);
+ return os.windows.CreateSymbolicLink(null, sym_link_path_w.span(), target_path_w.span(), flags.is_directory);
}
return os.symlinkZ(target_path_c, sym_link_path_c);
}
-pub const OpenSelfExeError = error{
- SharingViolation,
- PathAlreadyExists,
- FileNotFound,
- AccessDenied,
- PipeBusy,
- NameTooLong,
- /// On Windows, file paths must be valid Unicode.
- InvalidUtf8,
- /// On Windows, file paths cannot contain these characters:
- /// '/', '*', '?', '"', '<', '>', '|'
- BadPathName,
- Unexpected,
-} || os.OpenError || SelfExePathError || os.FlockError;
+pub const OpenSelfExeError = os.OpenError || SelfExePathError || os.FlockError;
pub fn openSelfExe(flags: File.OpenFlags) OpenSelfExeError!File {
if (builtin.os.tag == .linux) {
@@ -478,7 +503,45 @@ pub fn openSelfExe(flags: File.OpenFlags) OpenSelfExeError!File {
return openFileAbsoluteZ(buf[0..self_exe_path.len :0].ptr, flags);
}
-pub const SelfExePathError = os.ReadLinkError || os.SysCtlError || os.RealPathError;
+// This is os.ReadLinkError || os.RealPathError with impossible errors excluded
+pub const SelfExePathError = error{
+ FileNotFound,
+ AccessDenied,
+ NameTooLong,
+ NotSupported,
+ NotDir,
+ SymLinkLoop,
+ InputOutput,
+ FileTooBig,
+ IsDir,
+ ProcessFdQuotaExceeded,
+ SystemFdQuotaExceeded,
+ NoDevice,
+ SystemResources,
+ NoSpaceLeft,
+ FileSystem,
+ BadPathName,
+ DeviceBusy,
+ SharingViolation,
+ PipeBusy,
+ NotLink,
+ PathAlreadyExists,
+ InvalidHandle,
+
+ /// On Windows, `\\server` or `\\server\share` was not found.
+ NetworkNotFound,
+
+ /// On Windows, antivirus software is enabled by default. It can be
+ /// disabled, but Windows Update sometimes ignores the user's preference
+ /// and re-enables it. When enabled, antivirus software on Windows
+ /// intercepts file system operations and makes them significantly slower
+ /// in addition to possibly failing with this error code.
+ AntivirusInterference,
+
+ /// On Windows, the volume does not contain a recognized file system. File
+ /// system drivers might not be loaded, or the volume may be corrupt.
+ UnrecognizedVolume,
+} || os.SysCtlError;
/// `selfExePath` except allocates the result on the heap.
/// Caller owns returned memory.
@@ -500,6 +563,8 @@ pub fn selfExePathAlloc(allocator: Allocator) ![]u8 {
/// This function may return an error if the current executable
/// was deleted after spawning.
/// Returned value is a slice of out_buffer.
+/// On Windows, the result is encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
+/// On other platforms, the result is an opaque sequence of bytes with no particular encoding.
///
/// On Linux, depends on procfs being mounted. If the currently executing binary has
/// been deleted, the file path looks something like `/a/b/c/exe (deleted)`.
@@ -514,15 +579,31 @@ pub fn selfExePath(out_buffer: []u8) SelfExePathError![]u8 {
if (rc != 0) return error.NameTooLong;
var real_path_buf: [MAX_PATH_BYTES]u8 = undefined;
- const real_path = try std.os.realpathZ(&symlink_path_buf, &real_path_buf);
+ const real_path = std.os.realpathZ(&symlink_path_buf, &real_path_buf) catch |err| switch (err) {
+ error.InvalidWtf8 => unreachable, // Windows-only
+ error.NetworkNotFound => unreachable, // Windows-only
+ else => |e| return e,
+ };
if (real_path.len > out_buffer.len) return error.NameTooLong;
const result = out_buffer[0..real_path.len];
@memcpy(result, real_path);
return result;
}
switch (builtin.os.tag) {
- .linux => return os.readlinkZ("/proc/self/exe", out_buffer),
- .solaris, .illumos => return os.readlinkZ("/proc/self/path/a.out", out_buffer),
+ .linux => return os.readlinkZ("/proc/self/exe", out_buffer) catch |err| switch (err) {
+ error.InvalidUtf8 => unreachable, // WASI-only
+ error.InvalidWtf8 => unreachable, // Windows-only
+ error.UnsupportedReparsePointType => unreachable, // Windows-only
+ error.NetworkNotFound => unreachable, // Windows-only
+ else => |e| return e,
+ },
+ .solaris, .illumos => return os.readlinkZ("/proc/self/path/a.out", out_buffer) catch |err| switch (err) {
+ error.InvalidUtf8 => unreachable, // WASI-only
+ error.InvalidWtf8 => unreachable, // Windows-only
+ error.UnsupportedReparsePointType => unreachable, // Windows-only
+ error.NetworkNotFound => unreachable, // Windows-only
+ else => |e| return e,
+ },
.freebsd, .dragonfly => {
var mib = [4]c_int{ os.CTL.KERN, os.KERN.PROC, os.KERN.PROC_PATHNAME, -1 };
var out_len: usize = out_buffer.len;
@@ -546,7 +627,11 @@ pub fn selfExePath(out_buffer: []u8) SelfExePathError![]u8 {
if (mem.indexOf(u8, argv0, "/") != null) {
// argv[0] is a path (relative or absolute): use realpath(3) directly
var real_path_buf: [MAX_PATH_BYTES]u8 = undefined;
- const real_path = try os.realpathZ(os.argv[0], &real_path_buf);
+ const real_path = os.realpathZ(os.argv[0], &real_path_buf) catch |err| switch (err) {
+ error.InvalidWtf8 => unreachable, // Windows-only
+ error.NetworkNotFound => unreachable, // Windows-only
+ else => |e| return e,
+ };
if (real_path.len > out_buffer.len)
return error.NameTooLong;
const result = out_buffer[0..real_path.len];
@@ -584,7 +669,10 @@ pub fn selfExePath(out_buffer: []u8) SelfExePathError![]u8 {
// symlink, not the path that the symlink points to. We want the path
// that the symlink points to, though, so we need to get the realpath.
const pathname_w = try os.windows.wToPrefixedFileW(null, image_path_name);
- return std.fs.cwd().realpathW(pathname_w.span(), out_buffer);
+ return std.fs.cwd().realpathW(pathname_w.span(), out_buffer) catch |err| switch (err) {
+ error.InvalidWtf8 => unreachable,
+ else => |e| return e,
+ };
},
else => @compileError("std.fs.selfExePath not supported for this target"),
}
@@ -608,6 +696,8 @@ pub fn selfExeDirPathAlloc(allocator: Allocator) ![]u8 {
/// Get the directory path that contains the current executable.
/// Returned value is a slice of out_buffer.
+/// On Windows, the result is encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
+/// On other platforms, the result is an opaque sequence of bytes with no particular encoding.
pub fn selfExeDirPath(out_buffer: []u8) SelfExePathError![]const u8 {
const self_exe_path = try selfExePath(out_buffer);
// Assume that the OS APIs return absolute paths, and therefore dirname
@@ -616,6 +706,8 @@ pub fn selfExeDirPath(out_buffer: []u8) SelfExePathError![]const u8 {
}
/// `realpath`, except caller must free the returned memory.
+/// On Windows, the result is encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
+/// On other platforms, the result is an opaque sequence of bytes with no particular encoding.
/// See also `Dir.realpath`.
pub fn realpathAlloc(allocator: Allocator, pathname: []const u8) ![]u8 {
// Use of MAX_PATH_BYTES here is valid as the realpath function does not
@@ -641,5 +733,4 @@ test {
_ = &path;
_ = @import("fs/test.zig");
_ = @import("fs/get_app_data_dir.zig");
- _ = @import("fs/watch.zig");
}
diff --git a/lib/std/fs/Dir.zig b/lib/std/fs/Dir.zig
index 0f996affcc..ad9b467266 100644
--- a/lib/std/fs/Dir.zig
+++ b/lib/std/fs/Dir.zig
@@ -9,7 +9,14 @@ pub const Entry = struct {
pub const Kind = File.Kind;
};
-const IteratorError = error{ AccessDenied, SystemResources } || posix.UnexpectedError;
+const IteratorError = error{
+ AccessDenied,
+ SystemResources,
+ /// WASI-only. The path of an entry could not be encoded as valid UTF-8.
+ /// WASI is unable to handle paths that cannot be encoded as well-formed UTF-8.
+ /// https://github.com/WebAssembly/wasi-filesystem/issues/17#issuecomment-1430639353
+ InvalidUtf8,
+} || posix.UnexpectedError;
pub const Iterator = switch (builtin.os.tag) {
.macos, .ios, .freebsd, .netbsd, .dragonfly, .openbsd, .solaris, .illumos => struct {
@@ -62,16 +69,16 @@ pub const Iterator = switch (builtin.os.tag) {
self.end_index = @as(usize, @intCast(rc));
}
const darwin_entry = @as(*align(1) posix.system.dirent, @ptrCast(&self.buf[self.index]));
- const next_index = self.index + darwin_entry.reclen();
+ const next_index = self.index + darwin_entry.reclen;
self.index = next_index;
- const name = @as([*]u8, @ptrCast(&darwin_entry.d_name))[0..darwin_entry.d_namlen];
+ const name = @as([*]u8, @ptrCast(&darwin_entry.name))[0..darwin_entry.namlen];
- if (mem.eql(u8, name, ".") or mem.eql(u8, name, "..") or (darwin_entry.d_ino == 0)) {
+ if (mem.eql(u8, name, ".") or mem.eql(u8, name, "..") or (darwin_entry.ino == 0)) {
continue :start_over;
}
- const entry_kind: Entry.Kind = switch (darwin_entry.d_type) {
+ const entry_kind: Entry.Kind = switch (darwin_entry.type) {
posix.DT.BLK => .block_device,
posix.DT.CHR => .character_device,
posix.DT.DIR => .directory,
@@ -110,14 +117,14 @@ pub const Iterator = switch (builtin.os.tag) {
self.end_index = @as(usize, @intCast(rc));
}
const entry = @as(*align(1) posix.system.dirent, @ptrCast(&self.buf[self.index]));
- const next_index = self.index + entry.reclen();
+ const next_index = self.index + entry.reclen;
self.index = next_index;
- const name = mem.sliceTo(@as([*:0]u8, @ptrCast(&entry.d_name)), 0);
+ const name = mem.sliceTo(@as([*:0]u8, @ptrCast(&entry.name)), 0);
if (mem.eql(u8, name, ".") or mem.eql(u8, name, ".."))
continue :start_over;
- // Solaris dirent doesn't expose d_type, so we have to call stat to get it.
+ // Solaris dirent doesn't expose the entry type, so we have to call stat to get it.
const stat_info = posix.fstatat(
self.dir.fd,
name,
@@ -174,23 +181,23 @@ pub const Iterator = switch (builtin.os.tag) {
self.end_index = @as(usize, @intCast(rc));
}
const bsd_entry = @as(*align(1) posix.system.dirent, @ptrCast(&self.buf[self.index]));
- const next_index = self.index + bsd_entry.reclen();
+ const next_index = self.index + if (@hasDecl(posix.system.dirent, "reclen")) bsd_entry.reclen() else bsd_entry.reclen;
self.index = next_index;
- const name = @as([*]u8, @ptrCast(&bsd_entry.d_name))[0..bsd_entry.d_namlen];
+ const name = @as([*]u8, @ptrCast(&bsd_entry.name))[0..bsd_entry.namlen];
const skip_zero_fileno = switch (builtin.os.tag) {
- // d_fileno=0 is used to mark invalid entries or deleted files.
+ // fileno=0 is used to mark invalid entries or deleted files.
.openbsd, .netbsd => true,
else => false,
};
if (mem.eql(u8, name, ".") or mem.eql(u8, name, "..") or
- (skip_zero_fileno and bsd_entry.d_fileno == 0))
+ (skip_zero_fileno and bsd_entry.fileno == 0))
{
continue :start_over;
}
- const entry_kind: Entry.Kind = switch (bsd_entry.d_type) {
+ const entry_kind: Entry.Kind = switch (bsd_entry.type) {
posix.DT.BLK => .block_device,
posix.DT.CHR => .character_device,
posix.DT.DIR => .directory,
@@ -256,18 +263,18 @@ pub const Iterator = switch (builtin.os.tag) {
self.end_index = @as(usize, @intCast(rc));
}
const haiku_entry = @as(*align(1) posix.system.dirent, @ptrCast(&self.buf[self.index]));
- const next_index = self.index + haiku_entry.reclen();
+ const next_index = self.index + haiku_entry.reclen;
self.index = next_index;
- const name = mem.sliceTo(@as([*:0]u8, @ptrCast(&haiku_entry.d_name)), 0);
+ const name = mem.sliceTo(@as([*:0]u8, @ptrCast(&haiku_entry.name)), 0);
- if (mem.eql(u8, name, ".") or mem.eql(u8, name, "..") or (haiku_entry.d_ino == 0)) {
+ if (mem.eql(u8, name, ".") or mem.eql(u8, name, "..") or (haiku_entry.ino == 0)) {
continue :start_over;
}
var stat_info: posix.Stat = undefined;
const rc = posix.system._kern_read_stat(
self.dir.fd,
- &haiku_entry.d_name,
+ &haiku_entry.name,
false,
&stat_info,
0,
@@ -359,17 +366,17 @@ pub const Iterator = switch (builtin.os.tag) {
self.end_index = rc;
}
const linux_entry = @as(*align(1) linux.dirent64, @ptrCast(&self.buf[self.index]));
- const next_index = self.index + linux_entry.reclen();
+ const next_index = self.index + linux_entry.reclen;
self.index = next_index;
- const name = mem.sliceTo(@as([*:0]u8, @ptrCast(&linux_entry.d_name)), 0);
+ const name = mem.sliceTo(@as([*:0]u8, @ptrCast(&linux_entry.name)), 0);
// skip . and .. entries
if (mem.eql(u8, name, ".") or mem.eql(u8, name, "..")) {
continue :start_over;
}
- const entry_kind: Entry.Kind = switch (linux_entry.d_type) {
+ const entry_kind: Entry.Kind = switch (linux_entry.type) {
linux.DT.BLK => .block_device,
linux.DT.CHR => .character_device,
linux.DT.DIR => .directory,
@@ -445,13 +452,12 @@ pub const Iterator = switch (builtin.os.tag) {
self.index = self.buf.len;
}
- const name_utf16le = @as([*]u16, @ptrCast(&dir_info.FileName))[0 .. dir_info.FileNameLength / 2];
+ const name_wtf16le = @as([*]u16, @ptrCast(&dir_info.FileName))[0 .. dir_info.FileNameLength / 2];
- if (mem.eql(u16, name_utf16le, &[_]u16{'.'}) or mem.eql(u16, name_utf16le, &[_]u16{ '.', '.' }))
+ if (mem.eql(u16, name_wtf16le, &[_]u16{'.'}) or mem.eql(u16, name_wtf16le, &[_]u16{ '.', '.' }))
continue;
- // Trust that Windows gives us valid UTF-16LE
- const name_utf8_len = std.unicode.utf16leToUtf8(self.name_data[0..], name_utf16le) catch unreachable;
- const name_utf8 = self.name_data[0..name_utf8_len];
+ const name_wtf8_len = std.unicode.wtf16LeToWtf8(self.name_data[0..], name_wtf16le);
+ const name_wtf8 = self.name_data[0..name_wtf8_len];
const kind: Entry.Kind = blk: {
const attrs = dir_info.FileAttributes;
if (attrs & w.FILE_ATTRIBUTE_DIRECTORY != 0) break :blk .directory;
@@ -459,7 +465,7 @@ pub const Iterator = switch (builtin.os.tag) {
break :blk .file;
};
return Entry{
- .name = name_utf8,
+ .name = name_wtf8,
.kind = kind,
};
}
@@ -516,6 +522,7 @@ pub const Iterator = switch (builtin.os.tag) {
.INVAL => unreachable,
.NOENT => return error.DirNotFound, // The directory being iterated was deleted during iteration.
.NOTCAPABLE => return error.AccessDenied,
+ .ILSEQ => return error.InvalidUtf8, // An entry's name cannot be encoded as UTF-8.
else => |err| return posix.unexpectedErrno(err),
}
if (bufused == 0) return null;
@@ -525,23 +532,23 @@ pub const Iterator = switch (builtin.os.tag) {
const entry = @as(*align(1) w.dirent_t, @ptrCast(&self.buf[self.index]));
const entry_size = @sizeOf(w.dirent_t);
const name_index = self.index + entry_size;
- if (name_index + entry.d_namlen > self.end_index) {
+ if (name_index + entry.namlen > self.end_index) {
// This case, the name is truncated, so we need to call readdir to store the entire name.
self.end_index = self.index; // Force fd_readdir in the next loop.
continue :start_over;
}
- const name = self.buf[name_index .. name_index + entry.d_namlen];
+ const name = self.buf[name_index .. name_index + entry.namlen];
- const next_index = name_index + entry.d_namlen;
+ const next_index = name_index + entry.namlen;
self.index = next_index;
- self.cookie = entry.d_next;
+ self.cookie = entry.next;
// skip . and .. entries
if (mem.eql(u8, name, ".") or mem.eql(u8, name, "..")) {
continue :start_over;
}
- const entry_kind: Entry.Kind = switch (entry.d_type) {
+ const entry_kind: Entry.Kind = switch (entry.type) {
.BLOCK_DEVICE => .block_device,
.CHARACTER_DEVICE => .character_device,
.DIRECTORY => .directory,
@@ -743,7 +750,11 @@ pub const OpenError = error{
SystemFdQuotaExceeded,
NoDevice,
SystemResources,
+ /// WASI-only; file paths must be valid UTF-8.
InvalidUtf8,
+ /// Windows-only; file paths provided by the user must be valid WTF-8.
+ /// https://simonsapin.github.io/wtf-8/
+ InvalidWtf8,
BadPathName,
DeviceBusy,
/// On Windows, `\\server` or `\\server\share` was not found.
@@ -751,11 +762,7 @@ pub const OpenError = error{
} || posix.UnexpectedError;
pub fn close(self: *Dir) void {
- if (fs.need_async_thread) {
- std.event.Loop.instance.?.close(self.fd);
- } else {
- posix.close(self.fd);
- }
+ posix.close(self.fd);
self.* = undefined;
}
@@ -763,89 +770,87 @@ pub fn close(self: *Dir) void {
/// To create a new file, see `createFile`.
/// Call `File.close` to release the resource.
/// Asserts that the path parameter has no null bytes.
+/// On Windows, `sub_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
+/// On WASI, `sub_path` should be encoded as valid UTF-8.
+/// On other platforms, `sub_path` is an opaque sequence of bytes with no particular encoding.
pub fn openFile(self: Dir, sub_path: []const u8, flags: File.OpenFlags) File.OpenError!File {
if (builtin.os.tag == .windows) {
const path_w = try std.os.windows.sliceToPrefixedFileW(self.fd, sub_path);
return self.openFileW(path_w.span(), flags);
}
- if (builtin.os.tag == .wasi and !builtin.link_libc) {
- return self.openFileWasi(sub_path, flags);
+ if (builtin.os.tag == .wasi) {
+ var base: std.os.wasi.rights_t = .{};
+ if (flags.isRead()) {
+ base.FD_READ = true;
+ base.FD_TELL = true;
+ base.FD_SEEK = true;
+ base.FD_FILESTAT_GET = true;
+ }
+ if (flags.isWrite()) {
+ base.FD_WRITE = true;
+ base.FD_TELL = true;
+ base.FD_SEEK = true;
+ base.FD_DATASYNC = true;
+ base.FD_FDSTAT_SET_FLAGS = true;
+ base.FD_SYNC = true;
+ base.FD_ALLOCATE = true;
+ base.FD_ADVISE = true;
+ base.FD_FILESTAT_SET_TIMES = true;
+ base.FD_FILESTAT_SET_SIZE = true;
+ }
+ const fd = try posix.openatWasi(self.fd, sub_path, .{}, .{}, .{}, base, .{});
+ return .{ .handle = fd };
}
const path_c = try posix.toPosixPath(sub_path);
return self.openFileZ(&path_c, flags);
}
-/// Same as `openFile` but WASI only.
-pub fn openFileWasi(self: Dir, sub_path: []const u8, flags: File.OpenFlags) File.OpenError!File {
- const w = std.os.wasi;
- var fdflags: w.fdflags_t = 0x0;
- var base: w.rights_t = 0x0;
- if (flags.isRead()) {
- base |= w.RIGHT.FD_READ | w.RIGHT.FD_TELL | w.RIGHT.FD_SEEK | w.RIGHT.FD_FILESTAT_GET;
- }
- if (flags.isWrite()) {
- fdflags |= w.FDFLAG.APPEND;
- base |= w.RIGHT.FD_WRITE |
- w.RIGHT.FD_TELL |
- w.RIGHT.FD_SEEK |
- w.RIGHT.FD_DATASYNC |
- w.RIGHT.FD_FDSTAT_SET_FLAGS |
- w.RIGHT.FD_SYNC |
- w.RIGHT.FD_ALLOCATE |
- w.RIGHT.FD_ADVISE |
- w.RIGHT.FD_FILESTAT_SET_TIMES |
- w.RIGHT.FD_FILESTAT_SET_SIZE;
- }
- const fd = try posix.openatWasi(self.fd, sub_path, 0x0, 0x0, fdflags, base, 0x0);
- return File{ .handle = fd };
-}
-
/// Same as `openFile` but the path parameter is null-terminated.
pub fn openFileZ(self: Dir, sub_path: [*:0]const u8, flags: File.OpenFlags) File.OpenError!File {
- if (builtin.os.tag == .windows) {
- const path_w = try std.os.windows.cStrToPrefixedFileW(self.fd, sub_path);
- return self.openFileW(path_w.span(), flags);
+ switch (builtin.os.tag) {
+ .windows => {
+ const path_w = try std.os.windows.cStrToPrefixedFileW(self.fd, sub_path);
+ return self.openFileW(path_w.span(), flags);
+ },
+ .wasi => {
+ return openFile(self, mem.sliceTo(sub_path, 0), flags);
+ },
+ else => {},
}
- var os_flags: u32 = 0;
- if (@hasDecl(posix.O, "CLOEXEC")) os_flags = posix.O.CLOEXEC;
+ var os_flags: posix.O = .{
+ .ACCMODE = switch (flags.mode) {
+ .read_only => .RDONLY,
+ .write_only => .WRONLY,
+ .read_write => .RDWR,
+ },
+ };
+ if (@hasField(posix.O, "CLOEXEC")) os_flags.CLOEXEC = true;
+ if (@hasField(posix.O, "LARGEFILE")) os_flags.LARGEFILE = true;
+ if (@hasField(posix.O, "NOCTTY")) os_flags.NOCTTY = !flags.allow_ctty;
// Use the O locking flags if the os supports them to acquire the lock
// atomically.
- const has_flock_open_flags = @hasDecl(posix.O, "EXLOCK");
+ const has_flock_open_flags = @hasField(posix.O, "EXLOCK");
if (has_flock_open_flags) {
- // Note that the O.NONBLOCK flag is removed after the openat() call
+ // Note that the NONBLOCK flag is removed after the openat() call
// is successful.
- const nonblocking_lock_flag: u32 = if (flags.lock_nonblocking)
- posix.O.NONBLOCK
- else
- 0;
- os_flags |= switch (flags.lock) {
- .none => @as(u32, 0),
- .shared => posix.O.SHLOCK | nonblocking_lock_flag,
- .exclusive => posix.O.EXLOCK | nonblocking_lock_flag,
- };
- }
- if (@hasDecl(posix.O, "LARGEFILE")) {
- os_flags |= posix.O.LARGEFILE;
- }
- if (@hasDecl(posix.O, "NOCTTY") and !flags.allow_ctty) {
- os_flags |= posix.O.NOCTTY;
+ switch (flags.lock) {
+ .none => {},
+ .shared => {
+ os_flags.SHLOCK = true;
+ os_flags.NONBLOCK = flags.lock_nonblocking;
+ },
+ .exclusive => {
+ os_flags.EXLOCK = true;
+ os_flags.NONBLOCK = flags.lock_nonblocking;
+ },
+ }
}
- os_flags |= switch (flags.mode) {
- .read_only => @as(u32, posix.O.RDONLY),
- .write_only => @as(u32, posix.O.WRONLY),
- .read_write => @as(u32, posix.O.RDWR),
- };
- const fd = if (flags.intended_io_mode != .blocking)
- try std.event.Loop.instance.?.openatZ(self.fd, sub_path, os_flags, 0)
- else
- try posix.openatZ(self.fd, sub_path, os_flags, 0);
+ const fd = try posix.openatZ(self.fd, sub_path, os_flags, 0);
errdefer posix.close(fd);
- // WASI doesn't have posix.flock so we intetinally check OS prior to the inner if block
- // since it is not compiltime-known and we need to avoid undefined symbol in Wasm.
- if (@hasDecl(posix.system, "LOCK") and builtin.target.os.tag != .wasi) {
+ if (@hasDecl(posix.system, "LOCK")) {
if (!has_flock_open_flags and flags.lock != .none) {
// TODO: integrate async I/O
const lock_nonblocking: i32 = if (flags.lock_nonblocking) posix.LOCK.NB else 0;
@@ -866,7 +871,7 @@ pub fn openFileZ(self: Dir, sub_path: [*:0]const u8, flags: File.OpenFlags) File
error.LockedRegionLimitExceeded => unreachable,
else => |e| return e,
};
- fl_flags &= ~@as(usize, posix.O.NONBLOCK);
+ fl_flags &= ~@as(usize, 1 << @bitOffsetOf(posix.O, "NONBLOCK"));
_ = posix.fcntl(fd, posix.F.SETFL, fl_flags) catch |err| switch (err) {
error.FileBusy => unreachable,
error.Locked => unreachable,
@@ -877,11 +882,7 @@ pub fn openFileZ(self: Dir, sub_path: [*:0]const u8, flags: File.OpenFlags) File
};
}
- return File{
- .handle = fd,
- .capable_io_mode = .blocking,
- .intended_io_mode = flags.intended_io_mode,
- };
+ return .{ .handle = fd };
}
/// Same as `openFile` but Windows-only and the path parameter is
@@ -895,10 +896,7 @@ pub fn openFileW(self: Dir, sub_path_w: []const u16, flags: File.OpenFlags) File
(if (flags.isRead()) @as(u32, w.GENERIC_READ) else 0) |
(if (flags.isWrite()) @as(u32, w.GENERIC_WRITE) else 0),
.creation = w.FILE_OPEN,
- .io_mode = flags.intended_io_mode,
}),
- .capable_io_mode = std.io.default_mode,
- .intended_io_mode = flags.intended_io_mode,
};
errdefer file.close();
var io: w.IO_STATUS_BLOCK = undefined;
@@ -927,91 +925,89 @@ pub fn openFileW(self: Dir, sub_path_w: []const u16, flags: File.OpenFlags) File
/// Creates, opens, or overwrites a file with write access.
/// Call `File.close` on the result when done.
/// Asserts that the path parameter has no null bytes.
+/// On Windows, `sub_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
+/// On WASI, `sub_path` should be encoded as valid UTF-8.
+/// On other platforms, `sub_path` is an opaque sequence of bytes with no particular encoding.
pub fn createFile(self: Dir, sub_path: []const u8, flags: File.CreateFlags) File.OpenError!File {
if (builtin.os.tag == .windows) {
const path_w = try std.os.windows.sliceToPrefixedFileW(self.fd, sub_path);
return self.createFileW(path_w.span(), flags);
}
- if (builtin.os.tag == .wasi and !builtin.link_libc) {
- return self.createFileWasi(sub_path, flags);
+ if (builtin.os.tag == .wasi) {
+ return .{
+ .handle = try posix.openatWasi(self.fd, sub_path, .{}, .{
+ .CREAT = true,
+ .TRUNC = flags.truncate,
+ .EXCL = flags.exclusive,
+ }, .{}, .{
+ .FD_READ = flags.read,
+ .FD_WRITE = true,
+ .FD_DATASYNC = true,
+ .FD_SEEK = true,
+ .FD_TELL = true,
+ .FD_FDSTAT_SET_FLAGS = true,
+ .FD_SYNC = true,
+ .FD_ALLOCATE = true,
+ .FD_ADVISE = true,
+ .FD_FILESTAT_SET_TIMES = true,
+ .FD_FILESTAT_SET_SIZE = true,
+ .FD_FILESTAT_GET = true,
+ }, .{}),
+ };
}
const path_c = try posix.toPosixPath(sub_path);
return self.createFileZ(&path_c, flags);
}
-/// Same as `createFile` but WASI only.
-pub fn createFileWasi(self: Dir, sub_path: []const u8, flags: File.CreateFlags) File.OpenError!File {
- const w = std.os.wasi;
- var oflags = w.O.CREAT;
- var base: w.rights_t = w.RIGHT.FD_WRITE |
- w.RIGHT.FD_DATASYNC |
- w.RIGHT.FD_SEEK |
- w.RIGHT.FD_TELL |
- w.RIGHT.FD_FDSTAT_SET_FLAGS |
- w.RIGHT.FD_SYNC |
- w.RIGHT.FD_ALLOCATE |
- w.RIGHT.FD_ADVISE |
- w.RIGHT.FD_FILESTAT_SET_TIMES |
- w.RIGHT.FD_FILESTAT_SET_SIZE |
- w.RIGHT.FD_FILESTAT_GET;
- if (flags.read) {
- base |= w.RIGHT.FD_READ;
- }
- if (flags.truncate) {
- oflags |= w.O.TRUNC;
- }
- if (flags.exclusive) {
- oflags |= w.O.EXCL;
- }
- const fd = try posix.openatWasi(self.fd, sub_path, 0x0, oflags, 0x0, base, 0x0);
- return File{ .handle = fd };
-}
-
/// Same as `createFile` but the path parameter is null-terminated.
pub fn createFileZ(self: Dir, sub_path_c: [*:0]const u8, flags: File.CreateFlags) File.OpenError!File {
- if (builtin.os.tag == .windows) {
- const path_w = try std.os.windows.cStrToPrefixedFileW(self.fd, sub_path_c);
- return self.createFileW(path_w.span(), flags);
+ switch (builtin.os.tag) {
+ .windows => {
+ const path_w = try std.os.windows.cStrToPrefixedFileW(self.fd, sub_path_c);
+ return self.createFileW(path_w.span(), flags);
+ },
+ .wasi => {
+ return createFile(self, mem.sliceTo(sub_path_c, 0), flags);
+ },
+ else => {},
}
+ var os_flags: std.os.O = .{
+ .ACCMODE = if (flags.read) .RDWR else .WRONLY,
+ .CREAT = true,
+ .TRUNC = flags.truncate,
+ .EXCL = flags.exclusive,
+ };
+ if (@hasField(posix.O, "LARGEFILE")) os_flags.LARGEFILE = true;
+ if (@hasField(posix.O, "CLOEXEC")) os_flags.CLOEXEC = true;
+
// Use the O locking flags if the os supports them to acquire the lock
- // atomically.
- const has_flock_open_flags = @hasDecl(posix.O, "EXLOCK");
- // Note that the O.NONBLOCK flag is removed after the openat() call
- // is successful.
- const nonblocking_lock_flag: u32 = if (has_flock_open_flags and flags.lock_nonblocking)
- posix.O.NONBLOCK
- else
- 0;
- const lock_flag: u32 = if (has_flock_open_flags) switch (flags.lock) {
- .none => @as(u32, 0),
- .shared => posix.O.SHLOCK | nonblocking_lock_flag,
- .exclusive => posix.O.EXLOCK | nonblocking_lock_flag,
- } else 0;
-
- const O_LARGEFILE = if (@hasDecl(posix.O, "LARGEFILE")) posix.O.LARGEFILE else 0;
- const os_flags = lock_flag | O_LARGEFILE | posix.O.CREAT | posix.O.CLOEXEC |
- (if (flags.truncate) @as(u32, posix.O.TRUNC) else 0) |
- (if (flags.read) @as(u32, posix.O.RDWR) else posix.O.WRONLY) |
- (if (flags.exclusive) @as(u32, posix.O.EXCL) else 0);
- const fd = if (flags.intended_io_mode != .blocking)
- try std.event.Loop.instance.?.openatZ(self.fd, sub_path_c, os_flags, flags.mode)
- else
- try posix.openatZ(self.fd, sub_path_c, os_flags, flags.mode);
+ // atomically. Note that the NONBLOCK flag is removed after the openat()
+ // call is successful.
+ const has_flock_open_flags = @hasField(posix.O, "EXLOCK");
+ if (has_flock_open_flags) switch (flags.lock) {
+ .none => {},
+ .shared => {
+ os_flags.SHLOCK = true;
+ os_flags.NONBLOCK = flags.lock_nonblocking;
+ },
+ .exclusive => {
+ os_flags.EXLOCK = true;
+ os_flags.NONBLOCK = flags.lock_nonblocking;
+ },
+ };
+
+ const fd = try posix.openatZ(self.fd, sub_path_c, os_flags, flags.mode);
errdefer posix.close(fd);
- // WASI doesn't have posix.flock so we intetinally check OS prior to the inner if block
- // since it is not compiltime-known and we need to avoid undefined symbol in Wasm.
- if (builtin.target.os.tag != .wasi) {
- if (!has_flock_open_flags and flags.lock != .none) {
- // TODO: integrate async I/O
- const lock_nonblocking: i32 = if (flags.lock_nonblocking) posix.LOCK.NB else 0;
- try posix.flock(fd, switch (flags.lock) {
- .none => unreachable,
- .shared => posix.LOCK.SH | lock_nonblocking,
- .exclusive => posix.LOCK.EX | lock_nonblocking,
- });
- }
+ if (!has_flock_open_flags and flags.lock != .none) {
+ // TODO: integrate async I/O
+ const lock_nonblocking: i32 = if (flags.lock_nonblocking) posix.LOCK.NB else 0;
+ try posix.flock(fd, switch (flags.lock) {
+ .none => unreachable,
+ .shared => posix.LOCK.SH | lock_nonblocking,
+ .exclusive => posix.LOCK.EX | lock_nonblocking,
+ });
}
if (has_flock_open_flags and flags.lock_nonblocking) {
@@ -1023,7 +1019,7 @@ pub fn createFileZ(self: Dir, sub_path_c: [*:0]const u8, flags: File.CreateFlags
error.LockedRegionLimitExceeded => unreachable,
else => |e| return e,
};
- fl_flags &= ~@as(usize, posix.O.NONBLOCK);
+ fl_flags &= ~@as(usize, 1 << @bitOffsetOf(posix.O, "NONBLOCK"));
_ = posix.fcntl(fd, posix.F.SETFL, fl_flags) catch |err| switch (err) {
error.FileBusy => unreachable,
error.Locked => unreachable,
@@ -1034,11 +1030,7 @@ pub fn createFileZ(self: Dir, sub_path_c: [*:0]const u8, flags: File.CreateFlags
};
}
- return File{
- .handle = fd,
- .capable_io_mode = .blocking,
- .intended_io_mode = flags.intended_io_mode,
- };
+ return .{ .handle = fd };
}
/// Same as `createFile` but Windows-only and the path parameter is
@@ -1056,10 +1048,7 @@ pub fn createFileW(self: Dir, sub_path_w: []const u16, flags: File.CreateFlags)
@as(u32, w.FILE_OVERWRITE_IF)
else
@as(u32, w.FILE_OPEN_IF),
- .io_mode = flags.intended_io_mode,
}),
- .capable_io_mode = std.io.default_mode,
- .intended_io_mode = flags.intended_io_mode,
};
errdefer file.close();
var io: w.IO_STATUS_BLOCK = undefined;
@@ -1088,18 +1077,21 @@ pub fn createFileW(self: Dir, sub_path_w: []const u16, flags: File.CreateFlags)
/// Creates a single directory with a relative or absolute path.
/// To create multiple directories to make an entire path, see `makePath`.
/// To operate on only absolute paths, see `makeDirAbsolute`.
+/// On Windows, `sub_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
+/// On WASI, `sub_path` should be encoded as valid UTF-8.
+/// On other platforms, `sub_path` is an opaque sequence of bytes with no particular encoding.
pub fn makeDir(self: Dir, sub_path: []const u8) !void {
try posix.mkdirat(self.fd, sub_path, default_mode);
}
-/// Creates a single directory with a relative or absolute null-terminated UTF-8-encoded path.
+/// Same as `makeDir`, but `sub_path` is null-terminated.
/// To create multiple directories to make an entire path, see `makePath`.
/// To operate on only absolute paths, see `makeDirAbsoluteZ`.
pub fn makeDirZ(self: Dir, sub_path: [*:0]const u8) !void {
try posix.mkdiratZ(self.fd, sub_path, default_mode);
}
-/// Creates a single directory with a relative or absolute null-terminated WTF-16-encoded path.
+/// Creates a single directory with a relative or absolute null-terminated WTF-16 LE-encoded path.
/// To create multiple directories to make an entire path, see `makePath`.
/// To operate on only absolute paths, see `makeDirAbsoluteW`.
pub fn makeDirW(self: Dir, sub_path: [*:0]const u16) !void {
@@ -1111,6 +1103,9 @@ pub fn makeDirW(self: Dir, sub_path: [*:0]const u16) !void {
/// Returns success if the path already exists and is a directory.
/// This function is not atomic, and if it returns an error, the file system may
/// have been modified regardless.
+/// On Windows, `sub_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
+/// On WASI, `sub_path` should be encoded as valid UTF-8.
+/// On other platforms, `sub_path` is an opaque sequence of bytes with no particular encoding.
///
/// Paths containing `..` components are handled differently depending on the platform:
/// - On Windows, `..` are resolved before the path is passed to NtCreateFile, meaning
@@ -1147,16 +1142,17 @@ pub fn makePath(self: Dir, sub_path: []const u8) !void {
}
}
-/// Calls makeOpenDirAccessMaskW iteratively to make an entire path
+/// Windows only. Calls makeOpenDirAccessMaskW iteratively to make an entire path
/// (i.e. creating any parent directories that do not exist).
/// Opens the dir if the path already exists and is a directory.
/// This function is not atomic, and if it returns an error, the file system may
/// have been modified regardless.
+/// `sub_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
fn makeOpenPathAccessMaskW(self: Dir, sub_path: []const u8, access_mask: u32, no_follow: bool) OpenError!Dir {
const w = std.os.windows;
var it = try fs.path.componentIterator(sub_path);
// If there are no components in the path, then create a dummy component with the full path.
- var component = it.last() orelse fs.path.NativeUtf8ComponentIterator.Component{
+ var component = it.last() orelse fs.path.NativeComponentIterator.Component{
.name = "",
.path = sub_path,
};
@@ -1184,7 +1180,9 @@ fn makeOpenPathAccessMaskW(self: Dir, sub_path: []const u8, access_mask: u32, no
/// This function performs `makePath`, followed by `openDir`.
/// If supported by the OS, this operation is atomic. It is not atomic on
/// all operating systems.
-/// On Windows, this function performs `makeOpenPathAccessMaskW`.
+/// On Windows, `sub_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
+/// On WASI, `sub_path` should be encoded as valid UTF-8.
+/// On other platforms, `sub_path` is an opaque sequence of bytes with no particular encoding.
pub fn makeOpenPath(self: Dir, sub_path: []const u8, open_dir_options: OpenDirOptions) !Dir {
return switch (builtin.os.tag) {
.windows => {
@@ -1207,14 +1205,20 @@ pub fn makeOpenPath(self: Dir, sub_path: []const u8, open_dir_options: OpenDirOp
};
}
+pub const RealPathError = posix.RealPathError;
+
/// This function returns the canonicalized absolute pathname of
/// `pathname` relative to this `Dir`. If `pathname` is absolute, ignores this
/// `Dir` handle and returns the canonicalized absolute pathname of `pathname`
/// argument.
+/// On Windows, `sub_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
+/// On other platforms, `sub_path` is an opaque sequence of bytes with no particular encoding.
+/// On Windows, the result is encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
+/// On other platforms, the result is an opaque sequence of bytes with no particular encoding.
/// This function is not universally supported by all platforms.
/// Currently supported hosts are: Linux, macOS, and Windows.
/// See also `Dir.realpathZ`, `Dir.realpathW`, and `Dir.realpathAlloc`.
-pub fn realpath(self: Dir, pathname: []const u8, out_buffer: []u8) ![]u8 {
+pub fn realpath(self: Dir, pathname: []const u8, out_buffer: []u8) RealPathError![]u8 {
if (builtin.os.tag == .wasi) {
@compileError("realpath is not available on WASI");
}
@@ -1228,18 +1232,29 @@ pub fn realpath(self: Dir, pathname: []const u8, out_buffer: []u8) ![]u8 {
/// Same as `Dir.realpath` except `pathname` is null-terminated.
/// See also `Dir.realpath`, `realpathZ`.
-pub fn realpathZ(self: Dir, pathname: [*:0]const u8, out_buffer: []u8) ![]u8 {
+pub fn realpathZ(self: Dir, pathname: [*:0]const u8, out_buffer: []u8) RealPathError![]u8 {
if (builtin.os.tag == .windows) {
const pathname_w = try posix.windows.cStrToPrefixedFileW(self.fd, pathname);
return self.realpathW(pathname_w.span(), out_buffer);
}
- const flags = if (builtin.os.tag == .linux)
- posix.O.PATH | posix.O.NONBLOCK | posix.O.CLOEXEC
- else
- posix.O.NONBLOCK | posix.O.CLOEXEC;
+ const flags: posix.O = switch (builtin.os.tag) {
+ .linux => .{
+ .NONBLOCK = true,
+ .CLOEXEC = true,
+ .PATH = true,
+ },
+ else => .{
+ .NONBLOCK = true,
+ .CLOEXEC = true,
+ },
+ };
+
const fd = posix.openatZ(self.fd, pathname, flags, 0) catch |err| switch (err) {
- error.FileLocksNotSupported => unreachable,
+ error.FileLocksNotSupported => return error.Unexpected,
+ error.FileBusy => return error.Unexpected,
+ error.WouldBlock => return error.Unexpected,
+ error.InvalidUtf8 => unreachable, // WASI-only
else => |e| return e,
};
defer posix.close(fd);
@@ -1262,9 +1277,10 @@ pub fn realpathZ(self: Dir, pathname: [*:0]const u8, out_buffer: []u8) ![]u8 {
return result;
}
-/// Windows-only. Same as `Dir.realpath` except `pathname` is WTF16 encoded.
+/// Windows-only. Same as `Dir.realpath` except `pathname` is WTF16 LE encoded.
+/// The result is encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
/// See also `Dir.realpath`, `realpathW`.
-pub fn realpathW(self: Dir, pathname: []const u16, out_buffer: []u8) ![]u8 {
+pub fn realpathW(self: Dir, pathname: []const u16, out_buffer: []u8) RealPathError![]u8 {
const w = std.os.windows;
const access_mask = w.GENERIC_READ | w.SYNCHRONIZE;
@@ -1276,7 +1292,6 @@ pub fn realpathW(self: Dir, pathname: []const u16, out_buffer: []u8) ![]u8 {
.access_mask = access_mask,
.share_access = share_access,
.creation = creation,
- .io_mode = .blocking,
.filter = .any,
}) catch |err| switch (err) {
error.WouldBlock => unreachable,
@@ -1286,27 +1301,22 @@ pub fn realpathW(self: Dir, pathname: []const u16, out_buffer: []u8) ![]u8 {
};
defer w.CloseHandle(h_file);
- // Use of MAX_PATH_BYTES here is valid as the realpath function does not
- // have a variant that takes an arbitrary-size buffer.
- // TODO(#4812): Consider reimplementing realpath or using the POSIX.1-2008
- // NULL out parameter (GNU's canonicalize_file_name) to handle overelong
- // paths. musl supports passing NULL but restricts the output to PATH_MAX
- // anyway.
- var buffer: [fs.MAX_PATH_BYTES]u8 = undefined;
- const out_path = try posix.getFdPath(h_file, &buffer);
-
- if (out_path.len > out_buffer.len) {
+ var wide_buf: [w.PATH_MAX_WIDE]u16 = undefined;
+ const wide_slice = try w.GetFinalPathNameByHandle(h_file, .{}, &wide_buf);
+ var big_out_buf: [fs.MAX_PATH_BYTES]u8 = undefined;
+ const end_index = std.unicode.wtf16LeToWtf8(&big_out_buf, wide_slice);
+ if (end_index > out_buffer.len)
return error.NameTooLong;
- }
-
- const result = out_buffer[0..out_path.len];
- @memcpy(result, out_path);
+ const result = out_buffer[0..end_index];
+ @memcpy(result, big_out_buf[0..end_index]);
return result;
}
+pub const RealPathAllocError = RealPathError || Allocator.Error;
+
/// Same as `Dir.realpath` except caller must free the returned memory.
/// See also `Dir.realpath`.
-pub fn realpathAlloc(self: Dir, allocator: Allocator, pathname: []const u8) ![]u8 {
+pub fn realpathAlloc(self: Dir, allocator: Allocator, pathname: []const u8) RealPathAllocError![]u8 {
// Use of MAX_PATH_BYTES here is valid as the realpath function does not
// have a variant that takes an arbitrary-size buffer.
// TODO(#4812): Consider reimplementing realpath or using the POSIX.1-2008
@@ -1357,82 +1367,94 @@ pub const OpenDirOptions = struct {
/// open until `close` is called on the result.
/// The directory cannot be iterated unless the `iterate` option is set to `true`.
///
+/// On Windows, `sub_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
+/// On WASI, `sub_path` should be encoded as valid UTF-8.
+/// On other platforms, `sub_path` is an opaque sequence of bytes with no particular encoding.
/// Asserts that the path parameter has no null bytes.
pub fn openDir(self: Dir, sub_path: []const u8, args: OpenDirOptions) OpenError!Dir {
- if (builtin.os.tag == .windows) {
- const sub_path_w = try posix.windows.sliceToPrefixedFileW(self.fd, sub_path);
- return self.openDirW(sub_path_w.span().ptr, args);
- } else if (builtin.os.tag == .wasi and !builtin.link_libc) {
- return self.openDirWasi(sub_path, args);
- } else {
- const sub_path_c = try posix.toPosixPath(sub_path);
- return self.openDirZ(&sub_path_c, args);
- }
-}
+ switch (builtin.os.tag) {
+ .windows => {
+ const sub_path_w = try posix.windows.sliceToPrefixedFileW(self.fd, sub_path);
+ return self.openDirW(sub_path_w.span().ptr, args);
+ },
+ .wasi => {
+ var base: std.os.wasi.rights_t = .{
+ .FD_FILESTAT_GET = true,
+ .FD_FDSTAT_SET_FLAGS = true,
+ .FD_FILESTAT_SET_TIMES = true,
+ };
+ if (args.access_sub_paths) {
+ base.FD_READDIR = true;
+ base.PATH_CREATE_DIRECTORY = true;
+ base.PATH_CREATE_FILE = true;
+ base.PATH_LINK_SOURCE = true;
+ base.PATH_LINK_TARGET = true;
+ base.PATH_OPEN = true;
+ base.PATH_READLINK = true;
+ base.PATH_RENAME_SOURCE = true;
+ base.PATH_RENAME_TARGET = true;
+ base.PATH_FILESTAT_GET = true;
+ base.PATH_FILESTAT_SET_SIZE = true;
+ base.PATH_FILESTAT_SET_TIMES = true;
+ base.PATH_SYMLINK = true;
+ base.PATH_REMOVE_DIRECTORY = true;
+ base.PATH_UNLINK_FILE = true;
+ }
-/// Same as `openDir` except only WASI.
-pub fn openDirWasi(self: Dir, sub_path: []const u8, args: OpenDirOptions) OpenError!Dir {
- const w = std.os.wasi;
- var base: w.rights_t = w.RIGHT.FD_FILESTAT_GET | w.RIGHT.FD_FDSTAT_SET_FLAGS | w.RIGHT.FD_FILESTAT_SET_TIMES;
- if (args.access_sub_paths) {
- base |= w.RIGHT.FD_READDIR |
- w.RIGHT.PATH_CREATE_DIRECTORY |
- w.RIGHT.PATH_CREATE_FILE |
- w.RIGHT.PATH_LINK_SOURCE |
- w.RIGHT.PATH_LINK_TARGET |
- w.RIGHT.PATH_OPEN |
- w.RIGHT.PATH_READLINK |
- w.RIGHT.PATH_RENAME_SOURCE |
- w.RIGHT.PATH_RENAME_TARGET |
- w.RIGHT.PATH_FILESTAT_GET |
- w.RIGHT.PATH_FILESTAT_SET_SIZE |
- w.RIGHT.PATH_FILESTAT_SET_TIMES |
- w.RIGHT.PATH_SYMLINK |
- w.RIGHT.PATH_REMOVE_DIRECTORY |
- w.RIGHT.PATH_UNLINK_FILE;
+ const result = posix.openatWasi(
+ self.fd,
+ sub_path,
+ .{ .SYMLINK_FOLLOW = !args.no_follow },
+ .{ .DIRECTORY = true },
+ .{},
+ base,
+ base,
+ );
+ const fd = result catch |err| switch (err) {
+ error.FileTooBig => unreachable, // can't happen for directories
+ error.IsDir => unreachable, // we're setting DIRECTORY
+ error.NoSpaceLeft => unreachable, // not setting CREAT
+ error.PathAlreadyExists => unreachable, // not setting CREAT
+ error.FileLocksNotSupported => unreachable, // locking folders is not supported
+ error.WouldBlock => unreachable, // can't happen for directories
+ error.FileBusy => unreachable, // can't happen for directories
+ else => |e| return e,
+ };
+ return .{ .fd = fd };
+ },
+ else => {
+ const sub_path_c = try posix.toPosixPath(sub_path);
+ return self.openDirZ(&sub_path_c, args);
+ },
}
- const symlink_flags: w.lookupflags_t = if (args.no_follow) 0x0 else w.LOOKUP_SYMLINK_FOLLOW;
- // TODO do we really need all the rights here?
- const inheriting: w.rights_t = w.RIGHT.ALL ^ w.RIGHT.SOCK_SHUTDOWN;
-
- const result = posix.openatWasi(
- self.fd,
- sub_path,
- symlink_flags,
- w.O.DIRECTORY,
- 0x0,
- base,
- inheriting,
- );
- const fd = result catch |err| switch (err) {
- error.FileTooBig => unreachable, // can't happen for directories
- error.IsDir => unreachable, // we're providing O.DIRECTORY
- error.NoSpaceLeft => unreachable, // not providing O.CREAT
- error.PathAlreadyExists => unreachable, // not providing O.CREAT
- error.FileLocksNotSupported => unreachable, // locking folders is not supported
- error.WouldBlock => unreachable, // can't happen for directories
- error.FileBusy => unreachable, // can't happen for directories
- else => |e| return e,
- };
- return Dir{ .fd = fd };
}
/// Same as `openDir` except the parameter is null-terminated.
pub fn openDirZ(self: Dir, sub_path_c: [*:0]const u8, args: OpenDirOptions) OpenError!Dir {
- if (builtin.os.tag == .windows) {
- const sub_path_w = try std.os.windows.cStrToPrefixedFileW(self.fd, sub_path_c);
- return self.openDirW(sub_path_w.span().ptr, args);
- }
- const symlink_flags: u32 = if (args.no_follow) posix.O.NOFOLLOW else 0x0;
- if (!args.iterate) {
- const O_PATH = if (@hasDecl(posix.O, "PATH")) posix.O.PATH else 0;
- return self.openDirFlagsZ(sub_path_c, posix.O.DIRECTORY | posix.O.RDONLY | posix.O.CLOEXEC | O_PATH | symlink_flags);
- } else {
- return self.openDirFlagsZ(sub_path_c, posix.O.DIRECTORY | posix.O.RDONLY | posix.O.CLOEXEC | symlink_flags);
+ switch (builtin.os.tag) {
+ .windows => {
+ const sub_path_w = try std.os.windows.cStrToPrefixedFileW(self.fd, sub_path_c);
+ return self.openDirW(sub_path_w.span().ptr, args);
+ },
+ .wasi => {
+ return openDir(self, mem.sliceTo(sub_path_c, 0), args);
+ },
+ else => {
+ var symlink_flags: posix.O = .{
+ .ACCMODE = .RDONLY,
+ .NOFOLLOW = args.no_follow,
+ .DIRECTORY = true,
+ .CLOEXEC = true,
+ };
+ if (@hasField(posix.O, "PATH") and !args.iterate)
+ symlink_flags.PATH = true;
+
+ return self.openDirFlagsZ(sub_path_c, symlink_flags);
+ },
}
}
-/// Same as `openDir` except the path parameter is WTF-16 encoded, NT-prefixed.
+/// Same as `openDir` except the path parameter is WTF-16 LE encoded, NT-prefixed.
/// This function asserts the target OS is Windows.
pub fn openDirW(self: Dir, sub_path_w: [*:0]const u16, args: OpenDirOptions) OpenError!Dir {
const w = std.os.windows;
@@ -1447,17 +1469,14 @@ pub fn openDirW(self: Dir, sub_path_w: [*:0]const u16, args: OpenDirOptions) Ope
return dir;
}
-/// `flags` must contain `posix.O.DIRECTORY`.
-fn openDirFlagsZ(self: Dir, sub_path_c: [*:0]const u8, flags: u32) OpenError!Dir {
- const result = if (fs.need_async_thread)
- std.event.Loop.instance.?.openatZ(self.fd, sub_path_c, flags, 0)
- else
- posix.openatZ(self.fd, sub_path_c, flags, 0);
- const fd = result catch |err| switch (err) {
+/// Asserts `flags` has `DIRECTORY` set.
+fn openDirFlagsZ(self: Dir, sub_path_c: [*:0]const u8, flags: posix.O) OpenError!Dir {
+ assert(flags.DIRECTORY);
+ const fd = posix.openatZ(self.fd, sub_path_c, flags, 0) catch |err| switch (err) {
error.FileTooBig => unreachable, // can't happen for directories
- error.IsDir => unreachable, // we're providing O.DIRECTORY
- error.NoSpaceLeft => unreachable, // not providing O.CREAT
- error.PathAlreadyExists => unreachable, // not providing O.CREAT
+ error.IsDir => unreachable, // we're setting DIRECTORY
+ error.NoSpaceLeft => unreachable, // not setting CREAT
+ error.PathAlreadyExists => unreachable, // not setting CREAT
error.FileLocksNotSupported => unreachable, // locking folders is not supported
error.WouldBlock => unreachable, // can't happen for directories
error.FileBusy => unreachable, // can't happen for directories
@@ -1525,6 +1544,9 @@ fn makeOpenDirAccessMaskW(self: Dir, sub_path_w: [*:0]const u16, access_mask: u3
pub const DeleteFileError = posix.UnlinkError;
/// Delete a file name and possibly the file it refers to, based on an open directory handle.
+/// On Windows, `sub_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
+/// On WASI, `sub_path` should be encoded as valid UTF-8.
+/// On other platforms, `sub_path` is an opaque sequence of bytes with no particular encoding.
/// Asserts that the path parameter has no null bytes.
pub fn deleteFile(self: Dir, sub_path: []const u8) DeleteFileError!void {
if (builtin.os.tag == .windows) {
@@ -1560,7 +1582,7 @@ pub fn deleteFileZ(self: Dir, sub_path_c: [*:0]const u8) DeleteFileError!void {
};
}
-/// Same as `deleteFile` except the parameter is WTF-16 encoded.
+/// Same as `deleteFile` except the parameter is WTF-16 LE encoded.
pub fn deleteFileW(self: Dir, sub_path_w: []const u16) DeleteFileError!void {
posix.unlinkatW(self.fd, sub_path_w, 0) catch |err| switch (err) {
error.DirNotEmpty => unreachable, // not passing AT.REMOVEDIR
@@ -1579,7 +1601,11 @@ pub const DeleteDirError = error{
NotDir,
SystemResources,
ReadOnlyFileSystem,
+ /// WASI-only; file paths must be valid UTF-8.
InvalidUtf8,
+ /// Windows-only; file paths provided by the user must be valid WTF-8.
+ /// https://simonsapin.github.io/wtf-8/
+ InvalidWtf8,
BadPathName,
/// On Windows, `\\server` or `\\server\share` was not found.
NetworkNotFound,
@@ -1588,6 +1614,9 @@ pub const DeleteDirError = error{
/// Returns `error.DirNotEmpty` if the directory is not empty.
/// To delete a directory recursively, see `deleteTree`.
+/// On Windows, `sub_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
+/// On WASI, `sub_path` should be encoded as valid UTF-8.
+/// On other platforms, `sub_path` is an opaque sequence of bytes with no particular encoding.
/// Asserts that the path parameter has no null bytes.
pub fn deleteDir(self: Dir, sub_path: []const u8) DeleteDirError!void {
if (builtin.os.tag == .windows) {
@@ -1612,7 +1641,7 @@ pub fn deleteDirZ(self: Dir, sub_path_c: [*:0]const u8) DeleteDirError!void {
};
}
-/// Same as `deleteDir` except the parameter is UTF16LE, NT prefixed.
+/// Same as `deleteDir` except the parameter is WTF-16 LE encoded, NT-prefixed.
/// This function is Windows-only.
pub fn deleteDirW(self: Dir, sub_path_w: []const u16) DeleteDirError!void {
posix.unlinkatW(self.fd, sub_path_w, posix.AT.REMOVEDIR) catch |err| switch (err) {
@@ -1627,6 +1656,9 @@ pub const RenameError = posix.RenameError;
/// If new_sub_path already exists, it will be replaced.
/// Renaming a file over an existing directory or a directory
/// over an existing file will fail with `error.IsDir` or `error.NotDir`.
+/// On Windows, both paths should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
+/// On WASI, both paths should be encoded as valid UTF-8.
+/// On other platforms, both paths are an opaque sequence of bytes with no particular encoding.
pub fn rename(self: Dir, old_sub_path: []const u8, new_sub_path: []const u8) RenameError!void {
return posix.renameat(self.fd, old_sub_path, self.fd, new_sub_path);
}
@@ -1636,7 +1668,7 @@ pub fn renameZ(self: Dir, old_sub_path_z: [*:0]const u8, new_sub_path_z: [*:0]co
return posix.renameatZ(self.fd, old_sub_path_z, self.fd, new_sub_path_z);
}
-/// Same as `rename` except the parameters are UTF16LE, NT prefixed.
+/// Same as `rename` except the parameters are WTF-16 LE encoded, NT-prefixed.
/// This function is Windows-only.
pub fn renameW(self: Dir, old_sub_path_w: []const u16, new_sub_path_w: []const u16) RenameError!void {
return posix.renameatW(self.fd, old_sub_path_w, self.fd, new_sub_path_w);
@@ -1654,6 +1686,9 @@ pub const SymLinkFlags = struct {
/// A symbolic link (also known as a soft link) may point to an existing file or to a nonexistent
/// one; the latter case is known as a dangling link.
/// If `sym_link_path` exists, it will not be overwritten.
+/// On Windows, both paths should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
+/// On WASI, both paths should be encoded as valid UTF-8.
+/// On other platforms, both paths are an opaque sequence of bytes with no particular encoding.
pub fn symLink(
self: Dir,
target_path: []const u8,
@@ -1669,7 +1704,7 @@ pub fn symLink(
// when converting to an NT namespaced path. CreateSymbolicLink in
// symLinkW will handle the necessary conversion.
var target_path_w: std.os.windows.PathSpace = undefined;
- target_path_w.len = try std.unicode.utf8ToUtf16Le(&target_path_w.data, target_path);
+ target_path_w.len = try std.unicode.wtf8ToWtf16Le(&target_path_w.data, target_path);
target_path_w.data[target_path_w.len] = 0;
const sym_link_path_w = try std.os.windows.sliceToPrefixedFileW(self.fd, sym_link_path);
return self.symLinkW(target_path_w.span(), sym_link_path_w.span(), flags);
@@ -1705,7 +1740,7 @@ pub fn symLinkZ(
}
/// Windows-only. Same as `symLink` except the pathname parameters
-/// are null-terminated, WTF16 encoded.
+/// are WTF-16 LE encoded.
pub fn symLinkW(
self: Dir,
/// WTF-16, does not need to be NT-prefixed. The NT-prefixing
@@ -1723,6 +1758,9 @@ pub const ReadLinkError = posix.ReadLinkError;
/// Read value of a symbolic link.
/// The return value is a slice of `buffer`, from index `0`.
/// Asserts that the path parameter has no null bytes.
+/// On Windows, `sub_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
+/// On WASI, `sub_path` should be encoded as valid UTF-8.
+/// On other platforms, `sub_path` is an opaque sequence of bytes with no particular encoding.
pub fn readLink(self: Dir, sub_path: []const u8, buffer: []u8) ReadLinkError![]u8 {
if (builtin.os.tag == .wasi and !builtin.link_libc) {
return self.readLinkWasi(sub_path, buffer);
@@ -1740,7 +1778,7 @@ pub fn readLinkWasi(self: Dir, sub_path: []const u8, buffer: []u8) ![]u8 {
return posix.readlinkat(self.fd, sub_path, buffer);
}
-/// Same as `readLink`, except the `pathname` parameter is null-terminated.
+/// Same as `readLink`, except the `sub_path_c` parameter is null-terminated.
pub fn readLinkZ(self: Dir, sub_path_c: [*:0]const u8, buffer: []u8) ![]u8 {
if (builtin.os.tag == .windows) {
const sub_path_w = try std.os.windows.cStrToPrefixedFileW(self.fd, sub_path_c);
@@ -1750,7 +1788,7 @@ pub fn readLinkZ(self: Dir, sub_path_c: [*:0]const u8, buffer: []u8) ![]u8 {
}
/// Windows-only. Same as `readLink` except the pathname parameter
-/// is null-terminated, WTF16 encoded.
+/// is WTF-16 LE encoded.
pub fn readLinkW(self: Dir, sub_path_w: []const u16, buffer: []u8) ![]u8 {
return std.os.windows.ReadLink(self.fd, sub_path_w, buffer);
}
@@ -1760,6 +1798,9 @@ pub fn readLinkW(self: Dir, sub_path_w: []const u16, buffer: []u8) ![]u8 {
/// the situation is ambiguous. It could either mean that the entire file was read, and
/// it exactly fits the buffer, or it could mean the buffer was not big enough for the
/// entire file.
+/// On Windows, `file_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
+/// On WASI, `file_path` should be encoded as valid UTF-8.
+/// On other platforms, `file_path` is an opaque sequence of bytes with no particular encoding.
pub fn readFile(self: Dir, file_path: []const u8, buffer: []u8) ![]u8 {
var file = try self.openFile(file_path, .{});
defer file.close();
@@ -1770,6 +1811,9 @@ pub fn readFile(self: Dir, file_path: []const u8, buffer: []u8) ![]u8 {
/// On success, caller owns returned buffer.
/// If the file is larger than `max_bytes`, returns `error.FileTooBig`.
+/// On Windows, `file_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
+/// On WASI, `file_path` should be encoded as valid UTF-8.
+/// On other platforms, `file_path` is an opaque sequence of bytes with no particular encoding.
pub fn readFileAlloc(self: Dir, allocator: mem.Allocator, file_path: []const u8, max_bytes: usize) ![]u8 {
return self.readFileAllocOptions(allocator, file_path, max_bytes, null, @alignOf(u8), null);
}
@@ -1779,6 +1823,9 @@ pub fn readFileAlloc(self: Dir, allocator: mem.Allocator, file_path: []const u8,
/// If `size_hint` is specified the initial buffer size is calculated using
/// that value, otherwise the effective file size is used instead.
/// Allows specifying alignment and a sentinel value.
+/// On Windows, `file_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
+/// On WASI, `file_path` should be encoded as valid UTF-8.
+/// On other platforms, `file_path` is an opaque sequence of bytes with no particular encoding.
pub fn readFileAllocOptions(
self: Dir,
allocator: mem.Allocator,
@@ -1818,9 +1865,13 @@ pub const DeleteTreeError = error{
/// This error is unreachable if `sub_path` does not contain a path separator.
NotDir,
- /// On Windows, file paths must be valid Unicode.
+ /// WASI-only; file paths must be valid UTF-8.
InvalidUtf8,
+ /// Windows-only; file paths provided by the user must be valid WTF-8.
+ /// https://simonsapin.github.io/wtf-8/
+ InvalidWtf8,
+
/// On Windows, file paths cannot contain these characters:
/// '/', '*', '?', '"', '<', '>', '|'
BadPathName,
@@ -1833,6 +1884,9 @@ pub const DeleteTreeError = error{
/// removes it. If it cannot be removed because it is a non-empty directory,
/// this function recursively removes its entries and then tries again.
/// This operation is not atomic on most file systems.
+/// On Windows, `sub_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
+/// On WASI, `sub_path` should be encoded as valid UTF-8.
+/// On other platforms, `sub_path` is an opaque sequence of bytes with no particular encoding.
pub fn deleteTree(self: Dir, sub_path: []const u8) DeleteTreeError!void {
var initial_iterable_dir = (try self.deleteTreeOpenInitialSubpath(sub_path, .file)) orelse return;
@@ -1886,6 +1940,7 @@ pub fn deleteTree(self: Dir, sub_path: []const u8) DeleteTreeError!void {
error.SystemResources,
error.Unexpected,
error.InvalidUtf8,
+ error.InvalidWtf8,
error.BadPathName,
error.NetworkNotFound,
error.DeviceBusy,
@@ -1917,6 +1972,7 @@ pub fn deleteTree(self: Dir, sub_path: []const u8) DeleteTreeError!void {
error.AccessDenied,
error.InvalidUtf8,
+ error.InvalidWtf8,
error.SymLinkLoop,
error.NameTooLong,
error.SystemResources,
@@ -1980,6 +2036,7 @@ pub fn deleteTree(self: Dir, sub_path: []const u8) DeleteTreeError!void {
error.SystemResources,
error.Unexpected,
error.InvalidUtf8,
+ error.InvalidWtf8,
error.BadPathName,
error.NetworkNotFound,
error.DeviceBusy,
@@ -2001,6 +2058,7 @@ pub fn deleteTree(self: Dir, sub_path: []const u8) DeleteTreeError!void {
error.AccessDenied,
error.InvalidUtf8,
+ error.InvalidWtf8,
error.SymLinkLoop,
error.NameTooLong,
error.SystemResources,
@@ -2029,6 +2087,9 @@ pub fn deleteTree(self: Dir, sub_path: []const u8) DeleteTreeError!void {
/// Like `deleteTree`, but only keeps one `Iterator` active at a time to minimize the function's stack size.
/// This is slower than `deleteTree` but uses less stack space.
+/// On Windows, `sub_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
+/// On WASI, `sub_path` should be encoded as valid UTF-8.
+/// On other platforms, `sub_path` is an opaque sequence of bytes with no particular encoding.
pub fn deleteTreeMinStackSize(self: Dir, sub_path: []const u8) DeleteTreeError!void {
return self.deleteTreeMinStackSizeWithKindHint(sub_path, .file);
}
@@ -2081,6 +2142,7 @@ fn deleteTreeMinStackSizeWithKindHint(self: Dir, sub_path: []const u8, kind_hint
error.SystemResources,
error.Unexpected,
error.InvalidUtf8,
+ error.InvalidWtf8,
error.BadPathName,
error.NetworkNotFound,
error.DeviceBusy,
@@ -2109,6 +2171,7 @@ fn deleteTreeMinStackSizeWithKindHint(self: Dir, sub_path: []const u8, kind_hint
error.AccessDenied,
error.InvalidUtf8,
+ error.InvalidWtf8,
error.SymLinkLoop,
error.NameTooLong,
error.SystemResources,
@@ -2178,6 +2241,7 @@ fn deleteTreeOpenInitialSubpath(self: Dir, sub_path: []const u8, kind_hint: File
error.SystemResources,
error.Unexpected,
error.InvalidUtf8,
+ error.InvalidWtf8,
error.BadPathName,
error.DeviceBusy,
error.NetworkNotFound,
@@ -2196,6 +2260,7 @@ fn deleteTreeOpenInitialSubpath(self: Dir, sub_path: []const u8, kind_hint: File
error.AccessDenied,
error.InvalidUtf8,
+ error.InvalidWtf8,
error.SymLinkLoop,
error.NameTooLong,
error.SystemResources,
@@ -2216,6 +2281,9 @@ fn deleteTreeOpenInitialSubpath(self: Dir, sub_path: []const u8, kind_hint: File
pub const WriteFileError = File.WriteError || File.OpenError;
/// Deprecated: use `writeFile2`.
+/// On Windows, `sub_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
+/// On WASI, `sub_path` should be encoded as valid UTF-8.
+/// On other platforms, `sub_path` is an opaque sequence of bytes with no particular encoding.
pub fn writeFile(self: Dir, sub_path: []const u8, data: []const u8) WriteFileError!void {
return writeFile2(self, .{
.sub_path = sub_path,
@@ -2225,6 +2293,9 @@ pub fn writeFile(self: Dir, sub_path: []const u8, data: []const u8) WriteFileErr
}
pub const WriteFileOptions = struct {
+ /// On Windows, `sub_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
+ /// On WASI, `sub_path` should be encoded as valid UTF-8.
+ /// On other platforms, `sub_path` is an opaque sequence of bytes with no particular encoding.
sub_path: []const u8,
data: []const u8,
flags: File.CreateFlags = .{},
@@ -2239,8 +2310,10 @@ pub fn writeFile2(self: Dir, options: WriteFileOptions) WriteFileError!void {
pub const AccessError = posix.AccessError;
-/// Test accessing `path`.
-/// `path` is UTF-8-encoded.
+/// Test accessing `sub_path`.
+/// On Windows, `sub_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
+/// On WASI, `sub_path` should be encoded as valid UTF-8.
+/// On other platforms, `sub_path` is an opaque sequence of bytes with no particular encoding.
/// Be careful of Time-Of-Check-Time-Of-Use race conditions when using this function.
/// For example, instead of testing if a file exists and then opening it, just
/// open it and handle the error for file not found.
@@ -2270,17 +2343,14 @@ pub fn accessZ(self: Dir, sub_path: [*:0]const u8, flags: File.OpenFlags) Access
.write_only => @as(u32, posix.W_OK),
.read_write => @as(u32, posix.R_OK | posix.W_OK),
};
- const result = if (fs.need_async_thread and flags.intended_io_mode != .blocking)
- std.event.Loop.instance.?.faccessatZ(self.fd, sub_path, os_mode, 0)
- else
- posix.faccessatZ(self.fd, sub_path, os_mode, 0);
+ const result = posix.faccessatZ(self.fd, sub_path, os_mode, 0);
return result;
}
/// Same as `access` except asserts the target OS is Windows and the path parameter is
-/// * WTF-16 encoded
+/// * WTF-16 LE encoded
/// * null-terminated
-/// * NtDll prefixed
+/// * relative or has the NT namespace prefix
/// TODO currently this ignores `flags`.
pub fn accessW(self: Dir, sub_path_w: [*:0]const u16, flags: File.OpenFlags) AccessError!void {
_ = flags;
@@ -2302,6 +2372,9 @@ pub const PrevStatus = enum {
/// atime, and mode of the source file so that the next call to `updateFile` will not need a copy.
/// Returns the previous status of the file before updating.
/// If any of the directories do not exist for dest_path, they are created.
+/// On Windows, both paths should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
+/// On WASI, both paths should be encoded as valid UTF-8.
+/// On other platforms, both paths are an opaque sequence of bytes with no particular encoding.
pub fn updateFile(
source_dir: Dir,
source_path: []const u8,
@@ -2353,6 +2426,9 @@ pub const CopyFileError = File.OpenError || File.StatError ||
/// On Linux, until https://patchwork.kernel.org/patch/9636735/ is merged and readily available,
/// there is a possibility of power loss or application termination leaving temporary files present
/// in the same directory as dest_path.
+/// On Windows, both paths should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
+/// On WASI, both paths should be encoded as valid UTF-8.
+/// On other platforms, both paths are an opaque sequence of bytes with no particular encoding.
pub fn copyFile(
source_dir: Dir,
source_path: []const u8,
@@ -2440,6 +2516,9 @@ pub const AtomicFileOptions = struct {
/// Always call `AtomicFile.deinit` to clean up, regardless of whether
/// `AtomicFile.finish` succeeded. `dest_path` must remain valid until
/// `AtomicFile.deinit` is called.
+/// On Windows, `dest_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
+/// On WASI, `dest_path` should be encoded as valid UTF-8.
+/// On other platforms, `dest_path` is an opaque sequence of bytes with no particular encoding.
pub fn atomicFile(self: Dir, dest_path: []const u8, options: AtomicFileOptions) !AtomicFile {
if (fs.path.dirname(dest_path)) |dirname| {
const dir = if (options.make_path)
@@ -2457,10 +2536,7 @@ pub const Stat = File.Stat;
pub const StatError = File.StatError;
pub fn stat(self: Dir) StatError!Stat {
- const file: File = .{
- .handle = self.fd,
- .capable_io_mode = .blocking,
- };
+ const file: File = .{ .handle = self.fd }; // view the directory handle as a `File` so `File.stat` can be reused
return file.stat();
}
@@ -2474,6 +2550,9 @@ pub const StatFileError = File.OpenError || File.StatError || posix.FStatAtError
/// Symlinks are followed.
///
/// `sub_path` may be absolute, in which case `self` is ignored.
+/// On Windows, `sub_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
+/// On WASI, `sub_path` should be encoded as valid UTF-8.
+/// On other platforms, `sub_path` is an opaque sequence of bytes with no particular encoding.
pub fn statFile(self: Dir, sub_path: []const u8) StatFileError!Stat {
if (builtin.os.tag == .windows) {
var file = try self.openFile(sub_path, .{});
@@ -2481,8 +2560,8 @@ pub fn statFile(self: Dir, sub_path: []const u8) StatFileError!Stat {
return file.stat();
}
if (builtin.os.tag == .wasi and !builtin.link_libc) {
- const st = try posix.fstatatWasi(self.fd, sub_path, posix.wasi.LOOKUP_SYMLINK_FOLLOW);
- return Stat.fromSystem(st);
+ const st = try posix.fstatat_wasi(self.fd, sub_path, .{ .SYMLINK_FOLLOW = true });
+ return Stat.fromWasi(st);
}
const st = try posix.fstatat(self.fd, sub_path, 0);
return Stat.fromSystem(st);
@@ -2496,10 +2575,7 @@ pub const ChmodError = File.ChmodError;
/// of the directory. Additionally, the directory must have been opened
/// with `OpenDirOptions{ .iterate = true }`.
pub fn chmod(self: Dir, new_mode: File.Mode) ChmodError!void {
- const file: File = .{
- .handle = self.fd,
- .capable_io_mode = .blocking,
- };
+ const file: File = .{ .handle = self.fd }; // view the directory handle as a `File` so `File.chmod` can be reused
try file.chmod(new_mode);
}
@@ -2510,10 +2586,7 @@ pub fn chmod(self: Dir, new_mode: File.Mode) ChmodError!void {
/// must have been opened with `OpenDirOptions{ .iterate = true }`. If the
/// owner or group is specified as `null`, the ID is not changed.
pub fn chown(self: Dir, owner: ?File.Uid, group: ?File.Gid) ChownError!void {
- const file: File = .{
- .handle = self.fd,
- .capable_io_mode = .blocking,
- };
+ const file: File = .{ .handle = self.fd }; // view the directory handle as a `File` so `File.chown` can be reused
try file.chown(owner, group);
}
@@ -2525,10 +2598,7 @@ pub const SetPermissionsError = File.SetPermissionsError;
/// Sets permissions according to the provided `Permissions` struct.
/// This method is *NOT* available on WASI
pub fn setPermissions(self: Dir, permissions: Permissions) SetPermissionsError!void {
- const file: File = .{
- .handle = self.fd,
- .capable_io_mode = .blocking,
- };
+ const file: File = .{ .handle = self.fd }; // view the directory handle as a `File` so `File.setPermissions` can be reused
try file.setPermissions(permissions);
}
@@ -2537,10 +2607,7 @@ pub const MetadataError = File.MetadataError;
/// Returns a `Metadata` struct, representing the permissions on the directory
pub fn metadata(self: Dir) MetadataError!Metadata {
- const file: File = .{
- .handle = self.fd,
- .capable_io_mode = .blocking,
- };
+ const file: File = .{ .handle = self.fd }; // view the directory handle as a `File` so `File.metadata` can be reused
return try file.metadata();
}
@@ -2554,3 +2621,4 @@ const posix = std.os;
const mem = std.mem;
const fs = std.fs;
const Allocator = std.mem.Allocator;
+const assert = std.debug.assert;
diff --git a/lib/std/fs/File.zig b/lib/std/fs/File.zig
index b286380c85..669f1b72e3 100644
--- a/lib/std/fs/File.zig
+++ b/lib/std/fs/File.zig
@@ -1,20 +1,6 @@
/// The OS-specific file descriptor or file handle.
handle: Handle,
-/// On some systems, such as Linux, file system file descriptors are incapable
-/// of non-blocking I/O. This forces us to perform asynchronous I/O on a dedicated thread,
-/// to achieve non-blocking file-system I/O. To do this, `File` must be aware of whether
-/// it is a file system file descriptor, or, more specifically, whether the I/O is always
-/// blocking.
-capable_io_mode: io.ModeOverride = io.default_mode,
-
-/// Furthermore, even when `std.options.io_mode` is async, it is still sometimes desirable
-/// to perform blocking I/O, although not by default. For example, when printing a
-/// stack trace to stderr. This field tracks both by acting as an overriding I/O mode.
-/// When not building in async I/O mode, the type only has the `.blocking` tag, making
-/// it a zero-bit type.
-intended_io_mode: io.ModeOverride = io.default_mode,
-
pub const Handle = posix.fd_t;
pub const Mode = posix.mode_t;
pub const INode = posix.ino_t;
@@ -54,14 +40,23 @@ pub const OpenError = error{
AccessDenied,
PipeBusy,
NameTooLong,
- /// On Windows, file paths must be valid Unicode.
+ /// WASI-only; file paths must be valid UTF-8.
InvalidUtf8,
+ /// Windows-only; file paths provided by the user must be valid WTF-8.
+ /// https://simonsapin.github.io/wtf-8/
+ InvalidWtf8,
/// On Windows, file paths cannot contain these characters:
/// '/', '*', '?', '"', '<', '>', '|'
BadPathName,
Unexpected,
/// On Windows, `\\server` or `\\server\share` was not found.
NetworkNotFound,
+ /// On Windows, antivirus software is enabled by default. It can be
+ /// disabled, but Windows Update sometimes ignores the user's preference
+ /// and re-enables it. When enabled, antivirus software on Windows
+ /// intercepts file system operations and makes them significantly slower
+ /// in addition to possibly failing with this error code.
+ AntivirusInterference,
} || posix.OpenError || posix.FlockError;
pub const OpenMode = enum {
@@ -108,16 +103,8 @@ pub const OpenFlags = struct {
/// Sets whether or not to wait until the file is locked to return. If set to true,
/// `error.WouldBlock` will be returned instead of waiting. Otherwise, the call will
/// wait until the file is available to proceed.
- /// In async I/O mode, non-blocking at the OS level is
- /// determined by `intended_io_mode`, and `true` means `error.WouldBlock` is returned,
- /// and `false` means `error.WouldBlock` is handled by the event loop.
lock_nonblocking: bool = false,
- /// Setting this to `.blocking` prevents `O.NONBLOCK` from being passed even
- /// if `std.io.is_async`. It allows the use of `nosuspend` when calling functions
- /// related to opening the file, reading, writing, and locking.
- intended_io_mode: io.ModeOverride = io.default_mode,
-
/// Set this to allow the opened file to automatically become the
/// controlling TTY for the current process.
allow_ctty: bool = false,
@@ -172,19 +159,11 @@ pub const CreateFlags = struct {
/// Sets whether or not to wait until the file is locked to return. If set to true,
/// `error.WouldBlock` will be returned instead of waiting. Otherwise, the call will
/// wait until the file is available to proceed.
- /// In async I/O mode, non-blocking at the OS level is
- /// determined by `intended_io_mode`, and `true` means `error.WouldBlock` is returned,
- /// and `false` means `error.WouldBlock` is handled by the event loop.
lock_nonblocking: bool = false,
/// For POSIX systems this is the file system mode the file will
/// be created with. On other systems this is always 0.
mode: Mode = default_mode,
-
- /// Setting this to `.blocking` prevents `O.NONBLOCK` from being passed even
- /// if `std.io.is_async`. It allows the use of `nosuspend` when calling functions
- /// related to opening the file, reading, writing, and locking.
- intended_io_mode: io.ModeOverride = io.default_mode,
};
/// Upon success, the stream is in an uninitialized state. To continue using it,
@@ -192,8 +171,6 @@ pub const CreateFlags = struct {
pub fn close(self: File) void {
if (is_windows) {
windows.CloseHandle(self.handle);
- } else if (self.capable_io_mode != self.intended_io_mode) {
- std.event.Loop.instance.?.close(self.handle);
} else {
posix.close(self.handle);
}
@@ -322,49 +299,59 @@ pub const Stat = struct {
/// Last status/metadata change time in nanoseconds, relative to UTC 1970-01-01.
ctime: i128,
- pub fn fromSystem(st: posix.system.Stat) Stat {
+ pub fn fromSystem(st: posix.Stat) Stat { // builds a `Stat` from the POSIX stat structure; WASI stats go through `fromWasi`
const atime = st.atime();
const mtime = st.mtime();
const ctime = st.ctime();
- const kind: Kind = if (builtin.os.tag == .wasi and !builtin.link_libc) switch (st.filetype) {
- .BLOCK_DEVICE => .block_device,
- .CHARACTER_DEVICE => .character_device,
- .DIRECTORY => .directory,
- .SYMBOLIC_LINK => .sym_link,
- .REGULAR_FILE => .file,
- .SOCKET_STREAM, .SOCKET_DGRAM => .unix_domain_socket,
- else => .unknown,
- } else blk: {
- const m = st.mode & posix.S.IFMT;
- switch (m) {
- posix.S.IFBLK => break :blk .block_device,
- posix.S.IFCHR => break :blk .character_device,
- posix.S.IFDIR => break :blk .directory,
- posix.S.IFIFO => break :blk .named_pipe,
- posix.S.IFLNK => break :blk .sym_link,
- posix.S.IFREG => break :blk .file,
- posix.S.IFSOCK => break :blk .unix_domain_socket,
- else => {},
- }
- if (builtin.os.tag.isSolarish()) switch (m) {
- posix.S.IFDOOR => break :blk .door,
- posix.S.IFPORT => break :blk .event_port,
- else => {},
- };
-
- break :blk .unknown;
- };
-
- return Stat{
+ return .{
.inode = st.ino,
- .size = @as(u64, @bitCast(st.size)),
+ .size = @bitCast(st.size), // destination type is inferred from the `size` field
.mode = st.mode,
- .kind = kind,
+ .kind = k: { // the kind is now computed inline in the struct literal
+ const m = st.mode & posix.S.IFMT; // mask the mode with `S.IFMT` before comparing against the `S.IF*` constants
+ switch (m) {
+ posix.S.IFBLK => break :k .block_device,
+ posix.S.IFCHR => break :k .character_device,
+ posix.S.IFDIR => break :k .directory,
+ posix.S.IFIFO => break :k .named_pipe,
+ posix.S.IFLNK => break :k .sym_link,
+ posix.S.IFREG => break :k .file,
+ posix.S.IFSOCK => break :k .unix_domain_socket,
+ else => {},
+ }
+ if (builtin.os.tag.isSolarish()) switch (m) { // door and event-port kinds are only checked when `isSolarish()` is true
+ posix.S.IFDOOR => break :k .door,
+ posix.S.IFPORT => break :k .event_port,
+ else => {},
+ };
+
+ break :k .unknown; // nothing matched
+ },
.atime = @as(i128, atime.tv_sec) * std.time.ns_per_s + atime.tv_nsec,
.mtime = @as(i128, mtime.tv_sec) * std.time.ns_per_s + mtime.tv_nsec,
.ctime = @as(i128, ctime.tv_sec) * std.time.ns_per_s + ctime.tv_nsec,
};
}
+
+ pub fn fromWasi(st: std.os.wasi.filestat_t) Stat { // builds a `Stat` from a WASI `filestat_t`
+ return .{
+ .inode = st.ino,
+ .size = @bitCast(st.size),
+ .mode = 0, // no mode is read from the WASI stat; always reported as 0
+ .kind = switch (st.filetype) { // the kind comes from `filetype` rather than from mode bits
+ .BLOCK_DEVICE => .block_device,
+ .CHARACTER_DEVICE => .character_device,
+ .DIRECTORY => .directory,
+ .SYMBOLIC_LINK => .sym_link,
+ .REGULAR_FILE => .file,
+ .SOCKET_STREAM, .SOCKET_DGRAM => .unix_domain_socket, // both socket file types map to the same kind
+ else => .unknown,
+ },
+ .atime = st.atim, // timestamps are copied through without conversion
+ .mtime = st.mtim,
+ .ctime = st.ctim,
+ };
+ }
};
pub const StatError = posix.FStatError;
@@ -387,7 +374,7 @@ pub fn stat(self: File) StatError!Stat {
.ACCESS_DENIED => return error.AccessDenied,
else => return windows.unexpectedStatus(rc),
}
- return Stat{
+ return .{
.inode = info.InternalInformation.IndexNumber,
.size = @as(u64, @bitCast(info.StandardInformation.EndOfFile)),
.mode = 0,
@@ -417,6 +404,11 @@ pub fn stat(self: File) StatError!Stat {
};
}
+ if (builtin.os.tag == .wasi and !builtin.link_libc) {
+ const st = try posix.fstat_wasi(self.handle);
+ return Stat.fromWasi(st);
+ }
+
const st = try posix.fstat(self.handle);
return Stat.fromSystem(st);
}
@@ -608,10 +600,11 @@ pub fn setPermissions(self: File, permissions: Permissions) SetPermissionsError!
/// Cross-platform representation of file metadata.
/// Platform-specific functionality is available through the `inner` field.
pub const Metadata = struct {
- /// You may use the `inner` field to use platform-specific functionality
+ /// Exposes platform-specific functionality.
inner: switch (builtin.os.tag) {
.windows => MetadataWindows,
.linux => MetadataLinux,
+ .wasi => MetadataWasi,
else => MetadataUnix,
},
@@ -660,12 +653,12 @@ pub const MetadataUnix = struct {
/// Returns the size of the file
pub fn size(self: Self) u64 {
- return @as(u64, @intCast(self.stat.size));
+ return @intCast(self.stat.size);
}
/// Returns a `Permissions` struct, representing the permissions on the file
pub fn permissions(self: Self) Permissions {
- return Permissions{ .inner = PermissionsUnix{ .mode = self.stat.mode } };
+ return .{ .inner = .{ .mode = self.stat.mode } };
}
/// Returns the `Kind` of the file
@@ -788,6 +781,42 @@ pub const MetadataLinux = struct {
}
};
+/// File metadata for WASI targets, wrapping the raw `filestat_t`.
+pub const MetadataWasi = struct {
+    stat: std.os.wasi.filestat_t,
+
+    /// Returns the size of the file
+    pub fn size(self: @This()) u64 {
+        return self.stat.size;
+    }
+
+    /// Returns a `Permissions` struct, representing the permissions on the file.
+    /// The WASI `filestat_t` has no `mode` field to read, so the mode is
+    /// reported as 0, matching what `Stat.fromWasi` does.
+    pub fn permissions(self: @This()) Permissions {
+        _ = self;
+        return .{ .inner = .{ .mode = 0 } };
+    }
+
+    /// Returns the `Kind` of the file, derived from the WASI `filetype`.
+    /// Both socket file types map to `.unix_domain_socket`; any other
+    /// unlisted file type maps to `.unknown`.
+    pub fn kind(self: @This()) Kind {
+        return switch (self.stat.filetype) {
+            .BLOCK_DEVICE => .block_device,
+            .CHARACTER_DEVICE => .character_device,
+            .DIRECTORY => .directory,
+            .SYMBOLIC_LINK => .sym_link,
+            .REGULAR_FILE => .file,
+            .SOCKET_STREAM, .SOCKET_DGRAM => .unix_domain_socket,
+            else => .unknown,
+        };
+    }
+
+    /// Returns the `atim` timestamp of the WASI stat.
+    pub fn accessed(self: @This()) i128 {
+        return self.stat.atim;
+    }
+
+    /// Returns the `mtim` timestamp of the WASI stat.
+    pub fn modified(self: @This()) i128 {
+        return self.stat.mtim;
+    }
+
+    /// Returns the `ctim` timestamp of the WASI stat.
+    /// NOTE(review): `ctim` is conventionally the last status-change time
+    /// rather than a creation time; confirm this is the intended value.
+    pub fn created(self: @This()) ?i128 {
+        return self.stat.ctim;
+    }
+};
+
pub const MetadataWindows = struct {
attributes: windows.DWORD,
reparse_tag: windows.DWORD,
@@ -805,7 +834,7 @@ pub const MetadataWindows = struct {
/// Returns a `Permissions` struct, representing the permissions on the file
pub fn permissions(self: Self) Permissions {
- return Permissions{ .inner = PermissionsWindows{ .attributes = self.attributes } };
+ return .{ .inner = .{ .attributes = self.attributes } };
}
/// Returns the `Kind` of the file.
@@ -843,7 +872,7 @@ pub const MetadataWindows = struct {
pub const MetadataError = posix.FStatError;
pub fn metadata(self: File) MetadataError!Metadata {
- return Metadata{
+ return .{
.inner = switch (builtin.os.tag) {
.windows => blk: {
var io_status_block: windows.IO_STATUS_BLOCK = undefined;
@@ -878,7 +907,7 @@ pub fn metadata(self: File) MetadataError!Metadata {
break :reparse_blk 0;
};
- break :blk MetadataWindows{
+ break :blk .{
.attributes = info.BasicInformation.FileAttributes,
.reparse_tag = reparse_tag,
._size = @as(u64, @bitCast(info.StandardInformation.EndOfFile)),
@@ -919,16 +948,12 @@ pub fn metadata(self: File) MetadataError!Metadata {
else => |err| return posix.unexpectedErrno(err),
}
- break :blk MetadataLinux{
+ break :blk .{
.statx = stx,
};
},
- else => blk: {
- const st = try posix.fstat(self.handle);
- break :blk MetadataUnix{
- .stat = st,
- };
- },
+ .wasi => .{ .stat = try posix.fstat_wasi(self.handle) },
+ else => .{ .stat = try posix.fstat(self.handle) },
},
};
}
@@ -1013,14 +1038,10 @@ pub const PReadError = posix.PReadError;
pub fn read(self: File, buffer: []u8) ReadError!usize {
if (is_windows) {
- return windows.ReadFile(self.handle, buffer, null, self.intended_io_mode);
+ return windows.ReadFile(self.handle, buffer, null);
}
- if (self.intended_io_mode == .blocking) {
- return posix.read(self.handle, buffer);
- } else {
- return std.event.Loop.instance.?.read(self.handle, buffer, self.capable_io_mode != self.intended_io_mode);
- }
+ return posix.read(self.handle, buffer);
}
/// Returns the number of bytes read. If the number read is smaller than `buffer.len`, it
@@ -1039,14 +1060,10 @@ pub fn readAll(self: File, buffer: []u8) ReadError!usize {
/// https://github.com/ziglang/zig/issues/12783
pub fn pread(self: File, buffer: []u8, offset: u64) PReadError!usize {
if (is_windows) {
- return windows.ReadFile(self.handle, buffer, offset, self.intended_io_mode);
+ return windows.ReadFile(self.handle, buffer, offset);
}
- if (self.intended_io_mode == .blocking) {
- return posix.pread(self.handle, buffer, offset);
- } else {
- return std.event.Loop.instance.?.pread(self.handle, buffer, offset, self.capable_io_mode != self.intended_io_mode);
- }
+ return posix.pread(self.handle, buffer, offset);
}
/// Returns the number of bytes read. If the number read is smaller than `buffer.len`, it
@@ -1069,14 +1086,10 @@ pub fn readv(self: File, iovecs: []const posix.iovec) ReadError!usize {
// TODO improve this to use ReadFileScatter
if (iovecs.len == 0) return @as(usize, 0);
const first = iovecs[0];
- return windows.ReadFile(self.handle, first.iov_base[0..first.iov_len], null, self.intended_io_mode);
+ return windows.ReadFile(self.handle, first.iov_base[0..first.iov_len], null);
}
- if (self.intended_io_mode == .blocking) {
- return posix.readv(self.handle, iovecs);
- } else {
- return std.event.Loop.instance.?.readv(self.handle, iovecs, self.capable_io_mode != self.intended_io_mode);
- }
+ return posix.readv(self.handle, iovecs);
}
/// Returns the number of bytes read. If the number read is smaller than the total bytes
@@ -1129,14 +1142,10 @@ pub fn preadv(self: File, iovecs: []const posix.iovec, offset: u64) PReadError!u
// TODO improve this to use ReadFileScatter
if (iovecs.len == 0) return @as(usize, 0);
const first = iovecs[0];
- return windows.ReadFile(self.handle, first.iov_base[0..first.iov_len], offset, self.intended_io_mode);
+ return windows.ReadFile(self.handle, first.iov_base[0..first.iov_len], offset);
}
- if (self.intended_io_mode == .blocking) {
- return posix.preadv(self.handle, iovecs, offset);
- } else {
- return std.event.Loop.instance.?.preadv(self.handle, iovecs, offset, self.capable_io_mode != self.intended_io_mode);
- }
+ return posix.preadv(self.handle, iovecs, offset);
}
/// Returns the number of bytes read. If the number read is smaller than the total bytes
@@ -1173,14 +1182,10 @@ pub const PWriteError = posix.PWriteError;
pub fn write(self: File, bytes: []const u8) WriteError!usize {
if (is_windows) {
- return windows.WriteFile(self.handle, bytes, null, self.intended_io_mode);
+ return windows.WriteFile(self.handle, bytes, null);
}
- if (self.intended_io_mode == .blocking) {
- return posix.write(self.handle, bytes);
- } else {
- return std.event.Loop.instance.?.write(self.handle, bytes, self.capable_io_mode != self.intended_io_mode);
- }
+ return posix.write(self.handle, bytes);
}
pub fn writeAll(self: File, bytes: []const u8) WriteError!void {
@@ -1194,14 +1199,10 @@ pub fn writeAll(self: File, bytes: []const u8) WriteError!void {
/// https://github.com/ziglang/zig/issues/12783
pub fn pwrite(self: File, bytes: []const u8, offset: u64) PWriteError!usize {
if (is_windows) {
- return windows.WriteFile(self.handle, bytes, offset, self.intended_io_mode);
+ return windows.WriteFile(self.handle, bytes, offset);
}
- if (self.intended_io_mode == .blocking) {
- return posix.pwrite(self.handle, bytes, offset);
- } else {
- return std.event.Loop.instance.?.pwrite(self.handle, bytes, offset, self.capable_io_mode != self.intended_io_mode);
- }
+ return posix.pwrite(self.handle, bytes, offset);
}
/// On Windows, this function currently does alter the file pointer.
@@ -1220,14 +1221,10 @@ pub fn writev(self: File, iovecs: []const posix.iovec_const) WriteError!usize {
// TODO improve this to use WriteFileScatter
if (iovecs.len == 0) return @as(usize, 0);
const first = iovecs[0];
- return windows.WriteFile(self.handle, first.iov_base[0..first.iov_len], null, self.intended_io_mode);
+ return windows.WriteFile(self.handle, first.iov_base[0..first.iov_len], null);
}
- if (self.intended_io_mode == .blocking) {
- return posix.writev(self.handle, iovecs);
- } else {
- return std.event.Loop.instance.?.writev(self.handle, iovecs, self.capable_io_mode != self.intended_io_mode);
- }
+ return posix.writev(self.handle, iovecs);
}
/// The `iovecs` parameter is mutable because:
@@ -1271,14 +1268,10 @@ pub fn pwritev(self: File, iovecs: []posix.iovec_const, offset: u64) PWriteError
// TODO improve this to use WriteFileScatter
if (iovecs.len == 0) return @as(usize, 0);
const first = iovecs[0];
- return windows.WriteFile(self.handle, first.iov_base[0..first.iov_len], offset, self.intended_io_mode);
+ return windows.WriteFile(self.handle, first.iov_base[0..first.iov_len], offset);
}
- if (self.intended_io_mode == .blocking) {
- return posix.pwritev(self.handle, iovecs, offset);
- } else {
- return std.event.Loop.instance.?.pwritev(self.handle, iovecs, offset, self.capable_io_mode != self.intended_io_mode);
- }
+ return posix.pwritev(self.handle, iovecs, offset);
}
/// The `iovecs` parameter is mutable because this function needs to mutate the fields in
diff --git a/lib/std/fs/path.zig b/lib/std/fs/path.zig
index 55d4490053..c297bb1e2a 100644
--- a/lib/std/fs/path.zig
+++ b/lib/std/fs/path.zig
@@ -1,3 +1,17 @@
+//! POSIX paths are arbitrary sequences of `u8` with no particular encoding.
+//!
+//! Windows paths are arbitrary sequences of `u16` (WTF-16).
+//! For cross-platform APIs that deal with sequences of `u8`, Windows
+//! paths are encoded by Zig as [WTF-8](https://simonsapin.github.io/wtf-8/).
+//! WTF-8 is a superset of UTF-8 that allows encoding surrogate codepoints,
+//! which enables lossless roundtripping when converting to/from WTF-16
+//! (as long as the WTF-8 encoded surrogate codepoints do not form a pair).
+//!
+//! WASI paths are sequences of valid Unicode scalar values,
+//! which means that WASI is unable to handle paths that cannot be
+//! encoded as well-formed UTF-8/UTF-16.
+//! https://github.com/WebAssembly/wasi-filesystem/issues/17#issuecomment-1430639353
+
const builtin = @import("builtin");
const std = @import("../std.zig");
const debug = std.debug;
@@ -438,7 +452,7 @@ fn networkShareServersEql(ns1: []const u8, ns2: []const u8) bool {
var it1 = mem.tokenizeScalar(u8, ns1, sep1);
var it2 = mem.tokenizeScalar(u8, ns2, sep2);
- return windows.eqlIgnoreCaseUtf8(it1.next().?, it2.next().?);
+ return windows.eqlIgnoreCaseWtf8(it1.next().?, it2.next().?);
}
fn compareDiskDesignators(kind: WindowsPath.Kind, p1: []const u8, p2: []const u8) bool {
@@ -458,7 +472,7 @@ fn compareDiskDesignators(kind: WindowsPath.Kind, p1: []const u8, p2: []const u8
var it1 = mem.tokenizeScalar(u8, p1, sep1);
var it2 = mem.tokenizeScalar(u8, p2, sep2);
- return windows.eqlIgnoreCaseUtf8(it1.next().?, it2.next().?) and windows.eqlIgnoreCaseUtf8(it1.next().?, it2.next().?);
+ return windows.eqlIgnoreCaseWtf8(it1.next().?, it2.next().?) and windows.eqlIgnoreCaseWtf8(it1.next().?, it2.next().?);
},
}
}
@@ -1099,7 +1113,7 @@ pub fn relativeWindows(allocator: Allocator, from: []const u8, to: []const u8) !
const from_component = from_it.next() orelse return allocator.dupe(u8, to_it.rest());
const to_rest = to_it.rest();
if (to_it.next()) |to_component| {
- if (windows.eqlIgnoreCaseUtf8(from_component, to_component))
+ if (windows.eqlIgnoreCaseWtf8(from_component, to_component))
continue;
}
var up_index_end = "..".len;
@@ -1564,14 +1578,14 @@ pub fn ComponentIterator(comptime path_type: PathType, comptime T: type) type {
};
}
-pub const NativeUtf8ComponentIterator = ComponentIterator(switch (native_os) {
+pub const NativeComponentIterator = ComponentIterator(switch (native_os) {
.windows => .windows,
.uefi => .uefi,
else => .posix,
}, u8);
-pub fn componentIterator(path: []const u8) !NativeUtf8ComponentIterator {
- return NativeUtf8ComponentIterator.init(path);
+pub fn componentIterator(path: []const u8) !NativeComponentIterator {
+ return NativeComponentIterator.init(path);
}
test "ComponentIterator posix" {
@@ -1826,7 +1840,7 @@ test "ComponentIterator windows" {
}
}
-test "ComponentIterator windows UTF-16" {
+test "ComponentIterator windows WTF-16" {
// TODO: Fix on big endian architectures
if (builtin.cpu.arch.endian() != .little) {
return error.SkipZigTest;
@@ -1925,3 +1939,18 @@ test "ComponentIterator roots" {
try std.testing.expectEqualStrings("//a/b//", it.root().?);
}
}
+
+/// Format a path encoded as bytes for display as UTF-8.
+/// Returns a Formatter for the given path. The path will be converted to valid UTF-8
+/// during formatting. This is a lossy conversion if the path contains any ill-formed UTF-8.
+/// Ill-formed UTF-8 byte sequences are replaced by the replacement character (U+FFFD)
+/// according to "U+FFFD Substitution of Maximal Subparts" from Chapter 3 of
+/// the Unicode standard, and as specified by https://encoding.spec.whatwg.org/#utf-8-decoder
+pub const fmtAsUtf8Lossy = std.unicode.fmtUtf8;
+
+/// Format a path encoded as WTF-16 LE for display as UTF-8.
+/// Returns a Formatter for a (potentially ill-formed) UTF-16 LE path.
+/// The path will be converted to valid UTF-8 during formatting. This is
+/// a lossy conversion if the path contains any unpaired surrogates.
+/// Unpaired surrogates are replaced by the replacement character (U+FFFD).
+pub const fmtWtf16LeAsUtf8Lossy = std.unicode.fmtUtf16Le;
diff --git a/lib/std/fs/test.zig b/lib/std/fs/test.zig
index 63102e49d2..33bea3c322 100644
--- a/lib/std/fs/test.zig
+++ b/lib/std/fs/test.zig
@@ -26,39 +26,39 @@ const PathType = enum {
}
pub const TransformError = std.os.RealPathError || error{OutOfMemory};
- pub const TransformFn = fn (allocator: mem.Allocator, dir: Dir, relative_path: []const u8) TransformError![]const u8;
+ pub const TransformFn = fn (allocator: mem.Allocator, dir: Dir, relative_path: [:0]const u8) TransformError![:0]const u8;
pub fn getTransformFn(comptime path_type: PathType) TransformFn {
switch (path_type) {
.relative => return struct {
- fn transform(allocator: mem.Allocator, dir: Dir, relative_path: []const u8) TransformError![]const u8 {
+ fn transform(allocator: mem.Allocator, dir: Dir, relative_path: [:0]const u8) TransformError![:0]const u8 {
_ = allocator;
_ = dir;
return relative_path;
}
}.transform,
.absolute => return struct {
- fn transform(allocator: mem.Allocator, dir: Dir, relative_path: []const u8) TransformError![]const u8 {
+ fn transform(allocator: mem.Allocator, dir: Dir, relative_path: [:0]const u8) TransformError![:0]const u8 {
// The final path may not actually exist which would cause realpath to fail.
// So instead, we get the path of the dir and join it with the relative path.
var fd_path_buf: [fs.MAX_PATH_BYTES]u8 = undefined;
const dir_path = try os.getFdPath(dir.fd, &fd_path_buf);
- return fs.path.join(allocator, &.{ dir_path, relative_path });
+ return fs.path.joinZ(allocator, &.{ dir_path, relative_path });
}
}.transform,
.unc => return struct {
- fn transform(allocator: mem.Allocator, dir: Dir, relative_path: []const u8) TransformError![]const u8 {
+ fn transform(allocator: mem.Allocator, dir: Dir, relative_path: [:0]const u8) TransformError![:0]const u8 {
// Any drive absolute path (C:\foo) can be converted into a UNC path by
// using '127.0.0.1' as the server name and '<drive letter>$' as the share name.
var fd_path_buf: [fs.MAX_PATH_BYTES]u8 = undefined;
const dir_path = try os.getFdPath(dir.fd, &fd_path_buf);
const windows_path_type = std.os.windows.getUnprefixedPathType(u8, dir_path);
switch (windows_path_type) {
- .unc_absolute => return fs.path.join(allocator, &.{ dir_path, relative_path }),
+ .unc_absolute => return fs.path.joinZ(allocator, &.{ dir_path, relative_path }),
.drive_absolute => {
// `C:\<...>` -> `\\127.0.0.1\C$\<...>`
const prepended = "\\\\127.0.0.1\\";
- var path = try fs.path.join(allocator, &.{ prepended, dir_path, relative_path });
+ var path = try fs.path.joinZ(allocator, &.{ prepended, dir_path, relative_path });
path[prepended.len + 1] = '$';
return path;
},
@@ -96,7 +96,7 @@ const TestContext = struct {
/// Returns the `relative_path` transformed into the TestContext's `path_type`.
/// The result is allocated by the TestContext's arena and will be free'd during
/// `TestContext.deinit`.
- pub fn transformPath(self: *TestContext, relative_path: []const u8) ![]const u8 {
+ pub fn transformPath(self: *TestContext, relative_path: [:0]const u8) ![:0]const u8 {
return self.transform_fn(self.arena.allocator(), self.dir, relative_path);
}
};
@@ -242,7 +242,12 @@ test "File.stat on a File that is a symlink returns Kind.sym_link" {
const sub_path_c = try os.toPosixPath("symlink");
// the O_NOFOLLOW | O_PATH combination can obtain a fd to a symlink
// note that if O_DIRECTORY is set, then this will error with ENOTDIR
- const flags = os.O.NOFOLLOW | os.O.PATH | os.O.RDONLY | os.O.CLOEXEC;
+ const flags: os.O = .{
+ .NOFOLLOW = true,
+ .PATH = true,
+ .ACCMODE = .RDONLY,
+ .CLOEXEC = true,
+ };
const fd = try os.openatZ(ctx.dir.fd, &sub_path_c, flags, 0);
break :linux_symlink Dir{ .fd = fd };
},
@@ -996,6 +1001,16 @@ test "openSelfExe" {
self_exe_file.close();
}
+test "selfExePath" { // Checks that the buffer-based and allocating self-exe-path APIs report the same path.
+ if (builtin.os.tag == .wasi) return error.SkipZigTest; // not exercised on WASI
+
+ var buf: [fs.MAX_PATH_BYTES]u8 = undefined; // scratch buffer handed to selfExePath
+ const buf_self_exe_path = try std.fs.selfExePath(&buf);
+ const alloc_self_exe_path = try std.fs.selfExePathAlloc(testing.allocator);
+ defer testing.allocator.free(alloc_self_exe_path); // the test owns the allocated copy and frees it
+ try testing.expectEqualSlices(u8, buf_self_exe_path, alloc_self_exe_path); // both variants must agree byte-for-byte
+}
+
test "deleteTree does not follow symlinks" {
var tmp = tmpDir(.{});
defer tmp.cleanup();
@@ -1508,11 +1523,6 @@ test "open file with exclusive and shared nonblocking lock" {
test "open file with exclusive lock twice, make sure second lock waits" {
if (builtin.single_threaded) return error.SkipZigTest;
- if (std.io.is_async) {
- // This test starts its own threads and is not compatible with async I/O.
- return error.SkipZigTest;
- }
-
try testWithAllSupportedPathTypes(struct {
fn impl(ctx: *TestContext) !void {
const filename = try ctx.transformPath("file_lock_test.txt");
@@ -1907,3 +1917,111 @@ test "delete a setAsCwd directory on Windows" {
// Close the parent "tmp" so we don't leak the HANDLE.
tmp.parent_dir.close();
}
+
+test "invalid UTF-8/WTF-8 paths" { // Every path-taking fs API below must reject an ill-formed path with the platform's encoding error.
+ const expected_err = switch (builtin.os.tag) { // the error each call is expected to return on this target
+ .wasi => error.InvalidUtf8,
+ .windows => error.InvalidWtf8,
+ else => return error.SkipZigTest, // all other targets skip the test
+ };
+
+ try testWithAllSupportedPathTypes(struct {
+ fn impl(ctx: *TestContext) !void {
+ // This is both invalid UTF-8 and WTF-8, since \xFF is an invalid start byte
+ const invalid_path = try ctx.transformPath("\xFF");
+
+ try testing.expectError(expected_err, ctx.dir.openFile(invalid_path, .{}));
+ try testing.expectError(expected_err, ctx.dir.openFileZ(invalid_path, .{}));
+
+ try testing.expectError(expected_err, ctx.dir.createFile(invalid_path, .{}));
+ try testing.expectError(expected_err, ctx.dir.createFileZ(invalid_path, .{}));
+
+ try testing.expectError(expected_err, ctx.dir.makeDir(invalid_path));
+ try testing.expectError(expected_err, ctx.dir.makeDirZ(invalid_path));
+
+ try testing.expectError(expected_err, ctx.dir.makePath(invalid_path));
+ try testing.expectError(expected_err, ctx.dir.makeOpenPath(invalid_path, .{}));
+
+ try testing.expectError(expected_err, ctx.dir.openDir(invalid_path, .{}));
+ try testing.expectError(expected_err, ctx.dir.openDirZ(invalid_path, .{}));
+
+ try testing.expectError(expected_err, ctx.dir.deleteFile(invalid_path));
+ try testing.expectError(expected_err, ctx.dir.deleteFileZ(invalid_path));
+
+ try testing.expectError(expected_err, ctx.dir.deleteDir(invalid_path));
+ try testing.expectError(expected_err, ctx.dir.deleteDirZ(invalid_path));
+
+ try testing.expectError(expected_err, ctx.dir.rename(invalid_path, invalid_path));
+ try testing.expectError(expected_err, ctx.dir.renameZ(invalid_path, invalid_path));
+
+ try testing.expectError(expected_err, ctx.dir.symLink(invalid_path, invalid_path, .{}));
+ try testing.expectError(expected_err, ctx.dir.symLinkZ(invalid_path, invalid_path, .{}));
+ if (builtin.os.tag == .wasi) { // the *Wasi variant is only called on WASI
+ try testing.expectError(expected_err, ctx.dir.symLinkWasi(invalid_path, invalid_path, .{}));
+ }
+
+ try testing.expectError(expected_err, ctx.dir.readLink(invalid_path, &[_]u8{}));
+ try testing.expectError(expected_err, ctx.dir.readLinkZ(invalid_path, &[_]u8{}));
+ if (builtin.os.tag == .wasi) { // the *Wasi variant is only called on WASI
+ try testing.expectError(expected_err, ctx.dir.readLinkWasi(invalid_path, &[_]u8{}));
+ }
+
+ try testing.expectError(expected_err, ctx.dir.readFile(invalid_path, &[_]u8{}));
+ try testing.expectError(expected_err, ctx.dir.readFileAlloc(testing.allocator, invalid_path, 0));
+
+ try testing.expectError(expected_err, ctx.dir.deleteTree(invalid_path));
+ try testing.expectError(expected_err, ctx.dir.deleteTreeMinStackSize(invalid_path));
+
+ try testing.expectError(expected_err, ctx.dir.writeFile(invalid_path, ""));
+ try testing.expectError(expected_err, ctx.dir.writeFile2(.{
+ .sub_path = invalid_path,
+ .data = "",
+ }));
+
+ try testing.expectError(expected_err, ctx.dir.access(invalid_path, .{}));
+ try testing.expectError(expected_err, ctx.dir.accessZ(invalid_path, .{}));
+
+ try testing.expectError(expected_err, ctx.dir.updateFile(invalid_path, ctx.dir, invalid_path, .{}));
+ try testing.expectError(expected_err, ctx.dir.copyFile(invalid_path, ctx.dir, invalid_path, .{}));
+
+ try testing.expectError(expected_err, ctx.dir.statFile(invalid_path));
+
+ if (builtin.os.tag != .wasi) { // the realpath family is not exercised on WASI
+ try testing.expectError(expected_err, ctx.dir.realpath(invalid_path, &[_]u8{}));
+ try testing.expectError(expected_err, ctx.dir.realpathZ(invalid_path, &[_]u8{}));
+ try testing.expectError(expected_err, ctx.dir.realpathAlloc(testing.allocator, invalid_path));
+ }
+
+ try testing.expectError(expected_err, fs.rename(ctx.dir, invalid_path, ctx.dir, invalid_path));
+ try testing.expectError(expected_err, fs.renameZ(ctx.dir, invalid_path, ctx.dir, invalid_path));
+
+ if (builtin.os.tag != .wasi and ctx.path_type != .relative) { // the *Absolute APIs run only for non-relative path types, and never on WASI
+ try testing.expectError(expected_err, fs.updateFileAbsolute(invalid_path, invalid_path, .{}));
+ try testing.expectError(expected_err, fs.copyFileAbsolute(invalid_path, invalid_path, .{}));
+ try testing.expectError(expected_err, fs.makeDirAbsolute(invalid_path));
+ try testing.expectError(expected_err, fs.makeDirAbsoluteZ(invalid_path));
+ try testing.expectError(expected_err, fs.deleteDirAbsolute(invalid_path));
+ try testing.expectError(expected_err, fs.deleteDirAbsoluteZ(invalid_path));
+ try testing.expectError(expected_err, fs.renameAbsolute(invalid_path, invalid_path));
+ try testing.expectError(expected_err, fs.renameAbsoluteZ(invalid_path, invalid_path));
+ try testing.expectError(expected_err, fs.openDirAbsolute(invalid_path, .{}));
+ try testing.expectError(expected_err, fs.openDirAbsoluteZ(invalid_path, .{}));
+ try testing.expectError(expected_err, fs.openFileAbsolute(invalid_path, .{}));
+ try testing.expectError(expected_err, fs.openFileAbsoluteZ(invalid_path, .{}));
+ try testing.expectError(expected_err, fs.accessAbsolute(invalid_path, .{}));
+ try testing.expectError(expected_err, fs.accessAbsoluteZ(invalid_path, .{}));
+ try testing.expectError(expected_err, fs.createFileAbsolute(invalid_path, .{}));
+ try testing.expectError(expected_err, fs.createFileAbsoluteZ(invalid_path, .{}));
+ try testing.expectError(expected_err, fs.deleteFileAbsolute(invalid_path));
+ try testing.expectError(expected_err, fs.deleteFileAbsoluteZ(invalid_path));
+ try testing.expectError(expected_err, fs.deleteTreeAbsolute(invalid_path));
+ var readlink_buf: [fs.MAX_PATH_BYTES]u8 = undefined; // output buffer for the readLinkAbsolute calls below
+ try testing.expectError(expected_err, fs.readLinkAbsolute(invalid_path, &readlink_buf));
+ try testing.expectError(expected_err, fs.readLinkAbsoluteZ(invalid_path, &readlink_buf));
+ try testing.expectError(expected_err, fs.symLinkAbsolute(invalid_path, invalid_path, .{}));
+ try testing.expectError(expected_err, fs.symLinkAbsoluteZ(invalid_path, invalid_path, .{}));
+ try testing.expectError(expected_err, fs.realpathAlloc(testing.allocator, invalid_path));
+ }
+ }
+ }.impl);
+}
diff --git a/lib/std/fs/watch.zig b/lib/std/fs/watch.zig
deleted file mode 100644
index e6485093ca..0000000000
--- a/lib/std/fs/watch.zig
+++ /dev/null
@@ -1,719 +0,0 @@
-const std = @import("std");
-const builtin = @import("builtin");
-const event = std.event;
-const assert = std.debug.assert;
-const testing = std.testing;
-const os = std.os;
-const mem = std.mem;
-const windows = os.windows;
-const Loop = event.Loop;
-const fd_t = os.fd_t;
-const File = std.fs.File;
-const Allocator = mem.Allocator;
-
-const global_event_loop = Loop.instance orelse
- @compileError("std.fs.Watch currently only works with event-based I/O");
-
-const WatchEventId = enum {
- CloseWrite,
- Delete,
-};
-
-const WatchEventError = error{
- UserResourceLimitReached,
- SystemResources,
- AccessDenied,
- Unexpected, // TODO remove this possibility
-};
-
-pub fn Watch(comptime V: type) type {
- return struct {
- channel: event.Channel(Event.Error!Event),
- os_data: OsData,
- allocator: Allocator,
-
- const OsData = switch (builtin.os.tag) {
- // TODO https://github.com/ziglang/zig/issues/3778
- .macos, .freebsd, .netbsd, .dragonfly, .openbsd => KqOsData,
- .linux => LinuxOsData,
- .windows => WindowsOsData,
-
- else => @compileError("Unsupported OS"),
- };
-
- const KqOsData = struct {
- table_lock: event.Lock,
- file_table: FileTable,
-
- const FileTable = std.StringHashMapUnmanaged(*Put);
- const Put = struct {
- putter_frame: @Frame(kqPutEvents),
- cancelled: bool = false,
- value: V,
- };
- };
-
- const WindowsOsData = struct {
- table_lock: event.Lock,
- dir_table: DirTable,
- cancelled: bool = false,
-
- const DirTable = std.StringHashMapUnmanaged(*Dir);
- const FileTable = std.StringHashMapUnmanaged(V);
-
- const Dir = struct {
- putter_frame: @Frame(windowsDirReader),
- file_table: FileTable,
- dir_handle: os.windows.HANDLE,
- };
- };
-
- const LinuxOsData = struct {
- putter_frame: @Frame(linuxEventPutter),
- inotify_fd: i32,
- wd_table: WdTable,
- table_lock: event.Lock,
- cancelled: bool = false,
-
- const WdTable = std.AutoHashMapUnmanaged(i32, Dir);
- const FileTable = std.StringHashMapUnmanaged(V);
-
- const Dir = struct {
- dirname: []const u8,
- file_table: FileTable,
- };
- };
-
- const Self = @This();
-
- pub const Event = struct {
- id: Id,
- data: V,
- dirname: []const u8,
- basename: []const u8,
-
- pub const Id = WatchEventId;
- pub const Error = WatchEventError;
- };
-
- pub fn init(allocator: Allocator, event_buf_count: usize) !*Self {
- const self = try allocator.create(Self);
- errdefer allocator.destroy(self);
-
- switch (builtin.os.tag) {
- .linux => {
- const inotify_fd = try os.inotify_init1(os.linux.IN_NONBLOCK | os.linux.IN_CLOEXEC);
- errdefer os.close(inotify_fd);
-
- self.* = Self{
- .allocator = allocator,
- .channel = undefined,
- .os_data = OsData{
- .putter_frame = undefined,
- .inotify_fd = inotify_fd,
- .wd_table = OsData.WdTable.init(allocator),
- .table_lock = event.Lock{},
- },
- };
-
- const buf = try allocator.alloc(Event.Error!Event, event_buf_count);
- self.channel.init(buf);
- self.os_data.putter_frame = async self.linuxEventPutter();
- return self;
- },
-
- .windows => {
- self.* = Self{
- .allocator = allocator,
- .channel = undefined,
- .os_data = OsData{
- .table_lock = event.Lock{},
- .dir_table = OsData.DirTable.init(allocator),
- },
- };
-
- const buf = try allocator.alloc(Event.Error!Event, event_buf_count);
- self.channel.init(buf);
- return self;
- },
-
- .macos, .freebsd, .netbsd, .dragonfly, .openbsd => {
- self.* = Self{
- .allocator = allocator,
- .channel = undefined,
- .os_data = OsData{
- .table_lock = event.Lock{},
- .file_table = OsData.FileTable.init(allocator),
- },
- };
-
- const buf = try allocator.alloc(Event.Error!Event, event_buf_count);
- self.channel.init(buf);
- return self;
- },
- else => @compileError("Unsupported OS"),
- }
- }
-
- pub fn deinit(self: *Self) void {
- switch (builtin.os.tag) {
- .macos, .freebsd, .netbsd, .dragonfly, .openbsd => {
- var it = self.os_data.file_table.iterator();
- while (it.next()) |entry| {
- const key = entry.key_ptr.*;
- const value = entry.value_ptr.*;
- value.cancelled = true;
- // @TODO Close the fd here?
- await value.putter_frame;
- self.allocator.free(key);
- self.allocator.destroy(value);
- }
- },
- .linux => {
- self.os_data.cancelled = true;
- {
- // Remove all directory watches linuxEventPutter will take care of
- // cleaning up the memory and closing the inotify fd.
- var dir_it = self.os_data.wd_table.keyIterator();
- while (dir_it.next()) |wd_key| {
- const rc = os.linux.inotify_rm_watch(self.os_data.inotify_fd, wd_key.*);
- // Errno can only be EBADF, EINVAL if either the inotify fs or the wd are invalid
- std.debug.assert(rc == 0);
- }
- }
- await self.os_data.putter_frame;
- },
- .windows => {
- self.os_data.cancelled = true;
- var dir_it = self.os_data.dir_table.iterator();
- while (dir_it.next()) |dir_entry| {
- if (windows.kernel32.CancelIoEx(dir_entry.value.dir_handle, null) != 0) {
- // We canceled the pending ReadDirectoryChangesW operation, but our
- // frame is still suspending, now waiting indefinitely.
- // Thus, it is safe to resume it ourslves
- resume dir_entry.value.putter_frame;
- } else {
- std.debug.assert(windows.kernel32.GetLastError() == .NOT_FOUND);
- // We are at another suspend point, we can await safely for the
- // function to exit the loop
- await dir_entry.value.putter_frame;
- }
-
- self.allocator.free(dir_entry.key_ptr.*);
- var file_it = dir_entry.value.file_table.keyIterator();
- while (file_it.next()) |file_entry| {
- self.allocator.free(file_entry.*);
- }
- dir_entry.value.file_table.deinit(self.allocator);
- self.allocator.destroy(dir_entry.value_ptr.*);
- }
- self.os_data.dir_table.deinit(self.allocator);
- },
- else => @compileError("Unsupported OS"),
- }
- self.allocator.free(self.channel.buffer_nodes);
- self.channel.deinit();
- self.allocator.destroy(self);
- }
-
- pub fn addFile(self: *Self, file_path: []const u8, value: V) !?V {
- switch (builtin.os.tag) {
- .macos, .freebsd, .netbsd, .dragonfly, .openbsd => return addFileKEvent(self, file_path, value),
- .linux => return addFileLinux(self, file_path, value),
- .windows => return addFileWindows(self, file_path, value),
- else => @compileError("Unsupported OS"),
- }
- }
-
- fn addFileKEvent(self: *Self, file_path: []const u8, value: V) !?V {
- var realpath_buf: [std.fs.MAX_PATH_BYTES]u8 = undefined;
- const realpath = try os.realpath(file_path, &realpath_buf);
-
- const held = self.os_data.table_lock.acquire();
- defer held.release();
-
- const gop = try self.os_data.file_table.getOrPut(self.allocator, realpath);
- errdefer assert(self.os_data.file_table.remove(realpath));
- if (gop.found_existing) {
- const prev_value = gop.value_ptr.value;
- gop.value_ptr.value = value;
- return prev_value;
- }
-
- gop.key_ptr.* = try self.allocator.dupe(u8, realpath);
- errdefer self.allocator.free(gop.key_ptr.*);
- gop.value_ptr.* = try self.allocator.create(OsData.Put);
- errdefer self.allocator.destroy(gop.value_ptr.*);
- gop.value_ptr.* = .{
- .putter_frame = undefined,
- .value = value,
- };
-
- // @TODO Can I close this fd and get an error from bsdWaitKev?
- const flags = if (comptime builtin.target.isDarwin()) os.O.SYMLINK | os.O.EVTONLY else 0;
- const fd = try os.open(realpath, flags, 0);
- gop.value_ptr.putter_frame = async self.kqPutEvents(fd, gop.key_ptr.*, gop.value_ptr.*);
- return null;
- }
-
- fn kqPutEvents(self: *Self, fd: os.fd_t, file_path: []const u8, put: *OsData.Put) void {
- global_event_loop.beginOneEvent();
- defer {
- global_event_loop.finishOneEvent();
- // @TODO: Remove this if we force close otherwise
- os.close(fd);
- }
-
- // We need to manually do a bsdWaitKev to access the fflags.
- var resume_node = event.Loop.ResumeNode.Basic{
- .base = .{
- .id = .Basic,
- .handle = @frame(),
- .overlapped = event.Loop.ResumeNode.overlapped_init,
- },
- .kev = undefined,
- };
-
- var kevs = [1]os.Kevent{undefined};
- const kev = &kevs[0];
-
- while (!put.cancelled) {
- kev.* = os.Kevent{
- .ident = @as(usize, @intCast(fd)),
- .filter = os.EVFILT_VNODE,
- .flags = os.EV_ADD | os.EV_ENABLE | os.EV_CLEAR | os.EV_ONESHOT |
- os.NOTE_WRITE | os.NOTE_DELETE | os.NOTE_REVOKE,
- .fflags = 0,
- .data = 0,
- .udata = @intFromPtr(&resume_node.base),
- };
- suspend {
- global_event_loop.beginOneEvent();
- errdefer global_event_loop.finishOneEvent();
-
- const empty_kevs = &[0]os.Kevent{};
- _ = os.kevent(global_event_loop.os_data.kqfd, &kevs, empty_kevs, null) catch |err| switch (err) {
- error.EventNotFound,
- error.ProcessNotFound,
- error.Overflow,
- => unreachable,
- error.AccessDenied, error.SystemResources => |e| {
- self.channel.put(e);
- continue;
- },
- };
- }
-
- if (kev.flags & os.EV_ERROR != 0) {
- self.channel.put(os.unexpectedErrno(os.errno(kev.data)));
- continue;
- }
-
- if (kev.fflags & os.NOTE_DELETE != 0 or kev.fflags & os.NOTE_REVOKE != 0) {
- self.channel.put(Self.Event{
- .id = .Delete,
- .data = put.value,
- .dirname = std.fs.path.dirname(file_path) orelse "/",
- .basename = std.fs.path.basename(file_path),
- });
- } else if (kev.fflags & os.NOTE_WRITE != 0) {
- self.channel.put(Self.Event{
- .id = .CloseWrite,
- .data = put.value,
- .dirname = std.fs.path.dirname(file_path) orelse "/",
- .basename = std.fs.path.basename(file_path),
- });
- }
- }
- }
-
- fn addFileLinux(self: *Self, file_path: []const u8, value: V) !?V {
- const dirname = std.fs.path.dirname(file_path) orelse if (file_path[0] == '/') "/" else ".";
- const basename = std.fs.path.basename(file_path);
-
- const wd = try os.inotify_add_watch(
- self.os_data.inotify_fd,
- dirname,
- os.linux.IN_CLOSE_WRITE | os.linux.IN_ONLYDIR | os.linux.IN_DELETE | os.linux.IN_EXCL_UNLINK,
- );
- // wd is either a newly created watch or an existing one.
-
- const held = self.os_data.table_lock.acquire();
- defer held.release();
-
- const gop = try self.os_data.wd_table.getOrPut(self.allocator, wd);
- errdefer assert(self.os_data.wd_table.remove(wd));
- if (!gop.found_existing) {
- gop.value_ptr.* = OsData.Dir{
- .dirname = try self.allocator.dupe(u8, dirname),
- .file_table = OsData.FileTable.init(self.allocator),
- };
- }
-
- const dir = gop.value_ptr;
- const file_table_gop = try dir.file_table.getOrPut(self.allocator, basename);
- errdefer assert(dir.file_table.remove(basename));
- if (file_table_gop.found_existing) {
- const prev_value = file_table_gop.value_ptr.*;
- file_table_gop.value_ptr.* = value;
- return prev_value;
- } else {
- file_table_gop.key_ptr.* = try self.allocator.dupe(u8, basename);
- file_table_gop.value_ptr.* = value;
- return null;
- }
- }
-
- fn addFileWindows(self: *Self, file_path: []const u8, value: V) !?V {
- // TODO we might need to convert dirname and basename to canonical file paths ("short"?)
- const dirname = std.fs.path.dirname(file_path) orelse if (file_path[0] == '/') "/" else ".";
- var dirname_path_space: windows.PathSpace = undefined;
- dirname_path_space.len = try std.unicode.utf8ToUtf16Le(&dirname_path_space.data, dirname);
- dirname_path_space.data[dirname_path_space.len] = 0;
-
- const basename = std.fs.path.basename(file_path);
- var basename_path_space: windows.PathSpace = undefined;
- basename_path_space.len = try std.unicode.utf8ToUtf16Le(&basename_path_space.data, basename);
- basename_path_space.data[basename_path_space.len] = 0;
-
- const held = self.os_data.table_lock.acquire();
- defer held.release();
-
- const gop = try self.os_data.dir_table.getOrPut(self.allocator, dirname);
- errdefer assert(self.os_data.dir_table.remove(dirname));
- if (gop.found_existing) {
- const dir = gop.value_ptr.*;
-
- const file_gop = try dir.file_table.getOrPut(self.allocator, basename);
- errdefer assert(dir.file_table.remove(basename));
- if (file_gop.found_existing) {
- const prev_value = file_gop.value_ptr.*;
- file_gop.value_ptr.* = value;
- return prev_value;
- } else {
- file_gop.value_ptr.* = value;
- file_gop.key_ptr.* = try self.allocator.dupe(u8, basename);
- return null;
- }
- } else {
- const dir_handle = try windows.OpenFile(dirname_path_space.span(), .{
- .dir = std.fs.cwd().fd,
- .access_mask = windows.FILE_LIST_DIRECTORY,
- .creation = windows.FILE_OPEN,
- .io_mode = .evented,
- .filter = .dir_only,
- });
- errdefer windows.CloseHandle(dir_handle);
-
- const dir = try self.allocator.create(OsData.Dir);
- errdefer self.allocator.destroy(dir);
-
- gop.key_ptr.* = try self.allocator.dupe(u8, dirname);
- errdefer self.allocator.free(gop.key_ptr.*);
-
- dir.* = OsData.Dir{
- .file_table = OsData.FileTable.init(self.allocator),
- .putter_frame = undefined,
- .dir_handle = dir_handle,
- };
- gop.value_ptr.* = dir;
- try dir.file_table.put(self.allocator, try self.allocator.dupe(u8, basename), value);
- dir.putter_frame = async self.windowsDirReader(dir, gop.key_ptr.*);
- return null;
- }
- }
-
- fn windowsDirReader(self: *Self, dir: *OsData.Dir, dirname: []const u8) void {
- defer os.close(dir.dir_handle);
- var resume_node = Loop.ResumeNode.Basic{
- .base = Loop.ResumeNode{
- .id = .Basic,
- .handle = @frame(),
- .overlapped = windows.OVERLAPPED{
- .Internal = 0,
- .InternalHigh = 0,
- .DUMMYUNIONNAME = .{
- .DUMMYSTRUCTNAME = .{
- .Offset = 0,
- .OffsetHigh = 0,
- },
- },
- .hEvent = null,
- },
- },
- };
-
- var event_buf: [4096]u8 align(@alignOf(windows.FILE_NOTIFY_INFORMATION)) = undefined;
-
- global_event_loop.beginOneEvent();
- defer global_event_loop.finishOneEvent();
-
- while (!self.os_data.cancelled) main_loop: {
- suspend {
- _ = windows.kernel32.ReadDirectoryChangesW(
- dir.dir_handle,
- &event_buf,
- event_buf.len,
- windows.FALSE, // watch subtree
- windows.FILE_NOTIFY_CHANGE_FILE_NAME | windows.FILE_NOTIFY_CHANGE_DIR_NAME |
- windows.FILE_NOTIFY_CHANGE_ATTRIBUTES | windows.FILE_NOTIFY_CHANGE_SIZE |
- windows.FILE_NOTIFY_CHANGE_LAST_WRITE | windows.FILE_NOTIFY_CHANGE_LAST_ACCESS |
- windows.FILE_NOTIFY_CHANGE_CREATION | windows.FILE_NOTIFY_CHANGE_SECURITY,
- null, // number of bytes transferred (unused for async)
- &resume_node.base.overlapped,
- null, // completion routine - unused because we use IOCP
- );
- }
-
- var bytes_transferred: windows.DWORD = undefined;
- if (windows.kernel32.GetOverlappedResult(
- dir.dir_handle,
- &resume_node.base.overlapped,
- &bytes_transferred,
- windows.FALSE,
- ) == 0) {
- const potential_error = windows.kernel32.GetLastError();
- const err = switch (potential_error) {
- .OPERATION_ABORTED, .IO_INCOMPLETE => err_blk: {
- if (self.os_data.cancelled)
- break :main_loop
- else
- break :err_blk windows.unexpectedError(potential_error);
- },
- else => |err| windows.unexpectedError(err),
- };
- self.channel.put(err);
- } else {
- var ptr: [*]u8 = &event_buf;
- const end_ptr = ptr + bytes_transferred;
- while (@intFromPtr(ptr) < @intFromPtr(end_ptr)) {
- const ev = @as(*const windows.FILE_NOTIFY_INFORMATION, @ptrCast(ptr));
- const emit = switch (ev.Action) {
- windows.FILE_ACTION_REMOVED => WatchEventId.Delete,
- windows.FILE_ACTION_MODIFIED => .CloseWrite,
- else => null,
- };
- if (emit) |id| {
- const basename_ptr = @as([*]u16, @ptrCast(ptr + @sizeOf(windows.FILE_NOTIFY_INFORMATION)));
- const basename_utf16le = basename_ptr[0 .. ev.FileNameLength / 2];
- var basename_data: [std.fs.MAX_PATH_BYTES]u8 = undefined;
- const basename = basename_data[0 .. std.unicode.utf16leToUtf8(&basename_data, basename_utf16le) catch unreachable];
-
- if (dir.file_table.getEntry(basename)) |entry| {
- self.channel.put(Event{
- .id = id,
- .data = entry.value_ptr.*,
- .dirname = dirname,
- .basename = entry.key_ptr.*,
- });
- }
- }
-
- if (ev.NextEntryOffset == 0) break;
- ptr = @alignCast(ptr + ev.NextEntryOffset);
- }
- }
- }
- }
-
- pub fn removeFile(self: *Self, file_path: []const u8) !?V {
- switch (builtin.os.tag) {
- .linux => {
- const dirname = std.fs.path.dirname(file_path) orelse if (file_path[0] == '/') "/" else ".";
- const basename = std.fs.path.basename(file_path);
-
- const held = self.os_data.table_lock.acquire();
- defer held.release();
-
- const dir = self.os_data.wd_table.get(dirname) orelse return null;
- if (dir.file_table.fetchRemove(basename)) |file_entry| {
- self.allocator.free(file_entry.key);
- return file_entry.value;
- }
- return null;
- },
- .windows => {
- const dirname = std.fs.path.dirname(file_path) orelse if (file_path[0] == '/') "/" else ".";
- const basename = std.fs.path.basename(file_path);
-
- const held = self.os_data.table_lock.acquire();
- defer held.release();
-
- const dir = self.os_data.dir_table.get(dirname) orelse return null;
- if (dir.file_table.fetchRemove(basename)) |file_entry| {
- self.allocator.free(file_entry.key);
- return file_entry.value;
- }
- return null;
- },
- .macos, .freebsd, .netbsd, .dragonfly, .openbsd => {
- var realpath_buf: [std.fs.MAX_PATH_BYTES]u8 = undefined;
- const realpath = try os.realpath(file_path, &realpath_buf);
-
- const held = self.os_data.table_lock.acquire();
- defer held.release();
-
- const entry = self.os_data.file_table.getEntry(realpath) orelse return null;
- entry.value_ptr.cancelled = true;
- // @TODO Close the fd here?
- await entry.value_ptr.putter_frame;
- self.allocator.free(entry.key_ptr.*);
- self.allocator.destroy(entry.value_ptr.*);
-
- assert(self.os_data.file_table.remove(realpath));
- },
- else => @compileError("Unsupported OS"),
- }
- }
-
- fn linuxEventPutter(self: *Self) void {
- global_event_loop.beginOneEvent();
-
- defer {
- std.debug.assert(self.os_data.wd_table.count() == 0);
- self.os_data.wd_table.deinit(self.allocator);
- os.close(self.os_data.inotify_fd);
- self.allocator.free(self.channel.buffer_nodes);
- self.channel.deinit();
- global_event_loop.finishOneEvent();
- }
-
- var event_buf: [4096]u8 align(@alignOf(os.linux.inotify_event)) = undefined;
-
- while (!self.os_data.cancelled) {
- const bytes_read = global_event_loop.read(self.os_data.inotify_fd, &event_buf, false) catch unreachable;
-
- var ptr: [*]u8 = &event_buf;
- const end_ptr = ptr + bytes_read;
- while (@intFromPtr(ptr) < @intFromPtr(end_ptr)) {
- const ev = @as(*const os.linux.inotify_event, @ptrCast(ptr));
- if (ev.mask & os.linux.IN_CLOSE_WRITE == os.linux.IN_CLOSE_WRITE) {
- const basename_ptr = ptr + @sizeOf(os.linux.inotify_event);
- const basename = std.mem.span(@as([*:0]u8, @ptrCast(basename_ptr)));
-
- const dir = &self.os_data.wd_table.get(ev.wd).?;
- if (dir.file_table.getEntry(basename)) |file_value| {
- self.channel.put(Event{
- .id = .CloseWrite,
- .data = file_value.value_ptr.*,
- .dirname = dir.dirname,
- .basename = file_value.key_ptr.*,
- });
- }
- } else if (ev.mask & os.linux.IN_IGNORED == os.linux.IN_IGNORED) {
- // Directory watch was removed
- const held = self.os_data.table_lock.acquire();
- defer held.release();
- if (self.os_data.wd_table.fetchRemove(ev.wd)) |wd_entry| {
- var file_it = wd_entry.value.file_table.keyIterator();
- while (file_it.next()) |file_entry| {
- self.allocator.free(file_entry.*);
- }
- self.allocator.free(wd_entry.value.dirname);
- wd_entry.value.file_table.deinit(self.allocator);
- }
- } else if (ev.mask & os.linux.IN_DELETE == os.linux.IN_DELETE) {
- // File or directory was removed or deleted
- const basename_ptr = ptr + @sizeOf(os.linux.inotify_event);
- const basename = std.mem.span(@as([*:0]u8, @ptrCast(basename_ptr)));
-
- const dir = &self.os_data.wd_table.get(ev.wd).?;
- if (dir.file_table.getEntry(basename)) |file_value| {
- self.channel.put(Event{
- .id = .Delete,
- .data = file_value.value_ptr.*,
- .dirname = dir.dirname,
- .basename = file_value.key_ptr.*,
- });
- }
- }
-
- ptr = @alignCast(ptr + @sizeOf(os.linux.inotify_event) + ev.len);
- }
- }
- }
- };
-}
-
-const test_tmp_dir = "std_event_fs_test";
-
-test "write a file, watch it, write it again, delete it" {
- if (!std.io.is_async) return error.SkipZigTest;
- // TODO https://github.com/ziglang/zig/issues/1908
- if (builtin.single_threaded) return error.SkipZigTest;
-
- try std.fs.cwd().makePath(test_tmp_dir);
- defer std.fs.cwd().deleteTree(test_tmp_dir) catch {};
-
- return testWriteWatchWriteDelete(std.testing.allocator);
-}
-
-fn testWriteWatchWriteDelete(allocator: Allocator) !void {
- const file_path = try std.fs.path.join(allocator, &[_][]const u8{ test_tmp_dir, "file.txt" });
- defer allocator.free(file_path);
-
- const contents =
- \\line 1
- \\line 2
- ;
- const line2_offset = 7;
-
- // first just write then read the file
- try std.fs.cwd().writeFile(file_path, contents);
-
- const read_contents = try std.fs.cwd().readFileAlloc(allocator, file_path, 1024 * 1024);
- defer allocator.free(read_contents);
- try testing.expectEqualSlices(u8, contents, read_contents);
-
- // now watch the file
- var watch = try Watch(void).init(allocator, 0);
- defer watch.deinit();
-
- try testing.expect((try watch.addFile(file_path, {})) == null);
-
- var ev = async watch.channel.get();
- var ev_consumed = false;
- defer if (!ev_consumed) {
- _ = await ev;
- };
-
- // overwrite line 2
- const file = try std.fs.cwd().openFile(file_path, .{ .mode = .read_write });
- {
- defer file.close();
- const write_contents = "lorem ipsum";
- var iovec = [_]os.iovec_const{.{
- .iov_base = write_contents,
- .iov_len = write_contents.len,
- }};
- _ = try file.pwritevAll(&iovec, line2_offset);
- }
-
- switch ((try await ev).id) {
- .CloseWrite => {
- ev_consumed = true;
- },
- .Delete => @panic("wrong event"),
- }
-
- const contents_updated = try std.fs.cwd().readFileAlloc(allocator, file_path, 1024 * 1024);
- defer allocator.free(contents_updated);
-
- try testing.expectEqualSlices(u8,
- \\line 1
- \\lorem ipsum
- , contents_updated);
-
- ev = async watch.channel.get();
- ev_consumed = false;
-
- try std.fs.cwd().deleteFile(file_path);
- switch ((try await ev).id) {
- .Delete => {
- ev_consumed = true;
- },
- .CloseWrite => @panic("wrong event"),
- }
-}
-
-// TODO Test: Add another file watch, remove the old file watch, get an event in the new
diff --git a/lib/std/hash/benchmark.zig b/lib/std/hash/benchmark.zig
index a56bd0b5a8..35e96a655b 100644
--- a/lib/std/hash/benchmark.zig
+++ b/lib/std/hash/benchmark.zig
@@ -10,7 +10,7 @@ const KiB = 1024;
const MiB = 1024 * KiB;
const GiB = 1024 * MiB;
-var prng = std.rand.DefaultPrng.init(0);
+var prng = std.Random.DefaultPrng.init(0);
const random = prng.random();
const Hash = struct {
diff --git a/lib/std/hash_map.zig b/lib/std/hash_map.zig
index ad84254792..7db2c40559 100644
--- a/lib/std/hash_map.zig
+++ b/lib/std/hash_map.zig
@@ -92,27 +92,24 @@ pub fn hashString(s: []const u8) u64 {
pub const StringIndexContext = struct {
bytes: *const std.ArrayListUnmanaged(u8),
- pub fn eql(self: @This(), a: u32, b: u32) bool {
- _ = self;
+ pub fn eql(_: @This(), a: u32, b: u32) bool {
return a == b;
}
- pub fn hash(self: @This(), x: u32) u64 {
- const x_slice = mem.sliceTo(@as([*:0]const u8, @ptrCast(self.bytes.items.ptr)) + x, 0);
- return hashString(x_slice);
+ pub fn hash(ctx: @This(), key: u32) u64 {
+ return hashString(mem.sliceTo(ctx.bytes.items[key..], 0));
}
};
pub const StringIndexAdapter = struct {
bytes: *const std.ArrayListUnmanaged(u8),
- pub fn eql(self: @This(), a_slice: []const u8, b: u32) bool {
- const b_slice = mem.sliceTo(@as([*:0]const u8, @ptrCast(self.bytes.items.ptr)) + b, 0);
- return mem.eql(u8, a_slice, b_slice);
+ pub fn eql(ctx: @This(), a: []const u8, b: u32) bool {
+ return mem.eql(u8, a, mem.sliceTo(ctx.bytes.items[b..], 0));
}
- pub fn hash(self: @This(), adapted_key: []const u8) u64 {
- _ = self;
+ pub fn hash(_: @This(), adapted_key: []const u8) u64 {
+ assert(mem.indexOfScalar(u8, adapted_key, 0) == null);
return hashString(adapted_key);
}
};
@@ -897,7 +894,7 @@ pub fn HashMapUnmanaged(
}
fn capacityForSize(size: Size) Size {
- var new_cap: u32 = @truncate((@as(u64, size) * 100) / max_load_percentage + 1);
+ var new_cap: u32 = @intCast((@as(u64, size) * 100) / max_load_percentage + 1);
new_cap = math.ceilPowerOfTwo(u32, new_cap) catch unreachable;
return new_cap;
}
@@ -1540,14 +1537,15 @@ pub fn HashMapUnmanaged(
const val_align = if (@sizeOf(V) == 0) 1 else @alignOf(V);
const max_align = comptime @max(header_align, key_align, val_align);
- const meta_size = @sizeOf(Header) + new_capacity * @sizeOf(Metadata);
+ const new_cap: usize = new_capacity;
+ const meta_size = @sizeOf(Header) + new_cap * @sizeOf(Metadata);
comptime assert(@alignOf(Metadata) == 1);
const keys_start = std.mem.alignForward(usize, meta_size, key_align);
- const keys_end = keys_start + new_capacity * @sizeOf(K);
+ const keys_end = keys_start + new_cap * @sizeOf(K);
const vals_start = std.mem.alignForward(usize, keys_end, val_align);
- const vals_end = vals_start + new_capacity * @sizeOf(V);
+ const vals_end = vals_start + new_cap * @sizeOf(V);
const total_size = std.mem.alignForward(usize, vals_end, max_align);
@@ -1575,7 +1573,7 @@ pub fn HashMapUnmanaged(
const val_align = if (@sizeOf(V) == 0) 1 else @alignOf(V);
const max_align = comptime @max(header_align, key_align, val_align);
- const cap = self.capacity();
+ const cap: usize = self.capacity();
const meta_size = @sizeOf(Header) + cap * @sizeOf(Metadata);
comptime assert(@alignOf(Metadata) == 1);
@@ -1884,7 +1882,7 @@ test "std.hash_map put and remove loop in random order" {
while (i < size) : (i += 1) {
try keys.append(i);
}
- var prng = std.rand.DefaultPrng.init(0);
+ var prng = std.Random.DefaultPrng.init(0);
const random = prng.random();
while (i < iterations) : (i += 1) {
@@ -1916,7 +1914,7 @@ test "std.hash_map remove one million elements in random order" {
keys.append(i) catch unreachable;
}
- var prng = std.rand.DefaultPrng.init(0);
+ var prng = std.Random.DefaultPrng.init(0);
const random = prng.random();
random.shuffle(u32, keys.items);
diff --git a/lib/std/heap.zig b/lib/std/heap.zig
index 93532d63f9..03e0682ea6 100644
--- a/lib/std/heap.zig
+++ b/lib/std/heap.zig
@@ -196,7 +196,16 @@ fn rawCResize(
) bool {
_ = log2_old_align;
_ = ret_addr;
- return new_len <= buf.len;
+
+ if (new_len <= buf.len)
+ return true;
+
+ if (CAllocator.supports_malloc_size) {
+ const full_len = CAllocator.malloc_size(buf.ptr);
+ if (new_len <= full_len) return true;
+ }
+
+ return false;
}
fn rawCFree(
diff --git a/lib/std/heap/arena_allocator.zig b/lib/std/heap/arena_allocator.zig
index 09f2e609f4..bde62ce8c2 100644
--- a/lib/std/heap/arena_allocator.zig
+++ b/lib/std/heap/arena_allocator.zig
@@ -250,7 +250,7 @@ test "ArenaAllocator (reset with preheating)" {
var arena_allocator = ArenaAllocator.init(std.testing.allocator);
defer arena_allocator.deinit();
// provides some variance in the allocated data
- var rng_src = std.rand.DefaultPrng.init(19930913);
+ var rng_src = std.Random.DefaultPrng.init(19930913);
const random = rng_src.random();
var rounds: usize = 25;
while (rounds > 0) {
diff --git a/lib/std/http.zig b/lib/std/http.zig
index 9b2bce1338..af966d89e7 100644
--- a/lib/std/http.zig
+++ b/lib/std/http.zig
@@ -1,12 +1,9 @@
-const std = @import("std.zig");
-
pub const Client = @import("http/Client.zig");
pub const Server = @import("http/Server.zig");
pub const protocol = @import("http/protocol.zig");
-const headers = @import("http/Headers.zig");
-
-pub const Headers = headers.Headers;
-pub const Field = headers.Field;
+pub const HeadParser = @import("http/HeadParser.zig");
+pub const ChunkParser = @import("http/ChunkParser.zig");
+pub const HeaderIterator = @import("http/HeaderIterator.zig");
pub const Version = enum {
@"HTTP/1.0",
@@ -18,7 +15,7 @@ pub const Version = enum {
/// https://datatracker.ietf.org/doc/html/rfc7231#section-4 Initial definition
///
/// https://datatracker.ietf.org/doc/html/rfc5789#section-2 PATCH
-pub const Method = enum(u64) { // TODO: should be u192 or u256, but neither is supported by the C backend, and therefore cannot pass CI
+pub const Method = enum(u64) {
GET = parse("GET"),
HEAD = parse("HEAD"),
POST = parse("POST"),
@@ -46,10 +43,6 @@ pub const Method = enum(u64) { // TODO: should be u192 or u256, but neither is s
try w.writeAll(str);
}
- pub fn format(value: Method, comptime _: []const u8, _: std.fmt.FormatOptions, writer: anytype) @TypeOf(writer).Error!void {
- return try value.write(writer);
- }
-
/// Returns true if a request of this method is allowed to have a body
/// Actual behavior from servers may vary and should still be checked
pub fn requestHasBody(self: Method) bool {
@@ -309,9 +302,22 @@ pub const Connection = enum {
close,
};
+/// A single header field, stored as a name/value pair of byte slices.
+pub const Header = struct {
+ name: []const u8,
+ value: []const u8,
+};
+
+const builtin = @import("builtin");
+const std = @import("std.zig");
+
test {
_ = Client;
_ = Method;
_ = Server;
_ = Status;
+ _ = HeadParser;
+ _ = ChunkParser;
+ if (builtin.os.tag != .wasi) {
+ _ = @import("http/test.zig");
+ }
}
diff --git a/lib/std/http/ChunkParser.zig b/lib/std/http/ChunkParser.zig
new file mode 100644
index 0000000000..adcdc74bc7
--- /dev/null
+++ b/lib/std/http/ChunkParser.zig
@@ -0,0 +1,131 @@
+//! Parser for transfer-encoding: chunked.
+//!
+//! Consumes the framing around chunk payloads: the hexadecimal size line that
+//! precedes each chunk and the line ending accepted before the next size line.
+//! `feed` must not be called while `state` is `data` or `invalid`.
+
+/// Which part of the chunk framing the next input byte is interpreted as.
+state: State,
+/// Chunk size accumulated from the hex digits seen so far; holds the complete
+/// size once `state` is `data`. `feed` only ever accumulates into this field;
+/// it does not clear it between chunks.
+chunk_len: u64,
+
+/// Starting value: expecting size digits, with nothing accumulated yet.
+pub const init: ChunkParser = .{
+    .state = .head_size,
+    .chunk_len = 0,
+};
+
+pub const State = enum {
+    /// Reading the hexadecimal digits of the chunk size.
+    head_size,
+    /// Past the size digits; skipping bytes until the end of the line.
+    head_ext,
+    /// Saw '\r' on the size line; only '\n' may follow.
+    head_r,
+    /// The size line is complete and `chunk_len` is final.
+    data,
+    /// Expecting the line ending ("\r\n" or a bare '\n') that precedes the
+    /// next size line.
+    data_suffix,
+    /// Saw '\r' while in `data_suffix`; only '\n' may follow.
+    data_suffix_r,
+    /// The input violated the expected syntax or the size overflowed `u64`.
+    invalid,
+};
+
+/// Returns the number of bytes consumed by the chunk size. This is always
+/// less than or equal to `bytes.len`.
+///
+/// After this function returns, `chunk_len` will contain the parsed chunk size
+/// in bytes when `state` is `data`. Alternately, `state` may become `invalid`,
+/// indicating a syntax error in the input stream or a chunk size that does not
+/// fit in a `u64`.
+///
+/// If the amount returned is less than `bytes.len`, the parser is either in
+/// the `data` state, with the first byte of the chunk at `bytes[result]`, or
+/// in the `invalid` state, with the offending byte at `bytes[result]`.
+///
+/// Asserts `state` is neither `data` nor `invalid`.
+pub fn feed(p: *ChunkParser, bytes: []const u8) usize {
+    for (bytes, 0..) |c, i| switch (p.state) {
+        .data_suffix => switch (c) {
+            '\r' => p.state = .data_suffix_r,
+            '\n' => p.state = .head_size,
+            else => {
+                p.state = .invalid;
+                return i;
+            },
+        },
+        .data_suffix_r => switch (c) {
+            '\n' => p.state = .head_size,
+            else => {
+                p.state = .invalid;
+                return i;
+            },
+        },
+        .head_size => {
+            const digit: u64 = switch (c) {
+                '0'...'9' => c - '0',
+                'A'...'F' => c - 'A' + 10,
+                'a'...'f' => c - 'a' + 10,
+                // A letter that is not a hexadecimal digit cannot be part of
+                // the size; reject it instead of folding it into `chunk_len`
+                // as a "digit" worth 16 or more.
+                'G'...'Z', 'g'...'z' => {
+                    p.state = .invalid;
+                    return i;
+                },
+                '\r' => {
+                    p.state = .head_r;
+                    continue;
+                },
+                '\n' => {
+                    p.state = .data;
+                    return i + 1;
+                },
+                else => {
+                    p.state = .head_ext;
+                    continue;
+                },
+            };
+
+            // Appending one more hex digit shifts the value left by four
+            // bits, so it overflows exactly when the top four bits are
+            // already in use. Comparing the wrapped result against the old
+            // value would miss cases where the wrapped value is still larger.
+            if (p.chunk_len > std.math.maxInt(u64) / 16) {
+                p.state = .invalid;
+                return i;
+            }
+
+            p.chunk_len = p.chunk_len * 16 + digit;
+        },
+        .head_ext => switch (c) {
+            '\r' => p.state = .head_r,
+            '\n' => {
+                p.state = .data;
+                return i + 1;
+            },
+            else => continue,
+        },
+        .head_r => switch (c) {
+            '\n' => {
+                p.state = .data;
+                return i + 1;
+            },
+            else => {
+                p.state = .invalid;
+                return i;
+            },
+        },
+        .data => unreachable,
+        .invalid => unreachable,
+    };
+    return bytes.len;
+}
+
+const ChunkParser = @This();
+const std = @import("std");
+
+test feed {
+    const testing = std.testing;
+
+    const data = "Ff\r\nf0f000 ; ext\n0\r\nffffffffffffffffffffffffffffffffffffffff\r\n";
+
+    var p = init;
+    const first = p.feed(data[0..]);
+    try testing.expectEqual(@as(u32, 4), first);
+    try testing.expectEqual(@as(u64, 0xff), p.chunk_len);
+    try testing.expectEqual(.data, p.state);
+
+    p = init;
+    const second = p.feed(data[first..]);
+    try testing.expectEqual(@as(u32, 13), second);
+    try testing.expectEqual(@as(u64, 0xf0f000), p.chunk_len);
+    try testing.expectEqual(.data, p.state);
+
+    p = init;
+    const third = p.feed(data[first + second ..]);
+    try testing.expectEqual(@as(u32, 3), third);
+    try testing.expectEqual(@as(u64, 0), p.chunk_len);
+    try testing.expectEqual(.data, p.state);
+
+    p = init;
+    const fourth = p.feed(data[first + second + third ..]);
+    try testing.expectEqual(@as(u32, 16), fourth);
+    try testing.expectEqual(@as(u64, 0xffffffffffffffff), p.chunk_len);
+    try testing.expectEqual(.invalid, p.state);
+
+    // The largest representable size is still accepted.
+    p = init;
+    try testing.expectEqual(@as(usize, 18), p.feed("ffffffffffffffff\r\n"));
+    try testing.expectEqual(@as(u64, 0xffffffffffffffff), p.chunk_len);
+    try testing.expectEqual(.data, p.state);
+
+    // Overflow whose wrapped value is larger than the previous value.
+    p = init;
+    try testing.expectEqual(@as(usize, 16), p.feed("1F" ++ "0" ** 15 ++ "\r\n"));
+    try testing.expectEqual(@as(u64, 0x1F00000000000000), p.chunk_len);
+    try testing.expectEqual(.invalid, p.state);
+
+    // Letters beyond 'f' are not hexadecimal digits.
+    p = init;
+    try testing.expectEqual(@as(usize, 1), p.feed("1g\r\n"));
+    try testing.expectEqual(@as(u64, 1), p.chunk_len);
+    try testing.expectEqual(.invalid, p.state);
+}
diff --git a/lib/std/http/Client.zig b/lib/std/http/Client.zig
index ed6aec55aa..1ffe1e8ea3 100644
--- a/lib/std/http/Client.zig
+++ b/lib/std/http/Client.zig
@@ -20,9 +20,7 @@ const proto = @import("protocol.zig");
pub const disable_tls = std.options.http_disable_tls;
-/// Allocator used for all allocations made by the client.
-///
-/// This allocator must be thread-safe.
+/// Used for all client allocations. Must be thread-safe.
allocator: Allocator,
ca_bundle: if (disable_tls) void else std.crypto.Certificate.Bundle = if (disable_tls) {} else .{},
@@ -35,14 +33,25 @@ next_https_rescan_certs: bool = true,
/// The pool of connections that can be reused (and currently in use).
connection_pool: ConnectionPool = .{},
-/// This is the proxy that will handle http:// connections. It *must not* be modified when the client has any active connections.
-http_proxy: ?Proxy = null,
-
-/// This is the proxy that will handle https:// connections. It *must not* be modified when the client has any active connections.
-https_proxy: ?Proxy = null,
+/// If populated, all http traffic travels through this third party.
+/// This field cannot be modified while the client has active connections.
+/// Pointer to externally-owned memory.
+http_proxy: ?*Proxy = null,
+/// If populated, all https traffic travels through this third party.
+/// This field cannot be modified while the client has active connections.
+/// Pointer to externally-owned memory.
+https_proxy: ?*Proxy = null,
/// A set of linked lists of connections that can be reused.
pub const ConnectionPool = struct {
+ mutex: std.Thread.Mutex = .{},
+ /// Open connections that are currently in use.
+ used: Queue = .{},
+ /// Open connections that are not currently in use.
+ free: Queue = .{},
+ free_len: usize = 0,
+ free_size: usize = 32,
+
/// The criteria for a connection to be considered a match.
pub const Criteria = struct {
host: []const u8,
@@ -53,14 +62,6 @@ pub const ConnectionPool = struct {
const Queue = std.DoublyLinkedList(Connection);
pub const Node = Queue.Node;
- mutex: std.Thread.Mutex = .{},
- /// Open connections that are currently in use.
- used: Queue = .{},
- /// Open connections that are not currently in use.
- free: Queue = .{},
- free_len: usize = 0,
- free_size: usize = 32,
-
/// Finds and acquires a connection from the connection pool matching the criteria. This function is threadsafe.
/// If no connection is found, null is returned.
pub fn findConnection(pool: *ConnectionPool, criteria: Criteria) ?*Connection {
@@ -189,11 +190,6 @@ pub const ConnectionPool = struct {
/// An interface to either a plain or TLS connection.
pub const Connection = struct {
- pub const buffer_size = std.crypto.tls.max_ciphertext_record_len;
- const BufferSize = std.math.IntFittingRange(0, buffer_size);
-
- pub const Protocol = enum { plain, tls };
-
stream: net.Stream,
/// undefined unless protocol is tls.
tls_client: if (!disable_tls) *std.crypto.tls.Client else void,
@@ -219,6 +215,11 @@ pub const Connection = struct {
read_buf: [buffer_size]u8 = undefined,
write_buf: [buffer_size]u8 = undefined,
+ pub const buffer_size = std.crypto.tls.max_ciphertext_record_len;
+ const BufferSize = std.math.IntFittingRange(0, buffer_size);
+
+ pub const Protocol = enum { plain, tls };
+
pub fn readvDirectTls(conn: *Connection, buffers: []std.os.iovec) ReadError!usize {
return conn.tls_client.readv(conn.stream, buffers) catch |err| {
// https://github.com/ziglang/zig/issues/2473
@@ -404,33 +405,65 @@ pub const RequestTransfer = union(enum) {
/// The decompressor for response messages.
pub const Compression = union(enum) {
- pub const DeflateDecompressor = std.compress.zlib.DecompressStream(Request.TransferReader);
- pub const GzipDecompressor = std.compress.gzip.Decompress(Request.TransferReader);
- pub const ZstdDecompressor = std.compress.zstd.DecompressStream(Request.TransferReader, .{});
+ pub const DeflateDecompressor = std.compress.zlib.Decompressor(Request.TransferReader);
+ pub const GzipDecompressor = std.compress.gzip.Decompressor(Request.TransferReader);
+ // https://github.com/ziglang/zig/issues/18937
+ //pub const ZstdDecompressor = std.compress.zstd.DecompressStream(Request.TransferReader, .{});
deflate: DeflateDecompressor,
gzip: GzipDecompressor,
- zstd: ZstdDecompressor,
+ // https://github.com/ziglang/zig/issues/18937
+ //zstd: ZstdDecompressor,
none: void,
};
/// A HTTP response originating from a server.
pub const Response = struct {
- pub const ParseError = Allocator.Error || error{
+ version: http.Version,
+ status: http.Status,
+ reason: []const u8,
+
+ /// Points into the user-provided `server_header_buffer`.
+ location: ?[]const u8 = null,
+ /// Points into the user-provided `server_header_buffer`.
+ content_type: ?[]const u8 = null,
+ /// Points into the user-provided `server_header_buffer`.
+ content_disposition: ?[]const u8 = null,
+
+ keep_alive: bool = false,
+
+ /// If present, the number of bytes in the response body.
+ content_length: ?u64 = null,
+
+ /// If present, the transfer encoding of the response body, otherwise none.
+ transfer_encoding: http.TransferEncoding = .none,
+
+ /// If present, the compression of the response body, otherwise identity (no compression).
+ transfer_compression: http.ContentEncoding = .identity,
+
+ parser: proto.HeadersParser,
+ compression: Compression = .none,
+
+ /// Whether the response body should be skipped. Any data read from the
+ /// response body will be discarded.
+ skip: bool = false,
+
+ pub const ParseError = error{
HttpHeadersInvalid,
HttpHeaderContinuationsUnsupported,
HttpTransferEncodingUnsupported,
HttpConnectionHeaderUnsupported,
InvalidContentLength,
- CompressionNotSupported,
+ CompressionUnsupported,
};
- pub fn parse(res: *Response, bytes: []const u8, trailing: bool) ParseError!void {
- var it = mem.tokenizeAny(u8, bytes, "\r\n");
+ pub fn parse(res: *Response, bytes: []const u8) ParseError!void {
+ var it = mem.splitSequence(u8, bytes, "\r\n");
- const first_line = it.next() orelse return error.HttpHeadersInvalid;
- if (first_line.len < 12)
+ const first_line = it.next().?;
+ if (first_line.len < 12) {
return error.HttpHeadersInvalid;
+ }
const version: http.Version = switch (int64(first_line[0..8])) {
int64("HTTP/1.0") => .@"HTTP/1.0",
@@ -445,24 +478,27 @@ pub const Response = struct {
res.status = status;
res.reason = reason;
- res.headers.clearRetainingCapacity();
-
while (it.next()) |line| {
- if (line.len == 0) return error.HttpHeadersInvalid;
+ if (line.len == 0) return;
switch (line[0]) {
' ', '\t' => return error.HttpHeaderContinuationsUnsupported,
else => {},
}
- var line_it = mem.tokenizeAny(u8, line, ": ");
- const header_name = line_it.next() orelse return error.HttpHeadersInvalid;
+ var line_it = mem.splitSequence(u8, line, ": ");
+ const header_name = line_it.next().?;
const header_value = line_it.rest();
-
- try res.headers.append(header_name, header_value);
-
- if (trailing) continue;
-
- if (std.ascii.eqlIgnoreCase(header_name, "transfer-encoding")) {
+ if (header_name.len == 0) return error.HttpHeadersInvalid;
+
+ if (std.ascii.eqlIgnoreCase(header_name, "connection")) {
+ res.keep_alive = !std.ascii.eqlIgnoreCase(header_value, "close");
+ } else if (std.ascii.eqlIgnoreCase(header_name, "content-type")) {
+ res.content_type = header_value;
+ } else if (std.ascii.eqlIgnoreCase(header_name, "location")) {
+ res.location = header_value;
+ } else if (std.ascii.eqlIgnoreCase(header_name, "content-disposition")) {
+ res.content_disposition = header_value;
+ } else if (std.ascii.eqlIgnoreCase(header_name, "transfer-encoding")) {
// Transfer-Encoding: second, first
// Transfer-Encoding: deflate, chunked
var iter = mem.splitBackwardsScalar(u8, header_value, ',');
@@ -508,6 +544,7 @@ pub const Response = struct {
}
}
}
+ return error.HttpHeadersInvalid; // missing empty line
}
inline fn int64(array: *const [8]u8) u64 {
@@ -531,60 +568,25 @@ pub const Response = struct {
try expectEqual(@as(u10, 999), parseInt3("999"));
}
- /// The HTTP version this response is using.
- version: http.Version,
-
- /// The status code of the response.
- status: http.Status,
-
- /// The reason phrase of the response.
- reason: []const u8,
-
- /// If present, the number of bytes in the response body.
- content_length: ?u64 = null,
-
- /// If present, the transfer encoding of the response body, otherwise none.
- transfer_encoding: http.TransferEncoding = .none,
-
- /// If present, the compression of the response body, otherwise identity (no compression).
- transfer_compression: http.ContentEncoding = .identity,
-
- /// The headers received from the server.
- headers: http.Headers,
- parser: proto.HeadersParser,
- compression: Compression = .none,
-
- /// Whether the response body should be skipped. Any data read from the response body will be discarded.
- skip: bool = false,
+ pub fn iterateHeaders(r: Response) http.HeaderIterator {
+ return http.HeaderIterator.init(r.parser.get());
+ }
};
/// A HTTP request that has been sent.
///
/// Order of operations: open -> send[ -> write -> finish] -> wait -> read
pub const Request = struct {
- /// The uri that this request is being sent to.
uri: Uri,
-
- /// The client that this request was created from.
client: *Client,
-
- /// Underlying connection to the server. This is null when the connection is released.
+ /// This is null when the connection is released.
connection: ?*Connection,
+ keep_alive: bool,
method: http.Method,
version: http.Version = .@"HTTP/1.1",
-
- /// The list of HTTP request headers.
- headers: http.Headers,
-
- /// The transfer encoding of the request body.
- transfer_encoding: RequestTransfer = .none,
-
- /// The redirect quota left for this request.
- redirects_left: u32,
-
- /// Whether the request should follow redirects.
- handle_redirects: bool,
+ transfer_encoding: RequestTransfer,
+ redirect_behavior: RedirectBehavior,
/// Whether the request should handle a 100-continue response before sending the request body.
handle_continue: bool,
@@ -594,25 +596,60 @@ pub const Request = struct {
/// This field is undefined until `wait` is called.
response: Response,
- /// Used as a allocator for resolving redirects locations.
- arena: std.heap.ArenaAllocator,
+ /// Standard headers that have default, but overridable, behavior.
+ headers: Headers,
+
+ /// These headers are kept even when following a redirect to a
+ /// different domain.
+ /// Externally-owned; must outlive the Request.
+ extra_headers: []const http.Header,
+
+ /// These headers are stripped when following a redirect to a different
+ /// domain.
+ /// Externally-owned; must outlive the Request.
+ privileged_headers: []const http.Header,
+
+ pub const Headers = struct {
+ host: Value = .default,
+ authorization: Value = .default,
+ user_agent: Value = .default,
+ connection: Value = .default,
+ accept_encoding: Value = .default,
+ content_type: Value = .default,
+
+ pub const Value = union(enum) {
+ default,
+ omit,
+ override: []const u8,
+ };
+ };
- /// Frees all resources associated with the request.
- pub fn deinit(req: *Request) void {
- switch (req.response.compression) {
- .none => {},
- .deflate => |*deflate| deflate.deinit(),
- .gzip => |*gzip| gzip.deinit(),
- .zstd => |*zstd| zstd.deinit(),
+ /// Any value other than `not_allowed` or `unhandled` means that integer represents
+ /// how many remaining redirects are allowed.
+ pub const RedirectBehavior = enum(u16) {
+ /// The next redirect will cause an error.
+ not_allowed = 0,
+ /// Redirects are passed to the client to analyze the redirect response
+ /// directly.
+ unhandled = std.math.maxInt(u16),
+ _,
+
+ pub fn subtractOne(rb: *RedirectBehavior) void {
+ switch (rb.*) {
+ .not_allowed => unreachable,
+ .unhandled => unreachable,
+ _ => rb.* = @enumFromInt(@intFromEnum(rb.*) - 1),
+ }
}
- req.headers.deinit();
- req.response.headers.deinit();
-
- if (req.response.parser.header_bytes_owned) {
- req.response.parser.header_bytes.deinit(req.client.allocator);
+ pub fn remaining(rb: RedirectBehavior) u16 {
+ assert(rb != .unhandled);
+ return @intFromEnum(rb);
}
+ };
+ /// Frees all resources associated with the request.
+ pub fn deinit(req: *Request) void {
if (req.connection) |connection| {
if (!req.response.parser.done) {
// If the response wasn't fully read, then we need to close the connection.
@@ -620,23 +657,15 @@ pub const Request = struct {
}
req.client.connection_pool.release(req.client.allocator, connection);
}
-
- req.arena.deinit();
req.* = undefined;
}
- // This function must deallocate all resources associated with the request, or keep those which will be used
- // This needs to be kept in sync with deinit and request
+ // This function must deallocate all resources associated with the request,
+ // or keep those which will be used.
+ // This needs to be kept in sync with deinit and request.
fn redirect(req: *Request, uri: Uri) !void {
assert(req.response.parser.done);
- switch (req.response.compression) {
- .none => {},
- .deflate => |*deflate| deflate.deinit(),
- .gzip => |*gzip| gzip.deinit(),
- .zstd => |*zstd| zstd.deinit(),
- }
-
req.client.connection_pool.release(req.client.allocator, req.connection.?);
req.connection = null;
@@ -651,15 +680,13 @@ pub const Request = struct {
req.uri = uri;
req.connection = try req.client.connect(host, port, protocol);
- req.redirects_left -= 1;
- req.response.headers.clearRetainingCapacity();
+ req.redirect_behavior.subtractOne();
req.response.parser.reset();
req.response = .{
.status = undefined,
.reason = undefined,
.version = undefined,
- .headers = req.response.headers,
.parser = req.response.parser,
};
}
@@ -667,15 +694,17 @@ pub const Request = struct {
pub const SendError = Connection.WriteError || error{ InvalidContentLength, UnsupportedTransferEncoding };
pub const SendOptions = struct {
- /// Specifies that the uri should be used as is. You guarantee that the uri is already escaped.
+ /// Specifies that the uri is already escaped.
raw_uri: bool = false,
};
/// Send the HTTP request headers to the server.
pub fn send(req: *Request, options: SendOptions) SendError!void {
- if (!req.method.requestHasBody() and req.transfer_encoding != .none) return error.UnsupportedTransferEncoding;
+ if (!req.method.requestHasBody() and req.transfer_encoding != .none)
+ return error.UnsupportedTransferEncoding;
- const w = req.connection.?.writer();
+ const connection = req.connection.?;
+ const w = connection.writer();
try req.method.write(w);
try w.writeByte(' ');
@@ -684,9 +713,9 @@ pub const Request = struct {
try req.uri.writeToStream(.{ .authority = true }, w);
} else {
try req.uri.writeToStream(.{
- .scheme = req.connection.?.proxied,
- .authentication = req.connection.?.proxied,
- .authority = req.connection.?.proxied,
+ .scheme = connection.proxied,
+ .authentication = connection.proxied,
+ .authority = connection.proxied,
.path = true,
.query = true,
.raw = options.raw_uri,
@@ -696,97 +725,93 @@ pub const Request = struct {
try w.writeAll(@tagName(req.version));
try w.writeAll("\r\n");
- if (!req.headers.contains("host")) {
- try w.writeAll("Host: ");
+ if (try emitOverridableHeader("host: ", req.headers.host, w)) {
+ try w.writeAll("host: ");
try req.uri.writeToStream(.{ .authority = true }, w);
try w.writeAll("\r\n");
}
- if ((req.uri.user != null or req.uri.password != null) and
- !req.headers.contains("authorization"))
- {
- try w.writeAll("Authorization: ");
- const authorization = try req.connection.?.allocWriteBuffer(
- @intCast(basic_authorization.valueLengthFromUri(req.uri)),
- );
- std.debug.assert(basic_authorization.value(req.uri, authorization).len == authorization.len);
- try w.writeAll("\r\n");
+ if (try emitOverridableHeader("authorization: ", req.headers.authorization, w)) {
+ if (req.uri.user != null or req.uri.password != null) {
+ try w.writeAll("authorization: ");
+ const authorization = try connection.allocWriteBuffer(
+ @intCast(basic_authorization.valueLengthFromUri(req.uri)),
+ );
+ assert(basic_authorization.value(req.uri, authorization).len == authorization.len);
+ try w.writeAll("\r\n");
+ }
}
- if (!req.headers.contains("user-agent")) {
- try w.writeAll("User-Agent: zig/");
+ if (try emitOverridableHeader("user-agent: ", req.headers.user_agent, w)) {
+ try w.writeAll("user-agent: zig/");
try w.writeAll(builtin.zig_version_string);
try w.writeAll(" (std.http)\r\n");
}
- if (!req.headers.contains("connection")) {
- try w.writeAll("Connection: keep-alive\r\n");
+ if (try emitOverridableHeader("connection: ", req.headers.connection, w)) {
+ if (req.keep_alive) {
+ try w.writeAll("connection: keep-alive\r\n");
+ } else {
+ try w.writeAll("connection: close\r\n");
+ }
}
- if (!req.headers.contains("accept-encoding")) {
- try w.writeAll("Accept-Encoding: gzip, deflate, zstd\r\n");
+ if (try emitOverridableHeader("accept-encoding: ", req.headers.accept_encoding, w)) {
+ // https://github.com/ziglang/zig/issues/18937
+ //try w.writeAll("accept-encoding: gzip, deflate, zstd\r\n");
+ try w.writeAll("accept-encoding: gzip, deflate\r\n");
}
- if (!req.headers.contains("te")) {
- try w.writeAll("TE: gzip, deflate, trailers\r\n");
+ switch (req.transfer_encoding) {
+ .chunked => try w.writeAll("transfer-encoding: chunked\r\n"),
+ .content_length => |len| try w.print("content-length: {d}\r\n", .{len}),
+ .none => {},
}
- const has_transfer_encoding = req.headers.contains("transfer-encoding");
- const has_content_length = req.headers.contains("content-length");
-
- if (!has_transfer_encoding and !has_content_length) {
- switch (req.transfer_encoding) {
- .chunked => try w.writeAll("Transfer-Encoding: chunked\r\n"),
- .content_length => |content_length| try w.print("Content-Length: {d}\r\n", .{content_length}),
- .none => {},
- }
- } else {
- if (has_transfer_encoding) {
- const transfer_encoding = req.headers.getFirstValue("transfer-encoding").?;
- if (std.mem.eql(u8, transfer_encoding, "chunked")) {
- req.transfer_encoding = .chunked;
- } else {
- return error.UnsupportedTransferEncoding;
- }
- } else if (has_content_length) {
- const content_length = std.fmt.parseInt(u64, req.headers.getFirstValue("content-length").?, 10) catch return error.InvalidContentLength;
-
- req.transfer_encoding = .{ .content_length = content_length };
- } else {
- req.transfer_encoding = .none;
- }
+ if (try emitOverridableHeader("content-type: ", req.headers.content_type, w)) {
+ // The default is to omit content-type if not provided because
+ // "application/octet-stream" is redundant.
}
- for (req.headers.list.items) |entry| {
- if (entry.value.len == 0) continue;
+ for (req.extra_headers) |header| {
+ assert(header.name.len != 0);
- try w.writeAll(entry.name);
+ try w.writeAll(header.name);
try w.writeAll(": ");
- try w.writeAll(entry.value);
+ try w.writeAll(header.value);
try w.writeAll("\r\n");
}
- if (req.connection.?.proxied) {
- const proxy_headers: ?http.Headers = switch (req.connection.?.protocol) {
- .plain => if (req.client.http_proxy) |proxy| proxy.headers else null,
- .tls => if (req.client.https_proxy) |proxy| proxy.headers else null,
- };
-
- if (proxy_headers) |headers| {
- for (headers.list.items) |entry| {
- if (entry.value.len == 0) continue;
+ if (connection.proxied) proxy: {
+ const proxy = switch (connection.protocol) {
+ .plain => req.client.http_proxy,
+ .tls => req.client.https_proxy,
+ } orelse break :proxy;
- try w.writeAll(entry.name);
- try w.writeAll(": ");
- try w.writeAll(entry.value);
- try w.writeAll("\r\n");
- }
- }
+ const authorization = proxy.authorization orelse break :proxy;
+ try w.writeAll("proxy-authorization: ");
+ try w.writeAll(authorization);
+ try w.writeAll("\r\n");
}
try w.writeAll("\r\n");
- try req.connection.?.flush();
+ try connection.flush();
+ }
+
+ /// Returns true if the default behavior is required, otherwise handles
+ /// writing (or not writing) the header.
+ fn emitOverridableHeader(prefix: []const u8, v: Headers.Value, w: anytype) !bool {
+ switch (v) {
+ .default => return true,
+ .omit => return false,
+ .override => |x| {
+ try w.writeAll(prefix);
+ try w.writeAll(x);
+ try w.writeAll("\r\n");
+ return false;
+ },
+ }
}
const TransferReadError = Connection.ReadError || proto.HeadersParser.ReadError;
@@ -810,145 +835,172 @@ pub const Request = struct {
return index;
}
- pub const WaitError = RequestError || SendError || TransferReadError || proto.HeadersParser.CheckCompleteHeadError || Response.ParseError || Uri.ParseError || error{ TooManyHttpRedirects, RedirectRequiresResend, HttpRedirectMissingLocation, CompressionInitializationFailed, CompressionNotSupported };
+ pub const WaitError = RequestError || SendError || TransferReadError ||
+ proto.HeadersParser.CheckCompleteHeadError || Response.ParseError ||
+ error{ // TODO: file zig fmt issue for this bad indentation
+ TooManyHttpRedirects,
+ RedirectRequiresResend,
+ HttpRedirectLocationMissing,
+ HttpRedirectLocationInvalid,
+ CompressionInitializationFailed,
+ CompressionUnsupported,
+ };
/// Waits for a response from the server and parses any headers that are sent.
/// This function will block until the final response is received.
///
- /// If `handle_redirects` is true and the request has no payload, then this function will automatically follow
- /// redirects. If a request payload is present, then this function will error with error.RedirectRequiresResend.
+ /// If handling redirects and the request has no payload, then this
+ /// function will automatically follow redirects. If a request payload is
+ /// present, then this function will error with
+ /// error.RedirectRequiresResend.
///
- /// Must be called after `send` and, if any data was written to the request body, then also after `finish`.
+ /// Must be called after `send` and, if any data was written to the request
+ /// body, then also after `finish`.
pub fn wait(req: *Request) WaitError!void {
- while (true) { // handle redirects
+ while (true) {
+ // This while loop is for handling redirects, which means the request's
+ // connection may be different than the previous iteration. However, it
+ // is still guaranteed to be non-null with each iteration of this loop.
+ const connection = req.connection.?;
+
while (true) { // read headers
- try req.connection.?.fill();
+ try connection.fill();
- const nchecked = try req.response.parser.checkCompleteHead(req.client.allocator, req.connection.?.peek());
- req.connection.?.drop(@intCast(nchecked));
+ const nchecked = try req.response.parser.checkCompleteHead(connection.peek());
+ connection.drop(@intCast(nchecked));
if (req.response.parser.state.isContent()) break;
}
- try req.response.parse(req.response.parser.header_bytes.items, false);
+ try req.response.parse(req.response.parser.get());
if (req.response.status == .@"continue") {
- req.response.parser.done = true; // we're done parsing the continue response, reset to prepare for the real response
+ // We're done parsing the continue response; reset to prepare
+ // for the real response.
+ req.response.parser.done = true;
req.response.parser.reset();
if (req.handle_continue)
continue;
- return; // we're not handling the 100-continue, return to the caller
+ return; // we're not handling the 100-continue
}
// we're switching protocols, so this connection is no longer doing http
if (req.method == .CONNECT and req.response.status.class() == .success) {
- req.connection.?.closing = false;
+ connection.closing = false;
req.response.parser.done = true;
-
- return; // the connection is not HTTP past this point, return to the caller
+ return; // the connection is not HTTP past this point
}
- // we default to using keep-alive if not provided in the client if the server asks for it
- const req_connection = req.headers.getFirstValue("connection");
- const req_keepalive = req_connection != null and !std.ascii.eqlIgnoreCase("close", req_connection.?);
-
- const res_connection = req.response.headers.getFirstValue("connection");
- const res_keepalive = res_connection != null and !std.ascii.eqlIgnoreCase("close", res_connection.?);
- if (res_keepalive and (req_keepalive or req_connection == null)) {
- req.connection.?.closing = false;
- } else {
- req.connection.?.closing = true;
- }
+ connection.closing = !req.response.keep_alive or !req.keep_alive;
- // Any response to a HEAD request and any response with a 1xx (Informational), 204 (No Content), or 304 (Not Modified)
- // status code is always terminated by the first empty line after the header fields, regardless of the header fields
- // present in the message
- if (req.method == .HEAD or req.response.status.class() == .informational or req.response.status == .no_content or req.response.status == .not_modified) {
+ // Any response to a HEAD request and any response with a 1xx
+ // (Informational), 204 (No Content), or 304 (Not Modified) status
+ // code is always terminated by the first empty line after the
+ // header fields, regardless of the header fields present in the
+ // message.
+ if (req.method == .HEAD or req.response.status.class() == .informational or
+ req.response.status == .no_content or req.response.status == .not_modified)
+ {
req.response.parser.done = true;
-
- return; // the response is empty, no further setup or redirection is necessary
+ return; // The response is empty; no further setup or redirection is necessary.
}
- if (req.response.transfer_encoding != .none) {
- switch (req.response.transfer_encoding) {
- .none => unreachable,
- .chunked => {
- req.response.parser.next_chunk_length = 0;
- req.response.parser.state = .chunk_head_size;
- },
- }
- } else if (req.response.content_length) |cl| {
- req.response.parser.next_chunk_length = cl;
+ switch (req.response.transfer_encoding) {
+ .none => {
+ if (req.response.content_length) |cl| {
+ req.response.parser.next_chunk_length = cl;
- if (cl == 0) req.response.parser.done = true;
- } else {
- // read until the connection is closed
- req.response.parser.next_chunk_length = std.math.maxInt(u64);
+ if (cl == 0) req.response.parser.done = true;
+ } else {
+ // read until the connection is closed
+ req.response.parser.next_chunk_length = std.math.maxInt(u64);
+ }
+ },
+ .chunked => {
+ req.response.parser.next_chunk_length = 0;
+ req.response.parser.state = .chunk_head_size;
+ },
}
- if (req.response.status.class() == .redirect and req.handle_redirects) {
+ if (req.response.status.class() == .redirect and req.redirect_behavior != .unhandled) {
+ // skip the body of the redirect response, this will at least
+ // leave the connection in a known good state.
req.response.skip = true;
-
- // skip the body of the redirect response, this will at least leave the connection in a known good state.
- const empty = @as([*]u8, undefined)[0..0];
- assert(try req.transferRead(empty) == 0); // we're skipping, no buffer is necessary
-
- if (req.redirects_left == 0) return error.TooManyHttpRedirects;
-
- const location = req.response.headers.getFirstValue("location") orelse
- return error.HttpRedirectMissingLocation;
-
- const arena = req.arena.allocator();
-
- const location_duped = try arena.dupe(u8, location);
-
- const new_url = Uri.parse(location_duped) catch try Uri.parseWithoutScheme(location_duped);
- const resolved_url = try req.uri.resolve(new_url, false, arena);
-
- // is the redirect location on the same domain, or a subdomain of the original request?
- const is_same_domain_or_subdomain = std.ascii.endsWithIgnoreCase(resolved_url.host.?, req.uri.host.?) and (resolved_url.host.?.len == req.uri.host.?.len or resolved_url.host.?[resolved_url.host.?.len - req.uri.host.?.len - 1] == '.');
-
- if (resolved_url.host == null or !is_same_domain_or_subdomain or !std.ascii.eqlIgnoreCase(resolved_url.scheme, req.uri.scheme)) {
- // we're redirecting to a different domain, strip privileged headers like cookies
- _ = req.headers.delete("authorization");
- _ = req.headers.delete("www-authenticate");
- _ = req.headers.delete("cookie");
- _ = req.headers.delete("cookie2");
+ assert(try req.transferRead(&.{}) == 0); // we're skipping, no buffer is necessary
+
+ if (req.redirect_behavior == .not_allowed) return error.TooManyHttpRedirects;
+
+ const location = req.response.location orelse
+ return error.HttpRedirectLocationMissing;
+
+ // This mutates the beginning of header_buffer and uses that
+ // for the backing memory of the returned new_uri.
+ const header_buffer = req.response.parser.header_bytes_buffer;
+ const new_uri = req.uri.resolve_inplace(location, header_buffer) catch
+ return error.HttpRedirectLocationInvalid;
+
+ // The new URI references the beginning of header_bytes_buffer memory.
+ // That memory will be kept, but everything after it will be
+ // reused by the subsequent request. In other words,
+ // header_bytes_buffer must be large enough to store all
+ // redirect locations as well as the final request header.
+ const path_end = new_uri.path.ptr + new_uri.path.len;
+ // https://github.com/ziglang/zig/issues/1738
+ const path_offset = @intFromPtr(path_end) - @intFromPtr(header_buffer.ptr);
+ const end_offset = @max(path_offset, location.len);
+ req.response.parser.header_bytes_buffer = header_buffer[end_offset..];
+
+ const is_same_domain_or_subdomain =
+ std.ascii.endsWithIgnoreCase(new_uri.host.?, req.uri.host.?) and
+ (new_uri.host.?.len == req.uri.host.?.len or
+ new_uri.host.?[new_uri.host.?.len - req.uri.host.?.len - 1] == '.');
+
+ if (new_uri.host == null or !is_same_domain_or_subdomain or
+ !std.ascii.eqlIgnoreCase(new_uri.scheme, req.uri.scheme))
+ {
+ // When redirecting to a different domain, strip privileged headers.
+ req.privileged_headers = &.{};
}
- if (req.response.status == .see_other or ((req.response.status == .moved_permanently or req.response.status == .found) and req.method == .POST)) {
- // we're redirecting to a GET, so we need to change the method and remove the body
+ if (switch (req.response.status) {
+ .see_other => true,
+ .moved_permanently, .found => req.method == .POST,
+ else => false,
+ }) {
+ // A redirect to a GET must change the method and remove the body.
req.method = .GET;
req.transfer_encoding = .none;
- _ = req.headers.delete("transfer-encoding");
- _ = req.headers.delete("content-length");
- _ = req.headers.delete("content-type");
+ req.headers.content_type = .omit;
}
if (req.transfer_encoding != .none) {
- return error.RedirectRequiresResend; // The request body has already been sent. The request is still in a valid state, but the redirect must be handled manually.
+ // The request body has already been sent. The request is
+ // still in a valid state, but the redirect must be handled
+ // manually.
+ return error.RedirectRequiresResend;
}
- try req.redirect(resolved_url);
-
+ try req.redirect(new_uri);
try req.send(.{});
} else {
req.response.skip = false;
if (!req.response.parser.done) {
switch (req.response.transfer_compression) {
.identity => req.response.compression = .none,
- .compress, .@"x-compress" => return error.CompressionNotSupported,
+ .compress, .@"x-compress" => return error.CompressionUnsupported,
.deflate => req.response.compression = .{
- .deflate = std.compress.zlib.decompressStream(req.client.allocator, req.transferReader()) catch return error.CompressionInitializationFailed,
+ .deflate = std.compress.zlib.decompressor(req.transferReader()),
},
.gzip, .@"x-gzip" => req.response.compression = .{
- .gzip = std.compress.gzip.decompress(req.client.allocator, req.transferReader()) catch return error.CompressionInitializationFailed,
- },
- .zstd => req.response.compression = .{
- .zstd = std.compress.zstd.decompressStream(req.client.allocator, req.transferReader()),
+ .gzip = std.compress.gzip.decompressor(req.transferReader()),
},
+ // https://github.com/ziglang/zig/issues/18937
+ //.zstd => req.response.compression = .{
+ // .zstd = std.compress.zstd.decompressStream(req.client.allocator, req.transferReader()),
+ //},
+ .zstd => return error.CompressionUnsupported,
}
}
@@ -957,7 +1009,8 @@ pub const Request = struct {
}
}
- pub const ReadError = TransferReadError || proto.HeadersParser.CheckCompleteHeadError || error{ DecompressionFailure, InvalidTrailers };
+ pub const ReadError = TransferReadError || proto.HeadersParser.CheckCompleteHeadError ||
+ error{ DecompressionFailure, InvalidTrailers };
pub const Reader = std.io.Reader(*Request, ReadError, read);
@@ -970,28 +1023,20 @@ pub const Request = struct {
const out_index = switch (req.response.compression) {
.deflate => |*deflate| deflate.read(buffer) catch return error.DecompressionFailure,
.gzip => |*gzip| gzip.read(buffer) catch return error.DecompressionFailure,
- .zstd => |*zstd| zstd.read(buffer) catch return error.DecompressionFailure,
+ // https://github.com/ziglang/zig/issues/18937
+ //.zstd => |*zstd| zstd.read(buffer) catch return error.DecompressionFailure,
else => try req.transferRead(buffer),
};
+ if (out_index > 0) return out_index;
- if (out_index == 0) {
- const has_trail = !req.response.parser.state.isContent();
-
- while (!req.response.parser.state.isContent()) { // read trailing headers
- try req.connection.?.fill();
-
- const nchecked = try req.response.parser.checkCompleteHead(req.client.allocator, req.connection.?.peek());
- req.connection.?.drop(@intCast(nchecked));
- }
+ while (!req.response.parser.state.isContent()) { // read trailing headers
+ try req.connection.?.fill();
- if (has_trail) {
- // The response headers before the trailers are already guaranteed to be valid, so they will always be parsed again and cannot return an error.
- // This will *only* fail for a malformed trailer.
- req.response.parse(req.response.parser.header_bytes.items, true) catch return error.InvalidTrailers;
- }
+ const nchecked = try req.response.parser.checkCompleteHead(req.connection.?.peek());
+ req.connection.?.drop(@intCast(nchecked));
}
- return out_index;
+ return 0;
}
/// Reads data from the response body. Must be called after `wait`.
@@ -1061,16 +1106,12 @@ pub const Request = struct {
}
};
-/// A HTTP proxy server.
pub const Proxy = struct {
- allocator: Allocator,
- headers: http.Headers,
-
protocol: Connection.Protocol,
host: []const u8,
+ authorization: ?[]const u8,
port: u16,
-
- supports_connect: bool = true,
+ supports_connect: bool,
};
/// Release all associated resources with the client.
@@ -1082,116 +1123,71 @@ pub fn deinit(client: *Client) void {
client.connection_pool.deinit(client.allocator);
- if (client.http_proxy) |*proxy| {
- proxy.allocator.free(proxy.host);
- proxy.headers.deinit();
- }
-
- if (client.https_proxy) |*proxy| {
- proxy.allocator.free(proxy.host);
- proxy.headers.deinit();
- }
-
if (!disable_tls)
client.ca_bundle.deinit(client.allocator);
client.* = undefined;
}
-/// Uses the *_proxy environment variable to set any unset proxies for the client.
-/// This function *must not* be called when the client has any active connections.
-pub fn loadDefaultProxies(client: *Client) !void {
+/// Populates `http_proxy` and `https_proxy` via standard proxy environment variables.
+/// Asserts the client has no active connections.
+/// Uses `arena` for a few small allocations that must outlive the client, or
+/// at least remain valid until those fields are set to different values.
+pub fn initDefaultProxies(client: *Client, arena: Allocator) !void {
// Prevent any new connections from being created.
client.connection_pool.mutex.lock();
defer client.connection_pool.mutex.unlock();
- assert(client.connection_pool.used.first == null); // There are still active requests.
+ assert(client.connection_pool.used.first == null); // There are active requests.
- if (client.http_proxy == null) http: {
- const content: []const u8 = if (std.process.hasEnvVarConstant("http_proxy"))
- try std.process.getEnvVarOwned(client.allocator, "http_proxy")
- else if (std.process.hasEnvVarConstant("HTTP_PROXY"))
- try std.process.getEnvVarOwned(client.allocator, "HTTP_PROXY")
- else if (std.process.hasEnvVarConstant("all_proxy"))
- try std.process.getEnvVarOwned(client.allocator, "all_proxy")
- else if (std.process.hasEnvVarConstant("ALL_PROXY"))
- try std.process.getEnvVarOwned(client.allocator, "ALL_PROXY")
- else
- break :http;
- defer client.allocator.free(content);
-
- const uri = Uri.parse(content) catch
- Uri.parseWithoutScheme(content) catch
- break :http;
-
- const protocol = if (uri.scheme.len == 0)
- .plain // No scheme, assume http://
- else
- protocol_map.get(uri.scheme) orelse break :http; // Unknown scheme, ignore
-
- const host = if (uri.host) |host| try client.allocator.dupe(u8, host) else break :http; // Missing host, ignore
- client.http_proxy = .{
- .allocator = client.allocator,
- .headers = .{ .allocator = client.allocator },
-
- .protocol = protocol,
- .host = host,
- .port = uri.port orelse switch (protocol) {
- .plain => 80,
- .tls => 443,
- },
- };
+ if (client.http_proxy == null) {
+ client.http_proxy = try createProxyFromEnvVar(arena, &.{
+ "http_proxy", "HTTP_PROXY", "all_proxy", "ALL_PROXY",
+ });
+ }
- if (uri.user != null or uri.password != null) {
- const authorization = try client.allocator.alloc(u8, basic_authorization.valueLengthFromUri(uri));
- errdefer client.allocator.free(authorization);
- std.debug.assert(basic_authorization.value(uri, authorization).len == authorization.len);
- try client.http_proxy.?.headers.appendOwned(.{ .unowned = "proxy-authorization" }, .{ .owned = authorization });
- }
+ if (client.https_proxy == null) {
+ client.https_proxy = try createProxyFromEnvVar(arena, &.{
+ "https_proxy", "HTTPS_PROXY", "all_proxy", "ALL_PROXY",
+ });
}
+}
- if (client.https_proxy == null) https: {
- const content: []const u8 = if (std.process.hasEnvVarConstant("https_proxy"))
- try std.process.getEnvVarOwned(client.allocator, "https_proxy")
- else if (std.process.hasEnvVarConstant("HTTPS_PROXY"))
- try std.process.getEnvVarOwned(client.allocator, "HTTPS_PROXY")
- else if (std.process.hasEnvVarConstant("all_proxy"))
- try std.process.getEnvVarOwned(client.allocator, "all_proxy")
- else if (std.process.hasEnvVarConstant("ALL_PROXY"))
- try std.process.getEnvVarOwned(client.allocator, "ALL_PROXY")
- else
- break :https;
- defer client.allocator.free(content);
-
- const uri = Uri.parse(content) catch
- Uri.parseWithoutScheme(content) catch
- break :https;
-
- const protocol = if (uri.scheme.len == 0)
- .plain // No scheme, assume http://
- else
- protocol_map.get(uri.scheme) orelse break :https; // Unknown scheme, ignore
-
- const host = if (uri.host) |host| try client.allocator.dupe(u8, host) else break :https; // Missing host, ignore
- client.https_proxy = .{
- .allocator = client.allocator,
- .headers = .{ .allocator = client.allocator },
-
- .protocol = protocol,
- .host = host,
- .port = uri.port orelse switch (protocol) {
- .plain => 80,
- .tls => 443,
- },
+fn createProxyFromEnvVar(arena: Allocator, env_var_names: []const []const u8) !?*Proxy {
+ const content = for (env_var_names) |name| {
+ break std.process.getEnvVarOwned(arena, name) catch |err| switch (err) {
+ error.EnvironmentVariableNotFound => continue,
+ else => |e| return e,
};
+ } else return null;
- if (uri.user != null or uri.password != null) {
- const authorization = try client.allocator.alloc(u8, basic_authorization.valueLengthFromUri(uri));
- errdefer client.allocator.free(authorization);
- std.debug.assert(basic_authorization.value(uri, authorization).len == authorization.len);
- try client.https_proxy.?.headers.appendOwned(.{ .unowned = "proxy-authorization" }, .{ .owned = authorization });
- }
- }
+ const uri = Uri.parse(content) catch try Uri.parseWithoutScheme(content);
+
+ const protocol = if (uri.scheme.len == 0)
+ .plain // No scheme, assume http://
+ else
+ protocol_map.get(uri.scheme) orelse return null; // Unknown scheme, ignore
+
+ const host = uri.host orelse return error.HttpProxyMissingHost;
+
+ const authorization: ?[]const u8 = if (uri.user != null or uri.password != null) a: {
+ const authorization = try arena.alloc(u8, basic_authorization.valueLengthFromUri(uri));
+ assert(basic_authorization.value(uri, authorization).len == authorization.len);
+ break :a authorization;
+ } else null;
+
+ const proxy = try arena.create(Proxy);
+ proxy.* = .{
+ .protocol = protocol,
+ .host = host,
+ .authorization = authorization,
+ .port = uri.port orelse switch (protocol) {
+ .plain => 80,
+ .tls => 443,
+ },
+ .supports_connect = true,
+ };
+ return proxy;
}
pub const basic_authorization = struct {
@@ -1213,8 +1209,8 @@ pub const basic_authorization = struct {
}
pub fn value(uri: Uri, out: []u8) []u8 {
- std.debug.assert(uri.user == null or uri.user.?.len <= max_user_len);
- std.debug.assert(uri.password == null or uri.password.?.len <= max_password_len);
+ assert(uri.user == null or uri.user.?.len <= max_user_len);
+ assert(uri.password == null or uri.password.?.len <= max_password_len);
@memcpy(out[0..prefix.len], prefix);
@@ -1288,14 +1284,12 @@ pub fn connectTcp(client: *Client, host: []const u8, port: u16, protocol: Connec
return &conn.data;
}
-pub const ConnectUnixError = Allocator.Error || std.os.SocketError || error{ NameTooLong, Unsupported } || std.os.ConnectError;
+pub const ConnectUnixError = Allocator.Error || std.os.SocketError || error{NameTooLong} || std.os.ConnectError;
/// Connect to `path` as a unix domain socket. This will reuse a connection if one is already open.
///
/// This function is threadsafe.
pub fn connectUnix(client: *Client, path: []const u8) ConnectUnixError!*Connection {
- if (!net.has_unix_sockets) return error.Unsupported;
-
if (client.connection_pool.findConnection(.{
.host = path,
.port = 0,
@@ -1325,7 +1319,8 @@ pub fn connectUnix(client: *Client, path: []const u8) ConnectUnixError!*Connecti
return &conn.data;
}
-/// Connect to `tunnel_host:tunnel_port` using the specified proxy with HTTP CONNECT. This will reuse a connection if one is already open.
+/// Connect to `tunnel_host:tunnel_port` using the specified proxy with HTTP
+/// CONNECT. This will reuse a connection if one is already open.
///
/// This function is threadsafe.
pub fn connectTunnel(
@@ -1351,7 +1346,7 @@ pub fn connectTunnel(
client.connection_pool.release(client.allocator, conn);
}
- const uri = Uri{
+ const uri: Uri = .{
.scheme = "http",
.user = null,
.password = null,
@@ -1362,13 +1357,11 @@ pub fn connectTunnel(
.fragment = null,
};
- // we can use a small buffer here because a CONNECT response should be very small
var buffer: [8096]u8 = undefined;
-
- var req = client.open(.CONNECT, uri, proxy.headers, .{
- .handle_redirects = false,
+ var req = client.open(.CONNECT, uri, .{
+ .redirect_behavior = .unhandled,
.connection = conn,
- .header_strategy = .{ .static = &buffer },
+ .server_header_buffer = &buffer,
}) catch |err| {
std.log.debug("err {}", .{err});
break :tunnel err;
@@ -1407,45 +1400,51 @@ pub fn connectTunnel(
const ConnectErrorPartial = ConnectTcpError || error{ UnsupportedUrlScheme, ConnectionRefused };
pub const ConnectError = ConnectErrorPartial || RequestError;
-/// Connect to `host:port` using the specified protocol. This will reuse a connection if one is already open.
-/// If a proxy is configured for the client, then the proxy will be used to connect to the host.
+/// Connect to `host:port` using the specified protocol. This will reuse a
+/// connection if one is already open.
+/// If a proxy is configured for the client, then the proxy will be used to
+/// connect to the host.
+///
+/// When a proxy applies, a CONNECT tunnel is tried first if the proxy is
+/// marked `supports_connect`; on `error.TunnelNotSupported` the proxy is
+/// used as a plain HTTP proxy and the returned connection is marked
+/// `proxied`.
///
/// This function is threadsafe.
-pub fn connect(client: *Client, host: []const u8, port: u16, protocol: Connection.Protocol) ConnectError!*Connection {
- // pointer required so that `supports_connect` can be updated if a CONNECT fails
- const potential_proxy: ?*Proxy = switch (protocol) {
- .plain => if (client.http_proxy) |*proxy_info| proxy_info else null,
- .tls => if (client.https_proxy) |*proxy_info| proxy_info else null,
- };
-
- if (potential_proxy) |proxy| {
- // don't attempt to proxy the proxy thru itself.
- if (std.mem.eql(u8, proxy.host, host) and proxy.port == port and proxy.protocol == protocol) {
- return client.connectTcp(host, port, protocol);
- }
-
- if (proxy.supports_connect) tunnel: {
- return connectTunnel(client, proxy, host, port) catch |err| switch (err) {
- error.TunnelNotSupported => break :tunnel,
- else => |e| return e,
- };
- }
+pub fn connect(
+ client: *Client,
+ host: []const u8,
+ port: u16,
+ protocol: Connection.Protocol,
+) ConnectError!*Connection {
+ const proxy = switch (protocol) {
+ .plain => client.http_proxy,
+ .tls => client.https_proxy,
+ } orelse return client.connectTcp(host, port, protocol);
+
+ // Prevent proxying through itself.
+ if (std.ascii.eqlIgnoreCase(proxy.host, host) and
+ proxy.port == port and proxy.protocol == protocol)
+ {
+ return client.connectTcp(host, port, protocol);
+ }
- // fall back to using the proxy as a normal http proxy
- const conn = try client.connectTcp(proxy.host, proxy.port, proxy.protocol);
- errdefer {
- conn.closing = true;
- client.connection_pool.release(conn);
- }
+ if (proxy.supports_connect) tunnel: {
+ return connectTunnel(client, proxy, host, port) catch |err| switch (err) {
+ error.TunnelNotSupported => break :tunnel,
+ else => |e| return e,
+ };
+ }
- conn.proxied = true;
- return conn;
+ // fall back to using the proxy as a normal http proxy
+ const conn = try client.connectTcp(proxy.host, proxy.port, proxy.protocol);
+ errdefer {
+ conn.closing = true;
+ // Same call shape as the release in `connectTunnel`, which passes
+ // the client's allocator as the first argument.
+ client.connection_pool.release(client.allocator, conn);
}
- return client.connectTcp(host, port, protocol);
+ conn.proxied = true;
+ return conn;
}
-pub const RequestError = ConnectTcpError || ConnectErrorPartial || Request.SendError || std.fmt.ParseIntError || Connection.WriteError || error{
+pub const RequestError = ConnectTcpError || ConnectErrorPartial || Request.SendError ||
+ std.fmt.ParseIntError || Connection.WriteError ||
+ error{ // TODO: file a zig fmt issue for this bad indentation
UnsupportedUrlScheme,
UriMissingHost,
@@ -1456,36 +1455,44 @@ pub const RequestError = ConnectTcpError || ConnectErrorPartial || Request.SendE
+/// Per-request configuration passed to `open`.
pub const RequestOptions = struct {
version: http.Version = .@"HTTP/1.1",
- /// Automatically ignore 100 Continue responses. This assumes you don't care, and will have sent the body before you
- /// wait for the response.
+ /// Automatically ignore 100 Continue responses. This assumes you don't
+ /// care, and will have sent the body before you wait for the response.
///
- /// If this is not the case AND you know the server will send a 100 Continue, set this to false and wait for a
- /// response before sending the body. If you wait AND the server does not send a 100 Continue before you finish the
- /// request, then the request *will* deadlock.
+ /// If this is not the case AND you know the server will send a 100
+ /// Continue, set this to false and wait for a response before sending the
+ /// body. If you wait AND the server does not send a 100 Continue before
+ /// you finish the request, then the request *will* deadlock.
handle_continue: bool = true,
- /// Automatically follow redirects. This will only follow redirects for repeatable requests (ie. with no payload or the server has acknowledged the payload)
- handle_redirects: bool = true,
+ /// If false, close the connection after the one request. If true,
+ /// participate in the client connection pool.
+ keep_alive: bool = true,
+
+ /// This field specifies whether to automatically follow redirects, and if
+ /// so, how many redirects to follow before returning an error.
+ ///
+ /// This will only follow redirects for repeatable requests (ie. with no
+ /// payload or the server has acknowledged the payload).
+ ///
+ /// NOTE(review): the default `@enumFromInt(3)` presumably means "follow
+ /// at most 3 redirects"; confirm against `Request.RedirectBehavior`.
+ redirect_behavior: Request.RedirectBehavior = @enumFromInt(3),
- /// How many redirects to follow before returning an error.
- max_redirects: u32 = 3,
- header_strategy: StorageStrategy = .{ .dynamic = 16 * 1024 },
+ /// Externally-owned memory used to store the server's entire HTTP header.
+ /// `error.HttpHeadersOversize` is returned from read() when the
+ /// server sends too many bytes of HTTP headers.
+ /// This field has no default and must always be provided.
+ server_header_buffer: []u8,
/// Must be an already acquired connection.
connection: ?*Connection = null,
- pub const StorageStrategy = union(enum) {
- /// In this case, the client's Allocator will be used to store the
- /// entire HTTP header. This value is the maximum total size of
- /// HTTP headers allowed, otherwise
- /// error.HttpHeadersExceededSizeLimit is returned from read().
- dynamic: usize,
- /// This is used to store the entire HTTP header. If the HTTP
- /// header is too big to fit, `error.HttpHeadersExceededSizeLimit`
- /// is returned from read(). When this is used, `error.OutOfMemory`
- /// cannot be returned from `read()`.
- static: []u8,
- };
+ /// Standard headers that have default, but overridable, behavior.
+ headers: Request.Headers = .{},
+ /// These headers are kept including when following a redirect to a
+ /// different domain.
+ /// Externally-owned; must outlive the Request.
+ extra_headers: []const http.Header = &.{},
+ /// These headers are stripped when following a redirect to a different
+ /// domain.
+ /// Externally-owned; must outlive the Request.
+ privileged_headers: []const http.Header = &.{},
};
pub const protocol_map = std.ComptimeStringMap(Connection.Protocol, .{
@@ -1498,11 +1505,31 @@ pub const protocol_map = std.ComptimeStringMap(Connection.Protocol, .{
/// Open a connection to the host specified by `uri` and prepare to send a HTTP request.
///
/// `uri` must remain alive during the entire request.
-/// `headers` is cloned and may be freed after this function returns.
///
/// The caller is responsible for calling `deinit()` on the `Request`.
/// This function is threadsafe.
-pub fn open(client: *Client, method: http.Method, uri: Uri, headers: http.Headers, options: RequestOptions) RequestError!Request {
+///
+/// Asserts that "\r\n" does not occur in any header name or value.
+pub fn open(
+ client: *Client,
+ method: http.Method,
+ uri: Uri,
+ options: RequestOptions,
+) RequestError!Request {
+ if (std.debug.runtime_safety) {
+ for (options.extra_headers) |header| {
+ assert(header.name.len != 0);
+ assert(std.mem.indexOfScalar(u8, header.name, ':') == null);
+ assert(std.mem.indexOfPosLinear(u8, header.name, 0, "\r\n") == null);
+ assert(std.mem.indexOfPosLinear(u8, header.value, 0, "\r\n") == null);
+ }
+ for (options.privileged_headers) |header| {
+ assert(header.name.len != 0);
+ assert(std.mem.indexOfPosLinear(u8, header.name, 0, "\r\n") == null);
+ assert(std.mem.indexOfPosLinear(u8, header.value, 0, "\r\n") == null);
+ }
+ }
+
const protocol = protocol_map.get(uri.scheme) orelse return error.UnsupportedUrlScheme;
const port: u16 = uri.port orelse switch (protocol) {
@@ -1530,163 +1557,131 @@ pub fn open(client: *Client, method: http.Method, uri: Uri, headers: http.Header
.uri = uri,
.client = client,
.connection = conn,
- .headers = try headers.clone(client.allocator), // Headers must be cloned to properly handle header transformations in redirects.
+ .keep_alive = options.keep_alive,
.method = method,
.version = options.version,
- .redirects_left = options.max_redirects,
- .handle_redirects = options.handle_redirects,
+ .transfer_encoding = .none,
+ .redirect_behavior = options.redirect_behavior,
.handle_continue = options.handle_continue,
.response = .{
.status = undefined,
.reason = undefined,
.version = undefined,
- .headers = http.Headers{ .allocator = client.allocator, .owned = false },
- .parser = switch (options.header_strategy) {
- .dynamic => |max| proto.HeadersParser.initDynamic(max),
- .static => |buf| proto.HeadersParser.initStatic(buf),
- },
+ .parser = proto.HeadersParser.init(options.server_header_buffer),
},
- .arena = undefined,
+ .headers = options.headers,
+ .extra_headers = options.extra_headers,
+ .privileged_headers = options.privileged_headers,
};
errdefer req.deinit();
- req.arena = std.heap.ArenaAllocator.init(client.allocator);
-
return req;
}
+/// Options for the one-shot `fetch` helper.
pub const FetchOptions = struct {
+ /// Storage for the response head. When null, `fetch` uses a 16 KiB
+ /// buffer of its own.
+ server_header_buffer: ?[]u8 = null,
+ /// When null, `fetch` uses `@enumFromInt(3)` if there is no payload and
+ /// `.unhandled` if there is one.
+ redirect_behavior: ?Request.RedirectBehavior = null,
+
+ /// If the server sends a body, it will be appended to this ArrayList.
+ /// `max_append_size` provides an upper limit for how much they can grow.
+ response_storage: ResponseStorage = .ignore,
+ /// When null, `fetch` allows 2 MiB for `.dynamic` storage and the whole
+ /// unused capacity for `.static` storage.
+ max_append_size: ?usize = null,
+
+ location: Location,
+ /// When null, `fetch` uses POST if `payload` is set and GET otherwise.
+ method: ?http.Method = null,
+ /// Request body; `fetch` sends it with a content length of `payload.len`.
+ payload: ?[]const u8 = null,
+ /// Forwarded to `Request.send` as its `raw_uri` option.
+ raw_uri: bool = false,
+ keep_alive: bool = true,
+
+ /// Standard headers that have default, but overridable, behavior.
+ headers: Request.Headers = .{},
+ /// These headers are kept including when following a redirect to a
+ /// different domain.
+ /// Externally-owned; must outlive the Request.
+ extra_headers: []const http.Header = &.{},
+ /// These headers are stripped when following a redirect to a different
+ /// domain.
+ /// Externally-owned; must outlive the Request.
+ privileged_headers: []const http.Header = &.{},
+
pub const Location = union(enum) {
url: []const u8,
uri: Uri,
};
- pub const Payload = union(enum) {
- string: []const u8,
- file: std.fs.File,
- none,
+ pub const ResponseStorage = union(enum) {
+ /// The response body is discarded.
+ ignore,
+ /// Only the existing capacity will be used.
+ static: *std.ArrayListUnmanaged(u8),
+ /// The body is appended to the list, up to `max_append_size`.
+ dynamic: *std.ArrayList(u8),
};
-
- pub const ResponseStrategy = union(enum) {
- storage: RequestOptions.StorageStrategy,
- file: std.fs.File,
- none,
- };
-
- header_strategy: RequestOptions.StorageStrategy = .{ .dynamic = 16 * 1024 },
- response_strategy: ResponseStrategy = .{ .storage = .{ .dynamic = 16 * 1024 * 1024 } },
-
- location: Location,
- method: http.Method = .GET,
- headers: http.Headers = http.Headers{ .allocator = std.heap.page_allocator, .owned = false },
- payload: Payload = .none,
- raw_uri: bool = false,
};
+/// Outcome of `fetch`. Only the response status is reported; any body goes
+/// to the storage selected by `FetchOptions.response_storage`.
pub const FetchResult = struct {
status: http.Status,
- body: ?[]const u8 = null,
- headers: http.Headers,
-
- allocator: Allocator,
- options: FetchOptions,
-
- pub fn deinit(res: *FetchResult) void {
- if (res.options.response_strategy == .storage and res.options.response_strategy.storage == .dynamic) {
- if (res.body) |body| res.allocator.free(body);
- }
-
- res.headers.deinit();
- }
};
/// Perform a one-shot HTTP request with the provided options.
///
/// This function is threadsafe.
-pub fn fetch(client: *Client, allocator: Allocator, options: FetchOptions) !FetchResult {
- const has_transfer_encoding = options.headers.contains("transfer-encoding");
- const has_content_length = options.headers.contains("content-length");
-
- if (has_content_length or has_transfer_encoding) return error.UnsupportedHeader;
-
+///
+/// The request is opened, sent (with `options.payload` as the body, if any),
+/// finished and waited on. The response body is then discarded or stored
+/// according to `options.response_storage`, and only the status is returned.
+pub fn fetch(client: *Client, options: FetchOptions) !FetchResult {
const uri = switch (options.location) {
.url => |u| try Uri.parse(u),
.uri => |u| u,
};
-
- var req = try open(client, options.method, uri, options.headers, .{
- .header_strategy = options.header_strategy,
- .handle_redirects = options.payload == .none,
+ // Fallback storage for the response head when the caller supplies none.
+ var server_header_buffer: [16 * 1024]u8 = undefined;
+
+ // Without an explicit method, a payload implies POST; otherwise GET.
+ const method: http.Method = options.method orelse
+ if (options.payload != null) .POST else .GET;
+
+ var req = try open(client, method, uri, .{
+ .server_header_buffer = options.server_header_buffer orelse &server_header_buffer,
+ .redirect_behavior = options.redirect_behavior orelse
+ if (options.payload == null) @enumFromInt(3) else .unhandled,
+ .headers = options.headers,
+ .extra_headers = options.extra_headers,
+ .privileged_headers = options.privileged_headers,
+ .keep_alive = options.keep_alive,
});
defer req.deinit();
- { // Block to maintain lock of file to attempt to prevent a race condition where another process modifies the file while we are reading it.
- // This relies on other processes actually obeying the advisory lock, which is not guaranteed.
- if (options.payload == .file) try options.payload.file.lock(.shared);
- defer if (options.payload == .file) options.payload.file.unlock();
+ // Declare the payload size up front, before the request head is sent.
+ if (options.payload) |payload| req.transfer_encoding = .{ .content_length = payload.len };
- switch (options.payload) {
- .string => |str| req.transfer_encoding = .{ .content_length = str.len },
- .file => |file| req.transfer_encoding = .{ .content_length = (try file.stat()).size },
- .none => {},
- }
-
- try req.send(.{ .raw_uri = options.raw_uri });
+ try req.send(.{ .raw_uri = options.raw_uri });
- switch (options.payload) {
- .string => |str| try req.writeAll(str),
- .file => |file| {
- try file.seekTo(0);
- var fifo = std.fifo.LinearFifo(u8, .{ .Static = 8192 }).init();
- try fifo.pump(file.reader(), req.writer());
- },
- .none => {},
- }
-
- try req.finish();
- }
+ if (options.payload) |payload| try req.writeAll(payload);
+ try req.finish();
try req.wait();
- var res = FetchResult{
- .status = req.response.status,
- .headers = try req.response.headers.clone(allocator),
-
- .allocator = allocator,
- .options = options,
- };
-
- switch (options.response_strategy) {
- .storage => |storage| switch (storage) {
- .dynamic => |max| res.body = try req.reader().readAllAlloc(allocator, max),
- .static => |buf| res.body = buf[0..try req.reader().readAll(buf)],
+ switch (options.response_storage) {
+ .ignore => {
+ // Take advantage of request internals to discard the response body
+ // and make the connection available for another request.
+ req.response.skip = true;
+ assert(try req.transferRead(&.{}) == 0); // No buffer is necessary when skipping.
},
- .file => |file| {
- var fifo = std.fifo.LinearFifo(u8, .{ .Static = 8192 }).init();
- try fifo.pump(req.reader(), file.writer());
+ .dynamic => |list| {
+ // The list may grow; 2 MiB is the cap when none is given.
+ const max_append_size = options.max_append_size orelse 2 * 1024 * 1024;
+ try req.reader().readAllArrayList(list, max_append_size);
},
- .none => { // Take advantage of request internals to discard the response body and make the connection available for another request.
- req.response.skip = true;
-
- const empty = @as([*]u8, undefined)[0..0];
- assert(try req.transferRead(empty) == 0); // we're skipping, no buffer is necessary
+ .static => |list| {
+ // Read into the list's unused capacity only, further limited by
+ // `max_append_size` when that is smaller.
+ const buf = b: {
+ const buf = list.unusedCapacitySlice();
+ if (options.max_append_size) |len| {
+ if (len < buf.len) break :b buf[0..len];
+ }
+ break :b buf;
+ };
+ list.items.len += try req.reader().readAll(buf);
},
}
- return res;
+ return .{
+ .status = req.response.status,
+ };
}
test {
- const native_endian = comptime builtin.cpu.arch.endian();
- if (builtin.zig_backend == .stage2_llvm and native_endian == .big) {
- // https://github.com/ziglang/zig/issues/13782
- return error.SkipZigTest;
- }
-
- if (builtin.os.tag == .wasi) return error.SkipZigTest;
-
- if (builtin.zig_backend == .stage2_x86_64 and
- !comptime std.Target.x86.featureSetHas(builtin.cpu.features, .avx)) return error.SkipZigTest;
-
- std.testing.refAllDecls(@This());
+ // Reference `initDefaultProxies` without calling it.
+ // NOTE(review): presumably so the test build still compiles it; confirm.
+ _ = &initDefaultProxies;
}
diff --git a/lib/std/http/HeadParser.zig b/lib/std/http/HeadParser.zig
new file mode 100644
index 0000000000..bb49faa14b
--- /dev/null
+++ b/lib/std/http/HeadParser.zig
@@ -0,0 +1,371 @@
+//! Finds the end of an HTTP head in a stream.
+
+state: State = .start,
+
+/// Progress through a possible head terminator ("\r\n\r\n" or "\n\n").
+/// The `seen_*` names spell the newline bytes most recently consumed by
+/// `feed`; `finished` means the end of the head has been found.
+pub const State = enum {
+ start,
+ seen_n,
+ seen_r,
+ seen_rn,
+ seen_rnr,
+ finished,
+};
+
+/// Returns the number of bytes consumed by headers. This is always less
+/// than or equal to `bytes.len`.
+///
+/// If the amount returned is less than `bytes.len`, the parser is in a
+/// content state and the first byte of content is located at
+/// `bytes[result]`.
+///
+/// Parser state is kept in `p.state` across calls, so the head may be
+/// delivered in several pieces.
+pub fn feed(p: *HeadParser, bytes: []const u8) usize {
+ // Width of the bulk-scan window: the suggested SIMD length, never below 8.
+ const vector_len: comptime_int = @max(std.simd.suggestVectorLength(u8) orelse 1, 8);
+ var index: usize = 0;
+
+ while (true) {
+ switch (p.state) {
+ .finished => return index,
+ .start => switch (bytes.len - index) {
+ 0 => return index,
+ 1 => {
+ switch (bytes[index]) {
+ '\r' => p.state = .seen_r,
+ '\n' => p.state = .seen_n,
+ else => {},
+ }
+
+ return index + 1;
+ },
+ 2 => {
+ const b16 = int16(bytes[index..][0..2]);
+ const b8 = intShift(u8, b16);
+
+ switch (b8) {
+ '\r' => p.state = .seen_r,
+ '\n' => p.state = .seen_n,
+ else => {},
+ }
+
+ switch (b16) {
+ int16("\r\n") => p.state = .seen_rn,
+ int16("\n\n") => p.state = .finished,
+ else => {},
+ }
+
+ return index + 2;
+ },
+ 3 => {
+ const b24 = int24(bytes[index..][0..3]);
+ const b16 = intShift(u16, b24);
+ const b8 = intShift(u8, b24);
+
+ switch (b8) {
+ '\r' => p.state = .seen_r,
+ '\n' => p.state = .seen_n,
+ else => {},
+ }
+
+ switch (b16) {
+ int16("\r\n") => p.state = .seen_rn,
+ int16("\n\n") => p.state = .finished,
+ else => {},
+ }
+
+ switch (b24) {
+ int24("\r\n\r") => p.state = .seen_rnr,
+ else => {},
+ }
+
+ return index + 3;
+ },
+ 4...vector_len - 1 => {
+ // b8/b16/b24 hold the last 1/2/3 bytes of this 4-byte window.
+ // NOTE(review): a bare "\n\n" that does not end the window is
+ // not detected by these checks - confirm this is intended.
+ const b32 = int32(bytes[index..][0..4]);
+ const b24 = intShift(u24, b32);
+ const b16 = intShift(u16, b32);
+ const b8 = intShift(u8, b32);
+
+ switch (b8) {
+ '\r' => p.state = .seen_r,
+ '\n' => p.state = .seen_n,
+ else => {},
+ }
+
+ switch (b16) {
+ int16("\r\n") => p.state = .seen_rn,
+ int16("\n\n") => p.state = .finished,
+ else => {},
+ }
+
+ switch (b24) {
+ int24("\r\n\r") => p.state = .seen_rnr,
+ else => {},
+ }
+
+ switch (b32) {
+ int32("\r\n\r\n") => p.state = .finished,
+ else => {},
+ }
+
+ index += 4;
+ continue;
+ },
+ else => {
+ const chunk = bytes[index..][0..vector_len];
+ // Count the '\r' and '\n' bytes in the window.
+ const matches = if (use_vectors) matches: {
+ const Vector = @Vector(vector_len, u8);
+ // const BoolVector = @Vector(vector_len, bool);
+ const BitVector = @Vector(vector_len, u1);
+ const SizeVector = @Vector(vector_len, u8);
+
+ const v: Vector = chunk.*;
+ const matches_r: BitVector = @bitCast(v == @as(Vector, @splat('\r')));
+ const matches_n: BitVector = @bitCast(v == @as(Vector, @splat('\n')));
+ const matches_or: SizeVector = matches_r | matches_n;
+
+ break :matches @reduce(.Add, matches_or);
+ } else matches: {
+ var matches: u8 = 0;
+ for (chunk) |byte| switch (byte) {
+ '\r', '\n' => matches += 1,
+ else => {},
+ };
+ break :matches matches;
+ };
+ switch (matches) {
+ 0 => {},
+ // A lone newline byte only matters if it is the window's last byte.
+ 1 => switch (chunk[vector_len - 1]) {
+ '\r' => p.state = .seen_r,
+ '\n' => p.state = .seen_n,
+ else => {},
+ },
+ // NOTE(review): with 2 or 3 matches only the window's tail is
+ // examined; a bare "\n\n" earlier in the window goes
+ // unnoticed - confirm this is intended.
+ 2 => {
+ const b16 = int16(chunk[vector_len - 2 ..][0..2]);
+ const b8 = intShift(u8, b16);
+
+ switch (b8) {
+ '\r' => p.state = .seen_r,
+ '\n' => p.state = .seen_n,
+ else => {},
+ }
+
+ switch (b16) {
+ int16("\r\n") => p.state = .seen_rn,
+ int16("\n\n") => p.state = .finished,
+ else => {},
+ }
+ },
+ 3 => {
+ const b24 = int24(chunk[vector_len - 3 ..][0..3]);
+ const b16 = intShift(u16, b24);
+ const b8 = intShift(u8, b24);
+
+ switch (b8) {
+ '\r' => p.state = .seen_r,
+ '\n' => p.state = .seen_n,
+ else => {},
+ }
+
+ switch (b16) {
+ int16("\r\n") => p.state = .seen_rn,
+ int16("\n\n") => p.state = .finished,
+ else => {},
+ }
+
+ switch (b24) {
+ int24("\r\n\r") => p.state = .seen_rnr,
+ else => {},
+ }
+ },
+ 4...vector_len => {
+ // Enough newline bytes for a complete terminator: test
+ // every 4-byte offset in the window.
+ // NOTE(review): `b16` is the last two bytes of the 4-byte
+ // slice (see `intShift`), so a "\n\n" match ends at
+ // `i + 4` while `i + 2` is returned - verify.
+ inline for (0..vector_len - 3) |i_usize| {
+ const i = @as(u32, @truncate(i_usize));
+
+ const b32 = int32(chunk[i..][0..4]);
+ const b16 = intShift(u16, b32);
+
+ if (b32 == int32("\r\n\r\n")) {
+ p.state = .finished;
+ return index + i + 4;
+ } else if (b16 == int16("\n\n")) {
+ p.state = .finished;
+ return index + i + 2;
+ }
+ }
+
+ const b24 = int24(chunk[vector_len - 3 ..][0..3]);
+ const b16 = intShift(u16, b24);
+ const b8 = intShift(u8, b24);
+
+ switch (b8) {
+ '\r' => p.state = .seen_r,
+ '\n' => p.state = .seen_n,
+ else => {},
+ }
+
+ switch (b16) {
+ int16("\r\n") => p.state = .seen_rn,
+ int16("\n\n") => p.state = .finished,
+ else => {},
+ }
+
+ switch (b24) {
+ int24("\r\n\r") => p.state = .seen_rnr,
+ else => {},
+ }
+ },
+ else => unreachable,
+ }
+
+ index += vector_len;
+ continue;
+ },
+ },
+ .seen_n => switch (bytes.len - index) {
+ 0 => return index,
+ else => {
+ switch (bytes[index]) {
+ '\n' => p.state = .finished,
+ else => p.state = .start,
+ }
+
+ index += 1;
+ continue;
+ },
+ },
+ .seen_r => switch (bytes.len - index) {
+ 0 => return index,
+ 1 => {
+ switch (bytes[index]) {
+ '\n' => p.state = .seen_rn,
+ '\r' => p.state = .seen_r,
+ else => p.state = .start,
+ }
+
+ return index + 1;
+ },
+ 2 => {
+ const b16 = int16(bytes[index..][0..2]);
+ const b8 = intShift(u8, b16);
+
+ switch (b8) {
+ '\r' => p.state = .seen_r,
+ '\n' => p.state = .seen_rn,
+ else => p.state = .start,
+ }
+
+ switch (b16) {
+ int16("\r\n") => p.state = .seen_rn,
+ int16("\n\r") => p.state = .seen_rnr,
+ int16("\n\n") => p.state = .finished,
+ else => {},
+ }
+
+ return index + 2;
+ },
+ else => {
+ const b24 = int24(bytes[index..][0..3]);
+ const b16 = intShift(u16, b24);
+ const b8 = intShift(u8, b24);
+
+ switch (b8) {
+ '\r' => p.state = .seen_r,
+ '\n' => p.state = .seen_n,
+ else => p.state = .start,
+ }
+
+ switch (b16) {
+ int16("\r\n") => p.state = .seen_rn,
+ int16("\n\n") => p.state = .finished,
+ else => {},
+ }
+
+ switch (b24) {
+ int24("\n\r\n") => p.state = .finished,
+ else => {},
+ }
+
+ index += 3;
+ continue;
+ },
+ },
+ .seen_rn => switch (bytes.len - index) {
+ 0 => return index,
+ 1 => {
+ switch (bytes[index]) {
+ '\r' => p.state = .seen_rnr,
+ '\n' => p.state = .seen_n,
+ else => p.state = .start,
+ }
+
+ return index + 1;
+ },
+ else => {
+ const b16 = int16(bytes[index..][0..2]);
+ const b8 = intShift(u8, b16);
+
+ switch (b8) {
+ '\r' => p.state = .seen_rnr,
+ '\n' => p.state = .seen_n,
+ else => p.state = .start,
+ }
+
+ switch (b16) {
+ int16("\r\n") => p.state = .finished,
+ int16("\n\n") => p.state = .finished,
+ else => {},
+ }
+
+ index += 2;
+ continue;
+ },
+ },
+ .seen_rnr => switch (bytes.len - index) {
+ 0 => return index,
+ else => {
+ switch (bytes[index]) {
+ '\n' => p.state = .finished,
+ else => p.state = .start,
+ }
+
+ index += 1;
+ continue;
+ },
+ },
+ }
+
+ return index;
+ }
+}
+
+/// Reinterprets two bytes as a `u16` with `@bitCast` (native byte order).
+inline fn int16(array: *const [2]u8) u16 {
+ return @bitCast(array.*);
+}
+
+/// Reinterprets three bytes as a `u24` with `@bitCast` (native byte order).
+inline fn int24(array: *const [3]u8) u24 {
+ return @bitCast(array.*);
+}
+
+/// Reinterprets four bytes as a `u32` with `@bitCast` (native byte order).
+inline fn int32(array: *const [4]u8) u32 {
+ return @bitCast(array.*);
+}
+
+/// Narrows `x` to `T`, keeping the bytes that came last in memory when `x`
+/// was produced by `int16`/`int24`/`int32`.
+inline fn intShift(comptime T: type, x: anytype) T {
+ switch (@import("builtin").cpu.arch.endian()) {
+ // Little-endian: later bytes are the high bits, so shift them down.
+ .little => return @truncate(x >> (@bitSizeOf(@TypeOf(x)) - @bitSizeOf(T))),
+ // Big-endian: later bytes are already the low bits.
+ .big => return @truncate(x),
+ }
+}
+
+const HeadParser = @This();
+const std = @import("std");
+const use_vectors = builtin.zig_backend != .stage2_x86_64;
+const builtin = @import("builtin");
+
+// The head in `data` is 35 bytes long ("Hello" is content). Split the input
+// at every offset from 0 to 35 and check that the two `feed` calls together
+// consume exactly the head.
+test feed {
+ const data = "GET / HTTP/1.1\r\nHost: localhost\r\n\r\nHello";
+
+ for (0..36) |i| {
+ var p: HeadParser = .{};
+ try std.testing.expectEqual(i, p.feed(data[0..i]));
+ try std.testing.expectEqual(35 - i, p.feed(data[i..]));
+ }
+}
diff --git a/lib/std/http/HeaderIterator.zig b/lib/std/http/HeaderIterator.zig
new file mode 100644
index 0000000000..515058859d
--- /dev/null
+++ b/lib/std/http/HeaderIterator.zig
@@ -0,0 +1,68 @@
+bytes: []const u8,
+index: usize,
+is_trailer: bool,
+
+/// Creates an iterator positioned just past the first line of `bytes`
+/// (everything up to and including the first "\r\n").
+///
+/// Asserts (via `.?`) that `bytes` contains a "\r\n".
+pub fn init(bytes: []const u8) HeaderIterator {
+ return .{
+ .bytes = bytes,
+ .index = std.mem.indexOfPosLinear(u8, bytes, 0, "\r\n").? + 2,
+ .is_trailer = false,
+ };
+}
+
+/// Returns the next header line as a name/value pair, or `null` when there
+/// is nothing more to return.
+///
+/// The first empty line ends the regular headers; if another line follows,
+/// it is returned and `is_trailer` is set. A later empty line, or missing
+/// data after the first one, yields `null`.
+///
+/// The name is everything before the first ':'. The value is the remainder
+/// with surrounding spaces and tabs removed, so "a: b", "a:b" and "a:  b "
+/// all yield the value "b". A line without ':' yields the whole line as the
+/// name and an empty value.
+///
+/// Asserts (via `.?`) that a "\r\n" exists at or after `it.index`.
+pub fn next(it: *HeaderIterator) ?std.http.Header {
+ const end = std.mem.indexOfPosLinear(u8, it.bytes, it.index, "\r\n").?;
+ if (end != it.index) {
+ const line = it.bytes[it.index..end];
+ it.index = end + 2;
+ return splitHeaderLine(line);
+ }
+
+ // Empty line: the regular headers are over; anything after is a trailer.
+ if (it.is_trailer) return null;
+ const next_end = std.mem.indexOfPosLinear(u8, it.bytes, end + 2, "\r\n") orelse
+ return null;
+ it.is_trailer = true;
+ it.index = next_end + 2;
+ return splitHeaderLine(it.bytes[end + 2 .. next_end]);
+}
+
+/// Splits one header line (without its "\r\n") at the first ':' and trims
+/// optional whitespace around the value.
+fn splitHeaderLine(line: []const u8) std.http.Header {
+ const colon = std.mem.indexOfScalar(u8, line, ':') orelse
+ return .{ .name = line, .value = "" };
+ return .{
+ .name = line[0..colon],
+ .value = std.mem.trim(u8, line[colon + 1 ..], " \t"),
+ };
+}
+
+// Walks three regular headers (one with an empty value), then one trailer
+// after the blank line, and finally expects `null`.
+test next {
+ var it = HeaderIterator.init("200 OK\r\na: b\r\nc: \r\nd: e\r\n\r\nf: g\r\n\r\n");
+ try std.testing.expect(!it.is_trailer);
+ {
+ const header = it.next().?;
+ try std.testing.expect(!it.is_trailer);
+ try std.testing.expectEqualStrings("a", header.name);
+ try std.testing.expectEqualStrings("b", header.value);
+ }
+ {
+ const header = it.next().?;
+ try std.testing.expect(!it.is_trailer);
+ try std.testing.expectEqualStrings("c", header.name);
+ try std.testing.expectEqualStrings("", header.value);
+ }
+ {
+ const header = it.next().?;
+ try std.testing.expect(!it.is_trailer);
+ try std.testing.expectEqualStrings("d", header.name);
+ try std.testing.expectEqualStrings("e", header.value);
+ }
+ {
+ const header = it.next().?;
+ try std.testing.expect(it.is_trailer);
+ try std.testing.expectEqualStrings("f", header.name);
+ try std.testing.expectEqualStrings("g", header.value);
+ }
+ try std.testing.expectEqual(null, it.next());
+}
+
+const HeaderIterator = @This();
+const std = @import("../std.zig");
diff --git a/lib/std/http/Headers.zig b/lib/std/http/Headers.zig
deleted file mode 100644
index 653ec05126..0000000000
--- a/lib/std/http/Headers.zig
+++ /dev/null
@@ -1,527 +0,0 @@
-const std = @import("../std.zig");
-
-const Allocator = std.mem.Allocator;
-
-const testing = std.testing;
-const ascii = std.ascii;
-const assert = std.debug.assert;
-
-pub const HeaderList = std.ArrayListUnmanaged(Field);
-pub const HeaderIndexList = std.ArrayListUnmanaged(usize);
-pub const HeaderIndex = std.HashMapUnmanaged([]const u8, HeaderIndexList, CaseInsensitiveStringContext, std.hash_map.default_max_load_percentage);
-
-pub const CaseInsensitiveStringContext = struct {
- pub fn hash(self: @This(), s: []const u8) u64 {
- _ = self;
- var buf: [64]u8 = undefined;
- var i: usize = 0;
-
- var h = std.hash.Wyhash.init(0);
- while (i + 64 < s.len) : (i += 64) {
- const ret = ascii.lowerString(buf[0..], s[i..][0..64]);
- h.update(ret);
- }
-
- const left = @min(64, s.len - i);
- const ret = ascii.lowerString(buf[0..], s[i..][0..left]);
- h.update(ret);
-
- return h.final();
- }
-
- pub fn eql(self: @This(), a: []const u8, b: []const u8) bool {
- _ = self;
- return ascii.eqlIgnoreCase(a, b);
- }
-};
-
-/// A single HTTP header field.
-pub const Field = struct {
- name: []const u8,
- value: []const u8,
-
- fn lessThan(ctx: void, a: Field, b: Field) bool {
- _ = ctx;
- if (a.name.ptr == b.name.ptr) return false;
-
- return ascii.lessThanIgnoreCase(a.name, b.name);
- }
-};
-
-/// A list of HTTP header fields.
-pub const Headers = struct {
- allocator: Allocator,
- list: HeaderList = .{},
- index: HeaderIndex = .{},
-
- /// When this is false, names and values will not be duplicated.
- /// Use with caution.
- owned: bool = true,
-
- /// Initialize an empty list of headers.
- pub fn init(allocator: Allocator) Headers {
- return .{ .allocator = allocator };
- }
-
- /// Initialize a pre-populated list of headers from a list of fields.
- pub fn initList(allocator: Allocator, list: []const Field) !Headers {
- var new = Headers.init(allocator);
-
- try new.list.ensureTotalCapacity(allocator, list.len);
- try new.index.ensureTotalCapacity(allocator, @intCast(list.len));
- for (list) |field| {
- try new.append(field.name, field.value);
- }
-
- return new;
- }
-
- /// Deallocate all memory associated with the headers.
- ///
- /// If the `owned` field is false, this will not free the names and values of the headers.
- pub fn deinit(headers: *Headers) void {
- headers.deallocateIndexListsAndFields();
- headers.index.deinit(headers.allocator);
- headers.list.deinit(headers.allocator);
-
- headers.* = undefined;
- }
-
- /// Appends a header to the list.
- ///
- /// If the `owned` field is true, both name and value will be copied.
- pub fn append(headers: *Headers, name: []const u8, value: []const u8) !void {
- try headers.appendOwned(.{ .unowned = name }, .{ .unowned = value });
- }
-
- pub const OwnedString = union(enum) {
- /// A string allocated by the `allocator` field.
- owned: []u8,
- /// A string to be copied by the `allocator` field.
- unowned: []const u8,
- };
-
- /// Appends a header to the list.
- ///
- /// If the `owned` field is true, `name` and `value` will be copied if unowned.
- pub fn appendOwned(headers: *Headers, name: OwnedString, value: OwnedString) !void {
- const n = headers.list.items.len;
- try headers.list.ensureUnusedCapacity(headers.allocator, 1);
-
- const owned_value = switch (value) {
- .owned => |owned| owned,
- .unowned => |unowned| if (headers.owned)
- try headers.allocator.dupe(u8, unowned)
- else
- unowned,
- };
- errdefer if (value == .unowned and headers.owned) headers.allocator.free(owned_value);
-
- var entry = Field{ .name = undefined, .value = owned_value };
-
- if (headers.index.getEntry(switch (name) {
- inline else => |string| string,
- })) |kv| {
- defer switch (name) {
- .owned => |owned| headers.allocator.free(owned),
- .unowned => {},
- };
-
- entry.name = kv.key_ptr.*;
- try kv.value_ptr.append(headers.allocator, n);
- } else {
- const owned_name = switch (name) {
- .owned => |owned| owned,
- .unowned => |unowned| if (headers.owned)
- try std.ascii.allocLowerString(headers.allocator, unowned)
- else
- unowned,
- };
- errdefer if (name == .unowned and headers.owned) headers.allocator.free(owned_name);
-
- entry.name = owned_name;
-
- var new_index = try HeaderIndexList.initCapacity(headers.allocator, 1);
- errdefer new_index.deinit(headers.allocator);
-
- new_index.appendAssumeCapacity(n);
- try headers.index.put(headers.allocator, owned_name, new_index);
- }
-
- headers.list.appendAssumeCapacity(entry);
- }
-
- /// Returns true if this list of headers contains the given name.
- pub fn contains(headers: Headers, name: []const u8) bool {
- return headers.index.contains(name);
- }
-
- /// Removes all headers with the given name.
- pub fn delete(headers: *Headers, name: []const u8) bool {
- if (headers.index.fetchRemove(name)) |kv| {
- var index = kv.value;
-
- // iterate backwards
- var i = index.items.len;
- while (i > 0) {
- i -= 1;
- const data_index = index.items[i];
- const removed = headers.list.orderedRemove(data_index);
-
- assert(ascii.eqlIgnoreCase(removed.name, name)); // ensure the index hasn't been corrupted
- if (headers.owned) headers.allocator.free(removed.value);
- }
-
- if (headers.owned) headers.allocator.free(kv.key);
- index.deinit(headers.allocator);
- headers.rebuildIndex();
-
- return true;
- } else {
- return false;
- }
- }
-
- /// Returns the index of the first occurrence of a header with the given name.
- pub fn firstIndexOf(headers: Headers, name: []const u8) ?usize {
- const index = headers.index.get(name) orelse return null;
-
- return index.items[0];
- }
-
- /// Returns a list of indices containing headers with the given name.
- pub fn getIndices(headers: Headers, name: []const u8) ?[]const usize {
- const index = headers.index.get(name) orelse return null;
-
- return index.items;
- }
-
- /// Returns the entry of the first occurrence of a header with the given name.
- pub fn getFirstEntry(headers: Headers, name: []const u8) ?Field {
- const first_index = headers.firstIndexOf(name) orelse return null;
-
- return headers.list.items[first_index];
- }
-
- /// Returns a slice containing each header with the given name.
- /// The caller owns the returned slice, but NOT the values in the slice.
- pub fn getEntries(headers: Headers, allocator: Allocator, name: []const u8) !?[]const Field {
- const indices = headers.getIndices(name) orelse return null;
-
- const buf = try allocator.alloc(Field, indices.len);
- for (indices, 0..) |idx, n| {
- buf[n] = headers.list.items[idx];
- }
-
- return buf;
- }
-
- /// Returns the value in the entry of the first occurrence of a header with the given name.
- pub fn getFirstValue(headers: Headers, name: []const u8) ?[]const u8 {
- const first_index = headers.firstIndexOf(name) orelse return null;
-
- return headers.list.items[first_index].value;
- }
-
- /// Returns a slice containing the value of each header with the given name.
- /// The caller owns the returned slice, but NOT the values in the slice.
- pub fn getValues(headers: Headers, allocator: Allocator, name: []const u8) !?[]const []const u8 {
- const indices = headers.getIndices(name) orelse return null;
-
- const buf = try allocator.alloc([]const u8, indices.len);
- for (indices, 0..) |idx, n| {
- buf[n] = headers.list.items[idx].value;
- }
-
- return buf;
- }
-
- fn rebuildIndex(headers: *Headers) void {
- // clear out the indexes
- var it = headers.index.iterator();
- while (it.next()) |entry| {
- entry.value_ptr.shrinkRetainingCapacity(0);
- }
-
- // fill up indexes again; we know capacity is fine from before
- for (headers.list.items, 0..) |entry, i| {
- headers.index.getEntry(entry.name).?.value_ptr.appendAssumeCapacity(i);
- }
- }
-
- /// Sorts the headers in lexicographical order.
- pub fn sort(headers: *Headers) void {
- std.mem.sort(Field, headers.list.items, {}, Field.lessThan);
- headers.rebuildIndex();
- }
-
- /// Writes the headers to the given stream.
- pub fn format(
- headers: Headers,
- comptime fmt: []const u8,
- options: std.fmt.FormatOptions,
- out_stream: anytype,
- ) !void {
- _ = fmt;
- _ = options;
-
- for (headers.list.items) |entry| {
- if (entry.value.len == 0) continue;
-
- try out_stream.writeAll(entry.name);
- try out_stream.writeAll(": ");
- try out_stream.writeAll(entry.value);
- try out_stream.writeAll("\r\n");
- }
- }
-
- /// Writes all of the headers with the given name to the given stream, separated by commas.
- ///
- /// This is useful for headers like `Set-Cookie` which can have multiple values. RFC 9110, Section 5.2
- pub fn formatCommaSeparated(
- headers: Headers,
- name: []const u8,
- out_stream: anytype,
- ) !void {
- const indices = headers.getIndices(name) orelse return;
-
- try out_stream.writeAll(name);
- try out_stream.writeAll(": ");
-
- for (indices, 0..) |idx, n| {
- if (n != 0) try out_stream.writeAll(", ");
- try out_stream.writeAll(headers.list.items[idx].value);
- }
-
- try out_stream.writeAll("\r\n");
- }
-
- /// Frees all `HeaderIndexList`s within `index`.
- /// Frees names and values of all fields if they are owned.
- fn deallocateIndexListsAndFields(headers: *Headers) void {
- var it = headers.index.iterator();
- while (it.next()) |entry| {
- entry.value_ptr.deinit(headers.allocator);
-
- if (headers.owned) headers.allocator.free(entry.key_ptr.*);
- }
-
- if (headers.owned) {
- for (headers.list.items) |entry| {
- headers.allocator.free(entry.value);
- }
- }
- }
-
- /// Clears and frees the underlying data structures.
- /// Frees names and values if they are owned.
- pub fn clearAndFree(headers: *Headers) void {
- headers.deallocateIndexListsAndFields();
- headers.index.clearAndFree(headers.allocator);
- headers.list.clearAndFree(headers.allocator);
- }
-
- /// Clears the underlying data structures while retaining their capacities.
- /// Frees names and values if they are owned.
- pub fn clearRetainingCapacity(headers: *Headers) void {
- headers.deallocateIndexListsAndFields();
- headers.index.clearRetainingCapacity();
- headers.list.clearRetainingCapacity();
- }
-
- /// Creates a copy of the headers using the provided allocator.
- pub fn clone(headers: Headers, allocator: Allocator) !Headers {
- var new = Headers.init(allocator);
-
- try new.list.ensureTotalCapacity(allocator, headers.list.capacity);
- try new.index.ensureTotalCapacity(allocator, headers.index.capacity());
- for (headers.list.items) |field| {
- try new.append(field.name, field.value);
- }
-
- return new;
- }
-};
-
-test "Headers.append" {
- var h = Headers{ .allocator = std.testing.allocator };
- defer h.deinit();
-
- try h.append("foo", "bar");
- try h.append("hello", "world");
-
- try testing.expect(h.contains("Foo"));
- try testing.expect(!h.contains("Bar"));
-}
-
-test "Headers.delete" {
- var h = Headers{ .allocator = std.testing.allocator };
- defer h.deinit();
-
- try h.append("foo", "bar");
- try h.append("hello", "world");
-
- try testing.expect(h.contains("Foo"));
-
- _ = h.delete("Foo");
-
- try testing.expect(!h.contains("foo"));
-}
-
-test "Headers consistency" {
- var h = Headers{ .allocator = std.testing.allocator };
- defer h.deinit();
-
- try h.append("foo", "bar");
- try h.append("hello", "world");
- _ = h.delete("Foo");
-
- try h.append("foo", "bar");
- try h.append("bar", "world");
- try h.append("foo", "baz");
- try h.append("baz", "hello");
-
- try testing.expectEqual(@as(?usize, 0), h.firstIndexOf("hello"));
- try testing.expectEqual(@as(?usize, 1), h.firstIndexOf("foo"));
- try testing.expectEqual(@as(?usize, 2), h.firstIndexOf("bar"));
- try testing.expectEqual(@as(?usize, 4), h.firstIndexOf("baz"));
- try testing.expectEqual(@as(?usize, null), h.firstIndexOf("pog"));
-
- try testing.expectEqualSlices(usize, &[_]usize{0}, h.getIndices("hello").?);
- try testing.expectEqualSlices(usize, &[_]usize{ 1, 3 }, h.getIndices("foo").?);
- try testing.expectEqualSlices(usize, &[_]usize{2}, h.getIndices("bar").?);
- try testing.expectEqualSlices(usize, &[_]usize{4}, h.getIndices("baz").?);
- try testing.expectEqual(@as(?[]const usize, null), h.getIndices("pog"));
-
- try testing.expectEqualStrings("world", h.getFirstEntry("hello").?.value);
- try testing.expectEqualStrings("bar", h.getFirstEntry("foo").?.value);
- try testing.expectEqualStrings("world", h.getFirstEntry("bar").?.value);
- try testing.expectEqualStrings("hello", h.getFirstEntry("baz").?.value);
-
- const hello_entries = (try h.getEntries(testing.allocator, "hello")).?;
- defer testing.allocator.free(hello_entries);
- try testing.expectEqualDeep(@as([]const Field, &[_]Field{
- .{ .name = "hello", .value = "world" },
- }), hello_entries);
-
- const foo_entries = (try h.getEntries(testing.allocator, "foo")).?;
- defer testing.allocator.free(foo_entries);
- try testing.expectEqualDeep(@as([]const Field, &[_]Field{
- .{ .name = "foo", .value = "bar" },
- .{ .name = "foo", .value = "baz" },
- }), foo_entries);
-
- const bar_entries = (try h.getEntries(testing.allocator, "bar")).?;
- defer testing.allocator.free(bar_entries);
- try testing.expectEqualDeep(@as([]const Field, &[_]Field{
- .{ .name = "bar", .value = "world" },
- }), bar_entries);
-
- const baz_entries = (try h.getEntries(testing.allocator, "baz")).?;
- defer testing.allocator.free(baz_entries);
- try testing.expectEqualDeep(@as([]const Field, &[_]Field{
- .{ .name = "baz", .value = "hello" },
- }), baz_entries);
-
- const pog_entries = (try h.getEntries(testing.allocator, "pog"));
- try testing.expectEqual(@as(?[]const Field, null), pog_entries);
-
- try testing.expectEqualStrings("world", h.getFirstValue("hello").?);
- try testing.expectEqualStrings("bar", h.getFirstValue("foo").?);
- try testing.expectEqualStrings("world", h.getFirstValue("bar").?);
- try testing.expectEqualStrings("hello", h.getFirstValue("baz").?);
- try testing.expectEqual(@as(?[]const u8, null), h.getFirstValue("pog"));
-
- const hello_values = (try h.getValues(testing.allocator, "hello")).?;
- defer testing.allocator.free(hello_values);
- try testing.expectEqualDeep(@as([]const []const u8, &[_][]const u8{"world"}), hello_values);
-
- const foo_values = (try h.getValues(testing.allocator, "foo")).?;
- defer testing.allocator.free(foo_values);
- try testing.expectEqualDeep(@as([]const []const u8, &[_][]const u8{ "bar", "baz" }), foo_values);
-
- const bar_values = (try h.getValues(testing.allocator, "bar")).?;
- defer testing.allocator.free(bar_values);
- try testing.expectEqualDeep(@as([]const []const u8, &[_][]const u8{"world"}), bar_values);
-
- const baz_values = (try h.getValues(testing.allocator, "baz")).?;
- defer testing.allocator.free(baz_values);
- try testing.expectEqualDeep(@as([]const []const u8, &[_][]const u8{"hello"}), baz_values);
-
- const pog_values = (try h.getValues(testing.allocator, "pog"));
- try testing.expectEqual(@as(?[]const []const u8, null), pog_values);
-
- h.sort();
-
- try testing.expectEqualSlices(usize, &[_]usize{0}, h.getIndices("bar").?);
- try testing.expectEqualSlices(usize, &[_]usize{1}, h.getIndices("baz").?);
- try testing.expectEqualSlices(usize, &[_]usize{ 2, 3 }, h.getIndices("foo").?);
- try testing.expectEqualSlices(usize, &[_]usize{4}, h.getIndices("hello").?);
-
- const formatted_values = try std.fmt.allocPrint(testing.allocator, "{}", .{h});
- defer testing.allocator.free(formatted_values);
-
- try testing.expectEqualStrings("bar: world\r\nbaz: hello\r\nfoo: bar\r\nfoo: baz\r\nhello: world\r\n", formatted_values);
-
- var buf: [128]u8 = undefined;
- var fbs = std.io.fixedBufferStream(&buf);
- const writer = fbs.writer();
-
- try h.formatCommaSeparated("foo", writer);
- try testing.expectEqualStrings("foo: bar, baz\r\n", fbs.getWritten());
-}
-
-test "Headers.clearRetainingCapacity and clearAndFree" {
- var h = Headers.init(std.testing.allocator);
- defer h.deinit();
-
- h.clearRetainingCapacity();
-
- try h.append("foo", "bar");
- try h.append("bar", "world");
- try h.append("foo", "baz");
- try h.append("baz", "hello");
- try testing.expectEqual(@as(usize, 4), h.list.items.len);
- try testing.expectEqual(@as(usize, 3), h.index.count());
- const list_capacity = h.list.capacity;
- const index_capacity = h.index.capacity();
-
- h.clearRetainingCapacity();
- try testing.expectEqual(@as(usize, 0), h.list.items.len);
- try testing.expectEqual(@as(usize, 0), h.index.count());
- try testing.expectEqual(list_capacity, h.list.capacity);
- try testing.expectEqual(index_capacity, h.index.capacity());
-
- try h.append("foo", "bar");
- try h.append("bar", "world");
- try h.append("foo", "baz");
- try h.append("baz", "hello");
- try testing.expectEqual(@as(usize, 4), h.list.items.len);
- try testing.expectEqual(@as(usize, 3), h.index.count());
- // Capacity should still be the same since we shouldn't have needed to grow
- // when adding back the same fields
- try testing.expectEqual(list_capacity, h.list.capacity);
- try testing.expectEqual(index_capacity, h.index.capacity());
-
- h.clearAndFree();
- try testing.expectEqual(@as(usize, 0), h.list.items.len);
- try testing.expectEqual(@as(usize, 0), h.index.count());
- try testing.expectEqual(@as(usize, 0), h.list.capacity);
- try testing.expectEqual(@as(usize, 0), h.index.capacity());
-}
-
-test "Headers.initList" {
- var h = try Headers.initList(std.testing.allocator, &.{
- .{ .name = "Accept-Encoding", .value = "gzip" },
- .{ .name = "Authorization", .value = "it's over 9000!" },
- });
- defer h.deinit();
-
- const encoding_values = (try h.getValues(testing.allocator, "Accept-Encoding")).?;
- defer testing.allocator.free(encoding_values);
- try testing.expectEqualDeep(@as([]const []const u8, &[_][]const u8{"gzip"}), encoding_values);
-
- const authorization_values = (try h.getValues(testing.allocator, "Authorization")).?;
- defer testing.allocator.free(authorization_values);
- try testing.expectEqualDeep(@as([]const []const u8, &[_][]const u8{"it's over 9000!"}), authorization_values);
-}
diff --git a/lib/std/http/Server.zig b/lib/std/http/Server.zig
index 48c4e2cbfb..a2e3a8060c 100644
--- a/lib/std/http/Server.zig
+++ b/lib/std/http/Server.zig
@@ -1,873 +1,1054 @@
-//! HTTP Server implementation.
-//!
-//! This server assumes *all* clients are well behaved and standard compliant; it can and will deadlock if a client holds a connection open without sending a request.
-//!
-//! Example usage:
-//!
-//! ```zig
-//! var server = Server.init(.{ .reuse_address = true });
-//! defer server.deinit();
-//!
-//! try server.listen(bind_addr);
-//!
-//! while (true) {
-//! var res = try server.accept(.{ .allocator = gpa });
-//! defer res.deinit();
-//!
-//! while (res.reset() != .closing) {
-//! res.wait() catch |err| switch (err) {
-//! error.HttpHeadersInvalid => break,
-//! error.HttpHeadersExceededSizeLimit => {
-//! res.status = .request_header_fields_too_large;
-//! res.send() catch break;
-//! break;
-//! },
-//! else => {
-//! res.status = .bad_request;
-//! res.send() catch break;
-//! break;
-//! },
-//! }
-//!
-//! res.status = .ok;
-//! res.transfer_encoding = .chunked;
-//!
-//! try res.send();
-//! try res.writeAll("Hello, World!\n");
-//! try res.finish();
-//! }
-//! }
-//! ```
-
-const std = @import("../std.zig");
-const testing = std.testing;
-const http = std.http;
-const mem = std.mem;
-const net = std.net;
-const Uri = std.Uri;
-const Allocator = mem.Allocator;
-const assert = std.debug.assert;
-
-const Server = @This();
-const proto = @import("protocol.zig");
-
-/// The underlying server socket.
-socket: net.StreamServer,
-
-/// An interface to a plain connection.
-pub const Connection = struct {
- pub const buffer_size = std.crypto.tls.max_ciphertext_record_len;
- pub const Protocol = enum { plain };
+//! Blocking HTTP server implementation.
+//! Handles a single connection's lifecycle.
+
+connection: net.Server.Connection,
+/// Keeps track of whether the Server is ready to accept a new request on the
+/// same connection, and makes invalid API usage cause assertion failures
+/// rather than HTTP protocol violations.
+state: State,
+/// User-provided buffer that must outlive this Server.
+/// Used to store the client's entire HTTP header.
+read_buffer: []u8,
+/// Amount of available data inside read_buffer.
+read_buffer_len: usize,
+/// Index into `read_buffer` of the first byte of the next HTTP request.
+next_request_start: usize,
+
+pub const State = enum {
+ /// The connection is available to be used for the first time, or reused.
+ ready,
+ /// An error occurred in `receiveHead`.
+ receiving_head,
+ /// A Request object has been obtained and from there a Response can be
+ /// opened.
+ received_head,
+ /// The client is uploading something to this Server.
+ receiving_body,
+ /// The connection is eligible for another HTTP request, however the client
+ /// and server did not negotiate connection: keep-alive.
+ closing,
+};
- stream: net.Stream,
- protocol: Protocol,
-
- closing: bool = true,
-
- read_buf: [buffer_size]u8 = undefined,
- read_start: u16 = 0,
- read_end: u16 = 0,
-
- pub fn rawReadAtLeast(conn: *Connection, buffer: []u8, len: usize) ReadError!usize {
- return switch (conn.protocol) {
- .plain => conn.stream.readAtLeast(buffer, len),
- // .tls => conn.tls_client.readAtLeast(conn.stream, buffer, len),
- } catch |err| {
- switch (err) {
- error.ConnectionResetByPeer, error.BrokenPipe => return error.ConnectionResetByPeer,
- else => return error.UnexpectedReadFailure,
- }
- };
- }
+/// Initialize an HTTP server that can respond to multiple requests on the same
+/// connection.
+/// The returned `Server` is ready for `receiveHead` to be called.
+pub fn init(connection: net.Server.Connection, read_buffer: []u8) Server {
+ return .{
+ .connection = connection,
+ .state = .ready,
+ .read_buffer = read_buffer,
+ .read_buffer_len = 0,
+ .next_request_start = 0,
+ };
+}
- pub fn fill(conn: *Connection) ReadError!void {
- if (conn.read_end != conn.read_start) return;
+pub const ReceiveHeadError = error{
+ /// Client sent too many bytes of HTTP headers.
+ /// The HTTP specification suggests to respond with a 431 status code
+ /// before closing the connection.
+ HttpHeadersOversize,
+ /// Client sent headers that did not conform to the HTTP protocol.
+ HttpHeadersInvalid,
+ /// A low level I/O error occurred trying to read the headers.
+ HttpHeadersUnreadable,
+ /// Partial HTTP request was received but the connection was closed before
+ /// fully receiving the headers.
+ HttpRequestTruncated,
+ /// The client sent 0 bytes of headers before closing the stream.
+ /// In other words, a keep-alive connection was finally closed.
+ HttpConnectionClosing,
+};
- const nread = try conn.rawReadAtLeast(conn.read_buf[0..], 1);
- if (nread == 0) return error.EndOfStream;
- conn.read_start = 0;
- conn.read_end = @as(u16, @intCast(nread));
+/// The header bytes reference the read buffer that Server was initialized with
+/// and remain alive until the next call to receiveHead.
+pub fn receiveHead(s: *Server) ReceiveHeadError!Request {
+ assert(s.state == .ready);
+ s.state = .received_head;
+ errdefer s.state = .receiving_head;
+
+ // In case of a reused connection, move the next request's bytes to the
+ // beginning of the buffer.
+ if (s.next_request_start > 0) {
+ if (s.read_buffer_len > s.next_request_start) {
+ rebase(s, 0);
+ } else {
+ s.read_buffer_len = 0;
+ }
}
- pub fn peek(conn: *Connection) []const u8 {
- return conn.read_buf[conn.read_start..conn.read_end];
- }
+ var hp: http.HeadParser = .{};
- pub fn drop(conn: *Connection, num: u16) void {
- conn.read_start += num;
+ if (s.read_buffer_len > 0) {
+ const bytes = s.read_buffer[0..s.read_buffer_len];
+ const end = hp.feed(bytes);
+ if (hp.state == .finished)
+ return finishReceivingHead(s, end);
}
- pub fn readAtLeast(conn: *Connection, buffer: []u8, len: usize) ReadError!usize {
- assert(len <= buffer.len);
-
- var out_index: u16 = 0;
- while (out_index < len) {
- const available_read = conn.read_end - conn.read_start;
- const available_buffer = buffer.len - out_index;
-
- if (available_read > available_buffer) { // partially read buffered data
- @memcpy(buffer[out_index..], conn.read_buf[conn.read_start..conn.read_end][0..available_buffer]);
- out_index += @as(u16, @intCast(available_buffer));
- conn.read_start += @as(u16, @intCast(available_buffer));
-
- break;
- } else if (available_read > 0) { // fully read buffered data
- @memcpy(buffer[out_index..][0..available_read], conn.read_buf[conn.read_start..conn.read_end]);
- out_index += available_read;
- conn.read_start += available_read;
-
- if (out_index >= len) break;
- }
-
- const leftover_buffer = available_buffer - available_read;
- const leftover_len = len - out_index;
-
- if (leftover_buffer > conn.read_buf.len) {
- // skip the buffer if the output is large enough
- return conn.rawReadAtLeast(buffer[out_index..], leftover_len);
+ while (true) {
+ const buf = s.read_buffer[s.read_buffer_len..];
+ if (buf.len == 0)
+ return error.HttpHeadersOversize;
+ const read_n = s.connection.stream.read(buf) catch
+ return error.HttpHeadersUnreadable;
+ if (read_n == 0) {
+ if (s.read_buffer_len > 0) {
+ return error.HttpRequestTruncated;
+ } else {
+ return error.HttpConnectionClosing;
}
-
- try conn.fill();
}
-
- return out_index;
- }
-
- pub fn read(conn: *Connection, buffer: []u8) ReadError!usize {
- return conn.readAtLeast(buffer, 1);
- }
-
- pub const ReadError = error{
- ConnectionTimedOut,
- ConnectionResetByPeer,
- UnexpectedReadFailure,
- EndOfStream,
- };
-
- pub const Reader = std.io.Reader(*Connection, ReadError, read);
-
- pub fn reader(conn: *Connection) Reader {
- return Reader{ .context = conn };
- }
-
- pub fn writeAll(conn: *Connection, buffer: []const u8) WriteError!void {
- return switch (conn.protocol) {
- .plain => conn.stream.writeAll(buffer),
- // .tls => return conn.tls_client.writeAll(conn.stream, buffer),
- } catch |err| switch (err) {
- error.BrokenPipe, error.ConnectionResetByPeer => return error.ConnectionResetByPeer,
- else => return error.UnexpectedWriteFailure,
- };
- }
-
- pub fn write(conn: *Connection, buffer: []const u8) WriteError!usize {
- return switch (conn.protocol) {
- .plain => conn.stream.write(buffer),
- // .tls => return conn.tls_client.write(conn.stream, buffer),
- } catch |err| switch (err) {
- error.BrokenPipe, error.ConnectionResetByPeer => return error.ConnectionResetByPeer,
- else => return error.UnexpectedWriteFailure,
- };
+ s.read_buffer_len += read_n;
+ const bytes = buf[0..read_n];
+ const end = hp.feed(bytes);
+ if (hp.state == .finished)
+ return finishReceivingHead(s, s.read_buffer_len - bytes.len + end);
}
+}
- pub const WriteError = error{
- ConnectionResetByPeer,
- UnexpectedWriteFailure,
+fn finishReceivingHead(s: *Server, head_end: usize) ReceiveHeadError!Request {
+ return .{
+ .server = s,
+ .head_end = head_end,
+ .head = Request.Head.parse(s.read_buffer[0..head_end]) catch
+ return error.HttpHeadersInvalid,
+ .reader_state = undefined,
};
+}
- pub const Writer = std.io.Writer(*Connection, WriteError, write);
-
- pub fn writer(conn: *Connection) Writer {
- return Writer{ .context = conn };
- }
-
- pub fn close(conn: *Connection) void {
- conn.stream.close();
- }
-};
-
-/// The mode of transport for responses.
-pub const ResponseTransfer = union(enum) {
- content_length: u64,
- chunked: void,
- none: void,
-};
-
-/// The decompressor for request messages.
-pub const Compression = union(enum) {
- pub const DeflateDecompressor = std.compress.zlib.DecompressStream(Response.TransferReader);
- pub const GzipDecompressor = std.compress.gzip.Decompress(Response.TransferReader);
- pub const ZstdDecompressor = std.compress.zstd.DecompressStream(Response.TransferReader, .{});
-
- deflate: DeflateDecompressor,
- gzip: GzipDecompressor,
- zstd: ZstdDecompressor,
- none: void,
-};
-
-/// A HTTP request originating from a client.
pub const Request = struct {
- pub const ParseError = Allocator.Error || error{
- UnknownHttpMethod,
- HttpHeadersInvalid,
- HttpHeaderContinuationsUnsupported,
- HttpTransferEncodingUnsupported,
- HttpConnectionHeaderUnsupported,
- InvalidContentLength,
- CompressionNotSupported,
+ server: *Server,
+ /// Index into Server's read_buffer.
+ head_end: usize,
+ head: Head,
+ reader_state: union {
+ remaining_content_length: u64,
+ chunk_parser: http.ChunkParser,
+ },
+
+ pub const Compression = union(enum) {
+ pub const DeflateDecompressor = std.compress.zlib.Decompressor(std.io.AnyReader);
+ pub const GzipDecompressor = std.compress.gzip.Decompressor(std.io.AnyReader);
+ pub const ZstdDecompressor = std.compress.zstd.Decompressor(std.io.AnyReader);
+
+ deflate: DeflateDecompressor,
+ gzip: GzipDecompressor,
+ zstd: ZstdDecompressor,
+ none: void,
};
- pub fn parse(req: *Request, bytes: []const u8) ParseError!void {
- var it = mem.tokenizeAny(u8, bytes, "\r\n");
-
- const first_line = it.next() orelse return error.HttpHeadersInvalid;
- if (first_line.len < 10)
- return error.HttpHeadersInvalid;
-
- const method_end = mem.indexOfScalar(u8, first_line, ' ') orelse return error.HttpHeadersInvalid;
- if (method_end > 24) return error.HttpHeadersInvalid;
-
- const method_str = first_line[0..method_end];
- const method: http.Method = @enumFromInt(http.Method.parse(method_str));
-
- const version_start = mem.lastIndexOfScalar(u8, first_line, ' ') orelse return error.HttpHeadersInvalid;
- if (version_start == method_end) return error.HttpHeadersInvalid;
-
- const version_str = first_line[version_start + 1 ..];
- if (version_str.len != 8) return error.HttpHeadersInvalid;
- const version: http.Version = switch (int64(version_str[0..8])) {
- int64("HTTP/1.0") => .@"HTTP/1.0",
- int64("HTTP/1.1") => .@"HTTP/1.1",
- else => return error.HttpHeadersInvalid,
+ pub const Head = struct {
+ method: http.Method,
+ target: []const u8,
+ version: http.Version,
+ expect: ?[]const u8,
+ content_type: ?[]const u8,
+ content_length: ?u64,
+ transfer_encoding: http.TransferEncoding,
+ transfer_compression: http.ContentEncoding,
+ keep_alive: bool,
+ compression: Compression,
+
+ pub const ParseError = error{
+ UnknownHttpMethod,
+ HttpHeadersInvalid,
+ HttpHeaderContinuationsUnsupported,
+ HttpTransferEncodingUnsupported,
+ HttpConnectionHeaderUnsupported,
+ InvalidContentLength,
+ CompressionUnsupported,
+ MissingFinalNewline,
};
- const target = first_line[method_end + 1 .. version_start];
-
- req.method = method;
- req.target = target;
- req.version = version;
-
- while (it.next()) |line| {
- if (line.len == 0) return error.HttpHeadersInvalid;
- switch (line[0]) {
- ' ', '\t' => return error.HttpHeaderContinuationsUnsupported,
- else => {},
- }
-
- var line_it = mem.tokenizeAny(u8, line, ": ");
- const header_name = line_it.next() orelse return error.HttpHeadersInvalid;
- const header_value = line_it.rest();
-
- try req.headers.append(header_name, header_value);
-
- if (std.ascii.eqlIgnoreCase(header_name, "content-length")) {
- if (req.content_length != null) return error.HttpHeadersInvalid;
- req.content_length = std.fmt.parseInt(u64, header_value, 10) catch return error.InvalidContentLength;
- } else if (std.ascii.eqlIgnoreCase(header_name, "transfer-encoding")) {
- // Transfer-Encoding: second, first
- // Transfer-Encoding: deflate, chunked
- var iter = mem.splitBackwardsScalar(u8, header_value, ',');
-
- const first = iter.first();
- const trimmed_first = mem.trim(u8, first, " ");
-
- var next: ?[]const u8 = first;
- if (std.meta.stringToEnum(http.TransferEncoding, trimmed_first)) |transfer| {
- if (req.transfer_encoding != .none) return error.HttpHeadersInvalid; // we already have a transfer encoding
- req.transfer_encoding = transfer;
-
- next = iter.next();
+ /// Parses the request line and the header fields of an HTTP request head.
+ ///
+ /// `bytes` is split into CRLF-separated lines. Parsing stops successfully
+ /// at the first empty line; if no empty line is found,
+ /// `error.MissingFinalNewline` is returned. Slices stored in the returned
+ /// `Head` (`target`, `expect`, `content_type`) point into `bytes`.
+ ///
+ /// Only these headers are interpreted (names compared case-insensitively):
+ /// connection, expect, content-type, content-length, content-encoding and
+ /// transfer-encoding. Any other header line is skipped.
+ pub fn parse(bytes: []const u8) ParseError!Head {
+ var it = mem.splitSequence(u8, bytes, "\r\n");
+
+ const first_line = it.next().?;
+ if (first_line.len < 10)
+ return error.HttpHeadersInvalid;
+
+ const method_end = mem.indexOfScalar(u8, first_line, ' ') orelse
+ return error.HttpHeadersInvalid;
+ if (method_end > 24) return error.HttpHeadersInvalid;
+
+ const method_str = first_line[0..method_end];
+ const method: http.Method = @enumFromInt(http.Method.parse(method_str));
+
+ const version_start = mem.lastIndexOfScalar(u8, first_line, ' ') orelse
+ return error.HttpHeadersInvalid;
+ if (version_start == method_end) return error.HttpHeadersInvalid;
+
+ const version_str = first_line[version_start + 1 ..];
+ if (version_str.len != 8) return error.HttpHeadersInvalid;
+ const version: http.Version = switch (int64(version_str[0..8])) {
+ int64("HTTP/1.0") => .@"HTTP/1.0",
+ int64("HTTP/1.1") => .@"HTTP/1.1",
+ else => return error.HttpHeadersInvalid,
+ };
+
+ const target = first_line[method_end + 1 .. version_start];
+
+ var head: Head = .{
+ .method = method,
+ .target = target,
+ .version = version,
+ .expect = null,
+ .content_type = null,
+ .content_length = null,
+ .transfer_encoding = .none,
+ .transfer_compression = .identity,
+ // HTTP/1.1 connections are persistent unless the client says
+ // otherwise; HTTP/1.0 connections are not. A "connection" header
+ // below overrides this default.
+ .keep_alive = version == .@"HTTP/1.1",
+ .compression = .none,
+ };
+
+ while (it.next()) |line| {
+ if (line.len == 0) return head;
+ switch (line[0]) {
+ ' ', '\t' => return error.HttpHeaderContinuationsUnsupported,
+ else => {},
}
- if (next) |second| {
- const trimmed_second = mem.trim(u8, second, " ");
-
- if (std.meta.stringToEnum(http.ContentEncoding, trimmed_second)) |transfer| {
- if (req.transfer_compression != .identity) return error.HttpHeadersInvalid; // double compression is not supported
- req.transfer_compression = transfer;
+ // The whitespace around a field value is optional, so split on the
+ // colon alone and trim the value. Splitting on ": " would miss
+ // headers written as "name:value" and keep trailing blanks.
+ var line_it = mem.splitScalar(u8, line, ':');
+ const header_name = line_it.next().?;
+ const header_value = mem.trim(u8, line_it.rest(), " \t");
+ if (header_name.len == 0) return error.HttpHeadersInvalid;
+
+ if (std.ascii.eqlIgnoreCase(header_name, "connection")) {
+ head.keep_alive = !std.ascii.eqlIgnoreCase(header_value, "close");
+ } else if (std.ascii.eqlIgnoreCase(header_name, "expect")) {
+ head.expect = header_value;
+ } else if (std.ascii.eqlIgnoreCase(header_name, "content-type")) {
+ head.content_type = header_value;
+ } else if (std.ascii.eqlIgnoreCase(header_name, "content-length")) {
+ if (head.content_length != null) return error.HttpHeadersInvalid;
+ head.content_length = std.fmt.parseInt(u64, header_value, 10) catch
+ return error.InvalidContentLength;
+ } else if (std.ascii.eqlIgnoreCase(header_name, "content-encoding")) {
+ if (head.transfer_compression != .identity) return error.HttpHeadersInvalid;
+
+ const trimmed = mem.trim(u8, header_value, " ");
+
+ if (std.meta.stringToEnum(http.ContentEncoding, trimmed)) |ce| {
+ head.transfer_compression = ce;
} else {
return error.HttpTransferEncodingUnsupported;
}
- }
-
- if (iter.next()) |_| return error.HttpTransferEncodingUnsupported;
- } else if (std.ascii.eqlIgnoreCase(header_name, "content-encoding")) {
- if (req.transfer_compression != .identity) return error.HttpHeadersInvalid;
-
- const trimmed = mem.trim(u8, header_value, " ");
-
- if (std.meta.stringToEnum(http.ContentEncoding, trimmed)) |ce| {
- req.transfer_compression = ce;
- } else {
- return error.HttpTransferEncodingUnsupported;
- }
- }
- }
- }
-
- inline fn int64(array: *const [8]u8) u64 {
- return @as(u64, @bitCast(array.*));
- }
-
- /// The HTTP request method.
- method: http.Method,
-
- /// The HTTP request target.
- target: []const u8,
-
- /// The HTTP version of this request.
- version: http.Version,
-
- /// The length of the request body, if known.
- content_length: ?u64 = null,
-
- /// The transfer encoding of the request body, or .none if not present.
- transfer_encoding: http.TransferEncoding = .none,
-
- /// The compression of the request body, or .identity (no compression) if not present.
- transfer_compression: http.ContentEncoding = .identity,
-
- /// The list of HTTP request headers
- headers: http.Headers,
-
- parser: proto.HeadersParser,
- compression: Compression = .none,
-};
+ } else if (std.ascii.eqlIgnoreCase(header_name, "transfer-encoding")) {
+ // Transfer-Encoding: second, first
+ // Transfer-Encoding: deflate, chunked
+ var iter = mem.splitBackwardsScalar(u8, header_value, ',');
-/// A HTTP response waiting to be sent.
-///
-/// Order of operations:
-/// ```
-/// [/ <--------------------------------------- \]
-/// accept -> wait -> send [ -> write -> finish][ -> reset /]
-/// \ -> read /
-/// ```
-pub const Response = struct {
- version: http.Version = .@"HTTP/1.1",
- status: http.Status = .ok,
- reason: ?[]const u8 = null,
-
- transfer_encoding: ResponseTransfer = .none,
+ const first = iter.first();
+ const trimmed_first = mem.trim(u8, first, " ");
- /// The allocator responsible for allocating memory for this response.
- allocator: Allocator,
+ var next: ?[]const u8 = first;
+ if (std.meta.stringToEnum(http.TransferEncoding, trimmed_first)) |transfer| {
+ if (head.transfer_encoding != .none)
+ return error.HttpHeadersInvalid; // we already have a transfer encoding
+ head.transfer_encoding = transfer;
- /// The peer's address
- address: net.Address,
-
- /// The underlying connection for this response.
- connection: Connection,
+ next = iter.next();
+ }
- /// The HTTP response headers
- headers: http.Headers,
+ if (next) |second| {
+ const trimmed_second = mem.trim(u8, second, " ");
- /// The HTTP request that this response is responding to.
- ///
- /// This field is only valid after calling `wait`.
- request: Request,
+ if (std.meta.stringToEnum(http.ContentEncoding, trimmed_second)) |transfer| {
+ if (head.transfer_compression != .identity)
+ return error.HttpHeadersInvalid; // double compression is not supported
+ head.transfer_compression = transfer;
+ } else {
+ return error.HttpTransferEncodingUnsupported;
+ }
+ }
- state: State = .first,
+ if (iter.next()) |_| return error.HttpTransferEncodingUnsupported;
+ }
+ }
+ return error.MissingFinalNewline;
+ }
- const State = enum {
- first,
- start,
- waited,
- responded,
- finished,
+ /// Reinterprets exactly eight bytes as a `u64` (native byte order), which
+ /// lets fixed eight-character tokens be compared or switched on as integers.
+ inline fn int64(array: *const [8]u8) u64 {
+ const bytes: [8]u8 = array.*;
+ return @bitCast(bytes);
+ }
};
- /// Free all resources associated with this response.
- pub fn deinit(res: *Response) void {
- res.connection.close();
-
- res.headers.deinit();
- res.request.headers.deinit();
-
- if (res.request.parser.header_bytes_owned) {
- res.request.parser.header_bytes.deinit(res.allocator);
- }
+ /// Returns an iterator over the bytes of the request head, i.e. the first
+ /// `head_end` bytes of the server's read buffer.
+ pub fn iterateHeaders(r: *Request) http.HeaderIterator {
+ const head_bytes = r.server.read_buffer[0..r.head_end];
+ return http.HeaderIterator.init(head_bytes);
}
- pub const ResetState = enum { reset, closing };
+ /// Options shared by `respond` and `respondStreaming`.
+ pub const RespondOptions = struct {
+ /// Written as the first token of the status line.
+ version: http.Version = .@"HTTP/1.1",
+ /// Status code of the response. Must not be `continue`.
+ status: http.Status = .ok,
+ /// Reason phrase; when `null` the phrase of `status` is used, or an
+ /// empty string if the status has none.
+ reason: ?[]const u8 = null,
+ /// Whether the server is willing to keep the connection open. The
+ /// keep-alive header is only sent if the request allows it as well.
+ keep_alive: bool = true,
+ /// Additional headers, written after the generated ones as "name: value".
+ extra_headers: []const http.Header = &.{},
+ /// When `null`, the framing is chosen by the responding function.
+ transfer_encoding: ?http.TransferEncoding = null,
+ };
- /// Reset this response to its initial state. This must be called before handling a second request on the same connection.
- pub fn reset(res: *Response) ResetState {
- if (res.state == .first) {
- res.state = .start;
- return .reset;
+ /// Send an entire HTTP response to the client, including headers and body.
+ ///
+ /// Automatically handles HEAD requests by omitting the body.
+ ///
+ /// Unless `transfer_encoding` is specified, uses the "content-length"
+ /// header.
+ ///
+ /// If the request contains a body and the connection is to be reused,
+ /// discards the request body, leaving the Server in the `ready` state. If
+ /// this discarding fails, the connection is marked as not to be reused and
+ /// no error is surfaced.
+ ///
+ /// If the request carries an `expect` header that was not handled, a
+ /// "417 Expectation Failed" response is sent instead of the requested one.
+ ///
+ /// Asserts status is not `continue`.
+ /// Asserts there are at most 25 extra_headers.
+ /// Asserts that "\r\n" does not occur in any header name or value.
+ pub fn respond(
+ request: *Request,
+ content: []const u8,
+ options: RespondOptions,
+ ) Response.WriteError!void {
+ const max_extra_headers = 25;
+ assert(options.status != .@"continue");
+ assert(options.extra_headers.len <= max_extra_headers);
+ if (std.debug.runtime_safety) {
+ for (options.extra_headers) |header| {
+ assert(header.name.len != 0);
+ assert(std.mem.indexOfScalar(u8, header.name, ':') == null);
+ assert(std.mem.indexOfPosLinear(u8, header.name, 0, "\r\n") == null);
+ assert(std.mem.indexOfPosLinear(u8, header.value, 0, "\r\n") == null);
+ }
}
- if (!res.request.parser.done) {
- // If the response wasn't fully read, then we need to close the connection.
- res.connection.closing = true;
- return .closing;
+ const transfer_encoding_none = (options.transfer_encoding orelse .chunked) == .none;
+ const server_keep_alive = !transfer_encoding_none and options.keep_alive;
+ const keep_alive = request.discardBody(server_keep_alive);
+
+ const phrase = options.reason orelse options.status.phrase() orelse "";
+
+ var first_buffer: [500]u8 = undefined;
+ var h = std.ArrayListUnmanaged(u8).initBuffer(&first_buffer);
+ if (request.head.expect != null) {
+ // reader() and hence discardBody() above sets expect to null if it
+ // is handled. So the fact that it is not null here means unhandled.
+ h.appendSliceAssumeCapacity("HTTP/1.1 417 Expectation Failed\r\n");
+ if (keep_alive) h.appendSliceAssumeCapacity("connection: keep-alive\r\n");
+ h.appendSliceAssumeCapacity("content-length: 0\r\n\r\n");
+ try request.server.connection.stream.writeAll(h.items);
+ return;
}
+ h.fixedWriter().print("{s} {d} {s}\r\n", .{
+ @tagName(options.version), @intFromEnum(options.status), phrase,
+ }) catch unreachable;
- // A connection is only keep-alive if the Connection header is present and it's value is not "close".
- // The server and client must both agree
- //
- // send() defaults to using keep-alive if the client requests it.
- const res_connection = res.headers.getFirstValue("connection");
- const res_keepalive = res_connection != null and !std.ascii.eqlIgnoreCase("close", res_connection.?);
-
- const req_connection = res.request.headers.getFirstValue("connection");
- const req_keepalive = req_connection != null and !std.ascii.eqlIgnoreCase("close", req_connection.?);
- if (req_keepalive and (res_keepalive or res_connection == null)) {
- res.connection.closing = false;
- } else {
- res.connection.closing = true;
- }
+ if (keep_alive) h.appendSliceAssumeCapacity("connection: keep-alive\r\n");
- switch (res.request.compression) {
+ if (options.transfer_encoding) |transfer_encoding| switch (transfer_encoding) {
.none => {},
- .deflate => |*deflate| deflate.deinit(),
- .gzip => |*gzip| gzip.deinit(),
- .zstd => |*zstd| zstd.deinit(),
+ .chunked => h.appendSliceAssumeCapacity("transfer-encoding: chunked\r\n"),
+ } else {
+ h.fixedWriter().print("content-length: {d}\r\n", .{content.len}) catch unreachable;
}
- res.state = .start;
- res.version = .@"HTTP/1.1";
- res.status = .ok;
- res.reason = null;
-
- res.transfer_encoding = .none;
-
- res.headers.clearRetainingCapacity();
+ var chunk_header_buffer: [18]u8 = undefined;
+ // Worst case number of iovecs:
+ // 1 for the generated head,
+ // 4 per extra header (name, ": ", value, "\r\n"),
+ // 1 for the blank line ending the head,
+ // 4 for a chunked body (chunk header, content, "\r\n", "0\r\n\r\n").
+ // Sizing this with only 3 spare slots overflows for a chunked
+ // response with the maximum number of extra headers.
+ var iovecs: [max_extra_headers * 4 + 6]std.posix.iovec_const = undefined;
+ var iovecs_len: usize = 0;
- res.request.headers.clearAndFree(); // FIXME: figure out why `clearRetainingCapacity` causes a leak in hash_map here
- res.request.parser.reset();
-
- res.request = Request{
- .version = undefined,
- .method = undefined,
- .target = undefined,
- .headers = res.request.headers,
- .parser = res.request.parser,
+ iovecs[iovecs_len] = .{
+ .iov_base = h.items.ptr,
+ .iov_len = h.items.len,
};
+ iovecs_len += 1;
+
+ for (options.extra_headers) |header| {
+ iovecs[iovecs_len] = .{
+ .iov_base = header.name.ptr,
+ .iov_len = header.name.len,
+ };
+ iovecs_len += 1;
+
+ iovecs[iovecs_len] = .{
+ .iov_base = ": ",
+ .iov_len = 2,
+ };
+ iovecs_len += 1;
+
+ if (header.value.len != 0) {
+ iovecs[iovecs_len] = .{
+ .iov_base = header.value.ptr,
+ .iov_len = header.value.len,
+ };
+ iovecs_len += 1;
+ }
- if (res.connection.closing) {
- return .closing;
- } else {
- return .reset;
- }
- }
-
- pub const SendError = Connection.WriteError || error{ UnsupportedTransferEncoding, InvalidContentLength };
-
- /// Send the HTTP response headers to the client.
- pub fn send(res: *Response) SendError!void {
- switch (res.state) {
- .waited => res.state = .responded,
- .first, .start, .responded, .finished => unreachable,
+ iovecs[iovecs_len] = .{
+ .iov_base = "\r\n",
+ .iov_len = 2,
+ };
+ iovecs_len += 1;
}
- var buffered = std.io.bufferedWriter(res.connection.writer());
- const w = buffered.writer();
-
- try w.writeAll(@tagName(res.version));
- try w.writeByte(' ');
- try w.print("{d}", .{@intFromEnum(res.status)});
- try w.writeByte(' ');
- if (res.reason) |reason| {
- try w.writeAll(reason);
- } else if (res.status.phrase()) |phrase| {
- try w.writeAll(phrase);
- }
- try w.writeAll("\r\n");
+ iovecs[iovecs_len] = .{
+ .iov_base = "\r\n",
+ .iov_len = 2,
+ };
+ iovecs_len += 1;
+
+ if (request.head.method != .HEAD) {
+ const is_chunked = (options.transfer_encoding orelse .none) == .chunked;
+ if (is_chunked) {
+ if (content.len > 0) {
+ const chunk_header = std.fmt.bufPrint(
+ &chunk_header_buffer,
+ "{x}\r\n",
+ .{content.len},
+ ) catch unreachable;
+
+ iovecs[iovecs_len] = .{
+ .iov_base = chunk_header.ptr,
+ .iov_len = chunk_header.len,
+ };
+ iovecs_len += 1;
+
+ iovecs[iovecs_len] = .{
+ .iov_base = content.ptr,
+ .iov_len = content.len,
+ };
+ iovecs_len += 1;
+
+ iovecs[iovecs_len] = .{
+ .iov_base = "\r\n",
+ .iov_len = 2,
+ };
+ iovecs_len += 1;
+ }
- if (res.status == .@"continue") {
- res.state = .waited; // we still need to send another request after this
- } else {
- if (!res.headers.contains("server")) {
- try w.writeAll("Server: zig (std.http)\r\n");
+ iovecs[iovecs_len] = .{
+ .iov_base = "0\r\n\r\n",
+ .iov_len = 5,
+ };
+ iovecs_len += 1;
+ } else if (content.len > 0) {
+ iovecs[iovecs_len] = .{
+ .iov_base = content.ptr,
+ .iov_len = content.len,
+ };
+ iovecs_len += 1;
}
+ }
- if (!res.headers.contains("connection")) {
- const req_connection = res.request.headers.getFirstValue("connection");
- const req_keepalive = req_connection != null and !std.ascii.eqlIgnoreCase("close", req_connection.?);
-
- if (req_keepalive) {
- try w.writeAll("Connection: keep-alive\r\n");
- } else {
- try w.writeAll("Connection: close\r\n");
- }
- }
+ try request.server.connection.stream.writevAll(iovecs[0..iovecs_len]);
+ }
- const has_transfer_encoding = res.headers.contains("transfer-encoding");
- const has_content_length = res.headers.contains("content-length");
+ /// Options accepted by `respondStreaming`.
+ pub const RespondStreamingOptions = struct {
+ /// An externally managed slice of memory used to batch bytes before
+ /// sending. `respondStreaming` asserts this is large enough to store
+ /// the full HTTP response head.
+ ///
+ /// Must outlive the returned Response.
+ send_buffer: []u8,
+ /// If provided, the response will use the content-length header;
+ /// otherwise it will use transfer-encoding: chunked.
+ /// Only consulted when `respond_options.transfer_encoding` is `null`.
+ content_length: ?u64 = null,
+ /// Options that are shared with the `respond` method.
+ respond_options: RespondOptions = .{},
+ };
- if (!has_transfer_encoding and !has_content_length) {
- switch (res.transfer_encoding) {
- .chunked => try w.writeAll("Transfer-Encoding: chunked\r\n"),
- .content_length => |content_length| try w.print("Content-Length: {d}\r\n", .{content_length}),
- .none => {},
- }
+ /// Begins a streamed response and returns the `Response` used to write
+ /// the body.
+ ///
+ /// The header is buffered but not sent until Response.flush is called.
+ ///
+ /// If the request contains a body and the connection is to be reused,
+ /// discards the request body, leaving the Server in the `ready` state. If
+ /// this discarding fails, the connection is marked as not to be reused and
+ /// no error is surfaced.
+ ///
+ /// HEAD requests are handled transparently by setting a flag on the
+ /// returned Response to omit the body. However it may be worth noticing
+ /// that flag and skipping any expensive work that would otherwise need to
+ /// be done to satisfy the request.
+ ///
+ /// If the request carries an `expect` header that was not handled, the
+ /// buffered head is a "417 Expectation Failed" response and the body is
+ /// elided.
+ ///
+ /// Asserts `send_buffer` is large enough to store the entire response header.
+ /// Asserts status is not `continue`.
+ pub fn respondStreaming(request: *Request, options: RespondStreamingOptions) Response {
+ const o = options.respond_options;
+ assert(o.status != .@"continue");
+ const transfer_encoding_none = (o.transfer_encoding orelse .chunked) == .none;
+ const server_keep_alive = !transfer_encoding_none and o.keep_alive;
+ const keep_alive = request.discardBody(server_keep_alive);
+ const phrase = o.reason orelse o.status.phrase() orelse "";
+
+ var h = std.ArrayListUnmanaged(u8).initBuffer(options.send_buffer);
+
+ const elide_body = if (request.head.expect != null) eb: {
+ // reader(), and hence discardBody() above, sets expect to null once
+ // it has been handled. Still being non-null here means unhandled.
+ h.appendSliceAssumeCapacity("HTTP/1.1 417 Expectation Failed\r\n");
+ if (keep_alive) h.appendSliceAssumeCapacity("connection: keep-alive\r\n");
+ h.appendSliceAssumeCapacity("content-length: 0\r\n\r\n");
+ break :eb true;
+ } else eb: {
+ h.fixedWriter().print("{s} {d} {s}\r\n", .{
+ @tagName(o.version), @intFromEnum(o.status), phrase,
+ }) catch unreachable;
+ if (keep_alive) h.appendSliceAssumeCapacity("connection: keep-alive\r\n");
+
+ if (o.transfer_encoding) |transfer_encoding| switch (transfer_encoding) {
+ .chunked => h.appendSliceAssumeCapacity("transfer-encoding: chunked\r\n"),
+ .none => {},
+ } else if (options.content_length) |len| {
+ h.fixedWriter().print("content-length: {d}\r\n", .{len}) catch unreachable;
} else {
- if (has_content_length) {
- const content_length = std.fmt.parseInt(u64, res.headers.getFirstValue("content-length").?, 10) catch return error.InvalidContentLength;
-
- res.transfer_encoding = .{ .content_length = content_length };
- } else if (has_transfer_encoding) {
- const transfer_encoding = res.headers.getFirstValue("transfer-encoding").?;
- if (std.mem.eql(u8, transfer_encoding, "chunked")) {
- res.transfer_encoding = .chunked;
- } else {
- return error.UnsupportedTransferEncoding;
- }
- } else {
- res.transfer_encoding = .none;
- }
+ h.appendSliceAssumeCapacity("transfer-encoding: chunked\r\n");
}
- try w.print("{}", .{res.headers});
- }
-
- if (res.request.method == .HEAD) {
- res.transfer_encoding = .none;
- }
+ for (o.extra_headers) |header| {
+ assert(header.name.len != 0);
+ h.appendSliceAssumeCapacity(header.name);
+ h.appendSliceAssumeCapacity(": ");
+ h.appendSliceAssumeCapacity(header.value);
+ h.appendSliceAssumeCapacity("\r\n");
+ }
- try w.writeAll("\r\n");
+ h.appendSliceAssumeCapacity("\r\n");
+ break :eb request.head.method == .HEAD;
+ };
- try buffered.flush();
+ return .{
+ .stream = request.server.connection.stream,
+ .send_buffer = options.send_buffer,
+ .send_buffer_start = 0,
+ .send_buffer_end = h.items.len,
+ .transfer_encoding = if (o.transfer_encoding) |te| switch (te) {
+ .chunked => .chunked,
+ .none => .none,
+ } else if (options.content_length) |len| .{
+ .content_length = len,
+ } else .chunked,
+ .elide_body = elide_body,
+ .chunk_len = 0,
+ };
}
- const TransferReadError = Connection.ReadError || proto.HeadersParser.ReadError;
+ /// Errors that reading the request body can produce.
+ pub const ReadError = net.Stream.ReadError || error{
+ /// The chunk parser reached its invalid state while reading a
+ /// chunked body.
+ HttpChunkInvalid,
+ /// No room was left in the read buffer while reading trailers.
+ HttpHeadersOversize,
+ };
- const TransferReader = std.io.Reader(*Response, TransferReadError, transferRead);
+ /// Read function for bodies delimited by content-length.
+ /// `context` is the `*Request` stored by `reader`. Copies at most
+ /// `buffer.len` bytes and never more than the remaining content length.
+ /// Once the remaining length reaches zero the Server state becomes `ready`.
+ fn read_cl(context: *const anyopaque, buffer: []u8) ReadError!usize {
+ const request: *Request = @constCast(@alignCast(@ptrCast(context)));
+ const s = request.server;
- fn transferReader(res: *Response) TransferReader {
- return .{ .context = res };
+ const remaining_content_length = &request.reader_state.remaining_content_length;
+ if (remaining_content_length.* == 0) {
+ s.state = .ready;
+ return 0;
+ }
+ assert(s.state == .receiving_body);
+ const available = try fill(s, request.head_end);
+ const len = @min(remaining_content_length.*, available.len, buffer.len);
+ @memcpy(buffer[0..len], available[0..len]);
+ remaining_content_length.* -= len;
+ s.next_request_start += len;
+ if (remaining_content_length.* == 0)
+ s.state = .ready;
+ return len;
}
- fn transferRead(res: *Response, buf: []u8) TransferReadError!usize {
- if (res.request.parser.done) return 0;
+ /// Returns the unread bytes of the read buffer. When there are none, reads
+ /// from the connection into the buffer region that follows the request
+ /// head and returns whatever was read (possibly an empty slice).
+ fn fill(s: *Server, head_end: usize) ReadError![]u8 {
+ const buffered = s.read_buffer[s.next_request_start..s.read_buffer_len];
+ if (buffered.len != 0) return buffered;
+
+ // Nothing buffered: reuse the space right after the head.
+ s.next_request_start = head_end;
+ const refill_area = s.read_buffer[head_end..];
+ const bytes_read = try s.connection.stream.read(refill_area);
+ s.read_buffer_len = head_end + bytes_read;
+ return refill_area[0..bytes_read];
+ }
- var index: usize = 0;
- while (index == 0) {
- const amt = try res.request.parser.read(&res.connection, buf[index..], false);
- if (amt == 0 and res.request.parser.done) break;
- index += amt;
+ /// Read function for bodies using transfer-encoding: chunked.
+ /// `context` is the `*Request` stored by `reader`.
+ ///
+ /// Returns 0 only at the end of the body, after the trailers have been
+ /// consumed. A connection that ends before the body is complete yields
+ /// `error.HttpChunkInvalid` instead of looping on empty reads.
+ fn read_chunked(context: *const anyopaque, buffer: []u8) ReadError!usize {
+ const request: *Request = @constCast(@alignCast(@ptrCast(context)));
+ const s = request.server;
+
+ const cp = &request.reader_state.chunk_parser;
+ const head_end = request.head_end;
+
+ // Protect against returning 0 before the end of stream.
+ var out_end: usize = 0;
+ while (out_end == 0) {
+ switch (cp.state) {
+ .invalid => return 0,
+ .data => {
+ assert(s.state == .receiving_body);
+ const available = try fill(s, head_end);
+ // No bytes means the stream ended inside a chunk; without
+ // this check the loop would never make progress.
+ if (available.len == 0) return error.HttpChunkInvalid;
+ const len = @min(cp.chunk_len, available.len, buffer.len);
+ @memcpy(buffer[0..len], available[0..len]);
+ cp.chunk_len -= len;
+ if (cp.chunk_len == 0)
+ cp.state = .data_suffix;
+ out_end += len;
+ s.next_request_start += len;
+ continue;
+ },
+ else => {
+ assert(s.state == .receiving_body);
+ const available = try fill(s, head_end);
+ // The stream ended in the middle of the chunk framing.
+ if (available.len == 0) return error.HttpChunkInvalid;
+ const n = cp.feed(available);
+ switch (cp.state) {
+ .invalid => return error.HttpChunkInvalid,
+ .data => {
+ if (cp.chunk_len == 0) {
+ // The next bytes in the stream are trailers,
+ // or \r\n to indicate end of chunked body.
+ //
+ // This function must append the trailers at
+ // head_end so that headers and trailers are
+ // together.
+ //
+ // Since returning 0 would indicate end of
+ // stream, this function must read all the
+ // trailers before returning.
+ if (s.next_request_start > head_end) rebase(s, head_end);
+ var hp: http.HeadParser = .{};
+ {
+ const bytes = s.read_buffer[head_end..s.read_buffer_len];
+ const end = hp.feed(bytes);
+ if (hp.state == .finished) {
+ cp.state = .invalid;
+ s.state = .ready;
+ s.next_request_start = s.read_buffer_len - bytes.len + end;
+ return out_end;
+ }
+ }
+ while (true) {
+ const buf = s.read_buffer[s.read_buffer_len..];
+ if (buf.len == 0)
+ return error.HttpHeadersOversize;
+ const read_n = try s.connection.stream.read(buf);
+ // The stream ended before the trailers did.
+ if (read_n == 0) return error.HttpChunkInvalid;
+ s.read_buffer_len += read_n;
+ const bytes = buf[0..read_n];
+ const end = hp.feed(bytes);
+ if (hp.state == .finished) {
+ cp.state = .invalid;
+ s.state = .ready;
+ s.next_request_start = s.read_buffer_len - bytes.len + end;
+ return out_end;
+ }
+ }
+ }
+ const data = available[n..];
+ const len = @min(cp.chunk_len, data.len, buffer.len);
+ @memcpy(buffer[0..len], data[0..len]);
+ cp.chunk_len -= len;
+ if (cp.chunk_len == 0)
+ cp.state = .data_suffix;
+ out_end += len;
+ s.next_request_start += n + len;
+ continue;
+ },
+ else => continue,
+ }
+ },
+ }
}
-
- return index;
+ return out_end;
}
- pub const WaitError = Connection.ReadError || proto.HeadersParser.CheckCompleteHeadError || Request.ParseError || error{ CompressionInitializationFailed, CompressionNotSupported };
+ /// Errors returned by `reader`. Write errors are included because `reader`
+ /// may have to send the "100 Continue" interim response.
+ pub const ReaderError = Response.WriteError || error{
+ /// The client sent an expect HTTP header value other than
+ /// "100-continue".
+ HttpExpectationFailed,
+ };
- /// Wait for the client to send a complete request head.
+ /// Returns a reader for the request body and moves the Server into the
+ /// `receiving_body` state. The body framing is chosen from the parsed
+ /// head: chunked, or content-length (0 when the header is absent).
+ ///
+ /// In the case that the request contains "expect: 100-continue", this
+ /// function writes the continuation header, which means it can fail with a
+ /// write error. After sending the continuation header, it sets the
+ /// request's expect field to `null`.
///
- /// For correct behavior, the following rules must be followed:
- ///
- /// * If this returns any error in `Connection.ReadError`, you MUST immediately close the connection by calling `deinit`.
- /// * If this returns `error.HttpHeadersInvalid`, you MAY immediately close the connection by calling `deinit`.
- /// * If this returns `error.HttpHeadersExceededSizeLimit`, you MUST respond with a 431 status code and then call `deinit`.
- /// * If this returns any error in `Request.ParseError`, you MUST respond with a 400 status code and then call `deinit`.
- /// * If this returns any other error, you MUST respond with a 400 status code and then call `deinit`.
- /// * If the request has an Expect header containing 100-continue, you MUST either:
- /// * Respond with a 100 status code, then call `wait` again.
- /// * Respond with a 417 status code.
- pub fn wait(res: *Response) WaitError!void {
- switch (res.state) {
- .first, .start => res.state = .waited,
- .waited, .responded, .finished => unreachable,
+ /// Asserts that this function is only called once.
+ pub fn reader(request: *Request) ReaderError!std.io.AnyReader {
+ const s = request.server;
+ assert(s.state == .received_head);
+ s.state = .receiving_body;
+ s.next_request_start = request.head_end;
+
+ if (request.head.expect) |expect| {
+ if (mem.eql(u8, expect, "100-continue")) {
+ try request.server.connection.stream.writeAll("HTTP/1.1 100 Continue\r\n\r\n");
+ request.head.expect = null;
+ } else {
+ return error.HttpExpectationFailed;
+ }
}
- while (true) {
- try res.connection.fill();
-
- const nchecked = try res.request.parser.checkCompleteHead(res.allocator, res.connection.peek());
- res.connection.drop(@as(u16, @intCast(nchecked)));
-
- if (res.request.parser.state.isContent()) break;
+ switch (request.head.transfer_encoding) {
+ .chunked => {
+ request.reader_state = .{ .chunk_parser = http.ChunkParser.init };
+ return .{
+ .readFn = read_chunked,
+ .context = request,
+ };
+ },
+ .none => {
+ request.reader_state = .{
+ .remaining_content_length = request.head.content_length orelse 0,
+ };
+ return .{
+ .readFn = read_cl,
+ .context = request,
+ };
+ },
}
+ }
- res.request.headers = .{ .allocator = res.allocator, .owned = true };
- try res.request.parse(res.request.parser.header_bytes.items);
-
- if (res.request.transfer_encoding != .none) {
- switch (res.request.transfer_encoding) {
- .none => unreachable,
- .chunked => {
- res.request.parser.next_chunk_length = 0;
- res.request.parser.state = .chunk_head_size;
- },
- }
- } else if (res.request.content_length) |cl| {
- res.request.parser.next_chunk_length = cl;
+ /// Returns whether the connection: keep-alive header should be sent to the client.
+ ///
+ /// When both `keep_alive` and the request's own keep-alive flag are set,
+ /// any unread request body is discarded so that the Server ends up in the
+ /// `ready` state. If creating the reader or discarding fails, false is
+ /// returned and the state is left as the reader set it.
+ ///
+ /// Otherwise returns false; if no body reader was created yet, the Server
+ /// state is set to `closing`.
+ fn discardBody(request: *Request, keep_alive: bool) bool {
+ // Prepare to receive another request on the same connection.
+ // There are two factors to consider:
+ // * Any body the client sent must be discarded.
+ // * The Server's read_buffer may already have some bytes in it from
+ // whatever came after the head, which may be the next HTTP request
+ // or the request body.
+ // If the connection won't be kept alive, then none of this matters
+ // because the connection will be severed after the response is sent.
+ const s = request.server;
+ if (keep_alive and request.head.keep_alive) switch (s.state) {
+ .received_head => {
+ const r = request.reader() catch return false;
+ _ = r.discard() catch return false;
+ assert(s.state == .ready);
+ return true;
+ },
+ .receiving_body, .ready => return true,
+ else => unreachable,
+ };
- if (cl == 0) res.request.parser.done = true;
- } else {
- res.request.parser.done = true;
+ // Avoid clobbering the state in case a reading stream already exists.
+ switch (s.state) {
+ .received_head => s.state = .closing,
+ else => {},
}
+ return false;
+ }
+};
- if (!res.request.parser.done) {
- switch (res.request.transfer_compression) {
- .identity => res.request.compression = .none,
- .compress, .@"x-compress" => return error.CompressionNotSupported,
- .deflate => res.request.compression = .{
- .deflate = std.compress.zlib.decompressStream(res.allocator, res.transferReader()) catch return error.CompressionInitializationFailed,
- },
- .gzip, .@"x-gzip" => res.request.compression = .{
- .gzip = std.compress.gzip.decompress(res.allocator, res.transferReader()) catch return error.CompressionInitializationFailed,
- },
- .zstd => res.request.compression = .{
- .zstd = std.compress.zstd.decompressStream(res.allocator, res.transferReader()),
- },
- }
+pub const Response = struct {
+ stream: net.Stream,
+ send_buffer: []u8,
+ /// Index of the first byte in `send_buffer`.
+ /// This is 0 unless a short write happens in `write`.
+ send_buffer_start: usize,
+ /// Index of the last byte + 1 in `send_buffer`.
+ send_buffer_end: usize,
+ /// How the response body is framed; see `TransferEncoding`.
+ /// With `content_length`, the payload counts down to zero as bytes are
+ /// written, as a debugging utility.
+ transfer_encoding: TransferEncoding,
+ elide_body: bool,
+ /// Indicates how much of the end of the `send_buffer` corresponds to a
+ /// chunk. This amount of data will be wrapped by an HTTP chunk header.
+ chunk_len: usize,
+
+ /// Body framing used by a `Response`.
+ pub const TransferEncoding = union(enum) {
+ /// End of connection signals the end of the stream.
+ none,
+ /// Number of body bytes still to be written.
+ /// As a debugging utility, counts down to zero as bytes are written.
+ content_length: u64,
+ /// Each chunk is wrapped in a header and trailer.
+ chunked,
+ };
+
+ /// Errors that writing to the underlying stream can produce.
+ pub const WriteError = net.Stream.WriteError;
+
+ /// Finishes the response and invalidates `r`.
+ ///
+ /// With content-length framing, asserts that exactly the announced number
+ /// of bytes was written and then flushes. Without any framing, it only
+ /// flushes. With transfer-encoding: chunked, it flushes through the
+ /// chunked path, passing no trailers.
+ /// Respects the value of `elide_body` to omit all data after the headers.
+ pub fn end(r: *Response) WriteError!void {
+ switch (r.transfer_encoding) {
+ .chunked => try flush_chunked(r, &.{}),
+ .none => try flush_cl(r),
+ .content_length => |remaining| {
+ // Trips when end() is called before all bytes are written.
+ assert(remaining == 0);
+ try flush_cl(r);
+ },
}
+ r.* = undefined;
}
- pub const ReadError = TransferReadError || proto.HeadersParser.CheckCompleteHeadError || error{ DecompressionFailure, InvalidTrailers };
-
- pub const Reader = std.io.Reader(*Response, ReadError, read);
+ /// Options accepted by `endChunked`.
+ pub const EndChunkedOptions = struct {
+ /// Trailer fields passed along when the chunked stream is ended.
+ trailers: []const http.Header = &.{},
+ };
- pub fn reader(res: *Response) Reader {
- return .{ .context = res };
+ /// Ends a chunked response: writes the end-of-stream message together
+ /// with any trailers from `options`, flushes the stream to the system and
+ /// invalidates `r`.
+ /// Respects the value of `elide_body` to omit all data after the headers.
+ /// Asserts that the Response is using transfer-encoding: chunked.
+ /// Asserts there are at most 25 trailers.
+ pub fn endChunked(r: *Response, options: EndChunkedOptions) WriteError!void {
+ assert(r.transfer_encoding == .chunked);
+ const trailers = options.trailers;
+ try flush_chunked(r, trailers);
+ r.* = undefined;
}
- /// Reads data from the response body. Must be called after `wait`.
- pub fn read(res: *Response, buffer: []u8) ReadError!usize {
- switch (res.state) {
- .waited, .responded, .finished => {},
- .first, .start => unreachable,
+ /// Writes body bytes using the framing selected for this response.
+ ///
+ /// If using content-length, asserts that writing these bytes to the client
+ /// would not exceed the content-length value sent in the HTTP header.
+ /// May return 0, which does not indicate end of stream. The caller decides
+ /// when the end of stream occurs by calling `end`.
+ pub fn write(r: *Response, bytes: []const u8) WriteError!usize {
+ return switch (r.transfer_encoding) {
+ .chunked => write_chunked(r, bytes),
+ .none, .content_length => write_cl(r, bytes),
+ };
+ }
- const out_index = switch (res.request.compression) {
- .deflate => |*deflate| deflate.read(buffer) catch return error.DecompressionFailure,
- .gzip => |*gzip| gzip.read(buffer) catch return error.DecompressionFailure,
- .zstd => |*zstd| zstd.read(buffer) catch return error.DecompressionFailure,
- else => try res.transferRead(buffer),
- };
+ /// Write function used for content-length and unframed responses.
+ /// `context` is a `*Response`. Bytes are buffered in `send_buffer` while
+ /// they fit; otherwise the buffer and `bytes` go out in one vectored
+ /// write. Returns how many of `bytes` were accepted, which may be 0 after
+ /// a short write.
+ fn write_cl(context: *const anyopaque, bytes: []const u8) WriteError!usize {
+ const r: *Response = @constCast(@alignCast(@ptrCast(context)));
- if (out_index == 0) {
- const has_trail = !res.request.parser.state.isContent();
+ // `trash` stands in for the counter when no content-length is tracked,
+ // so the decrements below need no special case.
+ var trash: u64 = std.math.maxInt(u64);
+ const len = switch (r.transfer_encoding) {
+ .content_length => |*len| len,
+ else => &trash,
+ };
- while (!res.request.parser.state.isContent()) { // read trailing headers
- try res.connection.fill();
+ if (r.elide_body) {
+ len.* -= bytes.len;
+ return bytes.len;
+ }
- const nchecked = try res.request.parser.checkCompleteHead(res.allocator, res.connection.peek());
- res.connection.drop(@as(u16, @intCast(nchecked)));
+ if (bytes.len + r.send_buffer_end > r.send_buffer.len) {
+ const send_buffer_len = r.send_buffer_end - r.send_buffer_start;
+ var iovecs: [2]std.posix.iovec_const = .{
+ .{
+ .iov_base = r.send_buffer.ptr + r.send_buffer_start,
+ .iov_len = send_buffer_len,
+ },
+ .{
+ .iov_base = bytes.ptr,
+ .iov_len = bytes.len,
+ },
+ };
+ const n = try r.stream.writev(&iovecs);
+
+ if (n >= send_buffer_len) {
+ // It was enough to reset the buffer.
+ r.send_buffer_start = 0;
+ r.send_buffer_end = 0;
+ const bytes_n = n - send_buffer_len;
+ len.* -= bytes_n;
+ return bytes_n;
}
- if (has_trail) {
- res.request.headers = http.Headers{ .allocator = res.allocator, .owned = false };
-
- // The response headers before the trailers are already guaranteed to be valid, so they will always be parsed again and cannot return an error.
- // This will *only* fail for a malformed trailer.
- res.request.parse(res.request.parser.header_bytes.items) catch return error.InvalidTrailers;
- }
+ // It didn't even make it through the existing buffer, let
+ // alone the new bytes provided.
+ r.send_buffer_start += n;
+ return 0;
}
- return out_index;
- }
-
- /// Reads data from the response body. Must be called after `wait`.
- pub fn readAll(res: *Response, buffer: []u8) !usize {
- var index: usize = 0;
- while (index < buffer.len) {
- const amt = try read(res, buffer[index..]);
- if (amt == 0) break;
- index += amt;
- }
- return index;
+ // All bytes can be stored in the remaining space of the buffer.
+ @memcpy(r.send_buffer[r.send_buffer_end..][0..bytes.len], bytes);
+ r.send_buffer_end += bytes.len;
+ len.* -= bytes.len;
+ return bytes.len;
}
- pub const WriteError = Connection.WriteError || error{ NotWriteable, MessageTooLong };
+ fn write_chunked(context: *const anyopaque, bytes: []const u8) WriteError!usize {
+ const r: *Response = @constCast(@alignCast(@ptrCast(context)));
+ assert(r.transfer_encoding == .chunked);
- pub const Writer = std.io.Writer(*Response, WriteError, write);
+ if (r.elide_body)
+ return bytes.len;
- pub fn writer(res: *Response) Writer {
- return .{ .context = res };
- }
+ if (bytes.len + r.send_buffer_end > r.send_buffer.len) {
+ const send_buffer_len = r.send_buffer_end - r.send_buffer_start;
+ const chunk_len = r.chunk_len + bytes.len;
+ var header_buf: [18]u8 = undefined;
+ const chunk_header = std.fmt.bufPrint(&header_buf, "{x}\r\n", .{chunk_len}) catch unreachable;
- /// Write `bytes` to the server. The `transfer_encoding` request header determines how data will be sent.
- /// Must be called after `send` and before `finish`.
- pub fn write(res: *Response, bytes: []const u8) WriteError!usize {
- switch (res.state) {
- .responded => {},
- .first, .waited, .start, .finished => unreachable,
+ var iovecs: [5]std.posix.iovec_const = .{
+ .{
+ .iov_base = r.send_buffer.ptr + r.send_buffer_start,
+ .iov_len = send_buffer_len - r.chunk_len,
+ },
+ .{
+ .iov_base = chunk_header.ptr,
+ .iov_len = chunk_header.len,
+ },
+ .{
+ .iov_base = r.send_buffer.ptr + r.send_buffer_end - r.chunk_len,
+ .iov_len = r.chunk_len,
+ },
+ .{
+ .iov_base = bytes.ptr,
+ .iov_len = bytes.len,
+ },
+ .{
+ .iov_base = "\r\n",
+ .iov_len = 2,
+ },
+ };
+ // TODO make this writev instead of writevAll, which involves
+ // complicating the logic of this function.
+ try r.stream.writevAll(&iovecs);
+ r.send_buffer_start = 0;
+ r.send_buffer_end = 0;
+ r.chunk_len = 0;
+ return bytes.len;
}
- switch (res.transfer_encoding) {
- .chunked => {
- try res.connection.writer().print("{x}\r\n", .{bytes.len});
- try res.connection.writeAll(bytes);
- try res.connection.writeAll("\r\n");
-
- return bytes.len;
- },
- .content_length => |*len| {
- if (len.* < bytes.len) return error.MessageTooLong;
-
- const amt = try res.connection.write(bytes);
- len.* -= amt;
- return amt;
- },
- .none => return error.NotWriteable,
- }
+ // All bytes can be stored in the remaining space of the buffer.
+ @memcpy(r.send_buffer[r.send_buffer_end..][0..bytes.len], bytes);
+ r.send_buffer_end += bytes.len;
+ r.chunk_len += bytes.len;
+ return bytes.len;
}
- /// Write `bytes` to the server. The `transfer_encoding` request header determines how data will be sent.
- /// Must be called after `send` and before `finish`.
- pub fn writeAll(req: *Response, bytes: []const u8) WriteError!void {
+ /// If using content-length, asserts that writing these bytes to the client
+ /// would not exceed the content-length value sent in the HTTP header.
+ pub fn writeAll(r: *Response, bytes: []const u8) WriteError!void {
var index: usize = 0;
while (index < bytes.len) {
- index += try write(req, bytes[index..]);
+ index += try write(r, bytes[index..]);
}
}
- pub const FinishError = WriteError || error{MessageNotCompleted};
-
- /// Finish the body of a request. This notifies the server that you have no more data to send.
- /// Must be called after `send`.
- pub fn finish(res: *Response) FinishError!void {
- switch (res.state) {
- .responded => res.state = .finished,
- .first, .waited, .start, .finished => unreachable,
+ /// Sends all buffered data to the client.
+ /// This is redundant after calling `end`.
+ /// Respects the value of `elide_body` to omit all data after the headers.
+ pub fn flush(r: *Response) WriteError!void {
+ switch (r.transfer_encoding) {
+ .none, .content_length => return flush_cl(r),
+ // `null` trailers: send what is buffered without ending the stream.
+ .chunked => return flush_chunked(r, null),
}
+ }
- switch (res.transfer_encoding) {
- .chunked => try res.connection.writeAll("0\r\n\r\n"),
- .content_length => |len| if (len != 0) return error.MessageNotCompleted,
- .none => {},
- }
+ /// Writes the pending bytes `send_buffer[send_buffer_start..send_buffer_end]`
+ /// to the stream, then resets both indices to 0 so the buffer is empty.
+ fn flush_cl(r: *Response) WriteError!void {
+ try r.stream.writeAll(r.send_buffer[r.send_buffer_start..r.send_buffer_end]);
+ r.send_buffer_start = 0;
+ r.send_buffer_end = 0;
}
-};
-/// Create a new HTTP server.
-pub fn init(options: net.StreamServer.Options) Server {
- return .{
- .socket = net.StreamServer.init(options),
- };
-}
+ /// Sends everything pending in `send_buffer` with one vectored write:
+ /// the bytes that precede the buffered chunk data (`http_headers`), then,
+ /// when `chunk_len > 0`, the buffered data framed as a single chunk.
+ /// When `end_trailers` is non-null, also writes the terminating "0\r\n",
+ /// each trailer as "name: value\r\n", and the final "\r\n".
+ /// When `elide_body` is set, only the header bytes are written.
+ /// Asserts chunked transfer-encoding and at most `max_trailers` trailers.
+ /// On success the send buffer indices and `chunk_len` are reset to 0.
+ fn flush_chunked(r: *Response, end_trailers: ?[]const http.Header) WriteError!void {
+ const max_trailers = 25;
+ if (end_trailers) |trailers| assert(trailers.len <= max_trailers);
+ assert(r.transfer_encoding == .chunked);
-/// Free all resources associated with this server.
-pub fn deinit(server: *Server) void {
- server.socket.deinit();
-}
+ const http_headers = r.send_buffer[r.send_buffer_start .. r.send_buffer_end - r.chunk_len];
-pub const ListenError = std.os.SocketError || std.os.BindError || std.os.ListenError || std.os.SetSockOptError || std.os.GetSockNameError;
+ if (r.elide_body) {
+ try r.stream.writeAll(http_headers);
+ r.send_buffer_start = 0;
+ r.send_buffer_end = 0;
+ r.chunk_len = 0;
+ return;
+ }
-/// Start the HTTP server listening on the given address.
-pub fn listen(server: *Server, address: net.Address) ListenError!void {
- try server.socket.listen(address);
-}
+ var header_buf: [18]u8 = undefined;
+ const chunk_header = std.fmt.bufPrint(&header_buf, "{x}\r\n", .{r.chunk_len}) catch unreachable;
-pub const AcceptError = net.StreamServer.AcceptError || Allocator.Error;
-
-pub const HeaderStrategy = union(enum) {
- /// In this case, the client's Allocator will be used to store the
- /// entire HTTP header. This value is the maximum total size of
- /// HTTP headers allowed, otherwise
- /// error.HttpHeadersExceededSizeLimit is returned from read().
- dynamic: usize,
- /// This is used to store the entire HTTP header. If the HTTP
- /// header is too big to fit, `error.HttpHeadersExceededSizeLimit`
- /// is returned from read(). When this is used, `error.OutOfMemory`
- /// cannot be returned from `read()`.
- static: []u8,
-};
+ // Worst case entry count: 1 (headers) + 3 (chunk header, chunk data,
+ // "\r\n") + 1 ("0\r\n") + 4 per trailer (name, ": ", value, "\r\n")
+ // + 1 (final "\r\n") = max_trailers * 4 + 6.
+ var iovecs: [max_trailers * 4 + 6]std.posix.iovec_const = undefined;
+ var iovecs_len: usize = 0;
-pub const AcceptOptions = struct {
- allocator: Allocator,
- header_strategy: HeaderStrategy = .{ .dynamic = 8192 },
-};
+ iovecs[iovecs_len] = .{
+ .iov_base = http_headers.ptr,
+ .iov_len = http_headers.len,
+ };
+ iovecs_len += 1;
+
+ if (r.chunk_len > 0) {
+ iovecs[iovecs_len] = .{
+ .iov_base = chunk_header.ptr,
+ .iov_len = chunk_header.len,
+ };
+ iovecs_len += 1;
+
+ iovecs[iovecs_len] = .{
+ .iov_base = r.send_buffer.ptr + r.send_buffer_end - r.chunk_len,
+ .iov_len = r.chunk_len,
+ };
+ iovecs_len += 1;
+
+ iovecs[iovecs_len] = .{
+ .iov_base = "\r\n",
+ .iov_len = 2,
+ };
+ iovecs_len += 1;
+ }
-/// Accept a new connection.
-pub fn accept(server: *Server, options: AcceptOptions) AcceptError!Response {
- const in = try server.socket.accept();
-
- return Response{
- .allocator = options.allocator,
- .address = in.address,
- .connection = .{
- .stream = in.stream,
- .protocol = .plain,
- },
- .headers = .{ .allocator = options.allocator },
- .request = .{
- .version = undefined,
- .method = undefined,
- .target = undefined,
- .headers = .{ .allocator = options.allocator, .owned = false },
- .parser = switch (options.header_strategy) {
- .dynamic => |max| proto.HeadersParser.initDynamic(max),
- .static => |buf| proto.HeadersParser.initStatic(buf),
- },
- },
- };
-}
+ if (end_trailers) |trailers| {
+ iovecs[iovecs_len] = .{
+ .iov_base = "0\r\n",
+ .iov_len = 3,
+ };
+ iovecs_len += 1;
+
+ for (trailers) |trailer| {
+ iovecs[iovecs_len] = .{
+ .iov_base = trailer.name.ptr,
+ .iov_len = trailer.name.len,
+ };
+ iovecs_len += 1;
+
+ iovecs[iovecs_len] = .{
+ .iov_base = ": ",
+ .iov_len = 2,
+ };
+ iovecs_len += 1;
+
+ if (trailer.value.len != 0) {
+ iovecs[iovecs_len] = .{
+ .iov_base = trailer.value.ptr,
+ .iov_len = trailer.value.len,
+ };
+ iovecs_len += 1;
+ }
-test "HTTP server handles a chunked transfer coding request" {
- const builtin = @import("builtin");
+ iovecs[iovecs_len] = .{
+ .iov_base = "\r\n",
+ .iov_len = 2,
+ };
+ iovecs_len += 1;
+ }
- // This test requires spawning threads.
- if (builtin.single_threaded) {
- return error.SkipZigTest;
+ iovecs[iovecs_len] = .{
+ .iov_base = "\r\n",
+ .iov_len = 2,
+ };
+ iovecs_len += 1;
+ }
+
+ try r.stream.writevAll(iovecs[0..iovecs_len]);
+ r.send_buffer_start = 0;
+ r.send_buffer_end = 0;
+ r.chunk_len = 0;
}
- const native_endian = comptime builtin.cpu.arch.endian();
- if (builtin.zig_backend == .stage2_llvm and native_endian == .big) {
- // https://github.com/ziglang/zig/issues/13782
- return error.SkipZigTest;
+ /// Returns a type-erased writer for the response body. The write
+ /// function is chosen from `transfer_encoding` at the time of this call:
+ /// `write_cl` for `.none`/`.content_length`, `write_chunked` for `.chunked`.
+ pub fn writer(r: *Response) std.io.AnyWriter {
+ return .{
+ .writeFn = switch (r.transfer_encoding) {
+ .none, .content_length => write_cl,
+ .chunked => write_chunked,
+ },
+ .context = r,
+ };
}
+};
- if (builtin.os.tag == .wasi) return error.SkipZigTest;
-
- const allocator = std.testing.allocator;
- const expect = std.testing.expect;
-
- const max_header_size = 8192;
- var server = std.http.Server.init(.{ .reuse_address = true });
- defer server.deinit();
-
- const address = try std.net.Address.parseIp("127.0.0.1", 0);
- try server.listen(address);
- const server_port = server.socket.listen_address.in.getPort();
-
- const server_thread = try std.Thread.spawn(.{}, (struct {
- fn apply(s: *std.http.Server) !void {
- var res = try s.accept(.{
- .allocator = allocator,
- .header_strategy = .{ .dynamic = max_header_size },
- });
- defer res.deinit();
- defer _ = res.reset();
- try res.wait();
-
- try expect(res.request.transfer_encoding == .chunked);
-
- const server_body: []const u8 = "message from server!\n";
- res.transfer_encoding = .{ .content_length = server_body.len };
- try res.headers.append("content-type", "text/plain");
- try res.headers.append("connection", "close");
- try res.send();
-
- var buf: [128]u8 = undefined;
- const n = try res.readAll(&buf);
- try expect(std.mem.eql(u8, buf[0..n], "ABCD"));
- _ = try res.writer().writeAll(server_body);
- try res.finish();
- }
- }).apply, .{&server});
-
- const request_bytes =
- "POST / HTTP/1.1\r\n" ++
- "Content-Type: text/plain\r\n" ++
- "Transfer-Encoding: chunked\r\n" ++
- "\r\n" ++
- "1\r\n" ++
- "A\r\n" ++
- "1\r\n" ++
- "B\r\n" ++
- "2\r\n" ++
- "CD\r\n" ++
- "0\r\n" ++
- "\r\n";
-
- const stream = try std.net.tcpConnectToHost(allocator, "127.0.0.1", server_port);
- defer stream.close();
- _ = try stream.writeAll(request_bytes[0..]);
-
- server_thread.join();
+/// Moves the unread bytes `read_buffer[next_request_start..read_buffer_len]`
+/// so that they begin at `index`, then updates `read_buffer_len` to the new
+/// end of valid data. `index` must not exceed `next_request_start`.
+fn rebase(s: *Server, index: usize) void {
+ const leftover = s.read_buffer[s.next_request_start..s.read_buffer_len];
+ const dest = s.read_buffer[index..][0..leftover.len];
+ if (leftover.len <= s.next_request_start - index) {
+ // Destination ends at or before the source begins: no overlap.
+ @memcpy(dest, leftover);
+ } else {
+ // The ranges overlap and `dest` starts before `leftover`, so copy
+ // front to back; copying back to front would overwrite source bytes
+ // before they are read.
+ mem.copyForwards(u8, dest, leftover);
+ }
+ s.read_buffer_len = index + leftover.len;
}
+
+const std = @import("../std.zig");
+const http = std.http;
+const mem = std.mem;
+const net = std.net;
+const Uri = std.Uri;
+const assert = std.debug.assert;
+
+const Server = @This();
diff --git a/lib/std/http/protocol.zig b/lib/std/http/protocol.zig
index 0ccafd2ee5..78511f435d 100644
--- a/lib/std/http/protocol.zig
+++ b/lib/std/http/protocol.zig
@@ -7,15 +7,19 @@ const assert = std.debug.assert;
const use_vectors = builtin.zig_backend != .stage2_x86_64;
pub const State = enum {
- /// Begin header parsing states.
invalid,
+
+ // Begin header and trailer parsing states.
+
start,
seen_n,
seen_r,
seen_rn,
seen_rnr,
finished,
- /// Begin transfer-encoding: chunked parsing states.
+
+ // Begin transfer-encoding: chunked parsing states.
+
chunk_head_size,
chunk_head_ext,
chunk_head_r,
@@ -34,484 +38,114 @@ pub const State = enum {
pub const HeadersParser = struct {
state: State = .start,
- /// Whether or not `header_bytes` is allocated or was provided as a fixed buffer.
- header_bytes_owned: bool,
- /// Either a fixed buffer of len `max_header_bytes` or a dynamic buffer that can grow up to `max_header_bytes`.
+ /// A fixed buffer of len `max_header_bytes`.
/// Pointers into this buffer are not stable until after a message is complete.
- header_bytes: std.ArrayListUnmanaged(u8),
- /// The maximum allowed size of `header_bytes`.
- max_header_bytes: usize,
- next_chunk_length: u64 = 0,
- /// Whether this parser is done parsing a complete message.
- /// A message is only done when the entire payload has been read.
- done: bool = false,
-
- /// Initializes the parser with a dynamically growing header buffer of up to `max` bytes.
- pub fn initDynamic(max: usize) HeadersParser {
- return .{
- .header_bytes = .{},
- .max_header_bytes = max,
- .header_bytes_owned = true,
- };
- }
+ header_bytes_buffer: []u8,
+ header_bytes_len: u32,
+ next_chunk_length: u64,
+ /// `false`: headers. `true`: trailers.
+ done: bool,
/// Initializes the parser with a provided buffer `buf`.
- pub fn initStatic(buf: []u8) HeadersParser {
+ pub fn init(buf: []u8) HeadersParser {
return .{
- .header_bytes = .{ .items = buf[0..0], .capacity = buf.len },
- .max_header_bytes = buf.len,
- .header_bytes_owned = false,
+ .header_bytes_buffer = buf,
+ .header_bytes_len = 0,
+ .done = false,
+ .next_chunk_length = 0,
};
}
- /// Completely resets the parser to it's initial state.
- /// This must be called after a message is complete.
- pub fn reset(r: *HeadersParser) void {
- assert(r.done); // The message must be completely read before reset, otherwise the parser is in an invalid state.
-
- r.header_bytes.clearRetainingCapacity();
-
- r.* = .{
- .header_bytes = r.header_bytes,
- .max_header_bytes = r.max_header_bytes,
- .header_bytes_owned = r.header_bytes_owned,
+ /// Reinitialize the parser.
+ /// Asserts the parser is in the "done" state.
+ /// The existing `header_bytes_buffer` is kept; `header_bytes_len` returns
+ /// to 0, so previously stored header bytes are no longer exposed.
+ pub fn reset(hp: *HeadersParser) void {
+ assert(hp.done);
+ hp.* = .{
+ .state = .start,
+ .header_bytes_buffer = hp.header_bytes_buffer,
+ .header_bytes_len = 0,
+ .done = false,
+ .next_chunk_length = 0,
};
}
- /// Returns the number of bytes consumed by headers. This is always less than or equal to `bytes.len`.
- /// You should check `r.state.isContent()` after this to check if the headers are done.
- ///
- /// If the amount returned is less than `bytes.len`, you may assume that the parser is in a content state and the
- /// first byte of content is located at `bytes[result]`.
- pub fn findHeadersEnd(r: *HeadersParser, bytes: []const u8) u32 {
- const vector_len: comptime_int = @max(std.simd.suggestVectorLength(u8) orelse 1, 8);
- const len: u32 = @intCast(bytes.len);
- var index: u32 = 0;
-
- while (true) {
- switch (r.state) {
- .invalid => unreachable,
- .finished => return index,
- .start => switch (len - index) {
- 0 => return index,
- 1 => {
- switch (bytes[index]) {
- '\r' => r.state = .seen_r,
- '\n' => r.state = .seen_n,
- else => {},
- }
-
- return index + 1;
- },
- 2 => {
- const b16 = int16(bytes[index..][0..2]);
- const b8 = intShift(u8, b16);
-
- switch (b8) {
- '\r' => r.state = .seen_r,
- '\n' => r.state = .seen_n,
- else => {},
- }
-
- switch (b16) {
- int16("\r\n") => r.state = .seen_rn,
- int16("\n\n") => r.state = .finished,
- else => {},
- }
-
- return index + 2;
- },
- 3 => {
- const b24 = int24(bytes[index..][0..3]);
- const b16 = intShift(u16, b24);
- const b8 = intShift(u8, b24);
-
- switch (b8) {
- '\r' => r.state = .seen_r,
- '\n' => r.state = .seen_n,
- else => {},
- }
-
- switch (b16) {
- int16("\r\n") => r.state = .seen_rn,
- int16("\n\n") => r.state = .finished,
- else => {},
- }
-
- switch (b24) {
- int24("\r\n\r") => r.state = .seen_rnr,
- else => {},
- }
-
- return index + 3;
- },
- 4...vector_len - 1 => {
- const b32 = int32(bytes[index..][0..4]);
- const b24 = intShift(u24, b32);
- const b16 = intShift(u16, b32);
- const b8 = intShift(u8, b32);
-
- switch (b8) {
- '\r' => r.state = .seen_r,
- '\n' => r.state = .seen_n,
- else => {},
- }
-
- switch (b16) {
- int16("\r\n") => r.state = .seen_rn,
- int16("\n\n") => r.state = .finished,
- else => {},
- }
-
- switch (b24) {
- int24("\r\n\r") => r.state = .seen_rnr,
- else => {},
- }
-
- switch (b32) {
- int32("\r\n\r\n") => r.state = .finished,
- else => {},
- }
-
- index += 4;
- continue;
- },
- else => {
- const chunk = bytes[index..][0..vector_len];
- const matches = if (use_vectors) matches: {
- const Vector = @Vector(vector_len, u8);
- // const BoolVector = @Vector(vector_len, bool);
- const BitVector = @Vector(vector_len, u1);
- const SizeVector = @Vector(vector_len, u8);
-
- const v: Vector = chunk.*;
- const matches_r: BitVector = @bitCast(v == @as(Vector, @splat('\r')));
- const matches_n: BitVector = @bitCast(v == @as(Vector, @splat('\n')));
- const matches_or: SizeVector = matches_r | matches_n;
-
- break :matches @reduce(.Add, matches_or);
- } else matches: {
- var matches: u8 = 0;
- for (chunk) |byte| switch (byte) {
- '\r', '\n' => matches += 1,
- else => {},
- };
- break :matches matches;
- };
- switch (matches) {
- 0 => {},
- 1 => switch (chunk[vector_len - 1]) {
- '\r' => r.state = .seen_r,
- '\n' => r.state = .seen_n,
- else => {},
- },
- 2 => {
- const b16 = int16(chunk[vector_len - 2 ..][0..2]);
- const b8 = intShift(u8, b16);
-
- switch (b8) {
- '\r' => r.state = .seen_r,
- '\n' => r.state = .seen_n,
- else => {},
- }
-
- switch (b16) {
- int16("\r\n") => r.state = .seen_rn,
- int16("\n\n") => r.state = .finished,
- else => {},
- }
- },
- 3 => {
- const b24 = int24(chunk[vector_len - 3 ..][0..3]);
- const b16 = intShift(u16, b24);
- const b8 = intShift(u8, b24);
-
- switch (b8) {
- '\r' => r.state = .seen_r,
- '\n' => r.state = .seen_n,
- else => {},
- }
-
- switch (b16) {
- int16("\r\n") => r.state = .seen_rn,
- int16("\n\n") => r.state = .finished,
- else => {},
- }
-
- switch (b24) {
- int24("\r\n\r") => r.state = .seen_rnr,
- else => {},
- }
- },
- 4...vector_len => {
- inline for (0..vector_len - 3) |i_usize| {
- const i = @as(u32, @truncate(i_usize));
-
- const b32 = int32(chunk[i..][0..4]);
- const b16 = intShift(u16, b32);
-
- if (b32 == int32("\r\n\r\n")) {
- r.state = .finished;
- return index + i + 4;
- } else if (b16 == int16("\n\n")) {
- r.state = .finished;
- return index + i + 2;
- }
- }
-
- const b24 = int24(chunk[vector_len - 3 ..][0..3]);
- const b16 = intShift(u16, b24);
- const b8 = intShift(u8, b24);
-
- switch (b8) {
- '\r' => r.state = .seen_r,
- '\n' => r.state = .seen_n,
- else => {},
- }
-
- switch (b16) {
- int16("\r\n") => r.state = .seen_rn,
- int16("\n\n") => r.state = .finished,
- else => {},
- }
-
- switch (b24) {
- int24("\r\n\r") => r.state = .seen_rnr,
- else => {},
- }
- },
- else => unreachable,
- }
-
- index += vector_len;
- continue;
- },
- },
- .seen_n => switch (len - index) {
- 0 => return index,
- else => {
- switch (bytes[index]) {
- '\n' => r.state = .finished,
- else => r.state = .start,
- }
-
- index += 1;
- continue;
- },
- },
- .seen_r => switch (len - index) {
- 0 => return index,
- 1 => {
- switch (bytes[index]) {
- '\n' => r.state = .seen_rn,
- '\r' => r.state = .seen_r,
- else => r.state = .start,
- }
-
- return index + 1;
- },
- 2 => {
- const b16 = int16(bytes[index..][0..2]);
- const b8 = intShift(u8, b16);
-
- switch (b8) {
- '\r' => r.state = .seen_r,
- '\n' => r.state = .seen_rn,
- else => r.state = .start,
- }
-
- switch (b16) {
- int16("\r\n") => r.state = .seen_rn,
- int16("\n\r") => r.state = .seen_rnr,
- int16("\n\n") => r.state = .finished,
- else => {},
- }
-
- return index + 2;
- },
- else => {
- const b24 = int24(bytes[index..][0..3]);
- const b16 = intShift(u16, b24);
- const b8 = intShift(u8, b24);
-
- switch (b8) {
- '\r' => r.state = .seen_r,
- '\n' => r.state = .seen_n,
- else => r.state = .start,
- }
-
- switch (b16) {
- int16("\r\n") => r.state = .seen_rn,
- int16("\n\n") => r.state = .finished,
- else => {},
- }
-
- switch (b24) {
- int24("\n\r\n") => r.state = .finished,
- else => {},
- }
-
- index += 3;
- continue;
- },
- },
- .seen_rn => switch (len - index) {
- 0 => return index,
- 1 => {
- switch (bytes[index]) {
- '\r' => r.state = .seen_rnr,
- '\n' => r.state = .seen_n,
- else => r.state = .start,
- }
-
- return index + 1;
- },
- else => {
- const b16 = int16(bytes[index..][0..2]);
- const b8 = intShift(u8, b16);
-
- switch (b8) {
- '\r' => r.state = .seen_rnr,
- '\n' => r.state = .seen_n,
- else => r.state = .start,
- }
-
- switch (b16) {
- int16("\r\n") => r.state = .finished,
- int16("\n\n") => r.state = .finished,
- else => {},
- }
-
- index += 2;
- continue;
- },
- },
- .seen_rnr => switch (len - index) {
- 0 => return index,
- else => {
- switch (bytes[index]) {
- '\n' => r.state = .finished,
- else => r.state = .start,
- }
-
- index += 1;
- continue;
- },
- },
- .chunk_head_size => unreachable,
- .chunk_head_ext => unreachable,
- .chunk_head_r => unreachable,
- .chunk_data => unreachable,
- .chunk_data_suffix => unreachable,
- .chunk_data_suffix_r => unreachable,
- }
+ /// Returns the header bytes accumulated so far: the first
+ /// `header_bytes_len` bytes of `header_bytes_buffer`.
+ pub fn get(hp: HeadersParser) []u8 {
+ return hp.header_bytes_buffer[0..hp.header_bytes_len];
+ }
- return index;
- }
+ /// Feeds `bytes` to a `std.http.HeadParser` seeded with this parser's
+ /// current header/trailer state, stores the resulting state back into
+ /// `r.state`, and returns the value reported by `feed` as a `u32`.
+ /// Must only be called while `r.state` is one of the header states;
+ /// any other state is `unreachable`.
+ pub fn findHeadersEnd(r: *HeadersParser, bytes: []const u8) u32 {
+ var head_parser: std.http.HeadParser = .{
+ .state = switch (r.state) {
+ .start => .start,
+ .seen_n => .seen_n,
+ .seen_r => .seen_r,
+ .seen_rn => .seen_rn,
+ .seen_rnr => .seen_rnr,
+ .finished => .finished,
+ else => unreachable,
+ },
+ };
+ const consumed = head_parser.feed(bytes);
+ // Map the sub-parser's state back onto the combined `State` enum.
+ r.state = switch (head_parser.state) {
+ .start => .start,
+ .seen_n => .seen_n,
+ .seen_r => .seen_r,
+ .seen_rn => .seen_rn,
+ .seen_rnr => .seen_rnr,
+ .finished => .finished,
+ };
+ return @intCast(consumed);
}
- /// Returns the number of bytes consumed by the chunk size. This is always less than or equal to `bytes.len`.
- /// You should check `r.state == .chunk_data` after this to check if the chunk size has been fully parsed.
- ///
- /// If the amount returned is less than `bytes.len`, you may assume that the parser is in the `chunk_data` state
- /// and that the first byte of the chunk is at `bytes[result]`.
pub fn findChunkedLen(r: *HeadersParser, bytes: []const u8) u32 {
- const len = @as(u32, @intCast(bytes.len));
-
- for (bytes[0..], 0..) |c, i| {
- const index = @as(u32, @intCast(i));
- switch (r.state) {
- .chunk_data_suffix => switch (c) {
- '\r' => r.state = .chunk_data_suffix_r,
- '\n' => r.state = .chunk_head_size,
- else => {
- r.state = .invalid;
- return index;
- },
- },
- .chunk_data_suffix_r => switch (c) {
- '\n' => r.state = .chunk_head_size,
- else => {
- r.state = .invalid;
- return index;
- },
- },
- .chunk_head_size => {
- const digit = switch (c) {
- '0'...'9' => |b| b - '0',
- 'A'...'Z' => |b| b - 'A' + 10,
- 'a'...'z' => |b| b - 'a' + 10,
- '\r' => {
- r.state = .chunk_head_r;
- continue;
- },
- '\n' => {
- r.state = .chunk_data;
- return index + 1;
- },
- else => {
- r.state = .chunk_head_ext;
- continue;
- },
- };
-
- const new_len = r.next_chunk_length *% 16 +% digit;
- if (new_len <= r.next_chunk_length and r.next_chunk_length != 0) {
- r.state = .invalid;
- return index;
- }
-
- r.next_chunk_length = new_len;
- },
- .chunk_head_ext => switch (c) {
- '\r' => r.state = .chunk_head_r,
- '\n' => {
- r.state = .chunk_data;
- return index + 1;
- },
- else => continue,
- },
- .chunk_head_r => switch (c) {
- '\n' => {
- r.state = .chunk_data;
- return index + 1;
- },
- else => {
- r.state = .invalid;
- return index;
- },
- },
+ var cp: std.http.ChunkParser = .{
+ .state = switch (r.state) {
+ .chunk_head_size => .head_size,
+ .chunk_head_ext => .head_ext,
+ .chunk_head_r => .head_r,
+ .chunk_data => .data,
+ .chunk_data_suffix => .data_suffix,
+ .chunk_data_suffix_r => .data_suffix_r,
+ .invalid => .invalid,
else => unreachable,
- }
- }
-
- return len;
+ },
+ .chunk_len = r.next_chunk_length,
+ };
+ const result = cp.feed(bytes);
+ r.state = switch (cp.state) {
+ .head_size => .chunk_head_size,
+ .head_ext => .chunk_head_ext,
+ .head_r => .chunk_head_r,
+ .data => .chunk_data,
+ .data_suffix => .chunk_data_suffix,
+ .data_suffix_r => .chunk_data_suffix_r,
+ .invalid => .invalid,
+ };
+ r.next_chunk_length = cp.chunk_len;
+ return @intCast(result);
}
- /// Returns whether or not the parser has finished parsing a complete message. A message is only complete after the
- /// entire body has been read and any trailing headers have been parsed.
+ /// Returns whether or not the parser has finished parsing a complete
+ /// message. A message is only complete after the entire body has been read
+ /// and any trailing headers have been parsed.
pub fn isComplete(r: *HeadersParser) bool {
return r.done and r.state == .finished;
}
- pub const CheckCompleteHeadError = mem.Allocator.Error || error{HttpHeadersExceededSizeLimit};
+ pub const CheckCompleteHeadError = error{HttpHeadersOversize};
- /// Pushes `in` into the parser. Returns the number of bytes consumed by the header. Any header bytes are appended
- /// to the `header_bytes` buffer.
- ///
- /// This function only uses `allocator` if `r.header_bytes_owned` is true, and may be undefined otherwise.
- pub fn checkCompleteHead(r: *HeadersParser, allocator: std.mem.Allocator, in: []const u8) CheckCompleteHeadError!u32 {
- if (r.state.isContent()) return 0;
+ /// Pushes `in` into the parser. Returns the number of bytes consumed by
+ /// the header. Any header bytes are appended to `header_bytes_buffer`.
+ pub fn checkCompleteHead(hp: *HeadersParser, in: []const u8) CheckCompleteHeadError!u32 {
+ if (hp.state.isContent()) return 0;
- const i = r.findHeadersEnd(in);
+ const i = hp.findHeadersEnd(in);
const data = in[0..i];
- if (r.header_bytes.items.len + data.len > r.max_header_bytes) {
- return error.HttpHeadersExceededSizeLimit;
- } else {
- if (r.header_bytes_owned) try r.header_bytes.ensureUnusedCapacity(allocator, data.len);
+ if (hp.header_bytes_len + data.len > hp.header_bytes_buffer.len)
+ return error.HttpHeadersOversize;
- r.header_bytes.appendSliceAssumeCapacity(data);
- }
+ @memcpy(hp.header_bytes_buffer[hp.header_bytes_len..][0..data.len], data);
+ hp.header_bytes_len += @intCast(data.len);
return i;
}
@@ -520,7 +154,8 @@ pub const HeadersParser = struct {
HttpChunkInvalid,
};
- /// Reads the body of the message into `buffer`. Returns the number of bytes placed in the buffer.
+ /// Reads the body of the message into `buffer`. Returns the number of
+ /// bytes placed in the buffer.
///
/// If `skip` is true, the buffer will be unused and the body will be skipped.
///
@@ -571,9 +206,10 @@ pub const HeadersParser = struct {
.chunk_data => if (r.next_chunk_length == 0) {
if (std.mem.eql(u8, conn.peek(), "\r\n")) {
r.state = .finished;
- r.done = true;
+ conn.drop(2);
} else {
- // The trailer section is formatted identically to the header section.
+ // The trailer section is formatted identically
+ // to the header section.
r.state = .seen_rn;
}
r.done = true;
@@ -713,57 +349,11 @@ const MockBufferedConnection = struct {
}
};
-test "HeadersParser.findHeadersEnd" {
- var r: HeadersParser = undefined;
- const data = "GET / HTTP/1.1\r\nHost: localhost\r\n\r\nHello";
-
- for (0..36) |i| {
- r = HeadersParser.initDynamic(0);
- try std.testing.expectEqual(@as(u32, @intCast(i)), r.findHeadersEnd(data[0..i]));
- try std.testing.expectEqual(@as(u32, @intCast(35 - i)), r.findHeadersEnd(data[i..]));
- }
-}
-
-test "HeadersParser.findChunkedLen" {
- var r: HeadersParser = undefined;
- const data = "Ff\r\nf0f000 ; ext\n0\r\nffffffffffffffffffffffffffffffffffffffff\r\n";
-
- r = HeadersParser.initDynamic(0);
- r.state = .chunk_head_size;
- r.next_chunk_length = 0;
-
- const first = r.findChunkedLen(data[0..]);
- try testing.expectEqual(@as(u32, 4), first);
- try testing.expectEqual(@as(u64, 0xff), r.next_chunk_length);
- try testing.expectEqual(State.chunk_data, r.state);
- r.state = .chunk_head_size;
- r.next_chunk_length = 0;
-
- const second = r.findChunkedLen(data[first..]);
- try testing.expectEqual(@as(u32, 13), second);
- try testing.expectEqual(@as(u64, 0xf0f000), r.next_chunk_length);
- try testing.expectEqual(State.chunk_data, r.state);
- r.state = .chunk_head_size;
- r.next_chunk_length = 0;
-
- const third = r.findChunkedLen(data[first + second ..]);
- try testing.expectEqual(@as(u32, 3), third);
- try testing.expectEqual(@as(u64, 0), r.next_chunk_length);
- try testing.expectEqual(State.chunk_data, r.state);
- r.state = .chunk_head_size;
- r.next_chunk_length = 0;
-
- const fourth = r.findChunkedLen(data[first + second + third ..]);
- try testing.expectEqual(@as(u32, 16), fourth);
- try testing.expectEqual(@as(u64, 0xffffffffffffffff), r.next_chunk_length);
- try testing.expectEqual(State.invalid, r.state);
-}
-
test "HeadersParser.read length" {
// mock BufferedConnection for read
+ var headers_buf: [256]u8 = undefined;
- var r = HeadersParser.initDynamic(256);
- defer r.header_bytes.deinit(std.testing.allocator);
+ var r = HeadersParser.init(&headers_buf);
const data = "GET / HTTP/1.1\r\nHost: localhost\r\nContent-Length: 5\r\n\r\nHello";
var conn: MockBufferedConnection = .{
@@ -773,8 +363,8 @@ test "HeadersParser.read length" {
while (true) { // read headers
try conn.fill();
- const nchecked = try r.checkCompleteHead(std.testing.allocator, conn.peek());
- conn.drop(@as(u16, @intCast(nchecked)));
+ const nchecked = try r.checkCompleteHead(conn.peek());
+ conn.drop(@intCast(nchecked));
if (r.state.isContent()) break;
}
@@ -786,14 +376,14 @@ test "HeadersParser.read length" {
try std.testing.expectEqual(@as(usize, 5), len);
try std.testing.expectEqualStrings("Hello", buf[0..len]);
- try std.testing.expectEqualStrings("GET / HTTP/1.1\r\nHost: localhost\r\nContent-Length: 5\r\n\r\n", r.header_bytes.items);
+ try std.testing.expectEqualStrings("GET / HTTP/1.1\r\nHost: localhost\r\nContent-Length: 5\r\n\r\n", r.get());
}
test "HeadersParser.read chunked" {
// mock BufferedConnection for read
- var r = HeadersParser.initDynamic(256);
- defer r.header_bytes.deinit(std.testing.allocator);
+ var headers_buf: [256]u8 = undefined;
+ var r = HeadersParser.init(&headers_buf);
const data = "GET / HTTP/1.1\r\nHost: localhost\r\n\r\n2\r\nHe\r\n2\r\nll\r\n1\r\no\r\n0\r\n\r\n";
var conn: MockBufferedConnection = .{
@@ -803,8 +393,8 @@ test "HeadersParser.read chunked" {
while (true) { // read headers
try conn.fill();
- const nchecked = try r.checkCompleteHead(std.testing.allocator, conn.peek());
- conn.drop(@as(u16, @intCast(nchecked)));
+ const nchecked = try r.checkCompleteHead(conn.peek());
+ conn.drop(@intCast(nchecked));
if (r.state.isContent()) break;
}
@@ -815,14 +405,14 @@ test "HeadersParser.read chunked" {
try std.testing.expectEqual(@as(usize, 5), len);
try std.testing.expectEqualStrings("Hello", buf[0..len]);
- try std.testing.expectEqualStrings("GET / HTTP/1.1\r\nHost: localhost\r\n\r\n", r.header_bytes.items);
+ try std.testing.expectEqualStrings("GET / HTTP/1.1\r\nHost: localhost\r\n\r\n", r.get());
}
test "HeadersParser.read chunked trailer" {
// mock BufferedConnection for read
- var r = HeadersParser.initDynamic(256);
- defer r.header_bytes.deinit(std.testing.allocator);
+ var headers_buf: [256]u8 = undefined;
+ var r = HeadersParser.init(&headers_buf);
const data = "GET / HTTP/1.1\r\nHost: localhost\r\n\r\n2\r\nHe\r\n2\r\nll\r\n1\r\no\r\n0\r\nContent-Type: text/plain\r\n\r\n";
var conn: MockBufferedConnection = .{
@@ -832,8 +422,8 @@ test "HeadersParser.read chunked trailer" {
while (true) { // read headers
try conn.fill();
- const nchecked = try r.checkCompleteHead(std.testing.allocator, conn.peek());
- conn.drop(@as(u16, @intCast(nchecked)));
+ const nchecked = try r.checkCompleteHead(conn.peek());
+ conn.drop(@intCast(nchecked));
if (r.state.isContent()) break;
}
@@ -847,11 +437,11 @@ test "HeadersParser.read chunked trailer" {
while (true) { // read headers
try conn.fill();
- const nchecked = try r.checkCompleteHead(std.testing.allocator, conn.peek());
- conn.drop(@as(u16, @intCast(nchecked)));
+ const nchecked = try r.checkCompleteHead(conn.peek());
+ conn.drop(@intCast(nchecked));
if (r.state.isContent()) break;
}
- try std.testing.expectEqualStrings("GET / HTTP/1.1\r\nHost: localhost\r\n\r\nContent-Type: text/plain\r\n\r\n", r.header_bytes.items);
+ try std.testing.expectEqualStrings("GET / HTTP/1.1\r\nHost: localhost\r\n\r\nContent-Type: text/plain\r\n\r\n", r.get());
}
diff --git a/lib/std/http/test.zig b/lib/std/http/test.zig
new file mode 100644
index 0000000000..ff0dfe6634
--- /dev/null
+++ b/lib/std/http/test.zig
@@ -0,0 +1,1202 @@
+const builtin = @import("builtin");
+const std = @import("std");
+const http = std.http;
+const mem = std.mem;
+const native_endian = builtin.cpu.arch.endian();
+const expect = std.testing.expect;
+const expectEqual = std.testing.expectEqual;
+const expectEqualStrings = std.testing.expectEqualStrings;
+const expectError = std.testing.expectError;
+
+test "trailers" { // Checks that trailers sent after a chunked response body are visible through the client's header iterator.
+ const test_server = try createTestServer(struct {
+ fn run(net_server: *std.net.Server) anyerror!void { // Server side: accepts one connection and serves one request on it.
+ var header_buffer: [1024]u8 = undefined;
+ var remaining: usize = 1;
+ while (remaining != 0) : (remaining -= 1) {
+ const conn = try net_server.accept();
+ defer conn.stream.close();
+
+ var server = http.Server.init(conn, &header_buffer);
+
+ try expectEqual(.ready, server.state);
+ var request = try server.receiveHead();
+ try serve(&request);
+ try expectEqual(.ready, server.state); // state is back to .ready once the chunked response has been ended
+ }
+ }
+
+ fn serve(request: *http.Server.Request) !void { // Streams "Hello, World!\n" in two flushed pieces, then ends with one trailer.
+ try expectEqualStrings("/trailer", request.head.target); // expected value first, so a failure report labels the two strings correctly
+
+ var send_buffer: [1024]u8 = undefined;
+ var response = request.respondStreaming(.{
+ .send_buffer = &send_buffer,
+ });
+ try response.writeAll("Hello, ");
+ try response.flush();
+ try response.writeAll("World!\n");
+ try response.flush();
+ try response.endChunked(.{
+ .trailers = &.{
+ .{ .name = "X-Checksum", .value = "aaaa" },
+ },
+ });
+ }
+ });
+ defer test_server.destroy();
+
+ const gpa = std.testing.allocator;
+
+ var client: http.Client = .{ .allocator = gpa };
+ defer client.deinit();
+
+ const location = try std.fmt.allocPrint(gpa, "http://127.0.0.1:{d}/trailer", .{
+ test_server.port(),
+ });
+ defer gpa.free(location);
+ const uri = try std.Uri.parse(location);
+
+ {
+ var server_header_buffer: [1024]u8 = undefined;
+ var req = try client.open(.GET, uri, .{
+ .server_header_buffer = &server_header_buffer,
+ });
+ defer req.deinit();
+
+ try req.send(.{});
+ try req.wait();
+
+ const body = try req.reader().readAllAlloc(gpa, 8192);
+ defer gpa.free(body);
+
+ try expectEqualStrings("Hello, World!\n", body);
+
+ var it = req.response.iterateHeaders(); // iterated only after the body was read; the third entry below is the trailer
+ {
+ const header = it.next().?;
+ try expect(!it.is_trailer);
+ try expectEqualStrings("connection", header.name);
+ try expectEqualStrings("keep-alive", header.value);
+ }
+ {
+ const header = it.next().?;
+ try expect(!it.is_trailer);
+ try expectEqualStrings("transfer-encoding", header.name);
+ try expectEqualStrings("chunked", header.value);
+ }
+ {
+ const header = it.next().?;
+ try expect(it.is_trailer);
+ try expectEqualStrings("X-Checksum", header.name);
+ try expectEqualStrings("aaaa", header.value);
+ }
+ try expectEqual(null, it.next());
+ }
+
+ // connection has been kept alive
+ try expect(client.connection_pool.free_len == 1);
+}
+
+test "HTTP server handles a chunked transfer coding request" { // Sends a raw chunked POST over TCP and compares the server's reply byte for byte.
+ const test_server = try createTestServer(struct {
+ fn run(net_server: *std.net.Server) !void { // Server side: one connection, one request, one non-keep-alive response.
+ var header_buffer: [8192]u8 = undefined;
+ const conn = try net_server.accept();
+ defer conn.stream.close();
+
+ var server = http.Server.init(conn, &header_buffer);
+ var request = try server.receiveHead();
+
+ try expect(request.head.transfer_encoding == .chunked);
+
+ var buf: [128]u8 = undefined;
+ const n = try (try request.reader()).readAll(&buf);
+ try expect(mem.eql(u8, buf[0..n], "ABCD")); // the chunks "A", "B" and "CD" from request_bytes, joined
+
+ try request.respond("message from server!\n", .{
+ .extra_headers = &.{
+ .{ .name = "content-type", .value = "text/plain" },
+ },
+ .keep_alive = false,
+ });
+ }
+ });
+ defer test_server.destroy();
+
+ const request_bytes = // hand-written request: three data chunks followed by the terminating zero-length chunk
+ "POST / HTTP/1.1\r\n" ++
+ "Content-Type: text/plain\r\n" ++
+ "Transfer-Encoding: chunked\r\n" ++
+ "\r\n" ++
+ "1\r\n" ++
+ "A\r\n" ++
+ "1\r\n" ++
+ "B\r\n" ++
+ "2\r\n" ++
+ "CD\r\n" ++
+ "0\r\n" ++
+ "\r\n";
+
+ const gpa = std.testing.allocator;
+ const stream = try std.net.tcpConnectToHost(gpa, "127.0.0.1", test_server.port());
+ defer stream.close();
+ try stream.writeAll(request_bytes);
+
+ const response = try stream.reader().readAllAlloc(gpa, 100); // reads until the server closes; the expected reply below is 86 bytes
+ defer gpa.free(response);
+
+ const expected_response =
+ "HTTP/1.1 200 OK\r\n" ++
+ "content-length: 21\r\n" ++
+ "content-type: text/plain\r\n" ++
+ "\r\n" ++
+ "message from server!\n";
+ try expectEqualStrings(expected_response, response);
+}
+
+test "echo content server" { // Runs the shared echoTests client scenarios against a server that echoes the request body.
+ const test_server = try createTestServer(struct {
+ fn run(net_server: *std.net.Server) anyerror!void { // Accept loop; returns once a request for "/end" has been answered.
+ var read_buffer: [1024]u8 = undefined;
+
+ accept: while (true) {
+ const conn = try net_server.accept();
+ defer conn.stream.close();
+
+ var http_server = http.Server.init(conn, &read_buffer);
+
+ while (http_server.state == .ready) {
+ var request = http_server.receiveHead() catch |err| switch (err) {
+ error.HttpConnectionClosing => continue :accept, // this connection is finished; go back to accept()
+ else => |e| return e,
+ };
+ if (mem.eql(u8, request.head.target, "/end")) {
+ return request.respond("", .{ .keep_alive = false });
+ }
+ if (request.head.expect) |expect_header_value| {
+ if (mem.eql(u8, expect_header_value, "garbage")) {
+ try expectError(error.HttpExpectationFailed, request.reader()); // asking for the body reader must fail for this expect value
+ try request.respond("", .{ .keep_alive = false });
+ continue;
+ }
+ }
+ handleRequest(&request) catch |err| {
+ // This message helps the person troubleshooting determine whether
+ // output comes from the server thread or the client thread.
+ std.debug.print("handleRequest failed with '{s}'\n", .{@errorName(err)});
+ return err;
+ };
+ }
+ }
+ }
+
+ fn handleRequest(request: *http.Server.Request) !void { // Validates the request, then streams the fixed body "Hello, World!\n" back.
+ //std.debug.print("server received {s} {s} {s}\n", .{
+ // @tagName(request.head.method),
+ // @tagName(request.head.version),
+ // request.head.target,
+ //});
+
+ const body = try (try request.reader()).readAllAlloc(std.testing.allocator, 8192);
+ defer std.testing.allocator.free(body);
+
+ try expect(mem.startsWith(u8, request.head.target, "/echo-content"));
+ try expectEqualStrings("Hello, World!\n", body);
+ try expectEqualStrings("text/plain", request.head.content_type.?);
+
+ var send_buffer: [100]u8 = undefined;
+ var response = request.respondStreaming(.{
+ .send_buffer = &send_buffer,
+ .content_length = switch (request.head.transfer_encoding) { // reply framing mirrors the request: null for chunked, 14 otherwise
+ .chunked => null,
+ .none => len: {
+ try expectEqual(14, request.head.content_length.?);
+ break :len 14;
+ },
+ },
+ });
+
+ try response.flush(); // Test an early flush to send the HTTP headers before the body.
+ const w = response.writer();
+ try w.writeAll("Hello, ");
+ try w.writeAll("World!\n");
+ try response.end();
+ //std.debug.print(" server finished responding\n", .{});
+ }
+ });
+ defer test_server.destroy();
+
+ {
+ var client: http.Client = .{ .allocator = std.testing.allocator };
+ defer client.deinit();
+
+ try echoTests(&client, test_server.port()); // echoTests finishes by fetching "/end", which makes run() above return
+ }
+}
+
+test "Server.Request.respondStreaming non-chunked, unknown content-length" { // Streams 500 lines with neither chunking nor a content-length.
+ // In this case, the response is expected to stream until the connection is
+ // closed, indicating the end of the body.
+ const test_server = try createTestServer(struct {
+ fn run(net_server: *std.net.Server) anyerror!void { // Server side: one connection, one streamed response.
+ var header_buffer: [1000]u8 = undefined;
+ var remaining: usize = 1;
+ while (remaining != 0) : (remaining -= 1) {
+ const conn = try net_server.accept();
+ defer conn.stream.close();
+
+ var server = http.Server.init(conn, &header_buffer);
+
+ try expectEqual(.ready, server.state);
+ var request = try server.receiveHead();
+ try expectEqualStrings("/foo", request.head.target); // expected value first, so a failure report labels the two strings correctly
+ var send_buffer: [500]u8 = undefined;
+ var response = request.respondStreaming(.{
+ .send_buffer = &send_buffer,
+ .respond_options = .{
+ .transfer_encoding = .none,
+ },
+ });
+ var total: usize = 0;
+ for (0..500) |i| {
+ var buf: [30]u8 = undefined;
+ const line = try std.fmt.bufPrint(&buf, "{d}, ah ha ha!\n", .{i});
+ try response.writeAll(line);
+ total += line.len;
+ }
+ try expectEqual(7390, total); // 1390 digit bytes for 0..499 plus 500 * 12 bytes of ", ah ha ha!\n"
+ try response.end();
+ try expectEqual(.closing, server.state); // the close is the only end-of-body signal, so the connection cannot be reused
+ }
+ }
+ });
+ defer test_server.destroy();
+
+ const request_bytes = "GET /foo HTTP/1.1\r\n\r\n";
+ const gpa = std.testing.allocator;
+ const stream = try std.net.tcpConnectToHost(gpa, "127.0.0.1", test_server.port());
+ defer stream.close();
+ try stream.writeAll(request_bytes);
+
+ const response = try stream.reader().readAllAlloc(gpa, 8192); // reads until the server closes the connection
+ defer gpa.free(response);
+
+ var expected_response = std.ArrayList(u8).init(gpa);
+ defer expected_response.deinit();
+
+ try expected_response.appendSlice("HTTP/1.1 200 OK\r\n\r\n");
+
+ { // rebuild the same 500 lines the server wrote
+ var total: usize = 0;
+ for (0..500) |i| {
+ var buf: [30]u8 = undefined;
+ const line = try std.fmt.bufPrint(&buf, "{d}, ah ha ha!\n", .{i});
+ try expected_response.appendSlice(line);
+ total += line.len;
+ }
+ try expectEqual(7390, total);
+ }
+
+ try expectEqualStrings(expected_response.items, response);
+}
+
+test "receiving arbitrary http headers from the client" { // Checks that request headers reach the server with name casing and order intact.
+ const test_server = try createTestServer(struct {
+ fn run(net_server: *std.net.Server) anyerror!void { // Server side: one connection, one request.
+ var read_buffer: [666]u8 = undefined;
+ var remaining: usize = 1;
+ while (remaining != 0) : (remaining -= 1) {
+ const conn = try net_server.accept();
+ defer conn.stream.close();
+
+ var server = http.Server.init(conn, &read_buffer);
+ try expectEqual(.ready, server.state);
+ var request = try server.receiveHead();
+ try expectEqualStrings("/bar", request.head.target);
+ var it = request.iterateHeaders();
+ {
+ const header = it.next().?;
+ try expectEqualStrings("CoNneCtIoN", header.name); // the name is reported exactly as sent, mixed case included
+ try expectEqualStrings("close", header.value);
+ try expect(!it.is_trailer);
+ }
+ {
+ const header = it.next().?;
+ try expectEqualStrings("aoeu", header.name);
+ try expectEqualStrings("asdf", header.value);
+ try expect(!it.is_trailer);
+ }
+ try request.respond("", .{});
+ }
+ }
+ });
+ defer test_server.destroy();
+
+ const request_bytes = "GET /bar HTTP/1.1\r\n" ++ // raw request written straight to the socket, bypassing http.Client
+ "CoNneCtIoN: close\r\n" ++
+ "aoeu: asdf\r\n" ++
+ "\r\n";
+ const gpa = std.testing.allocator;
+ const stream = try std.net.tcpConnectToHost(gpa, "127.0.0.1", test_server.port());
+ defer stream.close();
+ try stream.writeAll(request_bytes);
+
+ const response = try stream.reader().readAllAlloc(gpa, 8192); // reads until the server closes the connection
+ defer gpa.free(response);
+
+ var expected_response = std.ArrayList(u8).init(gpa);
+ defer expected_response.deinit();
+
+ try expected_response.appendSlice("HTTP/1.1 200 OK\r\n");
+ try expected_response.appendSlice("content-length: 0\r\n\r\n");
+ try expectEqualStrings(expected_response.items, response);
+}
+
+test "general client/server API coverage" { // One server thread with several endpoints, exercised by a sequence of client scenarios.
+ if (builtin.os.tag == .windows) {
+ // This test was never passing on Windows.
+ return error.SkipZigTest;
+ }
+
+ const global = struct {
+ var handle_new_requests = true; // read by the server's accept loop, cleared by the test body to stop it
+ };
+ const test_server = try createTestServer(struct {
+ fn run(net_server: *std.net.Server) anyerror!void { // Accept loop; exits when handle_new_requests is false at the top of an iteration.
+ var client_header_buffer: [1024]u8 = undefined;
+ outer: while (global.handle_new_requests) {
+ var connection = try net_server.accept();
+ defer connection.stream.close();
+
+ var http_server = http.Server.init(connection, &client_header_buffer);
+
+ while (http_server.state == .ready) {
+ var request = http_server.receiveHead() catch |err| switch (err) {
+ error.HttpConnectionClosing => continue :outer, // this connection is finished; re-check the flag and accept again
+ else => |e| return e,
+ };
+
+ try handleRequest(&request, net_server.listen_address.getPort());
+ }
+ }
+ }
+
+ fn handleRequest(request: *http.Server.Request, listen_port: u16) !void { // Dispatches on the request target; unknown targets get 404.
+ const log = std.log.scoped(.server);
+
+ log.info("{} {s} {s}", .{
+ request.head.method,
+ @tagName(request.head.version),
+ request.head.target,
+ });
+
+ const gpa = std.testing.allocator;
+ const body = try (try request.reader()).readAllAlloc(gpa, 8192); // the request body is read in full; none of the branches below use it
+ defer gpa.free(body);
+
+ var send_buffer: [100]u8 = undefined;
+
+ if (mem.startsWith(u8, request.head.target, "/get")) {
+ var response = request.respondStreaming(.{
+ .send_buffer = &send_buffer,
+ .content_length = if (mem.indexOf(u8, request.head.target, "?chunked") == null) // "?chunked" in the target selects a chunked reply
+ 14
+ else
+ null,
+ .respond_options = .{
+ .extra_headers = &.{
+ .{ .name = "content-type", .value = "text/plain" },
+ },
+ },
+ });
+ const w = response.writer();
+ try w.writeAll("Hello, ");
+ try w.writeAll("World!\n");
+ try response.end();
+ // Writing again would cause an assertion failure.
+ } else if (mem.startsWith(u8, request.head.target, "/large")) {
+ var response = request.respondStreaming(.{
+ .send_buffer = &send_buffer,
+ .content_length = 14 * 1024 + 14 * 10, // 5 + 1024 + 5 copies of the 14-byte line written below
+ });
+
+ try response.flush(); // Test an early flush to send the HTTP headers before the body.
+
+ const w = response.writer();
+
+ var i: u32 = 0;
+ while (i < 5) : (i += 1) {
+ try w.writeAll("Hello, World!\n");
+ }
+
+ try w.writeAll("Hello, World!\n" ** 1024); // a single write far larger than the 100-byte send buffer
+
+ i = 0;
+ while (i < 5) : (i += 1) {
+ try w.writeAll("Hello, World!\n");
+ }
+
+ try response.end();
+ } else if (mem.eql(u8, request.head.target, "/redirect/1")) {
+ var response = request.respondStreaming(.{
+ .send_buffer = &send_buffer,
+ .respond_options = .{
+ .status = .found,
+ .extra_headers = &.{
+ .{ .name = "location", .value = "../../get" }, // relative location; the client test expects it to land on /get
+ },
+ },
+ });
+
+ const w = response.writer();
+ try w.writeAll("Hello, ");
+ try w.writeAll("Redirected!\n");
+ try response.end();
+ } else if (mem.eql(u8, request.head.target, "/redirect/2")) {
+ try request.respond("Hello, Redirected!\n", .{
+ .status = .found,
+ .extra_headers = &.{
+ .{ .name = "location", .value = "/redirect/1" },
+ },
+ });
+ } else if (mem.eql(u8, request.head.target, "/redirect/3")) {
+ const location = try std.fmt.allocPrint(gpa, "http://127.0.0.1:{d}/redirect/2", .{ // absolute URL pointing back at this server
+ listen_port,
+ });
+ defer gpa.free(location);
+
+ try request.respond("Hello, Redirected!\n", .{
+ .status = .found,
+ .extra_headers = &.{
+ .{ .name = "location", .value = location },
+ },
+ });
+ } else if (mem.eql(u8, request.head.target, "/redirect/4")) {
+ try request.respond("Hello, Redirected!\n", .{
+ .status = .found,
+ .extra_headers = &.{
+ .{ .name = "location", .value = "/redirect/3" },
+ },
+ });
+ } else if (mem.eql(u8, request.head.target, "/redirect/invalid")) {
+ const invalid_port = try getUnusedTcpPort();
+ const location = try std.fmt.allocPrint(gpa, "http://127.0.0.1:{d}", .{invalid_port}); // redirect target on a port nothing should be listening on
+ defer gpa.free(location);
+
+ try request.respond("", .{
+ .status = .found,
+ .extra_headers = &.{
+ .{ .name = "location", .value = location },
+ },
+ });
+ } else if (mem.eql(u8, request.head.target, "/empty")) {
+ try request.respond("", .{
+ .extra_headers = &.{
+ .{ .name = "empty", .value = "" },
+ },
+ });
+ } else {
+ try request.respond("", .{ .status = .not_found });
+ }
+ }
+
+ fn getUnusedTcpPort() !u16 { // Listens on port 0, records the port that was assigned, then closes the listener again.
+ const addr = try std.net.Address.parseIp("127.0.0.1", 0);
+ var s = try addr.listen(.{});
+ defer s.deinit();
+ return s.listen_address.in.getPort(); // NOTE(review): nothing reserves the port after deinit; another process could bind it
+ }
+ });
+ defer {
+ global.handle_new_requests = false;
+ test_server.destroy(); // joins the server thread (see TestServer.destroy)
+ }
+
+ const log = std.log.scoped(.client);
+
+ const gpa = std.testing.allocator;
+ var client: http.Client = .{ .allocator = gpa };
+ errdefer client.deinit();
+ // No plain `defer` here: client.deinit() is called explicitly near the end of this test, before the server is unblocked.
+
+ const port = test_server.port();
+
+ { // read content-length response
+ const location = try std.fmt.allocPrint(gpa, "http://127.0.0.1:{d}/get", .{port});
+ defer gpa.free(location);
+ const uri = try std.Uri.parse(location);
+
+ log.info("{s}", .{location});
+ var server_header_buffer: [1024]u8 = undefined;
+ var req = try client.open(.GET, uri, .{
+ .server_header_buffer = &server_header_buffer,
+ });
+ defer req.deinit();
+
+ try req.send(.{});
+ try req.wait();
+
+ const body = try req.reader().readAllAlloc(gpa, 8192);
+ defer gpa.free(body);
+
+ try expectEqualStrings("Hello, World!\n", body);
+ try expectEqualStrings("text/plain", req.response.content_type.?);
+ }
+
+ // connection has been kept alive
+ try expect(client.http_proxy != null or client.connection_pool.free_len == 1); // the pool check is bypassed when a proxy is configured
+
+ { // read large content-length response
+ const location = try std.fmt.allocPrint(gpa, "http://127.0.0.1:{d}/large", .{port});
+ defer gpa.free(location);
+ const uri = try std.Uri.parse(location);
+
+ log.info("{s}", .{location});
+ var server_header_buffer: [1024]u8 = undefined;
+ var req = try client.open(.GET, uri, .{
+ .server_header_buffer = &server_header_buffer,
+ });
+ defer req.deinit();
+
+ try req.send(.{});
+ try req.wait();
+
+ const body = try req.reader().readAllAlloc(gpa, 8192 * 1024);
+ defer gpa.free(body);
+
+ try expectEqual(@as(usize, 14 * 1024 + 14 * 10), body.len);
+ }
+
+ // connection has been kept alive
+ try expect(client.http_proxy != null or client.connection_pool.free_len == 1);
+
+ { // send HEAD request for the content-length response; no body is expected
+ const location = try std.fmt.allocPrint(gpa, "http://127.0.0.1:{d}/get", .{port});
+ defer gpa.free(location);
+ const uri = try std.Uri.parse(location);
+
+ log.info("{s}", .{location});
+ var server_header_buffer: [1024]u8 = undefined;
+ var req = try client.open(.HEAD, uri, .{
+ .server_header_buffer = &server_header_buffer,
+ });
+ defer req.deinit();
+
+ try req.send(.{});
+ try req.wait();
+
+ const body = try req.reader().readAllAlloc(gpa, 8192);
+ defer gpa.free(body);
+
+ try expectEqualStrings("", body);
+ try expectEqualStrings("text/plain", req.response.content_type.?);
+ try expectEqual(14, req.response.content_length.?); // the header still advertises the length a GET would return
+ }
+
+ // connection has been kept alive
+ try expect(client.http_proxy != null or client.connection_pool.free_len == 1);
+
+ { // read chunked response
+ const location = try std.fmt.allocPrint(gpa, "http://127.0.0.1:{d}/get?chunked", .{port});
+ defer gpa.free(location);
+ const uri = try std.Uri.parse(location);
+
+ log.info("{s}", .{location});
+ var server_header_buffer: [1024]u8 = undefined;
+ var req = try client.open(.GET, uri, .{
+ .server_header_buffer = &server_header_buffer,
+ });
+ defer req.deinit();
+
+ try req.send(.{});
+ try req.wait();
+
+ const body = try req.reader().readAllAlloc(gpa, 8192);
+ defer gpa.free(body);
+
+ try expectEqualStrings("Hello, World!\n", body);
+ try expectEqualStrings("text/plain", req.response.content_type.?);
+ }
+
+ // connection has been kept alive
+ try expect(client.http_proxy != null or client.connection_pool.free_len == 1);
+
+ { // send HEAD request for the chunked response; no body is expected
+ const location = try std.fmt.allocPrint(gpa, "http://127.0.0.1:{d}/get?chunked", .{port});
+ defer gpa.free(location);
+ const uri = try std.Uri.parse(location);
+
+ log.info("{s}", .{location});
+ var server_header_buffer: [1024]u8 = undefined;
+ var req = try client.open(.HEAD, uri, .{
+ .server_header_buffer = &server_header_buffer,
+ });
+ defer req.deinit();
+
+ try req.send(.{});
+ try req.wait();
+
+ const body = try req.reader().readAllAlloc(gpa, 8192);
+ defer gpa.free(body);
+
+ try expectEqualStrings("", body);
+ try expectEqualStrings("text/plain", req.response.content_type.?);
+ try expect(req.response.transfer_encoding == .chunked);
+ }
+
+ // connection has been kept alive
+ try expect(client.http_proxy != null or client.connection_pool.free_len == 1);
+
+ { // read content-length response with connection close
+ const location = try std.fmt.allocPrint(gpa, "http://127.0.0.1:{d}/get", .{port});
+ defer gpa.free(location);
+ const uri = try std.Uri.parse(location);
+
+ log.info("{s}", .{location});
+ var server_header_buffer: [1024]u8 = undefined;
+ var req = try client.open(.GET, uri, .{
+ .server_header_buffer = &server_header_buffer,
+ .keep_alive = false,
+ });
+ defer req.deinit();
+
+ try req.send(.{});
+ try req.wait();
+
+ const body = try req.reader().readAllAlloc(gpa, 8192);
+ defer gpa.free(body);
+
+ try expectEqualStrings("Hello, World!\n", body);
+ try expectEqualStrings("text/plain", req.response.content_type.?);
+ }
+
+ // connection has been closed
+ try expect(client.connection_pool.free_len == 0);
+
+ { // handle empty header field value
+ const location = try std.fmt.allocPrint(gpa, "http://127.0.0.1:{d}/empty", .{port});
+ defer gpa.free(location);
+ const uri = try std.Uri.parse(location);
+
+ log.info("{s}", .{location});
+ var server_header_buffer: [1024]u8 = undefined;
+ var req = try client.open(.GET, uri, .{
+ .server_header_buffer = &server_header_buffer,
+ .extra_headers = &.{
+ .{ .name = "empty", .value = "" },
+ },
+ });
+ defer req.deinit();
+
+ try req.send(.{});
+ try req.wait();
+
+ try std.testing.expectEqual(.ok, req.response.status);
+
+ const body = try req.reader().readAllAlloc(gpa, 8192);
+ defer gpa.free(body);
+
+ try expectEqualStrings("", body);
+
+ var it = req.response.iterateHeaders(); // exactly three response headers are expected, in this order
+ {
+ const header = it.next().?;
+ try expect(!it.is_trailer);
+ try expectEqualStrings("connection", header.name);
+ try expectEqualStrings("keep-alive", header.value);
+ }
+ {
+ const header = it.next().?;
+ try expect(!it.is_trailer);
+ try expectEqualStrings("content-length", header.name);
+ try expectEqualStrings("0", header.value);
+ }
+ {
+ const header = it.next().?;
+ try expect(!it.is_trailer);
+ try expectEqualStrings("empty", header.name);
+ try expectEqualStrings("", header.value);
+ }
+ try expectEqual(null, it.next());
+ }
+
+ // connection has been kept alive
+ try expect(client.http_proxy != null or client.connection_pool.free_len == 1);
+
+ { // relative redirect
+ const location = try std.fmt.allocPrint(gpa, "http://127.0.0.1:{d}/redirect/1", .{port});
+ defer gpa.free(location);
+ const uri = try std.Uri.parse(location);
+
+ log.info("{s}", .{location});
+ var server_header_buffer: [1024]u8 = undefined;
+ var req = try client.open(.GET, uri, .{
+ .server_header_buffer = &server_header_buffer,
+ });
+ defer req.deinit();
+
+ try req.send(.{});
+ try req.wait();
+
+ const body = try req.reader().readAllAlloc(gpa, 8192);
+ defer gpa.free(body);
+
+ try expectEqualStrings("Hello, World!\n", body); // body of /get, i.e. the redirect was followed
+ }
+
+ // connection has been kept alive
+ try expect(client.http_proxy != null or client.connection_pool.free_len == 1);
+
+ { // redirect from root
+ const location = try std.fmt.allocPrint(gpa, "http://127.0.0.1:{d}/redirect/2", .{port});
+ defer gpa.free(location);
+ const uri = try std.Uri.parse(location);
+
+ log.info("{s}", .{location});
+ var server_header_buffer: [1024]u8 = undefined;
+ var req = try client.open(.GET, uri, .{
+ .server_header_buffer = &server_header_buffer,
+ });
+ defer req.deinit();
+
+ try req.send(.{});
+ try req.wait();
+
+ const body = try req.reader().readAllAlloc(gpa, 8192);
+ defer gpa.free(body);
+
+ try expectEqualStrings("Hello, World!\n", body);
+ }
+
+ // connection has been kept alive
+ try expect(client.http_proxy != null or client.connection_pool.free_len == 1);
+
+ { // absolute redirect
+ const location = try std.fmt.allocPrint(gpa, "http://127.0.0.1:{d}/redirect/3", .{port});
+ defer gpa.free(location);
+ const uri = try std.Uri.parse(location);
+
+ log.info("{s}", .{location});
+ var server_header_buffer: [1024]u8 = undefined;
+ var req = try client.open(.GET, uri, .{
+ .server_header_buffer = &server_header_buffer,
+ });
+ defer req.deinit();
+
+ try req.send(.{});
+ try req.wait();
+
+ const body = try req.reader().readAllAlloc(gpa, 8192);
+ defer gpa.free(body);
+
+ try expectEqualStrings("Hello, World!\n", body);
+ }
+
+ // connection has been kept alive
+ try expect(client.http_proxy != null or client.connection_pool.free_len == 1);
+
+ { // too many redirects
+ const location = try std.fmt.allocPrint(gpa, "http://127.0.0.1:{d}/redirect/4", .{port});
+ defer gpa.free(location);
+ const uri = try std.Uri.parse(location);
+
+ log.info("{s}", .{location});
+ var server_header_buffer: [1024]u8 = undefined;
+ var req = try client.open(.GET, uri, .{
+ .server_header_buffer = &server_header_buffer,
+ });
+ defer req.deinit();
+
+ try req.send(.{});
+ req.wait() catch |err| switch (err) { // NOTE(review): a wait() that succeeds also passes here; only other errors fail the test
+ error.TooManyHttpRedirects => {},
+ else => return err,
+ };
+ }
+
+ // connection has been kept alive
+ try expect(client.http_proxy != null or client.connection_pool.free_len == 1);
+
+ { // check client without segfault by connection error after redirection
+ const location = try std.fmt.allocPrint(gpa, "http://127.0.0.1:{d}/redirect/invalid", .{port});
+ defer gpa.free(location);
+ const uri = try std.Uri.parse(location);
+
+ log.info("{s}", .{location});
+ var server_header_buffer: [1024]u8 = undefined;
+ var req = try client.open(.GET, uri, .{
+ .server_header_buffer = &server_header_buffer,
+ });
+ defer req.deinit();
+
+ try req.send(.{});
+ const result = req.wait();
+
+ // a proxy without an upstream is likely to return a 5xx status.
+ if (client.http_proxy == null) {
+ try expectError(error.ConnectionRefused, result); // expects not segfault but the regular error
+ }
+ }
+
+ // connection has been kept alive
+ try expect(client.http_proxy != null or client.connection_pool.free_len == 1);
+
+ { // issue 16282 *** This test leaves the client in an invalid state, it must be last ***
+ const location = try std.fmt.allocPrint(gpa, "http://127.0.0.1:{d}/get", .{port});
+ defer gpa.free(location);
+ const uri = try std.Uri.parse(location);
+
+ const total_connections = client.connection_pool.free_size + 64; // deliberately more requests than the free list can hold
+ var requests = try gpa.alloc(http.Client.Request, total_connections);
+ defer gpa.free(requests);
+
+ var header_bufs = std.ArrayList([]u8).init(gpa);
+ defer header_bufs.deinit();
+ defer for (header_bufs.items) |item| gpa.free(item);
+
+ for (0..total_connections) |i| {
+ const headers_buf = try gpa.alloc(u8, 1024);
+ try header_bufs.append(headers_buf);
+ var req = try client.open(.GET, uri, .{
+ .server_header_buffer = headers_buf,
+ });
+ req.response.parser.done = true; // forced by hand: no request is sent; presumably makes deinit treat the connection as reusable — confirm against Client.Request.deinit
+ req.connection.?.closing = false;
+ requests[i] = req;
+ }
+
+ for (0..total_connections) |i| {
+ requests[i].deinit();
+ }
+
+ // free connections should be full now
+ try expect(client.connection_pool.free_len == client.connection_pool.free_size);
+ }
+
+ client.deinit(); // NOTE(review): the errdefer above stays armed; if the connect below fails, deinit would run a second time
+
+ { // unblock the server thread: clear the flag, then make one throwaway connection so accept() returns
+ global.handle_new_requests = false;
+
+ const conn = try std.net.tcpConnectToAddress(test_server.net_server.listen_address);
+ conn.close();
+ }
+}
+
+test "Server streams both reading and writing" { // The server upper-cases the request body while it is still being read.
+ const test_server = try createTestServer(struct {
+ fn run(net_server: *std.net.Server) anyerror!void { // Server side: one connection, one request, reads and writes interleaved.
+ var header_buffer: [1024]u8 = undefined;
+ const conn = try net_server.accept();
+ defer conn.stream.close();
+
+ var server = http.Server.init(conn, &header_buffer);
+ var request = try server.receiveHead();
+ const reader = try request.reader();
+
+ var send_buffer: [777]u8 = undefined;
+ var response = request.respondStreaming(.{
+ .send_buffer = &send_buffer,
+ .respond_options = .{
+ .transfer_encoding = .none, // Causes keep_alive=false
+ },
+ });
+ const writer = response.writer();
+
+ while (true) {
+ try response.flush(); // flushed before every read, so the head and any converted bytes go out before waiting for more input
+ var buf: [100]u8 = undefined;
+ const n = try reader.read(&buf);
+ if (n == 0) break; // zero bytes read ends the loop
+ const sub_buf = buf[0..n];
+ for (sub_buf) |*b| b.* = std.ascii.toUpper(b.*);
+ try writer.writeAll(sub_buf);
+ }
+ try response.end();
+ }
+ });
+ defer test_server.destroy();
+
+ var client: http.Client = .{ .allocator = std.testing.allocator };
+ defer client.deinit();
+
+ var server_header_buffer: [555]u8 = undefined;
+ var req = try client.open(.POST, .{ // the URI is given as a struct literal instead of being parsed from a string
+ .scheme = "http",
+ .host = "127.0.0.1",
+ .port = test_server.port(),
+ .path = "/",
+ }, .{
+ .server_header_buffer = &server_header_buffer,
+ });
+ defer req.deinit();
+
+ req.transfer_encoding = .chunked;
+ try req.send(.{});
+ try req.wait(); // called before any body is written; relies on the server flushing its response head early
+
+ try req.writeAll("one ");
+ try req.writeAll("fish");
+
+ try req.finish();
+
+ const body = try req.reader().readAllAlloc(std.testing.allocator, 8192);
+ defer std.testing.allocator.free(body);
+
+ try expectEqualStrings("ONE FISH", body);
+}
+
+fn echoTests(client: *http.Client, port: u16) !void { // Client scenarios against an echo server on 127.0.0.1:port; finishes by fetching "/end".
+ const gpa = std.testing.allocator;
+ var location_buffer: [100]u8 = undefined; // scratch for the URLs built with bufPrint below
+
+ { // send content-length request
+ const location = try std.fmt.allocPrint(gpa, "http://127.0.0.1:{d}/echo-content", .{port});
+ defer gpa.free(location);
+ const uri = try std.Uri.parse(location);
+
+ var server_header_buffer: [1024]u8 = undefined;
+ var req = try client.open(.POST, uri, .{
+ .server_header_buffer = &server_header_buffer,
+ .extra_headers = &.{
+ .{ .name = "content-type", .value = "text/plain" },
+ },
+ });
+ defer req.deinit();
+
+ req.transfer_encoding = .{ .content_length = 14 }; // 14 is the length of "Hello, World!\n"
+
+ try req.send(.{});
+ try req.writeAll("Hello, ");
+ try req.writeAll("World!\n");
+ try req.finish();
+
+ try req.wait();
+
+ const body = try req.reader().readAllAlloc(gpa, 8192);
+ defer gpa.free(body);
+
+ try expectEqualStrings("Hello, World!\n", body);
+ }
+
+ // connection has been kept alive
+ try expect(client.http_proxy != null or client.connection_pool.free_len == 1); // the pool check is bypassed when a proxy is configured
+
+ { // send chunked request
+ const uri = try std.Uri.parse(try std.fmt.bufPrint(
+ &location_buffer,
+ "http://127.0.0.1:{d}/echo-content",
+ .{port},
+ ));
+
+ var server_header_buffer: [1024]u8 = undefined;
+ var req = try client.open(.POST, uri, .{
+ .server_header_buffer = &server_header_buffer,
+ .extra_headers = &.{
+ .{ .name = "content-type", .value = "text/plain" },
+ },
+ });
+ defer req.deinit();
+
+ req.transfer_encoding = .chunked;
+
+ try req.send(.{});
+ try req.writeAll("Hello, ");
+ try req.writeAll("World!\n");
+ try req.finish();
+
+ try req.wait();
+
+ const body = try req.reader().readAllAlloc(gpa, 8192);
+ defer gpa.free(body);
+
+ try expectEqualStrings("Hello, World!\n", body);
+ }
+
+ // connection has been kept alive
+ try expect(client.http_proxy != null or client.connection_pool.free_len == 1);
+
+ { // Client.fetch()
+
+ const location = try std.fmt.allocPrint(gpa, "http://127.0.0.1:{d}/echo-content#fetch", .{port});
+ defer gpa.free(location);
+
+ var body = std.ArrayList(u8).init(gpa);
+ defer body.deinit();
+
+ const res = try client.fetch(.{
+ .location = .{ .url = location },
+ .method = .POST,
+ .payload = "Hello, World!\n",
+ .extra_headers = &.{
+ .{ .name = "content-type", .value = "text/plain" },
+ },
+ .response_storage = .{ .dynamic = &body }, // the response body is appended to the list declared above
+ });
+ try expectEqual(.ok, res.status);
+ try expectEqualStrings("Hello, World!\n", body.items);
+ }
+
+ { // expect: 100-continue
+ const location = try std.fmt.allocPrint(gpa, "http://127.0.0.1:{d}/echo-content#expect-100", .{port});
+ defer gpa.free(location);
+ const uri = try std.Uri.parse(location);
+
+ var server_header_buffer: [1024]u8 = undefined;
+ var req = try client.open(.POST, uri, .{
+ .server_header_buffer = &server_header_buffer,
+ .extra_headers = &.{
+ .{ .name = "expect", .value = "100-continue" },
+ .{ .name = "content-type", .value = "text/plain" },
+ },
+ });
+ defer req.deinit();
+
+ req.transfer_encoding = .chunked;
+
+ try req.send(.{});
+ try req.writeAll("Hello, ");
+ try req.writeAll("World!\n");
+ try req.finish();
+
+ try req.wait();
+ try expectEqual(.ok, req.response.status);
+
+ const body = try req.reader().readAllAlloc(gpa, 8192);
+ defer gpa.free(body);
+
+ try expectEqualStrings("Hello, World!\n", body);
+ }
+
+ { // expect: garbage
+ const location = try std.fmt.allocPrint(gpa, "http://127.0.0.1:{d}/echo-content#expect-garbage", .{port});
+ defer gpa.free(location);
+ const uri = try std.Uri.parse(location);
+
+ var server_header_buffer: [1024]u8 = undefined;
+ var req = try client.open(.POST, uri, .{
+ .server_header_buffer = &server_header_buffer,
+ .extra_headers = &.{
+ .{ .name = "content-type", .value = "text/plain" },
+ .{ .name = "expect", .value = "garbage" },
+ },
+ });
+ defer req.deinit();
+
+ req.transfer_encoding = .chunked;
+
+ try req.send(.{});
+ try req.wait(); // no body is written in this scenario; only the response status is checked
+ try expectEqual(.expectation_failed, req.response.status);
+ }
+
+ _ = try client.fetch(.{ // final request to "/end"; the result is discarded
+ .location = .{
+ .url = try std.fmt.bufPrint(&location_buffer, "http://127.0.0.1:{d}/end", .{port}),
+ },
+ });
+}
+
+/// Handle to a background test server: the listening socket together with
+/// the thread that services it.
+const TestServer = struct {
+    server_thread: std.Thread,
+    net_server: std.net.Server,
+
+    const Self = @This();
+
+    /// Returns the port the listening socket is bound to.
+    fn port(server: Self) u16 {
+        const address = server.net_server.listen_address;
+        return address.in.getPort();
+    }
+
+    /// Joins the server thread, closes the listening socket and then frees
+    /// `server` through `std.testing.allocator`.
+    fn destroy(server: *Self) void {
+        const gpa = std.testing.allocator;
+        server.server_thread.join();
+        server.net_server.deinit();
+        gpa.destroy(server);
+    }
+};
+
+/// Allocates a `TestServer` listening on 127.0.0.1 with port 0 and spawns a
+/// thread running `S.run` with a pointer to the listening socket.
+///
+/// Returns `error.SkipZigTest` on single-threaded builds and on big-endian
+/// targets with the LLVM backend. On success the caller releases the result
+/// with `TestServer.destroy`; on failure nothing is leaked.
+fn createTestServer(S: type) !*TestServer {
+    if (builtin.single_threaded) return error.SkipZigTest;
+    if (builtin.zig_backend == .stage2_llvm and native_endian == .big) {
+        // https://github.com/ziglang/zig/issues/13782
+        return error.SkipZigTest;
+    }
+
+    const gpa = std.testing.allocator;
+    const address = try std.net.Address.parseIp("127.0.0.1", 0);
+
+    const test_server = try gpa.create(TestServer);
+    // Do not leak the allocation if listening or spawning fails below.
+    errdefer gpa.destroy(test_server);
+
+    test_server.net_server = try address.listen(.{ .reuse_address = true });
+    // Close the socket again if the server thread cannot be started.
+    errdefer test_server.net_server.deinit();
+
+    test_server.server_thread = try std.Thread.spawn(.{}, S.run, .{&test_server.net_server});
+    return test_server;
+}
+
+test "redirect to different connection" {
+    // Redirect target: serves exactly one request, which must be for "/ok".
+    const test_server_new = try createTestServer(struct {
+        fn run(net_server: *std.net.Server) anyerror!void {
+            var header_buffer: [888]u8 = undefined;
+
+            const conn = try net_server.accept();
+            defer conn.stream.close();
+
+            var server = http.Server.init(conn, &header_buffer);
+            var request = try server.receiveHead();
+            // expected value first, actual value second
+            try expectEqualStrings("/ok", request.head.target);
+            try request.respond("good job, you pass", .{});
+        }
+    });
+    defer test_server_new.destroy();
+
+    // The server thread functions cannot capture locals, so the target port
+    // is handed over through a container-level variable.
+    const global = struct {
+        var other_port: ?u16 = null;
+    };
+    global.other_port = test_server_new.port();
+
+    // Original server: answers "/help" with a 302 pointing at the other server.
+    const test_server_orig = try createTestServer(struct {
+        fn run(net_server: *std.net.Server) anyerror!void {
+            var header_buffer: [999]u8 = undefined;
+            var send_buffer: [100]u8 = undefined;
+
+            const conn = try net_server.accept();
+            defer conn.stream.close();
+
+            const new_loc = try std.fmt.bufPrint(&send_buffer, "http://127.0.0.1:{d}/ok", .{
+                global.other_port.?,
+            });
+
+            var server = http.Server.init(conn, &header_buffer);
+            var request = try server.receiveHead();
+            // expected value first, actual value second
+            try expectEqualStrings("/help", request.head.target);
+            try request.respond("", .{
+                .status = .found,
+                .extra_headers = &.{
+                    .{ .name = "location", .value = new_loc },
+                },
+            });
+        }
+    });
+    defer test_server_orig.destroy();
+
+    const gpa = std.testing.allocator;
+
+    var client: http.Client = .{ .allocator = gpa };
+    defer client.deinit();
+
+    var loc_buf: [100]u8 = undefined;
+    const location = try std.fmt.bufPrint(&loc_buf, "http://127.0.0.1:{d}/help", .{
+        test_server_orig.port(),
+    });
+    const uri = try std.Uri.parse(location);
+
+    {
+        var server_header_buffer: [666]u8 = undefined;
+        var req = try client.open(.GET, uri, .{
+            .server_header_buffer = &server_header_buffer,
+        });
+        defer req.deinit();
+
+        try req.send(.{});
+        try req.wait();
+
+        // The body must come from the redirect target, not the original server.
+        const body = try req.reader().readAllAlloc(gpa, 8192);
+        defer gpa.free(body);
+
+        try expectEqualStrings("good job, you pass", body);
+    }
+}
diff --git a/lib/std/io.zig b/lib/std/io.zig
index 985297e051..75693b8b1e 100644
--- a/lib/std/io.zig
+++ b/lib/std/io.zig
@@ -12,22 +12,6 @@ const meta = std.meta;
const File = std.fs.File;
const Allocator = std.mem.Allocator;
-pub const Mode = enum {
- /// I/O operates normally, waiting for the operating system syscalls to complete.
- blocking,
-
- /// I/O functions are generated async and rely on a global event loop. Event-based I/O.
- evented,
-};
-
-const mode = std.options.io_mode;
-pub const is_async = mode != .blocking;
-
-/// This is an enum value to use for I/O mode at runtime, since it takes up zero bytes at runtime,
-/// and makes expressions comptime-known when `is_async` is `false`.
-pub const ModeOverride = if (is_async) Mode else enum { blocking };
-pub const default_mode: ModeOverride = if (is_async) Mode.evented else .blocking;
-
fn getStdOutHandle() os.fd_t {
if (builtin.os.tag == .windows) {
if (builtin.zig_backend == .stage2_aarch64) {
@@ -44,14 +28,8 @@ fn getStdOutHandle() os.fd_t {
return os.STDOUT_FILENO;
}
-/// TODO: async stdout on windows without a dedicated thread.
-/// https://github.com/ziglang/zig/pull/4816#issuecomment-604521023
pub fn getStdOut() File {
- return File{
- .handle = getStdOutHandle(),
- .capable_io_mode = .blocking,
- .intended_io_mode = default_mode,
- };
+ return File{ .handle = getStdOutHandle() };
}
fn getStdErrHandle() os.fd_t {
@@ -70,14 +48,8 @@ fn getStdErrHandle() os.fd_t {
return os.STDERR_FILENO;
}
-/// This returns a `File` that is configured to block with every write, in order
-/// to facilitate better debugging. This can be changed by modifying the `intended_io_mode` field.
pub fn getStdErr() File {
- return File{
- .handle = getStdErrHandle(),
- .capable_io_mode = .blocking,
- .intended_io_mode = .blocking,
- };
+ return File{ .handle = getStdErrHandle() };
}
fn getStdInHandle() os.fd_t {
@@ -96,14 +68,8 @@ fn getStdInHandle() os.fd_t {
return os.STDIN_FILENO;
}
-/// TODO: async stdin on windows without a dedicated thread.
-/// https://github.com/ziglang/zig/pull/4816#issuecomment-604521023
pub fn getStdIn() File {
- return File{
- .handle = getStdInHandle(),
- .capable_io_mode = .blocking,
- .intended_io_mode = default_mode,
- };
+ return File{ .handle = getStdInHandle() };
}
pub fn GenericReader(
@@ -445,6 +411,9 @@ pub const BufferedAtomicFile = @import("io/buffered_atomic_file.zig").BufferedAt
pub const StreamSource = @import("io/stream_source.zig").StreamSource;
+pub const BufferedTee = @import("io/buffered_tee.zig").BufferedTee;
+pub const bufferedTee = @import("io/buffered_tee.zig").bufferedTee;
+
pub const tty = @import("io/tty.zig");
/// A Writer that doesn't write to anything.
@@ -543,9 +512,17 @@ pub fn Poller(comptime StreamEnum: type) type {
pub fn poll(self: *Self) !bool {
if (builtin.os.tag == .windows) {
- return pollWindows(self);
+ return pollWindows(self, null);
+ } else {
+ return pollPosix(self, null);
+ }
+ }
+
+ pub fn pollTimeout(self: *Self, nanoseconds: u64) !bool {
+ if (builtin.os.tag == .windows) {
+ return pollWindows(self, nanoseconds);
} else {
- return pollPosix(self);
+ return pollPosix(self, nanoseconds);
}
}
@@ -553,7 +530,7 @@ pub fn Poller(comptime StreamEnum: type) type {
return &self.fifos[@intFromEnum(which)];
}
- fn pollWindows(self: *Self) !bool {
+ fn pollWindows(self: *Self, nanoseconds: ?u64) !bool {
const bump_amt = 512;
if (!self.windows.first_read_done) {
@@ -584,10 +561,15 @@ pub fn Poller(comptime StreamEnum: type) type {
self.windows.active.count,
&self.windows.active.handles_buf,
0,
- os.windows.INFINITE,
+ if (nanoseconds) |ns|
+ @min(std.math.cast(u32, ns / std.time.ns_per_ms) orelse (os.windows.INFINITE - 1), os.windows.INFINITE - 1)
+ else
+ os.windows.INFINITE,
);
if (status == os.windows.WAIT_FAILED)
return os.windows.unexpectedError(os.windows.kernel32.GetLastError());
+ if (status == os.windows.WAIT_TIMEOUT)
+ return true;
if (status < os.windows.WAIT_OBJECT_0 or status > os.windows.WAIT_OBJECT_0 + enum_fields.len - 1)
unreachable;
@@ -625,7 +607,7 @@ pub fn Poller(comptime StreamEnum: type) type {
}
}
- fn pollPosix(self: *Self) !bool {
+ fn pollPosix(self: *Self, nanoseconds: ?u64) !bool {
// We ask for ensureUnusedCapacity with this much extra space. This
// has more of an effect on small reads because once the reads
// start to get larger the amount of space an ArrayList will
@@ -634,7 +616,10 @@ pub fn Poller(comptime StreamEnum: type) type {
const err_mask = os.POLL.ERR | os.POLL.NVAL | os.POLL.HUP;
- const events_len = try os.poll(&self.poll_fds, std.math.maxInt(i32));
+ const events_len = try os.poll(&self.poll_fds, if (nanoseconds) |ns|
+ std.math.cast(i32, ns / std.time.ns_per_ms) orelse std.math.maxInt(i32)
+ else
+ -1);
if (events_len == 0) {
for (self.poll_fds) |poll_fd| {
if (poll_fd.fd != -1) return true;
@@ -726,4 +711,5 @@ test {
_ = @import("io/seekable_stream.zig");
_ = @import("io/stream_source.zig");
_ = @import("io/test.zig");
+ _ = @import("io/buffered_tee.zig");
}
diff --git a/lib/std/io/Reader.zig b/lib/std/io/Reader.zig
index 0d96629e7a..9569d8d565 100644
--- a/lib/std/io/Reader.zig
+++ b/lib/std/io/Reader.zig
@@ -360,6 +360,18 @@ pub fn readEnum(self: Self, comptime Enum: type, endian: std.builtin.Endian) any
return E.InvalidValue;
}
+/// Consumes the rest of the stream, throwing the data away.
+/// Returns how many bytes were discarded.
+pub fn discard(self: Self) anyerror!u64 {
+    var scratch: [4096]u8 = undefined;
+    var total: u64 = 0;
+    while (true) {
+        const amt = try self.read(&scratch);
+        // A zero-length read marks the end of the stream.
+        if (amt == 0) break;
+        total += amt;
+    }
+    return total;
+}
+
const std = @import("../std.zig");
const Self = @This();
const math = std.math;
diff --git a/lib/std/io/buffered_tee.zig b/lib/std/io/buffered_tee.zig
new file mode 100644
index 0000000000..304b360c41
--- /dev/null
+++ b/lib/std/io/buffered_tee.zig
@@ -0,0 +1,379 @@
+const std = @import("std");
+const io = std.io;
+const assert = std.debug.assert;
+const testing = std.testing;
+
+/// BufferedTee provides a reader interface to the consumer. Data read by the
+/// consumer is also written to the output. The output is held lookahead_size
+/// bytes behind the consumer, allowing the consumer to put back some bytes to
+/// be read again. On flush all consumed bytes are written to the output.
+///
+///   input -> tee -> consumer
+///             |
+///           output
+///
+/// input - underlying unbuffered reader
+/// output - writer, receives data read by consumer
+/// consumer - uses provided reader interface
+///
+/// If lookahead_size is zero the output always has the same bytes as the consumer.
+/// `read` returns fewer bytes than requested only when the input returns 0 bytes.
+pub fn BufferedTee(
+ comptime buffer_size: usize, // internal buffer size in bytes
+ comptime lookahead_size: usize, // lookahead, number of bytes to hold output behind consumer
+ comptime InputReaderType: type,
+ comptime OutputWriterType: type,
+) type {
+ comptime assert(buffer_size > lookahead_size); // room for new input must remain after the preserved lookahead
+
+ return struct {
+ input: InputReaderType,
+ output: OutputWriterType,
+
+ buf: [buffer_size]u8 = undefined, // internal buffer
+ tail: usize = 0, // buffer is filled up to this position with bytes from input
+ rp: usize = 0, // reader pointer; consumer has read up to this position
+ wp: usize = 0, // writer pointer; data is sent to the output up to this position
+
+ pub const Error = InputReaderType.Error || OutputWriterType.Error;
+ pub const Reader = io.Reader(*Self, Error, read);
+
+ const Self = @This();
+
+ pub fn read(self: *Self, dest: []u8) Error!usize {
+ var dest_index: usize = 0;
+
+ while (dest_index < dest.len) {
+ const written = @min(dest.len - dest_index, self.tail - self.rp); // buffered bytes that fit into dest
+ if (written == 0) {
+ try self.preserveLookahead();
+ // Fill the upper part of the buffer (from tail onwards) with new input.
+ const n = try self.input.read(self.buf[self.tail..]);
+ if (n == 0) {
+ // Reading from the unbuffered stream returned nothing,
+ // so there is nothing left to read.
+ return dest_index;
+ }
+ self.tail += n;
+ } else {
+ @memcpy(dest[dest_index..][0..written], self.buf[self.rp..][0..written]);
+ self.rp += written;
+ dest_index += written;
+ try self.flush_(lookahead_size); // keep the output lookahead_size bytes behind rp
+ }
+ }
+ return dest.len;
+ }
+
+ /// Make room for new input, keeping the last lookahead_size bytes at the buffer start.
+ fn preserveLookahead(self: *Self) !void {
+ assert(self.tail == self.rp);
+ if (lookahead_size == 0) {
+ // Flush is called on each read, so wp must follow rp when lookahead_size == 0.
+ assert(self.wp == self.rp);
+ // Nothing to preserve; rewind all pointers to the buffer start.
+ self.rp = 0;
+ self.wp = 0;
+ self.tail = 0;
+ return;
+ }
+ if (self.tail <= lookahead_size) {
+ // There is still place in the buffer; keep appending from the tail position.
+ return;
+ }
+ try self.flush_(lookahead_size);
+ const head = self.tail - lookahead_size;
+ // Preserve head..tail at the start of the buffer.
+ std.mem.copyForwards(u8, self.buf[0..lookahead_size], self.buf[head..self.tail]);
+ self.wp -= head;
+ assert(self.wp <= lookahead_size);
+ self.rp = lookahead_size;
+ self.tail = lookahead_size;
+ }
+
+ /// Flush to the output all consumed bytes except the last `lookahead` bytes.
+ fn flush_(self: *Self, lookahead: usize) !void {
+ if (self.rp <= self.wp + lookahead) return;
+ const new_wp = self.rp - lookahead;
+ try self.output.writeAll(self.buf[self.wp..new_wp]);
+ self.wp = new_wp;
+ }
+
+ /// Flush to the output all consumed bytes.
+ pub fn flush(self: *Self) !void {
+ try self.flush_(0);
+ }
+
+ /// Put back some bytes to be consumed again. Useful when we overshoot while
+ /// reading and want to return the overshot bytes. At most lookahead_size bytes
+ /// can be put back; bytes already sent to the output are not written again.
+ pub fn putBack(self: *Self, n: usize) void {
+ assert(n <= lookahead_size and n <= self.rp);
+ self.rp -= n;
+ }
+
+ pub fn reader(self: *Self) Reader {
+ return .{ .context = self };
+ }
+ };
+}
+
+/// Creates a `BufferedTee` around `input` and `output`, inferring the reader
+/// and writer types from the arguments. The internal buffer starts out empty.
+pub fn bufferedTee(
+    comptime buffer_size: usize,
+    comptime lookahead_size: usize,
+    input: anytype,
+    output: anytype,
+) BufferedTee(buffer_size, lookahead_size, @TypeOf(input), @TypeOf(output)) {
+    const Tee = BufferedTee(buffer_size, lookahead_size, @TypeOf(input), @TypeOf(output));
+    return Tee{ .input = input, .output = output };
+}
+
+// Run the tests from std.io.BufferedReader against BufferedTee: with zero lookahead
+// and a null writer as output it should act as a BufferedReader for the consumer.
+
+fn BufferedReader(comptime buffer_size: usize, comptime ReaderType: type) type {
+ return BufferedTee(buffer_size, 0, ReaderType, @TypeOf(io.null_writer));
+}
+
+/// Wraps `reader` in a `BufferedReader` with a 4096-byte buffer whose output
+/// side is `io.null_writer`.
+fn bufferedReader(reader: anytype) BufferedReader(4096, @TypeOf(reader)) {
+    const Buffered = BufferedReader(4096, @TypeOf(reader));
+    return Buffered{ .input = reader, .output = io.null_writer };
+}
+
+test "io.BufferedTee io.BufferedReader OneByte" {
+    // Input that hands out at most one byte per read call.
+    const OneByteReadReader = struct {
+        str: []const u8,
+        curr: usize,
+
+        const Error = error{NoError};
+        const Self = @This();
+        const Reader = io.Reader(*Self, Error, read);
+
+        fn init(str: []const u8) Self {
+            return .{ .str = str, .curr = 0 };
+        }
+
+        fn read(self: *Self, dest: []u8) Error!usize {
+            const exhausted = self.curr >= self.str.len;
+            if (exhausted or dest.len == 0) return 0;
+
+            dest[0] = self.str[self.curr];
+            self.curr += 1;
+            return 1;
+        }
+
+        fn reader(self: *Self) Reader {
+            return .{ .context = self };
+        }
+    };
+
+    const expected = "This is a test";
+    var source = OneByteReadReader.init(expected);
+    var buffered = bufferedReader(source.reader());
+
+    // Reading everything through the tee must reproduce the input exactly.
+    const actual = try buffered.reader().readAllAlloc(testing.allocator, expected.len + 1);
+    defer testing.allocator.free(actual);
+    try testing.expectEqualSlices(u8, expected, actual);
+}
+
+test "io.BufferedTee io.BufferedReader Block" {
+    // Input that copies the whole `block` into `dest` on each of the first
+    // `reads_allowed` reads and returns 0 afterwards.
+    const BlockReader = struct {
+        block: []const u8,
+        reads_allowed: usize,
+        curr_read: usize,
+
+        const Error = error{NoError};
+        const Self = @This();
+        const Reader = io.Reader(*Self, Error, read);
+
+        fn init(block: []const u8, reads_allowed: usize) Self {
+            return Self{
+                .block = block,
+                .reads_allowed = reads_allowed,
+                .curr_read = 0,
+            };
+        }
+
+        fn read(self: *Self, dest: []u8) Error!usize {
+            if (self.curr_read >= self.reads_allowed) return 0;
+            @memcpy(dest[0..self.block.len], self.block);
+
+            self.curr_read += 1;
+            return self.block.len;
+        }
+
+        fn reader(self: *Self) Reader {
+            return .{ .context = self };
+        }
+    };
+
+    const block = "0123";
+
+    // All assertions below pass the expected value first and the actual value
+    // second, matching the std.testing convention and the other tests here.
+
+    // len out == block
+    {
+        var test_buf_reader: BufferedReader(4, BlockReader) = .{
+            .input = BlockReader.init(block, 2),
+            .output = io.null_writer,
+        };
+        var out_buf: [4]u8 = undefined;
+        _ = try test_buf_reader.read(&out_buf);
+        try testing.expectEqualSlices(u8, block, &out_buf);
+        _ = try test_buf_reader.read(&out_buf);
+        try testing.expectEqualSlices(u8, block, &out_buf);
+        try testing.expectEqual(0, try test_buf_reader.read(&out_buf));
+    }
+
+    // len out < block
+    {
+        var test_buf_reader: BufferedReader(4, BlockReader) = .{
+            .input = BlockReader.init(block, 2),
+            .output = io.null_writer,
+        };
+        var out_buf: [3]u8 = undefined;
+        _ = try test_buf_reader.read(&out_buf);
+        try testing.expectEqualSlices(u8, "012", &out_buf);
+        _ = try test_buf_reader.read(&out_buf);
+        try testing.expectEqualSlices(u8, "301", &out_buf);
+        const n = try test_buf_reader.read(&out_buf);
+        try testing.expectEqualSlices(u8, "23", out_buf[0..n]);
+        try testing.expectEqual(0, try test_buf_reader.read(&out_buf));
+    }
+
+    // len out > block
+    {
+        var test_buf_reader: BufferedReader(4, BlockReader) = .{
+            .input = BlockReader.init(block, 2),
+            .output = io.null_writer,
+        };
+        var out_buf: [5]u8 = undefined;
+        _ = try test_buf_reader.read(&out_buf);
+        try testing.expectEqualSlices(u8, "01230", &out_buf);
+        const n = try test_buf_reader.read(&out_buf);
+        try testing.expectEqualSlices(u8, "123", out_buf[0..n]);
+        try testing.expectEqual(0, try test_buf_reader.read(&out_buf));
+    }
+
+    // len out == 0
+    {
+        var test_buf_reader: BufferedReader(4, BlockReader) = .{
+            .input = BlockReader.init(block, 2),
+            .output = io.null_writer,
+        };
+        var out_buf: [0]u8 = undefined;
+        _ = try test_buf_reader.read(&out_buf);
+        try testing.expectEqualSlices(u8, "", &out_buf);
+    }
+
+    // len bufreader buf > block
+    {
+        var test_buf_reader: BufferedReader(5, BlockReader) = .{
+            .input = BlockReader.init(block, 2),
+            .output = io.null_writer,
+        };
+        var out_buf: [4]u8 = undefined;
+        _ = try test_buf_reader.read(&out_buf);
+        try testing.expectEqualSlices(u8, block, &out_buf);
+        _ = try test_buf_reader.read(&out_buf);
+        try testing.expectEqualSlices(u8, block, &out_buf);
+        try testing.expectEqual(0, try test_buf_reader.read(&out_buf));
+    }
+}
+
+test "io.BufferedTee with zero lookahead" {
+    // With no lookahead the output must always hold exactly what was consumed.
+    const data = [_]u8{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 } ** 12;
+    var in = io.fixedBufferStream(&data);
+    var out = std.ArrayList(u8).init(testing.allocator);
+    defer out.deinit();
+
+    var tee = bufferedTee(8, 0, in.reader(), out.writer());
+
+    var scratch: [16]u8 = undefined;
+    var consumed: usize = 0;
+    var want: usize = 0;
+    // Read 0, 1, 2, ... 15 bytes in turn and check the output after each read.
+    while (want < scratch.len) : (want += 1) {
+        const got = try tee.read(scratch[0..want]);
+        try testing.expectEqual(want, got);
+        consumed += want;
+        try testing.expectEqual(consumed, out.items.len);
+    }
+}
+
+test "io.BufferedTee with lookahead" {
+    // The output must trail the consumer by exactly `lookahead` bytes.
+    inline for (1..8) |lookahead| {
+        const data = [_]u8{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 } ** 12;
+        var in = io.fixedBufferStream(&data);
+        var out = std.ArrayList(u8).init(testing.allocator);
+        defer out.deinit();
+
+        var tee = bufferedTee(8, lookahead, in.reader(), out.writer());
+        var scratch: [16]u8 = undefined;
+
+        var consumed: usize = 0;
+        var want: usize = 1;
+        while (want < scratch.len) : (want += 1) {
+            const got = try tee.read(scratch[0..want]);
+            try testing.expectEqual(want, got);
+            consumed += want;
+            // Nothing is written until more than `lookahead` bytes were consumed.
+            const expected_out_len = consumed -| lookahead;
+            try testing.expectEqual(expected_out_len, out.items.len);
+        }
+        try testing.expectEqual(consumed, out.items.len + lookahead);
+        // An explicit flush pushes out the held-back bytes as well.
+        try tee.flush();
+        try testing.expectEqual(consumed, out.items.len);
+    }
+}
+
+test "io.BufferedTee internal state" {
+    const data = [_]u8{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 } ** 2;
+    var in = io.fixedBufferStream(&data);
+    var out = std.ArrayList(u8).init(testing.allocator);
+    defer out.deinit();
+
+    var tee = bufferedTee(8, 4, in.reader(), out.writer());
+    var scratch: [16]u8 = undefined;
+
+    // 3 bytes consumed: fewer than the lookahead, so nothing is written yet.
+    {
+        const got = try tee.read(scratch[0..3]);
+        try testing.expectEqual(3, got);
+        try testing.expectEqualSlices(u8, data[0..3], scratch[0..got]);
+        try testing.expectEqual(8, tee.tail);
+        try testing.expectEqual(3, tee.rp);
+        try testing.expectEqual(0, out.items.len);
+    }
+
+    // 9 bytes consumed in total: the buffer was refilled and the output is 4 behind.
+    {
+        const got = try tee.read(scratch[0..6]);
+        try testing.expectEqual(6, got);
+        try testing.expectEqualSlices(u8, data[3..9], scratch[0..got]);
+        try testing.expectEqual(8, tee.tail);
+        try testing.expectEqual(5, tee.rp);
+        try testing.expectEqualSlices(u8, data[4..12], &tee.buf);
+        try testing.expectEqual(5, out.items.len);
+    }
+
+    // 18 bytes consumed in total: the output holds 14.
+    {
+        const got = try tee.read(scratch[0..9]);
+        try testing.expectEqual(9, got);
+        try testing.expectEqualSlices(u8, data[9..18], scratch[0..got]);
+        try testing.expectEqual(8, tee.tail);
+        try testing.expectEqual(6, tee.rp);
+        try testing.expectEqualSlices(u8, data[12..20], &tee.buf);
+        try testing.expectEqual(14, out.items.len);
+    }
+
+    // Flushing brings the output level with the consumer.
+    try tee.flush();
+    try testing.expectEqual(18, out.items.len);
+
+    // Put-back bytes are handed out again but are not written a second time.
+    tee.putBack(4);
+    {
+        const got = try tee.read(scratch[0..4]);
+        try testing.expectEqual(4, got);
+        try testing.expectEqualSlices(u8, data[14..18], scratch[0..got]);
+    }
+
+    try testing.expectEqual(18, out.items.len);
+    try tee.flush();
+    try testing.expectEqual(18, out.items.len);
+}
diff --git a/lib/std/io/fixed_buffer_stream.zig b/lib/std/io/fixed_buffer_stream.zig
index f62ac415a4..8b64923fe7 100644
--- a/lib/std/io/fixed_buffer_stream.zig
+++ b/lib/std/io/fixed_buffer_stream.zig
@@ -62,11 +62,7 @@ pub fn FixedBufferStream(comptime Buffer: type) type {
if (bytes.len == 0) return 0;
if (self.pos >= self.buffer.len) return error.NoSpaceLeft;
- const n = if (self.pos + bytes.len <= self.buffer.len)
- bytes.len
- else
- self.buffer.len - self.pos;
-
+ const n = @min(self.buffer.len - self.pos, bytes.len);
@memcpy(self.buffer[self.pos..][0..n], bytes[0..n]);
self.pos += n;
@@ -76,7 +72,7 @@ pub fn FixedBufferStream(comptime Buffer: type) type {
}
pub fn seekTo(self: *Self, pos: u64) SeekError!void {
- self.pos = if (std.math.cast(usize, pos)) |x| @min(self.buffer.len, x) else self.buffer.len;
+ self.pos = @min(std.math.lossyCast(usize, pos), self.buffer.len);
}
pub fn seekBy(self: *Self, amt: i64) SeekError!void {
diff --git a/lib/std/io/test.zig b/lib/std/io/test.zig
index 79ea020092..2f9464eef4 100644
--- a/lib/std/io/test.zig
+++ b/lib/std/io/test.zig
@@ -1,6 +1,6 @@
const std = @import("std");
const io = std.io;
-const DefaultPrng = std.rand.DefaultPrng;
+const DefaultPrng = std.Random.DefaultPrng;
const expect = std.testing.expect;
const expectEqual = std.testing.expectEqual;
const expectError = std.testing.expectError;
diff --git a/lib/std/json/stringify.zig b/lib/std/json/stringify.zig
index 37fd7efb17..add35e80bf 100644
--- a/lib/std/json/stringify.zig
+++ b/lib/std/json/stringify.zig
@@ -88,7 +88,7 @@ pub fn stringifyAlloc(
allocator: Allocator,
value: anytype,
options: StringifyOptions,
-) error{OutOfMemory}![]const u8 {
+) error{OutOfMemory}![]u8 {
var list = std.ArrayList(u8).init(allocator);
errdefer list.deinit();
try stringifyArbitraryDepth(allocator, value, options, list.writer());
diff --git a/lib/std/log.zig b/lib/std/log.zig
index 1c5b60ff1a..0562d09c51 100644
--- a/lib/std/log.zig
+++ b/lib/std/log.zig
@@ -18,12 +18,12 @@
//! ```
//! const std = @import("std");
//!
-//! pub const std_options = struct {
+//! pub const std_options = .{
//! // Set the log level to info
-//! pub const log_level = .info;
+//! .log_level = .info,
//!
//! // Define logFn to override the std implementation
-//! pub const logFn = myLogFn;
+//! .logFn = myLogFn,
//! };
//!
//! pub fn myLogFn(
@@ -149,9 +149,15 @@ pub fn defaultLog(
const level_txt = comptime message_level.asText();
const prefix2 = if (scope == .default) ": " else "(" ++ @tagName(scope) ++ "): ";
const stderr = std.io.getStdErr().writer();
+ var bw = std.io.bufferedWriter(stderr);
+ const writer = bw.writer();
+
std.debug.getStderrMutex().lock();
defer std.debug.getStderrMutex().unlock();
- nosuspend stderr.print(level_txt ++ prefix2 ++ format ++ "\n", args) catch return;
+ nosuspend {
+ writer.print(level_txt ++ prefix2 ++ format ++ "\n", args) catch return;
+ bw.flush() catch return;
+ }
}
/// Returns a scoped logging namespace that logs all messages using the scope
diff --git a/lib/std/math/big/int_test.zig b/lib/std/math/big/int_test.zig
index e1f423e3c1..558ada3374 100644
--- a/lib/std/math/big/int_test.zig
+++ b/lib/std/math/big/int_test.zig
@@ -918,7 +918,6 @@ test "big.int mul multi-single" {
test "big.int mul multi-multi" {
if (builtin.zig_backend == .stage2_c) return error.SkipZigTest;
- if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
var op1: u256 = 0x998888efefefefefefefef;
var op2: u256 = 0x333000abababababababab;
@@ -1042,7 +1041,6 @@ test "big.int mulWrap single-single signed" {
test "big.int mulWrap multi-multi unsigned" {
if (builtin.zig_backend == .stage2_c) return error.SkipZigTest;
- if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
var op1: u256 = 0x998888efefefefefefefef;
var op2: u256 = 0x333000abababababababab;
@@ -1698,7 +1696,6 @@ test "big.int div multi-multi (2 branch)" {
test "big.int div multi-multi (3.1/3.3 branch)" {
if (builtin.zig_backend == .stage2_c) return error.SkipZigTest;
- if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
var a = try Managed.initSet(testing.allocator, 0x11111111111111111111111111111111111111111111111111111111111111);
defer a.deinit();
@@ -2187,7 +2184,6 @@ test "big.int bitNotWrap more than two limbs" {
// This test requires int sizes greater than 128 bits.
if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_c) return error.SkipZigTest; // TODO
- if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
// LLVM: unexpected runtime library name: __umodei4
@@ -2537,7 +2533,6 @@ test "big.int gcd non-one large" {
test "big.int gcd large multi-limb result" {
if (builtin.zig_backend == .stage2_c) return error.SkipZigTest;
- if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
var a = try Managed.initSet(testing.allocator, 0x12345678123456781234567812345678123456781234567812345678);
defer a.deinit();
diff --git a/lib/std/math/big/rational.zig b/lib/std/math/big/rational.zig
index 5529a6ef26..b095d005f0 100644
--- a/lib/std/math/big/rational.zig
+++ b/lib/std/math/big/rational.zig
@@ -594,7 +594,7 @@ test "big.rational toFloat" {
test "big.rational set/to Float round-trip" {
var a = try Rational.init(testing.allocator);
defer a.deinit();
- var prng = std.rand.DefaultPrng.init(0x5EED);
+ var prng = std.Random.DefaultPrng.init(0x5EED);
const random = prng.random();
var i: usize = 0;
while (i < 512) : (i += 1) {
diff --git a/lib/std/math/log10.zig b/lib/std/math/log10.zig
index 0619fdbc17..5b3ee8a840 100644
--- a/lib/std/math/log10.zig
+++ b/lib/std/math/log10.zig
@@ -1,10 +1,6 @@
const std = @import("../std.zig");
const builtin = @import("builtin");
-const math = std.math;
const testing = std.testing;
-const maxInt = std.math.maxInt;
-const assert = std.debug.assert;
-const Log2Int = std.math.Log2Int;
/// Returns the base-10 logarithm of x.
///
@@ -38,13 +34,13 @@ pub fn log10(x: anytype) @TypeOf(x) {
/// Return the log base 10 of integer value x, rounding down to the
/// nearest integer.
-pub fn log10_int(x: anytype) Log2Int(@TypeOf(x)) {
+pub fn log10_int(x: anytype) std.math.Log2Int(@TypeOf(x)) {
const T = @TypeOf(x);
- const OutT = Log2Int(T);
+ const OutT = std.math.Log2Int(T);
if (@typeInfo(T) != .Int or @typeInfo(T).Int.signedness != .unsigned)
@compileError("log10_int requires an unsigned integer, found " ++ @typeName(T));
- assert(x != 0);
+ std.debug.assert(x != 0);
const bit_size = @typeInfo(T).Int.bits;
@@ -133,18 +129,7 @@ inline fn less_than_5(x: u32) u32 {
return (((x + C1) & (x + C2)) ^ ((x + C3) & (x + C4))) >> 17;
}
-fn oldlog10(x: anytype) u8 {
- return @as(u8, @intFromFloat(@log10(@as(f64, @floatFromInt(x)))));
-}
-
-test "oldlog10 doesn't work" {
- try testing.expect(14 != oldlog10(pow10(15) - 1));
-
- // log10(10**15 -1) should indeed be 14
- try testing.expect(14 == log10_int(@as(u64, pow10(15) - 1)));
-}
-
-test "log10_int vs old implementation" {
+test log10_int {
if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_c) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
@@ -152,53 +137,22 @@ test "log10_int vs old implementation" {
if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
if (builtin.zig_backend == .stage2_llvm and comptime builtin.target.isWasm()) return error.SkipZigTest; // TODO
- const int_types = .{ u8, u16, u32, u64, u128 };
-
- inline for (int_types) |T| {
- const last = @min(maxInt(T), 100_000);
- for (1..last) |i| {
- const x = @as(T, @intCast(i));
- try testing.expectEqual(oldlog10(x), log10_int(x));
- }
-
- const max_int: T = maxInt(T);
- try testing.expectEqual(oldlog10(max_int), log10_int(max_int));
- }
-}
-
-test "log10_int close to powers of 10" {
- if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
- if (builtin.zig_backend == .stage2_c) return error.SkipZigTest; // TODO
- if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
- if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
- if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
- if (builtin.zig_backend == .stage2_llvm and comptime builtin.target.isWasm()) return error.SkipZigTest; // TODO
- if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest;
-
- const int_types = .{ u8, u16, u32, u64, u128, u256, u512 };
- const max_log_values: [7]usize = .{ 2, 4, 9, 19, 38, 77, 154 };
-
- inline for (int_types, max_log_values) |T, expected_max_ilog| {
- const max_val: T = maxInt(T);
-
- try testing.expectEqual(expected_max_ilog, log10_int(max_val));
-
- for (0..(expected_max_ilog + 1)) |idx| {
- const i = @as(T, @intCast(idx));
- const p: T = try math.powi(T, 10, i);
-
- const b = @as(Log2Int(T), @intCast(i));
-
- if (p >= 10) {
- try testing.expectEqual(b - 1, log10_int(p - 9));
- try testing.expectEqual(b - 1, log10_int(p - 1));
- }
-
- try testing.expectEqual(b, log10_int(p));
- try testing.expectEqual(b, log10_int(p + 1));
- if (p >= 10) {
- try testing.expectEqual(b, log10_int(p + 9));
+ inline for (
+ .{ u8, u16, u32, u64, u128, u256, u512 },
+ .{ 2, 4, 9, 19, 38, 77, 154 },
+ ) |T, max_exponent| {
+ for (0..max_exponent + 1) |exponent_usize| {
+ const exponent: std.math.Log2Int(T) = @intCast(exponent_usize);
+ const power_of_ten = try std.math.powi(T, 10, exponent);
+
+ if (exponent > 0) {
+ try testing.expectEqual(exponent - 1, log10_int(power_of_ten - 9));
+ try testing.expectEqual(exponent - 1, log10_int(power_of_ten - 1));
}
+ try testing.expectEqual(exponent, log10_int(power_of_ten));
+ try testing.expectEqual(exponent, log10_int(power_of_ten + 1));
+ try testing.expectEqual(exponent, log10_int(power_of_ten + 8));
}
+ try testing.expectEqual(max_exponent, log10_int(@as(T, std.math.maxInt(T))));
}
}
diff --git a/lib/std/math/nextafter.zig b/lib/std/math/nextafter.zig
index 9008858a62..7d2396ed26 100644
--- a/lib/std/math/nextafter.zig
+++ b/lib/std/math/nextafter.zig
@@ -102,8 +102,6 @@ fn nextAfterFloat(comptime T: type, x: T, y: T) T {
}
test "math.nextAfter.int" {
- if (@import("builtin").zig_backend == .stage2_x86_64) return error.SkipZigTest;
-
try expect(nextAfter(i0, 0, 0) == 0);
try expect(nextAfter(u0, 0, 0) == 0);
try expect(nextAfter(i1, 0, 0) == 0);
diff --git a/lib/std/mem.zig b/lib/std/mem.zig
index 55c51d5959..fc0c226894 100644
--- a/lib/std/mem.zig
+++ b/lib/std/mem.zig
@@ -1338,7 +1338,7 @@ pub fn indexOf(comptime T: type, haystack: []const T, needle: []const T) ?usize
pub fn lastIndexOfLinear(comptime T: type, haystack: []const T, needle: []const T) ?usize {
+ // A needle longer than the haystack can never match. Returning early also
+ // keeps `haystack.len - needle.len` below from underflowing `usize`.
+ if (needle.len > haystack.len) return null;
+ // Try every start position from the last possible one down to 0. `i` is
+ // unsigned, so the `i == 0` exit check runs before the decrement.
var i: usize = haystack.len - needle.len;
while (true) : (i -= 1) {
- if (mem.eql(T, haystack[i .. i + needle.len], needle)) return i;
+ if (mem.eql(T, haystack[i..][0..needle.len], needle)) return i;
if (i == 0) return null;
}
}
@@ -1346,14 +1346,35 @@ pub fn lastIndexOfLinear(comptime T: type, haystack: []const T, needle: []const
/// Consider using `indexOfPos` instead of this, which will automatically use a
/// more sophisticated algorithm on larger inputs.
pub fn indexOfPosLinear(comptime T: type, haystack: []const T, start_index: usize, needle: []const T) ?usize {
+ if (needle.len > haystack.len) return null;
var i: usize = start_index;
const end = haystack.len - needle.len;
while (i <= end) : (i += 1) {
- if (eql(T, haystack[i .. i + needle.len], needle)) return i;
+ if (eql(T, haystack[i..][0..needle.len], needle)) return i;
}
return null;
}
+// Exercises `indexOfPosLinear` from start index 0 with needles of length 0, 1 and 2.
+test indexOfPosLinear {
+ // Empty needle: reported at index 0, even when the haystack is empty too.
+ try testing.expectEqual(0, indexOfPosLinear(u8, "", 0, ""));
+ try testing.expectEqual(0, indexOfPosLinear(u8, "123", 0, ""));
+
+ // One-element needle: empty haystack, match, mismatch, and match at a later index.
+ try testing.expectEqual(null, indexOfPosLinear(u8, "", 0, "1"));
+ try testing.expectEqual(0, indexOfPosLinear(u8, "1", 0, "1"));
+ try testing.expectEqual(null, indexOfPosLinear(u8, "2", 0, "1"));
+ try testing.expectEqual(1, indexOfPosLinear(u8, "21", 0, "1"));
+ try testing.expectEqual(null, indexOfPosLinear(u8, "222", 0, "1"));
+
+ // Two-element needle: haystacks shorter than the needle must yield null;
+ // when the needle occurs more than once the first occurrence is reported.
+ try testing.expectEqual(null, indexOfPosLinear(u8, "", 0, "12"));
+ try testing.expectEqual(null, indexOfPosLinear(u8, "1", 0, "12"));
+ try testing.expectEqual(null, indexOfPosLinear(u8, "2", 0, "12"));
+ try testing.expectEqual(0, indexOfPosLinear(u8, "12", 0, "12"));
+ try testing.expectEqual(null, indexOfPosLinear(u8, "21", 0, "12"));
+ try testing.expectEqual(1, indexOfPosLinear(u8, "212", 0, "12"));
+ try testing.expectEqual(0, indexOfPosLinear(u8, "122", 0, "12"));
+ try testing.expectEqual(1, indexOfPosLinear(u8, "212112", 0, "12"));
+}
+
fn boyerMooreHorspoolPreprocessReverse(pattern: []const u8, table: *[256]usize) void {
for (table) |*c| {
c.* = pattern.len;
@@ -4423,7 +4444,7 @@ test "read/write(Var)PackedInt" {
const foreign_endian: Endian = if (native_endian == .big) .little else .big;
const expect = std.testing.expect;
- var prng = std.rand.DefaultPrng.init(1234);
+ var prng = std.Random.DefaultPrng.init(1234);
const random = prng.random();
@setEvalBranchQuota(10_000);
diff --git a/lib/std/meta.zig b/lib/std/meta.zig
index 0df80359ac..17df0650f3 100644
--- a/lib/std/meta.zig
+++ b/lib/std/meta.zig
@@ -460,13 +460,12 @@ test "std.meta.FieldType" {
try testing.expect(FieldType(U, .d) == *const u8);
}
-pub fn fieldNames(comptime T: type) *const [fields(T).len][]const u8 {
+pub fn fieldNames(comptime T: type) *const [fields(T).len][:0]const u8 {
return comptime blk: {
const fieldInfos = fields(T);
- var names: [fieldInfos.len][]const u8 = undefined;
- for (fieldInfos, 0..) |field, i| {
- names[i] = field.name;
- }
+ var names: [fieldInfos.len][:0]const u8 = undefined;
+ // This concat can be removed with the next zig1 update.
+ for (&names, fieldInfos) |*name, field| name.* = field.name ++ "";
break :blk &names;
};
}
@@ -1287,5 +1286,6 @@ test "hasUniqueRepresentation" {
try testing.expect(!hasUniqueRepresentation([]u8));
try testing.expect(!hasUniqueRepresentation([]const u8));
- try testing.expect(hasUniqueRepresentation(@Vector(4, u16)));
+ try testing.expect(hasUniqueRepresentation(@Vector(std.simd.suggestVectorLength(u8) orelse 1, u8)));
+ try testing.expect(@sizeOf(@Vector(3, u8)) == 3 or !hasUniqueRepresentation(@Vector(3, u8)));
}
diff --git a/lib/std/net.zig b/lib/std/net.zig
index 28967cba55..66b90867c6 100644
--- a/lib/std/net.zig
+++ b/lib/std/net.zig
@@ -4,15 +4,17 @@ const assert = std.debug.assert;
const net = @This();
const mem = std.mem;
const os = std.os;
+const posix = std.posix;
const fs = std.fs;
const io = std.io;
const native_endian = builtin.target.cpu.arch.endian();
// Windows 10 added support for unix sockets in build 17063, redstone 4 is the
// first release to support them.
-pub const has_unix_sockets = @hasDecl(os.sockaddr, "un") and
- (builtin.target.os.tag != .windows or
- builtin.os.version_range.windows.isAtLeast(.win10_rs4) orelse false);
+pub const has_unix_sockets = switch (builtin.os.tag) {
+ .windows => builtin.os.version_range.windows.isAtLeast(.win10_rs4) orelse false,
+ else => true,
+};
pub const IPParseError = error{
Overflow,
@@ -122,7 +124,7 @@ pub const Address = extern union {
@memset(&sock_addr.path, 0);
@memcpy(sock_addr.path[0..path.len], path);
- return Address{ .un = sock_addr };
+ return .{ .un = sock_addr };
}
/// Returns the port in native endian.
@@ -206,6 +208,60 @@ pub const Address = extern union {
else => unreachable,
}
}
+
+ /// Errors `listen` can return: the combined error sets of the socket creation,
+ /// option setting, bind, listen, and getsockname calls it makes.
+ pub const ListenError = posix.SocketError || posix.BindError || posix.ListenError ||
+ posix.SetSockOptError || posix.GetSockNameError;
+
+ /// Settings for `listen`. Every field has a default, so `.{}` is valid.
+ pub const ListenOptions = struct {
+ /// How many connections the kernel will accept on the application's behalf.
+ /// If more than this many connections pool in the kernel, clients will start
+ /// seeing "Connection refused".
+ kernel_backlog: u31 = 128,
+ /// Sets SO_REUSEADDR and SO_REUSEPORT on POSIX.
+ /// Sets SO_REUSEADDR on Windows, which is roughly equivalent.
+ reuse_address: bool = false,
+ /// Deprecated. Does the same thing as reuse_address.
+ reuse_port: bool = false,
+ /// Adds `SOCK.NONBLOCK` to the flags used to create the listening socket.
+ force_nonblocking: bool = false,
+ };
+
+ /// The returned `Server` has an open `stream`.
+ /// Creates a stream socket for `address`, binds it, and starts listening;
+ /// `listen_address` is then filled in from `getsockname`.
+ pub fn listen(address: Address, options: ListenOptions) ListenError!Server {
+ const nonblock: u32 = if (options.force_nonblocking) posix.SOCK.NONBLOCK else 0;
+ const sock_flags = posix.SOCK.STREAM | posix.SOCK.CLOEXEC | nonblock;
+ // Unix-domain sockets use protocol 0; every other family is opened as TCP.
+ const proto: u32 = if (address.any.family == posix.AF.UNIX) 0 else posix.IPPROTO.TCP;
+
+ const sockfd = try posix.socket(address.any.family, sock_flags, proto);
+ // `listen_address` stays undefined until `getsockname` succeeds below.
+ var s: Server = .{
+ .listen_address = undefined,
+ .stream = .{ .handle = sockfd },
+ };
+ // Close the socket if any later step fails.
+ errdefer s.stream.close();
+
+ // Either option enables address reuse; `reuse_port` is a deprecated alias.
+ if (options.reuse_address or options.reuse_port) {
+ try posix.setsockopt(
+ sockfd,
+ posix.SOL.SOCKET,
+ posix.SO.REUSEADDR,
+ &mem.toBytes(@as(c_int, 1)),
+ );
+ // SO_REUSEPORT is additionally set everywhere except Windows.
+ switch (builtin.os.tag) {
+ .windows => {},
+ else => try posix.setsockopt(
+ sockfd,
+ posix.SOL.SOCKET,
+ posix.SO.REUSEPORT,
+ &mem.toBytes(@as(c_int, 1)),
+ ),
+ }
+ }
+
+ var socklen = address.getOsSockLen();
+ try posix.bind(sockfd, &address.any, socklen);
+ try posix.listen(sockfd, options.kernel_backlog);
+ // Record the address the socket reports after binding.
+ try posix.getsockname(sockfd, &s.listen_address.any, &socklen);
+ return s;
+ }
};
pub const Ip4Address = extern struct {
@@ -651,33 +707,25 @@ pub const Ip6Address = extern struct {
};
pub fn connectUnixSocket(path: []const u8) !Stream {
- const opt_non_block = if (std.io.is_async) os.SOCK.NONBLOCK else 0;
+ const opt_non_block = 0;
const sockfd = try os.socket(
os.AF.UNIX,
os.SOCK.STREAM | os.SOCK.CLOEXEC | opt_non_block,
0,
);
- errdefer os.closeSocket(sockfd);
+ errdefer Stream.close(.{ .handle = sockfd });
var addr = try std.net.Address.initUnix(path);
+ try os.connect(sockfd, &addr.any, addr.getOsSockLen());
- if (std.io.is_async) {
- const loop = std.event.Loop.instance orelse return error.WouldBlock;
- try loop.connect(sockfd, &addr.any, addr.getOsSockLen());
- } else {
- try os.connect(sockfd, &addr.any, addr.getOsSockLen());
- }
-
- return Stream{
- .handle = sockfd,
- };
+ return Stream{ .handle = sockfd };
}
fn if_nametoindex(name: []const u8) IPv6InterfaceError!u32 {
if (builtin.target.os.tag == .linux) {
var ifr: os.ifreq = undefined;
const sockfd = try os.socket(os.AF.UNIX, os.SOCK.DGRAM | os.SOCK.CLOEXEC, 0);
- defer os.closeSocket(sockfd);
+ defer Stream.close(.{ .handle = sockfd });
@memcpy(ifr.ifrn.name[0..name.len], name);
ifr.ifrn.name[name.len] = 0;
@@ -742,18 +790,13 @@ pub fn tcpConnectToHost(allocator: mem.Allocator, name: []const u8, port: u16) T
pub const TcpConnectToAddressError = std.os.SocketError || std.os.ConnectError;
pub fn tcpConnectToAddress(address: Address) TcpConnectToAddressError!Stream {
- const nonblock = if (std.io.is_async) os.SOCK.NONBLOCK else 0;
+ const nonblock = 0;
const sock_flags = os.SOCK.STREAM | nonblock |
(if (builtin.target.os.tag == .windows) 0 else os.SOCK.CLOEXEC);
const sockfd = try os.socket(address.any.family, sock_flags, os.IPPROTO.TCP);
- errdefer os.closeSocket(sockfd);
+ errdefer Stream.close(.{ .handle = sockfd });
- if (std.io.is_async) {
- const loop = std.event.Loop.instance orelse return error.WouldBlock;
- try loop.connect(sockfd, &address.any, address.getOsSockLen());
- } else {
- try os.connect(sockfd, &address.any, address.getOsSockLen());
- }
+ try os.connect(sockfd, &address.any, address.getOsSockLen());
return Stream{ .handle = sockfd };
}
@@ -1081,7 +1124,7 @@ fn linuxLookupName(
var prefixlen: i32 = 0;
const sock_flags = os.SOCK.DGRAM | os.SOCK.CLOEXEC;
if (os.socket(addr.addr.any.family, sock_flags, os.IPPROTO.UDP)) |fd| syscalls: {
- defer os.closeSocket(fd);
+ defer Stream.close(.{ .handle = fd });
os.connect(fd, da, dalen) catch break :syscalls;
key |= DAS_USABLE;
os.getsockname(fd, sa, &salen) catch break :syscalls;
@@ -1566,7 +1609,7 @@ fn resMSendRc(
},
else => |e| return e,
};
- defer os.closeSocket(fd);
+ defer Stream.close(.{ .handle = fd });
// Past this point, there are no errors. Each individual query will
// yield either no reply (indicated by zero length) or an answer
@@ -1618,11 +1661,7 @@ fn resMSendRc(
if (answers[i].len == 0) {
var j: usize = 0;
while (j < ns.len) : (j += 1) {
- if (std.io.is_async) {
- _ = std.event.Loop.instance.?.sendto(fd, queries[i], os.MSG.NOSIGNAL, &ns[j].any, sl) catch undefined;
- } else {
- _ = os.sendto(fd, queries[i], os.MSG.NOSIGNAL, &ns[j].any, sl) catch undefined;
- }
+ _ = os.sendto(fd, queries[i], os.MSG.NOSIGNAL, &ns[j].any, sl) catch undefined;
}
}
}
@@ -1637,10 +1676,7 @@ fn resMSendRc(
while (true) {
var sl_copy = sl;
- const rlen = if (std.io.is_async)
- std.event.Loop.instance.?.recvfrom(fd, answer_bufs[next], 0, &sa.any, &sl_copy) catch break
- else
- os.recvfrom(fd, answer_bufs[next], 0, &sa.any, &sl_copy) catch break;
+ const rlen = os.recvfrom(fd, answer_bufs[next], 0, &sa.any, &sl_copy) catch break;
// Ignore non-identifiable packets
if (rlen < 4) continue;
@@ -1666,11 +1702,7 @@ fn resMSendRc(
0, 3 => {},
2 => if (servfail_retry != 0) {
servfail_retry -= 1;
- if (std.io.is_async) {
- _ = std.event.Loop.instance.?.sendto(fd, queries[i], os.MSG.NOSIGNAL, &ns[j].any, sl) catch undefined;
- } else {
- _ = os.sendto(fd, queries[i], os.MSG.NOSIGNAL, &ns[j].any, sl) catch undefined;
- }
+ _ = os.sendto(fd, queries[i], os.MSG.NOSIGNAL, &ns[j].any, sl) catch undefined;
},
else => continue,
}
@@ -1753,13 +1785,15 @@ fn dnsParseCallback(ctx: dpc_ctx, rr: u8, data: []const u8, packet: []const u8)
}
pub const Stream = struct {
- // Underlying socket descriptor.
- // Note that on some platforms this may not be interchangeable with a
- // regular files descriptor.
- handle: os.socket_t,
-
- pub fn close(self: Stream) void {
- os.closeSocket(self.handle);
+ /// Underlying platform-defined type which may or may not be
+ /// interchangeable with a file system file descriptor.
+ handle: posix.socket_t,
+
+ pub fn close(s: Stream) void {
+ switch (builtin.os.tag) {
+ .windows => std.os.windows.closesocket(s.handle) catch unreachable,
+ else => posix.close(s.handle),
+ }
}
pub const ReadError = os.ReadError;
@@ -1778,14 +1812,10 @@ pub const Stream = struct {
pub fn read(self: Stream, buffer: []u8) ReadError!usize {
if (builtin.os.tag == .windows) {
- return os.windows.ReadFile(self.handle, buffer, null, io.default_mode);
+ return os.windows.ReadFile(self.handle, buffer, null);
}
- if (std.io.is_async) {
- return std.event.Loop.instance.?.read(self.handle, buffer, false);
- } else {
- return os.read(self.handle, buffer);
- }
+ return os.read(self.handle, buffer);
}
pub fn readv(s: Stream, iovecs: []const os.iovec) ReadError!usize {
@@ -1793,7 +1823,7 @@ pub const Stream = struct {
// TODO improve this to use ReadFileScatter
if (iovecs.len == 0) return @as(usize, 0);
const first = iovecs[0];
- return os.windows.ReadFile(s.handle, first.iov_base[0..first.iov_len], null, io.default_mode);
+ return os.windows.ReadFile(s.handle, first.iov_base[0..first.iov_len], null);
}
return os.readv(s.handle, iovecs);
@@ -1827,14 +1857,10 @@ pub const Stream = struct {
/// use non-blocking I/O.
pub fn write(self: Stream, buffer: []const u8) WriteError!usize {
if (builtin.os.tag == .windows) {
- return os.windows.WriteFile(self.handle, buffer, null, io.default_mode);
+ return os.windows.WriteFile(self.handle, buffer, null);
}
- if (std.io.is_async) {
- return std.event.Loop.instance.?.write(self.handle, buffer, false);
- } else {
- return os.write(self.handle, buffer);
- }
+ return os.write(self.handle, buffer);
}
pub fn writeAll(self: Stream, bytes: []const u8) WriteError!void {
@@ -1847,15 +1873,7 @@ pub const Stream = struct {
/// See https://github.com/ziglang/zig/issues/7699
/// See equivalent function: `std.fs.File.writev`.
pub fn writev(self: Stream, iovecs: []const os.iovec_const) WriteError!usize {
- if (std.io.is_async) {
- // TODO improve to actually take advantage of writev syscall, if available.
- if (iovecs.len == 0) return 0;
- const first_buffer = iovecs[0].iov_base[0..iovecs[0].iov_len];
- try self.write(first_buffer);
- return first_buffer.len;
- } else {
- return os.writev(self.handle, iovecs);
- }
+ return os.writev(self.handle, iovecs);
}
/// The `iovecs` parameter is mutable because this function needs to mutate the fields in
@@ -1879,163 +1897,38 @@ pub const Stream = struct {
}
};
-pub const StreamServer = struct {
- /// Copied from `Options` on `init`.
- kernel_backlog: u31,
- reuse_address: bool,
- reuse_port: bool,
- force_nonblocking: bool,
-
- /// `undefined` until `listen` returns successfully.
+pub const Server = struct {
listen_address: Address,
+ stream: std.net.Stream,
- sockfd: ?os.socket_t,
-
- pub const Options = struct {
- /// How many connections the kernel will accept on the application's behalf.
- /// If more than this many connections pool in the kernel, clients will start
- /// seeing "Connection refused".
- kernel_backlog: u31 = 128,
-
- /// Enable SO.REUSEADDR on the socket.
- reuse_address: bool = false,
-
- /// Enable SO.REUSEPORT on the socket.
- reuse_port: bool = false,
-
- /// Force non-blocking mode.
- force_nonblocking: bool = false,
+ pub const Connection = struct {
+ stream: std.net.Stream,
+ address: Address,
};
- /// After this call succeeds, resources have been acquired and must
- /// be released with `deinit`.
- pub fn init(options: Options) StreamServer {
- return StreamServer{
- .sockfd = null,
- .kernel_backlog = options.kernel_backlog,
- .reuse_address = options.reuse_address,
- .reuse_port = options.reuse_port,
- .force_nonblocking = options.force_nonblocking,
- .listen_address = undefined,
- };
- }
-
- /// Release all resources. The `StreamServer` memory becomes `undefined`.
- pub fn deinit(self: *StreamServer) void {
- self.close();
- self.* = undefined;
- }
-
- pub fn listen(self: *StreamServer, address: Address) !void {
- const nonblock = if (std.io.is_async) os.SOCK.NONBLOCK else 0;
- const sock_flags = os.SOCK.STREAM | os.SOCK.CLOEXEC | nonblock;
- var use_sock_flags: u32 = sock_flags;
- if (self.force_nonblocking) use_sock_flags |= os.SOCK.NONBLOCK;
- const proto = if (address.any.family == os.AF.UNIX) @as(u32, 0) else os.IPPROTO.TCP;
-
- const sockfd = try os.socket(address.any.family, use_sock_flags, proto);
- self.sockfd = sockfd;
- errdefer {
- os.closeSocket(sockfd);
- self.sockfd = null;
- }
-
- if (self.reuse_address) {
- try os.setsockopt(
- sockfd,
- os.SOL.SOCKET,
- os.SO.REUSEADDR,
- &mem.toBytes(@as(c_int, 1)),
- );
- }
- if (@hasDecl(os.SO, "REUSEPORT") and self.reuse_port) {
- try os.setsockopt(
- sockfd,
- os.SOL.SOCKET,
- os.SO.REUSEPORT,
- &mem.toBytes(@as(c_int, 1)),
- );
- }
-
- var socklen = address.getOsSockLen();
- try os.bind(sockfd, &address.any, socklen);
- try os.listen(sockfd, self.kernel_backlog);
- try os.getsockname(sockfd, &self.listen_address.any, &socklen);
- }
-
- /// Stop listening. It is still necessary to call `deinit` after stopping listening.
- /// Calling `deinit` will automatically call `close`. It is safe to call `close` when
- /// not listening.
- pub fn close(self: *StreamServer) void {
- if (self.sockfd) |fd| {
- os.closeSocket(fd);
- self.sockfd = null;
- self.listen_address = undefined;
- }
+ pub fn deinit(s: *Server) void {
+ s.stream.close();
+ s.* = undefined;
}
- pub const AcceptError = error{
- ConnectionAborted,
-
- /// The per-process limit on the number of open file descriptors has been reached.
- ProcessFdQuotaExceeded,
-
- /// The system-wide limit on the total number of open files has been reached.
- SystemFdQuotaExceeded,
-
- /// Not enough free memory. This often means that the memory allocation
- /// is limited by the socket buffer limits, not by the system memory.
- SystemResources,
-
- /// Socket is not listening for new connections.
- SocketNotListening,
-
- ProtocolFailure,
-
- /// Socket is in non-blocking mode and there is no connection to accept.
- WouldBlock,
-
- /// Firewall rules forbid connection.
- BlockedByFirewall,
-
- FileDescriptorNotASocket,
-
- ConnectionResetByPeer,
+ pub const AcceptError = posix.AcceptError;
- NetworkSubsystemFailed,
-
- OperationNotSupported,
- } || os.UnexpectedError;
-
- pub const Connection = struct {
- stream: Stream,
- address: Address,
- };
-
- /// If this function succeeds, the returned `Connection` is a caller-managed resource.
- pub fn accept(self: *StreamServer) AcceptError!Connection {
+ /// Blocks until a client connects to the server. The returned `Connection` has
+ /// an open stream.
+ pub fn accept(s: *Server) AcceptError!Connection {
var accepted_addr: Address = undefined;
- var adr_len: os.socklen_t = @sizeOf(Address);
- const accept_result = blk: {
- if (std.io.is_async) {
- const loop = std.event.Loop.instance orelse return error.UnexpectedError;
- break :blk loop.accept(self.sockfd.?, &accepted_addr.any, &adr_len, os.SOCK.CLOEXEC);
- } else {
- break :blk os.accept(self.sockfd.?, &accepted_addr.any, &adr_len, os.SOCK.CLOEXEC);
- }
+ var addr_len: posix.socklen_t = @sizeOf(Address);
+ const fd = try posix.accept(s.stream.handle, &accepted_addr.any, &addr_len, posix.SOCK.CLOEXEC);
+ return .{
+ .stream = .{ .handle = fd },
+ .address = accepted_addr,
};
-
- if (accept_result) |fd| {
- return Connection{
- .stream = Stream{ .handle = fd },
- .address = accepted_addr,
- };
- } else |err| {
- return err;
- }
}
};
test {
_ = @import("net/test.zig");
+ _ = Server;
+ _ = Stream;
+ _ = Address;
}
diff --git a/lib/std/net/test.zig b/lib/std/net/test.zig
index 8cc2a09d10..3e316c5456 100644
--- a/lib/std/net/test.zig
+++ b/lib/std/net/test.zig
@@ -181,11 +181,9 @@ test "listen on a port, send bytes, receive bytes" {
// configured.
const localhost = try net.Address.parseIp("127.0.0.1", 0);
- var server = net.StreamServer.init(.{});
+ var server = try localhost.listen(.{});
defer server.deinit();
- try server.listen(localhost);
-
const S = struct {
fn clientFn(server_address: net.Address) !void {
const socket = try net.tcpConnectToAddress(server_address);
@@ -207,54 +205,6 @@ test "listen on a port, send bytes, receive bytes" {
try testing.expectEqualSlices(u8, "Hello world!", buf[0..n]);
}
-test "listen on a port, send bytes, receive bytes, async-only" {
- if (!std.io.is_async) return error.SkipZigTest;
-
- if (builtin.os.tag != .linux and !builtin.os.tag.isDarwin()) {
- // TODO build abstractions for other operating systems
- return error.SkipZigTest;
- }
-
- // TODO doing this at comptime crashed the compiler
- const localhost = try net.Address.parseIp("127.0.0.1", 0);
-
- var server = net.StreamServer.init(net.StreamServer.Options{});
- defer server.deinit();
- try server.listen(localhost);
-
- var server_frame = async testServer(&server);
- var client_frame = async testClient(server.listen_address);
-
- try await server_frame;
- try await client_frame;
-}
-
-test "listen on ipv4 try connect on ipv6 then ipv4" {
- if (!std.io.is_async) return error.SkipZigTest;
-
- if (builtin.os.tag != .linux and !builtin.os.tag.isDarwin()) {
- // TODO build abstractions for other operating systems
- return error.SkipZigTest;
- }
-
- // TODO doing this at comptime crashed the compiler
- const localhost = try net.Address.parseIp("127.0.0.1", 0);
-
- var server = net.StreamServer.init(net.StreamServer.Options{});
- defer server.deinit();
- try server.listen(localhost);
-
- var server_frame = async testServer(&server);
- var client_frame = async testClientToHost(
- testing.allocator,
- "localhost",
- server.listen_address.getPort(),
- );
-
- try await server_frame;
- try await client_frame;
-}
-
test "listen on an in use port" {
if (builtin.os.tag != .linux and comptime !builtin.os.tag.isDarwin()) {
// TODO build abstractions for other operating systems
@@ -263,17 +213,11 @@ test "listen on an in use port" {
const localhost = try net.Address.parseIp("127.0.0.1", 0);
- var server1 = net.StreamServer.init(net.StreamServer.Options{
- .reuse_port = true,
- });
+ var server1 = try localhost.listen(.{ .reuse_port = true });
defer server1.deinit();
- try server1.listen(localhost);
- var server2 = net.StreamServer.init(net.StreamServer.Options{
- .reuse_port = true,
- });
+ var server2 = try server1.listen_address.listen(.{ .reuse_port = true });
defer server2.deinit();
- try server2.listen(server1.listen_address);
}
fn testClientToHost(allocator: mem.Allocator, name: []const u8, port: u16) anyerror!void {
@@ -300,7 +244,7 @@ fn testClient(addr: net.Address) anyerror!void {
try testing.expect(mem.eql(u8, msg, "hello from server\n"));
}
-fn testServer(server: *net.StreamServer) anyerror!void {
+fn testServer(server: *net.Server) anyerror!void {
if (builtin.os.tag == .wasi) return error.SkipZigTest;
var client = try server.accept();
@@ -322,15 +266,14 @@ test "listen on a unix socket, send bytes, receive bytes" {
}
}
- var server = net.StreamServer.init(.{});
- defer server.deinit();
-
const socket_path = try generateFileName("socket.unix");
defer testing.allocator.free(socket_path);
const socket_addr = try net.Address.initUnix(socket_path);
defer std.fs.cwd().deleteFile(socket_path) catch {};
- try server.listen(socket_addr);
+
+ var server = try socket_addr.listen(.{});
+ defer server.deinit();
const S = struct {
fn clientFn(path: []const u8) !void {
@@ -371,9 +314,8 @@ test "non-blocking tcp server" {
}
const localhost = try net.Address.parseIp("127.0.0.1", 0);
- var server = net.StreamServer.init(.{ .force_nonblocking = true });
+ // `Address.listen` returns `ListenError!Server`, so the result must be unwrapped
+ // before `deinit` and `accept` can be called on it.
+ var server = try localhost.listen(.{ .force_nonblocking = true });
defer server.deinit();
- try server.listen(localhost);
const accept_err = server.accept();
try testing.expectError(error.WouldBlock, accept_err);
diff --git a/lib/std/os.zig b/lib/std/os.zig
index de414338f4..00496602fd 100644
--- a/lib/std/os.zig
+++ b/lib/std/os.zig
@@ -3,7 +3,7 @@
//! * Convert "errno"-style error codes into Zig errors.
//! * When null-terminated byte buffers are required, provide APIs which accept
//! slices as well as APIs which accept null-terminated byte buffers. Same goes
-//! for UTF-16LE encoding.
+//! for WTF-16LE encoding.
//! * Where operating systems share APIs, e.g. POSIX, these thin wrappers provide
//! cross-platform abstractions.
//! * When there exists a corresponding libc function and linking libc, the libc
@@ -24,7 +24,6 @@ const elf = std.elf;
const fs = std.fs;
const dl = @import("dynamic_library.zig");
const MAX_PATH_BYTES = std.fs.MAX_PATH_BYTES;
-const is_windows = builtin.os.tag == .windows;
pub const darwin = std.c;
pub const dragonfly = std.c;
@@ -60,18 +59,23 @@ test {
/// Applications can override the `system` API layer in their root source file.
/// Otherwise, when linking libc, this is the C API.
/// When not linking libc, it is the OS-specific system interface.
-pub const system = if (@hasDecl(root, "os") and root.os != @This())
+pub const system = if (@hasDecl(root, "os") and @hasDecl(root.os, "system") and root.os != @This())
root.os.system
-else if (builtin.link_libc or is_windows)
+else if (use_libc)
std.c
else switch (builtin.os.tag) {
.linux => linux,
.plan9 => plan9,
- .wasi => wasi,
.uefi => uefi,
else => struct {},
};
+/// Whether to use libc for the POSIX API layer.
+const use_libc = builtin.link_libc or switch (builtin.os.tag) {
+ .windows, .wasi => true,
+ else => false,
+};
+
pub const AF = system.AF;
pub const AF_SUN = system.AF_SUN;
pub const ARCH = system.ARCH;
@@ -87,10 +91,7 @@ pub const F = system.F;
pub const FD_CLOEXEC = system.FD_CLOEXEC;
pub const Flock = system.Flock;
pub const HOST_NAME_MAX = system.HOST_NAME_MAX;
-pub const HW = switch (builtin.os.tag) {
- .openbsd => system.HW,
- else => .{},
-};
+pub const HW = system.HW;
pub const IFNAMESIZE = system.IFNAMESIZE;
pub const IOV_MAX = system.IOV_MAX;
pub const IPPROTO = system.IPPROTO;
@@ -105,19 +106,13 @@ pub const MFD = system.MFD;
pub const MMAP2_UNIT = system.MMAP2_UNIT;
pub const MSG = system.MSG;
pub const NAME_MAX = system.NAME_MAX;
-pub const O = switch (builtin.os.tag) {
- // We want to expose the POSIX-like OFLAGS, so we use std.c.wasi.O instead
- // of std.os.wasi.O, which is for non-POSIX-like `wasi.path_open`, etc.
- .wasi => std.c.O,
- else => system.O,
-};
+pub const O = system.O;
pub const PATH_MAX = system.PATH_MAX;
pub const POLL = system.POLL;
pub const POSIX_FADV = system.POSIX_FADV;
pub const PR = system.PR;
pub const PROT = system.PROT;
pub const REG = system.REG;
-pub const RIGHT = system.RIGHT;
pub const RLIM = system.RLIM;
pub const RR = system.RR;
pub const S = system.S;
@@ -137,6 +132,7 @@ pub const STDOUT_FILENO = system.STDOUT_FILENO;
pub const SYS = system.SYS;
pub const Sigaction = system.Sigaction;
pub const Stat = system.Stat;
+pub const T = system.T;
pub const TCSA = system.TCSA;
pub const TCP = system.TCP;
pub const VDSO = system.VDSO;
@@ -151,12 +147,9 @@ pub const dl_phdr_info = system.dl_phdr_info;
pub const empty_sigset = system.empty_sigset;
pub const filled_sigset = system.filled_sigset;
pub const fd_t = system.fd_t;
-pub const fdflags_t = system.fdflags_t;
-pub const fdstat_t = system.fdstat_t;
pub const gid_t = system.gid_t;
pub const ifreq = system.ifreq;
pub const ino_t = system.ino_t;
-pub const lookupflags_t = system.lookupflags_t;
pub const mcontext_t = system.mcontext_t;
pub const mode_t = system.mode_t;
pub const msghdr = system.msghdr;
@@ -164,14 +157,12 @@ pub const msghdr_const = system.msghdr_const;
pub const nfds_t = system.nfds_t;
pub const nlink_t = system.nlink_t;
pub const off_t = system.off_t;
-pub const oflags_t = system.oflags_t;
pub const pid_t = system.pid_t;
pub const pollfd = system.pollfd;
pub const port_t = system.port_t;
pub const port_event = system.port_event;
pub const port_notify = system.port_notify;
pub const file_obj = system.file_obj;
-pub const rights_t = system.rights_t;
pub const rlim_t = system.rlim_t;
pub const rlimit = system.rlimit;
pub const rlimit_resource = system.rlimit_resource;
@@ -182,8 +173,6 @@ pub const sigset_t = system.sigset_t;
pub const sockaddr = system.sockaddr;
pub const socklen_t = system.socklen_t;
pub const stack_t = system.stack_t;
-pub const tcflag_t = system.tcflag_t;
-pub const termios = system.termios;
pub const time_t = system.time_t;
pub const timespec = system.timespec;
pub const timestamp_t = system.timestamp_t;
@@ -193,6 +182,18 @@ pub const ucontext_t = system.ucontext_t;
pub const uid_t = system.uid_t;
pub const user_desc = system.user_desc;
pub const utsname = system.utsname;
+pub const winsize = system.winsize;
+
+pub const termios = system.termios;
+pub const CSIZE = system.CSIZE;
+pub const NCCS = system.NCCS;
+pub const cc_t = system.cc_t;
+pub const V = system.V;
+pub const speed_t = system.speed_t;
+pub const tc_iflag_t = system.tc_iflag_t;
+pub const tc_oflag_t = system.tc_oflag_t;
+pub const tc_cflag_t = system.tc_cflag_t;
+pub const tc_lflag_t = system.tc_lflag_t;
pub const F_OK = system.F_OK;
pub const R_OK = system.R_OK;
@@ -209,6 +210,12 @@ pub const iovec_const = extern struct {
iov_len: usize,
};
+/// File access mode: read-only, write-only, or read-write, stored in two bits.
+/// Passed in the `ACCMODE` field of open flags, e.g.
+/// `.{ .ACCMODE = .RDONLY, .CLOEXEC = true }`.
+pub const ACCMODE = enum(u2) {
+ RDONLY = 0,
+ WRONLY = 1,
+ RDWR = 2,
+};
+
pub const LOG = struct {
/// system is unusable
pub const EMERG = 0;
@@ -460,13 +467,13 @@ fn fchmodat2(dirfd: fd_t, path: []const u8, mode: mode_t, flags: u32) FChmodAtEr
// Fallback to changing permissions using procfs:
//
- // 1. Open `path` as an `O.PATH` descriptor.
+ // 1. Open `path` as a `PATH` descriptor.
// 2. Stat the fd and check if it isn't a symbolic link.
// 3. Generate the procfs reference to the fd via `/proc/self/fd/{fd}`.
// 4. Pass the procfs path to `chmod` with the `mode`.
var pathfd: fd_t = undefined;
while (true) {
- const rc = system.openat(dirfd, &path_c, O.PATH | O.NOFOLLOW | O.CLOEXEC, @as(mode_t, 0));
+ const rc = system.openat(dirfd, &path_c, .{ .PATH = true, .NOFOLLOW = true, .CLOEXEC = true }, @as(mode_t, 0));
switch (system.getErrno(rc)) {
.SUCCESS => {
pathfd = @as(fd_t, @intCast(rc));
@@ -491,6 +498,7 @@ fn fchmodat2(dirfd: fd_t, path: []const u8, mode: mode_t, flags: u32) FChmodAtEr
const stat = fstatatZ(pathfd, "", AT.EMPTY_PATH) catch |err| switch (err) {
error.NameTooLong => unreachable,
error.FileNotFound => unreachable,
+ error.InvalidUtf8 => unreachable,
else => |e| return e,
};
if ((stat.mode & S.IFMT) == S.IFLNK)
@@ -536,8 +544,10 @@ pub const FChownError = error{
/// any group of which the owner is a member. If the owner or group is
/// specified as `null`, the ID is not changed.
pub fn fchown(fd: fd_t, owner: ?uid_t, group: ?gid_t) FChownError!void {
- if (builtin.os.tag == .windows or builtin.os.tag == .wasi)
- @compileError("Unsupported OS");
+ switch (builtin.os.tag) {
+ .windows, .wasi => @compileError("Unsupported OS"),
+ else => {},
+ }
while (true) {
const res = system.fchown(fd, owner orelse @as(u32, 0) -% 1, group orelse @as(u32, 0) -% 1);
@@ -675,7 +685,7 @@ pub fn getrandom(buffer: []u8) GetRandomError!void {
}
fn getRandomBytesDevURandom(buf: []u8) !void {
- const fd = try openZ("/dev/urandom", O.RDONLY | O.CLOEXEC, 0);
+ const fd = try openZ("/dev/urandom", .{ .ACCMODE = .RDONLY, .CLOEXEC = true }, 0);
defer close(fd);
const st = try fstat(fd);
@@ -683,11 +693,7 @@ fn getRandomBytesDevURandom(buf: []u8) !void {
return error.NoDevice;
}
- const file = std.fs.File{
- .handle = fd,
- .capable_io_mode = .blocking,
- .intended_io_mode = .blocking,
- };
+ const file = std.fs.File{ .handle = fd };
const stream = file.reader();
stream.readNoEof(buf) catch return error.Unexpected;
}
@@ -747,7 +753,7 @@ pub fn abort() noreturn {
exit(127); // Pid 1 might not be signalled in some containers.
}
switch (builtin.os.tag) {
- .uefi, .wasi, .cuda, .amdhsa => @trap(),
+ .uefi, .wasi, .emscripten, .cuda, .amdhsa => @trap(),
else => system.abort(),
}
}
@@ -831,9 +837,6 @@ pub const ReadError = error{
NotOpenForReading,
SocketNotConnected,
- // Windows only
- NetNameDeleted,
-
/// This error occurs when no global event loop is configured,
/// and reading from the file descriptor would block.
WouldBlock,
@@ -856,7 +859,7 @@ pub const ReadError = error{
pub fn read(fd: fd_t, buf: []u8) ReadError!usize {
if (buf.len == 0) return 0;
if (builtin.os.tag == .windows) {
- return windows.ReadFile(fd, buf, null, std.io.default_mode);
+ return windows.ReadFile(fd, buf, null);
}
if (builtin.os.tag == .wasi and !builtin.link_libc) {
const iovs = [1]iovec{iovec{
@@ -995,7 +998,7 @@ pub const PReadError = ReadError || error{Unseekable};
pub fn pread(fd: fd_t, buf: []u8, offset: u64) PReadError!usize {
if (buf.len == 0) return 0;
if (builtin.os.tag == .windows) {
- return windows.ReadFile(fd, buf, offset, std.io.default_mode);
+ return windows.ReadFile(fd, buf, offset);
}
if (builtin.os.tag == .wasi and !builtin.link_libc) {
const iovs = [1]iovec{iovec{
@@ -1257,7 +1260,7 @@ pub const WriteError = error{
pub fn write(fd: fd_t, bytes: []const u8) WriteError!usize {
if (bytes.len == 0) return 0;
if (builtin.os.tag == .windows) {
- return windows.WriteFile(fd, bytes, null, std.io.default_mode);
+ return windows.WriteFile(fd, bytes, null);
}
if (builtin.os.tag == .wasi and !builtin.link_libc) {
@@ -1415,7 +1418,7 @@ pub const PWriteError = WriteError || error{Unseekable};
pub fn pwrite(fd: fd_t, bytes: []const u8, offset: u64) PWriteError!usize {
if (bytes.len == 0) return 0;
if (builtin.os.tag == .windows) {
- return windows.WriteFile(fd, bytes, offset, std.io.default_mode);
+ return windows.WriteFile(fd, bytes, offset);
}
if (builtin.os.tag == .wasi and !builtin.link_libc) {
const ciovs = [1]iovec_const{iovec_const{
@@ -1594,27 +1597,34 @@ pub const OpenError = error{
/// for 64-bit targets, as well as when opening directories.
FileTooBig,
- /// The path refers to directory but the `O.DIRECTORY` flag was not provided.
+ /// The path refers to a directory but the `DIRECTORY` flag was not provided.
IsDir,
/// A new path cannot be created because the device has no room for the new file.
- /// This error is only reachable when the `O.CREAT` flag is provided.
+ /// This error is only reachable when the `CREAT` flag is provided.
NoSpaceLeft,
/// A component used as a directory in the path was not, in fact, a directory, or
- /// `O.DIRECTORY` was specified and the path was not a directory.
+ /// `DIRECTORY` was specified and the path was not a directory.
NotDir,
- /// The path already exists and the `O.CREAT` and `O.EXCL` flags were provided.
+ /// The path already exists and the `CREAT` and `EXCL` flags were provided.
PathAlreadyExists,
DeviceBusy,
/// The underlying filesystem does not support file locks
FileLocksNotSupported,
+ /// Path contains characters that are disallowed by the underlying filesystem.
BadPathName,
+
+ /// WASI-only; file paths must be valid UTF-8.
InvalidUtf8,
+ /// Windows-only; file paths provided by the user must be valid WTF-8.
+ /// https://simonsapin.github.io/wtf-8/
+ InvalidWtf8,
+
/// On Windows, `\\server` or `\\server\share` was not found.
NetworkNotFound,
@@ -1632,24 +1642,28 @@ pub const OpenError = error{
} || UnexpectedError;
/// Open and possibly create a file. Keeps trying if it gets interrupted.
+/// On Windows, `file_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
+/// On WASI, `file_path` should be encoded as valid UTF-8.
+/// On other platforms, `file_path` is an opaque sequence of bytes with no particular encoding.
/// See also `openZ`.
-pub fn open(file_path: []const u8, flags: u32, perm: mode_t) OpenError!fd_t {
+pub fn open(file_path: []const u8, flags: O, perm: mode_t) OpenError!fd_t {
if (builtin.os.tag == .windows) {
- const file_path_w = try windows.sliceToPrefixedFileW(null, file_path);
- return openW(file_path_w.span(), flags, perm);
+ @compileError("Windows does not support POSIX; use Windows-specific API or cross-platform std.fs API");
} else if (builtin.os.tag == .wasi and !builtin.link_libc) {
- return openat(wasi.AT.FDCWD, file_path, flags, perm);
+ return openat(AT.FDCWD, file_path, flags, perm);
}
const file_path_c = try toPosixPath(file_path);
return openZ(&file_path_c, flags, perm);
}
/// Open and possibly create a file. Keeps trying if it gets interrupted.
+/// On Windows, `file_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
+/// On WASI, `file_path` should be encoded as valid UTF-8.
+/// On other platforms, `file_path` is an opaque sequence of bytes with no particular encoding.
/// See also `open`.
-pub fn openZ(file_path: [*:0]const u8, flags: u32, perm: mode_t) OpenError!fd_t {
+pub fn openZ(file_path: [*:0]const u8, flags: O, perm: mode_t) OpenError!fd_t {
if (builtin.os.tag == .windows) {
- const file_path_w = try windows.cStrToPrefixedFileW(null, file_path);
- return openW(file_path_w.span(), flags, perm);
+ @compileError("Windows does not support POSIX; use Windows-specific API or cross-platform std.fs API");
} else if (builtin.os.tag == .wasi and !builtin.link_libc) {
return open(mem.sliceTo(file_path, 0), flags, perm);
}
@@ -1685,65 +1699,18 @@ pub fn openZ(file_path: [*:0]const u8, flags: u32, perm: mode_t) OpenError!fd_t
}
}
-fn openOptionsFromFlagsWindows(flags: u32) windows.OpenFileOptions {
- const w = windows;
-
- var access_mask: w.ULONG = w.READ_CONTROL | w.FILE_WRITE_ATTRIBUTES | w.SYNCHRONIZE;
- if (flags & O.RDWR != 0) {
- access_mask |= w.GENERIC_READ | w.GENERIC_WRITE;
- } else if (flags & O.WRONLY != 0) {
- access_mask |= w.GENERIC_WRITE;
- } else {
- access_mask |= w.GENERIC_READ | w.GENERIC_WRITE;
- }
-
- const filter: windows.OpenFileOptions.Filter = if (flags & O.DIRECTORY != 0) .dir_only else .file_only;
- const follow_symlinks: bool = flags & O.NOFOLLOW == 0;
-
- const creation: w.ULONG = blk: {
- if (flags & O.CREAT != 0) {
- if (flags & O.EXCL != 0) {
- break :blk w.FILE_CREATE;
- }
- }
- break :blk w.FILE_OPEN;
- };
-
- return .{
- .access_mask = access_mask,
- .io_mode = .blocking,
- .creation = creation,
- .filter = filter,
- .follow_symlinks = follow_symlinks,
- };
-}
-
-/// Windows-only. The path parameter is
-/// [WTF-16](https://simonsapin.github.io/wtf-8/#potentially-ill-formed-utf-16) encoded.
-/// Translates the POSIX open API call to a Windows API call.
-/// TODO currently, this function does not handle all flag combinations
-/// or makes use of perm argument.
-pub fn openW(file_path_w: []const u16, flags: u32, perm: mode_t) OpenError!fd_t {
- _ = perm;
- var options = openOptionsFromFlagsWindows(flags);
- options.dir = std.fs.cwd().fd;
- return windows.OpenFile(file_path_w, options) catch |err| switch (err) {
- error.WouldBlock => unreachable,
- error.PipeBusy => unreachable,
- else => |e| return e,
- };
-}
-
/// Open and possibly create a file. Keeps trying if it gets interrupted.
/// `file_path` is relative to the open directory handle `dir_fd`.
+/// On Windows, `file_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
+/// On WASI, `file_path` should be encoded as valid UTF-8.
+/// On other platforms, `file_path` is an opaque sequence of bytes with no particular encoding.
/// See also `openatZ`.
-pub fn openat(dir_fd: fd_t, file_path: []const u8, flags: u32, mode: mode_t) OpenError!fd_t {
+pub fn openat(dir_fd: fd_t, file_path: []const u8, flags: O, mode: mode_t) OpenError!fd_t {
if (builtin.os.tag == .windows) {
- const file_path_w = try windows.sliceToPrefixedFileW(dir_fd, file_path);
- return openatW(dir_fd, file_path_w.span(), flags, mode);
+ @compileError("Windows does not support POSIX; use Windows-specific API or cross-platform std.fs API");
} else if (builtin.os.tag == .wasi and !builtin.link_libc) {
// `mode` is ignored on WASI, which does not support unix-style file permissions
- const opts = try openOptionsFromFlagsWasi(dir_fd, flags);
+ const opts = try openOptionsFromFlagsWasi(flags);
const fd = try openatWasi(
dir_fd,
file_path,
@@ -1755,8 +1722,8 @@ pub fn openat(dir_fd: fd_t, file_path: []const u8, flags: u32, mode: mode_t) Ope
);
errdefer close(fd);
- if (flags & O.WRONLY != 0) {
- const info = try fstat(fd);
+ if (flags.write) {
+ const info = try fstat_wasi(fd);
if (info.filetype == .DIRECTORY)
return error.IsDir;
}
@@ -1767,6 +1734,37 @@ pub fn openat(dir_fd: fd_t, file_path: []const u8, flags: u32, mode: mode_t) Ope
return openatZ(dir_fd, &file_path_c, flags, mode);
}
+pub const CommonOpenFlags = packed struct {
+ ACCMODE: ACCMODE = .RDONLY,
+ CREAT: bool = false,
+ EXCL: bool = false,
+ LARGEFILE: bool = false,
+ DIRECTORY: bool = false,
+ CLOEXEC: bool = false,
+ NONBLOCK: bool = false,
+
+ pub fn lower(cof: CommonOpenFlags) O {
+ if (builtin.os.tag == .wasi) return .{
+ .read = cof.ACCMODE != .WRONLY,
+ .write = cof.ACCMODE != .RDONLY,
+ .CREAT = cof.CREAT,
+ .EXCL = cof.EXCL,
+ .DIRECTORY = cof.DIRECTORY,
+ .NONBLOCK = cof.NONBLOCK,
+ };
+ var result: O = .{
+ .ACCMODE = cof.ACCMODE,
+ .CREAT = cof.CREAT,
+ .EXCL = cof.EXCL,
+ .DIRECTORY = cof.DIRECTORY,
+ .NONBLOCK = cof.NONBLOCK,
+ .CLOEXEC = cof.CLOEXEC,
+ };
+ if (@hasField(O, "LARGEFILE")) result.LARGEFILE = cof.LARGEFILE;
+ return result;
+ }
+};
+
/// A struct to contain all lookup/rights flags accepted by `wasi.path_open`
const WasiOpenOptions = struct {
oflags: wasi.oflags_t,
@@ -1777,42 +1775,38 @@ const WasiOpenOptions = struct {
};
/// Compute rights + flags corresponding to the provided POSIX access mode.
-fn openOptionsFromFlagsWasi(fd: fd_t, oflag: u32) OpenError!WasiOpenOptions {
+fn openOptionsFromFlagsWasi(oflag: O) OpenError!WasiOpenOptions {
const w = std.os.wasi;
- // First, discover the rights that we can derive from `fd`
- var fsb_cur: wasi.fdstat_t = undefined;
- _ = switch (w.fd_fdstat_get(fd, &fsb_cur)) {
- .SUCCESS => .{},
- .BADF => return error.InvalidHandle,
- else => |err| return unexpectedErrno(err),
- };
-
// Next, calculate the read/write rights to request, depending on the
// provided POSIX access mode
- var rights: w.rights_t = 0;
- if (oflag & O.RDONLY != 0) {
- rights |= w.RIGHT.FD_READ | w.RIGHT.FD_READDIR;
+ var rights: w.rights_t = .{};
+ if (oflag.read) {
+ rights.FD_READ = true;
+ rights.FD_READDIR = true;
}
- if (oflag & O.WRONLY != 0) {
- rights |= w.RIGHT.FD_DATASYNC | w.RIGHT.FD_WRITE |
- w.RIGHT.FD_ALLOCATE | w.RIGHT.FD_FILESTAT_SET_SIZE;
+ if (oflag.write) {
+ rights.FD_DATASYNC = true;
+ rights.FD_WRITE = true;
+ rights.FD_ALLOCATE = true;
+ rights.FD_FILESTAT_SET_SIZE = true;
}
- // Request all other rights unconditionally
- rights |= ~(w.RIGHT.FD_DATASYNC | w.RIGHT.FD_READ |
- w.RIGHT.FD_WRITE | w.RIGHT.FD_ALLOCATE |
- w.RIGHT.FD_READDIR | w.RIGHT.FD_FILESTAT_SET_SIZE);
+ // https://github.com/ziglang/zig/issues/18882
+ const flag_bits: u32 = @bitCast(oflag);
+ const oflags_int: u16 = @as(u12, @truncate(flag_bits >> 12));
+ const fs_flags_int: u16 = @as(u12, @truncate(flag_bits));
- // But only take rights that we can actually inherit
- rights &= fsb_cur.fs_rights_inheriting;
-
- return WasiOpenOptions{
- .oflags = @as(w.oflags_t, @truncate((oflag >> 12))) & 0xfff,
- .lookup_flags = if (oflag & O.NOFOLLOW == 0) w.LOOKUP_SYMLINK_FOLLOW else 0,
+ return .{
+ // https://github.com/ziglang/zig/issues/18882
+ .oflags = @bitCast(oflags_int),
+ .lookup_flags = .{
+ .SYMLINK_FOLLOW = !oflag.NOFOLLOW,
+ },
.fs_rights_base = rights,
- .fs_rights_inheriting = fsb_cur.fs_rights_inheriting,
- .fs_flags = @as(w.fdflags_t, @truncate(oflag & 0xfff)),
+ .fs_rights_inheriting = rights,
+ // https://github.com/ziglang/zig/issues/18882
+ .fs_flags = @bitCast(fs_flags_int),
};
}
@@ -1820,11 +1814,11 @@ fn openOptionsFromFlagsWasi(fd: fd_t, oflag: u32) OpenError!WasiOpenOptions {
pub fn openatWasi(
dir_fd: fd_t,
file_path: []const u8,
- lookup_flags: lookupflags_t,
- oflags: oflags_t,
- fdflags: fdflags_t,
- base: rights_t,
- inheriting: rights_t,
+ lookup_flags: wasi.lookupflags_t,
+ oflags: wasi.oflags_t,
+ fdflags: wasi.fdflags_t,
+ base: wasi.rights_t,
+ inheriting: wasi.rights_t,
) OpenError!fd_t {
while (true) {
var fd: fd_t = undefined;
@@ -1834,6 +1828,7 @@ pub fn openatWasi(
.FAULT => unreachable,
.INVAL => unreachable,
+ .BADF => unreachable,
.ACCES => return error.AccessDenied,
.FBIG => return error.FileTooBig,
.OVERFLOW => return error.FileTooBig,
@@ -1851,6 +1846,7 @@ pub fn openatWasi(
.EXIST => return error.PathAlreadyExists,
.BUSY => return error.DeviceBusy,
.NOTCAPABLE => return error.AccessDenied,
+ .ILSEQ => return error.InvalidUtf8,
else => |err| return unexpectedErrno(err),
}
}
@@ -1858,11 +1854,13 @@ pub fn openatWasi(
/// Open and possibly create a file. Keeps trying if it gets interrupted.
/// `file_path` is relative to the open directory handle `dir_fd`.
+/// On Windows, `file_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
+/// On WASI, `file_path` should be encoded as valid UTF-8.
+/// On other platforms, `file_path` is an opaque sequence of bytes with no particular encoding.
/// See also `openat`.
-pub fn openatZ(dir_fd: fd_t, file_path: [*:0]const u8, flags: u32, mode: mode_t) OpenError!fd_t {
+pub fn openatZ(dir_fd: fd_t, file_path: [*:0]const u8, flags: O, mode: mode_t) OpenError!fd_t {
if (builtin.os.tag == .windows) {
- const file_path_w = try windows.cStrToPrefixedFileW(dir_fd, file_path);
- return openatW(dir_fd, file_path_w.span(), flags, mode);
+ @compileError("Windows does not support POSIX; use Windows-specific API or cross-platform std.fs API");
} else if (builtin.os.tag == .wasi and !builtin.link_libc) {
return openat(dir_fd, mem.sliceTo(file_path, 0), flags, mode);
}
@@ -1902,21 +1900,6 @@ pub fn openatZ(dir_fd: fd_t, file_path: [*:0]const u8, flags: u32, mode: mode_t)
}
}
-/// Windows-only. Similar to `openat` but with pathname argument null-terminated
-/// WTF16 encoded.
-/// TODO currently, this function does not handle all flag combinations
-/// or makes use of perm argument.
-pub fn openatW(dir_fd: fd_t, file_path_w: []const u16, flags: u32, mode: mode_t) OpenError!fd_t {
- _ = mode;
- var options = openOptionsFromFlagsWindows(flags);
- options.dir = dir_fd;
- return windows.OpenFile(file_path_w, options) catch |err| switch (err) {
- error.WouldBlock => unreachable,
- error.PipeBusy => unreachable,
- else => |e| return e,
- };
-}
-
pub fn dup(old_fd: fd_t) !fd_t {
const rc = system.dup(old_fd);
return switch (errno(rc)) {
@@ -2194,13 +2177,23 @@ pub const SymLinkError = error{
ReadOnlyFileSystem,
NotDir,
NameTooLong,
+
+ /// WASI-only; file paths must be valid UTF-8.
InvalidUtf8,
+
+ /// Windows-only; file paths provided by the user must be valid WTF-8.
+ /// https://simonsapin.github.io/wtf-8/
+ InvalidWtf8,
+
BadPathName,
} || UnexpectedError;
/// Creates a symbolic link named `sym_link_path` which contains the string `target_path`.
/// A symbolic link (also known as a soft link) may point to an existing file or to a nonexistent
/// one; the latter case is known as a dangling link.
+/// On Windows, both paths should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
+/// On WASI, both paths should be encoded as valid UTF-8.
+/// On other platforms, both paths are an opaque sequence of bytes with no particular encoding.
/// If `sym_link_path` exists, it will not be overwritten.
/// See also `symlinkZ.
pub fn symlink(target_path: []const u8, sym_link_path: []const u8) SymLinkError!void {
@@ -2238,6 +2231,10 @@ pub fn symlinkZ(target_path: [*:0]const u8, sym_link_path: [*:0]const u8) SymLin
.NOMEM => return error.SystemResources,
.NOSPC => return error.NoSpaceLeft,
.ROFS => return error.ReadOnlyFileSystem,
+ .ILSEQ => |err| if (builtin.os.tag == .wasi)
+ return error.InvalidUtf8
+ else
+ return unexpectedErrno(err),
else => |err| return unexpectedErrno(err),
}
}
@@ -2246,6 +2243,9 @@ pub fn symlinkZ(target_path: [*:0]const u8, sym_link_path: [*:0]const u8) SymLin
/// `target_path` **relative** to `newdirfd` directory handle.
/// A symbolic link (also known as a soft link) may point to an existing file or to a nonexistent
/// one; the latter case is known as a dangling link.
+/// On Windows, both paths should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
+/// On WASI, both paths should be encoded as valid UTF-8.
+/// On other platforms, both paths are an opaque sequence of bytes with no particular encoding.
/// If `sym_link_path` exists, it will not be overwritten.
/// See also `symlinkatWasi`, `symlinkatZ` and `symlinkatW`.
pub fn symlinkat(target_path: []const u8, newdirfd: fd_t, sym_link_path: []const u8) SymLinkError!void {
@@ -2280,6 +2280,7 @@ pub fn symlinkatWasi(target_path: []const u8, newdirfd: fd_t, sym_link_path: []c
.NOSPC => return error.NoSpaceLeft,
.ROFS => return error.ReadOnlyFileSystem,
.NOTCAPABLE => return error.AccessDenied,
+ .ILSEQ => return error.InvalidUtf8,
else => |err| return unexpectedErrno(err),
}
}
@@ -2308,6 +2309,10 @@ pub fn symlinkatZ(target_path: [*:0]const u8, newdirfd: fd_t, sym_link_path: [*:
.NOMEM => return error.SystemResources,
.NOSPC => return error.NoSpaceLeft,
.ROFS => return error.ReadOnlyFileSystem,
+ .ILSEQ => |err| if (builtin.os.tag == .wasi)
+ return error.InvalidUtf8
+ else
+ return unexpectedErrno(err),
else => |err| return unexpectedErrno(err),
}
}
@@ -2325,8 +2330,13 @@ pub const LinkError = UnexpectedError || error{
NoSpaceLeft,
ReadOnlyFileSystem,
NotSameFileSystem,
+
+ /// WASI-only; file paths must be valid UTF-8.
+ InvalidUtf8,
};
+/// On WASI, both paths should be encoded as valid UTF-8.
+/// On other platforms, both paths are an opaque sequence of bytes with no particular encoding.
pub fn linkZ(oldpath: [*:0]const u8, newpath: [*:0]const u8, flags: i32) LinkError!void {
if (builtin.os.tag == .wasi and !builtin.link_libc) {
return link(mem.sliceTo(oldpath, 0), mem.sliceTo(newpath, 0), flags);
@@ -2348,10 +2358,16 @@ pub fn linkZ(oldpath: [*:0]const u8, newpath: [*:0]const u8, flags: i32) LinkErr
.ROFS => return error.ReadOnlyFileSystem,
.XDEV => return error.NotSameFileSystem,
.INVAL => unreachable,
+ .ILSEQ => |err| if (builtin.os.tag == .wasi)
+ return error.InvalidUtf8
+ else
+ return unexpectedErrno(err),
else => |err| return unexpectedErrno(err),
}
}
+/// On WASI, both paths should be encoded as valid UTF-8.
+/// On other platforms, both paths are an opaque sequence of bytes with no particular encoding.
pub fn link(oldpath: []const u8, newpath: []const u8, flags: i32) LinkError!void {
if (builtin.os.tag == .wasi and !builtin.link_libc) {
return linkat(wasi.AT.FDCWD, oldpath, wasi.AT.FDCWD, newpath, flags) catch |err| switch (err) {
@@ -2366,6 +2382,8 @@ pub fn link(oldpath: []const u8, newpath: []const u8, flags: i32) LinkError!void
pub const LinkatError = LinkError || error{NotDir};
+/// On WASI, both paths should be encoded as valid UTF-8.
+/// On other platforms, both paths are an opaque sequence of bytes with no particular encoding.
pub fn linkatZ(
olddir: fd_t,
oldpath: [*:0]const u8,
@@ -2394,10 +2412,16 @@ pub fn linkatZ(
.ROFS => return error.ReadOnlyFileSystem,
.XDEV => return error.NotSameFileSystem,
.INVAL => unreachable,
+ .ILSEQ => |err| if (builtin.os.tag == .wasi)
+ return error.InvalidUtf8
+ else
+ return unexpectedErrno(err),
else => |err| return unexpectedErrno(err),
}
}
+/// On WASI, both paths should be encoded as valid UTF-8.
+/// On other platforms, both paths are an opaque sequence of bytes with no particular encoding.
pub fn linkat(
olddir: fd_t,
oldpath: []const u8,
@@ -2408,42 +2432,44 @@ pub fn linkat(
if (builtin.os.tag == .wasi and !builtin.link_libc) {
const old: RelativePathWasi = .{ .dir_fd = olddir, .relative_path = oldpath };
const new: RelativePathWasi = .{ .dir_fd = newdir, .relative_path = newpath };
- return linkatWasi(old, new, flags);
+ const old_flags: wasi.lookupflags_t = .{
+ .SYMLINK_FOLLOW = (flags & AT.SYMLINK_FOLLOW) != 0,
+ };
+ switch (wasi.path_link(
+ old.dir_fd,
+ old_flags,
+ old.relative_path.ptr,
+ old.relative_path.len,
+ new.dir_fd,
+ new.relative_path.ptr,
+ new.relative_path.len,
+ )) {
+ .SUCCESS => return,
+ .ACCES => return error.AccessDenied,
+ .DQUOT => return error.DiskQuota,
+ .EXIST => return error.PathAlreadyExists,
+ .FAULT => unreachable,
+ .IO => return error.FileSystem,
+ .LOOP => return error.SymLinkLoop,
+ .MLINK => return error.LinkQuotaExceeded,
+ .NAMETOOLONG => return error.NameTooLong,
+ .NOENT => return error.FileNotFound,
+ .NOMEM => return error.SystemResources,
+ .NOSPC => return error.NoSpaceLeft,
+ .NOTDIR => return error.NotDir,
+ .PERM => return error.AccessDenied,
+ .ROFS => return error.ReadOnlyFileSystem,
+ .XDEV => return error.NotSameFileSystem,
+ .INVAL => unreachable,
+ .ILSEQ => return error.InvalidUtf8,
+ else => |err| return unexpectedErrno(err),
+ }
}
const old = try toPosixPath(oldpath);
const new = try toPosixPath(newpath);
return try linkatZ(olddir, &old, newdir, &new, flags);
}
-/// WASI-only. The same as `linkat` but targeting WASI.
-/// See also `linkat`.
-pub fn linkatWasi(old: RelativePathWasi, new: RelativePathWasi, flags: i32) LinkatError!void {
- var old_flags: wasi.lookupflags_t = 0;
- // TODO: Why is this not defined in wasi-libc?
- if (flags & linux.AT.SYMLINK_FOLLOW != 0) old_flags |= wasi.LOOKUP_SYMLINK_FOLLOW;
-
- switch (wasi.path_link(old.dir_fd, old_flags, old.relative_path.ptr, old.relative_path.len, new.dir_fd, new.relative_path.ptr, new.relative_path.len)) {
- .SUCCESS => return,
- .ACCES => return error.AccessDenied,
- .DQUOT => return error.DiskQuota,
- .EXIST => return error.PathAlreadyExists,
- .FAULT => unreachable,
- .IO => return error.FileSystem,
- .LOOP => return error.SymLinkLoop,
- .MLINK => return error.LinkQuotaExceeded,
- .NAMETOOLONG => return error.NameTooLong,
- .NOENT => return error.FileNotFound,
- .NOMEM => return error.SystemResources,
- .NOSPC => return error.NoSpaceLeft,
- .NOTDIR => return error.NotDir,
- .PERM => return error.AccessDenied,
- .ROFS => return error.ReadOnlyFileSystem,
- .XDEV => return error.NotSameFileSystem,
- .INVAL => unreachable,
- else => |err| return unexpectedErrno(err),
- }
-}
-
pub const UnlinkError = error{
FileNotFound,
@@ -2459,9 +2485,13 @@ pub const UnlinkError = error{
SystemResources,
ReadOnlyFileSystem,
- /// On Windows, file paths must be valid Unicode.
+ /// WASI-only; file paths must be valid UTF-8.
InvalidUtf8,
+ /// Windows-only; file paths provided by the user must be valid WTF-8.
+ /// https://simonsapin.github.io/wtf-8/
+ InvalidWtf8,
+
/// On Windows, file paths cannot contain these characters:
/// '/', '*', '?', '"', '<', '>', '|'
BadPathName,
@@ -2471,6 +2501,9 @@ pub const UnlinkError = error{
} || UnexpectedError;
/// Delete a name and possibly the file it refers to.
+/// On Windows, `file_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
+/// On WASI, `file_path` should be encoded as valid UTF-8.
+/// On other platforms, `file_path` is an opaque sequence of bytes with no particular encoding.
/// See also `unlinkZ`.
pub fn unlink(file_path: []const u8) UnlinkError!void {
if (builtin.os.tag == .wasi and !builtin.link_libc) {
@@ -2487,7 +2520,7 @@ pub fn unlink(file_path: []const u8) UnlinkError!void {
}
}
-/// Same as `unlink` except the parameter is a null terminated UTF8-encoded string.
+/// Same as `unlink` except the parameter is null terminated.
pub fn unlinkZ(file_path: [*:0]const u8) UnlinkError!void {
if (builtin.os.tag == .windows) {
const file_path_w = try windows.cStrToPrefixedFileW(null, file_path);
@@ -2510,11 +2543,15 @@ pub fn unlinkZ(file_path: [*:0]const u8) UnlinkError!void {
.NOTDIR => return error.NotDir,
.NOMEM => return error.SystemResources,
.ROFS => return error.ReadOnlyFileSystem,
+ .ILSEQ => |err| if (builtin.os.tag == .wasi)
+ return error.InvalidUtf8
+ else
+ return unexpectedErrno(err),
else => |err| return unexpectedErrno(err),
}
}
-/// Windows-only. Same as `unlink` except the parameter is null-terminated, WTF16 encoded.
+/// Windows-only. Same as `unlink` except the parameter is null-terminated, WTF16 LE encoded.
pub fn unlinkW(file_path_w: []const u16) UnlinkError!void {
windows.DeleteFile(file_path_w, .{ .dir = std.fs.cwd().fd }) catch |err| switch (err) {
error.DirNotEmpty => unreachable, // we're not passing .remove_dir = true
@@ -2528,6 +2565,9 @@ pub const UnlinkatError = UnlinkError || error{
};
/// Delete a file name and possibly the file it refers to, based on an open directory handle.
+/// On Windows, `file_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
+/// On WASI, `file_path` should be encoded as valid UTF-8.
+/// On other platforms, `file_path` is an opaque sequence of bytes with no particular encoding.
/// Asserts that the path parameter has no null bytes.
pub fn unlinkat(dirfd: fd_t, file_path: []const u8, flags: u32) UnlinkatError!void {
if (builtin.os.tag == .windows) {
@@ -2565,6 +2605,7 @@ pub fn unlinkatWasi(dirfd: fd_t, file_path: []const u8, flags: u32) UnlinkatErro
.ROFS => return error.ReadOnlyFileSystem,
.NOTEMPTY => return error.DirNotEmpty,
.NOTCAPABLE => return error.AccessDenied,
+ .ILSEQ => return error.InvalidUtf8,
.INVAL => unreachable, // invalid flags, or pathname has . as last component
.BADF => unreachable, // always a race condition
@@ -2597,6 +2638,10 @@ pub fn unlinkatZ(dirfd: fd_t, file_path_c: [*:0]const u8, flags: u32) UnlinkatEr
.ROFS => return error.ReadOnlyFileSystem,
.EXIST => return error.DirNotEmpty,
.NOTEMPTY => return error.DirNotEmpty,
+ .ILSEQ => |err| if (builtin.os.tag == .wasi)
+ return error.InvalidUtf8
+ else
+ return unexpectedErrno(err),
.INVAL => unreachable, // invalid flags, or pathname has . as last component
.BADF => unreachable, // always a race condition
@@ -2605,7 +2650,7 @@ pub fn unlinkatZ(dirfd: fd_t, file_path_c: [*:0]const u8, flags: u32) UnlinkatEr
}
}
-/// Same as `unlinkat` but `sub_path_w` is UTF16LE, NT prefixed. Windows only.
+/// Same as `unlinkat` but `sub_path_w` is WTF16LE, NT prefixed. Windows only.
pub fn unlinkatW(dirfd: fd_t, sub_path_w: []const u16, flags: u32) UnlinkatError!void {
const remove_dir = (flags & AT.REMOVEDIR) != 0;
return windows.DeleteFile(sub_path_w, .{ .dir = dirfd, .remove_dir = remove_dir });
@@ -2631,16 +2676,29 @@ pub const RenameError = error{
PathAlreadyExists,
ReadOnlyFileSystem,
RenameAcrossMountPoints,
+ /// WASI-only; file paths must be valid UTF-8.
InvalidUtf8,
+ /// Windows-only; file paths provided by the user must be valid WTF-8.
+ /// https://simonsapin.github.io/wtf-8/
+ InvalidWtf8,
BadPathName,
NoDevice,
SharingViolation,
PipeBusy,
/// On Windows, `\\server` or `\\server\share` was not found.
NetworkNotFound,
+ /// On Windows, antivirus software is enabled by default. It can be
+ /// disabled, but Windows Update sometimes ignores the user's preference
+ /// and re-enables it. When enabled, antivirus software on Windows
+ /// intercepts file system operations and makes them significantly slower
+ /// in addition to possibly failing with this error code.
+ AntivirusInterference,
} || UnexpectedError;
/// Change the name or location of a file.
+/// On Windows, both paths should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
+/// On WASI, both paths should be encoded as valid UTF-8.
+/// On other platforms, both paths are an opaque sequence of bytes with no particular encoding.
pub fn rename(old_path: []const u8, new_path: []const u8) RenameError!void {
if (builtin.os.tag == .wasi and !builtin.link_libc) {
return renameat(wasi.AT.FDCWD, old_path, wasi.AT.FDCWD, new_path);
@@ -2655,7 +2713,7 @@ pub fn rename(old_path: []const u8, new_path: []const u8) RenameError!void {
}
}
-/// Same as `rename` except the parameters are null-terminated byte arrays.
+/// Same as `rename` except the parameters are null-terminated.
pub fn renameZ(old_path: [*:0]const u8, new_path: [*:0]const u8) RenameError!void {
if (builtin.os.tag == .windows) {
const old_path_w = try windows.cStrToPrefixedFileW(null, old_path);
@@ -2684,11 +2742,15 @@ pub fn renameZ(old_path: [*:0]const u8, new_path: [*:0]const u8) RenameError!voi
.NOTEMPTY => return error.PathAlreadyExists,
.ROFS => return error.ReadOnlyFileSystem,
.XDEV => return error.RenameAcrossMountPoints,
+ .ILSEQ => |err| if (builtin.os.tag == .wasi)
+ return error.InvalidUtf8
+ else
+ return unexpectedErrno(err),
else => |err| return unexpectedErrno(err),
}
}
-/// Same as `rename` except the parameters are null-terminated UTF16LE encoded byte arrays.
+/// Same as `rename` except the parameters are null-terminated and WTF16LE encoded.
/// Assumes target is Windows.
pub fn renameW(old_path: [*:0]const u16, new_path: [*:0]const u16) RenameError!void {
const flags = windows.MOVEFILE_REPLACE_EXISTING | windows.MOVEFILE_WRITE_THROUGH;
@@ -2696,6 +2758,9 @@ pub fn renameW(old_path: [*:0]const u16, new_path: [*:0]const u16) RenameError!v
}
/// Change the name or location of a file based on an open directory handle.
+/// On Windows, both paths should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
+/// On WASI, both paths should be encoded as valid UTF-8.
+/// On other platforms, both paths are an opaque sequence of bytes with no particular encoding.
pub fn renameat(
old_dir_fd: fd_t,
old_path: []const u8,
@@ -2741,11 +2806,12 @@ pub fn renameatWasi(old: RelativePathWasi, new: RelativePathWasi) RenameError!vo
.ROFS => return error.ReadOnlyFileSystem,
.XDEV => return error.RenameAcrossMountPoints,
.NOTCAPABLE => return error.AccessDenied,
+ .ILSEQ => return error.InvalidUtf8,
else => |err| return unexpectedErrno(err),
}
}
-/// Same as `renameat` except the parameters are null-terminated byte arrays.
+/// Same as `renameat` except the parameters are null-terminated.
pub fn renameatZ(
old_dir_fd: fd_t,
old_path: [*:0]const u8,
@@ -2780,6 +2846,10 @@ pub fn renameatZ(
.NOTEMPTY => return error.PathAlreadyExists,
.ROFS => return error.ReadOnlyFileSystem,
.XDEV => return error.RenameAcrossMountPoints,
+ .ILSEQ => |err| if (builtin.os.tag == .wasi)
+ return error.InvalidUtf8
+ else
+ return unexpectedErrno(err),
else => |err| return unexpectedErrno(err),
}
}
@@ -2797,7 +2867,6 @@ pub fn renameatW(
.dir = old_dir_fd,
.access_mask = windows.SYNCHRONIZE | windows.GENERIC_WRITE | windows.DELETE,
.creation = windows.FILE_OPEN,
- .io_mode = .blocking,
.filter = .any, // This function is supposed to rename both files and directories.
.follow_symlinks = false,
}) catch |err| switch (err) {
@@ -2892,6 +2961,9 @@ pub fn renameatW(
}
}
+/// On Windows, `sub_dir_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
+/// On WASI, `sub_dir_path` should be encoded as valid UTF-8.
+/// On other platforms, `sub_dir_path` is an opaque sequence of bytes with no particular encoding.
pub fn mkdirat(dir_fd: fd_t, sub_dir_path: []const u8, mode: u32) MakeDirError!void {
if (builtin.os.tag == .windows) {
const sub_dir_path_w = try windows.sliceToPrefixedFileW(dir_fd, sub_dir_path);
@@ -2923,14 +2995,16 @@ pub fn mkdiratWasi(dir_fd: fd_t, sub_dir_path: []const u8, mode: u32) MakeDirErr
.NOTDIR => return error.NotDir,
.ROFS => return error.ReadOnlyFileSystem,
.NOTCAPABLE => return error.AccessDenied,
+ .ILSEQ => return error.InvalidUtf8,
else => |err| return unexpectedErrno(err),
}
}
+/// Same as `mkdirat` except `sub_dir_path` is null-terminated.
pub fn mkdiratZ(dir_fd: fd_t, sub_dir_path: [*:0]const u8, mode: u32) MakeDirError!void {
if (builtin.os.tag == .windows) {
const sub_dir_path_w = try windows.cStrToPrefixedFileW(dir_fd, sub_dir_path);
- return mkdiratW(dir_fd, sub_dir_path_w.span().ptr, mode);
+ return mkdiratW(dir_fd, sub_dir_path_w.span(), mode);
} else if (builtin.os.tag == .wasi and !builtin.link_libc) {
return mkdirat(dir_fd, mem.sliceTo(sub_dir_path, 0), mode);
}
@@ -2952,22 +3026,27 @@ pub fn mkdiratZ(dir_fd: fd_t, sub_dir_path: [*:0]const u8, mode: u32) MakeDirErr
.ROFS => return error.ReadOnlyFileSystem,
// dragonfly: when dir_fd is unlinked from filesystem
.NOTCONN => return error.FileNotFound,
+ .ILSEQ => |err| if (builtin.os.tag == .wasi)
+ return error.InvalidUtf8
+ else
+ return unexpectedErrno(err),
else => |err| return unexpectedErrno(err),
}
}
+/// Windows-only. Same as `mkdirat` except the parameter is WTF16 LE encoded.
pub fn mkdiratW(dir_fd: fd_t, sub_path_w: []const u16, mode: u32) MakeDirError!void {
_ = mode;
const sub_dir_handle = windows.OpenFile(sub_path_w, .{
.dir = dir_fd,
.access_mask = windows.GENERIC_READ | windows.SYNCHRONIZE,
.creation = windows.FILE_CREATE,
- .io_mode = .blocking,
.filter = .dir_only,
}) catch |err| switch (err) {
- error.IsDir => unreachable,
- error.PipeBusy => unreachable,
- error.WouldBlock => unreachable,
+ error.IsDir => return error.Unexpected,
+ error.PipeBusy => return error.Unexpected,
+ error.WouldBlock => return error.Unexpected,
+ error.AntivirusInterference => return error.Unexpected,
else => |e| return e,
};
windows.CloseHandle(sub_dir_handle);
@@ -2987,7 +3066,11 @@ pub const MakeDirError = error{
NoSpaceLeft,
NotDir,
ReadOnlyFileSystem,
+ /// WASI-only; file paths must be valid UTF-8.
InvalidUtf8,
+ /// Windows-only; file paths provided by the user must be valid WTF-8.
+ /// https://simonsapin.github.io/wtf-8/
+ InvalidWtf8,
BadPathName,
NoDevice,
/// On Windows, `\\server` or `\\server\share` was not found.
@@ -2996,6 +3079,9 @@ pub const MakeDirError = error{
/// Create a directory.
/// `mode` is ignored on Windows and WASI.
+/// On Windows, `dir_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
+/// On WASI, `dir_path` should be encoded as valid UTF-8.
+/// On other platforms, `dir_path` is an opaque sequence of bytes with no particular encoding.
pub fn mkdir(dir_path: []const u8, mode: u32) MakeDirError!void {
if (builtin.os.tag == .wasi and !builtin.link_libc) {
return mkdirat(wasi.AT.FDCWD, dir_path, mode);
@@ -3008,7 +3094,10 @@ pub fn mkdir(dir_path: []const u8, mode: u32) MakeDirError!void {
}
}
-/// Same as `mkdir` but the parameter is a null-terminated UTF8-encoded string.
+/// Same as `mkdir` but the parameter is null-terminated.
+/// On Windows, `dir_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
+/// On WASI, `dir_path` should be encoded as valid UTF-8.
+/// On other platforms, `dir_path` is an opaque sequence of bytes with no particular encoding.
pub fn mkdirZ(dir_path: [*:0]const u8, mode: u32) MakeDirError!void {
if (builtin.os.tag == .windows) {
const dir_path_w = try windows.cStrToPrefixedFileW(null, dir_path);
@@ -3031,23 +3120,27 @@ pub fn mkdirZ(dir_path: [*:0]const u8, mode: u32) MakeDirError!void {
.NOSPC => return error.NoSpaceLeft,
.NOTDIR => return error.NotDir,
.ROFS => return error.ReadOnlyFileSystem,
+ .ILSEQ => |err| if (builtin.os.tag == .wasi)
+ return error.InvalidUtf8
+ else
+ return unexpectedErrno(err),
else => |err| return unexpectedErrno(err),
}
}
-/// Windows-only. Same as `mkdir` but the parameters is WTF16 encoded.
+/// Windows-only. Same as `mkdir` but the parameter is WTF16LE encoded.
pub fn mkdirW(dir_path_w: []const u16, mode: u32) MakeDirError!void {
_ = mode;
const sub_dir_handle = windows.OpenFile(dir_path_w, .{
.dir = std.fs.cwd().fd,
.access_mask = windows.GENERIC_READ | windows.SYNCHRONIZE,
.creation = windows.FILE_CREATE,
- .io_mode = .blocking,
.filter = .dir_only,
}) catch |err| switch (err) {
- error.IsDir => unreachable,
- error.PipeBusy => unreachable,
- error.WouldBlock => unreachable,
+ error.IsDir => return error.Unexpected,
+ error.PipeBusy => return error.Unexpected,
+ error.WouldBlock => return error.Unexpected,
+ error.AntivirusInterference => return error.Unexpected,
else => |e| return e,
};
windows.CloseHandle(sub_dir_handle);
@@ -3063,13 +3156,20 @@ pub const DeleteDirError = error{
NotDir,
DirNotEmpty,
ReadOnlyFileSystem,
+ /// WASI-only; file paths must be valid UTF-8.
InvalidUtf8,
+ /// Windows-only; file paths provided by the user must be valid WTF-8.
+ /// https://simonsapin.github.io/wtf-8/
+ InvalidWtf8,
BadPathName,
/// On Windows, `\\server` or `\\server\share` was not found.
NetworkNotFound,
} || UnexpectedError;
/// Deletes an empty directory.
+/// On Windows, `dir_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
+/// On WASI, `dir_path` should be encoded as valid UTF-8.
+/// On other platforms, `dir_path` is an opaque sequence of bytes with no particular encoding.
pub fn rmdir(dir_path: []const u8) DeleteDirError!void {
if (builtin.os.tag == .wasi and !builtin.link_libc) {
return unlinkat(wasi.AT.FDCWD, dir_path, AT.REMOVEDIR) catch |err| switch (err) {
@@ -3087,6 +3187,9 @@ pub fn rmdir(dir_path: []const u8) DeleteDirError!void {
}
/// Same as `rmdir` except the parameter is null-terminated.
+/// On Windows, `dir_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
+/// On WASI, `dir_path` should be encoded as valid UTF-8.
+/// On other platforms, `dir_path` is an opaque sequence of bytes with no particular encoding.
pub fn rmdirZ(dir_path: [*:0]const u8) DeleteDirError!void {
if (builtin.os.tag == .windows) {
const dir_path_w = try windows.cStrToPrefixedFileW(null, dir_path);
@@ -3109,11 +3212,15 @@ pub fn rmdirZ(dir_path: [*:0]const u8) DeleteDirError!void {
.EXIST => return error.DirNotEmpty,
.NOTEMPTY => return error.DirNotEmpty,
.ROFS => return error.ReadOnlyFileSystem,
+ .ILSEQ => |err| if (builtin.os.tag == .wasi)
+ return error.InvalidUtf8
+ else
+ return unexpectedErrno(err),
else => |err| return unexpectedErrno(err),
}
}
-/// Windows-only. Same as `rmdir` except the parameter is WTF16 encoded.
+/// Windows-only. Same as `rmdir` except the parameter is WTF-16 LE encoded.
pub fn rmdirW(dir_path_w: []const u16) DeleteDirError!void {
return windows.DeleteFile(dir_path_w, .{ .dir = std.fs.cwd().fd, .remove_dir = true }) catch |err| switch (err) {
error.IsDir => unreachable,
@@ -3130,21 +3237,25 @@ pub const ChangeCurDirError = error{
SystemResources,
NotDir,
BadPathName,
-
- /// On Windows, file paths must be valid Unicode.
+ /// WASI-only; file paths must be valid UTF-8.
InvalidUtf8,
+ /// Windows-only; file paths provided by the user must be valid WTF-8.
+ /// https://simonsapin.github.io/wtf-8/
+ InvalidWtf8,
} || UnexpectedError;
/// Changes the current working directory of the calling process.
-/// `dir_path` is recommended to be a UTF-8 encoded string.
+/// On Windows, `dir_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
+/// On WASI, `dir_path` should be encoded as valid UTF-8.
+/// On other platforms, `dir_path` is an opaque sequence of bytes with no particular encoding.
pub fn chdir(dir_path: []const u8) ChangeCurDirError!void {
if (builtin.os.tag == .wasi and !builtin.link_libc) {
@compileError("WASI does not support os.chdir");
} else if (builtin.os.tag == .windows) {
- var utf16_dir_path: [windows.PATH_MAX_WIDE]u16 = undefined;
- const len = try std.unicode.utf8ToUtf16Le(utf16_dir_path[0..], dir_path);
- if (len > utf16_dir_path.len) return error.NameTooLong;
- return chdirW(utf16_dir_path[0..len]);
+ var wtf16_dir_path: [windows.PATH_MAX_WIDE]u16 = undefined;
+ const len = try std.unicode.wtf8ToWtf16Le(wtf16_dir_path[0..], dir_path);
+ if (len > wtf16_dir_path.len) return error.NameTooLong;
+ return chdirW(wtf16_dir_path[0..len]);
} else {
const dir_path_c = try toPosixPath(dir_path);
return chdirZ(&dir_path_c);
@@ -3152,12 +3263,15 @@ pub fn chdir(dir_path: []const u8) ChangeCurDirError!void {
}
/// Same as `chdir` except the parameter is null-terminated.
+/// On Windows, `dir_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
+/// On WASI, `dir_path` should be encoded as valid UTF-8.
+/// On other platforms, `dir_path` is an opaque sequence of bytes with no particular encoding.
pub fn chdirZ(dir_path: [*:0]const u8) ChangeCurDirError!void {
if (builtin.os.tag == .windows) {
- var utf16_dir_path: [windows.PATH_MAX_WIDE]u16 = undefined;
- const len = try std.unicode.utf8ToUtf16Le(utf16_dir_path[0..], mem.span(dir_path));
- if (len > utf16_dir_path.len) return error.NameTooLong;
- return chdirW(utf16_dir_path[0..len]);
+ var wtf16_dir_path: [windows.PATH_MAX_WIDE]u16 = undefined;
+ const len = try std.unicode.wtf8ToWtf16Le(wtf16_dir_path[0..], mem.span(dir_path));
+ if (len > wtf16_dir_path.len) return error.NameTooLong;
+ return chdirW(wtf16_dir_path[0..len]);
} else if (builtin.os.tag == .wasi and !builtin.link_libc) {
return chdir(mem.span(dir_path));
}
@@ -3171,11 +3285,15 @@ pub fn chdirZ(dir_path: [*:0]const u8) ChangeCurDirError!void {
.NOENT => return error.FileNotFound,
.NOMEM => return error.SystemResources,
.NOTDIR => return error.NotDir,
+ .ILSEQ => |err| if (builtin.os.tag == .wasi)
+ return error.InvalidUtf8
+ else
+ return unexpectedErrno(err),
else => |err| return unexpectedErrno(err),
}
}
-/// Windows-only. Same as `chdir` except the parameter is WTF16 encoded.
+/// Windows-only. Same as `chdir` except the parameter is WTF16 LE encoded.
pub fn chdirW(dir_path: []const u16) ChangeCurDirError!void {
windows.SetCurrentDirectory(dir_path) catch |err| switch (err) {
error.NoDevice => return error.FileSystem,
@@ -3215,7 +3333,11 @@ pub const ReadLinkError = error{
SystemResources,
NotLink,
NotDir,
+ /// WASI-only; file paths must be valid UTF-8.
InvalidUtf8,
+ /// Windows-only; file paths provided by the user must be valid WTF-8.
+ /// https://simonsapin.github.io/wtf-8/
+ InvalidWtf8,
BadPathName,
/// Windows-only. This error may occur if the opened reparse point is
/// of unsupported type.
@@ -3225,7 +3347,13 @@ pub const ReadLinkError = error{
} || UnexpectedError;
/// Read value of a symbolic link.
+/// On Windows, `file_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
+/// On WASI, `file_path` should be encoded as valid UTF-8.
+/// On other platforms, `file_path` is an opaque sequence of bytes with no particular encoding.
/// The return value is a slice of `out_buffer` from index 0.
+/// On Windows, the result is encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
+/// On WASI, the result is encoded as UTF-8.
+/// On other platforms, the result is an opaque sequence of bytes with no particular encoding.
pub fn readlink(file_path: []const u8, out_buffer: []u8) ReadLinkError![]u8 {
if (builtin.os.tag == .wasi and !builtin.link_libc) {
return readlinkat(wasi.AT.FDCWD, file_path, out_buffer);
@@ -3238,7 +3366,8 @@ pub fn readlink(file_path: []const u8, out_buffer: []u8) ReadLinkError![]u8 {
}
}
-/// Windows-only. Same as `readlink` except `file_path` is WTF16 encoded.
+/// Windows-only. Same as `readlink` except `file_path` is WTF16 LE encoded.
+/// The result is encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
/// See also `readlinkZ`.
pub fn readlinkW(file_path: []const u16, out_buffer: []u8) ReadLinkError![]u8 {
return windows.ReadLink(std.fs.cwd().fd, file_path, out_buffer);
@@ -3247,7 +3376,7 @@ pub fn readlinkW(file_path: []const u16, out_buffer: []u8) ReadLinkError![]u8 {
/// Same as `readlink` except `file_path` is null-terminated.
pub fn readlinkZ(file_path: [*:0]const u8, out_buffer: []u8) ReadLinkError![]u8 {
if (builtin.os.tag == .windows) {
- const file_path_w = try windows.cStrToWin32PrefixedFileW(file_path);
+ const file_path_w = try windows.cStrToPrefixedFileW(null, file_path);
return readlinkW(file_path_w.span(), out_buffer);
} else if (builtin.os.tag == .wasi and !builtin.link_libc) {
return readlink(mem.sliceTo(file_path, 0), out_buffer);
@@ -3264,12 +3393,22 @@ pub fn readlinkZ(file_path: [*:0]const u8, out_buffer: []u8) ReadLinkError![]u8
.NOENT => return error.FileNotFound,
.NOMEM => return error.SystemResources,
.NOTDIR => return error.NotDir,
+ .ILSEQ => |err| if (builtin.os.tag == .wasi)
+ return error.InvalidUtf8
+ else
+ return unexpectedErrno(err),
else => |err| return unexpectedErrno(err),
}
}
/// Similar to `readlink` except reads value of a symbolic link **relative** to `dirfd` directory handle.
+/// On Windows, `file_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
+/// On WASI, `file_path` should be encoded as valid UTF-8.
+/// On other platforms, `file_path` is an opaque sequence of bytes with no particular encoding.
/// The return value is a slice of `out_buffer` from index 0.
+/// On Windows, the result is encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
+/// On WASI, the result is encoded as UTF-8.
+/// On other platforms, the result is an opaque sequence of bytes with no particular encoding.
/// See also `readlinkatWasi`, `readlinkatZ` and `readlinkatW`.
pub fn readlinkat(dirfd: fd_t, file_path: []const u8, out_buffer: []u8) ReadLinkError![]u8 {
if (builtin.os.tag == .wasi and !builtin.link_libc) {
@@ -3299,11 +3438,13 @@ pub fn readlinkatWasi(dirfd: fd_t, file_path: []const u8, out_buffer: []u8) Read
.NOMEM => return error.SystemResources,
.NOTDIR => return error.NotDir,
.NOTCAPABLE => return error.AccessDenied,
+ .ILSEQ => return error.InvalidUtf8,
else => |err| return unexpectedErrno(err),
}
}
-/// Windows-only. Same as `readlinkat` except `file_path` is null-terminated, WTF16 encoded.
+/// Windows-only. Same as `readlinkat` except `file_path` is WTF16 LE encoded.
+/// The result is encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
/// See also `readlinkat`.
pub fn readlinkatW(dirfd: fd_t, file_path: []const u16, out_buffer: []u8) ReadLinkError![]u8 {
return windows.ReadLink(dirfd, file_path, out_buffer);
@@ -3330,6 +3471,10 @@ pub fn readlinkatZ(dirfd: fd_t, file_path: [*:0]const u8, out_buffer: []u8) Read
.NOENT => return error.FileNotFound,
.NOMEM => return error.SystemResources,
.NOTDIR => return error.NotDir,
+ .ILSEQ => |err| if (builtin.os.tag == .wasi)
+ return error.InvalidUtf8
+ else
+ return unexpectedErrno(err),
else => |err| return unexpectedErrno(err),
}
}
@@ -3412,20 +3557,16 @@ pub fn isatty(handle: fd_t) bool {
return system.isatty(handle) != 0;
}
if (builtin.os.tag == .wasi) {
- var statbuf: fdstat_t = undefined;
- const err = system.fd_fdstat_get(handle, &statbuf);
- if (err != .SUCCESS) {
- // errno = err;
+ var statbuf: wasi.fdstat_t = undefined;
+ const err = wasi.fd_fdstat_get(handle, &statbuf);
+ if (err != .SUCCESS)
return false;
- }
// A tty is a character device that we can't seek or tell on.
- if (statbuf.fs_filetype != .CHARACTER_DEVICE or
- (statbuf.fs_rights_base & (RIGHT.FD_SEEK | RIGHT.FD_TELL)) != 0)
- {
- // errno = ENOTTY;
+ if (statbuf.fs_filetype != .CHARACTER_DEVICE)
+ return false;
+ if (statbuf.fs_rights_base.FD_SEEK or statbuf.fs_rights_base.FD_TELL)
return false;
- }
return true;
}
@@ -3631,14 +3772,6 @@ pub fn shutdown(sock: socket_t, how: ShutdownHow) ShutdownError!void {
}
}
-pub fn closeSocket(sock: socket_t) void {
- if (builtin.os.tag == .windows) {
- windows.closesocket(sock) catch unreachable;
- } else {
- close(sock);
- }
-}
-
pub const BindError = error{
/// The address is protected, and the user is not the superuser.
/// For UNIX domain sockets: Search permission is denied on a component
@@ -3846,11 +3979,11 @@ pub fn accept(
/// will return a value greater than was supplied to the call.
addr_size: ?*socklen_t,
/// The following values can be bitwise ORed in flags to obtain different behavior:
- /// * `SOCK.NONBLOCK` - Set the `O.NONBLOCK` file status flag on the open file description (see `open`)
+ /// * `SOCK.NONBLOCK` - Set the `NONBLOCK` file status flag on the open file description (see `open`)
/// referred to by the new file descriptor. Using this flag saves extra calls to `fcntl` to achieve
/// the same result.
/// * `SOCK.CLOEXEC` - Set the close-on-exec (`FD_CLOEXEC`) flag on the new file descriptor. See the
- /// description of the `O.CLOEXEC` flag in `open` for reasons why this may be useful.
+ /// description of the `CLOEXEC` flag in `open` for reasons why this may be useful.
flags: u32,
) AcceptError!socket_t {
const have_accept4 = comptime !(builtin.target.isDarwin() or builtin.os.tag == .windows);
@@ -4286,16 +4419,7 @@ pub const FStatError = error{
/// Return information about a file descriptor.
pub fn fstat(fd: fd_t) FStatError!Stat {
if (builtin.os.tag == .wasi and !builtin.link_libc) {
- var stat: wasi.filestat_t = undefined;
- switch (wasi.fd_filestat_get(fd, &stat)) {
- .SUCCESS => return Stat.fromFilestat(stat),
- .INVAL => unreachable,
- .BADF => unreachable, // Always a race condition.
- .NOMEM => return error.SystemResources,
- .ACCES => return error.AccessDenied,
- .NOTCAPABLE => return error.AccessDenied,
- else => |err| return unexpectedErrno(err),
- }
+ return Stat.fromFilestat(try fstat_wasi(fd));
}
if (builtin.os.tag == .windows) {
@compileError("fstat is not yet implemented on Windows");
@@ -4314,15 +4438,38 @@ pub fn fstat(fd: fd_t) FStatError!Stat {
}
}
-pub const FStatAtError = FStatError || error{ NameTooLong, FileNotFound, SymLinkLoop };
+pub fn fstat_wasi(fd: fd_t) FStatError!wasi.filestat_t {
+ var stat: wasi.filestat_t = undefined;
+ switch (wasi.fd_filestat_get(fd, &stat)) {
+ .SUCCESS => return stat,
+ .INVAL => unreachable,
+ .BADF => unreachable, // Always a race condition.
+ .NOMEM => return error.SystemResources,
+ .ACCES => return error.AccessDenied,
+ .NOTCAPABLE => return error.AccessDenied,
+ else => |err| return unexpectedErrno(err),
+ }
+}
+
+pub const FStatAtError = FStatError || error{
+ NameTooLong,
+ FileNotFound,
+ SymLinkLoop,
+ /// WASI-only; file paths must be valid UTF-8.
+ InvalidUtf8,
+};
/// Similar to `fstat`, but returns stat of a resource pointed to by `pathname`
/// which is relative to `dirfd` handle.
-/// See also `fstatatZ` and `fstatatWasi`.
+/// On WASI, `pathname` should be encoded as valid UTF-8.
+/// On other platforms, `pathname` is an opaque sequence of bytes with no particular encoding.
+/// See also `fstatatZ` and `fstatat_wasi`.
pub fn fstatat(dirfd: fd_t, pathname: []const u8, flags: u32) FStatAtError!Stat {
if (builtin.os.tag == .wasi and !builtin.link_libc) {
- const wasi_flags = if (flags & linux.AT.SYMLINK_NOFOLLOW == 0) wasi.LOOKUP_SYMLINK_FOLLOW else 0;
- return fstatatWasi(dirfd, pathname, wasi_flags);
+ const filestat = try fstatat_wasi(dirfd, pathname, .{
+ .SYMLINK_FOLLOW = (flags & AT.SYMLINK_NOFOLLOW) == 0,
+ });
+ return Stat.fromFilestat(filestat);
} else if (builtin.os.tag == .windows) {
@compileError("fstatat is not yet implemented on Windows");
} else {
@@ -4332,11 +4479,12 @@ pub fn fstatat(dirfd: fd_t, pathname: []const u8, flags: u32) FStatAtError!Stat
}
/// WASI-only. Same as `fstatat` but targeting WASI.
+/// `pathname` should be encoded as valid UTF-8.
/// See also `fstatat`.
-pub fn fstatatWasi(dirfd: fd_t, pathname: []const u8, flags: u32) FStatAtError!Stat {
+pub fn fstatat_wasi(dirfd: fd_t, pathname: []const u8, flags: wasi.lookupflags_t) FStatAtError!wasi.filestat_t {
var stat: wasi.filestat_t = undefined;
switch (wasi.path_filestat_get(dirfd, flags, pathname.ptr, pathname.len, &stat)) {
- .SUCCESS => return Stat.fromFilestat(stat),
+ .SUCCESS => return stat,
.INVAL => unreachable,
.BADF => unreachable, // Always a race condition.
.NOMEM => return error.SystemResources,
@@ -4346,6 +4494,7 @@ pub fn fstatatWasi(dirfd: fd_t, pathname: []const u8, flags: u32) FStatAtError!S
.NOENT => return error.FileNotFound,
.NOTDIR => return error.FileNotFound,
.NOTCAPABLE => return error.AccessDenied,
+ .ILSEQ => return error.InvalidUtf8,
else => |err| return unexpectedErrno(err),
}
}
@@ -4354,7 +4503,10 @@ pub fn fstatatWasi(dirfd: fd_t, pathname: []const u8, flags: u32) FStatAtError!S
/// See also `fstatat`.
pub fn fstatatZ(dirfd: fd_t, pathname: [*:0]const u8, flags: u32) FStatAtError!Stat {
if (builtin.os.tag == .wasi and !builtin.link_libc) {
- return fstatatWasi(dirfd, mem.sliceTo(pathname), flags);
+ const filestat = try fstatat_wasi(dirfd, mem.sliceTo(pathname, 0), .{
+ .SYMLINK_FOLLOW = (flags & AT.SYMLINK_NOFOLLOW) == 0,
+ });
+ return Stat.fromFilestat(filestat);
}
const fstatat_sym = if (lfs64_abi) system.fstatat64 else system.fstatat;
@@ -4372,6 +4524,10 @@ pub fn fstatatZ(dirfd: fd_t, pathname: [*:0]const u8, flags: u32) FStatAtError!S
.LOOP => return error.SymLinkLoop,
.NOENT => return error.FileNotFound,
.NOTDIR => return error.FileNotFound,
+ .ILSEQ => |err| if (builtin.os.tag == .wasi)
+ return error.InvalidUtf8
+ else
+ return unexpectedErrno(err),
else => |err| return unexpectedErrno(err),
}
}
@@ -4635,7 +4791,7 @@ pub const MMapError = error{
/// A file descriptor refers to a non-regular file. Or a file mapping was requested,
/// but the file descriptor is not open for reading. Or `MAP.SHARED` was requested
- /// and `PROT_WRITE` is set, but the file descriptor is not open in `O.RDWR` mode.
+ /// and `PROT_WRITE` is set, but the file descriptor is not open in `RDWR` mode.
/// Or `PROT_WRITE` is set, but the file is append-only.
AccessDenied,
@@ -4728,12 +4884,17 @@ pub const AccessError = error{
FileBusy,
SymLinkLoop,
ReadOnlyFileSystem,
-
- /// On Windows, file paths must be valid Unicode.
+ /// WASI-only; file paths must be valid UTF-8.
InvalidUtf8,
+ /// Windows-only; file paths provided by the user must be valid WTF-8.
+ /// https://simonsapin.github.io/wtf-8/
+ InvalidWtf8,
} || UnexpectedError;
/// check user's permissions for a file
+/// On Windows, `path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
+/// On WASI, `path` should be encoded as valid UTF-8.
+/// On other platforms, `path` is an opaque sequence of bytes with no particular encoding.
/// TODO currently this assumes `mode` is `F.OK` on Windows.
pub fn access(path: []const u8, mode: u32) AccessError!void {
if (builtin.os.tag == .windows) {
@@ -4775,12 +4936,16 @@ pub fn accessZ(path: [*:0]const u8, mode: u32) AccessError!void {
.FAULT => unreachable,
.IO => return error.InputOutput,
.NOMEM => return error.SystemResources,
+ .ILSEQ => |err| if (builtin.os.tag == .wasi)
+ return error.InvalidUtf8
+ else
+ return unexpectedErrno(err),
else => |err| return unexpectedErrno(err),
}
}
-/// Call from Windows-specific code if you already have a UTF-16LE encoded, null terminated string.
-/// Otherwise use `access` or `accessC`.
+/// Call from Windows-specific code if you already have a WTF-16LE encoded, null terminated string.
+/// Otherwise use `access` or `accessZ`.
/// TODO currently this ignores `mode`.
pub fn accessW(path: [*:0]const u16, mode: u32) windows.GetFileAttributesError!void {
_ = mode;
@@ -4797,16 +4962,21 @@ pub fn accessW(path: [*:0]const u16, mode: u32) windows.GetFileAttributesError!v
}
/// Check user's permissions for a file, based on an open directory handle.
+/// On Windows, `path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
+/// On WASI, `path` should be encoded as valid UTF-8.
+/// On other platforms, `path` is an opaque sequence of bytes with no particular encoding.
/// TODO currently this ignores `mode` and `flags` on Windows.
pub fn faccessat(dirfd: fd_t, path: []const u8, mode: u32, flags: u32) AccessError!void {
if (builtin.os.tag == .windows) {
const path_w = try windows.sliceToPrefixedFileW(dirfd, path);
return faccessatW(dirfd, path_w.span().ptr, mode, flags);
} else if (builtin.os.tag == .wasi and !builtin.link_libc) {
- const resolved = RelativePathWasi{ .dir_fd = dirfd, .relative_path = path };
+ const resolved: RelativePathWasi = .{ .dir_fd = dirfd, .relative_path = path };
- const file = blk: {
- break :blk fstatat(dirfd, path, flags);
+ const st = blk: {
+ break :blk fstatat_wasi(dirfd, path, .{
+ .SYMLINK_FOLLOW = (flags & AT.SYMLINK_NOFOLLOW) == 0,
+ });
} catch |err| switch (err) {
error.AccessDenied => return error.PermissionDenied,
else => |e| return e,
@@ -4818,19 +4988,23 @@ pub fn faccessat(dirfd: fd_t, path: []const u8, mode: u32, flags: u32) AccessErr
return error.PermissionDenied;
}
- var rights: wasi.rights_t = 0;
+ var rights: wasi.rights_t = .{};
if (mode & R_OK != 0) {
- rights |= if (file.filetype == .DIRECTORY)
- wasi.RIGHT.FD_READDIR
- else
- wasi.RIGHT.FD_READ;
+ if (st.filetype == .DIRECTORY) {
+ rights.FD_READDIR = true;
+ } else {
+ rights.FD_READ = true;
+ }
}
if (mode & W_OK != 0) {
- rights |= wasi.RIGHT.FD_WRITE;
+ rights.FD_WRITE = true;
}
// No validation for X_OK
- if ((rights & directory.fs_rights_inheriting) != rights) {
+ // https://github.com/ziglang/zig/issues/18882
+ const rights_int: u64 = @bitCast(rights);
+ const inheriting_int: u64 = @bitCast(directory.fs_rights_inheriting);
+ if ((rights_int & inheriting_int) != rights_int) {
return error.PermissionDenied;
}
}
@@ -4861,6 +5035,10 @@ pub fn faccessatZ(dirfd: fd_t, path: [*:0]const u8, mode: u32, flags: u32) Acces
.FAULT => unreachable,
.IO => return error.InputOutput,
.NOMEM => return error.SystemResources,
+ .ILSEQ => |err| if (builtin.os.tag == .wasi)
+ return error.InvalidUtf8
+ else
+ return unexpectedErrno(err),
else => |err| return unexpectedErrno(err),
}
}
@@ -4923,7 +5101,7 @@ pub fn pipe() PipeError![2]fd_t {
}
}
-pub fn pipe2(flags: u32) PipeError![2]fd_t {
+pub fn pipe2(flags: O) PipeError![2]fd_t {
if (@hasDecl(system, "pipe2")) {
var fds: [2]fd_t = undefined;
switch (errno(system.pipe2(&fds, flags))) {
@@ -4942,12 +5120,13 @@ pub fn pipe2(flags: u32) PipeError![2]fd_t {
close(fds[1]);
}
- if (flags == 0)
+ // https://github.com/ziglang/zig/issues/18882
+ if (@as(u32, @bitCast(flags)) == 0)
return fds;
- // O.CLOEXEC is special, it's a file descriptor flag and must be set using
+ // CLOEXEC is special, it's a file descriptor flag and must be set using
// F.SETFD.
- if (flags & O.CLOEXEC != 0) {
+ if (flags.CLOEXEC) {
for (fds) |fd| {
switch (errno(system.fcntl(fd, F.SETFD, @as(u32, FD_CLOEXEC)))) {
.SUCCESS => {},
@@ -4958,7 +5137,11 @@ pub fn pipe2(flags: u32) PipeError![2]fd_t {
}
}
- const new_flags = flags & ~@as(u32, O.CLOEXEC);
+ const new_flags: u32 = f: {
+ var new_flags = flags;
+ new_flags.CLOEXEC = false;
+ break :f @bitCast(new_flags);
+ };
// Set every other flag affecting the file status using F.SETFL.
if (new_flags != 0) {
for (fds) |fd| {
@@ -5297,7 +5480,7 @@ fn setSockFlags(sock: socket_t, flags: u32) !void {
error.LockedRegionLimitExceeded => unreachable,
else => |e| return e,
};
- fl_flags |= O.NONBLOCK;
+ fl_flags |= 1 << @bitOffsetOf(O, "NONBLOCK");
_ = fcntl(sock, F.SETFL, fl_flags) catch |err| switch (err) {
error.FileBusy => unreachable,
error.Locked => unreachable,
@@ -5363,20 +5546,37 @@ pub const RealPathError = error{
/// On WASI, the current CWD may not be associated with an absolute path.
InvalidHandle,
- /// On Windows, file paths must be valid Unicode.
- InvalidUtf8,
+ /// Windows-only; file paths provided by the user must be valid WTF-8.
+ /// https://simonsapin.github.io/wtf-8/
+ InvalidWtf8,
/// On Windows, `\\server` or `\\server\share` was not found.
NetworkNotFound,
PathAlreadyExists,
+
+ /// On Windows, antivirus software is enabled by default. It can be
+ /// disabled, but Windows Update sometimes ignores the user's preference
+ /// and re-enables it. When enabled, antivirus software on Windows
+ /// intercepts file system operations and makes them significantly slower
+ /// in addition to possibly failing with this error code.
+ AntivirusInterference,
+
+ /// On Windows, the volume does not contain a recognized file system. File
+ /// system drivers might not be loaded, or the volume may be corrupt.
+ UnrecognizedVolume,
} || UnexpectedError;
/// Return the canonicalized absolute pathname.
/// Expands all symbolic links and resolves references to `.`, `..`, and
/// extra `/` characters in `pathname`.
+/// On Windows, `pathname` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
+/// On other platforms, `pathname` is an opaque sequence of bytes with no particular encoding.
/// The return value is a slice of `out_buffer`, but not necessarily from the beginning.
/// See also `realpathZ` and `realpathW`.
+/// On Windows, the result is encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
+/// On other platforms, the result is an opaque sequence of bytes with no particular encoding.
+/// Calling this function is usually a bug.
pub fn realpath(pathname: []const u8, out_buffer: *[MAX_PATH_BYTES]u8) RealPathError![]u8 {
if (builtin.os.tag == .windows) {
const pathname_w = try windows.sliceToPrefixedFileW(null, pathname);
@@ -5389,6 +5589,7 @@ pub fn realpath(pathname: []const u8, out_buffer: *[MAX_PATH_BYTES]u8) RealPathE
}
/// Same as `realpath` except `pathname` is null-terminated.
+/// Calling this function is usually a bug.
pub fn realpathZ(pathname: [*:0]const u8, out_buffer: *[MAX_PATH_BYTES]u8) RealPathError![]u8 {
if (builtin.os.tag == .windows) {
const pathname_w = try windows.cStrToPrefixedFileW(null, pathname);
@@ -5397,12 +5598,23 @@ pub fn realpathZ(pathname: [*:0]const u8, out_buffer: *[MAX_PATH_BYTES]u8) RealP
return realpath(mem.sliceTo(pathname, 0), out_buffer);
}
if (!builtin.link_libc) {
- const flags = if (builtin.os.tag == .linux) O.PATH | O.NONBLOCK | O.CLOEXEC else O.NONBLOCK | O.CLOEXEC;
+ const flags: O = switch (builtin.os.tag) {
+ .linux => .{
+ .NONBLOCK = true,
+ .CLOEXEC = true,
+ .PATH = true,
+ },
+ else => .{
+ .NONBLOCK = true,
+ .CLOEXEC = true,
+ },
+ };
const fd = openZ(pathname, flags, 0) catch |err| switch (err) {
error.FileLocksNotSupported => unreachable,
error.WouldBlock => unreachable,
error.FileBusy => unreachable, // not asking for write permissions
error.InvalidHandle => unreachable, // WASI-only
+ error.InvalidUtf8 => unreachable, // WASI-only
else => |e| return e,
};
defer close(fd);
@@ -5426,7 +5638,9 @@ pub fn realpathZ(pathname: [*:0]const u8, out_buffer: *[MAX_PATH_BYTES]u8) RealP
return mem.sliceTo(result_path, 0);
}
-/// Same as `realpath` except `pathname` is UTF16LE-encoded.
+/// Same as `realpath` except `pathname` is WTF16LE-encoded.
+/// The result is encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
+/// Calling this function is usually a bug.
pub fn realpathW(pathname: []const u16, out_buffer: *[MAX_PATH_BYTES]u8) RealPathError![]u8 {
const w = windows;
@@ -5440,7 +5654,6 @@ pub fn realpathW(pathname: []const u16, out_buffer: *[MAX_PATH_BYTES]u8) RealPat
.access_mask = access_mask,
.share_access = share_access,
.creation = creation,
- .io_mode = .blocking,
.filter = .any,
}) catch |err| switch (err) {
error.WouldBlock => unreachable,
@@ -5455,15 +5668,17 @@ pub fn realpathW(pathname: []const u16, out_buffer: *[MAX_PATH_BYTES]u8) RealPat
pub fn isGetFdPathSupportedOnTarget(os: std.Target.Os) bool {
return switch (os.tag) {
- // zig fmt: off
.windows,
- .macos, .ios, .watchos, .tvos,
+ .macos,
+ .ios,
+ .watchos,
+ .tvos,
.linux,
.solaris,
.illumos,
.freebsd,
=> true,
- // zig fmt: on
+
.dragonfly => os.version_range.semver.max.order(.{ .major = 6, .minor = 0, .patch = 0 }) != .lt,
.netbsd => os.version_range.semver.max.order(.{ .major = 10, .minor = 0, .patch = 0 }) != .lt,
else => false,
@@ -5474,6 +5689,9 @@ pub fn isGetFdPathSupportedOnTarget(os: std.Target.Os) bool {
/// This function is very host-specific and is not universally supported by all hosts.
/// For example, while it generally works on Linux, macOS, FreeBSD or Windows, it is
/// unsupported on WASI.
+/// On Windows, the result is encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
+/// On other platforms, the result is an opaque sequence of bytes with no particular encoding.
+/// Calling this function is usually a bug.
pub fn getFdPath(fd: fd_t, out_buffer: *[MAX_PATH_BYTES]u8) RealPathError![]u8 {
if (!comptime isGetFdPathSupportedOnTarget(builtin.os)) {
@compileError("querying for canonical path of a handle is unsupported on this host");
@@ -5483,8 +5701,7 @@ pub fn getFdPath(fd: fd_t, out_buffer: *[MAX_PATH_BYTES]u8) RealPathError![]u8 {
var wide_buf: [windows.PATH_MAX_WIDE]u16 = undefined;
const wide_slice = try windows.GetFinalPathNameByHandle(fd, .{}, wide_buf[0..]);
- // Trust that Windows gives us valid UTF-16LE.
- const end_index = std.unicode.utf16leToUtf8(out_buffer, wide_slice) catch unreachable;
+ const end_index = std.unicode.wtf16LeToWtf8(out_buffer, wide_slice);
return out_buffer[0..end_index];
},
.macos, .ios, .watchos, .tvos => {
@@ -5508,8 +5725,12 @@ pub fn getFdPath(fd: fd_t, out_buffer: *[MAX_PATH_BYTES]u8) RealPathError![]u8 {
const target = readlinkZ(proc_path, out_buffer) catch |err| {
switch (err) {
- error.UnsupportedReparsePointType => unreachable, // Windows only,
error.NotLink => unreachable,
+ error.BadPathName => unreachable,
+ error.InvalidUtf8 => unreachable, // WASI-only
+ error.InvalidWtf8 => unreachable, // Windows-only
+ error.UnsupportedReparsePointType => unreachable, // Windows-only
+ error.NetworkNotFound => unreachable, // Windows-only
else => |e| return e,
}
};
@@ -5902,7 +6123,10 @@ pub fn futimens(fd: fd_t, times: *const [2]timespec) FutimensError!void {
// this here, but we should really handle it somehow.
const atim = times[0].toTimestamp();
const mtim = times[1].toTimestamp();
- switch (wasi.fd_filestat_set_times(fd, atim, mtim, wasi.FILESTAT_SET_ATIM | wasi.FILESTAT_SET_MTIM)) {
+ switch (wasi.fd_filestat_set_times(fd, atim, mtim, .{
+ .ATIM = true,
+ .MTIM = true,
+ })) {
.SUCCESS => return,
.ACCES => return error.AccessDenied,
.PERM => return error.PermissionDenied,
@@ -6399,17 +6623,12 @@ pub fn sendfile(
// * Descriptor is not valid or locked
// * an mmap(2)-like operation is not available for in_fd
// * count is negative
- // * out_fd has the O.APPEND flag set
+ // * out_fd has the APPEND flag set
// Because of the "mmap(2)-like operation" possibility, we fall back to doing read/write
// manually, the same as ENOSYS.
break :sf;
},
- .AGAIN => if (std.event.Loop.instance) |loop| {
- loop.waitUntilFdWritable(out_fd);
- continue;
- } else {
- return error.WouldBlock;
- },
+ .AGAIN => return error.WouldBlock,
.IO => return error.InputOutput,
.PIPE => return error.BrokenPipe,
.NOMEM => return error.SystemResources,
@@ -6476,18 +6695,12 @@ pub fn sendfile(
.AGAIN => if (amt != 0) {
return amt;
- } else if (std.event.Loop.instance) |loop| {
- loop.waitUntilFdWritable(out_fd);
- continue;
} else {
return error.WouldBlock;
},
.BUSY => if (amt != 0) {
return amt;
- } else if (std.event.Loop.instance) |loop| {
- loop.waitUntilFdReadable(in_fd);
- continue;
} else {
return error.WouldBlock;
},
@@ -6550,9 +6763,6 @@ pub fn sendfile(
.AGAIN => if (amt != 0) {
return amt;
- } else if (std.event.Loop.instance) |loop| {
- loop.waitUntilFdWritable(out_fd);
- continue;
} else {
return error.WouldBlock;
},
@@ -6611,7 +6821,7 @@ pub const CopyFileRangeError = error{
FileTooBig,
InputOutput,
/// `fd_in` is not open for reading; or `fd_out` is not open for writing;
- /// or the `O.APPEND` flag is set for `fd_out`.
+ /// or the `APPEND` flag is set for `fd_out`.
FilesOpenedWithWrongFlags,
IsDir,
OutOfMemory,
@@ -7448,7 +7658,7 @@ pub const TimerFdCreateError = error{
pub const TimerFdGetError = error{InvalidHandle} || UnexpectedError;
pub const TimerFdSetError = TimerFdGetError || error{Canceled};
-pub fn timerfd_create(clokid: i32, flags: u32) TimerFdCreateError!fd_t {
+pub fn timerfd_create(clokid: i32, flags: linux.TFD) TimerFdCreateError!fd_t {
const rc = linux.timerfd_create(clokid, flags);
return switch (errno(rc)) {
.SUCCESS => @as(fd_t, @intCast(rc)),
@@ -7462,7 +7672,12 @@ pub fn timerfd_create(clokid: i32, flags: u32) TimerFdCreateError!fd_t {
};
}
-pub fn timerfd_settime(fd: i32, flags: u32, new_value: *const linux.itimerspec, old_value: ?*linux.itimerspec) TimerFdSetError!void {
+pub fn timerfd_settime(
+ fd: i32,
+ flags: linux.TFD.TIMER,
+ new_value: *const linux.itimerspec,
+ old_value: ?*linux.itimerspec,
+) TimerFdSetError!void {
const rc = linux.timerfd_settime(fd, flags, new_value, old_value);
return switch (errno(rc)) {
.SUCCESS => {},
diff --git a/lib/std/os/emscripten.zig b/lib/std/os/emscripten.zig
index 883136b39a..6b8fa6ff33 100644
--- a/lib/std/os/emscripten.zig
+++ b/lib/std/os/emscripten.zig
@@ -127,20 +127,6 @@ pub const AF = struct {
pub const MAX = PF.MAX;
};
-pub const AT = struct {
- pub const FDCWD = -100;
- pub const SYMLINK_NOFOLLOW = 0x100;
- pub const REMOVEDIR = 0x200;
- pub const SYMLINK_FOLLOW = 0x400;
- pub const NO_AUTOMOUNT = 0x800;
- pub const EMPTY_PATH = 0x1000;
- pub const STATX_SYNC_TYPE = 0x6000;
- pub const STATX_SYNC_AS_STAT = 0x0000;
- pub const STATX_FORCE_SYNC = 0x2000;
- pub const STATX_DONT_SYNC = 0x4000;
- pub const RECURSIVE = 0x8000;
-};
-
pub const CLOCK = struct {
pub const REALTIME = 0;
pub const MONOTONIC = 1;
@@ -169,85 +155,85 @@ pub fn CPU_COUNT(set: cpu_set_t) cpu_count_t {
}
pub const E = enum(u16) {
- SUCCESS = @intFromEnum(wasi.E.SUCCESS),
- @"2BIG" = @intFromEnum(wasi.E.@"2BIG"),
- ACCES = @intFromEnum(wasi.E.ACCES),
- ADDRINUSE = @intFromEnum(wasi.E.ADDRINUSE),
- ADDRNOTAVAIL = @intFromEnum(wasi.E.ADDRNOTAVAIL),
- AFNOSUPPORT = @intFromEnum(wasi.E.AFNOSUPPORT),
+ SUCCESS = @intFromEnum(wasi.errno_t.SUCCESS),
+ @"2BIG" = @intFromEnum(wasi.errno_t.@"2BIG"),
+ ACCES = @intFromEnum(wasi.errno_t.ACCES),
+ ADDRINUSE = @intFromEnum(wasi.errno_t.ADDRINUSE),
+ ADDRNOTAVAIL = @intFromEnum(wasi.errno_t.ADDRNOTAVAIL),
+ AFNOSUPPORT = @intFromEnum(wasi.errno_t.AFNOSUPPORT),
/// This is also the error code used for `WOULDBLOCK`.
- AGAIN = @intFromEnum(wasi.E.AGAIN),
- ALREADY = @intFromEnum(wasi.E.ALREADY),
- BADF = @intFromEnum(wasi.E.BADF),
- BADMSG = @intFromEnum(wasi.E.BADMSG),
- BUSY = @intFromEnum(wasi.E.BUSY),
- CANCELED = @intFromEnum(wasi.E.CANCELED),
- CHILD = @intFromEnum(wasi.E.CHILD),
- CONNABORTED = @intFromEnum(wasi.E.CONNABORTED),
- CONNREFUSED = @intFromEnum(wasi.E.CONNREFUSED),
- CONNRESET = @intFromEnum(wasi.E.CONNRESET),
- DEADLK = @intFromEnum(wasi.E.DEADLK),
- DESTADDRREQ = @intFromEnum(wasi.E.DESTADDRREQ),
- DOM = @intFromEnum(wasi.E.DOM),
- DQUOT = @intFromEnum(wasi.E.DQUOT),
- EXIST = @intFromEnum(wasi.E.EXIST),
- FAULT = @intFromEnum(wasi.E.FAULT),
- FBIG = @intFromEnum(wasi.E.FBIG),
- HOSTUNREACH = @intFromEnum(wasi.E.HOSTUNREACH),
- IDRM = @intFromEnum(wasi.E.IDRM),
- ILSEQ = @intFromEnum(wasi.E.ILSEQ),
- INPROGRESS = @intFromEnum(wasi.E.INPROGRESS),
- INTR = @intFromEnum(wasi.E.INTR),
- INVAL = @intFromEnum(wasi.E.INVAL),
- IO = @intFromEnum(wasi.E.IO),
- ISCONN = @intFromEnum(wasi.E.ISCONN),
- ISDIR = @intFromEnum(wasi.E.ISDIR),
- LOOP = @intFromEnum(wasi.E.LOOP),
- MFILE = @intFromEnum(wasi.E.MFILE),
- MLINK = @intFromEnum(wasi.E.MLINK),
- MSGSIZE = @intFromEnum(wasi.E.MSGSIZE),
- MULTIHOP = @intFromEnum(wasi.E.MULTIHOP),
- NAMETOOLONG = @intFromEnum(wasi.E.NAMETOOLONG),
- NETDOWN = @intFromEnum(wasi.E.NETDOWN),
- NETRESET = @intFromEnum(wasi.E.NETRESET),
- NETUNREACH = @intFromEnum(wasi.E.NETUNREACH),
- NFILE = @intFromEnum(wasi.E.NFILE),
- NOBUFS = @intFromEnum(wasi.E.NOBUFS),
- NODEV = @intFromEnum(wasi.E.NODEV),
- NOENT = @intFromEnum(wasi.E.NOENT),
- NOEXEC = @intFromEnum(wasi.E.NOEXEC),
- NOLCK = @intFromEnum(wasi.E.NOLCK),
- NOLINK = @intFromEnum(wasi.E.NOLINK),
- NOMEM = @intFromEnum(wasi.E.NOMEM),
- NOMSG = @intFromEnum(wasi.E.NOMSG),
- NOPROTOOPT = @intFromEnum(wasi.E.NOPROTOOPT),
- NOSPC = @intFromEnum(wasi.E.NOSPC),
- NOSYS = @intFromEnum(wasi.E.NOSYS),
- NOTCONN = @intFromEnum(wasi.E.NOTCONN),
- NOTDIR = @intFromEnum(wasi.E.NOTDIR),
- NOTEMPTY = @intFromEnum(wasi.E.NOTEMPTY),
- NOTRECOVERABLE = @intFromEnum(wasi.E.NOTRECOVERABLE),
- NOTSOCK = @intFromEnum(wasi.E.NOTSOCK),
+ AGAIN = @intFromEnum(wasi.errno_t.AGAIN),
+ ALREADY = @intFromEnum(wasi.errno_t.ALREADY),
+ BADF = @intFromEnum(wasi.errno_t.BADF),
+ BADMSG = @intFromEnum(wasi.errno_t.BADMSG),
+ BUSY = @intFromEnum(wasi.errno_t.BUSY),
+ CANCELED = @intFromEnum(wasi.errno_t.CANCELED),
+ CHILD = @intFromEnum(wasi.errno_t.CHILD),
+ CONNABORTED = @intFromEnum(wasi.errno_t.CONNABORTED),
+ CONNREFUSED = @intFromEnum(wasi.errno_t.CONNREFUSED),
+ CONNRESET = @intFromEnum(wasi.errno_t.CONNRESET),
+ DEADLK = @intFromEnum(wasi.errno_t.DEADLK),
+ DESTADDRREQ = @intFromEnum(wasi.errno_t.DESTADDRREQ),
+ DOM = @intFromEnum(wasi.errno_t.DOM),
+ DQUOT = @intFromEnum(wasi.errno_t.DQUOT),
+ EXIST = @intFromEnum(wasi.errno_t.EXIST),
+ FAULT = @intFromEnum(wasi.errno_t.FAULT),
+ FBIG = @intFromEnum(wasi.errno_t.FBIG),
+ HOSTUNREACH = @intFromEnum(wasi.errno_t.HOSTUNREACH),
+ IDRM = @intFromEnum(wasi.errno_t.IDRM),
+ ILSEQ = @intFromEnum(wasi.errno_t.ILSEQ),
+ INPROGRESS = @intFromEnum(wasi.errno_t.INPROGRESS),
+ INTR = @intFromEnum(wasi.errno_t.INTR),
+ INVAL = @intFromEnum(wasi.errno_t.INVAL),
+ IO = @intFromEnum(wasi.errno_t.IO),
+ ISCONN = @intFromEnum(wasi.errno_t.ISCONN),
+ ISDIR = @intFromEnum(wasi.errno_t.ISDIR),
+ LOOP = @intFromEnum(wasi.errno_t.LOOP),
+ MFILE = @intFromEnum(wasi.errno_t.MFILE),
+ MLINK = @intFromEnum(wasi.errno_t.MLINK),
+ MSGSIZE = @intFromEnum(wasi.errno_t.MSGSIZE),
+ MULTIHOP = @intFromEnum(wasi.errno_t.MULTIHOP),
+ NAMETOOLONG = @intFromEnum(wasi.errno_t.NAMETOOLONG),
+ NETDOWN = @intFromEnum(wasi.errno_t.NETDOWN),
+ NETRESET = @intFromEnum(wasi.errno_t.NETRESET),
+ NETUNREACH = @intFromEnum(wasi.errno_t.NETUNREACH),
+ NFILE = @intFromEnum(wasi.errno_t.NFILE),
+ NOBUFS = @intFromEnum(wasi.errno_t.NOBUFS),
+ NODEV = @intFromEnum(wasi.errno_t.NODEV),
+ NOENT = @intFromEnum(wasi.errno_t.NOENT),
+ NOEXEC = @intFromEnum(wasi.errno_t.NOEXEC),
+ NOLCK = @intFromEnum(wasi.errno_t.NOLCK),
+ NOLINK = @intFromEnum(wasi.errno_t.NOLINK),
+ NOMEM = @intFromEnum(wasi.errno_t.NOMEM),
+ NOMSG = @intFromEnum(wasi.errno_t.NOMSG),
+ NOPROTOOPT = @intFromEnum(wasi.errno_t.NOPROTOOPT),
+ NOSPC = @intFromEnum(wasi.errno_t.NOSPC),
+ NOSYS = @intFromEnum(wasi.errno_t.NOSYS),
+ NOTCONN = @intFromEnum(wasi.errno_t.NOTCONN),
+ NOTDIR = @intFromEnum(wasi.errno_t.NOTDIR),
+ NOTEMPTY = @intFromEnum(wasi.errno_t.NOTEMPTY),
+ NOTRECOVERABLE = @intFromEnum(wasi.errno_t.NOTRECOVERABLE),
+ NOTSOCK = @intFromEnum(wasi.errno_t.NOTSOCK),
/// This is also the code used for `NOTSUP`.
- OPNOTSUPP = @intFromEnum(wasi.E.OPNOTSUPP),
- NOTTY = @intFromEnum(wasi.E.NOTTY),
- NXIO = @intFromEnum(wasi.E.NXIO),
- OVERFLOW = @intFromEnum(wasi.E.OVERFLOW),
- OWNERDEAD = @intFromEnum(wasi.E.OWNERDEAD),
- PERM = @intFromEnum(wasi.E.PERM),
- PIPE = @intFromEnum(wasi.E.PIPE),
- PROTO = @intFromEnum(wasi.E.PROTO),
- PROTONOSUPPORT = @intFromEnum(wasi.E.PROTONOSUPPORT),
- PROTOTYPE = @intFromEnum(wasi.E.PROTOTYPE),
- RANGE = @intFromEnum(wasi.E.RANGE),
- ROFS = @intFromEnum(wasi.E.ROFS),
- SPIPE = @intFromEnum(wasi.E.SPIPE),
- SRCH = @intFromEnum(wasi.E.SRCH),
- STALE = @intFromEnum(wasi.E.STALE),
- TIMEDOUT = @intFromEnum(wasi.E.TIMEDOUT),
- TXTBSY = @intFromEnum(wasi.E.TXTBSY),
- XDEV = @intFromEnum(wasi.E.XDEV),
- NOTCAPABLE = @intFromEnum(wasi.E.NOTCAPABLE),
+ OPNOTSUPP = @intFromEnum(wasi.errno_t.OPNOTSUPP),
+ NOTTY = @intFromEnum(wasi.errno_t.NOTTY),
+ NXIO = @intFromEnum(wasi.errno_t.NXIO),
+ OVERFLOW = @intFromEnum(wasi.errno_t.OVERFLOW),
+ OWNERDEAD = @intFromEnum(wasi.errno_t.OWNERDEAD),
+ PERM = @intFromEnum(wasi.errno_t.PERM),
+ PIPE = @intFromEnum(wasi.errno_t.PIPE),
+ PROTO = @intFromEnum(wasi.errno_t.PROTO),
+ PROTONOSUPPORT = @intFromEnum(wasi.errno_t.PROTONOSUPPORT),
+ PROTOTYPE = @intFromEnum(wasi.errno_t.PROTOTYPE),
+ RANGE = @intFromEnum(wasi.errno_t.RANGE),
+ ROFS = @intFromEnum(wasi.errno_t.ROFS),
+ SPIPE = @intFromEnum(wasi.errno_t.SPIPE),
+ SRCH = @intFromEnum(wasi.errno_t.SRCH),
+ STALE = @intFromEnum(wasi.errno_t.STALE),
+ TIMEDOUT = @intFromEnum(wasi.errno_t.TIMEDOUT),
+ TXTBSY = @intFromEnum(wasi.errno_t.TXTBSY),
+ XDEV = @intFromEnum(wasi.errno_t.XDEV),
+ NOTCAPABLE = @intFromEnum(wasi.errno_t.NOTCAPABLE),
ENOSTR = 100,
EBFONT = 101,
@@ -479,33 +465,6 @@ pub const MSG = struct {
pub const CMSG_CLOEXEC = 0x40000000;
};
-pub const O = struct {
- pub const RDONLY = 0o0;
- pub const WRONLY = 0o1;
- pub const RDWR = 0o2;
-
- pub const CREAT = 0o100;
- pub const EXCL = 0o200;
- pub const NOCTTY = 0o400;
- pub const TRUNC = 0o1000;
- pub const APPEND = 0o2000;
- pub const NONBLOCK = 0o4000;
- pub const DSYNC = 0o10000;
- pub const SYNC = 0o4010000;
- pub const RSYNC = 0o4010000;
- pub const DIRECTORY = 0o200000;
- pub const NOFOLLOW = 0o400000;
- pub const CLOEXEC = 0o2000000;
-
- pub const ASYNC = 0o20000;
- pub const DIRECT = 0o40000;
- pub const LARGEFILE = 0o100000;
- pub const NOATIME = 0o1000000;
- pub const PATH = 0o10000000;
- pub const TMPFILE = 0o20200000;
- pub const NDELAY = NONBLOCK;
-};
-
pub const POLL = struct {
pub const IN = 0x001;
pub const PRI = 0x002;
@@ -1139,23 +1098,6 @@ pub const stack_t = extern struct {
size: usize,
};
-pub const cc_t = u8;
-pub const speed_t = u32;
-pub const tcflag_t = u32;
-
-pub const NCCS = 32;
-
-pub const termios = extern struct {
- iflag: tcflag_t,
- oflag: tcflag_t,
- cflag: tcflag_t,
- lflag: tcflag_t,
- line: cc_t,
- cc: [NCCS]cc_t,
- ispeed: speed_t,
- ospeed: speed_t,
-};
-
pub const timespec = extern struct {
tv_sec: time_t,
tv_nsec: isize,
diff --git a/lib/std/os/linux.zig b/lib/std/os/linux.zig
index 0a6e4fee1d..04676c3477 100644
--- a/lib/std/os/linux.zig
+++ b/lib/std/os/linux.zig
@@ -20,6 +20,7 @@ const is_ppc64 = native_arch.isPPC64();
const is_sparc = native_arch.isSPARC();
const iovec = std.os.iovec;
const iovec_const = std.os.iovec_const;
+const ACCMODE = std.os.ACCMODE;
test {
if (builtin.os.tag == .linux) {
@@ -241,12 +242,145 @@ pub const MAP = switch (native_arch) {
else => @compileError("missing std.os.linux.MAP constants for this architecture"),
};
-pub const O = struct {
- pub usingnamespace arch_bits.O;
-
- pub const RDONLY = 0o0;
- pub const WRONLY = 0o1;
- pub const RDWR = 0o2;
+pub const O = switch (native_arch) {
+ .x86_64 => packed struct(u32) {
+ ACCMODE: ACCMODE = .RDONLY,
+ _2: u4 = 0,
+ CREAT: bool = false,
+ EXCL: bool = false,
+ NOCTTY: bool = false,
+ TRUNC: bool = false,
+ APPEND: bool = false,
+ NONBLOCK: bool = false,
+ DSYNC: bool = false,
+ ASYNC: bool = false,
+ DIRECT: bool = false,
+ _15: u1 = 0,
+ DIRECTORY: bool = false,
+ NOFOLLOW: bool = false,
+ NOATIME: bool = false,
+ CLOEXEC: bool = false,
+ SYNC: bool = false,
+ PATH: bool = false,
+ TMPFILE: bool = false,
+ _: u9 = 0,
+ },
+ .x86, .riscv64 => packed struct(u32) {
+ ACCMODE: ACCMODE = .RDONLY,
+ _2: u4 = 0,
+ CREAT: bool = false,
+ EXCL: bool = false,
+ NOCTTY: bool = false,
+ TRUNC: bool = false,
+ APPEND: bool = false,
+ NONBLOCK: bool = false,
+ DSYNC: bool = false,
+ ASYNC: bool = false,
+ DIRECT: bool = false,
+ LARGEFILE: bool = false,
+ DIRECTORY: bool = false,
+ NOFOLLOW: bool = false,
+ NOATIME: bool = false,
+ CLOEXEC: bool = false,
+ SYNC: bool = false,
+ PATH: bool = false,
+ TMPFILE: bool = false,
+ _: u9 = 0,
+ },
+ .aarch64, .aarch64_be, .arm, .thumb => packed struct(u32) {
+ ACCMODE: ACCMODE = .RDONLY,
+ _2: u4 = 0,
+ CREAT: bool = false,
+ EXCL: bool = false,
+ NOCTTY: bool = false,
+ TRUNC: bool = false,
+ APPEND: bool = false,
+ NONBLOCK: bool = false,
+ DSYNC: bool = false,
+ ASYNC: bool = false,
+ DIRECTORY: bool = false,
+ NOFOLLOW: bool = false,
+ DIRECT: bool = false,
+ LARGEFILE: bool = false,
+ NOATIME: bool = false,
+ CLOEXEC: bool = false,
+ SYNC: bool = false,
+ PATH: bool = false,
+ TMPFILE: bool = false,
+ _: u9 = 0,
+ },
+ .sparc64 => packed struct(u32) {
+ ACCMODE: ACCMODE = .RDONLY,
+ _2: u1 = 0,
+ APPEND: bool = false,
+ _4: u2 = 0,
+ ASYNC: bool = false,
+ _7: u2 = 0,
+ CREAT: bool = false,
+ TRUNC: bool = false,
+ EXCL: bool = false,
+ _12: u1 = 0,
+ DSYNC: bool = false,
+ NONBLOCK: bool = false,
+ NOCTTY: bool = false,
+ DIRECTORY: bool = false,
+ NOFOLLOW: bool = false,
+ _18: u2 = 0,
+ DIRECT: bool = false,
+ NOATIME: bool = false,
+ CLOEXEC: bool = false,
+ SYNC: bool = false,
+ PATH: bool = false,
+ TMPFILE: bool = false,
+ _: u6 = 0,
+ },
+ .mips, .mipsel, .mips64, .mips64el => packed struct(u32) {
+ ACCMODE: ACCMODE = .RDONLY,
+ _2: u1 = 0,
+ APPEND: bool = false,
+ DSYNC: bool = false,
+ _5: u2 = 0,
+ NONBLOCK: bool = false,
+ CREAT: bool = false,
+ TRUNC: bool = false,
+ EXCL: bool = false,
+ NOCTTY: bool = false,
+ ASYNC: bool = false,
+ LARGEFILE: bool = false,
+ SYNC: bool = false,
+ DIRECT: bool = false,
+ DIRECTORY: bool = false,
+ NOFOLLOW: bool = false,
+ NOATIME: bool = false,
+ CLOEXEC: bool = false,
+ _20: u1 = 0,
+ PATH: bool = false,
+ TMPFILE: bool = false,
+ _: u9 = 0,
+ },
+ .powerpc, .powerpcle, .powerpc64, .powerpc64le => packed struct(u32) {
+ ACCMODE: ACCMODE = .RDONLY,
+ _2: u4 = 0,
+ CREAT: bool = false,
+ EXCL: bool = false,
+ NOCTTY: bool = false,
+ TRUNC: bool = false,
+ APPEND: bool = false,
+ NONBLOCK: bool = false,
+ DSYNC: bool = false,
+ ASYNC: bool = false,
+ DIRECTORY: bool = false,
+ NOFOLLOW: bool = false,
+ LARGEFILE: bool = false,
+ DIRECT: bool = false,
+ NOATIME: bool = false,
+ CLOEXEC: bool = false,
+ SYNC: bool = false,
+ PATH: bool = false,
+ TMPFILE: bool = false,
+ _: u9 = 0,
+ },
+ else => @compileError("missing std.os.linux.O constants for this architecture"),
};
pub usingnamespace @import("linux/io_uring.zig");
@@ -620,20 +754,20 @@ pub fn umount2(special: [*:0]const u8, flags: u32) usize {
return syscall2(.umount2, @intFromPtr(special), flags);
}
-pub fn mmap(address: ?[*]u8, length: usize, prot: usize, flags: u32, fd: i32, offset: i64) usize {
+pub fn mmap(address: ?[*]u8, length: usize, prot: usize, flags: MAP, fd: i32, offset: i64) usize {
if (@hasField(SYS, "mmap2")) {
// Make sure the offset is also specified in multiples of page size
if ((offset & (MMAP2_UNIT - 1)) != 0)
- return @as(usize, @bitCast(-@as(isize, @intFromEnum(E.INVAL))));
+ return @bitCast(-@as(isize, @intFromEnum(E.INVAL)));
return syscall6(
.mmap2,
@intFromPtr(address),
length,
prot,
- flags,
- @as(usize, @bitCast(@as(isize, fd))),
- @as(usize, @truncate(@as(u64, @bitCast(offset)) / MMAP2_UNIT)),
+ @as(u32, @bitCast(flags)),
+ @bitCast(@as(isize, fd)),
+ @truncate(@as(u64, @bitCast(offset)) / MMAP2_UNIT),
);
} else {
return syscall6(
@@ -641,8 +775,8 @@ pub fn mmap(address: ?[*]u8, length: usize, prot: usize, flags: u32, fd: i32, of
@intFromPtr(address),
length,
prot,
- flags,
- @as(usize, @bitCast(@as(isize, fd))),
+ @as(u32, @bitCast(flags)),
+ @bitCast(@as(isize, fd)),
@as(u64, @bitCast(offset)),
);
}
@@ -840,12 +974,12 @@ pub fn pipe(fd: *[2]i32) usize {
}
}
-pub fn pipe2(fd: *[2]i32, flags: u32) usize {
- return syscall2(.pipe2, @intFromPtr(fd), flags);
+pub fn pipe2(fd: *[2]i32, flags: O) usize {
+ return syscall2(.pipe2, @intFromPtr(fd), @as(u32, @bitCast(flags)));
}
pub fn write(fd: i32, buf: [*]const u8, count: usize) usize {
- return syscall3(.write, @as(usize, @bitCast(@as(isize, fd))), @intFromPtr(buf), count);
+ return syscall3(.write, @bitCast(@as(isize, fd)), @intFromPtr(buf), count);
}
pub fn ftruncate(fd: i32, length: i64) usize {
@@ -958,15 +1092,15 @@ pub fn renameat2(oldfd: i32, oldpath: [*:0]const u8, newfd: i32, newpath: [*:0]c
);
}
-pub fn open(path: [*:0]const u8, flags: u32, perm: mode_t) usize {
+pub fn open(path: [*:0]const u8, flags: O, perm: mode_t) usize {
if (@hasField(SYS, "open")) {
- return syscall3(.open, @intFromPtr(path), flags, perm);
+ return syscall3(.open, @intFromPtr(path), @as(u32, @bitCast(flags)), perm);
} else {
return syscall4(
.openat,
- @as(usize, @bitCast(@as(isize, AT.FDCWD))),
+ @bitCast(@as(isize, AT.FDCWD)),
@intFromPtr(path),
- flags,
+ @as(u32, @bitCast(flags)),
perm,
);
}
@@ -976,9 +1110,9 @@ pub fn create(path: [*:0]const u8, perm: mode_t) usize {
return syscall2(.creat, @intFromPtr(path), perm);
}
-pub fn openat(dirfd: i32, path: [*:0]const u8, flags: u32, mode: mode_t) usize {
+pub fn openat(dirfd: i32, path: [*:0]const u8, flags: O, mode: mode_t) usize {
// dirfd could be negative, for example AT.FDCWD is -100
- return syscall4(.openat, @as(usize, @bitCast(@as(isize, dirfd))), @intFromPtr(path), flags, mode);
+ return syscall4(.openat, @bitCast(@as(isize, dirfd)), @intFromPtr(path), @as(u32, @bitCast(flags)), mode);
}
/// See also `clone` (from the arch-specific include)
@@ -1800,8 +1934,8 @@ pub fn eventfd(count: u32, flags: u32) usize {
return syscall2(.eventfd2, count, flags);
}
-pub fn timerfd_create(clockid: i32, flags: u32) usize {
- return syscall2(.timerfd_create, @as(usize, @bitCast(@as(isize, clockid))), flags);
+pub fn timerfd_create(clockid: i32, flags: TFD) usize {
+ return syscall2(.timerfd_create, @bitCast(@as(isize, clockid)), @as(u32, @bitCast(flags)));
}
pub const itimerspec = extern struct {
@@ -1810,11 +1944,11 @@ pub const itimerspec = extern struct {
};
pub fn timerfd_gettime(fd: i32, curr_value: *itimerspec) usize {
- return syscall2(.timerfd_gettime, @as(usize, @bitCast(@as(isize, fd))), @intFromPtr(curr_value));
+ return syscall2(.timerfd_gettime, @bitCast(@as(isize, fd)), @intFromPtr(curr_value));
}
-pub fn timerfd_settime(fd: i32, flags: u32, new_value: *const itimerspec, old_value: ?*itimerspec) usize {
- return syscall4(.timerfd_settime, @as(usize, @bitCast(@as(isize, fd))), flags, @intFromPtr(new_value), @intFromPtr(old_value));
+pub fn timerfd_settime(fd: i32, flags: TFD.TIMER, new_value: *const itimerspec, old_value: ?*itimerspec) usize {
+ return syscall4(.timerfd_settime, @bitCast(@as(isize, fd)), @as(u32, @bitCast(flags)), @intFromPtr(new_value), @intFromPtr(old_value));
}
// Flags for the 'setitimer' system call
@@ -2478,19 +2612,10 @@ pub const SIG = if (is_mips) struct {
pub const kernel_rwf = u32;
pub const RWF = struct {
- /// high priority request, poll if possible
pub const HIPRI: kernel_rwf = 0x00000001;
-
- /// per-IO O.DSYNC
pub const DSYNC: kernel_rwf = 0x00000002;
-
- /// per-IO O.SYNC
pub const SYNC: kernel_rwf = 0x00000004;
-
- /// per-IO, return -EAGAIN if operation would block
pub const NOWAIT: kernel_rwf = 0x00000008;
-
- /// per-IO O.APPEND
pub const APPEND: kernel_rwf = 0x00000010;
};
@@ -3257,7 +3382,7 @@ pub const T = struct {
};
pub const EPOLL = struct {
- pub const CLOEXEC = O.CLOEXEC;
+ pub const CLOEXEC = 1 << @bitOffsetOf(O, "CLOEXEC");
pub const CTL_ADD = 1;
pub const CTL_DEL = 2;
@@ -3338,8 +3463,8 @@ pub const CLONE = struct {
pub const EFD = struct {
pub const SEMAPHORE = 1;
- pub const CLOEXEC = O.CLOEXEC;
- pub const NONBLOCK = O.NONBLOCK;
+ pub const CLOEXEC = 1 << @bitOffsetOf(O, "CLOEXEC");
+ pub const NONBLOCK = 1 << @bitOffsetOf(O, "NONBLOCK");
};
pub const MS = struct {
@@ -3388,8 +3513,8 @@ pub const MNT = struct {
pub const UMOUNT_NOFOLLOW = 8;
pub const IN = struct {
- pub const CLOEXEC = O.CLOEXEC;
- pub const NONBLOCK = O.NONBLOCK;
+ pub const CLOEXEC = 1 << @bitOffsetOf(O, "CLOEXEC");
+ pub const NONBLOCK = 1 << @bitOffsetOf(O, "NONBLOCK");
pub const ACCESS = 0x00000001;
pub const MODIFY = 0x00000002;
@@ -3534,12 +3659,40 @@ pub const UTIME = struct {
pub const OMIT = 0x3ffffffe;
};
-pub const TFD = struct {
- pub const NONBLOCK = O.NONBLOCK;
- pub const CLOEXEC = O.CLOEXEC;
+const TFD_TIMER = packed struct(u32) {
+ ABSTIME: bool = false,
+ CANCEL_ON_SET: bool = false,
+ _: u30 = 0,
+};
+
+pub const TFD = switch (native_arch) {
+ .sparc64 => packed struct(u32) {
+ _0: u14 = 0,
+ NONBLOCK: bool = false,
+ _15: u7 = 0,
+ CLOEXEC: bool = false,
+ _: u9 = 0,
+
+ pub const TIMER = TFD_TIMER;
+ },
+ .mips, .mipsel, .mips64, .mips64el => packed struct(u32) {
+ _0: u7 = 0,
+ NONBLOCK: bool = false,
+ _8: u11 = 0,
+ CLOEXEC: bool = false,
+ _: u12 = 0,
- pub const TIMER_ABSTIME = 1;
- pub const TIMER_CANCEL_ON_SET = (1 << 1);
+ pub const TIMER = TFD_TIMER;
+ },
+ else => packed struct(u32) {
+ _0: u11 = 0,
+ NONBLOCK: bool = false,
+ _12: u7 = 0,
+ CLOEXEC: bool = false,
+ _: u12 = 0,
+
+ pub const TIMER = TFD_TIMER;
+ },
};
pub const winsize = extern struct {
@@ -3603,8 +3756,8 @@ pub const empty_sigset = [_]u32{0} ** sigset_len;
pub const filled_sigset = [_]u32{(1 << (31 & (usize_bits - 1))) - 1} ++ [_]u32{0} ** (sigset_len - 1);
pub const SFD = struct {
- pub const CLOEXEC = O.CLOEXEC;
- pub const NONBLOCK = O.NONBLOCK;
+ pub const CLOEXEC = 1 << @bitOffsetOf(O, "CLOEXEC");
+ pub const NONBLOCK = 1 << @bitOffsetOf(O, "NONBLOCK");
};
pub const signalfd_siginfo = extern struct {
@@ -3865,15 +4018,11 @@ pub const inotify_event = extern struct {
};
pub const dirent64 = extern struct {
- d_ino: u64,
- d_off: u64,
- d_reclen: u16,
- d_type: u8,
- d_name: u8, // field address is the address of first byte of name https://github.com/ziglang/zig/issues/173
-
- pub fn reclen(self: dirent64) u16 {
- return self.d_reclen;
- }
+ ino: u64,
+ off: u64,
+ reclen: u16,
+ type: u8,
+ name: u8, // field address is the address of first byte of name https://github.com/ziglang/zig/issues/173
};
pub const dl_phdr_info = extern struct {
@@ -4151,6 +4300,13 @@ pub const IORING_OP = enum(u8) {
URING_CMD,
SEND_ZC,
SENDMSG_ZC,
+ READ_MULTISHOT,
+ WAITID,
+ FUTEX_WAIT,
+ FUTEX_WAKE,
+ FUTEX_WAITV,
+ FIXED_FD_INSTALL,
+ FTRUNCATE,
_,
};
@@ -4848,175 +5004,295 @@ pub const rusage = extern struct {
pub const THREAD = 1;
};
-pub const cc_t = u8;
-pub const speed_t = u32;
-pub const tcflag_t = u32;
-
-pub const NCCS = 32;
-
-pub const B0 = 0o0000000;
-pub const B50 = 0o0000001;
-pub const B75 = 0o0000002;
-pub const B110 = 0o0000003;
-pub const B134 = 0o0000004;
-pub const B150 = 0o0000005;
-pub const B200 = 0o0000006;
-pub const B300 = 0o0000007;
-pub const B600 = 0o0000010;
-pub const B1200 = 0o0000011;
-pub const B1800 = 0o0000012;
-pub const B2400 = 0o0000013;
-pub const B4800 = 0o0000014;
-pub const B9600 = 0o0000015;
-pub const B19200 = 0o0000016;
-pub const B38400 = 0o0000017;
-pub const BOTHER = 0o0010000;
-pub const B57600 = 0o0010001;
-pub const B115200 = 0o0010002;
-pub const B230400 = 0o0010003;
-pub const B460800 = 0o0010004;
-pub const B500000 = 0o0010005;
-pub const B576000 = 0o0010006;
-pub const B921600 = 0o0010007;
-pub const B1000000 = 0o0010010;
-pub const B1152000 = 0o0010011;
-pub const B1500000 = 0o0010012;
-pub const B2000000 = 0o0010013;
-pub const B2500000 = 0o0010014;
-pub const B3000000 = 0o0010015;
-pub const B3500000 = 0o0010016;
-pub const B4000000 = 0o0010017;
+pub const NCCS = switch (native_arch) {
+ .powerpc, .powerpcle, .powerpc64, .powerpc64le => 19,
+ else => 32,
+};
+
+pub const speed_t = switch (native_arch) {
+ .powerpc, .powerpcle, .powerpc64, .powerpc64le => enum(u32) {
+ B0 = 0o0000000,
+ B50 = 0o0000001,
+ B75 = 0o0000002,
+ B110 = 0o0000003,
+ B134 = 0o0000004,
+ B150 = 0o0000005,
+ B200 = 0o0000006,
+ B300 = 0o0000007,
+ B600 = 0o0000010,
+ B1200 = 0o0000011,
+ B1800 = 0o0000012,
+ B2400 = 0o0000013,
+ B4800 = 0o0000014,
+ B9600 = 0o0000015,
+ B19200 = 0o0000016,
+ B38400 = 0o0000017,
+
+ B57600 = 0o00020,
+ B115200 = 0o00021,
+ B230400 = 0o00022,
+ B460800 = 0o00023,
+ B500000 = 0o00024,
+ B576000 = 0o00025,
+ B921600 = 0o00026,
+ B1000000 = 0o00027,
+ B1152000 = 0o00030,
+ B1500000 = 0o00031,
+ B2000000 = 0o00032,
+ B2500000 = 0o00033,
+ B3000000 = 0o00034,
+ B3500000 = 0o00035,
+ B4000000 = 0o00036,
+ },
+ else => enum(u32) {
+ B0 = 0o0000000,
+ B50 = 0o0000001,
+ B75 = 0o0000002,
+ B110 = 0o0000003,
+ B134 = 0o0000004,
+ B150 = 0o0000005,
+ B200 = 0o0000006,
+ B300 = 0o0000007,
+ B600 = 0o0000010,
+ B1200 = 0o0000011,
+ B1800 = 0o0000012,
+ B2400 = 0o0000013,
+ B4800 = 0o0000014,
+ B9600 = 0o0000015,
+ B19200 = 0o0000016,
+ B38400 = 0o0000017,
+
+ B57600 = 0o0010001,
+ B115200 = 0o0010002,
+ B230400 = 0o0010003,
+ B460800 = 0o0010004,
+ B500000 = 0o0010005,
+ B576000 = 0o0010006,
+ B921600 = 0o0010007,
+ B1000000 = 0o0010010,
+ B1152000 = 0o0010011,
+ B1500000 = 0o0010012,
+ B2000000 = 0o0010013,
+ B2500000 = 0o0010014,
+ B3000000 = 0o0010015,
+ B3500000 = 0o0010016,
+ B4000000 = 0o0010017,
+ },
+};
-pub const V = switch (native_arch) {
- .powerpc, .powerpc64, .powerpc64le => struct {
- pub const INTR = 0;
- pub const QUIT = 1;
- pub const ERASE = 2;
- pub const KILL = 3;
- pub const EOF = 4;
- pub const MIN = 5;
- pub const EOL = 6;
- pub const TIME = 7;
- pub const EOL2 = 8;
- pub const SWTC = 9;
- pub const WERASE = 10;
- pub const REPRINT = 11;
- pub const SUSP = 12;
- pub const START = 13;
- pub const STOP = 14;
- pub const LNEXT = 15;
- pub const DISCARD = 16;
+pub const tc_iflag_t = switch (native_arch) {
+ .powerpc, .powerpcle, .powerpc64, .powerpc64le => packed struct(u32) {
+ IGNBRK: bool = false,
+ BRKINT: bool = false,
+ IGNPAR: bool = false,
+ PARMRK: bool = false,
+ INPCK: bool = false,
+ ISTRIP: bool = false,
+ INLCR: bool = false,
+ IGNCR: bool = false,
+ ICRNL: bool = false,
+ IXON: bool = false,
+ IXOFF: bool = false,
+ IXANY: bool = false,
+ IUCLC: bool = false,
+ IMAXBEL: bool = false,
+ IUTF8: bool = false,
+ _: u17 = 0,
},
- .sparc, .sparc64 => struct {
- pub const INTR = 0;
- pub const QUIT = 1;
- pub const ERASE = 2;
- pub const KILL = 3;
- pub const EOF = 4;
- pub const EOL = 5;
- pub const EOL2 = 6;
- pub const SWTC = 7;
- pub const START = 8;
- pub const STOP = 9;
- pub const SUSP = 10;
- pub const DSUSP = 11;
- pub const REPRINT = 12;
- pub const DISCARD = 13;
- pub const WERASE = 14;
- pub const LNEXT = 15;
- pub const MIN = EOF;
- pub const TIME = EOL;
+ else => packed struct(u32) {
+ IGNBRK: bool = false,
+ BRKINT: bool = false,
+ IGNPAR: bool = false,
+ PARMRK: bool = false,
+ INPCK: bool = false,
+ ISTRIP: bool = false,
+ INLCR: bool = false,
+ IGNCR: bool = false,
+ ICRNL: bool = false,
+ IUCLC: bool = false,
+ IXON: bool = false,
+ IXANY: bool = false,
+ IXOFF: bool = false,
+ IMAXBEL: bool = false,
+ IUTF8: bool = false,
+ _: u17 = 0,
+ },
+};
+
+pub const tc_oflag_t = switch (native_arch) {
+ .powerpc, .powerpcle, .powerpc64, .powerpc64le => packed struct(u32) {
+ OPOST: bool = false,
+ ONLCR: bool = false,
+ OLCUC: bool = false,
+ OCRNL: bool = false,
+ ONOCR: bool = false,
+ ONLRET: bool = false,
+ OFILL: bool = false,
+ OFDEL: bool = false,
+ NLDLY: u2 = 0,
+ TABDLY: u2 = 0,
+ CRDLY: u2 = 0,
+ FFDLY: u1 = 0,
+ BSDLY: u1 = 0,
+ VTDLY: u1 = 0,
+ _: u15 = 0,
},
- .mips, .mipsel, .mips64, .mips64el => struct {
- pub const INTR = 0;
- pub const QUIT = 1;
- pub const ERASE = 2;
- pub const KILL = 3;
- pub const MIN = 4;
- pub const TIME = 5;
- pub const EOL2 = 6;
- pub const SWTC = 7;
- pub const SWTCH = 7;
- pub const START = 8;
- pub const STOP = 9;
- pub const SUSP = 10;
- pub const REPRINT = 12;
- pub const DISCARD = 13;
- pub const WERASE = 14;
- pub const LNEXT = 15;
- pub const EOF = 16;
- pub const EOL = 17;
+ else => packed struct(u32) {
+ OPOST: bool = false,
+ OLCUC: bool = false,
+ ONLCR: bool = false,
+ OCRNL: bool = false,
+ ONOCR: bool = false,
+ ONLRET: bool = false,
+ OFILL: bool = false,
+ OFDEL: bool = false,
+ NLDLY: u1 = 0,
+ CRDLY: u2 = 0,
+ TABDLY: u2 = 0,
+ BSDLY: u1 = 0,
+ VTDLY: u1 = 0,
+ FFDLY: u1 = 0,
+ _: u16 = 0,
},
- else => struct {
- pub const INTR = 0;
- pub const QUIT = 1;
- pub const ERASE = 2;
- pub const KILL = 3;
- pub const EOF = 4;
- pub const TIME = 5;
- pub const MIN = 6;
- pub const SWTC = 7;
- pub const START = 8;
- pub const STOP = 9;
- pub const SUSP = 10;
- pub const EOL = 11;
- pub const REPRINT = 12;
- pub const DISCARD = 13;
- pub const WERASE = 14;
- pub const LNEXT = 15;
- pub const EOL2 = 16;
+};
+
+pub const CSIZE = enum(u2) { CS5, CS6, CS7, CS8 };
+
+pub const tc_cflag_t = switch (native_arch) {
+ .powerpc, .powerpcle, .powerpc64, .powerpc64le => packed struct(u32) {
+ _0: u8 = 0,
+ CSIZE: CSIZE = .CS5,
+ CSTOPB: bool = false,
+ CREAD: bool = false,
+ PARENB: bool = false,
+ PARODD: bool = false,
+ HUPCL: bool = false,
+ CLOCAL: bool = false,
+ _: u16 = 0,
+ },
+ else => packed struct(u32) {
+ _0: u4 = 0,
+ CSIZE: CSIZE = .CS5,
+ CSTOPB: bool = false,
+ CREAD: bool = false,
+ PARENB: bool = false,
+ PARODD: bool = false,
+ HUPCL: bool = false,
+ CLOCAL: bool = false,
+ _: u20 = 0,
+ },
+};
+
+pub const tc_lflag_t = switch (native_arch) {
+ .powerpc, .powerpcle, .powerpc64, .powerpc64le => packed struct(u32) {
+ _0: u1 = 0,
+ ECHOE: bool = false,
+ ECHOK: bool = false,
+ ECHO: bool = false,
+ ECHONL: bool = false,
+ _5: u2 = 0,
+ ISIG: bool = false,
+ ICANON: bool = false,
+ _9: u1 = 0,
+ IEXTEN: bool = false,
+ _11: u11 = 0,
+ TOSTOP: bool = false,
+ _23: u8 = 0,
+ NOFLSH: bool = false,
+ },
+ .mips, .mipsel, .mips64, .mips64el => packed struct(u32) {
+ ISIG: bool = false,
+ ICANON: bool = false,
+ _2: u1 = 0,
+ ECHO: bool = false,
+ ECHOE: bool = false,
+ ECHOK: bool = false,
+ ECHONL: bool = false,
+ NOFLSH: bool = false,
+ IEXTEN: bool = false,
+ _9: u6 = 0,
+ TOSTOP: bool = false,
+ _: u16 = 0,
+ },
+ else => packed struct(u32) {
+ ISIG: bool = false,
+ ICANON: bool = false,
+ _2: u1 = 0,
+ ECHO: bool = false,
+ ECHOE: bool = false,
+ ECHOK: bool = false,
+ ECHONL: bool = false,
+ NOFLSH: bool = false,
+ TOSTOP: bool = false,
+ _9: u6 = 0,
+ IEXTEN: bool = false,
+ _: u16 = 0,
},
};
-pub const IGNBRK: tcflag_t = 1;
-pub const BRKINT: tcflag_t = 2;
-pub const IGNPAR: tcflag_t = 4;
-pub const PARMRK: tcflag_t = 8;
-pub const INPCK: tcflag_t = 16;
-pub const ISTRIP: tcflag_t = 32;
-pub const INLCR: tcflag_t = 64;
-pub const IGNCR: tcflag_t = 128;
-pub const ICRNL: tcflag_t = 256;
-pub const IUCLC: tcflag_t = 512;
-pub const IXON: tcflag_t = 1024;
-pub const IXANY: tcflag_t = 2048;
-pub const IXOFF: tcflag_t = 4096;
-pub const IMAXBEL: tcflag_t = 8192;
-pub const IUTF8: tcflag_t = 16384;
-
-pub const OPOST: tcflag_t = 1;
-pub const OLCUC: tcflag_t = 2;
-pub const ONLCR: tcflag_t = 4;
-pub const OCRNL: tcflag_t = 8;
-pub const ONOCR: tcflag_t = 16;
-pub const ONLRET: tcflag_t = 32;
-pub const OFILL: tcflag_t = 64;
-pub const OFDEL: tcflag_t = 128;
-pub const VTDLY: tcflag_t = 16384;
-pub const VT0: tcflag_t = 0;
-pub const VT1: tcflag_t = 16384;
-
-pub const CSIZE: tcflag_t = 48;
-pub const CS5: tcflag_t = 0;
-pub const CS6: tcflag_t = 16;
-pub const CS7: tcflag_t = 32;
-pub const CS8: tcflag_t = 48;
-pub const CSTOPB: tcflag_t = 64;
-pub const CREAD: tcflag_t = 128;
-pub const PARENB: tcflag_t = 256;
-pub const PARODD: tcflag_t = 512;
-pub const HUPCL: tcflag_t = 1024;
-pub const CLOCAL: tcflag_t = 2048;
-
-pub const ISIG: tcflag_t = 1;
-pub const ICANON: tcflag_t = 2;
-pub const ECHO: tcflag_t = 8;
-pub const ECHOE: tcflag_t = 16;
-pub const ECHOK: tcflag_t = 32;
-pub const ECHONL: tcflag_t = 64;
-pub const NOFLSH: tcflag_t = 128;
-pub const TOSTOP: tcflag_t = 256;
-pub const IEXTEN: tcflag_t = 32768;
+pub const cc_t = u8;
+
+/// Indices into the `cc` array in the `termios` struct.
+pub const V = switch (native_arch) {
+ .mips, .mipsel, .mips64, .mips64el => enum {
+ INTR,
+ QUIT,
+ ERASE,
+ KILL,
+ MIN,
+ TIME,
+ EOL2,
+ SWTC,
+ START,
+ STOP,
+ SUSP,
+ reserved,
+ REPRINT,
+ DISCARD,
+ WERASE,
+ LNEXT,
+ EOF,
+ EOL,
+ },
+ .powerpc, .powerpcle, .powerpc64, .powerpc64le => enum {
+ INTR,
+ QUIT,
+ ERASE,
+ KILL,
+ EOF,
+ MIN,
+ EOL,
+ TIME,
+ EOL2,
+ SWTC,
+ WERASE,
+ REPRINT,
+ SUSP,
+ START,
+ STOP,
+ LNEXT,
+ DISCARD,
+ },
+ else => enum {
+ INTR,
+ QUIT,
+ ERASE,
+ KILL,
+ EOF,
+ TIME,
+ MIN,
+ SWTC,
+ START,
+ STOP,
+ SUSP,
+ EOL,
+ REPRINT,
+ DISCARD,
+ WERASE,
+ LNEXT,
+ EOL2,
+ },
+};
pub const TCSA = enum(c_uint) {
NOW,
@@ -5025,15 +5301,27 @@ pub const TCSA = enum(c_uint) {
_,
};
-pub const termios = extern struct {
- iflag: tcflag_t,
- oflag: tcflag_t,
- cflag: tcflag_t,
- lflag: tcflag_t,
- line: cc_t,
- cc: [NCCS]cc_t,
- ispeed: speed_t,
- ospeed: speed_t,
+pub const termios = switch (native_arch) { // the order of `cc` and `line` depends on the architecture
+ .powerpc, .powerpcle, .powerpc64, .powerpc64le => extern struct { // PowerPC: `cc` comes before `line`
+ iflag: tc_iflag_t,
+ oflag: tc_oflag_t,
+ cflag: tc_cflag_t,
+ lflag: tc_lflag_t,
+ cc: [NCCS]cc_t, // indexed by the members of `V`
+ line: cc_t,
+ ispeed: speed_t,
+ ospeed: speed_t,
+ },
+ else => extern struct { // all other architectures: `line` comes before `cc`
+ iflag: tc_iflag_t,
+ oflag: tc_oflag_t,
+ cflag: tc_cflag_t,
+ lflag: tc_lflag_t,
+ line: cc_t,
+ cc: [NCCS]cc_t, // indexed by the members of `V`
+ ispeed: speed_t,
+ ospeed: speed_t,
+ },
};
pub const SIOCGIFINDEX = 0x8933;
diff --git a/lib/std/os/linux/arm-eabi.zig b/lib/std/os/linux/arm-eabi.zig
index 74c381f496..68575c3344 100644
--- a/lib/std/os/linux/arm-eabi.zig
+++ b/lib/std/os/linux/arm-eabi.zig
@@ -141,29 +141,6 @@ pub fn restore_rt() callconv(.Naked) noreturn {
pub const MMAP2_UNIT = 4096;
-pub const O = struct {
- pub const CREAT = 0o100;
- pub const EXCL = 0o200;
- pub const NOCTTY = 0o400;
- pub const TRUNC = 0o1000;
- pub const APPEND = 0o2000;
- pub const NONBLOCK = 0o4000;
- pub const DSYNC = 0o10000;
- pub const SYNC = 0o4010000;
- pub const RSYNC = 0o4010000;
- pub const DIRECTORY = 0o40000;
- pub const NOFOLLOW = 0o100000;
- pub const CLOEXEC = 0o2000000;
-
- pub const ASYNC = 0o20000;
- pub const DIRECT = 0o200000;
- pub const LARGEFILE = 0o400000;
- pub const NOATIME = 0o1000000;
- pub const PATH = 0o10000000;
- pub const TMPFILE = 0o20040000;
- pub const NDELAY = NONBLOCK;
-};
-
pub const F = struct {
pub const DUPFD = 0;
pub const GETFD = 1;
diff --git a/lib/std/os/linux/arm64.zig b/lib/std/os/linux/arm64.zig
index 40dad5656e..f2331c1309 100644
--- a/lib/std/os/linux/arm64.zig
+++ b/lib/std/os/linux/arm64.zig
@@ -123,29 +123,6 @@ pub fn restore_rt() callconv(.Naked) noreturn {
}
}
-pub const O = struct {
- pub const CREAT = 0o100;
- pub const EXCL = 0o200;
- pub const NOCTTY = 0o400;
- pub const TRUNC = 0o1000;
- pub const APPEND = 0o2000;
- pub const NONBLOCK = 0o4000;
- pub const DSYNC = 0o10000;
- pub const SYNC = 0o4010000;
- pub const RSYNC = 0o4010000;
- pub const DIRECTORY = 0o40000;
- pub const NOFOLLOW = 0o100000;
- pub const CLOEXEC = 0o2000000;
-
- pub const ASYNC = 0o20000;
- pub const DIRECT = 0o200000;
- pub const LARGEFILE = 0o400000;
- pub const NOATIME = 0o1000000;
- pub const PATH = 0o10000000;
- pub const TMPFILE = 0o20040000;
- pub const NDELAY = NONBLOCK;
-};
-
pub const F = struct {
pub const DUPFD = 0;
pub const GETFD = 1;
diff --git a/lib/std/os/linux/io_uring.zig b/lib/std/os/linux/io_uring.zig
index 77e134feec..16c542714c 100644
--- a/lib/std/os/linux/io_uring.zig
+++ b/lib/std/os/linux/io_uring.zig
@@ -4,6 +4,7 @@ const assert = std.debug.assert;
const mem = std.mem;
const net = std.net;
const os = std.os;
+const posix = std.posix;
const linux = os.linux;
const testing = std.testing;
@@ -268,29 +269,34 @@ pub const IO_Uring = struct {
/// See https://github.com/axboe/liburing/issues/103#issuecomment-686665007.
/// Matches the implementation of io_uring_peek_batch_cqe() in liburing, but supports waiting.
pub fn copy_cqes(self: *IO_Uring, cqes: []linux.io_uring_cqe, wait_nr: u32) !u32 {
- const count = self.copy_cqes_ready(cqes, wait_nr);
+ const count = self.copy_cqes_ready(cqes);
if (count > 0) return count;
if (self.cq_ring_needs_flush() or wait_nr > 0) {
_ = try self.enter(0, wait_nr, linux.IORING_ENTER_GETEVENTS);
- return self.copy_cqes_ready(cqes, wait_nr);
+ return self.copy_cqes_ready(cqes);
}
return 0;
}
-    fn copy_cqes_ready(self: *IO_Uring, cqes: []linux.io_uring_cqe, wait_nr: u32) u32 {
-        _ = wait_nr;
+    /// Copies up to `cqes.len` ready CQEs into `cqes`, advances the CQ by that count, and returns it.
+    fn copy_cqes_ready(self: *IO_Uring, cqes: []linux.io_uring_cqe) u32 {
         const ready = self.cq_ready();
         const count = @min(cqes.len, ready);
-        var head = self.cq.head.*;
-        const tail = head +% count;
-        // TODO Optimize this by using 1 or 2 memcpy's (if the tail wraps) rather than a loop.
-        var i: usize = 0;
-        // Do not use "less-than" operator since head and tail may wrap:
-        while (head != tail) {
-            cqes[i] = self.cq.cqes[head & self.cq.mask]; // Copy struct by value.
-            head +%= 1;
-            i += 1;
+        const head = self.cq.head.* & self.cq.mask;
+
+        // The ready entries occupy at most two contiguous runs of the ring: one
+        // starting at `head`, and (if that run reaches the end of the ring) one
+        // starting at index 0. Run lengths are derived from `count`, not from a
+        // masked tail: when the ring is completely full the masked tail equals
+        // `head`, which is indistinguishable from an empty range.
+        const first_len = @min(self.cq.cqes.len - head, count);
+        @memcpy(cqes[0..first_len], self.cq.cqes[head..][0..first_len]);
+        if (count > first_len) {
+            // The rest wrapped around to the start of the ring.
+            const wrapped_len = count - first_len;
+            @memcpy(cqes[first_len..][0..wrapped_len], self.cq.cqes[0..wrapped_len]);
         }
+
         self.cq_advance(count);
         return count;
     }
@@ -760,7 +766,7 @@ pub const IO_Uring = struct {
user_data: u64,
fd: os.fd_t,
path: [*:0]const u8,
- flags: u32,
+ flags: linux.O,
mode: os.mode_t,
) !*linux.io_uring_sqe {
const sqe = try self.get_sqe();
@@ -785,7 +791,7 @@ pub const IO_Uring = struct {
user_data: u64,
fd: os.fd_t,
path: [*:0]const u8,
- flags: u32,
+ flags: linux.O,
mode: os.mode_t,
file_index: u32,
) !*linux.io_uring_sqe {
@@ -1114,6 +1120,23 @@ pub const IO_Uring = struct {
return sqe;
}
+ /// Queues (but does not submit) an SQE to perform a `waitid(2)`.
+ /// Returns a pointer to the SQE. `user_data` is stored in the SQE; the remaining arguments are forwarded to `io_uring_prep_waitid`.
+ pub fn waitid(
+ self: *IO_Uring,
+ user_data: u64,
+ id_type: linux.P,
+ id: i32,
+ infop: *linux.siginfo_t,
+ options: u32,
+ flags: u32,
+ ) !*linux.io_uring_sqe {
+ const sqe = try self.get_sqe(); // returns the error to the caller if no SQE could be obtained
+ io_uring_prep_waitid(sqe, id_type, id, infop, options, flags);
+ sqe.user_data = user_data; // set after the prep call so it is not overwritten by it
+ return sqe;
+ }
+
/// Registers an array of file descriptors.
/// Every time a file descriptor is put in an SQE and submitted to the kernel, the kernel must
/// retrieve a reference to the file, and once I/O has completed the file reference must be
@@ -1658,18 +1681,18 @@ pub fn io_uring_prep_openat(
sqe: *linux.io_uring_sqe,
fd: os.fd_t,
path: [*:0]const u8,
- flags: u32,
+ flags: linux.O,
mode: os.mode_t,
) void {
io_uring_prep_rw(.OPENAT, sqe, fd, @intFromPtr(path), mode, 0);
- sqe.rw_flags = flags;
+ sqe.rw_flags = @bitCast(flags);
}
pub fn io_uring_prep_openat_direct(
sqe: *linux.io_uring_sqe,
fd: os.fd_t,
path: [*:0]const u8,
- flags: u32,
+ flags: linux.O,
mode: os.mode_t,
file_index: u32,
) void {
@@ -1962,6 +1985,19 @@ pub fn io_uring_prep_socket_direct_alloc(
__io_uring_set_target_fixed_file(sqe, linux.IORING_FILE_INDEX_ALLOC);
}
+pub fn io_uring_prep_waitid( // fills in `sqe` for a WAITID operation; does not queue or submit anything itself
+ sqe: *linux.io_uring_sqe,
+ id_type: linux.P,
+ id: i32,
+ infop: *linux.siginfo_t,
+ options: u32,
+ flags: u32,
+) void {
+ io_uring_prep_rw(.WAITID, sqe, id, 0, @intFromEnum(id_type), @intFromPtr(infop)); // `id`, the integer value of `id_type` and the address of `infop` are passed through the generic rw arguments
+ sqe.rw_flags = flags; // NOTE(review): presumably `rw_flags` aliases the kernel's waitid flags member of the SQE union; confirm against `io_uring_sqe`
+ sqe.splice_fd_in = @bitCast(options); // bit-cast because the field's type differs from `u32`; NOTE(review): presumably aliases the kernel's `file_index` member; confirm
+}
+
test "structs/offsets/entries" {
if (builtin.os.tag != .linux) return error.SkipZigTest;
@@ -2054,7 +2090,7 @@ test "readv" {
};
defer ring.deinit();
- const fd = try os.openZ("/dev/zero", os.O.RDONLY | os.O.CLOEXEC, 0);
+ const fd = try os.openZ("/dev/zero", .{ .ACCMODE = .RDONLY, .CLOEXEC = true }, 0);
defer os.close(fd);
// Linux Kernel 5.4 supports IORING_REGISTER_FILES but not sparse fd sets (i.e. an fd of -1).
@@ -2361,7 +2397,7 @@ test "openat" {
break :p @intFromPtr(workaround);
} else @intFromPtr(path);
- const flags: u32 = os.O.CLOEXEC | os.O.RDWR | os.O.CREAT;
+ const flags: linux.O = .{ .CLOEXEC = true, .ACCMODE = .RDWR, .CREAT = true };
const mode: os.mode_t = 0o666;
const sqe_openat = try ring.openat(0x33333333, tmp.dir.fd, path, flags, mode);
try testing.expectEqual(linux.io_uring_sqe{
@@ -2372,7 +2408,7 @@ test "openat" {
.off = 0,
.addr = path_addr,
.len = mode,
- .rw_flags = flags,
+ .rw_flags = @bitCast(flags),
.user_data = 0x33333333,
.buf_index = 0,
.personality = 0,
@@ -2888,7 +2924,7 @@ test "register_files_update" {
};
defer ring.deinit();
- const fd = try os.openZ("/dev/zero", os.O.RDONLY | os.O.CLOEXEC, 0);
+ const fd = try os.openZ("/dev/zero", .{ .ACCMODE = .RDONLY, .CLOEXEC = true }, 0);
defer os.close(fd);
var registered_fds = [_]os.fd_t{0} ** 2;
@@ -2906,7 +2942,7 @@ test "register_files_update" {
// Test IORING_REGISTER_FILES_UPDATE
// Only available since Linux 5.5
- const fd2 = try os.openZ("/dev/zero", os.O.RDONLY | os.O.CLOEXEC, 0);
+ const fd2 = try os.openZ("/dev/zero", .{ .ACCMODE = .RDONLY, .CLOEXEC = true }, 0);
defer os.close(fd2);
registered_fds[fd_index] = fd2;
@@ -3311,7 +3347,7 @@ test "provide_buffers: read" {
};
defer ring.deinit();
- const fd = try os.openZ("/dev/zero", os.O.RDONLY | os.O.CLOEXEC, 0);
+ const fd = try os.openZ("/dev/zero", .{ .ACCMODE = .RDONLY, .CLOEXEC = true }, 0);
defer os.close(fd);
const group_id = 1337;
@@ -3443,7 +3479,7 @@ test "remove_buffers" {
};
defer ring.deinit();
- const fd = try os.openZ("/dev/zero", os.O.RDONLY | os.O.CLOEXEC, 0);
+ const fd = try os.openZ("/dev/zero", .{ .ACCMODE = .RDONLY, .CLOEXEC = true }, 0);
defer os.close(fd);
const group_id = 1337;
@@ -3695,8 +3731,8 @@ const SocketTestHarness = struct {
client: os.socket_t,
fn close(self: SocketTestHarness) void {
- os.closeSocket(self.client);
- os.closeSocket(self.listener);
+ posix.close(self.client);
+ posix.close(self.listener);
}
};
@@ -3704,7 +3740,7 @@ fn createSocketTestHarness(ring: *IO_Uring) !SocketTestHarness {
// Create a TCP server socket
var address = try net.Address.parseIp4("127.0.0.1", 0);
const listener_socket = try createListenerSocket(&address);
- errdefer os.closeSocket(listener_socket);
+ errdefer posix.close(listener_socket);
// Submit 1 accept
var accept_addr: os.sockaddr = undefined;
@@ -3713,7 +3749,7 @@ fn createSocketTestHarness(ring: *IO_Uring) !SocketTestHarness {
// Create a TCP client socket
const client = try os.socket(address.any.family, os.SOCK.STREAM | os.SOCK.CLOEXEC, 0);
- errdefer os.closeSocket(client);
+ errdefer posix.close(client);
_ = try ring.connect(0xcccccccc, client, &address.any, address.getOsSockLen());
try testing.expectEqual(@as(u32, 2), try ring.submit());
@@ -3753,7 +3789,7 @@ fn createSocketTestHarness(ring: *IO_Uring) !SocketTestHarness {
fn createListenerSocket(address: *net.Address) !os.socket_t {
const kernel_backlog = 1;
const listener_socket = try os.socket(address.any.family, os.SOCK.STREAM | os.SOCK.CLOEXEC, 0);
- errdefer os.closeSocket(listener_socket);
+ errdefer posix.close(listener_socket);
try os.setsockopt(listener_socket, os.SOL.SOCKET, os.SO.REUSEADDR, &mem.toBytes(@as(c_int, 1)));
try os.bind(listener_socket, &address.any, address.getOsSockLen());
@@ -3778,7 +3814,7 @@ test "accept multishot" {
var address = try net.Address.parseIp4("127.0.0.1", 0);
const listener_socket = try createListenerSocket(&address);
- defer os.closeSocket(listener_socket);
+ defer posix.close(listener_socket);
// submit multishot accept operation
var addr: os.sockaddr = undefined;
@@ -3791,7 +3827,7 @@ test "accept multishot" {
while (nr > 0) : (nr -= 1) {
// connect client
const client = try os.socket(address.any.family, os.SOCK.STREAM | os.SOCK.CLOEXEC, 0);
- errdefer os.closeSocket(client);
+ errdefer posix.close(client);
try os.connect(client, &address.any, address.getOsSockLen());
// test accept completion
@@ -3801,7 +3837,7 @@ test "accept multishot" {
try testing.expect(cqe.user_data == userdata);
try testing.expect(cqe.flags & linux.IORING_CQE_F_MORE > 0); // more flag is set
- os.closeSocket(client);
+ posix.close(client);
}
}
@@ -3874,7 +3910,7 @@ test "accept_direct" {
try ring.register_files(registered_fds[0..]);
const listener_socket = try createListenerSocket(&address);
- defer os.closeSocket(listener_socket);
+ defer posix.close(listener_socket);
const accept_userdata: u64 = 0xaaaaaaaa;
const read_userdata: u64 = 0xbbbbbbbb;
@@ -3892,7 +3928,7 @@ test "accept_direct" {
// connect
const client = try os.socket(address.any.family, os.SOCK.STREAM | os.SOCK.CLOEXEC, 0);
try os.connect(client, &address.any, address.getOsSockLen());
- defer os.closeSocket(client);
+ defer posix.close(client);
// accept completion
const cqe_accept = try ring.copy_cqe();
@@ -3926,7 +3962,7 @@ test "accept_direct" {
// connect
const client = try os.socket(address.any.family, os.SOCK.STREAM | os.SOCK.CLOEXEC, 0);
try os.connect(client, &address.any, address.getOsSockLen());
- defer os.closeSocket(client);
+ defer posix.close(client);
// completion with error
const cqe_accept = try ring.copy_cqe();
try testing.expect(cqe_accept.user_data == accept_userdata);
@@ -3954,7 +3990,7 @@ test "accept_multishot_direct" {
try ring.register_files(registered_fds[0..]);
const listener_socket = try createListenerSocket(&address);
- defer os.closeSocket(listener_socket);
+ defer posix.close(listener_socket);
const accept_userdata: u64 = 0xaaaaaaaa;
@@ -3968,7 +4004,7 @@ test "accept_multishot_direct" {
// connect
const client = try os.socket(address.any.family, os.SOCK.STREAM | os.SOCK.CLOEXEC, 0);
try os.connect(client, &address.any, address.getOsSockLen());
- defer os.closeSocket(client);
+ defer posix.close(client);
// accept completion
const cqe_accept = try ring.copy_cqe();
@@ -3983,7 +4019,7 @@ test "accept_multishot_direct" {
// connect
const client = try os.socket(address.any.family, os.SOCK.STREAM | os.SOCK.CLOEXEC, 0);
try os.connect(client, &address.any, address.getOsSockLen());
- defer os.closeSocket(client);
+ defer posix.close(client);
// completion with error
const cqe_accept = try ring.copy_cqe();
try testing.expect(cqe_accept.user_data == accept_userdata);
@@ -4057,7 +4093,7 @@ test "socket_direct/socket_direct_alloc/close_direct" {
// use sockets from registered_fds in connect operation
var address = try net.Address.parseIp4("127.0.0.1", 0);
const listener_socket = try createListenerSocket(&address);
- defer os.closeSocket(listener_socket);
+ defer posix.close(listener_socket);
const accept_userdata: u64 = 0xaaaaaaaa;
const connect_userdata: u64 = 0xbbbbbbbb;
const close_userdata: u64 = 0xcccccccc;
@@ -4113,7 +4149,7 @@ test "openat_direct/close_direct" {
var tmp = std.testing.tmpDir(.{});
defer tmp.cleanup();
const path = "test_io_uring_close_direct";
- const flags: u32 = os.O.RDWR | os.O.CREAT;
+ const flags: linux.O = .{ .ACCMODE = .RDWR, .CREAT = true };
const mode: os.mode_t = 0o666;
const user_data: u64 = 0;
@@ -4148,6 +4184,32 @@ test "openat_direct/close_direct" {
try ring.unregister_files();
}
+test "waitid" {
+ try skipKernelLessThan(.{ .major = 6, .minor = 7, .patch = 0 }); // skip on kernels older than 6.7.0
+
+ var ring = IO_Uring.init(16, 0) catch |err| switch (err) {
+ error.SystemOutdated => return error.SkipZigTest,
+ error.PermissionDenied => return error.SkipZigTest,
+ else => return err,
+ };
+ defer ring.deinit();
+
+ const pid = try os.fork();
+ if (pid == 0) { // child process
+ os.exit(7); // exit status that the parent asserts on below
+ }
+
+ var siginfo: os.siginfo_t = undefined; // expected to be filled in by the completed waitid operation
+ _ = try ring.waitid(0, .PID, pid, &siginfo, os.W.EXITED, 0); // wait for the forked child to exit
+
+ try testing.expectEqual(1, try ring.submit()); // exactly one SQE was queued
+
+ const cqe_waitid = try ring.copy_cqe();
+ try testing.expectEqual(0, cqe_waitid.res);
+ try testing.expectEqual(pid, siginfo.fields.common.first.piduid.pid);
+ try testing.expectEqual(7, siginfo.fields.common.second.sigchld.status); // matches the child's `os.exit(7)`
+}
+
/// For use in tests. Returns SkipZigTest is kernel version is less than required.
inline fn skipKernelLessThan(required: std.SemanticVersion) !void {
if (builtin.os.tag != .linux) return error.SkipZigTest;
diff --git a/lib/std/os/linux/mips.zig b/lib/std/os/linux/mips.zig
index 17c6c8a150..896757f1f6 100644
--- a/lib/std/os/linux/mips.zig
+++ b/lib/std/os/linux/mips.zig
@@ -213,29 +213,6 @@ pub fn restore_rt() callconv(.Naked) noreturn {
);
}
-pub const O = struct {
- pub const CREAT = 0o0400;
- pub const EXCL = 0o02000;
- pub const NOCTTY = 0o04000;
- pub const TRUNC = 0o01000;
- pub const APPEND = 0o0010;
- pub const NONBLOCK = 0o0200;
- pub const DSYNC = 0o0020;
- pub const SYNC = 0o040020;
- pub const RSYNC = 0o040020;
- pub const DIRECTORY = 0o0200000;
- pub const NOFOLLOW = 0o0400000;
- pub const CLOEXEC = 0o02000000;
-
- pub const ASYNC = 0o010000;
- pub const DIRECT = 0o0100000;
- pub const LARGEFILE = 0o020000;
- pub const NOATIME = 0o01000000;
- pub const PATH = 0o010000000;
- pub const TMPFILE = 0o020200000;
- pub const NDELAY = NONBLOCK;
-};
-
pub const F = struct {
pub const DUPFD = 0;
pub const GETFD = 1;
diff --git a/lib/std/os/linux/mips64.zig b/lib/std/os/linux/mips64.zig
index 09499cbf9f..4a34f30dd9 100644
--- a/lib/std/os/linux/mips64.zig
+++ b/lib/std/os/linux/mips64.zig
@@ -198,29 +198,6 @@ pub fn restore_rt() callconv(.Naked) noreturn {
);
}
-pub const O = struct {
- pub const CREAT = 0o0400;
- pub const EXCL = 0o02000;
- pub const NOCTTY = 0o04000;
- pub const TRUNC = 0o01000;
- pub const APPEND = 0o0010;
- pub const NONBLOCK = 0o0200;
- pub const DSYNC = 0o0020;
- pub const SYNC = 0o040020;
- pub const RSYNC = 0o040020;
- pub const DIRECTORY = 0o0200000;
- pub const NOFOLLOW = 0o0400000;
- pub const CLOEXEC = 0o02000000;
-
- pub const ASYNC = 0o010000;
- pub const DIRECT = 0o0100000;
- pub const LARGEFILE = 0o020000;
- pub const NOATIME = 0o01000000;
- pub const PATH = 0o010000000;
- pub const TMPFILE = 0o020200000;
- pub const NDELAY = NONBLOCK;
-};
-
pub const F = struct {
pub const DUPFD = 0;
pub const GETFD = 1;
diff --git a/lib/std/os/linux/powerpc.zig b/lib/std/os/linux/powerpc.zig
index 5f07370489..4d13e90166 100644
--- a/lib/std/os/linux/powerpc.zig
+++ b/lib/std/os/linux/powerpc.zig
@@ -142,29 +142,6 @@ pub fn restore_rt() callconv(.Naked) noreturn {
);
}
-pub const O = struct {
- pub const CREAT = 0o100;
- pub const EXCL = 0o200;
- pub const NOCTTY = 0o400;
- pub const TRUNC = 0o1000;
- pub const APPEND = 0o2000;
- pub const NONBLOCK = 0o4000;
- pub const DSYNC = 0o10000;
- pub const SYNC = 0o4010000;
- pub const RSYNC = 0o4010000;
- pub const DIRECTORY = 0o40000;
- pub const NOFOLLOW = 0o100000;
- pub const CLOEXEC = 0o2000000;
-
- pub const ASYNC = 0o20000;
- pub const DIRECT = 0o400000;
- pub const LARGEFILE = 0o200000;
- pub const NOATIME = 0o1000000;
- pub const PATH = 0o10000000;
- pub const TMPFILE = 0o20040000;
- pub const NDELAY = NONBLOCK;
-};
-
pub const F = struct {
pub const DUPFD = 0;
pub const GETFD = 1;
diff --git a/lib/std/os/linux/powerpc64.zig b/lib/std/os/linux/powerpc64.zig
index 99e52fb5ad..c81ef382c2 100644
--- a/lib/std/os/linux/powerpc64.zig
+++ b/lib/std/os/linux/powerpc64.zig
@@ -142,29 +142,6 @@ pub fn restore_rt() callconv(.Naked) noreturn {
);
}
-pub const O = struct {
- pub const CREAT = 0o100;
- pub const EXCL = 0o200;
- pub const NOCTTY = 0o400;
- pub const TRUNC = 0o1000;
- pub const APPEND = 0o2000;
- pub const NONBLOCK = 0o4000;
- pub const DSYNC = 0o10000;
- pub const SYNC = 0o4010000;
- pub const RSYNC = 0o4010000;
- pub const DIRECTORY = 0o40000;
- pub const NOFOLLOW = 0o100000;
- pub const CLOEXEC = 0o2000000;
-
- pub const ASYNC = 0o20000;
- pub const DIRECT = 0o400000;
- pub const LARGEFILE = 0o200000;
- pub const NOATIME = 0o1000000;
- pub const PATH = 0o10000000;
- pub const TMPFILE = 0o20200000;
- pub const NDELAY = NONBLOCK;
-};
-
pub const F = struct {
pub const DUPFD = 0;
pub const GETFD = 1;
diff --git a/lib/std/os/linux/riscv64.zig b/lib/std/os/linux/riscv64.zig
index 45821ddefa..c23fc5e4df 100644
--- a/lib/std/os/linux/riscv64.zig
+++ b/lib/std/os/linux/riscv64.zig
@@ -110,29 +110,6 @@ pub fn restore_rt() callconv(.Naked) noreturn {
);
}
-pub const O = struct {
- pub const CREAT = 0o100;
- pub const EXCL = 0o200;
- pub const NOCTTY = 0o400;
- pub const TRUNC = 0o1000;
- pub const APPEND = 0o2000;
- pub const NONBLOCK = 0o4000;
- pub const DSYNC = 0o10000;
- pub const SYNC = 0o4010000;
- pub const RSYNC = 0o4010000;
- pub const DIRECTORY = 0o200000;
- pub const NOFOLLOW = 0o400000;
- pub const CLOEXEC = 0o2000000;
-
- pub const ASYNC = 0o20000;
- pub const DIRECT = 0o40000;
- pub const LARGEFILE = 0o100000;
- pub const NOATIME = 0o1000000;
- pub const PATH = 0o10000000;
- pub const TMPFILE = 0o20200000;
- pub const NDELAY = NONBLOCK;
-};
-
pub const F = struct {
pub const DUPFD = 0;
pub const GETFD = 1;
diff --git a/lib/std/os/linux/sparc64.zig b/lib/std/os/linux/sparc64.zig
index ef4e1281b5..0a344e2bf4 100644
--- a/lib/std/os/linux/sparc64.zig
+++ b/lib/std/os/linux/sparc64.zig
@@ -195,29 +195,6 @@ pub fn restore_rt() callconv(.C) void {
);
}
-pub const O = struct {
- pub const CREAT = 0x200;
- pub const EXCL = 0x800;
- pub const NOCTTY = 0x8000;
- pub const TRUNC = 0x400;
- pub const APPEND = 0x8;
- pub const NONBLOCK = 0x4000;
- pub const SYNC = 0x802000;
- pub const DSYNC = 0x2000;
- pub const RSYNC = SYNC;
- pub const DIRECTORY = 0x10000;
- pub const NOFOLLOW = 0x20000;
- pub const CLOEXEC = 0x400000;
-
- pub const ASYNC = 0x40;
- pub const DIRECT = 0x100000;
- pub const LARGEFILE = 0;
- pub const NOATIME = 0x200000;
- pub const PATH = 0x1000000;
- pub const TMPFILE = 0x2010000;
- pub const NDELAY = NONBLOCK | 0x4;
-};
-
pub const F = struct {
pub const DUPFD = 0;
pub const GETFD = 1;
diff --git a/lib/std/os/linux/test.zig b/lib/std/os/linux/test.zig
index 170bde6334..e831f11a5f 100644
--- a/lib/std/os/linux/test.zig
+++ b/lib/std/os/linux/test.zig
@@ -37,7 +37,7 @@ test "timer" {
var err: linux.E = linux.getErrno(epoll_fd);
try expect(err == .SUCCESS);
- const timer_fd = linux.timerfd_create(linux.CLOCK.MONOTONIC, 0);
+ const timer_fd = linux.timerfd_create(linux.CLOCK.MONOTONIC, .{});
try expect(linux.getErrno(timer_fd) == .SUCCESS);
const time_interval = linux.timespec{
@@ -50,7 +50,7 @@ test "timer" {
.it_value = time_interval,
};
- err = linux.getErrno(linux.timerfd_settime(@as(i32, @intCast(timer_fd)), 0, &new_time, null));
+ err = linux.getErrno(linux.timerfd_settime(@as(i32, @intCast(timer_fd)), .{}, &new_time, null));
try expect(err == .SUCCESS);
var event = linux.epoll_event{
diff --git a/lib/std/os/linux/x86.zig b/lib/std/os/linux/x86.zig
index a6be4ac380..44ee45d316 100644
--- a/lib/std/os/linux/x86.zig
+++ b/lib/std/os/linux/x86.zig
@@ -159,29 +159,6 @@ pub fn restore_rt() callconv(.Naked) noreturn {
}
}
-pub const O = struct {
- pub const CREAT = 0o100;
- pub const EXCL = 0o200;
- pub const NOCTTY = 0o400;
- pub const TRUNC = 0o1000;
- pub const APPEND = 0o2000;
- pub const NONBLOCK = 0o4000;
- pub const DSYNC = 0o10000;
- pub const SYNC = 0o4010000;
- pub const RSYNC = 0o4010000;
- pub const DIRECTORY = 0o200000;
- pub const NOFOLLOW = 0o400000;
- pub const CLOEXEC = 0o2000000;
-
- pub const ASYNC = 0o20000;
- pub const DIRECT = 0o40000;
- pub const LARGEFILE = 0o100000;
- pub const NOATIME = 0o1000000;
- pub const PATH = 0o10000000;
- pub const TMPFILE = 0o20200000;
- pub const NDELAY = NONBLOCK;
-};
-
pub const F = struct {
pub const DUPFD = 0;
pub const GETFD = 1;
diff --git a/lib/std/os/linux/x86_64.zig b/lib/std/os/linux/x86_64.zig
index 6d4ab11abb..2d69d539ae 100644
--- a/lib/std/os/linux/x86_64.zig
+++ b/lib/std/os/linux/x86_64.zig
@@ -131,29 +131,6 @@ pub const nlink_t = usize;
pub const blksize_t = isize;
pub const blkcnt_t = isize;
-pub const O = struct {
- pub const CREAT = 0o100;
- pub const EXCL = 0o200;
- pub const NOCTTY = 0o400;
- pub const TRUNC = 0o1000;
- pub const APPEND = 0o2000;
- pub const NONBLOCK = 0o4000;
- pub const DSYNC = 0o10000;
- pub const SYNC = 0o4010000;
- pub const RSYNC = 0o4010000;
- pub const DIRECTORY = 0o200000;
- pub const NOFOLLOW = 0o400000;
- pub const CLOEXEC = 0o2000000;
-
- pub const ASYNC = 0o20000;
- pub const DIRECT = 0o40000;
- pub const LARGEFILE = 0;
- pub const NOATIME = 0o1000000;
- pub const PATH = 0o10000000;
- pub const TMPFILE = 0o20200000;
- pub const NDELAY = NONBLOCK;
-};
-
pub const F = struct {
pub const DUPFD = 0;
pub const GETFD = 1;
diff --git a/lib/std/os/plan9.zig b/lib/std/os/plan9.zig
index b42fd52245..354e05e570 100644
--- a/lib/std/os/plan9.zig
+++ b/lib/std/os/plan9.zig
@@ -242,17 +242,23 @@ pub fn close(fd: i32) usize {
return syscall_bits.syscall1(.CLOSE, @bitCast(@as(isize, fd)));
}
pub const mode_t = i32;
-pub const O = struct {
- pub const READ = 0; // open for read
- pub const RDONLY = 0;
- pub const WRITE = 1; // write
- pub const WRONLY = 1;
- pub const RDWR = 2; // read and write
- pub const EXEC = 3; // execute, == read but check execute permission
- pub const TRUNC = 16; // or'ed in (except for exec), truncate file first
- pub const CEXEC = 32; // or'ed in (per file descriptor), close on exec
- pub const RCLOSE = 64; // or'ed in, remove on close
- pub const EXCL = 0x1000; // or'ed in, exclusive create
+
+pub const AccessMode = enum(u2) { // two-bit access mode; stored in the `access` field of `O`
+ RDONLY, // 0: open for read
+ WRONLY, // 1: write
+ RDWR, // 2: read and write
+ EXEC, // 3: execute, == read but check execute permission
+};
+
+pub const O = packed struct(u32) { // open flags as a 32-bit packed struct; fields are listed from the least significant bit
+ access: AccessMode, // bits 0-1: access mode
+ _2: u2 = 0, // bits 2-3: no named flag
+ TRUNC: bool = false, // bit 4 (16): truncate file first (except for exec)
+ CEXEC: bool = false, // bit 5 (32): close on exec (per file descriptor)
+ RCLOSE: bool = false, // bit 6 (64): remove on close
+ _7: u5 = 0, // bits 7-11: no named flag
+ EXCL: bool = false, // bit 12 (0x1000): exclusive create
+ _: u19 = 0, // bits 13-31: no named flag
};
pub const ExecData = struct {
diff --git a/lib/std/os/test.zig b/lib/std/os/test.zig
index 3a170d6aec..6794e9ec88 100644
--- a/lib/std/os/test.zig
+++ b/lib/std/os/test.zig
@@ -87,6 +87,7 @@ test "chdir smoke test" {
test "open smoke test" {
if (native_os == .wasi) return error.SkipZigTest;
+ if (native_os == .windows) return error.SkipZigTest;
// TODO verify file attributes using `fstat`
@@ -109,21 +110,21 @@ test "open smoke test" {
// Create some file using `open`.
file_path = try fs.path.join(allocator, &[_][]const u8{ base_path, "some_file" });
- fd = try os.open(file_path, os.O.RDWR | os.O.CREAT | os.O.EXCL, mode);
+ fd = try os.open(file_path, .{ .ACCMODE = .RDWR, .CREAT = true, .EXCL = true }, mode);
os.close(fd);
// Try this again with the same flags. This op should fail with error.PathAlreadyExists.
file_path = try fs.path.join(allocator, &[_][]const u8{ base_path, "some_file" });
- try expectError(error.PathAlreadyExists, os.open(file_path, os.O.RDWR | os.O.CREAT | os.O.EXCL, mode));
+ try expectError(error.PathAlreadyExists, os.open(file_path, .{ .ACCMODE = .RDWR, .CREAT = true, .EXCL = true }, mode));
- // Try opening without `O.EXCL` flag.
+ // Try opening without `EXCL` flag.
file_path = try fs.path.join(allocator, &[_][]const u8{ base_path, "some_file" });
- fd = try os.open(file_path, os.O.RDWR | os.O.CREAT, mode);
+ fd = try os.open(file_path, .{ .ACCMODE = .RDWR, .CREAT = true }, mode);
os.close(fd);
// Try opening as a directory which should fail.
file_path = try fs.path.join(allocator, &[_][]const u8{ base_path, "some_file" });
- try expectError(error.NotDir, os.open(file_path, os.O.RDWR | os.O.DIRECTORY, mode));
+ try expectError(error.NotDir, os.open(file_path, .{ .ACCMODE = .RDWR, .DIRECTORY = true }, mode));
// Create some directory
file_path = try fs.path.join(allocator, &[_][]const u8{ base_path, "some_dir" });
@@ -131,16 +132,17 @@ test "open smoke test" {
// Open dir using `open`
file_path = try fs.path.join(allocator, &[_][]const u8{ base_path, "some_dir" });
- fd = try os.open(file_path, os.O.RDONLY | os.O.DIRECTORY, mode);
+ fd = try os.open(file_path, .{ .ACCMODE = .RDONLY, .DIRECTORY = true }, mode);
os.close(fd);
// Try opening as file which should fail.
file_path = try fs.path.join(allocator, &[_][]const u8{ base_path, "some_dir" });
- try expectError(error.IsDir, os.open(file_path, os.O.RDWR, mode));
+ try expectError(error.IsDir, os.open(file_path, .{ .ACCMODE = .RDWR }, mode));
}
test "openat smoke test" {
if (native_os == .wasi and builtin.link_libc) return error.SkipZigTest;
+ if (native_os == .windows) return error.SkipZigTest;
// TODO verify file attributes using `fstatat`
@@ -151,28 +153,47 @@ test "openat smoke test" {
const mode: os.mode_t = if (native_os == .windows) 0 else 0o666;
// Create some file using `openat`.
- fd = try os.openat(tmp.dir.fd, "some_file", os.O.RDWR | os.O.CREAT | os.O.EXCL, mode);
+ fd = try os.openat(tmp.dir.fd, "some_file", os.CommonOpenFlags.lower(.{
+ .ACCMODE = .RDWR,
+ .CREAT = true,
+ .EXCL = true,
+ }), mode);
os.close(fd);
// Try this again with the same flags. This op should fail with error.PathAlreadyExists.
- try expectError(error.PathAlreadyExists, os.openat(tmp.dir.fd, "some_file", os.O.RDWR | os.O.CREAT | os.O.EXCL, mode));
-
- // Try opening without `O.EXCL` flag.
- fd = try os.openat(tmp.dir.fd, "some_file", os.O.RDWR | os.O.CREAT, mode);
+ try expectError(error.PathAlreadyExists, os.openat(tmp.dir.fd, "some_file", os.CommonOpenFlags.lower(.{
+ .ACCMODE = .RDWR,
+ .CREAT = true,
+ .EXCL = true,
+ }), mode));
+
+ // Try opening without `EXCL` flag.
+ fd = try os.openat(tmp.dir.fd, "some_file", os.CommonOpenFlags.lower(.{
+ .ACCMODE = .RDWR,
+ .CREAT = true,
+ }), mode);
os.close(fd);
// Try opening as a directory which should fail.
- try expectError(error.NotDir, os.openat(tmp.dir.fd, "some_file", os.O.RDWR | os.O.DIRECTORY, mode));
+ try expectError(error.NotDir, os.openat(tmp.dir.fd, "some_file", os.CommonOpenFlags.lower(.{
+ .ACCMODE = .RDWR,
+ .DIRECTORY = true,
+ }), mode));
// Create some directory
try os.mkdirat(tmp.dir.fd, "some_dir", mode);
// Open dir using `open`
- fd = try os.openat(tmp.dir.fd, "some_dir", os.O.RDONLY | os.O.DIRECTORY, mode);
+ fd = try os.openat(tmp.dir.fd, "some_dir", os.CommonOpenFlags.lower(.{
+ .ACCMODE = .RDONLY,
+ .DIRECTORY = true,
+ }), mode);
os.close(fd);
// Try opening as file which should fail.
- try expectError(error.IsDir, os.openat(tmp.dir.fd, "some_dir", os.O.RDWR, mode));
+ try expectError(error.IsDir, os.openat(tmp.dir.fd, "some_dir", os.CommonOpenFlags.lower(.{
+ .ACCMODE = .RDWR,
+ }), mode));
}
test "symlink with relative paths" {
@@ -688,7 +709,7 @@ test "fcntl" {
tmp.dir.deleteFile(test_out_file) catch {};
}
- // Note: The test assumes createFile opens the file with O.CLOEXEC
+ // Note: The test assumes createFile opens the file with CLOEXEC
{
const flags = try os.fcntl(file.handle, os.F.GETFD, 0);
try expect((flags & os.FD_CLOEXEC) != 0);
@@ -752,11 +773,6 @@ test "fsync" {
}
test "getrlimit and setrlimit" {
- if (builtin.target.os.tag == .macos) {
- // https://github.com/ziglang/zig/issues/18395
- return error.SkipZigTest;
- }
-
if (!@hasDecl(os.system, "rlimit")) {
return error.SkipZigTest;
}
@@ -765,6 +781,13 @@ test "getrlimit and setrlimit" {
const resource = @as(os.rlimit_resource, @enumFromInt(field.value));
const limit = try os.getrlimit(resource);
+ // XNU kernel does not support RLIMIT_STACK if a custom stack is active,
+ // which looks to always be the case. EINVAL is returned.
+ // See https://github.com/apple-oss-distributions/xnu/blob/5e3eaea39dcf651e66cb99ba7d70e32cc4a99587/bsd/kern/kern_resource.c#L1173
+ if (builtin.os.tag.isDarwin() and resource == .STACK) {
+ continue;
+ }
+
// On 32 bit MIPS musl includes a fix which changes limits greater than -1UL/2 to RLIM_INFINITY.
// See http://git.musl-libc.org/cgit/musl/commit/src/misc/getrlimit.c?id=8258014fd1e34e942a549c88c7e022a00445c352
//
@@ -796,7 +819,7 @@ test "shutdown socket" {
error.SocketNotConnected => {},
else => |e| return e,
};
- os.closeSocket(sock);
+ std.net.Stream.close(.{ .handle = sock });
}
test "sigaction" {
@@ -987,6 +1010,7 @@ test "POSIX file locking with fcntl" {
test "rename smoke test" {
if (native_os == .wasi) return error.SkipZigTest;
+ if (native_os == .windows) return error.SkipZigTest;
var tmp = tmpDir(.{});
defer tmp.cleanup();
@@ -1007,7 +1031,7 @@ test "rename smoke test" {
// Create some file using `open`.
file_path = try fs.path.join(allocator, &[_][]const u8{ base_path, "some_file" });
- fd = try os.open(file_path, os.O.RDWR | os.O.CREAT | os.O.EXCL, mode);
+ fd = try os.open(file_path, .{ .ACCMODE = .RDWR, .CREAT = true, .EXCL = true }, mode);
os.close(fd);
// Rename the file
@@ -1016,12 +1040,12 @@ test "rename smoke test" {
// Try opening renamed file
file_path = try fs.path.join(allocator, &[_][]const u8{ base_path, "some_other_file" });
- fd = try os.open(file_path, os.O.RDWR, mode);
+ fd = try os.open(file_path, .{ .ACCMODE = .RDWR }, mode);
os.close(fd);
// Try opening original file - should fail with error.FileNotFound
file_path = try fs.path.join(allocator, &[_][]const u8{ base_path, "some_file" });
- try expectError(error.FileNotFound, os.open(file_path, os.O.RDWR, mode));
+ try expectError(error.FileNotFound, os.open(file_path, .{ .ACCMODE = .RDWR }, mode));
// Create some directory
file_path = try fs.path.join(allocator, &[_][]const u8{ base_path, "some_dir" });
@@ -1033,16 +1057,17 @@ test "rename smoke test" {
// Try opening renamed directory
file_path = try fs.path.join(allocator, &[_][]const u8{ base_path, "some_other_dir" });
- fd = try os.open(file_path, os.O.RDONLY | os.O.DIRECTORY, mode);
+ fd = try os.open(file_path, .{ .ACCMODE = .RDONLY, .DIRECTORY = true }, mode);
os.close(fd);
// Try opening original directory - should fail with error.FileNotFound
file_path = try fs.path.join(allocator, &[_][]const u8{ base_path, "some_dir" });
- try expectError(error.FileNotFound, os.open(file_path, os.O.RDONLY | os.O.DIRECTORY, mode));
+ try expectError(error.FileNotFound, os.open(file_path, .{ .ACCMODE = .RDONLY, .DIRECTORY = true }, mode));
}
test "access smoke test" {
if (native_os == .wasi) return error.SkipZigTest;
+ if (native_os == .windows) return error.SkipZigTest;
var tmp = tmpDir(.{});
defer tmp.cleanup();
@@ -1063,7 +1088,7 @@ test "access smoke test" {
// Create some file using `open`.
file_path = try fs.path.join(allocator, &[_][]const u8{ base_path, "some_file" });
- fd = try os.open(file_path, os.O.RDWR | os.O.CREAT | os.O.EXCL, mode);
+ fd = try os.open(file_path, .{ .ACCMODE = .RDWR, .CREAT = true, .EXCL = true }, mode);
os.close(fd);
// Try to access() the file
@@ -1088,16 +1113,15 @@ test "access smoke test" {
}
test "timerfd" {
- if (native_os != .linux)
- return error.SkipZigTest;
+ if (native_os != .linux) return error.SkipZigTest;
const linux = os.linux;
- const tfd = try os.timerfd_create(linux.CLOCK.MONOTONIC, linux.TFD.CLOEXEC);
+ const tfd = try os.timerfd_create(linux.CLOCK.MONOTONIC, .{ .CLOEXEC = true });
defer os.close(tfd);
// Fire event 10_000_000ns = 10ms after the os.timerfd_settime call.
var sit: linux.itimerspec = .{ .it_interval = .{ .tv_sec = 0, .tv_nsec = 0 }, .it_value = .{ .tv_sec = 0, .tv_nsec = 10 * (1000 * 1000) } };
- try os.timerfd_settime(tfd, 0, &sit, null);
+ try os.timerfd_settime(tfd, .{}, &sit, null);
var fds: [1]os.pollfd = .{.{ .fd = tfd, .events = os.linux.POLL.IN, .revents = 0 }};
try expectEqual(@as(usize, 1), try os.poll(&fds, -1)); // -1 => infinite waiting
@@ -1232,7 +1256,7 @@ test "fchmodat smoke test" {
const fd = try os.openat(
tmp.dir.fd,
"regfile",
- os.O.WRONLY | os.O.CREAT | os.O.EXCL | os.O.TRUNC,
+ .{ .ACCMODE = .WRONLY, .CREAT = true, .EXCL = true, .TRUNC = true },
0o644,
);
os.close(fd);
diff --git a/lib/std/os/wasi.zig b/lib/std/os/wasi.zig
index e286c5b947..016ce38a9f 100644
--- a/lib/std/os/wasi.zig
+++ b/lib/std/os/wasi.zig
@@ -1,6 +1,7 @@
-// wasi_snapshot_preview1 spec available (in witx format) here:
-// * typenames -- https://github.com/WebAssembly/WASI/blob/main/legacy/preview1/witx/typenames.witx
-// * module -- https://github.com/WebAssembly/WASI/blob/main/legacy/preview1/witx/wasi_snapshot_preview1.witx
+//! wasi_snapshot_preview1 spec available (in witx format) here:
+//! * typenames -- https://github.com/WebAssembly/WASI/blob/main/legacy/preview1/witx/typenames.witx
+//! * module -- https://github.com/WebAssembly/WASI/blob/main/legacy/preview1/witx/wasi_snapshot_preview1.witx
+//! Note that libc API does *not* go in this file. wasi libc API goes into std/c/wasi.zig instead.
const builtin = @import("builtin");
const std = @import("std");
const assert = std.debug.assert;
@@ -16,11 +17,6 @@ comptime {
// assert(@alignOf(u64) == 8);
}
-pub const F_OK = 0;
-pub const X_OK = 1;
-pub const W_OK = 2;
-pub const R_OK = 4;
-
pub const iovec_t = std.os.iovec;
pub const ciovec_t = std.os.iovec_const;
@@ -82,106 +78,22 @@ pub extern "wasi_snapshot_preview1" fn sock_recv(sock: fd_t, ri_data: [*]iovec_t
pub extern "wasi_snapshot_preview1" fn sock_send(sock: fd_t, si_data: [*]const ciovec_t, si_data_len: usize, si_flags: siflags_t, so_datalen: *usize) errno_t;
pub extern "wasi_snapshot_preview1" fn sock_shutdown(sock: fd_t, how: sdflags_t) errno_t;
-/// Get the errno from a syscall return value, or 0 for no error.
-pub fn getErrno(r: errno_t) errno_t {
- return r;
-}
-
-pub const STDIN_FILENO = 0;
-pub const STDOUT_FILENO = 1;
-pub const STDERR_FILENO = 2;
-
-pub const mode_t = u32;
-
-pub const time_t = i64; // match https://github.com/CraneStation/wasi-libc
-
-pub const timespec = extern struct {
- tv_sec: time_t,
- tv_nsec: isize,
-
- pub fn fromTimestamp(tm: timestamp_t) timespec {
- const tv_sec: timestamp_t = tm / 1_000_000_000;
- const tv_nsec = tm - tv_sec * 1_000_000_000;
- return timespec{
- .tv_sec = @as(time_t, @intCast(tv_sec)),
- .tv_nsec = @as(isize, @intCast(tv_nsec)),
- };
- }
-
- pub fn toTimestamp(ts: timespec) timestamp_t {
- const tm = @as(timestamp_t, @intCast(ts.tv_sec * 1_000_000_000)) + @as(timestamp_t, @intCast(ts.tv_nsec));
- return tm;
- }
-};
-
-pub const Stat = struct {
- dev: device_t,
- ino: inode_t,
- mode: mode_t,
- filetype: filetype_t,
- nlink: linkcount_t,
- size: filesize_t,
- atim: timespec,
- mtim: timespec,
- ctim: timespec,
-
- const Self = @This();
-
- pub fn fromFilestat(stat: filestat_t) Self {
- return Self{
- .dev = stat.dev,
- .ino = stat.ino,
- .mode = 0,
- .filetype = stat.filetype,
- .nlink = stat.nlink,
- .size = stat.size,
- .atim = stat.atime(),
- .mtim = stat.mtime(),
- .ctim = stat.ctime(),
- };
- }
-
- pub fn atime(self: Self) timespec {
- return self.atim;
- }
-
- pub fn mtime(self: Self) timespec {
- return self.mtim;
- }
-
- pub fn ctime(self: Self) timespec {
- return self.ctim;
- }
-};
-
-pub const IOV_MAX = 1024;
-
-pub const AT = struct {
- pub const REMOVEDIR: u32 = 0x4;
- /// When linking libc, we follow their convention and use -2 for current working directory.
- /// However, without libc, Zig does a different convention: it assumes the
- /// current working directory is the first preopen. This behavior can be
- /// overridden with a public function called `wasi_cwd` in the root source
- /// file.
- pub const FDCWD: fd_t = if (builtin.link_libc) -2 else 3;
-};
-
// As defined in the wasi_snapshot_preview1 spec file:
// https://github.com/WebAssembly/WASI/blob/master/phases/snapshot/witx/typenames.witx
-pub const advice_t = u8;
-pub const ADVICE_NORMAL: advice_t = 0;
-pub const ADVICE_SEQUENTIAL: advice_t = 1;
-pub const ADVICE_RANDOM: advice_t = 2;
-pub const ADVICE_WILLNEED: advice_t = 3;
-pub const ADVICE_DONTNEED: advice_t = 4;
-pub const ADVICE_NOREUSE: advice_t = 5;
-
-pub const clockid_t = u32;
-pub const CLOCK = struct {
- pub const REALTIME: clockid_t = 0;
- pub const MONOTONIC: clockid_t = 1;
- pub const PROCESS_CPUTIME_ID: clockid_t = 2;
- pub const THREAD_CPUTIME_ID: clockid_t = 3;
+pub const advice_t = enum(u8) {
+ NORMAL = 0,
+ SEQUENTIAL = 1,
+ RANDOM = 2,
+ WILLNEED = 3,
+ DONTNEED = 4,
+ NOREUSE = 5,
+};
+
+pub const clockid_t = enum(u32) {
+ REALTIME = 0,
+ MONOTONIC = 1,
+ PROCESS_CPUTIME_ID = 2,
+ THREAD_CPUTIME_ID = 3,
};
pub const device_t = u64;
@@ -192,10 +104,10 @@ pub const DIRCOOKIE_START: dircookie_t = 0;
pub const dirnamlen_t = u32;
pub const dirent_t = extern struct {
- d_next: dircookie_t,
- d_ino: inode_t,
- d_namlen: dirnamlen_t,
- d_type: filetype_t,
+ next: dircookie_t,
+ ino: inode_t,
+ namlen: dirnamlen_t,
+ type: filetype_t,
};
pub const errno_t = enum(u16) {
@@ -280,7 +192,6 @@ pub const errno_t = enum(u16) {
NOTCAPABLE = 76,
_,
};
-pub const E = errno_t;
pub const event_t = extern struct {
userdata: userdata_t,
@@ -297,22 +208,23 @@ pub const eventfdreadwrite_t = extern struct {
pub const eventrwflags_t = u16;
pub const EVENT_FD_READWRITE_HANGUP: eventrwflags_t = 0x0001;
-pub const eventtype_t = u8;
-pub const EVENTTYPE_CLOCK: eventtype_t = 0;
-pub const EVENTTYPE_FD_READ: eventtype_t = 1;
-pub const EVENTTYPE_FD_WRITE: eventtype_t = 2;
+pub const eventtype_t = enum(u8) {
+ CLOCK = 0,
+ FD_READ = 1,
+ FD_WRITE = 2,
+};
pub const exitcode_t = u32;
pub const fd_t = i32;
-pub const fdflags_t = u16;
-pub const FDFLAG = struct {
- pub const APPEND: fdflags_t = 0x0001;
- pub const DSYNC: fdflags_t = 0x0002;
- pub const NONBLOCK: fdflags_t = 0x0004;
- pub const RSYNC: fdflags_t = 0x0008;
- pub const SYNC: fdflags_t = 0x0010;
+pub const fdflags_t = packed struct(u16) {
+ APPEND: bool = false,
+ DSYNC: bool = false,
+ NONBLOCK: bool = false,
+ RSYNC: bool = false,
+ SYNC: bool = false,
+ _: u11 = 0,
};
pub const fdstat_t = extern struct {
@@ -335,21 +247,8 @@ pub const filestat_t = extern struct {
atim: timestamp_t,
mtim: timestamp_t,
ctim: timestamp_t,
-
- pub fn atime(self: filestat_t) timespec {
- return timespec.fromTimestamp(self.atim);
- }
-
- pub fn mtime(self: filestat_t) timespec {
- return timespec.fromTimestamp(self.mtim);
- }
-
- pub fn ctime(self: filestat_t) timespec {
- return timespec.fromTimestamp(self.ctim);
- }
};
-/// Also known as `FILETYPE`.
pub const filetype_t = enum(u8) {
UNKNOWN,
BLOCK_DEVICE,
@@ -362,26 +261,29 @@ pub const filetype_t = enum(u8) {
_,
};
-pub const fstflags_t = u16;
-pub const FILESTAT_SET_ATIM: fstflags_t = 0x0001;
-pub const FILESTAT_SET_ATIM_NOW: fstflags_t = 0x0002;
-pub const FILESTAT_SET_MTIM: fstflags_t = 0x0004;
-pub const FILESTAT_SET_MTIM_NOW: fstflags_t = 0x0008;
+pub const fstflags_t = packed struct(u16) {
+ ATIM: bool = false,
+ ATIM_NOW: bool = false,
+ MTIM: bool = false,
+ MTIM_NOW: bool = false,
+ _: u12 = 0,
+};
pub const inode_t = u64;
-pub const ino_t = inode_t;
pub const linkcount_t = u64;
-pub const lookupflags_t = u32;
-pub const LOOKUP_SYMLINK_FOLLOW: lookupflags_t = 0x00000001;
+pub const lookupflags_t = packed struct(u32) {
+ SYMLINK_FOLLOW: bool = false,
+ _: u31 = 0,
+};
-pub const oflags_t = u16;
-pub const O = struct {
- pub const CREAT: oflags_t = 0x0001;
- pub const DIRECTORY: oflags_t = 0x0002;
- pub const EXCL: oflags_t = 0x0004;
- pub const TRUNC: oflags_t = 0x0008;
+pub const oflags_t = packed struct(u16) {
+ CREAT: bool = false,
+ DIRECTORY: bool = false,
+ EXCL: bool = false,
+ TRUNC: bool = false,
+ _: u12 = 0,
};
pub const preopentype_t = u8;
@@ -410,110 +312,81 @@ pub const SOCK = struct {
pub const RECV_DATA_TRUNCATED: roflags_t = 0x0001;
};
-pub const rights_t = u64;
-pub const RIGHT = struct {
- pub const FD_DATASYNC: rights_t = 0x0000000000000001;
- pub const FD_READ: rights_t = 0x0000000000000002;
- pub const FD_SEEK: rights_t = 0x0000000000000004;
- pub const FD_FDSTAT_SET_FLAGS: rights_t = 0x0000000000000008;
- pub const FD_SYNC: rights_t = 0x0000000000000010;
- pub const FD_TELL: rights_t = 0x0000000000000020;
- pub const FD_WRITE: rights_t = 0x0000000000000040;
- pub const FD_ADVISE: rights_t = 0x0000000000000080;
- pub const FD_ALLOCATE: rights_t = 0x0000000000000100;
- pub const PATH_CREATE_DIRECTORY: rights_t = 0x0000000000000200;
- pub const PATH_CREATE_FILE: rights_t = 0x0000000000000400;
- pub const PATH_LINK_SOURCE: rights_t = 0x0000000000000800;
- pub const PATH_LINK_TARGET: rights_t = 0x0000000000001000;
- pub const PATH_OPEN: rights_t = 0x0000000000002000;
- pub const FD_READDIR: rights_t = 0x0000000000004000;
- pub const PATH_READLINK: rights_t = 0x0000000000008000;
- pub const PATH_RENAME_SOURCE: rights_t = 0x0000000000010000;
- pub const PATH_RENAME_TARGET: rights_t = 0x0000000000020000;
- pub const PATH_FILESTAT_GET: rights_t = 0x0000000000040000;
- pub const PATH_FILESTAT_SET_SIZE: rights_t = 0x0000000000080000;
- pub const PATH_FILESTAT_SET_TIMES: rights_t = 0x0000000000100000;
- pub const FD_FILESTAT_GET: rights_t = 0x0000000000200000;
- pub const FD_FILESTAT_SET_SIZE: rights_t = 0x0000000000400000;
- pub const FD_FILESTAT_SET_TIMES: rights_t = 0x0000000000800000;
- pub const PATH_SYMLINK: rights_t = 0x0000000001000000;
- pub const PATH_REMOVE_DIRECTORY: rights_t = 0x0000000002000000;
- pub const PATH_UNLINK_FILE: rights_t = 0x0000000004000000;
- pub const POLL_FD_READWRITE: rights_t = 0x0000000008000000;
- pub const SOCK_SHUTDOWN: rights_t = 0x0000000010000000;
- pub const SOCK_ACCEPT: rights_t = 0x0000000020000000;
- pub const ALL: rights_t = FD_DATASYNC |
- FD_READ |
- FD_SEEK |
- FD_FDSTAT_SET_FLAGS |
- FD_SYNC |
- FD_TELL |
- FD_WRITE |
- FD_ADVISE |
- FD_ALLOCATE |
- PATH_CREATE_DIRECTORY |
- PATH_CREATE_FILE |
- PATH_LINK_SOURCE |
- PATH_LINK_TARGET |
- PATH_OPEN |
- FD_READDIR |
- PATH_READLINK |
- PATH_RENAME_SOURCE |
- PATH_RENAME_TARGET |
- PATH_FILESTAT_GET |
- PATH_FILESTAT_SET_SIZE |
- PATH_FILESTAT_SET_TIMES |
- FD_FILESTAT_GET |
- FD_FILESTAT_SET_SIZE |
- FD_FILESTAT_SET_TIMES |
- PATH_SYMLINK |
- PATH_REMOVE_DIRECTORY |
- PATH_UNLINK_FILE |
- POLL_FD_READWRITE |
- SOCK_SHUTDOWN |
- SOCK_ACCEPT;
+pub const rights_t = packed struct(u64) {
+ FD_DATASYNC: bool = false,
+ FD_READ: bool = false,
+ FD_SEEK: bool = false,
+ FD_FDSTAT_SET_FLAGS: bool = false,
+ FD_SYNC: bool = false,
+ FD_TELL: bool = false,
+ FD_WRITE: bool = false,
+ FD_ADVISE: bool = false,
+ FD_ALLOCATE: bool = false,
+ PATH_CREATE_DIRECTORY: bool = false,
+ PATH_CREATE_FILE: bool = false,
+ PATH_LINK_SOURCE: bool = false,
+ PATH_LINK_TARGET: bool = false,
+ PATH_OPEN: bool = false,
+ FD_READDIR: bool = false,
+ PATH_READLINK: bool = false,
+ PATH_RENAME_SOURCE: bool = false,
+ PATH_RENAME_TARGET: bool = false,
+ PATH_FILESTAT_GET: bool = false,
+ PATH_FILESTAT_SET_SIZE: bool = false,
+ PATH_FILESTAT_SET_TIMES: bool = false,
+ FD_FILESTAT_GET: bool = false,
+ FD_FILESTAT_SET_SIZE: bool = false,
+ FD_FILESTAT_SET_TIMES: bool = false,
+ PATH_SYMLINK: bool = false,
+ PATH_REMOVE_DIRECTORY: bool = false,
+ PATH_UNLINK_FILE: bool = false,
+ POLL_FD_READWRITE: bool = false,
+ SOCK_SHUTDOWN: bool = false,
+ SOCK_ACCEPT: bool = false,
+ _: u34 = 0,
};
-pub const sdflags_t = u8;
-pub const SHUT = struct {
- pub const RD: sdflags_t = 0x01;
- pub const WR: sdflags_t = 0x02;
+pub const sdflags_t = packed struct(u8) {
+ RD: bool = false,
+ WR: bool = false,
+ _: u6 = 0,
};
pub const siflags_t = u16;
-pub const signal_t = u8;
-pub const SIGNONE: signal_t = 0;
-pub const SIGHUP: signal_t = 1;
-pub const SIGINT: signal_t = 2;
-pub const SIGQUIT: signal_t = 3;
-pub const SIGILL: signal_t = 4;
-pub const SIGTRAP: signal_t = 5;
-pub const SIGABRT: signal_t = 6;
-pub const SIGBUS: signal_t = 7;
-pub const SIGFPE: signal_t = 8;
-pub const SIGKILL: signal_t = 9;
-pub const SIGUSR1: signal_t = 10;
-pub const SIGSEGV: signal_t = 11;
-pub const SIGUSR2: signal_t = 12;
-pub const SIGPIPE: signal_t = 13;
-pub const SIGALRM: signal_t = 14;
-pub const SIGTERM: signal_t = 15;
-pub const SIGCHLD: signal_t = 16;
-pub const SIGCONT: signal_t = 17;
-pub const SIGSTOP: signal_t = 18;
-pub const SIGTSTP: signal_t = 19;
-pub const SIGTTIN: signal_t = 20;
-pub const SIGTTOU: signal_t = 21;
-pub const SIGURG: signal_t = 22;
-pub const SIGXCPU: signal_t = 23;
-pub const SIGXFSZ: signal_t = 24;
-pub const SIGVTALRM: signal_t = 25;
-pub const SIGPROF: signal_t = 26;
-pub const SIGWINCH: signal_t = 27;
-pub const SIGPOLL: signal_t = 28;
-pub const SIGPWR: signal_t = 29;
-pub const SIGSYS: signal_t = 30;
+pub const signal_t = enum(u8) {
+ NONE = 0,
+ HUP = 1,
+ INT = 2,
+ QUIT = 3,
+ ILL = 4,
+ TRAP = 5,
+ ABRT = 6,
+ BUS = 7,
+ FPE = 8,
+ KILL = 9,
+ USR1 = 10,
+ SEGV = 11,
+ USR2 = 12,
+ PIPE = 13,
+ ALRM = 14,
+ TERM = 15,
+ CHLD = 16,
+ CONT = 17,
+ STOP = 18,
+ TSTP = 19,
+ TTIN = 20,
+ TTOU = 21,
+ URG = 22,
+ XCPU = 23,
+ XFSZ = 24,
+ VTALRM = 25,
+ PROF = 26,
+ WINCH = 27,
+ POLL = 28,
+ PWR = 29,
+ SYS = 30,
+};
pub const subclockflags_t = u16;
pub const SUBSCRIPTION_CLOCK_ABSTIME: subclockflags_t = 0x0001;
@@ -545,29 +418,9 @@ pub const subscription_u_u_t = extern union {
fd_write: subscription_fd_readwrite_t,
};
+/// Nanoseconds.
pub const timestamp_t = u64;
pub const userdata_t = u64;
-/// Also known as `WHENCE`.
pub const whence_t = enum(u8) { SET, CUR, END };
-
-pub const S = struct {
- pub const IEXEC = @compileError("TODO audit this");
- pub const IFBLK = 0x6000;
- pub const IFCHR = 0x2000;
- pub const IFDIR = 0x4000;
- pub const IFIFO = 0xc000;
- pub const IFLNK = 0xa000;
- pub const IFMT = IFBLK | IFCHR | IFDIR | IFIFO | IFLNK | IFREG | IFSOCK;
- pub const IFREG = 0x8000;
- // There's no concept of UNIX domain socket but we define this value here in order to line with other OSes.
- pub const IFSOCK = 0x1;
-};
-
-pub const LOCK = struct {
- pub const SH = 0x1;
- pub const EX = 0x2;
- pub const NB = 0x4;
- pub const UN = 0x8;
-};
diff --git a/lib/std/os/windows.zig b/lib/std/os/windows.zig
index d48261a97a..deb903b283 100644
--- a/lib/std/os/windows.zig
+++ b/lib/std/os/windows.zig
@@ -1,8 +1,8 @@
//! This file contains thin wrappers around Windows-specific APIs, with these
//! specific goals in mind:
//! * Convert "errno"-style error codes into Zig errors.
-//! * When null-terminated or UTF16LE byte buffers are required, provide APIs which accept
-//! slices as well as APIs which accept null-terminated UTF16LE byte buffers.
+//! * When null-terminated or WTF16LE byte buffers are required, provide APIs which accept
+//! slices as well as APIs which accept null-terminated WTF16LE byte buffers.
const builtin = @import("builtin");
const std = @import("../std.zig");
@@ -41,6 +41,7 @@ pub const OpenError = error{
NameTooLong,
WouldBlock,
NetworkNotFound,
+ AntivirusInterference,
};
pub const OpenFileOptions = struct {
@@ -49,7 +50,6 @@ pub const OpenFileOptions = struct {
sa: ?*SECURITY_ATTRIBUTES = null,
share_access: ULONG = FILE_SHARE_WRITE | FILE_SHARE_READ | FILE_SHARE_DELETE,
creation: ULONG,
- io_mode: std.io.ModeOverride,
/// If true, tries to open path as a directory.
/// Defaults to false.
filter: Filter = .file_only,
@@ -95,7 +95,7 @@ pub fn OpenFile(sub_path_w: []const u16, options: OpenFileOptions) OpenError!HAN
.SecurityQualityOfService = null,
};
var io: IO_STATUS_BLOCK = undefined;
- const blocking_flag: ULONG = if (options.io_mode == .blocking) FILE_SYNCHRONOUS_IO_NONALERT else 0;
+ const blocking_flag: ULONG = FILE_SYNCHRONOUS_IO_NONALERT;
const file_or_dir_flag: ULONG = switch (options.filter) {
.file_only => FILE_NON_DIRECTORY_FILE,
.dir_only => FILE_DIRECTORY_FILE,
@@ -119,12 +119,7 @@ pub fn OpenFile(sub_path_w: []const u16, options: OpenFileOptions) OpenError!HAN
0,
);
switch (rc) {
- .SUCCESS => {
- if (std.io.is_async and options.io_mode == .evented) {
- _ = CreateIoCompletionPort(result, std.event.Loop.instance.?.os_data.io_port, undefined, undefined) catch undefined;
- }
- return result;
- },
+ .SUCCESS => return result,
.OBJECT_NAME_INVALID => unreachable,
.OBJECT_NAME_NOT_FOUND => return error.FileNotFound,
.OBJECT_PATH_NOT_FOUND => return error.FileNotFound,
@@ -151,6 +146,7 @@ pub fn OpenFile(sub_path_w: []const u16, options: OpenFileOptions) OpenError!HAN
std.time.sleep(std.time.ns_per_ms);
continue;
},
+ .VIRUS_INFECTED, .VIRUS_DELETED => return error.AntivirusInterference,
else => return unexpectedStatus(rc),
}
}
@@ -182,7 +178,13 @@ pub fn CreateEventExW(attributes: ?*SECURITY_ATTRIBUTES, nameW: [*:0]const u16,
}
}
-pub const DeviceIoControlError = error{ AccessDenied, Unexpected };
+pub const DeviceIoControlError = error{
+ AccessDenied,
+ /// The volume does not contain a recognized file system. File system
+ /// drivers might not be loaded, or the volume may be corrupt.
+ UnrecognizedVolume,
+ Unexpected,
+};
/// A Zig wrapper around `NtDeviceIoControlFile` and `NtFsControlFile` syscalls.
/// It implements similar behavior to `DeviceIoControl` and is meant to serve
@@ -238,6 +240,7 @@ pub fn DeviceIoControl(
.ACCESS_DENIED => return error.AccessDenied,
.INVALID_DEVICE_REQUEST => return error.AccessDenied, // Not supported by the underlying filesystem
.INVALID_PARAMETER => unreachable,
+ .UNRECOGNIZED_VOLUME => return error.UnrecognizedVolume,
else => return unexpectedStatus(rc),
}
}
@@ -450,88 +453,44 @@ pub fn FindClose(hFindFile: HANDLE) void {
pub const ReadFileError = error{
BrokenPipe,
- NetNameDeleted,
+ /// The specified network name is no longer available.
+ ConnectionResetByPeer,
OperationAborted,
Unexpected,
};
/// If buffer's length exceeds what a Windows DWORD integer can hold, it will be broken into
/// multiple non-atomic reads.
-pub fn ReadFile(in_hFile: HANDLE, buffer: []u8, offset: ?u64, io_mode: std.io.ModeOverride) ReadFileError!usize {
- if (io_mode != .blocking) {
- const loop = std.event.Loop.instance.?;
- // TODO make getting the file position non-blocking
- const off = if (offset) |o| o else try SetFilePointerEx_CURRENT_get(in_hFile);
- var resume_node = std.event.Loop.ResumeNode.Basic{
- .base = .{
- .id = .Basic,
- .handle = @frame(),
- .overlapped = OVERLAPPED{
- .Internal = 0,
- .InternalHigh = 0,
- .DUMMYUNIONNAME = .{
- .DUMMYSTRUCTNAME = .{
- .Offset = @as(u32, @truncate(off)),
- .OffsetHigh = @as(u32, @truncate(off >> 32)),
- },
+pub fn ReadFile(in_hFile: HANDLE, buffer: []u8, offset: ?u64) ReadFileError!usize {
+ while (true) {
+ const want_read_count: DWORD = @min(@as(DWORD, maxInt(DWORD)), buffer.len);
+ var amt_read: DWORD = undefined;
+ var overlapped_data: OVERLAPPED = undefined;
+ const overlapped: ?*OVERLAPPED = if (offset) |off| blk: {
+ overlapped_data = .{
+ .Internal = 0,
+ .InternalHigh = 0,
+ .DUMMYUNIONNAME = .{
+ .DUMMYSTRUCTNAME = .{
+ .Offset = @as(u32, @truncate(off)),
+ .OffsetHigh = @as(u32, @truncate(off >> 32)),
},
- .hEvent = null,
},
- },
- };
- loop.beginOneEvent();
- suspend {
- // TODO handle buffer bigger than DWORD can hold
- _ = kernel32.ReadFile(in_hFile, buffer.ptr, @as(DWORD, @intCast(buffer.len)), null, &resume_node.base.overlapped);
- }
- var bytes_transferred: DWORD = undefined;
- if (kernel32.GetOverlappedResult(in_hFile, &resume_node.base.overlapped, &bytes_transferred, FALSE) == 0) {
+ .hEvent = null,
+ };
+ break :blk &overlapped_data;
+ } else null;
+ if (kernel32.ReadFile(in_hFile, buffer.ptr, want_read_count, &amt_read, overlapped) == 0) {
switch (kernel32.GetLastError()) {
.IO_PENDING => unreachable,
- .OPERATION_ABORTED => return error.OperationAborted,
- .BROKEN_PIPE => return error.BrokenPipe,
- .NETNAME_DELETED => return error.NetNameDeleted,
- .HANDLE_EOF => return @as(usize, bytes_transferred),
+ .OPERATION_ABORTED => continue,
+ .BROKEN_PIPE => return 0,
+ .HANDLE_EOF => return 0,
+ .NETNAME_DELETED => return error.ConnectionResetByPeer,
else => |err| return unexpectedError(err),
}
}
- if (offset == null) {
- // TODO make setting the file position non-blocking
- const new_off = off + bytes_transferred;
- try SetFilePointerEx_CURRENT(in_hFile, @as(i64, @bitCast(new_off)));
- }
- return @as(usize, bytes_transferred);
- } else {
- while (true) {
- const want_read_count: DWORD = @min(@as(DWORD, maxInt(DWORD)), buffer.len);
- var amt_read: DWORD = undefined;
- var overlapped_data: OVERLAPPED = undefined;
- const overlapped: ?*OVERLAPPED = if (offset) |off| blk: {
- overlapped_data = .{
- .Internal = 0,
- .InternalHigh = 0,
- .DUMMYUNIONNAME = .{
- .DUMMYSTRUCTNAME = .{
- .Offset = @as(u32, @truncate(off)),
- .OffsetHigh = @as(u32, @truncate(off >> 32)),
- },
- },
- .hEvent = null,
- };
- break :blk &overlapped_data;
- } else null;
- if (kernel32.ReadFile(in_hFile, buffer.ptr, want_read_count, &amt_read, overlapped) == 0) {
- switch (kernel32.GetLastError()) {
- .IO_PENDING => unreachable,
- .OPERATION_ABORTED => continue,
- .BROKEN_PIPE => return 0,
- .HANDLE_EOF => return 0,
- .NETNAME_DELETED => return error.NetNameDeleted,
- else => |err| return unexpectedError(err),
- }
- }
- return amt_read;
- }
+ return amt_read;
}
}
@@ -543,6 +502,8 @@ pub const WriteFileError = error{
/// The process cannot access the file because another process has locked
/// a portion of the file.
LockViolation,
+ /// The specified network name is no longer available.
+ ConnectionResetByPeer,
Unexpected,
};
@@ -550,90 +511,43 @@ pub fn WriteFile(
handle: HANDLE,
bytes: []const u8,
offset: ?u64,
- io_mode: std.io.ModeOverride,
) WriteFileError!usize {
- if (std.event.Loop.instance != null and io_mode != .blocking) {
- const loop = std.event.Loop.instance.?;
- // TODO make getting the file position non-blocking
- const off = if (offset) |o| o else try SetFilePointerEx_CURRENT_get(handle);
- var resume_node = std.event.Loop.ResumeNode.Basic{
- .base = .{
- .id = .Basic,
- .handle = @frame(),
- .overlapped = OVERLAPPED{
- .Internal = 0,
- .InternalHigh = 0,
- .DUMMYUNIONNAME = .{
- .DUMMYSTRUCTNAME = .{
- .Offset = @as(u32, @truncate(off)),
- .OffsetHigh = @as(u32, @truncate(off >> 32)),
- },
- },
- .hEvent = null,
+ var bytes_written: DWORD = undefined;
+ var overlapped_data: OVERLAPPED = undefined;
+ const overlapped: ?*OVERLAPPED = if (offset) |off| blk: {
+ overlapped_data = .{
+ .Internal = 0,
+ .InternalHigh = 0,
+ .DUMMYUNIONNAME = .{
+ .DUMMYSTRUCTNAME = .{
+ .Offset = @truncate(off),
+ .OffsetHigh = @truncate(off >> 32),
},
},
+ .hEvent = null,
};
- loop.beginOneEvent();
- suspend {
- const adjusted_len = math.cast(DWORD, bytes.len) orelse maxInt(DWORD);
- _ = kernel32.WriteFile(handle, bytes.ptr, adjusted_len, null, &resume_node.base.overlapped);
- }
- var bytes_transferred: DWORD = undefined;
- if (kernel32.GetOverlappedResult(handle, &resume_node.base.overlapped, &bytes_transferred, FALSE) == 0) {
- switch (kernel32.GetLastError()) {
- .IO_PENDING => unreachable,
- .INVALID_USER_BUFFER => return error.SystemResources,
- .NOT_ENOUGH_MEMORY => return error.SystemResources,
- .OPERATION_ABORTED => return error.OperationAborted,
- .NOT_ENOUGH_QUOTA => return error.SystemResources,
- .BROKEN_PIPE => return error.BrokenPipe,
- else => |err| return unexpectedError(err),
- }
- }
- if (offset == null) {
- // TODO make setting the file position non-blocking
- const new_off = off + bytes_transferred;
- try SetFilePointerEx_CURRENT(handle, @as(i64, @bitCast(new_off)));
- }
- return bytes_transferred;
- } else {
- var bytes_written: DWORD = undefined;
- var overlapped_data: OVERLAPPED = undefined;
- const overlapped: ?*OVERLAPPED = if (offset) |off| blk: {
- overlapped_data = .{
- .Internal = 0,
- .InternalHigh = 0,
- .DUMMYUNIONNAME = .{
- .DUMMYSTRUCTNAME = .{
- .Offset = @as(u32, @truncate(off)),
- .OffsetHigh = @as(u32, @truncate(off >> 32)),
- },
- },
- .hEvent = null,
- };
- break :blk &overlapped_data;
- } else null;
- const adjusted_len = math.cast(u32, bytes.len) orelse maxInt(u32);
- if (kernel32.WriteFile(handle, bytes.ptr, adjusted_len, &bytes_written, overlapped) == 0) {
- switch (kernel32.GetLastError()) {
- .INVALID_USER_BUFFER => return error.SystemResources,
- .NOT_ENOUGH_MEMORY => return error.SystemResources,
- .OPERATION_ABORTED => return error.OperationAborted,
- .NOT_ENOUGH_QUOTA => return error.SystemResources,
- .IO_PENDING => unreachable,
- .BROKEN_PIPE => return error.BrokenPipe,
- .INVALID_HANDLE => return error.NotOpenForWriting,
- .LOCK_VIOLATION => return error.LockViolation,
- else => |err| return unexpectedError(err),
- }
+ break :blk &overlapped_data;
+ } else null;
+ const adjusted_len = math.cast(u32, bytes.len) orelse maxInt(u32);
+ if (kernel32.WriteFile(handle, bytes.ptr, adjusted_len, &bytes_written, overlapped) == 0) {
+ switch (kernel32.GetLastError()) {
+ .INVALID_USER_BUFFER => return error.SystemResources,
+ .NOT_ENOUGH_MEMORY => return error.SystemResources,
+ .OPERATION_ABORTED => return error.OperationAborted,
+ .NOT_ENOUGH_QUOTA => return error.SystemResources,
+ .IO_PENDING => unreachable,
+ .BROKEN_PIPE => return error.BrokenPipe,
+ .INVALID_HANDLE => return error.NotOpenForWriting,
+ .LOCK_VIOLATION => return error.LockViolation,
+ .NETNAME_DELETED => return error.ConnectionResetByPeer,
+ else => |err| return unexpectedError(err),
}
- return bytes_written;
}
+ return bytes_written;
}
pub const SetCurrentDirectoryError = error{
NameTooLong,
- InvalidUtf8,
FileNotFound,
NotDir,
AccessDenied,
@@ -672,24 +586,24 @@ pub const GetCurrentDirectoryError = error{
};
/// The result is a slice of `buffer`, indexed from 0.
+/// The result is encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
pub fn GetCurrentDirectory(buffer: []u8) GetCurrentDirectoryError![]u8 {
- var utf16le_buf: [PATH_MAX_WIDE]u16 = undefined;
- const result = kernel32.GetCurrentDirectoryW(utf16le_buf.len, &utf16le_buf);
+ var wtf16le_buf: [PATH_MAX_WIDE]u16 = undefined;
+ const result = kernel32.GetCurrentDirectoryW(wtf16le_buf.len, &wtf16le_buf);
if (result == 0) {
switch (kernel32.GetLastError()) {
else => |err| return unexpectedError(err),
}
}
- assert(result <= utf16le_buf.len);
- const utf16le_slice = utf16le_buf[0..result];
- // Trust that Windows gives us valid UTF-16LE.
+ assert(result <= wtf16le_buf.len);
+ const wtf16le_slice = wtf16le_buf[0..result];
var end_index: usize = 0;
- var it = std.unicode.Utf16LeIterator.init(utf16le_slice);
- while (it.nextCodepoint() catch unreachable) |codepoint| {
+ var it = std.unicode.Wtf16LeIterator.init(wtf16le_slice);
+ while (it.nextCodepoint()) |codepoint| {
const seq_len = std.unicode.utf8CodepointSequenceLength(codepoint) catch unreachable;
if (end_index + seq_len >= buffer.len)
return error.NameTooLong;
- end_index += std.unicode.utf8Encode(codepoint, buffer[end_index..]) catch unreachable;
+ end_index += std.unicode.wtf8Encode(codepoint, buffer[end_index..]) catch unreachable;
}
return buffer[0..end_index];
}
@@ -702,6 +616,9 @@ pub const CreateSymbolicLinkError = error{
NoDevice,
NetworkNotFound,
BadPathName,
+ /// The volume does not contain a recognized file system. File system
+ /// drivers might not be loaded, or the volume may be corrupt.
+ UnrecognizedVolume,
Unexpected,
};
@@ -732,13 +649,13 @@ pub fn CreateSymbolicLink(
.access_mask = SYNCHRONIZE | GENERIC_READ | GENERIC_WRITE,
.dir = dir,
.creation = FILE_CREATE,
- .io_mode = .blocking,
.filter = if (is_directory) .dir_only else .file_only,
}) catch |err| switch (err) {
error.IsDir => return error.PathAlreadyExists,
- error.NotDir => unreachable,
- error.WouldBlock => unreachable,
- error.PipeBusy => unreachable,
+ error.NotDir => return error.Unexpected,
+ error.WouldBlock => return error.Unexpected,
+ error.PipeBusy => return error.Unexpected,
+ error.AntivirusInterference => return error.Unexpected,
else => |e| return e,
};
defer CloseHandle(symlink_handle);
@@ -784,12 +701,12 @@ pub fn CreateSymbolicLink(
const target_is_absolute = std.fs.path.isAbsoluteWindowsWTF16(final_target_path);
const symlink_data = SYMLINK_DATA{
.ReparseTag = IO_REPARSE_TAG_SYMLINK,
- .ReparseDataLength = @as(u16, @intCast(buf_len - header_len)),
+ .ReparseDataLength = @intCast(buf_len - header_len),
.Reserved = 0,
- .SubstituteNameOffset = @as(u16, @intCast(final_target_path.len * 2)),
- .SubstituteNameLength = @as(u16, @intCast(final_target_path.len * 2)),
+ .SubstituteNameOffset = @intCast(final_target_path.len * 2),
+ .SubstituteNameLength = @intCast(final_target_path.len * 2),
.PrintNameOffset = 0,
- .PrintNameLength = @as(u16, @intCast(final_target_path.len * 2)),
+ .PrintNameLength = @intCast(final_target_path.len * 2),
.Flags = if (!target_is_absolute) SYMLINK_FLAG_RELATIVE else 0,
};
@@ -865,7 +782,8 @@ pub fn ReadLink(dir: ?HANDLE, sub_path_w: []const u16, out_buffer: []u8) ReadLin
var reparse_buf: [MAXIMUM_REPARSE_DATA_BUFFER_SIZE]u8 align(@alignOf(REPARSE_DATA_BUFFER)) = undefined;
_ = DeviceIoControl(result_handle, FSCTL_GET_REPARSE_POINT, null, reparse_buf[0..]) catch |err| switch (err) {
- error.AccessDenied => unreachable,
+ error.AccessDenied => return error.Unexpected,
+ error.UnrecognizedVolume => return error.Unexpected,
else => |e| return e,
};
@@ -893,6 +811,8 @@ pub fn ReadLink(dir: ?HANDLE, sub_path_w: []const u16, out_buffer: []u8) ReadLin
}
}
+/// Asserts that there is enough space in `out_buffer`.
+/// The result is encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
fn parseReadlinkPath(path: []const u16, is_relative: bool, out_buffer: []u8) []u8 {
const win32_namespace_path = path: {
if (is_relative) break :path path;
@@ -902,7 +822,7 @@ fn parseReadlinkPath(path: []const u16, is_relative: bool, out_buffer: []u8) []u
};
break :path win32_path.span();
};
- const out_len = std.unicode.utf16leToUtf8(out_buffer, win32_namespace_path) catch unreachable;
+ const out_len = std.unicode.wtf16LeToWtf8(out_buffer, win32_namespace_path);
return out_buffer[0..out_len];
}
@@ -1180,6 +1100,9 @@ pub const GetFinalPathNameByHandleError = error{
BadPathName,
FileNotFound,
NameTooLong,
+ /// The volume does not contain a recognized file system. File system
+ /// drivers might not be loaded, or the volume may be corrupt.
+ UnrecognizedVolume,
Unexpected,
};
@@ -1256,30 +1179,30 @@ pub fn GetFinalPathNameByHandle(
.access_mask = SYNCHRONIZE,
.share_access = FILE_SHARE_READ | FILE_SHARE_WRITE,
.creation = FILE_OPEN,
- .io_mode = .blocking,
}) catch |err| switch (err) {
- error.IsDir => unreachable,
- error.NotDir => unreachable,
- error.NoDevice => unreachable,
- error.AccessDenied => unreachable,
- error.PipeBusy => unreachable,
- error.PathAlreadyExists => unreachable,
- error.WouldBlock => unreachable,
- error.NetworkNotFound => unreachable,
+ error.IsDir => return error.Unexpected,
+ error.NotDir => return error.Unexpected,
+ error.NoDevice => return error.Unexpected,
+ error.AccessDenied => return error.Unexpected,
+ error.PipeBusy => return error.Unexpected,
+ error.PathAlreadyExists => return error.Unexpected,
+ error.WouldBlock => return error.Unexpected,
+ error.NetworkNotFound => return error.Unexpected,
+ error.AntivirusInterference => return error.Unexpected,
else => |e| return e,
};
defer CloseHandle(mgmt_handle);
- var input_struct = @as(*MOUNTMGR_MOUNT_POINT, @ptrCast(&input_buf[0]));
+ var input_struct: *MOUNTMGR_MOUNT_POINT = @ptrCast(&input_buf[0]);
input_struct.DeviceNameOffset = @sizeOf(MOUNTMGR_MOUNT_POINT);
- input_struct.DeviceNameLength = @as(USHORT, @intCast(volume_name_u16.len * 2));
+ input_struct.DeviceNameLength = @intCast(volume_name_u16.len * 2);
@memcpy(input_buf[@sizeOf(MOUNTMGR_MOUNT_POINT)..][0 .. volume_name_u16.len * 2], @as([*]const u8, @ptrCast(volume_name_u16.ptr)));
DeviceIoControl(mgmt_handle, IOCTL_MOUNTMGR_QUERY_POINTS, &input_buf, &output_buf) catch |err| switch (err) {
- error.AccessDenied => unreachable,
+ error.AccessDenied => return error.Unexpected,
else => |e| return e,
};
- const mount_points_struct = @as(*const MOUNTMGR_MOUNT_POINTS, @ptrCast(&output_buf[0]));
+ const mount_points_struct: *const MOUNTMGR_MOUNT_POINTS = @ptrCast(&output_buf[0]);
const mount_points = @as(
[*]const MOUNTMGR_MOUNT_POINT,
@@ -2020,13 +1943,13 @@ pub fn eqlIgnoreCaseWTF16(a: []const u16, b: []const u16) bool {
if (@inComptime() or builtin.os.tag != .windows) {
// This function compares the strings code unit by code unit (aka u16-to-u16),
// so any length difference implies inequality. In other words, there's no possible
- // conversion that changes the number of UTF-16 code units needed for the uppercase/lowercase
+ // conversion that changes the number of WTF-16 code units needed for the uppercase/lowercase
// version in the conversion table since only codepoints <= max(u16) are eligible
// for conversion at all.
if (a.len != b.len) return false;
for (a, b) |a_c, b_c| {
- // The slices are always UTF-16 LE, so need to convert the elements to native
+ // The slices are always WTF-16 LE, so need to convert the elements to native
// endianness for the uppercasing
const a_c_native = std.mem.littleToNative(u16, a_c);
const b_c_native = std.mem.littleToNative(u16, b_c);
@@ -2053,18 +1976,18 @@ pub fn eqlIgnoreCaseWTF16(a: []const u16, b: []const u16) bool {
return ntdll.RtlEqualUnicodeString(&a_string, &b_string, TRUE) == TRUE;
}
-/// Compares two UTF-8 strings using the equivalent functionality of
+/// Compares two WTF-8 strings using the equivalent functionality of
/// `RtlEqualUnicodeString` (with case insensitive comparison enabled).
/// This function can be called on any target.
-/// Assumes `a` and `b` are valid UTF-8.
-pub fn eqlIgnoreCaseUtf8(a: []const u8, b: []const u8) bool {
+/// Assumes `a` and `b` are valid WTF-8.
+pub fn eqlIgnoreCaseWtf8(a: []const u8, b: []const u8) bool {
// A length equality check is not possible here because there are
// some codepoints that have a different length uppercase UTF-8 representations
// than their lowercase counterparts, e.g. U+0250 (2 bytes) <-> U+2C6F (3 bytes).
// There are 7 such codepoints in the uppercase data used by Windows.
- var a_utf8_it = std.unicode.Utf8View.initUnchecked(a).iterator();
- var b_utf8_it = std.unicode.Utf8View.initUnchecked(b).iterator();
+ var a_wtf8_it = std.unicode.Wtf8View.initUnchecked(a).iterator();
+ var b_wtf8_it = std.unicode.Wtf8View.initUnchecked(b).iterator();
// Use RtlUpcaseUnicodeChar on Windows when not in comptime to avoid including a
// redundant copy of the uppercase data.
@@ -2074,8 +1997,8 @@ pub fn eqlIgnoreCaseUtf8(a: []const u8, b: []const u8) bool {
};
while (true) {
- const a_cp = a_utf8_it.nextCodepoint() orelse break;
- const b_cp = b_utf8_it.nextCodepoint() orelse return false;
+ const a_cp = a_wtf8_it.nextCodepoint() orelse break;
+ const b_cp = b_wtf8_it.nextCodepoint() orelse return false;
if (a_cp <= std.math.maxInt(u16) and b_cp <= std.math.maxInt(u16)) {
if (a_cp != b_cp and upcaseImpl(@intCast(a_cp)) != upcaseImpl(@intCast(b_cp))) {
@@ -2086,26 +2009,26 @@ pub fn eqlIgnoreCaseUtf8(a: []const u8, b: []const u8) bool {
}
}
// Make sure there are no leftover codepoints in b
- if (b_utf8_it.nextCodepoint() != null) return false;
+ if (b_wtf8_it.nextCodepoint() != null) return false;
return true;
}
fn testEqlIgnoreCase(comptime expect_eql: bool, comptime a: []const u8, comptime b: []const u8) !void {
- try std.testing.expectEqual(expect_eql, eqlIgnoreCaseUtf8(a, b));
+ try std.testing.expectEqual(expect_eql, eqlIgnoreCaseWtf8(a, b));
try std.testing.expectEqual(expect_eql, eqlIgnoreCaseWTF16(
std.unicode.utf8ToUtf16LeStringLiteral(a),
std.unicode.utf8ToUtf16LeStringLiteral(b),
));
- try comptime std.testing.expect(expect_eql == eqlIgnoreCaseUtf8(a, b));
+ try comptime std.testing.expect(expect_eql == eqlIgnoreCaseWtf8(a, b));
try comptime std.testing.expect(expect_eql == eqlIgnoreCaseWTF16(
std.unicode.utf8ToUtf16LeStringLiteral(a),
std.unicode.utf8ToUtf16LeStringLiteral(b),
));
}
-test "eqlIgnoreCaseWTF16/Utf8" {
+test "eqlIgnoreCaseWTF16/Wtf8" {
try testEqlIgnoreCase(true, "\x01 a B Λ ɐ", "\x01 A b λ Ɐ");
// does not do case-insensitive comparison for codepoints >= U+10000
try testEqlIgnoreCase(false, "𐓏", "𐓷");
@@ -2195,20 +2118,32 @@ pub fn normalizePath(comptime T: type, path: []T) RemoveDotDirsError!usize {
return prefix_len + try removeDotDirsSanitized(T, path[prefix_len..new_len]);
}
+pub const Wtf8ToPrefixedFileWError = error{InvalidWtf8} || Wtf16ToPrefixedFileWError;
+
/// Same as `sliceToPrefixedFileW` but accepts a pointer
-/// to a null-terminated path.
-pub fn cStrToPrefixedFileW(dir: ?HANDLE, s: [*:0]const u8) !PathSpace {
+/// to a null-terminated WTF-8 encoded path.
+/// https://simonsapin.github.io/wtf-8/
+pub fn cStrToPrefixedFileW(dir: ?HANDLE, s: [*:0]const u8) Wtf8ToPrefixedFileWError!PathSpace {
return sliceToPrefixedFileW(dir, mem.sliceTo(s, 0));
}
-/// Same as `wToPrefixedFileW` but accepts a UTF-8 encoded path.
-pub fn sliceToPrefixedFileW(dir: ?HANDLE, path: []const u8) !PathSpace {
+/// Same as `wToPrefixedFileW` but accepts a WTF-8 encoded path.
+/// https://simonsapin.github.io/wtf-8/
+pub fn sliceToPrefixedFileW(dir: ?HANDLE, path: []const u8) Wtf8ToPrefixedFileWError!PathSpace {
var temp_path: PathSpace = undefined;
- temp_path.len = try std.unicode.utf8ToUtf16Le(&temp_path.data, path);
+ temp_path.len = try std.unicode.wtf8ToWtf16Le(&temp_path.data, path);
temp_path.data[temp_path.len] = 0;
return wToPrefixedFileW(dir, temp_path.span());
}
+pub const Wtf16ToPrefixedFileWError = error{
+ AccessDenied,
+ BadPathName,
+ FileNotFound,
+ NameTooLong,
+ Unexpected,
+};
+
/// Converts the `path` to WTF16, null-terminated. If the path contains any
/// namespace prefix, or is anything but a relative path (rooted, drive relative,
/// etc) the result will have the NT-style prefix `\??\`.
@@ -2220,7 +2155,7 @@ pub fn sliceToPrefixedFileW(dir: ?HANDLE, path: []const u8) !PathSpace {
/// is non-null, or the CWD if it is null.
/// - Special case device names like COM1, NUL, etc are not handled specially (TODO)
/// - . and space are not stripped from the end of relative paths (potential TODO)
-pub fn wToPrefixedFileW(dir: ?HANDLE, path: [:0]const u16) !PathSpace {
+pub fn wToPrefixedFileW(dir: ?HANDLE, path: [:0]const u16) Wtf16ToPrefixedFileWError!PathSpace {
const nt_prefix = [_]u16{ '\\', '?', '?', '\\' };
switch (getNamespacePrefix(u16, path)) {
// TODO: Figure out a way to design an API that can avoid the copy for .nt,
@@ -2299,7 +2234,7 @@ pub fn wToPrefixedFileW(dir: ?HANDLE, path: [:0]const u16) !PathSpace {
.unc_absolute => nt_prefix.len + 2,
else => nt_prefix.len,
};
- const buf_len = @as(u32, @intCast(path_space.data.len - path_buf_offset));
+ const buf_len: u32 = @intCast(path_space.data.len - path_buf_offset);
const path_to_get: [:0]const u16 = path_to_get: {
// If dir is null, then we don't need to bother with GetFinalPathNameByHandle because
// RtlGetFullPathName_U will resolve relative paths against the CWD for us.
@@ -2317,7 +2252,24 @@ pub fn wToPrefixedFileW(dir: ?HANDLE, path: [:0]const u16) !PathSpace {
// canonicalize it. We do this by getting the path of the `dir`
// and appending the relative path to it.
var dir_path_buf: [PATH_MAX_WIDE:0]u16 = undefined;
- const dir_path = try GetFinalPathNameByHandle(dir.?, .{}, &dir_path_buf);
+ const dir_path = GetFinalPathNameByHandle(dir.?, .{}, &dir_path_buf) catch |err| switch (err) {
+ // This mapping is not correct; it is actually expected
+ // that calling GetFinalPathNameByHandle might return
+ // error.UnrecognizedVolume, and in fact has been observed
+ // in the wild. The problem is that wToPrefixedFileW was
+ // never intended to make *any* OS syscall APIs. It's only
+ // supposed to convert a string to one that is eligible to
+ // be used in the ntdll syscalls.
+ //
+ // To solve this, this function needs to no longer call
+ // GetFinalPathNameByHandle under any conditions, or the
+ // calling function needs to get reworked to not need to
+ // call this function.
+ //
+ // This may involve making breaking API changes.
+ error.UnrecognizedVolume => return error.Unexpected,
+ else => |e| return e,
+ };
if (dir_path.len + 1 + path.len > PATH_MAX_WIDE) {
return error.NameTooLong;
}
@@ -2373,7 +2325,7 @@ pub const NamespacePrefix = enum {
nt,
};
-/// If `T` is `u16`, then `path` should be encoded as UTF-16LE.
+/// If `T` is `u16`, then `path` should be encoded as WTF-16LE.
pub fn getNamespacePrefix(comptime T: type, path: []const T) NamespacePrefix {
if (path.len < 4) return .none;
var all_backslash = switch (mem.littleToNative(T, path[0])) {
@@ -2427,7 +2379,7 @@ pub const UnprefixedPathType = enum {
/// Get the path type of a path that is known to not have any namespace prefixes
/// (`\\?\`, `\\.\`, `\??\`).
-/// If `T` is `u16`, then `path` should be encoded as UTF-16LE.
+/// If `T` is `u16`, then `path` should be encoded as WTF-16LE.
pub fn getUnprefixedPathType(comptime T: type, path: []const T) UnprefixedPathType {
if (path.len < 1) return .relative;
@@ -2481,7 +2433,7 @@ test getUnprefixedPathType {
/// Functionality is based on the ReactOS test cases found here:
/// https://github.com/reactos/reactos/blob/master/modules/rostests/apitests/ntdll/RtlNtPathNameToDosPathName.c
///
-/// `path` should be encoded as UTF-16LE.
+/// `path` should be encoded as WTF-16LE.
pub fn ntToWin32Namespace(path: []const u16) !PathSpace {
if (path.len > PATH_MAX_WIDE) return error.NameTooLong;
@@ -2591,7 +2543,6 @@ pub fn unexpectedError(err: Win32Error) std.os.UnexpectedError {
if (std.os.unexpected_error_tracing) {
// 614 is the length of the longest windows error description
var buf_wstr: [614]WCHAR = undefined;
- var buf_utf8: [614]u8 = undefined;
const len = kernel32.FormatMessageW(
FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS,
null,
@@ -2601,8 +2552,10 @@ pub fn unexpectedError(err: Win32Error) std.os.UnexpectedError {
buf_wstr.len,
null,
);
- _ = std.unicode.utf16leToUtf8(&buf_utf8, buf_wstr[0..len]) catch unreachable;
- std.debug.print("error.Unexpected: GetLastError({}): {s}\n", .{ @intFromEnum(err), buf_utf8[0..len] });
+ std.debug.print("error.Unexpected: GetLastError({}): {}\n", .{
+ @intFromEnum(err),
+ std.unicode.fmtUtf16Le(buf_wstr[0..len]),
+ });
std.debug.dumpCurrentStackTrace(@returnAddress());
}
return error.Unexpected;
diff --git a/lib/std/os/windows/test.zig b/lib/std/os/windows/test.zig
index 87ab5f721e..9936a922af 100644
--- a/lib/std/os/windows/test.zig
+++ b/lib/std/os/windows/test.zig
@@ -30,7 +30,7 @@ fn testToPrefixedFileNoOracle(comptime path: []const u8, comptime expected_path:
const expected_path_utf16 = std.unicode.utf8ToUtf16LeStringLiteral(expected_path);
const actual_path = try windows.wToPrefixedFileW(null, path_utf16);
std.testing.expectEqualSlices(u16, expected_path_utf16, actual_path.span()) catch |e| {
- std.debug.print("got '{s}', expected '{s}'\n", .{ std.unicode.fmtUtf16le(actual_path.span()), std.unicode.fmtUtf16le(expected_path_utf16) });
+ std.debug.print("got '{s}', expected '{s}'\n", .{ std.unicode.fmtUtf16Le(actual_path.span()), std.unicode.fmtUtf16Le(expected_path_utf16) });
return e;
};
}
@@ -48,7 +48,7 @@ fn testToPrefixedFileOnlyOracle(comptime path: []const u8) !void {
const zig_result = try windows.wToPrefixedFileW(null, path_utf16);
const win32_api_result = try RtlDosPathNameToNtPathName_U(path_utf16);
std.testing.expectEqualSlices(u16, win32_api_result.span(), zig_result.span()) catch |e| {
- std.debug.print("got '{s}', expected '{s}'\n", .{ std.unicode.fmtUtf16le(zig_result.span()), std.unicode.fmtUtf16le(win32_api_result.span()) });
+ std.debug.print("got '{s}', expected '{s}'\n", .{ std.unicode.fmtUtf16Le(zig_result.span()), std.unicode.fmtUtf16Le(win32_api_result.span()) });
return e;
};
}
diff --git a/lib/std/pdb.zig b/lib/std/pdb.zig
index d0623145a0..9640ec3569 100644
--- a/lib/std/pdb.zig
+++ b/lib/std/pdb.zig
@@ -513,7 +513,7 @@ pub const Pdb = struct {
};
pub fn init(allocator: mem.Allocator, path: []const u8) !Pdb {
- const file = try fs.cwd().openFile(path, .{ .intended_io_mode = .blocking });
+ const file = try fs.cwd().openFile(path, .{});
errdefer file.close();
return Pdb{
diff --git a/lib/std/priority_dequeue.zig b/lib/std/priority_dequeue.zig
index dc3981b65e..1b4a82aac3 100644
--- a/lib/std/priority_dequeue.zig
+++ b/lib/std/priority_dequeue.zig
@@ -866,7 +866,7 @@ test "std.PriorityDequeue: shrinkAndFree" {
}
test "std.PriorityDequeue: fuzz testing min" {
- var prng = std.rand.DefaultPrng.init(0x12345678);
+ var prng = std.Random.DefaultPrng.init(0x12345678);
const random = prng.random();
const test_case_count = 100;
@@ -878,7 +878,7 @@ test "std.PriorityDequeue: fuzz testing min" {
}
}
-fn fuzzTestMin(rng: std.rand.Random, comptime queue_size: usize) !void {
+fn fuzzTestMin(rng: std.Random, comptime queue_size: usize) !void {
const allocator = testing.allocator;
const items = try generateRandomSlice(allocator, rng, queue_size);
@@ -895,7 +895,7 @@ fn fuzzTestMin(rng: std.rand.Random, comptime queue_size: usize) !void {
}
test "std.PriorityDequeue: fuzz testing max" {
- var prng = std.rand.DefaultPrng.init(0x87654321);
+ var prng = std.Random.DefaultPrng.init(0x87654321);
const random = prng.random();
const test_case_count = 100;
@@ -907,7 +907,7 @@ test "std.PriorityDequeue: fuzz testing max" {
}
}
-fn fuzzTestMax(rng: std.rand.Random, queue_size: usize) !void {
+fn fuzzTestMax(rng: std.Random, queue_size: usize) !void {
const allocator = testing.allocator;
const items = try generateRandomSlice(allocator, rng, queue_size);
@@ -924,7 +924,7 @@ fn fuzzTestMax(rng: std.rand.Random, queue_size: usize) !void {
}
test "std.PriorityDequeue: fuzz testing min and max" {
- var prng = std.rand.DefaultPrng.init(0x87654321);
+ var prng = std.Random.DefaultPrng.init(0x87654321);
const random = prng.random();
const test_case_count = 100;
@@ -936,7 +936,7 @@ test "std.PriorityDequeue: fuzz testing min and max" {
}
}
-fn fuzzTestMinMax(rng: std.rand.Random, queue_size: usize) !void {
+fn fuzzTestMinMax(rng: std.Random, queue_size: usize) !void {
const allocator = testing.allocator;
const items = try generateRandomSlice(allocator, rng, queue_size);
@@ -963,7 +963,7 @@ fn fuzzTestMinMax(rng: std.rand.Random, queue_size: usize) !void {
}
}
-fn generateRandomSlice(allocator: std.mem.Allocator, rng: std.rand.Random, size: usize) ![]u32 {
+fn generateRandomSlice(allocator: std.mem.Allocator, rng: std.Random, size: usize) ![]u32 {
var array = std.ArrayList(u32).init(allocator);
try array.ensureTotalCapacity(size);
diff --git a/lib/std/process.zig b/lib/std/process.zig
index 397e6971e6..5360a96521 100644
--- a/lib/std/process.zig
+++ b/lib/std/process.zig
@@ -16,11 +16,15 @@ pub const changeCurDir = os.chdir;
pub const changeCurDirC = os.chdirC;
/// The result is a slice of `out_buffer`, from index `0`.
+/// On Windows, the result is encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
+/// On other platforms, the result is an opaque sequence of bytes with no particular encoding.
pub fn getCwd(out_buffer: []u8) ![]u8 {
return os.getcwd(out_buffer);
}
/// Caller must free the returned memory.
+/// On Windows, the result is encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
+/// On other platforms, the result is an opaque sequence of bytes with no particular encoding.
pub fn getCwdAlloc(allocator: Allocator) ![]u8 {
// The use of MAX_PATH_BYTES here is just a heuristic: most paths will fit
// in stack_buf, avoiding an extra allocation in the common case.
@@ -76,7 +80,7 @@ pub const EnvMap = struct {
_ = self;
if (builtin.os.tag == .windows) {
var h = std.hash.Wyhash.init(0);
- var it = std.unicode.Utf8View.initUnchecked(s).iterator();
+ var it = std.unicode.Wtf8View.initUnchecked(s).iterator();
while (it.nextCodepoint()) |cp| {
const cp_upper = upcase(cp);
h.update(&[_]u8{
@@ -93,8 +97,8 @@ pub const EnvMap = struct {
pub fn eql(self: @This(), a: []const u8, b: []const u8) bool {
_ = self;
if (builtin.os.tag == .windows) {
- var it_a = std.unicode.Utf8View.initUnchecked(a).iterator();
- var it_b = std.unicode.Utf8View.initUnchecked(b).iterator();
+ var it_a = std.unicode.Wtf8View.initUnchecked(a).iterator();
+ var it_b = std.unicode.Wtf8View.initUnchecked(b).iterator();
while (true) {
const c_a = it_a.nextCodepoint() orelse break;
const c_b = it_b.nextCodepoint() orelse return false;
@@ -129,8 +133,9 @@ pub const EnvMap = struct {
/// Same as `put` but the key and value become owned by the EnvMap rather
/// than being copied.
/// If `putMove` fails, the ownership of key and value does not transfer.
- /// On Windows `key` must be a valid UTF-8 string.
+ /// On Windows `key` must be a valid [WTF-8](https://simonsapin.github.io/wtf-8/) string.
pub fn putMove(self: *EnvMap, key: []u8, value: []u8) !void {
+ assert(std.unicode.wtf8ValidateSlice(key));
const get_or_put = try self.hash_map.getOrPut(key);
if (get_or_put.found_existing) {
self.free(get_or_put.key_ptr.*);
@@ -141,8 +146,9 @@ pub const EnvMap = struct {
}
/// `key` and `value` are copied into the EnvMap.
- /// On Windows `key` must be a valid UTF-8 string.
+ /// On Windows `key` must be a valid [WTF-8](https://simonsapin.github.io/wtf-8/) string.
pub fn put(self: *EnvMap, key: []const u8, value: []const u8) !void {
+ assert(std.unicode.wtf8ValidateSlice(key));
const value_copy = try self.copy(value);
errdefer self.free(value_copy);
const get_or_put = try self.hash_map.getOrPut(key);
@@ -159,23 +165,26 @@ pub const EnvMap = struct {
/// Find the address of the value associated with a key.
/// The returned pointer is invalidated if the map resizes.
- /// On Windows `key` must be a valid UTF-8 string.
+ /// On Windows `key` must be a valid [WTF-8](https://simonsapin.github.io/wtf-8/) string.
pub fn getPtr(self: EnvMap, key: []const u8) ?*[]const u8 {
+ assert(std.unicode.wtf8ValidateSlice(key));
return self.hash_map.getPtr(key);
}
/// Return the map's copy of the value associated with
/// a key. The returned string is invalidated if this
/// key is removed from the map.
- /// On Windows `key` must be a valid UTF-8 string.
+ /// On Windows `key` must be a valid [WTF-8](https://simonsapin.github.io/wtf-8/) string.
pub fn get(self: EnvMap, key: []const u8) ?[]const u8 {
+ assert(std.unicode.wtf8ValidateSlice(key));
return self.hash_map.get(key);
}
/// Removes the item from the map and frees its value.
/// This invalidates the value returned by get() for this key.
- /// On Windows `key` must be a valid UTF-8 string.
+ /// On Windows `key` must be a valid [WTF-8](https://simonsapin.github.io/wtf-8/) string.
pub fn remove(self: *EnvMap, key: []const u8) void {
+ assert(std.unicode.wtf8ValidateSlice(key));
const kv = self.hash_map.fetchRemove(key) orelse return;
self.free(kv.key);
self.free(kv.value);
@@ -239,18 +248,34 @@ test "EnvMap" {
try testing.expectEqual(@as(EnvMap.Size, 1), env.count());
- // test Unicode case-insensitivity on Windows
if (builtin.os.tag == .windows) {
+ // test Unicode case-insensitivity on Windows
try env.put("КИРиллИЦА", "something else");
try testing.expectEqualStrings("something else", env.get("кириллица").?);
+
+ // and WTF-8 that's not valid UTF-8
+ const wtf8_with_surrogate_pair = try std.unicode.wtf16LeToWtf8Alloc(testing.allocator, &[_]u16{
+ std.mem.nativeToLittle(u16, 0xD83D), // unpaired high surrogate
+ });
+ defer testing.allocator.free(wtf8_with_surrogate_pair);
+
+ try env.put(wtf8_with_surrogate_pair, wtf8_with_surrogate_pair);
+ try testing.expectEqualSlices(u8, wtf8_with_surrogate_pair, env.get(wtf8_with_surrogate_pair).?);
}
}
+pub const GetEnvMapError = error{
+ OutOfMemory,
+ /// WASI-only. `environ_sizes_get` or `environ_get`
+ /// failed for an unexpected reason.
+ Unexpected,
+};
+
/// Returns a snapshot of the environment variables of the current process.
/// Any modifications to the resulting EnvMap will not be reflected in the environment, and
/// likewise, any future modifications to the environment will not be reflected in the EnvMap.
/// Caller owns resulting `EnvMap` and should call its `deinit` fn when done.
-pub fn getEnvMap(allocator: Allocator) !EnvMap {
+pub fn getEnvMap(allocator: Allocator) GetEnvMapError!EnvMap {
var result = EnvMap.init(allocator);
errdefer result.deinit();
@@ -269,7 +294,7 @@ pub fn getEnvMap(allocator: Allocator) !EnvMap {
while (ptr[i] != 0 and ptr[i] != '=') : (i += 1) {}
const key_w = ptr[key_start..i];
- const key = try std.unicode.utf16leToUtf8Alloc(allocator, key_w);
+ const key = try std.unicode.wtf16LeToWtf8Alloc(allocator, key_w);
errdefer allocator.free(key);
if (ptr[i] == '=') i += 1;
@@ -277,7 +302,7 @@ pub fn getEnvMap(allocator: Allocator) !EnvMap {
const value_start = i;
while (ptr[i] != 0) : (i += 1) {}
const value_w = ptr[value_start..i];
- const value = try std.unicode.utf16leToUtf8Alloc(allocator, value_w);
+ const value = try std.unicode.wtf16LeToWtf8Alloc(allocator, value_w);
errdefer allocator.free(value);
i += 1; // skip over null byte
@@ -355,25 +380,28 @@ pub const GetEnvVarOwnedError = error{
OutOfMemory,
EnvironmentVariableNotFound,
- /// See https://github.com/ziglang/zig/issues/1774
- InvalidUtf8,
+ /// On Windows, environment variable keys provided by the user must be valid WTF-8.
+ /// https://simonsapin.github.io/wtf-8/
+ InvalidWtf8,
};
/// Caller must free returned memory.
+/// On Windows, if `key` is not valid [WTF-8](https://simonsapin.github.io/wtf-8/),
+/// then `error.InvalidWtf8` is returned.
+/// On Windows, the value is encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
+/// On other platforms, the value is an opaque sequence of bytes with no particular encoding.
pub fn getEnvVarOwned(allocator: Allocator, key: []const u8) GetEnvVarOwnedError![]u8 {
if (builtin.os.tag == .windows) {
const result_w = blk: {
- const key_w = try std.unicode.utf8ToUtf16LeWithNull(allocator, key);
- defer allocator.free(key_w);
+ var stack_alloc = std.heap.stackFallback(256 * @sizeOf(u16), allocator);
+ const stack_allocator = stack_alloc.get();
+ const key_w = try std.unicode.wtf8ToWtf16LeAllocZ(stack_allocator, key);
+ defer stack_allocator.free(key_w);
break :blk std.os.getenvW(key_w) orelse return error.EnvironmentVariableNotFound;
};
- return std.unicode.utf16leToUtf8Alloc(allocator, result_w) catch |err| switch (err) {
- error.DanglingSurrogateHalf => return error.InvalidUtf8,
- error.ExpectedSecondSurrogateHalf => return error.InvalidUtf8,
- error.UnexpectedSecondSurrogateHalf => return error.InvalidUtf8,
- else => |e| return e,
- };
+ // wtf16LeToWtf8Alloc can only fail with OutOfMemory
+ return std.unicode.wtf16LeToWtf8Alloc(allocator, result_w);
} else if (builtin.os.tag == .wasi and !builtin.link_libc) {
var envmap = getEnvMap(allocator) catch return error.OutOfMemory;
defer envmap.deinit();
@@ -385,6 +413,7 @@ pub fn getEnvVarOwned(allocator: Allocator, key: []const u8) GetEnvVarOwnedError
}
}
+/// On Windows, `key` must be valid UTF-8.
pub fn hasEnvVarConstant(comptime key: []const u8) bool {
if (builtin.os.tag == .windows) {
const key_w = comptime std.unicode.utf8ToUtf16LeStringLiteral(key);
@@ -396,11 +425,22 @@ pub fn hasEnvVarConstant(comptime key: []const u8) bool {
}
}
-pub fn hasEnvVar(allocator: Allocator, key: []const u8) error{OutOfMemory}!bool {
+pub const HasEnvVarError = error{
+ OutOfMemory,
+
+ /// On Windows, environment variable keys provided by the user must be valid WTF-8.
+ /// https://simonsapin.github.io/wtf-8/
+ InvalidWtf8,
+};
+
+/// On Windows, if `key` is not valid [WTF-8](https://simonsapin.github.io/wtf-8/),
+/// then `error.InvalidWtf8` is returned.
+pub fn hasEnvVar(allocator: Allocator, key: []const u8) HasEnvVarError!bool {
if (builtin.os.tag == .windows) {
var stack_alloc = std.heap.stackFallback(256 * @sizeOf(u16), allocator);
- const key_w = try std.unicode.utf8ToUtf16LeWithNull(stack_alloc.get(), key);
- defer stack_alloc.allocator.free(key_w);
+ const stack_allocator = stack_alloc.get();
+ const key_w = try std.unicode.wtf8ToWtf16LeAllocZ(stack_allocator, key);
+ defer stack_allocator.free(key_w);
return std.os.getenvW(key_w) != null;
} else if (builtin.os.tag == .wasi and !builtin.link_libc) {
var envmap = getEnvMap(allocator) catch return error.OutOfMemory;
@@ -411,9 +451,22 @@ pub fn hasEnvVar(allocator: Allocator, key: []const u8) error{OutOfMemory}!bool
}
}
-test "os.getEnvVarOwned" {
- const ga = std.testing.allocator;
- try testing.expectError(error.EnvironmentVariableNotFound, getEnvVarOwned(ga, "BADENV"));
+test getEnvVarOwned {
+ try testing.expectError(
+ error.EnvironmentVariableNotFound,
+ getEnvVarOwned(std.testing.allocator, "BADENV"),
+ );
+}
+
+test hasEnvVarConstant {
+ if (builtin.os.tag == .wasi and !builtin.link_libc) return error.SkipZigTest;
+
+ try testing.expect(!hasEnvVarConstant("BADENV"));
+}
+
+test hasEnvVar {
+ const has_env = try hasEnvVar(std.testing.allocator, "BADENV");
+ try testing.expect(!has_env);
}
pub const ArgIteratorPosix = struct {
@@ -531,6 +584,7 @@ pub const ArgIteratorWasi = struct {
pub const ArgIteratorWindows = struct {
allocator: Allocator,
/// Owned by the iterator.
+ /// Encoded as WTF-8.
cmd_line: []const u8,
index: usize = 0,
/// Owned by the iterator. Long enough to hold the entire `cmd_line` plus a null terminator.
@@ -538,20 +592,14 @@ pub const ArgIteratorWindows = struct {
start: usize = 0,
end: usize = 0,
- pub const InitError = error{ OutOfMemory, InvalidCmdLine };
+ pub const InitError = error{OutOfMemory};
- /// `cmd_line_w` *must* be an UTF16-LE-encoded string.
+ /// `cmd_line_w` *must* be a WTF16-LE-encoded string.
///
- /// The iterator makes a copy of `cmd_line_w` converted UTF-8 and keeps it; it does *not* take
+ /// The iterator makes a copy of `cmd_line_w` converted to WTF-8 and keeps it; it does *not* take
/// ownership of `cmd_line_w`.
pub fn init(allocator: Allocator, cmd_line_w: [*:0]const u16) InitError!ArgIteratorWindows {
- const cmd_line = std.unicode.utf16leToUtf8Alloc(allocator, mem.sliceTo(cmd_line_w, 0)) catch |err| switch (err) {
- error.DanglingSurrogateHalf,
- error.ExpectedSecondSurrogateHalf,
- error.UnexpectedSecondSurrogateHalf,
- => return error.InvalidCmdLine,
- error.OutOfMemory => return error.OutOfMemory,
- };
+ const cmd_line = try std.unicode.wtf16LeToWtf8Alloc(allocator, mem.sliceTo(cmd_line_w, 0));
errdefer allocator.free(cmd_line);
const buffer = try allocator.alloc(u8, cmd_line.len + 1);
@@ -566,6 +614,7 @@ pub const ArgIteratorWindows = struct {
/// Returns the next argument and advances the iterator. Returns `null` if at the end of the
/// command-line string. The iterator owns the returned slice.
+ /// The result is encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
pub fn next(self: *ArgIteratorWindows) ?[:0]const u8 {
return self.nextWithStrategy(next_strategy);
}
@@ -777,7 +826,6 @@ pub fn ArgIteratorGeneral(comptime options: ArgIteratorGeneralOptions) type {
pub const Self = @This();
pub const InitError = error{OutOfMemory};
- pub const InitUtf16leError = error{ OutOfMemory, InvalidCmdLine };
/// cmd_line_utf8 MUST remain valid and constant while using this instance
pub fn init(allocator: Allocator, cmd_line_utf8: []const u8) InitError!Self {
@@ -805,30 +853,6 @@ pub fn ArgIteratorGeneral(comptime options: ArgIteratorGeneralOptions) type {
};
}
- /// cmd_line_utf16le MUST be encoded UTF16-LE, and is converted to UTF-8 in an internal buffer
- pub fn initUtf16le(allocator: Allocator, cmd_line_utf16le: [*:0]const u16) InitUtf16leError!Self {
- const utf16le_slice = mem.sliceTo(cmd_line_utf16le, 0);
- const cmd_line = std.unicode.utf16leToUtf8Alloc(allocator, utf16le_slice) catch |err| switch (err) {
- error.ExpectedSecondSurrogateHalf,
- error.DanglingSurrogateHalf,
- error.UnexpectedSecondSurrogateHalf,
- => return error.InvalidCmdLine,
-
- error.OutOfMemory => return error.OutOfMemory,
- };
- errdefer allocator.free(cmd_line);
-
- const buffer = try allocator.alloc(u8, cmd_line.len + 1);
- errdefer allocator.free(buffer);
-
- return Self{
- .allocator = allocator,
- .cmd_line = cmd_line,
- .free_cmd_line_on_deinit = true,
- .buffer = buffer,
- };
- }
-
// Skips over whitespace in the cmd_line.
// Returns false if the terminating sentinel is reached, true otherwise.
// Also skips over comments (if supported).
@@ -1021,6 +1045,8 @@ pub const ArgIterator = struct {
/// Get the next argument. Returns 'null' if we are at the end.
/// Returned slice is pointing to the iterator's internal buffer.
+ /// On Windows, the result is encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
+ /// On other platforms, the result is an opaque sequence of bytes with no particular encoding.
pub fn next(self: *ArgIterator) ?([:0]const u8) {
return self.inner.next();
}
@@ -1057,6 +1083,8 @@ pub fn argsWithAllocator(allocator: Allocator) ArgIterator.InitError!ArgIterator
}
/// Caller must call argsFree on result.
+/// On Windows, the result is encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
+/// On other platforms, the result is an opaque sequence of bytes with no particular encoding.
pub fn argsAlloc(allocator: Allocator) ![][:0]u8 {
// TODO refactor to only make 1 allocation.
var it = try argsWithAllocator(allocator);
@@ -1201,7 +1229,7 @@ test "ArgIteratorWindows" {
}
fn testArgIteratorWindows(cmd_line: []const u8, expected_args: []const []const u8) !void {
- const cmd_line_w = try std.unicode.utf8ToUtf16LeWithNull(testing.allocator, cmd_line);
+ const cmd_line_w = try std.unicode.wtf8ToWtf16LeAllocZ(testing.allocator, cmd_line);
defer testing.allocator.free(cmd_line_w);
// next
diff --git a/lib/std/rand.zig b/lib/std/rand.zig
deleted file mode 100644
index 708eed13be..0000000000
--- a/lib/std/rand.zig
+++ /dev/null
@@ -1,460 +0,0 @@
-//! The engines provided here should be initialized from an external source.
-//! For a thread-local cryptographically secure pseudo random number generator,
-//! use `std.crypto.random`.
-//! Be sure to use a CSPRNG when required, otherwise using a normal PRNG will
-//! be faster and use substantially less stack space.
-
-const std = @import("std.zig");
-const builtin = @import("builtin");
-const assert = std.debug.assert;
-const mem = std.mem;
-const math = std.math;
-const maxInt = std.math.maxInt;
-
-/// Fast unbiased random numbers.
-pub const DefaultPrng = Xoshiro256;
-
-/// Cryptographically secure random numbers.
-pub const DefaultCsprng = ChaCha;
-
-pub const Ascon = @import("rand/Ascon.zig");
-pub const ChaCha = @import("rand/ChaCha.zig");
-
-pub const Isaac64 = @import("rand/Isaac64.zig");
-pub const Pcg = @import("rand/Pcg.zig");
-pub const Xoroshiro128 = @import("rand/Xoroshiro128.zig");
-pub const Xoshiro256 = @import("rand/Xoshiro256.zig");
-pub const Sfc64 = @import("rand/Sfc64.zig");
-pub const RomuTrio = @import("rand/RomuTrio.zig");
-pub const ziggurat = @import("rand/ziggurat.zig");
-
-pub const Random = struct {
- ptr: *anyopaque,
- fillFn: *const fn (ptr: *anyopaque, buf: []u8) void,
-
- pub fn init(pointer: anytype, comptime fillFn: fn (ptr: @TypeOf(pointer), buf: []u8) void) Random {
- const Ptr = @TypeOf(pointer);
- assert(@typeInfo(Ptr) == .Pointer); // Must be a pointer
- assert(@typeInfo(Ptr).Pointer.size == .One); // Must be a single-item pointer
- assert(@typeInfo(@typeInfo(Ptr).Pointer.child) == .Struct); // Must point to a struct
- const gen = struct {
- fn fill(ptr: *anyopaque, buf: []u8) void {
- const self: Ptr = @ptrCast(@alignCast(ptr));
- fillFn(self, buf);
- }
- };
-
- return .{
- .ptr = pointer,
- .fillFn = gen.fill,
- };
- }
-
- /// Read random bytes into the specified buffer until full.
- pub fn bytes(r: Random, buf: []u8) void {
- r.fillFn(r.ptr, buf);
- }
-
- pub fn boolean(r: Random) bool {
- return r.int(u1) != 0;
- }
-
- /// Returns a random value from an enum, evenly distributed.
- ///
- /// Note that this will not yield consistent results across all targets
- /// due to dependence on the representation of `usize` as an index.
- /// See `enumValueWithIndex` for further commentary.
- pub inline fn enumValue(r: Random, comptime EnumType: type) EnumType {
- return r.enumValueWithIndex(EnumType, usize);
- }
-
- /// Returns a random value from an enum, evenly distributed.
- ///
- /// An index into an array of all named values is generated using the
- /// specified `Index` type to determine the return value.
- /// This allows for results to be independent of `usize` representation.
- ///
- /// Prefer `enumValue` if this isn't important.
- ///
- /// See `uintLessThan`, which this function uses in most cases,
- /// for commentary on the runtime of this function.
- pub fn enumValueWithIndex(r: Random, comptime EnumType: type, comptime Index: type) EnumType {
- comptime assert(@typeInfo(EnumType) == .Enum);
-
- // We won't use int -> enum casting because enum elements can have
- // arbitrary values. Instead we'll randomly pick one of the type's values.
- const values = comptime std.enums.values(EnumType);
- comptime assert(values.len > 0); // can't return anything
- comptime assert(maxInt(Index) >= values.len - 1); // can't access all values
- comptime if (values.len == 1) return values[0];
-
- const index = if (comptime values.len - 1 == maxInt(Index))
- r.int(Index)
- else
- r.uintLessThan(Index, values.len);
-
- const MinInt = MinArrayIndex(Index);
- return values[@as(MinInt, @intCast(index))];
- }
-
- /// Returns a random int `i` such that `minInt(T) <= i <= maxInt(T)`.
- /// `i` is evenly distributed.
- pub fn int(r: Random, comptime T: type) T {
- const bits = @typeInfo(T).Int.bits;
- const UnsignedT = std.meta.Int(.unsigned, bits);
- const ceil_bytes = comptime std.math.divCeil(u16, bits, 8) catch unreachable;
- const ByteAlignedT = std.meta.Int(.unsigned, ceil_bytes * 8);
-
- var rand_bytes: [ceil_bytes]u8 = undefined;
- r.bytes(&rand_bytes);
-
- // use LE instead of native endian for better portability maybe?
- // TODO: endian portability is pointless if the underlying prng isn't endian portable.
- // TODO: document the endian portability of this library.
- const byte_aligned_result = mem.readInt(ByteAlignedT, &rand_bytes, .little);
- const unsigned_result: UnsignedT = @truncate(byte_aligned_result);
- return @bitCast(unsigned_result);
- }
-
- /// Constant-time implementation of `uintLessThan`.
- /// The results of this function may be biased.
- pub fn uintLessThanBiased(r: Random, comptime T: type, less_than: T) T {
- comptime assert(@typeInfo(T).Int.signedness == .unsigned);
- assert(0 < less_than);
- return limitRangeBiased(T, r.int(T), less_than);
- }
-
- /// Returns an evenly distributed random unsigned integer `0 <= i < less_than`.
- /// This function assumes that the underlying `fillFn` produces evenly distributed values.
- /// Within this assumption, the runtime of this function is exponentially distributed.
- /// If `fillFn` were backed by a true random generator,
- /// the runtime of this function would technically be unbounded.
- /// However, if `fillFn` is backed by any evenly distributed pseudo random number generator,
- /// this function is guaranteed to return.
- /// If you need deterministic runtime bounds, use `uintLessThanBiased`.
- pub fn uintLessThan(r: Random, comptime T: type, less_than: T) T {
- comptime assert(@typeInfo(T).Int.signedness == .unsigned);
- const bits = @typeInfo(T).Int.bits;
- assert(0 < less_than);
-
- // adapted from:
- // http://www.pcg-random.org/posts/bounded-rands.html
- // "Lemire's (with an extra tweak from me)"
- var x = r.int(T);
- var m = math.mulWide(T, x, less_than);
- var l: T = @truncate(m);
- if (l < less_than) {
- var t = -%less_than;
-
- if (t >= less_than) {
- t -= less_than;
- if (t >= less_than) {
- t %= less_than;
- }
- }
- while (l < t) {
- x = r.int(T);
- m = math.mulWide(T, x, less_than);
- l = @truncate(m);
- }
- }
- return @intCast(m >> bits);
- }
-
- /// Constant-time implementation of `uintAtMost`.
- /// The results of this function may be biased.
- pub fn uintAtMostBiased(r: Random, comptime T: type, at_most: T) T {
- assert(@typeInfo(T).Int.signedness == .unsigned);
- if (at_most == maxInt(T)) {
- // have the full range
- return r.int(T);
- }
- return r.uintLessThanBiased(T, at_most + 1);
- }
-
- /// Returns an evenly distributed random unsigned integer `0 <= i <= at_most`.
- /// See `uintLessThan`, which this function uses in most cases,
- /// for commentary on the runtime of this function.
- pub fn uintAtMost(r: Random, comptime T: type, at_most: T) T {
- assert(@typeInfo(T).Int.signedness == .unsigned);
- if (at_most == maxInt(T)) {
- // have the full range
- return r.int(T);
- }
- return r.uintLessThan(T, at_most + 1);
- }
-
- /// Constant-time implementation of `intRangeLessThan`.
- /// The results of this function may be biased.
- pub fn intRangeLessThanBiased(r: Random, comptime T: type, at_least: T, less_than: T) T {
- assert(at_least < less_than);
- const info = @typeInfo(T).Int;
- if (info.signedness == .signed) {
- // Two's complement makes this math pretty easy.
- const UnsignedT = std.meta.Int(.unsigned, info.bits);
- const lo: UnsignedT = @bitCast(at_least);
- const hi: UnsignedT = @bitCast(less_than);
- const result = lo +% r.uintLessThanBiased(UnsignedT, hi -% lo);
- return @bitCast(result);
- } else {
- // The signed implementation would work fine, but we can use stricter arithmetic operators here.
- return at_least + r.uintLessThanBiased(T, less_than - at_least);
- }
- }
-
- /// Returns an evenly distributed random integer `at_least <= i < less_than`.
- /// See `uintLessThan`, which this function uses in most cases,
- /// for commentary on the runtime of this function.
- pub fn intRangeLessThan(r: Random, comptime T: type, at_least: T, less_than: T) T {
- assert(at_least < less_than);
- const info = @typeInfo(T).Int;
- if (info.signedness == .signed) {
- // Two's complement makes this math pretty easy.
- const UnsignedT = std.meta.Int(.unsigned, info.bits);
- const lo: UnsignedT = @bitCast(at_least);
- const hi: UnsignedT = @bitCast(less_than);
- const result = lo +% r.uintLessThan(UnsignedT, hi -% lo);
- return @bitCast(result);
- } else {
- // The signed implementation would work fine, but we can use stricter arithmetic operators here.
- return at_least + r.uintLessThan(T, less_than - at_least);
- }
- }
-
- /// Constant-time implementation of `intRangeAtMost`.
- /// The results of this function may be biased.
- pub fn intRangeAtMostBiased(r: Random, comptime T: type, at_least: T, at_most: T) T {
- assert(at_least <= at_most);
- const info = @typeInfo(T).Int;
- if (info.signedness == .signed) {
- // Two's complement makes this math pretty easy.
- const UnsignedT = std.meta.Int(.unsigned, info.bits);
- const lo: UnsignedT = @bitCast(at_least);
- const hi: UnsignedT = @bitCast(at_most);
- const result = lo +% r.uintAtMostBiased(UnsignedT, hi -% lo);
- return @bitCast(result);
- } else {
- // The signed implementation would work fine, but we can use stricter arithmetic operators here.
- return at_least + r.uintAtMostBiased(T, at_most - at_least);
- }
- }
-
- /// Returns an evenly distributed random integer `at_least <= i <= at_most`.
- /// See `uintLessThan`, which this function uses in most cases,
- /// for commentary on the runtime of this function.
- pub fn intRangeAtMost(r: Random, comptime T: type, at_least: T, at_most: T) T {
- assert(at_least <= at_most);
- const info = @typeInfo(T).Int;
- if (info.signedness == .signed) {
- // Two's complement makes this math pretty easy.
- const UnsignedT = std.meta.Int(.unsigned, info.bits);
- const lo: UnsignedT = @bitCast(at_least);
- const hi: UnsignedT = @bitCast(at_most);
- const result = lo +% r.uintAtMost(UnsignedT, hi -% lo);
- return @bitCast(result);
- } else {
- // The signed implementation would work fine, but we can use stricter arithmetic operators here.
- return at_least + r.uintAtMost(T, at_most - at_least);
- }
- }
-
- /// Return a floating point value evenly distributed in the range [0, 1).
- pub fn float(r: Random, comptime T: type) T {
- // Generate a uniformly random value for the mantissa.
- // Then generate an exponentially biased random value for the exponent.
- // This covers every possible value in the range.
- switch (T) {
- f32 => {
- // Use 23 random bits for the mantissa, and the rest for the exponent.
- // If all 41 bits are zero, generate additional random bits, until a
- // set bit is found, or 126 bits have been generated.
- const rand = r.int(u64);
- var rand_lz = @clz(rand);
- if (rand_lz >= 41) {
- // TODO: when #5177 or #489 is implemented,
- // tell the compiler it is unlikely (1/2^41) to reach this point.
- // (Same for the if branch and the f64 calculations below.)
- rand_lz = 41 + @clz(r.int(u64));
- if (rand_lz == 41 + 64) {
- // It is astronomically unlikely to reach this point.
- rand_lz += @clz(r.int(u32) | 0x7FF);
- }
- }
- const mantissa: u23 = @truncate(rand);
- const exponent = @as(u32, 126 - rand_lz) << 23;
- return @bitCast(exponent | mantissa);
- },
- f64 => {
- // Use 52 random bits for the mantissa, and the rest for the exponent.
- // If all 12 bits are zero, generate additional random bits, until a
- // set bit is found, or 1022 bits have been generated.
- const rand = r.int(u64);
- var rand_lz: u64 = @clz(rand);
- if (rand_lz >= 12) {
- rand_lz = 12;
- while (true) {
- // It is astronomically unlikely for this loop to execute more than once.
- const addl_rand_lz = @clz(r.int(u64));
- rand_lz += addl_rand_lz;
- if (addl_rand_lz != 64) {
- break;
- }
- if (rand_lz >= 1022) {
- rand_lz = 1022;
- break;
- }
- }
- }
- const mantissa = rand & 0xFFFFFFFFFFFFF;
- const exponent = (1022 - rand_lz) << 52;
- return @bitCast(exponent | mantissa);
- },
- else => @compileError("unknown floating point type"),
- }
- }
-
- /// Return a floating point value normally distributed with mean = 0, stddev = 1.
- ///
- /// To use different parameters, use: floatNorm(...) * desiredStddev + desiredMean.
- pub fn floatNorm(r: Random, comptime T: type) T {
- const value = ziggurat.next_f64(r, ziggurat.NormDist);
- switch (T) {
- f32 => return @floatCast(value),
- f64 => return value,
- else => @compileError("unknown floating point type"),
- }
- }
-
- /// Return an exponentially distributed float with a rate parameter of 1.
- ///
- /// To use a different rate parameter, use: floatExp(...) / desiredRate.
- pub fn floatExp(r: Random, comptime T: type) T {
- const value = ziggurat.next_f64(r, ziggurat.ExpDist);
- switch (T) {
- f32 => return @floatCast(value),
- f64 => return value,
- else => @compileError("unknown floating point type"),
- }
- }
-
- /// Shuffle a slice into a random order.
- ///
- /// Note that this will not yield consistent results across all targets
- /// due to dependence on the representation of `usize` as an index.
- /// See `shuffleWithIndex` for further commentary.
- pub inline fn shuffle(r: Random, comptime T: type, buf: []T) void {
- r.shuffleWithIndex(T, buf, usize);
- }
-
- /// Shuffle a slice into a random order, using an index of a
- /// specified type to maintain distribution across targets.
- /// Asserts the index type can represent `buf.len`.
- ///
- /// Indexes into the slice are generated using the specified `Index`
- /// type, which determines distribution properties. This allows for
- /// results to be independent of `usize` representation.
- ///
- /// Prefer `shuffle` if this isn't important.
- ///
- /// See `intRangeLessThan`, which this function uses,
- /// for commentary on the runtime of this function.
- pub fn shuffleWithIndex(r: Random, comptime T: type, buf: []T, comptime Index: type) void {
- const MinInt = MinArrayIndex(Index);
- if (buf.len < 2) {
- return;
- }
-
- // `i <= j < max <= maxInt(MinInt)`
- const max: MinInt = @intCast(buf.len);
- var i: MinInt = 0;
- while (i < max - 1) : (i += 1) {
- const j: MinInt = @intCast(r.intRangeLessThan(Index, i, max));
- mem.swap(T, &buf[i], &buf[j]);
- }
- }
-
- /// Randomly selects an index into `proportions`, where the likelihood of each
- /// index is weighted by that proportion.
- /// For example, if the last proportion is larger than the first, the index of the
- /// last proportion is more likely to be returned than the index of the first, and vice versa.
- ///
- /// This is useful for selecting an item from a slice where weights are not equal.
- /// `T` must be a numeric type capable of holding the sum of `proportions`.
- pub fn weightedIndex(r: std.rand.Random, comptime T: type, proportions: []const T) usize {
- // This implementation works by summing the proportions and picking a
- // random point in [0, sum). We then loop over the proportions,
- // accumulating until our accumulator is greater than the random point.
-
- const sum = s: {
- var sum: T = 0;
- for (proportions) |v| sum += v;
- break :s sum;
- };
-
- const point = switch (@typeInfo(T)) {
- .Int => |int_info| switch (int_info.signedness) {
- .signed => r.intRangeLessThan(T, 0, sum),
- .unsigned => r.uintLessThan(T, sum),
- },
- // take care that imprecision doesn't lead to a value slightly greater than sum
- .Float => @min(r.float(T) * sum, sum - std.math.floatEps(T)),
- else => @compileError("weightedIndex does not support proportions of type " ++
- @typeName(T)),
- };
-
- assert(point < sum);
-
- var accumulator: T = 0;
- for (proportions, 0..) |p, index| {
- accumulator += p;
- if (point < accumulator) return index;
- } else unreachable;
- }
-
- /// Returns the smallest of `Index` and `usize`.
- fn MinArrayIndex(comptime Index: type) type {
- const index_info = @typeInfo(Index).Int;
- assert(index_info.signedness == .unsigned);
- return if (index_info.bits >= @typeInfo(usize).Int.bits) usize else Index;
- }
-};
-
-/// Convert a random integer 0 <= random_int <= maxValue(T),
-/// into an integer 0 <= result < less_than.
-/// This function introduces a minor bias.
-pub fn limitRangeBiased(comptime T: type, random_int: T, less_than: T) T {
- comptime assert(@typeInfo(T).Int.signedness == .unsigned);
- const bits = @typeInfo(T).Int.bits;
-
- // adapted from:
- // http://www.pcg-random.org/posts/bounded-rands.html
- // "Integer Multiplication (Biased)"
- const m = math.mulWide(T, random_int, less_than);
- return @intCast(m >> bits);
-}
-
-// Generator to extend 64-bit seed values into longer sequences.
-//
-// The number of cycles is thus limited to 64-bits regardless of the engine, but this
-// is still plenty for practical purposes.
-pub const SplitMix64 = struct {
- s: u64,
-
- pub fn init(seed: u64) SplitMix64 {
- return SplitMix64{ .s = seed };
- }
-
- pub fn next(self: *SplitMix64) u64 {
- self.s +%= 0x9e3779b97f4a7c15;
-
- var z = self.s;
- z = (z ^ (z >> 30)) *% 0xbf58476d1ce4e5b9;
- z = (z ^ (z >> 27)) *% 0x94d049bb133111eb;
- return z ^ (z >> 31);
- }
-};
-
-test {
- std.testing.refAllDecls(@This());
- _ = @import("rand/test.zig");
-}
diff --git a/lib/std/sort.zig b/lib/std/sort.zig
index 2781867a54..a697046ea6 100644
--- a/lib/std/sort.zig
+++ b/lib/std/sort.zig
@@ -379,7 +379,7 @@ test "sort with context in the middle of a slice" {
}
test "sort fuzz testing" {
- var prng = std.rand.DefaultPrng.init(0x12345678);
+ var prng = std.Random.DefaultPrng.init(0x12345678);
const random = prng.random();
const test_case_count = 10;
diff --git a/lib/std/start.zig b/lib/std/start.zig
index f04e812271..9e9872fe93 100644
--- a/lib/std/start.zig
+++ b/lib/std/start.zig
@@ -347,7 +347,7 @@ fn WinStartup() callconv(std.os.windows.WINAPI) noreturn {
std.debug.maybeEnableSegfaultHandler();
- std.os.windows.ntdll.RtlExitUserProcess(initEventLoopAndCallMain());
+ std.os.windows.ntdll.RtlExitUserProcess(callMain());
}
fn wWinMainCRTStartup() callconv(std.os.windows.WINAPI) noreturn {
@@ -358,7 +358,7 @@ fn wWinMainCRTStartup() callconv(std.os.windows.WINAPI) noreturn {
std.debug.maybeEnableSegfaultHandler();
- const result: std.os.windows.INT = initEventLoopAndCallWinMain();
+ const result: std.os.windows.INT = call_wWinMain();
std.os.windows.ntdll.RtlExitUserProcess(@as(std.os.windows.UINT, @bitCast(result)));
}
@@ -422,7 +422,7 @@ fn posixCallMainAndExit() callconv(.C) noreturn {
expandStackSize(phdrs);
}
- std.os.exit(@call(.always_inline, callMainWithArgs, .{ argc, argv, envp }));
+ std.os.exit(callMainWithArgs(argc, argv, envp));
}
fn expandStackSize(phdrs: []elf.Phdr) void {
@@ -459,14 +459,14 @@ fn expandStackSize(phdrs: []elf.Phdr) void {
}
}
-fn callMainWithArgs(argc: usize, argv: [*][*:0]u8, envp: [][*:0]u8) u8 {
+inline fn callMainWithArgs(argc: usize, argv: [*][*:0]u8, envp: [][*:0]u8) u8 {
std.os.argv = argv[0..argc];
std.os.environ = envp;
std.debug.maybeEnableSegfaultHandler();
std.os.maybeIgnoreSigpipe();
- return initEventLoopAndCallMain();
+ return callMain();
}
fn main(c_argc: c_int, c_argv: [*][*:0]c_char, c_envp: [*:null]?[*:0]c_char) callconv(.C) c_int {
@@ -481,92 +481,18 @@ fn main(c_argc: c_int, c_argv: [*][*:0]c_char, c_envp: [*:null]?[*:0]c_char) cal
expandStackSize(phdrs);
}
- return @call(.always_inline, callMainWithArgs, .{ @as(usize, @intCast(c_argc)), @as([*][*:0]u8, @ptrCast(c_argv)), envp });
+ return callMainWithArgs(@as(usize, @intCast(c_argc)), @as([*][*:0]u8, @ptrCast(c_argv)), envp);
}
fn mainWithoutEnv(c_argc: c_int, c_argv: [*][*:0]c_char) callconv(.C) c_int {
std.os.argv = @as([*][*:0]u8, @ptrCast(c_argv))[0..@as(usize, @intCast(c_argc))];
- return @call(.always_inline, callMain, .{});
+ return callMain();
}
// General error message for a malformed return type
const bad_main_ret = "expected return type of main to be 'void', '!void', 'noreturn', 'u8', or '!u8'";
-// This is marked inline because for some reason LLVM in release mode fails to inline it,
-// and we want fewer call frames in stack traces.
-inline fn initEventLoopAndCallMain() u8 {
- if (std.event.Loop.instance) |loop| {
- if (loop == std.event.Loop.default_instance) {
- loop.init() catch |err| {
- std.log.err("{s}", .{@errorName(err)});
- if (@errorReturnTrace()) |trace| {
- std.debug.dumpStackTrace(trace.*);
- }
- return 1;
- };
- defer loop.deinit();
-
- var result: u8 = undefined;
- var frame: @Frame(callMainAsync) = undefined;
- _ = @asyncCall(&frame, &result, callMainAsync, .{loop});
- loop.run();
- return result;
- }
- }
-
- // This is marked inline because for some reason LLVM in release mode fails to inline it,
- // and we want fewer call frames in stack traces.
- return @call(.always_inline, callMain, .{});
-}
-
-// This is marked inline because for some reason LLVM in release mode fails to inline it,
-// and we want fewer call frames in stack traces.
-// TODO This function is duplicated from initEventLoopAndCallMain instead of using generics
-// because it is working around stage1 compiler bugs.
-inline fn initEventLoopAndCallWinMain() std.os.windows.INT {
- if (std.event.Loop.instance) |loop| {
- if (loop == std.event.Loop.default_instance) {
- loop.init() catch |err| {
- std.log.err("{s}", .{@errorName(err)});
- if (@errorReturnTrace()) |trace| {
- std.debug.dumpStackTrace(trace.*);
- }
- return 1;
- };
- defer loop.deinit();
-
- var result: std.os.windows.INT = undefined;
- var frame: @Frame(callWinMainAsync) = undefined;
- _ = @asyncCall(&frame, &result, callWinMainAsync, .{loop});
- loop.run();
- return result;
- }
- }
-
- // This is marked inline because for some reason LLVM in release mode fails to inline it,
- // and we want fewer call frames in stack traces.
- return @call(.always_inline, call_wWinMain, .{});
-}
-
-fn callMainAsync(loop: *std.event.Loop) callconv(.Async) u8 {
- // This prevents the event loop from terminating at least until main() has returned.
- // TODO This shouldn't be needed here; it should be in the event loop code.
- loop.beginOneEvent();
- defer loop.finishOneEvent();
- return callMain();
-}
-
-fn callWinMainAsync(loop: *std.event.Loop) callconv(.Async) std.os.windows.INT {
- // This prevents the event loop from terminating at least until main() has returned.
- // TODO This shouldn't be needed here; it should be in the event loop code.
- loop.beginOneEvent();
- defer loop.finishOneEvent();
- return call_wWinMain();
-}
-
-// This is not marked inline because it is called with @asyncCall when
-// there is an event loop.
-pub fn callMain() u8 {
+pub inline fn callMain() u8 {
switch (@typeInfo(@typeInfo(@TypeOf(root.main)).Fn.return_type.?)) {
.NoReturn => {
root.main();
diff --git a/lib/std/std.zig b/lib/std/std.zig
index 047da005c3..98a1bba0dd 100644
--- a/lib/std/std.zig
+++ b/lib/std/std.zig
@@ -36,6 +36,7 @@ pub const PackedIntSliceEndian = @import("packed_int_array.zig").PackedIntSliceE
pub const PriorityQueue = @import("priority_queue.zig").PriorityQueue;
pub const PriorityDequeue = @import("priority_dequeue.zig").PriorityDequeue;
pub const Progress = @import("Progress.zig");
+pub const Random = @import("Random.zig");
pub const RingBuffer = @import("RingBuffer.zig");
pub const SegmentedList = @import("segmented_list.zig").SegmentedList;
pub const SemanticVersion = @import("SemanticVersion.zig");
@@ -92,9 +93,6 @@ pub const elf = @import("elf.zig");
/// Enum-related metaprogramming helpers.
pub const enums = @import("enums.zig");
-/// Evented I/O data structures.
-pub const event = @import("event.zig");
-
/// First in, first out data structures.
pub const fifo = @import("fifo.zig");
@@ -144,7 +142,10 @@ pub const meta = @import("meta.zig");
/// Networking.
pub const net = @import("net.zig");
-/// Wrappers around OS-specific APIs.
+/// POSIX-like API layer.
+pub const posix = @import("os.zig");
+
+/// Non-portable Operating System-specific API.
pub const os = @import("os.zig");
pub const once = @import("once.zig").once;
@@ -159,8 +160,8 @@ pub const pdb = @import("pdb.zig");
/// and spawning of child processes.
pub const process = @import("process.zig");
-/// Fast pseudo-random number generators (i.e. not cryptographically secure).
-pub const rand = @import("rand.zig");
+/// Deprecated: use `Random` instead.
+pub const rand = Random;
/// Sorting.
pub const sort = @import("sort.zig");
@@ -198,79 +199,35 @@ pub const zig = @import("zig.zig");
pub const start = @import("start.zig");
const root = @import("root");
-const options_override = if (@hasDecl(root, "std_options")) root.std_options else struct {};
/// Stdlib-wide options that can be overridden by the root file.
-pub const options = struct {
- pub const enable_segfault_handler: bool = if (@hasDecl(options_override, "enable_segfault_handler"))
- options_override.enable_segfault_handler
- else
- debug.default_enable_segfault_handler;
+pub const options: Options = if (@hasDecl(root, "std_options")) root.std_options else .{};
+
+pub const Options = struct {
+ enable_segfault_handler: bool = debug.default_enable_segfault_handler,
/// Function used to implement `std.fs.cwd` for WASI.
- pub const wasiCwd: fn () fs.Dir = if (@hasDecl(options_override, "wasiCwd"))
- options_override.wasiCwd
- else
- fs.defaultWasiCwd;
-
- /// The application's chosen I/O mode.
- pub const io_mode: io.Mode = if (@hasDecl(options_override, "io_mode"))
- options_override.io_mode
- else if (@hasDecl(options_override, "event_loop"))
- .evented
- else
- .blocking;
-
- pub const event_loop: event.Loop.Instance = if (@hasDecl(options_override, "event_loop"))
- options_override.event_loop
- else
- event.Loop.default_instance;
-
- pub const event_loop_mode: event.Loop.Mode = if (@hasDecl(options_override, "event_loop_mode"))
- options_override.event_loop_mode
- else
- event.Loop.default_mode;
+ wasiCwd: fn () os.wasi.fd_t = fs.defaultWasiCwd,
/// The current log level.
- pub const log_level: log.Level = if (@hasDecl(options_override, "log_level"))
- options_override.log_level
- else
- log.default_level;
+ log_level: log.Level = log.default_level,
- pub const log_scope_levels: []const log.ScopeLevel = if (@hasDecl(options_override, "log_scope_levels"))
- options_override.log_scope_levels
- else
- &.{};
+ log_scope_levels: []const log.ScopeLevel = &.{},
- pub const logFn: fn (
+ logFn: fn (
comptime message_level: log.Level,
comptime scope: @TypeOf(.enum_literal),
comptime format: []const u8,
args: anytype,
- ) void = if (@hasDecl(options_override, "logFn"))
- options_override.logFn
- else
- log.defaultLog;
-
- pub const fmt_max_depth = if (@hasDecl(options_override, "fmt_max_depth"))
- options_override.fmt_max_depth
- else
- fmt.default_max_depth;
-
- pub const cryptoRandomSeed: fn (buffer: []u8) void = if (@hasDecl(options_override, "cryptoRandomSeed"))
- options_override.cryptoRandomSeed
- else
- @import("crypto/tlcsprng.zig").defaultRandomSeed;
-
- pub const crypto_always_getrandom: bool = if (@hasDecl(options_override, "crypto_always_getrandom"))
- options_override.crypto_always_getrandom
- else
- false;
-
- pub const crypto_fork_safety: bool = if (@hasDecl(options_override, "crypto_fork_safety"))
- options_override.crypto_fork_safety
- else
- true;
+ ) void = log.defaultLog,
+
+ fmt_max_depth: usize = fmt.default_max_depth,
+
+ cryptoRandomSeed: fn (buffer: []u8) void = @import("crypto/tlcsprng.zig").defaultRandomSeed,
+
+ crypto_always_getrandom: bool = false,
+
+ crypto_fork_safety: bool = true,
/// By default Zig disables SIGPIPE by setting a "no-op" handler for it. Set this option
/// to `true` to prevent that.
@@ -283,35 +240,22 @@ pub const options = struct {
/// cases it's unclear why the process was terminated. By capturing SIGPIPE instead, functions that
/// write to broken pipes will return the EPIPE error (error.BrokenPipe) and the program can handle
/// it like any other error.
- pub const keep_sigpipe: bool = if (@hasDecl(options_override, "keep_sigpipe"))
- options_override.keep_sigpipe
- else
- false;
+ keep_sigpipe: bool = false,
/// By default, std.http.Client will support HTTPS connections. Set this option to `true` to
/// disable TLS support.
///
/// This will likely reduce the size of the binary, but it will also make it impossible to
/// make a HTTPS connection.
- pub const http_disable_tls = if (@hasDecl(options_override, "http_disable_tls"))
- options_override.http_disable_tls
- else
- false;
-
- pub const side_channels_mitigations: crypto.SideChannelsMitigations = if (@hasDecl(options_override, "side_channels_mitigations"))
- options_override.side_channels_mitigations
- else
- crypto.default_side_channels_mitigations;
+ http_disable_tls: bool = false,
+
+ side_channels_mitigations: crypto.SideChannelsMitigations = crypto.default_side_channels_mitigations,
};
// This forces the start.zig file to be imported, and the comptime logic inside that
// file decides whether to export any appropriate start symbols, and call main.
comptime {
_ = start;
-
- for (@typeInfo(options_override).Struct.decls) |decl| {
- if (!@hasDecl(options, decl.name)) @compileError("no option named " ++ decl.name);
- }
}
test {
diff --git a/lib/std/tar.zig b/lib/std/tar.zig
index e0a50a84cb..a224384925 100644
--- a/lib/std/tar.zig
+++ b/lib/std/tar.zig
@@ -140,11 +140,25 @@ pub const Header = struct {
}
pub fn mode(header: Header) !u32 {
- return @intCast(try header.numeric(100, 8));
+ return @intCast(try header.octal(100, 8));
}
pub fn size(header: Header) !u64 {
- return header.numeric(124, 12);
+ const start = 124;
+ const len = 12;
+ const raw = header.bytes[start..][0..len];
+ // If the leading byte is 0xff (255), all the bytes of the field
+ // (including the leading byte) are concatenated in big-endian order,
+ // with the result being a negative number expressed in two’s
+ // complement form.
+ if (raw[0] == 0xff) return error.TarNumericValueNegative;
+ // If the leading byte is 0x80 (128), the non-leading bytes of the
+ // field are concatenated in big-endian order.
+ if (raw[0] == 0x80) {
+ if (raw[1] != 0 or raw[2] != 0 or raw[3] != 0) return error.TarNumericValueTooBig;
+ return std.mem.readInt(u64, raw[4..12], .big);
+ }
+ return try header.octal(start, len);
}
pub fn chksum(header: Header) !u64 {
@@ -170,22 +184,6 @@ pub const Header = struct {
return nullStr(header.bytes[start .. start + len]);
}
- fn numeric(header: Header, start: usize, len: usize) !u64 {
- const raw = header.bytes[start..][0..len];
- // If the leading byte is 0xff (255), all the bytes of the field
- // (including the leading byte) are concatenated in big-endian order,
- // with the result being a negative number expressed in two’s
- // complement form.
- if (raw[0] == 0xff) return error.TarNumericValueNegative;
- // If the leading byte is 0x80 (128), the non-leading bytes of the
- // field are concatenated in big-endian order.
- if (raw[0] == 0x80) {
- if (raw[1] + raw[2] + raw[3] != 0) return error.TarNumericValueTooBig;
- return std.mem.readInt(u64, raw[4..12], .big);
- }
- return try header.octal(start, len);
- }
-
fn octal(header: Header, start: usize, len: usize) !u64 {
const raw = header.bytes[start..][0..len];
// Zero-filled octal number in ASCII. Each numeric field of width w
@@ -299,14 +297,14 @@ fn Iterator(comptime ReaderType: type) type {
return header;
}
- inline fn readString(self: *Self, size: usize, buffer: []u8) ![]const u8 {
- assert(buffer.len >= size);
+ fn readString(self: *Self, size: usize, buffer: []u8) ![]const u8 {
+ if (size > buffer.len) return error.TarCorruptInput;
const buf = buffer[0..size];
try self.reader.readNoEof(buf);
return nullStr(buf);
}
- inline fn initFile(self: *Self) void {
+ fn initFile(self: *Self) void {
self.file = File{
.name = self.file_name_buffer[0..0],
.link_name = self.link_name_buffer[0..0],
@@ -318,7 +316,7 @@ fn Iterator(comptime ReaderType: type) type {
}
// Number of padding bytes in the last file block.
- inline fn blockPadding(size: u64) usize {
+ fn blockPadding(size: u64) usize {
const block_rounded = std.mem.alignForward(u64, size, Header.SIZE); // size rounded to the block boundary
return @intCast(block_rounded - size);
}
@@ -378,7 +376,7 @@ fn Iterator(comptime ReaderType: type) type {
self.file.link_name = try attr.value(&self.link_name_buffer);
},
.size => {
- var buf: [64]u8 = undefined;
+ var buf: [pax_max_size_attr_len]u8 = undefined;
self.file.size = try std.fmt.parseInt(u64, try attr.value(&buf), 10);
},
}
@@ -432,6 +430,9 @@ const PaxAttributeKind = enum {
size,
};
+// maxInt(u64) has 20 chars in base 10; in practice we got 24 chars.
+const pax_max_size_attr_len = 64;
+
fn PaxIterator(comptime ReaderType: type) type {
return struct {
size: usize, // cumulative size of all pax attributes
@@ -488,6 +489,9 @@ fn PaxIterator(comptime ReaderType: type) type {
try validateAttributeEnding(self.reader);
continue;
};
+ if (kind == .size and value_len > pax_max_size_attr_len) {
+ return error.PaxSizeAttrOverflow;
+ }
return Attribute{
.kind = kind,
.len = value_len,
@@ -498,22 +502,22 @@ fn PaxIterator(comptime ReaderType: type) type {
return null;
}
- inline fn readUntil(self: *Self, delimiter: u8) ![]const u8 {
+ fn readUntil(self: *Self, delimiter: u8) ![]const u8 {
var fbs = std.io.fixedBufferStream(&self.scratch);
try self.reader.streamUntilDelimiter(fbs.writer(), delimiter, null);
return fbs.getWritten();
}
- inline fn eql(a: []const u8, b: []const u8) bool {
+ fn eql(a: []const u8, b: []const u8) bool {
return std.mem.eql(u8, a, b);
}
- inline fn hasNull(str: []const u8) bool {
+ fn hasNull(str: []const u8) bool {
return (std.mem.indexOfScalar(u8, str, 0)) != null;
}
// Checks that each record ends with new line.
- inline fn validateAttributeEnding(reader: ReaderType) !void {
+ fn validateAttributeEnding(reader: ReaderType) !void {
if (try reader.readByte() != '\n') return error.PaxInvalidAttributeEnd;
}
};
@@ -546,31 +550,15 @@ pub fn pipeToFileSystem(dir: std.fs.Dir, reader: anytype, options: Options) !voi
const file_name = stripComponents(file.name, options.strip_components);
if (file_name.len == 0) return error.BadFileName;
- const fs_file = dir.createFile(file_name, .{}) catch |err| switch (err) {
- error.FileNotFound => again: {
- const code = code: {
- if (std.fs.path.dirname(file_name)) |dir_name| {
- dir.makePath(dir_name) catch |code| break :code code;
- break :again dir.createFile(file_name, .{}) catch |code| {
- break :code code;
- };
- }
- break :code err;
- };
- const d = options.diagnostics orelse return error.UnableToCreateFile;
- try d.errors.append(d.allocator, .{ .unable_to_create_file = .{
- .code = code,
- .file_name = try d.allocator.dupe(u8, file_name),
- } });
- break :again null;
- },
- else => |e| return e,
- };
- defer if (fs_file) |f| f.close();
-
- if (fs_file) |f| {
- try file.write(f);
- } else {
+ if (createDirAndFile(dir, file_name)) |fs_file| {
+ defer fs_file.close();
+ try file.write(fs_file);
+ } else |err| {
+ const d = options.diagnostics orelse return err;
+ try d.errors.append(d.allocator, .{ .unable_to_create_file = .{
+ .code = err,
+ .file_name = try d.allocator.dupe(u8, file_name),
+ } });
try file.skip();
}
},
@@ -581,21 +569,10 @@ pub fn pipeToFileSystem(dir: std.fs.Dir, reader: anytype, options: Options) !voi
// The data inside the symbolic link.
const link_name = file.link_name;
- dir.symLink(link_name, file_name, .{}) catch |err| again: {
- const code = code: {
- if (err == error.FileNotFound) {
- if (std.fs.path.dirname(file_name)) |dir_name| {
- dir.makePath(dir_name) catch |code| break :code code;
- break :again dir.symLink(link_name, file_name, .{}) catch |code| {
- break :code code;
- };
- }
- }
- break :code err;
- };
+ createDirAndSymlink(dir, link_name, file_name) catch |err| {
const d = options.diagnostics orelse return error.UnableToCreateSymLink;
try d.errors.append(d.allocator, .{ .unable_to_create_sym_link = .{
- .code = code,
+ .code = err,
.file_name = try d.allocator.dupe(u8, file_name),
.link_name = try d.allocator.dupe(u8, link_name),
} });
@@ -606,6 +583,31 @@ pub fn pipeToFileSystem(dir: std.fs.Dir, reader: anytype, options: Options) !voi
}
}
+/// Creates `file_name` (relative to `dir`) exclusively and returns the open file.
+/// When the first attempt fails with `error.FileNotFound` and `file_name` has a
+/// directory component, that directory path is created and the file creation
+/// is attempted once more. Every other error is returned unchanged.
+fn createDirAndFile(dir: std.fs.Dir, file_name: []const u8) !std.fs.File {
+    return dir.createFile(file_name, .{ .exclusive = true }) catch |err| {
+        // Only a missing parent directory is recoverable.
+        if (err != error.FileNotFound) return err;
+        const parent = std.fs.path.dirname(file_name) orelse return err;
+        try dir.makePath(parent);
+        return try dir.createFile(file_name, .{ .exclusive = true });
+    };
+}
+
+/// Creates a symbolic link at `file_name` (relative to `dir`) whose content is
+/// `link_name`. When the first attempt fails with `error.FileNotFound` and
+/// `file_name` has a directory component, that directory path is created with
+/// `makePath` and the link creation is retried once. Any other error, or an
+/// error from `makePath` or the retry, is returned to the caller.
+fn createDirAndSymlink(dir: std.fs.Dir, link_name: []const u8, file_name: []const u8) !void {
+    dir.symLink(link_name, file_name, .{}) catch |err| {
+        if (err == error.FileNotFound) {
+            if (std.fs.path.dirname(file_name)) |dir_name| {
+                try dir.makePath(dir_name);
+                // Return the retry's outcome directly. Falling through to
+                // `return err` would report the original FileNotFound even
+                // though the link was created successfully.
+                return try dir.symLink(link_name, file_name, .{});
+            }
+        }
+        return err;
+    };
+}
+
fn stripComponents(path: []const u8, count: u32) []const u8 {
var i: usize = 0;
var c = count;
@@ -756,3 +758,63 @@ test "tar PaxIterator" {
test {
_ = @import("tar/test.zig");
}
+
+// Table-driven check of `Header.size()` against base-256 (binary) and
+// base-8 (octal) encodings. Each case either yields `want` or fails with `err`.
+test "tar header parse size" {
+ const cases = [_]struct {
+ in: []const u8,
+ want: u64 = 0,
+ err: ?anyerror = null,
+ }{
+ // Test base-256 (binary) encoded values.
+ .{ .in = "", .want = 0 },
+ .{ .in = "\x80", .want = 0 },
+ .{ .in = "\x80\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01", .want = 1 },
+ .{ .in = "\x80\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x02", .want = 0x0102 },
+ .{ .in = "\x80\x00\x00\x00\x01\x02\x03\x04\x05\x06\x07\x08", .want = 0x0102030405060708 },
+ .{ .in = "\x80\x00\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09", .err = error.TarNumericValueTooBig },
+ .{ .in = "\x80\x00\x00\x00\x07\x76\xa2\x22\xeb\x8a\x72\x61", .want = 537795476381659745 },
+ .{ .in = "\x80\x80\x80\x00\x01\x02\x03\x04\x05\x06\x07\x08", .err = error.TarNumericValueTooBig },
+
+ // Test base-8 (octal) encoded values.
+ .{ .in = "00000000227\x00", .want = 0o227 },
+ .{ .in = " 000000227\x00", .want = 0o227 },
+ .{ .in = "00000000228\x00", .err = error.TarHeader },
+ .{ .in = "11111111111\x00", .want = 0o11111111111 },
+ };
+
+ for (cases) |case| {
+ // Start from an all-zero header and copy the encoded value to byte
+ // offset 124; presumably this is where the size field starts —
+ // confirm against Header.size().
+ var bytes = [_]u8{0} ** Header.SIZE;
+ @memcpy(bytes[124 .. 124 + case.in.len], case.in);
+ var header = Header{ .bytes = &bytes };
+ if (case.err) |err| {
+ try std.testing.expectError(err, header.size());
+ } else {
+ try std.testing.expectEqual(case.want, try header.size());
+ }
+ }
+}
+
+// Table-driven check of `Header.mode()` against octal-encoded inputs.
+// Each case either yields `want` or fails with `err`.
+test "tar header parse mode" {
+ const cases = [_]struct {
+ in: []const u8,
+ want: u64 = 0,
+ err: ?anyerror = null,
+ }{
+ .{ .in = "0000644\x00", .want = 0o644 },
+ .{ .in = "0000777\x00", .want = 0o777 },
+ .{ .in = "7777777\x00", .want = 0o7777777 },
+ // '8' is not an octal digit.
+ .{ .in = "7777778\x00", .err = error.TarHeader },
+ // No terminating null byte.
+ .{ .in = "77777777", .want = 0o77777777 },
+ // Input longer than eight digits still yields the eight-digit value;
+ // presumably the mode field is 8 bytes wide — confirm against Header.mode().
+ .{ .in = "777777777777", .want = 0o77777777 },
+ };
+ for (cases) |case| {
+ // Start from an all-zero header and copy the encoded value to byte
+ // offset 100; presumably this is where the mode field starts —
+ // confirm against Header.mode().
+ var bytes = [_]u8{0} ** Header.SIZE;
+ @memcpy(bytes[100 .. 100 + case.in.len], case.in);
+ var header = Header{ .bytes = &bytes };
+ if (case.err) |err| {
+ try std.testing.expectError(err, header.mode());
+ } else {
+ try std.testing.expectEqual(case.want, try header.mode());
+ }
+ }
+}
diff --git a/lib/std/tar/test.zig b/lib/std/tar/test.zig
index 82c73e2546..7397e1f696 100644
--- a/lib/std/tar/test.zig
+++ b/lib/std/tar/test.zig
@@ -313,6 +313,14 @@ test "tar run Go test cases" {
},
},
},
+ .{
+ .data = @embedFile("testdata/fuzz1.tar"),
+ .err = error.TarCorruptInput,
+ },
+ .{
+ .data = @embedFile("testdata/fuzz2.tar"),
+ .err = error.PaxSizeAttrOverflow,
+ },
};
for (cases) |case| {
@@ -365,3 +373,118 @@ const Md5Writer = struct {
return std.fmt.bytesToHex(s, .lower);
}
};
+
+test "tar should not overwrite existing file" {
+ // Starting from this folder structure:
+ // $ tree root
+ // root
+ // ├── a
+ // │   └── b
+ // │   └── c
+ // │   └── file.txt
+ // └── d
+ // └── b
+ // └── c
+ // └── file.txt
+ //
+ // Packed with command:
+ // $ cd root; tar cf overwrite_file.tar *
+ // Resulting tar has following structure:
+ // $ tar tvf overwrite_file.tar
+ // size path
+ // 0 a/
+ // 0 a/b/
+ // 0 a/b/c/
+ // 2 a/b/c/file.txt
+ // 0 d/
+ // 0 d/b/
+ // 0 d/b/c/
+ // 2 d/b/c/file.txt
+ //
+ // Note that there is no root folder in archive.
+ //
+ // With strip_components = 1 resulting unpacked folder was:
+ // root
+ // └── b
+ // └── c
+ // └── file.txt
+ //
+ // a/b/c/file.txt is overwritten with d/b/c/file.txt !!!
+ // This test ensures that the existing file is not overwritten.
+ //
+ const data = @embedFile("testdata/overwrite_file.tar");
+ var fsb = std.io.fixedBufferStream(data);
+
+ // Unpack with strip_components = 1 should fail
+ var root = std.testing.tmpDir(.{});
+ defer root.cleanup();
+ try testing.expectError(
+ error.PathAlreadyExists,
+ tar.pipeToFileSystem(root.dir, fsb.reader(), .{ .mode_mode = .ignore, .strip_components = 1 }),
+ );
+
+ // Unpack with strip_components = 0 should pass
+ fsb.reset();
+ var root2 = std.testing.tmpDir(.{});
+ defer root2.cleanup();
+ try tar.pipeToFileSystem(root2.dir, fsb.reader(), .{ .mode_mode = .ignore, .strip_components = 0 });
+}
+
+test "tar case sensitivity" {
+ // Mimics issue #18089: this tar contains the same file name twice,
+ // differing only in letter case. Unpacking should fail on
+ // case-insensitive file systems.
+ //
+ // $ tar tvf 18089.tar
+ // 18089/
+ // 18089/alacritty/
+ // 18089/alacritty/darkermatrix.yml
+ // 18089/alacritty/Darkermatrix.yml
+ //
+ const data = @embedFile("testdata/18089.tar");
+ var fsb = std.io.fixedBufferStream(data);
+
+ var root = std.testing.tmpDir(.{});
+ defer root.cleanup();
+
+ tar.pipeToFileSystem(root.dir, fsb.reader(), .{ .mode_mode = .ignore, .strip_components = 1 }) catch |err| {
+ // On a case-insensitive file system, unpacking fails rather than
+ // overwriting the existing file.
+ try testing.expectEqual(error.PathAlreadyExists, err);
+ return;
+ };
+
+ // On a case-sensitive file system both files are created.
+ try testing.expect((try root.dir.statFile("alacritty/darkermatrix.yml")).kind == .file);
+ try testing.expect((try root.dir.statFile("alacritty/Darkermatrix.yml")).kind == .file);
+}
+
+test "tar pipeToFileSystem" {
+    // Archive content:
+    // $ tar tvf
+    // pipe_to_file_system_test/
+    // pipe_to_file_system_test/b/
+    // pipe_to_file_system_test/b/symlink -> ../a/file
+    // pipe_to_file_system_test/a/
+    // pipe_to_file_system_test/a/file
+    // pipe_to_file_system_test/empty/
+    const data = @embedFile("testdata/pipe_to_file_system_test.tar");
+    var fsb = std.io.fixedBufferStream(data);
+
+    var root = std.testing.tmpDir(.{ .no_follow = true });
+    defer root.cleanup();
+
+    tar.pipeToFileSystem(root.dir, fsb.reader(), .{
+        .mode_mode = .ignore,
+        .strip_components = 1,
+        .exclude_empty_directories = true,
+    }) catch |err| {
+        // Skip on platforms that don't support symlinks.
+        if (err == error.UnableToCreateSymLink) return error.SkipZigTest;
+        return err;
+    };
+
+    // The empty directory is excluded; the regular file and the link exist.
+    try testing.expectError(error.FileNotFound, root.dir.statFile("empty"));
+    try testing.expect((try root.dir.statFile("a/file")).kind == .file);
+    // TODO: is there a better way to test the symlink?
+    try testing.expect((try root.dir.statFile("b/symlink")).kind == .file); // statFile follows symlink
+    // The link target "../a/file" is 9 bytes long, so the buffer must be
+    // larger than that to hold it without truncation or a length error.
+    var buf: [32]u8 = undefined;
+    _ = try root.dir.readLink("b/symlink", &buf);
+}
diff --git a/lib/std/tar/testdata/18089.tar b/lib/std/tar/testdata/18089.tar
new file mode 100644
index 0000000000..c58b3eaea2
--- /dev/null
+++ b/lib/std/tar/testdata/18089.tar
Binary files differ
diff --git a/lib/std/tar/testdata/fuzz1.tar b/lib/std/tar/testdata/fuzz1.tar
new file mode 100644
index 0000000000..545949b82b
--- /dev/null
+++ b/lib/std/tar/testdata/fuzz1.tar
Binary files differ
diff --git a/lib/std/tar/testdata/fuzz2.tar b/lib/std/tar/testdata/fuzz2.tar
new file mode 100644
index 0000000000..ef86cfe386
--- /dev/null
+++ b/lib/std/tar/testdata/fuzz2.tar
Binary files differ
diff --git a/lib/std/tar/testdata/overwrite_file.tar b/lib/std/tar/testdata/overwrite_file.tar
new file mode 100644
index 0000000000..d01e6fdd36
--- /dev/null
+++ b/lib/std/tar/testdata/overwrite_file.tar
Binary files differ
diff --git a/lib/std/tar/testdata/pipe_to_file_system_test.tar b/lib/std/tar/testdata/pipe_to_file_system_test.tar
new file mode 100644
index 0000000000..0c424166ae
--- /dev/null
+++ b/lib/std/tar/testdata/pipe_to_file_system_test.tar
Binary files differ
diff --git a/lib/std/time.zig b/lib/std/time.zig
index dad81385e9..425e028e01 100644
--- a/lib/std/time.zig
+++ b/lib/std/time.zig
@@ -9,11 +9,6 @@ pub const epoch = @import("time/epoch.zig");
/// Spurious wakeups are possible and no precision of timing is guaranteed.
pub fn sleep(nanoseconds: u64) void {
- // TODO: opting out of async sleeping?
- if (std.io.is_async) {
- return std.event.Loop.instance.?.sleep(nanoseconds);
- }
-
if (builtin.os.tag == .windows) {
const big_ms_from_ns = nanoseconds / ns_per_ms;
const ms = math.cast(os.windows.DWORD, big_ms_from_ns) orelse math.maxInt(os.windows.DWORD);
@@ -24,19 +19,17 @@ pub fn sleep(nanoseconds: u64) void {
if (builtin.os.tag == .wasi) {
const w = std.os.wasi;
const userdata: w.userdata_t = 0x0123_45678;
- const clock = w.subscription_clock_t{
- .id = w.CLOCK.MONOTONIC,
+ const clock: w.subscription_clock_t = .{
+ .id = .MONOTONIC,
.timeout = nanoseconds,
.precision = 0,
.flags = 0,
};
- const in = w.subscription_t{
+ const in: w.subscription_t = .{
.userdata = userdata,
- .u = w.subscription_u_t{
- .tag = w.EVENTTYPE_CLOCK,
- .u = w.subscription_u_u_t{
- .clock = clock,
- },
+ .u = .{
+ .tag = .CLOCK,
+ .u = .{ .clock = clock },
},
};
@@ -97,35 +90,36 @@ pub fn microTimestamp() i64 {
/// before the epoch.
/// See `std.os.clock_gettime` for a POSIX timestamp.
pub fn nanoTimestamp() i128 {
- if (builtin.os.tag == .windows) {
- // FileTime has a granularity of 100 nanoseconds and uses the NTFS/Windows epoch,
- // which is 1601-01-01.
- const epoch_adj = epoch.windows * (ns_per_s / 100);
- var ft: os.windows.FILETIME = undefined;
- os.windows.kernel32.GetSystemTimeAsFileTime(&ft);
- const ft64 = (@as(u64, ft.dwHighDateTime) << 32) | ft.dwLowDateTime;
- return @as(i128, @as(i64, @bitCast(ft64)) + epoch_adj) * 100;
- }
-
- if (builtin.os.tag == .wasi and !builtin.link_libc) {
- var ns: os.wasi.timestamp_t = undefined;
- const err = os.wasi.clock_time_get(os.wasi.CLOCK.REALTIME, 1, &ns);
- assert(err == .SUCCESS);
- return ns;
- }
-
- if (builtin.os.tag == .uefi) {
- var value: std.os.uefi.Time = undefined;
- const status = std.os.uefi.system_table.runtime_services.getTime(&value, null);
- assert(status == .Success);
- return value.toEpoch();
+ switch (builtin.os.tag) {
+ .windows => {
+ // FileTime has a granularity of 100 nanoseconds and uses the NTFS/Windows epoch,
+ // which is 1601-01-01.
+ const epoch_adj = epoch.windows * (ns_per_s / 100);
+ var ft: os.windows.FILETIME = undefined;
+ os.windows.kernel32.GetSystemTimeAsFileTime(&ft);
+ const ft64 = (@as(u64, ft.dwHighDateTime) << 32) | ft.dwLowDateTime;
+ return @as(i128, @as(i64, @bitCast(ft64)) + epoch_adj) * 100;
+ },
+ .wasi => {
+ var ns: os.wasi.timestamp_t = undefined;
+ const err = os.wasi.clock_time_get(.REALTIME, 1, &ns);
+ assert(err == .SUCCESS);
+ return ns;
+ },
+ .uefi => {
+ var value: std.os.uefi.Time = undefined;
+ const status = std.os.uefi.system_table.runtime_services.getTime(&value, null);
+ assert(status == .Success);
+ return value.toEpoch();
+ },
+ else => {
+ var ts: os.timespec = undefined;
+ os.clock_gettime(os.CLOCK.REALTIME, &ts) catch |err| switch (err) {
+ error.UnsupportedClock, error.Unexpected => return 0, // "Precision of timing depends on hardware and OS".
+ };
+ return (@as(i128, ts.tv_sec) * ns_per_s) + ts.tv_nsec;
+ },
}
-
- var ts: os.timespec = undefined;
- os.clock_gettime(os.CLOCK.REALTIME, &ts) catch |err| switch (err) {
- error.UnsupportedClock, error.Unexpected => return 0, // "Precision of timing depends on hardware and OS".
- };
- return (@as(i128, ts.tv_sec) * ns_per_s) + ts.tv_nsec;
}
test "timestamp" {
@@ -182,43 +176,43 @@ pub const Instant = struct {
// true if we should use clock_gettime()
const is_posix = switch (builtin.os.tag) {
- .wasi => builtin.link_libc,
- .windows, .uefi => false,
+ .windows, .uefi, .wasi => false,
else => true,
};
/// Queries the system for the current moment of time as an Instant.
- /// This is not guaranteed to be monotonic or steadily increasing, but for most implementations it is.
+ /// This is not guaranteed to be monotonic or steadily increasing, but for
+ /// most implementations it is.
/// Returns `error.Unsupported` when a suitable clock is not detected.
pub fn now() error{Unsupported}!Instant {
- // QPC on windows doesn't fail on >= XP/2000 and includes time suspended.
- if (builtin.os.tag == .windows) {
- return Instant{ .timestamp = os.windows.QueryPerformanceCounter() };
- }
-
- // On WASI without libc, use clock_time_get directly.
- if (builtin.os.tag == .wasi and !builtin.link_libc) {
- var ns: os.wasi.timestamp_t = undefined;
- const rc = os.wasi.clock_time_get(os.wasi.CLOCK.MONOTONIC, 1, &ns);
- if (rc != .SUCCESS) return error.Unsupported;
- return Instant{ .timestamp = ns };
- }
-
- if (builtin.os.tag == .uefi) {
- var value: std.os.uefi.Time = undefined;
- const status = std.os.uefi.system_table.runtime_services.getTime(&value, null);
- if (status != .Success) return error.Unsupported;
- return Instant{ .timestamp = value.toEpoch() };
- }
-
- // On darwin, use UPTIME_RAW instead of MONOTONIC as it ticks while suspended.
- // On linux, use BOOTTIME instead of MONOTONIC as it ticks while suspended.
- // On freebsd derivatives, use MONOTONIC_FAST as currently there's no precision tradeoff.
- // On other posix systems, MONOTONIC is generally the fastest and ticks while suspended.
const clock_id = switch (builtin.os.tag) {
+ .windows => {
+ // QPC on windows doesn't fail on >= XP/2000 and includes time suspended.
+ return Instant{ .timestamp = os.windows.QueryPerformanceCounter() };
+ },
+ .wasi => {
+ var ns: os.wasi.timestamp_t = undefined;
+ const rc = os.wasi.clock_time_get(.MONOTONIC, 1, &ns);
+ if (rc != .SUCCESS) return error.Unsupported;
+ return .{ .timestamp = ns };
+ },
+ .uefi => {
+ var value: std.os.uefi.Time = undefined;
+ const status = std.os.uefi.system_table.runtime_services.getTime(&value, null);
+ if (status != .Success) return error.Unsupported;
+ return Instant{ .timestamp = value.toEpoch() };
+ },
+ // On darwin, use UPTIME_RAW instead of MONOTONIC as it ticks while
+ // suspended.
.macos, .ios, .tvos, .watchos => os.CLOCK.UPTIME_RAW,
+ // On freebsd derivatives, use MONOTONIC_FAST as currently there's
+ // no precision tradeoff.
.freebsd, .dragonfly => os.CLOCK.MONOTONIC_FAST,
+ // On linux, use BOOTTIME instead of MONOTONIC as it ticks while
+ // suspended.
.linux => os.CLOCK.BOOTTIME,
+ // On other posix systems, MONOTONIC is generally the fastest and
+ // ticks while suspended.
else => os.CLOCK.MONOTONIC,
};
@@ -267,7 +261,7 @@ pub const Instant = struct {
}
// WASI timestamps are directly in nanoseconds
- if (builtin.os.tag == .wasi and !builtin.link_libc) {
+ if (builtin.os.tag == .wasi) {
return self.timestamp - earlier.timestamp;
}
diff --git a/lib/std/treap.zig b/lib/std/treap.zig
index a555b49495..a7633d082a 100644
--- a/lib/std/treap.zig
+++ b/lib/std/treap.zig
@@ -18,7 +18,7 @@ pub fn Treap(comptime Key: type, comptime compareFn: anytype) type {
/// A customized pseudo random number generator for the treap.
/// This just helps reducing the memory size of the treap itself
- /// as std.rand.DefaultPrng requires larger state (while producing better entropy for randomness to be fair).
+ /// as std.Random.DefaultPrng requires larger state (while producing better entropy for randomness to be fair).
const Prng = struct {
xorshift: usize = 0,
@@ -305,7 +305,7 @@ pub fn Treap(comptime Key: type, comptime compareFn: anytype) type {
// https://lemire.me/blog/2017/09/18/visiting-all-values-in-an-array-exactly-once-in-random-order/
fn SliceIterRandomOrder(comptime T: type) type {
return struct {
- rng: std.rand.Random,
+ rng: std.Random,
slice: []T,
index: usize = undefined,
offset: usize = undefined,
@@ -313,7 +313,7 @@ fn SliceIterRandomOrder(comptime T: type) type {
const Self = @This();
- pub fn init(slice: []T, rng: std.rand.Random) Self {
+ pub fn init(slice: []T, rng: std.Random) Self {
return Self{
.rng = rng,
.slice = slice,
@@ -353,7 +353,7 @@ test "std.Treap: insert, find, replace, remove" {
var treap = TestTreap{};
var nodes: [10]TestNode = undefined;
- var prng = std.rand.DefaultPrng.init(0xdeadbeef);
+ var prng = std.Random.DefaultPrng.init(0xdeadbeef);
var iter = SliceIterRandomOrder(TestNode).init(&nodes, prng.random());
// insert check
diff --git a/lib/std/unicode.zig b/lib/std/unicode.zig
index 8aae6a1b5f..b2067c4f8f 100644
--- a/lib/std/unicode.zig
+++ b/lib/std/unicode.zig
@@ -39,7 +39,16 @@ pub fn utf8ByteSequenceLength(first_byte: u8) !u3 {
/// out: the out buffer to write to. Must have a len >= utf8CodepointSequenceLength(c).
/// Errors: if c cannot be encoded in UTF-8.
/// Returns: the number of bytes written to out.
-pub fn utf8Encode(c: u21, out: []u8) !u3 {
+pub fn utf8Encode(c: u21, out: []u8) error{ Utf8CannotEncodeSurrogateHalf, CodepointTooLarge }!u3 {
+ return utf8EncodeImpl(c, out, .cannot_encode_surrogate_half);
+}
+
+/// Comptime selector passed to the `*Impl` helpers to choose how surrogate
+/// codepoints are treated.
+const Surrogates = enum {
+ /// `utf8EncodeImpl` returns `error.Utf8CannotEncodeSurrogateHalf` for a
+ /// surrogate codepoint.
+ cannot_encode_surrogate_half,
+ /// The surrogate check in `utf8EncodeImpl` is skipped.
+ can_encode_surrogate_half,
+};
+
+fn utf8EncodeImpl(c: u21, out: []u8, comptime surrogates: Surrogates) !u3 {
const length = try utf8CodepointSequenceLength(c);
assert(out.len >= length);
switch (length) {
@@ -53,7 +62,9 @@ pub fn utf8Encode(c: u21, out: []u8) !u3 {
out[1] = @as(u8, @intCast(0b10000000 | (c & 0b111111)));
},
3 => {
- if (0xd800 <= c and c <= 0xdfff) return error.Utf8CannotEncodeSurrogateHalf;
+ if (surrogates == .cannot_encode_surrogate_half and isSurrogateCodepoint(c)) {
+ return error.Utf8CannotEncodeSurrogateHalf;
+ }
out[0] = @as(u8, @intCast(0b11100000 | (c >> 12)));
out[1] = @as(u8, @intCast(0b10000000 | ((c >> 6) & 0b111111)));
out[2] = @as(u8, @intCast(0b10000000 | (c & 0b111111)));
@@ -116,12 +127,22 @@ pub fn utf8Decode2(bytes: []const u8) Utf8Decode2Error!u21 {
return value;
}
-const Utf8Decode3Error = error{
- Utf8ExpectedContinuation,
- Utf8OverlongEncoding,
+const Utf8Decode3Error = Utf8Decode3AllowSurrogateHalfError || error{
Utf8EncodesSurrogateHalf,
};
+/// Decodes a 3-byte UTF-8 sequence with `utf8Decode3AllowSurrogateHalf`, then
+/// rejects values in 0xd800...0xdfff with `error.Utf8EncodesSurrogateHalf`.
pub fn utf8Decode3(bytes: []const u8) Utf8Decode3Error!u21 {
+ const value = try utf8Decode3AllowSurrogateHalf(bytes);
+
+ if (0xd800 <= value and value <= 0xdfff) return error.Utf8EncodesSurrogateHalf;
+
+ return value;
+}
+
+const Utf8Decode3AllowSurrogateHalfError = error{
+ Utf8ExpectedContinuation,
+ Utf8OverlongEncoding,
+};
+pub fn utf8Decode3AllowSurrogateHalf(bytes: []const u8) Utf8Decode3AllowSurrogateHalfError!u21 {
assert(bytes.len == 3);
assert(bytes[0] & 0b11110000 == 0b11100000);
var value: u21 = bytes[0] & 0b00001111;
@@ -135,7 +156,6 @@ pub fn utf8Decode3(bytes: []const u8) Utf8Decode3Error!u21 {
value |= bytes[2] & 0b00111111;
if (value < 0x800) return error.Utf8OverlongEncoding;
- if (0xd800 <= value and value <= 0xdfff) return error.Utf8EncodesSurrogateHalf;
return value;
}
@@ -213,20 +233,25 @@ pub fn utf8CountCodepoints(s: []const u8) !usize {
/// Returns true if the input consists entirely of UTF-8 codepoints
pub fn utf8ValidateSlice(input: []const u8) bool {
+ return utf8ValidateSliceImpl(input, .cannot_encode_surrogate_half);
+}
+
+fn utf8ValidateSliceImpl(input: []const u8, comptime surrogates: Surrogates) bool {
var remaining = input;
- const chunk_len = std.simd.suggestVectorLength(u8) orelse 1;
- const Chunk = @Vector(chunk_len, u8);
+ if (std.simd.suggestVectorLength(u8)) |chunk_len| {
+ const Chunk = @Vector(chunk_len, u8);
- // Fast path. Check for and skip ASCII characters at the start of the input.
- while (remaining.len >= chunk_len) {
- const chunk: Chunk = remaining[0..chunk_len].*;
- const mask: Chunk = @splat(0x80);
- if (@reduce(.Or, chunk & mask == mask)) {
- // found a non ASCII byte
- break;
+ // Fast path. Check for and skip ASCII characters at the start of the input.
+ while (remaining.len >= chunk_len) {
+ const chunk: Chunk = remaining[0..chunk_len].*;
+ const mask: Chunk = @splat(0x80);
+ if (@reduce(.Or, chunk & mask == mask)) {
+ // found a non ASCII byte
+ break;
+ }
+ remaining = remaining[chunk_len..];
}
- remaining = remaining[chunk_len..];
}
// default lowest and highest continuation byte
@@ -240,9 +265,15 @@ pub fn utf8ValidateSlice(input: []const u8) bool {
const xx = 0xF1; // invalid: size 1
const as = 0xF0; // ASCII: size 1
const s1 = 0x02; // accept 0, size 2
- const s2 = 0x13; // accept 1, size 3
+ const s2 = switch (surrogates) {
+ .cannot_encode_surrogate_half => 0x13, // accept 1, size 3
+ .can_encode_surrogate_half => 0x03, // accept 0, size 3
+ };
const s3 = 0x03; // accept 0, size 3
- const s4 = 0x23; // accept 2, size 3
+ const s4 = switch (surrogates) {
+ .cannot_encode_surrogate_half => 0x23, // accept 2, size 3
+ .can_encode_surrogate_half => 0x03, // accept 0, size 3
+ };
const s5 = 0x34; // accept 3, size 4
const s6 = 0x04; // accept 0, size 4
const s7 = 0x44; // accept 4, size 4
@@ -458,7 +489,9 @@ pub const Utf16LeIterator = struct {
};
}
- pub fn nextCodepoint(it: *Utf16LeIterator) !?u21 {
+ pub const NextCodepointError = error{ DanglingSurrogateHalf, ExpectedSecondSurrogateHalf, UnexpectedSecondSurrogateHalf };
+
+ pub fn nextCodepoint(it: *Utf16LeIterator) NextCodepointError!?u21 {
assert(it.i <= it.bytes.len);
if (it.i == it.bytes.len) return null;
var code_units: [2]u16 = undefined;
@@ -569,9 +602,9 @@ fn testUtf8IteratorOnAscii() !void {
const s = Utf8View.initComptime("abc");
var it1 = s.iterator();
- try testing.expect(std.mem.eql(u8, "a", it1.nextCodepointSlice().?));
- try testing.expect(std.mem.eql(u8, "b", it1.nextCodepointSlice().?));
- try testing.expect(std.mem.eql(u8, "c", it1.nextCodepointSlice().?));
+ try testing.expect(mem.eql(u8, "a", it1.nextCodepointSlice().?));
+ try testing.expect(mem.eql(u8, "b", it1.nextCodepointSlice().?));
+ try testing.expect(mem.eql(u8, "c", it1.nextCodepointSlice().?));
try testing.expect(it1.nextCodepointSlice() == null);
var it2 = s.iterator();
@@ -599,9 +632,9 @@ fn testUtf8ViewOk() !void {
const s = Utf8View.initComptime("東京市");
var it1 = s.iterator();
- try testing.expect(std.mem.eql(u8, "東", it1.nextCodepointSlice().?));
- try testing.expect(std.mem.eql(u8, "京", it1.nextCodepointSlice().?));
- try testing.expect(std.mem.eql(u8, "市", it1.nextCodepointSlice().?));
+ try testing.expect(mem.eql(u8, "東", it1.nextCodepointSlice().?));
+ try testing.expect(mem.eql(u8, "京", it1.nextCodepointSlice().?));
+ try testing.expect(mem.eql(u8, "市", it1.nextCodepointSlice().?));
try testing.expect(it1.nextCodepointSlice() == null);
var it2 = s.iterator();
@@ -739,20 +772,20 @@ fn testUtf8Peeking() !void {
const s = Utf8View.initComptime("noël");
var it = s.iterator();
- try testing.expect(std.mem.eql(u8, "n", it.nextCodepointSlice().?));
+ try testing.expect(mem.eql(u8, "n", it.nextCodepointSlice().?));
- try testing.expect(std.mem.eql(u8, "o", it.peek(1)));
- try testing.expect(std.mem.eql(u8, "oë", it.peek(2)));
- try testing.expect(std.mem.eql(u8, "oël", it.peek(3)));
- try testing.expect(std.mem.eql(u8, "oël", it.peek(4)));
- try testing.expect(std.mem.eql(u8, "oël", it.peek(10)));
+ try testing.expect(mem.eql(u8, "o", it.peek(1)));
+ try testing.expect(mem.eql(u8, "oë", it.peek(2)));
+ try testing.expect(mem.eql(u8, "oël", it.peek(3)));
+ try testing.expect(mem.eql(u8, "oël", it.peek(4)));
+ try testing.expect(mem.eql(u8, "oël", it.peek(10)));
- try testing.expect(std.mem.eql(u8, "o", it.nextCodepointSlice().?));
- try testing.expect(std.mem.eql(u8, "ë", it.nextCodepointSlice().?));
- try testing.expect(std.mem.eql(u8, "l", it.nextCodepointSlice().?));
+ try testing.expect(mem.eql(u8, "o", it.nextCodepointSlice().?));
+ try testing.expect(mem.eql(u8, "ë", it.nextCodepointSlice().?));
+ try testing.expect(mem.eql(u8, "l", it.nextCodepointSlice().?));
try testing.expect(it.nextCodepointSlice() == null);
- try testing.expect(std.mem.eql(u8, &[_]u8{}, it.peek(1)));
+ try testing.expect(mem.eql(u8, &[_]u8{}, it.peek(1)));
}
fn testError(bytes: []const u8, expected_err: anyerror) !void {
@@ -770,143 +803,294 @@ fn testDecode(bytes: []const u8) !u21 {
return utf8Decode(bytes);
}
-/// Caller must free returned memory.
-pub fn utf16leToUtf8Alloc(allocator: mem.Allocator, utf16le: []const u16) ![]u8 {
- // optimistically guess that it will all be ascii.
- var result = try std.ArrayList(u8).initCapacity(allocator, utf16le.len);
- errdefer result.deinit();
+/// Print the given `utf8` string, encoded as UTF-8 bytes.
+/// Ill-formed UTF-8 byte sequences are replaced by the replacement character (U+FFFD)
+/// according to "U+FFFD Substitution of Maximal Subparts" from Chapter 3 of
+/// the Unicode standard, and as specified by https://encoding.spec.whatwg.org/#utf-8-decoder
+fn formatUtf8(
+ utf8: []const u8,
+ comptime fmt: []const u8,
+ options: std.fmt.FormatOptions,
+ writer: anytype,
+) !void {
+ _ = fmt;
+ _ = options;
+ var buf: [300]u8 = undefined; // just an arbitrary size
+ var u8len: usize = 0;
- var remaining = utf16le;
- if (builtin.zig_backend != .stage2_x86_64) {
- const chunk_len = std.simd.suggestVectorLength(u16) orelse 1;
- const Chunk = @Vector(chunk_len, u16);
+ // This implementation is based on this specification:
+ // https://encoding.spec.whatwg.org/#utf-8-decoder
+ var codepoint: u21 = 0;
+ var cont_bytes_seen: u3 = 0;
+ var cont_bytes_needed: u3 = 0;
+ var lower_boundary: u8 = 0x80;
+ var upper_boundary: u8 = 0xBF;
- // Fast path. Check for and encode ASCII characters at the start of the input.
- while (remaining.len >= chunk_len) {
- const chunk: Chunk = remaining[0..chunk_len].*;
- const mask: Chunk = @splat(std.mem.nativeToLittle(u16, 0x7F));
- if (@reduce(.Or, chunk | mask != mask)) {
- // found a non ASCII code unit
- break;
+ var i: usize = 0;
+ while (i < utf8.len) {
+ const byte = utf8[i];
+ if (cont_bytes_needed == 0) {
+ switch (byte) {
+ 0x00...0x7F => {
+ buf[u8len] = byte;
+ u8len += 1;
+ },
+ 0xC2...0xDF => {
+ cont_bytes_needed = 1;
+ codepoint = byte & 0b00011111;
+ },
+ 0xE0...0xEF => {
+ if (byte == 0xE0) lower_boundary = 0xA0;
+ if (byte == 0xED) upper_boundary = 0x9F;
+ cont_bytes_needed = 2;
+ codepoint = byte & 0b00001111;
+ },
+ 0xF0...0xF4 => {
+ if (byte == 0xF0) lower_boundary = 0x90;
+ if (byte == 0xF4) upper_boundary = 0x8F;
+ cont_bytes_needed = 3;
+ codepoint = byte & 0b00000111;
+ },
+ else => {
+ u8len += utf8Encode(replacement_character, buf[u8len..]) catch unreachable;
+ },
+ }
+ // consume the byte
+ i += 1;
+ } else if (byte < lower_boundary or byte > upper_boundary) {
+ codepoint = 0;
+ cont_bytes_needed = 0;
+ cont_bytes_seen = 0;
+ lower_boundary = 0x80;
+ upper_boundary = 0xBF;
+ u8len += utf8Encode(replacement_character, buf[u8len..]) catch unreachable;
+ // do not consume the current byte, it should now be treated as a possible start byte
+ } else {
+ lower_boundary = 0x80;
+ upper_boundary = 0xBF;
+ codepoint <<= 6;
+ codepoint |= byte & 0b00111111;
+ cont_bytes_seen += 1;
+ // consume the byte
+ i += 1;
+
+ if (cont_bytes_seen == cont_bytes_needed) {
+ const codepoint_len = cont_bytes_seen + 1;
+ const codepoint_start_i = i - codepoint_len;
+ @memcpy(buf[u8len..][0..codepoint_len], utf8[codepoint_start_i..][0..codepoint_len]);
+ u8len += codepoint_len;
+
+ codepoint = 0;
+ cont_bytes_needed = 0;
+ cont_bytes_seen = 0;
}
- const chunk_byte_len = chunk_len * 2;
- const chunk_bytes: @Vector(chunk_byte_len, u8) = (std.mem.sliceAsBytes(remaining)[0..chunk_byte_len]).*;
- const deinterlaced_bytes = std.simd.deinterlace(2, chunk_bytes);
- const ascii_bytes: [chunk_len]u8 = deinterlaced_bytes[0];
- // We allocated enough space to encode every UTF-16 code unit
- // as ASCII, so if the entire string is ASCII then we are
- // guaranteed to have enough space allocated
- result.appendSliceAssumeCapacity(&ascii_bytes);
- remaining = remaining[chunk_len..];
+ }
+ // make sure there's always enough room for another maximum length UTF-8 codepoint
+ if (u8len + 4 > buf.len) {
+ try writer.writeAll(buf[0..u8len]);
+ u8len = 0;
}
}
-
- var out_index: usize = result.items.len;
- var it = Utf16LeIterator.init(remaining);
- while (try it.nextCodepoint()) |codepoint| {
- const utf8_len = utf8CodepointSequenceLength(codepoint) catch unreachable;
- try result.resize(result.items.len + utf8_len);
- assert((utf8Encode(codepoint, result.items[out_index..]) catch unreachable) == utf8_len);
- out_index += utf8_len;
+ if (cont_bytes_needed != 0) {
+ // we know there's enough room because we always flush
+ // if there's less than 4 bytes remaining in the buffer.
+ u8len += utf8Encode(replacement_character, buf[u8len..]) catch unreachable;
}
+ try writer.writeAll(buf[0..u8len]);
+}
- return result.toOwnedSlice();
+/// Return a Formatter for a (potentially ill-formed) UTF-8 string.
+/// Ill-formed UTF-8 byte sequences are replaced by the replacement character (U+FFFD)
+/// according to "U+FFFD Substitution of Maximal Subparts" from Chapter 3 of
+/// the Unicode standard, and as specified by https://encoding.spec.whatwg.org/#utf-8-decoder
+pub fn fmtUtf8(utf8: []const u8) std.fmt.Formatter(formatUtf8) {
+ return .{ .data = utf8 };
}
-/// Caller must free returned memory.
-pub fn utf16leToUtf8AllocZ(allocator: mem.Allocator, utf16le: []const u16) ![:0]u8 {
- // optimistically guess that it will all be ascii (and allocate space for the null terminator)
- var result = try std.ArrayList(u8).initCapacity(allocator, utf16le.len + 1);
- errdefer result.deinit();
+test "fmtUtf8" {
+ const expectFmt = testing.expectFmt;
+ try expectFmt("", "{}", .{fmtUtf8("")});
+ try expectFmt("foo", "{}", .{fmtUtf8("foo")});
+ try expectFmt("𐐷", "{}", .{fmtUtf8("𐐷")});
+
+ // Table 3-8. U+FFFD for Non-Shortest Form Sequences
+ try expectFmt("��������A", "{}", .{fmtUtf8("\xC0\xAF\xE0\x80\xBF\xF0\x81\x82A")});
+
+ // Table 3-9. U+FFFD for Ill-Formed Sequences for Surrogates
+ try expectFmt("��������A", "{}", .{fmtUtf8("\xED\xA0\x80\xED\xBF\xBF\xED\xAFA")});
+
+ // Table 3-10. U+FFFD for Other Ill-Formed Sequences
+ try expectFmt("�����A��B", "{}", .{fmtUtf8("\xF4\x91\x92\x93\xFFA\x80\xBFB")});
+
+ // Table 3-11. U+FFFD for Truncated Sequences
+ try expectFmt("����A", "{}", .{fmtUtf8("\xE1\x80\xE2\xF0\x91\x92\xF1\xBFA")});
+}
+
+fn utf16LeToUtf8ArrayListImpl(
+ result: *std.ArrayList(u8),
+ utf16le: []const u16,
+ comptime surrogates: Surrogates,
+) (switch (surrogates) {
+ .cannot_encode_surrogate_half => Utf16LeToUtf8AllocError,
+ .can_encode_surrogate_half => mem.Allocator.Error,
+})!void {
+ assert(result.capacity >= utf16le.len);
var remaining = utf16le;
- if (builtin.zig_backend != .stage2_x86_64) {
- const chunk_len = std.simd.suggestVectorLength(u16) orelse 1;
+ vectorized: {
+ const chunk_len = std.simd.suggestVectorLength(u16) orelse break :vectorized;
const Chunk = @Vector(chunk_len, u16);
// Fast path. Check for and encode ASCII characters at the start of the input.
while (remaining.len >= chunk_len) {
const chunk: Chunk = remaining[0..chunk_len].*;
- const mask: Chunk = @splat(std.mem.nativeToLittle(u16, 0x7F));
+ const mask: Chunk = @splat(mem.nativeToLittle(u16, 0x7F));
if (@reduce(.Or, chunk | mask != mask)) {
// found a non ASCII code unit
break;
}
- const chunk_byte_len = chunk_len * 2;
- const chunk_bytes: @Vector(chunk_byte_len, u8) = (std.mem.sliceAsBytes(remaining)[0..chunk_byte_len]).*;
- const deinterlaced_bytes = std.simd.deinterlace(2, chunk_bytes);
- const ascii_bytes: [chunk_len]u8 = deinterlaced_bytes[0];
+ const ascii_chunk: @Vector(chunk_len, u8) = @truncate(mem.nativeToLittle(Chunk, chunk));
// We allocated enough space to encode every UTF-16 code unit
// as ASCII, so if the entire string is ASCII then we are
// guaranteed to have enough space allocated
- result.appendSliceAssumeCapacity(&ascii_bytes);
+ result.addManyAsArrayAssumeCapacity(chunk_len).* = ascii_chunk;
remaining = remaining[chunk_len..];
}
}
- var out_index = result.items.len;
- var it = Utf16LeIterator.init(remaining);
- while (try it.nextCodepoint()) |codepoint| {
- const utf8_len = utf8CodepointSequenceLength(codepoint) catch unreachable;
- try result.resize(result.items.len + utf8_len);
- assert((utf8Encode(codepoint, result.items[out_index..]) catch unreachable) == utf8_len);
- out_index += utf8_len;
+ switch (surrogates) {
+ .cannot_encode_surrogate_half => {
+ var it = Utf16LeIterator.init(remaining);
+ while (try it.nextCodepoint()) |codepoint| {
+ const utf8_len = utf8CodepointSequenceLength(codepoint) catch unreachable;
+ assert((utf8Encode(codepoint, try result.addManyAsSlice(utf8_len)) catch unreachable) == utf8_len);
+ }
+ },
+ .can_encode_surrogate_half => {
+ var it = Wtf16LeIterator.init(remaining);
+ while (it.nextCodepoint()) |codepoint| {
+ const utf8_len = utf8CodepointSequenceLength(codepoint) catch unreachable;
+ assert((wtf8Encode(codepoint, try result.addManyAsSlice(utf8_len)) catch unreachable) == utf8_len);
+ }
+ },
}
+}
+
+pub const Utf16LeToUtf8AllocError = mem.Allocator.Error || Utf16LeToUtf8Error;
+
+/// Appends the UTF-8 encoding of `utf16le` to `result`.
+/// Fails with an allocator error, or with a `Utf16LeToUtf8Error` if the
+/// UTF-16 input cannot be decoded.
+pub fn utf16LeToUtf8ArrayList(result: *std.ArrayList(u8), utf16le: []const u16) Utf16LeToUtf8AllocError!void {
+ // Reserve room *after* any items already in `result`: the ASCII fast path
+ // appends with `addManyAsArrayAssumeCapacity`, so bounding only the total
+ // capacity is not enough when the list is non-empty.
+ try result.ensureUnusedCapacity(utf16le.len);
+ return utf16LeToUtf8ArrayListImpl(result, utf16le, .cannot_encode_surrogate_half);
+}
+
+/// Deprecated; renamed to utf16LeToUtf8Alloc
+pub const utf16leToUtf8Alloc = utf16LeToUtf8Alloc;
+
+/// Caller must free returned memory.
+pub fn utf16LeToUtf8Alloc(allocator: mem.Allocator, utf16le: []const u16) Utf16LeToUtf8AllocError![]u8 {
+ // optimistically guess that it will all be ascii.
+ var result = try std.ArrayList(u8).initCapacity(allocator, utf16le.len);
+ errdefer result.deinit();
+
+ try utf16LeToUtf8ArrayListImpl(&result, utf16le, .cannot_encode_surrogate_half);
+ return result.toOwnedSlice();
+}
+
+/// Deprecated; renamed to utf16LeToUtf8AllocZ
+pub const utf16leToUtf8AllocZ = utf16LeToUtf8AllocZ;
+
+/// Caller must free returned memory.
+pub fn utf16LeToUtf8AllocZ(allocator: mem.Allocator, utf16le: []const u16) Utf16LeToUtf8AllocError![:0]u8 {
+ // optimistically guess that it will all be ascii (and allocate space for the null terminator)
+ var result = try std.ArrayList(u8).initCapacity(allocator, utf16le.len + 1);
+ errdefer result.deinit();
+
+ try utf16LeToUtf8ArrayListImpl(&result, utf16le, .cannot_encode_surrogate_half);
return result.toOwnedSliceSentinel(0);
}
+pub const Utf16LeToUtf8Error = Utf16LeIterator.NextCodepointError;
+
/// Asserts that the output buffer is big enough.
/// Returns end byte index into utf8.
-pub fn utf16leToUtf8(utf8: []u8, utf16le: []const u16) !usize {
- var end_index: usize = 0;
+fn utf16LeToUtf8Impl(utf8: []u8, utf16le: []const u16, comptime surrogates: Surrogates) (switch (surrogates) {
+ .cannot_encode_surrogate_half => Utf16LeToUtf8Error,
+ .can_encode_surrogate_half => error{},
+})!usize {
+ var dest_index: usize = 0;
var remaining = utf16le;
- if (builtin.zig_backend != .stage2_x86_64) {
- const chunk_len = std.simd.suggestVectorLength(u16) orelse 1;
+ vectorized: {
+ const chunk_len = std.simd.suggestVectorLength(u16) orelse break :vectorized;
const Chunk = @Vector(chunk_len, u16);
// Fast path. Check for and encode ASCII characters at the start of the input.
while (remaining.len >= chunk_len) {
const chunk: Chunk = remaining[0..chunk_len].*;
- const mask: Chunk = @splat(std.mem.nativeToLittle(u16, 0x7F));
+ const mask: Chunk = @splat(mem.nativeToLittle(u16, 0x7F));
if (@reduce(.Or, chunk | mask != mask)) {
// found a non ASCII code unit
break;
}
- const chunk_byte_len = chunk_len * 2;
- const chunk_bytes: @Vector(chunk_byte_len, u8) = (std.mem.sliceAsBytes(remaining)[0..chunk_byte_len]).*;
- const deinterlaced_bytes = std.simd.deinterlace(2, chunk_bytes);
- const ascii_bytes: [chunk_len]u8 = deinterlaced_bytes[0];
- @memcpy(utf8[end_index .. end_index + chunk_len], &ascii_bytes);
- end_index += chunk_len;
+ const ascii_chunk: @Vector(chunk_len, u8) = @truncate(mem.nativeToLittle(Chunk, chunk));
+ utf8[dest_index..][0..chunk_len].* = ascii_chunk;
+ dest_index += chunk_len;
remaining = remaining[chunk_len..];
}
}
- var it = Utf16LeIterator.init(remaining);
- while (try it.nextCodepoint()) |codepoint| {
- end_index += try utf8Encode(codepoint, utf8[end_index..]);
+ switch (surrogates) {
+ .cannot_encode_surrogate_half => {
+ var it = Utf16LeIterator.init(remaining);
+ while (try it.nextCodepoint()) |codepoint| {
+ dest_index += utf8Encode(codepoint, utf8[dest_index..]) catch |err| switch (err) {
+ // The maximum possible codepoint encoded by UTF-16 is U+10FFFF,
+ // which is within the valid codepoint range.
+ error.CodepointTooLarge => unreachable,
+ // We know the codepoint was valid in UTF-16, meaning it is not
+ // an unpaired surrogate codepoint.
+ error.Utf8CannotEncodeSurrogateHalf => unreachable,
+ };
+ }
+ },
+ .can_encode_surrogate_half => {
+ var it = Wtf16LeIterator.init(remaining);
+ while (it.nextCodepoint()) |codepoint| {
+ dest_index += wtf8Encode(codepoint, utf8[dest_index..]) catch |err| switch (err) {
+ // The maximum possible codepoint encoded by UTF-16 is U+10FFFF,
+ // which is within the valid codepoint range.
+ error.CodepointTooLarge => unreachable,
+ };
+ }
+ },
}
- return end_index;
+ return dest_index;
+}
+
+/// Deprecated; renamed to utf16LeToUtf8
+pub const utf16leToUtf8 = utf16LeToUtf8;
+
+pub fn utf16LeToUtf8(utf8: []u8, utf16le: []const u16) Utf16LeToUtf8Error!usize {
+ return utf16LeToUtf8Impl(utf8, utf16le, .cannot_encode_surrogate_half);
}
-test "utf16leToUtf8" {
+test utf16LeToUtf8 {
var utf16le: [2]u16 = undefined;
const utf16le_as_bytes = mem.sliceAsBytes(utf16le[0..]);
{
mem.writeInt(u16, utf16le_as_bytes[0..2], 'A', .little);
mem.writeInt(u16, utf16le_as_bytes[2..4], 'a', .little);
- const utf8 = try utf16leToUtf8Alloc(std.testing.allocator, &utf16le);
- defer std.testing.allocator.free(utf8);
+ const utf8 = try utf16LeToUtf8Alloc(testing.allocator, &utf16le);
+ defer testing.allocator.free(utf8);
try testing.expect(mem.eql(u8, utf8, "Aa"));
}
{
mem.writeInt(u16, utf16le_as_bytes[0..2], 0x80, .little);
mem.writeInt(u16, utf16le_as_bytes[2..4], 0xffff, .little);
- const utf8 = try utf16leToUtf8Alloc(std.testing.allocator, &utf16le);
- defer std.testing.allocator.free(utf8);
+ const utf8 = try utf16LeToUtf8Alloc(testing.allocator, &utf16le);
+ defer testing.allocator.free(utf8);
try testing.expect(mem.eql(u8, utf8, "\xc2\x80" ++ "\xef\xbf\xbf"));
}
@@ -914,8 +1098,8 @@ test "utf16leToUtf8" {
// the values just outside the surrogate half range
mem.writeInt(u16, utf16le_as_bytes[0..2], 0xd7ff, .little);
mem.writeInt(u16, utf16le_as_bytes[2..4], 0xe000, .little);
- const utf8 = try utf16leToUtf8Alloc(std.testing.allocator, &utf16le);
- defer std.testing.allocator.free(utf8);
+ const utf8 = try utf16LeToUtf8Alloc(testing.allocator, &utf16le);
+ defer testing.allocator.free(utf8);
try testing.expect(mem.eql(u8, utf8, "\xed\x9f\xbf" ++ "\xee\x80\x80"));
}
@@ -923,8 +1107,8 @@ test "utf16leToUtf8" {
// smallest surrogate pair
mem.writeInt(u16, utf16le_as_bytes[0..2], 0xd800, .little);
mem.writeInt(u16, utf16le_as_bytes[2..4], 0xdc00, .little);
- const utf8 = try utf16leToUtf8Alloc(std.testing.allocator, &utf16le);
- defer std.testing.allocator.free(utf8);
+ const utf8 = try utf16LeToUtf8Alloc(testing.allocator, &utf16le);
+ defer testing.allocator.free(utf8);
try testing.expect(mem.eql(u8, utf8, "\xf0\x90\x80\x80"));
}
@@ -932,36 +1116,33 @@ test "utf16leToUtf8" {
// largest surrogate pair
mem.writeInt(u16, utf16le_as_bytes[0..2], 0xdbff, .little);
mem.writeInt(u16, utf16le_as_bytes[2..4], 0xdfff, .little);
- const utf8 = try utf16leToUtf8Alloc(std.testing.allocator, &utf16le);
- defer std.testing.allocator.free(utf8);
+ const utf8 = try utf16LeToUtf8Alloc(testing.allocator, &utf16le);
+ defer testing.allocator.free(utf8);
try testing.expect(mem.eql(u8, utf8, "\xf4\x8f\xbf\xbf"));
}
{
mem.writeInt(u16, utf16le_as_bytes[0..2], 0xdbff, .little);
mem.writeInt(u16, utf16le_as_bytes[2..4], 0xdc00, .little);
- const utf8 = try utf16leToUtf8Alloc(std.testing.allocator, &utf16le);
- defer std.testing.allocator.free(utf8);
+ const utf8 = try utf16LeToUtf8Alloc(testing.allocator, &utf16le);
+ defer testing.allocator.free(utf8);
try testing.expect(mem.eql(u8, utf8, "\xf4\x8f\xb0\x80"));
}
{
mem.writeInt(u16, utf16le_as_bytes[0..2], 0xdcdc, .little);
mem.writeInt(u16, utf16le_as_bytes[2..4], 0xdcdc, .little);
- const result = utf16leToUtf8Alloc(std.testing.allocator, &utf16le);
- try std.testing.expectError(error.UnexpectedSecondSurrogateHalf, result);
+ const result = utf16LeToUtf8Alloc(testing.allocator, &utf16le);
+ try testing.expectError(error.UnexpectedSecondSurrogateHalf, result);
}
}
-pub fn utf8ToUtf16LeWithNull(allocator: mem.Allocator, utf8: []const u8) ![:0]u16 {
- // optimistically guess that it will not require surrogate pairs
- var result = try std.ArrayList(u16).initCapacity(allocator, utf8.len + 1);
- errdefer result.deinit();
+fn utf8ToUtf16LeArrayListImpl(result: *std.ArrayList(u16), utf8: []const u8, comptime surrogates: Surrogates) !void {
+ assert(result.capacity >= utf8.len);
var remaining = utf8;
- // Need support for std.simd.interlace
- if (builtin.zig_backend != .stage2_x86_64 and comptime !builtin.cpu.arch.isMIPS()) {
- const chunk_len = std.simd.suggestVectorLength(u8) orelse 1;
+ vectorized: {
+ const chunk_len = std.simd.suggestVectorLength(u16) orelse break :vectorized;
const Chunk = @Vector(chunk_len, u8);
// Fast path. Check for and encode ASCII characters at the start of the input.
@@ -972,41 +1153,66 @@ pub fn utf8ToUtf16LeWithNull(allocator: mem.Allocator, utf8: []const u8) ![:0]u1
// found a non ASCII code unit
break;
}
- const zeroes: Chunk = @splat(0);
- const utf16_chunk: [chunk_len * 2]u8 align(@alignOf(u16)) = std.simd.interlace(.{ chunk, zeroes });
- result.appendSliceAssumeCapacity(std.mem.bytesAsSlice(u16, &utf16_chunk));
+ const utf16_chunk = mem.nativeToLittle(@Vector(chunk_len, u16), chunk);
+ result.addManyAsArrayAssumeCapacity(chunk_len).* = utf16_chunk;
remaining = remaining[chunk_len..];
}
}
- const view = try Utf8View.init(remaining);
+ const view = switch (surrogates) {
+ .cannot_encode_surrogate_half => try Utf8View.init(remaining),
+ .can_encode_surrogate_half => try Wtf8View.init(remaining),
+ };
var it = view.iterator();
while (it.nextCodepoint()) |codepoint| {
if (codepoint < 0x10000) {
- const short = @as(u16, @intCast(codepoint));
- try result.append(mem.nativeToLittle(u16, short));
+ try result.append(mem.nativeToLittle(u16, @intCast(codepoint)));
} else {
const high = @as(u16, @intCast((codepoint - 0x10000) >> 10)) + 0xD800;
const low = @as(u16, @intCast(codepoint & 0x3FF)) + 0xDC00;
- var out: [2]u16 = undefined;
- out[0] = mem.nativeToLittle(u16, high);
- out[1] = mem.nativeToLittle(u16, low);
- try result.appendSlice(out[0..]);
+ try result.appendSlice(&.{ mem.nativeToLittle(u16, high), mem.nativeToLittle(u16, low) });
}
}
+}
+
+/// Appends the UTF-16LE encoding of `utf8` to `result`.
+/// Fails with `error.OutOfMemory`, or with `error.InvalidUtf8` if the input
+/// is rejected by the UTF-8 view.
+pub fn utf8ToUtf16LeArrayList(result: *std.ArrayList(u16), utf8: []const u8) error{ InvalidUtf8, OutOfMemory }!void {
+ // Reserve room *after* any items already in `result`: the ASCII fast path
+ // appends with `addManyAsArrayAssumeCapacity`, so bounding only the total
+ // capacity is not enough when the list is non-empty.
+ try result.ensureUnusedCapacity(utf8.len);
+ return utf8ToUtf16LeArrayListImpl(result, utf8, .cannot_encode_surrogate_half);
+}
+
+/// Converts `utf8` to a newly allocated UTF-16LE slice.
+/// Caller must free returned memory.
+pub fn utf8ToUtf16LeAlloc(allocator: mem.Allocator, utf8: []const u8) error{ InvalidUtf8, OutOfMemory }![]u16 {
+ // optimistically guess that it will not require surrogate pairs
+ var result = try std.ArrayList(u16).initCapacity(allocator, utf8.len);
+ errdefer result.deinit();
+ try utf8ToUtf16LeArrayListImpl(&result, utf8, .cannot_encode_surrogate_half);
+ return result.toOwnedSlice();
+}
+
+/// Deprecated; renamed to utf8ToUtf16LeAllocZ
+pub const utf8ToUtf16LeWithNull = utf8ToUtf16LeAllocZ;
+
+/// Converts `utf8` to a newly allocated, 0-terminated UTF-16LE slice.
+/// Caller must free returned memory.
+pub fn utf8ToUtf16LeAllocZ(allocator: mem.Allocator, utf8: []const u8) error{ InvalidUtf8, OutOfMemory }![:0]u16 {
+ // optimistically guess that it will not require surrogate pairs
+ // (the extra element leaves room for the 0 sentinel)
+ var result = try std.ArrayList(u16).initCapacity(allocator, utf8.len + 1);
+ errdefer result.deinit();
+
+ try utf8ToUtf16LeArrayListImpl(&result, utf8, .cannot_encode_surrogate_half);
return result.toOwnedSliceSentinel(0);
}
/// Returns index of next character. If exact fit, returned index equals output slice length.
/// Assumes there is enough space for the output.
-pub fn utf8ToUtf16Le(utf16le: []u16, utf8: []const u8) !usize {
- var dest_i: usize = 0;
+pub fn utf8ToUtf16Le(utf16le: []u16, utf8: []const u8) error{InvalidUtf8}!usize {
+ return utf8ToUtf16LeImpl(utf16le, utf8, .cannot_encode_surrogate_half);
+}
+
+pub fn utf8ToUtf16LeImpl(utf16le: []u16, utf8: []const u8, comptime surrogates: Surrogates) !usize {
+ var dest_index: usize = 0;
var remaining = utf8;
- // Need support for std.simd.interlace
- if (builtin.zig_backend != .stage2_x86_64 and comptime !builtin.cpu.arch.isMIPS()) {
- const chunk_len = std.simd.suggestVectorLength(u8) orelse 1;
+ vectorized: {
+ const chunk_len = std.simd.suggestVectorLength(u16) orelse break :vectorized;
const Chunk = @Vector(chunk_len, u8);
// Fast path. Check for and encode ASCII characters at the start of the input.
@@ -1017,76 +1223,138 @@ pub fn utf8ToUtf16Le(utf16le: []u16, utf8: []const u8) !usize {
// found a non ASCII code unit
break;
}
- const zeroes: Chunk = @splat(0);
- const utf16_bytes: [chunk_len * 2]u8 align(@alignOf(u16)) = std.simd.interlace(.{ chunk, zeroes });
- @memcpy(utf16le[dest_i..][0..chunk_len], std.mem.bytesAsSlice(u16, &utf16_bytes));
- dest_i += chunk_len;
+ const utf16_chunk = mem.nativeToLittle(@Vector(chunk_len, u16), chunk);
+ utf16le[dest_index..][0..chunk_len].* = utf16_chunk;
+ dest_index += chunk_len;
remaining = remaining[chunk_len..];
}
}
- var src_i: usize = 0;
- while (src_i < remaining.len) {
- const n = utf8ByteSequenceLength(remaining[src_i]) catch return error.InvalidUtf8;
- const next_src_i = src_i + n;
- const codepoint = utf8Decode(remaining[src_i..next_src_i]) catch return error.InvalidUtf8;
+ const view = switch (surrogates) {
+ .cannot_encode_surrogate_half => try Utf8View.init(remaining),
+ .can_encode_surrogate_half => try Wtf8View.init(remaining),
+ };
+ var it = view.iterator();
+ while (it.nextCodepoint()) |codepoint| {
if (codepoint < 0x10000) {
- const short = @as(u16, @intCast(codepoint));
- utf16le[dest_i] = mem.nativeToLittle(u16, short);
- dest_i += 1;
+ utf16le[dest_index] = mem.nativeToLittle(u16, @intCast(codepoint));
+ dest_index += 1;
} else {
const high = @as(u16, @intCast((codepoint - 0x10000) >> 10)) + 0xD800;
const low = @as(u16, @intCast(codepoint & 0x3FF)) + 0xDC00;
- utf16le[dest_i] = mem.nativeToLittle(u16, high);
- utf16le[dest_i + 1] = mem.nativeToLittle(u16, low);
- dest_i += 2;
+ utf16le[dest_index..][0..2].* = .{ mem.nativeToLittle(u16, high), mem.nativeToLittle(u16, low) };
+ dest_index += 2;
}
- src_i = next_src_i;
}
- return dest_i;
+ return dest_index;
}
test "utf8ToUtf16Le" {
- var utf16le: [2]u16 = [_]u16{0} ** 2;
+ var utf16le: [128]u16 = undefined;
{
const length = try utf8ToUtf16Le(utf16le[0..], "𐐷");
- try testing.expectEqual(@as(usize, 2), length);
- try testing.expectEqualSlices(u8, "\x01\xd8\x37\xdc", mem.sliceAsBytes(utf16le[0..]));
+ try testing.expectEqualSlices(u8, "\x01\xd8\x37\xdc", mem.sliceAsBytes(utf16le[0..length]));
}
{
const length = try utf8ToUtf16Le(utf16le[0..], "\u{10FFFF}");
- try testing.expectEqual(@as(usize, 2), length);
- try testing.expectEqualSlices(u8, "\xff\xdb\xff\xdf", mem.sliceAsBytes(utf16le[0..]));
+ try testing.expectEqualSlices(u8, "\xff\xdb\xff\xdf", mem.sliceAsBytes(utf16le[0..length]));
}
{
const result = utf8ToUtf16Le(utf16le[0..], "\xf4\x90\x80\x80");
try testing.expectError(error.InvalidUtf8, result);
}
+ {
+ const length = try utf8ToUtf16Le(utf16le[0..], "This string has been designed to test the vectorized implementat" ++
+ "ion by beginning with one hundred twenty-seven ASCII characters¡");
+ try testing.expectEqualSlices(u8, &.{
+ 'T', 0, 'h', 0, 'i', 0, 's', 0, ' ', 0, 's', 0, 't', 0, 'r', 0, 'i', 0, 'n', 0, 'g', 0, ' ', 0, 'h', 0, 'a', 0, 's', 0, ' ', 0,
+ 'b', 0, 'e', 0, 'e', 0, 'n', 0, ' ', 0, 'd', 0, 'e', 0, 's', 0, 'i', 0, 'g', 0, 'n', 0, 'e', 0, 'd', 0, ' ', 0, 't', 0, 'o', 0,
+ ' ', 0, 't', 0, 'e', 0, 's', 0, 't', 0, ' ', 0, 't', 0, 'h', 0, 'e', 0, ' ', 0, 'v', 0, 'e', 0, 'c', 0, 't', 0, 'o', 0, 'r', 0,
+ 'i', 0, 'z', 0, 'e', 0, 'd', 0, ' ', 0, 'i', 0, 'm', 0, 'p', 0, 'l', 0, 'e', 0, 'm', 0, 'e', 0, 'n', 0, 't', 0, 'a', 0, 't', 0,
+ 'i', 0, 'o', 0, 'n', 0, ' ', 0, 'b', 0, 'y', 0, ' ', 0, 'b', 0, 'e', 0, 'g', 0, 'i', 0, 'n', 0, 'n', 0, 'i', 0, 'n', 0, 'g', 0,
+ ' ', 0, 'w', 0, 'i', 0, 't', 0, 'h', 0, ' ', 0, 'o', 0, 'n', 0, 'e', 0, ' ', 0, 'h', 0, 'u', 0, 'n', 0, 'd', 0, 'r', 0, 'e', 0,
+ 'd', 0, ' ', 0, 't', 0, 'w', 0, 'e', 0, 'n', 0, 't', 0, 'y', 0, '-', 0, 's', 0, 'e', 0, 'v', 0, 'e', 0, 'n', 0, ' ', 0, 'A', 0,
+ 'S', 0, 'C', 0, 'I', 0, 'I', 0, ' ', 0, 'c', 0, 'h', 0, 'a', 0, 'r', 0, 'a', 0, 'c', 0, 't', 0, 'e', 0, 'r', 0, 's', 0, '¡', 0,
+ }, mem.sliceAsBytes(utf16le[0..length]));
+ }
+}
+
+test utf8ToUtf16LeArrayList {
+ {
+ var list = std.ArrayList(u16).init(testing.allocator);
+ defer list.deinit();
+ try utf8ToUtf16LeArrayList(&list, "𐐷");
+ try testing.expectEqualSlices(u8, "\x01\xd8\x37\xdc", mem.sliceAsBytes(list.items));
+ }
+ {
+ var list = std.ArrayList(u16).init(testing.allocator);
+ defer list.deinit();
+ try utf8ToUtf16LeArrayList(&list, "\u{10FFFF}");
+ try testing.expectEqualSlices(u8, "\xff\xdb\xff\xdf", mem.sliceAsBytes(list.items));
+ }
+ {
+ var list = std.ArrayList(u16).init(testing.allocator);
+ defer list.deinit();
+ const result = utf8ToUtf16LeArrayList(&list, "\xf4\x90\x80\x80");
+ try testing.expectError(error.InvalidUtf8, result);
+ }
}
-test "utf8ToUtf16LeWithNull" {
+test utf8ToUtf16LeAlloc {
{
- const utf16 = try utf8ToUtf16LeWithNull(testing.allocator, "𐐷");
+ const utf16 = try utf8ToUtf16LeAlloc(testing.allocator, "𐐷");
defer testing.allocator.free(utf16);
try testing.expectEqualSlices(u8, "\x01\xd8\x37\xdc", mem.sliceAsBytes(utf16[0..]));
- try testing.expect(utf16[2] == 0);
}
{
- const utf16 = try utf8ToUtf16LeWithNull(testing.allocator, "\u{10FFFF}");
+ const utf16 = try utf8ToUtf16LeAlloc(testing.allocator, "\u{10FFFF}");
defer testing.allocator.free(utf16);
try testing.expectEqualSlices(u8, "\xff\xdb\xff\xdf", mem.sliceAsBytes(utf16[0..]));
+ }
+ {
+ const result = utf8ToUtf16LeAlloc(testing.allocator, "\xf4\x90\x80\x80");
+ try testing.expectError(error.InvalidUtf8, result);
+ }
+}
+
+test utf8ToUtf16LeAllocZ {
+ {
+ const utf16 = try utf8ToUtf16LeAllocZ(testing.allocator, "𐐷");
+ defer testing.allocator.free(utf16);
+ try testing.expectEqualSlices(u8, "\x01\xd8\x37\xdc", mem.sliceAsBytes(utf16));
+ try testing.expect(utf16[2] == 0);
+ }
+ {
+ const utf16 = try utf8ToUtf16LeAllocZ(testing.allocator, "\u{10FFFF}");
+ defer testing.allocator.free(utf16);
+ try testing.expectEqualSlices(u8, "\xff\xdb\xff\xdf", mem.sliceAsBytes(utf16));
try testing.expect(utf16[2] == 0);
}
{
- const result = utf8ToUtf16LeWithNull(testing.allocator, "\xf4\x90\x80\x80");
+ const result = utf8ToUtf16LeAllocZ(testing.allocator, "\xf4\x90\x80\x80");
try testing.expectError(error.InvalidUtf8, result);
}
+ {
+ const utf16 = try utf8ToUtf16LeWithNull(testing.allocator, "This string has been designed to test the vectorized implementat" ++
+ "ion by beginning with one hundred twenty-seven ASCII characters¡");
+ defer testing.allocator.free(utf16);
+ try testing.expectEqualSlices(u8, &.{
+ 'T', 0, 'h', 0, 'i', 0, 's', 0, ' ', 0, 's', 0, 't', 0, 'r', 0, 'i', 0, 'n', 0, 'g', 0, ' ', 0, 'h', 0, 'a', 0, 's', 0, ' ', 0,
+ 'b', 0, 'e', 0, 'e', 0, 'n', 0, ' ', 0, 'd', 0, 'e', 0, 's', 0, 'i', 0, 'g', 0, 'n', 0, 'e', 0, 'd', 0, ' ', 0, 't', 0, 'o', 0,
+ ' ', 0, 't', 0, 'e', 0, 's', 0, 't', 0, ' ', 0, 't', 0, 'h', 0, 'e', 0, ' ', 0, 'v', 0, 'e', 0, 'c', 0, 't', 0, 'o', 0, 'r', 0,
+ 'i', 0, 'z', 0, 'e', 0, 'd', 0, ' ', 0, 'i', 0, 'm', 0, 'p', 0, 'l', 0, 'e', 0, 'm', 0, 'e', 0, 'n', 0, 't', 0, 'a', 0, 't', 0,
+ 'i', 0, 'o', 0, 'n', 0, ' ', 0, 'b', 0, 'y', 0, ' ', 0, 'b', 0, 'e', 0, 'g', 0, 'i', 0, 'n', 0, 'n', 0, 'i', 0, 'n', 0, 'g', 0,
+ ' ', 0, 'w', 0, 'i', 0, 't', 0, 'h', 0, ' ', 0, 'o', 0, 'n', 0, 'e', 0, ' ', 0, 'h', 0, 'u', 0, 'n', 0, 'd', 0, 'r', 0, 'e', 0,
+ 'd', 0, ' ', 0, 't', 0, 'w', 0, 'e', 0, 'n', 0, 't', 0, 'y', 0, '-', 0, 's', 0, 'e', 0, 'v', 0, 'e', 0, 'n', 0, ' ', 0, 'A', 0,
+ 'S', 0, 'C', 0, 'I', 0, 'I', 0, ' ', 0, 'c', 0, 'h', 0, 'a', 0, 'r', 0, 'a', 0, 'c', 0, 't', 0, 'e', 0, 'r', 0, 's', 0, '¡', 0,
+ }, mem.sliceAsBytes(utf16));
+ }
}
/// Converts a UTF-8 string literal into a UTF-16LE string literal.
-pub fn utf8ToUtf16LeStringLiteral(comptime utf8: []const u8) *const [calcUtf16LeLen(utf8) catch unreachable:0]u16 {
+pub fn utf8ToUtf16LeStringLiteral(comptime utf8: []const u8) *const [calcUtf16LeLen(utf8) catch |err| @compileError(err):0]u16 {
return comptime blk: {
- const len: usize = calcUtf16LeLen(utf8) catch |err| @compileError(err);
+ const len: usize = calcUtf16LeLen(utf8) catch unreachable;
var utf16le: [len:0]u16 = [_:0]u16{0} ** len;
const utf16le_len = utf8ToUtf16Le(&utf16le, utf8[0..]) catch |err| @compileError(err);
assert(len == utf16le_len);
@@ -1127,8 +1395,9 @@ test "calculate utf16 string length of given utf8 string in u16" {
try comptime testCalcUtf16LeLen();
}
-/// Print the given `utf16le` string
-fn formatUtf16le(
+/// Print the given `utf16le` string, encoded as UTF-8 bytes.
+/// Unpaired surrogates are replaced by the replacement character (U+FFFD).
+fn formatUtf16Le(
utf16le: []const u16,
comptime fmt: []const u8,
options: std.fmt.FormatOptions,
@@ -1136,13 +1405,14 @@ fn formatUtf16le(
) !void {
_ = fmt;
_ = options;
- var buf: [300]u8 = undefined; // just a random size I chose
+ var buf: [300]u8 = undefined; // just an arbitrary size
var it = Utf16LeIterator.init(utf16le);
var u8len: usize = 0;
while (it.nextCodepoint() catch replacement_character) |codepoint| {
u8len += utf8Encode(codepoint, buf[u8len..]) catch
utf8Encode(replacement_character, buf[u8len..]) catch unreachable;
- if (u8len + 3 >= buf.len) {
+ // make sure there's always enough room for another maximum length UTF-8 codepoint
+ if (u8len + 4 > buf.len) {
try writer.writeAll(buf[0..u8len]);
u8len = 0;
}
@@ -1150,22 +1420,27 @@ fn formatUtf16le(
try writer.writeAll(buf[0..u8len]);
}
-/// Return a Formatter for a Utf16le string
-pub fn fmtUtf16le(utf16le: []const u16) std.fmt.Formatter(formatUtf16le) {
+/// Deprecated; renamed to fmtUtf16Le
+pub const fmtUtf16le = fmtUtf16Le;
+
+/// Return a Formatter for a (potentially ill-formed) UTF-16 LE string,
+/// which will be converted to UTF-8 during formatting.
+/// Unpaired surrogates are replaced by the replacement character (U+FFFD).
+pub fn fmtUtf16Le(utf16le: []const u16) std.fmt.Formatter(formatUtf16Le) {
return .{ .data = utf16le };
}
-test "fmtUtf16le" {
- const expectFmt = std.testing.expectFmt;
- try expectFmt("", "{}", .{fmtUtf16le(utf8ToUtf16LeStringLiteral(""))});
- try expectFmt("foo", "{}", .{fmtUtf16le(utf8ToUtf16LeStringLiteral("foo"))});
- try expectFmt("𐐷", "{}", .{fmtUtf16le(utf8ToUtf16LeStringLiteral("𐐷"))});
- try expectFmt("퟿", "{}", .{fmtUtf16le(&[_]u16{std.mem.readInt(u16, "\xff\xd7", native_endian)})});
- try expectFmt("�", "{}", .{fmtUtf16le(&[_]u16{std.mem.readInt(u16, "\x00\xd8", native_endian)})});
- try expectFmt("�", "{}", .{fmtUtf16le(&[_]u16{std.mem.readInt(u16, "\xff\xdb", native_endian)})});
- try expectFmt("�", "{}", .{fmtUtf16le(&[_]u16{std.mem.readInt(u16, "\x00\xdc", native_endian)})});
- try expectFmt("�", "{}", .{fmtUtf16le(&[_]u16{std.mem.readInt(u16, "\xff\xdf", native_endian)})});
- try expectFmt("", "{}", .{fmtUtf16le(&[_]u16{std.mem.readInt(u16, "\x00\xe0", native_endian)})});
+test "fmtUtf16Le" {
+ const expectFmt = testing.expectFmt;
+ try expectFmt("", "{}", .{fmtUtf16Le(utf8ToUtf16LeStringLiteral(""))});
+ try expectFmt("foo", "{}", .{fmtUtf16Le(utf8ToUtf16LeStringLiteral("foo"))});
+ try expectFmt("𐐷", "{}", .{fmtUtf16Le(utf8ToUtf16LeStringLiteral("𐐷"))});
+ try expectFmt("퟿", "{}", .{fmtUtf16Le(&[_]u16{mem.readInt(u16, "\xff\xd7", native_endian)})});
+ try expectFmt("�", "{}", .{fmtUtf16Le(&[_]u16{mem.readInt(u16, "\x00\xd8", native_endian)})});
+ try expectFmt("�", "{}", .{fmtUtf16Le(&[_]u16{mem.readInt(u16, "\xff\xdb", native_endian)})});
+ try expectFmt("�", "{}", .{fmtUtf16Le(&[_]u16{mem.readInt(u16, "\x00\xdc", native_endian)})});
+ try expectFmt("�", "{}", .{fmtUtf16Le(&[_]u16{mem.readInt(u16, "\xff\xdf", native_endian)})});
+ try expectFmt("", "{}", .{fmtUtf16Le(&[_]u16{mem.readInt(u16, "\x00\xe0", native_endian)})});
}
test "utf8ToUtf16LeStringLiteral" {
@@ -1248,3 +1523,533 @@ test "utf8 valid codepoint" {
try testUtf8ValidCodepoint();
try comptime testUtf8ValidCodepoint();
}
+
+/// Returns true if the codepoint is a surrogate (U+D800 to U+DFFF)
+pub fn isSurrogateCodepoint(c: u21) bool {
+ return switch (c) {
+ 0xD800...0xDFFF => true,
+ else => false,
+ };
+}
+
+/// Encodes the given codepoint into a WTF-8 byte sequence.
+/// c: the codepoint.
+/// out: the out buffer to write to. Must have a len >= utf8CodepointSequenceLength(c).
+/// Errors: if c cannot be encoded in WTF-8.
+/// Returns: the number of bytes written to out.
+pub fn wtf8Encode(c: u21, out: []u8) error{CodepointTooLarge}!u3 {
+ return utf8EncodeImpl(c, out, .can_encode_surrogate_half);
+}
+
+const Wtf8DecodeError = Utf8Decode2Error || Utf8Decode3AllowSurrogateHalfError || Utf8Decode4Error;
+
+pub fn wtf8Decode(bytes: []const u8) Wtf8DecodeError!u21 {
+ return switch (bytes.len) {
+ 1 => @as(u21, bytes[0]),
+ 2 => utf8Decode2(bytes),
+ 3 => utf8Decode3AllowSurrogateHalf(bytes),
+ 4 => utf8Decode4(bytes),
+ else => unreachable,
+ };
+}
+
+/// Returns true if the input consists entirely of WTF-8 codepoints
+/// (all the same restrictions as UTF-8, but allows surrogate codepoints
+/// U+D800 to U+DFFF).
+/// Does not check for well-formed WTF-8, meaning that this function
+/// does not check that all surrogate halves are unpaired.
+pub fn wtf8ValidateSlice(input: []const u8) bool {
+ return utf8ValidateSliceImpl(input, .can_encode_surrogate_half);
+}
+
+test "validate WTF-8 slice" {
+ try testValidateWtf8Slice();
+ try comptime testValidateWtf8Slice();
+
+ // We skip variable-sized chunks (based on the recommended vector size) of
+ // ASCII characters. Let's make sure we're chunking correctly.
+ const str = [_]u8{'a'} ** 550 ++ "\xc0";
+ for (0..str.len - 3) |i| {
+ try testing.expect(!wtf8ValidateSlice(str[i..]));
+ }
+}
+fn testValidateWtf8Slice() !void {
+ // These are valid/invalid under both UTF-8 and WTF-8 rules.
+ try testing.expect(wtf8ValidateSlice("abc"));
+ try testing.expect(wtf8ValidateSlice("abc\xdf\xbf"));
+ try testing.expect(wtf8ValidateSlice(""));
+ try testing.expect(wtf8ValidateSlice("a"));
+ try testing.expect(wtf8ValidateSlice("abc"));
+ try testing.expect(wtf8ValidateSlice("Ж"));
+ try testing.expect(wtf8ValidateSlice("ЖЖ"));
+ try testing.expect(wtf8ValidateSlice("брэд-ЛГТМ"));
+ try testing.expect(wtf8ValidateSlice("☺☻☹"));
+ try testing.expect(wtf8ValidateSlice("a\u{fffdb}"));
+ try testing.expect(wtf8ValidateSlice("\xf4\x8f\xbf\xbf"));
+ try testing.expect(wtf8ValidateSlice("abc\xdf\xbf"));
+
+ try testing.expect(!wtf8ValidateSlice("abc\xc0"));
+ try testing.expect(!wtf8ValidateSlice("abc\xc0abc"));
+ try testing.expect(!wtf8ValidateSlice("aa\xe2"));
+ try testing.expect(!wtf8ValidateSlice("\x42\xfa"));
+ try testing.expect(!wtf8ValidateSlice("\x42\xfa\x43"));
+ try testing.expect(!wtf8ValidateSlice("abc\xc0"));
+ try testing.expect(!wtf8ValidateSlice("abc\xc0abc"));
+ try testing.expect(!wtf8ValidateSlice("\xf4\x90\x80\x80"));
+ try testing.expect(!wtf8ValidateSlice("\xf7\xbf\xbf\xbf"));
+ try testing.expect(!wtf8ValidateSlice("\xfb\xbf\xbf\xbf\xbf"));
+ try testing.expect(!wtf8ValidateSlice("\xc0\x80"));
+
+ // But surrogate codepoints are only valid in WTF-8.
+ try testing.expect(wtf8ValidateSlice("\xed\xa0\x80"));
+ try testing.expect(wtf8ValidateSlice("\xed\xbf\xbf"));
+}
+
+/// Wtf8View iterates the code points of a WTF-8 encoded string,
+/// including surrogate halves.
+///
+/// ```
+/// var wtf8 = (try std.unicode.Wtf8View.init("hi there")).iterator();
+/// while (wtf8.nextCodepointSlice()) |codepoint| {
+/// // note: codepoint could be a surrogate half which is invalid
+/// // UTF-8, avoid printing or otherwise sending/emitting this directly
+/// }
+/// ```
+pub const Wtf8View = struct {
+ bytes: []const u8,
+
+ pub fn init(s: []const u8) error{InvalidWtf8}!Wtf8View {
+ if (!wtf8ValidateSlice(s)) {
+ return error.InvalidWtf8;
+ }
+
+ return initUnchecked(s);
+ }
+
+ pub fn initUnchecked(s: []const u8) Wtf8View {
+ return Wtf8View{ .bytes = s };
+ }
+
+ pub inline fn initComptime(comptime s: []const u8) Wtf8View {
+ return comptime if (init(s)) |r| r else |err| switch (err) {
+ error.InvalidWtf8 => {
+ @compileError("invalid wtf8");
+ },
+ };
+ }
+
+ pub fn iterator(s: Wtf8View) Wtf8Iterator {
+ return Wtf8Iterator{
+ .bytes = s.bytes,
+ .i = 0,
+ };
+ }
+};
+
+/// Asserts that `bytes` is valid WTF-8
+pub const Wtf8Iterator = struct {
+ bytes: []const u8,
+ i: usize,
+
+ pub fn nextCodepointSlice(it: *Wtf8Iterator) ?[]const u8 {
+ if (it.i >= it.bytes.len) {
+ return null;
+ }
+
+ const cp_len = utf8ByteSequenceLength(it.bytes[it.i]) catch unreachable;
+ it.i += cp_len;
+ return it.bytes[it.i - cp_len .. it.i];
+ }
+
+ pub fn nextCodepoint(it: *Wtf8Iterator) ?u21 {
+ const slice = it.nextCodepointSlice() orelse return null;
+ return wtf8Decode(slice) catch unreachable;
+ }
+
+ /// Look ahead at the next n codepoints without advancing the iterator.
+ /// If fewer than n codepoints are available, then return the remainder of the string.
+ pub fn peek(it: *Wtf8Iterator, n: usize) []const u8 {
+ const original_i = it.i;
+ defer it.i = original_i;
+
+ var end_ix = original_i;
+ var found: usize = 0;
+ while (found < n) : (found += 1) {
+ const next_codepoint = it.nextCodepointSlice() orelse return it.bytes[original_i..];
+ end_ix += next_codepoint.len;
+ }
+
+ return it.bytes[original_i..end_ix];
+ }
+};
+
+pub fn wtf16LeToWtf8ArrayList(result: *std.ArrayList(u8), utf16le: []const u16) mem.Allocator.Error!void {
+ try result.ensureTotalCapacityPrecise(utf16le.len);
+ return utf16LeToUtf8ArrayListImpl(result, utf16le, .can_encode_surrogate_half);
+}
+
+/// Caller must free returned memory.
+pub fn wtf16LeToWtf8Alloc(allocator: mem.Allocator, wtf16le: []const u16) mem.Allocator.Error![]u8 {
+ // optimistically guess that it will all be ascii.
+ var result = try std.ArrayList(u8).initCapacity(allocator, wtf16le.len);
+ errdefer result.deinit();
+
+ try utf16LeToUtf8ArrayListImpl(&result, wtf16le, .can_encode_surrogate_half);
+ return result.toOwnedSlice();
+}
+
+/// Caller must free returned memory.
+pub fn wtf16LeToWtf8AllocZ(allocator: mem.Allocator, wtf16le: []const u16) mem.Allocator.Error![:0]u8 {
+ // optimistically guess that it will all be ascii (and allocate space for the null terminator)
+ var result = try std.ArrayList(u8).initCapacity(allocator, wtf16le.len + 1);
+ errdefer result.deinit();
+
+ try utf16LeToUtf8ArrayListImpl(&result, wtf16le, .can_encode_surrogate_half);
+ return result.toOwnedSliceSentinel(0);
+}
+
+pub fn wtf16LeToWtf8(wtf8: []u8, wtf16le: []const u16) usize {
+ return utf16LeToUtf8Impl(wtf8, wtf16le, .can_encode_surrogate_half) catch |err| switch (err) {};
+}
+
+pub fn wtf8ToWtf16LeArrayList(result: *std.ArrayList(u16), wtf8: []const u8) error{ InvalidWtf8, OutOfMemory }!void {
+ try result.ensureTotalCapacityPrecise(wtf8.len);
+ return utf8ToUtf16LeArrayListImpl(result, wtf8, .can_encode_surrogate_half);
+}
+
+pub fn wtf8ToWtf16LeAlloc(allocator: mem.Allocator, wtf8: []const u8) error{ InvalidWtf8, OutOfMemory }![]u16 {
+ // optimistically guess that it will not require surrogate pairs
+ var result = try std.ArrayList(u16).initCapacity(allocator, wtf8.len);
+ errdefer result.deinit();
+
+ try utf8ToUtf16LeArrayListImpl(&result, wtf8, .can_encode_surrogate_half);
+ return result.toOwnedSlice();
+}
+
+pub fn wtf8ToWtf16LeAllocZ(allocator: mem.Allocator, wtf8: []const u8) error{ InvalidWtf8, OutOfMemory }![:0]u16 {
+ // optimistically guess that it will not require surrogate pairs
+ var result = try std.ArrayList(u16).initCapacity(allocator, wtf8.len + 1);
+ errdefer result.deinit();
+
+ try utf8ToUtf16LeArrayListImpl(&result, wtf8, .can_encode_surrogate_half);
+ return result.toOwnedSliceSentinel(0);
+}
+
+/// Returns index of next character. If exact fit, returned index equals output slice length.
+/// Assumes there is enough space for the output.
+pub fn wtf8ToWtf16Le(wtf16le: []u16, wtf8: []const u8) error{InvalidWtf8}!usize {
+ return utf8ToUtf16LeImpl(wtf16le, wtf8, .can_encode_surrogate_half);
+}
+
+/// Surrogate codepoints (U+D800 to U+DFFF) are replaced by the Unicode replacement
+/// character (U+FFFD).
+/// All surrogate codepoints and the replacement character are encoded as three
+/// bytes, meaning the input and output slices will always be the same length.
+/// In-place conversion is supported when `utf8` and `wtf8` refer to the same slice.
+/// Note: If `wtf8` is entirely composed of well-formed UTF-8, then no conversion is necessary.
+/// `utf8ValidateSlice` can be used to check if lossy conversion is worthwhile.
+/// If `wtf8` is not valid WTF-8, then `error.InvalidWtf8` is returned.
+pub fn wtf8ToUtf8Lossy(utf8: []u8, wtf8: []const u8) error{InvalidWtf8}!void {
+ assert(utf8.len >= wtf8.len);
+
+ const in_place = utf8.ptr == wtf8.ptr;
+ const replacement_char_bytes = comptime blk: {
+ var buf: [3]u8 = undefined;
+ assert((utf8Encode(replacement_character, &buf) catch unreachable) == 3);
+ break :blk buf;
+ };
+
+ var dest_i: usize = 0;
+ const view = try Wtf8View.init(wtf8);
+ var it = view.iterator();
+ while (it.nextCodepointSlice()) |codepoint_slice| {
+ // All surrogate codepoints are encoded as 3 bytes
+ if (codepoint_slice.len == 3) {
+ const codepoint = wtf8Decode(codepoint_slice) catch unreachable;
+ if (isSurrogateCodepoint(codepoint)) {
+ @memcpy(utf8[dest_i..][0..replacement_char_bytes.len], &replacement_char_bytes);
+ dest_i += replacement_char_bytes.len;
+ continue;
+ }
+ }
+ if (!in_place) {
+ @memcpy(utf8[dest_i..][0..codepoint_slice.len], codepoint_slice);
+ }
+ dest_i += codepoint_slice.len;
+ }
+}
+
+pub fn wtf8ToUtf8LossyAlloc(allocator: mem.Allocator, wtf8: []const u8) error{ InvalidWtf8, OutOfMemory }![]u8 {
+ const utf8 = try allocator.alloc(u8, wtf8.len);
+ errdefer allocator.free(utf8);
+
+ try wtf8ToUtf8Lossy(utf8, wtf8);
+
+ return utf8;
+}
+
+pub fn wtf8ToUtf8LossyAllocZ(allocator: mem.Allocator, wtf8: []const u8) error{ InvalidWtf8, OutOfMemory }![:0]u8 {
+ const utf8 = try allocator.allocSentinel(u8, wtf8.len, 0);
+ errdefer allocator.free(utf8);
+
+ try wtf8ToUtf8Lossy(utf8, wtf8);
+
+ return utf8;
+}
+
+test wtf8ToUtf8Lossy {
+ var buf: [32]u8 = undefined;
+
+ const invalid_utf8 = "\xff";
+ try testing.expectError(error.InvalidWtf8, wtf8ToUtf8Lossy(&buf, invalid_utf8));
+
+ const ascii = "abcd";
+ try wtf8ToUtf8Lossy(&buf, ascii);
+ try testing.expectEqualStrings("abcd", buf[0..ascii.len]);
+
+ const high_surrogate_half = "ab\xed\xa0\xbdcd";
+ try wtf8ToUtf8Lossy(&buf, high_surrogate_half);
+ try testing.expectEqualStrings("ab\u{FFFD}cd", buf[0..high_surrogate_half.len]);
+
+ const low_surrogate_half = "ab\xed\xb2\xa9cd";
+ try wtf8ToUtf8Lossy(&buf, low_surrogate_half);
+ try testing.expectEqualStrings("ab\u{FFFD}cd", buf[0..low_surrogate_half.len]);
+
+ // If the WTF-8 is not well-formed, each surrogate half is converted into a separate
+ // replacement character instead of being interpreted as a surrogate pair.
+ const encoded_surrogate_pair = "ab\xed\xa0\xbd\xed\xb2\xa9cd";
+ try wtf8ToUtf8Lossy(&buf, encoded_surrogate_pair);
+ try testing.expectEqualStrings("ab\u{FFFD}\u{FFFD}cd", buf[0..encoded_surrogate_pair.len]);
+
+ // in place
+ @memcpy(buf[0..low_surrogate_half.len], low_surrogate_half);
+ const slice = buf[0..low_surrogate_half.len];
+ try wtf8ToUtf8Lossy(slice, slice);
+ try testing.expectEqualStrings("ab\u{FFFD}cd", slice);
+}
+
+test wtf8ToUtf8LossyAlloc {
+ const invalid_utf8 = "\xff";
+ try testing.expectError(error.InvalidWtf8, wtf8ToUtf8LossyAlloc(testing.allocator, invalid_utf8));
+
+ {
+ const ascii = "abcd";
+ const utf8 = try wtf8ToUtf8LossyAlloc(testing.allocator, ascii);
+ defer testing.allocator.free(utf8);
+ try testing.expectEqualStrings("abcd", utf8);
+ }
+
+ {
+ const surrogate_half = "ab\xed\xa0\xbdcd";
+ const utf8 = try wtf8ToUtf8LossyAlloc(testing.allocator, surrogate_half);
+ defer testing.allocator.free(utf8);
+ try testing.expectEqualStrings("ab\u{FFFD}cd", utf8);
+ }
+
+ {
+ // If the WTF-8 is not well-formed, each surrogate half is converted into a separate
+ // replacement character instead of being interpreted as a surrogate pair.
+ const encoded_surrogate_pair = "ab\xed\xa0\xbd\xed\xb2\xa9cd";
+ const utf8 = try wtf8ToUtf8LossyAlloc(testing.allocator, encoded_surrogate_pair);
+ defer testing.allocator.free(utf8);
+ try testing.expectEqualStrings("ab\u{FFFD}\u{FFFD}cd", utf8);
+ }
+}
+
+test wtf8ToUtf8LossyAllocZ {
+ const invalid_utf8 = "\xff";
+ try testing.expectError(error.InvalidWtf8, wtf8ToUtf8LossyAllocZ(testing.allocator, invalid_utf8));
+
+ {
+ const ascii = "abcd";
+ const utf8 = try wtf8ToUtf8LossyAllocZ(testing.allocator, ascii);
+ defer testing.allocator.free(utf8);
+ try testing.expectEqualStrings("abcd", utf8);
+ }
+
+ {
+ const surrogate_half = "ab\xed\xa0\xbdcd";
+ const utf8 = try wtf8ToUtf8LossyAllocZ(testing.allocator, surrogate_half);
+ defer testing.allocator.free(utf8);
+ try testing.expectEqualStrings("ab\u{FFFD}cd", utf8);
+ }
+
+ {
+ // If the WTF-8 is not well-formed, each surrogate half is converted into a separate
+ // replacement character instead of being interpreted as a surrogate pair.
+ const encoded_surrogate_pair = "ab\xed\xa0\xbd\xed\xb2\xa9cd";
+ const utf8 = try wtf8ToUtf8LossyAllocZ(testing.allocator, encoded_surrogate_pair);
+ defer testing.allocator.free(utf8);
+ try testing.expectEqualStrings("ab\u{FFFD}\u{FFFD}cd", utf8);
+ }
+}
+
+pub const Wtf16LeIterator = struct {
+ bytes: []const u8,
+ i: usize,
+
+ pub fn init(s: []const u16) Wtf16LeIterator {
+ return Wtf16LeIterator{
+ .bytes = mem.sliceAsBytes(s),
+ .i = 0,
+ };
+ }
+
+ /// If the next codepoint is encoded by a surrogate pair, returns the
+ /// codepoint that the surrogate pair represents.
+ /// If the next codepoint is an unpaired surrogate, returns the codepoint
+ /// of the unpaired surrogate.
+ pub fn nextCodepoint(it: *Wtf16LeIterator) ?u21 {
+ assert(it.i <= it.bytes.len);
+ if (it.i == it.bytes.len) return null;
+ var code_units: [2]u16 = undefined;
+ code_units[0] = mem.readInt(u16, it.bytes[it.i..][0..2], .little);
+ it.i += 2;
+ surrogate_pair: {
+ if (utf16IsHighSurrogate(code_units[0])) {
+ if (it.i >= it.bytes.len) break :surrogate_pair;
+ code_units[1] = mem.readInt(u16, it.bytes[it.i..][0..2], .little);
+ const codepoint = utf16DecodeSurrogatePair(&code_units) catch break :surrogate_pair;
+ it.i += 2;
+ return codepoint;
+ }
+ }
+ return code_units[0];
+ }
+};
+
+test "non-well-formed WTF-8 does not roundtrip" {
+ // This encodes the surrogate pair U+D83D U+DCA9.
+ // The well-formed version of this would be U+1F4A9 which is \xF0\x9F\x92\xA9.
+ const non_well_formed_wtf8 = "\xed\xa0\xbd\xed\xb2\xa9";
+
+ var wtf16_buf: [2]u16 = undefined;
+ const wtf16_len = try wtf8ToWtf16Le(&wtf16_buf, non_well_formed_wtf8);
+ const wtf16 = wtf16_buf[0..wtf16_len];
+
+ try testing.expectEqualSlices(u16, &[_]u16{
+ mem.nativeToLittle(u16, 0xD83D), // high surrogate
+ mem.nativeToLittle(u16, 0xDCA9), // low surrogate
+ }, wtf16);
+
+ var wtf8_buf: [4]u8 = undefined;
+ const wtf8_len = wtf16LeToWtf8(&wtf8_buf, wtf16);
+ const wtf8 = wtf8_buf[0..wtf8_len];
+
+ // Converting to WTF-16 and back results in well-formed WTF-8,
+ // but it does not match the input WTF-8
+ try testing.expectEqualSlices(u8, "\xf0\x9f\x92\xa9", wtf8);
+}
+
+fn testRoundtripWtf8(wtf8: []const u8) !void {
+ // Buffer
+ {
+ var wtf16_buf: [32]u16 = undefined;
+ const wtf16_len = try wtf8ToWtf16Le(&wtf16_buf, wtf8);
+ const wtf16 = wtf16_buf[0..wtf16_len];
+
+ var roundtripped_buf: [32]u8 = undefined;
+ const roundtripped_len = wtf16LeToWtf8(&roundtripped_buf, wtf16);
+ const roundtripped = roundtripped_buf[0..roundtripped_len];
+
+ try testing.expectEqualSlices(u8, wtf8, roundtripped);
+ }
+ // Alloc
+ {
+ const wtf16 = try wtf8ToWtf16LeAlloc(testing.allocator, wtf8);
+ defer testing.allocator.free(wtf16);
+
+ const roundtripped = try wtf16LeToWtf8Alloc(testing.allocator, wtf16);
+ defer testing.allocator.free(roundtripped);
+
+ try testing.expectEqualSlices(u8, wtf8, roundtripped);
+ }
+ // AllocZ
+ {
+ const wtf16 = try wtf8ToWtf16LeAllocZ(testing.allocator, wtf8);
+ defer testing.allocator.free(wtf16);
+
+ const roundtripped = try wtf16LeToWtf8AllocZ(testing.allocator, wtf16);
+ defer testing.allocator.free(roundtripped);
+
+ try testing.expectEqualSlices(u8, wtf8, roundtripped);
+ }
+}
+
+test "well-formed WTF-8 roundtrips" {
+ try testRoundtripWtf8("\xed\x9f\xbf"); // not a surrogate half
+ try testRoundtripWtf8("\xed\xa0\xbd"); // high surrogate
+ try testRoundtripWtf8("\xed\xb2\xa9"); // low surrogate
+ try testRoundtripWtf8("\xed\xa0\xbd \xed\xb2\xa9"); // <high surrogate><space><low surrogate>
+ try testRoundtripWtf8("\xed\xa0\x80\xed\xaf\xbf"); // <high surrogate><high surrogate>
+ try testRoundtripWtf8("\xed\xa0\x80\xee\x80\x80"); // <high surrogate><not surrogate>
+ try testRoundtripWtf8("\xed\x9f\xbf\xed\xb0\x80"); // <not surrogate><low surrogate>
+ try testRoundtripWtf8("a\xed\xb0\x80"); // <not surrogate><low surrogate>
+ try testRoundtripWtf8("\xf0\x9f\x92\xa9"); // U+1F4A9, encoded as a surrogate pair in WTF-16
+}
+
+fn testRoundtripWtf16(wtf16le: []const u16) !void {
+ // Buffer
+ {
+ var wtf8_buf: [32]u8 = undefined;
+ const wtf8_len = wtf16LeToWtf8(&wtf8_buf, wtf16le);
+ const wtf8 = wtf8_buf[0..wtf8_len];
+
+ var roundtripped_buf: [32]u16 = undefined;
+ const roundtripped_len = try wtf8ToWtf16Le(&roundtripped_buf, wtf8);
+ const roundtripped = roundtripped_buf[0..roundtripped_len];
+
+ try testing.expectEqualSlices(u16, wtf16le, roundtripped);
+ }
+ // Alloc
+ {
+ const wtf8 = try wtf16LeToWtf8Alloc(testing.allocator, wtf16le);
+ defer testing.allocator.free(wtf8);
+
+ const roundtripped = try wtf8ToWtf16LeAlloc(testing.allocator, wtf8);
+ defer testing.allocator.free(roundtripped);
+
+ try testing.expectEqualSlices(u16, wtf16le, roundtripped);
+ }
+ // AllocZ
+ {
+ const wtf8 = try wtf16LeToWtf8AllocZ(testing.allocator, wtf16le);
+ defer testing.allocator.free(wtf8);
+
+ const roundtripped = try wtf8ToWtf16LeAllocZ(testing.allocator, wtf8);
+ defer testing.allocator.free(roundtripped);
+
+ try testing.expectEqualSlices(u16, wtf16le, roundtripped);
+ }
+}
+
+test "well-formed WTF-16 roundtrips" {
+ try testRoundtripWtf16(&[_]u16{
+ mem.nativeToLittle(u16, 0xD83D), // high surrogate
+ mem.nativeToLittle(u16, 0xDCA9), // low surrogate
+ });
+ try testRoundtripWtf16(&[_]u16{
+ mem.nativeToLittle(u16, 0xD83D), // high surrogate
+ mem.nativeToLittle(u16, ' '), // not surrogate
+ mem.nativeToLittle(u16, 0xDCA9), // low surrogate
+ });
+ try testRoundtripWtf16(&[_]u16{
+ mem.nativeToLittle(u16, 0xD800), // high surrogate
+ mem.nativeToLittle(u16, 0xDBFF), // high surrogate
+ });
+ try testRoundtripWtf16(&[_]u16{
+ mem.nativeToLittle(u16, 0xD800), // high surrogate
+ mem.nativeToLittle(u16, 0xE000), // not surrogate
+ });
+ try testRoundtripWtf16(&[_]u16{
+ mem.nativeToLittle(u16, 0xD7FF), // not surrogate
+ mem.nativeToLittle(u16, 0xDC00), // low surrogate
+ });
+ try testRoundtripWtf16(&[_]u16{
+ mem.nativeToLittle(u16, 0x61), // not surrogate
+ mem.nativeToLittle(u16, 0xDC00), // low surrogate
+ });
+ try testRoundtripWtf16(&[_]u16{
+ mem.nativeToLittle(u16, 0xDC00), // low surrogate
+ });
+}
diff --git a/lib/std/zig/Server.zig b/lib/std/zig/Server.zig
index 7d5abaf6ea..915450c50a 100644
--- a/lib/std/zig/Server.zig
+++ b/lib/std/zig/Server.zig
@@ -38,8 +38,6 @@ pub const Message = struct {
/// Trailing:
/// * name: [tests_len]u32
/// - null-terminated string_bytes index
- /// * async_frame_len: [tests_len]u32,
- /// - 0 means not async
/// * expected_panic_msg: [tests_len]u32,
/// - null-terminated string_bytes index
/// - 0 means does not expect panic
@@ -210,7 +208,6 @@ pub fn serveErrorBundle(s: *Server, error_bundle: std.zig.ErrorBundle) !void {
pub const TestMetadata = struct {
names: []u32,
- async_frame_sizes: []u32,
expected_panic_msgs: []u32,
string_bytes: []const u8,
};
@@ -220,17 +217,16 @@ pub fn serveTestMetadata(s: *Server, test_metadata: TestMetadata) !void {
.tests_len = bswap(@as(u32, @intCast(test_metadata.names.len))),
.string_bytes_len = bswap(@as(u32, @intCast(test_metadata.string_bytes.len))),
};
+ const trailing = 2;
const bytes_len = @sizeOf(OutMessage.TestMetadata) +
- 3 * 4 * test_metadata.names.len + test_metadata.string_bytes.len;
+ trailing * @sizeOf(u32) * test_metadata.names.len + test_metadata.string_bytes.len;
if (need_bswap) {
bswap_u32_array(test_metadata.names);
- bswap_u32_array(test_metadata.async_frame_sizes);
bswap_u32_array(test_metadata.expected_panic_msgs);
}
defer if (need_bswap) {
bswap_u32_array(test_metadata.names);
- bswap_u32_array(test_metadata.async_frame_sizes);
bswap_u32_array(test_metadata.expected_panic_msgs);
};
@@ -241,7 +237,6 @@ pub fn serveTestMetadata(s: *Server, test_metadata: TestMetadata) !void {
std.mem.asBytes(&header),
// TODO: implement @ptrCast between slices changing the length
std.mem.sliceAsBytes(test_metadata.names),
- std.mem.sliceAsBytes(test_metadata.async_frame_sizes),
std.mem.sliceAsBytes(test_metadata.expected_panic_msgs),
test_metadata.string_bytes,
});
diff --git a/lib/std/zig/c_translation.zig b/lib/std/zig/c_translation.zig
index dfa888e94b..337149e97d 100644
--- a/lib/std/zig/c_translation.zig
+++ b/lib/std/zig/c_translation.zig
@@ -308,14 +308,12 @@ test "promoteIntLiteral" {
/// Convert from clang __builtin_shufflevector index to Zig @shuffle index
/// clang requires __builtin_shufflevector index arguments to be integer constants.
-/// negative values for `this_index` indicate "don't care" so we arbitrarily choose 0
+/// negative values for `this_index` indicate "don't care".
/// clang enforces that `this_index` is less than the total number of vector elements
/// See https://ziglang.org/documentation/master/#shuffle
/// See https://clang.llvm.org/docs/LanguageExtensions.html#langext-builtin-shufflevector
pub fn shuffleVectorIndex(comptime this_index: c_int, comptime source_vector_len: usize) i32 {
- if (this_index <= 0) return 0;
-
- const positive_index = @as(usize, @intCast(this_index));
+ const positive_index = std.math.cast(usize, this_index) orelse return undefined;
if (positive_index < source_vector_len) return @as(i32, @intCast(this_index));
const b_index = positive_index - source_vector_len;
return ~@as(i32, @intCast(b_index));
@@ -324,7 +322,7 @@ pub fn shuffleVectorIndex(comptime this_index: c_int, comptime source_vector_len
test "shuffleVectorIndex" {
const vector_len: usize = 4;
- try testing.expect(shuffleVectorIndex(-1, vector_len) == 0);
+ _ = shuffleVectorIndex(-1, vector_len);
try testing.expect(shuffleVectorIndex(0, vector_len) == 0);
try testing.expect(shuffleVectorIndex(1, vector_len) == 1);
diff --git a/lib/std/zig/parser_test.zig b/lib/std/zig/parser_test.zig
index 0513bd9f03..37d90a68df 100644
--- a/lib/std/zig/parser_test.zig
+++ b/lib/std/zig/parser_test.zig
@@ -2230,6 +2230,10 @@ test "zig fmt: switch cases trailing comma" {
\\ 1,2,3 => {},
\\ 4,5, => {},
\\ 6... 8, => {},
+ \\ 9 ...
+ \\ 10 => {},
+ \\ 11 => {},
+ \\ 12, => {},
\\ else => {},
\\ }
\\}
@@ -2240,7 +2244,12 @@ test "zig fmt: switch cases trailing comma" {
\\ 4,
\\ 5,
\\ => {},
- \\ 6...8 => {},
+ \\ 6...8,
+ \\ => {},
+ \\ 9...10 => {},
+ \\ 11 => {},
+ \\ 12,
+ \\ => {},
\\ else => {},
\\ }
\\}
diff --git a/lib/std/zig/render.zig b/lib/std/zig/render.zig
index 4b57ab69aa..a87f289642 100644
--- a/lib/std/zig/render.zig
+++ b/lib/std/zig/render.zig
@@ -1894,9 +1894,6 @@ fn renderSwitchCase(
// Render everything before the arrow
if (switch_case.ast.values.len == 0) {
try renderToken(r, switch_case.ast.arrow_token - 1, .space); // else keyword
- } else if (switch_case.ast.values.len == 1 and !has_comment_before_arrow) {
- // render on one line and drop the trailing comma if any
- try renderExpression(r, switch_case.ast.values[0], .space);
} else if (trailing_comma or has_comment_before_arrow) {
// Render each value on a new line
try renderExpressions(r, switch_case.ast.values, .comma);
diff --git a/lib/std/zig/system.zig b/lib/std/zig/system.zig
index c2a9fa4f9f..d30706e5e2 100644
--- a/lib/std/zig/system.zig
+++ b/lib/std/zig/system.zig
@@ -639,7 +639,8 @@ pub fn abiAndDynamicLinkerFromFile(
var link_buf: [std.os.PATH_MAX]u8 = undefined;
const link_name = std.os.readlink(dl_path, &link_buf) catch |err| switch (err) {
error.NameTooLong => unreachable,
- error.InvalidUtf8 => unreachable, // Windows only
+ error.InvalidUtf8 => unreachable, // WASI only
+ error.InvalidWtf8 => unreachable, // Windows only
error.BadPathName => unreachable, // Windows only
error.UnsupportedReparsePointType => unreachable, // Windows only
error.NetworkNotFound => unreachable, // Windows only
@@ -730,7 +731,8 @@ test glibcVerFromLinkName {
fn glibcVerFromRPath(rpath: []const u8) !std.SemanticVersion {
var dir = fs.cwd().openDir(rpath, .{}) catch |err| switch (err) {
error.NameTooLong => unreachable,
- error.InvalidUtf8 => unreachable,
+ error.InvalidUtf8 => unreachable, // WASI only
+ error.InvalidWtf8 => unreachable, // Windows-only
error.BadPathName => unreachable,
error.DeviceBusy => unreachable,
error.NetworkNotFound => unreachable, // Windows-only
@@ -761,11 +763,13 @@ fn glibcVerFromRPath(rpath: []const u8) !std.SemanticVersion {
const glibc_so_basename = "libc.so.6";
var f = dir.openFile(glibc_so_basename, .{}) catch |err| switch (err) {
error.NameTooLong => unreachable,
- error.InvalidUtf8 => unreachable, // Windows only
+ error.InvalidUtf8 => unreachable, // WASI only
+ error.InvalidWtf8 => unreachable, // Windows only
error.BadPathName => unreachable, // Windows only
error.PipeBusy => unreachable, // Windows-only
error.SharingViolation => unreachable, // Windows-only
error.NetworkNotFound => unreachable, // Windows-only
+ error.AntivirusInterference => unreachable, // Windows-only
error.FileLocksNotSupported => unreachable, // No lock requested.
error.NoSpaceLeft => unreachable, // read-only
error.PathAlreadyExists => unreachable, // read-only
@@ -997,12 +1001,14 @@ fn detectAbiAndDynamicLinker(
error.NameTooLong => unreachable,
error.PathAlreadyExists => unreachable,
error.SharingViolation => unreachable,
- error.InvalidUtf8 => unreachable,
+ error.InvalidUtf8 => unreachable, // WASI only
+ error.InvalidWtf8 => unreachable, // Windows only
error.BadPathName => unreachable,
error.PipeBusy => unreachable,
error.FileLocksNotSupported => unreachable,
error.WouldBlock => unreachable,
error.FileBusy => unreachable, // opened without write permissions
+ error.AntivirusInterference => unreachable, // Windows-only error
error.IsDir,
error.NotDir,
@@ -1101,7 +1107,6 @@ fn preadMin(file: fs.File, buf: []u8, offset: u64, min_read_len: usize) !usize {
error.ConnectionResetByPeer => return error.UnableToReadElfFile,
error.ConnectionTimedOut => return error.UnableToReadElfFile,
error.SocketNotConnected => return error.UnableToReadElfFile,
- error.NetNameDeleted => return error.UnableToReadElfFile,
error.Unexpected => return error.Unexpected,
error.InputOutput => return error.FileSystem,
error.AccessDenied => return error.Unexpected,
diff --git a/lib/std/zig/system/NativePaths.zig b/lib/std/zig/system/NativePaths.zig
index de800f866b..1d3ce10d9b 100644
--- a/lib/std/zig/system/NativePaths.zig
+++ b/lib/std/zig/system/NativePaths.zig
@@ -41,7 +41,7 @@ pub fn detect(arena: Allocator, native_target: std.Target) !NativePaths {
}
}
} else |err| switch (err) {
- error.InvalidUtf8 => {},
+ error.InvalidWtf8 => unreachable,
error.EnvironmentVariableNotFound => {},
error.OutOfMemory => |e| return e,
}
@@ -73,7 +73,7 @@ pub fn detect(arena: Allocator, native_target: std.Target) !NativePaths {
}
}
} else |err| switch (err) {
- error.InvalidUtf8 => {},
+ error.InvalidWtf8 => unreachable,
error.EnvironmentVariableNotFound => {},
error.OutOfMemory => |e| return e,
}
diff --git a/lib/std/zig/system/linux.zig b/lib/std/zig/system/linux.zig
index d2d31b4079..b05b86995f 100644
--- a/lib/std/zig/system/linux.zig
+++ b/lib/std/zig/system/linux.zig
@@ -71,6 +71,53 @@ test "cpuinfo: SPARC" {
);
}
+const RiscvCpuinfoImpl = struct {
+ model: ?*const Target.Cpu.Model = null,
+
+ const cpu_names = .{
+ .{ "sifive,u54", &Target.riscv.cpu.sifive_u54 },
+ .{ "sifive,u7", &Target.riscv.cpu.sifive_7_series },
+ .{ "sifive,u74", &Target.riscv.cpu.sifive_u74 },
+ .{ "sifive,u74-mc", &Target.riscv.cpu.sifive_u74 },
+ };
+
+ fn line_hook(self: *RiscvCpuinfoImpl, key: []const u8, value: []const u8) !bool {
+ if (mem.eql(u8, key, "uarch")) {
+ inline for (cpu_names) |pair| {
+ if (mem.eql(u8, value, pair[0])) {
+ self.model = pair[1];
+ break;
+ }
+ }
+ return false;
+ }
+
+ return true;
+ }
+
+ fn finalize(self: *const RiscvCpuinfoImpl, arch: Target.Cpu.Arch) ?Target.Cpu {
+ const model = self.model orelse return null;
+ return Target.Cpu{
+ .arch = arch,
+ .model = model,
+ .features = model.features,
+ };
+ }
+};
+
+const RiscvCpuinfoParser = CpuinfoParser(RiscvCpuinfoImpl);
+
+test "cpuinfo: RISC-V" {
+ try testParser(RiscvCpuinfoParser, .riscv64, &Target.riscv.cpu.sifive_u74,
+ \\processor : 0
+ \\hart : 1
+ \\isa : rv64imafdc
+ \\mmu : sv39
+ \\isa-ext :
+ \\uarch : sifive,u74-mc
+ );
+}
+
const PowerpcCpuinfoImpl = struct {
model: ?*const Target.Cpu.Model = null,
@@ -328,7 +375,7 @@ fn CpuinfoParser(comptime impl: anytype) type {
}
pub fn detectNativeCpuAndFeatures() ?Target.Cpu {
- var f = fs.openFileAbsolute("/proc/cpuinfo", .{ .intended_io_mode = .blocking }) catch |err| switch (err) {
+ var f = fs.openFileAbsolute("/proc/cpuinfo", .{}) catch |err| switch (err) {
else => return null,
};
defer f.close();
@@ -344,6 +391,9 @@ pub fn detectNativeCpuAndFeatures() ?Target.Cpu {
.powerpc, .powerpcle, .powerpc64, .powerpc64le => {
return PowerpcCpuinfoParser.parse(current_arch, f.reader()) catch null;
},
+ .riscv64, .riscv32 => {
+ return RiscvCpuinfoParser.parse(current_arch, f.reader()) catch null;
+ },
else => {},
}
diff --git a/lib/std/zig/system/windows.zig b/lib/std/zig/system/windows.zig
index 9c5b614c39..34f965a259 100644
--- a/lib/std/zig/system/windows.zig
+++ b/lib/std/zig/system/windows.zig
@@ -160,7 +160,7 @@ fn getCpuInfoFromRegistry(core: usize, args: anytype) !void {
=> {
var buf = @field(args, field.name).value_buf;
const entry = @as(*align(1) const std.os.windows.UNICODE_STRING, @ptrCast(table[i + 1].EntryContext));
- const len = try std.unicode.utf16leToUtf8(buf, entry.Buffer[0 .. entry.Length / 2]);
+ const len = try std.unicode.utf16LeToUtf8(buf, entry.Buffer[0 .. entry.Length / 2]);
buf[len] = 0;
},