diff options
| author | Andrew Kelley <andrew@ziglang.org> | 2025-06-27 20:05:22 -0700 |
|---|---|---|
| committer | Andrew Kelley <andrew@ziglang.org> | 2025-07-07 22:43:51 -0700 |
| commit | 0e37ff0d591dd75ceec9208196bec29efaec607a (patch) | |
| tree | c126fa823a1f3864e9c363aac70e3a3db0219957 /lib/std/unicode.zig | |
| parent | 0b3f0124dc33403d329fb8ee63a93215d9af1f1e (diff) | |
| download | zig-0e37ff0d591dd75ceec9208196bec29efaec607a.tar.gz zig-0e37ff0d591dd75ceec9208196bec29efaec607a.zip | |
std.fmt: breaking API changes
added adapter to AnyWriter and GenericWriter to help bridge the gap
between old and new API
make std.testing.expectFmt work at compile-time
std.fmt no longer has a dependency on std.unicode. Formatted printing
was never properly unicode-aware. Now it no longer pretends to be.
Breakage/deprecations:
* std.fs.File.reader -> std.fs.File.deprecatedReader
* std.fs.File.writer -> std.fs.File.deprecatedWriter
* std.io.GenericReader -> std.io.Reader
* std.io.GenericWriter -> std.io.Writer
* std.io.AnyReader -> std.io.Reader
* std.io.AnyWriter -> std.io.Writer
* std.fmt.format -> std.fmt.deprecatedFormat
* std.fmt.fmtSliceEscapeLower -> std.ascii.hexEscape
* std.fmt.fmtSliceEscapeUpper -> std.ascii.hexEscape
* std.fmt.fmtSliceHexLower -> {x}
* std.fmt.fmtSliceHexUpper -> {X}
* std.fmt.fmtIntSizeDec -> {B}
* std.fmt.fmtIntSizeBin -> {Bi}
* std.fmt.fmtDuration -> {D}
* std.fmt.fmtDurationSigned -> {D}
* {} -> {f} when there is a format method
* format method signature
- anytype -> *std.io.Writer
- inferred error set -> error{WriteFailed}
- options -> (deleted)
* std.fmt.Formatted
- now takes context type explicitly
- no fmt string
Diffstat (limited to 'lib/std/unicode.zig')
| -rw-r--r-- | lib/std/unicode.zig | 59 |
1 files changed, 23 insertions, 36 deletions
diff --git a/lib/std/unicode.zig b/lib/std/unicode.zig index 4c6ec1294b..ef694f33ba 100644 --- a/lib/std/unicode.zig +++ b/lib/std/unicode.zig @@ -9,6 +9,7 @@ const native_endian = builtin.cpu.arch.endian(); /// /// See also: https://en.wikipedia.org/wiki/Specials_(Unicode_block)#Replacement_character pub const replacement_character: u21 = 0xFFFD; +pub const replacement_character_utf8: [3]u8 = utf8EncodeComptime(replacement_character); /// Returns how many bytes the UTF-8 representation would require /// for the given codepoint. @@ -802,14 +803,7 @@ fn testDecode(bytes: []const u8) !u21 { /// Ill-formed UTF-8 byte sequences are replaced by the replacement character (U+FFFD) /// according to "U+FFFD Substitution of Maximal Subparts" from Chapter 3 of /// the Unicode standard, and as specified by https://encoding.spec.whatwg.org/#utf-8-decoder -fn formatUtf8( - utf8: []const u8, - comptime fmt: []const u8, - options: std.fmt.FormatOptions, - writer: anytype, -) !void { - _ = fmt; - _ = options; +fn formatUtf8(utf8: []const u8, writer: *std.io.Writer) std.io.Writer.Error!void { var buf: [300]u8 = undefined; // just an arbitrary size var u8len: usize = 0; @@ -898,27 +892,27 @@ fn formatUtf8( /// Ill-formed UTF-8 byte sequences are replaced by the replacement character (U+FFFD) /// according to "U+FFFD Substitution of Maximal Subparts" from Chapter 3 of /// the Unicode standard, and as specified by https://encoding.spec.whatwg.org/#utf-8-decoder -pub fn fmtUtf8(utf8: []const u8) std.fmt.Formatter(formatUtf8) { +pub fn fmtUtf8(utf8: []const u8) std.fmt.Formatter([]const u8, formatUtf8) { return .{ .data = utf8 }; } test fmtUtf8 { const expectFmt = testing.expectFmt; - try expectFmt("", "{}", .{fmtUtf8("")}); - try expectFmt("foo", "{}", .{fmtUtf8("foo")}); - try expectFmt("𐐷", "{}", .{fmtUtf8("𐐷")}); + try expectFmt("", "{f}", .{fmtUtf8("")}); + try expectFmt("foo", "{f}", .{fmtUtf8("foo")}); + try expectFmt("𐐷", "{f}", .{fmtUtf8("𐐷")}); // Table 3-8. U+FFFD for Non-Shortest Form Sequences - try expectFmt("��������A", "{}", .{fmtUtf8("\xC0\xAF\xE0\x80\xBF\xF0\x81\x82A")}); + try expectFmt("��������A", "{f}", .{fmtUtf8("\xC0\xAF\xE0\x80\xBF\xF0\x81\x82A")}); // Table 3-9. U+FFFD for Ill-Formed Sequences for Surrogates - try expectFmt("��������A", "{}", .{fmtUtf8("\xED\xA0\x80\xED\xBF\xBF\xED\xAFA")}); + try expectFmt("��������A", "{f}", .{fmtUtf8("\xED\xA0\x80\xED\xBF\xBF\xED\xAFA")}); // Table 3-10. U+FFFD for Other Ill-Formed Sequences - try expectFmt("�����A��B", "{}", .{fmtUtf8("\xF4\x91\x92\x93\xFFA\x80\xBFB")}); + try expectFmt("�����A��B", "{f}", .{fmtUtf8("\xF4\x91\x92\x93\xFFA\x80\xBFB")}); // Table 3-11. U+FFFD for Truncated Sequences - try expectFmt("����A", "{}", .{fmtUtf8("\xE1\x80\xE2\xF0\x91\x92\xF1\xBFA")}); + try expectFmt("����A", "{f}", .{fmtUtf8("\xE1\x80\xE2\xF0\x91\x92\xF1\xBFA")}); } fn utf16LeToUtf8ArrayListImpl( @@ -1477,14 +1471,7 @@ test calcWtf16LeLen { /// Print the given `utf16le` string, encoded as UTF-8 bytes. /// Unpaired surrogates are replaced by the replacement character (U+FFFD). -fn formatUtf16Le( - utf16le: []const u16, - comptime fmt: []const u8, - options: std.fmt.FormatOptions, - writer: anytype, -) !void { - _ = fmt; - _ = options; +fn formatUtf16Le(utf16le: []const u16, writer: *std.io.Writer) std.io.Writer.Error!void { var buf: [300]u8 = undefined; // just an arbitrary size var it = Utf16LeIterator.init(utf16le); var u8len: usize = 0; @@ -1505,23 +1492,23 @@ pub const fmtUtf16le = @compileError("deprecated; renamed to fmtUtf16Le"); /// Return a Formatter for a (potentially ill-formed) UTF-16 LE string, /// which will be converted to UTF-8 during formatting. /// Unpaired surrogates are replaced by the replacement character (U+FFFD). -pub fn fmtUtf16Le(utf16le: []const u16) std.fmt.Formatter(formatUtf16Le) { +pub fn fmtUtf16Le(utf16le: []const u16) std.fmt.Formatter([]const u16, formatUtf16Le) { return .{ .data = utf16le }; } test fmtUtf16Le { const expectFmt = testing.expectFmt; - try expectFmt("", "{}", .{fmtUtf16Le(utf8ToUtf16LeStringLiteral(""))}); - try expectFmt("", "{}", .{fmtUtf16Le(wtf8ToWtf16LeStringLiteral(""))}); - try expectFmt("foo", "{}", .{fmtUtf16Le(utf8ToUtf16LeStringLiteral("foo"))}); - try expectFmt("foo", "{}", .{fmtUtf16Le(wtf8ToWtf16LeStringLiteral("foo"))}); - try expectFmt("𐐷", "{}", .{fmtUtf16Le(wtf8ToWtf16LeStringLiteral("𐐷"))}); - try expectFmt("", "{}", .{fmtUtf16Le(&[_]u16{mem.readInt(u16, "\xff\xd7", native_endian)})}); - try expectFmt("�", "{}", .{fmtUtf16Le(&[_]u16{mem.readInt(u16, "\x00\xd8", native_endian)})}); - try expectFmt("�", "{}", .{fmtUtf16Le(&[_]u16{mem.readInt(u16, "\xff\xdb", native_endian)})}); - try expectFmt("�", "{}", .{fmtUtf16Le(&[_]u16{mem.readInt(u16, "\x00\xdc", native_endian)})}); - try expectFmt("�", "{}", .{fmtUtf16Le(&[_]u16{mem.readInt(u16, "\xff\xdf", native_endian)})}); - try expectFmt("", "{}", .{fmtUtf16Le(&[_]u16{mem.readInt(u16, "\x00\xe0", native_endian)})}); + try expectFmt("", "{f}", .{fmtUtf16Le(utf8ToUtf16LeStringLiteral(""))}); + try expectFmt("", "{f}", .{fmtUtf16Le(wtf8ToWtf16LeStringLiteral(""))}); + try expectFmt("foo", "{f}", .{fmtUtf16Le(utf8ToUtf16LeStringLiteral("foo"))}); + try expectFmt("foo", "{f}", .{fmtUtf16Le(wtf8ToWtf16LeStringLiteral("foo"))}); + try expectFmt("𐐷", "{f}", .{fmtUtf16Le(wtf8ToWtf16LeStringLiteral("𐐷"))}); + try expectFmt("", "{f}", .{fmtUtf16Le(&[_]u16{mem.readInt(u16, "\xff\xd7", native_endian)})}); + try expectFmt("�", "{f}", .{fmtUtf16Le(&[_]u16{mem.readInt(u16, "\x00\xd8", native_endian)})}); + try expectFmt("�", "{f}", .{fmtUtf16Le(&[_]u16{mem.readInt(u16, "\xff\xdb", native_endian)})}); + try expectFmt("�", "{f}", .{fmtUtf16Le(&[_]u16{mem.readInt(u16, "\x00\xdc", native_endian)})}); + try expectFmt("�", "{f}", .{fmtUtf16Le(&[_]u16{mem.readInt(u16, "\xff\xdf", native_endian)})}); + try expectFmt("", "{f}", .{fmtUtf16Le(&[_]u16{mem.readInt(u16, "\x00\xe0", native_endian)})}); } fn testUtf8ToUtf16LeStringLiteral(utf8ToUtf16LeStringLiteral_: anytype) !void { |
