aboutsummaryrefslogtreecommitdiff
path: root/lib/std/unicode.zig
diff options
context:
space:
mode:
authorAndrew Kelley <andrew@ziglang.org>2025-06-27 20:05:22 -0700
committerAndrew Kelley <andrew@ziglang.org>2025-07-07 22:43:51 -0700
commit0e37ff0d591dd75ceec9208196bec29efaec607a (patch)
treec126fa823a1f3864e9c363aac70e3a3db0219957 /lib/std/unicode.zig
parent0b3f0124dc33403d329fb8ee63a93215d9af1f1e (diff)
downloadzig-0e37ff0d591dd75ceec9208196bec29efaec607a.tar.gz
zig-0e37ff0d591dd75ceec9208196bec29efaec607a.zip
std.fmt: breaking API changes
added adapter to AnyWriter and GenericWriter to help bridge the gap between old and new API make std.testing.expectFmt work at compile-time std.fmt no longer has a dependency on std.unicode. Formatted printing was never properly unicode-aware. Now it no longer pretends to be. Breakage/deprecations: * std.fs.File.reader -> std.fs.File.deprecatedReader * std.fs.File.writer -> std.fs.File.deprecatedWriter * std.io.GenericReader -> std.io.Reader * std.io.GenericWriter -> std.io.Writer * std.io.AnyReader -> std.io.Reader * std.io.AnyWriter -> std.io.Writer * std.fmt.format -> std.fmt.deprecatedFormat * std.fmt.fmtSliceEscapeLower -> std.ascii.hexEscape * std.fmt.fmtSliceEscapeUpper -> std.ascii.hexEscape * std.fmt.fmtSliceHexLower -> {x} * std.fmt.fmtSliceHexUpper -> {X} * std.fmt.fmtIntSizeDec -> {B} * std.fmt.fmtIntSizeBin -> {Bi} * std.fmt.fmtDuration -> {D} * std.fmt.fmtDurationSigned -> {D} * {} -> {f} when there is a format method * format method signature - anytype -> *std.io.Writer - inferred error set -> error{WriteFailed} - options -> (deleted) * std.fmt.Formatted - now takes context type explicitly - no fmt string
Diffstat (limited to 'lib/std/unicode.zig')
-rw-r--r--lib/std/unicode.zig59
1 files changed, 23 insertions, 36 deletions
diff --git a/lib/std/unicode.zig b/lib/std/unicode.zig
index 4c6ec1294b..ef694f33ba 100644
--- a/lib/std/unicode.zig
+++ b/lib/std/unicode.zig
@@ -9,6 +9,7 @@ const native_endian = builtin.cpu.arch.endian();
///
/// See also: https://en.wikipedia.org/wiki/Specials_(Unicode_block)#Replacement_character
pub const replacement_character: u21 = 0xFFFD;
+pub const replacement_character_utf8: [3]u8 = utf8EncodeComptime(replacement_character);
/// Returns how many bytes the UTF-8 representation would require
/// for the given codepoint.
@@ -802,14 +803,7 @@ fn testDecode(bytes: []const u8) !u21 {
/// Ill-formed UTF-8 byte sequences are replaced by the replacement character (U+FFFD)
/// according to "U+FFFD Substitution of Maximal Subparts" from Chapter 3 of
/// the Unicode standard, and as specified by https://encoding.spec.whatwg.org/#utf-8-decoder
-fn formatUtf8(
- utf8: []const u8,
- comptime fmt: []const u8,
- options: std.fmt.FormatOptions,
- writer: anytype,
-) !void {
- _ = fmt;
- _ = options;
+fn formatUtf8(utf8: []const u8, writer: *std.io.Writer) std.io.Writer.Error!void {
var buf: [300]u8 = undefined; // just an arbitrary size
var u8len: usize = 0;
@@ -898,27 +892,27 @@ fn formatUtf8(
/// Ill-formed UTF-8 byte sequences are replaced by the replacement character (U+FFFD)
/// according to "U+FFFD Substitution of Maximal Subparts" from Chapter 3 of
/// the Unicode standard, and as specified by https://encoding.spec.whatwg.org/#utf-8-decoder
-pub fn fmtUtf8(utf8: []const u8) std.fmt.Formatter(formatUtf8) {
+pub fn fmtUtf8(utf8: []const u8) std.fmt.Formatter([]const u8, formatUtf8) {
return .{ .data = utf8 };
}
test fmtUtf8 {
const expectFmt = testing.expectFmt;
- try expectFmt("", "{}", .{fmtUtf8("")});
- try expectFmt("foo", "{}", .{fmtUtf8("foo")});
- try expectFmt("𐐷", "{}", .{fmtUtf8("𐐷")});
+ try expectFmt("", "{f}", .{fmtUtf8("")});
+ try expectFmt("foo", "{f}", .{fmtUtf8("foo")});
+ try expectFmt("𐐷", "{f}", .{fmtUtf8("𐐷")});
// Table 3-8. U+FFFD for Non-Shortest Form Sequences
- try expectFmt("��������A", "{}", .{fmtUtf8("\xC0\xAF\xE0\x80\xBF\xF0\x81\x82A")});
+ try expectFmt("��������A", "{f}", .{fmtUtf8("\xC0\xAF\xE0\x80\xBF\xF0\x81\x82A")});
// Table 3-9. U+FFFD for Ill-Formed Sequences for Surrogates
- try expectFmt("��������A", "{}", .{fmtUtf8("\xED\xA0\x80\xED\xBF\xBF\xED\xAFA")});
+ try expectFmt("��������A", "{f}", .{fmtUtf8("\xED\xA0\x80\xED\xBF\xBF\xED\xAFA")});
// Table 3-10. U+FFFD for Other Ill-Formed Sequences
- try expectFmt("�����A��B", "{}", .{fmtUtf8("\xF4\x91\x92\x93\xFFA\x80\xBFB")});
+ try expectFmt("�����A��B", "{f}", .{fmtUtf8("\xF4\x91\x92\x93\xFFA\x80\xBFB")});
// Table 3-11. U+FFFD for Truncated Sequences
- try expectFmt("����A", "{}", .{fmtUtf8("\xE1\x80\xE2\xF0\x91\x92\xF1\xBFA")});
+ try expectFmt("����A", "{f}", .{fmtUtf8("\xE1\x80\xE2\xF0\x91\x92\xF1\xBFA")});
}
fn utf16LeToUtf8ArrayListImpl(
@@ -1477,14 +1471,7 @@ test calcWtf16LeLen {
/// Print the given `utf16le` string, encoded as UTF-8 bytes.
/// Unpaired surrogates are replaced by the replacement character (U+FFFD).
-fn formatUtf16Le(
- utf16le: []const u16,
- comptime fmt: []const u8,
- options: std.fmt.FormatOptions,
- writer: anytype,
-) !void {
- _ = fmt;
- _ = options;
+fn formatUtf16Le(utf16le: []const u16, writer: *std.io.Writer) std.io.Writer.Error!void {
var buf: [300]u8 = undefined; // just an arbitrary size
var it = Utf16LeIterator.init(utf16le);
var u8len: usize = 0;
@@ -1505,23 +1492,23 @@ pub const fmtUtf16le = @compileError("deprecated; renamed to fmtUtf16Le");
/// Return a Formatter for a (potentially ill-formed) UTF-16 LE string,
/// which will be converted to UTF-8 during formatting.
/// Unpaired surrogates are replaced by the replacement character (U+FFFD).
-pub fn fmtUtf16Le(utf16le: []const u16) std.fmt.Formatter(formatUtf16Le) {
+pub fn fmtUtf16Le(utf16le: []const u16) std.fmt.Formatter([]const u16, formatUtf16Le) {
return .{ .data = utf16le };
}
test fmtUtf16Le {
const expectFmt = testing.expectFmt;
- try expectFmt("", "{}", .{fmtUtf16Le(utf8ToUtf16LeStringLiteral(""))});
- try expectFmt("", "{}", .{fmtUtf16Le(wtf8ToWtf16LeStringLiteral(""))});
- try expectFmt("foo", "{}", .{fmtUtf16Le(utf8ToUtf16LeStringLiteral("foo"))});
- try expectFmt("foo", "{}", .{fmtUtf16Le(wtf8ToWtf16LeStringLiteral("foo"))});
- try expectFmt("𐐷", "{}", .{fmtUtf16Le(wtf8ToWtf16LeStringLiteral("𐐷"))});
- try expectFmt("퟿", "{}", .{fmtUtf16Le(&[_]u16{mem.readInt(u16, "\xff\xd7", native_endian)})});
- try expectFmt("�", "{}", .{fmtUtf16Le(&[_]u16{mem.readInt(u16, "\x00\xd8", native_endian)})});
- try expectFmt("�", "{}", .{fmtUtf16Le(&[_]u16{mem.readInt(u16, "\xff\xdb", native_endian)})});
- try expectFmt("�", "{}", .{fmtUtf16Le(&[_]u16{mem.readInt(u16, "\x00\xdc", native_endian)})});
- try expectFmt("�", "{}", .{fmtUtf16Le(&[_]u16{mem.readInt(u16, "\xff\xdf", native_endian)})});
- try expectFmt("", "{}", .{fmtUtf16Le(&[_]u16{mem.readInt(u16, "\x00\xe0", native_endian)})});
+ try expectFmt("", "{f}", .{fmtUtf16Le(utf8ToUtf16LeStringLiteral(""))});
+ try expectFmt("", "{f}", .{fmtUtf16Le(wtf8ToWtf16LeStringLiteral(""))});
+ try expectFmt("foo", "{f}", .{fmtUtf16Le(utf8ToUtf16LeStringLiteral("foo"))});
+ try expectFmt("foo", "{f}", .{fmtUtf16Le(wtf8ToWtf16LeStringLiteral("foo"))});
+ try expectFmt("𐐷", "{f}", .{fmtUtf16Le(wtf8ToWtf16LeStringLiteral("𐐷"))});
+ try expectFmt("퟿", "{f}", .{fmtUtf16Le(&[_]u16{mem.readInt(u16, "\xff\xd7", native_endian)})});
+ try expectFmt("�", "{f}", .{fmtUtf16Le(&[_]u16{mem.readInt(u16, "\x00\xd8", native_endian)})});
+ try expectFmt("�", "{f}", .{fmtUtf16Le(&[_]u16{mem.readInt(u16, "\xff\xdb", native_endian)})});
+ try expectFmt("�", "{f}", .{fmtUtf16Le(&[_]u16{mem.readInt(u16, "\x00\xdc", native_endian)})});
+ try expectFmt("�", "{f}", .{fmtUtf16Le(&[_]u16{mem.readInt(u16, "\xff\xdf", native_endian)})});
+ try expectFmt("", "{f}", .{fmtUtf16Le(&[_]u16{mem.readInt(u16, "\x00\xe0", native_endian)})});
}
fn testUtf8ToUtf16LeStringLiteral(utf8ToUtf16LeStringLiteral_: anytype) !void {