diff options
| author | Andrew Kelley <andrew@ziglang.org> | 2020-03-13 15:17:53 -0400 |
|---|---|---|
| committer | Andrew Kelley <andrew@ziglang.org> | 2020-03-13 15:17:53 -0400 |
| commit | 656ba530d80e67bc7bb9c40e5c2db26a40743a15 (patch) | |
| tree | 767f4d57000922cf122ae965dc825f87c62ec64e /lib/std/unicode.zig | |
| parent | 96c07674fc2293fa040212ab797c05436dc515b1 (diff) | |
| parent | 3eff77bfb52accbc16eb831753ff4917fc2b4873 (diff) | |
| download | zig-656ba530d80e67bc7bb9c40e5c2db26a40743a15.tar.gz zig-656ba530d80e67bc7bb9c40e5c2db26a40743a15.zip | |
Merge remote-tracking branch 'origin/master' into llvm10
Diffstat (limited to 'lib/std/unicode.zig')
| -rw-r--r-- | lib/std/unicode.zig | 68 |
1 files changed, 68 insertions, 0 deletions
// Added to lib/std/unicode.zig (diff index 85c91602d0..8ed51fa145,
// hunk @@ -629,3 +629,71 @@), directly after the end of
// test "utf8ToUtf16LeWithNull".

/// Converts a UTF-8 string literal into a UTF-16LE string literal.
///
/// The whole conversion is evaluated at compile time. The result is a pointer
/// to a 0-terminated array holding one `u16` per UTF-16 code unit; the
/// terminator is not counted in the array length.
/// A conversion failure is reported as a compile error naming the error.
pub fn utf8ToUtf16LeStringLiteral(comptime utf8: []const u8) *const [calcUtf16LeLen(utf8):0]u16 {
    comptime {
        const len: usize = calcUtf16LeLen(utf8);
        var utf16le: [len:0]u16 = [_:0]u16{0} ** len;
        // `@compileError` takes a string, so report the failure by its error name.
        const utf16le_len = utf8ToUtf16Le(&utf16le, utf8[0..]) catch |err| @compileError(@errorName(err));
        // The pre-computed length must match what the encoder actually wrote.
        assert(len == utf16le_len);
        return &utf16le;
    }
}

/// Returns the number of UTF-16 code units needed to encode the supplied
/// UTF-8 string. Asserts that the data is valid UTF-8.
fn calcUtf16LeLen(utf8: []const u8) usize {
    var src_i: usize = 0;
    var dest_len: usize = 0;
    while (src_i < utf8.len) {
        const n = utf8ByteSequenceLength(utf8[src_i]) catch unreachable;
        const next_src_i = src_i + n;
        const codepoint = utf8Decode(utf8[src_i..next_src_i]) catch unreachable;
        if (codepoint < 0x10000) {
            // Codepoints below 0x10000 occupy a single code unit.
            dest_len += 1;
        } else {
            // Everything above is counted as two code units (a surrogate pair).
            dest_len += 2;
        }
        src_i = next_src_i;
    }
    return dest_len;
}

test "utf8ToUtf16LeStringLiteral" {
    {
        // 1-byte UTF-8 sequence.
        const bytes = [_:0]u16{0x41};
        const utf16 = utf8ToUtf16LeStringLiteral("A");
        testing.expectEqualSlices(u16, &bytes, utf16);
        testing.expect(utf16[1] == 0);
    }
    {
        // 4-byte UTF-8 sequence, encoded as a surrogate pair.
        const bytes = [_:0]u16{ 0xD801, 0xDC37 };
        const utf16 = utf8ToUtf16LeStringLiteral("𐐷");
        testing.expectEqualSlices(u16, &bytes, utf16);
        testing.expect(utf16[2] == 0);
    }
    {
        const bytes = [_:0]u16{0x02FF};
        const utf16 = utf8ToUtf16LeStringLiteral("\u{02FF}");
        testing.expectEqualSlices(u16, &bytes, utf16);
        testing.expect(utf16[1] == 0);
    }
    {
        // Largest codepoint that fits in a 2-byte UTF-8 sequence.
        const bytes = [_:0]u16{0x7FF};
        const utf16 = utf8ToUtf16LeStringLiteral("\u{7FF}");
        testing.expectEqualSlices(u16, &bytes, utf16);
        testing.expect(utf16[1] == 0);
    }
    {
        // 3-byte UTF-8 sequence.
        const bytes = [_:0]u16{0x801};
        const utf16 = utf8ToUtf16LeStringLiteral("\u{801}");
        testing.expectEqualSlices(u16, &bytes, utf16);
        testing.expect(utf16[1] == 0);
    }
    {
        // Largest valid codepoint.
        const bytes = [_:0]u16{ 0xDBFF, 0xDFFF };
        const utf16 = utf8ToUtf16LeStringLiteral("\u{10FFFF}");
        testing.expectEqualSlices(u16, &bytes, utf16);
        testing.expect(utf16[2] == 0);
    }
}
