Merge remote-tracking branch 'origin/master' into llvm10

author: Andrew Kelley <andrew@ziglang.org> 2020-03-13 15:17:53 -0400
committer: Andrew Kelley <andrew@ziglang.org> 2020-03-13 15:17:53 -0400
commit: 656ba530d80e67bc7bb9c40e5c2db26a40743a15 (patch)
tree: 767f4d57000922cf122ae965dc825f87c62ec64e /lib/std/unicode.zig
parent: 96c07674fc2293fa040212ab797c05436dc515b1 (diff)
parent: 3eff77bfb52accbc16eb831753ff4917fc2b4873 (diff)
download: zig-656ba530d80e67bc7bb9c40e5c2db26a40743a15.tar.gz
zig-656ba530d80e67bc7bb9c40e5c2db26a40743a15.zip
1 files changed, 68 insertions, 0 deletions
diff --git a/lib/std/unicode.zig b/lib/std/unicode.zig
index 85c91602d0..8ed51fa145 100644
--- a/lib/std/unicode.zig
+++ b/lib/std/unicode.zig
@@ -629,3 +629,71 @@ test "utf8ToUtf16LeWithNull" {
         testing.expect(utf16[2] == 0);
     }
 }
+
+/// Converts a UTF-8 string literal into a UTF-16LE string literal at compile time; conversion errors become compile errors.
+pub fn utf8ToUtf16LeStringLiteral(comptime utf8: []const u8) *const [calcUtf16LeLen(utf8):0]u16 {
+    comptime {
+        const len: usize = calcUtf16LeLen(utf8); // same length expression as the return type
+        var utf16le: [len:0]u16 = [_:0]u16{0} ** len;
+        const utf16le_len = utf8ToUtf16Le(&utf16le, utf8[0..]) catch |err| @compileError(@errorName(err));
+        assert(len == utf16le_len); // the precomputed length must equal the count actually written
+        return &utf16le;
+    }
+}
+
+/// Returns the number of UTF-16 code units needed to encode `utf8`.
+/// This is the array length `utf8ToUtf16LeStringLiteral` uses in its return type.
+/// Invalid UTF-8 is treated as unreachable.
+fn calcUtf16LeLen(utf8: []const u8) usize {
+    var units: usize = 0;
+    var pos: usize = 0;
+    while (pos < utf8.len) {
+        const seq_len = utf8ByteSequenceLength(utf8[pos]) catch unreachable;
+        const end = pos + seq_len;
+        const cp = utf8Decode(utf8[pos..end]) catch unreachable;
+        // Code points below 0x10000 take one code unit; anything
+        // larger is counted as two (a surrogate pair).
+        units += if (cp < 0x10000) @as(usize, 1) else 2;
+        pos = end;
+    }
+    return units;
+}
+
+test "utf8ToUtf16LeStringLiteral" {
+    { // U+0041: ASCII, a single code unit followed by the 0 sentinel.
+        const expected = [_:0]u16{0x41};
+        const actual = utf8ToUtf16LeStringLiteral("A");
+        testing.expectEqualSlices(u16, &expected, actual);
+        testing.expect(actual[1] == 0);
+    }
+    { // U+10437: above 0xFFFF, so it is encoded as a surrogate pair.
+        const expected = [_:0]u16{ 0xD801, 0xDC37 };
+        const actual = utf8ToUtf16LeStringLiteral("𐐷");
+        testing.expectEqualSlices(u16, &expected, actual);
+        testing.expect(actual[2] == 0);
+    }
+    { // U+02FF: a two-byte UTF-8 sequence, one code unit.
+        const expected = [_:0]u16{0x02FF};
+        const actual = utf8ToUtf16LeStringLiteral("\u{02FF}");
+        testing.expectEqualSlices(u16, &expected, actual);
+        testing.expect(actual[1] == 0);
+    }
+    { // U+07FF: the largest code point with a two-byte UTF-8 encoding.
+        const expected = [_:0]u16{0x7FF};
+        const actual = utf8ToUtf16LeStringLiteral("\u{7FF}");
+        testing.expectEqualSlices(u16, &expected, actual);
+        testing.expect(actual[1] == 0);
+    }
+    { // U+0801: a three-byte UTF-8 sequence, still one code unit.
+        const expected = [_:0]u16{0x801};
+        const actual = utf8ToUtf16LeStringLiteral("\u{801}");
+        testing.expectEqualSlices(u16, &expected, actual);
+        testing.expect(actual[1] == 0);
+    }
+    { // U+10FFFF: the highest code point, encoded as a surrogate pair.
+        const expected = [_:0]u16{ 0xDBFF, 0xDFFF };
+        const actual = utf8ToUtf16LeStringLiteral("\u{10FFFF}");
+        testing.expectEqualSlices(u16, &expected, actual);
+        testing.expect(actual[2] == 0);
+    }
+}
author	Andrew Kelley <andrew@ziglang.org>	2020-03-13 15:17:53 -0400
committer	Andrew Kelley <andrew@ziglang.org>	2020-03-13 15:17:53 -0400
commit	656ba530d80e67bc7bb9c40e5c2db26a40743a15 (patch)
tree	767f4d57000922cf122ae965dc825f87c62ec64e /lib/std/unicode.zig
parent	96c07674fc2293fa040212ab797c05436dc515b1 (diff)
parent	3eff77bfb52accbc16eb831753ff4917fc2b4873 (diff)
download	zig-656ba530d80e67bc7bb9c40e5c2db26a40743a15.tar.gz zig-656ba530d80e67bc7bb9c40e5c2db26a40743a15.zip