diff options
| author | BenoitJGirard <BenoitJGirard@users.noreply.github.com> | 2019-02-17 14:38:55 -0500 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2019-02-17 14:38:55 -0500 |
| commit | 6daa041932ae5ab03eed953dacf3ca506078390c (patch) | |
| tree | 0f51f6c2ff84dde51b61bba6799e5c5abccf91b4 /std/unicode.zig | |
| parent | f0ec308e26ff957c7fbb50ccc69d3d549c42c4da (diff) | |
| parent | 8d2a902945ef97f28152c3d5a68bb974809c8539 (diff) | |
| download | zig-6daa041932ae5ab03eed953dacf3ca506078390c.tar.gz zig-6daa041932ae5ab03eed953dacf3ca506078390c.zip | |
Merge pull request #2 from ziglang/master
Refreshing fork.
Diffstat (limited to 'std/unicode.zig')
| -rw-r--r-- | std/unicode.zig | 149 |
1 files changed, 68 insertions, 81 deletions
diff --git a/std/unicode.zig b/std/unicode.zig index fcb748401f..fccdf513b9 100644 --- a/std/unicode.zig +++ b/std/unicode.zig @@ -1,7 +1,7 @@ const std = @import("./index.zig"); const builtin = @import("builtin"); -const debug = std.debug; const assert = std.debug.assert; +const testing = std.testing; const mem = std.mem; /// Returns how many bytes the UTF-8 representation would require @@ -32,7 +32,7 @@ pub fn utf8ByteSequenceLength(first_byte: u8) !u3 { /// Returns: the number of bytes written to out. pub fn utf8Encode(c: u32, out: []u8) !u3 { const length = try utf8CodepointSequenceLength(c); - debug.assert(out.len >= length); + assert(out.len >= length); switch (length) { // The pattern for each is the same // - Increasing the initial shift by 6 each time @@ -81,8 +81,8 @@ const Utf8Decode2Error = error{ Utf8OverlongEncoding, }; pub fn utf8Decode2(bytes: []const u8) Utf8Decode2Error!u32 { - debug.assert(bytes.len == 2); - debug.assert(bytes[0] & 0b11100000 == 0b11000000); + assert(bytes.len == 2); + assert(bytes[0] & 0b11100000 == 0b11000000); var value: u32 = bytes[0] & 0b00011111; if (bytes[1] & 0b11000000 != 0b10000000) return error.Utf8ExpectedContinuation; @@ -100,8 +100,8 @@ const Utf8Decode3Error = error{ Utf8EncodesSurrogateHalf, }; pub fn utf8Decode3(bytes: []const u8) Utf8Decode3Error!u32 { - debug.assert(bytes.len == 3); - debug.assert(bytes[0] & 0b11110000 == 0b11100000); + assert(bytes.len == 3); + assert(bytes[0] & 0b11110000 == 0b11100000); var value: u32 = bytes[0] & 0b00001111; if (bytes[1] & 0b11000000 != 0b10000000) return error.Utf8ExpectedContinuation; @@ -124,8 +124,8 @@ const Utf8Decode4Error = error{ Utf8CodepointTooLarge, }; pub fn utf8Decode4(bytes: []const u8) Utf8Decode4Error!u32 { - debug.assert(bytes.len == 4); - debug.assert(bytes[0] & 0b11111000 == 0b11110000); + assert(bytes.len == 4); + assert(bytes[0] & 0b11111000 == 0b11110000); var value: u32 = bytes[0] & 0b00000111; if (bytes[1] & 0b11000000 != 0b10000000) return error.Utf8ExpectedContinuation; @@ -249,12 +249,12 @@ pub const Utf16LeIterator = struct { pub fn nextCodepoint(it: *Utf16LeIterator) !?u32 { assert(it.i <= it.bytes.len); if (it.i == it.bytes.len) return null; - const c0: u32 = mem.readIntLE(u16, it.bytes[it.i .. it.i + 2]); + const c0: u32 = mem.readIntSliceLittle(u16, it.bytes[it.i .. it.i + 2]); if (c0 & ~u32(0x03ff) == 0xd800) { // surrogate pair it.i += 2; if (it.i >= it.bytes.len) return error.DanglingSurrogateHalf; - const c1: u32 = mem.readIntLE(u16, it.bytes[it.i .. it.i + 2]); + const c1: u32 = mem.readIntSliceLittle(u16, it.bytes[it.i .. it.i + 2]); if (c1 & ~u32(0x03ff) != 0xdc00) return error.ExpectedSecondSurrogateHalf; it.i += 2; return 0x10000 + (((c0 & 0x03ff) << 10) | (c1 & 0x03ff)); @@ -274,23 +274,23 @@ test "utf8 encode" { fn testUtf8Encode() !void { // A few taken from wikipedia a few taken elsewhere var array: [4]u8 = undefined; - debug.assert((try utf8Encode(try utf8Decode("€"), array[0..])) == 3); - debug.assert(array[0] == 0b11100010); - debug.assert(array[1] == 0b10000010); - debug.assert(array[2] == 0b10101100); + testing.expect((try utf8Encode(try utf8Decode("€"), array[0..])) == 3); + testing.expect(array[0] == 0b11100010); + testing.expect(array[1] == 0b10000010); + testing.expect(array[2] == 0b10101100); - debug.assert((try utf8Encode(try utf8Decode("$"), array[0..])) == 1); - debug.assert(array[0] == 0b00100100); + testing.expect((try utf8Encode(try utf8Decode("$"), array[0..])) == 1); + testing.expect(array[0] == 0b00100100); - debug.assert((try utf8Encode(try utf8Decode("¢"), array[0..])) == 2); - debug.assert(array[0] == 0b11000010); - debug.assert(array[1] == 0b10100010); + testing.expect((try utf8Encode(try utf8Decode("¢"), array[0..])) == 2); + testing.expect(array[0] == 0b11000010); + testing.expect(array[1] == 0b10100010); - debug.assert((try utf8Encode(try utf8Decode("𐍈"), array[0..])) == 4); - debug.assert(array[0] == 0b11110000); - debug.assert(array[1] == 0b10010000); - debug.assert(array[2] == 0b10001101); - debug.assert(array[3] == 0b10001000); + testing.expect((try utf8Encode(try utf8Decode("𐍈"), array[0..])) == 4); + testing.expect(array[0] == 0b11110000); + testing.expect(array[1] == 0b10010000); + testing.expect(array[2] == 0b10001101); + testing.expect(array[3] == 0b10001000); } test "utf8 encode error" { @@ -306,11 +306,7 @@ fn testUtf8EncodeError() void { } fn testErrorEncode(codePoint: u32, array: []u8, expectedErr: anyerror) void { - if (utf8Encode(codePoint, array)) |_| { - unreachable; - } else |err| { - debug.assert(err == expectedErr); - } + testing.expectError(expectedErr, utf8Encode(codePoint, array)); } test "utf8 iterator on ascii" { @@ -321,16 +317,16 @@ fn testUtf8IteratorOnAscii() void { const s = Utf8View.initComptime("abc"); var it1 = s.iterator(); - debug.assert(std.mem.eql(u8, "a", it1.nextCodepointSlice().?)); - debug.assert(std.mem.eql(u8, "b", it1.nextCodepointSlice().?)); - debug.assert(std.mem.eql(u8, "c", it1.nextCodepointSlice().?)); - debug.assert(it1.nextCodepointSlice() == null); + testing.expect(std.mem.eql(u8, "a", it1.nextCodepointSlice().?)); + testing.expect(std.mem.eql(u8, "b", it1.nextCodepointSlice().?)); + testing.expect(std.mem.eql(u8, "c", it1.nextCodepointSlice().?)); + testing.expect(it1.nextCodepointSlice() == null); var it2 = s.iterator(); - debug.assert(it2.nextCodepoint().? == 'a'); - debug.assert(it2.nextCodepoint().? == 'b'); - debug.assert(it2.nextCodepoint().? == 'c'); - debug.assert(it2.nextCodepoint() == null); + testing.expect(it2.nextCodepoint().? == 'a'); + testing.expect(it2.nextCodepoint().? == 'b'); + testing.expect(it2.nextCodepoint().? == 'c'); + testing.expect(it2.nextCodepoint() == null); } test "utf8 view bad" { @@ -340,12 +336,7 @@ test "utf8 view bad" { fn testUtf8ViewBad() void { // Compile-time error. // const s3 = Utf8View.initComptime("\xfe\xf2"); - const s = Utf8View.init("hel\xadlo"); - if (s) |_| { - unreachable; - } else |err| { - debug.assert(err == error.InvalidUtf8); - } + testing.expectError(error.InvalidUtf8, Utf8View.init("hel\xadlo")); } test "utf8 view ok" { @@ -356,16 +347,16 @@ fn testUtf8ViewOk() void { const s = Utf8View.initComptime("東京市"); var it1 = s.iterator(); - debug.assert(std.mem.eql(u8, "東", it1.nextCodepointSlice().?)); - debug.assert(std.mem.eql(u8, "京", it1.nextCodepointSlice().?)); - debug.assert(std.mem.eql(u8, "市", it1.nextCodepointSlice().?)); - debug.assert(it1.nextCodepointSlice() == null); + testing.expect(std.mem.eql(u8, "東", it1.nextCodepointSlice().?)); + testing.expect(std.mem.eql(u8, "京", it1.nextCodepointSlice().?)); + testing.expect(std.mem.eql(u8, "市", it1.nextCodepointSlice().?)); + testing.expect(it1.nextCodepointSlice() == null); var it2 = s.iterator(); - debug.assert(it2.nextCodepoint().? == 0x6771); - debug.assert(it2.nextCodepoint().? == 0x4eac); - debug.assert(it2.nextCodepoint().? == 0x5e02); - debug.assert(it2.nextCodepoint() == null); + testing.expect(it2.nextCodepoint().? == 0x6771); + testing.expect(it2.nextCodepoint().? == 0x4eac); + testing.expect(it2.nextCodepoint().? == 0x5e02); + testing.expect(it2.nextCodepoint() == null); } test "bad utf8 slice" { @@ -373,10 +364,10 @@ test "bad utf8 slice" { testBadUtf8Slice(); } fn testBadUtf8Slice() void { - debug.assert(utf8ValidateSlice("abc")); - debug.assert(!utf8ValidateSlice("abc\xc0")); - debug.assert(!utf8ValidateSlice("abc\xc0abc")); - debug.assert(utf8ValidateSlice("abc\xdf\xbf")); + testing.expect(utf8ValidateSlice("abc")); + testing.expect(!utf8ValidateSlice("abc\xc0")); + testing.expect(!utf8ValidateSlice("abc\xc0abc")); + testing.expect(utf8ValidateSlice("abc\xdf\xbf")); } test "valid utf8" { @@ -459,21 +450,17 @@ fn testMiscInvalidUtf8() void { } fn testError(bytes: []const u8, expected_err: anyerror) void { - if (testDecode(bytes)) |_| { - unreachable; - } else |err| { - debug.assert(err == expected_err); - } + testing.expectError(expected_err, testDecode(bytes)); } fn testValid(bytes: []const u8, expected_codepoint: u32) void { - debug.assert((testDecode(bytes) catch unreachable) == expected_codepoint); + testing.expect((testDecode(bytes) catch unreachable) == expected_codepoint); } fn testDecode(bytes: []const u8) !u32 { const length = try utf8ByteSequenceLength(bytes[0]); if (bytes.len < length) return error.UnexpectedEof; - debug.assert(bytes.len == length); + testing.expect(bytes.len == length); return utf8Decode(bytes); } @@ -510,48 +497,48 @@ test "utf16leToUtf8" { const utf16le_as_bytes = @sliceToBytes(utf16le[0..]); { - mem.writeInt(utf16le_as_bytes[0..], u16('A'), builtin.Endian.Little); - mem.writeInt(utf16le_as_bytes[2..], u16('a'), builtin.Endian.Little); + mem.writeIntSliceLittle(u16, utf16le_as_bytes[0..], 'A'); + mem.writeIntSliceLittle(u16, utf16le_as_bytes[2..], 'a'); const utf8 = try utf16leToUtf8Alloc(std.debug.global_allocator, utf16le); - assert(mem.eql(u8, utf8, "Aa")); + testing.expect(mem.eql(u8, utf8, "Aa")); } { - mem.writeInt(utf16le_as_bytes[0..], u16(0x80), builtin.Endian.Little); - mem.writeInt(utf16le_as_bytes[2..], u16(0xffff), builtin.Endian.Little); + mem.writeIntSliceLittle(u16, utf16le_as_bytes[0..], 0x80); + mem.writeIntSliceLittle(u16, utf16le_as_bytes[2..], 0xffff); const utf8 = try utf16leToUtf8Alloc(std.debug.global_allocator, utf16le); - assert(mem.eql(u8, utf8, "\xc2\x80" ++ "\xef\xbf\xbf")); + testing.expect(mem.eql(u8, utf8, "\xc2\x80" ++ "\xef\xbf\xbf")); } { // the values just outside the surrogate half range - mem.writeInt(utf16le_as_bytes[0..], u16(0xd7ff), builtin.Endian.Little); - mem.writeInt(utf16le_as_bytes[2..], u16(0xe000), builtin.Endian.Little); + mem.writeIntSliceLittle(u16, utf16le_as_bytes[0..], 0xd7ff); + mem.writeIntSliceLittle(u16, utf16le_as_bytes[2..], 0xe000); const utf8 = try utf16leToUtf8Alloc(std.debug.global_allocator, utf16le); - assert(mem.eql(u8, utf8, "\xed\x9f\xbf" ++ "\xee\x80\x80")); + testing.expect(mem.eql(u8, utf8, "\xed\x9f\xbf" ++ "\xee\x80\x80")); } { // smallest surrogate pair - mem.writeInt(utf16le_as_bytes[0..], u16(0xd800), builtin.Endian.Little); - mem.writeInt(utf16le_as_bytes[2..], u16(0xdc00), builtin.Endian.Little); + mem.writeIntSliceLittle(u16, utf16le_as_bytes[0..], 0xd800); + mem.writeIntSliceLittle(u16, utf16le_as_bytes[2..], 0xdc00); const utf8 = try utf16leToUtf8Alloc(std.debug.global_allocator, utf16le); - assert(mem.eql(u8, utf8, "\xf0\x90\x80\x80")); + testing.expect(mem.eql(u8, utf8, "\xf0\x90\x80\x80")); } { // largest surrogate pair - mem.writeInt(utf16le_as_bytes[0..], u16(0xdbff), builtin.Endian.Little); - mem.writeInt(utf16le_as_bytes[2..], u16(0xdfff), builtin.Endian.Little); + mem.writeIntSliceLittle(u16, utf16le_as_bytes[0..], 0xdbff); + mem.writeIntSliceLittle(u16, utf16le_as_bytes[2..], 0xdfff); const utf8 = try utf16leToUtf8Alloc(std.debug.global_allocator, utf16le); - assert(mem.eql(u8, utf8, "\xf4\x8f\xbf\xbf")); + testing.expect(mem.eql(u8, utf8, "\xf4\x8f\xbf\xbf")); } { - mem.writeInt(utf16le_as_bytes[0..], u16(0xdbff), builtin.Endian.Little); - mem.writeInt(utf16le_as_bytes[2..], u16(0xdc00), builtin.Endian.Little); + mem.writeIntSliceLittle(u16, utf16le_as_bytes[0..], 0xdbff); + mem.writeIntSliceLittle(u16, utf16le_as_bytes[2..], 0xdc00); const utf8 = try utf16leToUtf8Alloc(std.debug.global_allocator, utf16le); - assert(mem.eql(u8, utf8, "\xf4\x8f\xb0\x80")); + testing.expect(mem.eql(u8, utf8, "\xf4\x8f\xb0\x80")); } } @@ -583,7 +570,7 @@ pub fn utf8ToUtf16Le(utf16le: []u16, utf8: []const u8) !usize { while (it.nextCodepoint()) |codepoint| { if (end_index == utf16le_as_bytes.len) return (end_index / 2) + 1; // TODO surrogate pairs - mem.writeInt(utf16le_as_bytes[end_index..], @intCast(u16, codepoint), builtin.Endian.Little); + mem.writeIntSliceLittle(u16, utf16le_as_bytes[end_index..], @intCast(u16, codepoint)); end_index += 2; } return end_index / 2; |
