diff options
| author | Andrew Kelley <andrew@ziglang.org> | 2019-09-26 01:54:45 -0400 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2019-09-26 01:54:45 -0400 |
| commit | 68bb3945708c43109c48bda3664176307d45b62c (patch) | |
| tree | afb9731e10cef9d192560b52cd9ae2cf179775c4 /lib/std/ascii.zig | |
| parent | 6128bc728d1e1024a178c16c2149f5b1a167a013 (diff) | |
| parent | 4637e8f9699af9c3c6cf4df50ef5bb67c7a318a4 (diff) | |
| download | zig-68bb3945708c43109c48bda3664176307d45b62c.tar.gz zig-68bb3945708c43109c48bda3664176307d45b62c.zip | |
Merge pull request #3315 from ziglang/mv-std-lib
Move std/ to lib/std/
Diffstat (limited to 'lib/std/ascii.zig')
| -rw-r--r-- | lib/std/ascii.zig | 284 |
1 files changed, 284 insertions, 0 deletions
// Reconstructed from the diff that adds lib/std/ascii.zig (new file, blob 2bc11ba3f2).
//
// ASCII character classification and case conversion.
//
// Does NOT look at the locale the way C89's toupper(3), isspace() et cetera do.
// It is my opinion that encodings other than UTF-8 should not be supported.
// Also does not handle Unicode character classes.
//
// I could have taken only a u7 to make this clear, but it would be slower
// (and the extra 128 table bytes are not much to pay).
//
// https://upload.wikimedia.org/wikipedia/commons/thumb/c/cf/USASCII_code_chart.png/1200px-USASCII_code_chart.png

const std = @import("std");

// Bit position of each character class inside an entry of `combinedTable`.
// Eight classes, so the tag fits a u3 and can be used directly as the shift
// amount for a u8.
const tIndex = enum(u3) {
    Alpha,
    Hex,
    Space,
    Digit,
    Lower,
    Upper,
    // Ctrl, < 0x20 || == DEL
    // Print, = Graph || == ' '. NOT '\t' et cetera
    Punct,
    Graph,
    //ASCII, | ~0b01111111
    //isBlank, == ' ' || == '\x09'
};

// One byte per possible u8 value; bit `@enumToInt(tIndex.X)` is set when the
// character belongs to class X. Built at compile time from the per-class
// tables below. Entries 128..255 are zero, so non-ASCII bytes belong to no class.
const combinedTable = init: {
    comptime var table: [256]u8 = undefined;

    const mem = std.mem;

    const alpha = [_]u1{
        // 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

        0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
        0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
    };
    const lower = [_]u1{
        // 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
    };
    const upper = [_]u1{
        // 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

        0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    };
    const digit = [_]u1{
        // 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,

        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    };
    const hex = [_]u1{
        // 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,

        0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    };
    const space = [_]u1{
        // 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15
        0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    };
    const punct = [_]u1{
        // 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1,

        1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
        1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,
    };
    const graph = [_]u1{
        // 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,

        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
    };

    // Pack the eight one-bit class tables into a single byte per character.
    comptime var i = 0;
    inline while (i < 128) : (i += 1) {
        table[i] =
            u8(alpha[i]) << @enumToInt(tIndex.Alpha) |
            u8(hex[i]) << @enumToInt(tIndex.Hex) |
            u8(space[i]) << @enumToInt(tIndex.Space) |
            u8(digit[i]) << @enumToInt(tIndex.Digit) |
            u8(lower[i]) << @enumToInt(tIndex.Lower) |
            u8(upper[i]) << @enumToInt(tIndex.Upper) |
            u8(punct[i]) << @enumToInt(tIndex.Punct) |
            u8(graph[i]) << @enumToInt(tIndex.Graph);
    }
    // Bytes outside the ASCII range are members of no class.
    mem.set(u8, table[128..256], 0);
    break :init table;
};

// Returns true when `c` has the class bit `t` set in `combinedTable`.
fn inTable(c: u8, t: tIndex) bool {
    return (combinedTable[c] & (u8(1) << @enumToInt(t))) != 0;
}

/// Returns true if `c` is an ASCII letter or decimal digit.
pub fn isAlNum(c: u8) bool {
    const mask = (u8(1) << @enumToInt(tIndex.Alpha)) |
        (u8(1) << @enumToInt(tIndex.Digit));
    return (combinedTable[c] & mask) != 0;
}

/// Returns true if `c` is an ASCII letter ('A'...'Z' or 'a'...'z').
pub fn isAlpha(c: u8) bool {
    return inTable(c, tIndex.Alpha);
}

/// Returns true if `c` is a control character: below 0x20, or DEL (127).
pub fn isCntrl(c: u8) bool {
    return c < 0x20 or c == 127; //DEL
}

/// Returns true if `c` is a decimal digit ('0'...'9').
pub fn isDigit(c: u8) bool {
    return inTable(c, tIndex.Digit);
}

/// Returns true if `c` is a visible ASCII character (0x21...0x7E); space is excluded.
pub fn isGraph(c: u8) bool {
    return inTable(c, tIndex.Graph);
}

/// Returns true if `c` is a lowercase ASCII letter ('a'...'z').
pub fn isLower(c: u8) bool {
    return inTable(c, tIndex.Lower);
}

/// Returns true if `c` is a visible ASCII character or the space character.
/// Other whitespace such as '\t' is not printable.
pub fn isPrint(c: u8) bool {
    return inTable(c, tIndex.Graph) or c == ' ';
}

/// Returns true if `c` is an ASCII punctuation character, i.e. a visible
/// character that is neither a letter nor a digit.
pub fn isPunct(c: u8) bool {
    return inTable(c, tIndex.Punct);
}

/// Returns true if `c` is ASCII whitespace: bytes 9 through 13
/// (tab, line feed, vertical tab, form feed, carriage return) or ' '.
pub fn isSpace(c: u8) bool {
    return inTable(c, tIndex.Space);
}

/// Returns true if `c` is an uppercase ASCII letter ('A'...'Z').
pub fn isUpper(c: u8) bool {
    return inTable(c, tIndex.Upper);
}

/// Returns true if `c` is a hexadecimal digit ('0'...'9', 'A'...'F' or 'a'...'f').
pub fn isXDigit(c: u8) bool {
    return inTable(c, tIndex.Hex);
}

/// Returns true if `c` is in the 7-bit ASCII range (less than 128).
pub fn isASCII(c: u8) bool {
    return c < 128;
}

/// Returns true if `c` is a space or a horizontal tab.
pub fn isBlank(c: u8) bool {
    return (c == ' ') or (c == '\x09');
}

/// Converts a lowercase ASCII letter to uppercase; every other byte
/// (including bytes >= 128) is returned unchanged.
pub fn toUpper(c: u8) u8 {
    // Upper and lower case letters differ only in bit 5 (0x20).
    return if (isLower(c)) c & 0b11011111 else c;
}

/// Converts an uppercase ASCII letter to lowercase; every other byte
/// (including bytes >= 128) is returned unchanged.
pub fn toLower(c: u8) u8 {
    // Upper and lower case letters differ only in bit 5 (0x20).
    return if (isUpper(c)) c | 0b00100000 else c;
}

test "ascii character classes" {
    const testing = std.testing;

    testing.expect('C' == toUpper('c'));
    testing.expect(':' == toUpper(':'));
    testing.expect('\xab' == toUpper('\xab'));
    testing.expect('c' == toLower('C'));
    testing.expect(isAlpha('c'));
    testing.expect(!isAlpha('5'));
    testing.expect(isSpace(' '));

    testing.expect(isAlNum('z'));
    testing.expect(isAlNum('7'));
    testing.expect(!isAlNum('_'));
    testing.expect(isXDigit('f'));
    testing.expect(!isXDigit('g'));
    testing.expect(isPunct('!'));
    testing.expect(!isPunct(' '));
    testing.expect(isPrint(' '));
    testing.expect(!isPrint('\t'));
    testing.expect(isCntrl(0x7f));
    testing.expect(!isASCII('\xab'));
}

/// Allocates a new slice of the same length as `ascii_string` holding its
/// bytes with ASCII uppercase letters converted to lowercase. Non-ASCII bytes
/// are copied unchanged. The result is allocated with `allocator`; the caller
/// is responsible for freeing it. Fails if the allocation fails.
pub fn allocLowerString(allocator: *std.mem.Allocator, ascii_string: []const u8) ![]u8 {
    const result = try allocator.alloc(u8, ascii_string.len);
    for (result) |*c, i| {
        c.* = toLower(ascii_string[i]);
    }
    return result;
}

test "allocLowerString" {
    var buf: [100]u8 = undefined;
    const allocator = &std.heap.FixedBufferAllocator.init(&buf).allocator;
    const result = try allocLowerString(allocator, "aBcDeFgHiJkLmNOPqrst0234+💩!");
    std.testing.expect(std.mem.eql(u8, "abcdefghijklmnopqrst0234+💩!", result));
}

/// Compares `a` and `b` byte by byte, treating ASCII letters that differ only
/// in case as equal. Slices of different length are never equal.
pub fn eqlIgnoreCase(a: []const u8, b: []const u8) bool {
    if (a.len != b.len) return false;
    for (a) |a_c, i| {
        if (toLower(a_c) != toLower(b[i])) return false;
    }
    return true;
}

test "eqlIgnoreCase" {
    std.testing.expect(eqlIgnoreCase("HEl💩Lo!", "hel💩lo!"));
    std.testing.expect(!eqlIgnoreCase("hElLo!", "hello! "));
    std.testing.expect(!eqlIgnoreCase("hElLo!", "helro!"));
}

/// Finds the first ASCII-case-insensitive occurrence of `substr` in
/// `container` at or after `start_index`, and returns the index where it
/// begins. Returns null when there is no such occurrence, including when
/// `substr` is longer than `container`.
/// TODO boyer-moore algorithm
pub fn indexOfIgnoreCasePos(container: []const u8, start_index: usize, substr: []const u8) ?usize {
    if (substr.len > container.len) return null;

    var i: usize = start_index;
    // Last index at which `substr` still fits inside `container`.
    const end = container.len - substr.len;
    while (i <= end) : (i += 1) {
        if (eqlIgnoreCase(container[i .. i + substr.len], substr)) return i;
    }
    return null;
}

/// Finds the first ASCII-case-insensitive occurrence of `substr` in
/// `container`, searching from the beginning. Returns null if not found.
pub fn indexOfIgnoreCase(container: []const u8, substr: []const u8) ?usize {
    return indexOfIgnoreCasePos(container, 0, substr);
}

test "indexOfIgnoreCase" {
    std.testing.expect(indexOfIgnoreCase("one Two Three Four", "foUr").? == 14);
    std.testing.expect(indexOfIgnoreCase("one two three FouR", "gOur") == null);
    std.testing.expect(indexOfIgnoreCase("foO", "Foo").? == 0);
    std.testing.expect(indexOfIgnoreCase("foo", "fool") == null);

    std.testing.expect(indexOfIgnoreCase("FOO foo", "fOo").? == 0);
}

test "indexOfIgnoreCasePos" {
    std.testing.expect(indexOfIgnoreCasePos("FOO foo", 1, "fOo").? == 4);
    std.testing.expect(indexOfIgnoreCasePos("FOO foo", 4, "fOo").? == 4);
    std.testing.expect(indexOfIgnoreCasePos("FOO foo", 5, "fOo") == null);
    std.testing.expect(indexOfIgnoreCasePos("foo", 0, "fool") == null);
}
