aboutsummaryrefslogtreecommitdiff
path: root/lib/std/ascii.zig
diff options
context:
space:
mode:
author Andrew Kelley <andrew@ziglang.org> 2019-09-26 01:54:45 -0400
committer GitHub <noreply@github.com> 2019-09-26 01:54:45 -0400
commit 68bb3945708c43109c48bda3664176307d45b62c (patch)
tree afb9731e10cef9d192560b52cd9ae2cf179775c4 /lib/std/ascii.zig
parent 6128bc728d1e1024a178c16c2149f5b1a167a013 (diff)
parent 4637e8f9699af9c3c6cf4df50ef5bb67c7a318a4 (diff)
download zig-68bb3945708c43109c48bda3664176307d45b62c.tar.gz
zig-68bb3945708c43109c48bda3664176307d45b62c.zip
Merge pull request #3315 from ziglang/mv-std-lib
Move std/ to lib/std/
Diffstat (limited to 'lib/std/ascii.zig')
-rw-r--r-- lib/std/ascii.zig 284
1 files changed, 284 insertions, 0 deletions
diff --git a/lib/std/ascii.zig b/lib/std/ascii.zig
new file mode 100644
index 0000000000..2bc11ba3f2
--- /dev/null
+++ b/lib/std/ascii.zig
@@ -0,0 +1,284 @@
+// Does NOT look at the locale the way C89's toupper(3), isspace() et cetera do.
+// I could have taken only a u7 to make this clear, but it would be slower
+// (and 128 bytes is not much to pay).
+//
+// It is my opinion that encodings other than UTF-8 should not be supported.
+// Also does not handle Unicode character classes.
+//
+// https://upload.wikimedia.org/wikipedia/commons/thumb/c/cf/USASCII_code_chart.png/1200px-USASCII_code_chart.png
+
+const std = @import("std");
+
+/// Bit position of each character class inside one entry of `combinedTable`.
+/// The tag value is used directly as a shift amount, so the eight stored
+/// classes occupy bits 0 through 7 of a `u8`.
+const tIndex = enum(u3) {
+    Alpha = 0,
+    Hex = 1,
+    Space = 2,
+    Digit = 3,
+    Lower = 4,
+    Upper = 5,
+    // Ctrl is not stored: it is `c < 0x20 or c == DEL`.
+    // Print is not stored: it is Graph or ' ' (NOT '\t' et cetera).
+    Punct = 6,
+    Graph = 7,
+    // ASCII is not stored: it is "top bit clear".
+    // Blank is not stored: it is `c == ' ' or c == '\x09'`.
+};
+
+// Lookup table, built at compile time, that maps every byte value to a bitmask
+// of the character classes it belongs to. Bit N of an entry is set when the
+// byte is a member of the class whose `tIndex` tag has integer value N.
+// Entries 128..255 are zero: non-ASCII bytes belong to no class.
+const combinedTable = init: {
+ comptime var table: [256]u8 = undefined;
+
+ const mem = std.mem;
+
+ // Each of the following arrays has one u1 per ASCII code (0..127), laid out
+ // 16 per row so that the row is the high nibble and the column the low nibble.
+
+ // Letters: 'A'-'Z' and 'a'-'z'.
+ const alpha = [_]u1{
+ // 0, 1, 2, 3, 4, 5, 6, 7 ,8, 9,10,11,12,13,14,15
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
+ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
+ };
+ // Lowercase letters: 'a'-'z'.
+ const lower = [_]u1{
+ // 0, 1, 2, 3, 4, 5, 6, 7 ,8, 9,10,11,12,13,14,15
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
+ };
+ // Uppercase letters: 'A'-'Z'.
+ const upper = [_]u1{
+ // 0, 1, 2, 3, 4, 5, 6, 7 ,8, 9,10,11,12,13,14,15
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ };
+ // Decimal digits: '0'-'9'.
+ const digit = [_]u1{
+ // 0, 1, 2, 3, 4, 5, 6, 7 ,8, 9,10,11,12,13,14,15
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
+
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ };
+ // Hexadecimal digits: '0'-'9', 'A'-'F' and 'a'-'f'.
+ const hex = [_]u1{
+ // 0, 1, 2, 3, 4, 5, 6, 7 ,8, 9,10,11,12,13,14,15
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
+
+ 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ };
+ // Whitespace: codes 9 through 13 (tab, LF, VT, FF, CR) and ' '.
+ const space = [_]u1{
+ // 0, 1, 2, 3, 4, 5, 6, 7 ,8, 9,10,11,12,13,14,15
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ };
+ // Punctuation: every code in 0x21..0x7e that is neither a letter nor a digit.
+ const punct = [_]u1{
+ // 0, 1, 2, 3, 4, 5, 6, 7 ,8, 9,10,11,12,13,14,15
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1,
+
+ 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
+ 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,
+ };
+ // Graphic characters: every code in 0x21..0x7e (space and DEL excluded).
+ const graph = [_]u1{
+ // 0, 1, 2, 3, 4, 5, 6, 7 ,8, 9,10,11,12,13,14,15
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
+ };
+
+ // Pack the eight per-class bits of each ASCII code into a single byte,
+ // shifting each bit to the position given by its tIndex tag.
+ comptime var i = 0;
+ inline while (i < 128) : (i += 1) {
+ table[i] =
+ u8(alpha[i]) << @enumToInt(tIndex.Alpha) |
+ u8(hex[i]) << @enumToInt(tIndex.Hex) |
+ u8(space[i]) << @enumToInt(tIndex.Space) |
+ u8(digit[i]) << @enumToInt(tIndex.Digit) |
+ u8(lower[i]) << @enumToInt(tIndex.Lower) |
+ u8(upper[i]) << @enumToInt(tIndex.Upper) |
+ u8(punct[i]) << @enumToInt(tIndex.Punct) |
+ u8(graph[i]) << @enumToInt(tIndex.Graph);
+ }
+ // The upper half (bytes 128..255) is outside ASCII and gets no class bits.
+ mem.set(u8, table[128..256], 0);
+ break :init table;
+};
+
+/// Reports whether byte `c` has the bit for class `t` set in `combinedTable`.
+fn inTable(c: u8, t: tIndex) bool {
+    const class_bit = u8(1) << @enumToInt(t);
+    const classes = combinedTable[c];
+    return (classes & class_bit) != 0;
+}
+
+/// Returns true if `c` is an ASCII letter or decimal digit.
+/// A single table lookup tests both class bits at once.
+pub fn isAlNum(c: u8) bool {
+    const alpha_bit = u8(1) << @enumToInt(tIndex.Alpha);
+    const digit_bit = u8(1) << @enumToInt(tIndex.Digit);
+    return (combinedTable[c] & (alpha_bit | digit_bit)) != 0;
+}
+
+/// Returns true if `c` is an ASCII letter (`A`-`Z` or `a`-`z`).
+pub fn isAlpha(c: u8) bool {
+    const class = tIndex.Alpha;
+    return inTable(c, class);
+}
+
+/// Returns true if `c` is an ASCII control character:
+/// any code below 0x20, or DEL (0x7f).
+pub fn isCntrl(c: u8) bool {
+    return switch (c) {
+        0...0x1f, 0x7f => true,
+        else => false,
+    };
+}
+
+/// Returns true if `c` is an ASCII decimal digit (`0`-`9`).
+pub fn isDigit(c: u8) bool {
+    const class = tIndex.Digit;
+    return inTable(c, class);
+}
+
+/// Returns true if `c` is a graphic ASCII character, i.e. any code in
+/// 0x21..0x7e. The space character is not graphic.
+pub fn isGraph(c: u8) bool {
+    const class = tIndex.Graph;
+    return inTable(c, class);
+}
+
+/// Returns true if `c` is an ASCII lowercase letter (`a`-`z`).
+pub fn isLower(c: u8) bool {
+    const class = tIndex.Lower;
+    return inTable(c, class);
+}
+
+/// Returns true if `c` is a printable ASCII character: the space character
+/// or any graphic character. Other whitespace such as '\t' is not printable.
+pub fn isPrint(c: u8) bool {
+    if (c == ' ') return true;
+    return inTable(c, tIndex.Graph);
+}
+
+/// Returns true if `c` is ASCII punctuation: a graphic character that is
+/// neither a letter nor a digit.
+pub fn isPunct(c: u8) bool {
+    const class = tIndex.Punct;
+    return inTable(c, class);
+}
+
+/// Returns true if `c` is ASCII whitespace: ' ' or one of the codes
+/// 9 through 13 (tab, line feed, vertical tab, form feed, carriage return).
+pub fn isSpace(c: u8) bool {
+    const class = tIndex.Space;
+    return inTable(c, class);
+}
+
+/// Returns true if `c` is an ASCII uppercase letter (`A`-`Z`).
+pub fn isUpper(c: u8) bool {
+    const class = tIndex.Upper;
+    return inTable(c, class);
+}
+
+/// Returns true if `c` is an ASCII hexadecimal digit
+/// (`0`-`9`, `A`-`F` or `a`-`f`).
+pub fn isXDigit(c: u8) bool {
+    const class = tIndex.Hex;
+    return inTable(c, class);
+}
+
+/// Returns true if `c` lies in the 7-bit ASCII range, i.e. its top bit is clear.
+pub fn isASCII(c: u8) bool {
+    return (c & 0x80) == 0;
+}
+
+/// Returns true if `c` is a space or a horizontal tab.
+pub fn isBlank(c: u8) bool {
+    return switch (c) {
+        ' ', '\t' => true,
+        else => false,
+    };
+}
+
+/// Converts an ASCII lowercase letter to its uppercase form.
+/// Every other byte, including non-ASCII bytes, is returned unchanged.
+pub fn toUpper(c: u8) u8 {
+    // Upper- and lowercase ASCII letters differ only in bit 5 (0x20);
+    // clearing it maps 'a'-'z' onto 'A'-'Z'.
+    return if (isLower(c)) c & 0xDF else c;
+}
+
+/// Converts an ASCII uppercase letter to its lowercase form.
+/// Every other byte, including non-ASCII bytes, is returned unchanged.
+pub fn toLower(c: u8) u8 {
+    // Upper- and lowercase ASCII letters differ only in bit 5 (0x20);
+    // setting it maps 'A'-'Z' onto 'a'-'z'.
+    return if (isUpper(c)) c | 0x20 else c;
+}
+
+test "ascii character classes" {
+    const expect = std.testing.expect;
+
+    // Case conversion touches ASCII letters only; punctuation and
+    // non-ASCII bytes pass through unchanged.
+    expect(toUpper('c') == 'C');
+    expect(toUpper(':') == ':');
+    expect(toUpper('\xab') == '\xab');
+    expect(toLower('C') == 'c');
+
+    // Classification predicates.
+    expect(isAlpha('c'));
+    expect(!isAlpha('5'));
+    expect(isSpace(' '));
+}
+
+/// Allocates a new buffer of the same length as `ascii_string` and fills it
+/// with the ASCII-lowercased bytes of the input. Bytes that are not ASCII
+/// uppercase letters are copied unchanged.
+/// The result is allocated with `allocator`; the caller owns it.
+/// Returns an error if the allocation fails.
+pub fn allocLowerString(allocator: *std.mem.Allocator, ascii_string: []const u8) ![]u8 {
+    const lowered = try allocator.alloc(u8, ascii_string.len);
+    for (ascii_string) |byte, idx| {
+        lowered[idx] = toLower(byte);
+    }
+    return lowered;
+}
+
+test "allocLowerString" {
+    // A fixed stack buffer backs the allocator, so nothing needs freeing.
+    var backing: [100]u8 = undefined;
+    const allocator = &std.heap.FixedBufferAllocator.init(&backing).allocator;
+
+    // The multi-byte UTF-8 sequence must come through untouched.
+    const lowered = try allocLowerString(allocator, "aBcDeFgHiJkLmNOPqrst0234+💩!");
+    std.testing.expect(std.mem.eql(u8, "abcdefghijklmnopqrst0234+💩!", lowered));
+}
+
+/// Compares `a` and `b` byte by byte, treating ASCII upper- and lowercase
+/// letters as equal. Slices of different lengths are never equal, and bytes
+/// that are not ASCII letters must match exactly.
+pub fn eqlIgnoreCase(a: []const u8, b: []const u8) bool {
+    if (a.len != b.len) return false;
+
+    var idx: usize = 0;
+    while (idx < a.len) : (idx += 1) {
+        if (toLower(a[idx]) != toLower(b[idx])) return false;
+    }
+    return true;
+}
+
+test "eqlIgnoreCase" {
+    const expect = std.testing.expect;
+
+    // Same text in different case, with identical non-ASCII bytes.
+    expect(eqlIgnoreCase("HEl💩Lo!", "hel💩lo!"));
+    // Different lengths.
+    expect(!eqlIgnoreCase("hElLo!", "hello! "));
+    // Same length, different letter.
+    expect(!eqlIgnoreCase("hElLo!", "helro!"));
+}
+
+/// Searches `container` for `substr`, ignoring ASCII case, considering only
+/// matches that begin at index `start_index` or later.
+/// Returns the index of the first such match, or null when there is none
+/// (including when `substr` is longer than `container`).
+/// TODO boyer-moore algorithm
+pub fn indexOfIgnoreCasePos(container: []const u8, start_index: usize, substr: []const u8) ?usize {
+    if (container.len < substr.len) return null;
+
+    // Last index at which a window of substr.len bytes still fits.
+    const last_start = container.len - substr.len;
+    var pos: usize = start_index;
+    while (pos <= last_start) {
+        const window = container[pos .. pos + substr.len];
+        if (eqlIgnoreCase(window, substr)) return pos;
+        pos += 1;
+    }
+    return null;
+}
+
+/// Finds the first occurrence of `substr` in `container`, ignoring ASCII case,
+/// searching from the beginning of `container`.
+/// Returns the index where the match starts, or null if there is no match.
+pub fn indexOfIgnoreCase(container: []const u8, substr: []const u8) ?usize {
+    return indexOfIgnoreCasePos(container, 0, substr);
+}
+
+test "indexOfIgnoreCase" {
+    const expect = std.testing.expect;
+
+    // Match at the very end of the container.
+    expect(indexOfIgnoreCase("one Two Three Four", "foUr").? == 14);
+    // No match.
+    expect(indexOfIgnoreCase("one two three FouR", "gOur") == null);
+    // Needle as long as the container.
+    expect(indexOfIgnoreCase("foO", "Foo").? == 0);
+    // Needle longer than the container.
+    expect(indexOfIgnoreCase("foo", "fool") == null);
+
+    // With several matches, the first one wins.
+    expect(indexOfIgnoreCase("FOO foo", "fOo").? == 0);
+}