aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAndrew Kelley <andrew@ziglang.org>2020-10-17 21:38:50 -0400
committerGitHub <noreply@github.com>2020-10-17 21:38:50 -0400
commit644400054c5769a397ded4f569e2ac0600d65305 (patch)
treeb4f340abf3922fd0ccbf0110ee5e3f2c8af77b70
parente55244c4c64830d2830dea81ab6011b61c188d25 (diff)
parentff58f09b68de08a5fe33177f1874c677c762c1c0 (diff)
downloadzig-644400054c5769a397ded4f569e2ac0600d65305.tar.gz
zig-644400054c5769a397ded4f569e2ac0600d65305.zip
Merge pull request #6259 from dec05eba/master
Use boyer-moore-horspool algorithm for indexOfPos and lastIndexOf unless the haystack or needle is very small
-rw-r--r--lib/std/mem.zig93
1 files changed, 85 insertions, 8 deletions
diff --git a/lib/std/mem.zig b/lib/std/mem.zig
index 6b50e51393..a3b5dd9228 100644
--- a/lib/std/mem.zig
+++ b/lib/std/mem.zig
@@ -853,10 +853,7 @@ pub fn indexOf(comptime T: type, haystack: []const T, needle: []const T) ?usize
/// Find the index in a slice of a sub-slice, searching from the end backwards.
/// To start looking at a different index, slice the haystack first.
-/// TODO is there even a better algorithm for this?
-pub fn lastIndexOf(comptime T: type, haystack: []const T, needle: []const T) ?usize {
- if (needle.len > haystack.len) return null;
-
+fn lastIndexOfLinear(comptime T: type, haystack: []const T, needle: []const T) ?usize {
var i: usize = haystack.len - needle.len;
while (true) : (i -= 1) {
if (mem.eql(T, haystack[i .. i + needle.len], needle)) return i;
@@ -864,10 +861,7 @@ pub fn lastIndexOf(comptime T: type, haystack: []const T, needle: []const T) ?us
}
}
-// TODO boyer-moore algorithm
-pub fn indexOfPos(comptime T: type, haystack: []const T, start_index: usize, needle: []const T) ?usize {
- if (needle.len > haystack.len) return null;
-
+fn indexOfPosLinear(comptime T: type, haystack: []const T, start_index: usize, needle: []const T) ?usize {
var i: usize = start_index;
const end = haystack.len - needle.len;
while (i <= end) : (i += 1) {
@@ -876,7 +870,90 @@ pub fn indexOfPos(comptime T: type, haystack: []const T, start_index: usize, nee
return null;
}
+fn boyerMooreHorspoolPreprocessReverse(pattern: []const u8, table: *[256]usize) void {
+ for (table) |*c| {
+ c.* = pattern.len;
+ }
+
+ var i: usize = pattern.len - 1;
+ // The first item is intentionally ignored and the skip size will be pattern.len.
+ // This is the standard way boyer-moore-horspool is implemented.
+ while (i > 0) : (i -= 1) {
+ table[pattern[i]] = i;
+ }
+}
+
+fn boyerMooreHorspoolPreprocess(pattern: []const u8, table: *[256]usize) void {
+ for (table) |*c| {
+ c.* = pattern.len;
+ }
+
+ var i: usize = 0;
+ // The last item is intentionally ignored and the skip size will be pattern.len.
+ // This is the standard way boyer-moore-horspool is implemented.
+ while (i < pattern.len - 1) : (i += 1) {
+ table[pattern[i]] = pattern.len - 1 - i;
+ }
+}
+/// Find the index in a slice of a sub-slice, searching from the end backwards.
+/// To start looking at a different index, slice the haystack first.
+// Reverse boyer-moore-horspool algorithm
+pub fn lastIndexOf(comptime T: type, haystack: []const T, needle: []const T) ?usize {
+ if (needle.len > haystack.len) return null;
+ if (needle.len == 0) return haystack.len;
+
+ if (!meta.trait.hasUniqueRepresentation(T) or haystack.len < 32 or needle.len <= 2)
+ return lastIndexOfLinear(T, haystack, needle);
+
+ const haystack_bytes = sliceAsBytes(haystack);
+ const needle_bytes = sliceAsBytes(needle);
+
+ var skip_table: [256]usize = undefined;
+ boyerMooreHorspoolPreprocessReverse(needle_bytes, skip_table[0..]);
+
+ var i: usize = haystack_bytes.len - needle_bytes.len;
+ while (true) {
+ if (mem.eql(u8, haystack_bytes[i .. i + needle_bytes.len], needle_bytes)) return i;
+ const skip = skip_table[haystack_bytes[i]];
+ if (skip > i) break;
+ i -= skip;
+ }
+
+ return null;
+}
+
+// Boyer-moore-horspool algorithm
+pub fn indexOfPos(comptime T: type, haystack: []const T, start_index: usize, needle: []const T) ?usize {
+ if (needle.len > haystack.len) return null;
+ if (needle.len == 0) return 0;
+
+ if (!meta.trait.hasUniqueRepresentation(T) or haystack.len < 52 or needle.len <= 4)
+ return indexOfPosLinear(T, haystack, start_index, needle);
+
+ const haystack_bytes = sliceAsBytes(haystack);
+ const needle_bytes = sliceAsBytes(needle);
+
+ var skip_table: [256]usize = undefined;
+ boyerMooreHorspoolPreprocess(needle_bytes, skip_table[0..]);
+
+ var i: usize = start_index * @sizeOf(T);
+ while (i <= haystack_bytes.len - needle_bytes.len) {
+ if (mem.eql(u8, haystack_bytes[i .. i + needle_bytes.len], needle_bytes)) return i;
+ i += skip_table[haystack_bytes[i + needle_bytes.len - 1]];
+ }
+
+ return null;
+}
+
test "mem.indexOf" {
+ testing.expect(indexOf(u8, "one two three four five six seven eight nine ten", "three four").? == 8);
+ testing.expect(lastIndexOf(u8, "one two three four five six seven eight nine ten", "three four").? == 8);
+ testing.expect(indexOf(u8, "one two three four five six seven eight nine ten", "two two") == null);
+ testing.expect(lastIndexOf(u8, "one two three four five six seven eight nine ten", "two two") == null);
+
+ testing.expect(indexOf(u8, "one two three four five six seven eight nine ten", "").? == 0);
+ testing.expect(lastIndexOf(u8, "one two three four five six seven eight nine ten", "").? == 48);
+
testing.expect(indexOf(u8, "one two three four", "four").? == 14);
testing.expect(lastIndexOf(u8, "one two three two four", "two").? == 14);
testing.expect(indexOf(u8, "one two three four", "gour") == null);