aboutsummaryrefslogtreecommitdiff
path: root/lib/std/compress/flate/Lookup.zig
blob: d1d93de50abd641d85eb7720979db9b11c2ca58a (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
//! Lookup of the previous locations for the same 4 byte data. Works on hash of
//! 4 bytes data. Head contains position of the first match for each hash. Chain
//! points to the previous position of the same hash given the current location.

const std = @import("std");
const testing = std.testing;
const expect = testing.expect;
const flate = @import("../flate.zig");
const Token = @import("Token.zig");

const Lookup = @This();

const prime4 = 0x9E3779B1; // 4 bytes prime number 2654435761
const chain_len = 2 * flate.history_len;

pub const bits = 15;
pub const len = 1 << bits;
pub const shift = 32 - bits;

// Maps hash => first position
head: [len]u16 = [_]u16{0} ** len,
// Maps position => previous positions for the same hash value
chain: [chain_len]u16 = [_]u16{0} ** (chain_len),

// Calculates hash of the 4 bytes from data.
// Inserts `pos` position of that hash in the lookup tables.
// Returns previous location with the same hash value.
pub fn add(self: *Lookup, data: []const u8, pos: u16) u16 {
    if (data.len < 4) return 0;
    const h = hash(data[0..4]);
    return self.set(h, pos);
}

// Returns previous location with the same hash value given the current
// position.
pub fn prev(self: *Lookup, pos: u16) u16 {
    return self.chain[pos];
}

fn set(self: *Lookup, h: u32, pos: u16) u16 {
    const p = self.head[h];
    self.head[h] = pos;
    self.chain[pos] = p;
    return p;
}

// Slide all positions in head and chain for `n`
pub fn slide(self: *Lookup, n: u16) void {
    for (&self.head) |*v| {
        v.* -|= n;
    }
    var i: usize = 0;
    while (i < n) : (i += 1) {
        self.chain[i] = self.chain[i + n] -| n;
    }
}

// Add `len` 4 bytes hashes from `data` into lookup.
// Position of the first byte is `pos`.
pub fn bulkAdd(self: *Lookup, data: []const u8, length: u16, pos: u16) void {
    if (length == 0 or data.len < Token.min_length) {
        return;
    }
    var hb =
        @as(u32, data[3]) |
        @as(u32, data[2]) << 8 |
        @as(u32, data[1]) << 16 |
        @as(u32, data[0]) << 24;
    _ = self.set(hashu(hb), pos);

    var i = pos;
    for (4..@min(length + 3, data.len)) |j| {
        hb = (hb << 8) | @as(u32, data[j]);
        i += 1;
        _ = self.set(hashu(hb), i);
    }
}

// Calculates hash of the first 4 bytes of `b`.
fn hash(b: *const [4]u8) u32 {
    return hashu(@as(u32, b[3]) |
        @as(u32, b[2]) << 8 |
        @as(u32, b[1]) << 16 |
        @as(u32, b[0]) << 24);
}

fn hashu(v: u32) u32 {
    return @intCast((v *% prime4) >> shift);
}

test add {
    const data = [_]u8{
        0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
        0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
        0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
        0x01, 0x02, 0x03,
    };

    var h: Lookup = .{};
    for (data, 0..) |_, i| {
        const p = h.add(data[i..], @intCast(i));
        if (i >= 8 and i < 24) {
            try expect(p == i - 8);
        } else {
            try expect(p == 0);
        }
    }

    const v = Lookup.hash(data[2 .. 2 + 4]);
    try expect(h.head[v] == 2 + 16);
    try expect(h.chain[2 + 16] == 2 + 8);
    try expect(h.chain[2 + 8] == 2);
}

test bulkAdd {
    const data = "Lorem ipsum dolor sit amet, consectetur adipiscing elit.";

    // one by one
    var h: Lookup = .{};
    for (data, 0..) |_, i| {
        _ = h.add(data[i..], @intCast(i));
    }

    // in bulk
    var bh: Lookup = .{};
    bh.bulkAdd(data, data.len, 0);

    try testing.expectEqualSlices(u16, &h.head, &bh.head);
    try testing.expectEqualSlices(u16, &h.chain, &bh.chain);
}