1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
|
local common = require "core.common"
local core = require "core"
-- Module table:
--   items             list of registered syntax definitions (filled by syntax.add)
--   plain_text_syntax definition returned by syntax.get when nothing matches
local syntax = {
  items = {},
  plain_text_syntax = { name = "Plain Text", patterns = {}, symbols = {} },
}
---Checks whether the pattern / regex compiles correctly and matches something.
---A pattern / regex must not match an empty string.
---@param pattern_type "regex"|"pattern"
---@param pattern string
---@return boolean ok `true` when the pattern is usable, `false` otherwise
---@return string? error description of the problem when `ok` is `false`
local function check_pattern(pattern_type, pattern)
  -- Reject missing / non-string input up front so the caller receives a
  -- readable message instead of handing a bad argument to the matcher.
  if type(pattern) ~= "string" then
    return false, "Pattern must be a string"
  end

  if pattern_type == "regex" then
    local compiled, compile_err = regex.compile(pattern)
    if not compiled then
      return false, compile_err
    end
    -- Run the compiled regex against "": a hit whose start lies past its end
    -- is treated as a match of the empty string.
    local mstart, mend = regex.find_offsets(compiled, "")
    if mstart and mstart > mend then
      return false, "Regex matches an empty string"
    end
    -- Return a real boolean rather than the compiled regex object.
    return true
  end

  -- pcall captures the error thrown for a malformed Lua pattern; in that case
  -- the second result is the error message.
  local ok, mstart, mend = pcall(string.ufind, "", pattern)
  if not ok then
    return false, mstart --[[@as string]]
  end
  if mstart and mstart > mend then
    return false, "Pattern matches an empty string"
  end
  return true
end
---Registers a syntax definition by appending it to `syntax.items`.
---
---`t.space_handling` defaults to `true` unless it is already a boolean.
---When `t.patterns` is present, every entry is validated with `check_pattern`;
---an entry that has no pattern / regex or fails validation is flagged with
---`disabled = true` and reported through `core.warn`. Afterwards the generic
---whitespace rule (only if `space_handling` is on) and the generic word rule
---are appended to `t.patterns`.
---@param t table syntax definition; fields used here: `name`, `patterns`, `space_handling`
function syntax.add(t)
  if type(t.space_handling) ~= "boolean" then t.space_handling = true end

  if t.patterns then
    -- do a sanity check on the patterns / regex to make sure they are actually correct
    for i, pattern in ipairs(t.patterns) do
      local p = pattern.pattern or pattern.regex
      local pattern_type = pattern.pattern and "pattern" or "regex"
      local ok, err, name

      if type(p) == "table" then
        -- Only the first two entries are validated. Stop at the first failure:
        -- otherwise a valid second entry would overwrite the failed result of
        -- the first one and the broken pattern would stay enabled.
        for j = 1, 2 do
          ok, err = check_pattern(pattern_type, p[j])
          if not ok then
            name = string.format("#%d:%d <%s>", i, j, tostring(p[j]))
            break
          end
        end
      elseif type(p) == "string" then
        ok, err = check_pattern(pattern_type, p)
        if not ok then name = string.format("#%d <%s>", i, p) end
      else
        ok, err, name = false, "Missing pattern or regex", "#" .. i
      end

      if not ok then
        pattern.disabled = true
        core.warn("Malformed pattern %s in %s language plugin: %s", name, t.name, err)
      end
    end

    -- the rule %s+ gives us a performance gain for the tokenizer in lines with
    -- long amounts of consecutive spaces, can be disabled by plugins where it
    -- causes conflicts by declaring the table property: space_handling = false
    if t.space_handling then
      table.insert(t.patterns, { pattern = "%s+", type = "normal" })
    end

    -- this rule gives us additional performance gain by matching every word
    -- that was not matched by the syntax patterns as a single token, preventing
    -- the tokenizer from iterating over each character individually which is a
    -- lot slower since iteration occurs in lua instead of C and adding to that
    -- it will also try to match every pattern to a single char (same as spaces)
    table.insert(t.patterns, { pattern = "%w+%f[%s]", type = "normal" })
  end

  table.insert(syntax.items, t)
end
---Finds the registered syntax whose `field` pattern(s) match the longest
---portion of `text`.
---
---Items are scanned from the most recently added to the oldest and a
---candidate only replaces the current best on a strictly longer match, so on
---equal lengths the most recently added syntax is kept.
---@param text string text to match (e.g. a file name or a header line)
---@param field string key of the syntax table holding the pattern(s), e.g. "files"
---@return table? best_syntax the best matching syntax, or nil when nothing matches
local function find(text, field)
  local best_len = 0
  local best_syntax
  for i = #syntax.items, 1, -1 do
    local t = syntax.items[i]
    local s, e = common.match_pattern(text, t[field] or {})
    if s then
      -- Score by the real match length (inclusive bounds). Using `e - s`
      -- scored a one-character match as 0, so it could never be selected.
      local len = e - s + 1
      if len > best_len then
        best_len = len
        best_syntax = t
      end
    end
  end
  return best_syntax
end
---Picks the syntax for a document.
---The file name is tried first against each syntax's `files` patterns, then
---the header against the `headers` patterns; if neither produces a match the
---plain text syntax is returned.
---@param filename string? file name to match; skipped when nil / false
---@param header string? header text to match; skipped when nil / false
---@return table syntax
function syntax.get(filename, header)
  local by_name = filename and find(filename, "files")
  if by_name then
    return by_name
  end

  local by_header = header and find(header, "headers")
  if by_header then
    return by_header
  end

  return syntax.plain_text_syntax
end
-- Hand the module table (items, plain_text_syntax, add, get) to the requirer.
return syntax
|