aboutsummaryrefslogtreecommitdiff
path: root/data
diff options
context:
space:
mode:
author: Takase <20792268+takase1121@users.noreply.github.com> 2025-03-16 00:08:25 +0800
committer: GitHub <noreply@github.com> 2025-03-15 12:08:25 -0400
commit: 3501f3d0bb6ee399ecb7cdf04cff8cf138b1ca02 (patch)
tree: 57b5e575829daafba3be521ab8d4b0a71df86e48 /data
parent: 8ea0393800bb59a214e8a6efd5b9fb69282803a9 (diff)
download: lite-xl-3501f3d0bb6ee399ecb7cdf04cff8cf138b1ca02.tar.gz
lite-xl-3501f3d0bb6ee399ecb7cdf04cff8cf138b1ca02.zip
syntax: warn against malformed patterns and disable them (#2029)
Diffstat (limited to 'data')
-rw-r--r-- data/core/syntax.lua 48
-rw-r--r-- data/core/tokenizer.lua 1
2 files changed, 49 insertions, 0 deletions
diff --git a/data/core/syntax.lua b/data/core/syntax.lua
index 6555a17c..61b9af8e 100644
--- a/data/core/syntax.lua
+++ b/data/core/syntax.lua
@@ -1,4 +1,5 @@
local common = require "core.common"
+local core = require "core"
local syntax = {}
syntax.items = {}
@@ -6,10 +7,57 @@ syntax.items = {}
syntax.plain_text_syntax = { name = "Plain Text", patterns = {}, symbols = {} }
+---Checks that a Lua pattern / regex compiles and does not match the empty string.
+---Both kinds are tried against the empty subject ""; a match whose start is past its end is treated as an empty match.
+---@param pattern_type "regex"|"pattern" "regex" goes through regex.compile; any other value is handled as a Lua pattern
+---@param pattern string the pattern / regex source to validate
+---@return boolean ok truthy when the pattern is usable (for a regex this is the value regex.compile returned), falsy otherwise
+---@return string? error reason for the rejection, when one is available
+local function check_pattern(pattern_type, pattern)
+ local ok, err, mstart, mend
+ if pattern_type == "regex" then
+ ok, err = regex.compile(pattern) -- a falsy ok skips the match test; err is passed through to the caller
+ if ok then
+ mstart, mend = regex.find_offsets(ok, "")
+ if mstart and mstart > mend then -- start past end: zero-length match
+ ok, err = false, "Regex matches an empty string"
+ end
+ end
+ else
+ ok, mstart, mend = pcall(string.ufind, "", pattern) -- pcall turns any error raised by string.ufind into ok == false
+ if ok and mstart and mstart > mend then -- start past end: zero-length match
+ ok, err = false, "Pattern matches an empty string"
+ elseif not ok then
+ err = mstart --[[@as string]] -- when pcall fails, its second result is the error message
+ end
+ end
+ return ok --[[@as boolean]], err
+end
+
function syntax.add(t)
if type(t.space_handling) ~= "boolean" then t.space_handling = true end
if t.patterns then
+ -- do a sanity check on the patterns / regex to make sure they are actually correct
+ for i, pattern in ipairs(t.patterns) do
+ local p, ok, err, name = pattern.pattern or pattern.regex, nil, nil, nil
+ if type(p) == "table" then
+ for j = 1, 2 do
+ ok, err = check_pattern(pattern.pattern and "pattern" or "regex", p[j])
+ if not ok then name = string.format("#%d:%d <%s>", i, j, p[j]) end
+ end
+ elseif type(p) == "string" then
+ ok, err = check_pattern(pattern.pattern and "pattern" or "regex", p)
+ if not ok then name = string.format("#%d <%s>", i, p) end
+ else
+ ok, err, name = false, "Missing pattern or regex", "#"..i
+ end
+ if not ok then
+ pattern.disabled = true
+ core.warn("Malformed pattern %s in %s language plugin: %s", name, t.name, err)
+ end
+ end
+
-- the rule %s+ gives us a performance gain for the tokenizer in lines with
-- long amounts of consecutive spaces, can be disabled by plugins where it
-- causes conflicts by declaring the table property: space_handling = false
diff --git a/data/core/tokenizer.lua b/data/core/tokenizer.lua
index b8f0fe04..ca2339bd 100644
--- a/data/core/tokenizer.lua
+++ b/data/core/tokenizer.lua
@@ -203,6 +203,7 @@ function tokenizer.tokenize(incoming_syntax, text, state, resume)
local target, res = p.pattern or p.regex, { 1, offset - 1 }
local p_idx = close and 2 or 1
local code = type(target) == "table" and target[p_idx] or target
+ if p.disabled then return end
if p.whole_line == nil then p.whole_line = { } end
if p.whole_line[p_idx] == nil then