diff options
| author | Takase <20792268+takase1121@users.noreply.github.com> | 2025-03-16 00:08:25 +0800 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2025-03-15 12:08:25 -0400 |
| commit | 3501f3d0bb6ee399ecb7cdf04cff8cf138b1ca02 (patch) | |
| tree | 57b5e575829daafba3be521ab8d4b0a71df86e48 /data | |
| parent | 8ea0393800bb59a214e8a6efd5b9fb69282803a9 (diff) | |
| download | lite-xl-3501f3d0bb6ee399ecb7cdf04cff8cf138b1ca02.tar.gz lite-xl-3501f3d0bb6ee399ecb7cdf04cff8cf138b1ca02.zip | |
syntax: warn against malformed patterns and disable them (#2029)
Diffstat (limited to 'data')
| -rw-r--r-- | data/core/syntax.lua | 48 | ||||
| -rw-r--r-- | data/core/tokenizer.lua | 1 |
2 files changed, 49 insertions, 0 deletions
```diff
diff --git a/data/core/syntax.lua b/data/core/syntax.lua
index 6555a17c..61b9af8e 100644
--- a/data/core/syntax.lua
+++ b/data/core/syntax.lua
@@ -1,4 +1,5 @@
 local common = require "core.common"
+local core = require "core"
 
 local syntax = {}
 syntax.items = {}
@@ -6,10 +7,57 @@ syntax.items = {}
 syntax.plain_text_syntax = { name = "Plain Text", patterns = {}, symbols = {} }
 
 
+---Checks whether the pattern / regex compiles correctly and matches something.
+---A pattern / regex must not match an empty string.
+---@param pattern_type "regex"|"pattern"
+---@param pattern string
+---@return boolean ok
+---@return string? error
+local function check_pattern(pattern_type, pattern)
+  local ok, err, mstart, mend
+  if pattern_type == "regex" then
+    ok, err = regex.compile(pattern)
+    if ok then
+      mstart, mend = regex.find_offsets(ok, "")
+      if mstart and mstart > mend then
+        ok, err = false, "Regex matches an empty string"
+      end
+    end
+  else
+    ok, mstart, mend = pcall(string.ufind, "", pattern)
+    if ok and mstart and mstart > mend then
+      ok, err = false, "Pattern matches an empty string"
+    elseif not ok then
+      err = mstart --[[@as string]]
+    end
+  end
+  return ok --[[@as boolean]], err
+end
+
 function syntax.add(t)
   if type(t.space_handling) ~= "boolean" then t.space_handling = true end
 
   if t.patterns then
+    -- do a sanity check on the patterns / regex to make sure they are actually correct
+    for i, pattern in ipairs(t.patterns) do
+      local p, ok, err, name = pattern.pattern or pattern.regex, nil, nil, nil
+      if type(p) == "table" then
+        for j = 1, 2 do
+          ok, err = check_pattern(pattern.pattern and "pattern" or "regex", p[j])
+          if not ok then name = string.format("#%d:%d <%s>", i, j, p[j]) end
+        end
+      elseif type(p) == "string" then
+        ok, err = check_pattern(pattern.pattern and "pattern" or "regex", p)
+        if not ok then name = string.format("#%d <%s>", i, p) end
+      else
+        ok, err, name = false, "Missing pattern or regex", "#"..i
+      end
+      if not ok then
+        pattern.disabled = true
+        core.warn("Malformed pattern %s in %s language plugin: %s", name, t.name, err)
+      end
+    end
+
     -- the rule %s+ gives us a performance gain for the tokenizer in lines with
     -- long amounts of consecutive spaces, can be disabled by plugins where it
     -- causes conflicts by declaring the table property: space_handling = false
diff --git a/data/core/tokenizer.lua b/data/core/tokenizer.lua
index b8f0fe04..ca2339bd 100644
--- a/data/core/tokenizer.lua
+++ b/data/core/tokenizer.lua
@@ -203,6 +203,7 @@ function tokenizer.tokenize(incoming_syntax, text, state, resume)
     local target, res = p.pattern or p.regex, { 1, offset - 1 }
     local p_idx = close and 2 or 1
     local code = type(target) == "table" and target[p_idx] or target
+    if p.disabled then return end
 
     if p.whole_line == nil then p.whole_line = { } end
     if p.whole_line[p_idx] == nil then
```
