diff options
Diffstat (limited to 'plugins/editorconfig/parser.lua')
-rw-r--r-- | plugins/editorconfig/parser.lua | 553 |
1 files changed, 553 insertions, 0 deletions
diff --git a/plugins/editorconfig/parser.lua b/plugins/editorconfig/parser.lua new file mode 100644 index 0000000..b0ec689 --- /dev/null +++ b/plugins/editorconfig/parser.lua @@ -0,0 +1,553 @@ +-- Lua parser implementation of the .editorconfig spec as best understood. +-- @copyright Jefferson Gonzalez <jgmdev@gmail.com> +-- @license MIT + +local core = require "core" +local config = require "core.config" + +local STANDALONE = false +for i, argument in ipairs(ARGS) do + if argument == "test" and ARGS[i+1] == "editorconfig" then + STANDALONE = true + end +end + +---Logger that will output using lite-xl logging functions or print to +---terminal if the parser is running in standalone mode. +---@param type "log" | "error" +---@param format string +---@param ... any +local function log(type, format, ...) + if not STANDALONE then + core[type]("[EditorConfig]: " .. format, ...) + else + print("[" .. type:upper() .. "]: " .. string.format(format, ...)) + end +end + +---Represents an .editorconfig path rule/expression. +---@class plugins.editorconfig.parser.rule +---Path expression as found between square brackets. +---@field expression string | table<integer,string> +---The expression converted to a regex. +---@field regex string | table<integer,string> +---@field regex_compiled any? | table<integer,string> +---@field negation boolean Indicates that the expression is a negation. +---@field ranges table<integer,number> List of ranges found on the expression. + +---Represents a section of the .editorconfig with all its config options. +---@class plugins.editorconfig.parser.section +---@field rule plugins.editorconfig.parser.rule +---@field equivalent_rules plugins.editorconfig.parser.rule[] +---@field indent_style "tab" | "space" +---@field indent_size integer +---@field tab_width integer +---@field end_of_line "lf" | "cr" | "crlf" +---@field charset "latin1" | "utf-8" | "utf-8-bom" | "utf-16be" | "utf-16le" +---@field trim_trailing_whitespace boolean +---@field insert_final_newline boolean + +---EditorConfig parser class and filename config matching. +---@class plugins.editorconfig.parser +---@field config_path string +---@field sections plugins.editorconfig.parser.section[] +---@field root boolean +local Parser = {} +Parser.__index = Parser + +---Constructor +---@param config_path string +---@return plugins.editorconfig.parser +function Parser.new(config_path) + local self = {} + setmetatable(self, Parser) + self.config_path = config_path + self.sections = {} + self.root = false + self:read() + return self +end + +--- char to hex cache and automatic converter +---@type table<string,string> +local hex_value = {} +setmetatable(hex_value, { + __index = function(t, k) + local v = rawget(t, k) + if v == nil then + v = string.format("%x", string.byte(k)) + rawset(t, k, v) + end + return v + end +}) + +---Simplifies managing rules with other inner rules like {...} which can +---contain escaped \\{ \\} and expressions that are easier handled after +---converting the escaped special characters to \xXX counterparts. +---@param value string +---@return string escaped_values +local function escapes_to_regex_hex(value) + local escaped_chars = {} + for char in value:ugmatch("\\(.)") do + table.insert(escaped_chars, char) + end + for _, char in ipairs(escaped_chars) do + value = value:ugsub("\\" .. char, "\\x" .. hex_value[char]) + end + return value +end + +---An .editorconfig path expression to regex conversion rule. +---@class rule +---@field rule string Lua pattern. +---Callback conversion function. +---@field conversion fun(match:string, section:plugins.editorconfig.parser.section):string + +---List of conversion rules applied to brace expressions. +---@type rule[] +local RULES_BRACES = { + { rule = "^%(", conversion = function() return "\\(" end }, + { rule = "^%)", conversion = function() return "\\)" end }, + { rule = "^%.", conversion = function() return "\\." end }, + { rule = "^\\%[", conversion = function() return "\\[" end }, + { rule = "^\\%]", conversion = function() return "\\]" end }, + { rule = "^\\!", conversion = function() return "!" end }, + { rule = "^\\;", conversion = function() return ";" end }, + { rule = "^\\#", conversion = function() return "#" end }, + { rule = "^\\,", conversion = function() return "," end }, + { rule = "^\\{", conversion = function() return "{" end }, + { rule = "^\\}", conversion = function() return "}" end }, + { rule = "^,", conversion = function() return "|" end }, + { rule = "^\\%*", conversion = function() return "\\*" end }, + { rule = "^%*", conversion = function() return "[^\\/]*" end }, + { rule = "^%*%*", conversion = function() return ".*" end }, + { rule = "^%?", conversion = function() return "." end }, + { rule = "^{}", conversion = function() return "{}" end }, + { rule = "^{[^,]+}", conversion = function(match) return match end }, + { rule = "^%b{}", + conversion = function(match) + local out = match:ugsub("%(", "\\(") + :ugsub("%)", "\\)") + :ugsub("%.", "\\.") + :ugsub("\\%[", "[\\[]") + :ugsub("\\%]", "[\\]]") + :ugsub("^\\!", "!") + :ugsub("^\\;", ";") + :ugsub("^\\#", "#") + -- negation chars list + :ugsub("%[!(%a+)%]", "[^%1]") + :ugsub("\\\\", "[\\]") + -- escaped braces + :ugsub("\\{", "[{]") + :ugsub("\\}", "[}]") + -- non escaped braces + :ugsub("{([^%]])", "(%1") + :ugsub("}([^%]])", ")%1") + :ugsub("^{", "(") + :ugsub("}$", ")") + -- escaped globs + :ugsub("\\%*", "[\\*]") + :ugsub("\\%?", "[\\?]") + -- non escaped globs + :ugsub("%*%*", "[*][*]") -- prevent this glob from expanding to next sub + :ugsub("%*([^%]])", "[^\\/]*%1") + :ugsub("%[%*%]%[%*%]", ".*") + :ugsub("%?([^%]])", ".%1") + -- escaped comma + :ugsub("\\,", "[,]") + -- non escaped comma + :ugsub(",([^%]])", "|%1") + return out + end + }, + { rule = "^%[[^/%]]*%]", + conversion = function(match) + local negation = match:umatch("^%[!") + local chars = match:umatch("^%[!?(.-)%]") + chars = chars:ugsub("^%-", "\\-"):ugsub("%-$", "\\-") + local out = "" + if negation then + out = "[^"..chars.."]" + else + out = "["..chars.."]" + end + return out + end + }, +} + +---List of conversion rules applied to .editorconfig path expressions. +---@type rule[] +local RULES = { + -- normalize escaped .editorconfig special chars or keep them escaped + { rule = "^\\x[a-fA-F][a-fA-F]", conversion = function(match) return match end }, + { rule = "^\\%*", conversion = function() return "\\*" end }, + { rule = "^\\%?", conversion = function() return "\\?" end }, + { rule = "^\\{", conversion = function() return "{" end }, + { rule = "^\\}", conversion = function() return "}" end }, + { rule = "^\\%[", conversion = function() return "\\[" end }, + { rule = "^\\%]", conversion = function() return "\\]" end }, + { rule = "^\\!", conversion = function() return "!" end }, + { rule = "^\\;", conversion = function() return ";" end }, + { rule = "^\\#", conversion = function() return "#" end }, + -- escape special chars + { rule = "^%.", conversion = function() return "\\." end }, + { rule = "^%(", conversion = function() return "\\(" end }, + { rule = "^%)", conversion = function() return "\\)" end }, + { rule = "^%[[^/%]]*%]", + conversion = function(match) + local negation = match:umatch("^%[!") + local chars = match:umatch("^%[!?(.-)%]") + chars = chars:ugsub("^%-", "\\-"):ugsub("%-$", "\\-") + local out = "" + if negation then + out = "[^"..chars.."]" + else + out = "["..chars.."]" + end + return out + end + }, + -- Is this negation rule valid? + { rule = "^!%w+", + conversion = function(match) + local chars = match:umatch("%w+") + return "[^"..chars.."]" + end + }, + -- escape square brackets + { rule = "^%[", conversion = function() return "\\[" end }, + { rule = "^%]", conversion = function() return "\\]" end }, + -- match any characters + { rule = "^%*%*", conversion = function() return ".*" end }, + -- match any characters excluding path separators, \ not needed but just in case + { rule = "^%*", conversion = function() return "[^\\/]*" end }, + -- match optional character, doesn't matters what or should only be a \w? + { rule = "^%?", conversion = function() return "[^/]" end }, + -- threat empty braces literally + { rule = "^{}", conversion = function() return "{}" end }, + -- match a number range + { rule = "^{%-?%d+%.%.%-?%d+}", + conversion = function(match, section) + local min, max = match:umatch("(-?%d+)%.%.(-?%d+)") + min = tonumber(min) + max = tonumber(max) + if min and max then + if not section.rule.ranges then section.rule.ranges = {} end + table.insert(section.rule.ranges, { + math.min(min, max), + math.max(min, max) + }) + end + local minus = "" + if min < 0 or max < 0 then minus = "\\-?" end + return "(?<!0)("..minus.."[1-9]\\d*)" + end + }, + -- threat single option braces literally + { rule = "^{[^,]+}", conversion = function(match) return match end }, + -- match invalid range + { rule = "^{[^%.]+%.%.[^%.]+}", conversion = function(match) return match end }, + -- match any of the strings separated by commas inside the curly braces + { rule = "^%b{}", + conversion = function(rule, section) + rule = rule:gsub("^{", ""):gsub("}$", "") + local pos, len, exp = 1, rule:ulen(), "" + + while pos <= len do + local found = false + for _, r in ipairs(RULES_BRACES) do + local match = rule:umatch(r.rule, pos) + if match then + exp = exp .. r.conversion(match, section) + pos = pos + match:ulen() + found = true + break + end + end + if not found then + exp = exp .. rule:usub(pos, pos) + pos = pos + 1 + end + end + + return "(" .. exp .. ")" + end + } +} + +---Adds the regex equivalent of a section path expression. +---@param section plugins.editorconfig.parser.section | string +---@return plugins.editorconfig.parser.section +function Parser:rule_to_regex(section) + if type(section) == "string" then + section = {rule = {expression = section}} + end + + local rule = section.rule.expression + + -- match everything rule which is different from regular * + -- that doesn't matches path separators + if rule == "*" then + section.rule.regex = ".+" + section.rule.regex_compiled = regex.compile(".+") + return section + end + + rule = escapes_to_regex_hex(section.rule.expression) + + local pos, len, exp = 1, rule:ulen(), "" + + -- if expression starts with ! it is treated entirely as a negation + local negation = rule:umatch("^%s*!") + if negation then + pos = pos + negation:ulen() + 1 + end + + -- apply all conversion rules by looping the path expression/rule + while pos <= len do + local found = false + for _, r in ipairs(RULES) do + local match = rule:umatch(r.rule, pos) + if match then + exp = exp .. r.conversion(match, section) + pos = pos + match:ulen() + found = true + break + end + end + if not found then + exp = exp .. rule:usub(pos, pos) + pos = pos + 1 + end + end + + -- force match up to the end + exp = exp .. "$" + + -- allow expressions that start with * to match anything on start + if exp:match("^%[^\\/%]%*") then + exp = exp:gsub("^%[^\\/%]%*", ".*") + -- fixes two failing tests + elseif exp:match("^%[") then + exp = "^" .. exp + -- match only on root dir + elseif exp:match("^/") then + exp = exp:gsub("^/", "^") + end + + -- store changes to the section rule + section.rule.regex, section.rule.negation = exp, negation + section.rule.regex_compiled = regex.compile(section.rule.regex) + if not section.rule.regex_compiled then + log( + "error", + "could not compile '[%s]' to regex '%s'", + rule, section.rule.regex + ) + end + + return section +end + +---Parses the associated .editorconfig file and stores each section. +function Parser:read() + local file = io.open(self.config_path, "r") + + self.sections = {} + + if not file then + log("log", "could not read %s", self.config_path) + return + end + + ---@type plugins.editorconfig.parser.section + local section = {} + + for line in file:lines() do + ---@cast line string + + -- first we try to see if the line is a rule section + local rule = "" + rule = line:umatch("^%s*%[(.+)%]%s*$") + if rule then + if section.rule then + -- save previous section and crerate new one + table.insert(self.sections, section) + section = {} + end + section.rule = { + expression = rule + } + -- convert the expression to a regex directly on the section table + self:rule_to_regex(section) + + local clone = rule + if clone:match("//+") or clone:match("/%*%*/") then + section.equivalent_rules = {} + end + while clone:match("//+") or clone:match("/%*%*/") do + ---@type plugins.editorconfig.parser.section[] + if clone:match("//+") then + clone = clone:ugsub("//+", "/", 1) + table.insert(section.equivalent_rules, self:rule_to_regex(clone).rule) + end + if clone:match("/%*%*/") then + clone = clone:ugsub("/%*%*/", "/", 1) + table.insert(section.equivalent_rules, self:rule_to_regex(clone).rule) + end + end + end + + if not rule then + local name, value = line:umatch("^%s*(%w%S+)%s*=%s*([^\n\r]+)") + if name and value then + name = name:ulower() + -- do not lowercase property values that start with test_ + if not name:match("^test_") then + value = value:ulower() + end + if value == "true" then + value = true + elseif value == "false" then + value = false + elseif math.tointeger and math.tointeger(value) then + value = math.tointeger(value) + elseif tonumber(value) then + value = tonumber(value) + end + + if section.rule then + section[name] = value + elseif name == "root" and type(value) == "boolean" then + self.root = value + end + end + end + end + + if section.rule then + table.insert(self.sections, section) + end +end + +---Helper function that converts a regex offset results into a list +---of strings, omitting the first result which is the complete match. +---@param offsets table<integer,integer> +---@param value string +---@return table<integer, string> +local function regex_result_to_table(offsets, value) + local result = {} + local offset_fix = 0 + if not regex.find_offsets then + offset_fix = 1 + end + for i=3, #offsets, 2 do + table.insert(result, value:sub(offsets[i], offsets[i+1]-offset_fix)) + end + return result +end + +---Get a matching config for the given filename or nil if nothing found. +---@param file_name string +---@param defaults? boolean Set indent size to defaults when needed, +---@return plugins.editorconfig.parser.section? +function Parser:getConfig(file_name, defaults) + if PLATFORM == "Windows" then + file_name = file_name:gsub("\\", "/") + end + + local regex_match = regex.match + if regex.find_offsets then + regex_match = regex.find_offsets + end + + local properties = {} + + local found = false + for _, section in ipairs(self.sections) do + if section.rule.regex_compiled then + local negation = section.rule.negation + -- default rule + local matched = {regex_match(section.rule.regex_compiled, file_name)} + -- try equivalent rules if available + if not matched[1] and section.equivalent_rules then + for _, esection in ipairs(section.equivalent_rules) do + matched = {regex_match(esection.regex_compiled, file_name)} + if matched[1] then + break + end + end + end + if (matched[1] and not negation) or (not matched[1] and negation) then + local ranges_match = true + if section.rule.ranges then + local results = regex_result_to_table(matched, file_name) + if #results < #section.rule.ranges then + ranges_match = false + else + for i, range in ipairs(section.rule.ranges) do + local number = tonumber(results[i]) + if not number then + ranges_match = false + break + end + if number < range[1] or number > range[2] then + ranges_match = false + break + end + end + end + end + if ranges_match then + found = true + for name, value in pairs(section) do + if name ~= "rule" and name ~= "equivalent_rules" then + properties[name] = value + end + end + end + end + end + end + + if found and defaults then + if properties.indent_style and properties.indent_style == "space" then + if properties.indent_size and not properties.tab_width then + properties.tab_width = 4 + end + elseif properties.indent_style and properties.indent_style == "tab" then + if not properties.tab_width and not properties.indent_size then + properties.indent_size = "tab" + elseif properties.tab_width then + properties.indent_size = properties.tab_width + end + end + end + + return found and properties or nil +end + +---Get a matching config for the given filename or nil if nothing found. +---@param file_name string +---@return string +function Parser:getConfigString(file_name) + local out = "" + local properties = self:getConfig(file_name, true) + if properties then + local config_sorted = {} + for name, value in pairs(properties) do + table.insert(config_sorted, {name = name, value = value}) + end + table.sort(config_sorted, function(a, b) + return a.name < b.name + end) + for _, value in ipairs(config_sorted) do + out = out .. value.name .. "=" .. tostring(value.value) .. "\n" + end + end + return out +end + +return Parser |