aboutsummaryrefslogtreecommitdiff
path: root/plugins/editorconfig/parser.lua
diff options
context:
space:
mode:
Diffstat (limited to 'plugins/editorconfig/parser.lua')
-rw-r--r--plugins/editorconfig/parser.lua553
1 files changed, 553 insertions, 0 deletions
diff --git a/plugins/editorconfig/parser.lua b/plugins/editorconfig/parser.lua
new file mode 100644
index 0000000..b0ec689
--- /dev/null
+++ b/plugins/editorconfig/parser.lua
@@ -0,0 +1,553 @@
+-- Lua parser implementation of the .editorconfig spec as best understood.
+-- @copyright Jefferson Gonzalez <jgmdev@gmail.com>
+-- @license MIT
+
+local core = require "core"
+local config = require "core.config"
+
+-- Standalone mode is enabled when ARGS holds "test" immediately followed
+-- by "editorconfig".
+local STANDALONE = false
+for position, argument in ipairs(ARGS) do
+  if argument == "test" and ARGS[position + 1] == "editorconfig" then
+    STANDALONE = true
+    break
+  end
+end
+
+---Writes a message through the lite-xl core function named by `type`, or
+---prints the formatted message to the terminal when the parser is running
+---in standalone mode.
+---@param type "log" | "error"
+---@param format string
+---@param ... any
+local function log(type, format, ...)
+  if STANDALONE then
+    print("[" .. type:upper() .. "]: " .. string.format(format, ...))
+    return
+  end
+  core[type]("[EditorConfig]: " .. format, ...)
+end
+
+---Represents an .editorconfig path rule/expression.
+---@class plugins.editorconfig.parser.rule
+---Path expression as found between square brackets.
+---@field expression string | table<integer,string>
+---The expression converted to a regex.
+---@field regex string | table<integer,string>
+---@field regex_compiled any? | table<integer,string>
+---@field negation boolean Indicates that the expression is a negation.
+---@field ranges? table<integer,number[]> List of {min, max} number ranges found on the expression.
+
+---Represents a section of the .editorconfig with all its config options.
+---@class plugins.editorconfig.parser.section
+---@field rule plugins.editorconfig.parser.rule
+---@field equivalent_rules plugins.editorconfig.parser.rule[]
+---@field indent_style "tab" | "space"
+---@field indent_size integer
+---@field tab_width integer
+---@field end_of_line "lf" | "cr" | "crlf"
+---@field charset "latin1" | "utf-8" | "utf-8-bom" | "utf-16be" | "utf-16le"
+---@field trim_trailing_whitespace boolean
+---@field insert_final_newline boolean
+
+---EditorConfig parser class and filename config matching.
+---@class plugins.editorconfig.parser
+---@field config_path string Path of the .editorconfig file handled by this parser.
+---@field sections plugins.editorconfig.parser.section[] Sections stored while reading the file.
+---@field root boolean Value of the `root` property found before the first section.
+local Parser = {}
+-- Parser doubles as the metatable of its instances, so method lookups on
+-- an instance fall back to this table.
+Parser.__index = Parser
+
+---Creates a parser bound to the given .editorconfig file and reads the
+---file right away.
+---@param config_path string
+---@return plugins.editorconfig.parser
+function Parser.new(config_path)
+  local instance = setmetatable({
+    config_path = config_path,
+    sections = {},
+    root = false
+  }, Parser)
+  instance:read()
+  return instance
+end
+
+---Cache that lazily maps a character to the two digit lowercase hex value
+---of its first byte, e.g. "{" -> "7b". Only the first byte is considered,
+---so multi-byte characters are not fully represented.
+---@type table<string,string>
+local hex_value = setmetatable({}, {
+  -- __index only runs for keys that are not stored yet, so the value is
+  -- computed once and memoized with rawset.
+  __index = function(cache, char)
+    -- %02x keeps bytes below 0x10 at two digits, otherwise the \xN escape
+    -- built from this value could absorb a following hex digit.
+    local hex = string.format("%02x", string.byte(char))
+    rawset(cache, char, hex)
+    return hex
+  end
+})
+
+---Simplifies managing rules with other inner rules like {...} which can
+---contain escaped \\{ \\} and expressions that are easier handled after
+---converting the escaped special characters to \xXX counterparts.
+---
+---The conversion is done in a single pass with a replacement function.
+---The escaped character is never spliced into a pattern, so characters
+---that are magic in Lua patterns (. * ? ( [ % ...) are handled like any
+---other, and text inserted by one replacement is never scanned again.
+---@param value string
+---@return string escaped_values
+local function escapes_to_regex_hex(value)
+  local converted = value:ugsub("\\(.)", function(char)
+    return "\\x" .. hex_value[char]
+  end)
+  -- ugsub also returns the amount of replacements, only the string is wanted
+  return converted
+end
+
+---An .editorconfig path expression to regex conversion rule.
+---@class rule
+---@field rule string Lua pattern.
+---Callback conversion function.
+---@field conversion fun(match:string, section:plugins.editorconfig.parser.section):string
+
+---List of conversion rules applied to brace expressions.
+---The rules are tried in order and the first one that matches wins, so a
+---more specific pattern has to be listed before a more general one.
+---@type rule[]
+local RULES_BRACES = {
+  { rule = "^%(", conversion = function() return "\\(" end },
+  { rule = "^%)", conversion = function() return "\\)" end },
+  { rule = "^%.", conversion = function() return "\\." end },
+  { rule = "^\\%[", conversion = function() return "\\[" end },
+  { rule = "^\\%]", conversion = function() return "\\]" end },
+  { rule = "^\\!", conversion = function() return "!" end },
+  { rule = "^\\;", conversion = function() return ";" end },
+  { rule = "^\\#", conversion = function() return "#" end },
+  { rule = "^\\,", conversion = function() return "," end },
+  { rule = "^\\{", conversion = function() return "{" end },
+  { rule = "^\\}", conversion = function() return "}" end },
+  { rule = "^,", conversion = function() return "|" end },
+  { rule = "^\\%*", conversion = function() return "\\*" end },
+  -- ** has to be tested before the single * rule, otherwise it would be
+  -- consumed as two separate * globs and never match path separators
+  { rule = "^%*%*", conversion = function() return ".*" end },
+  { rule = "^%*", conversion = function() return "[^\\/]*" end },
+  { rule = "^%?", conversion = function() return "." end },
+  -- treat empty and single option braces literally
+  { rule = "^{}", conversion = function() return "{}" end },
+  { rule = "^{[^,]+}", conversion = function(match) return match end },
+  -- nested braces with a list of options
+  { rule = "^%b{}",
+    conversion = function(match)
+      local out = match:ugsub("%(", "\\(")
+        :ugsub("%)", "\\)")
+        :ugsub("%.", "\\.")
+        :ugsub("\\%[", "[\\[]")
+        :ugsub("\\%]", "[\\]]")
+        :ugsub("^\\!", "!")
+        :ugsub("^\\;", ";")
+        :ugsub("^\\#", "#")
+        -- negation chars list
+        :ugsub("%[!(%a+)%]", "[^%1]")
+        :ugsub("\\\\", "[\\]")
+        -- escaped braces
+        :ugsub("\\{", "[{]")
+        :ugsub("\\}", "[}]")
+        -- non escaped braces
+        :ugsub("{([^%]])", "(%1")
+        :ugsub("}([^%]])", ")%1")
+        :ugsub("^{", "(")
+        :ugsub("}$", ")")
+        -- escaped globs
+        :ugsub("\\%*", "[\\*]")
+        :ugsub("\\%?", "[\\?]")
+        -- non escaped globs
+        :ugsub("%*%*", "[*][*]") -- prevent this glob from expanding to next sub
+        :ugsub("%*([^%]])", "[^\\/]*%1")
+        :ugsub("%[%*%]%[%*%]", ".*")
+        :ugsub("%?([^%]])", ".%1")
+        -- escaped comma
+        :ugsub("\\,", "[,]")
+        -- non escaped comma
+        :ugsub(",([^%]])", "|%1")
+      return out
+    end
+  },
+  -- character list like [abc] or [!abc] that contains no path separator
+  { rule = "^%[[^/%]]*%]",
+    conversion = function(match)
+      local negation = match:umatch("^%[!")
+      local chars = match:umatch("^%[!?(.-)%]")
+      -- a dash on the list edges is a literal dash and not a range
+      chars = chars:ugsub("^%-", "\\-"):ugsub("%-$", "\\-")
+      local out = ""
+      if negation then
+        out = "[^"..chars.."]"
+      else
+        out = "["..chars.."]"
+      end
+      return out
+    end
+  },
+}
+
+---List of conversion rules applied to .editorconfig path expressions.
+---The rules are tried in order at the current position of the expression
+---and the first one that matches wins.
+---@type rule[]
+local RULES = {
+  -- keep characters already converted to \xXX as they are; %x is used
+  -- because the escapes contain decimal digits too (e.g. \x7b for {)
+  { rule = "^\\x%x%x", conversion = function(match) return match end },
+  -- normalize escaped .editorconfig special chars or keep them escaped
+  { rule = "^\\%*", conversion = function() return "\\*" end },
+  { rule = "^\\%?", conversion = function() return "\\?" end },
+  { rule = "^\\{", conversion = function() return "{" end },
+  { rule = "^\\}", conversion = function() return "}" end },
+  { rule = "^\\%[", conversion = function() return "\\[" end },
+  { rule = "^\\%]", conversion = function() return "\\]" end },
+  { rule = "^\\!", conversion = function() return "!" end },
+  { rule = "^\\;", conversion = function() return ";" end },
+  { rule = "^\\#", conversion = function() return "#" end },
+  -- escape special chars
+  { rule = "^%.", conversion = function() return "\\." end },
+  { rule = "^%(", conversion = function() return "\\(" end },
+  { rule = "^%)", conversion = function() return "\\)" end },
+  -- character list like [abc] or [!abc] that contains no path separator
+  { rule = "^%[[^/%]]*%]",
+    conversion = function(match)
+      local negation = match:umatch("^%[!")
+      local chars = match:umatch("^%[!?(.-)%]")
+      -- a dash on the list edges is a literal dash and not a range
+      chars = chars:ugsub("^%-", "\\-"):ugsub("%-$", "\\-")
+      local out = ""
+      if negation then
+        out = "[^"..chars.."]"
+      else
+        out = "["..chars.."]"
+      end
+      return out
+    end
+  },
+  -- Is this negation rule valid?
+  { rule = "^!%w+",
+    conversion = function(match)
+      local chars = match:umatch("%w+")
+      return "[^"..chars.."]"
+    end
+  },
+  -- escape square brackets
+  { rule = "^%[", conversion = function() return "\\[" end },
+  { rule = "^%]", conversion = function() return "\\]" end },
+  -- match any characters
+  { rule = "^%*%*", conversion = function() return ".*" end },
+  -- match any characters excluding path separators, \ not needed but just in case
+  { rule = "^%*", conversion = function() return "[^\\/]*" end },
+  -- match a single character that is not a path separator
+  { rule = "^%?", conversion = function() return "[^/]" end },
+  -- treat empty braces literally
+  { rule = "^{}", conversion = function() return "{}" end },
+  -- match a number range; the regex only captures the number, the limits
+  -- are stored on the section rule so they can be checked after matching
+  { rule = "^{%-?%d+%.%.%-?%d+}",
+    conversion = function(match, section)
+      local min, max = match:umatch("(-?%d+)%.%.(-?%d+)")
+      min = tonumber(min)
+      max = tonumber(max)
+      if min and max then
+        if not section.rule.ranges then section.rule.ranges = {} end
+        table.insert(section.rule.ranges, {
+          math.min(min, max),
+          math.max(min, max)
+        })
+      end
+      local minus = ""
+      if min < 0 or max < 0 then minus = "\\-?" end
+      return "(?<!0)("..minus.."[1-9]\\d*)"
+    end
+  },
+  -- treat single option braces literally
+  { rule = "^{[^,]+}", conversion = function(match) return match end },
+  -- match invalid range
+  { rule = "^{[^%.]+%.%.[^%.]+}", conversion = function(match) return match end },
+  -- match any of the strings separated by commas inside the curly braces
+  { rule = "^%b{}",
+    conversion = function(rule, section)
+      -- strip the surrounding braces and convert the content using the
+      -- brace specific rules
+      rule = rule:gsub("^{", ""):gsub("}$", "")
+      local pos, len, exp = 1, rule:ulen(), ""
+
+      while pos <= len do
+        local found = false
+        for _, r in ipairs(RULES_BRACES) do
+          local match = rule:umatch(r.rule, pos)
+          if match then
+            exp = exp .. r.conversion(match, section)
+            pos = pos + match:ulen()
+            found = true
+            break
+          end
+        end
+        -- no rule applies, copy the character as is
+        if not found then
+          exp = exp .. rule:usub(pos, pos)
+          pos = pos + 1
+        end
+      end
+
+      return "(" .. exp .. ")"
+    end
+  }
+}
+
+---Adds the regex equivalent of a section path expression.
+---
+---The regex, its compiled version and the negation flag are stored on
+---`section.rule`. When a plain string is given, a new table holding only
+---the rule is created for it and returned.
+---@param section plugins.editorconfig.parser.section | string
+---@return plugins.editorconfig.parser.section
+function Parser:rule_to_regex(section)
+  if type(section) == "string" then
+    section = {rule = {expression = section}}
+  end
+
+  local rule = section.rule.expression
+
+  -- match everything rule which is different from regular *
+  -- that doesn't match path separators
+  if rule == "*" then
+    section.rule.regex = ".+"
+    section.rule.regex_compiled = regex.compile(".+")
+    return section
+  end
+
+  rule = escapes_to_regex_hex(section.rule.expression)
+
+  local pos, len, exp = 1, rule:ulen(), ""
+
+  -- if expression starts with ! it is treated entirely as a negation
+  local negation = rule:umatch("^%s*!")
+  if negation then
+    -- continue on the first character that follows the ! marker; adding
+    -- one more here would drop the first character of the expression
+    pos = pos + negation:ulen()
+  end
+
+  -- apply all conversion rules by looping the path expression/rule
+  while pos <= len do
+    local found = false
+    for _, r in ipairs(RULES) do
+      local match = rule:umatch(r.rule, pos)
+      if match then
+        exp = exp .. r.conversion(match, section)
+        pos = pos + match:ulen()
+        found = true
+        break
+      end
+    end
+    -- no rule applies, copy the character as is
+    if not found then
+      exp = exp .. rule:usub(pos, pos)
+      pos = pos + 1
+    end
+  end
+
+  -- force match up to the end
+  exp = exp .. "$"
+
+  -- allow expressions that start with * to match anything on start
+  if exp:match("^%[^\\/%]%*") then
+    exp = exp:gsub("^%[^\\/%]%*", ".*")
+  -- fixes two failing tests
+  elseif exp:match("^%[") then
+    exp = "^" .. exp
+  -- match only on root dir
+  elseif exp:match("^/") then
+    exp = exp:gsub("^/", "^")
+  end
+
+  -- store changes to the section rule, negation as a real boolean
+  section.rule.regex, section.rule.negation = exp, negation ~= nil
+  section.rule.regex_compiled = regex.compile(section.rule.regex)
+  if not section.rule.regex_compiled then
+    log(
+      "error",
+      "could not compile '[%s]' to regex '%s'",
+      rule, section.rule.regex
+    )
+  end
+
+  return section
+end
+
+---Parses the associated .editorconfig file and stores each section.
+---
+---`self.sections` is reset on every call. A `root` property found before
+---the first section is stored on `self.root`, every other property is
+---stored on the section it belongs to. When the file can not be opened a
+---message is logged and the sections are left empty.
+function Parser:read()
+  self.sections = {}
+
+  local file = io.open(self.config_path, "r")
+  if not file then
+    log("log", "could not read %s", self.config_path)
+    return
+  end
+
+  -- load all the lines first so the file handle is closed even if the
+  -- parsing below raises an error
+  local lines = {}
+  for line in file:lines() do
+    lines[#lines + 1] = line
+  end
+  file:close()
+
+  ---@type plugins.editorconfig.parser.section
+  local section = {}
+
+  for _, line in ipairs(lines) do
+    ---@cast line string
+
+    -- first we try to see if the line is a rule section
+    local rule = line:umatch("^%s*%[(.+)%]%s*$")
+    if rule then
+      if section.rule then
+        -- save previous section and create new one
+        table.insert(self.sections, section)
+        section = {}
+      end
+      section.rule = {
+        expression = rule
+      }
+      -- convert the expression to a regex directly on the section table
+      self:rule_to_regex(section)
+
+      -- expressions with repeated slashes or /**/ also get the rules
+      -- that result from collapsing them into a single slash
+      local clone = rule
+      if clone:match("//+") or clone:match("/%*%*/") then
+        section.equivalent_rules = {}
+      end
+      while clone:match("//+") or clone:match("/%*%*/") do
+        if clone:match("//+") then
+          clone = clone:ugsub("//+", "/", 1)
+          table.insert(section.equivalent_rules, self:rule_to_regex(clone).rule)
+        end
+        if clone:match("/%*%*/") then
+          clone = clone:ugsub("/%*%*/", "/", 1)
+          table.insert(section.equivalent_rules, self:rule_to_regex(clone).rule)
+        end
+      end
+    else
+      local name, value = line:umatch("^%s*(%w%S+)%s*=%s*([^\n\r]+)")
+      if name and value then
+        name = name:ulower()
+        -- whitespace after the value is not part of it
+        value = value:gsub("%s+$", "")
+        -- do not lowercase property values that start with test_
+        if not name:match("^test_") then
+          value = value:ulower()
+        end
+        if value == "true" then
+          value = true
+        elseif value == "false" then
+          value = false
+        elseif math.tointeger and math.tointeger(value) then
+          value = math.tointeger(value)
+        elseif tonumber(value) then
+          value = tonumber(value)
+        end
+
+        if section.rule then
+          section[name] = value
+        elseif name == "root" and type(value) == "boolean" then
+          self.root = value
+        end
+      end
+    end
+  end
+
+  -- store the last section found
+  if section.rule then
+    table.insert(self.sections, section)
+  end
+end
+
+---Turns the flat list of offsets given by a regex match into the list of
+---matched substrings, skipping the first pair which is the complete match.
+---@param offsets table<integer,integer>
+---@param value string
+---@return table<integer, string>
+local function regex_result_to_table(offsets, value)
+  -- without regex.find_offsets the end offsets are lowered by one
+  local end_adjust = regex.find_offsets and 0 or 1
+  local captures = {}
+  local index = 3
+  while index <= #offsets do
+    captures[#captures + 1] = value:sub(
+      offsets[index], offsets[index + 1] - end_adjust
+    )
+    index = index + 2
+  end
+  return captures
+end
+
+---Get a matching config for the given filename or nil if nothing found.
+---
+---The properties of every matching section are merged in file order, so
+---a later section overrides the properties set by an earlier one.
+---@param file_name string
+---@param defaults? boolean Fill in indent_size/tab_width when they are missing.
+---@return plugins.editorconfig.parser.section?
+function Parser:getConfig(file_name, defaults)
+  if PLATFORM == "Windows" then
+    file_name = file_name:gsub("\\", "/")
+  end
+
+  -- prefer regex.find_offsets when it is available
+  local regex_match = regex.match
+  if regex.find_offsets then
+    regex_match = regex.find_offsets
+  end
+
+  local properties = {}
+
+  local found = false
+  for _, section in ipairs(self.sections) do
+    if section.rule.regex_compiled then
+      local negation = section.rule.negation
+      -- default rule
+      local matched = {regex_match(section.rule.regex_compiled, file_name)}
+      -- try equivalent rules if available
+      if not matched[1] and section.equivalent_rules then
+        for _, esection in ipairs(section.equivalent_rules) do
+          -- skip equivalent rules whose regex could not be compiled,
+          -- same as is done for the main rule
+          if esection.regex_compiled then
+            matched = {regex_match(esection.regex_compiled, file_name)}
+            if matched[1] then
+              break
+            end
+          end
+        end
+      end
+      if (matched[1] and not negation) or (not matched[1] and negation) then
+        -- number ranges are only captured by the regex, the limits
+        -- have to be checked against the captured values
+        local ranges_match = true
+        if section.rule.ranges then
+          local results = regex_result_to_table(matched, file_name)
+          if #results < #section.rule.ranges then
+            ranges_match = false
+          else
+            for i, range in ipairs(section.rule.ranges) do
+              local number = tonumber(results[i])
+              if not number then
+                ranges_match = false
+                break
+              end
+              if number < range[1] or number > range[2] then
+                ranges_match = false
+                break
+              end
+            end
+          end
+        end
+        if ranges_match then
+          found = true
+          for name, value in pairs(section) do
+            if name ~= "rule" and name ~= "equivalent_rules" then
+              properties[name] = value
+            end
+          end
+        end
+      end
+    end
+  end
+
+  if found and defaults then
+    if properties.indent_style and properties.indent_style == "space" then
+      -- NOTE(review): the EditorConfig specification defaults tab_width
+      -- to indent_size; confirm that the fixed 4 is intended.
+      if properties.indent_size and not properties.tab_width then
+        properties.tab_width = 4
+      end
+    elseif properties.indent_style and properties.indent_style == "tab" then
+      if not properties.tab_width and not properties.indent_size then
+        properties.indent_size = "tab"
+      elseif properties.tab_width then
+        properties.indent_size = properties.tab_width
+      end
+    end
+  end
+
+  return found and properties or nil
+end
+
+---Get the config that matches the given filename as text: one
+---`name=value` line per property sorted by name, or an empty string if
+---nothing matches.
+---@param file_name string
+---@return string
+function Parser:getConfigString(file_name)
+  local properties = self:getConfig(file_name, true)
+  if not properties then
+    return ""
+  end
+
+  local entries = {}
+  for name, value in pairs(properties) do
+    entries[#entries + 1] = {name = name, value = value}
+  end
+  table.sort(entries, function(left, right)
+    return left.name < right.name
+  end)
+
+  local lines = {}
+  for index, entry in ipairs(entries) do
+    lines[index] = entry.name .. "=" .. tostring(entry.value) .. "\n"
+  end
+  return table.concat(lines)
+end
+
+return Parser