aboutsummaryrefslogtreecommitdiff
path: root/plugins/editorconfig/parser.lua
blob: b0ec689f95def34c807ca9fb3a85293dc2fb7c91 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
-- Lua parser implementation of the .editorconfig spec as best understood.
-- @copyright Jefferson Gonzalez <jgmdev@gmail.com>
-- @license MIT

local core = require "core"
local config = require "core.config"

-- Standalone mode is switched on when the argument list contains "test"
-- immediately followed by "editorconfig".
local STANDALONE = false
for position, current in ipairs(ARGS) do
  local following = ARGS[position + 1]
  if current == "test" and following == "editorconfig" then
    STANDALONE = true
  end
end

---Writes a message through the lite-xl core logging function named by
---`type`, or prints it to the terminal when running in standalone mode.
---@param type "log" | "error" Name of the core logging function to call.
---@param format string Format string of the message.
---@param ... any Values consumed by the format string.
local function log(type, format, ...)
  if STANDALONE then
    -- no editor to log into: format the message ourselves
    print("[" .. type:upper() .. "]: " .. string.format(format, ...))
    return
  end
  core[type]("[EditorConfig]: " .. format, ...)
end

---Represents an .editorconfig path rule/expression.
---@class plugins.editorconfig.parser.rule
---Path expression as found between square brackets.
---@field expression string | table<integer,string>
---The expression converted to a regex.
---@field regex string | table<integer,string>
---@field regex_compiled any? | table<integer,string>
---@field negation boolean Indicates that the expression is a negation.
---@field ranges table<integer,number[]> List of {min, max} pairs, one per numeric range found on the expression.

---Represents a section of the .editorconfig with all its config options.
---@class plugins.editorconfig.parser.section
---@field rule plugins.editorconfig.parser.rule
---@field equivalent_rules plugins.editorconfig.parser.rule[]
---@field indent_style "tab" | "space"
---@field indent_size integer
---@field tab_width integer
---@field end_of_line "lf" | "cr" | "crlf"
---@field charset "latin1" | "utf-8" | "utf-8-bom" | "utf-16be" | "utf-16le"
---@field trim_trailing_whitespace boolean
---@field insert_final_newline boolean

---EditorConfig parser class and filename config matching.
---@class plugins.editorconfig.parser
---Path of the .editorconfig file this parser reads.
---@field config_path string
---Sections parsed from the file, in the order they appear.
---@field sections plugins.editorconfig.parser.section[]
---Value of the `root` property declared before the first section,
---false when not declared.
---@field root boolean
local Parser = {}
-- lets instances that use Parser as metatable find their methods on it
Parser.__index = Parser

---Creates a parser bound to the given .editorconfig file and parses
---the file immediately.
---@param config_path string Path of the .editorconfig file to read.
---@return plugins.editorconfig.parser
function Parser.new(config_path)
  local instance = setmetatable({
    config_path = config_path,
    sections = {},
    root = false
  }, Parser)
  instance:read()
  return instance
end

---Char to hex cache and automatic converter.
---Indexing the table with a character returns the hexadecimal value of its
---first byte, computing and caching it on first access. The value is always
---zero padded to two digits so that it can be appended to "\x" without the
---following text being mistaken for part of the escape (eg: "\x09a" instead
---of "\x9a").
---NOTE(review): only the first byte of a multi-byte character is converted.
---@type table<string,string>
local hex_value = setmetatable({}, {
  -- only invoked for keys that are not cached yet
  __index = function(t, k)
    local v = string.format("%02x", string.byte(k))
    rawset(t, k, v)
    return v
  end
})

---Simplifies managing rules with other inner rules like {...} which can
---contain escaped \\{ \\} and expressions that are easier handled after
---converting the escaped special characters to \xXX counterparts.
---
---The conversion is done in a single left to right pass using a replacement
---function, so the escaped character is never interpreted as part of a Lua
---pattern (characters like * [ . % are magic there) and text that was
---already converted is not scanned again.
---@param value string
---@return string escaped_values
local function escapes_to_regex_hex(value)
  local converted = value:ugsub("\\(.)", function(char)
    return "\\x" .. hex_value[char]
  end)
  -- ugsub also returns the replacements count, only the string is wanted
  return converted
end

---An .editorconfig path expression to regex conversion rule.
---@class rule
---@field rule string Lua pattern.
---Callback conversion function.
---@field conversion fun(match:string, section:plugins.editorconfig.parser.section):string

---List of conversion rules applied to the content of brace expressions.
---Rules are tried in order at the current position and the first one that
---matches is applied, so a rule must be listed before any more general rule
---that would also match the same text.
---@type rule[]
local RULES_BRACES = {
  -- escape regex special chars
  { rule = "^%(",   conversion = function() return "\\(" end },
  { rule = "^%)",   conversion = function() return "\\)" end },
  { rule = "^%.",   conversion = function() return "\\." end },
  -- normalize escaped .editorconfig special chars or keep them escaped
  { rule = "^\\%[", conversion = function() return "\\[" end },
  { rule = "^\\%]", conversion = function() return "\\]" end },
  { rule = "^\\!",  conversion = function() return "!" end },
  { rule = "^\\;",  conversion = function() return ";" end },
  { rule = "^\\#",  conversion = function() return "#" end },
  { rule = "^\\,",  conversion = function() return "," end },
  { rule = "^\\{",  conversion = function() return "{" end },
  { rule = "^\\}",  conversion = function() return "}" end },
  -- non escaped comma separates the alternatives
  { rule = "^,",    conversion = function() return "|" end },
  { rule = "^\\%*", conversion = function() return "\\*" end },
  -- has to be checked before the single * rule, otherwise the first * of
  -- a ** would always be consumed by it and this rule never reached
  { rule = "^%*%*", conversion = function() return ".*" end },
  { rule = "^%*",   conversion = function() return "[^\\/]*" end },
  { rule = "^%?",   conversion = function() return "." end },
  -- treat empty and single option braces literally
  { rule = "^{}",   conversion = function() return "{}" end },
  { rule = "^{[^,]+}", conversion = function(match) return match end },
  -- nested braces with multiple options
  { rule = "^%b{}",
    conversion = function(match)
      local out = match:ugsub("%(", "\\(")
        :ugsub("%)", "\\)")
        :ugsub("%.", "\\.")
        :ugsub("\\%[", "[\\[]")
        :ugsub("\\%]", "[\\]]")
        :ugsub("^\\!", "!")
        :ugsub("^\\;", ";")
        :ugsub("^\\#", "#")
        -- negation chars list
        :ugsub("%[!(%a+)%]", "[^%1]")
        :ugsub("\\\\", "[\\]")
        -- escaped braces
        :ugsub("\\{", "[{]")
        :ugsub("\\}", "[}]")
        -- non escaped braces
        :ugsub("{([^%]])", "(%1")
        :ugsub("}([^%]])", ")%1")
        :ugsub("^{", "(")
        :ugsub("}$", ")")
        -- escaped globs
        :ugsub("\\%*", "[\\*]")
        :ugsub("\\%?", "[\\?]")
        -- non escaped globs
        :ugsub("%*%*", "[*][*]") -- prevent this glob from expanding to next sub
        :ugsub("%*([^%]])", "[^\\/]*%1")
        :ugsub("%[%*%]%[%*%]", ".*")
        :ugsub("%?([^%]])", ".%1")
        -- escaped comma
        :ugsub("\\,", "[,]")
        -- non escaped comma
        :ugsub(",([^%]])", "|%1")
      return out
    end
  },
  -- list of characters, negated when it starts with !
  { rule = "^%[[^/%]]*%]",
    conversion = function(match)
      local negation = match:umatch("^%[!")
      local chars = match:umatch("^%[!?(.-)%]")
      -- a dash on the edges is a literal dash and not part of a range
      chars = chars:ugsub("^%-", "\\-"):ugsub("%-$", "\\-")
      local out = ""
      if negation then
        out = "[^"..chars.."]"
      else
        out = "["..chars.."]"
      end
      return out
    end
  },
}

---List of conversion rules applied to .editorconfig path expressions.
---Rules are tried in order at the current position of the expression and
---the first one that matches is applied, so the order of the list matters.
---@type rule[]
local RULES = {
  -- keep as is the \xXX sequences generated for escaped characters, the
  -- hex digits can be numbers too (eg: \x7b for an escaped {)
  { rule = "^\\x%x%x", conversion = function(match) return match end },
  -- normalize escaped .editorconfig special chars or keep them escaped
  { rule = "^\\%*", conversion = function() return "\\*" end },
  { rule = "^\\%?", conversion = function() return "\\?" end },
  { rule = "^\\{",  conversion = function() return "{" end },
  { rule = "^\\}",  conversion = function() return "}" end },
  { rule = "^\\%[",  conversion = function() return "\\[" end },
  { rule = "^\\%]",  conversion = function() return "\\]" end },
  { rule = "^\\!",  conversion = function() return "!" end },
  { rule = "^\\;",  conversion = function() return ";" end },
  { rule = "^\\#",  conversion = function() return "#" end },
  -- escape special chars
  { rule = "^%.",   conversion = function() return "\\." end },
  { rule = "^%(",   conversion = function() return "\\(" end },
  { rule = "^%)",   conversion = function() return "\\)" end },
  -- list of characters, negated when it starts with !
  { rule = "^%[[^/%]]*%]",
    conversion = function(match)
      local negation = match:umatch("^%[!")
      local chars = match:umatch("^%[!?(.-)%]")
      -- a dash on the edges is a literal dash and not part of a range
      chars = chars:ugsub("^%-", "\\-"):ugsub("%-$", "\\-")
      local out = ""
      if negation then
        out = "[^"..chars.."]"
      else
        out = "["..chars.."]"
      end
      return out
    end
  },
  -- Is this negation rule valid?
  { rule = "^!%w+",
    conversion = function(match)
      local chars = match:umatch("%w+")
      return "[^"..chars.."]"
    end
  },
  -- escape square brackets
  { rule = "^%[",   conversion = function() return "\\[" end },
  { rule = "^%]",   conversion = function() return "\\]" end },
  -- match any characters
  { rule = "^%*%*", conversion = function() return ".*" end },
  -- match any characters excluding path separators, \ not needed but just in case
  { rule = "^%*",   conversion = function() return "[^\\/]*" end },
  -- match optional character, doesn't matters what or should only be a \w?
  { rule = "^%?",   conversion = function() return "[^/]" end },
  -- treat empty braces literally
  { rule = "^{}",   conversion = function() return "{}" end },
  -- match a number range, the limits are stored on the section rule as
  -- a {min, max} pair and the regex only captures the number
  { rule = "^{%-?%d+%.%.%-?%d+}",
    conversion = function(match, section)
      local min, max = match:umatch("(-?%d+)%.%.(-?%d+)")
      min = tonumber(min)
      max = tonumber(max)
      local minus = ""
      if min and max then
        if not section.rule.ranges then section.rule.ranges = {} end
        table.insert(section.rule.ranges, {
          math.min(min, max),
          math.max(min, max)
        })
        -- allow a leading minus sign only if one of the limits is negative
        if min < 0 or max < 0 then minus = "\\-?" end
      end
      return "(?<!0)("..minus.."[1-9]\\d*)"
    end
  },
  -- treat single option braces literally
  { rule = "^{[^,]+}", conversion = function(match) return match end },
  -- match invalid range
  { rule = "^{[^%.]+%.%.[^%.]+}", conversion = function(match) return match end },
  -- match any of the strings separated by commas inside the curly braces
  { rule = "^%b{}",
    conversion = function(rule, section)
      -- strip the surrounding braces and convert the content using the
      -- brace specific rules
      rule = rule:gsub("^{", ""):gsub("}$", "")
      local pos, len, exp = 1, rule:ulen(), ""

      while pos <= len do
        local found = false
        for _, r in ipairs(RULES_BRACES) do
          local match = rule:umatch(r.rule, pos)
          if match then
            exp = exp .. r.conversion(match, section)
            pos = pos + match:ulen()
            found = true
            break
          end
        end
        if not found then
          -- no rule applies, copy the character as is
          exp = exp .. rule:usub(pos, pos)
          pos = pos + 1
        end
      end

      return "(" .. exp .. ")"
    end
  }
}

---Adds the regex equivalent of a section path expression.
---
---The resulting regex string, its compiled version and the negation marker
---are stored on `section.rule`. When the regex can not be compiled an error
---is logged and `section.rule.regex_compiled` is left without a value.
---@param section plugins.editorconfig.parser.section | string A section
---with `rule.expression` set, or the path expression itself in which case
---a new section table holding only the rule is created.
---@return plugins.editorconfig.parser.section
function Parser:rule_to_regex(section)
  if type(section) == "string" then
    section = {rule = {expression = section}}
  end

  local rule = section.rule.expression

  -- match everything rule which is different from regular *
  -- that doesn't matches path separators
  if rule == "*" then
    section.rule.regex = ".+"
    section.rule.regex_compiled = regex.compile(".+")
    return section
  end

  rule = escapes_to_regex_hex(section.rule.expression)

  local pos, len, exp = 1, rule:ulen(), ""

  -- if expression starts with ! it is treated entirely as a negation
  local negation = rule:umatch("^%s*!")
  if negation then
    -- continue on the first character that follows the ! without
    -- skipping any character of the negated expression
    pos = pos + negation:ulen()
  end

  -- apply all conversion rules by looping the path expression/rule
  while pos <= len do
    local found = false
    for _, r in ipairs(RULES) do
      local match = rule:umatch(r.rule, pos)
      if match then
        exp = exp .. r.conversion(match, section)
        pos = pos + match:ulen()
        found = true
        break
      end
    end
    if not found then
      -- no rule applies, copy the character as is
      exp = exp .. rule:usub(pos, pos)
      pos = pos + 1
    end
  end

  -- force match up to the end
  exp = exp .. "$"

  -- allow expressions that start with * to match anything on start
  if exp:match("^%[^\\/%]%*") then
    exp = exp:gsub("^%[^\\/%]%*", ".*")
  -- fixes two failing tests
  elseif exp:match("^%[") then
    exp = "^" .. exp
  -- match only on root dir
  elseif exp:match("^/") then
    exp = exp:gsub("^/", "^")
  end

  -- store changes to the section rule
  section.rule.regex, section.rule.negation = exp, negation
  section.rule.regex_compiled = regex.compile(section.rule.regex)
  if not section.rule.regex_compiled then
    log(
      "error",
      "could not compile '[%s]' to regex '%s'",
      rule, section.rule.regex
    )
  end

  return section
end

---Parses the associated .editorconfig file and stores each section.
---
---Previously stored sections are discarded. If the file can not be opened
---a message is logged and the sections list is left empty. The `root`
---property is only taken into account when declared before the first
---section, properties declared after a section header are stored on that
---section.
function Parser:read()
  local file = io.open(self.config_path, "r")

  self.sections = {}

  if not file then
    log("log", "could not read %s", self.config_path)
    return
  end

  ---@type plugins.editorconfig.parser.section
  local section = {}

  for line in file:lines() do
    ---@cast line string

    -- first we try to see if the line is a rule section
    local rule = line:umatch("^%s*%[(.+)%]%s*$")
    if rule then
      if section.rule then
        -- save previous section and create new one
        table.insert(self.sections, section)
        section = {}
      end
      section.rule = {
        expression = rule
      }
      -- convert the expression to a regex directly on the section table
      self:rule_to_regex(section)

      -- expressions with repeated slashes or /**/ also get rules where
      -- those are progressively collapsed into a single slash
      local clone = rule
      if clone:match("//+") or clone:match("/%*%*/") then
        section.equivalent_rules = {}
      end
      while clone:match("//+") or clone:match("/%*%*/") do
        ---@type plugins.editorconfig.parser.section[]
        if clone:match("//+") then
          clone = clone:ugsub("//+", "/", 1)
          table.insert(section.equivalent_rules, self:rule_to_regex(clone).rule)
        end
        if clone:match("/%*%*/") then
          clone = clone:ugsub("/%*%*/", "/", 1)
          table.insert(section.equivalent_rules, self:rule_to_regex(clone).rule)
        end
      end
    else
      -- not a section header so check if it is a name = value property
      local name, value = line:umatch("^%s*(%w%S+)%s*=%s*([^\n\r]+)")
      if name and value then
        name = name:ulower()
        -- do not lowercase property values that start with test_
        if not name:match("^test_") then
          value = value:ulower()
        end
        -- convert booleans and numbers to their native types
        if value == "true" then
          value = true
        elseif value == "false" then
          value = false
        elseif math.tointeger and math.tointeger(value) then
          value = math.tointeger(value)
        elseif tonumber(value) then
          value = tonumber(value)
        end

        if section.rule then
          section[name] = value
        elseif name == "root" and type(value) == "boolean" then
          self.root = value
        end
      end
    end
  end

  -- everything was read, release the file handle
  file:close()

  -- save the last section since no other header followed it
  if section.rule then
    table.insert(self.sections, section)
  end
end

---Extracts the captured substrings from the offsets returned by a regex
---match. The first pair of offsets, which covers the complete match, is
---skipped.
---@param offsets table<integer,integer> Flat list of start/end offset pairs.
---@param value string The string the offsets refer to.
---@return table<integer, string>
local function regex_result_to_table(offsets, value)
  -- when regex.find_offsets is not available the end offsets are
  -- reduced by one before extracting the substring
  local end_adjust = regex.find_offsets and 0 or 1
  local captures = {}
  local total = #offsets
  local index = 3
  while index <= total do
    local from, to = offsets[index], offsets[index + 1] - end_adjust
    captures[#captures + 1] = value:sub(from, to)
    index = index + 2
  end
  return captures
end

---Get a matching config for the given filename or nil if nothing found.
---
---All sections are checked in order and the properties of every matching
---section are merged, so sections found later override the properties set
---by previous ones.
---@param file_name string
---@param defaults? boolean Fill in missing `tab_width` / `indent_size`
---values from the other indentation properties when needed.
---@return plugins.editorconfig.parser.section?
function Parser:getConfig(file_name, defaults)
  if PLATFORM == "Windows" then
    file_name = file_name:gsub("\\", "/")
  end

  local regex_match = regex.match
  if regex.find_offsets then
    regex_match = regex.find_offsets
  end

  local properties = {}

  local found = false
  for _, section in ipairs(self.sections) do
    if section.rule.regex_compiled then
      local negation = section.rule.negation
      -- default rule
      local matched = {regex_match(section.rule.regex_compiled, file_name)}
      -- try equivalent rules if available
      if not matched[1] and section.equivalent_rules then
        for _, esection in ipairs(section.equivalent_rules) do
          -- skip equivalent rules that could not be compiled
          if esection.regex_compiled then
            matched = {regex_match(esection.regex_compiled, file_name)}
            if matched[1] then
              break
            end
          end
        end
      end
      if (matched[1] and not negation) or (not matched[1] and negation) then
        local ranges_match = true
        if section.rule.ranges then
          -- each captured number has to be inside of its respective range
          local results = regex_result_to_table(matched, file_name)
          if #results < #section.rule.ranges then
            ranges_match = false
          else
            for i, range in ipairs(section.rule.ranges) do
              local number = tonumber(results[i])
              if not number then
                ranges_match = false
                break
              end
              if number < range[1] or number > range[2] then
                ranges_match = false
                break
              end
            end
          end
        end
        if ranges_match then
          found = true
          for name, value in pairs(section) do
            if name ~= "rule" and name ~= "equivalent_rules" then
              properties[name] = value
            end
          end
        end
      end
    end
  end

  if found and defaults then
    if properties.indent_style == "space" then
      if properties.indent_size and not properties.tab_width then
        -- tab_width defaults to the value of indent_size, 4 is only kept
        -- as fallback for a non numeric indent_size
        local size = properties.indent_size
        properties.tab_width = type(size) == "number" and size or 4
      end
    elseif properties.indent_style == "tab" then
      if not properties.tab_width and not properties.indent_size then
        properties.indent_size = "tab"
      elseif properties.tab_width then
        properties.indent_size = properties.tab_width
      end
    end
  end

  return found and properties or nil
end

---Get the matching config for the given filename, with defaults applied,
---as text made of one `name=value` line per property sorted by name.
---An empty string is returned if nothing matches.
---@param file_name string
---@return string
function Parser:getConfigString(file_name)
  local properties = self:getConfig(file_name, true)
  if not properties then
    return ""
  end

  local entries = {}
  for key, val in pairs(properties) do
    entries[#entries + 1] = {name = key, value = val}
  end
  table.sort(entries, function(left, right)
    return left.name < right.name
  end)

  local lines = {}
  for position, entry in ipairs(entries) do
    lines[position] = entry.name .. "=" .. tostring(entry.value) .. "\n"
  end
  return table.concat(lines)
end

return Parser