aboutsummaryrefslogtreecommitdiff
path: root/data/plugins/detectindent.lua
blob: cc137243c3baf7cb99394979285c480156526af6 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
-- mod-version:3
local core = require "core"
local command = require "core.command"
local common = require "core.common"
local config = require "core.config"
local core_syntax = require "core.syntax"
local DocView = require "core.docview"
local Doc = require "core.doc"

-- Per-document indent info ({ type, size, confirmed }) keyed by the doc
-- object. Keys are weak (`__mode = "k"`), so an entry does not by itself
-- keep its document alive.
local cache = setmetatable({}, { __mode = "k" })
-- Comment/string pattern lists keyed by syntax table, filled in by
-- get_comment_patterns(). An empty list is stored when a syntax has none.
local comments_cache = {}
-- Number of non-empty, non-comment lines detect_indent_stat() samples per
-- run before it either extends the window or stops.
local auto_detect_max_lines = 150


--- Tell whether the value at `idx` is repeated by a direct neighbour.
-- Only the entries immediately before and after `idx` are inspected, so the
-- result is meaningful as an "occurs more than once" test when equal values
-- sit next to each other (e.g. in a sorted list).
-- @param stat list of indentation widths
-- @param idx  position in `stat` to check
-- @return true when stat[idx-1] or stat[idx+1] equals stat[idx], else false
local function indent_occurrences_more_than_once(stat, idx)
  local value = stat[idx]
  local before, after = stat[idx - 1], stat[idx + 1]
  local repeats_before = before and before == value
  local repeats_after = after and after == value
  -- Normalise nil/false/true into a strict boolean.
  return (repeats_before or repeats_after) and true or false
end


--- Pick the most plausible indent width from a list of observed widths.
-- The list is sorted in place, widest first, and each width is tried in that
-- order as a candidate. A candidate scores one point for every other entry
-- that is an exact multiple of it. A candidate is rejected (score reset to
-- 0) as soon as a narrower width is met that either is repeated by a
-- neighbouring entry or is the last entry of the list and greater than 1.
-- The first candidate with a positive score wins.
-- @param stat list of leading-space widths (modified: sorted descending)
-- @return the chosen width, or nil when no candidate scored
-- @return the score of the chosen width, or 0
local function optimal_indent_from_stat(stat)
  local total = #stat
  if total == 0 then return nil, 0 end
  table.sort(stat, function(lhs, rhs) return lhs > rhs end)
  for candidate_idx = 1, total do
    local candidate = stat[candidate_idx]
    local matches = 0
    for other_idx = 1, total do
      local width = stat[other_idx]
      if other_idx ~= candidate_idx and width % candidate == 0 then
        matches = matches + 1
      elseif candidate > width then
        -- A narrower width that clearly is in use disqualifies the candidate.
        local narrower_in_use =
          indent_occurrences_more_than_once(stat, other_idx)
          or (other_idx == total and width > 1)
        if narrower_in_use then
          matches = 0
          break
        end
      end
    end
    if matches > 0 then
      return candidate, matches
    end
  end
  return nil, 0
end


--- Escape a literal comment token so it can be embedded in a Lua pattern.
-- Each of the characters `* - % [ ] . ( ) + ? ^ $` is prefixed with `%`;
-- every other byte is copied unchanged.
-- @param token literal comment delimiter, e.g. "--" or "/*"
-- @return the escaped token as a single string
local function escape_comment_tokens(token)
  -- One gsub pass replaces the per-character scan and repeated string
  -- concatenation; "%%%0" expands to a literal "%" followed by the match.
  -- The outer parentheses drop gsub's second result (the match count).
  return (token:gsub("[%*%-%%%[%]%.%(%)%+%?%^%$]", "%%%0"))
end


--- Collect the comment and string matchers of a syntax definition.
-- Results are memoised in `comments_cache`, keyed by the syntax table.
-- Each returned entry is one of:
--   {"p", start_pattern [, end_pattern]}          Lua patterns
--   {"r", start_regex [, end_regex], r = source}  compiled regexes; `r` keeps
--                                                 the start expression text
-- Start expressions of "comment" rules are rewritten to match from the
-- beginning of the line after optional whitespace. "string" rules are only
-- kept when they are start/end pairs, and their start is left untouched.
-- Rules that embed another syntax contribute that syntax's entries.
-- When no rule yields anything, `syntax.comment` and `syntax.block_comment`
-- are used as a fallback.
-- NOTE(review): `regex` is a global that is not declared in this file,
-- presumably supplied by the host editor.
-- @param syntax syntax table with a `patterns` list
-- @param _loop  internal recursion depth; calls deeper than 5 return nothing
-- @return list of entries (line-anchored ones first), or nil when empty
local function get_comment_patterns(syntax, _loop)
  local depth = _loop or 1
  if depth > 5 then return end

  local cached = comments_cache[syntax]
  if cached then
    return #cached > 0 and cached or nil
  end

  -- Replace a leading "^" (if any) by `leading_ws`, otherwise prepend it.
  local function anchor(expr, leading_ws)
    if expr:sub(1, 1) == "^" then
      expr = expr:sub(2)
    end
    return leading_ws .. expr
  end

  local found = {}
  local rules = syntax.patterns
  for n = 1, #rules do
    local rule = rules[n]
    local kind = rule.type
    if kind == "comment" or kind == "string" then
      local is_comment = kind == "comment"
      if rule.pattern then
        local spec = rule.pattern
        local delimited = type(spec) == "table"
        local opener = delimited and spec[1] or spec
        if is_comment then
          opener = anchor(opener, "^%s*")
        end
        if delimited then
          table.insert(found, {"p", opener, spec[2]})
        elseif is_comment then
          table.insert(found, {"p", opener})
        end
      elseif rule.regex then
        local spec = rule.regex
        local delimited = type(spec) == "table"
        local opener = delimited and spec[1] or spec
        if is_comment then
          opener = anchor(opener, "^\\s*")
        end
        if delimited then
          local start_re = regex.compile(opener)
          local end_re = regex.compile(spec[2])
          table.insert(found, {"r", start_re, end_re, r=opener})
        elseif is_comment then
          local start_re = regex.compile(opener)
          table.insert(found, {"r", start_re, r=opener})
        end
      end
    elseif rule.syntax then
      -- Embedded syntax: either given inline as a table or looked up by name.
      local nested = type(rule.syntax) == 'table' and rule.syntax
        or core_syntax.get("file"..rule.syntax, "")
      local nested_comments = get_comment_patterns(nested, depth + 1)
      if nested_comments then
        for s = 1, #nested_comments do
          table.insert(found, nested_comments[s])
        end
      end
    end
  end

  -- Fallback to the plain comment tokens declared on the syntax itself.
  if #found == 0 then
    if syntax.comment then
      table.insert(found, {"p", "^%s*" .. escape_comment_tokens(syntax.comment)})
    end
    local block = syntax.block_comment
    if block then
      local block_open = escape_comment_tokens(block[1])
      local block_close = escape_comment_tokens(block[2])
      table.insert(found, {"p", "^%s*" .. block_open, block_close})
    end
  end

  -- An entry counts as a comment when its start expression literally
  -- contains the whitespace prefix added by anchor().
  local function is_line_anchored(entry)
    if entry[1] == "p" then
      return string.find(entry[2], "^%s*", 1, true) ~= nil
    elseif entry[1] == "r" then
      return string.find(entry["r"], "^\\s*", 1, true) ~= nil
    end
    return false
  end

  -- Put comments first and strings last
  table.sort(found, function(c1, c2)
    local first, second = is_line_anchored(c1), is_line_anchored(c2)
    return first and not second
  end)

  comments_cache[syntax] = found
  return #found > 0 and found or nil
end


--- Iterate over the lines that are relevant for indentation detection.
-- Returns a coroutine-backed iterator yielding `i, line` for every entry of
-- `lines` that is not whitespace-only, is not recognised as a comment line,
-- and does not lie inside an open multi-line comment/string. `i` is a running
-- count of yielded lines, not the position of the line in `lines`.
-- @param syntax syntax table passed to get_comment_patterns()
-- @param lines  list of line strings
-- @return iterator function usable in a generic `for`
local function get_non_empty_lines(syntax, lines)
  return coroutine.wrap(function()
    -- nil when the syntax provides no comment patterns; no filtering then
    local comments = get_comment_patterns(syntax)

    local i = 0
    -- End delimiter of the currently open block: at most one of these is set,
    -- depending on whether the block was opened by a regex or a Lua pattern.
    local end_regex = nil
    local end_pattern = nil
    local inside_comment = false
    for _, line in ipairs(lines) do
      -- skip lines that consist of whitespace only
      if line:gsub("^%s+", "") ~= "" then
        local is_comment = false
        if comments then
          if not inside_comment then
            -- try each matcher in order; the first one that matches decides
            for c=1, #comments do
              local comment = comments[c]
              if comment[1] == "p" then
                -- Lua pattern entry
                if comment[3] then
                  -- start/end pair
                  local start, ending = line:find(comment[2])
                  if start then
                    -- Only when the end delimiter is missing on the rest of
                    -- this line does a multi-line block open here. If it
                    -- closes on the same line, the line is not flagged.
                    if not line:find(comment[3], ending+1) then
                      is_comment = true
                      inside_comment = true
                      end_pattern = comment[3]
                    end
                    break
                  end
                elseif line:find(comment[2]) then
                  -- single-line comment
                  is_comment = true
                  break
                end
              else
                -- compiled regex entry; same logic as the pattern branch
                -- NOTE(review): regex.ANCHORED semantics come from the host
                -- regex API; confirm that anchoring the end-delimiter search
                -- at `ending+1` is intended.
                if comment[3] then
                  local start, ending = regex.find_offsets(
                    comment[2], line, 1, regex.ANCHORED
                  )
                  if start then
                    if not regex.find_offsets(
                        comment[3], line, ending+1, regex.ANCHORED
                      )
                    then
                      is_comment = true
                      inside_comment = true
                      end_regex = comment[3]
                    end
                    break
                  end
                elseif regex.find_offsets(comment[2], line, 1, regex.ANCHORED) then
                  is_comment = true
                  break
                end
              end
            end
          -- Inside an open block: the line holding the end delimiter closes
          -- the block but is itself still treated as a comment.
          elseif end_pattern and line:find(end_pattern) then
            is_comment = true
            inside_comment = false
            end_pattern = nil
          elseif end_regex and regex.find_offsets(end_regex, line) then
            is_comment = true
            inside_comment = false
            end_regex = nil
          end
        end
        -- lines in the middle of an open block are dropped by the
        -- `inside_comment` check
        if
          not is_comment
          and
          not inside_comment
        then
          i = i + 1
          coroutine.yield(i, line)
        end
      end
    end
  end)
end


--- Guess the indentation style of a document from its leading whitespace.
-- Walks the lines produced by get_non_empty_lines(), recording the width of
-- every leading run of spaces and counting the lines that start with a tab.
-- Sampling covers `auto_detect_max_lines` lines; while nothing has been
-- found the window is extended by the same amount, up to 5 runs in total.
-- @param doc document providing `syntax` and `lines`
-- @return "hard" or "soft"
-- @return indent size (config.indent_size for tabs or when undetermined)
-- @return score backing the decision
local function detect_indent_stat(doc)
  local space_widths = {}
  local tab_lines = 0
  local budget = auto_detect_max_lines
  local attempts = 1
  for n, text in get_non_empty_lines(doc.syntax, doc.lines) do
    -- find() anchored at the start returns the run's last index == its length
    local _, width = text:find("^ +")
    if width then
      space_widths[#space_widths + 1] = width
    elseif text:find("^\t") then
      tab_lines = tab_lines + 1
    end
    local nothing_seen = #space_widths == 0 and tab_lines == 0
    if n == budget and attempts < 5 and nothing_seen then
      -- if nothing found for first lines try at least 4 more times
      budget = budget + auto_detect_max_lines
      attempts = attempts + 1
    elseif n > budget then
      -- Stop parsing when the file is very long. Not needed for heuristic
      -- determination.
      break
    end
  end
  local indent, score = optimal_indent_from_stat(space_widths)
  if tab_lines > score then
    return "hard", config.indent_size, tab_lines
  end
  return "soft", indent or config.indent_size, score or 0
end


--- Run detection for `doc` and store the outcome.
-- A detection whose score is below 2 is not trusted: the configured
-- defaults (config.tab_type / config.indent_size) are stored instead and the
-- entry is marked as unconfirmed. The same table is written to both
-- `cache[doc]` and `doc.indent_info`.
-- @param doc document to analyse
local function update_cache(doc)
  local min_score = 2
  local kind, width, score = detect_indent_stat(doc)
  local confirmed = score >= min_score
  if score < min_score then
    -- use default values
    kind = config.tab_type
    width = config.indent_size
  end
  local info = { type = kind, size = width, confirmed = confirmed }
  cache[doc] = info
  doc.indent_info = info
end

-- Wrap the DocView constructor: every new view is flagged so that its first
-- draw() triggers indentation detection. Deferring the work to draw() keeps
-- detection limited to doc views that are actually shown.
local docview_new = DocView.new
DocView.new = function(self, ...)
  docview_new(self, ...)
  self.init_detectindent = true
end

-- Wrap DocView:draw(): after the regular draw, a view still carrying the
-- `init_detectindent` flag gets its document's indentation resolved once,
-- then the flag is cleared.
local docview_draw = DocView.draw
function DocView:draw(...)
  docview_draw(self, ...)
  if not self.init_detectindent then return end

  local doc = self.doc
  -- perform detection only to ui loaded documents
  if #core.get_views_referencing_doc(doc) > 0 then
    local kind, width, confirmed = doc:get_indent_info()
    if confirmed then
      -- The document already has confirmed settings: just mirror them.
      cache[doc] = { type = kind, size = width, confirmed = confirmed }
    else
      update_cache(doc)
    end
  end
  self.init_detectindent = nil
end

-- Wrap Doc:clean(): for documents this plugin already tracks, detection is
-- run again after the original clean() as long as the indent settings are
-- not confirmed.
local clean = Doc.clean
function Doc:clean(...)
  clean(self, ...)
  if not cache[self] then return end
  -- get_indent_info() returns type, size, confirmed; only the third is needed
  local confirmed = select(3, self:get_indent_info())
  if not confirmed then
    update_cache(self)
  end
end

-- Wrap Doc:on_close(): drop the cached indent info of a closed document.
local on_close = Doc.on_close
function Doc:on_close()
  on_close(self)
  -- Assigning nil to an absent key is a no-op, so no existence check needed.
  cache[self] = nil
end


--- Force the indent type of a document, keeping its current indent size.
-- The new settings are marked as confirmed and written to both `cache[doc]`
-- and `doc.indent_info`.
-- @param doc  document to update
-- @param type indent type to store (callers in this file pass "hard" or "soft")
local function set_indent_type(doc, type)
  -- get_indent_info() returns type, size, ...; keep only the size
  local current_size = select(2, doc:get_indent_info())
  local info = { type = type, size = current_size, confirmed = true }
  cache[doc] = info
  doc.indent_info = info
end

--- Prompt in the command view for "tabs" or "spaces" and apply the choice
-- to the document of the given doc view.
-- @param dv doc view whose `doc` is updated on submit
local function set_indent_type_command(dv)
  -- Only the two known answers are accepted, in any letter case.
  local function is_valid(text)
    local answer = text:lower()
    return answer == "tabs" or answer == "spaces"
  end

  local function suggestions(text)
    return common.fuzzy_match({"tabs", "spaces"}, text)
  end

  local function apply(value)
    local doc = dv.doc
    if value:lower() == "tabs" then
      set_indent_type(doc, "hard")
    else
      set_indent_type(doc, "soft")
    end
  end

  core.command_view:enter("Specify indent style for this file", {
    submit = apply,
    suggest = suggestions,
    validate = is_valid
  })
end


--- Force the indent size of a document, keeping its current indent type.
-- The new settings are marked as confirmed and written to both `cache[doc]`
-- and `doc.indent_info`.
-- @param doc  document to update
-- @param size indent size to store
local function set_indent_size(doc, size)
  -- Parentheses keep only the first result (the indent type).
  local current_type = (doc:get_indent_info())
  local info = { type = current_type, size = size, confirmed = true }
  cache[doc] = info
  doc.indent_info = info
end

--- Prompt in the command view for an indent size and apply it to the
-- document of the given doc view. Input must parse as a number >= 1; the
-- value is rounded down before being stored.
-- @param dv doc view whose `doc` is updated on submit
local function set_indent_size_command(dv)
  local function is_valid(value)
    local number = tonumber(value)
    if number == nil then
      return false
    end
    return number >= 1
  end

  local function apply(value)
    local size = math.floor(tonumber(value))
    set_indent_size(dv.doc, size)
  end

  core.command_view:enter("Specify indent size for current file", {
    submit = apply,
    validate = is_valid
  })
end


-- Prompt commands, registered under the "core.docview" predicate.
local docview_commands = {}
docview_commands["indent:set-file-indent-type"] = set_indent_type_command
docview_commands["indent:set-file-indent-size"] = set_indent_size_command
command.add("core.docview", docview_commands)

-- Build a command predicate that holds when the active view is a DocView
-- whose document has a cache entry of the given indent type.
local function active_doc_indent_is(indent_type)
  return function()
    local view = core.active_view
    return view:is(DocView)
      and cache[view.doc]
      and cache[view.doc].type == indent_type
  end
end

-- Each switch command is only offered while the file uses the other style.
command.add(active_doc_indent_is("soft"), {
  ["indent:switch-file-to-tabs-indentation"] = function()
    set_indent_type(core.active_view.doc, "hard")
  end
})

command.add(active_doc_indent_is("hard"), {
  ["indent:switch-file-to-spaces-indentation"] = function()
    set_indent_type(core.active_view.doc, "soft")
  end
})