Consume unmatched character correctly

We must consume the whole UTF-8 character, not just a single byte. Previously, an unmatched multi-byte character was split into separate one-byte "normal" tokens.
author: Guldoman <giulio.lettieri@gmail.com> 2021-12-11 03:43:33 +0100
committer: Francesco Abbate <francesco.bbt@gmail.com> 2021-12-20 12:04:20 +0100
commit: 29318be9c71e1be290e7507e9f8b1c9445aad1b0 (patch)
tree: d0a348684ebfe644adfb283d9cac9400227a6d8d
parent: 37c00c877a5e21827b00c8f134da7ba7dc507abd (diff)
download: lite-xl-29318be9c71e1be290e7507e9f8b1c9445aad1b0.tar.gz
lite-xl-29318be9c71e1be290e7507e9f8b1c9445aad1b0.zip
1 file changed, 7 insertions, 2 deletions
diff --git a/data/core/tokenizer.lua b/data/core/tokenizer.lua
index d95baeb1..57c17a0b 100644
--- a/data/core/tokenizer.lua
+++ b/data/core/tokenizer.lua
@@ -237,8 +237,13 @@ function tokenizer.tokenize(incoming_syntax, text, state)
 
     -- consume character if we didn't match
     if not matched then
-      push_token(res, "normal", text:sub(i, i))
-      i = i + 1
+      local n = 0
+      -- reach the next character
+      while text:byte(i + n + 1) and common.is_utf8_cont(text, i + n + 1) do
+        n = n + 1
+      end
+      push_token(res, "normal", text:sub(i, i + n))
+      i = i + n + 1
     end
   end
author	Guldoman <giulio.lettieri@gmail.com>	2021-12-11 03:43:33 +0100
committer	Francesco Abbate <francesco.bbt@gmail.com>	2021-12-20 12:04:20 +0100
commit	29318be9c71e1be290e7507e9f8b1c9445aad1b0 (patch)
tree	d0a348684ebfe644adfb283d9cac9400227a6d8d
parent	37c00c877a5e21827b00c8f134da7ba7dc507abd (diff)
download	lite-xl-29318be9c71e1be290e7507e9f8b1c9445aad1b0.tar.gz lite-xl-29318be9c71e1be290e7507e9f8b1c9445aad1b0.zip