| | | |
|---|---|---|
| author | Guldoman <giulio.lettieri@gmail.com> | 2021-12-11 03:43:33 +0100 |
| committer | Francesco Abbate <francesco.bbt@gmail.com> | 2021-12-20 12:04:20 +0100 |
| commit | 29318be9c71e1be290e7507e9f8b1c9445aad1b0 (patch) | |
| tree | d0a348684ebfe644adfb283d9cac9400227a6d8d | |
| parent | 37c00c877a5e21827b00c8f134da7ba7dc507abd (diff) | |
| download | lite-xl-29318be9c71e1be290e7507e9f8b1c9445aad1b0.tar.gz, lite-xl-29318be9c71e1be290e7507e9f8b1c9445aad1b0.zip | |
Consume unmatched character correctly
We must consume the whole UTF-8 character, not just a single byte.
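For context, any character outside ASCII occupies more than one byte in UTF-8, so slicing one byte at a time splits it into invalid fragments. A minimal Lua illustration (not part of the commit; "é" is an arbitrary example character):

```lua
local s = "é"        -- U+00E9, encoded in UTF-8 as two bytes: 0xC3 0xA9
print(#s)            --> 2 (string length is counted in bytes, not characters)
print(s:sub(1, 1))   --> only the first byte, an invalid UTF-8 fragment
print(s:byte(2))     --> 169 (0xA9), a continuation byte in the 0x80-0xBF range
```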
| | | |
|---|---|---|
| -rw-r--r-- | data/core/tokenizer.lua | 9 |

1 file changed, 7 insertions, 2 deletions
```diff
diff --git a/data/core/tokenizer.lua b/data/core/tokenizer.lua
index d95baeb1..57c17a0b 100644
--- a/data/core/tokenizer.lua
+++ b/data/core/tokenizer.lua
@@ -237,8 +237,13 @@ function tokenizer.tokenize(incoming_syntax, text, state)
     -- consume character if we didn't match
     if not matched then
-      push_token(res, "normal", text:sub(i, i))
-      i = i + 1
+      local n = 0
+      -- reach the next character
+      while text:byte(i + n + 1) and common.is_utf8_cont(text, i + n + 1) do
+        n = n + 1
+      end
+      push_token(res, "normal", text:sub(i, i + n))
+      i = i + n + 1
     end
   end
```
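A minimal, self-contained Lua sketch of the same skip-continuation-bytes idea; the local `is_utf8_cont` helper here is an assumption standing in for lite-xl's `common.is_utf8_cont`, treating a continuation byte as any byte in the range 0x80-0xBF:

```lua
-- Assumed helper: true if the byte at `offset` is a UTF-8 continuation byte (10xxxxxx).
local function is_utf8_cont(s, offset)
  local byte = s:byte(offset)
  return byte ~= nil and byte >= 0x80 and byte < 0xC0
end

-- Returns the substring covering the whole UTF-8 character starting at `i`,
-- plus the index of the next character, mirroring the loop in the patch.
local function consume_char(text, i)
  local n = 0
  while text:byte(i + n + 1) and is_utf8_cont(text, i + n + 1) do
    n = n + 1
  end
  return text:sub(i, i + n), i + n + 1
end

-- Usage: walk a mixed ASCII/multi-byte string one character at a time.
local text = "aé漢"
local i = 1
while i <= #text do
  local char
  char, i = consume_char(text, i)
  print(char)  --> "a", then "é", then "漢"
end
```

This works because in UTF-8 only the first byte of a multi-byte character falls outside the 0x80-0xBF range, so scanning forward until the next non-continuation byte (or the end of the string) finds the character boundary.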
