aboutsummaryrefslogtreecommitdiff
path: root/src/api/regex.c
blob: 9f6bd3eefab225431f83650f52a7e1cfca61b4c3 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
#include "api.h"

#define PCRE2_CODE_UNIT_WIDTH 8

#include <string.h>
#include <pcre2.h>

/*
 * Finalizer for regex objects: frees the compiled pattern stored at index 1
 * of the table on top of the stack.
 *
 * This function is registered under "__gc" in the `lib` table, which
 * luaopen_regex also stores in the registry as the "regex" metatable. That
 * makes it reachable from Lua as an ordinary field, so it must tolerate being
 * called by hand: with no argument, with a non-table, or more than once on
 * the same object.
 *
 * Returns nothing to Lua.
 */
static int f_pcre_gc(lua_State* L) {
  /* Nothing to release unless the top of the stack is a table. */
  if (lua_gettop(L) < 1 || lua_type(L, -1) != LUA_TTABLE)
    return 0;
  lua_rawgeti(L, -1, 1);
  pcre2_code* re = (pcre2_code*)lua_touserdata(L, -1);
  if (re) {
    pcre2_code_free(re);
    /* Drop the now-dangling pointer so a second finalizer call (or a later
       use of the table) cannot free or dereference it again. The table sits
       at -3 once the nil has been pushed above the fetched userdata. */
    lua_pushnil(L);
    lua_rawseti(L, -3, 1);
  }
  return 0;
}

/*
 * Lua: compile(pattern [, options])
 *
 * Compiles `pattern` with PCRE2 in UTF mode. `options` is an optional string;
 * each of the letters 'i', 'm' and 's' found anywhere in it switches on
 * PCRE2_CASELESS, PCRE2_MULTILINE and PCRE2_DOTALL respectively.
 *
 * On success returns one value: a new table holding the compiled pattern as a
 * light userdata at index 1, with the "regex" metatable applied.
 * On failure returns two values: nil and a message containing the error
 * offset and the PCRE2 error text.
 */
static int f_pcre_compile(lua_State *L) {
  size_t len;
  const char *source = luaL_checklstring(L, 1, &len);

  int flags = PCRE2_UTF;
  if (lua_gettop(L) > 1) {
    const char *letters = luaL_checkstring(L, 2);
    if (strchr(letters, 'i') != NULL) flags |= PCRE2_CASELESS;
    if (strchr(letters, 'm') != NULL) flags |= PCRE2_MULTILINE;
    if (strchr(letters, 's') != NULL) flags |= PCRE2_DOTALL;
  }

  int error_code;
  PCRE2_SIZE error_pos;
  pcre2_code *compiled = pcre2_compile((PCRE2_SPTR)source, len, flags,
                                       &error_code, &error_pos, NULL);

  if (compiled == NULL) {
    /* Report the failure to the caller as (nil, message). */
    PCRE2_UCHAR reason[256];
    char message[1024];
    pcre2_get_error_message(error_code, reason, sizeof(reason));
    lua_pushnil(L);
    len = snprintf(message, sizeof(message), "regex compilation failed at offset %d: %s", (int)error_pos, reason);
    lua_pushlstring(L, message, len);
    return 2;
  }

  /* Wrap the compiled pattern: { [1] = <light userdata> } with the "regex"
     metatable. */
  lua_newtable(L);
  lua_pushlightuserdata(L, compiled);
  lua_rawseti(L, -2, 1);
  luaL_setmetatable(L, "regex");
  return 1;
}

// Lua: cmatch(re, str [, offset [, options]])
//
// Takes a compiled regex (the table produced by compile), the subject string,
// an optional 1-based start offset (default 1) and optional PCRE2 match
// option bits (default 0).
//
// If a match was found, returns the ovector entries of the whole match
// followed by those of each group, each with 1 added. Returns nothing when
// there is no match. Raises a Lua error for any other PCRE2 failure, for a
// table that holds no compiled pattern, and for the \K-in-assertion case
// described below.
static int f_pcre_match(lua_State *L) {
  size_t len, offset = 1, opts = 0;
  luaL_checktype(L, 1, LUA_TTABLE);
  const char* str = luaL_checklstring(L, 2, &len);
  if (lua_gettop(L) > 2)
    offset = luaL_checknumber(L, 3);
  if (lua_gettop(L) > 3)
    opts = luaL_checknumber(L, 4);
  lua_rawgeti(L, 1, 1);
  pcre2_code* re = (pcre2_code*)lua_touserdata(L, -1);
  /* Slot 1 is not a userdata (e.g. an arbitrary table was passed): report it
     instead of handing a NULL pattern to PCRE2. */
  if (!re)
    return luaL_error(L, "regex matching error: table does not hold a compiled regex");
  pcre2_match_data* md = pcre2_match_data_create_from_pattern(re, NULL);
  if (!md)
    return luaL_error(L, "regex matching error: could not allocate match data");
  int rc = pcre2_match(re, (PCRE2_SPTR)str, len, offset - 1, opts, md, NULL);
  if (rc < 0) {
    pcre2_match_data_free(md);
    if (rc != PCRE2_ERROR_NOMATCH) {
      PCRE2_UCHAR buffer[120];
      pcre2_get_error_message(rc, buffer, sizeof(buffer));
      luaL_error(L, "regex matching error %d: %s", rc, (const char*)buffer);
    }
    return 0;
  }
  PCRE2_SIZE* ovector = pcre2_get_ovector_pointer(md);
  if (ovector[0] > ovector[1]) {
    /* We must guard against patterns such as /(?=.\K)/ that use \K in an
    assertion to set the start of a match later than its end. In the editor,
    we just detect this case and give up. */
    /* luaL_error does not return, so the match data has to be released
       before raising. */
    pcre2_match_data_free(md);
    return luaL_error(L, "regex matching error: \\K was used in an assertion to "
    " set the match start after its end");
  }
  /* Two results are pushed per group; make sure the Lua stack can hold them
     before pushing, releasing the match data first if it cannot. */
  if (!lua_checkstack(L, rc*2)) {
    pcre2_match_data_free(md);
    return luaL_error(L, "regex matching error: too many captures for the Lua stack");
  }
  for (int i = 0; i < rc*2; i++)
    lua_pushnumber(L, ovector[i]+1);
  pcre2_match_data_free(md);
  return rc*2;
}

static const luaL_Reg lib[] = {
  { "compile",  f_pcre_compile },
  { "cmatch",   f_pcre_match },
  { "__gc",     f_pcre_gc },
  { NULL,       NULL }
};

/*
 * Module entry point: builds the regex library table and leaves it on the
 * stack as the single return value.
 *
 * The table is filled from `lib`, gets `__name = "regex"`, and is also stored
 * in the registry under the key "regex" -- the name f_pcre_compile passes to
 * luaL_setmetatable -- so it serves as the metatable of compiled patterns.
 * The PCRE2 match-option bits are exported as numeric fields for use as the
 * `options` argument of cmatch.
 */
int luaopen_regex(lua_State *L) {
  luaL_newlib(L, lib);
  lua_pushliteral(L, "regex");
  lua_setfield(L, -2, "__name");
  /* Register the library table itself as the "regex" metatable. */
  lua_pushvalue(L, -1);
  lua_setfield(L, LUA_REGISTRYINDEX, "regex");
  lua_pushnumber(L, PCRE2_ANCHORED);
  lua_setfield(L, -2, "ANCHORED");
  /* Previously exported PCRE2_ANCHORED under this name, which made
     ENDANCHORED behave exactly like ANCHORED. */
  lua_pushnumber(L, PCRE2_ENDANCHORED);
  lua_setfield(L, -2, "ENDANCHORED");
  lua_pushnumber(L, PCRE2_NOTBOL);
  lua_setfield(L, -2, "NOTBOL");
  lua_pushnumber(L, PCRE2_NOTEOL);
  lua_setfield(L, -2, "NOTEOL");
  lua_pushnumber(L, PCRE2_NOTEMPTY);
  lua_setfield(L, -2, "NOTEMPTY");
  lua_pushnumber(L, PCRE2_NOTEMPTY_ATSTART);
  lua_setfield(L, -2, "NOTEMPTY_ATSTART");
  return 1;
}