1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
|
diff --git a/src/Makefile b/src/Makefile
index 43238912..0911d07a 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -500,7 +500,7 @@ LJVM_MODE= elfasm
LJLIB_O= lib_base.o lib_math.o lib_bit.o lib_string.o lib_table.o \
lib_io.o lib_os.o lib_package.o lib_debug.o lib_jit.o lib_ffi.o \
- lib_buffer.o
+ lib_buffer.o utf8_wrappers.o
LJLIB_C= $(LJLIB_O:.o=.c)
LJCORE_O= lj_assert.o lj_gc.o lj_err.o lj_char.o lj_bc.o lj_obj.o lj_buf.o \
diff --git a/src/luaconf.h b/src/luaconf.h
index 1cf3a03c..c12a2f39 100644
--- a/src/luaconf.h
+++ b/src/luaconf.h
@@ -152,4 +152,9 @@
#define luai_apicheck(L, o) { (void)L; }
#endif
+#if defined(LUA_LIB) && (defined(lib_aux_c) || defined(lib_io_c) || \
+ defined(lib_package_c) || defined(lib_os_c))
+#include "utf8_wrappers.h"
+#endif
+
#endif
diff --git a/src/utf8_wrappers.c b/src/utf8_wrappers.c
new file mode 100644
index 00000000..5b9b1f62
--- /dev/null
+++ b/src/utf8_wrappers.c
@@ -0,0 +1,129 @@
+/**
+ * Wrappers to provide Unicode (UTF-8) support on Windows.
+ *
+ * Copyright (c) 2018 Peter Wu <peter@lekensteyn.nl>
+ * SPDX-License-Identifier: (GPL-2.0-or-later OR MIT)
+ */
+
+#ifdef _WIN32
+#include <windows.h> /* for MultiByteToWideChar */
+#include <wchar.h> /* for _wrename */
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+
+// A environment variable has the maximum length of 32767 characters
+// including the terminator.
+#define MAX_ENV_SIZE 32767
+// Set a high limit in case long paths are enabled.
+#define MAX_PATH_SIZE 4096
+#define MAX_MODE_SIZE 128
+// cmd.exe argument length is reportedly limited to 8192.
+#define MAX_CMD_SIZE 8192
+
+static char env_value[MAX_ENV_SIZE];
+
+FILE *fopen_utf8(const char *pathname, const char *mode) {
+ wchar_t pathname_w[MAX_PATH_SIZE];
+ wchar_t mode_w[MAX_MODE_SIZE];
+ if (!MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, pathname, -1, pathname_w, MAX_PATH_SIZE) ||
+ !MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, mode, -1, mode_w, MAX_MODE_SIZE)) {
+ errno = EINVAL;
+ return NULL;
+ }
+ return _wfopen(pathname_w, mode_w);
+}
+
+FILE *freopen_utf8(const char *pathname, const char *mode, FILE *stream) {
+ wchar_t pathname_w[MAX_PATH_SIZE];
+ wchar_t mode_w[MAX_MODE_SIZE];
+ if (!MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, pathname, -1, pathname_w, MAX_PATH_SIZE) ||
+ !MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, mode, -1, mode_w, MAX_MODE_SIZE)) {
+ // Close stream as documented for the error case.
+ fclose(stream);
+ errno = EINVAL;
+ return NULL;
+ }
+ return _wfreopen(pathname_w, mode_w, stream);
+}
+
+int remove_utf8(const char *pathname) {
+ wchar_t pathname_w[MAX_PATH_SIZE];
+ if (!MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, pathname, -1, pathname_w, MAX_PATH_SIZE)) {
+ errno = EINVAL;
+ return -1;
+ }
+ return _wremove(pathname_w);
+}
+
+int rename_utf8(const char *oldpath, const char *newpath) {
+ wchar_t oldpath_w[MAX_PATH_SIZE];
+ wchar_t newpath_w[MAX_PATH_SIZE];
+ if (!MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, oldpath, -1, oldpath_w, MAX_PATH_SIZE) ||
+ !MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, newpath, -1, newpath_w, MAX_PATH_SIZE)) {
+ errno = EINVAL;
+ return -1;
+ }
+ return _wrename(oldpath_w, newpath_w);
+}
+
+FILE *popen_utf8(const char *command, const char *mode) {
+ wchar_t command_w[MAX_CMD_SIZE];
+ wchar_t mode_w[MAX_MODE_SIZE];
+ if (!MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, command, -1, command_w, MAX_CMD_SIZE) ||
+ !MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, mode, -1, mode_w, MAX_MODE_SIZE)) {
+ errno = EINVAL;
+ return NULL;
+ }
+ return _wpopen(command_w, mode_w);
+}
+
+int system_utf8(const char *command) {
+ wchar_t command_w[MAX_CMD_SIZE];
+ if (!MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, command, -1, command_w, MAX_CMD_SIZE)) {
+ errno = EINVAL;
+ return -1;
+ }
+ return _wsystem(command_w);
+}
+
+DWORD GetModuleFileNameA_utf8(HMODULE hModule, LPSTR lpFilename, DWORD nSize) {
+ wchar_t filename_w[MAX_PATH + 1];
+ if (!GetModuleFileNameW(hModule, filename_w, MAX_PATH + 1)) {
+ return 0;
+ }
+ return WideCharToMultiByte(CP_UTF8, WC_ERR_INVALID_CHARS, filename_w, -1, lpFilename, nSize, NULL, NULL);
+}
+
+HMODULE LoadLibraryExA_utf8(LPCSTR lpLibFileName, HANDLE hFile, DWORD dwFlags) {
+ wchar_t pathname_w[MAX_PATH_SIZE];
+ if (!MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, lpLibFileName, -1, pathname_w, MAX_PATH_SIZE)) {
+ SetLastError(ERROR_INVALID_NAME);
+ return NULL;
+ }
+ return LoadLibraryExW(pathname_w, hFile, dwFlags);
+}
+
+char* getenv_utf8(const char *varname) {
+ /** This implementation is not thread safe.
+ * The string is only valid until the next call to getenv.
+ * This behavior is allowed per POSIX.1-2017 where it was said that:
+ * > The returned string pointer might be invalidated or the string content might be overwritten by a subsequent call to getenv(), setenv(), unsetenv(), or (if supported) putenv() but they shall not be affected by a call to any other function in this volume of POSIX.1-2017.
+ * > The returned string pointer might also be invalidated if the calling thread is terminated.
+ * > The getenv() function need not be thread-safe.
+ */
+ wchar_t *value_w;
+ wchar_t varname_w[MAX_ENV_SIZE];
+
+ if (!MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, varname, -1, varname_w, MAX_ENV_SIZE))
+ return NULL;
+ value_w = _wgetenv((const wchar_t *) varname_w);
+ if (!value_w)
+ return NULL;
+
+ if (!WideCharToMultiByte(CP_UTF8, WC_ERR_INVALID_CHARS, value_w, -1, env_value, MAX_ENV_SIZE, NULL, NULL))
+ return NULL;
+
+ return env_value;
+}
+#endif
diff --git a/src/utf8_wrappers.h b/src/utf8_wrappers.h
new file mode 100644
index 00000000..51ee1e2e
--- /dev/null
+++ b/src/utf8_wrappers.h
@@ -0,0 +1,57 @@
+/**
+ * Wrappers to provide Unicode (UTF-8) support on Windows.
+ *
+ * Copyright (c) 2018 Peter Wu <peter@lekensteyn.nl>
+ * SPDX-License-Identifier: (GPL-2.0-or-later OR MIT)
+ */
+
+#ifdef _WIN32
+
+#ifndef UTF8_WRAPPERS_H
+#define UTF8_WRAPPERS_H
+
+#if defined(lib_package_c) || defined(lib_aux_c) || defined(lib_io_c)
+#include <stdio.h> /* for lib_package_c */
+FILE *fopen_utf8(const char *pathname, const char *mode);
+#define fopen fopen_utf8
+#endif
+
+#ifdef lib_aux_c
+#include <stdio.h>
+FILE *freopen_utf8(const char *pathname, const char *mode, FILE *stream);
+#define freopen freopen_utf8
+#endif
+
+#ifdef lib_io_c
+FILE *popen_utf8(const char *command, const char *mode);
+#define _popen popen_utf8
+#endif
+
+#ifdef lib_os_c
+#include <stdio.h>
+#ifndef _MSC_VER
+ #define UTF8CRTIMP
+#else
+ #include <windows.h>
+ #define UTF8CRTIMP _ACRTIMP
+#endif
+UTF8CRTIMP int remove_utf8(const char *pathname);
+UTF8CRTIMP int rename_utf8(const char *oldpath, const char *newpath);
+UTF8CRTIMP int system_utf8(const char *command);
+UTF8CRTIMP char *getenv_utf8(const char *varname);
+#define remove remove_utf8
+#define rename rename_utf8
+#define system system_utf8
+#define getenv getenv_utf8
+#endif
+
+#ifdef lib_package_c
+#include <windows.h>
+DWORD GetModuleFileNameA_utf8(HMODULE hModule, LPSTR lpFilename, DWORD nSize);
+HMODULE LoadLibraryExA_utf8(LPCSTR lpLibFileName, HANDLE hFile, DWORD dwFlags);
+#define GetModuleFileNameA GetModuleFileNameA_utf8
+#define LoadLibraryExA LoadLibraryExA_utf8
+#endif
+
+#endif /* UTF8_WRAPPERS_H */
+#endif /* _WIN32 */
|