diff options
Diffstat (limited to 'lib')
120 files changed, 3385 insertions, 7543 deletions
diff --git a/lib/libc/include/wasm-wasi-musl/__header_dirent.h b/lib/libc/include/wasm-wasi-musl/__header_dirent.h index 9779065ca7..ccf3deffd9 100644 --- a/lib/libc/include/wasm-wasi-musl/__header_dirent.h +++ b/lib/libc/include/wasm-wasi-musl/__header_dirent.h @@ -11,6 +11,12 @@ #define DT_REG __WASI_FILETYPE_REGULAR_FILE #define DT_UNKNOWN __WASI_FILETYPE_UNKNOWN +#define IFTODT(x) (__wasilibc_iftodt(x)) +#define DTTOIF(x) (__wasilibc_dttoif(x)) + +int __wasilibc_iftodt(int x); +int __wasilibc_dttoif(int x); + #include <__struct_dirent.h> #include <__typedef_DIR.h> diff --git a/lib/libc/include/wasm-wasi-musl/__header_time.h b/lib/libc/include/wasm-wasi-musl/__header_time.h index 2d56b1e76f..2a2ff91d8a 100644 --- a/lib/libc/include/wasm-wasi-musl/__header_time.h +++ b/lib/libc/include/wasm-wasi-musl/__header_time.h @@ -16,12 +16,8 @@ extern const struct __clockid _CLOCK_MONOTONIC; #define CLOCK_MONOTONIC (&_CLOCK_MONOTONIC) -extern const struct __clockid _CLOCK_PROCESS_CPUTIME_ID; -#define CLOCK_PROCESS_CPUTIME_ID (&_CLOCK_PROCESS_CPUTIME_ID) extern const struct __clockid _CLOCK_REALTIME; #define CLOCK_REALTIME (&_CLOCK_REALTIME) -extern const struct __clockid _CLOCK_THREAD_CPUTIME_ID; -#define CLOCK_THREAD_CPUTIME_ID (&_CLOCK_THREAD_CPUTIME_ID) /* * TIME_UTC is the only standardized time base value. diff --git a/lib/libc/include/wasm-wasi-musl/__macro_PAGESIZE.h b/lib/libc/include/wasm-wasi-musl/__macro_PAGESIZE.h index 0243c98c9a..d89222050c 100644 --- a/lib/libc/include/wasm-wasi-musl/__macro_PAGESIZE.h +++ b/lib/libc/include/wasm-wasi-musl/__macro_PAGESIZE.h @@ -5,6 +5,11 @@ * The page size in WebAssembly is fixed at 64 KiB. If this ever changes, * it's expected that applications will need to opt in, so we can change * this. + * + * If this ever needs to be a value outside the range of an `int`, the + * `getpagesize` function which returns this value will need special + * consideration. 
POSIX has deprecated `getpagesize` in favor of + * `sysconf(_SC_PAGESIZE)` which does not have this problem. */ #define PAGESIZE (0x10000) diff --git a/lib/libc/include/wasm-wasi-musl/__struct_sockaddr.h b/lib/libc/include/wasm-wasi-musl/__struct_sockaddr.h index 668fde3e5c..9891b90bea 100644 --- a/lib/libc/include/wasm-wasi-musl/__struct_sockaddr.h +++ b/lib/libc/include/wasm-wasi-musl/__struct_sockaddr.h @@ -7,7 +7,7 @@ #include <__typedef_sa_family_t.h> struct sockaddr { - _Alignas(max_align_t) sa_family_t sa_family; + __attribute__((aligned(__BIGGEST_ALIGNMENT__))) sa_family_t sa_family; char sa_data[0]; }; diff --git a/lib/libc/include/wasm-wasi-musl/ctype.h b/lib/libc/include/wasm-wasi-musl/ctype.h index 7936536f57..32bcef4dab 100644 --- a/lib/libc/include/wasm-wasi-musl/ctype.h +++ b/lib/libc/include/wasm-wasi-musl/ctype.h @@ -64,7 +64,9 @@ int isascii(int); int toascii(int); #define _tolower(a) ((a)|0x20) #define _toupper(a) ((a)&0x5f) +#ifndef __cplusplus #define isascii(a) (0 ? 
isascii(a) : (unsigned)(a) < 128) +#endif #endif diff --git a/lib/libc/include/wasm-wasi-musl/limits.h b/lib/libc/include/wasm-wasi-musl/limits.h index a78cb2f7fb..2fc0d2a38a 100644 --- a/lib/libc/include/wasm-wasi-musl/limits.h +++ b/lib/libc/include/wasm-wasi-musl/limits.h @@ -70,7 +70,7 @@ #define PTHREAD_STACK_MIN 2048 #define PTHREAD_DESTRUCTOR_ITERATIONS 4 #endif -#ifdef __wasilibc_unmodified_upstream /* WASI has no semaphores */ +#if defined(__wasilibc_unmodified_upstream) || defined(_REENTRANT) #define SEM_VALUE_MAX 0x7fffffff #define SEM_NSEMS_MAX 256 #endif diff --git a/lib/libc/include/wasm-wasi-musl/locale.h b/lib/libc/include/wasm-wasi-musl/locale.h index 228e5026c8..6a62d1f5a7 100644 --- a/lib/libc/include/wasm-wasi-musl/locale.h +++ b/lib/libc/include/wasm-wasi-musl/locale.h @@ -8,7 +8,9 @@ extern "C" { #include <features.h> #ifdef __wasilibc_unmodified_upstream /* Use the compiler's definition of NULL */ -#ifdef __cplusplus +#if __cplusplus >= 201103L +#define NULL nullptr +#elif defined(__cplusplus) #define NULL 0L #else #define NULL ((void*)0) diff --git a/lib/libc/include/wasm-wasi-musl/netinet/in.h b/lib/libc/include/wasm-wasi-musl/netinet/in.h index b09efab018..d9422af2ea 100644 --- a/lib/libc/include/wasm-wasi-musl/netinet/in.h +++ b/lib/libc/include/wasm-wasi-musl/netinet/in.h @@ -60,6 +60,7 @@ struct ipv6_mreq { #define INADDR_BROADCAST ((in_addr_t) 0xffffffff) #define INADDR_NONE ((in_addr_t) 0xffffffff) #define INADDR_LOOPBACK ((in_addr_t) 0x7f000001) +#define INADDR_DUMMY ((in_addr_t) 0xc0000008) #define INADDR_UNSPEC_GROUP ((in_addr_t) 0xe0000000) #define INADDR_ALLHOSTS_GROUP ((in_addr_t) 0xe0000001) diff --git a/lib/libc/include/wasm-wasi-musl/netinet/tcp.h b/lib/libc/include/wasm-wasi-musl/netinet/tcp.h index b7b997f5fd..fad1d84494 100644 --- a/lib/libc/include/wasm-wasi-musl/netinet/tcp.h +++ b/lib/libc/include/wasm-wasi-musl/netinet/tcp.h @@ -80,6 +80,8 @@ enum { TCP_NLA_SRTT, TCP_NLA_TIMEOUT_REHASH, TCP_NLA_BYTES_NOTSENT, + 
TCP_NLA_EDT, + TCP_NLA_TTL, }; #if defined(_GNU_SOURCE) || defined(_BSD_SOURCE) @@ -281,12 +283,21 @@ struct tcp_repair_window { uint32_t rcv_wup; }; +#define TCP_RECEIVE_ZEROCOPY_FLAG_TLB_CLEAN_HINT 0x1 + struct tcp_zerocopy_receive { uint64_t address; uint32_t length; uint32_t recv_skip_hint; uint32_t inq; int32_t err; + uint64_t copybuf_address; + int32_t copybuf_len; + uint32_t flags; + uint64_t msg_control; + uint64_t msg_controllen; + uint32_t msg_flags; + uint32_t reserved; }; #endif diff --git a/lib/libc/include/wasm-wasi-musl/signal.h b/lib/libc/include/wasm-wasi-musl/signal.h index ae74966b96..75b5e55284 100644 --- a/lib/libc/include/wasm-wasi-musl/signal.h +++ b/lib/libc/include/wasm-wasi-musl/signal.h @@ -82,6 +82,8 @@ typedef struct sigaltstack stack_t; #define SEGV_ACCERR 2 #define SEGV_BNDERR 3 #define SEGV_PKUERR 4 +#define SEGV_MTEAERR 8 +#define SEGV_MTESERR 9 #define BUS_ADRALN 1 #define BUS_ADRERR 2 @@ -183,6 +185,9 @@ struct sigaction { #define sa_handler __sa_handler.sa_handler #define sa_sigaction __sa_handler.sa_sigaction +#define SA_UNSUPPORTED 0x00000400 +#define SA_EXPOSE_TAGBITS 0x00000800 + struct sigevent { union sigval sigev_value; int sigev_signo; @@ -277,6 +282,9 @@ void (*sigset(int, void (*)(int)))(int); #if defined(_BSD_SOURCE) || defined(_GNU_SOURCE) #define NSIG _NSIG typedef void (*sig_t)(int); + +#define SYS_SECCOMP 1 +#define SYS_USER_DISPATCH 2 #endif #ifdef _GNU_SOURCE diff --git a/lib/libc/include/wasm-wasi-musl/stdc-predef.h b/lib/libc/include/wasm-wasi-musl/stdc-predef.h index f8cd4b8911..af1a27998f 100644 --- a/lib/libc/include/wasm-wasi-musl/stdc-predef.h +++ b/lib/libc/include/wasm-wasi-musl/stdc-predef.h @@ -7,4 +7,7 @@ #define __STDC_IEC_559__ 1 #endif +#define __STDC_UTF_16__ 1 +#define __STDC_UTF_32__ 1 + #endif diff --git a/lib/libc/include/wasm-wasi-musl/stdio.h b/lib/libc/include/wasm-wasi-musl/stdio.h index 0c3aff9c2c..d63d739f0f 100644 --- a/lib/libc/include/wasm-wasi-musl/stdio.h +++ 
b/lib/libc/include/wasm-wasi-musl/stdio.h @@ -28,7 +28,9 @@ extern "C" { #include <bits/alltypes.h> #ifdef __wasilibc_unmodified_upstream /* Use the compiler's definition of NULL */ -#ifdef __cplusplus +#if __cplusplus >= 201103L +#define NULL nullptr +#elif defined(__cplusplus) #define NULL 0L #else #define NULL ((void*)0) diff --git a/lib/libc/include/wasm-wasi-musl/stdlib.h b/lib/libc/include/wasm-wasi-musl/stdlib.h index e635275d68..1bcb9ab0aa 100644 --- a/lib/libc/include/wasm-wasi-musl/stdlib.h +++ b/lib/libc/include/wasm-wasi-musl/stdlib.h @@ -13,7 +13,9 @@ extern "C" { #include <features.h> #ifdef __wasilibc_unmodified_upstream /* Use the compiler's definition of NULL */ -#ifdef __cplusplus +#if __cplusplus >= 201103L +#define NULL nullptr +#elif defined(__cplusplus) #define NULL 0L #else #define NULL ((void*)0) @@ -171,6 +173,7 @@ int clearenv(void); #define WCOREDUMP(s) ((s) & 0x80) #define WIFCONTINUED(s) ((s) == 0xffff) void *reallocarray (void *, size_t, size_t); +void qsort_r (void *, size_t, size_t, int (*)(const void *, const void *, void *), void *); #endif #endif diff --git a/lib/libc/include/wasm-wasi-musl/string.h b/lib/libc/include/wasm-wasi-musl/string.h index c2d464c889..dc47b7aee0 100644 --- a/lib/libc/include/wasm-wasi-musl/string.h +++ b/lib/libc/include/wasm-wasi-musl/string.h @@ -12,7 +12,9 @@ extern "C" { #include <features.h> #ifdef __wasilibc_unmodified_upstream /* Use the compiler's definition of NULL */ -#ifdef __cplusplus +#if __cplusplus >= 201103L +#define NULL nullptr +#elif defined(__cplusplus) #define NULL 0L #else #define NULL ((void*)0) diff --git a/lib/libc/include/wasm-wasi-musl/sys/mman.h b/lib/libc/include/wasm-wasi-musl/sys/mman.h index 80615c5d57..335ba2d714 100644 --- a/lib/libc/include/wasm-wasi-musl/sys/mman.h +++ b/lib/libc/include/wasm-wasi-musl/sys/mman.h @@ -44,6 +44,7 @@ extern "C" { #define MAP_HUGE_SHIFT 26 #define MAP_HUGE_MASK 0x3f +#define MAP_HUGE_16KB (14 << 26) #define MAP_HUGE_64KB (16 << 26) #define 
MAP_HUGE_512KB (19 << 26) #define MAP_HUGE_1MB (20 << 26) diff --git a/lib/libc/include/wasm-wasi-musl/sys/socket.h b/lib/libc/include/wasm-wasi-musl/sys/socket.h index cea24cfd47..4d574c6627 100644 --- a/lib/libc/include/wasm-wasi-musl/sys/socket.h +++ b/lib/libc/include/wasm-wasi-musl/sys/socket.h @@ -298,6 +298,8 @@ struct linger { #define SCM_TXTIME SO_TXTIME #define SO_BINDTOIFINDEX 62 #define SO_DETACH_REUSEPORT_BPF 68 +#define SO_PREFER_BUSY_POLL 69 +#define SO_BUSY_POLL_BUDGET 70 #ifndef SOL_SOCKET #define SOL_SOCKET 1 @@ -404,9 +406,10 @@ int shutdown (int, int); int bind (int, const struct sockaddr *, socklen_t); int connect (int, const struct sockaddr *, socklen_t); int listen (int, int); +#endif + int accept (int, struct sockaddr *__restrict, socklen_t *__restrict); int accept4(int, struct sockaddr *__restrict, socklen_t *__restrict, int); -#endif #ifdef __wasilibc_unmodified_upstream /* WASI has no getsockname/getpeername */ int getsockname (int, struct sockaddr *__restrict, socklen_t *__restrict); diff --git a/lib/libc/include/wasm-wasi-musl/sys/time.h b/lib/libc/include/wasm-wasi-musl/sys/time.h index 389cdcbb02..0f736551f6 100644 --- a/lib/libc/include/wasm-wasi-musl/sys/time.h +++ b/lib/libc/include/wasm-wasi-musl/sys/time.h @@ -23,9 +23,7 @@ struct itimerval { int getitimer (int, struct itimerval *); int setitimer (int, const struct itimerval *__restrict, struct itimerval *__restrict); #endif -#ifdef __wasilibc_unmodified_upstream /* WASI libc doesn't build the legacy functions */ int utimes (const char *, const struct timeval [2]); -#endif #if defined(_GNU_SOURCE) || defined(_BSD_SOURCE) struct timezone { @@ -34,7 +32,9 @@ struct timezone { }; #ifdef __wasilibc_unmodified_upstream /* WASI libc doesn't build the legacy functions */ int futimes(int, const struct timeval [2]); +#endif int futimesat(int, const char *, const struct timeval [2]); +#ifdef __wasilibc_unmodified_upstream /* WASI libc doesn't build the legacy functions */ int lutimes(const 
char *, const struct timeval [2]); #endif #ifdef __wasilibc_unmodified_upstream /* WASI has no way to set the time */ diff --git a/lib/libc/include/wasm-wasi-musl/time.h b/lib/libc/include/wasm-wasi-musl/time.h index 1fb87689d1..f53414878a 100644 --- a/lib/libc/include/wasm-wasi-musl/time.h +++ b/lib/libc/include/wasm-wasi-musl/time.h @@ -8,7 +8,9 @@ extern "C" { #include <features.h> #ifdef __wasilibc_unmodified_upstream /* Use the compiler's definition of NULL */ -#ifdef __cplusplus +#if __cplusplus >= 201103L +#define NULL nullptr +#elif defined(__cplusplus) #define NULL 0L #else #define NULL ((void*)0) diff --git a/lib/libc/include/wasm-wasi-musl/unistd.h b/lib/libc/include/wasm-wasi-musl/unistd.h index 7ca99aeca0..b5cb5c6630 100644 --- a/lib/libc/include/wasm-wasi-musl/unistd.h +++ b/lib/libc/include/wasm-wasi-musl/unistd.h @@ -15,12 +15,16 @@ extern "C" { #define SEEK_SET 0 #define SEEK_CUR 1 #define SEEK_END 2 +#define SEEK_DATA 3 +#define SEEK_HOLE 4 #else #include <__header_unistd.h> #endif #ifdef __wasilibc_unmodified_upstream /* Use the compiler's definition of NULL */ -#ifdef __cplusplus +#if __cplusplus >= 201103L +#define NULL nullptr +#elif defined(__cplusplus) #define NULL 0L #else #define NULL ((void*)0) @@ -240,7 +244,9 @@ void *sbrk(intptr_t); pid_t vfork(void); int vhangup(void); int chroot(const char *); +#endif int getpagesize(void); +#ifdef __wasilibc_unmodified_upstream /* WASI has no processes */ int getdtablesize(void); int sethostname(const char *, size_t); int getdomainname(char *, size_t); diff --git a/lib/libc/include/wasm-wasi-musl/wasi/api.h b/lib/libc/include/wasm-wasi-musl/wasi/api.h index 7a208be51e..1ab76994ed 100644 --- a/lib/libc/include/wasm-wasi-musl/wasi/api.h +++ b/lib/libc/include/wasm-wasi-musl/wasi/api.h @@ -1,13 +1,9 @@ /** - * THIS FILE IS AUTO-GENERATED from the following files: - * wasi_snapshot_preview1.witx - * - * To regenerate this file execute: - * - * cargo run --manifest-path tools/wasi-headers/Cargo.toml 
generate-libc - * - * Modifications to this file will cause CI to fail, the code generator tool - * must be modified to change this file. + * <wasi/api.h>. This file contains declarations describing the WASI ABI + * as of "snapshot preview1". It was originally auto-generated from + * wasi_snapshot_preview1.witx, however WASI is in the process of + * transitioning to a new IDL and header file generator, and this file + * is temporarily being manually maintained. * * @file * This file describes the [WASI] interface, consisting of functions, types, @@ -662,6 +658,11 @@ typedef uint64_t __wasi_rights_t; #define __WASI_RIGHTS_SOCK_SHUTDOWN ((__wasi_rights_t)(1 << 28)) /** + * The right to invoke `sock_accept`. + */ +#define __WASI_RIGHTS_SOCK_ACCEPT ((__wasi_rights_t)(1 << 29)) + +/** * A file descriptor handle. */ typedef int __wasi_fd_t; @@ -1301,200 +1302,6 @@ _Static_assert(sizeof(__wasi_exitcode_t) == 4, "witx calculated size"); _Static_assert(_Alignof(__wasi_exitcode_t) == 4, "witx calculated align"); /** - * Signal condition. - */ -typedef uint8_t __wasi_signal_t; - -/** - * No signal. Note that POSIX has special semantics for `kill(pid, 0)`, - * so this value is reserved. - */ -#define __WASI_SIGNAL_NONE (UINT8_C(0)) - -/** - * Hangup. - * Action: Terminates the process. - */ -#define __WASI_SIGNAL_HUP (UINT8_C(1)) - -/** - * Terminate interrupt signal. - * Action: Terminates the process. - */ -#define __WASI_SIGNAL_INT (UINT8_C(2)) - -/** - * Terminal quit signal. - * Action: Terminates the process. - */ -#define __WASI_SIGNAL_QUIT (UINT8_C(3)) - -/** - * Illegal instruction. - * Action: Terminates the process. - */ -#define __WASI_SIGNAL_ILL (UINT8_C(4)) - -/** - * Trace/breakpoint trap. - * Action: Terminates the process. - */ -#define __WASI_SIGNAL_TRAP (UINT8_C(5)) - -/** - * Process abort signal. - * Action: Terminates the process. - */ -#define __WASI_SIGNAL_ABRT (UINT8_C(6)) - -/** - * Access to an undefined portion of a memory object. 
- * Action: Terminates the process. - */ -#define __WASI_SIGNAL_BUS (UINT8_C(7)) - -/** - * Erroneous arithmetic operation. - * Action: Terminates the process. - */ -#define __WASI_SIGNAL_FPE (UINT8_C(8)) - -/** - * Kill. - * Action: Terminates the process. - */ -#define __WASI_SIGNAL_KILL (UINT8_C(9)) - -/** - * User-defined signal 1. - * Action: Terminates the process. - */ -#define __WASI_SIGNAL_USR1 (UINT8_C(10)) - -/** - * Invalid memory reference. - * Action: Terminates the process. - */ -#define __WASI_SIGNAL_SEGV (UINT8_C(11)) - -/** - * User-defined signal 2. - * Action: Terminates the process. - */ -#define __WASI_SIGNAL_USR2 (UINT8_C(12)) - -/** - * Write on a pipe with no one to read it. - * Action: Ignored. - */ -#define __WASI_SIGNAL_PIPE (UINT8_C(13)) - -/** - * Alarm clock. - * Action: Terminates the process. - */ -#define __WASI_SIGNAL_ALRM (UINT8_C(14)) - -/** - * Termination signal. - * Action: Terminates the process. - */ -#define __WASI_SIGNAL_TERM (UINT8_C(15)) - -/** - * Child process terminated, stopped, or continued. - * Action: Ignored. - */ -#define __WASI_SIGNAL_CHLD (UINT8_C(16)) - -/** - * Continue executing, if stopped. - * Action: Continues executing, if stopped. - */ -#define __WASI_SIGNAL_CONT (UINT8_C(17)) - -/** - * Stop executing. - * Action: Stops executing. - */ -#define __WASI_SIGNAL_STOP (UINT8_C(18)) - -/** - * Terminal stop signal. - * Action: Stops executing. - */ -#define __WASI_SIGNAL_TSTP (UINT8_C(19)) - -/** - * Background process attempting read. - * Action: Stops executing. - */ -#define __WASI_SIGNAL_TTIN (UINT8_C(20)) - -/** - * Background process attempting write. - * Action: Stops executing. - */ -#define __WASI_SIGNAL_TTOU (UINT8_C(21)) - -/** - * High bandwidth data is available at a socket. - * Action: Ignored. - */ -#define __WASI_SIGNAL_URG (UINT8_C(22)) - -/** - * CPU time limit exceeded. - * Action: Terminates the process. - */ -#define __WASI_SIGNAL_XCPU (UINT8_C(23)) - -/** - * File size limit exceeded. 
- * Action: Terminates the process. - */ -#define __WASI_SIGNAL_XFSZ (UINT8_C(24)) - -/** - * Virtual timer expired. - * Action: Terminates the process. - */ -#define __WASI_SIGNAL_VTALRM (UINT8_C(25)) - -/** - * Profiling timer expired. - * Action: Terminates the process. - */ -#define __WASI_SIGNAL_PROF (UINT8_C(26)) - -/** - * Window changed. - * Action: Ignored. - */ -#define __WASI_SIGNAL_WINCH (UINT8_C(27)) - -/** - * I/O possible. - * Action: Terminates the process. - */ -#define __WASI_SIGNAL_POLL (UINT8_C(28)) - -/** - * Power failure. - * Action: Terminates the process. - */ -#define __WASI_SIGNAL_PWR (UINT8_C(29)) - -/** - * Bad system call. - * Action: Terminates the process. - */ -#define __WASI_SIGNAL_SYS (UINT8_C(30)) - -_Static_assert(sizeof(__wasi_signal_t) == 1, "witx calculated size"); -_Static_assert(_Alignof(__wasi_signal_t) == 1, "witx calculated align"); - -/** * Flags provided to `sock_recv`. */ typedef uint16_t __wasi_riflags_t; @@ -1592,7 +1399,8 @@ _Static_assert(_Alignof(__wasi_prestat_t) == 4, "witx calculated align"); /** * Read command-line argument data. - * The size of the array should match that returned by `args_sizes_get` + * The size of the array should match that returned by `args_sizes_get`. + * Each argument is expected to be `\0` terminated. */ __wasi_errno_t __wasi_args_get( uint8_t * * argv, @@ -1611,6 +1419,7 @@ __wasi_errno_t __wasi_args_sizes_get( /** * Read environment variable data. * The sizes of the buffers should match that returned by `environ_sizes_get`. + * Key/value pairs are expected to be joined with `=`s, and terminated with `\0`s. */ __wasi_errno_t __wasi_environ_get( uint8_t * * environ, @@ -2182,16 +1991,6 @@ _Noreturn void __wasi_proc_exit( __wasi_exitcode_t rval ); /** - * Send a signal to the process of the calling thread. - * Note: This is similar to `raise` in POSIX. - */ -__wasi_errno_t __wasi_proc_raise( - /** - * The signal condition to trigger. 
- */ - __wasi_signal_t sig -) __attribute__((__warn_unused_result__)); -/** * Temporarily yield execution of the calling thread. * Note: This is similar to `sched_yield` in POSIX. */ @@ -2214,6 +2013,23 @@ __wasi_errno_t __wasi_random_get( __wasi_size_t buf_len ) __attribute__((__warn_unused_result__)); /** + * Accept a new incoming connection. + * Note: This is similar to `accept` in POSIX. + * @return + * New socket connection + */ +__wasi_errno_t __wasi_sock_accept( + /** + * The listening socket. + */ + __wasi_fd_t fd, + /** + * The desired values of the file descriptor flags. + */ + __wasi_fdflags_t flags, + __wasi_fd_t *retptr0 +) __attribute__((__warn_unused_result__)); +/** * Receive a message from a socket. * Note: This is similar to `recv` in POSIX, though it also supports reading * the data into multiple buffers in the manner of `readv`. @@ -2273,6 +2089,25 @@ __wasi_errno_t __wasi_sock_shutdown( ) __attribute__((__warn_unused_result__)); /** @} */ +#ifdef _REENTRANT +/** + * Request a new thread to be created by the host. + * + * The host will create a new instance of the current module sharing its + * memory, find an exported entry function--`wasi_thread_start`--, and call the + * entry function with `start_arg` in the new thread. + * + * @see https://github.com/WebAssembly/wasi-threads/#readme + */ +__wasi_errno_t __wasi_thread_spawn( + /** + * A pointer to an opaque struct to be passed to the module's entry + * function. 
+ */ + void *start_arg +) __attribute__((__warn_unused_result__)); +#endif + #ifdef __cplusplus } #endif diff --git a/lib/libc/include/wasm-wasi-musl/wasi/libc-environ.h b/lib/libc/include/wasm-wasi-musl/wasi/libc-environ.h index b404adda1b..f84ba8e606 100644 --- a/lib/libc/include/wasm-wasi-musl/wasi/libc-environ.h +++ b/lib/libc/include/wasm-wasi-musl/wasi/libc-environ.h @@ -1,6 +1,10 @@ #ifndef __wasi_libc_environ_h #define __wasi_libc_environ_h +/// This header file is a WASI-libc-specific interface, and is not needed by +/// most programs. Most programs should just use the standard `getenv` and +/// related APIs, which take care of all of the details automatically. + #ifdef __cplusplus extern "C" { #endif @@ -12,6 +16,19 @@ void __wasilibc_initialize_environ(void); /// If `__wasilibc_initialize_environ` has not yet been called, call it. void __wasilibc_ensure_environ(void); +/// De-initialize the global environment variable state, so that subsequent +/// calls to `__wasilibc_ensure_environ` call `__wasilibc_initialize_environ`. +void __wasilibc_deinitialize_environ(void); + +/// Call `__wasilibc_initialize_environ` only if `environ` and `_environ` are +/// referenced in the program. +void __wasilibc_maybe_reinitialize_environ_eagerly(void); + +/// Return the value of the `environ` variable. Using `environ` directly +/// requires eager initialization of the environment variables. Using this +/// function instead of `environ` allows initialization to happen lazily. 
+char **__wasilibc_get_environ(void); + #ifdef __cplusplus } #endif diff --git a/lib/libc/include/wasm-wasi-musl/wasi/libc-find-relpath.h b/lib/libc/include/wasm-wasi-musl/wasi/libc-find-relpath.h index 445613f5aa..32dbb03510 100644 --- a/lib/libc/include/wasm-wasi-musl/wasi/libc-find-relpath.h +++ b/lib/libc/include/wasm-wasi-musl/wasi/libc-find-relpath.h @@ -70,7 +70,7 @@ int __wasilibc_find_relpath_alloc( char **relative, size_t *relative_len, int can_realloc -) __attribute__((weak)); +) __attribute__((__weak__)); #ifdef __cplusplus } diff --git a/lib/libc/include/wasm-wasi-musl/wchar.h b/lib/libc/include/wasm-wasi-musl/wchar.h index 4f45539f16..06b088aa9b 100644 --- a/lib/libc/include/wasm-wasi-musl/wchar.h +++ b/lib/libc/include/wasm-wasi-musl/wchar.h @@ -41,7 +41,9 @@ extern "C" { #endif #ifdef __wasilibc_unmodified_upstream /* Use the compiler's definition of NULL */ -#ifdef __cplusplus +#if __cplusplus >= 201103L +#define NULL nullptr +#elif defined(__cplusplus) #define NULL 0L #else #define NULL ((void*)0) diff --git a/lib/libc/wasi/LICENSE b/lib/libc/wasi/LICENSE index edf56c0db1..b0bfe6c10f 100644 --- a/lib/libc/wasi/LICENSE +++ b/lib/libc/wasi/LICENSE @@ -7,6 +7,7 @@ Portions of this software are derived from third-party works covered by their own licenses: dlmalloc/ - CC0; see the notice in malloc.c for details +emmalloc/ - MIT; see the notice in emmalloc.c for details libc-bottom-half/cloudlibc/ - BSD-2-Clause; see the LICENSE file for details libc-top-half/musl/ - MIT; see the COPYRIGHT file for details diff --git a/lib/libc/wasi/dlmalloc/include/unistd.h b/lib/libc/wasi/dlmalloc/include/unistd.h deleted file mode 100644 index 7c4f1faa90..0000000000 --- a/lib/libc/wasi/dlmalloc/include/unistd.h +++ /dev/null @@ -1,10 +0,0 @@ -/* Stub include file to support dlmalloc. */ - -#include <stdint.h> -#include <__macro_PAGESIZE.h> - -#define sysconf(name) PAGESIZE -#define _SC_PAGESIZE - -/* Declare sbrk. 
*/ -void *sbrk(intptr_t increment) __attribute__((__warn_unused_result__)); diff --git a/lib/libc/wasi/dlmalloc/src/dlmalloc.c b/lib/libc/wasi/dlmalloc/src/dlmalloc.c deleted file mode 100644 index 331536b44f..0000000000 --- a/lib/libc/wasi/dlmalloc/src/dlmalloc.c +++ /dev/null @@ -1,98 +0,0 @@ -// This file is a wrapper around malloc.c, which is the upstream source file. -// It sets configuration flags and controls which symbols are exported. - -#include <stddef.h> -#include <malloc.h> - -// Define configuration macros for dlmalloc. - -// WebAssembly doesn't have mmap-style memory allocation. -#define HAVE_MMAP 0 - -// WebAssembly doesn't support shrinking linear memory. -#define MORECORE_CANNOT_TRIM 1 - -// Disable sanity checks to reduce code size. -#define ABORT __builtin_unreachable() - -// If threads are enabled, enable support for threads. -#ifdef _REENTRANT -#define USE_LOCKS 1 -#endif - -// Make malloc deterministic. -#define LACKS_TIME_H 1 - -// Disable malloc statistics generation to reduce code size. -#define NO_MALLINFO 1 -#define NO_MALLOC_STATS 1 - -// Align malloc regions to 16, to avoid unaligned SIMD accesses. -#define MALLOC_ALIGNMENT 16 - -// Declare errno values used by dlmalloc. We define them like this to avoid -// putting specific errno values in the ABI. -extern const int __ENOMEM; -#define ENOMEM __ENOMEM -extern const int __EINVAL; -#define EINVAL __EINVAL - -// Define USE_DL_PREFIX so that we leave dlmalloc's names prefixed with 'dl'. -// We define them as "static", and we wrap them with public names below. This -// serves two purposes: -// -// One is to make it easy to control which symbols are exported; dlmalloc -// defines several non-standard functions and we wish to explicitly control -// which functions are part of our public-facing interface. -// -// The other is to protect against compilers optimizing based on the assumption -// that they know what functions with names like "malloc" do. 
Code in the -// implementation will call functions like "dlmalloc" and assume it can use -// the resulting pointers to access the metadata outside of the nominally -// allocated objects. However, if the function were named "malloc", compilers -// might see code like that and assume it has undefined behavior and can be -// optimized away. By using "dlmalloc" in the implementation, we don't need -// -fno-builtin to avoid this problem. -#define USE_DL_PREFIX 1 -#define DLMALLOC_EXPORT static inline - -// This isn't declared with DLMALLOC_EXPORT so make it static explicitly. -static size_t dlmalloc_usable_size(void*); - -// Include the upstream dlmalloc's malloc.c. -#include "malloc.c" - -// Export the public names. - -void *malloc(size_t size) { - return dlmalloc(size); -} - -void free(void *ptr) { - dlfree(ptr); -} - -void *calloc(size_t nmemb, size_t size) { - return dlcalloc(nmemb, size); -} - -void *realloc(void *ptr, size_t size) { - return dlrealloc(ptr, size); -} - -int posix_memalign(void **memptr, size_t alignment, size_t size) { - return dlposix_memalign(memptr, alignment, size); -} - -void* aligned_alloc(size_t alignment, size_t bytes) { - return dlmemalign(alignment, bytes); -} - -size_t malloc_usable_size(void *ptr) { - return dlmalloc_usable_size(ptr); -} - -// Define these to satisfy musl references. 
-void *__libc_malloc(size_t) __attribute__((alias("malloc"))); -void __libc_free(void *) __attribute__((alias("free"))); -void *__libc_calloc(size_t nmemb, size_t size) __attribute__((alias("calloc"))); diff --git a/lib/libc/wasi/dlmalloc/src/malloc.c b/lib/libc/wasi/dlmalloc/src/malloc.c deleted file mode 100644 index 03da739e11..0000000000 --- a/lib/libc/wasi/dlmalloc/src/malloc.c +++ /dev/null @@ -1,6352 +0,0 @@ -/* - This is a version (aka dlmalloc) of malloc/free/realloc written by - Doug Lea and released to the public domain, as explained at - http://creativecommons.org/publicdomain/zero/1.0/ Send questions, - comments, complaints, performance data, etc to dl@cs.oswego.edu - -* Version 2.8.6 Wed Aug 29 06:57:58 2012 Doug Lea - Note: There may be an updated version of this malloc obtainable at - ftp://gee.cs.oswego.edu/pub/misc/malloc.c - Check before installing! - -* Quickstart - - This library is all in one file to simplify the most common usage: - ftp it, compile it (-O3), and link it into another program. All of - the compile-time options default to reasonable values for use on - most platforms. You might later want to step through various - compile-time and dynamic tuning options. - - For convenience, an include file for code using this malloc is at: - ftp://gee.cs.oswego.edu/pub/misc/malloc-2.8.6.h - You don't really need this .h file unless you call functions not - defined in your system include files. The .h file contains only the - excerpts from this file needed for using this malloc on ANSI C/C++ - systems, so long as you haven't changed compile-time options about - naming and tuning parameters. If you do, then you can create your - own malloc.h that does include all settings by cutting at the point - indicated below. Note that you may already by default be using a C - library containing a malloc that is based on some version of this - malloc (for example in linux). 
You might still want to use the one - in this file to customize settings or to avoid overheads associated - with library versions. - -* Vital statistics: - - Supported pointer/size_t representation: 4 or 8 bytes - size_t MUST be an unsigned type of the same width as - pointers. (If you are using an ancient system that declares - size_t as a signed type, or need it to be a different width - than pointers, you can use a previous release of this malloc - (e.g. 2.7.2) supporting these.) - - Alignment: 8 bytes (minimum) - This suffices for nearly all current machines and C compilers. - However, you can define MALLOC_ALIGNMENT to be wider than this - if necessary (up to 128bytes), at the expense of using more space. - - Minimum overhead per allocated chunk: 4 or 8 bytes (if 4byte sizes) - 8 or 16 bytes (if 8byte sizes) - Each malloced chunk has a hidden word of overhead holding size - and status information, and additional cross-check word - if FOOTERS is defined. - - Minimum allocated size: 4-byte ptrs: 16 bytes (including overhead) - 8-byte ptrs: 32 bytes (including overhead) - - Even a request for zero bytes (i.e., malloc(0)) returns a - pointer to something of the minimum allocatable size. - The maximum overhead wastage (i.e., number of extra bytes - allocated than were requested in malloc) is less than or equal - to the minimum size, except for requests >= mmap_threshold that - are serviced via mmap(), where the worst case wastage is about - 32 bytes plus the remainder from a system page (the minimal - mmap unit); typically 4096 or 8192 bytes. - - Security: static-safe; optionally more or less - The "security" of malloc refers to the ability of malicious - code to accentuate the effects of errors (for example, freeing - space that is not currently malloc'ed or overwriting past the - ends of chunks) in code that calls malloc. 
This malloc - guarantees not to modify any memory locations below the base of - heap, i.e., static variables, even in the presence of usage - errors. The routines additionally detect most improper frees - and reallocs. All this holds as long as the static bookkeeping - for malloc itself is not corrupted by some other means. This - is only one aspect of security -- these checks do not, and - cannot, detect all possible programming errors. - - If FOOTERS is defined nonzero, then each allocated chunk - carries an additional check word to verify that it was malloced - from its space. These check words are the same within each - execution of a program using malloc, but differ across - executions, so externally crafted fake chunks cannot be - freed. This improves security by rejecting frees/reallocs that - could corrupt heap memory, in addition to the checks preventing - writes to statics that are always on. This may further improve - security at the expense of time and space overhead. (Note that - FOOTERS may also be worth using with MSPACES.) - - By default detected errors cause the program to abort (calling - "abort()"). You can override this to instead proceed past - errors by defining PROCEED_ON_ERROR. In this case, a bad free - has no effect, and a malloc that encounters a bad address - caused by user overwrites will ignore the bad address by - dropping pointers and indices to all known memory. This may - be appropriate for programs that should continue if at all - possible in the face of programming errors, although they may - run out of memory because dropped memory is never reclaimed. - - If you don't like either of these options, you can define - CORRUPTION_ERROR_ACTION and USAGE_ERROR_ACTION to do anything - else. And if if you are sure that your program using malloc has - no errors or vulnerabilities, you can define INSECURE to 1, - which might (or might not) provide a small performance improvement. 
- - It is also possible to limit the maximum total allocatable - space, using malloc_set_footprint_limit. This is not - designed as a security feature in itself (calls to set limits - are not screened or privileged), but may be useful as one - aspect of a secure implementation. - - Thread-safety: NOT thread-safe unless USE_LOCKS defined non-zero - When USE_LOCKS is defined, each public call to malloc, free, - etc is surrounded with a lock. By default, this uses a plain - pthread mutex, win32 critical section, or a spin-lock if - available for the platform and not disabled by setting - USE_SPIN_LOCKS=0. However, if USE_RECURSIVE_LOCKS is defined, - recursive versions are used instead (which are not required for - base functionality but may be needed in layered extensions). - Using a global lock is not especially fast, and can be a major - bottleneck. It is designed only to provide minimal protection - in concurrent environments, and to provide a basis for - extensions. If you are using malloc in a concurrent program, - consider instead using nedmalloc - (http://www.nedprod.com/programs/portable/nedmalloc/) or - ptmalloc (See http://www.malloc.de), which are derived from - versions of this malloc. - - System requirements: Any combination of MORECORE and/or MMAP/MUNMAP - This malloc can use unix sbrk or any emulation (invoked using - the CALL_MORECORE macro) and/or mmap/munmap or any emulation - (invoked using CALL_MMAP/CALL_MUNMAP) to get and release system - memory. On most unix systems, it tends to work best if both - MORECORE and MMAP are enabled. On Win32, it uses emulations - based on VirtualAlloc. It also uses common C library functions - like memset. - - Compliance: I believe it is compliant with the Single Unix Specification - (See http://www.unix.org). Also SVID/XPG, ANSI C, and probably - others as well. - -* Overview of algorithms - - This is not the fastest, most space-conserving, most portable, or - most tunable malloc ever written.
However it is among the fastest - while also being among the most space-conserving, portable and - tunable. Consistent balance across these factors results in a good - general-purpose allocator for malloc-intensive programs. - - In most ways, this malloc is a best-fit allocator. Generally, it - chooses the best-fitting existing chunk for a request, with ties - broken in approximately least-recently-used order. (This strategy - normally maintains low fragmentation.) However, for requests less - than 256bytes, it deviates from best-fit when there is not an - exactly fitting available chunk by preferring to use space adjacent - to that used for the previous small request, as well as by breaking - ties in approximately most-recently-used order. (These enhance - locality of series of small allocations.) And for very large requests - (>= 256Kb by default), it relies on system memory mapping - facilities, if supported. (This helps avoid carrying around and - possibly fragmenting memory used only for large chunks.) - - All operations (except malloc_stats and mallinfo) have execution - times that are bounded by a constant factor of the number of bits in - a size_t, not counting any clearing in calloc or copying in realloc, - or actions surrounding MORECORE and MMAP that have times - proportional to the number of non-contiguous regions returned by - system allocation routines, which is often just 1. In real-time - applications, you can optionally suppress segment traversals using - NO_SEGMENT_TRAVERSAL, which assures bounded execution even when - system allocators return non-contiguous spaces, at the typical - expense of carrying around more memory and increased fragmentation. - - The implementation is not very modular and seriously overuses - macros. Perhaps someday all C compilers will do as good a job - inlining modular code as can now be done by brute-force expansion, - but now, enough of them seem not to. 
- - Some compilers issue a lot of warnings about code that is - dead/unreachable only on some platforms, and also about intentional - uses of negation on unsigned types. All known cases of each can be - ignored. - - For a longer but out of date high-level description, see - http://gee.cs.oswego.edu/dl/html/malloc.html - -* MSPACES - If MSPACES is defined, then in addition to malloc, free, etc., - this file also defines mspace_malloc, mspace_free, etc. These - are versions of malloc routines that take an "mspace" argument - obtained using create_mspace, to control all internal bookkeeping. - If ONLY_MSPACES is defined, only these versions are compiled. - So if you would like to use this allocator for only some allocations, - and your system malloc for others, you can compile with - ONLY_MSPACES and then do something like... - static mspace mymspace = create_mspace(0,0); // for example - #define mymalloc(bytes) mspace_malloc(mymspace, bytes) - - (Note: If you only need one instance of an mspace, you can instead - use "USE_DL_PREFIX" to relabel the global malloc.) - - You can similarly create thread-local allocators by storing - mspaces as thread-locals. For example: - static __thread mspace tlms = 0; - void* tlmalloc(size_t bytes) { - if (tlms == 0) tlms = create_mspace(0, 0); - return mspace_malloc(tlms, bytes); - } - void tlfree(void* mem) { mspace_free(tlms, mem); } - - Unless FOOTERS is defined, each mspace is completely independent. - You cannot allocate from one and free to another (although - conformance is only weakly checked, so usage errors are not always - caught). If FOOTERS is defined, then each chunk carries around a tag - indicating its originating mspace, and frees are directed to their - originating spaces. Normally, this requires use of locks. - - ------------------------- Compile-time options --------------------------- - -Be careful in setting #define values for numerical constants of type -size_t. 
On some systems, literal values are not automatically extended -to size_t precision unless they are explicitly casted. You can also -use the symbolic values MAX_SIZE_T, SIZE_T_ONE, etc below. - -WIN32 default: defined if _WIN32 defined - Defining WIN32 sets up defaults for MS environment and compilers. - Otherwise defaults are for unix. Beware that there seem to be some - cases where this malloc might not be a pure drop-in replacement for - Win32 malloc: Random-looking failures from Win32 GDI API's (eg; - SetDIBits()) may be due to bugs in some video driver implementations - when pixel buffers are malloc()ed, and the region spans more than - one VirtualAlloc()ed region. Because dlmalloc uses a small (64Kb) - default granularity, pixel buffers may straddle virtual allocation - regions more often than when using the Microsoft allocator. You can - avoid this by using VirtualAlloc() and VirtualFree() for all pixel - buffers rather than using malloc(). If this is not possible, - recompile this malloc with a larger DEFAULT_GRANULARITY. Note: - in cases where MSC and gcc (cygwin) are known to differ on WIN32, - conditions use _MSC_VER to distinguish them. - -DLMALLOC_EXPORT default: extern - Defines how public APIs are declared. If you want to export via a - Windows DLL, you might define this as - #define DLMALLOC_EXPORT extern __declspec(dllexport) - If you want a POSIX ELF shared object, you might use - #define DLMALLOC_EXPORT extern __attribute__((visibility("default"))) - -MALLOC_ALIGNMENT default: (size_t)(2 * sizeof(void *)) - Controls the minimum alignment for malloc'ed chunks. It must be a - power of two and at least 8, even on machines for which smaller - alignments would suffice. It may be defined as larger than this - though. Note however that code and data structures are optimized for - the case of 8-byte alignment. - -MSPACES default: 0 (false) - If true, compile in support for independent allocation spaces. - This is only supported if HAVE_MMAP is true. 
- -ONLY_MSPACES default: 0 (false) - If true, only compile in mspace versions, not regular versions. - -USE_LOCKS default: 0 (false) - Causes each call to each public routine to be surrounded with - pthread or WIN32 mutex lock/unlock. (If set true, this can be - overridden on a per-mspace basis for mspace versions.) If set to a - non-zero value other than 1, locks are used, but their - implementation is left out, so lock functions must be supplied manually, - as described below. - -USE_SPIN_LOCKS default: 1 iff USE_LOCKS and spin locks available - If true, uses custom spin locks for locking. This is currently - supported only for gcc >= 4.1, older gccs on x86 platforms, and recent - MS compilers. Otherwise, posix locks or win32 critical sections are - used. - -USE_RECURSIVE_LOCKS default: not defined - If defined nonzero, uses recursive (aka reentrant) locks, otherwise - uses plain mutexes. This is not required for malloc proper, but may - be needed for layered allocators such as nedmalloc. - -LOCK_AT_FORK default: not defined - If defined nonzero, performs pthread_atfork upon initialization - to initialize child lock while holding parent lock. The implementation - assumes that pthread locks (not custom locks) are being used. In other - cases, you may need to customize the implementation. - -FOOTERS default: 0 - If true, provide extra checking and dispatching by placing - information in the footers of allocated chunks. This adds - space and time overhead. - -INSECURE default: 0 - If true, omit checks for usage errors and heap space overwrites. - -USE_DL_PREFIX default: NOT defined - Causes compiler to prefix all public routines with the string 'dl'. - This can be useful when you only want to use this malloc in one part - of a program, using your regular system malloc elsewhere. - -MALLOC_INSPECT_ALL default: NOT defined - If defined, compiles malloc_inspect_all and mspace_inspect_all, that - perform traversal of all heap space.
Unless access to these - functions is otherwise restricted, you probably do not want to - include them in secure implementations. - -ABORT default: defined as abort() - Defines how to abort on failed checks. On most systems, a failed - check cannot die with an "assert" or even print an informative - message, because the underlying print routines in turn call malloc, - which will fail again. Generally, the best policy is to simply call - abort(). It's not very useful to do more than this because many - errors due to overwriting will show up as address faults (null, odd - addresses etc) rather than malloc-triggered checks, so will also - abort. Also, most compilers know that abort() does not return, so - can better optimize code conditionally calling it. - -PROCEED_ON_ERROR default: defined as 0 (false) - Controls whether detected bad addresses cause them to be bypassed - rather than aborting. If set, detected bad arguments to free and - realloc are ignored. And all bookkeeping information is zeroed out - upon a detected overwrite of freed heap space, thus losing the - ability to ever return it from malloc again, but enabling the - application to proceed. If PROCEED_ON_ERROR is defined, the - static variable malloc_corruption_error_count is compiled in - and can be examined to see if errors have occurred. This option - generates slower code than the default abort policy. - -DEBUG default: NOT defined - The DEBUG setting is mainly intended for people trying to modify - this code or diagnose problems when porting to new platforms. - However, it may also be able to better isolate user errors than just - using runtime checks. The assertions in the check routines spell - out in more detail the assumptions and invariants underlying the - algorithms. The checking is fairly extensive, and will slow down - execution noticeably.
Calling malloc_stats or mallinfo with DEBUG - set will attempt to check every non-mmapped allocated and free chunk - in the course of computing the summaries. - -ABORT_ON_ASSERT_FAILURE default: defined as 1 (true) - Debugging assertion failures can be nearly impossible if your - version of the assert macro causes malloc to be called, which will - lead to a cascade of further failures, blowing the runtime stack. - ABORT_ON_ASSERT_FAILURE causes assertion failures to call abort(), - which will usually make debugging easier. - -MALLOC_FAILURE_ACTION default: sets errno to ENOMEM, or no-op on win32 - The action to take before "return 0" when malloc fails to be able to - return memory because there is none available. - -HAVE_MORECORE default: 1 (true) unless win32 or ONLY_MSPACES - True if this system supports sbrk or an emulation of it. - -MORECORE default: sbrk - The name of the sbrk-style system routine to call to obtain more - memory. See below for guidance on writing custom MORECORE - functions. The type of the argument to sbrk/MORECORE varies across - systems. It cannot be size_t, because it supports negative - arguments, so it is normally the signed type of the same width as - size_t (sometimes declared as "intptr_t"). It doesn't much matter - though. Internally, we only call it with arguments less than half - the max value of a size_t, which should work across all reasonable - possibilities, although sometimes generating compiler warnings. - -MORECORE_CONTIGUOUS default: 1 (true) if HAVE_MORECORE - If true, take advantage of fact that consecutive calls to MORECORE - with positive arguments always return contiguous increasing - addresses. This is true of unix sbrk. It does not hurt too much to - set it true anyway, since malloc copes with non-contiguities. - Setting it false when definitely non-contiguous saves time - and possibly wasted space it would take to discover this though.
- -MORECORE_CANNOT_TRIM default: NOT defined - True if MORECORE cannot release space back to the system when given - negative arguments. This is generally necessary only if you are - using a hand-crafted MORECORE function that cannot handle negative - arguments. - -NO_SEGMENT_TRAVERSAL default: 0 - If non-zero, suppresses traversals of memory segments - returned by either MORECORE or CALL_MMAP. This disables - merging of segments that are contiguous, and selectively - releasing them to the OS if unused, but bounds execution times. - -HAVE_MMAP default: 1 (true) - True if this system supports mmap or an emulation of it. If so, and - HAVE_MORECORE is not true, MMAP is used for all system - allocation. If set and HAVE_MORECORE is true as well, MMAP is - primarily used to directly allocate very large blocks. It is also - used as a backup strategy in cases where MORECORE fails to provide - space from system. Note: A single call to MUNMAP is assumed to be - able to unmap memory that may have been allocated using multiple calls - to MMAP, so long as they are adjacent. - -HAVE_MREMAP default: 1 on linux, else 0 - If true realloc() uses mremap() to re-allocate large blocks and - extend or shrink allocation spaces. - -MMAP_CLEARS default: 1 except on WINCE. - True if mmap clears memory so calloc doesn't need to. This is true - for standard unix mmap using /dev/zero and on WIN32 except for WINCE. - -USE_BUILTIN_FFS default: 0 (i.e., not used) - Causes malloc to use the builtin ffs() function to compute indices. - Some compilers may recognize and intrinsify ffs to be faster than the - supplied C version. Also, the case of x86 using gcc is special-cased - to an asm instruction, so is already as fast as it can be, and so - this setting has no effect. Similarly for Win32 under recent MS compilers. - (On most x86s, the asm version is only slightly faster than the C version.) - -malloc_getpagesize default: derive from system includes, or 4096. - The system page size.
To the extent possible, this malloc manages - memory from the system in page-size units. This may be (and - usually is) a function rather than a constant. This is ignored - if WIN32, where page size is determined using getSystemInfo during - initialization. - -USE_DEV_RANDOM default: 0 (i.e., not used) - Causes malloc to use /dev/random to initialize secure magic seed for - stamping footers. Otherwise, the current time is used. - -NO_MALLINFO default: 0 - If defined, don't compile "mallinfo". This can be a simple way - of dealing with mismatches between system declarations and - those in this file. - -MALLINFO_FIELD_TYPE default: size_t - The type of the fields in the mallinfo struct. This was originally - defined as "int" in SVID etc, but is more usefully defined as - size_t. The value is used only if HAVE_USR_INCLUDE_MALLOC_H is not set - -NO_MALLOC_STATS default: 0 - If defined, don't compile "malloc_stats". This avoids calls to - fprintf and bringing in stdio dependencies you might not want. - -REALLOC_ZERO_BYTES_FREES default: not defined - This should be set if a call to realloc with zero bytes should - be the same as a call to free. Some people think it should. Otherwise, - since this malloc returns a unique pointer for malloc(0), so does - realloc(p, 0). - -LACKS_UNISTD_H, LACKS_FCNTL_H, LACKS_SYS_PARAM_H, LACKS_SYS_MMAN_H -LACKS_STRINGS_H, LACKS_STRING_H, LACKS_SYS_TYPES_H, LACKS_ERRNO_H -LACKS_STDLIB_H LACKS_SCHED_H LACKS_TIME_H default: NOT defined unless on WIN32 - Define these if your system does not have these header files. - You might need to manually insert some of the declarations they provide. - -DEFAULT_GRANULARITY default: page size if MORECORE_CONTIGUOUS, - system_info.dwAllocationGranularity in WIN32, - otherwise 64K. - Also settable using mallopt(M_GRANULARITY, x) - The unit for allocating and deallocating memory from the system. On - most systems with contiguous MORECORE, there is no reason to - make this more than a page. 
However, systems with MMAP tend to - either require or encourage larger granularities. You can increase - this value to prevent system allocation functions to be called so - often, especially if they are slow. The value must be at least one - page and must be a power of two. Setting to 0 causes initialization - to either page size or win32 region size. (Note: In previous - versions of malloc, the equivalent of this option was called - "TOP_PAD") - -DEFAULT_TRIM_THRESHOLD default: 2MB - Also settable using mallopt(M_TRIM_THRESHOLD, x) - The maximum amount of unused top-most memory to keep before - releasing via malloc_trim in free(). Automatic trimming is mainly - useful in long-lived programs using contiguous MORECORE. Because - trimming via sbrk can be slow on some systems, and can sometimes be - wasteful (in cases where programs immediately afterward allocate - more large chunks) the value should be high enough so that your - overall system performance would improve by releasing this much - memory. As a rough guide, you might set to a value close to the - average size of a process (program) running on your system. - Releasing this much memory would allow such a process to run in - memory. Generally, it is worth tuning trim thresholds when a - program undergoes phases where several large chunks are allocated - and released in ways that can reuse each other's storage, perhaps - mixed with phases where there are no such chunks at all. The trim - value must be greater than page size to have any useful effect. To - disable trimming completely, you can set to MAX_SIZE_T. Note that the trick - some people use of mallocing a huge space and then freeing it at - program startup, in an attempt to reserve system memory, doesn't - have the intended effect under automatic trimming, since that memory - will immediately be returned to the system. 
- -DEFAULT_MMAP_THRESHOLD default: 256K - Also settable using mallopt(M_MMAP_THRESHOLD, x) - The request size threshold for using MMAP to directly service a - request. Requests of at least this size that cannot be allocated - using already-existing space will be serviced via mmap. (If enough - normal freed space already exists it is used instead.) Using mmap - segregates relatively large chunks of memory so that they can be - individually obtained and released from the host system. A request - serviced through mmap is never reused by any other request (at least - not directly; the system may just so happen to remap successive - requests to the same locations). Segregating space in this way has - the benefits that: Mmapped space can always be individually released - back to the system, which helps keep the system level memory demands - of a long-lived program low. Also, mapped memory doesn't become - `locked' between other chunks, as can happen with normally allocated - chunks, which means that even trimming via malloc_trim would not - release them. However, it has the disadvantage that the space - cannot be reclaimed, consolidated, and then used to service later - requests, as happens with normal chunks. The advantages of mmap - nearly always outweigh disadvantages for "large" chunks, but the - value of "large" may vary across systems. The default is an - empirically derived value that works well in most systems. You can - disable mmap by setting to MAX_SIZE_T. - -MAX_RELEASE_CHECK_RATE default: 4095 unless not HAVE_MMAP - The number of consolidated frees between checks to release - unused segments when freeing. When using non-contiguous segments, - especially with multiple mspaces, checking only for topmost space - doesn't always suffice to trigger trimming. 
To compensate for this, - free() will, with a period of MAX_RELEASE_CHECK_RATE (or the - current number of segments, if greater) try to release unused - segments to the OS when freeing chunks that result in - consolidation. The best value for this parameter is a compromise - between slowing down frees with relatively costly checks that - rarely trigger versus holding on to unused memory. To effectively - disable, set to MAX_SIZE_T. This may lead to a very slight speed - improvement at the expense of carrying around more memory. -*/ - -/* Version identifier to allow people to support multiple versions */ -#ifndef DLMALLOC_VERSION -#define DLMALLOC_VERSION 20806 -#endif /* DLMALLOC_VERSION */ - -#ifndef DLMALLOC_EXPORT -#define DLMALLOC_EXPORT extern -#endif - -#ifndef WIN32 -#ifdef _WIN32 -#define WIN32 1 -#endif /* _WIN32 */ -#ifdef _WIN32_WCE -#define LACKS_FCNTL_H -#define WIN32 1 -#endif /* _WIN32_WCE */ -#endif /* WIN32 */ -#ifdef WIN32 -#define WIN32_LEAN_AND_MEAN -#include <windows.h> -#include <tchar.h> -#define HAVE_MMAP 1 -#define HAVE_MORECORE 0 -#define LACKS_UNISTD_H -#define LACKS_SYS_PARAM_H -#define LACKS_SYS_MMAN_H -#define LACKS_STRING_H -#define LACKS_STRINGS_H -#define LACKS_SYS_TYPES_H -#define LACKS_ERRNO_H -#define LACKS_SCHED_H -#ifndef MALLOC_FAILURE_ACTION -#define MALLOC_FAILURE_ACTION -#endif /* MALLOC_FAILURE_ACTION */ -#ifndef MMAP_CLEARS -#ifdef _WIN32_WCE /* WINCE reportedly does not clear */ -#define MMAP_CLEARS 0 -#else -#define MMAP_CLEARS 1 -#endif /* _WIN32_WCE */ -#endif /*MMAP_CLEARS */ -#endif /* WIN32 */ - -#if defined(DARWIN) || defined(_DARWIN) -/* Mac OSX docs advise not to use sbrk; it seems better to use mmap */ -#ifndef HAVE_MORECORE -#define HAVE_MORECORE 0 -#define HAVE_MMAP 1 -/* OSX allocators provide 16 byte alignment */ -#ifndef MALLOC_ALIGNMENT -#define MALLOC_ALIGNMENT ((size_t)16U) -#endif -#endif /* HAVE_MORECORE */ -#endif /* DARWIN */ - -#ifndef LACKS_SYS_TYPES_H -#include <sys/types.h> /* For size_t */ 
-#endif /* LACKS_SYS_TYPES_H */ - -/* The maximum possible size_t value has all bits set */ -#define MAX_SIZE_T (~(size_t)0) - -#ifndef USE_LOCKS /* ensure true if spin or recursive locks set */ -#if 0 -#define USE_LOCKS ((defined(USE_SPIN_LOCKS) && USE_SPIN_LOCKS != 0) || \ - (defined(USE_RECURSIVE_LOCKS) && USE_RECURSIVE_LOCKS != 0)) -#else -/* Avoid a -Wexpansion-to-defined compiler warning. */ -#if (defined(USE_SPIN_LOCKS) && USE_SPIN_LOCKS != 0) || \ - (defined(USE_RECURSIVE_LOCKS) && USE_RECURSIVE_LOCKS != 0) -#define USE_LOCKS 1 -#else -#define USE_LOCKS 0 -#endif -#endif -#endif /* USE_LOCKS */ - -#if USE_LOCKS /* Spin locks for gcc >= 4.1, older gcc on x86, MSC >= 1310 */ -#if ((defined(__GNUC__) && \ - ((__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 1)) || \ - defined(__i386__) || defined(__x86_64__))) || \ - (defined(_MSC_VER) && _MSC_VER>=1310)) -#ifndef USE_SPIN_LOCKS -#define USE_SPIN_LOCKS 1 -#endif /* USE_SPIN_LOCKS */ -#elif USE_SPIN_LOCKS -#error "USE_SPIN_LOCKS defined without implementation" -#endif /* ... locks available... 
*/ -#elif !defined(USE_SPIN_LOCKS) -#define USE_SPIN_LOCKS 0 -#endif /* USE_LOCKS */ - -#ifndef ONLY_MSPACES -#define ONLY_MSPACES 0 -#endif /* ONLY_MSPACES */ -#ifndef MSPACES -#if ONLY_MSPACES -#define MSPACES 1 -#else /* ONLY_MSPACES */ -#define MSPACES 0 -#endif /* ONLY_MSPACES */ -#endif /* MSPACES */ -#ifndef MALLOC_ALIGNMENT -#define MALLOC_ALIGNMENT ((size_t)(2 * sizeof(void *))) -#endif /* MALLOC_ALIGNMENT */ -#ifndef FOOTERS -#define FOOTERS 0 -#endif /* FOOTERS */ -#ifndef ABORT -#define ABORT abort() -#endif /* ABORT */ -#ifndef ABORT_ON_ASSERT_FAILURE -#define ABORT_ON_ASSERT_FAILURE 1 -#endif /* ABORT_ON_ASSERT_FAILURE */ -#ifndef PROCEED_ON_ERROR -#define PROCEED_ON_ERROR 0 -#endif /* PROCEED_ON_ERROR */ - -#ifndef INSECURE -#define INSECURE 0 -#endif /* INSECURE */ -#ifndef MALLOC_INSPECT_ALL -#define MALLOC_INSPECT_ALL 0 -#endif /* MALLOC_INSPECT_ALL */ -#ifndef HAVE_MMAP -#define HAVE_MMAP 1 -#endif /* HAVE_MMAP */ -#ifndef MMAP_CLEARS -#define MMAP_CLEARS 1 -#endif /* MMAP_CLEARS */ -#ifndef HAVE_MREMAP -#ifdef linux -#define HAVE_MREMAP 1 -#define _GNU_SOURCE /* Turns on mremap() definition */ -#else /* linux */ -#define HAVE_MREMAP 0 -#endif /* linux */ -#endif /* HAVE_MREMAP */ -#ifndef MALLOC_FAILURE_ACTION -#define MALLOC_FAILURE_ACTION errno = ENOMEM; -#endif /* MALLOC_FAILURE_ACTION */ -#ifndef HAVE_MORECORE -#if ONLY_MSPACES -#define HAVE_MORECORE 0 -#else /* ONLY_MSPACES */ -#define HAVE_MORECORE 1 -#endif /* ONLY_MSPACES */ -#endif /* HAVE_MORECORE */ -#if !HAVE_MORECORE -#define MORECORE_CONTIGUOUS 0 -#else /* !HAVE_MORECORE */ -#define MORECORE_DEFAULT sbrk -#ifndef MORECORE_CONTIGUOUS -#define MORECORE_CONTIGUOUS 1 -#endif /* MORECORE_CONTIGUOUS */ -#endif /* HAVE_MORECORE */ -#ifndef DEFAULT_GRANULARITY -#if (MORECORE_CONTIGUOUS || defined(WIN32)) -#define DEFAULT_GRANULARITY (0) /* 0 means to compute in init_mparams */ -#else /* MORECORE_CONTIGUOUS */ -#define DEFAULT_GRANULARITY ((size_t)64U * (size_t)1024U) -#endif /* 
MORECORE_CONTIGUOUS */ -#endif /* DEFAULT_GRANULARITY */ -#ifndef DEFAULT_TRIM_THRESHOLD -#ifndef MORECORE_CANNOT_TRIM -#define DEFAULT_TRIM_THRESHOLD ((size_t)2U * (size_t)1024U * (size_t)1024U) -#else /* MORECORE_CANNOT_TRIM */ -#define DEFAULT_TRIM_THRESHOLD MAX_SIZE_T -#endif /* MORECORE_CANNOT_TRIM */ -#endif /* DEFAULT_TRIM_THRESHOLD */ -#ifndef DEFAULT_MMAP_THRESHOLD -#if HAVE_MMAP -#define DEFAULT_MMAP_THRESHOLD ((size_t)256U * (size_t)1024U) -#else /* HAVE_MMAP */ -#define DEFAULT_MMAP_THRESHOLD MAX_SIZE_T -#endif /* HAVE_MMAP */ -#endif /* DEFAULT_MMAP_THRESHOLD */ -#ifndef MAX_RELEASE_CHECK_RATE -#if HAVE_MMAP -#define MAX_RELEASE_CHECK_RATE 4095 -#else -#define MAX_RELEASE_CHECK_RATE MAX_SIZE_T -#endif /* HAVE_MMAP */ -#endif /* MAX_RELEASE_CHECK_RATE */ -#ifndef USE_BUILTIN_FFS -#define USE_BUILTIN_FFS 0 -#endif /* USE_BUILTIN_FFS */ -#ifndef USE_DEV_RANDOM -#define USE_DEV_RANDOM 0 -#endif /* USE_DEV_RANDOM */ -#ifndef NO_MALLINFO -#define NO_MALLINFO 0 -#endif /* NO_MALLINFO */ -#ifndef MALLINFO_FIELD_TYPE -#define MALLINFO_FIELD_TYPE size_t -#endif /* MALLINFO_FIELD_TYPE */ -#ifndef NO_MALLOC_STATS -#define NO_MALLOC_STATS 0 -#endif /* NO_MALLOC_STATS */ -#ifndef NO_SEGMENT_TRAVERSAL -#define NO_SEGMENT_TRAVERSAL 0 -#endif /* NO_SEGMENT_TRAVERSAL */ - -/* - mallopt tuning options. SVID/XPG defines four standard parameter - numbers for mallopt, normally defined in malloc.h. None of these - are used in this malloc, so setting them has no effect. But this - malloc does support the following options. -*/ - -#define M_TRIM_THRESHOLD (-1) -#define M_GRANULARITY (-2) -#define M_MMAP_THRESHOLD (-3) - -/* ------------------------ Mallinfo declarations ------------------------ */ - -#if !NO_MALLINFO -/* - This version of malloc supports the standard SVID/XPG mallinfo - routine that returns a struct containing usage properties and - statistics. It should work on any system that has a - /usr/include/malloc.h defining struct mallinfo. 
The main - declaration needed is the mallinfo struct that is returned (by-copy) - by mallinfo(). The mallinfo struct contains a bunch of fields that - are not even meaningful in this version of malloc. These fields are - instead filled by mallinfo() with other numbers that might be of - interest. - - HAVE_USR_INCLUDE_MALLOC_H should be set if you have a - /usr/include/malloc.h file that includes a declaration of struct - mallinfo. If so, it is included; else a compliant version is - declared below. These must be precisely the same for mallinfo() to - work. The original SVID version of this struct, defined on most - systems with mallinfo, declares all fields as ints. But some others - define as unsigned long. If your system defines the fields using a - type of different width than listed here, you MUST #include your - system version and #define HAVE_USR_INCLUDE_MALLOC_H. -*/ - -/* #define HAVE_USR_INCLUDE_MALLOC_H */ - -#ifdef HAVE_USR_INCLUDE_MALLOC_H -#include "/usr/include/malloc.h" -#else /* HAVE_USR_INCLUDE_MALLOC_H */ -#ifndef STRUCT_MALLINFO_DECLARED -/* HP-UX (and others?) redefines mallinfo unless _STRUCT_MALLINFO is defined */ -#define _STRUCT_MALLINFO -#define STRUCT_MALLINFO_DECLARED 1 -struct mallinfo { - MALLINFO_FIELD_TYPE arena; /* non-mmapped space allocated from system */ - MALLINFO_FIELD_TYPE ordblks; /* number of free chunks */ - MALLINFO_FIELD_TYPE smblks; /* always 0 */ - MALLINFO_FIELD_TYPE hblks; /* always 0 */ - MALLINFO_FIELD_TYPE hblkhd; /* space in mmapped regions */ - MALLINFO_FIELD_TYPE usmblks; /* maximum total allocated space */ - MALLINFO_FIELD_TYPE fsmblks; /* always 0 */ - MALLINFO_FIELD_TYPE uordblks; /* total allocated space */ - MALLINFO_FIELD_TYPE fordblks; /* total free space */ - MALLINFO_FIELD_TYPE keepcost; /* releasable (via malloc_trim) space */ -}; -#endif /* STRUCT_MALLINFO_DECLARED */ -#endif /* HAVE_USR_INCLUDE_MALLOC_H */ -#endif /* NO_MALLINFO */ - -/* - Try to persuade compilers to inline.
The most critical functions for - inlining are defined as macros, so these aren't used for them. -*/ - -#ifndef FORCEINLINE - #if defined(__GNUC__) -#define FORCEINLINE __inline __attribute__ ((always_inline)) - #elif defined(_MSC_VER) - #define FORCEINLINE __forceinline - #endif -#endif -#ifndef NOINLINE - #if defined(__GNUC__) - #define NOINLINE __attribute__ ((noinline)) - #elif defined(_MSC_VER) - #define NOINLINE __declspec(noinline) - #else - #define NOINLINE - #endif -#endif - -#ifdef __cplusplus -extern "C" { -#ifndef FORCEINLINE - #define FORCEINLINE inline -#endif -#endif /* __cplusplus */ -#ifndef FORCEINLINE - #define FORCEINLINE -#endif - -#if !ONLY_MSPACES - -/* ------------------- Declarations of public routines ------------------- */ - -#ifndef USE_DL_PREFIX -#define dlcalloc calloc -#define dlfree free -#define dlmalloc malloc -#define dlmemalign memalign -#define dlposix_memalign posix_memalign -#define dlrealloc realloc -#define dlrealloc_in_place realloc_in_place -#define dlvalloc valloc -#define dlpvalloc pvalloc -#define dlmallinfo mallinfo -#define dlmallopt mallopt -#define dlmalloc_trim malloc_trim -#define dlmalloc_stats malloc_stats -#define dlmalloc_usable_size malloc_usable_size -#define dlmalloc_footprint malloc_footprint -#define dlmalloc_max_footprint malloc_max_footprint -#define dlmalloc_footprint_limit malloc_footprint_limit -#define dlmalloc_set_footprint_limit malloc_set_footprint_limit -#define dlmalloc_inspect_all malloc_inspect_all -#define dlindependent_calloc independent_calloc -#define dlindependent_comalloc independent_comalloc -#define dlbulk_free bulk_free -#endif /* USE_DL_PREFIX */ - -/* - malloc(size_t n) - Returns a pointer to a newly allocated chunk of at least n bytes, or - null if no space is available, in which case errno is set to ENOMEM - on ANSI C systems. - - If n is zero, malloc returns a minimum-sized chunk. (The minimum - size is 16 bytes on most 32bit systems, and 32 bytes on 64bit - systems.) 
Note that size_t is an unsigned type, so calls with - arguments that would be negative if signed are interpreted as - requests for huge amounts of space, which will often fail. The - maximum supported value of n differs across systems, but is in all - cases less than the maximum representable value of a size_t. -*/ -DLMALLOC_EXPORT void* dlmalloc(size_t); - -/* - free(void* p) - Releases the chunk of memory pointed to by p, that had been previously - allocated using malloc or a related routine such as realloc. - It has no effect if p is null. If p was not malloced or already - freed, free(p) will by default cause the current program to abort. -*/ -DLMALLOC_EXPORT void dlfree(void*); - -/* - calloc(size_t n_elements, size_t element_size); - Returns a pointer to n_elements * element_size bytes, with all locations - set to zero. -*/ -DLMALLOC_EXPORT void* dlcalloc(size_t, size_t); - -/* - realloc(void* p, size_t n) - Returns a pointer to a chunk of size n that contains the same data - as does chunk p up to the minimum of (n, p's size) bytes, or null - if no space is available. - - The returned pointer may or may not be the same as p. The algorithm - prefers extending p in most cases when possible, otherwise it - employs the equivalent of a malloc-copy-free sequence. - - If p is null, realloc is equivalent to malloc. - - If space is not available, realloc returns null, errno is set (if on - ANSI) and p is NOT freed. - - if n is for fewer bytes than already held by p, the newly unused - space is lopped off and freed if possible. realloc with a size - argument of zero (re)allocates a minimum-sized chunk. - - The old unix realloc convention of allowing the last-free'd chunk - to be used as an argument to realloc is not supported. 
-*/ -DLMALLOC_EXPORT void* dlrealloc(void*, size_t); - -/* - realloc_in_place(void* p, size_t n) - Resizes the space allocated for p to size n, only if this can be - done without moving p (i.e., only if there is adjacent space - available if n is greater than p's current allocated size, or n is - less than or equal to p's size). This may be used instead of plain - realloc if an alternative allocation strategy is needed upon failure - to expand space; for example, reallocation of a buffer that must be - memory-aligned or cleared. You can use realloc_in_place to trigger - these alternatives only when needed. - - Returns p if successful; otherwise null. -*/ -DLMALLOC_EXPORT void* dlrealloc_in_place(void*, size_t); - -/* - memalign(size_t alignment, size_t n); - Returns a pointer to a newly allocated chunk of n bytes, aligned - in accord with the alignment argument. - - The alignment argument should be a power of two. If the argument is - not a power of two, the nearest greater power is used. - 8-byte alignment is guaranteed by normal malloc calls, so don't - bother calling memalign with an argument of 8 or less. - - Overreliance on memalign is a sure way to fragment space. -*/ -DLMALLOC_EXPORT void* dlmemalign(size_t, size_t); - -/* - int posix_memalign(void** pp, size_t alignment, size_t n); - Allocates a chunk of n bytes, aligned in accord with the alignment - argument. Differs from memalign only in that it (1) assigns the - allocated memory to *pp rather than returning it, (2) fails and - returns EINVAL if the alignment is not a power of two (3) fails and - returns ENOMEM if memory cannot be allocated. -*/ -DLMALLOC_EXPORT int dlposix_memalign(void**, size_t, size_t); - -/* - valloc(size_t n); - Equivalent to memalign(pagesize, n), where pagesize is the page - size of the system. If the pagesize is unknown, 4096 is used. 
-*/ -DLMALLOC_EXPORT void* dlvalloc(size_t); - -/* - mallopt(int parameter_number, int parameter_value) - Sets tunable parameters. The format is to provide a - (parameter-number, parameter-value) pair. mallopt then sets the - corresponding parameter to the argument value if it can (i.e., so - long as the value is meaningful), and returns 1 if successful else - 0. To work around the fact that mallopt is specified to use int, - not size_t parameters, the value -1 is specially treated as the - maximum unsigned size_t value. - - SVID/XPG/ANSI defines four standard param numbers for mallopt, - normally defined in malloc.h. None of these are used in this malloc, - so setting them has no effect. But this malloc also supports other - options in mallopt. See below for details. Briefly, supported - parameters are as follows (listed defaults are for "typical" - configurations). - - Symbol param # default allowed param values - M_TRIM_THRESHOLD -1 2*1024*1024 any (-1 disables) - M_GRANULARITY -2 page size any power of 2 >= page size - M_MMAP_THRESHOLD -3 256*1024 any (or 0 if no MMAP support) -*/ -DLMALLOC_EXPORT int dlmallopt(int, int); - -/* - malloc_footprint(); - Returns the number of bytes obtained from the system. The total - number of bytes allocated by malloc, realloc etc., is less than this - value. Unlike mallinfo, this function returns only a precomputed - result, so can be called frequently to monitor memory consumption. - Even if locks are otherwise defined, this function does not use them, - so results might not be up to date. -*/ -DLMALLOC_EXPORT size_t dlmalloc_footprint(void); - -/* - malloc_max_footprint(); - Returns the maximum number of bytes obtained from the system. This - value will be greater than current footprint if deallocated space - has been reclaimed by the system. The peak number of bytes allocated - by malloc, realloc etc., is less than this value.
Unlike mallinfo, - this function returns only a precomputed result, so can be called - frequently to monitor memory consumption. Even if locks are - otherwise defined, this function does not use them, so results might - not be up to date. -*/ -DLMALLOC_EXPORT size_t dlmalloc_max_footprint(void); - -/* - malloc_footprint_limit(); - Returns the number of bytes that the heap is allowed to obtain from - the system, returning the last value returned by - malloc_set_footprint_limit, or the maximum size_t value if - never set. The returned value reflects a permission. There is no - guarantee that this number of bytes can actually be obtained from - the system. -*/ -DLMALLOC_EXPORT size_t dlmalloc_footprint_limit(); - -/* - malloc_set_footprint_limit(size_t bytes); - Sets the maximum number of bytes to obtain from the system, causing - failure returns from malloc and related functions upon attempts to - exceed this value. The argument value may be subject to page - rounding to an enforceable limit; this actual value is returned. - Using an argument of the maximum possible size_t effectively - disables checks. If the argument is less than or equal to the - current malloc_footprint, then all future allocations that require - additional system memory will fail. However, invocation cannot - retroactively deallocate existing used memory. -*/ -DLMALLOC_EXPORT size_t dlmalloc_set_footprint_limit(size_t bytes); - -#if MALLOC_INSPECT_ALL -/* - malloc_inspect_all(void(*handler)(void *start, - void *end, - size_t used_bytes, - void* callback_arg), - void* arg); - Traverses the heap and calls the given handler for each managed - region, skipping all bytes that are (or may be) used for bookkeeping - purposes. Traversal does not include chunks that have been - directly memory mapped. Each reported region begins at the start - address, and continues up to but not including the end address. The - first used_bytes of the region contain allocated data.
If - used_bytes is zero, the region is unallocated. The handler is - invoked with the given callback argument. If locks are defined, they - are held during the entire traversal. It is a bad idea to invoke - other malloc functions from within the handler. - - For example, to count the number of in-use chunks with size greater - than 1000, you could write: - static int count = 0; - void count_chunks(void* start, void* end, size_t used, void* arg) { - if (used >= 1000) ++count; - } - then: - malloc_inspect_all(count_chunks, NULL); - - malloc_inspect_all is compiled only if MALLOC_INSPECT_ALL is defined. -*/ -DLMALLOC_EXPORT void dlmalloc_inspect_all(void(*handler)(void*, void *, size_t, void*), - void* arg); - -#endif /* MALLOC_INSPECT_ALL */ - -#if !NO_MALLINFO -/* - mallinfo() - Returns (by copy) a struct containing various summary statistics: - - arena: current total non-mmapped bytes allocated from system - ordblks: the number of free chunks - smblks: always zero. - hblks: current number of mmapped regions - hblkhd: total bytes held in mmapped regions - usmblks: the maximum total allocated space. This will be greater - than current total if trimming has occurred. - fsmblks: always zero - uordblks: current total allocated space (normal or mmapped) - fordblks: total free space - keepcost: the maximum number of bytes that could ideally be released - back to system via malloc_trim. ("ideally" means that - it ignores page restrictions etc.) - - Because these fields are ints, but internal bookkeeping may - be kept as longs, the reported values may wrap around zero and - thus be inaccurate. 
-*/ -DLMALLOC_EXPORT struct mallinfo dlmallinfo(void); -#endif /* NO_MALLINFO */ - -/* - independent_calloc(size_t n_elements, size_t element_size, void* chunks[]); - - independent_calloc is similar to calloc, but instead of returning a - single cleared space, it returns an array of pointers to n_elements - independent elements that can hold contents of size elem_size, each - of which starts out cleared, and can be independently freed, - realloc'ed etc. The elements are guaranteed to be adjacently - allocated (this is not guaranteed to occur with multiple callocs or - mallocs), which may also improve cache locality in some - applications. - - The "chunks" argument is optional (i.e., may be null, which is - probably the most typical usage). If it is null, the returned array - is itself dynamically allocated and should also be freed when it is - no longer needed. Otherwise, the chunks array must be of at least - n_elements in length. It is filled in with the pointers to the - chunks. - - In either case, independent_calloc returns this pointer array, or - null if the allocation failed. If n_elements is zero and "chunks" - is null, it returns a chunk representing an array with zero elements - (which should be freed if not wanted). - - Each element must be freed when it is no longer needed. This can be - done all at once using bulk_free. - - independent_calloc simplifies and speeds up implementations of many - kinds of pools. It may also be useful when constructing large data - structures that initially have a fixed number of fixed-sized nodes, - but the number is not known at compile time, and some of the nodes - may later need to be freed. For example: - - struct Node { int item; struct Node* next; }; - - struct Node* build_list() { - struct Node** pool; - int n = read_number_of_nodes_needed(); - if (n <= 0) return 0; - pool = (struct Node**)(independent_calloc(n, sizeof(struct Node), 0); - if (pool == 0) die(); - // organize into a linked list... 
- struct Node* first = pool[0]; - for (i = 0; i < n-1; ++i) - pool[i]->next = pool[i+1]; - free(pool); // Can now free the array (or not, if it is needed later) - return first; - } -*/ -DLMALLOC_EXPORT void** dlindependent_calloc(size_t, size_t, void**); - -/* - independent_comalloc(size_t n_elements, size_t sizes[], void* chunks[]); - - independent_comalloc allocates, all at once, a set of n_elements - chunks with sizes indicated in the "sizes" array. It returns - an array of pointers to these elements, each of which can be - independently freed, realloc'ed etc. The elements are guaranteed to - be adjacently allocated (this is not guaranteed to occur with - multiple callocs or mallocs), which may also improve cache locality - in some applications. - - The "chunks" argument is optional (i.e., may be null). If it is null - the returned array is itself dynamically allocated and should also - be freed when it is no longer needed. Otherwise, the chunks array - must be of at least n_elements in length. It is filled in with the - pointers to the chunks. - - In either case, independent_comalloc returns this pointer array, or - null if the allocation failed. If n_elements is zero and chunks is - null, it returns a chunk representing an array with zero elements - (which should be freed if not wanted). - - Each element must be freed when it is no longer needed. This can be - done all at once using bulk_free. - - independent_comalloc differs from independent_calloc in that each - element may have a different size, and also that it does not - automatically clear elements. - - independent_comalloc can be used to speed up allocation in cases - where several structs or objects must always be allocated at the - same time. For example: - - struct Head { ... } - struct Foot { ...
} - - void send_message(char* msg) { - int msglen = strlen(msg); - size_t sizes[3] = { sizeof(struct Head), msglen, sizeof(struct Foot) }; - void* chunks[3]; - if (independent_comalloc(3, sizes, chunks) == 0) - die(); - struct Head* head = (struct Head*)(chunks[0]); - char* body = (char*)(chunks[1]); - struct Foot* foot = (struct Foot*)(chunks[2]); - // ... - } - - In general though, independent_comalloc is worth using only for - larger values of n_elements. For small values, you probably won't - detect enough difference from series of malloc calls to bother. - - Overuse of independent_comalloc can increase overall memory usage, - since it cannot reuse existing noncontiguous small chunks that - might be available for some of the elements. -*/ -DLMALLOC_EXPORT void** dlindependent_comalloc(size_t, size_t*, void**); - -/* - bulk_free(void* array[], size_t n_elements) - Frees and clears (sets to null) each non-null pointer in the given - array. This is likely to be faster than freeing them one-by-one. - If footers are used, pointers that have been allocated in different - mspaces are not freed or cleared, and the count of all such pointers - is returned. For large arrays of pointers with poor locality, it - may be worthwhile to sort this array before calling bulk_free. -*/ -DLMALLOC_EXPORT size_t dlbulk_free(void**, size_t n_elements); - -/* - pvalloc(size_t n); - Equivalent to valloc(minimum-page-that-holds(n)), that is, - round up n to nearest pagesize. - */ -DLMALLOC_EXPORT void* dlpvalloc(size_t); - -/* - malloc_trim(size_t pad); - - If possible, gives memory back to the system (via negative arguments - to sbrk) if there is unused memory at the `high' end of the malloc - pool or in unused MMAP segments. You can call this after freeing - large blocks of memory to potentially reduce the system-level memory - requirements of a program. However, it cannot guarantee to reduce - memory. 
Under some allocation patterns, some large free blocks of - memory will be locked between two used chunks, so they cannot be - given back to the system. - - The `pad' argument to malloc_trim represents the amount of free - trailing space to leave untrimmed. If this argument is zero, only - the minimum amount of memory to maintain internal data structures - will be left. Non-zero arguments can be supplied to maintain enough - trailing space to service future expected allocations without having - to re-obtain memory from the system. - - Malloc_trim returns 1 if it actually released any memory, else 0. -*/ -DLMALLOC_EXPORT int dlmalloc_trim(size_t); - -/* - malloc_stats(); - Prints on stderr the amount of space obtained from the system (both - via sbrk and mmap), the maximum amount (which may be more than - current if malloc_trim and/or munmap got called), and the current - number of bytes allocated via malloc (or realloc, etc) but not yet - freed. Note that this is the number of bytes allocated, not the - number requested. It will be larger than the number requested - because of alignment and bookkeeping overhead. Because it includes - alignment wastage as being in use, this figure may be greater than - zero even when no user-level chunks are allocated. - - The reported current and maximum system memory can be inaccurate if - a program makes other calls to system memory allocation functions - (normally sbrk) outside of malloc. - - malloc_stats prints only the most commonly interesting statistics. - More information can be obtained by calling mallinfo. -*/ -DLMALLOC_EXPORT void dlmalloc_stats(void); - -/* - malloc_usable_size(void* p); - - Returns the number of bytes you can actually use in - an allocated chunk, which may be more than you requested (although - often not) due to alignment and minimum size constraints. - You can use this many bytes without worrying about - overwriting other allocated objects. This is not a particularly great - programming practice. 
malloc_usable_size can be more useful in - debugging and assertions, for example: - - p = malloc(n); - assert(malloc_usable_size(p) >= 256); -*/ -size_t dlmalloc_usable_size(void*); - -#endif /* ONLY_MSPACES */ - -#if MSPACES - -/* - mspace is an opaque type representing an independent - region of space that supports mspace_malloc, etc. -*/ -typedef void* mspace; - -/* - create_mspace creates and returns a new independent space with the - given initial capacity, or, if 0, the default granularity size. It - returns null if there is no system memory available to create the - space. If argument locked is non-zero, the space uses a separate - lock to control access. The capacity of the space will grow - dynamically as needed to service mspace_malloc requests. You can - control the sizes of incremental increases of this space by - compiling with a different DEFAULT_GRANULARITY or dynamically - setting with mallopt(M_GRANULARITY, value). -*/ -DLMALLOC_EXPORT mspace create_mspace(size_t capacity, int locked); - -/* - destroy_mspace destroys the given space, and attempts to return all - of its memory back to the system, returning the total number of - bytes freed. After destruction, the results of access to all memory - used by the space become undefined. -*/ -DLMALLOC_EXPORT size_t destroy_mspace(mspace msp); - -/* - create_mspace_with_base uses the memory supplied as the initial base - of a new mspace. Part (less than 128*sizeof(size_t) bytes) of this - space is used for bookkeeping, so the capacity must be at least this - large. (Otherwise 0 is returned.) When this initial space is - exhausted, additional memory will be obtained from the system. - Destroying this space will deallocate all additionally allocated - space (if possible) but not the initial base. 
-*/ -DLMALLOC_EXPORT mspace create_mspace_with_base(void* base, size_t capacity, int locked); - -/* - mspace_track_large_chunks controls whether requests for large chunks - are allocated in their own untracked mmapped regions, separate from - others in this mspace. By default large chunks are not tracked, - which reduces fragmentation. However, such chunks are not - necessarily released to the system upon destroy_mspace. Enabling - tracking by setting to true may increase fragmentation, but avoids - leakage when relying on destroy_mspace to release all memory - allocated using this space. The function returns the previous - setting. -*/ -DLMALLOC_EXPORT int mspace_track_large_chunks(mspace msp, int enable); - - -/* - mspace_malloc behaves as malloc, but operates within - the given space. -*/ -DLMALLOC_EXPORT void* mspace_malloc(mspace msp, size_t bytes); - -/* - mspace_free behaves as free, but operates within - the given space. - - If compiled with FOOTERS==1, mspace_free is not actually needed. - free may be called instead of mspace_free because freed chunks from - any space are handled by their originating spaces. -*/ -DLMALLOC_EXPORT void mspace_free(mspace msp, void* mem); - -/* - mspace_realloc behaves as realloc, but operates within - the given space. - - If compiled with FOOTERS==1, mspace_realloc is not actually - needed. realloc may be called instead of mspace_realloc because - realloced chunks from any space are handled by their originating - spaces. -*/ -DLMALLOC_EXPORT void* mspace_realloc(mspace msp, void* mem, size_t newsize); - -/* - mspace_calloc behaves as calloc, but operates within - the given space. -*/ -DLMALLOC_EXPORT void* mspace_calloc(mspace msp, size_t n_elements, size_t elem_size); - -/* - mspace_memalign behaves as memalign, but operates within - the given space. 
-*/ -DLMALLOC_EXPORT void* mspace_memalign(mspace msp, size_t alignment, size_t bytes); - -/* - mspace_independent_calloc behaves as independent_calloc, but - operates within the given space. -*/ -DLMALLOC_EXPORT void** mspace_independent_calloc(mspace msp, size_t n_elements, - size_t elem_size, void* chunks[]); - -/* - mspace_independent_comalloc behaves as independent_comalloc, but - operates within the given space. -*/ -DLMALLOC_EXPORT void** mspace_independent_comalloc(mspace msp, size_t n_elements, - size_t sizes[], void* chunks[]); - -/* - mspace_footprint() returns the number of bytes obtained from the - system for this space. -*/ -DLMALLOC_EXPORT size_t mspace_footprint(mspace msp); - -/* - mspace_max_footprint() returns the peak number of bytes obtained from the - system for this space. -*/ -DLMALLOC_EXPORT size_t mspace_max_footprint(mspace msp); - - -#if !NO_MALLINFO -/* - mspace_mallinfo behaves as mallinfo, but reports properties of - the given space. -*/ -DLMALLOC_EXPORT struct mallinfo mspace_mallinfo(mspace msp); -#endif /* NO_MALLINFO */ - -/* - mspace_usable_size(const void* mem) behaves the same as malloc_usable_size; -*/ -DLMALLOC_EXPORT size_t mspace_usable_size(const void* mem); - -/* - mspace_malloc_stats behaves as malloc_stats, but reports - properties of the given space. -*/ -DLMALLOC_EXPORT void mspace_malloc_stats(mspace msp); - -/* - mspace_trim behaves as malloc_trim, but - operates within the given space. -*/ -DLMALLOC_EXPORT int mspace_trim(mspace msp, size_t pad); - -/* - An alias for mallopt. -*/ -DLMALLOC_EXPORT int mspace_mallopt(int, int); - -#endif /* MSPACES */ - -#ifdef __cplusplus -} /* end of extern "C" */ -#endif /* __cplusplus */ - -/* - ======================================================================== - To make a fully customizable malloc.h header file, cut everything - above this line, put into file malloc.h, edit to suit, and #include it - on the next line, as well as in programs that use this malloc.
- ======================================================================== -*/ - -/* #include "malloc.h" */ - -/*------------------------------ internal #includes ---------------------- */ - -#ifdef _MSC_VER -#pragma warning( disable : 4146 ) /* no "unsigned" warnings */ -#endif /* _MSC_VER */ -#if !NO_MALLOC_STATS -#include <stdio.h> /* for printing in malloc_stats */ -#endif /* NO_MALLOC_STATS */ -#ifndef LACKS_ERRNO_H -#include <errno.h> /* for MALLOC_FAILURE_ACTION */ -#endif /* LACKS_ERRNO_H */ -#ifdef DEBUG -#if ABORT_ON_ASSERT_FAILURE -#undef assert -#define assert(x) if(!(x)) ABORT -#else /* ABORT_ON_ASSERT_FAILURE */ -#include <assert.h> -#endif /* ABORT_ON_ASSERT_FAILURE */ -#else /* DEBUG */ -#ifndef assert -#define assert(x) -#endif -#define DEBUG 0 -#endif /* DEBUG */ -#if !defined(WIN32) && !defined(LACKS_TIME_H) -#include <time.h> /* for magic initialization */ -#endif /* WIN32 */ -#ifndef LACKS_STDLIB_H -#include <stdlib.h> /* for abort() */ -#endif /* LACKS_STDLIB_H */ -#ifndef LACKS_STRING_H -#include <string.h> /* for memset etc */ -#endif /* LACKS_STRING_H */ -#if USE_BUILTIN_FFS -#ifndef LACKS_STRINGS_H -#include <strings.h> /* for ffs */ -#endif /* LACKS_STRINGS_H */ -#endif /* USE_BUILTIN_FFS */ -#if HAVE_MMAP -#ifndef LACKS_SYS_MMAN_H -/* On some versions of linux, mremap decl in mman.h needs __USE_GNU set */ -#if (defined(linux) && !defined(__USE_GNU)) -#define __USE_GNU 1 -#include <sys/mman.h> /* for mmap */ -#undef __USE_GNU -#else -#include <sys/mman.h> /* for mmap */ -#endif /* linux */ -#endif /* LACKS_SYS_MMAN_H */ -#ifndef LACKS_FCNTL_H -#include <fcntl.h> -#endif /* LACKS_FCNTL_H */ -#endif /* HAVE_MMAP */ -#ifndef LACKS_UNISTD_H -#include <unistd.h> /* for sbrk, sysconf */ -#else /* LACKS_UNISTD_H */ -#if !defined(__FreeBSD__) && !defined(__OpenBSD__) && !defined(__NetBSD__) -extern void* sbrk(ptrdiff_t); -#endif /* FreeBSD etc */ -#endif /* LACKS_UNISTD_H */ - -/* Declarations for locking */ -#if USE_LOCKS -#ifndef WIN32 -#if 
defined (__SVR4) && defined (__sun) /* solaris */ -#include <thread.h> -#elif !defined(LACKS_SCHED_H) -#include <sched.h> -#endif /* solaris or LACKS_SCHED_H */ -#if (defined(USE_RECURSIVE_LOCKS) && USE_RECURSIVE_LOCKS != 0) || !USE_SPIN_LOCKS -#include <pthread.h> -#endif /* USE_RECURSIVE_LOCKS ... */ -#elif defined(_MSC_VER) -#ifndef _M_AMD64 -/* These are already defined on AMD64 builds */ -#ifdef __cplusplus -extern "C" { -#endif /* __cplusplus */ -LONG __cdecl _InterlockedCompareExchange(LONG volatile *Dest, LONG Exchange, LONG Comp); -LONG __cdecl _InterlockedExchange(LONG volatile *Target, LONG Value); -#ifdef __cplusplus -} -#endif /* __cplusplus */ -#endif /* _M_AMD64 */ -#pragma intrinsic (_InterlockedCompareExchange) -#pragma intrinsic (_InterlockedExchange) -#define interlockedcompareexchange _InterlockedCompareExchange -#define interlockedexchange _InterlockedExchange -#elif defined(WIN32) && defined(__GNUC__) -#define interlockedcompareexchange(a, b, c) __sync_val_compare_and_swap(a, c, b) -#define interlockedexchange __sync_lock_test_and_set -#endif /* Win32 */ -#else /* USE_LOCKS */ -#endif /* USE_LOCKS */ - -#ifndef LOCK_AT_FORK -#define LOCK_AT_FORK 0 -#endif - -/* Declarations for bit scanning on win32 */ -#if defined(_MSC_VER) && _MSC_VER>=1300 -#ifndef BitScanForward /* Try to avoid pulling in WinNT.h */ -#ifdef __cplusplus -extern "C" { -#endif /* __cplusplus */ -unsigned char _BitScanForward(unsigned long *index, unsigned long mask); -unsigned char _BitScanReverse(unsigned long *index, unsigned long mask); -#ifdef __cplusplus -} -#endif /* __cplusplus */ - -#define BitScanForward _BitScanForward -#define BitScanReverse _BitScanReverse -#pragma intrinsic(_BitScanForward) -#pragma intrinsic(_BitScanReverse) -#endif /* BitScanForward */ -#endif /* defined(_MSC_VER) && _MSC_VER>=1300 */ - -#ifndef WIN32 -#ifndef malloc_getpagesize -# ifdef _SC_PAGESIZE /* some SVR4 systems omit an underscore */ -# ifndef _SC_PAGE_SIZE -# define _SC_PAGE_SIZE 
_SC_PAGESIZE -# endif -# endif -# ifdef _SC_PAGE_SIZE -# define malloc_getpagesize sysconf(_SC_PAGE_SIZE) -# else -# if defined(BSD) || defined(DGUX) || defined(HAVE_GETPAGESIZE) - extern size_t getpagesize(); -# define malloc_getpagesize getpagesize() -# else -# ifdef WIN32 /* use supplied emulation of getpagesize */ -# define malloc_getpagesize getpagesize() -# else -# ifndef LACKS_SYS_PARAM_H -# include <sys/param.h> -# endif -# ifdef EXEC_PAGESIZE -# define malloc_getpagesize EXEC_PAGESIZE -# else -# ifdef NBPG -# ifndef CLSIZE -# define malloc_getpagesize NBPG -# else -# define malloc_getpagesize (NBPG * CLSIZE) -# endif -# else -# ifdef NBPC -# define malloc_getpagesize NBPC -# else -# ifdef PAGESIZE -# define malloc_getpagesize PAGESIZE -# else /* just guess */ -# define malloc_getpagesize ((size_t)4096U) -# endif -# endif -# endif -# endif -# endif -# endif -# endif -#endif -#endif - -/* ------------------- size_t and alignment properties -------------------- */ - -/* The byte and bit size of a size_t */ -#define SIZE_T_SIZE (sizeof(size_t)) -#define SIZE_T_BITSIZE (sizeof(size_t) << 3) - -/* Some constants coerced to size_t */ -/* Annoying but necessary to avoid errors on some platforms */ -#define SIZE_T_ZERO ((size_t)0) -#define SIZE_T_ONE ((size_t)1) -#define SIZE_T_TWO ((size_t)2) -#define SIZE_T_FOUR ((size_t)4) -#define TWO_SIZE_T_SIZES (SIZE_T_SIZE<<1) -#define FOUR_SIZE_T_SIZES (SIZE_T_SIZE<<2) -#define SIX_SIZE_T_SIZES (FOUR_SIZE_T_SIZES+TWO_SIZE_T_SIZES) -#define HALF_MAX_SIZE_T (MAX_SIZE_T / 2U) - -/* The bit mask value corresponding to MALLOC_ALIGNMENT */ -#define CHUNK_ALIGN_MASK (MALLOC_ALIGNMENT - SIZE_T_ONE) - -/* True if address a has acceptable alignment */ -#define is_aligned(A) (((size_t)((A)) & (CHUNK_ALIGN_MASK)) == 0) - -/* the number of bytes to offset an address to align it */ -#define align_offset(A)\ - ((((size_t)(A) & CHUNK_ALIGN_MASK) == 0)? 
0 :\ - ((MALLOC_ALIGNMENT - ((size_t)(A) & CHUNK_ALIGN_MASK)) & CHUNK_ALIGN_MASK)) - -/* -------------------------- MMAP preliminaries ------------------------- */ - -/* - If HAVE_MORECORE or HAVE_MMAP are false, we just define calls and - checks to fail so compiler optimizer can delete code rather than - using so many "#if"s. -*/ - - -/* MORECORE and MMAP must return MFAIL on failure */ -#define MFAIL ((void*)(MAX_SIZE_T)) -#define CMFAIL ((char*)(MFAIL)) /* defined for convenience */ - -#if HAVE_MMAP - -#ifndef WIN32 -#define MUNMAP_DEFAULT(a, s) munmap((a), (s)) -#define MMAP_PROT (PROT_READ|PROT_WRITE) -#if !defined(MAP_ANONYMOUS) && defined(MAP_ANON) -#define MAP_ANONYMOUS MAP_ANON -#endif /* MAP_ANON */ -#ifdef MAP_ANONYMOUS -#define MMAP_FLAGS (MAP_PRIVATE|MAP_ANONYMOUS) -#define MMAP_DEFAULT(s) mmap(0, (s), MMAP_PROT, MMAP_FLAGS, -1, 0) -#else /* MAP_ANONYMOUS */ -/* - Nearly all versions of mmap support MAP_ANONYMOUS, so the following - is unlikely to be needed, but is supplied just in case. -*/ -#define MMAP_FLAGS (MAP_PRIVATE) -static int dev_zero_fd = -1; /* Cached file descriptor for /dev/zero. */ -#define MMAP_DEFAULT(s) ((dev_zero_fd < 0) ? \ - (dev_zero_fd = open("/dev/zero", O_RDWR), \ - mmap(0, (s), MMAP_PROT, MMAP_FLAGS, dev_zero_fd, 0)) : \ - mmap(0, (s), MMAP_PROT, MMAP_FLAGS, dev_zero_fd, 0)) -#endif /* MAP_ANONYMOUS */ - -#define DIRECT_MMAP_DEFAULT(s) MMAP_DEFAULT(s) - -#else /* WIN32 */ - -/* Win32 MMAP via VirtualAlloc */ -static FORCEINLINE void* win32mmap(size_t size) { - void* ptr = VirtualAlloc(0, size, MEM_RESERVE|MEM_COMMIT, PAGE_READWRITE); - return (ptr != 0)? ptr: MFAIL; -} - -/* For direct MMAP, use MEM_TOP_DOWN to minimize interference */ -static FORCEINLINE void* win32direct_mmap(size_t size) { - void* ptr = VirtualAlloc(0, size, MEM_RESERVE|MEM_COMMIT|MEM_TOP_DOWN, - PAGE_READWRITE); - return (ptr != 0)? 
ptr: MFAIL; -} - -/* This function supports releasing coalesed segments */ -static FORCEINLINE int win32munmap(void* ptr, size_t size) { - MEMORY_BASIC_INFORMATION minfo; - char* cptr = (char*)ptr; - while (size) { - if (VirtualQuery(cptr, &minfo, sizeof(minfo)) == 0) - return -1; - if (minfo.BaseAddress != cptr || minfo.AllocationBase != cptr || - minfo.State != MEM_COMMIT || minfo.RegionSize > size) - return -1; - if (VirtualFree(cptr, 0, MEM_RELEASE) == 0) - return -1; - cptr += minfo.RegionSize; - size -= minfo.RegionSize; - } - return 0; -} - -#define MMAP_DEFAULT(s) win32mmap(s) -#define MUNMAP_DEFAULT(a, s) win32munmap((a), (s)) -#define DIRECT_MMAP_DEFAULT(s) win32direct_mmap(s) -#endif /* WIN32 */ -#endif /* HAVE_MMAP */ - -#if HAVE_MREMAP -#ifndef WIN32 -#define MREMAP_DEFAULT(addr, osz, nsz, mv) mremap((addr), (osz), (nsz), (mv)) -#endif /* WIN32 */ -#endif /* HAVE_MREMAP */ - -/** - * Define CALL_MORECORE - */ -#if HAVE_MORECORE - #ifdef MORECORE - #define CALL_MORECORE(S) MORECORE(S) - #else /* MORECORE */ - #define CALL_MORECORE(S) MORECORE_DEFAULT(S) - #endif /* MORECORE */ -#else /* HAVE_MORECORE */ - #define CALL_MORECORE(S) MFAIL -#endif /* HAVE_MORECORE */ - -/** - * Define CALL_MMAP/CALL_MUNMAP/CALL_DIRECT_MMAP - */ -#if HAVE_MMAP - #define USE_MMAP_BIT (SIZE_T_ONE) - - #ifdef MMAP - #define CALL_MMAP(s) MMAP(s) - #else /* MMAP */ - #define CALL_MMAP(s) MMAP_DEFAULT(s) - #endif /* MMAP */ - #ifdef MUNMAP - #define CALL_MUNMAP(a, s) MUNMAP((a), (s)) - #else /* MUNMAP */ - #define CALL_MUNMAP(a, s) MUNMAP_DEFAULT((a), (s)) - #endif /* MUNMAP */ - #ifdef DIRECT_MMAP - #define CALL_DIRECT_MMAP(s) DIRECT_MMAP(s) - #else /* DIRECT_MMAP */ - #define CALL_DIRECT_MMAP(s) DIRECT_MMAP_DEFAULT(s) - #endif /* DIRECT_MMAP */ -#else /* HAVE_MMAP */ - #define USE_MMAP_BIT (SIZE_T_ZERO) - - #define MMAP(s) MFAIL - #define MUNMAP(a, s) (-1) - #define DIRECT_MMAP(s) MFAIL - #define CALL_DIRECT_MMAP(s) DIRECT_MMAP(s) - #define CALL_MMAP(s) MMAP(s) - #define 
CALL_MUNMAP(a, s) MUNMAP((a), (s)) -#endif /* HAVE_MMAP */ - -/** - * Define CALL_MREMAP - */ -#if HAVE_MMAP && HAVE_MREMAP - #ifdef MREMAP - #define CALL_MREMAP(addr, osz, nsz, mv) MREMAP((addr), (osz), (nsz), (mv)) - #else /* MREMAP */ - #define CALL_MREMAP(addr, osz, nsz, mv) MREMAP_DEFAULT((addr), (osz), (nsz), (mv)) - #endif /* MREMAP */ -#else /* HAVE_MMAP && HAVE_MREMAP */ - #define CALL_MREMAP(addr, osz, nsz, mv) MFAIL -#endif /* HAVE_MMAP && HAVE_MREMAP */ - -/* mstate bit set if continguous morecore disabled or failed */ -#define USE_NONCONTIGUOUS_BIT (4U) - -/* segment bit set in create_mspace_with_base */ -#define EXTERN_BIT (8U) - - -/* --------------------------- Lock preliminaries ------------------------ */ - -/* - When locks are defined, there is one global lock, plus - one per-mspace lock. - - The global lock_ensures that mparams.magic and other unique - mparams values are initialized only once. It also protects - sequences of calls to MORECORE. In many cases sys_alloc requires - two calls, that should not be interleaved with calls by other - threads. This does not protect against direct calls to MORECORE - by other threads not using this lock, so there is still code to - cope the best we can on interference. - - Per-mspace locks surround calls to malloc, free, etc. - By default, locks are simple non-reentrant mutexes. - - Because lock-protected regions generally have bounded times, it is - OK to use the supplied simple spinlocks. Spinlocks are likely to - improve performance for lightly contended applications, but worsen - performance under heavy contention. - - If USE_LOCKS is > 1, the definitions of lock routines here are - bypassed, in which case you will need to define the type MLOCK_T, - and at least INITIAL_LOCK, DESTROY_LOCK, ACQUIRE_LOCK, RELEASE_LOCK - and TRY_LOCK. You must also declare a - static MLOCK_T malloc_global_mutex = { initialization values };. 
- -*/ - -#if !USE_LOCKS -#define USE_LOCK_BIT (0U) -#define INITIAL_LOCK(l) (0) -#define DESTROY_LOCK(l) (0) -#define ACQUIRE_MALLOC_GLOBAL_LOCK() -#define RELEASE_MALLOC_GLOBAL_LOCK() - -#else -#if USE_LOCKS > 1 -/* ----------------------- User-defined locks ------------------------ */ -/* Define your own lock implementation here */ -/* #define INITIAL_LOCK(lk) ... */ -/* #define DESTROY_LOCK(lk) ... */ -/* #define ACQUIRE_LOCK(lk) ... */ -/* #define RELEASE_LOCK(lk) ... */ -/* #define TRY_LOCK(lk) ... */ -/* static MLOCK_T malloc_global_mutex = ... */ - -#elif USE_SPIN_LOCKS - -/* First, define CAS_LOCK and CLEAR_LOCK on ints */ -/* Note CAS_LOCK defined to return 0 on success */ - -#if defined(__GNUC__)&& (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 1)) -#define CAS_LOCK(sl) __sync_lock_test_and_set(sl, 1) -#define CLEAR_LOCK(sl) __sync_lock_release(sl) - -#elif (defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))) -/* Custom spin locks for older gcc on x86 */ -static FORCEINLINE int x86_cas_lock(int *sl) { - int ret; - int val = 1; - int cmp = 0; - __asm__ __volatile__ ("lock; cmpxchgl %1, %2" - : "=a" (ret) - : "r" (val), "m" (*(sl)), "0"(cmp) - : "memory", "cc"); - return ret; -} - -static FORCEINLINE void x86_clear_lock(int* sl) { - assert(*sl != 0); - int prev = 0; - int ret; - __asm__ __volatile__ ("lock; xchgl %0, %1" - : "=r" (ret) - : "m" (*(sl)), "0"(prev) - : "memory"); -} - -#define CAS_LOCK(sl) x86_cas_lock(sl) -#define CLEAR_LOCK(sl) x86_clear_lock(sl) - -#else /* Win32 MSC */ -#define CAS_LOCK(sl) interlockedexchange(sl, (LONG)1) -#define CLEAR_LOCK(sl) interlockedexchange (sl, (LONG)0) - -#endif /* ... gcc spins locks ... 
*/ - -/* How to yield for a spin lock */ -#define SPINS_PER_YIELD 63 -#if defined(_MSC_VER) -#define SLEEP_EX_DURATION 50 /* delay for yield/sleep */ -#define SPIN_LOCK_YIELD SleepEx(SLEEP_EX_DURATION, FALSE) -#elif defined (__SVR4) && defined (__sun) /* solaris */ -#define SPIN_LOCK_YIELD thr_yield(); -#elif !defined(LACKS_SCHED_H) -#define SPIN_LOCK_YIELD sched_yield(); -#else -#define SPIN_LOCK_YIELD -#endif /* ... yield ... */ - -#if !defined(USE_RECURSIVE_LOCKS) || USE_RECURSIVE_LOCKS == 0 -/* Plain spin locks use single word (embedded in malloc_states) */ -static int spin_acquire_lock(int *sl) { - int spins = 0; - while (*(volatile int *)sl != 0 || CAS_LOCK(sl)) { - if ((++spins & SPINS_PER_YIELD) == 0) { - SPIN_LOCK_YIELD; - } - } - return 0; -} - -#define MLOCK_T int -#define TRY_LOCK(sl) !CAS_LOCK(sl) -#define RELEASE_LOCK(sl) CLEAR_LOCK(sl) -#define ACQUIRE_LOCK(sl) (CAS_LOCK(sl)? spin_acquire_lock(sl) : 0) -#define INITIAL_LOCK(sl) (*sl = 0) -#define DESTROY_LOCK(sl) (0) -static MLOCK_T malloc_global_mutex = 0; - -#else /* USE_RECURSIVE_LOCKS */ -/* types for lock owners */ -#ifdef WIN32 -#define THREAD_ID_T DWORD -#define CURRENT_THREAD GetCurrentThreadId() -#define EQ_OWNER(X,Y) ((X) == (Y)) -#else -/* - Note: the following assume that pthread_t is a type that can be - initialized to (casted) zero. If this is not the case, you will need to - somehow redefine these or not use spin locks. 
-*/ -#define THREAD_ID_T pthread_t -#define CURRENT_THREAD pthread_self() -#define EQ_OWNER(X,Y) pthread_equal(X, Y) -#endif - -struct malloc_recursive_lock { - int sl; - unsigned int c; - THREAD_ID_T threadid; -}; - -#define MLOCK_T struct malloc_recursive_lock -static MLOCK_T malloc_global_mutex = { 0, 0, (THREAD_ID_T)0}; - -static FORCEINLINE void recursive_release_lock(MLOCK_T *lk) { - assert(lk->sl != 0); - if (--lk->c == 0) { - CLEAR_LOCK(&lk->sl); - } -} - -static FORCEINLINE int recursive_acquire_lock(MLOCK_T *lk) { - THREAD_ID_T mythreadid = CURRENT_THREAD; - int spins = 0; - for (;;) { - if (*((volatile int *)(&lk->sl)) == 0) { - if (!CAS_LOCK(&lk->sl)) { - lk->threadid = mythreadid; - lk->c = 1; - return 0; - } - } - else if (EQ_OWNER(lk->threadid, mythreadid)) { - ++lk->c; - return 0; - } - if ((++spins & SPINS_PER_YIELD) == 0) { - SPIN_LOCK_YIELD; - } - } -} - -static FORCEINLINE int recursive_try_lock(MLOCK_T *lk) { - THREAD_ID_T mythreadid = CURRENT_THREAD; - if (*((volatile int *)(&lk->sl)) == 0) { - if (!CAS_LOCK(&lk->sl)) { - lk->threadid = mythreadid; - lk->c = 1; - return 1; - } - } - else if (EQ_OWNER(lk->threadid, mythreadid)) { - ++lk->c; - return 1; - } - return 0; -} - -#define RELEASE_LOCK(lk) recursive_release_lock(lk) -#define TRY_LOCK(lk) recursive_try_lock(lk) -#define ACQUIRE_LOCK(lk) recursive_acquire_lock(lk) -#define INITIAL_LOCK(lk) ((lk)->threadid = (THREAD_ID_T)0, (lk)->sl = 0, (lk)->c = 0) -#define DESTROY_LOCK(lk) (0) -#endif /* USE_RECURSIVE_LOCKS */ - -#elif defined(WIN32) /* Win32 critical sections */ -#define MLOCK_T CRITICAL_SECTION -#define ACQUIRE_LOCK(lk) (EnterCriticalSection(lk), 0) -#define RELEASE_LOCK(lk) LeaveCriticalSection(lk) -#define TRY_LOCK(lk) TryEnterCriticalSection(lk) -#define INITIAL_LOCK(lk) (!InitializeCriticalSectionAndSpinCount((lk), 0x80000000|4000)) -#define DESTROY_LOCK(lk) (DeleteCriticalSection(lk), 0) -#define NEED_GLOBAL_LOCK_INIT - -static MLOCK_T malloc_global_mutex; -static volatile LONG 
malloc_global_mutex_status; - -/* Use spin loop to initialize global lock */ -static void init_malloc_global_mutex() { - for (;;) { - long stat = malloc_global_mutex_status; - if (stat > 0) - return; - /* transition to < 0 while initializing, then to > 0) */ - if (stat == 0 && - interlockedcompareexchange(&malloc_global_mutex_status, (LONG)-1, (LONG)0) == 0) { - InitializeCriticalSection(&malloc_global_mutex); - interlockedexchange(&malloc_global_mutex_status, (LONG)1); - return; - } - SleepEx(0, FALSE); - } -} - -#else /* pthreads-based locks */ -#define MLOCK_T pthread_mutex_t -#define ACQUIRE_LOCK(lk) pthread_mutex_lock(lk) -#define RELEASE_LOCK(lk) pthread_mutex_unlock(lk) -#define TRY_LOCK(lk) (!pthread_mutex_trylock(lk)) -#define INITIAL_LOCK(lk) pthread_init_lock(lk) -#define DESTROY_LOCK(lk) pthread_mutex_destroy(lk) - -#if defined(USE_RECURSIVE_LOCKS) && USE_RECURSIVE_LOCKS != 0 && defined(linux) && !defined(PTHREAD_MUTEX_RECURSIVE) -/* Cope with old-style linux recursive lock initialization by adding */ -/* skipped internal declaration from pthread.h */ -extern int pthread_mutexattr_setkind_np __P ((pthread_mutexattr_t *__attr, - int __kind)); -#define PTHREAD_MUTEX_RECURSIVE PTHREAD_MUTEX_RECURSIVE_NP -#define pthread_mutexattr_settype(x,y) pthread_mutexattr_setkind_np(x,y) -#endif /* USE_RECURSIVE_LOCKS ... */ - -static MLOCK_T malloc_global_mutex = PTHREAD_MUTEX_INITIALIZER; - -static int pthread_init_lock (MLOCK_T *lk) { - pthread_mutexattr_t attr; - if (pthread_mutexattr_init(&attr)) return 1; -#if defined(USE_RECURSIVE_LOCKS) && USE_RECURSIVE_LOCKS != 0 - if (pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE)) return 1; -#endif - if (pthread_mutex_init(lk, &attr)) return 1; - if (pthread_mutexattr_destroy(&attr)) return 1; - return 0; -} - -#endif /* ... lock types ... 
*/ - -/* Common code for all lock types */ -#define USE_LOCK_BIT (2U) - -#ifndef ACQUIRE_MALLOC_GLOBAL_LOCK -#define ACQUIRE_MALLOC_GLOBAL_LOCK() ACQUIRE_LOCK(&malloc_global_mutex); -#endif - -#ifndef RELEASE_MALLOC_GLOBAL_LOCK -#define RELEASE_MALLOC_GLOBAL_LOCK() RELEASE_LOCK(&malloc_global_mutex); -#endif - -#endif /* USE_LOCKS */ - -/* ----------------------- Chunk representations ------------------------ */ - -/* - (The following includes lightly edited explanations by Colin Plumb.) - - The malloc_chunk declaration below is misleading (but accurate and - necessary). It declares a "view" into memory allowing access to - necessary fields at known offsets from a given base. - - Chunks of memory are maintained using a `boundary tag' method as - originally described by Knuth. (See the paper by Paul Wilson - ftp://ftp.cs.utexas.edu/pub/garbage/allocsrv.ps for a survey of such - techniques.) Sizes of free chunks are stored both in the front of - each chunk and at the end. This makes consolidating fragmented - chunks into bigger chunks fast. The head fields also hold bits - representing whether chunks are free or in use. - - Here are some pictures to make it clearer. They are "exploded" to - show that the state of a chunk can be thought of as extending from - the high 31 bits of the head field of its header through the - prev_foot and PINUSE_BIT bit of the following chunk header. 
- - A chunk that's in use looks like: - - chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Size of previous chunk (if P = 0) | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |P| - | Size of this chunk 1| +-+ - mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | | - +- -+ - | | - +- -+ - | : - +- size - sizeof(size_t) available payload bytes -+ - : | - chunk-> +- -+ - | | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |1| - | Size of next chunk (may or may not be in use) | +-+ - mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - - And if it's free, it looks like this: - - chunk-> +- -+ - | User payload (must be in use, or we would have merged!) | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |P| - | Size of this chunk 0| +-+ - mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Next pointer | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Prev pointer | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | : - +- size - sizeof(struct chunk) unused bytes -+ - : | - chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Size of this chunk | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |0| - | Size of next chunk (must be in use, or we would have merged)| +-+ - mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | : - +- User payload -+ - : | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - |0| - +-+ - Note that since we always merge adjacent free chunks, the chunks - adjacent to a free chunk must be in use. 
- - Given a pointer to a chunk (which can be derived trivially from the - payload pointer) we can, in O(1) time, find out whether the adjacent - chunks are free, and if so, unlink them from the lists that they - are on and merge them with the current chunk. - - Chunks always begin on even word boundaries, so the mem portion - (which is returned to the user) is also on an even word boundary, and - thus at least double-word aligned. - - The P (PINUSE_BIT) bit, stored in the unused low-order bit of the - chunk size (which is always a multiple of two words), is an in-use - bit for the *previous* chunk. If that bit is *clear*, then the - word before the current chunk size contains the previous chunk - size, and can be used to find the front of the previous chunk. - The very first chunk allocated always has this bit set, preventing - access to non-existent (or non-owned) memory. If pinuse is set for - any given chunk, then you CANNOT determine the size of the - previous chunk, and might even get a memory addressing fault when - trying to do so. - - The C (CINUSE_BIT) bit, stored in the unused second-lowest bit of - the chunk size redundantly records whether the current chunk is - inuse (unless the chunk is mmapped). This redundancy enables usage - checks within free and realloc, and reduces indirection when freeing - and consolidating chunks. - - Each freshly allocated chunk must have both cinuse and pinuse set. - That is, each allocated chunk borders either a previously allocated - and still in-use chunk, or the base of its memory arena. This is - ensured by making all allocations from the `lowest' part of any - found chunk. Further, no free chunk physically borders another one, - so each free chunk is known to be preceded and followed by either - inuse chunks or the ends of memory. - - Note that the `foot' of the current chunk is actually represented - as the prev_foot of the NEXT chunk. 
This makes it easier to - deal with alignments etc but can be very confusing when trying - to extend or adapt this code. - - The exceptions to all this are - - 1. The special chunk `top' is the top-most available chunk (i.e., - the one bordering the end of available memory). It is treated - specially. Top is never included in any bin, is used only if - no other chunk is available, and is released back to the - system if it is very large (see M_TRIM_THRESHOLD). In effect, - the top chunk is treated as larger (and thus less well - fitting) than any other available chunk. The top chunk - doesn't update its trailing size field since there is no next - contiguous chunk that would have to index off it. However, - space is still allocated for it (TOP_FOOT_SIZE) to enable - separation or merging when space is extended. - - 3. Chunks allocated via mmap, have both cinuse and pinuse bits - cleared in their head fields. Because they are allocated - one-by-one, each must carry its own prev_foot field, which is - also used to hold the offset this chunk has within its mmapped - region, which is needed to preserve alignment. Each mmapped - chunk is trailed by the first two fields of a fake next-chunk - for sake of usage checks. - -*/ - -struct malloc_chunk { - size_t prev_foot; /* Size of previous chunk (if free). */ - size_t head; /* Size and inuse bits. */ - struct malloc_chunk* fd; /* double links -- used only if free. 
*/ - struct malloc_chunk* bk; -}; - -typedef struct malloc_chunk mchunk; -typedef struct malloc_chunk* mchunkptr; -typedef struct malloc_chunk* sbinptr; /* The type of bins of chunks */ -typedef unsigned int bindex_t; /* Described below */ -typedef unsigned int binmap_t; /* Described below */ -typedef unsigned int flag_t; /* The type of various bit flag sets */ - -/* ------------------- Chunks sizes and alignments ----------------------- */ - -#define MCHUNK_SIZE (sizeof(mchunk)) - -#if FOOTERS -#define CHUNK_OVERHEAD (TWO_SIZE_T_SIZES) -#else /* FOOTERS */ -#define CHUNK_OVERHEAD (SIZE_T_SIZE) -#endif /* FOOTERS */ - -/* MMapped chunks need a second word of overhead ... */ -#define MMAP_CHUNK_OVERHEAD (TWO_SIZE_T_SIZES) -/* ... and additional padding for fake next-chunk at foot */ -#define MMAP_FOOT_PAD (FOUR_SIZE_T_SIZES) - -/* The smallest size we can malloc is an aligned minimal chunk */ -#define MIN_CHUNK_SIZE\ - ((MCHUNK_SIZE + CHUNK_ALIGN_MASK) & ~CHUNK_ALIGN_MASK) - -/* conversion from malloc headers to user pointers, and back */ -#define chunk2mem(p) ((void*)((char*)(p) + TWO_SIZE_T_SIZES)) -#define mem2chunk(mem) ((mchunkptr)((char*)(mem) - TWO_SIZE_T_SIZES)) -/* chunk associated with aligned address A */ -#define align_as_chunk(A) (mchunkptr)((A) + align_offset(chunk2mem(A))) - -/* Bounds on request (not chunk) sizes. */ -#define MAX_REQUEST ((-MIN_CHUNK_SIZE) << 2) -#define MIN_REQUEST (MIN_CHUNK_SIZE - CHUNK_OVERHEAD - SIZE_T_ONE) - -/* pad request bytes into a usable size */ -#define pad_request(req) \ - (((req) + CHUNK_OVERHEAD + CHUNK_ALIGN_MASK) & ~CHUNK_ALIGN_MASK) - -/* pad request, checking for minimum (but not maximum) */ -#define request2size(req) \ - (((req) < MIN_REQUEST)? 
MIN_CHUNK_SIZE : pad_request(req)) - - -/* ------------------ Operations on head and foot fields ----------------- */ - -/* - The head field of a chunk is or'ed with PINUSE_BIT when previous - adjacent chunk in use, and or'ed with CINUSE_BIT if this chunk is in - use, unless mmapped, in which case both bits are cleared. - - FLAG4_BIT is not used by this malloc, but might be useful in extensions. -*/ - -#define PINUSE_BIT (SIZE_T_ONE) -#define CINUSE_BIT (SIZE_T_TWO) -#define FLAG4_BIT (SIZE_T_FOUR) -#define INUSE_BITS (PINUSE_BIT|CINUSE_BIT) -#define FLAG_BITS (PINUSE_BIT|CINUSE_BIT|FLAG4_BIT) - -/* Head value for fenceposts */ -#define FENCEPOST_HEAD (INUSE_BITS|SIZE_T_SIZE) - -/* extraction of fields from head words */ -#define cinuse(p) ((p)->head & CINUSE_BIT) -#define pinuse(p) ((p)->head & PINUSE_BIT) -#define flag4inuse(p) ((p)->head & FLAG4_BIT) -#define is_inuse(p) (((p)->head & INUSE_BITS) != PINUSE_BIT) -#define is_mmapped(p) (((p)->head & INUSE_BITS) == 0) - -#define chunksize(p) ((p)->head & ~(FLAG_BITS)) - -#define clear_pinuse(p) ((p)->head &= ~PINUSE_BIT) -#define set_flag4(p) ((p)->head |= FLAG4_BIT) -#define clear_flag4(p) ((p)->head &= ~FLAG4_BIT) - -/* Treat space at ptr +/- offset as a chunk */ -#define chunk_plus_offset(p, s) ((mchunkptr)(((char*)(p)) + (s))) -#define chunk_minus_offset(p, s) ((mchunkptr)(((char*)(p)) - (s))) - -/* Ptr to next or previous physical malloc_chunk. 
*/ -#define next_chunk(p) ((mchunkptr)( ((char*)(p)) + ((p)->head & ~FLAG_BITS))) -#define prev_chunk(p) ((mchunkptr)( ((char*)(p)) - ((p)->prev_foot) )) - -/* extract next chunk's pinuse bit */ -#define next_pinuse(p) ((next_chunk(p)->head) & PINUSE_BIT) - -/* Get/set size at footer */ -#define get_foot(p, s) (((mchunkptr)((char*)(p) + (s)))->prev_foot) -#define set_foot(p, s) (((mchunkptr)((char*)(p) + (s)))->prev_foot = (s)) - -/* Set size, pinuse bit, and foot */ -#define set_size_and_pinuse_of_free_chunk(p, s)\ - ((p)->head = (s|PINUSE_BIT), set_foot(p, s)) - -/* Set size, pinuse bit, foot, and clear next pinuse */ -#define set_free_with_pinuse(p, s, n)\ - (clear_pinuse(n), set_size_and_pinuse_of_free_chunk(p, s)) - -/* Get the internal overhead associated with chunk p */ -#define overhead_for(p)\ - (is_mmapped(p)? MMAP_CHUNK_OVERHEAD : CHUNK_OVERHEAD) - -/* Return true if malloced space is not necessarily cleared */ -#if MMAP_CLEARS -#define calloc_must_clear(p) (!is_mmapped(p)) -#else /* MMAP_CLEARS */ -#define calloc_must_clear(p) (1) -#endif /* MMAP_CLEARS */ - -/* ---------------------- Overlaid data structures ----------------------- */ - -/* - When chunks are not in use, they are treated as nodes of either - lists or trees. - - "Small" chunks are stored in circular doubly-linked lists, and look - like this: - - chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Size of previous chunk | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - `head:' | Size of chunk, in bytes |P| - mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Forward pointer to next chunk in list | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Back pointer to previous chunk in list | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Unused space (may be 0 bytes long) . - . . - . 
| -nextchunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - `foot:' | Size of chunk, in bytes | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - - Larger chunks are kept in a form of bitwise digital trees (aka - tries) keyed on chunksizes. Because malloc_tree_chunks are only for - free chunks greater than 256 bytes, their size doesn't impose any - constraints on user chunk sizes. Each node looks like: - - chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Size of previous chunk | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - `head:' | Size of chunk, in bytes |P| - mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Forward pointer to next chunk of same size | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Back pointer to previous chunk of same size | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Pointer to left child (child[0]) | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Pointer to right child (child[1]) | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Pointer to parent | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | bin index of this chunk | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Unused space . - . | -nextchunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - `foot:' | Size of chunk, in bytes | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - - Each tree holding treenodes is a tree of unique chunk sizes. Chunks - of the same size are arranged in a circularly-linked list, with only - the oldest chunk (the next to be used, in our FIFO ordering) - actually in the tree. (Tree members are distinguished by a non-null - parent pointer.) 
If a chunk with the same size as an existing node - is inserted, it is linked off the existing node using pointers that - work in the same way as fd/bk pointers of small chunks. - - Each tree contains a power of 2 sized range of chunk sizes (the - smallest is 0x100 <= x < 0x180), which is divided in half at each - tree level, with the chunks in the smaller half of the range (0x100 - <= x < 0x140 for the top node) in the left subtree and the larger - half (0x140 <= x < 0x180) in the right subtree. This is, of course, - done by inspecting individual bits. - - Using these rules, each node's left subtree contains all smaller - sizes than its right subtree. However, the node at the root of each - subtree has no particular ordering relationship to either. (The - dividing line between the subtree sizes is based on trie relation.) - If we remove the last chunk of a given size from the interior of the - tree, we need to replace it with a leaf node. The tree ordering - rules permit a node to be replaced by any leaf below it. - - The smallest chunk in a tree (a common operation in a best-fit - allocator) can be found by walking a path to the leftmost leaf in - the tree. Unlike a usual binary tree, where we follow left child - pointers until we reach a null, here we follow the right child - pointer any time the left one is null, until we reach a leaf with - both child pointers null. The smallest chunk in the tree will be - somewhere along that path. - - The worst case number of steps to add, find, or remove a node is - bounded by the number of bits differentiating chunks within - bins. Under current bin calculations, this ranges from 6 up to 21 - (for 32 bit sizes) or up to 53 (for 64 bit sizes). The typical case - is of course much better.
-*/ - -struct malloc_tree_chunk { - /* The first four fields must be compatible with malloc_chunk */ - size_t prev_foot; - size_t head; - struct malloc_tree_chunk* fd; - struct malloc_tree_chunk* bk; - - struct malloc_tree_chunk* child[2]; - struct malloc_tree_chunk* parent; - bindex_t index; -}; - -typedef struct malloc_tree_chunk tchunk; -typedef struct malloc_tree_chunk* tchunkptr; -typedef struct malloc_tree_chunk* tbinptr; /* The type of bins of trees */ - -/* A little helper macro for trees */ -#define leftmost_child(t) ((t)->child[0] != 0? (t)->child[0] : (t)->child[1]) - -/* ----------------------------- Segments -------------------------------- */ - -/* - Each malloc space may include non-contiguous segments, held in a - list headed by an embedded malloc_segment record representing the - top-most space. Segments also include flags holding properties of - the space. Large chunks that are directly allocated by mmap are not - included in this list. They are instead independently created and - destroyed without otherwise keeping track of them. - - Segment management mainly comes into play for spaces allocated by - MMAP. Any call to MMAP might or might not return memory that is - adjacent to an existing segment. MORECORE normally contiguously - extends the current space, so this space is almost always adjacent, - which is simpler and faster to deal with. (This is why MORECORE is - used preferentially to MMAP when both are available -- see - sys_alloc.) When allocating using MMAP, we don't use any of the - hinting mechanisms (inconsistently) supported in various - implementations of unix mmap, or distinguish reserving from - committing memory. Instead, we just ask for space, and exploit - contiguity when we get it. It is probably possible to do - better than this on some systems, but no general scheme seems - to be significantly better. 
- - Management entails a simpler variant of the consolidation scheme - used for chunks to reduce fragmentation -- new adjacent memory is - normally prepended or appended to an existing segment. However, - there are limitations compared to chunk consolidation that mostly - reflect the fact that segment processing is relatively infrequent - (occurring only when getting memory from system) and that we - don't expect to have huge numbers of segments: - - * Segments are not indexed, so traversal requires linear scans. (It - would be possible to index these, but is not worth the extra - overhead and complexity for most programs on most platforms.) - * New segments are only appended to old ones when holding top-most - memory; if they cannot be prepended to others, they are held in - different segments. - - Except for the top-most segment of an mstate, each segment record - is kept at the tail of its segment. Segments are added by pushing - segment records onto the list headed by &mstate.seg for the - containing mstate. - - Segment flags control allocation/merge/deallocation policies: - * If EXTERN_BIT set, then we did not allocate this segment, - and so should not try to deallocate or merge with others. - (This currently holds only for the initial segment passed - into create_mspace_with_base.) - * If USE_MMAP_BIT set, the segment may be merged with - other surrounding mmapped segments and trimmed/de-allocated - using munmap. - * If neither bit is set, then the segment was obtained using - MORECORE so can be merged with surrounding MORECORE'd segments - and deallocated/trimmed using MORECORE with negative arguments. 
-*/ - -struct malloc_segment { - char* base; /* base address */ - size_t size; /* allocated size */ - struct malloc_segment* next; /* ptr to next segment */ - flag_t sflags; /* mmap and extern flag */ -}; - -#define is_mmapped_segment(S) ((S)->sflags & USE_MMAP_BIT) -#define is_extern_segment(S) ((S)->sflags & EXTERN_BIT) - -typedef struct malloc_segment msegment; -typedef struct malloc_segment* msegmentptr; - -/* ---------------------------- malloc_state ----------------------------- */ - -/* - A malloc_state holds all of the bookkeeping for a space. - The main fields are: - - Top - The topmost chunk of the currently active segment. Its size is - cached in topsize. The actual size of topmost space is - topsize+TOP_FOOT_SIZE, which includes space reserved for adding - fenceposts and segment records if necessary when getting more - space from the system. The size at which to autotrim top is - cached from mparams in trim_check, except that it is disabled if - an autotrim fails. - - Designated victim (dv) - This is the preferred chunk for servicing small requests that - don't have exact fits. It is normally the chunk split off most - recently to service another small request. Its size is cached in - dvsize. The link fields of this chunk are not maintained since it - is not kept in a bin. - - SmallBins - An array of bin headers for free chunks. These bins hold chunks - with sizes less than MIN_LARGE_SIZE bytes. Each bin contains - chunks of all the same size, spaced 8 bytes apart. To simplify - use in double-linked lists, each bin header acts as a malloc_chunk - pointing to the real first node, if it exists (else pointing to - itself). This avoids special-casing for headers. But to avoid - waste, we allocate only the fd/bk pointers of bins, and then use - repositioning tricks to treat these as the fields of a chunk. - - TreeBins - Treebins are pointers to the roots of trees holding a range of - sizes. 
There are 2 equally spaced treebins for each power of two - from TREEBIN_SHIFT to TREEBIN_SHIFT+16. The last bin holds anything - larger. - - Bin maps - There is one bit map for small bins ("smallmap") and one for - treebins ("treemap"). Each bin sets its bit when non-empty, and - clears the bit when empty. Bit operations are then used to avoid - bin-by-bin searching -- nearly all "search" is done without ever - looking at bins that won't be selected. The bit maps - conservatively use 32 bits per map word, even on 64-bit systems. - For a good description of some of the bit-based techniques used - here, see Henry S. Warren Jr's book "Hacker's Delight" (and - supplement at http://hackersdelight.org/). Many of these are - intended to reduce the branchiness of paths through malloc etc, as - well as to reduce the number of memory locations read or written. - - Segments - A list of segments headed by an embedded malloc_segment record - representing the initial space. - - Address check support - The least_addr field is the least address ever obtained from - MORECORE or MMAP. Attempted frees and reallocs of any address less - than this are trapped (unless INSECURE is defined). - - Magic tag - A cross-check field that should always hold the same value as mparams.magic. - - Max allowed footprint - The maximum allowed bytes to allocate from system (zero means no limit) - - Flags - Bits recording whether to use MMAP, locks, or contiguous MORECORE - - Statistics - Each space keeps track of current and maximum system memory - obtained via MORECORE or MMAP. - - Trim support - Fields holding the amount of unused topmost memory that should trigger - trimming, and a counter to force periodic scanning to release unused - non-topmost segments. - - Locking - If USE_LOCKS is defined, the "mutex" lock is acquired and released - around every public call using this mspace. - - Extension support - A void* pointer and a size_t field that can be used to help implement - extensions to this malloc.
-*/ - -/* Bin types, widths and sizes */ -#define NSMALLBINS (32U) -#define NTREEBINS (32U) -#define SMALLBIN_SHIFT (3U) -#define SMALLBIN_WIDTH (SIZE_T_ONE << SMALLBIN_SHIFT) -#define TREEBIN_SHIFT (8U) -#define MIN_LARGE_SIZE (SIZE_T_ONE << TREEBIN_SHIFT) -#define MAX_SMALL_SIZE (MIN_LARGE_SIZE - SIZE_T_ONE) -#define MAX_SMALL_REQUEST (MAX_SMALL_SIZE - CHUNK_ALIGN_MASK - CHUNK_OVERHEAD) - -struct malloc_state { - binmap_t smallmap; - binmap_t treemap; - size_t dvsize; - size_t topsize; - char* least_addr; - mchunkptr dv; - mchunkptr top; - size_t trim_check; - size_t release_checks; - size_t magic; - mchunkptr smallbins[(NSMALLBINS+1)*2]; - tbinptr treebins[NTREEBINS]; - size_t footprint; - size_t max_footprint; - size_t footprint_limit; /* zero means no limit */ - flag_t mflags; -#if USE_LOCKS - MLOCK_T mutex; /* locate lock among fields that rarely change */ -#endif /* USE_LOCKS */ - msegment seg; - void* extp; /* Unused but available for extensions */ - size_t exts; -}; - -typedef struct malloc_state* mstate; - -/* ------------- Global malloc_state and malloc_params ------------------- */ - -/* - malloc_params holds global properties, including those that can be - dynamically set using mallopt. There is a single instance, mparams, - initialized in init_mparams. Note that the non-zeroness of "magic" - also serves as an initialization flag. 
-*/ - -struct malloc_params { - size_t magic; - size_t page_size; - size_t granularity; - size_t mmap_threshold; - size_t trim_threshold; - flag_t default_mflags; -}; - -static struct malloc_params mparams; - -/* Ensure mparams initialized */ -#define ensure_initialization() (void)(mparams.magic != 0 || init_mparams()) - -#if !ONLY_MSPACES - -/* The global malloc_state used for all non-"mspace" calls */ -static struct malloc_state _gm_; -#define gm (&_gm_) -#define is_global(M) ((M) == &_gm_) - -#endif /* !ONLY_MSPACES */ - -#define is_initialized(M) ((M)->top != 0) - -/* -------------------------- system alloc setup ------------------------- */ - -/* Operations on mflags */ - -#define use_lock(M) ((M)->mflags & USE_LOCK_BIT) -#define enable_lock(M) ((M)->mflags |= USE_LOCK_BIT) -#if USE_LOCKS -#define disable_lock(M) ((M)->mflags &= ~USE_LOCK_BIT) -#else -#define disable_lock(M) -#endif - -#define use_mmap(M) ((M)->mflags & USE_MMAP_BIT) -#define enable_mmap(M) ((M)->mflags |= USE_MMAP_BIT) -#if HAVE_MMAP -#define disable_mmap(M) ((M)->mflags &= ~USE_MMAP_BIT) -#else -#define disable_mmap(M) -#endif - -#define use_noncontiguous(M) ((M)->mflags & USE_NONCONTIGUOUS_BIT) -#define disable_contiguous(M) ((M)->mflags |= USE_NONCONTIGUOUS_BIT) - -#define set_lock(M,L)\ - ((M)->mflags = (L)?\ - ((M)->mflags | USE_LOCK_BIT) :\ - ((M)->mflags & ~USE_LOCK_BIT)) - -/* page-align a size */ -#define page_align(S)\ - (((S) + (mparams.page_size - SIZE_T_ONE)) & ~(mparams.page_size - SIZE_T_ONE)) - -/* granularity-align a size */ -#define granularity_align(S)\ - (((S) + (mparams.granularity - SIZE_T_ONE))\ - & ~(mparams.granularity - SIZE_T_ONE)) - - -/* For mmap, use granularity alignment on windows, else page-align */ -#ifdef WIN32 -#define mmap_align(S) granularity_align(S) -#else -#define mmap_align(S) page_align(S) -#endif - -/* For sys_alloc, enough padding to ensure can malloc request on success */ -#define SYS_ALLOC_PADDING (TOP_FOOT_SIZE + MALLOC_ALIGNMENT) - -#define 
is_page_aligned(S)\ - (((size_t)(S) & (mparams.page_size - SIZE_T_ONE)) == 0) -#define is_granularity_aligned(S)\ - (((size_t)(S) & (mparams.granularity - SIZE_T_ONE)) == 0) - -/* True if segment S holds address A */ -#define segment_holds(S, A)\ - ((char*)(A) >= S->base && (char*)(A) < S->base + S->size) - -/* Return segment holding given address */ -static msegmentptr segment_holding(mstate m, char* addr) { - msegmentptr sp = &m->seg; - for (;;) { - if (addr >= sp->base && addr < sp->base + sp->size) - return sp; - if ((sp = sp->next) == 0) - return 0; - } -} - -/* Return true if segment contains a segment link */ -static int has_segment_link(mstate m, msegmentptr ss) { - msegmentptr sp = &m->seg; - for (;;) { - if ((char*)sp >= ss->base && (char*)sp < ss->base + ss->size) - return 1; - if ((sp = sp->next) == 0) - return 0; - } -} - -#ifndef MORECORE_CANNOT_TRIM -#define should_trim(M,s) ((s) > (M)->trim_check) -#else /* MORECORE_CANNOT_TRIM */ -#define should_trim(M,s) (0) -#endif /* MORECORE_CANNOT_TRIM */ - -/* - TOP_FOOT_SIZE is padding at the end of a segment, including space - that may be needed to place segment records and fenceposts when new - noncontiguous segments are added. -*/ -#define TOP_FOOT_SIZE\ - (align_offset(chunk2mem(0))+pad_request(sizeof(struct malloc_segment))+MIN_CHUNK_SIZE) - - -/* ------------------------------- Hooks -------------------------------- */ - -/* - PREACTION should be defined to return 0 on success, and nonzero on - failure. If you are not using locking, you can redefine these to do - anything you like. -*/ - -#if USE_LOCKS -#define PREACTION(M) ((use_lock(M))? ACQUIRE_LOCK(&(M)->mutex) : 0) -#define POSTACTION(M) { if (use_lock(M)) RELEASE_LOCK(&(M)->mutex); } -#else /* USE_LOCKS */ - -#ifndef PREACTION -#define PREACTION(M) (0) -#endif /* PREACTION */ - -#ifndef POSTACTION -#define POSTACTION(M) -#endif /* POSTACTION */ - -#endif /* USE_LOCKS */ - -/* - CORRUPTION_ERROR_ACTION is triggered upon detected bad addresses. 
- USAGE_ERROR_ACTION is triggered on detected bad frees and - reallocs. The argument p is an address that might have triggered the - fault. It is ignored by the two predefined actions, but might be - useful in custom actions that try to help diagnose errors. -*/ - -#if PROCEED_ON_ERROR - -/* A count of the number of corruption errors causing resets */ -int malloc_corruption_error_count; - -/* default corruption action */ -static void reset_on_error(mstate m); - -#define CORRUPTION_ERROR_ACTION(m) reset_on_error(m) -#define USAGE_ERROR_ACTION(m, p) - -#else /* PROCEED_ON_ERROR */ - -#ifndef CORRUPTION_ERROR_ACTION -#define CORRUPTION_ERROR_ACTION(m) ABORT -#endif /* CORRUPTION_ERROR_ACTION */ - -#ifndef USAGE_ERROR_ACTION -#define USAGE_ERROR_ACTION(m,p) ABORT -#endif /* USAGE_ERROR_ACTION */ - -#endif /* PROCEED_ON_ERROR */ - - -/* -------------------------- Debugging setup ---------------------------- */ - -#if ! DEBUG - -#define check_free_chunk(M,P) -#define check_inuse_chunk(M,P) -#define check_malloced_chunk(M,P,N) -#define check_mmapped_chunk(M,P) -#define check_malloc_state(M) -#define check_top_chunk(M,P) - -#else /* DEBUG */ -#define check_free_chunk(M,P) do_check_free_chunk(M,P) -#define check_inuse_chunk(M,P) do_check_inuse_chunk(M,P) -#define check_top_chunk(M,P) do_check_top_chunk(M,P) -#define check_malloced_chunk(M,P,N) do_check_malloced_chunk(M,P,N) -#define check_mmapped_chunk(M,P) do_check_mmapped_chunk(M,P) -#define check_malloc_state(M) do_check_malloc_state(M) - -static void do_check_any_chunk(mstate m, mchunkptr p); -static void do_check_top_chunk(mstate m, mchunkptr p); -static void do_check_mmapped_chunk(mstate m, mchunkptr p); -static void do_check_inuse_chunk(mstate m, mchunkptr p); -static void do_check_free_chunk(mstate m, mchunkptr p); -static void do_check_malloced_chunk(mstate m, void* mem, size_t s); -static void do_check_tree(mstate m, tchunkptr t); -static void do_check_treebin(mstate m, bindex_t i); -static void 
do_check_smallbin(mstate m, bindex_t i); -static void do_check_malloc_state(mstate m); -static int bin_find(mstate m, mchunkptr x); -static size_t traverse_and_check(mstate m); -#endif /* DEBUG */ - -/* ---------------------------- Indexing Bins ---------------------------- */ - -#define is_small(s) (((s) >> SMALLBIN_SHIFT) < NSMALLBINS) -#define small_index(s) (bindex_t)((s) >> SMALLBIN_SHIFT) -#define small_index2size(i) ((i) << SMALLBIN_SHIFT) -#define MIN_SMALL_INDEX (small_index(MIN_CHUNK_SIZE)) - -/* addressing by index. See above about smallbin repositioning */ -#define smallbin_at(M, i) ((sbinptr)((char*)&((M)->smallbins[(i)<<1]))) -#define treebin_at(M,i) (&((M)->treebins[i])) - -/* assign tree index for size S to variable I. Use x86 asm if possible */ -#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)) -#define compute_tree_index(S, I)\ -{\ - unsigned int X = S >> TREEBIN_SHIFT;\ - if (X == 0)\ - I = 0;\ - else if (X > 0xFFFF)\ - I = NTREEBINS-1;\ - else {\ - unsigned int K = (unsigned) sizeof(X)*__CHAR_BIT__ - 1 - (unsigned) __builtin_clz(X); \ - I = (bindex_t)((K << 1) + ((S >> (K + (TREEBIN_SHIFT-1)) & 1)));\ - }\ -} - -#elif defined (__INTEL_COMPILER) -#define compute_tree_index(S, I)\ -{\ - size_t X = S >> TREEBIN_SHIFT;\ - if (X == 0)\ - I = 0;\ - else if (X > 0xFFFF)\ - I = NTREEBINS-1;\ - else {\ - unsigned int K = _bit_scan_reverse (X); \ - I = (bindex_t)((K << 1) + ((S >> (K + (TREEBIN_SHIFT-1)) & 1)));\ - }\ -} - -#elif defined(_MSC_VER) && _MSC_VER>=1300 -#define compute_tree_index(S, I)\ -{\ - size_t X = S >> TREEBIN_SHIFT;\ - if (X == 0)\ - I = 0;\ - else if (X > 0xFFFF)\ - I = NTREEBINS-1;\ - else {\ - unsigned int K;\ - _BitScanReverse((DWORD *) &K, (DWORD) X);\ - I = (bindex_t)((K << 1) + ((S >> (K + (TREEBIN_SHIFT-1)) & 1)));\ - }\ -} - -#else /* GNUC */ -#define compute_tree_index(S, I)\ -{\ - size_t X = S >> TREEBIN_SHIFT;\ - if (X == 0)\ - I = 0;\ - else if (X > 0xFFFF)\ - I = NTREEBINS-1;\ - else {\ - unsigned int Y 
= (unsigned int)X;\ - unsigned int N = ((Y - 0x100) >> 16) & 8;\ - unsigned int K = (((Y <<= N) - 0x1000) >> 16) & 4;\ - N += K;\ - N += K = (((Y <<= K) - 0x4000) >> 16) & 2;\ - K = 14 - N + ((Y <<= K) >> 15);\ - I = (K << 1) + ((S >> (K + (TREEBIN_SHIFT-1)) & 1));\ - }\ -} -#endif /* GNUC */ - -/* Bit representing maximum resolved size in a treebin at i */ -#define bit_for_tree_index(i) \ - (i == NTREEBINS-1)? (SIZE_T_BITSIZE-1) : (((i) >> 1) + TREEBIN_SHIFT - 2) - -/* Shift placing maximum resolved bit in a treebin at i as sign bit */ -#define leftshift_for_tree_index(i) \ - ((i == NTREEBINS-1)? 0 : \ - ((SIZE_T_BITSIZE-SIZE_T_ONE) - (((i) >> 1) + TREEBIN_SHIFT - 2))) - -/* The size of the smallest chunk held in bin with index i */ -#define minsize_for_tree_index(i) \ - ((SIZE_T_ONE << (((i) >> 1) + TREEBIN_SHIFT)) | \ - (((size_t)((i) & SIZE_T_ONE)) << (((i) >> 1) + TREEBIN_SHIFT - 1))) - - -/* ------------------------ Operations on bin maps ----------------------- */ - -/* bit corresponding to given index */ -#define idx2bit(i) ((binmap_t)(1) << (i)) - -/* Mark/Clear bits with given index */ -#define mark_smallmap(M,i) ((M)->smallmap |= idx2bit(i)) -#define clear_smallmap(M,i) ((M)->smallmap &= ~idx2bit(i)) -#define smallmap_is_marked(M,i) ((M)->smallmap & idx2bit(i)) - -#define mark_treemap(M,i) ((M)->treemap |= idx2bit(i)) -#define clear_treemap(M,i) ((M)->treemap &= ~idx2bit(i)) -#define treemap_is_marked(M,i) ((M)->treemap & idx2bit(i)) - -/* isolate the least set bit of a bitmap */ -#define least_bit(x) ((x) & -(x)) - -/* mask with all bits to left of least bit of x on */ -#define left_bits(x) ((x<<1) | -(x<<1)) - -/* mask with all bits to left of or equal to least bit of x on */ -#define same_or_left_bits(x) ((x) | -(x)) - -/* index corresponding to given bit. 
Use x86 asm if possible */ - -#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)) -#define compute_bit2idx(X, I)\ -{\ - unsigned int J;\ - J = __builtin_ctz(X); \ - I = (bindex_t)J;\ -} - -#elif defined (__INTEL_COMPILER) -#define compute_bit2idx(X, I)\ -{\ - unsigned int J;\ - J = _bit_scan_forward (X); \ - I = (bindex_t)J;\ -} - -#elif defined(_MSC_VER) && _MSC_VER>=1300 -#define compute_bit2idx(X, I)\ -{\ - unsigned int J;\ - _BitScanForward((DWORD *) &J, X);\ - I = (bindex_t)J;\ -} - -#elif USE_BUILTIN_FFS -#define compute_bit2idx(X, I) I = ffs(X)-1 - -#else -#define compute_bit2idx(X, I)\ -{\ - unsigned int Y = X - 1;\ - unsigned int K = Y >> (16-4) & 16;\ - unsigned int N = K; Y >>= K;\ - N += K = Y >> (8-3) & 8; Y >>= K;\ - N += K = Y >> (4-2) & 4; Y >>= K;\ - N += K = Y >> (2-1) & 2; Y >>= K;\ - N += K = Y >> (1-0) & 1; Y >>= K;\ - I = (bindex_t)(N + Y);\ -} -#endif /* GNUC */ - - -/* ----------------------- Runtime Check Support ------------------------- */ - -/* - For security, the main invariant is that malloc/free/etc never - writes to a static address other than malloc_state, unless static - malloc_state itself has been corrupted, which cannot occur via - malloc (because of these checks). In essence this means that we - believe all pointers, sizes, maps etc held in malloc_state, but - check all of those linked or offsetted from other embedded data - structures. These checks are interspersed with main code in a way - that tends to minimize their run-time cost. - - When FOOTERS is defined, in addition to range checking, we also - verify footer fields of inuse chunks, which can be used guarantee - that the mstate controlling malloc/free is intact. 
This is a - streamlined version of the approach described by William Robertson - et al in "Run-time Detection of Heap-based Overflows" LISA'03 - http://www.usenix.org/events/lisa03/tech/robertson.html The footer - of an inuse chunk holds the xor of its mstate and a random seed, - that is checked upon calls to free() and realloc(). This is - (probabalistically) unguessable from outside the program, but can be - computed by any code successfully malloc'ing any chunk, so does not - itself provide protection against code that has already broken - security through some other means. Unlike Robertson et al, we - always dynamically check addresses of all offset chunks (previous, - next, etc). This turns out to be cheaper than relying on hashes. -*/ - -#if !INSECURE -/* Check if address a is at least as high as any from MORECORE or MMAP */ -#define ok_address(M, a) ((char*)(a) >= (M)->least_addr) -/* Check if address of next chunk n is higher than base chunk p */ -#define ok_next(p, n) ((char*)(p) < (char*)(n)) -/* Check if p has inuse status */ -#define ok_inuse(p) is_inuse(p) -/* Check if p has its pinuse bit on */ -#define ok_pinuse(p) pinuse(p) - -#else /* !INSECURE */ -#define ok_address(M, a) (1) -#define ok_next(b, n) (1) -#define ok_inuse(p) (1) -#define ok_pinuse(p) (1) -#endif /* !INSECURE */ - -#if (FOOTERS && !INSECURE) -/* Check if (alleged) mstate m has expected magic field */ -#define ok_magic(M) ((M)->magic == mparams.magic) -#else /* (FOOTERS && !INSECURE) */ -#define ok_magic(M) (1) -#endif /* (FOOTERS && !INSECURE) */ - -/* In gcc, use __builtin_expect to minimize impact of checks */ -#if !INSECURE -#if defined(__GNUC__) && __GNUC__ >= 3 -#define RTCHECK(e) __builtin_expect(e, 1) -#else /* GNUC */ -#define RTCHECK(e) (e) -#endif /* GNUC */ -#else /* !INSECURE */ -#define RTCHECK(e) (1) -#endif /* !INSECURE */ - -/* macros to set up inuse chunks with or without footers */ - -#if !FOOTERS - -#define mark_inuse_foot(M,p,s) - -/* Macros for setting head/foot 
of non-mmapped chunks */ - -/* Set cinuse bit and pinuse bit of next chunk */ -#define set_inuse(M,p,s)\ - ((p)->head = (((p)->head & PINUSE_BIT)|s|CINUSE_BIT),\ - ((mchunkptr)(((char*)(p)) + (s)))->head |= PINUSE_BIT) - -/* Set cinuse and pinuse of this chunk and pinuse of next chunk */ -#define set_inuse_and_pinuse(M,p,s)\ - ((p)->head = (s|PINUSE_BIT|CINUSE_BIT),\ - ((mchunkptr)(((char*)(p)) + (s)))->head |= PINUSE_BIT) - -/* Set size, cinuse and pinuse bit of this chunk */ -#define set_size_and_pinuse_of_inuse_chunk(M, p, s)\ - ((p)->head = (s|PINUSE_BIT|CINUSE_BIT)) - -#else /* FOOTERS */ - -/* Set foot of inuse chunk to be xor of mstate and seed */ -#define mark_inuse_foot(M,p,s)\ - (((mchunkptr)((char*)(p) + (s)))->prev_foot = ((size_t)(M) ^ mparams.magic)) - -#define get_mstate_for(p)\ - ((mstate)(((mchunkptr)((char*)(p) +\ - (chunksize(p))))->prev_foot ^ mparams.magic)) - -#define set_inuse(M,p,s)\ - ((p)->head = (((p)->head & PINUSE_BIT)|s|CINUSE_BIT),\ - (((mchunkptr)(((char*)(p)) + (s)))->head |= PINUSE_BIT), \ - mark_inuse_foot(M,p,s)) - -#define set_inuse_and_pinuse(M,p,s)\ - ((p)->head = (s|PINUSE_BIT|CINUSE_BIT),\ - (((mchunkptr)(((char*)(p)) + (s)))->head |= PINUSE_BIT),\ - mark_inuse_foot(M,p,s)) - -#define set_size_and_pinuse_of_inuse_chunk(M, p, s)\ - ((p)->head = (s|PINUSE_BIT|CINUSE_BIT),\ - mark_inuse_foot(M, p, s)) - -#endif /* !FOOTERS */ - -/* ---------------------------- setting mparams -------------------------- */ - -#if LOCK_AT_FORK -static void pre_fork(void) { ACQUIRE_LOCK(&(gm)->mutex); } -static void post_fork_parent(void) { RELEASE_LOCK(&(gm)->mutex); } -static void post_fork_child(void) { INITIAL_LOCK(&(gm)->mutex); } -#endif /* LOCK_AT_FORK */ - -/* Initialize mparams */ -static int init_mparams(void) { -#ifdef NEED_GLOBAL_LOCK_INIT - if (malloc_global_mutex_status <= 0) - init_malloc_global_mutex(); -#endif - - ACQUIRE_MALLOC_GLOBAL_LOCK(); - if (mparams.magic == 0) { - size_t magic; - size_t psize; - size_t gsize; - -#ifndef 
WIN32 - psize = malloc_getpagesize; - gsize = ((DEFAULT_GRANULARITY != 0)? DEFAULT_GRANULARITY : psize); -#else /* WIN32 */ - { - SYSTEM_INFO system_info; - GetSystemInfo(&system_info); - psize = system_info.dwPageSize; - gsize = ((DEFAULT_GRANULARITY != 0)? - DEFAULT_GRANULARITY : system_info.dwAllocationGranularity); - } -#endif /* WIN32 */ - - /* Sanity-check configuration: - size_t must be unsigned and as wide as pointer type. - ints must be at least 4 bytes. - alignment must be at least 8. - Alignment, min chunk size, and page size must all be powers of 2. - */ - if ((sizeof(size_t) != sizeof(char*)) || - (MAX_SIZE_T < MIN_CHUNK_SIZE) || - (sizeof(int) < 4) || - (MALLOC_ALIGNMENT < (size_t)8U) || - ((MALLOC_ALIGNMENT & (MALLOC_ALIGNMENT-SIZE_T_ONE)) != 0) || - ((MCHUNK_SIZE & (MCHUNK_SIZE-SIZE_T_ONE)) != 0) || - ((gsize & (gsize-SIZE_T_ONE)) != 0) || - ((psize & (psize-SIZE_T_ONE)) != 0)) - ABORT; - mparams.granularity = gsize; - mparams.page_size = psize; - mparams.mmap_threshold = DEFAULT_MMAP_THRESHOLD; - mparams.trim_threshold = DEFAULT_TRIM_THRESHOLD; -#if MORECORE_CONTIGUOUS - mparams.default_mflags = USE_LOCK_BIT|USE_MMAP_BIT; -#else /* MORECORE_CONTIGUOUS */ - mparams.default_mflags = USE_LOCK_BIT|USE_MMAP_BIT|USE_NONCONTIGUOUS_BIT; -#endif /* MORECORE_CONTIGUOUS */ - -#if !ONLY_MSPACES - /* Set up lock for main malloc area */ - gm->mflags = mparams.default_mflags; - (void)INITIAL_LOCK(&gm->mutex); -#endif -#if LOCK_AT_FORK - pthread_atfork(&pre_fork, &post_fork_parent, &post_fork_child); -#endif - - { -#if USE_DEV_RANDOM - int fd; - unsigned char buf[sizeof(size_t)]; - /* Try to use /dev/urandom, else fall back on using time */ - if ((fd = open("/dev/urandom", O_RDONLY)) >= 0 && - read(fd, buf, sizeof(buf)) == sizeof(buf)) { - magic = *((size_t *) buf); - close(fd); - } - else -#endif /* USE_DEV_RANDOM */ -#ifdef WIN32 - magic = (size_t)(GetTickCount() ^ (size_t)0x55555555U); -#elif defined(LACKS_TIME_H) - magic = (size_t)&magic ^ (size_t)0x55555555U; 
-#else - magic = (size_t)(time(0) ^ (size_t)0x55555555U); -#endif - magic |= (size_t)8U; /* ensure nonzero */ - magic &= ~(size_t)7U; /* improve chances of fault for bad values */ - /* Until memory modes commonly available, use volatile-write */ - (*(volatile size_t *)(&(mparams.magic))) = magic; - } - } - - RELEASE_MALLOC_GLOBAL_LOCK(); - return 1; -} - -/* support for mallopt */ -static int change_mparam(int param_number, int value) { - size_t val; - ensure_initialization(); - val = (value == -1)? MAX_SIZE_T : (size_t)value; - switch(param_number) { - case M_TRIM_THRESHOLD: - mparams.trim_threshold = val; - return 1; - case M_GRANULARITY: - if (val >= mparams.page_size && ((val & (val-1)) == 0)) { - mparams.granularity = val; - return 1; - } - else - return 0; - case M_MMAP_THRESHOLD: - mparams.mmap_threshold = val; - return 1; - default: - return 0; - } -} - -#if DEBUG -/* ------------------------- Debugging Support --------------------------- */ - -/* Check properties of any chunk, whether free, inuse, mmapped etc */ -static void do_check_any_chunk(mstate m, mchunkptr p) { - assert((is_aligned(chunk2mem(p))) || (p->head == FENCEPOST_HEAD)); - assert(ok_address(m, p)); -} - -/* Check properties of top chunk */ -static void do_check_top_chunk(mstate m, mchunkptr p) { - msegmentptr sp = segment_holding(m, (char*)p); - size_t sz = p->head & ~INUSE_BITS; /* third-lowest bit can be set! 
*/ - assert(sp != 0); - assert((is_aligned(chunk2mem(p))) || (p->head == FENCEPOST_HEAD)); - assert(ok_address(m, p)); - assert(sz == m->topsize); - assert(sz > 0); - assert(sz == ((sp->base + sp->size) - (char*)p) - TOP_FOOT_SIZE); - assert(pinuse(p)); - assert(!pinuse(chunk_plus_offset(p, sz))); -} - -/* Check properties of (inuse) mmapped chunks */ -static void do_check_mmapped_chunk(mstate m, mchunkptr p) { - size_t sz = chunksize(p); - size_t len = (sz + (p->prev_foot) + MMAP_FOOT_PAD); - assert(is_mmapped(p)); - assert(use_mmap(m)); - assert((is_aligned(chunk2mem(p))) || (p->head == FENCEPOST_HEAD)); - assert(ok_address(m, p)); - assert(!is_small(sz)); - assert((len & (mparams.page_size-SIZE_T_ONE)) == 0); - assert(chunk_plus_offset(p, sz)->head == FENCEPOST_HEAD); - assert(chunk_plus_offset(p, sz+SIZE_T_SIZE)->head == 0); -} - -/* Check properties of inuse chunks */ -static void do_check_inuse_chunk(mstate m, mchunkptr p) { - do_check_any_chunk(m, p); - assert(is_inuse(p)); - assert(next_pinuse(p)); - /* If not pinuse and not mmapped, previous chunk has OK offset */ - assert(is_mmapped(p) || pinuse(p) || next_chunk(prev_chunk(p)) == p); - if (is_mmapped(p)) - do_check_mmapped_chunk(m, p); -} - -/* Check properties of free chunks */ -static void do_check_free_chunk(mstate m, mchunkptr p) { - size_t sz = chunksize(p); - mchunkptr next = chunk_plus_offset(p, sz); - do_check_any_chunk(m, p); - assert(!is_inuse(p)); - assert(!next_pinuse(p)); - assert (!is_mmapped(p)); - if (p != m->dv && p != m->top) { - if (sz >= MIN_CHUNK_SIZE) { - assert((sz & CHUNK_ALIGN_MASK) == 0); - assert(is_aligned(chunk2mem(p))); - assert(next->prev_foot == sz); - assert(pinuse(p)); - assert (next == m->top || is_inuse(next)); - assert(p->fd->bk == p); - assert(p->bk->fd == p); - } - else /* markers are always of size SIZE_T_SIZE */ - assert(sz == SIZE_T_SIZE); - } -} - -/* Check properties of malloced chunks at the point they are malloced */ -static void do_check_malloced_chunk(mstate 
m, void* mem, size_t s) { - if (mem != 0) { - mchunkptr p = mem2chunk(mem); - size_t sz = p->head & ~INUSE_BITS; - do_check_inuse_chunk(m, p); - assert((sz & CHUNK_ALIGN_MASK) == 0); - assert(sz >= MIN_CHUNK_SIZE); - assert(sz >= s); - /* unless mmapped, size is less than MIN_CHUNK_SIZE more than request */ - assert(is_mmapped(p) || sz < (s + MIN_CHUNK_SIZE)); - } -} - -/* Check a tree and its subtrees. */ -static void do_check_tree(mstate m, tchunkptr t) { - tchunkptr head = 0; - tchunkptr u = t; - bindex_t tindex = t->index; - size_t tsize = chunksize(t); - bindex_t idx; - compute_tree_index(tsize, idx); - assert(tindex == idx); - assert(tsize >= MIN_LARGE_SIZE); - assert(tsize >= minsize_for_tree_index(idx)); - assert((idx == NTREEBINS-1) || (tsize < minsize_for_tree_index((idx+1)))); - - do { /* traverse through chain of same-sized nodes */ - do_check_any_chunk(m, ((mchunkptr)u)); - assert(u->index == tindex); - assert(chunksize(u) == tsize); - assert(!is_inuse(u)); - assert(!next_pinuse(u)); - assert(u->fd->bk == u); - assert(u->bk->fd == u); - if (u->parent == 0) { - assert(u->child[0] == 0); - assert(u->child[1] == 0); - } - else { - assert(head == 0); /* only one node on chain has parent */ - head = u; - assert(u->parent != u); - assert (u->parent->child[0] == u || - u->parent->child[1] == u || - *((tbinptr*)(u->parent)) == u); - if (u->child[0] != 0) { - assert(u->child[0]->parent == u); - assert(u->child[0] != u); - do_check_tree(m, u->child[0]); - } - if (u->child[1] != 0) { - assert(u->child[1]->parent == u); - assert(u->child[1] != u); - do_check_tree(m, u->child[1]); - } - if (u->child[0] != 0 && u->child[1] != 0) { - assert(chunksize(u->child[0]) < chunksize(u->child[1])); - } - } - u = u->fd; - } while (u != t); - assert(head != 0); -} - -/* Check all the chunks in a treebin. 
*/ -static void do_check_treebin(mstate m, bindex_t i) { - tbinptr* tb = treebin_at(m, i); - tchunkptr t = *tb; - int empty = (m->treemap & (1U << i)) == 0; - if (t == 0) - assert(empty); - if (!empty) - do_check_tree(m, t); -} - -/* Check all the chunks in a smallbin. */ -static void do_check_smallbin(mstate m, bindex_t i) { - sbinptr b = smallbin_at(m, i); - mchunkptr p = b->bk; - unsigned int empty = (m->smallmap & (1U << i)) == 0; - if (p == b) - assert(empty); - if (!empty) { - for (; p != b; p = p->bk) { - size_t size = chunksize(p); - mchunkptr q; - /* each chunk claims to be free */ - do_check_free_chunk(m, p); - /* chunk belongs in bin */ - assert(small_index(size) == i); - assert(p->bk == b || chunksize(p->bk) == chunksize(p)); - /* chunk is followed by an inuse chunk */ - q = next_chunk(p); - if (q->head != FENCEPOST_HEAD) - do_check_inuse_chunk(m, q); - } - } -} - -/* Find x in a bin. Used in other check functions. */ -static int bin_find(mstate m, mchunkptr x) { - size_t size = chunksize(x); - if (is_small(size)) { - bindex_t sidx = small_index(size); - sbinptr b = smallbin_at(m, sidx); - if (smallmap_is_marked(m, sidx)) { - mchunkptr p = b; - do { - if (p == x) - return 1; - } while ((p = p->fd) != b); - } - } - else { - bindex_t tidx; - compute_tree_index(size, tidx); - if (treemap_is_marked(m, tidx)) { - tchunkptr t = *treebin_at(m, tidx); - size_t sizebits = size << leftshift_for_tree_index(tidx); - while (t != 0 && chunksize(t) != size) { - t = t->child[(sizebits >> (SIZE_T_BITSIZE-SIZE_T_ONE)) & 1]; - sizebits <<= 1; - } - if (t != 0) { - tchunkptr u = t; - do { - if (u == (tchunkptr)x) - return 1; - } while ((u = u->fd) != t); - } - } - } - return 0; -} - -/* Traverse each chunk and check it; return total */ -static size_t traverse_and_check(mstate m) { - size_t sum = 0; - if (is_initialized(m)) { - msegmentptr s = &m->seg; - sum += m->topsize + TOP_FOOT_SIZE; - while (s != 0) { - mchunkptr q = align_as_chunk(s->base); - mchunkptr lastq = 0; - 
assert(pinuse(q)); - while (segment_holds(s, q) && - q != m->top && q->head != FENCEPOST_HEAD) { - sum += chunksize(q); - if (is_inuse(q)) { - assert(!bin_find(m, q)); - do_check_inuse_chunk(m, q); - } - else { - assert(q == m->dv || bin_find(m, q)); - assert(lastq == 0 || is_inuse(lastq)); /* Not 2 consecutive free */ - do_check_free_chunk(m, q); - } - lastq = q; - q = next_chunk(q); - } - s = s->next; - } - } - return sum; -} - - -/* Check all properties of malloc_state. */ -static void do_check_malloc_state(mstate m) { - bindex_t i; - size_t total; - /* check bins */ - for (i = 0; i < NSMALLBINS; ++i) - do_check_smallbin(m, i); - for (i = 0; i < NTREEBINS; ++i) - do_check_treebin(m, i); - - if (m->dvsize != 0) { /* check dv chunk */ - do_check_any_chunk(m, m->dv); - assert(m->dvsize == chunksize(m->dv)); - assert(m->dvsize >= MIN_CHUNK_SIZE); - assert(bin_find(m, m->dv) == 0); - } - - if (m->top != 0) { /* check top chunk */ - do_check_top_chunk(m, m->top); - /*assert(m->topsize == chunksize(m->top)); redundant */ - assert(m->topsize > 0); - assert(bin_find(m, m->top) == 0); - } - - total = traverse_and_check(m); - assert(total <= m->footprint); - assert(m->footprint <= m->max_footprint); -} -#endif /* DEBUG */ - -/* ----------------------------- statistics ------------------------------ */ - -#if !NO_MALLINFO -static struct mallinfo internal_mallinfo(mstate m) { - struct mallinfo nm = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; - ensure_initialization(); - if (!PREACTION(m)) { - check_malloc_state(m); - if (is_initialized(m)) { - size_t nfree = SIZE_T_ONE; /* top always free */ - size_t mfree = m->topsize + TOP_FOOT_SIZE; - size_t sum = mfree; - msegmentptr s = &m->seg; - while (s != 0) { - mchunkptr q = align_as_chunk(s->base); - while (segment_holds(s, q) && - q != m->top && q->head != FENCEPOST_HEAD) { - size_t sz = chunksize(q); - sum += sz; - if (!is_inuse(q)) { - mfree += sz; - ++nfree; - } - q = next_chunk(q); - } - s = s->next; - } - - nm.arena = sum; - 
nm.ordblks = nfree; - nm.hblkhd = m->footprint - sum; - nm.usmblks = m->max_footprint; - nm.uordblks = m->footprint - mfree; - nm.fordblks = mfree; - nm.keepcost = m->topsize; - } - - POSTACTION(m); - } - return nm; -} -#endif /* !NO_MALLINFO */ - -#if !NO_MALLOC_STATS -static void internal_malloc_stats(mstate m) { - ensure_initialization(); - if (!PREACTION(m)) { - size_t maxfp = 0; - size_t fp = 0; - size_t used = 0; - check_malloc_state(m); - if (is_initialized(m)) { - msegmentptr s = &m->seg; - maxfp = m->max_footprint; - fp = m->footprint; - used = fp - (m->topsize + TOP_FOOT_SIZE); - - while (s != 0) { - mchunkptr q = align_as_chunk(s->base); - while (segment_holds(s, q) && - q != m->top && q->head != FENCEPOST_HEAD) { - if (!is_inuse(q)) - used -= chunksize(q); - q = next_chunk(q); - } - s = s->next; - } - } - POSTACTION(m); /* drop lock */ - fprintf(stderr, "max system bytes = %10lu\n", (unsigned long)(maxfp)); - fprintf(stderr, "system bytes = %10lu\n", (unsigned long)(fp)); - fprintf(stderr, "in use bytes = %10lu\n", (unsigned long)(used)); - } -} -#endif /* NO_MALLOC_STATS */ - -/* ----------------------- Operations on smallbins ----------------------- */ - -/* - Various forms of linking and unlinking are defined as macros. Even - the ones for trees, which are very long but have very short typical - paths. This is ugly but reduces reliance on inlining support of - compilers. 
-*/ - -/* Link a free chunk into a smallbin */ -#define insert_small_chunk(M, P, S) {\ - bindex_t I = small_index(S);\ - mchunkptr B = smallbin_at(M, I);\ - mchunkptr F = B;\ - assert(S >= MIN_CHUNK_SIZE);\ - if (!smallmap_is_marked(M, I))\ - mark_smallmap(M, I);\ - else if (RTCHECK(ok_address(M, B->fd)))\ - F = B->fd;\ - else {\ - CORRUPTION_ERROR_ACTION(M);\ - }\ - B->fd = P;\ - F->bk = P;\ - P->fd = F;\ - P->bk = B;\ -} - -/* Unlink a chunk from a smallbin */ -#define unlink_small_chunk(M, P, S) {\ - mchunkptr F = P->fd;\ - mchunkptr B = P->bk;\ - bindex_t I = small_index(S);\ - assert(P != B);\ - assert(P != F);\ - assert(chunksize(P) == small_index2size(I));\ - if (RTCHECK(F == smallbin_at(M,I) || (ok_address(M, F) && F->bk == P))) { \ - if (B == F) {\ - clear_smallmap(M, I);\ - }\ - else if (RTCHECK(B == smallbin_at(M,I) ||\ - (ok_address(M, B) && B->fd == P))) {\ - F->bk = B;\ - B->fd = F;\ - }\ - else {\ - CORRUPTION_ERROR_ACTION(M);\ - }\ - }\ - else {\ - CORRUPTION_ERROR_ACTION(M);\ - }\ -} - -/* Unlink the first chunk from a smallbin */ -#define unlink_first_small_chunk(M, B, P, I) {\ - mchunkptr F = P->fd;\ - assert(P != B);\ - assert(P != F);\ - assert(chunksize(P) == small_index2size(I));\ - if (B == F) {\ - clear_smallmap(M, I);\ - }\ - else if (RTCHECK(ok_address(M, F) && F->bk == P)) {\ - F->bk = B;\ - B->fd = F;\ - }\ - else {\ - CORRUPTION_ERROR_ACTION(M);\ - }\ -} - -/* Replace dv node, binning the old one */ -/* Used only when dvsize known to be small */ -#define replace_dv(M, P, S) {\ - size_t DVS = M->dvsize;\ - assert(is_small(DVS));\ - if (DVS != 0) {\ - mchunkptr DV = M->dv;\ - insert_small_chunk(M, DV, DVS);\ - }\ - M->dvsize = S;\ - M->dv = P;\ -} - -/* ------------------------- Operations on trees ------------------------- */ - -/* Insert chunk into tree */ -#define insert_large_chunk(M, X, S) {\ - tbinptr* H;\ - bindex_t I;\ - compute_tree_index(S, I);\ - H = treebin_at(M, I);\ - X->index = I;\ - X->child[0] = X->child[1] = 0;\ - if 
(!treemap_is_marked(M, I)) {\ - mark_treemap(M, I);\ - *H = X;\ - X->parent = (tchunkptr)H;\ - X->fd = X->bk = X;\ - }\ - else {\ - tchunkptr T = *H;\ - size_t K = S << leftshift_for_tree_index(I);\ - for (;;) {\ - if (chunksize(T) != S) {\ - tchunkptr* C = &(T->child[(K >> (SIZE_T_BITSIZE-SIZE_T_ONE)) & 1]);\ - K <<= 1;\ - if (*C != 0)\ - T = *C;\ - else if (RTCHECK(ok_address(M, C))) {\ - *C = X;\ - X->parent = T;\ - X->fd = X->bk = X;\ - break;\ - }\ - else {\ - CORRUPTION_ERROR_ACTION(M);\ - break;\ - }\ - }\ - else {\ - tchunkptr F = T->fd;\ - if (RTCHECK(ok_address(M, T) && ok_address(M, F))) {\ - T->fd = F->bk = X;\ - X->fd = F;\ - X->bk = T;\ - X->parent = 0;\ - break;\ - }\ - else {\ - CORRUPTION_ERROR_ACTION(M);\ - break;\ - }\ - }\ - }\ - }\ -} - -/* - Unlink steps: - - 1. If x is a chained node, unlink it from its same-sized fd/bk links - and choose its bk node as its replacement. - 2. If x was the last node of its size, but not a leaf node, it must - be replaced with a leaf node (not merely one with an open left or - right), to make sure that lefts and rights of descendents - correspond properly to bit masks. We use the rightmost descendent - of x. We could use any other leaf, but this is easy to locate and - tends to counteract removal of leftmosts elsewhere, and so keeps - paths shorter than minimally guaranteed. This doesn't loop much - because on average a node in a tree is near the bottom. - 3. If x is the base of a chain (i.e., has parent links) relink - x's parent and children to x's replacement (or null if none). 
-*/ - -#define unlink_large_chunk(M, X) {\ - tchunkptr XP = X->parent;\ - tchunkptr R;\ - if (X->bk != X) {\ - tchunkptr F = X->fd;\ - R = X->bk;\ - if (RTCHECK(ok_address(M, F) && F->bk == X && R->fd == X)) {\ - F->bk = R;\ - R->fd = F;\ - }\ - else {\ - CORRUPTION_ERROR_ACTION(M);\ - }\ - }\ - else {\ - tchunkptr* RP;\ - if (((R = *(RP = &(X->child[1]))) != 0) ||\ - ((R = *(RP = &(X->child[0]))) != 0)) {\ - tchunkptr* CP;\ - while ((*(CP = &(R->child[1])) != 0) ||\ - (*(CP = &(R->child[0])) != 0)) {\ - R = *(RP = CP);\ - }\ - if (RTCHECK(ok_address(M, RP)))\ - *RP = 0;\ - else {\ - CORRUPTION_ERROR_ACTION(M);\ - }\ - }\ - }\ - if (XP != 0) {\ - tbinptr* H = treebin_at(M, X->index);\ - if (X == *H) {\ - if ((*H = R) == 0) \ - clear_treemap(M, X->index);\ - }\ - else if (RTCHECK(ok_address(M, XP))) {\ - if (XP->child[0] == X) \ - XP->child[0] = R;\ - else \ - XP->child[1] = R;\ - }\ - else\ - CORRUPTION_ERROR_ACTION(M);\ - if (R != 0) {\ - if (RTCHECK(ok_address(M, R))) {\ - tchunkptr C0, C1;\ - R->parent = XP;\ - if ((C0 = X->child[0]) != 0) {\ - if (RTCHECK(ok_address(M, C0))) {\ - R->child[0] = C0;\ - C0->parent = R;\ - }\ - else\ - CORRUPTION_ERROR_ACTION(M);\ - }\ - if ((C1 = X->child[1]) != 0) {\ - if (RTCHECK(ok_address(M, C1))) {\ - R->child[1] = C1;\ - C1->parent = R;\ - }\ - else\ - CORRUPTION_ERROR_ACTION(M);\ - }\ - }\ - else\ - CORRUPTION_ERROR_ACTION(M);\ - }\ - }\ -} - -/* Relays to large vs small bin operations */ - -#define insert_chunk(M, P, S)\ - if (is_small(S)) insert_small_chunk(M, P, S)\ - else { tchunkptr TP = (tchunkptr)(P); insert_large_chunk(M, TP, S); } - -#define unlink_chunk(M, P, S)\ - if (is_small(S)) unlink_small_chunk(M, P, S)\ - else { tchunkptr TP = (tchunkptr)(P); unlink_large_chunk(M, TP); } - - -/* Relays to internal calls to malloc/free from realloc, memalign etc */ - -#if ONLY_MSPACES -#define internal_malloc(m, b) mspace_malloc(m, b) -#define internal_free(m, mem) mspace_free(m,mem); -#else /* ONLY_MSPACES */ -#if MSPACES 
-#define internal_malloc(m, b)\ - ((m == gm)? dlmalloc(b) : mspace_malloc(m, b)) -#define internal_free(m, mem)\ - if (m == gm) dlfree(mem); else mspace_free(m,mem); -#else /* MSPACES */ -#define internal_malloc(m, b) dlmalloc(b) -#define internal_free(m, mem) dlfree(mem) -#endif /* MSPACES */ -#endif /* ONLY_MSPACES */ - -/* ----------------------- Direct-mmapping chunks ----------------------- */ - -/* - Directly mmapped chunks are set up with an offset to the start of - the mmapped region stored in the prev_foot field of the chunk. This - allows reconstruction of the required argument to MUNMAP when freed, - and also allows adjustment of the returned chunk to meet alignment - requirements (especially in memalign). -*/ - -/* Malloc using mmap */ -static void* mmap_alloc(mstate m, size_t nb) { - size_t mmsize = mmap_align(nb + SIX_SIZE_T_SIZES + CHUNK_ALIGN_MASK); - if (m->footprint_limit != 0) { - size_t fp = m->footprint + mmsize; - if (fp <= m->footprint || fp > m->footprint_limit) - return 0; - } - if (mmsize > nb) { /* Check for wrap around 0 */ - char* mm = (char*)(CALL_DIRECT_MMAP(mmsize)); - if (mm != CMFAIL) { - size_t offset = align_offset(chunk2mem(mm)); - size_t psize = mmsize - offset - MMAP_FOOT_PAD; - mchunkptr p = (mchunkptr)(mm + offset); - p->prev_foot = offset; - p->head = psize; - mark_inuse_foot(m, p, psize); - chunk_plus_offset(p, psize)->head = FENCEPOST_HEAD; - chunk_plus_offset(p, psize+SIZE_T_SIZE)->head = 0; - - if (m->least_addr == 0 || mm < m->least_addr) - m->least_addr = mm; - if ((m->footprint += mmsize) > m->max_footprint) - m->max_footprint = m->footprint; - assert(is_aligned(chunk2mem(p))); - check_mmapped_chunk(m, p); - return chunk2mem(p); - } - } - return 0; -} - -/* Realloc using mmap */ -static mchunkptr mmap_resize(mstate m, mchunkptr oldp, size_t nb, int flags) { - size_t oldsize = chunksize(oldp); - (void)flags; /* placate people compiling -Wunused */ - if (is_small(nb)) /* Can't shrink mmap regions below small size */ - 
return 0; - /* Keep old chunk if big enough but not too big */ - if (oldsize >= nb + SIZE_T_SIZE && - (oldsize - nb) <= (mparams.granularity << 1)) - return oldp; - else { - size_t offset = oldp->prev_foot; - size_t oldmmsize = oldsize + offset + MMAP_FOOT_PAD; - size_t newmmsize = mmap_align(nb + SIX_SIZE_T_SIZES + CHUNK_ALIGN_MASK); - char* cp = (char*)CALL_MREMAP((char*)oldp - offset, - oldmmsize, newmmsize, flags); - if (cp != CMFAIL) { - mchunkptr newp = (mchunkptr)(cp + offset); - size_t psize = newmmsize - offset - MMAP_FOOT_PAD; - newp->head = psize; - mark_inuse_foot(m, newp, psize); - chunk_plus_offset(newp, psize)->head = FENCEPOST_HEAD; - chunk_plus_offset(newp, psize+SIZE_T_SIZE)->head = 0; - - if (cp < m->least_addr) - m->least_addr = cp; - if ((m->footprint += newmmsize - oldmmsize) > m->max_footprint) - m->max_footprint = m->footprint; - check_mmapped_chunk(m, newp); - return newp; - } - } - return 0; -} - - -/* -------------------------- mspace management -------------------------- */ - -/* Initialize top chunk and its size */ -static void init_top(mstate m, mchunkptr p, size_t psize) { - /* Ensure alignment */ - size_t offset = align_offset(chunk2mem(p)); - p = (mchunkptr)((char*)p + offset); - psize -= offset; - - m->top = p; - m->topsize = psize; - p->head = psize | PINUSE_BIT; - /* set size of fake trailing chunk holding overhead space only once */ - chunk_plus_offset(p, psize)->head = TOP_FOOT_SIZE; - m->trim_check = mparams.trim_threshold; /* reset on each update */ -} - -/* Initialize bins for a new mstate that is otherwise zeroed out */ -static void init_bins(mstate m) { - /* Establish circular links for smallbins */ - bindex_t i; - for (i = 0; i < NSMALLBINS; ++i) { - sbinptr bin = smallbin_at(m,i); - bin->fd = bin->bk = bin; - } -} - -#if PROCEED_ON_ERROR - -/* default corruption action */ -static void reset_on_error(mstate m) { - int i; - ++malloc_corruption_error_count; - /* Reinitialize fields to forget about all memory */ - 
m->smallmap = m->treemap = 0; - m->dvsize = m->topsize = 0; - m->seg.base = 0; - m->seg.size = 0; - m->seg.next = 0; - m->top = m->dv = 0; - for (i = 0; i < NTREEBINS; ++i) - *treebin_at(m, i) = 0; - init_bins(m); -} -#endif /* PROCEED_ON_ERROR */ - -/* Allocate chunk and prepend remainder with chunk in successor base. */ -static void* prepend_alloc(mstate m, char* newbase, char* oldbase, - size_t nb) { - mchunkptr p = align_as_chunk(newbase); - mchunkptr oldfirst = align_as_chunk(oldbase); - size_t psize = (char*)oldfirst - (char*)p; - mchunkptr q = chunk_plus_offset(p, nb); - size_t qsize = psize - nb; - set_size_and_pinuse_of_inuse_chunk(m, p, nb); - - assert((char*)oldfirst > (char*)q); - assert(pinuse(oldfirst)); - assert(qsize >= MIN_CHUNK_SIZE); - - /* consolidate remainder with first chunk of old base */ - if (oldfirst == m->top) { - size_t tsize = m->topsize += qsize; - m->top = q; - q->head = tsize | PINUSE_BIT; - check_top_chunk(m, q); - } - else if (oldfirst == m->dv) { - size_t dsize = m->dvsize += qsize; - m->dv = q; - set_size_and_pinuse_of_free_chunk(q, dsize); - } - else { - if (!is_inuse(oldfirst)) { - size_t nsize = chunksize(oldfirst); - unlink_chunk(m, oldfirst, nsize); - oldfirst = chunk_plus_offset(oldfirst, nsize); - qsize += nsize; - } - set_free_with_pinuse(q, qsize, oldfirst); - insert_chunk(m, q, qsize); - check_free_chunk(m, q); - } - - check_malloced_chunk(m, chunk2mem(p), nb); - return chunk2mem(p); -} - -/* Add a segment to hold a new noncontiguous region */ -static void add_segment(mstate m, char* tbase, size_t tsize, flag_t mmapped) { - /* Determine locations and sizes of segment, fenceposts, old top */ - char* old_top = (char*)m->top; - msegmentptr oldsp = segment_holding(m, old_top); - char* old_end = oldsp->base + oldsp->size; - size_t ssize = pad_request(sizeof(struct malloc_segment)); - char* rawsp = old_end - (ssize + FOUR_SIZE_T_SIZES + CHUNK_ALIGN_MASK); - size_t offset = align_offset(chunk2mem(rawsp)); - char* asp = rawsp 
+ offset; - char* csp = (asp < (old_top + MIN_CHUNK_SIZE))? old_top : asp; - mchunkptr sp = (mchunkptr)csp; - msegmentptr ss = (msegmentptr)(chunk2mem(sp)); - mchunkptr tnext = chunk_plus_offset(sp, ssize); - mchunkptr p = tnext; - int nfences = 0; - - /* reset top to new space */ - init_top(m, (mchunkptr)tbase, tsize - TOP_FOOT_SIZE); - - /* Set up segment record */ - assert(is_aligned(ss)); - set_size_and_pinuse_of_inuse_chunk(m, sp, ssize); - *ss = m->seg; /* Push current record */ - m->seg.base = tbase; - m->seg.size = tsize; - m->seg.sflags = mmapped; - m->seg.next = ss; - - /* Insert trailing fenceposts */ - for (;;) { - mchunkptr nextp = chunk_plus_offset(p, SIZE_T_SIZE); - p->head = FENCEPOST_HEAD; - ++nfences; - if ((char*)(&(nextp->head)) < old_end) - p = nextp; - else - break; - } - assert(nfences >= 2); - - /* Insert the rest of old top into a bin as an ordinary free chunk */ - if (csp != old_top) { - mchunkptr q = (mchunkptr)old_top; - size_t psize = csp - old_top; - mchunkptr tn = chunk_plus_offset(q, psize); - set_free_with_pinuse(q, psize, tn); - insert_chunk(m, q, psize); - } - - check_top_chunk(m, m->top); -} - -/* -------------------------- System allocation -------------------------- */ - -/* Get memory from system using MORECORE or MMAP */ -static void* sys_alloc(mstate m, size_t nb) { - char* tbase = CMFAIL; - size_t tsize = 0; - flag_t mmap_flag = 0; - size_t asize; /* allocation size */ - - ensure_initialization(); - - /* Directly map large chunks, but only if already initialized */ - if (use_mmap(m) && nb >= mparams.mmap_threshold && m->topsize != 0) { - void* mem = mmap_alloc(m, nb); - if (mem != 0) - return mem; - } - - asize = granularity_align(nb + SYS_ALLOC_PADDING); -#ifdef __wasilibc_unmodified_upstream // Bug fix: set ENOMEM on size overflow - if (asize <= nb) - return 0; /* wraparound */ -#else - if (asize <= nb) { - MALLOC_FAILURE_ACTION; - return 0; /* wraparound */ - } -#endif - if (m->footprint_limit != 0) { - size_t fp = 
m->footprint + asize; -#ifdef __wasilibc_unmodified_upstream // Bug fix: set ENOMEM on footprint overrun - if (fp <= m->footprint || fp > m->footprint_limit) - return 0; -#else - if (fp <= m->footprint || fp > m->footprint_limit) { - MALLOC_FAILURE_ACTION; - return 0; - } -#endif - } - - /* - Try getting memory in any of three ways (in most-preferred to - least-preferred order): - 1. A call to MORECORE that can normally contiguously extend memory. - (disabled if not MORECORE_CONTIGUOUS or not HAVE_MORECORE or - or main space is mmapped or a previous contiguous call failed) - 2. A call to MMAP new space (disabled if not HAVE_MMAP). - Note that under the default settings, if MORECORE is unable to - fulfill a request, and HAVE_MMAP is true, then mmap is - used as a noncontiguous system allocator. This is a useful backup - strategy for systems with holes in address spaces -- in this case - sbrk cannot contiguously expand the heap, but mmap may be able to - find space. - 3. A call to MORECORE that cannot usually contiguously extend memory. - (disabled if not HAVE_MORECORE) - - In all cases, we need to request enough bytes from system to ensure - we can malloc nb bytes upon success, so pad with enough space for - top_foot, plus alignment-pad to make sure we don't lose bytes if - not on boundary, and round this up to a granularity unit. - */ - - if (MORECORE_CONTIGUOUS && !use_noncontiguous(m)) { - char* br = CMFAIL; - size_t ssize = asize; /* sbrk call size */ - msegmentptr ss = (m->top == 0)? 
0 : segment_holding(m, (char*)m->top); - ACQUIRE_MALLOC_GLOBAL_LOCK(); - - if (ss == 0) { /* First time through or recovery */ - char* base = (char*)CALL_MORECORE(0); - if (base != CMFAIL) { - size_t fp; - /* Adjust to end on a page boundary */ - if (!is_page_aligned(base)) - ssize += (page_align((size_t)base) - (size_t)base); - fp = m->footprint + ssize; /* recheck limits */ - if (ssize > nb && ssize < HALF_MAX_SIZE_T && - (m->footprint_limit == 0 || - (fp > m->footprint && fp <= m->footprint_limit)) && - (br = (char*)(CALL_MORECORE(ssize))) == base) { - tbase = base; - tsize = ssize; - } - } - } - else { - /* Subtract out existing available top space from MORECORE request. */ - ssize = granularity_align(nb - m->topsize + SYS_ALLOC_PADDING); - /* Use mem here only if it did continuously extend old space */ - if (ssize < HALF_MAX_SIZE_T && - (br = (char*)(CALL_MORECORE(ssize))) == ss->base+ss->size) { - tbase = br; - tsize = ssize; - } - } - - if (tbase == CMFAIL) { /* Cope with partial failure */ - if (br != CMFAIL) { /* Try to use/extend the space we did get */ - if (ssize < HALF_MAX_SIZE_T && - ssize < nb + SYS_ALLOC_PADDING) { - size_t esize = granularity_align(nb + SYS_ALLOC_PADDING - ssize); - if (esize < HALF_MAX_SIZE_T) { - char* end = (char*)CALL_MORECORE(esize); - if (end != CMFAIL) - ssize += esize; - else { /* Can't use; try to release */ - (void) CALL_MORECORE(-ssize); - br = CMFAIL; - } - } - } - } - if (br != CMFAIL) { /* Use the space we did get */ - tbase = br; - tsize = ssize; - } - else - disable_contiguous(m); /* Don't try contiguous path in the future */ - } - - RELEASE_MALLOC_GLOBAL_LOCK(); - } - - if (HAVE_MMAP && tbase == CMFAIL) { /* Try MMAP */ - char* mp = (char*)(CALL_MMAP(asize)); - if (mp != CMFAIL) { - tbase = mp; - tsize = asize; - mmap_flag = USE_MMAP_BIT; - } - } - - if (HAVE_MORECORE && tbase == CMFAIL) { /* Try noncontiguous MORECORE */ - if (asize < HALF_MAX_SIZE_T) { - char* br = CMFAIL; - char* end = CMFAIL; - 
ACQUIRE_MALLOC_GLOBAL_LOCK(); - br = (char*)(CALL_MORECORE(asize)); - end = (char*)(CALL_MORECORE(0)); - RELEASE_MALLOC_GLOBAL_LOCK(); - if (br != CMFAIL && end != CMFAIL && br < end) { - size_t ssize = end - br; - if (ssize > nb + TOP_FOOT_SIZE) { - tbase = br; - tsize = ssize; - } - } - } - } - - if (tbase != CMFAIL) { - - if ((m->footprint += tsize) > m->max_footprint) - m->max_footprint = m->footprint; - - if (!is_initialized(m)) { /* first-time initialization */ - if (m->least_addr == 0 || tbase < m->least_addr) - m->least_addr = tbase; - m->seg.base = tbase; - m->seg.size = tsize; - m->seg.sflags = mmap_flag; - m->magic = mparams.magic; - m->release_checks = MAX_RELEASE_CHECK_RATE; - init_bins(m); -#if !ONLY_MSPACES - if (is_global(m)) - init_top(m, (mchunkptr)tbase, tsize - TOP_FOOT_SIZE); - else -#endif - { - /* Offset top by embedded malloc_state */ - mchunkptr mn = next_chunk(mem2chunk(m)); - init_top(m, mn, (size_t)((tbase + tsize) - (char*)mn) -TOP_FOOT_SIZE); - } - } - - else { - /* Try to merge with an existing segment */ - msegmentptr sp = &m->seg; - /* Only consider most recent segment if traversal suppressed */ - while (sp != 0 && tbase != sp->base + sp->size) - sp = (NO_SEGMENT_TRAVERSAL) ? 0 : sp->next; - if (sp != 0 && - !is_extern_segment(sp) && - (sp->sflags & USE_MMAP_BIT) == mmap_flag && - segment_holds(sp, m->top)) { /* append */ - sp->size += tsize; - init_top(m, m->top, m->topsize + tsize); - } - else { - if (tbase < m->least_addr) - m->least_addr = tbase; - sp = &m->seg; - while (sp != 0 && sp->base != tbase + tsize) - sp = (NO_SEGMENT_TRAVERSAL) ? 
0 : sp->next; - if (sp != 0 && - !is_extern_segment(sp) && - (sp->sflags & USE_MMAP_BIT) == mmap_flag) { - char* oldbase = sp->base; - sp->base = tbase; - sp->size += tsize; - return prepend_alloc(m, tbase, oldbase, nb); - } - else - add_segment(m, tbase, tsize, mmap_flag); - } - } - - if (nb < m->topsize) { /* Allocate from new or extended top space */ - size_t rsize = m->topsize -= nb; - mchunkptr p = m->top; - mchunkptr r = m->top = chunk_plus_offset(p, nb); - r->head = rsize | PINUSE_BIT; - set_size_and_pinuse_of_inuse_chunk(m, p, nb); - check_top_chunk(m, m->top); - check_malloced_chunk(m, chunk2mem(p), nb); - return chunk2mem(p); - } - } - - MALLOC_FAILURE_ACTION; - return 0; -} - -/* ----------------------- system deallocation -------------------------- */ - -/* Unmap and unlink any mmapped segments that don't contain used chunks */ -static size_t release_unused_segments(mstate m) { - size_t released = 0; - int nsegs = 0; - msegmentptr pred = &m->seg; - msegmentptr sp = pred->next; - while (sp != 0) { - char* base = sp->base; - size_t size = sp->size; - msegmentptr next = sp->next; - ++nsegs; - if (is_mmapped_segment(sp) && !is_extern_segment(sp)) { - mchunkptr p = align_as_chunk(base); - size_t psize = chunksize(p); - /* Can unmap if first chunk holds entire segment and not pinned */ - if (!is_inuse(p) && (char*)p + psize >= base + size - TOP_FOOT_SIZE) { - tchunkptr tp = (tchunkptr)p; - assert(segment_holds(sp, (char*)sp)); - if (p == m->dv) { - m->dv = 0; - m->dvsize = 0; - } - else { - unlink_large_chunk(m, tp); - } - if (CALL_MUNMAP(base, size) == 0) { - released += size; - m->footprint -= size; - /* unlink obsoleted record */ - sp = pred; - sp->next = next; - } - else { /* back out if cannot unmap */ - insert_large_chunk(m, tp, psize); - } - } - } - if (NO_SEGMENT_TRAVERSAL) /* scan only first segment */ - break; - pred = sp; - sp = next; - } - /* Reset check counter */ - m->release_checks = (((size_t) nsegs > (size_t) MAX_RELEASE_CHECK_RATE)? 
- (size_t) nsegs : (size_t) MAX_RELEASE_CHECK_RATE); - return released; -} - -static int sys_trim(mstate m, size_t pad) { - size_t released = 0; - ensure_initialization(); - if (pad < MAX_REQUEST && is_initialized(m)) { - pad += TOP_FOOT_SIZE; /* ensure enough room for segment overhead */ - - if (m->topsize > pad) { - /* Shrink top space in granularity-size units, keeping at least one */ - size_t unit = mparams.granularity; - size_t extra = ((m->topsize - pad + (unit - SIZE_T_ONE)) / unit - - SIZE_T_ONE) * unit; - msegmentptr sp = segment_holding(m, (char*)m->top); - - if (!is_extern_segment(sp)) { - if (is_mmapped_segment(sp)) { - if (HAVE_MMAP && - sp->size >= extra && - !has_segment_link(m, sp)) { /* can't shrink if pinned */ - size_t newsize = sp->size - extra; - (void)newsize; /* placate people compiling -Wunused-variable */ - /* Prefer mremap, fall back to munmap */ - if ((CALL_MREMAP(sp->base, sp->size, newsize, 0) != MFAIL) || - (CALL_MUNMAP(sp->base + newsize, extra) == 0)) { - released = extra; - } - } - } - else if (HAVE_MORECORE) { - if (extra >= HALF_MAX_SIZE_T) /* Avoid wrapping negative */ - extra = (HALF_MAX_SIZE_T) + SIZE_T_ONE - unit; - ACQUIRE_MALLOC_GLOBAL_LOCK(); - { - /* Make sure end of memory is where we last set it. 
*/ - char* old_br = (char*)(CALL_MORECORE(0)); - if (old_br == sp->base + sp->size) { - char* rel_br = (char*)(CALL_MORECORE(-extra)); - char* new_br = (char*)(CALL_MORECORE(0)); - if (rel_br != CMFAIL && new_br < old_br) - released = old_br - new_br; - } - } - RELEASE_MALLOC_GLOBAL_LOCK(); - } - } - - if (released != 0) { - sp->size -= released; - m->footprint -= released; - init_top(m, m->top, m->topsize - released); - check_top_chunk(m, m->top); - } - } - - /* Unmap any unused mmapped segments */ - if (HAVE_MMAP) - released += release_unused_segments(m); - - /* On failure, disable autotrim to avoid repeated failed future calls */ - if (released == 0 && m->topsize > m->trim_check) - m->trim_check = MAX_SIZE_T; - } - - return (released != 0)? 1 : 0; -} - -/* Consolidate and bin a chunk. Differs from exported versions - of free mainly in that the chunk need not be marked as inuse. -*/ -static void dispose_chunk(mstate m, mchunkptr p, size_t psize) { - mchunkptr next = chunk_plus_offset(p, psize); - if (!pinuse(p)) { - mchunkptr prev; - size_t prevsize = p->prev_foot; - if (is_mmapped(p)) { - psize += prevsize + MMAP_FOOT_PAD; - if (CALL_MUNMAP((char*)p - prevsize, psize) == 0) - m->footprint -= psize; - return; - } - prev = chunk_minus_offset(p, prevsize); - psize += prevsize; - p = prev; - if (RTCHECK(ok_address(m, prev))) { /* consolidate backward */ - if (p != m->dv) { - unlink_chunk(m, p, prevsize); - } - else if ((next->head & INUSE_BITS) == INUSE_BITS) { - m->dvsize = psize; - set_free_with_pinuse(p, psize, next); - return; - } - } - else { - CORRUPTION_ERROR_ACTION(m); - return; - } - } - if (RTCHECK(ok_address(m, next))) { - if (!cinuse(next)) { /* consolidate forward */ - if (next == m->top) { - size_t tsize = m->topsize += psize; - m->top = p; - p->head = tsize | PINUSE_BIT; - if (p == m->dv) { - m->dv = 0; - m->dvsize = 0; - } - return; - } - else if (next == m->dv) { - size_t dsize = m->dvsize += psize; - m->dv = p; - 
set_size_and_pinuse_of_free_chunk(p, dsize); - return; - } - else { - size_t nsize = chunksize(next); - psize += nsize; - unlink_chunk(m, next, nsize); - set_size_and_pinuse_of_free_chunk(p, psize); - if (p == m->dv) { - m->dvsize = psize; - return; - } - } - } - else { - set_free_with_pinuse(p, psize, next); - } - insert_chunk(m, p, psize); - } - else { - CORRUPTION_ERROR_ACTION(m); - } -} - -/* ---------------------------- malloc --------------------------- */ - -/* allocate a large request from the best fitting chunk in a treebin */ -static void* tmalloc_large(mstate m, size_t nb) { - tchunkptr v = 0; - size_t rsize = -nb; /* Unsigned negation */ - tchunkptr t; - bindex_t idx; - compute_tree_index(nb, idx); - if ((t = *treebin_at(m, idx)) != 0) { - /* Traverse tree for this bin looking for node with size == nb */ - size_t sizebits = nb << leftshift_for_tree_index(idx); - tchunkptr rst = 0; /* The deepest untaken right subtree */ - for (;;) { - tchunkptr rt; - size_t trem = chunksize(t) - nb; - if (trem < rsize) { - v = t; - if ((rsize = trem) == 0) - break; - } - rt = t->child[1]; - t = t->child[(sizebits >> (SIZE_T_BITSIZE-SIZE_T_ONE)) & 1]; - if (rt != 0 && rt != t) - rst = rt; - if (t == 0) { - t = rst; /* set t to least subtree holding sizes > nb */ - break; - } - sizebits <<= 1; - } - } - if (t == 0 && v == 0) { /* set t to root of next non-empty treebin */ - binmap_t leftbits = left_bits(idx2bit(idx)) & m->treemap; - if (leftbits != 0) { - bindex_t i; - binmap_t leastbit = least_bit(leftbits); - compute_bit2idx(leastbit, i); - t = *treebin_at(m, i); - } - } - - while (t != 0) { /* find smallest of tree or subtree */ - size_t trem = chunksize(t) - nb; - if (trem < rsize) { - rsize = trem; - v = t; - } - t = leftmost_child(t); - } - - /* If dv is a better fit, return 0 so malloc will use it */ - if (v != 0 && rsize < (size_t)(m->dvsize - nb)) { - if (RTCHECK(ok_address(m, v))) { /* split */ - mchunkptr r = chunk_plus_offset(v, nb); - assert(chunksize(v) == 
rsize + nb); - if (RTCHECK(ok_next(v, r))) { - unlink_large_chunk(m, v); - if (rsize < MIN_CHUNK_SIZE) - set_inuse_and_pinuse(m, v, (rsize + nb)); - else { - set_size_and_pinuse_of_inuse_chunk(m, v, nb); - set_size_and_pinuse_of_free_chunk(r, rsize); - insert_chunk(m, r, rsize); - } - return chunk2mem(v); - } - } - CORRUPTION_ERROR_ACTION(m); - } - return 0; -} - -/* allocate a small request from the best fitting chunk in a treebin */ -static void* tmalloc_small(mstate m, size_t nb) { - tchunkptr t, v; - size_t rsize; - bindex_t i; - binmap_t leastbit = least_bit(m->treemap); - compute_bit2idx(leastbit, i); - v = t = *treebin_at(m, i); - rsize = chunksize(t) - nb; - - while ((t = leftmost_child(t)) != 0) { - size_t trem = chunksize(t) - nb; - if (trem < rsize) { - rsize = trem; - v = t; - } - } - - if (RTCHECK(ok_address(m, v))) { - mchunkptr r = chunk_plus_offset(v, nb); - assert(chunksize(v) == rsize + nb); - if (RTCHECK(ok_next(v, r))) { - unlink_large_chunk(m, v); - if (rsize < MIN_CHUNK_SIZE) - set_inuse_and_pinuse(m, v, (rsize + nb)); - else { - set_size_and_pinuse_of_inuse_chunk(m, v, nb); - set_size_and_pinuse_of_free_chunk(r, rsize); - replace_dv(m, r, rsize); - } - return chunk2mem(v); - } - } - - CORRUPTION_ERROR_ACTION(m); - return 0; -} - -#if !ONLY_MSPACES - -#if __wasilibc_unmodified_upstream // Forward declaration of try_init_allocator. -#else -static void try_init_allocator(void); -#endif - -void* dlmalloc(size_t bytes) { - /* - Basic algorithm: - If a small request (< 256 bytes minus per-chunk overhead): - 1. If one exists, use a remainderless chunk in associated smallbin. - (Remainderless means that there are too few excess bytes to - represent as a chunk.) - 2. If it is big enough, use the dv chunk, which is normally the - chunk adjacent to the one used for the most recent small request. - 3. If one exists, split the smallest available chunk in a bin, - saving remainder in dv. - 4. If it is big enough, use the top chunk. - 5. 
If available, get memory from system and use it - Otherwise, for a large request: - 1. Find the smallest available binned chunk that fits, and use it - if it is better fitting than dv chunk, splitting if necessary. - 2. If better fitting than any binned chunk, use the dv chunk. - 3. If it is big enough, use the top chunk. - 4. If request size >= mmap threshold, try to directly mmap this chunk. - 5. If available, get memory from system and use it - - The ugly goto's here ensure that postaction occurs along all paths. - */ - -#if USE_LOCKS - ensure_initialization(); /* initialize in sys_alloc if not using locks */ -#endif - -#if __wasilibc_unmodified_upstream // Try to initialize the allocator. -#else - if (!is_initialized(gm)) { - try_init_allocator(); - } -#endif - - if (!PREACTION(gm)) { - void* mem; - size_t nb; - if (bytes <= MAX_SMALL_REQUEST) { - bindex_t idx; - binmap_t smallbits; - nb = (bytes < MIN_REQUEST)? MIN_CHUNK_SIZE : pad_request(bytes); - idx = small_index(nb); - smallbits = gm->smallmap >> idx; - - if ((smallbits & 0x3U) != 0) { /* Remainderless fit to a smallbin. 
*/ - mchunkptr b, p; - idx += ~smallbits & 1; /* Uses next bin if idx empty */ - b = smallbin_at(gm, idx); - p = b->fd; - assert(chunksize(p) == small_index2size(idx)); - unlink_first_small_chunk(gm, b, p, idx); - set_inuse_and_pinuse(gm, p, small_index2size(idx)); - mem = chunk2mem(p); - check_malloced_chunk(gm, mem, nb); - goto postaction; - } - - else if (nb > gm->dvsize) { - if (smallbits != 0) { /* Use chunk in next nonempty smallbin */ - mchunkptr b, p, r; - size_t rsize; - bindex_t i; - binmap_t leftbits = (smallbits << idx) & left_bits(idx2bit(idx)); - binmap_t leastbit = least_bit(leftbits); - compute_bit2idx(leastbit, i); - b = smallbin_at(gm, i); - p = b->fd; - assert(chunksize(p) == small_index2size(i)); - unlink_first_small_chunk(gm, b, p, i); - rsize = small_index2size(i) - nb; - /* Fit here cannot be remainderless if 4byte sizes */ - if (SIZE_T_SIZE != 4 && rsize < MIN_CHUNK_SIZE) - set_inuse_and_pinuse(gm, p, small_index2size(i)); - else { - set_size_and_pinuse_of_inuse_chunk(gm, p, nb); - r = chunk_plus_offset(p, nb); - set_size_and_pinuse_of_free_chunk(r, rsize); - replace_dv(gm, r, rsize); - } - mem = chunk2mem(p); - check_malloced_chunk(gm, mem, nb); - goto postaction; - } - - else if (gm->treemap != 0 && (mem = tmalloc_small(gm, nb)) != 0) { - check_malloced_chunk(gm, mem, nb); - goto postaction; - } - } - } - else if (bytes >= MAX_REQUEST) - nb = MAX_SIZE_T; /* Too big to allocate. 
Force failure (in sys alloc) */ - else { - nb = pad_request(bytes); - if (gm->treemap != 0 && (mem = tmalloc_large(gm, nb)) != 0) { - check_malloced_chunk(gm, mem, nb); - goto postaction; - } - } - - if (nb <= gm->dvsize) { - size_t rsize = gm->dvsize - nb; - mchunkptr p = gm->dv; - if (rsize >= MIN_CHUNK_SIZE) { /* split dv */ - mchunkptr r = gm->dv = chunk_plus_offset(p, nb); - gm->dvsize = rsize; - set_size_and_pinuse_of_free_chunk(r, rsize); - set_size_and_pinuse_of_inuse_chunk(gm, p, nb); - } - else { /* exhaust dv */ - size_t dvs = gm->dvsize; - gm->dvsize = 0; - gm->dv = 0; - set_inuse_and_pinuse(gm, p, dvs); - } - mem = chunk2mem(p); - check_malloced_chunk(gm, mem, nb); - goto postaction; - } - - else if (nb < gm->topsize) { /* Split top */ - size_t rsize = gm->topsize -= nb; - mchunkptr p = gm->top; - mchunkptr r = gm->top = chunk_plus_offset(p, nb); - r->head = rsize | PINUSE_BIT; - set_size_and_pinuse_of_inuse_chunk(gm, p, nb); - mem = chunk2mem(p); - check_top_chunk(gm, gm->top); - check_malloced_chunk(gm, mem, nb); - goto postaction; - } - - mem = sys_alloc(gm, nb); - - postaction: - POSTACTION(gm); - return mem; - } - - return 0; -} - -/* ---------------------------- free --------------------------- */ - -void dlfree(void* mem) { - /* - Consolidate freed chunks with preceeding or succeeding bordering - free chunks, if they exist, and then place in a bin. Intermixed - with special cases for top, dv, mmapped chunks, and usage errors. 
- */ - - if (mem != 0) { - mchunkptr p = mem2chunk(mem); -#if FOOTERS - mstate fm = get_mstate_for(p); - if (!ok_magic(fm)) { - USAGE_ERROR_ACTION(fm, p); - return; - } -#else /* FOOTERS */ -#define fm gm -#endif /* FOOTERS */ - if (!PREACTION(fm)) { - check_inuse_chunk(fm, p); - if (RTCHECK(ok_address(fm, p) && ok_inuse(p))) { - size_t psize = chunksize(p); - mchunkptr next = chunk_plus_offset(p, psize); - if (!pinuse(p)) { - size_t prevsize = p->prev_foot; - if (is_mmapped(p)) { - psize += prevsize + MMAP_FOOT_PAD; - if (CALL_MUNMAP((char*)p - prevsize, psize) == 0) - fm->footprint -= psize; - goto postaction; - } - else { - mchunkptr prev = chunk_minus_offset(p, prevsize); - psize += prevsize; - p = prev; - if (RTCHECK(ok_address(fm, prev))) { /* consolidate backward */ - if (p != fm->dv) { - unlink_chunk(fm, p, prevsize); - } - else if ((next->head & INUSE_BITS) == INUSE_BITS) { - fm->dvsize = psize; - set_free_with_pinuse(p, psize, next); - goto postaction; - } - } - else - goto erroraction; - } - } - - if (RTCHECK(ok_next(p, next) && ok_pinuse(next))) { - if (!cinuse(next)) { /* consolidate forward */ - if (next == fm->top) { - size_t tsize = fm->topsize += psize; - fm->top = p; - p->head = tsize | PINUSE_BIT; - if (p == fm->dv) { - fm->dv = 0; - fm->dvsize = 0; - } - if (should_trim(fm, tsize)) - sys_trim(fm, 0); - goto postaction; - } - else if (next == fm->dv) { - size_t dsize = fm->dvsize += psize; - fm->dv = p; - set_size_and_pinuse_of_free_chunk(p, dsize); - goto postaction; - } - else { - size_t nsize = chunksize(next); - psize += nsize; - unlink_chunk(fm, next, nsize); - set_size_and_pinuse_of_free_chunk(p, psize); - if (p == fm->dv) { - fm->dvsize = psize; - goto postaction; - } - } - } - else - set_free_with_pinuse(p, psize, next); - - if (is_small(psize)) { - insert_small_chunk(fm, p, psize); - check_free_chunk(fm, p); - } - else { - tchunkptr tp = (tchunkptr)p; - insert_large_chunk(fm, tp, psize); - check_free_chunk(fm, p); - if 
(--fm->release_checks == 0) - release_unused_segments(fm); - } - goto postaction; - } - } - erroraction: - USAGE_ERROR_ACTION(fm, p); - postaction: - POSTACTION(fm); - } - } -#if !FOOTERS -#undef fm -#endif /* FOOTERS */ -} - -void* dlcalloc(size_t n_elements, size_t elem_size) { - void* mem; - size_t req = 0; - if (n_elements != 0) { - req = n_elements * elem_size; - if (((n_elements | elem_size) & ~(size_t)0xffff) && - (req / n_elements != elem_size)) - req = MAX_SIZE_T; /* force downstream failure on overflow */ - } - mem = dlmalloc(req); - if (mem != 0 && calloc_must_clear(mem2chunk(mem))) - memset(mem, 0, req); - return mem; -} - -#endif /* !ONLY_MSPACES */ - -/* ------------ Internal support for realloc, memalign, etc -------------- */ - -/* Try to realloc; only in-place unless can_move true */ -static mchunkptr try_realloc_chunk(mstate m, mchunkptr p, size_t nb, - int can_move) { - mchunkptr newp = 0; - size_t oldsize = chunksize(p); - mchunkptr next = chunk_plus_offset(p, oldsize); - if (RTCHECK(ok_address(m, p) && ok_inuse(p) && - ok_next(p, next) && ok_pinuse(next))) { - if (is_mmapped(p)) { - newp = mmap_resize(m, p, nb, can_move); - } - else if (oldsize >= nb) { /* already big enough */ - size_t rsize = oldsize - nb; - if (rsize >= MIN_CHUNK_SIZE) { /* split off remainder */ - mchunkptr r = chunk_plus_offset(p, nb); - set_inuse(m, p, nb); - set_inuse(m, r, rsize); - dispose_chunk(m, r, rsize); - } - newp = p; - } - else if (next == m->top) { /* extend into top */ - if (oldsize + m->topsize > nb) { - size_t newsize = oldsize + m->topsize; - size_t newtopsize = newsize - nb; - mchunkptr newtop = chunk_plus_offset(p, nb); - set_inuse(m, p, nb); - newtop->head = newtopsize |PINUSE_BIT; - m->top = newtop; - m->topsize = newtopsize; - newp = p; - } - } - else if (next == m->dv) { /* extend into dv */ - size_t dvs = m->dvsize; - if (oldsize + dvs >= nb) { - size_t dsize = oldsize + dvs - nb; - if (dsize >= MIN_CHUNK_SIZE) { - mchunkptr r = chunk_plus_offset(p, 
nb); - mchunkptr n = chunk_plus_offset(r, dsize); - set_inuse(m, p, nb); - set_size_and_pinuse_of_free_chunk(r, dsize); - clear_pinuse(n); - m->dvsize = dsize; - m->dv = r; - } - else { /* exhaust dv */ - size_t newsize = oldsize + dvs; - set_inuse(m, p, newsize); - m->dvsize = 0; - m->dv = 0; - } - newp = p; - } - } - else if (!cinuse(next)) { /* extend into next free chunk */ - size_t nextsize = chunksize(next); - if (oldsize + nextsize >= nb) { - size_t rsize = oldsize + nextsize - nb; - unlink_chunk(m, next, nextsize); - if (rsize < MIN_CHUNK_SIZE) { - size_t newsize = oldsize + nextsize; - set_inuse(m, p, newsize); - } - else { - mchunkptr r = chunk_plus_offset(p, nb); - set_inuse(m, p, nb); - set_inuse(m, r, rsize); - dispose_chunk(m, r, rsize); - } - newp = p; - } - } - } - else { - USAGE_ERROR_ACTION(m, chunk2mem(p)); - } - return newp; -} - -static void* internal_memalign(mstate m, size_t alignment, size_t bytes) { - void* mem = 0; - if (alignment < MIN_CHUNK_SIZE) /* must be at least a minimum chunk size */ - alignment = MIN_CHUNK_SIZE; - if ((alignment & (alignment-SIZE_T_ONE)) != 0) {/* Ensure a power of 2 */ - size_t a = MALLOC_ALIGNMENT << 1; - while (a < alignment) a <<= 1; - alignment = a; - } - if (bytes >= MAX_REQUEST - alignment) { - if (m != 0) { /* Test isn't needed but avoids compiler warning */ - MALLOC_FAILURE_ACTION; - } - } - else { - size_t nb = request2size(bytes); - size_t req = nb + alignment + MIN_CHUNK_SIZE - CHUNK_OVERHEAD; - mem = internal_malloc(m, req); - if (mem != 0) { - mchunkptr p = mem2chunk(mem); - if (PREACTION(m)) - return 0; - if ((((size_t)(mem)) & (alignment - 1)) != 0) { /* misaligned */ - /* - Find an aligned spot inside chunk. Since we need to give - back leading space in a chunk of at least MIN_CHUNK_SIZE, if - the first calculation places us at a spot with less than - MIN_CHUNK_SIZE leader, we can move to the next aligned spot. - We've allocated enough total room so that this is always - possible. 
- */ - char* br = (char*)mem2chunk((size_t)(((size_t)((char*)mem + alignment - - SIZE_T_ONE)) & - -alignment)); - char* pos = ((size_t)(br - (char*)(p)) >= MIN_CHUNK_SIZE)? - br : br+alignment; - mchunkptr newp = (mchunkptr)pos; - size_t leadsize = pos - (char*)(p); - size_t newsize = chunksize(p) - leadsize; - - if (is_mmapped(p)) { /* For mmapped chunks, just adjust offset */ - newp->prev_foot = p->prev_foot + leadsize; - newp->head = newsize; - } - else { /* Otherwise, give back leader, use the rest */ - set_inuse(m, newp, newsize); - set_inuse(m, p, leadsize); - dispose_chunk(m, p, leadsize); - } - p = newp; - } - - /* Give back spare room at the end */ - if (!is_mmapped(p)) { - size_t size = chunksize(p); - if (size > nb + MIN_CHUNK_SIZE) { - size_t remainder_size = size - nb; - mchunkptr remainder = chunk_plus_offset(p, nb); - set_inuse(m, p, nb); - set_inuse(m, remainder, remainder_size); - dispose_chunk(m, remainder, remainder_size); - } - } - - mem = chunk2mem(p); - assert (chunksize(p) >= nb); - assert(((size_t)mem & (alignment - 1)) == 0); - check_inuse_chunk(m, p); - POSTACTION(m); - } - } - return mem; -} - -/* - Common support for independent_X routines, handling - all of the combinations that can result. 
- The opts arg has: - bit 0 set if all elements are same size (using sizes[0]) - bit 1 set if elements should be zeroed -*/ -static void** ialloc(mstate m, - size_t n_elements, - size_t* sizes, - int opts, - void* chunks[]) { - - size_t element_size; /* chunksize of each element, if all same */ - size_t contents_size; /* total size of elements */ - size_t array_size; /* request size of pointer array */ - void* mem; /* malloced aggregate space */ - mchunkptr p; /* corresponding chunk */ - size_t remainder_size; /* remaining bytes while splitting */ - void** marray; /* either "chunks" or malloced ptr array */ - mchunkptr array_chunk; /* chunk for malloced ptr array */ - flag_t was_enabled; /* to disable mmap */ - size_t size; - size_t i; - - ensure_initialization(); - /* compute array length, if needed */ - if (chunks != 0) { - if (n_elements == 0) - return chunks; /* nothing to do */ - marray = chunks; - array_size = 0; - } - else { - /* if empty req, must still return chunk representing empty array */ - if (n_elements == 0) - return (void**)internal_malloc(m, 0); - marray = 0; - array_size = request2size(n_elements * (sizeof(void*))); - } - - /* compute total element size */ - if (opts & 0x1) { /* all-same-size */ - element_size = request2size(*sizes); - contents_size = n_elements * element_size; - } - else { /* add up all the sizes */ - element_size = 0; - contents_size = 0; - for (i = 0; i != n_elements; ++i) - contents_size += request2size(sizes[i]); - } - - size = contents_size + array_size; - - /* - Allocate the aggregate chunk. First disable direct-mmapping so - malloc won't use it, since we would not be able to later - free/realloc space internal to a segregated mmap region. 
- */ - was_enabled = use_mmap(m); - disable_mmap(m); - mem = internal_malloc(m, size - CHUNK_OVERHEAD); - if (was_enabled) - enable_mmap(m); - if (mem == 0) - return 0; - - if (PREACTION(m)) return 0; - p = mem2chunk(mem); - remainder_size = chunksize(p); - - assert(!is_mmapped(p)); - - if (opts & 0x2) { /* optionally clear the elements */ - memset((size_t*)mem, 0, remainder_size - SIZE_T_SIZE - array_size); - } - - /* If not provided, allocate the pointer array as final part of chunk */ - if (marray == 0) { - size_t array_chunk_size; - array_chunk = chunk_plus_offset(p, contents_size); - array_chunk_size = remainder_size - contents_size; - marray = (void**) (chunk2mem(array_chunk)); - set_size_and_pinuse_of_inuse_chunk(m, array_chunk, array_chunk_size); - remainder_size = contents_size; - } - - /* split out elements */ - for (i = 0; ; ++i) { - marray[i] = chunk2mem(p); - if (i != n_elements-1) { - if (element_size != 0) - size = element_size; - else - size = request2size(sizes[i]); - remainder_size -= size; - set_size_and_pinuse_of_inuse_chunk(m, p, size); - p = chunk_plus_offset(p, size); - } - else { /* the final element absorbs any overallocation slop */ - set_size_and_pinuse_of_inuse_chunk(m, p, remainder_size); - break; - } - } - -#if DEBUG - if (marray != chunks) { - /* final element must have exactly exhausted chunk */ - if (element_size != 0) { - assert(remainder_size == element_size); - } - else { - assert(remainder_size == request2size(sizes[i])); - } - check_inuse_chunk(m, mem2chunk(marray)); - } - for (i = 0; i != n_elements; ++i) - check_inuse_chunk(m, mem2chunk(marray[i])); - -#endif /* DEBUG */ - - POSTACTION(m); - return marray; -} - -/* Try to free all pointers in the given array. 
- Note: this could be made faster, by delaying consolidation, - at the price of disabling some user integrity checks, We - still optimize some consolidations by combining adjacent - chunks before freeing, which will occur often if allocated - with ialloc or the array is sorted. -*/ -static size_t internal_bulk_free(mstate m, void* array[], size_t nelem) { - size_t unfreed = 0; - if (!PREACTION(m)) { - void** a; - void** fence = &(array[nelem]); - for (a = array; a != fence; ++a) { - void* mem = *a; - if (mem != 0) { - mchunkptr p = mem2chunk(mem); - size_t psize = chunksize(p); -#if FOOTERS - if (get_mstate_for(p) != m) { - ++unfreed; - continue; - } -#endif - check_inuse_chunk(m, p); - *a = 0; - if (RTCHECK(ok_address(m, p) && ok_inuse(p))) { - void ** b = a + 1; /* try to merge with next chunk */ - mchunkptr next = next_chunk(p); - if (b != fence && *b == chunk2mem(next)) { - size_t newsize = chunksize(next) + psize; - set_inuse(m, p, newsize); - *b = chunk2mem(p); - } - else - dispose_chunk(m, p, psize); - } - else { - CORRUPTION_ERROR_ACTION(m); - break; - } - } - } - if (should_trim(m, m->topsize)) - sys_trim(m, 0); - POSTACTION(m); - } - return unfreed; -} - -/* Traversal */ -#if MALLOC_INSPECT_ALL -static void internal_inspect_all(mstate m, - void(*handler)(void *start, - void *end, - size_t used_bytes, - void* callback_arg), - void* arg) { - if (is_initialized(m)) { - mchunkptr top = m->top; - msegmentptr s; - for (s = &m->seg; s != 0; s = s->next) { - mchunkptr q = align_as_chunk(s->base); - while (segment_holds(s, q) && q->head != FENCEPOST_HEAD) { - mchunkptr next = next_chunk(q); - size_t sz = chunksize(q); - size_t used; - void* start; - if (is_inuse(q)) { - used = sz - CHUNK_OVERHEAD; /* must not be mmapped */ - start = chunk2mem(q); - } - else { - used = 0; - if (is_small(sz)) { /* offset by possible bookkeeping */ - start = (void*)((char*)q + sizeof(struct malloc_chunk)); - } - else { - start = (void*)((char*)q + sizeof(struct malloc_tree_chunk)); - 
} - } - if (start < (void*)next) /* skip if all space is bookkeeping */ - handler(start, next, used, arg); - if (q == top) - break; - q = next; - } - } - } -} -#endif /* MALLOC_INSPECT_ALL */ - -#ifdef __wasilibc_unmodified_upstream // Define a function that initializes the initial state of dlmalloc. -#else -/* ------------------ Exported try_init_allocator -------------------- */ - -/* Symbol marking the end of data, bss and explicit stack, provided by wasm-ld. */ -extern unsigned char __heap_base; - -/* Initialize the initial state of dlmalloc to be able to use free memory between __heap_base and initial. */ -static void try_init_allocator(void) { - /* Check that it is a first-time initialization. */ - assert(!is_initialized(gm)); - - char *base = (char *)&__heap_base; - /* Calls sbrk(0) that returns the initial memory position. */ - char *init = (char *)CALL_MORECORE(0); - int initial_heap_size = init - base; - - /* Check that initial heap is long enough to serve a minimal allocation request. */ - if (initial_heap_size <= MIN_CHUNK_SIZE + TOP_FOOT_SIZE + MALLOC_ALIGNMENT) { - return; - } - - /* Initialize mstate. */ - ensure_initialization(); - - /* Initialize the dlmalloc internal state. */ - gm->least_addr = base; - gm->seg.base = base; - gm->seg.size = initial_heap_size; - gm->magic = mparams.magic; - gm->release_checks = MAX_RELEASE_CHECK_RATE; - init_bins(gm); - init_top(gm, (mchunkptr)base, initial_heap_size - TOP_FOOT_SIZE); -} -#endif - -/* ------------------ Exported realloc, memalign, etc -------------------- */ - -#if !ONLY_MSPACES - -void* dlrealloc(void* oldmem, size_t bytes) { - void* mem = 0; - if (oldmem == 0) { - mem = dlmalloc(bytes); - } - else if (bytes >= MAX_REQUEST) { - MALLOC_FAILURE_ACTION; - } -#ifdef REALLOC_ZERO_BYTES_FREES - else if (bytes == 0) { - dlfree(oldmem); - } -#endif /* REALLOC_ZERO_BYTES_FREES */ - else { - size_t nb = request2size(bytes); - mchunkptr oldp = mem2chunk(oldmem); -#if ! 
FOOTERS - mstate m = gm; -#else /* FOOTERS */ - mstate m = get_mstate_for(oldp); - if (!ok_magic(m)) { - USAGE_ERROR_ACTION(m, oldmem); - return 0; - } -#endif /* FOOTERS */ - if (!PREACTION(m)) { - mchunkptr newp = try_realloc_chunk(m, oldp, nb, 1); - POSTACTION(m); - if (newp != 0) { - check_inuse_chunk(m, newp); - mem = chunk2mem(newp); - } - else { - mem = internal_malloc(m, bytes); - if (mem != 0) { - size_t oc = chunksize(oldp) - overhead_for(oldp); - memcpy(mem, oldmem, (oc < bytes)? oc : bytes); - internal_free(m, oldmem); - } - } - } - } - return mem; -} - -void* dlrealloc_in_place(void* oldmem, size_t bytes) { - void* mem = 0; - if (oldmem != 0) { - if (bytes >= MAX_REQUEST) { - MALLOC_FAILURE_ACTION; - } - else { - size_t nb = request2size(bytes); - mchunkptr oldp = mem2chunk(oldmem); -#if ! FOOTERS - mstate m = gm; -#else /* FOOTERS */ - mstate m = get_mstate_for(oldp); - if (!ok_magic(m)) { - USAGE_ERROR_ACTION(m, oldmem); - return 0; - } -#endif /* FOOTERS */ - if (!PREACTION(m)) { - mchunkptr newp = try_realloc_chunk(m, oldp, nb, 0); - POSTACTION(m); - if (newp == oldp) { - check_inuse_chunk(m, newp); - mem = oldmem; - } - } - } - } - return mem; -} - -void* dlmemalign(size_t alignment, size_t bytes) { - if (alignment <= MALLOC_ALIGNMENT) { - return dlmalloc(bytes); - } - return internal_memalign(gm, alignment, bytes); -} - -int dlposix_memalign(void** pp, size_t alignment, size_t bytes) { - void* mem = 0; - if (alignment == MALLOC_ALIGNMENT) - mem = dlmalloc(bytes); - else { - size_t d = alignment / sizeof(void*); - size_t r = alignment % sizeof(void*); - if (r != 0 || d == 0 || (d & (d-SIZE_T_ONE)) != 0) - return EINVAL; - else if (bytes <= MAX_REQUEST - alignment) { - if (alignment < MIN_CHUNK_SIZE) - alignment = MIN_CHUNK_SIZE; - mem = internal_memalign(gm, alignment, bytes); - } - } - if (mem == 0) - return ENOMEM; - else { - *pp = mem; - return 0; - } -} - -void* dlvalloc(size_t bytes) { - size_t pagesz; - ensure_initialization(); - pagesz = 
mparams.page_size; - return dlmemalign(pagesz, bytes); -} - -void* dlpvalloc(size_t bytes) { - size_t pagesz; - ensure_initialization(); - pagesz = mparams.page_size; - return dlmemalign(pagesz, (bytes + pagesz - SIZE_T_ONE) & ~(pagesz - SIZE_T_ONE)); -} - -void** dlindependent_calloc(size_t n_elements, size_t elem_size, - void* chunks[]) { - size_t sz = elem_size; /* serves as 1-element array */ - return ialloc(gm, n_elements, &sz, 3, chunks); -} - -void** dlindependent_comalloc(size_t n_elements, size_t sizes[], - void* chunks[]) { - return ialloc(gm, n_elements, sizes, 0, chunks); -} - -size_t dlbulk_free(void* array[], size_t nelem) { - return internal_bulk_free(gm, array, nelem); -} - -#if MALLOC_INSPECT_ALL -void dlmalloc_inspect_all(void(*handler)(void *start, - void *end, - size_t used_bytes, - void* callback_arg), - void* arg) { - ensure_initialization(); - if (!PREACTION(gm)) { - internal_inspect_all(gm, handler, arg); - POSTACTION(gm); - } -} -#endif /* MALLOC_INSPECT_ALL */ - -int dlmalloc_trim(size_t pad) { - int result = 0; - ensure_initialization(); - if (!PREACTION(gm)) { - result = sys_trim(gm, pad); - POSTACTION(gm); - } - return result; -} - -size_t dlmalloc_footprint(void) { - return gm->footprint; -} - -size_t dlmalloc_max_footprint(void) { - return gm->max_footprint; -} - -size_t dlmalloc_footprint_limit(void) { - size_t maf = gm->footprint_limit; - return maf == 0 ? 
MAX_SIZE_T : maf; -} - -size_t dlmalloc_set_footprint_limit(size_t bytes) { - size_t result; /* invert sense of 0 */ - if (bytes == 0) - result = granularity_align(1); /* Use minimal size */ - if (bytes == MAX_SIZE_T) - result = 0; /* disable */ - else - result = granularity_align(bytes); - return gm->footprint_limit = result; -} - -#if !NO_MALLINFO -struct mallinfo dlmallinfo(void) { - return internal_mallinfo(gm); -} -#endif /* NO_MALLINFO */ - -#if !NO_MALLOC_STATS -void dlmalloc_stats() { - internal_malloc_stats(gm); -} -#endif /* NO_MALLOC_STATS */ - -int dlmallopt(int param_number, int value) { - return change_mparam(param_number, value); -} - -size_t dlmalloc_usable_size(void* mem) { - if (mem != 0) { - mchunkptr p = mem2chunk(mem); - if (is_inuse(p)) - return chunksize(p) - overhead_for(p); - } - return 0; -} - -#endif /* !ONLY_MSPACES */ - -/* ----------------------------- user mspaces ---------------------------- */ - -#if MSPACES - -static mstate init_user_mstate(char* tbase, size_t tsize) { - size_t msize = pad_request(sizeof(struct malloc_state)); - mchunkptr mn; - mchunkptr msp = align_as_chunk(tbase); - mstate m = (mstate)(chunk2mem(msp)); - memset(m, 0, msize); - (void)INITIAL_LOCK(&m->mutex); - msp->head = (msize|INUSE_BITS); - m->seg.base = m->least_addr = tbase; - m->seg.size = m->footprint = m->max_footprint = tsize; - m->magic = mparams.magic; - m->release_checks = MAX_RELEASE_CHECK_RATE; - m->mflags = mparams.default_mflags; - m->extp = 0; - m->exts = 0; - disable_contiguous(m); - init_bins(m); - mn = next_chunk(mem2chunk(m)); - init_top(m, mn, (size_t)((tbase + tsize) - (char*)mn) - TOP_FOOT_SIZE); - check_top_chunk(m, m->top); - return m; -} - -mspace create_mspace(size_t capacity, int locked) { - mstate m = 0; - size_t msize; - ensure_initialization(); - msize = pad_request(sizeof(struct malloc_state)); - if (capacity < (size_t) -(msize + TOP_FOOT_SIZE + mparams.page_size)) { - size_t rs = ((capacity == 0)? 
mparams.granularity : - (capacity + TOP_FOOT_SIZE + msize)); - size_t tsize = granularity_align(rs); - char* tbase = (char*)(CALL_MMAP(tsize)); - if (tbase != CMFAIL) { - m = init_user_mstate(tbase, tsize); - m->seg.sflags = USE_MMAP_BIT; - set_lock(m, locked); - } - } - return (mspace)m; -} - -mspace create_mspace_with_base(void* base, size_t capacity, int locked) { - mstate m = 0; - size_t msize; - ensure_initialization(); - msize = pad_request(sizeof(struct malloc_state)); - if (capacity > msize + TOP_FOOT_SIZE && - capacity < (size_t) -(msize + TOP_FOOT_SIZE + mparams.page_size)) { - m = init_user_mstate((char*)base, capacity); - m->seg.sflags = EXTERN_BIT; - set_lock(m, locked); - } - return (mspace)m; -} - -int mspace_track_large_chunks(mspace msp, int enable) { - int ret = 0; - mstate ms = (mstate)msp; - if (!PREACTION(ms)) { - if (!use_mmap(ms)) { - ret = 1; - } - if (!enable) { - enable_mmap(ms); - } else { - disable_mmap(ms); - } - POSTACTION(ms); - } - return ret; -} - -size_t destroy_mspace(mspace msp) { - size_t freed = 0; - mstate ms = (mstate)msp; - if (ok_magic(ms)) { - msegmentptr sp = &ms->seg; - (void)DESTROY_LOCK(&ms->mutex); /* destroy before unmapped */ - while (sp != 0) { - char* base = sp->base; - size_t size = sp->size; - flag_t flag = sp->sflags; - (void)base; /* placate people compiling -Wunused-variable */ - sp = sp->next; - if ((flag & USE_MMAP_BIT) && !(flag & EXTERN_BIT) && - CALL_MUNMAP(base, size) == 0) - freed += size; - } - } - else { - USAGE_ERROR_ACTION(ms,ms); - } - return freed; -} - -/* - mspace versions of routines are near-clones of the global - versions. This is not so nice but better than the alternatives. -*/ - -void* mspace_malloc(mspace msp, size_t bytes) { - mstate ms = (mstate)msp; - if (!ok_magic(ms)) { - USAGE_ERROR_ACTION(ms,ms); - return 0; - } - if (!PREACTION(ms)) { - void* mem; - size_t nb; - if (bytes <= MAX_SMALL_REQUEST) { - bindex_t idx; - binmap_t smallbits; - nb = (bytes < MIN_REQUEST)? 
MIN_CHUNK_SIZE : pad_request(bytes); - idx = small_index(nb); - smallbits = ms->smallmap >> idx; - - if ((smallbits & 0x3U) != 0) { /* Remainderless fit to a smallbin. */ - mchunkptr b, p; - idx += ~smallbits & 1; /* Uses next bin if idx empty */ - b = smallbin_at(ms, idx); - p = b->fd; - assert(chunksize(p) == small_index2size(idx)); - unlink_first_small_chunk(ms, b, p, idx); - set_inuse_and_pinuse(ms, p, small_index2size(idx)); - mem = chunk2mem(p); - check_malloced_chunk(ms, mem, nb); - goto postaction; - } - - else if (nb > ms->dvsize) { - if (smallbits != 0) { /* Use chunk in next nonempty smallbin */ - mchunkptr b, p, r; - size_t rsize; - bindex_t i; - binmap_t leftbits = (smallbits << idx) & left_bits(idx2bit(idx)); - binmap_t leastbit = least_bit(leftbits); - compute_bit2idx(leastbit, i); - b = smallbin_at(ms, i); - p = b->fd; - assert(chunksize(p) == small_index2size(i)); - unlink_first_small_chunk(ms, b, p, i); - rsize = small_index2size(i) - nb; - /* Fit here cannot be remainderless if 4byte sizes */ - if (SIZE_T_SIZE != 4 && rsize < MIN_CHUNK_SIZE) - set_inuse_and_pinuse(ms, p, small_index2size(i)); - else { - set_size_and_pinuse_of_inuse_chunk(ms, p, nb); - r = chunk_plus_offset(p, nb); - set_size_and_pinuse_of_free_chunk(r, rsize); - replace_dv(ms, r, rsize); - } - mem = chunk2mem(p); - check_malloced_chunk(ms, mem, nb); - goto postaction; - } - - else if (ms->treemap != 0 && (mem = tmalloc_small(ms, nb)) != 0) { - check_malloced_chunk(ms, mem, nb); - goto postaction; - } - } - } - else if (bytes >= MAX_REQUEST) - nb = MAX_SIZE_T; /* Too big to allocate. 
Force failure (in sys alloc) */ - else { - nb = pad_request(bytes); - if (ms->treemap != 0 && (mem = tmalloc_large(ms, nb)) != 0) { - check_malloced_chunk(ms, mem, nb); - goto postaction; - } - } - - if (nb <= ms->dvsize) { - size_t rsize = ms->dvsize - nb; - mchunkptr p = ms->dv; - if (rsize >= MIN_CHUNK_SIZE) { /* split dv */ - mchunkptr r = ms->dv = chunk_plus_offset(p, nb); - ms->dvsize = rsize; - set_size_and_pinuse_of_free_chunk(r, rsize); - set_size_and_pinuse_of_inuse_chunk(ms, p, nb); - } - else { /* exhaust dv */ - size_t dvs = ms->dvsize; - ms->dvsize = 0; - ms->dv = 0; - set_inuse_and_pinuse(ms, p, dvs); - } - mem = chunk2mem(p); - check_malloced_chunk(ms, mem, nb); - goto postaction; - } - - else if (nb < ms->topsize) { /* Split top */ - size_t rsize = ms->topsize -= nb; - mchunkptr p = ms->top; - mchunkptr r = ms->top = chunk_plus_offset(p, nb); - r->head = rsize | PINUSE_BIT; - set_size_and_pinuse_of_inuse_chunk(ms, p, nb); - mem = chunk2mem(p); - check_top_chunk(ms, ms->top); - check_malloced_chunk(ms, mem, nb); - goto postaction; - } - - mem = sys_alloc(ms, nb); - - postaction: - POSTACTION(ms); - return mem; - } - - return 0; -} - -void mspace_free(mspace msp, void* mem) { - if (mem != 0) { - mchunkptr p = mem2chunk(mem); -#if FOOTERS - mstate fm = get_mstate_for(p); - (void)msp; /* placate people compiling -Wunused */ -#else /* FOOTERS */ - mstate fm = (mstate)msp; -#endif /* FOOTERS */ - if (!ok_magic(fm)) { - USAGE_ERROR_ACTION(fm, p); - return; - } - if (!PREACTION(fm)) { - check_inuse_chunk(fm, p); - if (RTCHECK(ok_address(fm, p) && ok_inuse(p))) { - size_t psize = chunksize(p); - mchunkptr next = chunk_plus_offset(p, psize); - if (!pinuse(p)) { - size_t prevsize = p->prev_foot; - if (is_mmapped(p)) { - psize += prevsize + MMAP_FOOT_PAD; - if (CALL_MUNMAP((char*)p - prevsize, psize) == 0) - fm->footprint -= psize; - goto postaction; - } - else { - mchunkptr prev = chunk_minus_offset(p, prevsize); - psize += prevsize; - p = prev; - if 
(RTCHECK(ok_address(fm, prev))) { /* consolidate backward */ - if (p != fm->dv) { - unlink_chunk(fm, p, prevsize); - } - else if ((next->head & INUSE_BITS) == INUSE_BITS) { - fm->dvsize = psize; - set_free_with_pinuse(p, psize, next); - goto postaction; - } - } - else - goto erroraction; - } - } - - if (RTCHECK(ok_next(p, next) && ok_pinuse(next))) { - if (!cinuse(next)) { /* consolidate forward */ - if (next == fm->top) { - size_t tsize = fm->topsize += psize; - fm->top = p; - p->head = tsize | PINUSE_BIT; - if (p == fm->dv) { - fm->dv = 0; - fm->dvsize = 0; - } - if (should_trim(fm, tsize)) - sys_trim(fm, 0); - goto postaction; - } - else if (next == fm->dv) { - size_t dsize = fm->dvsize += psize; - fm->dv = p; - set_size_and_pinuse_of_free_chunk(p, dsize); - goto postaction; - } - else { - size_t nsize = chunksize(next); - psize += nsize; - unlink_chunk(fm, next, nsize); - set_size_and_pinuse_of_free_chunk(p, psize); - if (p == fm->dv) { - fm->dvsize = psize; - goto postaction; - } - } - } - else - set_free_with_pinuse(p, psize, next); - - if (is_small(psize)) { - insert_small_chunk(fm, p, psize); - check_free_chunk(fm, p); - } - else { - tchunkptr tp = (tchunkptr)p; - insert_large_chunk(fm, tp, psize); - check_free_chunk(fm, p); - if (--fm->release_checks == 0) - release_unused_segments(fm); - } - goto postaction; - } - } - erroraction: - USAGE_ERROR_ACTION(fm, p); - postaction: - POSTACTION(fm); - } - } -} - -void* mspace_calloc(mspace msp, size_t n_elements, size_t elem_size) { - void* mem; - size_t req = 0; - mstate ms = (mstate)msp; - if (!ok_magic(ms)) { - USAGE_ERROR_ACTION(ms,ms); - return 0; - } - if (n_elements != 0) { - req = n_elements * elem_size; - if (((n_elements | elem_size) & ~(size_t)0xffff) && - (req / n_elements != elem_size)) - req = MAX_SIZE_T; /* force downstream failure on overflow */ - } - mem = internal_malloc(ms, req); - if (mem != 0 && calloc_must_clear(mem2chunk(mem))) - memset(mem, 0, req); - return mem; -} - -void* 
mspace_realloc(mspace msp, void* oldmem, size_t bytes) { - void* mem = 0; - if (oldmem == 0) { - mem = mspace_malloc(msp, bytes); - } - else if (bytes >= MAX_REQUEST) { - MALLOC_FAILURE_ACTION; - } -#ifdef REALLOC_ZERO_BYTES_FREES - else if (bytes == 0) { - mspace_free(msp, oldmem); - } -#endif /* REALLOC_ZERO_BYTES_FREES */ - else { - size_t nb = request2size(bytes); - mchunkptr oldp = mem2chunk(oldmem); -#if ! FOOTERS - mstate m = (mstate)msp; -#else /* FOOTERS */ - mstate m = get_mstate_for(oldp); - if (!ok_magic(m)) { - USAGE_ERROR_ACTION(m, oldmem); - return 0; - } -#endif /* FOOTERS */ - if (!PREACTION(m)) { - mchunkptr newp = try_realloc_chunk(m, oldp, nb, 1); - POSTACTION(m); - if (newp != 0) { - check_inuse_chunk(m, newp); - mem = chunk2mem(newp); - } - else { - mem = mspace_malloc(m, bytes); - if (mem != 0) { - size_t oc = chunksize(oldp) - overhead_for(oldp); - memcpy(mem, oldmem, (oc < bytes)? oc : bytes); - mspace_free(m, oldmem); - } - } - } - } - return mem; -} - -void* mspace_realloc_in_place(mspace msp, void* oldmem, size_t bytes) { - void* mem = 0; - if (oldmem != 0) { - if (bytes >= MAX_REQUEST) { - MALLOC_FAILURE_ACTION; - } - else { - size_t nb = request2size(bytes); - mchunkptr oldp = mem2chunk(oldmem); -#if ! 
FOOTERS - mstate m = (mstate)msp; -#else /* FOOTERS */ - mstate m = get_mstate_for(oldp); - (void)msp; /* placate people compiling -Wunused */ - if (!ok_magic(m)) { - USAGE_ERROR_ACTION(m, oldmem); - return 0; - } -#endif /* FOOTERS */ - if (!PREACTION(m)) { - mchunkptr newp = try_realloc_chunk(m, oldp, nb, 0); - POSTACTION(m); - if (newp == oldp) { - check_inuse_chunk(m, newp); - mem = oldmem; - } - } - } - } - return mem; -} - -void* mspace_memalign(mspace msp, size_t alignment, size_t bytes) { - mstate ms = (mstate)msp; - if (!ok_magic(ms)) { - USAGE_ERROR_ACTION(ms,ms); - return 0; - } - if (alignment <= MALLOC_ALIGNMENT) - return mspace_malloc(msp, bytes); - return internal_memalign(ms, alignment, bytes); -} - -void** mspace_independent_calloc(mspace msp, size_t n_elements, - size_t elem_size, void* chunks[]) { - size_t sz = elem_size; /* serves as 1-element array */ - mstate ms = (mstate)msp; - if (!ok_magic(ms)) { - USAGE_ERROR_ACTION(ms,ms); - return 0; - } - return ialloc(ms, n_elements, &sz, 3, chunks); -} - -void** mspace_independent_comalloc(mspace msp, size_t n_elements, - size_t sizes[], void* chunks[]) { - mstate ms = (mstate)msp; - if (!ok_magic(ms)) { - USAGE_ERROR_ACTION(ms,ms); - return 0; - } - return ialloc(ms, n_elements, sizes, 0, chunks); -} - -size_t mspace_bulk_free(mspace msp, void* array[], size_t nelem) { - return internal_bulk_free((mstate)msp, array, nelem); -} - -#if MALLOC_INSPECT_ALL -void mspace_inspect_all(mspace msp, - void(*handler)(void *start, - void *end, - size_t used_bytes, - void* callback_arg), - void* arg) { - mstate ms = (mstate)msp; - if (ok_magic(ms)) { - if (!PREACTION(ms)) { - internal_inspect_all(ms, handler, arg); - POSTACTION(ms); - } - } - else { - USAGE_ERROR_ACTION(ms,ms); - } -} -#endif /* MALLOC_INSPECT_ALL */ - -int mspace_trim(mspace msp, size_t pad) { - int result = 0; - mstate ms = (mstate)msp; - if (ok_magic(ms)) { - if (!PREACTION(ms)) { - result = sys_trim(ms, pad); - POSTACTION(ms); - } - } - else { 
- USAGE_ERROR_ACTION(ms,ms); - } - return result; -} - -#if !NO_MALLOC_STATS -void mspace_malloc_stats(mspace msp) { - mstate ms = (mstate)msp; - if (ok_magic(ms)) { - internal_malloc_stats(ms); - } - else { - USAGE_ERROR_ACTION(ms,ms); - } -} -#endif /* NO_MALLOC_STATS */ - -size_t mspace_footprint(mspace msp) { - size_t result = 0; - mstate ms = (mstate)msp; - if (ok_magic(ms)) { - result = ms->footprint; - } - else { - USAGE_ERROR_ACTION(ms,ms); - } - return result; -} - -size_t mspace_max_footprint(mspace msp) { - size_t result = 0; - mstate ms = (mstate)msp; - if (ok_magic(ms)) { - result = ms->max_footprint; - } - else { - USAGE_ERROR_ACTION(ms,ms); - } - return result; -} - -size_t mspace_footprint_limit(mspace msp) { - size_t result = 0; - mstate ms = (mstate)msp; - if (ok_magic(ms)) { - size_t maf = ms->footprint_limit; - result = (maf == 0) ? MAX_SIZE_T : maf; - } - else { - USAGE_ERROR_ACTION(ms,ms); - } - return result; -} - -size_t mspace_set_footprint_limit(mspace msp, size_t bytes) { - size_t result = 0; - mstate ms = (mstate)msp; - if (ok_magic(ms)) { - if (bytes == 0) - result = granularity_align(1); /* Use minimal size */ - if (bytes == MAX_SIZE_T) - result = 0; /* disable */ - else - result = granularity_align(bytes); - ms->footprint_limit = result; - } - else { - USAGE_ERROR_ACTION(ms,ms); - } - return result; -} - -#if !NO_MALLINFO -struct mallinfo mspace_mallinfo(mspace msp) { - mstate ms = (mstate)msp; - if (!ok_magic(ms)) { - USAGE_ERROR_ACTION(ms,ms); - } - return internal_mallinfo(ms); -} -#endif /* NO_MALLINFO */ - -size_t mspace_usable_size(const void* mem) { - if (mem != 0) { - mchunkptr p = mem2chunk(mem); - if (is_inuse(p)) - return chunksize(p) - overhead_for(p); - } - return 0; -} - -int mspace_mallopt(int param_number, int value) { - return change_mparam(param_number, value); -} - -#endif /* MSPACES */ - - -/* -------------------- Alternative MORECORE functions ------------------- */ - -/* - Guidelines for creating a custom version 
of MORECORE: - - * For best performance, MORECORE should allocate in multiples of pagesize. - * MORECORE may allocate more memory than requested. (Or even less, - but this will usually result in a malloc failure.) - * MORECORE must not allocate memory when given argument zero, but - instead return one past the end address of memory from previous - nonzero call. - * For best performance, consecutive calls to MORECORE with positive - arguments should return increasing addresses, indicating that - space has been contiguously extended. - * Even though consecutive calls to MORECORE need not return contiguous - addresses, it must be OK for malloc'ed chunks to span multiple - regions in those cases where they do happen to be contiguous. - * MORECORE need not handle negative arguments -- it may instead - just return MFAIL when given negative arguments. - Negative arguments are always multiples of pagesize. MORECORE - must not misinterpret negative args as large positive unsigned - args. You can suppress all such calls from even occurring by defining - MORECORE_CANNOT_TRIM, - - As an example alternative MORECORE, here is a custom allocator - kindly contributed for pre-OSX macOS. It uses virtually but not - necessarily physically contiguous non-paged memory (locked in, - present and won't get swapped out). You can use it by uncommenting - this section, adding some #includes, and setting up the appropriate - defines above: - - #define MORECORE osMoreCore - - There is also a shutdown routine that should somehow be called for - cleanup upon program exit. 
- - #define MAX_POOL_ENTRIES 100 - #define MINIMUM_MORECORE_SIZE (64 * 1024U) - static int next_os_pool; - void *our_os_pools[MAX_POOL_ENTRIES]; - - void *osMoreCore(int size) - { - void *ptr = 0; - static void *sbrk_top = 0; - - if (size > 0) - { - if (size < MINIMUM_MORECORE_SIZE) - size = MINIMUM_MORECORE_SIZE; - if (CurrentExecutionLevel() == kTaskLevel) - ptr = PoolAllocateResident(size + RM_PAGE_SIZE, 0); - if (ptr == 0) - { - return (void *) MFAIL; - } - // save ptrs so they can be freed during cleanup - our_os_pools[next_os_pool] = ptr; - next_os_pool++; - ptr = (void *) ((((size_t) ptr) + RM_PAGE_MASK) & ~RM_PAGE_MASK); - sbrk_top = (char *) ptr + size; - return ptr; - } - else if (size < 0) - { - // we don't currently support shrink behavior - return (void *) MFAIL; - } - else - { - return sbrk_top; - } - } - - // cleanup any allocated memory pools - // called as last thing before shutting down driver - - void osCleanupMem(void) - { - void **ptr; - - for (ptr = our_os_pools; ptr < &our_os_pools[MAX_POOL_ENTRIES]; ptr++) - if (*ptr) - { - PoolDeallocate(*ptr); - *ptr = 0; - } - } - -*/ - - -/* ----------------------------------------------------------------------- -History: - v2.8.6 Wed Aug 29 06:57:58 2012 Doug Lea - * fix bad comparison in dlposix_memalign - * don't reuse adjusted asize in sys_alloc - * add LOCK_AT_FORK -- thanks to Kirill Artamonov for the suggestion - * reduce compiler warnings -- thanks to all who reported/suggested these - - v2.8.5 Sun May 22 10:26:02 2011 Doug Lea (dl at gee) - * Always perform unlink checks unless INSECURE - * Add posix_memalign. - * Improve realloc to expand in more cases; expose realloc_in_place. - Thanks to Peter Buhr for the suggestion. - * Add footprint_limit, inspect_all, bulk_free. Thanks - to Barry Hayes and others for the suggestions. - * Internal refactorings to avoid calls while holding locks - * Use non-reentrant locks by default. Thanks to Roland McGrath - for the suggestion. 
- * Small fixes to mspace_destroy, reset_on_error. - * Various configuration extensions/changes. Thanks - to all who contributed these. - - V2.8.4a Thu Apr 28 14:39:43 2011 (dl at gee.cs.oswego.edu) - * Update Creative Commons URL - - V2.8.4 Wed May 27 09:56:23 2009 Doug Lea (dl at gee) - * Use zeros instead of prev foot for is_mmapped - * Add mspace_track_large_chunks; thanks to Jean Brouwers - * Fix set_inuse in internal_realloc; thanks to Jean Brouwers - * Fix insufficient sys_alloc padding when using 16byte alignment - * Fix bad error check in mspace_footprint - * Adaptations for ptmalloc; thanks to Wolfram Gloger. - * Reentrant spin locks; thanks to Earl Chew and others - * Win32 improvements; thanks to Niall Douglas and Earl Chew - * Add NO_SEGMENT_TRAVERSAL and MAX_RELEASE_CHECK_RATE options - * Extension hook in malloc_state - * Various small adjustments to reduce warnings on some compilers - * Various configuration extensions/changes for more platforms. Thanks - to all who contributed these. - - V2.8.3 Thu Sep 22 11:16:32 2005 Doug Lea (dl at gee) - * Add max_footprint functions - * Ensure all appropriate literals are size_t - * Fix conditional compilation problem for some #define settings - * Avoid concatenating segments with the one provided - in create_mspace_with_base - * Rename some variables to avoid compiler shadowing warnings - * Use explicit lock initialization. - * Better handling of sbrk interference. - * Simplify and fix segment insertion, trimming and mspace_destroy - * Reinstate REALLOC_ZERO_BYTES_FREES option from 2.7.x - * Thanks especially to Dennis Flanagan for help on these. - - V2.8.2 Sun Jun 12 16:01:10 2005 Doug Lea (dl at gee) - * Fix memalign brace error. 
- - V2.8.1 Wed Jun 8 16:11:46 2005 Doug Lea (dl at gee) - * Fix improper #endif nesting in C++ - * Add explicit casts needed for C++ - - V2.8.0 Mon May 30 14:09:02 2005 Doug Lea (dl at gee) - * Use trees for large bins - * Support mspaces - * Use segments to unify sbrk-based and mmap-based system allocation, - removing need for emulation on most platforms without sbrk. - * Default safety checks - * Optional footer checks. Thanks to William Robertson for the idea. - * Internal code refactoring - * Incorporate suggestions and platform-specific changes. - Thanks to Dennis Flanagan, Colin Plumb, Niall Douglas, - Aaron Bachmann, Emery Berger, and others. - * Speed up non-fastbin processing enough to remove fastbins. - * Remove useless cfree() to avoid conflicts with other apps. - * Remove internal memcpy, memset. Compilers handle builtins better. - * Remove some options that no one ever used and rename others. - - V2.7.2 Sat Aug 17 09:07:30 2002 Doug Lea (dl at gee) - * Fix malloc_state bitmap array misdeclaration - - V2.7.1 Thu Jul 25 10:58:03 2002 Doug Lea (dl at gee) - * Allow tuning of FIRST_SORTED_BIN_SIZE - * Use PTR_UINT as type for all ptr->int casts. Thanks to John Belmonte. - * Better detection and support for non-contiguousness of MORECORE. - Thanks to Andreas Mueller, Conal Walsh, and Wolfram Gloger - * Bypass most of malloc if no frees. Thanks To Emery Berger. - * Fix freeing of old top non-contiguous chunk im sysmalloc. - * Raised default trim and map thresholds to 256K. - * Fix mmap-related #defines. Thanks to Lubos Lunak. - * Fix copy macros; added LACKS_FCNTL_H. Thanks to Neal Walfield. - * Branch-free bin calculation - * Default trim and mmap thresholds now 256K. - - V2.7.0 Sun Mar 11 14:14:06 2001 Doug Lea (dl at gee) - * Introduce independent_comalloc and independent_calloc. - Thanks to Michael Pachos for motivation and help. - * Make optional .h file available - * Allow > 2GB requests on 32bit systems. 
- * new WIN32 sbrk, mmap, munmap, lock code from <Walter@GeNeSys-e.de>. - Thanks also to Andreas Mueller <a.mueller at paradatec.de>, - and Anonymous. - * Allow override of MALLOC_ALIGNMENT (Thanks to Ruud Waij for - helping test this.) - * memalign: check alignment arg - * realloc: don't try to shift chunks backwards, since this - leads to more fragmentation in some programs and doesn't - seem to help in any others. - * Collect all cases in malloc requiring system memory into sysmalloc - * Use mmap as backup to sbrk - * Place all internal state in malloc_state - * Introduce fastbins (although similar to 2.5.1) - * Many minor tunings and cosmetic improvements - * Introduce USE_PUBLIC_MALLOC_WRAPPERS, USE_MALLOC_LOCK - * Introduce MALLOC_FAILURE_ACTION, MORECORE_CONTIGUOUS - Thanks to Tony E. Bennett <tbennett@nvidia.com> and others. - * Include errno.h to support default failure action. - - V2.6.6 Sun Dec 5 07:42:19 1999 Doug Lea (dl at gee) - * return null for negative arguments - * Added Several WIN32 cleanups from Martin C. Fong <mcfong at yahoo.com> - * Add 'LACKS_SYS_PARAM_H' for those systems without 'sys/param.h' - (e.g. WIN32 platforms) - * Cleanup header file inclusion for WIN32 platforms - * Cleanup code to avoid Microsoft Visual C++ compiler complaints - * Add 'USE_DL_PREFIX' to quickly allow co-existence with existing - memory allocation routines - * Set 'malloc_getpagesize' for WIN32 platforms (needs more work) - * Use 'assert' rather than 'ASSERT' in WIN32 code to conform to - usage of 'assert' in non-WIN32 code - * Improve WIN32 'sbrk()' emulation's 'findRegion()' routine to - avoid infinite loop - * Always call 'fREe()' rather than 'free()' - - V2.6.5 Wed Jun 17 15:57:31 1998 Doug Lea (dl at gee) - * Fixed ordering problem with boundary-stamping - - V2.6.3 Sun May 19 08:17:58 1996 Doug Lea (dl at gee) - * Added pvalloc, as recommended by H.J. 
Liu - * Added 64bit pointer support mainly from Wolfram Gloger - * Added anonymously donated WIN32 sbrk emulation - * Malloc, calloc, getpagesize: add optimizations from Raymond Nijssen - * malloc_extend_top: fix mask error that caused wastage after - foreign sbrks - * Add linux mremap support code from HJ Liu - - V2.6.2 Tue Dec 5 06:52:55 1995 Doug Lea (dl at gee) - * Integrated most documentation with the code. - * Add support for mmap, with help from - Wolfram Gloger (Gloger@lrz.uni-muenchen.de). - * Use last_remainder in more cases. - * Pack bins using idea from colin@nyx10.cs.du.edu - * Use ordered bins instead of best-fit threshhold - * Eliminate block-local decls to simplify tracing and debugging. - * Support another case of realloc via move into top - * Fix error occuring when initial sbrk_base not word-aligned. - * Rely on page size for units instead of SBRK_UNIT to - avoid surprises about sbrk alignment conventions. - * Add mallinfo, mallopt. Thanks to Raymond Nijssen - (raymond@es.ele.tue.nl) for the suggestion. - * Add `pad' argument to malloc_trim and top_pad mallopt parameter. - * More precautions for cases where other routines call sbrk, - courtesy of Wolfram Gloger (Gloger@lrz.uni-muenchen.de). - * Added macros etc., allowing use in linux libc from - H.J. Lu (hjl@gnu.ai.mit.edu) - * Inverted this history list - - V2.6.1 Sat Dec 2 14:10:57 1995 Doug Lea (dl at gee) - * Re-tuned and fixed to behave more nicely with V2.6.0 changes. - * Removed all preallocation code since under current scheme - the work required to undo bad preallocations exceeds - the work saved in good cases for most test programs. - * No longer use return list or unconsolidated bins since - no scheme using them consistently outperforms those that don't - given above changes. - * Use best fit for very large chunks to prevent some worst-cases. - * Added some support for debugging - - V2.6.0 Sat Nov 4 07:05:23 1995 Doug Lea (dl at gee) - * Removed footers when chunks are in use. 
Thanks to - Paul Wilson (wilson@cs.texas.edu) for the suggestion. - - V2.5.4 Wed Nov 1 07:54:51 1995 Doug Lea (dl at gee) - * Added malloc_trim, with help from Wolfram Gloger - (wmglo@Dent.MED.Uni-Muenchen.DE). - - V2.5.3 Tue Apr 26 10:16:01 1994 Doug Lea (dl at g) - - V2.5.2 Tue Apr 5 16:20:40 1994 Doug Lea (dl at g) - * realloc: try to expand in both directions - * malloc: swap order of clean-bin strategy; - * realloc: only conditionally expand backwards - * Try not to scavenge used bins - * Use bin counts as a guide to preallocation - * Occasionally bin return list chunks in first scan - * Add a few optimizations from colin@nyx10.cs.du.edu - - V2.5.1 Sat Aug 14 15:40:43 1993 Doug Lea (dl at g) - * faster bin computation & slightly different binning - * merged all consolidations to one part of malloc proper - (eliminating old malloc_find_space & malloc_clean_bin) - * Scan 2 returns chunks (not just 1) - * Propagate failure in realloc if malloc returns 0 - * Add stuff to allow compilation on non-ANSI compilers - from kpv@research.att.com - - V2.5 Sat Aug 7 07:41:59 1993 Doug Lea (dl at g.oswego.edu) - * removed potential for odd address access in prev_chunk - * removed dependency on getpagesize.h - * misc cosmetics and a bit more internal documentation - * anticosmetics: mangled names in macros to evade debugger strangeness - * tested on sparc, hp-700, dec-mips, rs6000 - with gcc & native cc (hp, dec only) allowing - Detlefs & Zorn comparison study (in SIGPLAN Notices.) - - Trial version Fri Aug 28 13:14:29 1992 Doug Lea (dl at g.oswego.edu) - * Based loosely on libg++-1.2X malloc. (It retains some of the overall - structure of old version, but most details differ.) - -*/ diff --git a/lib/libc/wasi/emmalloc/emmalloc.c b/lib/libc/wasi/emmalloc/emmalloc.c new file mode 100644 index 0000000000..aed6eee1cc --- /dev/null +++ b/lib/libc/wasi/emmalloc/emmalloc.c @@ -0,0 +1,1535 @@ +/* + * Copyright 2018 The Emscripten Authors. All rights reserved. 
+ * Emscripten is available under two separate licenses, the MIT license and the + * University of Illinois/NCSA Open Source License. Both these licenses can be + * found in the LICENSE file. + * + * Simple minimalistic but efficient sbrk()-based malloc/free that works in + * singlethreaded and multithreaded builds. + * + * Assumptions: + * + * - sbrk() is used to claim new memory (sbrk handles geometric/linear + * - overallocation growth) + * - sbrk() can be used by other code outside emmalloc. + * - sbrk() is very fast in most cases (internal wasm call). + * - sbrk() returns pointers with an alignment of alignof(max_align_t) + * + * Invariants: + * + * - Per-allocation header overhead is 8 bytes, smallest allocated payload + * amount is 8 bytes, and a multiple of 4 bytes. + * - Acquired memory blocks are subdivided into disjoint regions that lie + * next to each other. + * - A region is either in used or free. + * Used regions may be adjacent, and a used and unused region + * may be adjacent, but not two unused ones - they would be + * merged. + * - Memory allocation takes constant time, unless the alloc needs to sbrk() + * or memory is very close to being exhausted. + * + * Debugging: + * + * - If not NDEBUG, runtime assert()s are in use. + * - If EMMALLOC_MEMVALIDATE is defined, a large amount of extra checks are done. + * - If EMMALLOC_VERBOSE is defined, a lot of operations are logged + * out, in addition to EMMALLOC_MEMVALIDATE. + * - Debugging and logging directly uses console.log via uses EM_ASM, not + * printf etc., to minimize any risk of debugging or logging depending on + * malloc. + */ + +#include <stdalign.h> +#include <stdbool.h> +#include <stddef.h> +#include <stdint.h> +#include <unistd.h> +#include <memory.h> +#include <assert.h> +#include <malloc.h> +#include <limits.h> +#include <stdlib.h> + +#ifdef __EMSCRIPTEN_TRACING__ +#include <emscripten/trace.h> +#endif + +// Defind by the linker to have the address of the start of the heap. 
+extern unsigned char __heap_base; + +// Behavior of right shifting a signed integer is compiler implementation defined. +static_assert((((int32_t)0x80000000U) >> 31) == -1, "This malloc implementation requires that right-shifting a signed integer produces a sign-extending (arithmetic) shift!"); + +// Configuration: specifies the minimum alignment that malloc()ed memory outputs. Allocation requests with smaller alignment +// than this will yield an allocation with this much alignment. +#define MALLOC_ALIGNMENT alignof(max_align_t) +static_assert(alignof(max_align_t) == 16, "max_align_t must be correct"); + +#define EMMALLOC_EXPORT __attribute__((__weak__)) + +#define MIN(x, y) ((x) < (y) ? (x) : (y)) +#define MAX(x, y) ((x) > (y) ? (x) : (y)) + +#define NUM_FREE_BUCKETS 64 +#define BUCKET_BITMASK_T uint64_t + +// Dynamic memory is subdivided into regions, in the format + +// <size:uint32_t> ..... <size:uint32_t> | <size:uint32_t> ..... <size:uint32_t> | <size:uint32_t> ..... <size:uint32_t> | ..... + +// That is, at the bottom and top end of each memory region, the size of that region is stored. That allows traversing the +// memory regions backwards and forwards. Because each allocation must be at least a multiple of 4 bytes, the lowest two bits of +// each size field is unused. Free regions are distinguished by used regions by having the FREE_REGION_FLAG bit present +// in the size field. I.e. for free regions, the size field is odd, and for used regions, the size field reads even. +#define FREE_REGION_FLAG 0x1u + +// Attempts to malloc() more than this many bytes would cause an overflow when calculating the size of a region, +// therefore allocations larger than this are short-circuited immediately on entry. +#define MAX_ALLOC_SIZE 0xFFFFFFC7u + +// A free region has the following structure: +// <size:size_t> <prevptr> <nextptr> ... <size:size_t> + +typedef struct Region +{ + size_t size; + // Use a circular doubly linked list to represent free region data. 
+ struct Region *prev, *next; + // ... N bytes of free data + size_t _at_the_end_of_this_struct_size; // do not dereference, this is present for convenient struct sizeof() computation only +} Region; + +// Each memory block starts with a RootRegion at the beginning. +// The RootRegion specifies the size of the region block, and forms a linked +// list of all RootRegions in the program, starting with `listOfAllRegions` +// below. +typedef struct RootRegion +{ + uint32_t size; + struct RootRegion *next; + uint8_t* endPtr; +} RootRegion; + +#if defined(__EMSCRIPTEN_PTHREADS__) +// In multithreaded builds, use a simple global spinlock strategy to acquire/release access to the memory allocator. +static volatile uint8_t multithreadingLock = 0; +#define MALLOC_ACQUIRE() while(__sync_lock_test_and_set(&multithreadingLock, 1)) { while(multithreadingLock) { /*nop*/ } } +#define MALLOC_RELEASE() __sync_lock_release(&multithreadingLock) +// Test code to ensure we have tight malloc acquire/release guards in place. +#define ASSERT_MALLOC_IS_ACQUIRED() assert(multithreadingLock == 1) +#else +// In singlethreaded builds, no need for locking. +#define MALLOC_ACQUIRE() ((void)0) +#define MALLOC_RELEASE() ((void)0) +#define ASSERT_MALLOC_IS_ACQUIRED() ((void)0) +#endif + +#define IS_POWER_OF_2(val) (((val) & ((val)-1)) == 0) +#define ALIGN_UP(ptr, alignment) ((uint8_t*)((((uintptr_t)(ptr)) + ((alignment)-1)) & ~((alignment)-1))) +#define HAS_ALIGNMENT(ptr, alignment) ((((uintptr_t)(ptr)) & ((alignment)-1)) == 0) + +static_assert(IS_POWER_OF_2(MALLOC_ALIGNMENT), "MALLOC_ALIGNMENT must be a power of two value!"); +static_assert(MALLOC_ALIGNMENT >= 4, "Smallest possible MALLOC_ALIGNMENT if 4!"); + +// A region that contains as payload a single forward linked list of pointers to +// root regions of each disjoint region blocks. +static RootRegion *listOfAllRegions = NULL; + +// For each of the buckets, maintain a linked list head node. 
The head node for each +// free region is a sentinel node that does not actually represent any free space, but +// the sentinel is used to avoid awkward testing against (if node == freeRegionHeadNode) +// when adding and removing elements from the linked list, i.e. we are guaranteed that +// the sentinel node is always fixed and there, and the actual free region list elements +// start at freeRegionBuckets[i].next each. +static Region freeRegionBuckets[NUM_FREE_BUCKETS] = { + { .prev = &freeRegionBuckets[0], .next = &freeRegionBuckets[0] }, + { .prev = &freeRegionBuckets[1], .next = &freeRegionBuckets[1] }, + { .prev = &freeRegionBuckets[2], .next = &freeRegionBuckets[2] }, + { .prev = &freeRegionBuckets[3], .next = &freeRegionBuckets[3] }, + { .prev = &freeRegionBuckets[4], .next = &freeRegionBuckets[4] }, + { .prev = &freeRegionBuckets[5], .next = &freeRegionBuckets[5] }, + { .prev = &freeRegionBuckets[6], .next = &freeRegionBuckets[6] }, + { .prev = &freeRegionBuckets[7], .next = &freeRegionBuckets[7] }, + { .prev = &freeRegionBuckets[8], .next = &freeRegionBuckets[8] }, + { .prev = &freeRegionBuckets[9], .next = &freeRegionBuckets[9] }, + { .prev = &freeRegionBuckets[10], .next = &freeRegionBuckets[10] }, + { .prev = &freeRegionBuckets[11], .next = &freeRegionBuckets[11] }, + { .prev = &freeRegionBuckets[12], .next = &freeRegionBuckets[12] }, + { .prev = &freeRegionBuckets[13], .next = &freeRegionBuckets[13] }, + { .prev = &freeRegionBuckets[14], .next = &freeRegionBuckets[14] }, + { .prev = &freeRegionBuckets[15], .next = &freeRegionBuckets[15] }, + { .prev = &freeRegionBuckets[16], .next = &freeRegionBuckets[16] }, + { .prev = &freeRegionBuckets[17], .next = &freeRegionBuckets[17] }, + { .prev = &freeRegionBuckets[18], .next = &freeRegionBuckets[18] }, + { .prev = &freeRegionBuckets[19], .next = &freeRegionBuckets[19] }, + { .prev = &freeRegionBuckets[20], .next = &freeRegionBuckets[20] }, + { .prev = &freeRegionBuckets[21], .next = &freeRegionBuckets[21] }, 
+ { .prev = &freeRegionBuckets[22], .next = &freeRegionBuckets[22] }, + { .prev = &freeRegionBuckets[23], .next = &freeRegionBuckets[23] }, + { .prev = &freeRegionBuckets[24], .next = &freeRegionBuckets[24] }, + { .prev = &freeRegionBuckets[25], .next = &freeRegionBuckets[25] }, + { .prev = &freeRegionBuckets[26], .next = &freeRegionBuckets[26] }, + { .prev = &freeRegionBuckets[27], .next = &freeRegionBuckets[27] }, + { .prev = &freeRegionBuckets[28], .next = &freeRegionBuckets[28] }, + { .prev = &freeRegionBuckets[29], .next = &freeRegionBuckets[29] }, + { .prev = &freeRegionBuckets[30], .next = &freeRegionBuckets[30] }, + { .prev = &freeRegionBuckets[31], .next = &freeRegionBuckets[31] }, + { .prev = &freeRegionBuckets[32], .next = &freeRegionBuckets[32] }, + { .prev = &freeRegionBuckets[33], .next = &freeRegionBuckets[33] }, + { .prev = &freeRegionBuckets[34], .next = &freeRegionBuckets[34] }, + { .prev = &freeRegionBuckets[35], .next = &freeRegionBuckets[35] }, + { .prev = &freeRegionBuckets[36], .next = &freeRegionBuckets[36] }, + { .prev = &freeRegionBuckets[37], .next = &freeRegionBuckets[37] }, + { .prev = &freeRegionBuckets[38], .next = &freeRegionBuckets[38] }, + { .prev = &freeRegionBuckets[39], .next = &freeRegionBuckets[39] }, + { .prev = &freeRegionBuckets[40], .next = &freeRegionBuckets[40] }, + { .prev = &freeRegionBuckets[41], .next = &freeRegionBuckets[41] }, + { .prev = &freeRegionBuckets[42], .next = &freeRegionBuckets[42] }, + { .prev = &freeRegionBuckets[43], .next = &freeRegionBuckets[43] }, + { .prev = &freeRegionBuckets[44], .next = &freeRegionBuckets[44] }, + { .prev = &freeRegionBuckets[45], .next = &freeRegionBuckets[45] }, + { .prev = &freeRegionBuckets[46], .next = &freeRegionBuckets[46] }, + { .prev = &freeRegionBuckets[47], .next = &freeRegionBuckets[47] }, + { .prev = &freeRegionBuckets[48], .next = &freeRegionBuckets[48] }, + { .prev = &freeRegionBuckets[49], .next = &freeRegionBuckets[49] }, + { .prev = &freeRegionBuckets[50], 
.next = &freeRegionBuckets[50] }, + { .prev = &freeRegionBuckets[51], .next = &freeRegionBuckets[51] }, + { .prev = &freeRegionBuckets[52], .next = &freeRegionBuckets[52] }, + { .prev = &freeRegionBuckets[53], .next = &freeRegionBuckets[53] }, + { .prev = &freeRegionBuckets[54], .next = &freeRegionBuckets[54] }, + { .prev = &freeRegionBuckets[55], .next = &freeRegionBuckets[55] }, + { .prev = &freeRegionBuckets[56], .next = &freeRegionBuckets[56] }, + { .prev = &freeRegionBuckets[57], .next = &freeRegionBuckets[57] }, + { .prev = &freeRegionBuckets[58], .next = &freeRegionBuckets[58] }, + { .prev = &freeRegionBuckets[59], .next = &freeRegionBuckets[59] }, + { .prev = &freeRegionBuckets[60], .next = &freeRegionBuckets[60] }, + { .prev = &freeRegionBuckets[61], .next = &freeRegionBuckets[61] }, + { .prev = &freeRegionBuckets[62], .next = &freeRegionBuckets[62] }, + { .prev = &freeRegionBuckets[63], .next = &freeRegionBuckets[63] }, +}; + +// A bitmask that tracks the population status for each of the 64 distinct memory regions: +// a zero at bit position i means that the free list bucket i is empty. This bitmask is +// used to avoid redundant scanning of the 64 different free region buckets: instead by +// looking at the bitmask we can find in constant time an index to a free region bucket +// that contains free memory of desired size. +static BUCKET_BITMASK_T freeRegionBucketsUsed = 0; + +// Amount of bytes taken up by allocation header data +#define REGION_HEADER_SIZE (2*sizeof(size_t)) + +// Smallest allocation size that is possible is 2*pointer size, since payload of each region must at least contain space +// to store the free region linked list prev and next pointers. An allocation size smaller than this will be rounded up +// to this size. +#define SMALLEST_ALLOCATION_SIZE (2*sizeof(void*)) + +/* Subdivide regions of free space into distinct circular doubly linked lists, where each linked list +represents a range of free space blocks. 
The following function compute_free_list_bucket() converts +an allocation size to the bucket index that should be looked at. The buckets are grouped as follows: + + Bucket 0: [8, 15], range size=8 + Bucket 1: [16, 23], range size=8 + Bucket 2: [24, 31], range size=8 + Bucket 3: [32, 39], range size=8 + Bucket 4: [40, 47], range size=8 + Bucket 5: [48, 55], range size=8 + Bucket 6: [56, 63], range size=8 + Bucket 7: [64, 71], range size=8 + Bucket 8: [72, 79], range size=8 + Bucket 9: [80, 87], range size=8 + Bucket 10: [88, 95], range size=8 + Bucket 11: [96, 103], range size=8 + Bucket 12: [104, 111], range size=8 + Bucket 13: [112, 119], range size=8 + Bucket 14: [120, 159], range size=40 + Bucket 15: [160, 191], range size=32 + Bucket 16: [192, 223], range size=32 + Bucket 17: [224, 255], range size=32 + Bucket 18: [256, 319], range size=64 + Bucket 19: [320, 383], range size=64 + Bucket 20: [384, 447], range size=64 + Bucket 21: [448, 511], range size=64 + Bucket 22: [512, 639], range size=128 + Bucket 23: [640, 767], range size=128 + Bucket 24: [768, 895], range size=128 + Bucket 25: [896, 1023], range size=128 + Bucket 26: [1024, 1279], range size=256 + Bucket 27: [1280, 1535], range size=256 + Bucket 28: [1536, 1791], range size=256 + Bucket 29: [1792, 2047], range size=256 + Bucket 30: [2048, 2559], range size=512 + Bucket 31: [2560, 3071], range size=512 + Bucket 32: [3072, 3583], range size=512 + Bucket 33: [3584, 6143], range size=2560 + Bucket 34: [6144, 8191], range size=2048 + Bucket 35: [8192, 12287], range size=4096 + Bucket 36: [12288, 16383], range size=4096 + Bucket 37: [16384, 24575], range size=8192 + Bucket 38: [24576, 32767], range size=8192 + Bucket 39: [32768, 49151], range size=16384 + Bucket 40: [49152, 65535], range size=16384 + Bucket 41: [65536, 98303], range size=32768 + Bucket 42: [98304, 131071], range size=32768 + Bucket 43: [131072, 196607], range size=65536 + Bucket 44: [196608, 262143], range size=65536 + Bucket 45: [262144, 
393215], range size=131072 + Bucket 46: [393216, 524287], range size=131072 + Bucket 47: [524288, 786431], range size=262144 + Bucket 48: [786432, 1048575], range size=262144 + Bucket 49: [1048576, 1572863], range size=524288 + Bucket 50: [1572864, 2097151], range size=524288 + Bucket 51: [2097152, 3145727], range size=1048576 + Bucket 52: [3145728, 4194303], range size=1048576 + Bucket 53: [4194304, 6291455], range size=2097152 + Bucket 54: [6291456, 8388607], range size=2097152 + Bucket 55: [8388608, 12582911], range size=4194304 + Bucket 56: [12582912, 16777215], range size=4194304 + Bucket 57: [16777216, 25165823], range size=8388608 + Bucket 58: [25165824, 33554431], range size=8388608 + Bucket 59: [33554432, 50331647], range size=16777216 + Bucket 60: [50331648, 67108863], range size=16777216 + Bucket 61: [67108864, 100663295], range size=33554432 + Bucket 62: [100663296, 134217727], range size=33554432 + Bucket 63: 134217728 bytes and larger. */ +static_assert(NUM_FREE_BUCKETS == 64, "Following function is tailored specifically for NUM_FREE_BUCKETS == 64 case"); +static int compute_free_list_bucket(size_t allocSize) +{ + if (allocSize < 128) return (allocSize >> 3) - 1; + int clz = __builtin_clz(allocSize); + int bucketIndex = (clz > 19) ? 
110 - (clz<<2) + ((allocSize >> (29-clz)) ^ 4) : MIN(71 - (clz<<1) + ((allocSize >> (30-clz)) ^ 2), NUM_FREE_BUCKETS-1); + assert(bucketIndex >= 0); + assert(bucketIndex < NUM_FREE_BUCKETS); + return bucketIndex; +} + +#define DECODE_CEILING_SIZE(size) ((size_t)((size) & ~FREE_REGION_FLAG)) + +static Region *prev_region(Region *region) +{ + size_t prevRegionSize = ((size_t*)region)[-1]; + prevRegionSize = DECODE_CEILING_SIZE(prevRegionSize); + return (Region*)((uint8_t*)region - prevRegionSize); +} + +static Region *next_region(Region *region) +{ + return (Region*)((uint8_t*)region + region->size); +} + +static size_t region_ceiling_size(Region *region) +{ + return ((size_t*)((uint8_t*)region + region->size))[-1]; +} + +static bool region_is_free(Region *r) +{ + return region_ceiling_size(r) & FREE_REGION_FLAG; +} + +static bool region_is_in_use(Region *r) +{ + return r->size == region_ceiling_size(r); +} + +static size_t size_of_region_from_ceiling(Region *r) +{ + size_t size = region_ceiling_size(r); + return DECODE_CEILING_SIZE(size); +} + +static bool debug_region_is_consistent(Region *r) +{ + assert(r); + size_t sizeAtBottom = r->size; + size_t sizeAtCeiling = size_of_region_from_ceiling(r); + return sizeAtBottom == sizeAtCeiling; +} + +static uint8_t *region_payload_start_ptr(Region *region) +{ + return (uint8_t*)region + sizeof(size_t); +} + +static uint8_t *region_payload_end_ptr(Region *region) +{ + return (uint8_t*)region + region->size - sizeof(size_t); +} + +static void create_used_region(void *ptr, size_t size) +{ + assert(ptr); + assert(HAS_ALIGNMENT(ptr, sizeof(size_t))); + assert(HAS_ALIGNMENT(size, sizeof(size_t))); + assert(size >= sizeof(Region)); + *(size_t*)ptr = size; + ((size_t*)ptr)[(size/sizeof(size_t))-1] = size; +} + +static void create_free_region(void *ptr, size_t size) +{ + assert(ptr); + assert(HAS_ALIGNMENT(ptr, sizeof(size_t))); + assert(HAS_ALIGNMENT(size, sizeof(size_t))); + assert(size >= sizeof(Region)); + Region *freeRegion = 
(Region*)ptr; + freeRegion->size = size; + ((size_t*)ptr)[(size/sizeof(size_t))-1] = size | FREE_REGION_FLAG; +} + +static void prepend_to_free_list(Region *region, Region *prependTo) +{ + assert(region); + assert(prependTo); + // N.b. the region we are prepending to is always the sentinel node, + // which represents a dummy node that is technically not a free node, so + // region_is_free(prependTo) does not hold. + assert(region_is_free((Region*)region)); + region->next = prependTo; + region->prev = prependTo->prev; + assert(region->prev); + prependTo->prev = region; + region->prev->next = region; +} + +static void unlink_from_free_list(Region *region) +{ + assert(region); + assert(region_is_free((Region*)region)); + assert(region->prev); + assert(region->next); + region->prev->next = region->next; + region->next->prev = region->prev; +} + +static void link_to_free_list(Region *freeRegion) +{ + assert(freeRegion); + assert(freeRegion->size >= sizeof(Region)); + int bucketIndex = compute_free_list_bucket(freeRegion->size-REGION_HEADER_SIZE); + Region *freeListHead = freeRegionBuckets + bucketIndex; + freeRegion->prev = freeListHead; + freeRegion->next = freeListHead->next; + assert(freeRegion->next); + freeListHead->next = freeRegion; + freeRegion->next->prev = freeRegion; + freeRegionBucketsUsed |= ((BUCKET_BITMASK_T)1) << bucketIndex; +} + +#if 0 +static void dump_memory_regions() +{ + ASSERT_MALLOC_IS_ACQUIRED(); + RootRegion *root = listOfAllRegions; + MAIN_THREAD_ASYNC_EM_ASM(console.log('All memory regions:')); + while(root) + { + Region *r = (Region*)root; + assert(debug_region_is_consistent(r)); + uint8_t *lastRegionEnd = root->endPtr; + MAIN_THREAD_ASYNC_EM_ASM(console.log('Region block 0x'+($0>>>0).toString(16)+' - 0x'+($1>>>0).toString(16)+ ' ('+($2>>>0)+' bytes):'), + r, lastRegionEnd, lastRegionEnd-(uint8_t*)r); + while((uint8_t*)r < lastRegionEnd) + { + MAIN_THREAD_ASYNC_EM_ASM(console.log('Region 0x'+($0>>>0).toString(16)+', size: '+($1>>>0)+' 
('+($2?"used":"--FREE--")+')'), + r, r->size, region_ceiling_size(r) == r->size); + + assert(debug_region_is_consistent(r)); + size_t sizeFromCeiling = size_of_region_from_ceiling(r); + if (sizeFromCeiling != r->size) + MAIN_THREAD_ASYNC_EM_ASM(console.log('Corrupt region! Size marker at the end of the region does not match: '+($0>>>0)), sizeFromCeiling); + if (r->size == 0) + break; + r = next_region(r); + } + root = root->next; + MAIN_THREAD_ASYNC_EM_ASM(console.log("")); + } + MAIN_THREAD_ASYNC_EM_ASM(console.log('Free regions:')); + for(int i = 0; i < NUM_FREE_BUCKETS; ++i) + { + Region *prev = &freeRegionBuckets[i]; + Region *fr = freeRegionBuckets[i].next; + while(fr != &freeRegionBuckets[i]) + { + MAIN_THREAD_ASYNC_EM_ASM(console.log('In bucket '+$0+', free region 0x'+($1>>>0).toString(16)+', size: ' + ($2>>>0) + ' (size at ceiling: '+($3>>>0)+'), prev: 0x' + ($4>>>0).toString(16) + ', next: 0x' + ($5>>>0).toString(16)), + i, fr, fr->size, size_of_region_from_ceiling(fr), fr->prev, fr->next); + assert(debug_region_is_consistent(fr)); + assert(region_is_free(fr)); + assert(fr->prev == prev); + prev = fr; + assert(fr->next != fr); + assert(fr->prev != fr); + fr = fr->next; + } + } + MAIN_THREAD_ASYNC_EM_ASM(console.log('Free bucket index map: ' + ($0>>>0).toString(2) + ' ' + ($1>>>0).toString(2)), (uint32_t)(freeRegionBucketsUsed >> 32), (uint32_t)freeRegionBucketsUsed); + MAIN_THREAD_ASYNC_EM_ASM(console.log("")); +} + +void emmalloc_dump_memory_regions() +{ + MALLOC_ACQUIRE(); + dump_memory_regions(); + MALLOC_RELEASE(); +} + +static int validate_memory_regions() +{ + ASSERT_MALLOC_IS_ACQUIRED(); + RootRegion *root = listOfAllRegions; + while(root) + { + Region *r = (Region*)root; + if (!debug_region_is_consistent(r)) + { + MAIN_THREAD_ASYNC_EM_ASM(console.error('Used region 0x'+($0>>>0).toString(16)+', size: '+($1>>>0)+' ('+($2?"used":"--FREE--")+') is corrupt (size markers in the beginning and at the end of the region do not match!)'), + r, r->size, 
region_ceiling_size(r) == r->size); + return 1; + } + uint8_t *lastRegionEnd = root->endPtr; + while((uint8_t*)r < lastRegionEnd) + { + if (!debug_region_is_consistent(r)) + { + MAIN_THREAD_ASYNC_EM_ASM(console.error('Used region 0x'+($0>>>0).toString(16)+', size: '+($1>>>0)+' ('+($2?"used":"--FREE--")+') is corrupt (size markers in the beginning and at the end of the region do not match!)'), + r, r->size, region_ceiling_size(r) == r->size); + return 1; + } + if (r->size == 0) + break; + r = next_region(r); + } + root = root->next; + } + for(int i = 0; i < NUM_FREE_BUCKETS; ++i) + { + Region *prev = &freeRegionBuckets[i]; + Region *fr = freeRegionBuckets[i].next; + while(fr != &freeRegionBuckets[i]) + { + if (!debug_region_is_consistent(fr) || !region_is_free(fr) || fr->prev != prev || fr->next == fr || fr->prev == fr) + { + MAIN_THREAD_ASYNC_EM_ASM(console.log('In bucket '+$0+', free region 0x'+($1>>>0).toString(16)+', size: ' + ($2>>>0) + ' (size at ceiling: '+($3>>>0)+'), prev: 0x' + ($4>>>0).toString(16) + ', next: 0x' + ($5>>>0).toString(16) + ' is corrupt!'), + i, fr, fr->size, size_of_region_from_ceiling(fr), fr->prev, fr->next); + return 1; + } + prev = fr; + fr = fr->next; + } + } + return 0; +} + +int emmalloc_validate_memory_regions() +{ + MALLOC_ACQUIRE(); + int memoryError = validate_memory_regions(); + MALLOC_RELEASE(); + return memoryError; +} +#endif + +static bool claim_more_memory(size_t numBytes) +{ +#ifdef EMMALLOC_VERBOSE + MAIN_THREAD_ASYNC_EM_ASM(console.log('claim_more_memory(numBytes='+($0>>>0)+ ')'), numBytes); +#endif + +#ifdef EMMALLOC_MEMVALIDATE + validate_memory_regions(); +#endif + + uint8_t *startPtr; + uint8_t *endPtr; + do { + // If this is the first time we're called, see if we can use + // the initial heap memory set up by wasm-ld. 
+ if (!listOfAllRegions) { + unsigned char *heap_end = sbrk(0); + if (numBytes <= (size_t)(heap_end - &__heap_base)) { + startPtr = &__heap_base; + endPtr = heap_end; + break; + } + } + + // Round numBytes up to the nearest page size. + numBytes = (numBytes + (PAGE_SIZE-1)) & -PAGE_SIZE; + + // Claim memory via sbrk + startPtr = (uint8_t*)sbrk(numBytes); + if ((intptr_t)startPtr == -1) + { +#ifdef EMMALLOC_VERBOSE + MAIN_THREAD_ASYNC_EM_ASM(console.error('claim_more_memory: sbrk failed!')); +#endif + return false; + } +#ifdef EMMALLOC_VERBOSE + MAIN_THREAD_ASYNC_EM_ASM(console.log('claim_more_memory: claimed 0x' + ($0>>>0).toString(16) + ' - 0x' + ($1>>>0).toString(16) + ' (' + ($2>>>0) + ' bytes) via sbrk()'), startPtr, startPtr + numBytes, numBytes); +#endif + assert(HAS_ALIGNMENT(startPtr, alignof(size_t))); + endPtr = startPtr + numBytes; + } while (0); + + // Create a sentinel region at the end of the new heap block + Region *endSentinelRegion = (Region*)(endPtr - sizeof(Region)); + create_used_region(endSentinelRegion, sizeof(Region)); + + // If we are the sole user of sbrk(), it will feed us continuous/consecutive memory addresses - take advantage + // of that if so: instead of creating two disjoint memory regions blocks, expand the previous one to a larger size. + uint8_t *previousSbrkEndAddress = listOfAllRegions ? listOfAllRegions->endPtr : 0; + if (startPtr == previousSbrkEndAddress) + { + Region *prevEndSentinel = prev_region((Region*)startPtr); + assert(debug_region_is_consistent(prevEndSentinel)); + assert(region_is_in_use(prevEndSentinel)); + Region *prevRegion = prev_region(prevEndSentinel); + assert(debug_region_is_consistent(prevRegion)); + + listOfAllRegions->endPtr = endPtr; + + // Two scenarios, either the last region of the previous block was in use, in which case we need to create + // a new free region in the newly allocated space; or it was free, in which case we can extend that region + // to cover a larger size. 
+ if (region_is_free(prevRegion)) + { + size_t newFreeRegionSize = (uint8_t*)endSentinelRegion - (uint8_t*)prevRegion; + unlink_from_free_list(prevRegion); + create_free_region(prevRegion, newFreeRegionSize); + link_to_free_list(prevRegion); + return true; + } + // else: last region of the previous block was in use. Since we are joining two consecutive sbrk() blocks, + // we can swallow the end sentinel of the previous block away. + startPtr -= sizeof(Region); + } + else + { + // Create a root region at the start of the heap block + create_used_region(startPtr, sizeof(Region)); + + // Dynamic heap start region: + RootRegion *newRegionBlock = (RootRegion*)startPtr; + newRegionBlock->next = listOfAllRegions; // Pointer to next region block head + newRegionBlock->endPtr = endPtr; // Pointer to the end address of this region block + listOfAllRegions = newRegionBlock; + startPtr += sizeof(Region); + } + + // Create a new memory region for the new claimed free space. + create_free_region(startPtr, (uint8_t*)endSentinelRegion - startPtr); + link_to_free_list((Region*)startPtr); + return true; +} + +#if 0 +// Initialize emmalloc during static initialization. +// See system/lib/README.md for static constructor ordering. +__attribute__((constructor(47))) +static void initialize_emmalloc_heap() +{ + // Initialize circular doubly linked lists representing free space + // Never useful to unroll this for loop, just takes up code size. +#pragma clang loop unroll(disable) + for(int i = 0; i < NUM_FREE_BUCKETS; ++i) + freeRegionBuckets[i].prev = freeRegionBuckets[i].next = &freeRegionBuckets[i]; + +#ifdef EMMALLOC_VERBOSE + MAIN_THREAD_ASYNC_EM_ASM(console.log('initialize_emmalloc_heap()')); +#endif + + // Start with a tiny dynamic region. 
+ claim_more_memory(3*sizeof(Region)); +} + +void emmalloc_blank_slate_from_orbit() +{ + MALLOC_ACQUIRE(); + listOfAllRegions = NULL; + freeRegionBucketsUsed = 0; + initialize_emmalloc_heap(); + MALLOC_RELEASE(); +} +#endif + +static void *attempt_allocate(Region *freeRegion, size_t alignment, size_t size) +{ + ASSERT_MALLOC_IS_ACQUIRED(); + assert(freeRegion); + // Look at the next potential free region to allocate into. + // First, we should check if the free region has enough of payload bytes contained + // in it to accommodate the new allocation. This check needs to take account the + // requested allocation alignment, so the payload memory area needs to be rounded + // upwards to the desired alignment. + uint8_t *payloadStartPtr = region_payload_start_ptr(freeRegion); + uint8_t *payloadStartPtrAligned = ALIGN_UP(payloadStartPtr, alignment); + uint8_t *payloadEndPtr = region_payload_end_ptr(freeRegion); + + // Do we have enough free space, taking into account alignment? + if (payloadStartPtrAligned + size > payloadEndPtr) + return NULL; + + // We have enough free space, so the memory allocation will be made into this region. Remove this free region + // from the list of free regions: whatever slop remains will be later added back to the free region pool. + unlink_from_free_list(freeRegion); + + // Before we proceed further, fix up the boundary of this region and the region that precedes this one, + // so that the boundary between the two regions happens at a right spot for the payload to be aligned. + if (payloadStartPtr != payloadStartPtrAligned) + { + Region *prevRegion = prev_region((Region*)freeRegion); + // We never have two free regions adjacent to each other, so the region before this free + // region should be in use. 
+ assert(region_is_in_use(prevRegion)); + size_t regionBoundaryBumpAmount = payloadStartPtrAligned - payloadStartPtr; + size_t newThisRegionSize = freeRegion->size - regionBoundaryBumpAmount; + create_used_region(prevRegion, prevRegion->size + regionBoundaryBumpAmount); + freeRegion = (Region *)((uint8_t*)freeRegion + regionBoundaryBumpAmount); + freeRegion->size = newThisRegionSize; + } + // Next, we need to decide whether this region is so large that it should be split into two regions, + // one representing the newly used memory area, and at the high end a remaining leftover free area. + // This splitting to two is done always if there is enough space for the high end to fit a region. + // Carve 'size' bytes of payload off this region. So, + // [sz prev next sz] + // becomes + // [sz payload sz] [sz prev next sz] + if (sizeof(Region) + REGION_HEADER_SIZE + size <= freeRegion->size) + { + // There is enough space to keep a free region at the end of the carved out block + // -> construct the new block + Region *newFreeRegion = (Region *)((uint8_t*)freeRegion + REGION_HEADER_SIZE + size); + create_free_region(newFreeRegion, freeRegion->size - size - REGION_HEADER_SIZE); + link_to_free_list(newFreeRegion); + + // Recreate the resized Region under its new size. + create_used_region(freeRegion, size + REGION_HEADER_SIZE); + } + else + { + // There is not enough space to split the free memory region into used+free parts, so consume the whole + // region as used memory, not leaving a free memory region behind. + // Initialize the free region as used by resetting the ceiling size to the same value as the size at bottom. 
+ ((size_t*)((uint8_t*)freeRegion + freeRegion->size))[-1] = freeRegion->size; + } + +#ifdef __EMSCRIPTEN_TRACING__ + emscripten_trace_record_allocation(freeRegion, freeRegion->size); +#endif + +#ifdef EMMALLOC_VERBOSE + MAIN_THREAD_ASYNC_EM_ASM(console.log('attempt_allocate - succeeded allocating memory, region ptr=0x' + ($0>>>0).toString(16) + ', align=' + $1 + ', payload size=' + ($2>>>0) + ' bytes)'), freeRegion, alignment, size); +#endif + + return (uint8_t*)freeRegion + sizeof(size_t); +} + +static size_t validate_alloc_alignment(size_t alignment) +{ + // Cannot perform allocations that are less than 4 byte aligned, because the Region + // control structures need to be aligned. Also round up to minimum outputted alignment. + alignment = MAX(alignment, MALLOC_ALIGNMENT); + // Arbitrary upper limit on alignment - very likely a programming bug if alignment is higher than this. + assert(alignment <= 1024*1024); + return alignment; +} + +static size_t validate_alloc_size(size_t size) +{ + assert(size + REGION_HEADER_SIZE > size); + + // Allocation sizes must be a multiple of pointer sizes, and at least 2*sizeof(pointer). + size_t validatedSize = size > SMALLEST_ALLOCATION_SIZE ? 
(size_t)ALIGN_UP(size, sizeof(Region*)) : SMALLEST_ALLOCATION_SIZE; + assert(validatedSize >= size); // 32-bit wraparound should not occur, too large sizes should be stopped before + + return validatedSize; +} + +static void *allocate_memory(size_t alignment, size_t size) +{ + ASSERT_MALLOC_IS_ACQUIRED(); + +#ifdef EMMALLOC_VERBOSE + MAIN_THREAD_ASYNC_EM_ASM(console.log('allocate_memory(align=' + $0 + ', size=' + ($1>>>0) + ' bytes)'), alignment, size); +#endif + +#ifdef EMMALLOC_MEMVALIDATE + validate_memory_regions(); +#endif + + if (!IS_POWER_OF_2(alignment)) + { +#ifdef EMMALLOC_VERBOSE + MAIN_THREAD_ASYNC_EM_ASM(console.log('Allocation failed: alignment not power of 2!')); +#endif + return 0; + } + + if (size > MAX_ALLOC_SIZE) + { +#ifdef EMMALLOC_VERBOSE + MAIN_THREAD_ASYNC_EM_ASM(console.log('Allocation failed: attempted allocation size is too large: ' + ($0 >>> 0) + 'bytes! (negative integer wraparound?)'), size); +#endif + return 0; + } + + alignment = validate_alloc_alignment(alignment); + size = validate_alloc_size(size); + + // Attempt to allocate memory starting from smallest bucket that can contain the required amount of memory. + // Under normal alignment conditions this should always be the first or second bucket we look at, but if + // performing an allocation with complex alignment, we may need to look at multiple buckets. + int bucketIndex = compute_free_list_bucket(size); + BUCKET_BITMASK_T bucketMask = freeRegionBucketsUsed >> bucketIndex; + + // Loop through each bucket that has free regions in it, based on bits set in freeRegionBucketsUsed bitmap. 
+ while(bucketMask) + { + BUCKET_BITMASK_T indexAdd = __builtin_ctzll(bucketMask); + bucketIndex += indexAdd; + bucketMask >>= indexAdd; + assert(bucketIndex >= 0); + assert(bucketIndex <= NUM_FREE_BUCKETS-1); + assert(freeRegionBucketsUsed & (((BUCKET_BITMASK_T)1) << bucketIndex)); + + Region *freeRegion = freeRegionBuckets[bucketIndex].next; + assert(freeRegion); + if (freeRegion != &freeRegionBuckets[bucketIndex]) + { + void *ptr = attempt_allocate(freeRegion, alignment, size); + if (ptr) + return ptr; + + // We were not able to allocate from the first region found in this bucket, so penalize + // the region by cycling it to the end of the doubly circular linked list. (constant time) + // This provides a randomized guarantee that when performing allocations of size k to a + // bucket of [k-something, k+something] range, we will not always attempt to satisfy the + // allocation from the same available region at the front of the list, but we try each + // region in turn. + unlink_from_free_list(freeRegion); + prepend_to_free_list(freeRegion, &freeRegionBuckets[bucketIndex]); + // But do not stick around to attempt to look at other regions in this bucket - move + // to search the next populated bucket index if this did not fit. This gives a practical + // "allocation in constant time" guarantee, since the next higher bucket will only have + // regions that are all of strictly larger size than the requested allocation. Only if + // there is a difficult alignment requirement we may fail to perform the allocation from + // a region in the next bucket, and if so, we keep trying higher buckets until one of them + // works. + ++bucketIndex; + bucketMask >>= 1; + } + else + { + // This bucket was not populated after all with any regions, + // but we just had a stale bit set to mark a populated bucket. + // Reset the bit to update latest status so that we do not + // redundantly look at this bucket again. 
+ freeRegionBucketsUsed &= ~(((BUCKET_BITMASK_T)1) << bucketIndex); + bucketMask ^= 1; + } + // Instead of recomputing bucketMask from scratch at the end of each loop, it is updated as we go, + // to avoid undefined behavior with (x >> 32)/(x >> 64) when bucketIndex reaches 32/64, (the shift would comes out as a no-op instead of 0). + + assert((bucketIndex == NUM_FREE_BUCKETS && bucketMask == 0) || (bucketMask == freeRegionBucketsUsed >> bucketIndex)); + } + + // None of the buckets were able to accommodate an allocation. If this happens we are almost out of memory. + // The largest bucket might contain some suitable regions, but we only looked at one region in that bucket, so + // as a last resort, loop through more free regions in the bucket that represents the largest allocations available. + // But only if the bucket representing largest allocations available is not any of the first thirty buckets, + // these represent allocatable areas less than <1024 bytes - which could be a lot of scrap. + // In such case, prefer to sbrk() in more memory right away. + int largestBucketIndex = NUM_FREE_BUCKETS - 1 - __builtin_clzll(freeRegionBucketsUsed); + // freeRegion will be null if there is absolutely no memory left. (all buckets are 100% used) + Region *freeRegion = freeRegionBucketsUsed ? freeRegionBuckets[largestBucketIndex].next : 0; + if (freeRegionBucketsUsed >> 30) + { + // Look only at a constant number of regions in this bucket max, to avoid bad worst case behavior. + // If this many regions cannot find free space, we give up and prefer to sbrk() more instead. + const int maxRegionsToTryBeforeGivingUp = 99; + int numTriesLeft = maxRegionsToTryBeforeGivingUp; + while(freeRegion != &freeRegionBuckets[largestBucketIndex] && numTriesLeft-- > 0) + { + void *ptr = attempt_allocate(freeRegion, alignment, size); + if (ptr) + return ptr; + freeRegion = freeRegion->next; + } + } + + // We were unable to find a free memory region. Must sbrk() in more memory! 
+ size_t numBytesToClaim = size+sizeof(Region)*3; + assert(numBytesToClaim > size); // 32-bit wraparound should not happen here, allocation size has been validated above! + bool success = claim_more_memory(numBytesToClaim); + if (success) + return allocate_memory(alignment, size); // Recurse back to itself to try again + + // also sbrk() failed, we are really really constrained :( As a last resort, go back to looking at the + // bucket we already looked at above, continuing where the above search left off - perhaps there are + // regions we overlooked the first time that might be able to satisfy the allocation. + if (freeRegion) + { + while(freeRegion != &freeRegionBuckets[largestBucketIndex]) + { + void *ptr = attempt_allocate(freeRegion, alignment, size); + if (ptr) + return ptr; + freeRegion = freeRegion->next; + } + } + +#ifdef EMMALLOC_VERBOSE + MAIN_THREAD_ASYNC_EM_ASM(console.log('Could not find a free memory block!')); +#endif + + return 0; +} + +static +void *emmalloc_memalign(size_t alignment, size_t size) +{ + MALLOC_ACQUIRE(); + void *ptr = allocate_memory(alignment, size); + MALLOC_RELEASE(); + return ptr; +} + +#if 0 +void * EMMALLOC_EXPORT memalign(size_t alignment, size_t size) +{ + return emmalloc_memalign(alignment, size); +} +#endif + +void * EMMALLOC_EXPORT aligned_alloc(size_t alignment, size_t size) +{ + if ((alignment % sizeof(void *) != 0) || (size % alignment) != 0) + return 0; + return emmalloc_memalign(alignment, size); +} + +static +void *emmalloc_malloc(size_t size) +{ + return emmalloc_memalign(MALLOC_ALIGNMENT, size); +} + +void * EMMALLOC_EXPORT malloc(size_t size) +{ + return emmalloc_malloc(size); +} + +static +size_t emmalloc_usable_size(void *ptr) +{ + if (!ptr) + return 0; + + uint8_t *regionStartPtr = (uint8_t*)ptr - sizeof(size_t); + Region *region = (Region*)(regionStartPtr); + assert(HAS_ALIGNMENT(region, sizeof(size_t))); + + MALLOC_ACQUIRE(); + + size_t size = region->size; + assert(size >= sizeof(Region)); + 
assert(region_is_in_use(region)); + + MALLOC_RELEASE(); + + return size - REGION_HEADER_SIZE; +} + +size_t EMMALLOC_EXPORT malloc_usable_size(void *ptr) +{ + return emmalloc_usable_size(ptr); +} + +static +void emmalloc_free(void *ptr) +{ +#ifdef EMMALLOC_MEMVALIDATE + emmalloc_validate_memory_regions(); +#endif + + if (!ptr) + return; + +#ifdef EMMALLOC_VERBOSE + MAIN_THREAD_ASYNC_EM_ASM(console.log('free(ptr=0x'+($0>>>0).toString(16)+')'), ptr); +#endif + + uint8_t *regionStartPtr = (uint8_t*)ptr - sizeof(size_t); + Region *region = (Region*)(regionStartPtr); + assert(HAS_ALIGNMENT(region, sizeof(size_t))); + + MALLOC_ACQUIRE(); + + size_t size = region->size; +#ifdef EMMALLOC_VERBOSE + if (size < sizeof(Region) || !region_is_in_use(region)) + { + if (debug_region_is_consistent(region)) + // LLVM wasm backend bug: cannot use MAIN_THREAD_ASYNC_EM_ASM() here, that generates internal compiler error + // Reproducible by running e.g. other.test_alloc_3GB + EM_ASM(console.error('Double free at region ptr 0x' + ($0>>>0).toString(16) + ', region->size: 0x' + ($1>>>0).toString(16) + ', region->sizeAtCeiling: 0x' + ($2>>>0).toString(16) + ')'), region, size, region_ceiling_size(region)); + else + MAIN_THREAD_ASYNC_EM_ASM(console.error('Corrupt region at region ptr 0x' + ($0>>>0).toString(16) + ' region->size: 0x' + ($1>>>0).toString(16) + ', region->sizeAtCeiling: 0x' + ($2>>>0).toString(16) + ')'), region, size, region_ceiling_size(region)); + } +#endif + assert(size >= sizeof(Region)); + assert(region_is_in_use(region)); + +#ifdef __EMSCRIPTEN_TRACING__ + emscripten_trace_record_free(region); +#endif + + // Check merging with left side + size_t prevRegionSizeField = ((size_t*)region)[-1]; + size_t prevRegionSize = prevRegionSizeField & ~FREE_REGION_FLAG; + if (prevRegionSizeField != prevRegionSize) // Previous region is free? 
+ { + Region *prevRegion = (Region*)((uint8_t*)region - prevRegionSize); + assert(debug_region_is_consistent(prevRegion)); + unlink_from_free_list(prevRegion); + regionStartPtr = (uint8_t*)prevRegion; + size += prevRegionSize; + } + + // Check merging with right side + Region *nextRegion = next_region(region); + assert(debug_region_is_consistent(nextRegion)); + size_t sizeAtEnd = *(size_t*)region_payload_end_ptr(nextRegion); + if (nextRegion->size != sizeAtEnd) + { + unlink_from_free_list(nextRegion); + size += nextRegion->size; + } + + create_free_region(regionStartPtr, size); + link_to_free_list((Region*)regionStartPtr); + + MALLOC_RELEASE(); + +#ifdef EMMALLOC_MEMVALIDATE + emmalloc_validate_memory_regions(); +#endif +} + +void EMMALLOC_EXPORT free(void *ptr) +{ + emmalloc_free(ptr); +} + +// Can be called to attempt to increase or decrease the size of the given region +// to a new size (in-place). Returns 1 if resize succeeds, and 0 on failure. +static int attempt_region_resize(Region *region, size_t size) +{ + ASSERT_MALLOC_IS_ACQUIRED(); + assert(size > 0); + assert(HAS_ALIGNMENT(size, sizeof(size_t))); + +#ifdef EMMALLOC_VERBOSE + MAIN_THREAD_ASYNC_EM_ASM(console.log('attempt_region_resize(region=0x' + ($0>>>0).toString(16) + ', size=' + ($1>>>0) + ' bytes)'), region, size); +#endif + + // First attempt to resize this region, if the next region that follows this one + // is a free region. + Region *nextRegion = next_region(region); + uint8_t *nextRegionEndPtr = (uint8_t*)nextRegion + nextRegion->size; + size_t sizeAtCeiling = ((size_t*)nextRegionEndPtr)[-1]; + if (nextRegion->size != sizeAtCeiling) // Next region is free? + { + assert(region_is_free(nextRegion)); + uint8_t *newNextRegionStartPtr = (uint8_t*)region + size; + assert(HAS_ALIGNMENT(newNextRegionStartPtr, sizeof(size_t))); + // Next region does not shrink to too small size? 
+ if (newNextRegionStartPtr + sizeof(Region) <= nextRegionEndPtr) + { + unlink_from_free_list(nextRegion); + create_free_region(newNextRegionStartPtr, nextRegionEndPtr - newNextRegionStartPtr); + link_to_free_list((Region*)newNextRegionStartPtr); + create_used_region(region, newNextRegionStartPtr - (uint8_t*)region); + return 1; + } + // If we remove the next region altogether, allocation is satisfied? + if (newNextRegionStartPtr <= nextRegionEndPtr) + { + unlink_from_free_list(nextRegion); + create_used_region(region, region->size + nextRegion->size); + return 1; + } + } + else + { + // Next region is an used region - we cannot change its starting address. However if we are shrinking the + // size of this region, we can create a new free region between this and the next used region. + if (size + sizeof(Region) <= region->size) + { + size_t freeRegionSize = region->size - size; + create_used_region(region, size); + Region *freeRegion = (Region *)((uint8_t*)region + size); + create_free_region(freeRegion, freeRegionSize); + link_to_free_list(freeRegion); + return 1; + } + else if (size <= region->size) + { + // Caller was asking to shrink the size, but due to not being able to fit a full Region in the shrunk + // area, we cannot actually do anything. This occurs if the shrink amount is really small. In such case, + // just call it success without doing any work. 
+ return 1; + } + } +#ifdef EMMALLOC_VERBOSE + MAIN_THREAD_ASYNC_EM_ASM(console.log('attempt_region_resize failed.')); +#endif + return 0; +} + +static int acquire_and_attempt_region_resize(Region *region, size_t size) +{ + MALLOC_ACQUIRE(); + int success = attempt_region_resize(region, size); + MALLOC_RELEASE(); + return success; +} + +static +void *emmalloc_aligned_realloc(void *ptr, size_t alignment, size_t size) +{ +#ifdef EMMALLOC_VERBOSE + MAIN_THREAD_ASYNC_EM_ASM(console.log('aligned_realloc(ptr=0x' + ($0>>>0).toString(16) + ', alignment=' + $1 + ', size=' + ($2>>>0)), ptr, alignment, size); +#endif + + if (!ptr) + return emmalloc_memalign(alignment, size); + + if (size == 0) + { + free(ptr); + return 0; + } + + if (size > MAX_ALLOC_SIZE) + { +#ifdef EMMALLOC_VERBOSE + MAIN_THREAD_ASYNC_EM_ASM(console.log('Allocation failed: attempted allocation size is too large: ' + ($0 >>> 0) + 'bytes! (negative integer wraparound?)'), size); +#endif + return 0; + } + + assert(IS_POWER_OF_2(alignment)); + // aligned_realloc() cannot be used to ask to change the alignment of a pointer. + assert(HAS_ALIGNMENT(ptr, alignment)); + size = validate_alloc_size(size); + + // Calculate the region start address of the original allocation + Region *region = (Region*)((uint8_t*)ptr - sizeof(size_t)); + + // First attempt to resize the given region to avoid having to copy memory around + if (acquire_and_attempt_region_resize(region, size + REGION_HEADER_SIZE)) + { +#ifdef __EMSCRIPTEN_TRACING__ + emscripten_trace_record_reallocation(ptr, ptr, size); +#endif + return ptr; + } + + // If resize failed, we must allocate a new region, copy the data over, and then + // free the old region. + void *newptr = emmalloc_memalign(alignment, size); + if (newptr) + { + memcpy(newptr, ptr, MIN(size, region->size - REGION_HEADER_SIZE)); + free(ptr); + } + // N.B. If there is not enough memory, the old memory block should not be freed and + // null pointer is returned. 
+ return newptr; +} + +#if 0 +void * EMMALLOC_EXPORT aligned_realloc(void *ptr, size_t alignment, size_t size) +{ + return emmalloc_aligned_realloc(ptr, alignment, size); +} +#endif + +#if 0 +// realloc_try() is like realloc(), but only attempts to try to resize the existing memory +// area. If resizing the existing memory area fails, then realloc_try() will return 0 +// (the original memory block is not freed or modified). If resizing succeeds, previous +// memory contents will be valid up to min(old length, new length) bytes. +void *emmalloc_realloc_try(void *ptr, size_t size) +{ + if (!ptr) + return 0; + + if (size == 0) + { + free(ptr); + return 0; + } + + if (size > MAX_ALLOC_SIZE) + { +#ifdef EMMALLOC_VERBOSE + MAIN_THREAD_ASYNC_EM_ASM(console.log('Allocation failed: attempted allocation size is too large: ' + ($0 >>> 0) + 'bytes! (negative integer wraparound?)'), size); +#endif + return 0; + } + + size = validate_alloc_size(size); + + // Calculate the region start address of the original allocation + Region *region = (Region*)((uint8_t*)ptr - sizeof(size_t)); + + // Attempt to resize the given region to avoid having to copy memory around + int success = acquire_and_attempt_region_resize(region, size + REGION_HEADER_SIZE); +#ifdef __EMSCRIPTEN_TRACING__ + if (success) + emscripten_trace_record_reallocation(ptr, ptr, size); +#endif + return success ? ptr : 0; +} + +// emmalloc_aligned_realloc_uninitialized() is like aligned_realloc(), but old memory contents +// will be undefined after reallocation. (old memory is not preserved in any case) +void *emmalloc_aligned_realloc_uninitialized(void *ptr, size_t alignment, size_t size) +{ + if (!ptr) + return emmalloc_memalign(alignment, size); + + if (size == 0) + { + free(ptr); + return 0; + } + + if (size > MAX_ALLOC_SIZE) + { +#ifdef EMMALLOC_VERBOSE + MAIN_THREAD_ASYNC_EM_ASM(console.log('Allocation failed: attempted allocation size is too large: ' + ($0 >>> 0) + 'bytes! 
(negative integer wraparound?)'), size); +#endif + return 0; + } + + size = validate_alloc_size(size); + + // Calculate the region start address of the original allocation + Region *region = (Region*)((uint8_t*)ptr - sizeof(size_t)); + + // First attempt to resize the given region to avoid having to copy memory around + if (acquire_and_attempt_region_resize(region, size + REGION_HEADER_SIZE)) + { +#ifdef __EMSCRIPTEN_TRACING__ + emscripten_trace_record_reallocation(ptr, ptr, size); +#endif + return ptr; + } + + // If resize failed, drop the old region and allocate a new region. Memory is not + // copied over + free(ptr); + return emmalloc_memalign(alignment, size); +} +#endif + +static +void *emmalloc_realloc(void *ptr, size_t size) +{ + return emmalloc_aligned_realloc(ptr, MALLOC_ALIGNMENT, size); +} + +void * EMMALLOC_EXPORT realloc(void *ptr, size_t size) +{ + return emmalloc_realloc(ptr, size); +} + +#if 0 +// realloc_uninitialized() is like realloc(), but old memory contents +// will be undefined after reallocation. (old memory is not preserved in any case) +void *emmalloc_realloc_uninitialized(void *ptr, size_t size) +{ + return emmalloc_aligned_realloc_uninitialized(ptr, MALLOC_ALIGNMENT, size); +} +#endif + +static +int emmalloc_posix_memalign(void **memptr, size_t alignment, size_t size) +{ + assert(memptr); + if (alignment % sizeof(void *) != 0) + return 22/* EINVAL*/; + *memptr = emmalloc_memalign(alignment, size); + return *memptr ? 
0 : 12/*ENOMEM*/; +} + +int EMMALLOC_EXPORT posix_memalign(void **memptr, size_t alignment, size_t size) +{ + return emmalloc_posix_memalign(memptr, alignment, size); +} + +static +void *emmalloc_calloc(size_t num, size_t size) +{ + size_t bytes = num*size; + void *ptr = emmalloc_memalign(MALLOC_ALIGNMENT, bytes); + if (ptr) + memset(ptr, 0, bytes); + return ptr; +} + +void * EMMALLOC_EXPORT calloc(size_t num, size_t size) +{ + return emmalloc_calloc(num, size); +} + +#if 0 +static int count_linked_list_size(Region *list) +{ + int size = 1; + for(Region *i = list->next; i != list; list = list->next) + ++size; + return size; +} + +static size_t count_linked_list_space(Region *list) +{ + size_t space = 0; + for(Region *i = list->next; i != list; list = list->next) + space += region_payload_end_ptr(i) - region_payload_start_ptr(i); + return space; +} + +struct mallinfo emmalloc_mallinfo() +{ + MALLOC_ACQUIRE(); + + struct mallinfo info; + // Non-mmapped space allocated (bytes): For emmalloc, + // let's define this as the difference between heap size and dynamic top end. + info.arena = emscripten_get_heap_size() - (size_t)sbrk(0); + // Number of "ordinary" blocks. Let's define this as the number of highest + // size blocks. (subtract one from each, since there is a sentinel node in each list) + info.ordblks = count_linked_list_size(&freeRegionBuckets[NUM_FREE_BUCKETS-1])-1; + // Number of free "fastbin" blocks. For emmalloc, define this as the number + // of blocks that are not in the largest pristine block. + info.smblks = 0; + // The total number of bytes in free "fastbin" blocks. + info.fsmblks = 0; + for(int i = 0; i < NUM_FREE_BUCKETS-1; ++i) + { + info.smblks += count_linked_list_size(&freeRegionBuckets[i])-1; + info.fsmblks += count_linked_list_space(&freeRegionBuckets[i]); + } + + info.hblks = 0; // Number of mmapped regions: always 0. (no mmap support) + info.hblkhd = 0; // Amount of bytes in mmapped regions: always 0. 
(no mmap support) + + // Walk through all the heap blocks to report the following data: + // The "highwater mark" for allocated space—that is, the maximum amount of + // space that was ever allocated. Emmalloc does not want to pay code to + // track this, so this is only reported from current allocation data, and + // may not be accurate. + info.usmblks = 0; + info.uordblks = 0; // The total number of bytes used by in-use allocations. + info.fordblks = 0; // The total number of bytes in free blocks. + // The total amount of releasable free space at the top of the heap. + // This is the maximum number of bytes that could ideally be released by malloc_trim(3). + Region *lastActualRegion = prev_region((Region*)(listOfAllRegions->endPtr - sizeof(Region))); + info.keepcost = region_is_free(lastActualRegion) ? lastActualRegion->size : 0; + + RootRegion *root = listOfAllRegions; + while(root) + { + Region *r = (Region*)root; + assert(debug_region_is_consistent(r)); + uint8_t *lastRegionEnd = root->endPtr; + while((uint8_t*)r < lastRegionEnd) + { + assert(debug_region_is_consistent(r)); + + if (region_is_free(r)) + { + // Count only the payload of the free block towards free memory. + info.fordblks += region_payload_end_ptr(r) - region_payload_start_ptr(r); + // But the header data of the free block goes towards used memory. + info.uordblks += REGION_HEADER_SIZE; + } + else + { + info.uordblks += r->size; + } + // Update approximate watermark data + info.usmblks = MAX(info.usmblks, (intptr_t)r + r->size); + + if (r->size == 0) + break; + r = next_region(r); + } + root = root->next; + } + + MALLOC_RELEASE(); + return info; +} + +struct mallinfo EMMALLOC_EXPORT mallinfo() +{ + return emmalloc_mallinfo(); +} + +// Note! This function is not fully multithreadin safe: while this function is running, other threads should not be +// allowed to call sbrk()! 
+static int trim_dynamic_heap_reservation(size_t pad) +{ + ASSERT_MALLOC_IS_ACQUIRED(); + + if (!listOfAllRegions) + return 0; // emmalloc is not controlling any dynamic memory at all - cannot release memory. + uint8_t *previousSbrkEndAddress = listOfAllRegions->endPtr; + assert(sbrk(0) == previousSbrkEndAddress); + size_t lastMemoryRegionSize = ((size_t*)previousSbrkEndAddress)[-1]; + assert(lastMemoryRegionSize == 16); // // The last memory region should be a sentinel node of exactly 16 bytes in size. + Region *endSentinelRegion = (Region*)(previousSbrkEndAddress - sizeof(Region)); + Region *lastActualRegion = prev_region(endSentinelRegion); + + // Round padding up to multiple of 4 bytes to keep sbrk() and memory region alignment intact. + // Also have at least 8 bytes of payload so that we can form a full free region. + size_t newRegionSize = (size_t)ALIGN_UP(pad, 4); + if (pad > 0) + newRegionSize += sizeof(Region) - (newRegionSize - pad); + + if (!region_is_free(lastActualRegion) || lastActualRegion->size <= newRegionSize) + return 0; // Last actual region is in use, or caller desired to leave more free memory intact than there is. + + // This many bytes will be shrunk away. + size_t shrinkAmount = lastActualRegion->size - newRegionSize; + assert(HAS_ALIGNMENT(shrinkAmount, 4)); + + unlink_from_free_list(lastActualRegion); + // If pad == 0, we should delete the last free region altogether. If pad > 0, + // shrink the last free region to the desired size. + if (newRegionSize > 0) + { + create_free_region(lastActualRegion, newRegionSize); + link_to_free_list(lastActualRegion); + } + + // Recreate the sentinel region at the end of the last free region + endSentinelRegion = (Region*)((uint8_t*)lastActualRegion + newRegionSize); + create_used_region(endSentinelRegion, sizeof(Region)); + + // And update the size field of the whole region block. 
+ listOfAllRegions->endPtr = (uint8_t*)endSentinelRegion + sizeof(Region); + + // Finally call sbrk() to shrink the memory area. + void *oldSbrk = sbrk(-(intptr_t)shrinkAmount); + assert((intptr_t)oldSbrk != -1); // Shrinking with sbrk() should never fail. + assert(oldSbrk == previousSbrkEndAddress); // Another thread should not have raced to increase sbrk() on us! + + // All successful, and we actually trimmed memory! + return 1; +} + +int emmalloc_trim(size_t pad) +{ + MALLOC_ACQUIRE(); + int success = trim_dynamic_heap_reservation(pad); + MALLOC_RELEASE(); + return success; +} + +int EMMALLOC_EXPORT malloc_trim(size_t pad) +{ + return emmalloc_trim(pad); +} + +size_t emmalloc_dynamic_heap_size() +{ + size_t dynamicHeapSize = 0; + + MALLOC_ACQUIRE(); + RootRegion *root = listOfAllRegions; + while(root) + { + dynamicHeapSize += root->endPtr - (uint8_t*)root; + root = root->next; + } + MALLOC_RELEASE(); + return dynamicHeapSize; +} + +size_t emmalloc_free_dynamic_memory() +{ + size_t freeDynamicMemory = 0; + + int bucketIndex = 0; + + MALLOC_ACQUIRE(); + BUCKET_BITMASK_T bucketMask = freeRegionBucketsUsed; + + // Loop through each bucket that has free regions in it, based on bits set in freeRegionBucketsUsed bitmap. 
+ while(bucketMask) + { + BUCKET_BITMASK_T indexAdd = __builtin_ctzll(bucketMask); + bucketIndex += indexAdd; + bucketMask >>= indexAdd; + for(Region *freeRegion = freeRegionBuckets[bucketIndex].next; + freeRegion != &freeRegionBuckets[bucketIndex]; + freeRegion = freeRegion->next) + { + freeDynamicMemory += freeRegion->size - REGION_HEADER_SIZE; + } + ++bucketIndex; + bucketMask >>= 1; + } + MALLOC_RELEASE(); + return freeDynamicMemory; +} + +size_t emmalloc_compute_free_dynamic_memory_fragmentation_map(size_t freeMemorySizeMap[32]) +{ + memset((void*)freeMemorySizeMap, 0, sizeof(freeMemorySizeMap[0])*32); + + size_t numFreeMemoryRegions = 0; + int bucketIndex = 0; + MALLOC_ACQUIRE(); + BUCKET_BITMASK_T bucketMask = freeRegionBucketsUsed; + + // Loop through each bucket that has free regions in it, based on bits set in freeRegionBucketsUsed bitmap. + while(bucketMask) + { + BUCKET_BITMASK_T indexAdd = __builtin_ctzll(bucketMask); + bucketIndex += indexAdd; + bucketMask >>= indexAdd; + for(Region *freeRegion = freeRegionBuckets[bucketIndex].next; + freeRegion != &freeRegionBuckets[bucketIndex]; + freeRegion = freeRegion->next) + { + ++numFreeMemoryRegions; + size_t freeDynamicMemory = freeRegion->size - REGION_HEADER_SIZE; + if (freeDynamicMemory > 0) + ++freeMemorySizeMap[31-__builtin_clz(freeDynamicMemory)]; + else + ++freeMemorySizeMap[0]; + } + ++bucketIndex; + bucketMask >>= 1; + } + MALLOC_RELEASE(); + return numFreeMemoryRegions; +} + +size_t emmalloc_unclaimed_heap_memory(void) { + return emscripten_get_heap_max() - (size_t)sbrk(0); +} +#endif + +// Define these to satisfy musl references. 
+void *__libc_malloc(size_t) __attribute__((alias("malloc"))); +void __libc_free(void *) __attribute__((alias("free"))); +void *__libc_calloc(size_t nmemb, size_t size) __attribute__((alias("calloc"))); diff --git a/lib/libc/wasi/libc-bottom-half/cloudlibc/src/common/errno.h b/lib/libc/wasi/libc-bottom-half/cloudlibc/src/common/errno.h deleted file mode 100644 index 7d178fbc56..0000000000 --- a/lib/libc/wasi/libc-bottom-half/cloudlibc/src/common/errno.h +++ /dev/null @@ -1,22 +0,0 @@ -// Copyright (c) 2015-2016 Nuxi, https://nuxi.nl/ -// -// SPDX-License-Identifier: BSD-2-Clause - -#ifndef COMMON_ERRNO_H -#define COMMON_ERRNO_H - -#include <wasi/api.h> - -// WASI syscalls should just return ENOTDIR if that's what the problem is. -static inline __wasi_errno_t errno_fixup_directory(__wasi_fd_t fd, - __wasi_errno_t error) { - return error; -} - -// WASI syscalls should just return ENOTSOCK if that's what the problem is. -static inline __wasi_errno_t errno_fixup_socket(__wasi_fd_t fd, - __wasi_errno_t error) { - return error; -} - -#endif diff --git a/lib/libc/wasi/libc-bottom-half/cloudlibc/src/common/overflow.h b/lib/libc/wasi/libc-bottom-half/cloudlibc/src/common/overflow.h deleted file mode 100644 index b7b28f2c55..0000000000 --- a/lib/libc/wasi/libc-bottom-half/cloudlibc/src/common/overflow.h +++ /dev/null @@ -1,15 +0,0 @@ -// Copyright (c) 2015-2016 Nuxi, https://nuxi.nl/ -// -// SPDX-License-Identifier: BSD-2-Clause - -#ifndef COMMON_OVERFLOW_H -#define COMMON_OVERFLOW_H - -// Performs an addition, subtraction or multiplication operation, -// returning whether the computation caused an overflow. These -// intrinsics are available as of Clang 3.8 and GCC 5. 
-#define add_overflow(x, y, out) __builtin_add_overflow(x, y, out) -#define sub_overflow(x, y, out) __builtin_sub_overflow(x, y, out) -#define mul_overflow(x, y, out) __builtin_mul_overflow(x, y, out) - -#endif diff --git a/lib/libc/wasi/libc-bottom-half/cloudlibc/src/common/time.h b/lib/libc/wasi/libc-bottom-half/cloudlibc/src/common/time.h index 293678d823..08e2852690 100644 --- a/lib/libc/wasi/libc-bottom-half/cloudlibc/src/common/time.h +++ b/lib/libc/wasi/libc-bottom-half/cloudlibc/src/common/time.h @@ -6,7 +6,6 @@ #define COMMON_TIME_H #include <common/limits.h> -#include <common/overflow.h> #include <sys/time.h> @@ -16,43 +15,6 @@ #define NSEC_PER_SEC 1000000000 -// Timezone agnostic conversion routines. -int __localtime_utc(time_t, struct tm *); -void __mktime_utc(const struct tm *, struct timespec *); - -static inline bool is_leap(time_t year) { - year %= 400; - if (year < 0) - year += 400; - return ((year % 4) == 0 && (year % 100) != 0) || year == 100; -} - -// Gets the length of the months in a year. -static inline const char *get_months(time_t year) { - static const char leap[12] = { - 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31, - }; - static const char common[12] = { - 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31, - }; - return is_leap(year) ? leap : common; -} - -// Gets the cumulative length of the months in a year. -static inline const short *get_months_cumulative(time_t year) { - static const short leap[13] = { - 0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366, - }; - static const short common[13] = { - 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365, - }; - return is_leap(year) ? leap : common; -} - -static inline short get_ydays(time_t year) { - return is_leap(year) ? 366 : 365; -} - static inline bool timespec_to_timestamp_exact( const struct timespec *timespec, __wasi_timestamp_t *timestamp) { // Invalid nanoseconds field. 
@@ -64,8 +26,8 @@ static inline bool timespec_to_timestamp_exact( return false; // Make sure our timestamp does not overflow. - return !mul_overflow(timespec->tv_sec, NSEC_PER_SEC, timestamp) && - !add_overflow(*timestamp, timespec->tv_nsec, timestamp); + return !__builtin_mul_overflow(timespec->tv_sec, NSEC_PER_SEC, timestamp) && + !__builtin_add_overflow(*timestamp, timespec->tv_nsec, timestamp); } static inline bool timespec_to_timestamp_clamp( @@ -77,8 +39,8 @@ static inline bool timespec_to_timestamp_clamp( if (timespec->tv_sec < 0) { // Timestamps before the Epoch are not supported. *timestamp = 0; - } else if (mul_overflow(timespec->tv_sec, NSEC_PER_SEC, timestamp) || - add_overflow(*timestamp, timespec->tv_nsec, timestamp)) { + } else if (__builtin_mul_overflow(timespec->tv_sec, NSEC_PER_SEC, timestamp) || + __builtin_add_overflow(*timestamp, timespec->tv_nsec, timestamp)) { // Make sure our timestamp does not overflow. *timestamp = NUMERIC_MAX(__wasi_timestamp_t); } diff --git a/lib/libc/wasi/libc-bottom-half/cloudlibc/src/include/_/cdefs.h b/lib/libc/wasi/libc-bottom-half/cloudlibc/src/include/_/cdefs.h index 246adec414..d9a6f547b0 100644 --- a/lib/libc/wasi/libc-bottom-half/cloudlibc/src/include/_/cdefs.h +++ b/lib/libc/wasi/libc-bottom-half/cloudlibc/src/include/_/cdefs.h @@ -24,126 +24,13 @@ #ifndef ___CDEFS_H_ #define ___CDEFS_H_ -// Version information. -#define __cloudlibc__ 1 -#define __cloudlibc_major__ 0 -#define __cloudlibc_minor__ 102 - -#ifdef __cplusplus -#define __BEGIN_DECLS extern "C" { -#define __END_DECLS } -#else -#define __BEGIN_DECLS -#define __END_DECLS -#endif - -// Whether we should provide inline versions of functions. Due to C++'s -// support for namespaces, it is generally a bad idea to declare -// function macros. -#ifdef __cplusplus -#define _CLOUDLIBC_INLINE_FUNCTIONS 0 -#else -#define _CLOUDLIBC_INLINE_FUNCTIONS 1 -#endif - // Compiler-independent annotations. 
-#ifndef __has_builtin -#define __has_builtin(x) 0 -#endif -#ifndef __has_extension -#define __has_extension(x) __has_feature(x) -#endif -#ifndef __has_feature -#define __has_feature(x) 0 -#endif - -#define __offsetof(type, member) __builtin_offsetof(type, member) -#define __containerof(ptr, type, member) \ - ((type *)((char *)(ptr)-__offsetof(type, member))) - -#define __extname(x) __asm__(x) -#define __malloc_like __attribute__((__malloc__)) -#define __pure2 __attribute__((__const__)) -#define __pure __attribute__((__pure__)) -#define __section(x) __attribute__((__section__(x))) -#define __unused __attribute__((__unused__)) -#define __used __attribute__((__used__)) -#define __weak_symbol __attribute__((__weak__)) - -// Format string argument type checking. -#define __printflike(format, va) \ - __attribute__((__format__(__printf__, format, va))) -#define __scanflike(format, va) \ - __attribute__((__format__(__scanf__, format, va))) -// TODO(ed): Enable this once supported by LLVM: -// https://llvm.org/bugs/show_bug.cgi?id=16810 -#define __wprintflike(format, va) -#define __wscanflike(format, va) - #define __strong_reference(oldsym, newsym) \ extern __typeof__(oldsym) newsym __attribute__((__alias__(#oldsym))) // Convenience macros. #define __arraycount(x) (sizeof(x) / sizeof((x)[0])) -#define __howmany(x, y) (((x) + (y)-1) / (y)) -#define __rounddown(x, y) (((x) / (y)) * (y)) -#define __roundup(x, y) ((((x) + (y)-1) / (y)) * (y)) - -// Lock annotations. - -#if __has_extension(c_thread_safety_attributes) -#define __lock_annotate(x) __attribute__((x)) -#else -#define __lock_annotate(x) -#endif - -#define __lockable __lock_annotate(lockable) - -#define __locks_exclusive(...) \ - __lock_annotate(exclusive_lock_function(__VA_ARGS__)) -#define __locks_shared(...) __lock_annotate(shared_lock_function(__VA_ARGS__)) - -#define __trylocks_exclusive(...) \ - __lock_annotate(exclusive_trylock_function(__VA_ARGS__)) -#define __trylocks_shared(...) 
\ - __lock_annotate(shared_trylock_function(__VA_ARGS__)) - -#define __unlocks(...) __lock_annotate(unlock_function(__VA_ARGS__)) - -#define __asserts_exclusive(...) \ - __lock_annotate(assert_exclusive_lock(__VA_ARGS__)) -#define __asserts_shared(...) __lock_annotate(assert_shared_lock(__VA_ARGS__)) - -#define __requires_exclusive(...) \ - __lock_annotate(exclusive_locks_required(__VA_ARGS__)) -#define __requires_shared(...) \ - __lock_annotate(shared_locks_required(__VA_ARGS__)) -#define __requires_unlocked(...) __lock_annotate(locks_excluded(__VA_ARGS__)) - -#define __no_lock_analysis __lock_annotate(no_thread_safety_analysis) - -#define __guarded_by(x) __lock_annotate(guarded_by(x)) -#define __pt_guarded_by(x) __lock_annotate(pt_guarded_by(x)) - -// Const preservation. -// -// Functions like strchr() allow you to silently discard a const -// qualifier from a string. This macro can be used to wrap such -// functions to propagate the const keyword where possible. -// -// This macro has many limitations, such as only being able to detect -// constness for void, char and wchar_t. For Clang, it also doesn't seem -// to work on string literals. - -#define __preserve_const(type, name, arg, ...) 
\ - _Generic(arg, \ - const void *: (const type *)name(__VA_ARGS__), \ - const char *: (const type *)name(__VA_ARGS__), \ - const signed char *: (const type *)name(__VA_ARGS__), \ - const unsigned char *: (const type *)name(__VA_ARGS__), \ - const __wchar_t *: (const type *)name(__VA_ARGS__), \ - default: name(__VA_ARGS__)) #endif diff --git a/lib/libc/wasi/libc-bottom-half/cloudlibc/src/include/stdlib.h b/lib/libc/wasi/libc-bottom-half/cloudlibc/src/include/stdlib.h deleted file mode 100644 index ff48afbb49..0000000000 --- a/lib/libc/wasi/libc-bottom-half/cloudlibc/src/include/stdlib.h +++ /dev/null @@ -1,241 +0,0 @@ -// Copyright (c) 2015-2017 Nuxi, https://nuxi.nl/ -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND -// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -// ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE -// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY -// OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF -// SUCH DAMAGE. 
- -// <stdlib.h> - standard library definitions -// -// Extensions: -// - MB_CUR_MAX_L(), mblen_l(), mbstowcs_l(), mbtowc_l(), wcstombs_l() -// and wctomb_l(): -// Regular functions always use the C locale. Available on many other -// operating systems. -// - alloca(): -// Present on most other operating systems. -// - arc4random(), arc4random_buf() and arc4random_uniform(): -// Secure random number generator. Available on many other operating -// systems. -// - l64a_r(): -// Thread-safe replacement for l64a(). Part of the SVID, 4th edition. -// - qsort_r(): -// Available on many other operating systems, although the prototype -// is not consistent. This implementation is compatible with glibc. -// It is expected that this version will be standardized in the future. -// - reallocarray(): -// Allows for reallocation of buffers without integer overflows. -// -// Features missing: -// - initstate(), lcong48(), seed48(), setstate(), srand(), srand48() -// and srandom(): -// Randomizer is seeded securely by default. There is no need to seed -// manually. -// - WEXITSTATUS(), WIFEXITED(), WIFSIGNALED(), WIFSTOPPED(), WNOHANG, -// WSTOPSIG(), WTERMSIG(), WUNTRACED: -// Only useful if system() would actually work. -// - l64a(): -// Not thread-safe. Use l64a_r() instead. -// - putenv(), setenv() and unsetenv(): -// Environment variables are not available. -// - grantpt(), posix_openpt(), ptsname() and unlockpt(): -// Pseudo-terminals are not available. -// - mkdtemp(), mkstemp() and realpath(): -// Requires global filesystem namespace. -// - setkey(): -// Password database and encryption schemes not available. -// - system(): -// Requires a command shell. 
- -#ifndef _STDLIB_H_ -#define _STDLIB_H_ - -#include <_/limits.h> -#include <_/types.h> - -__BEGIN_DECLS -_Noreturn void _Exit(int); -_Noreturn void abort(void); -void *calloc(size_t, size_t); -_Noreturn void exit(int); -void free(void *); -void *malloc(size_t); -void qsort(void *, size_t, size_t, int (*)(const void *, const void *)); -void *realloc(void *, size_t); -__END_DECLS - -#if _CLOUDLIBC_INLINE_FUNCTIONS - -// qsort_r() implementation from Bentley and McIlroy's -// "Engineering a Sort Function". -// -// This sorting function is inlined into this header, so that the -// compiler can create an optimized version that takes the alignment and -// size of the elements into account. It also reduces the overhead of -// indirect function calls. - -static __inline void __qsort_r(void *, size_t, size_t, - int (*)(const void *, const void *, void *), - void *); - -static __inline size_t __qsort_min(size_t __a, size_t __b) { - return __a < __b ? __a : __b; -} - -// Swaps the contents of two buffers. -static __inline void __qsort_swap(char *__a, char *__b, size_t __n) { - char __t; - - while (__n-- > 0) { - __t = *__a; - *__a++ = *__b; - *__b++ = __t; - } -} - -// Implementation of insertionsort for small lists. -static __inline void __qsort_insertionsort( - char *__a, size_t __nel, size_t __width, - int (*__cmp)(const void *, const void *, void *), void *__thunk) { - char *__pm, *__pl; - - for (__pm = __a + __width; __pm < __a + __nel * __width; __pm += __width) - for (__pl = __pm; __pl > __a && __cmp(__pl - __width, __pl, __thunk) > 0; - __pl -= __width) - __qsort_swap(__pl, __pl - __width, __width); -} - -// Returns the median of three elements. -static __inline char *__qsort_med3(char *__a, char *__b, char *__c, - int (*__cmp)(const void *, const void *, - void *), - void *__thunk) { - return __cmp(__a, __b, __thunk) < 0 - ? (__cmp(__b, __c, __thunk) < 0 - ? __b - : __cmp(__a, __c, __thunk) < 0 ? __c : __a) - : (__cmp(__b, __c, __thunk) > 0 - ? 
__b - : __cmp(__a, __c, __thunk) > 0 ? __c : __a); -} - -// Picks a pivot based on a pseudo-median of three or nine. -// TODO(ed): Does this still guarantee an O(n log n) running time? -static __inline char *__qsort_pickpivot(char *__a, size_t __nel, size_t __width, - int (*__cmp)(const void *, const void *, - void *), - void *__thunk) { - char *__pl, *__pm, *__pn; - size_t __s; - - __pl = __a; - __pm = __a + (__nel / 2) * __width; - __pn = __a + (__nel - 1) * __width; - if (__nel > 40) { - __s = (__nel / 8) * __width; - __pl = __qsort_med3(__pl, __pl + __s, __pl + 2 * __s, __cmp, __thunk); - __pm = __qsort_med3(__pm - __s, __pm, __pm + __s, __cmp, __thunk); - __pn = __qsort_med3(__pn - 2 * __s, __pn - __s, __pn, __cmp, __thunk); - } - return __qsort_med3(__pl, __pm, __pn, __cmp, __thunk); -} - -// Implementation of quicksort for larger lists. -static __inline void __qsort_quicksort(char *__a, size_t __nel, size_t __width, - int (*__cmp)(const void *, const void *, - void *), - void *__thunk) { - char *__pa, *__pb, *__pc, *__pd, *__pn; - int __r; - size_t __s; - - // Select pivot and move it to the head of the list. - __qsort_swap(__a, __qsort_pickpivot(__a, __nel, __width, __cmp, __thunk), - __width); - - // Perform partitioning. - __pa = __pb = __a; - __pc = __pd = __a + (__nel - 1) * __width; - for (;;) { - while (__pb <= __pc && (__r = __cmp(__pb, __a, __thunk)) <= 0) { - if (__r == 0) { - __qsort_swap(__pa, __pb, __width); - __pa += __width; - } - __pb += __width; - } - while (__pc >= __pb && (__r = __cmp(__pc, __a, __thunk)) >= 0) { - if (__r == 0) { - __qsort_swap(__pc, __pd, __width); - __pd -= __width; - } - __pc -= __width; - } - if (__pb > __pc) - break; - __qsort_swap(__pb, __pc, __width); - __pb += __width; - __pc -= __width; - } - - // Store pivot between the two partitions. 
- __pn = __a + __nel * __width; - __s = __qsort_min((size_t)(__pa - __a), (size_t)(__pb - __pa)); - __qsort_swap(__a, __pb - __s, __s); - __s = __qsort_min((size_t)(__pd - __pc), (size_t)(__pn - __pd) - __width); - __qsort_swap(__pb, __pn - __s, __s); - - // Sort the two partitions. - __s = (size_t)(__pb - __pa); - __qsort_r(__a, __s / __width, __width, __cmp, __thunk); - __s = (size_t)(__pd - __pc); - __qsort_r(__pn - __s, __s / __width, __width, __cmp, __thunk); -} - -static __inline void __qsort_r(void *__base, size_t __nel, size_t __width, - int (*__cmp)(const void *, const void *, void *), - void *__thunk) { - char *__a; - - __a = (char *)__base; - if (__nel < 8) { - __qsort_insertionsort(__a, __nel, __width, __cmp, __thunk); - } else { - __qsort_quicksort(__a, __nel, __width, __cmp, __thunk); - } -} -#define qsort_r(base, nel, width, compar, thunk) \ - __qsort_r(base, nel, width, compar, thunk) - -// qsort(): Call into qsort_r(), providing the callback as the thunk. -// We assume that the optimizer is smart enough to simplify. 
- -static __inline int __qsort_cmp(const void *__a, const void *__b, - void *__thunk) { - return ((int (*)(const void *, const void *))__thunk)(__a, __b); -} - -static __inline void __qsort(void *__base, size_t __nel, size_t __width, - int (*__cmp)(const void *, const void *)) { - qsort_r(__base, __nel, __width, __qsort_cmp, (void *)__cmp); -} -#define qsort(base, nel, width, compar) __qsort(base, nel, width, compar) -#endif - -#endif diff --git a/lib/libc/wasi/libc-bottom-half/cloudlibc/src/libc/dirent/fdopendir.c b/lib/libc/wasi/libc-bottom-half/cloudlibc/src/libc/dirent/fdopendir.c index 59fc027e8d..4a2136af52 100644 --- a/lib/libc/wasi/libc-bottom-half/cloudlibc/src/libc/dirent/fdopendir.c +++ b/lib/libc/wasi/libc-bottom-half/cloudlibc/src/libc/dirent/fdopendir.c @@ -2,8 +2,6 @@ // // SPDX-License-Identifier: BSD-2-Clause -#include <common/errno.h> - #include <wasi/api.h> #include <dirent.h> #include <errno.h> @@ -31,7 +29,7 @@ DIR *fdopendir(int fd) { if (error != 0) { free(dirp->buffer); free(dirp); - errno = errno_fixup_directory(fd, error); + errno = error; return NULL; } diff --git a/lib/libc/wasi/libc-bottom-half/cloudlibc/src/libc/fcntl/openat.c b/lib/libc/wasi/libc-bottom-half/cloudlibc/src/libc/fcntl/openat.c index 46919fe814..09cbbf8008 100644 --- a/lib/libc/wasi/libc-bottom-half/cloudlibc/src/libc/fcntl/openat.c +++ b/lib/libc/wasi/libc-bottom-half/cloudlibc/src/libc/fcntl/openat.c @@ -2,8 +2,6 @@ // // SPDX-License-Identifier: BSD-2-Clause -#include <common/errno.h> - #include <assert.h> #include <wasi/api.h> #include <wasi/libc.h> @@ -75,7 +73,7 @@ int __wasilibc_nocwd_openat_nomode(int fd, const char *path, int oflag) { fs_rights_base, fs_rights_inheriting, fs_flags, &newfd); if (error != 0) { - errno = errno_fixup_directory(fd, error); + errno = error; return -1; } return newfd; diff --git a/lib/libc/wasi/libc-bottom-half/cloudlibc/src/libc/stdio/renameat.c b/lib/libc/wasi/libc-bottom-half/cloudlibc/src/libc/stdio/renameat.c index 
e43cef22c2..c1706db48f 100644 --- a/lib/libc/wasi/libc-bottom-half/cloudlibc/src/libc/stdio/renameat.c +++ b/lib/libc/wasi/libc-bottom-half/cloudlibc/src/libc/stdio/renameat.c @@ -2,8 +2,6 @@ // // SPDX-License-Identifier: BSD-2-Clause -#include <common/errno.h> - #include <wasi/api.h> #include <errno.h> #include <stdio.h> @@ -12,7 +10,7 @@ int __wasilibc_nocwd_renameat(int oldfd, const char *old, int newfd, const char *new) { __wasi_errno_t error = __wasi_path_rename(oldfd, old, newfd, new); if (error != 0) { - errno = errno_fixup_directory(oldfd, errno_fixup_directory(newfd, error)); + errno = error; return -1; } return 0; diff --git a/lib/libc/wasi/libc-bottom-half/cloudlibc/src/libc/stdlib/_Exit.c b/lib/libc/wasi/libc-bottom-half/cloudlibc/src/libc/stdlib/_Exit.c index 10dab2b61e..5e266f0b98 100644 --- a/lib/libc/wasi/libc-bottom-half/cloudlibc/src/libc/stdlib/_Exit.c +++ b/lib/libc/wasi/libc-bottom-half/cloudlibc/src/libc/stdlib/_Exit.c @@ -3,7 +3,7 @@ // SPDX-License-Identifier: BSD-2-Clause #include <wasi/api.h> -#include <stdlib.h> +#include <_/cdefs.h> #include <stdnoreturn.h> #include <unistd.h> diff --git a/lib/libc/wasi/libc-bottom-half/cloudlibc/src/libc/sys/socket/recv.c b/lib/libc/wasi/libc-bottom-half/cloudlibc/src/libc/sys/socket/recv.c index 49c09d0376..d35f8894fa 100644 --- a/lib/libc/wasi/libc-bottom-half/cloudlibc/src/libc/sys/socket/recv.c +++ b/lib/libc/wasi/libc-bottom-half/cloudlibc/src/libc/sys/socket/recv.c @@ -2,8 +2,6 @@ // // SPDX-License-Identifier: BSD-2-Clause -#include <common/errno.h> - #include <sys/socket.h> #include <assert.h> @@ -35,7 +33,7 @@ ssize_t recv(int socket, void *restrict buffer, size_t length, int flags) { &ro_datalen, &ro_flags); if (error != 0) { - errno = errno_fixup_socket(socket, error); + errno = error; return -1; } return ro_datalen; diff --git a/lib/libc/wasi/libc-bottom-half/cloudlibc/src/libc/sys/socket/send.c b/lib/libc/wasi/libc-bottom-half/cloudlibc/src/libc/sys/socket/send.c index 
0759abf9b2..85a298a731 100644 --- a/lib/libc/wasi/libc-bottom-half/cloudlibc/src/libc/sys/socket/send.c +++ b/lib/libc/wasi/libc-bottom-half/cloudlibc/src/libc/sys/socket/send.c @@ -2,8 +2,6 @@ // // SPDX-License-Identifier: BSD-2-Clause -#include <common/errno.h> - #include <sys/socket.h> #include <assert.h> @@ -27,7 +25,7 @@ ssize_t send(int socket, const void *buffer, size_t length, int flags) { size_t so_datalen; __wasi_errno_t error = __wasi_sock_send(socket, si_data, si_data_len, si_flags, &so_datalen); if (error != 0) { - errno = errno_fixup_socket(socket, error); + errno = error; return -1; } return so_datalen; diff --git a/lib/libc/wasi/libc-bottom-half/cloudlibc/src/libc/sys/socket/shutdown.c b/lib/libc/wasi/libc-bottom-half/cloudlibc/src/libc/sys/socket/shutdown.c index 883b551ae8..261fcb81ea 100644 --- a/lib/libc/wasi/libc-bottom-half/cloudlibc/src/libc/sys/socket/shutdown.c +++ b/lib/libc/wasi/libc-bottom-half/cloudlibc/src/libc/sys/socket/shutdown.c @@ -2,8 +2,6 @@ // // SPDX-License-Identifier: BSD-2-Clause -#include <common/errno.h> - #include <sys/socket.h> #include <assert.h> @@ -22,7 +20,7 @@ int shutdown(int socket, int how) { __wasi_errno_t error = __wasi_sock_shutdown(socket, how); if (error != 0) { - errno = errno_fixup_socket(socket, error); + errno = error; return -1; } return error; diff --git a/lib/libc/wasi/libc-bottom-half/cloudlibc/src/libc/sys/stat/fstatat.c b/lib/libc/wasi/libc-bottom-half/cloudlibc/src/libc/sys/stat/fstatat.c index f037d99dbe..25b29ac982 100644 --- a/lib/libc/wasi/libc-bottom-half/cloudlibc/src/libc/sys/stat/fstatat.c +++ b/lib/libc/wasi/libc-bottom-half/cloudlibc/src/libc/sys/stat/fstatat.c @@ -2,8 +2,6 @@ // // SPDX-License-Identifier: BSD-2-Clause -#include <common/errno.h> - #include <sys/stat.h> #include <wasi/api.h> @@ -25,7 +23,7 @@ int __wasilibc_nocwd_fstatat(int fd, const char *restrict path, struct stat *res __wasi_errno_t error = __wasi_path_filestat_get(fd, lookup_flags, path, &internal_stat); if (error 
!= 0) { - errno = errno_fixup_directory(fd, error); + errno = error; return -1; } to_public_stat(&internal_stat, buf); diff --git a/lib/libc/wasi/libc-bottom-half/cloudlibc/src/libc/sys/stat/mkdirat.c b/lib/libc/wasi/libc-bottom-half/cloudlibc/src/libc/sys/stat/mkdirat.c index c89ce3db52..fd27d5e173 100644 --- a/lib/libc/wasi/libc-bottom-half/cloudlibc/src/libc/sys/stat/mkdirat.c +++ b/lib/libc/wasi/libc-bottom-half/cloudlibc/src/libc/sys/stat/mkdirat.c @@ -2,8 +2,6 @@ // // SPDX-License-Identifier: BSD-2-Clause -#include <common/errno.h> - #include <sys/stat.h> #include <wasi/api.h> @@ -13,7 +11,7 @@ int __wasilibc_nocwd_mkdirat_nomode(int fd, const char *path) { __wasi_errno_t error = __wasi_path_create_directory(fd, path); if (error != 0) { - errno = errno_fixup_directory(fd, error); + errno = error; return -1; } return 0; diff --git a/lib/libc/wasi/libc-bottom-half/cloudlibc/src/libc/sys/stat/utimensat.c b/lib/libc/wasi/libc-bottom-half/cloudlibc/src/libc/sys/stat/utimensat.c index a572716201..19508a1365 100644 --- a/lib/libc/wasi/libc-bottom-half/cloudlibc/src/libc/sys/stat/utimensat.c +++ b/lib/libc/wasi/libc-bottom-half/cloudlibc/src/libc/sys/stat/utimensat.c @@ -2,8 +2,6 @@ // // SPDX-License-Identifier: BSD-2-Clause -#include <common/errno.h> - #include <sys/stat.h> #include <wasi/api.h> @@ -33,7 +31,7 @@ int __wasilibc_nocwd_utimensat(int fd, const char *path, const struct timespec t __wasi_errno_t error = __wasi_path_filestat_set_times(fd, lookup_flags, path, st_atim, st_mtim, flags); if (error != 0) { - errno = errno_fixup_directory(fd, error); + errno = error; return -1; } return 0; diff --git a/lib/libc/wasi/libc-bottom-half/cloudlibc/src/libc/time/CLOCK_PROCESS_CPUTIME_ID.c b/lib/libc/wasi/libc-bottom-half/cloudlibc/src/libc/time/CLOCK_PROCESS_CPUTIME_ID.c deleted file mode 100644 index 901fd7300b..0000000000 --- a/lib/libc/wasi/libc-bottom-half/cloudlibc/src/libc/time/CLOCK_PROCESS_CPUTIME_ID.c +++ /dev/null @@ -1,12 +0,0 @@ -// Copyright (c) 2016 
Nuxi, https://nuxi.nl/ -// -// SPDX-License-Identifier: BSD-2-Clause - -#include <common/clock.h> - -#include <wasi/api.h> -#include <time.h> - -const struct __clockid _CLOCK_PROCESS_CPUTIME_ID = { - .id = __WASI_CLOCKID_PROCESS_CPUTIME_ID, -}; diff --git a/lib/libc/wasi/libc-bottom-half/cloudlibc/src/libc/time/CLOCK_THREAD_CPUTIME_ID.c b/lib/libc/wasi/libc-bottom-half/cloudlibc/src/libc/time/CLOCK_THREAD_CPUTIME_ID.c deleted file mode 100644 index de58c510bb..0000000000 --- a/lib/libc/wasi/libc-bottom-half/cloudlibc/src/libc/time/CLOCK_THREAD_CPUTIME_ID.c +++ /dev/null @@ -1,12 +0,0 @@ -// Copyright (c) 2016 Nuxi, https://nuxi.nl/ -// -// SPDX-License-Identifier: BSD-2-Clause - -#include <common/clock.h> - -#include <wasi/api.h> -#include <time.h> - -const struct __clockid _CLOCK_THREAD_CPUTIME_ID = { - .id = __WASI_CLOCKID_THREAD_CPUTIME_ID, -}; diff --git a/lib/libc/wasi/libc-bottom-half/cloudlibc/src/libc/time/clock_gettime.c b/lib/libc/wasi/libc-bottom-half/cloudlibc/src/libc/time/clock_gettime.c index dbb36a72da..c7e1a609e7 100644 --- a/lib/libc/wasi/libc-bottom-half/cloudlibc/src/libc/time/clock_gettime.c +++ b/lib/libc/wasi/libc-bottom-half/cloudlibc/src/libc/time/clock_gettime.c @@ -19,4 +19,4 @@ int __clock_gettime(clockid_t clock_id, struct timespec *tp) { *tp = timestamp_to_timespec(ts); return 0; } -extern __typeof(__clock_gettime) clock_gettime __attribute__((weak, alias("__clock_gettime"))); +weak_alias(__clock_gettime, clock_gettime); diff --git a/lib/libc/wasi/libc-bottom-half/cloudlibc/src/libc/time/clock_nanosleep.c b/lib/libc/wasi/libc-bottom-half/cloudlibc/src/libc/time/clock_nanosleep.c index 52b577a0ea..d375056ff4 100644 --- a/lib/libc/wasi/libc-bottom-half/cloudlibc/src/libc/time/clock_nanosleep.c +++ b/lib/libc/wasi/libc-bottom-half/cloudlibc/src/libc/time/clock_nanosleep.c @@ -33,3 +33,5 @@ int clock_nanosleep(clockid_t clock_id, int flags, const struct timespec *rqtp, __wasi_errno_t error = __wasi_poll_oneoff(&sub, &ev, 1, &nevents); 
return error == 0 && ev.error == 0 ? 0 : ENOTSUP; } + +weak_alias(clock_nanosleep, __clock_nanosleep); diff --git a/lib/libc/wasi/libc-bottom-half/cloudlibc/src/libc/time/nanosleep.c b/lib/libc/wasi/libc-bottom-half/cloudlibc/src/libc/time/nanosleep.c index 9ffa1be281..5f26c5cc6d 100644 --- a/lib/libc/wasi/libc-bottom-half/cloudlibc/src/libc/time/nanosleep.c +++ b/lib/libc/wasi/libc-bottom-half/cloudlibc/src/libc/time/nanosleep.c @@ -3,7 +3,6 @@ // SPDX-License-Identifier: BSD-2-Clause #include <errno.h> -#include <threads.h> #include <time.h> int nanosleep(const struct timespec *rqtp, struct timespec *rem) { @@ -14,7 +13,3 @@ int nanosleep(const struct timespec *rqtp, struct timespec *rem) { } return 0; } - -#if defined(_REENTRANT) -__strong_reference(nanosleep, thrd_sleep); -#endif diff --git a/lib/libc/wasi/libc-bottom-half/cloudlibc/src/libc/unistd/faccessat.c b/lib/libc/wasi/libc-bottom-half/cloudlibc/src/libc/unistd/faccessat.c index 077250c667..ffaef6ed62 100644 --- a/lib/libc/wasi/libc-bottom-half/cloudlibc/src/libc/unistd/faccessat.c +++ b/lib/libc/wasi/libc-bottom-half/cloudlibc/src/libc/unistd/faccessat.c @@ -2,8 +2,6 @@ // // SPDX-License-Identifier: BSD-2-Clause -#include <common/errno.h> - #include <wasi/api.h> #include <errno.h> #include <fcntl.h> @@ -24,7 +22,7 @@ int __wasilibc_nocwd_faccessat(int fd, const char *path, int amode, int flag) { __wasi_errno_t error = __wasi_path_filestat_get(fd, lookup_flags, path, &file); if (error != 0) { - errno = errno_fixup_directory(fd, error); + errno = error; return -1; } diff --git a/lib/libc/wasi/libc-bottom-half/cloudlibc/src/libc/unistd/linkat.c b/lib/libc/wasi/libc-bottom-half/cloudlibc/src/libc/unistd/linkat.c index 683dd4cf65..d57f5621d6 100644 --- a/lib/libc/wasi/libc-bottom-half/cloudlibc/src/libc/unistd/linkat.c +++ b/lib/libc/wasi/libc-bottom-half/cloudlibc/src/libc/unistd/linkat.c @@ -2,8 +2,6 @@ // // SPDX-License-Identifier: BSD-2-Clause -#include <common/errno.h> - #include <wasi/api.h> #include 
<errno.h> #include <fcntl.h> @@ -19,7 +17,7 @@ int __wasilibc_nocwd_linkat(int fd1, const char *path1, int fd2, const char *pat // Perform system call. __wasi_errno_t error = __wasi_path_link(fd1, lookup1_flags, path1, fd2, path2); if (error != 0) { - errno = errno_fixup_directory(fd1, errno_fixup_directory(fd2, error)); + errno = error; return -1; } return 0; diff --git a/lib/libc/wasi/libc-bottom-half/cloudlibc/src/libc/unistd/lseek.c b/lib/libc/wasi/libc-bottom-half/cloudlibc/src/libc/unistd/lseek.c index 0dfc49e4c9..3e0429f10f 100644 --- a/lib/libc/wasi/libc-bottom-half/cloudlibc/src/libc/unistd/lseek.c +++ b/lib/libc/wasi/libc-bottom-half/cloudlibc/src/libc/unistd/lseek.c @@ -22,4 +22,4 @@ off_t __lseek(int fildes, off_t offset, int whence) { return new_offset; } -extern __typeof(__lseek) lseek __attribute__((weak, alias("__lseek"))); +weak_alias(__lseek, lseek); diff --git a/lib/libc/wasi/libc-bottom-half/cloudlibc/src/libc/unistd/readlinkat.c b/lib/libc/wasi/libc-bottom-half/cloudlibc/src/libc/unistd/readlinkat.c index e08afb9b3c..7a3bce27b7 100644 --- a/lib/libc/wasi/libc-bottom-half/cloudlibc/src/libc/unistd/readlinkat.c +++ b/lib/libc/wasi/libc-bottom-half/cloudlibc/src/libc/unistd/readlinkat.c @@ -2,8 +2,6 @@ // // SPDX-License-Identifier: BSD-2-Clause -#include <common/errno.h> - #include <wasi/api.h> #include <errno.h> #include <string.h> @@ -16,7 +14,7 @@ ssize_t __wasilibc_nocwd_readlinkat(int fd, const char *restrict path, char *res __wasi_errno_t error = __wasi_path_readlink(fd, path, (uint8_t*)buf, bufsize, &bufused); if (error != 0) { - errno = errno_fixup_directory(fd, error); + errno = error; return -1; } return bufused; diff --git a/lib/libc/wasi/libc-bottom-half/cloudlibc/src/libc/unistd/symlinkat.c b/lib/libc/wasi/libc-bottom-half/cloudlibc/src/libc/unistd/symlinkat.c index 1705afc1c5..0aa38be2cd 100644 --- a/lib/libc/wasi/libc-bottom-half/cloudlibc/src/libc/unistd/symlinkat.c +++ 
b/lib/libc/wasi/libc-bottom-half/cloudlibc/src/libc/unistd/symlinkat.c @@ -2,8 +2,6 @@ // // SPDX-License-Identifier: BSD-2-Clause -#include <common/errno.h> - #include <wasi/api.h> #include <errno.h> #include <string.h> @@ -12,7 +10,7 @@ int __wasilibc_nocwd_symlinkat(const char *path1, int fd, const char *path2) { __wasi_errno_t error = __wasi_path_symlink(path1, fd, path2); if (error != 0) { - errno = errno_fixup_directory(fd, error); + errno = error; return -1; } return 0; diff --git a/lib/libc/wasi/libc-bottom-half/cloudlibc/src/libc/unistd/unlinkat.c b/lib/libc/wasi/libc-bottom-half/cloudlibc/src/libc/unistd/unlinkat.c index ea795c2a2d..351bf92645 100644 --- a/lib/libc/wasi/libc-bottom-half/cloudlibc/src/libc/unistd/unlinkat.c +++ b/lib/libc/wasi/libc-bottom-half/cloudlibc/src/libc/unistd/unlinkat.c @@ -2,8 +2,6 @@ // // SPDX-License-Identifier: BSD-2-Clause -#include <common/errno.h> - #include <wasi/api.h> #include <wasi/libc.h> #include <errno.h> diff --git a/lib/libc/wasi/libc-bottom-half/crt/crt1-command.c b/lib/libc/wasi/libc-bottom-half/crt/crt1-command.c index 93279fbed3..fb9ee71fb4 100644 --- a/lib/libc/wasi/libc-bottom-half/crt/crt1-command.c +++ b/lib/libc/wasi/libc-bottom-half/crt/crt1-command.c @@ -1,18 +1,53 @@ +#ifdef _REENTRANT +#include <stdatomic.h> +extern void __wasi_init_tp(void); +#endif #include <wasi/api.h> -#include <stdlib.h> extern void __wasm_call_ctors(void); -extern int __original_main(void); +extern int __main_void(void); extern void __wasm_call_dtors(void); __attribute__((export_name("_start"))) void _start(void) { - // Call `__original_main` which will either be the application's zero-argument - // `__original_main` function or a libc routine which calls `__main_void`. - // TODO: Call `main` directly once we no longer have to support old compilers. - int r = __original_main(); + // Commands should only be called once per instance. This simple check + // ensures that the `_start` function isn't started more than once. 
+ // + // We use `volatile` here to prevent the store to `started` from being + // sunk past any subsequent code, and to prevent any compiler from + // optimizing based on the knowledge that `_start` is the program + // entrypoint. +#ifdef _REENTRANT + static volatile _Atomic int started = 0; + int expected = 0; + if (!atomic_compare_exchange_strong(&started, &expected, 1)) { + __builtin_trap(); + } +#else + static volatile int started = 0; + if (started != 0) { + __builtin_trap(); + } + started = 1; +#endif + +#ifdef _REENTRANT + __wasi_init_tp(); +#endif + + // The linker synthesizes this to call constructors. + __wasm_call_ctors(); + + // Call `__main_void` which will either be the application's zero-argument + // `__main_void` function or a libc routine which obtains the command-line + // arguments and calls `__main_argv_argc`. + int r = __main_void(); + + // Call atexit functions, destructors, stdio cleanup, etc. + __wasm_call_dtors(); - // If main exited successfully, just return, otherwise call `exit`. + // If main exited successfully, just return, otherwise call + // `__wasi_proc_exit`. 
if (r != 0) { - exit(r); + __wasi_proc_exit(r); } } diff --git a/lib/libc/wasi/libc-bottom-half/headers/private/stdlib.h b/lib/libc/wasi/libc-bottom-half/headers/private/stdlib.h index f1e8d6f226..8c2b395a6b 100644 --- a/lib/libc/wasi/libc-bottom-half/headers/private/stdlib.h +++ b/lib/libc/wasi/libc-bottom-half/headers/private/stdlib.h @@ -4,3 +4,5 @@ #include <stddef.h> #include_next <stdlib.h> + +int clearenv(void); diff --git a/lib/libc/wasi/libc-bottom-half/signal/signal.c b/lib/libc/wasi/libc-bottom-half/signal/signal.c index 1c24dfde63..b4634f9590 100644 --- a/lib/libc/wasi/libc-bottom-half/signal/signal.c +++ b/lib/libc/wasi/libc-bottom-half/signal/signal.c @@ -24,7 +24,7 @@ void __SIG_ERR(int sig) { _Noreturn static void core_handler(int sig) { - fprintf(stderr, "Program recieved fatal signal: %s\n", strsignal(sig)); + fprintf(stderr, "Program received fatal signal: %s\n", strsignal(sig)); abort(); } @@ -138,5 +138,5 @@ void (*signal(int sig, void (*func)(int)))(int) { return old; } -extern __typeof(signal) bsd_signal __attribute__((weak, alias("signal"))); -extern __typeof(signal) __sysv_signal __attribute__((weak, alias("signal"))); +extern __typeof(signal) bsd_signal __attribute__((__weak__, alias("signal"))); +extern __typeof(signal) __sysv_signal __attribute__((__weak__, alias("signal"))); diff --git a/lib/libc/wasi/libc-bottom-half/sources/__main_argc_argv.c b/lib/libc/wasi/libc-bottom-half/sources/__main_argc_argv.c deleted file mode 100644 index decaa2d1e3..0000000000 --- a/lib/libc/wasi/libc-bottom-half/sources/__main_argc_argv.c +++ /dev/null @@ -1,10 +0,0 @@ -// New compilers define `__main_argc_argv`. If that doesn't exist, we -// may get called here. Old compilers define `main` expecting an -// argv/argc, so call that. -// TODO: Remove this layer when we no longer have to support old compilers. 
-int __wasilibc_main(int argc, char *argv[]) asm("main"); - -__attribute__((weak, nodebug)) -int __main_argc_argv(int argc, char *argv[]) { - return __wasilibc_main(argc, argv); -} diff --git a/lib/libc/wasi/libc-bottom-half/sources/__main_void.c b/lib/libc/wasi/libc-bottom-half/sources/__main_void.c index 6be5c1e6a1..997078f43b 100644 --- a/lib/libc/wasi/libc-bottom-half/sources/__main_void.c +++ b/lib/libc/wasi/libc-bottom-half/sources/__main_void.c @@ -2,13 +2,18 @@ #include <stdlib.h> #include <sysexits.h> +int __wasilibc_main(int argc, char *argv[]) asm("main"); + // The user's `main` function, expecting arguments. -int __main_argc_argv(int argc, char *argv[]); +__attribute__((__weak__, nodebug)) +int __main_argc_argv(int argc, char *argv[]) { + return __wasilibc_main(argc, argv); +} // If the user's `main` function expects arguments, the compiler will rename // it to `__main_argc_argv`, and this version will get linked in, which // initializes the argument data and calls `__main_argc_argv`. -__attribute__((weak, nodebug)) +__attribute__((__weak__, nodebug)) int __main_void(void) { __wasi_errno_t err; diff --git a/lib/libc/wasi/libc-bottom-half/sources/__original_main.c b/lib/libc/wasi/libc-bottom-half/sources/__original_main.c deleted file mode 100644 index 73564d46f0..0000000000 --- a/lib/libc/wasi/libc-bottom-half/sources/__original_main.c +++ /dev/null @@ -1,10 +0,0 @@ -// Old compilers define `__original_main`. If that doesn't exist, we -// get called here. New compilers define `__main_void`. If that doesn't -// exist, we'll try something else. -// TODO: Remove this layer when we no longer have to support old compilers. 
-int __main_void(void); - -__attribute__((weak)) -int __original_main(void) { - return __main_void(); -} diff --git a/lib/libc/wasi/libc-bottom-half/sources/__wasilibc_dt.c b/lib/libc/wasi/libc-bottom-half/sources/__wasilibc_dt.c new file mode 100644 index 0000000000..b06460fcc9 --- /dev/null +++ b/lib/libc/wasi/libc-bottom-half/sources/__wasilibc_dt.c @@ -0,0 +1,34 @@ +#include <__header_dirent.h> +#include <__mode_t.h> + +int __wasilibc_iftodt(int x) { + switch (x) { + case S_IFDIR: return DT_DIR; + case S_IFCHR: return DT_CHR; + case S_IFBLK: return DT_BLK; + case S_IFREG: return DT_REG; + case S_IFIFO: return DT_FIFO; + case S_IFLNK: return DT_LNK; +#ifdef DT_SOCK + case S_IFSOCK: return DT_SOCK; +#endif + default: return DT_UNKNOWN; + } +} + +int __wasilibc_dttoif(int x) { + switch (x) { + case DT_DIR: return S_IFDIR; + case DT_CHR: return S_IFCHR; + case DT_BLK: return S_IFBLK; + case DT_REG: return S_IFREG; + case DT_FIFO: return S_IFIFO; + case DT_LNK: return S_IFLNK; +#ifdef DT_SOCK + case DT_SOCK: return S_IFSOCK; +#endif + case DT_UNKNOWN: + default: + return S_IFSOCK; + } +} diff --git a/lib/libc/wasi/libc-bottom-half/sources/__wasilibc_environ.c b/lib/libc/wasi/libc-bottom-half/sources/__wasilibc_environ.c new file mode 100644 index 0000000000..53d0a553c8 --- /dev/null +++ b/lib/libc/wasi/libc-bottom-half/sources/__wasilibc_environ.c @@ -0,0 +1,14 @@ +#include <wasi/libc-environ.h> + +extern char **__wasilibc_environ; + +// See the comments in libc-environ.h. +char **__wasilibc_get_environ(void) { + // Perform lazy initialization if needed. + __wasilibc_ensure_environ(); + + // Return `environ`. Use the `__wasilibc_`-prefixed name so that we don't + // pull in the `environ` symbol directly, which would lead to eager + // initialization being done instead. 
+ return __wasilibc_environ; +} diff --git a/lib/libc/wasi/libc-bottom-half/sources/__wasilibc_initialize_environ.c b/lib/libc/wasi/libc-bottom-half/sources/__wasilibc_initialize_environ.c index fe6001a190..2d31c5d03a 100644 --- a/lib/libc/wasi/libc-bottom-half/sources/__wasilibc_initialize_environ.c +++ b/lib/libc/wasi/libc-bottom-half/sources/__wasilibc_initialize_environ.c @@ -11,7 +11,7 @@ /// Statically-initialize it to an invalid pointer value so that we can /// detect if it's been explicitly initialized (we can't use `NULL` because /// `clearenv` sets it to NULL. -char **__wasilibc_environ __attribute__((weak)) = (char **)-1; +weak char **__wasilibc_environ = (char **)-1; // See the comments in libc-environ.h. void __wasilibc_ensure_environ(void) { @@ -75,3 +75,19 @@ oserr: software: _Exit(EX_SOFTWARE); } + +// See the comments in libc-environ.h. +void __wasilibc_deinitialize_environ(void) { + if (__wasilibc_environ != (char **)-1) { + // Let libc-top-half clear the old environment-variable strings. + clearenv(); + // Set the pointer to the special init value. + __wasilibc_environ = (char **)-1; + } +} + +// See the comments in libc-environ.h. +weak void __wasilibc_maybe_reinitialize_environ_eagerly(void) { + // This version does nothing. It may be overridden by a version which does + // something if `environ` is used. 
+} diff --git a/lib/libc/wasi/libc-bottom-half/sources/__wasilibc_real.c b/lib/libc/wasi/libc-bottom-half/sources/__wasilibc_real.c index 37ca7d95a1..855a2c6ddd 100644 --- a/lib/libc/wasi/libc-bottom-half/sources/__wasilibc_real.c +++ b/lib/libc/wasi/libc-bottom-half/sources/__wasilibc_real.c @@ -599,6 +599,20 @@ __wasi_errno_t __wasi_random_get( return (uint16_t) ret; } +int32_t __imported_wasi_snapshot_preview1_sock_accept(int32_t arg0, int32_t arg1, int32_t arg2) __attribute__(( + __import_module__("wasi_snapshot_preview1"), + __import_name__("sock_accept") +)); + +__wasi_errno_t __wasi_sock_accept( + __wasi_fd_t fd, + __wasi_fdflags_t flags, + __wasi_fd_t *retptr0 +){ + int32_t ret = __imported_wasi_snapshot_preview1_sock_accept((int32_t) fd, flags, (int32_t) retptr0); + return (uint16_t) ret; +} + int32_t __imported_wasi_snapshot_preview1_sock_recv(int32_t arg0, int32_t arg1, int32_t arg2, int32_t arg3, int32_t arg4, int32_t arg5) __attribute__(( __import_module__("wasi_snapshot_preview1"), __import_name__("sock_recv") @@ -645,3 +659,14 @@ __wasi_errno_t __wasi_sock_shutdown( return (uint16_t) ret; } +#ifdef _REENTRANT +int32_t __imported_wasi_thread_spawn(int32_t arg0) __attribute__(( + __import_module__("wasi"), + __import_name__("thread_spawn") +)); + +__wasi_errno_t __wasi_thread_spawn(void* start_arg) { + int32_t ret = __imported_wasi_thread_spawn((int32_t) start_arg); + return (uint16_t) ret; +} +#endif diff --git a/lib/libc/wasi/libc-bottom-half/sources/__wasilibc_rmdirat.c b/lib/libc/wasi/libc-bottom-half/sources/__wasilibc_rmdirat.c index 2730c23c52..b2b906aa68 100644 --- a/lib/libc/wasi/libc-bottom-half/sources/__wasilibc_rmdirat.c +++ b/lib/libc/wasi/libc-bottom-half/sources/__wasilibc_rmdirat.c @@ -1,4 +1,3 @@ -#include <common/errno.h> #include <wasi/api.h> #include <wasi/libc.h> #include <errno.h> @@ -6,7 +5,7 @@ int __wasilibc_nocwd___wasilibc_rmdirat(int fd, const char *path) { __wasi_errno_t error = __wasi_path_remove_directory(fd, path); if 
(error != 0) { - errno = errno_fixup_directory(fd, error); + errno = error; return -1; } return 0; diff --git a/lib/libc/wasi/libc-bottom-half/sources/__wasilibc_unlinkat.c b/lib/libc/wasi/libc-bottom-half/sources/__wasilibc_unlinkat.c index 21ae69a52b..8b4f6b5cea 100644 --- a/lib/libc/wasi/libc-bottom-half/sources/__wasilibc_unlinkat.c +++ b/lib/libc/wasi/libc-bottom-half/sources/__wasilibc_unlinkat.c @@ -1,4 +1,3 @@ -#include <common/errno.h> #include <wasi/api.h> #include <wasi/libc.h> #include <errno.h> diff --git a/lib/libc/wasi/libc-bottom-half/sources/chdir.c b/lib/libc/wasi/libc-bottom-half/sources/chdir.c index 1a102db20e..37c95a4e56 100644 --- a/lib/libc/wasi/libc-bottom-half/sources/chdir.c +++ b/lib/libc/wasi/libc-bottom-half/sources/chdir.c @@ -9,9 +9,12 @@ #include <wasi/libc.h> #ifdef _REENTRANT -#error "chdir doesn't yet support multiple threads" +void __wasilibc_cwd_lock(void); +void __wasilibc_cwd_unlock(void); +#else +#define __wasilibc_cwd_lock() (void)0 +#define __wasilibc_cwd_unlock() (void)0 #endif - extern char *__wasilibc_cwd; static int __wasilibc_cwd_mallocd = 0; @@ -43,10 +46,10 @@ int chdir(const char *path) // // If `relative_buf` is equal to "." or `abs` is equal to the empty string, // however, we skip that part and the middle slash. - size_t len = strlen(abs) + 1; + size_t abs_len = strlen(abs); int copy_relative = strcmp(relative_buf, ".") != 0; int mid = copy_relative && abs[0] != 0; - char *new_cwd = malloc(len + (copy_relative ? strlen(relative_buf) + mid: 0)+1); + char *new_cwd = malloc(1 + abs_len + mid + (copy_relative ? 
strlen(relative_buf) : 0) + 1); if (new_cwd == NULL) { errno = ENOMEM; return -1; @@ -54,14 +57,16 @@ int chdir(const char *path) new_cwd[0] = '/'; strcpy(new_cwd + 1, abs); if (mid) - new_cwd[len] = '/'; + new_cwd[1 + abs_len] = '/'; if (copy_relative) - strcpy(new_cwd + 1 + mid + strlen(abs), relative_buf); + strcpy(new_cwd + 1 + abs_len + mid, relative_buf); // And set our new malloc'd buffer into the global cwd, freeing the // previous one if necessary. + __wasilibc_cwd_lock(); char *prev_cwd = __wasilibc_cwd; __wasilibc_cwd = new_cwd; + __wasilibc_cwd_unlock(); if (__wasilibc_cwd_mallocd) free(prev_cwd); __wasilibc_cwd_mallocd = 1; @@ -77,11 +82,13 @@ static const char *make_absolute(const char *path) { return path; } +#ifndef _REENTRANT // If the path is empty, or points to the current directory, then return // the current directory. if (path[0] == 0 || !strcmp(path, ".") || !strcmp(path, "./")) { return __wasilibc_cwd; } +#endif // If the path starts with `./` then we won't be appending that to the cwd. if (path[0] == '.' && path[1] == '/') @@ -90,18 +97,30 @@ static const char *make_absolute(const char *path) { // Otherwise we'll take the current directory, add a `/`, and then add the // input `path`. Note that this doesn't do any normalization (like removing // `/./`). + __wasilibc_cwd_lock(); size_t cwd_len = strlen(__wasilibc_cwd); - size_t path_len = strlen(path); + size_t path_len = path ? strlen(path) : 0; + __wasilibc_cwd_unlock(); int need_slash = __wasilibc_cwd[cwd_len - 1] == '/' ? 
0 : 1; size_t alloc_len = cwd_len + path_len + 1 + need_slash; if (alloc_len > make_absolute_len) { char *tmp = realloc(make_absolute_buf, alloc_len); - if (tmp == NULL) + if (tmp == NULL) { + __wasilibc_cwd_unlock(); return NULL; + } make_absolute_buf = tmp; make_absolute_len = alloc_len; } strcpy(make_absolute_buf, __wasilibc_cwd); + __wasilibc_cwd_unlock(); + +#ifdef _REENTRANT + if (path[0] == 0 || !strcmp(path, ".") || !strcmp(path, "./")) { + return make_absolute_buf; + } +#endif + if (need_slash) strcpy(make_absolute_buf + cwd_len, "/"); strcpy(make_absolute_buf + cwd_len + need_slash, path); diff --git a/lib/libc/wasi/libc-bottom-half/sources/environ.c b/lib/libc/wasi/libc-bottom-half/sources/environ.c index bc5a078724..50d60deff4 100644 --- a/lib/libc/wasi/libc-bottom-half/sources/environ.c +++ b/lib/libc/wasi/libc-bottom-half/sources/environ.c @@ -9,10 +9,8 @@ // `__wasilibc_environ`, which is initialized with a constructor function, so // that it's initialized whenever user code might want to access it. char **__wasilibc_environ; -extern __typeof(__wasilibc_environ) _environ - __attribute__((weak, alias("__wasilibc_environ"))); -extern __typeof(__wasilibc_environ) environ - __attribute__((weak, alias("__wasilibc_environ"))); +weak_alias(__wasilibc_environ, _environ); +weak_alias(__wasilibc_environ, environ); // We define this function here in the same source file as // `__wasilibc_environ`, so that this function is called in iff environment @@ -24,3 +22,10 @@ __attribute__((constructor(50))) static void __wasilibc_initialize_environ_eagerly(void) { __wasilibc_initialize_environ(); } + +// See the comments in libc-environ.h. +void __wasilibc_maybe_reinitialize_environ_eagerly(void) { + // This translation unit is linked in if `environ` is used, meaning we need + // to eagerly reinitialize the environment variables. 
+ __wasilibc_initialize_environ(); +} diff --git a/lib/libc/wasi/libc-bottom-half/sources/getcwd.c b/lib/libc/wasi/libc-bottom-half/sources/getcwd.c index 6fea2a20d0..3b1ce70694 100644 --- a/lib/libc/wasi/libc-bottom-half/sources/getcwd.c +++ b/lib/libc/wasi/libc-bottom-half/sources/getcwd.c @@ -1,30 +1,39 @@ #include <unistd.h> #include <errno.h> #include <string.h> +#include "lock.h" + +char *__wasilibc_cwd = "/"; -// For threads this needs to synchronize with chdir #ifdef _REENTRANT -#error "getcwd doesn't yet support multiple threads" +static volatile int lock[1]; +void __wasilibc_cwd_lock(void) { LOCK(lock); } +void __wasilibc_cwd_unlock(void) { UNLOCK(lock); } +#else +#define __wasilibc_cwd_lock() (void)0 +#define __wasilibc_cwd_unlock() (void)0 #endif -char *__wasilibc_cwd = "/"; - char *getcwd(char *buf, size_t size) { + __wasilibc_cwd_lock(); if (!buf) { buf = strdup(__wasilibc_cwd); if (!buf) { errno = ENOMEM; + __wasilibc_cwd_unlock(); return NULL; } } else { size_t len = strlen(__wasilibc_cwd); if (size < len + 1) { errno = ERANGE; + __wasilibc_cwd_unlock(); return NULL; } strcpy(buf, __wasilibc_cwd); } + __wasilibc_cwd_unlock(); return buf; } diff --git a/lib/libc/wasi/libc-bottom-half/sources/getentropy.c b/lib/libc/wasi/libc-bottom-half/sources/getentropy.c index 7f96b85e1f..e540e7e319 100644 --- a/lib/libc/wasi/libc-bottom-half/sources/getentropy.c +++ b/lib/libc/wasi/libc-bottom-half/sources/getentropy.c @@ -1,10 +1,6 @@ -#include <wasi/api.h> #include <errno.h> #include <unistd.h> - -#ifdef _REENTRANT -#error With threads support, getentropy is not intended to be a cancellation point. 
-#endif +#include <wasi/api.h> int __getentropy(void *buffer, size_t len) { if (len > 256) { @@ -21,4 +17,4 @@ int __getentropy(void *buffer, size_t len) { return 0; } -extern __typeof(__getentropy) getentropy __attribute__((weak, alias("__getentropy"))); +weak_alias(__getentropy, getentropy); diff --git a/lib/libc/wasi/libc-bottom-half/sources/isatty.c b/lib/libc/wasi/libc-bottom-half/sources/isatty.c index c6f8662816..54aee809c7 100644 --- a/lib/libc/wasi/libc-bottom-half/sources/isatty.c +++ b/lib/libc/wasi/libc-bottom-half/sources/isatty.c @@ -19,4 +19,4 @@ int __isatty(int fd) { return 1; } -extern __typeof(__isatty) isatty __attribute__((weak, alias("__isatty"))); +extern __typeof(__isatty) isatty __attribute__((__weak__, alias("__isatty"))); diff --git a/lib/libc/wasi/libc-bottom-half/sources/preopens.c b/lib/libc/wasi/libc-bottom-half/sources/preopens.c index b6fb9f7a7c..7293c8c49e 100644 --- a/lib/libc/wasi/libc-bottom-half/sources/preopens.c +++ b/lib/libc/wasi/libc-bottom-half/sources/preopens.c @@ -2,14 +2,11 @@ //! environment, with associated path prefixes, which can be used to map //! absolute paths to capabilities with relative paths. -#ifdef _REENTRANT -#error "__wasilibc_register_preopened_fd doesn't yet support multiple threads" -#endif - #include <assert.h> #include <errno.h> #include <fcntl.h> #include <limits.h> +#include <lock.h> #include <stdbool.h> #include <stdlib.h> #include <string.h> @@ -32,6 +29,12 @@ static preopen *preopens; static size_t num_preopens; static size_t preopen_capacity; +/// Access to the the above preopen must be protected in the presence of +/// threads. +#ifdef _REENTRANT +static volatile int lock[1]; +#endif + #ifdef NDEBUG #define assert_invariants() // assertions disabled #else @@ -55,14 +58,17 @@ static void assert_invariants(void) { /// Allocate space for more preopens. Returns 0 on success and -1 on failure. 
static int resize(void) { + LOCK(lock); size_t start_capacity = 4; size_t old_capacity = preopen_capacity; size_t new_capacity = old_capacity == 0 ? start_capacity : old_capacity * 2; preopen *old_preopens = preopens; preopen *new_preopens = calloc(sizeof(preopen), new_capacity); - if (new_preopens == NULL) + if (new_preopens == NULL) { + UNLOCK(lock); return -1; + } memcpy(new_preopens, old_preopens, num_preopens * sizeof(preopen)); preopens = new_preopens; @@ -70,6 +76,7 @@ static int resize(void) { free(old_preopens); assert_invariants(); + UNLOCK(lock); return 0; } @@ -97,21 +104,28 @@ static const char *strip_prefixes(const char *path) { /// /// This function takes ownership of `prefix`. static int internal_register_preopened_fd(__wasi_fd_t fd, const char *relprefix) { + LOCK(lock); + // Check preconditions. assert_invariants(); assert(fd != AT_FDCWD); assert(fd != -1); assert(relprefix != NULL); - if (num_preopens == preopen_capacity && resize() != 0) + if (num_preopens == preopen_capacity && resize() != 0) { + UNLOCK(lock); return -1; + } char *prefix = strdup(strip_prefixes(relprefix)); - if (prefix == NULL) + if (prefix == NULL) { + UNLOCK(lock); return -1; + } preopens[num_preopens++] = (preopen) { prefix, fd, }; assert_invariants(); + UNLOCK(lock); return 0; } @@ -166,6 +180,7 @@ int __wasilibc_find_abspath(const char *path, // recently added preopens take precedence over less recently addded ones. 
size_t match_len = 0; int fd = -1; + LOCK(lock); for (size_t i = num_preopens; i > 0; --i) { const preopen *pre = &preopens[i - 1]; const char *prefix = pre->prefix; @@ -182,6 +197,7 @@ int __wasilibc_find_abspath(const char *path, *abs_prefix = prefix; } } + UNLOCK(lock); if (fd == -1) { errno = ENOENT; diff --git a/lib/libc/wasi/libc-top-half/musl/arch/wasm32/atomic_arch.h b/lib/libc/wasi/libc-top-half/musl/arch/wasm32/atomic_arch.h index 01412d40bb..dd9428c942 100644 --- a/lib/libc/wasi/libc-top-half/musl/arch/wasm32/atomic_arch.h +++ b/lib/libc/wasi/libc-top-half/musl/arch/wasm32/atomic_arch.h @@ -1,7 +1,3 @@ -#ifdef _REENTRANT -#error "multiple threads not supported in musl yet" -#endif - #define a_barrier() (__sync_synchronize()) #define a_cas(p, t, s) (__sync_val_compare_and_swap((p), (t), (s))) #define a_crash() (__builtin_trap()) diff --git a/lib/libc/wasi/libc-top-half/musl/arch/wasm32/pthread_arch.h b/lib/libc/wasi/libc-top-half/musl/arch/wasm32/pthread_arch.h index 80424d0ae0..58e76ab0a5 100644 --- a/lib/libc/wasi/libc-top-half/musl/arch/wasm32/pthread_arch.h +++ b/lib/libc/wasi/libc-top-half/musl/arch/wasm32/pthread_arch.h @@ -1,12 +1,5 @@ -#ifdef _REENTRANT -#error "multiple threads not supported in musl yet" -#endif +extern _Thread_local struct __pthread __wasilibc_pthread_self; -static inline struct pthread *__pthread_self(void) -{ - return (struct pthread *)-1; +static inline uintptr_t __get_tp() { + return (uintptr_t)&__wasilibc_pthread_self; } - -#define TP_ADJ(p) (p) - -#define tls_mod_off_t unsigned long long diff --git a/lib/libc/wasi/libc-top-half/musl/include/limits.h b/lib/libc/wasi/libc-top-half/musl/include/limits.h index a78cb2f7fb..2fc0d2a38a 100644 --- a/lib/libc/wasi/libc-top-half/musl/include/limits.h +++ b/lib/libc/wasi/libc-top-half/musl/include/limits.h @@ -70,7 +70,7 @@ #define PTHREAD_STACK_MIN 2048 #define PTHREAD_DESTRUCTOR_ITERATIONS 4 #endif -#ifdef __wasilibc_unmodified_upstream /* WASI has no semaphores */ +#if 
defined(__wasilibc_unmodified_upstream) || defined(_REENTRANT) #define SEM_VALUE_MAX 0x7fffffff #define SEM_NSEMS_MAX 256 #endif diff --git a/lib/libc/wasi/libc-top-half/musl/include/pthread.h b/lib/libc/wasi/libc-top-half/musl/include/pthread.h index b0801d4060..01fe5f2930 100644 --- a/lib/libc/wasi/libc-top-half/musl/include/pthread.h +++ b/lib/libc/wasi/libc-top-half/musl/include/pthread.h @@ -103,8 +103,10 @@ int pthread_setcanceltype(int, int *); void pthread_testcancel(void); int pthread_cancel(pthread_t); +#ifdef __wasilibc_unmodified_upstream /* WASI has no CPU scheduling support. */ int pthread_getschedparam(pthread_t, int *__restrict, struct sched_param *__restrict); int pthread_setschedparam(pthread_t, int, const struct sched_param *); +#endif int pthread_setschedprio(pthread_t, int); int pthread_once(pthread_once_t *, void (*)(void)); @@ -167,8 +169,10 @@ int pthread_attr_getscope(const pthread_attr_t *__restrict, int *__restrict); int pthread_attr_setscope(pthread_attr_t *, int); int pthread_attr_getschedpolicy(const pthread_attr_t *__restrict, int *__restrict); int pthread_attr_setschedpolicy(pthread_attr_t *, int); +#ifdef __wasilibc_unmodified_upstream /* WASI has no CPU scheduling support. 
*/ int pthread_attr_getschedparam(const pthread_attr_t *__restrict, struct sched_param *__restrict); int pthread_attr_setschedparam(pthread_attr_t *__restrict, const struct sched_param *__restrict); +#endif int pthread_attr_getinheritsched(const pthread_attr_t *__restrict, int *__restrict); int pthread_attr_setinheritsched(pthread_attr_t *, int); diff --git a/lib/libc/wasi/libc-top-half/musl/include/unistd.h b/lib/libc/wasi/libc-top-half/musl/include/unistd.h index 9231d605c4..b5cb5c6630 100644 --- a/lib/libc/wasi/libc-top-half/musl/include/unistd.h +++ b/lib/libc/wasi/libc-top-half/musl/include/unistd.h @@ -244,7 +244,9 @@ void *sbrk(intptr_t); pid_t vfork(void); int vhangup(void); int chroot(const char *); +#endif int getpagesize(void); +#ifdef __wasilibc_unmodified_upstream /* WASI has no processes */ int getdtablesize(void); int sethostname(const char *, size_t); int getdomainname(char *, size_t); diff --git a/lib/libc/wasi/libc-top-half/musl/src/conf/sysconf.c b/lib/libc/wasi/libc-top-half/musl/src/conf/sysconf.c index 1a406e60b2..c72174f27c 100644 --- a/lib/libc/wasi/libc-top-half/musl/src/conf/sysconf.c +++ b/lib/libc/wasi/libc-top-half/musl/src/conf/sysconf.c @@ -251,7 +251,7 @@ long sysconf(int name) return DELAYTIMER_MAX; case JT_NPROCESSORS_CONF & 255: case JT_NPROCESSORS_ONLN & 255: ; -#if defined(__wasilibc_unmodified_upstream) || defined(_REENTRANT) +#ifdef __wasilibc_unmodified_upstream unsigned char set[128] = {1}; int i, cnt; __syscall(SYS_sched_getaffinity, 0, sizeof set, set); @@ -259,7 +259,7 @@ long sysconf(int name) for (; set[i]; set[i]&=set[i]-1, cnt++); return cnt; #else - // With no thread support, just say there's 1 processor. 
+ // WASI has no way to query the processor count return 1; #endif #ifdef __wasilibc_unmodified_upstream // WASI has no sysinfo diff --git a/lib/libc/wasi/libc-top-half/musl/src/env/__init_tls.c b/lib/libc/wasi/libc-top-half/musl/src/env/__init_tls.c index a93141ed36..ee785bc11e 100644 --- a/lib/libc/wasi/libc-top-half/musl/src/env/__init_tls.c +++ b/lib/libc/wasi/libc-top-half/musl/src/env/__init_tls.c @@ -1,7 +1,11 @@ +#ifdef __wasilibc_unmodified_upstream #define SYSCALL_NO_TLS 1 #include <elf.h> +#endif #include <limits.h> +#ifdef __wasilibc_unmodified_upstream #include <sys/mman.h> +#endif #include <string.h> #include <stddef.h> #include "pthread_impl.h" @@ -11,15 +15,23 @@ volatile int __thread_list_lock; +#ifndef __wasilibc_unmodified_upstream +void __wasi_init_tp() { + __init_tp((void *)__get_tp()); +} +#endif + int __init_tp(void *p) { pthread_t td = p; td->self = td; +#ifdef __wasilibc_unmodified_upstream int r = __set_thread_area(TP_ADJ(p)); if (r < 0) return -1; if (!r) libc.can_do_threads = 1; td->detach_state = DT_JOINABLE; td->tid = __syscall(SYS_set_tid_address, &__thread_list_lock); +#endif td->locale = &libc.global_locale; td->robust_list.head = &td->robust_list.head; td->sysinfo = __sysinfo; @@ -27,6 +39,8 @@ int __init_tp(void *p) return 0; } +#ifdef __wasilibc_unmodified_upstream + static struct builtin_tls { char c; struct pthread pt; @@ -35,9 +49,15 @@ static struct builtin_tls { #define MIN_TLS_ALIGN offsetof(struct builtin_tls, pt) static struct tls_module main_tls; +#endif + +#ifndef __wasilibc_unmodified_upstream +extern void __wasm_init_tls(void*); +#endif void *__copy_tls(unsigned char *mem) { +#ifdef __wasilibc_unmodified_upstream pthread_t td; struct tls_module *p; size_t i; @@ -69,8 +89,20 @@ void *__copy_tls(unsigned char *mem) dtv[0] = libc.tls_cnt; td->dtv = dtv; return td; +#else + size_t tls_align = __builtin_wasm_tls_align(); + volatile void* tls_base = __builtin_wasm_tls_base(); + mem += tls_align; + mem -= (uintptr_t)mem & 
(tls_align-1); + __wasm_init_tls(mem); + __asm__("local.get %0\n" + "global.set __tls_base\n" + :: "r"(tls_base)); + return mem; +#endif } +#ifdef __wasilibc_unmodified_upstream #if ULONG_MAX == 0xffffffff typedef Elf32_Phdr Phdr; #else @@ -151,3 +183,4 @@ static void static_init_tls(size_t *aux) } weak_alias(static_init_tls, __init_tls); +#endif diff --git a/lib/libc/wasi/libc-top-half/musl/src/internal/libc.h b/lib/libc/wasi/libc-top-half/musl/src/internal/libc.h index 9b3984742b..355c3a4e23 100644 --- a/lib/libc/wasi/libc-top-half/musl/src/internal/libc.h +++ b/lib/libc/wasi/libc-top-half/musl/src/internal/libc.h @@ -18,8 +18,10 @@ struct tls_module { }; struct __libc { -#if defined(__wasilibc_unmodified_upstream) || defined(_REENTRANT) +#ifdef __wasilibc_unmodified_upstream char can_do_threads; +#endif +#if defined(__wasilibc_unmodified_upstream) || defined(_REENTRANT) char threaded; #endif #ifdef __wasilibc_unmodified_upstream // WASI doesn't currently use any code that needs "secure" mode @@ -32,7 +34,7 @@ struct __libc { #ifdef __wasilibc_unmodified_upstream // WASI has no auxv size_t *auxv; #endif -#if defined(__wasilibc_unmodified_upstream) || defined(_REENTRANT) +#ifdef __wasilibc_unmodified_upstream // WASI use different TLS implement struct tls_module *tls_head; size_t tls_size, tls_align, tls_cnt; #endif diff --git a/lib/libc/wasi/libc-top-half/musl/src/internal/pthread_impl.h b/lib/libc/wasi/libc-top-half/musl/src/internal/pthread_impl.h index de2b9d8b47..a6d188bb3c 100644 --- a/lib/libc/wasi/libc-top-half/musl/src/internal/pthread_impl.h +++ b/lib/libc/wasi/libc-top-half/musl/src/internal/pthread_impl.h @@ -2,12 +2,18 @@ #define _PTHREAD_IMPL_H #include <pthread.h> +#ifdef __wasilibc_unmodified_upstream #include <signal.h> +#endif #include <errno.h> #include <limits.h> +#ifdef __wasilibc_unmodified_upstream #include <sys/mman.h> +#endif #include "libc.h" +#ifdef __wasilibc_unmodified_upstream #include "syscall.h" +#endif #include "atomic.h" #include 
"futex.h" @@ -19,9 +25,11 @@ struct pthread { /* Part 1 -- these fields may be external or * internal (accessed via asm) ABI. Do not change. */ struct pthread *self; +#ifdef __wasilibc_unmodified_upstream #ifndef TLS_ABOVE_TP uintptr_t *dtv; #endif +#endif struct pthread *prev, *next; /* non-ABI */ uintptr_t sysinfo; #ifndef TLS_ABOVE_TP @@ -159,9 +167,14 @@ extern hidden volatile int __eintr_valid_flag; hidden int __clone(int (*)(void *), void *, int, void *, ...); hidden int __set_thread_area(void *); +#ifdef __wasilibc_unmodified_upstream /* WASI has no sigaction */ hidden int __libc_sigaction(int, const struct sigaction *, struct sigaction *); +#endif hidden void __unmapself(void *, size_t); +#ifndef __wasilibc_unmodified_upstream +hidden int __wasilibc_futex_wait(volatile void *, int, int, int64_t); +#endif hidden int __timedwait(volatile int *, int, clockid_t, const struct timespec *, int); hidden int __timedwait_cp(volatile int *, int, clockid_t, const struct timespec *, int); hidden void __wait(volatile int *, volatile int *, int, int); @@ -169,14 +182,22 @@ static inline void __wake(volatile void *addr, int cnt, int priv) { if (priv) priv = FUTEX_PRIVATE; if (cnt<0) cnt = INT_MAX; +#ifdef __wasilibc_unmodified_upstream __syscall(SYS_futex, addr, FUTEX_WAKE|priv, cnt) != -ENOSYS || __syscall(SYS_futex, addr, FUTEX_WAKE, cnt); +#else + __builtin_wasm_memory_atomic_notify((int*)addr, cnt); +#endif } static inline void __futexwait(volatile void *addr, int val, int priv) { +#ifdef __wasilibc_unmodified_upstream if (priv) priv = FUTEX_PRIVATE; __syscall(SYS_futex, addr, FUTEX_WAIT|priv, val, 0) != -ENOSYS || __syscall(SYS_futex, addr, FUTEX_WAIT, val, 0); +#else + __wait(addr, NULL, val, priv); +#endif } hidden void __acquire_ptc(void); diff --git a/lib/libc/wasi/libc-top-half/musl/src/internal/syscall.h b/lib/libc/wasi/libc-top-half/musl/src/internal/syscall.h index d5f294d437..32e0e8ac38 100644 --- a/lib/libc/wasi/libc-top-half/musl/src/internal/syscall.h +++ 
b/lib/libc/wasi/libc-top-half/musl/src/internal/syscall.h @@ -1,3 +1,4 @@ +#ifdef __wasilibc_unmodified_upstream #ifndef _INTERNAL_SYSCALL_H #define _INTERNAL_SYSCALL_H @@ -396,3 +397,4 @@ hidden void __procfdname(char __buf[static 15+3*sizeof(int)], unsigned); hidden void *__vdsosym(const char *, const char *); #endif +#endif diff --git a/lib/libc/wasi/libc-top-half/musl/src/prng/random.c b/lib/libc/wasi/libc-top-half/musl/src/prng/random.c index 3326f09f0d..daac028d86 100644 --- a/lib/libc/wasi/libc-top-half/musl/src/prng/random.c +++ b/lib/libc/wasi/libc-top-half/musl/src/prng/random.c @@ -23,7 +23,7 @@ static int n = 31; static int i = 3; static int j = 0; static uint32_t *x = init+1; -#ifdef __wasilibc_unmodified_upstream +#if defined(__wasilibc_unmodified_upstream) || defined(_REENTRANT) static volatile int lock[1]; volatile int *const __random_lockptr = lock; #endif diff --git a/lib/libc/wasi/libc-top-half/musl/src/string/memcpy.c b/lib/libc/wasi/libc-top-half/musl/src/string/memcpy.c index 06e88742b1..3cc7e28f3b 100644 --- a/lib/libc/wasi/libc-top-half/musl/src/string/memcpy.c +++ b/lib/libc/wasi/libc-top-half/musl/src/string/memcpy.c @@ -4,6 +4,10 @@ void *memcpy(void *restrict dest, const void *restrict src, size_t n) { +#if defined(__wasm_bulk_memory__) + if (n > BULK_MEMORY_THRESHOLD) + return __builtin_memcpy(dest, src, n); +#endif unsigned char *d = dest; const unsigned char *s = src; diff --git a/lib/libc/wasi/libc-top-half/musl/src/string/memmove.c b/lib/libc/wasi/libc-top-half/musl/src/string/memmove.c index 5dc9cdb924..7376a520bb 100644 --- a/lib/libc/wasi/libc-top-half/musl/src/string/memmove.c +++ b/lib/libc/wasi/libc-top-half/musl/src/string/memmove.c @@ -8,6 +8,10 @@ typedef __attribute__((__may_alias__)) size_t WT; void *memmove(void *dest, const void *src, size_t n) { +#if defined(__wasm_bulk_memory__) + if (n > BULK_MEMORY_THRESHOLD) + return __builtin_memmove(dest, src, n); +#endif char *d = dest; const char *s = src; diff --git 
a/lib/libc/wasi/libc-top-half/musl/src/string/memset.c b/lib/libc/wasi/libc-top-half/musl/src/string/memset.c index 5613a1486e..f64c9cf5ae 100644 --- a/lib/libc/wasi/libc-top-half/musl/src/string/memset.c +++ b/lib/libc/wasi/libc-top-half/musl/src/string/memset.c @@ -3,6 +3,10 @@ void *memset(void *dest, int c, size_t n) { +#if defined(__wasm_bulk_memory__) + if (n > BULK_MEMORY_THRESHOLD) + return __builtin_memset(dest, c, n); +#endif unsigned char *s = dest; size_t k; diff --git a/lib/libc/wasi/libc-top-half/musl/src/thread/__timedwait.c b/lib/libc/wasi/libc-top-half/musl/src/thread/__timedwait.c index 666093be98..7d6f6be4ad 100644 --- a/lib/libc/wasi/libc-top-half/musl/src/thread/__timedwait.c +++ b/lib/libc/wasi/libc-top-half/musl/src/thread/__timedwait.c @@ -5,6 +5,7 @@ #include "syscall.h" #include "pthread_impl.h" +#ifdef __wasilibc_unmodified_upstream #define IS32BIT(x) !((x)+0x80000000ULL>>32) #define CLAMP(x) (int)(IS32BIT(x) ? (x) : 0x7fffffffU+((0ULL+(x))>>63)) @@ -28,6 +29,16 @@ static int __futex4_cp(volatile void *addr, int op, int val, const struct timesp static volatile int dummy = 0; weak_alias(dummy, __eintr_valid_flag); +#else +static int __futex4_cp(volatile void *addr, int op, int val, const struct timespec *to) +{ + int64_t max_wait_ns = -1; + if (to) { + max_wait_ns = (int64_t)(to->tv_sec * 1000000000 + to->tv_nsec); + } + return __wasilibc_futex_wait(addr, op, val, max_wait_ns); +} +#endif int __timedwait_cp(volatile int *addr, int val, clockid_t clk, const struct timespec *at, int priv) @@ -51,11 +62,13 @@ int __timedwait_cp(volatile int *addr, int val, r = -__futex4_cp(addr, FUTEX_WAIT|priv, val, top); if (r != EINTR && r != ETIMEDOUT && r != ECANCELED) r = 0; +#ifdef __wasilibc_unmodified_upstream /* Mitigate bug in old kernels wrongly reporting EINTR for non- * interrupting (SA_RESTART) signal handlers. 
This is only practical * when NO interrupting signal handlers have been installed, and * works by sigaction tracking whether that's the case. */ if (r == EINTR && !__eintr_valid_flag) r = 0; +#endif return r; } diff --git a/lib/libc/wasi/libc-top-half/musl/src/thread/__wait.c b/lib/libc/wasi/libc-top-half/musl/src/thread/__wait.c index dc33c1a309..c0e4aac796 100644 --- a/lib/libc/wasi/libc-top-half/musl/src/thread/__wait.c +++ b/lib/libc/wasi/libc-top-half/musl/src/thread/__wait.c @@ -1,4 +1,38 @@ #include "pthread_impl.h" +#ifndef __wasilibc_unmodified_upstream +#include "assert.h" +#endif + +#ifndef __wasilibc_unmodified_upstream +// Use WebAssembly's `wait` instruction to implement a futex. Note that `op` is +// unused but retained as a parameter to match the original signature of the +// syscall and that, for `max_wait_ns`, -1 (or any negative number) means wait +// indefinitely. +// +// Adapted from Emscripten: see +// https://github.com/emscripten-core/emscripten/blob/058a9fff/system/lib/pthread/emscripten_futex_wait.c#L111-L150. +int __wasilibc_futex_wait(volatile void *addr, int op, int val, int64_t max_wait_ns) +{ + if ((((intptr_t)addr) & 3) != 0) { + return -EINVAL; + } + + int ret = __builtin_wasm_memory_atomic_wait32((int *)addr, val, max_wait_ns); + + // memory.atomic.wait32 returns: + // 0 => "ok", woken by another agent. 
+ // 1 => "not-equal", loaded value != expected value + // 2 => "timed-out", the timeout expired + if (ret == 1) { + return -EWOULDBLOCK; + } + if (ret == 2) { + return -ETIMEDOUT; + } + assert(ret == 0); + return 0; +} +#endif void __wait(volatile int *addr, volatile int *waiters, int val, int priv) { @@ -10,8 +44,12 @@ void __wait(volatile int *addr, volatile int *waiters, int val, int priv) } if (waiters) a_inc(waiters); while (*addr==val) { +#ifdef __wasilibc_unmodified_upstream __syscall(SYS_futex, addr, FUTEX_WAIT|priv, val, 0) != -ENOSYS || __syscall(SYS_futex, addr, FUTEX_WAIT, val, 0); +#else + __wasilibc_futex_wait(addr, FUTEX_WAIT, val, 0); +#endif } if (waiters) a_dec(waiters); } diff --git a/lib/libc/wasi/libc-top-half/musl/src/thread/pthread_cond_timedwait.c b/lib/libc/wasi/libc-top-half/musl/src/thread/pthread_cond_timedwait.c index 6b761455c4..ba985f9116 100644 --- a/lib/libc/wasi/libc-top-half/musl/src/thread/pthread_cond_timedwait.c +++ b/lib/libc/wasi/libc-top-half/musl/src/thread/pthread_cond_timedwait.c @@ -1,5 +1,9 @@ #include "pthread_impl.h" +#ifndef __wasilibc_unmodified_upstream +#include <common/clock.h> +#endif + /* * struct waiter * @@ -48,9 +52,15 @@ static inline void unlock(volatile int *l) static inline void unlock_requeue(volatile int *l, volatile int *r, int w) { a_store(l, 0); +#ifdef __wasilibc_unmodified_upstream if (w) __wake(l, 1, 1); else __syscall(SYS_futex, l, FUTEX_REQUEUE|FUTEX_PRIVATE, 0, 1, r) != -ENOSYS || __syscall(SYS_futex, l, FUTEX_REQUEUE, 0, 1, r); +#else + // Always wake due to lack of requeue system call in WASI + // This can impact the performance, so we might need to re-visit that decision + __wake(l, 1, 1); +#endif } enum { @@ -63,6 +73,9 @@ int __pthread_cond_timedwait(pthread_cond_t *restrict c, pthread_mutex_t *restri { struct waiter node = { 0 }; int e, seq, clock = c->_c_clock, cs, shared=0, oldstate, tmp; +#ifndef __wasilibc_unmodified_upstream + struct __clockid clock_id = { .id = clock }; +#endif 
volatile int *fut; if ((m->_m_type&15) && (m->_m_lock&INT_MAX) != __pthread_self()->tid) @@ -97,7 +110,11 @@ int __pthread_cond_timedwait(pthread_cond_t *restrict c, pthread_mutex_t *restri __pthread_setcancelstate(PTHREAD_CANCEL_MASKED, &cs); if (cs == PTHREAD_CANCEL_DISABLE) __pthread_setcancelstate(cs, 0); +#ifdef __wasilibc_unmodified_upstream do e = __timedwait_cp(fut, seq, clock, ts, !shared); +#else + do e = __timedwait_cp(fut, seq, &clock_id, ts, !shared); +#endif while (*fut==seq && (!e || e==EINTR)); if (e == EINTR) e = 0; diff --git a/lib/libc/wasi/libc-top-half/musl/src/thread/pthread_condattr_setclock.c b/lib/libc/wasi/libc-top-half/musl/src/thread/pthread_condattr_setclock.c index 7112594134..21ca070c3e 100644 --- a/lib/libc/wasi/libc-top-half/musl/src/thread/pthread_condattr_setclock.c +++ b/lib/libc/wasi/libc-top-half/musl/src/thread/pthread_condattr_setclock.c @@ -1,9 +1,21 @@ #include "pthread_impl.h" +#ifndef __wasilibc_unmodified_upstream +#include <common/clock.h> +#endif + int pthread_condattr_setclock(pthread_condattr_t *a, clockid_t clk) { +#ifdef __wasilibc_unmodified_upstream if (clk < 0 || clk-2U < 2) return EINVAL; +#else + if (clk->id < 0 || clk->id-2U < 2) return EINVAL; +#endif a->__attr &= 0x80000000; +#ifdef __wasilibc_unmodified_upstream a->__attr |= clk; +#else + a->__attr |= clk->id; +#endif return 0; } diff --git a/lib/libc/wasi/libc-top-half/musl/src/thread/pthread_create.c b/lib/libc/wasi/libc-top-half/musl/src/thread/pthread_create.c index 6f187ee89d..1aa7be71a5 100644 --- a/lib/libc/wasi/libc-top-half/musl/src/thread/pthread_create.c +++ b/lib/libc/wasi/libc-top-half/musl/src/thread/pthread_create.c @@ -3,9 +3,16 @@ #include "stdio_impl.h" #include "libc.h" #include "lock.h" +#ifdef __wasilibc_unmodified_upstream #include <sys/mman.h> +#endif #include <string.h> #include <stddef.h> +#ifndef __wasilibc_unmodified_upstream +#include <stdatomic.h> +#endif + +#include <stdalign.h> static void dummy_0() { @@ -14,8 +21,10 @@ 
weak_alias(dummy_0, __acquire_ptc); weak_alias(dummy_0, __release_ptc); weak_alias(dummy_0, __pthread_tsd_run_dtors); weak_alias(dummy_0, __do_orphaned_stdio_locks); +#ifdef __wasilibc_unmodified_upstream weak_alias(dummy_0, __dl_thread_cleanup); weak_alias(dummy_0, __membarrier_init); +#endif static int tl_lock_count; static int tl_lock_waiters; @@ -69,7 +78,9 @@ _Noreturn void __pthread_exit(void *result) __pthread_tsd_run_dtors(); +#ifdef __wasilibc_unmodified_upstream __block_app_sigs(&set); +#endif /* This atomic potentially competes with a concurrent pthread_detach * call; the loser is responsible for freeing thread resources. */ @@ -80,7 +91,9 @@ _Noreturn void __pthread_exit(void *result) * explicitly wait for vmlock holders first. This must be * done before any locks are taken, to avoid lock ordering * issues that could lead to deadlock. */ +#ifdef __wasilibc_unmodified_upstream __vm_wait(); +#endif } /* Access to target the exiting thread with syscalls that use @@ -101,16 +114,20 @@ _Noreturn void __pthread_exit(void *result) __tl_unlock(); UNLOCK(self->killlock); self->detach_state = state; +#ifdef __wasilibc_unmodified_upstream __restore_sigs(&set); +#endif exit(0); } /* At this point we are committed to thread termination. */ +#ifdef __wasilibc_unmodified_upstream /* Process robust list in userspace to handle non-pshared mutexes * and the detached thread case where the robust list head will * be invalid when the kernel would process it. */ __vm_lock(); +#endif volatile void *volatile *rp; while ((rp=self->robust_list.head) && rp != &self->robust_list.head) { pthread_mutex_t *m = (void *)((char *)rp @@ -124,10 +141,14 @@ _Noreturn void __pthread_exit(void *result) if (cont < 0 || waiters) __wake(&m->_m_lock, 1, priv); } +#ifdef __wasilibc_unmodified_upstream __vm_unlock(); +#endif __do_orphaned_stdio_locks(); +#ifdef __wasilibc_unmodified_upstream __dl_thread_cleanup(); +#endif /* Last, unlink thread from the list. 
This change will not be visible * until the lock is released, which only happens after SYS_exit @@ -139,6 +160,15 @@ _Noreturn void __pthread_exit(void *result) self->prev->next = self->next; self->prev = self->next = self; +#ifndef __wasilibc_unmodified_upstream + /* On Linux, the thread is created with CLONE_CHILD_CLEARTID, + * and this lock will unlock by kernel when this thread terminates. + * So we should unlock it here in WebAssembly. + * See also set_tid_address(2) */ + __tl_unlock(); +#endif + +#ifdef __wasilibc_unmodified_upstream if (state==DT_DETACHED && self->map_base) { /* Detached threads must block even implementation-internal * signals, since they will not have a stack in their last @@ -154,6 +184,16 @@ _Noreturn void __pthread_exit(void *result) * and then exits without touching the stack. */ __unmapself(self->map_base, self->map_size); } +#else + if (state==DT_DETACHED && self->map_base) { + // __syscall(SYS_exit) would unlock the thread, list + // do it manually here + __tl_unlock(); + free(self->map_base); + // Can't use `exit()` here, because it is too high level + for (;;) __wasi_proc_exit(0); + } +#endif /* Wake any joiner. 
*/ a_store(&self->detach_state, DT_EXITED); @@ -165,7 +205,15 @@ _Noreturn void __pthread_exit(void *result) self->tid = 0; UNLOCK(self->killlock); +#ifdef __wasilibc_unmodified_upstream for (;;) __syscall(SYS_exit, 0); +#else + // __syscall(SYS_exit) would unlock the thread, list + // do it manually here + __tl_unlock(); + // Can't use `exit()` here, because it is too high level + for (;;) __wasi_proc_exit(0); +#endif } void __do_cleanup_push(struct __ptcb *cb) @@ -181,12 +229,19 @@ void __do_cleanup_pop(struct __ptcb *cb) } struct start_args { +#ifdef __wasilibc_unmodified_upstream void *(*start_func)(void *); void *start_arg; volatile int control; unsigned long sig_mask[_NSIG/8/sizeof(long)]; +#else + void *(*start_func)(void *); + void *start_arg; + void *tls_base; +#endif }; +#ifdef __wasilibc_unmodified_upstream static int start(void *p) { struct start_args *args = p; @@ -195,11 +250,15 @@ static int start(void *p) if (a_cas(&args->control, 1, 2)==1) __wait(&args->control, 0, 2, 1); if (args->control) { +#ifdef __wasilibc_unmodified_upstream __syscall(SYS_set_tid_address, &args->control); for (;;) __syscall(SYS_exit, 0); +#endif } } +#ifdef __wasilibc_unmodified_upstream __syscall(SYS_rt_sigprocmask, SIG_SETMASK, &args->sig_mask, 0, _NSIG/8); +#endif __pthread_exit(args->start_func(args->start_arg)); return 0; } @@ -211,6 +270,31 @@ static int start_c11(void *p) __pthread_exit((void *)(uintptr_t)start(args->start_arg)); return 0; } +#else +__attribute__((export_name("wasi_thread_start"))) +_Noreturn void wasi_thread_start(int tid, void *p) +{ + struct start_args *args = p; + __asm__(".globaltype __tls_base, i32\n" + "local.get %0\n" + "global.set __tls_base\n" + :: "r"(args->tls_base)); + pthread_t self = __pthread_self(); + // Set the thread ID (TID) on the pthread structure. 
The TID is stored + // atomically since it is also stored by the parent thread; this way, + // whichever thread (parent or child) reaches this point first can proceed + // without waiting. + atomic_store((atomic_int *) &(self->tid), tid); + // Set the stack pointer. + __asm__(".globaltype __stack_pointer, i32\n" + "local.get %0\n" + "global.set __stack_pointer\n" + :: "r"(self->stack)); + // Execute the user's start function. + int (*start)(void*) = (int(*)(void*)) args->start_func; + __pthread_exit((void *)(uintptr_t)start(args->start_arg)); +} +#endif #define ROUND(x) (((x)+PAGE_SIZE-1)&-PAGE_SIZE) @@ -236,13 +320,25 @@ int __pthread_create(pthread_t *restrict res, const pthread_attr_t *restrict att size_t size, guard; struct pthread *self, *new; unsigned char *map = 0, *stack = 0, *tsd = 0, *stack_limit; +#ifdef __wasilibc_unmodified_upstream unsigned flags = CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND | CLONE_THREAD | CLONE_SYSVSEM | CLONE_SETTLS | CLONE_PARENT_SETTID | CLONE_CHILD_CLEARTID | CLONE_DETACHED; +#endif pthread_attr_t attr = { 0 }; sigset_t set; - +#ifndef __wasilibc_unmodified_upstream + size_t tls_size = __builtin_wasm_tls_size(); + size_t tls_align = __builtin_wasm_tls_align(); + void* tls_base = __builtin_wasm_tls_base(); + void* new_tls_base; + size_t tls_offset; + tls_size += tls_align; +#endif + +#ifdef __wasilibc_unmodified_upstream if (!libc.can_do_threads) return ENOSYS; +#endif self = __pthread_self(); if (!libc.threaded) { for (FILE *f=*__ofl_lock(); f; f=f->next) @@ -251,9 +347,13 @@ int __pthread_create(pthread_t *restrict res, const pthread_attr_t *restrict att init_file_lock(__stdin_used); init_file_lock(__stdout_used); init_file_lock(__stderr_used); +#ifdef __wasilibc_unmodified_upstream __syscall(SYS_rt_sigprocmask, SIG_UNBLOCK, SIGPT_SET, 0, _NSIG/8); +#endif self->tsd = (void **)__pthread_tsd_main; +#ifdef __wasilibc_unmodified_upstream __membarrier_init(); +#endif libc.threaded = 1; } if (attrp && !c11) attr = *attrp; @@ 
-265,7 +365,11 @@ int __pthread_create(pthread_t *restrict res, const pthread_attr_t *restrict att } if (attr._a_stackaddr) { +#ifdef __wasilibc_unmodified_upstream size_t need = libc.tls_size + __pthread_tsd_size; +#else + size_t need = tls_size + __pthread_tsd_size; +#endif size = attr._a_stacksize; stack = (void *)(attr._a_stackaddr & -16); stack_limit = (void *)(attr._a_stackaddr - size); @@ -274,7 +378,11 @@ int __pthread_create(pthread_t *restrict res, const pthread_attr_t *restrict att * application's stack space. */ if (need < size/8 && need < 2048) { tsd = stack - __pthread_tsd_size; +#ifdef __wasilibc_unmodified_upstream stack = tsd - libc.tls_size; +#else + stack = tsd - tls_size; +#endif memset(stack, 0, need); } else { size = ROUND(need); @@ -283,10 +391,15 @@ int __pthread_create(pthread_t *restrict res, const pthread_attr_t *restrict att } else { guard = ROUND(attr._a_guardsize); size = guard + ROUND(attr._a_stacksize +#ifdef __wasilibc_unmodified_upstream + libc.tls_size + __pthread_tsd_size); +#else + + tls_size + __pthread_tsd_size); +#endif } if (!tsd) { +#ifdef __wasilibc_unmodified_upstream if (guard) { map = __mmap(0, size, PROT_NONE, MAP_PRIVATE|MAP_ANON, -1, 0); if (map == MAP_FAILED) goto fail; @@ -299,14 +412,28 @@ int __pthread_create(pthread_t *restrict res, const pthread_attr_t *restrict att map = __mmap(0, size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANON, -1, 0); if (map == MAP_FAILED) goto fail; } +#else + map = malloc(size); + if (!map) goto fail; +#endif tsd = map + size - __pthread_tsd_size; if (!stack) { +#ifdef __wasilibc_unmodified_upstream stack = tsd - libc.tls_size; +#else + stack = tsd - tls_size; +#endif stack_limit = map + guard; } } +#ifdef __wasilibc_unmodified_upstream new = __copy_tls(tsd - libc.tls_size); +#else + new_tls_base = __copy_tls(tsd - tls_size); + tls_offset = new_tls_base - tls_base; + new = (void*)((uintptr_t)self + tls_offset); +#endif new->map_base = map; new->map_size = size; new->stack = stack; @@ 
-327,6 +454,7 @@ int __pthread_create(pthread_t *restrict res, const pthread_attr_t *restrict att /* Setup argument structure for the new thread on its stack. * It's safe to access from the caller only until the thread * list is unlocked. */ +#ifdef __wasilibc_unmodified_upstream stack -= (uintptr_t)stack % sizeof(uintptr_t); stack -= sizeof(struct start_args); struct start_args *args = (void *)stack; @@ -345,11 +473,35 @@ int __pthread_create(pthread_t *restrict res, const pthread_attr_t *restrict att memcpy(&args->sig_mask, &set, sizeof args->sig_mask); args->sig_mask[(SIGCANCEL-1)/8/sizeof(long)] &= ~(1UL<<((SIGCANCEL-1)%(8*sizeof(long)))); +#else + /* Align the stack to struct start_args */ + stack -= sizeof(struct start_args); + stack -= (uintptr_t)stack % alignof(struct start_args); + struct start_args *args = (void *)stack; + + /* Align the stack to 16 and store it */ + new->stack = (void *)((uintptr_t) stack & -16); + /* Correct the stack size */ + new->stack_size = stack - stack_limit; + + args->start_func = entry; + args->start_arg = arg; + args->tls_base = (void*)new_tls_base; +#endif __tl_lock(); if (!libc.threads_minus_1++) libc.need_locks = 1; +#ifdef __wasilibc_unmodified_upstream ret = __clone((c11 ? start_c11 : start), stack, flags, args, &new->tid, TP_ADJ(new), &__thread_list_lock); - +#else + /* Instead of `__clone`, WASI uses a host API to instantiate a new version + * of the current module and start executing the entry function. The + * wasi-threads specification requires the module to export a + * `wasi_thread_start` function, which is invoked with `args`. */ + ret = __wasi_thread_spawn((void *) args); +#endif + +#ifdef __wasilibc_unmodified_upstream /* All clone failures translate to EAGAIN. 
If explicit scheduling * was requested, attempt it before unlocking the thread list so * that the failed thread is never exposed and so that we can @@ -364,6 +516,20 @@ int __pthread_create(pthread_t *restrict res, const pthread_attr_t *restrict att if (ret) __wait(&args->control, 0, 3, 0); } +#else + /* `wasi_thread_spawn` will either return a host-provided thread ID (TID) + * (`>= 0`) or an error code (`< 0`). As in the unmodified version, all + * spawn failures translate to EAGAIN; unlike the modified version, there is + * no need to "start up" the child thread--the host does this. If the spawn + * did succeed, then we store the TID atomically, since this parent thread + * is racing with the child thread to set this field; this way, whichever + * thread reaches this point first can continue without waiting. */ + if (ret < 0) { + ret = -EAGAIN; + } else { + atomic_store((atomic_int *) &(new->tid), ret); + } +#endif if (ret >= 0) { new->next = self->next; @@ -374,11 +540,17 @@ int __pthread_create(pthread_t *restrict res, const pthread_attr_t *restrict att if (!--libc.threads_minus_1) libc.need_locks = 0; } __tl_unlock(); +#ifdef __wasilibc_unmodified_upstream __restore_sigs(&set); +#endif __release_ptc(); if (ret < 0) { +#ifdef __wasilibc_unmodified_upstream if (map) __munmap(map, size); +#else + free(map); +#endif return -ret; } diff --git a/lib/libc/wasi/libc-top-half/musl/src/thread/pthread_join.c b/lib/libc/wasi/libc-top-half/musl/src/thread/pthread_join.c index 17dae85d70..b06e7e75ef 100644 --- a/lib/libc/wasi/libc-top-half/musl/src/thread/pthread_join.c +++ b/lib/libc/wasi/libc-top-half/musl/src/thread/pthread_join.c @@ -1,6 +1,8 @@ #define _GNU_SOURCE #include "pthread_impl.h" +#ifdef __wasilibc_unmodified_upstream #include <sys/mman.h> +#endif static void dummy1(pthread_t t) { @@ -21,7 +23,11 @@ static int __pthread_timedjoin_np(pthread_t t, void **res, const struct timespec if (r == ETIMEDOUT || r == EINVAL) return r; __tl_sync(t); if (res) *res = 
t->result; +#ifdef __wasilibc_unmodified_upstream if (t->map_base) __munmap(t->map_base, t->map_size); +#else + if (t->map_base) free(t->map_base); +#endif return 0; } diff --git a/lib/libc/wasi/libc-top-half/musl/src/thread/pthread_mutex_destroy.c b/lib/libc/wasi/libc-top-half/musl/src/thread/pthread_mutex_destroy.c index 8d1bf77b87..e53c39c684 100644 --- a/lib/libc/wasi/libc-top-half/musl/src/thread/pthread_mutex_destroy.c +++ b/lib/libc/wasi/libc-top-half/musl/src/thread/pthread_mutex_destroy.c @@ -2,9 +2,17 @@ int pthread_mutex_destroy(pthread_mutex_t *mutex) { +#ifdef __wasilibc_unmodified_upstream /* If the mutex being destroyed is process-shared and has nontrivial * type (tracking ownership), it might be in the pending slot of a * robust_list; wait for quiescence. */ if (mutex->_m_type > 128) __vm_wait(); +#else + /* For now, wasi-libc chooses to avoid implementing robust mutex support + * though this could be added later. The error code indicates that the + * mutex was an invalid type, but it would be more accurate as + * "unimplemented". */ + if (mutex->_m_type > 128) return EINVAL; +#endif return 0; } diff --git a/lib/libc/wasi/libc-top-half/musl/src/thread/pthread_mutex_timedlock.c b/lib/libc/wasi/libc-top-half/musl/src/thread/pthread_mutex_timedlock.c index 9279fc5430..d22196a55a 100644 --- a/lib/libc/wasi/libc-top-half/musl/src/thread/pthread_mutex_timedlock.c +++ b/lib/libc/wasi/libc-top-half/musl/src/thread/pthread_mutex_timedlock.c @@ -1,5 +1,6 @@ #include "pthread_impl.h" +#ifdef __wasilibc_unmodified_upstream #define IS32BIT(x) !((x)+0x80000000ULL>>32) #define CLAMP(x) (int)(IS32BIT(x) ? 
(x) : 0x7fffffffU+((0ULL+(x))>>63)) @@ -52,6 +53,7 @@ static int pthread_mutex_timedlock_pi(pthread_mutex_t *restrict m, const struct while (e != ETIMEDOUT); return e; } +#endif int __pthread_mutex_timedlock(pthread_mutex_t *restrict m, const struct timespec *restrict at) { @@ -65,8 +67,10 @@ int __pthread_mutex_timedlock(pthread_mutex_t *restrict m, const struct timespec r = __pthread_mutex_trylock(m); if (r != EBUSY) return r; +#ifdef __wasilibc_unmodified_upstream if (type&8) return pthread_mutex_timedlock_pi(m, at); - +#endif + int spins = 100; while (spins-- && m->_m_lock && !m->_m_waiters) a_spin(); diff --git a/lib/libc/wasi/libc-top-half/musl/src/thread/pthread_mutex_trylock.c b/lib/libc/wasi/libc-top-half/musl/src/thread/pthread_mutex_trylock.c index a24e7c58ac..c60b45feef 100644 --- a/lib/libc/wasi/libc-top-half/musl/src/thread/pthread_mutex_trylock.c +++ b/lib/libc/wasi/libc-top-half/musl/src/thread/pthread_mutex_trylock.c @@ -27,7 +27,9 @@ int __pthread_mutex_trylock_owner(pthread_mutex_t *m) if (type & 128) { if (!self->robust_list.off) { self->robust_list.off = (char*)&m->_m_lock-(char *)&m->_m_next; +#ifdef __wasilibc_unmodified_upstream __syscall(SYS_set_robust_list, &self->robust_list, 3*sizeof(long)); +#endif } if (m->_m_waiters) tid |= 0x80000000; self->robust_list.pending = &m->_m_next; @@ -43,7 +45,9 @@ int __pthread_mutex_trylock_owner(pthread_mutex_t *m) success: if ((type&8) && m->_m_waiters) { int priv = (type & 128) ^ 128; +#ifdef __wasilibc_unmodified_upstream __syscall(SYS_futex, &m->_m_lock, FUTEX_UNLOCK_PI|priv); +#endif self->robust_list.pending = 0; return (type&4) ? 
ENOTRECOVERABLE : EBUSY; } diff --git a/lib/libc/wasi/libc-top-half/musl/src/thread/pthread_mutex_unlock.c b/lib/libc/wasi/libc-top-half/musl/src/thread/pthread_mutex_unlock.c index b66423e6c3..6beaacbc64 100644 --- a/lib/libc/wasi/libc-top-half/musl/src/thread/pthread_mutex_unlock.c +++ b/lib/libc/wasi/libc-top-half/musl/src/thread/pthread_mutex_unlock.c @@ -22,7 +22,9 @@ int __pthread_mutex_unlock(pthread_mutex_t *m) new = 0x7fffffff; if (!priv) { self->robust_list.pending = &m->_m_next; +#ifdef __wasilibc_unmodified_upstream __vm_lock(); +#endif } volatile void *prev = m->_m_prev; volatile void *next = m->_m_next; @@ -30,6 +32,7 @@ int __pthread_mutex_unlock(pthread_mutex_t *m) if (next != &self->robust_list.head) *(volatile void *volatile *) ((char *)next - sizeof(void *)) = prev; } +#ifdef __wasilibc_unmodified_upstream if (type&8) { if (old<0 || a_cas(&m->_m_lock, old, new)!=old) { if (new) a_store(&m->_m_waiters, -1); @@ -40,9 +43,14 @@ int __pthread_mutex_unlock(pthread_mutex_t *m) } else { cont = a_swap(&m->_m_lock, new); } +#else + cont = a_swap(&m->_m_lock, new); +#endif if (type != PTHREAD_MUTEX_NORMAL && !priv) { self->robust_list.pending = 0; +#ifdef __wasilibc_unmodified_upstream __vm_unlock(); +#endif } if (waiters || cont<0) __wake(&m->_m_lock, 1, priv); diff --git a/lib/libc/wasi/libc-top-half/musl/src/thread/pthread_mutexattr_setprotocol.c b/lib/libc/wasi/libc-top-half/musl/src/thread/pthread_mutexattr_setprotocol.c index 8b80c1ce9b..84b02ba130 100644 --- a/lib/libc/wasi/libc-top-half/musl/src/thread/pthread_mutexattr_setprotocol.c +++ b/lib/libc/wasi/libc-top-half/musl/src/thread/pthread_mutexattr_setprotocol.c @@ -11,6 +11,7 @@ int pthread_mutexattr_setprotocol(pthread_mutexattr_t *a, int protocol) a->__attr &= ~8; return 0; case PTHREAD_PRIO_INHERIT: +#ifdef __wasilibc_unmodified_upstream r = check_pi_result; if (r < 0) { volatile int lk = 0; @@ -20,6 +21,9 @@ int pthread_mutexattr_setprotocol(pthread_mutexattr_t *a, int protocol) if (r) 
return r; a->__attr |= 8; return 0; +#else + return ENOTSUP; +#endif case PTHREAD_PRIO_PROTECT: return ENOTSUP; default: diff --git a/lib/libc/wasi/libc-top-half/musl/src/thread/pthread_mutexattr_setrobust.c b/lib/libc/wasi/libc-top-half/musl/src/thread/pthread_mutexattr_setrobust.c index 30a9ac3bea..649a89130c 100644 --- a/lib/libc/wasi/libc-top-half/musl/src/thread/pthread_mutexattr_setrobust.c +++ b/lib/libc/wasi/libc-top-half/musl/src/thread/pthread_mutexattr_setrobust.c @@ -5,6 +5,7 @@ static volatile int check_robust_result = -1; int pthread_mutexattr_setrobust(pthread_mutexattr_t *a, int robust) { +#ifdef __wasilibc_unmodified_upstream if (robust > 1U) return EINVAL; if (robust) { int r = check_robust_result; @@ -20,4 +21,7 @@ int pthread_mutexattr_setrobust(pthread_mutexattr_t *a, int robust) } a->__attr &= ~4; return 0; +#else + return EINVAL; +#endif } diff --git a/lib/libc/wasi/libc-top-half/musl/src/thread/pthread_self.c b/lib/libc/wasi/libc-top-half/musl/src/thread/pthread_self.c index bd3bf95bb7..1f3eee1d16 100644 --- a/lib/libc/wasi/libc-top-half/musl/src/thread/pthread_self.c +++ b/lib/libc/wasi/libc-top-half/musl/src/thread/pthread_self.c @@ -1,6 +1,11 @@ #include "pthread_impl.h" #include <threads.h> +#if !defined(__wasilibc_unmodified_upstream) && defined(__wasm__) && \ + defined(_REENTRANT) +_Thread_local struct pthread __wasilibc_pthread_self; +#endif + static pthread_t __pthread_self_internal() { return __pthread_self(); diff --git a/lib/libc/wasi/libc-top-half/musl/src/thread/pthread_setcancelstate.c b/lib/libc/wasi/libc-top-half/musl/src/thread/pthread_setcancelstate.c index 5ab8c338f7..4f7a00e585 100644 --- a/lib/libc/wasi/libc-top-half/musl/src/thread/pthread_setcancelstate.c +++ b/lib/libc/wasi/libc-top-half/musl/src/thread/pthread_setcancelstate.c @@ -2,10 +2,12 @@ int __pthread_setcancelstate(int new, int *old) { +#if defined(__wasilibc_unmodified_upstream) || defined(_REENTRANT) if (new > 2U) return EINVAL; struct pthread *self = 
__pthread_self(); if (old) *old = self->canceldisable; self->canceldisable = new; +#endif return 0; } diff --git a/lib/std/Thread/Condition.zig b/lib/std/Thread/Condition.zig index 3625aab576..70ad0728eb 100644 --- a/lib/std/Thread/Condition.zig +++ b/lib/std/Thread/Condition.zig @@ -604,77 +604,3 @@ test "Condition - broadcasting - wake all threads" { } } } - -test "Condition - signal wakes one" { - // This test requires spawning threads - if (builtin.single_threaded) { - return error.SkipZigTest; - } - - var num_runs: usize = 1; - const num_threads = 3; - const timeoutDelay = 10 * std.time.ns_per_ms; - - while (num_runs > 0) : (num_runs -= 1) { - - // Start multiple runner threads, wait for them to start and send the signal - // then. Expect that one thread wake up and all other times out. - // - // Test depends on delay in timedWait! If too small all threads can timeout - // before any one gets wake up. - - const Runner = struct { - mutex: Mutex = .{}, - cond: Condition = .{}, - completed: Condition = .{}, - count: usize = 0, - threads: [num_threads]std.Thread = undefined, - wakeups: usize = 0, - timeouts: usize = 0, - - fn run(self: *@This()) void { - self.mutex.lock(); - defer self.mutex.unlock(); - - // The last started thread tells the main test thread it's completed. - self.count += 1; - if (self.count == num_threads) { - self.completed.signal(); - } - - self.cond.timedWait(&self.mutex, timeoutDelay) catch { - self.timeouts += 1; - return; - }; - self.wakeups += 1; - } - }; - - // Start threads - var runner = Runner{}; - for (runner.threads) |*t| { - t.* = try std.Thread.spawn(.{}, Runner.run, .{&runner}); - } - - { - runner.mutex.lock(); - defer runner.mutex.unlock(); - - // Wait for all the threads to spawn. - // timedWait() to detect any potential deadlocks. - while (runner.count != num_threads) { - try runner.completed.timedWait(&runner.mutex, 1 * std.time.ns_per_s); - } - // Signal one thread, the others should get timeout. 
- runner.cond.signal(); - } - - for (runner.threads) |t| { - t.join(); - } - - // Expect that only one got singal - try std.testing.expectEqual(runner.wakeups, 1); - try std.testing.expectEqual(runner.timeouts, num_threads - 1); - } -} diff --git a/lib/std/Thread/Futex.zig b/lib/std/Thread/Futex.zig index 58f49c483e..15ef35698e 100644 --- a/lib/std/Thread/Futex.zig +++ b/lib/std/Thread/Futex.zig @@ -721,7 +721,7 @@ const PosixImpl = struct { // then cut off the zero bits from the alignment to get the unique address. const addr = @ptrToInt(ptr); assert(addr & (alignment - 1) == 0); - return addr >> @ctz(alignment); + return addr >> @ctz(@as(usize, alignment)); } }; diff --git a/lib/std/crypto/25519/ed25519.zig b/lib/std/crypto/25519/ed25519.zig index 149191040a..7d136fc12d 100644 --- a/lib/std/crypto/25519/ed25519.zig +++ b/lib/std/crypto/25519/ed25519.zig @@ -181,7 +181,7 @@ pub const Ed25519 = struct { const hram = Curve.scalar.reduce64(hram64); const sb_ah = try Curve.basePoint.mulDoubleBasePublic(self.s, self.a.neg(), hram); - if (self.expected_r.sub(sb_ah).clearCofactor().rejectIdentity()) |_| { + if (self.expected_r.sub(sb_ah).rejectLowOrder()) { return error.SignatureVerificationFailed; } else |_| {} } diff --git a/lib/std/crypto/25519/edwards25519.zig b/lib/std/crypto/25519/edwards25519.zig index f7b07738a2..840f4b67d5 100644 --- a/lib/std/crypto/25519/edwards25519.zig +++ b/lib/std/crypto/25519/edwards25519.zig @@ -83,6 +83,19 @@ pub const Edwards25519 = struct { return p.dbl().dbl().dbl(); } + /// Check that the point does not generate a low-order group. + /// Return a `WeakPublicKey` error if it does. + pub fn rejectLowOrder(p: Edwards25519) WeakPublicKeyError!void { + const zi = p.z.invert(); + const x = p.x.mul(zi); + const y = p.y.mul(zi); + const x_neg = x.neg(); + const iy = Fe.sqrtm1.mul(y); + if (x.isZero() or y.isZero() or iy.equivalent(x) or iy.equivalent(x_neg)) { + return error.WeakPublicKey; + } + } + /// Flip the sign of the X coordinate. 
pub inline fn neg(p: Edwards25519) Edwards25519 { return .{ .x = p.x.neg(), .y = p.y, .z = p.z, .t = p.t.neg() }; diff --git a/lib/std/fs/path.zig b/lib/std/fs/path.zig index feacf38daf..f4b5a3cf6e 100644 --- a/lib/std/fs/path.zig +++ b/lib/std/fs/path.zig @@ -462,7 +462,6 @@ pub fn resolve(allocator: Allocator, paths: []const []const u8) ![]u8 { /// This function is like a series of `cd` statements executed one after another. /// It resolves "." and "..". /// The result does not have a trailing path separator. -/// If all paths are relative it uses the current working directory as a starting point. /// Each drive has its own current working directory. /// Path separators are canonicalized to '\\' and drives are canonicalized to capital letters. /// Note: all usage of this function should be audited due to the existence of symlinks. @@ -572,15 +571,15 @@ pub fn resolveWindows(allocator: Allocator, paths: []const []const u8) ![]u8 { continue; } var it = mem.tokenize(u8, p[parsed.disk_designator.len..], "/\\"); - component: while (it.next()) |component| { + while (it.next()) |component| { if (mem.eql(u8, component, ".")) { continue; } else if (mem.eql(u8, component, "..")) { + if (result.items.len == 0) { + negative_count += 1; + continue; + } while (true) { - if (result.items.len == 0) { - negative_count += 1; - continue :component; - } if (result.items.len == disk_designator_len) { break; } @@ -589,7 +588,7 @@ pub fn resolveWindows(allocator: Allocator, paths: []const []const u8) ![]u8 { else => false, }; result.items.len -= 1; - if (end_with_sep) break; + if (end_with_sep or result.items.len == 0) break; } } else if (!have_abs_path and result.items.len == 0) { try result.appendSlice(component); @@ -659,18 +658,18 @@ pub fn resolvePosix(allocator: Allocator, paths: []const []const u8) Allocator.E result.clearRetainingCapacity(); } var it = mem.tokenize(u8, p, "/"); - component: while (it.next()) |component| { + while (it.next()) |component| { if (mem.eql(u8, 
component, ".")) { continue; } else if (mem.eql(u8, component, "..")) { + if (result.items.len == 0) { + negative_count += @boolToInt(!is_abs); + continue; + } while (true) { - if (result.items.len == 0) { - negative_count += @boolToInt(!is_abs); - continue :component; - } const ends_with_slash = result.items[result.items.len - 1] == '/'; result.items.len -= 1; - if (ends_with_slash) break; + if (ends_with_slash or result.items.len == 0) break; } } else if (result.items.len > 0 or is_abs) { try result.ensureUnusedCapacity(1 + component.len); @@ -717,10 +716,10 @@ pub fn resolvePosix(allocator: Allocator, paths: []const []const u8) Allocator.E } test "resolve" { - try testResolveWindows(&[_][]const u8{ "a\\b\\c\\", "..\\..\\.." }, ".."); + try testResolveWindows(&[_][]const u8{ "a\\b\\c\\", "..\\..\\.." }, "."); try testResolveWindows(&[_][]const u8{"."}, "."); - try testResolvePosix(&[_][]const u8{ "a/b/c/", "../../.." }, ".."); + try testResolvePosix(&[_][]const u8{ "a/b/c/", "../../.." }, "."); try testResolvePosix(&[_][]const u8{"."}, "."); } @@ -753,19 +752,21 @@ test "resolveWindows" { } test "resolvePosix" { - try testResolvePosix(&[_][]const u8{ "/a/b", "c" }, "/a/b/c"); - try testResolvePosix(&[_][]const u8{ "/a/b", "c", "//d", "e///" }, "/d/e"); - try testResolvePosix(&[_][]const u8{ "/a/b/c", "..", "../" }, "/a"); - try testResolvePosix(&[_][]const u8{ "/", "..", ".." }, "/"); - try testResolvePosix(&[_][]const u8{"/a/b/c/"}, "/a/b/c"); + try testResolvePosix(&.{ "/a/b", "c" }, "/a/b/c"); + try testResolvePosix(&.{ "/a/b", "c", "//d", "e///" }, "/d/e"); + try testResolvePosix(&.{ "/a/b/c", "..", "../" }, "/a"); + try testResolvePosix(&.{ "/", "..", ".." 
}, "/"); + try testResolvePosix(&.{"/a/b/c/"}, "/a/b/c"); - try testResolvePosix(&[_][]const u8{ "/var/lib", "../", "file/" }, "/var/file"); - try testResolvePosix(&[_][]const u8{ "/var/lib", "/../", "file/" }, "/file"); - try testResolvePosix(&[_][]const u8{ "/some/dir", ".", "/absolute/" }, "/absolute"); - try testResolvePosix(&[_][]const u8{ "/foo/tmp.3/", "../tmp.3/cycles/root.js" }, "/foo/tmp.3/cycles/root.js"); + try testResolvePosix(&.{ "/var/lib", "../", "file/" }, "/var/file"); + try testResolvePosix(&.{ "/var/lib", "/../", "file/" }, "/file"); + try testResolvePosix(&.{ "/some/dir", ".", "/absolute/" }, "/absolute"); + try testResolvePosix(&.{ "/foo/tmp.3/", "../tmp.3/cycles/root.js" }, "/foo/tmp.3/cycles/root.js"); // Keep relative paths relative. - try testResolvePosix(&[_][]const u8{"a/b"}, "a/b"); + try testResolvePosix(&.{"a/b"}, "a/b"); + try testResolvePosix(&.{"."}, "."); + try testResolvePosix(&.{ ".", "src/test.zig", "..", "../test/cases.zig" }, "test/cases.zig"); } fn testResolveWindows(paths: []const []const u8, expected: []const u8) !void { diff --git a/lib/std/heap.zig b/lib/std/heap.zig index 55d295b016..f17f6728f8 100644 --- a/lib/std/heap.zig +++ b/lib/std/heap.zig @@ -855,6 +855,9 @@ pub const FixedBufferAllocator = struct { pub const ThreadSafeFixedBufferAllocator = @compileError("ThreadSafeFixedBufferAllocator has been replaced with `threadSafeAllocator` on FixedBufferAllocator"); +/// Returns a `StackFallbackAllocator` allocating using either a +/// `FixedBufferAllocator` on an array of size `size` and falling back to +/// `fallback_allocator` if that fails. pub fn stackFallback(comptime size: usize, fallback_allocator: Allocator) StackFallbackAllocator(size) { return StackFallbackAllocator(size){ .buffer = undefined, @@ -863,6 +866,10 @@ pub fn stackFallback(comptime size: usize, fallback_allocator: Allocator) StackF }; } +/// An allocator that attempts to allocate using a +/// `FixedBufferAllocator` using an array of size `size`. 
If the +/// allocation fails, it will fall back to using +/// `fallback_allocator`. Easily created with `stackFallback`. pub fn StackFallbackAllocator(comptime size: usize) type { return struct { const Self = @This(); diff --git a/lib/std/math/big/int.zig b/lib/std/math/big/int.zig index 1040a6dc8a..9cf31ab6c4 100644 --- a/lib/std/math/big/int.zig +++ b/lib/std/math/big/int.zig @@ -238,12 +238,14 @@ pub const Mutable = struct { self.limbs[0] = w_value; } else { var i: usize = 0; - while (w_value != 0) : (i += 1) { + while (true) : (i += 1) { self.limbs[i] = @truncate(Limb, w_value); // TODO: shift == 64 at compile-time fails. Fails on u128 limbs. w_value >>= limb_bits / 2; w_value >>= limb_bits / 2; + + if (w_value == 0) break; } } }, @@ -256,11 +258,13 @@ pub const Mutable = struct { const mask = (1 << limb_bits) - 1; comptime var i = 0; - inline while (w_value != 0) : (i += 1) { + inline while (true) : (i += 1) { self.limbs[i] = w_value & mask; w_value >>= limb_bits / 2; w_value >>= limb_bits / 2; + + if (w_value == 0) break; } } }, diff --git a/lib/std/math/big/int_test.zig b/lib/std/math/big/int_test.zig index 57211ae299..13c7a5a352 100644 --- a/lib/std/math/big/int_test.zig +++ b/lib/std/math/big/int_test.zig @@ -69,6 +69,14 @@ test "big.int set negative minimum" { try testing.expect((try a.to(i64)) == minInt(i64)); } +test "big.int set double-width maximum then zero" { + var a = try Managed.initSet(testing.allocator, maxInt(DoubleLimb)); + defer a.deinit(); + try a.set(@as(DoubleLimb, 0)); + + try testing.expectEqual(@as(DoubleLimb, 0), try a.to(DoubleLimb)); +} + test "big.int to target too small error" { var a = try Managed.initSet(testing.allocator, 0xffffffff); defer a.deinit(); diff --git a/lib/std/os.zig b/lib/std/os.zig index edea8f1620..7ebe415026 100644 --- a/lib/std/os.zig +++ b/lib/std/os.zig @@ -2999,15 +2999,19 @@ pub fn chdir(dir_path: []const u8) ChangeCurDirError!void { if (builtin.os.tag == .wasi and !builtin.link_libc) { var buf: 
[MAX_PATH_BYTES]u8 = undefined; var alloc = std.heap.FixedBufferAllocator.init(&buf); - const path = try fs.resolve(alloc.allocator(), &.{ wasi_cwd.cwd, dir_path }); + const path = fs.path.resolve(alloc.allocator(), &.{ wasi_cwd.cwd, dir_path }) catch |err| switch (err) { + error.OutOfMemory => return error.NameTooLong, + else => |e| return e, + }; const dirinfo = try fstatat(AT.FDCWD, path, 0); if (dirinfo.filetype != .DIRECTORY) { return error.NotDir; } + // This copy is guaranteed to succeed, since buf and path_buffer are the same size. var cwd_alloc = std.heap.FixedBufferAllocator.init(&wasi_cwd.path_buffer); - wasi_cwd.cwd = try cwd_alloc.allocator().dupe(u8, path); + wasi_cwd.cwd = cwd_alloc.allocator().dupe(u8, path) catch unreachable; return; } else if (builtin.os.tag == .windows) { var utf16_dir_path: [windows.PATH_MAX_WIDE]u16 = undefined; diff --git a/lib/std/os/linux/bpf.zig b/lib/std/os/linux/bpf.zig index 638c792409..db6473d673 100644 --- a/lib/std/os/linux/bpf.zig +++ b/lib/std/os/linux/bpf.zig @@ -667,7 +667,6 @@ pub const Insn = packed struct { } pub fn st(comptime size: Size, dst: Reg, off: i16, imm: i32) Insn { - if (size == .double_word) @compileError("TODO: need to determine how to correctly handle double words"); return Insn{ .code = MEM | @enumToInt(size) | ST, .dst = @enumToInt(dst), @@ -1585,6 +1584,27 @@ pub fn map_delete_elem(fd: fd_t, key: []const u8) !void { } } +pub fn map_get_next_key(fd: fd_t, key: []const u8, next_key: []u8) !bool { + var attr = Attr{ + .map_elem = std.mem.zeroes(MapElemAttr), + }; + + attr.map_elem.map_fd = fd; + attr.map_elem.key = @ptrToInt(key.ptr); + attr.map_elem.result.next_key = @ptrToInt(next_key.ptr); + + const rc = linux.bpf(.map_get_next_key, &attr, @sizeOf(MapElemAttr)); + switch (errno(rc)) { + .SUCCESS => return true, + .BADF => return error.BadFd, + .FAULT => unreachable, + .INVAL => return error.FieldInAttrNeedsZeroing, + .NOENT => return false, + .PERM => return error.AccessDenied, + else => |err| 
return unexpectedErrno(err), + } +} + test "map lookup, update, and delete" { const key_size = 4; const value_size = 4; @@ -1605,6 +1625,16 @@ test "map lookup, update, and delete" { const second_key = [key_size]u8{ 0, 0, 0, 1 }; try expectError(error.ReachedMaxEntries, map_update_elem(map, &second_key, &value, 0)); + // succeed at iterating all keys of map + var lookup_key = [_]u8{ 1, 0, 0, 0 }; + var next_key = [_]u8{ 2, 3, 4, 5 }; // garbage value + const status = try map_get_next_key(map, &lookup_key, &next_key); + try expectEqual(status, true); + try expectEqual(next_key, key); + std.mem.copy(u8, &lookup_key, &next_key); + const status2 = try map_get_next_key(map, &lookup_key, &next_key); + try expectEqual(status2, false); + // succeed at deleting an existing elem try map_delete_elem(map, &key); try expectError(error.NotFound, map_lookup_elem(map, &key, &value)); diff --git a/lib/std/os/test.zig b/lib/std/os/test.zig index c89026b5de..86f25fc8c6 100644 --- a/lib/std/os/test.zig +++ b/lib/std/os/test.zig @@ -22,7 +22,8 @@ const Dir = std.fs.Dir; const ArenaAllocator = std.heap.ArenaAllocator; test "chdir smoke test" { - if (native_os == .wasi) return error.SkipZigTest; // WASI doesn't allow navigating outside of a preopen + if (native_os == .wasi and builtin.link_libc) return error.SkipZigTest; + if (native_os == .wasi and !builtin.link_libc) try os.initPreopensWasi(std.heap.page_allocator, "/preopens/cwd"); // Get current working directory path var old_cwd_buf: [fs.MAX_PATH_BYTES]u8 = undefined; @@ -35,16 +36,42 @@ test "chdir smoke test" { const new_cwd = try os.getcwd(new_cwd_buf[0..]); try expect(mem.eql(u8, old_cwd, new_cwd)); } - { - // Next, change current working directory to one level above + + // Next, change current working directory to one level above + if (native_os != .wasi) { // WASI does not support navigating outside of Preopens const parent = fs.path.dirname(old_cwd) orelse unreachable; // old_cwd should be absolute try os.chdir(parent); + // 
Restore cwd because process may have other tests that do not tolerate chdir. defer os.chdir(old_cwd) catch unreachable; + var new_cwd_buf: [fs.MAX_PATH_BYTES]u8 = undefined; const new_cwd = try os.getcwd(new_cwd_buf[0..]); try expect(mem.eql(u8, parent, new_cwd)); } + + // Next, change current working directory to a temp directory one level below + { + // Create a tmp directory + var tmp_dir_buf: [fs.MAX_PATH_BYTES]u8 = undefined; + var tmp_dir_path = path: { + var allocator = std.heap.FixedBufferAllocator.init(&tmp_dir_buf); + break :path try fs.path.resolve(allocator.allocator(), &[_][]const u8{ old_cwd, "zig-test-tmp" }); + }; + var tmp_dir = try fs.cwd().makeOpenPath("zig-test-tmp", .{}); + + // Change current working directory to tmp directory + try os.chdir("zig-test-tmp"); + + var new_cwd_buf: [fs.MAX_PATH_BYTES]u8 = undefined; + const new_cwd = try os.getcwd(new_cwd_buf[0..]); + try expect(mem.eql(u8, tmp_dir_path, new_cwd)); + + // Restore cwd because process may have other tests that do not tolerate chdir. 
+ tmp_dir.close(); + os.chdir(old_cwd) catch unreachable; + try fs.cwd().deleteDir("zig-test-tmp"); + } } test "open smoke test" { diff --git a/lib/std/os/windows.zig b/lib/std/os/windows.zig index ba9979cbb4..93ec10e8be 100644 --- a/lib/std/os/windows.zig +++ b/lib/std/os/windows.zig @@ -134,6 +134,7 @@ pub fn OpenFile(sub_path_w: []const u16, options: OpenFileOptions) OpenError!HAN .OBJECT_NAME_COLLISION => return error.PathAlreadyExists, .FILE_IS_A_DIRECTORY => return error.IsDir, .NOT_A_DIRECTORY => return error.NotDir, + .USER_MAPPED_FILE => return error.AccessDenied, .INVALID_HANDLE => unreachable, else => return unexpectedStatus(rc), } @@ -2088,6 +2089,7 @@ pub const LPWSTR = [*:0]WCHAR; pub const LPCWSTR = [*:0]const WCHAR; pub const PVOID = *anyopaque; pub const PWSTR = [*:0]WCHAR; +pub const PCWSTR = [*:0]const WCHAR; pub const SIZE_T = usize; pub const UINT = c_uint; pub const ULONG_PTR = usize; @@ -2103,6 +2105,7 @@ pub const USHORT = u16; pub const SHORT = i16; pub const ULONG = u32; pub const LONG = i32; +pub const ULONG64 = u64; pub const ULONGLONG = u64; pub const LONGLONG = i64; pub const HLOCAL = HANDLE; @@ -2503,6 +2506,7 @@ pub const STANDARD_RIGHTS_READ = READ_CONTROL; pub const STANDARD_RIGHTS_WRITE = READ_CONTROL; pub const STANDARD_RIGHTS_EXECUTE = READ_CONTROL; pub const STANDARD_RIGHTS_REQUIRED = DELETE | READ_CONTROL | WRITE_DAC | WRITE_OWNER; +pub const MAXIMUM_ALLOWED = 0x02000000; // disposition for NtCreateFile pub const FILE_SUPERSEDE = 0; @@ -2871,9 +2875,143 @@ pub const PROV_RSA_FULL = 1; pub const REGSAM = ACCESS_MASK; pub const ACCESS_MASK = DWORD; -pub const HKEY = *opaque {}; pub const LSTATUS = LONG; +pub const HKEY = *opaque {}; + +pub const HKEY_LOCAL_MACHINE: HKEY = @intToPtr(HKEY, 0x80000002); + +/// Combines the STANDARD_RIGHTS_REQUIRED, KEY_QUERY_VALUE, KEY_SET_VALUE, KEY_CREATE_SUB_KEY, +/// KEY_ENUMERATE_SUB_KEYS, KEY_NOTIFY, and KEY_CREATE_LINK access rights. 
+pub const KEY_ALL_ACCESS = 0xF003F; +/// Reserved for system use. +pub const KEY_CREATE_LINK = 0x0020; +/// Required to create a subkey of a registry key. +pub const KEY_CREATE_SUB_KEY = 0x0004; +/// Required to enumerate the subkeys of a registry key. +pub const KEY_ENUMERATE_SUB_KEYS = 0x0008; +/// Equivalent to KEY_READ. +pub const KEY_EXECUTE = 0x20019; +/// Required to request change notifications for a registry key or for subkeys of a registry key. +pub const KEY_NOTIFY = 0x0010; +/// Required to query the values of a registry key. +pub const KEY_QUERY_VALUE = 0x0001; +/// Combines the STANDARD_RIGHTS_READ, KEY_QUERY_VALUE, KEY_ENUMERATE_SUB_KEYS, and KEY_NOTIFY values. +pub const KEY_READ = 0x20019; +/// Required to create, delete, or set a registry value. +pub const KEY_SET_VALUE = 0x0002; +/// Indicates that an application on 64-bit Windows should operate on the 32-bit registry view. +/// This flag is ignored by 32-bit Windows. +pub const KEY_WOW64_32KEY = 0x0200; +/// Indicates that an application on 64-bit Windows should operate on the 64-bit registry view. +/// This flag is ignored by 32-bit Windows. +pub const KEY_WOW64_64KEY = 0x0100; +/// Combines the STANDARD_RIGHTS_WRITE, KEY_SET_VALUE, and KEY_CREATE_SUB_KEY access rights. +pub const KEY_WRITE = 0x20006; + +/// Open symbolic link. 
+pub const REG_OPTION_OPEN_LINK: DWORD = 0x8; + +pub const RTL_QUERY_REGISTRY_TABLE = extern struct { + QueryRoutine: RTL_QUERY_REGISTRY_ROUTINE, + Flags: ULONG, + Name: ?PWSTR, + EntryContext: ?*anyopaque, + DefaultType: ULONG, + DefaultData: ?*anyopaque, + DefaultLength: ULONG, +}; + +pub const RTL_QUERY_REGISTRY_ROUTINE = ?std.meta.FnPtr(fn ( + PWSTR, + ULONG, + ?*anyopaque, + ULONG, + ?*anyopaque, + ?*anyopaque, +) callconv(WINAPI) NTSTATUS); + +/// Path is a full path +pub const RTL_REGISTRY_ABSOLUTE = 0; +/// \Registry\Machine\System\CurrentControlSet\Services +pub const RTL_REGISTRY_SERVICES = 1; +/// \Registry\Machine\System\CurrentControlSet\Control +pub const RTL_REGISTRY_CONTROL = 2; +/// \Registry\Machine\Software\Microsoft\Windows NT\CurrentVersion +pub const RTL_REGISTRY_WINDOWS_NT = 3; +/// \Registry\Machine\Hardware\DeviceMap +pub const RTL_REGISTRY_DEVICEMAP = 4; +/// \Registry\User\CurrentUser +pub const RTL_REGISTRY_USER = 5; +pub const RTL_REGISTRY_MAXIMUM = 6; + +/// Low order bits are registry handle +pub const RTL_REGISTRY_HANDLE = 0x40000000; +/// Indicates the key node is optional +pub const RTL_REGISTRY_OPTIONAL = 0x80000000; + +/// Name is a subkey and remainder of table or until next subkey are value +/// names for that subkey to look at. +pub const RTL_QUERY_REGISTRY_SUBKEY = 0x00000001; + +/// Reset current key to original key for this and all following table entries. +pub const RTL_QUERY_REGISTRY_TOPKEY = 0x00000002; + +/// Fail if no match found for this table entry. +pub const RTL_QUERY_REGISTRY_REQUIRED = 0x00000004; + +/// Used to mark a table entry that has no value name, just wants a call out, not +/// an enumeration of all values. +pub const RTL_QUERY_REGISTRY_NOVALUE = 0x00000008; + +/// Used to suppress the expansion of REG_MULTI_SZ into multiple callouts or +/// to prevent the expansion of environment variable values in REG_EXPAND_SZ. +pub const RTL_QUERY_REGISTRY_NOEXPAND = 0x00000010; + +/// QueryRoutine field ignored. 
EntryContext field points to location to store value. +/// For null terminated strings, EntryContext points to UNICODE_STRING structure +/// that describes maximum size of buffer. If .Buffer field is NULL then a buffer is +/// allocated. +pub const RTL_QUERY_REGISTRY_DIRECT = 0x00000020; + +/// Used to delete value keys after they are queried. +pub const RTL_QUERY_REGISTRY_DELETE = 0x00000040; + +/// Use this flag with the RTL_QUERY_REGISTRY_DIRECT flag to verify that the REG_XXX type +/// of the stored registry value matches the type expected by the caller. +/// If the types do not match, the call fails. +pub const RTL_QUERY_REGISTRY_TYPECHECK = 0x00000100; + +pub const REG = struct { + /// No value type + pub const NONE: ULONG = 0; + /// Unicode nul terminated string + pub const SZ: ULONG = 1; + /// Unicode nul terminated string (with environment variable references) + pub const EXPAND_SZ: ULONG = 2; + /// Free form binary + pub const BINARY: ULONG = 3; + /// 32-bit number + pub const DWORD: ULONG = 4; + /// 32-bit number (same as REG_DWORD) + pub const DWORD_LITTLE_ENDIAN: ULONG = 4; + /// 32-bit number + pub const DWORD_BIG_ENDIAN: ULONG = 5; + /// Symbolic Link (unicode) + pub const LINK: ULONG = 6; + /// Multiple Unicode strings + pub const MULTI_SZ: ULONG = 7; + /// Resource list in the resource map + pub const RESOURCE_LIST: ULONG = 8; + /// Resource list in the hardware description + pub const FULL_RESOURCE_DESCRIPTOR: ULONG = 9; + pub const RESOURCE_REQUIREMENTS_LIST: ULONG = 10; + /// 64-bit number + pub const QWORD: ULONG = 11; + /// 64-bit number (same as REG_QWORD) + pub const QWORD_LITTLE_ENDIAN: ULONG = 11; +}; + pub const FILE_NOTIFY_INFORMATION = extern struct { NextEntryOffset: DWORD, Action: DWORD, @@ -3714,3 +3852,305 @@ pub const CTRL_LOGOFF_EVENT: DWORD = 5; pub const CTRL_SHUTDOWN_EVENT: DWORD = 6; pub const HANDLER_ROUTINE = std.meta.FnPtr(fn (dwCtrlType: DWORD) callconv(WINAPI) BOOL); + +/// Processor feature enumeration. 
+pub const PF = enum(DWORD) { + /// On a Pentium, a floating-point precision error can occur in rare circumstances. + FLOATING_POINT_PRECISION_ERRATA = 0, + + /// Floating-point operations are emulated using a software emulator. + /// This function returns a nonzero value if floating-point operations are emulated; otherwise, it returns zero. + FLOATING_POINT_EMULATED = 1, + + /// The atomic compare and exchange operation (cmpxchg) is available. + COMPARE_EXCHANGE_DOUBLE = 2, + + /// The MMX instruction set is available. + MMX_INSTRUCTIONS_AVAILABLE = 3, + + PPC_MOVEMEM_64BIT_OK = 4, + ALPHA_BYTE_INSTRUCTIONS = 5, + + /// The SSE instruction set is available. + XMMI_INSTRUCTIONS_AVAILABLE = 6, + + /// The 3D-Now instruction is available. + @"3DNOW_INSTRUCTIONS_AVAILABLE" = 7, + + /// The RDTSC instruction is available. + RDTSC_INSTRUCTION_AVAILABLE = 8, + + /// The processor is PAE-enabled. + PAE_ENABLED = 9, + + /// The SSE2 instruction set is available. + XMMI64_INSTRUCTIONS_AVAILABLE = 10, + + SSE_DAZ_MODE_AVAILABLE = 11, + + /// Data execution prevention is enabled. + NX_ENABLED = 12, + + /// The SSE3 instruction set is available. + SSE3_INSTRUCTIONS_AVAILABLE = 13, + + /// The atomic compare and exchange 128-bit operation (cmpxchg16b) is available. + COMPARE_EXCHANGE128 = 14, + + /// The atomic compare 64 and exchange 128-bit operation (cmp8xchg16) is available. + COMPARE64_EXCHANGE128 = 15, + + /// The processor channels are enabled. + CHANNELS_ENABLED = 16, + + /// The processor implements the XSAVE and XRSTOR instructions. + XSAVE_ENABLED = 17, + + /// The VFP/Neon: 32 x 64bit register bank is present. + /// This flag has the same meaning as PF_ARM_VFP_EXTENDED_REGISTERS. + ARM_VFP_32_REGISTERS_AVAILABLE = 18, + + /// This ARM processor implements the ARM v8 NEON instruction set. + ARM_NEON_INSTRUCTIONS_AVAILABLE = 19, + + /// Second Level Address Translation is supported by the hardware. 
+ SECOND_LEVEL_ADDRESS_TRANSLATION = 20, + + /// Virtualization is enabled in the firmware and made available by the operating system. + VIRT_FIRMWARE_ENABLED = 21, + + /// RDFSBASE, RDGSBASE, WRFSBASE, and WRGSBASE instructions are available. + RDWRFSGBASE_AVAILABLE = 22, + + /// _fastfail() is available. + FASTFAIL_AVAILABLE = 23, + + /// The divide instruction is available. + ARM_DIVIDE_INSTRUCTION_AVAILABLE = 24, + + /// The 64-bit load/store atomic instructions are available. + ARM_64BIT_LOADSTORE_ATOMIC = 25, + + /// The external cache is available. + ARM_EXTERNAL_CACHE_AVAILABLE = 26, + + /// The floating-point multiply-accumulate instruction is available. + ARM_FMAC_INSTRUCTIONS_AVAILABLE = 27, + + RDRAND_INSTRUCTION_AVAILABLE = 28, + + /// This ARM processor implements the ARM v8 instruction set. + ARM_V8_INSTRUCTIONS_AVAILABLE = 29, + + /// This ARM processor implements the ARM v8 extra cryptographic instructions (i.e., AES, SHA1 and SHA2). + ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE = 30, + + /// This ARM processor implements the ARM v8 extra CRC32 instructions. + ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE = 31, + + RDTSCP_INSTRUCTION_AVAILABLE = 32, + RDPID_INSTRUCTION_AVAILABLE = 33, + + /// This ARM processor implements the ARM v8.1 atomic instructions (e.g., CAS, SWP). + ARM_V81_ATOMIC_INSTRUCTIONS_AVAILABLE = 34, + + MONITORX_INSTRUCTION_AVAILABLE = 35, + + /// The SSSE3 instruction set is available. + SSSE3_INSTRUCTIONS_AVAILABLE = 36, + + /// The SSE4_1 instruction set is available. + SSE4_1_INSTRUCTIONS_AVAILABLE = 37, + + /// The SSE4_2 instruction set is available. + SSE4_2_INSTRUCTIONS_AVAILABLE = 38, + + /// The AVX instruction set is available. + AVX_INSTRUCTIONS_AVAILABLE = 39, + + /// The AVX2 instruction set is available. + AVX2_INSTRUCTIONS_AVAILABLE = 40, + + /// The AVX512F instruction set is available. + AVX512F_INSTRUCTIONS_AVAILABLE = 41, + + ERMS_AVAILABLE = 42, + + /// This ARM processor implements the ARM v8.2 Dot Product (DP) instructions. 
+ ARM_V82_DP_INSTRUCTIONS_AVAILABLE = 43, + + /// This ARM processor implements the ARM v8.3 JavaScript conversion (JSCVT) instructions. + ARM_V83_JSCVT_INSTRUCTIONS_AVAILABLE = 44, +}; + +pub const MAX_WOW64_SHARED_ENTRIES = 16; +pub const PROCESSOR_FEATURE_MAX = 64; +pub const MAXIMUM_XSTATE_FEATURES = 64; + +pub const KSYSTEM_TIME = extern struct { + LowPart: ULONG, + High1Time: LONG, + High2Time: LONG, +}; + +pub const NT_PRODUCT_TYPE = enum(INT) { + NtProductWinNt = 1, + NtProductLanManNt, + NtProductServer, +}; + +pub const ALTERNATIVE_ARCHITECTURE_TYPE = enum(INT) { + StandardDesign, + NEC98x86, + EndAlternatives, +}; + +pub const XSTATE_FEATURE = extern struct { + Offset: ULONG, + Size: ULONG, +}; + +pub const XSTATE_CONFIGURATION = extern struct { + EnabledFeatures: ULONG64, + Size: ULONG, + OptimizedSave: ULONG, + Features: [MAXIMUM_XSTATE_FEATURES]XSTATE_FEATURE, +}; + +/// Shared Kernel User Data +pub const KUSER_SHARED_DATA = extern struct { + TickCountLowDeprecated: ULONG, + TickCountMultiplier: ULONG, + InterruptTime: KSYSTEM_TIME, + SystemTime: KSYSTEM_TIME, + TimeZoneBias: KSYSTEM_TIME, + ImageNumberLow: USHORT, + ImageNumberHigh: USHORT, + NtSystemRoot: [260]WCHAR, + MaxStackTraceDepth: ULONG, + CryptoExponent: ULONG, + TimeZoneId: ULONG, + LargePageMinimum: ULONG, + AitSamplingValue: ULONG, + AppCompatFlag: ULONG, + RNGSeedVersion: ULONGLONG, + GlobalValidationRunlevel: ULONG, + TimeZoneBiasStamp: LONG, + NtBuildNumber: ULONG, + NtProductType: NT_PRODUCT_TYPE, + ProductTypeIsValid: BOOLEAN, + Reserved0: [1]BOOLEAN, + NativeProcessorArchitecture: USHORT, + NtMajorVersion: ULONG, + NtMinorVersion: ULONG, + ProcessorFeatures: [PROCESSOR_FEATURE_MAX]BOOLEAN, + Reserved1: ULONG, + Reserved3: ULONG, + TimeSlip: ULONG, + AlternativeArchitecture: ALTERNATIVE_ARCHITECTURE_TYPE, + BootId: ULONG, + SystemExpirationDate: LARGE_INTEGER, + SuiteMaskY: ULONG, + KdDebuggerEnabled: BOOLEAN, + DummyUnion1: extern union { + MitigationPolicies: UCHAR, + Alt: packed 
struct { + NXSupportPolicy: u2, + SEHValidationPolicy: u2, + CurDirDevicesSkippedForDlls: u2, + Reserved: u2, + }, + }, + CyclesPerYield: USHORT, + ActiveConsoleId: ULONG, + DismountCount: ULONG, + ComPlusPackage: ULONG, + LastSystemRITEventTickCount: ULONG, + NumberOfPhysicalPages: ULONG, + SafeBootMode: BOOLEAN, + DummyUnion2: extern union { + VirtualizationFlags: UCHAR, + Alt: packed struct { + ArchStartedInEl2: u1, + QcSlIsSupported: u1, + SpareBits: u6, + }, + }, + Reserved12: [2]UCHAR, + DummyUnion3: extern union { + SharedDataFlags: ULONG, + Alt: packed struct { + DbgErrorPortPresent: u1, + DbgElevationEnabled: u1, + DbgVirtEnabled: u1, + DbgInstallerDetectEnabled: u1, + DbgLkgEnabled: u1, + DbgDynProcessorEnabled: u1, + DbgConsoleBrokerEnabled: u1, + DbgSecureBootEnabled: u1, + DbgMultiSessionSku: u1, + DbgMultiUsersInSessionSku: u1, + DbgStateSeparationEnabled: u1, + SpareBits: u21, + }, + }, + DataFlagsPad: [1]ULONG, + TestRetInstruction: ULONGLONG, + QpcFrequency: LONGLONG, + SystemCall: ULONG, + Reserved2: ULONG, + SystemCallPad: [2]ULONGLONG, + DummyUnion4: extern union { + TickCount: KSYSTEM_TIME, + TickCountQuad: ULONG64, + Alt: extern struct { + ReservedTickCountOverlay: [3]ULONG, + TickCountPad: [1]ULONG, + }, + }, + Cookie: ULONG, + CookiePad: [1]ULONG, + ConsoleSessionForegroundProcessId: LONGLONG, + TimeUpdateLock: ULONGLONG, + BaselineSystemTimeQpc: ULONGLONG, + BaselineInterruptTimeQpc: ULONGLONG, + QpcSystemTimeIncrement: ULONGLONG, + QpcInterruptTimeIncrement: ULONGLONG, + QpcSystemTimeIncrementShift: UCHAR, + QpcInterruptTimeIncrementShift: UCHAR, + UnparkedProcessorCount: USHORT, + EnclaveFeatureMask: [4]ULONG, + TelemetryCoverageRound: ULONG, + UserModeGlobalLogger: [16]USHORT, + ImageFileExecutionOptions: ULONG, + LangGenerationCount: ULONG, + Reserved4: ULONGLONG, + InterruptTimeBias: ULONGLONG, + QpcBias: ULONGLONG, + ActiveProcessorCount: ULONG, + ActiveGroupCount: UCHAR, + Reserved9: UCHAR, + DummyUnion5: extern union { + QpcData: 
USHORT, + Alt: extern struct { + QpcBypassEnabled: UCHAR, + QpcShift: UCHAR, + }, + }, + TimeZoneBiasEffectiveStart: LARGE_INTEGER, + TimeZoneBiasEffectiveEnd: LARGE_INTEGER, + XState: XSTATE_CONFIGURATION, + FeatureConfigurationChangeStamp: KSYSTEM_TIME, + Spare: ULONG, + UserPointerAuthMask: ULONG64, +}; + +/// Read-only user-mode address for the shared data. +/// https://www.geoffchappell.com/studies/windows/km/ntoskrnl/inc/api/ntexapi_x/kuser_shared_data/index.htm +/// https://msrc-blog.microsoft.com/2022/04/05/randomizing-the-kuser_shared_data-structure-on-windows/ +pub const SharedUserData: *const KUSER_SHARED_DATA = @intToPtr(*const KUSER_SHARED_DATA, 0x7FFE0000); + +pub fn IsProcessorFeaturePresent(feature: PF) bool { + if (@enumToInt(feature) >= PROCESSOR_FEATURE_MAX) return false; + return SharedUserData.ProcessorFeatures[@enumToInt(feature)] == 1; +} diff --git a/lib/std/os/windows/kernel32.zig b/lib/std/os/windows/kernel32.zig index 8d146def7f..e0c7b96f84 100644 --- a/lib/std/os/windows/kernel32.zig +++ b/lib/std/os/windows/kernel32.zig @@ -10,6 +10,7 @@ const DWORD = windows.DWORD; const FILE_INFO_BY_HANDLE_CLASS = windows.FILE_INFO_BY_HANDLE_CLASS; const HANDLE = windows.HANDLE; const HMODULE = windows.HMODULE; +const HKEY = windows.HKEY; const HRESULT = windows.HRESULT; const LARGE_INTEGER = windows.LARGE_INTEGER; const LPCWSTR = windows.LPCWSTR; @@ -57,6 +58,8 @@ const UCHAR = windows.UCHAR; const FARPROC = windows.FARPROC; const INIT_ONCE_FN = windows.INIT_ONCE_FN; const PMEMORY_BASIC_INFORMATION = windows.PMEMORY_BASIC_INFORMATION; +const REGSAM = windows.REGSAM; +const LSTATUS = windows.LSTATUS; pub extern "kernel32" fn AddVectoredExceptionHandler(First: c_ulong, Handler: ?VECTORED_EXCEPTION_HANDLER) callconv(WINAPI) ?*anyopaque; pub extern "kernel32" fn RemoveVectoredExceptionHandler(Handle: HANDLE) callconv(WINAPI) c_ulong; @@ -231,6 +234,7 @@ pub extern "kernel32" fn GetQueuedCompletionStatusEx( pub extern "kernel32" fn 
GetSystemInfo(lpSystemInfo: *SYSTEM_INFO) callconv(WINAPI) void; pub extern "kernel32" fn GetSystemTimeAsFileTime(*FILETIME) callconv(WINAPI) void; +pub extern "kernel32" fn IsProcessorFeaturePresent(ProcessorFeature: DWORD) BOOL; pub extern "kernel32" fn HeapCreate(flOptions: DWORD, dwInitialSize: SIZE_T, dwMaximumSize: SIZE_T) callconv(WINAPI) ?HANDLE; pub extern "kernel32" fn HeapDestroy(hHeap: HANDLE) callconv(WINAPI) BOOL; @@ -411,3 +415,11 @@ pub extern "kernel32" fn SleepConditionVariableSRW( pub extern "kernel32" fn TryAcquireSRWLockExclusive(s: *SRWLOCK) callconv(WINAPI) BOOLEAN; pub extern "kernel32" fn AcquireSRWLockExclusive(s: *SRWLOCK) callconv(WINAPI) void; pub extern "kernel32" fn ReleaseSRWLockExclusive(s: *SRWLOCK) callconv(WINAPI) void; + +pub extern "kernel32" fn RegOpenKeyExW( + hkey: HKEY, + lpSubKey: LPCWSTR, + ulOptions: DWORD, + samDesired: REGSAM, + phkResult: *HKEY, +) callconv(WINAPI) LSTATUS; diff --git a/lib/std/os/windows/ntdll.zig b/lib/std/os/windows/ntdll.zig index bf9dc9bd2f..b006a785da 100644 --- a/lib/std/os/windows/ntdll.zig +++ b/lib/std/os/windows/ntdll.zig @@ -22,6 +22,8 @@ const RTL_OSVERSIONINFOW = windows.RTL_OSVERSIONINFOW; const FILE_BASIC_INFORMATION = windows.FILE_BASIC_INFORMATION; const SIZE_T = windows.SIZE_T; const CURDIR = windows.CURDIR; +const PCWSTR = windows.PCWSTR; +const RTL_QUERY_REGISTRY_TABLE = windows.RTL_QUERY_REGISTRY_TABLE; pub const THREADINFOCLASS = enum(c_int) { ThreadBasicInformation, @@ -253,3 +255,17 @@ pub extern "ntdll" fn NtUnlockFile( Length: *const LARGE_INTEGER, Key: ?*ULONG, ) callconv(WINAPI) NTSTATUS; + +pub extern "ntdll" fn NtOpenKey( + KeyHandle: *HANDLE, + DesiredAccess: ACCESS_MASK, + ObjectAttributes: OBJECT_ATTRIBUTES, +) callconv(WINAPI) NTSTATUS; + +pub extern "ntdll" fn RtlQueryRegistryValues( + RelativeTo: ULONG, + Path: PCWSTR, + QueryTable: [*]RTL_QUERY_REGISTRY_TABLE, + Context: ?*anyopaque, + Environment: ?*anyopaque, +) callconv(WINAPI) NTSTATUS; diff --git 
a/lib/std/testing.zig b/lib/std/testing.zig index f1b01b0bd0..00a06dc20f 100644 --- a/lib/std/testing.zig +++ b/lib/std/testing.zig @@ -379,28 +379,13 @@ pub const TmpIterableDir = struct { } }; -fn getCwdOrWasiPreopen() std.fs.Dir { - if (builtin.os.tag == .wasi and !builtin.link_libc) { - var preopens = std.fs.wasi.PreopenList.init(allocator); - defer preopens.deinit(); - preopens.populate(null) catch - @panic("unable to make tmp dir for testing: unable to populate preopens"); - const preopen = preopens.find(std.fs.wasi.PreopenType{ .Dir = "." }) orelse - @panic("unable to make tmp dir for testing: didn't find '.' in the preopens"); - - return std.fs.Dir{ .fd = preopen.fd }; - } else { - return std.fs.cwd(); - } -} - pub fn tmpDir(opts: std.fs.Dir.OpenDirOptions) TmpDir { var random_bytes: [TmpDir.random_bytes_count]u8 = undefined; std.crypto.random.bytes(&random_bytes); var sub_path: [TmpDir.sub_path_len]u8 = undefined; _ = std.fs.base64_encoder.encode(&sub_path, &random_bytes); - var cwd = getCwdOrWasiPreopen(); + var cwd = std.fs.cwd(); var cache_dir = cwd.makeOpenPath("zig-cache", .{}) catch @panic("unable to make tmp dir for testing: unable to make and open zig-cache dir"); defer cache_dir.close(); @@ -422,7 +407,7 @@ pub fn tmpIterableDir(opts: std.fs.Dir.OpenDirOptions) TmpIterableDir { var sub_path: [TmpIterableDir.sub_path_len]u8 = undefined; _ = std.fs.base64_encoder.encode(&sub_path, &random_bytes); - var cwd = getCwdOrWasiPreopen(); + var cwd = std.fs.cwd(); var cache_dir = cwd.makeOpenPath("zig-cache", .{}) catch @panic("unable to make tmp dir for testing: unable to make and open zig-cache dir"); defer cache_dir.close(); diff --git a/lib/std/zig/parse.zig b/lib/std/zig/parse.zig index 0226ec2e1d..77ed67b3d2 100644 --- a/lib/std/zig/parse.zig +++ b/lib/std/zig/parse.zig @@ -131,11 +131,23 @@ const Parser = struct { return @intCast(Node.Index, i); } - fn reserveNode(p: *Parser) !usize { + fn reserveNode(p: *Parser, tag: Ast.Node.Tag) !usize { try 
p.nodes.resize(p.gpa, p.nodes.len + 1); + p.nodes.items(.tag)[p.nodes.len - 1] = tag; return p.nodes.len - 1; } + fn unreserveNode(p: *Parser, node_index: usize) void { + if (p.nodes.len == node_index) { + p.nodes.resize(p.gpa, p.nodes.len - 1) catch unreachable; + } else { + // There is a zombie node left in the tree; make it as inoffensive as possible + // (sadly there's no no-op node) + p.nodes.items(.tag)[node_index] = .unreachable_literal; + p.nodes.items(.main_token)[node_index] = p.tok_i; + } + } + fn addExtra(p: *Parser, extra: anytype) Allocator.Error!Node.Index { const fields = std.meta.fields(@TypeOf(extra)); try p.extra_data.ensureUnusedCapacity(p.gpa, fields.len); @@ -637,13 +649,15 @@ const Parser = struct { return fn_proto; }, .l_brace => { - const fn_decl_index = try p.reserveNode(); - const body_block = try p.parseBlock(); - assert(body_block != 0); if (is_extern) { try p.warnMsg(.{ .tag = .extern_fn_body, .token = extern_export_inline_token }); return null_node; } + const fn_decl_index = try p.reserveNode(.fn_decl); + errdefer p.unreserveNode(fn_decl_index); + + const body_block = try p.parseBlock(); + assert(body_block != 0); return p.setNode(fn_decl_index, .{ .tag = .fn_decl, .main_token = p.nodes.items(.main_token)[fn_proto], @@ -724,7 +738,8 @@ const Parser = struct { const fn_token = p.eatToken(.keyword_fn) orelse return null_node; // We want the fn proto node to be before its children in the array. 
- const fn_proto_index = try p.reserveNode(); + const fn_proto_index = try p.reserveNode(.fn_proto); + errdefer p.unreserveNode(fn_proto_index); _ = p.eatToken(.identifier); const params = try p.parseParamDeclList(); diff --git a/lib/std/zig/system/NativeTargetInfo.zig b/lib/std/zig/system/NativeTargetInfo.zig index 09b863cdf7..0232797387 100644 --- a/lib/std/zig/system/NativeTargetInfo.zig +++ b/lib/std/zig/system/NativeTargetInfo.zig @@ -978,6 +978,7 @@ fn detectNativeCpuAndFeatures(cpu_arch: Target.Cpu.Arch, os: Target.Os, cross_ta switch (builtin.os.tag) { .linux => return linux.detectNativeCpuAndFeatures(), .macos => return darwin.macos.detectNativeCpuAndFeatures(), + .windows => return windows.detectNativeCpuAndFeatures(), else => {}, } diff --git a/lib/std/zig/system/arm.zig b/lib/std/zig/system/arm.zig new file mode 100644 index 0000000000..b6f06206bc --- /dev/null +++ b/lib/std/zig/system/arm.zig @@ -0,0 +1,134 @@ +const std = @import("std"); + +pub const CoreInfo = struct { + architecture: u8 = 0, + implementer: u8 = 0, + variant: u8 = 0, + part: u16 = 0, +}; + +pub const cpu_models = struct { + // Shorthands to simplify the tables below. 
+ const A32 = std.Target.arm.cpu; + const A64 = std.Target.aarch64.cpu; + + const E = struct { + part: u16, + variant: ?u8 = null, // null if matches any variant + m32: ?*const std.Target.Cpu.Model = null, + m64: ?*const std.Target.Cpu.Model = null, + }; + + // implementer = 0x41 + const ARM = [_]E{ + E{ .part = 0x926, .m32 = &A32.arm926ej_s, .m64 = null }, + E{ .part = 0xb02, .m32 = &A32.mpcore, .m64 = null }, + E{ .part = 0xb36, .m32 = &A32.arm1136j_s, .m64 = null }, + E{ .part = 0xb56, .m32 = &A32.arm1156t2_s, .m64 = null }, + E{ .part = 0xb76, .m32 = &A32.arm1176jz_s, .m64 = null }, + E{ .part = 0xc05, .m32 = &A32.cortex_a5, .m64 = null }, + E{ .part = 0xc07, .m32 = &A32.cortex_a7, .m64 = null }, + E{ .part = 0xc08, .m32 = &A32.cortex_a8, .m64 = null }, + E{ .part = 0xc09, .m32 = &A32.cortex_a9, .m64 = null }, + E{ .part = 0xc0d, .m32 = &A32.cortex_a17, .m64 = null }, + E{ .part = 0xc0f, .m32 = &A32.cortex_a15, .m64 = null }, + E{ .part = 0xc0e, .m32 = &A32.cortex_a17, .m64 = null }, + E{ .part = 0xc14, .m32 = &A32.cortex_r4, .m64 = null }, + E{ .part = 0xc15, .m32 = &A32.cortex_r5, .m64 = null }, + E{ .part = 0xc17, .m32 = &A32.cortex_r7, .m64 = null }, + E{ .part = 0xc18, .m32 = &A32.cortex_r8, .m64 = null }, + E{ .part = 0xc20, .m32 = &A32.cortex_m0, .m64 = null }, + E{ .part = 0xc21, .m32 = &A32.cortex_m1, .m64 = null }, + E{ .part = 0xc23, .m32 = &A32.cortex_m3, .m64 = null }, + E{ .part = 0xc24, .m32 = &A32.cortex_m4, .m64 = null }, + E{ .part = 0xc27, .m32 = &A32.cortex_m7, .m64 = null }, + E{ .part = 0xc60, .m32 = &A32.cortex_m0plus, .m64 = null }, + E{ .part = 0xd01, .m32 = &A32.cortex_a32, .m64 = null }, + E{ .part = 0xd03, .m32 = &A32.cortex_a53, .m64 = &A64.cortex_a53 }, + E{ .part = 0xd04, .m32 = &A32.cortex_a35, .m64 = &A64.cortex_a35 }, + E{ .part = 0xd05, .m32 = &A32.cortex_a55, .m64 = &A64.cortex_a55 }, + E{ .part = 0xd07, .m32 = &A32.cortex_a57, .m64 = &A64.cortex_a57 }, + E{ .part = 0xd08, .m32 = &A32.cortex_a72, .m64 = &A64.cortex_a72 }, + 
E{ .part = 0xd09, .m32 = &A32.cortex_a73, .m64 = &A64.cortex_a73 }, + E{ .part = 0xd0a, .m32 = &A32.cortex_a75, .m64 = &A64.cortex_a75 }, + E{ .part = 0xd0b, .m32 = &A32.cortex_a76, .m64 = &A64.cortex_a76 }, + E{ .part = 0xd0c, .m32 = &A32.neoverse_n1, .m64 = &A64.neoverse_n1 }, + E{ .part = 0xd0d, .m32 = &A32.cortex_a77, .m64 = &A64.cortex_a77 }, + E{ .part = 0xd13, .m32 = &A32.cortex_r52, .m64 = null }, + E{ .part = 0xd20, .m32 = &A32.cortex_m23, .m64 = null }, + E{ .part = 0xd21, .m32 = &A32.cortex_m33, .m64 = null }, + E{ .part = 0xd41, .m32 = &A32.cortex_a78, .m64 = &A64.cortex_a78 }, + E{ .part = 0xd4b, .m32 = &A32.cortex_a78c, .m64 = &A64.cortex_a78c }, + // This is a guess based on https://www.notebookcheck.net/Qualcomm-Snapdragon-8cx-Gen-3-Processor-Benchmarks-and-Specs.652916.0.html + E{ .part = 0xd4c, .m32 = &A32.cortex_x1c, .m64 = &A64.cortex_x1c }, + E{ .part = 0xd44, .m32 = &A32.cortex_x1, .m64 = &A64.cortex_x1 }, + E{ .part = 0xd02, .m64 = &A64.cortex_a34 }, + E{ .part = 0xd06, .m64 = &A64.cortex_a65 }, + E{ .part = 0xd43, .m64 = &A64.cortex_a65ae }, + }; + // implementer = 0x42 + const Broadcom = [_]E{ + E{ .part = 0x516, .m64 = &A64.thunderx2t99 }, + }; + // implementer = 0x43 + const Cavium = [_]E{ + E{ .part = 0x0a0, .m64 = &A64.thunderx }, + E{ .part = 0x0a2, .m64 = &A64.thunderxt81 }, + E{ .part = 0x0a3, .m64 = &A64.thunderxt83 }, + E{ .part = 0x0a1, .m64 = &A64.thunderxt88 }, + E{ .part = 0x0af, .m64 = &A64.thunderx2t99 }, + }; + // implementer = 0x46 + const Fujitsu = [_]E{ + E{ .part = 0x001, .m64 = &A64.a64fx }, + }; + // implementer = 0x48 + const HiSilicon = [_]E{ + E{ .part = 0xd01, .m64 = &A64.tsv110 }, + }; + // implementer = 0x4e + const Nvidia = [_]E{ + E{ .part = 0x004, .m64 = &A64.carmel }, + }; + // implementer = 0x50 + const Ampere = [_]E{ + E{ .part = 0x000, .variant = 3, .m64 = &A64.emag }, + E{ .part = 0x000, .m64 = &A64.xgene1 }, + }; + // implementer = 0x51 + const Qualcomm = [_]E{ + E{ .part = 0x06f, .m32 = &A32.krait }, + 
E{ .part = 0x201, .m64 = &A64.kryo, .m32 = &A64.kryo }, + E{ .part = 0x205, .m64 = &A64.kryo, .m32 = &A64.kryo }, + E{ .part = 0x211, .m64 = &A64.kryo, .m32 = &A64.kryo }, + E{ .part = 0x800, .m64 = &A64.cortex_a73, .m32 = &A64.cortex_a73 }, + E{ .part = 0x801, .m64 = &A64.cortex_a73, .m32 = &A64.cortex_a73 }, + E{ .part = 0x802, .m64 = &A64.cortex_a75, .m32 = &A64.cortex_a75 }, + E{ .part = 0x803, .m64 = &A64.cortex_a75, .m32 = &A64.cortex_a75 }, + E{ .part = 0x804, .m64 = &A64.cortex_a76, .m32 = &A64.cortex_a76 }, + E{ .part = 0x805, .m64 = &A64.cortex_a76, .m32 = &A64.cortex_a76 }, + E{ .part = 0xc00, .m64 = &A64.falkor }, + E{ .part = 0xc01, .m64 = &A64.saphira }, + }; + + pub fn isKnown(core: CoreInfo, is_64bit: bool) ?*const std.Target.Cpu.Model { + const models = switch (core.implementer) { + 0x41 => &ARM, + 0x42 => &Broadcom, + 0x43 => &Cavium, + 0x46 => &Fujitsu, + 0x48 => &HiSilicon, + 0x50 => &Ampere, + 0x51 => &Qualcomm, + else => return null, + }; + + for (models) |model| { + if (model.part == core.part and + (model.variant == null or model.variant.? == core.variant)) + return if (is_64bit) model.m64 else model.m32; + } + + return null; + } +}; diff --git a/lib/std/zig/system/linux.zig b/lib/std/zig/system/linux.zig index e92aacb6ef..63a49c6472 100644 --- a/lib/std/zig/system/linux.zig +++ b/lib/std/zig/system/linux.zig @@ -159,129 +159,7 @@ const ArmCpuinfoImpl = struct { is_really_v6: bool = false, }; - const cpu_models = struct { - // Shorthands to simplify the tables below. 
- const A32 = Target.arm.cpu; - const A64 = Target.aarch64.cpu; - - const E = struct { - part: u16, - variant: ?u8 = null, // null if matches any variant - m32: ?*const Target.Cpu.Model = null, - m64: ?*const Target.Cpu.Model = null, - }; - - // implementer = 0x41 - const ARM = [_]E{ - E{ .part = 0x926, .m32 = &A32.arm926ej_s, .m64 = null }, - E{ .part = 0xb02, .m32 = &A32.mpcore, .m64 = null }, - E{ .part = 0xb36, .m32 = &A32.arm1136j_s, .m64 = null }, - E{ .part = 0xb56, .m32 = &A32.arm1156t2_s, .m64 = null }, - E{ .part = 0xb76, .m32 = &A32.arm1176jz_s, .m64 = null }, - E{ .part = 0xc05, .m32 = &A32.cortex_a5, .m64 = null }, - E{ .part = 0xc07, .m32 = &A32.cortex_a7, .m64 = null }, - E{ .part = 0xc08, .m32 = &A32.cortex_a8, .m64 = null }, - E{ .part = 0xc09, .m32 = &A32.cortex_a9, .m64 = null }, - E{ .part = 0xc0d, .m32 = &A32.cortex_a17, .m64 = null }, - E{ .part = 0xc0f, .m32 = &A32.cortex_a15, .m64 = null }, - E{ .part = 0xc0e, .m32 = &A32.cortex_a17, .m64 = null }, - E{ .part = 0xc14, .m32 = &A32.cortex_r4, .m64 = null }, - E{ .part = 0xc15, .m32 = &A32.cortex_r5, .m64 = null }, - E{ .part = 0xc17, .m32 = &A32.cortex_r7, .m64 = null }, - E{ .part = 0xc18, .m32 = &A32.cortex_r8, .m64 = null }, - E{ .part = 0xc20, .m32 = &A32.cortex_m0, .m64 = null }, - E{ .part = 0xc21, .m32 = &A32.cortex_m1, .m64 = null }, - E{ .part = 0xc23, .m32 = &A32.cortex_m3, .m64 = null }, - E{ .part = 0xc24, .m32 = &A32.cortex_m4, .m64 = null }, - E{ .part = 0xc27, .m32 = &A32.cortex_m7, .m64 = null }, - E{ .part = 0xc60, .m32 = &A32.cortex_m0plus, .m64 = null }, - E{ .part = 0xd01, .m32 = &A32.cortex_a32, .m64 = null }, - E{ .part = 0xd03, .m32 = &A32.cortex_a53, .m64 = &A64.cortex_a53 }, - E{ .part = 0xd04, .m32 = &A32.cortex_a35, .m64 = &A64.cortex_a35 }, - E{ .part = 0xd05, .m32 = &A32.cortex_a55, .m64 = &A64.cortex_a55 }, - E{ .part = 0xd07, .m32 = &A32.cortex_a57, .m64 = &A64.cortex_a57 }, - E{ .part = 0xd08, .m32 = &A32.cortex_a72, .m64 = &A64.cortex_a72 }, - E{ .part = 0xd09, 
.m32 = &A32.cortex_a73, .m64 = &A64.cortex_a73 }, - E{ .part = 0xd0a, .m32 = &A32.cortex_a75, .m64 = &A64.cortex_a75 }, - E{ .part = 0xd0b, .m32 = &A32.cortex_a76, .m64 = &A64.cortex_a76 }, - E{ .part = 0xd0c, .m32 = &A32.neoverse_n1, .m64 = &A64.neoverse_n1 }, - E{ .part = 0xd0d, .m32 = &A32.cortex_a77, .m64 = &A64.cortex_a77 }, - E{ .part = 0xd13, .m32 = &A32.cortex_r52, .m64 = null }, - E{ .part = 0xd20, .m32 = &A32.cortex_m23, .m64 = null }, - E{ .part = 0xd21, .m32 = &A32.cortex_m33, .m64 = null }, - E{ .part = 0xd41, .m32 = &A32.cortex_a78, .m64 = &A64.cortex_a78 }, - E{ .part = 0xd4b, .m32 = &A32.cortex_a78c, .m64 = &A64.cortex_a78c }, - E{ .part = 0xd44, .m32 = &A32.cortex_x1, .m64 = &A64.cortex_x1 }, - E{ .part = 0xd02, .m64 = &A64.cortex_a34 }, - E{ .part = 0xd06, .m64 = &A64.cortex_a65 }, - E{ .part = 0xd43, .m64 = &A64.cortex_a65ae }, - }; - // implementer = 0x42 - const Broadcom = [_]E{ - E{ .part = 0x516, .m64 = &A64.thunderx2t99 }, - }; - // implementer = 0x43 - const Cavium = [_]E{ - E{ .part = 0x0a0, .m64 = &A64.thunderx }, - E{ .part = 0x0a2, .m64 = &A64.thunderxt81 }, - E{ .part = 0x0a3, .m64 = &A64.thunderxt83 }, - E{ .part = 0x0a1, .m64 = &A64.thunderxt88 }, - E{ .part = 0x0af, .m64 = &A64.thunderx2t99 }, - }; - // implementer = 0x46 - const Fujitsu = [_]E{ - E{ .part = 0x001, .m64 = &A64.a64fx }, - }; - // implementer = 0x48 - const HiSilicon = [_]E{ - E{ .part = 0xd01, .m64 = &A64.tsv110 }, - }; - // implementer = 0x4e - const Nvidia = [_]E{ - E{ .part = 0x004, .m64 = &A64.carmel }, - }; - // implementer = 0x50 - const Ampere = [_]E{ - E{ .part = 0x000, .variant = 3, .m64 = &A64.emag }, - E{ .part = 0x000, .m64 = &A64.xgene1 }, - }; - // implementer = 0x51 - const Qualcomm = [_]E{ - E{ .part = 0x06f, .m32 = &A32.krait }, - E{ .part = 0x201, .m64 = &A64.kryo, .m32 = &A64.kryo }, - E{ .part = 0x205, .m64 = &A64.kryo, .m32 = &A64.kryo }, - E{ .part = 0x211, .m64 = &A64.kryo, .m32 = &A64.kryo }, - E{ .part = 0x800, .m64 = &A64.cortex_a73, .m32 = 
&A64.cortex_a73 }, - E{ .part = 0x801, .m64 = &A64.cortex_a73, .m32 = &A64.cortex_a73 }, - E{ .part = 0x802, .m64 = &A64.cortex_a75, .m32 = &A64.cortex_a75 }, - E{ .part = 0x803, .m64 = &A64.cortex_a75, .m32 = &A64.cortex_a75 }, - E{ .part = 0x804, .m64 = &A64.cortex_a76, .m32 = &A64.cortex_a76 }, - E{ .part = 0x805, .m64 = &A64.cortex_a76, .m32 = &A64.cortex_a76 }, - E{ .part = 0xc00, .m64 = &A64.falkor }, - E{ .part = 0xc01, .m64 = &A64.saphira }, - }; - - fn isKnown(core: CoreInfo, is_64bit: bool) ?*const Target.Cpu.Model { - const models = switch (core.implementer) { - 0x41 => &ARM, - 0x42 => &Broadcom, - 0x43 => &Cavium, - 0x46 => &Fujitsu, - 0x48 => &HiSilicon, - 0x50 => &Ampere, - 0x51 => &Qualcomm, - else => return null, - }; - - for (models) |model| { - if (model.part == core.part and - (model.variant == null or model.variant.? == core.variant)) - return if (is_64bit) model.m64 else model.m32; - } - - return null; - } - }; + const cpu_models = @import("arm.zig").cpu_models; fn addOne(self: *ArmCpuinfoImpl) void { if (self.have_fields == 4 and self.core_no < self.cores.len) { @@ -346,7 +224,12 @@ const ArmCpuinfoImpl = struct { var known_models: [self.cores.len]?*const Target.Cpu.Model = undefined; for (self.cores[0..self.core_no]) |core, i| { - known_models[i] = cpu_models.isKnown(core, is_64bit); + known_models[i] = cpu_models.isKnown(.{ + .architecture = core.architecture, + .implementer = core.implementer, + .variant = core.variant, + .part = core.part, + }, is_64bit); } // XXX We pick the first core on big.LITTLE systems, hopefully the diff --git a/lib/std/zig/system/windows.zig b/lib/std/zig/system/windows.zig index 595dac6278..f11905873d 100644 --- a/lib/std/zig/system/windows.zig +++ b/lib/std/zig/system/windows.zig @@ -1,6 +1,12 @@ const std = @import("std"); +const builtin = @import("builtin"); +const mem = std.mem; +const Target = std.Target; pub const WindowsVersion = std.Target.Os.WindowsVersion; +pub const PF = std.os.windows.PF; +pub const 
REG = std.os.windows.REG; +pub const IsProcessorFeaturePresent = std.os.windows.IsProcessorFeaturePresent; /// Returns the highest known WindowsVersion deduced from reported runtime information. /// Discards information about in-between versions we don't differentiate. @@ -38,3 +44,318 @@ pub fn detectRuntimeVersion() WindowsVersion { return @intToEnum(WindowsVersion, version); } + +// Technically, a registry value can be as long as 1MB. However, MS recommends storing +// values larger than 2048 bytes in a file rather than directly in the registry, and since we +// are only accessing a system hive \Registry\Machine, we stick to MS guidelines. +// https://learn.microsoft.com/en-us/windows/win32/sysinfo/registry-element-size-limits +const max_value_len = 2048; + +const RegistryPair = struct { + key: []const u8, + value: std.os.windows.ULONG, +}; + +fn getCpuInfoFromRegistry( + core: usize, + comptime pairs_num: comptime_int, + comptime pairs: [pairs_num]RegistryPair, + out_buf: *[pairs_num][max_value_len]u8, +) !void { + // Originally, I wanted to issue a single call with a more complex table structure such that we + // would sequentially visit each CPU#d subkey in the registry and pull the value of interest into + // a buffer, however, NT seems to be expecting a single buffer per each table meaning we would + // end up pulling only the last CPU core info, overwriting everything else. + // If anyone can come up with a solution to this, please do! 
+ const table_size = 1 + pairs.len; + var table: [table_size + 1]std.os.windows.RTL_QUERY_REGISTRY_TABLE = undefined; + + const topkey = std.unicode.utf8ToUtf16LeStringLiteral("\\Registry\\Machine\\HARDWARE\\DESCRIPTION\\System\\CentralProcessor"); + + const max_cpu_buf = 4; + var next_cpu_buf: [max_cpu_buf]u8 = undefined; + const next_cpu = try std.fmt.bufPrint(&next_cpu_buf, "{d}", .{core}); + + var subkey: [max_cpu_buf + 1]u16 = undefined; + const subkey_len = try std.unicode.utf8ToUtf16Le(&subkey, next_cpu); + subkey[subkey_len] = 0; + + table[0] = .{ + .QueryRoutine = null, + .Flags = std.os.windows.RTL_QUERY_REGISTRY_SUBKEY | std.os.windows.RTL_QUERY_REGISTRY_REQUIRED, + .Name = subkey[0..subkey_len :0], + .EntryContext = null, + .DefaultType = REG.NONE, + .DefaultData = null, + .DefaultLength = 0, + }; + + inline for (pairs) |pair, i| { + const ctx: *anyopaque = blk: { + switch (pair.value) { + REG.SZ, + REG.EXPAND_SZ, + REG.MULTI_SZ, + => { + var buf: [max_value_len / 2]u16 = undefined; + var unicode = std.os.windows.UNICODE_STRING{ + .Length = 0, + .MaximumLength = max_value_len, + .Buffer = &buf, + }; + break :blk &unicode; + }, + + REG.DWORD, + REG.DWORD_BIG_ENDIAN, + => { + var buf: [4]u8 = undefined; + break :blk &buf; + }, + + REG.QWORD => { + var buf: [8]u8 = undefined; + break :blk &buf; + }, + + else => unreachable, + } + }; + const key_namee = std.unicode.utf8ToUtf16LeStringLiteral(pair.key); + + table[i + 1] = .{ + .QueryRoutine = null, + .Flags = std.os.windows.RTL_QUERY_REGISTRY_DIRECT | std.os.windows.RTL_QUERY_REGISTRY_REQUIRED, + .Name = @intToPtr([*:0]u16, @ptrToInt(key_namee)), + .EntryContext = ctx, + .DefaultType = REG.NONE, + .DefaultData = null, + .DefaultLength = 0, + }; + } + + // Table sentinel + table[table_size] = .{ + .QueryRoutine = null, + .Flags = 0, + .Name = null, + .EntryContext = null, + .DefaultType = 0, + .DefaultData = null, + .DefaultLength = 0, + }; + + const res = std.os.windows.ntdll.RtlQueryRegistryValues( + 
std.os.windows.RTL_REGISTRY_ABSOLUTE, + topkey, + &table, + null, + null, + ); + switch (res) { + .SUCCESS => { + inline for (pairs) |pair, i| switch (pair.value) { + REG.NONE => unreachable, + + REG.SZ, + REG.EXPAND_SZ, + REG.MULTI_SZ, + => { + const entry = @ptrCast(*align(1) const std.os.windows.UNICODE_STRING, table[i + 1].EntryContext); + const len = try std.unicode.utf16leToUtf8(out_buf[i][0..], entry.Buffer[0 .. entry.Length / 2]); + out_buf[i][len] = 0; + }, + + REG.DWORD, + REG.DWORD_BIG_ENDIAN, + REG.QWORD, + => { + const entry = @ptrCast([*]align(1) const u8, table[i + 1].EntryContext); + switch (pair.value) { + REG.DWORD, REG.DWORD_BIG_ENDIAN => { + mem.copy(u8, out_buf[i][0..4], entry[0..4]); + }, + REG.QWORD => { + mem.copy(u8, out_buf[i][0..8], entry[0..8]); + }, + else => unreachable, + } + }, + + else => unreachable, + }; + }, + else => return error.Unexpected, + } +} + +fn getCpuCount() usize { + return std.os.windows.peb().NumberOfProcessors; +} + +const ArmCpuInfoImpl = struct { + cores: [4]CoreInfo = undefined, + core_no: usize = 0, + have_fields: usize = 0, + + const CoreInfo = @import("arm.zig").CoreInfo; + const cpu_models = @import("arm.zig").cpu_models; + + const Data = struct { + cp_4000: []const u8, + identifier: []const u8, + }; + + fn parseDataHook(self: *ArmCpuInfoImpl, data: Data) !void { + const info = &self.cores[self.core_no]; + info.* = .{}; + + // CPU part + info.part = mem.readIntLittle(u16, data.cp_4000[0..2]) >> 4; + self.have_fields += 1; + + // CPU implementer + info.implementer = data.cp_4000[3]; + self.have_fields += 1; + + var tokens = mem.tokenize(u8, data.identifier, " "); + while (tokens.next()) |token| { + if (mem.eql(u8, "Family", token)) { + // CPU architecture + const family = tokens.next() orelse continue; + info.architecture = try std.fmt.parseInt(u8, family, 10); + self.have_fields += 1; + break; + } + } else return; + + self.addOne(); + } + + fn addOne(self: *ArmCpuInfoImpl) void { + if (self.have_fields == 3 
and self.core_no < self.cores.len) { + if (self.core_no > 0) { + // Deduplicate the core info. + for (self.cores[0..self.core_no]) |it| { + if (std.meta.eql(it, self.cores[self.core_no])) + return; + } + } + self.core_no += 1; + } + } + + fn finalize(self: ArmCpuInfoImpl, arch: Target.Cpu.Arch) ?Target.Cpu { + if (self.core_no == 0) return null; + + const is_64bit = switch (arch) { + .aarch64, .aarch64_be, .aarch64_32 => true, + else => false, + }; + + var known_models: [self.cores.len]?*const Target.Cpu.Model = undefined; + for (self.cores[0..self.core_no]) |core, i| { + known_models[i] = cpu_models.isKnown(core, is_64bit); + } + + // XXX We pick the first core on big.LITTLE systems, hopefully the + // LITTLE one. + const model = known_models[0] orelse return null; + return Target.Cpu{ + .arch = arch, + .model = model, + .features = model.features, + }; + } +}; + +const ArmCpuInfoParser = CpuInfoParser(ArmCpuInfoImpl); + +fn CpuInfoParser(comptime impl: anytype) type { + return struct { + fn parse(arch: Target.Cpu.Arch) !?Target.Cpu { + var obj: impl = .{}; + var out_buf: [2][max_value_len]u8 = undefined; + + var i: usize = 0; + while (i < getCpuCount()) : (i += 1) { + try getCpuInfoFromRegistry(i, 2, .{ + .{ .key = "CP 4000", .value = REG.QWORD }, + .{ .key = "Identifier", .value = REG.SZ }, + }, &out_buf); + + const cp_4000 = out_buf[0][0..8]; + const identifier = mem.sliceTo(out_buf[1][0..], 0); + + try obj.parseDataHook(.{ + .cp_4000 = cp_4000, + .identifier = identifier, + }); + } + + return obj.finalize(arch); + } + }; +} + +fn genericCpu(comptime arch: Target.Cpu.Arch) Target.Cpu { + return .{ + .arch = arch, + .model = Target.Cpu.Model.generic(arch), + .features = Target.Cpu.Feature.Set.empty, + }; +} + +pub fn detectNativeCpuAndFeatures() ?Target.Cpu { + const current_arch = builtin.cpu.arch; + switch (current_arch) { + .aarch64, .aarch64_be, .aarch64_32 => { + var cpu = cpu: { + var maybe_cpu = ArmCpuInfoParser.parse(current_arch) catch break :cpu 
genericCpu(current_arch); + break :cpu maybe_cpu orelse genericCpu(current_arch); + }; + + const Feature = Target.aarch64.Feature; + + // Override any features that are either present or absent + if (IsProcessorFeaturePresent(PF.ARM_NEON_INSTRUCTIONS_AVAILABLE)) { + cpu.features.addFeature(@enumToInt(Feature.neon)); + } else { + cpu.features.removeFeature(@enumToInt(Feature.neon)); + } + + if (IsProcessorFeaturePresent(PF.ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE)) { + cpu.features.addFeature(@enumToInt(Feature.crc)); + } else { + cpu.features.removeFeature(@enumToInt(Feature.crc)); + } + + if (IsProcessorFeaturePresent(PF.ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE)) { + cpu.features.addFeature(@enumToInt(Feature.crypto)); + } else { + cpu.features.removeFeature(@enumToInt(Feature.crypto)); + } + + if (IsProcessorFeaturePresent(PF.ARM_V81_ATOMIC_INSTRUCTIONS_AVAILABLE)) { + cpu.features.addFeature(@enumToInt(Feature.lse)); + } else { + cpu.features.removeFeature(@enumToInt(Feature.lse)); + } + + if (IsProcessorFeaturePresent(PF.ARM_V82_DP_INSTRUCTIONS_AVAILABLE)) { + cpu.features.addFeature(@enumToInt(Feature.dotprod)); + } else { + cpu.features.removeFeature(@enumToInt(Feature.dotprod)); + } + + if (IsProcessorFeaturePresent(PF.ARM_V83_JSCVT_INSTRUCTIONS_AVAILABLE)) { + cpu.features.addFeature(@enumToInt(Feature.jsconv)); + } else { + cpu.features.removeFeature(@enumToInt(Feature.jsconv)); + } + + return cpu; + }, + else => {}, + } +} |
