diff options
Diffstat (limited to 'SOURCES/futex2.patch')
-rw-r--r-- | SOURCES/futex2.patch | 536 |
1 files changed, 0 insertions, 536 deletions
diff --git a/SOURCES/futex2.patch b/SOURCES/futex2.patch deleted file mode 100644 index 0813182..0000000 --- a/SOURCES/futex2.patch +++ /dev/null @@ -1,536 +0,0 @@ -From 4901e29e3c0237c52eadd2c82deb9bd6e7add5ac Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Andr=C3=A9=20Almeida?= <andrealmeid@collabora.com> -Date: Thu, 23 Sep 2021 14:11:05 -0300 -Subject: [PATCH 1/2] futex: Implement sys_futex_waitv() -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Add support to wait on multiple futexes. This is the interface -implemented by this syscall: - -futex_waitv(struct futex_waitv *waiters, unsigned int nr_futexes, - unsigned int flags, struct timespec *timeout, clockid_t clockid) - -struct futex_waitv { - __u64 val; - __u64 uaddr; - __u32 flags; - __u32 __reserved; -}; - -Given an array of struct futex_waitv, wait on each uaddr. The thread -wakes if a futex_wake() is performed at any uaddr. The syscall returns -immediately if any waiter has *uaddr != val. *timeout is an optional -absolute timeout value for the operation. This syscall supports only -64bit sized timeout structs. The flags argument of the syscall should be -empty, but it can be used for future extensions. Flags for shared -futexes, sizes, etc. should be used on the individual flags of each -waiter. - -__reserved is used for explicit padding and should be 0, but it might be -used for future extensions. If the userspace uses 32-bit pointers, it -should make sure to explicitly cast it when assigning to waitv::uaddr. - -Returns the array index of one of the woken futexes. There’s no given -information of how many were woken, or any particular attribute of it -(if it’s the first woken, if it is of the smaller index...). - -Signed-off-by: André Almeida <andrealmeid@collabora.com> -Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> -Link: https://lore.kernel.org/r/20210923171111.300673-17-andrealmeid@collabora.com ---- - include/linux/syscalls.h | 6 + - include/uapi/asm-generic/unistd.h | 5 +- - include/uapi/linux/futex.h | 26 +++ - kernel/futex.c | 334 ++++++++++++++++++++++++++++++ - kernel/sys_ni.c | 1 + - 5 files changed, 371 insertions(+), 1 deletion(-) - -diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h -index 050511e8f1f8..8390482cf082 100644 ---- a/include/linux/syscalls.h -+++ b/include/linux/syscalls.h -@@ -58,6 +58,7 @@ struct mq_attr; - struct compat_stat; - struct old_timeval32; - struct robust_list_head; -+struct futex_waitv; - struct getcpu_cache; - struct old_linux_dirent; - struct perf_event_attr; -@@ -623,6 +624,11 @@ asmlinkage long sys_get_robust_list(int pid, - asmlinkage long sys_set_robust_list(struct robust_list_head __user *head, - size_t len); - -+asmlinkage long sys_futex_waitv(struct futex_waitv *waiters, -+ unsigned int nr_futexes, unsigned int flags, -+ struct __kernel_timespec __user *timeout, clockid_t clockid); -+ -+ - /* kernel/hrtimer.c */ - asmlinkage long sys_nanosleep(struct __kernel_timespec __user *rqtp, - struct __kernel_timespec __user *rmtp); -diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h -index d2a942086fcb..3f55ac23cea9 100644 ---- a/include/uapi/asm-generic/unistd.h -+++ b/include/uapi/asm-generic/unistd.h -@@ -880,8 +880,11 @@ __SYSCALL(__NR_landlock_add_rule, sys_landlock_add_rule) - #define __NR_process_mrelease 448 - __SYSCALL(__NR_process_mrelease, sys_process_mrelease) - -+#define __NR_futex_waitv 449 -+__SYSCALL(__NR_futex_waitv, sys_futex_waitv) -+ - #undef __NR_syscalls --#define __NR_syscalls 449 -+#define __NR_syscalls 450 - - /* - * 32 bit systems traditionally used different -diff --git a/include/uapi/linux/futex.h b/include/uapi/linux/futex.h -index a89eb0accd5e..1666f5e4b837 100644 ---- a/include/uapi/linux/futex.h -+++ b/include/uapi/linux/futex.h -@@ -41,6 +41,32 @@ - #define FUTEX_CMP_REQUEUE_PI_PRIVATE (FUTEX_CMP_REQUEUE_PI | \ - FUTEX_PRIVATE_FLAG) - -+ /* -+ * Flags to specify the bit length of the futex word for futex2 syscalls. -+ * Currently, only 32 is supported. -+ */ -+#define FUTEX_32 2 -+ -+/* -+ * Max numbers of elements in a futex_waitv array -+ */ -+#define FUTEX_WAITV_MAX 128 -+ -+/** -+ * struct futex_waitv - A waiter for vectorized wait -+ * @val: Expected value at uaddr -+ * @uaddr: User address to wait on -+ * @flags: Flags for this waiter -+ * @__reserved: Reserved member to preserve data alignment. Should be 0. -+ */ -+struct futex_waitv { -+ __u64 val; -+ __u64 uaddr; -+ __u32 flags; -+ __u32 __reserved; -+}; -+ -+ - /* - * Support for robust futexes: the kernel cleans up held futexes at - * thread exit time. -diff --git a/kernel/futex.c b/kernel/futex.c -index 408cad5e8968..d7dc0bd9379c 100644 ---- a/kernel/futex.c -+++ b/kernel/futex.c -@@ -285,6 +285,18 @@ static const struct futex_q futex_q_init = { - .requeue_state = ATOMIC_INIT(Q_REQUEUE_PI_NONE), - }; - -+/** -+ * struct futex_vector - Auxiliary struct for futex_waitv() -+ * @w: Userspace provided data -+ * @q: Kernel side data -+ * -+ * Struct used to build an array with all data need for futex_waitv() -+ */ -+struct futex_vector { -+ struct futex_waitv w; -+ struct futex_q q; -+}; -+ - /* - * Hash buckets are shared by all the futex_keys that hash to the same - * location. Each key may have multiple futex_q structures, one for each task -@@ -3962,6 +3974,328 @@ COMPAT_SYSCALL_DEFINE3(get_robust_list, int, pid, - } - #endif /* CONFIG_COMPAT */ - -+/* Mask of available flags for each futex in futex_waitv list */ -+#define FUTEXV_WAITER_MASK (FUTEX_32 | FUTEX_PRIVATE_FLAG) -+ -+/** -+ * futex_parse_waitv - Parse a waitv array from userspace -+ * @futexv: Kernel side list of waiters to be filled -+ * @uwaitv: Userspace list to be parsed -+ * @nr_futexes: Length of futexv -+ * -+ * Return: Error code on failure, 0 on success -+ */ -+static int futex_parse_waitv(struct futex_vector *futexv, -+ struct futex_waitv __user *uwaitv, -+ unsigned int nr_futexes) -+{ -+ struct futex_waitv aux; -+ unsigned int i; -+ -+ for (i = 0; i < nr_futexes; i++) { -+ if (copy_from_user(&aux, &uwaitv[i], sizeof(aux))) -+ return -EFAULT; -+ -+ if ((aux.flags & ~FUTEXV_WAITER_MASK) || aux.__reserved) -+ return -EINVAL; -+ -+ if (!(aux.flags & FUTEX_32)) -+ return -EINVAL; -+ -+ futexv[i].w.flags = aux.flags; -+ futexv[i].w.val = aux.val; -+ futexv[i].w.uaddr = aux.uaddr; -+ futexv[i].q = futex_q_init; -+ } -+ -+ return 0; -+} -+ -+/** -+ * unqueue_multiple - Remove various futexes from their hash bucket -+ * @v: The list of futexes to unqueue -+ * @count: Number of futexes in the list -+ * -+ * Helper to unqueue a list of futexes. This can't fail. -+ * -+ * Return: -+ * - >=0 - Index of the last futex that was awoken; -+ * - -1 - No futex was awoken -+ */ -+static int unqueue_multiple(struct futex_vector *v, int count) -+{ -+ int ret = -1, i; -+ -+ for (i = 0; i < count; i++) { -+ if (!unqueue_me(&v[i].q)) -+ ret = i; -+ } -+ -+ return ret; -+} -+ -+/** -+ * futex_wait_multiple_setup - Prepare to wait and enqueue multiple futexes -+ * @vs: The futex list to wait on -+ * @count: The size of the list -+ * @woken: Index of the last woken futex, if any. Used to notify the -+ * caller that it can return this index to userspace (return parameter) -+ * -+ * Prepare multiple futexes in a single step and enqueue them. This may fail if -+ * the futex list is invalid or if any futex was already awoken. On success the -+ * task is ready to interruptible sleep. -+ * -+ * Return: -+ * - 1 - One of the futexes was woken by another thread -+ * - 0 - Success -+ * - <0 - -EFAULT, -EWOULDBLOCK or -EINVAL -+ */ -+static int futex_wait_multiple_setup(struct futex_vector *vs, int count, int *woken) -+{ -+ struct futex_hash_bucket *hb; -+ bool retry = false; -+ int ret, i; -+ u32 uval; -+ -+ /* -+ * Enqueuing multiple futexes is tricky, because we need to enqueue -+ * each futex on the list before dealing with the next one to avoid -+ * deadlocking on the hash bucket. But, before enqueuing, we need to -+ * make sure that current->state is TASK_INTERRUPTIBLE, so we don't -+ * lose any wake events, which cannot be done before the get_futex_key -+ * of the next key, because it calls get_user_pages, which can sleep. -+ * Thus, we fetch the list of futexes keys in two steps, by first -+ * pinning all the memory keys in the futex key, and only then we read -+ * each key and queue the corresponding futex. -+ * -+ * Private futexes doesn't need to recalculate hash in retry, so skip -+ * get_futex_key() when retrying. -+ */ -+retry: -+ for (i = 0; i < count; i++) { -+ if ((vs[i].w.flags & FUTEX_PRIVATE_FLAG) && retry) -+ continue; -+ -+ ret = get_futex_key(u64_to_user_ptr(vs[i].w.uaddr), -+ !(vs[i].w.flags & FUTEX_PRIVATE_FLAG), -+ &vs[i].q.key, FUTEX_READ); -+ -+ if (unlikely(ret)) -+ return ret; -+ } -+ -+ set_current_state(TASK_INTERRUPTIBLE); -+ -+ for (i = 0; i < count; i++) { -+ u32 __user *uaddr = (u32 __user *)(unsigned long)vs[i].w.uaddr; -+ struct futex_q *q = &vs[i].q; -+ u32 val = (u32)vs[i].w.val; -+ -+ hb = queue_lock(q); -+ ret = get_futex_value_locked(&uval, uaddr); -+ -+ if (!ret && uval == val) { -+ /* -+ * The bucket lock can't be held while dealing with the -+ * next futex. Queue each futex at this moment so hb can -+ * be unlocked. -+ */ -+ queue_me(q, hb); -+ continue; -+ } -+ -+ queue_unlock(hb); -+ __set_current_state(TASK_RUNNING); -+ -+ /* -+ * Even if something went wrong, if we find out that a futex -+ * was woken, we don't return error and return this index to -+ * userspace -+ */ -+ *woken = unqueue_multiple(vs, i); -+ if (*woken >= 0) -+ return 1; -+ -+ if (ret) { -+ /* -+ * If we need to handle a page fault, we need to do so -+ * without any lock and any enqueued futex (otherwise -+ * we could lose some wakeup). So we do it here, after -+ * undoing all the work done so far. In success, we -+ * retry all the work. -+ */ -+ if (get_user(uval, uaddr)) -+ return -EFAULT; -+ -+ retry = true; -+ goto retry; -+ } -+ -+ if (uval != val) -+ return -EWOULDBLOCK; -+ } -+ -+ return 0; -+} -+ -+/** -+ * futex_sleep_multiple - Check sleeping conditions and sleep -+ * @vs: List of futexes to wait for -+ * @count: Length of vs -+ * @to: Timeout -+ * -+ * Sleep if and only if the timeout hasn't expired and no futex on the list has -+ * been woken up. -+ */ -+static void futex_sleep_multiple(struct futex_vector *vs, unsigned int count, -+ struct hrtimer_sleeper *to) -+{ -+ if (to && !to->task) -+ return; -+ -+ for (; count; count--, vs++) { -+ if (!READ_ONCE(vs->q.lock_ptr)) -+ return; -+ } -+ -+ freezable_schedule(); -+} -+ -+/** -+ * futex_wait_multiple - Prepare to wait on and enqueue several futexes -+ * @vs: The list of futexes to wait on -+ * @count: The number of objects -+ * @to: Timeout before giving up and returning to userspace -+ * -+ * Entry point for the FUTEX_WAIT_MULTIPLE futex operation, this function -+ * sleeps on a group of futexes and returns on the first futex that is -+ * wake, or after the timeout has elapsed. -+ * -+ * Return: -+ * - >=0 - Hint to the futex that was awoken -+ * - <0 - On error -+ */ -+int futex_wait_multiple(struct futex_vector *vs, unsigned int count, -+ struct hrtimer_sleeper *to) -+{ -+ int ret, hint = 0; -+ -+ if (to) -+ hrtimer_sleeper_start_expires(to, HRTIMER_MODE_ABS); -+ -+ while (1) { -+ ret = futex_wait_multiple_setup(vs, count, &hint); -+ if (ret) { -+ if (ret > 0) { -+ /* A futex was woken during setup */ -+ ret = hint; -+ } -+ return ret; -+ } -+ -+ futex_sleep_multiple(vs, count, to); -+ -+ __set_current_state(TASK_RUNNING); -+ -+ ret = unqueue_multiple(vs, count); -+ if (ret >= 0) -+ return ret; -+ -+ if (to && !to->task) -+ return -ETIMEDOUT; -+ else if (signal_pending(current)) -+ return -ERESTARTSYS; -+ /* -+ * The final case is a spurious wakeup, for -+ * which just retry. -+ */ -+ } -+} -+/* Mask of available flags for each futex in futex_waitv list */ -+#define FUTEXV_WAITER_MASK (FUTEX_32 | FUTEX_PRIVATE_FLAG) -+ -+/** -+ * sys_futex_waitv - Wait on a list of futexes -+ * @waiters: List of futexes to wait on -+ * @nr_futexes: Length of futexv -+ * @flags: Flag for timeout (monotonic/realtime) -+ * @timeout: Optional absolute timeout. -+ * @clockid: Clock to be used for the timeout, realtime or monotonic. -+ * -+ * Given an array of `struct futex_waitv`, wait on each uaddr. The thread wakes -+ * if a futex_wake() is performed at any uaddr. The syscall returns immediately -+ * if any waiter has *uaddr != val. *timeout is an optional timeout value for -+ * the operation. Each waiter has individual flags. The `flags` argument for -+ * the syscall should be used solely for specifying the timeout as realtime, if -+ * needed. Flags for private futexes, sizes, etc. should be used on the -+ * individual flags of each waiter. -+ * -+ * Returns the array index of one of the woken futexes. No further information -+ * is provided: any number of other futexes may also have been woken by the -+ * same event, and if more than one futex was woken, the retrned index may -+ * refer to any one of them. (It is not necessaryily the futex with the -+ * smallest index, nor the one most recently woken, nor...) -+ */ -+ -+SYSCALL_DEFINE5(futex_waitv, struct futex_waitv __user *, waiters, -+ unsigned int, nr_futexes, unsigned int, flags, -+ struct __kernel_timespec __user *, timeout, clockid_t, clockid) -+{ -+ struct hrtimer_sleeper to; -+ struct futex_vector *futexv; -+ struct timespec64 ts; -+ ktime_t time; -+ int ret; -+ -+ /* This syscall supports no flags for now */ -+ if (flags) -+ return -EINVAL; -+ -+ if (!nr_futexes || nr_futexes > FUTEX_WAITV_MAX || !waiters) -+ return -EINVAL; -+ -+ if (timeout) { -+ int flag_clkid = 0, flag_init = 0; -+ -+ if (clockid == CLOCK_REALTIME) { -+ flag_clkid = FLAGS_CLOCKRT; -+ flag_init = FUTEX_CLOCK_REALTIME; -+ } -+ -+ if (clockid != CLOCK_REALTIME && clockid != CLOCK_MONOTONIC) -+ return -EINVAL; -+ -+ if (get_timespec64(&ts, timeout)) -+ return -EFAULT; -+ -+ /* -+ * Since there's no opcode for futex_waitv, use -+ * FUTEX_WAIT_BITSET that uses absolute timeout as well -+ */ -+ ret = futex_init_timeout(FUTEX_WAIT_BITSET, flag_init, &ts, &time); -+ if (ret) -+ return ret; -+ -+ futex_setup_timer(&time, &to, flag_clkid, 0); -+ } -+ -+ futexv = kcalloc(nr_futexes, sizeof(*futexv), GFP_KERNEL); -+ if (!futexv) -+ return -ENOMEM; -+ -+ ret = futex_parse_waitv(futexv, waiters, nr_futexes); -+ if (!ret) -+ ret = futex_wait_multiple(futexv, nr_futexes, timeout ? &to : NULL); -+ -+ if (timeout) { -+ hrtimer_cancel(&to.timer); -+ destroy_hrtimer_on_stack(&to.timer); -+ } -+ -+ kfree(futexv); -+ return ret; -+} -+ - #ifdef CONFIG_COMPAT_32BIT_TIME - SYSCALL_DEFINE6(futex_time32, u32 __user *, uaddr, int, op, u32, val, - const struct old_timespec32 __user *, utime, u32 __user *, uaddr2, -diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c -index 0ea8128468c3..0979fac9414d 100644 ---- a/kernel/sys_ni.c -+++ b/kernel/sys_ni.c -@@ -150,6 +150,7 @@ COND_SYSCALL(set_robust_list); - COND_SYSCALL_COMPAT(set_robust_list); - COND_SYSCALL(get_robust_list); - COND_SYSCALL_COMPAT(get_robust_list); -+COND_SYSCALL(futex_waitv); - - /* kernel/hrtimer.c */ - --- -2.33.1 - -From 4e40f3886e134f33c50ca79bc8b323cea784bd78 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Andr=C3=A9=20Almeida?= <andrealmeid@collabora.com> -Date: Thu, 23 Sep 2021 14:11:06 -0300 -Subject: [PATCH 2/2] futex,x86: Wire up sys_futex_waitv() -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Wire up syscall entry point for x86 arch, for both i386 and x86_64. - -Signed-off-by: André Almeida <andrealmeid@collabora.com> -Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> -Link: https://lore.kernel.org/r/20210923171111.300673-18-andrealmeid@collabora.com ---- - arch/x86/entry/syscalls/syscall_32.tbl | 1 + - arch/x86/entry/syscalls/syscall_64.tbl | 1 + - 2 files changed, 2 insertions(+) - -diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl -index 4bbc267fb36b..b2b9b9df1355 100644 ---- a/arch/x86/entry/syscalls/syscall_32.tbl -+++ b/arch/x86/entry/syscalls/syscall_32.tbl -@@ -453,3 +453,4 @@ - 446 i386 landlock_restrict_self sys_landlock_restrict_self - 447 i386 memfd_secret sys_memfd_secret - 448 i386 process_mrelease sys_process_mrelease -+449 i386 futex_waitv sys_futex_waitv -diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl -index ce18119ea0d0..bfd4e8f5be34 100644 ---- a/arch/x86/entry/syscalls/syscall_64.tbl -+++ b/arch/x86/entry/syscalls/syscall_64.tbl -@@ -370,6 +370,7 @@ - 446 common landlock_restrict_self sys_landlock_restrict_self - 447 common memfd_secret sys_memfd_secret - 448 common process_mrelease sys_process_mrelease -+449 common futex_waitv sys_futex_waitv - - # - # Due to a historical design error, certain syscalls are numbered differently --- -2.33.1 - |