summaryrefslogtreecommitdiff
path: root/SOURCES/futex2.patch
diff options
context:
space:
mode:
Diffstat (limited to 'SOURCES/futex2.patch')
-rw-r--r-- SOURCES/futex2.patch 536
1 files changed, 0 insertions, 536 deletions
diff --git a/SOURCES/futex2.patch b/SOURCES/futex2.patch
deleted file mode 100644
index 0813182..0000000
--- a/SOURCES/futex2.patch
+++ /dev/null
@@ -1,536 +0,0 @@
-From 4901e29e3c0237c52eadd2c82deb9bd6e7add5ac Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Andr=C3=A9=20Almeida?= <andrealmeid@collabora.com>
-Date: Thu, 23 Sep 2021 14:11:05 -0300
-Subject: [PATCH 1/2] futex: Implement sys_futex_waitv()
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-Add support to wait on multiple futexes. This is the interface
-implemented by this syscall:
-
-futex_waitv(struct futex_waitv *waiters, unsigned int nr_futexes,
- unsigned int flags, struct timespec *timeout, clockid_t clockid)
-
-struct futex_waitv {
- __u64 val;
- __u64 uaddr;
- __u32 flags;
- __u32 __reserved;
-};
-
-Given an array of struct futex_waitv, wait on each uaddr. The thread
-wakes if a futex_wake() is performed at any uaddr. The syscall returns
-immediately if any waiter has *uaddr != val. *timeout is an optional
-absolute timeout value for the operation. This syscall supports only
-64bit sized timeout structs. The flags argument of the syscall should be
-empty, but it can be used for future extensions. Flags for shared
-futexes, sizes, etc. should be used on the individual flags of each
-waiter.
-
-__reserved is used for explicit padding and should be 0, but it might be
-used for future extensions. If the userspace uses 32-bit pointers, it
-should make sure to explicitly cast it when assigning to waitv::uaddr.
-
-Returns the array index of one of the woken futexes. There’s no given
-information of how many were woken, or any particular attribute of it
-(if it’s the first woken, if it is of the smaller index...).
-
-Signed-off-by: André Almeida <andrealmeid@collabora.com>
-Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
-Link: https://lore.kernel.org/r/20210923171111.300673-17-andrealmeid@collabora.com
----
- include/linux/syscalls.h | 6 +
- include/uapi/asm-generic/unistd.h | 5 +-
- include/uapi/linux/futex.h | 26 +++
- kernel/futex.c | 334 ++++++++++++++++++++++++++++++
- kernel/sys_ni.c | 1 +
- 5 files changed, 371 insertions(+), 1 deletion(-)
-
-diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
-index 050511e8f1f8..8390482cf082 100644
---- a/include/linux/syscalls.h
-+++ b/include/linux/syscalls.h
-@@ -58,6 +58,7 @@ struct mq_attr;
- struct compat_stat;
- struct old_timeval32;
- struct robust_list_head;
-+struct futex_waitv;
- struct getcpu_cache;
- struct old_linux_dirent;
- struct perf_event_attr;
-@@ -623,6 +624,11 @@ asmlinkage long sys_get_robust_list(int pid,
- asmlinkage long sys_set_robust_list(struct robust_list_head __user *head,
- size_t len);
-
-+asmlinkage long sys_futex_waitv(struct futex_waitv *waiters,
-+ unsigned int nr_futexes, unsigned int flags,
-+ struct __kernel_timespec __user *timeout, clockid_t clockid);
-+
-+
- /* kernel/hrtimer.c */
- asmlinkage long sys_nanosleep(struct __kernel_timespec __user *rqtp,
- struct __kernel_timespec __user *rmtp);
-diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h
-index d2a942086fcb..3f55ac23cea9 100644
---- a/include/uapi/asm-generic/unistd.h
-+++ b/include/uapi/asm-generic/unistd.h
-@@ -880,8 +880,11 @@ __SYSCALL(__NR_landlock_add_rule, sys_landlock_add_rule)
- #define __NR_process_mrelease 448
- __SYSCALL(__NR_process_mrelease, sys_process_mrelease)
-
-+#define __NR_futex_waitv 449
-+__SYSCALL(__NR_futex_waitv, sys_futex_waitv)
-+
- #undef __NR_syscalls
--#define __NR_syscalls 449
-+#define __NR_syscalls 450
-
- /*
- * 32 bit systems traditionally used different
-diff --git a/include/uapi/linux/futex.h b/include/uapi/linux/futex.h
-index a89eb0accd5e..1666f5e4b837 100644
---- a/include/uapi/linux/futex.h
-+++ b/include/uapi/linux/futex.h
-@@ -41,6 +41,32 @@
- #define FUTEX_CMP_REQUEUE_PI_PRIVATE (FUTEX_CMP_REQUEUE_PI | \
- FUTEX_PRIVATE_FLAG)
-
-+ /*
-+ * Flags to specify the bit length of the futex word for futex2 syscalls.
-+ * Currently, only 32 is supported.
-+ */
-+#define FUTEX_32 2
-+
-+/*
-+ * Max numbers of elements in a futex_waitv array
-+ */
-+#define FUTEX_WAITV_MAX 128
-+
-+/**
-+ * struct futex_waitv - A waiter for vectorized wait
-+ * @val: Expected value at uaddr
-+ * @uaddr: User address to wait on
-+ * @flags: Flags for this waiter
-+ * @__reserved: Reserved member to preserve data alignment. Should be 0.
-+ */
-+struct futex_waitv {
-+ __u64 val;
-+ __u64 uaddr;
-+ __u32 flags;
-+ __u32 __reserved;
-+};
-+
-+
- /*
- * Support for robust futexes: the kernel cleans up held futexes at
- * thread exit time.
-diff --git a/kernel/futex.c b/kernel/futex.c
-index 408cad5e8968..d7dc0bd9379c 100644
---- a/kernel/futex.c
-+++ b/kernel/futex.c
-@@ -285,6 +285,18 @@ static const struct futex_q futex_q_init = {
- .requeue_state = ATOMIC_INIT(Q_REQUEUE_PI_NONE),
- };
-
-+/**
-+ * struct futex_vector - Auxiliary struct for futex_waitv()
-+ * @w: Userspace provided data
-+ * @q: Kernel side data
-+ *
-+ * Struct used to build an array with all data needed for futex_waitv()
-+ */
-+struct futex_vector {
-+ struct futex_waitv w;
-+ struct futex_q q;
-+};
-+
- /*
- * Hash buckets are shared by all the futex_keys that hash to the same
- * location. Each key may have multiple futex_q structures, one for each task
-@@ -3962,6 +3974,328 @@ COMPAT_SYSCALL_DEFINE3(get_robust_list, int, pid,
- }
- #endif /* CONFIG_COMPAT */
-
-+/* Mask of available flags for each futex in futex_waitv list */
-+#define FUTEXV_WAITER_MASK (FUTEX_32 | FUTEX_PRIVATE_FLAG)
-+
-+/**
-+ * futex_parse_waitv - Parse a waitv array from userspace
-+ * @futexv: Kernel side list of waiters to be filled
-+ * @uwaitv: Userspace list to be parsed
-+ * @nr_futexes: Length of futexv
-+ *
-+ * Return: Error code on failure, 0 on success
-+ */
-+static int futex_parse_waitv(struct futex_vector *futexv,
-+ struct futex_waitv __user *uwaitv,
-+ unsigned int nr_futexes)
-+{
-+ struct futex_waitv aux;
-+ unsigned int i;
-+
-+ for (i = 0; i < nr_futexes; i++) {
-+ if (copy_from_user(&aux, &uwaitv[i], sizeof(aux)))
-+ return -EFAULT;
-+
-+ if ((aux.flags & ~FUTEXV_WAITER_MASK) || aux.__reserved)
-+ return -EINVAL;
-+
-+ if (!(aux.flags & FUTEX_32))
-+ return -EINVAL;
-+
-+ futexv[i].w.flags = aux.flags;
-+ futexv[i].w.val = aux.val;
-+ futexv[i].w.uaddr = aux.uaddr;
-+ futexv[i].q = futex_q_init;
-+ }
-+
-+ return 0;
-+}
-+
-+/**
-+ * unqueue_multiple - Remove various futexes from their hash bucket
-+ * @v: The list of futexes to unqueue
-+ * @count: Number of futexes in the list
-+ *
-+ * Helper to unqueue a list of futexes. This can't fail.
-+ *
-+ * Return:
-+ * - >=0 - Index of the last futex that was awoken;
-+ * - -1 - No futex was awoken
-+ */
-+static int unqueue_multiple(struct futex_vector *v, int count)
-+{
-+ int ret = -1, i;
-+
-+ for (i = 0; i < count; i++) {
-+ if (!unqueue_me(&v[i].q))
-+ ret = i;
-+ }
-+
-+ return ret;
-+}
-+
-+/**
-+ * futex_wait_multiple_setup - Prepare to wait and enqueue multiple futexes
-+ * @vs: The futex list to wait on
-+ * @count: The size of the list
-+ * @woken: Index of the last woken futex, if any. Used to notify the
-+ * caller that it can return this index to userspace (return parameter)
-+ *
-+ * Prepare multiple futexes in a single step and enqueue them. This may fail if
-+ * the futex list is invalid or if any futex was already awoken. On success the
-+ * task is ready for interruptible sleep.
-+ *
-+ * Return:
-+ * - 1 - One of the futexes was woken by another thread
-+ * - 0 - Success
-+ * - <0 - -EFAULT, -EWOULDBLOCK or -EINVAL
-+ */
-+static int futex_wait_multiple_setup(struct futex_vector *vs, int count, int *woken)
-+{
-+ struct futex_hash_bucket *hb;
-+ bool retry = false;
-+ int ret, i;
-+ u32 uval;
-+
-+ /*
-+ * Enqueuing multiple futexes is tricky, because we need to enqueue
-+ * each futex on the list before dealing with the next one to avoid
-+ * deadlocking on the hash bucket. But, before enqueuing, we need to
-+ * make sure that current->state is TASK_INTERRUPTIBLE, so we don't
-+ * lose any wake events, which cannot be done before the get_futex_key
-+ * of the next key, because it calls get_user_pages, which can sleep.
-+ * Thus, we fetch the list of futexes keys in two steps, by first
-+ * pinning all the memory keys in the futex key, and only then we read
-+ * each key and queue the corresponding futex.
-+ *
-+ * Private futexes don't need to recalculate hash in retry, so skip
-+ * get_futex_key() when retrying.
-+ */
-+retry:
-+ for (i = 0; i < count; i++) {
-+ if ((vs[i].w.flags & FUTEX_PRIVATE_FLAG) && retry)
-+ continue;
-+
-+ ret = get_futex_key(u64_to_user_ptr(vs[i].w.uaddr),
-+ !(vs[i].w.flags & FUTEX_PRIVATE_FLAG),
-+ &vs[i].q.key, FUTEX_READ);
-+
-+ if (unlikely(ret))
-+ return ret;
-+ }
-+
-+ set_current_state(TASK_INTERRUPTIBLE);
-+
-+ for (i = 0; i < count; i++) {
-+ u32 __user *uaddr = (u32 __user *)(unsigned long)vs[i].w.uaddr;
-+ struct futex_q *q = &vs[i].q;
-+ u32 val = (u32)vs[i].w.val;
-+
-+ hb = queue_lock(q);
-+ ret = get_futex_value_locked(&uval, uaddr);
-+
-+ if (!ret && uval == val) {
-+ /*
-+ * The bucket lock can't be held while dealing with the
-+ * next futex. Queue each futex at this moment so hb can
-+ * be unlocked.
-+ */
-+ queue_me(q, hb);
-+ continue;
-+ }
-+
-+ queue_unlock(hb);
-+ __set_current_state(TASK_RUNNING);
-+
-+ /*
-+ * Even if something went wrong, if we find out that a futex
-+ * was woken, we don't return error and return this index to
-+ * userspace
-+ */
-+ *woken = unqueue_multiple(vs, i);
-+ if (*woken >= 0)
-+ return 1;
-+
-+ if (ret) {
-+ /*
-+ * If we need to handle a page fault, we need to do so
-+ * without any lock and any enqueued futex (otherwise
-+ * we could lose some wakeup). So we do it here, after
-+ * undoing all the work done so far. On success, we
-+ * retry all the work.
-+ */
-+ if (get_user(uval, uaddr))
-+ return -EFAULT;
-+
-+ retry = true;
-+ goto retry;
-+ }
-+
-+ if (uval != val)
-+ return -EWOULDBLOCK;
-+ }
-+
-+ return 0;
-+}
-+
-+/**
-+ * futex_sleep_multiple - Check sleeping conditions and sleep
-+ * @vs: List of futexes to wait for
-+ * @count: Length of vs
-+ * @to: Timeout
-+ *
-+ * Sleep if and only if the timeout hasn't expired and no futex on the list has
-+ * been woken up.
-+ */
-+static void futex_sleep_multiple(struct futex_vector *vs, unsigned int count,
-+ struct hrtimer_sleeper *to)
-+{
-+ if (to && !to->task)
-+ return;
-+
-+ for (; count; count--, vs++) {
-+ if (!READ_ONCE(vs->q.lock_ptr))
-+ return;
-+ }
-+
-+ freezable_schedule();
-+}
-+
-+/**
-+ * futex_wait_multiple - Prepare to wait on and enqueue several futexes
-+ * @vs: The list of futexes to wait on
-+ * @count: The number of objects
-+ * @to: Timeout before giving up and returning to userspace
-+ *
-+ * Entry point for the FUTEX_WAIT_MULTIPLE futex operation, this function
-+ * sleeps on a group of futexes and returns on the first futex that is
-+ * woken, or after the timeout has elapsed.
-+ *
-+ * Return:
-+ * - >=0 - Hint to the futex that was awoken
-+ * - <0 - On error
-+ */
-+int futex_wait_multiple(struct futex_vector *vs, unsigned int count,
-+ struct hrtimer_sleeper *to)
-+{
-+ int ret, hint = 0;
-+
-+ if (to)
-+ hrtimer_sleeper_start_expires(to, HRTIMER_MODE_ABS);
-+
-+ while (1) {
-+ ret = futex_wait_multiple_setup(vs, count, &hint);
-+ if (ret) {
-+ if (ret > 0) {
-+ /* A futex was woken during setup */
-+ ret = hint;
-+ }
-+ return ret;
-+ }
-+
-+ futex_sleep_multiple(vs, count, to);
-+
-+ __set_current_state(TASK_RUNNING);
-+
-+ ret = unqueue_multiple(vs, count);
-+ if (ret >= 0)
-+ return ret;
-+
-+ if (to && !to->task)
-+ return -ETIMEDOUT;
-+ else if (signal_pending(current))
-+ return -ERESTARTSYS;
-+ /*
-+ * The final case is a spurious wakeup, for
-+ * which just retry.
-+ */
-+ }
-+}
-+/* Mask of available flags for each futex in futex_waitv list */
-+#define FUTEXV_WAITER_MASK (FUTEX_32 | FUTEX_PRIVATE_FLAG)
-+
-+/**
-+ * sys_futex_waitv - Wait on a list of futexes
-+ * @waiters: List of futexes to wait on
-+ * @nr_futexes: Length of futexv
-+ * @flags: Reserved for future extensions; must be 0 for now
-+ * @timeout: Optional absolute timeout.
-+ * @clockid: Clock to be used for the timeout, realtime or monotonic.
-+ *
-+ * Given an array of `struct futex_waitv`, wait on each uaddr. The thread wakes
-+ * if a futex_wake() is performed at any uaddr. The syscall returns immediately
-+ * if any waiter has *uaddr != val. *timeout is an optional timeout value for
-+ * the operation. Each waiter has individual flags. The `flags` argument for
-+ * the syscall must currently be 0 (the timeout clock is selected through
-+ * clockid). Flags for private futexes, sizes, etc. should be used on the
-+ * individual flags of each waiter.
-+ *
-+ * Returns the array index of one of the woken futexes. No further information
-+ * is provided: any number of other futexes may also have been woken by the
-+ * same event, and if more than one futex was woken, the returned index may
-+ * refer to any one of them. (It is not necessarily the futex with the
-+ * smallest index, nor the one most recently woken, nor...)
-+ */
-+
-+SYSCALL_DEFINE5(futex_waitv, struct futex_waitv __user *, waiters,
-+ unsigned int, nr_futexes, unsigned int, flags,
-+ struct __kernel_timespec __user *, timeout, clockid_t, clockid)
-+{
-+ struct hrtimer_sleeper to;
-+ struct futex_vector *futexv;
-+ struct timespec64 ts;
-+ ktime_t time;
-+ int ret;
-+
-+ /* This syscall supports no flags for now */
-+ if (flags)
-+ return -EINVAL;
-+
-+ if (!nr_futexes || nr_futexes > FUTEX_WAITV_MAX || !waiters)
-+ return -EINVAL;
-+
-+ if (timeout) {
-+ int flag_clkid = 0, flag_init = 0;
-+
-+ if (clockid == CLOCK_REALTIME) {
-+ flag_clkid = FLAGS_CLOCKRT;
-+ flag_init = FUTEX_CLOCK_REALTIME;
-+ }
-+
-+ if (clockid != CLOCK_REALTIME && clockid != CLOCK_MONOTONIC)
-+ return -EINVAL;
-+
-+ if (get_timespec64(&ts, timeout))
-+ return -EFAULT;
-+
-+ /*
-+ * Since there's no opcode for futex_waitv, use
-+ * FUTEX_WAIT_BITSET that uses absolute timeout as well
-+ */
-+ ret = futex_init_timeout(FUTEX_WAIT_BITSET, flag_init, &ts, &time);
-+ if (ret)
-+ return ret;
-+
-+ futex_setup_timer(&time, &to, flag_clkid, 0);
-+ }
-+
-+ futexv = kcalloc(nr_futexes, sizeof(*futexv), GFP_KERNEL);
-+ if (!futexv)
-+ return -ENOMEM;
-+
-+ ret = futex_parse_waitv(futexv, waiters, nr_futexes);
-+ if (!ret)
-+ ret = futex_wait_multiple(futexv, nr_futexes, timeout ? &to : NULL);
-+
-+ if (timeout) {
-+ hrtimer_cancel(&to.timer);
-+ destroy_hrtimer_on_stack(&to.timer);
-+ }
-+
-+ kfree(futexv);
-+ return ret;
-+}
-+
- #ifdef CONFIG_COMPAT_32BIT_TIME
- SYSCALL_DEFINE6(futex_time32, u32 __user *, uaddr, int, op, u32, val,
- const struct old_timespec32 __user *, utime, u32 __user *, uaddr2,
-diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
-index 0ea8128468c3..0979fac9414d 100644
---- a/kernel/sys_ni.c
-+++ b/kernel/sys_ni.c
-@@ -150,6 +150,7 @@ COND_SYSCALL(set_robust_list);
- COND_SYSCALL_COMPAT(set_robust_list);
- COND_SYSCALL(get_robust_list);
- COND_SYSCALL_COMPAT(get_robust_list);
-+COND_SYSCALL(futex_waitv);
-
- /* kernel/hrtimer.c */
-
---
-2.33.1
-
-From 4e40f3886e134f33c50ca79bc8b323cea784bd78 Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Andr=C3=A9=20Almeida?= <andrealmeid@collabora.com>
-Date: Thu, 23 Sep 2021 14:11:06 -0300
-Subject: [PATCH 2/2] futex,x86: Wire up sys_futex_waitv()
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-Wire up syscall entry point for x86 arch, for both i386 and x86_64.
-
-Signed-off-by: André Almeida <andrealmeid@collabora.com>
-Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
-Link: https://lore.kernel.org/r/20210923171111.300673-18-andrealmeid@collabora.com
----
- arch/x86/entry/syscalls/syscall_32.tbl | 1 +
- arch/x86/entry/syscalls/syscall_64.tbl | 1 +
- 2 files changed, 2 insertions(+)
-
-diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl
-index 4bbc267fb36b..b2b9b9df1355 100644
---- a/arch/x86/entry/syscalls/syscall_32.tbl
-+++ b/arch/x86/entry/syscalls/syscall_32.tbl
-@@ -453,3 +453,4 @@
- 446 i386 landlock_restrict_self sys_landlock_restrict_self
- 447 i386 memfd_secret sys_memfd_secret
- 448 i386 process_mrelease sys_process_mrelease
-+449 i386 futex_waitv sys_futex_waitv
-diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl
-index ce18119ea0d0..bfd4e8f5be34 100644
---- a/arch/x86/entry/syscalls/syscall_64.tbl
-+++ b/arch/x86/entry/syscalls/syscall_64.tbl
-@@ -370,6 +370,7 @@
- 446 common landlock_restrict_self sys_landlock_restrict_self
- 447 common memfd_secret sys_memfd_secret
- 448 common process_mrelease sys_process_mrelease
-+449 common futex_waitv sys_futex_waitv
-
- #
- # Due to a historical design error, certain syscalls are numbered differently
---
-2.33.1
-