From 14a106cc87e6d03169ac8c7ea030e3d7fac2dfe4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Almeida?= Date: Wed, 5 Aug 2020 12:40:26 -0300 Subject: [PATCH 1/9] futex2: Add new futex interface MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Initial implementation for futex2. Support only private u32 wait/wake, with timeout (monotonic and realtime clocks). Signed-off-by: André Almeida Signed-off-by: Jan200101 --- MAINTAINERS | 2 +- arch/x86/entry/syscalls/syscall_32.tbl | 2 + arch/x86/entry/syscalls/syscall_64.tbl | 2 + include/linux/syscalls.h | 7 + include/uapi/asm-generic/unistd.h | 8 +- include/uapi/linux/futex.h | 40 ++ init/Kconfig | 7 + kernel/Makefile | 1 + kernel/futex2.c | 484 ++++++++++++++++++ kernel/sys_ni.c | 4 + tools/include/uapi/asm-generic/unistd.h | 9 +- .../arch/x86/entry/syscalls/syscall_64.tbl | 2 + 12 files changed, 565 insertions(+), 3 deletions(-) create mode 100644 kernel/futex2.c diff --git a/MAINTAINERS b/MAINTAINERS index 2daa6ee67..855d38511 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7259,7 +7259,7 @@ F: Documentation/locking/*futex* F: include/asm-generic/futex.h F: include/linux/futex.h F: include/uapi/linux/futex.h -F: kernel/futex.c +F: kernel/futex* F: tools/perf/bench/futex* F: tools/testing/selftests/futex/ diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl index 0d0667a9f..83a75ff39 100644 --- a/arch/x86/entry/syscalls/syscall_32.tbl +++ b/arch/x86/entry/syscalls/syscall_32.tbl @@ -445,3 +445,5 @@ 438 i386 pidfd_getfd sys_pidfd_getfd 439 i386 faccessat2 sys_faccessat2 440 i386 process_madvise sys_process_madvise +441 i386 futex_wait sys_futex_wait +442 i386 futex_wake sys_futex_wake diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl index 379819244..6658fd63c 100644 --- a/arch/x86/entry/syscalls/syscall_64.tbl +++ b/arch/x86/entry/syscalls/syscall_64.tbl @@ -362,6 +362,8 @@ 438 common 
pidfd_getfd sys_pidfd_getfd 439 common faccessat2 sys_faccessat2 440 common process_madvise sys_process_madvise +441 common futex_wait sys_futex_wait +442 common futex_wake sys_futex_wake # # Due to a historical design error, certain syscalls are numbered differently diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 37bea07c1..b6b77cf2b 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -589,6 +589,13 @@ asmlinkage long sys_get_robust_list(int pid, asmlinkage long sys_set_robust_list(struct robust_list_head __user *head, size_t len); +/* kernel/futex2.c */ +asmlinkage long sys_futex_wait(void __user *uaddr, unsigned long val, + unsigned long flags, + struct __kernel_timespec __user __user *timo); +asmlinkage long sys_futex_wake(void __user *uaddr, unsigned long nr_wake, + unsigned long flags); + /* kernel/hrtimer.c */ asmlinkage long sys_nanosleep(struct __kernel_timespec __user *rqtp, struct __kernel_timespec __user *rmtp); diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h index 205631898..ae47d6a9e 100644 --- a/include/uapi/asm-generic/unistd.h +++ b/include/uapi/asm-generic/unistd.h @@ -860,8 +860,14 @@ __SYSCALL(__NR_faccessat2, sys_faccessat2) #define __NR_process_madvise 440 __SYSCALL(__NR_process_madvise, sys_process_madvise) +#define __NR_futex_wait 441 +__SYSCALL(__NR_futex_wait, sys_futex_wait) + +#define __NR_futex_wake 442 +__SYSCALL(__NR_futex_wake, sys_futex_wake) + #undef __NR_syscalls -#define __NR_syscalls 441 +#define __NR_syscalls 443 /* * 32 bit systems traditionally used different diff --git a/include/uapi/linux/futex.h b/include/uapi/linux/futex.h index a89eb0acc..35a5bf1cd 100644 --- a/include/uapi/linux/futex.h +++ b/include/uapi/linux/futex.h @@ -41,6 +41,46 @@ #define FUTEX_CMP_REQUEUE_PI_PRIVATE (FUTEX_CMP_REQUEUE_PI | \ FUTEX_PRIVATE_FLAG) +/* Size argument to futex2 syscall */ +#define FUTEX_8 0 +#define FUTEX_16 1 +#define FUTEX_32 2 + +#define FUTEX_SIZE_MASK 
0x3 + +#define FUTEX_SHARED_FLAG 8 + +#define FUTEX_NUMA_FLAG 16 + +/* + * struct futexXX_numa - struct for NUMA-aware futex operation + * @value: futex value + * @hint: node id to operate + */ + +struct futex8_numa { + __u8 value; + __u8 hint; +}; + +struct futex16_numa { + __u16 value; + __u16 hint; +}; + +struct futex32_numa { + __u32 value; + __u32 hint; +}; + +#define FUTEX_WAITV_MAX 128 + +struct futex_waitv { + void *uaddr; + unsigned int val; + unsigned int flags; +}; + /* * Support for robust futexes: the kernel cleans up held futexes at * thread exit time. diff --git a/init/Kconfig b/init/Kconfig index 02d13ae27..1264687ea 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1522,6 +1522,13 @@ config FUTEX support for "fast userspace mutexes". The resulting kernel may not run glibc-based applications correctly. +config FUTEX2 + bool "Enable futex2 support" if EXPERT + depends on FUTEX + default y + help + Experimental support for futex2 interface. + config FUTEX_PI bool depends on FUTEX && RT_MUTEXES diff --git a/kernel/Makefile b/kernel/Makefile index af601b9bd..bb7f33986 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -54,6 +54,7 @@ obj-$(CONFIG_PROFILING) += profile.o obj-$(CONFIG_STACKTRACE) += stacktrace.o obj-y += time/ obj-$(CONFIG_FUTEX) += futex.o +obj-$(CONFIG_FUTEX2) += futex2.o obj-$(CONFIG_GENERIC_ISA_DMA) += dma.o obj-$(CONFIG_SMP) += smp.o ifneq ($(CONFIG_SMP),y) diff --git a/kernel/futex2.c b/kernel/futex2.c new file mode 100644 index 000000000..107b80a46 --- /dev/null +++ b/kernel/futex2.c @@ -0,0 +1,484 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * futex2 system call interface by André Almeida + * + * Copyright 2020 Collabora Ltd. 
+ */ + +#include +#include +#include +#include +#include +#include +#include + +/** + * struct futex_waiter - List entry for a waiter + * @key.address: Memory address of userspace futex + * @key.mm: Pointer to memory management struct of this process + * @key: Stores information that uniquely identify a futex + * @list: List node struct + * @val: Expected value for this waiter + * @flags: Flags + * @bucket: Pointer to the bucket for this waiter + * @index: Index of waiter in futexv list + */ +struct futex_waiter { + struct futex_key { + uintptr_t address; + struct mm_struct *mm; + } key; + struct list_head list; + unsigned int val; + unsigned int flags; + struct futex_bucket *bucket; + unsigned int index; +}; + +/** + * struct futex_bucket - A bucket of futex's hash table + * @waiters: Number of waiters in the bucket + * @lock: Bucket lock + * @list: List of waiters on this bucket + */ +struct futex_bucket { + atomic_t waiters; + spinlock_t lock; + struct list_head list; +}; + +struct futexv { + struct task_struct *task; + int hint; + struct futex_waiter objects[0]; +}; + +struct futex_single_waiter { + struct futexv parent; + struct futex_waiter waiter; +} __packed; + +struct futex_bucket *futex_table; + +/* mask for futex2 flag operations */ +#define FUTEX2_MASK (FUTEX_SIZE_MASK | FUTEX_SHARED_FLAG | \ + FUTEX_CLOCK_REALTIME) + +// mask for sys_futex_waitv +#define FUTEXV_MASK (FUTEX_CLOCK_REALTIME) + +// mask for each futex in futex_waitv list +#define FUTEXV_WAITER_MASK (FUTEX_SIZE_MASK | FUTEX_SHARED_FLAG) + +int futex2_hashsize; + +/* + * Reflects a new waiter being added to the waitqueue. + */ +static inline void bucket_inc_waiters(struct futex_bucket *bucket) +{ +#ifdef CONFIG_SMP + atomic_inc(&bucket->waiters); + /* + * Full barrier (A), see the ordering comment above. + */ + smp_mb__after_atomic(); +#endif +} + +/* + * Reflects a waiter being removed from the waitqueue by wakeup + * paths. 
+ */ +static inline void bucket_dec_waiters(struct futex_bucket *bucket) +{ +#ifdef CONFIG_SMP + atomic_dec(&bucket->waiters); +#endif +} + +/* + * Get the number of waiters in a bucket + */ +static inline int bucket_get_waiters(struct futex_bucket *bucket) +{ +#ifdef CONFIG_SMP + /* + * Full barrier (B), see the ordering comment above. + */ + smp_mb(); + return atomic_read(&bucket->waiters); +#else + return 1; +#endif +} + +/** + * futex_get_bucket - Check if the user address is valid, prepare internal + * data and calculate the hash + * @uaddr: futex user address + * @key: data that uniquely identifies a futex + * + * Return: address of bucket on success, error code otherwise + */ +static struct futex_bucket *futex_get_bucket(void __user *uaddr, + struct futex_key *key) +{ + uintptr_t address = (uintptr_t) uaddr; + u32 hash_key; + + /* Checking if uaddr is valid and accessible */ + if (unlikely(!IS_ALIGNED(address, sizeof(u32)))) + return ERR_PTR(-EINVAL); + if (unlikely(!access_ok(address, sizeof(u32)))) + return ERR_PTR(-EFAULT); + + key->address = address; + key->mm = current->mm; + + /* Generate hash key for this futex using uaddr and current->mm */ + hash_key = jhash2((u32 *) key, sizeof(*key) / sizeof(u32), 0); + + /* Since HASH_SIZE is 2^n, subtracting 1 makes a perfect bit mask */ + return &futex_table[hash_key & (futex2_hashsize - 1)]; +} + +/** + * futex_get_user - Get the userspace value on this address + * @uval: variable to store the value + * @uaddr: userspace address + * + * Check the comment at futex_get_user_val for more information. + */ +static int futex_get_user(u32 *uval, u32 *uaddr) +{ + int ret; + + pagefault_disable(); + ret = __get_user(*uval, uaddr); + pagefault_enable(); + + return ret; +} + +/** + * futex_setup_time - Prepare the timeout mechanism, without starting it. 
+ * @timo: Timeout value from userspace + * @timeout: Pointer to hrtimer handler + * @flags: Flags from userspace, to decide which clockid to use + * + * Return: 0 on success, error code otherwise + */ +static int futex_setup_time(struct __kernel_timespec __user *timo, + struct hrtimer_sleeper *timeout, + unsigned int flags) +{ + ktime_t time; + struct timespec64 ts; + clockid_t clockid = (flags & FUTEX_CLOCK_REALTIME) ? + CLOCK_REALTIME : CLOCK_MONOTONIC; + + if (get_timespec64(&ts, timo)) + return -EFAULT; + + if (!timespec64_valid(&ts)) + return -EINVAL; + + time = timespec64_to_ktime(ts); + + hrtimer_init_sleeper(timeout, clockid, HRTIMER_MODE_ABS); + + hrtimer_set_expires(&timeout->timer, time); + + return 0; +} + + +/** + * futex_get_user_value - Get the value from the userspace address and compares + * with the expected one. In success, leaves the function + * holding the bucket lock. Else, hold no lock. + * @bucket: hash bucket of this address + * @uaddr: futex's userspace address + * @val: expected value + * @multiple: is this call in the wait on multiple path + * + * Return: 0 on success, error code otherwise + */ +static int futex_get_user_value(struct futex_bucket *bucket, u32 __user *uaddr, + unsigned int val, bool multiple) +{ + u32 uval; + int ret; + + /* + * Get the value from user futex address. + * + * Since we are in a hurry, we use a spin lock and we can't sleep. + * Try to get the value with page fault disabled (when enable, we might + * sleep). + * + * If we fail, we aren't sure if the address is invalid or is just a + * page fault. Then, release the lock (so we can sleep) and try to get + * the value with page fault enabled. In order to trigger a page fault + * handling, we just call __get_user() again. + * + * If get_user succeeds, this mean that the address is valid and we do + * the loop again. Since we just handled the page fault, the page is + * likely pinned in memory and we should be luckier this time and be + * able to get the value. 
If we fail anyway, we will try again. + * + * If even with page faults enabled we get and error, this means that + * the address is not valid and we return from the syscall. + */ + do { + spin_lock(&bucket->lock); + + ret = futex_get_user(&uval, uaddr); + + if (ret) { + spin_unlock(&bucket->lock); + if (multiple || __get_user(uval, uaddr)) + return -EFAULT; + + } + } while (ret); + + if (uval != val) { + spin_unlock(&bucket->lock); + return -EWOULDBLOCK; + } + + return 0; +} + +/** + * futex_dequeue - Remove a futex from a queue + * @bucket: current bucket holding the futex + * @waiter: futex to be removed + * + * Return: True if futex was removed by this function, false if another wake + * thread removed this futex. + * + * This function should be used after we found that this futex was in a queue. + * Thus, it needs to be removed before the next step. However, someone could + * wake it between the time of the first check and the time to get the lock for + * the bucket. Check one more time if the futex is there with the bucket locked. + * If it's there, just remove it and return true. Else, mark the removal as + * false and do nothing. + */ +static bool futex_dequeue(struct futex_bucket *bucket, struct futex_waiter *waiter) +{ + bool removed = true; + + spin_lock(&bucket->lock); + if (list_empty(&waiter->list)) + removed = false; + else + list_del(&waiter->list); + spin_unlock(&bucket->lock); + + if (removed) + bucket_dec_waiters(bucket); + + return removed; +} + +/** + * sys_futex_wait - Wait on a futex address if (*uaddr) == val + * @uaddr: User address of futex + * @val: Expected value of futex + * @flags: Specify the size of futex and the clockid + * @timo: Optional absolute timeout. Supports only 64bit time. 
+ */ +SYSCALL_DEFINE4(futex_wait, void __user *, uaddr, unsigned int, val, + unsigned int, flags, struct __kernel_timespec __user *, timo) +{ + unsigned int size = flags & FUTEX_SIZE_MASK; + struct hrtimer_sleeper timeout; + struct futex_bucket *bucket; + struct futex_single_waiter wait_single; + struct futex_waiter *waiter; + int ret; + + wait_single.parent.task = current; + wait_single.parent.hint = 0; + waiter = &wait_single.waiter; + waiter->index = 0; + + if (flags & ~FUTEX2_MASK) + return -EINVAL; + + if (size != FUTEX_32) + return -EINVAL; + + if (timo) { + ret = futex_setup_time(timo, &timeout, flags); + if (ret) + return ret; + } + + /* Get an unlocked hash bucket */ + bucket = futex_get_bucket(uaddr, &waiter->key); + if (IS_ERR(bucket)) + return PTR_ERR(bucket); + + if (timo) + hrtimer_sleeper_start_expires(&timeout, HRTIMER_MODE_ABS); + +retry: + bucket_inc_waiters(bucket); + + /* Compare the expected and current value, get the bucket lock */ + ret = futex_get_user_value(bucket, uaddr, val, false); + if (ret) { + bucket_dec_waiters(bucket); + goto out; + } + + /* Add the waiter to the hash table and sleep */ + set_current_state(TASK_INTERRUPTIBLE); + list_add_tail(&waiter->list, &bucket->list); + spin_unlock(&bucket->lock); + + /* Do not sleep if someone woke this futex or if it was timeouted */ + if (!list_empty_careful(&waiter->list) && (!timo || timeout.task)) + freezable_schedule(); + + __set_current_state(TASK_RUNNING); + + /* + * One of those things triggered this wake: + * + * * We have been removed from the bucket. futex_wake() woke us. We just + * need to return 0 to userspace. + * + * However, if we find ourselves in the bucket we must remove ourselves + * from the bucket and ... + * + * * If the there's a timeout and it has expired, return -ETIMEDOUT. + * + * * If there is a signal pending, something wants to kill our thread. + * Return -ERESTARTSYS. 
+ * + * * If there's no signal pending, it was a spurious wake (scheduler + * gave us a change to do some work, even if we don't want to). We + * need to remove ourselves from the bucket and add again, to prevent + * losing wakeups in the meantime. + */ + + /* Normal wake */ + if (list_empty_careful(&waiter->list)) + goto out; + + if (!futex_dequeue(bucket, waiter)) + goto out; + + /* Timeout */ + if (timo && !timeout.task) + return -ETIMEDOUT; + + /* Spurious wakeup */ + if (!signal_pending(current)) + goto retry; + + /* Some signal is pending */ + ret = -ERESTARTSYS; +out: + if (timo) + hrtimer_cancel(&timeout.timer); + + return ret; +} + +static struct futexv *futex_get_parent(uintptr_t waiter, u8 index) +{ + uintptr_t parent = waiter - sizeof(struct futexv) + - (uintptr_t) (index * sizeof(struct futex_waiter)); + + return (struct futexv *) parent; +} + +/** + * sys_futex_wake - Wake a number of futexes waiting on an address + * @uaddr: Address of futex to be woken up + * @nr_wake: Number of futexes to be woken up + * @flags: TODO + */ +SYSCALL_DEFINE3(futex_wake, void __user *, uaddr, unsigned int, nr_wake, + unsigned int, flags) +{ + unsigned int size = flags & FUTEX_SIZE_MASK; + struct futex_waiter waiter, *aux, *tmp; + struct futex_bucket *bucket; + struct task_struct *task; + DEFINE_WAKE_Q(wake_q); + int ret = 0; + + if (flags & ~FUTEX2_MASK) + return -EINVAL; + + if (size != FUTEX_32) + return -EINVAL; + + bucket = futex_get_bucket(uaddr, &waiter.key); + if (IS_ERR(bucket)) + return PTR_ERR(bucket); + + if (!bucket_get_waiters(bucket)) + return 0; + + spin_lock(&bucket->lock); + list_for_each_entry_safe(aux, tmp, &bucket->list, list) { + if (ret >= nr_wake) + break; + + if (waiter.key.address == aux->key.address && + waiter.key.mm == aux->key.mm) { + struct futexv *parent = + futex_get_parent((uintptr_t) aux, aux->index); + + parent->hint = 1; + task = parent->task; + get_task_struct(task); + list_del_init_careful(&aux->list); + wake_q_add_safe(&wake_q, 
task); + ret++; + bucket_dec_waiters(bucket); + } + } + spin_unlock(&bucket->lock); + + wake_up_q(&wake_q); + + return ret; +} + +static int __init futex2_init(void) +{ + int i; + unsigned int futex_shift; + +#if CONFIG_BASE_SMALL + futex2_hashsize = 16; +#else + futex2_hashsize = roundup_pow_of_two(256 * num_possible_cpus()); +#endif + + futex_table = alloc_large_system_hash("futex2", sizeof(struct futex_bucket), + futex2_hashsize, 0, + futex2_hashsize < 256 ? HASH_SMALL : 0, + &futex_shift, NULL, + futex2_hashsize, futex2_hashsize); + futex2_hashsize = 1UL << futex_shift; + + for (i = 0; i < futex2_hashsize; i++) { + INIT_LIST_HEAD(&futex_table[i].list); + spin_lock_init(&futex_table[i].lock); + atomic_set(&futex_table[i].waiters, 0); + } + + return 0; +} +core_initcall(futex2_init); diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c index f27ac94d5..35ff743b1 100644 --- a/kernel/sys_ni.c +++ b/kernel/sys_ni.c @@ -148,6 +148,10 @@ COND_SYSCALL_COMPAT(set_robust_list); COND_SYSCALL(get_robust_list); COND_SYSCALL_COMPAT(get_robust_list); +/* kernel/futex2.c */ +COND_SYSCALL(futex_wait); +COND_SYSCALL(futex_wake); + /* kernel/hrtimer.c */ /* kernel/itimer.c */ diff --git a/tools/include/uapi/asm-generic/unistd.h b/tools/include/uapi/asm-generic/unistd.h index 205631898..cd79f94e0 100644 --- a/tools/include/uapi/asm-generic/unistd.h +++ b/tools/include/uapi/asm-generic/unistd.h @@ -860,8 +860,15 @@ __SYSCALL(__NR_faccessat2, sys_faccessat2) #define __NR_process_madvise 440 __SYSCALL(__NR_process_madvise, sys_process_madvise) +#define __NR_futex_wait 441 +__SYSCALL(__NR_futex_wait, sys_futex_wait) + +#define __NR_futex_wake 442 +__SYSCALL(__NR_futex_wake, sys_futex_wake) + #undef __NR_syscalls -#define __NR_syscalls 441 +#define __NR_syscalls 443 + /* * 32 bit systems traditionally used different diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl index 379819244..47de3bf93 100644 --- 
a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl +++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl @@ -362,6 +362,8 @@ 438 common pidfd_getfd sys_pidfd_getfd 439 common faccessat2 sys_faccessat2 440 common process_madvise sys_process_madvise +441 common futex_wait sys_futex_wait +442 common futex_wake sys_futex_wake # # Due to a historical design error, certain syscalls are numbered differently -- 2.29.2 From d71973d99efb1e2fd2542ea4d4b45b0e03e45b9c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Almeida?= Date: Thu, 15 Oct 2020 17:15:57 -0300 Subject: [PATCH 2/9] futex2: Add support for vectorized wait MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add support to wait on multiple futexes Signed-off-by: André Almeida Signed-off-by: Jan200101 --- arch/x86/entry/syscalls/syscall_32.tbl | 1 + arch/x86/entry/syscalls/syscall_64.tbl | 1 + include/uapi/asm-generic/unistd.h | 5 +- kernel/futex2.c | 430 ++++++++++++------ kernel/sys_ni.c | 1 + tools/include/uapi/asm-generic/unistd.h | 5 +- .../arch/x86/entry/syscalls/syscall_64.tbl | 1 + 7 files changed, 309 insertions(+), 135 deletions(-) diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl index 83a75ff39..65734d5e1 100644 --- a/arch/x86/entry/syscalls/syscall_32.tbl +++ b/arch/x86/entry/syscalls/syscall_32.tbl @@ -447,3 +447,4 @@ 440 i386 process_madvise sys_process_madvise 441 i386 futex_wait sys_futex_wait 442 i386 futex_wake sys_futex_wake +443 i386 futex_waitv sys_futex_waitv diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl index 6658fd63c..f30811b56 100644 --- a/arch/x86/entry/syscalls/syscall_64.tbl +++ b/arch/x86/entry/syscalls/syscall_64.tbl @@ -364,6 +364,7 @@ 440 common process_madvise sys_process_madvise 441 common futex_wait sys_futex_wait 442 common futex_wake sys_futex_wake +443 common futex_waitv sys_futex_waitv # # Due to a historical design error, certain syscalls 
are numbered differently diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h index ae47d6a9e..81a90b697 100644 --- a/include/uapi/asm-generic/unistd.h +++ b/include/uapi/asm-generic/unistd.h @@ -866,8 +866,11 @@ __SYSCALL(__NR_futex_wait, sys_futex_wait) #define __NR_futex_wake 442 __SYSCALL(__NR_futex_wake, sys_futex_wake) +#define __NR_futex_waitv 443 +__SYSCALL(__NR_futex_waitv, sys_futex_waitv) + #undef __NR_syscalls -#define __NR_syscalls 443 +#define __NR_syscalls 444 /* * 32 bit systems traditionally used different diff --git a/kernel/futex2.c b/kernel/futex2.c index 107b80a46..4b782b5ef 100644 --- a/kernel/futex2.c +++ b/kernel/futex2.c @@ -48,14 +48,25 @@ struct futex_bucket { struct list_head list; }; +/** + * struct futexv - List of futexes to be waited + * @task: Task to be awaken + * @hint: Was someone on this list awaken? + * @objects: List of futexes + */ struct futexv { struct task_struct *task; - int hint; + bool hint; struct futex_waiter objects[0]; }; +/** + * struct futex_single_waiter - Wrapper for a futexv of one element + * @futexv: TODO + * @waiter: TODO + */ struct futex_single_waiter { - struct futexv parent; + struct futexv futexv; struct futex_waiter waiter; } __packed; @@ -65,10 +76,10 @@ struct futex_bucket *futex_table; #define FUTEX2_MASK (FUTEX_SIZE_MASK | FUTEX_SHARED_FLAG | \ FUTEX_CLOCK_REALTIME) -// mask for sys_futex_waitv +/* mask for sys_futex_waitv flag */ #define FUTEXV_MASK (FUTEX_CLOCK_REALTIME) -// mask for each futex in futex_waitv list +/* mask for each futex in futex_waitv list */ #define FUTEXV_WAITER_MASK (FUTEX_SIZE_MASK | FUTEX_SHARED_FLAG) int futex2_hashsize; @@ -151,7 +162,7 @@ static struct futex_bucket *futex_get_bucket(void __user *uaddr, * * Check the comment at futex_get_user_val for more information. 
*/ -static int futex_get_user(u32 *uval, u32 *uaddr) +static int futex_get_user(u32 *uval, u32 __user *uaddr) { int ret; @@ -194,95 +205,227 @@ static int futex_setup_time(struct __kernel_timespec __user *timo, return 0; } +/** + * futex_dequeue_multiple - Remove multiple futexes from hash table + * @futexv: list of waiters + * @nr: number of futexes to be removed + * + * This function should be used after we found that this futex was in a queue. + * Thus, it needs to be removed before the next step. However, someone could + * wake it between the time of the first check and the time to get the lock for + * the bucket. Check one more time if the futex is there with the bucket locked. + * If it's there, just remove it and return true. Else, mark the removal as + * false and do nothing. + * + * Return: + * * -1 if no futex was woken during the removal + * * =< 0 at least one futex was found woken, index of the last one + */ +static int futex_dequeue_multiple(struct futexv *futexv, unsigned int nr) +{ + int i, ret = -1; + + for (i = 0; i < nr; i++) { + spin_lock(&futexv->objects[i].bucket->lock); + if (!list_empty_careful(&futexv->objects[i].list)) { + list_del_init_careful(&futexv->objects[i].list); + bucket_dec_waiters(futexv->objects[i].bucket); + } else { + ret = i; + } + spin_unlock(&futexv->objects[i].bucket->lock); + } + + return ret; +} /** - * futex_get_user_value - Get the value from the userspace address and compares - * with the expected one. In success, leaves the function - * holding the bucket lock. Else, hold no lock. 
- * @bucket: hash bucket of this address - * @uaddr: futex's userspace address - * @val: expected value - * @multiple: is this call in the wait on multiple path + * futex_enqueue - Check the value and enqueue a futex on a wait list + * + * @futexv: List of futexes + * @nr_futexes: Number of futexes in the list + * @awaken: If a futex was awaken during enqueueing, store the index here + * + * Get the value from the userspace address and compares with the expected one. + * In success, enqueue the futex in the correct bucket + * + * Get the value from user futex address. + * + * Since we are in a hurry, we use a spin lock and we can't sleep. + * Try to get the value with page fault disabled (when enable, we might + * sleep). + * + * If we fail, we aren't sure if the address is invalid or is just a + * page fault. Then, release the lock (so we can sleep) and try to get + * the value with page fault enabled. In order to trigger a page fault + * handling, we just call __get_user() again. If we sleep with enqueued + * futexes, we might miss a wake, so dequeue everything before sleeping. + * + * If get_user succeeds, this mean that the address is valid and we do + * the work again. Since we just handled the page fault, the page is + * likely pinned in memory and we should be luckier this time and be + * able to get the value. If we fail anyway, we will try again. + * + * If even with page faults enabled we get and error, this means that + * the address is not valid and we return from the syscall. + * + * If we got an unexpected value or need to treat a page fault and realized that + * a futex was awaken, we can priority this and return success. 
* * Return: 0 on success, error code otherwise */ -static int futex_get_user_value(struct futex_bucket *bucket, u32 __user *uaddr, - unsigned int val, bool multiple) +static int futex_enqueue(struct futexv *futexv, unsigned int nr_futexes, + unsigned int *awaken) { - u32 uval; - int ret; + int i, ret; + u32 uval, *uaddr, val; + struct futex_bucket *bucket; - /* - * Get the value from user futex address. - * - * Since we are in a hurry, we use a spin lock and we can't sleep. - * Try to get the value with page fault disabled (when enable, we might - * sleep). - * - * If we fail, we aren't sure if the address is invalid or is just a - * page fault. Then, release the lock (so we can sleep) and try to get - * the value with page fault enabled. In order to trigger a page fault - * handling, we just call __get_user() again. - * - * If get_user succeeds, this mean that the address is valid and we do - * the loop again. Since we just handled the page fault, the page is - * likely pinned in memory and we should be luckier this time and be - * able to get the value. If we fail anyway, we will try again. - * - * If even with page faults enabled we get and error, this means that - * the address is not valid and we return from the syscall. 
- */ - do { - spin_lock(&bucket->lock); +retry: + set_current_state(TASK_INTERRUPTIBLE); + + for (i = 0; i < nr_futexes; i++) { + uaddr = (u32 * __user) futexv->objects[i].key.address; + val = (u32) futexv->objects[i].val; + bucket = futexv->objects[i].bucket; + + bucket_inc_waiters(bucket); + spin_lock(&bucket->lock); - ret = futex_get_user(&uval, uaddr); + ret = futex_get_user(&uval, uaddr); - if (ret) { + if (unlikely(ret)) { spin_unlock(&bucket->lock); - if (multiple || __get_user(uval, uaddr)) + + bucket_dec_waiters(bucket); + __set_current_state(TASK_RUNNING); + *awaken = futex_dequeue_multiple(futexv, i); + + if (__get_user(uval, uaddr)) return -EFAULT; + if (*awaken >= 0) + return 0; + + goto retry; + } + + if (uval != val) { + spin_unlock(&bucket->lock); + + bucket_dec_waiters(bucket); + __set_current_state(TASK_RUNNING); + *awaken = futex_dequeue_multiple(futexv, i); + + if (*awaken >= 0) + return 0; + + return -EWOULDBLOCK; } - } while (ret); - if (uval != val) { + list_add_tail(&futexv->objects[i].list, &bucket->list); spin_unlock(&bucket->lock); - return -EWOULDBLOCK; } return 0; } + +static int __futex_wait(struct futexv *futexv, + unsigned int nr_futexes, + struct hrtimer_sleeper *timeout) +{ + int ret; + unsigned int awaken = -1; + + while (1) { + ret = futex_enqueue(futexv, nr_futexes, &awaken); + + if (ret < 0) + break; + + if (awaken <= 0) { + return awaken; + } + + + /* Before sleeping, check if someone was woken */ + if (!futexv->hint && (!timeout || timeout->task)) + freezable_schedule(); + + __set_current_state(TASK_RUNNING); + + /* + * One of those things triggered this wake: + * + * * We have been removed from the bucket. futex_wake() woke + * us. We just need to dequeue return 0 to userspace. + * + * However, if no futex was dequeued by a futex_wake(): + * + * * If the there's a timeout and it has expired, + * return -ETIMEDOUT. + * + * * If there is a signal pending, something wants to kill our + * thread, return -ERESTARTSYS. 
+ * + * * If there's no signal pending, it was a spurious wake + * (scheduler gave us a change to do some work, even if we + * don't want to). We need to remove ourselves from the + * bucket and add again, to prevent losing wakeups in the + * meantime. + */ + + ret = futex_dequeue_multiple(futexv, nr_futexes); + + /* Normal wake */ + if (ret >= 0) + break; + + if (timeout && !timeout->task) + return -ETIMEDOUT; + + /* signal */ + if (signal_pending(current)) + return -ERESTARTSYS; + + /* spurious wake, do everything again */ + } + + return ret; +} + /** - * futex_dequeue - Remove a futex from a queue - * @bucket: current bucket holding the futex - * @waiter: futex to be removed + * futex_wait - Setup the timer and wait on a list of futexes + * @futexv: List of waiters + * @nr_futexes: Number of waiters + * @timo: Timeout + * @timeout: Timeout + * @flags: Timeout flags * - * Return: True if futex was removed by this function, false if another wake - * thread removed this futex. - * - * This function should be used after we found that this futex was in a queue. - * Thus, it needs to be removed before the next step. However, someone could - * wake it between the time of the first check and the time to get the lock for - * the bucket. Check one more time if the futex is there with the bucket locked. - * If it's there, just remove it and return true. Else, mark the removal as - * false and do nothing. 
+ * Return: error code, or a hint of one of the waiters */ -static bool futex_dequeue(struct futex_bucket *bucket, struct futex_waiter *waiter) +static int futex_wait(struct futexv *futexv, unsigned int nr_futexes, + struct __kernel_timespec __user *timo, + struct hrtimer_sleeper *timeout, unsigned int flags) { - bool removed = true; + int ret; - spin_lock(&bucket->lock); - if (list_empty(&waiter->list)) - removed = false; - else - list_del(&waiter->list); - spin_unlock(&bucket->lock); + if (timo) { + ret = futex_setup_time(timo, timeout, flags); + if (ret) + return ret; - if (removed) - bucket_dec_waiters(bucket); + hrtimer_sleeper_start_expires(timeout, HRTIMER_MODE_ABS); + } - return removed; + ret = __futex_wait(futexv, nr_futexes, timo ? timeout : NULL); + + + if (timo) + hrtimer_cancel(&timeout->timer); + + return ret; } /** @@ -297,15 +440,20 @@ SYSCALL_DEFINE4(futex_wait, void __user *, uaddr, unsigned int, val, { unsigned int size = flags & FUTEX_SIZE_MASK; struct hrtimer_sleeper timeout; - struct futex_bucket *bucket; struct futex_single_waiter wait_single; struct futex_waiter *waiter; + struct futexv *futexv; int ret; - wait_single.parent.task = current; - wait_single.parent.hint = 0; + futexv = &wait_single.futexv; + futexv->task = current; + futexv->hint = false; + waiter = &wait_single.waiter; waiter->index = 0; + waiter->val = val; + + INIT_LIST_HEAD(&waiter->list); if (flags & ~FUTEX2_MASK) return -EINVAL; @@ -313,85 +461,101 @@ SYSCALL_DEFINE4(futex_wait, void __user *, uaddr, unsigned int, val, if (size != FUTEX_32) return -EINVAL; - if (timo) { - ret = futex_setup_time(timo, &timeout, flags); - if (ret) - return ret; - } - /* Get an unlocked hash bucket */ - bucket = futex_get_bucket(uaddr, &waiter->key); - if (IS_ERR(bucket)) - return PTR_ERR(bucket); + waiter->bucket = futex_get_bucket(uaddr, &waiter->key); + if (IS_ERR(waiter->bucket)) + return PTR_ERR(waiter->bucket); - if (timo) - hrtimer_sleeper_start_expires(&timeout, HRTIMER_MODE_ABS); + 
ret = futex_wait(futexv, 1, timo, &timeout, flags); -retry: - bucket_inc_waiters(bucket); + return ret; +} - /* Compare the expected and current value, get the bucket lock */ - ret = futex_get_user_value(bucket, uaddr, val, false); - if (ret) { - bucket_dec_waiters(bucket); - goto out; - } +/** + * futex_parse_waitv - Parse a waitv array from userspace + * @futexv: list of waiters + * @uwaitv: userspace list + * @nr_futexes: number of waiters in the list + * + * Return: Error code on failure, pointer to a prepared futexv otherwise + */ +static int futex_parse_waitv(struct futexv *futexv, + struct futex_waitv __user *uwaitv, + unsigned int nr_futexes) +{ + struct futex_waitv waitv; + unsigned int i; + struct futex_bucket *bucket; - /* Add the waiter to the hash table and sleep */ - set_current_state(TASK_INTERRUPTIBLE); - list_add_tail(&waiter->list, &bucket->list); - spin_unlock(&bucket->lock); + for (i = 0; i < nr_futexes; i++) { + if (copy_from_user(&waitv, &uwaitv[i], sizeof(waitv))) + return -EFAULT; - /* Do not sleep if someone woke this futex or if it was timeouted */ - if (!list_empty_careful(&waiter->list) && (!timo || timeout.task)) - freezable_schedule(); + if ((waitv.flags & ~FUTEXV_WAITER_MASK) || + (waitv.flags & FUTEX_SIZE_MASK) != FUTEX_32) + return -EINVAL; - __set_current_state(TASK_RUNNING); + bucket = futex_get_bucket(waitv.uaddr, + &futexv->objects[i].key); + if (IS_ERR(bucket)) + return PTR_ERR(bucket); - /* - * One of those things triggered this wake: - * - * * We have been removed from the bucket. futex_wake() woke us. We just - * need to return 0 to userspace. - * - * However, if we find ourselves in the bucket we must remove ourselves - * from the bucket and ... - * - * * If the there's a timeout and it has expired, return -ETIMEDOUT. - * - * * If there is a signal pending, something wants to kill our thread. - * Return -ERESTARTSYS. 
- * - * * If there's no signal pending, it was a spurious wake (scheduler - * gave us a change to do some work, even if we don't want to). We - * need to remove ourselves from the bucket and add again, to prevent - * losing wakeups in the meantime. - */ + futexv->objects[i].bucket = bucket; + futexv->objects[i].val = waitv.val; + futexv->objects[i].flags = waitv.flags; + futexv->objects[i].index = i; + INIT_LIST_HEAD(&futexv->objects[i].list); + } - /* Normal wake */ - if (list_empty_careful(&waiter->list)) - goto out; + return 0; +} - if (!futex_dequeue(bucket, waiter)) - goto out; +/** + * sys_futex_waitv - function + * @waiters: TODO + * @nr_futexes: TODO + * @flags: TODO + * @timo: TODO + */ +SYSCALL_DEFINE4(futex_waitv, struct futex_waitv __user *, waiters, + unsigned int, nr_futexes, unsigned int, flags, + struct __kernel_timespec __user *, timo) +{ + struct hrtimer_sleeper timeout; + struct futexv *futexv; + int ret; - /* Timeout */ - if (timo && !timeout.task) - return -ETIMEDOUT; + if (flags & ~FUTEXV_MASK) + return -EINVAL; - /* Spurious wakeup */ - if (!signal_pending(current)) - goto retry; + if (!nr_futexes || nr_futexes > FUTEX_WAITV_MAX || !waiters) + return -EINVAL; - /* Some signal is pending */ - ret = -ERESTARTSYS; -out: - if (timo) - hrtimer_cancel(&timeout.timer); + futexv = kmalloc(sizeof(struct futexv) + + (sizeof(struct futex_waiter) * nr_futexes), + GFP_KERNEL); + if (!futexv) + return -ENOMEM; + + futexv->hint = false; + futexv->task = current; + + ret = futex_parse_waitv(futexv, waiters, nr_futexes); + if (!ret) + ret = futex_wait(futexv, nr_futexes, timo, &timeout, flags); + + kfree(futexv); return ret; } +/** + * futex_get_parent - Get parent + * @waiter: TODO + * @index: TODO + * + * Return: TODO + */ static struct futexv *futex_get_parent(uintptr_t waiter, u8 index) { uintptr_t parent = waiter - sizeof(struct futexv) @@ -439,7 +603,7 @@ SYSCALL_DEFINE3(futex_wake, void __user *, uaddr, unsigned int, nr_wake, struct futexv *parent = 
futex_get_parent((uintptr_t) aux, aux->index); - parent->hint = 1; + parent->hint = true; task = parent->task; get_task_struct(task); list_del_init_careful(&aux->list); diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c index 35ff743b1..1898e7340 100644 --- a/kernel/sys_ni.c +++ b/kernel/sys_ni.c @@ -151,6 +151,7 @@ COND_SYSCALL_COMPAT(get_robust_list); /* kernel/futex2.c */ COND_SYSCALL(futex_wait); COND_SYSCALL(futex_wake); +COND_SYSCALL(futex_waitv); /* kernel/hrtimer.c */ diff --git a/tools/include/uapi/asm-generic/unistd.h b/tools/include/uapi/asm-generic/unistd.h index cd79f94e0..7de33be59 100644 --- a/tools/include/uapi/asm-generic/unistd.h +++ b/tools/include/uapi/asm-generic/unistd.h @@ -866,8 +866,11 @@ __SYSCALL(__NR_futex_wait, sys_futex_wait) #define __NR_futex_wake 442 __SYSCALL(__NR_futex_wake, sys_futex_wake) +#define __NR_futex_waitv 443 +__SYSCALL(__NR_futex_waitv, sys_futex_waitv) + #undef __NR_syscalls -#define __NR_syscalls 443 +#define __NR_syscalls 444 /* diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl index 47de3bf93..bd47f368f 100644 --- a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl +++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl @@ -364,6 +364,7 @@ 440 common process_madvise sys_process_madvise 441 common futex_wait sys_futex_wait 442 common futex_wake sys_futex_wake +443 common futex_waitv sys_futex_waitv # # Due to a historical design error, certain syscalls are numbered differently -- 2.29.2 From 24681616a5432f7680f934abf335a9ab9a1eaf1e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Almeida?= Date: Thu, 15 Oct 2020 18:06:40 -0300 Subject: [PATCH 3/9] futex2: Add support for shared futexes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add support for shared futexes for cross-process resources. 
Signed-off-by: André Almeida Signed-off-by: Jan200101 --- kernel/futex2.c | 187 ++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 165 insertions(+), 22 deletions(-) diff --git a/kernel/futex2.c b/kernel/futex2.c index 4b782b5ef..5ddb9922d 100644 --- a/kernel/futex2.c +++ b/kernel/futex2.c @@ -6,7 +6,9 @@ */ #include +#include #include +#include #include #include #include @@ -15,6 +17,7 @@ /** * struct futex_waiter - List entry for a waiter + * @uaddr: Memory address of userspace futex * @key.address: Memory address of userspace futex * @key.mm: Pointer to memory management struct of this process * @key: Stores information that uniquely identify a futex @@ -25,9 +28,11 @@ * @index: Index of waiter in futexv list */ struct futex_waiter { + uintptr_t uaddr; struct futex_key { uintptr_t address; struct mm_struct *mm; + unsigned long int offset; } key; struct list_head list; unsigned int val; @@ -125,16 +130,116 @@ static inline int bucket_get_waiters(struct futex_bucket *bucket) #endif } +static u64 get_inode_sequence_number(struct inode *inode) +{ + static atomic64_t i_seq; + u64 old; + + /* Does the inode already have a sequence number? 
*/ + old = atomic64_read(&inode->i_sequence); + if (likely(old)) + return old; + + for (;;) { + u64 new = atomic64_add_return(1, &i_seq); + if (WARN_ON_ONCE(!new)) + continue; + + old = atomic64_cmpxchg_relaxed(&inode->i_sequence, 0, new); + if (old) + return old; + return new; + } +} + +#define FUT_OFF_INODE 1 /* We set bit 0 if key has a reference on inode */ +#define FUT_OFF_MMSHARED 2 /* We set bit 1 if key has a reference on mm */ + +static int futex_get_shared_key(uintptr_t address, struct mm_struct *mm, + struct futex_key *key) +{ + int err; + struct page *page, *tail; + struct address_space *mapping; + +again: + err = get_user_pages_fast(address, 1, 0, &page); + + if (err < 0) + return err; + else + err = 0; + + + tail = page; + page = compound_head(page); + mapping = READ_ONCE(page->mapping); + + + if (unlikely(!mapping)) { + int shmem_swizzled; + + lock_page(page); + shmem_swizzled = PageSwapCache(page) || page->mapping; + unlock_page(page); + put_page(page); + + if (shmem_swizzled) + goto again; + + return -EFAULT; + } + + if (PageAnon(page)) { + + key->mm = mm; + key->address = address; + + key->offset |= FUT_OFF_MMSHARED; + + } else { + struct inode *inode; + + rcu_read_lock(); + + if (READ_ONCE(page->mapping) != mapping) { + rcu_read_unlock(); + put_page(page); + + goto again; + } + + inode = READ_ONCE(mapping->host); + if (!inode) { + rcu_read_unlock(); + put_page(page); + + goto again; + } + + key->address = get_inode_sequence_number(inode); + key->mm = (struct mm_struct *) basepage_index(tail); + key->offset |= FUT_OFF_INODE; + + rcu_read_unlock(); + } + + put_page(page); + return err; +} + /** * futex_get_bucket - Check if the user address is valid, prepare internal * data and calculate the hash * @uaddr: futex user address * @key: data that uniquely identifies a futex + * @shared: is this a shared futex? 
* * Return: address of bucket on success, error code otherwise */ static struct futex_bucket *futex_get_bucket(void __user *uaddr, - struct futex_key *key) + struct futex_key *key, + bool shared) { uintptr_t address = (uintptr_t) uaddr; u32 hash_key; @@ -145,8 +250,15 @@ static struct futex_bucket *futex_get_bucket(void __user *uaddr, if (unlikely(!access_ok(address, sizeof(u32)))) return ERR_PTR(-EFAULT); - key->address = address; - key->mm = current->mm; + key->offset = address % PAGE_SIZE; + address -= key->offset; + + if (!shared) { + key->address = address; + key->mm = current->mm; + } else { + futex_get_shared_key(address, current->mm, key); + } /* Generate hash key for this futex using uaddr and current->mm */ hash_key = jhash2((u32 *) key, sizeof(*key) / sizeof(u32), 0); @@ -275,9 +387,10 @@ static int futex_dequeue_multiple(struct futexv *futexv, unsigned int nr) * Return: 0 on success, error code otherwise */ static int futex_enqueue(struct futexv *futexv, unsigned int nr_futexes, - unsigned int *awaken) + int *awaken) { int i, ret; + bool shared, retry = false; u32 uval, *uaddr, val; struct futex_bucket *bucket; @@ -285,8 +398,18 @@ static int futex_enqueue(struct futexv *futexv, unsigned int nr_futexes, set_current_state(TASK_INTERRUPTIBLE); for (i = 0; i < nr_futexes; i++) { - uaddr = (u32 * __user) futexv->objects[i].key.address; + uaddr = (u32 * __user) futexv->objects[i].uaddr; val = (u32) futexv->objects[i].val; + shared = (futexv->objects[i].flags & FUTEX_SHARED_FLAG) ? 
true : false; + + if (shared && retry) { + futexv->objects[i].bucket = + futex_get_bucket((void *) uaddr, + &futexv->objects[i].key, true); + if (IS_ERR(futexv->objects[i].bucket)) + return PTR_ERR(futexv->objects[i].bucket); + } + bucket = futexv->objects[i].bucket; bucket_inc_waiters(bucket); @@ -301,24 +424,32 @@ static int futex_enqueue(struct futexv *futexv, unsigned int nr_futexes, __set_current_state(TASK_RUNNING); *awaken = futex_dequeue_multiple(futexv, i); + if (shared) { + retry = true; + goto retry; + } + if (__get_user(uval, uaddr)) return -EFAULT; if (*awaken >= 0) - return 0; + return 1; + retry = true; goto retry; } if (uval != val) { spin_unlock(&bucket->lock); + bucket_dec_waiters(bucket); __set_current_state(TASK_RUNNING); *awaken = futex_dequeue_multiple(futexv, i); - if (*awaken >= 0) - return 0; + if (*awaken >= 0) { + return 1; + } return -EWOULDBLOCK; } @@ -336,19 +467,18 @@ static int __futex_wait(struct futexv *futexv, struct hrtimer_sleeper *timeout) { int ret; - unsigned int awaken = -1; - while (1) { - ret = futex_enqueue(futexv, nr_futexes, &awaken); - if (ret < 0) - break; + while (1) { + int awaken = -1; - if (awaken <= 0) { - return awaken; + ret = futex_enqueue(futexv, nr_futexes, &awaken); + if (ret) { + if (awaken >= 0) + return awaken; + return ret; } - /* Before sleeping, check if someone was woken */ if (!futexv->hint && (!timeout || timeout->task)) freezable_schedule(); @@ -419,6 +549,7 @@ static int futex_wait(struct futexv *futexv, unsigned int nr_futexes, hrtimer_sleeper_start_expires(timeout, HRTIMER_MODE_ABS); } + ret = __futex_wait(futexv, nr_futexes, timo ? timeout : NULL); @@ -438,9 +569,10 @@ static int futex_wait(struct futexv *futexv, unsigned int nr_futexes, SYSCALL_DEFINE4(futex_wait, void __user *, uaddr, unsigned int, val, unsigned int, flags, struct __kernel_timespec __user *, timo) { + bool shared = (flags & FUTEX_SHARED_FLAG) ? 
true : false; unsigned int size = flags & FUTEX_SIZE_MASK; - struct hrtimer_sleeper timeout; struct futex_single_waiter wait_single; + struct hrtimer_sleeper timeout; struct futex_waiter *waiter; struct futexv *futexv; int ret; @@ -452,6 +584,7 @@ SYSCALL_DEFINE4(futex_wait, void __user *, uaddr, unsigned int, val, waiter = &wait_single.waiter; waiter->index = 0; waiter->val = val; + waiter->uaddr = (uintptr_t) uaddr; INIT_LIST_HEAD(&waiter->list); @@ -462,11 +595,14 @@ SYSCALL_DEFINE4(futex_wait, void __user *, uaddr, unsigned int, val, return -EINVAL; /* Get an unlocked hash bucket */ - waiter->bucket = futex_get_bucket(uaddr, &waiter->key); - if (IS_ERR(waiter->bucket)) + waiter->bucket = futex_get_bucket(uaddr, &waiter->key, shared); + if (IS_ERR(waiter->bucket)) { return PTR_ERR(waiter->bucket); + } ret = futex_wait(futexv, 1, timo, &timeout, flags); + if (ret > 0) + ret = 0; return ret; } @@ -486,8 +622,10 @@ static int futex_parse_waitv(struct futexv *futexv, struct futex_waitv waitv; unsigned int i; struct futex_bucket *bucket; + bool shared; for (i = 0; i < nr_futexes; i++) { + if (copy_from_user(&waitv, &uwaitv[i], sizeof(waitv))) return -EFAULT; @@ -495,8 +633,10 @@ static int futex_parse_waitv(struct futexv *futexv, (waitv.flags & FUTEX_SIZE_MASK) != FUTEX_32) return -EINVAL; + shared = (waitv.flags & FUTEX_SHARED_FLAG) ? true : false; + bucket = futex_get_bucket(waitv.uaddr, - &futexv->objects[i].key); + &futexv->objects[i].key, shared); if (IS_ERR(bucket)) return PTR_ERR(bucket); @@ -505,6 +645,7 @@ static int futex_parse_waitv(struct futexv *futexv, futexv->objects[i].flags = waitv.flags; futexv->objects[i].index = i; INIT_LIST_HEAD(&futexv->objects[i].list); + futexv->objects[i].uaddr = (uintptr_t) waitv.uaddr; } return 0; @@ -573,6 +714,7 @@ static struct futexv *futex_get_parent(uintptr_t waiter, u8 index) SYSCALL_DEFINE3(futex_wake, void __user *, uaddr, unsigned int, nr_wake, unsigned int, flags) { + bool shared = (flags & FUTEX_SHARED_FLAG) ? 
true : false; unsigned int size = flags & FUTEX_SIZE_MASK; struct futex_waiter waiter, *aux, *tmp; struct futex_bucket *bucket; @@ -586,7 +728,7 @@ SYSCALL_DEFINE3(futex_wake, void __user *, uaddr, unsigned int, nr_wake, if (size != FUTEX_32) return -EINVAL; - bucket = futex_get_bucket(uaddr, &waiter.key); + bucket = futex_get_bucket(uaddr, &waiter.key, shared); if (IS_ERR(bucket)) return PTR_ERR(bucket); @@ -599,7 +741,8 @@ SYSCALL_DEFINE3(futex_wake, void __user *, uaddr, unsigned int, nr_wake, break; if (waiter.key.address == aux->key.address && - waiter.key.mm == aux->key.mm) { + waiter.key.mm == aux->key.mm && + waiter.key.offset == aux->key.offset) { struct futexv *parent = futex_get_parent((uintptr_t) aux, aux->index); -- 2.29.2 From ce3ae4bd9f98763fda07f315c1f239c4aaef4b5e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Almeida?= Date: Thu, 9 Jul 2020 11:34:40 -0300 Subject: [PATCH 4/9] selftests: futex: Add futex2 wake/wait test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a simple test to test wake/wait mechanism using futex2 interface. Create helper files so more tests can evaluate futex2. While 32bit ABIs from glibc aren't able to use 64 bit sized time variables, add a temporary workaround that implements the required types and calls the appropriate syscalls, since futex2 doesn't support 32 bit sized time.
Signed-off-by: André Almeida Signed-off-by: Jan200101 --- tools/include/uapi/asm-generic/unistd.h | 1 - .../selftests/futex/functional/.gitignore | 1 + .../selftests/futex/functional/Makefile | 4 +- .../selftests/futex/functional/futex2_wait.c | 148 ++++++++++++++++++ .../testing/selftests/futex/functional/run.sh | 3 + .../selftests/futex/include/futex2test.h | 77 +++++++++ 6 files changed, 232 insertions(+), 2 deletions(-) create mode 100644 tools/testing/selftests/futex/functional/futex2_wait.c create mode 100644 tools/testing/selftests/futex/include/futex2test.h diff --git a/tools/include/uapi/asm-generic/unistd.h b/tools/include/uapi/asm-generic/unistd.h index 7de33be59..81a90b697 100644 --- a/tools/include/uapi/asm-generic/unistd.h +++ b/tools/include/uapi/asm-generic/unistd.h @@ -872,7 +872,6 @@ __SYSCALL(__NR_futex_waitv, sys_futex_waitv) #undef __NR_syscalls #define __NR_syscalls 444 - /* * 32 bit systems traditionally used different * syscalls for off_t and loff_t arguments, while diff --git a/tools/testing/selftests/futex/functional/.gitignore b/tools/testing/selftests/futex/functional/.gitignore index 0efcd494d..d61f1df94 100644 --- a/tools/testing/selftests/futex/functional/.gitignore +++ b/tools/testing/selftests/futex/functional/.gitignore @@ -6,3 +6,4 @@ futex_wait_private_mapped_file futex_wait_timeout futex_wait_uninitialized_heap futex_wait_wouldblock +futex2_wait diff --git a/tools/testing/selftests/futex/functional/Makefile b/tools/testing/selftests/futex/functional/Makefile index 23207829e..7142a94a7 100644 --- a/tools/testing/selftests/futex/functional/Makefile +++ b/tools/testing/selftests/futex/functional/Makefile @@ -5,6 +5,7 @@ LDLIBS := -lpthread -lrt HEADERS := \ ../include/futextest.h \ + ../include/futex2test.h \ ../include/atomic.h \ ../include/logging.h TEST_GEN_FILES := \ @@ -14,7 +15,8 @@ TEST_GEN_FILES := \ futex_requeue_pi_signal_restart \ futex_requeue_pi_mismatched_ops \ futex_wait_uninitialized_heap \ - 
futex_wait_private_mapped_file + futex_wait_private_mapped_file \ + futex2_wait TEST_PROGS := run.sh diff --git a/tools/testing/selftests/futex/functional/futex2_wait.c b/tools/testing/selftests/futex/functional/futex2_wait.c new file mode 100644 index 000000000..0646a24b7 --- /dev/null +++ b/tools/testing/selftests/futex/functional/futex2_wait.c @@ -0,0 +1,148 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/****************************************************************************** + * + * Copyright Collabora Ltd., 2020 + * + * DESCRIPTION + * Test wait/wake mechanism of futex2, using 32bit sized futexes. + * + * AUTHOR + * André Almeida + * + * HISTORY + * 2020-Jul-9: Initial version by André + * + *****************************************************************************/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "futex2test.h" +#include "logging.h" + +#define TEST_NAME "futex2-wait" +#define timeout_ns 30000000 +#define WAKE_WAIT_US 10000 +futex_t *f1; + +void usage(char *prog) +{ + printf("Usage: %s\n", prog); + printf(" -c Use color\n"); + printf(" -h Display this help message\n"); + printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n", + VQUIET, VCRITICAL, VINFO); +} + +void *waiterfn(void *arg) +{ + struct timespec64 to64; + unsigned int flags = 0; + if (arg) + flags = *((unsigned int *) arg); + + /* setting absolute timeout for futex2 */ + if (gettime64(CLOCK_MONOTONIC, &to64)) + error("gettime64 failed\n", errno); + + to64.tv_nsec += timeout_ns; + + if (to64.tv_nsec >= 1000000000) { + to64.tv_sec++; + to64.tv_nsec -= 1000000000; + } + + if (futex2_wait(f1, *f1, FUTEX_32 | flags, &to64)) + printf("waiter failed errno %d\n", errno); + + return NULL; +} + +int main(int argc, char *argv[]) +{ + pthread_t waiter; + unsigned int flags = FUTEX_SHARED_FLAG; + int res, ret = RET_PASS; + int c; + futex_t f_private = 0; + f1 = &f_private; + + while ((c = getopt(argc, argv, "cht:v:")) != 
-1) { + switch (c) { + case 'c': + log_color(1); + break; + case 'h': + usage(basename(argv[0])); + exit(0); + case 'v': + log_verbosity(atoi(optarg)); + break; + default: + usage(basename(argv[0])); + exit(1); + } + } + + ksft_print_header(); + ksft_set_plan(2); + ksft_print_msg("%s: Test FUTEX2_WAIT\n", + basename(argv[0])); + + info("Calling private futex2_wait on f1: %u @ %p with val=%u\n", *f1, f1, *f1); + + if (pthread_create(&waiter, NULL, waiterfn, NULL)) + error("pthread_create failed\n", errno); + + usleep(WAKE_WAIT_US); + + info("Calling private futex2_wake on f1: %u @ %p with val=%u\n", *f1, f1, *f1); + res = futex2_wake(f1, 1, FUTEX_32); + if (res != 1) { + ksft_test_result_fail("futex2_wake private returned: %d %s\n", + res ? errno : res, + res ? strerror(errno) : ""); + ret = RET_FAIL; + } else { + ksft_test_result_pass("futex2_wake private succeeds\n"); + } + + int shm_id = shmget(IPC_PRIVATE, 4096, IPC_CREAT | 0666); + if (shm_id < 0) { + perror("shmget"); + exit(1); + } + + unsigned int *shared_data = shmat(shm_id, NULL, 0); + *shared_data = 0; + f1 = shared_data; + + info("Calling shared futex2_wait on f1: %u @ %p with val=%u\n", *f1, f1, *f1); + + if (pthread_create(&waiter, NULL, waiterfn, &flags)) + error("pthread_create failed\n", errno); + + usleep(WAKE_WAIT_US); + + info("Calling shared futex2_wake on f1: %u @ %p with val=%u\n", *f1, f1, *f1); + res = futex2_wake(f1, 1, FUTEX_32 | FUTEX_SHARED_FLAG); + if (res != 1) { + ksft_test_result_fail("futex2_wake shared returned: %d %s\n", + res ? errno : res, + res ? 
strerror(errno) : ""); + ret = RET_FAIL; + } else { + ksft_test_result_pass("futex2_wake shared succeeds\n"); + } + + shmdt(shared_data); + + ksft_print_cnts(); + return ret; +} diff --git a/tools/testing/selftests/futex/functional/run.sh b/tools/testing/selftests/futex/functional/run.sh index 1acb6ace1..3730159c8 100755 --- a/tools/testing/selftests/futex/functional/run.sh +++ b/tools/testing/selftests/futex/functional/run.sh @@ -73,3 +73,6 @@ echo echo ./futex_wait_uninitialized_heap $COLOR ./futex_wait_private_mapped_file $COLOR + +echo +./futex2_wait $COLOR diff --git a/tools/testing/selftests/futex/include/futex2test.h b/tools/testing/selftests/futex/include/futex2test.h new file mode 100644 index 000000000..807b8b57f --- /dev/null +++ b/tools/testing/selftests/futex/include/futex2test.h @@ -0,0 +1,77 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/****************************************************************************** + * + * Copyright Collabora Ltd., 2020 + * + * DESCRIPTION + * Futex2 library addons for old futex library + * + * AUTHOR + * André Almeida + * + * HISTORY + * 2020-Jul-9: Initial version by André + * + *****************************************************************************/ +#include "futextest.h" +#include + +#define NSEC_PER_SEC 1000000000L + +#ifndef FUTEX_8 +# define FUTEX_8 0 +#endif +#ifndef FUTEX_16 +# define FUTEX_16 1 +#endif +#ifndef FUTEX_32 +#define FUTEX_32 2 +#endif +#ifdef __x86_64__ +# ifndef FUTEX_64 +# define FUTEX_64 3 +# endif +#endif + +/* + * - Y2038 section for 32-bit applications - + * + * Remove this when glibc is ready for y2038. Then, always compile with + * `-DTIME_BITS=64` or `-D__USE_TIME_BITS64`. glibc will provide both + * timespec64 and clock_gettime64 so we won't need to define here. 
+ */ +#if defined(__i386__) || __TIMESIZE == 32 +# define NR_gettime __NR_clock_gettime64 +#else +# define NR_gettime __NR_clock_gettime +#endif + +struct timespec64 { + long long tv_sec; /* seconds */ + long long tv_nsec; /* nanoseconds */ +}; + +int gettime64(clock_t clockid, struct timespec64 *tv) +{ + return syscall(NR_gettime, clockid, tv); +} +/* + * - End of Y2038 section - + */ + +/* + * wait for uaddr if (*uaddr == val) + */ +static inline int futex2_wait(volatile void *uaddr, unsigned long val, + unsigned long flags, struct timespec64 *timo) +{ + return syscall(__NR_futex_wait, uaddr, val, flags, timo); +} + +/* + * wake nr futexes waiting for uaddr + */ +static inline int futex2_wake(volatile void *uaddr, unsigned int nr, unsigned long flags) +{ + return syscall(__NR_futex_wake, uaddr, nr, flags); +} -- 2.29.2 From 1e0349f5a81a43cdb50d9a97812194df6d937b69 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Almeida?= Date: Thu, 9 Jul 2020 11:36:14 -0300 Subject: [PATCH 5/9] selftests: futex: Add futex2 timeout test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adapt existing futex wait timeout file to test the same mechanism for futex2. 
Signed-off-by: André Almeida Signed-off-by: Jan200101 --- .../futex/functional/futex_wait_timeout.c | 58 ++++++++++++++++--- 1 file changed, 49 insertions(+), 9 deletions(-) diff --git a/tools/testing/selftests/futex/functional/futex_wait_timeout.c b/tools/testing/selftests/futex/functional/futex_wait_timeout.c index ee55e6d38..245670e44 100644 --- a/tools/testing/selftests/futex/functional/futex_wait_timeout.c +++ b/tools/testing/selftests/futex/functional/futex_wait_timeout.c @@ -11,6 +11,7 @@ * * HISTORY * 2009-Nov-6: Initial version by Darren Hart + * 2020-Jul-9: Add futex2 test by André * *****************************************************************************/ @@ -20,7 +21,7 @@ #include #include #include -#include "futextest.h" +#include "futex2test.h" #include "logging.h" #define TEST_NAME "futex-wait-timeout" @@ -40,7 +41,8 @@ void usage(char *prog) int main(int argc, char *argv[]) { futex_t f1 = FUTEX_INITIALIZER; - struct timespec to; + struct timespec to = {.tv_sec = 0, .tv_nsec = timeout_ns}; + struct timespec64 to64; int res, ret = RET_PASS; int c; @@ -65,22 +67,60 @@ int main(int argc, char *argv[]) } ksft_print_header(); - ksft_set_plan(1); + ksft_set_plan(3); ksft_print_msg("%s: Block on a futex and wait for timeout\n", basename(argv[0])); ksft_print_msg("\tArguments: timeout=%ldns\n", timeout_ns); - /* initialize timeout */ - to.tv_sec = 0; - to.tv_nsec = timeout_ns; - info("Calling futex_wait on f1: %u @ %p\n", f1, &f1); res = futex_wait(&f1, f1, &to, FUTEX_PRIVATE_FLAG); if (!res || errno != ETIMEDOUT) { - fail("futex_wait returned %d\n", ret < 0 ? errno : ret); + ksft_test_result_fail("futex_wait returned %d\n", ret < 0 ? 
errno : ret); + ret = RET_FAIL; + } else { + ksft_test_result_pass("futex_wait timeout succeeds\n"); + } + + /* setting absolute monotonic timeout for futex2 */ + if (gettime64(CLOCK_MONOTONIC, &to64)) + error("gettime64 failed\n", errno); + + to64.tv_nsec += timeout_ns; + + if (to64.tv_nsec >= 1000000000) { + to64.tv_sec++; + to64.tv_nsec -= 1000000000; + } + + info("Calling futex2_wait on f1: %u @ %p\n", f1, &f1); + res = futex2_wait(&f1, f1, FUTEX_32, &to64); + if (!res || errno != ETIMEDOUT) { + ksft_test_result_fail("futex2_wait monotonic returned %d\n", ret < 0 ? errno : ret); + ret = RET_FAIL; + } else { + ksft_test_result_pass("futex2_wait monotonic timeout succeeds\n"); + } + + /* setting absolute realtime timeout for futex2 */ + if (gettime64(CLOCK_REALTIME, &to64)) + error("gettime64 failed\n", errno); + + to64.tv_nsec += timeout_ns; + + if (to64.tv_nsec >= 1000000000) { + to64.tv_sec++; + to64.tv_nsec -= 1000000000; + } + + info("Calling futex2_wait on f1: %u @ %p\n", f1, &f1); + res = futex2_wait(&f1, f1, FUTEX_32 | FUTEX_CLOCK_REALTIME, &to64); + if (!res || errno != ETIMEDOUT) { + ksft_test_result_fail("futex2_wait realtime returned %d\n", ret < 0 ? errno : ret); ret = RET_FAIL; + } else { + ksft_test_result_pass("futex2_wait realtime timeout succeeds\n"); } - print_result(TEST_NAME, ret); + ksft_print_cnts(); return ret; } -- 2.29.2 From 298120f6e3a758cd03e26a104f5ce60a88501b7f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Almeida?= Date: Thu, 9 Jul 2020 11:37:42 -0300 Subject: [PATCH 6/9] selftests: futex: Add futex2 wouldblock test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adapt existing futex wait wouldblock file to test the same mechanism for futex2. 
Signed-off-by: André Almeida Signed-off-by: Jan200101 --- .../futex/functional/futex_wait_wouldblock.c | 33 ++++++++++++++++--- 1 file changed, 29 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c b/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c index 0ae390ff8..1f72e5928 100644 --- a/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c +++ b/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c @@ -12,6 +12,7 @@ * * HISTORY * 2009-Nov-14: Initial version by Gowrishankar + * 2020-Jul-9: Add futex2 test by André * *****************************************************************************/ @@ -21,7 +22,7 @@ #include #include #include -#include "futextest.h" +#include "futex2test.h" #include "logging.h" #define TEST_NAME "futex-wait-wouldblock" @@ -39,6 +40,7 @@ void usage(char *prog) int main(int argc, char *argv[]) { struct timespec to = {.tv_sec = 0, .tv_nsec = timeout_ns}; + struct timespec64 to64; futex_t f1 = FUTEX_INITIALIZER; int res, ret = RET_PASS; int c; @@ -61,18 +63,41 @@ int main(int argc, char *argv[]) } ksft_print_header(); - ksft_set_plan(1); + ksft_set_plan(2); ksft_print_msg("%s: Test the unexpected futex value in FUTEX_WAIT\n", basename(argv[0])); info("Calling futex_wait on f1: %u @ %p with val=%u\n", f1, &f1, f1+1); res = futex_wait(&f1, f1+1, &to, FUTEX_PRIVATE_FLAG); if (!res || errno != EWOULDBLOCK) { - fail("futex_wait returned: %d %s\n", + ksft_test_result_fail("futex_wait returned: %d %s\n", res ? errno : res, res ? 
strerror(errno) : ""); ret = RET_FAIL; + } else { + ksft_test_result_pass("futex_wait wouldblock succeeds\n"); } - print_result(TEST_NAME, ret); + /* setting absolute timeout for futex2 */ + if (gettime64(CLOCK_MONOTONIC, &to64)) + error("gettime64 failed\n", errno); + + to64.tv_nsec += timeout_ns; + + if (to64.tv_nsec >= 1000000000) { + to64.tv_sec++; + to64.tv_nsec -= 1000000000; + } + + info("Calling futex2_wait on f1: %u @ %p with val=%u\n", f1, &f1, f1+1); + res = futex2_wait(&f1, f1+1, FUTEX_32, &to64); + if (!res || errno != EWOULDBLOCK) { + ksft_test_result_fail("futex2_wait returned: %d %s\n", + res ? errno : res, res ? strerror(errno) : ""); + ret = RET_FAIL; + } else { + ksft_test_result_pass("futex2_wait wouldblock succeeds\n"); + } + + ksft_print_cnts(); return ret; } -- 2.29.2 From 05c697a239aad5e8608c6acf0da9239cac5f7a2e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Almeida?= Date: Tue, 8 Dec 2020 18:47:31 -0300 Subject: [PATCH 7/9] selftests: futex: Add futex2 waitv test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: André Almeida Signed-off-by: Jan200101 --- .../selftests/futex/functional/.gitignore | 1 + .../selftests/futex/functional/Makefile | 3 +- .../selftests/futex/functional/futex2_waitv.c | 156 ++++++++++++++++++ .../testing/selftests/futex/functional/run.sh | 3 + .../selftests/futex/include/futex2test.h | 25 ++- 5 files changed, 183 insertions(+), 5 deletions(-) create mode 100644 tools/testing/selftests/futex/functional/futex2_waitv.c diff --git a/tools/testing/selftests/futex/functional/.gitignore b/tools/testing/selftests/futex/functional/.gitignore index d61f1df94..d0b8f637b 100644 --- a/tools/testing/selftests/futex/functional/.gitignore +++ b/tools/testing/selftests/futex/functional/.gitignore @@ -7,3 +7,4 @@ futex_wait_timeout futex_wait_uninitialized_heap futex_wait_wouldblock futex2_wait +futex2_waitv diff --git a/tools/testing/selftests/futex/functional/Makefile 
b/tools/testing/selftests/futex/functional/Makefile index 7142a94a7..b857b9450 100644 --- a/tools/testing/selftests/futex/functional/Makefile +++ b/tools/testing/selftests/futex/functional/Makefile @@ -16,7 +16,8 @@ TEST_GEN_FILES := \ futex_requeue_pi_mismatched_ops \ futex_wait_uninitialized_heap \ futex_wait_private_mapped_file \ - futex2_wait + futex2_wait \ + futex2_waitv TEST_PROGS := run.sh diff --git a/tools/testing/selftests/futex/functional/futex2_waitv.c b/tools/testing/selftests/futex/functional/futex2_waitv.c new file mode 100644 index 000000000..d4b116651 --- /dev/null +++ b/tools/testing/selftests/futex/functional/futex2_waitv.c @@ -0,0 +1,156 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/****************************************************************************** + * + * Copyright Collabora Ltd., 2020 + * + * DESCRIPTION + * Test waitv/wake mechanism of futex2, using 32bit sized futexes. + * + * AUTHOR + * André Almeida + * + * HISTORY + * 2020-Jul-9: Initial version by André + * + *****************************************************************************/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "futex2test.h" +#include "logging.h" + +#define TEST_NAME "futex2-wait" +#define timeout_ns 1000000000 +#define WAKE_WAIT_US 10000 +#define NR_FUTEXES 30 +struct futex_waitv waitv[NR_FUTEXES]; +u_int32_t futexes[NR_FUTEXES] = {0}; + +void usage(char *prog) +{ + printf("Usage: %s\n", prog); + printf(" -c Use color\n"); + printf(" -h Display this help message\n"); + printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n", + VQUIET, VCRITICAL, VINFO); +} + +void *waiterfn(void *arg) +{ + struct timespec64 to64; + int res; + + /* setting absolute timeout for futex2 */ + if (gettime64(CLOCK_MONOTONIC, &to64)) + error("gettime64 failed\n", errno); + + to64.tv_sec++; + + res = futex2_waitv(waitv, NR_FUTEXES, 0, &to64); + if (res < 0) { + printf("waiter failed errno %d %s\n", + res ? 
errno : res, + res ? strerror(errno) : ""); + } + + return NULL; +} + +int main(int argc, char *argv[]) +{ + pthread_t waiter; + int res, ret = RET_PASS; + int c, i; + + while ((c = getopt(argc, argv, "cht:v:")) != -1) { + switch (c) { + case 'c': + log_color(1); + break; + case 'h': + usage(basename(argv[0])); + exit(0); + case 'v': + log_verbosity(atoi(optarg)); + break; + default: + usage(basename(argv[0])); + exit(1); + } + } + + ksft_print_header(); + ksft_set_plan(2); + ksft_print_msg("%s: Test FUTEX2_WAITV\n", + basename(argv[0])); + + //info("Calling private futex2_wait on f1: %u @ %p with val=%u\n", *f1, f1, *f1); + + for (i = 0; i < NR_FUTEXES; i++) { + waitv[i].uaddr = &futexes[i]; + waitv[i].flags = FUTEX_32; + waitv[i].val = 0; + } + + if (pthread_create(&waiter, NULL, waiterfn, NULL)) + error("pthread_create failed\n", errno); + + usleep(WAKE_WAIT_US); + + // info("Calling private futex2_wake on f1: %u @ %p with val=%u\n", *f1, f1, *f1); + res = futex2_wake(waitv[NR_FUTEXES - 1].uaddr, 1, FUTEX_32); + if (res != 1) { + ksft_test_result_fail("futex2_wake private returned: %d %s\n", + res ? errno : res, + res ? 
strerror(errno) : ""); + ret = RET_FAIL; + } else { + ksft_test_result_pass("futex2_waitv private succeeds\n"); + } + + for (i = 0; i < NR_FUTEXES; i++) { + int shm_id = shmget(IPC_PRIVATE, 4096, IPC_CREAT | 0666); + if (shm_id < 0) { + perror("shmget"); + exit(1); + } + + unsigned int *shared_data = shmat(shm_id, NULL, 0); + *shared_data = 0; + + waitv[i].uaddr = shared_data; + waitv[i].flags = FUTEX_32 | FUTEX_SHARED_FLAG; + waitv[i].val = 0; + } + + //info("Calling shared futex2_wait on f1: %u @ %p with val=%u\n", *f1, f1, *f1); + + if (pthread_create(&waiter, NULL, waiterfn, NULL)) + error("pthread_create failed\n", errno); + + usleep(WAKE_WAIT_US); + + // info("Calling shared futex2_wake on f1: %u @ %p with val=%u\n", *f1, f1, *f1); + res = futex2_wake(waitv[NR_FUTEXES - 1].uaddr, 1, FUTEX_32 | FUTEX_SHARED_FLAG); + if (res != 1) { + ksft_test_result_fail("futex2_wake shared returned: %d %s\n", + res ? errno : res, + res ? strerror(errno) : ""); + ret = RET_FAIL; + } else { + ksft_test_result_pass("futex2_wake shared succeeds\n"); + } + + for (i = 0; i < NR_FUTEXES; i++) + shmdt(waitv[i].uaddr); + + ksft_print_cnts(); + return ret; +} diff --git a/tools/testing/selftests/futex/functional/run.sh b/tools/testing/selftests/futex/functional/run.sh index 3730159c8..18b3883d7 100755 --- a/tools/testing/selftests/futex/functional/run.sh +++ b/tools/testing/selftests/futex/functional/run.sh @@ -76,3 +76,6 @@ echo echo ./futex2_wait $COLOR + +echo +./futex2_waitv $COLOR diff --git a/tools/testing/selftests/futex/include/futex2test.h b/tools/testing/selftests/futex/include/futex2test.h index 807b8b57f..10be0c504 100644 --- a/tools/testing/selftests/futex/include/futex2test.h +++ b/tools/testing/selftests/futex/include/futex2test.h @@ -27,10 +27,18 @@ #ifndef FUTEX_32 #define FUTEX_32 2 #endif -#ifdef __x86_64__ -# ifndef FUTEX_64 -# define FUTEX_64 3 -# endif + +#ifndef FUTEX_SHARED_FLAG +#define FUTEX_SHARED_FLAG 8 +#endif + +#ifndef FUTEX_WAITV_MAX +#define 
FUTEX_WAITV_MAX 128 +struct futex_waitv { + void *uaddr; + unsigned int val; + unsigned int flags; +}; #endif /* @@ -75,3 +83,12 @@ static inline int futex2_wake(volatile void *uaddr, unsigned int nr, unsigned lo { return syscall(__NR_futex_wake, uaddr, nr, flags); } + +/* + * wait on an array of nr_waiters futexes (each entry has its own uaddr, val and flags) + */ +static inline int futex2_waitv(volatile struct futex_waitv *waiters, unsigned long nr_waiters, + unsigned long flags, struct timespec64 *timo) +{ + return syscall(__NR_futex_waitv, waiters, nr_waiters, flags, timo); +} -- 2.29.2 From 9358bbdf929a90bc144d13e002fed8f4223d3178 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Almeida?= Date: Fri, 4 Dec 2020 19:12:23 -0300 Subject: [PATCH 8/9] futex2: Add sysfs entry for syscall numbers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: André Almeida Signed-off-by: Jan200101 --- kernel/futex2.c | 42 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/kernel/futex2.c b/kernel/futex2.c index 5ddb9922d..58cd8a868 100644 --- a/kernel/futex2.c +++ b/kernel/futex2.c @@ -762,6 +762,48 @@ SYSCALL_DEFINE3(futex_wake, void __user *, uaddr, unsigned int, nr_wake, return ret; } +static ssize_t wait_show(struct kobject *kobj, struct kobj_attribute *attr, + char *buf) +{ + return sprintf(buf, "%u\n", __NR_futex_wait); + +} +static struct kobj_attribute futex2_wait_attr = __ATTR_RO(wait); + +static ssize_t wake_show(struct kobject *kobj, struct kobj_attribute *attr, + char *buf) +{ + return sprintf(buf, "%u\n", __NR_futex_wake); + +} +static struct kobj_attribute futex2_wake_attr = __ATTR_RO(wake); + +static ssize_t waitv_show(struct kobject *kobj, struct kobj_attribute *attr, + char *buf) +{ + return sprintf(buf, "%u\n", __NR_futex_waitv); + +} +static struct kobj_attribute futex2_waitv_attr = __ATTR_RO(waitv); + +static struct attribute *futex2_sysfs_attrs[] = { + &futex2_wait_attr.attr, + &futex2_wake_attr.attr, +
&futex2_waitv_attr.attr, + NULL, +}; + +static const struct attribute_group futex2_sysfs_attr_group = { + .attrs = futex2_sysfs_attrs, + .name = "futex2", +}; + +static int __init futex2_sysfs_init(void) +{ + return sysfs_create_group(kernel_kobj, &futex2_sysfs_attr_group); +} +subsys_initcall(futex2_sysfs_init); + static int __init futex2_init(void) { int i; -- 2.29.2 From f7b1c9a2ad05933e559ef78bc7753b2fac1698fd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Almeida?= Date: Tue, 5 Jan 2021 15:44:02 -0300 Subject: [PATCH 9/9] perf bench: Add futex2 benchmark tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Port existing futex infrastructure to use futex2 calls. Signed-off-by: André Almeida Signed-off-by: Jan200101 --- tools/arch/x86/include/asm/unistd_64.h | 8 +++++ tools/perf/bench/bench.h | 3 ++ tools/perf/bench/futex-hash.c | 24 ++++++++++++--- tools/perf/bench/futex-wake-parallel.c | 41 ++++++++++++++++++++++---- tools/perf/bench/futex-wake.c | 36 ++++++++++++++++++---- tools/perf/bench/futex.h | 17 +++++++++++ tools/perf/builtin-bench.c | 17 ++++++++--- 7 files changed, 127 insertions(+), 19 deletions(-) diff --git a/tools/arch/x86/include/asm/unistd_64.h b/tools/arch/x86/include/asm/unistd_64.h index 4205ed415..151a41ceb 100644 --- a/tools/arch/x86/include/asm/unistd_64.h +++ b/tools/arch/x86/include/asm/unistd_64.h @@ -17,3 +17,11 @@ #ifndef __NR_setns #define __NR_setns 308 #endif + +#ifndef __NR_futex_wait +# define __NR_futex_wait 441 +#endif + +#ifndef __NR_futex_wake +# define __NR_futex_wake 442 +#endif diff --git a/tools/perf/bench/bench.h b/tools/perf/bench/bench.h index eac36afab..f6f881a05 100644 --- a/tools/perf/bench/bench.h +++ b/tools/perf/bench/bench.h @@ -38,8 +38,11 @@ int bench_mem_memcpy(int argc, const char **argv); int bench_mem_memset(int argc, const char **argv); int bench_mem_find_bit(int argc, const char **argv); int bench_futex_hash(int argc, const char **argv); +int 
bench_futex2_hash(int argc, const char **argv); int bench_futex_wake(int argc, const char **argv); +int bench_futex2_wake(int argc, const char **argv); int bench_futex_wake_parallel(int argc, const char **argv); +int bench_futex2_wake_parallel(int argc, const char **argv); int bench_futex_requeue(int argc, const char **argv); /* pi futexes */ int bench_futex_lock_pi(int argc, const char **argv); diff --git a/tools/perf/bench/futex-hash.c b/tools/perf/bench/futex-hash.c index 915bf3da7..72921c22b 100644 --- a/tools/perf/bench/futex-hash.c +++ b/tools/perf/bench/futex-hash.c @@ -34,7 +34,7 @@ static unsigned int nthreads = 0; static unsigned int nsecs = 10; /* amount of futexes per thread */ static unsigned int nfutexes = 1024; -static bool fshared = false, done = false, silent = false; +static bool fshared = false, done = false, silent = false, futex2 = false; static int futex_flag = 0; struct timeval bench__start, bench__end, bench__runtime; @@ -86,7 +86,10 @@ static void *workerfn(void *arg) * such as internal waitqueue handling, thus enlarging * the critical region protected by hb->lock. 
*/ - ret = futex_wait(&w->futex[i], 1234, NULL, futex_flag); + if (!futex2) + ret = futex_wait(&w->futex[i], 1234, NULL, futex_flag); + else + ret = futex2_wait(&w->futex[i], 1234, futex_flag, NULL); if (!silent && (!ret || errno != EAGAIN || errno != EWOULDBLOCK)) warn("Non-expected futex return call"); @@ -117,7 +120,7 @@ static void print_summary(void) (int)bench__runtime.tv_sec); } -int bench_futex_hash(int argc, const char **argv) +static int bench_futex_hash_common(int argc, const char **argv) { int ret = 0; cpu_set_t cpuset; @@ -149,7 +152,9 @@ int bench_futex_hash(int argc, const char **argv) if (!worker) goto errmem; - if (!fshared) + if (futex2) + futex_flag = FUTEX_32 | (fshared * FUTEX_SHARED_FLAG); + else if (!fshared) futex_flag = FUTEX_PRIVATE_FLAG; printf("Run summary [PID %d]: %d threads, each operating on %d [%s] futexes for %d secs.\n\n", @@ -229,3 +234,14 @@ int bench_futex_hash(int argc, const char **argv) errmem: err(EXIT_FAILURE, "calloc"); } + +int bench_futex_hash(int argc, const char **argv) +{ + return bench_futex_hash_common(argc, argv); +} + +int bench_futex2_hash(int argc, const char **argv) +{ + futex2 = true; + return bench_futex_hash_common(argc, argv); +} diff --git a/tools/perf/bench/futex-wake-parallel.c b/tools/perf/bench/futex-wake-parallel.c index cd2b81a84..540104538 100644 --- a/tools/perf/bench/futex-wake-parallel.c +++ b/tools/perf/bench/futex-wake-parallel.c @@ -17,6 +17,12 @@ int bench_futex_wake_parallel(int argc __maybe_unused, const char **argv __maybe pr_err("%s: pthread_barrier_t unavailable, disabling this test...\n", __func__); return 0; } + +int bench_futex2_wake_parallel(int argc __maybe_unused, const char **argv __maybe_unused) +{ + pr_err("%s: pthread_barrier_t unavailable, disabling this test...\n", __func__); + return 0; +} #else /* HAVE_PTHREAD_BARRIER */ /* For the CLR_() macros */ #include @@ -48,7 +54,7 @@ static unsigned int nwakes = 1; static u_int32_t futex = 0; static pthread_t *blocked_worker; 
-static bool done = false, silent = false, fshared = false; +static bool done = false, silent = false, fshared = false, futex2 = false; static unsigned int nblocked_threads = 0, nwaking_threads = 0; static pthread_mutex_t thread_lock; static pthread_cond_t thread_parent, thread_worker; @@ -79,7 +85,11 @@ static void *waking_workerfn(void *arg) gettimeofday(&start, NULL); - waker->nwoken = futex_wake(&futex, nwakes, futex_flag); + if (!futex2) + waker->nwoken = futex_wake(&futex, nwakes, futex_flag); + else + waker->nwoken = futex2_wake(&futex, nwakes, futex_flag); + if (waker->nwoken != nwakes) warnx("couldn't wakeup all tasks (%d/%d)", waker->nwoken, nwakes); @@ -130,8 +140,13 @@ static void *blocked_workerfn(void *arg __maybe_unused) pthread_mutex_unlock(&thread_lock); while (1) { /* handle spurious wakeups */ - if (futex_wait(&futex, 0, NULL, futex_flag) != EINTR) - break; + if (!futex2) { + if (futex_wait(&futex, 0, NULL, futex_flag) != EINTR) + break; + } else { + if (futex2_wait(&futex, 0, futex_flag, NULL) != EINTR) + break; + } } pthread_exit(NULL); @@ -218,7 +233,7 @@ static void toggle_done(int sig __maybe_unused, done = true; } -int bench_futex_wake_parallel(int argc, const char **argv) +static int bench_futex_wake_parallel_common(int argc, const char **argv) { int ret = 0; unsigned int i, j; @@ -262,7 +277,9 @@ int bench_futex_wake_parallel(int argc, const char **argv) if (!blocked_worker) err(EXIT_FAILURE, "calloc"); - if (!fshared) + if (futex2) + futex_flag = FUTEX_32 | (fshared * FUTEX_SHARED_FLAG); + else if (!fshared) futex_flag = FUTEX_PRIVATE_FLAG; printf("Run summary [PID %d]: blocking on %d threads (at [%s] " @@ -322,4 +339,16 @@ int bench_futex_wake_parallel(int argc, const char **argv) free(blocked_worker); return ret; } + +int bench_futex_wake_parallel(int argc, const char **argv) +{ + return bench_futex_wake_parallel_common(argc, argv); +} + +int bench_futex2_wake_parallel(int argc, const char **argv) +{ + futex2 = true; + return 
bench_futex_wake_parallel_common(argc, argv); +} + #endif /* HAVE_PTHREAD_BARRIER */ diff --git a/tools/perf/bench/futex-wake.c b/tools/perf/bench/futex-wake.c index 2dfcef3e3..b98b84e7b 100644 --- a/tools/perf/bench/futex-wake.c +++ b/tools/perf/bench/futex-wake.c @@ -46,6 +46,9 @@ static struct stats waketime_stats, wakeup_stats; static unsigned int threads_starting, nthreads = 0; static int futex_flag = 0; +/* Should we use futex2 API? */ +static bool futex2 = false; + static const struct option options[] = { OPT_UINTEGER('t', "threads", &nthreads, "Specify amount of threads"), OPT_UINTEGER('w', "nwakes", &nwakes, "Specify amount of threads to wake at once"), @@ -69,8 +72,13 @@ static void *workerfn(void *arg __maybe_unused) pthread_mutex_unlock(&thread_lock); while (1) { - if (futex_wait(&futex1, 0, NULL, futex_flag) != EINTR) - break; + if (!futex2) { + if (futex_wait(&futex1, 0, NULL, futex_flag) != EINTR) + break; + } else { + if (futex2_wait(&futex1, 0, futex_flag, NULL) != EINTR) + break; + } } pthread_exit(NULL); @@ -118,7 +126,7 @@ static void toggle_done(int sig __maybe_unused, done = true; } -int bench_futex_wake(int argc, const char **argv) +static int bench_futex_wake_common(int argc, const char **argv) { int ret = 0; unsigned int i, j; @@ -148,7 +156,9 @@ int bench_futex_wake(int argc, const char **argv) if (!worker) err(EXIT_FAILURE, "calloc"); - if (!fshared) + if (futex2) + futex_flag = FUTEX_32 | (fshared * FUTEX_SHARED_FLAG); + else if (!fshared) futex_flag = FUTEX_PRIVATE_FLAG; printf("Run summary [PID %d]: blocking on %d threads (at [%s] futex %p), " @@ -181,8 +191,13 @@ int bench_futex_wake(int argc, const char **argv) /* Ok, all threads are patiently blocked, start waking folks up */ gettimeofday(&start, NULL); while (nwoken != nthreads) - nwoken += futex_wake(&futex1, nwakes, futex_flag); + if (!futex2) { + nwoken += futex_wake(&futex1, nwakes, futex_flag); + } else { + nwoken += futex2_wake(&futex1, nwakes, futex_flag); + } 
gettimeofday(&end, NULL); + timersub(&end, &start, &runtime); update_stats(&wakeup_stats, nwoken); @@ -212,3 +227,14 @@ int bench_futex_wake(int argc, const char **argv) free(worker); return ret; } + +int bench_futex_wake(int argc, const char **argv) +{ + return bench_futex_wake_common(argc, argv); +} + +int bench_futex2_wake(int argc, const char **argv) +{ + futex2 = true; + return bench_futex_wake_common(argc, argv); +} diff --git a/tools/perf/bench/futex.h b/tools/perf/bench/futex.h index 31b53cc7d..5111799b5 100644 --- a/tools/perf/bench/futex.h +++ b/tools/perf/bench/futex.h @@ -86,4 +86,21 @@ futex_cmp_requeue(u_int32_t *uaddr, u_int32_t val, u_int32_t *uaddr2, int nr_wak return futex(uaddr, FUTEX_CMP_REQUEUE, nr_wake, nr_requeue, uaddr2, val, opflags); } + +/* + * wait for uaddr if (*uaddr == val) + */ +static inline int futex2_wait(volatile void *uaddr, unsigned long val, + unsigned long flags, struct timespec *timo) +{ + return syscall(__NR_futex_wait, uaddr, val, flags, timo); +} + +/* + * wake nr futexes waiting for uaddr + */ +static inline int futex2_wake(volatile void *uaddr, unsigned int nr, unsigned long flags) +{ + return syscall(__NR_futex_wake, uaddr, nr, flags); +} #endif /* _FUTEX_H */ diff --git a/tools/perf/builtin-bench.c b/tools/perf/builtin-bench.c index 62a7b7420..200ecacad 100644 --- a/tools/perf/builtin-bench.c +++ b/tools/perf/builtin-bench.c @@ -12,10 +12,11 @@ * * sched ... scheduler and IPC performance * syscall ... System call performance - * mem ... memory access performance - * numa ... NUMA scheduling and MM performance - * futex ... Futex performance - * epoll ... Event poll performance + * mem ... memory access performance + * numa ... NUMA scheduling and MM performance + * futex ... Futex performance + * futex2 ... Futex2 performance + * epoll ... 
Event poll performance */ #include #include "builtin.h" @@ -75,6 +76,13 @@ static struct bench futex_benchmarks[] = { { NULL, NULL, NULL } }; +static struct bench futex2_benchmarks[] = { + { "hash", "Benchmark for futex2 hash table", bench_futex2_hash }, + { "wake", "Benchmark for futex2 wake calls", bench_futex2_wake }, + { "wake-parallel", "Benchmark for parallel futex2 wake calls", bench_futex2_wake_parallel }, + { NULL, NULL, NULL } +}; + #ifdef HAVE_EVENTFD_SUPPORT static struct bench epoll_benchmarks[] = { { "wait", "Benchmark epoll concurrent epoll_waits", bench_epoll_wait }, @@ -105,6 +113,7 @@ static struct collection collections[] = { { "numa", "NUMA scheduling and MM benchmarks", numa_benchmarks }, #endif {"futex", "Futex stressing benchmarks", futex_benchmarks }, + {"futex2", "Futex2 stressing benchmarks", futex2_benchmarks }, #ifdef HAVE_EVENTFD_SUPPORT {"epoll", "Epoll stressing benchmarks", epoll_benchmarks }, #endif -- 2.29.2