aboutsummaryrefslogtreecommitdiff
path: root/SOURCES/futex2.patch
diff options
context:
space:
mode:
Diffstat (limited to 'SOURCES/futex2.patch')
-rw-r--r-- SOURCES/futex2.patch 6056
1 files changed, 1172 insertions, 4884 deletions
diff --git a/SOURCES/futex2.patch b/SOURCES/futex2.patch
index bae4138..1bc4486 100644
--- a/SOURCES/futex2.patch
+++ b/SOURCES/futex2.patch
@@ -1,7 +1,7 @@
-From ada1f13b98e86cb7ac4140c4976c3d165006d995 Mon Sep 17 00:00:00 2001
+From 14a106cc87e6d03169ac8c7ea030e3d7fac2dfe4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Andr=C3=A9=20Almeida?= <andrealmeid@collabora.com>
Date: Wed, 5 Aug 2020 12:40:26 -0300
-Subject: [PATCH 01/13] futex2: Add new futex interface
+Subject: [PATCH 1/9] futex2: Add new futex interface
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
@@ -10,25 +10,28 @@ Initial implementation for futex2. Support only private u32 wait/wake, with
timeout (monotonic and realtime clocks).
Signed-off-by: André Almeida <andrealmeid@collabora.com>
+Signed-off-by: Jan200101 <sentrycraft123@gmail.com>
---
- MAINTAINERS | 2 +-
- arch/x86/entry/syscalls/syscall_32.tbl | 2 +
- arch/x86/entry/syscalls/syscall_64.tbl | 2 +
- include/linux/syscalls.h | 7 +
- include/uapi/asm-generic/unistd.h | 8 +-
- include/uapi/linux/futex.h | 40 ++
- init/Kconfig | 7 +
- kernel/Makefile | 1 +
- kernel/futex2.c | 484 +++++++++++++++++++++++++
- kernel/sys_ni.c | 4 +
- 10 files changed, 555 insertions(+), 2 deletions(-)
+ MAINTAINERS | 2 +-
+ arch/x86/entry/syscalls/syscall_32.tbl | 2 +
+ arch/x86/entry/syscalls/syscall_64.tbl | 2 +
+ include/linux/syscalls.h | 7 +
+ include/uapi/asm-generic/unistd.h | 8 +-
+ include/uapi/linux/futex.h | 40 ++
+ init/Kconfig | 7 +
+ kernel/Makefile | 1 +
+ kernel/futex2.c | 484 ++++++++++++++++++
+ kernel/sys_ni.c | 4 +
+ tools/include/uapi/asm-generic/unistd.h | 9 +-
+ .../arch/x86/entry/syscalls/syscall_64.tbl | 2 +
+ 12 files changed, 565 insertions(+), 3 deletions(-)
create mode 100644 kernel/futex2.c
diff --git a/MAINTAINERS b/MAINTAINERS
-index 867157311dc8..0c425f74ed88 100644
+index 2daa6ee67..855d38511 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
-@@ -7214,7 +7214,7 @@ F: Documentation/locking/*futex*
+@@ -7259,7 +7259,7 @@ F: Documentation/locking/*futex*
F: include/asm-generic/futex.h
F: include/linux/futex.h
F: include/uapi/linux/futex.h
@@ -38,30 +41,30 @@ index 867157311dc8..0c425f74ed88 100644
F: tools/testing/selftests/futex/
diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl
-index 9d1102873666..955322962964 100644
+index 0d0667a9f..83a75ff39 100644
--- a/arch/x86/entry/syscalls/syscall_32.tbl
+++ b/arch/x86/entry/syscalls/syscall_32.tbl
-@@ -444,3 +444,5 @@
- 437 i386 openat2 sys_openat2
+@@ -445,3 +445,5 @@
438 i386 pidfd_getfd sys_pidfd_getfd
439 i386 faccessat2 sys_faccessat2
-+440 i386 futex_wait sys_futex_wait
-+441 i386 futex_wake sys_futex_wake
+ 440 i386 process_madvise sys_process_madvise
++441 i386 futex_wait sys_futex_wait
++442 i386 futex_wake sys_futex_wake
diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl
-index f30d6ae9a688..4133bfe96891 100644
+index 379819244..6658fd63c 100644
--- a/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/arch/x86/entry/syscalls/syscall_64.tbl
-@@ -361,6 +361,8 @@
- 437 common openat2 sys_openat2
+@@ -362,6 +362,8 @@
438 common pidfd_getfd sys_pidfd_getfd
439 common faccessat2 sys_faccessat2
-+440 common futex_wait sys_futex_wait
-+441 common futex_wake sys_futex_wake
+ 440 common process_madvise sys_process_madvise
++441 common futex_wait sys_futex_wait
++442 common futex_wake sys_futex_wake
#
- # x32-specific system call numbers start at 512 to avoid cache impact
+ # Due to a historical design error, certain syscalls are numbered differently
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
-index 75ac7f8ae93c..38c3a87dbfc2 100644
+index 37bea07c1..b6b77cf2b 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -589,6 +589,13 @@ asmlinkage long sys_get_robust_list(int pid,
@@ -79,27 +82,27 @@ index 75ac7f8ae93c..38c3a87dbfc2 100644
asmlinkage long sys_nanosleep(struct __kernel_timespec __user *rqtp,
struct __kernel_timespec __user *rmtp);
diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h
-index 995b36c2ea7d..80567ade774a 100644
+index 205631898..ae47d6a9e 100644
--- a/include/uapi/asm-generic/unistd.h
+++ b/include/uapi/asm-generic/unistd.h
-@@ -860,8 +860,14 @@ __SYSCALL(__NR_pidfd_getfd, sys_pidfd_getfd)
- #define __NR_faccessat2 439
- __SYSCALL(__NR_faccessat2, sys_faccessat2)
+@@ -860,8 +860,14 @@ __SYSCALL(__NR_faccessat2, sys_faccessat2)
+ #define __NR_process_madvise 440
+ __SYSCALL(__NR_process_madvise, sys_process_madvise)
-+#define __NR_futex_wait 440
++#define __NR_futex_wait 441
+__SYSCALL(__NR_futex_wait, sys_futex_wait)
+
-+#define __NR_futex_wake 441
++#define __NR_futex_wake 442
+__SYSCALL(__NR_futex_wake, sys_futex_wake)
+
#undef __NR_syscalls
--#define __NR_syscalls 440
-+#define __NR_syscalls 442
+-#define __NR_syscalls 441
++#define __NR_syscalls 443
/*
* 32 bit systems traditionally used different
diff --git a/include/uapi/linux/futex.h b/include/uapi/linux/futex.h
-index a89eb0accd5e..35a5bf1cd41b 100644
+index a89eb0acc..35a5bf1cd 100644
--- a/include/uapi/linux/futex.h
+++ b/include/uapi/linux/futex.h
@@ -41,6 +41,46 @@
@@ -150,7 +153,7 @@ index a89eb0accd5e..35a5bf1cd41b 100644
* Support for robust futexes: the kernel cleans up held futexes at
* thread exit time.
diff --git a/init/Kconfig b/init/Kconfig
-index 2a5df1cf838c..440f21f5c3d8 100644
+index 02d13ae27..1264687ea 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1522,6 +1522,13 @@ config FUTEX
@@ -168,10 +171,10 @@ index 2a5df1cf838c..440f21f5c3d8 100644
bool
depends on FUTEX && RT_MUTEXES
diff --git a/kernel/Makefile b/kernel/Makefile
-index 9a20016d4900..51ea9bc647bf 100644
+index af601b9bd..bb7f33986 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
-@@ -57,6 +57,7 @@ obj-$(CONFIG_PROFILING) += profile.o
+@@ -54,6 +54,7 @@ obj-$(CONFIG_PROFILING) += profile.o
obj-$(CONFIG_STACKTRACE) += stacktrace.o
obj-y += time/
obj-$(CONFIG_FUTEX) += futex.o
@@ -181,7 +184,7 @@ index 9a20016d4900..51ea9bc647bf 100644
ifneq ($(CONFIG_SMP),y)
diff --git a/kernel/futex2.c b/kernel/futex2.c
new file mode 100644
-index 000000000000..107b80a466d0
+index 000000000..107b80a46
--- /dev/null
+++ b/kernel/futex2.c
@@ -0,0 +1,484 @@
@@ -670,7 +673,7 @@ index 000000000000..107b80a466d0
+}
+core_initcall(futex2_init);
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
-index 4d59775ea79c..10049bc56c24 100644
+index f27ac94d5..35ff743b1 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -148,6 +148,10 @@ COND_SYSCALL_COMPAT(set_robust_list);
@@ -684,13 +687,48 @@ index 4d59775ea79c..10049bc56c24 100644
/* kernel/hrtimer.c */
/* kernel/itimer.c */
+diff --git a/tools/include/uapi/asm-generic/unistd.h b/tools/include/uapi/asm-generic/unistd.h
+index 205631898..cd79f94e0 100644
+--- a/tools/include/uapi/asm-generic/unistd.h
++++ b/tools/include/uapi/asm-generic/unistd.h
+@@ -860,8 +860,15 @@ __SYSCALL(__NR_faccessat2, sys_faccessat2)
+ #define __NR_process_madvise 440
+ __SYSCALL(__NR_process_madvise, sys_process_madvise)
+
++#define __NR_futex_wait 441
++__SYSCALL(__NR_futex_wait, sys_futex_wait)
++
++#define __NR_futex_wake 442
++__SYSCALL(__NR_futex_wake, sys_futex_wake)
++
+ #undef __NR_syscalls
+-#define __NR_syscalls 441
++#define __NR_syscalls 443
++
+
+ /*
+ * 32 bit systems traditionally used different
+diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
+index 379819244..47de3bf93 100644
+--- a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
++++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
+@@ -362,6 +362,8 @@
+ 438 common pidfd_getfd sys_pidfd_getfd
+ 439 common faccessat2 sys_faccessat2
+ 440 common process_madvise sys_process_madvise
++441 common futex_wait sys_futex_wait
++442 common futex_wake sys_futex_wake
+
+ #
+ # Due to a historical design error, certain syscalls are numbered differently
--
-2.28.0
+2.29.2
+
-From 08110d54945541dd186a7dabeef58be08011dde7 Mon Sep 17 00:00:00 2001
+From d71973d99efb1e2fd2542ea4d4b45b0e03e45b9c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Andr=C3=A9=20Almeida?= <andrealmeid@collabora.com>
Date: Thu, 15 Oct 2020 17:15:57 -0300
-Subject: [PATCH 02/13] futex2: Add suport for vectorized wait
+Subject: [PATCH 2/9] futex2: Add support for vectorized wait
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
@@ -698,54 +736,57 @@ Content-Transfer-Encoding: 8bit
Add support to wait on multiple futexes
Signed-off-by: André Almeida <andrealmeid@collabora.com>
+Signed-off-by: Jan200101 <sentrycraft123@gmail.com>
---
- arch/x86/entry/syscalls/syscall_32.tbl | 1 +
- arch/x86/entry/syscalls/syscall_64.tbl | 1 +
- include/uapi/asm-generic/unistd.h | 5 +-
- kernel/futex2.c | 430 +++++++++++++++++--------
- kernel/sys_ni.c | 1 +
- 5 files changed, 304 insertions(+), 134 deletions(-)
+ arch/x86/entry/syscalls/syscall_32.tbl | 1 +
+ arch/x86/entry/syscalls/syscall_64.tbl | 1 +
+ include/uapi/asm-generic/unistd.h | 5 +-
+ kernel/futex2.c | 430 ++++++++++++------
+ kernel/sys_ni.c | 1 +
+ tools/include/uapi/asm-generic/unistd.h | 5 +-
+ .../arch/x86/entry/syscalls/syscall_64.tbl | 1 +
+ 7 files changed, 309 insertions(+), 135 deletions(-)
diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl
-index 955322962964..c844c0cbf0e5 100644
+index 83a75ff39..65734d5e1 100644
--- a/arch/x86/entry/syscalls/syscall_32.tbl
+++ b/arch/x86/entry/syscalls/syscall_32.tbl
-@@ -446,3 +446,4 @@
- 439 i386 faccessat2 sys_faccessat2
- 440 i386 futex_wait sys_futex_wait
- 441 i386 futex_wake sys_futex_wake
-+442 i386 futex_waitv sys_futex_waitv
+@@ -447,3 +447,4 @@
+ 440 i386 process_madvise sys_process_madvise
+ 441 i386 futex_wait sys_futex_wait
+ 442 i386 futex_wake sys_futex_wake
++443 i386 futex_waitv sys_futex_waitv
diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl
-index 4133bfe96891..0901c26c6786 100644
+index 6658fd63c..f30811b56 100644
--- a/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/arch/x86/entry/syscalls/syscall_64.tbl
-@@ -363,6 +363,7 @@
- 439 common faccessat2 sys_faccessat2
- 440 common futex_wait sys_futex_wait
- 441 common futex_wake sys_futex_wake
-+442 common futex_waitv sys_futex_waitv
+@@ -364,6 +364,7 @@
+ 440 common process_madvise sys_process_madvise
+ 441 common futex_wait sys_futex_wait
+ 442 common futex_wake sys_futex_wake
++443 common futex_waitv sys_futex_waitv
#
- # x32-specific system call numbers start at 512 to avoid cache impact
+ # Due to a historical design error, certain syscalls are numbered differently
diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h
-index 80567ade774a..d7ebbed0a18c 100644
+index ae47d6a9e..81a90b697 100644
--- a/include/uapi/asm-generic/unistd.h
+++ b/include/uapi/asm-generic/unistd.h
@@ -866,8 +866,11 @@ __SYSCALL(__NR_futex_wait, sys_futex_wait)
- #define __NR_futex_wake 441
+ #define __NR_futex_wake 442
__SYSCALL(__NR_futex_wake, sys_futex_wake)
-+#define __NR_futex_waitv 442
++#define __NR_futex_waitv 443
+__SYSCALL(__NR_futex_waitv, sys_futex_waitv)
+
#undef __NR_syscalls
--#define __NR_syscalls 442
-+#define __NR_syscalls 443
+-#define __NR_syscalls 443
++#define __NR_syscalls 444
/*
* 32 bit systems traditionally used different
diff --git a/kernel/futex2.c b/kernel/futex2.c
-index 107b80a466d0..4b782b5ef615 100644
+index 107b80a46..4b782b5ef 100644
--- a/kernel/futex2.c
+++ b/kernel/futex2.c
@@ -48,14 +48,25 @@ struct futex_bucket {
@@ -1286,7 +1327,7 @@ index 107b80a466d0..4b782b5ef615 100644
get_task_struct(task);
list_del_init_careful(&aux->list);
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
-index 10049bc56c24..3e1a713d3e57 100644
+index 35ff743b1..1898e7340 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -151,6 +151,7 @@ COND_SYSCALL_COMPAT(get_robust_list);
@@ -1297,13 +1338,431 @@ index 10049bc56c24..3e1a713d3e57 100644
/* kernel/hrtimer.c */
+diff --git a/tools/include/uapi/asm-generic/unistd.h b/tools/include/uapi/asm-generic/unistd.h
+index cd79f94e0..7de33be59 100644
+--- a/tools/include/uapi/asm-generic/unistd.h
++++ b/tools/include/uapi/asm-generic/unistd.h
+@@ -866,8 +866,11 @@ __SYSCALL(__NR_futex_wait, sys_futex_wait)
+ #define __NR_futex_wake 442
+ __SYSCALL(__NR_futex_wake, sys_futex_wake)
+
++#define __NR_futex_waitv 443
++__SYSCALL(__NR_futex_waitv, sys_futex_waitv)
++
+ #undef __NR_syscalls
+-#define __NR_syscalls 443
++#define __NR_syscalls 444
+
+
+ /*
+diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
+index 47de3bf93..bd47f368f 100644
+--- a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
++++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
+@@ -364,6 +364,7 @@
+ 440 common process_madvise sys_process_madvise
+ 441 common futex_wait sys_futex_wait
+ 442 common futex_wake sys_futex_wake
++443 common futex_waitv sys_futex_waitv
+
+ #
+ # Due to a historical design error, certain syscalls are numbered differently
+--
+2.29.2
+
+
+From 24681616a5432f7680f934abf335a9ab9a1eaf1e Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Andr=C3=A9=20Almeida?= <andrealmeid@collabora.com>
+Date: Thu, 15 Oct 2020 18:06:40 -0300
+Subject: [PATCH 3/9] futex2: Add support for shared futexes
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Add support for shared futexes for cross-process resources.
+
+Signed-off-by: André Almeida <andrealmeid@collabora.com>
+Signed-off-by: Jan200101 <sentrycraft123@gmail.com>
+---
+ kernel/futex2.c | 187 ++++++++++++++++++++++++++++++++++++++++++------
+ 1 file changed, 165 insertions(+), 22 deletions(-)
+
+diff --git a/kernel/futex2.c b/kernel/futex2.c
+index 4b782b5ef..5ddb9922d 100644
+--- a/kernel/futex2.c
++++ b/kernel/futex2.c
+@@ -6,7 +6,9 @@
+ */
+
+ #include <linux/freezer.h>
++#include <linux/hugetlb.h>
+ #include <linux/jhash.h>
++#include <linux/pagemap.h>
+ #include <linux/sched/wake_q.h>
+ #include <linux/spinlock.h>
+ #include <linux/syscalls.h>
+@@ -15,6 +17,7 @@
+
+ /**
+ * struct futex_waiter - List entry for a waiter
++ * @uaddr: Memory address of userspace futex
+ * @key.address: Memory address of userspace futex
+ * @key.mm: Pointer to memory management struct of this process
+ * @key: Stores information that uniquely identify a futex
+@@ -25,9 +28,11 @@
+ * @index: Index of waiter in futexv list
+ */
+ struct futex_waiter {
++ uintptr_t uaddr;
+ struct futex_key {
+ uintptr_t address;
+ struct mm_struct *mm;
++ unsigned long int offset;
+ } key;
+ struct list_head list;
+ unsigned int val;
+@@ -125,16 +130,116 @@ static inline int bucket_get_waiters(struct futex_bucket *bucket)
+ #endif
+ }
+
++static u64 get_inode_sequence_number(struct inode *inode) /* Return inode->i_sequence, assigning a fresh nonzero value on first use. */
++{
++ static atomic64_t i_seq; /* function-local static counter that new sequence numbers are drawn from */
++ u64 old;
++
++ /* Does the inode already have a sequence number? */
++ old = atomic64_read(&inode->i_sequence);
++ if (likely(old))
++ return old;
++
++ for (;;) {
++ u64 new = atomic64_add_return(1, &i_seq);
++ if (WARN_ON_ONCE(!new)) /* 0 is what the check above treats as unassigned, so a counter value of 0 is skipped */
++ continue;
++
++ old = atomic64_cmpxchg_relaxed(&inode->i_sequence, 0, new); /* store new only if the field is still 0 */
++ if (old) /* nonzero: a value was already stored there, so return that one instead */
++ return old;
++ return new;
++ }
++}
++
++#define FUT_OFF_INODE 1 /* We set bit 0 if key has a reference on inode */
++#define FUT_OFF_MMSHARED 2 /* We set bit 1 if key has a reference on mm */
++
++static int futex_get_shared_key(uintptr_t address, struct mm_struct *mm,
++ struct futex_key *key) /* Fill @key for the futex page at @address; returns 0, -EFAULT, or the negative error from get_user_pages_fast(). */
++{
++ int err;
++ struct page *page, *tail;
++ struct address_space *mapping;
++
++again:
++ err = get_user_pages_fast(address, 1, 0, &page); /* look up the single page at address */
++
++ if (err < 0)
++ return err;
++ else
++ err = 0; /* any non-negative result is treated as success from here on */
++
++
++ tail = page; /* keep the page as originally returned for basepage_index() below */
++ page = compound_head(page);
++ mapping = READ_ONCE(page->mapping);
++
++
++ if (unlikely(!mapping)) { /* no mapping seen: recheck with the page locked */
++ int shmem_swizzled;
++
++ lock_page(page);
++ shmem_swizzled = PageSwapCache(page) || page->mapping; /* true if the page is in the swap cache or has a mapping again */
++ unlock_page(page);
++ put_page(page);
++
++ if (shmem_swizzled)
++ goto again;
++
++ return -EFAULT; /* still no mapping and not in the swap cache */
++ }
++
++ if (PageAnon(page)) { /* anonymous page: key on (mm, address) */
++
++ key->mm = mm;
++ key->address = address;
++
++ key->offset |= FUT_OFF_MMSHARED; /* ORed into whatever key->offset already holds */
++
++ } else { /* non-anonymous page: key on the inode that owns the mapping */
++ struct inode *inode;
++
++ rcu_read_lock();
++
++ if (READ_ONCE(page->mapping) != mapping) { /* mapping changed since it was sampled: start over */
++ rcu_read_unlock();
++ put_page(page);
++
++ goto again;
++ }
++
++ inode = READ_ONCE(mapping->host);
++ if (!inode) { /* no host inode: start over */
++ rcu_read_unlock();
++ put_page(page);
++
++ goto again;
++ }
++
++ key->address = get_inode_sequence_number(inode);
++ key->mm = (struct mm_struct *) basepage_index(tail); /* NOTE(review): the mm field carries a page index here, not a real mm pointer */
++ key->offset |= FUT_OFF_INODE;
++
++ rcu_read_unlock();
++ }
++
++ put_page(page); /* presumably drops the reference taken by get_user_pages_fast() - confirm */
++ return err;
++}
++
+ /**
+ * futex_get_bucket - Check if the user address is valid, prepare internal
+ * data and calculate the hash
+ * @uaddr: futex user address
+ * @key: data that uniquely identifies a futex
++ * @shared: is this a shared futex?
+ *
+ * Return: address of bucket on success, error code otherwise
+ */
+ static struct futex_bucket *futex_get_bucket(void __user *uaddr,
+- struct futex_key *key)
++ struct futex_key *key,
++ bool shared)
+ {
+ uintptr_t address = (uintptr_t) uaddr;
+ u32 hash_key;
+@@ -145,8 +250,15 @@ static struct futex_bucket *futex_get_bucket(void __user *uaddr,
+ if (unlikely(!access_ok(address, sizeof(u32))))
+ return ERR_PTR(-EFAULT);
+
+- key->address = address;
+- key->mm = current->mm;
++ key->offset = address % PAGE_SIZE;
++ address -= key->offset;
++
++ if (!shared) {
++ key->address = address;
++ key->mm = current->mm;
++ } else {
++ futex_get_shared_key(address, current->mm, key);
++ }
+
+ /* Generate hash key for this futex using uaddr and current->mm */
+ hash_key = jhash2((u32 *) key, sizeof(*key) / sizeof(u32), 0);
+@@ -275,9 +387,10 @@ static int futex_dequeue_multiple(struct futexv *futexv, unsigned int nr)
+ * Return: 0 on success, error code otherwise
+ */
+ static int futex_enqueue(struct futexv *futexv, unsigned int nr_futexes,
+- unsigned int *awaken)
++ int *awaken)
+ {
+ int i, ret;
++ bool shared, retry = false;
+ u32 uval, *uaddr, val;
+ struct futex_bucket *bucket;
+
+@@ -285,8 +398,18 @@ static int futex_enqueue(struct futexv *futexv, unsigned int nr_futexes,
+ set_current_state(TASK_INTERRUPTIBLE);
+
+ for (i = 0; i < nr_futexes; i++) {
+- uaddr = (u32 * __user) futexv->objects[i].key.address;
++ uaddr = (u32 * __user) futexv->objects[i].uaddr;
+ val = (u32) futexv->objects[i].val;
++ shared = (futexv->objects[i].flags & FUTEX_SHARED_FLAG) ? true : false;
++
++ if (shared && retry) {
++ futexv->objects[i].bucket =
++ futex_get_bucket((void *) uaddr,
++ &futexv->objects[i].key, true);
++ if (IS_ERR(futexv->objects[i].bucket))
++ return PTR_ERR(futexv->objects[i].bucket);
++ }
++
+ bucket = futexv->objects[i].bucket;
+
+ bucket_inc_waiters(bucket);
+@@ -301,24 +424,32 @@ static int futex_enqueue(struct futexv *futexv, unsigned int nr_futexes,
+ __set_current_state(TASK_RUNNING);
+ *awaken = futex_dequeue_multiple(futexv, i);
+
++ if (shared) {
++ retry = true;
++ goto retry;
++ }
++
+ if (__get_user(uval, uaddr))
+ return -EFAULT;
+
+ if (*awaken >= 0)
+- return 0;
++ return 1;
+
++ retry = true;
+ goto retry;
+ }
+
+ if (uval != val) {
+ spin_unlock(&bucket->lock);
+
++
+ bucket_dec_waiters(bucket);
+ __set_current_state(TASK_RUNNING);
+ *awaken = futex_dequeue_multiple(futexv, i);
+
+- if (*awaken >= 0)
+- return 0;
++ if (*awaken >= 0) {
++ return 1;
++ }
+
+ return -EWOULDBLOCK;
+ }
+@@ -336,19 +467,18 @@ static int __futex_wait(struct futexv *futexv,
+ struct hrtimer_sleeper *timeout)
+ {
+ int ret;
+- unsigned int awaken = -1;
+
+- while (1) {
+- ret = futex_enqueue(futexv, nr_futexes, &awaken);
+
+- if (ret < 0)
+- break;
++ while (1) {
++ int awaken = -1;
+
+- if (awaken <= 0) {
+- return awaken;
++ ret = futex_enqueue(futexv, nr_futexes, &awaken);
++ if (ret) {
++ if (awaken >= 0)
++ return awaken;
++ return ret;
+ }
+
+-
+ /* Before sleeping, check if someone was woken */
+ if (!futexv->hint && (!timeout || timeout->task))
+ freezable_schedule();
+@@ -419,6 +549,7 @@ static int futex_wait(struct futexv *futexv, unsigned int nr_futexes,
+ hrtimer_sleeper_start_expires(timeout, HRTIMER_MODE_ABS);
+ }
+
++
+ ret = __futex_wait(futexv, nr_futexes, timo ? timeout : NULL);
+
+
+@@ -438,9 +569,10 @@ static int futex_wait(struct futexv *futexv, unsigned int nr_futexes,
+ SYSCALL_DEFINE4(futex_wait, void __user *, uaddr, unsigned int, val,
+ unsigned int, flags, struct __kernel_timespec __user *, timo)
+ {
++ bool shared = (flags & FUTEX_SHARED_FLAG) ? true : false;
+ unsigned int size = flags & FUTEX_SIZE_MASK;
+- struct hrtimer_sleeper timeout;
+ struct futex_single_waiter wait_single;
++ struct hrtimer_sleeper timeout;
+ struct futex_waiter *waiter;
+ struct futexv *futexv;
+ int ret;
+@@ -452,6 +584,7 @@ SYSCALL_DEFINE4(futex_wait, void __user *, uaddr, unsigned int, val,
+ waiter = &wait_single.waiter;
+ waiter->index = 0;
+ waiter->val = val;
++ waiter->uaddr = (uintptr_t) uaddr;
+
+ INIT_LIST_HEAD(&waiter->list);
+
+@@ -462,11 +595,14 @@ SYSCALL_DEFINE4(futex_wait, void __user *, uaddr, unsigned int, val,
+ return -EINVAL;
+
+ /* Get an unlocked hash bucket */
+- waiter->bucket = futex_get_bucket(uaddr, &waiter->key);
+- if (IS_ERR(waiter->bucket))
++ waiter->bucket = futex_get_bucket(uaddr, &waiter->key, shared);
++ if (IS_ERR(waiter->bucket)) {
+ return PTR_ERR(waiter->bucket);
++ }
+
+ ret = futex_wait(futexv, 1, timo, &timeout, flags);
++ if (ret > 0)
++ ret = 0;
+
+ return ret;
+ }
+@@ -486,8 +622,10 @@ static int futex_parse_waitv(struct futexv *futexv,
+ struct futex_waitv waitv;
+ unsigned int i;
+ struct futex_bucket *bucket;
++ bool shared;
+
+ for (i = 0; i < nr_futexes; i++) {
++
+ if (copy_from_user(&waitv, &uwaitv[i], sizeof(waitv)))
+ return -EFAULT;
+
+@@ -495,8 +633,10 @@ static int futex_parse_waitv(struct futexv *futexv,
+ (waitv.flags & FUTEX_SIZE_MASK) != FUTEX_32)
+ return -EINVAL;
+
++ shared = (waitv.flags & FUTEX_SHARED_FLAG) ? true : false;
++
+ bucket = futex_get_bucket(waitv.uaddr,
+- &futexv->objects[i].key);
++ &futexv->objects[i].key, shared);
+ if (IS_ERR(bucket))
+ return PTR_ERR(bucket);
+
+@@ -505,6 +645,7 @@ static int futex_parse_waitv(struct futexv *futexv,
+ futexv->objects[i].flags = waitv.flags;
+ futexv->objects[i].index = i;
+ INIT_LIST_HEAD(&futexv->objects[i].list);
++ futexv->objects[i].uaddr = (uintptr_t) waitv.uaddr;
+ }
+
+ return 0;
+@@ -573,6 +714,7 @@ static struct futexv *futex_get_parent(uintptr_t waiter, u8 index)
+ SYSCALL_DEFINE3(futex_wake, void __user *, uaddr, unsigned int, nr_wake,
+ unsigned int, flags)
+ {
++ bool shared = (flags & FUTEX_SHARED_FLAG) ? true : false;
+ unsigned int size = flags & FUTEX_SIZE_MASK;
+ struct futex_waiter waiter, *aux, *tmp;
+ struct futex_bucket *bucket;
+@@ -586,7 +728,7 @@ SYSCALL_DEFINE3(futex_wake, void __user *, uaddr, unsigned int, nr_wake,
+ if (size != FUTEX_32)
+ return -EINVAL;
+
+- bucket = futex_get_bucket(uaddr, &waiter.key);
++ bucket = futex_get_bucket(uaddr, &waiter.key, shared);
+ if (IS_ERR(bucket))
+ return PTR_ERR(bucket);
+
+@@ -599,7 +741,8 @@ SYSCALL_DEFINE3(futex_wake, void __user *, uaddr, unsigned int, nr_wake,
+ break;
+
+ if (waiter.key.address == aux->key.address &&
+- waiter.key.mm == aux->key.mm) {
++ waiter.key.mm == aux->key.mm &&
++ waiter.key.offset == aux->key.offset) {
+ struct futexv *parent =
+ futex_get_parent((uintptr_t) aux, aux->index);
+
--
-2.28.0
+2.29.2
-From d8120d2ee1729a6933a606a6720f3e3116e4f699 Mon Sep 17 00:00:00 2001
+
+From ce3ae4bd9f98763fda07f315c1f239c4aaef4b5e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Andr=C3=A9=20Almeida?= <andrealmeid@collabora.com>
Date: Thu, 9 Jul 2020 11:34:40 -0300
-Subject: [PATCH 03/13] selftests: futex: Add futex2 wake/wait test
+Subject: [PATCH 4/9] selftests: futex: Add futex2 wake/wait test
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
@@ -1315,38 +1774,32 @@ temporary workaround that implements the required types and calls the
appropriated syscalls, since futex2 doesn't supports 32 bit sized time.
Signed-off-by: André Almeida <andrealmeid@collabora.com>
+Signed-off-by: Jan200101 <sentrycraft123@gmail.com>
---
- tools/include/uapi/asm-generic/unistd.h | 7 +-
+ tools/include/uapi/asm-generic/unistd.h | 1 -
.../selftests/futex/functional/.gitignore | 1 +
.../selftests/futex/functional/Makefile | 4 +-
- .../selftests/futex/functional/futex2_wait.c | 111 ++++++++++++++++++
+ .../selftests/futex/functional/futex2_wait.c | 148 ++++++++++++++++++
.../testing/selftests/futex/functional/run.sh | 3 +
- .../selftests/futex/include/futex2test.h | 77 ++++++++++++
- 6 files changed, 201 insertions(+), 2 deletions(-)
+ .../selftests/futex/include/futex2test.h | 77 +++++++++
+ 6 files changed, 232 insertions(+), 2 deletions(-)
create mode 100644 tools/testing/selftests/futex/functional/futex2_wait.c
create mode 100644 tools/testing/selftests/futex/include/futex2test.h
diff --git a/tools/include/uapi/asm-generic/unistd.h b/tools/include/uapi/asm-generic/unistd.h
-index 995b36c2ea7d..dd457de21bad 100644
+index 7de33be59..81a90b697 100644
--- a/tools/include/uapi/asm-generic/unistd.h
+++ b/tools/include/uapi/asm-generic/unistd.h
-@@ -860,8 +860,13 @@ __SYSCALL(__NR_pidfd_getfd, sys_pidfd_getfd)
- #define __NR_faccessat2 439
- __SYSCALL(__NR_faccessat2, sys_faccessat2)
-
-+#define __NR_futex_wait 440
-+__SYSCALL(__NR_futex_wait, sys_futex_wait)
-+#define __NR_futex_wake 441
-+__SYSCALL(__NR_futex_wake, sys_futex_wake)
-+
+@@ -872,7 +872,6 @@ __SYSCALL(__NR_futex_waitv, sys_futex_waitv)
#undef __NR_syscalls
--#define __NR_syscalls 440
-+#define __NR_syscalls 442
+ #define __NR_syscalls 444
+-
/*
* 32 bit systems traditionally used different
+ * syscalls for off_t and loff_t arguments, while
diff --git a/tools/testing/selftests/futex/functional/.gitignore b/tools/testing/selftests/futex/functional/.gitignore
-index 0efcd494daab..d61f1df94360 100644
+index 0efcd494d..d61f1df94 100644
--- a/tools/testing/selftests/futex/functional/.gitignore
+++ b/tools/testing/selftests/futex/functional/.gitignore
@@ -6,3 +6,4 @@ futex_wait_private_mapped_file
@@ -1355,7 +1808,7 @@ index 0efcd494daab..d61f1df94360 100644
futex_wait_wouldblock
+futex2_wait
diff --git a/tools/testing/selftests/futex/functional/Makefile b/tools/testing/selftests/futex/functional/Makefile
-index 23207829ec75..7142a94a7ac3 100644
+index 23207829e..7142a94a7 100644
--- a/tools/testing/selftests/futex/functional/Makefile
+++ b/tools/testing/selftests/futex/functional/Makefile
@@ -5,6 +5,7 @@ LDLIBS := -lpthread -lrt
@@ -1378,10 +1831,10 @@ index 23207829ec75..7142a94a7ac3 100644
diff --git a/tools/testing/selftests/futex/functional/futex2_wait.c b/tools/testing/selftests/futex/functional/futex2_wait.c
new file mode 100644
-index 000000000000..752ed26803b3
+index 000000000..0646a24b7
--- /dev/null
+++ b/tools/testing/selftests/futex/functional/futex2_wait.c
-@@ -0,0 +1,111 @@
+@@ -0,0 +1,148 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/******************************************************************************
+ *
@@ -1406,13 +1859,14 @@ index 000000000000..752ed26803b3
+#include <string.h>
+#include <time.h>
+#include <pthread.h>
++#include <sys/shm.h>
+#include "futex2test.h"
+#include "logging.h"
+
-+#define TEST_NAME "futex-wait-wouldblock"
++#define TEST_NAME "futex2-wait"
+#define timeout_ns 30000000
+#define WAKE_WAIT_US 10000
-+futex_t f1 = FUTEX_INITIALIZER;
++futex_t *f1;
+
+void usage(char *prog)
+{
@@ -1426,6 +1880,9 @@ index 000000000000..752ed26803b3
+void *waiterfn(void *arg)
+{
+ struct timespec64 to64;
++ unsigned int flags = 0;
++ if (arg)
++ flags = *((unsigned int *) arg);
+
+ /* setting absolute timeout for futex2 */
+ if (gettime64(CLOCK_MONOTONIC, &to64))
@@ -1438,7 +1895,7 @@ index 000000000000..752ed26803b3
+ to64.tv_nsec -= 1000000000;
+ }
+
-+ if (futex2_wait(&f1, f1, FUTEX_PRIVATE_FLAG | FUTEX_32, &to64))
++ if (futex2_wait(f1, *f1, FUTEX_32 | flags, &to64))
+ printf("waiter failed errno %d\n", errno);
+
+ return NULL;
@@ -1447,8 +1904,11 @@ index 000000000000..752ed26803b3
+int main(int argc, char *argv[])
+{
+ pthread_t waiter;
++ unsigned int flags = FUTEX_SHARED_FLAG;
+ int res, ret = RET_PASS;
+ int c;
++ futex_t f_private = 0;
++ f1 = &f_private;
+
+ while ((c = getopt(argc, argv, "cht:v:")) != -1) {
+ switch (c) {
@@ -1468,33 +1928,63 @@ index 000000000000..752ed26803b3
+ }
+
+ ksft_print_header();
-+ ksft_set_plan(1);
-+ ksft_print_msg("%s: Test FUTEX_WAIT\n",
++ ksft_set_plan(2);
++ ksft_print_msg("%s: Test FUTEX2_WAIT\n",
+ basename(argv[0]));
+
-+ info("Calling futex_wait on f1: %u @ %p with val=%u\n", f1, &f1, f1);
++ info("Calling private futex2_wait on f1: %u @ %p with val=%u\n", *f1, f1, *f1);
+
+ if (pthread_create(&waiter, NULL, waiterfn, NULL))
+ error("pthread_create failed\n", errno);
+
+ usleep(WAKE_WAIT_US);
+
-+ info("Calling futex2_wake on f1: %u @ %p with val=%u\n", f1, &f1, f1);
-+ res = futex2_wake(&f1, 1, FUTEX_PRIVATE_FLAG | FUTEX_32);
++ info("Calling private futex2_wake on f1: %u @ %p with val=%u\n", *f1, f1, *f1);
++ res = futex2_wake(f1, 1, FUTEX_32);
++ if (res != 1) {
++ ksft_test_result_fail("futex2_wake private returned: %d %s\n",
++ res ? errno : res,
++ res ? strerror(errno) : "");
++ ret = RET_FAIL;
++ } else {
++ ksft_test_result_pass("futex2_wake private succeeds\n");
++ }
++
++ int shm_id = shmget(IPC_PRIVATE, 4096, IPC_CREAT | 0666);
++ if (shm_id < 0) {
++ perror("shmget");
++ exit(1);
++ }
++
++ unsigned int *shared_data = shmat(shm_id, NULL, 0);
++ *shared_data = 0;
++ f1 = shared_data;
++
++ info("Calling shared futex2_wait on f1: %u @ %p with val=%u\n", *f1, f1, *f1);
++
++ if (pthread_create(&waiter, NULL, waiterfn, &flags))
++ error("pthread_create failed\n", errno);
++
++ usleep(WAKE_WAIT_US);
++
++ info("Calling shared futex2_wake on f1: %u @ %p with val=%u\n", *f1, f1, *f1);
++ res = futex2_wake(f1, 1, FUTEX_32 | FUTEX_SHARED_FLAG);
+ if (res != 1) {
-+ ksft_test_result_fail("futex2_wake returned: %d %s\n",
++ ksft_test_result_fail("futex2_wake shared returned: %d %s\n",
+ res ? errno : res,
+ res ? strerror(errno) : "");
+ ret = RET_FAIL;
+ } else {
-+ ksft_test_result_pass("futex2_wake wouldblock succeeds\n");
++ ksft_test_result_pass("futex2_wake shared succeeds\n");
+ }
+
++ shmdt(shared_data);
++
+ ksft_print_cnts();
+ return ret;
+}
diff --git a/tools/testing/selftests/futex/functional/run.sh b/tools/testing/selftests/futex/functional/run.sh
-index 1acb6ace1680..3730159c865a 100755
+index 1acb6ace1..3730159c8 100755
--- a/tools/testing/selftests/futex/functional/run.sh
+++ b/tools/testing/selftests/futex/functional/run.sh
@@ -73,3 +73,6 @@ echo
@@ -1506,7 +1996,7 @@ index 1acb6ace1680..3730159c865a 100755
+./futex2_wait $COLOR
diff --git a/tools/testing/selftests/futex/include/futex2test.h b/tools/testing/selftests/futex/include/futex2test.h
new file mode 100644
-index 000000000000..807b8b57fe61
+index 000000000..807b8b57f
--- /dev/null
+++ b/tools/testing/selftests/futex/include/futex2test.h
@@ -0,0 +1,77 @@
@@ -1588,12 +2078,13 @@ index 000000000000..807b8b57fe61
+ return syscall(__NR_futex_wake, uaddr, nr, flags);
+}
--
-2.28.0
+2.29.2
+
-From d4a7ca72f276b2e337eaedcbbe58a2782e0e7d3b Mon Sep 17 00:00:00 2001
+From 1e0349f5a81a43cdb50d9a97812194df6d937b69 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Andr=C3=A9=20Almeida?= <andrealmeid@collabora.com>
Date: Thu, 9 Jul 2020 11:36:14 -0300
-Subject: [PATCH 04/13] selftests: futex: Add futex2 timeout test
+Subject: [PATCH 5/9] selftests: futex: Add futex2 timeout test
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
@@ -1602,12 +2093,13 @@ Adapt existing futex wait timeout file to test the same mechanism for
futex2.
Signed-off-by: André Almeida <andrealmeid@collabora.com>
+Signed-off-by: Jan200101 <sentrycraft123@gmail.com>
---
- .../futex/functional/futex_wait_timeout.c | 38 ++++++++++++++-----
- 1 file changed, 29 insertions(+), 9 deletions(-)
+ .../futex/functional/futex_wait_timeout.c | 58 ++++++++++++++++---
+ 1 file changed, 49 insertions(+), 9 deletions(-)
diff --git a/tools/testing/selftests/futex/functional/futex_wait_timeout.c b/tools/testing/selftests/futex/functional/futex_wait_timeout.c
-index ee55e6d389a3..d2e7ae18985b 100644
+index ee55e6d38..245670e44 100644
--- a/tools/testing/selftests/futex/functional/futex_wait_timeout.c
+++ b/tools/testing/selftests/futex/functional/futex_wait_timeout.c
@@ -11,6 +11,7 @@
@@ -1637,12 +2129,12 @@ index ee55e6d389a3..d2e7ae18985b 100644
int res, ret = RET_PASS;
int c;
-@@ -65,22 +67,40 @@ int main(int argc, char *argv[])
+@@ -65,22 +67,60 @@ int main(int argc, char *argv[])
}
ksft_print_header();
- ksft_set_plan(1);
-+ ksft_set_plan(2);
++ ksft_set_plan(3);
ksft_print_msg("%s: Block on a futex and wait for timeout\n",
basename(argv[0]));
ksft_print_msg("\tArguments: timeout=%ldns\n", timeout_ns);
@@ -1661,7 +2153,7 @@ index ee55e6d389a3..d2e7ae18985b 100644
+ ksft_test_result_pass("futex_wait timeout succeeds\n");
+ }
+
-+ /* setting absolute timeout for futex2 */
++ /* setting absolute monotonic timeout for futex2 */
+ if (gettime64(CLOCK_MONOTONIC, &to64))
+ error("gettime64 failed\n", errno);
+
@@ -1673,12 +2165,32 @@ index ee55e6d389a3..d2e7ae18985b 100644
+ }
+
+ info("Calling futex2_wait on f1: %u @ %p\n", f1, &f1);
-+ res = futex2_wait(&f1, f1, FUTEX_PRIVATE_FLAG | FUTEX_32, &to64);
++ res = futex2_wait(&f1, f1, FUTEX_32, &to64);
+ if (!res || errno != ETIMEDOUT) {
-+ ksft_test_result_fail("futex2_wait returned %d\n", ret < 0 ? errno : ret);
++ ksft_test_result_fail("futex2_wait monotonic returned %d\n", ret < 0 ? errno : ret);
++ ret = RET_FAIL;
++ } else {
++ ksft_test_result_pass("futex2_wait monotonic timeout succeeds\n");
++ }
++
++ /* setting absolute realtime timeout for futex2 */
++ if (gettime64(CLOCK_REALTIME, &to64))
++ error("gettime64 failed\n", errno);
++
++ to64.tv_nsec += timeout_ns;
++
++ if (to64.tv_nsec >= 1000000000) {
++ to64.tv_sec++;
++ to64.tv_nsec -= 1000000000;
++ }
++
++ info("Calling futex2_wait on f1: %u @ %p\n", f1, &f1);
++ res = futex2_wait(&f1, f1, FUTEX_32 | FUTEX_CLOCK_REALTIME, &to64);
++ if (!res || errno != ETIMEDOUT) {
++ ksft_test_result_fail("futex2_wait realtime returned %d\n", ret < 0 ? errno : ret);
ret = RET_FAIL;
+ } else {
-+ ksft_test_result_pass("futex2_wait timeout succeeds\n");
++ ksft_test_result_pass("futex2_wait realtime timeout succeeds\n");
}
- print_result(TEST_NAME, ret);
@@ -1686,12 +2198,13 @@ index ee55e6d389a3..d2e7ae18985b 100644
return ret;
}
--
-2.28.0
+2.29.2
+
-From 6d2252d43d36a5eb2b9170351128007e27f47737 Mon Sep 17 00:00:00 2001
+From 298120f6e3a758cd03e26a104f5ce60a88501b7f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Andr=C3=A9=20Almeida?= <andrealmeid@collabora.com>
Date: Thu, 9 Jul 2020 11:37:42 -0300
-Subject: [PATCH 05/13] selftests: futex: Add futex2 wouldblock test
+Subject: [PATCH 6/9] selftests: futex: Add futex2 wouldblock test
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
@@ -1700,12 +2213,13 @@ Adapt existing futex wait wouldblock file to test the same mechanism for
futex2.
Signed-off-by: André Almeida <andrealmeid@collabora.com>
+Signed-off-by: Jan200101 <sentrycraft123@gmail.com>
---
.../futex/functional/futex_wait_wouldblock.c | 33 ++++++++++++++++---
1 file changed, 29 insertions(+), 4 deletions(-)
diff --git a/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c b/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c
-index 0ae390ff8164..8187f0754cd2 100644
+index 0ae390ff8..1f72e5928 100644
--- a/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c
+++ b/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c
@@ -12,6 +12,7 @@
@@ -1766,7 +2280,7 @@ index 0ae390ff8164..8187f0754cd2 100644
+ }
+
+ info("Calling futex2_wait on f1: %u @ %p with val=%u\n", f1, &f1, f1+1);
-+ res = futex2_wait(&f1, f1+1, FUTEX_PRIVATE_FLAG | FUTEX_32, &to64);
++ res = futex2_wait(&f1, f1+1, FUTEX_32, &to64);
+ if (!res || errno != EWOULDBLOCK) {
+ ksft_test_result_fail("futex2_wait returned: %d %s\n",
+ res ? errno : res, res ? strerror(errno) : "");
@@ -1779,4919 +2293,693 @@ index 0ae390ff8164..8187f0754cd2 100644
return ret;
}
--
-2.28.0
+2.29.2
-From 6b35a09be663f5a844e089f1ddd370137832e7a7 Mon Sep 17 00:00:00 2001
+
+From 05c697a239aad5e8608c6acf0da9239cac5f7a2e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Andr=C3=A9=20Almeida?= <andrealmeid@collabora.com>
-Date: Wed, 14 Oct 2020 16:10:09 -0300
-Subject: [PATCH 06/13] DONOTMERGE: futex: Add a clone of futex implementation
+Date: Tue, 8 Dec 2020 18:47:31 -0300
+Subject: [PATCH 7/9] selftests: futex: Add futex2 waitv test
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
-For comparative performance tests between the original futex and the new
-futex2 interface, create a clone of the current futex. In that way, we
-can have a fair comparison, since the futex2 table will be empty with no
-contention for the bucket locks. Since futex is widely used in the host
-system, the performance tests could get misleading results by the tests
-competing with the system for resources.
-
Signed-off-by: André Almeida <andrealmeid@collabora.com>
+Signed-off-by: Jan200101 <sentrycraft123@gmail.com>
---
- arch/x86/entry/syscalls/syscall_32.tbl | 1 +
- arch/x86/entry/syscalls/syscall_64.tbl | 1 +
- include/linux/syscalls.h | 3 +
- include/uapi/asm-generic/unistd.h | 5 +-
- kernel/Makefile | 1 +
- kernel/futex1.c | 3384 +++++++++++++++++
- kernel/sys_ni.c | 2 +
- tools/arch/x86/include/asm/unistd_64.h | 12 +
- tools/include/uapi/asm-generic/unistd.h | 6 +-
- .../arch/x86/entry/syscalls/syscall_64.tbl | 3 +
- tools/perf/bench/futex.h | 23 +-
- 11 files changed, 3438 insertions(+), 3 deletions(-)
- create mode 100644 kernel/futex1.c
+ .../selftests/futex/functional/.gitignore | 1 +
+ .../selftests/futex/functional/Makefile | 3 +-
+ .../selftests/futex/functional/futex2_waitv.c | 156 ++++++++++++++++++
+ .../testing/selftests/futex/functional/run.sh | 3 +
+ .../selftests/futex/include/futex2test.h | 25 ++-
+ 5 files changed, 183 insertions(+), 5 deletions(-)
+ create mode 100644 tools/testing/selftests/futex/functional/futex2_waitv.c
-diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl
-index c844c0cbf0e5..820fa53ccf75 100644
---- a/arch/x86/entry/syscalls/syscall_32.tbl
-+++ b/arch/x86/entry/syscalls/syscall_32.tbl
-@@ -447,3 +447,4 @@
- 440 i386 futex_wait sys_futex_wait
- 441 i386 futex_wake sys_futex_wake
- 442 i386 futex_waitv sys_futex_waitv
-+443 i386 futex1 sys_futex1
-diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl
-index 0901c26c6786..99795136cb98 100644
---- a/arch/x86/entry/syscalls/syscall_64.tbl
-+++ b/arch/x86/entry/syscalls/syscall_64.tbl
-@@ -364,6 +364,7 @@
- 440 common futex_wait sys_futex_wait
- 441 common futex_wake sys_futex_wake
- 442 common futex_waitv sys_futex_waitv
-+443 common futex1 sys_futex1
-
- #
- # x32-specific system call numbers start at 512 to avoid cache impact
-diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
-index 38c3a87dbfc2..0351f6ad09a9 100644
---- a/include/linux/syscalls.h
-+++ b/include/linux/syscalls.h
-@@ -596,6 +596,9 @@ asmlinkage long sys_futex_wait(void __user *uaddr, unsigned long val,
- asmlinkage long sys_futex_wake(void __user *uaddr, unsigned long nr_wake,
- unsigned long flags);
-
-+asmlinkage long sys_futex1(void __user *uaddr, unsigned long nr_wake,
-+ unsigned long flags);
-+
- /* kernel/hrtimer.c */
- asmlinkage long sys_nanosleep(struct __kernel_timespec __user *rqtp,
- struct __kernel_timespec __user *rmtp);
-diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h
-index d7ebbed0a18c..e3ba6cb1f76d 100644
---- a/include/uapi/asm-generic/unistd.h
-+++ b/include/uapi/asm-generic/unistd.h
-@@ -869,8 +869,11 @@ __SYSCALL(__NR_futex_wake, sys_futex_wake)
- #define __NR_futex_waitv 442
- __SYSCALL(__NR_futex_waitv, sys_futex_waitv)
+diff --git a/tools/testing/selftests/futex/functional/.gitignore b/tools/testing/selftests/futex/functional/.gitignore
+index d61f1df94..d0b8f637b 100644
+--- a/tools/testing/selftests/futex/functional/.gitignore
++++ b/tools/testing/selftests/futex/functional/.gitignore
+@@ -7,3 +7,4 @@ futex_wait_timeout
+ futex_wait_uninitialized_heap
+ futex_wait_wouldblock
+ futex2_wait
++futex2_waitv
+diff --git a/tools/testing/selftests/futex/functional/Makefile b/tools/testing/selftests/futex/functional/Makefile
+index 7142a94a7..b857b9450 100644
+--- a/tools/testing/selftests/futex/functional/Makefile
++++ b/tools/testing/selftests/futex/functional/Makefile
+@@ -16,7 +16,8 @@ TEST_GEN_FILES := \
+ futex_requeue_pi_mismatched_ops \
+ futex_wait_uninitialized_heap \
+ futex_wait_private_mapped_file \
+- futex2_wait
++ futex2_wait \
++ futex2_waitv
-+#define __NR_futex1 443
-+__SYSCALL(__NR_futex1, sys_futex1)
-+
- #undef __NR_syscalls
--#define __NR_syscalls 443
-+#define __NR_syscalls 444
+ TEST_PROGS := run.sh
- /*
- * 32 bit systems traditionally used different
-diff --git a/kernel/Makefile b/kernel/Makefile
-index 51ea9bc647bf..0fe55a8cb9e2 100644
---- a/kernel/Makefile
-+++ b/kernel/Makefile
-@@ -57,6 +57,7 @@ obj-$(CONFIG_PROFILING) += profile.o
- obj-$(CONFIG_STACKTRACE) += stacktrace.o
- obj-y += time/
- obj-$(CONFIG_FUTEX) += futex.o
-+obj-$(CONFIG_FUTEX2) += futex1.o
- obj-$(CONFIG_FUTEX2) += futex2.o
- obj-$(CONFIG_GENERIC_ISA_DMA) += dma.o
- obj-$(CONFIG_SMP) += smp.o
-diff --git a/kernel/futex1.c b/kernel/futex1.c
+diff --git a/tools/testing/selftests/futex/functional/futex2_waitv.c b/tools/testing/selftests/futex/functional/futex2_waitv.c
new file mode 100644
-index 000000000000..4f7bf312fefd
+index 000000000..d4b116651
--- /dev/null
-+++ b/kernel/futex1.c
-@@ -0,0 +1,3384 @@
++++ b/tools/testing/selftests/futex/functional/futex2_waitv.c
+@@ -0,0 +1,156 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
-+/*
-+ * Fast Userspace Mutexes (which I call "Futexes!").
-+ * (C) Rusty Russell, IBM 2002
-+ *
-+ * Generalized futexes, futex requeueing, misc fixes by Ingo Molnar
-+ * (C) Copyright 2003 Red Hat Inc, All Rights Reserved
-+ *
-+ * Removed page pinning, fix privately mapped COW pages and other cleanups
-+ * (C) Copyright 2003, 2004 Jamie Lokier
-+ *
-+ * Robust futex support started by Ingo Molnar
-+ * (C) Copyright 2006 Red Hat Inc, All Rights Reserved
-+ * Thanks to Thomas Gleixner for suggestions, analysis and fixes.
-+ *
-+ * PI-futex support started by Ingo Molnar and Thomas Gleixner
-+ * Copyright (C) 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
-+ * Copyright (C) 2006 Timesys Corp., Thomas Gleixner <tglx@timesys.com>
-+ *
-+ * PRIVATE futexes by Eric Dumazet
-+ * Copyright (C) 2007 Eric Dumazet <dada1@cosmosbay.com>
-+ *
-+ * Requeue-PI support by Darren Hart <dvhltc@us.ibm.com>
-+ * Copyright (C) IBM Corporation, 2009
-+ * Thanks to Thomas Gleixner for conceptual design and careful reviews.
-+ *
-+ * Thanks to Ben LaHaise for yelling "hashed waitqueues" loudly
-+ * enough at me, Linus for the original (flawed) idea, Matthew
-+ * Kirkwood for proof-of-concept implementation.
-+ *
-+ * "The futexes are also cursed."
-+ * "But they come in a choice of three flavours!"
-+ */
-+#include <linux/compat.h>
-+#include <linux/jhash.h>
-+#include <linux/pagemap.h>
-+#include <linux/syscalls.h>
-+#include <linux/hugetlb.h>
-+#include <linux/freezer.h>
-+#include <linux/memblock.h>
-+#include <linux/fault-inject.h>
-+
-+#include <asm/futex.h>
-+
-+#include "locking/rtmutex_common.h"
-+
-+/*
-+ * READ this before attempting to hack on futexes!
-+ *
-+ * Basic futex operation and ordering guarantees
-+ * =============================================
-+ *
-+ * The waiter reads the futex value in user space and calls
-+ * futex_wait(). This function computes the hash bucket and acquires
-+ * the hash bucket lock. After that it reads the futex user space value
-+ * again and verifies that the data has not changed. If it has not changed
-+ * it enqueues itself into the hash bucket, releases the hash bucket lock
-+ * and schedules.
-+ *
-+ * The waker side modifies the user space value of the futex and calls
-+ * futex_wake(). This function computes the hash bucket and acquires the
-+ * hash bucket lock. Then it looks for waiters on that futex in the hash
-+ * bucket and wakes them.
-+ *
-+ * In futex wake up scenarios where no tasks are blocked on a futex, taking
-+ * the hb spinlock can be avoided and simply return. In order for this
-+ * optimization to work, ordering guarantees must exist so that the waiter
-+ * being added to the list is acknowledged when the list is concurrently being
-+ * checked by the waker, avoiding scenarios like the following:
-+ *
-+ * CPU 0 CPU 1
-+ * val = *futex;
-+ * sys_futex(WAIT, futex, val);
-+ * futex_wait(futex, val);
-+ * uval = *futex;
-+ * *futex = newval;
-+ * sys_futex(WAKE, futex);
-+ * futex_wake(futex);
-+ * if (queue_empty())
-+ * return;
-+ * if (uval == val)
-+ * lock(hash_bucket(futex));
-+ * queue();
-+ * unlock(hash_bucket(futex));
-+ * schedule();
-+ *
-+ * This would cause the waiter on CPU 0 to wait forever because it
-+ * missed the transition of the user space value from val to newval
-+ * and the waker did not find the waiter in the hash bucket queue.
-+ *
-+ * The correct serialization ensures that a waiter either observes
-+ * the changed user space value before blocking or is woken by a
-+ * concurrent waker:
-+ *
-+ * CPU 0 CPU 1
-+ * val = *futex;
-+ * sys_futex(WAIT, futex, val);
-+ * futex_wait(futex, val);
-+ *
-+ * waiters++; (a)
-+ * smp_mb(); (A) <-- paired with -.
-+ * |
-+ * lock(hash_bucket(futex)); |
-+ * |
-+ * uval = *futex; |
-+ * | *futex = newval;
-+ * | sys_futex(WAKE, futex);
-+ * | futex_wake(futex);
-+ * |
-+ * `--------> smp_mb(); (B)
-+ * if (uval == val)
-+ * queue();
-+ * unlock(hash_bucket(futex));
-+ * schedule(); if (waiters)
-+ * lock(hash_bucket(futex));
-+ * else wake_waiters(futex);
-+ * waiters--; (b) unlock(hash_bucket(futex));
-+ *
-+ * Where (A) orders the waiters increment and the futex value read through
-+ * atomic operations (see hb_waiters_inc) and where (B) orders the write
-+ * to futex and the waiters read (see hb_waiters_pending()).
-+ *
-+ * This yields the following case (where X:=waiters, Y:=futex):
-+ *
-+ * X = Y = 0
-+ *
-+ * w[X]=1 w[Y]=1
-+ * MB MB
-+ * r[Y]=y r[X]=x
-+ *
-+ * Which guarantees that x==0 && y==0 is impossible; which translates back into
-+ * the guarantee that we cannot both miss the futex variable change and the
-+ * enqueue.
-+ *
-+ * Note that a new waiter is accounted for in (a) even when it is possible that
-+ * the wait call can return error, in which case we backtrack from it in (b).
-+ * Refer to the comment in queue_lock().
-+ *
-+ * Similarly, in order to account for waiters being requeued on another
-+ * address we always increment the waiters for the destination bucket before
-+ * acquiring the lock. It then decrements them again after releasing it -
-+ * the code that actually moves the futex(es) between hash buckets (requeue_futex)
-+ * will do the additional required waiter count housekeeping. This is done for
-+ * double_lock_hb() and double_unlock_hb(), respectively.
-+ */
-+
-+#ifdef CONFIG_HAVE_FUTEX_CMPXCHG
-+#define futex_cmpxchg_enabled 1
-+#else
-+static int __read_mostly futex_cmpxchg_enabled;
-+#endif
-+
-+/*
-+ * Futex flags used to encode options to functions and preserve them across
-+ * restarts.
-+ */
-+#ifdef CONFIG_MMU
-+# define FLAGS_SHARED 0x01
-+#else
-+/*
-+ * NOMMU does not have per process address space. Let the compiler optimize
-+ * code away.
-+ */
-+# define FLAGS_SHARED 0x00
-+#endif
-+#define FLAGS_CLOCKRT 0x02
-+#define FLAGS_HAS_TIMEOUT 0x04
-+
-+/*
-+ * Priority Inheritance state:
-+ */
-+struct futex_pi_state {
-+ /*
-+ * list of 'owned' pi_state instances - these have to be
-+ * cleaned up in do_exit() if the task exits prematurely:
-+ */
-+ struct list_head list;
-+
-+ /*
-+ * The PI object:
-+ */
-+ struct rt_mutex pi_mutex;
-+
-+ struct task_struct *owner;
-+ refcount_t refcount;
-+
-+ union futex_key key;
-+} __randomize_layout;
-+
-+/**
-+ * struct futex_q - The hashed futex queue entry, one per waiting task
-+ * @list: priority-sorted list of tasks waiting on this futex
-+ * @task: the task waiting on the futex
-+ * @lock_ptr: the hash bucket lock
-+ * @key: the key the futex is hashed on
-+ * @pi_state: optional priority inheritance state
-+ * @rt_waiter: rt_waiter storage for use with requeue_pi
-+ * @requeue_pi_key: the requeue_pi target futex key
-+ * @bitset: bitset for the optional bitmasked wakeup
-+ *
-+ * We use this hashed waitqueue, instead of a normal wait_queue_entry_t, so
-+ * we can wake only the relevant ones (hashed queues may be shared).
++/******************************************************************************
+ *
-+ * A futex_q has a woken state, just like tasks have TASK_RUNNING.
-+ * It is considered woken when plist_node_empty(&q->list) || q->lock_ptr == 0.
-+ * The order of wakeup is always to make the first condition true, then
-+ * the second.
++ * Copyright Collabora Ltd., 2020
+ *
-+ * PI futexes are typically woken before they are removed from the hash list via
-+ * the rt_mutex code. See unqueue_me_pi().
-+ */
-+struct futex_q {
-+ struct plist_node list;
-+
-+ struct task_struct *task;
-+ spinlock_t *lock_ptr;
-+ union futex_key key;
-+ struct futex_pi_state *pi_state;
-+ struct rt_mutex_waiter *rt_waiter;
-+ union futex_key *requeue_pi_key;
-+ u32 bitset;
-+} __randomize_layout;
-+
-+static const struct futex_q futex_q_init = {
-+ /* list gets initialized in queue_me()*/
-+ .key = FUTEX_KEY_INIT,
-+ .bitset = FUTEX_BITSET_MATCH_ANY
-+};
-+
-+/*
-+ * Hash buckets are shared by all the futex_keys that hash to the same
-+ * location. Each key may have multiple futex_q structures, one for each task
-+ * waiting on a futex.
-+ */
-+struct futex_hash_bucket {
-+ atomic_t waiters;
-+ spinlock_t lock;
-+ struct plist_head chain;
-+} ____cacheline_aligned_in_smp;
-+
-+/*
-+ * The base of the bucket array and its size are always used together
-+ * (after initialization only in hash_futex()), so ensure that they
-+ * reside in the same cacheline.
-+ */
-+static struct {
-+ struct futex_hash_bucket *queues;
-+ unsigned long hashsize;
-+} __futex_data __read_mostly __aligned(2*sizeof(long));
-+#define futex_queues (__futex_data.queues)
-+#define futex_hashsize (__futex_data.hashsize)
-+
-+
-+/*
-+ * Fault injections for futexes.
-+ */
-+#ifdef CONFIG_FAIL_FUTEX
-+
-+static struct {
-+ struct fault_attr attr;
-+
-+ bool ignore_private;
-+} fail_futex = {
-+ .attr = FAULT_ATTR_INITIALIZER,
-+ .ignore_private = false,
-+};
-+
-+static int __init setup_fail_futex(char *str)
-+{
-+ return setup_fault_attr(&fail_futex.attr, str);
-+}
-+__setup("fail_futex=", setup_fail_futex);
-+
-+static bool should_fail_futex(bool fshared)
-+{
-+ if (fail_futex.ignore_private && !fshared)
-+ return false;
-+
-+ return should_fail(&fail_futex.attr, 1);
-+}
-+
-+#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
-+
-+static int __init fail_futex_debugfs(void)
-+{
-+ umode_t mode = S_IFREG | S_IRUSR | S_IWUSR;
-+ struct dentry *dir;
-+
-+ dir = fault_create_debugfs_attr("fail_futex", NULL,
-+ &fail_futex.attr);
-+ if (IS_ERR(dir))
-+ return PTR_ERR(dir);
-+
-+ debugfs_create_bool("ignore-private", mode, dir,
-+ &fail_futex.ignore_private);
-+ return 0;
-+}
-+
-+late_initcall(fail_futex_debugfs);
-+
-+#endif /* CONFIG_FAULT_INJECTION_DEBUG_FS */
-+
-+#else
-+static inline bool should_fail_futex(bool fshared)
-+{
-+ return false;
-+}
-+#endif /* CONFIG_FAIL_FUTEX */
-+
-+/*
-+ * Reflects a new waiter being added to the waitqueue.
-+ */
-+static inline void hb_waiters_inc(struct futex_hash_bucket *hb)
-+{
-+#ifdef CONFIG_SMP
-+ atomic_inc(&hb->waiters);
-+ /*
-+ * Full barrier (A), see the ordering comment above.
-+ */
-+ smp_mb__after_atomic();
-+#endif
-+}
-+
-+/*
-+ * Reflects a waiter being removed from the waitqueue by wakeup
-+ * paths.
-+ */
-+static inline void hb_waiters_dec(struct futex_hash_bucket *hb)
-+{
-+#ifdef CONFIG_SMP
-+ atomic_dec(&hb->waiters);
-+#endif
-+}
-+
-+static inline int hb_waiters_pending(struct futex_hash_bucket *hb)
-+{
-+#ifdef CONFIG_SMP
-+ /*
-+ * Full barrier (B), see the ordering comment above.
-+ */
-+ smp_mb();
-+ return atomic_read(&hb->waiters);
-+#else
-+ return 1;
-+#endif
-+}
-+
-+/**
-+ * hash_futex - Return the hash bucket in the global hash
-+ * @key: Pointer to the futex key for which the hash is calculated
++ * DESCRIPTION
++ * Test waitv/wake mechanism of futex2, using 32bit sized futexes.
+ *
-+ * We hash on the keys returned from get_futex_key (see below) and return the
-+ * corresponding hash bucket in the global hash.
-+ */
-+static struct futex_hash_bucket *hash_futex(union futex_key *key)
-+{
-+ u32 hash = jhash2((u32 *)key, offsetof(typeof(*key), both.offset) / 4,
-+ key->both.offset);
-+
-+ return &futex_queues[hash & (futex_hashsize - 1)];
-+}
-+
-+
-+/**
-+ * match_futex - Check whether two futex keys are equal
-+ * @key1: Pointer to key1
-+ * @key2: Pointer to key2
++ * AUTHOR
++ * André Almeida <andrealmeid@collabora.com>
+ *
-+ * Return 1 if two futex_keys are equal, 0 otherwise.
-+ */
-+static inline int match_futex(union futex_key *key1, union futex_key *key2)
-+{
-+ return (key1 && key2
-+ && key1->both.word == key2->both.word
-+ && key1->both.ptr == key2->both.ptr
-+ && key1->both.offset == key2->both.offset);
-+}
-+
-+enum futex_access {
-+ FUTEX_READ,
-+ FUTEX_WRITE
-+};
-+
-+/**
-+ * futex_setup_timer - set up the sleeping hrtimer.
-+ * @time: ptr to the given timeout value
-+ * @timeout: the hrtimer_sleeper structure to be set up
-+ * @flags: futex flags
-+ * @range_ns: optional range in ns
++ * HISTORY
++ * 2020-Jul-9: Initial version by André <andrealmeid@collabora.com>
+ *
-+ * Return: Initialized hrtimer_sleeper structure or NULL if no timeout
-+ * value given
-+ */
-+static inline struct hrtimer_sleeper *
-+futex_setup_timer(ktime_t *time, struct hrtimer_sleeper *timeout,
-+ int flags, u64 range_ns)
-+{
-+ if (!time)
-+ return NULL;
++ *****************************************************************************/
+
-+ hrtimer_init_sleeper_on_stack(timeout, (flags & FLAGS_CLOCKRT) ?
-+ CLOCK_REALTIME : CLOCK_MONOTONIC,
-+ HRTIMER_MODE_ABS);
-+ /*
-+ * If range_ns is 0, calling hrtimer_set_expires_range_ns() is
-+ * effectively the same as calling hrtimer_set_expires().
-+ */
-+ hrtimer_set_expires_range_ns(&timeout->timer, *time, range_ns);
++#include <errno.h>
++#include <error.h>
++#include <getopt.h>
++#include <stdio.h>
++#include <stdlib.h>
++#include <string.h>
++#include <time.h>
++#include <pthread.h>
++#include <sys/shm.h>
++#include "futex2test.h"
++#include "logging.h"
+
-+ return timeout;
-+}
++#define TEST_NAME "futex2-wait"
++#define timeout_ns 1000000000
++#define WAKE_WAIT_US 10000
++#define NR_FUTEXES 30
++struct futex_waitv waitv[NR_FUTEXES];
++u_int32_t futexes[NR_FUTEXES] = {0};
+
-+/*
-+ * Generate a machine wide unique identifier for this inode.
-+ *
-+ * This relies on u64 not wrapping in the life-time of the machine; which with
-+ * 1ns resolution means almost 585 years.
-+ *
-+ * This further relies on the fact that a well formed program will not unmap
-+ * the file while it has a (shared) futex waiting on it. This mapping will have
-+ * a file reference which pins the mount and inode.
-+ *
-+ * If for some reason an inode gets evicted and read back in again, it will get
-+ * a new sequence number and will _NOT_ match, even though it is the exact same
-+ * file.
-+ *
-+ * It is important that match_futex() will never have a false-positive, esp.
-+ * for PI futexes that can mess up the state. The above argues that false-negatives
-+ * are only possible for malformed programs.
-+ */
-+static u64 get_inode_sequence_number(struct inode *inode)
++void usage(char *prog)
+{
-+ static atomic64_t i_seq;
-+ u64 old;
-+
-+ /* Does the inode already have a sequence number? */
-+ old = atomic64_read(&inode->i_sequence);
-+ if (likely(old))
-+ return old;
-+
-+ for (;;) {
-+ u64 new = atomic64_add_return(1, &i_seq);
-+ if (WARN_ON_ONCE(!new))
-+ continue;
-+
-+ old = atomic64_cmpxchg_relaxed(&inode->i_sequence, 0, new);
-+ if (old)
-+ return old;
-+ return new;
-+ }
++ printf("Usage: %s\n", prog);
++ printf(" -c Use color\n");
++ printf(" -h Display this help message\n");
++ printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n",
++ VQUIET, VCRITICAL, VINFO);
+}
+
-+/**
-+ * get_futex_key() - Get parameters which are the keys for a futex
-+ * @uaddr: virtual address of the futex
-+ * @fshared: false for a PROCESS_PRIVATE futex, true for PROCESS_SHARED
-+ * @key: address where result is stored.
-+ * @rw: mapping needs to be read/write (values: FUTEX_READ,
-+ * FUTEX_WRITE)
-+ *
-+ * Return: a negative error code or 0
-+ *
-+ * The key words are stored in @key on success.
-+ *
-+ * For shared mappings (when @fshared), the key is:
-+ *
-+ * ( inode->i_sequence, page->index, offset_within_page )
-+ *
-+ * [ also see get_inode_sequence_number() ]
-+ *
-+ * For private mappings (or when !@fshared), the key is:
-+ *
-+ * ( current->mm, address, 0 )
-+ *
-+ * This allows (cross process, where applicable) identification of the futex
-+ * without keeping the page pinned for the duration of the FUTEX_WAIT.
-+ *
-+ * lock_page() might sleep, the caller should not hold a spinlock.
-+ */
-+static int get_futex_key(u32 __user *uaddr, bool fshared, union futex_key *key,
-+ enum futex_access rw)
++void *waiterfn(void *arg)
+{
-+ unsigned long address = (unsigned long)uaddr;
-+ struct mm_struct *mm = current->mm;
-+ struct page *page, *tail;
-+ struct address_space *mapping;
-+ int err, ro = 0;
-+
-+ /*
-+ * The futex address must be "naturally" aligned.
-+ */
-+ key->both.offset = address % PAGE_SIZE;
-+ if (unlikely((address % sizeof(u32)) != 0))
-+ return -EINVAL;
-+ address -= key->both.offset;
-+
-+ if (unlikely(!access_ok(uaddr, sizeof(u32))))
-+ return -EFAULT;
-+
-+ if (unlikely(should_fail_futex(fshared)))
-+ return -EFAULT;
-+
-+ /*
-+ * PROCESS_PRIVATE futexes are fast.
-+ * As the mm cannot disappear under us and the 'key' only needs
-+ * virtual address, we dont even have to find the underlying vma.
-+ * Note : We do have to check 'uaddr' is a valid user address,
-+ * but access_ok() should be faster than find_vma()
-+ */
-+ if (!fshared) {
-+ key->private.mm = mm;
-+ key->private.address = address;
-+ return 0;
-+ }
-+
-+again:
-+ /* Ignore any VERIFY_READ mapping (futex common case) */
-+ if (unlikely(should_fail_futex(true)))
-+ return -EFAULT;
-+
-+ err = get_user_pages_fast(address, 1, FOLL_WRITE, &page);
-+ /*
-+ * If write access is not required (eg. FUTEX_WAIT), try
-+ * and get read-only access.
-+ */
-+ if (err == -EFAULT && rw == FUTEX_READ) {
-+ err = get_user_pages_fast(address, 1, 0, &page);
-+ ro = 1;
-+ }
-+ if (err < 0)
-+ return err;
-+ else
-+ err = 0;
-+
-+ /*
-+ * The treatment of mapping from this point on is critical. The page
-+ * lock protects many things but in this context the page lock
-+ * stabilizes mapping, prevents inode freeing in the shared
-+ * file-backed region case and guards against movement to swap cache.
-+ *
-+ * Strictly speaking the page lock is not needed in all cases being
-+ * considered here and page lock forces unnecessarily serialization
-+ * From this point on, mapping will be re-verified if necessary and
-+ * page lock will be acquired only if it is unavoidable
-+ *
-+ * Mapping checks require the head page for any compound page so the
-+ * head page and mapping is looked up now. For anonymous pages, it
-+ * does not matter if the page splits in the future as the key is
-+ * based on the address. For filesystem-backed pages, the tail is
-+ * required as the index of the page determines the key. For
-+ * base pages, there is no tail page and tail == page.
-+ */
-+ tail = page;
-+ page = compound_head(page);
-+ mapping = READ_ONCE(page->mapping);
-+
-+ /*
-+ * If page->mapping is NULL, then it cannot be a PageAnon
-+ * page; but it might be the ZERO_PAGE or in the gate area or
-+ * in a special mapping (all cases which we are happy to fail);
-+ * or it may have been a good file page when get_user_pages_fast
-+ * found it, but truncated or holepunched or subjected to
-+ * invalidate_complete_page2 before we got the page lock (also
-+ * cases which we are happy to fail). And we hold a reference,
-+ * so refcount care in invalidate_complete_page's remove_mapping
-+ * prevents drop_caches from setting mapping to NULL beneath us.
-+ *
-+ * The case we do have to guard against is when memory pressure made
-+ * shmem_writepage move it from filecache to swapcache beneath us:
-+ * an unlikely race, but we do need to retry for page->mapping.
-+ */
-+ if (unlikely(!mapping)) {
-+ int shmem_swizzled;
-+
-+ /*
-+ * Page lock is required to identify which special case above
-+ * applies. If this is really a shmem page then the page lock
-+ * will prevent unexpected transitions.
-+ */
-+ lock_page(page);
-+ shmem_swizzled = PageSwapCache(page) || page->mapping;
-+ unlock_page(page);
-+ put_page(page);
-+
-+ if (shmem_swizzled)
-+ goto again;
-+
-+ return -EFAULT;
-+ }
-+
-+ /*
-+ * Private mappings are handled in a simple way.
-+ *
-+ * If the futex key is stored on an anonymous page, then the associated
-+ * object is the mm which is implicitly pinned by the calling process.
-+ *
-+ * NOTE: When userspace waits on a MAP_SHARED mapping, even if
-+ * it's a read-only handle, it's expected that futexes attach to
-+ * the object not the particular process.
-+ */
-+ if (PageAnon(page)) {
-+ /*
-+ * A RO anonymous page will never change and thus doesn't make
-+ * sense for futex operations.
-+ */
-+ if (unlikely(should_fail_futex(true)) || ro) {
-+ err = -EFAULT;
-+ goto out;
-+ }
-+
-+ key->both.offset |= FUT_OFF_MMSHARED; /* ref taken on mm */
-+ key->private.mm = mm;
-+ key->private.address = address;
-+
-+ } else {
-+ struct inode *inode;
-+
-+ /*
-+ * The associated futex object in this case is the inode and
-+ * the page->mapping must be traversed. Ordinarily this should
-+ * be stabilised under page lock but it's not strictly
-+ * necessary in this case as we just want to pin the inode, not
-+ * update the radix tree or anything like that.
-+ *
-+ * The RCU read lock is taken as the inode is finally freed
-+ * under RCU. If the mapping still matches expectations then the
-+ * mapping->host can be safely accessed as being a valid inode.
-+ */
-+ rcu_read_lock();
-+
-+ if (READ_ONCE(page->mapping) != mapping) {
-+ rcu_read_unlock();
-+ put_page(page);
-+
-+ goto again;
-+ }
++ struct timespec64 to64;
++ int res;
+
-+ inode = READ_ONCE(mapping->host);
-+ if (!inode) {
-+ rcu_read_unlock();
-+ put_page(page);
++ /* setting absolute timeout for futex2 */
++ if (gettime64(CLOCK_MONOTONIC, &to64))
++ error("gettime64 failed\n", errno);
+
-+ goto again;
-+ }
++ to64.tv_sec++;
+
-+ key->both.offset |= FUT_OFF_INODE; /* inode-based key */
-+ key->shared.i_seq = get_inode_sequence_number(inode);
-+ key->shared.pgoff = basepage_index(tail);
-+ rcu_read_unlock();
++ res = futex2_waitv(waitv, NR_FUTEXES, 0, &to64);
++ if (res < 0) {
++ printf("waiter failed errno %d %s\n",
++ res ? errno : res,
++ res ? strerror(errno) : "");
+ }
+
-+out:
-+ put_page(page);
-+ return err;
-+}
-+
-+/**
-+ * fault_in_user_writeable() - Fault in user address and verify RW access
-+ * @uaddr: pointer to faulting user space address
-+ *
-+ * Slow path to fixup the fault we just took in the atomic write
-+ * access to @uaddr.
-+ *
-+ * We have no generic implementation of a non-destructive write to the
-+ * user address. We know that we faulted in the atomic pagefault
-+ * disabled section so we can as well avoid the #PF overhead by
-+ * calling get_user_pages() right away.
-+ */
-+static int fault_in_user_writeable(u32 __user *uaddr)
-+{
-+ struct mm_struct *mm = current->mm;
-+ int ret;
-+
-+ mmap_read_lock(mm);
-+ ret = fixup_user_fault(mm, (unsigned long)uaddr,
-+ FAULT_FLAG_WRITE, NULL);
-+ mmap_read_unlock(mm);
-+
-+ return ret < 0 ? ret : 0;
-+}
-+
-+/**
-+ * futex_top_waiter() - Return the highest priority waiter on a futex
-+ * @hb: the hash bucket the futex_q's reside in
-+ * @key: the futex key (to distinguish it from other futex futex_q's)
-+ *
-+ * Must be called with the hb lock held.
-+ */
-+static struct futex_q *futex_top_waiter(struct futex_hash_bucket *hb,
-+ union futex_key *key)
-+{
-+ struct futex_q *this;
-+
-+ plist_for_each_entry(this, &hb->chain, list) {
-+ if (match_futex(&this->key, key))
-+ return this;
-+ }
+ return NULL;
+}
+
-+static int cmpxchg_futex_value_locked(u32 *curval, u32 __user *uaddr,
-+ u32 uval, u32 newval)
-+{
-+ int ret;
-+
-+ pagefault_disable();
-+ ret = futex_atomic_cmpxchg_inatomic(curval, uaddr, uval, newval);
-+ pagefault_enable();
-+
-+ return ret;
-+}
-+
-+static int get_futex_value_locked(u32 *dest, u32 __user *from)
-+{
-+ int ret;
-+
-+ pagefault_disable();
-+ ret = __get_user(*dest, from);
-+ pagefault_enable();
-+
-+ return ret ? -EFAULT : 0;
-+}
-+
-+
-+/*
-+ * PI code:
-+ */
-+static int refill_pi_state_cache(void)
-+{
-+ struct futex_pi_state *pi_state;
-+
-+ if (likely(current->pi_state_cache))
-+ return 0;
-+
-+ pi_state = kzalloc(sizeof(*pi_state), GFP_KERNEL);
-+
-+ if (!pi_state)
-+ return -ENOMEM;
-+
-+ INIT_LIST_HEAD(&pi_state->list);
-+ /* pi_mutex gets initialized later */
-+ pi_state->owner = NULL;
-+ refcount_set(&pi_state->refcount, 1);
-+ pi_state->key = FUTEX_KEY_INIT;
-+
-+ current->pi_state_cache = pi_state;
-+
-+ return 0;
-+}
-+
-+static struct futex_pi_state *alloc_pi_state(void)
-+{
-+ struct futex_pi_state *pi_state = current->pi_state_cache;
-+
-+ WARN_ON(!pi_state);
-+ current->pi_state_cache = NULL;
-+
-+ return pi_state;
-+}
-+
-+static void get_pi_state(struct futex_pi_state *pi_state)
-+{
-+ WARN_ON_ONCE(!refcount_inc_not_zero(&pi_state->refcount));
-+}
-+
-+/*
-+ * Drops a reference to the pi_state object and frees or caches it
-+ * when the last reference is gone.
-+ */
-+static void put_pi_state(struct futex_pi_state *pi_state)
-+{
-+ if (!pi_state)
-+ return;
-+
-+ if (!refcount_dec_and_test(&pi_state->refcount))
-+ return;
-+
-+ /*
-+ * If pi_state->owner is NULL, the owner is most probably dying
-+ * and has cleaned up the pi_state already
-+ */
-+ if (pi_state->owner) {
-+ struct task_struct *owner;
-+
-+ raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
-+ owner = pi_state->owner;
-+ if (owner) {
-+ raw_spin_lock(&owner->pi_lock);
-+ list_del_init(&pi_state->list);
-+ raw_spin_unlock(&owner->pi_lock);
-+ }
-+ rt_mutex_proxy_unlock(&pi_state->pi_mutex, owner);
-+ raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
-+ }
-+
-+ if (current->pi_state_cache) {
-+ kfree(pi_state);
-+ } else {
-+ /*
-+ * pi_state->list is already empty.
-+ * clear pi_state->owner.
-+ * refcount is at 0 - put it back to 1.
-+ */
-+ pi_state->owner = NULL;
-+ refcount_set(&pi_state->refcount, 1);
-+ current->pi_state_cache = pi_state;
-+ }
-+}
-+
-+/*
-+ * We need to check the following states:
-+ *
-+ * Waiter | pi_state | pi->owner | uTID | uODIED | ?
-+ *
-+ * [1] NULL | --- | --- | 0 | 0/1 | Valid
-+ * [2] NULL | --- | --- | >0 | 0/1 | Valid
-+ *
-+ * [3] Found | NULL | -- | Any | 0/1 | Invalid
-+ *
-+ * [4] Found | Found | NULL | 0 | 1 | Valid
-+ * [5] Found | Found | NULL | >0 | 1 | Invalid
-+ *
-+ * [6] Found | Found | task | 0 | 1 | Valid
-+ *
-+ * [7] Found | Found | NULL | Any | 0 | Invalid
-+ *
-+ * [8] Found | Found | task | ==taskTID | 0/1 | Valid
-+ * [9] Found | Found | task | 0 | 0 | Invalid
-+ * [10] Found | Found | task | !=taskTID | 0/1 | Invalid
-+ *
-+ * [1] Indicates that the kernel can acquire the futex atomically. We
-+ * came came here due to a stale FUTEX_WAITERS/FUTEX_OWNER_DIED bit.
-+ *
-+ * [2] Valid, if TID does not belong to a kernel thread. If no matching
-+ * thread is found then it indicates that the owner TID has died.
-+ *
-+ * [3] Invalid. The waiter is queued on a non PI futex
-+ *
-+ * [4] Valid state after exit_robust_list(), which sets the user space
-+ * value to FUTEX_WAITERS | FUTEX_OWNER_DIED.
-+ *
-+ * [5] The user space value got manipulated between exit_robust_list()
-+ * and exit_pi_state_list()
-+ *
-+ * [6] Valid state after exit_pi_state_list() which sets the new owner in
-+ * the pi_state but cannot access the user space value.
-+ *
-+ * [7] pi_state->owner can only be NULL when the OWNER_DIED bit is set.
-+ *
-+ * [8] Owner and user space value match
-+ *
-+ * [9] There is no transient state which sets the user space TID to 0
-+ * except exit_robust_list(), but this is indicated by the
-+ * FUTEX_OWNER_DIED bit. See [4]
-+ *
-+ * [10] There is no transient state which leaves owner and user space
-+ * TID out of sync.
-+ *
-+ *
-+ * Serialization and lifetime rules:
-+ *
-+ * hb->lock:
-+ *
-+ * hb -> futex_q, relation
-+ * futex_q -> pi_state, relation
-+ *
-+ * (cannot be raw because hb can contain arbitrary amount
-+ * of futex_q's)
-+ *
-+ * pi_mutex->wait_lock:
-+ *
-+ * {uval, pi_state}
-+ *
-+ * (and pi_mutex 'obviously')
-+ *
-+ * p->pi_lock:
-+ *
-+ * p->pi_state_list -> pi_state->list, relation
-+ *
-+ * pi_state->refcount:
-+ *
-+ * pi_state lifetime
-+ *
-+ *
-+ * Lock order:
-+ *
-+ * hb->lock
-+ * pi_mutex->wait_lock
-+ * p->pi_lock
-+ *
-+ */
-+
-+/*
-+ * Validate that the existing waiter has a pi_state and sanity check
-+ * the pi_state against the user space value. If correct, attach to
-+ * it.
-+ */
-+static int attach_to_pi_state(u32 __user *uaddr, u32 uval,
-+ struct futex_pi_state *pi_state,
-+ struct futex_pi_state **ps)
-+{
-+ pid_t pid = uval & FUTEX_TID_MASK;
-+ u32 uval2;
-+ int ret;
-+
-+ /*
-+ * Userspace might have messed up non-PI and PI futexes [3]
-+ */
-+ if (unlikely(!pi_state))
-+ return -EINVAL;
-+
-+ /*
-+ * We get here with hb->lock held, and having found a
-+ * futex_top_waiter(). This means that futex_lock_pi() of said futex_q
-+ * has dropped the hb->lock in between queue_me() and unqueue_me_pi(),
-+ * which in turn means that futex_lock_pi() still has a reference on
-+ * our pi_state.
-+ *
-+ * The waiter holding a reference on @pi_state also protects against
-+ * the unlocked put_pi_state() in futex_unlock_pi(), futex_lock_pi()
-+ * and futex_wait_requeue_pi() as it cannot go to 0 and consequently
-+ * free pi_state before we can take a reference ourselves.
-+ */
-+ WARN_ON(!refcount_read(&pi_state->refcount));
-+
-+ /*
-+ * Now that we have a pi_state, we can acquire wait_lock
-+ * and do the state validation.
-+ */
-+ raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
-+
-+ /*
-+ * Since {uval, pi_state} is serialized by wait_lock, and our current
-+ * uval was read without holding it, it can have changed. Verify it
-+ * still is what we expect it to be, otherwise retry the entire
-+ * operation.
-+ */
-+ if (get_futex_value_locked(&uval2, uaddr))
-+ goto out_efault;
-+
-+ if (uval != uval2)
-+ goto out_eagain;
-+
-+ /*
-+ * Handle the owner died case:
-+ */
-+ if (uval & FUTEX_OWNER_DIED) {
-+ /*
-+ * exit_pi_state_list sets owner to NULL and wakes the
-+ * topmost waiter. The task which acquires the
-+ * pi_state->rt_mutex will fixup owner.
-+ */
-+ if (!pi_state->owner) {
-+ /*
-+ * No pi state owner, but the user space TID
-+ * is not 0. Inconsistent state. [5]
-+ */
-+ if (pid)
-+ goto out_einval;
-+ /*
-+ * Take a ref on the state and return success. [4]
-+ */
-+ goto out_attach;
-+ }
-+
-+ /*
-+ * If TID is 0, then either the dying owner has not
-+ * yet executed exit_pi_state_list() or some waiter
-+ * acquired the rtmutex in the pi state, but did not
-+ * yet fixup the TID in user space.
-+ *
-+ * Take a ref on the state and return success. [6]
-+ */
-+ if (!pid)
-+ goto out_attach;
-+ } else {
-+ /*
-+ * If the owner died bit is not set, then the pi_state
-+ * must have an owner. [7]
-+ */
-+ if (!pi_state->owner)
-+ goto out_einval;
-+ }
-+
-+ /*
-+ * Bail out if user space manipulated the futex value. If pi
-+ * state exists then the owner TID must be the same as the
-+ * user space TID. [9/10]
-+ */
-+ if (pid != task_pid_vnr(pi_state->owner))
-+ goto out_einval;
-+
-+out_attach:
-+ get_pi_state(pi_state);
-+ raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
-+ *ps = pi_state;
-+ return 0;
-+
-+out_einval:
-+ ret = -EINVAL;
-+ goto out_error;
-+
-+out_eagain:
-+ ret = -EAGAIN;
-+ goto out_error;
-+
-+out_efault:
-+ ret = -EFAULT;
-+ goto out_error;
-+
-+out_error:
-+ raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
-+ return ret;
-+}
-+
-+/**
-+ * wait_for_owner_exiting - Block until the owner has exited
-+ * @ret: owner's current futex lock status
-+ * @exiting: Pointer to the exiting task
-+ *
-+ * Caller must hold a refcount on @exiting.
-+ */
-+static void wait_for_owner_exiting(int ret, struct task_struct *exiting)
-+{
-+ if (ret != -EBUSY) {
-+ WARN_ON_ONCE(exiting);
-+ return;
-+ }
-+
-+ if (WARN_ON_ONCE(ret == -EBUSY && !exiting))
-+ return;
-+
-+ mutex_lock(&exiting->futex_exit_mutex);
-+ /*
-+ * No point in doing state checking here. If the waiter got here
-+ * while the task was in exec()->exec_futex_release() then it can
-+ * have any FUTEX_STATE_* value when the waiter has acquired the
-+ * mutex. OK, if running, EXITING or DEAD if it reached exit()
-+ * already. Highly unlikely and not a problem. Just one more round
-+ * through the futex maze.
-+ */
-+ mutex_unlock(&exiting->futex_exit_mutex);
-+
-+ put_task_struct(exiting);
-+}
-+
-+static int handle_exit_race(u32 __user *uaddr, u32 uval,
-+ struct task_struct *tsk)
-+{
-+ u32 uval2;
-+
-+ /*
-+ * If the futex exit state is not yet FUTEX_STATE_DEAD, tell the
-+ * caller that the alleged owner is busy.
-+ */
-+ if (tsk && tsk->futex_state != FUTEX_STATE_DEAD)
-+ return -EBUSY;
-+
-+ /*
-+ * Reread the user space value to handle the following situation:
-+ *
-+ * CPU0 CPU1
-+ *
-+ * sys_exit() sys_futex()
-+ * do_exit() futex_lock_pi()
-+ * futex_lock_pi_atomic()
-+ * exit_signals(tsk) No waiters:
-+ * tsk->flags |= PF_EXITING; *uaddr == 0x00000PID
-+ * mm_release(tsk) Set waiter bit
-+ * exit_robust_list(tsk) { *uaddr = 0x80000PID;
-+ * Set owner died attach_to_pi_owner() {
-+ * *uaddr = 0xC0000000; tsk = get_task(PID);
-+ * } if (!tsk->flags & PF_EXITING) {
-+ * ... attach();
-+ * tsk->futex_state = } else {
-+ * FUTEX_STATE_DEAD; if (tsk->futex_state !=
-+ * FUTEX_STATE_DEAD)
-+ * return -EAGAIN;
-+ * return -ESRCH; <--- FAIL
-+ * }
-+ *
-+ * Returning ESRCH unconditionally is wrong here because the
-+ * user space value has been changed by the exiting task.
-+ *
-+ * The same logic applies to the case where the exiting task is
-+ * already gone.
-+ */
-+ if (get_futex_value_locked(&uval2, uaddr))
-+ return -EFAULT;
-+
-+ /* If the user space value has changed, try again. */
-+ if (uval2 != uval)
-+ return -EAGAIN;
-+
-+ /*
-+ * The exiting task did not have a robust list, the robust list was
-+ * corrupted or the user space value in *uaddr is simply bogus.
-+ * Give up and tell user space.
-+ */
-+ return -ESRCH;
-+}
-+
-+/*
-+ * Lookup the task for the TID provided from user space and attach to
-+ * it after doing proper sanity checks.
-+ */
-+static int attach_to_pi_owner(u32 __user *uaddr, u32 uval, union futex_key *key,
-+ struct futex_pi_state **ps,
-+ struct task_struct **exiting)
-+{
-+ pid_t pid = uval & FUTEX_TID_MASK;
-+ struct futex_pi_state *pi_state;
-+ struct task_struct *p;
-+
-+ /*
-+ * We are the first waiter - try to look up the real owner and attach
-+ * the new pi_state to it, but bail out when TID = 0 [1]
-+ *
-+ * The !pid check is paranoid. None of the call sites should end up
-+ * with pid == 0, but better safe than sorry. Let the caller retry
-+ */
-+ if (!pid)
-+ return -EAGAIN;
-+ p = find_get_task_by_vpid(pid);
-+ if (!p)
-+ return handle_exit_race(uaddr, uval, NULL);
-+
-+ if (unlikely(p->flags & PF_KTHREAD)) {
-+ put_task_struct(p);
-+ return -EPERM;
-+ }
-+
-+ /*
-+ * We need to look at the task state to figure out, whether the
-+ * task is exiting. To protect against the change of the task state
-+ * in futex_exit_release(), we do this protected by p->pi_lock:
-+ */
-+ raw_spin_lock_irq(&p->pi_lock);
-+ if (unlikely(p->futex_state != FUTEX_STATE_OK)) {
-+ /*
-+ * The task is on the way out. When the futex state is
-+ * FUTEX_STATE_DEAD, we know that the task has finished
-+ * the cleanup:
-+ */
-+ int ret = handle_exit_race(uaddr, uval, p);
-+
-+ raw_spin_unlock_irq(&p->pi_lock);
-+ /*
-+ * If the owner task is between FUTEX_STATE_EXITING and
-+ * FUTEX_STATE_DEAD then store the task pointer and keep
-+ * the reference on the task struct. The calling code will
-+ * drop all locks, wait for the task to reach
-+ * FUTEX_STATE_DEAD and then drop the refcount. This is
-+ * required to prevent a live lock when the current task
-+ * preempted the exiting task between the two states.
-+ */
-+ if (ret == -EBUSY)
-+ *exiting = p;
-+ else
-+ put_task_struct(p);
-+ return ret;
-+ }
-+
-+ /*
-+ * No existing pi state. First waiter. [2]
-+ *
-+ * This creates pi_state, we have hb->lock held, this means nothing can
-+ * observe this state, wait_lock is irrelevant.
-+ */
-+ pi_state = alloc_pi_state();
-+
-+ /*
-+ * Initialize the pi_mutex in locked state and make @p
-+ * the owner of it:
-+ */
-+ rt_mutex_init_proxy_locked(&pi_state->pi_mutex, p);
-+
-+ /* Store the key for possible exit cleanups: */
-+ pi_state->key = *key;
-+
-+ WARN_ON(!list_empty(&pi_state->list));
-+ list_add(&pi_state->list, &p->pi_state_list);
-+ /*
-+ * Assignment without holding pi_state->pi_mutex.wait_lock is safe
-+ * because there is no concurrency as the object is not published yet.
-+ */
-+ pi_state->owner = p;
-+ raw_spin_unlock_irq(&p->pi_lock);
-+
-+ put_task_struct(p);
-+
-+ *ps = pi_state;
-+
-+ return 0;
-+}
-+
-+static int lookup_pi_state(u32 __user *uaddr, u32 uval,
-+ struct futex_hash_bucket *hb,
-+ union futex_key *key, struct futex_pi_state **ps,
-+ struct task_struct **exiting)
-+{
-+ struct futex_q *top_waiter = futex_top_waiter(hb, key);
-+
-+ /*
-+ * If there is a waiter on that futex, validate it and
-+ * attach to the pi_state when the validation succeeds.
-+ */
-+ if (top_waiter)
-+ return attach_to_pi_state(uaddr, uval, top_waiter->pi_state, ps);
-+
-+ /*
-+ * We are the first waiter - try to look up the owner based on
-+ * @uval and attach to it.
-+ */
-+ return attach_to_pi_owner(uaddr, uval, key, ps, exiting);
-+}
-+
-+static int lock_pi_update_atomic(u32 __user *uaddr, u32 uval, u32 newval)
-+{
-+ int err;
-+ u32 curval;
-+
-+ if (unlikely(should_fail_futex(true)))
-+ return -EFAULT;
-+
-+ err = cmpxchg_futex_value_locked(&curval, uaddr, uval, newval);
-+ if (unlikely(err))
-+ return err;
-+
-+ /* If user space value changed, let the caller retry */
-+ return curval != uval ? -EAGAIN : 0;
-+}
-+
-+/**
-+ * futex_lock_pi_atomic() - Atomic work required to acquire a pi aware futex
-+ * @uaddr: the pi futex user address
-+ * @hb: the pi futex hash bucket
-+ * @key: the futex key associated with uaddr and hb
-+ * @ps: the pi_state pointer where we store the result of the
-+ * lookup
-+ * @task: the task to perform the atomic lock work for. This will
-+ * be "current" except in the case of requeue pi.
-+ * @exiting: Pointer to store the task pointer of the owner task
-+ * which is in the middle of exiting
-+ * @set_waiters: force setting the FUTEX_WAITERS bit (1) or not (0)
-+ *
-+ * Return:
-+ * - 0 - ready to wait;
-+ * - 1 - acquired the lock;
-+ * - <0 - error
-+ *
-+ * The hb->lock and futex_key refs shall be held by the caller.
-+ *
-+ * @exiting is only set when the return value is -EBUSY. If so, this holds
-+ * a refcount on the exiting task on return and the caller needs to drop it
-+ * after waiting for the exit to complete.
-+ */
-+static int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb,
-+ union futex_key *key,
-+ struct futex_pi_state **ps,
-+ struct task_struct *task,
-+ struct task_struct **exiting,
-+ int set_waiters)
-+{
-+ u32 uval, newval, vpid = task_pid_vnr(task);
-+ struct futex_q *top_waiter;
-+ int ret;
-+
-+ /*
-+ * Read the user space value first so we can validate a few
-+ * things before proceeding further.
-+ */
-+ if (get_futex_value_locked(&uval, uaddr))
-+ return -EFAULT;
-+
-+ if (unlikely(should_fail_futex(true)))
-+ return -EFAULT;
-+
-+ /*
-+ * Detect deadlocks.
-+ */
-+ if ((unlikely((uval & FUTEX_TID_MASK) == vpid)))
-+ return -EDEADLK;
-+
-+ if ((unlikely(should_fail_futex(true))))
-+ return -EDEADLK;
-+
-+ /*
-+ * Lookup existing state first. If it exists, try to attach to
-+ * its pi_state.
-+ */
-+ top_waiter = futex_top_waiter(hb, key);
-+ if (top_waiter)
-+ return attach_to_pi_state(uaddr, uval, top_waiter->pi_state, ps);
-+
-+ /*
-+ * No waiter and user TID is 0. We are here because the
-+ * waiters or the owner died bit is set or called from
-+ * requeue_cmp_pi or for whatever reason something took the
-+ * syscall.
-+ */
-+ if (!(uval & FUTEX_TID_MASK)) {
-+ /*
-+ * We take over the futex. No other waiters and the user space
-+ * TID is 0. We preserve the owner died bit.
-+ */
-+ newval = uval & FUTEX_OWNER_DIED;
-+ newval |= vpid;
-+
-+ /* The futex requeue_pi code can enforce the waiters bit */
-+ if (set_waiters)
-+ newval |= FUTEX_WAITERS;
-+
-+ ret = lock_pi_update_atomic(uaddr, uval, newval);
-+ /* If the take over worked, return 1 */
-+ return ret < 0 ? ret : 1;
-+ }
-+
-+ /*
-+ * First waiter. Set the waiters bit before attaching ourself to
-+ * the owner. If owner tries to unlock, it will be forced into
-+ * the kernel and blocked on hb->lock.
-+ */
-+ newval = uval | FUTEX_WAITERS;
-+ ret = lock_pi_update_atomic(uaddr, uval, newval);
-+ if (ret)
-+ return ret;
-+ /*
-+ * If the update of the user space value succeeded, we try to
-+ * attach to the owner. If that fails, no harm done, we only
-+ * set the FUTEX_WAITERS bit in the user space variable.
-+ */
-+ return attach_to_pi_owner(uaddr, newval, key, ps, exiting);
-+}
-+
-+/**
-+ * __unqueue_futex() - Remove the futex_q from its futex_hash_bucket
-+ * @q: The futex_q to unqueue
-+ *
-+ * The q->lock_ptr must not be NULL and must be held by the caller.
-+ */
-+static void __unqueue_futex(struct futex_q *q)
-+{
-+ struct futex_hash_bucket *hb;
-+
-+ if (WARN_ON_SMP(!q->lock_ptr) || WARN_ON(plist_node_empty(&q->list)))
-+ return;
-+ lockdep_assert_held(q->lock_ptr);
-+
-+ hb = container_of(q->lock_ptr, struct futex_hash_bucket, lock);
-+ plist_del(&q->list, &hb->chain);
-+ hb_waiters_dec(hb);
-+}
-+
-+/*
-+ * The hash bucket lock must be held when this is called.
-+ * Afterwards, the futex_q must not be accessed. Callers
-+ * must ensure to later call wake_up_q() for the actual
-+ * wakeups to occur.
-+ */
-+static void mark_wake_futex(struct wake_q_head *wake_q, struct futex_q *q)
-+{
-+ struct task_struct *p = q->task;
-+
-+ if (WARN(q->pi_state || q->rt_waiter, "refusing to wake PI futex\n"))
-+ return;
-+
-+ get_task_struct(p);
-+ __unqueue_futex(q);
-+ /*
-+ * The waiting task can free the futex_q as soon as q->lock_ptr = NULL
-+ * is written, without taking any locks. This is possible in the event
-+ * of a spurious wakeup, for example. A memory barrier is required here
-+ * to prevent the following store to lock_ptr from getting ahead of the
-+ * plist_del in __unqueue_futex().
-+ */
-+ smp_store_release(&q->lock_ptr, NULL);
-+
-+ /*
-+ * Queue the task for later wakeup for after we've released
-+ * the hb->lock.
-+ */
-+ wake_q_add_safe(wake_q, p);
-+}
-+
-+/*
-+ * Caller must hold a reference on @pi_state.
-+ */
-+static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_pi_state *pi_state)
-+{
-+ u32 curval, newval;
-+ struct task_struct *new_owner;
-+ bool postunlock = false;
-+ DEFINE_WAKE_Q(wake_q);
-+ int ret = 0;
-+
-+ new_owner = rt_mutex_next_owner(&pi_state->pi_mutex);
-+ if (WARN_ON_ONCE(!new_owner)) {
-+ /*
-+ * As per the comment in futex_unlock_pi() this should not happen.
-+ *
-+ * When this happens, give up our locks and try again, giving
-+ * the futex_lock_pi() instance time to complete, either by
-+ * waiting on the rtmutex or removing itself from the futex
-+ * queue.
-+ */
-+ ret = -EAGAIN;
-+ goto out_unlock;
-+ }
-+
-+ /*
-+ * We pass it to the next owner. The WAITERS bit is always kept
-+ * enabled while there is PI state around. We cleanup the owner
-+ * died bit, because we are the owner.
-+ */
-+ newval = FUTEX_WAITERS | task_pid_vnr(new_owner);
-+
-+ if (unlikely(should_fail_futex(true)))
-+ ret = -EFAULT;
-+
-+ ret = cmpxchg_futex_value_locked(&curval, uaddr, uval, newval);
-+ if (!ret && (curval != uval)) {
-+ /*
-+ * If a unconditional UNLOCK_PI operation (user space did not
-+ * try the TID->0 transition) raced with a waiter setting the
-+ * FUTEX_WAITERS flag between get_user() and locking the hash
-+ * bucket lock, retry the operation.
-+ */
-+ if ((FUTEX_TID_MASK & curval) == uval)
-+ ret = -EAGAIN;
-+ else
-+ ret = -EINVAL;
-+ }
-+
-+ if (ret)
-+ goto out_unlock;
-+
-+ /*
-+ * This is a point of no return; once we modify the uval there is no
-+ * going back and subsequent operations must not fail.
-+ */
-+
-+ raw_spin_lock(&pi_state->owner->pi_lock);
-+ WARN_ON(list_empty(&pi_state->list));
-+ list_del_init(&pi_state->list);
-+ raw_spin_unlock(&pi_state->owner->pi_lock);
-+
-+ raw_spin_lock(&new_owner->pi_lock);
-+ WARN_ON(!list_empty(&pi_state->list));
-+ list_add(&pi_state->list, &new_owner->pi_state_list);
-+ pi_state->owner = new_owner;
-+ raw_spin_unlock(&new_owner->pi_lock);
-+
-+ postunlock = __rt_mutex_futex_unlock(&pi_state->pi_mutex, &wake_q);
-+
-+out_unlock:
-+ raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
-+
-+ if (postunlock)
-+ rt_mutex_postunlock(&wake_q);
-+
-+ return ret;
-+}
-+
-+/*
-+ * Express the locking dependencies for lockdep:
-+ */
-+static inline void
-+double_lock_hb(struct futex_hash_bucket *hb1, struct futex_hash_bucket *hb2)
-+{
-+ if (hb1 <= hb2) {
-+ spin_lock(&hb1->lock);
-+ if (hb1 < hb2)
-+ spin_lock_nested(&hb2->lock, SINGLE_DEPTH_NESTING);
-+ } else { /* hb1 > hb2 */
-+ spin_lock(&hb2->lock);
-+ spin_lock_nested(&hb1->lock, SINGLE_DEPTH_NESTING);
-+ }
-+}
-+
-+static inline void
-+double_unlock_hb(struct futex_hash_bucket *hb1, struct futex_hash_bucket *hb2)
-+{
-+ spin_unlock(&hb1->lock);
-+ if (hb1 != hb2)
-+ spin_unlock(&hb2->lock);
-+}
-+
-+/*
-+ * Wake up waiters matching bitset queued on this futex (uaddr).
-+ */
-+static int
-+futex_wake(u32 __user *uaddr, unsigned int flags, int nr_wake, u32 bitset)
-+{
-+ struct futex_hash_bucket *hb;
-+ struct futex_q *this, *next;
-+ union futex_key key = FUTEX_KEY_INIT;
-+ int ret;
-+ DEFINE_WAKE_Q(wake_q);
-+
-+ if (!bitset)
-+ return -EINVAL;
-+
-+ ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key, FUTEX_READ);
-+ if (unlikely(ret != 0))
-+ return ret;
-+
-+ hb = hash_futex(&key);
-+
-+ /* Make sure we really have tasks to wakeup */
-+ if (!hb_waiters_pending(hb))
-+ return ret;
-+
-+ spin_lock(&hb->lock);
-+
-+ plist_for_each_entry_safe(this, next, &hb->chain, list) {
-+ if (match_futex (&this->key, &key)) {
-+ if (this->pi_state || this->rt_waiter) {
-+ ret = -EINVAL;
-+ break;
-+ }
-+
-+ /* Check if one of the bits is set in both bitsets */
-+ if (!(this->bitset & bitset))
-+ continue;
-+
-+ mark_wake_futex(&wake_q, this);
-+ if (++ret >= nr_wake)
-+ break;
-+ }
-+ }
-+
-+ spin_unlock(&hb->lock);
-+ wake_up_q(&wake_q);
-+ return ret;
-+}
-+
-+static int futex_atomic_op_inuser(unsigned int encoded_op, u32 __user *uaddr)
-+{
-+ unsigned int op = (encoded_op & 0x70000000) >> 28;
-+ unsigned int cmp = (encoded_op & 0x0f000000) >> 24;
-+ int oparg = sign_extend32((encoded_op & 0x00fff000) >> 12, 11);
-+ int cmparg = sign_extend32(encoded_op & 0x00000fff, 11);
-+ int oldval, ret;
-+
-+ if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) {
-+ if (oparg < 0 || oparg > 31) {
-+ char comm[sizeof(current->comm)];
-+ /*
-+ * kill this print and return -EINVAL when userspace
-+ * is sane again
-+ */
-+ pr_info_ratelimited("futex_wake_op: %s tries to shift op by %d; fix this program\n",
-+ get_task_comm(comm, current), oparg);
-+ oparg &= 31;
-+ }
-+ oparg = 1 << oparg;
-+ }
-+
-+ pagefault_disable();
-+ ret = arch_futex_atomic_op_inuser(op, oparg, &oldval, uaddr);
-+ pagefault_enable();
-+ if (ret)
-+ return ret;
-+
-+ switch (cmp) {
-+ case FUTEX_OP_CMP_EQ:
-+ return oldval == cmparg;
-+ case FUTEX_OP_CMP_NE:
-+ return oldval != cmparg;
-+ case FUTEX_OP_CMP_LT:
-+ return oldval < cmparg;
-+ case FUTEX_OP_CMP_GE:
-+ return oldval >= cmparg;
-+ case FUTEX_OP_CMP_LE:
-+ return oldval <= cmparg;
-+ case FUTEX_OP_CMP_GT:
-+ return oldval > cmparg;
-+ default:
-+ return -ENOSYS;
-+ }
-+}
-+
-+/*
-+ * Wake up all waiters hashed on the physical page that is mapped
-+ * to this virtual address:
-+ */
-+static int
-+futex_wake_op(u32 __user *uaddr1, unsigned int flags, u32 __user *uaddr2,
-+ int nr_wake, int nr_wake2, int op)
-+{
-+ union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT;
-+ struct futex_hash_bucket *hb1, *hb2;
-+ struct futex_q *this, *next;
-+ int ret, op_ret;
-+ DEFINE_WAKE_Q(wake_q);
-+
-+retry:
-+ ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1, FUTEX_READ);
-+ if (unlikely(ret != 0))
-+ return ret;
-+ ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, FUTEX_WRITE);
-+ if (unlikely(ret != 0))
-+ return ret;
-+
-+ hb1 = hash_futex(&key1);
-+ hb2 = hash_futex(&key2);
-+
-+retry_private:
-+ double_lock_hb(hb1, hb2);
-+ op_ret = futex_atomic_op_inuser(op, uaddr2);
-+ if (unlikely(op_ret < 0)) {
-+ double_unlock_hb(hb1, hb2);
-+
-+ if (!IS_ENABLED(CONFIG_MMU) ||
-+ unlikely(op_ret != -EFAULT && op_ret != -EAGAIN)) {
-+ /*
-+ * we don't get EFAULT from MMU faults if we don't have
-+ * an MMU, but we might get them from range checking
-+ */
-+ ret = op_ret;
-+ return ret;
-+ }
-+
-+ if (op_ret == -EFAULT) {
-+ ret = fault_in_user_writeable(uaddr2);
-+ if (ret)
-+ return ret;
-+ }
-+
-+ if (!(flags & FLAGS_SHARED)) {
-+ cond_resched();
-+ goto retry_private;
-+ }
-+
-+ cond_resched();
-+ goto retry;
-+ }
-+
-+ plist_for_each_entry_safe(this, next, &hb1->chain, list) {
-+ if (match_futex (&this->key, &key1)) {
-+ if (this->pi_state || this->rt_waiter) {
-+ ret = -EINVAL;
-+ goto out_unlock;
-+ }
-+ mark_wake_futex(&wake_q, this);
-+ if (++ret >= nr_wake)
-+ break;
-+ }
-+ }
-+
-+ if (op_ret > 0) {
-+ op_ret = 0;
-+ plist_for_each_entry_safe(this, next, &hb2->chain, list) {
-+ if (match_futex (&this->key, &key2)) {
-+ if (this->pi_state || this->rt_waiter) {
-+ ret = -EINVAL;
-+ goto out_unlock;
-+ }
-+ mark_wake_futex(&wake_q, this);
-+ if (++op_ret >= nr_wake2)
-+ break;
-+ }
-+ }
-+ ret += op_ret;
-+ }
-+
-+out_unlock:
-+ double_unlock_hb(hb1, hb2);
-+ wake_up_q(&wake_q);
-+ return ret;
-+}
-+
-+/**
-+ * requeue_futex() - Requeue a futex_q from one hb to another
-+ * @q: the futex_q to requeue
-+ * @hb1: the source hash_bucket
-+ * @hb2: the target hash_bucket
-+ * @key2: the new key for the requeued futex_q
-+ */
-+static inline
-+void requeue_futex(struct futex_q *q, struct futex_hash_bucket *hb1,
-+ struct futex_hash_bucket *hb2, union futex_key *key2)
-+{
-+
-+ /*
-+ * If key1 and key2 hash to the same bucket, no need to
-+ * requeue.
-+ */
-+ if (likely(&hb1->chain != &hb2->chain)) {
-+ plist_del(&q->list, &hb1->chain);
-+ hb_waiters_dec(hb1);
-+ hb_waiters_inc(hb2);
-+ plist_add(&q->list, &hb2->chain);
-+ q->lock_ptr = &hb2->lock;
-+ }
-+ q->key = *key2;
-+}
-+
-+/**
-+ * requeue_pi_wake_futex() - Wake a task that acquired the lock during requeue
-+ * @q: the futex_q
-+ * @key: the key of the requeue target futex
-+ * @hb: the hash_bucket of the requeue target futex
-+ *
-+ * During futex_requeue, with requeue_pi=1, it is possible to acquire the
-+ * target futex if it is uncontended or via a lock steal. Set the futex_q key
-+ * to the requeue target futex so the waiter can detect the wakeup on the right
-+ * futex, but remove it from the hb and NULL the rt_waiter so it can detect
-+ * atomic lock acquisition. Set the q->lock_ptr to the requeue target hb->lock
-+ * to protect access to the pi_state to fixup the owner later. Must be called
-+ * with both q->lock_ptr and hb->lock held.
-+ */
-+static inline
-+void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key,
-+ struct futex_hash_bucket *hb)
-+{
-+ q->key = *key;
-+
-+ __unqueue_futex(q);
-+
-+ WARN_ON(!q->rt_waiter);
-+ q->rt_waiter = NULL;
-+
-+ q->lock_ptr = &hb->lock;
-+
-+ wake_up_state(q->task, TASK_NORMAL);
-+}
-+
-+/**
-+ * futex_proxy_trylock_atomic() - Attempt an atomic lock for the top waiter
-+ * @pifutex: the user address of the to futex
-+ * @hb1: the from futex hash bucket, must be locked by the caller
-+ * @hb2: the to futex hash bucket, must be locked by the caller
-+ * @key1: the from futex key
-+ * @key2: the to futex key
-+ * @ps: address to store the pi_state pointer
-+ * @exiting: Pointer to store the task pointer of the owner task
-+ * which is in the middle of exiting
-+ * @set_waiters: force setting the FUTEX_WAITERS bit (1) or not (0)
-+ *
-+ * Try and get the lock on behalf of the top waiter if we can do it atomically.
-+ * Wake the top waiter if we succeed. If the caller specified set_waiters,
-+ * then direct futex_lock_pi_atomic() to force setting the FUTEX_WAITERS bit.
-+ * hb1 and hb2 must be held by the caller.
-+ *
-+ * @exiting is only set when the return value is -EBUSY. If so, this holds
-+ * a refcount on the exiting task on return and the caller needs to drop it
-+ * after waiting for the exit to complete.
-+ *
-+ * Return:
-+ * - 0 - failed to acquire the lock atomically;
-+ * - >0 - acquired the lock, return value is vpid of the top_waiter
-+ * - <0 - error
-+ */
-+static int
-+futex_proxy_trylock_atomic(u32 __user *pifutex, struct futex_hash_bucket *hb1,
-+ struct futex_hash_bucket *hb2, union futex_key *key1,
-+ union futex_key *key2, struct futex_pi_state **ps,
-+ struct task_struct **exiting, int set_waiters)
-+{
-+ struct futex_q *top_waiter = NULL;
-+ u32 curval;
-+ int ret, vpid;
-+
-+ if (get_futex_value_locked(&curval, pifutex))
-+ return -EFAULT;
-+
-+ if (unlikely(should_fail_futex(true)))
-+ return -EFAULT;
-+
-+ /*
-+ * Find the top_waiter and determine if there are additional waiters.
-+ * If the caller intends to requeue more than 1 waiter to pifutex,
-+ * force futex_lock_pi_atomic() to set the FUTEX_WAITERS bit now,
-+ * as we have means to handle the possible fault. If not, don't set
-+ * the bit unecessarily as it will force the subsequent unlock to enter
-+ * the kernel.
-+ */
-+ top_waiter = futex_top_waiter(hb1, key1);
-+
-+ /* There are no waiters, nothing for us to do. */
-+ if (!top_waiter)
-+ return 0;
-+
-+ /* Ensure we requeue to the expected futex. */
-+ if (!match_futex(top_waiter->requeue_pi_key, key2))
-+ return -EINVAL;
-+
-+ /*
-+ * Try to take the lock for top_waiter. Set the FUTEX_WAITERS bit in
-+ * the contended case or if set_waiters is 1. The pi_state is returned
-+ * in ps in contended cases.
-+ */
-+ vpid = task_pid_vnr(top_waiter->task);
-+ ret = futex_lock_pi_atomic(pifutex, hb2, key2, ps, top_waiter->task,
-+ exiting, set_waiters);
-+ if (ret == 1) {
-+ requeue_pi_wake_futex(top_waiter, key2, hb2);
-+ return vpid;
-+ }
-+ return ret;
-+}
-+
-+/**
-+ * futex_requeue() - Requeue waiters from uaddr1 to uaddr2
-+ * @uaddr1: source futex user address
-+ * @flags: futex flags (FLAGS_SHARED, etc.)
-+ * @uaddr2: target futex user address
-+ * @nr_wake: number of waiters to wake (must be 1 for requeue_pi)
-+ * @nr_requeue: number of waiters to requeue (0-INT_MAX)
-+ * @cmpval: @uaddr1 expected value (or %NULL)
-+ * @requeue_pi: if we are attempting to requeue from a non-pi futex to a
-+ * pi futex (pi to pi requeue is not supported)
-+ *
-+ * Requeue waiters on uaddr1 to uaddr2. In the requeue_pi case, try to acquire
-+ * uaddr2 atomically on behalf of the top waiter.
-+ *
-+ * Return:
-+ * - >=0 - on success, the number of tasks requeued or woken;
-+ * - <0 - on error
-+ */
-+static int futex_requeue(u32 __user *uaddr1, unsigned int flags,
-+ u32 __user *uaddr2, int nr_wake, int nr_requeue,
-+ u32 *cmpval, int requeue_pi)
++int main(int argc, char *argv[])
+{
-+ union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT;
-+ int task_count = 0, ret;
-+ struct futex_pi_state *pi_state = NULL;
-+ struct futex_hash_bucket *hb1, *hb2;
-+ struct futex_q *this, *next;
-+ DEFINE_WAKE_Q(wake_q);
-+
-+ if (nr_wake < 0 || nr_requeue < 0)
-+ return -EINVAL;
-+
-+ /*
-+ * When PI not supported: return -ENOSYS if requeue_pi is true,
-+ * consequently the compiler knows requeue_pi is always false past
-+ * this point which will optimize away all the conditional code
-+ * further down.
-+ */
-+ if (!IS_ENABLED(CONFIG_FUTEX_PI) && requeue_pi)
-+ return -ENOSYS;
-+
-+ if (requeue_pi) {
-+ /*
-+ * Requeue PI only works on two distinct uaddrs. This
-+ * check is only valid for private futexes. See below.
-+ */
-+ if (uaddr1 == uaddr2)
-+ return -EINVAL;
-+
-+ /*
-+ * requeue_pi requires a pi_state, try to allocate it now
-+ * without any locks in case it fails.
-+ */
-+ if (refill_pi_state_cache())
-+ return -ENOMEM;
-+ /*
-+ * requeue_pi must wake as many tasks as it can, up to nr_wake
-+ * + nr_requeue, since it acquires the rt_mutex prior to
-+ * returning to userspace, so as to not leave the rt_mutex with
-+ * waiters and no owner. However, second and third wake-ups
-+ * cannot be predicted as they involve race conditions with the
-+ * first wake and a fault while looking up the pi_state. Both
-+ * pthread_cond_signal() and pthread_cond_broadcast() should
-+ * use nr_wake=1.
-+ */
-+ if (nr_wake != 1)
-+ return -EINVAL;
-+ }
-+
-+retry:
-+ ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1, FUTEX_READ);
-+ if (unlikely(ret != 0))
-+ return ret;
-+ ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2,
-+ requeue_pi ? FUTEX_WRITE : FUTEX_READ);
-+ if (unlikely(ret != 0))
-+ return ret;
-+
-+ /*
-+ * The check above which compares uaddrs is not sufficient for
-+ * shared futexes. We need to compare the keys:
-+ */
-+ if (requeue_pi && match_futex(&key1, &key2))
-+ return -EINVAL;
-+
-+ hb1 = hash_futex(&key1);
-+ hb2 = hash_futex(&key2);
-+
-+retry_private:
-+ hb_waiters_inc(hb2);
-+ double_lock_hb(hb1, hb2);
-+
-+ if (likely(cmpval != NULL)) {
-+ u32 curval;
-+
-+ ret = get_futex_value_locked(&curval, uaddr1);
-+
-+ if (unlikely(ret)) {
-+ double_unlock_hb(hb1, hb2);
-+ hb_waiters_dec(hb2);
-+
-+ ret = get_user(curval, uaddr1);
-+ if (ret)
-+ return ret;
-+
-+ if (!(flags & FLAGS_SHARED))
-+ goto retry_private;
-+
-+ goto retry;
-+ }
-+ if (curval != *cmpval) {
-+ ret = -EAGAIN;
-+ goto out_unlock;
-+ }
-+ }
-+
-+ if (requeue_pi && (task_count - nr_wake < nr_requeue)) {
-+ struct task_struct *exiting = NULL;
-+
-+ /*
-+ * Attempt to acquire uaddr2 and wake the top waiter. If we
-+ * intend to requeue waiters, force setting the FUTEX_WAITERS
-+ * bit. We force this here where we are able to easily handle
-+ * faults rather in the requeue loop below.
-+ */
-+ ret = futex_proxy_trylock_atomic(uaddr2, hb1, hb2, &key1,
-+ &key2, &pi_state,
-+ &exiting, nr_requeue);
-+
-+ /*
-+ * At this point the top_waiter has either taken uaddr2 or is
-+ * waiting on it. If the former, then the pi_state will not
-+ * exist yet, look it up one more time to ensure we have a
-+ * reference to it. If the lock was taken, ret contains the
-+ * vpid of the top waiter task.
-+ * If the lock was not taken, we have pi_state and an initial
-+ * refcount on it. In case of an error we have nothing.
-+ */
-+ if (ret > 0) {
-+ WARN_ON(pi_state);
-+ task_count++;
-+ /*
-+ * If we acquired the lock, then the user space value
-+ * of uaddr2 should be vpid. It cannot be changed by
-+ * the top waiter as it is blocked on hb2 lock if it
-+ * tries to do so. If something fiddled with it behind
-+ * our back the pi state lookup might unearth it. So
-+ * we rather use the known value than rereading and
-+ * handing potential crap to lookup_pi_state.
-+ *
-+ * If that call succeeds then we have pi_state and an
-+ * initial refcount on it.
-+ */
-+ ret = lookup_pi_state(uaddr2, ret, hb2, &key2,
-+ &pi_state, &exiting);
-+ }
-+
-+ switch (ret) {
-+ case 0:
-+ /* We hold a reference on the pi state. */
-+ break;
-+
-+ /* If the above failed, then pi_state is NULL */
-+ case -EFAULT:
-+ double_unlock_hb(hb1, hb2);
-+ hb_waiters_dec(hb2);
-+ ret = fault_in_user_writeable(uaddr2);
-+ if (!ret)
-+ goto retry;
-+ return ret;
-+ case -EBUSY:
-+ case -EAGAIN:
-+ /*
-+ * Two reasons for this:
-+ * - EBUSY: Owner is exiting and we just wait for the
-+ * exit to complete.
-+ * - EAGAIN: The user space value changed.
-+ */
-+ double_unlock_hb(hb1, hb2);
-+ hb_waiters_dec(hb2);
-+ /*
-+ * Handle the case where the owner is in the middle of
-+ * exiting. Wait for the exit to complete otherwise
-+ * this task might loop forever, aka. live lock.
-+ */
-+ wait_for_owner_exiting(ret, exiting);
-+ cond_resched();
-+ goto retry;
-+ default:
-+ goto out_unlock;
-+ }
-+ }
-+
-+ plist_for_each_entry_safe(this, next, &hb1->chain, list) {
-+ if (task_count - nr_wake >= nr_requeue)
-+ break;
-+
-+ if (!match_futex(&this->key, &key1))
-+ continue;
-+
-+ /*
-+ * FUTEX_WAIT_REQEUE_PI and FUTEX_CMP_REQUEUE_PI should always
-+ * be paired with each other and no other futex ops.
-+ *
-+ * We should never be requeueing a futex_q with a pi_state,
-+ * which is awaiting a futex_unlock_pi().
-+ */
-+ if ((requeue_pi && !this->rt_waiter) ||
-+ (!requeue_pi && this->rt_waiter) ||
-+ this->pi_state) {
-+ ret = -EINVAL;
-+ break;
-+ }
-+
-+ /*
-+ * Wake nr_wake waiters. For requeue_pi, if we acquired the
-+ * lock, we already woke the top_waiter. If not, it will be
-+ * woken by futex_unlock_pi().
-+ */
-+ if (++task_count <= nr_wake && !requeue_pi) {
-+ mark_wake_futex(&wake_q, this);
-+ continue;
-+ }
++ pthread_t waiter;
++ int res, ret = RET_PASS;
++ int c, i;
+
-+ /* Ensure we requeue to the expected futex for requeue_pi. */
-+ if (requeue_pi && !match_futex(this->requeue_pi_key, &key2)) {
-+ ret = -EINVAL;
++ while ((c = getopt(argc, argv, "cht:v:")) != -1) {
++ switch (c) {
++ case 'c':
++ log_color(1);
+ break;
-+ }
-+
-+ /*
-+ * Requeue nr_requeue waiters and possibly one more in the case
-+ * of requeue_pi if we couldn't acquire the lock atomically.
-+ */
-+ if (requeue_pi) {
-+ /*
-+ * Prepare the waiter to take the rt_mutex. Take a
-+ * refcount on the pi_state and store the pointer in
-+ * the futex_q object of the waiter.
-+ */
-+ get_pi_state(pi_state);
-+ this->pi_state = pi_state;
-+ ret = rt_mutex_start_proxy_lock(&pi_state->pi_mutex,
-+ this->rt_waiter,
-+ this->task);
-+ if (ret == 1) {
-+ /*
-+ * We got the lock. We do neither drop the
-+ * refcount on pi_state nor clear
-+ * this->pi_state because the waiter needs the
-+ * pi_state for cleaning up the user space
-+ * value. It will drop the refcount after
-+ * doing so.
-+ */
-+ requeue_pi_wake_futex(this, &key2, hb2);
-+ continue;
-+ } else if (ret) {
-+ /*
-+ * rt_mutex_start_proxy_lock() detected a
-+ * potential deadlock when we tried to queue
-+ * that waiter. Drop the pi_state reference
-+ * which we took above and remove the pointer
-+ * to the state from the waiters futex_q
-+ * object.
-+ */
-+ this->pi_state = NULL;
-+ put_pi_state(pi_state);
-+ /*
-+ * We stop queueing more waiters and let user
-+ * space deal with the mess.
-+ */
-+ break;
-+ }
-+ }
-+ requeue_futex(this, hb1, hb2, &key2);
-+ }
-+
-+ /*
-+ * We took an extra initial reference to the pi_state either
-+ * in futex_proxy_trylock_atomic() or in lookup_pi_state(). We
-+ * need to drop it here again.
-+ */
-+ put_pi_state(pi_state);
-+
-+out_unlock:
-+ double_unlock_hb(hb1, hb2);
-+ wake_up_q(&wake_q);
-+ hb_waiters_dec(hb2);
-+ return ret ? ret : task_count;
-+}
-+
-+/* The key must be already stored in q->key. */
-+static inline struct futex_hash_bucket *queue_lock(struct futex_q *q)
-+ __acquires(&hb->lock)
-+{
-+ struct futex_hash_bucket *hb;
-+
-+ hb = hash_futex(&q->key);
-+
-+ /*
-+ * Increment the counter before taking the lock so that
-+ * a potential waker won't miss a to-be-slept task that is
-+ * waiting for the spinlock. This is safe as all queue_lock()
-+ * users end up calling queue_me(). Similarly, for housekeeping,
-+ * decrement the counter at queue_unlock() when some error has
-+ * occurred and we don't end up adding the task to the list.
-+ */
-+ hb_waiters_inc(hb); /* implies smp_mb(); (A) */
-+
-+ q->lock_ptr = &hb->lock;
-+
-+ spin_lock(&hb->lock);
-+ return hb;
-+}
-+
-+static inline void
-+queue_unlock(struct futex_hash_bucket *hb)
-+ __releases(&hb->lock)
-+{
-+ spin_unlock(&hb->lock);
-+ hb_waiters_dec(hb);
-+}
-+
-+static inline void __queue_me(struct futex_q *q, struct futex_hash_bucket *hb)
-+{
-+ int prio;
-+
-+ /*
-+ * The priority used to register this element is
-+ * - either the real thread-priority for the real-time threads
-+ * (i.e. threads with a priority lower than MAX_RT_PRIO)
-+ * - or MAX_RT_PRIO for non-RT threads.
-+ * Thus, all RT-threads are woken first in priority order, and
-+ * the others are woken last, in FIFO order.
-+ */
-+ prio = min(current->normal_prio, MAX_RT_PRIO);
-+
-+ plist_node_init(&q->list, prio);
-+ plist_add(&q->list, &hb->chain);
-+ q->task = current;
-+}
-+
-+/**
-+ * queue_me() - Enqueue the futex_q on the futex_hash_bucket
-+ * @q: The futex_q to enqueue
-+ * @hb: The destination hash bucket
-+ *
-+ * The hb->lock must be held by the caller, and is released here. A call to
-+ * queue_me() is typically paired with exactly one call to unqueue_me(). The
-+ * exceptions involve the PI related operations, which may use unqueue_me_pi()
-+ * or nothing if the unqueue is done as part of the wake process and the unqueue
-+ * state is implicit in the state of woken task (see futex_wait_requeue_pi() for
-+ * an example).
-+ */
-+static inline void queue_me(struct futex_q *q, struct futex_hash_bucket *hb)
-+ __releases(&hb->lock)
-+{
-+ __queue_me(q, hb);
-+ spin_unlock(&hb->lock);
-+}
-+
-+/**
-+ * unqueue_me() - Remove the futex_q from its futex_hash_bucket
-+ * @q: The futex_q to unqueue
-+ *
-+ * The q->lock_ptr must not be held by the caller. A call to unqueue_me() must
-+ * be paired with exactly one earlier call to queue_me().
-+ *
-+ * Return:
-+ * - 1 - if the futex_q was still queued (and we removed unqueued it);
-+ * - 0 - if the futex_q was already removed by the waking thread
-+ */
-+static int unqueue_me(struct futex_q *q)
-+{
-+ spinlock_t *lock_ptr;
-+ int ret = 0;
-+
-+ /* In the common case we don't take the spinlock, which is nice. */
-+retry:
-+ /*
-+ * q->lock_ptr can change between this read and the following spin_lock.
-+ * Use READ_ONCE to forbid the compiler from reloading q->lock_ptr and
-+ * optimizing lock_ptr out of the logic below.
-+ */
-+ lock_ptr = READ_ONCE(q->lock_ptr);
-+ if (lock_ptr != NULL) {
-+ spin_lock(lock_ptr);
-+ /*
-+ * q->lock_ptr can change between reading it and
-+ * spin_lock(), causing us to take the wrong lock. This
-+ * corrects the race condition.
-+ *
-+ * Reasoning goes like this: if we have the wrong lock,
-+ * q->lock_ptr must have changed (maybe several times)
-+ * between reading it and the spin_lock(). It can
-+ * change again after the spin_lock() but only if it was
-+ * already changed before the spin_lock(). It cannot,
-+ * however, change back to the original value. Therefore
-+ * we can detect whether we acquired the correct lock.
-+ */
-+ if (unlikely(lock_ptr != q->lock_ptr)) {
-+ spin_unlock(lock_ptr);
-+ goto retry;
-+ }
-+ __unqueue_futex(q);
-+
-+ BUG_ON(q->pi_state);
-+
-+ spin_unlock(lock_ptr);
-+ ret = 1;
-+ }
-+
-+ return ret;
-+}
-+
-+/*
-+ * PI futexes can not be requeued and must remove themself from the
-+ * hash bucket. The hash bucket lock (i.e. lock_ptr) is held on entry
-+ * and dropped here.
-+ */
-+static void unqueue_me_pi(struct futex_q *q)
-+ __releases(q->lock_ptr)
-+{
-+ __unqueue_futex(q);
-+
-+ BUG_ON(!q->pi_state);
-+ put_pi_state(q->pi_state);
-+ q->pi_state = NULL;
-+
-+ spin_unlock(q->lock_ptr);
-+}
-+
-+static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
-+ struct task_struct *argowner)
-+{
-+ struct futex_pi_state *pi_state = q->pi_state;
-+ u32 uval, curval, newval;
-+ struct task_struct *oldowner, *newowner;
-+ u32 newtid;
-+ int ret, err = 0;
-+
-+ lockdep_assert_held(q->lock_ptr);
-+
-+ raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
-+
-+ oldowner = pi_state->owner;
-+
-+ /*
-+ * We are here because either:
-+ *
-+ * - we stole the lock and pi_state->owner needs updating to reflect
-+ * that (@argowner == current),
-+ *
-+ * or:
-+ *
-+ * - someone stole our lock and we need to fix things to point to the
-+ * new owner (@argowner == NULL).
-+ *
-+ * Either way, we have to replace the TID in the user space variable.
-+ * This must be atomic as we have to preserve the owner died bit here.
-+ *
-+ * Note: We write the user space value _before_ changing the pi_state
-+ * because we can fault here. Imagine swapped out pages or a fork
-+ * that marked all the anonymous memory readonly for cow.
-+ *
-+ * Modifying pi_state _before_ the user space value would leave the
-+ * pi_state in an inconsistent state when we fault here, because we
-+ * need to drop the locks to handle the fault. This might be observed
-+ * in the PID check in lookup_pi_state.
-+ */
-+retry:
-+ if (!argowner) {
-+ if (oldowner != current) {
-+ /*
-+ * We raced against a concurrent self; things are
-+ * already fixed up. Nothing to do.
-+ */
-+ ret = 0;
-+ goto out_unlock;
-+ }
-+
-+ if (__rt_mutex_futex_trylock(&pi_state->pi_mutex)) {
-+ /* We got the lock after all, nothing to fix. */
-+ ret = 0;
-+ goto out_unlock;
-+ }
-+
-+ /*
-+ * Since we just failed the trylock; there must be an owner.
-+ */
-+ newowner = rt_mutex_owner(&pi_state->pi_mutex);
-+ BUG_ON(!newowner);
-+ } else {
-+ WARN_ON_ONCE(argowner != current);
-+ if (oldowner == current) {
-+ /*
-+ * We raced against a concurrent self; things are
-+ * already fixed up. Nothing to do.
-+ */
-+ ret = 0;
-+ goto out_unlock;
-+ }
-+ newowner = argowner;
-+ }
-+
-+ newtid = task_pid_vnr(newowner) | FUTEX_WAITERS;
-+ /* Owner died? */
-+ if (!pi_state->owner)
-+ newtid |= FUTEX_OWNER_DIED;
-+
-+ err = get_futex_value_locked(&uval, uaddr);
-+ if (err)
-+ goto handle_err;
-+
-+ for (;;) {
-+ newval = (uval & FUTEX_OWNER_DIED) | newtid;
-+
-+ err = cmpxchg_futex_value_locked(&curval, uaddr, uval, newval);
-+ if (err)
-+ goto handle_err;
-+
-+ if (curval == uval)
++ case 'h':
++ usage(basename(argv[0]));
++ exit(0);
++ case 'v':
++ log_verbosity(atoi(optarg));
+ break;
-+ uval = curval;
-+ }
-+
-+ /*
-+ * We fixed up user space. Now we need to fix the pi_state
-+ * itself.
-+ */
-+ if (pi_state->owner != NULL) {
-+ raw_spin_lock(&pi_state->owner->pi_lock);
-+ WARN_ON(list_empty(&pi_state->list));
-+ list_del_init(&pi_state->list);
-+ raw_spin_unlock(&pi_state->owner->pi_lock);
-+ }
-+
-+ pi_state->owner = newowner;
-+
-+ raw_spin_lock(&newowner->pi_lock);
-+ WARN_ON(!list_empty(&pi_state->list));
-+ list_add(&pi_state->list, &newowner->pi_state_list);
-+ raw_spin_unlock(&newowner->pi_lock);
-+ raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
-+
-+ return 0;
-+
-+ /*
-+ * In order to reschedule or handle a page fault, we need to drop the
-+ * locks here. In the case of a fault, this gives the other task
-+ * (either the highest priority waiter itself or the task which stole
-+ * the rtmutex) the chance to try the fixup of the pi_state. So once we
-+ * are back from handling the fault we need to check the pi_state after
-+ * reacquiring the locks and before trying to do another fixup. When
-+ * the fixup has been done already we simply return.
-+ *
-+ * Note: we hold both hb->lock and pi_mutex->wait_lock. We can safely
-+ * drop hb->lock since the caller owns the hb -> futex_q relation.
-+ * Dropping the pi_mutex->wait_lock requires the state revalidate.
-+ */
-+handle_err:
-+ raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
-+ spin_unlock(q->lock_ptr);
-+
-+ switch (err) {
-+ case -EFAULT:
-+ ret = fault_in_user_writeable(uaddr);
-+ break;
-+
-+ case -EAGAIN:
-+ cond_resched();
-+ ret = 0;
-+ break;
-+
-+ default:
-+ WARN_ON_ONCE(1);
-+ ret = err;
-+ break;
-+ }
-+
-+ spin_lock(q->lock_ptr);
-+ raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
-+
-+ /*
-+ * Check if someone else fixed it for us:
-+ */
-+ if (pi_state->owner != oldowner) {
-+ ret = 0;
-+ goto out_unlock;
-+ }
-+
-+ if (ret)
-+ goto out_unlock;
-+
-+ goto retry;
-+
-+out_unlock:
-+ raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
-+ return ret;
-+}
-+
-+static long futex_wait_restart(struct restart_block *restart);
-+
-+/**
-+ * fixup_owner() - Post lock pi_state and corner case management
-+ * @uaddr: user address of the futex
-+ * @q: futex_q (contains pi_state and access to the rt_mutex)
-+ * @locked: if the attempt to take the rt_mutex succeeded (1) or not (0)
-+ *
-+ * After attempting to lock an rt_mutex, this function is called to cleanup
-+ * the pi_state owner as well as handle race conditions that may allow us to
-+ * acquire the lock. Must be called with the hb lock held.
-+ *
-+ * Return:
-+ * - 1 - success, lock taken;
-+ * - 0 - success, lock not taken;
-+ * - <0 - on error (-EFAULT)
-+ */
-+static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked)
-+{
-+ int ret = 0;
-+
-+ if (locked) {
-+ /*
-+ * Got the lock. We might not be the anticipated owner if we
-+ * did a lock-steal - fix up the PI-state in that case:
-+ *
-+ * Speculative pi_state->owner read (we don't hold wait_lock);
-+ * since we own the lock pi_state->owner == current is the
-+ * stable state, anything else needs more attention.
-+ */
-+ if (q->pi_state->owner != current)
-+ ret = fixup_pi_state_owner(uaddr, q, current);
-+ return ret ? ret : locked;
-+ }
-+
-+ /*
-+ * If we didn't get the lock; check if anybody stole it from us. In
-+ * that case, we need to fix up the uval to point to them instead of
-+ * us, otherwise bad things happen. [10]
-+ *
-+ * Another speculative read; pi_state->owner == current is unstable
-+ * but needs our attention.
-+ */
-+ if (q->pi_state->owner == current) {
-+ ret = fixup_pi_state_owner(uaddr, q, NULL);
-+ return ret;
-+ }
-+
-+ /*
-+ * Paranoia check. If we did not take the lock, then we should not be
-+ * the owner of the rt_mutex.
-+ */
-+ if (rt_mutex_owner(&q->pi_state->pi_mutex) == current) {
-+ printk(KERN_ERR "fixup_owner: ret = %d pi-mutex: %p "
-+ "pi-state %p\n", ret,
-+ q->pi_state->pi_mutex.owner,
-+ q->pi_state->owner);
-+ }
-+
-+ return ret;
-+}
-+
-+/**
-+ * futex_wait_queue_me() - queue_me() and wait for wakeup, timeout, or signal
-+ * @hb: the futex hash bucket, must be locked by the caller
-+ * @q: the futex_q to queue up on
-+ * @timeout: the prepared hrtimer_sleeper, or null for no timeout
-+ */
-+static void futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q *q,
-+ struct hrtimer_sleeper *timeout)
-+{
-+ /*
-+ * The task state is guaranteed to be set before another task can
-+ * wake it. set_current_state() is implemented using smp_store_mb() and
-+ * queue_me() calls spin_unlock() upon completion, both serializing
-+ * access to the hash list and forcing another memory barrier.
-+ */
-+ set_current_state(TASK_INTERRUPTIBLE);
-+ queue_me(q, hb);
-+
-+ /* Arm the timer */
-+ if (timeout)
-+ hrtimer_sleeper_start_expires(timeout, HRTIMER_MODE_ABS);
-+
-+ /*
-+ * If we have been removed from the hash list, then another task
-+ * has tried to wake us, and we can skip the call to schedule().
-+ */
-+ if (likely(!plist_node_empty(&q->list))) {
-+ /*
-+ * If the timer has already expired, current will already be
-+ * flagged for rescheduling. Only call schedule if there
-+ * is no timeout, or if it has yet to expire.
-+ */
-+ if (!timeout || timeout->task)
-+ freezable_schedule();
-+ }
-+ __set_current_state(TASK_RUNNING);
-+}
-+
-+/**
-+ * futex_wait_setup() - Prepare to wait on a futex
-+ * @uaddr: the futex userspace address
-+ * @val: the expected value
-+ * @flags: futex flags (FLAGS_SHARED, etc.)
-+ * @q: the associated futex_q
-+ * @hb: storage for hash_bucket pointer to be returned to caller
-+ *
-+ * Setup the futex_q and locate the hash_bucket. Get the futex value and
-+ * compare it with the expected value. Handle atomic faults internally.
-+ * Return with the hb lock held and a q.key reference on success, and unlocked
-+ * with no q.key reference on failure.
-+ *
-+ * Return:
-+ * - 0 - uaddr contains val and hb has been locked;
-+ * - <1 - -EFAULT or -EWOULDBLOCK (uaddr does not contain val) and hb is unlocked
-+ */
-+static int futex_wait_setup(u32 __user *uaddr, u32 val, unsigned int flags,
-+ struct futex_q *q, struct futex_hash_bucket **hb)
-+{
-+ u32 uval;
-+ int ret;
-+
-+ /*
-+ * Access the page AFTER the hash-bucket is locked.
-+ * Order is important:
-+ *
-+ * Userspace waiter: val = var; if (cond(val)) futex_wait(&var, val);
-+ * Userspace waker: if (cond(var)) { var = new; futex_wake(&var); }
-+ *
-+ * The basic logical guarantee of a futex is that it blocks ONLY
-+ * if cond(var) is known to be true at the time of blocking, for
-+ * any cond. If we locked the hash-bucket after testing *uaddr, that
-+ * would open a race condition where we could block indefinitely with
-+ * cond(var) false, which would violate the guarantee.
-+ *
-+ * On the other hand, we insert q and release the hash-bucket only
-+ * after testing *uaddr. This guarantees that futex_wait() will NOT
-+ * absorb a wakeup if *uaddr does not match the desired values
-+ * while the syscall executes.
-+ */
-+retry:
-+ ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q->key, FUTEX_READ);
-+ if (unlikely(ret != 0))
-+ return ret;
-+
-+retry_private:
-+ *hb = queue_lock(q);
-+
-+ ret = get_futex_value_locked(&uval, uaddr);
-+
-+ if (ret) {
-+ queue_unlock(*hb);
-+
-+ ret = get_user(uval, uaddr);
-+ if (ret)
-+ return ret;
-+
-+ if (!(flags & FLAGS_SHARED))
-+ goto retry_private;
-+
-+ goto retry;
-+ }
-+
-+ if (uval != val) {
-+ queue_unlock(*hb);
-+ ret = -EWOULDBLOCK;
-+ }
-+
-+ return ret;
-+}
-+
-+static int futex_wait(u32 __user *uaddr, unsigned int flags, u32 val,
-+ ktime_t *abs_time, u32 bitset)
-+{
-+ struct hrtimer_sleeper timeout, *to;
-+ struct restart_block *restart;
-+ struct futex_hash_bucket *hb;
-+ struct futex_q q = futex_q_init;
-+ int ret;
-+
-+ if (!bitset)
-+ return -EINVAL;
-+ q.bitset = bitset;
-+
-+ to = futex_setup_timer(abs_time, &timeout, flags,
-+ current->timer_slack_ns);
-+retry:
-+ /*
-+ * Prepare to wait on uaddr. On success, holds hb lock and increments
-+ * q.key refs.
-+ */
-+ ret = futex_wait_setup(uaddr, val, flags, &q, &hb);
-+ if (ret)
-+ goto out;
-+
-+ /* queue_me and wait for wakeup, timeout, or a signal. */
-+ futex_wait_queue_me(hb, &q, to);
-+
-+ /* If we were woken (and unqueued), we succeeded, whatever. */
-+ ret = 0;
-+ /* unqueue_me() drops q.key ref */
-+ if (!unqueue_me(&q))
-+ goto out;
-+ ret = -ETIMEDOUT;
-+ if (to && !to->task)
-+ goto out;
-+
-+ /*
-+ * We expect signal_pending(current), but we might be the
-+ * victim of a spurious wakeup as well.
-+ */
-+ if (!signal_pending(current))
-+ goto retry;
-+
-+ ret = -ERESTARTSYS;
-+ if (!abs_time)
-+ goto out;
-+
-+ restart = &current->restart_block;
-+ restart->fn = futex_wait_restart;
-+ restart->futex.uaddr = uaddr;
-+ restart->futex.val = val;
-+ restart->futex.time = *abs_time;
-+ restart->futex.bitset = bitset;
-+ restart->futex.flags = flags | FLAGS_HAS_TIMEOUT;
-+
-+ ret = -ERESTART_RESTARTBLOCK;
-+
-+out:
-+ if (to) {
-+ hrtimer_cancel(&to->timer);
-+ destroy_hrtimer_on_stack(&to->timer);
-+ }
-+ return ret;
-+}
-+
-+
-+static long futex_wait_restart(struct restart_block *restart)
-+{
-+ u32 __user *uaddr = restart->futex.uaddr;
-+ ktime_t t, *tp = NULL;
-+
-+ if (restart->futex.flags & FLAGS_HAS_TIMEOUT) {
-+ t = restart->futex.time;
-+ tp = &t;
-+ }
-+ restart->fn = do_no_restart_syscall;
-+
-+ return (long)futex_wait(uaddr, restart->futex.flags,
-+ restart->futex.val, tp, restart->futex.bitset);
-+}
-+
-+
-+/*
-+ * Userspace tried a 0 -> TID atomic transition of the futex value
-+ * and failed. The kernel side here does the whole locking operation:
-+ * if there are waiters then it will block as a consequence of relying
-+ * on rt-mutexes, it does PI, etc. (Due to races the kernel might see
-+ * a 0 value of the futex too.).
-+ *
-+ * Also serves as futex trylock_pi()'ing, and due semantics.
-+ */
-+static int futex_lock_pi(u32 __user *uaddr, unsigned int flags,
-+ ktime_t *time, int trylock)
-+{
-+ struct hrtimer_sleeper timeout, *to;
-+ struct futex_pi_state *pi_state = NULL;
-+ struct task_struct *exiting = NULL;
-+ struct rt_mutex_waiter rt_waiter;
-+ struct futex_hash_bucket *hb;
-+ struct futex_q q = futex_q_init;
-+ int res, ret;
-+
-+ if (!IS_ENABLED(CONFIG_FUTEX_PI))
-+ return -ENOSYS;
-+
-+ if (refill_pi_state_cache())
-+ return -ENOMEM;
-+
-+ to = futex_setup_timer(time, &timeout, FLAGS_CLOCKRT, 0);
-+
-+retry:
-+ ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q.key, FUTEX_WRITE);
-+ if (unlikely(ret != 0))
-+ goto out;
-+
-+retry_private:
-+ hb = queue_lock(&q);
-+
-+ ret = futex_lock_pi_atomic(uaddr, hb, &q.key, &q.pi_state, current,
-+ &exiting, 0);
-+ if (unlikely(ret)) {
-+ /*
-+ * Atomic work succeeded and we got the lock,
-+ * or failed. Either way, we do _not_ block.
-+ */
-+ switch (ret) {
-+ case 1:
-+ /* We got the lock. */
-+ ret = 0;
-+ goto out_unlock_put_key;
-+ case -EFAULT:
-+ goto uaddr_faulted;
-+ case -EBUSY:
-+ case -EAGAIN:
-+ /*
-+ * Two reasons for this:
-+ * - EBUSY: Task is exiting and we just wait for the
-+ * exit to complete.
-+ * - EAGAIN: The user space value changed.
-+ */
-+ queue_unlock(hb);
-+ /*
-+ * Handle the case where the owner is in the middle of
-+ * exiting. Wait for the exit to complete otherwise
-+ * this task might loop forever, aka. live lock.
-+ */
-+ wait_for_owner_exiting(ret, exiting);
-+ cond_resched();
-+ goto retry;
+ default:
-+ goto out_unlock_put_key;
++ usage(basename(argv[0]));
++ exit(1);
+ }
+ }
+
-+ WARN_ON(!q.pi_state);
-+
-+ /*
-+ * Only actually queue now that the atomic ops are done:
-+ */
-+ __queue_me(&q, hb);
-+
-+ if (trylock) {
-+ ret = rt_mutex_futex_trylock(&q.pi_state->pi_mutex);
-+ /* Fixup the trylock return value: */
-+ ret = ret ? 0 : -EWOULDBLOCK;
-+ goto no_block;
-+ }
-+
-+ rt_mutex_init_waiter(&rt_waiter);
-+
-+ /*
-+ * On PREEMPT_RT_FULL, when hb->lock becomes an rt_mutex, we must not
-+ * hold it while doing rt_mutex_start_proxy(), because then it will
-+ * include hb->lock in the blocking chain, even through we'll not in
-+ * fact hold it while blocking. This will lead it to report -EDEADLK
-+ * and BUG when futex_unlock_pi() interleaves with this.
-+ *
-+ * Therefore acquire wait_lock while holding hb->lock, but drop the
-+ * latter before calling __rt_mutex_start_proxy_lock(). This
-+ * interleaves with futex_unlock_pi() -- which does a similar lock
-+ * handoff -- such that the latter can observe the futex_q::pi_state
-+ * before __rt_mutex_start_proxy_lock() is done.
-+ */
-+ raw_spin_lock_irq(&q.pi_state->pi_mutex.wait_lock);
-+ spin_unlock(q.lock_ptr);
-+ /*
-+ * __rt_mutex_start_proxy_lock() unconditionally enqueues the @rt_waiter
-+ * such that futex_unlock_pi() is guaranteed to observe the waiter when
-+ * it sees the futex_q::pi_state.
-+ */
-+ ret = __rt_mutex_start_proxy_lock(&q.pi_state->pi_mutex, &rt_waiter, current);
-+ raw_spin_unlock_irq(&q.pi_state->pi_mutex.wait_lock);
-+
-+ if (ret) {
-+ if (ret == 1)
-+ ret = 0;
-+ goto cleanup;
-+ }
-+
-+ if (unlikely(to))
-+ hrtimer_sleeper_start_expires(to, HRTIMER_MODE_ABS);
-+
-+ ret = rt_mutex_wait_proxy_lock(&q.pi_state->pi_mutex, to, &rt_waiter);
-+
-+cleanup:
-+ spin_lock(q.lock_ptr);
-+ /*
-+ * If we failed to acquire the lock (deadlock/signal/timeout), we must
-+ * first acquire the hb->lock before removing the lock from the
-+ * rt_mutex waitqueue, such that we can keep the hb and rt_mutex wait
-+ * lists consistent.
-+ *
-+ * In particular; it is important that futex_unlock_pi() can not
-+ * observe this inconsistency.
-+ */
-+ if (ret && !rt_mutex_cleanup_proxy_lock(&q.pi_state->pi_mutex, &rt_waiter))
-+ ret = 0;
-+
-+no_block:
-+ /*
-+ * Fixup the pi_state owner and possibly acquire the lock if we
-+ * haven't already.
-+ */
-+ res = fixup_owner(uaddr, &q, !ret);
-+ /*
-+ * If fixup_owner() returned an error, proprogate that. If it acquired
-+ * the lock, clear our -ETIMEDOUT or -EINTR.
-+ */
-+ if (res)
-+ ret = (res < 0) ? res : 0;
-+
-+ /*
-+ * If fixup_owner() faulted and was unable to handle the fault, unlock
-+ * it and return the fault to userspace.
-+ */
-+ if (ret && (rt_mutex_owner(&q.pi_state->pi_mutex) == current)) {
-+ pi_state = q.pi_state;
-+ get_pi_state(pi_state);
-+ }
-+
-+ /* Unqueue and drop the lock */
-+ unqueue_me_pi(&q);
-+
-+ if (pi_state) {
-+ rt_mutex_futex_unlock(&pi_state->pi_mutex);
-+ put_pi_state(pi_state);
-+ }
-+
-+ goto out;
-+
-+out_unlock_put_key:
-+ queue_unlock(hb);
++ ksft_print_header();
++ ksft_set_plan(2);
++ ksft_print_msg("%s: Test FUTEX2_WAITV\n",
++ basename(argv[0]));
+
-+out:
-+ if (to) {
-+ hrtimer_cancel(&to->timer);
-+ destroy_hrtimer_on_stack(&to->timer);
++ //info("Calling private futex2_wait on f1: %u @ %p with val=%u\n", *f1, f1, *f1);
++
++ for (i = 0; i < NR_FUTEXES; i++) {
++ waitv[i].uaddr = &futexes[i];
++ waitv[i].flags = FUTEX_32;
++ waitv[i].val = 0;
+ }
-+ return ret != -EINTR ? ret : -ERESTARTNOINTR;
-+
-+uaddr_faulted:
-+ queue_unlock(hb);
-+
-+ ret = fault_in_user_writeable(uaddr);
-+ if (ret)
-+ goto out;
-+
-+ if (!(flags & FLAGS_SHARED))
-+ goto retry_private;
+
-+ goto retry;
-+}
-+
-+/*
-+ * Userspace attempted a TID -> 0 atomic transition, and failed.
-+ * This is the in-kernel slowpath: we look up the PI state (if any),
-+ * and do the rt-mutex unlock.
-+ */
-+static int futex_unlock_pi(u32 __user *uaddr, unsigned int flags)
-+{
-+ u32 curval, uval, vpid = task_pid_vnr(current);
-+ union futex_key key = FUTEX_KEY_INIT;
-+ struct futex_hash_bucket *hb;
-+ struct futex_q *top_waiter;
-+ int ret;
-+
-+ if (!IS_ENABLED(CONFIG_FUTEX_PI))
-+ return -ENOSYS;
-+
-+retry:
-+ if (get_user(uval, uaddr))
-+ return -EFAULT;
-+ /*
-+ * We release only a lock we actually own:
-+ */
-+ if ((uval & FUTEX_TID_MASK) != vpid)
-+ return -EPERM;
-+
-+ ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key, FUTEX_WRITE);
-+ if (ret)
-+ return ret;
-+
-+ hb = hash_futex(&key);
-+ spin_lock(&hb->lock);
-+
-+ /*
-+ * Check waiters first. We do not trust user space values at
-+ * all and we at least want to know if user space fiddled
-+ * with the futex value instead of blindly unlocking.
-+ */
-+ top_waiter = futex_top_waiter(hb, &key);
-+ if (top_waiter) {
-+ struct futex_pi_state *pi_state = top_waiter->pi_state;
-+
-+ ret = -EINVAL;
-+ if (!pi_state)
-+ goto out_unlock;
-+
-+ /*
-+ * If current does not own the pi_state then the futex is
-+ * inconsistent and user space fiddled with the futex value.
-+ */
-+ if (pi_state->owner != current)
-+ goto out_unlock;
-+
-+ get_pi_state(pi_state);
-+ /*
-+ * By taking wait_lock while still holding hb->lock, we ensure
-+ * there is no point where we hold neither; and therefore
-+ * wake_futex_pi() must observe a state consistent with what we
-+ * observed.
-+ *
-+ * In particular; this forces __rt_mutex_start_proxy() to
-+ * complete such that we're guaranteed to observe the
-+ * rt_waiter. Also see the WARN in wake_futex_pi().
-+ */
-+ raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
-+ spin_unlock(&hb->lock);
-+
-+ /* drops pi_state->pi_mutex.wait_lock */
-+ ret = wake_futex_pi(uaddr, uval, pi_state);
++ if (pthread_create(&waiter, NULL, waiterfn, NULL))
++ error("pthread_create failed\n", errno);
+
-+ put_pi_state(pi_state);
++ usleep(WAKE_WAIT_US);
+
-+ /*
-+ * Success, we're done! No tricky corner cases.
-+ */
-+ if (!ret)
-+ goto out_putkey;
-+ /*
-+ * The atomic access to the futex value generated a
-+ * pagefault, so retry the user-access and the wakeup:
-+ */
-+ if (ret == -EFAULT)
-+ goto pi_faulted;
-+ /*
-+ * A unconditional UNLOCK_PI op raced against a waiter
-+ * setting the FUTEX_WAITERS bit. Try again.
-+ */
-+ if (ret == -EAGAIN)
-+ goto pi_retry;
-+ /*
-+ * wake_futex_pi has detected invalid state. Tell user
-+ * space.
-+ */
-+ goto out_putkey;
++ // info("Calling private futex2_wake on f1: %u @ %p with val=%u\n", *f1, f1, *f1);
++ res = futex2_wake(waitv[NR_FUTEXES - 1].uaddr, 1, FUTEX_32);
++ if (res != 1) {
++ ksft_test_result_fail("futex2_wake private returned: %d %s\n",
++ res ? errno : res,
++ res ? strerror(errno) : "");
++ ret = RET_FAIL;
++ } else {
++ ksft_test_result_pass("futex2_waitv private succeeds\n");
+ }
+
-+ /*
-+ * We have no kernel internal state, i.e. no waiters in the
-+ * kernel. Waiters which are about to queue themselves are stuck
-+ * on hb->lock. So we can safely ignore them. We do neither
-+ * preserve the WAITERS bit not the OWNER_DIED one. We are the
-+ * owner.
-+ */
-+ if ((ret = cmpxchg_futex_value_locked(&curval, uaddr, uval, 0))) {
-+ spin_unlock(&hb->lock);
-+ switch (ret) {
-+ case -EFAULT:
-+ goto pi_faulted;
-+
-+ case -EAGAIN:
-+ goto pi_retry;
-+
-+ default:
-+ WARN_ON_ONCE(1);
-+ goto out_putkey;
++ for (i = 0; i < NR_FUTEXES; i++) {
++ int shm_id = shmget(IPC_PRIVATE, 4096, IPC_CREAT | 0666);
++ if (shm_id < 0) {
++ perror("shmget");
++ exit(1);
+ }
-+ }
-+
-+ /*
-+ * If uval has changed, let user space handle it.
-+ */
-+ ret = (curval == uval) ? 0 : -EAGAIN;
-+
-+out_unlock:
-+ spin_unlock(&hb->lock);
-+out_putkey:
-+ return ret;
-+
-+pi_retry:
-+ cond_resched();
-+ goto retry;
-+
-+pi_faulted:
-+
-+ ret = fault_in_user_writeable(uaddr);
-+ if (!ret)
-+ goto retry;
-+
-+ return ret;
-+}
-+
-+/**
-+ * handle_early_requeue_pi_wakeup() - Detect early wakeup on the initial futex
-+ * @hb: the hash_bucket futex_q was original enqueued on
-+ * @q: the futex_q woken while waiting to be requeued
-+ * @key2: the futex_key of the requeue target futex
-+ * @timeout: the timeout associated with the wait (NULL if none)
-+ *
-+ * Detect if the task was woken on the initial futex as opposed to the requeue
-+ * target futex. If so, determine if it was a timeout or a signal that caused
-+ * the wakeup and return the appropriate error code to the caller. Must be
-+ * called with the hb lock held.
-+ *
-+ * Return:
-+ * - 0 = no early wakeup detected;
-+ * - <0 = -ETIMEDOUT or -ERESTARTNOINTR
-+ */
-+static inline
-+int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb,
-+ struct futex_q *q, union futex_key *key2,
-+ struct hrtimer_sleeper *timeout)
-+{
-+ int ret = 0;
-+
-+ /*
-+ * With the hb lock held, we avoid races while we process the wakeup.
-+ * We only need to hold hb (and not hb2) to ensure atomicity as the
-+ * wakeup code can't change q.key from uaddr to uaddr2 if we hold hb.
-+ * It can't be requeued from uaddr2 to something else since we don't
-+ * support a PI aware source futex for requeue.
-+ */
-+ if (!match_futex(&q->key, key2)) {
-+ WARN_ON(q->lock_ptr && (&hb->lock != q->lock_ptr));
-+ /*
-+ * We were woken prior to requeue by a timeout or a signal.
-+ * Unqueue the futex_q and determine which it was.
-+ */
-+ plist_del(&q->list, &hb->chain);
-+ hb_waiters_dec(hb);
-+
-+ /* Handle spurious wakeups gracefully */
-+ ret = -EWOULDBLOCK;
-+ if (timeout && !timeout->task)
-+ ret = -ETIMEDOUT;
-+ else if (signal_pending(current))
-+ ret = -ERESTARTNOINTR;
-+ }
-+ return ret;
-+}
-+
-+/**
-+ * futex_wait_requeue_pi() - Wait on uaddr and take uaddr2
-+ * @uaddr: the futex we initially wait on (non-pi)
-+ * @flags: futex flags (FLAGS_SHARED, FLAGS_CLOCKRT, etc.), they must be
-+ * the same type, no requeueing from private to shared, etc.
-+ * @val: the expected value of uaddr
-+ * @abs_time: absolute timeout
-+ * @bitset: 32 bit wakeup bitset set by userspace, defaults to all
-+ * @uaddr2: the pi futex we will take prior to returning to user-space
-+ *
-+ * The caller will wait on uaddr and will be requeued by futex_requeue() to
-+ * uaddr2 which must be PI aware and unique from uaddr. Normal wakeup will wake
-+ * on uaddr2 and complete the acquisition of the rt_mutex prior to returning to
-+ * userspace. This ensures the rt_mutex maintains an owner when it has waiters;
-+ * without one, the pi logic would not know which task to boost/deboost, if
-+ * there was a need to.
-+ *
-+ * We call schedule in futex_wait_queue_me() when we enqueue and return there
-+ * via the following--
-+ * 1) wakeup on uaddr2 after an atomic lock acquisition by futex_requeue()
-+ * 2) wakeup on uaddr2 after a requeue
-+ * 3) signal
-+ * 4) timeout
-+ *
-+ * If 3, cleanup and return -ERESTARTNOINTR.
-+ *
-+ * If 2, we may then block on trying to take the rt_mutex and return via:
-+ * 5) successful lock
-+ * 6) signal
-+ * 7) timeout
-+ * 8) other lock acquisition failure
-+ *
-+ * If 6, return -EWOULDBLOCK (restarting the syscall would do the same).
-+ *
-+ * If 4 or 7, we cleanup and return with -ETIMEDOUT.
-+ *
-+ * Return:
-+ * - 0 - On success;
-+ * - <0 - On error
-+ */
-+static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
-+ u32 val, ktime_t *abs_time, u32 bitset,
-+ u32 __user *uaddr2)
-+{
-+ struct hrtimer_sleeper timeout, *to;
-+ struct futex_pi_state *pi_state = NULL;
-+ struct rt_mutex_waiter rt_waiter;
-+ struct futex_hash_bucket *hb;
-+ union futex_key key2 = FUTEX_KEY_INIT;
-+ struct futex_q q = futex_q_init;
-+ int res, ret;
-+
-+ if (!IS_ENABLED(CONFIG_FUTEX_PI))
-+ return -ENOSYS;
-+
-+ if (uaddr == uaddr2)
-+ return -EINVAL;
-+
-+ if (!bitset)
-+ return -EINVAL;
+
-+ to = futex_setup_timer(abs_time, &timeout, flags,
-+ current->timer_slack_ns);
-+
-+ /*
-+ * The waiter is allocated on our stack, manipulated by the requeue
-+ * code while we sleep on uaddr.
-+ */
-+ rt_mutex_init_waiter(&rt_waiter);
-+
-+ ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, FUTEX_WRITE);
-+ if (unlikely(ret != 0))
-+ goto out;
-+
-+ q.bitset = bitset;
-+ q.rt_waiter = &rt_waiter;
-+ q.requeue_pi_key = &key2;
-+
-+ /*
-+ * Prepare to wait on uaddr. On success, increments q.key (key1) ref
-+ * count.
-+ */
-+ ret = futex_wait_setup(uaddr, val, flags, &q, &hb);
-+ if (ret)
-+ goto out;
++ unsigned int *shared_data = shmat(shm_id, NULL, 0);
++ *shared_data = 0;
+
-+ /*
-+ * The check above which compares uaddrs is not sufficient for
-+ * shared futexes. We need to compare the keys:
-+ */
-+ if (match_futex(&q.key, &key2)) {
-+ queue_unlock(hb);
-+ ret = -EINVAL;
-+ goto out;
++ waitv[i].uaddr = shared_data;
++ waitv[i].flags = FUTEX_32 | FUTEX_SHARED_FLAG;
++ waitv[i].val = 0;
+ }
+
-+ /* Queue the futex_q, drop the hb lock, wait for wakeup. */
-+ futex_wait_queue_me(hb, &q, to);
++ //info("Calling shared futex2_wait on f1: %u @ %p with val=%u\n", *f1, f1, *f1);
+
-+ spin_lock(&hb->lock);
-+ ret = handle_early_requeue_pi_wakeup(hb, &q, &key2, to);
-+ spin_unlock(&hb->lock);
-+ if (ret)
-+ goto out;
++ if (pthread_create(&waiter, NULL, waiterfn, NULL))
++ error("pthread_create failed\n", errno);
+
-+ /*
-+ * In order for us to be here, we know our q.key == key2, and since
-+ * we took the hb->lock above, we also know that futex_requeue() has
-+ * completed and we no longer have to concern ourselves with a wakeup
-+ * race with the atomic proxy lock acquisition by the requeue code. The
-+ * futex_requeue dropped our key1 reference and incremented our key2
-+ * reference count.
-+ */
++ usleep(WAKE_WAIT_US);
+
-+ /* Check if the requeue code acquired the second futex for us. */
-+ if (!q.rt_waiter) {
-+ /*
-+ * Got the lock. We might not be the anticipated owner if we
-+ * did a lock-steal - fix up the PI-state in that case.
-+ */
-+ if (q.pi_state && (q.pi_state->owner != current)) {
-+ spin_lock(q.lock_ptr);
-+ ret = fixup_pi_state_owner(uaddr2, &q, current);
-+ if (ret && rt_mutex_owner(&q.pi_state->pi_mutex) == current) {
-+ pi_state = q.pi_state;
-+ get_pi_state(pi_state);
-+ }
-+ /*
-+ * Drop the reference to the pi state which
-+ * the requeue_pi() code acquired for us.
-+ */
-+ put_pi_state(q.pi_state);
-+ spin_unlock(q.lock_ptr);
-+ }
++ // info("Calling shared futex2_wake on f1: %u @ %p with val=%u\n", *f1, f1, *f1);
++ res = futex2_wake(waitv[NR_FUTEXES - 1].uaddr, 1, FUTEX_32 | FUTEX_SHARED_FLAG);
++ if (res != 1) {
++ ksft_test_result_fail("futex2_wake shared returned: %d %s\n",
++ res ? errno : res,
++ res ? strerror(errno) : "");
++ ret = RET_FAIL;
+ } else {
-+ struct rt_mutex *pi_mutex;
-+
-+ /*
-+ * We have been woken up by futex_unlock_pi(), a timeout, or a
-+ * signal. futex_unlock_pi() will not destroy the lock_ptr nor
-+ * the pi_state.
-+ */
-+ WARN_ON(!q.pi_state);
-+ pi_mutex = &q.pi_state->pi_mutex;
-+ ret = rt_mutex_wait_proxy_lock(pi_mutex, to, &rt_waiter);
-+
-+ spin_lock(q.lock_ptr);
-+ if (ret && !rt_mutex_cleanup_proxy_lock(pi_mutex, &rt_waiter))
-+ ret = 0;
-+
-+ debug_rt_mutex_free_waiter(&rt_waiter);
-+ /*
-+ * Fixup the pi_state owner and possibly acquire the lock if we
-+ * haven't already.
-+ */
-+ res = fixup_owner(uaddr2, &q, !ret);
-+ /*
-+ * If fixup_owner() returned an error, proprogate that. If it
-+ * acquired the lock, clear -ETIMEDOUT or -EINTR.
-+ */
-+ if (res)
-+ ret = (res < 0) ? res : 0;
-+
-+ /*
-+ * If fixup_pi_state_owner() faulted and was unable to handle
-+ * the fault, unlock the rt_mutex and return the fault to
-+ * userspace.
-+ */
-+ if (ret && rt_mutex_owner(&q.pi_state->pi_mutex) == current) {
-+ pi_state = q.pi_state;
-+ get_pi_state(pi_state);
-+ }
-+
-+ /* Unqueue and drop the lock. */
-+ unqueue_me_pi(&q);
-+ }
-+
-+ if (pi_state) {
-+ rt_mutex_futex_unlock(&pi_state->pi_mutex);
-+ put_pi_state(pi_state);
++ ksft_test_result_pass("futex2_wake shared succeeds\n");
+ }
+
-+ if (ret == -EINTR) {
-+ /*
-+ * We've already been requeued, but cannot restart by calling
-+ * futex_lock_pi() directly. We could restart this syscall, but
-+ * it would detect that the user space "val" changed and return
-+ * -EWOULDBLOCK. Save the overhead of the restart and return
-+ * -EWOULDBLOCK directly.
-+ */
-+ ret = -EWOULDBLOCK;
-+ }
++ for (i = 0; i < NR_FUTEXES; i++)
++ shmdt(waitv[i].uaddr);
+
-+out:
-+ if (to) {
-+ hrtimer_cancel(&to->timer);
-+ destroy_hrtimer_on_stack(&to->timer);
-+ }
++ ksft_print_cnts();
+ return ret;
+}
-+
-+static long do_futex1(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
-+ u32 __user *uaddr2, u32 val2, u32 val3)
-+{
-+ int cmd = op & FUTEX_CMD_MASK;
-+ unsigned int flags = 0;
-+
-+ if (!(op & FUTEX_PRIVATE_FLAG))
-+ flags |= FLAGS_SHARED;
-+
-+ if (op & FUTEX_CLOCK_REALTIME) {
-+ flags |= FLAGS_CLOCKRT;
-+ if (cmd != FUTEX_WAIT && cmd != FUTEX_WAIT_BITSET && \
-+ cmd != FUTEX_WAIT_REQUEUE_PI)
-+ return -ENOSYS;
-+ }
-+
-+ switch (cmd) {
-+ case FUTEX_LOCK_PI:
-+ case FUTEX_UNLOCK_PI:
-+ case FUTEX_TRYLOCK_PI:
-+ case FUTEX_WAIT_REQUEUE_PI:
-+ case FUTEX_CMP_REQUEUE_PI:
-+ if (!futex_cmpxchg_enabled)
-+ return -ENOSYS;
-+ }
-+
-+ switch (cmd) {
-+ case FUTEX_WAIT:
-+ val3 = FUTEX_BITSET_MATCH_ANY;
-+ fallthrough;
-+ case FUTEX_WAIT_BITSET:
-+ return futex_wait(uaddr, flags, val, timeout, val3);
-+ case FUTEX_WAKE:
-+ val3 = FUTEX_BITSET_MATCH_ANY;
-+ fallthrough;
-+ case FUTEX_WAKE_BITSET:
-+ return futex_wake(uaddr, flags, val, val3);
-+ case FUTEX_REQUEUE:
-+ return futex_requeue(uaddr, flags, uaddr2, val, val2, NULL, 0);
-+ case FUTEX_CMP_REQUEUE:
-+ return futex_requeue(uaddr, flags, uaddr2, val, val2, &val3, 0);
-+ case FUTEX_WAKE_OP:
-+ return futex_wake_op(uaddr, flags, uaddr2, val, val2, val3);
-+ case FUTEX_LOCK_PI:
-+ return futex_lock_pi(uaddr, flags, timeout, 0);
-+ case FUTEX_UNLOCK_PI:
-+ return futex_unlock_pi(uaddr, flags);
-+ case FUTEX_TRYLOCK_PI:
-+ return futex_lock_pi(uaddr, flags, NULL, 1);
-+ case FUTEX_WAIT_REQUEUE_PI:
-+ val3 = FUTEX_BITSET_MATCH_ANY;
-+ return futex_wait_requeue_pi(uaddr, flags, val, timeout, val3,
-+ uaddr2);
-+ case FUTEX_CMP_REQUEUE_PI:
-+ return futex_requeue(uaddr, flags, uaddr2, val, val2, &val3, 1);
-+ }
-+ return -ENOSYS;
-+}
-+
-+
-+SYSCALL_DEFINE6(futex1, u32 __user *, uaddr, int, op, u32, val,
-+ struct __kernel_timespec __user *, utime, u32 __user *, uaddr2,
-+ u32, val3)
-+{
-+ struct timespec64 ts;
-+ ktime_t t, *tp = NULL;
-+ u32 val2 = 0;
-+ int cmd = op & FUTEX_CMD_MASK;
-+
-+ if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI ||
-+ cmd == FUTEX_WAIT_BITSET ||
-+ cmd == FUTEX_WAIT_REQUEUE_PI)) {
-+ if (unlikely(should_fail_futex(!(op & FUTEX_PRIVATE_FLAG))))
-+ return -EFAULT;
-+ if (get_timespec64(&ts, utime))
-+ return -EFAULT;
-+ if (!timespec64_valid(&ts))
-+ return -EINVAL;
-+
-+ t = timespec64_to_ktime(ts);
-+ if (cmd == FUTEX_WAIT)
-+ t = ktime_add_safe(ktime_get(), t);
-+ tp = &t;
-+ }
-+ /*
-+ * requeue parameter in 'utime' if cmd == FUTEX_*_REQUEUE_*.
-+ * number of waiters to wake in 'utime' if cmd == FUTEX_WAKE_OP.
-+ */
-+ if (cmd == FUTEX_REQUEUE || cmd == FUTEX_CMP_REQUEUE ||
-+ cmd == FUTEX_CMP_REQUEUE_PI || cmd == FUTEX_WAKE_OP)
-+ val2 = (u32) (unsigned long) utime;
-+
-+ return do_futex1(uaddr, op, val, tp, uaddr2, val2, val3);
-+}
-+
-+static void __init futex_detect_cmpxchg(void)
-+{
-+#ifndef CONFIG_HAVE_FUTEX_CMPXCHG
-+ u32 curval;
-+
-+ /*
-+ * This will fail and we want it. Some arch implementations do
-+ * runtime detection of the futex_atomic_cmpxchg_inatomic()
-+ * functionality. We want to know that before we call in any
-+ * of the complex code paths. Also we want to prevent
-+ * registration of robust lists in that case. NULL is
-+ * guaranteed to fault and we get -EFAULT on functional
-+ * implementation, the non-functional ones will return
-+ * -ENOSYS.
-+ */
-+ if (cmpxchg_futex_value_locked(&curval, NULL, 0, 0) == -EFAULT)
-+ futex_cmpxchg_enabled = 1;
-+#endif
-+}
-+
-+static int __init futex_init(void)
-+{
-+ unsigned int futex_shift;
-+ unsigned long i;
-+
-+#if CONFIG_BASE_SMALL
-+ futex_hashsize = 16;
-+#else
-+ futex_hashsize = roundup_pow_of_two(256 * num_possible_cpus());
-+#endif
-+
-+ futex_queues = alloc_large_system_hash("futex1", sizeof(*futex_queues),
-+ futex_hashsize, 0,
-+ futex_hashsize < 256 ? HASH_SMALL : 0,
-+ &futex_shift, NULL,
-+ futex_hashsize, futex_hashsize);
-+ futex_hashsize = 1UL << futex_shift;
-+
-+ futex_detect_cmpxchg();
-+
-+ for (i = 0; i < futex_hashsize; i++) {
-+ atomic_set(&futex_queues[i].waiters, 0);
-+ plist_head_init(&futex_queues[i].chain);
-+ spin_lock_init(&futex_queues[i].lock);
-+ }
-+
-+ return 0;
-+}
-+core_initcall(futex_init);
-diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
-index 3e1a713d3e57..b53a24a99a14 100644
---- a/kernel/sys_ni.c
-+++ b/kernel/sys_ni.c
-@@ -153,6 +153,8 @@ COND_SYSCALL(futex_wait);
- COND_SYSCALL(futex_wake);
- COND_SYSCALL(futex_waitv);
+diff --git a/tools/testing/selftests/futex/functional/run.sh b/tools/testing/selftests/futex/functional/run.sh
+index 3730159c8..18b3883d7 100755
+--- a/tools/testing/selftests/futex/functional/run.sh
++++ b/tools/testing/selftests/futex/functional/run.sh
+@@ -76,3 +76,6 @@ echo
-+COND_SYSCALL(futex1);
+ echo
+ ./futex2_wait $COLOR
+
- /* kernel/hrtimer.c */
-
- /* kernel/itimer.c */
-diff --git a/tools/arch/x86/include/asm/unistd_64.h b/tools/arch/x86/include/asm/unistd_64.h
-index 4205ed4158bf..43de5a59ac1c 100644
---- a/tools/arch/x86/include/asm/unistd_64.h
-+++ b/tools/arch/x86/include/asm/unistd_64.h
-@@ -17,3 +17,15 @@
- #ifndef __NR_setns
- #define __NR_setns 308
++echo
++./futex2_waitv $COLOR
+diff --git a/tools/testing/selftests/futex/include/futex2test.h b/tools/testing/selftests/futex/include/futex2test.h
+index 807b8b57f..10be0c504 100644
+--- a/tools/testing/selftests/futex/include/futex2test.h
++++ b/tools/testing/selftests/futex/include/futex2test.h
+@@ -27,10 +27,18 @@
+ #ifndef FUTEX_32
+ #define FUTEX_32 2
#endif
+-#ifdef __x86_64__
+-# ifndef FUTEX_64
+-# define FUTEX_64 3
+-# endif
+
-+#ifndef __NR_futex_wait
-+#define __NR_futex_wait 440
-+#endif
-+
-+#ifndef __NR_futex_wake
-+#define __NR_futex_wake 441
-+#endif
-+
-+#ifndef __NR_futex1
-+#define __NR_futex1 442
++#ifndef FUTEX_SHARED_FLAG
++#define FUTEX_SHARED_FLAG 8
+#endif
-diff --git a/tools/include/uapi/asm-generic/unistd.h b/tools/include/uapi/asm-generic/unistd.h
-index dd457de21bad..f737eaeecbb6 100644
---- a/tools/include/uapi/asm-generic/unistd.h
-+++ b/tools/include/uapi/asm-generic/unistd.h
-@@ -862,11 +862,15 @@ __SYSCALL(__NR_faccessat2, sys_faccessat2)
-
- #define __NR_futex_wait 440
- __SYSCALL(__NR_futex_wait, sys_futex_wait)
-+
- #define __NR_futex_wake 441
- __SYSCALL(__NR_futex_wake, sys_futex_wake)
-
-+#define __NR_futex1 442
-+__SYSCALL(__NR_futex1, sys_futex1)
+
- #undef __NR_syscalls
--#define __NR_syscalls 442
-+#define __NR_syscalls 443
++#ifndef FUTEX_WAITV_MAX
++#define FUTEX_WAITV_MAX 128
++struct futex_waitv {
++ void *uaddr;
++ unsigned int val;
++ unsigned int flags;
++};
+ #endif
/*
- * 32 bit systems traditionally used different
-diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
-index f30d6ae9a688..1a516b081207 100644
---- a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
-+++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
-@@ -361,6 +361,9 @@
- 437 common openat2 sys_openat2
- 438 common pidfd_getfd sys_pidfd_getfd
- 439 common faccessat2 sys_faccessat2
-+440 common futex_wait sys_futex_wait
-+441 common futex_wake sys_futex_wake
-+442 common futex1 sys_futex1
-
- #
- # x32-specific system call numbers start at 512 to avoid cache impact
-diff --git a/tools/perf/bench/futex.h b/tools/perf/bench/futex.h
-index 31b53cc7d5bc..baf6a0d077ac 100644
---- a/tools/perf/bench/futex.h
-+++ b/tools/perf/bench/futex.h
-@@ -8,10 +8,14 @@
- #ifndef _FUTEX_H
- #define _FUTEX_H
-
-+//#define FUTEX1 0
-+#define UNUSED(x) (void)(x)
-+
- #include <unistd.h>
- #include <sys/syscall.h>
- #include <sys/types.h>
- #include <linux/futex.h>
-+#include <linux/unistd.h>
-
- /**
- * futex() - SYS_futex syscall wrapper
-@@ -34,7 +38,13 @@
- * like-named arguments in the following wrappers except where noted below.
- */
- #define futex(uaddr, op, val, timeout, uaddr2, val3, opflags) \
-- syscall(SYS_futex, uaddr, op | opflags, val, timeout, uaddr2, val3)
-+ syscall(__NR_futex1, uaddr, op | opflags, val, timeout, uaddr2, val3)
-+
-+#define futex2_wake(uaddr, nr, flags) \
-+ syscall(__NR_futex_wake, uaddr, nr, flags | FUTEX_32)
-+
-+#define futex2_wait(uaddr, val, flags, timeout) \
-+ syscall(__NR_futex_wait, uaddr, val, flags | FUTEX_32, timeout)
-
- /**
- * futex_wait() - block on uaddr with optional timeout
-@@ -43,7 +53,13 @@
- static inline int
- futex_wait(u_int32_t *uaddr, u_int32_t val, struct timespec *timeout, int opflags)
- {
-+#ifdef FUTEX1
- return futex(uaddr, FUTEX_WAIT, val, timeout, NULL, 0, opflags);
-+#else
-+ UNUSED(timeout);
-+ UNUSED(opflags);
-+ return futex2_wait(uaddr, val, 0, NULL);
-+#endif
- }
-
- /**
-@@ -53,7 +69,12 @@ futex_wait(u_int32_t *uaddr, u_int32_t val, struct timespec *timeout, int opflag
- static inline int
- futex_wake(u_int32_t *uaddr, int nr_wake, int opflags)
+@@ -75,3 +83,12 @@ static inline int futex2_wake(volatile void *uaddr, unsigned int nr, unsigned lo
{
-+#ifdef FUTEX1
- return futex(uaddr, FUTEX_WAKE, nr_wake, NULL, NULL, 0, opflags);
-+#else
-+ UNUSED(opflags);
-+ return futex2_wake(uaddr, nr_wake, 0);
-+#endif
+ return syscall(__NR_futex_wake, uaddr, nr, flags);
}
-
- /**
++
++/*
++ * wait for uaddr if (*uaddr == val)
++ */
++static inline int futex2_waitv(volatile struct futex_waitv *waiters, unsigned long nr_waiters,
++ unsigned long flags, struct timespec64 *timo)
++{
++ return syscall(__NR_futex_waitv, waiters, nr_waiters, flags, timo);
++}
--
-2.28.0
+2.29.2
+
-From 2f5e38a4191ac6fd5040435f6a41433add3711a6 Mon Sep 17 00:00:00 2001
+From 9358bbdf929a90bc144d13e002fed8f4223d3178 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Andr=C3=A9=20Almeida?= <andrealmeid@collabora.com>
-Date: Thu, 15 Oct 2020 18:06:40 -0300
-Subject: [PATCH 07/13] futex2: Add support for shared futexes
+Date: Fri, 4 Dec 2020 19:12:23 -0300
+Subject: [PATCH 8/9] futex2: Add sysfs entry for syscall numbers
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
-Add support for shared futexes for cross-process resources.
-
Signed-off-by: André Almeida <andrealmeid@collabora.com>
+Signed-off-by: Jan200101 <sentrycraft123@gmail.com>
---
- kernel/futex2.c | 169 +++++++++++++++++++++++++++++++++++++++++-------
- 1 file changed, 146 insertions(+), 23 deletions(-)
+ kernel/futex2.c | 42 ++++++++++++++++++++++++++++++++++++++++++
+ 1 file changed, 42 insertions(+)
diff --git a/kernel/futex2.c b/kernel/futex2.c
-index 4b782b5ef615..ae743ddf223e 100644
+index 5ddb9922d..58cd8a868 100644
--- a/kernel/futex2.c
+++ b/kernel/futex2.c
-@@ -6,7 +6,9 @@
- */
-
- #include <linux/freezer.h>
-+#include <linux/hugetlb.h>
- #include <linux/jhash.h>
-+#include <linux/pagemap.h>
- #include <linux/sched/wake_q.h>
- #include <linux/spinlock.h>
- #include <linux/syscalls.h>
-@@ -15,6 +17,7 @@
-
- /**
- * struct futex_waiter - List entry for a waiter
-+ * @uaddr: Memory address of userspace futex
- * @key.address: Memory address of userspace futex
- * @key.mm: Pointer to memory management struct of this process
- * @key: Stores information that uniquely identify a futex
-@@ -25,6 +28,7 @@
- * @index: Index of waiter in futexv list
- */
- struct futex_waiter {
-+ uintptr_t uaddr;
- struct futex_key {
- uintptr_t address;
- struct mm_struct *mm;
-@@ -125,16 +129,109 @@ static inline int bucket_get_waiters(struct futex_bucket *bucket)
- #endif
+@@ -762,6 +762,48 @@ SYSCALL_DEFINE3(futex_wake, void __user *, uaddr, unsigned int, nr_wake,
+ return ret;
}
-+static u64 get_inode_sequence_number(struct inode *inode)
++static ssize_t wait_show(struct kobject *kobj, struct kobj_attribute *attr,
++ char *buf)
+{
-+ static atomic64_t i_seq;
-+ u64 old;
-+
-+ /* Does the inode already have a sequence number? */
-+ old = atomic64_read(&inode->i_sequence);
-+ if (likely(old))
-+ return old;
-+
-+ for (;;) {
-+ u64 new = atomic64_add_return(1, &i_seq);
-+ if (WARN_ON_ONCE(!new))
-+ continue;
++ return sprintf(buf, "%u\n", __NR_futex_wait);
+
-+ old = atomic64_cmpxchg_relaxed(&inode->i_sequence, 0, new);
-+ if (old)
-+ return old;
-+ return new;
-+ }
+}
++static struct kobj_attribute futex2_wait_attr = __ATTR_RO(wait);
+
-+static int futex_get_shared_key(uintptr_t address, struct mm_struct *mm,
-+ struct futex_key *key)
++static ssize_t wake_show(struct kobject *kobj, struct kobj_attribute *attr,
++ char *buf)
+{
-+ int err;
-+ struct page *page, *tail;
-+ struct address_space *mapping;
-+
-+again:
-+ err = get_user_pages_fast(address, 1, 0, &page);
-+
-+ if (err < 0)
-+ return err;
-+ else
-+ err = 0;
-+
-+
-+ tail = page;
-+ page = compound_head(page);
-+ mapping = READ_ONCE(page->mapping);
-+
-+
-+ if (unlikely(!mapping)) {
-+ int shmem_swizzled;
-+
-+ lock_page(page);
-+ shmem_swizzled = PageSwapCache(page) || page->mapping;
-+ unlock_page(page);
-+ put_page(page);
-+
-+ if (shmem_swizzled)
-+ goto again;
-+
-+ return -EFAULT;
-+ }
-+
-+ if (PageAnon(page)) {
-+
-+ key->mm = mm;
-+ key->address = address;
-+
-+ } else {
-+ struct inode *inode;
-+
-+ rcu_read_lock();
-+
-+ if (READ_ONCE(page->mapping) != mapping) {
-+ rcu_read_unlock();
-+ put_page(page);
-+
-+ goto again;
-+ }
-+
-+ inode = READ_ONCE(mapping->host);
-+ if (!inode) {
-+ rcu_read_unlock();
-+ put_page(page);
-+
-+ goto again;
-+ }
-+
-+ key->address = get_inode_sequence_number(inode);
-+ key->mm = (struct mm_struct *) basepage_index(tail);
-+ rcu_read_unlock();
-+ }
++ return sprintf(buf, "%u\n", __NR_futex_wake);
+
-+ put_page(page);
-+ return err;
+}
++static struct kobj_attribute futex2_wake_attr = __ATTR_RO(wake);
+
- /**
- * futex_get_bucket - Check if the user address is valid, prepare internal
- * data and calculate the hash
- * @uaddr: futex user address
- * @key: data that uniquely identifies a futex
-+ * @shared: is this a shared futex?
- *
- * Return: address of bucket on success, error code otherwise
- */
- static struct futex_bucket *futex_get_bucket(void __user *uaddr,
-- struct futex_key *key)
-+ struct futex_key *key,
-+ bool shared)
- {
- uintptr_t address = (uintptr_t) uaddr;
- u32 hash_key;
-@@ -145,8 +242,12 @@ static struct futex_bucket *futex_get_bucket(void __user *uaddr,
- if (unlikely(!access_ok(address, sizeof(u32))))
- return ERR_PTR(-EFAULT);
-
-- key->address = address;
-- key->mm = current->mm;
-+ if (!shared) {
-+ key->address = address;
-+ key->mm = current->mm;
-+ } else {
-+ futex_get_shared_key(address, current->mm, key);
-+ }
-
- /* Generate hash key for this futex using uaddr and current->mm */
- hash_key = jhash2((u32 *) key, sizeof(*key) / sizeof(u32), 0);
-@@ -275,9 +376,10 @@ static int futex_dequeue_multiple(struct futexv *futexv, unsigned int nr)
- * Return: 0 on success, error code otherwise
- */
- static int futex_enqueue(struct futexv *futexv, unsigned int nr_futexes,
-- unsigned int *awaken)
-+ int *awaken)
- {
- int i, ret;
-+ bool shared;
- u32 uval, *uaddr, val;
- struct futex_bucket *bucket;
-
-@@ -285,9 +387,13 @@ static int futex_enqueue(struct futexv *futexv, unsigned int nr_futexes,
- set_current_state(TASK_INTERRUPTIBLE);
-
- for (i = 0; i < nr_futexes; i++) {
-- uaddr = (u32 * __user) futexv->objects[i].key.address;
-+ uaddr = (u32 * __user) futexv->objects[i].uaddr;
- val = (u32) futexv->objects[i].val;
-- bucket = futexv->objects[i].bucket;
-+ shared = (futexv->objects[i].flags & FUTEX_SHARED_FLAG) ? true : false;
-+ if (shared)
-+ bucket = futex_get_bucket((void *) uaddr, &futexv->objects[i].key, true);
-+ else
-+ bucket = futexv->objects[i].bucket;
-
- bucket_inc_waiters(bucket);
- spin_lock(&bucket->lock);
-@@ -301,11 +407,14 @@ static int futex_enqueue(struct futexv *futexv, unsigned int nr_futexes,
- __set_current_state(TASK_RUNNING);
- *awaken = futex_dequeue_multiple(futexv, i);
-
-+ if (shared)
-+ goto retry;
++static ssize_t waitv_show(struct kobject *kobj, struct kobj_attribute *attr,
++ char *buf)
++{
++ return sprintf(buf, "%u\n", __NR_futex_waitv);
+
- if (__get_user(uval, uaddr))
- return -EFAULT;
-
- if (*awaken >= 0)
-- return 0;
-+ return 1;
-
- goto retry;
- }
-@@ -313,12 +422,14 @@ static int futex_enqueue(struct futexv *futexv, unsigned int nr_futexes,
- if (uval != val) {
- spin_unlock(&bucket->lock);
-
++}
++static struct kobj_attribute futex2_waitv_attr = __ATTR_RO(waitv);
+
- bucket_dec_waiters(bucket);
- __set_current_state(TASK_RUNNING);
- *awaken = futex_dequeue_multiple(futexv, i);
-
-- if (*awaken >= 0)
-- return 0;
-+ if (*awaken >= 0) {
-+ return 1;
-+ }
-
- return -EWOULDBLOCK;
- }
-@@ -336,19 +447,18 @@ static int __futex_wait(struct futexv *futexv,
- struct hrtimer_sleeper *timeout)
- {
- int ret;
-- unsigned int awaken = -1;
-
-- while (1) {
-- ret = futex_enqueue(futexv, nr_futexes, &awaken);
-
-- if (ret < 0)
-- break;
-+ while (1) {
-+ int awaken = -1;
-
-- if (awaken <= 0) {
-- return awaken;
-+ ret = futex_enqueue(futexv, nr_futexes, &awaken);
-+ if (ret) {
-+ if (awaken >= 0)
-+ return awaken;
-+ return ret;
- }
-
--
- /* Before sleeping, check if someone was woken */
- if (!futexv->hint && (!timeout || timeout->task))
- freezable_schedule();
-@@ -419,6 +529,7 @@ static int futex_wait(struct futexv *futexv, unsigned int nr_futexes,
- hrtimer_sleeper_start_expires(timeout, HRTIMER_MODE_ABS);
- }
-
++static struct attribute *futex2_sysfs_attrs[] = {
++ &futex2_wait_attr.attr,
++ &futex2_wake_attr.attr,
++ &futex2_waitv_attr.attr,
++ NULL,
++};
+
- ret = __futex_wait(futexv, nr_futexes, timo ? timeout : NULL);
-
-
-@@ -438,9 +549,10 @@ static int futex_wait(struct futexv *futexv, unsigned int nr_futexes,
- SYSCALL_DEFINE4(futex_wait, void __user *, uaddr, unsigned int, val,
- unsigned int, flags, struct __kernel_timespec __user *, timo)
- {
-+ bool shared = (flags & FUTEX_SHARED_FLAG) ? true : false;
- unsigned int size = flags & FUTEX_SIZE_MASK;
-- struct hrtimer_sleeper timeout;
- struct futex_single_waiter wait_single;
-+ struct hrtimer_sleeper timeout;
- struct futex_waiter *waiter;
- struct futexv *futexv;
- int ret;
-@@ -452,6 +564,7 @@ SYSCALL_DEFINE4(futex_wait, void __user *, uaddr, unsigned int, val,
- waiter = &wait_single.waiter;
- waiter->index = 0;
- waiter->val = val;
-+ waiter->uaddr = (uintptr_t) uaddr;
-
- INIT_LIST_HEAD(&waiter->list);
-
-@@ -462,11 +575,14 @@ SYSCALL_DEFINE4(futex_wait, void __user *, uaddr, unsigned int, val,
- return -EINVAL;
-
- /* Get an unlocked hash bucket */
-- waiter->bucket = futex_get_bucket(uaddr, &waiter->key);
-- if (IS_ERR(waiter->bucket))
-+ waiter->bucket = futex_get_bucket(uaddr, &waiter->key, shared);
-+ if (IS_ERR(waiter->bucket)) {
- return PTR_ERR(waiter->bucket);
-+ }
-
- ret = futex_wait(futexv, 1, timo, &timeout, flags);
-+ if (ret > 0)
-+ ret = 0;
-
- return ret;
- }
-@@ -486,8 +602,10 @@ static int futex_parse_waitv(struct futexv *futexv,
- struct futex_waitv waitv;
- unsigned int i;
- struct futex_bucket *bucket;
-+ bool shared;
-
- for (i = 0; i < nr_futexes; i++) {
++static const struct attribute_group futex2_sysfs_attr_group = {
++ .attrs = futex2_sysfs_attrs,
++ .name = "futex2",
++};
+
- if (copy_from_user(&waitv, &uwaitv[i], sizeof(waitv)))
- return -EFAULT;
-
-@@ -495,8 +613,10 @@ static int futex_parse_waitv(struct futexv *futexv,
- (waitv.flags & FUTEX_SIZE_MASK) != FUTEX_32)
- return -EINVAL;
-
-+ shared = (waitv.flags & FUTEX_SHARED_FLAG) ? true : false;
++static int __init futex2_sysfs_init(void)
++{
++ return sysfs_create_group(kernel_kobj, &futex2_sysfs_attr_group);
++}
++subsys_initcall(futex2_sysfs_init);
+
- bucket = futex_get_bucket(waitv.uaddr,
-- &futexv->objects[i].key);
-+ &futexv->objects[i].key, shared);
- if (IS_ERR(bucket))
- return PTR_ERR(bucket);
-
-@@ -505,6 +625,7 @@ static int futex_parse_waitv(struct futexv *futexv,
- futexv->objects[i].flags = waitv.flags;
- futexv->objects[i].index = i;
- INIT_LIST_HEAD(&futexv->objects[i].list);
-+ futexv->objects[i].uaddr = (uintptr_t) waitv.uaddr;
- }
-
- return 0;
-@@ -573,6 +694,7 @@ static struct futexv *futex_get_parent(uintptr_t waiter, u8 index)
- SYSCALL_DEFINE3(futex_wake, void __user *, uaddr, unsigned int, nr_wake,
- unsigned int, flags)
+ static int __init futex2_init(void)
{
-+ bool shared = (flags & FUTEX_SHARED_FLAG) ? true : false;
- unsigned int size = flags & FUTEX_SIZE_MASK;
- struct futex_waiter waiter, *aux, *tmp;
- struct futex_bucket *bucket;
-@@ -586,9 +708,10 @@ SYSCALL_DEFINE3(futex_wake, void __user *, uaddr, unsigned int, nr_wake,
- if (size != FUTEX_32)
- return -EINVAL;
-
-- bucket = futex_get_bucket(uaddr, &waiter.key);
-- if (IS_ERR(bucket))
-+ bucket = futex_get_bucket(uaddr, &waiter.key, shared);
-+ if (IS_ERR(bucket)) {
- return PTR_ERR(bucket);
-+ }
-
- if (!bucket_get_waiters(bucket))
- return 0;
+ int i;
--
-2.28.0
+2.29.2
-From 909eb056421668b5d42f8c4dfa92339851a43dd8 Mon Sep 17 00:00:00 2001
-From: Gabriel Krisman Bertazi <krisman@collabora.com>
-Date: Mon, 2 Nov 2020 18:41:38 -0500
-Subject: [PATCH 08/13] Revert "futex: Remove needless goto's"
-This reverts commit d7c5ed73b19c4640426d9c106f70ec2cb532034d.
----
- kernel/futex.c | 40 ++++++++++++++++++++++++----------------
- 1 file changed, 24 insertions(+), 16 deletions(-)
-
-diff --git a/kernel/futex.c b/kernel/futex.c
-index 6c00c0952313..a671d371b11f 100644
---- a/kernel/futex.c
-+++ b/kernel/futex.c
-@@ -1593,13 +1593,13 @@ futex_wake(u32 __user *uaddr, unsigned int flags, int nr_wake, u32 bitset)
-
- ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key, FUTEX_READ);
- if (unlikely(ret != 0))
-- return ret;
-+ goto out;
-
- hb = hash_futex(&key);
-
- /* Make sure we really have tasks to wakeup */
- if (!hb_waiters_pending(hb))
-- return ret;
-+ goto out;
-
- spin_lock(&hb->lock);
-
-@@ -1622,6 +1622,7 @@ futex_wake(u32 __user *uaddr, unsigned int flags, int nr_wake, u32 bitset)
-
- spin_unlock(&hb->lock);
- wake_up_q(&wake_q);
-+out:
- return ret;
- }
-
-@@ -1688,10 +1689,10 @@ futex_wake_op(u32 __user *uaddr1, unsigned int flags, u32 __user *uaddr2,
- retry:
- ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1, FUTEX_READ);
- if (unlikely(ret != 0))
-- return ret;
-+ goto out;
- ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, FUTEX_WRITE);
- if (unlikely(ret != 0))
-- return ret;
-+ goto out;
-
- hb1 = hash_futex(&key1);
- hb2 = hash_futex(&key2);
-@@ -1709,13 +1710,13 @@ futex_wake_op(u32 __user *uaddr1, unsigned int flags, u32 __user *uaddr2,
- * an MMU, but we might get them from range checking
- */
- ret = op_ret;
-- return ret;
-+ goto out;
- }
-
- if (op_ret == -EFAULT) {
- ret = fault_in_user_writeable(uaddr2);
- if (ret)
-- return ret;
-+ goto out;
- }
-
- if (!(flags & FLAGS_SHARED)) {
-@@ -1758,6 +1759,7 @@ futex_wake_op(u32 __user *uaddr1, unsigned int flags, u32 __user *uaddr2,
- out_unlock:
- double_unlock_hb(hb1, hb2);
- wake_up_q(&wake_q);
-+out:
- return ret;
- }
-
-@@ -1964,18 +1966,20 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags,
- retry:
- ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1, FUTEX_READ);
- if (unlikely(ret != 0))
-- return ret;
-+ goto out;
- ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2,
- requeue_pi ? FUTEX_WRITE : FUTEX_READ);
- if (unlikely(ret != 0))
-- return ret;
-+ goto out;
-
- /*
- * The check above which compares uaddrs is not sufficient for
- * shared futexes. We need to compare the keys:
- */
-- if (requeue_pi && match_futex(&key1, &key2))
-- return -EINVAL;
-+ if (requeue_pi && match_futex(&key1, &key2)) {
-+ ret = -EINVAL;
-+ goto out;
-+ }
-
- hb1 = hash_futex(&key1);
- hb2 = hash_futex(&key2);
-@@ -1995,7 +1999,7 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags,
-
- ret = get_user(curval, uaddr1);
- if (ret)
-- return ret;
-+ goto out;
-
- if (!(flags & FLAGS_SHARED))
- goto retry_private;
-@@ -2061,7 +2065,7 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags,
- ret = fault_in_user_writeable(uaddr2);
- if (!ret)
- goto retry;
-- return ret;
-+ goto out;
- case -EBUSY:
- case -EAGAIN:
- /*
-@@ -2180,6 +2184,8 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags,
- double_unlock_hb(hb1, hb2);
- wake_up_q(&wake_q);
- hb_waiters_dec(hb2);
-+
-+out:
- return ret ? ret : task_count;
- }
-
-@@ -2537,7 +2543,7 @@ static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked)
- */
- if (q->pi_state->owner != current)
- ret = fixup_pi_state_owner(uaddr, q, current);
-- return ret ? ret : locked;
-+ goto out;
- }
-
- /*
-@@ -2550,7 +2556,7 @@ static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked)
- */
- if (q->pi_state->owner == current) {
- ret = fixup_pi_state_owner(uaddr, q, NULL);
-- return ret;
-+ goto out;
- }
-
- /*
-@@ -2564,7 +2570,8 @@ static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked)
- q->pi_state->owner);
- }
-
-- return ret;
-+out:
-+ return ret ? ret : locked;
- }
-
- /**
-@@ -2661,7 +2668,7 @@ static int futex_wait_setup(u32 __user *uaddr, u32 val, unsigned int flags,
-
- ret = get_user(uval, uaddr);
- if (ret)
-- return ret;
-+ goto out;
-
- if (!(flags & FLAGS_SHARED))
- goto retry_private;
-@@ -2674,6 +2681,7 @@ static int futex_wait_setup(u32 __user *uaddr, u32 val, unsigned int flags,
- ret = -EWOULDBLOCK;
- }
-
-+out:
- return ret;
- }
-
---
-2.28.0
+From f7b1c9a2ad05933e559ef78bc7753b2fac1698fd Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Andr=C3=A9=20Almeida?= <andrealmeid@collabora.com>
+Date: Tue, 5 Jan 2021 15:44:02 -0300
+Subject: [PATCH 9/9] perf bench: Add futex2 benchmark tests
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
-From fee513186b69c4a65534fd790545877974ef17d3 Mon Sep 17 00:00:00 2001
-From: Gabriel Krisman Bertazi <krisman@collabora.com>
-Date: Mon, 2 Nov 2020 18:41:54 -0500
-Subject: [PATCH 09/13] Revert "futex: Remove put_futex_key()"
+Port existing futex infrastructure to use futex2 calls.
-This reverts commit 9180bd467f9abdb44afde650d07e3b9dd66d837c.
+Signed-off-by: André Almeida <andrealmeid@collabora.com>
+Signed-off-by: Jan200101 <sentrycraft123@gmail.com>
---
- kernel/futex.c | 61 ++++++++++++++++++++++++++++++++++++++++----------
- 1 file changed, 49 insertions(+), 12 deletions(-)
+ tools/arch/x86/include/asm/unistd_64.h | 8 +++++
+ tools/perf/bench/bench.h | 3 ++
+ tools/perf/bench/futex-hash.c | 24 ++++++++++++---
+ tools/perf/bench/futex-wake-parallel.c | 41 ++++++++++++++++++++++----
+ tools/perf/bench/futex-wake.c | 36 ++++++++++++++++++----
+ tools/perf/bench/futex.h | 17 +++++++++++
+ tools/perf/builtin-bench.c | 17 ++++++++---
+ 7 files changed, 127 insertions(+), 19 deletions(-)
-diff --git a/kernel/futex.c b/kernel/futex.c
-index a671d371b11f..647de692c874 100644
---- a/kernel/futex.c
-+++ b/kernel/futex.c
-@@ -661,6 +661,10 @@ static int get_futex_key(u32 __user *uaddr, bool fshared, union futex_key *key,
- return err;
- }
-
-+static inline void put_futex_key(union futex_key *key)
-+{
-+}
+diff --git a/tools/arch/x86/include/asm/unistd_64.h b/tools/arch/x86/include/asm/unistd_64.h
+index 4205ed415..151a41ceb 100644
+--- a/tools/arch/x86/include/asm/unistd_64.h
++++ b/tools/arch/x86/include/asm/unistd_64.h
+@@ -17,3 +17,11 @@
+ #ifndef __NR_setns
+ #define __NR_setns 308
+ #endif
+
- /**
- * fault_in_user_writeable() - Fault in user address and verify RW access
- * @uaddr: pointer to faulting user space address
-@@ -1599,7 +1603,7 @@ futex_wake(u32 __user *uaddr, unsigned int flags, int nr_wake, u32 bitset)
-
- /* Make sure we really have tasks to wakeup */
- if (!hb_waiters_pending(hb))
-- goto out;
-+ goto out_put_key;
-
- spin_lock(&hb->lock);
-
-@@ -1622,6 +1626,8 @@ futex_wake(u32 __user *uaddr, unsigned int flags, int nr_wake, u32 bitset)
-
- spin_unlock(&hb->lock);
- wake_up_q(&wake_q);
-+out_put_key:
-+ put_futex_key(&key);
- out:
- return ret;
- }
-@@ -1692,7 +1698,7 @@ futex_wake_op(u32 __user *uaddr1, unsigned int flags, u32 __user *uaddr2,
- goto out;
- ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, FUTEX_WRITE);
- if (unlikely(ret != 0))
-- goto out;
-+ goto out_put_key1;
-
- hb1 = hash_futex(&key1);
- hb2 = hash_futex(&key2);
-@@ -1710,13 +1716,13 @@ futex_wake_op(u32 __user *uaddr1, unsigned int flags, u32 __user *uaddr2,
- * an MMU, but we might get them from range checking
- */
- ret = op_ret;
-- goto out;
-+ goto out_put_keys;
- }
-
- if (op_ret == -EFAULT) {
- ret = fault_in_user_writeable(uaddr2);
- if (ret)
-- goto out;
-+ goto out_put_keys;
- }
-
- if (!(flags & FLAGS_SHARED)) {
-@@ -1724,6 +1730,8 @@ futex_wake_op(u32 __user *uaddr1, unsigned int flags, u32 __user *uaddr2,
- goto retry_private;
- }
-
-+ put_futex_key(&key2);
-+ put_futex_key(&key1);
- cond_resched();
- goto retry;
- }
-@@ -1759,6 +1767,10 @@ futex_wake_op(u32 __user *uaddr1, unsigned int flags, u32 __user *uaddr2,
- out_unlock:
- double_unlock_hb(hb1, hb2);
- wake_up_q(&wake_q);
-+out_put_keys:
-+ put_futex_key(&key2);
-+out_put_key1:
-+ put_futex_key(&key1);
- out:
- return ret;
- }
-@@ -1970,7 +1982,7 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags,
- ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2,
- requeue_pi ? FUTEX_WRITE : FUTEX_READ);
- if (unlikely(ret != 0))
-- goto out;
-+ goto out_put_key1;
-
- /*
- * The check above which compares uaddrs is not sufficient for
-@@ -1978,7 +1990,7 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags,
- */
- if (requeue_pi && match_futex(&key1, &key2)) {
- ret = -EINVAL;
-- goto out;
-+ goto out_put_keys;
- }
-
- hb1 = hash_futex(&key1);
-@@ -1999,11 +2011,13 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags,
-
- ret = get_user(curval, uaddr1);
- if (ret)
-- goto out;
-+ goto out_put_keys;
-
- if (!(flags & FLAGS_SHARED))
- goto retry_private;
-
-+ put_futex_key(&key2);
-+ put_futex_key(&key1);
- goto retry;
- }
- if (curval != *cmpval) {
-@@ -2062,6 +2076,8 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags,
- case -EFAULT:
- double_unlock_hb(hb1, hb2);
- hb_waiters_dec(hb2);
-+ put_futex_key(&key2);
-+ put_futex_key(&key1);
- ret = fault_in_user_writeable(uaddr2);
- if (!ret)
- goto retry;
-@@ -2076,6 +2092,8 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags,
++#ifndef __NR_futex_wait
++# define __NR_futex_wait 441
++#endif
++
++#ifndef __NR_futex_wake
++# define __NR_futex_wake 442
++#endif
+diff --git a/tools/perf/bench/bench.h b/tools/perf/bench/bench.h
+index eac36afab..f6f881a05 100644
+--- a/tools/perf/bench/bench.h
++++ b/tools/perf/bench/bench.h
+@@ -38,8 +38,11 @@ int bench_mem_memcpy(int argc, const char **argv);
+ int bench_mem_memset(int argc, const char **argv);
+ int bench_mem_find_bit(int argc, const char **argv);
+ int bench_futex_hash(int argc, const char **argv);
++int bench_futex2_hash(int argc, const char **argv);
+ int bench_futex_wake(int argc, const char **argv);
++int bench_futex2_wake(int argc, const char **argv);
+ int bench_futex_wake_parallel(int argc, const char **argv);
++int bench_futex2_wake_parallel(int argc, const char **argv);
+ int bench_futex_requeue(int argc, const char **argv);
+ /* pi futexes */
+ int bench_futex_lock_pi(int argc, const char **argv);
+diff --git a/tools/perf/bench/futex-hash.c b/tools/perf/bench/futex-hash.c
+index 915bf3da7..72921c22b 100644
+--- a/tools/perf/bench/futex-hash.c
++++ b/tools/perf/bench/futex-hash.c
+@@ -34,7 +34,7 @@ static unsigned int nthreads = 0;
+ static unsigned int nsecs = 10;
+ /* amount of futexes per thread */
+ static unsigned int nfutexes = 1024;
+-static bool fshared = false, done = false, silent = false;
++static bool fshared = false, done = false, silent = false, futex2 = false;
+ static int futex_flag = 0;
+
+ struct timeval bench__start, bench__end, bench__runtime;
+@@ -86,7 +86,10 @@ static void *workerfn(void *arg)
+ * such as internal waitqueue handling, thus enlarging
+ * the critical region protected by hb->lock.
*/
- double_unlock_hb(hb1, hb2);
- hb_waiters_dec(hb2);
-+ put_futex_key(&key2);
-+ put_futex_key(&key1);
- /*
- * Handle the case where the owner is in the middle of
- * exiting. Wait for the exit to complete otherwise
-@@ -2185,6 +2203,10 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags,
- wake_up_q(&wake_q);
- hb_waiters_dec(hb2);
-
-+out_put_keys:
-+ put_futex_key(&key2);
-+out_put_key1:
-+ put_futex_key(&key1);
- out:
- return ret ? ret : task_count;
- }
-@@ -2673,6 +2695,7 @@ static int futex_wait_setup(u32 __user *uaddr, u32 val, unsigned int flags,
- if (!(flags & FLAGS_SHARED))
- goto retry_private;
-
-+ put_futex_key(&q->key);
- goto retry;
- }
-
-@@ -2682,6 +2705,8 @@ static int futex_wait_setup(u32 __user *uaddr, u32 val, unsigned int flags,
- }
-
- out:
-+ if (ret)
-+ put_futex_key(&q->key);
- return ret;
+- ret = futex_wait(&w->futex[i], 1234, NULL, futex_flag);
++ if (!futex2)
++ ret = futex_wait(&w->futex[i], 1234, NULL, futex_flag);
++ else
++ ret = futex2_wait(&w->futex[i], 1234, futex_flag, NULL);
+ if (!silent &&
+ (!ret || errno != EAGAIN || errno != EWOULDBLOCK))
+ warn("Non-expected futex return call");
+@@ -117,7 +120,7 @@ static void print_summary(void)
+ (int)bench__runtime.tv_sec);
}
-@@ -2826,6 +2851,7 @@ static int futex_lock_pi(u32 __user *uaddr, unsigned int flags,
- * - EAGAIN: The user space value changed.
- */
- queue_unlock(hb);
-+ put_futex_key(&q.key);
- /*
- * Handle the case where the owner is in the middle of
- * exiting. Wait for the exit to complete otherwise
-@@ -2933,11 +2959,13 @@ static int futex_lock_pi(u32 __user *uaddr, unsigned int flags,
- put_pi_state(pi_state);
- }
-
-- goto out;
-+ goto out_put_key;
-
- out_unlock_put_key:
- queue_unlock(hb);
-
-+out_put_key:
-+ put_futex_key(&q.key);
- out:
- if (to) {
- hrtimer_cancel(&to->timer);
-@@ -2950,11 +2978,12 @@ static int futex_lock_pi(u32 __user *uaddr, unsigned int flags,
-
- ret = fault_in_user_writeable(uaddr);
- if (ret)
-- goto out;
-+ goto out_put_key;
-
- if (!(flags & FLAGS_SHARED))
- goto retry_private;
-
-+ put_futex_key(&q.key);
- goto retry;
+-int bench_futex_hash(int argc, const char **argv)
++static int bench_futex_hash_common(int argc, const char **argv)
+ {
+ int ret = 0;
+ cpu_set_t cpuset;
+@@ -149,7 +152,9 @@ int bench_futex_hash(int argc, const char **argv)
+ if (!worker)
+ goto errmem;
+
+- if (!fshared)
++ if (futex2)
++ futex_flag = FUTEX_32 | (fshared * FUTEX_SHARED_FLAG);
++ else if (!fshared)
+ futex_flag = FUTEX_PRIVATE_FLAG;
+
+ printf("Run summary [PID %d]: %d threads, each operating on %d [%s] futexes for %d secs.\n\n",
+@@ -229,3 +234,14 @@ int bench_futex_hash(int argc, const char **argv)
+ errmem:
+ err(EXIT_FAILURE, "calloc");
}
-
-@@ -3083,13 +3112,16 @@ static int futex_unlock_pi(u32 __user *uaddr, unsigned int flags)
- out_unlock:
- spin_unlock(&hb->lock);
- out_putkey:
-+ put_futex_key(&key);
- return ret;
-
- pi_retry:
-+ put_futex_key(&key);
- cond_resched();
- goto retry;
-
- pi_faulted:
-+ put_futex_key(&key);
-
- ret = fault_in_user_writeable(uaddr);
- if (!ret)
-@@ -3231,7 +3263,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
- */
- ret = futex_wait_setup(uaddr, val, flags, &q, &hb);
- if (ret)
-- goto out;
-+ goto out_key2;
-
- /*
- * The check above which compares uaddrs is not sufficient for
-@@ -3240,7 +3272,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
- if (match_futex(&q.key, &key2)) {
- queue_unlock(hb);
- ret = -EINVAL;
-- goto out;
-+ goto out_put_keys;
- }
-
- /* Queue the futex_q, drop the hb lock, wait for wakeup. */
-@@ -3250,7 +3282,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
- ret = handle_early_requeue_pi_wakeup(hb, &q, &key2, to);
- spin_unlock(&hb->lock);
- if (ret)
-- goto out;
-+ goto out_put_keys;
-
- /*
- * In order for us to be here, we know our q.key == key2, and since
-@@ -3340,6 +3372,11 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
- ret = -EWOULDBLOCK;
- }
-
-+out_put_keys:
-+ put_futex_key(&q.key);
-+out_key2:
-+ put_futex_key(&key2);
+
- out:
- if (to) {
- hrtimer_cancel(&to->timer);
---
-2.28.0
-
-From 3b1489448a277fc1c34ca12e859193c3a7f3446c Mon Sep 17 00:00:00 2001
-From: Gabriel Krisman Bertazi <krisman@collabora.com>
-Date: Fri, 12 Jul 2019 14:16:20 -0400
-Subject: [PATCH 10/13] futex: Split key setup from key queue locking and read
-
-split the futex key setup from the queue locking and key reading. This
-is usefull to support the setup of multiple keys at the same time, like
-what is done in futex_requeue() and what will be done for the
-FUTEX_WAIT_MULTIPLE command.
-
-Signed-off-by: Gabriel Krisman Bertazi <krisman@collabora.com>
----
- kernel/futex.c | 71 +++++++++++++++++++++++++++++---------------------
- 1 file changed, 42 insertions(+), 29 deletions(-)
-
-diff --git a/kernel/futex.c b/kernel/futex.c
-index 647de692c874..f05349def492 100644
---- a/kernel/futex.c
-+++ b/kernel/futex.c
-@@ -2634,6 +2634,39 @@ static void futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q *q,
- __set_current_state(TASK_RUNNING);
- }
-
-+static int __futex_wait_setup(u32 __user *uaddr, u32 val, unsigned int flags,
-+ struct futex_q *q, struct futex_hash_bucket **hb)
++int bench_futex_hash(int argc, const char **argv)
+{
++ return bench_futex_hash_common(argc, argv);
++}
+
-+ u32 uval;
-+ int ret;
-+
-+retry_private:
-+ *hb = queue_lock(q);
-+
-+ ret = get_futex_value_locked(&uval, uaddr);
-+
-+ if (ret) {
-+ queue_unlock(*hb);
-+
-+ ret = get_user(uval, uaddr);
-+ if (ret)
-+ return ret;
-+
-+ if (!(flags & FLAGS_SHARED))
-+ goto retry_private;
-+
-+ return 1;
-+ }
-+
-+ if (uval != val) {
-+ queue_unlock(*hb);
-+ ret = -EWOULDBLOCK;
-+ }
++int bench_futex2_hash(int argc, const char **argv)
++{
++ futex2 = true;
++ return bench_futex_hash_common(argc, argv);
++}
+diff --git a/tools/perf/bench/futex-wake-parallel.c b/tools/perf/bench/futex-wake-parallel.c
+index cd2b81a84..540104538 100644
+--- a/tools/perf/bench/futex-wake-parallel.c
++++ b/tools/perf/bench/futex-wake-parallel.c
+@@ -17,6 +17,12 @@ int bench_futex_wake_parallel(int argc __maybe_unused, const char **argv __maybe
+ pr_err("%s: pthread_barrier_t unavailable, disabling this test...\n", __func__);
+ return 0;
+ }
+
-+ return ret;
++int bench_futex2_wake_parallel(int argc __maybe_unused, const char **argv __maybe_unused)
++{
++ pr_err("%s: pthread_barrier_t unavailable, disabling this test...\n", __func__);
++ return 0;
+}
+ #else /* HAVE_PTHREAD_BARRIER */
+ /* For the CLR_() macros */
+ #include <string.h>
+@@ -48,7 +54,7 @@ static unsigned int nwakes = 1;
+ static u_int32_t futex = 0;
+
+ static pthread_t *blocked_worker;
+-static bool done = false, silent = false, fshared = false;
++static bool done = false, silent = false, fshared = false, futex2 = false;
+ static unsigned int nblocked_threads = 0, nwaking_threads = 0;
+ static pthread_mutex_t thread_lock;
+ static pthread_cond_t thread_parent, thread_worker;
+@@ -79,7 +85,11 @@ static void *waking_workerfn(void *arg)
+
+ gettimeofday(&start, NULL);
+
+- waker->nwoken = futex_wake(&futex, nwakes, futex_flag);
++ if (!futex2)
++ waker->nwoken = futex_wake(&futex, nwakes, futex_flag);
++ else
++ waker->nwoken = futex2_wake(&futex, nwakes, futex_flag);
+
- /**
- * futex_wait_setup() - Prepare to wait on a futex
- * @uaddr: the futex userspace address
-@@ -2654,7 +2687,6 @@ static void futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q *q,
- static int futex_wait_setup(u32 __user *uaddr, u32 val, unsigned int flags,
- struct futex_q *q, struct futex_hash_bucket **hb)
- {
-- u32 uval;
- int ret;
-
- /*
-@@ -2675,38 +2707,19 @@ static int futex_wait_setup(u32 __user *uaddr, u32 val, unsigned int flags,
- * absorb a wakeup if *uaddr does not match the desired values
- * while the syscall executes.
- */
--retry:
-- ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q->key, FUTEX_READ);
-- if (unlikely(ret != 0))
-- return ret;
--
--retry_private:
-- *hb = queue_lock(q);
-+ do {
-+ ret = get_futex_key(uaddr, flags & FLAGS_SHARED,
-+ &q->key, FUTEX_READ);
-+ if (unlikely(ret != 0))
-+ return ret;
+ if (waker->nwoken != nwakes)
+ warnx("couldn't wakeup all tasks (%d/%d)",
+ waker->nwoken, nwakes);
+@@ -130,8 +140,13 @@ static void *blocked_workerfn(void *arg __maybe_unused)
+ pthread_mutex_unlock(&thread_lock);
-- ret = get_futex_value_locked(&uval, uaddr);
-+ ret = __futex_wait_setup(uaddr, val, flags, q, hb);
+ while (1) { /* handle spurious wakeups */
+- if (futex_wait(&futex, 0, NULL, futex_flag) != EINTR)
+- break;
++ if (!futex2) {
++ if (futex_wait(&futex, 0, NULL, futex_flag) != EINTR)
++ break;
++ } else {
++ if (futex2_wait(&futex, 0, futex_flag, NULL) != EINTR)
++ break;
++ }
+ }
-- if (ret) {
-- queue_unlock(*hb);
--
-- ret = get_user(uval, uaddr);
-+ /* Drop key reference if retry or error. */
- if (ret)
-- goto out;
-+ put_futex_key(&q->key);
-+ } while (ret > 0);
-
-- if (!(flags & FLAGS_SHARED))
-- goto retry_private;
--
-- put_futex_key(&q->key);
-- goto retry;
-- }
--
-- if (uval != val) {
-- queue_unlock(*hb);
-- ret = -EWOULDBLOCK;
-- }
--
--out:
-- if (ret)
-- put_futex_key(&q->key);
- return ret;
+ pthread_exit(NULL);
+@@ -218,7 +233,7 @@ static void toggle_done(int sig __maybe_unused,
+ done = true;
}
---
-2.28.0
-
-From 539862895e53b9a774f3a2271d1e7db57879d0d7 Mon Sep 17 00:00:00 2001
-From: Gabriel Krisman Bertazi <krisman@collabora.com>
-Date: Mon, 8 Jul 2019 09:44:09 -0400
-Subject: [PATCH 11/13] futex: Implement FUTEX_WAIT_MULTIPLE
-
-This is a new futex operation to allow a thread to wait on several
-futexes at the same time, and wake up on any of them. In a sense, it
-implements one of the features that was supported by pooling on the old
-FUTEX_FD interface.
-
-My use case for this feature lies in Wine, where we want to implement a
-similar function available in Windows, mainly for event handling. The
-wine folks have an implementation of the userspace side using eventfd,
-but it suffers from bad performance, as shown in the measurements below.
-
-Technically, the old FUTEX_WAIT implementation can be easily
-reimplemented using do_futex_wait_multiple, with a count one, and I have
-a patch demonstrating how it works. I'm not proposing it, since futex
-is such a tricky code, that I'd be more confortable to have
-FUTEX_WAIT_MULTIPLE running upstream for a couple development cycles,
-before considering modifying FUTEX_WAIT.
-
-This was tested using three mechanisms:
-
-1) By reimplementing FUTEX_WAIT in terms of FUTEX_WAIT_MULTIPLE and
-running tools/testing/selftests/futex and a full linux distro on top of
-this kernel.
-
-2) By an example code that exercises the FUTEX_WAIT_MULTIPLE path on a
-multi thread, event handling setup.
-
-3) By running the Wine fsync implementation and executing multi-threaded
-applications, in particular modern games on top of the implementation.
-
-Signed-off-by: Zebediah Figura <z.figura12@gmail.com>
-Signed-off-by: Steven Noonan <steven@valvesoftware.com>
-Signed-off-by: Pierre-Loup A. Griffais <pgriffais@valvesoftware.com>
-Signed-off-by: Gabriel Krisman Bertazi <krisman@collabora.com>
----
- include/uapi/linux/futex.h | 7 ++
- kernel/futex.c | 159 ++++++++++++++++++++++++++++++++++++-
- 2 files changed, 162 insertions(+), 4 deletions(-)
-
-diff --git a/include/uapi/linux/futex.h b/include/uapi/linux/futex.h
-index 35a5bf1cd41b..aefb0b83b784 100644
---- a/include/uapi/linux/futex.h
-+++ b/include/uapi/linux/futex.h
-@@ -21,6 +21,7 @@
- #define FUTEX_WAKE_BITSET 10
- #define FUTEX_WAIT_REQUEUE_PI 11
- #define FUTEX_CMP_REQUEUE_PI 12
-+#define FUTEX_WAIT_MULTIPLE 13
-
- #define FUTEX_PRIVATE_FLAG 128
- #define FUTEX_CLOCK_REALTIME 256
-@@ -190,4 +191,10 @@ struct robust_list_head {
- (((op & 0xf) << 28) | ((cmp & 0xf) << 24) \
- | ((oparg & 0xfff) << 12) | (cmparg & 0xfff))
-
-+struct futex_wait_block {
-+ __u32 __user *uaddr;
-+ __u32 val;
-+ __u32 bitset;
-+};
-+
- #endif /* _UAPI_LINUX_FUTEX_H */
-diff --git a/kernel/futex.c b/kernel/futex.c
-index f05349def492..775f780a96c4 100644
---- a/kernel/futex.c
-+++ b/kernel/futex.c
-@@ -166,6 +166,7 @@ static int __read_mostly futex_cmpxchg_enabled;
- #endif
- #define FLAGS_CLOCKRT 0x02
- #define FLAGS_HAS_TIMEOUT 0x04
-+#define FLAGS_WAKE_MULTIPLE 0x08
-
- /*
- * Priority Inheritance state:
-@@ -2723,6 +2724,148 @@ static int futex_wait_setup(u32 __user *uaddr, u32 val, unsigned int flags,
+-int bench_futex_wake_parallel(int argc, const char **argv)
++static int bench_futex_wake_parallel_common(int argc, const char **argv)
+ {
+ int ret = 0;
+ unsigned int i, j;
+@@ -262,7 +277,9 @@ int bench_futex_wake_parallel(int argc, const char **argv)
+ if (!blocked_worker)
+ err(EXIT_FAILURE, "calloc");
+
+- if (!fshared)
++ if (futex2)
++ futex_flag = FUTEX_32 | (fshared * FUTEX_SHARED_FLAG);
++ else if (!fshared)
+ futex_flag = FUTEX_PRIVATE_FLAG;
+
+ printf("Run summary [PID %d]: blocking on %d threads (at [%s] "
+@@ -322,4 +339,16 @@ int bench_futex_wake_parallel(int argc, const char **argv)
+ free(blocked_worker);
return ret;
}
-
-+static int do_futex_wait_multiple(struct futex_wait_block *wb,
-+ u32 count, unsigned int flags,
-+ ktime_t *abs_time)
-+{
-+
-+ struct hrtimer_sleeper timeout, *to;
-+ struct futex_hash_bucket *hb;
-+ struct futex_q *qs = NULL;
-+ int ret;
-+ int i;
-+
-+ qs = kcalloc(count, sizeof(struct futex_q), GFP_KERNEL);
-+ if (!qs)
-+ return -ENOMEM;
-+
-+ to = futex_setup_timer(abs_time, &timeout, flags,
-+ current->timer_slack_ns);
-+ retry:
-+ for (i = 0; i < count; i++) {
-+ qs[i].key = FUTEX_KEY_INIT;
-+ qs[i].bitset = wb[i].bitset;
-+
-+ ret = get_futex_key(wb[i].uaddr, flags & FLAGS_SHARED,
-+ &qs[i].key, FUTEX_READ);
-+ if (unlikely(ret != 0)) {
-+ for (--i; i >= 0; i--)
-+ put_futex_key(&qs[i].key);
-+ goto out;
-+ }
-+ }
-+
-+ set_current_state(TASK_INTERRUPTIBLE);
-+
-+ for (i = 0; i < count; i++) {
-+ ret = __futex_wait_setup(wb[i].uaddr, wb[i].val,
-+ flags, &qs[i], &hb);
-+ if (ret) {
-+ /* Drop the failed key directly. keys 0..(i-1)
-+ * will be put by unqueue_me. */
-+ put_futex_key(&qs[i].key);
-+
-+ /* Undo the partial work we did. */
-+ for (--i; i >= 0; i--)
-+ unqueue_me(&qs[i]);
+
-+ __set_current_state(TASK_RUNNING);
-+ if (ret > 0)
-+ goto retry;
-+ goto out;
-+ }
-+
-+ /* We can't hold to the bucket lock when dealing with
-+ * the next futex. Queue ourselves now so we can unlock
-+ * it before moving on. */
-+ queue_me(&qs[i], hb);
-+ }
-+
-+ if (to)
-+ hrtimer_start_expires(&to->timer, HRTIMER_MODE_ABS);
-+
-+ /* There is no easy to way to check if we are wake already on
-+ * multiple futexes without waking through each one of them. So
-+ * just sleep and let the scheduler handle it.
-+ */
-+ if (!to || to->task)
-+ freezable_schedule();
-+
-+ __set_current_state(TASK_RUNNING);
-+
-+ ret = -ETIMEDOUT;
-+ /* If we were woken (and unqueued), we succeeded. */
-+ for (i = 0; i < count; i++)
-+ if (!unqueue_me(&qs[i]))
-+ ret = i;
-+
-+ /* Succeed wakeup */
-+ if (ret >= 0)
-+ goto out;
-+
-+ /* Woken by triggered timeout */
-+ if (to && !to->task)
-+ goto out;
-+
-+ /*
-+ * We expect signal_pending(current), but we might be the
-+ * victim of a spurious wakeup as well.
-+ */
-+ if (!signal_pending(current))
-+ goto retry;
-+
-+ ret = -ERESTARTSYS;
-+ if (!abs_time)
-+ goto out;
-+
-+ ret = -ERESTART_RESTARTBLOCK;
-+ out:
-+ if (to) {
-+ hrtimer_cancel(&to->timer);
-+ destroy_hrtimer_on_stack(&to->timer);
-+ }
-+
-+ kfree(qs);
-+ return ret;
++int bench_futex_wake_parallel(int argc, const char **argv)
++{
++ return bench_futex_wake_parallel_common(argc, argv);
+}
+
-+static int futex_wait_multiple(u32 __user *uaddr, unsigned int flags,
-+ u32 count, ktime_t *abs_time)
++int bench_futex2_wake_parallel(int argc, const char **argv)
+{
-+ struct futex_wait_block *wb;
-+ struct restart_block *restart;
-+ int ret;
-+
-+ if (!count)
-+ return -EINVAL;
-+
-+ wb = kcalloc(count, sizeof(struct futex_wait_block), GFP_KERNEL);
-+ if (!wb)
-+ return -ENOMEM;
-+
-+ if (copy_from_user(wb, uaddr,
-+ count * sizeof(struct futex_wait_block))) {
-+ ret = -EFAULT;
-+ goto out;
-+ }
-+
-+ ret = do_futex_wait_multiple(wb, count, flags, abs_time);
-+
-+ if (ret == -ERESTART_RESTARTBLOCK) {
-+ restart = &current->restart_block;
-+ restart->fn = futex_wait_restart;
-+ restart->futex.uaddr = uaddr;
-+ restart->futex.val = count;
-+ restart->futex.time = *abs_time;
-+ restart->futex.flags = (flags | FLAGS_HAS_TIMEOUT |
-+ FLAGS_WAKE_MULTIPLE);
-+ }
-+
-+out:
-+ kfree(wb);
-+ return ret;
++ futex2 = true;
++ return bench_futex_wake_parallel_common(argc, argv);
+}
+
- static int futex_wait(u32 __user *uaddr, unsigned int flags, u32 val,
- ktime_t *abs_time, u32 bitset)
- {
-@@ -2800,6 +2943,10 @@ static long futex_wait_restart(struct restart_block *restart)
- }
- restart->fn = do_no_restart_syscall;
+ #endif /* HAVE_PTHREAD_BARRIER */
+diff --git a/tools/perf/bench/futex-wake.c b/tools/perf/bench/futex-wake.c
+index 2dfcef3e3..b98b84e7b 100644
+--- a/tools/perf/bench/futex-wake.c
++++ b/tools/perf/bench/futex-wake.c
+@@ -46,6 +46,9 @@ static struct stats waketime_stats, wakeup_stats;
+ static unsigned int threads_starting, nthreads = 0;
+ static int futex_flag = 0;
-+ if (restart->futex.flags & FLAGS_WAKE_MULTIPLE)
-+ return (long)futex_wait_multiple(uaddr, restart->futex.flags,
-+ restart->futex.val, tp);
++/* Should we use futex2 API? */
++static bool futex2 = false;
+
- return (long)futex_wait(uaddr, restart->futex.flags,
- restart->futex.val, tp, restart->futex.bitset);
- }
-@@ -3843,6 +3990,8 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
- uaddr2);
- case FUTEX_CMP_REQUEUE_PI:
- return futex_requeue(uaddr, flags, uaddr2, val, val2, &val3, 1);
-+ case FUTEX_WAIT_MULTIPLE:
-+ return futex_wait_multiple(uaddr, flags, val, timeout);
+ static const struct option options[] = {
+ OPT_UINTEGER('t', "threads", &nthreads, "Specify amount of threads"),
+ OPT_UINTEGER('w', "nwakes", &nwakes, "Specify amount of threads to wake at once"),
+@@ -69,8 +72,13 @@ static void *workerfn(void *arg __maybe_unused)
+ pthread_mutex_unlock(&thread_lock);
+
+ while (1) {
+- if (futex_wait(&futex1, 0, NULL, futex_flag) != EINTR)
+- break;
++ if (!futex2) {
++ if (futex_wait(&futex1, 0, NULL, futex_flag) != EINTR)
++ break;
++ } else {
++ if (futex2_wait(&futex1, 0, futex_flag, NULL) != EINTR)
++ break;
++ }
}
- return -ENOSYS;
+
+ pthread_exit(NULL);
+@@ -118,7 +126,7 @@ static void toggle_done(int sig __maybe_unused,
+ done = true;
}
-@@ -3859,7 +4008,8 @@ SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val,
-
- if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI ||
- cmd == FUTEX_WAIT_BITSET ||
-- cmd == FUTEX_WAIT_REQUEUE_PI)) {
-+ cmd == FUTEX_WAIT_REQUEUE_PI ||
-+ cmd == FUTEX_WAIT_MULTIPLE)) {
- if (unlikely(should_fail_futex(!(op & FUTEX_PRIVATE_FLAG))))
- return -EFAULT;
- if (get_timespec64(&ts, utime))
-@@ -3868,7 +4018,7 @@ SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val,
- return -EINVAL;
- t = timespec64_to_ktime(ts);
-- if (cmd == FUTEX_WAIT)
-+ if (cmd == FUTEX_WAIT || cmd == FUTEX_WAIT_MULTIPLE)
- t = ktime_add_safe(ktime_get(), t);
- else if (!(op & FUTEX_CLOCK_REALTIME))
- t = timens_ktime_to_host(CLOCK_MONOTONIC, t);
-@@ -4055,14 +4205,15 @@ SYSCALL_DEFINE6(futex_time32, u32 __user *, uaddr, int, op, u32, val,
-
- if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI ||
- cmd == FUTEX_WAIT_BITSET ||
-- cmd == FUTEX_WAIT_REQUEUE_PI)) {
-+ cmd == FUTEX_WAIT_REQUEUE_PI ||
-+ cmd == FUTEX_WAIT_MULTIPLE)) {
- if (get_old_timespec32(&ts, utime))
- return -EFAULT;
- if (!timespec64_valid(&ts))
- return -EINVAL;
+-int bench_futex_wake(int argc, const char **argv)
++static int bench_futex_wake_common(int argc, const char **argv)
+ {
+ int ret = 0;
+ unsigned int i, j;
+@@ -148,7 +156,9 @@ int bench_futex_wake(int argc, const char **argv)
+ if (!worker)
+ err(EXIT_FAILURE, "calloc");
+
+- if (!fshared)
++ if (futex2)
++ futex_flag = FUTEX_32 | (fshared * FUTEX_SHARED_FLAG);
++ else if (!fshared)
+ futex_flag = FUTEX_PRIVATE_FLAG;
+
+ printf("Run summary [PID %d]: blocking on %d threads (at [%s] futex %p), "
+@@ -181,8 +191,13 @@ int bench_futex_wake(int argc, const char **argv)
+ /* Ok, all threads are patiently blocked, start waking folks up */
+ gettimeofday(&start, NULL);
+ while (nwoken != nthreads)
+- nwoken += futex_wake(&futex1, nwakes, futex_flag);
++ if (!futex2) {
++ nwoken += futex_wake(&futex1, nwakes, futex_flag);
++ } else {
++ nwoken += futex2_wake(&futex1, nwakes, futex_flag);
++ }
+ gettimeofday(&end, NULL);
++
+ timersub(&end, &start, &runtime);
- t = timespec64_to_ktime(ts);
-- if (cmd == FUTEX_WAIT)
-+ if (cmd == FUTEX_WAIT || cmd == FUTEX_WAIT_MULTIPLE)
- t = ktime_add_safe(ktime_get(), t);
- else if (!(op & FUTEX_CLOCK_REALTIME))
- t = timens_ktime_to_host(CLOCK_MONOTONIC, t);
---
-2.28.0
-
-From f56b85af005d46e9ef920a6728e61f7c47cf561e Mon Sep 17 00:00:00 2001
-From: Gabriel Krisman Bertazi <krisman@collabora.com>
-Date: Mon, 2 Nov 2020 18:50:26 -0500
-Subject: [PATCH 12/13] futex: Change WAIT_MULTIPLE opcode to 31
-
-Signed-off-by: Gabriel Krisman Bertazi <krisman@collabora.com>
----
- include/uapi/linux/futex.h | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/include/uapi/linux/futex.h b/include/uapi/linux/futex.h
-index aefb0b83b784..fe2b67ac0c5e 100644
---- a/include/uapi/linux/futex.h
-+++ b/include/uapi/linux/futex.h
-@@ -21,7 +21,7 @@
- #define FUTEX_WAKE_BITSET 10
- #define FUTEX_WAIT_REQUEUE_PI 11
- #define FUTEX_CMP_REQUEUE_PI 12
--#define FUTEX_WAIT_MULTIPLE 13
-+#define FUTEX_WAIT_MULTIPLE 31
-
- #define FUTEX_PRIVATE_FLAG 128
- #define FUTEX_CLOCK_REALTIME 256
---
-2.28.0
-
-From 022e2f888a50fb8d062e26bc385abf02c0be84a3 Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Andr=C3=A9=20Almeida?= <andrealmeid@collabora.com>
-Date: Mon, 16 Nov 2020 21:22:21 -0300
-Subject: [PATCH 13/13] futex2: Add sysfs entry for syscall numbers
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-Signed-off-by: André Almeida <andrealmeid@collabora.com>
----
- kernel/futex2.c | 42 ++++++++++++++++++++++++++++++++++++++++++
- 1 file changed, 42 insertions(+)
-
-diff --git a/kernel/futex2.c b/kernel/futex2.c
-index ae743ddf223e..4bdff8bfc78d 100644
---- a/kernel/futex2.c
-+++ b/kernel/futex2.c
-@@ -742,6 +742,48 @@ SYSCALL_DEFINE3(futex_wake, void __user *, uaddr, unsigned int, nr_wake,
+ update_stats(&wakeup_stats, nwoken);
+@@ -212,3 +227,14 @@ int bench_futex_wake(int argc, const char **argv)
+ free(worker);
return ret;
}
-
-+static ssize_t wait_show(struct kobject *kobj, struct kobj_attribute *attr,
-+ char *buf)
-+{
-+ return sprintf(buf, "%u\n", __NR_futex_wait);
+
++int bench_futex_wake(int argc, const char **argv)
++{
++ return bench_futex_wake_common(argc, argv);
+}
-+static struct kobj_attribute futex2_wait_attr = __ATTR_RO(wait);
+
-+static ssize_t wake_show(struct kobject *kobj, struct kobj_attribute *attr,
-+ char *buf)
++int bench_futex2_wake(int argc, const char **argv)
+{
-+ return sprintf(buf, "%u\n", __NR_futex_wake);
-+
++ futex2 = true;
++ return bench_futex_wake_common(argc, argv);
+}
-+static struct kobj_attribute futex2_wake_attr = __ATTR_RO(wake);
+diff --git a/tools/perf/bench/futex.h b/tools/perf/bench/futex.h
+index 31b53cc7d..5111799b5 100644
+--- a/tools/perf/bench/futex.h
++++ b/tools/perf/bench/futex.h
+@@ -86,4 +86,21 @@ futex_cmp_requeue(u_int32_t *uaddr, u_int32_t val, u_int32_t *uaddr2, int nr_wak
+ return futex(uaddr, FUTEX_CMP_REQUEUE, nr_wake, nr_requeue, uaddr2,
+ val, opflags);
+ }
+
-+static ssize_t waitv_show(struct kobject *kobj, struct kobj_attribute *attr,
-+ char *buf)
++/*
++ * wait for uaddr if (*uaddr == val)
++ */
++static inline int futex2_wait(volatile void *uaddr, unsigned long val,
++ unsigned long flags, struct timespec *timo)
+{
-+ return sprintf(buf, "%u\n", __NR_futex_waitv);
-+
++ return syscall(__NR_futex_wait, uaddr, val, flags, timo);
+}
-+static struct kobj_attribute futex2_waitv_attr = __ATTR_RO(waitv);
-+
-+static struct attribute *futex2_sysfs_attrs[] = {
-+ &futex2_wait_attr.attr,
-+ &futex2_wake_attr.attr,
-+ &futex2_waitv_attr.attr,
-+ NULL,
-+};
-+
-+static const struct attribute_group futex2_sysfs_attr_group = {
-+ .attrs = futex2_sysfs_attrs,
-+ .name = "futex2",
-+};
+
-+static int __init futex2_sysfs_init(void)
++/*
++ * wake nr futexes waiting for uaddr
++ */
++static inline int futex2_wake(volatile void *uaddr, unsigned int nr, unsigned long flags)
+{
-+ return sysfs_create_group(kernel_kobj, &futex2_sysfs_attr_group);
++ return syscall(__NR_futex_wake, uaddr, nr, flags);
+}
-+subsys_initcall(futex2_sysfs_init);
+ #endif /* _FUTEX_H */
+diff --git a/tools/perf/builtin-bench.c b/tools/perf/builtin-bench.c
+index 62a7b7420..200ecacad 100644
+--- a/tools/perf/builtin-bench.c
++++ b/tools/perf/builtin-bench.c
+@@ -12,10 +12,11 @@
+ *
+ * sched ... scheduler and IPC performance
+ * syscall ... System call performance
+- * mem ... memory access performance
+- * numa ... NUMA scheduling and MM performance
+- * futex ... Futex performance
+- * epoll ... Event poll performance
++ * mem ... memory access performance
++ * numa ... NUMA scheduling and MM performance
++ * futex ... Futex performance
++ * futex2 ... Futex2 performance
++ * epoll ... Event poll performance
+ */
+ #include <subcmd/parse-options.h>
+ #include "builtin.h"
+@@ -75,6 +76,13 @@ static struct bench futex_benchmarks[] = {
+ { NULL, NULL, NULL }
+ };
+
++static struct bench futex2_benchmarks[] = {
++ { "hash", "Benchmark for futex2 hash table", bench_futex2_hash },
++ { "wake", "Benchmark for futex2 wake calls", bench_futex2_wake },
++ { "wake-parallel", "Benchmark for parallel futex2 wake calls", bench_futex2_wake_parallel },
++ { NULL, NULL, NULL }
++};
+
- static int __init futex2_init(void)
- {
- int i;
+ #ifdef HAVE_EVENTFD_SUPPORT
+ static struct bench epoll_benchmarks[] = {
+ { "wait", "Benchmark epoll concurrent epoll_waits", bench_epoll_wait },
+@@ -105,6 +113,7 @@ static struct collection collections[] = {
+ { "numa", "NUMA scheduling and MM benchmarks", numa_benchmarks },
+ #endif
+ {"futex", "Futex stressing benchmarks", futex_benchmarks },
++ {"futex2", "Futex2 stressing benchmarks", futex2_benchmarks },
+ #ifdef HAVE_EVENTFD_SUPPORT
+ {"epoll", "Epoll stressing benchmarks", epoll_benchmarks },
+ #endif
--
-2.28.0
+2.29.2