From 2323591ec35bad9ae5469f48a5bb697254f97459 Mon Sep 17 00:00:00 2001 From: Sentry Date: Fri, 6 Nov 2020 18:36:05 +0100 Subject: kernel 5.8.18 --- SOURCES/0003-glitched-base.patch | 447 ------------------- SOURCES/0007-v5.8-fsync.patch | 908 --------------------------------------- SOURCES/fsync.patch | 908 +++++++++++++++++++++++++++++++++++++++ SOURCES/zen.patch | 308 +++++++++++++ 4 files changed, 1216 insertions(+), 1355 deletions(-) delete mode 100644 SOURCES/0003-glitched-base.patch delete mode 100644 SOURCES/0007-v5.8-fsync.patch create mode 100644 SOURCES/fsync.patch create mode 100644 SOURCES/zen.patch (limited to 'SOURCES') diff --git a/SOURCES/0003-glitched-base.patch b/SOURCES/0003-glitched-base.patch deleted file mode 100644 index 60e1d44..0000000 --- a/SOURCES/0003-glitched-base.patch +++ /dev/null @@ -1,447 +0,0 @@ -From 43e519023ea4a79fc6a771bb9ebbb0cfe5fa39bc Mon Sep 17 00:00:00 2001 -From: Sentry -Date: Sun, 12 Jul 2020 20:43:50 +0200 -Subject: [PATCH] glitched base - ---- - .../admin-guide/kernel-parameters.txt | 3 ++ - block/elevator.c | 6 ++-- - drivers/cpufreq/intel_pstate.c | 2 ++ - drivers/infiniband/core/addr.c | 1 + - drivers/tty/Kconfig | 13 ++++++++ - fs/dcache.c | 2 +- - include/linux/blkdev.h | 7 +++- - include/linux/compiler_types.h | 4 +++ - include/linux/mm.h | 5 ++- - include/uapi/linux/vt.h | 15 ++++++++- - init/Kconfig | 33 ++++++++++++++++++- - kernel/sched/core.c | 6 ++-- - kernel/sched/fair.c | 25 ++++++++++++++ - mm/huge_memory.c | 4 +++ - mm/page-writeback.c | 8 +++++ - mm/page_alloc.c | 2 +- - net/ipv4/Kconfig | 4 +++ - net/sched/Kconfig | 4 +++ - scripts/mkcompile_h | 4 +-- - scripts/setlocalversion | 2 +- - 20 files changed, 133 insertions(+), 17 deletions(-) - -diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt -index 7bc83f3d9..d31f0323c 100644 ---- a/Documentation/admin-guide/kernel-parameters.txt -+++ b/Documentation/admin-guide/kernel-parameters.txt -@@ 
-1826,6 +1826,9 @@ - disable - Do not enable intel_pstate as the default - scaling driver for the supported processors -+ enable -+ Enable intel_pstate in-case "disable" was passed -+ previously in the kernel boot parameters - passive - Use intel_pstate as a scaling driver, but configure it - to work with generic cpufreq governors (instead of -diff --git a/block/elevator.c b/block/elevator.c -index 4eab3d70e..79669aa39 100644 ---- a/block/elevator.c -+++ b/block/elevator.c -@@ -623,15 +623,15 @@ static inline bool elv_support_iosched(struct request_queue *q) - } - - /* -- * For single queue devices, default to using mq-deadline. If we have multiple -- * queues or mq-deadline is not available, default to "none". -+ * For single queue devices, default to using bfq. If we have multiple -+ * queues or bfq is not available, default to "none". - */ - static struct elevator_type *elevator_get_default(struct request_queue *q) - { - if (q->nr_hw_queues != 1) - return NULL; - -- return elevator_get(q, "mq-deadline", false); -+ return elevator_get(q, "bfq", false); - } - - /* -diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c -index 4d3429b20..8bf9e0adf 100644 ---- a/drivers/cpufreq/intel_pstate.c -+++ b/drivers/cpufreq/intel_pstate.c -@@ -2824,6 +2824,8 @@ static int __init intel_pstate_setup(char *str) - pr_info("HWP disabled\n"); - no_hwp = 1; - } -+ if (!strcmp(str, "enable")) -+ no_load = 0; - if (!strcmp(str, "force")) - force_load = 1; - if (!strcmp(str, "hwp_only")) -diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c -index 1753a9801..184b30282 100644 ---- a/drivers/infiniband/core/addr.c -+++ b/drivers/infiniband/core/addr.c -@@ -816,6 +816,7 @@ int rdma_addr_find_l2_eth_by_grh(const union ib_gid *sgid, - union { - struct sockaddr_in _sockaddr_in; - struct sockaddr_in6 _sockaddr_in6; -+ struct sockaddr_ib _sockaddr_ib; - } sgid_addr, dgid_addr; - int ret; - -diff --git a/fs/dcache.c b/fs/dcache.c -index 
b280e07e1..74e90f940 100644 ---- a/fs/dcache.c -+++ b/fs/dcache.c -@@ -71,7 +71,7 @@ - * If no ancestor relationship: - * arbitrary, since it's serialized on rename_lock - */ --int sysctl_vfs_cache_pressure __read_mostly = 100; -+int sysctl_vfs_cache_pressure __read_mostly = 50; - EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure); - - __cacheline_aligned_in_smp DEFINE_SEQLOCK(rename_lock); -diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h -index 32868fbed..f028c8070 100644 ---- a/include/linux/blkdev.h -+++ b/include/linux/blkdev.h -@@ -45,7 +45,11 @@ struct blk_queue_stats; - struct blk_stat_callback; - - #define BLKDEV_MIN_RQ 4 -+#ifdef CONFIG_ZENIFY -+#define BLKDEV_MAX_RQ 512 -+#else - #define BLKDEV_MAX_RQ 128 /* Default maximum */ -+#endif - - /* Must be consistent with blk_mq_poll_stats_bkt() */ - #define BLK_MQ_POLL_STATS_BKTS 16 -@@ -614,7 +618,8 @@ struct request_queue { - #define QUEUE_FLAG_RQ_ALLOC_TIME 27 /* record rq->alloc_time_ns */ - - #define QUEUE_FLAG_MQ_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \ -- (1 << QUEUE_FLAG_SAME_COMP)) -+ (1 << QUEUE_FLAG_SAME_COMP) | \ -+ (1 << QUEUE_FLAG_SAME_FORCE)) - - void blk_queue_flag_set(unsigned int flag, struct request_queue *q); - void blk_queue_flag_clear(unsigned int flag, struct request_queue *q); -diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h -index e970f97a7..f3aa9e6c4 100644 ---- a/include/linux/compiler_types.h -+++ b/include/linux/compiler_types.h -@@ -207,6 +207,10 @@ struct ftrace_likely_data { - # define __no_fgcse - #endif - -+#ifndef asm_volatile_goto -+#define asm_volatile_goto(x...) asm goto(x) -+#endif -+ - /* Are two types/vars the same type (ignoring qualifiers)? 
*/ - #define __same_type(a, b) __builtin_types_compatible_p(typeof(a), typeof(b)) - -diff --git a/include/linux/mm.h b/include/linux/mm.h -index f3fe73718..8fb564dbb 100644 ---- a/include/linux/mm.h -+++ b/include/linux/mm.h -@@ -189,8 +189,7 @@ static inline void __mm_zero_struct_page(struct page *page) - * not a hard limit any more. Although some userspace tools can be surprised by - * that. - */ --#define MAPCOUNT_ELF_CORE_MARGIN (5) --#define DEFAULT_MAX_MAP_COUNT (USHRT_MAX - MAPCOUNT_ELF_CORE_MARGIN) -+#define DEFAULT_MAX_MAP_COUNT (262144) - - extern int sysctl_max_map_count; - -@@ -2613,7 +2612,7 @@ int __must_check write_one_page(struct page *page); - void task_dirty_inc(struct task_struct *tsk); - - /* readahead.c */ --#define VM_READAHEAD_PAGES (SZ_128K / PAGE_SIZE) -+#define VM_READAHEAD_PAGES (SZ_2M / PAGE_SIZE) - - int force_page_cache_readahead(struct address_space *mapping, struct file *filp, - pgoff_t offset, unsigned long nr_to_read); -diff --git a/init/Kconfig b/init/Kconfig -index 74a5ac656..bc63ba750 100644 ---- a/init/Kconfig -+++ b/init/Kconfig -@@ -61,6 +61,38 @@ config THREAD_INFO_IN_TASK - - menu "General setup" - -+config ZENIFY -+ bool "A selection of patches from Zen/Liquorix kernel and additional tweaks for a better gaming experience" -+ default y -+ help -+ Tunes the kernel for responsiveness at the cost of throughput and power usage. 
-+ -+ --- Virtual Memory Subsystem --------------------------- -+ -+ Mem dirty before bg writeback..: 10 % -> 20 % -+ Mem dirty before sync writeback: 20 % -> 50 % -+ -+ --- Block Layer ---------------------------------------- -+ -+ Queue depth...............: 128 -> 512 -+ Default MQ scheduler......: mq-deadline -> bfq -+ -+ --- CFS CPU Scheduler ---------------------------------- -+ -+ Scheduling latency.............: 6 -> 3 ms -+ Minimal granularity............: 0.75 -> 0.3 ms -+ Wakeup granularity.............: 1 -> 0.5 ms -+ CPU migration cost.............: 0.5 -> 0.25 ms -+ Bandwidth slice size...........: 5 -> 3 ms -+ Ondemand fine upscaling limit..: 95 % -> 85 % -+ -+ --- MuQSS CPU Scheduler -------------------------------- -+ -+ Scheduling interval............: 6 -> 3 ms -+ ISO task max realtime use......: 70 % -> 25 % -+ Ondemand coarse upscaling limit: 80 % -> 45 % -+ Ondemand fine upscaling limit..: 95 % -> 45 % -+ - config BROKEN - bool - -@@ -1240,7 +1272,6 @@ config CC_OPTIMIZE_FOR_PERFORMANCE - - config CC_OPTIMIZE_FOR_PERFORMANCE_O3 - bool "Optimize more for performance (-O3)" -- depends on ARC - help - Choosing this option will pass "-O3" to your compiler to optimize - the kernel yet more for performance. -diff --git a/kernel/sched/core.c b/kernel/sched/core.c -index 9a2fbf98f..630c93d66 100644 ---- a/kernel/sched/core.c -+++ b/kernel/sched/core.c -@@ -57,7 +57,7 @@ const_debug unsigned int sysctl_sched_features = - * Number of tasks to iterate in a single balance run. - * Limited because this is done with IRQs disabled. - */ --const_debug unsigned int sysctl_sched_nr_migrate = 32; -+const_debug unsigned int sysctl_sched_nr_migrate = 128; - - /* - * period over which we measure -rt task CPU usage in us. -@@ -69,9 +69,9 @@ __read_mostly int scheduler_running; - - /* - * part of the period that we allow rt tasks to run in us. 
-- * default: 0.95s -+ * XanMod default: 0.98s - */ --int sysctl_sched_rt_runtime = 950000; -+int sysctl_sched_rt_runtime = 980000; - - /* - * __task_rq_lock - lock the rq @p resides on. -diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c -index da3e5b547..0a8dc0e64 100644 ---- a/kernel/sched/fair.c -+++ b/kernel/sched/fair.c -@@ -37,8 +37,13 @@ - * - * (default: 6ms * (1 + ilog(ncpus)), units: nanoseconds) - */ -+#ifdef CONFIG_ZENIFY -+unsigned int sysctl_sched_latency = 3000000ULL; -+static unsigned int normalized_sysctl_sched_latency = 3000000ULL; -+#else - unsigned int sysctl_sched_latency = 6000000ULL; - static unsigned int normalized_sysctl_sched_latency = 6000000ULL; -+#endif - - /* - * The initial- and re-scaling of tunables is configurable -@@ -58,13 +63,22 @@ enum sched_tunable_scaling sysctl_sched_tunable_scaling = SCHED_TUNABLESCALING_L - * - * (default: 0.75 msec * (1 + ilog(ncpus)), units: nanoseconds) - */ -+#ifdef CONFIG_ZENIFY -+unsigned int sysctl_sched_min_granularity = 300000ULL; -+static unsigned int normalized_sysctl_sched_min_granularity = 300000ULL; -+#else - unsigned int sysctl_sched_min_granularity = 750000ULL; - static unsigned int normalized_sysctl_sched_min_granularity = 750000ULL; -+#endif - - /* - * This value is kept at sysctl_sched_latency/sysctl_sched_min_granularity - */ -+#ifdef CONFIG_ZENIFY -+static unsigned int sched_nr_latency = 10; -+#else - static unsigned int sched_nr_latency = 8; -+#endif - - /* - * After fork, child runs first. 
If set to 0 (default) then -@@ -81,10 +95,17 @@ unsigned int sysctl_sched_child_runs_first __read_mostly; - * - * (default: 1 msec * (1 + ilog(ncpus)), units: nanoseconds) - */ -+#ifdef CONFIG_ZENIFY -+unsigned int sysctl_sched_wakeup_granularity = 500000UL; -+static unsigned int normalized_sysctl_sched_wakeup_granularity = 500000UL; -+ -+const_debug unsigned int sysctl_sched_migration_cost = 50000UL; -+#else - unsigned int sysctl_sched_wakeup_granularity = 1000000UL; - static unsigned int normalized_sysctl_sched_wakeup_granularity = 1000000UL; - - const_debug unsigned int sysctl_sched_migration_cost = 500000UL; -+#endif - - int sched_thermal_decay_shift; - static int __init setup_sched_thermal_decay_shift(char *str) -@@ -128,8 +149,12 @@ int __weak arch_asym_cpu_priority(int cpu) - * - * (default: 5 msec, units: microseconds) - */ -+#ifdef CONFIG_ZENIFY -+unsigned int sysctl_sched_cfs_bandwidth_slice = 3000UL; -+#else - unsigned int sysctl_sched_cfs_bandwidth_slice = 5000UL; - #endif -+#endif - - static inline void update_load_add(struct load_weight *lw, unsigned long inc) - { -diff --git a/mm/huge_memory.c b/mm/huge_memory.c -index 6ecd10451..8a3bdff2c 100644 ---- a/mm/huge_memory.c -+++ b/mm/huge_memory.c -@@ -53,7 +53,11 @@ unsigned long transparent_hugepage_flags __read_mostly = - #ifdef CONFIG_TRANSPARENT_HUGEPAGE_MADVISE - (1< -Date: Mon, 20 Apr 2020 14:09:11 +0200 -Subject: Import Fsync v3 patchset - Squashed from https://gitlab.collabora.com/tonyk/linux/-/commits/futex-proton-v3 - -diff --git a/include/uapi/linux/futex.h b/include/uapi/linux/futex.h -index a89eb0accd5e2ee527be1e3e11b1117ff5bf94b4..580001e89c6caed57dd8b3cb491d65dce846caff 100644 ---- a/include/uapi/linux/futex.h -+++ b/include/uapi/linux/futex.h -@@ -21,6 +21,7 @@ - #define FUTEX_WAKE_BITSET 10 - #define FUTEX_WAIT_REQUEUE_PI 11 - #define FUTEX_CMP_REQUEUE_PI 12 -+#define FUTEX_WAIT_MULTIPLE 13 - - #define FUTEX_PRIVATE_FLAG 128 - #define FUTEX_CLOCK_REALTIME 256 -@@ -40,6 +41,8 @@ - 
FUTEX_PRIVATE_FLAG) - #define FUTEX_CMP_REQUEUE_PI_PRIVATE (FUTEX_CMP_REQUEUE_PI | \ - FUTEX_PRIVATE_FLAG) -+#define FUTEX_WAIT_MULTIPLE_PRIVATE (FUTEX_WAIT_MULTIPLE | \ -+ FUTEX_PRIVATE_FLAG) - - /* - * Support for robust futexes: the kernel cleans up held futexes at -@@ -150,4 +153,21 @@ struct robust_list_head { - (((op & 0xf) << 28) | ((cmp & 0xf) << 24) \ - | ((oparg & 0xfff) << 12) | (cmparg & 0xfff)) - -+/* -+ * Maximum number of multiple futexes to wait for -+ */ -+#define FUTEX_MULTIPLE_MAX_COUNT 128 -+ -+/** -+ * struct futex_wait_block - Block of futexes to be waited for -+ * @uaddr: User address of the futex -+ * @val: Futex value expected by userspace -+ * @bitset: Bitset for the optional bitmasked wakeup -+ */ -+struct futex_wait_block { -+ __u32 __user *uaddr; -+ __u32 val; -+ __u32 bitset; -+}; -+ - #endif /* _UAPI_LINUX_FUTEX_H */ -diff --git a/kernel/futex.c b/kernel/futex.c -index 0cf84c8664f207c574325b899ef2e57f01295a94..58cf9eb2b851b4858e29b5ef4114a29a92e676ba 100644 ---- a/kernel/futex.c -+++ b/kernel/futex.c -@@ -215,6 +215,8 @@ struct futex_pi_state { - * @rt_waiter: rt_waiter storage for use with requeue_pi - * @requeue_pi_key: the requeue_pi target futex key - * @bitset: bitset for the optional bitmasked wakeup -+ * @uaddr: userspace address of futex -+ * @uval: expected futex's value - * - * We use this hashed waitqueue, instead of a normal wait_queue_entry_t, so - * we can wake only the relevant ones (hashed queues may be shared). 
-@@ -237,6 +239,8 @@ struct futex_q { - struct rt_mutex_waiter *rt_waiter; - union futex_key *requeue_pi_key; - u32 bitset; -+ u32 __user *uaddr; -+ u32 uval; - } __randomize_layout; - - static const struct futex_q futex_q_init = { -@@ -2420,6 +2424,29 @@ static int unqueue_me(struct futex_q *q) - return ret; - } - -+/** -+ * unqueue_multiple() - Remove several futexes from their futex_hash_bucket -+ * @q: The list of futexes to unqueue -+ * @count: Number of futexes in the list -+ * -+ * Helper to unqueue a list of futexes. This can't fail. -+ * -+ * Return: -+ * - >=0 - Index of the last futex that was awoken; -+ * - -1 - If no futex was awoken -+ */ -+static int unqueue_multiple(struct futex_q *q, int count) -+{ -+ int ret = -1; -+ int i; -+ -+ for (i = 0; i < count; i++) { -+ if (!unqueue_me(&q[i])) -+ ret = i; -+ } -+ return ret; -+} -+ - /* - * PI futexes can not be requeued and must remove themself from the - * hash bucket. The hash bucket lock (i.e. lock_ptr) is held on entry -@@ -2783,6 +2810,211 @@ static int futex_wait_setup(u32 __user *uaddr, u32 val, unsigned int flags, - return ret; - } - -+/** -+ * futex_wait_multiple_setup() - Prepare to wait and enqueue multiple futexes -+ * @qs: The corresponding futex list -+ * @count: The size of the lists -+ * @flags: Futex flags (FLAGS_SHARED, etc.) -+ * @awaken: Index of the last awoken futex -+ * -+ * Prepare multiple futexes in a single step and enqueue them. This may fail if -+ * the futex list is invalid or if any futex was already awoken. On success the -+ * task is ready to interruptible sleep. 
-+ * -+ * Return: -+ * - 1 - One of the futexes was awaken by another thread -+ * - 0 - Success -+ * - <0 - -EFAULT, -EWOULDBLOCK or -EINVAL -+ */ -+static int futex_wait_multiple_setup(struct futex_q *qs, int count, -+ unsigned int flags, int *awaken) -+{ -+ struct futex_hash_bucket *hb; -+ int ret, i; -+ u32 uval; -+ -+ /* -+ * Enqueuing multiple futexes is tricky, because we need to -+ * enqueue each futex in the list before dealing with the next -+ * one to avoid deadlocking on the hash bucket. But, before -+ * enqueuing, we need to make sure that current->state is -+ * TASK_INTERRUPTIBLE, so we don't absorb any awake events, which -+ * cannot be done before the get_futex_key of the next key, -+ * because it calls get_user_pages, which can sleep. Thus, we -+ * fetch the list of futexes keys in two steps, by first pinning -+ * all the memory keys in the futex key, and only then we read -+ * each key and queue the corresponding futex. -+ */ -+retry: -+ for (i = 0; i < count; i++) { -+ qs[i].key = FUTEX_KEY_INIT; -+ ret = get_futex_key(qs[i].uaddr, flags & FLAGS_SHARED, -+ &qs[i].key, FUTEX_READ); -+ if (unlikely(ret)) { -+ for (--i; i >= 0; i--) -+ put_futex_key(&qs[i].key); -+ return ret; -+ } -+ } -+ -+ set_current_state(TASK_INTERRUPTIBLE); -+ -+ for (i = 0; i < count; i++) { -+ struct futex_q *q = &qs[i]; -+ -+ hb = queue_lock(q); -+ -+ ret = get_futex_value_locked(&uval, q->uaddr); -+ if (ret) { -+ /* -+ * We need to try to handle the fault, which -+ * cannot be done without sleep, so we need to -+ * undo all the work already done, to make sure -+ * we don't miss any wake ups. Therefore, clean -+ * up, handle the fault and retry from the -+ * beginning. -+ */ -+ queue_unlock(hb); -+ -+ /* -+ * Keys 0..(i-1) are implicitly put -+ * on unqueue_multiple. -+ */ -+ put_futex_key(&q->key); -+ -+ *awaken = unqueue_multiple(qs, i); -+ -+ __set_current_state(TASK_RUNNING); -+ -+ /* -+ * On a real fault, prioritize the error even if -+ * some other futex was awoken. 
Userspace gave -+ * us a bad address, -EFAULT them. -+ */ -+ ret = get_user(uval, q->uaddr); -+ if (ret) -+ return ret; -+ -+ /* -+ * Even if the page fault was handled, If -+ * something was already awaken, we can safely -+ * give up and succeed to give a hint for userspace to -+ * acquire the right futex faster. -+ */ -+ if (*awaken >= 0) -+ return 1; -+ -+ goto retry; -+ } -+ -+ if (uval != q->uval) { -+ queue_unlock(hb); -+ -+ put_futex_key(&qs[i].key); -+ -+ /* -+ * If something was already awaken, we can -+ * safely ignore the error and succeed. -+ */ -+ *awaken = unqueue_multiple(qs, i); -+ __set_current_state(TASK_RUNNING); -+ if (*awaken >= 0) -+ return 1; -+ -+ return -EWOULDBLOCK; -+ } -+ -+ /* -+ * The bucket lock can't be held while dealing with the -+ * next futex. Queue each futex at this moment so hb can -+ * be unlocked. -+ */ -+ queue_me(&qs[i], hb); -+ } -+ return 0; -+} -+ -+/** -+ * futex_wait_multiple() - Prepare to wait on and enqueue several futexes -+ * @qs: The list of futexes to wait on -+ * @op: Operation code from futex's syscall -+ * @count: The number of objects -+ * @abs_time: Timeout before giving up and returning to userspace -+ * -+ * Entry point for the FUTEX_WAIT_MULTIPLE futex operation, this function -+ * sleeps on a group of futexes and returns on the first futex that -+ * triggered, or after the timeout has elapsed. 
-+ * -+ * Return: -+ * - >=0 - Hint to the futex that was awoken -+ * - <0 - On error -+ */ -+static int futex_wait_multiple(struct futex_q *qs, int op, -+ u32 count, ktime_t *abs_time) -+{ -+ struct hrtimer_sleeper timeout, *to; -+ int ret, flags = 0, hint = 0; -+ unsigned int i; -+ -+ if (!(op & FUTEX_PRIVATE_FLAG)) -+ flags |= FLAGS_SHARED; -+ -+ if (op & FUTEX_CLOCK_REALTIME) -+ flags |= FLAGS_CLOCKRT; -+ -+ to = futex_setup_timer(abs_time, &timeout, flags, 0); -+ while (1) { -+ ret = futex_wait_multiple_setup(qs, count, flags, &hint); -+ if (ret) { -+ if (ret > 0) { -+ /* A futex was awaken during setup */ -+ ret = hint; -+ } -+ break; -+ } -+ -+ if (to) -+ hrtimer_start_expires(&to->timer, HRTIMER_MODE_ABS); -+ -+ /* -+ * Avoid sleeping if another thread already tried to -+ * wake us. -+ */ -+ for (i = 0; i < count; i++) { -+ if (plist_node_empty(&qs[i].list)) -+ break; -+ } -+ -+ if (i == count && (!to || to->task)) -+ freezable_schedule(); -+ -+ ret = unqueue_multiple(qs, count); -+ -+ __set_current_state(TASK_RUNNING); -+ -+ if (ret >= 0) -+ break; -+ if (to && !to->task) { -+ ret = -ETIMEDOUT; -+ break; -+ } else if (signal_pending(current)) { -+ ret = -ERESTARTSYS; -+ break; -+ } -+ /* -+ * The final case is a spurious wakeup, for -+ * which just retry. 
-+ */ -+ } -+ -+ if (to) { -+ hrtimer_cancel(&to->timer); -+ destroy_hrtimer_on_stack(&to->timer); -+ } -+ -+ return ret; -+} -+ - static int futex_wait(u32 __user *uaddr, unsigned int flags, u32 val, - ktime_t *abs_time, u32 bitset) - { -@@ -3907,6 +4139,43 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, - return -ENOSYS; - } - -+/** -+ * futex_read_wait_block - Read an array of futex_wait_block from userspace -+ * @uaddr: Userspace address of the block -+ * @count: Number of blocks to be read -+ * -+ * This function creates and allocate an array of futex_q (we zero it to -+ * initialize the fields) and then, for each futex_wait_block element from -+ * userspace, fill a futex_q element with proper values. -+ */ -+inline struct futex_q *futex_read_wait_block(u32 __user *uaddr, u32 count) -+{ -+ unsigned int i; -+ struct futex_q *qs; -+ struct futex_wait_block fwb; -+ struct futex_wait_block __user *entry = -+ (struct futex_wait_block __user *)uaddr; -+ -+ if (!count || count > FUTEX_MULTIPLE_MAX_COUNT) -+ return ERR_PTR(-EINVAL); -+ -+ qs = kcalloc(count, sizeof(*qs), GFP_KERNEL); -+ if (!qs) -+ return ERR_PTR(-ENOMEM); -+ -+ for (i = 0; i < count; i++) { -+ if (copy_from_user(&fwb, &entry[i], sizeof(fwb))) { -+ kfree(qs); -+ return ERR_PTR(-EFAULT); -+ } -+ -+ qs[i].uaddr = fwb.uaddr; -+ qs[i].uval = fwb.val; -+ qs[i].bitset = fwb.bitset; -+ } -+ -+ return qs; -+} - - SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val, - struct __kernel_timespec __user *, utime, u32 __user *, uaddr2, -@@ -3919,7 +4188,8 @@ SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val, - - if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI || - cmd == FUTEX_WAIT_BITSET || -- cmd == FUTEX_WAIT_REQUEUE_PI)) { -+ cmd == FUTEX_WAIT_REQUEUE_PI || -+ cmd == FUTEX_WAIT_MULTIPLE)) { - if (unlikely(should_fail_futex(!(op & FUTEX_PRIVATE_FLAG)))) - return -EFAULT; - if (get_timespec64(&ts, utime)) -@@ -3940,6 +4210,25 @@ SYSCALL_DEFINE6(futex, u32 
__user *, uaddr, int, op, u32, val, - cmd == FUTEX_CMP_REQUEUE_PI || cmd == FUTEX_WAKE_OP) - val2 = (u32) (unsigned long) utime; - -+ if (cmd == FUTEX_WAIT_MULTIPLE) { -+ int ret; -+ struct futex_q *qs; -+ -+#ifdef CONFIG_X86_X32 -+ if (unlikely(in_x32_syscall())) -+ return -ENOSYS; -+#endif -+ qs = futex_read_wait_block(uaddr, val); -+ -+ if (IS_ERR(qs)) -+ return PTR_ERR(qs); -+ -+ ret = futex_wait_multiple(qs, op, val, tp); -+ kfree(qs); -+ -+ return ret; -+ } -+ - return do_futex(uaddr, op, val, tp, uaddr2, val2, val3); - } - -@@ -4102,6 +4391,57 @@ COMPAT_SYSCALL_DEFINE3(get_robust_list, int, pid, - #endif /* CONFIG_COMPAT */ - - #ifdef CONFIG_COMPAT_32BIT_TIME -+/** -+ * struct compat_futex_wait_block - Block of futexes to be waited for -+ * @uaddr: User address of the futex (compatible pointer) -+ * @val: Futex value expected by userspace -+ * @bitset: Bitset for the optional bitmasked wakeup -+ */ -+struct compat_futex_wait_block { -+ compat_uptr_t uaddr; -+ __u32 val; -+ __u32 bitset; -+}; -+ -+/** -+ * compat_futex_read_wait_block - Read an array of futex_wait_block from -+ * userspace -+ * @uaddr: Userspace address of the block -+ * @count: Number of blocks to be read -+ * -+ * This function does the same as futex_read_wait_block(), except that it -+ * converts the pointer to the futex from the compat version to the regular one. 
-+ */ -+inline struct futex_q *compat_futex_read_wait_block(u32 __user *uaddr, -+ u32 count) -+{ -+ unsigned int i; -+ struct futex_q *qs; -+ struct compat_futex_wait_block fwb; -+ struct compat_futex_wait_block __user *entry = -+ (struct compat_futex_wait_block __user *)uaddr; -+ -+ if (!count || count > FUTEX_MULTIPLE_MAX_COUNT) -+ return ERR_PTR(-EINVAL); -+ -+ qs = kcalloc(count, sizeof(*qs), GFP_KERNEL); -+ if (!qs) -+ return ERR_PTR(-ENOMEM); -+ -+ for (i = 0; i < count; i++) { -+ if (copy_from_user(&fwb, &entry[i], sizeof(fwb))) { -+ kfree(qs); -+ return ERR_PTR(-EFAULT); -+ } -+ -+ qs[i].uaddr = compat_ptr(fwb.uaddr); -+ qs[i].uval = fwb.val; -+ qs[i].bitset = fwb.bitset; -+ } -+ -+ return qs; -+} -+ - SYSCALL_DEFINE6(futex_time32, u32 __user *, uaddr, int, op, u32, val, - struct old_timespec32 __user *, utime, u32 __user *, uaddr2, - u32, val3) -@@ -4113,7 +4453,8 @@ SYSCALL_DEFINE6(futex_time32, u32 __user *, uaddr, int, op, u32, val, - - if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI || - cmd == FUTEX_WAIT_BITSET || -- cmd == FUTEX_WAIT_REQUEUE_PI)) { -+ cmd == FUTEX_WAIT_REQUEUE_PI || -+ cmd == FUTEX_WAIT_MULTIPLE)) { - if (get_old_timespec32(&ts, utime)) - return -EFAULT; - if (!timespec64_valid(&ts)) -@@ -4128,6 +4469,19 @@ SYSCALL_DEFINE6(futex_time32, u32 __user *, uaddr, int, op, u32, val, - cmd == FUTEX_CMP_REQUEUE_PI || cmd == FUTEX_WAKE_OP) - val2 = (int) (unsigned long) utime; - -+ if (cmd == FUTEX_WAIT_MULTIPLE) { -+ int ret; -+ struct futex_q *qs = compat_futex_read_wait_block(uaddr, val); -+ -+ if (IS_ERR(qs)) -+ return PTR_ERR(qs); -+ -+ ret = futex_wait_multiple(qs, op, val, tp); -+ kfree(qs); -+ -+ return ret; -+ } -+ - return do_futex(uaddr, op, val, tp, uaddr2, val2, val3); - } - #endif /* CONFIG_COMPAT_32BIT_TIME */ -diff --git a/tools/testing/selftests/futex/functional/futex_wait_timeout.c b/tools/testing/selftests/futex/functional/futex_wait_timeout.c -index 
ee55e6d389a3f053194435342c4e471dc7cf8786..2a63e1c2cfb6407a5988233217cff2e52787bc66 100644 ---- a/tools/testing/selftests/futex/functional/futex_wait_timeout.c -+++ b/tools/testing/selftests/futex/functional/futex_wait_timeout.c -@@ -11,6 +11,7 @@ - * - * HISTORY - * 2009-Nov-6: Initial version by Darren Hart -+ * 2019-Dec-13: Add WAIT_MULTIPLE test by Krisman - * - *****************************************************************************/ - -@@ -41,6 +42,8 @@ int main(int argc, char *argv[]) - { - futex_t f1 = FUTEX_INITIALIZER; - struct timespec to; -+ time_t secs; -+ struct futex_wait_block fwb = {&f1, f1, 0}; - int res, ret = RET_PASS; - int c; - -@@ -65,7 +68,7 @@ int main(int argc, char *argv[]) - } - - ksft_print_header(); -- ksft_set_plan(1); -+ ksft_set_plan(2); - ksft_print_msg("%s: Block on a futex and wait for timeout\n", - basename(argv[0])); - ksft_print_msg("\tArguments: timeout=%ldns\n", timeout_ns); -@@ -79,8 +82,39 @@ int main(int argc, char *argv[]) - if (!res || errno != ETIMEDOUT) { - fail("futex_wait returned %d\n", ret < 0 ? errno : ret); - ret = RET_FAIL; -+ } else -+ ksft_test_result_pass("futex_wait timeout succeeds\n"); -+ -+ info("Calling futex_wait_multiple on f1: %u @ %p\n", f1, &f1); -+ -+ /* Setup absolute time */ -+ ret = clock_gettime(CLOCK_REALTIME, &to); -+ secs = (to.tv_nsec + timeout_ns) / 1000000000; -+ to.tv_nsec = ((int64_t)to.tv_nsec + timeout_ns) % 1000000000; -+ to.tv_sec += secs; -+ info("to.tv_sec = %ld\n", to.tv_sec); -+ info("to.tv_nsec = %ld\n", to.tv_nsec); -+ -+ res = futex_wait_multiple(&fwb, 1, &to, -+ FUTEX_PRIVATE_FLAG | FUTEX_CLOCK_REALTIME); -+ -+#ifdef __ILP32__ -+ if (res == -1 && errno == ENOSYS) { -+ ksft_test_result_skip("futex_wait_multiple not supported at x32\n"); -+ } else { -+ ksft_test_result_fail("futex_wait_multiple returned %d\n", -+ res < 0 ? 
errno : res); -+ ret = RET_FAIL; - } -+#else -+ if (!res || errno != ETIMEDOUT) { -+ ksft_test_result_fail("futex_wait_multiple returned %d\n", -+ res < 0 ? errno : res); -+ ret = RET_FAIL; -+ } else -+ ksft_test_result_pass("futex_wait_multiple timeout succeeds\n"); -+#endif /* __ILP32__ */ - -- print_result(TEST_NAME, ret); -+ ksft_print_cnts(); - return ret; - } -diff --git a/tools/testing/selftests/futex/include/futextest.h b/tools/testing/selftests/futex/include/futextest.h -index ddbcfc9b7bac4aebb5bac2f249e26ecfd948aa84..bb103bef4557012ef9a389ca74c868e4476a8a31 100644 ---- a/tools/testing/selftests/futex/include/futextest.h -+++ b/tools/testing/selftests/futex/include/futextest.h -@@ -38,6 +38,14 @@ typedef volatile u_int32_t futex_t; - #ifndef FUTEX_CMP_REQUEUE_PI - #define FUTEX_CMP_REQUEUE_PI 12 - #endif -+#ifndef FUTEX_WAIT_MULTIPLE -+#define FUTEX_WAIT_MULTIPLE 13 -+struct futex_wait_block { -+ futex_t *uaddr; -+ futex_t val; -+ __u32 bitset; -+}; -+#endif - #ifndef FUTEX_WAIT_REQUEUE_PI_PRIVATE - #define FUTEX_WAIT_REQUEUE_PI_PRIVATE (FUTEX_WAIT_REQUEUE_PI | \ - FUTEX_PRIVATE_FLAG) -@@ -80,6 +88,20 @@ futex_wait(futex_t *uaddr, futex_t val, struct timespec *timeout, int opflags) - return futex(uaddr, FUTEX_WAIT, val, timeout, NULL, 0, opflags); - } - -+/** -+ * futex_wait_multiple() - block on several futexes with optional timeout -+ * @fwb: wait block user space address -+ * @count: number of entities at fwb -+ * @timeout: absolute timeout -+ */ -+static inline int -+futex_wait_multiple(struct futex_wait_block *fwb, int count, -+ struct timespec *timeout, int opflags) -+{ -+ return futex(fwb, FUTEX_WAIT_MULTIPLE, count, timeout, NULL, 0, -+ opflags); -+} -+ - /** - * futex_wake() - wake one or more tasks blocked on uaddr - * @nr_wake: wake up to this many tasks -diff --git a/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c b/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c -index 
0ae390ff816449c88d0bb655a26eb014382c2b4f..bcbac042992d447e0bc9ef5fefe94e875de310f2 100644 ---- a/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c -+++ b/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c -@@ -12,6 +12,7 @@ - * - * HISTORY - * 2009-Nov-14: Initial version by Gowrishankar -+ * 2019-Dec-13: Add WAIT_MULTIPLE test by Krisman - * - *****************************************************************************/ - -@@ -40,6 +41,7 @@ int main(int argc, char *argv[]) - { - struct timespec to = {.tv_sec = 0, .tv_nsec = timeout_ns}; - futex_t f1 = FUTEX_INITIALIZER; -+ struct futex_wait_block fwb = {&f1, f1+1, 0}; - int res, ret = RET_PASS; - int c; - -@@ -61,7 +63,7 @@ int main(int argc, char *argv[]) - } - - ksft_print_header(); -- ksft_set_plan(1); -+ ksft_set_plan(2); - ksft_print_msg("%s: Test the unexpected futex value in FUTEX_WAIT\n", - basename(argv[0])); - -@@ -71,8 +73,30 @@ int main(int argc, char *argv[]) - fail("futex_wait returned: %d %s\n", - res ? errno : res, res ? strerror(errno) : ""); - ret = RET_FAIL; -+ } else -+ ksft_test_result_pass("futex_wait wouldblock succeeds\n"); -+ -+ info("Calling futex_wait_multiple on f1: %u @ %p with val=%u\n", -+ f1, &f1, f1+1); -+ res = futex_wait_multiple(&fwb, 1, NULL, FUTEX_PRIVATE_FLAG); -+ -+#ifdef __ILP32__ -+ if (res != -1 || errno != ENOSYS) { -+ ksft_test_result_fail("futex_wait_multiple returned %d\n", -+ res < 0 ? errno : res); -+ ret = RET_FAIL; -+ } else { -+ ksft_test_result_skip("futex_wait_multiple not supported at x32\n"); -+ } -+#else -+ if (!res || errno != EWOULDBLOCK) { -+ ksft_test_result_fail("futex_wait_multiple returned %d\n", -+ res < 0 ? 
errno : res); -+ ret = RET_FAIL; - } -+ ksft_test_result_pass("futex_wait_multiple wouldblock succeeds\n"); -+#endif /* __ILP32__ */ - -- print_result(TEST_NAME, ret); -+ ksft_print_cnts(); - return ret; - } -diff --git a/tools/testing/selftests/futex/functional/.gitignore b/tools/testing/selftests/futex/functional/.gitignore -index a09f570619023750f558c84004aff166b4337d72..4660128a545edb04a17cc6bd9760931c1386122f 100644 ---- a/tools/testing/selftests/futex/functional/.gitignore -+++ b/tools/testing/selftests/futex/functional/.gitignore -@@ -5,3 +5,4 @@ futex_wait_private_mapped_file - futex_wait_timeout - futex_wait_uninitialized_heap - futex_wait_wouldblock -+futex_wait_multiple -diff --git a/tools/testing/selftests/futex/functional/Makefile b/tools/testing/selftests/futex/functional/Makefile -index 30996306cabcfe89a47977643e529b122893bb7e..75f9fface11fa3c90c1bdb9a49b3ea51291afd58 100644 ---- a/tools/testing/selftests/futex/functional/Makefile -+++ b/tools/testing/selftests/futex/functional/Makefile -@@ -14,7 +14,8 @@ TEST_GEN_FILES := \ - futex_requeue_pi_signal_restart \ - futex_requeue_pi_mismatched_ops \ - futex_wait_uninitialized_heap \ -- futex_wait_private_mapped_file -+ futex_wait_private_mapped_file \ -+ futex_wait_multiple - - TEST_PROGS := run.sh - -diff --git a/tools/testing/selftests/futex/functional/futex_wait_multiple.c b/tools/testing/selftests/futex/functional/futex_wait_multiple.c -new file mode 100644 -index 0000000000000000000000000000000000000000..b48422e79f42edba1653bb0bd2a4c4fd98d2d48d ---- /dev/null -+++ b/tools/testing/selftests/futex/functional/futex_wait_multiple.c -@@ -0,0 +1,173 @@ -+// SPDX-License-Identifier: GPL-2.0-or-later -+/****************************************************************************** -+ * -+ * Copyright © Collabora, Ltd., 2019 -+ * -+ * DESCRIPTION -+ * Test basic semantics of FUTEX_WAIT_MULTIPLE -+ * -+ * AUTHOR -+ * Gabriel Krisman Bertazi -+ * -+ * HISTORY -+ * 2019-Dec-13: Initial version by Krisman -+ * 
-+ *****************************************************************************/ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include "futextest.h" -+#include "logging.h" -+ -+#define TEST_NAME "futex-wait-multiple" -+#define timeout_ns 100000 -+#define MAX_COUNT 128 -+#define WAKE_WAIT_US 3000000 -+ -+int ret = RET_PASS; -+char *progname; -+futex_t f[MAX_COUNT] = {0}; -+struct futex_wait_block fwb[MAX_COUNT]; -+ -+void usage(char *prog) -+{ -+ printf("Usage: %s\n", prog); -+ printf(" -c Use color\n"); -+ printf(" -h Display this help message\n"); -+ printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n", -+ VQUIET, VCRITICAL, VINFO); -+} -+ -+void test_count_overflow(void) -+{ -+ futex_t f = FUTEX_INITIALIZER; -+ struct futex_wait_block fwb[MAX_COUNT+1]; -+ int res, i; -+ -+ ksft_print_msg("%s: Test a too big number of futexes\n", progname); -+ -+ for (i = 0; i < MAX_COUNT+1; i++) { -+ fwb[i].uaddr = &f; -+ fwb[i].val = f; -+ fwb[i].bitset = 0; -+ } -+ -+ res = futex_wait_multiple(fwb, MAX_COUNT+1, NULL, FUTEX_PRIVATE_FLAG); -+ -+#ifdef __ILP32__ -+ if (res != -1 || errno != ENOSYS) { -+ ksft_test_result_fail("futex_wait_multiple returned %d\n", -+ res < 0 ? errno : res); -+ ret = RET_FAIL; -+ } else { -+ ksft_test_result_skip("futex_wait_multiple not supported at x32\n"); -+ } -+#else -+ if (res != -1 || errno != EINVAL) { -+ ksft_test_result_fail("futex_wait_multiple returned %d\n", -+ res < 0 ? errno : res); -+ ret = RET_FAIL; -+ } else { -+ ksft_test_result_pass("futex_wait_multiple count overflow succeed\n"); -+ } -+ -+#endif /* __ILP32__ */ -+} -+ -+void *waiterfn(void *arg) -+{ -+ int res; -+ -+ res = futex_wait_multiple(fwb, MAX_COUNT, NULL, FUTEX_PRIVATE_FLAG); -+ -+#ifdef __ILP32__ -+ if (res != -1 || errno != ENOSYS) { -+ ksft_test_result_fail("futex_wait_multiple returned %d\n", -+ res < 0 ? 
errno : res); -+ ret = RET_FAIL; -+ } else { -+ ksft_test_result_skip("futex_wait_multiple not supported at x32\n"); -+ } -+#else -+ if (res < 0) -+ ksft_print_msg("waiter failed %d\n", res); -+ -+ info("futex_wait_multiple: Got hint futex %d was freed\n", res); -+#endif /* __ILP32__ */ -+ -+ return NULL; -+} -+ -+void test_fwb_wakeup(void) -+{ -+ int res, i; -+ pthread_t waiter; -+ -+ ksft_print_msg("%s: Test wake up in a list of futex\n", progname); -+ -+ for (i = 0; i < MAX_COUNT; i++) { -+ fwb[i].uaddr = &f[i]; -+ fwb[i].val = f[i]; -+ fwb[i].bitset = 0xffffffff; -+ } -+ -+ res = pthread_create(&waiter, NULL, waiterfn, NULL); -+ if (res) { -+ ksft_test_result_fail("Creating waiting thread failed"); -+ ksft_exit_fail(); -+ } -+ -+ usleep(WAKE_WAIT_US); -+ res = futex_wake(&(f[MAX_COUNT-1]), 1, FUTEX_PRIVATE_FLAG); -+ if (res != 1) { -+ ksft_test_result_fail("Failed to wake thread res=%d\n", res); -+ ksft_exit_fail(); -+ } -+ -+ pthread_join(waiter, NULL); -+ ksft_test_result_pass("%s succeed\n", __func__); -+} -+ -+int main(int argc, char *argv[]) -+{ -+ int c; -+ -+ while ((c = getopt(argc, argv, "cht:v:")) != -1) { -+ switch (c) { -+ case 'c': -+ log_color(1); -+ break; -+ case 'h': -+ usage(basename(argv[0])); -+ exit(0); -+ case 'v': -+ log_verbosity(atoi(optarg)); -+ break; -+ default: -+ usage(basename(argv[0])); -+ exit(1); -+ } -+ } -+ -+ progname = basename(argv[0]); -+ -+ ksft_print_header(); -+ ksft_set_plan(2); -+ -+ test_count_overflow(); -+ -+#ifdef __ILP32__ -+ // if it's a 32x binary, there's no futex to wakeup -+ ksft_test_result_skip("futex_wait_multiple not supported at x32\n"); -+#else -+ test_fwb_wakeup(); -+#endif /* __ILP32__ */ -+ -+ ksft_print_cnts(); -+ return ret; -+} -diff --git a/tools/testing/selftests/futex/functional/run.sh b/tools/testing/selftests/futex/functional/run.sh -index 1acb6ace1680e8f3d6b3ee2dc528c19ddfdb018e..a8be94f28ff78b4879d2d19bca5d9b0fcb26c1f8 100755 ---- a/tools/testing/selftests/futex/functional/run.sh -+++ 
b/tools/testing/selftests/futex/functional/run.sh -@@ -73,3 +73,6 @@ echo - echo - ./futex_wait_uninitialized_heap $COLOR - ./futex_wait_private_mapped_file $COLOR -+ -+echo -+./futex_wait_multiple $COLOR -diff --git a/include/uapi/linux/futex.h b/include/uapi/linux/futex.h -index 580001e89c6caed57dd8b3cb491d65dce846caff..a3e760886b8e7e74285fdcf2caaaa6f66ad16675 100644 ---- a/include/uapi/linux/futex.h -+++ b/include/uapi/linux/futex.h -@@ -21,7 +21,7 @@ - #define FUTEX_WAKE_BITSET 10 - #define FUTEX_WAIT_REQUEUE_PI 11 - #define FUTEX_CMP_REQUEUE_PI 12 --#define FUTEX_WAIT_MULTIPLE 13 -+#define FUTEX_WAIT_MULTIPLE 31 - - #define FUTEX_PRIVATE_FLAG 128 - #define FUTEX_CLOCK_REALTIME 256 -diff --git a/kernel/futex.c b/kernel/futex.c -index 58cf9eb2b851b4858e29b5ef4114a29a92e676ba..e0bb628a5e1988dcc9ae5442a4259edc229d578d 100644 ---- a/kernel/futex.c -+++ b/kernel/futex.c -@@ -4198,7 +4198,7 @@ SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val, - return -EINVAL; - - t = timespec64_to_ktime(ts); -- if (cmd == FUTEX_WAIT) -+ if (cmd == FUTEX_WAIT || cmd == FUTEX_WAIT_MULTIPLE) - t = ktime_add_safe(ktime_get(), t); - tp = &t; - } -@@ -4399,6 +4399,7 @@ COMPAT_SYSCALL_DEFINE3(get_robust_list, int, pid, - */ - struct compat_futex_wait_block { - compat_uptr_t uaddr; -+ __u32 pad; - __u32 val; - __u32 bitset; - }; -@@ -4461,7 +4462,7 @@ SYSCALL_DEFINE6(futex_time32, u32 __user *, uaddr, int, op, u32, val, - return -EINVAL; - - t = timespec64_to_ktime(ts); -- if (cmd == FUTEX_WAIT) -+ if (cmd == FUTEX_WAIT || cmd == FUTEX_WAIT_MULTIPLE) - t = ktime_add_safe(ktime_get(), t); - tp = &t; - } diff --git a/SOURCES/fsync.patch b/SOURCES/fsync.patch new file mode 100644 index 0000000..01c86d8 --- /dev/null +++ b/SOURCES/fsync.patch @@ -0,0 +1,908 @@ +From f7f49141a5dbe9c99d78196b58c44307fb2e6be3 Mon Sep 17 00:00:00 2001 +From: Tk-Glitch +Date: Mon, 20 Apr 2020 14:09:11 +0200 +Subject: Import Fsync v3 patchset - Squashed from 
https://gitlab.collabora.com/tonyk/linux/-/commits/futex-proton-v3 + +diff --git a/include/uapi/linux/futex.h b/include/uapi/linux/futex.h +index a89eb0accd5e2ee527be1e3e11b1117ff5bf94b4..580001e89c6caed57dd8b3cb491d65dce846caff 100644 +--- a/include/uapi/linux/futex.h ++++ b/include/uapi/linux/futex.h +@@ -21,6 +21,7 @@ + #define FUTEX_WAKE_BITSET 10 + #define FUTEX_WAIT_REQUEUE_PI 11 + #define FUTEX_CMP_REQUEUE_PI 12 ++#define FUTEX_WAIT_MULTIPLE 13 + + #define FUTEX_PRIVATE_FLAG 128 + #define FUTEX_CLOCK_REALTIME 256 +@@ -40,6 +41,8 @@ + FUTEX_PRIVATE_FLAG) + #define FUTEX_CMP_REQUEUE_PI_PRIVATE (FUTEX_CMP_REQUEUE_PI | \ + FUTEX_PRIVATE_FLAG) ++#define FUTEX_WAIT_MULTIPLE_PRIVATE (FUTEX_WAIT_MULTIPLE | \ ++ FUTEX_PRIVATE_FLAG) + + /* + * Support for robust futexes: the kernel cleans up held futexes at +@@ -150,4 +153,21 @@ struct robust_list_head { + (((op & 0xf) << 28) | ((cmp & 0xf) << 24) \ + | ((oparg & 0xfff) << 12) | (cmparg & 0xfff)) + ++/* ++ * Maximum number of multiple futexes to wait for ++ */ ++#define FUTEX_MULTIPLE_MAX_COUNT 128 ++ ++/** ++ * struct futex_wait_block - Block of futexes to be waited for ++ * @uaddr: User address of the futex ++ * @val: Futex value expected by userspace ++ * @bitset: Bitset for the optional bitmasked wakeup ++ */ ++struct futex_wait_block { ++ __u32 __user *uaddr; ++ __u32 val; ++ __u32 bitset; ++}; ++ + #endif /* _UAPI_LINUX_FUTEX_H */ +diff --git a/kernel/futex.c b/kernel/futex.c +index 0cf84c8664f207c574325b899ef2e57f01295a94..58cf9eb2b851b4858e29b5ef4114a29a92e676ba 100644 +--- a/kernel/futex.c ++++ b/kernel/futex.c +@@ -215,6 +215,8 @@ struct futex_pi_state { + * @rt_waiter: rt_waiter storage for use with requeue_pi + * @requeue_pi_key: the requeue_pi target futex key + * @bitset: bitset for the optional bitmasked wakeup ++ * @uaddr: userspace address of futex ++ * @uval: expected futex's value + * + * We use this hashed waitqueue, instead of a normal wait_queue_entry_t, so + * we can wake only the relevant ones 
(hashed queues may be shared). +@@ -237,6 +239,8 @@ struct futex_q { + struct rt_mutex_waiter *rt_waiter; + union futex_key *requeue_pi_key; + u32 bitset; ++ u32 __user *uaddr; ++ u32 uval; + } __randomize_layout; + + static const struct futex_q futex_q_init = { +@@ -2420,6 +2424,29 @@ static int unqueue_me(struct futex_q *q) + return ret; + } + ++/** ++ * unqueue_multiple() - Remove several futexes from their futex_hash_bucket ++ * @q: The list of futexes to unqueue ++ * @count: Number of futexes in the list ++ * ++ * Helper to unqueue a list of futexes. This can't fail. ++ * ++ * Return: ++ * - >=0 - Index of the last futex that was awoken; ++ * - -1 - If no futex was awoken ++ */ ++static int unqueue_multiple(struct futex_q *q, int count) ++{ ++ int ret = -1; ++ int i; ++ ++ for (i = 0; i < count; i++) { ++ if (!unqueue_me(&q[i])) ++ ret = i; ++ } ++ return ret; ++} ++ + /* + * PI futexes can not be requeued and must remove themself from the + * hash bucket. The hash bucket lock (i.e. lock_ptr) is held on entry +@@ -2783,6 +2810,211 @@ static int futex_wait_setup(u32 __user *uaddr, u32 val, unsigned int flags, + return ret; + } + ++/** ++ * futex_wait_multiple_setup() - Prepare to wait and enqueue multiple futexes ++ * @qs: The corresponding futex list ++ * @count: The size of the lists ++ * @flags: Futex flags (FLAGS_SHARED, etc.) ++ * @awaken: Index of the last awoken futex ++ * ++ * Prepare multiple futexes in a single step and enqueue them. This may fail if ++ * the futex list is invalid or if any futex was already awoken. On success the ++ * task is ready to interruptible sleep. 
++ * ++ * Return: ++ * - 1 - One of the futexes was awaken by another thread ++ * - 0 - Success ++ * - <0 - -EFAULT, -EWOULDBLOCK or -EINVAL ++ */ ++static int futex_wait_multiple_setup(struct futex_q *qs, int count, ++ unsigned int flags, int *awaken) ++{ ++ struct futex_hash_bucket *hb; ++ int ret, i; ++ u32 uval; ++ ++ /* ++ * Enqueuing multiple futexes is tricky, because we need to ++ * enqueue each futex in the list before dealing with the next ++ * one to avoid deadlocking on the hash bucket. But, before ++ * enqueuing, we need to make sure that current->state is ++ * TASK_INTERRUPTIBLE, so we don't absorb any awake events, which ++ * cannot be done before the get_futex_key of the next key, ++ * because it calls get_user_pages, which can sleep. Thus, we ++ * fetch the list of futexes keys in two steps, by first pinning ++ * all the memory keys in the futex key, and only then we read ++ * each key and queue the corresponding futex. ++ */ ++retry: ++ for (i = 0; i < count; i++) { ++ qs[i].key = FUTEX_KEY_INIT; ++ ret = get_futex_key(qs[i].uaddr, flags & FLAGS_SHARED, ++ &qs[i].key, FUTEX_READ); ++ if (unlikely(ret)) { ++ for (--i; i >= 0; i--) ++ put_futex_key(&qs[i].key); ++ return ret; ++ } ++ } ++ ++ set_current_state(TASK_INTERRUPTIBLE); ++ ++ for (i = 0; i < count; i++) { ++ struct futex_q *q = &qs[i]; ++ ++ hb = queue_lock(q); ++ ++ ret = get_futex_value_locked(&uval, q->uaddr); ++ if (ret) { ++ /* ++ * We need to try to handle the fault, which ++ * cannot be done without sleep, so we need to ++ * undo all the work already done, to make sure ++ * we don't miss any wake ups. Therefore, clean ++ * up, handle the fault and retry from the ++ * beginning. ++ */ ++ queue_unlock(hb); ++ ++ /* ++ * Keys 0..(i-1) are implicitly put ++ * on unqueue_multiple. ++ */ ++ put_futex_key(&q->key); ++ ++ *awaken = unqueue_multiple(qs, i); ++ ++ __set_current_state(TASK_RUNNING); ++ ++ /* ++ * On a real fault, prioritize the error even if ++ * some other futex was awoken. 
Userspace gave ++ * us a bad address, -EFAULT them. ++ */ ++ ret = get_user(uval, q->uaddr); ++ if (ret) ++ return ret; ++ ++ /* ++ * Even if the page fault was handled, If ++ * something was already awaken, we can safely ++ * give up and succeed to give a hint for userspace to ++ * acquire the right futex faster. ++ */ ++ if (*awaken >= 0) ++ return 1; ++ ++ goto retry; ++ } ++ ++ if (uval != q->uval) { ++ queue_unlock(hb); ++ ++ put_futex_key(&qs[i].key); ++ ++ /* ++ * If something was already awaken, we can ++ * safely ignore the error and succeed. ++ */ ++ *awaken = unqueue_multiple(qs, i); ++ __set_current_state(TASK_RUNNING); ++ if (*awaken >= 0) ++ return 1; ++ ++ return -EWOULDBLOCK; ++ } ++ ++ /* ++ * The bucket lock can't be held while dealing with the ++ * next futex. Queue each futex at this moment so hb can ++ * be unlocked. ++ */ ++ queue_me(&qs[i], hb); ++ } ++ return 0; ++} ++ ++/** ++ * futex_wait_multiple() - Prepare to wait on and enqueue several futexes ++ * @qs: The list of futexes to wait on ++ * @op: Operation code from futex's syscall ++ * @count: The number of objects ++ * @abs_time: Timeout before giving up and returning to userspace ++ * ++ * Entry point for the FUTEX_WAIT_MULTIPLE futex operation, this function ++ * sleeps on a group of futexes and returns on the first futex that ++ * triggered, or after the timeout has elapsed. 
++ * ++ * Return: ++ * - >=0 - Hint to the futex that was awoken ++ * - <0 - On error ++ */ ++static int futex_wait_multiple(struct futex_q *qs, int op, ++ u32 count, ktime_t *abs_time) ++{ ++ struct hrtimer_sleeper timeout, *to; ++ int ret, flags = 0, hint = 0; ++ unsigned int i; ++ ++ if (!(op & FUTEX_PRIVATE_FLAG)) ++ flags |= FLAGS_SHARED; ++ ++ if (op & FUTEX_CLOCK_REALTIME) ++ flags |= FLAGS_CLOCKRT; ++ ++ to = futex_setup_timer(abs_time, &timeout, flags, 0); ++ while (1) { ++ ret = futex_wait_multiple_setup(qs, count, flags, &hint); ++ if (ret) { ++ if (ret > 0) { ++ /* A futex was awaken during setup */ ++ ret = hint; ++ } ++ break; ++ } ++ ++ if (to) ++ hrtimer_start_expires(&to->timer, HRTIMER_MODE_ABS); ++ ++ /* ++ * Avoid sleeping if another thread already tried to ++ * wake us. ++ */ ++ for (i = 0; i < count; i++) { ++ if (plist_node_empty(&qs[i].list)) ++ break; ++ } ++ ++ if (i == count && (!to || to->task)) ++ freezable_schedule(); ++ ++ ret = unqueue_multiple(qs, count); ++ ++ __set_current_state(TASK_RUNNING); ++ ++ if (ret >= 0) ++ break; ++ if (to && !to->task) { ++ ret = -ETIMEDOUT; ++ break; ++ } else if (signal_pending(current)) { ++ ret = -ERESTARTSYS; ++ break; ++ } ++ /* ++ * The final case is a spurious wakeup, for ++ * which just retry. 
++ */ ++ } ++ ++ if (to) { ++ hrtimer_cancel(&to->timer); ++ destroy_hrtimer_on_stack(&to->timer); ++ } ++ ++ return ret; ++} ++ + static int futex_wait(u32 __user *uaddr, unsigned int flags, u32 val, + ktime_t *abs_time, u32 bitset) + { +@@ -3907,6 +4139,43 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, + return -ENOSYS; + } + ++/** ++ * futex_read_wait_block - Read an array of futex_wait_block from userspace ++ * @uaddr: Userspace address of the block ++ * @count: Number of blocks to be read ++ * ++ * This function creates and allocate an array of futex_q (we zero it to ++ * initialize the fields) and then, for each futex_wait_block element from ++ * userspace, fill a futex_q element with proper values. ++ */ ++inline struct futex_q *futex_read_wait_block(u32 __user *uaddr, u32 count) ++{ ++ unsigned int i; ++ struct futex_q *qs; ++ struct futex_wait_block fwb; ++ struct futex_wait_block __user *entry = ++ (struct futex_wait_block __user *)uaddr; ++ ++ if (!count || count > FUTEX_MULTIPLE_MAX_COUNT) ++ return ERR_PTR(-EINVAL); ++ ++ qs = kcalloc(count, sizeof(*qs), GFP_KERNEL); ++ if (!qs) ++ return ERR_PTR(-ENOMEM); ++ ++ for (i = 0; i < count; i++) { ++ if (copy_from_user(&fwb, &entry[i], sizeof(fwb))) { ++ kfree(qs); ++ return ERR_PTR(-EFAULT); ++ } ++ ++ qs[i].uaddr = fwb.uaddr; ++ qs[i].uval = fwb.val; ++ qs[i].bitset = fwb.bitset; ++ } ++ ++ return qs; ++} + + SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val, + struct __kernel_timespec __user *, utime, u32 __user *, uaddr2, +@@ -3919,7 +4188,8 @@ SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val, + + if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI || + cmd == FUTEX_WAIT_BITSET || +- cmd == FUTEX_WAIT_REQUEUE_PI)) { ++ cmd == FUTEX_WAIT_REQUEUE_PI || ++ cmd == FUTEX_WAIT_MULTIPLE)) { + if (unlikely(should_fail_futex(!(op & FUTEX_PRIVATE_FLAG)))) + return -EFAULT; + if (get_timespec64(&ts, utime)) +@@ -3940,6 +4210,25 @@ SYSCALL_DEFINE6(futex, u32 
__user *, uaddr, int, op, u32, val, + cmd == FUTEX_CMP_REQUEUE_PI || cmd == FUTEX_WAKE_OP) + val2 = (u32) (unsigned long) utime; + ++ if (cmd == FUTEX_WAIT_MULTIPLE) { ++ int ret; ++ struct futex_q *qs; ++ ++#ifdef CONFIG_X86_X32 ++ if (unlikely(in_x32_syscall())) ++ return -ENOSYS; ++#endif ++ qs = futex_read_wait_block(uaddr, val); ++ ++ if (IS_ERR(qs)) ++ return PTR_ERR(qs); ++ ++ ret = futex_wait_multiple(qs, op, val, tp); ++ kfree(qs); ++ ++ return ret; ++ } ++ + return do_futex(uaddr, op, val, tp, uaddr2, val2, val3); + } + +@@ -4102,6 +4391,57 @@ COMPAT_SYSCALL_DEFINE3(get_robust_list, int, pid, + #endif /* CONFIG_COMPAT */ + + #ifdef CONFIG_COMPAT_32BIT_TIME ++/** ++ * struct compat_futex_wait_block - Block of futexes to be waited for ++ * @uaddr: User address of the futex (compatible pointer) ++ * @val: Futex value expected by userspace ++ * @bitset: Bitset for the optional bitmasked wakeup ++ */ ++struct compat_futex_wait_block { ++ compat_uptr_t uaddr; ++ __u32 val; ++ __u32 bitset; ++}; ++ ++/** ++ * compat_futex_read_wait_block - Read an array of futex_wait_block from ++ * userspace ++ * @uaddr: Userspace address of the block ++ * @count: Number of blocks to be read ++ * ++ * This function does the same as futex_read_wait_block(), except that it ++ * converts the pointer to the futex from the compat version to the regular one. 
++ */ ++inline struct futex_q *compat_futex_read_wait_block(u32 __user *uaddr, ++ u32 count) ++{ ++ unsigned int i; ++ struct futex_q *qs; ++ struct compat_futex_wait_block fwb; ++ struct compat_futex_wait_block __user *entry = ++ (struct compat_futex_wait_block __user *)uaddr; ++ ++ if (!count || count > FUTEX_MULTIPLE_MAX_COUNT) ++ return ERR_PTR(-EINVAL); ++ ++ qs = kcalloc(count, sizeof(*qs), GFP_KERNEL); ++ if (!qs) ++ return ERR_PTR(-ENOMEM); ++ ++ for (i = 0; i < count; i++) { ++ if (copy_from_user(&fwb, &entry[i], sizeof(fwb))) { ++ kfree(qs); ++ return ERR_PTR(-EFAULT); ++ } ++ ++ qs[i].uaddr = compat_ptr(fwb.uaddr); ++ qs[i].uval = fwb.val; ++ qs[i].bitset = fwb.bitset; ++ } ++ ++ return qs; ++} ++ + SYSCALL_DEFINE6(futex_time32, u32 __user *, uaddr, int, op, u32, val, + struct old_timespec32 __user *, utime, u32 __user *, uaddr2, + u32, val3) +@@ -4113,7 +4453,8 @@ SYSCALL_DEFINE6(futex_time32, u32 __user *, uaddr, int, op, u32, val, + + if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI || + cmd == FUTEX_WAIT_BITSET || +- cmd == FUTEX_WAIT_REQUEUE_PI)) { ++ cmd == FUTEX_WAIT_REQUEUE_PI || ++ cmd == FUTEX_WAIT_MULTIPLE)) { + if (get_old_timespec32(&ts, utime)) + return -EFAULT; + if (!timespec64_valid(&ts)) +@@ -4128,6 +4469,19 @@ SYSCALL_DEFINE6(futex_time32, u32 __user *, uaddr, int, op, u32, val, + cmd == FUTEX_CMP_REQUEUE_PI || cmd == FUTEX_WAKE_OP) + val2 = (int) (unsigned long) utime; + ++ if (cmd == FUTEX_WAIT_MULTIPLE) { ++ int ret; ++ struct futex_q *qs = compat_futex_read_wait_block(uaddr, val); ++ ++ if (IS_ERR(qs)) ++ return PTR_ERR(qs); ++ ++ ret = futex_wait_multiple(qs, op, val, tp); ++ kfree(qs); ++ ++ return ret; ++ } ++ + return do_futex(uaddr, op, val, tp, uaddr2, val2, val3); + } + #endif /* CONFIG_COMPAT_32BIT_TIME */ +diff --git a/tools/testing/selftests/futex/functional/futex_wait_timeout.c b/tools/testing/selftests/futex/functional/futex_wait_timeout.c +index 
ee55e6d389a3f053194435342c4e471dc7cf8786..2a63e1c2cfb6407a5988233217cff2e52787bc66 100644 +--- a/tools/testing/selftests/futex/functional/futex_wait_timeout.c ++++ b/tools/testing/selftests/futex/functional/futex_wait_timeout.c +@@ -11,6 +11,7 @@ + * + * HISTORY + * 2009-Nov-6: Initial version by Darren Hart ++ * 2019-Dec-13: Add WAIT_MULTIPLE test by Krisman + * + *****************************************************************************/ + +@@ -41,6 +42,8 @@ int main(int argc, char *argv[]) + { + futex_t f1 = FUTEX_INITIALIZER; + struct timespec to; ++ time_t secs; ++ struct futex_wait_block fwb = {&f1, f1, 0}; + int res, ret = RET_PASS; + int c; + +@@ -65,7 +68,7 @@ int main(int argc, char *argv[]) + } + + ksft_print_header(); +- ksft_set_plan(1); ++ ksft_set_plan(2); + ksft_print_msg("%s: Block on a futex and wait for timeout\n", + basename(argv[0])); + ksft_print_msg("\tArguments: timeout=%ldns\n", timeout_ns); +@@ -79,8 +82,39 @@ int main(int argc, char *argv[]) + if (!res || errno != ETIMEDOUT) { + fail("futex_wait returned %d\n", ret < 0 ? errno : ret); + ret = RET_FAIL; ++ } else ++ ksft_test_result_pass("futex_wait timeout succeeds\n"); ++ ++ info("Calling futex_wait_multiple on f1: %u @ %p\n", f1, &f1); ++ ++ /* Setup absolute time */ ++ ret = clock_gettime(CLOCK_REALTIME, &to); ++ secs = (to.tv_nsec + timeout_ns) / 1000000000; ++ to.tv_nsec = ((int64_t)to.tv_nsec + timeout_ns) % 1000000000; ++ to.tv_sec += secs; ++ info("to.tv_sec = %ld\n", to.tv_sec); ++ info("to.tv_nsec = %ld\n", to.tv_nsec); ++ ++ res = futex_wait_multiple(&fwb, 1, &to, ++ FUTEX_PRIVATE_FLAG | FUTEX_CLOCK_REALTIME); ++ ++#ifdef __ILP32__ ++ if (res == -1 && errno == ENOSYS) { ++ ksft_test_result_skip("futex_wait_multiple not supported at x32\n"); ++ } else { ++ ksft_test_result_fail("futex_wait_multiple returned %d\n", ++ res < 0 ? 
errno : res); ++ ret = RET_FAIL; + } ++#else ++ if (!res || errno != ETIMEDOUT) { ++ ksft_test_result_fail("futex_wait_multiple returned %d\n", ++ res < 0 ? errno : res); ++ ret = RET_FAIL; ++ } else ++ ksft_test_result_pass("futex_wait_multiple timeout succeeds\n"); ++#endif /* __ILP32__ */ + +- print_result(TEST_NAME, ret); ++ ksft_print_cnts(); + return ret; + } +diff --git a/tools/testing/selftests/futex/include/futextest.h b/tools/testing/selftests/futex/include/futextest.h +index ddbcfc9b7bac4aebb5bac2f249e26ecfd948aa84..bb103bef4557012ef9a389ca74c868e4476a8a31 100644 +--- a/tools/testing/selftests/futex/include/futextest.h ++++ b/tools/testing/selftests/futex/include/futextest.h +@@ -38,6 +38,14 @@ typedef volatile u_int32_t futex_t; + #ifndef FUTEX_CMP_REQUEUE_PI + #define FUTEX_CMP_REQUEUE_PI 12 + #endif ++#ifndef FUTEX_WAIT_MULTIPLE ++#define FUTEX_WAIT_MULTIPLE 13 ++struct futex_wait_block { ++ futex_t *uaddr; ++ futex_t val; ++ __u32 bitset; ++}; ++#endif + #ifndef FUTEX_WAIT_REQUEUE_PI_PRIVATE + #define FUTEX_WAIT_REQUEUE_PI_PRIVATE (FUTEX_WAIT_REQUEUE_PI | \ + FUTEX_PRIVATE_FLAG) +@@ -80,6 +88,20 @@ futex_wait(futex_t *uaddr, futex_t val, struct timespec *timeout, int opflags) + return futex(uaddr, FUTEX_WAIT, val, timeout, NULL, 0, opflags); + } + ++/** ++ * futex_wait_multiple() - block on several futexes with optional timeout ++ * @fwb: wait block user space address ++ * @count: number of entities at fwb ++ * @timeout: absolute timeout ++ */ ++static inline int ++futex_wait_multiple(struct futex_wait_block *fwb, int count, ++ struct timespec *timeout, int opflags) ++{ ++ return futex(fwb, FUTEX_WAIT_MULTIPLE, count, timeout, NULL, 0, ++ opflags); ++} ++ + /** + * futex_wake() - wake one or more tasks blocked on uaddr + * @nr_wake: wake up to this many tasks +diff --git a/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c b/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c +index 
0ae390ff816449c88d0bb655a26eb014382c2b4f..bcbac042992d447e0bc9ef5fefe94e875de310f2 100644 +--- a/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c ++++ b/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c +@@ -12,6 +12,7 @@ + * + * HISTORY + * 2009-Nov-14: Initial version by Gowrishankar ++ * 2019-Dec-13: Add WAIT_MULTIPLE test by Krisman + * + *****************************************************************************/ + +@@ -40,6 +41,7 @@ int main(int argc, char *argv[]) + { + struct timespec to = {.tv_sec = 0, .tv_nsec = timeout_ns}; + futex_t f1 = FUTEX_INITIALIZER; ++ struct futex_wait_block fwb = {&f1, f1+1, 0}; + int res, ret = RET_PASS; + int c; + +@@ -61,7 +63,7 @@ int main(int argc, char *argv[]) + } + + ksft_print_header(); +- ksft_set_plan(1); ++ ksft_set_plan(2); + ksft_print_msg("%s: Test the unexpected futex value in FUTEX_WAIT\n", + basename(argv[0])); + +@@ -71,8 +73,30 @@ int main(int argc, char *argv[]) + fail("futex_wait returned: %d %s\n", + res ? errno : res, res ? strerror(errno) : ""); + ret = RET_FAIL; ++ } else ++ ksft_test_result_pass("futex_wait wouldblock succeeds\n"); ++ ++ info("Calling futex_wait_multiple on f1: %u @ %p with val=%u\n", ++ f1, &f1, f1+1); ++ res = futex_wait_multiple(&fwb, 1, NULL, FUTEX_PRIVATE_FLAG); ++ ++#ifdef __ILP32__ ++ if (res != -1 || errno != ENOSYS) { ++ ksft_test_result_fail("futex_wait_multiple returned %d\n", ++ res < 0 ? errno : res); ++ ret = RET_FAIL; ++ } else { ++ ksft_test_result_skip("futex_wait_multiple not supported at x32\n"); ++ } ++#else ++ if (!res || errno != EWOULDBLOCK) { ++ ksft_test_result_fail("futex_wait_multiple returned %d\n", ++ res < 0 ? 
errno : res); ++ ret = RET_FAIL; + } ++ ksft_test_result_pass("futex_wait_multiple wouldblock succeeds\n"); ++#endif /* __ILP32__ */ + +- print_result(TEST_NAME, ret); ++ ksft_print_cnts(); + return ret; + } +diff --git a/tools/testing/selftests/futex/functional/.gitignore b/tools/testing/selftests/futex/functional/.gitignore +index a09f570619023750f558c84004aff166b4337d72..4660128a545edb04a17cc6bd9760931c1386122f 100644 +--- a/tools/testing/selftests/futex/functional/.gitignore ++++ b/tools/testing/selftests/futex/functional/.gitignore +@@ -5,3 +5,4 @@ futex_wait_private_mapped_file + futex_wait_timeout + futex_wait_uninitialized_heap + futex_wait_wouldblock ++futex_wait_multiple +diff --git a/tools/testing/selftests/futex/functional/Makefile b/tools/testing/selftests/futex/functional/Makefile +index 30996306cabcfe89a47977643e529b122893bb7e..75f9fface11fa3c90c1bdb9a49b3ea51291afd58 100644 +--- a/tools/testing/selftests/futex/functional/Makefile ++++ b/tools/testing/selftests/futex/functional/Makefile +@@ -14,7 +14,8 @@ TEST_GEN_FILES := \ + futex_requeue_pi_signal_restart \ + futex_requeue_pi_mismatched_ops \ + futex_wait_uninitialized_heap \ +- futex_wait_private_mapped_file ++ futex_wait_private_mapped_file \ ++ futex_wait_multiple + + TEST_PROGS := run.sh + +diff --git a/tools/testing/selftests/futex/functional/futex_wait_multiple.c b/tools/testing/selftests/futex/functional/futex_wait_multiple.c +new file mode 100644 +index 0000000000000000000000000000000000000000..b48422e79f42edba1653bb0bd2a4c4fd98d2d48d +--- /dev/null ++++ b/tools/testing/selftests/futex/functional/futex_wait_multiple.c +@@ -0,0 +1,173 @@ ++// SPDX-License-Identifier: GPL-2.0-or-later ++/****************************************************************************** ++ * ++ * Copyright © Collabora, Ltd., 2019 ++ * ++ * DESCRIPTION ++ * Test basic semantics of FUTEX_WAIT_MULTIPLE ++ * ++ * AUTHOR ++ * Gabriel Krisman Bertazi ++ * ++ * HISTORY ++ * 2019-Dec-13: Initial version by Krisman ++ * 
++ *****************************************************************************/ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include "futextest.h" ++#include "logging.h" ++ ++#define TEST_NAME "futex-wait-multiple" ++#define timeout_ns 100000 ++#define MAX_COUNT 128 ++#define WAKE_WAIT_US 3000000 ++ ++int ret = RET_PASS; ++char *progname; ++futex_t f[MAX_COUNT] = {0}; ++struct futex_wait_block fwb[MAX_COUNT]; ++ ++void usage(char *prog) ++{ ++ printf("Usage: %s\n", prog); ++ printf(" -c Use color\n"); ++ printf(" -h Display this help message\n"); ++ printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n", ++ VQUIET, VCRITICAL, VINFO); ++} ++ ++void test_count_overflow(void) ++{ ++ futex_t f = FUTEX_INITIALIZER; ++ struct futex_wait_block fwb[MAX_COUNT+1]; ++ int res, i; ++ ++ ksft_print_msg("%s: Test a too big number of futexes\n", progname); ++ ++ for (i = 0; i < MAX_COUNT+1; i++) { ++ fwb[i].uaddr = &f; ++ fwb[i].val = f; ++ fwb[i].bitset = 0; ++ } ++ ++ res = futex_wait_multiple(fwb, MAX_COUNT+1, NULL, FUTEX_PRIVATE_FLAG); ++ ++#ifdef __ILP32__ ++ if (res != -1 || errno != ENOSYS) { ++ ksft_test_result_fail("futex_wait_multiple returned %d\n", ++ res < 0 ? errno : res); ++ ret = RET_FAIL; ++ } else { ++ ksft_test_result_skip("futex_wait_multiple not supported at x32\n"); ++ } ++#else ++ if (res != -1 || errno != EINVAL) { ++ ksft_test_result_fail("futex_wait_multiple returned %d\n", ++ res < 0 ? errno : res); ++ ret = RET_FAIL; ++ } else { ++ ksft_test_result_pass("futex_wait_multiple count overflow succeed\n"); ++ } ++ ++#endif /* __ILP32__ */ ++} ++ ++void *waiterfn(void *arg) ++{ ++ int res; ++ ++ res = futex_wait_multiple(fwb, MAX_COUNT, NULL, FUTEX_PRIVATE_FLAG); ++ ++#ifdef __ILP32__ ++ if (res != -1 || errno != ENOSYS) { ++ ksft_test_result_fail("futex_wait_multiple returned %d\n", ++ res < 0 ? 
errno : res); ++ ret = RET_FAIL; ++ } else { ++ ksft_test_result_skip("futex_wait_multiple not supported at x32\n"); ++ } ++#else ++ if (res < 0) ++ ksft_print_msg("waiter failed %d\n", res); ++ ++ info("futex_wait_multiple: Got hint futex %d was freed\n", res); ++#endif /* __ILP32__ */ ++ ++ return NULL; ++} ++ ++void test_fwb_wakeup(void) ++{ ++ int res, i; ++ pthread_t waiter; ++ ++ ksft_print_msg("%s: Test wake up in a list of futex\n", progname); ++ ++ for (i = 0; i < MAX_COUNT; i++) { ++ fwb[i].uaddr = &f[i]; ++ fwb[i].val = f[i]; ++ fwb[i].bitset = 0xffffffff; ++ } ++ ++ res = pthread_create(&waiter, NULL, waiterfn, NULL); ++ if (res) { ++ ksft_test_result_fail("Creating waiting thread failed"); ++ ksft_exit_fail(); ++ } ++ ++ usleep(WAKE_WAIT_US); ++ res = futex_wake(&(f[MAX_COUNT-1]), 1, FUTEX_PRIVATE_FLAG); ++ if (res != 1) { ++ ksft_test_result_fail("Failed to wake thread res=%d\n", res); ++ ksft_exit_fail(); ++ } ++ ++ pthread_join(waiter, NULL); ++ ksft_test_result_pass("%s succeed\n", __func__); ++} ++ ++int main(int argc, char *argv[]) ++{ ++ int c; ++ ++ while ((c = getopt(argc, argv, "cht:v:")) != -1) { ++ switch (c) { ++ case 'c': ++ log_color(1); ++ break; ++ case 'h': ++ usage(basename(argv[0])); ++ exit(0); ++ case 'v': ++ log_verbosity(atoi(optarg)); ++ break; ++ default: ++ usage(basename(argv[0])); ++ exit(1); ++ } ++ } ++ ++ progname = basename(argv[0]); ++ ++ ksft_print_header(); ++ ksft_set_plan(2); ++ ++ test_count_overflow(); ++ ++#ifdef __ILP32__ ++ // if it's a 32x binary, there's no futex to wakeup ++ ksft_test_result_skip("futex_wait_multiple not supported at x32\n"); ++#else ++ test_fwb_wakeup(); ++#endif /* __ILP32__ */ ++ ++ ksft_print_cnts(); ++ return ret; ++} +diff --git a/tools/testing/selftests/futex/functional/run.sh b/tools/testing/selftests/futex/functional/run.sh +index 1acb6ace1680e8f3d6b3ee2dc528c19ddfdb018e..a8be94f28ff78b4879d2d19bca5d9b0fcb26c1f8 100755 +--- a/tools/testing/selftests/futex/functional/run.sh ++++ 
b/tools/testing/selftests/futex/functional/run.sh +@@ -73,3 +73,6 @@ echo + echo + ./futex_wait_uninitialized_heap $COLOR + ./futex_wait_private_mapped_file $COLOR ++ ++echo ++./futex_wait_multiple $COLOR +diff --git a/include/uapi/linux/futex.h b/include/uapi/linux/futex.h +index 580001e89c6caed57dd8b3cb491d65dce846caff..a3e760886b8e7e74285fdcf2caaaa6f66ad16675 100644 +--- a/include/uapi/linux/futex.h ++++ b/include/uapi/linux/futex.h +@@ -21,7 +21,7 @@ + #define FUTEX_WAKE_BITSET 10 + #define FUTEX_WAIT_REQUEUE_PI 11 + #define FUTEX_CMP_REQUEUE_PI 12 +-#define FUTEX_WAIT_MULTIPLE 13 ++#define FUTEX_WAIT_MULTIPLE 31 + + #define FUTEX_PRIVATE_FLAG 128 + #define FUTEX_CLOCK_REALTIME 256 +diff --git a/kernel/futex.c b/kernel/futex.c +index 58cf9eb2b851b4858e29b5ef4114a29a92e676ba..e0bb628a5e1988dcc9ae5442a4259edc229d578d 100644 +--- a/kernel/futex.c ++++ b/kernel/futex.c +@@ -4198,7 +4198,7 @@ SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val, + return -EINVAL; + + t = timespec64_to_ktime(ts); +- if (cmd == FUTEX_WAIT) ++ if (cmd == FUTEX_WAIT || cmd == FUTEX_WAIT_MULTIPLE) + t = ktime_add_safe(ktime_get(), t); + tp = &t; + } +@@ -4399,6 +4399,7 @@ COMPAT_SYSCALL_DEFINE3(get_robust_list, int, pid, + */ + struct compat_futex_wait_block { + compat_uptr_t uaddr; ++ __u32 pad; + __u32 val; + __u32 bitset; + }; +@@ -4461,7 +4462,7 @@ SYSCALL_DEFINE6(futex_time32, u32 __user *, uaddr, int, op, u32, val, + return -EINVAL; + + t = timespec64_to_ktime(ts); +- if (cmd == FUTEX_WAIT) ++ if (cmd == FUTEX_WAIT || cmd == FUTEX_WAIT_MULTIPLE) + t = ktime_add_safe(ktime_get(), t); + tp = &t; + } diff --git a/SOURCES/zen.patch b/SOURCES/zen.patch new file mode 100644 index 0000000..89c1934 --- /dev/null +++ b/SOURCES/zen.patch @@ -0,0 +1,308 @@ +From f85ed068b4d0e6c31edce8574a95757a60e58b87 Mon Sep 17 00:00:00 2001 +From: Etienne Juvigny +Date: Mon, 3 Sep 2018 17:36:25 +0200 +Subject: [PATCH 07/17] Zenify & stuff + +--- + init/Kconfig | 32 ++++++++++++++++++++++++++++++++ 
+ kernel/sched/fair.c | 25 +++++++++++++++++++++++++ + mm/page-writeback.c | 8 ++++++++ + 3 files changed, 65 insertions(+) + +diff --git a/init/Kconfig b/init/Kconfig +index 3ae8678e1145..da708eed0f1e 100644 +--- a/init/Kconfig ++++ b/init/Kconfig +@@ -92,6 +92,38 @@ config THREAD_INFO_IN_TASK + + menu "General setup" + ++config ZENIFY ++ bool "A selection of patches from Zen/Liquorix kernel and additional tweaks for a better gaming experience" ++ default y ++ help ++ Tunes the kernel for responsiveness at the cost of throughput and power usage. ++ ++ --- Virtual Memory Subsystem --------------------------- ++ ++ Mem dirty before bg writeback..: 10 % -> 20 % ++ Mem dirty before sync writeback: 20 % -> 50 % ++ ++ --- Block Layer ---------------------------------------- ++ ++ Queue depth...............: 128 -> 512 ++ Default MQ scheduler......: mq-deadline -> bfq ++ ++ --- CFS CPU Scheduler ---------------------------------- ++ ++ Scheduling latency.............: 6 -> 3 ms ++ Minimal granularity............: 0.75 -> 0.3 ms ++ Wakeup granularity.............: 1 -> 0.5 ms ++ CPU migration cost.............: 0.5 -> 0.25 ms ++ Bandwidth slice size...........: 5 -> 3 ms ++ Ondemand fine upscaling limit..: 95 % -> 85 % ++ ++ --- MuQSS CPU Scheduler -------------------------------- ++ ++ Scheduling interval............: 6 -> 3 ms ++ ISO task max realtime use......: 70 % -> 25 % ++ Ondemand coarse upscaling limit: 80 % -> 45 % ++ Ondemand fine upscaling limit..: 95 % -> 45 % ++ + config BROKEN + bool + +diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c +index 6b3b59cc51d6..2a0072192c3d 100644 +--- a/kernel/sched/fair.c ++++ b/kernel/sched/fair.c +@@ -37,8 +37,13 @@ + * + * (default: 6ms * (1 + ilog(ncpus)), units: nanoseconds) + */ ++#ifdef CONFIG_ZENIFY ++unsigned int sysctl_sched_latency = 3000000ULL; ++static unsigned int normalized_sysctl_sched_latency = 3000000ULL; ++#else + unsigned int sysctl_sched_latency = 6000000ULL; + static unsigned int 
normalized_sysctl_sched_latency = 6000000ULL; ++#endif + + /* + * The initial- and re-scaling of tunables is configurable +@@ -58,13 +63,22 @@ enum sched_tunable_scaling sysctl_sched_tunable_scaling = SCHED_TUNABLESCALING_L + * + * (default: 0.75 msec * (1 + ilog(ncpus)), units: nanoseconds) + */ ++#ifdef CONFIG_ZENIFY ++unsigned int sysctl_sched_min_granularity = 300000ULL; ++static unsigned int normalized_sysctl_sched_min_granularity = 300000ULL; ++#else + unsigned int sysctl_sched_min_granularity = 750000ULL; + static unsigned int normalized_sysctl_sched_min_granularity = 750000ULL; ++#endif + + /* + * This value is kept at sysctl_sched_latency/sysctl_sched_min_granularity + */ ++#ifdef CONFIG_ZENIFY ++static unsigned int sched_nr_latency = 10; ++#else + static unsigned int sched_nr_latency = 8; ++#endif + + /* + * After fork, child runs first. If set to 0 (default) then +@@ -81,10 +95,17 @@ unsigned int sysctl_sched_child_runs_first __read_mostly; + * + * (default: 1 msec * (1 + ilog(ncpus)), units: nanoseconds) + */ ++#ifdef CONFIG_ZENIFY ++unsigned int sysctl_sched_wakeup_granularity = 500000UL; ++static unsigned int normalized_sysctl_sched_wakeup_granularity = 500000UL; ++ ++const_debug unsigned int sysctl_sched_migration_cost = 50000UL; ++#else + unsigned int sysctl_sched_wakeup_granularity = 1000000UL; + static unsigned int normalized_sysctl_sched_wakeup_granularity = 1000000UL; + + const_debug unsigned int sysctl_sched_migration_cost = 500000UL; ++#endif + + int sched_thermal_decay_shift; + static int __init setup_sched_thermal_decay_shift(char *str) +@@ -128,8 +149,12 @@ int __weak arch_asym_cpu_priority(int cpu) + * + * (default: 5 msec, units: microseconds) + */ ++#ifdef CONFIG_ZENIFY ++unsigned int sysctl_sched_cfs_bandwidth_slice = 3000UL; ++#else + unsigned int sysctl_sched_cfs_bandwidth_slice = 5000UL; + #endif ++#endif + + static inline void update_load_add(struct load_weight *lw, unsigned long inc) + { +diff --git a/mm/page-writeback.c 
b/mm/page-writeback.c +index 28b3e7a67565..01a1aef2b9b1 100644 +--- a/mm/page-writeback.c ++++ b/mm/page-writeback.c +@@ -71,7 +71,11 @@ static long ratelimit_pages = 32; + /* + * Start background writeback (via writeback threads) at this percentage + */ ++#ifdef CONFIG_ZENIFY ++int dirty_background_ratio = 20; ++#else + int dirty_background_ratio = 10; ++#endif + + /* + * dirty_background_bytes starts at 0 (disabled) so that it is a function of +@@ -88,7 +92,11 @@ int vm_highmem_is_dirtyable; + /* + * The generator of dirty data starts writeback at this percentage + */ ++#ifdef CONFIG_ZENIFY ++int vm_dirty_ratio = 50; ++#else + int vm_dirty_ratio = 20; ++#endif + + /* + * vm_dirty_bytes starts at 0 (disabled) so that it is a function of +-- +2.28.0 + + +From e92e67143385cf285851e12aa8b7f083dd38dd24 Mon Sep 17 00:00:00 2001 +From: Steven Barrett +Date: Sun, 16 Jan 2011 18:57:32 -0600 +Subject: [PATCH 08/17] ZEN: Allow TCP YeAH as default congestion control + +4.4: In my tests YeAH dramatically slowed down transfers over a WLAN, + reducing throughput from ~65Mbps (CUBIC) to ~7MBps (YeAH) over 10 + seconds (netperf TCP_STREAM) including long stalls. + + Be careful when choosing this. 
~heftig +--- + net/ipv4/Kconfig | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig +index e64e59b536d3..bfb55ef7ebbe 100644 +--- a/net/ipv4/Kconfig ++++ b/net/ipv4/Kconfig +@@ -691,6 +691,9 @@ choice + config DEFAULT_VEGAS + bool "Vegas" if TCP_CONG_VEGAS=y + ++ config DEFAULT_YEAH ++ bool "YeAH" if TCP_CONG_YEAH=y ++ + config DEFAULT_VENO + bool "Veno" if TCP_CONG_VENO=y + +@@ -724,6 +727,7 @@ config DEFAULT_TCP_CONG + default "htcp" if DEFAULT_HTCP + default "hybla" if DEFAULT_HYBLA + default "vegas" if DEFAULT_VEGAS ++ default "yeah" if DEFAULT_YEAH + default "westwood" if DEFAULT_WESTWOOD + default "veno" if DEFAULT_VENO + default "reno" if DEFAULT_RENO +-- +2.28.0 + + +From 76dbe7477bfde1b5e8bf29a71b5af7ab2be9b98e Mon Sep 17 00:00:00 2001 +From: Steven Barrett +Date: Wed, 28 Nov 2018 19:01:27 -0600 +Subject: [PATCH 09/17] zen: Use [defer+madvise] as default khugepaged defrag + strategy + +For some reason, the default strategy to respond to THP fault fallbacks +is still just madvise, meaning stall if the program wants transparent +hugepages, but don't trigger a background reclaim / compaction if THP +begins to fail allocations. This creates a snowball effect where we +still use the THP code paths, but we almost always fail once a system +has been active and busy for a while. + +The option "defer" was created for interactive systems where THP can +still improve performance. If we have to fall back to a regular page due +to an allocation failure or anything else, we will trigger a background +reclaim and compaction so future THP attempts succeed and previous +attempts eventually have their smaller pages combined without stalling +running applications. + +We still want madvise to stall applications that explicitly want THP, +so defer+madvise _does_ make a ton of sense. Make it the default for +interactive systems, especially if the kernel maintainer left +transparent hugepages on "always". 
+ +Reasoning and details in the original patch: https://lwn.net/Articles/711248/ +--- + mm/huge_memory.c | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/mm/huge_memory.c b/mm/huge_memory.c +index 74300e337c3c..9277f22c10a7 100644 +--- a/mm/huge_memory.c ++++ b/mm/huge_memory.c +@@ -53,7 +53,11 @@ unsigned long transparent_hugepage_flags __read_mostly = + #ifdef CONFIG_TRANSPARENT_HUGEPAGE_MADVISE + (1< +Date: Wed, 15 Jan 2020 20:43:56 -0600 +Subject: [PATCH 17/17] ZEN: intel-pstate: Implement "enable" parameter + +If intel-pstate is compiled into the kernel, it will preempt the loading +of acpi-cpufreq so you can take advantage of hardware p-states without +any friction. + +However, intel-pstate is not completely superior to cpufreq's ondemand +for one reason. There's no concept of an up_threshold property. + +In ondemand, up_threshold essentially reduces the maximum utilization to +compare against, allowing you to hit max frequencies and turbo boost +from a much lower core utilization. + +With intel-pstate, you have the concept of minimum and maximum +performance, but no tunable that lets you define, maximum frequency +means 50% core utilization. For just this oversight, there are reasons +you may want ondemand. + +Let's support setting "enable" in kernel boot parameters. This lets +kernel maintainers include "intel_pstate=disable" statically in the +static boot parameters, but let users of the kernel override this +selection. 
+--- + Documentation/admin-guide/kernel-parameters.txt | 3 +++ + drivers/cpufreq/intel_pstate.c | 2 ++ + 2 files changed, 5 insertions(+) + +diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt +index fb95fad81c79..3e92fee81e33 100644 +--- a/Documentation/admin-guide/kernel-parameters.txt ++++ b/Documentation/admin-guide/kernel-parameters.txt +@@ -1857,6 +1857,9 @@ + disable + Do not enable intel_pstate as the default + scaling driver for the supported processors ++ enable ++ Enable intel_pstate in-case "disable" was passed ++ previously in the kernel boot parameters + passive + Use intel_pstate as a scaling driver, but configure it + to work with generic cpufreq governors (instead of +diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c +index 36a469150ff9..aee891c9b78a 100644 +--- a/drivers/cpufreq/intel_pstate.c ++++ b/drivers/cpufreq/intel_pstate.c +@@ -2845,6 +2845,8 @@ static int __init intel_pstate_setup(char *str) + pr_info("HWP disabled\n"); + no_hwp = 1; + } ++ if (!strcmp(str, "enable")) ++ no_load = 0; + if (!strcmp(str, "force")) + force_load = 1; + if (!strcmp(str, "hwp_only")) +-- +2.28.0 + -- cgit v1.2.3