From 8f8f3f20f5ab8e547cc9b47c3678eb6cdb414ed9 Mon Sep 17 00:00:00 2001 From: "peterz@infradead.org" Date: Thu, 21 Sep 2023 12:45:06 +0200 Subject: [PATCH 01/13] futex: Clarify FUTEX2 flags MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit mainline inclusion from mainline-v6.7-rc1 category: feature sys_futex_waitv() is part of the futex2 series (the first and only so far) of syscalls and has a flags field per futex (as opposed to flags being encoded in the futex op). This new flags field has a new namespace, which unfortunately isn't super explicit. Notably it currently takes FUTEX_32 and FUTEX_PRIVATE_FLAG. Introduce the FUTEX2 namespace to clarify this Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Thomas Gleixner Reviewed-by: André Almeida Link: https://lore.kernel.org/r/20230921105247.507327749@noisy.programming.kicks-ass.net (cherry picked from commit 4923954bbc4a760e0b2210e0cb5733726ac2e2e9) Signed-off-by: Wentao Guan --- include/uapi/linux/futex.h | 16 +++++++++++++--- kernel/futex/syscalls.c | 7 +++---- 2 files changed, 16 insertions(+), 7 deletions(-) diff --git a/include/uapi/linux/futex.h b/include/uapi/linux/futex.h index 71a5df8d26898..21d4eff411626 100644 --- a/include/uapi/linux/futex.h +++ b/include/uapi/linux/futex.h @@ -44,10 +44,20 @@ FUTEX_PRIVATE_FLAG) /* - * Flags to specify the bit length of the futex word for futex2 syscalls. - * Currently, only 32 is supported. + * Flags for futex2 syscalls. */ -#define FUTEX_32 2 + /* 0x00 */ + /* 0x01 */ +#define FUTEX2_SIZE_U32 0x02 + /* 0x04 */ + /* 0x08 */ + /* 0x10 */ + /* 0x20 */ + /* 0x40 */ +#define FUTEX2_PRIVATE FUTEX_PRIVATE_FLAG + +/* do not use */ +#define FUTEX_32 FUTEX2_SIZE_U32 /* historical accident :-( */ /* * Max numbers of elements in a futex_waitv array diff --git a/kernel/futex/syscalls.c b/kernel/futex/syscalls.c index 48feaa545b3c7..5be8df77c9605 100644 --- a/kernel/futex/syscalls.c +++ b/kernel/futex/syscalls.c @@ -211,8 +211,7 @@ SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val, return do_futex(uaddr, op, val, tp, uaddr2, (unsigned long)utime, val3); } -/* Mask of available flags for each futex in futex_waitv list */ -#define FUTEXV_WAITER_MASK (FUTEX_32 | FUTEX_PRIVATE_FLAG) +#define FUTEX2_VALID_MASK (FUTEX2_SIZE_U32 | FUTEX2_PRIVATE) /** * futex_parse_waitv - Parse a waitv array from userspace @@ -233,10 +232,10 @@ static int futex_parse_waitv(struct futex_vector *futexv, if (copy_from_user(&aux, &uwaitv[i], sizeof(aux))) return -EFAULT; - if ((aux.flags & ~FUTEXV_WAITER_MASK) || aux.__reserved) + if ((aux.flags & ~FUTEX2_VALID_MASK) || aux.__reserved) return -EINVAL; - if (!(aux.flags & FUTEX_32)) + if (!(aux.flags & FUTEX2_SIZE_U32)) return -EINVAL; futexv[i].w.flags = aux.flags; From fa3f33bf9e5d8e5057b652a43b063b695c7ea687 Mon Sep 17 00:00:00 2001 From: "peterz@infradead.org" Date: Thu, 21 Sep 2023 12:45:07 +0200 Subject: [PATCH 02/13] futex: Extend the FUTEX2 flags MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit mainline inclusion from mainline-v6.7-rc1 category: feature Add the definition for the missing but always intended extra sizes, and add a NUMA flag for the planned numa extention. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Thomas Gleixner Reviewed-by: André Almeida Link: https://lore.kernel.org/r/20230921105247.617057368@noisy.programming.kicks-ass.net (cherry picked from commit d6d08d24790e82c69a46ef78ae44fe1b1ed30775) Signed-off-by: Wentao Guan --- include/uapi/linux/futex.h | 21 ++++++++++++++++++--- kernel/futex/syscalls.c | 9 +++++++-- 2 files changed, 25 insertions(+), 5 deletions(-) diff --git a/include/uapi/linux/futex.h b/include/uapi/linux/futex.h index 21d4eff411626..d2ee625ea1890 100644 --- a/include/uapi/linux/futex.h +++ b/include/uapi/linux/futex.h @@ -45,17 +45,32 @@ /* * Flags for futex2 syscalls. + * + * NOTE: these are not pure flags, they can also be seen as: + * + * union { + * u32 flags; + * struct { + * u32 size : 2, + * numa : 1, + * : 4, + * private : 1; + * }; + * }; */ - /* 0x00 */ - /* 0x01 */ +#define FUTEX2_SIZE_U8 0x00 +#define FUTEX2_SIZE_U16 0x01 #define FUTEX2_SIZE_U32 0x02 - /* 0x04 */ +#define FUTEX2_SIZE_U64 0x03 +#define FUTEX2_NUMA 0x04 /* 0x08 */ /* 0x10 */ /* 0x20 */ /* 0x40 */ #define FUTEX2_PRIVATE FUTEX_PRIVATE_FLAG +#define FUTEX2_SIZE_MASK 0x03 + /* do not use */ #define FUTEX_32 FUTEX2_SIZE_U32 /* historical accident :-( */ diff --git a/kernel/futex/syscalls.c b/kernel/futex/syscalls.c index 5be8df77c9605..554b61d504f7a 100644 --- a/kernel/futex/syscalls.c +++ b/kernel/futex/syscalls.c @@ -211,7 +211,7 @@ SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val, return do_futex(uaddr, op, val, tp, uaddr2, (unsigned long)utime, val3); } -#define FUTEX2_VALID_MASK (FUTEX2_SIZE_U32 | FUTEX2_PRIVATE) +#define FUTEX2_VALID_MASK (FUTEX2_SIZE_MASK | FUTEX2_PRIVATE) /** * futex_parse_waitv - Parse a waitv array from userspace @@ -235,7 +235,12 @@ static int futex_parse_waitv(struct futex_vector *futexv, if ((aux.flags & ~FUTEX2_VALID_MASK) || aux.__reserved) return -EINVAL; - if (!(aux.flags & FUTEX2_SIZE_U32)) + if (!IS_ENABLED(CONFIG_64BIT) || in_compat_syscall()) { + if ((aux.flags & FUTEX2_SIZE_MASK) == FUTEX2_SIZE_U64) + return -EINVAL; + } + + if ((aux.flags & FUTEX2_SIZE_MASK) != FUTEX2_SIZE_U32) return -EINVAL; futexv[i].w.flags = aux.flags; From 7736b63f7322ffd09cc00c98716997b197378101 Mon Sep 17 00:00:00 2001 From: "peterz@infradead.org" Date: Thu, 21 Sep 2023 12:45:08 +0200 Subject: [PATCH 03/13] futex: Flag conversion MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit mainline inclusion from mainline-v6.7-rc1 category: feature Futex has 3 sets of flags: - legacy futex op bits - futex2 flags - internal flags Add a few helpers to convert from the API flags into the internal flags. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Thomas Gleixner Reviewed-by: André Almeida Link: https://lore.kernel.org/r/20230921105247.722140574@noisy.programming.kicks-ass.net (cherry picked from commit 5694289ce183bc3336407a78c8c722a0b9208f9b) Signed-off-by: Wentao Guan --- kernel/futex/futex.h | 63 +++++++++++++++++++++++++++++++++++++++-- kernel/futex/syscalls.c | 24 ++++++---------- kernel/futex/waitwake.c | 4 +-- 3 files changed, 71 insertions(+), 20 deletions(-) diff --git a/kernel/futex/futex.h b/kernel/futex/futex.h index b5379c0e6d6d1..68fc052dc09be 100644 --- a/kernel/futex/futex.h +++ b/kernel/futex/futex.h @@ -5,6 +5,7 @@ #include #include #include +#include #ifdef CONFIG_PREEMPT_RT #include @@ -16,8 +17,15 @@ * Futex flags used to encode options to functions and preserve them across * restarts. */ +#define FLAGS_SIZE_8 0x00 +#define FLAGS_SIZE_16 0x01 +#define FLAGS_SIZE_32 0x02 +#define FLAGS_SIZE_64 0x03 + +#define FLAGS_SIZE_MASK 0x03 + #ifdef CONFIG_MMU -# define FLAGS_SHARED 0x01 +# define FLAGS_SHARED 0x10 #else /* * NOMMU does not have per process address space. Let the compiler optimize @@ -25,8 +33,57 @@ */ # define FLAGS_SHARED 0x00 #endif -#define FLAGS_CLOCKRT 0x02 -#define FLAGS_HAS_TIMEOUT 0x04 +#define FLAGS_CLOCKRT 0x20 +#define FLAGS_HAS_TIMEOUT 0x40 +#define FLAGS_NUMA 0x80 + +/* FUTEX_ to FLAGS_ */ +static inline unsigned int futex_to_flags(unsigned int op) +{ + unsigned int flags = FLAGS_SIZE_32; + + if (!(op & FUTEX_PRIVATE_FLAG)) + flags |= FLAGS_SHARED; + + if (op & FUTEX_CLOCK_REALTIME) + flags |= FLAGS_CLOCKRT; + + return flags; +} + +/* FUTEX2_ to FLAGS_ */ +static inline unsigned int futex2_to_flags(unsigned int flags2) +{ + unsigned int flags = flags2 & FUTEX2_SIZE_MASK; + + if (!(flags2 & FUTEX2_PRIVATE)) + flags |= FLAGS_SHARED; + + if (flags2 & FUTEX2_NUMA) + flags |= FLAGS_NUMA; + + return flags; +} + +static inline unsigned int futex_size(unsigned int flags) +{ + return 1 << (flags & FLAGS_SIZE_MASK); +} + +static inline bool futex_flags_valid(unsigned int flags) +{ + /* Only 64bit futexes for 64bit code */ + if (!IS_ENABLED(CONFIG_64BIT) || in_compat_syscall()) { + if ((flags & FLAGS_SIZE_MASK) == FLAGS_SIZE_64) + return false; + } + + /* Only 32bit futexes are implemented -- for now */ + if ((flags & FLAGS_SIZE_MASK) != FLAGS_SIZE_32) + return false; + + return true; +} #ifdef CONFIG_FAIL_FUTEX extern bool should_fail_futex(bool fshared); diff --git a/kernel/futex/syscalls.c b/kernel/futex/syscalls.c index 554b61d504f7a..782e64117a654 100644 --- a/kernel/futex/syscalls.c +++ b/kernel/futex/syscalls.c @@ -1,6 +1,5 @@ // SPDX-License-Identifier: GPL-2.0-or-later -#include #include #include @@ -113,15 +112,12 @@ SYSCALL_DEFINE3(get_robust_list, int, pid, long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, u32 __user *uaddr2, u32 val2, u32 val3) { + unsigned int flags = futex_to_flags(op); int cmd = op & FUTEX_CMD_MASK; - unsigned int flags = 0; - if (!(op & FUTEX_PRIVATE_FLAG)) - flags |= FLAGS_SHARED; - - if (op & FUTEX_CLOCK_REALTIME) { - flags |= FLAGS_CLOCKRT; - if (cmd != FUTEX_WAIT_BITSET && cmd != FUTEX_WAIT_REQUEUE_PI && + if (flags & FLAGS_CLOCKRT) { + if (cmd != FUTEX_WAIT_BITSET && + cmd != FUTEX_WAIT_REQUEUE_PI && cmd != FUTEX_LOCK_PI2) return -ENOSYS; } @@ -229,21 +225,19 @@ static int futex_parse_waitv(struct futex_vector *futexv, unsigned int i; for (i = 0; i < nr_futexes; i++) { + unsigned int flags; + if (copy_from_user(&aux, &uwaitv[i], sizeof(aux))) return -EFAULT; if ((aux.flags & ~FUTEX2_VALID_MASK) || aux.__reserved) return -EINVAL; - if (!IS_ENABLED(CONFIG_64BIT) || in_compat_syscall()) { - if ((aux.flags & FUTEX2_SIZE_MASK) == FUTEX2_SIZE_U64) - return -EINVAL; - } - - if ((aux.flags & FUTEX2_SIZE_MASK) != FUTEX2_SIZE_U32) + flags = futex2_to_flags(aux.flags); + if (!futex_flags_valid(flags)) return -EINVAL; - futexv[i].w.flags = aux.flags; + futexv[i].w.flags = flags; futexv[i].w.val = aux.val; futexv[i].w.uaddr = aux.uaddr; futexv[i].q = futex_q_init; diff --git a/kernel/futex/waitwake.c b/kernel/futex/waitwake.c index ba01b94082033..fa9757766103c 100644 --- a/kernel/futex/waitwake.c +++ b/kernel/futex/waitwake.c @@ -419,11 +419,11 @@ static int futex_wait_multiple_setup(struct futex_vector *vs, int count, int *wo */ retry: for (i = 0; i < count; i++) { - if ((vs[i].w.flags & FUTEX_PRIVATE_FLAG) && retry) + if (!(vs[i].w.flags & FLAGS_SHARED) && retry) continue; ret = get_futex_key(u64_to_user_ptr(vs[i].w.uaddr), - !(vs[i].w.flags & FUTEX_PRIVATE_FLAG), + vs[i].w.flags & FLAGS_SHARED, &vs[i].q.key, FUTEX_READ); if (unlikely(ret)) From 5f665a943e9e73911739dafe086872bb96de6bf3 Mon Sep 17 00:00:00 2001 From: "peterz@infradead.org" Date: Thu, 21 Sep 2023 12:45:09 +0200 Subject: [PATCH 04/13] futex: Validate futex value against futex size mainline inclusion from mainline-v6.7-rc1 category: feature Ensure the futex value fits in the given futex size. Since this adds a constraint to an existing syscall, it might possibly change behaviour. Currently the value would be truncated to a u32 and any high bits would get silently lost. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Thomas Gleixner Link: https://lore.kernel.org/r/20230921105247.828934099@noisy.programming.kicks-ass.net (cherry picked from commit 698eb826383616ce0e817d2384da6413d1439fb6) Signed-off-by: Wentao Guan --- kernel/futex/futex.h | 10 ++++++++++ kernel/futex/syscalls.c | 3 +++ 2 files changed, 13 insertions(+) diff --git a/kernel/futex/futex.h b/kernel/futex/futex.h index 68fc052dc09be..a3f1fceafcbe6 100644 --- a/kernel/futex/futex.h +++ b/kernel/futex/futex.h @@ -85,6 +85,16 @@ static inline bool futex_flags_valid(unsigned int flags) return true; } +static inline bool futex_validate_input(unsigned int flags, u64 val) +{ + int bits = 8 * futex_size(flags); + + if (bits < 64 && (val >> bits)) + return false; + + return true; +} + #ifdef CONFIG_FAIL_FUTEX extern bool should_fail_futex(bool fshared); #else diff --git a/kernel/futex/syscalls.c b/kernel/futex/syscalls.c index 782e64117a654..53ab08c38dc41 100644 --- a/kernel/futex/syscalls.c +++ b/kernel/futex/syscalls.c @@ -237,6 +237,9 @@ static int futex_parse_waitv(struct futex_vector *futexv, if (!futex_flags_valid(flags)) return -EINVAL; + if (!futex_validate_input(flags, aux.val)) + return -EINVAL; + futexv[i].w.flags = flags; futexv[i].w.val = aux.val; futexv[i].w.uaddr = aux.uaddr; From f5ef5f397ae0d82c5017dc45f92429cc5659aca4 Mon Sep 17 00:00:00 2001 From: "peterz@infradead.org" Date: Thu, 21 Sep 2023 12:45:10 +0200 Subject: [PATCH 05/13] futex: Add sys_futex_wake() mainline inclusion from mainline-v6.7-rc1 category: feature To complement sys_futex_waitv() add sys_futex_wake(). This syscall implements what was previously known as FUTEX_WAKE_BITSET except it uses 'unsigned long' for the bitmask and takes FUTEX2 flags. The 'unsigned long' allows FUTEX2_SIZE_U64 on 64bit platforms. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Thomas Gleixner Acked-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20230921105247.936205525@noisy.programming.kicks-ass.net (cherry picked from commit 9f6c532f59b20580acf8ede9409c9b8dce6e74e1) Signed-off-by: Wentao Guan --- arch/alpha/kernel/syscalls/syscall.tbl | 1 + arch/arm/tools/syscall.tbl | 1 + arch/arm64/include/asm/unistd.h | 2 +- arch/arm64/include/asm/unistd32.h | 2 ++ arch/ia64/kernel/syscalls/syscall.tbl | 1 + arch/m68k/kernel/syscalls/syscall.tbl | 1 + arch/microblaze/kernel/syscalls/syscall.tbl | 1 + arch/mips/kernel/syscalls/syscall_n32.tbl | 1 + arch/mips/kernel/syscalls/syscall_n64.tbl | 1 + arch/mips/kernel/syscalls/syscall_o32.tbl | 1 + arch/parisc/kernel/syscalls/syscall.tbl | 1 + arch/powerpc/kernel/syscalls/syscall.tbl | 1 + arch/s390/kernel/syscalls/syscall.tbl | 1 + arch/sh/kernel/syscalls/syscall.tbl | 1 + arch/sparc/kernel/syscalls/syscall.tbl | 1 + arch/x86/entry/syscalls/syscall_32.tbl | 1 + arch/x86/entry/syscalls/syscall_64.tbl | 1 + arch/xtensa/kernel/syscalls/syscall.tbl | 1 + include/linux/syscalls.h | 3 +++ include/uapi/asm-generic/unistd.h | 4 ++- kernel/futex/syscalls.c | 30 +++++++++++++++++++++ kernel/sys_ni.c | 1 + 22 files changed, 56 insertions(+), 2 deletions(-) diff --git a/arch/alpha/kernel/syscalls/syscall.tbl b/arch/alpha/kernel/syscalls/syscall.tbl index ad37569d05072..3b86519d68e43 100644 --- a/arch/alpha/kernel/syscalls/syscall.tbl +++ b/arch/alpha/kernel/syscalls/syscall.tbl @@ -492,3 +492,4 @@ 560 common set_mempolicy_home_node sys_ni_syscall 561 common cachestat sys_cachestat 562 common fchmodat2 sys_fchmodat2 +563 common futex_wake sys_futex_wake diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl index c572d6c3dee0f..714abeb1e6fa3 100644 --- a/arch/arm/tools/syscall.tbl +++ b/arch/arm/tools/syscall.tbl @@ -466,3 +466,4 @@ 450 common set_mempolicy_home_node sys_set_mempolicy_home_node 451 common cachestat sys_cachestat 452 common fchmodat2 sys_fchmodat2 +454 common futex_wake sys_futex_wake diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h index bd77253b62e01..63a8a9c4abc16 100644 --- a/arch/arm64/include/asm/unistd.h +++ b/arch/arm64/include/asm/unistd.h @@ -39,7 +39,7 @@ #define __ARM_NR_compat_set_tls (__ARM_NR_COMPAT_BASE + 5) #define __ARM_NR_COMPAT_END (__ARM_NR_COMPAT_BASE + 0x800) -#define __NR_compat_syscalls 453 +#define __NR_compat_syscalls 455 #endif #define __ARCH_WANT_SYS_CLONE diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h index 545a4a7b5371c..7abf238f1c261 100644 --- a/arch/arm64/include/asm/unistd32.h +++ b/arch/arm64/include/asm/unistd32.h @@ -911,6 +911,8 @@ __SYSCALL(__NR_set_mempolicy_home_node, sys_set_mempolicy_home_node) __SYSCALL(__NR_cachestat, sys_cachestat) #define __NR_fchmodat2 452 __SYSCALL(__NR_fchmodat2, sys_fchmodat2) +#define __NR_futex_wake 454 +__SYSCALL(__NR_futex_wake, sys_futex_wake) /* * Please add new compat syscalls above this comment and update diff --git a/arch/ia64/kernel/syscalls/syscall.tbl b/arch/ia64/kernel/syscalls/syscall.tbl index 83d8609aec030..cd50247508e62 100644 --- a/arch/ia64/kernel/syscalls/syscall.tbl +++ b/arch/ia64/kernel/syscalls/syscall.tbl @@ -373,3 +373,4 @@ 450 common set_mempolicy_home_node sys_set_mempolicy_home_node 451 common cachestat sys_cachestat 452 common fchmodat2 sys_fchmodat2 +454 common futex_wake sys_futex_wake diff --git a/arch/m68k/kernel/syscalls/syscall.tbl b/arch/m68k/kernel/syscalls/syscall.tbl index 259ceb125367d..21eb35c693e12 100644 --- a/arch/m68k/kernel/syscalls/syscall.tbl +++ b/arch/m68k/kernel/syscalls/syscall.tbl @@ -452,3 +452,4 @@ 450 common set_mempolicy_home_node sys_set_mempolicy_home_node 451 common cachestat sys_cachestat 452 common fchmodat2 sys_fchmodat2 +454 common futex_wake sys_futex_wake diff --git a/arch/microblaze/kernel/syscalls/syscall.tbl b/arch/microblaze/kernel/syscalls/syscall.tbl index a3798c2637fd1..3a4e8513a8e16 100644 --- a/arch/microblaze/kernel/syscalls/syscall.tbl +++ b/arch/microblaze/kernel/syscalls/syscall.tbl @@ -458,3 +458,4 @@ 450 common set_mempolicy_home_node sys_set_mempolicy_home_node 451 common cachestat sys_cachestat 452 common fchmodat2 sys_fchmodat2 +454 common futex_wake sys_futex_wake diff --git a/arch/mips/kernel/syscalls/syscall_n32.tbl b/arch/mips/kernel/syscalls/syscall_n32.tbl index 4a296124604a1..7000948180a97 100644 --- a/arch/mips/kernel/syscalls/syscall_n32.tbl +++ b/arch/mips/kernel/syscalls/syscall_n32.tbl @@ -391,3 +391,4 @@ 450 n32 set_mempolicy_home_node sys_set_mempolicy_home_node 451 n32 cachestat sys_cachestat 452 n32 fchmodat2 sys_fchmodat2 +454 n32 futex_wake sys_futex_wake diff --git a/arch/mips/kernel/syscalls/syscall_n64.tbl b/arch/mips/kernel/syscalls/syscall_n64.tbl index cb5e757f6621c..48bc0fb4e3dc2 100644 --- a/arch/mips/kernel/syscalls/syscall_n64.tbl +++ b/arch/mips/kernel/syscalls/syscall_n64.tbl @@ -367,3 +367,4 @@ 450 common set_mempolicy_home_node sys_set_mempolicy_home_node 451 n64 cachestat sys_cachestat 452 n64 fchmodat2 sys_fchmodat2 +454 n64 futex_wake sys_futex_wake diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl b/arch/mips/kernel/syscalls/syscall_o32.tbl index 1ee62a861380a..b15ae2ef5816f 100644 --- a/arch/mips/kernel/syscalls/syscall_o32.tbl +++ b/arch/mips/kernel/syscalls/syscall_o32.tbl @@ -440,3 +440,4 @@ 450 o32 set_mempolicy_home_node sys_set_mempolicy_home_node 451 o32 cachestat sys_cachestat 452 o32 fchmodat2 sys_fchmodat2 +454 o32 futex_wake sys_futex_wake diff --git a/arch/parisc/kernel/syscalls/syscall.tbl b/arch/parisc/kernel/syscalls/syscall.tbl index 443ce9a053674..7262d18f8c80f 100644 --- a/arch/parisc/kernel/syscalls/syscall.tbl +++ b/arch/parisc/kernel/syscalls/syscall.tbl @@ -451,3 +451,4 @@ 450 common set_mempolicy_home_node sys_set_mempolicy_home_node 451 common cachestat sys_cachestat 452 common fchmodat2 sys_fchmodat2 +454 common futex_wake sys_futex_wake diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl index 40f6751271d3d..a48b0f2edc1f9 100644 --- a/arch/powerpc/kernel/syscalls/syscall.tbl +++ b/arch/powerpc/kernel/syscalls/syscall.tbl @@ -543,3 +543,4 @@ 450 nospu set_mempolicy_home_node sys_set_mempolicy_home_node 451 common cachestat sys_cachestat 452 common fchmodat2 sys_fchmodat2 +454 common futex_wake sys_futex_wake diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl index 51cc3616d5f98..367e945d7c212 100644 --- a/arch/s390/kernel/syscalls/syscall.tbl +++ b/arch/s390/kernel/syscalls/syscall.tbl @@ -455,3 +455,4 @@ 450 common set_mempolicy_home_node sys_set_mempolicy_home_node sys_set_mempolicy_home_node 451 common cachestat sys_cachestat sys_cachestat 452 common fchmodat2 sys_fchmodat2 sys_fchmodat2 +454 common futex_wake sys_futex_wake sys_futex_wake diff --git a/arch/sh/kernel/syscalls/syscall.tbl b/arch/sh/kernel/syscalls/syscall.tbl index 7e1ceb2ba5721..a2249793b7be9 100644 --- a/arch/sh/kernel/syscalls/syscall.tbl +++ b/arch/sh/kernel/syscalls/syscall.tbl @@ -456,3 +456,4 @@ 450 common set_mempolicy_home_node sys_set_mempolicy_home_node 451 common cachestat sys_cachestat 452 common fchmodat2 sys_fchmodat2 +454 common futex_wake sys_futex_wake diff --git a/arch/sparc/kernel/syscalls/syscall.tbl b/arch/sparc/kernel/syscalls/syscall.tbl index d0f535230ad8b..7a5ff87b86c59 100644 --- a/arch/sparc/kernel/syscalls/syscall.tbl +++ b/arch/sparc/kernel/syscalls/syscall.tbl @@ -498,3 +498,4 @@ 450 common set_mempolicy_home_node sys_set_mempolicy_home_node 451 common cachestat sys_cachestat 452 common fchmodat2 sys_fchmodat2 +454 common futex_wake sys_futex_wake diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl index 38db5ef2329f3..ea3ce649a9662 100644 --- a/arch/x86/entry/syscalls/syscall_32.tbl +++ b/arch/x86/entry/syscalls/syscall_32.tbl @@ -457,3 +457,4 @@ 450 i386 set_mempolicy_home_node sys_set_mempolicy_home_node 451 i386 cachestat sys_cachestat 452 i386 fchmodat2 sys_fchmodat2 +454 i386 futex_wake sys_futex_wake diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl index 1d6eee30eceb2..d10a6003a7c9f 100644 --- a/arch/x86/entry/syscalls/syscall_64.tbl +++ b/arch/x86/entry/syscalls/syscall_64.tbl @@ -375,6 +375,7 @@ 451 common cachestat sys_cachestat 452 common fchmodat2 sys_fchmodat2 453 64 map_shadow_stack sys_map_shadow_stack +454 common futex_wake sys_futex_wake # # Due to a historical design error, certain syscalls are numbered differently diff --git a/arch/xtensa/kernel/syscalls/syscall.tbl b/arch/xtensa/kernel/syscalls/syscall.tbl index fc1a4f3c81d9b..4e511bfd4b8f5 100644 --- a/arch/xtensa/kernel/syscalls/syscall.tbl +++ b/arch/xtensa/kernel/syscalls/syscall.tbl @@ -423,3 +423,4 @@ 450 common set_mempolicy_home_node sys_set_mempolicy_home_node 451 common cachestat sys_cachestat 452 common fchmodat2 sys_fchmodat2 +454 common futex_wake sys_futex_wake diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 36c592e43d652..dfa2a61b61a58 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -550,6 +550,9 @@ asmlinkage long sys_set_robust_list(struct robust_list_head __user *head, asmlinkage long sys_futex_waitv(struct futex_waitv *waiters, unsigned int nr_futexes, unsigned int flags, struct __kernel_timespec __user *timeout, clockid_t clockid); + +asmlinkage long sys_futex_wake(void __user *uaddr, unsigned long mask, int nr, unsigned int flags); + asmlinkage long sys_nanosleep(struct __kernel_timespec __user *rqtp, struct __kernel_timespec __user *rmtp); asmlinkage long sys_nanosleep_time32(struct old_timespec32 __user *rqtp, diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h index 05c412c582399..60470857c20b5 100644 --- a/include/uapi/asm-generic/unistd.h +++ b/include/uapi/asm-generic/unistd.h @@ -822,9 +822,11 @@ __SYSCALL(__NR_cachestat, sys_cachestat) #define __NR_fchmodat2 452 __SYSCALL(__NR_fchmodat2, sys_fchmodat2) +#define __NR_futex_wake 454 +__SYSCALL(__NR_futex_wake, sys_futex_wake) #undef __NR_syscalls -#define __NR_syscalls 453 +#define __NR_syscalls 455 /* * 32 bit systems traditionally used different diff --git a/kernel/futex/syscalls.c b/kernel/futex/syscalls.c index 53ab08c38dc41..3100005bc5963 100644 --- a/kernel/futex/syscalls.c +++ b/kernel/futex/syscalls.c @@ -334,6 +334,36 @@ SYSCALL_DEFINE5(futex_waitv, struct futex_waitv __user *, waiters, return ret; } +/* + * sys_futex_wake - Wake a number of futexes + * @uaddr: Address of the futex(es) to wake + * @mask: bitmask + * @nr: Number of the futexes to wake + * @flags: FUTEX2 flags + * + * Identical to the traditional FUTEX_WAKE_BITSET op, except it is part of the + * futex2 family of calls. + */ + +SYSCALL_DEFINE4(futex_wake, + void __user *, uaddr, + unsigned long, mask, + int, nr, + unsigned int, flags) +{ + if (flags & ~FUTEX2_VALID_MASK) + return -EINVAL; + + flags = futex2_to_flags(flags); + if (!futex_flags_valid(flags)) + return -EINVAL; + + if (!futex_validate_input(flags, mask)) + return -EINVAL; + + return futex_wake(uaddr, flags, nr, mask); +} + #ifdef CONFIG_COMPAT COMPAT_SYSCALL_DEFINE2(set_robust_list, struct compat_robust_list_head __user *, head, diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c index e8e1177873770..b8c21a05c51f7 100644 --- a/kernel/sys_ni.c +++ b/kernel/sys_ni.c @@ -87,6 +87,7 @@ COND_SYSCALL_COMPAT(set_robust_list); COND_SYSCALL(get_robust_list); COND_SYSCALL_COMPAT(get_robust_list); COND_SYSCALL(futex_waitv); +COND_SYSCALL(futex_wake); COND_SYSCALL(kexec_load); COND_SYSCALL_COMPAT(kexec_load); COND_SYSCALL(init_module); From e52c3c796a26a91af9af8f8d2d4423d1cd99ab38 Mon Sep 17 00:00:00 2001 From: "peterz@infradead.org" Date: Thu, 21 Sep 2023 12:45:11 +0200 Subject: [PATCH 06/13] futex: FLAGS_STRICT MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit mainline inclusion from mainline-v6.7-rc1 category: feature The current semantics for futex_wake() are a bit loose, specifically asking for 0 futexes to be woken actually gets you 1. Adding a !nr check to sys_futex_wake() makes that it would return 0 for unaligned futex words, because that check comes in the shared futex_wake() function. Adding the !nr check there, would affect the legacy sys_futex() semantics. Hence frob a flag :-( Suggested-by: André Almeida Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Thomas Gleixner Link: https://lore.kernel.org/r/20230921105248.048643656@noisy.programming.kicks-ass.net (cherry picked from commit 43adf844951084c266f172561f84c5f8120dd60b) Signed-off-by: Wentao Guan --- kernel/futex/futex.h | 21 +++++++++++---------- kernel/futex/syscalls.c | 2 +- kernel/futex/waitwake.c | 3 +++ 3 files changed, 15 insertions(+), 11 deletions(-) diff --git a/kernel/futex/futex.h b/kernel/futex/futex.h index a3f1fceafcbe6..0e7821a944a27 100644 --- a/kernel/futex/futex.h +++ b/kernel/futex/futex.h @@ -17,25 +17,26 @@ * Futex flags used to encode options to functions and preserve them across * restarts. */ -#define FLAGS_SIZE_8 0x00 -#define FLAGS_SIZE_16 0x01 -#define FLAGS_SIZE_32 0x02 -#define FLAGS_SIZE_64 0x03 +#define FLAGS_SIZE_8 0x0000 +#define FLAGS_SIZE_16 0x0001 +#define FLAGS_SIZE_32 0x0002 +#define FLAGS_SIZE_64 0x0003 -#define FLAGS_SIZE_MASK 0x03 +#define FLAGS_SIZE_MASK 0x0003 #ifdef CONFIG_MMU -# define FLAGS_SHARED 0x10 +# define FLAGS_SHARED 0x0010 #else /* * NOMMU does not have per process address space. Let the compiler optimize * code away. */ -# define FLAGS_SHARED 0x00 +# define FLAGS_SHARED 0x0000 #endif -#define FLAGS_CLOCKRT 0x20 -#define FLAGS_HAS_TIMEOUT 0x40 -#define FLAGS_NUMA 0x80 +#define FLAGS_CLOCKRT 0x0020 +#define FLAGS_HAS_TIMEOUT 0x0040 +#define FLAGS_NUMA 0x0080 +#define FLAGS_STRICT 0x0100 /* FUTEX_ to FLAGS_ */ static inline unsigned int futex_to_flags(unsigned int op) diff --git a/kernel/futex/syscalls.c b/kernel/futex/syscalls.c index 3100005bc5963..6a3f9bd71ca5c 100644 --- a/kernel/futex/syscalls.c +++ b/kernel/futex/syscalls.c @@ -361,7 +361,7 @@ SYSCALL_DEFINE4(futex_wake, if (!futex_validate_input(flags, mask)) return -EINVAL; - return futex_wake(uaddr, flags, nr, mask); + return futex_wake(uaddr, FLAGS_STRICT | flags, nr, mask); } #ifdef CONFIG_COMPAT diff --git a/kernel/futex/waitwake.c b/kernel/futex/waitwake.c index fa9757766103c..ceb05b8765974 100644 --- a/kernel/futex/waitwake.c +++ b/kernel/futex/waitwake.c @@ -155,6 +155,9 @@ int futex_wake(u32 __user *uaddr, unsigned int flags, int nr_wake, u32 bitset) if (unlikely(ret != 0)) return ret; + if ((flags & FLAGS_STRICT) && !nr_wake) + return 0; + hb = futex_hash(&key); /* Make sure we really have tasks to wakeup */ From b216e899b7a69eef4e62a52411a45c574c08670e Mon Sep 17 00:00:00 2001 From: "peterz@infradead.org" Date: Thu, 21 Sep 2023 12:45:12 +0200 Subject: [PATCH 07/13] futex: Add sys_futex_wait() mainline inclusion from mainline-v6.7-rc1 category: feature To complement sys_futex_waitv()/wake(), add sys_futex_wait(). This syscall implements what was previously known as FUTEX_WAIT_BITSET except it uses 'unsigned long' for the value and bitmask arguments, takes timespec and clockid_t arguments for the absolute timeout and uses FUTEX2 flags. The 'unsigned long' allows FUTEX2_SIZE_U64 on 64bit platforms. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Thomas Gleixner Acked-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20230921105248.164324363@noisy.programming.kicks-ass.net (cherry picked from commit cb8c4312afca1b2dc64107e7e7cea81911055612) Signed-off-by: Wentao Guan --- arch/alpha/kernel/syscalls/syscall.tbl | 1 + arch/arm/tools/syscall.tbl | 1 + arch/arm64/include/asm/unistd.h | 2 +- arch/arm64/include/asm/unistd32.h | 2 + arch/ia64/kernel/syscalls/syscall.tbl | 1 + arch/m68k/kernel/syscalls/syscall.tbl | 1 + arch/microblaze/kernel/syscalls/syscall.tbl | 1 + arch/mips/kernel/syscalls/syscall_n32.tbl | 1 + arch/mips/kernel/syscalls/syscall_n64.tbl | 1 + arch/mips/kernel/syscalls/syscall_o32.tbl | 1 + arch/parisc/kernel/syscalls/syscall.tbl | 1 + arch/powerpc/kernel/syscalls/syscall.tbl | 1 + arch/s390/kernel/syscalls/syscall.tbl | 1 + arch/sh/kernel/syscalls/syscall.tbl | 1 + arch/sparc/kernel/syscalls/syscall.tbl | 1 + arch/x86/entry/syscalls/syscall_32.tbl | 1 + arch/x86/entry/syscalls/syscall_64.tbl | 1 + arch/xtensa/kernel/syscalls/syscall.tbl | 1 + include/linux/syscalls.h | 4 + include/uapi/asm-generic/unistd.h | 4 +- kernel/futex/futex.h | 3 + kernel/futex/syscalls.c | 120 +++++++++++++++----- kernel/futex/waitwake.c | 61 ++++++---- kernel/sys_ni.c | 1 + 24 files changed, 156 insertions(+), 57 deletions(-) diff --git a/arch/alpha/kernel/syscalls/syscall.tbl b/arch/alpha/kernel/syscalls/syscall.tbl index 3b86519d68e43..c49f12fd264e3 100644 --- a/arch/alpha/kernel/syscalls/syscall.tbl +++ b/arch/alpha/kernel/syscalls/syscall.tbl @@ -493,3 +493,4 @@ 561 common cachestat sys_cachestat 562 common fchmodat2 sys_fchmodat2 563 common futex_wake sys_futex_wake +564 common futex_wait sys_futex_wait diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl index 714abeb1e6fa3..a6cf562773270 100644 --- a/arch/arm/tools/syscall.tbl +++ b/arch/arm/tools/syscall.tbl @@ -467,3 +467,4 @@ 451 common cachestat sys_cachestat 452 common fchmodat2 sys_fchmodat2 454 common futex_wake sys_futex_wake +455 common futex_wait sys_futex_wait diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h index 63a8a9c4abc16..f33190f17ebb6 100644 --- a/arch/arm64/include/asm/unistd.h +++ b/arch/arm64/include/asm/unistd.h @@ -39,7 +39,7 @@ #define __ARM_NR_compat_set_tls (__ARM_NR_COMPAT_BASE + 5) #define __ARM_NR_COMPAT_END (__ARM_NR_COMPAT_BASE + 0x800) -#define __NR_compat_syscalls 455 +#define __NR_compat_syscalls 456 #endif #define __ARCH_WANT_SYS_CLONE diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h index 7abf238f1c261..60d3c6ed13e19 100644 --- a/arch/arm64/include/asm/unistd32.h +++ b/arch/arm64/include/asm/unistd32.h @@ -913,6 +913,8 @@ __SYSCALL(__NR_cachestat, sys_cachestat) __SYSCALL(__NR_fchmodat2, sys_fchmodat2) #define __NR_futex_wake 454 __SYSCALL(__NR_futex_wake, sys_futex_wake) +#define __NR_futex_wait 455 +__SYSCALL(__NR_futex_wait, sys_futex_wait) /* * Please add new compat syscalls above this comment and update diff --git a/arch/ia64/kernel/syscalls/syscall.tbl b/arch/ia64/kernel/syscalls/syscall.tbl index cd50247508e62..4043f0c55170e 100644 --- a/arch/ia64/kernel/syscalls/syscall.tbl +++ b/arch/ia64/kernel/syscalls/syscall.tbl @@ -374,3 +374,4 @@ 451 common cachestat sys_cachestat 452 common fchmodat2 sys_fchmodat2 454 common futex_wake sys_futex_wake +455 common futex_wait sys_futex_wait diff --git a/arch/m68k/kernel/syscalls/syscall.tbl b/arch/m68k/kernel/syscalls/syscall.tbl index 21eb35c693e12..24841674acc53 100644 --- a/arch/m68k/kernel/syscalls/syscall.tbl +++ b/arch/m68k/kernel/syscalls/syscall.tbl @@ -453,3 +453,4 @@ 451 common cachestat sys_cachestat 452 common fchmodat2 sys_fchmodat2 454 common futex_wake sys_futex_wake +455 common futex_wait sys_futex_wait diff --git a/arch/microblaze/kernel/syscalls/syscall.tbl b/arch/microblaze/kernel/syscalls/syscall.tbl index 3a4e8513a8e16..f03927ab0220a 100644 --- a/arch/microblaze/kernel/syscalls/syscall.tbl +++ b/arch/microblaze/kernel/syscalls/syscall.tbl @@ -459,3 +459,4 @@ 451 common cachestat sys_cachestat 452 common fchmodat2 sys_fchmodat2 454 common futex_wake sys_futex_wake +455 common futex_wait sys_futex_wait diff --git a/arch/mips/kernel/syscalls/syscall_n32.tbl b/arch/mips/kernel/syscalls/syscall_n32.tbl index 7000948180a97..498eab2b86a3c 100644 --- a/arch/mips/kernel/syscalls/syscall_n32.tbl +++ b/arch/mips/kernel/syscalls/syscall_n32.tbl @@ -392,3 +392,4 @@ 451 n32 cachestat sys_cachestat 452 n32 fchmodat2 sys_fchmodat2 454 n32 futex_wake sys_futex_wake +455 n32 futex_wait sys_futex_wait diff --git a/arch/mips/kernel/syscalls/syscall_n64.tbl b/arch/mips/kernel/syscalls/syscall_n64.tbl index 48bc0fb4e3dc2..faff8dfd29833 100644 --- a/arch/mips/kernel/syscalls/syscall_n64.tbl +++ b/arch/mips/kernel/syscalls/syscall_n64.tbl @@ -368,3 +368,4 @@ 451 n64 cachestat sys_cachestat 452 n64 fchmodat2 sys_fchmodat2 454 n64 futex_wake sys_futex_wake +455 n64 futex_wait sys_futex_wait diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl b/arch/mips/kernel/syscalls/syscall_o32.tbl index b15ae2ef5816f..caacf64ebb726 100644 --- a/arch/mips/kernel/syscalls/syscall_o32.tbl +++ b/arch/mips/kernel/syscalls/syscall_o32.tbl @@ -441,3 +441,4 @@ 451 o32 cachestat sys_cachestat 452 o32 fchmodat2 sys_fchmodat2 454 o32 futex_wake sys_futex_wake +455 o32 futex_wait sys_futex_wait diff --git a/arch/parisc/kernel/syscalls/syscall.tbl b/arch/parisc/kernel/syscalls/syscall.tbl index 7262d18f8c80f..cced159bdc041 100644 --- a/arch/parisc/kernel/syscalls/syscall.tbl +++ b/arch/parisc/kernel/syscalls/syscall.tbl @@ -452,3 +452,4 @@ 451 common cachestat sys_cachestat 452 common fchmodat2 sys_fchmodat2 454 common futex_wake sys_futex_wake +455 common futex_wait sys_futex_wait diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl index a48b0f2edc1f9..799f83da8b150 100644 --- a/arch/powerpc/kernel/syscalls/syscall.tbl +++ b/arch/powerpc/kernel/syscalls/syscall.tbl @@ -544,3 +544,4 @@ 451 common cachestat sys_cachestat 452 common fchmodat2 sys_fchmodat2 454 common futex_wake sys_futex_wake +455 common futex_wait sys_futex_wait diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl index 367e945d7c212..e3c0e99e63921 100644 --- a/arch/s390/kernel/syscalls/syscall.tbl +++ b/arch/s390/kernel/syscalls/syscall.tbl @@ -456,3 +456,4 @@ 451 common cachestat sys_cachestat sys_cachestat 452 common fchmodat2 sys_fchmodat2 sys_fchmodat2 454 common futex_wake sys_futex_wake sys_futex_wake +455 common futex_wait sys_futex_wait sys_futex_wait diff --git a/arch/sh/kernel/syscalls/syscall.tbl b/arch/sh/kernel/syscalls/syscall.tbl index a2249793b7be9..37641fd1d02ca 100644 --- a/arch/sh/kernel/syscalls/syscall.tbl +++ b/arch/sh/kernel/syscalls/syscall.tbl @@ -457,3 +457,4 @@ 451 common cachestat sys_cachestat 452 common fchmodat2 sys_fchmodat2 454 common futex_wake sys_futex_wake +455 common futex_wait sys_futex_wait diff --git a/arch/sparc/kernel/syscalls/syscall.tbl b/arch/sparc/kernel/syscalls/syscall.tbl index 7a5ff87b86c59..134c4656bb276 100644 --- a/arch/sparc/kernel/syscalls/syscall.tbl +++ b/arch/sparc/kernel/syscalls/syscall.tbl @@ -499,3 +499,4 @@ 451 common cachestat sys_cachestat 452 common fchmodat2 sys_fchmodat2 454 common futex_wake sys_futex_wake +455 common futex_wait sys_futex_wait diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl index ea3ce649a9662..0812b33e03ea1 100644 --- a/arch/x86/entry/syscalls/syscall_32.tbl +++ b/arch/x86/entry/syscalls/syscall_32.tbl @@ -458,3 +458,4 @@ 451 i386 cachestat sys_cachestat 452 i386 fchmodat2 sys_fchmodat2 454 i386 futex_wake sys_futex_wake +455 i386 futex_wait sys_futex_wait diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl index d10a6003a7c9f..ddf6288823ad0 100644 --- a/arch/x86/entry/syscalls/syscall_64.tbl +++ b/arch/x86/entry/syscalls/syscall_64.tbl @@ -376,6 +376,7 @@ 452 common fchmodat2 sys_fchmodat2 453 64 map_shadow_stack sys_map_shadow_stack 454 common futex_wake sys_futex_wake +455 common futex_wait sys_futex_wait # # Due to a historical design error, certain syscalls are numbered differently diff --git a/arch/xtensa/kernel/syscalls/syscall.tbl b/arch/xtensa/kernel/syscalls/syscall.tbl index 4e511bfd4b8f5..ac278dbce2eee 100644 --- a/arch/xtensa/kernel/syscalls/syscall.tbl +++ b/arch/xtensa/kernel/syscalls/syscall.tbl @@ -424,3 +424,4 @@ 451 common cachestat sys_cachestat 452 common fchmodat2 sys_fchmodat2 454 common futex_wake sys_futex_wake +455 common futex_wait sys_futex_wait diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index dfa2a61b61a58..0760b0bb1c4ee 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -553,6 +553,10 @@ asmlinkage long sys_futex_waitv(struct futex_waitv *waiters, asmlinkage long sys_futex_wake(void __user *uaddr, unsigned long mask, int nr, unsigned int flags); +asmlinkage long sys_futex_wait(void __user *uaddr, unsigned long val, unsigned long mask, + unsigned int flags, struct __kernel_timespec __user *timespec, + clockid_t clockid); + asmlinkage long sys_nanosleep(struct __kernel_timespec __user *rqtp, struct __kernel_timespec __user *rmtp); asmlinkage long sys_nanosleep_time32(struct old_timespec32 __user *rqtp, diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h index 60470857c20b5..325f513f204c4 100644 --- a/include/uapi/asm-generic/unistd.h +++ b/include/uapi/asm-generic/unistd.h @@ -824,9 +824,11 @@ __SYSCALL(__NR_cachestat, sys_cachestat) __SYSCALL(__NR_fchmodat2, sys_fchmodat2) #define __NR_futex_wake 454 __SYSCALL(__NR_futex_wake, sys_futex_wake) +#define __NR_futex_wait 455 +__SYSCALL(__NR_futex_wait, sys_futex_wait) #undef __NR_syscalls -#define __NR_syscalls 455 +#define __NR_syscalls 456 /* * 32 bit systems traditionally used different diff --git a/kernel/futex/futex.h b/kernel/futex/futex.h index 0e7821a944a27..e74888a7d71db 100644 --- a/kernel/futex/futex.h +++ b/kernel/futex/futex.h @@ -332,6 +332,9 @@ extern int futex_requeue(u32 __user *uaddr1, unsigned int flags, u32 __user *uaddr2, int nr_wake, int nr_requeue, u32 *cmpval, int requeue_pi); +extern int __futex_wait(u32 __user *uaddr, unsigned int flags, u32 val, + struct hrtimer_sleeper *to, u32 bitset); + extern int futex_wait(u32 __user *uaddr, unsigned int flags, u32 val, ktime_t *abs_time, u32 bitset); diff --git a/kernel/futex/syscalls.c b/kernel/futex/syscalls.c index 6a3f9bd71ca5c..be83d67c68d74 100644 --- a/kernel/futex/syscalls.c +++ b/kernel/futex/syscalls.c @@ -249,6 +249,46 @@ static int futex_parse_waitv(struct futex_vector *futexv, return 0; } +static int futex2_setup_timeout(struct __kernel_timespec __user *timeout, + clockid_t clockid, struct hrtimer_sleeper *to) +{ + int flag_clkid = 0, flag_init = 0; + struct timespec64 ts; + ktime_t time; + int ret; + + if (!timeout) + return 0; + + if (clockid == CLOCK_REALTIME) { + flag_clkid = FLAGS_CLOCKRT; + flag_init = FUTEX_CLOCK_REALTIME; + } + + if (clockid != CLOCK_REALTIME && clockid != CLOCK_MONOTONIC) + return -EINVAL; + + if (get_timespec64(&ts, timeout)) + return -EFAULT; + + /* + * Since there's no opcode for futex_waitv, use + * FUTEX_WAIT_BITSET that uses absolute timeout as well + */ + ret = futex_init_timeout(FUTEX_WAIT_BITSET, flag_init, &ts, &time); + if (ret) + return ret; + + futex_setup_timer(&time, to, flag_clkid, 0); + return 0; +} + +static inline void futex2_destroy_timeout(struct hrtimer_sleeper *to) +{ + hrtimer_cancel(&to->timer); + destroy_hrtimer_on_stack(&to->timer); +} + /** * sys_futex_waitv - Wait on a list of futexes * @waiters: List of futexes to wait on @@ -278,8 +318,6 @@ SYSCALL_DEFINE5(futex_waitv, struct futex_waitv __user *, waiters, { struct hrtimer_sleeper to; struct futex_vector *futexv; - struct timespec64 ts; - ktime_t time; int ret; /* This syscall supports no flags for now */ @@ -289,30 +327,8 @@ SYSCALL_DEFINE5(futex_waitv, struct futex_waitv __user *, waiters, if (!nr_futexes || nr_futexes > FUTEX_WAITV_MAX || !waiters) return -EINVAL; - if (timeout) { - int flag_clkid = 0, flag_init = 0; - - if (clockid == CLOCK_REALTIME) { - flag_clkid = FLAGS_CLOCKRT; - flag_init = FUTEX_CLOCK_REALTIME; - } - - if (clockid != CLOCK_REALTIME && clockid != CLOCK_MONOTONIC) - return -EINVAL; - - if (get_timespec64(&ts, timeout)) - return -EFAULT; - - /* - * Since there's no opcode for futex_waitv, use - * FUTEX_WAIT_BITSET that uses absolute timeout as well - */ - ret = futex_init_timeout(FUTEX_WAIT_BITSET, flag_init, &ts, &time); - if (ret) - return ret; - - futex_setup_timer(&time, &to, flag_clkid, 0); - } + if (timeout && (ret = futex2_setup_timeout(timeout, clockid, &to))) + return ret; futexv = kcalloc(nr_futexes, sizeof(*futexv), GFP_KERNEL); if (!futexv) { @@ -327,10 +343,8 @@ SYSCALL_DEFINE5(futex_waitv, struct futex_waitv __user *, waiters, kfree(futexv); destroy_timer: - if (timeout) { - hrtimer_cancel(&to.timer); - destroy_hrtimer_on_stack(&to.timer); - } + if (timeout) + futex2_destroy_timeout(&to); return ret; } @@ -364,6 +378,52 @@ SYSCALL_DEFINE4(futex_wake, return futex_wake(uaddr, FLAGS_STRICT | flags, nr, mask); } +/* + * sys_futex_wait - Wait on a futex + * @uaddr: Address of the futex to wait on + * @val: Value of @uaddr + * @mask: bitmask + * @flags: FUTEX2 flags + * @timeout: Optional absolute timeout + * @clockid: Clock to be used for the timeout, realtime or monotonic + * + * Identical to the traditional FUTEX_WAIT_BITSET op, except it is part of the + * futex2 familiy of calls. + */ + +SYSCALL_DEFINE6(futex_wait, + void __user *, uaddr, + unsigned long, val, + unsigned long, mask, + unsigned int, flags, + struct __kernel_timespec __user *, timeout, + clockid_t, clockid) +{ + struct hrtimer_sleeper to; + int ret; + + if (flags & ~FUTEX2_VALID_MASK) + return -EINVAL; + + flags = futex2_to_flags(flags); + if (!futex_flags_valid(flags)) + return -EINVAL; + + if (!futex_validate_input(flags, val) || + !futex_validate_input(flags, mask)) + return -EINVAL; + + if (timeout && (ret = futex2_setup_timeout(timeout, clockid, &to))) + return ret; + + ret = __futex_wait(uaddr, flags, val, timeout ? &to : NULL, mask); + + if (timeout) + futex2_destroy_timeout(&to); + + return ret; +} + #ifdef CONFIG_COMPAT COMPAT_SYSCALL_DEFINE2(set_robust_list, struct compat_robust_list_head __user *, head, diff --git a/kernel/futex/waitwake.c b/kernel/futex/waitwake.c index ceb05b8765974..b109a0810a2c6 100644 --- a/kernel/futex/waitwake.c +++ b/kernel/futex/waitwake.c @@ -632,20 +632,18 @@ int futex_wait_setup(u32 __user *uaddr, u32 val, unsigned int flags, return ret; } -int futex_wait(u32 __user *uaddr, unsigned int flags, u32 val, ktime_t *abs_time, u32 bitset) +int __futex_wait(u32 __user *uaddr, unsigned int flags, u32 val, + struct hrtimer_sleeper *to, u32 bitset) { - struct hrtimer_sleeper timeout, *to; - struct restart_block *restart; - struct futex_hash_bucket *hb; struct futex_q q = futex_q_init; + struct futex_hash_bucket *hb; int ret; if (!bitset) return -EINVAL; + q.bitset = bitset; - to = futex_setup_timer(abs_time, &timeout, flags, - current->timer_slack_ns); retry: /* * Prepare to wait on uaddr. On success, it holds hb->lock and q @@ -653,18 +651,17 @@ int futex_wait(u32 __user *uaddr, unsigned int flags, u32 val, ktime_t *abs_time */ ret = futex_wait_setup(uaddr, val, flags, &q, &hb); if (ret) - goto out; + return ret; /* futex_queue and wait for wakeup, timeout, or a signal. */ futex_wait_queue(hb, &q, to); /* If we were woken (and unqueued), we succeeded, whatever. */ - ret = 0; if (!futex_unqueue(&q)) - goto out; - ret = -ETIMEDOUT; + return 0; + if (to && !to->task) - goto out; + return -ETIMEDOUT; /* * We expect signal_pending(current), but we might be the @@ -673,24 +670,38 @@ int futex_wait(u32 __user *uaddr, unsigned int flags, u32 val, ktime_t *abs_time if (!signal_pending(current)) goto retry; - ret = -ERESTARTSYS; - if (!abs_time) - goto out; + return -ERESTARTSYS; +} - restart = ¤t->restart_block; - restart->futex.uaddr = uaddr; - restart->futex.val = val; - restart->futex.time = *abs_time; - restart->futex.bitset = bitset; - restart->futex.flags = flags | FLAGS_HAS_TIMEOUT; +int futex_wait(u32 __user *uaddr, unsigned int flags, u32 val, ktime_t *abs_time, u32 bitset) +{ + struct hrtimer_sleeper timeout, *to; + struct restart_block *restart; + int ret; + + to = futex_setup_timer(abs_time, &timeout, flags, + current->timer_slack_ns); + + ret = __futex_wait(uaddr, flags, val, to, bitset); + + /* No timeout, nothing to clean up. */ + if (!to) + return ret; + + hrtimer_cancel(&to->timer); + destroy_hrtimer_on_stack(&to->timer); - ret = set_restart_fn(restart, futex_wait_restart); + if (ret == -ERESTARTSYS) { + restart = ¤t->restart_block; + restart->futex.uaddr = uaddr; + restart->futex.val = val; + restart->futex.time = *abs_time; + restart->futex.bitset = bitset; + restart->futex.flags = flags | FLAGS_HAS_TIMEOUT; -out: - if (to) { - hrtimer_cancel(&to->timer); - destroy_hrtimer_on_stack(&to->timer); + return set_restart_fn(restart, futex_wait_restart); } + return ret; } diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c index b8c21a05c51f7..d6abb2b14a2e4 100644 --- a/kernel/sys_ni.c +++ b/kernel/sys_ni.c @@ -88,6 +88,7 @@ COND_SYSCALL(get_robust_list); COND_SYSCALL_COMPAT(get_robust_list); COND_SYSCALL(futex_waitv); COND_SYSCALL(futex_wake); +COND_SYSCALL(futex_wait); COND_SYSCALL(kexec_load); COND_SYSCALL_COMPAT(kexec_load); COND_SYSCALL(init_module); From f7247e8dafd7f9368488cee95405cae4df355f8d Mon Sep 17 00:00:00 2001 From: "peterz@infradead.org" Date: Thu, 21 Sep 2023 12:45:13 +0200 Subject: [PATCH 08/13] futex: Propagate flags into get_futex_key() mainline inclusion from mainline-v6.7-rc1 category: feature Instead of only passing FLAGS_SHARED as a boolean, pass down flags as a whole. No functional change intended. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Thomas Gleixner Link: https://lore.kernel.org/r/20230921105248.282857501@noisy.programming.kicks-ass.net Conflicts: kernel/futex/pi.c (cherry picked from commit 3b63a55f498b763aba0886b244df613587a73c46) Signed-off-by: Wentao Guan --- kernel/futex/core.c | 7 +++++-- kernel/futex/futex.h | 2 +- kernel/futex/pi.c | 4 ++-- kernel/futex/requeue.c | 6 +++--- kernel/futex/waitwake.c | 14 +++++++------- 5 files changed, 18 insertions(+), 15 deletions(-) diff --git a/kernel/futex/core.c b/kernel/futex/core.c index 79f1b682089d1..c64bd6e7d6165 100644 --- a/kernel/futex/core.c +++ b/kernel/futex/core.c @@ -193,7 +193,7 @@ static u64 get_inode_sequence_number(struct inode *inode) /** * get_futex_key() - Get parameters which are the keys for a futex * @uaddr: virtual address of the futex - * @fshared: false for a PROCESS_PRIVATE futex, true for PROCESS_SHARED + * @flags: FLAGS_* * @key: address where result is stored. * @rw: mapping needs to be read/write (values: FUTEX_READ, * FUTEX_WRITE) @@ -217,7 +217,7 @@ static u64 get_inode_sequence_number(struct inode *inode) * * lock_page() might sleep, the caller should not hold a spinlock. */ -int get_futex_key(u32 __user *uaddr, bool fshared, union futex_key *key, +int get_futex_key(u32 __user *uaddr, unsigned int flags, union futex_key *key, enum futex_access rw) { unsigned long address = (unsigned long)uaddr; @@ -225,6 +225,9 @@ int get_futex_key(u32 __user *uaddr, bool fshared, union futex_key *key, struct page *page, *tail; struct address_space *mapping; int err, ro = 0; + bool fshared; + + fshared = flags & FLAGS_SHARED; /* * The futex address must be "naturally" aligned. diff --git a/kernel/futex/futex.h b/kernel/futex/futex.h index e74888a7d71db..a8ea5ef524242 100644 --- a/kernel/futex/futex.h +++ b/kernel/futex/futex.h @@ -184,7 +184,7 @@ enum futex_access { FUTEX_WRITE }; -extern int get_futex_key(u32 __user *uaddr, bool fshared, union futex_key *key, +extern int get_futex_key(u32 __user *uaddr, unsigned int flags, union futex_key *key, enum futex_access rw); extern struct hrtimer_sleeper * diff --git a/kernel/futex/pi.c b/kernel/futex/pi.c index ade6d0fe6e3c9..a309bf88f1e10 100644 --- a/kernel/futex/pi.c +++ b/kernel/futex/pi.c @@ -934,7 +934,7 @@ int futex_lock_pi(u32 __user *uaddr, unsigned int flags, ktime_t *time, int tryl retry: exiting = NULL; - ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q.key, FUTEX_WRITE); + ret = get_futex_key(uaddr, flags, &q.key, FUTEX_WRITE); if (unlikely(ret != 0)) goto out; @@ -1130,7 +1130,7 @@ int futex_unlock_pi(u32 __user *uaddr, unsigned int flags) if ((uval & FUTEX_TID_MASK) != vpid) return -EPERM; - ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key, FUTEX_WRITE); + ret = get_futex_key(uaddr, flags, &key, FUTEX_WRITE); if (ret) return ret; diff --git a/kernel/futex/requeue.c b/kernel/futex/requeue.c index fc3427e37320c..dcfed5e6012f5 100644 --- a/kernel/futex/requeue.c +++ b/kernel/futex/requeue.c @@ -429,10 +429,10 @@ int futex_requeue(u32 __user *uaddr1, unsigned int flags, u32 __user *uaddr2, } retry: - ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1, FUTEX_READ); + ret = get_futex_key(uaddr1, flags, &key1, FUTEX_READ); if (unlikely(ret != 0)) return ret; - ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, + ret = get_futex_key(uaddr2, flags, &key2, requeue_pi ? FUTEX_WRITE : FUTEX_READ); if (unlikely(ret != 0)) return ret; @@ -796,7 +796,7 @@ int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, */ rt_mutex_init_waiter(&rt_waiter); - ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, FUTEX_WRITE); + ret = get_futex_key(uaddr2, flags, &key2, FUTEX_WRITE); if (unlikely(ret != 0)) goto out; diff --git a/kernel/futex/waitwake.c b/kernel/futex/waitwake.c index b109a0810a2c6..37860f794bf74 100644 --- a/kernel/futex/waitwake.c +++ b/kernel/futex/waitwake.c @@ -145,13 +145,13 @@ int futex_wake(u32 __user *uaddr, unsigned int flags, int nr_wake, u32 bitset) struct futex_hash_bucket *hb; struct futex_q *this, *next; union futex_key key = FUTEX_KEY_INIT; - int ret; DEFINE_WAKE_Q(wake_q); + int ret; if (!bitset) return -EINVAL; - ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key, FUTEX_READ); + ret = get_futex_key(uaddr, flags, &key, FUTEX_READ); if (unlikely(ret != 0)) return ret; @@ -248,10 +248,10 @@ int futex_wake_op(u32 __user *uaddr1, unsigned int flags, u32 __user *uaddr2, DEFINE_WAKE_Q(wake_q); retry: - ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1, FUTEX_READ); + ret = get_futex_key(uaddr1, flags, &key1, FUTEX_READ); if (unlikely(ret != 0)) return ret; - ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, FUTEX_WRITE); + ret = get_futex_key(uaddr2, flags, &key2, FUTEX_WRITE); if (unlikely(ret != 0)) return ret; @@ -426,7 +426,7 @@ static int futex_wait_multiple_setup(struct futex_vector *vs, int count, int *wo continue; ret = get_futex_key(u64_to_user_ptr(vs[i].w.uaddr), - vs[i].w.flags & FLAGS_SHARED, + vs[i].w.flags, &vs[i].q.key, FUTEX_READ); if (unlikely(ret)) @@ -438,7 +438,7 @@ static int futex_wait_multiple_setup(struct futex_vector *vs, int count, int *wo for (i = 0; i < count; i++) { u32 __user *uaddr = (u32 __user *)(unsigned long)vs[i].w.uaddr; struct futex_q *q = &vs[i].q; - u32 val = (u32)vs[i].w.val; + u32 val = vs[i].w.val; hb = futex_q_lock(q); ret = futex_get_value_locked(&uval, uaddr); @@ -602,7 +602,7 @@ int futex_wait_setup(u32 __user *uaddr, u32 val, unsigned int flags, * while the syscall executes. */ retry: - ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q->key, FUTEX_READ); + ret = get_futex_key(uaddr, flags, &q->key, FUTEX_READ); if (unlikely(ret != 0)) return ret; From 0b0e9c4c21679ebed3af49cd47b8986183c4da46 Mon Sep 17 00:00:00 2001 From: "peterz@infradead.org" Date: Thu, 21 Sep 2023 12:45:14 +0200 Subject: [PATCH 09/13] futex: Add flags2 argument to futex_requeue() mainline inclusion from mainline-v6.7-rc1 category: feature In order to support mixed size requeue, add a second flags argument to the internal futex_requeue() function. No functional change intended. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Thomas Gleixner Link: https://lore.kernel.org/r/20230921105248.396780136@noisy.programming.kicks-ass.net (cherry picked from commit 27b88f3519e72d71c8cead6b835a26c171109c9b) Signed-off-by: Wentao Guan --- kernel/futex/futex.h | 5 +++-- kernel/futex/requeue.c | 12 +++++++----- kernel/futex/syscalls.c | 6 +++--- 3 files changed, 13 insertions(+), 10 deletions(-) diff --git a/kernel/futex/futex.h b/kernel/futex/futex.h index a8ea5ef524242..a06030a1a27b9 100644 --- a/kernel/futex/futex.h +++ b/kernel/futex/futex.h @@ -328,8 +328,9 @@ extern int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, u32 val, ktime_t *abs_time, u32 bitset, u32 __user *uaddr2); -extern int futex_requeue(u32 __user *uaddr1, unsigned int flags, - u32 __user *uaddr2, int nr_wake, int nr_requeue, +extern int futex_requeue(u32 __user *uaddr1, unsigned int flags1, + u32 __user *uaddr2, unsigned int flags2, + int nr_wake, int nr_requeue, u32 *cmpval, int requeue_pi); extern int __futex_wait(u32 __user *uaddr, unsigned int flags, u32 val, diff --git a/kernel/futex/requeue.c b/kernel/futex/requeue.c index dcfed5e6012f5..a7641ee9d1604 100644 --- a/kernel/futex/requeue.c +++ b/kernel/futex/requeue.c @@ -351,8 +351,9 @@ futex_proxy_trylock_atomic(u32 __user *pifutex, struct futex_hash_bucket *hb1, /** * futex_requeue() - Requeue waiters from uaddr1 to uaddr2 * @uaddr1: source futex user address - * @flags: futex flags (FLAGS_SHARED, etc.) + * @flags1: futex flags (FLAGS_SHARED, etc.) * @uaddr2: target futex user address + * @flags2: futex flags (FLAGS_SHARED, etc.) * @nr_wake: number of waiters to wake (must be 1 for requeue_pi) * @nr_requeue: number of waiters to requeue (0-INT_MAX) * @cmpval: @uaddr1 expected value (or %NULL) @@ -366,7 +367,8 @@ futex_proxy_trylock_atomic(u32 __user *pifutex, struct futex_hash_bucket *hb1, * - >=0 - on success, the number of tasks requeued or woken; * - <0 - on error */ -int futex_requeue(u32 __user *uaddr1, unsigned int flags, u32 __user *uaddr2, +int futex_requeue(u32 __user *uaddr1, unsigned int flags1, + u32 __user *uaddr2, unsigned int flags2, int nr_wake, int nr_requeue, u32 *cmpval, int requeue_pi) { union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT; @@ -429,10 +431,10 @@ int futex_requeue(u32 __user *uaddr1, unsigned int flags, u32 __user *uaddr2, } retry: - ret = get_futex_key(uaddr1, flags, &key1, FUTEX_READ); + ret = get_futex_key(uaddr1, flags1, &key1, FUTEX_READ); if (unlikely(ret != 0)) return ret; - ret = get_futex_key(uaddr2, flags, &key2, + ret = get_futex_key(uaddr2, flags2, &key2, requeue_pi ? FUTEX_WRITE : FUTEX_READ); if (unlikely(ret != 0)) return ret; @@ -464,7 +466,7 @@ int futex_requeue(u32 __user *uaddr1, unsigned int flags, u32 __user *uaddr2, if (ret) return ret; - if (!(flags & FLAGS_SHARED)) + if (!(flags1 & FLAGS_SHARED)) goto retry_private; goto retry; diff --git a/kernel/futex/syscalls.c b/kernel/futex/syscalls.c index be83d67c68d74..9528d308278e7 100644 --- a/kernel/futex/syscalls.c +++ b/kernel/futex/syscalls.c @@ -134,9 +134,9 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, case FUTEX_WAKE_BITSET: return futex_wake(uaddr, flags, val, val3); case FUTEX_REQUEUE: - return futex_requeue(uaddr, flags, uaddr2, val, val2, NULL, 0); + return futex_requeue(uaddr, flags, uaddr2, flags, val, val2, NULL, 0); case FUTEX_CMP_REQUEUE: - return futex_requeue(uaddr, flags, uaddr2, val, val2, &val3, 0); + return futex_requeue(uaddr, flags, uaddr2, flags, val, val2, &val3, 0); case FUTEX_WAKE_OP: return futex_wake_op(uaddr, flags, uaddr2, val, val2, val3); case FUTEX_LOCK_PI: @@ -153,7 +153,7 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, return futex_wait_requeue_pi(uaddr, flags, val, timeout, val3, uaddr2); case FUTEX_CMP_REQUEUE_PI: - return futex_requeue(uaddr, flags, uaddr2, val, val2, &val3, 1); + return futex_requeue(uaddr, flags, uaddr2, flags, val, val2, &val3, 1); } return -ENOSYS; } From 9227278dea10d07483296cf09833c4197c29bb63 Mon Sep 17 00:00:00 2001 From: "peterz@infradead.org" Date: Thu, 21 Sep 2023 12:45:15 +0200 Subject: [PATCH 10/13] futex: Add sys_futex_requeue() mainline inclusion from mainline-v6.7-rc1 category: feature Finish off the 'simple' futex2 syscall group by adding sys_futex_requeue(). Unlike sys_futex_{wait,wake}() its arguments are too numerous to fit into a regular syscall. As such, use struct futex_waitv to pass the 'source' and 'destination' futexes to the syscall. This syscall implements what was previously known as FUTEX_CMP_REQUEUE and uses {val, uaddr, flags} for source and {uaddr, flags} for destination. This design explicitly allows requeueing between different types of futex by having a different flags word per uaddr. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Thomas Gleixner Acked-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20230921105248.511860556@noisy.programming.kicks-ass.net (cherry picked from commit 0f4b5f972216782a4acb1ae00dcb55173847c2ff) Signed-off-by: Wentao Guan --- arch/alpha/kernel/syscalls/syscall.tbl | 1 + arch/arm/tools/syscall.tbl | 1 + arch/arm64/include/asm/unistd.h | 2 +- arch/arm64/include/asm/unistd32.h | 2 ++ arch/ia64/kernel/syscalls/syscall.tbl | 1 + arch/m68k/kernel/syscalls/syscall.tbl | 1 + arch/microblaze/kernel/syscalls/syscall.tbl | 1 + arch/mips/kernel/syscalls/syscall_n32.tbl | 1 + arch/mips/kernel/syscalls/syscall_n64.tbl | 1 + arch/mips/kernel/syscalls/syscall_o32.tbl | 1 + arch/parisc/kernel/syscalls/syscall.tbl | 1 + arch/powerpc/kernel/syscalls/syscall.tbl | 1 + arch/s390/kernel/syscalls/syscall.tbl | 1 + arch/sh/kernel/syscalls/syscall.tbl | 1 + arch/sparc/kernel/syscalls/syscall.tbl | 1 + arch/x86/entry/syscalls/syscall_32.tbl | 1 + arch/x86/entry/syscalls/syscall_64.tbl | 1 + arch/xtensa/kernel/syscalls/syscall.tbl | 1 + include/linux/syscalls.h | 3 ++ include/uapi/asm-generic/unistd.h | 4 ++- kernel/futex/syscalls.c | 38 +++++++++++++++++++++ kernel/sys_ni.c | 1 + 22 files changed, 64 insertions(+), 2 deletions(-) diff --git a/arch/alpha/kernel/syscalls/syscall.tbl b/arch/alpha/kernel/syscalls/syscall.tbl index c49f12fd264e3..b1865f9bb31e2 100644 --- a/arch/alpha/kernel/syscalls/syscall.tbl +++ b/arch/alpha/kernel/syscalls/syscall.tbl @@ -494,3 +494,4 @@ 562 common fchmodat2 sys_fchmodat2 563 common futex_wake sys_futex_wake 564 common futex_wait sys_futex_wait +565 common futex_requeue sys_futex_requeue diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl index a6cf562773270..93d0d46cbb15a 100644 --- a/arch/arm/tools/syscall.tbl +++ b/arch/arm/tools/syscall.tbl @@ -468,3 +468,4 @@ 452 common fchmodat2 sys_fchmodat2 454 common futex_wake sys_futex_wake 455 common futex_wait sys_futex_wait +456 common futex_requeue sys_futex_requeue diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h index f33190f17ebb6..531effca5f1fc 100644 --- a/arch/arm64/include/asm/unistd.h +++ b/arch/arm64/include/asm/unistd.h @@ -39,7 +39,7 @@ #define __ARM_NR_compat_set_tls (__ARM_NR_COMPAT_BASE + 5) #define __ARM_NR_COMPAT_END (__ARM_NR_COMPAT_BASE + 0x800) -#define __NR_compat_syscalls 456 +#define __NR_compat_syscalls 457 #endif #define __ARCH_WANT_SYS_CLONE diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h index 60d3c6ed13e19..a5b5b0a114005 100644 --- a/arch/arm64/include/asm/unistd32.h +++ b/arch/arm64/include/asm/unistd32.h @@ -915,6 +915,8 @@ __SYSCALL(__NR_fchmodat2, sys_fchmodat2) __SYSCALL(__NR_futex_wake, sys_futex_wake) #define __NR_futex_wait 455 __SYSCALL(__NR_futex_wait, sys_futex_wait) +#define __NR_futex_requeue 456 +__SYSCALL(__NR_futex_requeue, sys_futex_requeue) /* * Please add new compat syscalls above this comment and update diff --git a/arch/ia64/kernel/syscalls/syscall.tbl b/arch/ia64/kernel/syscalls/syscall.tbl index 4043f0c55170e..81375ea782882 100644 --- a/arch/ia64/kernel/syscalls/syscall.tbl +++ b/arch/ia64/kernel/syscalls/syscall.tbl @@ -375,3 +375,4 @@ 452 common fchmodat2 sys_fchmodat2 454 common futex_wake sys_futex_wake 455 common futex_wait sys_futex_wait +456 common futex_requeue sys_futex_requeue diff --git a/arch/m68k/kernel/syscalls/syscall.tbl b/arch/m68k/kernel/syscalls/syscall.tbl index 24841674acc53..f7f997a88bab5 100644 --- a/arch/m68k/kernel/syscalls/syscall.tbl +++ b/arch/m68k/kernel/syscalls/syscall.tbl @@ -454,3 +454,4 @@ 452 common fchmodat2 sys_fchmodat2 454 common futex_wake sys_futex_wake 455 common futex_wait sys_futex_wait +456 common futex_requeue sys_futex_requeue diff --git a/arch/microblaze/kernel/syscalls/syscall.tbl b/arch/microblaze/kernel/syscalls/syscall.tbl index f03927ab0220a..2967ec26b9781 100644 --- a/arch/microblaze/kernel/syscalls/syscall.tbl +++ b/arch/microblaze/kernel/syscalls/syscall.tbl @@ -460,3 +460,4 @@ 452 common fchmodat2 sys_fchmodat2 454 common futex_wake sys_futex_wake 455 common futex_wait sys_futex_wait +456 common futex_requeue sys_futex_requeue diff --git a/arch/mips/kernel/syscalls/syscall_n32.tbl b/arch/mips/kernel/syscalls/syscall_n32.tbl index 498eab2b86a3c..71a4743b7d571 100644 --- a/arch/mips/kernel/syscalls/syscall_n32.tbl +++ b/arch/mips/kernel/syscalls/syscall_n32.tbl @@ -393,3 +393,4 @@ 452 n32 fchmodat2 sys_fchmodat2 454 n32 futex_wake sys_futex_wake 455 n32 futex_wait sys_futex_wait +456 n32 futex_requeue sys_futex_requeue diff --git a/arch/mips/kernel/syscalls/syscall_n64.tbl b/arch/mips/kernel/syscalls/syscall_n64.tbl index faff8dfd29833..c9bd09ba905f1 100644 --- a/arch/mips/kernel/syscalls/syscall_n64.tbl +++ b/arch/mips/kernel/syscalls/syscall_n64.tbl @@ -369,3 +369,4 @@ 452 n64 fchmodat2 sys_fchmodat2 454 n64 futex_wake sys_futex_wake 455 n64 futex_wait sys_futex_wait +456 n64 futex_requeue sys_futex_requeue diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl b/arch/mips/kernel/syscalls/syscall_o32.tbl index caacf64ebb726..66370d57ab924 100644 --- a/arch/mips/kernel/syscalls/syscall_o32.tbl +++ b/arch/mips/kernel/syscalls/syscall_o32.tbl @@ -442,3 +442,4 @@ 452 o32 fchmodat2 sys_fchmodat2 454 o32 futex_wake sys_futex_wake 455 o32 futex_wait sys_futex_wait +456 o32 futex_requeue sys_futex_requeue diff --git a/arch/parisc/kernel/syscalls/syscall.tbl b/arch/parisc/kernel/syscalls/syscall.tbl index cced159bdc041..6d701573e90bf 100644 --- a/arch/parisc/kernel/syscalls/syscall.tbl +++ b/arch/parisc/kernel/syscalls/syscall.tbl @@ -453,3 +453,4 @@ 452 common fchmodat2 sys_fchmodat2 454 common futex_wake sys_futex_wake 455 common futex_wait sys_futex_wait +456 common futex_requeue sys_futex_requeue diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl index 799f83da8b150..dea552c1b1b4f 100644 --- a/arch/powerpc/kernel/syscalls/syscall.tbl +++ b/arch/powerpc/kernel/syscalls/syscall.tbl @@ -545,3 +545,4 @@ 452 common fchmodat2 sys_fchmodat2 454 common futex_wake sys_futex_wake 455 common futex_wait sys_futex_wait +456 common futex_requeue sys_futex_requeue diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl index e3c0e99e63921..d69c1eae27547 100644 --- a/arch/s390/kernel/syscalls/syscall.tbl +++ b/arch/s390/kernel/syscalls/syscall.tbl @@ -457,3 +457,4 @@ 452 common fchmodat2 sys_fchmodat2 sys_fchmodat2 454 common futex_wake sys_futex_wake sys_futex_wake 455 common futex_wait sys_futex_wait sys_futex_wait +456 common futex_requeue sys_futex_requeue sys_futex_requeue diff --git a/arch/sh/kernel/syscalls/syscall.tbl b/arch/sh/kernel/syscalls/syscall.tbl index 37641fd1d02ca..6e7ff9f0e77da 100644 --- a/arch/sh/kernel/syscalls/syscall.tbl +++ b/arch/sh/kernel/syscalls/syscall.tbl @@ -458,3 +458,4 @@ 452 common fchmodat2 sys_fchmodat2 454 common futex_wake sys_futex_wake 455 common futex_wait sys_futex_wait +456 common futex_requeue sys_futex_requeue diff --git a/arch/sparc/kernel/syscalls/syscall.tbl b/arch/sparc/kernel/syscalls/syscall.tbl index 134c4656bb276..d4b3dccb482f7 100644 --- a/arch/sparc/kernel/syscalls/syscall.tbl +++ b/arch/sparc/kernel/syscalls/syscall.tbl @@ -500,3 +500,4 @@ 452 common fchmodat2 sys_fchmodat2 454 common futex_wake sys_futex_wake 455 common futex_wait sys_futex_wait +456 common futex_requeue sys_futex_requeue diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl index 0812b33e03ea1..222be60d169cc 100644 --- a/arch/x86/entry/syscalls/syscall_32.tbl +++ b/arch/x86/entry/syscalls/syscall_32.tbl @@ -459,3 +459,4 @@ 452 i386 fchmodat2 sys_fchmodat2 454 i386 futex_wake sys_futex_wake 455 i386 futex_wait sys_futex_wait +456 i386 futex_requeue sys_futex_requeue diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl index ddf6288823ad0..a577bb27c16d1 100644 --- a/arch/x86/entry/syscalls/syscall_64.tbl +++ b/arch/x86/entry/syscalls/syscall_64.tbl @@ -377,6 +377,7 @@ 453 64 map_shadow_stack sys_map_shadow_stack 454 common futex_wake sys_futex_wake 455 common futex_wait sys_futex_wait +456 common futex_requeue sys_futex_requeue # # Due to a historical design error, certain syscalls are numbered differently diff --git a/arch/xtensa/kernel/syscalls/syscall.tbl b/arch/xtensa/kernel/syscalls/syscall.tbl index ac278dbce2eee..dd71ecce8b86e 100644 --- a/arch/xtensa/kernel/syscalls/syscall.tbl +++ b/arch/xtensa/kernel/syscalls/syscall.tbl @@ -425,3 +425,4 @@ 452 common fchmodat2 sys_fchmodat2 454 common futex_wake sys_futex_wake 455 common futex_wait sys_futex_wait +456 common futex_requeue sys_futex_requeue diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 0760b0bb1c4ee..3cec081ea0942 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -557,6 +557,9 @@ asmlinkage long sys_futex_wait(void __user *uaddr, unsigned long val, unsigned l unsigned int flags, struct __kernel_timespec __user *timespec, clockid_t clockid); +asmlinkage long sys_futex_requeue(struct futex_waitv __user *waiters, + unsigned int flags, int nr_wake, int nr_requeue); + asmlinkage long sys_nanosleep(struct __kernel_timespec __user *rqtp, struct __kernel_timespec __user *rmtp); asmlinkage long sys_nanosleep_time32(struct old_timespec32 __user *rqtp, diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h index 325f513f204c4..cfdb31fbe732d 100644 --- a/include/uapi/asm-generic/unistd.h +++ b/include/uapi/asm-generic/unistd.h @@ -826,9 +826,11 @@ __SYSCALL(__NR_fchmodat2, sys_fchmodat2) __SYSCALL(__NR_futex_wake, sys_futex_wake) #define __NR_futex_wait 455 __SYSCALL(__NR_futex_wait, sys_futex_wait) +#define __NR_futex_requeue 456 +__SYSCALL(__NR_futex_requeue, sys_futex_requeue) #undef __NR_syscalls -#define __NR_syscalls 456 +#define __NR_syscalls 457 /* * 32 bit systems traditionally used different diff --git a/kernel/futex/syscalls.c b/kernel/futex/syscalls.c index 9528d308278e7..9387185924ac5 100644 --- a/kernel/futex/syscalls.c +++ b/kernel/futex/syscalls.c @@ -424,6 +424,44 @@ SYSCALL_DEFINE6(futex_wait, return ret; } +/* + * sys_futex_requeue - Requeue a waiter from one futex to another + * @waiters: array describing the source and destination futex + * @flags: unused + * @nr_wake: number of futexes to wake + * @nr_requeue: number of futexes to requeue + * + * Identical to the traditional FUTEX_CMP_REQUEUE op, except it is part of the + * futex2 family of calls. + */ + +SYSCALL_DEFINE4(futex_requeue, + struct futex_waitv __user *, waiters, + unsigned int, flags, + int, nr_wake, + int, nr_requeue) +{ + struct futex_vector futexes[2]; + u32 cmpval; + int ret; + + if (flags) + return -EINVAL; + + if (!waiters) + return -EINVAL; + + ret = futex_parse_waitv(futexes, waiters, 2); + if (ret) + return ret; + + cmpval = futexes[0].w.val; + + return futex_requeue(u64_to_user_ptr(futexes[0].w.uaddr), futexes[0].w.flags, + u64_to_user_ptr(futexes[1].w.uaddr), futexes[1].w.flags, + nr_wake, nr_requeue, &cmpval, 0); +} + #ifdef CONFIG_COMPAT COMPAT_SYSCALL_DEFINE2(set_robust_list, struct compat_robust_list_head __user *, head, diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c index d6abb2b14a2e4..1d6f265896981 100644 --- a/kernel/sys_ni.c +++ b/kernel/sys_ni.c @@ -89,6 +89,7 @@ COND_SYSCALL_COMPAT(get_robust_list); COND_SYSCALL(futex_waitv); COND_SYSCALL(futex_wake); COND_SYSCALL(futex_wait); +COND_SYSCALL(futex_requeue); COND_SYSCALL(kexec_load); COND_SYSCALL_COMPAT(kexec_load); COND_SYSCALL(init_module); From ff05207d540caf7d5c37a83cb0f8ed03d279458c Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 26 Mar 2026 13:35:53 +0100 Subject: [PATCH 11/13] futex: Require sys_futex_requeue() to have identical flags stable inclusion from stable-v6.18.21 category: bugfix [ Upstream commit 19f94b39058681dec64a10ebeb6f23fe7fc3f77a ] Nicholas reported that his LLM found it was possible to create a UaF when sys_futex_requeue() is used with different flags. The initial motivation for allowing different flags was the variable sized futex, but since that hasn't been merged (yet), simply mandate the flags are identical, as is the case for the old style sys_futex() requeue operations. Fixes: 0f4b5f972216 ("futex: Add sys_futex_requeue()") Reported-by: Nicholas Carlini Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Sasha Levin (cherry picked from commit 18b7d09c2b794c71d4252f3ea2cf84ad12b73d6a) Signed-off-by: Wentao Guan --- kernel/futex/syscalls.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/kernel/futex/syscalls.c b/kernel/futex/syscalls.c index 9387185924ac5..ea424390e7a2c 100644 --- a/kernel/futex/syscalls.c +++ b/kernel/futex/syscalls.c @@ -455,6 +455,14 @@ SYSCALL_DEFINE4(futex_requeue, if (ret) return ret; + /* + * For now mandate both flags are identical, like the sys_futex() + * interface has. If/when we merge the variable sized futex support, + * that patch can modify this test to allow a difference in size. + */ + if (futexes[0].w.flags != futexes[1].w.flags) + return -EINVAL; + cmpval = futexes[0].w.val; return futex_requeue(u64_to_user_ptr(futexes[0].w.uaddr), futexes[0].w.flags, From 99fd1589539ed519d6cbd02d3e56271369426ca9 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 16 Oct 2023 13:12:54 +0200 Subject: [PATCH 12/13] alpha: Fix up new futex syscall numbers mainline inclusion from mainline-v6.7-rc1 category: bugfix As per Arnd, Alpha syscalls since time64 are offset by 120, retain this offset. Fixes: 9f6c532f59b2 ("futex: Add sys_futex_wake()") Reported-by: Arnd Bergmann Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/cb4bb8e2-7dfe-4ca4-aa70-060f7b2f8f95@app.fastmail.com (cherry picked from commit dcc134510eefaec6dda4fe71ab824f0300ed9f9f) Signed-off-by: Wentao Guan --- arch/alpha/kernel/syscalls/syscall.tbl | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/alpha/kernel/syscalls/syscall.tbl b/arch/alpha/kernel/syscalls/syscall.tbl index b1865f9bb31e2..b68f1f56b8366 100644 --- a/arch/alpha/kernel/syscalls/syscall.tbl +++ b/arch/alpha/kernel/syscalls/syscall.tbl @@ -492,6 +492,7 @@ 560 common set_mempolicy_home_node sys_ni_syscall 561 common cachestat sys_cachestat 562 common fchmodat2 sys_fchmodat2 -563 common futex_wake sys_futex_wake -564 common futex_wait sys_futex_wait -565 common futex_requeue sys_futex_requeue +# 563 reserved for map_shadow_stack +564 common futex_wake sys_futex_wake +565 common futex_wait sys_futex_wait +566 common futex_requeue sys_futex_requeue From 85259e4b92c7122a9ee2a509740969697cf4ce48 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 14 Nov 2023 21:36:13 +0100 Subject: [PATCH 13/13] futex: Fix hardcoded flags mainline inclusion from mainline-v6.7-rc2 category: bugfix Xi reported that commit 5694289ce183 ("futex: Flag conversion") broke glibc's robust futex tests. This was narrowed down to the change of FLAGS_SHARED from 0x01 to 0x10, at which point Florian noted that handle_futex_death() has a hardcoded flags argument of 1. Change this to: FLAGS_SIZE_32 | FLAGS_SHARED, matching how futex_to_flags() unconditionally sets FLAGS_SIZE_32 for all legacy futex ops. Reported-by: Xi Ruoyao Reported-by: Florian Weimer Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Link: https://lkml.kernel.org/r/20231114201402.GA25315@noisy.programming.kicks-ass.net Fixes: 5694289ce183 ("futex: Flag conversion") Cc: (cherry picked from commit c9bd1568d5462f4108417518ce1af7b924acfb6f) Signed-off-by: Wentao Guan --- kernel/futex/core.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/kernel/futex/core.c b/kernel/futex/core.c index c64bd6e7d6165..012a9a3b72699 100644 --- a/kernel/futex/core.c +++ b/kernel/futex/core.c @@ -710,7 +710,8 @@ static int handle_futex_death(u32 __user *uaddr, struct task_struct *curr, owner = uval & FUTEX_TID_MASK; if (pending_op && !pi && !owner) { - futex_wake(uaddr, 1, 1, FUTEX_BITSET_MATCH_ANY); + futex_wake(uaddr, FLAGS_SIZE_32 | FLAGS_SHARED, 1, + FUTEX_BITSET_MATCH_ANY); return 0; } @@ -762,8 +763,10 @@ static int handle_futex_death(u32 __user *uaddr, struct task_struct *curr, * Wake robust non-PI futexes here. The wakeup of * PI futexes happens in exit_pi_state(): */ - if (!pi && (uval & FUTEX_WAITERS)) - futex_wake(uaddr, 1, 1, FUTEX_BITSET_MATCH_ANY); + if (!pi && (uval & FUTEX_WAITERS)) { + futex_wake(uaddr, FLAGS_SIZE_32 | FLAGS_SHARED, 1, + FUTEX_BITSET_MATCH_ANY); + } return 0; }