diff --git a/arch/alpha/kernel/syscalls/syscall.tbl b/arch/alpha/kernel/syscalls/syscall.tbl index ad37569d05072..b68f1f56b8366 100644 --- a/arch/alpha/kernel/syscalls/syscall.tbl +++ b/arch/alpha/kernel/syscalls/syscall.tbl @@ -492,3 +492,7 @@ 560 common set_mempolicy_home_node sys_ni_syscall 561 common cachestat sys_cachestat 562 common fchmodat2 sys_fchmodat2 +# 563 reserved for map_shadow_stack +564 common futex_wake sys_futex_wake +565 common futex_wait sys_futex_wait +566 common futex_requeue sys_futex_requeue diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl index c572d6c3dee0f..93d0d46cbb15a 100644 --- a/arch/arm/tools/syscall.tbl +++ b/arch/arm/tools/syscall.tbl @@ -466,3 +466,6 @@ 450 common set_mempolicy_home_node sys_set_mempolicy_home_node 451 common cachestat sys_cachestat 452 common fchmodat2 sys_fchmodat2 +454 common futex_wake sys_futex_wake +455 common futex_wait sys_futex_wait +456 common futex_requeue sys_futex_requeue diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h index bd77253b62e01..531effca5f1fc 100644 --- a/arch/arm64/include/asm/unistd.h +++ b/arch/arm64/include/asm/unistd.h @@ -39,7 +39,7 @@ #define __ARM_NR_compat_set_tls (__ARM_NR_COMPAT_BASE + 5) #define __ARM_NR_COMPAT_END (__ARM_NR_COMPAT_BASE + 0x800) -#define __NR_compat_syscalls 453 +#define __NR_compat_syscalls 457 #endif #define __ARCH_WANT_SYS_CLONE diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h index 545a4a7b5371c..a5b5b0a114005 100644 --- a/arch/arm64/include/asm/unistd32.h +++ b/arch/arm64/include/asm/unistd32.h @@ -911,6 +911,12 @@ __SYSCALL(__NR_set_mempolicy_home_node, sys_set_mempolicy_home_node) __SYSCALL(__NR_cachestat, sys_cachestat) #define __NR_fchmodat2 452 __SYSCALL(__NR_fchmodat2, sys_fchmodat2) +#define __NR_futex_wake 454 +__SYSCALL(__NR_futex_wake, sys_futex_wake) +#define __NR_futex_wait 455 +__SYSCALL(__NR_futex_wait, sys_futex_wait) +#define __NR_futex_requeue 456 +__SYSCALL(__NR_futex_requeue, sys_futex_requeue) /* * Please add new compat syscalls above this comment and update diff --git a/arch/ia64/kernel/syscalls/syscall.tbl b/arch/ia64/kernel/syscalls/syscall.tbl index 83d8609aec030..81375ea782882 100644 --- a/arch/ia64/kernel/syscalls/syscall.tbl +++ b/arch/ia64/kernel/syscalls/syscall.tbl @@ -373,3 +373,6 @@ 450 common set_mempolicy_home_node sys_set_mempolicy_home_node 451 common cachestat sys_cachestat 452 common fchmodat2 sys_fchmodat2 +454 common futex_wake sys_futex_wake +455 common futex_wait sys_futex_wait +456 common futex_requeue sys_futex_requeue diff --git a/arch/m68k/kernel/syscalls/syscall.tbl b/arch/m68k/kernel/syscalls/syscall.tbl index 259ceb125367d..f7f997a88bab5 100644 --- a/arch/m68k/kernel/syscalls/syscall.tbl +++ b/arch/m68k/kernel/syscalls/syscall.tbl @@ -452,3 +452,6 @@ 450 common set_mempolicy_home_node sys_set_mempolicy_home_node 451 common cachestat sys_cachestat 452 common fchmodat2 sys_fchmodat2 +454 common futex_wake sys_futex_wake +455 common futex_wait sys_futex_wait +456 common futex_requeue sys_futex_requeue diff --git a/arch/microblaze/kernel/syscalls/syscall.tbl b/arch/microblaze/kernel/syscalls/syscall.tbl index a3798c2637fd1..2967ec26b9781 100644 --- a/arch/microblaze/kernel/syscalls/syscall.tbl +++ b/arch/microblaze/kernel/syscalls/syscall.tbl @@ -458,3 +458,6 @@ 450 common set_mempolicy_home_node sys_set_mempolicy_home_node 451 common cachestat sys_cachestat 452 common fchmodat2 sys_fchmodat2 +454 common futex_wake sys_futex_wake +455 common futex_wait sys_futex_wait +456 common futex_requeue sys_futex_requeue diff --git a/arch/mips/kernel/syscalls/syscall_n32.tbl b/arch/mips/kernel/syscalls/syscall_n32.tbl index 4a296124604a1..71a4743b7d571 100644 --- a/arch/mips/kernel/syscalls/syscall_n32.tbl +++ b/arch/mips/kernel/syscalls/syscall_n32.tbl @@ -391,3 +391,6 @@ 450 n32 set_mempolicy_home_node sys_set_mempolicy_home_node 451 n32 cachestat sys_cachestat 452 n32 fchmodat2 sys_fchmodat2 +454 n32 futex_wake sys_futex_wake +455 n32 futex_wait sys_futex_wait +456 n32 futex_requeue sys_futex_requeue diff --git a/arch/mips/kernel/syscalls/syscall_n64.tbl b/arch/mips/kernel/syscalls/syscall_n64.tbl index cb5e757f6621c..c9bd09ba905f1 100644 --- a/arch/mips/kernel/syscalls/syscall_n64.tbl +++ b/arch/mips/kernel/syscalls/syscall_n64.tbl @@ -367,3 +367,6 @@ 450 common set_mempolicy_home_node sys_set_mempolicy_home_node 451 n64 cachestat sys_cachestat 452 n64 fchmodat2 sys_fchmodat2 +454 n64 futex_wake sys_futex_wake +455 n64 futex_wait sys_futex_wait +456 n64 futex_requeue sys_futex_requeue diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl b/arch/mips/kernel/syscalls/syscall_o32.tbl index 1ee62a861380a..66370d57ab924 100644 --- a/arch/mips/kernel/syscalls/syscall_o32.tbl +++ b/arch/mips/kernel/syscalls/syscall_o32.tbl @@ -440,3 +440,6 @@ 450 o32 set_mempolicy_home_node sys_set_mempolicy_home_node 451 o32 cachestat sys_cachestat 452 o32 fchmodat2 sys_fchmodat2 +454 o32 futex_wake sys_futex_wake +455 o32 futex_wait sys_futex_wait +456 o32 futex_requeue sys_futex_requeue diff --git a/arch/parisc/kernel/syscalls/syscall.tbl b/arch/parisc/kernel/syscalls/syscall.tbl index 443ce9a053674..6d701573e90bf 100644 --- a/arch/parisc/kernel/syscalls/syscall.tbl +++ b/arch/parisc/kernel/syscalls/syscall.tbl @@ -451,3 +451,6 @@ 450 common set_mempolicy_home_node sys_set_mempolicy_home_node 451 common cachestat sys_cachestat 452 common fchmodat2 sys_fchmodat2 +454 common futex_wake sys_futex_wake +455 common futex_wait sys_futex_wait +456 common futex_requeue sys_futex_requeue diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl index 40f6751271d3d..dea552c1b1b4f 100644 --- a/arch/powerpc/kernel/syscalls/syscall.tbl +++ b/arch/powerpc/kernel/syscalls/syscall.tbl @@ -543,3 +543,6 @@ 450 nospu set_mempolicy_home_node sys_set_mempolicy_home_node 451 common cachestat sys_cachestat 452 common fchmodat2 sys_fchmodat2 +454 common futex_wake sys_futex_wake +455 common futex_wait sys_futex_wait +456 common futex_requeue sys_futex_requeue diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl index 51cc3616d5f98..d69c1eae27547 100644 --- a/arch/s390/kernel/syscalls/syscall.tbl +++ b/arch/s390/kernel/syscalls/syscall.tbl @@ -455,3 +455,6 @@ 450 common set_mempolicy_home_node sys_set_mempolicy_home_node sys_set_mempolicy_home_node 451 common cachestat sys_cachestat sys_cachestat 452 common fchmodat2 sys_fchmodat2 sys_fchmodat2 +454 common futex_wake sys_futex_wake sys_futex_wake +455 common futex_wait sys_futex_wait sys_futex_wait +456 common futex_requeue sys_futex_requeue sys_futex_requeue diff --git a/arch/sh/kernel/syscalls/syscall.tbl b/arch/sh/kernel/syscalls/syscall.tbl index 7e1ceb2ba5721..6e7ff9f0e77da 100644 --- a/arch/sh/kernel/syscalls/syscall.tbl +++ b/arch/sh/kernel/syscalls/syscall.tbl @@ -456,3 +456,6 @@ 450 common set_mempolicy_home_node sys_set_mempolicy_home_node 451 common cachestat sys_cachestat 452 common fchmodat2 sys_fchmodat2 +454 common futex_wake sys_futex_wake +455 common futex_wait sys_futex_wait +456 common futex_requeue sys_futex_requeue diff --git a/arch/sparc/kernel/syscalls/syscall.tbl b/arch/sparc/kernel/syscalls/syscall.tbl index d0f535230ad8b..d4b3dccb482f7 100644 --- a/arch/sparc/kernel/syscalls/syscall.tbl +++ b/arch/sparc/kernel/syscalls/syscall.tbl @@ -498,3 +498,6 @@ 450 common set_mempolicy_home_node sys_set_mempolicy_home_node 451 common cachestat sys_cachestat 452 common fchmodat2 sys_fchmodat2 +454 common futex_wake sys_futex_wake +455 common futex_wait sys_futex_wait +456 common futex_requeue sys_futex_requeue diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl index 38db5ef2329f3..222be60d169cc 100644 --- a/arch/x86/entry/syscalls/syscall_32.tbl +++ b/arch/x86/entry/syscalls/syscall_32.tbl @@ -457,3 +457,6 @@ 450 i386 set_mempolicy_home_node sys_set_mempolicy_home_node 451 i386 cachestat sys_cachestat 452 i386 fchmodat2 sys_fchmodat2 +454 i386 futex_wake sys_futex_wake +455 i386 futex_wait sys_futex_wait +456 i386 futex_requeue sys_futex_requeue diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl index 1d6eee30eceb2..a577bb27c16d1 100644 --- a/arch/x86/entry/syscalls/syscall_64.tbl +++ b/arch/x86/entry/syscalls/syscall_64.tbl @@ -375,6 +375,9 @@ 451 common cachestat sys_cachestat 452 common fchmodat2 sys_fchmodat2 453 64 map_shadow_stack sys_map_shadow_stack +454 common futex_wake sys_futex_wake +455 common futex_wait sys_futex_wait +456 common futex_requeue sys_futex_requeue # # Due to a historical design error, certain syscalls are numbered differently diff --git a/arch/xtensa/kernel/syscalls/syscall.tbl b/arch/xtensa/kernel/syscalls/syscall.tbl index fc1a4f3c81d9b..dd71ecce8b86e 100644 --- a/arch/xtensa/kernel/syscalls/syscall.tbl +++ b/arch/xtensa/kernel/syscalls/syscall.tbl @@ -423,3 +423,6 @@ 450 common set_mempolicy_home_node sys_set_mempolicy_home_node 451 common cachestat sys_cachestat 452 common fchmodat2 sys_fchmodat2 +454 common futex_wake sys_futex_wake +455 common futex_wait sys_futex_wait +456 common futex_requeue sys_futex_requeue diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 36c592e43d652..3cec081ea0942 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -550,6 +550,16 @@ asmlinkage long sys_set_robust_list(struct robust_list_head __user *head, asmlinkage long sys_futex_waitv(struct futex_waitv *waiters, unsigned int nr_futexes, unsigned int flags, struct __kernel_timespec __user *timeout, clockid_t clockid); + +asmlinkage long sys_futex_wake(void __user *uaddr, unsigned long mask, int nr, unsigned int flags); + +asmlinkage long sys_futex_wait(void __user *uaddr, unsigned long val, unsigned long mask, + unsigned int flags, struct __kernel_timespec __user *timespec, + clockid_t clockid); + +asmlinkage long sys_futex_requeue(struct futex_waitv __user *waiters, + unsigned int flags, int nr_wake, int nr_requeue); + asmlinkage long sys_nanosleep(struct __kernel_timespec __user *rqtp, struct __kernel_timespec __user *rmtp); asmlinkage long sys_nanosleep_time32(struct old_timespec32 __user *rqtp, diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h index 05c412c582399..cfdb31fbe732d 100644 --- a/include/uapi/asm-generic/unistd.h +++ b/include/uapi/asm-generic/unistd.h @@ -822,9 +822,15 @@ __SYSCALL(__NR_cachestat, sys_cachestat) #define __NR_fchmodat2 452 __SYSCALL(__NR_fchmodat2, sys_fchmodat2) +#define __NR_futex_wake 454 +__SYSCALL(__NR_futex_wake, sys_futex_wake) +#define __NR_futex_wait 455 +__SYSCALL(__NR_futex_wait, sys_futex_wait) +#define __NR_futex_requeue 456 +__SYSCALL(__NR_futex_requeue, sys_futex_requeue) #undef __NR_syscalls -#define __NR_syscalls 453 +#define __NR_syscalls 457 /* * 32 bit systems traditionally used different diff --git a/include/uapi/linux/futex.h b/include/uapi/linux/futex.h index 71a5df8d26898..d2ee625ea1890 100644 --- a/include/uapi/linux/futex.h +++ b/include/uapi/linux/futex.h @@ -44,10 +44,35 @@ FUTEX_PRIVATE_FLAG) /* - * Flags to specify the bit length of the futex word for futex2 syscalls. - * Currently, only 32 is supported. + * Flags for futex2 syscalls. + * + * NOTE: these are not pure flags, they can also be seen as: + * + * union { + * u32 flags; + * struct { + * u32 size : 2, + * numa : 1, + * : 4, + * private : 1; + * }; + * }; */ -#define FUTEX_32 2 +#define FUTEX2_SIZE_U8 0x00 +#define FUTEX2_SIZE_U16 0x01 +#define FUTEX2_SIZE_U32 0x02 +#define FUTEX2_SIZE_U64 0x03 +#define FUTEX2_NUMA 0x04 + /* 0x08 */ + /* 0x10 */ + /* 0x20 */ + /* 0x40 */ +#define FUTEX2_PRIVATE FUTEX_PRIVATE_FLAG + +#define FUTEX2_SIZE_MASK 0x03 + +/* do not use */ +#define FUTEX_32 FUTEX2_SIZE_U32 /* historical accident :-( */ /* * Max numbers of elements in a futex_waitv array diff --git a/kernel/futex/core.c b/kernel/futex/core.c index 79f1b682089d1..012a9a3b72699 100644 --- a/kernel/futex/core.c +++ b/kernel/futex/core.c @@ -193,7 +193,7 @@ static u64 get_inode_sequence_number(struct inode *inode) /** * get_futex_key() - Get parameters which are the keys for a futex * @uaddr: virtual address of the futex - * @fshared: false for a PROCESS_PRIVATE futex, true for PROCESS_SHARED + * @flags: FLAGS_* * @key: address where result is stored. * @rw: mapping needs to be read/write (values: FUTEX_READ, * FUTEX_WRITE) @@ -217,7 +217,7 @@ static u64 get_inode_sequence_number(struct inode *inode) * * lock_page() might sleep, the caller should not hold a spinlock. */ -int get_futex_key(u32 __user *uaddr, bool fshared, union futex_key *key, +int get_futex_key(u32 __user *uaddr, unsigned int flags, union futex_key *key, enum futex_access rw) { unsigned long address = (unsigned long)uaddr; @@ -225,6 +225,9 @@ int get_futex_key(u32 __user *uaddr, bool fshared, union futex_key *key, struct page *page, *tail; struct address_space *mapping; int err, ro = 0; + bool fshared; + + fshared = flags & FLAGS_SHARED; /* * The futex address must be "naturally" aligned. @@ -707,7 +710,8 @@ static int handle_futex_death(u32 __user *uaddr, struct task_struct *curr, owner = uval & FUTEX_TID_MASK; if (pending_op && !pi && !owner) { - futex_wake(uaddr, 1, 1, FUTEX_BITSET_MATCH_ANY); + futex_wake(uaddr, FLAGS_SIZE_32 | FLAGS_SHARED, 1, + FUTEX_BITSET_MATCH_ANY); return 0; } @@ -759,8 +763,10 @@ static int handle_futex_death(u32 __user *uaddr, struct task_struct *curr, * Wake robust non-PI futexes here. The wakeup of * PI futexes happens in exit_pi_state(): */ - if (!pi && (uval & FUTEX_WAITERS)) - futex_wake(uaddr, 1, 1, FUTEX_BITSET_MATCH_ANY); + if (!pi && (uval & FUTEX_WAITERS)) { + futex_wake(uaddr, FLAGS_SIZE_32 | FLAGS_SHARED, 1, + FUTEX_BITSET_MATCH_ANY); + } return 0; } diff --git a/kernel/futex/futex.h b/kernel/futex/futex.h index b5379c0e6d6d1..a06030a1a27b9 100644 --- a/kernel/futex/futex.h +++ b/kernel/futex/futex.h @@ -5,6 +5,7 @@ #include #include #include +#include #ifdef CONFIG_PREEMPT_RT #include @@ -16,17 +17,84 @@ * Futex flags used to encode options to functions and preserve them across * restarts. */ +#define FLAGS_SIZE_8 0x0000 +#define FLAGS_SIZE_16 0x0001 +#define FLAGS_SIZE_32 0x0002 +#define FLAGS_SIZE_64 0x0003 + +#define FLAGS_SIZE_MASK 0x0003 + #ifdef CONFIG_MMU -# define FLAGS_SHARED 0x01 +# define FLAGS_SHARED 0x0010 #else /* * NOMMU does not have per process address space. Let the compiler optimize * code away. */ -# define FLAGS_SHARED 0x00 +# define FLAGS_SHARED 0x0000 #endif -#define FLAGS_CLOCKRT 0x02 -#define FLAGS_HAS_TIMEOUT 0x04 +#define FLAGS_CLOCKRT 0x0020 +#define FLAGS_HAS_TIMEOUT 0x0040 +#define FLAGS_NUMA 0x0080 +#define FLAGS_STRICT 0x0100 + +/* FUTEX_ to FLAGS_ */ +static inline unsigned int futex_to_flags(unsigned int op) +{ + unsigned int flags = FLAGS_SIZE_32; + + if (!(op & FUTEX_PRIVATE_FLAG)) + flags |= FLAGS_SHARED; + + if (op & FUTEX_CLOCK_REALTIME) + flags |= FLAGS_CLOCKRT; + + return flags; +} + +/* FUTEX2_ to FLAGS_ */ +static inline unsigned int futex2_to_flags(unsigned int flags2) +{ + unsigned int flags = flags2 & FUTEX2_SIZE_MASK; + + if (!(flags2 & FUTEX2_PRIVATE)) + flags |= FLAGS_SHARED; + + if (flags2 & FUTEX2_NUMA) + flags |= FLAGS_NUMA; + + return flags; +} + +static inline unsigned int futex_size(unsigned int flags) +{ + return 1 << (flags & FLAGS_SIZE_MASK); +} + +static inline bool futex_flags_valid(unsigned int flags) +{ + /* Only 64bit futexes for 64bit code */ + if (!IS_ENABLED(CONFIG_64BIT) || in_compat_syscall()) { + if ((flags & FLAGS_SIZE_MASK) == FLAGS_SIZE_64) + return false; + } + + /* Only 32bit futexes are implemented -- for now */ + if ((flags & FLAGS_SIZE_MASK) != FLAGS_SIZE_32) + return false; + + return true; +} + +static inline bool futex_validate_input(unsigned int flags, u64 val) +{ + int bits = 8 * futex_size(flags); + + if (bits < 64 && (val >> bits)) + return false; + + return true; +} #ifdef CONFIG_FAIL_FUTEX extern bool should_fail_futex(bool fshared); @@ -116,7 +184,7 @@ enum futex_access { FUTEX_WRITE }; -extern int get_futex_key(u32 __user *uaddr, bool fshared, union futex_key *key, +extern int get_futex_key(u32 __user *uaddr, unsigned int flags, union futex_key *key, enum futex_access rw); extern struct hrtimer_sleeper * @@ -260,10 +328,14 @@ extern int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, u32 val, ktime_t *abs_time, u32 bitset, u32 __user *uaddr2); -extern int futex_requeue(u32 __user *uaddr1, unsigned int flags, - u32 __user *uaddr2, int nr_wake, int nr_requeue, +extern int futex_requeue(u32 __user *uaddr1, unsigned int flags1, + u32 __user *uaddr2, unsigned int flags2, + int nr_wake, int nr_requeue, u32 *cmpval, int requeue_pi); +extern int __futex_wait(u32 __user *uaddr, unsigned int flags, u32 val, + struct hrtimer_sleeper *to, u32 bitset); + extern int futex_wait(u32 __user *uaddr, unsigned int flags, u32 val, ktime_t *abs_time, u32 bitset); diff --git a/kernel/futex/pi.c b/kernel/futex/pi.c index ade6d0fe6e3c9..a309bf88f1e10 100644 --- a/kernel/futex/pi.c +++ b/kernel/futex/pi.c @@ -934,7 +934,7 @@ int futex_lock_pi(u32 __user *uaddr, unsigned int flags, ktime_t *time, int tryl retry: exiting = NULL; - ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q.key, FUTEX_WRITE); + ret = get_futex_key(uaddr, flags, &q.key, FUTEX_WRITE); if (unlikely(ret != 0)) goto out; @@ -1130,7 +1130,7 @@ int futex_unlock_pi(u32 __user *uaddr, unsigned int flags) if ((uval & FUTEX_TID_MASK) != vpid) return -EPERM; - ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key, FUTEX_WRITE); + ret = get_futex_key(uaddr, flags, &key, FUTEX_WRITE); if (ret) return ret; diff --git a/kernel/futex/requeue.c b/kernel/futex/requeue.c index fc3427e37320c..a7641ee9d1604 100644 --- a/kernel/futex/requeue.c +++ b/kernel/futex/requeue.c @@ -351,8 +351,9 @@ futex_proxy_trylock_atomic(u32 __user *pifutex, struct futex_hash_bucket *hb1, /** * futex_requeue() - Requeue waiters from uaddr1 to uaddr2 * @uaddr1: source futex user address - * @flags: futex flags (FLAGS_SHARED, etc.) + * @flags1: futex flags (FLAGS_SHARED, etc.) * @uaddr2: target futex user address + * @flags2: futex flags (FLAGS_SHARED, etc.) * @nr_wake: number of waiters to wake (must be 1 for requeue_pi) * @nr_requeue: number of waiters to requeue (0-INT_MAX) * @cmpval: @uaddr1 expected value (or %NULL) @@ -366,7 +367,8 @@ futex_proxy_trylock_atomic(u32 __user *pifutex, struct futex_hash_bucket *hb1, * - >=0 - on success, the number of tasks requeued or woken; * - <0 - on error */ -int futex_requeue(u32 __user *uaddr1, unsigned int flags, u32 __user *uaddr2, +int futex_requeue(u32 __user *uaddr1, unsigned int flags1, + u32 __user *uaddr2, unsigned int flags2, int nr_wake, int nr_requeue, u32 *cmpval, int requeue_pi) { union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT; @@ -429,10 +431,10 @@ int futex_requeue(u32 __user *uaddr1, unsigned int flags, u32 __user *uaddr2, } retry: - ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1, FUTEX_READ); + ret = get_futex_key(uaddr1, flags1, &key1, FUTEX_READ); if (unlikely(ret != 0)) return ret; - ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, + ret = get_futex_key(uaddr2, flags2, &key2, requeue_pi ? FUTEX_WRITE : FUTEX_READ); if (unlikely(ret != 0)) return ret; @@ -464,7 +466,7 @@ int futex_requeue(u32 __user *uaddr1, unsigned int flags, u32 __user *uaddr2, if (ret) return ret; - if (!(flags & FLAGS_SHARED)) + if (!(flags1 & FLAGS_SHARED)) goto retry_private; goto retry; @@ -796,7 +798,7 @@ int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, */ rt_mutex_init_waiter(&rt_waiter); - ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, FUTEX_WRITE); + ret = get_futex_key(uaddr2, flags, &key2, FUTEX_WRITE); if (unlikely(ret != 0)) goto out; diff --git a/kernel/futex/syscalls.c b/kernel/futex/syscalls.c index 48feaa545b3c7..ea424390e7a2c 100644 --- a/kernel/futex/syscalls.c +++ b/kernel/futex/syscalls.c @@ -1,6 +1,5 @@ // SPDX-License-Identifier: GPL-2.0-or-later -#include #include #include @@ -113,15 +112,12 @@ SYSCALL_DEFINE3(get_robust_list, int, pid, long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, u32 __user *uaddr2, u32 val2, u32 val3) { + unsigned int flags = futex_to_flags(op); int cmd = op & FUTEX_CMD_MASK; - unsigned int flags = 0; - if (!(op & FUTEX_PRIVATE_FLAG)) - flags |= FLAGS_SHARED; - - if (op & FUTEX_CLOCK_REALTIME) { - flags |= FLAGS_CLOCKRT; - if (cmd != FUTEX_WAIT_BITSET && cmd != FUTEX_WAIT_REQUEUE_PI && + if (flags & FLAGS_CLOCKRT) { + if (cmd != FUTEX_WAIT_BITSET && + cmd != FUTEX_WAIT_REQUEUE_PI && cmd != FUTEX_LOCK_PI2) return -ENOSYS; } @@ -138,9 +134,9 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, case FUTEX_WAKE_BITSET: return futex_wake(uaddr, flags, val, val3); case FUTEX_REQUEUE: - return futex_requeue(uaddr, flags, uaddr2, val, val2, NULL, 0); + return futex_requeue(uaddr, flags, uaddr2, flags, val, val2, NULL, 0); case FUTEX_CMP_REQUEUE: - return futex_requeue(uaddr, flags, uaddr2, val, val2, &val3, 0); + return futex_requeue(uaddr, flags, uaddr2, flags, val, val2, &val3, 0); case FUTEX_WAKE_OP: return futex_wake_op(uaddr, flags, uaddr2, val, val2, val3); case FUTEX_LOCK_PI: @@ -157,7 +153,7 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, return futex_wait_requeue_pi(uaddr, flags, val, timeout, val3, uaddr2); case FUTEX_CMP_REQUEUE_PI: - return futex_requeue(uaddr, flags, uaddr2, val, val2, &val3, 1); + return futex_requeue(uaddr, flags, uaddr2, flags, val, val2, &val3, 1); } return -ENOSYS; } @@ -211,8 +207,7 @@ SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val, return do_futex(uaddr, op, val, tp, uaddr2, (unsigned long)utime, val3); } -/* Mask of available flags for each futex in futex_waitv list */ -#define FUTEXV_WAITER_MASK (FUTEX_32 | FUTEX_PRIVATE_FLAG) +#define FUTEX2_VALID_MASK (FUTEX2_SIZE_MASK | FUTEX2_PRIVATE) /** * futex_parse_waitv - Parse a waitv array from userspace @@ -230,16 +225,22 @@ static int futex_parse_waitv(struct futex_vector *futexv, unsigned int i; for (i = 0; i < nr_futexes; i++) { + unsigned int flags; + if (copy_from_user(&aux, &uwaitv[i], sizeof(aux))) return -EFAULT; - if ((aux.flags & ~FUTEXV_WAITER_MASK) || aux.__reserved) + if ((aux.flags & ~FUTEX2_VALID_MASK) || aux.__reserved) return -EINVAL; - if (!(aux.flags & FUTEX_32)) + flags = futex2_to_flags(aux.flags); + if (!futex_flags_valid(flags)) return -EINVAL; - futexv[i].w.flags = aux.flags; + if (!futex_validate_input(flags, aux.val)) + return -EINVAL; + + futexv[i].w.flags = flags; futexv[i].w.val = aux.val; futexv[i].w.uaddr = aux.uaddr; futexv[i].q = futex_q_init; @@ -248,6 +249,46 @@ static int futex_parse_waitv(struct futex_vector *futexv, return 0; } +static int futex2_setup_timeout(struct __kernel_timespec __user *timeout, + clockid_t clockid, struct hrtimer_sleeper *to) +{ + int flag_clkid = 0, flag_init = 0; + struct timespec64 ts; + ktime_t time; + int ret; + + if (!timeout) + return 0; + + if (clockid == CLOCK_REALTIME) { + flag_clkid = FLAGS_CLOCKRT; + flag_init = FUTEX_CLOCK_REALTIME; + } + + if (clockid != CLOCK_REALTIME && clockid != CLOCK_MONOTONIC) + return -EINVAL; + + if (get_timespec64(&ts, timeout)) + return -EFAULT; + + /* + * Since there's no opcode for futex_waitv, use + * FUTEX_WAIT_BITSET that uses absolute timeout as well + */ + ret = futex_init_timeout(FUTEX_WAIT_BITSET, flag_init, &ts, &time); + if (ret) + return ret; + + futex_setup_timer(&time, to, flag_clkid, 0); + return 0; +} + +static inline void futex2_destroy_timeout(struct hrtimer_sleeper *to) +{ + hrtimer_cancel(&to->timer); + destroy_hrtimer_on_stack(&to->timer); +} + /** * sys_futex_waitv - Wait on a list of futexes * @waiters: List of futexes to wait on @@ -277,8 +318,6 @@ SYSCALL_DEFINE5(futex_waitv, struct futex_waitv __user *, waiters, { struct hrtimer_sleeper to; struct futex_vector *futexv; - struct timespec64 ts; - ktime_t time; int ret; /* This syscall supports no flags for now */ @@ -288,30 +327,8 @@ SYSCALL_DEFINE5(futex_waitv, struct futex_waitv __user *, waiters, if (!nr_futexes || nr_futexes > FUTEX_WAITV_MAX || !waiters) return -EINVAL; - if (timeout) { - int flag_clkid = 0, flag_init = 0; - - if (clockid == CLOCK_REALTIME) { - flag_clkid = FLAGS_CLOCKRT; - flag_init = FUTEX_CLOCK_REALTIME; - } - - if (clockid != CLOCK_REALTIME && clockid != CLOCK_MONOTONIC) - return -EINVAL; - - if (get_timespec64(&ts, timeout)) - return -EFAULT; - - /* - * Since there's no opcode for futex_waitv, use - * FUTEX_WAIT_BITSET that uses absolute timeout as well - */ - ret = futex_init_timeout(FUTEX_WAIT_BITSET, flag_init, &ts, &time); - if (ret) - return ret; - - futex_setup_timer(&time, &to, flag_clkid, 0); - } + if (timeout && (ret = futex2_setup_timeout(timeout, clockid, &to))) + return ret; futexv = kcalloc(nr_futexes, sizeof(*futexv), GFP_KERNEL); if (!futexv) { @@ -326,13 +343,133 @@ SYSCALL_DEFINE5(futex_waitv, struct futex_waitv __user *, waiters, kfree(futexv); destroy_timer: - if (timeout) { - hrtimer_cancel(&to.timer); - destroy_hrtimer_on_stack(&to.timer); - } + if (timeout) + futex2_destroy_timeout(&to); return ret; } +/* + * sys_futex_wake - Wake a number of futexes + * @uaddr: Address of the futex(es) to wake + * @mask: bitmask + * @nr: Number of the futexes to wake + * @flags: FUTEX2 flags + * + * Identical to the traditional FUTEX_WAKE_BITSET op, except it is part of the + * futex2 family of calls. + */ + +SYSCALL_DEFINE4(futex_wake, + void __user *, uaddr, + unsigned long, mask, + int, nr, + unsigned int, flags) +{ + if (flags & ~FUTEX2_VALID_MASK) + return -EINVAL; + + flags = futex2_to_flags(flags); + if (!futex_flags_valid(flags)) + return -EINVAL; + + if (!futex_validate_input(flags, mask)) + return -EINVAL; + + return futex_wake(uaddr, FLAGS_STRICT | flags, nr, mask); +} + +/* + * sys_futex_wait - Wait on a futex + * @uaddr: Address of the futex to wait on + * @val: Value of @uaddr + * @mask: bitmask + * @flags: FUTEX2 flags + * @timeout: Optional absolute timeout + * @clockid: Clock to be used for the timeout, realtime or monotonic + * + * Identical to the traditional FUTEX_WAIT_BITSET op, except it is part of the + * futex2 familiy of calls. + */ + +SYSCALL_DEFINE6(futex_wait, + void __user *, uaddr, + unsigned long, val, + unsigned long, mask, + unsigned int, flags, + struct __kernel_timespec __user *, timeout, + clockid_t, clockid) +{ + struct hrtimer_sleeper to; + int ret; + + if (flags & ~FUTEX2_VALID_MASK) + return -EINVAL; + + flags = futex2_to_flags(flags); + if (!futex_flags_valid(flags)) + return -EINVAL; + + if (!futex_validate_input(flags, val) || + !futex_validate_input(flags, mask)) + return -EINVAL; + + if (timeout && (ret = futex2_setup_timeout(timeout, clockid, &to))) + return ret; + + ret = __futex_wait(uaddr, flags, val, timeout ? &to : NULL, mask); + + if (timeout) + futex2_destroy_timeout(&to); + + return ret; +} + +/* + * sys_futex_requeue - Requeue a waiter from one futex to another + * @waiters: array describing the source and destination futex + * @flags: unused + * @nr_wake: number of futexes to wake + * @nr_requeue: number of futexes to requeue + * + * Identical to the traditional FUTEX_CMP_REQUEUE op, except it is part of the + * futex2 family of calls. + */ + +SYSCALL_DEFINE4(futex_requeue, + struct futex_waitv __user *, waiters, + unsigned int, flags, + int, nr_wake, + int, nr_requeue) +{ + struct futex_vector futexes[2]; + u32 cmpval; + int ret; + + if (flags) + return -EINVAL; + + if (!waiters) + return -EINVAL; + + ret = futex_parse_waitv(futexes, waiters, 2); + if (ret) + return ret; + + /* + * For now mandate both flags are identical, like the sys_futex() + * interface has. If/when we merge the variable sized futex support, + * that patch can modify this test to allow a difference in size. + */ + if (futexes[0].w.flags != futexes[1].w.flags) + return -EINVAL; + + cmpval = futexes[0].w.val; + + return futex_requeue(u64_to_user_ptr(futexes[0].w.uaddr), futexes[0].w.flags, + u64_to_user_ptr(futexes[1].w.uaddr), futexes[1].w.flags, + nr_wake, nr_requeue, &cmpval, 0); +} + #ifdef CONFIG_COMPAT COMPAT_SYSCALL_DEFINE2(set_robust_list, struct compat_robust_list_head __user *, head, diff --git a/kernel/futex/waitwake.c b/kernel/futex/waitwake.c index ba01b94082033..37860f794bf74 100644 --- a/kernel/futex/waitwake.c +++ b/kernel/futex/waitwake.c @@ -145,16 +145,19 @@ int futex_wake(u32 __user *uaddr, unsigned int flags, int nr_wake, u32 bitset) struct futex_hash_bucket *hb; struct futex_q *this, *next; union futex_key key = FUTEX_KEY_INIT; - int ret; DEFINE_WAKE_Q(wake_q); + int ret; if (!bitset) return -EINVAL; - ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key, FUTEX_READ); + ret = get_futex_key(uaddr, flags, &key, FUTEX_READ); if (unlikely(ret != 0)) return ret; + if ((flags & FLAGS_STRICT) && !nr_wake) + return 0; + hb = futex_hash(&key); /* Make sure we really have tasks to wakeup */ @@ -245,10 +248,10 @@ int futex_wake_op(u32 __user *uaddr1, unsigned int flags, u32 __user *uaddr2, DEFINE_WAKE_Q(wake_q); retry: - ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1, FUTEX_READ); + ret = get_futex_key(uaddr1, flags, &key1, FUTEX_READ); if (unlikely(ret != 0)) return ret; - ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, FUTEX_WRITE); + ret = get_futex_key(uaddr2, flags, &key2, FUTEX_WRITE); if (unlikely(ret != 0)) return ret; @@ -419,11 +422,11 @@ static int futex_wait_multiple_setup(struct futex_vector *vs, int count, int *wo */ retry: for (i = 0; i < count; i++) { - if ((vs[i].w.flags & FUTEX_PRIVATE_FLAG) && retry) + if (!(vs[i].w.flags & FLAGS_SHARED) && retry) continue; ret = get_futex_key(u64_to_user_ptr(vs[i].w.uaddr), - !(vs[i].w.flags & FUTEX_PRIVATE_FLAG), + vs[i].w.flags, &vs[i].q.key, FUTEX_READ); if (unlikely(ret)) @@ -435,7 +438,7 @@ static int futex_wait_multiple_setup(struct futex_vector *vs, int count, int *wo for (i = 0; i < count; i++) { u32 __user *uaddr = (u32 __user *)(unsigned long)vs[i].w.uaddr; struct futex_q *q = &vs[i].q; - u32 val = (u32)vs[i].w.val; + u32 val = vs[i].w.val; hb = futex_q_lock(q); ret = futex_get_value_locked(&uval, uaddr); @@ -599,7 +602,7 @@ int futex_wait_setup(u32 __user *uaddr, u32 val, unsigned int flags, * while the syscall executes. */ retry: - ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q->key, FUTEX_READ); + ret = get_futex_key(uaddr, flags, &q->key, FUTEX_READ); if (unlikely(ret != 0)) return ret; @@ -629,20 +632,18 @@ int futex_wait_setup(u32 __user *uaddr, u32 val, unsigned int flags, return ret; } -int futex_wait(u32 __user *uaddr, unsigned int flags, u32 val, ktime_t *abs_time, u32 bitset) +int __futex_wait(u32 __user *uaddr, unsigned int flags, u32 val, + struct hrtimer_sleeper *to, u32 bitset) { - struct hrtimer_sleeper timeout, *to; - struct restart_block *restart; - struct futex_hash_bucket *hb; struct futex_q q = futex_q_init; + struct futex_hash_bucket *hb; int ret; if (!bitset) return -EINVAL; + q.bitset = bitset; - to = futex_setup_timer(abs_time, &timeout, flags, - current->timer_slack_ns); retry: /* * Prepare to wait on uaddr. On success, it holds hb->lock and q @@ -650,18 +651,17 @@ int futex_wait(u32 __user *uaddr, unsigned int flags, u32 val, ktime_t *abs_time */ ret = futex_wait_setup(uaddr, val, flags, &q, &hb); if (ret) - goto out; + return ret; /* futex_queue and wait for wakeup, timeout, or a signal. */ futex_wait_queue(hb, &q, to); /* If we were woken (and unqueued), we succeeded, whatever. */ - ret = 0; if (!futex_unqueue(&q)) - goto out; - ret = -ETIMEDOUT; + return 0; + if (to && !to->task) - goto out; + return -ETIMEDOUT; /* * We expect signal_pending(current), but we might be the @@ -670,24 +670,38 @@ int futex_wait(u32 __user *uaddr, unsigned int flags, u32 val, ktime_t *abs_time if (!signal_pending(current)) goto retry; - ret = -ERESTARTSYS; - if (!abs_time) - goto out; + return -ERESTARTSYS; +} - restart = ¤t->restart_block; - restart->futex.uaddr = uaddr; - restart->futex.val = val; - restart->futex.time = *abs_time; - restart->futex.bitset = bitset; - restart->futex.flags = flags | FLAGS_HAS_TIMEOUT; +int futex_wait(u32 __user *uaddr, unsigned int flags, u32 val, ktime_t *abs_time, u32 bitset) +{ + struct hrtimer_sleeper timeout, *to; + struct restart_block *restart; + int ret; - ret = set_restart_fn(restart, futex_wait_restart); + to = futex_setup_timer(abs_time, &timeout, flags, + current->timer_slack_ns); -out: - if (to) { - hrtimer_cancel(&to->timer); - destroy_hrtimer_on_stack(&to->timer); + ret = __futex_wait(uaddr, flags, val, to, bitset); + + /* No timeout, nothing to clean up. */ + if (!to) + return ret; + + hrtimer_cancel(&to->timer); + destroy_hrtimer_on_stack(&to->timer); + + if (ret == -ERESTARTSYS) { + restart = ¤t->restart_block; + restart->futex.uaddr = uaddr; + restart->futex.val = val; + restart->futex.time = *abs_time; + restart->futex.bitset = bitset; + restart->futex.flags = flags | FLAGS_HAS_TIMEOUT; + + return set_restart_fn(restart, futex_wait_restart); } + return ret; } diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c index e8e1177873770..1d6f265896981 100644 --- a/kernel/sys_ni.c +++ b/kernel/sys_ni.c @@ -87,6 +87,9 @@ COND_SYSCALL_COMPAT(set_robust_list); COND_SYSCALL(get_robust_list); COND_SYSCALL_COMPAT(get_robust_list); COND_SYSCALL(futex_waitv); +COND_SYSCALL(futex_wake); +COND_SYSCALL(futex_wait); +COND_SYSCALL(futex_requeue); COND_SYSCALL(kexec_load); COND_SYSCALL_COMPAT(kexec_load); COND_SYSCALL(init_module);