[PATCH] uapi: futex: Add a futex syscall

From: Alistair Francis
Date: Thu Oct 14 2021 - 20:59:42 EST


From: Alistair Francis <alistair.francis@xxxxxxx>

This commit adds two futex syscall wrappers that are exposed to
userspace.

Neither the kernel nor glibc currently exposes a futex wrapper, so
userspace is left performing raw syscalls. This has mostly been because
the overloading of one of the arguments makes it impossible to provide a
single type-safe function.

Until recently the single syscall has worked fine. With the introduction
of a 64-bit time_t futex call on 32-bit architectures, this has become
more complex. The logic of handling the two possible futex syscalls is
complex and often implemented incorrectly.

This patch adds two futex syscall functions that correctly handle the
time_t complexity for userspace.

This idea is based on previous discussions: https://lkml.org/lkml/2021/9/21/143

Signed-off-by: Alistair Francis <alistair.francis@xxxxxxx>
---
include/uapi/linux/futex_syscall.h | 79 ++++++++++++++++++++++++++++++
1 file changed, 79 insertions(+)
create mode 100644 include/uapi/linux/futex_syscall.h

diff --git a/include/uapi/linux/futex_syscall.h b/include/uapi/linux/futex_syscall.h
new file mode 100644
index 0000000000000..039d371346159
--- /dev/null
+++ b/include/uapi/linux/futex_syscall.h
@@ -0,0 +1,79 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI_LINUX_FUTEX_SYSCALL_H
+#define _UAPI_LINUX_FUTEX_SYSCALL_H
+
+#include <errno.h>
+#include <linux/types.h>
+#include <linux/time_types.h>
+
+/**
+ * futex_syscall_timeout() - __NR_futex/__NR_futex_time64 syscall wrapper
+ * @uaddr: address of first futex
+ * @op: futex op code
+ * @val: typically expected value of uaddr, but varies by op
+ * @timeout: timeout for the op (relative or absolute, per op), or NULL
+ * @uaddr2: address of second futex for some ops
+ * @val3: varies by op
+ * Return: syscall result; -1 with errno set on error (ENOSYS if no call fits)
+ */
+static inline int
+futex_syscall_timeout(volatile u_int32_t *uaddr, int op, u_int32_t val,
+		      struct timespec *timeout, volatile u_int32_t *uaddr2, int val3)
+{
+#if defined(__NR_futex_time64)
+	if (sizeof(*timeout) != sizeof(struct __kernel_old_timespec)) {
+		int ret = syscall(__NR_futex_time64, uaddr, op, val, timeout, uaddr2, val3);
+
+		/* Success may be > 0 (e.g. a wake count); fall back only on ENOSYS. */
+		if (ret >= 0 || errno != ENOSYS)
+			return ret;
+	}
+#endif
+
+#if defined(__NR_futex)
+	if (sizeof(*timeout) == sizeof(struct __kernel_old_timespec) || !timeout)
+		return syscall(__NR_futex, uaddr, op, val, timeout, uaddr2, val3);
+	/* 64-bit time_t on a 32-bit ABI: narrow, but only if tv_sec fits. */
+	if (timeout->tv_sec == (long)timeout->tv_sec) {
+		struct __kernel_old_timespec ts32;
+
+		ts32.tv_sec = (__kernel_long_t) timeout->tv_sec;
+		ts32.tv_nsec = (__kernel_long_t) timeout->tv_nsec;
+
+		return syscall(__NR_futex, uaddr, op, val, &ts32, uaddr2, val3);
+	}
+#endif
+
+	errno = ENOSYS;
+	return -1;
+}
+
+/**
+ * futex_syscall_nr_requeue() - __NR_futex/__NR_futex_time64 syscall wrapper
+ * @uaddr: address of first futex
+ * @op: futex op code
+ * @val: typically expected value of uaddr, but varies by op
+ * @nr_requeue: op-specific value passed in the syscall's timeout slot
+ * @uaddr2: address of second futex for some ops
+ * @val3: varies by op
+ * Return: syscall result; -1 with errno set on error
+ */
+static inline int
+futex_syscall_nr_requeue(volatile u_int32_t *uaddr, int op, u_int32_t val,
+			 u_int32_t nr_requeue, volatile u_int32_t *uaddr2, int val3)
+{
+#if defined(__NR_futex_time64)
+	int ret = syscall(__NR_futex_time64, uaddr, op, val, nr_requeue, uaddr2, val3);
+
+	/* Success may be > 0 (a waiter count); fall back only on ENOSYS. */
+	if (ret >= 0 || errno != ENOSYS)
+		return ret;
+#endif
+#if defined(__NR_futex)
+	return syscall(__NR_futex, uaddr, op, val, nr_requeue, uaddr2, val3);
+#endif
+	errno = ENOSYS;
+	return -1;
+}
+
+#endif /* _UAPI_LINUX_FUTEX_SYSCALL_H */
--
2.31.1