[RFC PATCH 09/13] x86/uintr: Introduce vector registration and uintr_fd syscall
From: Sohil Mehta
Date: Mon Sep 13 2021 - 16:05:20 EST
Each receiving task has its own interrupt vector space of 64 vectors.
For each vector registered by a task, create a uintr_fd. Only tasks that
have previously registered a user interrupt handler can register a
vector.
The sender for the user interrupt could be another userspace
application, the kernel or an external source (like a device). Any sender
that wants to generate a user interrupt needs access to receiver's
vector number and UPID. uintr_fd abstracts that information and allows
a sender with access to uintr_fd to connect and generate a user
interrupt. Upon interrupt delivery, the interrupt handler would be
invoked with the associated vector number pushed onto the stack.
Using an FD abstraction automatically provides a secure mechanism to
connect with a receiver. It also makes the tracking and management of
the interrupt vector resource easier for userspace.
uintr_fd can be useful in some of the use cases where eventfd is used for
userspace event notifications. Though uintr_fd is nowhere close to a
drop-in replacement, the semantics are meant to be somewhat similar to
an eventfd or the write end of a pipe.
Access to uintr_fd can be achieved in the following ways:
- direct access if the task is part of the same thread group (process)
- inherited by a child process.
- explicitly shared using any of the FD sharing mechanisms.
If the sender is another userspace task, it can use the uintr_fd to send
user IPIs to the receiver. This works in conjunction with the SENDUIPI
instruction. The details related to this are covered later.
The exact APIs for a sender that is the kernel or another external source
are still being worked on. The general idea is that the receiver would
pass the uintr_fd to the kernel by extending some existing API (like
io_uring).
The vector associated with uintr_fd can be unregistered by closing all
references to the uintr_fd.
Signed-off-by: Sohil Mehta <sohil.mehta@xxxxxxxxx>
---
arch/x86/include/asm/uintr.h | 14 ++++
arch/x86/kernel/uintr_core.c | 129 +++++++++++++++++++++++++++++++++--
arch/x86/kernel/uintr_fd.c | 94 +++++++++++++++++++++++++
3 files changed, 232 insertions(+), 5 deletions(-)
diff --git a/arch/x86/include/asm/uintr.h b/arch/x86/include/asm/uintr.h
index cef4dd81d40e..1f00e2a63da4 100644
--- a/arch/x86/include/asm/uintr.h
+++ b/arch/x86/include/asm/uintr.h
@@ -4,9 +4,23 @@
#ifdef CONFIG_X86_USER_INTERRUPTS
+struct uintr_upid_ctx { /* Reference-counted wrapper around one receiver's UPID */
+ struct task_struct *task; /* Receiver task; reference taken in alloc_upid(), dropped in free_upid() */
+ struct uintr_upid *upid; /* The UPID itself; kfree()'d in free_upid() */
+ refcount_t refs; /* Set to 1 in alloc_upid(); get_upid_ref() increments it */
+};
+
+struct uintr_receiver_info { /* State for one registered vector, one per uintr_fd */
+ struct uintr_upid_ctx *upid_ctx; /* UPID context; reference taken in do_uintr_register_vector() */
+ struct callback_head twork; /* Task work head; queued by do_uintr_unregister_vector() */
+ u64 uvec; /* Vector number; do_uintr_register_vector() rejects values >= 64 */
+};
+
bool uintr_arch_enabled(void);
int do_uintr_register_handler(u64 handler);
int do_uintr_unregister_handler(void);
+int do_uintr_register_vector(struct uintr_receiver_info *r_info); /* 0 on success, else -EINVAL, -ENOSPC or -EBUSY */
+void do_uintr_unregister_vector(struct uintr_receiver_info *r_info); /* consumes r_info: freed directly or by the queued task work */
void uintr_free(struct task_struct *task);
diff --git a/arch/x86/kernel/uintr_core.c b/arch/x86/kernel/uintr_core.c
index a2a13f890139..9dcb9f60e5bc 100644
--- a/arch/x86/kernel/uintr_core.c
+++ b/arch/x86/kernel/uintr_core.c
@@ -11,6 +11,7 @@
#include <linux/sched.h>
#include <linux/sched/task.h>
#include <linux/slab.h>
+#include <linux/task_work.h>
#include <linux/uaccess.h>
#include <asm/apic.h>
@@ -20,6 +21,8 @@
#include <asm/msr-index.h>
#include <asm/uintr.h>
+#define UINTR_MAX_UVEC_NR 64
+
/* User Posted Interrupt Descriptor (UPID) */
struct uintr_upid {
struct {
@@ -36,13 +39,9 @@ struct uintr_upid {
#define UPID_ON 0x0 /* Outstanding notification */
#define UPID_SN 0x1 /* Suppressed notification */
-struct uintr_upid_ctx {
- struct uintr_upid *upid;
- refcount_t refs;
-};
-
struct uintr_receiver {
struct uintr_upid_ctx *upid_ctx;
+ u64 uvec_mask; /* bit N set => user vector N is currently registered by this task */
};
inline bool uintr_arch_enabled(void)
@@ -69,6 +68,7 @@ static inline u32 cpu_to_ndst(int cpu)
static void free_upid(struct uintr_upid_ctx *upid_ctx)
{
+ put_task_struct(upid_ctx->task);
kfree(upid_ctx->upid);
upid_ctx->upid = NULL;
kfree(upid_ctx);
@@ -93,6 +93,7 @@ static struct uintr_upid_ctx *alloc_upid(void)
upid_ctx->upid = upid;
refcount_set(&upid_ctx->refs, 1);
+ upid_ctx->task = get_task_struct(current);
return upid_ctx;
}
@@ -103,6 +104,77 @@ static void put_upid_ref(struct uintr_upid_ctx *upid_ctx)
free_upid(upid_ctx);
}
+static struct uintr_upid_ctx *get_upid_ref(struct uintr_upid_ctx *upid_ctx)
+{
+ refcount_inc(&upid_ctx->refs); /* take one more reference on the UPID context */
+ return upid_ctx; /* handed back so the caller can store it directly */
+}
+
+static void __clear_vector_from_upid(u64 uvec, struct uintr_upid *upid)
+{
+ clear_bit(uvec, (unsigned long *)&upid->puir); /* clear bit 'uvec' in upid->puir (presumably the posted-request bitmap; struct body not shown here) */
+}
+
+/*
+ * Drop @uvec from the current task's mask of registered user vectors.
+ * If the vector is not set in the mask, only the first debug message is
+ * emitted and the mask is left untouched.
+ */
+static void __clear_vector_from_task(u64 uvec)
+{
+	struct uintr_receiver *ui_recv;
+
+	pr_debug("recv: task=%d free vector %llu\n", current->pid, uvec);
+
+	ui_recv = current->thread.ui_recv;
+	if (ui_recv->uvec_mask & BIT_ULL(uvec)) {
+		clear_bit(uvec, (unsigned long *)&ui_recv->uvec_mask);
+
+		/* That was the last registered vector for this task. */
+		if (!ui_recv->uvec_mask)
+			pr_debug("recv: task=%d unregistered all user vectors\n",
+				 current->pid);
+	}
+}
+
+/*
+ * Task work callback that clears the vector structures when a vector is
+ * unregistered. It operates on 'current' and, whatever path is taken,
+ * drops the UPID reference held by @head's uintr_receiver_info and frees
+ * that structure.
+ */
+static void receiver_clear_uvec(struct callback_head *head)
+{
+	struct uintr_receiver_info *r_info =
+		container_of(head, struct uintr_receiver_info, twork);
+	struct uintr_upid_ctx *upid_ctx = r_info->upid_ctx;
+	u64 uvec = r_info->uvec;
+
+	/*
+	 * A task that has unregistered its interrupt handler has already had
+	 * its vector structures cleared, so there is nothing left to undo.
+	 */
+	if (is_uintr_receiver(current)) {
+		if (current->thread.ui_recv->upid_ctx == upid_ctx) {
+			/*
+			 * A vector that has already been recognized in the
+			 * UIRR is left alone: modifying the UIRR would need
+			 * User Interrupts disabled first, and it might be
+			 * better to just let that interrupt get delivered.
+			 */
+			__clear_vector_from_upid(uvec, upid_ctx->upid);
+			__clear_vector_from_task(uvec);
+		} else {
+			/*
+			 * The task unregistered its handler and registered
+			 * again, so it now has a different UPID context. The
+			 * vector structures of the old UPID were cleared when
+			 * the handler was unregistered.
+			 */
+			pr_debug("recv: task %d is now using a different UPID\n",
+				 current->pid);
+		}
+	}
+
+	put_upid_ref(upid_ctx);
+	kfree(r_info);
+}
+
int do_uintr_unregister_handler(void)
{
struct task_struct *t = current;
@@ -239,6 +311,53 @@ int do_uintr_register_handler(u64 handler)
return 0;
}
+/*
+ * do_uintr_unregister_vector - release a vector registered for a receiver.
+ * @r_info: registration state filled in by do_uintr_register_vector()
+ *
+ * The vector bookkeeping is cleared from the receiver task's own context,
+ * so the work is queued as task work on r_info->upid_ctx->task. Ownership
+ * of @r_info and of its UPID reference passes to this function: they are
+ * released by receiver_clear_uvec() once it runs, or right here if the
+ * work cannot be queued. Callers must not touch @r_info afterwards.
+ */
+void do_uintr_unregister_vector(struct uintr_receiver_info *r_info)
+{
+	struct task_struct *task = r_info->upid_ctx->task;
+
+	pr_debug("recv: Adding task work to clear vector %llu added for task=%d\n",
+		 r_info->uvec, task->pid);
+
+	init_task_work(&r_info->twork, receiver_clear_uvec);
+
+	/*
+	 * task_work_add() takes an enum task_work_notify_mode, not a bool.
+	 * TWA_RESUME runs the callback when the task next returns to user
+	 * mode.
+	 */
+	if (!task_work_add(task, &r_info->twork, TWA_RESUME))
+		return;
+
+	/* Queueing failed, so nobody else will free these. */
+	pr_debug("recv: Clear vector task=%d has already exited\n",
+		 task->pid);
+	put_upid_ref(r_info->upid_ctx);
+	kfree(r_info);
+}
+
+/*
+ * do_uintr_register_vector - reserve a user vector for the current task.
+ * @r_info: registration state; ->uvec selects the vector, ->upid_ctx is
+ *          filled in on success with a new reference to the task's UPID
+ *          context.
+ *
+ * Returns 0 on success, -EINVAL if the current task has no interrupt
+ * handler registered, -ENOSPC if the vector number is out of range and
+ * -EBUSY if the vector is already registered.
+ */
+int do_uintr_register_vector(struct uintr_receiver_info *r_info)
+{
+	struct uintr_receiver *ui_recv;
+	u64 uvec_bit;
+
+	/* Only a task with a registered interrupt handler may own vectors. */
+	if (!is_uintr_receiver(current))
+		return -EINVAL;
+
+	if (r_info->uvec >= UINTR_MAX_UVEC_NR)
+		return -ENOSPC;
+
+	ui_recv = current->thread.ui_recv;
+	uvec_bit = BIT_ULL(r_info->uvec);
+
+	if (ui_recv->uvec_mask & uvec_bit)
+		return -EBUSY;
+
+	ui_recv->uvec_mask |= uvec_bit;
+	pr_debug("recv: task %d new uvec=%llu, new mask %llx\n",
+		 current->pid, r_info->uvec, ui_recv->uvec_mask);
+
+	/* This reference is dropped again on the unregister path. */
+	r_info->upid_ctx = get_upid_ref(ui_recv->upid_ctx);
+
+	return 0;
+}
+
/* Suppress notifications since this task is being context switched out */
void switch_uintr_prepare(struct task_struct *prev)
{
diff --git a/arch/x86/kernel/uintr_fd.c b/arch/x86/kernel/uintr_fd.c
index a1a9c105fdab..f0548bbac776 100644
--- a/arch/x86/kernel/uintr_fd.c
+++ b/arch/x86/kernel/uintr_fd.c
@@ -6,11 +6,105 @@
*/
#define pr_fmt(fmt) "uintr: " fmt
+#include <linux/anon_inodes.h>
+#include <linux/fdtable.h>
#include <linux/sched.h>
#include <linux/syscalls.h>
#include <asm/uintr.h>
+struct uintrfd_ctx { /* stored as file->private_data of a uintr_fd */
+ struct uintr_receiver_info *r_info; /* vector registration; passed to do_uintr_unregister_vector() on release */
+};
+
+#ifdef CONFIG_PROC_FS
+/* ->show_fdinfo handler: print the user vector bound to this uintr_fd. */
+static void uintrfd_show_fdinfo(struct seq_file *m, struct file *file)
+{
+	struct uintrfd_ctx *ctx = file->private_data;
+	struct uintr_receiver_info *r_info = ctx->r_info;
+
+	/* Check: Should we print the receiver and sender info here? */
+	seq_printf(m, "user_vector:%llu\n", r_info->uvec);
+}
+#endif
+
+/*
+ * ->release handler for a uintr_fd: unregister the vector tied to the file
+ * and free the per-file context. Always returns 0.
+ */
+static int uintrfd_release(struct inode *inode, struct file *file)
+{
+	struct uintrfd_ctx *ctx = file->private_data;
+	struct uintr_receiver_info *r_info = ctx->r_info;
+
+	pr_debug("recv: Release uintrfd for r_task %d uvec %llu\n",
+		 r_info->upid_ctx->task->pid, r_info->uvec);
+
+	/* r_info is handed off here and must not be used afterwards. */
+	do_uintr_unregister_vector(r_info);
+	kfree(ctx);
+
+	return 0;
+}
+
+static const struct file_operations uintrfd_fops = { /* file operations behind the "[uintrfd]" anon inode */
+#ifdef CONFIG_PROC_FS
+ .show_fdinfo = uintrfd_show_fdinfo, /* prints "user_vector:<n>" */
+#endif
+ .release = uintrfd_release, /* unregisters the vector and frees the uintrfd_ctx */
+ .llseek = noop_llseek,
+};
+
+/*
+ * sys_uintr_create_fd - Create a uintr_fd for the registered interrupt vector.
+ * @vector: user vector to register for the calling task
+ * @flags:  must be 0; no flags are defined yet
+ *
+ * Returns the new file descriptor on success. On failure returns
+ * -EOPNOTSUPP if user interrupts are not enabled, -EINVAL if @flags is
+ * non-zero, -ENOMEM on allocation failure, or the error reported by
+ * do_uintr_register_vector() or anon_inode_getfd().
+ */
+SYSCALL_DEFINE2(uintr_create_fd, u64, vector, unsigned int, flags)
+{
+	struct uintr_receiver_info *r_info;
+	struct uintrfd_ctx *uintrfd_ctx;
+	int uintrfd;
+	int ret;
+
+	if (!uintr_arch_enabled())
+		return -EOPNOTSUPP;
+
+	if (flags)
+		return -EINVAL;
+
+	uintrfd_ctx = kzalloc(sizeof(*uintrfd_ctx), GFP_KERNEL);
+	if (!uintrfd_ctx)
+		return -ENOMEM;
+
+	r_info = kzalloc(sizeof(*r_info), GFP_KERNEL);
+	if (!r_info) {
+		ret = -ENOMEM;
+		goto out_free_ctx;
+	}
+
+	r_info->uvec = vector;
+	uintrfd_ctx->r_info = r_info;
+
+	ret = do_uintr_register_vector(r_info);
+	if (ret) {
+		/* Nothing was registered, so r_info is still ours to free. */
+		kfree(r_info);
+		goto out_free_ctx;
+	}
+
+	/* TODO: Get user input for flags - UFD_CLOEXEC */
+	/* Check: Do we need O_NONBLOCK? */
+	uintrfd = anon_inode_getfd("[uintrfd]", &uintrfd_fops, uintrfd_ctx,
+				   O_RDONLY | O_CLOEXEC | O_NONBLOCK);
+
+	if (uintrfd < 0) {
+		ret = uintrfd;
+		goto out_free_uvec;
+	}
+
+	/*
+	 * The fd is live from this point on. Another thread sharing the fd
+	 * table may close it and free uintrfd_ctx via uintrfd_release(), so
+	 * the context must not be dereferenced any more. Print the local
+	 * copy of the vector instead.
+	 */
+	pr_debug("recv: Alloc vector success uintrfd %d uvec %llu for task=%d\n",
+		 uintrfd, vector, current->pid);
+
+	return uintrfd;
+
+out_free_uvec:
+	/* Hands r_info off; the unregister path frees it. */
+	do_uintr_unregister_vector(r_info);
+out_free_ctx:
+	kfree(uintrfd_ctx);
+	pr_debug("recv: Alloc vector failed for task=%d ret %d\n",
+		 current->pid, ret);
+	return ret;
+}
+
/*
* sys_uintr_register_handler - setup user interrupt handler for receiver.
*/
--
2.33.0