[PATCH v6 05/20] vfio: mdev: common lib code for setting up Interrupt Message Store

From: Dave Jiang
Date: Fri May 21 2021 - 20:19:57 EST


Add common helper code to set up IMS once the MSI domain has been
set up by the device driver. The main helper function is
mdev_set_msix_trigger(), which is called from the VFIO ioctl
VFIO_DEVICE_SET_IRQS. The function deals with the setup and
teardown of emulated and IMS-backed eventfds that get exported
to the guest kernel via VFIO as MSIX vectors.

Suggested-by: Jason Gunthorpe <jgg@xxxxxxxxxx>
Signed-off-by: Dave Jiang <dave.jiang@xxxxxxxxx>
---
drivers/vfio/mdev/Kconfig | 12 ++
drivers/vfio/mdev/Makefile | 3
drivers/vfio/mdev/mdev_irqs.c | 318 +++++++++++++++++++++++++++++++++++++++++
include/linux/mdev.h | 51 +++++++
4 files changed, 384 insertions(+)
create mode 100644 drivers/vfio/mdev/mdev_irqs.c

diff --git a/drivers/vfio/mdev/Kconfig b/drivers/vfio/mdev/Kconfig
index 763c877a1318..82f79d99a7db 100644
--- a/drivers/vfio/mdev/Kconfig
+++ b/drivers/vfio/mdev/Kconfig
@@ -9,3 +9,15 @@ config VFIO_MDEV
See Documentation/driver-api/vfio-mediated-device.rst for more details.

If you don't know what do here, say N.
+
+config VFIO_MDEV_IRQS
+	bool "Mediated device driver common lib code for interrupts"
+	depends on VFIO_MDEV
+	select IMS_MSI_ARRAY
+	select IRQ_BYPASS_MANAGER
+	help
+	  Provide common library code to deal with IMS interrupts for mediated
+	  devices.
+
+	  If you don't know what to do here, say N.
diff --git a/drivers/vfio/mdev/Makefile b/drivers/vfio/mdev/Makefile
index 7c236ba1b90e..c3f160cae192 100644
--- a/drivers/vfio/mdev/Makefile
+++ b/drivers/vfio/mdev/Makefile
@@ -2,4 +2,7 @@

mdev-y := mdev_core.o mdev_sysfs.o mdev_driver.o

+mdev-$(CONFIG_VFIO_MDEV_IRQS) += mdev_irqs.o
+
obj-$(CONFIG_VFIO_MDEV) += mdev.o
diff --git a/drivers/vfio/mdev/mdev_irqs.c b/drivers/vfio/mdev/mdev_irqs.c
new file mode 100644
index 000000000000..ed2d11a7c729
--- /dev/null
+++ b/drivers/vfio/mdev/mdev_irqs.c
@@ -0,0 +1,318 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Mediated device IMS library code
+ *
+ * Copyright (c) 2021 Intel Corp. All rights reserved.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/device.h>
+#include <linux/interrupt.h>
+#include <linux/irqchip/irq-ims-msi.h>
+#include <linux/eventfd.h>
+#include <linux/irqreturn.h>
+#include <linux/msi.h>
+#include <linux/vfio.h>
+#include <linux/irqbypass.h>
+#include <linux/mdev.h>
+
+/*
+ * Interrupt handler that forwards an interrupt to an eventfd.
+ *
+ * @irq [in] : interrupt number (unused)
+ * @arg [in] : eventfd context to signal
+ *
+ * Always returns IRQ_HANDLED.
+ */
+static irqreturn_t mdev_irq_handler(int irq, void *arg)
+{
+	eventfd_signal((struct eventfd_ctx *)arg, 1);
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * Common helper routine to send a signal to the eventfd that has been set up
+ * for a vector.
+ *
+ * @mdev [in] : mdev device
+ * @vector [in] : vector index for eventfd
+ *
+ * A warning is logged and nothing is signalled when the vector is out of
+ * range or has no eventfd installed.
+ *
+ * No return value.
+ */
+void mdev_msix_send_signal(struct mdev_device *mdev, int vector)
+{
+	struct mdev_irq *mdev_irq = &mdev->mdev_irq;
+	struct eventfd_ctx *trigger = NULL;
+
+	/*
+	 * Validate before indexing: irq_entries is NULL after mdev_irqs_free()
+	 * and the caller-supplied vector must lie inside the array.
+	 */
+	if (mdev_irq->irq_entries && vector >= 0 && vector < mdev_irq->num)
+		trigger = mdev_irq->irq_entries[vector].trigger;
+
+	if (!trigger) {
+		dev_warn(&mdev->dev, "EventFD %d trigger not setup, can't send!\n", vector);
+		return;
+	}
+	mdev_irq_handler(0, trigger);
+}
+EXPORT_SYMBOL_GPL(mdev_msix_send_signal);
+
+/*
+ * Set up or tear down the eventfd trigger of a single vector.
+ *
+ * Any trigger currently installed on the vector is released first. When @fd
+ * is negative nothing new is installed. Otherwise the eventfd behind @fd
+ * becomes the vector's trigger and, for IMS backed vectors, the interrupt is
+ * requested and an irq bypass producer is registered.
+ *
+ * @mdev_irq [in] : mdev_irq context
+ * @vector [in] : vector index
+ * @fd [in] : eventfd file descriptor, or a negative value to only tear down
+ *
+ * Return error code on failure or 0 on success.
+ */
+static int mdev_msix_set_vector_signal(struct mdev_irq *mdev_irq, int vector, int fd)
+{
+	struct mdev_device *mdev = irq_to_mdev(mdev_irq);
+	struct device *dev = &mdev->dev;
+	struct mdev_irq_entry *entry;
+	struct eventfd_ctx *trigger;
+	bool pasid_en;
+	char *name;
+	u32 auxval;
+	int rc, irq;
+
+	if (vector < 0 || vector >= mdev_irq->num)
+		return -EINVAL;
+
+	entry = &mdev_irq->irq_entries[vector];
+
+	/* Emulated vectors have no interrupt behind them; 0 marks that case. */
+	irq = entry->ims ? dev_msi_irq_vector(dev, entry->ims_id) : 0;
+
+	pasid_en = mdev_irq->pasid != INVALID_IOASID;
+
+	/* IMS and invalid pasid is not a valid configuration */
+	if (entry->ims && !pasid_en)
+		return -EINVAL;
+
+	/* Release whatever is currently installed on this vector. */
+	if (entry->trigger) {
+		if (irq) {
+			irq_bypass_unregister_producer(&entry->producer);
+			free_irq(irq, entry->trigger);
+			if (pasid_en) {
+				auxval = ims_ctrl_pasid_aux(0, false);
+				irq_set_auxdata(irq, IMS_AUXDATA_CONTROL_WORD, auxval);
+			}
+		}
+		kfree(entry->name);
+		/* Do not leave a pointer to the freed name in the entry. */
+		entry->name = NULL;
+		eventfd_ctx_put(entry->trigger);
+		entry->trigger = NULL;
+	}
+
+	/* A negative fd is a pure teardown request. */
+	if (fd < 0)
+		return 0;
+
+	name = kasprintf(GFP_KERNEL, "vfio-mdev-irq[%d](%s)", vector, dev_name(dev));
+	if (!name)
+		return -ENOMEM;
+
+	trigger = eventfd_ctx_fdget(fd);
+	if (IS_ERR(trigger)) {
+		kfree(name);
+		return PTR_ERR(trigger);
+	}
+
+	entry->name = name;
+	entry->trigger = trigger;
+
+	/* Emulated vector: the eventfd is all that is needed. */
+	if (!irq)
+		return 0;
+
+	if (pasid_en) {
+		auxval = ims_ctrl_pasid_aux(mdev_irq->pasid, true);
+		rc = irq_set_auxdata(irq, IMS_AUXDATA_CONTROL_WORD, auxval);
+		if (rc < 0)
+			goto err;
+	}
+
+	rc = request_irq(irq, mdev_irq_handler, 0, name, trigger);
+	if (rc < 0)
+		goto irq_err;
+
+	entry->producer.token = trigger;
+	entry->producer.irq = irq;
+	rc = irq_bypass_register_producer(&entry->producer);
+	if (unlikely(rc)) {
+		/* Bypass is optional: warn and carry on without a producer. */
+		dev_warn(dev, "irq bypass producer (token %p) registration fails: %d\n",
+			 entry->producer.token, rc);
+		entry->producer.token = NULL;
+	}
+
+	return 0;
+
+irq_err:
+	if (pasid_en) {
+		auxval = ims_ctrl_pasid_aux(0, false);
+		irq_set_auxdata(irq, IMS_AUXDATA_CONTROL_WORD, auxval);
+	}
+err:
+	/* Undo the early publication of name and trigger in the entry. */
+	entry->name = NULL;
+	entry->trigger = NULL;
+	kfree(name);
+	eventfd_ctx_put(trigger);
+	return rc;
+}
+
+/*
+ * Install or remove the eventfd triggers of a contiguous range of vectors.
+ *
+ * @mdev_irq [in] : mdev_irq context
+ * @start [in] : first vector index
+ * @count [in] : number of vectors
+ * @fds [in] : array of @count eventfd descriptors, or NULL to tear down
+ *
+ * If a vector fails, the vectors processed so far are torn down again.
+ * Return error code on failure or 0 on success.
+ */
+static int mdev_msix_set_vector_signals(struct mdev_irq *mdev_irq, unsigned int start,
+					unsigned int count, int *fds)
+{
+	unsigned int num = mdev_irq->num;
+	unsigned int i;
+	int j, rc = 0;
+
+	/* Phrased so that start + count cannot wrap around. */
+	if (start >= num || count > num - start)
+		return -EINVAL;
+
+	/* i walks @fds and bounds the loop; j is the matching vector index. */
+	for (i = 0, j = start; i < count && !rc; i++, j++) {
+		int fd = fds ? fds[i] : -1;
+
+		rc = mdev_msix_set_vector_signal(mdev_irq, j, fd);
+	}
+
+	if (rc) {
+		/* j is one past the failing vector; unwind back down to start. */
+		for (--j; j >= (int)start; j--)
+			mdev_msix_set_vector_signal(mdev_irq, j, -1);
+	}
+
+	return rc;
+}
+
+/*
+ * Enable MSIX for the mdev: allocate the IMS interrupts from the device's MSI
+ * domain when any vector is IMS backed, then record MSIX as the active irq
+ * type.
+ *
+ * @mdev_irq [in] : mdev_irq context
+ * @nvec [in] : number of vectors requested, must equal the configured number
+ *
+ * Return error code on failure or 0 on success.
+ */
+static int mdev_msix_enable(struct mdev_irq *mdev_irq, int nvec)
+{
+	struct device *dev = &irq_to_mdev(mdev_irq)->dev;
+	int rc;
+
+	if (nvec != mdev_irq->num)
+		return -EINVAL;
+
+	if (mdev_irq->ims_num) {
+		rc = msi_domain_alloc_irqs(dev_get_msi_domain(dev), dev, mdev_irq->ims_num);
+		if (rc < 0)
+			return rc;
+	}
+
+	mdev_irq->irq_type = VFIO_PCI_MSIX_IRQ_INDEX;
+
+	return 0;
+}
+
+/*
+ * Disable MSIX for the mdev: tear down the trigger of every vector, free the
+ * interrupts of the device's MSI domain when it has one, and reset the irq
+ * type to VFIO_PCI_NUM_IRQS.
+ *
+ * @mdev_irq [in] : mdev_irq context
+ *
+ * Always returns 0.
+ */
+static int mdev_msix_disable(struct mdev_irq *mdev_irq)
+{
+	struct device *dev = &irq_to_mdev(mdev_irq)->dev;
+	struct irq_domain *domain;
+
+	mdev_msix_set_vector_signals(mdev_irq, 0, mdev_irq->num, NULL);
+
+	domain = dev_get_msi_domain(dev);
+	if (domain)
+		msi_domain_free_irqs(domain, dev);
+
+	mdev_irq->irq_type = VFIO_PCI_NUM_IRQS;
+
+	return 0;
+}
+
+/*
+ * Common helper function that sets up the MSIX vectors for the mdev device that are
+ * Interrupt Message Store (IMS) backed. Certain mdev devices can have the first
+ * vector emulated rather than backed by IMS.
+ *
+ * Depending on @flags the call disables MSIX (zero @count with
+ * VFIO_IRQ_SET_DATA_NONE), installs eventfd triggers
+ * (VFIO_IRQ_SET_DATA_EVENTFD, enabling MSIX first when needed), or signals
+ * the already installed triggers (VFIO_IRQ_SET_DATA_NONE or
+ * VFIO_IRQ_SET_DATA_BOOL).
+ *
+ * @mdev [in] : mdev device
+ * @index [in] : type of VFIO vectors to setup
+ * @start [in] : start position of the vector index
+ * @count [in] : number of vectors, clamped to the configured number
+ * @flags [in] : VFIO_IRQ action to be taken
+ * @data [in] : data accompanied for the call: an int array of eventfds for
+ *              VFIO_IRQ_SET_DATA_EVENTFD, a u8 array for VFIO_IRQ_SET_DATA_BOOL
+ * Return error code on failure or 0 on success.
+ */
+int mdev_set_msix_trigger(struct mdev_device *mdev, unsigned int index,
+			  unsigned int start, unsigned int count, u32 flags,
+			  void *data)
+{
+	struct mdev_irq *mdev_irq = &mdev->mdev_irq;
+	unsigned int num = mdev_irq->num;
+	unsigned int i;
+	int rc;
+
+	if (count > num)
+		count = num;
+
+	/* A zero count together with DATA_NONE is the disable request. */
+	if (!count && (flags & VFIO_IRQ_SET_DATA_NONE)) {
+		mdev_msix_disable(mdev_irq);
+		return 0;
+	}
+
+	if (flags & VFIO_IRQ_SET_DATA_EVENTFD) {
+		int *fds = data;
+
+		/* Already enabled: only the requested triggers change. */
+		if (mdev_irq->irq_type == index)
+			return mdev_msix_set_vector_signals(mdev_irq, start, count, fds);
+
+		rc = mdev_msix_enable(mdev_irq, start + count);
+		if (rc < 0)
+			return rc;
+
+		rc = mdev_msix_set_vector_signals(mdev_irq, start, count, fds);
+		if (rc < 0)
+			mdev_msix_disable(mdev_irq);
+
+		return rc;
+	}
+
+	/* Phrased so that start + count cannot wrap around. */
+	if (start > num || count > num - start)
+		return -EINVAL;
+
+	/* Manually fire the triggers that are installed in the range. */
+	for (i = start; i < start + count; i++) {
+		struct eventfd_ctx *trigger = mdev_irq->irq_entries[i].trigger;
+
+		if (!trigger)
+			continue;
+		if (flags & VFIO_IRQ_SET_DATA_NONE) {
+			eventfd_signal(trigger, 1);
+		} else if (flags & VFIO_IRQ_SET_DATA_BOOL) {
+			u8 *bools = data;
+
+			if (bools[i - start])
+				eventfd_signal(trigger, 1);
+		}
+	}
+	return 0;
+}
+EXPORT_SYMBOL_GPL(mdev_set_msix_trigger);
+
+/*
+ * Record the PASID in the mdev_irq context of the mdev.
+ *
+ * @mdev [in] : mdev device
+ * @pasid [in] : PASID value to store
+ */
+void mdev_irqs_set_pasid(struct mdev_device *mdev, u32 pasid)
+{
+	struct mdev_irq *mdev_irq = &mdev->mdev_irq;
+
+	mdev_irq->pasid = pasid;
+}
+EXPORT_SYMBOL_GPL(mdev_irqs_set_pasid);
+
+/*
+ * Initialize and setup the mdev_irq context under mdev.
+ *
+ * @mdev [in] : mdev device
+ * @num [in] : number of vectors, must be at least 1
+ * @ims_map [in] : bool array of @num entries that indicates whether a guest
+ *                 MSIX vector is backed by an IMS vector or emulated
+ *
+ * The context is left untouched when an error is returned.
+ * Return error code on failure or 0 on success.
+ */
+int mdev_irqs_init(struct mdev_device *mdev, int num, bool *ims_map)
+{
+	struct mdev_irq *mdev_irq = &mdev->mdev_irq;
+	struct mdev_irq_entry *entries;
+	int i;
+
+	if (num < 1 || !ims_map)
+		return -EINVAL;
+
+	/*
+	 * Allocate before publishing num so that a failed init never leaves a
+	 * non-zero vector count paired with a NULL entry array.
+	 */
+	entries = kcalloc(num, sizeof(*entries), GFP_KERNEL);
+	if (!entries)
+		return -ENOMEM;
+
+	mdev_irq->irq_entries = entries;
+	mdev_irq->irq_type = VFIO_PCI_NUM_IRQS;
+	mdev_irq->num = num;
+	mdev_irq->pasid = INVALID_IOASID;
+	/* Count IMS vectors from zero regardless of any previous content. */
+	mdev_irq->ims_num = 0;
+
+	for (i = 0; i < num; i++) {
+		entries[i].ims = ims_map[i];
+		if (ims_map[i]) {
+			/* IMS ids are handed out densely in vector order. */
+			entries[i].ims_id = mdev_irq->ims_num;
+			mdev_irq->ims_num++;
+		}
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(mdev_irqs_init);
+
+/*
+ * Release the vector entry array of the mdev and zero its mdev_irq context.
+ *
+ * @mdev [in] : mdev device
+ */
+void mdev_irqs_free(struct mdev_device *mdev)
+{
+	struct mdev_irq *mdev_irq = &mdev->mdev_irq;
+
+	kfree(mdev_irq->irq_entries);
+	memset(mdev_irq, 0, sizeof(*mdev_irq));
+}
+EXPORT_SYMBOL_GPL(mdev_irqs_free);
diff --git a/include/linux/mdev.h b/include/linux/mdev.h
index 0cd8db2d3422..035c021e8068 100644
--- a/include/linux/mdev.h
+++ b/include/linux/mdev.h
@@ -10,8 +10,26 @@
#ifndef MDEV_H
#define MDEV_H

+#include <linux/irqbypass.h>
+
struct mdev_type;

+/* Per-vector state of the mdev interrupt library. */
+struct mdev_irq_entry {
+ /* eventfd context signalled for this vector; NULL when none is installed */
+ struct eventfd_ctx *trigger;
+ /* irq bypass producer registered for the vector's interrupt */
+ struct irq_bypass_producer producer;
+ /* allocated interrupt name handed to request_irq() */
+ char *name;
+ /* true when the vector is backed by IMS, false when it is emulated */
+ bool ims;
+ /* index of the vector among the IMS backed vectors; valid only if ims */
+ int ims_id;
+};
+
+/* Interrupt context embedded in struct mdev_device. */
+struct mdev_irq {
+	/* array of num per-vector entries */
+	struct mdev_irq_entry *irq_entries;
+	/* number of vectors */
+	int num;
+	/* number of vectors that are backed by IMS */
+	int ims_num;
+	/* VFIO_PCI_MSIX_IRQ_INDEX while enabled, VFIO_PCI_NUM_IRQS otherwise */
+	int irq_type;
+	/* PASID set by mdev_irqs_set_pasid(); INVALID_IOASID when unset */
+	u32 pasid;
+};
+
struct mdev_device {
struct device dev;
guid_t uuid;
@@ -19,8 +37,14 @@ struct mdev_device {
struct mdev_type *type;
struct device *iommu_device;
struct mutex creation_lock;
+ struct mdev_irq mdev_irq;
};

+/* Map an embedded mdev_irq context back to the mdev_device containing it. */
+static inline struct mdev_device *irq_to_mdev(struct mdev_irq *mdev_irq)
+{
+	struct mdev_device *mdev = container_of(mdev_irq, struct mdev_device, mdev_irq);
+
+	return mdev;
+}
+
static inline struct mdev_device *to_mdev_device(struct device *dev)
{
return container_of(dev, struct mdev_device, dev);
@@ -99,4 +123,31 @@ static inline struct mdev_device *mdev_from_dev(struct device *dev)
return dev->bus == &mdev_bus_type ? to_mdev_device(dev) : NULL;
}

+#if IS_ENABLED(CONFIG_VFIO_MDEV_IRQS)
+int mdev_set_msix_trigger(struct mdev_device *mdev, unsigned int index,
+			  unsigned int start, unsigned int count, u32 flags,
+			  void *data);
+void mdev_msix_send_signal(struct mdev_device *mdev, int vector);
+int mdev_irqs_init(struct mdev_device *mdev, int num, bool *ims_map);
+void mdev_irqs_free(struct mdev_device *mdev);
+void mdev_irqs_set_pasid(struct mdev_device *mdev, u32 pasid);
+#else
+/*
+ * Stubs used when the mdev interrupt library is not built. Each one is
+ * static inline so that including this header from several source files does
+ * not produce duplicate definitions at link time.
+ */
+static inline int mdev_set_msix_trigger(struct mdev_device *mdev, unsigned int index,
+					unsigned int start, unsigned int count, u32 flags,
+					void *data)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline void mdev_msix_send_signal(struct mdev_device *mdev, int vector) {}
+
+static inline int mdev_irqs_init(struct mdev_device *mdev, int num, bool *ims_map)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline void mdev_irqs_free(struct mdev_device *mdev) {}
+static inline void mdev_irqs_set_pasid(struct mdev_device *mdev, u32 pasid) {}
+#endif /* CONFIG_VFIO_MDEV_IRQS */
+
#endif /* MDEV_H */