[PATCH v2 2/3] uio_pci_generic: add MSI/MSI-X support

From: Vlad Zolotarov
Date: Sun Oct 04 2015 - 13:36:36 EST


Add support for MSI and MSI-X interrupt modes:
- Interrupt mode selection order is:
  INTx (for backward compatibility) -> MSI-X -> MSI.
- Add ioctl() commands:
  - UIO_PCI_GENERIC_INT_MODE_GET: query the current interrupt mode.
  - UIO_PCI_GENERIC_IRQ_NUM_GET: query the maximum number of IRQs.
  - UIO_PCI_GENERIC_IRQ_SET: bind an IRQ to an eventfd (similar to vfio).
- Add mappings for all BARs (memory and port I/O): some devices have
  registers related to MSI/MSI-X handling outside BAR0.

Signed-off-by: Vlad Zolotarov <vladz@xxxxxxxxxxxxxxxxxxxx>
---
New in v2:
- Added #include <linux/uaccess.h> to uio_pci_generic.c
---
drivers/uio/uio_pci_generic.c | 410 +++++++++++++++++++++++++++++++++++++---
include/linux/uio_pci_generic.h | 36 ++++
2 files changed, 423 insertions(+), 23 deletions(-)
create mode 100644 include/linux/uio_pci_generic.h

diff --git a/drivers/uio/uio_pci_generic.c b/drivers/uio/uio_pci_generic.c
index d0b508b..90e7b0d 100644
--- a/drivers/uio/uio_pci_generic.c
+++ b/drivers/uio/uio_pci_generic.c
@@ -22,16 +22,32 @@
#include <linux/device.h>
#include <linux/module.h>
#include <linux/pci.h>
+#include <linux/msi.h>
#include <linux/slab.h>
#include <linux/uio_driver.h>
+#include <linux/uio_pci_generic.h>
+#include <linux/eventfd.h>
+#include <linux/uaccess.h>

#define DRIVER_VERSION "0.01.0"
#define DRIVER_AUTHOR "Michael S. Tsirkin <mst@xxxxxxxxxx>"
#define DRIVER_DESC "Generic UIO driver for PCI 2.3 devices"

+/*
+ * MSI-X bookkeeping for one device: the vector table handed to
+ * pci_enable_msix() and, per vector, the eventfd bound to it.
+ * table[] and ctx[] are allocated together in enable_msix().
+ */
+struct msix_info {
+ int num_irqs; /* vector count, set once MSI-X has been enabled */
+ struct msix_entry *table;
+ struct uio_msix_irq_ctx {
+ struct eventfd_ctx *trigger; /* MSI-x vector to eventfd */
+ char *name; /* name in /proc/interrupts */
+ } *ctx;
+};
+
+/* Per-device driver state; 'info' is what gets registered with the UIO core. */
struct uio_pci_generic_dev {
struct uio_info info;
struct pci_dev *pdev;
+ struct mutex msix_state_lock; /* ioctl mutex */
+ enum uio_int_mode int_mode; /* mode chosen by the enable_*() helpers */
+ struct msix_info msix; /* populated by enable_msix() */
};

static inline struct uio_pci_generic_dev *
@@ -40,9 +56,177 @@ to_uio_pci_generic_dev(struct uio_info *info)
return container_of(info, struct uio_pci_generic_dev, info);
}

-/* Interrupt handler. Read/modify/write the command register to disable
- * the interrupt. */
-static irqreturn_t irqhandler(int irq, struct uio_info *info)
+/*
+ * Undo the ioremap() of every memory map recorded in gdev->info.mem and
+ * clear the stored kernel address so a slot is never unmapped twice.
+ */
+static void release_iomaps(struct uio_pci_generic_dev *gdev)
+{
+	struct uio_mem *maps = gdev->info.mem;
+	int idx;
+
+	for (idx = 0; idx < MAX_UIO_MAPS; idx++) {
+		if (!maps[idx].internal_addr)
+			continue;
+
+		iounmap(maps[idx].internal_addr);
+		maps[idx].internal_addr = NULL;
+	}
+}
+
+/*
+ * Describe the BARs of @pdev in @info.
+ *
+ * Memory BARs are ioremap()ed and recorded in info->mem[] as UIO_MEM_PHYS;
+ * I/O port BARs are recorded in info->port[] as UIO_PORT_X86. BARs whose
+ * start or length is zero are skipped, and BARs that do not fit into
+ * MAX_UIO_MAPS / MAX_UIO_PORT_REGIONS slots are ignored.
+ *
+ * Returns 0 on success or -EINVAL if an ioremap() fails, in which case every
+ * mapping created by this call has already been undone.
+ */
+static int setup_maps(struct pci_dev *pdev, struct uio_info *info)
+{
+	int i, m = 0, p = 0, err;
+	static const char * const bar_names[] = {
+		"BAR0", "BAR1", "BAR2", "BAR3", "BAR4", "BAR5",
+	};
+
+	for (i = 0; i < ARRAY_SIZE(bar_names); i++) {
+		/*
+		 * resource_size_t rather than unsigned long: on 32-bit kernels
+		 * with 64-bit physical addressing a BAR start or length would
+		 * otherwise be silently truncated.
+		 */
+		resource_size_t start = pci_resource_start(pdev, i);
+		resource_size_t len = pci_resource_len(pdev, i);
+		unsigned long flags = pci_resource_flags(pdev, i);
+
+		if (start == 0 || len == 0)
+			continue;
+
+		if (flags & IORESOURCE_MEM) {
+			void __iomem *addr;
+
+			if (m >= MAX_UIO_MAPS)
+				continue;
+
+			addr = ioremap(start, len);
+			if (addr == NULL) {
+				err = -EINVAL;
+				goto fail;
+			}
+
+			info->mem[m].name = bar_names[i];
+			info->mem[m].addr = start;
+			info->mem[m].internal_addr = addr;
+			info->mem[m].size = len;
+			info->mem[m].memtype = UIO_MEM_PHYS;
+			++m;
+		} else if (flags & IORESOURCE_IO) {
+			if (p >= MAX_UIO_PORT_REGIONS)
+				continue;
+
+			info->port[p].name = bar_names[i];
+			info->port[p].start = start;
+			info->port[p].size = len;
+			info->port[p].porttype = UIO_PORT_X86;
+			++p;
+		}
+	}
+
+	return 0;
+fail:
+	/* Only the first m memory slots were mapped by this call. */
+	for (i = 0; i < m; i++) {
+		iounmap(info->mem[i].internal_addr);
+		info->mem[i].internal_addr = NULL;
+	}
+
+	return err;
+}
+
+static irqreturn_t msix_irqhandler(int irq, void *arg);
+
+/*
+ * Bind MSI-X vector @vec to the eventfd referred to by @fd.
+ *
+ * An eventfd already bound to the vector is released first. A negative @fd
+ * only unbinds. Returns 0 on success, -ERANGE if @vec is not a valid vector
+ * index, or the error reported by eventfd_ctx_fdget() / request_irq().
+ *
+ * The ioctl path calls this with msix_state_lock held.
+ */
+static int set_irq_eventfd(struct uio_pci_generic_dev *gdev, int vec, int fd)
+{
+	struct uio_msix_irq_ctx *ctx;
+	struct eventfd_ctx *trigger;
+	struct pci_dev *pdev = gdev->pdev;
+	int irq, err;
+
+	/*
+	 * @vec arrives from userspace as a signed int: negative values must
+	 * be rejected too, otherwise they index before table[] and ctx[].
+	 */
+	if (vec < 0 || vec >= gdev->msix.num_irqs) {
+		dev_notice(&pdev->dev, "vec %d is out of range (num_vec %d)\n",
+			   vec, gdev->msix.num_irqs);
+		return -ERANGE;
+	}
+
+	irq = gdev->msix.table[vec].vector;
+
+	/* Cleanup existing irq mapping */
+	ctx = &gdev->msix.ctx[vec];
+	if (ctx->trigger) {
+		free_irq(irq, ctx->trigger);
+		eventfd_ctx_put(ctx->trigger);
+		ctx->trigger = NULL;
+	}
+
+	/* Passing -1 is used to disable interrupt */
+	if (fd < 0)
+		return 0;
+
+	trigger = eventfd_ctx_fdget(fd);
+	if (IS_ERR(trigger)) {
+		err = PTR_ERR(trigger);
+		dev_notice(&pdev->dev,
+			   "eventfd ctx get failed: %d\n", err);
+		return err;
+	}
+
+	/* The eventfd context doubles as the dev_id passed to the handler. */
+	err = request_irq(irq, msix_irqhandler, 0, ctx->name, trigger);
+	if (err) {
+		dev_notice(&pdev->dev, "request irq failed: %d\n", err);
+		eventfd_ctx_put(trigger);
+		return err;
+	}
+
+	dev_dbg(&pdev->dev, "map vector %d to fd %d trigger %p\n",
+		vec, fd, trigger);
+	ctx->trigger = trigger;
+
+	return 0;
+}
+
+/*
+ * ioctl() handler for the UIO device.
+ *
+ * UIO_PCI_GENERIC_IRQ_SET      - bind an MSI-X vector to an eventfd; fails
+ *                                with -EOPNOTSUPP unless MSI-X mode is active.
+ * UIO_PCI_GENERIC_IRQ_NUM_GET  - store the IRQ count at @arg: 0 with no
+ *                                interrupt mode, the vector count in MSI-X
+ *                                mode, 1 otherwise.
+ * UIO_PCI_GENERIC_INT_MODE_GET - store the current interrupt mode at @arg.
+ *
+ * Any other command yields -EOPNOTSUPP.
+ */
+static int uio_pci_generic_ioctl(struct uio_info *info, unsigned int cmd,
+				 unsigned long arg)
+{
+	struct uio_pci_generic_dev *gdev = to_uio_pci_generic_dev(info);
+	u32 __user *uptr = (u32 __user *)arg;
+	struct uio_pci_generic_irq_set req;
+	int ret;
+
+	switch (cmd) {
+	case UIO_PCI_GENERIC_IRQ_SET:
+		if (copy_from_user(&req, (void __user *)arg, sizeof(req)))
+			return -EFAULT;
+
+		/* The mutex keeps two IRQ_SET calls from interleaving. */
+		mutex_lock(&gdev->msix_state_lock);
+		if (gdev->int_mode == UIO_INT_MODE_MSIX)
+			ret = set_irq_eventfd(gdev, req.vec, req.fd);
+		else
+			ret = -EOPNOTSUPP;
+		mutex_unlock(&gdev->msix_state_lock);
+
+		return ret;
+	case UIO_PCI_GENERIC_IRQ_NUM_GET:
+		if (gdev->int_mode == UIO_INT_MODE_NONE)
+			return put_user(0, uptr);
+		if (gdev->int_mode == UIO_INT_MODE_MSIX)
+			return put_user(gdev->msix.num_irqs, uptr);
+
+		return put_user(1, uptr);
+	case UIO_PCI_GENERIC_INT_MODE_GET:
+		return put_user(gdev->int_mode, uptr);
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+/* INT#X interrupt handler. */
+static irqreturn_t intx_irqhandler(int irq, struct uio_info *info)
{
struct uio_pci_generic_dev *gdev = to_uio_pci_generic_dev(info);

@@ -53,8 +237,162 @@ static irqreturn_t irqhandler(int irq, struct uio_info *info)
return IRQ_HANDLED;
}

-static int probe(struct pci_dev *pdev,
- const struct pci_device_id *id)
+/*
+ * MSI interrupt handler.
+ *
+ * Performs no device access; it only reports the interrupt as handled.
+ */
+static irqreturn_t msi_irqhandler(int irq, struct uio_info *info)
+{
+ /* UIO core will signal the user process. */
+ return IRQ_HANDLED;
+}
+
+/*
+ * MSI-X interrupt handler.
+ *
+ * @arg is the eventfd context that was passed as dev_id to request_irq();
+ * the handler signals that eventfd and reports the interrupt as handled.
+ */
+static irqreturn_t msix_irqhandler(int irq, void *arg)
+{
+	struct eventfd_ctx *efd = arg;
+
+	pr_devel("irq %u trigger %p\n", irq, efd);
+	eventfd_signal(efd, 1);
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * Select legacy INTx mode when the device has an IRQ assigned and supports
+ * INTx masking. Fills in the uio_info interrupt fields and returns true on
+ * success; returns false without touching @gdev otherwise.
+ */
+static bool enable_intx(struct uio_pci_generic_dev *gdev)
+{
+	struct pci_dev *pdev = gdev->pdev;
+	struct uio_info *uinfo = &gdev->info;
+
+	if (pdev->irq && pci_intx_mask_supported(pdev)) {
+		gdev->int_mode = UIO_INT_MODE_INTX;
+		uinfo->irq = pdev->irq;
+		uinfo->irq_flags = IRQF_SHARED;
+		uinfo->handler = intx_irqhandler;
+
+		return true;
+	}
+
+	return false;
+}
+
+/*
+ * Enable PCI bus mastering for @pdev, warn about the consequences in the
+ * kernel log and mark the kernel as tainted (TAINT_USER).
+ */
+static void set_pci_master(struct pci_dev *pdev)
+{
+	pci_set_master(pdev);
+	/* Terminate the message with a newline so it forms a complete log line. */
+	dev_warn(&pdev->dev, "Enabling PCI bus mastering. Bogus userspace application is able to trash kernel memory using DMA\n");
+	add_taint(TAINT_USER, LOCKDEP_STILL_OK);
+}
+
+/*
+ * Try to switch the device to MSI mode.
+ *
+ * Bus mastering is enabled first. On success the uio_info interrupt fields
+ * are filled in and true is returned. On failure bus mastering is turned
+ * off again and false is returned.
+ */
+static bool enable_msi(struct uio_pci_generic_dev *gdev)
+{
+	struct pci_dev *pdev = gdev->pdev;
+
+	set_pci_master(pdev);
+
+	if (pci_enable_msi(pdev)) {
+		/* Do not leave bus mastering on when no MSI was set up. */
+		pci_clear_master(pdev);
+		return false;
+	}
+
+	gdev->int_mode = UIO_INT_MODE_MSI;
+	gdev->info.irq = pdev->irq;
+	gdev->info.irq_flags = 0;
+	gdev->info.handler = msi_irqhandler;
+
+	return true;
+}
+
+/*
+ * Try to switch the device to MSI-X mode using every vector it advertises.
+ *
+ * Allocates the vector table, the per-vector contexts and their
+ * /proc/interrupts names, enables bus mastering and then MSI-X. Returns true
+ * on success. On any failure everything allocated here is freed, the
+ * pointers in gdev->msix are reset, bus mastering is cleared if it was
+ * enabled here, and false is returned.
+ */
+static bool enable_msix(struct uio_pci_generic_dev *gdev)
+{
+	struct pci_dev *pdev = gdev->pdev;
+	struct msix_info *msix = &gdev->msix;
+	int i, vectors = pci_msix_vec_count(pdev);
+
+	if (vectors <= 0)
+		return false;
+
+	msix->table = kcalloc(vectors, sizeof(*msix->table), GFP_KERNEL);
+	if (!msix->table) {
+		dev_err(&pdev->dev, "Failed to allocate memory for MSI-X table\n");
+		return false;
+	}
+
+	msix->ctx = kcalloc(vectors, sizeof(*msix->ctx), GFP_KERNEL);
+	if (!msix->ctx) {
+		dev_err(&pdev->dev, "Failed to allocate memory for MSI-X contexts\n");
+		goto err_ctx_alloc;
+	}
+
+	for (i = 0; i < vectors; i++) {
+		msix->table[i].entry = i;
+		msix->ctx[i].name = kasprintf(GFP_KERNEL,
+					      KBUILD_MODNAME "[%d](%s)",
+					      i, pci_name(pdev));
+		if (!msix->ctx[i].name)
+			goto err_name_alloc;
+	}
+
+	set_pci_master(pdev);
+
+	/*
+	 * All-or-nothing: pci_enable_msix_exact() returns 0 or a negative
+	 * errno, never the positive "retry with fewer vectors" hint.
+	 */
+	if (pci_enable_msix_exact(pdev, msix->table, vectors))
+		goto err_msix_enable;
+
+	gdev->int_mode = UIO_INT_MODE_MSIX;
+	gdev->info.irq = UIO_IRQ_CUSTOM;
+	msix->num_irqs = vectors;
+
+	return true;
+
+err_msix_enable:
+	pci_clear_master(pdev);
+err_name_alloc:
+	/* ctx[] was zeroed by kcalloc(), so names never allocated are NULL. */
+	for (i = 0; i < vectors; i++)
+		kfree(msix->ctx[i].name);
+
+	kfree(msix->ctx);
+	msix->ctx = NULL;
+err_ctx_alloc:
+	kfree(msix->table);
+	msix->table = NULL;
+
+	return false;
+}
+
+/**
+ * disable_intr - disable interrupts and free the related resources
+ * @gdev: device handle
+ *
+ * Meant to be called once the corresponding UIO device has been
+ * unregistered, so that no ioctl() is expected to be running; no locking is
+ * done here for that reason.
+ *
+ * In MSI mode MSI and bus mastering are switched off. In MSI-X mode every
+ * bound eventfd is released and the vector names are freed before MSI-X and
+ * bus mastering are switched off and the tables are freed. Other modes need
+ * no teardown.
+ */
+static void disable_intr(struct uio_pci_generic_dev *gdev)
+{
+	struct pci_dev *pdev = gdev->pdev;
+	struct msix_info *msix = &gdev->msix;
+	int vec;
+
+	if (gdev->int_mode == UIO_INT_MODE_MSI) {
+		pci_disable_msi(pdev);
+		pci_clear_master(pdev);
+		return;
+	}
+
+	if (gdev->int_mode != UIO_INT_MODE_MSIX)
+		return;
+
+	for (vec = 0; vec < msix->num_irqs; vec++) {
+		/* Unbinding frees the IRQ and drops the eventfd reference. */
+		if (msix->ctx[vec].trigger)
+			set_irq_eventfd(gdev, vec, -1);
+
+		kfree(msix->ctx[vec].name);
+	}
+
+	pci_disable_msix(pdev);
+	pci_clear_master(pdev);
+	kfree(msix->ctx);
+	kfree(msix->table);
+}
+
+
+static int probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
struct uio_pci_generic_dev *gdev;
int err;
@@ -66,42 +404,64 @@ static int probe(struct pci_dev *pdev,
return err;
}

- if (!pdev->irq) {
- dev_warn(&pdev->dev, "No IRQ assigned to device: "
- "no support for interrupts?\n");
- pci_disable_device(pdev);
- return -ENODEV;
- }
-
- if (!pci_intx_mask_supported(pdev)) {
- err = -ENODEV;
- goto err_verify;
- }
-
gdev = kzalloc(sizeof(struct uio_pci_generic_dev), GFP_KERNEL);
if (!gdev) {
err = -ENOMEM;
goto err_alloc;
}

+ gdev->pdev = pdev;
gdev->info.name = "uio_pci_generic";
gdev->info.version = DRIVER_VERSION;
- gdev->info.irq = pdev->irq;
- gdev->info.irq_flags = IRQF_SHARED;
- gdev->info.handler = irqhandler;
- gdev->pdev = pdev;
+ gdev->info.ioctl = uio_pci_generic_ioctl;
+ mutex_init(&gdev->msix_state_lock);
+
+ err = pci_request_regions(pdev, "uio_pci_generic");
+ if (err != 0) {
+ dev_err(&pdev->dev, "Cannot request regions\n");
+ goto err_request_regions;
+ }
+
+ /* Enable the corresponding interrupt mode. Try to enable INT#X first
+ * for backward compatibility.
+ */
+ if (enable_intx(gdev))
+ dev_info(&pdev->dev, "Using INT#x mode: IRQ %ld",
+ gdev->info.irq);
+ else if (enable_msix(gdev))
+ dev_info(&pdev->dev, "Using MSI-X mode: number of IRQs %d",
+ gdev->msix.num_irqs);
+ else if (enable_msi(gdev))
+ dev_info(&pdev->dev, "Using MSI mode: IRQ %ld", gdev->info.irq);
+ else {
+ err = -ENODEV;
+ goto err_verify;
+ }
+
+ /* remap resources */
+ err = setup_maps(pdev, &gdev->info);
+ if (err)
+ goto err_maps;

err = uio_register_device(&pdev->dev, &gdev->info);
if (err)
goto err_register;
+
pci_set_drvdata(pdev, gdev);

return 0;
+
err_register:
+ release_iomaps(gdev);
+err_maps:
+ disable_intr(gdev);
+err_verify:
+ pci_release_regions(pdev);
+err_request_regions:
kfree(gdev);
err_alloc:
-err_verify:
pci_disable_device(pdev);
+
return err;
}

@@ -110,8 +470,12 @@ static void remove(struct pci_dev *pdev)
struct uio_pci_generic_dev *gdev = pci_get_drvdata(pdev);

uio_unregister_device(&gdev->info);
- pci_disable_device(pdev);
+ disable_intr(gdev);
+ release_iomaps(gdev);
+ pci_release_regions(pdev);
kfree(gdev);
+ pci_disable_device(pdev);
+ pci_set_drvdata(pdev, NULL);
}

static struct pci_driver uio_pci_driver = {
diff --git a/include/linux/uio_pci_generic.h b/include/linux/uio_pci_generic.h
new file mode 100644
index 0000000..10716fc
--- /dev/null
+++ b/include/linux/uio_pci_generic.h
@@ -0,0 +1,36 @@
+/*
+ * include/linux/uio_pci_generic.h
+ *
+ * Userspace generic PCI IO driver.
+ *
+ * Licensed under the GPLv2 only.
+ */
+
+#ifndef _UIO_PCI_GENERIC_H_
+#define _UIO_PCI_GENERIC_H_
+
+#include <linux/ioctl.h>
+
+/* Interrupt mode of the device, as reported by UIO_PCI_GENERIC_INT_MODE_GET. */
+enum uio_int_mode {
+ UIO_INT_MODE_NONE,
+ UIO_INT_MODE_INTX,
+ UIO_INT_MODE_MSI,
+ UIO_INT_MODE_MSIX
+};
+
+/* Argument of UIO_PCI_GENERIC_IRQ_SET: bind the requested IRQ to the given eventfd */
+struct uio_pci_generic_irq_set {
+ int vec; /* index of the IRQ to connect to starting from 0 */
+ int fd; /* eventfd to signal; a negative value unbinds the IRQ */
+};
+
+#define UIO_PCI_GENERIC_BASE 0x86
+
+/*
+ * The direction bits are from the point of view of userspace: IRQ_SET
+ * passes a structure to the kernel (_IOW), while IRQ_NUM_GET and
+ * INT_MODE_GET return a 32-bit value from the kernel (_IOR).
+ */
+#define UIO_PCI_GENERIC_IRQ_SET _IOW('I', UIO_PCI_GENERIC_BASE + 1, \
+	struct uio_pci_generic_irq_set)
+#define UIO_PCI_GENERIC_IRQ_NUM_GET _IOR('I', UIO_PCI_GENERIC_BASE + 2, \
+	uint32_t)
+#define UIO_PCI_GENERIC_INT_MODE_GET _IOR('I', UIO_PCI_GENERIC_BASE + 3, \
+	uint32_t)
+
+#endif /* _UIO_PCI_GENERIC_H_ */
--
2.1.0

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/