[PATCH v13 08/13] vfio/pci: Add framework for custom interrupt indices

From: Eric Auger
Date: Sun Apr 11 2021 - 07:49:04 EST


Implement IRQ capability chain infrastructure. All interrupt
indexes beyond VFIO_PCI_NUM_IRQS are handled as extended
interrupts. They are registered with a specific type/subtype
and supported flags.

Signed-off-by: Eric Auger <eric.auger@xxxxxxxxxx>

---

v11 -> v12:
- check !vdev->num_ext_irqs in vfio_pci_set_ext_irq_trigger()
[Shameer, Qubingbing]
---
drivers/vfio/pci/vfio_pci.c | 99 +++++++++++++++++++++++------
drivers/vfio/pci/vfio_pci_intrs.c | 62 ++++++++++++++++++
drivers/vfio/pci/vfio_pci_private.h | 14 ++++
3 files changed, 157 insertions(+), 18 deletions(-)

diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
index eef76560be55..b3fc6ed4ed7a 100644
--- a/drivers/vfio/pci/vfio_pci.c
+++ b/drivers/vfio/pci/vfio_pci.c
@@ -610,6 +610,14 @@ static void vfio_pci_disable(struct vfio_pci_device *vdev)
ret = iommu_unregister_device_fault_handler(&vdev->pdev->dev);
WARN_ON(ret == -EBUSY);

+ for (i = 0; i < vdev->num_ext_irqs; i++)
+ vfio_pci_set_irqs_ioctl(vdev, VFIO_IRQ_SET_DATA_NONE |
+ VFIO_IRQ_SET_ACTION_TRIGGER,
+ VFIO_PCI_NUM_IRQS + i, 0, 0, NULL);
+ vdev->num_ext_irqs = 0;
+ kfree(vdev->ext_irqs);
+ vdev->ext_irqs = NULL;
+
/* Device closed, don't need mutex here */
list_for_each_entry_safe(ioeventfd, ioeventfd_tmp,
&vdev->ioeventfds_list, next) {
@@ -825,6 +833,9 @@ static int vfio_pci_get_irq_count(struct vfio_pci_device *vdev, int irq_type)
return 1;
} else if (irq_type == VFIO_PCI_REQ_IRQ_INDEX) {
return 1;
+ } else if (irq_type >= VFIO_PCI_NUM_IRQS &&
+ irq_type < VFIO_PCI_NUM_IRQS + vdev->num_ext_irqs) {
+ return 1;
}

return 0;
@@ -1011,7 +1022,7 @@ static long vfio_pci_ioctl(void *device_data,
info.flags |= VFIO_DEVICE_FLAGS_RESET;

info.num_regions = VFIO_PCI_NUM_REGIONS + vdev->num_regions;
- info.num_irqs = VFIO_PCI_NUM_IRQS;
+ info.num_irqs = VFIO_PCI_NUM_IRQS + vdev->num_ext_irqs;

ret = vfio_pci_info_zdev_add_caps(vdev, &caps);
if (ret && ret != -ENODEV) {
@@ -1187,36 +1198,87 @@ static long vfio_pci_ioctl(void *device_data,

} else if (cmd == VFIO_DEVICE_GET_IRQ_INFO) {
struct vfio_irq_info info;
+ struct vfio_info_cap caps = { .buf = NULL, .size = 0 };
+ unsigned long capsz;

minsz = offsetofend(struct vfio_irq_info, count);

+ /* For backward compatibility, cannot require this */
+ capsz = offsetofend(struct vfio_irq_info, cap_offset);
+
if (copy_from_user(&info, (void __user *)arg, minsz))
return -EFAULT;

- if (info.argsz < minsz || info.index >= VFIO_PCI_NUM_IRQS)
+ if (info.argsz < minsz ||
+ info.index >= VFIO_PCI_NUM_IRQS + vdev->num_ext_irqs)
return -EINVAL;

- switch (info.index) {
- case VFIO_PCI_INTX_IRQ_INDEX ... VFIO_PCI_MSIX_IRQ_INDEX:
- case VFIO_PCI_REQ_IRQ_INDEX:
- break;
- case VFIO_PCI_ERR_IRQ_INDEX:
- if (pci_is_pcie(vdev->pdev))
- break;
- fallthrough;
- default:
- return -EINVAL;
- }
+ if (info.argsz >= capsz)
+ minsz = capsz;

info.flags = VFIO_IRQ_INFO_EVENTFD;

- info.count = vfio_pci_get_irq_count(vdev, info.index);
-
- if (info.index == VFIO_PCI_INTX_IRQ_INDEX)
+ switch (info.index) {
+ case VFIO_PCI_INTX_IRQ_INDEX:
info.flags |= (VFIO_IRQ_INFO_MASKABLE |
VFIO_IRQ_INFO_AUTOMASKED);
- else
+ break;
+ case VFIO_PCI_MSI_IRQ_INDEX ... VFIO_PCI_MSIX_IRQ_INDEX:
+ case VFIO_PCI_REQ_IRQ_INDEX:
info.flags |= VFIO_IRQ_INFO_NORESIZE;
+ break;
+ case VFIO_PCI_ERR_IRQ_INDEX:
+ info.flags |= VFIO_IRQ_INFO_NORESIZE;
+ if (!pci_is_pcie(vdev->pdev))
+ return -EINVAL;
+ break;
+ default:
+ {
+ struct vfio_irq_info_cap_type cap_type = {
+ .header.id = VFIO_IRQ_INFO_CAP_TYPE,
+ .header.version = 1 };
+ int ret, i;
+
+ if (info.index >= VFIO_PCI_NUM_IRQS +
+ vdev->num_ext_irqs)
+ return -EINVAL;
+ info.index = array_index_nospec(info.index,
+ VFIO_PCI_NUM_IRQS +
+ vdev->num_ext_irqs);
+ i = info.index - VFIO_PCI_NUM_IRQS;
+
+ info.flags = vdev->ext_irqs[i].flags;
+ cap_type.type = vdev->ext_irqs[i].type;
+ cap_type.subtype = vdev->ext_irqs[i].subtype;
+
+ ret = vfio_info_add_capability(&caps,
+ &cap_type.header,
+ sizeof(cap_type));
+ if (ret)
+ return ret;
+ }
+ }
+
+ info.count = vfio_pci_get_irq_count(vdev, info.index);
+
+ if (caps.size) {
+ info.flags |= VFIO_IRQ_INFO_FLAG_CAPS;
+ if (info.argsz < sizeof(info) + caps.size) {
+ info.argsz = sizeof(info) + caps.size;
+ info.cap_offset = 0;
+ } else {
+ vfio_info_cap_shift(&caps, sizeof(info));
+ if (copy_to_user((void __user *)arg +
+ sizeof(info), caps.buf,
+ caps.size)) {
+ kfree(caps.buf);
+ return -EFAULT;
+ }
+ info.cap_offset = sizeof(info);
+ }
+
+ kfree(caps.buf);
+ }

return copy_to_user((void __user *)arg, &info, minsz) ?
-EFAULT : 0;
@@ -1235,7 +1297,8 @@ static long vfio_pci_ioctl(void *device_data,
max = vfio_pci_get_irq_count(vdev, hdr.index);

ret = vfio_set_irqs_validate_and_prepare(&hdr, max,
- VFIO_PCI_NUM_IRQS, &data_size);
+ VFIO_PCI_NUM_IRQS + vdev->num_ext_irqs,
+ &data_size);
if (ret)
return ret;

diff --git a/drivers/vfio/pci/vfio_pci_intrs.c b/drivers/vfio/pci/vfio_pci_intrs.c
index 869dce5f134d..d67995fe872f 100644
--- a/drivers/vfio/pci/vfio_pci_intrs.c
+++ b/drivers/vfio/pci/vfio_pci_intrs.c
@@ -19,6 +19,7 @@
#include <linux/vfio.h>
#include <linux/wait.h>
#include <linux/slab.h>
+#include <linux/nospec.h>

#include "vfio_pci_private.h"

@@ -635,6 +636,24 @@ static int vfio_pci_set_req_trigger(struct vfio_pci_device *vdev,
count, flags, data);
}

+static int vfio_pci_set_ext_irq_trigger(struct vfio_pci_device *vdev,
+ unsigned int index, unsigned int start,
+ unsigned int count, uint32_t flags,
+ void *data)
+{
+ int i;
+
+ if (start != 0 || count > 1 || !vdev->num_ext_irqs)
+ return -EINVAL;
+
+ index = array_index_nospec(index,
+ VFIO_PCI_NUM_IRQS + vdev->num_ext_irqs);
+ i = index - VFIO_PCI_NUM_IRQS;
+
+ return vfio_pci_set_ctx_trigger_single(&vdev->ext_irqs[i].trigger,
+ count, flags, data);
+}
+
int vfio_pci_set_irqs_ioctl(struct vfio_pci_device *vdev, uint32_t flags,
unsigned index, unsigned start, unsigned count,
void *data)
@@ -684,6 +703,13 @@ int vfio_pci_set_irqs_ioctl(struct vfio_pci_device *vdev, uint32_t flags,
break;
}
break;
+ default:
+ switch (flags & VFIO_IRQ_SET_ACTION_TYPE_MASK) {
+ case VFIO_IRQ_SET_ACTION_TRIGGER:
+ func = vfio_pci_set_ext_irq_trigger;
+ break;
+ }
+ break;
}

if (!func)
@@ -691,3 +717,39 @@ int vfio_pci_set_irqs_ioctl(struct vfio_pci_device *vdev, uint32_t flags,

return func(vdev, index, start, count, flags, data);
}
+
+int vfio_pci_get_ext_irq_index(struct vfio_pci_device *vdev,
+ unsigned int type, unsigned int subtype)
+{
+ int i;
+
+ for (i = 0; i < vdev->num_ext_irqs; i++) {
+ if (vdev->ext_irqs[i].type == type &&
+ vdev->ext_irqs[i].subtype == subtype) {
+ return i;
+ }
+ }
+ return -EINVAL;
+}
+
+int vfio_pci_register_irq(struct vfio_pci_device *vdev,
+ unsigned int type, unsigned int subtype,
+ u32 flags)
+{
+ struct vfio_ext_irq *ext_irqs;
+
+ ext_irqs = krealloc(vdev->ext_irqs,
+ (vdev->num_ext_irqs + 1) * sizeof(*ext_irqs),
+ GFP_KERNEL);
+ if (!ext_irqs)
+ return -ENOMEM;
+
+ vdev->ext_irqs = ext_irqs;
+
+ vdev->ext_irqs[vdev->num_ext_irqs].type = type;
+ vdev->ext_irqs[vdev->num_ext_irqs].subtype = subtype;
+ vdev->ext_irqs[vdev->num_ext_irqs].flags = flags;
+ vdev->ext_irqs[vdev->num_ext_irqs].trigger = NULL;
+ vdev->num_ext_irqs++;
+ return 0;
+}
diff --git a/drivers/vfio/pci/vfio_pci_private.h b/drivers/vfio/pci/vfio_pci_private.h
index 76cb3bd11375..e2a999107bcb 100644
--- a/drivers/vfio/pci/vfio_pci_private.h
+++ b/drivers/vfio/pci/vfio_pci_private.h
@@ -77,6 +77,13 @@ struct vfio_pci_region {
u32 flags;
};

+struct vfio_ext_irq {
+ u32 type;
+ u32 subtype;
+ u32 flags;
+ struct eventfd_ctx *trigger;
+};
+
struct vfio_pci_dummy_resource {
struct resource resource;
int index;
@@ -111,6 +118,8 @@ struct vfio_pci_device {
struct vfio_pci_irq_ctx *ctx;
int num_ctx;
int irq_type;
+ struct vfio_ext_irq *ext_irqs;
+ int num_ext_irqs;
int num_regions;
struct vfio_pci_region *region;
u8 msi_qmax;
@@ -154,6 +163,11 @@ struct vfio_pci_device {

extern void vfio_pci_intx_mask(struct vfio_pci_device *vdev);
extern void vfio_pci_intx_unmask(struct vfio_pci_device *vdev);
+extern int vfio_pci_register_irq(struct vfio_pci_device *vdev,
+ unsigned int type, unsigned int subtype,
+ u32 flags);
+extern int vfio_pci_get_ext_irq_index(struct vfio_pci_device *vdev,
+ unsigned int type, unsigned int subtype);

extern int vfio_pci_set_irqs_ioctl(struct vfio_pci_device *vdev,
uint32_t flags, unsigned index,
--
2.26.3