On 2019/8/8 20:07, Ben Luo wrote:ok, keep reverse christmas tree in v2
When userspace (e.g. qemu) triggers a switch between KVMstruct eventfd_ctx *trigger = NULL;
irqfd and userspace eventfd, only dev_id of irq action
(i.e. the "trigger" in this patch's context) will be
changed, but a free-then-request-irq action is taken in
current code. And, irq affinity setting in VM will also
trigger a free-then-request-irq action, which actully
changes nothing, but only fires a producer re-registration
to update irte in case that posted-interrupt is in use.
This patch makes use of update_irq_devid() and optimize
both cases above, which reduces the risk of losing interrupt
and also cuts some overhead.
Signed-off-by: Ben Luo <luoben@xxxxxxxxxxxxxxxxx>
Reviewed-by: Liu Jiang <gerry@xxxxxxxxxxxxxxxxx>
---
drivers/vfio/pci/vfio_pci_intrs.c | 100 +++++++++++++++++++++++---------------
1 file changed, 62 insertions(+), 38 deletions(-)
diff --git a/drivers/vfio/pci/vfio_pci_intrs.c b/drivers/vfio/pci/vfio_pci_intrs.c
index 3fa3f72..1323dc8 100644
--- a/drivers/vfio/pci/vfio_pci_intrs.c
+++ b/drivers/vfio/pci/vfio_pci_intrs.c
@@ -285,69 +285,93 @@ static int vfio_msi_set_vector_signal(struct vfio_pci_device *vdev,
int vector, int fd, bool msix)
{
struct pci_dev *pdev = vdev->pdev;
- struct eventfd_ctx *trigger;
+ struct eventfd_ctx *trigger = NULL;
struct pci_dev *pdev = vdev->pdev;
to maintain reverse christmas tree?
int irq, ret;It seems vdev->ctx[vector].trigger is freed even if fd < 0 before
if (vector < 0 || vector >= vdev->num_ctx)
return -EINVAL;
+ if (fd >= 0) {
+ trigger = eventfd_ctx_fdget(fd);
+ if (IS_ERR(trigger))
+ return PTR_ERR(trigger);
this patch. If it return here, vdev->ctx[vector].trigger is not free?
I will reformat this chunk in v2
+ }Maybe adjust it a litte to reduce indent and to improve readability?
+
irq = pci_irq_vector(pdev, vector);
if (vdev->ctx[vector].trigger) {
- free_irq(irq, vdev->ctx[vector].trigger);
- irq_bypass_unregister_producer(&vdev->ctx[vector].producer);
- kfree(vdev->ctx[vector].name);
- eventfd_ctx_put(vdev->ctx[vector].trigger);
- vdev->ctx[vector].trigger = NULL;
+ if (vdev->ctx[vector].trigger != trigger) {
+ if (trigger) {
+ ret = update_irq_devid(irq,
+ vdev->ctx[vector].trigger, trigger);
+ if (unlikely(ret)) {
+ dev_info(&pdev->dev,
+ "update_irq_devid %d (token %p) fails: %d\n",
+ irq, vdev->ctx[vector].trigger, ret);
+ eventfd_ctx_put(trigger);
+ return ret;
+ }
+ irq_bypass_unregister_producer(&vdev->ctx[vector].producer);
+ eventfd_ctx_put(vdev->ctx[vector].trigger);
+ vdev->ctx[vector].producer.token = trigger;
+ vdev->ctx[vector].trigger = trigger;
+ } else {
+ free_irq(irq, vdev->ctx[vector].trigger);
+ irq_bypass_unregister_producer(&vdev->ctx[vector].producer);
+ kfree(vdev->ctx[vector].name);
+ eventfd_ctx_put(vdev->ctx[vector].trigger);
+ vdev->ctx[vector].trigger = NULL;
+ }
+ } else
+ irq_bypass_unregister_producer(&vdev->ctx[vector].producer);
}
if (vdev->ctx[vector].trigger && vdev->ctx[vector].trigger == trigger) {
irq_bypass_unregister_producer(&vdev->ctx[vector].producer);
} else if (vdev->ctx[vector].trigger && !trigger) {
free_irq(irq, vdev->ctx[vector].trigger);
irq_bypass_unregister_producer(&vdev->ctx[vector].producer);
kfree(vdev->ctx[vector].name);
eventfd_ctx_put(vdev->ctx[vector].trigger);
vdev->ctx[vector].trigger = NULL;
} else if (vdev->ctx[vector].trigger) {
ret = update_irq_devid(irq, vdev->ctx[vector].trigger, trigger);
if (unlikely(ret)) {
dev_info(&pdev->dev,
"update_irq_devid %d (token %p) fails: %d\n",
irq, vdev->ctx[vector].trigger, ret);
eventfd_ctx_put(trigger);
return ret;
}
irq_bypass_unregister_producer(&vdev->ctx[vector].producer);
eventfd_ctx_put(vdev->ctx[vector].trigger);
vdev->ctx[vector].producer.token = trigger;
vdev->ctx[vector].trigger = trigger;
}
ok, make it this way in v2if (fd < 0)It may be common to use the below check to do NULL checking:
return 0;
- vdev->ctx[vector].name = kasprintf(GFP_KERNEL, "vfio-msi%s[%d](%s)",
- msix ? "x" : "", vector,
- pci_name(pdev));
- if (!vdev->ctx[vector].name)
- return -ENOMEM;
+ if (vdev->ctx[vector].trigger == NULL) {
If (!vdev->ctx[vector].trigger)
+ vdev->ctx[vector].name = kasprintf(GFP_KERNEL, "vfio-msi%s[%d](%s)",
+ msix ? "x" : "", vector,
+ pci_name(pdev));
+ if (!vdev->ctx[vector].name) {
+ eventfd_ctx_put(trigger);
+ return -ENOMEM;
+ }
- trigger = eventfd_ctx_fdget(fd);
- if (IS_ERR(trigger)) {
- kfree(vdev->ctx[vector].name);
- return PTR_ERR(trigger);
- }
+ /*
+ * The MSIx vector table resides in device memory which may be cleared
+ * via backdoor resets. We don't allow direct access to the vector
+ * table so even if a userspace driver attempts to save/restore around
+ * such a reset it would be unsuccessful. To avoid this, restore the
+ * cached value of the message prior to enabling.
+ */
+ if (msix) {
+ struct msi_msg msg;
- /*
- * The MSIx vector table resides in device memory which may be cleared
- * via backdoor resets. We don't allow direct access to the vector
- * table so even if a userspace driver attempts to save/restore around
- * such a reset it would be unsuccessful. To avoid this, restore the
- * cached value of the message prior to enabling.
- */
- if (msix) {
- struct msi_msg msg;
+ get_cached_msi_msg(irq, &msg);
+ pci_write_msi_msg(irq, &msg);
+ }
- get_cached_msi_msg(irq, &msg);
- pci_write_msi_msg(irq, &msg);
- }
+ ret = request_irq(irq, vfio_msihandler, 0,
+ vdev->ctx[vector].name, trigger);
+ if (ret) {
+ kfree(vdev->ctx[vector].name);
+ eventfd_ctx_put(trigger);
+ return ret;
+ }
- ret = request_irq(irq, vfio_msihandler, 0,
- vdev->ctx[vector].name, trigger);
- if (ret) {
- kfree(vdev->ctx[vector].name);
- eventfd_ctx_put(trigger);
- return ret;
+ vdev->ctx[vector].producer.token = trigger;
+ vdev->ctx[vector].producer.irq = irq;
+ vdev->ctx[vector].trigger = trigger;
}
- vdev->ctx[vector].producer.token = trigger;
- vdev->ctx[vector].producer.irq = irq;
+ /* always update irte for posted mode */
ret = irq_bypass_register_producer(&vdev->ctx[vector].producer);
if (unlikely(ret))
dev_info(&pdev->dev,
"irq bypass producer (token %p) registration fails: %d\n",
vdev->ctx[vector].producer.token, ret);
- vdev->ctx[vector].trigger = trigger;
-
return 0;
}