Re: [PATCH 2/9] virtio_pci: use shared interrupts for virtqueues

From: Jason Wang
Date: Tue Feb 07 2017 - 02:18:19 EST




On 2017å02æ06æ 01:15, Christoph Hellwig wrote:
This lets IRQ layer handle dispatching IRQs to separate handlers for the
case where we don't have per-VQ MSI-X vectors, and allows us to greatly
simplify the code based on the assumption that we always have interrupt
vector 0 (legacy INTx or config interrupt for MSI-X) available, and
any other interrupt is request/freed throught the VQ, even if the
actual interrupt line might be shared in some cases.

This allows removing a great deal of variables keeping track of the
interrupt state in struct virtio_pci_device, as we can now simply walk the
list of VQs and deal with per-VQ interrupt handlers there, and only treat
vector 0 special.

Additionally clean up the VQ allocation code to properly unwind on error
instead of having a single global cleanup label, which is error prone,
and in this case also leads to more code.

Signed-off-by: Christoph Hellwig <hch@xxxxxx>

Reviewed-by: Jason Wang <jasowang@xxxxxxxxxx>

---
drivers/virtio/virtio_pci_common.c | 235 ++++++++++++++++---------------------
drivers/virtio/virtio_pci_common.h | 16 +--
2 files changed, 106 insertions(+), 145 deletions(-)

diff --git a/drivers/virtio/virtio_pci_common.c b/drivers/virtio/virtio_pci_common.c
index a33767318cbf..274dc1ff09c0 100644
--- a/drivers/virtio/virtio_pci_common.c
+++ b/drivers/virtio/virtio_pci_common.c
@@ -33,10 +33,8 @@ void vp_synchronize_vectors(struct virtio_device *vdev)
struct virtio_pci_device *vp_dev = to_vp_device(vdev);
int i;
- if (vp_dev->intx_enabled)
- synchronize_irq(vp_dev->pci_dev->irq);
-
- for (i = 0; i < vp_dev->msix_vectors; ++i)
+ synchronize_irq(pci_irq_vector(vp_dev->pci_dev, 0));
+ for (i = 1; i < vp_dev->msix_vectors; i++)
synchronize_irq(pci_irq_vector(vp_dev->pci_dev, i));
}
@@ -99,77 +97,10 @@ static irqreturn_t vp_interrupt(int irq, void *opaque)
return vp_vring_interrupt(irq, opaque);
}
-static int vp_request_msix_vectors(struct virtio_device *vdev, int nvectors,
- bool per_vq_vectors)
-{
- struct virtio_pci_device *vp_dev = to_vp_device(vdev);
- const char *name = dev_name(&vp_dev->vdev.dev);
- unsigned i, v;
- int err = -ENOMEM;
-
- vp_dev->msix_vectors = nvectors;
-
- vp_dev->msix_names = kmalloc(nvectors * sizeof *vp_dev->msix_names,
- GFP_KERNEL);
- if (!vp_dev->msix_names)
- goto error;
- vp_dev->msix_affinity_masks
- = kzalloc(nvectors * sizeof *vp_dev->msix_affinity_masks,
- GFP_KERNEL);
- if (!vp_dev->msix_affinity_masks)
- goto error;
- for (i = 0; i < nvectors; ++i)
- if (!alloc_cpumask_var(&vp_dev->msix_affinity_masks[i],
- GFP_KERNEL))
- goto error;
-
- err = pci_alloc_irq_vectors(vp_dev->pci_dev, nvectors, nvectors,
- PCI_IRQ_MSIX);
- if (err < 0)
- goto error;
- vp_dev->msix_enabled = 1;
-
- /* Set the vector used for configuration */
- v = vp_dev->msix_used_vectors;
- snprintf(vp_dev->msix_names[v], sizeof *vp_dev->msix_names,
- "%s-config", name);
- err = request_irq(pci_irq_vector(vp_dev->pci_dev, v),
- vp_config_changed, 0, vp_dev->msix_names[v],
- vp_dev);
- if (err)
- goto error;
- ++vp_dev->msix_used_vectors;
-
- v = vp_dev->config_vector(vp_dev, v);
- /* Verify we had enough resources to assign the vector */
- if (v == VIRTIO_MSI_NO_VECTOR) {
- err = -EBUSY;
- goto error;
- }
-
- if (!per_vq_vectors) {
- /* Shared vector for all VQs */
- v = vp_dev->msix_used_vectors;
- snprintf(vp_dev->msix_names[v], sizeof *vp_dev->msix_names,
- "%s-virtqueues", name);
- err = request_irq(pci_irq_vector(vp_dev->pci_dev, v),
- vp_vring_interrupt, 0, vp_dev->msix_names[v],
- vp_dev);
- if (err)
- goto error;
- ++vp_dev->msix_used_vectors;
- }
- return 0;
-error:
- return err;
-}
-
-/* the config->del_vqs() implementation */
-void vp_del_vqs(struct virtio_device *vdev)
+static void vp_remove_vqs(struct virtio_device *vdev)
{
struct virtio_pci_device *vp_dev = to_vp_device(vdev);
struct virtqueue *vq, *n;
- int i;
list_for_each_entry_safe(vq, n, &vdev->vqs, list) {
if (vp_dev->msix_vector_map) {
@@ -181,35 +112,33 @@ void vp_del_vqs(struct virtio_device *vdev)
}
vp_dev->del_vq(vq);
}
+}
- if (vp_dev->intx_enabled) {
- free_irq(vp_dev->pci_dev->irq, vp_dev);
- vp_dev->intx_enabled = 0;
- }
+/* the config->del_vqs() implementation */
+void vp_del_vqs(struct virtio_device *vdev)
+{
+ struct virtio_pci_device *vp_dev = to_vp_device(vdev);
+ int i;
- for (i = 0; i < vp_dev->msix_used_vectors; ++i)
- free_irq(pci_irq_vector(vp_dev->pci_dev, i), vp_dev);
+ if (WARN_ON_ONCE(list_empty_careful(&vdev->vqs)))
+ return;
- for (i = 0; i < vp_dev->msix_vectors; i++)
- if (vp_dev->msix_affinity_masks[i])
- free_cpumask_var(vp_dev->msix_affinity_masks[i]);
+ vp_remove_vqs(vdev);
if (vp_dev->msix_enabled) {
+ for (i = 0; i < vp_dev->msix_vectors; i++)
+ free_cpumask_var(vp_dev->msix_affinity_masks[i]);
+
/* Disable the vector used for configuration */
vp_dev->config_vector(vp_dev, VIRTIO_MSI_NO_VECTOR);
- pci_free_irq_vectors(vp_dev->pci_dev);
- vp_dev->msix_enabled = 0;
+ kfree(vp_dev->msix_affinity_masks);
+ kfree(vp_dev->msix_names);
+ kfree(vp_dev->msix_vector_map);
}
- vp_dev->msix_vectors = 0;
- vp_dev->msix_used_vectors = 0;
- kfree(vp_dev->msix_names);
- vp_dev->msix_names = NULL;
- kfree(vp_dev->msix_affinity_masks);
- vp_dev->msix_affinity_masks = NULL;
- kfree(vp_dev->msix_vector_map);
- vp_dev->msix_vector_map = NULL;
+ free_irq(pci_irq_vector(vp_dev->pci_dev, 0), vp_dev);
+ pci_free_irq_vectors(vp_dev->pci_dev);
}
static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned nvqs,
@@ -219,79 +148,122 @@ static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned nvqs,
bool per_vq_vectors)
{
struct virtio_pci_device *vp_dev = to_vp_device(vdev);
+ const char *name = dev_name(&vp_dev->vdev.dev);
+ int i, err = -ENOMEM, allocated_vectors, nvectors;
u16 msix_vec;
- int i, err, nvectors, allocated_vectors;
+
+ nvectors = 1;
+ for (i = 0; i < nvqs; i++)
+ if (callbacks[i])
+ nvectors++;
if (per_vq_vectors) {
- /* Best option: one for change interrupt, one per vq. */
- nvectors = 1;
- for (i = 0; i < nvqs; ++i)
- if (callbacks[i])
- ++nvectors;
+ err = pci_alloc_irq_vectors(vp_dev->pci_dev, nvectors, nvectors,
+ PCI_IRQ_MSIX);
} else {
- /* Second best: one for change, shared for all vqs. */
- nvectors = 2;
+ err = pci_alloc_irq_vectors(vp_dev->pci_dev, 2, 2,
+ PCI_IRQ_MSIX);
}
+ if (err < 0)
+ return err;
- err = vp_request_msix_vectors(vdev, nvectors, per_vq_vectors);
+ vp_dev->msix_vectors = nvectors;
+ vp_dev->msix_names = kmalloc_array(nvectors,
+ sizeof(*vp_dev->msix_names), GFP_KERNEL);
+ if (!vp_dev->msix_names)
+ goto out_free_irq_vectors;
+
+ vp_dev->msix_affinity_masks = kcalloc(nvectors,
+ sizeof(*vp_dev->msix_affinity_masks), GFP_KERNEL);
+ if (!vp_dev->msix_affinity_masks)
+ goto out_free_msix_names;
+
+ for (i = 0; i < nvectors; ++i) {
+ if (!alloc_cpumask_var(&vp_dev->msix_affinity_masks[i],
+ GFP_KERNEL))
+ goto out_free_msix_affinity_masks;
+ }
+
+ /* Set the vector used for configuration */
+ snprintf(vp_dev->msix_names[0], sizeof(*vp_dev->msix_names),
+ "%s-config", name);
+ err = request_irq(pci_irq_vector(vp_dev->pci_dev, 0), vp_config_changed,
+ 0, vp_dev->msix_names[0], vp_dev);
if (err)
- goto error_find;
+ goto out_free_irq_vectors;
- if (per_vq_vectors) {
- vp_dev->msix_vector_map = kmalloc_array(nvqs,
- sizeof(*vp_dev->msix_vector_map), GFP_KERNEL);
- if (!vp_dev->msix_vector_map)
- goto error_find;
+ /* Verify we had enough resources to assign the vector */
+ if (vp_dev->config_vector(vp_dev, 0) == VIRTIO_MSI_NO_VECTOR) {
+ err = -EBUSY;
+ goto out_free_config_irq;
}
- allocated_vectors = vp_dev->msix_used_vectors;
+ vp_dev->msix_vector_map = kmalloc_array(nvqs,
+ sizeof(*vp_dev->msix_vector_map), GFP_KERNEL);
+ if (!vp_dev->msix_vector_map)
+ goto out_disable_config_irq;
+
+ allocated_vectors = 1; /* vector 0 is the config interrupt */
for (i = 0; i < nvqs; ++i) {
if (!names[i]) {
vqs[i] = NULL;
continue;
}
- if (!callbacks[i])
- msix_vec = VIRTIO_MSI_NO_VECTOR;
- else if (per_vq_vectors)
- msix_vec = allocated_vectors++;
+ if (callbacks[i])
+ msix_vec = allocated_vectors;
else
- msix_vec = VP_MSIX_VQ_VECTOR;
+ msix_vec = VIRTIO_MSI_NO_VECTOR;
+
vqs[i] = vp_dev->setup_vq(vp_dev, i, callbacks[i], names[i],
msix_vec);
if (IS_ERR(vqs[i])) {
err = PTR_ERR(vqs[i]);
- goto error_find;
+ goto out_remove_vqs;
}
- if (!per_vq_vectors)
- continue;
-
if (msix_vec == VIRTIO_MSI_NO_VECTOR) {
vp_dev->msix_vector_map[i] = VIRTIO_MSI_NO_VECTOR;
continue;
}
- /* allocate per-vq irq if available and necessary */
- snprintf(vp_dev->msix_names[msix_vec],
- sizeof *vp_dev->msix_names,
- "%s-%s",
+ snprintf(vp_dev->msix_names[i + 1],
+ sizeof(*vp_dev->msix_names), "%s-%s",
dev_name(&vp_dev->vdev.dev), names[i]);
err = request_irq(pci_irq_vector(vp_dev->pci_dev, msix_vec),
- vring_interrupt, 0,
- vp_dev->msix_names[msix_vec],
- vqs[i]);
+ vring_interrupt, IRQF_SHARED,
+ vp_dev->msix_names[i + 1], vqs[i]);
if (err) {
/* don't free this irq on error */
vp_dev->msix_vector_map[i] = VIRTIO_MSI_NO_VECTOR;
- goto error_find;
+ goto out_remove_vqs;
}
vp_dev->msix_vector_map[i] = msix_vec;
+
+ if (per_vq_vectors)
+ allocated_vectors++;
}
+
+ vp_dev->msix_enabled = 1;
return 0;
-error_find:
- vp_del_vqs(vdev);
+out_remove_vqs:
+ vp_remove_vqs(vdev);
+ kfree(vp_dev->msix_vector_map);
+out_disable_config_irq:
+ vp_dev->config_vector(vp_dev, VIRTIO_MSI_NO_VECTOR);
+out_free_config_irq:
+ free_irq(pci_irq_vector(vp_dev->pci_dev, 0), vp_dev);
+out_free_msix_affinity_masks:
+ for (i = 0; i < nvectors; i++) {
+ if (vp_dev->msix_affinity_masks[i])
+ free_cpumask_var(vp_dev->msix_affinity_masks[i]);
+ }
+ kfree(vp_dev->msix_affinity_masks);
+out_free_msix_names:
+ kfree(vp_dev->msix_names);
+out_free_irq_vectors:
+ pci_free_irq_vectors(vp_dev->pci_dev);
return err;
}
@@ -305,9 +277,8 @@ static int vp_find_vqs_intx(struct virtio_device *vdev, unsigned nvqs,
err = request_irq(vp_dev->pci_dev->irq, vp_interrupt, IRQF_SHARED,
dev_name(&vdev->dev), vp_dev);
if (err)
- goto out_del_vqs;
+ return err;
- vp_dev->intx_enabled = 1;
for (i = 0; i < nvqs; ++i) {
if (!names[i]) {
vqs[i] = NULL;
@@ -317,13 +288,15 @@ static int vp_find_vqs_intx(struct virtio_device *vdev, unsigned nvqs,
VIRTIO_MSI_NO_VECTOR);
if (IS_ERR(vqs[i])) {
err = PTR_ERR(vqs[i]);
- goto out_del_vqs;
+ goto out_remove_vqs;
}
}
return 0;
-out_del_vqs:
- vp_del_vqs(vdev);
+
+out_remove_vqs:
+ vp_remove_vqs(vdev);
+ free_irq(pci_irq_vector(vp_dev->pci_dev, 0), vp_dev);
return err;
}
diff --git a/drivers/virtio/virtio_pci_common.h b/drivers/virtio/virtio_pci_common.h
index 2038887bdf23..85593867e712 100644
--- a/drivers/virtio/virtio_pci_common.h
+++ b/drivers/virtio/virtio_pci_common.h
@@ -66,16 +66,12 @@ struct virtio_pci_device {
/* MSI-X support */
int msix_enabled;
- int intx_enabled;
cpumask_var_t *msix_affinity_masks;
/* Name strings for interrupts. This size should be enough,
* and I'm too lazy to allocate each name separately. */
char (*msix_names)[256];
- /* Number of available vectors */
- unsigned msix_vectors;
- /* Vectors allocated, excluding per-vq vectors if any */
- unsigned msix_used_vectors;
-
+ /* Total Number of MSI-X vectors (including per-VQ ones). */
+ int msix_vectors;
/* Map of per-VQ MSI-X vectors, may be NULL */
unsigned *msix_vector_map;
@@ -89,14 +85,6 @@ struct virtio_pci_device {
u16 (*config_vector)(struct virtio_pci_device *vp_dev, u16 vector);
};
-/* Constants for MSI-X */
-/* Use first vector for configuration changes, second and the rest for
- * virtqueues Thus, we need at least 2 vectors for MSI. */
-enum {
- VP_MSIX_CONFIG_VECTOR = 0,
- VP_MSIX_VQ_VECTOR = 1,
-};
-
/* Convert a generic virtio device to our structure */
static struct virtio_pci_device *to_vp_device(struct virtio_device *vdev)
{