[RFC 2/3] virtio: add poll_source virtqueue polling
From: Stefan Hajnoczi
Date: Tue Jul 13 2021 - 12:19:31 EST
VIRTIO drivers can cheaply disable interrupts by setting
RING_EVENT_FLAGS_DISABLE in the packed virtqueues's Driver Event
Suppression structure. See "2.7.10 Driver and Device Event Suppression"
in the VIRTIO 1.1 specification for details
(https://docs.oasis-open.org/virtio/virtio/v1.1/virtio-v1.1.html).
Add a per-virtqueue poll_source that disables events in ->start(),
processes completed virtqueue buffers in ->poll(), and re-enables events
in ->stop().
This optimization avoids interrupt injection during cpuidle polling.
Workloads that submit requests and then halt the vCPU until completion
benefit from this.
To enable this optimization:
1. Enable the cpuidle haltpoll driver:
https://www.kernel.org/doc/html/latest/virt/guest-halt-polling.html
2. Enable poll_source on the virtio device:
# echo -n 1 > /sys/block/vda/device/poll_source
Note that this feature currently as no effect on split virtqueues when
VIRTIO_F_EVENT_IDX is negotiated. It may be possible to tweak
virtqueue_disable_cb_split() but I haven't attempted it here.
This optimization has been benchmarked successfully with virtio-blk
devices. Currently it does not improve virtio-net performance, probably
because it doesn't combine with NAPI polling.
Signed-off-by: Stefan Hajnoczi <stefanha@xxxxxxxxxx>
---
drivers/virtio/virtio_pci_common.h | 7 +++
include/linux/virtio.h | 2 +
include/linux/virtio_config.h | 2 +
drivers/virtio/virtio.c | 34 ++++++++++++
drivers/virtio/virtio_pci_common.c | 86 ++++++++++++++++++++++++++++++
drivers/virtio/virtio_pci_modern.c | 2 +
6 files changed, 133 insertions(+)
diff --git a/drivers/virtio/virtio_pci_common.h b/drivers/virtio/virtio_pci_common.h
index beec047a8f8d..630746043183 100644
--- a/drivers/virtio/virtio_pci_common.h
+++ b/drivers/virtio/virtio_pci_common.h
@@ -21,6 +21,7 @@
#include <linux/pci.h>
#include <linux/slab.h>
#include <linux/interrupt.h>
+#include <linux/poll_source.h>
#include <linux/virtio.h>
#include <linux/virtio_config.h>
#include <linux/virtio_ring.h>
@@ -38,6 +39,9 @@ struct virtio_pci_vq_info {
/* MSI-X vector (or none) */
unsigned msix_vector;
+
+ /* the cpuidle poll_source */
+ struct poll_source poll_source;
};
/* Our device structure */
@@ -102,6 +106,9 @@ static struct virtio_pci_device *to_vp_device(struct virtio_device *vdev)
return container_of(vdev, struct virtio_pci_device, vdev);
}
+/* enable poll_source API for vq polling */
+int vp_enable_poll_source(struct virtio_device *vdev, bool enable);
+
/* wait for pending irq handlers */
void vp_synchronize_vectors(struct virtio_device *vdev);
/* the notify function used when creating a virt queue */
diff --git a/include/linux/virtio.h b/include/linux/virtio.h
index b1894e0323fa..baaa3c8fadb1 100644
--- a/include/linux/virtio.h
+++ b/include/linux/virtio.h
@@ -93,6 +93,7 @@ dma_addr_t virtqueue_get_used_addr(struct virtqueue *vq);
* virtio_device - representation of a device using virtio
* @index: unique position on the virtio bus
* @failed: saved value for VIRTIO_CONFIG_S_FAILED bit (for restore)
+ * @poll_source_enabled: poll_source API enabled for vq polling
* @config_enabled: configuration change reporting enabled
* @config_change_pending: configuration change reported while disabled
* @config_lock: protects configuration change reporting
@@ -107,6 +108,7 @@ dma_addr_t virtqueue_get_used_addr(struct virtqueue *vq);
struct virtio_device {
int index;
bool failed;
+ bool poll_source_enabled;
bool config_enabled;
bool config_change_pending;
spinlock_t config_lock;
diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h
index 8519b3ae5d52..5fb78d56fd44 100644
--- a/include/linux/virtio_config.h
+++ b/include/linux/virtio_config.h
@@ -72,6 +72,7 @@ struct virtio_shm_region {
* @set_vq_affinity: set the affinity for a virtqueue (optional).
* @get_vq_affinity: get the affinity for a virtqueue (optional).
* @get_shm_region: get a shared memory region based on the index.
+ * @enable_poll_source: enable/disable poll_source API vq polling (optional).
*/
typedef void vq_callback_t(struct virtqueue *);
struct virtio_config_ops {
@@ -97,6 +98,7 @@ struct virtio_config_ops {
int index);
bool (*get_shm_region)(struct virtio_device *vdev,
struct virtio_shm_region *region, u8 id);
+ int (*enable_poll_source)(struct virtio_device *dev, bool enable);
};
/* If driver didn't advertise the feature, it will never appear. */
diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c
index 4b15c00c0a0a..22fee71bbe34 100644
--- a/drivers/virtio/virtio.c
+++ b/drivers/virtio/virtio.c
@@ -59,12 +59,44 @@ static ssize_t features_show(struct device *_d,
}
static DEVICE_ATTR_RO(features);
+static ssize_t poll_source_show(struct device *_d,
+ struct device_attribute *attr, char *buf)
+{
+ struct virtio_device *dev = dev_to_virtio(_d);
+ return sprintf(buf, "%d\n", dev->poll_source_enabled);
+}
+
+static ssize_t poll_source_store(struct device *_d,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct virtio_device *dev = dev_to_virtio(_d);
+ bool val;
+ int rc;
+
+ rc = kstrtobool(buf, &val);
+ if (rc)
+ return rc;
+
+ if (val == dev->poll_source_enabled)
+ return count;
+ if (!dev->config->enable_poll_source)
+ return -ENOTSUPP;
+
+ rc = dev->config->enable_poll_source(dev, val);
+ if (rc)
+ return rc;
+ return count;
+}
+static DEVICE_ATTR_RW(poll_source);
+
static struct attribute *virtio_dev_attrs[] = {
&dev_attr_device.attr,
&dev_attr_vendor.attr,
&dev_attr_status.attr,
&dev_attr_modalias.attr,
&dev_attr_features.attr,
+ &dev_attr_poll_source.attr,
NULL,
};
ATTRIBUTE_GROUPS(virtio_dev);
@@ -343,6 +375,8 @@ int register_virtio_device(struct virtio_device *dev)
dev->index = err;
dev_set_name(&dev->dev, "virtio%u", dev->index);
+ dev->poll_source_enabled = false;
+
spin_lock_init(&dev->config_lock);
dev->config_enabled = false;
dev->config_change_pending = false;
diff --git a/drivers/virtio/virtio_pci_common.c b/drivers/virtio/virtio_pci_common.c
index 222d630c41fc..6de372e12afb 100644
--- a/drivers/virtio/virtio_pci_common.c
+++ b/drivers/virtio/virtio_pci_common.c
@@ -24,6 +24,83 @@ MODULE_PARM_DESC(force_legacy,
"Force legacy mode for transitional virtio 1 devices");
#endif
+static void vp_poll_source_start(struct poll_source *src)
+{
+ struct virtio_pci_vq_info *info =
+ container_of(src, struct virtio_pci_vq_info, poll_source);
+
+ /* This API does not require a lock */
+ virtqueue_disable_cb(info->vq);
+}
+
+static void vp_poll_source_stop(struct poll_source *src)
+{
+ struct virtio_pci_vq_info *info =
+ container_of(src, struct virtio_pci_vq_info, poll_source);
+
+ /* Poll one last time in case */
+ /* TODO allow driver to provide spinlock */
+ if (!virtqueue_enable_cb(info->vq))
+ vring_interrupt(0 /* ignored */, info->vq);
+}
+
+static void vp_poll_source_poll(struct poll_source *src)
+{
+ struct virtio_pci_vq_info *info =
+ container_of(src, struct virtio_pci_vq_info, poll_source);
+
+ vring_interrupt(0 /* ignored */, info->vq);
+}
+
+static const struct poll_source_ops vp_poll_source_ops = {
+ .start = vp_poll_source_start,
+ .stop = vp_poll_source_stop,
+ .poll = vp_poll_source_poll,
+};
+
+/* call this when irq affinity changes to update cpuidle poll_source */
+/* TODO this function waits for a smp_call_function_single() completion, is that allowed in all caller contexts? */
+/* TODO this function is not thread-safe, do all callers hold the same lock? */
+static int vp_update_poll_source(struct virtio_device *vdev, int index)
+{
+ struct virtio_pci_device *vp_dev = to_vp_device(vdev);
+ struct poll_source *src = &vp_dev->vqs[index]->poll_source;
+ const struct cpumask *mask;
+
+ if (!list_empty(&src->node))
+ poll_source_unregister(src);
+
+ if (!vdev->poll_source_enabled)
+ return 0;
+
+ mask = vp_get_vq_affinity(vdev, index);
+ if (!mask)
+ return -ENOTTY;
+
+ /* Update the poll_source cpu */
+ src->cpu = cpumask_first(mask);
+
+ /* Don't use poll_source if interrupts are handled on multiple CPUs */
+ if (cpumask_next(src->cpu, mask) < nr_cpu_ids)
+ return 0;
+
+ return poll_source_register(src);
+}
+
+/* TODO add this to virtio_pci_legacy config ops? */
+int vp_enable_poll_source(struct virtio_device *vdev, bool enable)
+{
+ struct virtqueue *vq;
+
+ vdev->poll_source_enabled = enable;
+
+ /* TODO locking */
+ list_for_each_entry(vq, &vdev->vqs, list) {
+ vp_update_poll_source(vdev, vq->index); /* TODO handle errors? */
+ }
+ return 0;
+}
+
/* wait for pending irq handlers */
void vp_synchronize_vectors(struct virtio_device *vdev)
{
@@ -186,6 +263,9 @@ static struct virtqueue *vp_setup_vq(struct virtio_device *vdev, unsigned index,
if (!info)
return ERR_PTR(-ENOMEM);
+ info->poll_source.ops = &vp_poll_source_ops;
+ INIT_LIST_HEAD(&info->poll_source.node);
+
vq = vp_dev->setup_vq(vp_dev, info, index, callback, name, ctx,
msix_vec);
if (IS_ERR(vq))
@@ -237,6 +317,7 @@ void vp_del_vqs(struct virtio_device *vdev)
int irq = pci_irq_vector(vp_dev->pci_dev, v);
irq_set_affinity_hint(irq, NULL);
+ vp_update_poll_source(vdev, vq->index);
free_irq(irq, vq);
}
}
@@ -342,6 +423,9 @@ static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned nvqs,
vqs[i]);
if (err)
goto error_find;
+
+ if (desc)
+ vp_update_poll_source(vdev, i);
}
return 0;
@@ -440,6 +524,8 @@ int vp_set_vq_affinity(struct virtqueue *vq, const struct cpumask *cpu_mask)
cpumask_copy(mask, cpu_mask);
irq_set_affinity_hint(irq, mask);
}
+
+ vp_update_poll_source(vdev, vq->index);
}
return 0;
}
diff --git a/drivers/virtio/virtio_pci_modern.c b/drivers/virtio/virtio_pci_modern.c
index 30654d3a0b41..417d568590f2 100644
--- a/drivers/virtio/virtio_pci_modern.c
+++ b/drivers/virtio/virtio_pci_modern.c
@@ -394,6 +394,7 @@ static const struct virtio_config_ops virtio_pci_config_nodev_ops = {
.set_vq_affinity = vp_set_vq_affinity,
.get_vq_affinity = vp_get_vq_affinity,
.get_shm_region = vp_get_shm_region,
+ .enable_poll_source = vp_enable_poll_source,
};
static const struct virtio_config_ops virtio_pci_config_ops = {
@@ -411,6 +412,7 @@ static const struct virtio_config_ops virtio_pci_config_ops = {
.set_vq_affinity = vp_set_vq_affinity,
.get_vq_affinity = vp_get_vq_affinity,
.get_shm_region = vp_get_shm_region,
+ .enable_poll_source = vp_enable_poll_source,
};
/* the PCI probing function */
--
2.31.1