[PATCH 1/4] vdpa/octeon_ep: enable support for multiple interrupts per device
From: Shijith Thotton
Date: Wed Nov 20 2024 - 02:07:12 EST
Updated the driver to utilize all the MSI-X interrupt vectors supported
by each OCTEON endpoint VF, instead of relying on a single vector.
Enabling more interrupts allows packets from multiple rings to be
distributed across multiple cores, improving parallelism and
performance.
Signed-off-by: Shijith Thotton <sthotton@xxxxxxxxxxx>
---
drivers/vdpa/octeon_ep/octep_vdpa.h | 10 +--
drivers/vdpa/octeon_ep/octep_vdpa_hw.c | 2 -
drivers/vdpa/octeon_ep/octep_vdpa_main.c | 88 ++++++++++++++++--------
3 files changed, 63 insertions(+), 37 deletions(-)
diff --git a/drivers/vdpa/octeon_ep/octep_vdpa.h b/drivers/vdpa/octeon_ep/octep_vdpa.h
index 046710ec4d42..2d4bb07f91b3 100644
--- a/drivers/vdpa/octeon_ep/octep_vdpa.h
+++ b/drivers/vdpa/octeon_ep/octep_vdpa.h
@@ -29,12 +29,12 @@
#define OCTEP_EPF_RINFO(x) (0x000209f0 | ((x) << 25))
#define OCTEP_VF_MBOX_DATA(x) (0x00010210 | ((x) << 17))
#define OCTEP_PF_MBOX_DATA(x) (0x00022000 | ((x) << 4))
-
-#define OCTEP_EPF_RINFO_RPVF(val) (((val) >> 32) & 0xF)
-#define OCTEP_EPF_RINFO_NVFS(val) (((val) >> 48) & 0x7F)
+#define OCTEP_VF_IN_CTRL(x) (0x00010000 | ((x) << 17))
+#define OCTEP_VF_IN_CTRL_RPVF(val) (((val) >> 48) & 0xF)
#define OCTEP_FW_READY_SIGNATURE0 0xFEEDFEED
#define OCTEP_FW_READY_SIGNATURE1 0x3355ffaa
+#define OCTEP_MAX_CB_INTR 8
enum octep_vdpa_dev_status {
OCTEP_VDPA_DEV_STATUS_INVALID,
@@ -50,7 +50,6 @@ struct octep_vring_info {
void __iomem *notify_addr;
u32 __iomem *cb_notify_addr;
phys_addr_t notify_pa;
- char msix_name[256];
};
struct octep_hw {
@@ -68,7 +67,8 @@ struct octep_hw {
u64 features;
u16 nr_vring;
u32 config_size;
- int irq;
+ int nb_irqs;
+ int *irqs;
};
u8 octep_hw_get_status(struct octep_hw *oct_hw);
diff --git a/drivers/vdpa/octeon_ep/octep_vdpa_hw.c b/drivers/vdpa/octeon_ep/octep_vdpa_hw.c
index 1d4767b33315..d5a599f87e18 100644
--- a/drivers/vdpa/octeon_ep/octep_vdpa_hw.c
+++ b/drivers/vdpa/octeon_ep/octep_vdpa_hw.c
@@ -495,8 +495,6 @@ int octep_hw_caps_read(struct octep_hw *oct_hw, struct pci_dev *pdev)
if (!oct_hw->vqs)
return -ENOMEM;
- oct_hw->irq = -1;
-
dev_info(&pdev->dev, "Device features : %llx\n", oct_hw->features);
dev_info(&pdev->dev, "Maximum queues : %u\n", oct_hw->nr_vring);
diff --git a/drivers/vdpa/octeon_ep/octep_vdpa_main.c b/drivers/vdpa/octeon_ep/octep_vdpa_main.c
index cd55b1aac151..482c178a5221 100644
--- a/drivers/vdpa/octeon_ep/octep_vdpa_main.c
+++ b/drivers/vdpa/octeon_ep/octep_vdpa_main.c
@@ -47,13 +47,30 @@ static struct octep_hw *vdpa_to_octep_hw(struct vdpa_device *vdpa_dev)
static irqreturn_t octep_vdpa_intr_handler(int irq, void *data)
{
struct octep_hw *oct_hw = data;
- int i;
+ int i, ring_start, ring_stride;
+
+ /* Each device has multiple interrupts (nb_irqs) shared among receive
+ * rings (nr_vring). Device interrupts are mapped to specific receive
+ * rings in a round-robin fashion. Only rings handling receive
+ * operations require interrupts, and these are at even indices.
+ *
+ * For example, if nb_irqs = 8 and nr_vring = 64:
+ * 0 -> 0, 16, 32, 48;
+ * 1 -> 2, 18, 34, 50;
+ * ...
+ * 7 -> 14, 30, 46, 62;
+ */
+ ring_start = (irq - oct_hw->irqs[0]) * 2;
+ ring_stride = oct_hw->nb_irqs * 2;
- for (i = 0; i < oct_hw->nr_vring; i++) {
- if (oct_hw->vqs[i].cb.callback && ioread32(oct_hw->vqs[i].cb_notify_addr)) {
- /* Acknowledge the per queue notification to the device */
+ for (i = ring_start; i < oct_hw->nr_vring; i += ring_stride) {
+ if (ioread32(oct_hw->vqs[i].cb_notify_addr)) {
+ /* Acknowledge the per ring notification to the device */
iowrite32(0, oct_hw->vqs[i].cb_notify_addr);
- oct_hw->vqs[i].cb.callback(oct_hw->vqs[i].cb.private);
+
+ if (likely(oct_hw->vqs[i].cb.callback))
+ oct_hw->vqs[i].cb.callback(oct_hw->vqs[i].cb.private);
+ break;
}
}
@@ -63,44 +80,53 @@ static irqreturn_t octep_vdpa_intr_handler(int irq, void *data)
static void octep_free_irqs(struct octep_hw *oct_hw)
{
struct pci_dev *pdev = oct_hw->pdev;
+ int irq;
+
+ for (irq = 0; irq < oct_hw->nb_irqs && oct_hw->irqs; irq++) {
+ if (oct_hw->irqs[irq] < 0)
+ continue;
- if (oct_hw->irq != -1) {
- devm_free_irq(&pdev->dev, oct_hw->irq, oct_hw);
- oct_hw->irq = -1;
+ devm_free_irq(&pdev->dev, oct_hw->irqs[irq], oct_hw);
}
+
pci_free_irq_vectors(pdev);
+ kfree(oct_hw->irqs);
}
static int octep_request_irqs(struct octep_hw *oct_hw)
{
struct pci_dev *pdev = oct_hw->pdev;
- int ret, irq;
+ int ret, irq, idx;
- /* Currently HW device provisions one IRQ per VF, hence
- * allocate one IRQ for all virtqueues call interface.
- */
- ret = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_MSIX);
+ ret = pci_alloc_irq_vectors(pdev, 1, oct_hw->nb_irqs, PCI_IRQ_MSIX);
if (ret < 0) {
dev_err(&pdev->dev, "Failed to alloc msix vector");
return ret;
}
- snprintf(oct_hw->vqs->msix_name, sizeof(oct_hw->vqs->msix_name),
- OCTEP_VDPA_DRIVER_NAME "-vf-%d", pci_iov_vf_id(pdev));
+ oct_hw->irqs = kcalloc(oct_hw->nb_irqs, sizeof(int), GFP_KERNEL);
+ if (!oct_hw->irqs) {
+ ret = -ENOMEM;
+ goto free_irqs;
+ }
- irq = pci_irq_vector(pdev, 0);
- ret = devm_request_irq(&pdev->dev, irq, octep_vdpa_intr_handler, 0,
- oct_hw->vqs->msix_name, oct_hw);
- if (ret) {
- dev_err(&pdev->dev, "Failed to register interrupt handler\n");
- goto free_irq_vec;
+ memset(oct_hw->irqs, -1, sizeof(oct_hw->irqs));
+
+ for (idx = 0; idx < oct_hw->nb_irqs; idx++) {
+ irq = pci_irq_vector(pdev, idx);
+ ret = devm_request_irq(&pdev->dev, irq, octep_vdpa_intr_handler, 0,
+ dev_name(&pdev->dev), oct_hw);
+ if (ret) {
+ dev_err(&pdev->dev, "Failed to register interrupt handler\n");
+ goto free_irqs;
+ }
+ oct_hw->irqs[idx] = irq;
}
- oct_hw->irq = irq;
return 0;
-free_irq_vec:
- pci_free_irq_vectors(pdev);
+free_irqs:
+ octep_free_irqs(oct_hw);
return ret;
}
@@ -559,6 +585,7 @@ static void octep_vdpa_setup_task(struct work_struct *work)
struct device *dev = &pdev->dev;
struct octep_hw *oct_hw;
unsigned long timeout;
+ u64 val;
int ret;
oct_hw = &mgmt_dev->oct_hw;
@@ -590,6 +617,13 @@ static void octep_vdpa_setup_task(struct work_struct *work)
if (ret)
return;
+ val = readq(oct_hw->base[OCTEP_HW_MBOX_BAR] + OCTEP_VF_IN_CTRL(0));
+ oct_hw->nb_irqs = OCTEP_VF_IN_CTRL_RPVF(val);
+ if (!oct_hw->nb_irqs || oct_hw->nb_irqs > OCTEP_MAX_CB_INTR) {
+ dev_err(dev, "Invalid number of interrupts %d\n", oct_hw->nb_irqs);
+ goto unmap_region;
+ }
+
ret = octep_hw_caps_read(oct_hw, pdev);
if (ret < 0)
goto unmap_region;
@@ -768,12 +802,6 @@ static int octep_vdpa_pf_setup(struct octep_pf *octpf)
return -EINVAL;
}
- if (OCTEP_EPF_RINFO_RPVF(val) != BIT_ULL(0)) {
- val &= ~GENMASK_ULL(35, 32);
- val |= BIT_ULL(32);
- writeq(val, addr + OCTEP_EPF_RINFO(0));
- }
-
len = pci_resource_len(pdev, OCTEP_HW_CAPS_BAR);
octpf->vf_stride = len / totalvfs;
--
2.25.1