[RFC 1/5] virtio-pci: flexible configuration layout

From: Sasha Levin
Date: Mon Nov 14 2011 - 18:47:33 EST


From: "Michael S. Tsirkin" <mst@xxxxxxxxxx>

Add a flexible mechanism to specify virtio configuration layout, using
pci vendor-specific capability. A separate capability is used for each
of common, device specific and data-path accesses.

Warning: compiled only.
This patch also needs to be split up, pci_iomap changes
also need arch updates for non-x86.
There might also be more spec changes.

Posting here for early feedback, and to allow Sasha to
proceed with his "kvm tool" work.

Changes from v1:
Updated to match v3 of the spec, see:
Subject: [PATCHv3 RFC] virtio-spec: flexible configuration layout
Message-ID: <20111110122436.GA13144@xxxxxxxxxx>
In-Reply-To: <20111109195901.GA28155@xxxxxxxxxx>

In particular:
- split ISR out
- reorg capability
- add offset alignment

Signed-off-by: Michael S. Tsirkin <mst@xxxxxxxxxx>
---
drivers/virtio/virtio_pci.c | 184 +++++++++++++++++++++++++++++++++++++++---
include/asm-generic/io.h | 4 +
include/asm-generic/iomap.h | 11 +++
include/linux/pci_regs.h | 4 +
include/linux/virtio_pci.h | 41 ++++++++++
lib/iomap.c | 40 ++++++++-
6 files changed, 265 insertions(+), 19 deletions(-)

diff --git a/drivers/virtio/virtio_pci.c b/drivers/virtio/virtio_pci.c
index 3d1bf41..7625434 100644
--- a/drivers/virtio/virtio_pci.c
+++ b/drivers/virtio/virtio_pci.c
@@ -37,8 +37,12 @@ struct virtio_pci_device
struct virtio_device vdev;
struct pci_dev *pci_dev;

- /* the IO mapping for the PCI config space */
+ /* the IO address for the common PCI config space */
void __iomem *ioaddr;
+ /* the IO address for device specific config */
+ void __iomem *ioaddr_device;
+ /* the IO address to use for notifications operations */
+ void __iomem *ioaddr_notify;

/* a list of queues so we can dispatch IRQs */
spinlock_t lock;
@@ -57,8 +61,158 @@ struct virtio_pci_device
unsigned msix_used_vectors;
/* Whether we have vector per vq */
bool per_vq_vectors;
+
+ /* Various IO mappings: used for resource tracking only. */
+
+ /* Legacy BAR0: typically PIO. */
+ void __iomem *legacy_map;
+
+ /* Mappings specified by device capabilities: typically in MMIO */
+ void __iomem *isr_map;
+ void __iomem *notify_map;
+ void __iomem *common_map;
+ void __iomem *device_map;
};

+/*
+ * With PIO, device-specific config moves as MSI-X is enabled/disabled.
+ * Use this accessor to keep pointer to that config in sync.
+ */
+static void virtio_pci_set_msix_enabled(struct virtio_pci_device *vp_dev, int enabled)
+{
+ vp_dev->msix_enabled = enabled;
+ if (vp_dev->device_map)
+ vp_dev->ioaddr_device = vp_dev->device_map;
+ else
+ vp_dev->ioaddr_device = vp_dev->legacy_map +
+ VIRTIO_PCI_CONFIG(vp_dev);
+}
+
+static void __iomem *virtio_pci_map_cfg(struct virtio_pci_device *vp_dev, u8 cap_id,
+ u32 align)
+{
+ u32 size;
+ u32 offset;
+ u8 bir;
+ u8 cap_len;
+ int pos;
+ struct pci_dev *dev = vp_dev->pci_dev;
+ void __iomem *p;
+
+ for (pos = pci_find_capability(dev, PCI_CAP_ID_VNDR);
+ pos > 0;
+ pos = pci_find_next_capability(dev, pos, PCI_CAP_ID_VNDR)) {
+ u8 id;
+ pci_read_config_byte(dev, pos + PCI_VNDR_CAP_LEN, &cap_len);
+ if (cap_len < VIRTIO_PCI_CAP_ID + 1)
+ continue;
+ pci_read_config_byte(dev, pos + VIRTIO_PCI_CAP_ID, &id);
+ id >>= VIRTIO_PCI_CAP_ID_SHIFT;
+ id &= VIRTIO_PCI_CAP_ID_MASK;
+ if (id == cap_id)
+ break;
+ }
+
+ if (pos <= 0)
+ return NULL;
+
+ if (cap_len < VIRTIO_PCI_CAP_CFG_BIR + 1)
+ goto err;
+ pci_read_config_byte(dev, pos + VIRTIO_PCI_CAP_CFG_BIR, &bir);
+ if (cap_len < VIRTIO_PCI_CAP_CFG_OFF + 4)
+ goto err;
+ pci_read_config_dword(dev, pos + VIRTIO_PCI_CAP_CFG_OFF, &offset);
+ if (cap_len < VIRTIO_PCI_CAP_CFG_SIZE + 4)
+ goto err;
+ pci_read_config_dword(dev, pos + VIRTIO_PCI_CAP_CFG_SIZE, &size);
+ bir >>= VIRTIO_PCI_CAP_CFG_BIR_SHIFT;
+ bir &= VIRTIO_PCI_CAP_CFG_BIR_MASK;
+ size >>= VIRTIO_PCI_CAP_CFG_SIZE_SHIFT;
+ size &= VIRTIO_PCI_CAP_CFG_SIZE_MASK;
+ off >>= VIRTIO_PCI_CAP_CFG_OFF_SHIFT;
+ off &= VIRTIO_PCI_CAP_CFG_OFF_MASK;
+ /* Align offset appropriately */
+ off &= ~(align - 1);
+
+ /* It's possible for a device to supply a huge config space,
+ * but we'll never need to map more than a page ATM. */
+ p = pci_iomap_range(dev, bir, offset, size, PAGE_SIZE);
+ if (!p)
+ dev_err(&vp_dev->vdev.dev, "Unable to map virtio pci memory");
+ return p;
+err:
+ dev_err(&vp_dev->vdev.dev, "Unable to parse virtio pci capability");
+ return NULL;
+}
+
+static void virtio_pci_iounmap(struct virtio_pci_device *vp_dev)
+{
+ if (vp_dev->legacy_map)
+ pci_iounmap(vp_dev->pci_dev, vp_dev->legacy_map);
+ if (vp_dev->isr_map)
+ pci_iounmap(vp_dev->pci_dev, vp_dev->isr_map);
+ if (vp_dev->notify_map)
+ pci_iounmap(vp_dev->pci_dev, vp_dev->notify_map);
+ if (vp_dev->common_map)
+ pci_iounmap(vp_dev->pci_dev, vp_dev->common_map);
+ if (vp_dev->device_map)
+ pci_iounmap(vp_dev->pci_dev, vp_dev->device_map);
+}
+
+static int virtio_pci_iomap(struct virtio_pci_device *vp_dev)
+{
+ vp_dev->isr_map = virtio_pci_map_cfg(vp_dev,
+ VIRTIO_PCI_CAP_ISR_CFG,
+ sizeof(u8));
+ vp_dev->notify_map = virtio_pci_map_cfg(vp_dev,
+ VIRTIO_PCI_CAP_NOTIFY_CFG,
+ sizeof(u16));
+ vp_dev->common_map = virtio_pci_map_cfg(vp_dev,
+ VIRTIO_PCI_CAP_COMMON_CFG,
+ sizeof(u32));
+ vp_dev->device_map = virtio_pci_map_cfg(vp_dev,
+ VIRTIO_PCI_CAP_DEVICE_CFG,
+ sizeof(u8));
+
+ if (!vp_dev->notify_map || !vp_dev->common_map ||
+ !vp_dev->device_map) {
+ /*
+ * If not all capabilities present, map legacy PIO.
+ * Legacy access is at BAR 0. We never need to map
+ * more than 256 bytes there, since legacy config space
+ * used PIO which has this size limit.
+ * */
+ vp_dev->legacy_map = pci_iomap(vp_dev->pci_dev, 0, 256);
+ if (!vp_dev->legacy_map) {
+ dev_err(&vp_dev->vdev.dev, "Unable to map legacy PIO");
+ goto err;
+ }
+ }
+
+ /* Prefer MMIO if available. If not, fallback to legacy PIO. */
+ if (vp_dev->common_map)
+ vp_dev->ioaddr = vp_dev->common_map;
+ else
+ vp_dev->ioaddr = vp_dev->legacy_map;
+
+ if (vp_dev->device_map)
+ vp_dev->ioaddr_device = vp_dev->device_map;
+ else
+ vp_dev->ioaddr_device = vp_dev->legacy_map +
+ VIRTIO_PCI_CONFIG(vp_dev);
+
+ if (vp_dev->notify_map)
+ vp_dev->ioaddr_notify = vp_dev->notify_map;
+ else
+ vp_dev->ioaddr_notify = vp_dev->legacy_map +
+ VIRTIO_PCI_QUEUE_NOTIFY;
+
+ return 0;
+err:
+ virtio_pci_iounmap(vp_dev);
+ return -EINVAL;
+}
+
/* Constants for MSI-X */
/* Use first vector for configuration changes, second and the rest for
* virtqueues Thus, we need at least 2 vectors for MSI. */
@@ -130,8 +284,7 @@ static void vp_get(struct virtio_device *vdev, unsigned offset,
void *buf, unsigned len)
{
struct virtio_pci_device *vp_dev = to_vp_device(vdev);
- void __iomem *ioaddr = vp_dev->ioaddr +
- VIRTIO_PCI_CONFIG(vp_dev) + offset;
+ void __iomem *ioaddr = vp_dev->ioaddr_device + offset;
u8 *ptr = buf;
int i;

@@ -145,8 +298,7 @@ static void vp_set(struct virtio_device *vdev, unsigned offset,
const void *buf, unsigned len)
{
struct virtio_pci_device *vp_dev = to_vp_device(vdev);
- void __iomem *ioaddr = vp_dev->ioaddr +
- VIRTIO_PCI_CONFIG(vp_dev) + offset;
+ void __iomem *ioaddr = vp_dev->ioaddr_device + offset;
const u8 *ptr = buf;
int i;

@@ -184,7 +336,7 @@ static void vp_notify(struct virtqueue *vq)

/* we write the queue's selector into the notification register to
* signal the other end */
- iowrite16(info->queue_index, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_NOTIFY);
+ iowrite16(info->queue_index, vp_dev->ioaddr_notify);
}

/* Handle a configuration change: Tell driver if it wants to know. */
@@ -231,7 +383,8 @@ static irqreturn_t vp_interrupt(int irq, void *opaque)

/* reading the ISR has the effect of also clearing it so it's very
* important to save off the value. */
- isr = ioread8(vp_dev->ioaddr + VIRTIO_PCI_ISR);
+ isr = ioread8(vp_dev->ioaddr_notify +
+ VIRTIO_PCI_ISR - VIRTIO_PCI_QUEUE_NOTIFY);

/* It's definitely not us if the ISR was not high */
if (!isr)
@@ -265,7 +418,7 @@ static void vp_free_vectors(struct virtio_device *vdev)
ioread16(vp_dev->ioaddr + VIRTIO_MSI_CONFIG_VECTOR);

pci_disable_msix(vp_dev->pci_dev);
- vp_dev->msix_enabled = 0;
+ virtio_pci_set_msix_enabled(vp_dev, 0);
vp_dev->msix_vectors = 0;
}

@@ -303,7 +456,7 @@ static int vp_request_msix_vectors(struct virtio_device *vdev, int nvectors,
if (err)
goto error;
vp_dev->msix_vectors = nvectors;
- vp_dev->msix_enabled = 1;
+ virtio_pci_set_msix_enabled(vp_dev, 1);

/* Set the vector used for configuration */
v = vp_dev->msix_used_vectors;
@@ -451,7 +604,10 @@ static void vp_del_vq(struct virtqueue *vq)
iowrite16(VIRTIO_MSI_NO_VECTOR,
vp_dev->ioaddr + VIRTIO_MSI_QUEUE_VECTOR);
/* Flush the write out to device */
- ioread8(vp_dev->ioaddr + VIRTIO_PCI_ISR);
+ ioread8(vp_dev->ioaddr + VIRTIO_MSI_QUEUE_VECTOR);
+ /* And clear ISR: TODO: really needed? */
+ ioread8(vp_dev->ioaddr_notify +
+ VIRTIO_PCI_ISR - VIRTIO_PCI_QUEUE_NOTIFY);
}

vring_del_virtqueue(vq);
@@ -642,8 +798,8 @@ static int __devinit virtio_pci_probe(struct pci_dev *pci_dev,
if (err)
goto out_enable_device;

- vp_dev->ioaddr = pci_iomap(pci_dev, 0, 0);
- if (vp_dev->ioaddr == NULL)
+ err = virtio_pci_iomap(vp_dev);
+ if (err)
goto out_req_regions;

pci_set_drvdata(pci_dev, vp_dev);
@@ -665,7 +821,7 @@ static int __devinit virtio_pci_probe(struct pci_dev *pci_dev,

out_set_drvdata:
pci_set_drvdata(pci_dev, NULL);
- pci_iounmap(pci_dev, vp_dev->ioaddr);
+ virtio_pci_iounmap(vp_dev);
out_req_regions:
pci_release_regions(pci_dev);
out_enable_device:
@@ -683,7 +839,7 @@ static void __devexit virtio_pci_remove(struct pci_dev *pci_dev)

vp_del_vqs(&vp_dev->vdev);
pci_set_drvdata(pci_dev, NULL);
- pci_iounmap(pci_dev, vp_dev->ioaddr);
+ virtio_pci_iounmap(vp_dev);
pci_release_regions(pci_dev);
pci_disable_device(pci_dev);
kfree(vp_dev);
diff --git a/include/asm-generic/io.h b/include/asm-generic/io.h
index 9120887..3cf1787 100644
--- a/include/asm-generic/io.h
+++ b/include/asm-generic/io.h
@@ -286,6 +286,10 @@ static inline void writesb(const void __iomem *addr, const void *buf, int len)
/* Create a virtual mapping cookie for a PCI BAR (memory or IO) */
struct pci_dev;
extern void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long max);
+extern void __iomem *pci_iomap_range(struct pci_dev *dev, int bar,
+ unsigned offset,
+ unsigned long minlen,
+ unsigned long maxlen);
static inline void pci_iounmap(struct pci_dev *dev, void __iomem *p)
{
}
diff --git a/include/asm-generic/iomap.h b/include/asm-generic/iomap.h
index 98dcd76..6f192d4 100644
--- a/include/asm-generic/iomap.h
+++ b/include/asm-generic/iomap.h
@@ -70,8 +70,19 @@ extern void ioport_unmap(void __iomem *);
/* Create a virtual mapping cookie for a PCI BAR (memory or IO) */
struct pci_dev;
extern void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long max);
+extern void __iomem *pci_iomap_range(struct pci_dev *dev, int bar,
+ unsigned offset,
+ unsigned long minlen,
+ unsigned long maxlen);
extern void pci_iounmap(struct pci_dev *dev, void __iomem *);
#else
+static inline void __iomem *pci_iomap_range(struct pci_dev *dev, int bar,
+ unsigned offset,
+ unsigned long minlen,
+ unsigned long maxlen)
+{
+ return NULL;
+}
struct pci_dev;
static inline void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long max)
{
diff --git a/include/linux/pci_regs.h b/include/linux/pci_regs.h
index b5d9657..3e7905c 100644
--- a/include/linux/pci_regs.h
+++ b/include/linux/pci_regs.h
@@ -375,6 +375,10 @@
#define PCI_X_STATUS_266MHZ 0x40000000 /* 266 MHz capable */
#define PCI_X_STATUS_533MHZ 0x80000000 /* 533 MHz capable */

+/* Vendor specific capability */
+#define PCI_VNDR_CAP_LEN 2 /* Capability length (8 bits), including
+ bytes: ID, NEXT and LEN itself. */
+
/* PCI Bridge Subsystem ID registers */

#define PCI_SSVID_VENDOR_ID 4 /* PCI-Bridge subsystem vendor id register */
diff --git a/include/linux/virtio_pci.h b/include/linux/virtio_pci.h
index ea66f3f..d6568e7 100644
--- a/include/linux/virtio_pci.h
+++ b/include/linux/virtio_pci.h
@@ -92,4 +92,45 @@
/* The alignment to use between consumer and producer parts of vring.
* x86 pagesize again. */
#define VIRTIO_PCI_VRING_ALIGN 4096
+
+/*
+ * Layout for Virtio PCI vendor specific capability (little-endian):
+ * 5 bit virtio capability id.
+ * 3 bit BAR index register, specifying which BAR to use.
+ * 4 byte cfg offset within the BAR.
+ * 4 byte cfg size.
+ */
+
+/* A single virtio device has multiple vendor specific capabilities, we use the
+ * 5 bit ID field to distinguish between these. */
+#define VIRTIO_PCI_CAP_ID 3
+#define VIRTIO_PCI_CAP_ID_MASK 0xff
+#define VIRTIO_PCI_CAP_ID_SHIFT 0
+
+/* IDs for different capabilities. If a specific configuration
+ * is missing, legacy PIO path is used. */
+/* Common configuration */
+#define VIRTIO_PCI_CAP_COMMON_CFG 1
+/* Notifications */
+#define VIRTIO_PCI_CAP_NOTIFY_CFG 2
+/* ISR access */
+#define VIRTIO_PCI_CAP_ISR_CFG 3
+/* Device specific confiuration */
+#define VIRTIO_PCI_CAP_DEVICE_CFG 4
+
+/* Index of the BAR including this configuration */
+#define VIRTIO_PCI_CAP_CFG_BIR 4
+#define VIRTIO_PCI_CAP_CFG_BIR_MASK (0x7)
+#define VIRTIO_PCI_CAP_CFG_BIR_SHIFT 0
+
+/* Size of the configuration space */
+#define VIRTIO_PCI_CAP_CFG_SIZE 4
+#define VIRTIO_PCI_CAP_CFG_SIZE_MASK 0xffffff
+#define VIRTIO_PCI_CAP_CFG_SIZE_SHIFT 8
+
+/* Offset within the BAR */
+#define VIRTIO_PCI_CAP_CFG_OFF 8
+#define VIRTIO_PCI_CAP_CFG_OFF_MASK 0xffffffff
+#define VIRTIO_PCI_CAP_CFG_OFF_SHIFT 0
+
#endif
diff --git a/lib/iomap.c b/lib/iomap.c
index 5dbcb4b..f28720e 100644
--- a/lib/iomap.c
+++ b/lib/iomap.c
@@ -243,26 +243,36 @@ EXPORT_SYMBOL(ioport_unmap);

#ifdef CONFIG_PCI
/**
- * pci_iomap - create a virtual mapping cookie for a PCI BAR
+ * pci_iomap_range - create a virtual mapping cookie for a PCI BAR
* @dev: PCI device that owns the BAR
* @bar: BAR number
- * @maxlen: length of the memory to map
+ * @offset: map memory at the given offset in BAR
+ * @minlen: min length of the memory to map
+ * @maxlen: max length of the memory to map
*
* Using this function you will get a __iomem address to your device BAR.
* You can access it using ioread*() and iowrite*(). These functions hide
* the details if this is a MMIO or PIO address space and will just do what
* you expect from them in the correct way.
*
+ * @minlen specifies the minimum length to map. We check that BAR is
+ * large enough.
* @maxlen specifies the maximum length to map. If you want to get access to
- * the complete BAR without checking for its length first, pass %0 here.
+ * the complete BAR from offset to the end, pass %0 here.
* */
-void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long maxlen)
+void __iomem *pci_iomap_range(struct pci_dev *dev, int bar,
+ unsigned offset,
+ unsigned long minlen,
+ unsigned long maxlen)
{
resource_size_t start = pci_resource_start(dev, bar);
resource_size_t len = pci_resource_len(dev, bar);
unsigned long flags = pci_resource_flags(dev, bar);

- if (!len || !start)
+ if (len <= offset || !start)
+ return NULL;
+ len -= offset;
+ if (len < minlen)
return NULL;
if (maxlen && len > maxlen)
len = maxlen;
@@ -277,10 +287,30 @@ void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long maxlen)
return NULL;
}

+/**
+ * pci_iomap - create a virtual mapping cookie for a PCI BAR
+ * @dev: PCI device that owns the BAR
+ * @bar: BAR number
+ * @maxlen: length of the memory to map
+ *
+ * Using this function you will get a __iomem address to your device BAR.
+ * You can access it using ioread*() and iowrite*(). These functions hide
+ * the details if this is a MMIO or PIO address space and will just do what
+ * you expect from them in the correct way.
+ *
+ * @maxlen specifies the maximum length to map. If you want to get access to
+ * the complete BAR without checking for its length first, pass %0 here.
+ * */
+void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long maxlen)
+{
+ return pci_iomap_range(dev, bar, 0, 0, maxlen);
+}
+
void pci_iounmap(struct pci_dev *dev, void __iomem * addr)
{
IO_COND(addr, /* nothing */, iounmap(addr));
}
EXPORT_SYMBOL(pci_iomap);
+EXPORT_SYMBOL(pci_iomap_range);
EXPORT_SYMBOL(pci_iounmap);
#endif /* CONFIG_PCI */
--
1.7.8.rc1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/