Re: [PATCH v2 05/13] pci: Add ACS validation utility

From: Don Dutile
Date: Thu May 24 2012 - 17:31:09 EST


On 05/22/2012 01:05 AM, Alex Williamson wrote:
In a PCI environment, transactions aren't always required to reach
the root bus before being re-routed. Intermediate switches between
an endpoint and the root bus can redirect DMA back downstream before
things like IOMMUs have a chance to intervene. Legacy PCI is always
susceptible to this as it operates on a shared bus. PCIe added a
new capability to describe and control this behavior, Access Control
Services, or ACS. The utility function pci_acs_enabled() allows us
to test the ACS capabilities of an individual devices against a set
of flags while pci_acs_path_enabled() tests a complete path from
a given downstream device up to the specified upstream device. We
also include the ability to add device specific tests as it's
likely we'll see devices that do no implement ACS, but want to
indicate support for various capabilities in this space.

Signed-off-by: Alex Williamson<alex.williamson@xxxxxxxxxx>
---

drivers/pci/pci.c | 76 ++++++++++++++++++++++++++++++++++++++++++++++++++
drivers/pci/quirks.c | 29 +++++++++++++++++++
include/linux/pci.h | 10 ++++++-
3 files changed, 114 insertions(+), 1 deletions(-)

diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 111569c..ab6c2a6 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -2359,6 +2359,82 @@ void pci_enable_acs(struct pci_dev *dev)
}

/**
+ * pci_acs_enable - test ACS against required flags for a given device
typo: ^^^ missing 'd'

+ * @pdev: device to test
+ * @acs_flags: required PCI ACS flags
+ *
+ * Return true if the device supports the provided flags. Automatically
+ * filters out flags that are not implemented on multifunction devices.
+ */
+bool pci_acs_enabled(struct pci_dev *pdev, u16 acs_flags)
+{
+ int pos;
+ u16 ctrl;
+
+ if (pci_dev_specific_acs_enabled(pdev, acs_flags))
+ return true;
+
+ if (!pci_is_pcie(pdev))
+ return false;
+
+ if (pdev->pcie_type == PCI_EXP_TYPE_DOWNSTREAM ||
+ pdev->pcie_type == PCI_EXP_TYPE_ROOT_PORT) {
+ pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ACS);
+ if (!pos)
+ return false;
+
+ pci_read_config_word(pdev, pos + PCI_ACS_CTRL,&ctrl);
+ if ((ctrl& acs_flags) != acs_flags)
+ return false;
+ } else if (pdev->multifunction) {
+ /* Filter out flags not applicable to multifunction */
+ acs_flags&= (PCI_ACS_RR | PCI_ACS_CR |
+ PCI_ACS_EC | PCI_ACS_DT);
+
+ pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ACS);
+ if (!pos)
+ return false;
+
+ pci_read_config_word(pdev, pos + PCI_ACS_CTRL,&ctrl);
+ if ((ctrl& acs_flags) != acs_flags)
+ return false;
+ }
+
+ return true;
or, to reduce duplicated code (which compiler may do?):

/* Filter out flags not applicable to multifunction */
if (pdev->multifunction)
acs_flags &= (PCI_ACS_RR | PCI_ACS_CR |
PCI_ACS_EC | PCI_ACS_DT);

if (pdev->pcie_type == PCI_EXP_TYPE_DOWNSTREAM ||
pdev->pcie_type == PCI_EXP_TYPE_ROOT_PORT ||
pdev->multifunction) {
pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ACS);
if (!pos)
return false;
pci_read_config_word(pdev, pos + PCI_ACS_CTRL, &ctrl);
if ((ctrl & acs_flags) != acs_flags)
return false;
}

return true;
+}

But the above doesn't handle the case where the RC does not do
peer-to-peer btwn root ports. Per ACS spec, such a RC's root ports
don't need to provide an ACS cap, since peer-to-peer port xfers aren't
allowed/enabled/supported, so by design, the root port is ACS compliant.
ATM, an IOMMU-capable system is a pre-req for VFIO,
and all such systems have an ACS cap, but they may not always be true.

+EXPORT_SYMBOL_GPL(pci_acs_enabled);
+
+/**
+ * pci_acs_path_enable - test ACS flags from start to end in a hierarchy
+ * @start: starting downstream device
+ * @end: ending upstream device or NULL to search to the root bus
+ * @acs_flags: required flags
+ *
+ * Walk up a device tree from start to end testing PCI ACS support. If
+ * any step along the way does not support the required flags, return false.
+ */
+bool pci_acs_path_enabled(struct pci_dev *start,
+ struct pci_dev *end, u16 acs_flags)
+{
+ struct pci_dev *pdev, *parent = start;
+
+ do {
+ pdev = parent;
+
+ if (!pci_acs_enabled(pdev, acs_flags))
+ return false;
+
+ if (pci_is_root_bus(pdev->bus))
+ return (end == NULL);
doesn't this mean that a caller can't pass the pdev of the root port?
I would think that is a valid call, albeit not the common one.
Also worried that the above code may be true on Intel machines, but not on AMD
machines (the latter reps its IOMMU as a pdev of root bus, doesn't it?)

+
+ parent = pdev->bus->self;
+ } while (pdev != end);
+
+ return true;
+}
+EXPORT_SYMBOL_GPL(pci_acs_path_enabled);
+
+/**
* pci_swizzle_interrupt_pin - swizzle INTx for device behind bridge
* @dev: the PCI device
* @pin: the INTx pin (1=INTA, 2=INTB, 3=INTD, 4=INTD)
diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index a2dd77f..4ed6aa6 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -3149,3 +3149,32 @@ struct pci_dev *pci_dma_source(struct pci_dev *dev)

return dev;
}
+
+static const struct pci_dev_acs_enabled {
+ u16 vendor;
+ u16 device;
+ bool (*acs_enabled)(struct pci_dev *dev, u16 acs_flags);
+} pci_dev_acs_enabled[] = {
+ { 0 }
+};
+
+bool pci_dev_specific_acs_enabled(struct pci_dev *dev, u16 acs_flags)
+{
+ const struct pci_dev_acs_enabled *i;
+
+ /*
+ * Allow devices that do not expose standard PCI ACS capabilities
+ * or control to indicate their support here. Multi-function devices
+ * which do not allow internal peer-to-peer between functions, but
+ * do not implement PCI ACS may wish to return true here.
+ */
+ for (i = pci_dev_acs_enabled; i->acs_enabled; i++) {
+ if ((i->vendor == dev->vendor ||
+ i->vendor == (u16)PCI_ANY_ID)&&
+ (i->device == dev->device ||
+ i->device == (u16)PCI_ANY_ID))
+ return i->acs_enabled(dev, acs_flags);
+ }
+
+ return false;
+}
I can't wait until these quirks are filled in! :)

diff --git a/include/linux/pci.h b/include/linux/pci.h
index 02dbfed..2559735 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1480,6 +1480,7 @@ enum pci_fixup_pass {
#ifdef CONFIG_PCI_QUIRKS
void pci_fixup_device(enum pci_fixup_pass pass, struct pci_dev *dev);
struct pci_dev *pci_dma_source(struct pci_dev *dev);
+bool pci_dev_specific_acs_enabled(struct pci_dev *dev, u16 acs_flags);
#else
static inline void pci_fixup_device(enum pci_fixup_pass pass,
struct pci_dev *dev) {}
@@ -1487,6 +1488,11 @@ static inline struct pci_dev *pci_dma_source(struct pci_dev *dev)
{
return dev;
}
+static inline bool pci_dev_specific_acs_enabled(struct pci_dev *dev,
+ u16 acs_flags)
+{
+ return false;
+}
#endif

void __iomem *pcim_iomap(struct pci_dev *pdev, int bar, unsigned long maxlen);
@@ -1589,7 +1595,9 @@ static inline bool pci_is_pcie(struct pci_dev *dev)
}

void pci_request_acs(void);
-
+bool pci_acs_enabled(struct pci_dev *pdev, u16 acs_flags);
+bool pci_acs_path_enabled(struct pci_dev *start,
+ struct pci_dev *end, u16 acs_flags);

#define PCI_VPD_LRDT 0x80 /* Large Resource Data Type */
#define PCI_VPD_LRDT_ID(x) (x | PCI_VPD_LRDT)


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/