Re: 3.6-rc7 boot crash + bisection

From: Alex Williamson
Date: Wed Sep 26 2012 - 18:04:05 EST


On Wed, 2012-09-26 at 13:50 -0600, Alex Williamson wrote:
> On Wed, 2012-09-26 at 10:21 -0600, Alex Williamson wrote:
> > On Wed, 2012-09-26 at 17:10 +0200, Roedel, Joerg wrote:
> > > On Wed, Sep 26, 2012 at 08:35:59AM -0600, Alex Williamson wrote:
> > > > Hmm, that throws a kink in iommu groups. So perhaps we need to make an
> > > > alias interface to iommu groups. Seems like this could just be an extra
> > > > parameter to iommu_group_get and iommu_group_add_device (empty in the
> > > > typical case). Then we have the problem of what's the type for an
> > > > alias? For AMI-Vi, it's a u16, but we need to be more generic than
> > > > that. Maybe iommu groups should just treat it as a void* so iommus can
> > > > use a pointer to some structure or a fixed value like a u16 bus:slot.
> > > > Thoughts?
> > >
> > > Good question. The iommu-groups are part of the IOMMU-API, with an
> > > interface to the IOMMU drivers and one to the users of IOMMU-API. So the
> > > alias handling itself should be a function of the interface to the IOMMU
> > > driver. In general the interface should not be bus specific.
> > >
> > > So a void pointer seems the only logical choice then. But I would not
> > > limit its scope to alias handling. How about making it a bus-private
> > > pointer where IOMMU driver store bus-specific information. That way we
> > > make sure that there is one struct per bus-type for this pointer, and
> > > not one structure per IOMMU driver.
> >
> > I thought of another approach that may actually be more 3.6 worthy.
> > What if we just make the iommu driver handle it? For instance,
> > amd_iommu can walk the alias table looking for entries that use the same
> > alias and get the device via pci_get_bus_and_slot. If it finds a device
> > with an iommu group, it attaches the new device to the same group,
> > hiding anything about aliases from the group layer. It just groups all
> > devices within the range. I think the only complication is making sure
> > we're safe around device hotplug while we're doing this. Thanks,
>
> I think this could work. Instead of searching for other devices, check
> for or allocate an iommu group on the alias dev_data, any "virtual"
> aliases use that iommu group. Florian, could you test this as well?

Here's a lockdep clean version of it:

amd_iommu: Handle aliases not backed by devices

Aliases sometimes don't have a struct pci_dev backing them. This breaks
our attempt to figure out the topology and device quirks that may effect
IOMMU grouping. When this happens, allocate an IOMMU group on the
dev_data for the alias and make use of it for all devices referencing
this alias.

Signed-off-by: Alex Williamson <alex.williamson@xxxxxxxxxx>
---

diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index b64502d..4eacb17 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -71,6 +71,7 @@ static DEFINE_SPINLOCK(iommu_pd_list_lock);
/* List of all available dev_data structures */
static LIST_HEAD(dev_data_list);
static DEFINE_SPINLOCK(dev_data_list_lock);
+static DEFINE_MUTEX(dev_data_iommu_group_lock);

/*
* Domain for untranslated devices - only allocated
@@ -128,6 +129,9 @@ static void free_dev_data(struct iommu_dev_data *dev_data)
list_del(&dev_data->dev_data_list);
spin_unlock_irqrestore(&dev_data_list_lock, flags);

+ if (dev_data->group)
+ iommu_group_put(dev_data->group);
+
kfree(dev_data);
}

@@ -256,6 +260,34 @@ static bool check_device(struct device *dev)
return true;
}

+/*
+ * Sometimes there's no actual device for an alias. When that happens
+ * we allocate an iommu group on the dev_data and use it for anything
+ * aliasing back to this device. This makes sure that multiple devices
+ * aliased to a non-existent device id all get grouped together. Hold
+ * on to the reference for the group, it can be static rather than get
+ * automatically reclaimed if this device later gets removed.
+ */
+static int dev_data_add_iommu_group(struct iommu_dev_data *dev_data,
+ struct device *dev)
+{
+ mutex_lock(&dev_data_iommu_group_lock);
+
+ if (!dev_data->group) {
+ struct iommu_group *group = iommu_group_alloc();
+ if (IS_ERR(group)) {
+ mutex_unlock(&dev_data_iommu_group_lock);
+ return PTR_ERR(group);
+ }
+
+ dev_data->group = group;
+ }
+
+ mutex_unlock(&dev_data_iommu_group_lock);
+
+ return iommu_group_add_device(dev_data->group, dev);
+}
+
static void swap_pci_ref(struct pci_dev **from, struct pci_dev *to)
{
pci_dev_put(*from);
@@ -264,38 +296,17 @@ static void swap_pci_ref(struct pci_dev **from, struct pci_dev *to)

#define REQ_ACS_FLAGS (PCI_ACS_SV | PCI_ACS_RR | PCI_ACS_CR | PCI_ACS_UF)

-static int iommu_init_device(struct device *dev)
+/*
+ * Given a pci device, look at device quirks and topology between it
+ * and the IOMMU to determine the IOMMU group. Once we've found or
+ * created an IOMMU group, add the provided device to it.
+ */
+static int pdev_add_iommu_group(struct pci_dev *pdev, struct device *dev)
{
- struct pci_dev *dma_pdev, *pdev = to_pci_dev(dev);
- struct iommu_dev_data *dev_data;
+ struct pci_dev *dma_pdev = pdev;
struct iommu_group *group;
- u16 alias;
int ret;

- if (dev->archdata.iommu)
- return 0;
-
- dev_data = find_dev_data(get_device_id(dev));
- if (!dev_data)
- return -ENOMEM;
-
- alias = amd_iommu_alias_table[dev_data->devid];
- if (alias != dev_data->devid) {
- struct iommu_dev_data *alias_data;
-
- alias_data = find_dev_data(alias);
- if (alias_data == NULL) {
- pr_err("AMD-Vi: Warning: Unhandled device %s\n",
- dev_name(dev));
- free_dev_data(dev_data);
- return -ENOTSUPP;
- }
- dev_data->alias_data = alias_data;
-
- dma_pdev = pci_get_bus_and_slot(alias >> 8, alias & 0xff);
- } else
- dma_pdev = pci_dev_get(pdev);
-
/* Account for quirked devices */
swap_pci_ref(&dma_pdev, pci_get_dma_source(dma_pdev));

@@ -344,8 +355,61 @@ root_bus:

iommu_group_put(group);

- if (ret)
- return ret;
+ return ret;
+}
+
+static int iommu_init_device(struct device *dev)
+{
+ struct pci_dev *dma_pdev, *pdev = to_pci_dev(dev);
+ struct iommu_dev_data *dev_data;
+ u16 alias;
+ int ret;
+
+ if (dev->archdata.iommu)
+ return 0;
+
+ dev_data = find_dev_data(get_device_id(dev));
+ if (!dev_data)
+ return -ENOMEM;
+
+ alias = amd_iommu_alias_table[dev_data->devid];
+ if (alias != dev_data->devid) {
+ struct iommu_dev_data *alias_data;
+
+ alias_data = find_dev_data(alias);
+ if (alias_data == NULL) {
+ pr_err("AMD-Vi: Warning: Unhandled device %s\n",
+ dev_name(dev));
+ free_dev_data(dev_data);
+ return -ENOTSUPP;
+ }
+ dev_data->alias_data = alias_data;
+
+ /*
+ * If the alias device exists, use it as the base dma
+ * device. This results in all devices aliasing to this
+ * one to be in the same iommu group. If it doesn't
+ * actually exist, store the iommu group on the alias
+ * dev_data and use that for all aliases.
+ */
+ dma_pdev = pci_get_bus_and_slot(alias >> 8, alias & 0xff);
+ if (!dma_pdev) {
+ ret = dev_data_add_iommu_group(alias_data, dev);
+ if (ret) {
+ free_dev_data(dev_data);
+ return ret;
+ }
+ }
+ } else
+ dma_pdev = pci_dev_get(pdev);
+
+ if (dma_pdev) {
+ ret = pdev_add_iommu_group(dma_pdev, dev);
+ if (ret) {
+ free_dev_data(dev_data);
+ return ret;
+ }
+ }

if (pci_iommuv2_capable(pdev)) {
struct amd_iommu *iommu;
diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h
index d0dab86..6597d6a 100644
--- a/drivers/iommu/amd_iommu_types.h
+++ b/drivers/iommu/amd_iommu_types.h
@@ -404,6 +404,7 @@ struct iommu_dev_data {
struct list_head dev_data_list; /* For global dev_data_list */
struct iommu_dev_data *alias_data;/* The alias dev_data */
struct protection_domain *domain; /* Domain the device is bound to */
+ struct iommu_group *group; /* IOMMU group for virtual aliases */
atomic_t bind; /* Domain attach reverent count */
u16 devid; /* PCI Device ID */
bool iommu_v2; /* Device can make use of IOMMUv2 */


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/