Re: Machine crashes right *after* ~successful resume

From: Yinghai Lu
Date: Wed Oct 15 2014 - 14:39:22 EST


On Wed, Oct 15, 2014 at 6:58 AM, Bjorn Helgaas <bhelgaas@xxxxxxxxxx> wrote:
> [+cc Yinghai, author of 928bea964827 ("PCI: Delay enabling bridges
> until they're needed")]
>
> On Wed, Oct 15, 2014 at 5:16 AM, Wilmer van der Gaast <wilmer@xxxxxxxxx>
>> Not sure why 2e8b... was initially found guilty by git bisect, I fear
>> that my testing was not thorough enough. I've verified a couple of times
>> now that 928bea96... does cause crashes and the previous revision does not.

so third resume will not work? that is strange.
second and third should not use same code path...

>>
>> 928bea... seems to reshuffle PCI initialisation a little bit and has
>> caused more troubles, judging from a Google query for it. Some changes
>> were made already as a result, and this unfortunately makes a revert on
>> a later kernel tree (to see if that fixes the problem for me) much less
>> straight-forward. :-(
>
> More details (from initial post) here: http://roy.gaast.net/~wilmer/.lkml/

Please check if attached reverting patch would work on 3.17.

Yinghai
diff --git a/arch/arm/kernel/bios32.c b/arch/arm/kernel/bios32.c
index 17a26c1..306ca53 100644
--- a/arch/arm/kernel/bios32.c
+++ b/arch/arm/kernel/bios32.c
@@ -538,6 +538,12 @@ void pci_common_init_dev(struct device *parent, struct hw_pci *hw)
* Assign resources.
*/
pci_bus_assign_resources(bus);
+
+
+ /*
+ * Enable bridges
+ */
+ pci_enable_bridges(bus);
}

/*
diff --git a/arch/m68k/coldfire/pci.c b/arch/m68k/coldfire/pci.c
index df96792..b33f97a 100644
--- a/arch/m68k/coldfire/pci.c
+++ b/arch/m68k/coldfire/pci.c
@@ -319,6 +319,7 @@ static int __init mcf_pci_init(void)
pci_fixup_irqs(pci_common_swizzle, mcf_pci_map_irq);
pci_bus_size_bridges(rootbus);
pci_bus_assign_resources(rootbus);
+ pci_enable_bridges(rootbus);
return 0;
}

diff --git a/arch/mips/pci/pci.c b/arch/mips/pci/pci.c
index 1bf60b1..4f2e17d 100644
--- a/arch/mips/pci/pci.c
+++ b/arch/mips/pci/pci.c
@@ -113,6 +113,7 @@ static void pcibios_scanbus(struct pci_controller *hose)
if (!pci_has_flag(PCI_PROBE_ONLY)) {
pci_bus_size_bridges(bus);
pci_bus_assign_resources(bus);
+ pci_enable_bridges(bus);
}
}
}
diff --git a/arch/sh/drivers/pci/pci.c b/arch/sh/drivers/pci/pci.c
index 1bc09ee..5272327 100644
--- a/arch/sh/drivers/pci/pci.c
+++ b/arch/sh/drivers/pci/pci.c
@@ -69,6 +69,7 @@ static void pcibios_scanbus(struct pci_channel *hose)

pci_bus_size_bridges(bus);
pci_bus_assign_resources(bus);
+ pci_enable_bridges(bus);
} else {
pci_free_resource_list(&resources);
}
diff --git a/drivers/acpi/pci_root.c b/drivers/acpi/pci_root.c
index cd4de7e..c15bc3c 100644
--- a/drivers/acpi/pci_root.c
+++ b/drivers/acpi/pci_root.c
@@ -614,6 +614,9 @@ static int acpi_pci_root_add(struct acpi_device *device,
if (system_state != SYSTEM_BOOTING) {
pcibios_resource_survey_bus(root->bus);
pci_assign_unassigned_root_bus_resources(root->bus);
+
+ /* need to after hot-added ioapic is registered */
+ pci_enable_bridges(root->bus);
}

pci_lock_rescan_remove();
diff --git a/drivers/parisc/lba_pci.c b/drivers/parisc/lba_pci.c
index 37e71ff..19f6f70 100644
--- a/drivers/parisc/lba_pci.c
+++ b/drivers/parisc/lba_pci.c
@@ -1590,6 +1590,7 @@ lba_driver_probe(struct parisc_device *dev)
lba_dump_res(&lba_dev->hba.lmmio_space, 2);
#endif
}
+ pci_enable_bridges(lba_bus);

/*
** Once PCI register ops has walked the bus, access to config
diff --git a/drivers/pci/bus.c b/drivers/pci/bus.c
index 73aef51..761601e 100644
--- a/drivers/pci/bus.c
+++ b/drivers/pci/bus.c
@@ -283,6 +283,26 @@ void pci_bus_add_devices(const struct pci_bus *bus)
}
EXPORT_SYMBOL(pci_bus_add_devices);

+void pci_enable_bridges(struct pci_bus *bus)
+{
+ struct pci_dev *dev;
+ int retval;
+
+ list_for_each_entry(dev, &bus->devices, bus_list) {
+ if (dev->subordinate) {
+ if (!pci_is_enabled(dev)) {
+ retval = pci_enable_device(dev);
+ if (retval)
+ dev_err(&dev->dev, "Error enabling bridge (%d), continuing\n", retval);
+ pci_set_master(dev);
+ }
+ pci_enable_bridges(dev->subordinate);
+ }
+ }
+}
+EXPORT_SYMBOL(pci_enable_bridges);
+
+
/** pci_walk_bus - walk devices on/under bus, calling callback.
* @top bus whose devices should be walked
* @cb callback to be called for each device found
diff --git a/drivers/pci/hotplug/acpiphp_glue.c b/drivers/pci/hotplug/acpiphp_glue.c
index bcb90e4..8fadc84 100644
--- a/drivers/pci/hotplug/acpiphp_glue.c
+++ b/drivers/pci/hotplug/acpiphp_glue.c
@@ -511,6 +511,7 @@ static void enable_slot(struct acpiphp_slot *slot)
acpiphp_sanitize_bus(bus);
pcie_bus_configure_settings(bus);
acpiphp_set_acpi_region(slot);
+ pci_enable_bridges(bus);

list_for_each_entry(dev, &bus->devices, bus_list) {
/* Assume that newly added devices are powered on already. */
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 625a4ac..4121518 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -1242,31 +1242,8 @@ int pci_reenable_device(struct pci_dev *dev)
}
EXPORT_SYMBOL(pci_reenable_device);

-static void pci_enable_bridge(struct pci_dev *dev)
-{
- struct pci_dev *bridge;
- int retval;
-
- bridge = pci_upstream_bridge(dev);
- if (bridge)
- pci_enable_bridge(bridge);
-
- if (pci_is_enabled(dev)) {
- if (!dev->is_busmaster)
- pci_set_master(dev);
- return;
- }
-
- retval = pci_enable_device(dev);
- if (retval)
- dev_err(&dev->dev, "Error enabling bridge (%d), continuing\n",
- retval);
- pci_set_master(dev);
-}
-
static int pci_enable_device_flags(struct pci_dev *dev, unsigned long flags)
{
- struct pci_dev *bridge;
int err;
int i, bars = 0;

@@ -1285,10 +1262,6 @@ static int pci_enable_device_flags(struct pci_dev *dev, unsigned long flags)
if (atomic_inc_return(&dev->enable_cnt) > 1)
return 0; /* already enabled */

- bridge = pci_upstream_bridge(dev);
- if (bridge)
- pci_enable_bridge(bridge);
-
/* only skip sriov related */
for (i = 0; i <= PCI_ROM_RESOURCE; i++)
if (dev->resource[i].flags & flags)
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index 5ed9930..df17ba8 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -2177,6 +2177,7 @@ unsigned int pci_rescan_bus(struct pci_bus *bus)

max = pci_scan_child_bus(bus);
pci_assign_unassigned_bus_resources(bus);
+ pci_enable_bridges(bus);
pci_bus_add_devices(bus);

return max;
diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c
index 0482235..2cfb1eb 100644
--- a/drivers/pci/setup-bus.c
+++ b/drivers/pci/setup-bus.c
@@ -1587,7 +1587,7 @@ again:

/* any device complain? */
if (list_empty(&fail_head))
- goto dump;
+ goto enable_and_dump;

if (tried_times >= pci_try_num) {
if (enable_local == undefined)
@@ -1596,7 +1596,7 @@ again:
dev_info(&bus->dev, "Automatically enabled pci realloc, if you have problem, try booting with pci=realloc=off\n");

free_list(&fail_head);
- goto dump;
+ goto enable_and_dump;
}

dev_printk(KERN_DEBUG, &bus->dev,
@@ -1629,7 +1629,10 @@ again:

goto again;

-dump:
+enable_and_dump:
+ /* Depth last, update the hardware. */
+ pci_enable_bridges(bus);
+
/* dump the resource on buses */
pci_bus_dump_resources(bus);
}
@@ -1700,6 +1703,7 @@ enable_all:
if (retval)
dev_err(&bridge->dev, "Error reenabling bridge (%d)\n", retval);
pci_set_master(bridge);
+ pci_enable_bridges(parent);
}
EXPORT_SYMBOL_GPL(pci_assign_unassigned_bridge_resources);

diff --git a/drivers/pcmcia/cardbus.c b/drivers/pcmcia/cardbus.c
index 4fe4cc4..9cbe4cf 100644
--- a/drivers/pcmcia/cardbus.c
+++ b/drivers/pcmcia/cardbus.c
@@ -92,6 +92,7 @@ int __ref cb_alloc(struct pcmcia_socket *s)
if (s->tune_bridge)
s->tune_bridge(s, bus);

+ pci_enable_bridges(bus);
pci_bus_add_devices(bus);

pci_unlock_rescan_remove();
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 5be8db4..1f85fb5 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1105,7 +1105,7 @@ int __must_check pci_bus_alloc_resource(struct pci_bus *bus,
resource_size_t,
resource_size_t),
void *alignf_data);
-
+void pci_enable_bridges(struct pci_bus *bus);

int pci_remap_iospace(const struct resource *res, phys_addr_t phys_addr);