[PATCH v2 02/19] PCI: introduce recursive rwsem to serialize PCI hotplug operations

From: Jiang Liu
Date: Fri Apr 27 2012 - 11:20:18 EST


From: Jiang Liu <jiang.liu@xxxxxxxxxx>

There are multiple ways to trigger PCI hotplug requests concurrently,
such as:
1. Sysfs interfaces exported by the PCI core subsystem
2. Sysfs interfaces exported by the PCI hotplug subsystem
3. PCI hotplug events triggered by PCI Hotplug Controllers
4. ACPI hotplug events for PCI host bridges
5. Driver binding/unbinding events

The PCI core subsystem doesn't support concurrent hotplug operations yet,
so all PCI hotplug requests should be globally serialized. This patch
introduces several new interfaces to serialize PCI hotplug operations.

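pci_hotplug_try_enter(): try to acquire the write lock (returns 1 on success, 0 on contention)
pci_hotplug_enter(): acquire the write lock (the owning task may nest calls)
pci_hotplug_exit(): release the write lock (dropped when the outermost enter is undone)
pci_hotplug_disable(): acquire the read lock
pci_hotplug_enable(): release the read lock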

We have reproduced the issue on a real platform: an IA64 system running
SUSE 11 SP1 (official 2.6.32.12 kernel) with the acpiphp driver. The test
script is:
---------------------------------------------------------------------
#!/bin/bash

for ((i=0;i<100;i++))
do
echo 1 > /sys/bus/pci/devices/0000\:43\:00.0/remove
echo 0 > /sys/bus/pci/slots/3/power
sleep 1
echo 1 > /sys/bus/pci/slots/3/power
done

And the bug report is:

------------[ cut here ]------------
WARNING: at fs/sysfs/group.c:138 sysfs_remove_group+0x210/0x240()
Hardware name: H8900
sysfs group a0000001012014f0 not found for kobject '0000:45:00.1'
Modules linked in: acpiphp(N) ipv6(N) cpufreq_conservative(N) cpufreq_userspace(
N) cpufreq_powersave(N) acpi_cpufreq(N) binfmt_misc(N) fuse(N) nls_iso8859_1(N)
loop(N) dm_mod(N) tpm_tis(N) tpm(N) ppdev(N) shpchp(N) tpm_bios(N) serio_raw(N)
qla2xxx(N) i2c_i801(N) scsi_transport_fc(N) pci_hotplug(N) scsi_tgt(N) iTCO_wdt(
N) sg(N) iTCO_vendor_support(N) i2c_core(N) mptctl(N) igb(N) parport_pc(N) parpo
rt(N) button(N) container(N) usbhid(N) hid(N) uhci_hcd(N) ehci_hcd(N) usbcore(N)
sd_mod(N) crc_t10dif(N) ext3(N) mbcache(N) jbd(N) fan(N) processor(N) ide_pci_g
eneric(N) ide_core(N) ata_piix(N) libata(N) mptsas(N) mptscsih(N) mptbase(N) scs
i_transport_sas(N) scsi_mod(N) thermal(N) thermal_sys(N) hwmon(N)
Supported: Yes

Call Trace:
[<a000000100017640>] show_stack+0x80/0xa0
sp=e000002f4421fc00 bsp=e000002f44211678
[<a0000001008cfd10>] dump_stack+0x30/0x50
sp=e000002f4421fdd0 bsp=e000002f44211660
[<a0000001000b9bc0>] warn_slowpath_common+0xc0/0x120
sp=e000002f4421fdd0 bsp=e000002f44211628
[<a0000001000b9d10>] warn_slowpath_fmt+0x90/0xc0
sp=e000002f4421fdd0 bsp=e000002f442115c0
[<a000000100331690>] sysfs_remove_group+0x210/0x240
sp=e000002f4421fe10 bsp=e000002f44211590
[<a000000100636190>] dpm_sysfs_remove+0x30/0x60
sp=e000002f4421fe10 bsp=e000002f44211570
[<a0000001006236c0>] device_del+0x80/0x460
sp=e000002f4421fe10 bsp=e000002f44211528
[<a000000100623ae0>] device_unregister+0x40/0x140
sp=e000002f4421fe10 bsp=e000002f44211508
[<a0000001004d2320>] pci_stop_bus_device+0x160/0x200
sp=e000002f4421fe10 bsp=e000002f442114d8
[<a000000223104e70>] acpiphp_disable_slot+0x170/0x580 [acpiphp]
sp=e000002f4421fe10 bsp=e000002f44211470
[<a000000223100b70>] disable_slot+0x50/0x160 [acpiphp]
sp=e000002f4421fe20 bsp=e000002f44211448
[<a00000021e960e60>] power_write_file+0x240/0x340 [pci_hotplug]
sp=e000002f4421fe20 bsp=e000002f44211418
[<a0000001004e5e00>] pci_slot_attr_store+0x60/0xa0
sp=e000002f4421fe20 bsp=e000002f442113d8
[<a00000010032a260>] sysfs_write_file+0x240/0x340
sp=e000002f4421fe20 bsp=e000002f44211380
[<a000000100232910>] vfs_write+0x1b0/0x3c0
sp=e000002f4421fe20 bsp=e000002f44211330
[<a000000100232ce0>] sys_write+0x80/0x100
sp=e000002f4421fe20 bsp=e000002f442112b8
[<a00000010000c9c0>] ia64_ret_from_syscall+0x0/0x20
sp=e000002f4421fe30 bsp=e000002f442112b8
[<a000000000010720>] __kernel_syscall_via_break+0x0/0x20
sp=e000002f44220000 bsp=e000002f442112b8
---[ end trace bd659e9a3f4f6279 ]---
offline_pci.sh[6450]: NaT consumption 17179869216 [1]
Modules linked in: acpiphp(N) ipv6(N) cpufreq_conservative(N) cpufreq_userspace(
N) cpufreq_powersave(N) acpi_cpufreq(N) binfmt_misc(N) fuse(N) nls_iso8859_1(N)
loop(N) dm_mod(N) tpm_tis(N) tpm(N) ppdev(N) shpchp(N) tpm_bios(N) serio_raw(N)
qla2xxx(N) i2c_i801(N) scsi_transport_fc(N) pci_hotplug(N) scsi_tgt(N) iTCO_wdt(
N) sg(N) iTCO_vendor_support(N) i2c_core(N) mptctl(N) igb(N) parport_pc(N) parpo
rt(N) button(N) container(N) usbhid(N) hid(N) uhci_hcd(N) ehci_hcd(N) usbcore(N)
sd_mod(N) crc_t10dif(N) ext3(N) mbcache(N) jbd(N) fan(N) processor(N) ide_pci_g
eneric(N) ide_core(N) ata_piix(N) libata(N) mptsas(N) mptscsih(N) mptbase(N) scs
i_transport_sas(N) scsi_mod(N) thermal(N) thermal_sys(N) hwmon(N)
Supported: Yes

Pid: 6450, CPU 11, comm: offline_pci.sh
psr : 0000101009526030 ifs : 8000000000000389 ip : [<a0000001008a9870>] Tain
ted: G W N (2.6.32.12-yyz)
ip is at klist_put+0x30/0x160
unat: 0000000000000000 pfs : 0000000000000206 rsc : 0000000000000003
rnat: 8000000000000711 bsps: 0000000000000000 pr : 65519aa656999969
ldrs: 0000000000000000 ccv : 0000000040000000 fpsr: 0009804c0270033f
csd : 0000000000000000 ssd : 0000000000000000
b0 : a0000001008a9a50 b6 : a0000001004b1320 b7 : a00000010000d170
qla2xxx 0000:45:00.1: PCI INT B disabled
f6 : 000000000000000000000 f7 : 1003e9e3779b97f4a7c16
f8 : 1003e0a00000000001072 f9 : 1003effffffffffffffee
f10 : 1003e0000000000000023 f11 : 1003e8208208208208209
r1 : a0000001015c8460 r2 : 0000000000000000 r3 : a0000001013e75b0
r8 : 0000000000000001 r9 : a0000001013e75b0 r10 : a0000001013e8ed8
r11 : 0000000000000000 r12 : e000002f4421fe10 r13 : e000002f44210000
r14 : 0000000000000020 r15 : 0000000000004000 r16 : 0000000000000009
r17 : 0000000000000200 r18 : 0000000040000000 r19 : 0000000040000000
r20 : 0000000040000200 r21 : 0000000040000000 r22 : 000000000001ae13
r23 : 0000000000100000 r24 : a0000001029780f0 r25 : 000000000001ae10
r26 : 000000000001ae10 r27 : 0000000000100000 r28 : 0000000000000034
r29 : 0000000000000034 r30 : a0000001029780f1 r31 : 000000000001ae11

Call Trace:
[<a000000100017640>] show_stack+0x80/0xa0
sp=e000002f4421f850 bsp=e000002f442116f8
[<a000000100017ca0>] show_regs+0x640/0x920
sp=e000002f4421fa20 bsp=e000002f442116a0
[<a000000100028c70>] die+0x190/0x2e0
sp=e000002f4421fa30 bsp=e000002f44211660
[<a000000100028e10>] die_if_kernel+0x50/0x80
sp=e000002f4421fa30 bsp=e000002f44211630
[<a0000001008d8d70>] ia64_fault+0xf0/0x1640
sp=e000002f4421fa30 bsp=e000002f442115d8
[<a00000010000cb60>] ia64_native_leave_kernel+0x0/0x270
sp=e000002f4421fc40 bsp=e000002f442115d8
[<a0000001008a9870>] klist_put+0x30/0x160
sp=e000002f4421fe10 bsp=e000002f44211590
[<a0000001008a9a50>] klist_del+0x30/0x60
sp=e000002f4421fe10 bsp=e000002f44211570
[<a0000001006236e0>] device_del+0xa0/0x460
sp=e000002f4421fe10 bsp=e000002f44211528
[<a000000100623ae0>] device_unregister+0x40/0x140
sp=e000002f4421fe10 bsp=e000002f44211508
[<a0000001004d2320>] pci_stop_bus_device+0x160/0x200
sp=e000002f4421fe10 bsp=e000002f442114d8
[<a000000223104e70>] acpiphp_disable_slot+0x170/0x580 [acpiphp]
sp=e000002f4421fe10 bsp=e000002f44211470
[<a000000223100b70>] disable_slot+0x50/0x160 [acpiphp]
sp=e000002f4421fe20 bsp=e000002f44211448
[<a00000021e960e60>] power_write_file+0x240/0x340 [pci_hotplug]
sp=e000002f4421fe20 bsp=e000002f44211418
[<a0000001004e5e00>] pci_slot_attr_store+0x60/0xa0
sp=e000002f4421fe20 bsp=e000002f442113d8
[<a00000010032a260>] sysfs_write_file+0x240/0x340
sp=e000002f4421fe20 bsp=e000002f44211380
[<a000000100232910>] vfs_write+0x1b0/0x3c0
sp=e000002f4421fe20 bsp=e000002f44211330
[<a000000100232ce0>] sys_write+0x80/0x100
sp=e000002f4421fe20 bsp=e000002f442112b8
[<a00000010000c9c0>] ia64_ret_from_syscall+0x0/0x20
sp=e000002f4421fe30 bsp=e000002f442112b8
[<a000000000010720>] __kernel_syscall_via_break+0x0/0x20
sp=e000002f44220000 bsp=e000002f442112b8
Disabling lock debugging due to kernel taint

Signed-off-by: Jiang Liu <liuj97@xxxxxxxxx>
---
drivers/pci/hotplug.c | 55 ++++++++++++++++++++++++++++++++
drivers/pci/hotplug/pci_hotplug_core.c | 8 ++--
include/linux/pci.h | 14 ++++++++
3 files changed, 73 insertions(+), 4 deletions(-)

diff --git a/drivers/pci/hotplug.c b/drivers/pci/hotplug.c
index 2b5352a..975bd3d 100644
--- a/drivers/pci/hotplug.c
+++ b/drivers/pci/hotplug.c
@@ -1,8 +1,63 @@
#include <linux/kernel.h>
#include <linux/pci.h>
#include <linux/module.h>
+#include <linux/rwsem.h>
#include "pci.h"

+/* Recursive rwsem for PCI hotplug operations; the write side may be nested by its owner. */
+static DECLARE_RWSEM(pci_hotplug_rwsem);
+static struct task_struct *pci_hotplug_mutex_owner;
+static int pci_hotplug_mutex_recursive;
+
+/*
+ * trylock for writing -- returns 1 if successful, 0 if contention
+ */
+int pci_hotplug_try_enter(void)
+{
+ if (current != pci_hotplug_mutex_owner) {
+ if (down_write_trylock(&pci_hotplug_rwsem) == 0)
+ return 0;
+ pci_hotplug_mutex_owner = current;
+ }
+ pci_hotplug_mutex_recursive++;
+
+ return 1;
+}
+EXPORT_SYMBOL(pci_hotplug_try_enter);
+
+void pci_hotplug_enter(void)
+{
+ if (current != pci_hotplug_mutex_owner) {
+ down_write(&pci_hotplug_rwsem);
+ pci_hotplug_mutex_owner = current;
+ }
+ pci_hotplug_mutex_recursive++;
+
+}
+EXPORT_SYMBOL(pci_hotplug_enter);
+
+void pci_hotplug_exit(void)
+{
+ BUG_ON(pci_hotplug_mutex_owner != current);
+ if (--pci_hotplug_mutex_recursive == 0) {
+ pci_hotplug_mutex_owner = NULL;
+ up_write(&pci_hotplug_rwsem);
+ }
+}
+EXPORT_SYMBOL(pci_hotplug_exit);
+
+void pci_hotplug_enable(void)
+{
+ up_read(&pci_hotplug_rwsem);
+}
+EXPORT_SYMBOL(pci_hotplug_enable);
+
+void pci_hotplug_disable(void)
+{
+ down_read(&pci_hotplug_rwsem);
+}
+EXPORT_SYMBOL(pci_hotplug_disable);
+
int pci_uevent(struct device *dev, struct kobj_uevent_env *env)
{
struct pci_dev *pdev;
diff --git a/drivers/pci/hotplug/pci_hotplug_core.c b/drivers/pci/hotplug/pci_hotplug_core.c
index 202f4a9..1572665 100644
--- a/drivers/pci/hotplug/pci_hotplug_core.c
+++ b/drivers/pci/hotplug/pci_hotplug_core.c
@@ -537,7 +537,7 @@ int __must_check pci_hp_change_slot_info(struct hotplug_slot *hotplug,
return 0;
}

-static int __init pci_hotplug_init (void)
+static int __init pci_hp_init(void)
{
int result;

@@ -553,13 +553,13 @@ err_cpci:
return result;
}

-static void __exit pci_hotplug_exit (void)
+static void __exit pci_hp_exit(void)
{
cpci_hotplug_exit();
}

-module_init(pci_hotplug_init);
-module_exit(pci_hotplug_exit);
+module_init(pci_hp_init);
+module_exit(pci_hp_exit);

MODULE_AUTHOR(DRIVER_AUTHOR);
MODULE_DESCRIPTION(DRIVER_DESC);
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 0603a60..1c5f153 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -884,6 +884,20 @@ unsigned int pci_rescan_bus_bridge_resize(struct pci_dev *bridge);
unsigned int pci_rescan_bus(struct pci_bus *bus);
#endif

+#ifdef CONFIG_HOTPLUG
+extern int pci_hotplug_try_enter(void);
+extern void pci_hotplug_enter(void);
+extern void pci_hotplug_exit(void);
+extern void pci_hotplug_disable(void);
+extern void pci_hotplug_enable(void);
+#else
+static inline int pci_hotplug_try_enter(void) { return 1; }
+static inline void pci_hotplug_enter(void) {}
+static inline void pci_hotplug_exit(void) {}
+static inline void pci_hotplug_enable(void) {}
+static inline void pci_hotplug_disable(void) {}
+#endif
+
/* Vital product data routines */
ssize_t pci_read_vpd(struct pci_dev *dev, loff_t pos, size_t count, void *buf);
ssize_t pci_write_vpd(struct pci_dev *dev, loff_t pos, size_t count, const void *buf);
--
1.7.5.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/