Re: Machine crashes right *after* ~successful resume

From: Yinghai Lu
Date: Mon Oct 27 2014 - 14:23:54 EST


On Mon, Oct 27, 2014 at 3:50 AM, Wilmer van der Gaast <wilmer@xxxxxxxxx> wrote:

> http://gaast.net/~wilmer/.lkml/bad3.17-pcidumps.txt

[ 252.028142] PCI: 0000:04:00.0
0000: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
0010: ff ff ff ff ff ff ff ff


04:00.0 PCI bridge: Integrated Technology Express, Inc. Device 8892
(rev 10) (prog-if 01 [Subtractive decode])
Control: I/O+ Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr-
Stepping- SERR- FastB2B- DisINTx-
Status: Cap+ 66MHz- UDF- FastB2B- ParErr- DEVSEL=fast >TAbort-
<TAbort- <MAbort- >SERR- <PERR- INTx-
Latency: 0, Cache Line Size: 4 bytes
Bus: primary=04, secondary=05, subordinate=05, sec-latency=32
I/O behind bridge: 0000d000-0000dfff
Memory behind bridge: fbc00000-fbcfffff
Secondary status: 66MHz+ FastB2B- ParErr- DEVSEL=medium >TAbort-
<TAbort- <MAbort+ <SERR- <PERR-
BridgeCtl: Parity- SERR- NoISA- VGA- MAbort- >Reset- FastB2B-
PriDiscTmr- SecDiscTmr+ DiscTmrStat- DiscTmrSERREn-
Capabilities: [90] Power Management version 2
Flags: PMEClk- DSI- D1+ D2+ AuxCurrent=55mA PME(D0+,D1+,D2+,D3hot+,D3cold+)
Status: D0 NoSoftRst- PME-Enable- DSel=0 DScale=0 PME-
Capabilities: [a0] Subsystem: Gigabyte Technology Co., Ltd Device 5000

under

00:1c.3 PCI bridge: Intel Corporation 82801 PCI Bridge (rev b5)
(prog-if 01 [Subtractive decode])

So that ITE bridge does not work after suspend/resume?

Please apply the 4 attached patches and try removing the device like this:

echo 1 > /sys/bus/pci/devices/0000\:04\:00.0/remove
echo 1 > /sys/bus/pci/devices/0000\:00\:1c.3/pcie_link_disable

before suspend/resume test.

Thanks

Yinghai
Subject: [PATCH] PCI: Add generic pcie_link_disable

Remove the unneeded return value checking that Linus pointed out before.

Will use it from /sys/.../pcie_link_disable

Signed-off-by: Yinghai Lu <yinghai@xxxxxxxxxx>

---
drivers/pci/Makefile | 2 +-
drivers/pci/pcie-link.c | 42 ++++++++++++++++++++++++++++++++++++++++++
include/linux/pci.h | 2 ++
3 files changed, 45 insertions(+), 1 deletion(-)

Index: linux-2.6/drivers/pci/pcie-link.c
===================================================================
--- /dev/null
+++ linux-2.6/drivers/pci/pcie-link.c
@@ -0,0 +1,42 @@
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/errno.h>
+#include <linux/jiffies.h>
+#include <linux/delay.h>
+
+int pcie_link_disable_get(struct pci_dev *dev)
+{
+ u16 lnk_ctrl;
+ if (!pci_is_pcie(dev))
+ return 0;
+
+ pcie_capability_read_word(dev, PCI_EXP_LNKCTL, &lnk_ctrl);
+
+ return !!(lnk_ctrl & PCI_EXP_LNKCTL_LD);
+}
+
+void pcie_link_disable_set(struct pci_dev *dev, int bit)
+{
+ u16 lnk_ctrl, old_lnk_ctrl;
+
+ if (!pci_is_pcie(dev))
+ return;
+
+ pcie_capability_read_word(dev, PCI_EXP_LNKCTL, &lnk_ctrl);
+ old_lnk_ctrl = lnk_ctrl;
+
+ if (!bit)
+ lnk_ctrl &= ~PCI_EXP_LNKCTL_LD;
+ else
+ lnk_ctrl |= PCI_EXP_LNKCTL_LD;
+
+ if (old_lnk_ctrl == lnk_ctrl)
+ return;
+
+ pcie_capability_write_word(dev, PCI_EXP_LNKCTL, lnk_ctrl);
+
+ dev_printk(KERN_DEBUG, &dev->dev, "%s: lnk_ctrl = %x\n", __func__,
+ lnk_ctrl);
+}
+EXPORT_SYMBOL(pcie_link_disable_set);
Index: linux-2.6/include/linux/pci.h
===================================================================
--- linux-2.6.orig/include/linux/pci.h
+++ linux-2.6/include/linux/pci.h
@@ -842,6 +842,8 @@ struct pci_bus *pci_scan_root_bus(struct
struct pci_bus *pci_add_new_bus(struct pci_bus *parent, struct pci_dev *dev,
int busnr);
void pcie_update_link_speed(struct pci_bus *bus, u16 link_status);
+void pcie_link_disable_set(struct pci_dev *dev, int bit);
+int pcie_link_disable_get(struct pci_dev *dev);
struct pci_slot *pci_create_slot(struct pci_bus *parent, int slot_nr,
const char *name,
struct hotplug_slot *hotplug);
Index: linux-2.6/drivers/pci/Makefile
===================================================================
--- linux-2.6.orig/drivers/pci/Makefile
+++ linux-2.6/drivers/pci/Makefile
@@ -4,7 +4,7 @@

obj-y += access.o bus.o probe.o host-bridge.o remove.o pci.o \
pci-driver.o search.o pci-sysfs.o rom.o setup-res.o \
- irq.o vpd.o setup-bus.o vc.o
+ irq.o vpd.o setup-bus.o pcie-link.o vc.o
obj-$(CONFIG_PROC_FS) += proc.o
obj-$(CONFIG_SYSFS) += slot.o

Subject: [PATCH] PCI, pciehp: Use generic pcie_link_disable

Also remove the old version with its unneeded return check.

Signed-off-by: Yinghai Lu <yinghai@xxxxxxxxxx>

---
drivers/pci/hotplug/pciehp_hpc.c | 30 +++---------------------------
1 file changed, 3 insertions(+), 27 deletions(-)

Index: linux-2.6/drivers/pci/hotplug/pciehp_hpc.c
===================================================================
--- linux-2.6.orig/drivers/pci/hotplug/pciehp_hpc.c
+++ linux-2.6/drivers/pci/hotplug/pciehp_hpc.c
@@ -305,28 +305,6 @@ int pciehp_check_link_status(struct cont
return 0;
}

-static int __pciehp_link_set(struct controller *ctrl, bool enable)
-{
- struct pci_dev *pdev = ctrl_dev(ctrl);
- u16 lnk_ctrl;
-
- pcie_capability_read_word(pdev, PCI_EXP_LNKCTL, &lnk_ctrl);
-
- if (enable)
- lnk_ctrl &= ~PCI_EXP_LNKCTL_LD;
- else
- lnk_ctrl |= PCI_EXP_LNKCTL_LD;
-
- pcie_capability_write_word(pdev, PCI_EXP_LNKCTL, lnk_ctrl);
- ctrl_dbg(ctrl, "%s: lnk_ctrl = %x\n", __func__, lnk_ctrl);
- return 0;
-}
-
-static int pciehp_link_enable(struct controller *ctrl)
-{
- return __pciehp_link_set(ctrl, true);
-}
-
void pciehp_get_attention_status(struct slot *slot, u8 *status)
{
struct controller *ctrl = slot->ctrl;
@@ -473,7 +451,6 @@ int pciehp_power_on_slot(struct slot * s
struct controller *ctrl = slot->ctrl;
struct pci_dev *pdev = ctrl_dev(ctrl);
u16 slot_status;
- int retval;

/* Clear sticky power-fault bit from previous power failures */
pcie_capability_read_word(pdev, PCI_EXP_SLTSTA, &slot_status);
@@ -487,11 +464,10 @@ int pciehp_power_on_slot(struct slot * s
pci_pcie_cap(ctrl->pcie->port) + PCI_EXP_SLTCTL,
PCI_EXP_SLTCTL_PWR_ON);

- retval = pciehp_link_enable(ctrl);
- if (retval)
- ctrl_err(ctrl, "%s: Can not enable the link!\n", __func__);
+ /* Enable the link */
+ pcie_link_disable_set(ctrl->pcie->port, 0);

- return retval;
+ return 0;
}

void pciehp_power_off_slot(struct slot * slot)
Subject: [PATCH] PCI, sysfs: Add pcie attrs for pcie device under pci dev dir.

Will put link_disable and link_retrain there.

Signed-off-by: Yinghai Lu <yinghai@xxxxxxxxxx>

---
drivers/pci/Makefile | 2 +-
drivers/pci/pci-sysfs.c | 1 +
drivers/pci/pci.h | 1 +
drivers/pci/pcie-sysfs.c | 23 +++++++++++++++++++++++
4 files changed, 26 insertions(+), 1 deletion(-)

Index: linux-2.6/drivers/pci/pci-sysfs.c
===================================================================
--- linux-2.6.orig/drivers/pci/pci-sysfs.c
+++ linux-2.6/drivers/pci/pci-sysfs.c
@@ -1608,6 +1608,7 @@ static struct attribute_group pci_dev_br
static const struct attribute_group *pci_dev_attr_groups[] = {
&pci_dev_attr_group,
&pci_dev_bridge_attr_group,
+ &pci_dev_pcie_attr_group,
&pci_dev_hp_attr_group,
#ifdef CONFIG_PCI_IOV
&sriov_dev_attr_group,
Index: linux-2.6/drivers/pci/pcie-sysfs.c
===================================================================
--- /dev/null
+++ linux-2.6/drivers/pci/pcie-sysfs.c
@@ -0,0 +1,23 @@
+#include <linux/kernel.h>
+#include <linux/pci.h>
+
+static struct attribute *pci_dev_pcie_dev_attrs[] = {
+ NULL,
+};
+
+static umode_t pci_dev_pcie_attrs_are_visible(struct kobject *kobj,
+ struct attribute *a, int n)
+{
+ struct device *dev = container_of(kobj, struct device, kobj);
+ struct pci_dev *pdev = to_pci_dev(dev);
+
+ if (!pci_is_pcie(pdev))
+ return 0;
+
+ return a->mode;
+}
+
+struct attribute_group pci_dev_pcie_attr_group = {
+ .is_visible = pci_dev_pcie_attrs_are_visible,
+ .attrs = pci_dev_pcie_dev_attrs,
+};
Index: linux-2.6/drivers/pci/pci.h
===================================================================
--- linux-2.6.orig/drivers/pci/pci.h
+++ linux-2.6/drivers/pci/pci.h
@@ -152,6 +152,7 @@ static inline int pci_no_d1d2(struct pci
extern const struct attribute_group *pci_dev_groups[];
extern const struct attribute_group *pcibus_groups[];
extern struct device_type pci_dev_type;
+extern struct attribute_group pci_dev_pcie_attr_group;
extern const struct attribute_group *pci_bus_groups[];


Index: linux-2.6/drivers/pci/Makefile
===================================================================
--- linux-2.6.orig/drivers/pci/Makefile
+++ linux-2.6/drivers/pci/Makefile
@@ -4,7 +4,7 @@

obj-y += access.o bus.o probe.o host-bridge.o remove.o pci.o \
pci-driver.o search.o pci-sysfs.o rom.o setup-res.o \
- irq.o vpd.o setup-bus.o pcie-link.o vc.o
+ irq.o vpd.o setup-bus.o pcie-link.o pcie-sysfs.o vc.o
obj-$(CONFIG_PROC_FS) += proc.o
obj-$(CONFIG_SYSFS) += slot.o

Subject: [PATCH] PCI: Add link_disable in /sysfs for pcie device

Found that PCIe cards from one vendor will not respond to a scan from the
bridge if we change the bus number setting in the bridge device.

Have to do link disable/enable on the pcie root port.

So expose the link disable bit of the PCIe link control register. We can use
echo 1 > /sys/..../pcie_link_disable
echo 0 > /sys/..../pcie_link_disable
to make the PCIe device respond to scans again.

Signed-off-by: Yinghai Lu <yinghai@xxxxxxxxxx>

---
drivers/pci/pcie-sysfs.c | 33 +++++++++++++++++++++++++++++++++
1 file changed, 33 insertions(+)

Index: linux-2.6/drivers/pci/pcie-sysfs.c
===================================================================
--- linux-2.6.orig/drivers/pci/pcie-sysfs.c
+++ linux-2.6/drivers/pci/pcie-sysfs.c
@@ -1,7 +1,35 @@
#include <linux/kernel.h>
#include <linux/pci.h>

+static ssize_t
+pcie_link_disable_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct pci_dev *pdev = to_pci_dev(dev);
+
+ return sprintf(buf, "%u\n", pcie_link_disable_get(pdev));
+}
+static ssize_t
+pcie_link_disable_store(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct pci_dev *pdev = to_pci_dev(dev);
+ unsigned long val;
+
+ if (kstrtoul(buf, 0, &val) < 0)
+ return -EINVAL;
+
+ pcie_link_disable_set(pdev, val);
+
+ return count;
+}
+
+static struct device_attribute pcie_link_disable_attr =
+ __ATTR(pcie_link_disable, 0644,
+ pcie_link_disable_show, pcie_link_disable_store);
+
static struct attribute *pci_dev_pcie_dev_attrs[] = {
+ &pcie_link_disable_attr.attr,
NULL,
};

@@ -14,6 +42,11 @@ static umode_t pci_dev_pcie_attrs_are_vi
if (!pci_is_pcie(pdev))
return 0;

+ if (a == &pcie_link_disable_attr.attr)
+ if ((pci_pcie_type(pdev) != PCI_EXP_TYPE_ROOT_PORT) &&
+ (pci_pcie_type(pdev) != PCI_EXP_TYPE_DOWNSTREAM))
+ return 0;
+
return a->mode;
}