Re: [BUG] igb: reconnecting of cable not always detected
From: Holger Schurig
Date: Thu Apr 26 2018 - 05:08:37 EST
Hi,
> Thanks. I'm suspecting we may need to instrument igb_rd32 at this
> point. In order to trigger what you are seeing I am assuming the
> device has been detached due to a read failure of some sort.
Okay, I added a printk to igb_rd32. And because no one calls this
function directly (all access goes via the rd32/array_rd32 macros) I also
added the name of the calling function to the output. This should help
greatly in tracing each hardware read back to its consumer.
Finally, I noticed that igb_update_stats() produced a lot of churn that
is most likely unrelated. So I added a helper variable to make the output
from this function go away.
I installed this modified driver, rebooted, and removed / inserted the
LAN cable until the error was present.
As before, "ethtool" and "mii-tool" now said that the device is not
there, while "ip link" showed the device as present.
The full output of "journalctl -fk | grep igb" is 600 kB, so I put the
whole file on Google Drive:
https://drive.google.com/open?id=1p9cCT2d_EHnSHh29oS3AepUgFTKGFSeA
I looked at the output to see patterns, e.g. with
grep -n igb_get_cfg_done_i210 igb.error.txt
grep -n __igb_shutdown igb.error.txt
...
(and almost all other function names). I hoped to see patterns. But to
my untrained eye, nothing looked out of order.
(For reference, here is the debug patch)
Index: linux-4.16/drivers/net/ethernet/intel/igb/igb_main.c
===================================================================
--- linux-4.16.orig/drivers/net/ethernet/intel/igb/igb_main.c 2018-04-01 23:20:27.000000000 +0200
+++ linux-4.16/drivers/net/ethernet/intel/igb/igb_main.c 2018-04-26 10:36:09.625135952 +0200
@@ -759,7 +759,8 @@
}
}
-u32 igb_rd32(struct e1000_hw *hw, u32 reg)
+int igb_rd32_silent = 0;
+u32 igb_rd32(const char *func, struct e1000_hw *hw, u32 reg)
{
struct igb_adapter *igb = container_of(hw, struct igb_adapter, hw);
u8 __iomem *hw_addr = READ_ONCE(hw->hw_addr);
@@ -769,6 +770,8 @@
return ~value;
value = readl(&hw_addr[reg]);
+ if (!igb_rd32_silent)
+ printk("rd32 %s %08x %08x\n", func, reg, value);
/* reads should not return all F's */
if (!(~value) && (!reg || !(~readl(hw_addr)))) {
@@ -5935,6 +5938,7 @@
if (pci_channel_offline(pdev))
return;
+ igb_rd32_silent = 1;
bytes = 0;
packets = 0;
@@ -6100,6 +6104,7 @@
adapter->stats.b2ospc += rd32(E1000_B2OSPC);
adapter->stats.b2ogprc += rd32(E1000_B2OGPRC);
}
+ igb_rd32_silent = 0;
}
static void igb_tsync_interrupt(struct igb_adapter *adapter)
Index: linux-4.16/drivers/net/ethernet/intel/igb/e1000_regs.h
===================================================================
--- linux-4.16.orig/drivers/net/ethernet/intel/igb/e1000_regs.h 2018-04-01 23:20:27.000000000 +0200
+++ linux-4.16/drivers/net/ethernet/intel/igb/e1000_regs.h 2018-04-26 10:34:24.332157000 +0200
@@ -370,7 +370,8 @@
struct e1000_hw;
-u32 igb_rd32(struct e1000_hw *hw, u32 reg);
+extern int igb_rd32_silent;
+u32 igb_rd32(const char *fname, struct e1000_hw *hw, u32 reg);
/* write operations, indexed using DWORDS */
#define wr32(reg, val) \
@@ -380,14 +381,14 @@
writel((val), &hw_addr[(reg)]); \
} while (0)
-#define rd32(reg) (igb_rd32(hw, reg))
+#define rd32(reg) (igb_rd32(__func__, hw, reg))
#define wrfl() ((void)rd32(E1000_STATUS))
#define array_wr32(reg, offset, value) \
wr32((reg) + ((offset) << 2), (value))
-#define array_rd32(reg, offset) (igb_rd32(hw, reg + ((offset) << 2)))
+#define array_rd32(reg, offset) (igb_rd32(__func__, hw, reg + ((offset) << 2)))
/* DMA Coalescing registers */
#define E1000_PCIEMISC 0x05BB8 /* PCIE misc config register */